commit a76fa2899e70b7cdaa06e99d654e133b14ffb99c Author: ModelHub XC Date: Sun Jun 14 08:57:19 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: rbelanec/train_qqp_42_1779207273 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..3a64eae --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- full +- llama-factory +- generated_from_trainer +model-index: +- name: train_qqp_42_1779207273 + results: [] +--- + + + +# train_qqp_42_1779207273 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the qqp dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1128 +- Num Input Tokens Seen: 137941664 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 5 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:------:|:---------------:|:-----------------:| +| 0.1388 | 0.2500 | 10234 | 0.1504 | 6910656 | +| 0.1021 | 0.5000 | 20468 | 0.1128 | 13780928 | +| 0.0459 | 0.7501 | 30702 | 0.1215 | 20680640 | +| 0.1214 | 1.0001 | 40936 | 0.1224 | 27591776 | +| 0.1395 | 1.2501 | 51170 | 0.1528 | 34492320 | +| 0.0425 | 1.5001 | 61404 | 0.1524 | 41393504 | +| 0.1097 | 1.7501 | 71638 | 0.1231 | 48287456 | +| 0.0019 | 2.0001 | 81872 | 0.1237 | 55178600 | +| 0.0001 | 2.2502 | 92106 | 0.2041 | 62093992 | +| 0.0236 | 2.5002 | 102340 | 0.1835 | 68988456 | +| 0.0008 | 2.7502 | 112574 | 0.2039 | 75874280 | +| 0.0003 | 3.0002 | 122808 | 0.1936 | 82772304 | +| 0.0 | 3.2502 | 133042 | 0.2610 | 89675984 | +| 0.0332 | 3.5003 | 143276 | 0.2494 | 96560720 | +| 0.0 | 3.7503 | 153510 | 0.2414 | 103465808 | +| 0.0 | 4.0003 | 163744 | 0.2473 | 110357352 | +| 0.0 | 4.2503 | 173978 | 0.3375 | 117230952 | +| 0.0 | 4.5003 | 184212 | 0.3128 | 124100264 | +| 0.0727 | 4.7503 | 194446 | 0.3178 | 131030440 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..5e3ff23 --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 5.0, + "eval_loss": 0.11281616985797882, + "eval_runtime": 47.5121, + "eval_samples_per_second": 765.805, + "eval_steps_per_second": 95.744, + "num_input_tokens_seen": 137941664, + "total_flos": 8.054243640264622e+17, + "train_loss": 0.052412337061547826, + "train_runtime": 15894.9677, + "train_samples_per_second": 103.008, + "train_steps_per_second": 12.876 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5a2b93f --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..d758b88 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "eval_loss": 0.11281616985797882, + "eval_runtime": 47.5121, + "eval_samples_per_second": 765.805, + "eval_steps_per_second": 95.744, + "num_input_tokens_seen": 137941664 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..97f998d --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df177d2c3f3f41781538d137ebac602187bd2c359a020c613016d434c4fa7de +size 4943274328 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..9b1893c --- /dev/null +++ b/train.yaml @@ -0,0 +1,55 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +stage: sft +do_train: true +finetuning_type: full + +### dataset +dataset: qqp +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/base/llama-3.2-1b-instruct/train_qqp_42_1779207273 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 2.0e-6 +num_train_epochs: 5 +weight_decay: 1.0e-2 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: base_llama-3.2-1b-instruct_train_qqp_42_1779207273 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..6cb3808 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 5.0, + "num_input_tokens_seen": 137941664, + "total_flos": 8.054243640264622e+17, + "train_loss": 0.052412337061547826, + "train_runtime": 15894.9677, + "train_samples_per_second": 103.008, + "train_steps_per_second": 12.876 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..65b5a53 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,40953 @@ +{"current_steps": 5, "total_steps": 204665, "loss": 1.5874, "lr": 3.908731128157522e-10, "epoch": 0.00012215083184716487, "percentage": 0.0, "elapsed_time": "0:00:00", "remaining_time": "7:56:10", "throughput": 4584.47, "total_tokens": 3200} +{"current_steps": 10, "total_steps": 204665, "loss": 1.5585, "lr": 8.794645038354424e-10, "epoch": 0.00024430166369432974, "percentage": 0.0, "elapsed_time": "0:00:01", "remaining_time": "5:59:47", "throughput": 6188.56, "total_tokens": 6528} +{"current_steps": 15, "total_steps": 204665, "loss": 1.5632, "lr": 1.3680558948551327e-09, "epoch": 0.00036645249554149463, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "5:19:48", "throughput": 7007.61, "total_tokens": 9856} +{"current_steps": 20, "total_steps": 204665, "loss": 1.6307, "lr": 1.8566472858748227e-09, "epoch": 0.0004886033273886595, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "4:57:47", "throughput": 7366.72, "total_tokens": 12864} +{"current_steps": 25, "total_steps": 204665, "loss": 1.559, "lr": 2.345238676894513e-09, "epoch": 0.0006107541592358244, "percentage": 0.01, "elapsed_time": "0:00:02", "remaining_time": "4:49:47", "throughput": 7894.03, "total_tokens": 16768} +{"current_steps": 30, "total_steps": 204665, "loss": 1.604, "lr": 2.833830067914203e-09, "epoch": 0.0007329049910829893, "percentage": 0.01, "elapsed_time": "0:00:02", "remaining_time": "4:40:07", "throughput": 8051.77, "total_tokens": 19840} +{"current_steps": 35, "total_steps": 204665, "loss": 1.4891, "lr": 3.3224214589338933e-09, "epoch": 0.0008550558229301541, "percentage": 0.02, "elapsed_time": "0:00:02", "remaining_time": "4:34:40", "throughput": 8332.44, "total_tokens": 23488} +{"current_steps": 40, "total_steps": 204665, "loss": 1.63, "lr": 3.811012849953584e-09, "epoch": 0.000977206654777319, "percentage": 0.02, "elapsed_time": "0:00:03", "remaining_time": "4:30:03", "throughput": 8465.94, "total_tokens": 26816} +{"current_steps": 45, "total_steps": 204665, "loss": 1.6983, "lr": 4.299604240973273e-09, "epoch": 0.001099357486624484, "percentage": 0.02, "elapsed_time": "0:00:03", "remaining_time": "4:27:51", "throughput": 8619.01, "total_tokens": 30464} +{"current_steps": 50, "total_steps": 204665, "loss": 1.6001, "lr": 4.788195631992964e-09, "epoch": 0.0012215083184716488, "percentage": 0.02, "elapsed_time": "0:00:03", "remaining_time": "4:24:52", "throughput": 8717.95, "total_tokens": 33856} +{"current_steps": 55, "total_steps": 204665, "loss": 1.6222, "lr": 5.276787023012655e-09, "epoch": 0.0013436591503188137, "percentage": 0.03, "elapsed_time": "0:00:04", "remaining_time": "4:22:16", "throughput": 8805.64, "total_tokens": 37248} +{"current_steps": 60, "total_steps": 204665, "loss": 1.3772, "lr": 5.7653784140323445e-09, "epoch": 0.0014658099821659785, "percentage": 0.03, "elapsed_time": "0:00:04", "remaining_time": "4:20:47", "throughput": 8898.4, "total_tokens": 40832} +{"current_steps": 65, "total_steps": 204665, "loss": 1.7392, "lr": 6.253969805052035e-09, "epoch": 0.0015879608140131434, "percentage": 0.03, "elapsed_time": "0:00:04", "remaining_time": "4:18:47", "throughput": 8925.77, "total_tokens": 44032} +{"current_steps": 70, "total_steps": 204665, "loss": 1.5064, "lr": 6.7425611960717245e-09, "epoch": 0.0017101116458603082, "percentage": 0.03, "elapsed_time": "0:00:05", "remaining_time": "4:16:37", "throughput": 8904.81, "total_tokens": 46912} +{"current_steps": 75, "total_steps": 204665, "loss": 1.5169, "lr": 7.231152587091415e-09, "epoch": 0.0018322624777074733, "percentage": 0.04, "elapsed_time": "0:00:05", "remaining_time": "4:14:43", "throughput": 8887.18, "total_tokens": 49792} +{"current_steps": 80, "total_steps": 204665, "loss": 1.6833, "lr": 7.719743978111105e-09, "epoch": 0.001954413309554638, "percentage": 0.04, "elapsed_time": "0:00:05", "remaining_time": "4:13:12", "throughput": 8887.84, "total_tokens": 52800} +{"current_steps": 85, "total_steps": 204665, "loss": 1.7706, "lr": 8.208335369130795e-09, "epoch": 0.002076564141401803, "percentage": 0.04, "elapsed_time": "0:00:06", "remaining_time": "4:12:44", "throughput": 8999.85, "total_tokens": 56704} +{"current_steps": 90, "total_steps": 204665, "loss": 1.5604, "lr": 8.696926760150486e-09, "epoch": 0.002198714973248968, "percentage": 0.04, "elapsed_time": "0:00:06", "remaining_time": "4:11:56", "throughput": 9036.68, "total_tokens": 60096} +{"current_steps": 95, "total_steps": 204665, "loss": 1.5772, "lr": 9.185518151170177e-09, "epoch": 0.0023208658050961326, "percentage": 0.05, "elapsed_time": "0:00:06", "remaining_time": "4:11:00", "throughput": 9050.1, "total_tokens": 63296} +{"current_steps": 100, "total_steps": 204665, "loss": 1.4907, "lr": 9.674109542189865e-09, "epoch": 0.0024430166369432977, "percentage": 0.05, "elapsed_time": "0:00:07", "remaining_time": "4:10:29", "throughput": 9094.53, "total_tokens": 66816} +{"current_steps": 105, "total_steps": 204665, "loss": 1.5834, "lr": 1.0162700933209557e-08, "epoch": 0.0025651674687904623, "percentage": 0.05, "elapsed_time": "0:00:07", "remaining_time": "4:09:59", "throughput": 9127.33, "total_tokens": 70272} +{"current_steps": 110, "total_steps": 204665, "loss": 1.6402, "lr": 1.0651292324229246e-08, "epoch": 0.0026873183006376274, "percentage": 0.05, "elapsed_time": "0:00:08", "remaining_time": "4:09:15", "throughput": 9119.41, "total_tokens": 73344} +{"current_steps": 115, "total_steps": 204665, "loss": 1.3031, "lr": 1.1139883715248937e-08, "epoch": 0.0028094691324847924, "percentage": 0.06, "elapsed_time": "0:00:08", "remaining_time": "4:08:39", "throughput": 9117.69, "total_tokens": 76480} +{"current_steps": 120, "total_steps": 204665, "loss": 1.8053, "lr": 1.1628475106268627e-08, "epoch": 0.002931619964331957, "percentage": 0.06, "elapsed_time": "0:00:08", "remaining_time": "4:08:22", "throughput": 9143.17, "total_tokens": 79936} +{"current_steps": 125, "total_steps": 204665, "loss": 1.5657, "lr": 1.2117066497288317e-08, "epoch": 0.003053770796179122, "percentage": 0.06, "elapsed_time": "0:00:09", "remaining_time": "4:07:52", "throughput": 9153.63, "total_tokens": 83200} +{"current_steps": 130, "total_steps": 204665, "loss": 1.4574, "lr": 1.2605657888308008e-08, "epoch": 0.0031759216280262867, "percentage": 0.06, "elapsed_time": "0:00:09", "remaining_time": "4:07:31", "throughput": 9187.11, "total_tokens": 86720} +{"current_steps": 135, "total_steps": 204665, "loss": 1.4738, "lr": 1.3094249279327697e-08, "epoch": 0.003298072459873452, "percentage": 0.07, "elapsed_time": "0:00:09", "remaining_time": "4:07:02", "throughput": 9197.15, "total_tokens": 89984} +{"current_steps": 140, "total_steps": 204665, "loss": 1.5492, "lr": 1.3582840670347389e-08, "epoch": 0.0034202232917206164, "percentage": 0.07, "elapsed_time": "0:00:10", "remaining_time": "4:06:32", "throughput": 9196.66, "total_tokens": 93120} +{"current_steps": 145, "total_steps": 204665, "loss": 1.5314, "lr": 1.4071432061367078e-08, "epoch": 0.0035423741235677815, "percentage": 0.07, "elapsed_time": "0:00:10", "remaining_time": "4:06:39", "throughput": 9253.27, "total_tokens": 97088} +{"current_steps": 150, "total_steps": 204665, "loss": 1.3488, "lr": 1.4560023452386768e-08, "epoch": 0.0036645249554149465, "percentage": 0.07, "elapsed_time": "0:00:10", "remaining_time": "4:06:10", "throughput": 9245.64, "total_tokens": 100160} +{"current_steps": 155, "total_steps": 204665, "loss": 1.3233, "lr": 1.504861484340646e-08, "epoch": 0.003786675787262111, "percentage": 0.08, "elapsed_time": "0:00:11", "remaining_time": "4:05:53", "throughput": 9254.88, "total_tokens": 103488} +{"current_steps": 160, "total_steps": 204665, "loss": 1.4491, "lr": 1.553720623442615e-08, "epoch": 0.003908826619109276, "percentage": 0.08, "elapsed_time": "0:00:11", "remaining_time": "4:05:39", "throughput": 9262.43, "total_tokens": 106816} +{"current_steps": 165, "total_steps": 204665, "loss": 1.327, "lr": 1.6025797625445838e-08, "epoch": 0.004030977450956441, "percentage": 0.08, "elapsed_time": "0:00:11", "remaining_time": "4:05:23", "throughput": 9277.34, "total_tokens": 110208} +{"current_steps": 170, "total_steps": 204665, "loss": 1.3947, "lr": 1.651438901646553e-08, "epoch": 0.004153128282803606, "percentage": 0.08, "elapsed_time": "0:00:12", "remaining_time": "4:05:02", "throughput": 9278.55, "total_tokens": 113408} +{"current_steps": 175, "total_steps": 204665, "loss": 1.2108, "lr": 1.700298040748522e-08, "epoch": 0.0042752791146507705, "percentage": 0.09, "elapsed_time": "0:00:12", "remaining_time": "4:04:36", "throughput": 9258.53, "total_tokens": 116288} +{"current_steps": 180, "total_steps": 204665, "loss": 1.3352, "lr": 1.749157179850491e-08, "epoch": 0.004397429946497936, "percentage": 0.09, "elapsed_time": "0:00:12", "remaining_time": "4:04:15", "throughput": 9247.46, "total_tokens": 119296} +{"current_steps": 185, "total_steps": 204665, "loss": 1.3764, "lr": 1.79801631895246e-08, "epoch": 0.004519580778345101, "percentage": 0.09, "elapsed_time": "0:00:13", "remaining_time": "4:04:02", "throughput": 9256.55, "total_tokens": 122624} +{"current_steps": 190, "total_steps": 204665, "loss": 1.4257, "lr": 1.8468754580544288e-08, "epoch": 0.004641731610192265, "percentage": 0.09, "elapsed_time": "0:00:13", "remaining_time": "4:03:53", "throughput": 9277.03, "total_tokens": 126144} +{"current_steps": 195, "total_steps": 204665, "loss": 1.3153, "lr": 1.8957345971563982e-08, "epoch": 0.00476388244203943, "percentage": 0.1, "elapsed_time": "0:00:14", "remaining_time": "4:04:42", "throughput": 9401.67, "total_tokens": 131648} +{"current_steps": 200, "total_steps": 204665, "loss": 1.3291, "lr": 1.944593736258367e-08, "epoch": 0.004886033273886595, "percentage": 0.1, "elapsed_time": "0:00:14", "remaining_time": "4:04:29", "throughput": 9402.25, "total_tokens": 134912} +{"current_steps": 205, "total_steps": 204665, "loss": 1.0956, "lr": 1.9934528753603358e-08, "epoch": 0.00500818410573376, "percentage": 0.1, "elapsed_time": "0:00:14", "remaining_time": "4:04:18", "throughput": 9410.45, "total_tokens": 138304} +{"current_steps": 210, "total_steps": 204665, "loss": 0.9825, "lr": 2.0423120144623053e-08, "epoch": 0.005130334937580925, "percentage": 0.1, "elapsed_time": "0:00:15", "remaining_time": "4:04:14", "throughput": 9426.37, "total_tokens": 141888} +{"current_steps": 215, "total_steps": 204665, "loss": 0.9918, "lr": 2.091171153564274e-08, "epoch": 0.00525248576942809, "percentage": 0.11, "elapsed_time": "0:00:15", "remaining_time": "4:04:02", "throughput": 9422.18, "total_tokens": 145088} +{"current_steps": 220, "total_steps": 204665, "loss": 0.9288, "lr": 2.1400302926662432e-08, "epoch": 0.005374636601275255, "percentage": 0.11, "elapsed_time": "0:00:15", "remaining_time": "4:03:54", "throughput": 9428.35, "total_tokens": 148480} +{"current_steps": 225, "total_steps": 204665, "loss": 0.8789, "lr": 2.1888894317682123e-08, "epoch": 0.005496787433122419, "percentage": 0.11, "elapsed_time": "0:00:16", "remaining_time": "4:03:55", "throughput": 9452.74, "total_tokens": 152256} +{"current_steps": 230, "total_steps": 204665, "loss": 0.9644, "lr": 2.237748570870181e-08, "epoch": 0.005618938264969585, "percentage": 0.11, "elapsed_time": "0:00:16", "remaining_time": "4:03:45", "throughput": 9455.54, "total_tokens": 155584} +{"current_steps": 235, "total_steps": 204665, "loss": 0.9388, "lr": 2.2866077099721502e-08, "epoch": 0.0057410890968167495, "percentage": 0.11, "elapsed_time": "0:00:16", "remaining_time": "4:03:40", "throughput": 9466.51, "total_tokens": 159104} +{"current_steps": 240, "total_steps": 204665, "loss": 1.0616, "lr": 2.335466849074119e-08, "epoch": 0.005863239928663914, "percentage": 0.12, "elapsed_time": "0:00:17", "remaining_time": "4:03:27", "throughput": 9456.34, "total_tokens": 162176} +{"current_steps": 245, "total_steps": 204665, "loss": 0.7593, "lr": 2.384325988176088e-08, "epoch": 0.005985390760511079, "percentage": 0.12, "elapsed_time": "0:00:17", "remaining_time": "4:03:18", "throughput": 9455.48, "total_tokens": 165440} +{"current_steps": 250, "total_steps": 204665, "loss": 0.8342, "lr": 2.4331851272780573e-08, "epoch": 0.006107541592358244, "percentage": 0.12, "elapsed_time": "0:00:17", "remaining_time": "4:03:24", "throughput": 9466.92, "total_tokens": 169088} +{"current_steps": 255, "total_steps": 204665, "loss": 0.8074, "lr": 2.482044266380026e-08, "epoch": 0.006229692424205409, "percentage": 0.12, "elapsed_time": "0:00:18", "remaining_time": "4:03:18", "throughput": 9471.05, "total_tokens": 172480} +{"current_steps": 260, "total_steps": 204665, "loss": 0.7764, "lr": 2.5309034054819955e-08, "epoch": 0.0063518432560525735, "percentage": 0.13, "elapsed_time": "0:00:18", "remaining_time": "4:03:07", "throughput": 9467.87, "total_tokens": 175680} +{"current_steps": 265, "total_steps": 204665, "loss": 0.7722, "lr": 2.5797625445839643e-08, "epoch": 0.006473994087899739, "percentage": 0.13, "elapsed_time": "0:00:18", "remaining_time": "4:03:00", "throughput": 9469.75, "total_tokens": 179008} +{"current_steps": 270, "total_steps": 204665, "loss": 0.7188, "lr": 2.628621683685933e-08, "epoch": 0.006596144919746904, "percentage": 0.13, "elapsed_time": "0:00:19", "remaining_time": "4:02:54", "throughput": 9480.88, "total_tokens": 182528} +{"current_steps": 275, "total_steps": 204665, "loss": 0.4984, "lr": 2.6774808227879026e-08, "epoch": 0.006718295751594068, "percentage": 0.13, "elapsed_time": "0:00:19", "remaining_time": "4:02:51", "throughput": 9492.96, "total_tokens": 186112} +{"current_steps": 280, "total_steps": 204665, "loss": 0.4016, "lr": 2.7263399618898714e-08, "epoch": 0.006840446583441233, "percentage": 0.14, "elapsed_time": "0:00:19", "remaining_time": "4:02:46", "throughput": 9508.85, "total_tokens": 189760} +{"current_steps": 285, "total_steps": 204665, "loss": 0.4292, "lr": 2.7751991009918405e-08, "epoch": 0.006962597415288398, "percentage": 0.14, "elapsed_time": "0:00:20", "remaining_time": "4:02:42", "throughput": 9515.04, "total_tokens": 193216} +{"current_steps": 290, "total_steps": 204665, "loss": 0.3719, "lr": 2.8240582400938093e-08, "epoch": 0.007084748247135563, "percentage": 0.14, "elapsed_time": "0:00:20", "remaining_time": "4:02:42", "throughput": 9524.27, "total_tokens": 196800} +{"current_steps": 295, "total_steps": 204665, "loss": 0.266, "lr": 2.8729173791957784e-08, "epoch": 0.007206899078982728, "percentage": 0.14, "elapsed_time": "0:00:21", "remaining_time": "4:02:30", "throughput": 9522.67, "total_tokens": 200000} +{"current_steps": 300, "total_steps": 204665, "loss": 0.3052, "lr": 2.9217765182977475e-08, "epoch": 0.007329049910829893, "percentage": 0.15, "elapsed_time": "0:00:21", "remaining_time": "4:02:23", "throughput": 9526.74, "total_tokens": 203392} +{"current_steps": 305, "total_steps": 204665, "loss": 0.3312, "lr": 2.9706356573997163e-08, "epoch": 0.007451200742677058, "percentage": 0.15, "elapsed_time": "0:00:21", "remaining_time": "4:02:16", "throughput": 9525.72, "total_tokens": 206656} +{"current_steps": 310, "total_steps": 204665, "loss": 0.2604, "lr": 3.0194947965016854e-08, "epoch": 0.007573351574524222, "percentage": 0.15, "elapsed_time": "0:00:22", "remaining_time": "4:02:18", "throughput": 9538.87, "total_tokens": 210368} +{"current_steps": 315, "total_steps": 204665, "loss": 0.3638, "lr": 3.068353935603654e-08, "epoch": 0.007695502406371387, "percentage": 0.15, "elapsed_time": "0:00:22", "remaining_time": "4:02:13", "throughput": 9541.29, "total_tokens": 213760} +{"current_steps": 320, "total_steps": 204665, "loss": 0.2793, "lr": 3.117213074705624e-08, "epoch": 0.007817653238218552, "percentage": 0.16, "elapsed_time": "0:00:22", "remaining_time": "4:02:08", "throughput": 9544.57, "total_tokens": 217152} +{"current_steps": 325, "total_steps": 204665, "loss": 0.2819, "lr": 3.1660722138075925e-08, "epoch": 0.007939804070065717, "percentage": 0.16, "elapsed_time": "0:00:23", "remaining_time": "4:02:11", "throughput": 9559.25, "total_tokens": 220928} +{"current_steps": 330, "total_steps": 204665, "loss": 0.2614, "lr": 3.214931352909562e-08, "epoch": 0.008061954901912883, "percentage": 0.16, "elapsed_time": "0:00:23", "remaining_time": "4:02:22", "throughput": 9581.14, "total_tokens": 225024} +{"current_steps": 335, "total_steps": 204665, "loss": 0.2997, "lr": 3.263790492011531e-08, "epoch": 0.008184105733760046, "percentage": 0.16, "elapsed_time": "0:00:23", "remaining_time": "4:02:17", "throughput": 9583.75, "total_tokens": 228416} +{"current_steps": 340, "total_steps": 204665, "loss": 0.2678, "lr": 3.3126496311134995e-08, "epoch": 0.008306256565607212, "percentage": 0.17, "elapsed_time": "0:00:24", "remaining_time": "4:02:09", "throughput": 9577.25, "total_tokens": 231552} +{"current_steps": 345, "total_steps": 204665, "loss": 0.2656, "lr": 3.361508770215469e-08, "epoch": 0.008428407397454377, "percentage": 0.17, "elapsed_time": "0:00:24", "remaining_time": "4:02:01", "throughput": 9574.02, "total_tokens": 234752} +{"current_steps": 350, "total_steps": 204665, "loss": 0.2684, "lr": 3.410367909317437e-08, "epoch": 0.008550558229301541, "percentage": 0.17, "elapsed_time": "0:00:24", "remaining_time": "4:01:54", "throughput": 9572.63, "total_tokens": 238016} +{"current_steps": 355, "total_steps": 204665, "loss": 0.3036, "lr": 3.4592270484194066e-08, "epoch": 0.008672709061148707, "percentage": 0.17, "elapsed_time": "0:00:25", "remaining_time": "4:01:53", "throughput": 9578.2, "total_tokens": 241536} +{"current_steps": 360, "total_steps": 204665, "loss": 0.2641, "lr": 3.508086187521376e-08, "epoch": 0.008794859892995872, "percentage": 0.18, "elapsed_time": "0:00:25", "remaining_time": "4:01:49", "throughput": 9579.72, "total_tokens": 244928} +{"current_steps": 365, "total_steps": 204665, "loss": 0.2976, "lr": 3.556945326623344e-08, "epoch": 0.008917010724843036, "percentage": 0.18, "elapsed_time": "0:00:25", "remaining_time": "4:01:48", "throughput": 9589.62, "total_tokens": 248576} +{"current_steps": 370, "total_steps": 204665, "loss": 0.3249, "lr": 3.6058044657253136e-08, "epoch": 0.009039161556690201, "percentage": 0.18, "elapsed_time": "0:00:26", "remaining_time": "4:01:42", "throughput": 9583.43, "total_tokens": 251712} +{"current_steps": 375, "total_steps": 204665, "loss": 0.255, "lr": 3.654663604827283e-08, "epoch": 0.009161312388537367, "percentage": 0.18, "elapsed_time": "0:00:26", "remaining_time": "4:01:39", "throughput": 9589.36, "total_tokens": 255232} +{"current_steps": 380, "total_steps": 204665, "loss": 0.3003, "lr": 3.703522743929252e-08, "epoch": 0.00928346322038453, "percentage": 0.19, "elapsed_time": "0:00:26", "remaining_time": "4:01:40", "throughput": 9597.99, "total_tokens": 258880} +{"current_steps": 385, "total_steps": 204665, "loss": 0.2777, "lr": 3.7523818830312206e-08, "epoch": 0.009405614052231696, "percentage": 0.19, "elapsed_time": "0:00:27", "remaining_time": "4:01:37", "throughput": 9601.43, "total_tokens": 262336} +{"current_steps": 390, "total_steps": 204665, "loss": 0.2748, "lr": 3.80124102213319e-08, "epoch": 0.00952776488407886, "percentage": 0.19, "elapsed_time": "0:00:27", "remaining_time": "4:01:30", "throughput": 9602.8, "total_tokens": 265664} +{"current_steps": 395, "total_steps": 204665, "loss": 0.2474, "lr": 3.850100161235159e-08, "epoch": 0.009649915715926025, "percentage": 0.19, "elapsed_time": "0:00:28", "remaining_time": "4:01:28", "throughput": 9603.72, "total_tokens": 269056} +{"current_steps": 400, "total_steps": 204665, "loss": 0.2575, "lr": 3.898959300337128e-08, "epoch": 0.00977206654777319, "percentage": 0.2, "elapsed_time": "0:00:28", "remaining_time": "4:01:28", "throughput": 9609.72, "total_tokens": 272640} +{"current_steps": 405, "total_steps": 204665, "loss": 0.2952, "lr": 3.947818439439097e-08, "epoch": 0.009894217379620355, "percentage": 0.2, "elapsed_time": "0:00:28", "remaining_time": "4:01:25", "throughput": 9610.61, "total_tokens": 276032} +{"current_steps": 410, "total_steps": 204665, "loss": 0.2455, "lr": 3.996677578541066e-08, "epoch": 0.01001636821146752, "percentage": 0.2, "elapsed_time": "0:00:29", "remaining_time": "4:01:23", "throughput": 9613.63, "total_tokens": 279488} +{"current_steps": 415, "total_steps": 204665, "loss": 0.2665, "lr": 4.045536717643035e-08, "epoch": 0.010138519043314686, "percentage": 0.2, "elapsed_time": "0:00:29", "remaining_time": "4:01:18", "throughput": 9611.89, "total_tokens": 282752} +{"current_steps": 420, "total_steps": 204665, "loss": 0.244, "lr": 4.094395856745004e-08, "epoch": 0.01026066987516185, "percentage": 0.21, "elapsed_time": "0:00:29", "remaining_time": "4:01:12", "throughput": 9608.32, "total_tokens": 285952} +{"current_steps": 425, "total_steps": 204665, "loss": 0.2549, "lr": 4.143254995846973e-08, "epoch": 0.010382820707009015, "percentage": 0.21, "elapsed_time": "0:00:30", "remaining_time": "4:01:05", "throughput": 9601.51, "total_tokens": 289024} +{"current_steps": 430, "total_steps": 204665, "loss": 0.2537, "lr": 4.192114134948942e-08, "epoch": 0.01050497153885618, "percentage": 0.21, "elapsed_time": "0:00:30", "remaining_time": "4:01:02", "throughput": 9601.27, "total_tokens": 292352} +{"current_steps": 435, "total_steps": 204665, "loss": 0.2425, "lr": 4.240973274050911e-08, "epoch": 0.010627122370703344, "percentage": 0.21, "elapsed_time": "0:00:30", "remaining_time": "4:00:58", "throughput": 9599.27, "total_tokens": 295616} +{"current_steps": 440, "total_steps": 204665, "loss": 0.2613, "lr": 4.28983241315288e-08, "epoch": 0.01074927320255051, "percentage": 0.21, "elapsed_time": "0:00:31", "remaining_time": "4:00:57", "throughput": 9605.61, "total_tokens": 299200} +{"current_steps": 445, "total_steps": 204665, "loss": 0.2972, "lr": 4.3386915522548495e-08, "epoch": 0.010871424034397675, "percentage": 0.22, "elapsed_time": "0:00:31", "remaining_time": "4:00:51", "throughput": 9598.95, "total_tokens": 302272} +{"current_steps": 450, "total_steps": 204665, "loss": 0.2497, "lr": 4.3875506913568176e-08, "epoch": 0.010993574866244839, "percentage": 0.22, "elapsed_time": "0:00:31", "remaining_time": "4:00:53", "throughput": 9595.33, "total_tokens": 305600} +{"current_steps": 455, "total_steps": 204665, "loss": 0.2463, "lr": 4.436409830458787e-08, "epoch": 0.011115725698092004, "percentage": 0.22, "elapsed_time": "0:00:32", "remaining_time": "4:00:59", "throughput": 9615.05, "total_tokens": 309760} +{"current_steps": 460, "total_steps": 204665, "loss": 0.2568, "lr": 4.4852689695607565e-08, "epoch": 0.01123787652993917, "percentage": 0.22, "elapsed_time": "0:00:32", "remaining_time": "4:01:04", "throughput": 9632.63, "total_tokens": 313856} +{"current_steps": 465, "total_steps": 204665, "loss": 0.2602, "lr": 4.5341281086627246e-08, "epoch": 0.011360027361786334, "percentage": 0.23, "elapsed_time": "0:00:32", "remaining_time": "4:01:06", "throughput": 9644.06, "total_tokens": 317696} +{"current_steps": 470, "total_steps": 204665, "loss": 0.2595, "lr": 4.582987247764694e-08, "epoch": 0.011482178193633499, "percentage": 0.23, "elapsed_time": "0:00:33", "remaining_time": "4:00:59", "throughput": 9635.79, "total_tokens": 320704} +{"current_steps": 475, "total_steps": 204665, "loss": 0.2515, "lr": 4.6318463868666636e-08, "epoch": 0.011604329025480663, "percentage": 0.23, "elapsed_time": "0:00:33", "remaining_time": "4:00:57", "throughput": 9638.23, "total_tokens": 324160} +{"current_steps": 480, "total_steps": 204665, "loss": 0.2352, "lr": 4.680705525968632e-08, "epoch": 0.011726479857327828, "percentage": 0.23, "elapsed_time": "0:00:33", "remaining_time": "4:00:58", "throughput": 9648.22, "total_tokens": 327936} +{"current_steps": 485, "total_steps": 204665, "loss": 0.2296, "lr": 4.729564665070601e-08, "epoch": 0.011848630689174994, "percentage": 0.24, "elapsed_time": "0:00:34", "remaining_time": "4:00:55", "throughput": 9647.68, "total_tokens": 331264} +{"current_steps": 490, "total_steps": 204665, "loss": 0.2708, "lr": 4.7784238041725706e-08, "epoch": 0.011970781521022157, "percentage": 0.24, "elapsed_time": "0:00:34", "remaining_time": "4:00:51", "throughput": 9647.11, "total_tokens": 334592} +{"current_steps": 495, "total_steps": 204665, "loss": 0.2384, "lr": 4.8272829432745394e-08, "epoch": 0.012092932352869323, "percentage": 0.24, "elapsed_time": "0:00:35", "remaining_time": "4:00:55", "throughput": 9663.69, "total_tokens": 338688} +{"current_steps": 500, "total_steps": 204665, "loss": 0.2568, "lr": 4.876142082376508e-08, "epoch": 0.012215083184716488, "percentage": 0.24, "elapsed_time": "0:00:35", "remaining_time": "4:00:51", "throughput": 9660.01, "total_tokens": 341888} +{"current_steps": 505, "total_steps": 204665, "loss": 0.2555, "lr": 4.9250012214784776e-08, "epoch": 0.012337234016563652, "percentage": 0.25, "elapsed_time": "0:00:35", "remaining_time": "4:00:47", "throughput": 9656.43, "total_tokens": 345088} +{"current_steps": 510, "total_steps": 204665, "loss": 0.2275, "lr": 4.9738603605804464e-08, "epoch": 0.012459384848410818, "percentage": 0.25, "elapsed_time": "0:00:36", "remaining_time": "4:00:44", "throughput": 9657.38, "total_tokens": 348480} +{"current_steps": 515, "total_steps": 204665, "loss": 0.2045, "lr": 5.022719499682415e-08, "epoch": 0.012581535680257983, "percentage": 0.25, "elapsed_time": "0:00:36", "remaining_time": "4:00:42", "throughput": 9659.45, "total_tokens": 351936} +{"current_steps": 520, "total_steps": 204665, "loss": 0.2334, "lr": 5.071578638784385e-08, "epoch": 0.012703686512105147, "percentage": 0.25, "elapsed_time": "0:00:36", "remaining_time": "4:00:45", "throughput": 9667.42, "total_tokens": 355712} +{"current_steps": 525, "total_steps": 204665, "loss": 0.2546, "lr": 5.1204377778863535e-08, "epoch": 0.012825837343952312, "percentage": 0.26, "elapsed_time": "0:00:37", "remaining_time": "4:00:43", "throughput": 9671.07, "total_tokens": 359232} +{"current_steps": 530, "total_steps": 204665, "loss": 0.2338, "lr": 5.169296916988322e-08, "epoch": 0.012947988175799478, "percentage": 0.26, "elapsed_time": "0:00:37", "remaining_time": "4:00:41", "throughput": 9671.25, "total_tokens": 362624} +{"current_steps": 535, "total_steps": 204665, "loss": 0.2105, "lr": 5.218156056090292e-08, "epoch": 0.013070139007646642, "percentage": 0.26, "elapsed_time": "0:00:37", "remaining_time": "4:00:42", "throughput": 9678.27, "total_tokens": 366336} +{"current_steps": 540, "total_steps": 204665, "loss": 0.1841, "lr": 5.2670151951922605e-08, "epoch": 0.013192289839493807, "percentage": 0.26, "elapsed_time": "0:00:38", "remaining_time": "4:00:43", "throughput": 9686.36, "total_tokens": 370112} +{"current_steps": 545, "total_steps": 204665, "loss": 0.1867, "lr": 5.315874334294229e-08, "epoch": 0.013314440671340971, "percentage": 0.27, "elapsed_time": "0:00:38", "remaining_time": "4:00:38", "throughput": 9678.71, "total_tokens": 373120} +{"current_steps": 550, "total_steps": 204665, "loss": 0.2397, "lr": 5.364733473396198e-08, "epoch": 0.013436591503188136, "percentage": 0.27, "elapsed_time": "0:00:38", "remaining_time": "4:00:38", "throughput": 9684.1, "total_tokens": 376768} +{"current_steps": 555, "total_steps": 204665, "loss": 0.1875, "lr": 5.4135926124981675e-08, "epoch": 0.013558742335035302, "percentage": 0.27, "elapsed_time": "0:00:39", "remaining_time": "4:00:40", "throughput": 9691.72, "total_tokens": 380544} +{"current_steps": 560, "total_steps": 204665, "loss": 0.224, "lr": 5.462451751600137e-08, "epoch": 0.013680893166882466, "percentage": 0.27, "elapsed_time": "0:00:39", "remaining_time": "4:00:33", "throughput": 9685.35, "total_tokens": 383552} +{"current_steps": 565, "total_steps": 204665, "loss": 0.2137, "lr": 5.511310890702105e-08, "epoch": 0.013803043998729631, "percentage": 0.28, "elapsed_time": "0:00:39", "remaining_time": "4:00:35", "throughput": 9695.91, "total_tokens": 387456} +{"current_steps": 570, "total_steps": 204665, "loss": 0.1913, "lr": 5.5601700298040746e-08, "epoch": 0.013925194830576797, "percentage": 0.28, "elapsed_time": "0:00:40", "remaining_time": "4:00:38", "throughput": 9711.48, "total_tokens": 391616} +{"current_steps": 575, "total_steps": 204665, "loss": 0.1981, "lr": 5.609029168906044e-08, "epoch": 0.01404734566242396, "percentage": 0.28, "elapsed_time": "0:00:40", "remaining_time": "4:00:39", "throughput": 9719.1, "total_tokens": 395392} +{"current_steps": 580, "total_steps": 204665, "loss": 0.1901, "lr": 5.657888308008012e-08, "epoch": 0.014169496494271126, "percentage": 0.28, "elapsed_time": "0:00:41", "remaining_time": "4:00:53", "throughput": 9747.54, "total_tokens": 400384} +{"current_steps": 585, "total_steps": 204665, "loss": 0.2619, "lr": 5.7067474471099816e-08, "epoch": 0.014291647326118291, "percentage": 0.29, "elapsed_time": "0:00:41", "remaining_time": "4:00:47", "throughput": 9740.25, "total_tokens": 403392} +{"current_steps": 590, "total_steps": 204665, "loss": 0.1605, "lr": 5.755606586211951e-08, "epoch": 0.014413798157965455, "percentage": 0.29, "elapsed_time": "0:00:41", "remaining_time": "4:00:43", "throughput": 9736.88, "total_tokens": 406592} +{"current_steps": 595, "total_steps": 204665, "loss": 0.1769, "lr": 5.804465725313919e-08, "epoch": 0.01453594898981262, "percentage": 0.29, "elapsed_time": "0:00:42", "remaining_time": "4:00:40", "throughput": 9735.88, "total_tokens": 409920} +{"current_steps": 600, "total_steps": 204665, "loss": 0.2082, "lr": 5.853324864415889e-08, "epoch": 0.014658099821659786, "percentage": 0.29, "elapsed_time": "0:00:42", "remaining_time": "4:00:39", "throughput": 9738.11, "total_tokens": 413440} +{"current_steps": 605, "total_steps": 204665, "loss": 0.1786, "lr": 5.902184003517858e-08, "epoch": 0.01478025065350695, "percentage": 0.3, "elapsed_time": "0:00:42", "remaining_time": "4:00:37", "throughput": 9739.34, "total_tokens": 416896} +{"current_steps": 610, "total_steps": 204665, "loss": 0.182, "lr": 5.951043142619827e-08, "epoch": 0.014902401485354115, "percentage": 0.3, "elapsed_time": "0:00:43", "remaining_time": "4:00:36", "throughput": 9741.57, "total_tokens": 420416} +{"current_steps": 615, "total_steps": 204665, "loss": 0.1692, "lr": 5.999902281721795e-08, "epoch": 0.015024552317201281, "percentage": 0.3, "elapsed_time": "0:00:43", "remaining_time": "4:00:34", "throughput": 9744.25, "total_tokens": 423936} +{"current_steps": 620, "total_steps": 204665, "loss": 0.2483, "lr": 6.048761420823765e-08, "epoch": 0.015146703149048445, "percentage": 0.3, "elapsed_time": "0:00:43", "remaining_time": "4:00:36", "throughput": 9752.19, "total_tokens": 427776} +{"current_steps": 625, "total_steps": 204665, "loss": 0.1979, "lr": 6.097620559925734e-08, "epoch": 0.01526885398089561, "percentage": 0.31, "elapsed_time": "0:00:44", "remaining_time": "4:00:37", "throughput": 9759.65, "total_tokens": 431616} +{"current_steps": 630, "total_steps": 204665, "loss": 0.2048, "lr": 6.146479699027702e-08, "epoch": 0.015391004812742774, "percentage": 0.31, "elapsed_time": "0:00:44", "remaining_time": "4:00:34", "throughput": 9758.47, "total_tokens": 434944} +{"current_steps": 635, "total_steps": 204665, "loss": 0.2058, "lr": 6.195338838129672e-08, "epoch": 0.01551315564458994, "percentage": 0.31, "elapsed_time": "0:00:44", "remaining_time": "4:00:32", "throughput": 9757.38, "total_tokens": 438272} +{"current_steps": 640, "total_steps": 204665, "loss": 0.2192, "lr": 6.244197977231641e-08, "epoch": 0.015635306476437103, "percentage": 0.31, "elapsed_time": "0:00:45", "remaining_time": "4:00:30", "throughput": 9759.75, "total_tokens": 441792} +{"current_steps": 645, "total_steps": 204665, "loss": 0.2148, "lr": 6.293057116333609e-08, "epoch": 0.01575745730828427, "percentage": 0.32, "elapsed_time": "0:00:45", "remaining_time": "4:00:26", "throughput": 9753.98, "total_tokens": 444864} +{"current_steps": 650, "total_steps": 204665, "loss": 0.153, "lr": 6.34191625543558e-08, "epoch": 0.015879608140131434, "percentage": 0.32, "elapsed_time": "0:00:45", "remaining_time": "4:00:27", "throughput": 9757.53, "total_tokens": 448512} +{"current_steps": 655, "total_steps": 204665, "loss": 0.1999, "lr": 6.390775394537548e-08, "epoch": 0.0160017589719786, "percentage": 0.32, "elapsed_time": "0:00:46", "remaining_time": "4:00:22", "throughput": 9749.67, "total_tokens": 451456} +{"current_steps": 660, "total_steps": 204665, "loss": 0.205, "lr": 6.439634533639516e-08, "epoch": 0.016123909803825765, "percentage": 0.32, "elapsed_time": "0:00:46", "remaining_time": "4:00:21", "throughput": 9750.31, "total_tokens": 454912} +{"current_steps": 665, "total_steps": 204665, "loss": 0.1934, "lr": 6.488493672741487e-08, "epoch": 0.01624606063567293, "percentage": 0.32, "elapsed_time": "0:00:47", "remaining_time": "4:00:23", "throughput": 9755.86, "total_tokens": 458688} +{"current_steps": 670, "total_steps": 204665, "loss": 0.1651, "lr": 6.537352811843455e-08, "epoch": 0.016368211467520093, "percentage": 0.33, "elapsed_time": "0:00:47", "remaining_time": "4:00:16", "throughput": 9746.65, "total_tokens": 461504} +{"current_steps": 675, "total_steps": 204665, "loss": 0.2466, "lr": 6.586211950945423e-08, "epoch": 0.016490362299367258, "percentage": 0.33, "elapsed_time": "0:00:47", "remaining_time": "4:00:11", "throughput": 9740.63, "total_tokens": 464512} +{"current_steps": 680, "total_steps": 204665, "loss": 0.2258, "lr": 6.635071090047394e-08, "epoch": 0.016612513131214424, "percentage": 0.33, "elapsed_time": "0:00:48", "remaining_time": "4:00:07", "throughput": 9735.31, "total_tokens": 467584} +{"current_steps": 685, "total_steps": 204665, "loss": 0.2461, "lr": 6.683930229149362e-08, "epoch": 0.01673466396306159, "percentage": 0.33, "elapsed_time": "0:00:48", "remaining_time": "4:00:06", "throughput": 9732.55, "total_tokens": 470848} +{"current_steps": 690, "total_steps": 204665, "loss": 0.216, "lr": 6.73278936825133e-08, "epoch": 0.016856814794908755, "percentage": 0.34, "elapsed_time": "0:00:48", "remaining_time": "4:00:02", "throughput": 9727.5, "total_tokens": 473920} +{"current_steps": 695, "total_steps": 204665, "loss": 0.2186, "lr": 6.781648507353301e-08, "epoch": 0.016978965626755917, "percentage": 0.34, "elapsed_time": "0:00:49", "remaining_time": "4:00:00", "throughput": 9725.33, "total_tokens": 477184} +{"current_steps": 700, "total_steps": 204665, "loss": 0.2767, "lr": 6.830507646455269e-08, "epoch": 0.017101116458603082, "percentage": 0.34, "elapsed_time": "0:00:49", "remaining_time": "4:00:15", "throughput": 9756.63, "total_tokens": 482688} +{"current_steps": 705, "total_steps": 204665, "loss": 0.1857, "lr": 6.879366785557237e-08, "epoch": 0.017223267290450248, "percentage": 0.34, "elapsed_time": "0:00:49", "remaining_time": "4:00:12", "throughput": 9753.48, "total_tokens": 485888} +{"current_steps": 710, "total_steps": 204665, "loss": 0.206, "lr": 6.928225924659208e-08, "epoch": 0.017345418122297413, "percentage": 0.35, "elapsed_time": "0:00:50", "remaining_time": "4:00:12", "throughput": 9756.89, "total_tokens": 489536} +{"current_steps": 715, "total_steps": 204665, "loss": 0.1713, "lr": 6.977085063761176e-08, "epoch": 0.01746756895414458, "percentage": 0.35, "elapsed_time": "0:00:50", "remaining_time": "4:00:11", "throughput": 9758.72, "total_tokens": 493056} +{"current_steps": 720, "total_steps": 204665, "loss": 0.2117, "lr": 7.025944202863144e-08, "epoch": 0.017589719785991744, "percentage": 0.35, "elapsed_time": "0:00:50", "remaining_time": "4:00:09", "throughput": 9755.08, "total_tokens": 496256} +{"current_steps": 725, "total_steps": 204665, "loss": 0.1537, "lr": 7.074803341965115e-08, "epoch": 0.017711870617838906, "percentage": 0.35, "elapsed_time": "0:00:51", "remaining_time": "4:00:10", "throughput": 9762.91, "total_tokens": 500160} +{"current_steps": 730, "total_steps": 204665, "loss": 0.2369, "lr": 7.123662481067083e-08, "epoch": 0.01783402144968607, "percentage": 0.36, "elapsed_time": "0:00:51", "remaining_time": "4:00:08", "throughput": 9762.32, "total_tokens": 503488} +{"current_steps": 735, "total_steps": 204665, "loss": 0.2371, "lr": 7.172521620169051e-08, "epoch": 0.017956172281533237, "percentage": 0.36, "elapsed_time": "0:00:51", "remaining_time": "4:00:03", "throughput": 9757.6, "total_tokens": 506560} +{"current_steps": 740, "total_steps": 204665, "loss": 0.1702, "lr": 7.221380759271022e-08, "epoch": 0.018078323113380403, "percentage": 0.36, "elapsed_time": "0:00:52", "remaining_time": "4:00:01", "throughput": 9756.55, "total_tokens": 509888} +{"current_steps": 745, "total_steps": 204665, "loss": 0.2225, "lr": 7.27023989837299e-08, "epoch": 0.018200473945227568, "percentage": 0.36, "elapsed_time": "0:00:52", "remaining_time": "3:59:57", "throughput": 9751.82, "total_tokens": 512960} +{"current_steps": 750, "total_steps": 204665, "loss": 0.2422, "lr": 7.31909903747496e-08, "epoch": 0.018322624777074734, "percentage": 0.37, "elapsed_time": "0:00:52", "remaining_time": "3:59:57", "throughput": 9752.27, "total_tokens": 516416} +{"current_steps": 755, "total_steps": 204665, "loss": 0.2441, "lr": 7.367958176576929e-08, "epoch": 0.018444775608921896, "percentage": 0.37, "elapsed_time": "0:00:53", "remaining_time": "3:59:55", "throughput": 9750.15, "total_tokens": 519680} +{"current_steps": 760, "total_steps": 204665, "loss": 0.1828, "lr": 7.416817315678897e-08, "epoch": 0.01856692644076906, "percentage": 0.37, "elapsed_time": "0:00:53", "remaining_time": "3:59:52", "throughput": 9748.42, "total_tokens": 522944} +{"current_steps": 765, "total_steps": 204665, "loss": 0.1292, "lr": 7.465676454780867e-08, "epoch": 0.018689077272616227, "percentage": 0.37, "elapsed_time": "0:00:53", "remaining_time": "3:59:50", "throughput": 9745.23, "total_tokens": 526144} +{"current_steps": 770, "total_steps": 204665, "loss": 0.1908, "lr": 7.514535593882836e-08, "epoch": 0.018811228104463392, "percentage": 0.38, "elapsed_time": "0:00:54", "remaining_time": "3:59:46", "throughput": 9739.54, "total_tokens": 529152} +{"current_steps": 775, "total_steps": 204665, "loss": 0.2619, "lr": 7.563394732984804e-08, "epoch": 0.018933378936310558, "percentage": 0.38, "elapsed_time": "0:00:54", "remaining_time": "3:59:45", "throughput": 9740.42, "total_tokens": 532608} +{"current_steps": 780, "total_steps": 204665, "loss": 0.1843, "lr": 7.612253872086774e-08, "epoch": 0.01905552976815772, "percentage": 0.38, "elapsed_time": "0:00:55", "remaining_time": "3:59:44", "throughput": 9745.52, "total_tokens": 536320} +{"current_steps": 785, "total_steps": 204665, "loss": 0.1081, "lr": 7.661113011188742e-08, "epoch": 0.019177680600004885, "percentage": 0.38, "elapsed_time": "0:00:55", "remaining_time": "3:59:42", "throughput": 9742.81, "total_tokens": 539520} +{"current_steps": 790, "total_steps": 204665, "loss": 0.2077, "lr": 7.709972150290711e-08, "epoch": 0.01929983143185205, "percentage": 0.39, "elapsed_time": "0:00:55", "remaining_time": "3:59:40", "throughput": 9741.82, "total_tokens": 542848} +{"current_steps": 795, "total_steps": 204665, "loss": 0.1838, "lr": 7.758831289392681e-08, "epoch": 0.019421982263699216, "percentage": 0.39, "elapsed_time": "0:00:56", "remaining_time": "3:59:39", "throughput": 9742.42, "total_tokens": 546304} +{"current_steps": 800, "total_steps": 204665, "loss": 0.2594, "lr": 7.807690428494649e-08, "epoch": 0.01954413309554638, "percentage": 0.39, "elapsed_time": "0:00:56", "remaining_time": "3:59:34", "throughput": 9734.82, "total_tokens": 549120} +{"current_steps": 805, "total_steps": 204665, "loss": 0.1731, "lr": 7.856549567596618e-08, "epoch": 0.019666283927393547, "percentage": 0.39, "elapsed_time": "0:00:56", "remaining_time": "3:59:34", "throughput": 9739.21, "total_tokens": 552832} +{"current_steps": 810, "total_steps": 204665, "loss": 0.1918, "lr": 7.905408706698588e-08, "epoch": 0.01978843475924071, "percentage": 0.4, "elapsed_time": "0:00:57", "remaining_time": "3:59:33", "throughput": 9739.04, "total_tokens": 556224} +{"current_steps": 815, "total_steps": 204665, "loss": 0.1981, "lr": 7.954267845800556e-08, "epoch": 0.019910585591087875, "percentage": 0.4, "elapsed_time": "0:00:57", "remaining_time": "3:59:31", "throughput": 9738.35, "total_tokens": 559552} +{"current_steps": 820, "total_steps": 204665, "loss": 0.1791, "lr": 8.003126984902525e-08, "epoch": 0.02003273642293504, "percentage": 0.4, "elapsed_time": "0:00:57", "remaining_time": "3:59:29", "throughput": 9735.87, "total_tokens": 562752} +{"current_steps": 825, "total_steps": 204665, "loss": 0.1723, "lr": 8.051986124004495e-08, "epoch": 0.020154887254782206, "percentage": 0.4, "elapsed_time": "0:00:58", "remaining_time": "3:59:27", "throughput": 9735.0, "total_tokens": 566080} +{"current_steps": 830, "total_steps": 204665, "loss": 0.2432, "lr": 8.100845263106463e-08, "epoch": 0.02027703808662937, "percentage": 0.41, "elapsed_time": "0:00:58", "remaining_time": "3:59:25", "throughput": 9734.32, "total_tokens": 569408} +{"current_steps": 835, "total_steps": 204665, "loss": 0.1827, "lr": 8.149704402208433e-08, "epoch": 0.020399188918476537, "percentage": 0.41, "elapsed_time": "0:00:58", "remaining_time": "3:59:24", "throughput": 9736.15, "total_tokens": 572928} +{"current_steps": 840, "total_steps": 204665, "loss": 0.256, "lr": 8.198563541310402e-08, "epoch": 0.0205213397503237, "percentage": 0.41, "elapsed_time": "0:00:59", "remaining_time": "3:59:23", "throughput": 9738.08, "total_tokens": 576448} +{"current_steps": 845, "total_steps": 204665, "loss": 0.2329, "lr": 8.24742268041237e-08, "epoch": 0.020643490582170864, "percentage": 0.41, "elapsed_time": "0:00:59", "remaining_time": "3:59:21", "throughput": 9736.64, "total_tokens": 579712} +{"current_steps": 850, "total_steps": 204665, "loss": 0.1903, "lr": 8.29628181951434e-08, "epoch": 0.02076564141401803, "percentage": 0.42, "elapsed_time": "0:00:59", "remaining_time": "3:59:20", "throughput": 9736.47, "total_tokens": 583104} +{"current_steps": 855, "total_steps": 204665, "loss": 0.2145, "lr": 8.345140958616309e-08, "epoch": 0.020887792245865195, "percentage": 0.42, "elapsed_time": "0:01:00", "remaining_time": "3:59:19", "throughput": 9737.34, "total_tokens": 586560} +{"current_steps": 860, "total_steps": 204665, "loss": 0.1631, "lr": 8.394000097718277e-08, "epoch": 0.02100994307771236, "percentage": 0.42, "elapsed_time": "0:01:00", "remaining_time": "3:59:19", "throughput": 9742.36, "total_tokens": 590336} +{"current_steps": 865, "total_steps": 204665, "loss": 0.2079, "lr": 8.442859236820248e-08, "epoch": 0.021132093909559523, "percentage": 0.42, "elapsed_time": "0:01:00", "remaining_time": "3:59:18", "throughput": 9742.2, "total_tokens": 593728} +{"current_steps": 870, "total_steps": 204665, "loss": 0.1904, "lr": 8.491718375922216e-08, "epoch": 0.021254244741406688, "percentage": 0.43, "elapsed_time": "0:01:01", "remaining_time": "3:59:16", "throughput": 9741.5, "total_tokens": 597056} +{"current_steps": 875, "total_steps": 204665, "loss": 0.1724, "lr": 8.540577515024184e-08, "epoch": 0.021376395573253854, "percentage": 0.43, "elapsed_time": "0:01:01", "remaining_time": "3:59:15", "throughput": 9740.66, "total_tokens": 600384} +{"current_steps": 880, "total_steps": 204665, "loss": 0.137, "lr": 8.589436654126155e-08, "epoch": 0.02149854640510102, "percentage": 0.43, "elapsed_time": "0:01:01", "remaining_time": "3:59:15", "throughput": 9742.57, "total_tokens": 603968} +{"current_steps": 885, "total_steps": 204665, "loss": 0.2714, "lr": 8.638295793228123e-08, "epoch": 0.021620697236948185, "percentage": 0.43, "elapsed_time": "0:01:02", "remaining_time": "3:59:14", "throughput": 9742.4, "total_tokens": 607360} +{"current_steps": 890, "total_steps": 204665, "loss": 0.2029, "lr": 8.687154932330091e-08, "epoch": 0.02174284806879535, "percentage": 0.43, "elapsed_time": "0:01:02", "remaining_time": "3:59:15", "throughput": 9745.23, "total_tokens": 611008} +{"current_steps": 895, "total_steps": 204665, "loss": 0.267, "lr": 8.736014071432062e-08, "epoch": 0.021864998900642512, "percentage": 0.44, "elapsed_time": "0:01:03", "remaining_time": "3:59:12", "throughput": 9741.42, "total_tokens": 614080} +{"current_steps": 900, "total_steps": 204665, "loss": 0.2438, "lr": 8.78487321053403e-08, "epoch": 0.021987149732489678, "percentage": 0.44, "elapsed_time": "0:01:03", "remaining_time": "3:59:10", "throughput": 9740.68, "total_tokens": 617408} +{"current_steps": 905, "total_steps": 204665, "loss": 0.2295, "lr": 8.833732349635998e-08, "epoch": 0.022109300564336843, "percentage": 0.44, "elapsed_time": "0:01:03", "remaining_time": "3:59:10", "throughput": 9743.89, "total_tokens": 621056} +{"current_steps": 910, "total_steps": 204665, "loss": 0.1759, "lr": 8.882591488737969e-08, "epoch": 0.02223145139618401, "percentage": 0.44, "elapsed_time": "0:01:04", "remaining_time": "3:59:07", "throughput": 9739.16, "total_tokens": 624064} +{"current_steps": 915, "total_steps": 204665, "loss": 0.1532, "lr": 8.931450627839937e-08, "epoch": 0.022353602228031174, "percentage": 0.45, "elapsed_time": "0:01:04", "remaining_time": "3:59:06", "throughput": 9739.9, "total_tokens": 627520} +{"current_steps": 920, "total_steps": 204665, "loss": 0.1624, "lr": 8.980309766941905e-08, "epoch": 0.02247575305987834, "percentage": 0.45, "elapsed_time": "0:01:04", "remaining_time": "3:59:06", "throughput": 9743.4, "total_tokens": 631168} +{"current_steps": 925, "total_steps": 204665, "loss": 0.1618, "lr": 9.029168906043876e-08, "epoch": 0.0225979038917255, "percentage": 0.45, "elapsed_time": "0:01:05", "remaining_time": "3:59:03", "throughput": 9739.68, "total_tokens": 634240} +{"current_steps": 930, "total_steps": 204665, "loss": 0.1677, "lr": 9.078028045145844e-08, "epoch": 0.022720054723572667, "percentage": 0.45, "elapsed_time": "0:01:05", "remaining_time": "3:59:02", "throughput": 9740.14, "total_tokens": 637696} +{"current_steps": 935, "total_steps": 204665, "loss": 0.1632, "lr": 9.126887184247812e-08, "epoch": 0.022842205555419832, "percentage": 0.46, "elapsed_time": "0:01:05", "remaining_time": "3:59:02", "throughput": 9741.62, "total_tokens": 641216} +{"current_steps": 940, "total_steps": 204665, "loss": 0.1576, "lr": 9.175746323349783e-08, "epoch": 0.022964356387266998, "percentage": 0.46, "elapsed_time": "0:01:06", "remaining_time": "3:59:00", "throughput": 9740.84, "total_tokens": 644544} +{"current_steps": 945, "total_steps": 204665, "loss": 0.2266, "lr": 9.224605462451751e-08, "epoch": 0.023086507219114163, "percentage": 0.46, "elapsed_time": "0:01:06", "remaining_time": "3:58:58", "throughput": 9735.93, "total_tokens": 647552} +{"current_steps": 950, "total_steps": 204665, "loss": 0.2041, "lr": 9.27346460155372e-08, "epoch": 0.023208658050961326, "percentage": 0.46, "elapsed_time": "0:01:06", "remaining_time": "3:58:59", "throughput": 9739.34, "total_tokens": 651264} +{"current_steps": 955, "total_steps": 204665, "loss": 0.1553, "lr": 9.32232374065569e-08, "epoch": 0.02333080888280849, "percentage": 0.47, "elapsed_time": "0:01:07", "remaining_time": "3:58:57", "throughput": 9738.12, "total_tokens": 654528} +{"current_steps": 960, "total_steps": 204665, "loss": 0.2058, "lr": 9.371182879757658e-08, "epoch": 0.023452959714655656, "percentage": 0.47, "elapsed_time": "0:01:07", "remaining_time": "3:58:56", "throughput": 9739.74, "total_tokens": 658048} +{"current_steps": 965, "total_steps": 204665, "loss": 0.1971, "lr": 9.420042018859626e-08, "epoch": 0.023575110546502822, "percentage": 0.47, "elapsed_time": "0:01:07", "remaining_time": "3:58:54", "throughput": 9738.13, "total_tokens": 661312} +{"current_steps": 970, "total_steps": 204665, "loss": 0.1805, "lr": 9.468901157961596e-08, "epoch": 0.023697261378349987, "percentage": 0.47, "elapsed_time": "0:01:08", "remaining_time": "3:58:50", "throughput": 9731.88, "total_tokens": 664128} +{"current_steps": 975, "total_steps": 204665, "loss": 0.2021, "lr": 9.517760297063565e-08, "epoch": 0.023819412210197153, "percentage": 0.48, "elapsed_time": "0:01:08", "remaining_time": "3:58:48", "throughput": 9729.46, "total_tokens": 667328} +{"current_steps": 980, "total_steps": 204665, "loss": 0.1853, "lr": 9.566619436165535e-08, "epoch": 0.023941563042044315, "percentage": 0.48, "elapsed_time": "0:01:08", "remaining_time": "3:58:46", "throughput": 9726.63, "total_tokens": 670464} +{"current_steps": 985, "total_steps": 204665, "loss": 0.1505, "lr": 9.615478575267503e-08, "epoch": 0.02406371387389148, "percentage": 0.48, "elapsed_time": "0:01:09", "remaining_time": "3:58:46", "throughput": 9727.97, "total_tokens": 673984} +{"current_steps": 990, "total_steps": 204665, "loss": 0.1624, "lr": 9.664337714369472e-08, "epoch": 0.024185864705738646, "percentage": 0.48, "elapsed_time": "0:01:09", "remaining_time": "3:58:44", "throughput": 9724.2, "total_tokens": 677056} +{"current_steps": 995, "total_steps": 204665, "loss": 0.1981, "lr": 9.713196853471442e-08, "epoch": 0.02430801553758581, "percentage": 0.49, "elapsed_time": "0:01:09", "remaining_time": "3:58:44", "throughput": 9726.96, "total_tokens": 680704} +{"current_steps": 1000, "total_steps": 204665, "loss": 0.2078, "lr": 9.76205599257341e-08, "epoch": 0.024430166369432977, "percentage": 0.49, "elapsed_time": "0:01:10", "remaining_time": "3:58:43", "throughput": 9725.5, "total_tokens": 683968} +{"current_steps": 1005, "total_steps": 204665, "loss": 0.1137, "lr": 9.81091513167538e-08, "epoch": 0.024552317201280142, "percentage": 0.49, "elapsed_time": "0:01:10", "remaining_time": "3:58:43", "throughput": 9727.71, "total_tokens": 687552} +{"current_steps": 1010, "total_steps": 204665, "loss": 0.1055, "lr": 9.859774270777349e-08, "epoch": 0.024674468033127304, "percentage": 0.49, "elapsed_time": "0:01:11", "remaining_time": "3:58:40", "throughput": 9723.99, "total_tokens": 690624} +{"current_steps": 1015, "total_steps": 204665, "loss": 0.1542, "lr": 9.908633409879317e-08, "epoch": 0.02479661886497447, "percentage": 0.5, "elapsed_time": "0:01:11", "remaining_time": "3:58:40", "throughput": 9723.93, "total_tokens": 694016} +{"current_steps": 1020, "total_steps": 204665, "loss": 0.1864, "lr": 9.957492548981286e-08, "epoch": 0.024918769696821635, "percentage": 0.5, "elapsed_time": "0:01:11", "remaining_time": "3:58:37", "throughput": 9721.51, "total_tokens": 697152} +{"current_steps": 1025, "total_steps": 204665, "loss": 0.2495, "lr": 1.0006351688083256e-07, "epoch": 0.0250409205286688, "percentage": 0.5, "elapsed_time": "0:01:12", "remaining_time": "3:58:37", "throughput": 9722.75, "total_tokens": 700672} +{"current_steps": 1030, "total_steps": 204665, "loss": 0.1592, "lr": 1.0055210827185224e-07, "epoch": 0.025163071360515966, "percentage": 0.5, "elapsed_time": "0:01:12", "remaining_time": "3:58:37", "throughput": 9724.83, "total_tokens": 704256} +{"current_steps": 1035, "total_steps": 204665, "loss": 0.1961, "lr": 1.0104069966287194e-07, "epoch": 0.02528522219236313, "percentage": 0.51, "elapsed_time": "0:01:12", "remaining_time": "3:58:36", "throughput": 9725.63, "total_tokens": 707712} +{"current_steps": 1040, "total_steps": 204665, "loss": 0.1893, "lr": 1.0152929105389163e-07, "epoch": 0.025407373024210294, "percentage": 0.51, "elapsed_time": "0:01:13", "remaining_time": "3:58:36", "throughput": 9726.91, "total_tokens": 711232} +{"current_steps": 1045, "total_steps": 204665, "loss": 0.1576, "lr": 1.0201788244491131e-07, "epoch": 0.02552952385605746, "percentage": 0.51, "elapsed_time": "0:01:13", "remaining_time": "3:58:35", "throughput": 9726.29, "total_tokens": 714560} +{"current_steps": 1050, "total_steps": 204665, "loss": 0.1167, "lr": 1.02506473835931e-07, "epoch": 0.025651674687904625, "percentage": 0.51, "elapsed_time": "0:01:13", "remaining_time": "3:58:35", "throughput": 9728.27, "total_tokens": 718144} +{"current_steps": 1055, "total_steps": 204665, "loss": 0.1414, "lr": 1.029950652269507e-07, "epoch": 0.02577382551975179, "percentage": 0.52, "elapsed_time": "0:01:14", "remaining_time": "3:58:34", "throughput": 9729.31, "total_tokens": 721600} +{"current_steps": 1060, "total_steps": 204665, "loss": 0.3008, "lr": 1.0348365661797038e-07, "epoch": 0.025895976351598956, "percentage": 0.52, "elapsed_time": "0:01:14", "remaining_time": "3:58:32", "throughput": 9728.68, "total_tokens": 724928} +{"current_steps": 1065, "total_steps": 204665, "loss": 0.218, "lr": 1.0397224800899008e-07, "epoch": 0.026018127183446118, "percentage": 0.52, "elapsed_time": "0:01:14", "remaining_time": "3:58:33", "throughput": 9732.91, "total_tokens": 728704} +{"current_steps": 1070, "total_steps": 204665, "loss": 0.1445, "lr": 1.0446083940000977e-07, "epoch": 0.026140278015293283, "percentage": 0.52, "elapsed_time": "0:01:15", "remaining_time": "3:58:31", "throughput": 9731.53, "total_tokens": 731968} +{"current_steps": 1075, "total_steps": 204665, "loss": 0.1742, "lr": 1.0494943079102945e-07, "epoch": 0.02626242884714045, "percentage": 0.53, "elapsed_time": "0:01:15", "remaining_time": "3:58:29", "throughput": 9727.56, "total_tokens": 734976} +{"current_steps": 1080, "total_steps": 204665, "loss": 0.2347, "lr": 1.0543802218204915e-07, "epoch": 0.026384579678987614, "percentage": 0.53, "elapsed_time": "0:01:15", "remaining_time": "3:58:27", "throughput": 9725.68, "total_tokens": 738176} +{"current_steps": 1085, "total_steps": 204665, "loss": 0.1286, "lr": 1.0592661357306884e-07, "epoch": 0.02650673051083478, "percentage": 0.53, "elapsed_time": "0:01:16", "remaining_time": "3:58:29", "throughput": 9730.67, "total_tokens": 742080} +{"current_steps": 1090, "total_steps": 204665, "loss": 0.1454, "lr": 1.0641520496408852e-07, "epoch": 0.026628881342681942, "percentage": 0.53, "elapsed_time": "0:01:16", "remaining_time": "3:58:26", "throughput": 9727.6, "total_tokens": 745152} +{"current_steps": 1095, "total_steps": 204665, "loss": 0.2185, "lr": 1.0690379635510823e-07, "epoch": 0.026751032174529107, "percentage": 0.54, "elapsed_time": "0:01:16", "remaining_time": "3:58:25", "throughput": 9728.32, "total_tokens": 748608} +{"current_steps": 1100, "total_steps": 204665, "loss": 0.2498, "lr": 1.0739238774612791e-07, "epoch": 0.026873183006376273, "percentage": 0.54, "elapsed_time": "0:01:17", "remaining_time": "3:58:26", "throughput": 9730.0, "total_tokens": 752192} +{"current_steps": 1105, "total_steps": 204665, "loss": 0.2033, "lr": 1.0788097913714759e-07, "epoch": 0.02699533383822344, "percentage": 0.54, "elapsed_time": "0:01:17", "remaining_time": "3:58:27", "throughput": 9735.08, "total_tokens": 756096} +{"current_steps": 1110, "total_steps": 204665, "loss": 0.1271, "lr": 1.083695705281673e-07, "epoch": 0.027117484670070604, "percentage": 0.54, "elapsed_time": "0:01:18", "remaining_time": "3:58:25", "throughput": 9733.27, "total_tokens": 759296} +{"current_steps": 1115, "total_steps": 204665, "loss": 0.1702, "lr": 1.0885816191918698e-07, "epoch": 0.02723963550191777, "percentage": 0.54, "elapsed_time": "0:01:18", "remaining_time": "3:58:23", "throughput": 9730.16, "total_tokens": 762368} +{"current_steps": 1120, "total_steps": 204665, "loss": 0.1896, "lr": 1.0934675331020666e-07, "epoch": 0.02736178633376493, "percentage": 0.55, "elapsed_time": "0:01:18", "remaining_time": "3:58:21", "throughput": 9728.4, "total_tokens": 765568} +{"current_steps": 1125, "total_steps": 204665, "loss": 0.1672, "lr": 1.0983534470122637e-07, "epoch": 0.027483937165612097, "percentage": 0.55, "elapsed_time": "0:01:19", "remaining_time": "3:58:20", "throughput": 9728.29, "total_tokens": 768960} +{"current_steps": 1130, "total_steps": 204665, "loss": 0.1767, "lr": 1.1032393609224605e-07, "epoch": 0.027606087997459262, "percentage": 0.55, "elapsed_time": "0:01:19", "remaining_time": "3:58:19", "throughput": 9728.42, "total_tokens": 772352} +{"current_steps": 1135, "total_steps": 204665, "loss": 0.2014, "lr": 1.1081252748326573e-07, "epoch": 0.027728238829306428, "percentage": 0.55, "elapsed_time": "0:01:19", "remaining_time": "3:58:18", "throughput": 9727.14, "total_tokens": 775616} +{"current_steps": 1140, "total_steps": 204665, "loss": 0.2096, "lr": 1.1130111887428544e-07, "epoch": 0.027850389661153593, "percentage": 0.56, "elapsed_time": "0:01:20", "remaining_time": "3:58:16", "throughput": 9726.13, "total_tokens": 778880} +{"current_steps": 1145, "total_steps": 204665, "loss": 0.161, "lr": 1.1178971026530512e-07, "epoch": 0.02797254049300076, "percentage": 0.56, "elapsed_time": "0:01:20", "remaining_time": "3:58:14", "throughput": 9721.66, "total_tokens": 781824} +{"current_steps": 1150, "total_steps": 204665, "loss": 0.1846, "lr": 1.122783016563248e-07, "epoch": 0.02809469132484792, "percentage": 0.56, "elapsed_time": "0:01:20", "remaining_time": "3:58:13", "throughput": 9721.15, "total_tokens": 785152} +{"current_steps": 1155, "total_steps": 204665, "loss": 0.2438, "lr": 1.1276689304734451e-07, "epoch": 0.028216842156695086, "percentage": 0.56, "elapsed_time": "0:01:21", "remaining_time": "3:58:11", "throughput": 9717.5, "total_tokens": 788160} +{"current_steps": 1160, "total_steps": 204665, "loss": 0.1536, "lr": 1.1325548443836419e-07, "epoch": 0.028338992988542252, "percentage": 0.57, "elapsed_time": "0:01:21", "remaining_time": "3:58:11", "throughput": 9719.97, "total_tokens": 791808} +{"current_steps": 1165, "total_steps": 204665, "loss": 0.1637, "lr": 1.1374407582938387e-07, "epoch": 0.028461143820389417, "percentage": 0.57, "elapsed_time": "0:01:21", "remaining_time": "3:58:11", "throughput": 9721.81, "total_tokens": 795392} +{"current_steps": 1170, "total_steps": 204665, "loss": 0.2223, "lr": 1.1423266722040357e-07, "epoch": 0.028583294652236583, "percentage": 0.57, "elapsed_time": "0:01:22", "remaining_time": "3:58:12", "throughput": 9725.48, "total_tokens": 799168} +{"current_steps": 1175, "total_steps": 204665, "loss": 0.2553, "lr": 1.1472125861142326e-07, "epoch": 0.028705445484083745, "percentage": 0.57, "elapsed_time": "0:01:22", "remaining_time": "3:58:11", "throughput": 9726.36, "total_tokens": 802624} +{"current_steps": 1180, "total_steps": 204665, "loss": 0.1225, "lr": 1.1520985000244294e-07, "epoch": 0.02882759631593091, "percentage": 0.58, "elapsed_time": "0:01:22", "remaining_time": "3:58:12", "throughput": 9729.82, "total_tokens": 806400} +{"current_steps": 1185, "total_steps": 204665, "loss": 0.1684, "lr": 1.1569844139346264e-07, "epoch": 0.028949747147778076, "percentage": 0.58, "elapsed_time": "0:01:23", "remaining_time": "3:58:11", "throughput": 9732.52, "total_tokens": 810048} +{"current_steps": 1190, "total_steps": 204665, "loss": 0.1959, "lr": 1.1618703278448233e-07, "epoch": 0.02907189797962524, "percentage": 0.58, "elapsed_time": "0:01:23", "remaining_time": "3:58:09", "throughput": 9731.06, "total_tokens": 813248} +{"current_steps": 1195, "total_steps": 204665, "loss": 0.1737, "lr": 1.1667562417550202e-07, "epoch": 0.029194048811472407, "percentage": 0.58, "elapsed_time": "0:01:23", "remaining_time": "3:58:08", "throughput": 9727.93, "total_tokens": 816320} +{"current_steps": 1200, "total_steps": 204665, "loss": 0.1948, "lr": 1.1716421556652171e-07, "epoch": 0.029316199643319572, "percentage": 0.59, "elapsed_time": "0:01:24", "remaining_time": "3:58:06", "throughput": 9726.96, "total_tokens": 819584} +{"current_steps": 1205, "total_steps": 204665, "loss": 0.1995, "lr": 1.176528069575414e-07, "epoch": 0.029438350475166734, "percentage": 0.59, "elapsed_time": "0:01:24", "remaining_time": "3:58:02", "throughput": 9722.14, "total_tokens": 822400} +{"current_steps": 1210, "total_steps": 204665, "loss": 0.1063, "lr": 1.181413983485611e-07, "epoch": 0.0295605013070139, "percentage": 0.59, "elapsed_time": "0:01:24", "remaining_time": "3:58:01", "throughput": 9721.61, "total_tokens": 825728} +{"current_steps": 1215, "total_steps": 204665, "loss": 0.2259, "lr": 1.1862998973958078e-07, "epoch": 0.029682652138861065, "percentage": 0.59, "elapsed_time": "0:01:25", "remaining_time": "3:58:00", "throughput": 9720.55, "total_tokens": 828992} +{"current_steps": 1220, "total_steps": 204665, "loss": 0.2222, "lr": 1.1911858113060047e-07, "epoch": 0.02980480297070823, "percentage": 0.6, "elapsed_time": "0:01:25", "remaining_time": "3:57:57", "throughput": 9716.73, "total_tokens": 831936} +{"current_steps": 1225, "total_steps": 204665, "loss": 0.2204, "lr": 1.1960717252162018e-07, "epoch": 0.029926953802555396, "percentage": 0.6, "elapsed_time": "0:01:25", "remaining_time": "3:57:57", "throughput": 9718.78, "total_tokens": 835520} +{"current_steps": 1230, "total_steps": 204665, "loss": 0.147, "lr": 1.2009576391263986e-07, "epoch": 0.030049104634402562, "percentage": 0.6, "elapsed_time": "0:01:26", "remaining_time": "3:57:56", "throughput": 9719.46, "total_tokens": 838976} +{"current_steps": 1235, "total_steps": 204665, "loss": 0.2507, "lr": 1.2058435530365955e-07, "epoch": 0.030171255466249724, "percentage": 0.6, "elapsed_time": "0:01:26", "remaining_time": "3:57:56", "throughput": 9719.45, "total_tokens": 842368} +{"current_steps": 1240, "total_steps": 204665, "loss": 0.1361, "lr": 1.2107294669467925e-07, "epoch": 0.03029340629809689, "percentage": 0.61, "elapsed_time": "0:01:27", "remaining_time": "3:57:55", "throughput": 9717.96, "total_tokens": 845632} +{"current_steps": 1245, "total_steps": 204665, "loss": 0.1966, "lr": 1.2156153808569893e-07, "epoch": 0.030415557129944055, "percentage": 0.61, "elapsed_time": "0:01:27", "remaining_time": "3:57:53", "throughput": 9716.3, "total_tokens": 848832} +{"current_steps": 1250, "total_steps": 204665, "loss": 0.1246, "lr": 1.2205012947671862e-07, "epoch": 0.03053770796179122, "percentage": 0.61, "elapsed_time": "0:01:27", "remaining_time": "3:57:53", "throughput": 9718.08, "total_tokens": 852416} +{"current_steps": 1255, "total_steps": 204665, "loss": 0.1815, "lr": 1.225387208677383e-07, "epoch": 0.030659858793638386, "percentage": 0.61, "elapsed_time": "0:01:28", "remaining_time": "3:57:53", "throughput": 9718.05, "total_tokens": 855808} +{"current_steps": 1260, "total_steps": 204665, "loss": 0.1261, "lr": 1.23027312258758e-07, "epoch": 0.030782009625485548, "percentage": 0.62, "elapsed_time": "0:01:28", "remaining_time": "3:57:51", "throughput": 9716.48, "total_tokens": 859008} +{"current_steps": 1265, "total_steps": 204665, "loss": 0.0881, "lr": 1.2351590364977769e-07, "epoch": 0.030904160457332713, "percentage": 0.62, "elapsed_time": "0:01:28", "remaining_time": "3:57:53", "throughput": 9721.54, "total_tokens": 862976} +{"current_steps": 1270, "total_steps": 204665, "loss": 0.2887, "lr": 1.2400449504079737e-07, "epoch": 0.03102631128917988, "percentage": 0.62, "elapsed_time": "0:01:29", "remaining_time": "3:57:51", "throughput": 9720.66, "total_tokens": 866240} +{"current_steps": 1275, "total_steps": 204665, "loss": 0.202, "lr": 1.2449308643181707e-07, "epoch": 0.031148462121027044, "percentage": 0.62, "elapsed_time": "0:01:29", "remaining_time": "3:57:50", "throughput": 9718.39, "total_tokens": 869376} +{"current_steps": 1280, "total_steps": 204665, "loss": 0.1258, "lr": 1.2498167782283676e-07, "epoch": 0.031270612952874206, "percentage": 0.63, "elapsed_time": "0:01:29", "remaining_time": "3:57:49", "throughput": 9719.79, "total_tokens": 872896} +{"current_steps": 1285, "total_steps": 204665, "loss": 0.1506, "lr": 1.2547026921385644e-07, "epoch": 0.031392763784721375, "percentage": 0.63, "elapsed_time": "0:01:30", "remaining_time": "3:57:49", "throughput": 9720.97, "total_tokens": 876416} +{"current_steps": 1290, "total_steps": 204665, "loss": 0.2235, "lr": 1.2595886060487615e-07, "epoch": 0.03151491461656854, "percentage": 0.63, "elapsed_time": "0:01:30", "remaining_time": "3:57:48", "throughput": 9719.91, "total_tokens": 879680} +{"current_steps": 1295, "total_steps": 204665, "loss": 0.1283, "lr": 1.2644745199589583e-07, "epoch": 0.031637065448415706, "percentage": 0.63, "elapsed_time": "0:01:30", "remaining_time": "3:57:47", "throughput": 9719.18, "total_tokens": 883008} +{"current_steps": 1300, "total_steps": 204665, "loss": 0.2082, "lr": 1.269360433869155e-07, "epoch": 0.03175921628026287, "percentage": 0.64, "elapsed_time": "0:01:31", "remaining_time": "3:57:48", "throughput": 9723.23, "total_tokens": 886848} +{"current_steps": 1305, "total_steps": 204665, "loss": 0.1701, "lr": 1.2742463477793522e-07, "epoch": 0.03188136711211003, "percentage": 0.64, "elapsed_time": "0:01:31", "remaining_time": "3:57:47", "throughput": 9722.72, "total_tokens": 890176} +{"current_steps": 1310, "total_steps": 204665, "loss": 0.1525, "lr": 1.279132261689549e-07, "epoch": 0.0320035179439572, "percentage": 0.64, "elapsed_time": "0:01:31", "remaining_time": "3:57:45", "throughput": 9719.49, "total_tokens": 893184} +{"current_steps": 1315, "total_steps": 204665, "loss": 0.1911, "lr": 1.2840181755997458e-07, "epoch": 0.03212566877580436, "percentage": 0.64, "elapsed_time": "0:01:32", "remaining_time": "3:57:44", "throughput": 9718.42, "total_tokens": 896448} +{"current_steps": 1320, "total_steps": 204665, "loss": 0.2272, "lr": 1.2889040895099429e-07, "epoch": 0.03224781960765153, "percentage": 0.64, "elapsed_time": "0:01:32", "remaining_time": "3:57:44", "throughput": 9718.06, "total_tokens": 899840} +{"current_steps": 1325, "total_steps": 204665, "loss": 0.1963, "lr": 1.2937900034201397e-07, "epoch": 0.03236997043949869, "percentage": 0.65, "elapsed_time": "0:01:32", "remaining_time": "3:57:42", "throughput": 9715.57, "total_tokens": 902912} +{"current_steps": 1330, "total_steps": 204665, "loss": 0.1968, "lr": 1.2986759173303365e-07, "epoch": 0.03249212127134586, "percentage": 0.65, "elapsed_time": "0:01:33", "remaining_time": "3:57:42", "throughput": 9718.95, "total_tokens": 906688} +{"current_steps": 1335, "total_steps": 204665, "loss": 0.3204, "lr": 1.3035618312405336e-07, "epoch": 0.03261427210319302, "percentage": 0.65, "elapsed_time": "0:01:33", "remaining_time": "3:57:42", "throughput": 9720.63, "total_tokens": 910272} +{"current_steps": 1340, "total_steps": 204665, "loss": 0.1802, "lr": 1.3084477451507304e-07, "epoch": 0.032736422935040185, "percentage": 0.65, "elapsed_time": "0:01:33", "remaining_time": "3:57:41", "throughput": 9719.16, "total_tokens": 913472} +{"current_steps": 1345, "total_steps": 204665, "loss": 0.1534, "lr": 1.3133336590609272e-07, "epoch": 0.032858573766887354, "percentage": 0.66, "elapsed_time": "0:01:34", "remaining_time": "3:57:40", "throughput": 9718.79, "total_tokens": 916800} +{"current_steps": 1350, "total_steps": 204665, "loss": 0.1086, "lr": 1.3182195729711243e-07, "epoch": 0.032980724598734516, "percentage": 0.66, "elapsed_time": "0:01:34", "remaining_time": "3:57:39", "throughput": 9720.43, "total_tokens": 920384} +{"current_steps": 1355, "total_steps": 204665, "loss": 0.1473, "lr": 1.323105486881321e-07, "epoch": 0.033102875430581685, "percentage": 0.66, "elapsed_time": "0:01:35", "remaining_time": "3:57:37", "throughput": 9718.08, "total_tokens": 923456} +{"current_steps": 1360, "total_steps": 204665, "loss": 0.1892, "lr": 1.327991400791518e-07, "epoch": 0.03322502626242885, "percentage": 0.66, "elapsed_time": "0:01:35", "remaining_time": "3:57:37", "throughput": 9718.07, "total_tokens": 926848} +{"current_steps": 1365, "total_steps": 204665, "loss": 0.1372, "lr": 1.332877314701715e-07, "epoch": 0.03334717709427601, "percentage": 0.67, "elapsed_time": "0:01:35", "remaining_time": "3:57:37", "throughput": 9720.73, "total_tokens": 930560} +{"current_steps": 1370, "total_steps": 204665, "loss": 0.2135, "lr": 1.3377632286119118e-07, "epoch": 0.03346932792612318, "percentage": 0.67, "elapsed_time": "0:01:36", "remaining_time": "3:57:37", "throughput": 9722.49, "total_tokens": 934144} +{"current_steps": 1375, "total_steps": 204665, "loss": 0.1484, "lr": 1.3426491425221086e-07, "epoch": 0.03359147875797034, "percentage": 0.67, "elapsed_time": "0:01:36", "remaining_time": "3:57:35", "throughput": 9719.03, "total_tokens": 937088} +{"current_steps": 1380, "total_steps": 204665, "loss": 0.2285, "lr": 1.3475350564323057e-07, "epoch": 0.03371362958981751, "percentage": 0.67, "elapsed_time": "0:01:36", "remaining_time": "3:57:35", "throughput": 9719.28, "total_tokens": 940544} +{"current_steps": 1385, "total_steps": 204665, "loss": 0.1582, "lr": 1.3524209703425025e-07, "epoch": 0.03383578042166467, "percentage": 0.68, "elapsed_time": "0:01:37", "remaining_time": "3:57:34", "throughput": 9721.67, "total_tokens": 944192} +{"current_steps": 1390, "total_steps": 204665, "loss": 0.1455, "lr": 1.3573068842526993e-07, "epoch": 0.03395793125351183, "percentage": 0.68, "elapsed_time": "0:01:37", "remaining_time": "3:57:34", "throughput": 9722.87, "total_tokens": 947712} +{"current_steps": 1395, "total_steps": 204665, "loss": 0.193, "lr": 1.3621927981628964e-07, "epoch": 0.034080082085359, "percentage": 0.68, "elapsed_time": "0:01:37", "remaining_time": "3:57:32", "throughput": 9720.3, "total_tokens": 950784} +{"current_steps": 1400, "total_steps": 204665, "loss": 0.1433, "lr": 1.3670787120730932e-07, "epoch": 0.034202232917206164, "percentage": 0.68, "elapsed_time": "0:01:38", "remaining_time": "3:57:33", "throughput": 9722.34, "total_tokens": 954432} +{"current_steps": 1405, "total_steps": 204665, "loss": 0.1772, "lr": 1.37196462598329e-07, "epoch": 0.03432438374905333, "percentage": 0.69, "elapsed_time": "0:01:38", "remaining_time": "3:57:31", "throughput": 9719.7, "total_tokens": 957504} +{"current_steps": 1410, "total_steps": 204665, "loss": 0.1693, "lr": 1.376850539893487e-07, "epoch": 0.034446534580900495, "percentage": 0.69, "elapsed_time": "0:01:38", "remaining_time": "3:57:29", "throughput": 9716.14, "total_tokens": 960448} +{"current_steps": 1415, "total_steps": 204665, "loss": 0.1895, "lr": 1.381736453803684e-07, "epoch": 0.034568685412747664, "percentage": 0.69, "elapsed_time": "0:01:39", "remaining_time": "3:57:28", "throughput": 9716.32, "total_tokens": 963840} +{"current_steps": 1420, "total_steps": 204665, "loss": 0.174, "lr": 1.3866223677138807e-07, "epoch": 0.034690836244594826, "percentage": 0.69, "elapsed_time": "0:01:39", "remaining_time": "3:57:28", "throughput": 9716.18, "total_tokens": 967232} +{"current_steps": 1425, "total_steps": 204665, "loss": 0.2, "lr": 1.3915082816240778e-07, "epoch": 0.03481298707644199, "percentage": 0.7, "elapsed_time": "0:01:39", "remaining_time": "3:57:29", "throughput": 9719.07, "total_tokens": 971008} +{"current_steps": 1430, "total_steps": 204665, "loss": 0.1766, "lr": 1.3963941955342746e-07, "epoch": 0.03493513790828916, "percentage": 0.7, "elapsed_time": "0:01:40", "remaining_time": "3:57:29", "throughput": 9722.17, "total_tokens": 974784} +{"current_steps": 1435, "total_steps": 204665, "loss": 0.243, "lr": 1.4012801094444714e-07, "epoch": 0.03505728874013632, "percentage": 0.7, "elapsed_time": "0:01:40", "remaining_time": "3:57:27", "throughput": 9719.2, "total_tokens": 977792} +{"current_steps": 1440, "total_steps": 204665, "loss": 0.14, "lr": 1.4061660233546685e-07, "epoch": 0.03517943957198349, "percentage": 0.7, "elapsed_time": "0:01:40", "remaining_time": "3:57:27", "throughput": 9718.14, "total_tokens": 981056} +{"current_steps": 1445, "total_steps": 204665, "loss": 0.1969, "lr": 1.4110519372648653e-07, "epoch": 0.03530159040383065, "percentage": 0.71, "elapsed_time": "0:01:41", "remaining_time": "3:57:25", "throughput": 9716.43, "total_tokens": 984192} +{"current_steps": 1450, "total_steps": 204665, "loss": 0.1798, "lr": 1.415937851175062e-07, "epoch": 0.03542374123567781, "percentage": 0.71, "elapsed_time": "0:01:41", "remaining_time": "3:57:24", "throughput": 9716.04, "total_tokens": 987520} +{"current_steps": 1455, "total_steps": 204665, "loss": 0.1784, "lr": 1.4208237650852592e-07, "epoch": 0.03554589206752498, "percentage": 0.71, "elapsed_time": "0:01:41", "remaining_time": "3:57:24", "throughput": 9718.22, "total_tokens": 991168} +{"current_steps": 1460, "total_steps": 204665, "loss": 0.1598, "lr": 1.425709678995456e-07, "epoch": 0.03566804289937214, "percentage": 0.71, "elapsed_time": "0:01:42", "remaining_time": "3:57:23", "throughput": 9717.87, "total_tokens": 994496} +{"current_steps": 1465, "total_steps": 204665, "loss": 0.1563, "lr": 1.4305955929056528e-07, "epoch": 0.03579019373121931, "percentage": 0.72, "elapsed_time": "0:01:42", "remaining_time": "3:57:23", "throughput": 9720.49, "total_tokens": 998208} +{"current_steps": 1470, "total_steps": 204665, "loss": 0.159, "lr": 1.43548150681585e-07, "epoch": 0.035912344563066474, "percentage": 0.72, "elapsed_time": "0:01:43", "remaining_time": "3:57:24", "throughput": 9723.52, "total_tokens": 1001984} +{"current_steps": 1475, "total_steps": 204665, "loss": 0.229, "lr": 1.4403674207260467e-07, "epoch": 0.036034495394913636, "percentage": 0.72, "elapsed_time": "0:01:43", "remaining_time": "3:57:22", "throughput": 9720.63, "total_tokens": 1004992} +{"current_steps": 1480, "total_steps": 204665, "loss": 0.1666, "lr": 1.4452533346362435e-07, "epoch": 0.036156646226760805, "percentage": 0.72, "elapsed_time": "0:01:43", "remaining_time": "3:57:21", "throughput": 9720.22, "total_tokens": 1008320} +{"current_steps": 1485, "total_steps": 204665, "loss": 0.1998, "lr": 1.4501392485464406e-07, "epoch": 0.03627879705860797, "percentage": 0.73, "elapsed_time": "0:01:44", "remaining_time": "3:57:20", "throughput": 9720.78, "total_tokens": 1011776} +{"current_steps": 1490, "total_steps": 204665, "loss": 0.1194, "lr": 1.4550251624566374e-07, "epoch": 0.036400947890455136, "percentage": 0.73, "elapsed_time": "0:01:44", "remaining_time": "3:57:19", "throughput": 9719.41, "total_tokens": 1014976} +{"current_steps": 1495, "total_steps": 204665, "loss": 0.1989, "lr": 1.4599110763668342e-07, "epoch": 0.0365230987223023, "percentage": 0.73, "elapsed_time": "0:01:44", "remaining_time": "3:57:19", "throughput": 9720.46, "total_tokens": 1018496} +{"current_steps": 1500, "total_steps": 204665, "loss": 0.1353, "lr": 1.4647969902770313e-07, "epoch": 0.03664524955414947, "percentage": 0.73, "elapsed_time": "0:01:45", "remaining_time": "3:57:18", "throughput": 9718.98, "total_tokens": 1021696} +{"current_steps": 1505, "total_steps": 204665, "loss": 0.2075, "lr": 1.4696829041872284e-07, "epoch": 0.03676740038599663, "percentage": 0.74, "elapsed_time": "0:01:45", "remaining_time": "3:57:18", "throughput": 9720.66, "total_tokens": 1025344} +{"current_steps": 1510, "total_steps": 204665, "loss": 0.1738, "lr": 1.474568818097425e-07, "epoch": 0.03688955121784379, "percentage": 0.74, "elapsed_time": "0:01:45", "remaining_time": "3:57:18", "throughput": 9720.23, "total_tokens": 1028672} +{"current_steps": 1515, "total_steps": 204665, "loss": 0.1116, "lr": 1.479454732007622e-07, "epoch": 0.03701170204969096, "percentage": 0.74, "elapsed_time": "0:01:46", "remaining_time": "3:57:17", "throughput": 9719.2, "total_tokens": 1031936} +{"current_steps": 1520, "total_steps": 204665, "loss": 0.1733, "lr": 1.484340645917819e-07, "epoch": 0.03713385288153812, "percentage": 0.74, "elapsed_time": "0:01:46", "remaining_time": "3:57:16", "throughput": 9718.21, "total_tokens": 1035200} +{"current_steps": 1525, "total_steps": 204665, "loss": 0.1415, "lr": 1.4892265598280156e-07, "epoch": 0.03725600371338529, "percentage": 0.75, "elapsed_time": "0:01:46", "remaining_time": "3:57:16", "throughput": 9718.46, "total_tokens": 1038656} +{"current_steps": 1530, "total_steps": 204665, "loss": 0.138, "lr": 1.4941124737382127e-07, "epoch": 0.03737815454523245, "percentage": 0.75, "elapsed_time": "0:01:47", "remaining_time": "3:57:15", "throughput": 9717.85, "total_tokens": 1041984} +{"current_steps": 1535, "total_steps": 204665, "loss": 0.1636, "lr": 1.4989983876484098e-07, "epoch": 0.037500305377079615, "percentage": 0.75, "elapsed_time": "0:01:47", "remaining_time": "3:57:14", "throughput": 9716.67, "total_tokens": 1045184} +{"current_steps": 1540, "total_steps": 204665, "loss": 0.1178, "lr": 1.5038843015586063e-07, "epoch": 0.037622456208926784, "percentage": 0.75, "elapsed_time": "0:01:47", "remaining_time": "3:57:13", "throughput": 9717.5, "total_tokens": 1048640} +{"current_steps": 1545, "total_steps": 204665, "loss": 0.1089, "lr": 1.5087702154688034e-07, "epoch": 0.037744607040773946, "percentage": 0.75, "elapsed_time": "0:01:48", "remaining_time": "3:57:13", "throughput": 9719.85, "total_tokens": 1052352} +{"current_steps": 1550, "total_steps": 204665, "loss": 0.1198, "lr": 1.5136561293790005e-07, "epoch": 0.037866757872621115, "percentage": 0.76, "elapsed_time": "0:01:48", "remaining_time": "3:57:14", "throughput": 9723.31, "total_tokens": 1056192} +{"current_steps": 1555, "total_steps": 204665, "loss": 0.2486, "lr": 1.518542043289197e-07, "epoch": 0.03798890870446828, "percentage": 0.76, "elapsed_time": "0:01:48", "remaining_time": "3:57:13", "throughput": 9722.1, "total_tokens": 1059392} +{"current_steps": 1560, "total_steps": 204665, "loss": 0.1491, "lr": 1.523427957199394e-07, "epoch": 0.03811105953631544, "percentage": 0.76, "elapsed_time": "0:01:49", "remaining_time": "3:57:11", "throughput": 9719.49, "total_tokens": 1062400} +{"current_steps": 1565, "total_steps": 204665, "loss": 0.1352, "lr": 1.5283138711095912e-07, "epoch": 0.03823321036816261, "percentage": 0.76, "elapsed_time": "0:01:49", "remaining_time": "3:57:10", "throughput": 9719.55, "total_tokens": 1065792} +{"current_steps": 1570, "total_steps": 204665, "loss": 0.2562, "lr": 1.5331997850197878e-07, "epoch": 0.03835536120000977, "percentage": 0.77, "elapsed_time": "0:01:49", "remaining_time": "3:57:08", "throughput": 9717.75, "total_tokens": 1068864} +{"current_steps": 1575, "total_steps": 204665, "loss": 0.1579, "lr": 1.5380856989299848e-07, "epoch": 0.03847751203185694, "percentage": 0.77, "elapsed_time": "0:01:50", "remaining_time": "3:57:06", "throughput": 9715.33, "total_tokens": 1071872} +{"current_steps": 1580, "total_steps": 204665, "loss": 0.1577, "lr": 1.542971612840182e-07, "epoch": 0.0385996628637041, "percentage": 0.77, "elapsed_time": "0:01:50", "remaining_time": "3:57:04", "throughput": 9712.72, "total_tokens": 1074880} +{"current_steps": 1585, "total_steps": 204665, "loss": 0.1595, "lr": 1.5478575267503785e-07, "epoch": 0.03872181369555127, "percentage": 0.77, "elapsed_time": "0:01:51", "remaining_time": "3:57:07", "throughput": 9718.55, "total_tokens": 1079168} +{"current_steps": 1590, "total_steps": 204665, "loss": 0.2213, "lr": 1.5527434406605755e-07, "epoch": 0.03884396452739843, "percentage": 0.78, "elapsed_time": "0:01:51", "remaining_time": "3:57:06", "throughput": 9718.25, "total_tokens": 1082496} +{"current_steps": 1595, "total_steps": 204665, "loss": 0.1883, "lr": 1.5576293545707726e-07, "epoch": 0.038966115359245594, "percentage": 0.78, "elapsed_time": "0:01:51", "remaining_time": "3:57:05", "throughput": 9717.69, "total_tokens": 1085824} +{"current_steps": 1600, "total_steps": 204665, "loss": 0.1329, "lr": 1.5625152684809692e-07, "epoch": 0.03908826619109276, "percentage": 0.78, "elapsed_time": "0:01:52", "remaining_time": "3:57:07", "throughput": 9722.78, "total_tokens": 1089920} +{"current_steps": 1605, "total_steps": 204665, "loss": 0.1146, "lr": 1.5674011823911662e-07, "epoch": 0.039210417022939925, "percentage": 0.78, "elapsed_time": "0:01:52", "remaining_time": "3:57:07", "throughput": 9726.08, "total_tokens": 1093760} +{"current_steps": 1610, "total_steps": 204665, "loss": 0.1238, "lr": 1.5722870963013633e-07, "epoch": 0.039332567854787094, "percentage": 0.79, "elapsed_time": "0:01:52", "remaining_time": "3:57:07", "throughput": 9726.58, "total_tokens": 1097216} +{"current_steps": 1615, "total_steps": 204665, "loss": 0.1502, "lr": 1.5771730102115599e-07, "epoch": 0.039454718686634256, "percentage": 0.79, "elapsed_time": "0:01:53", "remaining_time": "3:57:06", "throughput": 9726.72, "total_tokens": 1100608} +{"current_steps": 1620, "total_steps": 204665, "loss": 0.1395, "lr": 1.582058924121757e-07, "epoch": 0.03957686951848142, "percentage": 0.79, "elapsed_time": "0:01:53", "remaining_time": "3:57:06", "throughput": 9727.06, "total_tokens": 1104064} +{"current_steps": 1625, "total_steps": 204665, "loss": 0.1604, "lr": 1.586944838031954e-07, "epoch": 0.03969902035032859, "percentage": 0.79, "elapsed_time": "0:01:53", "remaining_time": "3:57:06", "throughput": 9727.91, "total_tokens": 1107584} +{"current_steps": 1630, "total_steps": 204665, "loss": 0.0803, "lr": 1.5918307519421506e-07, "epoch": 0.03982117118217575, "percentage": 0.8, "elapsed_time": "0:01:54", "remaining_time": "3:57:04", "throughput": 9726.08, "total_tokens": 1110656} +{"current_steps": 1635, "total_steps": 204665, "loss": 0.158, "lr": 1.5967166658523476e-07, "epoch": 0.03994332201402292, "percentage": 0.8, "elapsed_time": "0:01:54", "remaining_time": "3:57:02", "throughput": 9724.33, "total_tokens": 1113792} +{"current_steps": 1640, "total_steps": 204665, "loss": 0.1535, "lr": 1.6016025797625445e-07, "epoch": 0.04006547284587008, "percentage": 0.8, "elapsed_time": "0:01:54", "remaining_time": "3:57:03", "throughput": 9726.05, "total_tokens": 1117440} +{"current_steps": 1645, "total_steps": 204665, "loss": 0.1075, "lr": 1.6064884936727413e-07, "epoch": 0.04018762367771724, "percentage": 0.8, "elapsed_time": "0:01:55", "remaining_time": "3:57:02", "throughput": 9727.13, "total_tokens": 1120960} +{"current_steps": 1650, "total_steps": 204665, "loss": 0.063, "lr": 1.6113744075829384e-07, "epoch": 0.04030977450956441, "percentage": 0.81, "elapsed_time": "0:01:55", "remaining_time": "3:57:00", "throughput": 9725.11, "total_tokens": 1124032} +{"current_steps": 1655, "total_steps": 204665, "loss": 0.0726, "lr": 1.6162603214931352e-07, "epoch": 0.04043192534141157, "percentage": 0.81, "elapsed_time": "0:01:55", "remaining_time": "3:57:00", "throughput": 9725.07, "total_tokens": 1127424} +{"current_steps": 1660, "total_steps": 204665, "loss": 0.2601, "lr": 1.621146235403332e-07, "epoch": 0.04055407617325874, "percentage": 0.81, "elapsed_time": "0:01:56", "remaining_time": "3:56:59", "throughput": 9723.9, "total_tokens": 1130624} +{"current_steps": 1665, "total_steps": 204665, "loss": 0.2492, "lr": 1.626032149313529e-07, "epoch": 0.040676227005105904, "percentage": 0.81, "elapsed_time": "0:01:56", "remaining_time": "3:56:59", "throughput": 9725.61, "total_tokens": 1134272} +{"current_steps": 1670, "total_steps": 204665, "loss": 0.1489, "lr": 1.630918063223726e-07, "epoch": 0.04079837783695307, "percentage": 0.82, "elapsed_time": "0:01:56", "remaining_time": "3:56:59", "throughput": 9726.49, "total_tokens": 1137792} +{"current_steps": 1675, "total_steps": 204665, "loss": 0.1915, "lr": 1.6358039771339227e-07, "epoch": 0.040920528668800235, "percentage": 0.82, "elapsed_time": "0:01:57", "remaining_time": "3:56:58", "throughput": 9725.86, "total_tokens": 1141056} +{"current_steps": 1680, "total_steps": 204665, "loss": 0.2433, "lr": 1.6406898910441198e-07, "epoch": 0.0410426795006474, "percentage": 0.82, "elapsed_time": "0:01:57", "remaining_time": "3:56:57", "throughput": 9726.88, "total_tokens": 1144576} +{"current_steps": 1685, "total_steps": 204665, "loss": 0.1577, "lr": 1.6455758049543166e-07, "epoch": 0.041164830332494566, "percentage": 0.82, "elapsed_time": "0:01:58", "remaining_time": "3:56:56", "throughput": 9726.3, "total_tokens": 1147840} +{"current_steps": 1690, "total_steps": 204665, "loss": 0.105, "lr": 1.6504617188645134e-07, "epoch": 0.04128698116434173, "percentage": 0.83, "elapsed_time": "0:01:58", "remaining_time": "3:56:56", "throughput": 9728.67, "total_tokens": 1151552} +{"current_steps": 1695, "total_steps": 204665, "loss": 0.1276, "lr": 1.6553476327747105e-07, "epoch": 0.0414091319961889, "percentage": 0.83, "elapsed_time": "0:01:58", "remaining_time": "3:56:54", "throughput": 9726.25, "total_tokens": 1154560} +{"current_steps": 1700, "total_steps": 204665, "loss": 0.1723, "lr": 1.6602335466849073e-07, "epoch": 0.04153128282803606, "percentage": 0.83, "elapsed_time": "0:01:59", "remaining_time": "3:56:53", "throughput": 9725.08, "total_tokens": 1157760} +{"current_steps": 1705, "total_steps": 204665, "loss": 0.2233, "lr": 1.665119460595104e-07, "epoch": 0.04165343365988322, "percentage": 0.83, "elapsed_time": "0:01:59", "remaining_time": "3:56:53", "throughput": 9727.4, "total_tokens": 1161472} +{"current_steps": 1710, "total_steps": 204665, "loss": 0.1753, "lr": 1.6700053745053012e-07, "epoch": 0.04177558449173039, "percentage": 0.84, "elapsed_time": "0:01:59", "remaining_time": "3:56:51", "throughput": 9724.93, "total_tokens": 1164480} +{"current_steps": 1715, "total_steps": 204665, "loss": 0.1315, "lr": 1.674891288415498e-07, "epoch": 0.04189773532357755, "percentage": 0.84, "elapsed_time": "0:02:00", "remaining_time": "3:56:52", "throughput": 9728.22, "total_tokens": 1168384} +{"current_steps": 1720, "total_steps": 204665, "loss": 0.158, "lr": 1.6797772023256948e-07, "epoch": 0.04201988615542472, "percentage": 0.84, "elapsed_time": "0:02:00", "remaining_time": "3:56:51", "throughput": 9727.4, "total_tokens": 1171648} +{"current_steps": 1725, "total_steps": 204665, "loss": 0.1922, "lr": 1.684663116235892e-07, "epoch": 0.04214203698727188, "percentage": 0.84, "elapsed_time": "0:02:00", "remaining_time": "3:56:50", "throughput": 9727.11, "total_tokens": 1174976} +{"current_steps": 1730, "total_steps": 204665, "loss": 0.2365, "lr": 1.6895490301460887e-07, "epoch": 0.042264187819119045, "percentage": 0.85, "elapsed_time": "0:02:01", "remaining_time": "3:56:50", "throughput": 9725.83, "total_tokens": 1178176} +{"current_steps": 1735, "total_steps": 204665, "loss": 0.114, "lr": 1.6944349440562858e-07, "epoch": 0.042386338650966214, "percentage": 0.85, "elapsed_time": "0:02:01", "remaining_time": "3:56:50", "throughput": 9726.44, "total_tokens": 1181696} +{"current_steps": 1740, "total_steps": 204665, "loss": 0.16, "lr": 1.6993208579664826e-07, "epoch": 0.042508489482813376, "percentage": 0.85, "elapsed_time": "0:02:01", "remaining_time": "3:56:49", "throughput": 9727.28, "total_tokens": 1185216} +{"current_steps": 1745, "total_steps": 204665, "loss": 0.1776, "lr": 1.7042067718766794e-07, "epoch": 0.042630640314660545, "percentage": 0.85, "elapsed_time": "0:02:02", "remaining_time": "3:56:49", "throughput": 9727.75, "total_tokens": 1188672} +{"current_steps": 1750, "total_steps": 204665, "loss": 0.1947, "lr": 1.7090926857868765e-07, "epoch": 0.04275279114650771, "percentage": 0.86, "elapsed_time": "0:02:02", "remaining_time": "3:56:50", "throughput": 9730.64, "total_tokens": 1192512} +{"current_steps": 1755, "total_steps": 204665, "loss": 0.1722, "lr": 1.7139785996970733e-07, "epoch": 0.042874941978354876, "percentage": 0.86, "elapsed_time": "0:02:02", "remaining_time": "3:56:48", "throughput": 9728.25, "total_tokens": 1195520} +{"current_steps": 1760, "total_steps": 204665, "loss": 0.1517, "lr": 1.71886451360727e-07, "epoch": 0.04299709281020204, "percentage": 0.86, "elapsed_time": "0:02:03", "remaining_time": "3:56:47", "throughput": 9729.29, "total_tokens": 1199040} +{"current_steps": 1765, "total_steps": 204665, "loss": 0.1429, "lr": 1.7237504275174672e-07, "epoch": 0.0431192436420492, "percentage": 0.86, "elapsed_time": "0:02:03", "remaining_time": "3:56:47", "throughput": 9728.02, "total_tokens": 1202240} +{"current_steps": 1770, "total_steps": 204665, "loss": 0.1743, "lr": 1.728636341427664e-07, "epoch": 0.04324139447389637, "percentage": 0.86, "elapsed_time": "0:02:03", "remaining_time": "3:56:45", "throughput": 9727.46, "total_tokens": 1205504} +{"current_steps": 1775, "total_steps": 204665, "loss": 0.1751, "lr": 1.7335222553378608e-07, "epoch": 0.04336354530574353, "percentage": 0.87, "elapsed_time": "0:02:04", "remaining_time": "3:56:45", "throughput": 9727.5, "total_tokens": 1208896} +{"current_steps": 1780, "total_steps": 204665, "loss": 0.1709, "lr": 1.738408169248058e-07, "epoch": 0.0434856961375907, "percentage": 0.87, "elapsed_time": "0:02:04", "remaining_time": "3:56:46", "throughput": 9730.88, "total_tokens": 1212864} +{"current_steps": 1785, "total_steps": 204665, "loss": 0.0921, "lr": 1.7432940831582547e-07, "epoch": 0.04360784696943786, "percentage": 0.87, "elapsed_time": "0:02:04", "remaining_time": "3:56:45", "throughput": 9730.61, "total_tokens": 1216192} +{"current_steps": 1790, "total_steps": 204665, "loss": 0.2697, "lr": 1.7481799970684515e-07, "epoch": 0.043729997801285024, "percentage": 0.87, "elapsed_time": "0:02:05", "remaining_time": "3:56:43", "throughput": 9728.98, "total_tokens": 1219264} +{"current_steps": 1795, "total_steps": 204665, "loss": 0.1171, "lr": 1.7530659109786486e-07, "epoch": 0.04385214863313219, "percentage": 0.88, "elapsed_time": "0:02:05", "remaining_time": "3:56:42", "throughput": 9727.42, "total_tokens": 1222400} +{"current_steps": 1800, "total_steps": 204665, "loss": 0.1516, "lr": 1.7579518248888454e-07, "epoch": 0.043974299464979355, "percentage": 0.88, "elapsed_time": "0:02:06", "remaining_time": "3:56:41", "throughput": 9725.61, "total_tokens": 1225472} +{"current_steps": 1805, "total_steps": 204665, "loss": 0.2495, "lr": 1.7628377387990422e-07, "epoch": 0.044096450296826524, "percentage": 0.88, "elapsed_time": "0:02:06", "remaining_time": "3:56:39", "throughput": 9725.02, "total_tokens": 1228736} +{"current_steps": 1810, "total_steps": 204665, "loss": 0.098, "lr": 1.7677236527092393e-07, "epoch": 0.044218601128673686, "percentage": 0.88, "elapsed_time": "0:02:06", "remaining_time": "3:56:41", "throughput": 9730.13, "total_tokens": 1232960} +{"current_steps": 1815, "total_steps": 204665, "loss": 0.1844, "lr": 1.772609566619436e-07, "epoch": 0.04434075196052085, "percentage": 0.89, "elapsed_time": "0:02:07", "remaining_time": "3:56:41", "throughput": 9730.88, "total_tokens": 1236480} +{"current_steps": 1820, "total_steps": 204665, "loss": 0.0747, "lr": 1.777495480529633e-07, "epoch": 0.04446290279236802, "percentage": 0.89, "elapsed_time": "0:02:07", "remaining_time": "3:56:42", "throughput": 9734.32, "total_tokens": 1240448} +{"current_steps": 1825, "total_steps": 204665, "loss": 0.1022, "lr": 1.78238139443983e-07, "epoch": 0.04458505362421518, "percentage": 0.89, "elapsed_time": "0:02:07", "remaining_time": "3:56:41", "throughput": 9733.31, "total_tokens": 1243648} +{"current_steps": 1830, "total_steps": 204665, "loss": 0.1376, "lr": 1.7872673083500268e-07, "epoch": 0.04470720445606235, "percentage": 0.89, "elapsed_time": "0:02:08", "remaining_time": "3:56:40", "throughput": 9733.29, "total_tokens": 1247040} +{"current_steps": 1835, "total_steps": 204665, "loss": 0.2258, "lr": 1.7921532222602236e-07, "epoch": 0.04482935528790951, "percentage": 0.9, "elapsed_time": "0:02:08", "remaining_time": "3:56:41", "throughput": 9735.77, "total_tokens": 1250816} +{"current_steps": 1840, "total_steps": 204665, "loss": 0.1251, "lr": 1.7970391361704207e-07, "epoch": 0.04495150611975668, "percentage": 0.9, "elapsed_time": "0:02:08", "remaining_time": "3:56:41", "throughput": 9738.48, "total_tokens": 1254656} +{"current_steps": 1845, "total_steps": 204665, "loss": 0.1035, "lr": 1.8019250500806175e-07, "epoch": 0.04507365695160384, "percentage": 0.9, "elapsed_time": "0:02:09", "remaining_time": "3:56:41", "throughput": 9739.35, "total_tokens": 1258176} +{"current_steps": 1850, "total_steps": 204665, "loss": 0.1309, "lr": 1.8068109639908143e-07, "epoch": 0.045195807783451, "percentage": 0.9, "elapsed_time": "0:02:09", "remaining_time": "3:56:39", "throughput": 9737.34, "total_tokens": 1261248} +{"current_steps": 1855, "total_steps": 204665, "loss": 0.2031, "lr": 1.8116968779010114e-07, "epoch": 0.04531795861529817, "percentage": 0.91, "elapsed_time": "0:02:09", "remaining_time": "3:56:39", "throughput": 9737.02, "total_tokens": 1264576} +{"current_steps": 1860, "total_steps": 204665, "loss": 0.1752, "lr": 1.8165827918112082e-07, "epoch": 0.045440109447145334, "percentage": 0.91, "elapsed_time": "0:02:10", "remaining_time": "3:56:38", "throughput": 9735.97, "total_tokens": 1267776} +{"current_steps": 1865, "total_steps": 204665, "loss": 0.1825, "lr": 1.821468705721405e-07, "epoch": 0.0455622602789925, "percentage": 0.91, "elapsed_time": "0:02:10", "remaining_time": "3:56:37", "throughput": 9735.17, "total_tokens": 1271040} +{"current_steps": 1870, "total_steps": 204665, "loss": 0.099, "lr": 1.826354619631602e-07, "epoch": 0.045684411110839665, "percentage": 0.91, "elapsed_time": "0:02:10", "remaining_time": "3:56:37", "throughput": 9735.42, "total_tokens": 1274496} +{"current_steps": 1875, "total_steps": 204665, "loss": 0.2014, "lr": 1.831240533541799e-07, "epoch": 0.04580656194268683, "percentage": 0.92, "elapsed_time": "0:02:11", "remaining_time": "3:56:36", "throughput": 9735.29, "total_tokens": 1277824} +{"current_steps": 1880, "total_steps": 204665, "loss": 0.1235, "lr": 1.8361264474519957e-07, "epoch": 0.045928712774533996, "percentage": 0.92, "elapsed_time": "0:02:11", "remaining_time": "3:56:34", "throughput": 9733.05, "total_tokens": 1280832} +{"current_steps": 1885, "total_steps": 204665, "loss": 0.122, "lr": 1.8410123613621928e-07, "epoch": 0.04605086360638116, "percentage": 0.92, "elapsed_time": "0:02:11", "remaining_time": "3:56:33", "throughput": 9732.67, "total_tokens": 1284160} +{"current_steps": 1890, "total_steps": 204665, "loss": 0.1444, "lr": 1.8458982752723896e-07, "epoch": 0.04617301443822833, "percentage": 0.92, "elapsed_time": "0:02:12", "remaining_time": "3:56:32", "throughput": 9730.39, "total_tokens": 1287168} +{"current_steps": 1895, "total_steps": 204665, "loss": 0.093, "lr": 1.8507841891825864e-07, "epoch": 0.04629516527007549, "percentage": 0.93, "elapsed_time": "0:02:12", "remaining_time": "3:56:32", "throughput": 9732.21, "total_tokens": 1290880} +{"current_steps": 1900, "total_steps": 204665, "loss": 0.1654, "lr": 1.8556701030927835e-07, "epoch": 0.04641731610192265, "percentage": 0.93, "elapsed_time": "0:02:12", "remaining_time": "3:56:33", "throughput": 9734.57, "total_tokens": 1294656} +{"current_steps": 1905, "total_steps": 204665, "loss": 0.1705, "lr": 1.8605560170029803e-07, "epoch": 0.04653946693376982, "percentage": 0.93, "elapsed_time": "0:02:13", "remaining_time": "3:56:31", "throughput": 9732.76, "total_tokens": 1297728} +{"current_steps": 1910, "total_steps": 204665, "loss": 0.1407, "lr": 1.8654419309131771e-07, "epoch": 0.04666161776561698, "percentage": 0.93, "elapsed_time": "0:02:13", "remaining_time": "3:56:30", "throughput": 9731.83, "total_tokens": 1300928} +{"current_steps": 1915, "total_steps": 204665, "loss": 0.2134, "lr": 1.8703278448233742e-07, "epoch": 0.04678376859746415, "percentage": 0.94, "elapsed_time": "0:02:14", "remaining_time": "3:56:28", "throughput": 9728.85, "total_tokens": 1303808} +{"current_steps": 1920, "total_steps": 204665, "loss": 0.1497, "lr": 1.875213758733571e-07, "epoch": 0.04690591942931131, "percentage": 0.94, "elapsed_time": "0:02:14", "remaining_time": "3:56:29", "throughput": 9731.54, "total_tokens": 1307648} +{"current_steps": 1925, "total_steps": 204665, "loss": 0.1378, "lr": 1.8800996726437678e-07, "epoch": 0.04702807026115848, "percentage": 0.94, "elapsed_time": "0:02:14", "remaining_time": "3:56:28", "throughput": 9730.46, "total_tokens": 1310848} +{"current_steps": 1930, "total_steps": 204665, "loss": 0.2215, "lr": 1.884985586553965e-07, "epoch": 0.047150221093005644, "percentage": 0.94, "elapsed_time": "0:02:15", "remaining_time": "3:56:28", "throughput": 9731.6, "total_tokens": 1314432} +{"current_steps": 1935, "total_steps": 204665, "loss": 0.2139, "lr": 1.8898715004641617e-07, "epoch": 0.047272371924852806, "percentage": 0.95, "elapsed_time": "0:02:15", "remaining_time": "3:56:28", "throughput": 9732.98, "total_tokens": 1318080} +{"current_steps": 1940, "total_steps": 204665, "loss": 0.2444, "lr": 1.8947574143743585e-07, "epoch": 0.047394522756699975, "percentage": 0.95, "elapsed_time": "0:02:15", "remaining_time": "3:56:26", "throughput": 9730.57, "total_tokens": 1321024} +{"current_steps": 1945, "total_steps": 204665, "loss": 0.1379, "lr": 1.8996433282845556e-07, "epoch": 0.04751667358854714, "percentage": 0.95, "elapsed_time": "0:02:16", "remaining_time": "3:56:26", "throughput": 9732.43, "total_tokens": 1324736} +{"current_steps": 1950, "total_steps": 204665, "loss": 0.1321, "lr": 1.9045292421947524e-07, "epoch": 0.047638824420394306, "percentage": 0.95, "elapsed_time": "0:02:16", "remaining_time": "3:56:26", "throughput": 9731.65, "total_tokens": 1328000} +{"current_steps": 1955, "total_steps": 204665, "loss": 0.124, "lr": 1.9094151561049492e-07, "epoch": 0.04776097525224147, "percentage": 0.96, "elapsed_time": "0:02:16", "remaining_time": "3:56:24", "throughput": 9729.45, "total_tokens": 1331008} +{"current_steps": 1960, "total_steps": 204665, "loss": 0.1557, "lr": 1.9143010700151463e-07, "epoch": 0.04788312608408863, "percentage": 0.96, "elapsed_time": "0:02:17", "remaining_time": "3:56:23", "throughput": 9728.29, "total_tokens": 1334208} +{"current_steps": 1965, "total_steps": 204665, "loss": 0.1513, "lr": 1.9191869839253434e-07, "epoch": 0.0480052769159358, "percentage": 0.96, "elapsed_time": "0:02:17", "remaining_time": "3:56:23", "throughput": 9727.59, "total_tokens": 1337472} +{"current_steps": 1970, "total_steps": 204665, "loss": 0.2155, "lr": 1.92407289783554e-07, "epoch": 0.04812742774778296, "percentage": 0.96, "elapsed_time": "0:02:17", "remaining_time": "3:56:22", "throughput": 9727.82, "total_tokens": 1340928} +{"current_steps": 1975, "total_steps": 204665, "loss": 0.1602, "lr": 1.928958811745737e-07, "epoch": 0.04824957857963013, "percentage": 0.96, "elapsed_time": "0:02:18", "remaining_time": "3:56:22", "throughput": 9726.61, "total_tokens": 1344128} +{"current_steps": 1980, "total_steps": 204665, "loss": 0.0793, "lr": 1.933844725655934e-07, "epoch": 0.04837172941147729, "percentage": 0.97, "elapsed_time": "0:02:18", "remaining_time": "3:56:22", "throughput": 9728.89, "total_tokens": 1347904} +{"current_steps": 1985, "total_steps": 204665, "loss": 0.1941, "lr": 1.9387306395661307e-07, "epoch": 0.048493880243324454, "percentage": 0.97, "elapsed_time": "0:02:18", "remaining_time": "3:56:22", "throughput": 9730.32, "total_tokens": 1351552} +{"current_steps": 1990, "total_steps": 204665, "loss": 0.1792, "lr": 1.9436165534763277e-07, "epoch": 0.04861603107517162, "percentage": 0.97, "elapsed_time": "0:02:19", "remaining_time": "3:56:21", "throughput": 9729.57, "total_tokens": 1354816} +{"current_steps": 1995, "total_steps": 204665, "loss": 0.2255, "lr": 1.9485024673865248e-07, "epoch": 0.048738181907018785, "percentage": 0.97, "elapsed_time": "0:02:19", "remaining_time": "3:56:20", "throughput": 9727.45, "total_tokens": 1357824} +{"current_steps": 2000, "total_steps": 204665, "loss": 0.1439, "lr": 1.9533883812967214e-07, "epoch": 0.048860332738865954, "percentage": 0.98, "elapsed_time": "0:02:19", "remaining_time": "3:56:19", "throughput": 9725.81, "total_tokens": 1360896} +{"current_steps": 2005, "total_steps": 204665, "loss": 0.1135, "lr": 1.9582742952069184e-07, "epoch": 0.048982483570713116, "percentage": 0.98, "elapsed_time": "0:02:20", "remaining_time": "3:56:18", "throughput": 9725.93, "total_tokens": 1364288} +{"current_steps": 2010, "total_steps": 204665, "loss": 0.1285, "lr": 1.9631602091171155e-07, "epoch": 0.049104634402560285, "percentage": 0.98, "elapsed_time": "0:02:20", "remaining_time": "3:56:17", "throughput": 9724.73, "total_tokens": 1367424} +{"current_steps": 2015, "total_steps": 204665, "loss": 0.149, "lr": 1.968046123027312e-07, "epoch": 0.04922678523440745, "percentage": 0.98, "elapsed_time": "0:02:20", "remaining_time": "3:56:16", "throughput": 9725.15, "total_tokens": 1370880} +{"current_steps": 2020, "total_steps": 204665, "loss": 0.0844, "lr": 1.9729320369375091e-07, "epoch": 0.04934893606625461, "percentage": 0.99, "elapsed_time": "0:02:21", "remaining_time": "3:56:16", "throughput": 9724.95, "total_tokens": 1374272} +{"current_steps": 2025, "total_steps": 204665, "loss": 0.1647, "lr": 1.977817950847706e-07, "epoch": 0.04947108689810178, "percentage": 0.99, "elapsed_time": "0:02:21", "remaining_time": "3:56:15", "throughput": 9724.67, "total_tokens": 1377600} +{"current_steps": 2030, "total_steps": 204665, "loss": 0.1265, "lr": 1.9827038647579028e-07, "epoch": 0.04959323772994894, "percentage": 0.99, "elapsed_time": "0:02:21", "remaining_time": "3:56:14", "throughput": 9722.15, "total_tokens": 1380544} +{"current_steps": 2035, "total_steps": 204665, "loss": 0.1444, "lr": 1.9875897786680998e-07, "epoch": 0.04971538856179611, "percentage": 0.99, "elapsed_time": "0:02:22", "remaining_time": "3:56:14", "throughput": 9722.8, "total_tokens": 1384064} +{"current_steps": 2040, "total_steps": 204665, "loss": 0.1977, "lr": 1.9924756925782967e-07, "epoch": 0.04983753939364327, "percentage": 1.0, "elapsed_time": "0:02:22", "remaining_time": "3:56:13", "throughput": 9721.65, "total_tokens": 1387264} +{"current_steps": 2045, "total_steps": 204665, "loss": 0.2471, "lr": 1.9973616064884935e-07, "epoch": 0.04995969022549043, "percentage": 1.0, "elapsed_time": "0:02:23", "remaining_time": "3:56:12", "throughput": 9720.71, "total_tokens": 1390464} +{"current_steps": 2050, "total_steps": 204665, "loss": 0.1957, "lr": 2.0022475203986905e-07, "epoch": 0.0500818410573376, "percentage": 1.0, "elapsed_time": "0:02:23", "remaining_time": "3:56:13", "throughput": 9723.67, "total_tokens": 1394368} +{"current_steps": 2055, "total_steps": 204665, "loss": 0.2043, "lr": 2.0071334343088874e-07, "epoch": 0.050203991889184764, "percentage": 1.0, "elapsed_time": "0:02:23", "remaining_time": "3:56:12", "throughput": 9723.94, "total_tokens": 1397824} +{"current_steps": 2060, "total_steps": 204665, "loss": 0.0892, "lr": 2.0120193482190842e-07, "epoch": 0.05032614272103193, "percentage": 1.01, "elapsed_time": "0:02:24", "remaining_time": "3:56:13", "throughput": 9726.33, "total_tokens": 1401664} +{"current_steps": 2065, "total_steps": 204665, "loss": 0.1588, "lr": 2.0169052621292813e-07, "epoch": 0.050448293552879095, "percentage": 1.01, "elapsed_time": "0:02:24", "remaining_time": "3:56:13", "throughput": 9728.89, "total_tokens": 1405504} +{"current_steps": 2070, "total_steps": 204665, "loss": 0.131, "lr": 2.021791176039478e-07, "epoch": 0.05057044438472626, "percentage": 1.01, "elapsed_time": "0:02:24", "remaining_time": "3:56:12", "throughput": 9728.37, "total_tokens": 1408768} +{"current_steps": 2075, "total_steps": 204665, "loss": 0.1556, "lr": 2.026677089949675e-07, "epoch": 0.050692595216573426, "percentage": 1.01, "elapsed_time": "0:02:25", "remaining_time": "3:56:12", "throughput": 9727.22, "total_tokens": 1411968} +{"current_steps": 2080, "total_steps": 204665, "loss": 0.1472, "lr": 2.031563003859872e-07, "epoch": 0.05081474604842059, "percentage": 1.02, "elapsed_time": "0:02:25", "remaining_time": "3:56:11", "throughput": 9726.24, "total_tokens": 1415168} +{"current_steps": 2085, "total_steps": 204665, "loss": 0.149, "lr": 2.0364489177700688e-07, "epoch": 0.05093689688026776, "percentage": 1.02, "elapsed_time": "0:02:25", "remaining_time": "3:56:10", "throughput": 9727.08, "total_tokens": 1418688} +{"current_steps": 2090, "total_steps": 204665, "loss": 0.1877, "lr": 2.0413348316802656e-07, "epoch": 0.05105904771211492, "percentage": 1.02, "elapsed_time": "0:02:26", "remaining_time": "3:56:10", "throughput": 9727.35, "total_tokens": 1422144} +{"current_steps": 2095, "total_steps": 204665, "loss": 0.1552, "lr": 2.0462207455904627e-07, "epoch": 0.05118119854396209, "percentage": 1.02, "elapsed_time": "0:02:26", "remaining_time": "3:56:10", "throughput": 9728.81, "total_tokens": 1425792} +{"current_steps": 2100, "total_steps": 204665, "loss": 0.181, "lr": 2.0511066595006595e-07, "epoch": 0.05130334937580925, "percentage": 1.03, "elapsed_time": "0:02:26", "remaining_time": "3:56:09", "throughput": 9728.97, "total_tokens": 1429184} +{"current_steps": 2105, "total_steps": 204665, "loss": 0.2218, "lr": 2.0559925734108563e-07, "epoch": 0.05142550020765641, "percentage": 1.03, "elapsed_time": "0:02:27", "remaining_time": "3:56:08", "throughput": 9727.01, "total_tokens": 1432192} +{"current_steps": 2110, "total_steps": 204665, "loss": 0.0809, "lr": 2.0608784873210534e-07, "epoch": 0.05154765103950358, "percentage": 1.03, "elapsed_time": "0:02:27", "remaining_time": "3:56:07", "throughput": 9726.65, "total_tokens": 1435520} +{"current_steps": 2115, "total_steps": 204665, "loss": 0.1314, "lr": 2.0657644012312502e-07, "epoch": 0.05166980187135074, "percentage": 1.03, "elapsed_time": "0:02:27", "remaining_time": "3:56:07", "throughput": 9726.23, "total_tokens": 1438848} +{"current_steps": 2120, "total_steps": 204665, "loss": 0.1002, "lr": 2.070650315141447e-07, "epoch": 0.05179195270319791, "percentage": 1.04, "elapsed_time": "0:02:28", "remaining_time": "3:56:07", "throughput": 9727.32, "total_tokens": 1442432} +{"current_steps": 2125, "total_steps": 204665, "loss": 0.1973, "lr": 2.075536229051644e-07, "epoch": 0.051914103535045074, "percentage": 1.04, "elapsed_time": "0:02:28", "remaining_time": "3:56:06", "throughput": 9725.95, "total_tokens": 1445568} +{"current_steps": 2130, "total_steps": 204665, "loss": 0.223, "lr": 2.080422142961841e-07, "epoch": 0.052036254366892236, "percentage": 1.04, "elapsed_time": "0:02:28", "remaining_time": "3:56:06", "throughput": 9727.02, "total_tokens": 1449152} +{"current_steps": 2135, "total_steps": 204665, "loss": 0.1381, "lr": 2.0853080568720377e-07, "epoch": 0.052158405198739405, "percentage": 1.04, "elapsed_time": "0:02:29", "remaining_time": "3:56:05", "throughput": 9725.38, "total_tokens": 1452224} +{"current_steps": 2140, "total_steps": 204665, "loss": 0.1132, "lr": 2.0901939707822348e-07, "epoch": 0.05228055603058657, "percentage": 1.05, "elapsed_time": "0:02:29", "remaining_time": "3:56:05", "throughput": 9727.74, "total_tokens": 1456064} +{"current_steps": 2145, "total_steps": 204665, "loss": 0.1221, "lr": 2.0950798846924316e-07, "epoch": 0.052402706862433736, "percentage": 1.05, "elapsed_time": "0:02:30", "remaining_time": "3:56:05", "throughput": 9728.76, "total_tokens": 1459648} +{"current_steps": 2150, "total_steps": 204665, "loss": 0.1435, "lr": 2.0999657986026284e-07, "epoch": 0.0525248576942809, "percentage": 1.05, "elapsed_time": "0:02:30", "remaining_time": "3:56:05", "throughput": 9730.21, "total_tokens": 1463296} +{"current_steps": 2155, "total_steps": 204665, "loss": 0.0786, "lr": 2.1048517125128255e-07, "epoch": 0.05264700852612806, "percentage": 1.05, "elapsed_time": "0:02:30", "remaining_time": "3:56:06", "throughput": 9729.51, "total_tokens": 1466752} +{"current_steps": 2160, "total_steps": 204665, "loss": 0.2243, "lr": 2.1097376264230223e-07, "epoch": 0.05276915935797523, "percentage": 1.06, "elapsed_time": "0:02:31", "remaining_time": "3:56:04", "throughput": 9727.76, "total_tokens": 1469760} +{"current_steps": 2165, "total_steps": 204665, "loss": 0.2312, "lr": 2.114623540333219e-07, "epoch": 0.05289131018982239, "percentage": 1.06, "elapsed_time": "0:02:31", "remaining_time": "3:56:04", "throughput": 9729.24, "total_tokens": 1473408} +{"current_steps": 2170, "total_steps": 204665, "loss": 0.1557, "lr": 2.1195094542434162e-07, "epoch": 0.05301346102166956, "percentage": 1.06, "elapsed_time": "0:02:31", "remaining_time": "3:56:04", "throughput": 9728.86, "total_tokens": 1476736} +{"current_steps": 2175, "total_steps": 204665, "loss": 0.1821, "lr": 2.124395368153613e-07, "epoch": 0.05313561185351672, "percentage": 1.06, "elapsed_time": "0:02:32", "remaining_time": "3:56:03", "throughput": 9727.77, "total_tokens": 1479936} +{"current_steps": 2180, "total_steps": 204665, "loss": 0.1161, "lr": 2.1292812820638098e-07, "epoch": 0.053257762685363884, "percentage": 1.07, "elapsed_time": "0:02:32", "remaining_time": "3:56:03", "throughput": 9727.74, "total_tokens": 1483328} +{"current_steps": 2185, "total_steps": 204665, "loss": 0.1854, "lr": 2.134167195974007e-07, "epoch": 0.05337991351721105, "percentage": 1.07, "elapsed_time": "0:02:32", "remaining_time": "3:56:02", "throughput": 9726.88, "total_tokens": 1486592} +{"current_steps": 2190, "total_steps": 204665, "loss": 0.0968, "lr": 2.1390531098842037e-07, "epoch": 0.053502064349058215, "percentage": 1.07, "elapsed_time": "0:02:33", "remaining_time": "3:56:01", "throughput": 9725.97, "total_tokens": 1489792} +{"current_steps": 2195, "total_steps": 204665, "loss": 0.1555, "lr": 2.1439390237944008e-07, "epoch": 0.053624215180905384, "percentage": 1.07, "elapsed_time": "0:02:33", "remaining_time": "3:56:01", "throughput": 9724.5, "total_tokens": 1492928} +{"current_steps": 2200, "total_steps": 204665, "loss": 0.1967, "lr": 2.1488249377045976e-07, "epoch": 0.053746366012752546, "percentage": 1.07, "elapsed_time": "0:02:33", "remaining_time": "3:56:01", "throughput": 9725.5, "total_tokens": 1496512} +{"current_steps": 2205, "total_steps": 204665, "loss": 0.1324, "lr": 2.1537108516147944e-07, "epoch": 0.053868516844599715, "percentage": 1.08, "elapsed_time": "0:02:34", "remaining_time": "3:56:00", "throughput": 9725.41, "total_tokens": 1499840} +{"current_steps": 2210, "total_steps": 204665, "loss": 0.1308, "lr": 2.1585967655249915e-07, "epoch": 0.05399066767644688, "percentage": 1.08, "elapsed_time": "0:02:34", "remaining_time": "3:55:59", "throughput": 9724.16, "total_tokens": 1502976} +{"current_steps": 2215, "total_steps": 204665, "loss": 0.24, "lr": 2.1634826794351883e-07, "epoch": 0.05411281850829404, "percentage": 1.08, "elapsed_time": "0:02:34", "remaining_time": "3:55:58", "throughput": 9724.57, "total_tokens": 1506432} +{"current_steps": 2220, "total_steps": 204665, "loss": 0.2346, "lr": 2.168368593345385e-07, "epoch": 0.05423496934014121, "percentage": 1.08, "elapsed_time": "0:02:35", "remaining_time": "3:55:58", "throughput": 9724.92, "total_tokens": 1509888} +{"current_steps": 2225, "total_steps": 204665, "loss": 0.2164, "lr": 2.1732545072555822e-07, "epoch": 0.05435712017198837, "percentage": 1.09, "elapsed_time": "0:02:35", "remaining_time": "3:55:57", "throughput": 9723.66, "total_tokens": 1513024} +{"current_steps": 2230, "total_steps": 204665, "loss": 0.2196, "lr": 2.178140421165779e-07, "epoch": 0.05447927100383554, "percentage": 1.09, "elapsed_time": "0:02:35", "remaining_time": "3:55:57", "throughput": 9724.28, "total_tokens": 1516544} +{"current_steps": 2235, "total_steps": 204665, "loss": 0.1195, "lr": 2.1830263350759758e-07, "epoch": 0.0546014218356827, "percentage": 1.09, "elapsed_time": "0:02:36", "remaining_time": "3:55:58", "throughput": 9727.51, "total_tokens": 1520576} +{"current_steps": 2240, "total_steps": 204665, "loss": 0.1513, "lr": 2.187912248986173e-07, "epoch": 0.05472357266752986, "percentage": 1.09, "elapsed_time": "0:02:36", "remaining_time": "3:55:56", "throughput": 9726.31, "total_tokens": 1523712} +{"current_steps": 2245, "total_steps": 204665, "loss": 0.0863, "lr": 2.1927981628963697e-07, "epoch": 0.05484572349937703, "percentage": 1.1, "elapsed_time": "0:02:37", "remaining_time": "3:55:57", "throughput": 9729.86, "total_tokens": 1527808} +{"current_steps": 2250, "total_steps": 204665, "loss": 0.232, "lr": 2.1976840768065665e-07, "epoch": 0.054967874331224194, "percentage": 1.1, "elapsed_time": "0:02:37", "remaining_time": "3:55:57", "throughput": 9730.19, "total_tokens": 1531264} +{"current_steps": 2255, "total_steps": 204665, "loss": 0.1553, "lr": 2.2025699907167636e-07, "epoch": 0.05509002516307136, "percentage": 1.1, "elapsed_time": "0:02:37", "remaining_time": "3:55:57", "throughput": 9731.38, "total_tokens": 1534912} +{"current_steps": 2260, "total_steps": 204665, "loss": 0.14, "lr": 2.2074559046269604e-07, "epoch": 0.055212175994918525, "percentage": 1.1, "elapsed_time": "0:02:38", "remaining_time": "3:55:56", "throughput": 9730.93, "total_tokens": 1538176} +{"current_steps": 2265, "total_steps": 204665, "loss": 0.1312, "lr": 2.2123418185371572e-07, "epoch": 0.05533432682676569, "percentage": 1.11, "elapsed_time": "0:02:38", "remaining_time": "3:55:56", "throughput": 9732.05, "total_tokens": 1541760} +{"current_steps": 2270, "total_steps": 204665, "loss": 0.2186, "lr": 2.2172277324473543e-07, "epoch": 0.055456477658612856, "percentage": 1.11, "elapsed_time": "0:02:38", "remaining_time": "3:55:55", "throughput": 9732.22, "total_tokens": 1545152} +{"current_steps": 2275, "total_steps": 204665, "loss": 0.1079, "lr": 2.222113646357551e-07, "epoch": 0.05557862849046002, "percentage": 1.11, "elapsed_time": "0:02:39", "remaining_time": "3:55:54", "throughput": 9730.03, "total_tokens": 1548096} +{"current_steps": 2280, "total_steps": 204665, "loss": 0.0889, "lr": 2.226999560267748e-07, "epoch": 0.05570077932230719, "percentage": 1.11, "elapsed_time": "0:02:39", "remaining_time": "3:55:53", "throughput": 9728.58, "total_tokens": 1551168} +{"current_steps": 2285, "total_steps": 204665, "loss": 0.2402, "lr": 2.231885474177945e-07, "epoch": 0.05582293015415435, "percentage": 1.12, "elapsed_time": "0:02:39", "remaining_time": "3:55:52", "throughput": 9727.97, "total_tokens": 1554432} +{"current_steps": 2290, "total_steps": 204665, "loss": 0.1479, "lr": 2.2367713880881418e-07, "epoch": 0.05594508098600152, "percentage": 1.12, "elapsed_time": "0:02:40", "remaining_time": "3:55:51", "throughput": 9726.8, "total_tokens": 1557568} +{"current_steps": 2295, "total_steps": 204665, "loss": 0.1292, "lr": 2.2416573019983386e-07, "epoch": 0.05606723181784868, "percentage": 1.12, "elapsed_time": "0:02:40", "remaining_time": "3:55:51", "throughput": 9727.79, "total_tokens": 1561152} +{"current_steps": 2300, "total_steps": 204665, "loss": 0.1956, "lr": 2.2465432159085357e-07, "epoch": 0.05618938264969584, "percentage": 1.12, "elapsed_time": "0:02:40", "remaining_time": "3:55:49", "throughput": 9725.97, "total_tokens": 1564160} +{"current_steps": 2305, "total_steps": 204665, "loss": 0.101, "lr": 2.2514291298187325e-07, "epoch": 0.05631153348154301, "percentage": 1.13, "elapsed_time": "0:02:41", "remaining_time": "3:55:49", "throughput": 9726.35, "total_tokens": 1567616} +{"current_steps": 2310, "total_steps": 204665, "loss": 0.1142, "lr": 2.2563150437289293e-07, "epoch": 0.05643368431339017, "percentage": 1.13, "elapsed_time": "0:02:41", "remaining_time": "3:55:49", "throughput": 9726.0, "total_tokens": 1570944} +{"current_steps": 2315, "total_steps": 204665, "loss": 0.1082, "lr": 2.2612009576391264e-07, "epoch": 0.05655583514523734, "percentage": 1.13, "elapsed_time": "0:02:41", "remaining_time": "3:55:48", "throughput": 9725.4, "total_tokens": 1574208} +{"current_steps": 2320, "total_steps": 204665, "loss": 0.1785, "lr": 2.2660868715493232e-07, "epoch": 0.056677985977084504, "percentage": 1.13, "elapsed_time": "0:02:42", "remaining_time": "3:55:47", "throughput": 9725.8, "total_tokens": 1577664} +{"current_steps": 2325, "total_steps": 204665, "loss": 0.1554, "lr": 2.27097278545952e-07, "epoch": 0.056800136808931666, "percentage": 1.14, "elapsed_time": "0:02:42", "remaining_time": "3:55:47", "throughput": 9725.75, "total_tokens": 1580992} +{"current_steps": 2330, "total_steps": 204665, "loss": 0.1593, "lr": 2.275858699369717e-07, "epoch": 0.056922287640778835, "percentage": 1.14, "elapsed_time": "0:02:42", "remaining_time": "3:55:45", "throughput": 9723.12, "total_tokens": 1583808} +{"current_steps": 2335, "total_steps": 204665, "loss": 0.1516, "lr": 2.280744613279914e-07, "epoch": 0.057044438472626, "percentage": 1.14, "elapsed_time": "0:02:43", "remaining_time": "3:55:44", "throughput": 9722.92, "total_tokens": 1587136} +{"current_steps": 2340, "total_steps": 204665, "loss": 0.0974, "lr": 2.2856305271901107e-07, "epoch": 0.057166589304473166, "percentage": 1.14, "elapsed_time": "0:02:43", "remaining_time": "3:55:43", "throughput": 9723.48, "total_tokens": 1590592} +{"current_steps": 2345, "total_steps": 204665, "loss": 0.1008, "lr": 2.2905164411003078e-07, "epoch": 0.05728874013632033, "percentage": 1.15, "elapsed_time": "0:02:43", "remaining_time": "3:55:43", "throughput": 9723.62, "total_tokens": 1593984} +{"current_steps": 2350, "total_steps": 204665, "loss": 0.1619, "lr": 2.2954023550105044e-07, "epoch": 0.05741089096816749, "percentage": 1.15, "elapsed_time": "0:02:44", "remaining_time": "3:55:42", "throughput": 9723.04, "total_tokens": 1597248} +{"current_steps": 2355, "total_steps": 204665, "loss": 0.0828, "lr": 2.3002882689207014e-07, "epoch": 0.05753304180001466, "percentage": 1.15, "elapsed_time": "0:02:44", "remaining_time": "3:55:41", "throughput": 9722.61, "total_tokens": 1600512} +{"current_steps": 2360, "total_steps": 204665, "loss": 0.229, "lr": 2.3051741828308985e-07, "epoch": 0.05765519263186182, "percentage": 1.15, "elapsed_time": "0:02:44", "remaining_time": "3:55:40", "throughput": 9721.01, "total_tokens": 1603520} +{"current_steps": 2365, "total_steps": 204665, "loss": 0.2023, "lr": 2.310060096741095e-07, "epoch": 0.05777734346370899, "percentage": 1.16, "elapsed_time": "0:02:45", "remaining_time": "3:55:39", "throughput": 9720.66, "total_tokens": 1606848} +{"current_steps": 2370, "total_steps": 204665, "loss": 0.1168, "lr": 2.3149460106512921e-07, "epoch": 0.05789949429555615, "percentage": 1.16, "elapsed_time": "0:02:45", "remaining_time": "3:55:40", "throughput": 9722.9, "total_tokens": 1610688} +{"current_steps": 2375, "total_steps": 204665, "loss": 0.0904, "lr": 2.3198319245614892e-07, "epoch": 0.05802164512740332, "percentage": 1.16, "elapsed_time": "0:02:46", "remaining_time": "3:55:40", "throughput": 9723.98, "total_tokens": 1614336} +{"current_steps": 2380, "total_steps": 204665, "loss": 0.1832, "lr": 2.3247178384716858e-07, "epoch": 0.05814379595925048, "percentage": 1.16, "elapsed_time": "0:02:46", "remaining_time": "3:55:38", "throughput": 9721.8, "total_tokens": 1617216} +{"current_steps": 2385, "total_steps": 204665, "loss": 0.157, "lr": 2.3296037523818829e-07, "epoch": 0.058265946791097645, "percentage": 1.17, "elapsed_time": "0:02:46", "remaining_time": "3:55:38", "throughput": 9721.42, "total_tokens": 1620544} +{"current_steps": 2390, "total_steps": 204665, "loss": 0.1079, "lr": 2.33448966629208e-07, "epoch": 0.058388097622944814, "percentage": 1.17, "elapsed_time": "0:02:47", "remaining_time": "3:55:37", "throughput": 9720.28, "total_tokens": 1623680} +{"current_steps": 2395, "total_steps": 204665, "loss": 0.1297, "lr": 2.3393755802022765e-07, "epoch": 0.058510248454791976, "percentage": 1.17, "elapsed_time": "0:02:47", "remaining_time": "3:55:36", "throughput": 9719.14, "total_tokens": 1626816} +{"current_steps": 2400, "total_steps": 204665, "loss": 0.2271, "lr": 2.3442614941124736e-07, "epoch": 0.058632399286639145, "percentage": 1.17, "elapsed_time": "0:02:47", "remaining_time": "3:55:35", "throughput": 9719.88, "total_tokens": 1630336} +{"current_steps": 2405, "total_steps": 204665, "loss": 0.1235, "lr": 2.3491474080226706e-07, "epoch": 0.05875455011848631, "percentage": 1.18, "elapsed_time": "0:02:48", "remaining_time": "3:55:36", "throughput": 9722.49, "total_tokens": 1634304} +{"current_steps": 2410, "total_steps": 204665, "loss": 0.1444, "lr": 2.3540333219328672e-07, "epoch": 0.05887670095033347, "percentage": 1.18, "elapsed_time": "0:02:48", "remaining_time": "3:55:36", "throughput": 9722.52, "total_tokens": 1637760} +{"current_steps": 2415, "total_steps": 204665, "loss": 0.1087, "lr": 2.3589192358430643e-07, "epoch": 0.05899885178218064, "percentage": 1.18, "elapsed_time": "0:02:48", "remaining_time": "3:55:35", "throughput": 9721.52, "total_tokens": 1640896} +{"current_steps": 2420, "total_steps": 204665, "loss": 0.1224, "lr": 2.3638051497532613e-07, "epoch": 0.0591210026140278, "percentage": 1.18, "elapsed_time": "0:02:49", "remaining_time": "3:55:35", "throughput": 9721.77, "total_tokens": 1644352} +{"current_steps": 2425, "total_steps": 204665, "loss": 0.2265, "lr": 2.3686910636634582e-07, "epoch": 0.05924315344587497, "percentage": 1.18, "elapsed_time": "0:02:49", "remaining_time": "3:55:36", "throughput": 9724.45, "total_tokens": 1648320} +{"current_steps": 2430, "total_steps": 204665, "loss": 0.2262, "lr": 2.373576977573655e-07, "epoch": 0.05936530427772213, "percentage": 1.19, "elapsed_time": "0:02:49", "remaining_time": "3:55:35", "throughput": 9724.62, "total_tokens": 1651712} +{"current_steps": 2435, "total_steps": 204665, "loss": 0.1233, "lr": 2.378462891483852e-07, "epoch": 0.05948745510956929, "percentage": 1.19, "elapsed_time": "0:02:50", "remaining_time": "3:55:34", "throughput": 9723.54, "total_tokens": 1654848} +{"current_steps": 2440, "total_steps": 204665, "loss": 0.187, "lr": 2.3833488053940489e-07, "epoch": 0.05960960594141646, "percentage": 1.19, "elapsed_time": "0:02:50", "remaining_time": "3:55:33", "throughput": 9723.66, "total_tokens": 1658240} +{"current_steps": 2445, "total_steps": 204665, "loss": 0.1262, "lr": 2.3882347193042457e-07, "epoch": 0.059731756773263624, "percentage": 1.19, "elapsed_time": "0:02:50", "remaining_time": "3:55:33", "throughput": 9722.68, "total_tokens": 1661440} +{"current_steps": 2450, "total_steps": 204665, "loss": 0.0881, "lr": 2.3931206332144425e-07, "epoch": 0.05985390760511079, "percentage": 1.2, "elapsed_time": "0:02:51", "remaining_time": "3:55:32", "throughput": 9722.09, "total_tokens": 1664704} +{"current_steps": 2455, "total_steps": 204665, "loss": 0.1146, "lr": 2.39800654712464e-07, "epoch": 0.059976058436957955, "percentage": 1.2, "elapsed_time": "0:02:51", "remaining_time": "3:55:32", "throughput": 9723.41, "total_tokens": 1668352} +{"current_steps": 2460, "total_steps": 204665, "loss": 0.198, "lr": 2.4028924610348366e-07, "epoch": 0.060098209268805124, "percentage": 1.2, "elapsed_time": "0:02:51", "remaining_time": "3:55:32", "throughput": 9725.85, "total_tokens": 1672256} +{"current_steps": 2465, "total_steps": 204665, "loss": 0.2135, "lr": 2.4077783749450335e-07, "epoch": 0.060220360100652286, "percentage": 1.2, "elapsed_time": "0:02:52", "remaining_time": "3:55:30", "throughput": 9723.19, "total_tokens": 1675008} +{"current_steps": 2470, "total_steps": 204665, "loss": 0.1464, "lr": 2.41266428885523e-07, "epoch": 0.06034251093249945, "percentage": 1.21, "elapsed_time": "0:02:52", "remaining_time": "3:55:30", "throughput": 9723.9, "total_tokens": 1678528} +{"current_steps": 2475, "total_steps": 204665, "loss": 0.1469, "lr": 2.417550202765427e-07, "epoch": 0.06046466176434662, "percentage": 1.21, "elapsed_time": "0:02:52", "remaining_time": "3:55:29", "throughput": 9722.38, "total_tokens": 1681536} +{"current_steps": 2480, "total_steps": 204665, "loss": 0.2015, "lr": 2.422436116675624e-07, "epoch": 0.06058681259619378, "percentage": 1.21, "elapsed_time": "0:02:53", "remaining_time": "3:55:28", "throughput": 9720.72, "total_tokens": 1684544} +{"current_steps": 2485, "total_steps": 204665, "loss": 0.2508, "lr": 2.427322030585821e-07, "epoch": 0.06070896342804095, "percentage": 1.21, "elapsed_time": "0:02:53", "remaining_time": "3:55:27", "throughput": 9721.05, "total_tokens": 1688000} +{"current_steps": 2490, "total_steps": 204665, "loss": 0.1095, "lr": 2.432207944496018e-07, "epoch": 0.06083111425988811, "percentage": 1.22, "elapsed_time": "0:02:53", "remaining_time": "3:55:27", "throughput": 9720.36, "total_tokens": 1691264} +{"current_steps": 2495, "total_steps": 204665, "loss": 0.0887, "lr": 2.437093858406215e-07, "epoch": 0.06095326509173527, "percentage": 1.22, "elapsed_time": "0:02:54", "remaining_time": "3:55:26", "throughput": 9720.35, "total_tokens": 1694656} +{"current_steps": 2500, "total_steps": 204665, "loss": 0.1181, "lr": 2.4419797723164117e-07, "epoch": 0.06107541592358244, "percentage": 1.22, "elapsed_time": "0:02:54", "remaining_time": "3:55:26", "throughput": 9721.49, "total_tokens": 1698304} +{"current_steps": 2505, "total_steps": 204665, "loss": 0.1827, "lr": 2.4468656862266085e-07, "epoch": 0.0611975667554296, "percentage": 1.22, "elapsed_time": "0:02:55", "remaining_time": "3:55:25", "throughput": 9720.15, "total_tokens": 1701376} +{"current_steps": 2510, "total_steps": 204665, "loss": 0.1661, "lr": 2.4517516001368053e-07, "epoch": 0.06131971758727677, "percentage": 1.23, "elapsed_time": "0:02:55", "remaining_time": "3:55:24", "throughput": 9718.27, "total_tokens": 1704320} +{"current_steps": 2515, "total_steps": 204665, "loss": 0.0947, "lr": 2.4566375140470026e-07, "epoch": 0.061441868419123934, "percentage": 1.23, "elapsed_time": "0:02:55", "remaining_time": "3:55:24", "throughput": 9718.81, "total_tokens": 1707840} +{"current_steps": 2520, "total_steps": 204665, "loss": 0.0512, "lr": 2.4615234279571995e-07, "epoch": 0.061564019250971096, "percentage": 1.23, "elapsed_time": "0:02:56", "remaining_time": "3:55:24", "throughput": 9718.8, "total_tokens": 1711232} +{"current_steps": 2525, "total_steps": 204665, "loss": 0.1004, "lr": 2.4664093418673963e-07, "epoch": 0.061686170082818265, "percentage": 1.23, "elapsed_time": "0:02:56", "remaining_time": "3:55:24", "throughput": 9720.27, "total_tokens": 1714944} +{"current_steps": 2530, "total_steps": 204665, "loss": 0.0867, "lr": 2.471295255777593e-07, "epoch": 0.06180832091466543, "percentage": 1.24, "elapsed_time": "0:02:56", "remaining_time": "3:55:23", "throughput": 9720.4, "total_tokens": 1718336} +{"current_steps": 2535, "total_steps": 204665, "loss": 0.1462, "lr": 2.47618116968779e-07, "epoch": 0.061930471746512596, "percentage": 1.24, "elapsed_time": "0:02:57", "remaining_time": "3:55:22", "throughput": 9719.61, "total_tokens": 1721536} +{"current_steps": 2540, "total_steps": 204665, "loss": 0.1976, "lr": 2.4810670835979867e-07, "epoch": 0.06205262257835976, "percentage": 1.24, "elapsed_time": "0:02:57", "remaining_time": "3:55:22", "throughput": 9719.56, "total_tokens": 1724928} +{"current_steps": 2545, "total_steps": 204665, "loss": 0.1417, "lr": 2.485952997508184e-07, "epoch": 0.06217477341020693, "percentage": 1.24, "elapsed_time": "0:02:57", "remaining_time": "3:55:21", "throughput": 9718.47, "total_tokens": 1728064} +{"current_steps": 2550, "total_steps": 204665, "loss": 0.142, "lr": 2.4908389114183803e-07, "epoch": 0.06229692424205409, "percentage": 1.25, "elapsed_time": "0:02:58", "remaining_time": "3:55:21", "throughput": 9718.1, "total_tokens": 1731392} +{"current_steps": 2555, "total_steps": 204665, "loss": 0.1403, "lr": 2.4957248253285777e-07, "epoch": 0.06241907507390125, "percentage": 1.25, "elapsed_time": "0:02:58", "remaining_time": "3:55:21", "throughput": 9718.63, "total_tokens": 1734912} +{"current_steps": 2560, "total_steps": 204665, "loss": 0.138, "lr": 2.5006107392387745e-07, "epoch": 0.06254122590574841, "percentage": 1.25, "elapsed_time": "0:02:58", "remaining_time": "3:55:20", "throughput": 9717.34, "total_tokens": 1737984} +{"current_steps": 2565, "total_steps": 204665, "loss": 0.2083, "lr": 2.5054966531489713e-07, "epoch": 0.06266337673759559, "percentage": 1.25, "elapsed_time": "0:02:59", "remaining_time": "3:55:19", "throughput": 9718.01, "total_tokens": 1741504} +{"current_steps": 2570, "total_steps": 204665, "loss": 0.1017, "lr": 2.510382567059168e-07, "epoch": 0.06278552756944275, "percentage": 1.26, "elapsed_time": "0:02:59", "remaining_time": "3:55:19", "throughput": 9719.34, "total_tokens": 1745152} +{"current_steps": 2575, "total_steps": 204665, "loss": 0.1951, "lr": 2.5152684809693655e-07, "epoch": 0.06290767840128991, "percentage": 1.26, "elapsed_time": "0:02:59", "remaining_time": "3:55:18", "throughput": 9719.1, "total_tokens": 1748480} +{"current_steps": 2580, "total_steps": 204665, "loss": 0.1033, "lr": 2.5201543948795623e-07, "epoch": 0.06302982923313707, "percentage": 1.26, "elapsed_time": "0:03:00", "remaining_time": "3:55:18", "throughput": 9719.46, "total_tokens": 1751936} +{"current_steps": 2585, "total_steps": 204665, "loss": 0.257, "lr": 2.525040308789759e-07, "epoch": 0.06315198006498424, "percentage": 1.26, "elapsed_time": "0:03:00", "remaining_time": "3:55:17", "throughput": 9719.09, "total_tokens": 1755200} +{"current_steps": 2590, "total_steps": 204665, "loss": 0.1453, "lr": 2.529926222699956e-07, "epoch": 0.06327413089683141, "percentage": 1.27, "elapsed_time": "0:03:00", "remaining_time": "3:55:17", "throughput": 9719.46, "total_tokens": 1758656} +{"current_steps": 2595, "total_steps": 204665, "loss": 0.1116, "lr": 2.5348121366101527e-07, "epoch": 0.06339628172867857, "percentage": 1.27, "elapsed_time": "0:03:01", "remaining_time": "3:55:16", "throughput": 9717.85, "total_tokens": 1761664} +{"current_steps": 2600, "total_steps": 204665, "loss": 0.0599, "lr": 2.5396980505203495e-07, "epoch": 0.06351843256052574, "percentage": 1.27, "elapsed_time": "0:03:01", "remaining_time": "3:55:15", "throughput": 9717.51, "total_tokens": 1764992} +{"current_steps": 2605, "total_steps": 204665, "loss": 0.0763, "lr": 2.544583964430547e-07, "epoch": 0.0636405833923729, "percentage": 1.27, "elapsed_time": "0:03:01", "remaining_time": "3:55:15", "throughput": 9718.05, "total_tokens": 1768512} +{"current_steps": 2610, "total_steps": 204665, "loss": 0.1781, "lr": 2.5494698783407437e-07, "epoch": 0.06376273422422006, "percentage": 1.28, "elapsed_time": "0:03:02", "remaining_time": "3:55:15", "throughput": 9717.52, "total_tokens": 1771776} +{"current_steps": 2615, "total_steps": 204665, "loss": 0.1103, "lr": 2.5543557922509405e-07, "epoch": 0.06388488505606724, "percentage": 1.28, "elapsed_time": "0:03:02", "remaining_time": "3:55:15", "throughput": 9718.9, "total_tokens": 1775488} +{"current_steps": 2620, "total_steps": 204665, "loss": 0.1475, "lr": 2.5592417061611373e-07, "epoch": 0.0640070358879144, "percentage": 1.28, "elapsed_time": "0:03:03", "remaining_time": "3:55:14", "throughput": 9718.91, "total_tokens": 1778880} +{"current_steps": 2625, "total_steps": 204665, "loss": 0.1251, "lr": 2.564127620071334e-07, "epoch": 0.06412918671976156, "percentage": 1.28, "elapsed_time": "0:03:03", "remaining_time": "3:55:15", "throughput": 9721.37, "total_tokens": 1782848} +{"current_steps": 2630, "total_steps": 204665, "loss": 0.1095, "lr": 2.569013533981531e-07, "epoch": 0.06425133755160872, "percentage": 1.29, "elapsed_time": "0:03:03", "remaining_time": "3:55:14", "throughput": 9721.52, "total_tokens": 1786240} +{"current_steps": 2635, "total_steps": 204665, "loss": 0.1151, "lr": 2.5738994478917283e-07, "epoch": 0.06437348838345588, "percentage": 1.29, "elapsed_time": "0:03:04", "remaining_time": "3:55:14", "throughput": 9722.05, "total_tokens": 1789760} +{"current_steps": 2640, "total_steps": 204665, "loss": 0.0562, "lr": 2.578785361801925e-07, "epoch": 0.06449563921530306, "percentage": 1.29, "elapsed_time": "0:03:04", "remaining_time": "3:55:15", "throughput": 9723.61, "total_tokens": 1793536} +{"current_steps": 2645, "total_steps": 204665, "loss": 0.1524, "lr": 2.583671275712122e-07, "epoch": 0.06461779004715022, "percentage": 1.29, "elapsed_time": "0:03:04", "remaining_time": "3:55:14", "throughput": 9723.73, "total_tokens": 1796928} +{"current_steps": 2650, "total_steps": 204665, "loss": 0.2607, "lr": 2.5885571896223187e-07, "epoch": 0.06473994087899738, "percentage": 1.29, "elapsed_time": "0:03:05", "remaining_time": "3:55:13", "throughput": 9724.2, "total_tokens": 1800384} +{"current_steps": 2655, "total_steps": 204665, "loss": 0.1124, "lr": 2.5934431035325155e-07, "epoch": 0.06486209171084455, "percentage": 1.3, "elapsed_time": "0:03:05", "remaining_time": "3:55:13", "throughput": 9723.66, "total_tokens": 1803648} +{"current_steps": 2660, "total_steps": 204665, "loss": 0.1881, "lr": 2.5983290174427123e-07, "epoch": 0.06498424254269172, "percentage": 1.3, "elapsed_time": "0:03:05", "remaining_time": "3:55:12", "throughput": 9722.72, "total_tokens": 1806784} +{"current_steps": 2665, "total_steps": 204665, "loss": 0.3077, "lr": 2.6032149313529097e-07, "epoch": 0.06510639337453888, "percentage": 1.3, "elapsed_time": "0:03:06", "remaining_time": "3:55:11", "throughput": 9720.8, "total_tokens": 1809728} +{"current_steps": 2670, "total_steps": 204665, "loss": 0.1816, "lr": 2.6081008452631065e-07, "epoch": 0.06522854420638605, "percentage": 1.3, "elapsed_time": "0:03:06", "remaining_time": "3:55:10", "throughput": 9720.58, "total_tokens": 1813056} +{"current_steps": 2675, "total_steps": 204665, "loss": 0.1059, "lr": 2.6129867591733033e-07, "epoch": 0.06535069503823321, "percentage": 1.31, "elapsed_time": "0:03:06", "remaining_time": "3:55:10", "throughput": 9720.54, "total_tokens": 1816448} +{"current_steps": 2680, "total_steps": 204665, "loss": 0.08, "lr": 2.6178726730835e-07, "epoch": 0.06547284587008037, "percentage": 1.31, "elapsed_time": "0:03:07", "remaining_time": "3:55:09", "throughput": 9720.0, "total_tokens": 1819712} +{"current_steps": 2685, "total_steps": 204665, "loss": 0.2463, "lr": 2.622758586993697e-07, "epoch": 0.06559499670192755, "percentage": 1.31, "elapsed_time": "0:03:07", "remaining_time": "3:55:08", "throughput": 9718.87, "total_tokens": 1822784} +{"current_steps": 2690, "total_steps": 204665, "loss": 0.1522, "lr": 2.627644500903894e-07, "epoch": 0.06571714753377471, "percentage": 1.31, "elapsed_time": "0:03:07", "remaining_time": "3:55:08", "throughput": 9718.34, "total_tokens": 1826048} +{"current_steps": 2695, "total_steps": 204665, "loss": 0.1671, "lr": 2.632530414814091e-07, "epoch": 0.06583929836562187, "percentage": 1.32, "elapsed_time": "0:03:08", "remaining_time": "3:55:06", "throughput": 9716.76, "total_tokens": 1829056} +{"current_steps": 2700, "total_steps": 204665, "loss": 0.1142, "lr": 2.637416328724288e-07, "epoch": 0.06596144919746903, "percentage": 1.32, "elapsed_time": "0:03:08", "remaining_time": "3:55:06", "throughput": 9717.43, "total_tokens": 1832576} +{"current_steps": 2705, "total_steps": 204665, "loss": 0.2161, "lr": 2.6423022426344847e-07, "epoch": 0.0660836000293162, "percentage": 1.32, "elapsed_time": "0:03:08", "remaining_time": "3:55:06", "throughput": 9718.56, "total_tokens": 1836224} +{"current_steps": 2710, "total_steps": 204665, "loss": 0.0834, "lr": 2.6471881565446815e-07, "epoch": 0.06620575086116337, "percentage": 1.32, "elapsed_time": "0:03:09", "remaining_time": "3:55:05", "throughput": 9717.85, "total_tokens": 1839424} +{"current_steps": 2715, "total_steps": 204665, "loss": 0.1834, "lr": 2.6520740704548783e-07, "epoch": 0.06632790169301053, "percentage": 1.33, "elapsed_time": "0:03:09", "remaining_time": "3:55:04", "throughput": 9716.29, "total_tokens": 1842432} +{"current_steps": 2720, "total_steps": 204665, "loss": 0.1845, "lr": 2.656959984365075e-07, "epoch": 0.0664500525248577, "percentage": 1.33, "elapsed_time": "0:03:09", "remaining_time": "3:55:04", "throughput": 9716.93, "total_tokens": 1845952} +{"current_steps": 2725, "total_steps": 204665, "loss": 0.1038, "lr": 2.6618458982752725e-07, "epoch": 0.06657220335670486, "percentage": 1.33, "elapsed_time": "0:03:10", "remaining_time": "3:55:04", "throughput": 9717.88, "total_tokens": 1849600} +{"current_steps": 2730, "total_steps": 204665, "loss": 0.139, "lr": 2.6667318121854693e-07, "epoch": 0.06669435418855202, "percentage": 1.33, "elapsed_time": "0:03:10", "remaining_time": "3:55:04", "throughput": 9717.91, "total_tokens": 1852992} +{"current_steps": 2735, "total_steps": 204665, "loss": 0.1293, "lr": 2.671617726095666e-07, "epoch": 0.0668165050203992, "percentage": 1.34, "elapsed_time": "0:03:11", "remaining_time": "3:55:02", "throughput": 9715.92, "total_tokens": 1855872} +{"current_steps": 2740, "total_steps": 204665, "loss": 0.1518, "lr": 2.676503640005863e-07, "epoch": 0.06693865585224636, "percentage": 1.34, "elapsed_time": "0:03:11", "remaining_time": "3:55:02", "throughput": 9715.75, "total_tokens": 1859200} +{"current_steps": 2745, "total_steps": 204665, "loss": 0.1194, "lr": 2.68138955391606e-07, "epoch": 0.06706080668409352, "percentage": 1.34, "elapsed_time": "0:03:11", "remaining_time": "3:55:02", "throughput": 9718.32, "total_tokens": 1863168} +{"current_steps": 2750, "total_steps": 204665, "loss": 0.173, "lr": 2.6862754678262566e-07, "epoch": 0.06718295751594068, "percentage": 1.34, "elapsed_time": "0:03:12", "remaining_time": "3:55:01", "throughput": 9716.63, "total_tokens": 1866112} +{"current_steps": 2755, "total_steps": 204665, "loss": 0.1099, "lr": 2.691161381736454e-07, "epoch": 0.06730510834778784, "percentage": 1.35, "elapsed_time": "0:03:12", "remaining_time": "3:55:00", "throughput": 9716.25, "total_tokens": 1869376} +{"current_steps": 2760, "total_steps": 204665, "loss": 0.1761, "lr": 2.6960472956466507e-07, "epoch": 0.06742725917963502, "percentage": 1.35, "elapsed_time": "0:03:12", "remaining_time": "3:55:00", "throughput": 9717.81, "total_tokens": 1873152} +{"current_steps": 2765, "total_steps": 204665, "loss": 0.1735, "lr": 2.7009332095568475e-07, "epoch": 0.06754941001148218, "percentage": 1.35, "elapsed_time": "0:03:13", "remaining_time": "3:55:00", "throughput": 9719.81, "total_tokens": 1876992} +{"current_steps": 2770, "total_steps": 204665, "loss": 0.158, "lr": 2.705819123467045e-07, "epoch": 0.06767156084332934, "percentage": 1.35, "elapsed_time": "0:03:13", "remaining_time": "3:55:00", "throughput": 9719.51, "total_tokens": 1880320} +{"current_steps": 2775, "total_steps": 204665, "loss": 0.0991, "lr": 2.710705037377241e-07, "epoch": 0.0677937116751765, "percentage": 1.36, "elapsed_time": "0:03:13", "remaining_time": "3:54:59", "throughput": 9718.11, "total_tokens": 1883328} +{"current_steps": 2780, "total_steps": 204665, "loss": 0.2085, "lr": 2.715590951287438e-07, "epoch": 0.06791586250702367, "percentage": 1.36, "elapsed_time": "0:03:14", "remaining_time": "3:54:58", "throughput": 9718.43, "total_tokens": 1886784} +{"current_steps": 2785, "total_steps": 204665, "loss": 0.1438, "lr": 2.7204768651976353e-07, "epoch": 0.06803801333887084, "percentage": 1.36, "elapsed_time": "0:03:14", "remaining_time": "3:54:58", "throughput": 9718.53, "total_tokens": 1890176} +{"current_steps": 2790, "total_steps": 204665, "loss": 0.0999, "lr": 2.725362779107832e-07, "epoch": 0.068160164170718, "percentage": 1.36, "elapsed_time": "0:03:14", "remaining_time": "3:54:57", "throughput": 9717.75, "total_tokens": 1893376} +{"current_steps": 2795, "total_steps": 204665, "loss": 0.1893, "lr": 2.730248693018029e-07, "epoch": 0.06828231500256517, "percentage": 1.37, "elapsed_time": "0:03:15", "remaining_time": "3:54:57", "throughput": 9717.9, "total_tokens": 1896768} +{"current_steps": 2800, "total_steps": 204665, "loss": 0.189, "lr": 2.7351346069282263e-07, "epoch": 0.06840446583441233, "percentage": 1.37, "elapsed_time": "0:03:15", "remaining_time": "3:54:56", "throughput": 9716.94, "total_tokens": 1899904} +{"current_steps": 2805, "total_steps": 204665, "loss": 0.1138, "lr": 2.7400205208384226e-07, "epoch": 0.06852661666625949, "percentage": 1.37, "elapsed_time": "0:03:15", "remaining_time": "3:54:56", "throughput": 9717.78, "total_tokens": 1903488} +{"current_steps": 2810, "total_steps": 204665, "loss": 0.1319, "lr": 2.7449064347486194e-07, "epoch": 0.06864876749810667, "percentage": 1.37, "elapsed_time": "0:03:16", "remaining_time": "3:54:56", "throughput": 9719.8, "total_tokens": 1907392} +{"current_steps": 2815, "total_steps": 204665, "loss": 0.1733, "lr": 2.7497923486588167e-07, "epoch": 0.06877091832995383, "percentage": 1.38, "elapsed_time": "0:03:16", "remaining_time": "3:54:56", "throughput": 9720.79, "total_tokens": 1911040} +{"current_steps": 2820, "total_steps": 204665, "loss": 0.1078, "lr": 2.7546782625690135e-07, "epoch": 0.06889306916180099, "percentage": 1.38, "elapsed_time": "0:03:16", "remaining_time": "3:54:55", "throughput": 9719.34, "total_tokens": 1914048} +{"current_steps": 2825, "total_steps": 204665, "loss": 0.1456, "lr": 2.7595641764792103e-07, "epoch": 0.06901521999364815, "percentage": 1.38, "elapsed_time": "0:03:17", "remaining_time": "3:54:54", "throughput": 9718.07, "total_tokens": 1917120} +{"current_steps": 2830, "total_steps": 204665, "loss": 0.238, "lr": 2.7644500903894077e-07, "epoch": 0.06913737082549533, "percentage": 1.38, "elapsed_time": "0:03:17", "remaining_time": "3:54:54", "throughput": 9717.9, "total_tokens": 1920448} +{"current_steps": 2835, "total_steps": 204665, "loss": 0.2612, "lr": 2.769336004299604e-07, "epoch": 0.06925952165734249, "percentage": 1.39, "elapsed_time": "0:03:17", "remaining_time": "3:54:53", "throughput": 9718.61, "total_tokens": 1923968} +{"current_steps": 2840, "total_steps": 204665, "loss": 0.1823, "lr": 2.774221918209801e-07, "epoch": 0.06938167248918965, "percentage": 1.39, "elapsed_time": "0:03:18", "remaining_time": "3:54:52", "throughput": 9716.97, "total_tokens": 1926912} +{"current_steps": 2845, "total_steps": 204665, "loss": 0.0955, "lr": 2.779107832119998e-07, "epoch": 0.06950382332103681, "percentage": 1.39, "elapsed_time": "0:03:18", "remaining_time": "3:54:52", "throughput": 9716.97, "total_tokens": 1930304} +{"current_steps": 2850, "total_steps": 204665, "loss": 0.0827, "lr": 2.783993746030195e-07, "epoch": 0.06962597415288398, "percentage": 1.39, "elapsed_time": "0:03:18", "remaining_time": "3:54:51", "throughput": 9717.11, "total_tokens": 1933696} +{"current_steps": 2855, "total_steps": 204665, "loss": 0.1063, "lr": 2.788879659940392e-07, "epoch": 0.06974812498473115, "percentage": 1.39, "elapsed_time": "0:03:19", "remaining_time": "3:54:51", "throughput": 9718.62, "total_tokens": 1937472} +{"current_steps": 2860, "total_steps": 204665, "loss": 0.1743, "lr": 2.793765573850589e-07, "epoch": 0.06987027581657831, "percentage": 1.4, "elapsed_time": "0:03:19", "remaining_time": "3:54:51", "throughput": 9719.22, "total_tokens": 1940992} +{"current_steps": 2865, "total_steps": 204665, "loss": 0.1996, "lr": 2.7986514877607854e-07, "epoch": 0.06999242664842548, "percentage": 1.4, "elapsed_time": "0:03:20", "remaining_time": "3:54:51", "throughput": 9720.51, "total_tokens": 1944704} +{"current_steps": 2870, "total_steps": 204665, "loss": 0.2116, "lr": 2.803537401670982e-07, "epoch": 0.07011457748027264, "percentage": 1.4, "elapsed_time": "0:03:20", "remaining_time": "3:54:50", "throughput": 9719.56, "total_tokens": 1947840} +{"current_steps": 2875, "total_steps": 204665, "loss": 0.1368, "lr": 2.8084233155811795e-07, "epoch": 0.0702367283121198, "percentage": 1.4, "elapsed_time": "0:03:20", "remaining_time": "3:54:50", "throughput": 9719.27, "total_tokens": 1951168} +{"current_steps": 2880, "total_steps": 204665, "loss": 0.087, "lr": 2.8133092294913764e-07, "epoch": 0.07035887914396698, "percentage": 1.41, "elapsed_time": "0:03:21", "remaining_time": "3:54:50", "throughput": 9719.47, "total_tokens": 1954624} +{"current_steps": 2885, "total_steps": 204665, "loss": 0.1982, "lr": 2.818195143401573e-07, "epoch": 0.07048102997581414, "percentage": 1.41, "elapsed_time": "0:03:21", "remaining_time": "3:54:49", "throughput": 9719.48, "total_tokens": 1958016} +{"current_steps": 2890, "total_steps": 204665, "loss": 0.2, "lr": 2.8230810573117705e-07, "epoch": 0.0706031808076613, "percentage": 1.41, "elapsed_time": "0:03:21", "remaining_time": "3:54:49", "throughput": 9718.48, "total_tokens": 1961152} +{"current_steps": 2895, "total_steps": 204665, "loss": 0.1631, "lr": 2.827966971221967e-07, "epoch": 0.07072533163950846, "percentage": 1.41, "elapsed_time": "0:03:22", "remaining_time": "3:54:48", "throughput": 9717.32, "total_tokens": 1964224} +{"current_steps": 2900, "total_steps": 204665, "loss": 0.1541, "lr": 2.8328528851321636e-07, "epoch": 0.07084748247135562, "percentage": 1.42, "elapsed_time": "0:03:22", "remaining_time": "3:54:46", "throughput": 9715.45, "total_tokens": 1967104} +{"current_steps": 2905, "total_steps": 204665, "loss": 0.1006, "lr": 2.837738799042361e-07, "epoch": 0.0709696333032028, "percentage": 1.42, "elapsed_time": "0:03:22", "remaining_time": "3:54:47", "throughput": 9717.24, "total_tokens": 1970944} +{"current_steps": 2910, "total_steps": 204665, "loss": 0.1888, "lr": 2.842624712952558e-07, "epoch": 0.07109178413504996, "percentage": 1.42, "elapsed_time": "0:03:23", "remaining_time": "3:54:46", "throughput": 9716.09, "total_tokens": 1974016} +{"current_steps": 2915, "total_steps": 204665, "loss": 0.0826, "lr": 2.8475106268627546e-07, "epoch": 0.07121393496689712, "percentage": 1.42, "elapsed_time": "0:03:23", "remaining_time": "3:54:46", "throughput": 9717.09, "total_tokens": 1977664} +{"current_steps": 2920, "total_steps": 204665, "loss": 0.2103, "lr": 2.852396540772952e-07, "epoch": 0.07133608579874429, "percentage": 1.43, "elapsed_time": "0:03:23", "remaining_time": "3:54:45", "throughput": 9716.58, "total_tokens": 1980928} +{"current_steps": 2925, "total_steps": 204665, "loss": 0.1023, "lr": 2.857282454683148e-07, "epoch": 0.07145823663059145, "percentage": 1.43, "elapsed_time": "0:03:24", "remaining_time": "3:54:45", "throughput": 9716.41, "total_tokens": 1984256} +{"current_steps": 2930, "total_steps": 204665, "loss": 0.0925, "lr": 2.862168368593345e-07, "epoch": 0.07158038746243862, "percentage": 1.43, "elapsed_time": "0:03:24", "remaining_time": "3:54:45", "throughput": 9717.33, "total_tokens": 1987904} +{"current_steps": 2935, "total_steps": 204665, "loss": 0.0685, "lr": 2.867054282503542e-07, "epoch": 0.07170253829428579, "percentage": 1.43, "elapsed_time": "0:03:24", "remaining_time": "3:54:44", "throughput": 9716.65, "total_tokens": 1991104} +{"current_steps": 2940, "total_steps": 204665, "loss": 0.1111, "lr": 2.871940196413739e-07, "epoch": 0.07182468912613295, "percentage": 1.44, "elapsed_time": "0:03:25", "remaining_time": "3:54:44", "throughput": 9717.13, "total_tokens": 1994624} +{"current_steps": 2945, "total_steps": 204665, "loss": 0.1573, "lr": 2.876826110323936e-07, "epoch": 0.07194683995798011, "percentage": 1.44, "elapsed_time": "0:03:25", "remaining_time": "3:54:43", "throughput": 9716.15, "total_tokens": 1997696} +{"current_steps": 2950, "total_steps": 204665, "loss": 0.125, "lr": 2.8817120242341333e-07, "epoch": 0.07206899078982727, "percentage": 1.44, "elapsed_time": "0:03:25", "remaining_time": "3:54:42", "throughput": 9715.69, "total_tokens": 2000960} +{"current_steps": 2955, "total_steps": 204665, "loss": 0.2306, "lr": 2.8865979381443296e-07, "epoch": 0.07219114162167445, "percentage": 1.44, "elapsed_time": "0:03:26", "remaining_time": "3:54:41", "throughput": 9715.2, "total_tokens": 2004224} +{"current_steps": 2960, "total_steps": 204665, "loss": 0.2147, "lr": 2.8914838520545264e-07, "epoch": 0.07231329245352161, "percentage": 1.45, "elapsed_time": "0:03:26", "remaining_time": "3:54:41", "throughput": 9715.52, "total_tokens": 2007680} +{"current_steps": 2965, "total_steps": 204665, "loss": 0.249, "lr": 2.896369765964723e-07, "epoch": 0.07243544328536877, "percentage": 1.45, "elapsed_time": "0:03:26", "remaining_time": "3:54:40", "throughput": 9715.48, "total_tokens": 2011008} +{"current_steps": 2970, "total_steps": 204665, "loss": 0.183, "lr": 2.9012556798749206e-07, "epoch": 0.07255759411721593, "percentage": 1.45, "elapsed_time": "0:03:27", "remaining_time": "3:54:40", "throughput": 9715.66, "total_tokens": 2014464} +{"current_steps": 2975, "total_steps": 204665, "loss": 0.1893, "lr": 2.9061415937851174e-07, "epoch": 0.0726797449490631, "percentage": 1.45, "elapsed_time": "0:03:27", "remaining_time": "3:54:40", "throughput": 9717.19, "total_tokens": 2018240} +{"current_steps": 2980, "total_steps": 204665, "loss": 0.1779, "lr": 2.911027507695314e-07, "epoch": 0.07280189578091027, "percentage": 1.46, "elapsed_time": "0:03:28", "remaining_time": "3:54:40", "throughput": 9716.55, "total_tokens": 2021440} +{"current_steps": 2985, "total_steps": 204665, "loss": 0.2133, "lr": 2.915913421605511e-07, "epoch": 0.07292404661275743, "percentage": 1.46, "elapsed_time": "0:03:28", "remaining_time": "3:54:39", "throughput": 9716.69, "total_tokens": 2024832} +{"current_steps": 2990, "total_steps": 204665, "loss": 0.1293, "lr": 2.920799335515708e-07, "epoch": 0.0730461974446046, "percentage": 1.46, "elapsed_time": "0:03:28", "remaining_time": "3:54:39", "throughput": 9717.64, "total_tokens": 2028480} +{"current_steps": 2995, "total_steps": 204665, "loss": 0.1734, "lr": 2.9256852494259046e-07, "epoch": 0.07316834827645176, "percentage": 1.46, "elapsed_time": "0:03:29", "remaining_time": "3:54:39", "throughput": 9717.34, "total_tokens": 2031808} +{"current_steps": 3000, "total_steps": 204665, "loss": 0.1972, "lr": 2.930571163336102e-07, "epoch": 0.07329049910829893, "percentage": 1.47, "elapsed_time": "0:03:29", "remaining_time": "3:54:37", "throughput": 9714.89, "total_tokens": 2034496} +{"current_steps": 3005, "total_steps": 204665, "loss": 0.1524, "lr": 2.935457077246299e-07, "epoch": 0.0734126499401461, "percentage": 1.47, "elapsed_time": "0:03:29", "remaining_time": "3:54:36", "throughput": 9713.5, "total_tokens": 2037504} +{"current_steps": 3010, "total_steps": 204665, "loss": 0.1649, "lr": 2.9403429911564956e-07, "epoch": 0.07353480077199326, "percentage": 1.47, "elapsed_time": "0:03:30", "remaining_time": "3:54:35", "throughput": 9712.25, "total_tokens": 2040512} +{"current_steps": 3015, "total_steps": 204665, "loss": 0.1462, "lr": 2.945228905066693e-07, "epoch": 0.07365695160384042, "percentage": 1.47, "elapsed_time": "0:03:30", "remaining_time": "3:54:34", "throughput": 9711.65, "total_tokens": 2043712} +{"current_steps": 3020, "total_steps": 204665, "loss": 0.1338, "lr": 2.950114818976889e-07, "epoch": 0.07377910243568758, "percentage": 1.48, "elapsed_time": "0:03:30", "remaining_time": "3:54:34", "throughput": 9711.56, "total_tokens": 2047104} +{"current_steps": 3025, "total_steps": 204665, "loss": 0.1718, "lr": 2.955000732887086e-07, "epoch": 0.07390125326753476, "percentage": 1.48, "elapsed_time": "0:03:31", "remaining_time": "3:54:34", "throughput": 9713.3, "total_tokens": 2050944} +{"current_steps": 3030, "total_steps": 204665, "loss": 0.0823, "lr": 2.9598866467972834e-07, "epoch": 0.07402340409938192, "percentage": 1.48, "elapsed_time": "0:03:31", "remaining_time": "3:54:33", "throughput": 9712.25, "total_tokens": 2054016} +{"current_steps": 3035, "total_steps": 204665, "loss": 0.091, "lr": 2.96477256070748e-07, "epoch": 0.07414555493122908, "percentage": 1.48, "elapsed_time": "0:03:31", "remaining_time": "3:54:32", "throughput": 9710.45, "total_tokens": 2056896} +{"current_steps": 3040, "total_steps": 204665, "loss": 0.1407, "lr": 2.969658474617677e-07, "epoch": 0.07426770576307624, "percentage": 1.49, "elapsed_time": "0:03:32", "remaining_time": "3:54:31", "throughput": 9710.33, "total_tokens": 2060224} +{"current_steps": 3045, "total_steps": 204665, "loss": 0.1161, "lr": 2.9745443885278744e-07, "epoch": 0.0743898565949234, "percentage": 1.49, "elapsed_time": "0:03:32", "remaining_time": "3:54:31", "throughput": 9711.12, "total_tokens": 2063808} +{"current_steps": 3050, "total_steps": 204665, "loss": 0.2451, "lr": 2.9794303024380706e-07, "epoch": 0.07451200742677058, "percentage": 1.49, "elapsed_time": "0:03:32", "remaining_time": "3:54:31", "throughput": 9710.68, "total_tokens": 2067072} +{"current_steps": 3055, "total_steps": 204665, "loss": 0.1461, "lr": 2.9843162163482675e-07, "epoch": 0.07463415825861774, "percentage": 1.49, "elapsed_time": "0:03:33", "remaining_time": "3:54:31", "throughput": 9711.73, "total_tokens": 2070720} +{"current_steps": 3060, "total_steps": 204665, "loss": 0.0868, "lr": 2.989202130258465e-07, "epoch": 0.0747563090904649, "percentage": 1.5, "elapsed_time": "0:03:33", "remaining_time": "3:54:30", "throughput": 9712.07, "total_tokens": 2074176} +{"current_steps": 3065, "total_steps": 204665, "loss": 0.2104, "lr": 2.9940880441686616e-07, "epoch": 0.07487845992231207, "percentage": 1.5, "elapsed_time": "0:03:33", "remaining_time": "3:54:30", "throughput": 9711.36, "total_tokens": 2077376} +{"current_steps": 3070, "total_steps": 204665, "loss": 0.1225, "lr": 2.9989739580788584e-07, "epoch": 0.07500061075415923, "percentage": 1.5, "elapsed_time": "0:03:34", "remaining_time": "3:54:29", "throughput": 9711.98, "total_tokens": 2080896} +{"current_steps": 3075, "total_steps": 204665, "loss": 0.167, "lr": 3.003859871989056e-07, "epoch": 0.0751227615860064, "percentage": 1.5, "elapsed_time": "0:03:34", "remaining_time": "3:54:29", "throughput": 9711.26, "total_tokens": 2084096} +{"current_steps": 3080, "total_steps": 204665, "loss": 0.2051, "lr": 3.008745785899252e-07, "epoch": 0.07524491241785357, "percentage": 1.5, "elapsed_time": "0:03:34", "remaining_time": "3:54:28", "throughput": 9710.67, "total_tokens": 2087296} +{"current_steps": 3085, "total_steps": 204665, "loss": 0.1622, "lr": 3.013631699809449e-07, "epoch": 0.07536706324970073, "percentage": 1.51, "elapsed_time": "0:03:35", "remaining_time": "3:54:27", "throughput": 9710.54, "total_tokens": 2090624} +{"current_steps": 3090, "total_steps": 204665, "loss": 0.0648, "lr": 3.018517613719646e-07, "epoch": 0.07548921408154789, "percentage": 1.51, "elapsed_time": "0:03:35", "remaining_time": "3:54:28", "throughput": 9713.55, "total_tokens": 2094848} +{"current_steps": 3095, "total_steps": 204665, "loss": 0.1455, "lr": 3.023403527629843e-07, "epoch": 0.07561136491339505, "percentage": 1.51, "elapsed_time": "0:03:36", "remaining_time": "3:54:27", "throughput": 9712.95, "total_tokens": 2098048} +{"current_steps": 3100, "total_steps": 204665, "loss": 0.076, "lr": 3.02828944154004e-07, "epoch": 0.07573351574524223, "percentage": 1.51, "elapsed_time": "0:03:36", "remaining_time": "3:54:28", "throughput": 9714.62, "total_tokens": 2101888} +{"current_steps": 3105, "total_steps": 204665, "loss": 0.2214, "lr": 3.033175355450237e-07, "epoch": 0.07585566657708939, "percentage": 1.52, "elapsed_time": "0:03:36", "remaining_time": "3:54:27", "throughput": 9713.47, "total_tokens": 2104960} +{"current_steps": 3110, "total_steps": 204665, "loss": 0.1427, "lr": 3.0380612693604335e-07, "epoch": 0.07597781740893655, "percentage": 1.52, "elapsed_time": "0:03:37", "remaining_time": "3:54:26", "throughput": 9712.12, "total_tokens": 2107968} +{"current_steps": 3115, "total_steps": 204665, "loss": 0.1003, "lr": 3.0429471832706303e-07, "epoch": 0.07609996824078372, "percentage": 1.52, "elapsed_time": "0:03:37", "remaining_time": "3:54:25", "throughput": 9711.51, "total_tokens": 2111168} +{"current_steps": 3120, "total_steps": 204665, "loss": 0.1005, "lr": 3.0478330971808276e-07, "epoch": 0.07622211907263088, "percentage": 1.52, "elapsed_time": "0:03:37", "remaining_time": "3:54:25", "throughput": 9711.26, "total_tokens": 2114496} +{"current_steps": 3125, "total_steps": 204665, "loss": 0.0685, "lr": 3.0527190110910244e-07, "epoch": 0.07634426990447805, "percentage": 1.53, "elapsed_time": "0:03:38", "remaining_time": "3:54:25", "throughput": 9711.43, "total_tokens": 2117952} +{"current_steps": 3130, "total_steps": 204665, "loss": 0.2478, "lr": 3.057604925001221e-07, "epoch": 0.07646642073632522, "percentage": 1.53, "elapsed_time": "0:03:38", "remaining_time": "3:54:24", "throughput": 9710.56, "total_tokens": 2121088} +{"current_steps": 3135, "total_steps": 204665, "loss": 0.1306, "lr": 3.0624908389114186e-07, "epoch": 0.07658857156817238, "percentage": 1.53, "elapsed_time": "0:03:38", "remaining_time": "3:54:23", "throughput": 9709.79, "total_tokens": 2124224} +{"current_steps": 3140, "total_steps": 204665, "loss": 0.1123, "lr": 3.067376752821615e-07, "epoch": 0.07671072240001954, "percentage": 1.53, "elapsed_time": "0:03:39", "remaining_time": "3:54:22", "throughput": 9708.75, "total_tokens": 2127296} +{"current_steps": 3145, "total_steps": 204665, "loss": 0.1153, "lr": 3.0722626667318117e-07, "epoch": 0.0768328732318667, "percentage": 1.54, "elapsed_time": "0:03:39", "remaining_time": "3:54:22", "throughput": 9709.93, "total_tokens": 2131008} +{"current_steps": 3150, "total_steps": 204665, "loss": 0.2439, "lr": 3.077148580642009e-07, "epoch": 0.07695502406371388, "percentage": 1.54, "elapsed_time": "0:03:39", "remaining_time": "3:54:21", "throughput": 9708.65, "total_tokens": 2134016} +{"current_steps": 3155, "total_steps": 204665, "loss": 0.1683, "lr": 3.082034494552206e-07, "epoch": 0.07707717489556104, "percentage": 1.54, "elapsed_time": "0:03:40", "remaining_time": "3:54:22", "throughput": 9710.93, "total_tokens": 2138048} +{"current_steps": 3160, "total_steps": 204665, "loss": 0.099, "lr": 3.0869204084624027e-07, "epoch": 0.0771993257274082, "percentage": 1.54, "elapsed_time": "0:03:40", "remaining_time": "3:54:21", "throughput": 9711.23, "total_tokens": 2141504} +{"current_steps": 3165, "total_steps": 204665, "loss": 0.1558, "lr": 3.0918063223726e-07, "epoch": 0.07732147655925536, "percentage": 1.55, "elapsed_time": "0:03:40", "remaining_time": "3:54:20", "throughput": 9710.21, "total_tokens": 2144576} +{"current_steps": 3170, "total_steps": 204665, "loss": 0.1402, "lr": 3.0966922362827963e-07, "epoch": 0.07744362739110254, "percentage": 1.55, "elapsed_time": "0:03:41", "remaining_time": "3:54:20", "throughput": 9710.22, "total_tokens": 2147904} +{"current_steps": 3175, "total_steps": 204665, "loss": 0.1425, "lr": 3.101578150192993e-07, "epoch": 0.0775657782229497, "percentage": 1.55, "elapsed_time": "0:03:41", "remaining_time": "3:54:19", "throughput": 9710.61, "total_tokens": 2151360} +{"current_steps": 3180, "total_steps": 204665, "loss": 0.1607, "lr": 3.1064640641031904e-07, "epoch": 0.07768792905479686, "percentage": 1.55, "elapsed_time": "0:03:41", "remaining_time": "3:54:19", "throughput": 9710.9, "total_tokens": 2154816} +{"current_steps": 3185, "total_steps": 204665, "loss": 0.1372, "lr": 3.111349978013387e-07, "epoch": 0.07781007988664403, "percentage": 1.56, "elapsed_time": "0:03:42", "remaining_time": "3:54:18", "throughput": 9710.05, "total_tokens": 2157952} +{"current_steps": 3190, "total_steps": 204665, "loss": 0.139, "lr": 3.116235891923584e-07, "epoch": 0.07793223071849119, "percentage": 1.56, "elapsed_time": "0:03:42", "remaining_time": "3:54:17", "throughput": 9709.73, "total_tokens": 2161216} +{"current_steps": 3195, "total_steps": 204665, "loss": 0.1316, "lr": 3.1211218058337814e-07, "epoch": 0.07805438155033836, "percentage": 1.56, "elapsed_time": "0:03:42", "remaining_time": "3:54:17", "throughput": 9709.14, "total_tokens": 2164416} +{"current_steps": 3200, "total_steps": 204665, "loss": 0.1019, "lr": 3.1260077197439777e-07, "epoch": 0.07817653238218553, "percentage": 1.56, "elapsed_time": "0:03:43", "remaining_time": "3:54:16", "throughput": 9708.74, "total_tokens": 2167680} +{"current_steps": 3205, "total_steps": 204665, "loss": 0.177, "lr": 3.1308936336541745e-07, "epoch": 0.07829868321403269, "percentage": 1.57, "elapsed_time": "0:03:43", "remaining_time": "3:54:15", "throughput": 9707.42, "total_tokens": 2170688} +{"current_steps": 3210, "total_steps": 204665, "loss": 0.0725, "lr": 3.135779547564372e-07, "epoch": 0.07842083404587985, "percentage": 1.57, "elapsed_time": "0:03:43", "remaining_time": "3:54:14", "throughput": 9706.13, "total_tokens": 2173696} +{"current_steps": 3215, "total_steps": 204665, "loss": 0.1236, "lr": 3.1406654614745687e-07, "epoch": 0.07854298487772701, "percentage": 1.57, "elapsed_time": "0:03:44", "remaining_time": "3:54:14", "throughput": 9707.1, "total_tokens": 2177344} +{"current_steps": 3220, "total_steps": 204665, "loss": 0.13, "lr": 3.1455513753847655e-07, "epoch": 0.07866513570957419, "percentage": 1.57, "elapsed_time": "0:03:44", "remaining_time": "3:54:14", "throughput": 9706.93, "total_tokens": 2180672} +{"current_steps": 3225, "total_steps": 204665, "loss": 0.1509, "lr": 3.150437289294963e-07, "epoch": 0.07878728654142135, "percentage": 1.58, "elapsed_time": "0:03:44", "remaining_time": "3:54:13", "throughput": 9706.05, "total_tokens": 2183808} +{"current_steps": 3230, "total_steps": 204665, "loss": 0.1114, "lr": 3.1553232032051596e-07, "epoch": 0.07890943737326851, "percentage": 1.58, "elapsed_time": "0:03:45", "remaining_time": "3:54:13", "throughput": 9707.45, "total_tokens": 2187584} +{"current_steps": 3235, "total_steps": 204665, "loss": 0.1227, "lr": 3.160209117115356e-07, "epoch": 0.07903158820511567, "percentage": 1.58, "elapsed_time": "0:03:45", "remaining_time": "3:54:12", "throughput": 9706.96, "total_tokens": 2190784} +{"current_steps": 3240, "total_steps": 204665, "loss": 0.1845, "lr": 3.165095031025553e-07, "epoch": 0.07915373903696284, "percentage": 1.58, "elapsed_time": "0:03:46", "remaining_time": "3:54:12", "throughput": 9706.82, "total_tokens": 2194112} +{"current_steps": 3245, "total_steps": 204665, "loss": 0.1198, "lr": 3.16998094493575e-07, "epoch": 0.07927588986881001, "percentage": 1.59, "elapsed_time": "0:03:46", "remaining_time": "3:54:11", "throughput": 9706.93, "total_tokens": 2197504} +{"current_steps": 3250, "total_steps": 204665, "loss": 0.1323, "lr": 3.174866858845947e-07, "epoch": 0.07939804070065717, "percentage": 1.59, "elapsed_time": "0:03:46", "remaining_time": "3:54:10", "throughput": 9705.78, "total_tokens": 2200512} +{"current_steps": 3255, "total_steps": 204665, "loss": 0.1792, "lr": 3.179752772756144e-07, "epoch": 0.07952019153250434, "percentage": 1.59, "elapsed_time": "0:03:47", "remaining_time": "3:54:10", "throughput": 9706.95, "total_tokens": 2204224} +{"current_steps": 3260, "total_steps": 204665, "loss": 0.1659, "lr": 3.184638686666341e-07, "epoch": 0.0796423423643515, "percentage": 1.59, "elapsed_time": "0:03:47", "remaining_time": "3:54:10", "throughput": 9706.62, "total_tokens": 2207488} +{"current_steps": 3265, "total_steps": 204665, "loss": 0.1968, "lr": 3.1895246005765373e-07, "epoch": 0.07976449319619866, "percentage": 1.6, "elapsed_time": "0:03:47", "remaining_time": "3:54:09", "throughput": 9705.33, "total_tokens": 2210496} +{"current_steps": 3270, "total_steps": 204665, "loss": 0.0935, "lr": 3.1944105144867347e-07, "epoch": 0.07988664402804584, "percentage": 1.6, "elapsed_time": "0:03:48", "remaining_time": "3:54:08", "throughput": 9705.18, "total_tokens": 2213824} +{"current_steps": 3275, "total_steps": 204665, "loss": 0.1406, "lr": 3.1992964283969315e-07, "epoch": 0.080008794859893, "percentage": 1.6, "elapsed_time": "0:03:48", "remaining_time": "3:54:08", "throughput": 9706.6, "total_tokens": 2217600} +{"current_steps": 3280, "total_steps": 204665, "loss": 0.1532, "lr": 3.2041823423071283e-07, "epoch": 0.08013094569174016, "percentage": 1.6, "elapsed_time": "0:03:48", "remaining_time": "3:54:08", "throughput": 9706.89, "total_tokens": 2221056} +{"current_steps": 3285, "total_steps": 204665, "loss": 0.094, "lr": 3.2090682562173256e-07, "epoch": 0.08025309652358732, "percentage": 1.61, "elapsed_time": "0:03:49", "remaining_time": "3:54:08", "throughput": 9707.0, "total_tokens": 2224448} +{"current_steps": 3290, "total_steps": 204665, "loss": 0.1089, "lr": 3.2139541701275224e-07, "epoch": 0.08037524735543448, "percentage": 1.61, "elapsed_time": "0:03:49", "remaining_time": "3:54:08", "throughput": 9708.22, "total_tokens": 2228160} +{"current_steps": 3295, "total_steps": 204665, "loss": 0.127, "lr": 3.2188400840377187e-07, "epoch": 0.08049739818728166, "percentage": 1.61, "elapsed_time": "0:03:49", "remaining_time": "3:54:07", "throughput": 9708.07, "total_tokens": 2231488} +{"current_steps": 3300, "total_steps": 204665, "loss": 0.0842, "lr": 3.223725997947916e-07, "epoch": 0.08061954901912882, "percentage": 1.61, "elapsed_time": "0:03:50", "remaining_time": "3:54:07", "throughput": 9708.41, "total_tokens": 2235008} +{"current_steps": 3305, "total_steps": 204665, "loss": 0.2433, "lr": 3.228611911858113e-07, "epoch": 0.08074169985097598, "percentage": 1.61, "elapsed_time": "0:03:50", "remaining_time": "3:54:06", "throughput": 9707.15, "total_tokens": 2238016} +{"current_steps": 3310, "total_steps": 204665, "loss": 0.1516, "lr": 3.2334978257683097e-07, "epoch": 0.08086385068282315, "percentage": 1.62, "elapsed_time": "0:03:50", "remaining_time": "3:54:06", "throughput": 9707.98, "total_tokens": 2241664} +{"current_steps": 3315, "total_steps": 204665, "loss": 0.2021, "lr": 3.238383739678507e-07, "epoch": 0.08098600151467031, "percentage": 1.62, "elapsed_time": "0:03:51", "remaining_time": "3:54:06", "throughput": 9707.25, "total_tokens": 2244864} +{"current_steps": 3320, "total_steps": 204665, "loss": 0.0696, "lr": 3.243269653588704e-07, "epoch": 0.08110815234651748, "percentage": 1.62, "elapsed_time": "0:03:51", "remaining_time": "3:54:05", "throughput": 9707.53, "total_tokens": 2248320} +{"current_steps": 3325, "total_steps": 204665, "loss": 0.088, "lr": 3.2481555674989e-07, "epoch": 0.08123030317836465, "percentage": 1.62, "elapsed_time": "0:03:51", "remaining_time": "3:54:05", "throughput": 9707.43, "total_tokens": 2251712} +{"current_steps": 3330, "total_steps": 204665, "loss": 0.1939, "lr": 3.2530414814090975e-07, "epoch": 0.08135245401021181, "percentage": 1.63, "elapsed_time": "0:03:52", "remaining_time": "3:54:05", "throughput": 9707.04, "total_tokens": 2254976} +{"current_steps": 3335, "total_steps": 204665, "loss": 0.2149, "lr": 3.2579273953192943e-07, "epoch": 0.08147460484205897, "percentage": 1.63, "elapsed_time": "0:03:52", "remaining_time": "3:54:04", "throughput": 9706.95, "total_tokens": 2258304} +{"current_steps": 3340, "total_steps": 204665, "loss": 0.1501, "lr": 3.262813309229491e-07, "epoch": 0.08159675567390615, "percentage": 1.63, "elapsed_time": "0:03:52", "remaining_time": "3:54:04", "throughput": 9706.83, "total_tokens": 2261632} +{"current_steps": 3345, "total_steps": 204665, "loss": 0.1428, "lr": 3.2676992231396884e-07, "epoch": 0.08171890650575331, "percentage": 1.63, "elapsed_time": "0:03:53", "remaining_time": "3:54:03", "throughput": 9705.33, "total_tokens": 2264576} +{"current_steps": 3350, "total_steps": 204665, "loss": 0.2068, "lr": 3.272585137049885e-07, "epoch": 0.08184105733760047, "percentage": 1.64, "elapsed_time": "0:03:53", "remaining_time": "3:54:02", "throughput": 9704.05, "total_tokens": 2267520} +{"current_steps": 3355, "total_steps": 204665, "loss": 0.0769, "lr": 3.2774710509600815e-07, "epoch": 0.08196320816944763, "percentage": 1.64, "elapsed_time": "0:03:54", "remaining_time": "3:54:02", "throughput": 9706.25, "total_tokens": 2271552} +{"current_steps": 3360, "total_steps": 204665, "loss": 0.0671, "lr": 3.282356964870279e-07, "epoch": 0.0820853590012948, "percentage": 1.64, "elapsed_time": "0:03:54", "remaining_time": "3:54:02", "throughput": 9706.68, "total_tokens": 2275072} +{"current_steps": 3365, "total_steps": 204665, "loss": 0.1913, "lr": 3.2872428787804757e-07, "epoch": 0.08220750983314197, "percentage": 1.64, "elapsed_time": "0:03:54", "remaining_time": "3:54:02", "throughput": 9708.18, "total_tokens": 2278912} +{"current_steps": 3370, "total_steps": 204665, "loss": 0.1205, "lr": 3.2921287926906725e-07, "epoch": 0.08232966066498913, "percentage": 1.65, "elapsed_time": "0:03:55", "remaining_time": "3:54:02", "throughput": 9709.29, "total_tokens": 2282624} +{"current_steps": 3375, "total_steps": 204665, "loss": 0.192, "lr": 3.29701470660087e-07, "epoch": 0.0824518114968363, "percentage": 1.65, "elapsed_time": "0:03:55", "remaining_time": "3:54:02", "throughput": 9710.27, "total_tokens": 2286336} +{"current_steps": 3380, "total_steps": 204665, "loss": 0.1847, "lr": 3.3019006205110667e-07, "epoch": 0.08257396232868346, "percentage": 1.65, "elapsed_time": "0:03:55", "remaining_time": "3:54:02", "throughput": 9710.53, "total_tokens": 2289792} +{"current_steps": 3385, "total_steps": 204665, "loss": 0.1688, "lr": 3.306786534421263e-07, "epoch": 0.08269611316053062, "percentage": 1.65, "elapsed_time": "0:03:56", "remaining_time": "3:54:02", "throughput": 9710.89, "total_tokens": 2293248} +{"current_steps": 3390, "total_steps": 204665, "loss": 0.1734, "lr": 3.3116724483314603e-07, "epoch": 0.0828182639923778, "percentage": 1.66, "elapsed_time": "0:03:56", "remaining_time": "3:54:01", "throughput": 9710.6, "total_tokens": 2296512} +{"current_steps": 3395, "total_steps": 204665, "loss": 0.08, "lr": 3.316558362241657e-07, "epoch": 0.08294041482422496, "percentage": 1.66, "elapsed_time": "0:03:56", "remaining_time": "3:54:01", "throughput": 9710.59, "total_tokens": 2299904} +{"current_steps": 3400, "total_steps": 204665, "loss": 0.0663, "lr": 3.321444276151854e-07, "epoch": 0.08306256565607212, "percentage": 1.66, "elapsed_time": "0:03:57", "remaining_time": "3:54:00", "throughput": 9710.7, "total_tokens": 2303296} +{"current_steps": 3405, "total_steps": 204665, "loss": 0.1287, "lr": 3.326330190062051e-07, "epoch": 0.08318471648791928, "percentage": 1.66, "elapsed_time": "0:03:57", "remaining_time": "3:54:00", "throughput": 9711.94, "total_tokens": 2307072} +{"current_steps": 3410, "total_steps": 204665, "loss": 0.1319, "lr": 3.331216103972248e-07, "epoch": 0.08330686731976644, "percentage": 1.67, "elapsed_time": "0:03:57", "remaining_time": "3:54:00", "throughput": 9710.85, "total_tokens": 2310144} +{"current_steps": 3415, "total_steps": 204665, "loss": 0.1975, "lr": 3.3361020178824444e-07, "epoch": 0.08342901815161362, "percentage": 1.67, "elapsed_time": "0:03:58", "remaining_time": "3:53:59", "throughput": 9710.71, "total_tokens": 2313472} +{"current_steps": 3420, "total_steps": 204665, "loss": 0.077, "lr": 3.3409879317926417e-07, "epoch": 0.08355116898346078, "percentage": 1.67, "elapsed_time": "0:03:58", "remaining_time": "3:53:59", "throughput": 9710.13, "total_tokens": 2316672} +{"current_steps": 3425, "total_steps": 204665, "loss": 0.0846, "lr": 3.3458738457028385e-07, "epoch": 0.08367331981530794, "percentage": 1.67, "elapsed_time": "0:03:58", "remaining_time": "3:53:58", "throughput": 9709.49, "total_tokens": 2319872} +{"current_steps": 3430, "total_steps": 204665, "loss": 0.0846, "lr": 3.3507597596130353e-07, "epoch": 0.0837954706471551, "percentage": 1.68, "elapsed_time": "0:03:59", "remaining_time": "3:53:58", "throughput": 9709.76, "total_tokens": 2323328} +{"current_steps": 3435, "total_steps": 204665, "loss": 0.1385, "lr": 3.3556456735232327e-07, "epoch": 0.08391762147900227, "percentage": 1.68, "elapsed_time": "0:03:59", "remaining_time": "3:53:56", "throughput": 9708.24, "total_tokens": 2326208} +{"current_steps": 3440, "total_steps": 204665, "loss": 0.0969, "lr": 3.3605315874334295e-07, "epoch": 0.08403977231084944, "percentage": 1.68, "elapsed_time": "0:03:59", "remaining_time": "3:53:56", "throughput": 9708.06, "total_tokens": 2329536} +{"current_steps": 3445, "total_steps": 204665, "loss": 0.1586, "lr": 3.365417501343626e-07, "epoch": 0.0841619231426966, "percentage": 1.68, "elapsed_time": "0:04:00", "remaining_time": "3:53:56", "throughput": 9709.71, "total_tokens": 2333440} +{"current_steps": 3450, "total_steps": 204665, "loss": 0.1598, "lr": 3.370303415253823e-07, "epoch": 0.08428407397454377, "percentage": 1.69, "elapsed_time": "0:04:00", "remaining_time": "3:53:56", "throughput": 9709.07, "total_tokens": 2336640} +{"current_steps": 3455, "total_steps": 204665, "loss": 0.1757, "lr": 3.37518932916402e-07, "epoch": 0.08440622480639093, "percentage": 1.69, "elapsed_time": "0:04:01", "remaining_time": "3:53:55", "throughput": 9708.69, "total_tokens": 2339904} +{"current_steps": 3460, "total_steps": 204665, "loss": 0.0961, "lr": 3.3800752430742167e-07, "epoch": 0.08452837563823809, "percentage": 1.69, "elapsed_time": "0:04:01", "remaining_time": "3:53:56", "throughput": 9710.43, "total_tokens": 2343808} +{"current_steps": 3465, "total_steps": 204665, "loss": 0.2321, "lr": 3.384961156984414e-07, "epoch": 0.08465052647008527, "percentage": 1.69, "elapsed_time": "0:04:01", "remaining_time": "3:53:55", "throughput": 9710.59, "total_tokens": 2347264} +{"current_steps": 3470, "total_steps": 204665, "loss": 0.187, "lr": 3.389847070894611e-07, "epoch": 0.08477267730193243, "percentage": 1.7, "elapsed_time": "0:04:02", "remaining_time": "3:53:55", "throughput": 9709.96, "total_tokens": 2350464} +{"current_steps": 3475, "total_steps": 204665, "loss": 0.2035, "lr": 3.3947329848048077e-07, "epoch": 0.08489482813377959, "percentage": 1.7, "elapsed_time": "0:04:02", "remaining_time": "3:53:54", "throughput": 9708.91, "total_tokens": 2353536} +{"current_steps": 3480, "total_steps": 204665, "loss": 0.1203, "lr": 3.3996188987150045e-07, "epoch": 0.08501697896562675, "percentage": 1.7, "elapsed_time": "0:04:02", "remaining_time": "3:53:54", "throughput": 9708.64, "total_tokens": 2356864} +{"current_steps": 3485, "total_steps": 204665, "loss": 0.0895, "lr": 3.4045048126252013e-07, "epoch": 0.08513912979747391, "percentage": 1.7, "elapsed_time": "0:04:03", "remaining_time": "3:53:54", "throughput": 9708.37, "total_tokens": 2360192} +{"current_steps": 3490, "total_steps": 204665, "loss": 0.0905, "lr": 3.409390726535398e-07, "epoch": 0.08526128062932109, "percentage": 1.71, "elapsed_time": "0:04:03", "remaining_time": "3:53:53", "throughput": 9708.13, "total_tokens": 2363520} +{"current_steps": 3495, "total_steps": 204665, "loss": 0.0922, "lr": 3.4142766404455955e-07, "epoch": 0.08538343146116825, "percentage": 1.71, "elapsed_time": "0:04:03", "remaining_time": "3:53:52", "throughput": 9707.41, "total_tokens": 2366656} +{"current_steps": 3500, "total_steps": 204665, "loss": 0.1001, "lr": 3.4191625543557923e-07, "epoch": 0.08550558229301541, "percentage": 1.71, "elapsed_time": "0:04:04", "remaining_time": "3:53:52", "throughput": 9707.0, "total_tokens": 2369920} +{"current_steps": 3505, "total_steps": 204665, "loss": 0.1379, "lr": 3.424048468265989e-07, "epoch": 0.08562773312486258, "percentage": 1.71, "elapsed_time": "0:04:04", "remaining_time": "3:53:52", "throughput": 9707.0, "total_tokens": 2373312} +{"current_steps": 3510, "total_steps": 204665, "loss": 0.1846, "lr": 3.428934382176186e-07, "epoch": 0.08574988395670975, "percentage": 1.71, "elapsed_time": "0:04:04", "remaining_time": "3:53:51", "throughput": 9706.13, "total_tokens": 2376448} +{"current_steps": 3515, "total_steps": 204665, "loss": 0.1408, "lr": 3.433820296086383e-07, "epoch": 0.08587203478855691, "percentage": 1.72, "elapsed_time": "0:04:05", "remaining_time": "3:53:51", "throughput": 9707.04, "total_tokens": 2380160} +{"current_steps": 3520, "total_steps": 204665, "loss": 0.156, "lr": 3.4387062099965796e-07, "epoch": 0.08599418562040408, "percentage": 1.72, "elapsed_time": "0:04:05", "remaining_time": "3:53:51", "throughput": 9707.3, "total_tokens": 2383616} +{"current_steps": 3525, "total_steps": 204665, "loss": 0.0705, "lr": 3.443592123906777e-07, "epoch": 0.08611633645225124, "percentage": 1.72, "elapsed_time": "0:04:05", "remaining_time": "3:53:51", "throughput": 9707.15, "total_tokens": 2386944} +{"current_steps": 3530, "total_steps": 204665, "loss": 0.0863, "lr": 3.4484780378169737e-07, "epoch": 0.0862384872840984, "percentage": 1.72, "elapsed_time": "0:04:06", "remaining_time": "3:53:50", "throughput": 9707.16, "total_tokens": 2390336} +{"current_steps": 3535, "total_steps": 204665, "loss": 0.1225, "lr": 3.4533639517271705e-07, "epoch": 0.08636063811594558, "percentage": 1.73, "elapsed_time": "0:04:06", "remaining_time": "3:53:50", "throughput": 9706.77, "total_tokens": 2393600} +{"current_steps": 3540, "total_steps": 204665, "loss": 0.1515, "lr": 3.4582498656373673e-07, "epoch": 0.08648278894779274, "percentage": 1.73, "elapsed_time": "0:04:06", "remaining_time": "3:53:49", "throughput": 9705.59, "total_tokens": 2396608} +{"current_steps": 3545, "total_steps": 204665, "loss": 0.1262, "lr": 3.463135779547564e-07, "epoch": 0.0866049397796399, "percentage": 1.73, "elapsed_time": "0:04:07", "remaining_time": "3:53:48", "throughput": 9704.74, "total_tokens": 2399680} +{"current_steps": 3550, "total_steps": 204665, "loss": 0.0761, "lr": 3.468021693457761e-07, "epoch": 0.08672709061148706, "percentage": 1.73, "elapsed_time": "0:04:07", "remaining_time": "3:53:47", "throughput": 9702.79, "total_tokens": 2402432} +{"current_steps": 3555, "total_steps": 204665, "loss": 0.2028, "lr": 3.4729076073679583e-07, "epoch": 0.08684924144333422, "percentage": 1.74, "elapsed_time": "0:04:07", "remaining_time": "3:53:46", "throughput": 9701.84, "total_tokens": 2405504} +{"current_steps": 3560, "total_steps": 204665, "loss": 0.1644, "lr": 3.477793521278155e-07, "epoch": 0.0869713922751814, "percentage": 1.74, "elapsed_time": "0:04:08", "remaining_time": "3:53:46", "throughput": 9702.23, "total_tokens": 2409024} +{"current_steps": 3565, "total_steps": 204665, "loss": 0.1153, "lr": 3.482679435188352e-07, "epoch": 0.08709354310702856, "percentage": 1.74, "elapsed_time": "0:04:08", "remaining_time": "3:53:45", "throughput": 9702.22, "total_tokens": 2412416} +{"current_steps": 3570, "total_steps": 204665, "loss": 0.1801, "lr": 3.487565349098549e-07, "epoch": 0.08721569393887572, "percentage": 1.74, "elapsed_time": "0:04:09", "remaining_time": "3:53:46", "throughput": 9703.52, "total_tokens": 2416192} +{"current_steps": 3575, "total_steps": 204665, "loss": 0.1723, "lr": 3.4924512630087456e-07, "epoch": 0.08733784477072289, "percentage": 1.75, "elapsed_time": "0:04:09", "remaining_time": "3:53:45", "throughput": 9704.4, "total_tokens": 2419840} +{"current_steps": 3580, "total_steps": 204665, "loss": 0.0996, "lr": 3.4973371769189424e-07, "epoch": 0.08745999560257005, "percentage": 1.75, "elapsed_time": "0:04:09", "remaining_time": "3:53:45", "throughput": 9703.63, "total_tokens": 2422976} +{"current_steps": 3585, "total_steps": 204665, "loss": 0.1036, "lr": 3.5022230908291397e-07, "epoch": 0.08758214643441722, "percentage": 1.75, "elapsed_time": "0:04:10", "remaining_time": "3:53:45", "throughput": 9704.01, "total_tokens": 2426496} +{"current_steps": 3590, "total_steps": 204665, "loss": 0.0971, "lr": 3.5071090047393365e-07, "epoch": 0.08770429726626439, "percentage": 1.75, "elapsed_time": "0:04:10", "remaining_time": "3:53:44", "throughput": 9702.49, "total_tokens": 2429376} +{"current_steps": 3595, "total_steps": 204665, "loss": 0.1917, "lr": 3.5119949186495333e-07, "epoch": 0.08782644809811155, "percentage": 1.76, "elapsed_time": "0:04:10", "remaining_time": "3:53:43", "throughput": 9701.97, "total_tokens": 2432576} +{"current_steps": 3600, "total_steps": 204665, "loss": 0.1268, "lr": 3.51688083255973e-07, "epoch": 0.08794859892995871, "percentage": 1.76, "elapsed_time": "0:04:11", "remaining_time": "3:53:43", "throughput": 9702.48, "total_tokens": 2436096} +{"current_steps": 3605, "total_steps": 204665, "loss": 0.1253, "lr": 3.521766746469927e-07, "epoch": 0.08807074976180587, "percentage": 1.76, "elapsed_time": "0:04:11", "remaining_time": "3:53:43", "throughput": 9704.25, "total_tokens": 2440064} +{"current_steps": 3610, "total_steps": 204665, "loss": 0.1452, "lr": 3.526652660380124e-07, "epoch": 0.08819290059365305, "percentage": 1.76, "elapsed_time": "0:04:11", "remaining_time": "3:53:42", "throughput": 9703.47, "total_tokens": 2443200} +{"current_steps": 3615, "total_steps": 204665, "loss": 0.2102, "lr": 3.531538574290321e-07, "epoch": 0.08831505142550021, "percentage": 1.77, "elapsed_time": "0:04:12", "remaining_time": "3:53:43", "throughput": 9704.24, "total_tokens": 2446848} +{"current_steps": 3620, "total_steps": 204665, "loss": 0.131, "lr": 3.536424488200518e-07, "epoch": 0.08843720225734737, "percentage": 1.77, "elapsed_time": "0:04:12", "remaining_time": "3:53:42", "throughput": 9704.64, "total_tokens": 2450368} +{"current_steps": 3625, "total_steps": 204665, "loss": 0.109, "lr": 3.541310402110715e-07, "epoch": 0.08855935308919453, "percentage": 1.77, "elapsed_time": "0:04:12", "remaining_time": "3:53:42", "throughput": 9705.54, "total_tokens": 2454016} +{"current_steps": 3630, "total_steps": 204665, "loss": 0.1683, "lr": 3.5461963160209116e-07, "epoch": 0.0886815039210417, "percentage": 1.77, "elapsed_time": "0:04:13", "remaining_time": "3:53:42", "throughput": 9705.79, "total_tokens": 2457472} +{"current_steps": 3635, "total_steps": 204665, "loss": 0.0988, "lr": 3.5510822299311084e-07, "epoch": 0.08880365475288887, "percentage": 1.78, "elapsed_time": "0:04:13", "remaining_time": "3:53:42", "throughput": 9707.21, "total_tokens": 2461312} +{"current_steps": 3640, "total_steps": 204665, "loss": 0.1124, "lr": 3.555968143841305e-07, "epoch": 0.08892580558473603, "percentage": 1.78, "elapsed_time": "0:04:13", "remaining_time": "3:53:42", "throughput": 9706.59, "total_tokens": 2464512} +{"current_steps": 3645, "total_steps": 204665, "loss": 0.1564, "lr": 3.5608540577515025e-07, "epoch": 0.0890479564165832, "percentage": 1.78, "elapsed_time": "0:04:14", "remaining_time": "3:53:42", "throughput": 9707.81, "total_tokens": 2468288} +{"current_steps": 3650, "total_steps": 204665, "loss": 0.1885, "lr": 3.5657399716616993e-07, "epoch": 0.08917010724843036, "percentage": 1.78, "elapsed_time": "0:04:14", "remaining_time": "3:53:41", "throughput": 9707.08, "total_tokens": 2471424} +{"current_steps": 3655, "total_steps": 204665, "loss": 0.0834, "lr": 3.570625885571896e-07, "epoch": 0.08929225808027752, "percentage": 1.79, "elapsed_time": "0:04:14", "remaining_time": "3:53:41", "throughput": 9707.77, "total_tokens": 2475008} +{"current_steps": 3660, "total_steps": 204665, "loss": 0.1104, "lr": 3.5755117994820924e-07, "epoch": 0.0894144089121247, "percentage": 1.79, "elapsed_time": "0:04:15", "remaining_time": "3:53:40", "throughput": 9706.74, "total_tokens": 2478080} +{"current_steps": 3665, "total_steps": 204665, "loss": 0.136, "lr": 3.58039771339229e-07, "epoch": 0.08953655974397186, "percentage": 1.79, "elapsed_time": "0:04:15", "remaining_time": "3:53:40", "throughput": 9706.73, "total_tokens": 2481472} +{"current_steps": 3670, "total_steps": 204665, "loss": 0.2465, "lr": 3.5852836273024866e-07, "epoch": 0.08965871057581902, "percentage": 1.79, "elapsed_time": "0:04:15", "remaining_time": "3:53:39", "throughput": 9706.4, "total_tokens": 2484736} +{"current_steps": 3675, "total_steps": 204665, "loss": 0.0973, "lr": 3.590169541212684e-07, "epoch": 0.08978086140766618, "percentage": 1.8, "elapsed_time": "0:04:16", "remaining_time": "3:53:39", "throughput": 9705.55, "total_tokens": 2487872} +{"current_steps": 3680, "total_steps": 204665, "loss": 0.1254, "lr": 3.595055455122881e-07, "epoch": 0.08990301223951336, "percentage": 1.8, "elapsed_time": "0:04:16", "remaining_time": "3:53:39", "throughput": 9705.92, "total_tokens": 2491392} +{"current_steps": 3685, "total_steps": 204665, "loss": 0.1701, "lr": 3.5999413690330776e-07, "epoch": 0.09002516307136052, "percentage": 1.8, "elapsed_time": "0:04:17", "remaining_time": "3:53:38", "throughput": 9705.91, "total_tokens": 2494784} +{"current_steps": 3690, "total_steps": 204665, "loss": 0.0829, "lr": 3.604827282943275e-07, "epoch": 0.09014731390320768, "percentage": 1.8, "elapsed_time": "0:04:17", "remaining_time": "3:53:38", "throughput": 9705.38, "total_tokens": 2497984} +{"current_steps": 3695, "total_steps": 204665, "loss": 0.2115, "lr": 3.609713196853471e-07, "epoch": 0.09026946473505484, "percentage": 1.81, "elapsed_time": "0:04:17", "remaining_time": "3:53:37", "throughput": 9704.31, "total_tokens": 2500992} +{"current_steps": 3700, "total_steps": 204665, "loss": 0.1073, "lr": 3.614599110763668e-07, "epoch": 0.090391615566902, "percentage": 1.81, "elapsed_time": "0:04:18", "remaining_time": "3:53:37", "throughput": 9704.29, "total_tokens": 2504384} +{"current_steps": 3705, "total_steps": 204665, "loss": 0.1459, "lr": 3.619485024673865e-07, "epoch": 0.09051376639874918, "percentage": 1.81, "elapsed_time": "0:04:18", "remaining_time": "3:53:37", "throughput": 9705.25, "total_tokens": 2508096} +{"current_steps": 3710, "total_steps": 204665, "loss": 0.2189, "lr": 3.624370938584062e-07, "epoch": 0.09063591723059634, "percentage": 1.81, "elapsed_time": "0:04:18", "remaining_time": "3:53:36", "throughput": 9704.54, "total_tokens": 2511232} +{"current_steps": 3715, "total_steps": 204665, "loss": 0.1667, "lr": 3.629256852494259e-07, "epoch": 0.0907580680624435, "percentage": 1.82, "elapsed_time": "0:04:19", "remaining_time": "3:53:35", "throughput": 9703.37, "total_tokens": 2514240} +{"current_steps": 3720, "total_steps": 204665, "loss": 0.1413, "lr": 3.6341427664044563e-07, "epoch": 0.09088021889429067, "percentage": 1.82, "elapsed_time": "0:04:19", "remaining_time": "3:53:36", "throughput": 9704.84, "total_tokens": 2518144} +{"current_steps": 3725, "total_steps": 204665, "loss": 0.1179, "lr": 3.6390286803146526e-07, "epoch": 0.09100236972613783, "percentage": 1.82, "elapsed_time": "0:04:19", "remaining_time": "3:53:35", "throughput": 9704.3, "total_tokens": 2521344} +{"current_steps": 3730, "total_steps": 204665, "loss": 0.2325, "lr": 3.6439145942248494e-07, "epoch": 0.091124520557985, "percentage": 1.82, "elapsed_time": "0:04:20", "remaining_time": "3:53:34", "throughput": 9703.02, "total_tokens": 2524288} +{"current_steps": 3735, "total_steps": 204665, "loss": 0.1381, "lr": 3.648800508135046e-07, "epoch": 0.09124667138983217, "percentage": 1.82, "elapsed_time": "0:04:20", "remaining_time": "3:53:34", "throughput": 9704.18, "total_tokens": 2528064} +{"current_steps": 3740, "total_steps": 204665, "loss": 0.1633, "lr": 3.6536864220452436e-07, "epoch": 0.09136882222167933, "percentage": 1.83, "elapsed_time": "0:04:20", "remaining_time": "3:53:34", "throughput": 9703.91, "total_tokens": 2531392} +{"current_steps": 3745, "total_steps": 204665, "loss": 0.0835, "lr": 3.6585723359554404e-07, "epoch": 0.09149097305352649, "percentage": 1.83, "elapsed_time": "0:04:21", "remaining_time": "3:53:34", "throughput": 9703.65, "total_tokens": 2534720} +{"current_steps": 3750, "total_steps": 204665, "loss": 0.2312, "lr": 3.663458249865637e-07, "epoch": 0.09161312388537365, "percentage": 1.83, "elapsed_time": "0:04:21", "remaining_time": "3:53:34", "throughput": 9704.23, "total_tokens": 2538304} +{"current_steps": 3755, "total_steps": 204665, "loss": 0.1795, "lr": 3.668344163775834e-07, "epoch": 0.09173527471722083, "percentage": 1.83, "elapsed_time": "0:04:21", "remaining_time": "3:53:33", "throughput": 9704.21, "total_tokens": 2541696} +{"current_steps": 3760, "total_steps": 204665, "loss": 0.1341, "lr": 3.673230077686031e-07, "epoch": 0.09185742554906799, "percentage": 1.84, "elapsed_time": "0:04:22", "remaining_time": "3:53:33", "throughput": 9704.57, "total_tokens": 2545216} +{"current_steps": 3765, "total_steps": 204665, "loss": 0.126, "lr": 3.6781159915962276e-07, "epoch": 0.09197957638091515, "percentage": 1.84, "elapsed_time": "0:04:22", "remaining_time": "3:53:33", "throughput": 9704.16, "total_tokens": 2548480} +{"current_steps": 3770, "total_steps": 204665, "loss": 0.0626, "lr": 3.683001905506425e-07, "epoch": 0.09210172721276232, "percentage": 1.84, "elapsed_time": "0:04:22", "remaining_time": "3:53:32", "throughput": 9703.99, "total_tokens": 2551808} +{"current_steps": 3775, "total_steps": 204665, "loss": 0.1606, "lr": 3.687887819416622e-07, "epoch": 0.09222387804460948, "percentage": 1.84, "elapsed_time": "0:04:23", "remaining_time": "3:53:32", "throughput": 9704.04, "total_tokens": 2555200} +{"current_steps": 3780, "total_steps": 204665, "loss": 0.1056, "lr": 3.6927737333268186e-07, "epoch": 0.09234602887645665, "percentage": 1.85, "elapsed_time": "0:04:23", "remaining_time": "3:53:31", "throughput": 9703.25, "total_tokens": 2558336} +{"current_steps": 3785, "total_steps": 204665, "loss": 0.148, "lr": 3.6976596472370154e-07, "epoch": 0.09246817970830382, "percentage": 1.85, "elapsed_time": "0:04:24", "remaining_time": "3:53:31", "throughput": 9703.68, "total_tokens": 2561856} +{"current_steps": 3790, "total_steps": 204665, "loss": 0.2257, "lr": 3.702545561147212e-07, "epoch": 0.09259033054015098, "percentage": 1.85, "elapsed_time": "0:04:24", "remaining_time": "3:53:30", "throughput": 9703.43, "total_tokens": 2565120} +{"current_steps": 3795, "total_steps": 204665, "loss": 0.1475, "lr": 3.707431475057409e-07, "epoch": 0.09271248137199814, "percentage": 1.85, "elapsed_time": "0:04:24", "remaining_time": "3:53:30", "throughput": 9703.98, "total_tokens": 2568704} +{"current_steps": 3800, "total_steps": 204665, "loss": 0.1184, "lr": 3.7123173889676064e-07, "epoch": 0.0928346322038453, "percentage": 1.86, "elapsed_time": "0:04:25", "remaining_time": "3:53:30", "throughput": 9704.61, "total_tokens": 2572288} +{"current_steps": 3805, "total_steps": 204665, "loss": 0.1522, "lr": 3.717203302877803e-07, "epoch": 0.09295678303569248, "percentage": 1.86, "elapsed_time": "0:04:25", "remaining_time": "3:53:30", "throughput": 9705.24, "total_tokens": 2575872} +{"current_steps": 3810, "total_steps": 204665, "loss": 0.1005, "lr": 3.722089216788e-07, "epoch": 0.09307893386753964, "percentage": 1.86, "elapsed_time": "0:04:25", "remaining_time": "3:53:29", "throughput": 9703.45, "total_tokens": 2578624} +{"current_steps": 3815, "total_steps": 204665, "loss": 0.075, "lr": 3.726975130698197e-07, "epoch": 0.0932010846993868, "percentage": 1.86, "elapsed_time": "0:04:26", "remaining_time": "3:53:28", "throughput": 9703.18, "total_tokens": 2581888} +{"current_steps": 3820, "total_steps": 204665, "loss": 0.1201, "lr": 3.7318610446083936e-07, "epoch": 0.09332323553123396, "percentage": 1.87, "elapsed_time": "0:04:26", "remaining_time": "3:53:28", "throughput": 9702.97, "total_tokens": 2585216} +{"current_steps": 3825, "total_steps": 204665, "loss": 0.1435, "lr": 3.7367469585185904e-07, "epoch": 0.09344538636308113, "percentage": 1.87, "elapsed_time": "0:04:26", "remaining_time": "3:53:27", "throughput": 9702.63, "total_tokens": 2588480} +{"current_steps": 3830, "total_steps": 204665, "loss": 0.1068, "lr": 3.741632872428788e-07, "epoch": 0.0935675371949283, "percentage": 1.87, "elapsed_time": "0:04:27", "remaining_time": "3:53:27", "throughput": 9701.57, "total_tokens": 2591488} +{"current_steps": 3835, "total_steps": 204665, "loss": 0.0608, "lr": 3.7465187863389846e-07, "epoch": 0.09368968802677546, "percentage": 1.87, "elapsed_time": "0:04:27", "remaining_time": "3:53:26", "throughput": 9700.65, "total_tokens": 2594560} +{"current_steps": 3840, "total_steps": 204665, "loss": 0.166, "lr": 3.7514047002491814e-07, "epoch": 0.09381183885862263, "percentage": 1.88, "elapsed_time": "0:04:27", "remaining_time": "3:53:26", "throughput": 9700.68, "total_tokens": 2597952} +{"current_steps": 3845, "total_steps": 204665, "loss": 0.2698, "lr": 3.756290614159378e-07, "epoch": 0.09393398969046979, "percentage": 1.88, "elapsed_time": "0:04:28", "remaining_time": "3:53:26", "throughput": 9703.09, "total_tokens": 2602112} +{"current_steps": 3850, "total_steps": 204665, "loss": 0.1339, "lr": 3.761176528069575e-07, "epoch": 0.09405614052231696, "percentage": 1.88, "elapsed_time": "0:04:28", "remaining_time": "3:53:25", "throughput": 9702.26, "total_tokens": 2605184} +{"current_steps": 3855, "total_steps": 204665, "loss": 0.1611, "lr": 3.766062441979772e-07, "epoch": 0.09417829135416413, "percentage": 1.88, "elapsed_time": "0:04:28", "remaining_time": "3:53:24", "throughput": 9701.86, "total_tokens": 2608384} +{"current_steps": 3860, "total_steps": 204665, "loss": 0.0959, "lr": 3.770948355889969e-07, "epoch": 0.09430044218601129, "percentage": 1.89, "elapsed_time": "0:04:29", "remaining_time": "3:53:24", "throughput": 9702.49, "total_tokens": 2611968} +{"current_steps": 3865, "total_steps": 204665, "loss": 0.3012, "lr": 3.775834269800166e-07, "epoch": 0.09442259301785845, "percentage": 1.89, "elapsed_time": "0:04:29", "remaining_time": "3:53:23", "throughput": 9702.03, "total_tokens": 2615168} +{"current_steps": 3870, "total_steps": 204665, "loss": 0.1067, "lr": 3.780720183710363e-07, "epoch": 0.09454474384970561, "percentage": 1.89, "elapsed_time": "0:04:29", "remaining_time": "3:53:23", "throughput": 9702.43, "total_tokens": 2618688} +{"current_steps": 3875, "total_steps": 204665, "loss": 0.1673, "lr": 3.7856060976205596e-07, "epoch": 0.09466689468155279, "percentage": 1.89, "elapsed_time": "0:04:30", "remaining_time": "3:53:23", "throughput": 9702.54, "total_tokens": 2622080} +{"current_steps": 3880, "total_steps": 204665, "loss": 0.1458, "lr": 3.7904920115307564e-07, "epoch": 0.09478904551339995, "percentage": 1.9, "elapsed_time": "0:04:30", "remaining_time": "3:53:22", "throughput": 9701.86, "total_tokens": 2625216} +{"current_steps": 3885, "total_steps": 204665, "loss": 0.0596, "lr": 3.795377925440953e-07, "epoch": 0.09491119634524711, "percentage": 1.9, "elapsed_time": "0:04:30", "remaining_time": "3:53:22", "throughput": 9702.27, "total_tokens": 2628736} +{"current_steps": 3890, "total_steps": 204665, "loss": 0.0805, "lr": 3.8002638393511506e-07, "epoch": 0.09503334717709427, "percentage": 1.9, "elapsed_time": "0:04:31", "remaining_time": "3:53:21", "throughput": 9701.22, "total_tokens": 2631744} +{"current_steps": 3895, "total_steps": 204665, "loss": 0.1671, "lr": 3.8051497532613474e-07, "epoch": 0.09515549800894144, "percentage": 1.9, "elapsed_time": "0:04:31", "remaining_time": "3:53:20", "throughput": 9700.02, "total_tokens": 2634688} +{"current_steps": 3900, "total_steps": 204665, "loss": 0.1827, "lr": 3.810035667171544e-07, "epoch": 0.09527764884078861, "percentage": 1.91, "elapsed_time": "0:04:31", "remaining_time": "3:53:19", "throughput": 9699.34, "total_tokens": 2637824} +{"current_steps": 3905, "total_steps": 204665, "loss": 0.1409, "lr": 3.814921581081741e-07, "epoch": 0.09539979967263577, "percentage": 1.91, "elapsed_time": "0:04:32", "remaining_time": "3:53:18", "throughput": 9698.17, "total_tokens": 2640768} +{"current_steps": 3910, "total_steps": 204665, "loss": 0.256, "lr": 3.819807494991938e-07, "epoch": 0.09552195050448294, "percentage": 1.91, "elapsed_time": "0:04:32", "remaining_time": "3:53:17", "throughput": 9696.86, "total_tokens": 2643648} +{"current_steps": 3915, "total_steps": 204665, "loss": 0.155, "lr": 3.8246934089021347e-07, "epoch": 0.0956441013363301, "percentage": 1.91, "elapsed_time": "0:04:32", "remaining_time": "3:53:17", "throughput": 9697.72, "total_tokens": 2647296} +{"current_steps": 3920, "total_steps": 204665, "loss": 0.1863, "lr": 3.829579322812332e-07, "epoch": 0.09576625216817726, "percentage": 1.92, "elapsed_time": "0:04:33", "remaining_time": "3:53:16", "throughput": 9696.9, "total_tokens": 2650368} +{"current_steps": 3925, "total_steps": 204665, "loss": 0.1506, "lr": 3.834465236722529e-07, "epoch": 0.09588840300002444, "percentage": 1.92, "elapsed_time": "0:04:33", "remaining_time": "3:53:16", "throughput": 9696.79, "total_tokens": 2653696} +{"current_steps": 3930, "total_steps": 204665, "loss": 0.1153, "lr": 3.8393511506327256e-07, "epoch": 0.0960105538318716, "percentage": 1.92, "elapsed_time": "0:04:34", "remaining_time": "3:53:15", "throughput": 9696.24, "total_tokens": 2656896} +{"current_steps": 3935, "total_steps": 204665, "loss": 0.0772, "lr": 3.844237064542923e-07, "epoch": 0.09613270466371876, "percentage": 1.92, "elapsed_time": "0:04:34", "remaining_time": "3:53:15", "throughput": 9696.03, "total_tokens": 2660160} +{"current_steps": 3940, "total_steps": 204665, "loss": 0.1788, "lr": 3.8491229784531193e-07, "epoch": 0.09625485549556592, "percentage": 1.93, "elapsed_time": "0:04:34", "remaining_time": "3:53:14", "throughput": 9695.15, "total_tokens": 2663232} +{"current_steps": 3945, "total_steps": 204665, "loss": 0.0492, "lr": 3.854008892363316e-07, "epoch": 0.09637700632741308, "percentage": 1.93, "elapsed_time": "0:04:35", "remaining_time": "3:53:13", "throughput": 9694.7, "total_tokens": 2666432} +{"current_steps": 3950, "total_steps": 204665, "loss": 0.2631, "lr": 3.8588948062735134e-07, "epoch": 0.09649915715926026, "percentage": 1.93, "elapsed_time": "0:04:35", "remaining_time": "3:53:13", "throughput": 9693.83, "total_tokens": 2669504} +{"current_steps": 3955, "total_steps": 204665, "loss": 0.0781, "lr": 3.86378072018371e-07, "epoch": 0.09662130799110742, "percentage": 1.93, "elapsed_time": "0:04:35", "remaining_time": "3:53:12", "throughput": 9693.87, "total_tokens": 2672896} +{"current_steps": 3960, "total_steps": 204665, "loss": 0.0871, "lr": 3.868666634093907e-07, "epoch": 0.09674345882295458, "percentage": 1.93, "elapsed_time": "0:04:36", "remaining_time": "3:53:12", "throughput": 9693.34, "total_tokens": 2676096} +{"current_steps": 3965, "total_steps": 204665, "loss": 0.0841, "lr": 3.8735525480041044e-07, "epoch": 0.09686560965480175, "percentage": 1.94, "elapsed_time": "0:04:36", "remaining_time": "3:53:11", "throughput": 9692.54, "total_tokens": 2679168} +{"current_steps": 3970, "total_steps": 204665, "loss": 0.18, "lr": 3.8784384619143007e-07, "epoch": 0.09698776048664891, "percentage": 1.94, "elapsed_time": "0:04:36", "remaining_time": "3:53:10", "throughput": 9691.83, "total_tokens": 2682240} +{"current_steps": 3975, "total_steps": 204665, "loss": 0.2067, "lr": 3.8833243758244975e-07, "epoch": 0.09710991131849608, "percentage": 1.94, "elapsed_time": "0:04:37", "remaining_time": "3:53:10", "throughput": 9692.99, "total_tokens": 2686016} +{"current_steps": 3980, "total_steps": 204665, "loss": 0.0556, "lr": 3.888210289734695e-07, "epoch": 0.09723206215034325, "percentage": 1.94, "elapsed_time": "0:04:37", "remaining_time": "3:53:09", "throughput": 9692.18, "total_tokens": 2689088} +{"current_steps": 3985, "total_steps": 204665, "loss": 0.215, "lr": 3.8930962036448916e-07, "epoch": 0.09735421298219041, "percentage": 1.95, "elapsed_time": "0:04:37", "remaining_time": "3:53:09", "throughput": 9692.09, "total_tokens": 2692416} +{"current_steps": 3990, "total_steps": 204665, "loss": 0.1439, "lr": 3.8979821175550885e-07, "epoch": 0.09747636381403757, "percentage": 1.95, "elapsed_time": "0:04:38", "remaining_time": "3:53:08", "throughput": 9692.09, "total_tokens": 2695744} +{"current_steps": 3995, "total_steps": 204665, "loss": 0.1125, "lr": 3.902868031465286e-07, "epoch": 0.09759851464588473, "percentage": 1.95, "elapsed_time": "0:04:38", "remaining_time": "3:53:08", "throughput": 9691.51, "total_tokens": 2698880} +{"current_steps": 4000, "total_steps": 204665, "loss": 0.1243, "lr": 3.907753945375482e-07, "epoch": 0.09772066547773191, "percentage": 1.95, "elapsed_time": "0:04:38", "remaining_time": "3:53:08", "throughput": 9693.76, "total_tokens": 2703040} +{"current_steps": 4005, "total_steps": 204665, "loss": 0.137, "lr": 3.912639859285679e-07, "epoch": 0.09784281630957907, "percentage": 1.96, "elapsed_time": "0:04:39", "remaining_time": "3:53:08", "throughput": 9694.92, "total_tokens": 2706816} +{"current_steps": 4010, "total_steps": 204665, "loss": 0.1429, "lr": 3.917525773195876e-07, "epoch": 0.09796496714142623, "percentage": 1.96, "elapsed_time": "0:04:39", "remaining_time": "3:53:08", "throughput": 9695.23, "total_tokens": 2710336} +{"current_steps": 4015, "total_steps": 204665, "loss": 0.0883, "lr": 3.922411687106073e-07, "epoch": 0.0980871179732734, "percentage": 1.96, "elapsed_time": "0:04:39", "remaining_time": "3:53:07", "throughput": 9694.22, "total_tokens": 2713344} +{"current_steps": 4020, "total_steps": 204665, "loss": 0.1977, "lr": 3.92729760101627e-07, "epoch": 0.09820926880512057, "percentage": 1.96, "elapsed_time": "0:04:40", "remaining_time": "3:53:07", "throughput": 9694.29, "total_tokens": 2716736} +{"current_steps": 4025, "total_steps": 204665, "loss": 0.2043, "lr": 3.932183514926467e-07, "epoch": 0.09833141963696773, "percentage": 1.97, "elapsed_time": "0:04:40", "remaining_time": "3:53:07", "throughput": 9695.81, "total_tokens": 2720640} +{"current_steps": 4030, "total_steps": 204665, "loss": 0.257, "lr": 3.9370694288366635e-07, "epoch": 0.0984535704688149, "percentage": 1.97, "elapsed_time": "0:04:40", "remaining_time": "3:53:06", "throughput": 9695.36, "total_tokens": 2723840} +{"current_steps": 4035, "total_steps": 204665, "loss": 0.094, "lr": 3.9419553427468603e-07, "epoch": 0.09857572130066206, "percentage": 1.97, "elapsed_time": "0:04:41", "remaining_time": "3:53:06", "throughput": 9694.71, "total_tokens": 2726976} +{"current_steps": 4040, "total_steps": 204665, "loss": 0.2761, "lr": 3.9468412566570576e-07, "epoch": 0.09869787213250922, "percentage": 1.97, "elapsed_time": "0:04:41", "remaining_time": "3:53:05", "throughput": 9694.84, "total_tokens": 2730368} +{"current_steps": 4045, "total_steps": 204665, "loss": 0.1444, "lr": 3.9517271705672545e-07, "epoch": 0.0988200229643564, "percentage": 1.98, "elapsed_time": "0:04:41", "remaining_time": "3:53:05", "throughput": 9694.18, "total_tokens": 2733504} +{"current_steps": 4050, "total_steps": 204665, "loss": 0.1586, "lr": 3.9566130844774513e-07, "epoch": 0.09894217379620356, "percentage": 1.98, "elapsed_time": "0:04:42", "remaining_time": "3:53:04", "throughput": 9693.84, "total_tokens": 2736704} +{"current_steps": 4055, "total_steps": 204665, "loss": 0.0806, "lr": 3.9614989983876486e-07, "epoch": 0.09906432462805072, "percentage": 1.98, "elapsed_time": "0:04:42", "remaining_time": "3:53:03", "throughput": 9693.39, "total_tokens": 2739904} +{"current_steps": 4060, "total_steps": 204665, "loss": 0.0966, "lr": 3.966384912297845e-07, "epoch": 0.09918647545989788, "percentage": 1.98, "elapsed_time": "0:04:43", "remaining_time": "3:53:03", "throughput": 9693.77, "total_tokens": 2743424} +{"current_steps": 4065, "total_steps": 204665, "loss": 0.0883, "lr": 3.9712708262080417e-07, "epoch": 0.09930862629174504, "percentage": 1.99, "elapsed_time": "0:04:43", "remaining_time": "3:53:02", "throughput": 9693.19, "total_tokens": 2746560} +{"current_steps": 4070, "total_steps": 204665, "loss": 0.2489, "lr": 3.976156740118239e-07, "epoch": 0.09943077712359222, "percentage": 1.99, "elapsed_time": "0:04:43", "remaining_time": "3:53:02", "throughput": 9694.9, "total_tokens": 2750528} +{"current_steps": 4075, "total_steps": 204665, "loss": 0.1553, "lr": 3.981042654028436e-07, "epoch": 0.09955292795543938, "percentage": 1.99, "elapsed_time": "0:04:44", "remaining_time": "3:53:02", "throughput": 9695.3, "total_tokens": 2754048} +{"current_steps": 4080, "total_steps": 204665, "loss": 0.2288, "lr": 3.9859285679386327e-07, "epoch": 0.09967507878728654, "percentage": 1.99, "elapsed_time": "0:04:44", "remaining_time": "3:53:02", "throughput": 9695.92, "total_tokens": 2757632} +{"current_steps": 4085, "total_steps": 204665, "loss": 0.1119, "lr": 3.99081448184883e-07, "epoch": 0.0997972296191337, "percentage": 2.0, "elapsed_time": "0:04:44", "remaining_time": "3:53:02", "throughput": 9696.97, "total_tokens": 2761408} +{"current_steps": 4090, "total_steps": 204665, "loss": 0.1223, "lr": 3.9957003957590263e-07, "epoch": 0.09991938045098087, "percentage": 2.0, "elapsed_time": "0:04:45", "remaining_time": "3:53:02", "throughput": 9696.5, "total_tokens": 2764608} +{"current_steps": 4095, "total_steps": 204665, "loss": 0.1659, "lr": 4.000586309669223e-07, "epoch": 0.10004153128282804, "percentage": 2.0, "elapsed_time": "0:04:45", "remaining_time": "3:53:01", "throughput": 9695.5, "total_tokens": 2767616} +{"current_steps": 4100, "total_steps": 204665, "loss": 0.0863, "lr": 4.0054722235794205e-07, "epoch": 0.1001636821146752, "percentage": 2.0, "elapsed_time": "0:04:45", "remaining_time": "3:53:00", "throughput": 9694.65, "total_tokens": 2770688} +{"current_steps": 4105, "total_steps": 204665, "loss": 0.1221, "lr": 4.0103581374896173e-07, "epoch": 0.10028583294652237, "percentage": 2.01, "elapsed_time": "0:04:46", "remaining_time": "3:52:59", "throughput": 9694.07, "total_tokens": 2773824} +{"current_steps": 4110, "total_steps": 204665, "loss": 0.0746, "lr": 4.015244051399814e-07, "epoch": 0.10040798377836953, "percentage": 2.01, "elapsed_time": "0:04:46", "remaining_time": "3:52:59", "throughput": 9694.52, "total_tokens": 2777344} +{"current_steps": 4115, "total_steps": 204665, "loss": 0.0951, "lr": 4.0201299653100114e-07, "epoch": 0.10053013461021669, "percentage": 2.01, "elapsed_time": "0:04:46", "remaining_time": "3:53:00", "throughput": 9696.81, "total_tokens": 2781632} +{"current_steps": 4120, "total_steps": 204665, "loss": 0.2181, "lr": 4.0250158792202077e-07, "epoch": 0.10065228544206387, "percentage": 2.01, "elapsed_time": "0:04:47", "remaining_time": "3:53:00", "throughput": 9696.83, "total_tokens": 2785024} +{"current_steps": 4125, "total_steps": 204665, "loss": 0.2985, "lr": 4.0299017931304045e-07, "epoch": 0.10077443627391103, "percentage": 2.02, "elapsed_time": "0:04:47", "remaining_time": "3:52:59", "throughput": 9696.85, "total_tokens": 2788416} +{"current_steps": 4130, "total_steps": 204665, "loss": 0.1828, "lr": 4.034787707040602e-07, "epoch": 0.10089658710575819, "percentage": 2.02, "elapsed_time": "0:04:47", "remaining_time": "3:52:59", "throughput": 9697.12, "total_tokens": 2791872} +{"current_steps": 4135, "total_steps": 204665, "loss": 0.0847, "lr": 4.0396736209507987e-07, "epoch": 0.10101873793760535, "percentage": 2.02, "elapsed_time": "0:04:48", "remaining_time": "3:52:59", "throughput": 9696.94, "total_tokens": 2795200} +{"current_steps": 4140, "total_steps": 204665, "loss": 0.1232, "lr": 4.0445595348609955e-07, "epoch": 0.10114088876945251, "percentage": 2.02, "elapsed_time": "0:04:48", "remaining_time": "3:52:59", "throughput": 9697.52, "total_tokens": 2798784} +{"current_steps": 4145, "total_steps": 204665, "loss": 0.1623, "lr": 4.049445448771193e-07, "epoch": 0.10126303960129969, "percentage": 2.03, "elapsed_time": "0:04:48", "remaining_time": "3:52:58", "throughput": 9698.31, "total_tokens": 2802432} +{"current_steps": 4150, "total_steps": 204665, "loss": 0.1074, "lr": 4.0543313626813897e-07, "epoch": 0.10138519043314685, "percentage": 2.03, "elapsed_time": "0:04:49", "remaining_time": "3:52:58", "throughput": 9699.26, "total_tokens": 2806144} +{"current_steps": 4155, "total_steps": 204665, "loss": 0.116, "lr": 4.059217276591586e-07, "epoch": 0.10150734126499401, "percentage": 2.03, "elapsed_time": "0:04:49", "remaining_time": "3:52:58", "throughput": 9699.16, "total_tokens": 2809472} +{"current_steps": 4160, "total_steps": 204665, "loss": 0.1349, "lr": 4.0641031905017833e-07, "epoch": 0.10162949209684118, "percentage": 2.03, "elapsed_time": "0:04:50", "remaining_time": "3:52:58", "throughput": 9701.26, "total_tokens": 2813632} +{"current_steps": 4165, "total_steps": 204665, "loss": 0.1785, "lr": 4.06898910441198e-07, "epoch": 0.10175164292868834, "percentage": 2.04, "elapsed_time": "0:04:50", "remaining_time": "3:52:58", "throughput": 9700.08, "total_tokens": 2816576} +{"current_steps": 4170, "total_steps": 204665, "loss": 0.1915, "lr": 4.073875018322177e-07, "epoch": 0.10187379376053551, "percentage": 2.04, "elapsed_time": "0:04:50", "remaining_time": "3:52:57", "throughput": 9699.87, "total_tokens": 2819904} +{"current_steps": 4175, "total_steps": 204665, "loss": 0.09, "lr": 4.078760932232374e-07, "epoch": 0.10199594459238268, "percentage": 2.04, "elapsed_time": "0:04:51", "remaining_time": "3:52:57", "throughput": 9700.11, "total_tokens": 2823360} +{"current_steps": 4180, "total_steps": 204665, "loss": 0.1443, "lr": 4.083646846142571e-07, "epoch": 0.10211809542422984, "percentage": 2.04, "elapsed_time": "0:04:51", "remaining_time": "3:52:56", "throughput": 9700.23, "total_tokens": 2826752} +{"current_steps": 4185, "total_steps": 204665, "loss": 0.1226, "lr": 4.0885327600527673e-07, "epoch": 0.102240246256077, "percentage": 2.04, "elapsed_time": "0:04:51", "remaining_time": "3:52:56", "throughput": 9699.95, "total_tokens": 2830016} +{"current_steps": 4190, "total_steps": 204665, "loss": 0.1663, "lr": 4.0934186739629647e-07, "epoch": 0.10236239708792418, "percentage": 2.05, "elapsed_time": "0:04:52", "remaining_time": "3:52:55", "throughput": 9700.13, "total_tokens": 2833408} +{"current_steps": 4195, "total_steps": 204665, "loss": 0.1124, "lr": 4.0983045878731615e-07, "epoch": 0.10248454791977134, "percentage": 2.05, "elapsed_time": "0:04:52", "remaining_time": "3:52:55", "throughput": 9699.37, "total_tokens": 2836480} +{"current_steps": 4200, "total_steps": 204665, "loss": 0.1346, "lr": 4.1031905017833583e-07, "epoch": 0.1026066987516185, "percentage": 2.05, "elapsed_time": "0:04:52", "remaining_time": "3:52:54", "throughput": 9699.09, "total_tokens": 2839744} +{"current_steps": 4205, "total_steps": 204665, "loss": 0.1257, "lr": 4.1080764156935557e-07, "epoch": 0.10272884958346566, "percentage": 2.05, "elapsed_time": "0:04:53", "remaining_time": "3:52:53", "throughput": 9698.43, "total_tokens": 2842880} +{"current_steps": 4210, "total_steps": 204665, "loss": 0.0673, "lr": 4.1129623296037525e-07, "epoch": 0.10285100041531282, "percentage": 2.06, "elapsed_time": "0:04:53", "remaining_time": "3:52:53", "throughput": 9698.55, "total_tokens": 2846272} +{"current_steps": 4215, "total_steps": 204665, "loss": 0.0861, "lr": 4.117848243513949e-07, "epoch": 0.10297315124716, "percentage": 2.06, "elapsed_time": "0:04:53", "remaining_time": "3:52:52", "throughput": 9698.34, "total_tokens": 2849536} +{"current_steps": 4220, "total_steps": 204665, "loss": 0.1275, "lr": 4.122734157424146e-07, "epoch": 0.10309530207900716, "percentage": 2.06, "elapsed_time": "0:04:54", "remaining_time": "3:52:52", "throughput": 9698.14, "total_tokens": 2852800} +{"current_steps": 4225, "total_steps": 204665, "loss": 0.1408, "lr": 4.127620071334343e-07, "epoch": 0.10321745291085432, "percentage": 2.06, "elapsed_time": "0:04:54", "remaining_time": "3:52:51", "throughput": 9697.74, "total_tokens": 2856000} +{"current_steps": 4230, "total_steps": 204665, "loss": 0.1893, "lr": 4.1325059852445397e-07, "epoch": 0.10333960374270149, "percentage": 2.07, "elapsed_time": "0:04:54", "remaining_time": "3:52:50", "throughput": 9696.58, "total_tokens": 2858944} +{"current_steps": 4235, "total_steps": 204665, "loss": 0.1007, "lr": 4.137391899154737e-07, "epoch": 0.10346175457454865, "percentage": 2.07, "elapsed_time": "0:04:55", "remaining_time": "3:52:50", "throughput": 9696.29, "total_tokens": 2862208} +{"current_steps": 4240, "total_steps": 204665, "loss": 0.0926, "lr": 4.142277813064934e-07, "epoch": 0.10358390540639582, "percentage": 2.07, "elapsed_time": "0:04:55", "remaining_time": "3:52:49", "throughput": 9695.9, "total_tokens": 2865408} +{"current_steps": 4245, "total_steps": 204665, "loss": 0.1382, "lr": 4.14716372697513e-07, "epoch": 0.10370605623824299, "percentage": 2.07, "elapsed_time": "0:04:55", "remaining_time": "3:52:49", "throughput": 9696.77, "total_tokens": 2869120} +{"current_steps": 4250, "total_steps": 204665, "loss": 0.0831, "lr": 4.1520496408853275e-07, "epoch": 0.10382820707009015, "percentage": 2.08, "elapsed_time": "0:04:56", "remaining_time": "3:52:49", "throughput": 9696.78, "total_tokens": 2872448} +{"current_steps": 4255, "total_steps": 204665, "loss": 0.2276, "lr": 4.1569355547955243e-07, "epoch": 0.10395035790193731, "percentage": 2.08, "elapsed_time": "0:04:56", "remaining_time": "3:52:48", "throughput": 9696.68, "total_tokens": 2875776} +{"current_steps": 4260, "total_steps": 204665, "loss": 0.1392, "lr": 4.161821468705721e-07, "epoch": 0.10407250873378447, "percentage": 2.08, "elapsed_time": "0:04:56", "remaining_time": "3:52:47", "throughput": 9695.64, "total_tokens": 2878720} +{"current_steps": 4265, "total_steps": 204665, "loss": 0.154, "lr": 4.1667073826159185e-07, "epoch": 0.10419465956563165, "percentage": 2.08, "elapsed_time": "0:04:57", "remaining_time": "3:52:47", "throughput": 9696.55, "total_tokens": 2882432} +{"current_steps": 4270, "total_steps": 204665, "loss": 0.1137, "lr": 4.1715932965261153e-07, "epoch": 0.10431681039747881, "percentage": 2.09, "elapsed_time": "0:04:57", "remaining_time": "3:52:47", "throughput": 9696.6, "total_tokens": 2885824} +{"current_steps": 4275, "total_steps": 204665, "loss": 0.15, "lr": 4.1764792104363116e-07, "epoch": 0.10443896122932597, "percentage": 2.09, "elapsed_time": "0:04:57", "remaining_time": "3:52:46", "throughput": 9697.18, "total_tokens": 2889408} +{"current_steps": 4280, "total_steps": 204665, "loss": 0.221, "lr": 4.181365124346509e-07, "epoch": 0.10456111206117313, "percentage": 2.09, "elapsed_time": "0:04:58", "remaining_time": "3:52:46", "throughput": 9696.24, "total_tokens": 2892416} +{"current_steps": 4285, "total_steps": 204665, "loss": 0.1015, "lr": 4.1862510382567057e-07, "epoch": 0.1046832628930203, "percentage": 2.09, "elapsed_time": "0:04:58", "remaining_time": "3:52:45", "throughput": 9695.6, "total_tokens": 2895552} +{"current_steps": 4290, "total_steps": 204665, "loss": 0.1397, "lr": 4.1911369521669025e-07, "epoch": 0.10480541372486747, "percentage": 2.1, "elapsed_time": "0:04:59", "remaining_time": "3:52:45", "throughput": 9696.4, "total_tokens": 2899264} +{"current_steps": 4295, "total_steps": 204665, "loss": 0.0841, "lr": 4.1960228660771e-07, "epoch": 0.10492756455671463, "percentage": 2.1, "elapsed_time": "0:04:59", "remaining_time": "3:52:45", "throughput": 9695.92, "total_tokens": 2902464} +{"current_steps": 4300, "total_steps": 204665, "loss": 0.1638, "lr": 4.2009087799872967e-07, "epoch": 0.1050497153885618, "percentage": 2.1, "elapsed_time": "0:04:59", "remaining_time": "3:52:45", "throughput": 9697.12, "total_tokens": 2906304} +{"current_steps": 4305, "total_steps": 204665, "loss": 0.1394, "lr": 4.205794693897493e-07, "epoch": 0.10517186622040896, "percentage": 2.1, "elapsed_time": "0:05:00", "remaining_time": "3:52:44", "throughput": 9697.03, "total_tokens": 2909632} +{"current_steps": 4310, "total_steps": 204665, "loss": 0.0915, "lr": 4.2106806078076903e-07, "epoch": 0.10529401705225612, "percentage": 2.11, "elapsed_time": "0:05:00", "remaining_time": "3:52:44", "throughput": 9697.72, "total_tokens": 2913280} +{"current_steps": 4315, "total_steps": 204665, "loss": 0.1507, "lr": 4.215566521717887e-07, "epoch": 0.1054161678841033, "percentage": 2.11, "elapsed_time": "0:05:00", "remaining_time": "3:52:44", "throughput": 9697.83, "total_tokens": 2916672} +{"current_steps": 4320, "total_steps": 204665, "loss": 0.1375, "lr": 4.220452435628084e-07, "epoch": 0.10553831871595046, "percentage": 2.11, "elapsed_time": "0:05:01", "remaining_time": "3:52:45", "throughput": 9701.19, "total_tokens": 2921472} +{"current_steps": 4325, "total_steps": 204665, "loss": 0.1117, "lr": 4.2253383495382813e-07, "epoch": 0.10566046954779762, "percentage": 2.11, "elapsed_time": "0:05:01", "remaining_time": "3:52:45", "throughput": 9700.91, "total_tokens": 2924736} +{"current_steps": 4330, "total_steps": 204665, "loss": 0.1468, "lr": 4.230224263448478e-07, "epoch": 0.10578262037964478, "percentage": 2.12, "elapsed_time": "0:05:01", "remaining_time": "3:52:44", "throughput": 9700.46, "total_tokens": 2927936} +{"current_steps": 4335, "total_steps": 204665, "loss": 0.1953, "lr": 4.2351101773586744e-07, "epoch": 0.10590477121149194, "percentage": 2.12, "elapsed_time": "0:05:02", "remaining_time": "3:52:44", "throughput": 9700.81, "total_tokens": 2931456} +{"current_steps": 4340, "total_steps": 204665, "loss": 0.1076, "lr": 4.2399960912688717e-07, "epoch": 0.10602692204333912, "percentage": 2.12, "elapsed_time": "0:05:02", "remaining_time": "3:52:44", "throughput": 9700.3, "total_tokens": 2934656} +{"current_steps": 4345, "total_steps": 204665, "loss": 0.145, "lr": 4.2448820051790685e-07, "epoch": 0.10614907287518628, "percentage": 2.12, "elapsed_time": "0:05:02", "remaining_time": "3:52:43", "throughput": 9699.53, "total_tokens": 2937728} +{"current_steps": 4350, "total_steps": 204665, "loss": 0.1833, "lr": 4.2497679190892654e-07, "epoch": 0.10627122370703344, "percentage": 2.13, "elapsed_time": "0:05:03", "remaining_time": "3:52:43", "throughput": 9699.22, "total_tokens": 2940992} +{"current_steps": 4355, "total_steps": 204665, "loss": 0.0839, "lr": 4.2546538329994627e-07, "epoch": 0.1063933745388806, "percentage": 2.13, "elapsed_time": "0:05:03", "remaining_time": "3:52:43", "throughput": 9699.88, "total_tokens": 2944640} +{"current_steps": 4360, "total_steps": 204665, "loss": 0.0561, "lr": 4.2595397469096595e-07, "epoch": 0.10651552537072777, "percentage": 2.13, "elapsed_time": "0:05:03", "remaining_time": "3:52:42", "throughput": 9700.44, "total_tokens": 2948224} +{"current_steps": 4365, "total_steps": 204665, "loss": 0.1876, "lr": 4.264425660819856e-07, "epoch": 0.10663767620257494, "percentage": 2.13, "elapsed_time": "0:05:04", "remaining_time": "3:52:43", "throughput": 9702.33, "total_tokens": 2952320} +{"current_steps": 4370, "total_steps": 204665, "loss": 0.0542, "lr": 4.269311574730053e-07, "epoch": 0.1067598270344221, "percentage": 2.14, "elapsed_time": "0:05:04", "remaining_time": "3:52:42", "throughput": 9701.69, "total_tokens": 2955456} +{"current_steps": 4375, "total_steps": 204665, "loss": 0.1754, "lr": 4.27419748864025e-07, "epoch": 0.10688197786626927, "percentage": 2.14, "elapsed_time": "0:05:04", "remaining_time": "3:52:42", "throughput": 9701.71, "total_tokens": 2958848} +{"current_steps": 4380, "total_steps": 204665, "loss": 0.0646, "lr": 4.279083402550447e-07, "epoch": 0.10700412869811643, "percentage": 2.14, "elapsed_time": "0:05:05", "remaining_time": "3:52:42", "throughput": 9703.26, "total_tokens": 2962816} +{"current_steps": 4385, "total_steps": 204665, "loss": 0.133, "lr": 4.283969316460644e-07, "epoch": 0.1071262795299636, "percentage": 2.14, "elapsed_time": "0:05:05", "remaining_time": "3:52:41", "throughput": 9702.44, "total_tokens": 2965888} +{"current_steps": 4390, "total_steps": 204665, "loss": 0.1174, "lr": 4.288855230370841e-07, "epoch": 0.10724843036181077, "percentage": 2.14, "elapsed_time": "0:05:06", "remaining_time": "3:52:41", "throughput": 9703.27, "total_tokens": 2969600} +{"current_steps": 4395, "total_steps": 204665, "loss": 0.051, "lr": 4.2937411442810377e-07, "epoch": 0.10737058119365793, "percentage": 2.15, "elapsed_time": "0:05:06", "remaining_time": "3:52:41", "throughput": 9702.96, "total_tokens": 2972864} +{"current_steps": 4400, "total_steps": 204665, "loss": 0.198, "lr": 4.2986270581912345e-07, "epoch": 0.10749273202550509, "percentage": 2.15, "elapsed_time": "0:05:06", "remaining_time": "3:52:40", "throughput": 9702.93, "total_tokens": 2976192} +{"current_steps": 4405, "total_steps": 204665, "loss": 0.2707, "lr": 4.3035129721014314e-07, "epoch": 0.10761488285735225, "percentage": 2.15, "elapsed_time": "0:05:07", "remaining_time": "3:52:40", "throughput": 9703.73, "total_tokens": 2979840} +{"current_steps": 4410, "total_steps": 204665, "loss": 0.149, "lr": 4.308398886011628e-07, "epoch": 0.10773703368919943, "percentage": 2.15, "elapsed_time": "0:05:07", "remaining_time": "3:52:40", "throughput": 9703.62, "total_tokens": 2983168} +{"current_steps": 4415, "total_steps": 204665, "loss": 0.0545, "lr": 4.3132847999218255e-07, "epoch": 0.10785918452104659, "percentage": 2.16, "elapsed_time": "0:05:07", "remaining_time": "3:52:40", "throughput": 9704.99, "total_tokens": 2987072} +{"current_steps": 4420, "total_steps": 204665, "loss": 0.129, "lr": 4.3181707138320223e-07, "epoch": 0.10798133535289375, "percentage": 2.16, "elapsed_time": "0:05:08", "remaining_time": "3:52:39", "throughput": 9704.25, "total_tokens": 2990144} +{"current_steps": 4425, "total_steps": 204665, "loss": 0.2385, "lr": 4.323056627742219e-07, "epoch": 0.10810348618474092, "percentage": 2.16, "elapsed_time": "0:05:08", "remaining_time": "3:52:39", "throughput": 9705.99, "total_tokens": 2994176} +{"current_steps": 4430, "total_steps": 204665, "loss": 0.125, "lr": 4.3279425416524154e-07, "epoch": 0.10822563701658808, "percentage": 2.16, "elapsed_time": "0:05:08", "remaining_time": "3:52:38", "throughput": 9705.24, "total_tokens": 2997248} +{"current_steps": 4435, "total_steps": 204665, "loss": 0.2284, "lr": 4.332828455562613e-07, "epoch": 0.10834778784843525, "percentage": 2.17, "elapsed_time": "0:05:09", "remaining_time": "3:52:38", "throughput": 9705.81, "total_tokens": 3000832} +{"current_steps": 4440, "total_steps": 204665, "loss": 0.2434, "lr": 4.3377143694728096e-07, "epoch": 0.10846993868028242, "percentage": 2.17, "elapsed_time": "0:05:09", "remaining_time": "3:52:38", "throughput": 9705.51, "total_tokens": 3004096} +{"current_steps": 4445, "total_steps": 204665, "loss": 0.2088, "lr": 4.342600283383007e-07, "epoch": 0.10859208951212958, "percentage": 2.17, "elapsed_time": "0:05:09", "remaining_time": "3:52:37", "throughput": 9704.98, "total_tokens": 3007296} +{"current_steps": 4450, "total_steps": 204665, "loss": 0.1412, "lr": 4.3474861972932037e-07, "epoch": 0.10871424034397674, "percentage": 2.17, "elapsed_time": "0:05:10", "remaining_time": "3:52:37", "throughput": 9704.04, "total_tokens": 3010304} +{"current_steps": 4455, "total_steps": 204665, "loss": 0.2006, "lr": 4.3523721112034005e-07, "epoch": 0.1088363911758239, "percentage": 2.18, "elapsed_time": "0:05:10", "remaining_time": "3:52:36", "throughput": 9704.13, "total_tokens": 3013696} +{"current_steps": 4460, "total_steps": 204665, "loss": 0.1489, "lr": 4.357258025113597e-07, "epoch": 0.10895854200767108, "percentage": 2.18, "elapsed_time": "0:05:10", "remaining_time": "3:52:35", "throughput": 9703.21, "total_tokens": 3016704} +{"current_steps": 4465, "total_steps": 204665, "loss": 0.1612, "lr": 4.362143939023794e-07, "epoch": 0.10908069283951824, "percentage": 2.18, "elapsed_time": "0:05:11", "remaining_time": "3:52:35", "throughput": 9703.12, "total_tokens": 3020032} +{"current_steps": 4470, "total_steps": 204665, "loss": 0.1526, "lr": 4.367029852933991e-07, "epoch": 0.1092028436713654, "percentage": 2.18, "elapsed_time": "0:05:11", "remaining_time": "3:52:34", "throughput": 9702.7, "total_tokens": 3023232} +{"current_steps": 4475, "total_steps": 204665, "loss": 0.1318, "lr": 4.371915766844188e-07, "epoch": 0.10932499450321256, "percentage": 2.19, "elapsed_time": "0:05:11", "remaining_time": "3:52:34", "throughput": 9702.19, "total_tokens": 3026432} +{"current_steps": 4480, "total_steps": 204665, "loss": 0.0994, "lr": 4.376801680754385e-07, "epoch": 0.10944714533505973, "percentage": 2.19, "elapsed_time": "0:05:12", "remaining_time": "3:52:33", "throughput": 9701.66, "total_tokens": 3029568} +{"current_steps": 4485, "total_steps": 204665, "loss": 0.166, "lr": 4.381687594664582e-07, "epoch": 0.1095692961669069, "percentage": 2.19, "elapsed_time": "0:05:12", "remaining_time": "3:52:32", "throughput": 9700.75, "total_tokens": 3032576} +{"current_steps": 4490, "total_steps": 204665, "loss": 0.0961, "lr": 4.386573508574778e-07, "epoch": 0.10969144699875406, "percentage": 2.19, "elapsed_time": "0:05:12", "remaining_time": "3:52:32", "throughput": 9700.68, "total_tokens": 3035904} +{"current_steps": 4495, "total_steps": 204665, "loss": 0.1549, "lr": 4.3914594224849756e-07, "epoch": 0.10981359783060123, "percentage": 2.2, "elapsed_time": "0:05:13", "remaining_time": "3:52:32", "throughput": 9702.71, "total_tokens": 3040064} +{"current_steps": 4500, "total_steps": 204665, "loss": 0.1122, "lr": 4.3963453363951724e-07, "epoch": 0.10993574866244839, "percentage": 2.2, "elapsed_time": "0:05:13", "remaining_time": "3:52:32", "throughput": 9703.22, "total_tokens": 3043648} +{"current_steps": 4505, "total_steps": 204665, "loss": 0.1506, "lr": 4.401231250305369e-07, "epoch": 0.11005789949429555, "percentage": 2.2, "elapsed_time": "0:05:14", "remaining_time": "3:52:31", "throughput": 9702.32, "total_tokens": 3046656} +{"current_steps": 4510, "total_steps": 204665, "loss": 0.1529, "lr": 4.4061171642155665e-07, "epoch": 0.11018005032614273, "percentage": 2.2, "elapsed_time": "0:05:14", "remaining_time": "3:52:31", "throughput": 9702.9, "total_tokens": 3050304} +{"current_steps": 4515, "total_steps": 204665, "loss": 0.1095, "lr": 4.4110030781257634e-07, "epoch": 0.11030220115798989, "percentage": 2.21, "elapsed_time": "0:05:14", "remaining_time": "3:52:31", "throughput": 9703.56, "total_tokens": 3053952} +{"current_steps": 4520, "total_steps": 204665, "loss": 0.2287, "lr": 4.4158889920359596e-07, "epoch": 0.11042435198983705, "percentage": 2.21, "elapsed_time": "0:05:15", "remaining_time": "3:52:31", "throughput": 9703.08, "total_tokens": 3057152} +{"current_steps": 4525, "total_steps": 204665, "loss": 0.1766, "lr": 4.420774905946157e-07, "epoch": 0.11054650282168421, "percentage": 2.21, "elapsed_time": "0:05:15", "remaining_time": "3:52:30", "throughput": 9702.87, "total_tokens": 3060416} +{"current_steps": 4530, "total_steps": 204665, "loss": 0.145, "lr": 4.425660819856354e-07, "epoch": 0.11066865365353137, "percentage": 2.21, "elapsed_time": "0:05:15", "remaining_time": "3:52:30", "throughput": 9702.45, "total_tokens": 3063616} +{"current_steps": 4535, "total_steps": 204665, "loss": 0.107, "lr": 4.4305467337665506e-07, "epoch": 0.11079080448537855, "percentage": 2.22, "elapsed_time": "0:05:16", "remaining_time": "3:52:29", "throughput": 9702.67, "total_tokens": 3067072} +{"current_steps": 4540, "total_steps": 204665, "loss": 0.1432, "lr": 4.435432647676748e-07, "epoch": 0.11091295531722571, "percentage": 2.22, "elapsed_time": "0:05:16", "remaining_time": "3:52:29", "throughput": 9703.0, "total_tokens": 3070592} +{"current_steps": 4545, "total_steps": 204665, "loss": 0.1287, "lr": 4.440318561586945e-07, "epoch": 0.11103510614907287, "percentage": 2.22, "elapsed_time": "0:05:16", "remaining_time": "3:52:29", "throughput": 9703.46, "total_tokens": 3074176} +{"current_steps": 4550, "total_steps": 204665, "loss": 0.1766, "lr": 4.445204475497141e-07, "epoch": 0.11115725698092004, "percentage": 2.22, "elapsed_time": "0:05:17", "remaining_time": "3:52:29", "throughput": 9703.69, "total_tokens": 3077632} +{"current_steps": 4555, "total_steps": 204665, "loss": 0.1604, "lr": 4.4500903894073384e-07, "epoch": 0.11127940781276721, "percentage": 2.23, "elapsed_time": "0:05:17", "remaining_time": "3:52:28", "throughput": 9703.84, "total_tokens": 3081088} +{"current_steps": 4560, "total_steps": 204665, "loss": 0.0403, "lr": 4.454976303317535e-07, "epoch": 0.11140155864461437, "percentage": 2.23, "elapsed_time": "0:05:17", "remaining_time": "3:52:28", "throughput": 9703.32, "total_tokens": 3084224} +{"current_steps": 4565, "total_steps": 204665, "loss": 0.1672, "lr": 4.459862217227732e-07, "epoch": 0.11152370947646154, "percentage": 2.23, "elapsed_time": "0:05:18", "remaining_time": "3:52:27", "throughput": 9702.86, "total_tokens": 3087424} +{"current_steps": 4570, "total_steps": 204665, "loss": 0.2273, "lr": 4.4647481311379294e-07, "epoch": 0.1116458603083087, "percentage": 2.23, "elapsed_time": "0:05:18", "remaining_time": "3:52:27", "throughput": 9703.3, "total_tokens": 3091008} +{"current_steps": 4575, "total_steps": 204665, "loss": 0.1293, "lr": 4.469634045048126e-07, "epoch": 0.11176801114015586, "percentage": 2.24, "elapsed_time": "0:05:18", "remaining_time": "3:52:26", "throughput": 9702.42, "total_tokens": 3094016} +{"current_steps": 4580, "total_steps": 204665, "loss": 0.13, "lr": 4.4745199589583225e-07, "epoch": 0.11189016197200304, "percentage": 2.24, "elapsed_time": "0:05:19", "remaining_time": "3:52:26", "throughput": 9702.73, "total_tokens": 3097536} +{"current_steps": 4585, "total_steps": 204665, "loss": 0.0926, "lr": 4.47940587286852e-07, "epoch": 0.1120123128038502, "percentage": 2.24, "elapsed_time": "0:05:19", "remaining_time": "3:52:26", "throughput": 9702.46, "total_tokens": 3100800} +{"current_steps": 4590, "total_steps": 204665, "loss": 0.1141, "lr": 4.4842917867787166e-07, "epoch": 0.11213446363569736, "percentage": 2.24, "elapsed_time": "0:05:19", "remaining_time": "3:52:25", "throughput": 9702.37, "total_tokens": 3104128} +{"current_steps": 4595, "total_steps": 204665, "loss": 0.1559, "lr": 4.4891777006889134e-07, "epoch": 0.11225661446754452, "percentage": 2.25, "elapsed_time": "0:05:20", "remaining_time": "3:52:25", "throughput": 9702.2, "total_tokens": 3107456} +{"current_steps": 4600, "total_steps": 204665, "loss": 0.1458, "lr": 4.494063614599111e-07, "epoch": 0.11237876529939168, "percentage": 2.25, "elapsed_time": "0:05:20", "remaining_time": "3:52:25", "throughput": 9703.3, "total_tokens": 3111296} +{"current_steps": 4605, "total_steps": 204665, "loss": 0.1157, "lr": 4.4989495285093076e-07, "epoch": 0.11250091613123886, "percentage": 2.25, "elapsed_time": "0:05:20", "remaining_time": "3:52:25", "throughput": 9703.42, "total_tokens": 3114752} +{"current_steps": 4610, "total_steps": 204665, "loss": 0.1587, "lr": 4.5038354424195044e-07, "epoch": 0.11262306696308602, "percentage": 2.25, "elapsed_time": "0:05:21", "remaining_time": "3:52:24", "throughput": 9702.65, "total_tokens": 3117824} +{"current_steps": 4615, "total_steps": 204665, "loss": 0.0831, "lr": 4.508721356329701e-07, "epoch": 0.11274521779493318, "percentage": 2.25, "elapsed_time": "0:05:21", "remaining_time": "3:52:24", "throughput": 9704.14, "total_tokens": 3121792} +{"current_steps": 4620, "total_steps": 204665, "loss": 0.0569, "lr": 4.513607270239898e-07, "epoch": 0.11286736862678035, "percentage": 2.26, "elapsed_time": "0:05:22", "remaining_time": "3:52:24", "throughput": 9704.35, "total_tokens": 3125248} +{"current_steps": 4625, "total_steps": 204665, "loss": 0.1616, "lr": 4.518493184150095e-07, "epoch": 0.11298951945862751, "percentage": 2.26, "elapsed_time": "0:05:22", "remaining_time": "3:52:24", "throughput": 9704.46, "total_tokens": 3128640} +{"current_steps": 4630, "total_steps": 204665, "loss": 0.1618, "lr": 4.523379098060292e-07, "epoch": 0.11311167029047468, "percentage": 2.26, "elapsed_time": "0:05:22", "remaining_time": "3:52:23", "throughput": 9703.75, "total_tokens": 3131776} +{"current_steps": 4635, "total_steps": 204665, "loss": 0.1112, "lr": 4.528265011970489e-07, "epoch": 0.11323382112232185, "percentage": 2.26, "elapsed_time": "0:05:23", "remaining_time": "3:52:23", "throughput": 9703.72, "total_tokens": 3135168} +{"current_steps": 4640, "total_steps": 204665, "loss": 0.247, "lr": 4.533150925880686e-07, "epoch": 0.11335597195416901, "percentage": 2.27, "elapsed_time": "0:05:23", "remaining_time": "3:52:23", "throughput": 9703.99, "total_tokens": 3138688} +{"current_steps": 4645, "total_steps": 204665, "loss": 0.1139, "lr": 4.5380368397908826e-07, "epoch": 0.11347812278601617, "percentage": 2.27, "elapsed_time": "0:05:23", "remaining_time": "3:52:22", "throughput": 9703.67, "total_tokens": 3141952} +{"current_steps": 4650, "total_steps": 204665, "loss": 0.1915, "lr": 4.5429227537010794e-07, "epoch": 0.11360027361786333, "percentage": 2.27, "elapsed_time": "0:05:24", "remaining_time": "3:52:22", "throughput": 9703.46, "total_tokens": 3145280} +{"current_steps": 4655, "total_steps": 204665, "loss": 0.1277, "lr": 4.547808667611276e-07, "epoch": 0.11372242444971051, "percentage": 2.27, "elapsed_time": "0:05:24", "remaining_time": "3:52:22", "throughput": 9703.59, "total_tokens": 3148736} +{"current_steps": 4660, "total_steps": 204665, "loss": 0.121, "lr": 4.5526945815214736e-07, "epoch": 0.11384457528155767, "percentage": 2.28, "elapsed_time": "0:05:24", "remaining_time": "3:52:22", "throughput": 9704.07, "total_tokens": 3152320} +{"current_steps": 4665, "total_steps": 204665, "loss": 0.11, "lr": 4.5575804954316704e-07, "epoch": 0.11396672611340483, "percentage": 2.28, "elapsed_time": "0:05:25", "remaining_time": "3:52:22", "throughput": 9704.55, "total_tokens": 3155904} +{"current_steps": 4670, "total_steps": 204665, "loss": 0.1673, "lr": 4.562466409341867e-07, "epoch": 0.114088876945252, "percentage": 2.28, "elapsed_time": "0:05:25", "remaining_time": "3:52:22", "throughput": 9704.94, "total_tokens": 3159488} +{"current_steps": 4675, "total_steps": 204665, "loss": 0.2023, "lr": 4.567352323252064e-07, "epoch": 0.11421102777709916, "percentage": 2.28, "elapsed_time": "0:05:25", "remaining_time": "3:52:21", "throughput": 9704.68, "total_tokens": 3162752} +{"current_steps": 4680, "total_steps": 204665, "loss": 0.134, "lr": 4.572238237162261e-07, "epoch": 0.11433317860894633, "percentage": 2.29, "elapsed_time": "0:05:26", "remaining_time": "3:52:21", "throughput": 9704.04, "total_tokens": 3165888} +{"current_steps": 4685, "total_steps": 204665, "loss": 0.1225, "lr": 4.5771241510724577e-07, "epoch": 0.1144553294407935, "percentage": 2.29, "elapsed_time": "0:05:26", "remaining_time": "3:52:21", "throughput": 9704.4, "total_tokens": 3169472} +{"current_steps": 4690, "total_steps": 204665, "loss": 0.1116, "lr": 4.582010064982655e-07, "epoch": 0.11457748027264066, "percentage": 2.29, "elapsed_time": "0:05:26", "remaining_time": "3:52:20", "throughput": 9703.79, "total_tokens": 3172608} +{"current_steps": 4695, "total_steps": 204665, "loss": 0.0563, "lr": 4.586895978892852e-07, "epoch": 0.11469963110448782, "percentage": 2.29, "elapsed_time": "0:05:27", "remaining_time": "3:52:19", "throughput": 9703.06, "total_tokens": 3175680} +{"current_steps": 4700, "total_steps": 204665, "loss": 0.1314, "lr": 4.5917818928030486e-07, "epoch": 0.11482178193633498, "percentage": 2.3, "elapsed_time": "0:05:27", "remaining_time": "3:52:19", "throughput": 9702.74, "total_tokens": 3178944} +{"current_steps": 4705, "total_steps": 204665, "loss": 0.1484, "lr": 4.5966678067132454e-07, "epoch": 0.11494393276818216, "percentage": 2.3, "elapsed_time": "0:05:27", "remaining_time": "3:52:19", "throughput": 9702.55, "total_tokens": 3182272} +{"current_steps": 4710, "total_steps": 204665, "loss": 0.2703, "lr": 4.601553720623442e-07, "epoch": 0.11506608360002932, "percentage": 2.3, "elapsed_time": "0:05:28", "remaining_time": "3:52:18", "throughput": 9702.13, "total_tokens": 3185536} +{"current_steps": 4715, "total_steps": 204665, "loss": 0.0611, "lr": 4.606439634533639e-07, "epoch": 0.11518823443187648, "percentage": 2.3, "elapsed_time": "0:05:28", "remaining_time": "3:52:18", "throughput": 9702.09, "total_tokens": 3188928} +{"current_steps": 4720, "total_steps": 204665, "loss": 0.1684, "lr": 4.6113255484438364e-07, "epoch": 0.11531038526372364, "percentage": 2.31, "elapsed_time": "0:05:29", "remaining_time": "3:52:18", "throughput": 9703.3, "total_tokens": 3192832} +{"current_steps": 4725, "total_steps": 204665, "loss": 0.1242, "lr": 4.616211462354033e-07, "epoch": 0.11543253609557082, "percentage": 2.31, "elapsed_time": "0:05:29", "remaining_time": "3:52:18", "throughput": 9703.74, "total_tokens": 3196416} +{"current_steps": 4730, "total_steps": 204665, "loss": 0.0917, "lr": 4.62109737626423e-07, "epoch": 0.11555468692741798, "percentage": 2.31, "elapsed_time": "0:05:29", "remaining_time": "3:52:18", "throughput": 9704.19, "total_tokens": 3200000} +{"current_steps": 4735, "total_steps": 204665, "loss": 0.0778, "lr": 4.625983290174427e-07, "epoch": 0.11567683775926514, "percentage": 2.31, "elapsed_time": "0:05:30", "remaining_time": "3:52:18", "throughput": 9704.35, "total_tokens": 3203456} +{"current_steps": 4740, "total_steps": 204665, "loss": 0.1563, "lr": 4.6308692040846237e-07, "epoch": 0.1157989885911123, "percentage": 2.32, "elapsed_time": "0:05:30", "remaining_time": "3:52:17", "throughput": 9704.09, "total_tokens": 3206720} +{"current_steps": 4745, "total_steps": 204665, "loss": 0.1827, "lr": 4.6357551179948205e-07, "epoch": 0.11592113942295947, "percentage": 2.32, "elapsed_time": "0:05:30", "remaining_time": "3:52:17", "throughput": 9704.23, "total_tokens": 3210176} +{"current_steps": 4750, "total_steps": 204665, "loss": 0.1546, "lr": 4.640641031905018e-07, "epoch": 0.11604329025480664, "percentage": 2.32, "elapsed_time": "0:05:31", "remaining_time": "3:52:16", "throughput": 9703.06, "total_tokens": 3213056} +{"current_steps": 4755, "total_steps": 204665, "loss": 0.206, "lr": 4.6455269458152146e-07, "epoch": 0.1161654410866538, "percentage": 2.32, "elapsed_time": "0:05:31", "remaining_time": "3:52:16", "throughput": 9702.06, "total_tokens": 3216064} +{"current_steps": 4760, "total_steps": 204665, "loss": 0.075, "lr": 4.6504128597254114e-07, "epoch": 0.11628759191850097, "percentage": 2.33, "elapsed_time": "0:05:31", "remaining_time": "3:52:15", "throughput": 9701.61, "total_tokens": 3219264} +{"current_steps": 4765, "total_steps": 204665, "loss": 0.0856, "lr": 4.655298773635608e-07, "epoch": 0.11640974275034813, "percentage": 2.33, "elapsed_time": "0:05:32", "remaining_time": "3:52:15", "throughput": 9701.97, "total_tokens": 3222784} +{"current_steps": 4770, "total_steps": 204665, "loss": 0.2882, "lr": 4.660184687545805e-07, "epoch": 0.11653189358219529, "percentage": 2.33, "elapsed_time": "0:05:32", "remaining_time": "3:52:14", "throughput": 9701.21, "total_tokens": 3225856} +{"current_steps": 4775, "total_steps": 204665, "loss": 0.1822, "lr": 4.665070601456002e-07, "epoch": 0.11665404441404247, "percentage": 2.33, "elapsed_time": "0:05:32", "remaining_time": "3:52:14", "throughput": 9701.01, "total_tokens": 3229120} +{"current_steps": 4780, "total_steps": 204665, "loss": 0.0688, "lr": 4.669956515366199e-07, "epoch": 0.11677619524588963, "percentage": 2.34, "elapsed_time": "0:05:33", "remaining_time": "3:52:14", "throughput": 9702.25, "total_tokens": 3233024} +{"current_steps": 4785, "total_steps": 204665, "loss": 0.0929, "lr": 4.674842429276396e-07, "epoch": 0.11689834607773679, "percentage": 2.34, "elapsed_time": "0:05:33", "remaining_time": "3:52:14", "throughput": 9702.74, "total_tokens": 3236608} +{"current_steps": 4790, "total_steps": 204665, "loss": 0.0559, "lr": 4.679728343186593e-07, "epoch": 0.11702049690958395, "percentage": 2.34, "elapsed_time": "0:05:33", "remaining_time": "3:52:13", "throughput": 9702.29, "total_tokens": 3239808} +{"current_steps": 4795, "total_steps": 204665, "loss": 0.1595, "lr": 4.6846142570967897e-07, "epoch": 0.11714264774143111, "percentage": 2.34, "elapsed_time": "0:05:34", "remaining_time": "3:52:13", "throughput": 9702.11, "total_tokens": 3243072} +{"current_steps": 4800, "total_steps": 204665, "loss": 0.1409, "lr": 4.6895001710069865e-07, "epoch": 0.11726479857327829, "percentage": 2.35, "elapsed_time": "0:05:34", "remaining_time": "3:52:12", "throughput": 9701.54, "total_tokens": 3246208} +{"current_steps": 4805, "total_steps": 204665, "loss": 0.1298, "lr": 4.6943860849171833e-07, "epoch": 0.11738694940512545, "percentage": 2.35, "elapsed_time": "0:05:34", "remaining_time": "3:52:13", "throughput": 9703.62, "total_tokens": 3250496} +{"current_steps": 4810, "total_steps": 204665, "loss": 0.1475, "lr": 4.6992719988273806e-07, "epoch": 0.11750910023697261, "percentage": 2.35, "elapsed_time": "0:05:35", "remaining_time": "3:52:12", "throughput": 9704.46, "total_tokens": 3254208} +{"current_steps": 4815, "total_steps": 204665, "loss": 0.0992, "lr": 4.7041579127375774e-07, "epoch": 0.11763125106881978, "percentage": 2.35, "elapsed_time": "0:05:35", "remaining_time": "3:52:12", "throughput": 9704.46, "total_tokens": 3257600} +{"current_steps": 4820, "total_steps": 204665, "loss": 0.2301, "lr": 4.709043826647774e-07, "epoch": 0.11775340190066694, "percentage": 2.36, "elapsed_time": "0:05:36", "remaining_time": "3:52:12", "throughput": 9704.35, "total_tokens": 3260928} +{"current_steps": 4825, "total_steps": 204665, "loss": 0.0816, "lr": 4.713929740557971e-07, "epoch": 0.11787555273251411, "percentage": 2.36, "elapsed_time": "0:05:36", "remaining_time": "3:52:11", "throughput": 9703.99, "total_tokens": 3264192} +{"current_steps": 4830, "total_steps": 204665, "loss": 0.0875, "lr": 4.718815654468168e-07, "epoch": 0.11799770356436128, "percentage": 2.36, "elapsed_time": "0:05:36", "remaining_time": "3:52:11", "throughput": 9703.53, "total_tokens": 3267392} +{"current_steps": 4835, "total_steps": 204665, "loss": 0.1291, "lr": 4.7237015683783647e-07, "epoch": 0.11811985439620844, "percentage": 2.36, "elapsed_time": "0:05:37", "remaining_time": "3:52:10", "throughput": 9702.39, "total_tokens": 3270272} +{"current_steps": 4840, "total_steps": 204665, "loss": 0.1015, "lr": 4.728587482288562e-07, "epoch": 0.1182420052280556, "percentage": 2.36, "elapsed_time": "0:05:37", "remaining_time": "3:52:09", "throughput": 9701.83, "total_tokens": 3273408} +{"current_steps": 4845, "total_steps": 204665, "loss": 0.1972, "lr": 4.733473396198759e-07, "epoch": 0.11836415605990276, "percentage": 2.37, "elapsed_time": "0:05:37", "remaining_time": "3:52:09", "throughput": 9701.85, "total_tokens": 3276800} +{"current_steps": 4850, "total_steps": 204665, "loss": 0.0876, "lr": 4.7383593101089557e-07, "epoch": 0.11848630689174994, "percentage": 2.37, "elapsed_time": "0:05:38", "remaining_time": "3:52:09", "throughput": 9701.29, "total_tokens": 3279936} +{"current_steps": 4855, "total_steps": 204665, "loss": 0.1071, "lr": 4.743245224019153e-07, "epoch": 0.1186084577235971, "percentage": 2.37, "elapsed_time": "0:05:38", "remaining_time": "3:52:08", "throughput": 9701.6, "total_tokens": 3283456} +{"current_steps": 4860, "total_steps": 204665, "loss": 0.16, "lr": 4.7481311379293493e-07, "epoch": 0.11873060855544426, "percentage": 2.37, "elapsed_time": "0:05:38", "remaining_time": "3:52:08", "throughput": 9701.63, "total_tokens": 3286848} +{"current_steps": 4865, "total_steps": 204665, "loss": 0.1569, "lr": 4.753017051839546e-07, "epoch": 0.11885275938729142, "percentage": 2.38, "elapsed_time": "0:05:39", "remaining_time": "3:52:08", "throughput": 9702.17, "total_tokens": 3290432} +{"current_steps": 4870, "total_steps": 204665, "loss": 0.1321, "lr": 4.7579029657497434e-07, "epoch": 0.11897491021913859, "percentage": 2.38, "elapsed_time": "0:05:39", "remaining_time": "3:52:07", "throughput": 9702.03, "total_tokens": 3293760} +{"current_steps": 4875, "total_steps": 204665, "loss": 0.1658, "lr": 4.76278887965994e-07, "epoch": 0.11909706105098576, "percentage": 2.38, "elapsed_time": "0:05:39", "remaining_time": "3:52:07", "throughput": 9701.17, "total_tokens": 3296768} +{"current_steps": 4880, "total_steps": 204665, "loss": 0.2226, "lr": 4.767674793570137e-07, "epoch": 0.11921921188283292, "percentage": 2.38, "elapsed_time": "0:05:40", "remaining_time": "3:52:06", "throughput": 9700.79, "total_tokens": 3299968} +{"current_steps": 4885, "total_steps": 204665, "loss": 0.1144, "lr": 4.772560707480334e-07, "epoch": 0.11934136271468009, "percentage": 2.39, "elapsed_time": "0:05:40", "remaining_time": "3:52:06", "throughput": 9701.15, "total_tokens": 3303488} +{"current_steps": 4890, "total_steps": 204665, "loss": 0.1006, "lr": 4.777446621390531e-07, "epoch": 0.11946351354652725, "percentage": 2.39, "elapsed_time": "0:05:40", "remaining_time": "3:52:05", "throughput": 9701.26, "total_tokens": 3306880} +{"current_steps": 4895, "total_steps": 204665, "loss": 0.1341, "lr": 4.782332535300728e-07, "epoch": 0.11958566437837442, "percentage": 2.39, "elapsed_time": "0:05:41", "remaining_time": "3:52:05", "throughput": 9701.39, "total_tokens": 3310336} +{"current_steps": 4900, "total_steps": 204665, "loss": 0.1649, "lr": 4.787218449210924e-07, "epoch": 0.11970781521022159, "percentage": 2.39, "elapsed_time": "0:05:41", "remaining_time": "3:52:05", "throughput": 9701.79, "total_tokens": 3313856} +{"current_steps": 4905, "total_steps": 204665, "loss": 0.1185, "lr": 4.792104363121121e-07, "epoch": 0.11982996604206875, "percentage": 2.4, "elapsed_time": "0:05:41", "remaining_time": "3:52:04", "throughput": 9701.61, "total_tokens": 3317120} +{"current_steps": 4910, "total_steps": 204665, "loss": 0.1056, "lr": 4.796990277031319e-07, "epoch": 0.11995211687391591, "percentage": 2.4, "elapsed_time": "0:05:42", "remaining_time": "3:52:04", "throughput": 9702.25, "total_tokens": 3320768} +{"current_steps": 4915, "total_steps": 204665, "loss": 0.0633, "lr": 4.801876190941516e-07, "epoch": 0.12007426770576307, "percentage": 2.4, "elapsed_time": "0:05:42", "remaining_time": "3:52:04", "throughput": 9702.63, "total_tokens": 3324288} +{"current_steps": 4920, "total_steps": 204665, "loss": 0.1208, "lr": 4.806762104851712e-07, "epoch": 0.12019641853761025, "percentage": 2.4, "elapsed_time": "0:05:42", "remaining_time": "3:52:04", "throughput": 9702.59, "total_tokens": 3327680} +{"current_steps": 4925, "total_steps": 204665, "loss": 0.1176, "lr": 4.811648018761909e-07, "epoch": 0.12031856936945741, "percentage": 2.41, "elapsed_time": "0:05:43", "remaining_time": "3:52:03", "throughput": 9702.41, "total_tokens": 3331008} +{"current_steps": 4930, "total_steps": 204665, "loss": 0.151, "lr": 4.816533932672106e-07, "epoch": 0.12044072020130457, "percentage": 2.41, "elapsed_time": "0:05:43", "remaining_time": "3:52:03", "throughput": 9702.15, "total_tokens": 3334272} +{"current_steps": 4935, "total_steps": 204665, "loss": 0.0791, "lr": 4.821419846582303e-07, "epoch": 0.12056287103315173, "percentage": 2.41, "elapsed_time": "0:05:44", "remaining_time": "3:52:03", "throughput": 9702.28, "total_tokens": 3337728} +{"current_steps": 4940, "total_steps": 204665, "loss": 0.1625, "lr": 4.8263057604925e-07, "epoch": 0.1206850218649989, "percentage": 2.41, "elapsed_time": "0:05:44", "remaining_time": "3:52:02", "throughput": 9702.2, "total_tokens": 3341056} +{"current_steps": 4945, "total_steps": 204665, "loss": 0.14, "lr": 4.831191674402697e-07, "epoch": 0.12080717269684607, "percentage": 2.42, "elapsed_time": "0:05:44", "remaining_time": "3:52:02", "throughput": 9702.79, "total_tokens": 3344704} +{"current_steps": 4950, "total_steps": 204665, "loss": 0.2221, "lr": 4.836077588312894e-07, "epoch": 0.12092932352869323, "percentage": 2.42, "elapsed_time": "0:05:45", "remaining_time": "3:52:01", "throughput": 9701.85, "total_tokens": 3347648} +{"current_steps": 4955, "total_steps": 204665, "loss": 0.0901, "lr": 4.84096350222309e-07, "epoch": 0.1210514743605404, "percentage": 2.42, "elapsed_time": "0:05:45", "remaining_time": "3:52:01", "throughput": 9701.76, "total_tokens": 3350976} +{"current_steps": 4960, "total_steps": 204665, "loss": 0.2851, "lr": 4.845849416133287e-07, "epoch": 0.12117362519238756, "percentage": 2.42, "elapsed_time": "0:05:45", "remaining_time": "3:52:00", "throughput": 9701.24, "total_tokens": 3354112} +{"current_steps": 4965, "total_steps": 204665, "loss": 0.1281, "lr": 4.850735330043484e-07, "epoch": 0.12129577602423472, "percentage": 2.43, "elapsed_time": "0:05:46", "remaining_time": "3:52:00", "throughput": 9701.7, "total_tokens": 3357696} +{"current_steps": 4970, "total_steps": 204665, "loss": 0.1526, "lr": 4.855621243953682e-07, "epoch": 0.1214179268560819, "percentage": 2.43, "elapsed_time": "0:05:46", "remaining_time": "3:51:59", "throughput": 9701.68, "total_tokens": 3361024} +{"current_steps": 4975, "total_steps": 204665, "loss": 0.2137, "lr": 4.860507157863879e-07, "epoch": 0.12154007768792906, "percentage": 2.43, "elapsed_time": "0:05:46", "remaining_time": "3:51:59", "throughput": 9700.87, "total_tokens": 3364032} +{"current_steps": 4980, "total_steps": 204665, "loss": 0.1128, "lr": 4.865393071774074e-07, "epoch": 0.12166222851977622, "percentage": 2.43, "elapsed_time": "0:05:47", "remaining_time": "3:51:58", "throughput": 9701.7, "total_tokens": 3367744} +{"current_steps": 4985, "total_steps": 204665, "loss": 0.1315, "lr": 4.870278985684272e-07, "epoch": 0.12178437935162338, "percentage": 2.44, "elapsed_time": "0:05:47", "remaining_time": "3:51:58", "throughput": 9701.15, "total_tokens": 3370880} +{"current_steps": 4990, "total_steps": 204665, "loss": 0.0926, "lr": 4.875164899594469e-07, "epoch": 0.12190653018347054, "percentage": 2.44, "elapsed_time": "0:05:47", "remaining_time": "3:51:58", "throughput": 9702.22, "total_tokens": 3374720} +{"current_steps": 4995, "total_steps": 204665, "loss": 0.0964, "lr": 4.880050813504666e-07, "epoch": 0.12202868101531772, "percentage": 2.44, "elapsed_time": "0:05:48", "remaining_time": "3:51:57", "throughput": 9701.94, "total_tokens": 3377984} +{"current_steps": 5000, "total_steps": 204665, "loss": 0.0963, "lr": 4.884936727414863e-07, "epoch": 0.12215083184716488, "percentage": 2.44, "elapsed_time": "0:05:48", "remaining_time": "3:51:57", "throughput": 9701.97, "total_tokens": 3381376} +{"current_steps": 5005, "total_steps": 204665, "loss": 0.1253, "lr": 4.88982264132506e-07, "epoch": 0.12227298267901204, "percentage": 2.45, "elapsed_time": "0:05:48", "remaining_time": "3:51:57", "throughput": 9702.16, "total_tokens": 3384832} +{"current_steps": 5010, "total_steps": 204665, "loss": 0.1456, "lr": 4.894708555235256e-07, "epoch": 0.1223951335108592, "percentage": 2.45, "elapsed_time": "0:05:49", "remaining_time": "3:51:57", "throughput": 9702.87, "total_tokens": 3388480} +{"current_steps": 5015, "total_steps": 204665, "loss": 0.1403, "lr": 4.899594469145453e-07, "epoch": 0.12251728434270637, "percentage": 2.45, "elapsed_time": "0:05:49", "remaining_time": "3:51:56", "throughput": 9702.61, "total_tokens": 3391744} +{"current_steps": 5020, "total_steps": 204665, "loss": 0.1419, "lr": 4.90448038305565e-07, "epoch": 0.12263943517455354, "percentage": 2.45, "elapsed_time": "0:05:49", "remaining_time": "3:51:56", "throughput": 9702.57, "total_tokens": 3395136} +{"current_steps": 5025, "total_steps": 204665, "loss": 0.1163, "lr": 4.909366296965847e-07, "epoch": 0.1227615860064007, "percentage": 2.46, "elapsed_time": "0:05:50", "remaining_time": "3:51:55", "throughput": 9702.07, "total_tokens": 3398272} +{"current_steps": 5030, "total_steps": 204665, "loss": 0.1212, "lr": 4.914252210876045e-07, "epoch": 0.12288373683824787, "percentage": 2.46, "elapsed_time": "0:05:50", "remaining_time": "3:51:55", "throughput": 9703.03, "total_tokens": 3402112} +{"current_steps": 5035, "total_steps": 204665, "loss": 0.1536, "lr": 4.919138124786241e-07, "epoch": 0.12300588767009503, "percentage": 2.46, "elapsed_time": "0:05:50", "remaining_time": "3:51:55", "throughput": 9702.89, "total_tokens": 3405440} +{"current_steps": 5040, "total_steps": 204665, "loss": 0.0942, "lr": 4.924024038696437e-07, "epoch": 0.12312803850194219, "percentage": 2.46, "elapsed_time": "0:05:51", "remaining_time": "3:51:54", "throughput": 9701.94, "total_tokens": 3408384} +{"current_steps": 5045, "total_steps": 204665, "loss": 0.157, "lr": 4.928909952606635e-07, "epoch": 0.12325018933378937, "percentage": 2.47, "elapsed_time": "0:05:51", "remaining_time": "3:51:54", "throughput": 9702.41, "total_tokens": 3411968} +{"current_steps": 5050, "total_steps": 204665, "loss": 0.1502, "lr": 4.933795866516832e-07, "epoch": 0.12337234016563653, "percentage": 2.47, "elapsed_time": "0:05:52", "remaining_time": "3:51:54", "throughput": 9702.38, "total_tokens": 3415360} +{"current_steps": 5055, "total_steps": 204665, "loss": 0.1455, "lr": 4.938681780427029e-07, "epoch": 0.12349449099748369, "percentage": 2.47, "elapsed_time": "0:05:52", "remaining_time": "3:51:53", "throughput": 9701.9, "total_tokens": 3418560} +{"current_steps": 5060, "total_steps": 204665, "loss": 0.1418, "lr": 4.943567694337226e-07, "epoch": 0.12361664182933085, "percentage": 2.47, "elapsed_time": "0:05:52", "remaining_time": "3:51:53", "throughput": 9701.77, "total_tokens": 3421888} +{"current_steps": 5065, "total_steps": 204665, "loss": 0.1016, "lr": 4.948453608247422e-07, "epoch": 0.12373879266117803, "percentage": 2.47, "elapsed_time": "0:05:53", "remaining_time": "3:51:53", "throughput": 9702.34, "total_tokens": 3425536} +{"current_steps": 5070, "total_steps": 204665, "loss": 0.0608, "lr": 4.953339522157619e-07, "epoch": 0.12386094349302519, "percentage": 2.48, "elapsed_time": "0:05:53", "remaining_time": "3:51:53", "throughput": 9703.07, "total_tokens": 3429248} +{"current_steps": 5075, "total_steps": 204665, "loss": 0.1167, "lr": 4.958225436067816e-07, "epoch": 0.12398309432487235, "percentage": 2.48, "elapsed_time": "0:05:53", "remaining_time": "3:51:52", "throughput": 9702.77, "total_tokens": 3432512} +{"current_steps": 5080, "total_steps": 204665, "loss": 0.1865, "lr": 4.963111349978013e-07, "epoch": 0.12410524515671952, "percentage": 2.48, "elapsed_time": "0:05:54", "remaining_time": "3:51:52", "throughput": 9702.34, "total_tokens": 3435712} +{"current_steps": 5085, "total_steps": 204665, "loss": 0.0807, "lr": 4.96799726388821e-07, "epoch": 0.12422739598856668, "percentage": 2.48, "elapsed_time": "0:05:54", "remaining_time": "3:51:51", "throughput": 9701.8, "total_tokens": 3438848} +{"current_steps": 5090, "total_steps": 204665, "loss": 0.0983, "lr": 4.972883177798407e-07, "epoch": 0.12434954682041385, "percentage": 2.49, "elapsed_time": "0:05:54", "remaining_time": "3:51:51", "throughput": 9702.09, "total_tokens": 3442368} +{"current_steps": 5095, "total_steps": 204665, "loss": 0.0881, "lr": 4.977769091708604e-07, "epoch": 0.12447169765226102, "percentage": 2.49, "elapsed_time": "0:05:55", "remaining_time": "3:51:51", "throughput": 9702.12, "total_tokens": 3445760} +{"current_steps": 5100, "total_steps": 204665, "loss": 0.1125, "lr": 4.982655005618801e-07, "epoch": 0.12459384848410818, "percentage": 2.49, "elapsed_time": "0:05:55", "remaining_time": "3:51:50", "throughput": 9701.33, "total_tokens": 3448768} +{"current_steps": 5105, "total_steps": 204665, "loss": 0.1208, "lr": 4.987540919528998e-07, "epoch": 0.12471599931595534, "percentage": 2.49, "elapsed_time": "0:05:55", "remaining_time": "3:51:50", "throughput": 9700.97, "total_tokens": 3451968} +{"current_steps": 5110, "total_steps": 204665, "loss": 0.1621, "lr": 4.992426833439195e-07, "epoch": 0.1248381501478025, "percentage": 2.5, "elapsed_time": "0:05:56", "remaining_time": "3:51:50", "throughput": 9701.65, "total_tokens": 3455680} +{"current_steps": 5115, "total_steps": 204665, "loss": 0.0785, "lr": 4.997312747349392e-07, "epoch": 0.12496030097964968, "percentage": 2.5, "elapsed_time": "0:05:56", "remaining_time": "3:51:49", "throughput": 9701.13, "total_tokens": 3458816} +{"current_steps": 5120, "total_steps": 204665, "loss": 0.2852, "lr": 5.002198661259588e-07, "epoch": 0.12508245181149683, "percentage": 2.5, "elapsed_time": "0:05:56", "remaining_time": "3:51:48", "throughput": 9700.41, "total_tokens": 3461824} +{"current_steps": 5125, "total_steps": 204665, "loss": 0.1053, "lr": 5.007084575169785e-07, "epoch": 0.125204602643344, "percentage": 2.5, "elapsed_time": "0:05:57", "remaining_time": "3:51:49", "throughput": 9702.9, "total_tokens": 3466304} +{"current_steps": 5130, "total_steps": 204665, "loss": 0.1095, "lr": 5.011970489079982e-07, "epoch": 0.12532675347519118, "percentage": 2.51, "elapsed_time": "0:05:57", "remaining_time": "3:51:48", "throughput": 9702.99, "total_tokens": 3469696} +{"current_steps": 5135, "total_steps": 204665, "loss": 0.1306, "lr": 5.01685640299018e-07, "epoch": 0.12544890430703834, "percentage": 2.51, "elapsed_time": "0:05:57", "remaining_time": "3:51:48", "throughput": 9703.35, "total_tokens": 3473216} +{"current_steps": 5140, "total_steps": 204665, "loss": 0.113, "lr": 5.021742316900376e-07, "epoch": 0.1255710551388855, "percentage": 2.51, "elapsed_time": "0:05:58", "remaining_time": "3:51:47", "throughput": 9702.84, "total_tokens": 3476352} +{"current_steps": 5145, "total_steps": 204665, "loss": 0.0987, "lr": 5.026628230810573e-07, "epoch": 0.12569320597073266, "percentage": 2.51, "elapsed_time": "0:05:58", "remaining_time": "3:51:47", "throughput": 9702.75, "total_tokens": 3479680} +{"current_steps": 5150, "total_steps": 204665, "loss": 0.0808, "lr": 5.03151414472077e-07, "epoch": 0.12581535680257983, "percentage": 2.52, "elapsed_time": "0:05:58", "remaining_time": "3:51:47", "throughput": 9703.55, "total_tokens": 3483392} +{"current_steps": 5155, "total_steps": 204665, "loss": 0.0885, "lr": 5.036400058630966e-07, "epoch": 0.125937507634427, "percentage": 2.52, "elapsed_time": "0:05:59", "remaining_time": "3:51:46", "throughput": 9703.4, "total_tokens": 3486720} +{"current_steps": 5160, "total_steps": 204665, "loss": 0.0894, "lr": 5.041285972541164e-07, "epoch": 0.12605965846627415, "percentage": 2.52, "elapsed_time": "0:05:59", "remaining_time": "3:51:46", "throughput": 9703.05, "total_tokens": 3489920} +{"current_steps": 5165, "total_steps": 204665, "loss": 0.1762, "lr": 5.046171886451361e-07, "epoch": 0.1261818092981213, "percentage": 2.52, "elapsed_time": "0:06:00", "remaining_time": "3:51:45", "throughput": 9702.98, "total_tokens": 3493248} +{"current_steps": 5170, "total_steps": 204665, "loss": 0.0735, "lr": 5.051057800361558e-07, "epoch": 0.12630396012996847, "percentage": 2.53, "elapsed_time": "0:06:00", "remaining_time": "3:51:45", "throughput": 9703.01, "total_tokens": 3496640} +{"current_steps": 5175, "total_steps": 204665, "loss": 0.1073, "lr": 5.055943714271754e-07, "epoch": 0.12642611096181566, "percentage": 2.53, "elapsed_time": "0:06:00", "remaining_time": "3:51:44", "throughput": 9702.3, "total_tokens": 3499648} +{"current_steps": 5180, "total_steps": 204665, "loss": 0.0827, "lr": 5.060829628181951e-07, "epoch": 0.12654826179366283, "percentage": 2.53, "elapsed_time": "0:06:01", "remaining_time": "3:51:43", "throughput": 9701.36, "total_tokens": 3502592} +{"current_steps": 5185, "total_steps": 204665, "loss": 0.1814, "lr": 5.065715542092148e-07, "epoch": 0.12667041262551, "percentage": 2.53, "elapsed_time": "0:06:01", "remaining_time": "3:51:43", "throughput": 9701.38, "total_tokens": 3505984} +{"current_steps": 5190, "total_steps": 204665, "loss": 0.2021, "lr": 5.070601456002345e-07, "epoch": 0.12679256345735715, "percentage": 2.54, "elapsed_time": "0:06:01", "remaining_time": "3:51:42", "throughput": 9700.93, "total_tokens": 3509120} +{"current_steps": 5195, "total_steps": 204665, "loss": 0.1381, "lr": 5.075487369912543e-07, "epoch": 0.1269147142892043, "percentage": 2.54, "elapsed_time": "0:06:02", "remaining_time": "3:51:42", "throughput": 9700.32, "total_tokens": 3512192} +{"current_steps": 5200, "total_steps": 204665, "loss": 0.1622, "lr": 5.080373283822738e-07, "epoch": 0.12703686512105147, "percentage": 2.54, "elapsed_time": "0:06:02", "remaining_time": "3:51:41", "throughput": 9699.54, "total_tokens": 3515200} +{"current_steps": 5205, "total_steps": 204665, "loss": 0.1644, "lr": 5.085259197732936e-07, "epoch": 0.12715901595289864, "percentage": 2.54, "elapsed_time": "0:06:02", "remaining_time": "3:51:40", "throughput": 9698.86, "total_tokens": 3518272} +{"current_steps": 5210, "total_steps": 204665, "loss": 0.0878, "lr": 5.090145111643133e-07, "epoch": 0.1272811667847458, "percentage": 2.55, "elapsed_time": "0:06:03", "remaining_time": "3:51:40", "throughput": 9698.72, "total_tokens": 3521600} +{"current_steps": 5215, "total_steps": 204665, "loss": 0.1429, "lr": 5.095031025553329e-07, "epoch": 0.12740331761659296, "percentage": 2.55, "elapsed_time": "0:06:03", "remaining_time": "3:51:39", "throughput": 9697.77, "total_tokens": 3524544} +{"current_steps": 5220, "total_steps": 204665, "loss": 0.1227, "lr": 5.099916939463527e-07, "epoch": 0.12752546844844012, "percentage": 2.55, "elapsed_time": "0:06:03", "remaining_time": "3:51:39", "throughput": 9697.9, "total_tokens": 3528000} +{"current_steps": 5225, "total_steps": 204665, "loss": 0.1693, "lr": 5.104802853373724e-07, "epoch": 0.1276476192802873, "percentage": 2.55, "elapsed_time": "0:06:04", "remaining_time": "3:51:38", "throughput": 9697.29, "total_tokens": 3531072} +{"current_steps": 5230, "total_steps": 204665, "loss": 0.1392, "lr": 5.10968876728392e-07, "epoch": 0.12776977011213447, "percentage": 2.56, "elapsed_time": "0:06:04", "remaining_time": "3:51:38", "throughput": 9698.35, "total_tokens": 3534912} +{"current_steps": 5235, "total_steps": 204665, "loss": 0.1636, "lr": 5.114574681194117e-07, "epoch": 0.12789192094398164, "percentage": 2.56, "elapsed_time": "0:06:04", "remaining_time": "3:51:38", "throughput": 9697.85, "total_tokens": 3538048} +{"current_steps": 5240, "total_steps": 204665, "loss": 0.1147, "lr": 5.119460595104314e-07, "epoch": 0.1280140717758288, "percentage": 2.56, "elapsed_time": "0:06:05", "remaining_time": "3:51:37", "throughput": 9697.52, "total_tokens": 3541248} +{"current_steps": 5245, "total_steps": 204665, "loss": 0.1623, "lr": 5.124346509014511e-07, "epoch": 0.12813622260767596, "percentage": 2.56, "elapsed_time": "0:06:05", "remaining_time": "3:51:37", "throughput": 9697.55, "total_tokens": 3544640} +{"current_steps": 5250, "total_steps": 204665, "loss": 0.1917, "lr": 5.129232422924708e-07, "epoch": 0.12825837343952312, "percentage": 2.57, "elapsed_time": "0:06:05", "remaining_time": "3:51:37", "throughput": 9697.93, "total_tokens": 3548160} +{"current_steps": 5255, "total_steps": 204665, "loss": 0.3176, "lr": 5.134118336834905e-07, "epoch": 0.12838052427137028, "percentage": 2.57, "elapsed_time": "0:06:06", "remaining_time": "3:51:36", "throughput": 9697.77, "total_tokens": 3551488} +{"current_steps": 5260, "total_steps": 204665, "loss": 0.1757, "lr": 5.139004250745101e-07, "epoch": 0.12850267510321745, "percentage": 2.57, "elapsed_time": "0:06:06", "remaining_time": "3:51:36", "throughput": 9699.02, "total_tokens": 3555456} +{"current_steps": 5265, "total_steps": 204665, "loss": 0.0561, "lr": 5.143890164655299e-07, "epoch": 0.1286248259350646, "percentage": 2.57, "elapsed_time": "0:06:06", "remaining_time": "3:51:36", "throughput": 9698.96, "total_tokens": 3558848} +{"current_steps": 5270, "total_steps": 204665, "loss": 0.1261, "lr": 5.148776078565496e-07, "epoch": 0.12874697676691177, "percentage": 2.57, "elapsed_time": "0:06:07", "remaining_time": "3:51:36", "throughput": 9699.91, "total_tokens": 3562624} +{"current_steps": 5275, "total_steps": 204665, "loss": 0.0671, "lr": 5.153661992475692e-07, "epoch": 0.12886912759875896, "percentage": 2.58, "elapsed_time": "0:06:07", "remaining_time": "3:51:36", "throughput": 9700.64, "total_tokens": 3566336} +{"current_steps": 5280, "total_steps": 204665, "loss": 0.0612, "lr": 5.15854790638589e-07, "epoch": 0.12899127843060612, "percentage": 2.58, "elapsed_time": "0:06:07", "remaining_time": "3:51:35", "throughput": 9699.78, "total_tokens": 3569280} +{"current_steps": 5285, "total_steps": 204665, "loss": 0.1168, "lr": 5.163433820296086e-07, "epoch": 0.12911342926245328, "percentage": 2.58, "elapsed_time": "0:06:08", "remaining_time": "3:51:34", "throughput": 9698.81, "total_tokens": 3572160} +{"current_steps": 5290, "total_steps": 204665, "loss": 0.1233, "lr": 5.168319734206283e-07, "epoch": 0.12923558009430045, "percentage": 2.58, "elapsed_time": "0:06:08", "remaining_time": "3:51:34", "throughput": 9698.47, "total_tokens": 3575360} +{"current_steps": 5295, "total_steps": 204665, "loss": 0.2233, "lr": 5.17320564811648e-07, "epoch": 0.1293577309261476, "percentage": 2.59, "elapsed_time": "0:06:09", "remaining_time": "3:51:33", "throughput": 9698.63, "total_tokens": 3578816} +{"current_steps": 5300, "total_steps": 204665, "loss": 0.1171, "lr": 5.178091562026678e-07, "epoch": 0.12947988175799477, "percentage": 2.59, "elapsed_time": "0:06:09", "remaining_time": "3:51:33", "throughput": 9698.71, "total_tokens": 3582208} +{"current_steps": 5305, "total_steps": 204665, "loss": 0.1628, "lr": 5.182977475936874e-07, "epoch": 0.12960203258984193, "percentage": 2.59, "elapsed_time": "0:06:09", "remaining_time": "3:51:32", "throughput": 9698.49, "total_tokens": 3585472} +{"current_steps": 5310, "total_steps": 204665, "loss": 0.2128, "lr": 5.18786338984707e-07, "epoch": 0.1297241834216891, "percentage": 2.59, "elapsed_time": "0:06:10", "remaining_time": "3:51:32", "throughput": 9697.8, "total_tokens": 3588480} +{"current_steps": 5315, "total_steps": 204665, "loss": 0.2443, "lr": 5.192749303757268e-07, "epoch": 0.12984633425353626, "percentage": 2.6, "elapsed_time": "0:06:10", "remaining_time": "3:51:31", "throughput": 9697.77, "total_tokens": 3591872} +{"current_steps": 5320, "total_steps": 204665, "loss": 0.106, "lr": 5.197635217667464e-07, "epoch": 0.12996848508538345, "percentage": 2.6, "elapsed_time": "0:06:10", "remaining_time": "3:51:31", "throughput": 9698.04, "total_tokens": 3595328} +{"current_steps": 5325, "total_steps": 204665, "loss": 0.0865, "lr": 5.202521131577662e-07, "epoch": 0.1300906359172306, "percentage": 2.6, "elapsed_time": "0:06:11", "remaining_time": "3:51:31", "throughput": 9699.02, "total_tokens": 3599168} +{"current_steps": 5330, "total_steps": 204665, "loss": 0.1366, "lr": 5.207407045487859e-07, "epoch": 0.13021278674907777, "percentage": 2.6, "elapsed_time": "0:06:11", "remaining_time": "3:51:30", "throughput": 9698.26, "total_tokens": 3602176} +{"current_steps": 5335, "total_steps": 204665, "loss": 0.0841, "lr": 5.212292959398054e-07, "epoch": 0.13033493758092493, "percentage": 2.61, "elapsed_time": "0:06:11", "remaining_time": "3:51:30", "throughput": 9699.49, "total_tokens": 3606144} +{"current_steps": 5340, "total_steps": 204665, "loss": 0.1475, "lr": 5.217178873308252e-07, "epoch": 0.1304570884127721, "percentage": 2.61, "elapsed_time": "0:06:12", "remaining_time": "3:51:30", "throughput": 9699.16, "total_tokens": 3609344} +{"current_steps": 5345, "total_steps": 204665, "loss": 0.0872, "lr": 5.222064787218449e-07, "epoch": 0.13057923924461926, "percentage": 2.61, "elapsed_time": "0:06:12", "remaining_time": "3:51:29", "throughput": 9698.92, "total_tokens": 3612608} +{"current_steps": 5350, "total_steps": 204665, "loss": 0.2108, "lr": 5.226950701128646e-07, "epoch": 0.13070139007646642, "percentage": 2.61, "elapsed_time": "0:06:12", "remaining_time": "3:51:29", "throughput": 9698.32, "total_tokens": 3615680} +{"current_steps": 5355, "total_steps": 204665, "loss": 0.1861, "lr": 5.231836615038843e-07, "epoch": 0.13082354090831358, "percentage": 2.62, "elapsed_time": "0:06:13", "remaining_time": "3:51:28", "throughput": 9698.09, "total_tokens": 3618944} +{"current_steps": 5360, "total_steps": 204665, "loss": 0.0905, "lr": 5.236722528949041e-07, "epoch": 0.13094569174016074, "percentage": 2.62, "elapsed_time": "0:06:13", "remaining_time": "3:51:28", "throughput": 9697.61, "total_tokens": 3622080} +{"current_steps": 5365, "total_steps": 204665, "loss": 0.1503, "lr": 5.241608442859236e-07, "epoch": 0.1310678425720079, "percentage": 2.62, "elapsed_time": "0:06:13", "remaining_time": "3:51:27", "throughput": 9697.28, "total_tokens": 3625280} +{"current_steps": 5370, "total_steps": 204665, "loss": 0.2217, "lr": 5.246494356769433e-07, "epoch": 0.1311899934038551, "percentage": 2.62, "elapsed_time": "0:06:14", "remaining_time": "3:51:27", "throughput": 9697.32, "total_tokens": 3628672} +{"current_steps": 5375, "total_steps": 204665, "loss": 0.1845, "lr": 5.251380270679631e-07, "epoch": 0.13131214423570226, "percentage": 2.63, "elapsed_time": "0:06:14", "remaining_time": "3:51:26", "throughput": 9696.73, "total_tokens": 3631744} +{"current_steps": 5380, "total_steps": 204665, "loss": 0.0909, "lr": 5.256266184589827e-07, "epoch": 0.13143429506754942, "percentage": 2.63, "elapsed_time": "0:06:14", "remaining_time": "3:51:26", "throughput": 9696.57, "total_tokens": 3635008} +{"current_steps": 5385, "total_steps": 204665, "loss": 0.1601, "lr": 5.261152098500025e-07, "epoch": 0.13155644589939658, "percentage": 2.63, "elapsed_time": "0:06:15", "remaining_time": "3:51:26", "throughput": 9697.97, "total_tokens": 3639040} +{"current_steps": 5390, "total_steps": 204665, "loss": 0.1047, "lr": 5.266038012410222e-07, "epoch": 0.13167859673124374, "percentage": 2.63, "elapsed_time": "0:06:15", "remaining_time": "3:51:25", "throughput": 9697.28, "total_tokens": 3642048} +{"current_steps": 5395, "total_steps": 204665, "loss": 0.1907, "lr": 5.270923926320417e-07, "epoch": 0.1318007475630909, "percentage": 2.64, "elapsed_time": "0:06:15", "remaining_time": "3:51:25", "throughput": 9697.41, "total_tokens": 3645504} +{"current_steps": 5400, "total_steps": 204665, "loss": 0.0751, "lr": 5.275809840230615e-07, "epoch": 0.13192289839493807, "percentage": 2.64, "elapsed_time": "0:06:16", "remaining_time": "3:51:24", "throughput": 9696.91, "total_tokens": 3648640} +{"current_steps": 5405, "total_steps": 204665, "loss": 0.1491, "lr": 5.280695754140812e-07, "epoch": 0.13204504922678523, "percentage": 2.64, "elapsed_time": "0:06:16", "remaining_time": "3:51:24", "throughput": 9696.68, "total_tokens": 3651904} +{"current_steps": 5410, "total_steps": 204665, "loss": 0.1306, "lr": 5.285581668051009e-07, "epoch": 0.1321672000586324, "percentage": 2.64, "elapsed_time": "0:06:16", "remaining_time": "3:51:23", "throughput": 9695.93, "total_tokens": 3654912} +{"current_steps": 5415, "total_steps": 204665, "loss": 0.1601, "lr": 5.290467581961206e-07, "epoch": 0.13228935089047955, "percentage": 2.65, "elapsed_time": "0:06:17", "remaining_time": "3:51:22", "throughput": 9695.61, "total_tokens": 3658112} +{"current_steps": 5420, "total_steps": 204665, "loss": 0.1715, "lr": 5.295353495871403e-07, "epoch": 0.13241150172232674, "percentage": 2.65, "elapsed_time": "0:06:17", "remaining_time": "3:51:22", "throughput": 9695.54, "total_tokens": 3661440} +{"current_steps": 5425, "total_steps": 204665, "loss": 0.1183, "lr": 5.300239409781599e-07, "epoch": 0.1325336525541739, "percentage": 2.65, "elapsed_time": "0:06:17", "remaining_time": "3:51:22", "throughput": 9695.41, "total_tokens": 3664768} +{"current_steps": 5430, "total_steps": 204665, "loss": 0.1052, "lr": 5.305125323691796e-07, "epoch": 0.13265580338602107, "percentage": 2.65, "elapsed_time": "0:06:18", "remaining_time": "3:51:22", "throughput": 9696.85, "total_tokens": 3668800} +{"current_steps": 5435, "total_steps": 204665, "loss": 0.0333, "lr": 5.310011237601994e-07, "epoch": 0.13277795421786823, "percentage": 2.66, "elapsed_time": "0:06:18", "remaining_time": "3:51:21", "throughput": 9695.86, "total_tokens": 3671680} +{"current_steps": 5440, "total_steps": 204665, "loss": 0.123, "lr": 5.31489715151219e-07, "epoch": 0.1329001050497154, "percentage": 2.66, "elapsed_time": "0:06:19", "remaining_time": "3:51:21", "throughput": 9696.73, "total_tokens": 3675456} +{"current_steps": 5445, "total_steps": 204665, "loss": 0.0356, "lr": 5.319783065422388e-07, "epoch": 0.13302225588156255, "percentage": 2.66, "elapsed_time": "0:06:19", "remaining_time": "3:51:21", "throughput": 9697.17, "total_tokens": 3679040} +{"current_steps": 5450, "total_steps": 204665, "loss": 0.0859, "lr": 5.324668979332584e-07, "epoch": 0.1331444067134097, "percentage": 2.66, "elapsed_time": "0:06:19", "remaining_time": "3:51:20", "throughput": 9698.09, "total_tokens": 3682816} +{"current_steps": 5455, "total_steps": 204665, "loss": 0.2202, "lr": 5.32955489324278e-07, "epoch": 0.13326655754525688, "percentage": 2.67, "elapsed_time": "0:06:20", "remaining_time": "3:51:20", "throughput": 9697.66, "total_tokens": 3685952} +{"current_steps": 5460, "total_steps": 204665, "loss": 0.1361, "lr": 5.334440807152978e-07, "epoch": 0.13338870837710404, "percentage": 2.67, "elapsed_time": "0:06:20", "remaining_time": "3:51:19", "throughput": 9697.07, "total_tokens": 3689024} +{"current_steps": 5465, "total_steps": 204665, "loss": 0.1323, "lr": 5.339326721063175e-07, "epoch": 0.1335108592089512, "percentage": 2.67, "elapsed_time": "0:06:20", "remaining_time": "3:51:19", "throughput": 9697.27, "total_tokens": 3692544} +{"current_steps": 5470, "total_steps": 204665, "loss": 0.1305, "lr": 5.344212634973372e-07, "epoch": 0.1336330100407984, "percentage": 2.67, "elapsed_time": "0:06:21", "remaining_time": "3:51:19", "throughput": 9697.46, "total_tokens": 3696000} +{"current_steps": 5475, "total_steps": 204665, "loss": 0.2171, "lr": 5.349098548883568e-07, "epoch": 0.13375516087264555, "percentage": 2.68, "elapsed_time": "0:06:21", "remaining_time": "3:51:18", "throughput": 9696.98, "total_tokens": 3699136} +{"current_steps": 5480, "total_steps": 204665, "loss": 0.3066, "lr": 5.353984462793766e-07, "epoch": 0.1338773117044927, "percentage": 2.68, "elapsed_time": "0:06:21", "remaining_time": "3:51:18", "throughput": 9697.0, "total_tokens": 3702528} +{"current_steps": 5485, "total_steps": 204665, "loss": 0.2215, "lr": 5.358870376703962e-07, "epoch": 0.13399946253633988, "percentage": 2.68, "elapsed_time": "0:06:22", "remaining_time": "3:51:17", "throughput": 9697.19, "total_tokens": 3705984} +{"current_steps": 5490, "total_steps": 204665, "loss": 0.238, "lr": 5.363756290614159e-07, "epoch": 0.13412161336818704, "percentage": 2.68, "elapsed_time": "0:06:22", "remaining_time": "3:51:17", "throughput": 9696.87, "total_tokens": 3709184} +{"current_steps": 5495, "total_steps": 204665, "loss": 0.1463, "lr": 5.368642204524357e-07, "epoch": 0.1342437642000342, "percentage": 2.68, "elapsed_time": "0:06:22", "remaining_time": "3:51:16", "throughput": 9696.38, "total_tokens": 3712320} +{"current_steps": 5500, "total_steps": 204665, "loss": 0.0943, "lr": 5.373528118434552e-07, "epoch": 0.13436591503188136, "percentage": 2.69, "elapsed_time": "0:06:23", "remaining_time": "3:51:16", "throughput": 9695.8, "total_tokens": 3715392} +{"current_steps": 5505, "total_steps": 204665, "loss": 0.143, "lr": 5.37841403234475e-07, "epoch": 0.13448806586372852, "percentage": 2.69, "elapsed_time": "0:06:23", "remaining_time": "3:51:15", "throughput": 9695.3, "total_tokens": 3718528} +{"current_steps": 5510, "total_steps": 204665, "loss": 0.1029, "lr": 5.383299946254947e-07, "epoch": 0.13461021669557569, "percentage": 2.69, "elapsed_time": "0:06:23", "remaining_time": "3:51:15", "throughput": 9694.73, "total_tokens": 3721600} +{"current_steps": 5515, "total_steps": 204665, "loss": 0.1197, "lr": 5.388185860165143e-07, "epoch": 0.13473236752742287, "percentage": 2.69, "elapsed_time": "0:06:24", "remaining_time": "3:51:14", "throughput": 9695.48, "total_tokens": 3725312} +{"current_steps": 5520, "total_steps": 204665, "loss": 0.1296, "lr": 5.393071774075341e-07, "epoch": 0.13485451835927004, "percentage": 2.7, "elapsed_time": "0:06:24", "remaining_time": "3:51:14", "throughput": 9694.99, "total_tokens": 3728448} +{"current_steps": 5525, "total_steps": 204665, "loss": 0.1721, "lr": 5.397957687985537e-07, "epoch": 0.1349766691911172, "percentage": 2.7, "elapsed_time": "0:06:24", "remaining_time": "3:51:14", "throughput": 9695.68, "total_tokens": 3732160} +{"current_steps": 5530, "total_steps": 204665, "loss": 0.1383, "lr": 5.402843601895734e-07, "epoch": 0.13509882002296436, "percentage": 2.7, "elapsed_time": "0:06:25", "remaining_time": "3:51:13", "throughput": 9695.65, "total_tokens": 3735552} +{"current_steps": 5535, "total_steps": 204665, "loss": 0.1532, "lr": 5.407729515805931e-07, "epoch": 0.13522097085481152, "percentage": 2.7, "elapsed_time": "0:06:25", "remaining_time": "3:51:13", "throughput": 9695.69, "total_tokens": 3738944} +{"current_steps": 5540, "total_steps": 204665, "loss": 0.177, "lr": 5.412615429716129e-07, "epoch": 0.13534312168665869, "percentage": 2.71, "elapsed_time": "0:06:25", "remaining_time": "3:51:13", "throughput": 9696.52, "total_tokens": 3742720} +{"current_steps": 5545, "total_steps": 204665, "loss": 0.1112, "lr": 5.417501343626325e-07, "epoch": 0.13546527251850585, "percentage": 2.71, "elapsed_time": "0:06:26", "remaining_time": "3:51:13", "throughput": 9696.34, "total_tokens": 3745984} +{"current_steps": 5550, "total_steps": 204665, "loss": 0.0857, "lr": 5.422387257536522e-07, "epoch": 0.135587423350353, "percentage": 2.71, "elapsed_time": "0:06:26", "remaining_time": "3:51:12", "throughput": 9697.05, "total_tokens": 3749696} +{"current_steps": 5555, "total_steps": 204665, "loss": 0.0957, "lr": 5.42727317144672e-07, "epoch": 0.13570957418220017, "percentage": 2.71, "elapsed_time": "0:06:27", "remaining_time": "3:51:12", "throughput": 9697.16, "total_tokens": 3753152} +{"current_steps": 5560, "total_steps": 204665, "loss": 0.0524, "lr": 5.432159085356915e-07, "epoch": 0.13583172501404733, "percentage": 2.72, "elapsed_time": "0:06:27", "remaining_time": "3:51:12", "throughput": 9697.81, "total_tokens": 3756800} +{"current_steps": 5565, "total_steps": 204665, "loss": 0.1252, "lr": 5.437044999267113e-07, "epoch": 0.13595387584589452, "percentage": 2.72, "elapsed_time": "0:06:27", "remaining_time": "3:51:12", "throughput": 9699.32, "total_tokens": 3760896} +{"current_steps": 5570, "total_steps": 204665, "loss": 0.0735, "lr": 5.441930913177309e-07, "epoch": 0.13607602667774168, "percentage": 2.72, "elapsed_time": "0:06:28", "remaining_time": "3:51:12", "throughput": 9698.78, "total_tokens": 3764032} +{"current_steps": 5575, "total_steps": 204665, "loss": 0.0557, "lr": 5.446816827087507e-07, "epoch": 0.13619817750958885, "percentage": 2.72, "elapsed_time": "0:06:28", "remaining_time": "3:51:11", "throughput": 9698.96, "total_tokens": 3767488} +{"current_steps": 5580, "total_steps": 204665, "loss": 0.2006, "lr": 5.451702740997704e-07, "epoch": 0.136320328341436, "percentage": 2.73, "elapsed_time": "0:06:28", "remaining_time": "3:51:11", "throughput": 9699.14, "total_tokens": 3770944} +{"current_steps": 5585, "total_steps": 204665, "loss": 0.1705, "lr": 5.456588654907899e-07, "epoch": 0.13644247917328317, "percentage": 2.73, "elapsed_time": "0:06:29", "remaining_time": "3:51:10", "throughput": 9698.92, "total_tokens": 3774208} +{"current_steps": 5590, "total_steps": 204665, "loss": 0.1656, "lr": 5.461474568818097e-07, "epoch": 0.13656463000513033, "percentage": 2.73, "elapsed_time": "0:06:29", "remaining_time": "3:51:10", "throughput": 9699.6, "total_tokens": 3777920} +{"current_steps": 5595, "total_steps": 204665, "loss": 0.0259, "lr": 5.466360482728294e-07, "epoch": 0.1366867808369775, "percentage": 2.73, "elapsed_time": "0:06:29", "remaining_time": "3:51:10", "throughput": 9699.99, "total_tokens": 3781504} +{"current_steps": 5600, "total_steps": 204665, "loss": 0.1219, "lr": 5.471246396638492e-07, "epoch": 0.13680893166882466, "percentage": 2.74, "elapsed_time": "0:06:30", "remaining_time": "3:51:10", "throughput": 9700.82, "total_tokens": 3785280} +{"current_steps": 5605, "total_steps": 204665, "loss": 0.1452, "lr": 5.476132310548688e-07, "epoch": 0.13693108250067182, "percentage": 2.74, "elapsed_time": "0:06:30", "remaining_time": "3:51:12", "throughput": 9705.63, "total_tokens": 3791168} +{"current_steps": 5610, "total_steps": 204665, "loss": 0.0706, "lr": 5.481018224458884e-07, "epoch": 0.13705323333251898, "percentage": 2.74, "elapsed_time": "0:06:30", "remaining_time": "3:51:12", "throughput": 9705.46, "total_tokens": 3794432} +{"current_steps": 5615, "total_steps": 204665, "loss": 0.0769, "lr": 5.485904138369081e-07, "epoch": 0.13717538416436617, "percentage": 2.74, "elapsed_time": "0:06:31", "remaining_time": "3:51:11", "throughput": 9704.89, "total_tokens": 3797504} +{"current_steps": 5620, "total_steps": 204665, "loss": 0.1494, "lr": 5.490790052279278e-07, "epoch": 0.13729753499621333, "percentage": 2.75, "elapsed_time": "0:06:31", "remaining_time": "3:51:11", "throughput": 9705.66, "total_tokens": 3801280} +{"current_steps": 5625, "total_steps": 204665, "loss": 0.1295, "lr": 5.495675966189476e-07, "epoch": 0.1374196858280605, "percentage": 2.75, "elapsed_time": "0:06:32", "remaining_time": "3:51:10", "throughput": 9705.9, "total_tokens": 3804736} +{"current_steps": 5630, "total_steps": 204665, "loss": 0.1242, "lr": 5.500561880099672e-07, "epoch": 0.13754183665990766, "percentage": 2.75, "elapsed_time": "0:06:32", "remaining_time": "3:51:10", "throughput": 9706.66, "total_tokens": 3808512} +{"current_steps": 5635, "total_steps": 204665, "loss": 0.161, "lr": 5.50544779400987e-07, "epoch": 0.13766398749175482, "percentage": 2.75, "elapsed_time": "0:06:32", "remaining_time": "3:51:10", "throughput": 9706.49, "total_tokens": 3811776} +{"current_steps": 5640, "total_steps": 204665, "loss": 0.1648, "lr": 5.510333707920066e-07, "epoch": 0.13778613832360198, "percentage": 2.76, "elapsed_time": "0:06:33", "remaining_time": "3:51:10", "throughput": 9706.58, "total_tokens": 3815232} +{"current_steps": 5645, "total_steps": 204665, "loss": 0.0756, "lr": 5.515219621830262e-07, "epoch": 0.13790828915544914, "percentage": 2.76, "elapsed_time": "0:06:33", "remaining_time": "3:51:10", "throughput": 9707.0, "total_tokens": 3818816} +{"current_steps": 5650, "total_steps": 204665, "loss": 0.147, "lr": 5.52010553574046e-07, "epoch": 0.1380304399872963, "percentage": 2.76, "elapsed_time": "0:06:33", "remaining_time": "3:51:09", "throughput": 9706.58, "total_tokens": 3821952} +{"current_steps": 5655, "total_steps": 204665, "loss": 0.0421, "lr": 5.524991449650657e-07, "epoch": 0.13815259081914347, "percentage": 2.76, "elapsed_time": "0:06:34", "remaining_time": "3:51:08", "throughput": 9706.81, "total_tokens": 3825408} +{"current_steps": 5660, "total_steps": 204665, "loss": 0.0934, "lr": 5.529877363560854e-07, "epoch": 0.13827474165099066, "percentage": 2.77, "elapsed_time": "0:06:34", "remaining_time": "3:51:08", "throughput": 9706.94, "total_tokens": 3828864} +{"current_steps": 5665, "total_steps": 204665, "loss": 0.0789, "lr": 5.53476327747105e-07, "epoch": 0.13839689248283782, "percentage": 2.77, "elapsed_time": "0:06:34", "remaining_time": "3:51:08", "throughput": 9706.61, "total_tokens": 3832064} +{"current_steps": 5670, "total_steps": 204665, "loss": 0.0692, "lr": 5.539649191381247e-07, "epoch": 0.13851904331468498, "percentage": 2.77, "elapsed_time": "0:06:35", "remaining_time": "3:51:07", "throughput": 9706.29, "total_tokens": 3835264} +{"current_steps": 5675, "total_steps": 204665, "loss": 0.1247, "lr": 5.544535105291444e-07, "epoch": 0.13864119414653214, "percentage": 2.77, "elapsed_time": "0:06:35", "remaining_time": "3:51:07", "throughput": 9706.13, "total_tokens": 3838528} +{"current_steps": 5680, "total_steps": 204665, "loss": 0.1948, "lr": 5.549421019201641e-07, "epoch": 0.1387633449783793, "percentage": 2.78, "elapsed_time": "0:06:35", "remaining_time": "3:51:06", "throughput": 9706.3, "total_tokens": 3841984} +{"current_steps": 5685, "total_steps": 204665, "loss": 0.1382, "lr": 5.554306933111839e-07, "epoch": 0.13888549581022647, "percentage": 2.78, "elapsed_time": "0:06:36", "remaining_time": "3:51:06", "throughput": 9706.47, "total_tokens": 3845440} +{"current_steps": 5690, "total_steps": 204665, "loss": 0.1565, "lr": 5.559192847022035e-07, "epoch": 0.13900764664207363, "percentage": 2.78, "elapsed_time": "0:06:36", "remaining_time": "3:51:05", "throughput": 9706.31, "total_tokens": 3848704} +{"current_steps": 5695, "total_steps": 204665, "loss": 0.1449, "lr": 5.564078760932232e-07, "epoch": 0.1391297974739208, "percentage": 2.78, "elapsed_time": "0:06:36", "remaining_time": "3:51:05", "throughput": 9707.07, "total_tokens": 3852480} +{"current_steps": 5700, "total_steps": 204665, "loss": 0.2077, "lr": 5.568964674842429e-07, "epoch": 0.13925194830576795, "percentage": 2.79, "elapsed_time": "0:06:37", "remaining_time": "3:51:05", "throughput": 9706.6, "total_tokens": 3855616} +{"current_steps": 5705, "total_steps": 204665, "loss": 0.0602, "lr": 5.573850588752625e-07, "epoch": 0.13937409913761512, "percentage": 2.79, "elapsed_time": "0:06:37", "remaining_time": "3:51:05", "throughput": 9707.0, "total_tokens": 3859200} +{"current_steps": 5710, "total_steps": 204665, "loss": 0.2262, "lr": 5.578736502662823e-07, "epoch": 0.1394962499694623, "percentage": 2.79, "elapsed_time": "0:06:37", "remaining_time": "3:51:04", "throughput": 9707.42, "total_tokens": 3862784} +{"current_steps": 5715, "total_steps": 204665, "loss": 0.0682, "lr": 5.58362241657302e-07, "epoch": 0.13961840080130947, "percentage": 2.79, "elapsed_time": "0:06:38", "remaining_time": "3:51:04", "throughput": 9706.94, "total_tokens": 3865920} +{"current_steps": 5720, "total_steps": 204665, "loss": 0.036, "lr": 5.588508330483217e-07, "epoch": 0.13974055163315663, "percentage": 2.79, "elapsed_time": "0:06:38", "remaining_time": "3:51:04", "throughput": 9707.67, "total_tokens": 3869632} +{"current_steps": 5725, "total_steps": 204665, "loss": 0.1062, "lr": 5.593394244393413e-07, "epoch": 0.1398627024650038, "percentage": 2.8, "elapsed_time": "0:06:38", "remaining_time": "3:51:03", "throughput": 9706.86, "total_tokens": 3872576} +{"current_steps": 5730, "total_steps": 204665, "loss": 0.2479, "lr": 5.59828015830361e-07, "epoch": 0.13998485329685095, "percentage": 2.8, "elapsed_time": "0:06:39", "remaining_time": "3:51:03", "throughput": 9707.57, "total_tokens": 3876288} +{"current_steps": 5735, "total_steps": 204665, "loss": 0.138, "lr": 5.603166072213807e-07, "epoch": 0.14010700412869811, "percentage": 2.8, "elapsed_time": "0:06:39", "remaining_time": "3:51:02", "throughput": 9707.01, "total_tokens": 3879360} +{"current_steps": 5740, "total_steps": 204665, "loss": 0.1795, "lr": 5.608051986124004e-07, "epoch": 0.14022915496054528, "percentage": 2.8, "elapsed_time": "0:06:40", "remaining_time": "3:51:02", "throughput": 9707.98, "total_tokens": 3883200} +{"current_steps": 5745, "total_steps": 204665, "loss": 0.1743, "lr": 5.612937900034202e-07, "epoch": 0.14035130579239244, "percentage": 2.81, "elapsed_time": "0:06:40", "remaining_time": "3:51:02", "throughput": 9709.37, "total_tokens": 3887296} +{"current_steps": 5750, "total_steps": 204665, "loss": 0.1256, "lr": 5.617823813944397e-07, "epoch": 0.1404734566242396, "percentage": 2.81, "elapsed_time": "0:06:40", "remaining_time": "3:51:02", "throughput": 9709.77, "total_tokens": 3890880} +{"current_steps": 5755, "total_steps": 204665, "loss": 0.1736, "lr": 5.622709727854595e-07, "epoch": 0.14059560745608676, "percentage": 2.81, "elapsed_time": "0:06:41", "remaining_time": "3:51:01", "throughput": 9709.7, "total_tokens": 3894208} +{"current_steps": 5760, "total_steps": 204665, "loss": 0.1214, "lr": 5.627595641764792e-07, "epoch": 0.14071775828793395, "percentage": 2.81, "elapsed_time": "0:06:41", "remaining_time": "3:51:01", "throughput": 9709.68, "total_tokens": 3897536} +{"current_steps": 5765, "total_steps": 204665, "loss": 0.1589, "lr": 5.632481555674988e-07, "epoch": 0.14083990911978111, "percentage": 2.82, "elapsed_time": "0:06:41", "remaining_time": "3:51:00", "throughput": 9708.88, "total_tokens": 3900480} +{"current_steps": 5770, "total_steps": 204665, "loss": 0.1105, "lr": 5.637367469585186e-07, "epoch": 0.14096205995162828, "percentage": 2.82, "elapsed_time": "0:06:42", "remaining_time": "3:51:00", "throughput": 9708.32, "total_tokens": 3903552} +{"current_steps": 5775, "total_steps": 204665, "loss": 0.0396, "lr": 5.642253383495383e-07, "epoch": 0.14108421078347544, "percentage": 2.82, "elapsed_time": "0:06:42", "remaining_time": "3:50:59", "throughput": 9707.85, "total_tokens": 3906688} +{"current_steps": 5780, "total_steps": 204665, "loss": 0.1107, "lr": 5.647139297405579e-07, "epoch": 0.1412063616153226, "percentage": 2.82, "elapsed_time": "0:06:42", "remaining_time": "3:50:59", "throughput": 9707.87, "total_tokens": 3910080} +{"current_steps": 5785, "total_steps": 204665, "loss": 0.1385, "lr": 5.652025211315776e-07, "epoch": 0.14132851244716976, "percentage": 2.83, "elapsed_time": "0:06:43", "remaining_time": "3:50:58", "throughput": 9706.86, "total_tokens": 3912960} +{"current_steps": 5790, "total_steps": 204665, "loss": 0.1114, "lr": 5.656911125225974e-07, "epoch": 0.14145066327901692, "percentage": 2.83, "elapsed_time": "0:06:43", "remaining_time": "3:50:57", "throughput": 9706.07, "total_tokens": 3915904} +{"current_steps": 5795, "total_steps": 204665, "loss": 0.1246, "lr": 5.66179703913617e-07, "epoch": 0.1415728141108641, "percentage": 2.83, "elapsed_time": "0:06:43", "remaining_time": "3:50:57", "throughput": 9705.76, "total_tokens": 3919104} +{"current_steps": 5800, "total_steps": 204665, "loss": 0.2857, "lr": 5.666682953046367e-07, "epoch": 0.14169496494271125, "percentage": 2.83, "elapsed_time": "0:06:44", "remaining_time": "3:50:56", "throughput": 9705.78, "total_tokens": 3922496} +{"current_steps": 5805, "total_steps": 204665, "loss": 0.0916, "lr": 5.671568866956564e-07, "epoch": 0.1418171157745584, "percentage": 2.84, "elapsed_time": "0:06:44", "remaining_time": "3:50:56", "throughput": 9705.63, "total_tokens": 3925824} +{"current_steps": 5810, "total_steps": 204665, "loss": 0.1194, "lr": 5.67645478086676e-07, "epoch": 0.1419392666064056, "percentage": 2.84, "elapsed_time": "0:06:44", "remaining_time": "3:50:56", "throughput": 9705.88, "total_tokens": 3929344} +{"current_steps": 5815, "total_steps": 204665, "loss": 0.1381, "lr": 5.681340694776958e-07, "epoch": 0.14206141743825276, "percentage": 2.84, "elapsed_time": "0:06:45", "remaining_time": "3:50:55", "throughput": 9705.65, "total_tokens": 3932608} +{"current_steps": 5820, "total_steps": 204665, "loss": 0.1014, "lr": 5.686226608687155e-07, "epoch": 0.14218356827009992, "percentage": 2.84, "elapsed_time": "0:06:45", "remaining_time": "3:50:55", "throughput": 9705.58, "total_tokens": 3935936} +{"current_steps": 5825, "total_steps": 204665, "loss": 0.129, "lr": 5.691112522597351e-07, "epoch": 0.1423057191019471, "percentage": 2.85, "elapsed_time": "0:06:45", "remaining_time": "3:50:55", "throughput": 9706.15, "total_tokens": 3939648} +{"current_steps": 5830, "total_steps": 204665, "loss": 0.0574, "lr": 5.695998436507549e-07, "epoch": 0.14242786993379425, "percentage": 2.85, "elapsed_time": "0:06:46", "remaining_time": "3:50:55", "throughput": 9706.01, "total_tokens": 3942976} +{"current_steps": 5835, "total_steps": 204665, "loss": 0.1438, "lr": 5.700884350417745e-07, "epoch": 0.1425500207656414, "percentage": 2.85, "elapsed_time": "0:06:46", "remaining_time": "3:50:54", "throughput": 9705.41, "total_tokens": 3946048} +{"current_steps": 5840, "total_steps": 204665, "loss": 0.1404, "lr": 5.705770264327942e-07, "epoch": 0.14267217159748857, "percentage": 2.85, "elapsed_time": "0:06:46", "remaining_time": "3:50:54", "throughput": 9705.73, "total_tokens": 3949568} +{"current_steps": 5845, "total_steps": 204665, "loss": 0.1302, "lr": 5.710656178238139e-07, "epoch": 0.14279432242933573, "percentage": 2.86, "elapsed_time": "0:06:47", "remaining_time": "3:50:53", "throughput": 9706.09, "total_tokens": 3953088} +{"current_steps": 5850, "total_steps": 204665, "loss": 0.063, "lr": 5.715542092148337e-07, "epoch": 0.1429164732611829, "percentage": 2.86, "elapsed_time": "0:06:47", "remaining_time": "3:50:53", "throughput": 9706.02, "total_tokens": 3956416} +{"current_steps": 5855, "total_steps": 204665, "loss": 0.1239, "lr": 5.720428006058533e-07, "epoch": 0.1430386240930301, "percentage": 2.86, "elapsed_time": "0:06:47", "remaining_time": "3:50:52", "throughput": 9705.64, "total_tokens": 3959616} +{"current_steps": 5860, "total_steps": 204665, "loss": 0.0998, "lr": 5.725313919968729e-07, "epoch": 0.14316077492487725, "percentage": 2.86, "elapsed_time": "0:06:48", "remaining_time": "3:50:52", "throughput": 9705.57, "total_tokens": 3962944} +{"current_steps": 5865, "total_steps": 204665, "loss": 0.1661, "lr": 5.730199833878927e-07, "epoch": 0.1432829257567244, "percentage": 2.87, "elapsed_time": "0:06:48", "remaining_time": "3:50:52", "throughput": 9705.98, "total_tokens": 3966528} +{"current_steps": 5870, "total_steps": 204665, "loss": 0.0673, "lr": 5.735085747789123e-07, "epoch": 0.14340507658857157, "percentage": 2.87, "elapsed_time": "0:06:49", "remaining_time": "3:50:51", "throughput": 9705.67, "total_tokens": 3969728} +{"current_steps": 5875, "total_steps": 204665, "loss": 0.1448, "lr": 5.739971661699321e-07, "epoch": 0.14352722742041873, "percentage": 2.87, "elapsed_time": "0:06:49", "remaining_time": "3:50:51", "throughput": 9705.6, "total_tokens": 3973056} +{"current_steps": 5880, "total_steps": 204665, "loss": 0.063, "lr": 5.744857575609518e-07, "epoch": 0.1436493782522659, "percentage": 2.87, "elapsed_time": "0:06:49", "remaining_time": "3:50:50", "throughput": 9705.21, "total_tokens": 3976192} +{"current_steps": 5885, "total_steps": 204665, "loss": 0.2091, "lr": 5.749743489519713e-07, "epoch": 0.14377152908411306, "percentage": 2.88, "elapsed_time": "0:06:50", "remaining_time": "3:50:50", "throughput": 9707.0, "total_tokens": 3980480} +{"current_steps": 5890, "total_steps": 204665, "loss": 0.15, "lr": 5.754629403429911e-07, "epoch": 0.14389367991596022, "percentage": 2.88, "elapsed_time": "0:06:50", "remaining_time": "3:50:50", "throughput": 9707.25, "total_tokens": 3984000} +{"current_steps": 5895, "total_steps": 204665, "loss": 0.1052, "lr": 5.759515317340108e-07, "epoch": 0.14401583074780738, "percentage": 2.88, "elapsed_time": "0:06:50", "remaining_time": "3:50:50", "throughput": 9707.03, "total_tokens": 3987264} +{"current_steps": 5900, "total_steps": 204665, "loss": 0.0707, "lr": 5.764401231250305e-07, "epoch": 0.14413798157965454, "percentage": 2.88, "elapsed_time": "0:06:51", "remaining_time": "3:50:49", "throughput": 9707.28, "total_tokens": 3990784} +{"current_steps": 5905, "total_steps": 204665, "loss": 0.1064, "lr": 5.769287145160502e-07, "epoch": 0.14426013241150173, "percentage": 2.89, "elapsed_time": "0:06:51", "remaining_time": "3:50:49", "throughput": 9707.58, "total_tokens": 3994304} +{"current_steps": 5910, "total_steps": 204665, "loss": 0.0527, "lr": 5.7741730590707e-07, "epoch": 0.1443822832433489, "percentage": 2.89, "elapsed_time": "0:06:51", "remaining_time": "3:50:49", "throughput": 9707.2, "total_tokens": 3997504} +{"current_steps": 5915, "total_steps": 204665, "loss": 0.1981, "lr": 5.779058972980895e-07, "epoch": 0.14450443407519606, "percentage": 2.89, "elapsed_time": "0:06:52", "remaining_time": "3:50:48", "throughput": 9707.12, "total_tokens": 4000832} +{"current_steps": 5920, "total_steps": 204665, "loss": 0.0958, "lr": 5.783944886891092e-07, "epoch": 0.14462658490704322, "percentage": 2.89, "elapsed_time": "0:06:52", "remaining_time": "3:50:48", "throughput": 9707.45, "total_tokens": 4004352} +{"current_steps": 5925, "total_steps": 204665, "loss": 0.1188, "lr": 5.78883080080129e-07, "epoch": 0.14474873573889038, "percentage": 2.89, "elapsed_time": "0:06:52", "remaining_time": "3:50:48", "throughput": 9707.06, "total_tokens": 4007552} +{"current_steps": 5930, "total_steps": 204665, "loss": 0.1757, "lr": 5.793716714711486e-07, "epoch": 0.14487088657073754, "percentage": 2.9, "elapsed_time": "0:06:53", "remaining_time": "3:50:47", "throughput": 9707.07, "total_tokens": 4010944} +{"current_steps": 5935, "total_steps": 204665, "loss": 0.0767, "lr": 5.798602628621684e-07, "epoch": 0.1449930374025847, "percentage": 2.9, "elapsed_time": "0:06:53", "remaining_time": "3:50:47", "throughput": 9707.45, "total_tokens": 4014528} +{"current_steps": 5940, "total_steps": 204665, "loss": 0.1921, "lr": 5.80348854253188e-07, "epoch": 0.14511518823443187, "percentage": 2.9, "elapsed_time": "0:06:53", "remaining_time": "3:50:47", "throughput": 9708.36, "total_tokens": 4018368} +{"current_steps": 5945, "total_steps": 204665, "loss": 0.0764, "lr": 5.808374456442076e-07, "epoch": 0.14523733906627903, "percentage": 2.9, "elapsed_time": "0:06:54", "remaining_time": "3:50:47", "throughput": 9708.64, "total_tokens": 4021888} +{"current_steps": 5950, "total_steps": 204665, "loss": 0.0979, "lr": 5.813260370352274e-07, "epoch": 0.1453594898981262, "percentage": 2.91, "elapsed_time": "0:06:54", "remaining_time": "3:50:46", "throughput": 9708.25, "total_tokens": 4025024} +{"current_steps": 5955, "total_steps": 204665, "loss": 0.1028, "lr": 5.818146284262471e-07, "epoch": 0.14548164072997338, "percentage": 2.91, "elapsed_time": "0:06:54", "remaining_time": "3:50:46", "throughput": 9708.31, "total_tokens": 4028416} +{"current_steps": 5960, "total_steps": 204665, "loss": 0.1325, "lr": 5.823032198172668e-07, "epoch": 0.14560379156182054, "percentage": 2.91, "elapsed_time": "0:06:55", "remaining_time": "3:50:45", "throughput": 9707.83, "total_tokens": 4031488} +{"current_steps": 5965, "total_steps": 204665, "loss": 0.305, "lr": 5.827918112082865e-07, "epoch": 0.1457259423936677, "percentage": 2.91, "elapsed_time": "0:06:55", "remaining_time": "3:50:45", "throughput": 9708.09, "total_tokens": 4035008} +{"current_steps": 5970, "total_steps": 204665, "loss": 0.1991, "lr": 5.832804025993062e-07, "epoch": 0.14584809322551487, "percentage": 2.92, "elapsed_time": "0:06:55", "remaining_time": "3:50:44", "throughput": 9707.87, "total_tokens": 4038272} +{"current_steps": 5975, "total_steps": 204665, "loss": 0.1803, "lr": 5.837689939903258e-07, "epoch": 0.14597024405736203, "percentage": 2.92, "elapsed_time": "0:06:56", "remaining_time": "3:50:44", "throughput": 9707.64, "total_tokens": 4041536} +{"current_steps": 5980, "total_steps": 204665, "loss": 0.2937, "lr": 5.842575853813455e-07, "epoch": 0.1460923948892092, "percentage": 2.92, "elapsed_time": "0:06:56", "remaining_time": "3:50:43", "throughput": 9707.1, "total_tokens": 4044608} +{"current_steps": 5985, "total_steps": 204665, "loss": 0.0937, "lr": 5.847461767723653e-07, "epoch": 0.14621454572105635, "percentage": 2.92, "elapsed_time": "0:06:57", "remaining_time": "3:50:43", "throughput": 9707.26, "total_tokens": 4048064} +{"current_steps": 5990, "total_steps": 204665, "loss": 0.098, "lr": 5.852347681633849e-07, "epoch": 0.14633669655290352, "percentage": 2.93, "elapsed_time": "0:06:57", "remaining_time": "3:50:42", "throughput": 9707.14, "total_tokens": 4051392} +{"current_steps": 5995, "total_steps": 204665, "loss": 0.1357, "lr": 5.857233595544047e-07, "epoch": 0.14645884738475068, "percentage": 2.93, "elapsed_time": "0:06:57", "remaining_time": "3:50:42", "throughput": 9708.01, "total_tokens": 4055232} +{"current_steps": 6000, "total_steps": 204665, "loss": 0.1316, "lr": 5.862119509454243e-07, "epoch": 0.14658099821659787, "percentage": 2.93, "elapsed_time": "0:06:58", "remaining_time": "3:50:42", "throughput": 9707.11, "total_tokens": 4058112} +{"current_steps": 6005, "total_steps": 204665, "loss": 0.1374, "lr": 5.86700542336444e-07, "epoch": 0.14670314904844503, "percentage": 2.93, "elapsed_time": "0:06:58", "remaining_time": "3:50:41", "throughput": 9706.35, "total_tokens": 4061056} +{"current_steps": 6010, "total_steps": 204665, "loss": 0.112, "lr": 5.871891337274637e-07, "epoch": 0.1468252998802922, "percentage": 2.94, "elapsed_time": "0:06:58", "remaining_time": "3:50:41", "throughput": 9706.74, "total_tokens": 4064640} +{"current_steps": 6015, "total_steps": 204665, "loss": 0.1227, "lr": 5.876777251184834e-07, "epoch": 0.14694745071213935, "percentage": 2.94, "elapsed_time": "0:06:59", "remaining_time": "3:50:40", "throughput": 9706.53, "total_tokens": 4067904} +{"current_steps": 6020, "total_steps": 204665, "loss": 0.213, "lr": 5.881663165095031e-07, "epoch": 0.14706960154398652, "percentage": 2.94, "elapsed_time": "0:06:59", "remaining_time": "3:50:40", "throughput": 9706.47, "total_tokens": 4071232} +{"current_steps": 6025, "total_steps": 204665, "loss": 0.0935, "lr": 5.886549079005227e-07, "epoch": 0.14719175237583368, "percentage": 2.94, "elapsed_time": "0:06:59", "remaining_time": "3:50:39", "throughput": 9706.25, "total_tokens": 4074496} +{"current_steps": 6030, "total_steps": 204665, "loss": 0.1533, "lr": 5.891434992915425e-07, "epoch": 0.14731390320768084, "percentage": 2.95, "elapsed_time": "0:07:00", "remaining_time": "3:50:39", "throughput": 9706.32, "total_tokens": 4077888} +{"current_steps": 6035, "total_steps": 204665, "loss": 0.1824, "lr": 5.896320906825621e-07, "epoch": 0.147436054039528, "percentage": 2.95, "elapsed_time": "0:07:00", "remaining_time": "3:50:38", "throughput": 9705.74, "total_tokens": 4080960} +{"current_steps": 6040, "total_steps": 204665, "loss": 0.1432, "lr": 5.901206820735818e-07, "epoch": 0.14755820487137516, "percentage": 2.95, "elapsed_time": "0:07:00", "remaining_time": "3:50:38", "throughput": 9705.84, "total_tokens": 4084416} +{"current_steps": 6045, "total_steps": 204665, "loss": 0.0937, "lr": 5.906092734646016e-07, "epoch": 0.14768035570322233, "percentage": 2.95, "elapsed_time": "0:07:01", "remaining_time": "3:50:38", "throughput": 9705.76, "total_tokens": 4087744} +{"current_steps": 6050, "total_steps": 204665, "loss": 0.1235, "lr": 5.910978648556211e-07, "epoch": 0.14780250653506952, "percentage": 2.96, "elapsed_time": "0:07:01", "remaining_time": "3:50:37", "throughput": 9705.92, "total_tokens": 4091200} +{"current_steps": 6055, "total_steps": 204665, "loss": 0.0512, "lr": 5.915864562466409e-07, "epoch": 0.14792465736691668, "percentage": 2.96, "elapsed_time": "0:07:01", "remaining_time": "3:50:37", "throughput": 9705.41, "total_tokens": 4094336} +{"current_steps": 6060, "total_steps": 204665, "loss": 0.1712, "lr": 5.920750476376606e-07, "epoch": 0.14804680819876384, "percentage": 2.96, "elapsed_time": "0:07:02", "remaining_time": "3:50:37", "throughput": 9706.01, "total_tokens": 4098048} +{"current_steps": 6065, "total_steps": 204665, "loss": 0.1851, "lr": 5.925636390286803e-07, "epoch": 0.148168959030611, "percentage": 2.96, "elapsed_time": "0:07:02", "remaining_time": "3:50:37", "throughput": 9706.64, "total_tokens": 4101760} +{"current_steps": 6070, "total_steps": 204665, "loss": 0.1798, "lr": 5.930522304197e-07, "epoch": 0.14829110986245816, "percentage": 2.97, "elapsed_time": "0:07:02", "remaining_time": "3:50:37", "throughput": 9707.17, "total_tokens": 4105408} +{"current_steps": 6075, "total_steps": 204665, "loss": 0.1347, "lr": 5.935408218107197e-07, "epoch": 0.14841326069430533, "percentage": 2.97, "elapsed_time": "0:07:03", "remaining_time": "3:50:37", "throughput": 9708.11, "total_tokens": 4109312} +{"current_steps": 6080, "total_steps": 204665, "loss": 0.1295, "lr": 5.940294132017393e-07, "epoch": 0.1485354115261525, "percentage": 2.97, "elapsed_time": "0:07:03", "remaining_time": "3:50:36", "throughput": 9707.99, "total_tokens": 4112640} +{"current_steps": 6085, "total_steps": 204665, "loss": 0.1796, "lr": 5.94518004592759e-07, "epoch": 0.14865756235799965, "percentage": 2.97, "elapsed_time": "0:07:03", "remaining_time": "3:50:36", "throughput": 9707.71, "total_tokens": 4115904} +{"current_steps": 6090, "total_steps": 204665, "loss": 0.0824, "lr": 5.950065959837788e-07, "epoch": 0.1487797131898468, "percentage": 2.98, "elapsed_time": "0:07:04", "remaining_time": "3:50:36", "throughput": 9707.86, "total_tokens": 4119360} +{"current_steps": 6095, "total_steps": 204665, "loss": 0.1154, "lr": 5.954951873747984e-07, "epoch": 0.14890186402169397, "percentage": 2.98, "elapsed_time": "0:07:04", "remaining_time": "3:50:35", "throughput": 9708.02, "total_tokens": 4122816} +{"current_steps": 6100, "total_steps": 204665, "loss": 0.1101, "lr": 5.959837787658181e-07, "epoch": 0.14902401485354116, "percentage": 2.98, "elapsed_time": "0:07:05", "remaining_time": "3:50:35", "throughput": 9707.88, "total_tokens": 4126144} +{"current_steps": 6105, "total_steps": 204665, "loss": 0.0952, "lr": 5.964723701568379e-07, "epoch": 0.14914616568538833, "percentage": 2.98, "elapsed_time": "0:07:05", "remaining_time": "3:50:34", "throughput": 9707.35, "total_tokens": 4129216} +{"current_steps": 6110, "total_steps": 204665, "loss": 0.1513, "lr": 5.969609615478574e-07, "epoch": 0.1492683165172355, "percentage": 2.99, "elapsed_time": "0:07:05", "remaining_time": "3:50:33", "throughput": 9706.37, "total_tokens": 4132032} +{"current_steps": 6115, "total_steps": 204665, "loss": 0.1582, "lr": 5.974495529388772e-07, "epoch": 0.14939046734908265, "percentage": 2.99, "elapsed_time": "0:07:06", "remaining_time": "3:50:33", "throughput": 9706.77, "total_tokens": 4135616} +{"current_steps": 6120, "total_steps": 204665, "loss": 0.1166, "lr": 5.979381443298969e-07, "epoch": 0.1495126181809298, "percentage": 2.99, "elapsed_time": "0:07:06", "remaining_time": "3:50:33", "throughput": 9706.97, "total_tokens": 4139136} +{"current_steps": 6125, "total_steps": 204665, "loss": 0.223, "lr": 5.984267357209166e-07, "epoch": 0.14963476901277697, "percentage": 2.99, "elapsed_time": "0:07:06", "remaining_time": "3:50:33", "throughput": 9706.9, "total_tokens": 4142464} +{"current_steps": 6130, "total_steps": 204665, "loss": 0.063, "lr": 5.989153271119363e-07, "epoch": 0.14975691984462414, "percentage": 3.0, "elapsed_time": "0:07:07", "remaining_time": "3:50:32", "throughput": 9707.07, "total_tokens": 4145920} +{"current_steps": 6135, "total_steps": 204665, "loss": 0.0648, "lr": 5.994039185029559e-07, "epoch": 0.1498790706764713, "percentage": 3.0, "elapsed_time": "0:07:07", "remaining_time": "3:50:32", "throughput": 9707.57, "total_tokens": 4149568} +{"current_steps": 6140, "total_steps": 204665, "loss": 0.2033, "lr": 5.998925098939756e-07, "epoch": 0.15000122150831846, "percentage": 3.0, "elapsed_time": "0:07:08", "remaining_time": "3:50:41", "throughput": 9701.71, "total_tokens": 4153088} +{"current_steps": 6145, "total_steps": 204665, "loss": 0.1186, "lr": 6.003811012849953e-07, "epoch": 0.15012337234016562, "percentage": 3.0, "elapsed_time": "0:07:08", "remaining_time": "3:50:40", "throughput": 9701.86, "total_tokens": 4156544} +{"current_steps": 6150, "total_steps": 204665, "loss": 0.086, "lr": 6.008696926760151e-07, "epoch": 0.1502455231720128, "percentage": 3.0, "elapsed_time": "0:07:08", "remaining_time": "3:50:40", "throughput": 9701.79, "total_tokens": 4159872} +{"current_steps": 6155, "total_steps": 204665, "loss": 0.1547, "lr": 6.013582840670347e-07, "epoch": 0.15036767400385997, "percentage": 3.01, "elapsed_time": "0:07:09", "remaining_time": "3:50:39", "throughput": 9701.11, "total_tokens": 4162880} +{"current_steps": 6160, "total_steps": 204665, "loss": 0.1011, "lr": 6.018468754580543e-07, "epoch": 0.15048982483570714, "percentage": 3.01, "elapsed_time": "0:07:09", "remaining_time": "3:50:39", "throughput": 9701.04, "total_tokens": 4166208} +{"current_steps": 6165, "total_steps": 204665, "loss": 0.0551, "lr": 6.023354668490741e-07, "epoch": 0.1506119756675543, "percentage": 3.01, "elapsed_time": "0:07:09", "remaining_time": "3:50:38", "throughput": 9700.82, "total_tokens": 4169472} +{"current_steps": 6170, "total_steps": 204665, "loss": 0.1453, "lr": 6.028240582400937e-07, "epoch": 0.15073412649940146, "percentage": 3.01, "elapsed_time": "0:07:10", "remaining_time": "3:50:38", "throughput": 9700.3, "total_tokens": 4172544} +{"current_steps": 6175, "total_steps": 204665, "loss": 0.2232, "lr": 6.033126496311135e-07, "epoch": 0.15085627733124862, "percentage": 3.02, "elapsed_time": "0:07:10", "remaining_time": "3:50:38", "throughput": 9701.16, "total_tokens": 4176384} +{"current_steps": 6180, "total_steps": 204665, "loss": 0.0965, "lr": 6.038012410221332e-07, "epoch": 0.15097842816309578, "percentage": 3.02, "elapsed_time": "0:07:10", "remaining_time": "3:50:37", "throughput": 9701.39, "total_tokens": 4179904} +{"current_steps": 6185, "total_steps": 204665, "loss": 0.0665, "lr": 6.042898324131529e-07, "epoch": 0.15110057899494295, "percentage": 3.02, "elapsed_time": "0:07:11", "remaining_time": "3:50:37", "throughput": 9701.16, "total_tokens": 4183104} +{"current_steps": 6190, "total_steps": 204665, "loss": 0.1853, "lr": 6.047784238041725e-07, "epoch": 0.1512227298267901, "percentage": 3.02, "elapsed_time": "0:07:11", "remaining_time": "3:50:37", "throughput": 9701.42, "total_tokens": 4186624} +{"current_steps": 6195, "total_steps": 204665, "loss": 0.1416, "lr": 6.052670151951922e-07, "epoch": 0.1513448806586373, "percentage": 3.03, "elapsed_time": "0:07:11", "remaining_time": "3:50:36", "throughput": 9700.67, "total_tokens": 4189568} +{"current_steps": 6200, "total_steps": 204665, "loss": 0.0886, "lr": 6.057556065862119e-07, "epoch": 0.15146703149048446, "percentage": 3.03, "elapsed_time": "0:07:12", "remaining_time": "3:50:35", "throughput": 9700.73, "total_tokens": 4192960} +{"current_steps": 6205, "total_steps": 204665, "loss": 0.1363, "lr": 6.062441979772316e-07, "epoch": 0.15158918232233162, "percentage": 3.03, "elapsed_time": "0:07:12", "remaining_time": "3:50:35", "throughput": 9700.21, "total_tokens": 4196032} +{"current_steps": 6210, "total_steps": 204665, "loss": 0.1335, "lr": 6.067327893682514e-07, "epoch": 0.15171133315417878, "percentage": 3.03, "elapsed_time": "0:07:12", "remaining_time": "3:50:35", "throughput": 9700.32, "total_tokens": 4199488} +{"current_steps": 6215, "total_steps": 204665, "loss": 0.1709, "lr": 6.07221380759271e-07, "epoch": 0.15183348398602595, "percentage": 3.04, "elapsed_time": "0:07:13", "remaining_time": "3:50:34", "throughput": 9700.24, "total_tokens": 4202816} +{"current_steps": 6220, "total_steps": 204665, "loss": 0.1961, "lr": 6.077099721502907e-07, "epoch": 0.1519556348178731, "percentage": 3.04, "elapsed_time": "0:07:13", "remaining_time": "3:50:33", "throughput": 9699.45, "total_tokens": 4205760} +{"current_steps": 6225, "total_steps": 204665, "loss": 0.1633, "lr": 6.081985635413104e-07, "epoch": 0.15207778564972027, "percentage": 3.04, "elapsed_time": "0:07:13", "remaining_time": "3:50:33", "throughput": 9699.54, "total_tokens": 4209152} +{"current_steps": 6230, "total_steps": 204665, "loss": 0.2286, "lr": 6.0868715493233e-07, "epoch": 0.15219993648156743, "percentage": 3.04, "elapsed_time": "0:07:14", "remaining_time": "3:50:32", "throughput": 9699.12, "total_tokens": 4212288} +{"current_steps": 6235, "total_steps": 204665, "loss": 0.1305, "lr": 6.091757463233498e-07, "epoch": 0.1523220873134146, "percentage": 3.05, "elapsed_time": "0:07:14", "remaining_time": "3:50:32", "throughput": 9699.27, "total_tokens": 4215744} +{"current_steps": 6240, "total_steps": 204665, "loss": 0.1388, "lr": 6.096643377143695e-07, "epoch": 0.15244423814526176, "percentage": 3.05, "elapsed_time": "0:07:14", "remaining_time": "3:50:32", "throughput": 9699.06, "total_tokens": 4219008} +{"current_steps": 6245, "total_steps": 204665, "loss": 0.0912, "lr": 6.101529291053891e-07, "epoch": 0.15256638897710895, "percentage": 3.05, "elapsed_time": "0:07:15", "remaining_time": "3:50:31", "throughput": 9698.86, "total_tokens": 4222272} +{"current_steps": 6250, "total_steps": 204665, "loss": 0.1363, "lr": 6.106415204964088e-07, "epoch": 0.1526885398089561, "percentage": 3.05, "elapsed_time": "0:07:15", "remaining_time": "3:50:31", "throughput": 9699.19, "total_tokens": 4225792} +{"current_steps": 6255, "total_steps": 204665, "loss": 0.1123, "lr": 6.111301118874285e-07, "epoch": 0.15281069064080327, "percentage": 3.06, "elapsed_time": "0:07:16", "remaining_time": "3:50:30", "throughput": 9698.53, "total_tokens": 4228800} +{"current_steps": 6260, "total_steps": 204665, "loss": 0.1594, "lr": 6.116187032784482e-07, "epoch": 0.15293284147265043, "percentage": 3.06, "elapsed_time": "0:07:16", "remaining_time": "3:50:30", "throughput": 9698.24, "total_tokens": 4232000} +{"current_steps": 6265, "total_steps": 204665, "loss": 0.1191, "lr": 6.121072946694679e-07, "epoch": 0.1530549923044976, "percentage": 3.06, "elapsed_time": "0:07:16", "remaining_time": "3:50:29", "throughput": 9698.12, "total_tokens": 4235328} +{"current_steps": 6270, "total_steps": 204665, "loss": 0.1284, "lr": 6.125958860604877e-07, "epoch": 0.15317714313634476, "percentage": 3.06, "elapsed_time": "0:07:17", "remaining_time": "3:50:29", "throughput": 9698.29, "total_tokens": 4238784} +{"current_steps": 6275, "total_steps": 204665, "loss": 0.161, "lr": 6.130844774515072e-07, "epoch": 0.15329929396819192, "percentage": 3.07, "elapsed_time": "0:07:17", "remaining_time": "3:50:29", "throughput": 9698.22, "total_tokens": 4242112} +{"current_steps": 6280, "total_steps": 204665, "loss": 0.1836, "lr": 6.13573068842527e-07, "epoch": 0.15342144480003908, "percentage": 3.07, "elapsed_time": "0:07:17", "remaining_time": "3:50:28", "throughput": 9698.38, "total_tokens": 4245568} +{"current_steps": 6285, "total_steps": 204665, "loss": 0.1363, "lr": 6.140616602335467e-07, "epoch": 0.15354359563188624, "percentage": 3.07, "elapsed_time": "0:07:18", "remaining_time": "3:50:28", "throughput": 9698.33, "total_tokens": 4248896} +{"current_steps": 6290, "total_steps": 204665, "loss": 0.1812, "lr": 6.145502516245663e-07, "epoch": 0.1536657464637334, "percentage": 3.07, "elapsed_time": "0:07:18", "remaining_time": "3:50:27", "throughput": 9698.05, "total_tokens": 4252096} +{"current_steps": 6295, "total_steps": 204665, "loss": 0.1504, "lr": 6.150388430155861e-07, "epoch": 0.1537878972955806, "percentage": 3.08, "elapsed_time": "0:07:18", "remaining_time": "3:50:27", "throughput": 9697.98, "total_tokens": 4255424} +{"current_steps": 6300, "total_steps": 204665, "loss": 0.1123, "lr": 6.155274344066057e-07, "epoch": 0.15391004812742776, "percentage": 3.08, "elapsed_time": "0:07:19", "remaining_time": "3:50:27", "throughput": 9698.02, "total_tokens": 4258816} +{"current_steps": 6305, "total_steps": 204665, "loss": 0.0778, "lr": 6.160160257976254e-07, "epoch": 0.15403219895927492, "percentage": 3.08, "elapsed_time": "0:07:19", "remaining_time": "3:50:26", "throughput": 9698.12, "total_tokens": 4262272} +{"current_steps": 6310, "total_steps": 204665, "loss": 0.1027, "lr": 6.165046171886451e-07, "epoch": 0.15415434979112208, "percentage": 3.08, "elapsed_time": "0:07:19", "remaining_time": "3:50:26", "throughput": 9697.8, "total_tokens": 4265472} +{"current_steps": 6315, "total_steps": 204665, "loss": 0.0871, "lr": 6.169932085796648e-07, "epoch": 0.15427650062296924, "percentage": 3.09, "elapsed_time": "0:07:20", "remaining_time": "3:50:25", "throughput": 9697.02, "total_tokens": 4268416} +{"current_steps": 6320, "total_steps": 204665, "loss": 0.1307, "lr": 6.174817999706845e-07, "epoch": 0.1543986514548164, "percentage": 3.09, "elapsed_time": "0:07:20", "remaining_time": "3:50:24", "throughput": 9696.08, "total_tokens": 4271232} +{"current_steps": 6325, "total_steps": 204665, "loss": 0.0743, "lr": 6.179703913617041e-07, "epoch": 0.15452080228666357, "percentage": 3.09, "elapsed_time": "0:07:20", "remaining_time": "3:50:24", "throughput": 9696.15, "total_tokens": 4274624} +{"current_steps": 6330, "total_steps": 204665, "loss": 0.0905, "lr": 6.184589827527239e-07, "epoch": 0.15464295311851073, "percentage": 3.09, "elapsed_time": "0:07:21", "remaining_time": "3:50:24", "throughput": 9695.81, "total_tokens": 4277824} +{"current_steps": 6335, "total_steps": 204665, "loss": 0.1555, "lr": 6.189475741437435e-07, "epoch": 0.1547651039503579, "percentage": 3.1, "elapsed_time": "0:07:21", "remaining_time": "3:50:23", "throughput": 9695.09, "total_tokens": 4280768} +{"current_steps": 6340, "total_steps": 204665, "loss": 0.1338, "lr": 6.194361655347633e-07, "epoch": 0.15488725478220508, "percentage": 3.1, "elapsed_time": "0:07:21", "remaining_time": "3:50:22", "throughput": 9694.45, "total_tokens": 4283776} +{"current_steps": 6345, "total_steps": 204665, "loss": 0.0846, "lr": 6.19924756925783e-07, "epoch": 0.15500940561405224, "percentage": 3.1, "elapsed_time": "0:07:22", "remaining_time": "3:50:22", "throughput": 9694.47, "total_tokens": 4287168} +{"current_steps": 6350, "total_steps": 204665, "loss": 0.0945, "lr": 6.204133483168026e-07, "epoch": 0.1551315564458994, "percentage": 3.1, "elapsed_time": "0:07:22", "remaining_time": "3:50:21", "throughput": 9694.41, "total_tokens": 4290496} +{"current_steps": 6355, "total_steps": 204665, "loss": 0.1313, "lr": 6.209019397078223e-07, "epoch": 0.15525370727774657, "percentage": 3.11, "elapsed_time": "0:07:22", "remaining_time": "3:50:21", "throughput": 9695.5, "total_tokens": 4294464} +{"current_steps": 6360, "total_steps": 204665, "loss": 0.113, "lr": 6.21390531098842e-07, "epoch": 0.15537585810959373, "percentage": 3.11, "elapsed_time": "0:07:23", "remaining_time": "3:50:21", "throughput": 9695.84, "total_tokens": 4298048} +{"current_steps": 6365, "total_steps": 204665, "loss": 0.0559, "lr": 6.218791224898617e-07, "epoch": 0.1554980089414409, "percentage": 3.11, "elapsed_time": "0:07:23", "remaining_time": "3:50:20", "throughput": 9694.99, "total_tokens": 4300928} +{"current_steps": 6370, "total_steps": 204665, "loss": 0.1924, "lr": 6.223677138808814e-07, "epoch": 0.15562015977328805, "percentage": 3.11, "elapsed_time": "0:07:23", "remaining_time": "3:50:20", "throughput": 9695.01, "total_tokens": 4304320} +{"current_steps": 6375, "total_steps": 204665, "loss": 0.0631, "lr": 6.228563052719011e-07, "epoch": 0.15574231060513521, "percentage": 3.11, "elapsed_time": "0:07:24", "remaining_time": "3:50:20", "throughput": 9694.81, "total_tokens": 4307584} +{"current_steps": 6380, "total_steps": 204665, "loss": 0.2685, "lr": 6.233448966629207e-07, "epoch": 0.15586446143698238, "percentage": 3.12, "elapsed_time": "0:07:24", "remaining_time": "3:50:19", "throughput": 9694.04, "total_tokens": 4310528} +{"current_steps": 6385, "total_steps": 204665, "loss": 0.1225, "lr": 6.238334880539404e-07, "epoch": 0.15598661226882954, "percentage": 3.12, "elapsed_time": "0:07:25", "remaining_time": "3:50:19", "throughput": 9693.62, "total_tokens": 4313664} +{"current_steps": 6390, "total_steps": 204665, "loss": 0.0834, "lr": 6.243220794449602e-07, "epoch": 0.15610876310067673, "percentage": 3.12, "elapsed_time": "0:07:25", "remaining_time": "3:50:18", "throughput": 9693.61, "total_tokens": 4316992} +{"current_steps": 6395, "total_steps": 204665, "loss": 0.1642, "lr": 6.248106708359798e-07, "epoch": 0.1562309139325239, "percentage": 3.12, "elapsed_time": "0:07:25", "remaining_time": "3:50:18", "throughput": 9693.55, "total_tokens": 4320320} +{"current_steps": 6400, "total_steps": 204665, "loss": 0.1726, "lr": 6.252992622269996e-07, "epoch": 0.15635306476437105, "percentage": 3.13, "elapsed_time": "0:07:26", "remaining_time": "3:50:17", "throughput": 9693.79, "total_tokens": 4323840} +{"current_steps": 6405, "total_steps": 204665, "loss": 0.0551, "lr": 6.257878536180193e-07, "epoch": 0.15647521559621821, "percentage": 3.13, "elapsed_time": "0:07:26", "remaining_time": "3:50:17", "throughput": 9693.66, "total_tokens": 4327104} +{"current_steps": 6410, "total_steps": 204665, "loss": 0.1901, "lr": 6.262764450090388e-07, "epoch": 0.15659736642806538, "percentage": 3.13, "elapsed_time": "0:07:26", "remaining_time": "3:50:17", "throughput": 9693.78, "total_tokens": 4330560} +{"current_steps": 6415, "total_steps": 204665, "loss": 0.1802, "lr": 6.267650364000586e-07, "epoch": 0.15671951725991254, "percentage": 3.13, "elapsed_time": "0:07:27", "remaining_time": "3:50:16", "throughput": 9693.81, "total_tokens": 4333952} +{"current_steps": 6420, "total_steps": 204665, "loss": 0.0586, "lr": 6.272536277910783e-07, "epoch": 0.1568416680917597, "percentage": 3.14, "elapsed_time": "0:07:27", "remaining_time": "3:50:16", "throughput": 9693.7, "total_tokens": 4337280} +{"current_steps": 6425, "total_steps": 204665, "loss": 0.0911, "lr": 6.27742219182098e-07, "epoch": 0.15696381892360686, "percentage": 3.14, "elapsed_time": "0:07:27", "remaining_time": "3:50:16", "throughput": 9693.86, "total_tokens": 4340736} +{"current_steps": 6430, "total_steps": 204665, "loss": 0.1349, "lr": 6.282308105731177e-07, "epoch": 0.15708596975545402, "percentage": 3.14, "elapsed_time": "0:07:28", "remaining_time": "3:50:15", "throughput": 9693.38, "total_tokens": 4343808} +{"current_steps": 6435, "total_steps": 204665, "loss": 0.1372, "lr": 6.287194019641373e-07, "epoch": 0.1572081205873012, "percentage": 3.14, "elapsed_time": "0:07:28", "remaining_time": "3:50:15", "throughput": 9693.28, "total_tokens": 4347136} +{"current_steps": 6440, "total_steps": 204665, "loss": 0.0889, "lr": 6.29207993355157e-07, "epoch": 0.15733027141914838, "percentage": 3.15, "elapsed_time": "0:07:28", "remaining_time": "3:50:14", "throughput": 9693.01, "total_tokens": 4350336} +{"current_steps": 6445, "total_steps": 204665, "loss": 0.099, "lr": 6.296965847461767e-07, "epoch": 0.15745242225099554, "percentage": 3.15, "elapsed_time": "0:07:29", "remaining_time": "3:50:14", "throughput": 9692.83, "total_tokens": 4353600} +{"current_steps": 6450, "total_steps": 204665, "loss": 0.2071, "lr": 6.301851761371965e-07, "epoch": 0.1575745730828427, "percentage": 3.15, "elapsed_time": "0:07:29", "remaining_time": "3:50:13", "throughput": 9693.0, "total_tokens": 4357056} +{"current_steps": 6455, "total_steps": 204665, "loss": 0.2063, "lr": 6.306737675282161e-07, "epoch": 0.15769672391468986, "percentage": 3.15, "elapsed_time": "0:07:29", "remaining_time": "3:50:13", "throughput": 9693.47, "total_tokens": 4360704} +{"current_steps": 6460, "total_steps": 204665, "loss": 0.1524, "lr": 6.311623589192359e-07, "epoch": 0.15781887474653702, "percentage": 3.16, "elapsed_time": "0:07:30", "remaining_time": "3:50:13", "throughput": 9693.98, "total_tokens": 4364352} +{"current_steps": 6465, "total_steps": 204665, "loss": 0.1161, "lr": 6.316509503102555e-07, "epoch": 0.1579410255783842, "percentage": 3.16, "elapsed_time": "0:07:30", "remaining_time": "3:50:13", "throughput": 9694.81, "total_tokens": 4368192} +{"current_steps": 6470, "total_steps": 204665, "loss": 0.1139, "lr": 6.321395417012751e-07, "epoch": 0.15806317641023135, "percentage": 3.16, "elapsed_time": "0:07:30", "remaining_time": "3:50:12", "throughput": 9694.62, "total_tokens": 4371456} +{"current_steps": 6475, "total_steps": 204665, "loss": 0.1646, "lr": 6.326281330922949e-07, "epoch": 0.1581853272420785, "percentage": 3.16, "elapsed_time": "0:07:31", "remaining_time": "3:50:12", "throughput": 9693.93, "total_tokens": 4374400} +{"current_steps": 6480, "total_steps": 204665, "loss": 0.1281, "lr": 6.331167244833146e-07, "epoch": 0.15830747807392567, "percentage": 3.17, "elapsed_time": "0:07:31", "remaining_time": "3:50:11", "throughput": 9694.0, "total_tokens": 4377792} +{"current_steps": 6485, "total_steps": 204665, "loss": 0.1209, "lr": 6.336053158743343e-07, "epoch": 0.15842962890577283, "percentage": 3.17, "elapsed_time": "0:07:31", "remaining_time": "3:50:11", "throughput": 9693.95, "total_tokens": 4381120} +{"current_steps": 6490, "total_steps": 204665, "loss": 0.2482, "lr": 6.34093907265354e-07, "epoch": 0.15855177973762002, "percentage": 3.17, "elapsed_time": "0:07:32", "remaining_time": "3:50:10", "throughput": 9693.84, "total_tokens": 4384448} +{"current_steps": 6495, "total_steps": 204665, "loss": 0.113, "lr": 6.345824986563737e-07, "epoch": 0.1586739305694672, "percentage": 3.17, "elapsed_time": "0:07:32", "remaining_time": "3:50:10", "throughput": 9693.36, "total_tokens": 4387520} +{"current_steps": 6500, "total_steps": 204665, "loss": 0.0973, "lr": 6.350710900473933e-07, "epoch": 0.15879608140131435, "percentage": 3.18, "elapsed_time": "0:07:32", "remaining_time": "3:50:09", "throughput": 9693.65, "total_tokens": 4391040} +{"current_steps": 6505, "total_steps": 204665, "loss": 0.151, "lr": 6.35559681438413e-07, "epoch": 0.1589182322331615, "percentage": 3.18, "elapsed_time": "0:07:33", "remaining_time": "3:50:10", "throughput": 9694.42, "total_tokens": 4394880} +{"current_steps": 6510, "total_steps": 204665, "loss": 0.0759, "lr": 6.360482728294328e-07, "epoch": 0.15904038306500867, "percentage": 3.18, "elapsed_time": "0:07:33", "remaining_time": "3:50:09", "throughput": 9693.72, "total_tokens": 4397824} +{"current_steps": 6515, "total_steps": 204665, "loss": 0.0951, "lr": 6.365368642204524e-07, "epoch": 0.15916253389685583, "percentage": 3.18, "elapsed_time": "0:07:34", "remaining_time": "3:50:08", "throughput": 9693.66, "total_tokens": 4401152} +{"current_steps": 6520, "total_steps": 204665, "loss": 0.0996, "lr": 6.370254556114721e-07, "epoch": 0.159284684728703, "percentage": 3.19, "elapsed_time": "0:07:34", "remaining_time": "3:50:08", "throughput": 9693.39, "total_tokens": 4404352} +{"current_steps": 6525, "total_steps": 204665, "loss": 0.1367, "lr": 6.375140470024918e-07, "epoch": 0.15940683556055016, "percentage": 3.19, "elapsed_time": "0:07:34", "remaining_time": "3:50:07", "throughput": 9692.98, "total_tokens": 4407488} +{"current_steps": 6530, "total_steps": 204665, "loss": 0.0958, "lr": 6.380026383935114e-07, "epoch": 0.15952898639239732, "percentage": 3.19, "elapsed_time": "0:07:35", "remaining_time": "3:50:07", "throughput": 9692.92, "total_tokens": 4410816} +{"current_steps": 6535, "total_steps": 204665, "loss": 0.015, "lr": 6.384912297845312e-07, "epoch": 0.1596511372242445, "percentage": 3.19, "elapsed_time": "0:07:35", "remaining_time": "3:50:06", "throughput": 9692.65, "total_tokens": 4414016} +{"current_steps": 6540, "total_steps": 204665, "loss": 0.271, "lr": 6.389798211755509e-07, "epoch": 0.15977328805609167, "percentage": 3.2, "elapsed_time": "0:07:35", "remaining_time": "3:50:06", "throughput": 9692.79, "total_tokens": 4417472} +{"current_steps": 6545, "total_steps": 204665, "loss": 0.1196, "lr": 6.394684125665705e-07, "epoch": 0.15989543888793883, "percentage": 3.2, "elapsed_time": "0:07:36", "remaining_time": "3:50:05", "throughput": 9692.35, "total_tokens": 4420544} +{"current_steps": 6550, "total_steps": 204665, "loss": 0.0459, "lr": 6.399570039575902e-07, "epoch": 0.160017589719786, "percentage": 3.2, "elapsed_time": "0:07:36", "remaining_time": "3:50:05", "throughput": 9692.36, "total_tokens": 4423936} +{"current_steps": 6555, "total_steps": 204665, "loss": 0.1126, "lr": 6.4044559534861e-07, "epoch": 0.16013974055163316, "percentage": 3.2, "elapsed_time": "0:07:36", "remaining_time": "3:50:05", "throughput": 9692.73, "total_tokens": 4427520} +{"current_steps": 6560, "total_steps": 204665, "loss": 0.1337, "lr": 6.409341867396296e-07, "epoch": 0.16026189138348032, "percentage": 3.21, "elapsed_time": "0:07:37", "remaining_time": "3:50:05", "throughput": 9693.45, "total_tokens": 4431296} +{"current_steps": 6565, "total_steps": 204665, "loss": 0.2227, "lr": 6.414227781306493e-07, "epoch": 0.16038404221532748, "percentage": 3.21, "elapsed_time": "0:07:37", "remaining_time": "3:50:04", "throughput": 9692.67, "total_tokens": 4434176} +{"current_steps": 6570, "total_steps": 204665, "loss": 0.0667, "lr": 6.419113695216691e-07, "epoch": 0.16050619304717464, "percentage": 3.21, "elapsed_time": "0:07:37", "remaining_time": "3:50:03", "throughput": 9692.8, "total_tokens": 4437568} +{"current_steps": 6575, "total_steps": 204665, "loss": 0.0748, "lr": 6.423999609126886e-07, "epoch": 0.1606283438790218, "percentage": 3.21, "elapsed_time": "0:07:38", "remaining_time": "3:50:03", "throughput": 9692.88, "total_tokens": 4440960} +{"current_steps": 6580, "total_steps": 204665, "loss": 0.1785, "lr": 6.428885523037084e-07, "epoch": 0.16075049471086897, "percentage": 3.22, "elapsed_time": "0:07:38", "remaining_time": "3:50:03", "throughput": 9692.69, "total_tokens": 4444224} +{"current_steps": 6585, "total_steps": 204665, "loss": 0.1116, "lr": 6.433771436947281e-07, "epoch": 0.16087264554271616, "percentage": 3.22, "elapsed_time": "0:07:38", "remaining_time": "3:50:02", "throughput": 9692.42, "total_tokens": 4447424} +{"current_steps": 6590, "total_steps": 204665, "loss": 0.178, "lr": 6.438657350857477e-07, "epoch": 0.16099479637456332, "percentage": 3.22, "elapsed_time": "0:07:39", "remaining_time": "3:50:02", "throughput": 9692.28, "total_tokens": 4450688} +{"current_steps": 6595, "total_steps": 204665, "loss": 0.2095, "lr": 6.443543264767675e-07, "epoch": 0.16111694720641048, "percentage": 3.22, "elapsed_time": "0:07:39", "remaining_time": "3:50:01", "throughput": 9692.8, "total_tokens": 4454336} +{"current_steps": 6600, "total_steps": 204665, "loss": 0.0842, "lr": 6.448429178677871e-07, "epoch": 0.16123909803825764, "percentage": 3.22, "elapsed_time": "0:07:39", "remaining_time": "3:50:01", "throughput": 9692.96, "total_tokens": 4457792} +{"current_steps": 6605, "total_steps": 204665, "loss": 0.1051, "lr": 6.453315092588068e-07, "epoch": 0.1613612488701048, "percentage": 3.23, "elapsed_time": "0:07:40", "remaining_time": "3:50:01", "throughput": 9692.69, "total_tokens": 4460992} +{"current_steps": 6610, "total_steps": 204665, "loss": 0.1712, "lr": 6.458201006498265e-07, "epoch": 0.16148339970195197, "percentage": 3.23, "elapsed_time": "0:07:40", "remaining_time": "3:50:00", "throughput": 9693.0, "total_tokens": 4464576} +{"current_steps": 6615, "total_steps": 204665, "loss": 0.1776, "lr": 6.463086920408463e-07, "epoch": 0.16160555053379913, "percentage": 3.23, "elapsed_time": "0:07:40", "remaining_time": "3:50:00", "throughput": 9692.45, "total_tokens": 4467584} +{"current_steps": 6620, "total_steps": 204665, "loss": 0.2165, "lr": 6.467972834318659e-07, "epoch": 0.1617277013656463, "percentage": 3.23, "elapsed_time": "0:07:41", "remaining_time": "3:49:59", "throughput": 9692.26, "total_tokens": 4470848} +{"current_steps": 6625, "total_steps": 204665, "loss": 0.2397, "lr": 6.472858748228856e-07, "epoch": 0.16184985219749345, "percentage": 3.24, "elapsed_time": "0:07:41", "remaining_time": "3:49:59", "throughput": 9692.5, "total_tokens": 4474368} +{"current_steps": 6630, "total_steps": 204665, "loss": 0.1456, "lr": 6.477744662139053e-07, "epoch": 0.16197200302934062, "percentage": 3.24, "elapsed_time": "0:07:41", "remaining_time": "3:49:59", "throughput": 9693.33, "total_tokens": 4478208} +{"current_steps": 6635, "total_steps": 204665, "loss": 0.1784, "lr": 6.482630576049249e-07, "epoch": 0.1620941538611878, "percentage": 3.24, "elapsed_time": "0:07:42", "remaining_time": "3:49:59", "throughput": 9693.49, "total_tokens": 4481664} +{"current_steps": 6640, "total_steps": 204665, "loss": 0.1122, "lr": 6.487516489959447e-07, "epoch": 0.16221630469303497, "percentage": 3.24, "elapsed_time": "0:07:42", "remaining_time": "3:49:58", "throughput": 9693.01, "total_tokens": 4484736} +{"current_steps": 6645, "total_steps": 204665, "loss": 0.0607, "lr": 6.492402403869644e-07, "epoch": 0.16233845552488213, "percentage": 3.25, "elapsed_time": "0:07:43", "remaining_time": "3:49:58", "throughput": 9694.6, "total_tokens": 4489024} +{"current_steps": 6650, "total_steps": 204665, "loss": 0.0822, "lr": 6.49728831777984e-07, "epoch": 0.1624606063567293, "percentage": 3.25, "elapsed_time": "0:07:43", "remaining_time": "3:49:58", "throughput": 9694.53, "total_tokens": 4492352} +{"current_steps": 6655, "total_steps": 204665, "loss": 0.1032, "lr": 6.502174231690037e-07, "epoch": 0.16258275718857645, "percentage": 3.25, "elapsed_time": "0:07:43", "remaining_time": "3:49:57", "throughput": 9693.88, "total_tokens": 4495360} +{"current_steps": 6660, "total_steps": 204665, "loss": 0.0636, "lr": 6.507060145600234e-07, "epoch": 0.16270490802042362, "percentage": 3.25, "elapsed_time": "0:07:44", "remaining_time": "3:49:57", "throughput": 9693.9, "total_tokens": 4498752} +{"current_steps": 6665, "total_steps": 204665, "loss": 0.2362, "lr": 6.511946059510431e-07, "epoch": 0.16282705885227078, "percentage": 3.26, "elapsed_time": "0:07:44", "remaining_time": "3:49:56", "throughput": 9693.29, "total_tokens": 4501760} +{"current_steps": 6670, "total_steps": 204665, "loss": 0.243, "lr": 6.516831973420628e-07, "epoch": 0.16294920968411794, "percentage": 3.26, "elapsed_time": "0:07:44", "remaining_time": "3:49:56", "throughput": 9693.68, "total_tokens": 4505344} +{"current_steps": 6675, "total_steps": 204665, "loss": 0.093, "lr": 6.521717887330826e-07, "epoch": 0.1630713605159651, "percentage": 3.26, "elapsed_time": "0:07:45", "remaining_time": "3:49:56", "throughput": 9693.62, "total_tokens": 4508672} +{"current_steps": 6680, "total_steps": 204665, "loss": 0.1341, "lr": 6.526603801241022e-07, "epoch": 0.1631935113478123, "percentage": 3.26, "elapsed_time": "0:07:45", "remaining_time": "3:49:56", "throughput": 9694.85, "total_tokens": 4512768} +{"current_steps": 6685, "total_steps": 204665, "loss": 0.1077, "lr": 6.531489715151218e-07, "epoch": 0.16331566217965945, "percentage": 3.27, "elapsed_time": "0:07:45", "remaining_time": "3:49:55", "throughput": 9694.36, "total_tokens": 4515840} +{"current_steps": 6690, "total_steps": 204665, "loss": 0.1545, "lr": 6.536375629061416e-07, "epoch": 0.16343781301150662, "percentage": 3.27, "elapsed_time": "0:07:46", "remaining_time": "3:49:55", "throughput": 9694.37, "total_tokens": 4519232} +{"current_steps": 6695, "total_steps": 204665, "loss": 0.1031, "lr": 6.541261542971612e-07, "epoch": 0.16355996384335378, "percentage": 3.27, "elapsed_time": "0:07:46", "remaining_time": "3:49:55", "throughput": 9694.73, "total_tokens": 4522816} +{"current_steps": 6700, "total_steps": 204665, "loss": 0.1403, "lr": 6.54614745688181e-07, "epoch": 0.16368211467520094, "percentage": 3.27, "elapsed_time": "0:07:46", "remaining_time": "3:49:54", "throughput": 9694.26, "total_tokens": 4525888} +{"current_steps": 6705, "total_steps": 204665, "loss": 0.1446, "lr": 6.551033370792007e-07, "epoch": 0.1638042655070481, "percentage": 3.28, "elapsed_time": "0:07:47", "remaining_time": "3:49:54", "throughput": 9694.28, "total_tokens": 4529280} +{"current_steps": 6710, "total_steps": 204665, "loss": 0.186, "lr": 6.555919284702204e-07, "epoch": 0.16392641633889526, "percentage": 3.28, "elapsed_time": "0:07:47", "remaining_time": "3:49:53", "throughput": 9694.08, "total_tokens": 4532544} +{"current_steps": 6715, "total_steps": 204665, "loss": 0.1599, "lr": 6.5608051986124e-07, "epoch": 0.16404856717074243, "percentage": 3.28, "elapsed_time": "0:07:47", "remaining_time": "3:49:52", "throughput": 9693.14, "total_tokens": 4535360} +{"current_steps": 6720, "total_steps": 204665, "loss": 0.1133, "lr": 6.565691112522597e-07, "epoch": 0.1641707180025896, "percentage": 3.28, "elapsed_time": "0:07:48", "remaining_time": "3:49:52", "throughput": 9692.53, "total_tokens": 4538368} +{"current_steps": 6725, "total_steps": 204665, "loss": 0.0491, "lr": 6.570577026432794e-07, "epoch": 0.16429286883443675, "percentage": 3.29, "elapsed_time": "0:07:48", "remaining_time": "3:49:51", "throughput": 9692.39, "total_tokens": 4541632} +{"current_steps": 6730, "total_steps": 204665, "loss": 0.1565, "lr": 6.575462940342991e-07, "epoch": 0.16441501966628394, "percentage": 3.29, "elapsed_time": "0:07:48", "remaining_time": "3:49:51", "throughput": 9691.58, "total_tokens": 4544512} +{"current_steps": 6735, "total_steps": 204665, "loss": 0.102, "lr": 6.580348854253189e-07, "epoch": 0.1645371704981311, "percentage": 3.29, "elapsed_time": "0:07:49", "remaining_time": "3:49:50", "throughput": 9691.39, "total_tokens": 4547776} +{"current_steps": 6740, "total_steps": 204665, "loss": 0.1985, "lr": 6.585234768163384e-07, "epoch": 0.16465932132997826, "percentage": 3.29, "elapsed_time": "0:07:49", "remaining_time": "3:49:50", "throughput": 9691.01, "total_tokens": 4550912} +{"current_steps": 6745, "total_steps": 204665, "loss": 0.0903, "lr": 6.590120682073581e-07, "epoch": 0.16478147216182543, "percentage": 3.3, "elapsed_time": "0:07:49", "remaining_time": "3:49:49", "throughput": 9690.7, "total_tokens": 4554112} +{"current_steps": 6750, "total_steps": 204665, "loss": 0.18, "lr": 6.595006595983779e-07, "epoch": 0.1649036229936726, "percentage": 3.3, "elapsed_time": "0:07:50", "remaining_time": "3:49:49", "throughput": 9690.09, "total_tokens": 4557120} +{"current_steps": 6755, "total_steps": 204665, "loss": 0.0676, "lr": 6.599892509893975e-07, "epoch": 0.16502577382551975, "percentage": 3.3, "elapsed_time": "0:07:50", "remaining_time": "3:49:48", "throughput": 9690.28, "total_tokens": 4560576} +{"current_steps": 6760, "total_steps": 204665, "loss": 0.1992, "lr": 6.604778423804173e-07, "epoch": 0.1651479246573669, "percentage": 3.3, "elapsed_time": "0:07:50", "remaining_time": "3:49:48", "throughput": 9690.48, "total_tokens": 4564096} +{"current_steps": 6765, "total_steps": 204665, "loss": 0.1163, "lr": 6.60966433771437e-07, "epoch": 0.16527007548921407, "percentage": 3.31, "elapsed_time": "0:07:51", "remaining_time": "3:49:48", "throughput": 9689.84, "total_tokens": 4567104} +{"current_steps": 6770, "total_steps": 204665, "loss": 0.1756, "lr": 6.614550251624566e-07, "epoch": 0.16539222632106124, "percentage": 3.31, "elapsed_time": "0:07:51", "remaining_time": "3:49:47", "throughput": 9689.75, "total_tokens": 4570496} +{"current_steps": 6775, "total_steps": 204665, "loss": 0.0301, "lr": 6.619436165534763e-07, "epoch": 0.1655143771529084, "percentage": 3.31, "elapsed_time": "0:07:52", "remaining_time": "3:49:49", "throughput": 9693.03, "total_tokens": 4576000} +{"current_steps": 6780, "total_steps": 204665, "loss": 0.1214, "lr": 6.62432207944496e-07, "epoch": 0.1656365279847556, "percentage": 3.31, "elapsed_time": "0:07:52", "remaining_time": "3:49:48", "throughput": 9693.03, "total_tokens": 4579392} +{"current_steps": 6785, "total_steps": 204665, "loss": 0.0789, "lr": 6.629207993355157e-07, "epoch": 0.16575867881660275, "percentage": 3.32, "elapsed_time": "0:07:52", "remaining_time": "3:49:48", "throughput": 9692.76, "total_tokens": 4582656} +{"current_steps": 6790, "total_steps": 204665, "loss": 0.1426, "lr": 6.634093907265354e-07, "epoch": 0.1658808296484499, "percentage": 3.32, "elapsed_time": "0:07:53", "remaining_time": "3:49:48", "throughput": 9693.15, "total_tokens": 4586304} +{"current_steps": 6795, "total_steps": 204665, "loss": 0.0402, "lr": 6.638979821175551e-07, "epoch": 0.16600298048029707, "percentage": 3.32, "elapsed_time": "0:07:53", "remaining_time": "3:49:48", "throughput": 9692.64, "total_tokens": 4589376} +{"current_steps": 6800, "total_steps": 204665, "loss": 0.1095, "lr": 6.643865735085747e-07, "epoch": 0.16612513131214424, "percentage": 3.32, "elapsed_time": "0:07:53", "remaining_time": "3:49:47", "throughput": 9692.57, "total_tokens": 4592704} +{"current_steps": 6805, "total_steps": 204665, "loss": 0.1261, "lr": 6.648751648995944e-07, "epoch": 0.1662472821439914, "percentage": 3.32, "elapsed_time": "0:07:54", "remaining_time": "3:49:47", "throughput": 9693.54, "total_tokens": 4596672} +{"current_steps": 6810, "total_steps": 204665, "loss": 0.1199, "lr": 6.653637562906142e-07, "epoch": 0.16636943297583856, "percentage": 3.33, "elapsed_time": "0:07:54", "remaining_time": "3:49:47", "throughput": 9693.63, "total_tokens": 4600128} +{"current_steps": 6815, "total_steps": 204665, "loss": 0.1429, "lr": 6.658523476816338e-07, "epoch": 0.16649158380768572, "percentage": 3.33, "elapsed_time": "0:07:54", "remaining_time": "3:49:46", "throughput": 9693.2, "total_tokens": 4603200} +{"current_steps": 6820, "total_steps": 204665, "loss": 0.1214, "lr": 6.663409390726536e-07, "epoch": 0.16661373463953288, "percentage": 3.33, "elapsed_time": "0:07:55", "remaining_time": "3:49:46", "throughput": 9692.59, "total_tokens": 4606208} +{"current_steps": 6825, "total_steps": 204665, "loss": 0.0518, "lr": 6.668295304636732e-07, "epoch": 0.16673588547138005, "percentage": 3.33, "elapsed_time": "0:07:55", "remaining_time": "3:49:45", "throughput": 9692.13, "total_tokens": 4609280} +{"current_steps": 6830, "total_steps": 204665, "loss": 0.1066, "lr": 6.673181218546929e-07, "epoch": 0.16685803630322724, "percentage": 3.34, "elapsed_time": "0:07:55", "remaining_time": "3:49:44", "throughput": 9691.68, "total_tokens": 4612352} +{"current_steps": 6835, "total_steps": 204665, "loss": 0.1821, "lr": 6.678067132457126e-07, "epoch": 0.1669801871350744, "percentage": 3.34, "elapsed_time": "0:07:56", "remaining_time": "3:49:44", "throughput": 9691.73, "total_tokens": 4615744} +{"current_steps": 6840, "total_steps": 204665, "loss": 0.3838, "lr": 6.682953046367323e-07, "epoch": 0.16710233796692156, "percentage": 3.34, "elapsed_time": "0:07:56", "remaining_time": "3:49:44", "throughput": 9692.16, "total_tokens": 4619392} +{"current_steps": 6845, "total_steps": 204665, "loss": 0.0645, "lr": 6.68783896027752e-07, "epoch": 0.16722448879876872, "percentage": 3.34, "elapsed_time": "0:07:56", "remaining_time": "3:49:43", "throughput": 9691.48, "total_tokens": 4622336} +{"current_steps": 6850, "total_steps": 204665, "loss": 0.1702, "lr": 6.692724874187716e-07, "epoch": 0.16734663963061588, "percentage": 3.35, "elapsed_time": "0:07:57", "remaining_time": "3:49:43", "throughput": 9690.76, "total_tokens": 4625280} +{"current_steps": 6855, "total_steps": 204665, "loss": 0.2138, "lr": 6.697610788097914e-07, "epoch": 0.16746879046246305, "percentage": 3.35, "elapsed_time": "0:07:57", "remaining_time": "3:49:42", "throughput": 9690.56, "total_tokens": 4628544} +{"current_steps": 6860, "total_steps": 204665, "loss": 0.1501, "lr": 6.70249670200811e-07, "epoch": 0.1675909412943102, "percentage": 3.35, "elapsed_time": "0:07:57", "remaining_time": "3:49:42", "throughput": 9691.31, "total_tokens": 4632384} +{"current_steps": 6865, "total_steps": 204665, "loss": 0.0843, "lr": 6.707382615918307e-07, "epoch": 0.16771309212615737, "percentage": 3.35, "elapsed_time": "0:07:58", "remaining_time": "3:49:42", "throughput": 9691.23, "total_tokens": 4635712} +{"current_steps": 6870, "total_steps": 204665, "loss": 0.0591, "lr": 6.712268529828505e-07, "epoch": 0.16783524295800453, "percentage": 3.36, "elapsed_time": "0:07:58", "remaining_time": "3:49:41", "throughput": 9690.82, "total_tokens": 4638848} +{"current_steps": 6875, "total_steps": 204665, "loss": 0.0569, "lr": 6.7171544437387e-07, "epoch": 0.16795739378985172, "percentage": 3.36, "elapsed_time": "0:07:59", "remaining_time": "3:49:41", "throughput": 9690.05, "total_tokens": 4641728} +{"current_steps": 6880, "total_steps": 204665, "loss": 0.2355, "lr": 6.722040357648898e-07, "epoch": 0.16807954462169888, "percentage": 3.36, "elapsed_time": "0:07:59", "remaining_time": "3:49:40", "throughput": 9689.82, "total_tokens": 4644992} +{"current_steps": 6885, "total_steps": 204665, "loss": 0.1886, "lr": 6.726926271559095e-07, "epoch": 0.16820169545354605, "percentage": 3.36, "elapsed_time": "0:07:59", "remaining_time": "3:49:40", "throughput": 9689.08, "total_tokens": 4647872} +{"current_steps": 6890, "total_steps": 204665, "loss": 0.1627, "lr": 6.731812185469292e-07, "epoch": 0.1683238462853932, "percentage": 3.37, "elapsed_time": "0:08:00", "remaining_time": "3:49:39", "throughput": 9689.48, "total_tokens": 4651520} +{"current_steps": 6895, "total_steps": 204665, "loss": 0.1422, "lr": 6.736698099379489e-07, "epoch": 0.16844599711724037, "percentage": 3.37, "elapsed_time": "0:08:00", "remaining_time": "3:49:39", "throughput": 9690.45, "total_tokens": 4655488} +{"current_steps": 6900, "total_steps": 204665, "loss": 0.2005, "lr": 6.741584013289686e-07, "epoch": 0.16856814794908753, "percentage": 3.37, "elapsed_time": "0:08:00", "remaining_time": "3:49:39", "throughput": 9690.59, "total_tokens": 4658944} +{"current_steps": 6905, "total_steps": 204665, "loss": 0.2195, "lr": 6.746469927199882e-07, "epoch": 0.1686902987809347, "percentage": 3.37, "elapsed_time": "0:08:01", "remaining_time": "3:49:39", "throughput": 9689.99, "total_tokens": 4661952} +{"current_steps": 6910, "total_steps": 204665, "loss": 0.2332, "lr": 6.751355841110079e-07, "epoch": 0.16881244961278186, "percentage": 3.38, "elapsed_time": "0:08:01", "remaining_time": "3:49:38", "throughput": 9690.23, "total_tokens": 4665472} +{"current_steps": 6915, "total_steps": 204665, "loss": 0.1836, "lr": 6.756241755020277e-07, "epoch": 0.16893460044462902, "percentage": 3.38, "elapsed_time": "0:08:01", "remaining_time": "3:49:38", "throughput": 9690.17, "total_tokens": 4668800} +{"current_steps": 6920, "total_steps": 204665, "loss": 0.132, "lr": 6.761127668930473e-07, "epoch": 0.16905675127647618, "percentage": 3.38, "elapsed_time": "0:08:02", "remaining_time": "3:49:38", "throughput": 9690.72, "total_tokens": 4672512} +{"current_steps": 6925, "total_steps": 204665, "loss": 0.1959, "lr": 6.766013582840671e-07, "epoch": 0.16917890210832337, "percentage": 3.38, "elapsed_time": "0:08:02", "remaining_time": "3:49:37", "throughput": 9690.73, "total_tokens": 4675904} +{"current_steps": 6930, "total_steps": 204665, "loss": 0.0929, "lr": 6.770899496750868e-07, "epoch": 0.16930105294017053, "percentage": 3.39, "elapsed_time": "0:08:02", "remaining_time": "3:49:37", "throughput": 9690.99, "total_tokens": 4679424} +{"current_steps": 6935, "total_steps": 204665, "loss": 0.1339, "lr": 6.775785410661063e-07, "epoch": 0.1694232037720177, "percentage": 3.39, "elapsed_time": "0:08:03", "remaining_time": "3:49:37", "throughput": 9692.18, "total_tokens": 4683520} +{"current_steps": 6940, "total_steps": 204665, "loss": 0.138, "lr": 6.780671324571261e-07, "epoch": 0.16954535460386486, "percentage": 3.39, "elapsed_time": "0:08:03", "remaining_time": "3:49:37", "throughput": 9692.16, "total_tokens": 4686848} +{"current_steps": 6945, "total_steps": 204665, "loss": 0.068, "lr": 6.785557238481458e-07, "epoch": 0.16966750543571202, "percentage": 3.39, "elapsed_time": "0:08:03", "remaining_time": "3:49:37", "throughput": 9692.24, "total_tokens": 4690304} +{"current_steps": 6950, "total_steps": 204665, "loss": 0.0549, "lr": 6.790443152391655e-07, "epoch": 0.16978965626755918, "percentage": 3.4, "elapsed_time": "0:08:04", "remaining_time": "3:49:36", "throughput": 9692.17, "total_tokens": 4693632} +{"current_steps": 6955, "total_steps": 204665, "loss": 0.104, "lr": 6.795329066301852e-07, "epoch": 0.16991180709940634, "percentage": 3.4, "elapsed_time": "0:08:04", "remaining_time": "3:49:36", "throughput": 9691.92, "total_tokens": 4696832} +{"current_steps": 6960, "total_steps": 204665, "loss": 0.1351, "lr": 6.800214980212048e-07, "epoch": 0.1700339579312535, "percentage": 3.4, "elapsed_time": "0:08:04", "remaining_time": "3:49:35", "throughput": 9692.72, "total_tokens": 4700672} +{"current_steps": 6965, "total_steps": 204665, "loss": 0.1338, "lr": 6.805100894122245e-07, "epoch": 0.17015610876310067, "percentage": 3.4, "elapsed_time": "0:08:05", "remaining_time": "3:49:35", "throughput": 9693.27, "total_tokens": 4704384} +{"current_steps": 6970, "total_steps": 204665, "loss": 0.0839, "lr": 6.809986808032442e-07, "epoch": 0.17027825959494783, "percentage": 3.41, "elapsed_time": "0:08:05", "remaining_time": "3:49:35", "throughput": 9693.15, "total_tokens": 4707712} +{"current_steps": 6975, "total_steps": 204665, "loss": 0.0875, "lr": 6.81487272194264e-07, "epoch": 0.17040041042679502, "percentage": 3.41, "elapsed_time": "0:08:06", "remaining_time": "3:49:34", "throughput": 9692.52, "total_tokens": 4710720} +{"current_steps": 6980, "total_steps": 204665, "loss": 0.0822, "lr": 6.819758635852836e-07, "epoch": 0.17052256125864218, "percentage": 3.41, "elapsed_time": "0:08:06", "remaining_time": "3:49:34", "throughput": 9692.41, "total_tokens": 4714048} +{"current_steps": 6985, "total_steps": 204665, "loss": 0.1104, "lr": 6.824644549763034e-07, "epoch": 0.17064471209048934, "percentage": 3.41, "elapsed_time": "0:08:06", "remaining_time": "3:49:34", "throughput": 9692.24, "total_tokens": 4717312} +{"current_steps": 6990, "total_steps": 204665, "loss": 0.0742, "lr": 6.82953046367323e-07, "epoch": 0.1707668629223365, "percentage": 3.42, "elapsed_time": "0:08:07", "remaining_time": "3:49:34", "throughput": 9693.0, "total_tokens": 4721152} +{"current_steps": 6995, "total_steps": 204665, "loss": 0.1043, "lr": 6.834416377583426e-07, "epoch": 0.17088901375418367, "percentage": 3.42, "elapsed_time": "0:08:07", "remaining_time": "3:49:33", "throughput": 9692.81, "total_tokens": 4724416} +{"current_steps": 7000, "total_steps": 204665, "loss": 0.1324, "lr": 6.839302291493624e-07, "epoch": 0.17101116458603083, "percentage": 3.42, "elapsed_time": "0:08:07", "remaining_time": "3:49:33", "throughput": 9692.9, "total_tokens": 4727872} +{"current_steps": 7005, "total_steps": 204665, "loss": 0.2628, "lr": 6.844188205403821e-07, "epoch": 0.171133315417878, "percentage": 3.42, "elapsed_time": "0:08:08", "remaining_time": "3:49:33", "throughput": 9693.07, "total_tokens": 4731328} +{"current_steps": 7010, "total_steps": 204665, "loss": 0.1661, "lr": 6.849074119314018e-07, "epoch": 0.17125546624972515, "percentage": 3.43, "elapsed_time": "0:08:08", "remaining_time": "3:49:32", "throughput": 9693.48, "total_tokens": 4734912} +{"current_steps": 7015, "total_steps": 204665, "loss": 0.0935, "lr": 6.853960033224214e-07, "epoch": 0.17137761708157231, "percentage": 3.43, "elapsed_time": "0:08:08", "remaining_time": "3:49:32", "throughput": 9694.32, "total_tokens": 4738816} +{"current_steps": 7020, "total_steps": 204665, "loss": 0.1215, "lr": 6.85884594713441e-07, "epoch": 0.1714997679134195, "percentage": 3.43, "elapsed_time": "0:08:09", "remaining_time": "3:49:32", "throughput": 9693.81, "total_tokens": 4741888} +{"current_steps": 7025, "total_steps": 204665, "loss": 0.1481, "lr": 6.863731861044608e-07, "epoch": 0.17162191874526667, "percentage": 3.43, "elapsed_time": "0:08:09", "remaining_time": "3:49:31", "throughput": 9693.6, "total_tokens": 4745152} +{"current_steps": 7030, "total_steps": 204665, "loss": 0.1287, "lr": 6.868617774954805e-07, "epoch": 0.17174406957711383, "percentage": 3.43, "elapsed_time": "0:08:09", "remaining_time": "3:49:31", "throughput": 9693.69, "total_tokens": 4748608} +{"current_steps": 7035, "total_steps": 204665, "loss": 0.1947, "lr": 6.873503688865003e-07, "epoch": 0.171866220408961, "percentage": 3.44, "elapsed_time": "0:08:10", "remaining_time": "3:49:31", "throughput": 9692.99, "total_tokens": 4751552} +{"current_steps": 7040, "total_steps": 204665, "loss": 0.0742, "lr": 6.878389602775198e-07, "epoch": 0.17198837124080815, "percentage": 3.44, "elapsed_time": "0:08:10", "remaining_time": "3:49:30", "throughput": 9692.65, "total_tokens": 4754688} +{"current_steps": 7045, "total_steps": 204665, "loss": 0.1217, "lr": 6.883275516685396e-07, "epoch": 0.17211052207265531, "percentage": 3.44, "elapsed_time": "0:08:10", "remaining_time": "3:49:29", "throughput": 9692.15, "total_tokens": 4757760} +{"current_steps": 7050, "total_steps": 204665, "loss": 0.2113, "lr": 6.888161430595593e-07, "epoch": 0.17223267290450248, "percentage": 3.44, "elapsed_time": "0:08:11", "remaining_time": "3:49:29", "throughput": 9691.76, "total_tokens": 4760896} +{"current_steps": 7055, "total_steps": 204665, "loss": 0.1273, "lr": 6.893047344505789e-07, "epoch": 0.17235482373634964, "percentage": 3.45, "elapsed_time": "0:08:11", "remaining_time": "3:49:28", "throughput": 9691.22, "total_tokens": 4763904} +{"current_steps": 7060, "total_steps": 204665, "loss": 0.1319, "lr": 6.897933258415987e-07, "epoch": 0.1724769745681968, "percentage": 3.45, "elapsed_time": "0:08:11", "remaining_time": "3:49:28", "throughput": 9691.84, "total_tokens": 4767680} +{"current_steps": 7065, "total_steps": 204665, "loss": 0.1141, "lr": 6.902819172326183e-07, "epoch": 0.17259912540004396, "percentage": 3.45, "elapsed_time": "0:08:12", "remaining_time": "3:49:28", "throughput": 9692.01, "total_tokens": 4771200} +{"current_steps": 7070, "total_steps": 204665, "loss": 0.0704, "lr": 6.90770508623638e-07, "epoch": 0.17272127623189115, "percentage": 3.45, "elapsed_time": "0:08:12", "remaining_time": "3:49:28", "throughput": 9692.03, "total_tokens": 4774592} +{"current_steps": 7075, "total_steps": 204665, "loss": 0.1583, "lr": 6.912591000146577e-07, "epoch": 0.17284342706373831, "percentage": 3.46, "elapsed_time": "0:08:12", "remaining_time": "3:49:27", "throughput": 9691.45, "total_tokens": 4777600} +{"current_steps": 7080, "total_steps": 204665, "loss": 0.0724, "lr": 6.917476914056773e-07, "epoch": 0.17296557789558548, "percentage": 3.46, "elapsed_time": "0:08:13", "remaining_time": "3:49:27", "throughput": 9691.08, "total_tokens": 4780736} +{"current_steps": 7085, "total_steps": 204665, "loss": 0.196, "lr": 6.922362827966971e-07, "epoch": 0.17308772872743264, "percentage": 3.46, "elapsed_time": "0:08:13", "remaining_time": "3:49:26", "throughput": 9690.77, "total_tokens": 4783936} +{"current_steps": 7090, "total_steps": 204665, "loss": 0.088, "lr": 6.927248741877168e-07, "epoch": 0.1732098795592798, "percentage": 3.46, "elapsed_time": "0:08:14", "remaining_time": "3:49:26", "throughput": 9690.83, "total_tokens": 4787328} +{"current_steps": 7095, "total_steps": 204665, "loss": 0.0997, "lr": 6.932134655787366e-07, "epoch": 0.17333203039112696, "percentage": 3.47, "elapsed_time": "0:08:14", "remaining_time": "3:49:25", "throughput": 9690.53, "total_tokens": 4790528} +{"current_steps": 7100, "total_steps": 204665, "loss": 0.0858, "lr": 6.937020569697561e-07, "epoch": 0.17345418122297412, "percentage": 3.47, "elapsed_time": "0:08:14", "remaining_time": "3:49:25", "throughput": 9690.27, "total_tokens": 4793728} +{"current_steps": 7105, "total_steps": 204665, "loss": 0.1983, "lr": 6.941906483607759e-07, "epoch": 0.1735763320548213, "percentage": 3.47, "elapsed_time": "0:08:15", "remaining_time": "3:49:25", "throughput": 9690.61, "total_tokens": 4797312} +{"current_steps": 7110, "total_steps": 204665, "loss": 0.1578, "lr": 6.946792397517955e-07, "epoch": 0.17369848288666845, "percentage": 3.47, "elapsed_time": "0:08:15", "remaining_time": "3:49:25", "throughput": 9691.66, "total_tokens": 4801344} +{"current_steps": 7115, "total_steps": 204665, "loss": 0.0992, "lr": 6.951678311428152e-07, "epoch": 0.1738206337185156, "percentage": 3.48, "elapsed_time": "0:08:15", "remaining_time": "3:49:24", "throughput": 9692.02, "total_tokens": 4804928} +{"current_steps": 7120, "total_steps": 204665, "loss": 0.101, "lr": 6.95656422533835e-07, "epoch": 0.1739427845503628, "percentage": 3.48, "elapsed_time": "0:08:16", "remaining_time": "3:49:24", "throughput": 9692.88, "total_tokens": 4808832} +{"current_steps": 7125, "total_steps": 204665, "loss": 0.3265, "lr": 6.961450139248545e-07, "epoch": 0.17406493538220996, "percentage": 3.48, "elapsed_time": "0:08:16", "remaining_time": "3:49:24", "throughput": 9691.99, "total_tokens": 4811648} +{"current_steps": 7130, "total_steps": 204665, "loss": 0.0718, "lr": 6.966336053158743e-07, "epoch": 0.17418708621405712, "percentage": 3.48, "elapsed_time": "0:08:16", "remaining_time": "3:49:24", "throughput": 9692.51, "total_tokens": 4815360} +{"current_steps": 7135, "total_steps": 204665, "loss": 0.0902, "lr": 6.97122196706894e-07, "epoch": 0.1743092370459043, "percentage": 3.49, "elapsed_time": "0:08:17", "remaining_time": "3:49:23", "throughput": 9692.59, "total_tokens": 4818816} +{"current_steps": 7140, "total_steps": 204665, "loss": 0.1007, "lr": 6.976107880979138e-07, "epoch": 0.17443138787775145, "percentage": 3.49, "elapsed_time": "0:08:17", "remaining_time": "3:49:23", "throughput": 9693.05, "total_tokens": 4822464} +{"current_steps": 7145, "total_steps": 204665, "loss": 0.177, "lr": 6.980993794889334e-07, "epoch": 0.1745535387095986, "percentage": 3.49, "elapsed_time": "0:08:17", "remaining_time": "3:49:23", "throughput": 9692.8, "total_tokens": 4825664} +{"current_steps": 7150, "total_steps": 204665, "loss": 0.1748, "lr": 6.98587970879953e-07, "epoch": 0.17467568954144577, "percentage": 3.49, "elapsed_time": "0:08:18", "remaining_time": "3:49:22", "throughput": 9691.97, "total_tokens": 4828480} +{"current_steps": 7155, "total_steps": 204665, "loss": 0.1292, "lr": 6.990765622709727e-07, "epoch": 0.17479784037329293, "percentage": 3.5, "elapsed_time": "0:08:18", "remaining_time": "3:49:22", "throughput": 9692.22, "total_tokens": 4832000} +{"current_steps": 7160, "total_steps": 204665, "loss": 0.1327, "lr": 6.995651536619924e-07, "epoch": 0.1749199912051401, "percentage": 3.5, "elapsed_time": "0:08:18", "remaining_time": "3:49:21", "throughput": 9692.59, "total_tokens": 4835584} +{"current_steps": 7165, "total_steps": 204665, "loss": 0.1325, "lr": 7.000537450530122e-07, "epoch": 0.17504214203698726, "percentage": 3.5, "elapsed_time": "0:08:19", "remaining_time": "3:49:21", "throughput": 9692.41, "total_tokens": 4838848} +{"current_steps": 7170, "total_steps": 204665, "loss": 0.0945, "lr": 7.005423364440318e-07, "epoch": 0.17516429286883445, "percentage": 3.5, "elapsed_time": "0:08:19", "remaining_time": "3:49:20", "throughput": 9692.14, "total_tokens": 4842048} +{"current_steps": 7175, "total_steps": 204665, "loss": 0.0969, "lr": 7.010309278350515e-07, "epoch": 0.1752864437006816, "percentage": 3.51, "elapsed_time": "0:08:19", "remaining_time": "3:49:20", "throughput": 9692.46, "total_tokens": 4845632} +{"current_steps": 7180, "total_steps": 204665, "loss": 0.1881, "lr": 7.015195192260712e-07, "epoch": 0.17540859453252877, "percentage": 3.51, "elapsed_time": "0:08:20", "remaining_time": "3:49:20", "throughput": 9692.2, "total_tokens": 4848832} +{"current_steps": 7185, "total_steps": 204665, "loss": 0.1016, "lr": 7.020081106170908e-07, "epoch": 0.17553074536437593, "percentage": 3.51, "elapsed_time": "0:08:20", "remaining_time": "3:49:19", "throughput": 9691.82, "total_tokens": 4851968} +{"current_steps": 7190, "total_steps": 204665, "loss": 0.0988, "lr": 7.024967020081106e-07, "epoch": 0.1756528961962231, "percentage": 3.51, "elapsed_time": "0:08:20", "remaining_time": "3:49:19", "throughput": 9691.82, "total_tokens": 4855360} +{"current_steps": 7195, "total_steps": 204665, "loss": 0.1393, "lr": 7.029852933991303e-07, "epoch": 0.17577504702807026, "percentage": 3.52, "elapsed_time": "0:08:21", "remaining_time": "3:49:18", "throughput": 9691.43, "total_tokens": 4858496} +{"current_steps": 7200, "total_steps": 204665, "loss": 0.0923, "lr": 7.0347388479015e-07, "epoch": 0.17589719785991742, "percentage": 3.52, "elapsed_time": "0:08:21", "remaining_time": "3:49:18", "throughput": 9691.64, "total_tokens": 4862016} +{"current_steps": 7205, "total_steps": 204665, "loss": 0.1801, "lr": 7.039624761811696e-07, "epoch": 0.17601934869176458, "percentage": 3.52, "elapsed_time": "0:08:22", "remaining_time": "3:49:18", "throughput": 9691.33, "total_tokens": 4865216} +{"current_steps": 7210, "total_steps": 204665, "loss": 0.0319, "lr": 7.044510675721893e-07, "epoch": 0.17614149952361174, "percentage": 3.52, "elapsed_time": "0:08:22", "remaining_time": "3:49:17", "throughput": 9690.94, "total_tokens": 4868352} +{"current_steps": 7215, "total_steps": 204665, "loss": 0.082, "lr": 7.04939658963209e-07, "epoch": 0.17626365035545893, "percentage": 3.53, "elapsed_time": "0:08:22", "remaining_time": "3:49:17", "throughput": 9690.95, "total_tokens": 4871744} +{"current_steps": 7220, "total_steps": 204665, "loss": 0.0897, "lr": 7.054282503542287e-07, "epoch": 0.1763858011873061, "percentage": 3.53, "elapsed_time": "0:08:23", "remaining_time": "3:49:17", "throughput": 9691.54, "total_tokens": 4875520} +{"current_steps": 7225, "total_steps": 204665, "loss": 0.0919, "lr": 7.059168417452485e-07, "epoch": 0.17650795201915326, "percentage": 3.53, "elapsed_time": "0:08:23", "remaining_time": "3:49:17", "throughput": 9691.6, "total_tokens": 4878912} +{"current_steps": 7230, "total_steps": 204665, "loss": 0.0748, "lr": 7.064054331362681e-07, "epoch": 0.17663010285100042, "percentage": 3.53, "elapsed_time": "0:08:23", "remaining_time": "3:49:16", "throughput": 9691.73, "total_tokens": 4882368} +{"current_steps": 7235, "total_steps": 204665, "loss": 0.0947, "lr": 7.068940245272877e-07, "epoch": 0.17675225368284758, "percentage": 3.54, "elapsed_time": "0:08:24", "remaining_time": "3:49:16", "throughput": 9691.35, "total_tokens": 4885504} +{"current_steps": 7240, "total_steps": 204665, "loss": 0.0184, "lr": 7.073826159183075e-07, "epoch": 0.17687440451469474, "percentage": 3.54, "elapsed_time": "0:08:24", "remaining_time": "3:49:16", "throughput": 9692.02, "total_tokens": 4889344} +{"current_steps": 7245, "total_steps": 204665, "loss": 0.0587, "lr": 7.078712073093271e-07, "epoch": 0.1769965553465419, "percentage": 3.54, "elapsed_time": "0:08:24", "remaining_time": "3:49:15", "throughput": 9691.68, "total_tokens": 4892480} +{"current_steps": 7250, "total_steps": 204665, "loss": 0.1063, "lr": 7.083597987003469e-07, "epoch": 0.17711870617838907, "percentage": 3.54, "elapsed_time": "0:08:25", "remaining_time": "3:49:15", "throughput": 9691.8, "total_tokens": 4895936} +{"current_steps": 7255, "total_steps": 204665, "loss": 0.2293, "lr": 7.088483900913666e-07, "epoch": 0.17724085701023623, "percentage": 3.54, "elapsed_time": "0:08:25", "remaining_time": "3:49:15", "throughput": 9691.61, "total_tokens": 4899200} +{"current_steps": 7260, "total_steps": 204665, "loss": 0.1442, "lr": 7.093369814823862e-07, "epoch": 0.1773630078420834, "percentage": 3.55, "elapsed_time": "0:08:25", "remaining_time": "3:49:14", "throughput": 9691.41, "total_tokens": 4902464} +{"current_steps": 7265, "total_steps": 204665, "loss": 0.35, "lr": 7.098255728734059e-07, "epoch": 0.17748515867393058, "percentage": 3.55, "elapsed_time": "0:08:26", "remaining_time": "3:49:14", "throughput": 9691.96, "total_tokens": 4906240} +{"current_steps": 7270, "total_steps": 204665, "loss": 0.0915, "lr": 7.103141642644256e-07, "epoch": 0.17760730950577774, "percentage": 3.55, "elapsed_time": "0:08:26", "remaining_time": "3:49:14", "throughput": 9692.2, "total_tokens": 4909760} +{"current_steps": 7275, "total_steps": 204665, "loss": 0.1682, "lr": 7.108027556554453e-07, "epoch": 0.1777294603376249, "percentage": 3.55, "elapsed_time": "0:08:26", "remaining_time": "3:49:13", "throughput": 9691.63, "total_tokens": 4912768} +{"current_steps": 7280, "total_steps": 204665, "loss": 0.0714, "lr": 7.11291347046465e-07, "epoch": 0.17785161116947207, "percentage": 3.56, "elapsed_time": "0:08:27", "remaining_time": "3:49:13", "throughput": 9691.38, "total_tokens": 4915968} +{"current_steps": 7285, "total_steps": 204665, "loss": 0.211, "lr": 7.117799384374848e-07, "epoch": 0.17797376200131923, "percentage": 3.56, "elapsed_time": "0:08:27", "remaining_time": "3:49:12", "throughput": 9690.8, "total_tokens": 4918976} +{"current_steps": 7290, "total_steps": 204665, "loss": 0.1074, "lr": 7.122685298285043e-07, "epoch": 0.1780959128331664, "percentage": 3.56, "elapsed_time": "0:08:27", "remaining_time": "3:49:12", "throughput": 9691.58, "total_tokens": 4922816} +{"current_steps": 7295, "total_steps": 204665, "loss": 0.1798, "lr": 7.12757121219524e-07, "epoch": 0.17821806366501355, "percentage": 3.56, "elapsed_time": "0:08:28", "remaining_time": "3:49:11", "throughput": 9691.02, "total_tokens": 4925824} +{"current_steps": 7300, "total_steps": 204665, "loss": 0.1859, "lr": 7.132457126105438e-07, "epoch": 0.17834021449686072, "percentage": 3.57, "elapsed_time": "0:08:28", "remaining_time": "3:49:11", "throughput": 9691.73, "total_tokens": 4929664} +{"current_steps": 7305, "total_steps": 204665, "loss": 0.0412, "lr": 7.137343040015634e-07, "epoch": 0.17846236532870788, "percentage": 3.57, "elapsed_time": "0:08:28", "remaining_time": "3:49:11", "throughput": 9691.93, "total_tokens": 4933184} +{"current_steps": 7310, "total_steps": 204665, "loss": 0.0465, "lr": 7.142228953925832e-07, "epoch": 0.17858451616055504, "percentage": 3.57, "elapsed_time": "0:08:29", "remaining_time": "3:49:11", "throughput": 9691.56, "total_tokens": 4936320} +{"current_steps": 7315, "total_steps": 204665, "loss": 0.0908, "lr": 7.147114867836028e-07, "epoch": 0.17870666699240223, "percentage": 3.57, "elapsed_time": "0:08:29", "remaining_time": "3:49:11", "throughput": 9692.26, "total_tokens": 4940160} +{"current_steps": 7320, "total_steps": 204665, "loss": 0.1059, "lr": 7.152000781746225e-07, "epoch": 0.1788288178242494, "percentage": 3.58, "elapsed_time": "0:08:30", "remaining_time": "3:49:10", "throughput": 9692.47, "total_tokens": 4943680} +{"current_steps": 7325, "total_steps": 204665, "loss": 0.0785, "lr": 7.156886695656422e-07, "epoch": 0.17895096865609655, "percentage": 3.58, "elapsed_time": "0:08:30", "remaining_time": "3:49:10", "throughput": 9691.76, "total_tokens": 4946560} +{"current_steps": 7330, "total_steps": 204665, "loss": 0.1813, "lr": 7.161772609566619e-07, "epoch": 0.17907311948794372, "percentage": 3.58, "elapsed_time": "0:08:30", "remaining_time": "3:49:09", "throughput": 9691.52, "total_tokens": 4949760} +{"current_steps": 7335, "total_steps": 204665, "loss": 0.0774, "lr": 7.166658523476816e-07, "epoch": 0.17919527031979088, "percentage": 3.58, "elapsed_time": "0:08:31", "remaining_time": "3:49:09", "throughput": 9691.28, "total_tokens": 4953024} +{"current_steps": 7340, "total_steps": 204665, "loss": 0.095, "lr": 7.171544437387013e-07, "epoch": 0.17931742115163804, "percentage": 3.59, "elapsed_time": "0:08:31", "remaining_time": "3:49:09", "throughput": 9691.7, "total_tokens": 4956672} +{"current_steps": 7345, "total_steps": 204665, "loss": 0.2202, "lr": 7.17643035129721e-07, "epoch": 0.1794395719834852, "percentage": 3.59, "elapsed_time": "0:08:31", "remaining_time": "3:49:08", "throughput": 9691.61, "total_tokens": 4959936} +{"current_steps": 7350, "total_steps": 204665, "loss": 0.0811, "lr": 7.181316265207406e-07, "epoch": 0.17956172281533236, "percentage": 3.59, "elapsed_time": "0:08:32", "remaining_time": "3:49:08", "throughput": 9691.74, "total_tokens": 4963392} +{"current_steps": 7355, "total_steps": 204665, "loss": 0.2623, "lr": 7.186202179117603e-07, "epoch": 0.17968387364717953, "percentage": 3.59, "elapsed_time": "0:08:32", "remaining_time": "3:49:07", "throughput": 9691.66, "total_tokens": 4966720} +{"current_steps": 7360, "total_steps": 204665, "loss": 0.1531, "lr": 7.191088093027801e-07, "epoch": 0.17980602447902672, "percentage": 3.6, "elapsed_time": "0:08:32", "remaining_time": "3:49:07", "throughput": 9691.17, "total_tokens": 4969792} +{"current_steps": 7365, "total_steps": 204665, "loss": 0.1272, "lr": 7.195974006937997e-07, "epoch": 0.17992817531087388, "percentage": 3.6, "elapsed_time": "0:08:33", "remaining_time": "3:49:07", "throughput": 9691.17, "total_tokens": 4973184} +{"current_steps": 7370, "total_steps": 204665, "loss": 0.1201, "lr": 7.200859920848194e-07, "epoch": 0.18005032614272104, "percentage": 3.6, "elapsed_time": "0:08:33", "remaining_time": "3:49:06", "throughput": 9690.66, "total_tokens": 4976192} +{"current_steps": 7375, "total_steps": 204665, "loss": 0.1259, "lr": 7.205745834758391e-07, "epoch": 0.1801724769745682, "percentage": 3.6, "elapsed_time": "0:08:33", "remaining_time": "3:49:06", "throughput": 9690.71, "total_tokens": 4979584} +{"current_steps": 7380, "total_steps": 204665, "loss": 0.1405, "lr": 7.210631748668588e-07, "epoch": 0.18029462780641536, "percentage": 3.61, "elapsed_time": "0:08:34", "remaining_time": "3:49:05", "throughput": 9690.45, "total_tokens": 4982784} +{"current_steps": 7385, "total_steps": 204665, "loss": 0.1383, "lr": 7.215517662578785e-07, "epoch": 0.18041677863826253, "percentage": 3.61, "elapsed_time": "0:08:34", "remaining_time": "3:49:05", "throughput": 9690.52, "total_tokens": 4986240} +{"current_steps": 7390, "total_steps": 204665, "loss": 0.133, "lr": 7.220403576488982e-07, "epoch": 0.1805389294701097, "percentage": 3.61, "elapsed_time": "0:08:34", "remaining_time": "3:49:05", "throughput": 9690.74, "total_tokens": 4989760} +{"current_steps": 7395, "total_steps": 204665, "loss": 0.1669, "lr": 7.225289490399179e-07, "epoch": 0.18066108030195685, "percentage": 3.61, "elapsed_time": "0:08:35", "remaining_time": "3:49:04", "throughput": 9690.87, "total_tokens": 4993216} +{"current_steps": 7400, "total_steps": 204665, "loss": 0.2285, "lr": 7.230175404309375e-07, "epoch": 0.180783231133804, "percentage": 3.62, "elapsed_time": "0:08:35", "remaining_time": "3:49:04", "throughput": 9691.01, "total_tokens": 4996672} +{"current_steps": 7405, "total_steps": 204665, "loss": 0.1672, "lr": 7.235061318219573e-07, "epoch": 0.18090538196565117, "percentage": 3.62, "elapsed_time": "0:08:35", "remaining_time": "3:49:04", "throughput": 9691.39, "total_tokens": 5000320} +{"current_steps": 7410, "total_steps": 204665, "loss": 0.0913, "lr": 7.239947232129769e-07, "epoch": 0.18102753279749836, "percentage": 3.62, "elapsed_time": "0:08:36", "remaining_time": "3:49:03", "throughput": 9690.94, "total_tokens": 5003392} +{"current_steps": 7415, "total_steps": 204665, "loss": 0.1249, "lr": 7.244833146039967e-07, "epoch": 0.18114968362934553, "percentage": 3.62, "elapsed_time": "0:08:36", "remaining_time": "3:49:03", "throughput": 9690.52, "total_tokens": 5006528} +{"current_steps": 7420, "total_steps": 204665, "loss": 0.1705, "lr": 7.249719059950164e-07, "epoch": 0.1812718344611927, "percentage": 3.63, "elapsed_time": "0:08:36", "remaining_time": "3:49:02", "throughput": 9689.96, "total_tokens": 5009536} +{"current_steps": 7425, "total_steps": 204665, "loss": 0.1101, "lr": 7.254604973860359e-07, "epoch": 0.18139398529303985, "percentage": 3.63, "elapsed_time": "0:08:37", "remaining_time": "3:49:02", "throughput": 9690.35, "total_tokens": 5013184} +{"current_steps": 7430, "total_steps": 204665, "loss": 0.0673, "lr": 7.259490887770557e-07, "epoch": 0.181516136124887, "percentage": 3.63, "elapsed_time": "0:08:37", "remaining_time": "3:49:02", "throughput": 9689.98, "total_tokens": 5016320} +{"current_steps": 7435, "total_steps": 204665, "loss": 0.085, "lr": 7.264376801680754e-07, "epoch": 0.18163828695673417, "percentage": 3.63, "elapsed_time": "0:08:38", "remaining_time": "3:49:01", "throughput": 9689.66, "total_tokens": 5019456} +{"current_steps": 7440, "total_steps": 204665, "loss": 0.1711, "lr": 7.269262715590951e-07, "epoch": 0.18176043778858134, "percentage": 3.64, "elapsed_time": "0:08:38", "remaining_time": "3:49:01", "throughput": 9689.48, "total_tokens": 5022720} +{"current_steps": 7445, "total_steps": 204665, "loss": 0.0828, "lr": 7.274148629501148e-07, "epoch": 0.1818825886204285, "percentage": 3.64, "elapsed_time": "0:08:38", "remaining_time": "3:49:00", "throughput": 9688.86, "total_tokens": 5025664} +{"current_steps": 7450, "total_steps": 204665, "loss": 0.1175, "lr": 7.279034543411345e-07, "epoch": 0.18200473945227566, "percentage": 3.64, "elapsed_time": "0:08:39", "remaining_time": "3:49:00", "throughput": 9688.95, "total_tokens": 5029120} +{"current_steps": 7455, "total_steps": 204665, "loss": 0.0589, "lr": 7.283920457321541e-07, "epoch": 0.18212689028412282, "percentage": 3.64, "elapsed_time": "0:08:39", "remaining_time": "3:48:59", "throughput": 9688.9, "total_tokens": 5032448} +{"current_steps": 7460, "total_steps": 204665, "loss": 0.1039, "lr": 7.288806371231738e-07, "epoch": 0.18224904111597, "percentage": 3.64, "elapsed_time": "0:08:39", "remaining_time": "3:48:59", "throughput": 9688.34, "total_tokens": 5035456} +{"current_steps": 7465, "total_steps": 204665, "loss": 0.2372, "lr": 7.293692285141936e-07, "epoch": 0.18237119194781717, "percentage": 3.65, "elapsed_time": "0:08:40", "remaining_time": "3:48:58", "throughput": 9688.08, "total_tokens": 5038656} +{"current_steps": 7470, "total_steps": 204665, "loss": 0.1839, "lr": 7.298578199052132e-07, "epoch": 0.18249334277966434, "percentage": 3.65, "elapsed_time": "0:08:40", "remaining_time": "3:48:58", "throughput": 9688.5, "total_tokens": 5042304} +{"current_steps": 7475, "total_steps": 204665, "loss": 0.1081, "lr": 7.30346411296233e-07, "epoch": 0.1826154936115115, "percentage": 3.65, "elapsed_time": "0:08:40", "remaining_time": "3:48:58", "throughput": 9688.13, "total_tokens": 5045440} +{"current_steps": 7480, "total_steps": 204665, "loss": 0.2091, "lr": 7.308350026872526e-07, "epoch": 0.18273764444335866, "percentage": 3.65, "elapsed_time": "0:08:41", "remaining_time": "3:48:57", "throughput": 9687.76, "total_tokens": 5048576} +{"current_steps": 7485, "total_steps": 204665, "loss": 0.0761, "lr": 7.313235940782722e-07, "epoch": 0.18285979527520582, "percentage": 3.66, "elapsed_time": "0:08:41", "remaining_time": "3:48:57", "throughput": 9687.57, "total_tokens": 5051840} +{"current_steps": 7490, "total_steps": 204665, "loss": 0.2031, "lr": 7.31812185469292e-07, "epoch": 0.18298194610705298, "percentage": 3.66, "elapsed_time": "0:08:41", "remaining_time": "3:48:56", "throughput": 9687.21, "total_tokens": 5054976} +{"current_steps": 7495, "total_steps": 204665, "loss": 0.1034, "lr": 7.323007768603117e-07, "epoch": 0.18310409693890015, "percentage": 3.66, "elapsed_time": "0:08:42", "remaining_time": "3:48:56", "throughput": 9687.59, "total_tokens": 5058560} +{"current_steps": 7500, "total_steps": 204665, "loss": 0.0831, "lr": 7.327893682513314e-07, "epoch": 0.1832262477707473, "percentage": 3.66, "elapsed_time": "0:08:42", "remaining_time": "3:48:56", "throughput": 9687.01, "total_tokens": 5061568} +{"current_steps": 7505, "total_steps": 204665, "loss": 0.0543, "lr": 7.332779596423511e-07, "epoch": 0.18334839860259447, "percentage": 3.67, "elapsed_time": "0:08:42", "remaining_time": "3:48:55", "throughput": 9686.73, "total_tokens": 5064768} +{"current_steps": 7510, "total_steps": 204665, "loss": 0.0832, "lr": 7.337665510333707e-07, "epoch": 0.18347054943444166, "percentage": 3.67, "elapsed_time": "0:08:43", "remaining_time": "3:48:56", "throughput": 9689.86, "total_tokens": 5070336} +{"current_steps": 7515, "total_steps": 204665, "loss": 0.0929, "lr": 7.342551424243904e-07, "epoch": 0.18359270026628882, "percentage": 3.67, "elapsed_time": "0:08:43", "remaining_time": "3:48:56", "throughput": 9689.53, "total_tokens": 5073472} +{"current_steps": 7520, "total_steps": 204665, "loss": 0.0853, "lr": 7.347437338154101e-07, "epoch": 0.18371485109813598, "percentage": 3.67, "elapsed_time": "0:08:43", "remaining_time": "3:48:56", "throughput": 9689.68, "total_tokens": 5076992} +{"current_steps": 7525, "total_steps": 204665, "loss": 0.1682, "lr": 7.352323252064299e-07, "epoch": 0.18383700192998315, "percentage": 3.68, "elapsed_time": "0:08:44", "remaining_time": "3:48:55", "throughput": 9689.63, "total_tokens": 5080320} +{"current_steps": 7530, "total_steps": 204665, "loss": 0.1089, "lr": 7.357209165974495e-07, "epoch": 0.1839591527618303, "percentage": 3.68, "elapsed_time": "0:08:44", "remaining_time": "3:48:55", "throughput": 9689.5, "total_tokens": 5083584} +{"current_steps": 7535, "total_steps": 204665, "loss": 0.2105, "lr": 7.362095079884692e-07, "epoch": 0.18408130359367747, "percentage": 3.68, "elapsed_time": "0:08:44", "remaining_time": "3:48:54", "throughput": 9689.44, "total_tokens": 5086912} +{"current_steps": 7540, "total_steps": 204665, "loss": 0.1018, "lr": 7.366980993794889e-07, "epoch": 0.18420345442552463, "percentage": 3.68, "elapsed_time": "0:08:45", "remaining_time": "3:48:54", "throughput": 9689.36, "total_tokens": 5090240} +{"current_steps": 7545, "total_steps": 204665, "loss": 0.1192, "lr": 7.371866907705085e-07, "epoch": 0.1843256052573718, "percentage": 3.69, "elapsed_time": "0:08:45", "remaining_time": "3:48:54", "throughput": 9689.42, "total_tokens": 5093632} +{"current_steps": 7550, "total_steps": 204665, "loss": 0.1163, "lr": 7.376752821615283e-07, "epoch": 0.18444775608921896, "percentage": 3.69, "elapsed_time": "0:08:46", "remaining_time": "3:48:54", "throughput": 9690.53, "total_tokens": 5097792} +{"current_steps": 7555, "total_steps": 204665, "loss": 0.098, "lr": 7.38163873552548e-07, "epoch": 0.18456990692106615, "percentage": 3.69, "elapsed_time": "0:08:46", "remaining_time": "3:48:53", "throughput": 9690.27, "total_tokens": 5100992} +{"current_steps": 7560, "total_steps": 204665, "loss": 0.1937, "lr": 7.386524649435677e-07, "epoch": 0.1846920577529133, "percentage": 3.69, "elapsed_time": "0:08:46", "remaining_time": "3:48:53", "throughput": 9689.95, "total_tokens": 5104128} +{"current_steps": 7565, "total_steps": 204665, "loss": 0.1587, "lr": 7.391410563345873e-07, "epoch": 0.18481420858476047, "percentage": 3.7, "elapsed_time": "0:08:47", "remaining_time": "3:48:52", "throughput": 9689.63, "total_tokens": 5107328} +{"current_steps": 7570, "total_steps": 204665, "loss": 0.117, "lr": 7.39629647725607e-07, "epoch": 0.18493635941660763, "percentage": 3.7, "elapsed_time": "0:08:47", "remaining_time": "3:48:52", "throughput": 9689.02, "total_tokens": 5110272} +{"current_steps": 7575, "total_steps": 204665, "loss": 0.1317, "lr": 7.401182391166267e-07, "epoch": 0.1850585102484548, "percentage": 3.7, "elapsed_time": "0:08:47", "remaining_time": "3:48:52", "throughput": 9689.21, "total_tokens": 5113792} +{"current_steps": 7580, "total_steps": 204665, "loss": 0.2001, "lr": 7.406068305076464e-07, "epoch": 0.18518066108030196, "percentage": 3.7, "elapsed_time": "0:08:48", "remaining_time": "3:48:51", "throughput": 9688.96, "total_tokens": 5116992} +{"current_steps": 7585, "total_steps": 204665, "loss": 0.1577, "lr": 7.410954218986662e-07, "epoch": 0.18530281191214912, "percentage": 3.71, "elapsed_time": "0:08:48", "remaining_time": "3:48:51", "throughput": 9688.72, "total_tokens": 5120192} +{"current_steps": 7590, "total_steps": 204665, "loss": 0.0709, "lr": 7.415840132896857e-07, "epoch": 0.18542496274399628, "percentage": 3.71, "elapsed_time": "0:08:48", "remaining_time": "3:48:50", "throughput": 9689.02, "total_tokens": 5123776} +{"current_steps": 7595, "total_steps": 204665, "loss": 0.1228, "lr": 7.420726046807055e-07, "epoch": 0.18554711357584344, "percentage": 3.71, "elapsed_time": "0:08:49", "remaining_time": "3:48:50", "throughput": 9689.27, "total_tokens": 5127360} +{"current_steps": 7600, "total_steps": 204665, "loss": 0.0538, "lr": 7.425611960717252e-07, "epoch": 0.1856692644076906, "percentage": 3.71, "elapsed_time": "0:08:49", "remaining_time": "3:48:50", "throughput": 9688.85, "total_tokens": 5130432} +{"current_steps": 7605, "total_steps": 204665, "loss": 0.0803, "lr": 7.430497874627448e-07, "epoch": 0.1857914152395378, "percentage": 3.72, "elapsed_time": "0:08:49", "remaining_time": "3:48:49", "throughput": 9688.6, "total_tokens": 5133632} +{"current_steps": 7610, "total_steps": 204665, "loss": 0.1216, "lr": 7.435383788537646e-07, "epoch": 0.18591356607138496, "percentage": 3.72, "elapsed_time": "0:08:50", "remaining_time": "3:48:49", "throughput": 9687.99, "total_tokens": 5136576} +{"current_steps": 7615, "total_steps": 204665, "loss": 0.1015, "lr": 7.440269702447843e-07, "epoch": 0.18603571690323212, "percentage": 3.72, "elapsed_time": "0:08:50", "remaining_time": "3:48:48", "throughput": 9687.85, "total_tokens": 5139840} +{"current_steps": 7620, "total_steps": 204665, "loss": 0.0851, "lr": 7.445155616358039e-07, "epoch": 0.18615786773507928, "percentage": 3.72, "elapsed_time": "0:08:50", "remaining_time": "3:48:48", "throughput": 9687.54, "total_tokens": 5142976} +{"current_steps": 7625, "total_steps": 204665, "loss": 0.0948, "lr": 7.450041530268236e-07, "epoch": 0.18628001856692644, "percentage": 3.73, "elapsed_time": "0:08:51", "remaining_time": "3:48:47", "throughput": 9687.6, "total_tokens": 5146368} +{"current_steps": 7630, "total_steps": 204665, "loss": 0.1405, "lr": 7.454927444178434e-07, "epoch": 0.1864021693987736, "percentage": 3.73, "elapsed_time": "0:08:51", "remaining_time": "3:48:47", "throughput": 9688.09, "total_tokens": 5150080} +{"current_steps": 7635, "total_steps": 204665, "loss": 0.1601, "lr": 7.45981335808863e-07, "epoch": 0.18652432023062077, "percentage": 3.73, "elapsed_time": "0:08:51", "remaining_time": "3:48:47", "throughput": 9687.81, "total_tokens": 5153280} +{"current_steps": 7640, "total_steps": 204665, "loss": 0.1544, "lr": 7.464699271998827e-07, "epoch": 0.18664647106246793, "percentage": 3.73, "elapsed_time": "0:08:52", "remaining_time": "3:48:46", "throughput": 9686.94, "total_tokens": 5156032} +{"current_steps": 7645, "total_steps": 204665, "loss": 0.0794, "lr": 7.469585185909024e-07, "epoch": 0.1867686218943151, "percentage": 3.74, "elapsed_time": "0:08:52", "remaining_time": "3:48:46", "throughput": 9686.77, "total_tokens": 5159296} +{"current_steps": 7650, "total_steps": 204665, "loss": 0.172, "lr": 7.47447109981922e-07, "epoch": 0.18689077272616225, "percentage": 3.74, "elapsed_time": "0:08:52", "remaining_time": "3:48:45", "throughput": 9686.85, "total_tokens": 5162752} +{"current_steps": 7655, "total_steps": 204665, "loss": 0.3807, "lr": 7.479357013729418e-07, "epoch": 0.18701292355800944, "percentage": 3.74, "elapsed_time": "0:08:53", "remaining_time": "3:48:45", "throughput": 9686.81, "total_tokens": 5166080} +{"current_steps": 7660, "total_steps": 204665, "loss": 0.1094, "lr": 7.484242927639615e-07, "epoch": 0.1871350743898566, "percentage": 3.74, "elapsed_time": "0:08:53", "remaining_time": "3:48:45", "throughput": 9686.93, "total_tokens": 5169536} +{"current_steps": 7665, "total_steps": 204665, "loss": 0.0831, "lr": 7.489128841549811e-07, "epoch": 0.18725722522170377, "percentage": 3.75, "elapsed_time": "0:08:54", "remaining_time": "3:48:44", "throughput": 9686.99, "total_tokens": 5172928} +{"current_steps": 7670, "total_steps": 204665, "loss": 0.1211, "lr": 7.494014755460009e-07, "epoch": 0.18737937605355093, "percentage": 3.75, "elapsed_time": "0:08:54", "remaining_time": "3:48:44", "throughput": 9687.23, "total_tokens": 5176448} +{"current_steps": 7675, "total_steps": 204665, "loss": 0.2272, "lr": 7.498900669370205e-07, "epoch": 0.1875015268853981, "percentage": 3.75, "elapsed_time": "0:08:54", "remaining_time": "3:48:44", "throughput": 9687.72, "total_tokens": 5180160} +{"current_steps": 7680, "total_steps": 204665, "loss": 0.1483, "lr": 7.503786583280402e-07, "epoch": 0.18762367771724525, "percentage": 3.75, "elapsed_time": "0:08:55", "remaining_time": "3:48:43", "throughput": 9687.48, "total_tokens": 5183360} +{"current_steps": 7685, "total_steps": 204665, "loss": 0.1039, "lr": 7.508672497190599e-07, "epoch": 0.18774582854909241, "percentage": 3.75, "elapsed_time": "0:08:55", "remaining_time": "3:48:43", "throughput": 9688.23, "total_tokens": 5187264} +{"current_steps": 7690, "total_steps": 204665, "loss": 0.0943, "lr": 7.513558411100797e-07, "epoch": 0.18786797938093958, "percentage": 3.76, "elapsed_time": "0:08:55", "remaining_time": "3:48:43", "throughput": 9688.11, "total_tokens": 5190528} +{"current_steps": 7695, "total_steps": 204665, "loss": 0.1337, "lr": 7.518444325010993e-07, "epoch": 0.18799013021278674, "percentage": 3.76, "elapsed_time": "0:08:56", "remaining_time": "3:48:42", "throughput": 9687.63, "total_tokens": 5193536} +{"current_steps": 7700, "total_steps": 204665, "loss": 0.0767, "lr": 7.523330238921189e-07, "epoch": 0.18811228104463393, "percentage": 3.76, "elapsed_time": "0:08:56", "remaining_time": "3:48:42", "throughput": 9687.83, "total_tokens": 5197056} +{"current_steps": 7705, "total_steps": 204665, "loss": 0.1273, "lr": 7.528216152831387e-07, "epoch": 0.1882344318764811, "percentage": 3.76, "elapsed_time": "0:08:56", "remaining_time": "3:48:42", "throughput": 9688.44, "total_tokens": 5200832} +{"current_steps": 7710, "total_steps": 204665, "loss": 0.1268, "lr": 7.533102066741583e-07, "epoch": 0.18835658270832825, "percentage": 3.77, "elapsed_time": "0:08:57", "remaining_time": "3:48:41", "throughput": 9688.63, "total_tokens": 5204352} +{"current_steps": 7715, "total_steps": 204665, "loss": 0.1765, "lr": 7.537987980651781e-07, "epoch": 0.1884787335401754, "percentage": 3.77, "elapsed_time": "0:08:57", "remaining_time": "3:48:41", "throughput": 9688.29, "total_tokens": 5207488} +{"current_steps": 7720, "total_steps": 204665, "loss": 0.0459, "lr": 7.542873894561978e-07, "epoch": 0.18860088437202258, "percentage": 3.77, "elapsed_time": "0:08:57", "remaining_time": "3:48:41", "throughput": 9688.44, "total_tokens": 5211008} +{"current_steps": 7725, "total_steps": 204665, "loss": 0.1811, "lr": 7.547759808472174e-07, "epoch": 0.18872303520386974, "percentage": 3.77, "elapsed_time": "0:08:58", "remaining_time": "3:48:40", "throughput": 9688.48, "total_tokens": 5214400} +{"current_steps": 7730, "total_steps": 204665, "loss": 0.1476, "lr": 7.552645722382371e-07, "epoch": 0.1888451860357169, "percentage": 3.78, "elapsed_time": "0:08:58", "remaining_time": "3:48:40", "throughput": 9688.15, "total_tokens": 5217536} +{"current_steps": 7735, "total_steps": 204665, "loss": 0.0754, "lr": 7.557531636292568e-07, "epoch": 0.18896733686756406, "percentage": 3.78, "elapsed_time": "0:08:58", "remaining_time": "3:48:40", "throughput": 9688.0, "total_tokens": 5220800} +{"current_steps": 7740, "total_steps": 204665, "loss": 0.1234, "lr": 7.562417550202765e-07, "epoch": 0.18908948769941122, "percentage": 3.78, "elapsed_time": "0:08:59", "remaining_time": "3:48:39", "throughput": 9687.85, "total_tokens": 5224064} +{"current_steps": 7745, "total_steps": 204665, "loss": 0.124, "lr": 7.567303464112962e-07, "epoch": 0.18921163853125839, "percentage": 3.78, "elapsed_time": "0:08:59", "remaining_time": "3:48:39", "throughput": 9688.31, "total_tokens": 5227712} +{"current_steps": 7750, "total_steps": 204665, "loss": 0.0938, "lr": 7.57218937802316e-07, "epoch": 0.18933378936310558, "percentage": 3.79, "elapsed_time": "0:08:59", "remaining_time": "3:48:38", "throughput": 9688.44, "total_tokens": 5231168} +{"current_steps": 7755, "total_steps": 204665, "loss": 0.0911, "lr": 7.577075291933355e-07, "epoch": 0.18945594019495274, "percentage": 3.79, "elapsed_time": "0:09:00", "remaining_time": "3:48:38", "throughput": 9688.28, "total_tokens": 5234432} +{"current_steps": 7760, "total_steps": 204665, "loss": 0.1401, "lr": 7.581961205843552e-07, "epoch": 0.1895780910267999, "percentage": 3.79, "elapsed_time": "0:09:00", "remaining_time": "3:48:38", "throughput": 9688.17, "total_tokens": 5237696} +{"current_steps": 7765, "total_steps": 204665, "loss": 0.0962, "lr": 7.58684711975375e-07, "epoch": 0.18970024185864706, "percentage": 3.79, "elapsed_time": "0:09:01", "remaining_time": "3:48:39", "throughput": 9691.22, "total_tokens": 5243264} +{"current_steps": 7770, "total_steps": 204665, "loss": 0.1051, "lr": 7.591733033663946e-07, "epoch": 0.18982239269049422, "percentage": 3.8, "elapsed_time": "0:09:01", "remaining_time": "3:48:38", "throughput": 9690.74, "total_tokens": 5246272} +{"current_steps": 7775, "total_steps": 204665, "loss": 0.1633, "lr": 7.596618947574144e-07, "epoch": 0.18994454352234139, "percentage": 3.8, "elapsed_time": "0:09:01", "remaining_time": "3:48:38", "throughput": 9690.76, "total_tokens": 5249664} +{"current_steps": 7780, "total_steps": 204665, "loss": 0.1516, "lr": 7.601504861484341e-07, "epoch": 0.19006669435418855, "percentage": 3.8, "elapsed_time": "0:09:02", "remaining_time": "3:48:37", "throughput": 9690.78, "total_tokens": 5253056} +{"current_steps": 7785, "total_steps": 204665, "loss": 0.0891, "lr": 7.606390775394536e-07, "epoch": 0.1901888451860357, "percentage": 3.8, "elapsed_time": "0:09:02", "remaining_time": "3:48:37", "throughput": 9690.84, "total_tokens": 5256448} +{"current_steps": 7790, "total_steps": 204665, "loss": 0.1017, "lr": 7.611276689304734e-07, "epoch": 0.19031099601788287, "percentage": 3.81, "elapsed_time": "0:09:02", "remaining_time": "3:48:36", "throughput": 9690.73, "total_tokens": 5259712} +{"current_steps": 7795, "total_steps": 204665, "loss": 0.039, "lr": 7.616162603214931e-07, "epoch": 0.19043314684973003, "percentage": 3.81, "elapsed_time": "0:09:03", "remaining_time": "3:48:36", "throughput": 9690.68, "total_tokens": 5263040} +{"current_steps": 7800, "total_steps": 204665, "loss": 0.1871, "lr": 7.621048517125128e-07, "epoch": 0.19055529768157722, "percentage": 3.81, "elapsed_time": "0:09:03", "remaining_time": "3:48:36", "throughput": 9690.7, "total_tokens": 5266432} +{"current_steps": 7805, "total_steps": 204665, "loss": 0.2188, "lr": 7.625934431035325e-07, "epoch": 0.19067744851342439, "percentage": 3.81, "elapsed_time": "0:09:03", "remaining_time": "3:48:35", "throughput": 9690.91, "total_tokens": 5269952} +{"current_steps": 7810, "total_steps": 204665, "loss": 0.052, "lr": 7.630820344945523e-07, "epoch": 0.19079959934527155, "percentage": 3.82, "elapsed_time": "0:09:04", "remaining_time": "3:48:35", "throughput": 9690.58, "total_tokens": 5273088} +{"current_steps": 7815, "total_steps": 204665, "loss": 0.2614, "lr": 7.635706258855718e-07, "epoch": 0.1909217501771187, "percentage": 3.82, "elapsed_time": "0:09:04", "remaining_time": "3:48:35", "throughput": 9691.03, "total_tokens": 5276800} +{"current_steps": 7820, "total_steps": 204665, "loss": 0.1203, "lr": 7.640592172765915e-07, "epoch": 0.19104390100896587, "percentage": 3.82, "elapsed_time": "0:09:04", "remaining_time": "3:48:35", "throughput": 9691.34, "total_tokens": 5280384} +{"current_steps": 7825, "total_steps": 204665, "loss": 0.1966, "lr": 7.645478086676113e-07, "epoch": 0.19116605184081303, "percentage": 3.82, "elapsed_time": "0:09:05", "remaining_time": "3:48:34", "throughput": 9691.06, "total_tokens": 5283584} +{"current_steps": 7830, "total_steps": 204665, "loss": 0.0947, "lr": 7.650364000586309e-07, "epoch": 0.1912882026726602, "percentage": 3.83, "elapsed_time": "0:09:05", "remaining_time": "3:48:34", "throughput": 9691.06, "total_tokens": 5286976} +{"current_steps": 7835, "total_steps": 204665, "loss": 0.1253, "lr": 7.655249914496507e-07, "epoch": 0.19141035350450736, "percentage": 3.83, "elapsed_time": "0:09:05", "remaining_time": "3:48:34", "throughput": 9691.13, "total_tokens": 5290432} +{"current_steps": 7840, "total_steps": 204665, "loss": 0.1757, "lr": 7.660135828406703e-07, "epoch": 0.19153250433635452, "percentage": 3.83, "elapsed_time": "0:09:06", "remaining_time": "3:48:33", "throughput": 9691.64, "total_tokens": 5294144} +{"current_steps": 7845, "total_steps": 204665, "loss": 0.1266, "lr": 7.6650217423169e-07, "epoch": 0.19165465516820168, "percentage": 3.83, "elapsed_time": "0:09:06", "remaining_time": "3:48:33", "throughput": 9691.35, "total_tokens": 5297344} +{"current_steps": 7850, "total_steps": 204665, "loss": 0.0449, "lr": 7.669907656227097e-07, "epoch": 0.19177680600004887, "percentage": 3.84, "elapsed_time": "0:09:06", "remaining_time": "3:48:33", "throughput": 9691.76, "total_tokens": 5300992} +{"current_steps": 7855, "total_steps": 204665, "loss": 0.1091, "lr": 7.674793570137294e-07, "epoch": 0.19189895683189603, "percentage": 3.84, "elapsed_time": "0:09:07", "remaining_time": "3:48:32", "throughput": 9691.8, "total_tokens": 5304384} +{"current_steps": 7860, "total_steps": 204665, "loss": 0.3198, "lr": 7.679679484047491e-07, "epoch": 0.1920211076637432, "percentage": 3.84, "elapsed_time": "0:09:07", "remaining_time": "3:48:32", "throughput": 9691.93, "total_tokens": 5307840} +{"current_steps": 7865, "total_steps": 204665, "loss": 0.2131, "lr": 7.684565397957687e-07, "epoch": 0.19214325849559036, "percentage": 3.84, "elapsed_time": "0:09:07", "remaining_time": "3:48:31", "throughput": 9691.27, "total_tokens": 5310720} +{"current_steps": 7870, "total_steps": 204665, "loss": 0.2205, "lr": 7.689451311867885e-07, "epoch": 0.19226540932743752, "percentage": 3.85, "elapsed_time": "0:09:08", "remaining_time": "3:48:31", "throughput": 9691.58, "total_tokens": 5314304} +{"current_steps": 7875, "total_steps": 204665, "loss": 0.1237, "lr": 7.694337225778081e-07, "epoch": 0.19238756015928468, "percentage": 3.85, "elapsed_time": "0:09:08", "remaining_time": "3:48:31", "throughput": 9691.11, "total_tokens": 5317376} +{"current_steps": 7880, "total_steps": 204665, "loss": 0.0954, "lr": 7.699223139688278e-07, "epoch": 0.19250971099113184, "percentage": 3.85, "elapsed_time": "0:09:09", "remaining_time": "3:48:30", "throughput": 9691.01, "total_tokens": 5320704} +{"current_steps": 7885, "total_steps": 204665, "loss": 0.085, "lr": 7.704109053598476e-07, "epoch": 0.192631861822979, "percentage": 3.85, "elapsed_time": "0:09:09", "remaining_time": "3:48:30", "throughput": 9690.6, "total_tokens": 5323776} +{"current_steps": 7890, "total_steps": 204665, "loss": 0.1251, "lr": 7.708994967508672e-07, "epoch": 0.19275401265482617, "percentage": 3.86, "elapsed_time": "0:09:09", "remaining_time": "3:48:29", "throughput": 9690.25, "total_tokens": 5326912} +{"current_steps": 7895, "total_steps": 204665, "loss": 0.1403, "lr": 7.713880881418869e-07, "epoch": 0.19287616348667336, "percentage": 3.86, "elapsed_time": "0:09:10", "remaining_time": "3:48:29", "throughput": 9690.09, "total_tokens": 5330176} +{"current_steps": 7900, "total_steps": 204665, "loss": 0.2507, "lr": 7.718766795329066e-07, "epoch": 0.19299831431852052, "percentage": 3.86, "elapsed_time": "0:09:10", "remaining_time": "3:48:28", "throughput": 9689.86, "total_tokens": 5333376} +{"current_steps": 7905, "total_steps": 204665, "loss": 0.1507, "lr": 7.723652709239263e-07, "epoch": 0.19312046515036768, "percentage": 3.86, "elapsed_time": "0:09:10", "remaining_time": "3:48:28", "throughput": 9690.1, "total_tokens": 5336896} +{"current_steps": 7910, "total_steps": 204665, "loss": 0.1085, "lr": 7.72853862314946e-07, "epoch": 0.19324261598221484, "percentage": 3.86, "elapsed_time": "0:09:11", "remaining_time": "3:48:28", "throughput": 9689.78, "total_tokens": 5340032} +{"current_steps": 7915, "total_steps": 204665, "loss": 0.1381, "lr": 7.733424537059657e-07, "epoch": 0.193364766814062, "percentage": 3.87, "elapsed_time": "0:09:11", "remaining_time": "3:48:27", "throughput": 9689.34, "total_tokens": 5343104} +{"current_steps": 7920, "total_steps": 204665, "loss": 0.0414, "lr": 7.738310450969853e-07, "epoch": 0.19348691764590917, "percentage": 3.87, "elapsed_time": "0:09:11", "remaining_time": "3:48:27", "throughput": 9689.71, "total_tokens": 5346752} +{"current_steps": 7925, "total_steps": 204665, "loss": 0.1919, "lr": 7.74319636488005e-07, "epoch": 0.19360906847775633, "percentage": 3.87, "elapsed_time": "0:09:12", "remaining_time": "3:48:27", "throughput": 9689.59, "total_tokens": 5350016} +{"current_steps": 7930, "total_steps": 204665, "loss": 0.1732, "lr": 7.748082278790248e-07, "epoch": 0.1937312193096035, "percentage": 3.87, "elapsed_time": "0:09:12", "remaining_time": "3:48:26", "throughput": 9689.37, "total_tokens": 5353216} +{"current_steps": 7935, "total_steps": 204665, "loss": 0.3179, "lr": 7.752968192700444e-07, "epoch": 0.19385337014145065, "percentage": 3.88, "elapsed_time": "0:09:12", "remaining_time": "3:48:26", "throughput": 9689.73, "total_tokens": 5356864} +{"current_steps": 7940, "total_steps": 204665, "loss": 0.0838, "lr": 7.757854106610641e-07, "epoch": 0.19397552097329782, "percentage": 3.88, "elapsed_time": "0:09:13", "remaining_time": "3:48:25", "throughput": 9689.69, "total_tokens": 5360192} +{"current_steps": 7945, "total_steps": 204665, "loss": 0.1119, "lr": 7.762740020520839e-07, "epoch": 0.194097671805145, "percentage": 3.88, "elapsed_time": "0:09:13", "remaining_time": "3:48:25", "throughput": 9689.81, "total_tokens": 5363648} +{"current_steps": 7950, "total_steps": 204665, "loss": 0.1827, "lr": 7.767625934431034e-07, "epoch": 0.19421982263699217, "percentage": 3.88, "elapsed_time": "0:09:13", "remaining_time": "3:48:25", "throughput": 9689.1, "total_tokens": 5366528} +{"current_steps": 7955, "total_steps": 204665, "loss": 0.1905, "lr": 7.772511848341232e-07, "epoch": 0.19434197346883933, "percentage": 3.89, "elapsed_time": "0:09:14", "remaining_time": "3:48:24", "throughput": 9689.4, "total_tokens": 5370112} +{"current_steps": 7960, "total_steps": 204665, "loss": 0.0704, "lr": 7.777397762251429e-07, "epoch": 0.1944641243006865, "percentage": 3.89, "elapsed_time": "0:09:14", "remaining_time": "3:48:24", "throughput": 9689.8, "total_tokens": 5373760} +{"current_steps": 7965, "total_steps": 204665, "loss": 0.153, "lr": 7.782283676161626e-07, "epoch": 0.19458627513253365, "percentage": 3.89, "elapsed_time": "0:09:14", "remaining_time": "3:48:25", "throughput": 9693.0, "total_tokens": 5379520} +{"current_steps": 7970, "total_steps": 204665, "loss": 0.0725, "lr": 7.787169590071823e-07, "epoch": 0.19470842596438082, "percentage": 3.89, "elapsed_time": "0:09:15", "remaining_time": "3:48:25", "throughput": 9692.4, "total_tokens": 5382464} +{"current_steps": 7975, "total_steps": 204665, "loss": 0.1389, "lr": 7.792055503982019e-07, "epoch": 0.19483057679622798, "percentage": 3.9, "elapsed_time": "0:09:15", "remaining_time": "3:48:24", "throughput": 9692.34, "total_tokens": 5385792} +{"current_steps": 7980, "total_steps": 204665, "loss": 0.1474, "lr": 7.796941417892216e-07, "epoch": 0.19495272762807514, "percentage": 3.9, "elapsed_time": "0:09:16", "remaining_time": "3:48:24", "throughput": 9692.29, "total_tokens": 5389120} +{"current_steps": 7985, "total_steps": 204665, "loss": 0.0684, "lr": 7.801827331802413e-07, "epoch": 0.1950748784599223, "percentage": 3.9, "elapsed_time": "0:09:16", "remaining_time": "3:48:24", "throughput": 9692.42, "total_tokens": 5392576} +{"current_steps": 7990, "total_steps": 204665, "loss": 0.1549, "lr": 7.806713245712611e-07, "epoch": 0.19519702929176946, "percentage": 3.9, "elapsed_time": "0:09:16", "remaining_time": "3:48:23", "throughput": 9692.72, "total_tokens": 5396160} +{"current_steps": 7995, "total_steps": 204665, "loss": 0.0496, "lr": 7.811599159622807e-07, "epoch": 0.19531918012361665, "percentage": 3.91, "elapsed_time": "0:09:17", "remaining_time": "3:48:23", "throughput": 9692.56, "total_tokens": 5399424} +{"current_steps": 8000, "total_steps": 204665, "loss": 0.1259, "lr": 7.816485073533004e-07, "epoch": 0.19544133095546382, "percentage": 3.91, "elapsed_time": "0:09:17", "remaining_time": "3:48:23", "throughput": 9692.47, "total_tokens": 5402752} +{"current_steps": 8005, "total_steps": 204665, "loss": 0.0967, "lr": 7.821370987443201e-07, "epoch": 0.19556348178731098, "percentage": 3.91, "elapsed_time": "0:09:17", "remaining_time": "3:48:22", "throughput": 9692.19, "total_tokens": 5405952} +{"current_steps": 8010, "total_steps": 204665, "loss": 0.1424, "lr": 7.826256901353397e-07, "epoch": 0.19568563261915814, "percentage": 3.91, "elapsed_time": "0:09:18", "remaining_time": "3:48:22", "throughput": 9692.37, "total_tokens": 5409472} +{"current_steps": 8015, "total_steps": 204665, "loss": 0.0976, "lr": 7.831142815263595e-07, "epoch": 0.1958077834510053, "percentage": 3.92, "elapsed_time": "0:09:18", "remaining_time": "3:48:22", "throughput": 9692.32, "total_tokens": 5412800} +{"current_steps": 8020, "total_steps": 204665, "loss": 0.133, "lr": 7.836028729173792e-07, "epoch": 0.19592993428285246, "percentage": 3.92, "elapsed_time": "0:09:18", "remaining_time": "3:48:21", "throughput": 9692.44, "total_tokens": 5416256} +{"current_steps": 8025, "total_steps": 204665, "loss": 0.3602, "lr": 7.840914643083989e-07, "epoch": 0.19605208511469963, "percentage": 3.92, "elapsed_time": "0:09:19", "remaining_time": "3:48:21", "throughput": 9692.32, "total_tokens": 5419520} +{"current_steps": 8030, "total_steps": 204665, "loss": 0.1214, "lr": 7.845800556994185e-07, "epoch": 0.1961742359465468, "percentage": 3.92, "elapsed_time": "0:09:19", "remaining_time": "3:48:20", "throughput": 9692.4, "total_tokens": 5422976} +{"current_steps": 8035, "total_steps": 204665, "loss": 0.0961, "lr": 7.850686470904382e-07, "epoch": 0.19629638677839395, "percentage": 3.93, "elapsed_time": "0:09:19", "remaining_time": "3:48:20", "throughput": 9692.16, "total_tokens": 5426176} +{"current_steps": 8040, "total_steps": 204665, "loss": 0.282, "lr": 7.855572384814579e-07, "epoch": 0.19641853761024114, "percentage": 3.93, "elapsed_time": "0:09:20", "remaining_time": "3:48:20", "throughput": 9692.41, "total_tokens": 5429696} +{"current_steps": 8045, "total_steps": 204665, "loss": 0.0883, "lr": 7.860458298724776e-07, "epoch": 0.1965406884420883, "percentage": 3.93, "elapsed_time": "0:09:20", "remaining_time": "3:48:19", "throughput": 9692.06, "total_tokens": 5432832} +{"current_steps": 8050, "total_steps": 204665, "loss": 0.0787, "lr": 7.865344212634974e-07, "epoch": 0.19666283927393546, "percentage": 3.93, "elapsed_time": "0:09:20", "remaining_time": "3:48:19", "throughput": 9692.0, "total_tokens": 5436160} +{"current_steps": 8055, "total_steps": 204665, "loss": 0.137, "lr": 7.87023012654517e-07, "epoch": 0.19678499010578263, "percentage": 3.94, "elapsed_time": "0:09:21", "remaining_time": "3:48:18", "throughput": 9691.53, "total_tokens": 5439168} +{"current_steps": 8060, "total_steps": 204665, "loss": 0.0778, "lr": 7.875116040455367e-07, "epoch": 0.1969071409376298, "percentage": 3.94, "elapsed_time": "0:09:21", "remaining_time": "3:48:18", "throughput": 9691.43, "total_tokens": 5442496} +{"current_steps": 8065, "total_steps": 204665, "loss": 0.1975, "lr": 7.880001954365564e-07, "epoch": 0.19702929176947695, "percentage": 3.94, "elapsed_time": "0:09:21", "remaining_time": "3:48:18", "throughput": 9691.77, "total_tokens": 5446144} +{"current_steps": 8070, "total_steps": 204665, "loss": 0.2025, "lr": 7.88488786827576e-07, "epoch": 0.1971514426013241, "percentage": 3.94, "elapsed_time": "0:09:22", "remaining_time": "3:48:17", "throughput": 9691.65, "total_tokens": 5449408} +{"current_steps": 8075, "total_steps": 204665, "loss": 0.1698, "lr": 7.889773782185958e-07, "epoch": 0.19727359343317127, "percentage": 3.95, "elapsed_time": "0:09:22", "remaining_time": "3:48:17", "throughput": 9692.02, "total_tokens": 5453056} +{"current_steps": 8080, "total_steps": 204665, "loss": 0.3359, "lr": 7.894659696096155e-07, "epoch": 0.19739574426501844, "percentage": 3.95, "elapsed_time": "0:09:22", "remaining_time": "3:48:17", "throughput": 9691.53, "total_tokens": 5456064} +{"current_steps": 8085, "total_steps": 204665, "loss": 0.1443, "lr": 7.899545610006351e-07, "epoch": 0.1975178950968656, "percentage": 3.95, "elapsed_time": "0:09:23", "remaining_time": "3:48:16", "throughput": 9691.3, "total_tokens": 5459264} +{"current_steps": 8090, "total_steps": 204665, "loss": 0.0892, "lr": 7.904431523916548e-07, "epoch": 0.1976400459287128, "percentage": 3.95, "elapsed_time": "0:09:23", "remaining_time": "3:48:16", "throughput": 9691.77, "total_tokens": 5462976} +{"current_steps": 8095, "total_steps": 204665, "loss": 0.2065, "lr": 7.909317437826745e-07, "epoch": 0.19776219676055995, "percentage": 3.96, "elapsed_time": "0:09:24", "remaining_time": "3:48:16", "throughput": 9691.68, "total_tokens": 5466304} +{"current_steps": 8100, "total_steps": 204665, "loss": 0.0773, "lr": 7.914203351736942e-07, "epoch": 0.1978843475924071, "percentage": 3.96, "elapsed_time": "0:09:24", "remaining_time": "3:48:15", "throughput": 9691.59, "total_tokens": 5469632} +{"current_steps": 8105, "total_steps": 204665, "loss": 0.0448, "lr": 7.919089265647139e-07, "epoch": 0.19800649842425427, "percentage": 3.96, "elapsed_time": "0:09:24", "remaining_time": "3:48:15", "throughput": 9691.25, "total_tokens": 5472768} +{"current_steps": 8110, "total_steps": 204665, "loss": 0.1155, "lr": 7.923975179557337e-07, "epoch": 0.19812864925610144, "percentage": 3.96, "elapsed_time": "0:09:25", "remaining_time": "3:48:14", "throughput": 9691.08, "total_tokens": 5476032} +{"current_steps": 8115, "total_steps": 204665, "loss": 0.2373, "lr": 7.928861093467532e-07, "epoch": 0.1982508000879486, "percentage": 3.97, "elapsed_time": "0:09:25", "remaining_time": "3:48:14", "throughput": 9690.96, "total_tokens": 5479296} +{"current_steps": 8120, "total_steps": 204665, "loss": 0.217, "lr": 7.93374700737773e-07, "epoch": 0.19837295091979576, "percentage": 3.97, "elapsed_time": "0:09:25", "remaining_time": "3:48:14", "throughput": 9691.49, "total_tokens": 5483072} +{"current_steps": 8125, "total_steps": 204665, "loss": 0.084, "lr": 7.938632921287927e-07, "epoch": 0.19849510175164292, "percentage": 3.97, "elapsed_time": "0:09:26", "remaining_time": "3:48:13", "throughput": 9691.21, "total_tokens": 5486208} +{"current_steps": 8130, "total_steps": 204665, "loss": 0.0706, "lr": 7.943518835198123e-07, "epoch": 0.19861725258349008, "percentage": 3.97, "elapsed_time": "0:09:26", "remaining_time": "3:48:13", "throughput": 9690.82, "total_tokens": 5489280} +{"current_steps": 8135, "total_steps": 204665, "loss": 0.1443, "lr": 7.948404749108321e-07, "epoch": 0.19873940341533725, "percentage": 3.97, "elapsed_time": "0:09:26", "remaining_time": "3:48:12", "throughput": 9690.77, "total_tokens": 5492608} +{"current_steps": 8140, "total_steps": 204665, "loss": 0.1016, "lr": 7.953290663018517e-07, "epoch": 0.19886155424718444, "percentage": 3.98, "elapsed_time": "0:09:27", "remaining_time": "3:48:12", "throughput": 9690.49, "total_tokens": 5495744} +{"current_steps": 8145, "total_steps": 204665, "loss": 0.0974, "lr": 7.958176576928714e-07, "epoch": 0.1989837050790316, "percentage": 3.98, "elapsed_time": "0:09:27", "remaining_time": "3:48:12", "throughput": 9691.03, "total_tokens": 5499520} +{"current_steps": 8150, "total_steps": 204665, "loss": 0.096, "lr": 7.963062490838911e-07, "epoch": 0.19910585591087876, "percentage": 3.98, "elapsed_time": "0:09:27", "remaining_time": "3:48:11", "throughput": 9691.11, "total_tokens": 5502976} +{"current_steps": 8155, "total_steps": 204665, "loss": 0.1639, "lr": 7.967948404749108e-07, "epoch": 0.19922800674272592, "percentage": 3.98, "elapsed_time": "0:09:28", "remaining_time": "3:48:11", "throughput": 9690.65, "total_tokens": 5505984} +{"current_steps": 8160, "total_steps": 204665, "loss": 0.0544, "lr": 7.972834318659305e-07, "epoch": 0.19935015757457308, "percentage": 3.99, "elapsed_time": "0:09:28", "remaining_time": "3:48:11", "throughput": 9691.2, "total_tokens": 5509760} +{"current_steps": 8165, "total_steps": 204665, "loss": 0.2632, "lr": 7.977720232569502e-07, "epoch": 0.19947230840642025, "percentage": 3.99, "elapsed_time": "0:09:28", "remaining_time": "3:48:10", "throughput": 9690.64, "total_tokens": 5512704} +{"current_steps": 8170, "total_steps": 204665, "loss": 0.0972, "lr": 7.982606146479699e-07, "epoch": 0.1995944592382674, "percentage": 3.99, "elapsed_time": "0:09:29", "remaining_time": "3:48:10", "throughput": 9690.71, "total_tokens": 5516096} +{"current_steps": 8175, "total_steps": 204665, "loss": 0.2251, "lr": 7.987492060389895e-07, "epoch": 0.19971661007011457, "percentage": 3.99, "elapsed_time": "0:09:29", "remaining_time": "3:48:09", "throughput": 9691.24, "total_tokens": 5519872} +{"current_steps": 8180, "total_steps": 204665, "loss": 0.1235, "lr": 7.992377974300093e-07, "epoch": 0.19983876090196173, "percentage": 4.0, "elapsed_time": "0:09:29", "remaining_time": "3:48:09", "throughput": 9691.26, "total_tokens": 5523264} +{"current_steps": 8185, "total_steps": 204665, "loss": 0.083, "lr": 7.99726388821029e-07, "epoch": 0.1999609117338089, "percentage": 4.0, "elapsed_time": "0:09:30", "remaining_time": "3:48:09", "throughput": 9690.8, "total_tokens": 5526272} +{"current_steps": 8190, "total_steps": 204665, "loss": 0.1533, "lr": 8.002149802120486e-07, "epoch": 0.20008306256565608, "percentage": 4.0, "elapsed_time": "0:09:30", "remaining_time": "3:48:08", "throughput": 9690.99, "total_tokens": 5529792} +{"current_steps": 8195, "total_steps": 204665, "loss": 0.092, "lr": 8.007035716030683e-07, "epoch": 0.20020521339750325, "percentage": 4.0, "elapsed_time": "0:09:30", "remaining_time": "3:48:08", "throughput": 9690.73, "total_tokens": 5532992} +{"current_steps": 8200, "total_steps": 204665, "loss": 0.0971, "lr": 8.01192162994088e-07, "epoch": 0.2003273642293504, "percentage": 4.01, "elapsed_time": "0:09:31", "remaining_time": "3:48:08", "throughput": 9691.09, "total_tokens": 5536640} +{"current_steps": 8205, "total_steps": 204665, "loss": 0.0859, "lr": 8.016807543851077e-07, "epoch": 0.20044951506119757, "percentage": 4.01, "elapsed_time": "0:09:31", "remaining_time": "3:48:07", "throughput": 9690.99, "total_tokens": 5539968} +{"current_steps": 8210, "total_steps": 204665, "loss": 0.1228, "lr": 8.021693457761274e-07, "epoch": 0.20057166589304473, "percentage": 4.01, "elapsed_time": "0:09:31", "remaining_time": "3:48:07", "throughput": 9690.42, "total_tokens": 5542912} +{"current_steps": 8215, "total_steps": 204665, "loss": 0.1139, "lr": 8.026579371671471e-07, "epoch": 0.2006938167248919, "percentage": 4.01, "elapsed_time": "0:09:32", "remaining_time": "3:48:06", "throughput": 9689.71, "total_tokens": 5545728} +{"current_steps": 8220, "total_steps": 204665, "loss": 0.1316, "lr": 8.031465285581668e-07, "epoch": 0.20081596755673906, "percentage": 4.02, "elapsed_time": "0:09:32", "remaining_time": "3:48:06", "throughput": 9689.37, "total_tokens": 5548864} +{"current_steps": 8225, "total_steps": 204665, "loss": 0.1198, "lr": 8.036351199491864e-07, "epoch": 0.20093811838858622, "percentage": 4.02, "elapsed_time": "0:09:33", "remaining_time": "3:48:05", "throughput": 9688.69, "total_tokens": 5551744} +{"current_steps": 8230, "total_steps": 204665, "loss": 0.1258, "lr": 8.041237113402062e-07, "epoch": 0.20106026922043338, "percentage": 4.02, "elapsed_time": "0:09:33", "remaining_time": "3:48:05", "throughput": 9689.85, "total_tokens": 5555968} +{"current_steps": 8235, "total_steps": 204665, "loss": 0.1186, "lr": 8.046123027312258e-07, "epoch": 0.20118242005228057, "percentage": 4.02, "elapsed_time": "0:09:33", "remaining_time": "3:48:05", "throughput": 9689.86, "total_tokens": 5559360} +{"current_steps": 8240, "total_steps": 204665, "loss": 0.0886, "lr": 8.051008941222456e-07, "epoch": 0.20130457088412773, "percentage": 4.03, "elapsed_time": "0:09:34", "remaining_time": "3:48:05", "throughput": 9690.2, "total_tokens": 5563008} +{"current_steps": 8245, "total_steps": 204665, "loss": 0.113, "lr": 8.055894855132653e-07, "epoch": 0.2014267217159749, "percentage": 4.03, "elapsed_time": "0:09:34", "remaining_time": "3:48:04", "throughput": 9690.31, "total_tokens": 5566464} +{"current_steps": 8250, "total_steps": 204665, "loss": 0.0882, "lr": 8.060780769042848e-07, "epoch": 0.20154887254782206, "percentage": 4.03, "elapsed_time": "0:09:34", "remaining_time": "3:48:04", "throughput": 9689.7, "total_tokens": 5569408} +{"current_steps": 8255, "total_steps": 204665, "loss": 0.1449, "lr": 8.065666682953046e-07, "epoch": 0.20167102337966922, "percentage": 4.03, "elapsed_time": "0:09:35", "remaining_time": "3:48:03", "throughput": 9690.09, "total_tokens": 5573056} +{"current_steps": 8260, "total_steps": 204665, "loss": 0.1259, "lr": 8.070552596863243e-07, "epoch": 0.20179317421151638, "percentage": 4.04, "elapsed_time": "0:09:35", "remaining_time": "3:48:03", "throughput": 9690.2, "total_tokens": 5576512} +{"current_steps": 8265, "total_steps": 204665, "loss": 0.2285, "lr": 8.07543851077344e-07, "epoch": 0.20191532504336354, "percentage": 4.04, "elapsed_time": "0:09:35", "remaining_time": "3:48:03", "throughput": 9689.8, "total_tokens": 5579584} +{"current_steps": 8270, "total_steps": 204665, "loss": 0.1879, "lr": 8.080324424683637e-07, "epoch": 0.2020374758752107, "percentage": 4.04, "elapsed_time": "0:09:36", "remaining_time": "3:48:02", "throughput": 9689.58, "total_tokens": 5582784} +{"current_steps": 8275, "total_steps": 204665, "loss": 0.1162, "lr": 8.085210338593834e-07, "epoch": 0.20215962670705787, "percentage": 4.04, "elapsed_time": "0:09:36", "remaining_time": "3:48:02", "throughput": 9689.26, "total_tokens": 5585920} +{"current_steps": 8280, "total_steps": 204665, "loss": 0.1593, "lr": 8.09009625250403e-07, "epoch": 0.20228177753890503, "percentage": 4.05, "elapsed_time": "0:09:36", "remaining_time": "3:48:01", "throughput": 9689.54, "total_tokens": 5589504} +{"current_steps": 8285, "total_steps": 204665, "loss": 0.082, "lr": 8.094982166414227e-07, "epoch": 0.20240392837075222, "percentage": 4.05, "elapsed_time": "0:09:37", "remaining_time": "3:48:01", "throughput": 9689.15, "total_tokens": 5592576} +{"current_steps": 8290, "total_steps": 204665, "loss": 0.1413, "lr": 8.099868080324425e-07, "epoch": 0.20252607920259938, "percentage": 4.05, "elapsed_time": "0:09:37", "remaining_time": "3:48:00", "throughput": 9688.69, "total_tokens": 5595648} +{"current_steps": 8295, "total_steps": 204665, "loss": 0.079, "lr": 8.104753994234621e-07, "epoch": 0.20264823003444654, "percentage": 4.05, "elapsed_time": "0:09:37", "remaining_time": "3:48:00", "throughput": 9688.85, "total_tokens": 5599104} +{"current_steps": 8300, "total_steps": 204665, "loss": 0.0501, "lr": 8.109639908144819e-07, "epoch": 0.2027703808662937, "percentage": 4.06, "elapsed_time": "0:09:38", "remaining_time": "3:48:00", "throughput": 9688.76, "total_tokens": 5602432} +{"current_steps": 8305, "total_steps": 204665, "loss": 0.1506, "lr": 8.114525822055015e-07, "epoch": 0.20289253169814087, "percentage": 4.06, "elapsed_time": "0:09:38", "remaining_time": "3:47:59", "throughput": 9688.37, "total_tokens": 5605504} +{"current_steps": 8310, "total_steps": 204665, "loss": 0.1131, "lr": 8.119411735965211e-07, "epoch": 0.20301468252998803, "percentage": 4.06, "elapsed_time": "0:09:38", "remaining_time": "3:47:59", "throughput": 9688.83, "total_tokens": 5609216} +{"current_steps": 8315, "total_steps": 204665, "loss": 0.1505, "lr": 8.124297649875409e-07, "epoch": 0.2031368333618352, "percentage": 4.06, "elapsed_time": "0:09:39", "remaining_time": "3:47:59", "throughput": 9689.71, "total_tokens": 5613248} +{"current_steps": 8320, "total_steps": 204665, "loss": 0.0719, "lr": 8.129183563785606e-07, "epoch": 0.20325898419368235, "percentage": 4.07, "elapsed_time": "0:09:39", "remaining_time": "3:47:59", "throughput": 9690.0, "total_tokens": 5616832} +{"current_steps": 8325, "total_steps": 204665, "loss": 0.1667, "lr": 8.134069477695803e-07, "epoch": 0.2033811350255295, "percentage": 4.07, "elapsed_time": "0:09:40", "remaining_time": "3:47:59", "throughput": 9690.79, "total_tokens": 5620800} +{"current_steps": 8330, "total_steps": 204665, "loss": 0.1361, "lr": 8.138955391606e-07, "epoch": 0.20350328585737668, "percentage": 4.07, "elapsed_time": "0:09:40", "remaining_time": "3:47:59", "throughput": 9691.15, "total_tokens": 5624448} +{"current_steps": 8335, "total_steps": 204665, "loss": 0.1627, "lr": 8.143841305516197e-07, "epoch": 0.20362543668922387, "percentage": 4.07, "elapsed_time": "0:09:40", "remaining_time": "3:47:58", "throughput": 9691.01, "total_tokens": 5627712} +{"current_steps": 8340, "total_steps": 204665, "loss": 0.1031, "lr": 8.148727219426393e-07, "epoch": 0.20374758752107103, "percentage": 4.07, "elapsed_time": "0:09:41", "remaining_time": "3:47:58", "throughput": 9690.92, "total_tokens": 5631040} +{"current_steps": 8345, "total_steps": 204665, "loss": 0.1729, "lr": 8.15361313333659e-07, "epoch": 0.2038697383529182, "percentage": 4.08, "elapsed_time": "0:09:41", "remaining_time": "3:47:58", "throughput": 9691.21, "total_tokens": 5634624} +{"current_steps": 8350, "total_steps": 204665, "loss": 0.1207, "lr": 8.158499047246788e-07, "epoch": 0.20399188918476535, "percentage": 4.08, "elapsed_time": "0:09:41", "remaining_time": "3:47:57", "throughput": 9691.28, "total_tokens": 5638080} +{"current_steps": 8355, "total_steps": 204665, "loss": 0.1336, "lr": 8.163384961156984e-07, "epoch": 0.2041140400166125, "percentage": 4.08, "elapsed_time": "0:09:42", "remaining_time": "3:47:57", "throughput": 9691.39, "total_tokens": 5641536} +{"current_steps": 8360, "total_steps": 204665, "loss": 0.0766, "lr": 8.168270875067181e-07, "epoch": 0.20423619084845968, "percentage": 4.08, "elapsed_time": "0:09:42", "remaining_time": "3:47:56", "throughput": 9690.9, "total_tokens": 5644544} +{"current_steps": 8365, "total_steps": 204665, "loss": 0.1265, "lr": 8.173156788977378e-07, "epoch": 0.20435834168030684, "percentage": 4.09, "elapsed_time": "0:09:42", "remaining_time": "3:47:56", "throughput": 9690.92, "total_tokens": 5647936} +{"current_steps": 8370, "total_steps": 204665, "loss": 0.1076, "lr": 8.178042702887574e-07, "epoch": 0.204480492512154, "percentage": 4.09, "elapsed_time": "0:09:43", "remaining_time": "3:47:56", "throughput": 9691.3, "total_tokens": 5651584} +{"current_steps": 8375, "total_steps": 204665, "loss": 0.0794, "lr": 8.182928616797772e-07, "epoch": 0.20460264334400116, "percentage": 4.09, "elapsed_time": "0:09:43", "remaining_time": "3:47:55", "throughput": 9691.08, "total_tokens": 5654784} +{"current_steps": 8380, "total_steps": 204665, "loss": 0.2906, "lr": 8.187814530707969e-07, "epoch": 0.20472479417584835, "percentage": 4.09, "elapsed_time": "0:09:43", "remaining_time": "3:47:55", "throughput": 9690.87, "total_tokens": 5657984} +{"current_steps": 8385, "total_steps": 204665, "loss": 0.1992, "lr": 8.192700444618166e-07, "epoch": 0.2048469450076955, "percentage": 4.1, "elapsed_time": "0:09:44", "remaining_time": "3:47:55", "throughput": 9690.68, "total_tokens": 5661248} +{"current_steps": 8390, "total_steps": 204665, "loss": 0.1088, "lr": 8.197586358528362e-07, "epoch": 0.20496909583954268, "percentage": 4.1, "elapsed_time": "0:09:44", "remaining_time": "3:47:54", "throughput": 9690.4, "total_tokens": 5664384} +{"current_steps": 8395, "total_steps": 204665, "loss": 0.1534, "lr": 8.20247227243856e-07, "epoch": 0.20509124667138984, "percentage": 4.1, "elapsed_time": "0:09:44", "remaining_time": "3:47:54", "throughput": 9690.52, "total_tokens": 5667840} +{"current_steps": 8400, "total_steps": 204665, "loss": 0.082, "lr": 8.207358186348756e-07, "epoch": 0.205213397503237, "percentage": 4.1, "elapsed_time": "0:09:45", "remaining_time": "3:47:53", "throughput": 9690.04, "total_tokens": 5670848} +{"current_steps": 8405, "total_steps": 204665, "loss": 0.1136, "lr": 8.212244100258953e-07, "epoch": 0.20533554833508416, "percentage": 4.11, "elapsed_time": "0:09:45", "remaining_time": "3:47:53", "throughput": 9690.01, "total_tokens": 5674176} +{"current_steps": 8410, "total_steps": 204665, "loss": 0.1473, "lr": 8.217130014169151e-07, "epoch": 0.20545769916693132, "percentage": 4.11, "elapsed_time": "0:09:45", "remaining_time": "3:47:52", "throughput": 9689.85, "total_tokens": 5677376} +{"current_steps": 8415, "total_steps": 204665, "loss": 0.083, "lr": 8.222015928079346e-07, "epoch": 0.20557984999877849, "percentage": 4.11, "elapsed_time": "0:09:46", "remaining_time": "3:47:52", "throughput": 9689.75, "total_tokens": 5680640} +{"current_steps": 8420, "total_steps": 204665, "loss": 0.1466, "lr": 8.226901841989544e-07, "epoch": 0.20570200083062565, "percentage": 4.11, "elapsed_time": "0:09:46", "remaining_time": "3:47:52", "throughput": 9690.12, "total_tokens": 5684288} +{"current_steps": 8425, "total_steps": 204665, "loss": 0.1573, "lr": 8.231787755899741e-07, "epoch": 0.2058241516624728, "percentage": 4.12, "elapsed_time": "0:09:46", "remaining_time": "3:47:51", "throughput": 9690.15, "total_tokens": 5687680} +{"current_steps": 8430, "total_steps": 204665, "loss": 0.0939, "lr": 8.236673669809937e-07, "epoch": 0.20594630249432, "percentage": 4.12, "elapsed_time": "0:09:47", "remaining_time": "3:47:51", "throughput": 9690.56, "total_tokens": 5691328} +{"current_steps": 8435, "total_steps": 204665, "loss": 0.1198, "lr": 8.241559583720135e-07, "epoch": 0.20606845332616716, "percentage": 4.12, "elapsed_time": "0:09:47", "remaining_time": "3:47:50", "throughput": 9690.1, "total_tokens": 5694336} +{"current_steps": 8440, "total_steps": 204665, "loss": 0.2146, "lr": 8.246445497630332e-07, "epoch": 0.20619060415801432, "percentage": 4.12, "elapsed_time": "0:09:47", "remaining_time": "3:47:50", "throughput": 9689.63, "total_tokens": 5697344} +{"current_steps": 8445, "total_steps": 204665, "loss": 0.1408, "lr": 8.251331411540528e-07, "epoch": 0.20631275498986149, "percentage": 4.13, "elapsed_time": "0:09:48", "remaining_time": "3:47:49", "throughput": 9689.73, "total_tokens": 5700800} +{"current_steps": 8450, "total_steps": 204665, "loss": 0.1243, "lr": 8.256217325450725e-07, "epoch": 0.20643490582170865, "percentage": 4.13, "elapsed_time": "0:09:48", "remaining_time": "3:47:49", "throughput": 9690.4, "total_tokens": 5704640} +{"current_steps": 8455, "total_steps": 204665, "loss": 0.1146, "lr": 8.261103239360923e-07, "epoch": 0.2065570566535558, "percentage": 4.13, "elapsed_time": "0:09:49", "remaining_time": "3:47:49", "throughput": 9690.9, "total_tokens": 5708352} +{"current_steps": 8460, "total_steps": 204665, "loss": 0.1354, "lr": 8.265989153271119e-07, "epoch": 0.20667920748540297, "percentage": 4.13, "elapsed_time": "0:09:49", "remaining_time": "3:47:49", "throughput": 9690.89, "total_tokens": 5711744} +{"current_steps": 8465, "total_steps": 204665, "loss": 0.1455, "lr": 8.270875067181316e-07, "epoch": 0.20680135831725013, "percentage": 4.14, "elapsed_time": "0:09:49", "remaining_time": "3:47:48", "throughput": 9690.64, "total_tokens": 5714880} +{"current_steps": 8470, "total_steps": 204665, "loss": 0.0521, "lr": 8.275760981091513e-07, "epoch": 0.2069235091490973, "percentage": 4.14, "elapsed_time": "0:09:50", "remaining_time": "3:47:48", "throughput": 9690.61, "total_tokens": 5718208} +{"current_steps": 8475, "total_steps": 204665, "loss": 0.0445, "lr": 8.280646895001709e-07, "epoch": 0.20704565998094446, "percentage": 4.14, "elapsed_time": "0:09:50", "remaining_time": "3:47:47", "throughput": 9690.42, "total_tokens": 5721408} +{"current_steps": 8480, "total_steps": 204665, "loss": 0.1252, "lr": 8.285532808911907e-07, "epoch": 0.20716781081279165, "percentage": 4.14, "elapsed_time": "0:09:50", "remaining_time": "3:47:47", "throughput": 9690.49, "total_tokens": 5724800} +{"current_steps": 8485, "total_steps": 204665, "loss": 0.2579, "lr": 8.290418722822104e-07, "epoch": 0.2072899616446388, "percentage": 4.15, "elapsed_time": "0:09:51", "remaining_time": "3:47:46", "throughput": 9690.26, "total_tokens": 5728000} +{"current_steps": 8490, "total_steps": 204665, "loss": 0.0754, "lr": 8.2953046367323e-07, "epoch": 0.20741211247648597, "percentage": 4.15, "elapsed_time": "0:09:51", "remaining_time": "3:47:46", "throughput": 9690.39, "total_tokens": 5731456} +{"current_steps": 8495, "total_steps": 204665, "loss": 0.1841, "lr": 8.300190550642498e-07, "epoch": 0.20753426330833313, "percentage": 4.15, "elapsed_time": "0:09:51", "remaining_time": "3:47:45", "throughput": 9689.87, "total_tokens": 5734400} +{"current_steps": 8500, "total_steps": 204665, "loss": 0.1131, "lr": 8.305076464552694e-07, "epoch": 0.2076564141401803, "percentage": 4.15, "elapsed_time": "0:09:52", "remaining_time": "3:47:45", "throughput": 9690.11, "total_tokens": 5737920} +{"current_steps": 8505, "total_steps": 204665, "loss": 0.1254, "lr": 8.309962378462891e-07, "epoch": 0.20777856497202746, "percentage": 4.16, "elapsed_time": "0:09:52", "remaining_time": "3:47:45", "throughput": 9689.81, "total_tokens": 5741056} +{"current_steps": 8510, "total_steps": 204665, "loss": 0.1324, "lr": 8.314848292373088e-07, "epoch": 0.20790071580387462, "percentage": 4.16, "elapsed_time": "0:09:52", "remaining_time": "3:47:44", "throughput": 9690.01, "total_tokens": 5744576} +{"current_steps": 8515, "total_steps": 204665, "loss": 0.197, "lr": 8.319734206283286e-07, "epoch": 0.20802286663572178, "percentage": 4.16, "elapsed_time": "0:09:53", "remaining_time": "3:47:44", "throughput": 9690.69, "total_tokens": 5748480} +{"current_steps": 8520, "total_steps": 204665, "loss": 0.1648, "lr": 8.324620120193482e-07, "epoch": 0.20814501746756894, "percentage": 4.16, "elapsed_time": "0:09:53", "remaining_time": "3:47:44", "throughput": 9690.94, "total_tokens": 5752000} +{"current_steps": 8525, "total_steps": 204665, "loss": 0.2212, "lr": 8.329506034103678e-07, "epoch": 0.2082671682994161, "percentage": 4.17, "elapsed_time": "0:09:53", "remaining_time": "3:47:44", "throughput": 9691.33, "total_tokens": 5755648} +{"current_steps": 8530, "total_steps": 204665, "loss": 0.2119, "lr": 8.334391948013876e-07, "epoch": 0.2083893191312633, "percentage": 4.17, "elapsed_time": "0:09:54", "remaining_time": "3:47:43", "throughput": 9691.29, "total_tokens": 5759040} +{"current_steps": 8535, "total_steps": 204665, "loss": 0.0682, "lr": 8.339277861924072e-07, "epoch": 0.20851146996311046, "percentage": 4.17, "elapsed_time": "0:09:54", "remaining_time": "3:47:44", "throughput": 9691.06, "total_tokens": 5762496} +{"current_steps": 8540, "total_steps": 204665, "loss": 0.0486, "lr": 8.34416377583427e-07, "epoch": 0.20863362079495762, "percentage": 4.17, "elapsed_time": "0:09:54", "remaining_time": "3:47:43", "throughput": 9690.41, "total_tokens": 5765312} +{"current_steps": 8545, "total_steps": 204665, "loss": 0.1615, "lr": 8.349049689744467e-07, "epoch": 0.20875577162680478, "percentage": 4.18, "elapsed_time": "0:09:55", "remaining_time": "3:47:43", "throughput": 9690.47, "total_tokens": 5768768} +{"current_steps": 8550, "total_steps": 204665, "loss": 0.0643, "lr": 8.353935603654664e-07, "epoch": 0.20887792245865194, "percentage": 4.18, "elapsed_time": "0:09:55", "remaining_time": "3:47:42", "throughput": 9690.27, "total_tokens": 5771968} +{"current_steps": 8555, "total_steps": 204665, "loss": 0.2121, "lr": 8.35882151756486e-07, "epoch": 0.2090000732904991, "percentage": 4.18, "elapsed_time": "0:09:55", "remaining_time": "3:47:42", "throughput": 9690.21, "total_tokens": 5775296} +{"current_steps": 8560, "total_steps": 204665, "loss": 0.0623, "lr": 8.363707431475056e-07, "epoch": 0.20912222412234627, "percentage": 4.18, "elapsed_time": "0:09:56", "remaining_time": "3:47:41", "throughput": 9689.38, "total_tokens": 5778048} +{"current_steps": 8565, "total_steps": 204665, "loss": 0.1852, "lr": 8.368593345385254e-07, "epoch": 0.20924437495419343, "percentage": 4.18, "elapsed_time": "0:09:56", "remaining_time": "3:47:41", "throughput": 9689.21, "total_tokens": 5781312} +{"current_steps": 8570, "total_steps": 204665, "loss": 0.0873, "lr": 8.373479259295451e-07, "epoch": 0.2093665257860406, "percentage": 4.19, "elapsed_time": "0:09:57", "remaining_time": "3:47:40", "throughput": 9689.27, "total_tokens": 5784704} +{"current_steps": 8575, "total_steps": 204665, "loss": 0.0896, "lr": 8.378365173205649e-07, "epoch": 0.20948867661788778, "percentage": 4.19, "elapsed_time": "0:09:57", "remaining_time": "3:47:40", "throughput": 9689.45, "total_tokens": 5788224} +{"current_steps": 8580, "total_steps": 204665, "loss": 0.1468, "lr": 8.383251087115844e-07, "epoch": 0.20961082744973494, "percentage": 4.19, "elapsed_time": "0:09:57", "remaining_time": "3:47:40", "throughput": 9689.27, "total_tokens": 5791488} +{"current_steps": 8585, "total_steps": 204665, "loss": 0.1301, "lr": 8.388137001026041e-07, "epoch": 0.2097329782815821, "percentage": 4.19, "elapsed_time": "0:09:58", "remaining_time": "3:47:39", "throughput": 9689.17, "total_tokens": 5794752} +{"current_steps": 8590, "total_steps": 204665, "loss": 0.077, "lr": 8.393022914936239e-07, "epoch": 0.20985512911342927, "percentage": 4.2, "elapsed_time": "0:09:58", "remaining_time": "3:47:39", "throughput": 9689.48, "total_tokens": 5798336} +{"current_steps": 8595, "total_steps": 204665, "loss": 0.1719, "lr": 8.397908828846435e-07, "epoch": 0.20997727994527643, "percentage": 4.2, "elapsed_time": "0:09:58", "remaining_time": "3:47:39", "throughput": 9689.92, "total_tokens": 5802048} +{"current_steps": 8600, "total_steps": 204665, "loss": 0.1771, "lr": 8.402794742756633e-07, "epoch": 0.2100994307771236, "percentage": 4.2, "elapsed_time": "0:09:59", "remaining_time": "3:47:38", "throughput": 9689.79, "total_tokens": 5805312} +{"current_steps": 8605, "total_steps": 204665, "loss": 0.102, "lr": 8.407680656666829e-07, "epoch": 0.21022158160897075, "percentage": 4.2, "elapsed_time": "0:09:59", "remaining_time": "3:47:38", "throughput": 9689.49, "total_tokens": 5808448} +{"current_steps": 8610, "total_steps": 204665, "loss": 0.1211, "lr": 8.412566570577026e-07, "epoch": 0.21034373244081792, "percentage": 4.21, "elapsed_time": "0:09:59", "remaining_time": "3:47:37", "throughput": 9689.46, "total_tokens": 5811776} +{"current_steps": 8615, "total_steps": 204665, "loss": 0.0809, "lr": 8.417452484487223e-07, "epoch": 0.21046588327266508, "percentage": 4.21, "elapsed_time": "0:10:00", "remaining_time": "3:47:37", "throughput": 9689.58, "total_tokens": 5815232} +{"current_steps": 8620, "total_steps": 204665, "loss": 0.0449, "lr": 8.422338398397419e-07, "epoch": 0.21058803410451224, "percentage": 4.21, "elapsed_time": "0:10:00", "remaining_time": "3:47:37", "throughput": 9689.53, "total_tokens": 5818560} +{"current_steps": 8625, "total_steps": 204665, "loss": 0.1412, "lr": 8.427224312307617e-07, "epoch": 0.21071018493635943, "percentage": 4.21, "elapsed_time": "0:10:00", "remaining_time": "3:47:36", "throughput": 9690.04, "total_tokens": 5822272} +{"current_steps": 8630, "total_steps": 204665, "loss": 0.1022, "lr": 8.432110226217814e-07, "epoch": 0.2108323357682066, "percentage": 4.22, "elapsed_time": "0:10:01", "remaining_time": "3:47:36", "throughput": 9689.99, "total_tokens": 5825600} +{"current_steps": 8635, "total_steps": 204665, "loss": 0.1595, "lr": 8.436996140128011e-07, "epoch": 0.21095448660005375, "percentage": 4.22, "elapsed_time": "0:10:01", "remaining_time": "3:47:36", "throughput": 9689.6, "total_tokens": 5828672} +{"current_steps": 8640, "total_steps": 204665, "loss": 0.0977, "lr": 8.441882054038207e-07, "epoch": 0.21107663743190092, "percentage": 4.22, "elapsed_time": "0:10:01", "remaining_time": "3:47:35", "throughput": 9688.95, "total_tokens": 5831552} +{"current_steps": 8645, "total_steps": 204665, "loss": 0.1556, "lr": 8.446767967948404e-07, "epoch": 0.21119878826374808, "percentage": 4.22, "elapsed_time": "0:10:02", "remaining_time": "3:47:34", "throughput": 9688.26, "total_tokens": 5834368} +{"current_steps": 8650, "total_steps": 204665, "loss": 0.2125, "lr": 8.451653881858601e-07, "epoch": 0.21132093909559524, "percentage": 4.23, "elapsed_time": "0:10:02", "remaining_time": "3:47:34", "throughput": 9687.9, "total_tokens": 5837440} +{"current_steps": 8655, "total_steps": 204665, "loss": 0.1083, "lr": 8.456539795768798e-07, "epoch": 0.2114430899274424, "percentage": 4.23, "elapsed_time": "0:10:02", "remaining_time": "3:47:33", "throughput": 9687.7, "total_tokens": 5840640} +{"current_steps": 8660, "total_steps": 204665, "loss": 0.1269, "lr": 8.461425709678996e-07, "epoch": 0.21156524075928956, "percentage": 4.23, "elapsed_time": "0:10:03", "remaining_time": "3:47:33", "throughput": 9687.34, "total_tokens": 5843712} +{"current_steps": 8665, "total_steps": 204665, "loss": 0.1657, "lr": 8.466311623589191e-07, "epoch": 0.21168739159113673, "percentage": 4.23, "elapsed_time": "0:10:03", "remaining_time": "3:47:32", "throughput": 9687.36, "total_tokens": 5847104} +{"current_steps": 8670, "total_steps": 204665, "loss": 0.0945, "lr": 8.471197537499389e-07, "epoch": 0.2118095424229839, "percentage": 4.24, "elapsed_time": "0:10:03", "remaining_time": "3:47:32", "throughput": 9687.33, "total_tokens": 5850432} +{"current_steps": 8675, "total_steps": 204665, "loss": 0.0999, "lr": 8.476083451409586e-07, "epoch": 0.21193169325483108, "percentage": 4.24, "elapsed_time": "0:10:04", "remaining_time": "3:47:32", "throughput": 9687.36, "total_tokens": 5853824} +{"current_steps": 8680, "total_steps": 204665, "loss": 0.0518, "lr": 8.480969365319782e-07, "epoch": 0.21205384408667824, "percentage": 4.24, "elapsed_time": "0:10:04", "remaining_time": "3:47:31", "throughput": 9687.72, "total_tokens": 5857472} +{"current_steps": 8685, "total_steps": 204665, "loss": 0.1515, "lr": 8.48585527922998e-07, "epoch": 0.2121759949185254, "percentage": 4.24, "elapsed_time": "0:10:04", "remaining_time": "3:47:31", "throughput": 9688.32, "total_tokens": 5861312} +{"current_steps": 8690, "total_steps": 204665, "loss": 0.1207, "lr": 8.490741193140176e-07, "epoch": 0.21229814575037256, "percentage": 4.25, "elapsed_time": "0:10:05", "remaining_time": "3:47:31", "throughput": 9688.52, "total_tokens": 5864832} +{"current_steps": 8695, "total_steps": 204665, "loss": 0.0963, "lr": 8.495627107050373e-07, "epoch": 0.21242029658221973, "percentage": 4.25, "elapsed_time": "0:10:05", "remaining_time": "3:47:30", "throughput": 9688.02, "total_tokens": 5867776} +{"current_steps": 8700, "total_steps": 204665, "loss": 0.1344, "lr": 8.50051302096057e-07, "epoch": 0.2125424474140669, "percentage": 4.25, "elapsed_time": "0:10:06", "remaining_time": "3:47:30", "throughput": 9687.61, "total_tokens": 5870848} +{"current_steps": 8705, "total_steps": 204665, "loss": 0.1278, "lr": 8.505398934870767e-07, "epoch": 0.21266459824591405, "percentage": 4.25, "elapsed_time": "0:10:06", "remaining_time": "3:47:30", "throughput": 9688.08, "total_tokens": 5874560} +{"current_steps": 8710, "total_steps": 204665, "loss": 0.098, "lr": 8.510284848780964e-07, "epoch": 0.2127867490777612, "percentage": 4.26, "elapsed_time": "0:10:06", "remaining_time": "3:47:29", "throughput": 9687.92, "total_tokens": 5877824} +{"current_steps": 8715, "total_steps": 204665, "loss": 0.1258, "lr": 8.51517076269116e-07, "epoch": 0.21290889990960837, "percentage": 4.26, "elapsed_time": "0:10:07", "remaining_time": "3:47:29", "throughput": 9687.35, "total_tokens": 5880704} +{"current_steps": 8720, "total_steps": 204665, "loss": 0.1202, "lr": 8.520056676601358e-07, "epoch": 0.21303105074145554, "percentage": 4.26, "elapsed_time": "0:10:07", "remaining_time": "3:47:28", "throughput": 9687.16, "total_tokens": 5883904} +{"current_steps": 8725, "total_steps": 204665, "loss": 0.0826, "lr": 8.524942590511554e-07, "epoch": 0.21315320157330273, "percentage": 4.26, "elapsed_time": "0:10:07", "remaining_time": "3:47:28", "throughput": 9687.19, "total_tokens": 5887296} +{"current_steps": 8730, "total_steps": 204665, "loss": 0.0917, "lr": 8.529828504421752e-07, "epoch": 0.2132753524051499, "percentage": 4.27, "elapsed_time": "0:10:08", "remaining_time": "3:47:27", "throughput": 9686.75, "total_tokens": 5890304} +{"current_steps": 8735, "total_steps": 204665, "loss": 0.2655, "lr": 8.534714418331949e-07, "epoch": 0.21339750323699705, "percentage": 4.27, "elapsed_time": "0:10:08", "remaining_time": "3:47:27", "throughput": 9686.68, "total_tokens": 5893632} +{"current_steps": 8740, "total_steps": 204665, "loss": 0.0348, "lr": 8.539600332242145e-07, "epoch": 0.2135196540688442, "percentage": 4.27, "elapsed_time": "0:10:08", "remaining_time": "3:47:26", "throughput": 9686.59, "total_tokens": 5896960} +{"current_steps": 8745, "total_steps": 204665, "loss": 0.0938, "lr": 8.544486246152342e-07, "epoch": 0.21364180490069137, "percentage": 4.27, "elapsed_time": "0:10:09", "remaining_time": "3:47:26", "throughput": 9686.43, "total_tokens": 5900224} +{"current_steps": 8750, "total_steps": 204665, "loss": 0.1702, "lr": 8.549372160062539e-07, "epoch": 0.21376395573253854, "percentage": 4.28, "elapsed_time": "0:10:09", "remaining_time": "3:47:26", "throughput": 9686.11, "total_tokens": 5903360} +{"current_steps": 8755, "total_steps": 204665, "loss": 0.1216, "lr": 8.554258073972736e-07, "epoch": 0.2138861065643857, "percentage": 4.28, "elapsed_time": "0:10:09", "remaining_time": "3:47:25", "throughput": 9685.72, "total_tokens": 5906432} +{"current_steps": 8760, "total_steps": 204665, "loss": 0.0419, "lr": 8.559143987882933e-07, "epoch": 0.21400825739623286, "percentage": 4.28, "elapsed_time": "0:10:10", "remaining_time": "3:47:25", "throughput": 9685.43, "total_tokens": 5909568} +{"current_steps": 8765, "total_steps": 204665, "loss": 0.1031, "lr": 8.564029901793131e-07, "epoch": 0.21413040822808002, "percentage": 4.28, "elapsed_time": "0:10:10", "remaining_time": "3:47:24", "throughput": 9685.92, "total_tokens": 5913280} +{"current_steps": 8770, "total_steps": 204665, "loss": 0.0859, "lr": 8.568915815703327e-07, "epoch": 0.2142525590599272, "percentage": 4.29, "elapsed_time": "0:10:10", "remaining_time": "3:47:24", "throughput": 9685.56, "total_tokens": 5916352} +{"current_steps": 8775, "total_steps": 204665, "loss": 0.1947, "lr": 8.573801729613523e-07, "epoch": 0.21437470989177437, "percentage": 4.29, "elapsed_time": "0:10:11", "remaining_time": "3:47:23", "throughput": 9684.93, "total_tokens": 5919232} +{"current_steps": 8780, "total_steps": 204665, "loss": 0.2758, "lr": 8.578687643523721e-07, "epoch": 0.21449686072362154, "percentage": 4.29, "elapsed_time": "0:10:11", "remaining_time": "3:47:23", "throughput": 9684.16, "total_tokens": 5921984} +{"current_steps": 8785, "total_steps": 204665, "loss": 0.1436, "lr": 8.583573557433917e-07, "epoch": 0.2146190115554687, "percentage": 4.29, "elapsed_time": "0:10:11", "remaining_time": "3:47:22", "throughput": 9684.38, "total_tokens": 5925504} +{"current_steps": 8790, "total_steps": 204665, "loss": 0.0936, "lr": 8.588459471344115e-07, "epoch": 0.21474116238731586, "percentage": 4.29, "elapsed_time": "0:10:12", "remaining_time": "3:47:22", "throughput": 9683.77, "total_tokens": 5928384} +{"current_steps": 8795, "total_steps": 204665, "loss": 0.0995, "lr": 8.593345385254312e-07, "epoch": 0.21486331321916302, "percentage": 4.3, "elapsed_time": "0:10:12", "remaining_time": "3:47:21", "throughput": 9683.83, "total_tokens": 5931776} +{"current_steps": 8800, "total_steps": 204665, "loss": 0.1071, "lr": 8.598231299164507e-07, "epoch": 0.21498546405101018, "percentage": 4.3, "elapsed_time": "0:10:12", "remaining_time": "3:47:21", "throughput": 9683.41, "total_tokens": 5934784} +{"current_steps": 8805, "total_steps": 204665, "loss": 0.1719, "lr": 8.603117213074705e-07, "epoch": 0.21510761488285735, "percentage": 4.3, "elapsed_time": "0:10:13", "remaining_time": "3:47:20", "throughput": 9683.98, "total_tokens": 5938560} +{"current_steps": 8810, "total_steps": 204665, "loss": 0.169, "lr": 8.608003126984902e-07, "epoch": 0.2152297657147045, "percentage": 4.3, "elapsed_time": "0:10:13", "remaining_time": "3:47:20", "throughput": 9683.84, "total_tokens": 5941824} +{"current_steps": 8815, "total_steps": 204665, "loss": 0.2145, "lr": 8.612889040895099e-07, "epoch": 0.21535191654655167, "percentage": 4.31, "elapsed_time": "0:10:13", "remaining_time": "3:47:20", "throughput": 9683.8, "total_tokens": 5945152} +{"current_steps": 8820, "total_steps": 204665, "loss": 0.1402, "lr": 8.617774954805296e-07, "epoch": 0.21547406737839886, "percentage": 4.31, "elapsed_time": "0:10:14", "remaining_time": "3:47:20", "throughput": 9684.4, "total_tokens": 5948992} +{"current_steps": 8825, "total_steps": 204665, "loss": 0.1242, "lr": 8.622660868715494e-07, "epoch": 0.21559621821024602, "percentage": 4.31, "elapsed_time": "0:10:14", "remaining_time": "3:47:21", "throughput": 9682.97, "total_tokens": 5952256} +{"current_steps": 8830, "total_steps": 204665, "loss": 0.1165, "lr": 8.627546782625689e-07, "epoch": 0.21571836904209318, "percentage": 4.31, "elapsed_time": "0:10:15", "remaining_time": "3:47:21", "throughput": 9682.99, "total_tokens": 5955648} +{"current_steps": 8835, "total_steps": 204665, "loss": 0.0812, "lr": 8.632432696535886e-07, "epoch": 0.21584051987394035, "percentage": 4.32, "elapsed_time": "0:10:15", "remaining_time": "3:47:20", "throughput": 9683.02, "total_tokens": 5959040} +{"current_steps": 8840, "total_steps": 204665, "loss": 0.1652, "lr": 8.637318610446084e-07, "epoch": 0.2159626707057875, "percentage": 4.32, "elapsed_time": "0:10:15", "remaining_time": "3:47:20", "throughput": 9683.25, "total_tokens": 5962560} +{"current_steps": 8845, "total_steps": 204665, "loss": 0.1325, "lr": 8.64220452435628e-07, "epoch": 0.21608482153763467, "percentage": 4.32, "elapsed_time": "0:10:16", "remaining_time": "3:47:20", "throughput": 9683.29, "total_tokens": 5965952} +{"current_steps": 8850, "total_steps": 204665, "loss": 0.0561, "lr": 8.647090438266478e-07, "epoch": 0.21620697236948183, "percentage": 4.32, "elapsed_time": "0:10:16", "remaining_time": "3:47:19", "throughput": 9683.27, "total_tokens": 5969280} +{"current_steps": 8855, "total_steps": 204665, "loss": 0.1161, "lr": 8.651976352176674e-07, "epoch": 0.216329123201329, "percentage": 4.33, "elapsed_time": "0:10:16", "remaining_time": "3:47:19", "throughput": 9683.09, "total_tokens": 5972480} +{"current_steps": 8860, "total_steps": 204665, "loss": 0.1002, "lr": 8.65686226608687e-07, "epoch": 0.21645127403317616, "percentage": 4.33, "elapsed_time": "0:10:17", "remaining_time": "3:47:18", "throughput": 9683.19, "total_tokens": 5975872} +{"current_steps": 8865, "total_steps": 204665, "loss": 0.1086, "lr": 8.661748179997068e-07, "epoch": 0.21657342486502332, "percentage": 4.33, "elapsed_time": "0:10:17", "remaining_time": "3:47:18", "throughput": 9682.8, "total_tokens": 5978944} +{"current_steps": 8870, "total_steps": 204665, "loss": 0.135, "lr": 8.666634093907265e-07, "epoch": 0.2166955756968705, "percentage": 4.33, "elapsed_time": "0:10:17", "remaining_time": "3:47:17", "throughput": 9683.29, "total_tokens": 5982656} +{"current_steps": 8875, "total_steps": 204665, "loss": 0.0856, "lr": 8.671520007817462e-07, "epoch": 0.21681772652871767, "percentage": 4.34, "elapsed_time": "0:10:18", "remaining_time": "3:47:17", "throughput": 9682.83, "total_tokens": 5985664} +{"current_steps": 8880, "total_steps": 204665, "loss": 0.1775, "lr": 8.676405921727659e-07, "epoch": 0.21693987736056483, "percentage": 4.34, "elapsed_time": "0:10:18", "remaining_time": "3:47:17", "throughput": 9682.92, "total_tokens": 5989120} +{"current_steps": 8885, "total_steps": 204665, "loss": 0.0496, "lr": 8.681291835637856e-07, "epoch": 0.217062028192412, "percentage": 4.34, "elapsed_time": "0:10:18", "remaining_time": "3:47:16", "throughput": 9683.33, "total_tokens": 5992768} +{"current_steps": 8890, "total_steps": 204665, "loss": 0.2056, "lr": 8.686177749548052e-07, "epoch": 0.21718417902425916, "percentage": 4.34, "elapsed_time": "0:10:19", "remaining_time": "3:47:16", "throughput": 9683.56, "total_tokens": 5996288} +{"current_steps": 8895, "total_steps": 204665, "loss": 0.1601, "lr": 8.691063663458249e-07, "epoch": 0.21730632985610632, "percentage": 4.35, "elapsed_time": "0:10:19", "remaining_time": "3:47:16", "throughput": 9683.75, "total_tokens": 5999808} +{"current_steps": 8900, "total_steps": 204665, "loss": 0.1511, "lr": 8.695949577368447e-07, "epoch": 0.21742848068795348, "percentage": 4.35, "elapsed_time": "0:10:19", "remaining_time": "3:47:15", "throughput": 9683.46, "total_tokens": 6002944} +{"current_steps": 8905, "total_steps": 204665, "loss": 0.1434, "lr": 8.700835491278643e-07, "epoch": 0.21755063151980064, "percentage": 4.35, "elapsed_time": "0:10:20", "remaining_time": "3:47:15", "throughput": 9683.17, "total_tokens": 6006080} +{"current_steps": 8910, "total_steps": 204665, "loss": 0.0495, "lr": 8.70572140518884e-07, "epoch": 0.2176727823516478, "percentage": 4.35, "elapsed_time": "0:10:20", "remaining_time": "3:47:14", "throughput": 9683.4, "total_tokens": 6009600} +{"current_steps": 8915, "total_steps": 204665, "loss": 0.0918, "lr": 8.710607319099037e-07, "epoch": 0.217794933183495, "percentage": 4.36, "elapsed_time": "0:10:20", "remaining_time": "3:47:14", "throughput": 9683.12, "total_tokens": 6012736} +{"current_steps": 8920, "total_steps": 204665, "loss": 0.0888, "lr": 8.715493233009233e-07, "epoch": 0.21791708401534216, "percentage": 4.36, "elapsed_time": "0:10:21", "remaining_time": "3:47:14", "throughput": 9683.05, "total_tokens": 6016064} +{"current_steps": 8925, "total_steps": 204665, "loss": 0.1266, "lr": 8.720379146919431e-07, "epoch": 0.21803923484718932, "percentage": 4.36, "elapsed_time": "0:10:21", "remaining_time": "3:47:13", "throughput": 9682.93, "total_tokens": 6019328} +{"current_steps": 8930, "total_steps": 204665, "loss": 0.1356, "lr": 8.725265060829628e-07, "epoch": 0.21816138567903648, "percentage": 4.36, "elapsed_time": "0:10:22", "remaining_time": "3:47:13", "throughput": 9683.33, "total_tokens": 6023040} +{"current_steps": 8935, "total_steps": 204665, "loss": 0.1061, "lr": 8.730150974739825e-07, "epoch": 0.21828353651088364, "percentage": 4.37, "elapsed_time": "0:10:22", "remaining_time": "3:47:13", "throughput": 9683.42, "total_tokens": 6026496} +{"current_steps": 8940, "total_steps": 204665, "loss": 0.0705, "lr": 8.735036888650021e-07, "epoch": 0.2184056873427308, "percentage": 4.37, "elapsed_time": "0:10:22", "remaining_time": "3:47:12", "throughput": 9683.7, "total_tokens": 6030080} +{"current_steps": 8945, "total_steps": 204665, "loss": 0.105, "lr": 8.739922802560219e-07, "epoch": 0.21852783817457797, "percentage": 4.37, "elapsed_time": "0:10:23", "remaining_time": "3:47:12", "throughput": 9683.67, "total_tokens": 6033408} +{"current_steps": 8950, "total_steps": 204665, "loss": 0.156, "lr": 8.744808716470415e-07, "epoch": 0.21864998900642513, "percentage": 4.37, "elapsed_time": "0:10:23", "remaining_time": "3:47:12", "throughput": 9684.12, "total_tokens": 6037120} +{"current_steps": 8955, "total_steps": 204665, "loss": 0.0852, "lr": 8.749694630380612e-07, "epoch": 0.2187721398382723, "percentage": 4.38, "elapsed_time": "0:10:23", "remaining_time": "3:47:12", "throughput": 9684.3, "total_tokens": 6040640} +{"current_steps": 8960, "total_steps": 204665, "loss": 0.1522, "lr": 8.75458054429081e-07, "epoch": 0.21889429067011945, "percentage": 4.38, "elapsed_time": "0:10:24", "remaining_time": "3:47:11", "throughput": 9684.41, "total_tokens": 6044096} +{"current_steps": 8965, "total_steps": 204665, "loss": 0.1482, "lr": 8.759466458201005e-07, "epoch": 0.21901644150196664, "percentage": 4.38, "elapsed_time": "0:10:24", "remaining_time": "3:47:11", "throughput": 9684.44, "total_tokens": 6047488} +{"current_steps": 8970, "total_steps": 204665, "loss": 0.1361, "lr": 8.764352372111203e-07, "epoch": 0.2191385923338138, "percentage": 4.38, "elapsed_time": "0:10:24", "remaining_time": "3:47:11", "throughput": 9684.69, "total_tokens": 6051072} +{"current_steps": 8975, "total_steps": 204665, "loss": 0.1871, "lr": 8.7692382860214e-07, "epoch": 0.21926074316566097, "percentage": 4.39, "elapsed_time": "0:10:25", "remaining_time": "3:47:10", "throughput": 9684.29, "total_tokens": 6054080} +{"current_steps": 8980, "total_steps": 204665, "loss": 0.0459, "lr": 8.774124199931597e-07, "epoch": 0.21938289399750813, "percentage": 4.39, "elapsed_time": "0:10:25", "remaining_time": "3:47:10", "throughput": 9684.79, "total_tokens": 6057856} +{"current_steps": 8985, "total_steps": 204665, "loss": 0.1141, "lr": 8.779010113841794e-07, "epoch": 0.2195050448293553, "percentage": 4.39, "elapsed_time": "0:10:25", "remaining_time": "3:47:10", "throughput": 9684.66, "total_tokens": 6061120} +{"current_steps": 8990, "total_steps": 204665, "loss": 0.142, "lr": 8.78389602775199e-07, "epoch": 0.21962719566120245, "percentage": 4.39, "elapsed_time": "0:10:26", "remaining_time": "3:47:09", "throughput": 9684.69, "total_tokens": 6064512} +{"current_steps": 8995, "total_steps": 204665, "loss": 0.0917, "lr": 8.788781941662187e-07, "epoch": 0.2197493464930496, "percentage": 4.39, "elapsed_time": "0:10:26", "remaining_time": "3:47:09", "throughput": 9684.72, "total_tokens": 6067904} +{"current_steps": 9000, "total_steps": 204665, "loss": 0.0879, "lr": 8.793667855572384e-07, "epoch": 0.21987149732489678, "percentage": 4.4, "elapsed_time": "0:10:26", "remaining_time": "3:47:08", "throughput": 9684.44, "total_tokens": 6071040} +{"current_steps": 9005, "total_steps": 204665, "loss": 0.0989, "lr": 8.798553769482582e-07, "epoch": 0.21999364815674394, "percentage": 4.4, "elapsed_time": "0:10:27", "remaining_time": "3:47:08", "throughput": 9684.24, "total_tokens": 6074240} +{"current_steps": 9010, "total_steps": 204665, "loss": 0.2064, "lr": 8.803439683392778e-07, "epoch": 0.2201157989885911, "percentage": 4.4, "elapsed_time": "0:10:27", "remaining_time": "3:47:08", "throughput": 9684.36, "total_tokens": 6077696} +{"current_steps": 9015, "total_steps": 204665, "loss": 0.0578, "lr": 8.808325597302975e-07, "epoch": 0.2202379498204383, "percentage": 4.4, "elapsed_time": "0:10:27", "remaining_time": "3:47:07", "throughput": 9684.24, "total_tokens": 6080960} +{"current_steps": 9020, "total_steps": 204665, "loss": 0.0985, "lr": 8.813211511213172e-07, "epoch": 0.22036010065228545, "percentage": 4.41, "elapsed_time": "0:10:28", "remaining_time": "3:47:07", "throughput": 9684.73, "total_tokens": 6084736} +{"current_steps": 9025, "total_steps": 204665, "loss": 0.2164, "lr": 8.818097425123368e-07, "epoch": 0.2204822514841326, "percentage": 4.41, "elapsed_time": "0:10:28", "remaining_time": "3:47:07", "throughput": 9684.45, "total_tokens": 6087872} +{"current_steps": 9030, "total_steps": 204665, "loss": 0.0508, "lr": 8.822983339033566e-07, "epoch": 0.22060440231597978, "percentage": 4.41, "elapsed_time": "0:10:28", "remaining_time": "3:47:06", "throughput": 9684.49, "total_tokens": 6091264} +{"current_steps": 9035, "total_steps": 204665, "loss": 0.0883, "lr": 8.827869252943763e-07, "epoch": 0.22072655314782694, "percentage": 4.41, "elapsed_time": "0:10:29", "remaining_time": "3:47:06", "throughput": 9684.7, "total_tokens": 6094784} +{"current_steps": 9040, "total_steps": 204665, "loss": 0.132, "lr": 8.83275516685396e-07, "epoch": 0.2208487039796741, "percentage": 4.42, "elapsed_time": "0:10:29", "remaining_time": "3:47:06", "throughput": 9685.23, "total_tokens": 6098560} +{"current_steps": 9045, "total_steps": 204665, "loss": 0.1693, "lr": 8.837641080764157e-07, "epoch": 0.22097085481152126, "percentage": 4.42, "elapsed_time": "0:10:30", "remaining_time": "3:47:05", "throughput": 9685.26, "total_tokens": 6101952} +{"current_steps": 9050, "total_steps": 204665, "loss": 0.1167, "lr": 8.842526994674353e-07, "epoch": 0.22109300564336842, "percentage": 4.42, "elapsed_time": "0:10:30", "remaining_time": "3:47:05", "throughput": 9685.22, "total_tokens": 6105280} +{"current_steps": 9055, "total_steps": 204665, "loss": 0.1305, "lr": 8.84741290858455e-07, "epoch": 0.22121515647521559, "percentage": 4.42, "elapsed_time": "0:10:30", "remaining_time": "3:47:04", "throughput": 9684.74, "total_tokens": 6108224} +{"current_steps": 9060, "total_steps": 204665, "loss": 0.1659, "lr": 8.852298822494747e-07, "epoch": 0.22133730730706275, "percentage": 4.43, "elapsed_time": "0:10:31", "remaining_time": "3:47:04", "throughput": 9684.61, "total_tokens": 6111488} +{"current_steps": 9065, "total_steps": 204665, "loss": 0.1377, "lr": 8.857184736404945e-07, "epoch": 0.22145945813890994, "percentage": 4.43, "elapsed_time": "0:10:31", "remaining_time": "3:47:03", "throughput": 9684.48, "total_tokens": 6114752} +{"current_steps": 9070, "total_steps": 204665, "loss": 0.13, "lr": 8.862070650315141e-07, "epoch": 0.2215816089707571, "percentage": 4.43, "elapsed_time": "0:10:31", "remaining_time": "3:47:03", "throughput": 9684.14, "total_tokens": 6117824} +{"current_steps": 9075, "total_steps": 204665, "loss": 0.1427, "lr": 8.866956564225337e-07, "epoch": 0.22170375980260426, "percentage": 4.43, "elapsed_time": "0:10:32", "remaining_time": "3:47:03", "throughput": 9684.16, "total_tokens": 6121216} +{"current_steps": 9080, "total_steps": 204665, "loss": 0.2194, "lr": 8.871842478135535e-07, "epoch": 0.22182591063445142, "percentage": 4.44, "elapsed_time": "0:10:32", "remaining_time": "3:47:02", "throughput": 9684.38, "total_tokens": 6124736} +{"current_steps": 9085, "total_steps": 204665, "loss": 0.1909, "lr": 8.876728392045731e-07, "epoch": 0.22194806146629859, "percentage": 4.44, "elapsed_time": "0:10:32", "remaining_time": "3:47:02", "throughput": 9684.46, "total_tokens": 6128192} +{"current_steps": 9090, "total_steps": 204665, "loss": 0.0626, "lr": 8.881614305955929e-07, "epoch": 0.22207021229814575, "percentage": 4.44, "elapsed_time": "0:10:33", "remaining_time": "3:47:02", "throughput": 9684.33, "total_tokens": 6131456} +{"current_steps": 9095, "total_steps": 204665, "loss": 0.1528, "lr": 8.886500219866126e-07, "epoch": 0.2221923631299929, "percentage": 4.44, "elapsed_time": "0:10:33", "remaining_time": "3:47:01", "throughput": 9684.72, "total_tokens": 6135104} +{"current_steps": 9100, "total_steps": 204665, "loss": 0.0779, "lr": 8.891386133776323e-07, "epoch": 0.22231451396184007, "percentage": 4.45, "elapsed_time": "0:10:33", "remaining_time": "3:47:01", "throughput": 9684.8, "total_tokens": 6138560} +{"current_steps": 9105, "total_steps": 204665, "loss": 0.168, "lr": 8.896272047686519e-07, "epoch": 0.22243666479368723, "percentage": 4.45, "elapsed_time": "0:10:34", "remaining_time": "3:47:01", "throughput": 9684.65, "total_tokens": 6141760} +{"current_steps": 9110, "total_steps": 204665, "loss": 0.1326, "lr": 8.901157961596716e-07, "epoch": 0.22255881562553442, "percentage": 4.45, "elapsed_time": "0:10:34", "remaining_time": "3:47:00", "throughput": 9684.82, "total_tokens": 6145280} +{"current_steps": 9115, "total_steps": 204665, "loss": 0.2119, "lr": 8.906043875506913e-07, "epoch": 0.22268096645738159, "percentage": 4.45, "elapsed_time": "0:10:34", "remaining_time": "3:47:00", "throughput": 9684.79, "total_tokens": 6148608} +{"current_steps": 9120, "total_steps": 204665, "loss": 0.1082, "lr": 8.91092978941711e-07, "epoch": 0.22280311728922875, "percentage": 4.46, "elapsed_time": "0:10:35", "remaining_time": "3:47:00", "throughput": 9685.01, "total_tokens": 6152128} +{"current_steps": 9125, "total_steps": 204665, "loss": 0.1069, "lr": 8.915815703327308e-07, "epoch": 0.2229252681210759, "percentage": 4.46, "elapsed_time": "0:10:35", "remaining_time": "3:46:59", "throughput": 9684.93, "total_tokens": 6155456} +{"current_steps": 9130, "total_steps": 204665, "loss": 0.0835, "lr": 8.920701617237503e-07, "epoch": 0.22304741895292307, "percentage": 4.46, "elapsed_time": "0:10:35", "remaining_time": "3:46:59", "throughput": 9685.6, "total_tokens": 6159360} +{"current_steps": 9135, "total_steps": 204665, "loss": 0.101, "lr": 8.9255875311477e-07, "epoch": 0.22316956978477023, "percentage": 4.46, "elapsed_time": "0:10:36", "remaining_time": "3:46:59", "throughput": 9685.85, "total_tokens": 6162944} +{"current_steps": 9140, "total_steps": 204665, "loss": 0.12, "lr": 8.930473445057898e-07, "epoch": 0.2232917206166174, "percentage": 4.47, "elapsed_time": "0:10:36", "remaining_time": "3:46:58", "throughput": 9685.41, "total_tokens": 6165952} +{"current_steps": 9145, "total_steps": 204665, "loss": 0.1024, "lr": 8.935359358968094e-07, "epoch": 0.22341387144846456, "percentage": 4.47, "elapsed_time": "0:10:36", "remaining_time": "3:46:58", "throughput": 9685.74, "total_tokens": 6169600} +{"current_steps": 9150, "total_steps": 204665, "loss": 0.0809, "lr": 8.940245272878292e-07, "epoch": 0.22353602228031172, "percentage": 4.47, "elapsed_time": "0:10:37", "remaining_time": "3:46:58", "throughput": 9685.76, "total_tokens": 6172992} +{"current_steps": 9155, "total_steps": 204665, "loss": 0.0282, "lr": 8.945131186788489e-07, "epoch": 0.22365817311215888, "percentage": 4.47, "elapsed_time": "0:10:37", "remaining_time": "3:46:57", "throughput": 9685.45, "total_tokens": 6176064} +{"current_steps": 9160, "total_steps": 204665, "loss": 0.0747, "lr": 8.950017100698685e-07, "epoch": 0.22378032394400607, "percentage": 4.48, "elapsed_time": "0:10:38", "remaining_time": "3:46:57", "throughput": 9685.72, "total_tokens": 6179648} +{"current_steps": 9165, "total_steps": 204665, "loss": 0.0868, "lr": 8.954903014608882e-07, "epoch": 0.22390247477585323, "percentage": 4.48, "elapsed_time": "0:10:38", "remaining_time": "3:46:57", "throughput": 9686.02, "total_tokens": 6183360} +{"current_steps": 9170, "total_steps": 204665, "loss": 0.1656, "lr": 8.959788928519079e-07, "epoch": 0.2240246256077004, "percentage": 4.48, "elapsed_time": "0:10:38", "remaining_time": "3:46:56", "throughput": 9685.56, "total_tokens": 6186368} +{"current_steps": 9175, "total_steps": 204665, "loss": 0.1054, "lr": 8.964674842429276e-07, "epoch": 0.22414677643954756, "percentage": 4.48, "elapsed_time": "0:10:39", "remaining_time": "3:46:56", "throughput": 9686.08, "total_tokens": 6190208} +{"current_steps": 9180, "total_steps": 204665, "loss": 0.1436, "lr": 8.969560756339473e-07, "epoch": 0.22426892727139472, "percentage": 4.49, "elapsed_time": "0:10:39", "remaining_time": "3:46:56", "throughput": 9686.24, "total_tokens": 6193728} +{"current_steps": 9185, "total_steps": 204665, "loss": 0.1397, "lr": 8.97444667024967e-07, "epoch": 0.22439107810324188, "percentage": 4.49, "elapsed_time": "0:10:39", "remaining_time": "3:46:56", "throughput": 9685.95, "total_tokens": 6196864} +{"current_steps": 9190, "total_steps": 204665, "loss": 0.0629, "lr": 8.979332584159866e-07, "epoch": 0.22451322893508904, "percentage": 4.49, "elapsed_time": "0:10:40", "remaining_time": "3:46:55", "throughput": 9686.13, "total_tokens": 6200384} +{"current_steps": 9195, "total_steps": 204665, "loss": 0.1048, "lr": 8.984218498070063e-07, "epoch": 0.2246353797669362, "percentage": 4.49, "elapsed_time": "0:10:40", "remaining_time": "3:46:55", "throughput": 9686.45, "total_tokens": 6204032} +{"current_steps": 9200, "total_steps": 204665, "loss": 0.0767, "lr": 8.989104411980261e-07, "epoch": 0.22475753059878337, "percentage": 4.5, "elapsed_time": "0:10:40", "remaining_time": "3:46:54", "throughput": 9685.8, "total_tokens": 6206848} +{"current_steps": 9205, "total_steps": 204665, "loss": 0.083, "lr": 8.993990325890457e-07, "epoch": 0.22487968143063053, "percentage": 4.5, "elapsed_time": "0:10:41", "remaining_time": "3:46:54", "throughput": 9686.17, "total_tokens": 6210496} +{"current_steps": 9210, "total_steps": 204665, "loss": 0.1987, "lr": 8.998876239800655e-07, "epoch": 0.22500183226247772, "percentage": 4.5, "elapsed_time": "0:10:41", "remaining_time": "3:46:54", "throughput": 9685.89, "total_tokens": 6213632} +{"current_steps": 9215, "total_steps": 204665, "loss": 0.1313, "lr": 9.003762153710851e-07, "epoch": 0.22512398309432488, "percentage": 4.5, "elapsed_time": "0:10:41", "remaining_time": "3:46:53", "throughput": 9685.98, "total_tokens": 6217088} +{"current_steps": 9220, "total_steps": 204665, "loss": 0.0453, "lr": 9.008648067621048e-07, "epoch": 0.22524613392617204, "percentage": 4.5, "elapsed_time": "0:10:42", "remaining_time": "3:46:53", "throughput": 9685.72, "total_tokens": 6220224} +{"current_steps": 9225, "total_steps": 204665, "loss": 0.1343, "lr": 9.013533981531245e-07, "epoch": 0.2253682847580192, "percentage": 4.51, "elapsed_time": "0:10:42", "remaining_time": "3:46:53", "throughput": 9685.98, "total_tokens": 6223808} +{"current_steps": 9230, "total_steps": 204665, "loss": 0.1499, "lr": 9.018419895441442e-07, "epoch": 0.22549043558986637, "percentage": 4.51, "elapsed_time": "0:10:42", "remaining_time": "3:46:53", "throughput": 9686.87, "total_tokens": 6227904} +{"current_steps": 9235, "total_steps": 204665, "loss": 0.1896, "lr": 9.023305809351639e-07, "epoch": 0.22561258642171353, "percentage": 4.51, "elapsed_time": "0:10:43", "remaining_time": "3:46:52", "throughput": 9686.82, "total_tokens": 6231232} +{"current_steps": 9240, "total_steps": 204665, "loss": 0.1545, "lr": 9.028191723261835e-07, "epoch": 0.2257347372535607, "percentage": 4.51, "elapsed_time": "0:10:43", "remaining_time": "3:46:52", "throughput": 9687.24, "total_tokens": 6234944} +{"current_steps": 9245, "total_steps": 204665, "loss": 0.0667, "lr": 9.033077637172033e-07, "epoch": 0.22585688808540785, "percentage": 4.52, "elapsed_time": "0:10:43", "remaining_time": "3:46:51", "throughput": 9686.5, "total_tokens": 6237696} +{"current_steps": 9250, "total_steps": 204665, "loss": 0.0615, "lr": 9.037963551082229e-07, "epoch": 0.22597903891725502, "percentage": 4.52, "elapsed_time": "0:10:44", "remaining_time": "3:46:51", "throughput": 9686.3, "total_tokens": 6240896} +{"current_steps": 9255, "total_steps": 204665, "loss": 0.0874, "lr": 9.042849464992427e-07, "epoch": 0.2261011897491022, "percentage": 4.52, "elapsed_time": "0:10:44", "remaining_time": "3:46:51", "throughput": 9686.16, "total_tokens": 6244160} +{"current_steps": 9260, "total_steps": 204665, "loss": 0.1483, "lr": 9.047735378902624e-07, "epoch": 0.22622334058094937, "percentage": 4.52, "elapsed_time": "0:10:44", "remaining_time": "3:46:50", "throughput": 9685.61, "total_tokens": 6247040} +{"current_steps": 9265, "total_steps": 204665, "loss": 0.1543, "lr": 9.05262129281282e-07, "epoch": 0.22634549141279653, "percentage": 4.53, "elapsed_time": "0:10:45", "remaining_time": "3:46:50", "throughput": 9686.01, "total_tokens": 6250752} +{"current_steps": 9270, "total_steps": 204665, "loss": 0.1016, "lr": 9.057507206723017e-07, "epoch": 0.2264676422446437, "percentage": 4.53, "elapsed_time": "0:10:45", "remaining_time": "3:46:49", "throughput": 9685.65, "total_tokens": 6253824} +{"current_steps": 9275, "total_steps": 204665, "loss": 0.1943, "lr": 9.062393120633214e-07, "epoch": 0.22658979307649085, "percentage": 4.53, "elapsed_time": "0:10:46", "remaining_time": "3:46:49", "throughput": 9685.81, "total_tokens": 6257344} +{"current_steps": 9280, "total_steps": 204665, "loss": 0.0592, "lr": 9.067279034543411e-07, "epoch": 0.22671194390833802, "percentage": 4.53, "elapsed_time": "0:10:46", "remaining_time": "3:46:49", "throughput": 9685.77, "total_tokens": 6260672} +{"current_steps": 9285, "total_steps": 204665, "loss": 0.1493, "lr": 9.072164948453608e-07, "epoch": 0.22683409474018518, "percentage": 4.54, "elapsed_time": "0:10:46", "remaining_time": "3:46:48", "throughput": 9685.57, "total_tokens": 6263872} +{"current_steps": 9290, "total_steps": 204665, "loss": 0.1311, "lr": 9.077050862363805e-07, "epoch": 0.22695624557203234, "percentage": 4.54, "elapsed_time": "0:10:47", "remaining_time": "3:46:48", "throughput": 9685.19, "total_tokens": 6266944} +{"current_steps": 9295, "total_steps": 204665, "loss": 0.1872, "lr": 9.081936776274001e-07, "epoch": 0.2270783964038795, "percentage": 4.54, "elapsed_time": "0:10:47", "remaining_time": "3:46:47", "throughput": 9685.0, "total_tokens": 6270144} +{"current_steps": 9300, "total_steps": 204665, "loss": 0.1492, "lr": 9.086822690184198e-07, "epoch": 0.22720054723572666, "percentage": 4.54, "elapsed_time": "0:10:47", "remaining_time": "3:46:47", "throughput": 9685.04, "total_tokens": 6273536} +{"current_steps": 9305, "total_steps": 204665, "loss": 0.0562, "lr": 9.091708604094396e-07, "epoch": 0.22732269806757385, "percentage": 4.55, "elapsed_time": "0:10:48", "remaining_time": "3:46:46", "throughput": 9684.69, "total_tokens": 6276608} +{"current_steps": 9310, "total_steps": 204665, "loss": 0.2263, "lr": 9.096594518004592e-07, "epoch": 0.22744484889942101, "percentage": 4.55, "elapsed_time": "0:10:48", "remaining_time": "3:46:46", "throughput": 9684.85, "total_tokens": 6280128} +{"current_steps": 9315, "total_steps": 204665, "loss": 0.1597, "lr": 9.10148043191479e-07, "epoch": 0.22756699973126818, "percentage": 4.55, "elapsed_time": "0:10:48", "remaining_time": "3:46:46", "throughput": 9685.54, "total_tokens": 6284032} +{"current_steps": 9320, "total_steps": 204665, "loss": 0.1385, "lr": 9.106366345824987e-07, "epoch": 0.22768915056311534, "percentage": 4.55, "elapsed_time": "0:10:49", "remaining_time": "3:46:46", "throughput": 9685.7, "total_tokens": 6287552} +{"current_steps": 9325, "total_steps": 204665, "loss": 0.1459, "lr": 9.111252259735182e-07, "epoch": 0.2278113013949625, "percentage": 4.56, "elapsed_time": "0:10:49", "remaining_time": "3:46:45", "throughput": 9686.03, "total_tokens": 6291200} +{"current_steps": 9330, "total_steps": 204665, "loss": 0.1791, "lr": 9.11613817364538e-07, "epoch": 0.22793345222680966, "percentage": 4.56, "elapsed_time": "0:10:49", "remaining_time": "3:46:45", "throughput": 9685.72, "total_tokens": 6294336} +{"current_steps": 9335, "total_steps": 204665, "loss": 0.1285, "lr": 9.121024087555577e-07, "epoch": 0.22805560305865683, "percentage": 4.56, "elapsed_time": "0:10:50", "remaining_time": "3:46:45", "throughput": 9685.89, "total_tokens": 6297856} +{"current_steps": 9340, "total_steps": 204665, "loss": 0.1282, "lr": 9.125910001465774e-07, "epoch": 0.228177753890504, "percentage": 4.56, "elapsed_time": "0:10:50", "remaining_time": "3:46:45", "throughput": 9686.14, "total_tokens": 6301440} +{"current_steps": 9345, "total_steps": 204665, "loss": 0.119, "lr": 9.130795915375971e-07, "epoch": 0.22829990472235115, "percentage": 4.57, "elapsed_time": "0:10:50", "remaining_time": "3:46:44", "throughput": 9686.43, "total_tokens": 6305024} +{"current_steps": 9350, "total_steps": 204665, "loss": 0.0722, "lr": 9.135681829286167e-07, "epoch": 0.2284220555541983, "percentage": 4.57, "elapsed_time": "0:10:51", "remaining_time": "3:46:44", "throughput": 9686.24, "total_tokens": 6308224} +{"current_steps": 9355, "total_steps": 204665, "loss": 0.1011, "lr": 9.140567743196364e-07, "epoch": 0.2285442063860455, "percentage": 4.57, "elapsed_time": "0:10:51", "remaining_time": "3:46:44", "throughput": 9686.5, "total_tokens": 6311808} +{"current_steps": 9360, "total_steps": 204665, "loss": 0.1862, "lr": 9.145453657106561e-07, "epoch": 0.22866635721789266, "percentage": 4.57, "elapsed_time": "0:10:51", "remaining_time": "3:46:43", "throughput": 9686.5, "total_tokens": 6315200} +{"current_steps": 9365, "total_steps": 204665, "loss": 0.0647, "lr": 9.150339571016759e-07, "epoch": 0.22878850804973982, "percentage": 4.58, "elapsed_time": "0:10:52", "remaining_time": "3:46:43", "throughput": 9686.54, "total_tokens": 6318592} +{"current_steps": 9370, "total_steps": 204665, "loss": 0.0721, "lr": 9.155225484926955e-07, "epoch": 0.228910658881587, "percentage": 4.58, "elapsed_time": "0:10:52", "remaining_time": "3:46:42", "throughput": 9686.09, "total_tokens": 6321600} +{"current_steps": 9375, "total_steps": 204665, "loss": 0.064, "lr": 9.160111398837153e-07, "epoch": 0.22903280971343415, "percentage": 4.58, "elapsed_time": "0:10:52", "remaining_time": "3:46:42", "throughput": 9685.66, "total_tokens": 6324608} +{"current_steps": 9380, "total_steps": 204665, "loss": 0.1505, "lr": 9.164997312747349e-07, "epoch": 0.2291549605452813, "percentage": 4.58, "elapsed_time": "0:10:53", "remaining_time": "3:46:41", "throughput": 9685.62, "total_tokens": 6327936} +{"current_steps": 9385, "total_steps": 204665, "loss": 0.3507, "lr": 9.169883226657545e-07, "epoch": 0.22927711137712847, "percentage": 4.59, "elapsed_time": "0:10:53", "remaining_time": "3:46:41", "throughput": 9684.98, "total_tokens": 6330752} +{"current_steps": 9390, "total_steps": 204665, "loss": 0.1794, "lr": 9.174769140567743e-07, "epoch": 0.22939926220897564, "percentage": 4.59, "elapsed_time": "0:10:54", "remaining_time": "3:46:41", "throughput": 9685.24, "total_tokens": 6334336} +{"current_steps": 9395, "total_steps": 204665, "loss": 0.1112, "lr": 9.17965505447794e-07, "epoch": 0.2295214130408228, "percentage": 4.59, "elapsed_time": "0:10:54", "remaining_time": "3:46:40", "throughput": 9685.25, "total_tokens": 6337728} +{"current_steps": 9400, "total_steps": 204665, "loss": 0.0702, "lr": 9.184540968388137e-07, "epoch": 0.22964356387266996, "percentage": 4.59, "elapsed_time": "0:10:54", "remaining_time": "3:46:40", "throughput": 9685.11, "total_tokens": 6340992} +{"current_steps": 9405, "total_steps": 204665, "loss": 0.068, "lr": 9.189426882298333e-07, "epoch": 0.22976571470451715, "percentage": 4.6, "elapsed_time": "0:10:55", "remaining_time": "3:46:39", "throughput": 9684.72, "total_tokens": 6344000} +{"current_steps": 9410, "total_steps": 204665, "loss": 0.1371, "lr": 9.19431279620853e-07, "epoch": 0.2298878655363643, "percentage": 4.6, "elapsed_time": "0:10:55", "remaining_time": "3:46:39", "throughput": 9684.5, "total_tokens": 6347200} +{"current_steps": 9415, "total_steps": 204665, "loss": 0.1161, "lr": 9.199198710118727e-07, "epoch": 0.23001001636821147, "percentage": 4.6, "elapsed_time": "0:10:55", "remaining_time": "3:46:39", "throughput": 9684.84, "total_tokens": 6350848} +{"current_steps": 9420, "total_steps": 204665, "loss": 0.1124, "lr": 9.204084624028924e-07, "epoch": 0.23013216720005863, "percentage": 4.6, "elapsed_time": "0:10:56", "remaining_time": "3:46:38", "throughput": 9685.19, "total_tokens": 6354496} +{"current_steps": 9425, "total_steps": 204665, "loss": 0.054, "lr": 9.208970537939122e-07, "epoch": 0.2302543180319058, "percentage": 4.61, "elapsed_time": "0:10:56", "remaining_time": "3:46:38", "throughput": 9685.43, "total_tokens": 6358016} +{"current_steps": 9430, "total_steps": 204665, "loss": 0.1934, "lr": 9.213856451849317e-07, "epoch": 0.23037646886375296, "percentage": 4.61, "elapsed_time": "0:10:56", "remaining_time": "3:46:38", "throughput": 9685.59, "total_tokens": 6361536} +{"current_steps": 9435, "total_steps": 204665, "loss": 0.1495, "lr": 9.218742365759515e-07, "epoch": 0.23049861969560012, "percentage": 4.61, "elapsed_time": "0:10:57", "remaining_time": "3:46:37", "throughput": 9685.19, "total_tokens": 6364544} +{"current_steps": 9440, "total_steps": 204665, "loss": 0.1165, "lr": 9.223628279669712e-07, "epoch": 0.23062077052744728, "percentage": 4.61, "elapsed_time": "0:10:57", "remaining_time": "3:46:37", "throughput": 9686.28, "total_tokens": 6368832} +{"current_steps": 9445, "total_steps": 204665, "loss": 0.0733, "lr": 9.228514193579908e-07, "epoch": 0.23074292135929445, "percentage": 4.61, "elapsed_time": "0:10:57", "remaining_time": "3:46:37", "throughput": 9686.25, "total_tokens": 6372160} +{"current_steps": 9450, "total_steps": 204665, "loss": 0.136, "lr": 9.233400107490106e-07, "epoch": 0.23086507219114163, "percentage": 4.62, "elapsed_time": "0:10:58", "remaining_time": "3:46:36", "throughput": 9686.28, "total_tokens": 6375552} +{"current_steps": 9455, "total_steps": 204665, "loss": 0.0873, "lr": 9.238286021400303e-07, "epoch": 0.2309872230229888, "percentage": 4.62, "elapsed_time": "0:10:58", "remaining_time": "3:46:36", "throughput": 9686.29, "total_tokens": 6378944} +{"current_steps": 9460, "total_steps": 204665, "loss": 0.1288, "lr": 9.243171935310499e-07, "epoch": 0.23110937385483596, "percentage": 4.62, "elapsed_time": "0:10:58", "remaining_time": "3:46:36", "throughput": 9685.8, "total_tokens": 6381888} +{"current_steps": 9465, "total_steps": 204665, "loss": 0.1454, "lr": 9.248057849220696e-07, "epoch": 0.23123152468668312, "percentage": 4.62, "elapsed_time": "0:10:59", "remaining_time": "3:46:35", "throughput": 9685.61, "total_tokens": 6385088} +{"current_steps": 9470, "total_steps": 204665, "loss": 0.0999, "lr": 9.252943763130894e-07, "epoch": 0.23135367551853028, "percentage": 4.63, "elapsed_time": "0:10:59", "remaining_time": "3:46:35", "throughput": 9685.31, "total_tokens": 6388224} +{"current_steps": 9475, "total_steps": 204665, "loss": 0.1227, "lr": 9.25782967704109e-07, "epoch": 0.23147582635037744, "percentage": 4.63, "elapsed_time": "0:10:59", "remaining_time": "3:46:34", "throughput": 9685.23, "total_tokens": 6391552} +{"current_steps": 9480, "total_steps": 204665, "loss": 0.0858, "lr": 9.262715590951287e-07, "epoch": 0.2315979771822246, "percentage": 4.63, "elapsed_time": "0:11:00", "remaining_time": "3:46:34", "throughput": 9685.56, "total_tokens": 6395200} +{"current_steps": 9485, "total_steps": 204665, "loss": 0.0834, "lr": 9.267601504861485e-07, "epoch": 0.23172012801407177, "percentage": 4.63, "elapsed_time": "0:11:00", "remaining_time": "3:46:34", "throughput": 9686.06, "total_tokens": 6398976} +{"current_steps": 9490, "total_steps": 204665, "loss": 0.1258, "lr": 9.27248741877168e-07, "epoch": 0.23184227884591893, "percentage": 4.64, "elapsed_time": "0:11:00", "remaining_time": "3:46:34", "throughput": 9686.03, "total_tokens": 6402304} +{"current_steps": 9495, "total_steps": 204665, "loss": 0.1547, "lr": 9.277373332681878e-07, "epoch": 0.2319644296777661, "percentage": 4.64, "elapsed_time": "0:11:01", "remaining_time": "3:46:33", "throughput": 9686.03, "total_tokens": 6405696} +{"current_steps": 9500, "total_steps": 204665, "loss": 0.269, "lr": 9.282259246592075e-07, "epoch": 0.23208658050961328, "percentage": 4.64, "elapsed_time": "0:11:01", "remaining_time": "3:46:33", "throughput": 9686.29, "total_tokens": 6409280} +{"current_steps": 9505, "total_steps": 204665, "loss": 0.1537, "lr": 9.287145160502271e-07, "epoch": 0.23220873134146044, "percentage": 4.64, "elapsed_time": "0:11:02", "remaining_time": "3:46:33", "throughput": 9687.26, "total_tokens": 6413504} +{"current_steps": 9510, "total_steps": 204665, "loss": 0.0875, "lr": 9.292031074412469e-07, "epoch": 0.2323308821733076, "percentage": 4.65, "elapsed_time": "0:11:02", "remaining_time": "3:46:33", "throughput": 9687.56, "total_tokens": 6417088} +{"current_steps": 9515, "total_steps": 204665, "loss": 0.123, "lr": 9.296916988322665e-07, "epoch": 0.23245303300515477, "percentage": 4.65, "elapsed_time": "0:11:02", "remaining_time": "3:46:32", "throughput": 9687.43, "total_tokens": 6420352} +{"current_steps": 9520, "total_steps": 204665, "loss": 0.1393, "lr": 9.301802902232862e-07, "epoch": 0.23257518383700193, "percentage": 4.65, "elapsed_time": "0:11:03", "remaining_time": "3:46:32", "throughput": 9687.98, "total_tokens": 6424192} +{"current_steps": 9525, "total_steps": 204665, "loss": 0.1615, "lr": 9.306688816143059e-07, "epoch": 0.2326973346688491, "percentage": 4.65, "elapsed_time": "0:11:03", "remaining_time": "3:46:32", "throughput": 9687.94, "total_tokens": 6427520} +{"current_steps": 9530, "total_steps": 204665, "loss": 0.0897, "lr": 9.311574730053257e-07, "epoch": 0.23281948550069625, "percentage": 4.66, "elapsed_time": "0:11:03", "remaining_time": "3:46:31", "throughput": 9688.24, "total_tokens": 6431104} +{"current_steps": 9535, "total_steps": 204665, "loss": 0.1335, "lr": 9.316460643963453e-07, "epoch": 0.23294163633254342, "percentage": 4.66, "elapsed_time": "0:11:04", "remaining_time": "3:46:31", "throughput": 9688.51, "total_tokens": 6434688} +{"current_steps": 9540, "total_steps": 204665, "loss": 0.1245, "lr": 9.32134655787365e-07, "epoch": 0.23306378716439058, "percentage": 4.66, "elapsed_time": "0:11:04", "remaining_time": "3:46:31", "throughput": 9688.53, "total_tokens": 6438080} +{"current_steps": 9545, "total_steps": 204665, "loss": 0.0828, "lr": 9.326232471783847e-07, "epoch": 0.23318593799623774, "percentage": 4.66, "elapsed_time": "0:11:04", "remaining_time": "3:46:30", "throughput": 9688.48, "total_tokens": 6441408} +{"current_steps": 9550, "total_steps": 204665, "loss": 0.1374, "lr": 9.331118385694043e-07, "epoch": 0.23330808882808493, "percentage": 4.67, "elapsed_time": "0:11:05", "remaining_time": "3:46:30", "throughput": 9688.74, "total_tokens": 6444992} +{"current_steps": 9555, "total_steps": 204665, "loss": 0.1874, "lr": 9.336004299604241e-07, "epoch": 0.2334302396599321, "percentage": 4.67, "elapsed_time": "0:11:05", "remaining_time": "3:46:30", "throughput": 9689.19, "total_tokens": 6448768} +{"current_steps": 9560, "total_steps": 204665, "loss": 0.125, "lr": 9.340890213514438e-07, "epoch": 0.23355239049177925, "percentage": 4.67, "elapsed_time": "0:11:05", "remaining_time": "3:46:30", "throughput": 9688.85, "total_tokens": 6451840} +{"current_steps": 9565, "total_steps": 204665, "loss": 0.1218, "lr": 9.345776127424634e-07, "epoch": 0.23367454132362642, "percentage": 4.67, "elapsed_time": "0:11:06", "remaining_time": "3:46:29", "throughput": 9688.92, "total_tokens": 6455296} +{"current_steps": 9570, "total_steps": 204665, "loss": 0.1065, "lr": 9.350662041334831e-07, "epoch": 0.23379669215547358, "percentage": 4.68, "elapsed_time": "0:11:06", "remaining_time": "3:46:29", "throughput": 9688.73, "total_tokens": 6458496} +{"current_steps": 9575, "total_steps": 204665, "loss": 0.1746, "lr": 9.355547955245028e-07, "epoch": 0.23391884298732074, "percentage": 4.68, "elapsed_time": "0:11:06", "remaining_time": "3:46:29", "throughput": 9688.56, "total_tokens": 6461760} +{"current_steps": 9580, "total_steps": 204665, "loss": 0.0387, "lr": 9.360433869155225e-07, "epoch": 0.2340409938191679, "percentage": 4.68, "elapsed_time": "0:11:07", "remaining_time": "3:46:28", "throughput": 9688.95, "total_tokens": 6465472} +{"current_steps": 9585, "total_steps": 204665, "loss": 0.1766, "lr": 9.365319783065422e-07, "epoch": 0.23416314465101506, "percentage": 4.68, "elapsed_time": "0:11:07", "remaining_time": "3:46:28", "throughput": 9688.58, "total_tokens": 6468544} +{"current_steps": 9590, "total_steps": 204665, "loss": 0.0384, "lr": 9.37020569697562e-07, "epoch": 0.23428529548286223, "percentage": 4.69, "elapsed_time": "0:11:07", "remaining_time": "3:46:28", "throughput": 9688.86, "total_tokens": 6472128} +{"current_steps": 9595, "total_steps": 204665, "loss": 0.307, "lr": 9.375091610885816e-07, "epoch": 0.23440744631470942, "percentage": 4.69, "elapsed_time": "0:11:08", "remaining_time": "3:46:27", "throughput": 9688.78, "total_tokens": 6475392} +{"current_steps": 9600, "total_steps": 204665, "loss": 0.159, "lr": 9.379977524796012e-07, "epoch": 0.23452959714655658, "percentage": 4.69, "elapsed_time": "0:11:08", "remaining_time": "3:46:27", "throughput": 9688.74, "total_tokens": 6478720} +{"current_steps": 9605, "total_steps": 204665, "loss": 0.2908, "lr": 9.38486343870621e-07, "epoch": 0.23465174797840374, "percentage": 4.69, "elapsed_time": "0:11:09", "remaining_time": "3:46:27", "throughput": 9689.45, "total_tokens": 6482688} +{"current_steps": 9610, "total_steps": 204665, "loss": 0.1376, "lr": 9.389749352616406e-07, "epoch": 0.2347738988102509, "percentage": 4.7, "elapsed_time": "0:11:09", "remaining_time": "3:46:26", "throughput": 9689.41, "total_tokens": 6486016} +{"current_steps": 9615, "total_steps": 204665, "loss": 0.0965, "lr": 9.394635266526604e-07, "epoch": 0.23489604964209806, "percentage": 4.7, "elapsed_time": "0:11:09", "remaining_time": "3:46:26", "throughput": 9689.29, "total_tokens": 6489280} +{"current_steps": 9620, "total_steps": 204665, "loss": 0.0924, "lr": 9.399521180436801e-07, "epoch": 0.23501820047394523, "percentage": 4.7, "elapsed_time": "0:11:10", "remaining_time": "3:46:25", "throughput": 9688.98, "total_tokens": 6492416} +{"current_steps": 9625, "total_steps": 204665, "loss": 0.1259, "lr": 9.404407094346996e-07, "epoch": 0.2351403513057924, "percentage": 4.7, "elapsed_time": "0:11:10", "remaining_time": "3:46:25", "throughput": 9688.9, "total_tokens": 6495744} +{"current_steps": 9630, "total_steps": 204665, "loss": 0.1663, "lr": 9.409293008257194e-07, "epoch": 0.23526250213763955, "percentage": 4.71, "elapsed_time": "0:11:10", "remaining_time": "3:46:25", "throughput": 9688.91, "total_tokens": 6499136} +{"current_steps": 9635, "total_steps": 204665, "loss": 0.0544, "lr": 9.414178922167391e-07, "epoch": 0.2353846529694867, "percentage": 4.71, "elapsed_time": "0:11:11", "remaining_time": "3:46:24", "throughput": 9688.96, "total_tokens": 6502528} +{"current_steps": 9640, "total_steps": 204665, "loss": 0.2107, "lr": 9.419064836077588e-07, "epoch": 0.23550680380133387, "percentage": 4.71, "elapsed_time": "0:11:11", "remaining_time": "3:46:24", "throughput": 9689.2, "total_tokens": 6506112} +{"current_steps": 9645, "total_steps": 204665, "loss": 0.0687, "lr": 9.423950749987785e-07, "epoch": 0.23562895463318106, "percentage": 4.71, "elapsed_time": "0:11:11", "remaining_time": "3:46:24", "throughput": 9689.4, "total_tokens": 6509696} +{"current_steps": 9650, "total_steps": 204665, "loss": 0.0455, "lr": 9.428836663897983e-07, "epoch": 0.23575110546502823, "percentage": 4.72, "elapsed_time": "0:11:12", "remaining_time": "3:46:24", "throughput": 9689.79, "total_tokens": 6513408} +{"current_steps": 9655, "total_steps": 204665, "loss": 0.0809, "lr": 9.433722577808178e-07, "epoch": 0.2358732562968754, "percentage": 4.72, "elapsed_time": "0:11:12", "remaining_time": "3:46:23", "throughput": 9689.84, "total_tokens": 6516864} +{"current_steps": 9660, "total_steps": 204665, "loss": 0.2114, "lr": 9.438608491718375e-07, "epoch": 0.23599540712872255, "percentage": 4.72, "elapsed_time": "0:11:12", "remaining_time": "3:46:23", "throughput": 9689.41, "total_tokens": 6519872} +{"current_steps": 9665, "total_steps": 204665, "loss": 0.1159, "lr": 9.443494405628573e-07, "epoch": 0.2361175579605697, "percentage": 4.72, "elapsed_time": "0:11:13", "remaining_time": "3:46:22", "throughput": 9689.12, "total_tokens": 6522944} +{"current_steps": 9670, "total_steps": 204665, "loss": 0.1237, "lr": 9.448380319538769e-07, "epoch": 0.23623970879241687, "percentage": 4.72, "elapsed_time": "0:11:13", "remaining_time": "3:46:22", "throughput": 9688.84, "total_tokens": 6526080} +{"current_steps": 9675, "total_steps": 204665, "loss": 0.0633, "lr": 9.453266233448967e-07, "epoch": 0.23636185962426404, "percentage": 4.73, "elapsed_time": "0:11:13", "remaining_time": "3:46:22", "throughput": 9688.92, "total_tokens": 6529536} +{"current_steps": 9680, "total_steps": 204665, "loss": 0.1407, "lr": 9.458152147359163e-07, "epoch": 0.2364840104561112, "percentage": 4.73, "elapsed_time": "0:11:14", "remaining_time": "3:46:21", "throughput": 9689.1, "total_tokens": 6533056} +{"current_steps": 9685, "total_steps": 204665, "loss": 0.1077, "lr": 9.46303806126936e-07, "epoch": 0.23660616128795836, "percentage": 4.73, "elapsed_time": "0:11:14", "remaining_time": "3:46:21", "throughput": 9689.01, "total_tokens": 6536384} +{"current_steps": 9690, "total_steps": 204665, "loss": 0.0448, "lr": 9.467923975179557e-07, "epoch": 0.23672831211980552, "percentage": 4.73, "elapsed_time": "0:11:14", "remaining_time": "3:46:21", "throughput": 9689.51, "total_tokens": 6540160} +{"current_steps": 9695, "total_steps": 204665, "loss": 0.239, "lr": 9.472809889089754e-07, "epoch": 0.2368504629516527, "percentage": 4.74, "elapsed_time": "0:11:15", "remaining_time": "3:46:21", "throughput": 9690.07, "total_tokens": 6544000} +{"current_steps": 9700, "total_steps": 204665, "loss": 0.2482, "lr": 9.477695802999951e-07, "epoch": 0.23697261378349987, "percentage": 4.74, "elapsed_time": "0:11:15", "remaining_time": "3:46:20", "throughput": 9690.3, "total_tokens": 6547584} +{"current_steps": 9705, "total_steps": 204665, "loss": 0.1092, "lr": 9.482581716910148e-07, "epoch": 0.23709476461534704, "percentage": 4.74, "elapsed_time": "0:11:16", "remaining_time": "3:46:20", "throughput": 9689.96, "total_tokens": 6550656} +{"current_steps": 9710, "total_steps": 204665, "loss": 0.0269, "lr": 9.487467630820345e-07, "epoch": 0.2372169154471942, "percentage": 4.74, "elapsed_time": "0:11:16", "remaining_time": "3:46:19", "throughput": 9689.84, "total_tokens": 6553920} +{"current_steps": 9715, "total_steps": 204665, "loss": 0.2107, "lr": 9.492353544730541e-07, "epoch": 0.23733906627904136, "percentage": 4.75, "elapsed_time": "0:11:16", "remaining_time": "3:46:19", "throughput": 9689.9, "total_tokens": 6557312} +{"current_steps": 9720, "total_steps": 204665, "loss": 0.1803, "lr": 9.497239458640738e-07, "epoch": 0.23746121711088852, "percentage": 4.75, "elapsed_time": "0:11:17", "remaining_time": "3:46:19", "throughput": 9689.71, "total_tokens": 6560512} +{"current_steps": 9725, "total_steps": 204665, "loss": 0.0741, "lr": 9.502125372550936e-07, "epoch": 0.23758336794273568, "percentage": 4.75, "elapsed_time": "0:11:17", "remaining_time": "3:46:19", "throughput": 9690.44, "total_tokens": 6564480} +{"current_steps": 9730, "total_steps": 204665, "loss": 0.1461, "lr": 9.507011286461132e-07, "epoch": 0.23770551877458285, "percentage": 4.75, "elapsed_time": "0:11:17", "remaining_time": "3:46:18", "throughput": 9690.37, "total_tokens": 6567808} +{"current_steps": 9735, "total_steps": 204665, "loss": 0.0947, "lr": 9.511897200371329e-07, "epoch": 0.23782766960643, "percentage": 4.76, "elapsed_time": "0:11:18", "remaining_time": "3:46:18", "throughput": 9690.4, "total_tokens": 6571200} +{"current_steps": 9740, "total_steps": 204665, "loss": 0.0608, "lr": 9.516783114281526e-07, "epoch": 0.23794982043827717, "percentage": 4.76, "elapsed_time": "0:11:18", "remaining_time": "3:46:18", "throughput": 9690.78, "total_tokens": 6574912} +{"current_steps": 9745, "total_steps": 204665, "loss": 0.1259, "lr": 9.521669028191723e-07, "epoch": 0.23807197127012436, "percentage": 4.76, "elapsed_time": "0:11:18", "remaining_time": "3:46:17", "throughput": 9690.95, "total_tokens": 6578432} +{"current_steps": 9750, "total_steps": 204665, "loss": 0.1251, "lr": 9.52655494210192e-07, "epoch": 0.23819412210197152, "percentage": 4.76, "elapsed_time": "0:11:19", "remaining_time": "3:46:17", "throughput": 9690.89, "total_tokens": 6581760} +{"current_steps": 9755, "total_steps": 204665, "loss": 0.0943, "lr": 9.531440856012117e-07, "epoch": 0.23831627293381868, "percentage": 4.77, "elapsed_time": "0:11:19", "remaining_time": "3:46:17", "throughput": 9691.05, "total_tokens": 6585280} +{"current_steps": 9760, "total_steps": 204665, "loss": 0.0738, "lr": 9.536326769922314e-07, "epoch": 0.23843842376566585, "percentage": 4.77, "elapsed_time": "0:11:19", "remaining_time": "3:46:16", "throughput": 9690.77, "total_tokens": 6588416} +{"current_steps": 9765, "total_steps": 204665, "loss": 0.1411, "lr": 9.54121268383251e-07, "epoch": 0.238560574597513, "percentage": 4.77, "elapsed_time": "0:11:20", "remaining_time": "3:46:16", "throughput": 9690.74, "total_tokens": 6591808} +{"current_steps": 9770, "total_steps": 204665, "loss": 0.2692, "lr": 9.546098597742707e-07, "epoch": 0.23868272542936017, "percentage": 4.77, "elapsed_time": "0:11:20", "remaining_time": "3:46:16", "throughput": 9690.9, "total_tokens": 6595328} +{"current_steps": 9775, "total_steps": 204665, "loss": 0.1692, "lr": 9.550984511652904e-07, "epoch": 0.23880487626120733, "percentage": 4.78, "elapsed_time": "0:11:20", "remaining_time": "3:46:15", "throughput": 9690.62, "total_tokens": 6598464} +{"current_steps": 9780, "total_steps": 204665, "loss": 0.1712, "lr": 9.5558704255631e-07, "epoch": 0.2389270270930545, "percentage": 4.78, "elapsed_time": "0:11:21", "remaining_time": "3:46:15", "throughput": 9690.78, "total_tokens": 6601984} +{"current_steps": 9785, "total_steps": 204665, "loss": 0.0982, "lr": 9.560756339473298e-07, "epoch": 0.23904917792490166, "percentage": 4.78, "elapsed_time": "0:11:21", "remaining_time": "3:46:15", "throughput": 9690.59, "total_tokens": 6605184} +{"current_steps": 9790, "total_steps": 204665, "loss": 0.3098, "lr": 9.565642253383494e-07, "epoch": 0.23917132875674885, "percentage": 4.78, "elapsed_time": "0:11:21", "remaining_time": "3:46:14", "throughput": 9690.4, "total_tokens": 6608384} +{"current_steps": 9795, "total_steps": 204665, "loss": 0.0284, "lr": 9.570528167293691e-07, "epoch": 0.239293479588596, "percentage": 4.79, "elapsed_time": "0:11:22", "remaining_time": "3:46:14", "throughput": 9690.4, "total_tokens": 6611776} +{"current_steps": 9800, "total_steps": 204665, "loss": 0.0463, "lr": 9.575414081203888e-07, "epoch": 0.23941563042044317, "percentage": 4.79, "elapsed_time": "0:11:22", "remaining_time": "3:46:13", "throughput": 9690.4, "total_tokens": 6615168} +{"current_steps": 9805, "total_steps": 204665, "loss": 0.2122, "lr": 9.580299995114087e-07, "epoch": 0.23953778125229033, "percentage": 4.79, "elapsed_time": "0:11:22", "remaining_time": "3:46:13", "throughput": 9690.36, "total_tokens": 6618496} +{"current_steps": 9810, "total_steps": 204665, "loss": 0.0728, "lr": 9.585185909024282e-07, "epoch": 0.2396599320841375, "percentage": 4.79, "elapsed_time": "0:11:23", "remaining_time": "3:46:13", "throughput": 9690.4, "total_tokens": 6621888} +{"current_steps": 9815, "total_steps": 204665, "loss": 0.1752, "lr": 9.590071822934478e-07, "epoch": 0.23978208291598466, "percentage": 4.8, "elapsed_time": "0:11:23", "remaining_time": "3:46:12", "throughput": 9690.29, "total_tokens": 6625152} +{"current_steps": 9820, "total_steps": 204665, "loss": 0.0904, "lr": 9.594957736844677e-07, "epoch": 0.23990423374783182, "percentage": 4.8, "elapsed_time": "0:11:24", "remaining_time": "3:46:12", "throughput": 9690.55, "total_tokens": 6628736} +{"current_steps": 9825, "total_steps": 204665, "loss": 0.1229, "lr": 9.599843650754872e-07, "epoch": 0.24002638457967898, "percentage": 4.8, "elapsed_time": "0:11:24", "remaining_time": "3:46:12", "throughput": 9690.61, "total_tokens": 6632192} +{"current_steps": 9830, "total_steps": 204665, "loss": 0.0957, "lr": 9.60472956466507e-07, "epoch": 0.24014853541152614, "percentage": 4.8, "elapsed_time": "0:11:24", "remaining_time": "3:46:11", "throughput": 9690.66, "total_tokens": 6635648} +{"current_steps": 9835, "total_steps": 204665, "loss": 0.178, "lr": 9.609615478575268e-07, "epoch": 0.2402706862433733, "percentage": 4.81, "elapsed_time": "0:11:25", "remaining_time": "3:46:11", "throughput": 9690.76, "total_tokens": 6639104} +{"current_steps": 9840, "total_steps": 204665, "loss": 0.2036, "lr": 9.614501392485463e-07, "epoch": 0.2403928370752205, "percentage": 4.81, "elapsed_time": "0:11:25", "remaining_time": "3:46:11", "throughput": 9691.06, "total_tokens": 6642752} +{"current_steps": 9845, "total_steps": 204665, "loss": 0.0657, "lr": 9.619387306395661e-07, "epoch": 0.24051498790706766, "percentage": 4.81, "elapsed_time": "0:11:25", "remaining_time": "3:46:11", "throughput": 9691.15, "total_tokens": 6646208} +{"current_steps": 9850, "total_steps": 204665, "loss": 0.122, "lr": 9.624273220305858e-07, "epoch": 0.24063713873891482, "percentage": 4.81, "elapsed_time": "0:11:26", "remaining_time": "3:46:10", "throughput": 9691.15, "total_tokens": 6649600} +{"current_steps": 9855, "total_steps": 204665, "loss": 0.1153, "lr": 9.629159134216055e-07, "epoch": 0.24075928957076198, "percentage": 4.82, "elapsed_time": "0:11:26", "remaining_time": "3:46:10", "throughput": 9691.44, "total_tokens": 6653248} +{"current_steps": 9860, "total_steps": 204665, "loss": 0.0749, "lr": 9.634045048126252e-07, "epoch": 0.24088144040260914, "percentage": 4.82, "elapsed_time": "0:11:26", "remaining_time": "3:46:10", "throughput": 9691.31, "total_tokens": 6656512} +{"current_steps": 9865, "total_steps": 204665, "loss": 0.1768, "lr": 9.638930962036449e-07, "epoch": 0.2410035912344563, "percentage": 4.82, "elapsed_time": "0:11:27", "remaining_time": "3:46:10", "throughput": 9691.59, "total_tokens": 6660160} +{"current_steps": 9870, "total_steps": 204665, "loss": 0.185, "lr": 9.643816875946646e-07, "epoch": 0.24112574206630347, "percentage": 4.82, "elapsed_time": "0:11:27", "remaining_time": "3:46:09", "throughput": 9691.18, "total_tokens": 6663168} +{"current_steps": 9875, "total_steps": 204665, "loss": 0.1417, "lr": 9.648702789856842e-07, "epoch": 0.24124789289815063, "percentage": 4.82, "elapsed_time": "0:11:27", "remaining_time": "3:46:09", "throughput": 9691.18, "total_tokens": 6666496} +{"current_steps": 9880, "total_steps": 204665, "loss": 0.1139, "lr": 9.65358870376704e-07, "epoch": 0.2413700437299978, "percentage": 4.83, "elapsed_time": "0:11:28", "remaining_time": "3:46:08", "throughput": 9690.76, "total_tokens": 6669504} +{"current_steps": 9885, "total_steps": 204665, "loss": 0.1812, "lr": 9.658474617677236e-07, "epoch": 0.24149219456184495, "percentage": 4.83, "elapsed_time": "0:11:28", "remaining_time": "3:46:08", "throughput": 9691.06, "total_tokens": 6673152} +{"current_steps": 9890, "total_steps": 204665, "loss": 0.062, "lr": 9.663360531587433e-07, "epoch": 0.24161434539369214, "percentage": 4.83, "elapsed_time": "0:11:28", "remaining_time": "3:46:08", "throughput": 9691.16, "total_tokens": 6676608} +{"current_steps": 9895, "total_steps": 204665, "loss": 0.0548, "lr": 9.66824644549763e-07, "epoch": 0.2417364962255393, "percentage": 4.83, "elapsed_time": "0:11:29", "remaining_time": "3:46:07", "throughput": 9690.63, "total_tokens": 6679488} +{"current_steps": 9900, "total_steps": 204665, "loss": 0.2652, "lr": 9.673132359407826e-07, "epoch": 0.24185864705738647, "percentage": 4.84, "elapsed_time": "0:11:29", "remaining_time": "3:46:06", "throughput": 9690.39, "total_tokens": 6682624} +{"current_steps": 9905, "total_steps": 204665, "loss": 0.0769, "lr": 9.678018273318023e-07, "epoch": 0.24198079788923363, "percentage": 4.84, "elapsed_time": "0:11:29", "remaining_time": "3:46:06", "throughput": 9690.34, "total_tokens": 6685952} +{"current_steps": 9910, "total_steps": 204665, "loss": 0.1698, "lr": 9.68290418722822e-07, "epoch": 0.2421029487210808, "percentage": 4.84, "elapsed_time": "0:11:30", "remaining_time": "3:46:06", "throughput": 9691.01, "total_tokens": 6689920} +{"current_steps": 9915, "total_steps": 204665, "loss": 0.146, "lr": 9.687790101138417e-07, "epoch": 0.24222509955292795, "percentage": 4.84, "elapsed_time": "0:11:30", "remaining_time": "3:46:06", "throughput": 9690.84, "total_tokens": 6693184} +{"current_steps": 9920, "total_steps": 204665, "loss": 0.0271, "lr": 9.692676015048614e-07, "epoch": 0.24234725038477511, "percentage": 4.85, "elapsed_time": "0:11:31", "remaining_time": "3:46:05", "throughput": 9690.57, "total_tokens": 6696320} +{"current_steps": 9925, "total_steps": 204665, "loss": 0.0984, "lr": 9.697561928958813e-07, "epoch": 0.24246940121662228, "percentage": 4.85, "elapsed_time": "0:11:31", "remaining_time": "3:46:05", "throughput": 9689.95, "total_tokens": 6699136} +{"current_steps": 9930, "total_steps": 204665, "loss": 0.113, "lr": 9.702447842869007e-07, "epoch": 0.24259155204846944, "percentage": 4.85, "elapsed_time": "0:11:31", "remaining_time": "3:46:05", "throughput": 9690.91, "total_tokens": 6703360} +{"current_steps": 9935, "total_steps": 204665, "loss": 0.1492, "lr": 9.707333756779204e-07, "epoch": 0.24271370288031663, "percentage": 4.85, "elapsed_time": "0:11:32", "remaining_time": "3:46:04", "throughput": 9690.92, "total_tokens": 6706752} +{"current_steps": 9940, "total_steps": 204665, "loss": 0.1364, "lr": 9.712219670689403e-07, "epoch": 0.2428358537121638, "percentage": 4.86, "elapsed_time": "0:11:32", "remaining_time": "3:46:04", "throughput": 9690.93, "total_tokens": 6710144} +{"current_steps": 9945, "total_steps": 204665, "loss": 0.2191, "lr": 9.717105584599598e-07, "epoch": 0.24295800454401095, "percentage": 4.86, "elapsed_time": "0:11:32", "remaining_time": "3:46:03", "throughput": 9690.43, "total_tokens": 6713088} +{"current_steps": 9950, "total_steps": 204665, "loss": 0.1625, "lr": 9.721991498509797e-07, "epoch": 0.24308015537585811, "percentage": 4.86, "elapsed_time": "0:11:33", "remaining_time": "3:46:03", "throughput": 9690.33, "total_tokens": 6716352} +{"current_steps": 9955, "total_steps": 204665, "loss": 0.1616, "lr": 9.726877412419993e-07, "epoch": 0.24320230620770528, "percentage": 4.86, "elapsed_time": "0:11:33", "remaining_time": "3:46:02", "throughput": 9690.1, "total_tokens": 6719488} +{"current_steps": 9960, "total_steps": 204665, "loss": 0.0587, "lr": 9.73176332633019e-07, "epoch": 0.24332445703955244, "percentage": 4.87, "elapsed_time": "0:11:33", "remaining_time": "3:46:02", "throughput": 9690.1, "total_tokens": 6722880} +{"current_steps": 9965, "total_steps": 204665, "loss": 0.1104, "lr": 9.736649240240387e-07, "epoch": 0.2434466078713996, "percentage": 4.87, "elapsed_time": "0:11:34", "remaining_time": "3:46:02", "throughput": 9690.67, "total_tokens": 6726784} +{"current_steps": 9970, "total_steps": 204665, "loss": 0.076, "lr": 9.741535154150584e-07, "epoch": 0.24356875870324676, "percentage": 4.87, "elapsed_time": "0:11:34", "remaining_time": "3:46:02", "throughput": 9691.13, "total_tokens": 6730560} +{"current_steps": 9975, "total_steps": 204665, "loss": 0.0844, "lr": 9.74642106806078e-07, "epoch": 0.24369090953509392, "percentage": 4.87, "elapsed_time": "0:11:34", "remaining_time": "3:46:02", "throughput": 9691.33, "total_tokens": 6734144} +{"current_steps": 9980, "total_steps": 204665, "loss": 0.0711, "lr": 9.751306981970978e-07, "epoch": 0.2438130603669411, "percentage": 4.88, "elapsed_time": "0:11:35", "remaining_time": "3:46:01", "throughput": 9691.33, "total_tokens": 6737536} +{"current_steps": 9985, "total_steps": 204665, "loss": 0.0827, "lr": 9.756192895881174e-07, "epoch": 0.24393521119878828, "percentage": 4.88, "elapsed_time": "0:11:35", "remaining_time": "3:46:01", "throughput": 9691.26, "total_tokens": 6740864} +{"current_steps": 9990, "total_steps": 204665, "loss": 0.119, "lr": 9.761078809791371e-07, "epoch": 0.24405736203063544, "percentage": 4.88, "elapsed_time": "0:11:35", "remaining_time": "3:46:01", "throughput": 9691.26, "total_tokens": 6744256} +{"current_steps": 9995, "total_steps": 204665, "loss": 0.1792, "lr": 9.765964723701568e-07, "epoch": 0.2441795128624826, "percentage": 4.88, "elapsed_time": "0:11:36", "remaining_time": "3:46:01", "throughput": 9691.55, "total_tokens": 6747904} +{"current_steps": 10000, "total_steps": 204665, "loss": 0.1775, "lr": 9.770850637611765e-07, "epoch": 0.24430166369432976, "percentage": 4.89, "elapsed_time": "0:11:36", "remaining_time": "3:46:00", "throughput": 9691.51, "total_tokens": 6751232} +{"current_steps": 10005, "total_steps": 204665, "loss": 0.2521, "lr": 9.775736551521962e-07, "epoch": 0.24442381452617692, "percentage": 4.89, "elapsed_time": "0:11:36", "remaining_time": "3:46:00", "throughput": 9691.24, "total_tokens": 6754368} +{"current_steps": 10010, "total_steps": 204665, "loss": 0.0733, "lr": 9.780622465432158e-07, "epoch": 0.2445459653580241, "percentage": 4.89, "elapsed_time": "0:11:37", "remaining_time": "3:45:59", "throughput": 9691.11, "total_tokens": 6757632} +{"current_steps": 10015, "total_steps": 204665, "loss": 0.2123, "lr": 9.785508379342355e-07, "epoch": 0.24466811618987125, "percentage": 4.89, "elapsed_time": "0:11:37", "remaining_time": "3:45:59", "throughput": 9691.15, "total_tokens": 6761024} +{"current_steps": 10020, "total_steps": 204665, "loss": 0.1199, "lr": 9.790394293252552e-07, "epoch": 0.2447902670217184, "percentage": 4.9, "elapsed_time": "0:11:37", "remaining_time": "3:45:59", "throughput": 9691.25, "total_tokens": 6764480} +{"current_steps": 10025, "total_steps": 204665, "loss": 0.0975, "lr": 9.795280207162749e-07, "epoch": 0.24491241785356557, "percentage": 4.9, "elapsed_time": "0:11:38", "remaining_time": "3:45:58", "throughput": 9691.06, "total_tokens": 6767680} +{"current_steps": 10030, "total_steps": 204665, "loss": 0.1569, "lr": 9.800166121072946e-07, "epoch": 0.24503456868541273, "percentage": 4.9, "elapsed_time": "0:11:38", "remaining_time": "3:45:58", "throughput": 9691.05, "total_tokens": 6771008} +{"current_steps": 10035, "total_steps": 204665, "loss": 0.1284, "lr": 9.805052034983142e-07, "epoch": 0.24515671951725992, "percentage": 4.9, "elapsed_time": "0:11:39", "remaining_time": "3:45:57", "throughput": 9690.74, "total_tokens": 6774080} +{"current_steps": 10040, "total_steps": 204665, "loss": 0.0622, "lr": 9.80993794889334e-07, "epoch": 0.2452788703491071, "percentage": 4.91, "elapsed_time": "0:11:39", "remaining_time": "3:45:57", "throughput": 9690.74, "total_tokens": 6777472} +{"current_steps": 10045, "total_steps": 204665, "loss": 0.087, "lr": 9.814823862803538e-07, "epoch": 0.24540102118095425, "percentage": 4.91, "elapsed_time": "0:11:39", "remaining_time": "3:45:56", "throughput": 9690.68, "total_tokens": 6780736} +{"current_steps": 10050, "total_steps": 204665, "loss": 0.192, "lr": 9.819709776713733e-07, "epoch": 0.2455231720128014, "percentage": 4.91, "elapsed_time": "0:11:40", "remaining_time": "3:45:56", "throughput": 9690.44, "total_tokens": 6783872} +{"current_steps": 10055, "total_steps": 204665, "loss": 0.1749, "lr": 9.82459569062393e-07, "epoch": 0.24564532284464857, "percentage": 4.91, "elapsed_time": "0:11:40", "remaining_time": "3:45:56", "throughput": 9690.53, "total_tokens": 6787328} +{"current_steps": 10060, "total_steps": 204665, "loss": 0.1122, "lr": 9.829481604534129e-07, "epoch": 0.24576747367649573, "percentage": 4.92, "elapsed_time": "0:11:40", "remaining_time": "3:45:56", "throughput": 9691.19, "total_tokens": 6791296} +{"current_steps": 10065, "total_steps": 204665, "loss": 0.0593, "lr": 9.834367518444323e-07, "epoch": 0.2458896245083429, "percentage": 4.92, "elapsed_time": "0:11:41", "remaining_time": "3:45:55", "throughput": 9691.14, "total_tokens": 6794624} +{"current_steps": 10070, "total_steps": 204665, "loss": 0.092, "lr": 9.839253432354522e-07, "epoch": 0.24601177534019006, "percentage": 4.92, "elapsed_time": "0:11:41", "remaining_time": "3:45:55", "throughput": 9690.59, "total_tokens": 6797504} +{"current_steps": 10075, "total_steps": 204665, "loss": 0.0173, "lr": 9.84413934626472e-07, "epoch": 0.24613392617203722, "percentage": 4.92, "elapsed_time": "0:11:41", "remaining_time": "3:45:54", "throughput": 9690.69, "total_tokens": 6800960} +{"current_steps": 10080, "total_steps": 204665, "loss": 0.0718, "lr": 9.849025260174916e-07, "epoch": 0.24625607700388438, "percentage": 4.93, "elapsed_time": "0:11:42", "remaining_time": "3:45:54", "throughput": 9690.8, "total_tokens": 6804416} +{"current_steps": 10085, "total_steps": 204665, "loss": 0.0776, "lr": 9.853911174085113e-07, "epoch": 0.24637822783573157, "percentage": 4.93, "elapsed_time": "0:11:42", "remaining_time": "3:45:54", "throughput": 9691.17, "total_tokens": 6808128} +{"current_steps": 10090, "total_steps": 204665, "loss": 0.0546, "lr": 9.85879708799531e-07, "epoch": 0.24650037866757873, "percentage": 4.93, "elapsed_time": "0:11:42", "remaining_time": "3:45:53", "throughput": 9691.27, "total_tokens": 6811584} +{"current_steps": 10095, "total_steps": 204665, "loss": 0.1041, "lr": 9.863683001905506e-07, "epoch": 0.2466225294994259, "percentage": 4.93, "elapsed_time": "0:11:43", "remaining_time": "3:45:53", "throughput": 9691.62, "total_tokens": 6815232} +{"current_steps": 10100, "total_steps": 204665, "loss": 0.1665, "lr": 9.868568915815703e-07, "epoch": 0.24674468033127306, "percentage": 4.93, "elapsed_time": "0:11:43", "remaining_time": "3:45:53", "throughput": 9691.83, "total_tokens": 6818816} +{"current_steps": 10105, "total_steps": 204665, "loss": 0.2181, "lr": 9.8734548297259e-07, "epoch": 0.24686683116312022, "percentage": 4.94, "elapsed_time": "0:11:43", "remaining_time": "3:45:52", "throughput": 9691.7, "total_tokens": 6822080} +{"current_steps": 10110, "total_steps": 204665, "loss": 0.1648, "lr": 9.878340743636097e-07, "epoch": 0.24698898199496738, "percentage": 4.94, "elapsed_time": "0:11:44", "remaining_time": "3:45:52", "throughput": 9691.47, "total_tokens": 6825216} +{"current_steps": 10115, "total_steps": 204665, "loss": 0.2491, "lr": 9.883226657546294e-07, "epoch": 0.24711113282681454, "percentage": 4.94, "elapsed_time": "0:11:44", "remaining_time": "3:45:52", "throughput": 9691.48, "total_tokens": 6828608} +{"current_steps": 10120, "total_steps": 204665, "loss": 0.281, "lr": 9.88811257145649e-07, "epoch": 0.2472332836586617, "percentage": 4.94, "elapsed_time": "0:11:44", "remaining_time": "3:45:51", "throughput": 9691.38, "total_tokens": 6831872} +{"current_steps": 10125, "total_steps": 204665, "loss": 0.0527, "lr": 9.892998485366687e-07, "epoch": 0.24735543449050887, "percentage": 4.95, "elapsed_time": "0:11:45", "remaining_time": "3:45:51", "throughput": 9691.61, "total_tokens": 6835456} +{"current_steps": 10130, "total_steps": 204665, "loss": 0.1532, "lr": 9.897884399276884e-07, "epoch": 0.24747758532235606, "percentage": 4.95, "elapsed_time": "0:11:45", "remaining_time": "3:45:51", "throughput": 9692.46, "total_tokens": 6839616} +{"current_steps": 10135, "total_steps": 204665, "loss": 0.1079, "lr": 9.90277031318708e-07, "epoch": 0.24759973615420322, "percentage": 4.95, "elapsed_time": "0:11:46", "remaining_time": "3:45:51", "throughput": 9692.37, "total_tokens": 6842880} +{"current_steps": 10140, "total_steps": 204665, "loss": 0.075, "lr": 9.907656227097278e-07, "epoch": 0.24772188698605038, "percentage": 4.95, "elapsed_time": "0:11:46", "remaining_time": "3:45:50", "throughput": 9692.52, "total_tokens": 6846400} +{"current_steps": 10145, "total_steps": 204665, "loss": 0.1472, "lr": 9.912542141007474e-07, "epoch": 0.24784403781789754, "percentage": 4.96, "elapsed_time": "0:11:46", "remaining_time": "3:45:50", "throughput": 9692.48, "total_tokens": 6849728} +{"current_steps": 10150, "total_steps": 204665, "loss": 0.1916, "lr": 9.917428054917671e-07, "epoch": 0.2479661886497447, "percentage": 4.96, "elapsed_time": "0:11:47", "remaining_time": "3:45:49", "throughput": 9691.91, "total_tokens": 6852544} +{"current_steps": 10155, "total_steps": 204665, "loss": 0.1914, "lr": 9.922313968827868e-07, "epoch": 0.24808833948159187, "percentage": 4.96, "elapsed_time": "0:11:47", "remaining_time": "3:45:49", "throughput": 9691.9, "total_tokens": 6855936} +{"current_steps": 10160, "total_steps": 204665, "loss": 0.1312, "lr": 9.927199882738065e-07, "epoch": 0.24821049031343903, "percentage": 4.96, "elapsed_time": "0:11:47", "remaining_time": "3:45:49", "throughput": 9692.18, "total_tokens": 6859584} +{"current_steps": 10165, "total_steps": 204665, "loss": 0.2342, "lr": 9.932085796648264e-07, "epoch": 0.2483326411452862, "percentage": 4.97, "elapsed_time": "0:11:48", "remaining_time": "3:45:48", "throughput": 9691.9, "total_tokens": 6862720} +{"current_steps": 10170, "total_steps": 204665, "loss": 0.0421, "lr": 9.936971710558459e-07, "epoch": 0.24845479197713335, "percentage": 4.97, "elapsed_time": "0:11:48", "remaining_time": "3:45:48", "throughput": 9692.2, "total_tokens": 6866368} +{"current_steps": 10175, "total_steps": 204665, "loss": 0.0989, "lr": 9.941857624468657e-07, "epoch": 0.24857694280898052, "percentage": 4.97, "elapsed_time": "0:11:48", "remaining_time": "3:45:48", "throughput": 9692.01, "total_tokens": 6869568} +{"current_steps": 10180, "total_steps": 204665, "loss": 0.0829, "lr": 9.946743538378854e-07, "epoch": 0.2486990936408277, "percentage": 4.97, "elapsed_time": "0:11:49", "remaining_time": "3:45:47", "throughput": 9691.91, "total_tokens": 6872832} +{"current_steps": 10185, "total_steps": 204665, "loss": 0.1695, "lr": 9.95162945228905e-07, "epoch": 0.24882124447267487, "percentage": 4.98, "elapsed_time": "0:11:49", "remaining_time": "3:45:47", "throughput": 9692.41, "total_tokens": 6876672} +{"current_steps": 10190, "total_steps": 204665, "loss": 0.1031, "lr": 9.956515366199248e-07, "epoch": 0.24894339530452203, "percentage": 4.98, "elapsed_time": "0:11:49", "remaining_time": "3:45:47", "throughput": 9692.36, "total_tokens": 6880000} +{"current_steps": 10195, "total_steps": 204665, "loss": 0.0678, "lr": 9.961401280109445e-07, "epoch": 0.2490655461363692, "percentage": 4.98, "elapsed_time": "0:11:50", "remaining_time": "3:45:46", "throughput": 9692.5, "total_tokens": 6883520} +{"current_steps": 10200, "total_steps": 204665, "loss": 0.107, "lr": 9.966287194019642e-07, "epoch": 0.24918769696821635, "percentage": 4.98, "elapsed_time": "0:11:50", "remaining_time": "3:45:46", "throughput": 9692.77, "total_tokens": 6887168} +{"current_steps": 10205, "total_steps": 204665, "loss": 0.0546, "lr": 9.971173107929838e-07, "epoch": 0.24930984780006352, "percentage": 4.99, "elapsed_time": "0:11:50", "remaining_time": "3:45:46", "throughput": 9693.22, "total_tokens": 6890944} +{"current_steps": 10210, "total_steps": 204665, "loss": 0.1747, "lr": 9.976059021840035e-07, "epoch": 0.24943199863191068, "percentage": 4.99, "elapsed_time": "0:11:51", "remaining_time": "3:45:46", "throughput": 9692.91, "total_tokens": 6894080} +{"current_steps": 10215, "total_steps": 204665, "loss": 0.1655, "lr": 9.980944935750232e-07, "epoch": 0.24955414946375784, "percentage": 4.99, "elapsed_time": "0:11:51", "remaining_time": "3:45:45", "throughput": 9692.87, "total_tokens": 6897408} +{"current_steps": 10220, "total_steps": 204665, "loss": 0.1251, "lr": 9.985830849660429e-07, "epoch": 0.249676300295605, "percentage": 4.99, "elapsed_time": "0:11:51", "remaining_time": "3:45:45", "throughput": 9692.67, "total_tokens": 6900608} +{"current_steps": 10225, "total_steps": 204665, "loss": 0.1599, "lr": 9.990716763570626e-07, "epoch": 0.24979845112745216, "percentage": 5.0, "elapsed_time": "0:11:52", "remaining_time": "3:45:45", "throughput": 9692.87, "total_tokens": 6904192} +{"current_steps": 10230, "total_steps": 204665, "loss": 0.1388, "lr": 9.995602677480822e-07, "epoch": 0.24992060195929935, "percentage": 5.0, "elapsed_time": "0:11:52", "remaining_time": "3:45:44", "throughput": 9693.43, "total_tokens": 6908032} +{"current_steps": 10234, "total_steps": 204665, "eval_loss": 0.150357186794281, "epoch": 0.2500183226247771, "percentage": 5.0, "elapsed_time": "0:12:40", "remaining_time": "4:00:51", "throughput": 9084.98, "total_tokens": 6910656} +{"current_steps": 10235, "total_steps": 204665, "loss": 0.2217, "lr": 1.000048859139102e-06, "epoch": 0.2500427527911465, "percentage": 5.0, "elapsed_time": "0:13:16", "remaining_time": "4:12:12", "throughput": 8676.3, "total_tokens": 6911360} +{"current_steps": 10240, "total_steps": 204665, "loss": 0.1544, "lr": 1.0005374505301216e-06, "epoch": 0.25016490362299365, "percentage": 5.0, "elapsed_time": "0:13:16", "remaining_time": "4:12:11", "throughput": 8676.57, "total_tokens": 6914624} +{"current_steps": 10245, "total_steps": 204665, "loss": 0.1194, "lr": 1.0010260419211413e-06, "epoch": 0.25028705445484084, "percentage": 5.01, "elapsed_time": "0:13:17", "remaining_time": "4:12:10", "throughput": 8677.13, "total_tokens": 6918144} +{"current_steps": 10250, "total_steps": 204665, "loss": 0.0953, "lr": 1.001514633312161e-06, "epoch": 0.250409205286688, "percentage": 5.01, "elapsed_time": "0:13:17", "remaining_time": "4:12:08", "throughput": 8677.44, "total_tokens": 6921408} +{"current_steps": 10255, "total_steps": 204665, "loss": 0.0691, "lr": 1.0020032247031806e-06, "epoch": 0.25053135611853516, "percentage": 5.01, "elapsed_time": "0:13:17", "remaining_time": "4:12:08", "throughput": 8678.33, "total_tokens": 6925248} +{"current_steps": 10260, "total_steps": 204665, "loss": 0.1579, "lr": 1.0024918160942003e-06, "epoch": 0.25065350695038235, "percentage": 5.01, "elapsed_time": "0:13:18", "remaining_time": "4:12:06", "throughput": 8678.63, "total_tokens": 6928512} +{"current_steps": 10265, "total_steps": 204665, "loss": 0.1694, "lr": 1.0029804074852202e-06, "epoch": 0.2507756577822295, "percentage": 5.02, "elapsed_time": "0:13:18", "remaining_time": "4:12:05", "throughput": 8679.06, "total_tokens": 6931904} +{"current_steps": 10270, "total_steps": 204665, "loss": 0.1749, "lr": 1.0034689988762397e-06, "epoch": 0.2508978086140767, "percentage": 5.02, "elapsed_time": "0:13:19", "remaining_time": "4:12:04", "throughput": 8679.77, "total_tokens": 6935552} +{"current_steps": 10275, "total_steps": 204665, "loss": 0.1575, "lr": 1.0039575902672594e-06, "epoch": 0.2510199594459238, "percentage": 5.02, "elapsed_time": "0:13:19", "remaining_time": "4:12:03", "throughput": 8680.47, "total_tokens": 6939200} +{"current_steps": 10280, "total_steps": 204665, "loss": 0.1271, "lr": 1.0044461816582793e-06, "epoch": 0.251142110277771, "percentage": 5.02, "elapsed_time": "0:13:19", "remaining_time": "4:12:02", "throughput": 8680.69, "total_tokens": 6942400} +{"current_steps": 10285, "total_steps": 204665, "loss": 0.1603, "lr": 1.0049347730492987e-06, "epoch": 0.25126426110961814, "percentage": 5.03, "elapsed_time": "0:13:20", "remaining_time": "4:12:01", "throughput": 8680.7, "total_tokens": 6945344} +{"current_steps": 10290, "total_steps": 204665, "loss": 0.1221, "lr": 1.0054233644403184e-06, "epoch": 0.2513864119414653, "percentage": 5.03, "elapsed_time": "0:13:20", "remaining_time": "4:12:00", "throughput": 8681.07, "total_tokens": 6948672} +{"current_steps": 10295, "total_steps": 204665, "loss": 0.1968, "lr": 1.0059119558313383e-06, "epoch": 0.25150856277331246, "percentage": 5.03, "elapsed_time": "0:13:20", "remaining_time": "4:11:58", "throughput": 8681.66, "total_tokens": 6952192} +{"current_steps": 10300, "total_steps": 204665, "loss": 0.0807, "lr": 1.0064005472223578e-06, "epoch": 0.25163071360515965, "percentage": 5.03, "elapsed_time": "0:13:21", "remaining_time": "4:11:57", "throughput": 8681.84, "total_tokens": 6955328} +{"current_steps": 10305, "total_steps": 204665, "loss": 0.166, "lr": 1.0068891386133775e-06, "epoch": 0.25175286443700684, "percentage": 5.04, "elapsed_time": "0:13:21", "remaining_time": "4:11:56", "throughput": 8681.91, "total_tokens": 6958336} +{"current_steps": 10310, "total_steps": 204665, "loss": 0.1161, "lr": 1.0073777300043974e-06, "epoch": 0.251875015268854, "percentage": 5.04, "elapsed_time": "0:13:21", "remaining_time": "4:11:55", "throughput": 8682.56, "total_tokens": 6961920} +{"current_steps": 10315, "total_steps": 204665, "loss": 0.1002, "lr": 1.007866321395417e-06, "epoch": 0.25199716610070116, "percentage": 5.04, "elapsed_time": "0:13:22", "remaining_time": "4:11:54", "throughput": 8682.85, "total_tokens": 6965184} +{"current_steps": 10320, "total_steps": 204665, "loss": 0.1075, "lr": 1.0083549127864365e-06, "epoch": 0.2521193169325483, "percentage": 5.04, "elapsed_time": "0:13:22", "remaining_time": "4:11:53", "throughput": 8683.61, "total_tokens": 6968896} +{"current_steps": 10325, "total_steps": 204665, "loss": 0.1228, "lr": 1.0088435041774564e-06, "epoch": 0.2522414677643955, "percentage": 5.04, "elapsed_time": "0:13:22", "remaining_time": "4:11:52", "throughput": 8684.11, "total_tokens": 6972352} +{"current_steps": 10330, "total_steps": 204665, "loss": 0.0779, "lr": 1.009332095568476e-06, "epoch": 0.2523636185962426, "percentage": 5.05, "elapsed_time": "0:13:23", "remaining_time": "4:11:51", "throughput": 8684.47, "total_tokens": 6975680} +{"current_steps": 10335, "total_steps": 204665, "loss": 0.1664, "lr": 1.0098206869594955e-06, "epoch": 0.2524857694280898, "percentage": 5.05, "elapsed_time": "0:13:23", "remaining_time": "4:11:49", "throughput": 8684.63, "total_tokens": 6978816} +{"current_steps": 10340, "total_steps": 204665, "loss": 0.1542, "lr": 1.0103092783505154e-06, "epoch": 0.25260792025993695, "percentage": 5.05, "elapsed_time": "0:13:23", "remaining_time": "4:11:48", "throughput": 8685.31, "total_tokens": 6982464} +{"current_steps": 10345, "total_steps": 204665, "loss": 0.1632, "lr": 1.0107978697415351e-06, "epoch": 0.25273007109178414, "percentage": 5.05, "elapsed_time": "0:13:24", "remaining_time": "4:11:47", "throughput": 8685.58, "total_tokens": 6985664} +{"current_steps": 10350, "total_steps": 204665, "loss": 0.0819, "lr": 1.0112864611325548e-06, "epoch": 0.2528522219236313, "percentage": 5.06, "elapsed_time": "0:13:24", "remaining_time": "4:11:46", "throughput": 8685.55, "total_tokens": 6988608} +{"current_steps": 10355, "total_steps": 204665, "loss": 0.0703, "lr": 1.0117750525235745e-06, "epoch": 0.25297437275547846, "percentage": 5.06, "elapsed_time": "0:13:24", "remaining_time": "4:11:45", "throughput": 8686.07, "total_tokens": 6992064} +{"current_steps": 10360, "total_steps": 204665, "loss": 0.1059, "lr": 1.0122636439145942e-06, "epoch": 0.25309652358732565, "percentage": 5.06, "elapsed_time": "0:13:25", "remaining_time": "4:11:44", "throughput": 8686.49, "total_tokens": 6995456} +{"current_steps": 10365, "total_steps": 204665, "loss": 0.0914, "lr": 1.0127522353056138e-06, "epoch": 0.2532186744191728, "percentage": 5.06, "elapsed_time": "0:13:25", "remaining_time": "4:11:43", "throughput": 8687.6, "total_tokens": 6999488} +{"current_steps": 10370, "total_steps": 204665, "loss": 0.1598, "lr": 1.0132408266966335e-06, "epoch": 0.25334082525102, "percentage": 5.07, "elapsed_time": "0:13:26", "remaining_time": "4:11:41", "throughput": 8687.67, "total_tokens": 7002496} +{"current_steps": 10375, "total_steps": 204665, "loss": 0.2147, "lr": 1.0137294180876532e-06, "epoch": 0.2534629760828671, "percentage": 5.07, "elapsed_time": "0:13:26", "remaining_time": "4:11:40", "throughput": 8687.93, "total_tokens": 7005696} +{"current_steps": 10380, "total_steps": 204665, "loss": 0.2037, "lr": 1.0142180094786729e-06, "epoch": 0.2535851269147143, "percentage": 5.07, "elapsed_time": "0:13:26", "remaining_time": "4:11:39", "throughput": 8688.48, "total_tokens": 7009216} +{"current_steps": 10385, "total_steps": 204665, "loss": 0.0145, "lr": 1.0147066008696928e-06, "epoch": 0.25370727774656143, "percentage": 5.07, "elapsed_time": "0:13:27", "remaining_time": "4:11:38", "throughput": 8688.8, "total_tokens": 7012544} +{"current_steps": 10390, "total_steps": 204665, "loss": 0.1001, "lr": 1.0151951922607123e-06, "epoch": 0.2538294285784086, "percentage": 5.08, "elapsed_time": "0:13:27", "remaining_time": "4:11:37", "throughput": 8689.2, "total_tokens": 7015936} +{"current_steps": 10395, "total_steps": 204665, "loss": 0.104, "lr": 1.015683783651732e-06, "epoch": 0.25395157941025576, "percentage": 5.08, "elapsed_time": "0:13:27", "remaining_time": "4:11:36", "throughput": 8689.61, "total_tokens": 7019328} +{"current_steps": 10400, "total_steps": 204665, "loss": 0.1893, "lr": 1.0161723750427518e-06, "epoch": 0.25407373024210295, "percentage": 5.08, "elapsed_time": "0:13:28", "remaining_time": "4:11:35", "throughput": 8690.11, "total_tokens": 7022784} +{"current_steps": 10405, "total_steps": 204665, "loss": 0.0968, "lr": 1.0166609664337713e-06, "epoch": 0.25419588107395014, "percentage": 5.08, "elapsed_time": "0:13:28", "remaining_time": "4:11:34", "throughput": 8690.2, "total_tokens": 7025856} +{"current_steps": 10410, "total_steps": 204665, "loss": 0.1591, "lr": 1.017149557824791e-06, "epoch": 0.25431803190579727, "percentage": 5.09, "elapsed_time": "0:13:28", "remaining_time": "4:11:33", "throughput": 8690.76, "total_tokens": 7029376} +{"current_steps": 10415, "total_steps": 204665, "loss": 0.1463, "lr": 1.0176381492158109e-06, "epoch": 0.25444018273764446, "percentage": 5.09, "elapsed_time": "0:13:29", "remaining_time": "4:11:32", "throughput": 8691.25, "total_tokens": 7032832} +{"current_steps": 10420, "total_steps": 204665, "loss": 0.0194, "lr": 1.0181267406068306e-06, "epoch": 0.2545623335694916, "percentage": 5.09, "elapsed_time": "0:13:29", "remaining_time": "4:11:30", "throughput": 8691.56, "total_tokens": 7036096} +{"current_steps": 10425, "total_steps": 204665, "loss": 0.1483, "lr": 1.01861533199785e-06, "epoch": 0.2546844844013388, "percentage": 5.09, "elapsed_time": "0:13:29", "remaining_time": "4:11:29", "throughput": 8691.89, "total_tokens": 7039360} +{"current_steps": 10430, "total_steps": 204665, "loss": 0.1019, "lr": 1.01910392338887e-06, "epoch": 0.2548066352331859, "percentage": 5.1, "elapsed_time": "0:13:30", "remaining_time": "4:11:28", "throughput": 8692.09, "total_tokens": 7042496} +{"current_steps": 10435, "total_steps": 204665, "loss": 0.3398, "lr": 1.0195925147798896e-06, "epoch": 0.2549287860650331, "percentage": 5.1, "elapsed_time": "0:13:30", "remaining_time": "4:11:27", "throughput": 8692.28, "total_tokens": 7045632} +{"current_steps": 10440, "total_steps": 204665, "loss": 0.1183, "lr": 1.020081106170909e-06, "epoch": 0.25505093689688024, "percentage": 5.1, "elapsed_time": "0:13:30", "remaining_time": "4:11:26", "throughput": 8692.71, "total_tokens": 7049024} +{"current_steps": 10445, "total_steps": 204665, "loss": 0.0926, "lr": 1.020569697561929e-06, "epoch": 0.25517308772872743, "percentage": 5.1, "elapsed_time": "0:13:31", "remaining_time": "4:11:25", "throughput": 8693.59, "total_tokens": 7053056} +{"current_steps": 10450, "total_steps": 204665, "loss": 0.1251, "lr": 1.0210582889529486e-06, "epoch": 0.2552952385605746, "percentage": 5.11, "elapsed_time": "0:13:31", "remaining_time": "4:11:24", "throughput": 8693.75, "total_tokens": 7056128} +{"current_steps": 10455, "total_steps": 204665, "loss": 0.0557, "lr": 1.0215468803439681e-06, "epoch": 0.25541738939242176, "percentage": 5.11, "elapsed_time": "0:13:31", "remaining_time": "4:11:23", "throughput": 8694.25, "total_tokens": 7059584} +{"current_steps": 10460, "total_steps": 204665, "loss": 0.0973, "lr": 1.022035471734988e-06, "epoch": 0.25553954022426895, "percentage": 5.11, "elapsed_time": "0:13:32", "remaining_time": "4:11:22", "throughput": 8694.81, "total_tokens": 7063104} +{"current_steps": 10465, "total_steps": 204665, "loss": 0.1217, "lr": 1.0225240631260077e-06, "epoch": 0.2556616910561161, "percentage": 5.11, "elapsed_time": "0:13:32", "remaining_time": "4:11:21", "throughput": 8695.11, "total_tokens": 7066368} +{"current_steps": 10470, "total_steps": 204665, "loss": 0.1888, "lr": 1.0230126545170274e-06, "epoch": 0.25578384188796327, "percentage": 5.12, "elapsed_time": "0:13:33", "remaining_time": "4:11:19", "throughput": 8695.29, "total_tokens": 7069504} +{"current_steps": 10475, "total_steps": 204665, "loss": 0.1105, "lr": 1.023501245908047e-06, "epoch": 0.2559059927198104, "percentage": 5.12, "elapsed_time": "0:13:33", "remaining_time": "4:11:18", "throughput": 8695.92, "total_tokens": 7073152} +{"current_steps": 10480, "total_steps": 204665, "loss": 0.1404, "lr": 1.0239898372990667e-06, "epoch": 0.2560281435516576, "percentage": 5.12, "elapsed_time": "0:13:33", "remaining_time": "4:11:17", "throughput": 8696.4, "total_tokens": 7076608} +{"current_steps": 10485, "total_steps": 204665, "loss": 0.1086, "lr": 1.0244784286900864e-06, "epoch": 0.25615029438350473, "percentage": 5.12, "elapsed_time": "0:13:34", "remaining_time": "4:11:16", "throughput": 8696.82, "total_tokens": 7080000} +{"current_steps": 10490, "total_steps": 204665, "loss": 0.1311, "lr": 1.024967020081106e-06, "epoch": 0.2562724452153519, "percentage": 5.13, "elapsed_time": "0:13:34", "remaining_time": "4:11:15", "throughput": 8697.13, "total_tokens": 7083264} +{"current_steps": 10495, "total_steps": 204665, "loss": 0.1045, "lr": 1.0254556114721258e-06, "epoch": 0.2563945960471991, "percentage": 5.13, "elapsed_time": "0:13:34", "remaining_time": "4:11:14", "throughput": 8697.4, "total_tokens": 7086528} +{"current_steps": 10500, "total_steps": 204665, "loss": 0.2151, "lr": 1.0259442028631455e-06, "epoch": 0.25651674687904624, "percentage": 5.13, "elapsed_time": "0:13:35", "remaining_time": "4:11:13", "throughput": 8697.51, "total_tokens": 7089600} +{"current_steps": 10505, "total_steps": 204665, "loss": 0.19, "lr": 1.0264327942541653e-06, "epoch": 0.25663889771089343, "percentage": 5.13, "elapsed_time": "0:13:35", "remaining_time": "4:11:12", "throughput": 8697.62, "total_tokens": 7092672} +{"current_steps": 10510, "total_steps": 204665, "loss": 0.1237, "lr": 1.0269213856451848e-06, "epoch": 0.25676104854274057, "percentage": 5.14, "elapsed_time": "0:13:35", "remaining_time": "4:11:10", "throughput": 8698.12, "total_tokens": 7096128} +{"current_steps": 10515, "total_steps": 204665, "loss": 0.0726, "lr": 1.0274099770362045e-06, "epoch": 0.25688319937458776, "percentage": 5.14, "elapsed_time": "0:13:36", "remaining_time": "4:11:10", "throughput": 8698.59, "total_tokens": 7099712} +{"current_steps": 10520, "total_steps": 204665, "loss": 0.0353, "lr": 1.0278985684272244e-06, "epoch": 0.2570053502064349, "percentage": 5.14, "elapsed_time": "0:13:36", "remaining_time": "4:11:09", "throughput": 8698.94, "total_tokens": 7103040} +{"current_steps": 10525, "total_steps": 204665, "loss": 0.14, "lr": 1.0283871598182439e-06, "epoch": 0.2571275010382821, "percentage": 5.14, "elapsed_time": "0:13:36", "remaining_time": "4:11:07", "throughput": 8699.27, "total_tokens": 7106304} +{"current_steps": 10530, "total_steps": 204665, "loss": 0.1152, "lr": 1.0288757512092635e-06, "epoch": 0.2572496518701292, "percentage": 5.14, "elapsed_time": "0:13:37", "remaining_time": "4:11:06", "throughput": 8699.84, "total_tokens": 7109824} +{"current_steps": 10535, "total_steps": 204665, "loss": 0.1283, "lr": 1.0293643426002834e-06, "epoch": 0.2573718027019764, "percentage": 5.15, "elapsed_time": "0:13:37", "remaining_time": "4:11:06", "throughput": 8699.69, "total_tokens": 7112896} +{"current_steps": 10540, "total_steps": 204665, "loss": 0.1707, "lr": 1.0298529339913031e-06, "epoch": 0.25749395353382354, "percentage": 5.15, "elapsed_time": "0:13:37", "remaining_time": "4:11:04", "throughput": 8699.92, "total_tokens": 7116096} +{"current_steps": 10545, "total_steps": 204665, "loss": 0.1114, "lr": 1.0303415253823226e-06, "epoch": 0.25761610436567073, "percentage": 5.15, "elapsed_time": "0:13:38", "remaining_time": "4:11:03", "throughput": 8700.34, "total_tokens": 7119488} +{"current_steps": 10550, "total_steps": 204665, "loss": 0.3013, "lr": 1.0308301167733425e-06, "epoch": 0.2577382551975179, "percentage": 5.15, "elapsed_time": "0:13:38", "remaining_time": "4:11:02", "throughput": 8700.61, "total_tokens": 7122688} +{"current_steps": 10555, "total_steps": 204665, "loss": 0.1704, "lr": 1.0313187081643622e-06, "epoch": 0.25786040602936505, "percentage": 5.16, "elapsed_time": "0:13:38", "remaining_time": "4:11:01", "throughput": 8701.04, "total_tokens": 7126080} +{"current_steps": 10560, "total_steps": 204665, "loss": 0.1579, "lr": 1.0318072995553816e-06, "epoch": 0.25798255686121224, "percentage": 5.16, "elapsed_time": "0:13:39", "remaining_time": "4:11:00", "throughput": 8701.3, "total_tokens": 7129280} +{"current_steps": 10565, "total_steps": 204665, "loss": 0.0861, "lr": 1.0322958909464015e-06, "epoch": 0.2581047076930594, "percentage": 5.16, "elapsed_time": "0:13:39", "remaining_time": "4:10:59", "throughput": 8701.71, "total_tokens": 7132672} +{"current_steps": 10570, "total_steps": 204665, "loss": 0.1046, "lr": 1.0327844823374212e-06, "epoch": 0.25822685852490657, "percentage": 5.16, "elapsed_time": "0:13:40", "remaining_time": "4:10:58", "throughput": 8702.35, "total_tokens": 7136320} +{"current_steps": 10575, "total_steps": 204665, "loss": 0.1541, "lr": 1.0332730737284407e-06, "epoch": 0.2583490093567537, "percentage": 5.17, "elapsed_time": "0:13:40", "remaining_time": "4:10:57", "throughput": 8702.76, "total_tokens": 7139712} +{"current_steps": 10580, "total_steps": 204665, "loss": 0.1296, "lr": 1.0337616651194606e-06, "epoch": 0.2584711601886009, "percentage": 5.17, "elapsed_time": "0:13:40", "remaining_time": "4:10:56", "throughput": 8702.82, "total_tokens": 7142784} +{"current_steps": 10585, "total_steps": 204665, "loss": 0.0574, "lr": 1.0342502565104803e-06, "epoch": 0.258593311020448, "percentage": 5.17, "elapsed_time": "0:13:41", "remaining_time": "4:10:55", "throughput": 8703.11, "total_tokens": 7146112} +{"current_steps": 10590, "total_steps": 204665, "loss": 0.0815, "lr": 1.0347388479015e-06, "epoch": 0.2587154618522952, "percentage": 5.17, "elapsed_time": "0:13:41", "remaining_time": "4:10:53", "throughput": 8703.29, "total_tokens": 7149248} +{"current_steps": 10595, "total_steps": 204665, "loss": 0.1847, "lr": 1.0352274392925196e-06, "epoch": 0.2588376126841424, "percentage": 5.18, "elapsed_time": "0:13:41", "remaining_time": "4:10:52", "throughput": 8703.38, "total_tokens": 7152320} +{"current_steps": 10600, "total_steps": 204665, "loss": 0.0792, "lr": 1.0357160306835393e-06, "epoch": 0.25895976351598954, "percentage": 5.18, "elapsed_time": "0:13:42", "remaining_time": "4:10:51", "throughput": 8703.47, "total_tokens": 7155392} +{"current_steps": 10605, "total_steps": 204665, "loss": 0.1473, "lr": 1.036204622074559e-06, "epoch": 0.25908191434783673, "percentage": 5.18, "elapsed_time": "0:13:42", "remaining_time": "4:10:50", "throughput": 8704.5, "total_tokens": 7159424} +{"current_steps": 10610, "total_steps": 204665, "loss": 0.1038, "lr": 1.0366932134655787e-06, "epoch": 0.25920406517968386, "percentage": 5.18, "elapsed_time": "0:13:42", "remaining_time": "4:10:49", "throughput": 8704.82, "total_tokens": 7162752} +{"current_steps": 10615, "total_steps": 204665, "loss": 0.0652, "lr": 1.0371818048565983e-06, "epoch": 0.25932621601153105, "percentage": 5.19, "elapsed_time": "0:13:43", "remaining_time": "4:10:48", "throughput": 8705.15, "total_tokens": 7166080} +{"current_steps": 10620, "total_steps": 204665, "loss": 0.0808, "lr": 1.037670396247618e-06, "epoch": 0.2594483668433782, "percentage": 5.19, "elapsed_time": "0:13:43", "remaining_time": "4:10:47", "throughput": 8705.37, "total_tokens": 7169280} +{"current_steps": 10625, "total_steps": 204665, "loss": 0.1462, "lr": 1.038158987638638e-06, "epoch": 0.2595705176752254, "percentage": 5.19, "elapsed_time": "0:13:43", "remaining_time": "4:10:46", "throughput": 8705.55, "total_tokens": 7172480} +{"current_steps": 10630, "total_steps": 204665, "loss": 0.0809, "lr": 1.0386475790296574e-06, "epoch": 0.2596926685070725, "percentage": 5.19, "elapsed_time": "0:13:44", "remaining_time": "4:10:45", "throughput": 8706.14, "total_tokens": 7176128} +{"current_steps": 10635, "total_steps": 204665, "loss": 0.1069, "lr": 1.039136170420677e-06, "epoch": 0.2598148193389197, "percentage": 5.2, "elapsed_time": "0:13:44", "remaining_time": "4:10:44", "throughput": 8706.13, "total_tokens": 7179136} +{"current_steps": 10640, "total_steps": 204665, "loss": 0.2446, "lr": 1.039624761811697e-06, "epoch": 0.2599369701707669, "percentage": 5.2, "elapsed_time": "0:13:44", "remaining_time": "4:10:43", "throughput": 8706.29, "total_tokens": 7182272} +{"current_steps": 10645, "total_steps": 204665, "loss": 0.0538, "lr": 1.0401133532027164e-06, "epoch": 0.260059121002614, "percentage": 5.2, "elapsed_time": "0:13:45", "remaining_time": "4:10:42", "throughput": 8706.09, "total_tokens": 7185088} +{"current_steps": 10650, "total_steps": 204665, "loss": 0.1061, "lr": 1.0406019445937361e-06, "epoch": 0.2601812718344612, "percentage": 5.2, "elapsed_time": "0:13:45", "remaining_time": "4:10:41", "throughput": 8706.42, "total_tokens": 7188480} +{"current_steps": 10655, "total_steps": 204665, "loss": 0.0527, "lr": 1.041090535984756e-06, "epoch": 0.26030342266630835, "percentage": 5.21, "elapsed_time": "0:13:45", "remaining_time": "4:10:40", "throughput": 8706.5, "total_tokens": 7191552} +{"current_steps": 10660, "total_steps": 204665, "loss": 0.1326, "lr": 1.0415791273757757e-06, "epoch": 0.26042557349815554, "percentage": 5.21, "elapsed_time": "0:13:46", "remaining_time": "4:10:38", "throughput": 8706.72, "total_tokens": 7194752} +{"current_steps": 10665, "total_steps": 204665, "loss": 0.3496, "lr": 1.0420677187667952e-06, "epoch": 0.2605477243300027, "percentage": 5.21, "elapsed_time": "0:13:46", "remaining_time": "4:10:38", "throughput": 8707.49, "total_tokens": 7198528} +{"current_steps": 10670, "total_steps": 204665, "loss": 0.3142, "lr": 1.042556310157815e-06, "epoch": 0.26066987516184986, "percentage": 5.21, "elapsed_time": "0:13:47", "remaining_time": "4:10:36", "throughput": 8707.68, "total_tokens": 7201728} +{"current_steps": 10675, "total_steps": 204665, "loss": 0.1681, "lr": 1.0430449015488347e-06, "epoch": 0.260792025993697, "percentage": 5.22, "elapsed_time": "0:13:47", "remaining_time": "4:10:35", "throughput": 8708.06, "total_tokens": 7205120} +{"current_steps": 10680, "total_steps": 204665, "loss": 0.0965, "lr": 1.0435334929398542e-06, "epoch": 0.2609141768255442, "percentage": 5.22, "elapsed_time": "0:13:47", "remaining_time": "4:10:34", "throughput": 8708.53, "total_tokens": 7208576} +{"current_steps": 10685, "total_steps": 204665, "loss": 0.3016, "lr": 1.044022084330874e-06, "epoch": 0.2610363276573913, "percentage": 5.22, "elapsed_time": "0:13:48", "remaining_time": "4:10:33", "throughput": 8708.51, "total_tokens": 7211584} +{"current_steps": 10690, "total_steps": 204665, "loss": 0.1004, "lr": 1.0445106757218938e-06, "epoch": 0.2611584784892385, "percentage": 5.22, "elapsed_time": "0:13:48", "remaining_time": "4:10:32", "throughput": 8708.84, "total_tokens": 7214976} +{"current_steps": 10695, "total_steps": 204665, "loss": 0.0837, "lr": 1.0449992671129135e-06, "epoch": 0.2612806293210857, "percentage": 5.23, "elapsed_time": "0:13:48", "remaining_time": "4:10:31", "throughput": 8709.07, "total_tokens": 7218240} +{"current_steps": 10700, "total_steps": 204665, "loss": 0.1402, "lr": 1.0454878585039331e-06, "epoch": 0.26140278015293283, "percentage": 5.23, "elapsed_time": "0:13:49", "remaining_time": "4:10:31", "throughput": 8709.72, "total_tokens": 7221952} +{"current_steps": 10705, "total_steps": 204665, "loss": 0.0783, "lr": 1.0459764498949528e-06, "epoch": 0.26152493098478, "percentage": 5.23, "elapsed_time": "0:13:49", "remaining_time": "4:10:30", "throughput": 8710.08, "total_tokens": 7225344} +{"current_steps": 10710, "total_steps": 204665, "loss": 0.0884, "lr": 1.0464650412859725e-06, "epoch": 0.26164708181662716, "percentage": 5.23, "elapsed_time": "0:13:49", "remaining_time": "4:10:29", "throughput": 8710.49, "total_tokens": 7228800} +{"current_steps": 10715, "total_steps": 204665, "loss": 0.1519, "lr": 1.0469536326769922e-06, "epoch": 0.26176923264847435, "percentage": 5.24, "elapsed_time": "0:13:50", "remaining_time": "4:10:28", "throughput": 8710.85, "total_tokens": 7232192} +{"current_steps": 10720, "total_steps": 204665, "loss": 0.0741, "lr": 1.0474422240680119e-06, "epoch": 0.2618913834803215, "percentage": 5.24, "elapsed_time": "0:13:50", "remaining_time": "4:10:27", "throughput": 8711.27, "total_tokens": 7235648} +{"current_steps": 10725, "total_steps": 204665, "loss": 0.104, "lr": 1.0479308154590315e-06, "epoch": 0.26201353431216867, "percentage": 5.24, "elapsed_time": "0:13:50", "remaining_time": "4:10:26", "throughput": 8711.57, "total_tokens": 7238976} +{"current_steps": 10730, "total_steps": 204665, "loss": 0.1013, "lr": 1.0484194068500512e-06, "epoch": 0.2621356851440158, "percentage": 5.24, "elapsed_time": "0:13:51", "remaining_time": "4:10:25", "throughput": 8711.9, "total_tokens": 7242304} +{"current_steps": 10735, "total_steps": 204665, "loss": 0.1474, "lr": 1.048907998241071e-06, "epoch": 0.262257835975863, "percentage": 5.25, "elapsed_time": "0:13:51", "remaining_time": "4:10:24", "throughput": 8712.22, "total_tokens": 7245632} +{"current_steps": 10740, "total_steps": 204665, "loss": 0.2086, "lr": 1.0493965896320906e-06, "epoch": 0.2623799868077102, "percentage": 5.25, "elapsed_time": "0:13:52", "remaining_time": "4:10:23", "throughput": 8712.45, "total_tokens": 7248896} +{"current_steps": 10745, "total_steps": 204665, "loss": 0.1583, "lr": 1.0498851810231105e-06, "epoch": 0.2625021376395573, "percentage": 5.25, "elapsed_time": "0:13:52", "remaining_time": "4:10:22", "throughput": 8712.84, "total_tokens": 7252352} +{"current_steps": 10750, "total_steps": 204665, "loss": 0.1325, "lr": 1.05037377241413e-06, "epoch": 0.2626242884714045, "percentage": 5.25, "elapsed_time": "0:13:52", "remaining_time": "4:10:21", "throughput": 8712.99, "total_tokens": 7255552} +{"current_steps": 10755, "total_steps": 204665, "loss": 0.106, "lr": 1.0508623638051496e-06, "epoch": 0.26274643930325164, "percentage": 5.25, "elapsed_time": "0:13:53", "remaining_time": "4:10:20", "throughput": 8713.43, "total_tokens": 7259008} +{"current_steps": 10760, "total_steps": 204665, "loss": 0.1296, "lr": 1.0513509551961695e-06, "epoch": 0.26286859013509883, "percentage": 5.26, "elapsed_time": "0:13:53", "remaining_time": "4:10:19", "throughput": 8713.86, "total_tokens": 7262464} +{"current_steps": 10765, "total_steps": 204665, "loss": 0.0801, "lr": 1.051839546587189e-06, "epoch": 0.26299074096694597, "percentage": 5.26, "elapsed_time": "0:13:53", "remaining_time": "4:10:18", "throughput": 8714.16, "total_tokens": 7265792} +{"current_steps": 10770, "total_steps": 204665, "loss": 0.086, "lr": 1.0523281379782087e-06, "epoch": 0.26311289179879316, "percentage": 5.26, "elapsed_time": "0:13:54", "remaining_time": "4:10:17", "throughput": 8714.76, "total_tokens": 7269376} +{"current_steps": 10775, "total_steps": 204665, "loss": 0.1174, "lr": 1.0528167293692286e-06, "epoch": 0.2632350426306403, "percentage": 5.26, "elapsed_time": "0:13:54", "remaining_time": "4:10:16", "throughput": 8715.2, "total_tokens": 7272832} +{"current_steps": 10780, "total_steps": 204665, "loss": 0.0803, "lr": 1.0533053207602482e-06, "epoch": 0.2633571934624875, "percentage": 5.27, "elapsed_time": "0:13:54", "remaining_time": "4:10:15", "throughput": 8715.47, "total_tokens": 7276096} +{"current_steps": 10785, "total_steps": 204665, "loss": 0.0768, "lr": 1.0537939121512677e-06, "epoch": 0.26347934429433467, "percentage": 5.27, "elapsed_time": "0:13:55", "remaining_time": "4:10:14", "throughput": 8715.9, "total_tokens": 7279488} +{"current_steps": 10790, "total_steps": 204665, "loss": 0.1289, "lr": 1.0542825035422876e-06, "epoch": 0.2636014951261818, "percentage": 5.27, "elapsed_time": "0:13:55", "remaining_time": "4:10:13", "throughput": 8716.37, "total_tokens": 7282944} +{"current_steps": 10795, "total_steps": 204665, "loss": 0.0317, "lr": 1.0547710949333073e-06, "epoch": 0.263723645958029, "percentage": 5.27, "elapsed_time": "0:13:55", "remaining_time": "4:10:12", "throughput": 8716.89, "total_tokens": 7286464} +{"current_steps": 10800, "total_steps": 204665, "loss": 0.1985, "lr": 1.0552596863243268e-06, "epoch": 0.26384579678987613, "percentage": 5.28, "elapsed_time": "0:13:56", "remaining_time": "4:10:11", "throughput": 8717.24, "total_tokens": 7289792} +{"current_steps": 10805, "total_steps": 204665, "loss": 0.2123, "lr": 1.0557482777153467e-06, "epoch": 0.2639679476217233, "percentage": 5.28, "elapsed_time": "0:13:56", "remaining_time": "4:10:09", "throughput": 8717.59, "total_tokens": 7293120} +{"current_steps": 10810, "total_steps": 204665, "loss": 0.0863, "lr": 1.0562368691063663e-06, "epoch": 0.26409009845357045, "percentage": 5.28, "elapsed_time": "0:13:56", "remaining_time": "4:10:09", "throughput": 8718.1, "total_tokens": 7296640} +{"current_steps": 10815, "total_steps": 204665, "loss": 0.1017, "lr": 1.056725460497386e-06, "epoch": 0.26421224928541764, "percentage": 5.28, "elapsed_time": "0:13:57", "remaining_time": "4:10:07", "throughput": 8718.08, "total_tokens": 7299584} +{"current_steps": 10820, "total_steps": 204665, "loss": 0.0895, "lr": 1.0572140518884057e-06, "epoch": 0.2643344001172648, "percentage": 5.29, "elapsed_time": "0:13:57", "remaining_time": "4:10:06", "throughput": 8718.07, "total_tokens": 7302528} +{"current_steps": 10825, "total_steps": 204665, "loss": 0.2, "lr": 1.0577026432794254e-06, "epoch": 0.26445655094911197, "percentage": 5.29, "elapsed_time": "0:13:57", "remaining_time": "4:10:05", "throughput": 8718.3, "total_tokens": 7305728} +{"current_steps": 10830, "total_steps": 204665, "loss": 0.1894, "lr": 1.058191234670445e-06, "epoch": 0.2645787017809591, "percentage": 5.29, "elapsed_time": "0:13:58", "remaining_time": "4:10:04", "throughput": 8718.9, "total_tokens": 7309312} +{"current_steps": 10835, "total_steps": 204665, "loss": 0.1635, "lr": 1.0586798260614647e-06, "epoch": 0.2647008526128063, "percentage": 5.29, "elapsed_time": "0:13:58", "remaining_time": "4:10:03", "throughput": 8719.05, "total_tokens": 7312448} +{"current_steps": 10840, "total_steps": 204665, "loss": 0.1332, "lr": 1.0591684174524844e-06, "epoch": 0.2648230034446535, "percentage": 5.3, "elapsed_time": "0:13:59", "remaining_time": "4:10:02", "throughput": 8719.5, "total_tokens": 7315904} +{"current_steps": 10845, "total_steps": 204665, "loss": 0.0642, "lr": 1.059657008843504e-06, "epoch": 0.2649451542765006, "percentage": 5.3, "elapsed_time": "0:13:59", "remaining_time": "4:10:01", "throughput": 8719.91, "total_tokens": 7319360} +{"current_steps": 10850, "total_steps": 204665, "loss": 0.1647, "lr": 1.060145600234524e-06, "epoch": 0.2650673051083478, "percentage": 5.3, "elapsed_time": "0:13:59", "remaining_time": "4:10:00", "throughput": 8720.34, "total_tokens": 7322816} +{"current_steps": 10855, "total_steps": 204665, "loss": 0.1161, "lr": 1.0606341916255435e-06, "epoch": 0.26518945594019494, "percentage": 5.3, "elapsed_time": "0:14:00", "remaining_time": "4:09:59", "throughput": 8721.31, "total_tokens": 7326784} +{"current_steps": 10860, "total_steps": 204665, "loss": 0.1226, "lr": 1.0611227830165631e-06, "epoch": 0.26531160677204213, "percentage": 5.31, "elapsed_time": "0:14:00", "remaining_time": "4:09:58", "throughput": 8721.82, "total_tokens": 7330304} +{"current_steps": 10865, "total_steps": 204665, "loss": 0.1123, "lr": 1.061611374407583e-06, "epoch": 0.26543375760388926, "percentage": 5.31, "elapsed_time": "0:14:00", "remaining_time": "4:09:57", "throughput": 8722.15, "total_tokens": 7333632} +{"current_steps": 10870, "total_steps": 204665, "loss": 0.1671, "lr": 1.0620999657986025e-06, "epoch": 0.26555590843573645, "percentage": 5.31, "elapsed_time": "0:14:01", "remaining_time": "4:09:56", "throughput": 8722.8, "total_tokens": 7337280} +{"current_steps": 10875, "total_steps": 204665, "loss": 0.1121, "lr": 1.0625885571896222e-06, "epoch": 0.2656780592675836, "percentage": 5.31, "elapsed_time": "0:14:01", "remaining_time": "4:09:55", "throughput": 8723.03, "total_tokens": 7340480} +{"current_steps": 10880, "total_steps": 204665, "loss": 0.1282, "lr": 1.063077148580642e-06, "epoch": 0.2658002100994308, "percentage": 5.32, "elapsed_time": "0:14:01", "remaining_time": "4:09:54", "throughput": 8722.97, "total_tokens": 7343424} +{"current_steps": 10885, "total_steps": 204665, "loss": 0.1091, "lr": 1.0635657399716616e-06, "epoch": 0.26592236093127797, "percentage": 5.32, "elapsed_time": "0:14:02", "remaining_time": "4:09:53", "throughput": 8723.31, "total_tokens": 7346752} +{"current_steps": 10890, "total_steps": 204665, "loss": 0.1666, "lr": 1.0640543313626812e-06, "epoch": 0.2660445117631251, "percentage": 5.32, "elapsed_time": "0:14:02", "remaining_time": "4:09:52", "throughput": 8723.73, "total_tokens": 7350144} +{"current_steps": 10895, "total_steps": 204665, "loss": 0.0835, "lr": 1.0645429227537011e-06, "epoch": 0.2661666625949723, "percentage": 5.32, "elapsed_time": "0:14:02", "remaining_time": "4:09:51", "throughput": 8723.94, "total_tokens": 7353344} +{"current_steps": 10900, "total_steps": 204665, "loss": 0.1354, "lr": 1.0650315141447208e-06, "epoch": 0.2662888134268194, "percentage": 5.33, "elapsed_time": "0:14:03", "remaining_time": "4:09:49", "throughput": 8724.13, "total_tokens": 7356544} +{"current_steps": 10905, "total_steps": 204665, "loss": 0.147, "lr": 1.0655201055357403e-06, "epoch": 0.2664109642586666, "percentage": 5.33, "elapsed_time": "0:14:03", "remaining_time": "4:09:48", "throughput": 8724.45, "total_tokens": 7359872} +{"current_steps": 10910, "total_steps": 204665, "loss": 0.1455, "lr": 1.0660086969267602e-06, "epoch": 0.26653311509051375, "percentage": 5.33, "elapsed_time": "0:14:03", "remaining_time": "4:09:47", "throughput": 8724.99, "total_tokens": 7363392} +{"current_steps": 10915, "total_steps": 204665, "loss": 0.066, "lr": 1.0664972883177799e-06, "epoch": 0.26665526592236094, "percentage": 5.33, "elapsed_time": "0:14:04", "remaining_time": "4:09:46", "throughput": 8725.54, "total_tokens": 7366912} +{"current_steps": 10920, "total_steps": 204665, "loss": 0.0645, "lr": 1.0669858797087993e-06, "epoch": 0.2667774167542081, "percentage": 5.34, "elapsed_time": "0:14:04", "remaining_time": "4:09:45", "throughput": 8725.83, "total_tokens": 7370176} +{"current_steps": 10925, "total_steps": 204665, "loss": 0.1761, "lr": 1.0674744710998192e-06, "epoch": 0.26689956758605526, "percentage": 5.34, "elapsed_time": "0:14:04", "remaining_time": "4:09:44", "throughput": 8726.11, "total_tokens": 7373440} +{"current_steps": 10930, "total_steps": 204665, "loss": 0.0981, "lr": 1.067963062490839e-06, "epoch": 0.2670217184179024, "percentage": 5.34, "elapsed_time": "0:14:05", "remaining_time": "4:09:43", "throughput": 8726.89, "total_tokens": 7377216} +{"current_steps": 10935, "total_steps": 204665, "loss": 0.0754, "lr": 1.0684516538818586e-06, "epoch": 0.2671438692497496, "percentage": 5.34, "elapsed_time": "0:14:05", "remaining_time": "4:09:42", "throughput": 8727.19, "total_tokens": 7380480} +{"current_steps": 10940, "total_steps": 204665, "loss": 0.0664, "lr": 1.0689402452728783e-06, "epoch": 0.2672660200815968, "percentage": 5.35, "elapsed_time": "0:14:06", "remaining_time": "4:09:41", "throughput": 8727.6, "total_tokens": 7383872} +{"current_steps": 10945, "total_steps": 204665, "loss": 0.1116, "lr": 1.069428836663898e-06, "epoch": 0.2673881709134439, "percentage": 5.35, "elapsed_time": "0:14:06", "remaining_time": "4:09:40", "throughput": 8727.8, "total_tokens": 7387008} +{"current_steps": 10950, "total_steps": 204665, "loss": 0.2517, "lr": 1.0699174280549176e-06, "epoch": 0.2675103217452911, "percentage": 5.35, "elapsed_time": "0:14:06", "remaining_time": "4:09:39", "throughput": 8728.09, "total_tokens": 7390272} +{"current_steps": 10955, "total_steps": 204665, "loss": 0.1716, "lr": 1.0704060194459373e-06, "epoch": 0.26763247257713824, "percentage": 5.35, "elapsed_time": "0:14:07", "remaining_time": "4:09:38", "throughput": 8728.66, "total_tokens": 7393856} +{"current_steps": 10960, "total_steps": 204665, "loss": 0.1175, "lr": 1.070894610836957e-06, "epoch": 0.2677546234089854, "percentage": 5.36, "elapsed_time": "0:14:07", "remaining_time": "4:09:37", "throughput": 8728.86, "total_tokens": 7396992} +{"current_steps": 10965, "total_steps": 204665, "loss": 0.2216, "lr": 1.0713832022279767e-06, "epoch": 0.26787677424083256, "percentage": 5.36, "elapsed_time": "0:14:07", "remaining_time": "4:09:36", "throughput": 8729.31, "total_tokens": 7400448} +{"current_steps": 10970, "total_steps": 204665, "loss": 0.1282, "lr": 1.0718717936189966e-06, "epoch": 0.26799892507267975, "percentage": 5.36, "elapsed_time": "0:14:08", "remaining_time": "4:09:35", "throughput": 8729.79, "total_tokens": 7403904} +{"current_steps": 10975, "total_steps": 204665, "loss": 0.0799, "lr": 1.072360385010016e-06, "epoch": 0.2681210759045269, "percentage": 5.36, "elapsed_time": "0:14:08", "remaining_time": "4:09:33", "throughput": 8729.92, "total_tokens": 7406976} +{"current_steps": 10980, "total_steps": 204665, "loss": 0.0994, "lr": 1.0728489764010357e-06, "epoch": 0.2682432267363741, "percentage": 5.36, "elapsed_time": "0:14:08", "remaining_time": "4:09:32", "throughput": 8730.16, "total_tokens": 7410176} +{"current_steps": 10985, "total_steps": 204665, "loss": 0.069, "lr": 1.0733375677920556e-06, "epoch": 0.26836537756822126, "percentage": 5.37, "elapsed_time": "0:14:09", "remaining_time": "4:09:31", "throughput": 8730.74, "total_tokens": 7413760} +{"current_steps": 10990, "total_steps": 204665, "loss": 0.2193, "lr": 1.073826159183075e-06, "epoch": 0.2684875284000684, "percentage": 5.37, "elapsed_time": "0:14:09", "remaining_time": "4:09:30", "throughput": 8731.21, "total_tokens": 7417216} +{"current_steps": 10995, "total_steps": 204665, "loss": 0.1294, "lr": 1.0743147505740948e-06, "epoch": 0.2686096792319156, "percentage": 5.37, "elapsed_time": "0:14:09", "remaining_time": "4:09:29", "throughput": 8731.66, "total_tokens": 7420672} +{"current_steps": 11000, "total_steps": 204665, "loss": 0.1481, "lr": 1.0748033419651146e-06, "epoch": 0.2687318300637627, "percentage": 5.37, "elapsed_time": "0:14:10", "remaining_time": "4:09:28", "throughput": 8731.86, "total_tokens": 7423808} +{"current_steps": 11005, "total_steps": 204665, "loss": 0.1, "lr": 1.0752919333561341e-06, "epoch": 0.2688539808956099, "percentage": 5.38, "elapsed_time": "0:14:10", "remaining_time": "4:09:27", "throughput": 8732.39, "total_tokens": 7427328} +{"current_steps": 11010, "total_steps": 204665, "loss": 0.1661, "lr": 1.0757805247471538e-06, "epoch": 0.26897613172745705, "percentage": 5.38, "elapsed_time": "0:14:10", "remaining_time": "4:09:26", "throughput": 8732.71, "total_tokens": 7430592} +{"current_steps": 11015, "total_steps": 204665, "loss": 0.1138, "lr": 1.0762691161381737e-06, "epoch": 0.26909828255930424, "percentage": 5.38, "elapsed_time": "0:14:11", "remaining_time": "4:09:25", "throughput": 8732.81, "total_tokens": 7433664} +{"current_steps": 11020, "total_steps": 204665, "loss": 0.1723, "lr": 1.0767577075291934e-06, "epoch": 0.26922043339115137, "percentage": 5.38, "elapsed_time": "0:14:11", "remaining_time": "4:09:23", "throughput": 8732.63, "total_tokens": 7436416} +{"current_steps": 11025, "total_steps": 204665, "loss": 0.0646, "lr": 1.0772462989202128e-06, "epoch": 0.26934258422299856, "percentage": 5.39, "elapsed_time": "0:14:11", "remaining_time": "4:09:22", "throughput": 8732.87, "total_tokens": 7439616} +{"current_steps": 11030, "total_steps": 204665, "loss": 0.1593, "lr": 1.0777348903112327e-06, "epoch": 0.26946473505484575, "percentage": 5.39, "elapsed_time": "0:14:12", "remaining_time": "4:09:21", "throughput": 8732.95, "total_tokens": 7442624} +{"current_steps": 11035, "total_steps": 204665, "loss": 0.1045, "lr": 1.0782234817022524e-06, "epoch": 0.2695868858866929, "percentage": 5.39, "elapsed_time": "0:14:12", "remaining_time": "4:09:20", "throughput": 8733.25, "total_tokens": 7445888} +{"current_steps": 11040, "total_steps": 204665, "loss": 0.0894, "lr": 1.0787120730932719e-06, "epoch": 0.2697090367185401, "percentage": 5.39, "elapsed_time": "0:14:12", "remaining_time": "4:09:19", "throughput": 8733.35, "total_tokens": 7448960} +{"current_steps": 11045, "total_steps": 204665, "loss": 0.0651, "lr": 1.0792006644842918e-06, "epoch": 0.2698311875503872, "percentage": 5.4, "elapsed_time": "0:14:13", "remaining_time": "4:09:18", "throughput": 8733.95, "total_tokens": 7452544} +{"current_steps": 11050, "total_steps": 204665, "loss": 0.1117, "lr": 1.0796892558753115e-06, "epoch": 0.2699533383822344, "percentage": 5.4, "elapsed_time": "0:14:13", "remaining_time": "4:09:17", "throughput": 8734.11, "total_tokens": 7455680} +{"current_steps": 11055, "total_steps": 204665, "loss": 0.12, "lr": 1.0801778472663311e-06, "epoch": 0.27007548921408153, "percentage": 5.4, "elapsed_time": "0:14:13", "remaining_time": "4:09:15", "throughput": 8734.48, "total_tokens": 7459008} +{"current_steps": 11060, "total_steps": 204665, "loss": 0.0946, "lr": 1.0806664386573508e-06, "epoch": 0.2701976400459287, "percentage": 5.4, "elapsed_time": "0:14:14", "remaining_time": "4:09:14", "throughput": 8734.98, "total_tokens": 7462464} +{"current_steps": 11065, "total_steps": 204665, "loss": 0.179, "lr": 1.0811550300483705e-06, "epoch": 0.27031979087777586, "percentage": 5.41, "elapsed_time": "0:14:14", "remaining_time": "4:09:13", "throughput": 8735.22, "total_tokens": 7465664} +{"current_steps": 11070, "total_steps": 204665, "loss": 0.0426, "lr": 1.0816436214393902e-06, "epoch": 0.27044194170962305, "percentage": 5.41, "elapsed_time": "0:14:15", "remaining_time": "4:09:12", "throughput": 8735.62, "total_tokens": 7469056} +{"current_steps": 11075, "total_steps": 204665, "loss": 0.1197, "lr": 1.0821322128304099e-06, "epoch": 0.2705640925414702, "percentage": 5.41, "elapsed_time": "0:14:15", "remaining_time": "4:09:11", "throughput": 8736.05, "total_tokens": 7472512} +{"current_steps": 11080, "total_steps": 204665, "loss": 0.0911, "lr": 1.0826208042214295e-06, "epoch": 0.27068624337331737, "percentage": 5.41, "elapsed_time": "0:14:15", "remaining_time": "4:09:10", "throughput": 8735.96, "total_tokens": 7475392} +{"current_steps": 11085, "total_steps": 204665, "loss": 0.1618, "lr": 1.0831093956124492e-06, "epoch": 0.27080839420516456, "percentage": 5.42, "elapsed_time": "0:14:16", "remaining_time": "4:09:09", "throughput": 8736.5, "total_tokens": 7478912} +{"current_steps": 11090, "total_steps": 204665, "loss": 0.1194, "lr": 1.0835979870034691e-06, "epoch": 0.2709305450370117, "percentage": 5.42, "elapsed_time": "0:14:16", "remaining_time": "4:09:08", "throughput": 8736.67, "total_tokens": 7482048} +{"current_steps": 11095, "total_steps": 204665, "loss": 0.1323, "lr": 1.0840865783944886e-06, "epoch": 0.2710526958688589, "percentage": 5.42, "elapsed_time": "0:14:16", "remaining_time": "4:09:07", "throughput": 8736.91, "total_tokens": 7485248} +{"current_steps": 11100, "total_steps": 204665, "loss": 0.2519, "lr": 1.0845751697855083e-06, "epoch": 0.271174846700706, "percentage": 5.42, "elapsed_time": "0:14:17", "remaining_time": "4:09:06", "throughput": 8737.24, "total_tokens": 7488576} +{"current_steps": 11105, "total_steps": 204665, "loss": 0.0914, "lr": 1.0850637611765282e-06, "epoch": 0.2712969975325532, "percentage": 5.43, "elapsed_time": "0:14:17", "remaining_time": "4:09:05", "throughput": 8737.59, "total_tokens": 7491904} +{"current_steps": 11110, "total_steps": 204665, "loss": 0.1573, "lr": 1.0855523525675476e-06, "epoch": 0.27141914836440034, "percentage": 5.43, "elapsed_time": "0:14:17", "remaining_time": "4:09:04", "throughput": 8738.72, "total_tokens": 7496064} +{"current_steps": 11115, "total_steps": 204665, "loss": 0.0401, "lr": 1.0860409439585673e-06, "epoch": 0.27154129919624753, "percentage": 5.43, "elapsed_time": "0:14:18", "remaining_time": "4:09:03", "throughput": 8739.01, "total_tokens": 7499328} +{"current_steps": 11120, "total_steps": 204665, "loss": 0.1413, "lr": 1.0865295353495872e-06, "epoch": 0.27166345002809467, "percentage": 5.43, "elapsed_time": "0:14:18", "remaining_time": "4:09:02", "throughput": 8739.72, "total_tokens": 7503040} +{"current_steps": 11125, "total_steps": 204665, "loss": 0.2132, "lr": 1.0870181267406069e-06, "epoch": 0.27178560085994186, "percentage": 5.44, "elapsed_time": "0:14:18", "remaining_time": "4:09:01", "throughput": 8740.1, "total_tokens": 7506368} +{"current_steps": 11130, "total_steps": 204665, "loss": 0.1462, "lr": 1.0875067181316264e-06, "epoch": 0.27190775169178905, "percentage": 5.44, "elapsed_time": "0:14:19", "remaining_time": "4:09:00", "throughput": 8740.64, "total_tokens": 7509888} +{"current_steps": 11135, "total_steps": 204665, "loss": 0.118, "lr": 1.0879953095226463e-06, "epoch": 0.2720299025236362, "percentage": 5.44, "elapsed_time": "0:14:19", "remaining_time": "4:08:59", "throughput": 8741.47, "total_tokens": 7513728} +{"current_steps": 11140, "total_steps": 204665, "loss": 0.1055, "lr": 1.088483900913666e-06, "epoch": 0.27215205335548337, "percentage": 5.44, "elapsed_time": "0:14:19", "remaining_time": "4:08:58", "throughput": 8742.11, "total_tokens": 7517376} +{"current_steps": 11145, "total_steps": 204665, "loss": 0.2541, "lr": 1.0889724923046854e-06, "epoch": 0.2722742041873305, "percentage": 5.45, "elapsed_time": "0:14:20", "remaining_time": "4:08:57", "throughput": 8742.37, "total_tokens": 7520576} +{"current_steps": 11150, "total_steps": 204665, "loss": 0.1983, "lr": 1.0894610836957053e-06, "epoch": 0.2723963550191777, "percentage": 5.45, "elapsed_time": "0:14:20", "remaining_time": "4:08:55", "throughput": 8742.42, "total_tokens": 7523584} +{"current_steps": 11155, "total_steps": 204665, "loss": 0.067, "lr": 1.089949675086725e-06, "epoch": 0.27251850585102483, "percentage": 5.45, "elapsed_time": "0:14:20", "remaining_time": "4:08:54", "throughput": 8742.84, "total_tokens": 7526976} +{"current_steps": 11160, "total_steps": 204665, "loss": 0.0961, "lr": 1.0904382664777444e-06, "epoch": 0.272640656682872, "percentage": 5.45, "elapsed_time": "0:14:21", "remaining_time": "4:08:53", "throughput": 8742.83, "total_tokens": 7529920} +{"current_steps": 11165, "total_steps": 204665, "loss": 0.0953, "lr": 1.0909268578687643e-06, "epoch": 0.27276280751471915, "percentage": 5.46, "elapsed_time": "0:14:21", "remaining_time": "4:08:52", "throughput": 8743.22, "total_tokens": 7533312} +{"current_steps": 11170, "total_steps": 204665, "loss": 0.0257, "lr": 1.091415449259784e-06, "epoch": 0.27288495834656634, "percentage": 5.46, "elapsed_time": "0:14:21", "remaining_time": "4:08:51", "throughput": 8744.41, "total_tokens": 7537536} +{"current_steps": 11175, "total_steps": 204665, "loss": 0.1563, "lr": 1.0919040406508037e-06, "epoch": 0.27300710917841353, "percentage": 5.46, "elapsed_time": "0:14:22", "remaining_time": "4:08:50", "throughput": 8744.65, "total_tokens": 7540736} +{"current_steps": 11180, "total_steps": 204665, "loss": 0.1178, "lr": 1.0923926320418234e-06, "epoch": 0.27312926001026067, "percentage": 5.46, "elapsed_time": "0:14:22", "remaining_time": "4:08:49", "throughput": 8745.19, "total_tokens": 7544256} +{"current_steps": 11185, "total_steps": 204665, "loss": 0.2243, "lr": 1.092881223432843e-06, "epoch": 0.27325141084210786, "percentage": 5.47, "elapsed_time": "0:14:23", "remaining_time": "4:08:48", "throughput": 8745.61, "total_tokens": 7547648} +{"current_steps": 11190, "total_steps": 204665, "loss": 0.192, "lr": 1.0933698148238627e-06, "epoch": 0.273373561673955, "percentage": 5.47, "elapsed_time": "0:14:23", "remaining_time": "4:08:47", "throughput": 8746.44, "total_tokens": 7551488} +{"current_steps": 11195, "total_steps": 204665, "loss": 0.1384, "lr": 1.0938584062148824e-06, "epoch": 0.2734957125058022, "percentage": 5.47, "elapsed_time": "0:14:23", "remaining_time": "4:08:46", "throughput": 8746.74, "total_tokens": 7554752} +{"current_steps": 11200, "total_steps": 204665, "loss": 0.0948, "lr": 1.0943469976059021e-06, "epoch": 0.2736178633376493, "percentage": 5.47, "elapsed_time": "0:14:24", "remaining_time": "4:08:45", "throughput": 8747.08, "total_tokens": 7558080} +{"current_steps": 11205, "total_steps": 204665, "loss": 0.1033, "lr": 1.0948355889969218e-06, "epoch": 0.2737400141694965, "percentage": 5.47, "elapsed_time": "0:14:24", "remaining_time": "4:08:44", "throughput": 8747.5, "total_tokens": 7561472} +{"current_steps": 11210, "total_steps": 204665, "loss": 0.1728, "lr": 1.0953241803879417e-06, "epoch": 0.27386216500134364, "percentage": 5.48, "elapsed_time": "0:14:24", "remaining_time": "4:08:43", "throughput": 8748.27, "total_tokens": 7565248} +{"current_steps": 11215, "total_steps": 204665, "loss": 0.0578, "lr": 1.0958127717789612e-06, "epoch": 0.27398431583319083, "percentage": 5.48, "elapsed_time": "0:14:25", "remaining_time": "4:08:42", "throughput": 8748.43, "total_tokens": 7568384} +{"current_steps": 11220, "total_steps": 204665, "loss": 0.1279, "lr": 1.0963013631699808e-06, "epoch": 0.27410646666503796, "percentage": 5.48, "elapsed_time": "0:14:25", "remaining_time": "4:08:41", "throughput": 8748.85, "total_tokens": 7571776} +{"current_steps": 11225, "total_steps": 204665, "loss": 0.0493, "lr": 1.0967899545610007e-06, "epoch": 0.27422861749688515, "percentage": 5.48, "elapsed_time": "0:14:25", "remaining_time": "4:08:40", "throughput": 8748.91, "total_tokens": 7574784} +{"current_steps": 11230, "total_steps": 204665, "loss": 0.1948, "lr": 1.0972785459520202e-06, "epoch": 0.27435076832873234, "percentage": 5.49, "elapsed_time": "0:14:26", "remaining_time": "4:08:39", "throughput": 8749.14, "total_tokens": 7577984} +{"current_steps": 11235, "total_steps": 204665, "loss": 0.0757, "lr": 1.0977671373430399e-06, "epoch": 0.2744729191605795, "percentage": 5.49, "elapsed_time": "0:14:26", "remaining_time": "4:08:37", "throughput": 8749.18, "total_tokens": 7580992} +{"current_steps": 11240, "total_steps": 204665, "loss": 0.1289, "lr": 1.0982557287340598e-06, "epoch": 0.27459506999242667, "percentage": 5.49, "elapsed_time": "0:14:26", "remaining_time": "4:08:36", "throughput": 8749.44, "total_tokens": 7584256} +{"current_steps": 11245, "total_steps": 204665, "loss": 0.2638, "lr": 1.0987443201250795e-06, "epoch": 0.2747172208242738, "percentage": 5.49, "elapsed_time": "0:14:27", "remaining_time": "4:08:35", "throughput": 8749.6, "total_tokens": 7587392} +{"current_steps": 11250, "total_steps": 204665, "loss": 0.1724, "lr": 1.099232911516099e-06, "epoch": 0.274839371656121, "percentage": 5.5, "elapsed_time": "0:14:27", "remaining_time": "4:08:34", "throughput": 8749.79, "total_tokens": 7590528} +{"current_steps": 11255, "total_steps": 204665, "loss": 0.0775, "lr": 1.0997215029071188e-06, "epoch": 0.2749615224879681, "percentage": 5.5, "elapsed_time": "0:14:27", "remaining_time": "4:08:33", "throughput": 8750.07, "total_tokens": 7593792} +{"current_steps": 11260, "total_steps": 204665, "loss": 0.1246, "lr": 1.1002100942981385e-06, "epoch": 0.2750836733198153, "percentage": 5.5, "elapsed_time": "0:14:28", "remaining_time": "4:08:32", "throughput": 8750.44, "total_tokens": 7597120} +{"current_steps": 11265, "total_steps": 204665, "loss": 0.1392, "lr": 1.100698685689158e-06, "epoch": 0.27520582415166245, "percentage": 5.5, "elapsed_time": "0:14:28", "remaining_time": "4:08:31", "throughput": 8750.6, "total_tokens": 7600256} +{"current_steps": 11270, "total_steps": 204665, "loss": 0.1305, "lr": 1.1011872770801779e-06, "epoch": 0.27532797498350964, "percentage": 5.51, "elapsed_time": "0:14:28", "remaining_time": "4:08:30", "throughput": 8750.6, "total_tokens": 7603200} +{"current_steps": 11275, "total_steps": 204665, "loss": 0.1257, "lr": 1.1016758684711975e-06, "epoch": 0.2754501258153568, "percentage": 5.51, "elapsed_time": "0:14:29", "remaining_time": "4:08:28", "throughput": 8750.83, "total_tokens": 7606400} +{"current_steps": 11280, "total_steps": 204665, "loss": 0.2147, "lr": 1.1021644598622172e-06, "epoch": 0.27557227664720396, "percentage": 5.51, "elapsed_time": "0:14:29", "remaining_time": "4:08:27", "throughput": 8751.03, "total_tokens": 7609600} +{"current_steps": 11285, "total_steps": 204665, "loss": 0.1249, "lr": 1.102653051253237e-06, "epoch": 0.27569442747905115, "percentage": 5.51, "elapsed_time": "0:14:29", "remaining_time": "4:08:26", "throughput": 8751.2, "total_tokens": 7612736} +{"current_steps": 11290, "total_steps": 204665, "loss": 0.0402, "lr": 1.1031416426442566e-06, "epoch": 0.2758165783108983, "percentage": 5.52, "elapsed_time": "0:14:30", "remaining_time": "4:08:25", "throughput": 8751.62, "total_tokens": 7616128} +{"current_steps": 11295, "total_steps": 204665, "loss": 0.1116, "lr": 1.1036302340352763e-06, "epoch": 0.2759387291427455, "percentage": 5.52, "elapsed_time": "0:14:30", "remaining_time": "4:08:24", "throughput": 8751.78, "total_tokens": 7619264} +{"current_steps": 11300, "total_steps": 204665, "loss": 0.1853, "lr": 1.104118825426296e-06, "epoch": 0.2760608799745926, "percentage": 5.52, "elapsed_time": "0:14:30", "remaining_time": "4:08:23", "throughput": 8751.94, "total_tokens": 7622400} +{"current_steps": 11305, "total_steps": 204665, "loss": 0.134, "lr": 1.1046074168173156e-06, "epoch": 0.2761830308064398, "percentage": 5.52, "elapsed_time": "0:14:31", "remaining_time": "4:08:22", "throughput": 8751.99, "total_tokens": 7625408} +{"current_steps": 11310, "total_steps": 204665, "loss": 0.1378, "lr": 1.1050960082083353e-06, "epoch": 0.27630518163828693, "percentage": 5.53, "elapsed_time": "0:14:31", "remaining_time": "4:08:21", "throughput": 8752.59, "total_tokens": 7628992} +{"current_steps": 11315, "total_steps": 204665, "loss": 0.1405, "lr": 1.105584599599355e-06, "epoch": 0.2764273324701341, "percentage": 5.53, "elapsed_time": "0:14:31", "remaining_time": "4:08:20", "throughput": 8752.99, "total_tokens": 7632384} +{"current_steps": 11320, "total_steps": 204665, "loss": 0.0711, "lr": 1.1060731909903747e-06, "epoch": 0.2765494833019813, "percentage": 5.53, "elapsed_time": "0:14:32", "remaining_time": "4:08:19", "throughput": 8752.92, "total_tokens": 7635264} +{"current_steps": 11325, "total_steps": 204665, "loss": 0.133, "lr": 1.1065617823813944e-06, "epoch": 0.27667163413382845, "percentage": 5.53, "elapsed_time": "0:14:32", "remaining_time": "4:08:17", "throughput": 8752.97, "total_tokens": 7638272} +{"current_steps": 11330, "total_steps": 204665, "loss": 0.1541, "lr": 1.1070503737724142e-06, "epoch": 0.27679378496567564, "percentage": 5.54, "elapsed_time": "0:14:32", "remaining_time": "4:08:16", "throughput": 8753.28, "total_tokens": 7641536} +{"current_steps": 11335, "total_steps": 204665, "loss": 0.1017, "lr": 1.1075389651634337e-06, "epoch": 0.27691593579752277, "percentage": 5.54, "elapsed_time": "0:14:33", "remaining_time": "4:08:15", "throughput": 8753.85, "total_tokens": 7645120} +{"current_steps": 11340, "total_steps": 204665, "loss": 0.0997, "lr": 1.1080275565544534e-06, "epoch": 0.27703808662936996, "percentage": 5.54, "elapsed_time": "0:14:33", "remaining_time": "4:08:14", "throughput": 8754.49, "total_tokens": 7648768} +{"current_steps": 11345, "total_steps": 204665, "loss": 0.2157, "lr": 1.1085161479454733e-06, "epoch": 0.2771602374612171, "percentage": 5.54, "elapsed_time": "0:14:34", "remaining_time": "4:08:13", "throughput": 8754.54, "total_tokens": 7651776} +{"current_steps": 11350, "total_steps": 204665, "loss": 0.0374, "lr": 1.1090047393364928e-06, "epoch": 0.2772823882930643, "percentage": 5.55, "elapsed_time": "0:14:34", "remaining_time": "4:08:12", "throughput": 8754.93, "total_tokens": 7655168} +{"current_steps": 11355, "total_steps": 204665, "loss": 0.0361, "lr": 1.1094933307275124e-06, "epoch": 0.2774045391249114, "percentage": 5.55, "elapsed_time": "0:14:34", "remaining_time": "4:08:11", "throughput": 8755.51, "total_tokens": 7658752} +{"current_steps": 11360, "total_steps": 204665, "loss": 0.1332, "lr": 1.1099819221185323e-06, "epoch": 0.2775266899567586, "percentage": 5.55, "elapsed_time": "0:14:35", "remaining_time": "4:08:10", "throughput": 8755.86, "total_tokens": 7662080} +{"current_steps": 11365, "total_steps": 204665, "loss": 0.2056, "lr": 1.110470513509552e-06, "epoch": 0.27764884078860574, "percentage": 5.55, "elapsed_time": "0:14:35", "remaining_time": "4:08:09", "throughput": 8756.11, "total_tokens": 7665280} +{"current_steps": 11370, "total_steps": 204665, "loss": 0.09, "lr": 1.1109591049005715e-06, "epoch": 0.27777099162045293, "percentage": 5.56, "elapsed_time": "0:14:35", "remaining_time": "4:08:08", "throughput": 8756.5, "total_tokens": 7668672} +{"current_steps": 11375, "total_steps": 204665, "loss": 0.1548, "lr": 1.1114476962915914e-06, "epoch": 0.2778931424523001, "percentage": 5.56, "elapsed_time": "0:14:36", "remaining_time": "4:08:07", "throughput": 8756.72, "total_tokens": 7671872} +{"current_steps": 11380, "total_steps": 204665, "loss": 0.08, "lr": 1.111936287682611e-06, "epoch": 0.27801529328414726, "percentage": 5.56, "elapsed_time": "0:14:36", "remaining_time": "4:08:06", "throughput": 8756.92, "total_tokens": 7675072} +{"current_steps": 11385, "total_steps": 204665, "loss": 0.089, "lr": 1.1124248790736305e-06, "epoch": 0.27813744411599445, "percentage": 5.56, "elapsed_time": "0:14:36", "remaining_time": "4:08:05", "throughput": 8756.85, "total_tokens": 7677952} +{"current_steps": 11390, "total_steps": 204665, "loss": 0.0878, "lr": 1.1129134704646504e-06, "epoch": 0.2782595949478416, "percentage": 5.57, "elapsed_time": "0:14:37", "remaining_time": "4:08:03", "throughput": 8757.01, "total_tokens": 7681088} +{"current_steps": 11395, "total_steps": 204665, "loss": 0.0657, "lr": 1.11340206185567e-06, "epoch": 0.27838174577968877, "percentage": 5.57, "elapsed_time": "0:14:37", "remaining_time": "4:08:02", "throughput": 8757.23, "total_tokens": 7684288} +{"current_steps": 11400, "total_steps": 204665, "loss": 0.0987, "lr": 1.1138906532466898e-06, "epoch": 0.2785038966115359, "percentage": 5.57, "elapsed_time": "0:14:37", "remaining_time": "4:08:01", "throughput": 8757.31, "total_tokens": 7687360} +{"current_steps": 11405, "total_steps": 204665, "loss": 0.1251, "lr": 1.1143792446377095e-06, "epoch": 0.2786260474433831, "percentage": 5.57, "elapsed_time": "0:14:38", "remaining_time": "4:08:00", "throughput": 8757.42, "total_tokens": 7690432} +{"current_steps": 11410, "total_steps": 204665, "loss": 0.1636, "lr": 1.1148678360287291e-06, "epoch": 0.27874819827523023, "percentage": 5.57, "elapsed_time": "0:14:38", "remaining_time": "4:07:59", "throughput": 8757.87, "total_tokens": 7693888} +{"current_steps": 11415, "total_steps": 204665, "loss": 0.1634, "lr": 1.1153564274197488e-06, "epoch": 0.2788703491070774, "percentage": 5.58, "elapsed_time": "0:14:38", "remaining_time": "4:07:58", "throughput": 8758.64, "total_tokens": 7697664} +{"current_steps": 11420, "total_steps": 204665, "loss": 0.136, "lr": 1.1158450188107685e-06, "epoch": 0.2789924999389246, "percentage": 5.58, "elapsed_time": "0:14:39", "remaining_time": "4:07:57", "throughput": 8758.76, "total_tokens": 7700800} +{"current_steps": 11425, "total_steps": 204665, "loss": 0.0537, "lr": 1.1163336102017882e-06, "epoch": 0.27911465077077174, "percentage": 5.58, "elapsed_time": "0:14:39", "remaining_time": "4:07:56", "throughput": 8758.69, "total_tokens": 7703680} +{"current_steps": 11430, "total_steps": 204665, "loss": 0.2367, "lr": 1.1168222015928079e-06, "epoch": 0.27923680160261893, "percentage": 5.58, "elapsed_time": "0:14:39", "remaining_time": "4:07:55", "throughput": 8759.4, "total_tokens": 7707456} +{"current_steps": 11435, "total_steps": 204665, "loss": 0.1418, "lr": 1.1173107929838276e-06, "epoch": 0.27935895243446607, "percentage": 5.59, "elapsed_time": "0:14:40", "remaining_time": "4:07:54", "throughput": 8759.69, "total_tokens": 7710720} +{"current_steps": 11440, "total_steps": 204665, "loss": 0.2804, "lr": 1.1177993843748472e-06, "epoch": 0.27948110326631326, "percentage": 5.59, "elapsed_time": "0:14:40", "remaining_time": "4:07:53", "throughput": 8759.8, "total_tokens": 7713792} +{"current_steps": 11445, "total_steps": 204665, "loss": 0.1272, "lr": 1.118287975765867e-06, "epoch": 0.2796032540981604, "percentage": 5.59, "elapsed_time": "0:14:40", "remaining_time": "4:07:52", "throughput": 8759.8, "total_tokens": 7716736} +{"current_steps": 11450, "total_steps": 204665, "loss": 0.1038, "lr": 1.1187765671568868e-06, "epoch": 0.2797254049300076, "percentage": 5.59, "elapsed_time": "0:14:41", "remaining_time": "4:07:51", "throughput": 8760.06, "total_tokens": 7720000} +{"current_steps": 11455, "total_steps": 204665, "loss": 0.1069, "lr": 1.1192651585479063e-06, "epoch": 0.2798475557618547, "percentage": 5.6, "elapsed_time": "0:14:41", "remaining_time": "4:07:50", "throughput": 8760.54, "total_tokens": 7723456} +{"current_steps": 11460, "total_steps": 204665, "loss": 0.0691, "lr": 1.119753749938926e-06, "epoch": 0.2799697065937019, "percentage": 5.6, "elapsed_time": "0:14:41", "remaining_time": "4:07:49", "throughput": 8760.58, "total_tokens": 7726464} +{"current_steps": 11465, "total_steps": 204665, "loss": 0.1096, "lr": 1.1202423413299459e-06, "epoch": 0.2800918574255491, "percentage": 5.6, "elapsed_time": "0:14:42", "remaining_time": "4:07:48", "throughput": 8761.0, "total_tokens": 7729920} +{"current_steps": 11470, "total_steps": 204665, "loss": 0.0664, "lr": 1.1207309327209653e-06, "epoch": 0.28021400825739623, "percentage": 5.6, "elapsed_time": "0:14:42", "remaining_time": "4:07:47", "throughput": 8761.28, "total_tokens": 7733184} +{"current_steps": 11475, "total_steps": 204665, "loss": 0.11, "lr": 1.121219524111985e-06, "epoch": 0.2803361590892434, "percentage": 5.61, "elapsed_time": "0:14:42", "remaining_time": "4:07:45", "throughput": 8761.5, "total_tokens": 7736384} +{"current_steps": 11480, "total_steps": 204665, "loss": 0.1157, "lr": 1.121708115503005e-06, "epoch": 0.28045830992109055, "percentage": 5.61, "elapsed_time": "0:14:43", "remaining_time": "4:07:45", "throughput": 8762.18, "total_tokens": 7740096} +{"current_steps": 11485, "total_steps": 204665, "loss": 0.0868, "lr": 1.1221967068940246e-06, "epoch": 0.28058046075293774, "percentage": 5.61, "elapsed_time": "0:14:43", "remaining_time": "4:07:44", "throughput": 8762.8, "total_tokens": 7743744} +{"current_steps": 11490, "total_steps": 204665, "loss": 0.1729, "lr": 1.122685298285044e-06, "epoch": 0.2807026115847849, "percentage": 5.61, "elapsed_time": "0:14:44", "remaining_time": "4:07:43", "throughput": 8763.32, "total_tokens": 7747264} +{"current_steps": 11495, "total_steps": 204665, "loss": 0.1499, "lr": 1.123173889676064e-06, "epoch": 0.28082476241663207, "percentage": 5.62, "elapsed_time": "0:14:44", "remaining_time": "4:07:42", "throughput": 8763.5, "total_tokens": 7750400} +{"current_steps": 11500, "total_steps": 204665, "loss": 0.1297, "lr": 1.1236624810670836e-06, "epoch": 0.2809469132484792, "percentage": 5.62, "elapsed_time": "0:14:44", "remaining_time": "4:07:40", "throughput": 8763.7, "total_tokens": 7753600} +{"current_steps": 11505, "total_steps": 204665, "loss": 0.1523, "lr": 1.124151072458103e-06, "epoch": 0.2810690640803264, "percentage": 5.62, "elapsed_time": "0:14:45", "remaining_time": "4:07:39", "throughput": 8764.04, "total_tokens": 7756928} +{"current_steps": 11510, "total_steps": 204665, "loss": 0.1703, "lr": 1.124639663849123e-06, "epoch": 0.2811912149121735, "percentage": 5.62, "elapsed_time": "0:14:45", "remaining_time": "4:07:38", "throughput": 8764.04, "total_tokens": 7759872} +{"current_steps": 11515, "total_steps": 204665, "loss": 0.0938, "lr": 1.1251282552401427e-06, "epoch": 0.2813133657440207, "percentage": 5.63, "elapsed_time": "0:14:45", "remaining_time": "4:07:37", "throughput": 8764.72, "total_tokens": 7763584} +{"current_steps": 11520, "total_steps": 204665, "loss": 0.1084, "lr": 1.1256168466311623e-06, "epoch": 0.2814355165758679, "percentage": 5.63, "elapsed_time": "0:14:46", "remaining_time": "4:07:36", "throughput": 8764.87, "total_tokens": 7766720} +{"current_steps": 11525, "total_steps": 204665, "loss": 0.2459, "lr": 1.126105438022182e-06, "epoch": 0.28155766740771504, "percentage": 5.63, "elapsed_time": "0:14:46", "remaining_time": "4:07:35", "throughput": 8765.03, "total_tokens": 7769856} +{"current_steps": 11530, "total_steps": 204665, "loss": 0.1084, "lr": 1.1265940294132017e-06, "epoch": 0.28167981823956223, "percentage": 5.63, "elapsed_time": "0:14:46", "remaining_time": "4:07:34", "throughput": 8765.18, "total_tokens": 7772992} +{"current_steps": 11535, "total_steps": 204665, "loss": 0.1102, "lr": 1.1270826208042214e-06, "epoch": 0.28180196907140936, "percentage": 5.64, "elapsed_time": "0:14:47", "remaining_time": "4:07:33", "throughput": 8765.53, "total_tokens": 7776320} +{"current_steps": 11540, "total_steps": 204665, "loss": 0.1183, "lr": 1.127571212195241e-06, "epoch": 0.28192411990325655, "percentage": 5.64, "elapsed_time": "0:14:47", "remaining_time": "4:07:32", "throughput": 8765.75, "total_tokens": 7779520} +{"current_steps": 11545, "total_steps": 204665, "loss": 0.0264, "lr": 1.1280598035862608e-06, "epoch": 0.2820462707351037, "percentage": 5.64, "elapsed_time": "0:14:47", "remaining_time": "4:07:31", "throughput": 8766.48, "total_tokens": 7783296} +{"current_steps": 11550, "total_steps": 204665, "loss": 0.0725, "lr": 1.1285483949772804e-06, "epoch": 0.2821684215669509, "percentage": 5.64, "elapsed_time": "0:14:48", "remaining_time": "4:07:30", "throughput": 8766.54, "total_tokens": 7786304} +{"current_steps": 11555, "total_steps": 204665, "loss": 0.0659, "lr": 1.1290369863683003e-06, "epoch": 0.282290572398798, "percentage": 5.65, "elapsed_time": "0:14:48", "remaining_time": "4:07:29", "throughput": 8767.76, "total_tokens": 7790592} +{"current_steps": 11560, "total_steps": 204665, "loss": 0.2091, "lr": 1.1295255777593198e-06, "epoch": 0.2824127232306452, "percentage": 5.65, "elapsed_time": "0:14:48", "remaining_time": "4:07:28", "throughput": 8768.59, "total_tokens": 7794432} +{"current_steps": 11565, "total_steps": 204665, "loss": 0.056, "lr": 1.1300141691503395e-06, "epoch": 0.2825348740624924, "percentage": 5.65, "elapsed_time": "0:14:49", "remaining_time": "4:07:27", "throughput": 8769.04, "total_tokens": 7797888} +{"current_steps": 11570, "total_steps": 204665, "loss": 0.1011, "lr": 1.1305027605413594e-06, "epoch": 0.2826570248943395, "percentage": 5.65, "elapsed_time": "0:14:49", "remaining_time": "4:07:26", "throughput": 8769.1, "total_tokens": 7800896} +{"current_steps": 11575, "total_steps": 204665, "loss": 0.144, "lr": 1.1309913519323788e-06, "epoch": 0.2827791757261867, "percentage": 5.66, "elapsed_time": "0:14:49", "remaining_time": "4:07:25", "throughput": 8769.14, "total_tokens": 7803904} +{"current_steps": 11580, "total_steps": 204665, "loss": 0.0422, "lr": 1.1314799433233985e-06, "epoch": 0.28290132655803385, "percentage": 5.66, "elapsed_time": "0:14:50", "remaining_time": "4:07:24", "throughput": 8769.61, "total_tokens": 7807360} +{"current_steps": 11585, "total_steps": 204665, "loss": 0.1381, "lr": 1.1319685347144184e-06, "epoch": 0.28302347738988104, "percentage": 5.66, "elapsed_time": "0:14:50", "remaining_time": "4:07:23", "throughput": 8769.84, "total_tokens": 7810560} +{"current_steps": 11590, "total_steps": 204665, "loss": 0.0882, "lr": 1.1324571261054379e-06, "epoch": 0.2831456282217282, "percentage": 5.66, "elapsed_time": "0:14:50", "remaining_time": "4:07:22", "throughput": 8770.65, "total_tokens": 7814400} +{"current_steps": 11595, "total_steps": 204665, "loss": 0.1955, "lr": 1.1329457174964576e-06, "epoch": 0.28326777905357536, "percentage": 5.67, "elapsed_time": "0:14:51", "remaining_time": "4:07:21", "throughput": 8770.96, "total_tokens": 7817728} +{"current_steps": 11600, "total_steps": 204665, "loss": 0.0308, "lr": 1.1334343088874775e-06, "epoch": 0.2833899298854225, "percentage": 5.67, "elapsed_time": "0:14:51", "remaining_time": "4:07:20", "throughput": 8771.18, "total_tokens": 7820928} +{"current_steps": 11605, "total_steps": 204665, "loss": 0.1583, "lr": 1.1339229002784971e-06, "epoch": 0.2835120807172697, "percentage": 5.67, "elapsed_time": "0:14:52", "remaining_time": "4:07:19", "throughput": 8771.43, "total_tokens": 7824128} +{"current_steps": 11610, "total_steps": 204665, "loss": 0.2019, "lr": 1.1344114916695166e-06, "epoch": 0.2836342315491168, "percentage": 5.67, "elapsed_time": "0:14:52", "remaining_time": "4:07:18", "throughput": 8771.97, "total_tokens": 7827712} +{"current_steps": 11615, "total_steps": 204665, "loss": 0.097, "lr": 1.1349000830605365e-06, "epoch": 0.283756382380964, "percentage": 5.68, "elapsed_time": "0:14:52", "remaining_time": "4:07:17", "throughput": 8772.01, "total_tokens": 7830720} +{"current_steps": 11620, "total_steps": 204665, "loss": 0.065, "lr": 1.1353886744515562e-06, "epoch": 0.2838785332128112, "percentage": 5.68, "elapsed_time": "0:14:53", "remaining_time": "4:07:16", "throughput": 8773.01, "total_tokens": 7834816} +{"current_steps": 11625, "total_steps": 204665, "loss": 0.1565, "lr": 1.1358772658425757e-06, "epoch": 0.28400068404465834, "percentage": 5.68, "elapsed_time": "0:14:53", "remaining_time": "4:07:15", "throughput": 8773.27, "total_tokens": 7838080} +{"current_steps": 11630, "total_steps": 204665, "loss": 0.0938, "lr": 1.1363658572335956e-06, "epoch": 0.2841228348765055, "percentage": 5.68, "elapsed_time": "0:14:53", "remaining_time": "4:07:14", "throughput": 8773.51, "total_tokens": 7841280} +{"current_steps": 11635, "total_steps": 204665, "loss": 0.1394, "lr": 1.1368544486246152e-06, "epoch": 0.28424498570835266, "percentage": 5.68, "elapsed_time": "0:14:54", "remaining_time": "4:07:13", "throughput": 8773.48, "total_tokens": 7844224} +{"current_steps": 11640, "total_steps": 204665, "loss": 0.1112, "lr": 1.137343040015635e-06, "epoch": 0.28436713654019985, "percentage": 5.69, "elapsed_time": "0:14:54", "remaining_time": "4:07:12", "throughput": 8773.74, "total_tokens": 7847488} +{"current_steps": 11645, "total_steps": 204665, "loss": 0.146, "lr": 1.1378316314066546e-06, "epoch": 0.284489287372047, "percentage": 5.69, "elapsed_time": "0:14:54", "remaining_time": "4:07:11", "throughput": 8774.07, "total_tokens": 7850816} +{"current_steps": 11650, "total_steps": 204665, "loss": 0.1581, "lr": 1.1383202227976743e-06, "epoch": 0.2846114382038942, "percentage": 5.69, "elapsed_time": "0:14:55", "remaining_time": "4:07:10", "throughput": 8774.23, "total_tokens": 7853952} +{"current_steps": 11655, "total_steps": 204665, "loss": 0.1123, "lr": 1.138808814188694e-06, "epoch": 0.2847335890357413, "percentage": 5.69, "elapsed_time": "0:14:55", "remaining_time": "4:07:09", "throughput": 8774.6, "total_tokens": 7857344} +{"current_steps": 11660, "total_steps": 204665, "loss": 0.0412, "lr": 1.1392974055797136e-06, "epoch": 0.2848557398675885, "percentage": 5.7, "elapsed_time": "0:14:55", "remaining_time": "4:07:08", "throughput": 8775.17, "total_tokens": 7860928} +{"current_steps": 11665, "total_steps": 204665, "loss": 0.126, "lr": 1.1397859969707333e-06, "epoch": 0.2849778906994357, "percentage": 5.7, "elapsed_time": "0:14:56", "remaining_time": "4:07:07", "throughput": 8776.16, "total_tokens": 7865024} +{"current_steps": 11670, "total_steps": 204665, "loss": 0.0255, "lr": 1.140274588361753e-06, "epoch": 0.2851000415312828, "percentage": 5.7, "elapsed_time": "0:14:56", "remaining_time": "4:07:06", "throughput": 8776.78, "total_tokens": 7868672} +{"current_steps": 11675, "total_steps": 204665, "loss": 0.1446, "lr": 1.1407631797527729e-06, "epoch": 0.28522219236313, "percentage": 5.7, "elapsed_time": "0:14:56", "remaining_time": "4:07:05", "throughput": 8776.81, "total_tokens": 7871680} +{"current_steps": 11680, "total_steps": 204665, "loss": 0.1934, "lr": 1.1412517711437924e-06, "epoch": 0.28534434319497715, "percentage": 5.71, "elapsed_time": "0:14:57", "remaining_time": "4:07:04", "throughput": 8777.23, "total_tokens": 7875136} +{"current_steps": 11685, "total_steps": 204665, "loss": 0.1407, "lr": 1.141740362534812e-06, "epoch": 0.28546649402682434, "percentage": 5.71, "elapsed_time": "0:14:57", "remaining_time": "4:07:03", "throughput": 8777.6, "total_tokens": 7878528} +{"current_steps": 11690, "total_steps": 204665, "loss": 0.0432, "lr": 1.142228953925832e-06, "epoch": 0.28558864485867147, "percentage": 5.71, "elapsed_time": "0:14:57", "remaining_time": "4:07:02", "throughput": 8778.07, "total_tokens": 7881984} +{"current_steps": 11695, "total_steps": 204665, "loss": 0.0923, "lr": 1.1427175453168514e-06, "epoch": 0.28571079569051866, "percentage": 5.71, "elapsed_time": "0:14:58", "remaining_time": "4:07:01", "throughput": 8778.26, "total_tokens": 7885184} +{"current_steps": 11700, "total_steps": 204665, "loss": 0.2471, "lr": 1.143206136707871e-06, "epoch": 0.2858329465223658, "percentage": 5.72, "elapsed_time": "0:14:58", "remaining_time": "4:07:00", "throughput": 8779.1, "total_tokens": 7889088} +{"current_steps": 11705, "total_steps": 204665, "loss": 0.0819, "lr": 1.143694728098891e-06, "epoch": 0.285955097354213, "percentage": 5.72, "elapsed_time": "0:14:58", "remaining_time": "4:06:59", "throughput": 8779.9, "total_tokens": 7892928} +{"current_steps": 11710, "total_steps": 204665, "loss": 0.0932, "lr": 1.1441833194899105e-06, "epoch": 0.2860772481860602, "percentage": 5.72, "elapsed_time": "0:14:59", "remaining_time": "4:06:58", "throughput": 8780.28, "total_tokens": 7896320} +{"current_steps": 11715, "total_steps": 204665, "loss": 0.2197, "lr": 1.1446719108809301e-06, "epoch": 0.2861993990179073, "percentage": 5.72, "elapsed_time": "0:14:59", "remaining_time": "4:06:57", "throughput": 8780.71, "total_tokens": 7899776} +{"current_steps": 11720, "total_steps": 204665, "loss": 0.0535, "lr": 1.14516050227195e-06, "epoch": 0.2863215498497545, "percentage": 5.73, "elapsed_time": "0:15:00", "remaining_time": "4:06:57", "throughput": 8781.33, "total_tokens": 7903424} +{"current_steps": 11725, "total_steps": 204665, "loss": 0.1282, "lr": 1.1456490936629697e-06, "epoch": 0.28644370068160163, "percentage": 5.73, "elapsed_time": "0:15:00", "remaining_time": "4:06:55", "throughput": 8781.35, "total_tokens": 7906432} +{"current_steps": 11730, "total_steps": 204665, "loss": 0.0956, "lr": 1.1461376850539892e-06, "epoch": 0.2865658515134488, "percentage": 5.73, "elapsed_time": "0:15:00", "remaining_time": "4:06:54", "throughput": 8781.68, "total_tokens": 7909760} +{"current_steps": 11735, "total_steps": 204665, "loss": 0.1069, "lr": 1.146626276445009e-06, "epoch": 0.28668800234529596, "percentage": 5.73, "elapsed_time": "0:15:01", "remaining_time": "4:06:53", "throughput": 8781.87, "total_tokens": 7912960} +{"current_steps": 11740, "total_steps": 204665, "loss": 0.1146, "lr": 1.1471148678360288e-06, "epoch": 0.28681015317714315, "percentage": 5.74, "elapsed_time": "0:15:01", "remaining_time": "4:06:53", "throughput": 8782.66, "total_tokens": 7916800} +{"current_steps": 11745, "total_steps": 204665, "loss": 0.1474, "lr": 1.1476034592270482e-06, "epoch": 0.2869323040089903, "percentage": 5.74, "elapsed_time": "0:15:01", "remaining_time": "4:06:51", "throughput": 8782.69, "total_tokens": 7919808} +{"current_steps": 11750, "total_steps": 204665, "loss": 0.0573, "lr": 1.1480920506180681e-06, "epoch": 0.28705445484083747, "percentage": 5.74, "elapsed_time": "0:15:02", "remaining_time": "4:06:50", "throughput": 8783.08, "total_tokens": 7923200} +{"current_steps": 11755, "total_steps": 204665, "loss": 0.214, "lr": 1.1485806420090878e-06, "epoch": 0.2871766056726846, "percentage": 5.74, "elapsed_time": "0:15:02", "remaining_time": "4:06:49", "throughput": 8783.13, "total_tokens": 7926208} +{"current_steps": 11760, "total_steps": 204665, "loss": 0.0956, "lr": 1.1490692334001075e-06, "epoch": 0.2872987565045318, "percentage": 5.75, "elapsed_time": "0:15:02", "remaining_time": "4:06:48", "throughput": 8783.56, "total_tokens": 7929664} +{"current_steps": 11765, "total_steps": 204665, "loss": 0.0706, "lr": 1.1495578247911272e-06, "epoch": 0.287420907336379, "percentage": 5.75, "elapsed_time": "0:15:03", "remaining_time": "4:06:47", "throughput": 8784.18, "total_tokens": 7933312} +{"current_steps": 11770, "total_steps": 204665, "loss": 0.2371, "lr": 1.1500464161821468e-06, "epoch": 0.2875430581682261, "percentage": 5.75, "elapsed_time": "0:15:03", "remaining_time": "4:06:47", "throughput": 8784.72, "total_tokens": 7936896} +{"current_steps": 11775, "total_steps": 204665, "loss": 0.1282, "lr": 1.1505350075731665e-06, "epoch": 0.2876652090000733, "percentage": 5.75, "elapsed_time": "0:15:03", "remaining_time": "4:06:46", "throughput": 8785.44, "total_tokens": 7940672} +{"current_steps": 11780, "total_steps": 204665, "loss": 0.1103, "lr": 1.1510235989641862e-06, "epoch": 0.28778735983192044, "percentage": 5.76, "elapsed_time": "0:15:04", "remaining_time": "4:06:45", "throughput": 8785.8, "total_tokens": 7944064} +{"current_steps": 11785, "total_steps": 204665, "loss": 0.0834, "lr": 1.1515121903552059e-06, "epoch": 0.28790951066376763, "percentage": 5.76, "elapsed_time": "0:15:04", "remaining_time": "4:06:44", "throughput": 8786.15, "total_tokens": 7947456} +{"current_steps": 11790, "total_steps": 204665, "loss": 0.1646, "lr": 1.1520007817462256e-06, "epoch": 0.28803166149561477, "percentage": 5.76, "elapsed_time": "0:15:04", "remaining_time": "4:06:43", "throughput": 8786.25, "total_tokens": 7950528} +{"current_steps": 11795, "total_steps": 204665, "loss": 0.1808, "lr": 1.1524893731372455e-06, "epoch": 0.28815381232746196, "percentage": 5.76, "elapsed_time": "0:15:05", "remaining_time": "4:06:42", "throughput": 8786.45, "total_tokens": 7953728} +{"current_steps": 11800, "total_steps": 204665, "loss": 0.0937, "lr": 1.152977964528265e-06, "epoch": 0.2882759631593091, "percentage": 5.77, "elapsed_time": "0:15:05", "remaining_time": "4:06:41", "throughput": 8786.92, "total_tokens": 7957248} +{"current_steps": 11805, "total_steps": 204665, "loss": 0.1189, "lr": 1.1534665559192846e-06, "epoch": 0.2883981139911563, "percentage": 5.77, "elapsed_time": "0:15:05", "remaining_time": "4:06:40", "throughput": 8786.99, "total_tokens": 7960320} +{"current_steps": 11810, "total_steps": 204665, "loss": 0.1614, "lr": 1.1539551473103045e-06, "epoch": 0.28852026482300347, "percentage": 5.77, "elapsed_time": "0:15:06", "remaining_time": "4:06:39", "throughput": 8786.95, "total_tokens": 7963264} +{"current_steps": 11815, "total_steps": 204665, "loss": 0.1142, "lr": 1.154443738701324e-06, "epoch": 0.2886424156548506, "percentage": 5.77, "elapsed_time": "0:15:06", "remaining_time": "4:06:38", "throughput": 8787.16, "total_tokens": 7966464} +{"current_steps": 11820, "total_steps": 204665, "loss": 0.1608, "lr": 1.1549323300923437e-06, "epoch": 0.2887645664866978, "percentage": 5.78, "elapsed_time": "0:15:06", "remaining_time": "4:06:37", "throughput": 8787.85, "total_tokens": 7970240} +{"current_steps": 11825, "total_steps": 204665, "loss": 0.2324, "lr": 1.1554209214833635e-06, "epoch": 0.2888867173185449, "percentage": 5.78, "elapsed_time": "0:15:07", "remaining_time": "4:06:36", "throughput": 8788.21, "total_tokens": 7973632} +{"current_steps": 11830, "total_steps": 204665, "loss": 0.1112, "lr": 1.1559095128743832e-06, "epoch": 0.2890088681503921, "percentage": 5.78, "elapsed_time": "0:15:07", "remaining_time": "4:06:35", "throughput": 8788.4, "total_tokens": 7976832} +{"current_steps": 11835, "total_steps": 204665, "loss": 0.0498, "lr": 1.1563981042654027e-06, "epoch": 0.28913101898223925, "percentage": 5.78, "elapsed_time": "0:15:08", "remaining_time": "4:06:34", "throughput": 8789.53, "total_tokens": 7981120} +{"current_steps": 11840, "total_steps": 204665, "loss": 0.127, "lr": 1.1568866956564226e-06, "epoch": 0.28925316981408644, "percentage": 5.79, "elapsed_time": "0:15:08", "remaining_time": "4:06:33", "throughput": 8789.76, "total_tokens": 7984320} +{"current_steps": 11845, "total_steps": 204665, "loss": 0.0945, "lr": 1.1573752870474423e-06, "epoch": 0.2893753206459336, "percentage": 5.79, "elapsed_time": "0:15:08", "remaining_time": "4:06:32", "throughput": 8790.14, "total_tokens": 7987712} +{"current_steps": 11850, "total_steps": 204665, "loss": 0.2029, "lr": 1.1578638784384617e-06, "epoch": 0.28949747147778077, "percentage": 5.79, "elapsed_time": "0:15:09", "remaining_time": "4:06:31", "throughput": 8790.49, "total_tokens": 7991104} +{"current_steps": 11855, "total_steps": 204665, "loss": 0.1268, "lr": 1.1583524698294816e-06, "epoch": 0.28961962230962796, "percentage": 5.79, "elapsed_time": "0:15:09", "remaining_time": "4:06:30", "throughput": 8790.51, "total_tokens": 7994112} +{"current_steps": 11860, "total_steps": 204665, "loss": 0.1884, "lr": 1.1588410612205013e-06, "epoch": 0.2897417731414751, "percentage": 5.79, "elapsed_time": "0:15:09", "remaining_time": "4:06:29", "throughput": 8790.94, "total_tokens": 7997568} +{"current_steps": 11865, "total_steps": 204665, "loss": 0.1154, "lr": 1.1593296526115208e-06, "epoch": 0.2898639239733223, "percentage": 5.8, "elapsed_time": "0:15:10", "remaining_time": "4:06:28", "throughput": 8791.18, "total_tokens": 8000832} +{"current_steps": 11870, "total_steps": 204665, "loss": 0.0996, "lr": 1.1598182440025407e-06, "epoch": 0.2899860748051694, "percentage": 5.8, "elapsed_time": "0:15:10", "remaining_time": "4:06:27", "throughput": 8791.43, "total_tokens": 8004096} +{"current_steps": 11875, "total_steps": 204665, "loss": 0.0903, "lr": 1.1603068353935604e-06, "epoch": 0.2901082256370166, "percentage": 5.8, "elapsed_time": "0:15:10", "remaining_time": "4:06:26", "throughput": 8791.41, "total_tokens": 8007040} +{"current_steps": 11880, "total_steps": 204665, "loss": 0.1717, "lr": 1.16079542678458e-06, "epoch": 0.29023037646886374, "percentage": 5.8, "elapsed_time": "0:15:11", "remaining_time": "4:06:25", "throughput": 8791.37, "total_tokens": 8009984} +{"current_steps": 11885, "total_steps": 204665, "loss": 0.1056, "lr": 1.1612840181755997e-06, "epoch": 0.2903525273007109, "percentage": 5.81, "elapsed_time": "0:15:11", "remaining_time": "4:06:24", "throughput": 8791.68, "total_tokens": 8013312} +{"current_steps": 11890, "total_steps": 204665, "loss": 0.0957, "lr": 1.1617726095666194e-06, "epoch": 0.29047467813255806, "percentage": 5.81, "elapsed_time": "0:15:11", "remaining_time": "4:06:23", "throughput": 8791.99, "total_tokens": 8016640} +{"current_steps": 11895, "total_steps": 204665, "loss": 0.107, "lr": 1.162261200957639e-06, "epoch": 0.29059682896440525, "percentage": 5.81, "elapsed_time": "0:15:12", "remaining_time": "4:06:22", "throughput": 8792.38, "total_tokens": 8020096} +{"current_steps": 11900, "total_steps": 204665, "loss": 0.0793, "lr": 1.1627497923486588e-06, "epoch": 0.2907189797962524, "percentage": 5.81, "elapsed_time": "0:15:12", "remaining_time": "4:06:21", "throughput": 8793.2, "total_tokens": 8024000} +{"current_steps": 11905, "total_steps": 204665, "loss": 0.0996, "lr": 1.1632383837396784e-06, "epoch": 0.2908411306280996, "percentage": 5.82, "elapsed_time": "0:15:12", "remaining_time": "4:06:20", "throughput": 8793.67, "total_tokens": 8027520} +{"current_steps": 11910, "total_steps": 204665, "loss": 0.0543, "lr": 1.1637269751306981e-06, "epoch": 0.29096328145994677, "percentage": 5.82, "elapsed_time": "0:15:13", "remaining_time": "4:06:20", "throughput": 8794.3, "total_tokens": 8031232} +{"current_steps": 11915, "total_steps": 204665, "loss": 0.1778, "lr": 1.164215566521718e-06, "epoch": 0.2910854322917939, "percentage": 5.82, "elapsed_time": "0:15:13", "remaining_time": "4:06:18", "throughput": 8794.43, "total_tokens": 8034368} +{"current_steps": 11920, "total_steps": 204665, "loss": 0.1668, "lr": 1.1647041579127375e-06, "epoch": 0.2912075831236411, "percentage": 5.82, "elapsed_time": "0:15:13", "remaining_time": "4:06:17", "throughput": 8794.51, "total_tokens": 8037440} +{"current_steps": 11925, "total_steps": 204665, "loss": 0.2086, "lr": 1.1651927493037572e-06, "epoch": 0.2913297339554882, "percentage": 5.83, "elapsed_time": "0:15:14", "remaining_time": "4:06:17", "throughput": 8795.17, "total_tokens": 8041152} +{"current_steps": 11930, "total_steps": 204665, "loss": 0.1015, "lr": 1.165681340694777e-06, "epoch": 0.2914518847873354, "percentage": 5.83, "elapsed_time": "0:15:14", "remaining_time": "4:06:16", "throughput": 8795.57, "total_tokens": 8044608} +{"current_steps": 11935, "total_steps": 204665, "loss": 0.0934, "lr": 1.1661699320857965e-06, "epoch": 0.29157403561918255, "percentage": 5.83, "elapsed_time": "0:15:14", "remaining_time": "4:06:15", "throughput": 8795.96, "total_tokens": 8048064} +{"current_steps": 11940, "total_steps": 204665, "loss": 0.1407, "lr": 1.1666585234768162e-06, "epoch": 0.29169618645102974, "percentage": 5.83, "elapsed_time": "0:15:15", "remaining_time": "4:06:14", "throughput": 8796.27, "total_tokens": 8051392} +{"current_steps": 11945, "total_steps": 204665, "loss": 0.1593, "lr": 1.1671471148678361e-06, "epoch": 0.29181833728287687, "percentage": 5.84, "elapsed_time": "0:15:15", "remaining_time": "4:06:13", "throughput": 8796.53, "total_tokens": 8054656} +{"current_steps": 11950, "total_steps": 204665, "loss": 0.0877, "lr": 1.1676357062588558e-06, "epoch": 0.29194048811472406, "percentage": 5.84, "elapsed_time": "0:15:16", "remaining_time": "4:06:12", "throughput": 8797.01, "total_tokens": 8058176} +{"current_steps": 11955, "total_steps": 204665, "loss": 0.0686, "lr": 1.1681242976498753e-06, "epoch": 0.29206263894657125, "percentage": 5.84, "elapsed_time": "0:15:16", "remaining_time": "4:06:11", "throughput": 8797.93, "total_tokens": 8062208} +{"current_steps": 11960, "total_steps": 204665, "loss": 0.1233, "lr": 1.1686128890408952e-06, "epoch": 0.2921847897784184, "percentage": 5.84, "elapsed_time": "0:15:16", "remaining_time": "4:06:10", "throughput": 8799.15, "total_tokens": 8066560} +{"current_steps": 11965, "total_steps": 204665, "loss": 0.1607, "lr": 1.1691014804319148e-06, "epoch": 0.2923069406102656, "percentage": 5.85, "elapsed_time": "0:15:17", "remaining_time": "4:06:09", "throughput": 8799.41, "total_tokens": 8069824} +{"current_steps": 11970, "total_steps": 204665, "loss": 0.2533, "lr": 1.1695900718229343e-06, "epoch": 0.2924290914421127, "percentage": 5.85, "elapsed_time": "0:15:17", "remaining_time": "4:06:08", "throughput": 8799.78, "total_tokens": 8073216} +{"current_steps": 11975, "total_steps": 204665, "loss": 0.104, "lr": 1.1700786632139542e-06, "epoch": 0.2925512422739599, "percentage": 5.85, "elapsed_time": "0:15:17", "remaining_time": "4:06:07", "throughput": 8799.98, "total_tokens": 8076416} +{"current_steps": 11980, "total_steps": 204665, "loss": 0.0888, "lr": 1.1705672546049739e-06, "epoch": 0.29267339310580703, "percentage": 5.85, "elapsed_time": "0:15:18", "remaining_time": "4:06:06", "throughput": 8800.22, "total_tokens": 8079680} +{"current_steps": 11985, "total_steps": 204665, "loss": 0.1051, "lr": 1.1710558459959936e-06, "epoch": 0.2927955439376542, "percentage": 5.86, "elapsed_time": "0:15:18", "remaining_time": "4:06:06", "throughput": 8800.66, "total_tokens": 8083136} +{"current_steps": 11990, "total_steps": 204665, "loss": 0.1162, "lr": 1.1715444373870132e-06, "epoch": 0.29291769476950136, "percentage": 5.86, "elapsed_time": "0:15:18", "remaining_time": "4:06:05", "throughput": 8800.84, "total_tokens": 8086336} +{"current_steps": 11995, "total_steps": 204665, "loss": 0.1732, "lr": 1.172033028778033e-06, "epoch": 0.29303984560134855, "percentage": 5.86, "elapsed_time": "0:15:19", "remaining_time": "4:06:03", "throughput": 8801.04, "total_tokens": 8089536} +{"current_steps": 12000, "total_steps": 204665, "loss": 0.1816, "lr": 1.1725216201690526e-06, "epoch": 0.29316199643319574, "percentage": 5.86, "elapsed_time": "0:15:19", "remaining_time": "4:06:03", "throughput": 8801.39, "total_tokens": 8092928} +{"current_steps": 12005, "total_steps": 204665, "loss": 0.0593, "lr": 1.1730102115600723e-06, "epoch": 0.29328414726504287, "percentage": 5.87, "elapsed_time": "0:15:19", "remaining_time": "4:06:01", "throughput": 8801.37, "total_tokens": 8095872} +{"current_steps": 12010, "total_steps": 204665, "loss": 0.1083, "lr": 1.173498802951092e-06, "epoch": 0.29340629809689006, "percentage": 5.87, "elapsed_time": "0:15:20", "remaining_time": "4:06:00", "throughput": 8801.53, "total_tokens": 8099008} +{"current_steps": 12015, "total_steps": 204665, "loss": 0.07, "lr": 1.1739873943421116e-06, "epoch": 0.2935284489287372, "percentage": 5.87, "elapsed_time": "0:15:20", "remaining_time": "4:05:59", "throughput": 8801.72, "total_tokens": 8102208} +{"current_steps": 12020, "total_steps": 204665, "loss": 0.094, "lr": 1.1744759857331313e-06, "epoch": 0.2936505997605844, "percentage": 5.87, "elapsed_time": "0:15:20", "remaining_time": "4:05:58", "throughput": 8801.97, "total_tokens": 8105472} +{"current_steps": 12025, "total_steps": 204665, "loss": 0.1558, "lr": 1.174964577124151e-06, "epoch": 0.2937727505924315, "percentage": 5.88, "elapsed_time": "0:15:21", "remaining_time": "4:05:57", "throughput": 8802.38, "total_tokens": 8108928} +{"current_steps": 12030, "total_steps": 204665, "loss": 0.0828, "lr": 1.1754531685151707e-06, "epoch": 0.2938949014242787, "percentage": 5.88, "elapsed_time": "0:15:21", "remaining_time": "4:05:56", "throughput": 8802.74, "total_tokens": 8112320} +{"current_steps": 12035, "total_steps": 204665, "loss": 0.201, "lr": 1.1759417599061906e-06, "epoch": 0.29401705225612584, "percentage": 5.88, "elapsed_time": "0:15:21", "remaining_time": "4:05:56", "throughput": 8803.13, "total_tokens": 8115776} +{"current_steps": 12040, "total_steps": 204665, "loss": 0.1382, "lr": 1.17643035129721e-06, "epoch": 0.29413920308797303, "percentage": 5.88, "elapsed_time": "0:15:22", "remaining_time": "4:05:54", "throughput": 8803.15, "total_tokens": 8118784} +{"current_steps": 12045, "total_steps": 204665, "loss": 0.1141, "lr": 1.1769189426882297e-06, "epoch": 0.29426135391982017, "percentage": 5.89, "elapsed_time": "0:15:22", "remaining_time": "4:05:54", "throughput": 8803.68, "total_tokens": 8122368} +{"current_steps": 12050, "total_steps": 204665, "loss": 0.1614, "lr": 1.1774075340792496e-06, "epoch": 0.29438350475166736, "percentage": 5.89, "elapsed_time": "0:15:22", "remaining_time": "4:05:53", "throughput": 8803.87, "total_tokens": 8125568} +{"current_steps": 12055, "total_steps": 204665, "loss": 0.1131, "lr": 1.177896125470269e-06, "epoch": 0.29450565558351455, "percentage": 5.89, "elapsed_time": "0:15:23", "remaining_time": "4:05:52", "throughput": 8804.05, "total_tokens": 8128768} +{"current_steps": 12060, "total_steps": 204665, "loss": 0.112, "lr": 1.1783847168612888e-06, "epoch": 0.2946278064153617, "percentage": 5.89, "elapsed_time": "0:15:23", "remaining_time": "4:05:51", "throughput": 8804.36, "total_tokens": 8132096} +{"current_steps": 12065, "total_steps": 204665, "loss": 0.0945, "lr": 1.1788733082523087e-06, "epoch": 0.29474995724720887, "percentage": 5.89, "elapsed_time": "0:15:23", "remaining_time": "4:05:50", "throughput": 8804.6, "total_tokens": 8135360} +{"current_steps": 12070, "total_steps": 204665, "loss": 0.1098, "lr": 1.1793618996433284e-06, "epoch": 0.294872108079056, "percentage": 5.9, "elapsed_time": "0:15:24", "remaining_time": "4:05:49", "throughput": 8804.8, "total_tokens": 8138560} +{"current_steps": 12075, "total_steps": 204665, "loss": 0.0424, "lr": 1.1798504910343478e-06, "epoch": 0.2949942589109032, "percentage": 5.9, "elapsed_time": "0:15:24", "remaining_time": "4:05:48", "throughput": 8805.15, "total_tokens": 8141952} +{"current_steps": 12080, "total_steps": 204665, "loss": 0.0183, "lr": 1.1803390824253677e-06, "epoch": 0.29511640974275033, "percentage": 5.9, "elapsed_time": "0:15:25", "remaining_time": "4:05:47", "throughput": 8805.61, "total_tokens": 8145472} +{"current_steps": 12085, "total_steps": 204665, "loss": 0.1725, "lr": 1.1808276738163874e-06, "epoch": 0.2952385605745975, "percentage": 5.9, "elapsed_time": "0:15:25", "remaining_time": "4:05:46", "throughput": 8805.67, "total_tokens": 8148544} +{"current_steps": 12090, "total_steps": 204665, "loss": 0.1385, "lr": 1.1813162652074069e-06, "epoch": 0.29536071140644465, "percentage": 5.91, "elapsed_time": "0:15:25", "remaining_time": "4:05:45", "throughput": 8806.26, "total_tokens": 8152192} +{"current_steps": 12095, "total_steps": 204665, "loss": 0.2717, "lr": 1.1818048565984268e-06, "epoch": 0.29548286223829184, "percentage": 5.91, "elapsed_time": "0:15:26", "remaining_time": "4:05:44", "throughput": 8806.13, "total_tokens": 8155008} +{"current_steps": 12100, "total_steps": 204665, "loss": 0.2778, "lr": 1.1822934479894464e-06, "epoch": 0.29560501307013903, "percentage": 5.91, "elapsed_time": "0:15:26", "remaining_time": "4:05:43", "throughput": 8806.63, "total_tokens": 8158592} +{"current_steps": 12105, "total_steps": 204665, "loss": 0.109, "lr": 1.1827820393804661e-06, "epoch": 0.29572716390198617, "percentage": 5.91, "elapsed_time": "0:15:26", "remaining_time": "4:05:42", "throughput": 8807.05, "total_tokens": 8162048} +{"current_steps": 12110, "total_steps": 204665, "loss": 0.2265, "lr": 1.1832706307714858e-06, "epoch": 0.29584931473383336, "percentage": 5.92, "elapsed_time": "0:15:27", "remaining_time": "4:05:41", "throughput": 8807.28, "total_tokens": 8165312} +{"current_steps": 12115, "total_steps": 204665, "loss": 0.0558, "lr": 1.1837592221625055e-06, "epoch": 0.2959714655656805, "percentage": 5.92, "elapsed_time": "0:15:27", "remaining_time": "4:05:40", "throughput": 8807.7, "total_tokens": 8168768} +{"current_steps": 12120, "total_steps": 204665, "loss": 0.2307, "lr": 1.1842478135535252e-06, "epoch": 0.2960936163975277, "percentage": 5.92, "elapsed_time": "0:15:27", "remaining_time": "4:05:39", "throughput": 8808.15, "total_tokens": 8172288} +{"current_steps": 12125, "total_steps": 204665, "loss": 0.13, "lr": 1.1847364049445448e-06, "epoch": 0.2962157672293748, "percentage": 5.92, "elapsed_time": "0:15:28", "remaining_time": "4:05:38", "throughput": 8808.61, "total_tokens": 8175808} +{"current_steps": 12130, "total_steps": 204665, "loss": 0.1947, "lr": 1.1852249963355645e-06, "epoch": 0.296337918061222, "percentage": 5.93, "elapsed_time": "0:15:28", "remaining_time": "4:05:37", "throughput": 8808.91, "total_tokens": 8179136} +{"current_steps": 12135, "total_steps": 204665, "loss": 0.2153, "lr": 1.1857135877265842e-06, "epoch": 0.29646006889306914, "percentage": 5.93, "elapsed_time": "0:15:28", "remaining_time": "4:05:36", "throughput": 8809.26, "total_tokens": 8182528} +{"current_steps": 12140, "total_steps": 204665, "loss": 0.1133, "lr": 1.1862021791176037e-06, "epoch": 0.29658221972491633, "percentage": 5.93, "elapsed_time": "0:15:29", "remaining_time": "4:05:35", "throughput": 8809.3, "total_tokens": 8185536} +{"current_steps": 12145, "total_steps": 204665, "loss": 0.0854, "lr": 1.1866907705086236e-06, "epoch": 0.2967043705567635, "percentage": 5.93, "elapsed_time": "0:15:29", "remaining_time": "4:05:34", "throughput": 8809.6, "total_tokens": 8188864} +{"current_steps": 12150, "total_steps": 204665, "loss": 0.086, "lr": 1.1871793618996433e-06, "epoch": 0.29682652138861065, "percentage": 5.94, "elapsed_time": "0:15:29", "remaining_time": "4:05:33", "throughput": 8809.87, "total_tokens": 8192192} +{"current_steps": 12155, "total_steps": 204665, "loss": 0.1689, "lr": 1.1876679532906631e-06, "epoch": 0.29694867222045784, "percentage": 5.94, "elapsed_time": "0:15:30", "remaining_time": "4:05:32", "throughput": 8809.87, "total_tokens": 8195136} +{"current_steps": 12160, "total_steps": 204665, "loss": 0.1316, "lr": 1.1881565446816826e-06, "epoch": 0.297070823052305, "percentage": 5.94, "elapsed_time": "0:15:30", "remaining_time": "4:05:31", "throughput": 8810.27, "total_tokens": 8198592} +{"current_steps": 12165, "total_steps": 204665, "loss": 0.1468, "lr": 1.1886451360727023e-06, "epoch": 0.29719297388415217, "percentage": 5.94, "elapsed_time": "0:15:30", "remaining_time": "4:05:30", "throughput": 8810.58, "total_tokens": 8201920} +{"current_steps": 12170, "total_steps": 204665, "loss": 0.1071, "lr": 1.1891337274637222e-06, "epoch": 0.2973151247159993, "percentage": 5.95, "elapsed_time": "0:15:31", "remaining_time": "4:05:30", "throughput": 8811.38, "total_tokens": 8205824} +{"current_steps": 12175, "total_steps": 204665, "loss": 0.1096, "lr": 1.1896223188547417e-06, "epoch": 0.2974372755478465, "percentage": 5.95, "elapsed_time": "0:15:31", "remaining_time": "4:05:29", "throughput": 8811.72, "total_tokens": 8209216} +{"current_steps": 12180, "total_steps": 204665, "loss": 0.1757, "lr": 1.1901109102457613e-06, "epoch": 0.2975594263796936, "percentage": 5.95, "elapsed_time": "0:15:31", "remaining_time": "4:05:28", "throughput": 8812.13, "total_tokens": 8212672} +{"current_steps": 12185, "total_steps": 204665, "loss": 0.1019, "lr": 1.1905995016367812e-06, "epoch": 0.2976815772115408, "percentage": 5.95, "elapsed_time": "0:15:32", "remaining_time": "4:05:27", "throughput": 8812.21, "total_tokens": 8215744} +{"current_steps": 12190, "total_steps": 204665, "loss": 0.1574, "lr": 1.191088093027801e-06, "epoch": 0.29780372804338795, "percentage": 5.96, "elapsed_time": "0:15:32", "remaining_time": "4:05:26", "throughput": 8812.61, "total_tokens": 8219200} +{"current_steps": 12195, "total_steps": 204665, "loss": 0.0352, "lr": 1.1915766844188204e-06, "epoch": 0.29792587887523514, "percentage": 5.96, "elapsed_time": "0:15:33", "remaining_time": "4:05:25", "throughput": 8813.13, "total_tokens": 8222784} +{"current_steps": 12200, "total_steps": 204665, "loss": 0.1086, "lr": 1.1920652758098403e-06, "epoch": 0.29804802970708233, "percentage": 5.96, "elapsed_time": "0:15:33", "remaining_time": "4:05:24", "throughput": 8813.66, "total_tokens": 8226368} +{"current_steps": 12205, "total_steps": 204665, "loss": 0.194, "lr": 1.19255386720086e-06, "epoch": 0.29817018053892946, "percentage": 5.96, "elapsed_time": "0:15:33", "remaining_time": "4:05:23", "throughput": 8813.74, "total_tokens": 8229440} +{"current_steps": 12210, "total_steps": 204665, "loss": 0.0659, "lr": 1.1930424585918794e-06, "epoch": 0.29829233137077665, "percentage": 5.97, "elapsed_time": "0:15:34", "remaining_time": "4:05:22", "throughput": 8814.04, "total_tokens": 8232768} +{"current_steps": 12215, "total_steps": 204665, "loss": 0.3557, "lr": 1.1935310499828993e-06, "epoch": 0.2984144822026238, "percentage": 5.97, "elapsed_time": "0:15:34", "remaining_time": "4:05:21", "throughput": 8814.23, "total_tokens": 8235968} +{"current_steps": 12220, "total_steps": 204665, "loss": 0.1073, "lr": 1.194019641373919e-06, "epoch": 0.298536633034471, "percentage": 5.97, "elapsed_time": "0:15:34", "remaining_time": "4:05:20", "throughput": 8814.31, "total_tokens": 8239040} +{"current_steps": 12225, "total_steps": 204665, "loss": 0.0733, "lr": 1.1945082327649387e-06, "epoch": 0.2986587838663181, "percentage": 5.97, "elapsed_time": "0:15:35", "remaining_time": "4:05:19", "throughput": 8814.4, "total_tokens": 8242112} +{"current_steps": 12230, "total_steps": 204665, "loss": 0.0708, "lr": 1.1949968241559582e-06, "epoch": 0.2987809346981653, "percentage": 5.98, "elapsed_time": "0:15:35", "remaining_time": "4:05:18", "throughput": 8815.07, "total_tokens": 8245888} +{"current_steps": 12235, "total_steps": 204665, "loss": 0.0598, "lr": 1.195485415546978e-06, "epoch": 0.29890308553001244, "percentage": 5.98, "elapsed_time": "0:15:35", "remaining_time": "4:05:17", "throughput": 8815.49, "total_tokens": 8249344} +{"current_steps": 12240, "total_steps": 204665, "loss": 0.1704, "lr": 1.1959740069379977e-06, "epoch": 0.2990252363618596, "percentage": 5.98, "elapsed_time": "0:15:36", "remaining_time": "4:05:16", "throughput": 8816.05, "total_tokens": 8252992} +{"current_steps": 12245, "total_steps": 204665, "loss": 0.0935, "lr": 1.1964625983290172e-06, "epoch": 0.2991473871937068, "percentage": 5.98, "elapsed_time": "0:15:36", "remaining_time": "4:05:16", "throughput": 8816.79, "total_tokens": 8256832} +{"current_steps": 12250, "total_steps": 204665, "loss": 0.0363, "lr": 1.196951189720037e-06, "epoch": 0.29926953802555395, "percentage": 5.99, "elapsed_time": "0:15:36", "remaining_time": "4:05:15", "throughput": 8817.24, "total_tokens": 8260352} +{"current_steps": 12255, "total_steps": 204665, "loss": 0.0674, "lr": 1.1974397811110568e-06, "epoch": 0.29939168885740114, "percentage": 5.99, "elapsed_time": "0:15:37", "remaining_time": "4:05:14", "throughput": 8817.53, "total_tokens": 8263680} +{"current_steps": 12260, "total_steps": 204665, "loss": 0.1637, "lr": 1.1979283725020767e-06, "epoch": 0.2995138396892483, "percentage": 5.99, "elapsed_time": "0:15:37", "remaining_time": "4:05:13", "throughput": 8817.76, "total_tokens": 8266944} +{"current_steps": 12265, "total_steps": 204665, "loss": 0.2747, "lr": 1.1984169638930961e-06, "epoch": 0.29963599052109546, "percentage": 5.99, "elapsed_time": "0:15:37", "remaining_time": "4:05:12", "throughput": 8817.78, "total_tokens": 8269952} +{"current_steps": 12270, "total_steps": 204665, "loss": 0.2082, "lr": 1.1989055552841158e-06, "epoch": 0.2997581413529426, "percentage": 6.0, "elapsed_time": "0:15:38", "remaining_time": "4:05:11", "throughput": 8818.27, "total_tokens": 8273472} +{"current_steps": 12275, "total_steps": 204665, "loss": 0.2137, "lr": 1.1993941466751357e-06, "epoch": 0.2998802921847898, "percentage": 6.0, "elapsed_time": "0:15:38", "remaining_time": "4:05:10", "throughput": 8818.24, "total_tokens": 8276416} +{"current_steps": 12280, "total_steps": 204665, "loss": 0.0824, "lr": 1.1998827380661552e-06, "epoch": 0.3000024430166369, "percentage": 6.0, "elapsed_time": "0:15:38", "remaining_time": "4:05:09", "throughput": 8818.3, "total_tokens": 8279488} +{"current_steps": 12285, "total_steps": 204665, "loss": 0.2047, "lr": 1.2003713294571749e-06, "epoch": 0.3001245938484841, "percentage": 6.0, "elapsed_time": "0:15:39", "remaining_time": "4:05:08", "throughput": 8818.82, "total_tokens": 8283072} +{"current_steps": 12290, "total_steps": 204665, "loss": 0.1809, "lr": 1.2008599208481948e-06, "epoch": 0.30024674468033125, "percentage": 6.0, "elapsed_time": "0:15:39", "remaining_time": "4:05:07", "throughput": 8818.96, "total_tokens": 8286208} +{"current_steps": 12295, "total_steps": 204665, "loss": 0.2269, "lr": 1.2013485122392142e-06, "epoch": 0.30036889551217844, "percentage": 6.01, "elapsed_time": "0:15:39", "remaining_time": "4:05:06", "throughput": 8818.98, "total_tokens": 8289216} +{"current_steps": 12300, "total_steps": 204665, "loss": 0.1067, "lr": 1.201837103630234e-06, "epoch": 0.3004910463440256, "percentage": 6.01, "elapsed_time": "0:15:40", "remaining_time": "4:05:05", "throughput": 8819.38, "total_tokens": 8292672} +{"current_steps": 12305, "total_steps": 204665, "loss": 0.1393, "lr": 1.2023256950212538e-06, "epoch": 0.30061319717587276, "percentage": 6.01, "elapsed_time": "0:15:40", "remaining_time": "4:05:04", "throughput": 8820.16, "total_tokens": 8296576} +{"current_steps": 12310, "total_steps": 204665, "loss": 0.1101, "lr": 1.2028142864122735e-06, "epoch": 0.30073534800771995, "percentage": 6.01, "elapsed_time": "0:15:40", "remaining_time": "4:05:03", "throughput": 8820.35, "total_tokens": 8299776} +{"current_steps": 12315, "total_steps": 204665, "loss": 0.1248, "lr": 1.203302877803293e-06, "epoch": 0.3008574988395671, "percentage": 6.02, "elapsed_time": "0:15:41", "remaining_time": "4:05:02", "throughput": 8821.09, "total_tokens": 8303616} +{"current_steps": 12320, "total_steps": 204665, "loss": 0.044, "lr": 1.2037914691943128e-06, "epoch": 0.3009796496714143, "percentage": 6.02, "elapsed_time": "0:15:41", "remaining_time": "4:05:01", "throughput": 8821.28, "total_tokens": 8306816} +{"current_steps": 12325, "total_steps": 204665, "loss": 0.0833, "lr": 1.2042800605853325e-06, "epoch": 0.3011018005032614, "percentage": 6.02, "elapsed_time": "0:15:42", "remaining_time": "4:05:00", "throughput": 8821.55, "total_tokens": 8310144} +{"current_steps": 12330, "total_steps": 204665, "loss": 0.0851, "lr": 1.204768651976352e-06, "epoch": 0.3012239513351086, "percentage": 6.02, "elapsed_time": "0:15:42", "remaining_time": "4:05:00", "throughput": 8821.75, "total_tokens": 8313408} +{"current_steps": 12335, "total_steps": 204665, "loss": 0.0985, "lr": 1.2052572433673717e-06, "epoch": 0.30134610216695573, "percentage": 6.03, "elapsed_time": "0:15:42", "remaining_time": "4:04:59", "throughput": 8821.91, "total_tokens": 8316608} +{"current_steps": 12340, "total_steps": 204665, "loss": 0.1569, "lr": 1.2057458347583916e-06, "epoch": 0.3014682529988029, "percentage": 6.03, "elapsed_time": "0:15:43", "remaining_time": "4:04:58", "throughput": 8822.76, "total_tokens": 8320576} +{"current_steps": 12345, "total_steps": 204665, "loss": 0.1164, "lr": 1.2062344261494112e-06, "epoch": 0.3015904038306501, "percentage": 6.03, "elapsed_time": "0:15:43", "remaining_time": "4:04:57", "throughput": 8822.71, "total_tokens": 8323520} +{"current_steps": 12350, "total_steps": 204665, "loss": 0.0494, "lr": 1.2067230175404307e-06, "epoch": 0.30171255466249725, "percentage": 6.03, "elapsed_time": "0:15:43", "remaining_time": "4:04:56", "throughput": 8823.16, "total_tokens": 8327040} +{"current_steps": 12355, "total_steps": 204665, "loss": 0.2087, "lr": 1.2072116089314506e-06, "epoch": 0.30183470549434444, "percentage": 6.04, "elapsed_time": "0:15:44", "remaining_time": "4:04:55", "throughput": 8823.32, "total_tokens": 8330240} +{"current_steps": 12360, "total_steps": 204665, "loss": 0.0641, "lr": 1.2077002003224703e-06, "epoch": 0.30195685632619157, "percentage": 6.04, "elapsed_time": "0:15:44", "remaining_time": "4:04:54", "throughput": 8823.37, "total_tokens": 8333312} +{"current_steps": 12365, "total_steps": 204665, "loss": 0.1007, "lr": 1.2081887917134898e-06, "epoch": 0.30207900715803876, "percentage": 6.04, "elapsed_time": "0:15:44", "remaining_time": "4:04:53", "throughput": 8823.63, "total_tokens": 8336576} +{"current_steps": 12370, "total_steps": 204665, "loss": 0.2068, "lr": 1.2086773831045097e-06, "epoch": 0.3022011579898859, "percentage": 6.04, "elapsed_time": "0:15:45", "remaining_time": "4:04:52", "throughput": 8824.24, "total_tokens": 8340288} +{"current_steps": 12375, "total_steps": 204665, "loss": 0.1953, "lr": 1.2091659744955293e-06, "epoch": 0.3023233088217331, "percentage": 6.05, "elapsed_time": "0:15:45", "remaining_time": "4:04:51", "throughput": 8824.46, "total_tokens": 8343552} +{"current_steps": 12380, "total_steps": 204665, "loss": 0.1275, "lr": 1.2096545658865492e-06, "epoch": 0.3024454596535802, "percentage": 6.05, "elapsed_time": "0:15:45", "remaining_time": "4:04:50", "throughput": 8824.97, "total_tokens": 8347136} +{"current_steps": 12385, "total_steps": 204665, "loss": 0.2043, "lr": 1.2101431572775687e-06, "epoch": 0.3025676104854274, "percentage": 6.05, "elapsed_time": "0:15:46", "remaining_time": "4:04:50", "throughput": 8825.79, "total_tokens": 8351104} +{"current_steps": 12390, "total_steps": 204665, "loss": 0.1308, "lr": 1.2106317486685884e-06, "epoch": 0.3026897613172746, "percentage": 6.05, "elapsed_time": "0:15:46", "remaining_time": "4:04:49", "throughput": 8825.85, "total_tokens": 8354176} +{"current_steps": 12395, "total_steps": 204665, "loss": 0.0783, "lr": 1.2111203400596083e-06, "epoch": 0.30281191214912173, "percentage": 6.06, "elapsed_time": "0:15:46", "remaining_time": "4:04:48", "throughput": 8826.46, "total_tokens": 8357888} +{"current_steps": 12400, "total_steps": 204665, "loss": 0.0698, "lr": 1.2116089314506277e-06, "epoch": 0.3029340629809689, "percentage": 6.06, "elapsed_time": "0:15:47", "remaining_time": "4:04:47", "throughput": 8826.58, "total_tokens": 8361024} +{"current_steps": 12405, "total_steps": 204665, "loss": 0.0714, "lr": 1.2120975228416474e-06, "epoch": 0.30305621381281606, "percentage": 6.06, "elapsed_time": "0:15:47", "remaining_time": "4:04:46", "throughput": 8826.78, "total_tokens": 8364224} +{"current_steps": 12410, "total_steps": 204665, "loss": 0.184, "lr": 1.2125861142326673e-06, "epoch": 0.30317836464466325, "percentage": 6.06, "elapsed_time": "0:15:47", "remaining_time": "4:04:45", "throughput": 8826.86, "total_tokens": 8367296} +{"current_steps": 12415, "total_steps": 204665, "loss": 0.1278, "lr": 1.2130747056236868e-06, "epoch": 0.3033005154765104, "percentage": 6.07, "elapsed_time": "0:15:48", "remaining_time": "4:04:44", "throughput": 8826.87, "total_tokens": 8370304} +{"current_steps": 12420, "total_steps": 204665, "loss": 0.1006, "lr": 1.2135632970147065e-06, "epoch": 0.30342266630835757, "percentage": 6.07, "elapsed_time": "0:15:48", "remaining_time": "4:04:43", "throughput": 8827.06, "total_tokens": 8373504} +{"current_steps": 12425, "total_steps": 204665, "loss": 0.1913, "lr": 1.2140518884057261e-06, "epoch": 0.3035448171402047, "percentage": 6.07, "elapsed_time": "0:15:48", "remaining_time": "4:04:42", "throughput": 8827.49, "total_tokens": 8377024} +{"current_steps": 12430, "total_steps": 204665, "loss": 0.1267, "lr": 1.214540479796746e-06, "epoch": 0.3036669679720519, "percentage": 6.07, "elapsed_time": "0:15:49", "remaining_time": "4:04:41", "throughput": 8827.64, "total_tokens": 8380160} +{"current_steps": 12435, "total_steps": 204665, "loss": 0.1016, "lr": 1.2150290711877655e-06, "epoch": 0.303789118803899, "percentage": 6.08, "elapsed_time": "0:15:49", "remaining_time": "4:04:40", "throughput": 8827.86, "total_tokens": 8383424} +{"current_steps": 12440, "total_steps": 204665, "loss": 0.0801, "lr": 1.2155176625787852e-06, "epoch": 0.3039112696357462, "percentage": 6.08, "elapsed_time": "0:15:50", "remaining_time": "4:04:39", "throughput": 8828.42, "total_tokens": 8387072} +{"current_steps": 12445, "total_steps": 204665, "loss": 0.0782, "lr": 1.216006253969805e-06, "epoch": 0.3040334204675934, "percentage": 6.08, "elapsed_time": "0:15:50", "remaining_time": "4:04:38", "throughput": 8828.92, "total_tokens": 8390656} +{"current_steps": 12450, "total_steps": 204665, "loss": 0.0775, "lr": 1.2164948453608246e-06, "epoch": 0.30415557129944054, "percentage": 6.08, "elapsed_time": "0:15:50", "remaining_time": "4:04:38", "throughput": 8829.46, "total_tokens": 8394304} +{"current_steps": 12455, "total_steps": 204665, "loss": 0.243, "lr": 1.2169834367518442e-06, "epoch": 0.30427772213128773, "percentage": 6.09, "elapsed_time": "0:15:51", "remaining_time": "4:04:37", "throughput": 8829.71, "total_tokens": 8397632} +{"current_steps": 12460, "total_steps": 204665, "loss": 0.2416, "lr": 1.2174720281428641e-06, "epoch": 0.30439987296313487, "percentage": 6.09, "elapsed_time": "0:15:51", "remaining_time": "4:04:36", "throughput": 8830.04, "total_tokens": 8401024} +{"current_steps": 12465, "total_steps": 204665, "loss": 0.0535, "lr": 1.2179606195338838e-06, "epoch": 0.30452202379498206, "percentage": 6.09, "elapsed_time": "0:15:51", "remaining_time": "4:04:35", "throughput": 8830.39, "total_tokens": 8404416} +{"current_steps": 12470, "total_steps": 204665, "loss": 0.151, "lr": 1.2184492109249033e-06, "epoch": 0.3046441746268292, "percentage": 6.09, "elapsed_time": "0:15:52", "remaining_time": "4:04:34", "throughput": 8831.27, "total_tokens": 8408448} +{"current_steps": 12475, "total_steps": 204665, "loss": 0.1923, "lr": 1.2189378023159232e-06, "epoch": 0.3047663254586764, "percentage": 6.1, "elapsed_time": "0:15:52", "remaining_time": "4:04:33", "throughput": 8832.12, "total_tokens": 8412416} +{"current_steps": 12480, "total_steps": 204665, "loss": 0.1284, "lr": 1.2194263937069429e-06, "epoch": 0.3048884762905235, "percentage": 6.1, "elapsed_time": "0:15:52", "remaining_time": "4:04:32", "throughput": 8832.02, "total_tokens": 8415296} +{"current_steps": 12485, "total_steps": 204665, "loss": 0.0634, "lr": 1.2199149850979623e-06, "epoch": 0.3050106271223707, "percentage": 6.1, "elapsed_time": "0:15:53", "remaining_time": "4:04:31", "throughput": 8831.97, "total_tokens": 8418240} +{"current_steps": 12490, "total_steps": 204665, "loss": 0.108, "lr": 1.2204035764889822e-06, "epoch": 0.3051327779542179, "percentage": 6.1, "elapsed_time": "0:15:53", "remaining_time": "4:04:30", "throughput": 8832.59, "total_tokens": 8421952} +{"current_steps": 12495, "total_steps": 204665, "loss": 0.0815, "lr": 1.220892167880002e-06, "epoch": 0.305254928786065, "percentage": 6.11, "elapsed_time": "0:15:53", "remaining_time": "4:04:29", "throughput": 8832.64, "total_tokens": 8425024} +{"current_steps": 12500, "total_steps": 204665, "loss": 0.1205, "lr": 1.2213807592710218e-06, "epoch": 0.3053770796179122, "percentage": 6.11, "elapsed_time": "0:15:54", "remaining_time": "4:04:29", "throughput": 8832.93, "total_tokens": 8428352} +{"current_steps": 12505, "total_steps": 204665, "loss": 0.0864, "lr": 1.2218693506620413e-06, "epoch": 0.30549923044975935, "percentage": 6.11, "elapsed_time": "0:15:54", "remaining_time": "4:04:28", "throughput": 8833.04, "total_tokens": 8431488} +{"current_steps": 12510, "total_steps": 204665, "loss": 0.0892, "lr": 1.222357942053061e-06, "epoch": 0.30562138128160654, "percentage": 6.11, "elapsed_time": "0:15:54", "remaining_time": "4:04:27", "throughput": 8833.44, "total_tokens": 8434944} +{"current_steps": 12515, "total_steps": 204665, "loss": 0.0486, "lr": 1.2228465334440806e-06, "epoch": 0.3057435321134537, "percentage": 6.11, "elapsed_time": "0:15:55", "remaining_time": "4:04:26", "throughput": 8833.74, "total_tokens": 8438272} +{"current_steps": 12520, "total_steps": 204665, "loss": 0.1224, "lr": 1.2233351248351003e-06, "epoch": 0.30586568294530087, "percentage": 6.12, "elapsed_time": "0:15:55", "remaining_time": "4:04:25", "throughput": 8834.19, "total_tokens": 8441792} +{"current_steps": 12525, "total_steps": 204665, "loss": 0.2268, "lr": 1.22382371622612e-06, "epoch": 0.305987833777148, "percentage": 6.12, "elapsed_time": "0:15:55", "remaining_time": "4:04:24", "throughput": 8834.29, "total_tokens": 8444864} +{"current_steps": 12530, "total_steps": 204665, "loss": 0.107, "lr": 1.2243123076171397e-06, "epoch": 0.3061099846089952, "percentage": 6.12, "elapsed_time": "0:15:56", "remaining_time": "4:04:23", "throughput": 8834.65, "total_tokens": 8448320} +{"current_steps": 12535, "total_steps": 204665, "loss": 0.1659, "lr": 1.2248008990081596e-06, "epoch": 0.3062321354408424, "percentage": 6.12, "elapsed_time": "0:15:56", "remaining_time": "4:04:22", "throughput": 8835.15, "total_tokens": 8451904} +{"current_steps": 12540, "total_steps": 204665, "loss": 0.1476, "lr": 1.225289490399179e-06, "epoch": 0.3063542862726895, "percentage": 6.13, "elapsed_time": "0:15:56", "remaining_time": "4:04:21", "throughput": 8835.69, "total_tokens": 8455552} +{"current_steps": 12545, "total_steps": 204665, "loss": 0.041, "lr": 1.2257780817901987e-06, "epoch": 0.3064764371045367, "percentage": 6.13, "elapsed_time": "0:15:57", "remaining_time": "4:04:20", "throughput": 8835.86, "total_tokens": 8458752} +{"current_steps": 12550, "total_steps": 204665, "loss": 0.1281, "lr": 1.2262666731812186e-06, "epoch": 0.30659858793638384, "percentage": 6.13, "elapsed_time": "0:15:57", "remaining_time": "4:04:19", "throughput": 8835.97, "total_tokens": 8461888} +{"current_steps": 12555, "total_steps": 204665, "loss": 0.0974, "lr": 1.226755264572238e-06, "epoch": 0.306720738768231, "percentage": 6.13, "elapsed_time": "0:15:58", "remaining_time": "4:04:19", "throughput": 8836.5, "total_tokens": 8465536} +{"current_steps": 12560, "total_steps": 204665, "loss": 0.1158, "lr": 1.2272438559632578e-06, "epoch": 0.30684288960007816, "percentage": 6.14, "elapsed_time": "0:15:58", "remaining_time": "4:04:18", "throughput": 8836.59, "total_tokens": 8468608} +{"current_steps": 12565, "total_steps": 204665, "loss": 0.058, "lr": 1.2277324473542776e-06, "epoch": 0.30696504043192535, "percentage": 6.14, "elapsed_time": "0:15:58", "remaining_time": "4:04:17", "throughput": 8837.09, "total_tokens": 8472192} +{"current_steps": 12570, "total_steps": 204665, "loss": 0.067, "lr": 1.2282210387452971e-06, "epoch": 0.3070871912637725, "percentage": 6.14, "elapsed_time": "0:15:59", "remaining_time": "4:04:16", "throughput": 8837.97, "total_tokens": 8476224} +{"current_steps": 12575, "total_steps": 204665, "loss": 0.2403, "lr": 1.2287096301363168e-06, "epoch": 0.3072093420956197, "percentage": 6.14, "elapsed_time": "0:15:59", "remaining_time": "4:04:15", "throughput": 8838.33, "total_tokens": 8479680} +{"current_steps": 12580, "total_steps": 204665, "loss": 0.1631, "lr": 1.2291982215273367e-06, "epoch": 0.3073314929274668, "percentage": 6.15, "elapsed_time": "0:15:59", "remaining_time": "4:04:14", "throughput": 8838.9, "total_tokens": 8483392} +{"current_steps": 12585, "total_steps": 204665, "loss": 0.1993, "lr": 1.2296868129183564e-06, "epoch": 0.307453643759314, "percentage": 6.15, "elapsed_time": "0:16:00", "remaining_time": "4:04:14", "throughput": 8839.6, "total_tokens": 8487232} +{"current_steps": 12590, "total_steps": 204665, "loss": 0.1225, "lr": 1.2301754043093758e-06, "epoch": 0.3075757945911612, "percentage": 6.15, "elapsed_time": "0:16:00", "remaining_time": "4:04:13", "throughput": 8840.17, "total_tokens": 8490944} +{"current_steps": 12595, "total_steps": 204665, "loss": 0.0538, "lr": 1.2306639957003957e-06, "epoch": 0.3076979454230083, "percentage": 6.15, "elapsed_time": "0:16:00", "remaining_time": "4:04:12", "throughput": 8840.48, "total_tokens": 8494336} +{"current_steps": 12600, "total_steps": 204665, "loss": 0.1477, "lr": 1.2311525870914154e-06, "epoch": 0.3078200962548555, "percentage": 6.16, "elapsed_time": "0:16:01", "remaining_time": "4:04:11", "throughput": 8840.59, "total_tokens": 8497472} +{"current_steps": 12605, "total_steps": 204665, "loss": 0.0984, "lr": 1.2316411784824349e-06, "epoch": 0.30794224708670265, "percentage": 6.16, "elapsed_time": "0:16:01", "remaining_time": "4:04:10", "throughput": 8841.09, "total_tokens": 8501056} +{"current_steps": 12610, "total_steps": 204665, "loss": 0.0836, "lr": 1.2321297698734548e-06, "epoch": 0.30806439791854984, "percentage": 6.16, "elapsed_time": "0:16:01", "remaining_time": "4:04:09", "throughput": 8841.28, "total_tokens": 8504320} +{"current_steps": 12615, "total_steps": 204665, "loss": 0.249, "lr": 1.2326183612644745e-06, "epoch": 0.30818654875039697, "percentage": 6.16, "elapsed_time": "0:16:02", "remaining_time": "4:04:08", "throughput": 8841.56, "total_tokens": 8507648} +{"current_steps": 12620, "total_steps": 204665, "loss": 0.1641, "lr": 1.2331069526554941e-06, "epoch": 0.30830869958224416, "percentage": 6.17, "elapsed_time": "0:16:02", "remaining_time": "4:04:07", "throughput": 8841.65, "total_tokens": 8510720} +{"current_steps": 12625, "total_steps": 204665, "loss": 0.1154, "lr": 1.2335955440465138e-06, "epoch": 0.3084308504140913, "percentage": 6.17, "elapsed_time": "0:16:02", "remaining_time": "4:04:07", "throughput": 8841.86, "total_tokens": 8513984} +{"current_steps": 12630, "total_steps": 204665, "loss": 0.1706, "lr": 1.2340841354375335e-06, "epoch": 0.3085530012459385, "percentage": 6.17, "elapsed_time": "0:16:03", "remaining_time": "4:04:06", "throughput": 8842.12, "total_tokens": 8517312} +{"current_steps": 12635, "total_steps": 204665, "loss": 0.025, "lr": 1.2345727268285532e-06, "epoch": 0.3086751520777857, "percentage": 6.17, "elapsed_time": "0:16:03", "remaining_time": "4:04:05", "throughput": 8842.51, "total_tokens": 8520768} +{"current_steps": 12640, "total_steps": 204665, "loss": 0.1898, "lr": 1.2350613182195729e-06, "epoch": 0.3087973029096328, "percentage": 6.18, "elapsed_time": "0:16:03", "remaining_time": "4:04:04", "throughput": 8842.45, "total_tokens": 8523712} +{"current_steps": 12645, "total_steps": 204665, "loss": 0.2702, "lr": 1.2355499096105926e-06, "epoch": 0.30891945374148, "percentage": 6.18, "elapsed_time": "0:16:04", "remaining_time": "4:04:03", "throughput": 8842.5, "total_tokens": 8526784} +{"current_steps": 12650, "total_steps": 204665, "loss": 0.0935, "lr": 1.2360385010016122e-06, "epoch": 0.30904160457332713, "percentage": 6.18, "elapsed_time": "0:16:04", "remaining_time": "4:04:02", "throughput": 8842.73, "total_tokens": 8530048} +{"current_steps": 12655, "total_steps": 204665, "loss": 0.1544, "lr": 1.2365270923926321e-06, "epoch": 0.3091637554051743, "percentage": 6.18, "elapsed_time": "0:16:04", "remaining_time": "4:04:01", "throughput": 8842.73, "total_tokens": 8533056} +{"current_steps": 12660, "total_steps": 204665, "loss": 0.1554, "lr": 1.2370156837836516e-06, "epoch": 0.30928590623702146, "percentage": 6.19, "elapsed_time": "0:16:05", "remaining_time": "4:04:00", "throughput": 8842.95, "total_tokens": 8536320} +{"current_steps": 12665, "total_steps": 204665, "loss": 0.1172, "lr": 1.2375042751746713e-06, "epoch": 0.30940805706886865, "percentage": 6.19, "elapsed_time": "0:16:05", "remaining_time": "4:03:59", "throughput": 8842.99, "total_tokens": 8539392} +{"current_steps": 12670, "total_steps": 204665, "loss": 0.1463, "lr": 1.2379928665656912e-06, "epoch": 0.3095302079007158, "percentage": 6.19, "elapsed_time": "0:16:06", "remaining_time": "4:03:58", "throughput": 8843.64, "total_tokens": 8543168} +{"current_steps": 12675, "total_steps": 204665, "loss": 0.1172, "lr": 1.2384814579567106e-06, "epoch": 0.30965235873256297, "percentage": 6.19, "elapsed_time": "0:16:06", "remaining_time": "4:03:57", "throughput": 8843.92, "total_tokens": 8546496} +{"current_steps": 12680, "total_steps": 204665, "loss": 0.0659, "lr": 1.2389700493477303e-06, "epoch": 0.30977450956441016, "percentage": 6.2, "elapsed_time": "0:16:06", "remaining_time": "4:03:56", "throughput": 8844.62, "total_tokens": 8550336} +{"current_steps": 12685, "total_steps": 204665, "loss": 0.1353, "lr": 1.2394586407387502e-06, "epoch": 0.3098966603962573, "percentage": 6.2, "elapsed_time": "0:16:07", "remaining_time": "4:03:56", "throughput": 8844.84, "total_tokens": 8553600} +{"current_steps": 12690, "total_steps": 204665, "loss": 0.1039, "lr": 1.23994723212977e-06, "epoch": 0.3100188112281045, "percentage": 6.2, "elapsed_time": "0:16:07", "remaining_time": "4:03:55", "throughput": 8845.33, "total_tokens": 8557184} +{"current_steps": 12695, "total_steps": 204665, "loss": 0.1146, "lr": 1.2404358235207894e-06, "epoch": 0.3101409620599516, "percentage": 6.2, "elapsed_time": "0:16:07", "remaining_time": "4:03:54", "throughput": 8846.15, "total_tokens": 8561152} +{"current_steps": 12700, "total_steps": 204665, "loss": 0.1317, "lr": 1.2409244149118093e-06, "epoch": 0.3102631128917988, "percentage": 6.21, "elapsed_time": "0:16:08", "remaining_time": "4:03:53", "throughput": 8846.85, "total_tokens": 8564992} +{"current_steps": 12705, "total_steps": 204665, "loss": 0.1771, "lr": 1.241413006302829e-06, "epoch": 0.31038526372364594, "percentage": 6.21, "elapsed_time": "0:16:08", "remaining_time": "4:03:52", "throughput": 8847.2, "total_tokens": 8568448} +{"current_steps": 12710, "total_steps": 204665, "loss": 0.0726, "lr": 1.2419015976938484e-06, "epoch": 0.31050741455549313, "percentage": 6.21, "elapsed_time": "0:16:08", "remaining_time": "4:03:52", "throughput": 8847.84, "total_tokens": 8572224} +{"current_steps": 12715, "total_steps": 204665, "loss": 0.1894, "lr": 1.2423901890848683e-06, "epoch": 0.31062956538734027, "percentage": 6.21, "elapsed_time": "0:16:09", "remaining_time": "4:03:51", "throughput": 8848.5, "total_tokens": 8576064} +{"current_steps": 12720, "total_steps": 204665, "loss": 0.1499, "lr": 1.242878780475888e-06, "epoch": 0.31075171621918746, "percentage": 6.22, "elapsed_time": "0:16:09", "remaining_time": "4:03:50", "throughput": 8849.16, "total_tokens": 8579840} +{"current_steps": 12725, "total_steps": 204665, "loss": 0.021, "lr": 1.2433673718669075e-06, "epoch": 0.3108738670510346, "percentage": 6.22, "elapsed_time": "0:16:09", "remaining_time": "4:03:49", "throughput": 8849.7, "total_tokens": 8583488} +{"current_steps": 12730, "total_steps": 204665, "loss": 0.2072, "lr": 1.2438559632579273e-06, "epoch": 0.3109960178828818, "percentage": 6.22, "elapsed_time": "0:16:10", "remaining_time": "4:03:49", "throughput": 8849.86, "total_tokens": 8586688} +{"current_steps": 12735, "total_steps": 204665, "loss": 0.0856, "lr": 1.244344554648947e-06, "epoch": 0.31111816871472897, "percentage": 6.22, "elapsed_time": "0:16:10", "remaining_time": "4:03:48", "throughput": 8849.99, "total_tokens": 8589888} +{"current_steps": 12740, "total_steps": 204665, "loss": 0.1108, "lr": 1.2448331460399667e-06, "epoch": 0.3112403195465761, "percentage": 6.22, "elapsed_time": "0:16:10", "remaining_time": "4:03:47", "throughput": 8850.15, "total_tokens": 8593088} +{"current_steps": 12745, "total_steps": 204665, "loss": 0.0851, "lr": 1.2453217374309864e-06, "epoch": 0.3113624703784233, "percentage": 6.23, "elapsed_time": "0:16:11", "remaining_time": "4:03:46", "throughput": 8850.3, "total_tokens": 8596288} +{"current_steps": 12750, "total_steps": 204665, "loss": 0.1089, "lr": 1.245810328822006e-06, "epoch": 0.31148462121027043, "percentage": 6.23, "elapsed_time": "0:16:11", "remaining_time": "4:03:45", "throughput": 8850.87, "total_tokens": 8600000} +{"current_steps": 12755, "total_steps": 204665, "loss": 0.0856, "lr": 1.2462989202130258e-06, "epoch": 0.3116067720421176, "percentage": 6.23, "elapsed_time": "0:16:11", "remaining_time": "4:03:44", "throughput": 8850.83, "total_tokens": 8602944} +{"current_steps": 12760, "total_steps": 204665, "loss": 0.1233, "lr": 1.2467875116040454e-06, "epoch": 0.31172892287396475, "percentage": 6.23, "elapsed_time": "0:16:12", "remaining_time": "4:03:43", "throughput": 8851.1, "total_tokens": 8606336} +{"current_steps": 12765, "total_steps": 204665, "loss": 0.185, "lr": 1.2472761029950651e-06, "epoch": 0.31185107370581194, "percentage": 6.24, "elapsed_time": "0:16:12", "remaining_time": "4:03:42", "throughput": 8851.27, "total_tokens": 8609536} +{"current_steps": 12770, "total_steps": 204665, "loss": 0.1804, "lr": 1.2477646943860848e-06, "epoch": 0.3119732245376591, "percentage": 6.24, "elapsed_time": "0:16:13", "remaining_time": "4:03:41", "throughput": 8851.34, "total_tokens": 8612672} +{"current_steps": 12775, "total_steps": 204665, "loss": 0.1443, "lr": 1.2482532857771047e-06, "epoch": 0.31209537536950627, "percentage": 6.24, "elapsed_time": "0:16:13", "remaining_time": "4:03:41", "throughput": 8851.69, "total_tokens": 8616128} +{"current_steps": 12780, "total_steps": 204665, "loss": 0.1616, "lr": 1.2487418771681242e-06, "epoch": 0.31221752620135346, "percentage": 6.24, "elapsed_time": "0:16:13", "remaining_time": "4:03:40", "throughput": 8852.08, "total_tokens": 8619584} +{"current_steps": 12785, "total_steps": 204665, "loss": 0.1156, "lr": 1.2492304685591438e-06, "epoch": 0.3123396770332006, "percentage": 6.25, "elapsed_time": "0:16:14", "remaining_time": "4:03:39", "throughput": 8852.45, "total_tokens": 8623040} +{"current_steps": 12790, "total_steps": 204665, "loss": 0.1364, "lr": 1.2497190599501637e-06, "epoch": 0.3124618278650478, "percentage": 6.25, "elapsed_time": "0:16:14", "remaining_time": "4:03:38", "throughput": 8852.76, "total_tokens": 8626432} +{"current_steps": 12795, "total_steps": 204665, "loss": 0.1408, "lr": 1.2502076513411832e-06, "epoch": 0.3125839786968949, "percentage": 6.25, "elapsed_time": "0:16:14", "remaining_time": "4:03:37", "throughput": 8853.54, "total_tokens": 8630400} +{"current_steps": 12800, "total_steps": 204665, "loss": 0.0587, "lr": 1.2506962427322029e-06, "epoch": 0.3127061295287421, "percentage": 6.25, "elapsed_time": "0:16:15", "remaining_time": "4:03:36", "throughput": 8853.66, "total_tokens": 8633536} +{"current_steps": 12805, "total_steps": 204665, "loss": 0.1218, "lr": 1.2511848341232228e-06, "epoch": 0.31282828036058924, "percentage": 6.26, "elapsed_time": "0:16:15", "remaining_time": "4:03:35", "throughput": 8854.01, "total_tokens": 8636992} +{"current_steps": 12810, "total_steps": 204665, "loss": 0.0968, "lr": 1.2516734255142425e-06, "epoch": 0.31295043119243643, "percentage": 6.26, "elapsed_time": "0:16:15", "remaining_time": "4:03:35", "throughput": 8854.36, "total_tokens": 8640448} +{"current_steps": 12815, "total_steps": 204665, "loss": 0.1877, "lr": 1.252162016905262e-06, "epoch": 0.31307258202428356, "percentage": 6.26, "elapsed_time": "0:16:16", "remaining_time": "4:03:34", "throughput": 8854.35, "total_tokens": 8643456} +{"current_steps": 12820, "total_steps": 204665, "loss": 0.1171, "lr": 1.2526506082962818e-06, "epoch": 0.31319473285613075, "percentage": 6.26, "elapsed_time": "0:16:16", "remaining_time": "4:03:33", "throughput": 8854.86, "total_tokens": 8647104} +{"current_steps": 12825, "total_steps": 204665, "loss": 0.1403, "lr": 1.2531391996873015e-06, "epoch": 0.31331688368797794, "percentage": 6.27, "elapsed_time": "0:16:16", "remaining_time": "4:03:32", "throughput": 8855.29, "total_tokens": 8650624} +{"current_steps": 12830, "total_steps": 204665, "loss": 0.0747, "lr": 1.253627791078321e-06, "epoch": 0.3134390345198251, "percentage": 6.27, "elapsed_time": "0:16:17", "remaining_time": "4:03:31", "throughput": 8855.72, "total_tokens": 8654144} +{"current_steps": 12835, "total_steps": 204665, "loss": 0.0731, "lr": 1.2541163824693409e-06, "epoch": 0.31356118535167227, "percentage": 6.27, "elapsed_time": "0:16:17", "remaining_time": "4:03:30", "throughput": 8856.23, "total_tokens": 8657792} +{"current_steps": 12840, "total_steps": 204665, "loss": 0.1482, "lr": 1.2546049738603605e-06, "epoch": 0.3136833361835194, "percentage": 6.27, "elapsed_time": "0:16:17", "remaining_time": "4:03:30", "throughput": 8856.43, "total_tokens": 8661056} +{"current_steps": 12845, "total_steps": 204665, "loss": 0.0934, "lr": 1.25509356525138e-06, "epoch": 0.3138054870153666, "percentage": 6.28, "elapsed_time": "0:16:18", "remaining_time": "4:03:29", "throughput": 8856.74, "total_tokens": 8664448} +{"current_steps": 12850, "total_steps": 204665, "loss": 0.2028, "lr": 1.2555821566424e-06, "epoch": 0.3139276378472137, "percentage": 6.28, "elapsed_time": "0:16:18", "remaining_time": "4:03:28", "throughput": 8857.04, "total_tokens": 8667840} +{"current_steps": 12855, "total_steps": 204665, "loss": 0.1502, "lr": 1.2560707480334196e-06, "epoch": 0.3140497886790609, "percentage": 6.28, "elapsed_time": "0:16:19", "remaining_time": "4:03:27", "throughput": 8857.76, "total_tokens": 8671744} +{"current_steps": 12860, "total_steps": 204665, "loss": 0.0424, "lr": 1.2565593394244393e-06, "epoch": 0.31417193951090805, "percentage": 6.28, "elapsed_time": "0:16:19", "remaining_time": "4:03:26", "throughput": 8858.12, "total_tokens": 8675200} +{"current_steps": 12865, "total_steps": 204665, "loss": 0.0822, "lr": 1.257047930815459e-06, "epoch": 0.31429409034275524, "percentage": 6.29, "elapsed_time": "0:16:19", "remaining_time": "4:03:25", "throughput": 8858.34, "total_tokens": 8678464} +{"current_steps": 12870, "total_steps": 204665, "loss": 0.128, "lr": 1.2575365222064786e-06, "epoch": 0.3144162411746024, "percentage": 6.29, "elapsed_time": "0:16:20", "remaining_time": "4:03:24", "throughput": 8858.44, "total_tokens": 8681600} +{"current_steps": 12875, "total_steps": 204665, "loss": 0.1034, "lr": 1.2580251135974983e-06, "epoch": 0.31453839200644956, "percentage": 6.29, "elapsed_time": "0:16:20", "remaining_time": "4:03:24", "throughput": 8858.87, "total_tokens": 8685120} +{"current_steps": 12880, "total_steps": 204665, "loss": 0.2057, "lr": 1.258513704988518e-06, "epoch": 0.31466054283829675, "percentage": 6.29, "elapsed_time": "0:16:20", "remaining_time": "4:03:23", "throughput": 8859.13, "total_tokens": 8688448} +{"current_steps": 12885, "total_steps": 204665, "loss": 0.1319, "lr": 1.2590022963795377e-06, "epoch": 0.3147826936701439, "percentage": 6.3, "elapsed_time": "0:16:21", "remaining_time": "4:03:22", "throughput": 8859.22, "total_tokens": 8691584} +{"current_steps": 12890, "total_steps": 204665, "loss": 0.0822, "lr": 1.2594908877705574e-06, "epoch": 0.3149048445019911, "percentage": 6.3, "elapsed_time": "0:16:21", "remaining_time": "4:03:21", "throughput": 8859.63, "total_tokens": 8695104} +{"current_steps": 12895, "total_steps": 204665, "loss": 0.1366, "lr": 1.2599794791615773e-06, "epoch": 0.3150269953338382, "percentage": 6.3, "elapsed_time": "0:16:21", "remaining_time": "4:03:20", "throughput": 8860.32, "total_tokens": 8698944} +{"current_steps": 12900, "total_steps": 204665, "loss": 0.1846, "lr": 1.2604680705525967e-06, "epoch": 0.3151491461656854, "percentage": 6.3, "elapsed_time": "0:16:22", "remaining_time": "4:03:20", "throughput": 8861.01, "total_tokens": 8702784} +{"current_steps": 12905, "total_steps": 204665, "loss": 0.0956, "lr": 1.2609566619436164e-06, "epoch": 0.31527129699753254, "percentage": 6.31, "elapsed_time": "0:16:22", "remaining_time": "4:03:19", "throughput": 8861.62, "total_tokens": 8706560} +{"current_steps": 12910, "total_steps": 204665, "loss": 0.1179, "lr": 1.2614452533346363e-06, "epoch": 0.3153934478293797, "percentage": 6.31, "elapsed_time": "0:16:22", "remaining_time": "4:03:18", "throughput": 8861.68, "total_tokens": 8709632} +{"current_steps": 12915, "total_steps": 204665, "loss": 0.1559, "lr": 1.2619338447256558e-06, "epoch": 0.31551559866122686, "percentage": 6.31, "elapsed_time": "0:16:23", "remaining_time": "4:03:17", "throughput": 8861.93, "total_tokens": 8712960} +{"current_steps": 12920, "total_steps": 204665, "loss": 0.1198, "lr": 1.2624224361166754e-06, "epoch": 0.31563774949307405, "percentage": 6.31, "elapsed_time": "0:16:23", "remaining_time": "4:03:16", "throughput": 8862.69, "total_tokens": 8716928} +{"current_steps": 12925, "total_steps": 204665, "loss": 0.1578, "lr": 1.2629110275076953e-06, "epoch": 0.31575990032492124, "percentage": 6.32, "elapsed_time": "0:16:23", "remaining_time": "4:03:16", "throughput": 8862.99, "total_tokens": 8720320} +{"current_steps": 12930, "total_steps": 204665, "loss": 0.0998, "lr": 1.263399618898715e-06, "epoch": 0.3158820511567684, "percentage": 6.32, "elapsed_time": "0:16:24", "remaining_time": "4:03:15", "throughput": 8863.36, "total_tokens": 8723776} +{"current_steps": 12935, "total_steps": 204665, "loss": 0.1375, "lr": 1.2638882102897345e-06, "epoch": 0.31600420198861556, "percentage": 6.32, "elapsed_time": "0:16:24", "remaining_time": "4:03:14", "throughput": 8863.84, "total_tokens": 8727424} +{"current_steps": 12940, "total_steps": 204665, "loss": 0.1309, "lr": 1.2643768016807544e-06, "epoch": 0.3161263528204627, "percentage": 6.32, "elapsed_time": "0:16:24", "remaining_time": "4:03:13", "throughput": 8863.83, "total_tokens": 8730432} +{"current_steps": 12945, "total_steps": 204665, "loss": 0.1737, "lr": 1.264865393071774e-06, "epoch": 0.3162485036523099, "percentage": 6.32, "elapsed_time": "0:16:25", "remaining_time": "4:03:12", "throughput": 8864.27, "total_tokens": 8733952} +{"current_steps": 12950, "total_steps": 204665, "loss": 0.0793, "lr": 1.2653539844627935e-06, "epoch": 0.316370654484157, "percentage": 6.33, "elapsed_time": "0:16:25", "remaining_time": "4:03:11", "throughput": 8864.27, "total_tokens": 8736960} +{"current_steps": 12955, "total_steps": 204665, "loss": 0.075, "lr": 1.2658425758538134e-06, "epoch": 0.3164928053160042, "percentage": 6.33, "elapsed_time": "0:16:25", "remaining_time": "4:03:10", "throughput": 8864.43, "total_tokens": 8740160} +{"current_steps": 12960, "total_steps": 204665, "loss": 0.1446, "lr": 1.2663311672448331e-06, "epoch": 0.31661495614785135, "percentage": 6.33, "elapsed_time": "0:16:26", "remaining_time": "4:03:09", "throughput": 8864.9, "total_tokens": 8743744} +{"current_steps": 12965, "total_steps": 204665, "loss": 0.0679, "lr": 1.2668197586358528e-06, "epoch": 0.31673710697969854, "percentage": 6.33, "elapsed_time": "0:16:26", "remaining_time": "4:03:08", "throughput": 8865.02, "total_tokens": 8746880} +{"current_steps": 12970, "total_steps": 204665, "loss": 0.117, "lr": 1.2673083500268725e-06, "epoch": 0.31685925781154567, "percentage": 6.34, "elapsed_time": "0:16:27", "remaining_time": "4:03:08", "throughput": 8865.31, "total_tokens": 8750272} +{"current_steps": 12975, "total_steps": 204665, "loss": 0.1294, "lr": 1.2677969414178922e-06, "epoch": 0.31698140864339286, "percentage": 6.34, "elapsed_time": "0:16:27", "remaining_time": "4:03:07", "throughput": 8865.67, "total_tokens": 8753728} +{"current_steps": 12980, "total_steps": 204665, "loss": 0.1858, "lr": 1.2682855328089118e-06, "epoch": 0.31710355947524005, "percentage": 6.34, "elapsed_time": "0:16:27", "remaining_time": "4:03:06", "throughput": 8866.08, "total_tokens": 8757248} +{"current_steps": 12985, "total_steps": 204665, "loss": 0.1265, "lr": 1.2687741241999315e-06, "epoch": 0.3172257103070872, "percentage": 6.34, "elapsed_time": "0:16:28", "remaining_time": "4:03:05", "throughput": 8866.26, "total_tokens": 8760512} +{"current_steps": 12990, "total_steps": 204665, "loss": 0.1477, "lr": 1.2692627155909512e-06, "epoch": 0.3173478611389344, "percentage": 6.35, "elapsed_time": "0:16:28", "remaining_time": "4:03:04", "throughput": 8866.34, "total_tokens": 8763584} +{"current_steps": 12995, "total_steps": 204665, "loss": 0.045, "lr": 1.2697513069819709e-06, "epoch": 0.3174700119707815, "percentage": 6.35, "elapsed_time": "0:16:28", "remaining_time": "4:03:03", "throughput": 8866.6, "total_tokens": 8766912} +{"current_steps": 13000, "total_steps": 204665, "loss": 0.0796, "lr": 1.2702398983729906e-06, "epoch": 0.3175921628026287, "percentage": 6.35, "elapsed_time": "0:16:29", "remaining_time": "4:03:02", "throughput": 8867.2, "total_tokens": 8770624} +{"current_steps": 13005, "total_steps": 204665, "loss": 0.067, "lr": 1.2707284897640102e-06, "epoch": 0.31771431363447583, "percentage": 6.35, "elapsed_time": "0:16:29", "remaining_time": "4:03:01", "throughput": 8867.23, "total_tokens": 8773696} +{"current_steps": 13010, "total_steps": 204665, "loss": 0.254, "lr": 1.27121708115503e-06, "epoch": 0.317836464466323, "percentage": 6.36, "elapsed_time": "0:16:29", "remaining_time": "4:03:01", "throughput": 8867.67, "total_tokens": 8777280} +{"current_steps": 13015, "total_steps": 204665, "loss": 0.1108, "lr": 1.2717056725460498e-06, "epoch": 0.31795861529817016, "percentage": 6.36, "elapsed_time": "0:16:30", "remaining_time": "4:03:00", "throughput": 8867.6, "total_tokens": 8780224} +{"current_steps": 13020, "total_steps": 204665, "loss": 0.138, "lr": 1.2721942639370693e-06, "epoch": 0.31808076613001735, "percentage": 6.36, "elapsed_time": "0:16:30", "remaining_time": "4:02:59", "throughput": 8867.83, "total_tokens": 8783552} +{"current_steps": 13025, "total_steps": 204665, "loss": 0.1391, "lr": 1.272682855328089e-06, "epoch": 0.31820291696186453, "percentage": 6.36, "elapsed_time": "0:16:31", "remaining_time": "4:03:02", "throughput": 8865.64, "total_tokens": 8786816} +{"current_steps": 13030, "total_steps": 204665, "loss": 0.0396, "lr": 1.2731714467191089e-06, "epoch": 0.31832506779371167, "percentage": 6.37, "elapsed_time": "0:16:31", "remaining_time": "4:03:01", "throughput": 8865.8, "total_tokens": 8790016} +{"current_steps": 13035, "total_steps": 204665, "loss": 0.1898, "lr": 1.2736600381101283e-06, "epoch": 0.31844721862555886, "percentage": 6.37, "elapsed_time": "0:16:31", "remaining_time": "4:03:00", "throughput": 8865.99, "total_tokens": 8793216} +{"current_steps": 13040, "total_steps": 204665, "loss": 0.0986, "lr": 1.274148629501148e-06, "epoch": 0.318569369457406, "percentage": 6.37, "elapsed_time": "0:16:32", "remaining_time": "4:02:59", "throughput": 8866.11, "total_tokens": 8796352} +{"current_steps": 13045, "total_steps": 204665, "loss": 0.0983, "lr": 1.274637220892168e-06, "epoch": 0.3186915202892532, "percentage": 6.37, "elapsed_time": "0:16:32", "remaining_time": "4:02:58", "throughput": 8866.14, "total_tokens": 8799360} +{"current_steps": 13050, "total_steps": 204665, "loss": 0.0892, "lr": 1.2751258122831876e-06, "epoch": 0.3188136711211003, "percentage": 6.38, "elapsed_time": "0:16:32", "remaining_time": "4:02:57", "throughput": 8866.1, "total_tokens": 8802304} +{"current_steps": 13055, "total_steps": 204665, "loss": 0.1344, "lr": 1.275614403674207e-06, "epoch": 0.3189358219529475, "percentage": 6.38, "elapsed_time": "0:16:33", "remaining_time": "4:02:56", "throughput": 8866.1, "total_tokens": 8805312} +{"current_steps": 13060, "total_steps": 204665, "loss": 0.1088, "lr": 1.276102995065227e-06, "epoch": 0.31905797278479464, "percentage": 6.38, "elapsed_time": "0:16:33", "remaining_time": "4:02:55", "throughput": 8866.07, "total_tokens": 8808256} +{"current_steps": 13065, "total_steps": 204665, "loss": 0.0799, "lr": 1.2765915864562466e-06, "epoch": 0.31918012361664183, "percentage": 6.38, "elapsed_time": "0:16:33", "remaining_time": "4:02:54", "throughput": 8866.38, "total_tokens": 8811648} +{"current_steps": 13070, "total_steps": 204665, "loss": 0.079, "lr": 1.277080177847266e-06, "epoch": 0.319302274448489, "percentage": 6.39, "elapsed_time": "0:16:34", "remaining_time": "4:02:53", "throughput": 8866.5, "total_tokens": 8814784} +{"current_steps": 13075, "total_steps": 204665, "loss": 0.0918, "lr": 1.277568769238286e-06, "epoch": 0.31942442528033616, "percentage": 6.39, "elapsed_time": "0:16:34", "remaining_time": "4:02:52", "throughput": 8866.6, "total_tokens": 8817920} +{"current_steps": 13080, "total_steps": 204665, "loss": 0.0483, "lr": 1.2780573606293057e-06, "epoch": 0.31954657611218334, "percentage": 6.39, "elapsed_time": "0:16:34", "remaining_time": "4:02:51", "throughput": 8866.85, "total_tokens": 8821248} +{"current_steps": 13085, "total_steps": 204665, "loss": 0.0595, "lr": 1.2785459520203254e-06, "epoch": 0.3196687269440305, "percentage": 6.39, "elapsed_time": "0:16:35", "remaining_time": "4:02:50", "throughput": 8867.23, "total_tokens": 8824704} +{"current_steps": 13090, "total_steps": 204665, "loss": 0.1064, "lr": 1.279034543411345e-06, "epoch": 0.31979087777587767, "percentage": 6.4, "elapsed_time": "0:16:35", "remaining_time": "4:02:50", "throughput": 8867.49, "total_tokens": 8828032} +{"current_steps": 13095, "total_steps": 204665, "loss": 0.1114, "lr": 1.2795231348023647e-06, "epoch": 0.3199130286077248, "percentage": 6.4, "elapsed_time": "0:16:35", "remaining_time": "4:02:49", "throughput": 8867.49, "total_tokens": 8831040} +{"current_steps": 13100, "total_steps": 204665, "loss": 0.3178, "lr": 1.2800117261933844e-06, "epoch": 0.320035179439572, "percentage": 6.4, "elapsed_time": "0:16:36", "remaining_time": "4:02:48", "throughput": 8867.62, "total_tokens": 8834240} +{"current_steps": 13105, "total_steps": 204665, "loss": 0.1605, "lr": 1.280500317584404e-06, "epoch": 0.3201573302714191, "percentage": 6.4, "elapsed_time": "0:16:36", "remaining_time": "4:02:47", "throughput": 8867.98, "total_tokens": 8837696} +{"current_steps": 13110, "total_steps": 204665, "loss": 0.1192, "lr": 1.2809889089754238e-06, "epoch": 0.3202794811032663, "percentage": 6.41, "elapsed_time": "0:16:36", "remaining_time": "4:02:46", "throughput": 8868.18, "total_tokens": 8840960} +{"current_steps": 13115, "total_steps": 204665, "loss": 0.0765, "lr": 1.2814775003664434e-06, "epoch": 0.32040163193511345, "percentage": 6.41, "elapsed_time": "0:16:37", "remaining_time": "4:02:45", "throughput": 8868.22, "total_tokens": 8844032} +{"current_steps": 13120, "total_steps": 204665, "loss": 0.0768, "lr": 1.2819660917574633e-06, "epoch": 0.32052378276696064, "percentage": 6.41, "elapsed_time": "0:16:37", "remaining_time": "4:02:44", "throughput": 8868.54, "total_tokens": 8847424} +{"current_steps": 13125, "total_steps": 204665, "loss": 0.142, "lr": 1.2824546831484828e-06, "epoch": 0.32064593359880783, "percentage": 6.41, "elapsed_time": "0:16:37", "remaining_time": "4:02:43", "throughput": 8868.66, "total_tokens": 8850560} +{"current_steps": 13130, "total_steps": 204665, "loss": 0.0831, "lr": 1.2829432745395025e-06, "epoch": 0.32076808443065497, "percentage": 6.42, "elapsed_time": "0:16:38", "remaining_time": "4:02:42", "throughput": 8869.0, "total_tokens": 8854016} +{"current_steps": 13135, "total_steps": 204665, "loss": 0.1633, "lr": 1.2834318659305224e-06, "epoch": 0.32089023526250215, "percentage": 6.42, "elapsed_time": "0:16:38", "remaining_time": "4:02:42", "throughput": 8869.19, "total_tokens": 8857280} +{"current_steps": 13140, "total_steps": 204665, "loss": 0.0974, "lr": 1.2839204573215418e-06, "epoch": 0.3210123860943493, "percentage": 6.42, "elapsed_time": "0:16:39", "remaining_time": "4:02:41", "throughput": 8869.51, "total_tokens": 8860672} +{"current_steps": 13145, "total_steps": 204665, "loss": 0.0883, "lr": 1.2844090487125615e-06, "epoch": 0.3211345369261965, "percentage": 6.42, "elapsed_time": "0:16:39", "remaining_time": "4:02:40", "throughput": 8869.63, "total_tokens": 8863808} +{"current_steps": 13150, "total_steps": 204665, "loss": 0.2072, "lr": 1.2848976401035814e-06, "epoch": 0.3212566877580436, "percentage": 6.43, "elapsed_time": "0:16:39", "remaining_time": "4:02:39", "throughput": 8870.1, "total_tokens": 8867392} +{"current_steps": 13155, "total_steps": 204665, "loss": 0.1008, "lr": 1.2853862314946009e-06, "epoch": 0.3213788385898908, "percentage": 6.43, "elapsed_time": "0:16:40", "remaining_time": "4:02:38", "throughput": 8870.23, "total_tokens": 8870592} +{"current_steps": 13160, "total_steps": 204665, "loss": 0.087, "lr": 1.2858748228856206e-06, "epoch": 0.32150098942173794, "percentage": 6.43, "elapsed_time": "0:16:40", "remaining_time": "4:02:37", "throughput": 8870.45, "total_tokens": 8873856} +{"current_steps": 13165, "total_steps": 204665, "loss": 0.1631, "lr": 1.2863634142766405e-06, "epoch": 0.3216231402535851, "percentage": 6.43, "elapsed_time": "0:16:40", "remaining_time": "4:02:36", "throughput": 8870.58, "total_tokens": 8877056} +{"current_steps": 13170, "total_steps": 204665, "loss": 0.1666, "lr": 1.2868520056676601e-06, "epoch": 0.3217452910854323, "percentage": 6.43, "elapsed_time": "0:16:41", "remaining_time": "4:02:35", "throughput": 8870.77, "total_tokens": 8880320} +{"current_steps": 13175, "total_steps": 204665, "loss": 0.1232, "lr": 1.2873405970586796e-06, "epoch": 0.32186744191727945, "percentage": 6.44, "elapsed_time": "0:16:41", "remaining_time": "4:02:34", "throughput": 8870.92, "total_tokens": 8883520} +{"current_steps": 13180, "total_steps": 204665, "loss": 0.0721, "lr": 1.2878291884496995e-06, "epoch": 0.32198959274912664, "percentage": 6.44, "elapsed_time": "0:16:41", "remaining_time": "4:02:34", "throughput": 8870.86, "total_tokens": 8886464} +{"current_steps": 13185, "total_steps": 204665, "loss": 0.1252, "lr": 1.2883177798407192e-06, "epoch": 0.3221117435809738, "percentage": 6.44, "elapsed_time": "0:16:42", "remaining_time": "4:02:33", "throughput": 8871.01, "total_tokens": 8889664} +{"current_steps": 13190, "total_steps": 204665, "loss": 0.0993, "lr": 1.2888063712317387e-06, "epoch": 0.32223389441282096, "percentage": 6.44, "elapsed_time": "0:16:42", "remaining_time": "4:02:32", "throughput": 8871.04, "total_tokens": 8892736} +{"current_steps": 13195, "total_steps": 204665, "loss": 0.1207, "lr": 1.2892949626227586e-06, "epoch": 0.3223560452446681, "percentage": 6.45, "elapsed_time": "0:16:42", "remaining_time": "4:02:31", "throughput": 8871.18, "total_tokens": 8895936} +{"current_steps": 13200, "total_steps": 204665, "loss": 0.1758, "lr": 1.2897835540137782e-06, "epoch": 0.3224781960765153, "percentage": 6.45, "elapsed_time": "0:16:43", "remaining_time": "4:02:30", "throughput": 8871.41, "total_tokens": 8899264} +{"current_steps": 13205, "total_steps": 204665, "loss": 0.1376, "lr": 1.290272145404798e-06, "epoch": 0.3226003469083624, "percentage": 6.45, "elapsed_time": "0:16:43", "remaining_time": "4:02:29", "throughput": 8871.62, "total_tokens": 8902528} +{"current_steps": 13210, "total_steps": 204665, "loss": 0.0735, "lr": 1.2907607367958176e-06, "epoch": 0.3227224977402096, "percentage": 6.45, "elapsed_time": "0:16:43", "remaining_time": "4:02:28", "throughput": 8871.81, "total_tokens": 8905792} +{"current_steps": 13215, "total_steps": 204665, "loss": 0.1756, "lr": 1.2912493281868373e-06, "epoch": 0.3228446485720568, "percentage": 6.46, "elapsed_time": "0:16:44", "remaining_time": "4:02:27", "throughput": 8871.93, "total_tokens": 8908928} +{"current_steps": 13220, "total_steps": 204665, "loss": 0.2033, "lr": 1.291737919577857e-06, "epoch": 0.32296679940390394, "percentage": 6.46, "elapsed_time": "0:16:44", "remaining_time": "4:02:26", "throughput": 8872.08, "total_tokens": 8912128} +{"current_steps": 13225, "total_steps": 204665, "loss": 0.0812, "lr": 1.2922265109688766e-06, "epoch": 0.3230889502357511, "percentage": 6.46, "elapsed_time": "0:16:44", "remaining_time": "4:02:25", "throughput": 8872.06, "total_tokens": 8915136} +{"current_steps": 13230, "total_steps": 204665, "loss": 0.1386, "lr": 1.2927151023598963e-06, "epoch": 0.32321110106759826, "percentage": 6.46, "elapsed_time": "0:16:45", "remaining_time": "4:02:25", "throughput": 8872.25, "total_tokens": 8918400} +{"current_steps": 13235, "total_steps": 204665, "loss": 0.1314, "lr": 1.293203693750916e-06, "epoch": 0.32333325189944545, "percentage": 6.47, "elapsed_time": "0:16:45", "remaining_time": "4:02:24", "throughput": 8872.14, "total_tokens": 8921280} +{"current_steps": 13240, "total_steps": 204665, "loss": 0.1797, "lr": 1.293692285141936e-06, "epoch": 0.3234554027312926, "percentage": 6.47, "elapsed_time": "0:16:45", "remaining_time": "4:02:23", "throughput": 8872.13, "total_tokens": 8924288} +{"current_steps": 13245, "total_steps": 204665, "loss": 0.1386, "lr": 1.2941808765329554e-06, "epoch": 0.3235775535631398, "percentage": 6.47, "elapsed_time": "0:16:46", "remaining_time": "4:02:22", "throughput": 8872.82, "total_tokens": 8928192} +{"current_steps": 13250, "total_steps": 204665, "loss": 0.068, "lr": 1.294669467923975e-06, "epoch": 0.3236997043949869, "percentage": 6.47, "elapsed_time": "0:16:46", "remaining_time": "4:02:21", "throughput": 8873.43, "total_tokens": 8931968} +{"current_steps": 13255, "total_steps": 204665, "loss": 0.2441, "lr": 1.295158059314995e-06, "epoch": 0.3238218552268341, "percentage": 6.48, "elapsed_time": "0:16:46", "remaining_time": "4:02:20", "throughput": 8873.57, "total_tokens": 8935168} +{"current_steps": 13260, "total_steps": 204665, "loss": 0.1352, "lr": 1.2956466507060144e-06, "epoch": 0.32394400605868123, "percentage": 6.48, "elapsed_time": "0:16:47", "remaining_time": "4:02:19", "throughput": 8873.62, "total_tokens": 8938240} +{"current_steps": 13265, "total_steps": 204665, "loss": 0.1099, "lr": 1.296135242097034e-06, "epoch": 0.3240661568905284, "percentage": 6.48, "elapsed_time": "0:16:47", "remaining_time": "4:02:19", "throughput": 8873.96, "total_tokens": 8941696} +{"current_steps": 13270, "total_steps": 204665, "loss": 0.0451, "lr": 1.296623833488054e-06, "epoch": 0.3241883077223756, "percentage": 6.48, "elapsed_time": "0:16:47", "remaining_time": "4:02:18", "throughput": 8874.49, "total_tokens": 8945408} +{"current_steps": 13275, "total_steps": 204665, "loss": 0.1093, "lr": 1.2971124248790735e-06, "epoch": 0.32431045855422275, "percentage": 6.49, "elapsed_time": "0:16:48", "remaining_time": "4:02:17", "throughput": 8874.4, "total_tokens": 8948288} +{"current_steps": 13280, "total_steps": 204665, "loss": 0.0524, "lr": 1.2976010162700931e-06, "epoch": 0.32443260938606994, "percentage": 6.49, "elapsed_time": "0:16:48", "remaining_time": "4:02:16", "throughput": 8874.81, "total_tokens": 8951808} +{"current_steps": 13285, "total_steps": 204665, "loss": 0.0399, "lr": 1.298089607661113e-06, "epoch": 0.32455476021791707, "percentage": 6.49, "elapsed_time": "0:16:49", "remaining_time": "4:02:15", "throughput": 8875.32, "total_tokens": 8955456} +{"current_steps": 13290, "total_steps": 204665, "loss": 0.1968, "lr": 1.2985781990521327e-06, "epoch": 0.32467691104976426, "percentage": 6.49, "elapsed_time": "0:16:49", "remaining_time": "4:02:14", "throughput": 8875.53, "total_tokens": 8958720} +{"current_steps": 13295, "total_steps": 204665, "loss": 0.1475, "lr": 1.2990667904431522e-06, "epoch": 0.3247990618816114, "percentage": 6.5, "elapsed_time": "0:16:49", "remaining_time": "4:02:14", "throughput": 8875.72, "total_tokens": 8961984} +{"current_steps": 13300, "total_steps": 204665, "loss": 0.1347, "lr": 1.299555381834172e-06, "epoch": 0.3249212127134586, "percentage": 6.5, "elapsed_time": "0:16:50", "remaining_time": "4:02:13", "throughput": 8876.41, "total_tokens": 8965888} +{"current_steps": 13305, "total_steps": 204665, "loss": 0.2356, "lr": 1.3000439732251918e-06, "epoch": 0.3250433635453057, "percentage": 6.5, "elapsed_time": "0:16:50", "remaining_time": "4:02:12", "throughput": 8876.59, "total_tokens": 8969152} +{"current_steps": 13310, "total_steps": 204665, "loss": 0.2769, "lr": 1.3005325646162112e-06, "epoch": 0.3251655143771529, "percentage": 6.5, "elapsed_time": "0:16:50", "remaining_time": "4:02:11", "throughput": 8876.64, "total_tokens": 8972224} +{"current_steps": 13315, "total_steps": 204665, "loss": 0.1549, "lr": 1.3010211560072311e-06, "epoch": 0.3252876652090001, "percentage": 6.51, "elapsed_time": "0:16:51", "remaining_time": "4:02:10", "throughput": 8876.73, "total_tokens": 8975360} +{"current_steps": 13320, "total_steps": 204665, "loss": 0.0598, "lr": 1.3015097473982508e-06, "epoch": 0.32540981604084723, "percentage": 6.51, "elapsed_time": "0:16:51", "remaining_time": "4:02:09", "throughput": 8876.87, "total_tokens": 8978560} +{"current_steps": 13325, "total_steps": 204665, "loss": 0.1031, "lr": 1.3019983387892705e-06, "epoch": 0.3255319668726944, "percentage": 6.51, "elapsed_time": "0:16:51", "remaining_time": "4:02:08", "throughput": 8876.8, "total_tokens": 8981504} +{"current_steps": 13330, "total_steps": 204665, "loss": 0.1894, "lr": 1.3024869301802902e-06, "epoch": 0.32565411770454156, "percentage": 6.51, "elapsed_time": "0:16:52", "remaining_time": "4:02:07", "throughput": 8876.89, "total_tokens": 8984640} +{"current_steps": 13335, "total_steps": 204665, "loss": 0.0675, "lr": 1.3029755215713098e-06, "epoch": 0.32577626853638875, "percentage": 6.52, "elapsed_time": "0:16:52", "remaining_time": "4:02:07", "throughput": 8876.87, "total_tokens": 8987648} +{"current_steps": 13340, "total_steps": 204665, "loss": 0.0484, "lr": 1.3034641129623295e-06, "epoch": 0.3258984193682359, "percentage": 6.52, "elapsed_time": "0:16:52", "remaining_time": "4:02:06", "throughput": 8877.32, "total_tokens": 8991232} +{"current_steps": 13345, "total_steps": 204665, "loss": 0.0963, "lr": 1.3039527043533492e-06, "epoch": 0.32602057020008307, "percentage": 6.52, "elapsed_time": "0:16:53", "remaining_time": "4:02:05", "throughput": 8877.46, "total_tokens": 8994432} +{"current_steps": 13350, "total_steps": 204665, "loss": 0.173, "lr": 1.3044412957443689e-06, "epoch": 0.3261427210319302, "percentage": 6.52, "elapsed_time": "0:16:53", "remaining_time": "4:02:04", "throughput": 8877.95, "total_tokens": 8998080} +{"current_steps": 13355, "total_steps": 204665, "loss": 0.1508, "lr": 1.3049298871353886e-06, "epoch": 0.3262648718637774, "percentage": 6.53, "elapsed_time": "0:16:53", "remaining_time": "4:02:03", "throughput": 8878.14, "total_tokens": 9001344} +{"current_steps": 13360, "total_steps": 204665, "loss": 0.1075, "lr": 1.3054184785264085e-06, "epoch": 0.3263870226956246, "percentage": 6.53, "elapsed_time": "0:16:54", "remaining_time": "4:02:02", "throughput": 8878.59, "total_tokens": 9004928} +{"current_steps": 13365, "total_steps": 204665, "loss": 0.1445, "lr": 1.305907069917428e-06, "epoch": 0.3265091735274717, "percentage": 6.53, "elapsed_time": "0:16:54", "remaining_time": "4:02:02", "throughput": 8878.58, "total_tokens": 9007936} +{"current_steps": 13370, "total_steps": 204665, "loss": 0.0846, "lr": 1.3063956613084476e-06, "epoch": 0.3266313243593189, "percentage": 6.53, "elapsed_time": "0:16:54", "remaining_time": "4:02:01", "throughput": 8878.84, "total_tokens": 9011264} +{"current_steps": 13375, "total_steps": 204665, "loss": 0.1817, "lr": 1.3068842526994675e-06, "epoch": 0.32675347519116604, "percentage": 6.54, "elapsed_time": "0:16:55", "remaining_time": "4:02:00", "throughput": 8879.02, "total_tokens": 9014528} +{"current_steps": 13380, "total_steps": 204665, "loss": 0.1482, "lr": 1.307372844090487e-06, "epoch": 0.32687562602301323, "percentage": 6.54, "elapsed_time": "0:16:55", "remaining_time": "4:01:59", "throughput": 8879.03, "total_tokens": 9017536} +{"current_steps": 13385, "total_steps": 204665, "loss": 0.1093, "lr": 1.3078614354815067e-06, "epoch": 0.32699777685486037, "percentage": 6.54, "elapsed_time": "0:16:55", "remaining_time": "4:01:58", "throughput": 8878.97, "total_tokens": 9020480} +{"current_steps": 13390, "total_steps": 204665, "loss": 0.0675, "lr": 1.3083500268725265e-06, "epoch": 0.32711992768670756, "percentage": 6.54, "elapsed_time": "0:16:56", "remaining_time": "4:01:57", "throughput": 8879.21, "total_tokens": 9023808} +{"current_steps": 13395, "total_steps": 204665, "loss": 0.2227, "lr": 1.3088386182635462e-06, "epoch": 0.3272420785185547, "percentage": 6.54, "elapsed_time": "0:16:56", "remaining_time": "4:01:56", "throughput": 8879.49, "total_tokens": 9027200} +{"current_steps": 13400, "total_steps": 204665, "loss": 0.132, "lr": 1.3093272096545657e-06, "epoch": 0.3273642293504019, "percentage": 6.55, "elapsed_time": "0:16:56", "remaining_time": "4:01:55", "throughput": 8879.65, "total_tokens": 9030400} +{"current_steps": 13405, "total_steps": 204665, "loss": 0.0713, "lr": 1.3098158010455856e-06, "epoch": 0.327486380182249, "percentage": 6.55, "elapsed_time": "0:16:57", "remaining_time": "4:01:55", "throughput": 8880.02, "total_tokens": 9033920} +{"current_steps": 13410, "total_steps": 204665, "loss": 0.1003, "lr": 1.3103043924366053e-06, "epoch": 0.3276085310140962, "percentage": 6.55, "elapsed_time": "0:16:57", "remaining_time": "4:01:54", "throughput": 8880.46, "total_tokens": 9037504} +{"current_steps": 13415, "total_steps": 204665, "loss": 0.1388, "lr": 1.3107929838276247e-06, "epoch": 0.3277306818459434, "percentage": 6.55, "elapsed_time": "0:16:58", "remaining_time": "4:01:53", "throughput": 8880.35, "total_tokens": 9040384} +{"current_steps": 13420, "total_steps": 204665, "loss": 0.0525, "lr": 1.3112815752186446e-06, "epoch": 0.32785283267779053, "percentage": 6.56, "elapsed_time": "0:16:58", "remaining_time": "4:01:52", "throughput": 8880.6, "total_tokens": 9043712} +{"current_steps": 13425, "total_steps": 204665, "loss": 0.2262, "lr": 1.3117701666096643e-06, "epoch": 0.3279749835096377, "percentage": 6.56, "elapsed_time": "0:16:58", "remaining_time": "4:01:51", "throughput": 8880.28, "total_tokens": 9046336} +{"current_steps": 13430, "total_steps": 204665, "loss": 0.1109, "lr": 1.3122587580006838e-06, "epoch": 0.32809713434148485, "percentage": 6.56, "elapsed_time": "0:16:59", "remaining_time": "4:01:50", "throughput": 8880.41, "total_tokens": 9049536} +{"current_steps": 13435, "total_steps": 204665, "loss": 0.0949, "lr": 1.3127473493917037e-06, "epoch": 0.32821928517333204, "percentage": 6.56, "elapsed_time": "0:16:59", "remaining_time": "4:01:49", "throughput": 8880.66, "total_tokens": 9052928} +{"current_steps": 13440, "total_steps": 204665, "loss": 0.1478, "lr": 1.3132359407827234e-06, "epoch": 0.3283414360051792, "percentage": 6.57, "elapsed_time": "0:16:59", "remaining_time": "4:01:49", "throughput": 8881.01, "total_tokens": 9056384} +{"current_steps": 13445, "total_steps": 204665, "loss": 0.0471, "lr": 1.313724532173743e-06, "epoch": 0.32846358683702637, "percentage": 6.57, "elapsed_time": "0:17:00", "remaining_time": "4:01:48", "throughput": 8881.34, "total_tokens": 9059840} +{"current_steps": 13450, "total_steps": 204665, "loss": 0.1651, "lr": 1.3142131235647627e-06, "epoch": 0.3285857376688735, "percentage": 6.57, "elapsed_time": "0:17:00", "remaining_time": "4:01:47", "throughput": 8881.97, "total_tokens": 9063680} +{"current_steps": 13455, "total_steps": 204665, "loss": 0.0945, "lr": 1.3147017149557824e-06, "epoch": 0.3287078885007207, "percentage": 6.57, "elapsed_time": "0:17:00", "remaining_time": "4:01:46", "throughput": 8882.58, "total_tokens": 9067520} +{"current_steps": 13460, "total_steps": 204665, "loss": 0.072, "lr": 1.315190306346802e-06, "epoch": 0.3288300393325679, "percentage": 6.58, "elapsed_time": "0:17:01", "remaining_time": "4:01:46", "throughput": 8882.68, "total_tokens": 9070656} +{"current_steps": 13465, "total_steps": 204665, "loss": 0.1405, "lr": 1.3156788977378218e-06, "epoch": 0.328952190164415, "percentage": 6.58, "elapsed_time": "0:17:01", "remaining_time": "4:01:45", "throughput": 8883.11, "total_tokens": 9074240} +{"current_steps": 13470, "total_steps": 204665, "loss": 0.0691, "lr": 1.3161674891288414e-06, "epoch": 0.3290743409962622, "percentage": 6.58, "elapsed_time": "0:17:01", "remaining_time": "4:01:44", "throughput": 8883.33, "total_tokens": 9077568} +{"current_steps": 13475, "total_steps": 204665, "loss": 0.0812, "lr": 1.3166560805198611e-06, "epoch": 0.32919649182810934, "percentage": 6.58, "elapsed_time": "0:17:02", "remaining_time": "4:01:43", "throughput": 8883.68, "total_tokens": 9081024} +{"current_steps": 13480, "total_steps": 204665, "loss": 0.0681, "lr": 1.317144671910881e-06, "epoch": 0.32931864265995653, "percentage": 6.59, "elapsed_time": "0:17:02", "remaining_time": "4:01:42", "throughput": 8883.88, "total_tokens": 9084288} +{"current_steps": 13485, "total_steps": 204665, "loss": 0.0192, "lr": 1.3176332633019005e-06, "epoch": 0.32944079349180366, "percentage": 6.59, "elapsed_time": "0:17:02", "remaining_time": "4:01:41", "throughput": 8883.99, "total_tokens": 9087424} +{"current_steps": 13490, "total_steps": 204665, "loss": 0.1937, "lr": 1.3181218546929202e-06, "epoch": 0.32956294432365085, "percentage": 6.59, "elapsed_time": "0:17:03", "remaining_time": "4:01:40", "throughput": 8884.19, "total_tokens": 9090688} +{"current_steps": 13495, "total_steps": 204665, "loss": 0.1272, "lr": 1.31861044608394e-06, "epoch": 0.329685095155498, "percentage": 6.59, "elapsed_time": "0:17:03", "remaining_time": "4:01:40", "throughput": 8884.29, "total_tokens": 9093824} +{"current_steps": 13500, "total_steps": 204665, "loss": 0.1441, "lr": 1.3190990374749595e-06, "epoch": 0.3298072459873452, "percentage": 6.6, "elapsed_time": "0:17:03", "remaining_time": "4:01:39", "throughput": 8884.47, "total_tokens": 9097088} +{"current_steps": 13505, "total_steps": 204665, "loss": 0.163, "lr": 1.3195876288659792e-06, "epoch": 0.3299293968191923, "percentage": 6.6, "elapsed_time": "0:17:04", "remaining_time": "4:01:38", "throughput": 8884.95, "total_tokens": 9100736} +{"current_steps": 13510, "total_steps": 204665, "loss": 0.1131, "lr": 1.3200762202569991e-06, "epoch": 0.3300515476510395, "percentage": 6.6, "elapsed_time": "0:17:04", "remaining_time": "4:01:37", "throughput": 8885.3, "total_tokens": 9104192} +{"current_steps": 13515, "total_steps": 204665, "loss": 0.0136, "lr": 1.3205648116480188e-06, "epoch": 0.3301736984828867, "percentage": 6.6, "elapsed_time": "0:17:04", "remaining_time": "4:01:36", "throughput": 8885.62, "total_tokens": 9107584} +{"current_steps": 13520, "total_steps": 204665, "loss": 0.0706, "lr": 1.3210534030390383e-06, "epoch": 0.3302958493147338, "percentage": 6.61, "elapsed_time": "0:17:05", "remaining_time": "4:01:35", "throughput": 8885.77, "total_tokens": 9110784} +{"current_steps": 13525, "total_steps": 204665, "loss": 0.1026, "lr": 1.3215419944300582e-06, "epoch": 0.330418000146581, "percentage": 6.61, "elapsed_time": "0:17:05", "remaining_time": "4:01:35", "throughput": 8886.11, "total_tokens": 9114240} +{"current_steps": 13530, "total_steps": 204665, "loss": 0.1298, "lr": 1.3220305858210778e-06, "epoch": 0.33054015097842815, "percentage": 6.61, "elapsed_time": "0:17:06", "remaining_time": "4:01:34", "throughput": 8886.38, "total_tokens": 9117568} +{"current_steps": 13535, "total_steps": 204665, "loss": 0.0658, "lr": 1.3225191772120973e-06, "epoch": 0.33066230181027534, "percentage": 6.61, "elapsed_time": "0:17:06", "remaining_time": "4:01:33", "throughput": 8886.82, "total_tokens": 9121152} +{"current_steps": 13540, "total_steps": 204665, "loss": 0.1518, "lr": 1.3230077686031172e-06, "epoch": 0.3307844526421225, "percentage": 6.62, "elapsed_time": "0:17:06", "remaining_time": "4:01:32", "throughput": 8886.99, "total_tokens": 9124352} +{"current_steps": 13545, "total_steps": 204665, "loss": 0.1158, "lr": 1.3234963599941369e-06, "epoch": 0.33090660347396966, "percentage": 6.62, "elapsed_time": "0:17:07", "remaining_time": "4:01:31", "throughput": 8887.08, "total_tokens": 9127488} +{"current_steps": 13550, "total_steps": 204665, "loss": 0.0693, "lr": 1.3239849513851564e-06, "epoch": 0.3310287543058168, "percentage": 6.62, "elapsed_time": "0:17:07", "remaining_time": "4:01:31", "throughput": 8887.95, "total_tokens": 9131584} +{"current_steps": 13555, "total_steps": 204665, "loss": 0.1466, "lr": 1.3244735427761762e-06, "epoch": 0.331150905137664, "percentage": 6.62, "elapsed_time": "0:17:07", "remaining_time": "4:01:30", "throughput": 8888.32, "total_tokens": 9135104} +{"current_steps": 13560, "total_steps": 204665, "loss": 0.2083, "lr": 1.324962134167196e-06, "epoch": 0.3312730559695112, "percentage": 6.63, "elapsed_time": "0:17:08", "remaining_time": "4:01:29", "throughput": 8888.91, "total_tokens": 9138880} +{"current_steps": 13565, "total_steps": 204665, "loss": 0.087, "lr": 1.3254507255582156e-06, "epoch": 0.3313952068013583, "percentage": 6.63, "elapsed_time": "0:17:08", "remaining_time": "4:01:28", "throughput": 8888.95, "total_tokens": 9141952} +{"current_steps": 13570, "total_steps": 204665, "loss": 0.2453, "lr": 1.3259393169492353e-06, "epoch": 0.3315173576332055, "percentage": 6.63, "elapsed_time": "0:17:08", "remaining_time": "4:01:27", "throughput": 8888.98, "total_tokens": 9145024} +{"current_steps": 13575, "total_steps": 204665, "loss": 0.2447, "lr": 1.326427908340255e-06, "epoch": 0.33163950846505263, "percentage": 6.63, "elapsed_time": "0:17:09", "remaining_time": "4:01:26", "throughput": 8889.02, "total_tokens": 9148096} +{"current_steps": 13580, "total_steps": 204665, "loss": 0.1698, "lr": 1.3269164997312747e-06, "epoch": 0.3317616592968998, "percentage": 6.64, "elapsed_time": "0:17:09", "remaining_time": "4:01:25", "throughput": 8889.17, "total_tokens": 9151296} +{"current_steps": 13585, "total_steps": 204665, "loss": 0.1941, "lr": 1.3274050911222943e-06, "epoch": 0.33188381012874696, "percentage": 6.64, "elapsed_time": "0:17:09", "remaining_time": "4:01:25", "throughput": 8889.34, "total_tokens": 9154560} +{"current_steps": 13590, "total_steps": 204665, "loss": 0.1002, "lr": 1.327893682513314e-06, "epoch": 0.33200596096059415, "percentage": 6.64, "elapsed_time": "0:17:10", "remaining_time": "4:01:24", "throughput": 8889.58, "total_tokens": 9157888} +{"current_steps": 13595, "total_steps": 204665, "loss": 0.1172, "lr": 1.3283822739043337e-06, "epoch": 0.3321281117924413, "percentage": 6.64, "elapsed_time": "0:17:10", "remaining_time": "4:01:23", "throughput": 8890.04, "total_tokens": 9161472} +{"current_steps": 13600, "total_steps": 204665, "loss": 0.0366, "lr": 1.3288708652953536e-06, "epoch": 0.3322502626242885, "percentage": 6.65, "elapsed_time": "0:17:10", "remaining_time": "4:01:22", "throughput": 8890.34, "total_tokens": 9164864} +{"current_steps": 13605, "total_steps": 204665, "loss": 0.073, "lr": 1.329359456686373e-06, "epoch": 0.33237241345613566, "percentage": 6.65, "elapsed_time": "0:17:11", "remaining_time": "4:01:22", "throughput": 8890.87, "total_tokens": 9168576} +{"current_steps": 13610, "total_steps": 204665, "loss": 0.2594, "lr": 1.3298480480773927e-06, "epoch": 0.3324945642879828, "percentage": 6.65, "elapsed_time": "0:17:11", "remaining_time": "4:01:21", "throughput": 8890.9, "total_tokens": 9171648} +{"current_steps": 13615, "total_steps": 204665, "loss": 0.2359, "lr": 1.3303366394684126e-06, "epoch": 0.33261671511983, "percentage": 6.65, "elapsed_time": "0:17:11", "remaining_time": "4:01:20", "throughput": 8891.05, "total_tokens": 9174848} +{"current_steps": 13620, "total_steps": 204665, "loss": 0.187, "lr": 1.330825230859432e-06, "epoch": 0.3327388659516771, "percentage": 6.65, "elapsed_time": "0:17:12", "remaining_time": "4:01:19", "throughput": 8891.13, "total_tokens": 9177984} +{"current_steps": 13625, "total_steps": 204665, "loss": 0.1991, "lr": 1.3313138222504518e-06, "epoch": 0.3328610167835243, "percentage": 6.66, "elapsed_time": "0:17:12", "remaining_time": "4:01:18", "throughput": 8891.47, "total_tokens": 9181440} +{"current_steps": 13630, "total_steps": 204665, "loss": 0.1258, "lr": 1.3318024136414717e-06, "epoch": 0.33298316761537144, "percentage": 6.66, "elapsed_time": "0:17:12", "remaining_time": "4:01:17", "throughput": 8891.65, "total_tokens": 9184704} +{"current_steps": 13635, "total_steps": 204665, "loss": 0.1824, "lr": 1.3322910050324914e-06, "epoch": 0.33310531844721863, "percentage": 6.66, "elapsed_time": "0:17:13", "remaining_time": "4:01:16", "throughput": 8891.8, "total_tokens": 9187904} +{"current_steps": 13640, "total_steps": 204665, "loss": 0.1674, "lr": 1.3327795964235108e-06, "epoch": 0.33322746927906577, "percentage": 6.66, "elapsed_time": "0:17:13", "remaining_time": "4:01:15", "throughput": 8891.98, "total_tokens": 9191168} +{"current_steps": 13645, "total_steps": 204665, "loss": 0.164, "lr": 1.3332681878145307e-06, "epoch": 0.33334962011091296, "percentage": 6.67, "elapsed_time": "0:17:13", "remaining_time": "4:01:15", "throughput": 8892.42, "total_tokens": 9194752} +{"current_steps": 13650, "total_steps": 204665, "loss": 0.1096, "lr": 1.3337567792055504e-06, "epoch": 0.3334717709427601, "percentage": 6.67, "elapsed_time": "0:17:14", "remaining_time": "4:01:14", "throughput": 8893.0, "total_tokens": 9198528} +{"current_steps": 13655, "total_steps": 204665, "loss": 0.0589, "lr": 1.3342453705965699e-06, "epoch": 0.3335939217746073, "percentage": 6.67, "elapsed_time": "0:17:14", "remaining_time": "4:01:13", "throughput": 8893.17, "total_tokens": 9201728} +{"current_steps": 13660, "total_steps": 204665, "loss": 0.1301, "lr": 1.3347339619875898e-06, "epoch": 0.3337160726064545, "percentage": 6.67, "elapsed_time": "0:17:15", "remaining_time": "4:01:12", "throughput": 8893.22, "total_tokens": 9204800} +{"current_steps": 13665, "total_steps": 204665, "loss": 0.129, "lr": 1.3352225533786094e-06, "epoch": 0.3338382234383016, "percentage": 6.68, "elapsed_time": "0:17:15", "remaining_time": "4:01:11", "throughput": 8893.52, "total_tokens": 9208192} +{"current_steps": 13670, "total_steps": 204665, "loss": 0.1446, "lr": 1.3357111447696291e-06, "epoch": 0.3339603742701488, "percentage": 6.68, "elapsed_time": "0:17:15", "remaining_time": "4:01:10", "throughput": 8893.63, "total_tokens": 9211328} +{"current_steps": 13675, "total_steps": 204665, "loss": 0.1586, "lr": 1.3361997361606488e-06, "epoch": 0.33408252510199593, "percentage": 6.68, "elapsed_time": "0:17:16", "remaining_time": "4:01:10", "throughput": 8893.9, "total_tokens": 9214720} +{"current_steps": 13680, "total_steps": 204665, "loss": 0.1188, "lr": 1.3366883275516685e-06, "epoch": 0.3342046759338431, "percentage": 6.68, "elapsed_time": "0:17:16", "remaining_time": "4:01:09", "throughput": 8894.01, "total_tokens": 9217856} +{"current_steps": 13685, "total_steps": 204665, "loss": 0.0507, "lr": 1.3371769189426882e-06, "epoch": 0.33432682676569025, "percentage": 6.69, "elapsed_time": "0:17:16", "remaining_time": "4:01:08", "throughput": 8894.35, "total_tokens": 9221312} +{"current_steps": 13690, "total_steps": 204665, "loss": 0.0902, "lr": 1.3376655103337079e-06, "epoch": 0.33444897759753744, "percentage": 6.69, "elapsed_time": "0:17:17", "remaining_time": "4:01:07", "throughput": 8894.79, "total_tokens": 9224896} +{"current_steps": 13695, "total_steps": 204665, "loss": 0.0699, "lr": 1.3381541017247275e-06, "epoch": 0.3345711284293846, "percentage": 6.69, "elapsed_time": "0:17:17", "remaining_time": "4:01:06", "throughput": 8894.78, "total_tokens": 9227904} +{"current_steps": 13700, "total_steps": 204665, "loss": 0.054, "lr": 1.3386426931157472e-06, "epoch": 0.33469327926123177, "percentage": 6.69, "elapsed_time": "0:17:17", "remaining_time": "4:01:05", "throughput": 8895.09, "total_tokens": 9231360} +{"current_steps": 13705, "total_steps": 204665, "loss": 0.1826, "lr": 1.339131284506767e-06, "epoch": 0.33481543009307896, "percentage": 6.7, "elapsed_time": "0:17:18", "remaining_time": "4:01:05", "throughput": 8895.63, "total_tokens": 9235072} +{"current_steps": 13710, "total_steps": 204665, "loss": 0.1498, "lr": 1.3396198758977866e-06, "epoch": 0.3349375809249261, "percentage": 6.7, "elapsed_time": "0:17:18", "remaining_time": "4:01:04", "throughput": 8895.93, "total_tokens": 9238464} +{"current_steps": 13715, "total_steps": 204665, "loss": 0.1819, "lr": 1.3401084672888063e-06, "epoch": 0.3350597317567733, "percentage": 6.7, "elapsed_time": "0:17:18", "remaining_time": "4:01:03", "throughput": 8896.27, "total_tokens": 9241920} +{"current_steps": 13720, "total_steps": 204665, "loss": 0.1203, "lr": 1.3405970586798262e-06, "epoch": 0.3351818825886204, "percentage": 6.7, "elapsed_time": "0:17:19", "remaining_time": "4:01:02", "throughput": 8896.35, "total_tokens": 9245056} +{"current_steps": 13725, "total_steps": 204665, "loss": 0.1672, "lr": 1.3410856500708456e-06, "epoch": 0.3353040334204676, "percentage": 6.71, "elapsed_time": "0:17:19", "remaining_time": "4:01:02", "throughput": 8896.95, "total_tokens": 9248832} +{"current_steps": 13730, "total_steps": 204665, "loss": 0.201, "lr": 1.3415742414618653e-06, "epoch": 0.33542618425231474, "percentage": 6.71, "elapsed_time": "0:17:19", "remaining_time": "4:01:01", "throughput": 8897.03, "total_tokens": 9251968} +{"current_steps": 13735, "total_steps": 204665, "loss": 0.1651, "lr": 1.3420628328528852e-06, "epoch": 0.33554833508416193, "percentage": 6.71, "elapsed_time": "0:17:20", "remaining_time": "4:01:00", "throughput": 8897.31, "total_tokens": 9255360} +{"current_steps": 13740, "total_steps": 204665, "loss": 0.1308, "lr": 1.3425514242439047e-06, "epoch": 0.33567048591600906, "percentage": 6.71, "elapsed_time": "0:17:20", "remaining_time": "4:00:59", "throughput": 8897.43, "total_tokens": 9258560} +{"current_steps": 13745, "total_steps": 204665, "loss": 0.2076, "lr": 1.3430400156349243e-06, "epoch": 0.33579263674785625, "percentage": 6.72, "elapsed_time": "0:17:20", "remaining_time": "4:00:58", "throughput": 8897.61, "total_tokens": 9261824} +{"current_steps": 13750, "total_steps": 204665, "loss": 0.111, "lr": 1.3435286070259442e-06, "epoch": 0.33591478757970344, "percentage": 6.72, "elapsed_time": "0:17:21", "remaining_time": "4:00:57", "throughput": 8897.8, "total_tokens": 9265088} +{"current_steps": 13755, "total_steps": 204665, "loss": 0.0729, "lr": 1.344017198416964e-06, "epoch": 0.3360369384115506, "percentage": 6.72, "elapsed_time": "0:17:21", "remaining_time": "4:00:57", "throughput": 8898.02, "total_tokens": 9268416} +{"current_steps": 13760, "total_steps": 204665, "loss": 0.1038, "lr": 1.3445057898079834e-06, "epoch": 0.33615908924339777, "percentage": 6.72, "elapsed_time": "0:17:21", "remaining_time": "4:00:56", "throughput": 8898.4, "total_tokens": 9271936} +{"current_steps": 13765, "total_steps": 204665, "loss": 0.18, "lr": 1.3449943811990033e-06, "epoch": 0.3362812400752449, "percentage": 6.73, "elapsed_time": "0:17:22", "remaining_time": "4:00:55", "throughput": 8898.71, "total_tokens": 9275392} +{"current_steps": 13770, "total_steps": 204665, "loss": 0.1319, "lr": 1.345482972590023e-06, "epoch": 0.3364033909070921, "percentage": 6.73, "elapsed_time": "0:17:22", "remaining_time": "4:00:54", "throughput": 8898.92, "total_tokens": 9278720} +{"current_steps": 13775, "total_steps": 204665, "loss": 0.1157, "lr": 1.3459715639810424e-06, "epoch": 0.3365255417389392, "percentage": 6.73, "elapsed_time": "0:17:23", "remaining_time": "4:00:53", "throughput": 8898.97, "total_tokens": 9281792} +{"current_steps": 13780, "total_steps": 204665, "loss": 0.0984, "lr": 1.3464601553720623e-06, "epoch": 0.3366476925707864, "percentage": 6.73, "elapsed_time": "0:17:23", "remaining_time": "4:00:53", "throughput": 8899.16, "total_tokens": 9285056} +{"current_steps": 13785, "total_steps": 204665, "loss": 0.0676, "lr": 1.346948746763082e-06, "epoch": 0.33676984340263355, "percentage": 6.74, "elapsed_time": "0:17:23", "remaining_time": "4:00:52", "throughput": 8899.49, "total_tokens": 9288512} +{"current_steps": 13790, "total_steps": 204665, "loss": 0.1568, "lr": 1.3474373381541017e-06, "epoch": 0.33689199423448074, "percentage": 6.74, "elapsed_time": "0:17:24", "remaining_time": "4:00:51", "throughput": 8899.44, "total_tokens": 9291456} +{"current_steps": 13795, "total_steps": 204665, "loss": 0.0388, "lr": 1.3479259295451214e-06, "epoch": 0.3370141450663279, "percentage": 6.74, "elapsed_time": "0:17:24", "remaining_time": "4:00:50", "throughput": 8899.75, "total_tokens": 9294912} +{"current_steps": 13800, "total_steps": 204665, "loss": 0.1024, "lr": 1.348414520936141e-06, "epoch": 0.33713629589817506, "percentage": 6.74, "elapsed_time": "0:17:24", "remaining_time": "4:00:49", "throughput": 8900.02, "total_tokens": 9298304} +{"current_steps": 13805, "total_steps": 204665, "loss": 0.0944, "lr": 1.3489031123271607e-06, "epoch": 0.33725844673002225, "percentage": 6.75, "elapsed_time": "0:17:25", "remaining_time": "4:00:48", "throughput": 8900.25, "total_tokens": 9301632} +{"current_steps": 13810, "total_steps": 204665, "loss": 0.1152, "lr": 1.3493917037181804e-06, "epoch": 0.3373805975618694, "percentage": 6.75, "elapsed_time": "0:17:25", "remaining_time": "4:00:48", "throughput": 8900.45, "total_tokens": 9304896} +{"current_steps": 13815, "total_steps": 204665, "loss": 0.19, "lr": 1.3498802951092e-06, "epoch": 0.3375027483937166, "percentage": 6.75, "elapsed_time": "0:17:25", "remaining_time": "4:00:47", "throughput": 8900.59, "total_tokens": 9308096} +{"current_steps": 13820, "total_steps": 204665, "loss": 0.0909, "lr": 1.3503688865002198e-06, "epoch": 0.3376248992255637, "percentage": 6.75, "elapsed_time": "0:17:26", "remaining_time": "4:00:46", "throughput": 8900.96, "total_tokens": 9311616} +{"current_steps": 13825, "total_steps": 204665, "loss": 0.2014, "lr": 1.3508574778912397e-06, "epoch": 0.3377470500574109, "percentage": 6.75, "elapsed_time": "0:17:26", "remaining_time": "4:00:45", "throughput": 8901.13, "total_tokens": 9314880} +{"current_steps": 13830, "total_steps": 204665, "loss": 0.1298, "lr": 1.3513460692822591e-06, "epoch": 0.33786920088925804, "percentage": 6.76, "elapsed_time": "0:17:26", "remaining_time": "4:00:45", "throughput": 8902.28, "total_tokens": 9319424} +{"current_steps": 13835, "total_steps": 204665, "loss": 0.1577, "lr": 1.3518346606732788e-06, "epoch": 0.3379913517211052, "percentage": 6.76, "elapsed_time": "0:17:27", "remaining_time": "4:00:44", "throughput": 8902.36, "total_tokens": 9322560} +{"current_steps": 13840, "total_steps": 204665, "loss": 0.1624, "lr": 1.3523232520642987e-06, "epoch": 0.33811350255295236, "percentage": 6.76, "elapsed_time": "0:17:27", "remaining_time": "4:00:43", "throughput": 8902.75, "total_tokens": 9326080} +{"current_steps": 13845, "total_steps": 204665, "loss": 0.2945, "lr": 1.3528118434553182e-06, "epoch": 0.33823565338479955, "percentage": 6.76, "elapsed_time": "0:17:27", "remaining_time": "4:00:42", "throughput": 8902.64, "total_tokens": 9328960} +{"current_steps": 13850, "total_steps": 204665, "loss": 0.0515, "lr": 1.3533004348463379e-06, "epoch": 0.33835780421664674, "percentage": 6.77, "elapsed_time": "0:17:28", "remaining_time": "4:00:41", "throughput": 8902.85, "total_tokens": 9332288} +{"current_steps": 13855, "total_steps": 204665, "loss": 0.0948, "lr": 1.3537890262373578e-06, "epoch": 0.3384799550484939, "percentage": 6.77, "elapsed_time": "0:17:28", "remaining_time": "4:00:40", "throughput": 8902.92, "total_tokens": 9335424} +{"current_steps": 13860, "total_steps": 204665, "loss": 0.1205, "lr": 1.3542776176283772e-06, "epoch": 0.33860210588034106, "percentage": 6.77, "elapsed_time": "0:17:28", "remaining_time": "4:00:40", "throughput": 8903.05, "total_tokens": 9338624} +{"current_steps": 13865, "total_steps": 204665, "loss": 0.0804, "lr": 1.354766209019397e-06, "epoch": 0.3387242567121882, "percentage": 6.77, "elapsed_time": "0:17:29", "remaining_time": "4:00:39", "throughput": 8903.31, "total_tokens": 9342016} +{"current_steps": 13870, "total_steps": 204665, "loss": 0.0663, "lr": 1.3552548004104168e-06, "epoch": 0.3388464075440354, "percentage": 6.78, "elapsed_time": "0:17:29", "remaining_time": "4:00:38", "throughput": 8903.59, "total_tokens": 9345408} +{"current_steps": 13875, "total_steps": 204665, "loss": 0.2014, "lr": 1.3557433918014365e-06, "epoch": 0.3389685583758825, "percentage": 6.78, "elapsed_time": "0:17:29", "remaining_time": "4:00:37", "throughput": 8903.82, "total_tokens": 9348736} +{"current_steps": 13880, "total_steps": 204665, "loss": 0.1201, "lr": 1.356231983192456e-06, "epoch": 0.3390907092077297, "percentage": 6.78, "elapsed_time": "0:17:30", "remaining_time": "4:00:36", "throughput": 8904.03, "total_tokens": 9352064} +{"current_steps": 13885, "total_steps": 204665, "loss": 0.1617, "lr": 1.3567205745834758e-06, "epoch": 0.33921286003957685, "percentage": 6.78, "elapsed_time": "0:17:30", "remaining_time": "4:00:36", "throughput": 8904.76, "total_tokens": 9356032} +{"current_steps": 13890, "total_steps": 204665, "loss": 0.0969, "lr": 1.3572091659744955e-06, "epoch": 0.33933501087142404, "percentage": 6.79, "elapsed_time": "0:17:31", "remaining_time": "4:00:35", "throughput": 8905.31, "total_tokens": 9359808} +{"current_steps": 13895, "total_steps": 204665, "loss": 0.1939, "lr": 1.357697757365515e-06, "epoch": 0.3394571617032712, "percentage": 6.79, "elapsed_time": "0:17:31", "remaining_time": "4:00:34", "throughput": 8905.35, "total_tokens": 9362880} +{"current_steps": 13900, "total_steps": 204665, "loss": 0.1736, "lr": 1.3581863487565349e-06, "epoch": 0.33957931253511836, "percentage": 6.79, "elapsed_time": "0:17:31", "remaining_time": "4:00:34", "throughput": 8905.66, "total_tokens": 9366336} +{"current_steps": 13905, "total_steps": 204665, "loss": 0.0832, "lr": 1.3586749401475546e-06, "epoch": 0.33970146336696555, "percentage": 6.79, "elapsed_time": "0:17:32", "remaining_time": "4:00:33", "throughput": 8906.32, "total_tokens": 9370240} +{"current_steps": 13910, "total_steps": 204665, "loss": 0.1163, "lr": 1.3591635315385743e-06, "epoch": 0.3398236141988127, "percentage": 6.8, "elapsed_time": "0:17:32", "remaining_time": "4:00:32", "throughput": 8906.49, "total_tokens": 9373504} +{"current_steps": 13915, "total_steps": 204665, "loss": 0.1155, "lr": 1.359652122929594e-06, "epoch": 0.3399457650306599, "percentage": 6.8, "elapsed_time": "0:17:32", "remaining_time": "4:00:31", "throughput": 8906.61, "total_tokens": 9376704} +{"current_steps": 13920, "total_steps": 204665, "loss": 0.1426, "lr": 1.3601407143206136e-06, "epoch": 0.340067915862507, "percentage": 6.8, "elapsed_time": "0:17:33", "remaining_time": "4:00:30", "throughput": 8906.88, "total_tokens": 9380096} +{"current_steps": 13925, "total_steps": 204665, "loss": 0.0888, "lr": 1.3606293057116333e-06, "epoch": 0.3401900666943542, "percentage": 6.8, "elapsed_time": "0:17:33", "remaining_time": "4:00:30", "throughput": 8907.5, "total_tokens": 9383936} +{"current_steps": 13930, "total_steps": 204665, "loss": 0.2428, "lr": 1.361117897102653e-06, "epoch": 0.34031221752620133, "percentage": 6.81, "elapsed_time": "0:17:33", "remaining_time": "4:00:29", "throughput": 8907.57, "total_tokens": 9387072} +{"current_steps": 13935, "total_steps": 204665, "loss": 0.0733, "lr": 1.3616064884936727e-06, "epoch": 0.3404343683580485, "percentage": 6.81, "elapsed_time": "0:17:34", "remaining_time": "4:00:28", "throughput": 8907.65, "total_tokens": 9390208} +{"current_steps": 13940, "total_steps": 204665, "loss": 0.2286, "lr": 1.3620950798846923e-06, "epoch": 0.34055651918989566, "percentage": 6.81, "elapsed_time": "0:17:34", "remaining_time": "4:00:27", "throughput": 8907.69, "total_tokens": 9393280} +{"current_steps": 13945, "total_steps": 204665, "loss": 0.195, "lr": 1.3625836712757122e-06, "epoch": 0.34067867002174285, "percentage": 6.81, "elapsed_time": "0:17:34", "remaining_time": "4:00:26", "throughput": 8908.15, "total_tokens": 9396928} +{"current_steps": 13950, "total_steps": 204665, "loss": 0.1635, "lr": 1.3630722626667317e-06, "epoch": 0.34080082085359004, "percentage": 6.82, "elapsed_time": "0:17:35", "remaining_time": "4:00:26", "throughput": 8908.58, "total_tokens": 9400512} +{"current_steps": 13955, "total_steps": 204665, "loss": 0.0758, "lr": 1.3635608540577514e-06, "epoch": 0.34092297168543717, "percentage": 6.82, "elapsed_time": "0:17:35", "remaining_time": "4:00:25", "throughput": 8908.91, "total_tokens": 9403968} +{"current_steps": 13960, "total_steps": 204665, "loss": 0.0502, "lr": 1.3640494454487713e-06, "epoch": 0.34104512251728436, "percentage": 6.82, "elapsed_time": "0:17:35", "remaining_time": "4:00:24", "throughput": 8908.8, "total_tokens": 9406848} +{"current_steps": 13965, "total_steps": 204665, "loss": 0.0588, "lr": 1.3645380368397907e-06, "epoch": 0.3411672733491315, "percentage": 6.82, "elapsed_time": "0:17:36", "remaining_time": "4:00:23", "throughput": 8909.13, "total_tokens": 9410304} +{"current_steps": 13970, "total_steps": 204665, "loss": 0.1204, "lr": 1.3650266282308104e-06, "epoch": 0.3412894241809787, "percentage": 6.83, "elapsed_time": "0:17:36", "remaining_time": "4:00:22", "throughput": 8909.23, "total_tokens": 9413504} +{"current_steps": 13975, "total_steps": 204665, "loss": 0.0598, "lr": 1.3655152196218303e-06, "epoch": 0.3414115750128258, "percentage": 6.83, "elapsed_time": "0:17:36", "remaining_time": "4:00:22", "throughput": 8909.23, "total_tokens": 9416512} +{"current_steps": 13980, "total_steps": 204665, "loss": 0.1496, "lr": 1.3660038110128498e-06, "epoch": 0.341533725844673, "percentage": 6.83, "elapsed_time": "0:17:37", "remaining_time": "4:00:21", "throughput": 8909.45, "total_tokens": 9419840} +{"current_steps": 13985, "total_steps": 204665, "loss": 0.1019, "lr": 1.3664924024038695e-06, "epoch": 0.34165587667652014, "percentage": 6.83, "elapsed_time": "0:17:37", "remaining_time": "4:00:20", "throughput": 8909.66, "total_tokens": 9423168} +{"current_steps": 13990, "total_steps": 204665, "loss": 0.1098, "lr": 1.3669809937948894e-06, "epoch": 0.34177802750836733, "percentage": 6.84, "elapsed_time": "0:17:37", "remaining_time": "4:00:19", "throughput": 8910.08, "total_tokens": 9426752} +{"current_steps": 13995, "total_steps": 204665, "loss": 0.1414, "lr": 1.367469585185909e-06, "epoch": 0.3419001783402145, "percentage": 6.84, "elapsed_time": "0:17:38", "remaining_time": "4:00:19", "throughput": 8910.61, "total_tokens": 9430464} +{"current_steps": 14000, "total_steps": 204665, "loss": 0.1338, "lr": 1.3679581765769285e-06, "epoch": 0.34202232917206166, "percentage": 6.84, "elapsed_time": "0:17:38", "remaining_time": "4:00:18", "throughput": 8911.58, "total_tokens": 9434752} +{"current_steps": 14005, "total_steps": 204665, "loss": 0.1397, "lr": 1.3684467679679484e-06, "epoch": 0.34214448000390885, "percentage": 6.84, "elapsed_time": "0:17:39", "remaining_time": "4:00:17", "throughput": 8911.87, "total_tokens": 9438144} +{"current_steps": 14010, "total_steps": 204665, "loss": 0.186, "lr": 1.368935359358968e-06, "epoch": 0.342266630835756, "percentage": 6.85, "elapsed_time": "0:17:39", "remaining_time": "4:00:16", "throughput": 8912.08, "total_tokens": 9441472} +{"current_steps": 14015, "total_steps": 204665, "loss": 0.1268, "lr": 1.3694239507499876e-06, "epoch": 0.34238878166760317, "percentage": 6.85, "elapsed_time": "0:17:39", "remaining_time": "4:00:15", "throughput": 8912.1, "total_tokens": 9444544} +{"current_steps": 14020, "total_steps": 204665, "loss": 0.1247, "lr": 1.3699125421410075e-06, "epoch": 0.3425109324994503, "percentage": 6.85, "elapsed_time": "0:17:40", "remaining_time": "4:00:15", "throughput": 8912.3, "total_tokens": 9447808} +{"current_steps": 14025, "total_steps": 204665, "loss": 0.2446, "lr": 1.3704011335320271e-06, "epoch": 0.3426330833312975, "percentage": 6.85, "elapsed_time": "0:17:40", "remaining_time": "4:00:14", "throughput": 8912.18, "total_tokens": 9450688} +{"current_steps": 14030, "total_steps": 204665, "loss": 0.1617, "lr": 1.3708897249230468e-06, "epoch": 0.34275523416314463, "percentage": 6.86, "elapsed_time": "0:17:40", "remaining_time": "4:00:13", "throughput": 8912.26, "total_tokens": 9453824} +{"current_steps": 14035, "total_steps": 204665, "loss": 0.1629, "lr": 1.3713783163140665e-06, "epoch": 0.3428773849949918, "percentage": 6.86, "elapsed_time": "0:17:41", "remaining_time": "4:00:12", "throughput": 8912.68, "total_tokens": 9457408} +{"current_steps": 14040, "total_steps": 204665, "loss": 0.1217, "lr": 1.3718669077050862e-06, "epoch": 0.342999535826839, "percentage": 6.86, "elapsed_time": "0:17:41", "remaining_time": "4:00:12", "throughput": 8913.38, "total_tokens": 9461376} +{"current_steps": 14045, "total_steps": 204665, "loss": 0.0887, "lr": 1.3723554990961059e-06, "epoch": 0.34312168665868614, "percentage": 6.86, "elapsed_time": "0:17:41", "remaining_time": "4:00:11", "throughput": 8913.51, "total_tokens": 9464576} +{"current_steps": 14050, "total_steps": 204665, "loss": 0.0384, "lr": 1.3728440904871255e-06, "epoch": 0.34324383749053333, "percentage": 6.86, "elapsed_time": "0:17:42", "remaining_time": "4:00:10", "throughput": 8913.64, "total_tokens": 9467776} +{"current_steps": 14055, "total_steps": 204665, "loss": 0.1598, "lr": 1.3733326818781452e-06, "epoch": 0.34336598832238047, "percentage": 6.87, "elapsed_time": "0:17:42", "remaining_time": "4:00:09", "throughput": 8913.82, "total_tokens": 9471040} +{"current_steps": 14060, "total_steps": 204665, "loss": 0.0941, "lr": 1.373821273269165e-06, "epoch": 0.34348813915422766, "percentage": 6.87, "elapsed_time": "0:17:42", "remaining_time": "4:00:08", "throughput": 8914.05, "total_tokens": 9474368} +{"current_steps": 14065, "total_steps": 204665, "loss": 0.133, "lr": 1.3743098646601848e-06, "epoch": 0.3436102899860748, "percentage": 6.87, "elapsed_time": "0:17:43", "remaining_time": "4:00:07", "throughput": 8914.22, "total_tokens": 9477632} +{"current_steps": 14070, "total_steps": 204665, "loss": 0.0703, "lr": 1.3747984560512043e-06, "epoch": 0.343732440817922, "percentage": 6.87, "elapsed_time": "0:17:43", "remaining_time": "4:00:07", "throughput": 8914.7, "total_tokens": 9481280} +{"current_steps": 14075, "total_steps": 204665, "loss": 0.1171, "lr": 1.375287047442224e-06, "epoch": 0.3438545916497691, "percentage": 6.88, "elapsed_time": "0:17:43", "remaining_time": "4:00:06", "throughput": 8914.46, "total_tokens": 9483968} +{"current_steps": 14080, "total_steps": 204665, "loss": 0.1291, "lr": 1.3757756388332438e-06, "epoch": 0.3439767424816163, "percentage": 6.88, "elapsed_time": "0:17:44", "remaining_time": "4:00:05", "throughput": 8914.51, "total_tokens": 9487040} +{"current_steps": 14085, "total_steps": 204665, "loss": 0.2305, "lr": 1.3762642302242633e-06, "epoch": 0.34409889331346344, "percentage": 6.88, "elapsed_time": "0:17:44", "remaining_time": "4:00:04", "throughput": 8915.0, "total_tokens": 9490752} +{"current_steps": 14090, "total_steps": 204665, "loss": 0.1599, "lr": 1.376752821615283e-06, "epoch": 0.34422104414531063, "percentage": 6.88, "elapsed_time": "0:17:44", "remaining_time": "4:00:03", "throughput": 8915.16, "total_tokens": 9494016} +{"current_steps": 14095, "total_steps": 204665, "loss": 0.0997, "lr": 1.3772414130063029e-06, "epoch": 0.3443431949771578, "percentage": 6.89, "elapsed_time": "0:17:45", "remaining_time": "4:00:02", "throughput": 8915.45, "total_tokens": 9497408} +{"current_steps": 14100, "total_steps": 204665, "loss": 0.0827, "lr": 1.3777300043973226e-06, "epoch": 0.34446534580900495, "percentage": 6.89, "elapsed_time": "0:17:45", "remaining_time": "4:00:02", "throughput": 8915.8, "total_tokens": 9500928} +{"current_steps": 14105, "total_steps": 204665, "loss": 0.2393, "lr": 1.378218595788342e-06, "epoch": 0.34458749664085214, "percentage": 6.89, "elapsed_time": "0:17:45", "remaining_time": "4:00:01", "throughput": 8915.94, "total_tokens": 9504128} +{"current_steps": 14110, "total_steps": 204665, "loss": 0.1463, "lr": 1.378707187179362e-06, "epoch": 0.3447096474726993, "percentage": 6.89, "elapsed_time": "0:17:46", "remaining_time": "4:00:00", "throughput": 8916.14, "total_tokens": 9507456} +{"current_steps": 14115, "total_steps": 204665, "loss": 0.0823, "lr": 1.3791957785703816e-06, "epoch": 0.34483179830454647, "percentage": 6.9, "elapsed_time": "0:17:46", "remaining_time": "3:59:59", "throughput": 8916.59, "total_tokens": 9511104} +{"current_steps": 14120, "total_steps": 204665, "loss": 0.0464, "lr": 1.379684369961401e-06, "epoch": 0.3449539491363936, "percentage": 6.9, "elapsed_time": "0:17:47", "remaining_time": "3:59:58", "throughput": 8916.59, "total_tokens": 9514112} +{"current_steps": 14125, "total_steps": 204665, "loss": 0.1753, "lr": 1.380172961352421e-06, "epoch": 0.3450760999682408, "percentage": 6.9, "elapsed_time": "0:17:47", "remaining_time": "3:59:58", "throughput": 8916.6, "total_tokens": 9517184} +{"current_steps": 14130, "total_steps": 204665, "loss": 0.0295, "lr": 1.3806615527434407e-06, "epoch": 0.3451982508000879, "percentage": 6.9, "elapsed_time": "0:17:47", "remaining_time": "3:59:57", "throughput": 8916.79, "total_tokens": 9520448} +{"current_steps": 14135, "total_steps": 204665, "loss": 0.3037, "lr": 1.3811501441344601e-06, "epoch": 0.3453204016319351, "percentage": 6.91, "elapsed_time": "0:17:48", "remaining_time": "3:59:56", "throughput": 8917.02, "total_tokens": 9523776} +{"current_steps": 14140, "total_steps": 204665, "loss": 0.1079, "lr": 1.38163873552548e-06, "epoch": 0.3454425524637823, "percentage": 6.91, "elapsed_time": "0:17:48", "remaining_time": "3:59:55", "throughput": 8917.45, "total_tokens": 9527360} +{"current_steps": 14145, "total_steps": 204665, "loss": 0.0986, "lr": 1.3821273269164997e-06, "epoch": 0.34556470329562944, "percentage": 6.91, "elapsed_time": "0:17:48", "remaining_time": "3:59:54", "throughput": 8917.57, "total_tokens": 9530560} +{"current_steps": 14150, "total_steps": 204665, "loss": 0.075, "lr": 1.3826159183075194e-06, "epoch": 0.34568685412747663, "percentage": 6.91, "elapsed_time": "0:17:49", "remaining_time": "3:59:54", "throughput": 8917.8, "total_tokens": 9533888} +{"current_steps": 14155, "total_steps": 204665, "loss": 0.1583, "lr": 1.383104509698539e-06, "epoch": 0.34580900495932376, "percentage": 6.92, "elapsed_time": "0:17:49", "remaining_time": "3:59:53", "throughput": 8918.75, "total_tokens": 9538176} +{"current_steps": 14160, "total_steps": 204665, "loss": 0.0705, "lr": 1.3835931010895587e-06, "epoch": 0.34593115579117095, "percentage": 6.92, "elapsed_time": "0:17:49", "remaining_time": "3:59:52", "throughput": 8919.14, "total_tokens": 9541760} +{"current_steps": 14165, "total_steps": 204665, "loss": 0.0922, "lr": 1.3840816924805784e-06, "epoch": 0.3460533066230181, "percentage": 6.92, "elapsed_time": "0:17:50", "remaining_time": "3:59:52", "throughput": 8919.26, "total_tokens": 9544960} +{"current_steps": 14170, "total_steps": 204665, "loss": 0.251, "lr": 1.384570283871598e-06, "epoch": 0.3461754574548653, "percentage": 6.92, "elapsed_time": "0:17:50", "remaining_time": "3:59:51", "throughput": 8919.43, "total_tokens": 9548224} +{"current_steps": 14175, "total_steps": 204665, "loss": 0.0849, "lr": 1.3850588752626178e-06, "epoch": 0.3462976082867124, "percentage": 6.93, "elapsed_time": "0:17:50", "remaining_time": "3:59:50", "throughput": 8919.99, "total_tokens": 9552000} +{"current_steps": 14180, "total_steps": 204665, "loss": 0.2377, "lr": 1.3855474666536375e-06, "epoch": 0.3464197591185596, "percentage": 6.93, "elapsed_time": "0:17:51", "remaining_time": "3:59:49", "throughput": 8920.03, "total_tokens": 9555072} +{"current_steps": 14185, "total_steps": 204665, "loss": 0.0824, "lr": 1.3860360580446574e-06, "epoch": 0.34654190995040673, "percentage": 6.93, "elapsed_time": "0:17:51", "remaining_time": "3:59:48", "throughput": 8919.92, "total_tokens": 9557952} +{"current_steps": 14190, "total_steps": 204665, "loss": 0.1271, "lr": 1.3865246494356768e-06, "epoch": 0.3466640607822539, "percentage": 6.93, "elapsed_time": "0:17:51", "remaining_time": "3:59:47", "throughput": 8919.94, "total_tokens": 9561024} +{"current_steps": 14195, "total_steps": 204665, "loss": 0.1956, "lr": 1.3870132408266965e-06, "epoch": 0.3467862116141011, "percentage": 6.94, "elapsed_time": "0:17:52", "remaining_time": "3:59:47", "throughput": 8920.46, "total_tokens": 9564736} +{"current_steps": 14200, "total_steps": 204665, "loss": 0.1167, "lr": 1.3875018322177164e-06, "epoch": 0.34690836244594825, "percentage": 6.94, "elapsed_time": "0:17:52", "remaining_time": "3:59:46", "throughput": 8920.51, "total_tokens": 9567808} +{"current_steps": 14205, "total_steps": 204665, "loss": 0.0684, "lr": 1.3879904236087359e-06, "epoch": 0.34703051327779544, "percentage": 6.94, "elapsed_time": "0:17:52", "remaining_time": "3:59:45", "throughput": 8920.54, "total_tokens": 9570880} +{"current_steps": 14210, "total_steps": 204665, "loss": 0.1585, "lr": 1.3884790149997556e-06, "epoch": 0.3471526641096426, "percentage": 6.94, "elapsed_time": "0:17:53", "remaining_time": "3:59:44", "throughput": 8921.17, "total_tokens": 9574720} +{"current_steps": 14215, "total_steps": 204665, "loss": 0.1083, "lr": 1.3889676063907754e-06, "epoch": 0.34727481494148976, "percentage": 6.95, "elapsed_time": "0:17:53", "remaining_time": "3:59:44", "throughput": 8921.78, "total_tokens": 9578560} +{"current_steps": 14220, "total_steps": 204665, "loss": 0.047, "lr": 1.3894561977817951e-06, "epoch": 0.3473969657733369, "percentage": 6.95, "elapsed_time": "0:17:53", "remaining_time": "3:59:43", "throughput": 8921.75, "total_tokens": 9581568} +{"current_steps": 14225, "total_steps": 204665, "loss": 0.0997, "lr": 1.3899447891728146e-06, "epoch": 0.3475191166051841, "percentage": 6.95, "elapsed_time": "0:17:54", "remaining_time": "3:59:42", "throughput": 8922.34, "total_tokens": 9585408} +{"current_steps": 14230, "total_steps": 204665, "loss": 0.0707, "lr": 1.3904333805638345e-06, "epoch": 0.3476412674370312, "percentage": 6.95, "elapsed_time": "0:17:54", "remaining_time": "3:59:41", "throughput": 8922.65, "total_tokens": 9588864} +{"current_steps": 14235, "total_steps": 204665, "loss": 0.1108, "lr": 1.3909219719548542e-06, "epoch": 0.3477634182688784, "percentage": 6.96, "elapsed_time": "0:17:55", "remaining_time": "3:59:41", "throughput": 8922.78, "total_tokens": 9592064} +{"current_steps": 14240, "total_steps": 204665, "loss": 0.1077, "lr": 1.3914105633458736e-06, "epoch": 0.3478855691007256, "percentage": 6.96, "elapsed_time": "0:17:55", "remaining_time": "3:59:40", "throughput": 8922.94, "total_tokens": 9595328} +{"current_steps": 14245, "total_steps": 204665, "loss": 0.2657, "lr": 1.3918991547368935e-06, "epoch": 0.34800771993257273, "percentage": 6.96, "elapsed_time": "0:17:55", "remaining_time": "3:59:39", "throughput": 8923.13, "total_tokens": 9598592} +{"current_steps": 14250, "total_steps": 204665, "loss": 0.1663, "lr": 1.3923877461279132e-06, "epoch": 0.3481298707644199, "percentage": 6.96, "elapsed_time": "0:17:56", "remaining_time": "3:59:38", "throughput": 8923.31, "total_tokens": 9601856} +{"current_steps": 14255, "total_steps": 204665, "loss": 0.0336, "lr": 1.3928763375189327e-06, "epoch": 0.34825202159626706, "percentage": 6.97, "elapsed_time": "0:17:56", "remaining_time": "3:59:37", "throughput": 8923.54, "total_tokens": 9605184} +{"current_steps": 14260, "total_steps": 204665, "loss": 0.1935, "lr": 1.3933649289099526e-06, "epoch": 0.34837417242811425, "percentage": 6.97, "elapsed_time": "0:17:56", "remaining_time": "3:59:36", "throughput": 8923.57, "total_tokens": 9608256} +{"current_steps": 14265, "total_steps": 204665, "loss": 0.1625, "lr": 1.3938535203009723e-06, "epoch": 0.3484963232599614, "percentage": 6.97, "elapsed_time": "0:17:57", "remaining_time": "3:59:36", "throughput": 8923.6, "total_tokens": 9611328} +{"current_steps": 14270, "total_steps": 204665, "loss": 0.1454, "lr": 1.394342111691992e-06, "epoch": 0.3486184740918086, "percentage": 6.97, "elapsed_time": "0:17:57", "remaining_time": "3:59:35", "throughput": 8923.63, "total_tokens": 9614400} +{"current_steps": 14275, "total_steps": 204665, "loss": 0.1765, "lr": 1.3948307030830116e-06, "epoch": 0.3487406249236557, "percentage": 6.97, "elapsed_time": "0:17:57", "remaining_time": "3:59:34", "throughput": 8924.27, "total_tokens": 9618304} +{"current_steps": 14280, "total_steps": 204665, "loss": 0.097, "lr": 1.3953192944740313e-06, "epoch": 0.3488627757555029, "percentage": 6.98, "elapsed_time": "0:17:58", "remaining_time": "3:59:33", "throughput": 8924.78, "total_tokens": 9622016} +{"current_steps": 14285, "total_steps": 204665, "loss": 0.1955, "lr": 1.395807885865051e-06, "epoch": 0.3489849265873501, "percentage": 6.98, "elapsed_time": "0:17:58", "remaining_time": "3:59:33", "throughput": 8925.0, "total_tokens": 9625344} +{"current_steps": 14290, "total_steps": 204665, "loss": 0.1924, "lr": 1.3962964772560707e-06, "epoch": 0.3491070774191972, "percentage": 6.98, "elapsed_time": "0:17:58", "remaining_time": "3:59:32", "throughput": 8925.5, "total_tokens": 9629056} +{"current_steps": 14295, "total_steps": 204665, "loss": 0.2156, "lr": 1.3967850686470903e-06, "epoch": 0.3492292282510444, "percentage": 6.98, "elapsed_time": "0:17:59", "remaining_time": "3:59:31", "throughput": 8925.72, "total_tokens": 9632384} +{"current_steps": 14300, "total_steps": 204665, "loss": 0.0258, "lr": 1.39727366003811e-06, "epoch": 0.34935137908289154, "percentage": 6.99, "elapsed_time": "0:17:59", "remaining_time": "3:59:30", "throughput": 8926.32, "total_tokens": 9636224} +{"current_steps": 14305, "total_steps": 204665, "loss": 0.0459, "lr": 1.39776225142913e-06, "epoch": 0.34947352991473873, "percentage": 6.99, "elapsed_time": "0:17:59", "remaining_time": "3:59:30", "throughput": 8926.62, "total_tokens": 9639680} +{"current_steps": 14310, "total_steps": 204665, "loss": 0.0851, "lr": 1.3982508428201494e-06, "epoch": 0.34959568074658587, "percentage": 6.99, "elapsed_time": "0:18:00", "remaining_time": "3:59:29", "throughput": 8926.69, "total_tokens": 9642816} +{"current_steps": 14315, "total_steps": 204665, "loss": 0.1033, "lr": 1.398739434211169e-06, "epoch": 0.34971783157843306, "percentage": 6.99, "elapsed_time": "0:18:00", "remaining_time": "3:59:28", "throughput": 8926.76, "total_tokens": 9645952} +{"current_steps": 14320, "total_steps": 204665, "loss": 0.1258, "lr": 1.399228025602189e-06, "epoch": 0.3498399824102802, "percentage": 7.0, "elapsed_time": "0:18:00", "remaining_time": "3:59:27", "throughput": 8926.88, "total_tokens": 9649152} +{"current_steps": 14325, "total_steps": 204665, "loss": 0.0797, "lr": 1.3997166169932084e-06, "epoch": 0.3499621332421274, "percentage": 7.0, "elapsed_time": "0:18:01", "remaining_time": "3:59:26", "throughput": 8927.13, "total_tokens": 9652544} +{"current_steps": 14330, "total_steps": 204665, "loss": 0.145, "lr": 1.4002052083842281e-06, "epoch": 0.3500842840739745, "percentage": 7.0, "elapsed_time": "0:18:01", "remaining_time": "3:59:26", "throughput": 8927.66, "total_tokens": 9656320} +{"current_steps": 14335, "total_steps": 204665, "loss": 0.1833, "lr": 1.400693799775248e-06, "epoch": 0.3502064349058217, "percentage": 7.0, "elapsed_time": "0:18:01", "remaining_time": "3:59:25", "throughput": 8927.6, "total_tokens": 9659264} +{"current_steps": 14340, "total_steps": 204665, "loss": 0.2014, "lr": 1.4011823911662677e-06, "epoch": 0.3503285857376689, "percentage": 7.01, "elapsed_time": "0:18:02", "remaining_time": "3:59:24", "throughput": 8927.76, "total_tokens": 9662528} +{"current_steps": 14345, "total_steps": 204665, "loss": 0.0795, "lr": 1.4016709825572872e-06, "epoch": 0.35045073656951603, "percentage": 7.01, "elapsed_time": "0:18:02", "remaining_time": "3:59:23", "throughput": 8928.25, "total_tokens": 9666240} +{"current_steps": 14350, "total_steps": 204665, "loss": 0.0894, "lr": 1.402159573948307e-06, "epoch": 0.3505728874013632, "percentage": 7.01, "elapsed_time": "0:18:03", "remaining_time": "3:59:23", "throughput": 8928.32, "total_tokens": 9669376} +{"current_steps": 14355, "total_steps": 204665, "loss": 0.0369, "lr": 1.4026481653393267e-06, "epoch": 0.35069503823321035, "percentage": 7.01, "elapsed_time": "0:18:03", "remaining_time": "3:59:22", "throughput": 8928.58, "total_tokens": 9672768} +{"current_steps": 14360, "total_steps": 204665, "loss": 0.1528, "lr": 1.4031367567303462e-06, "epoch": 0.35081718906505754, "percentage": 7.02, "elapsed_time": "0:18:03", "remaining_time": "3:59:21", "throughput": 8928.59, "total_tokens": 9675840} +{"current_steps": 14365, "total_steps": 204665, "loss": 0.1661, "lr": 1.403625348121366e-06, "epoch": 0.3509393398969047, "percentage": 7.02, "elapsed_time": "0:18:04", "remaining_time": "3:59:20", "throughput": 8928.68, "total_tokens": 9678976} +{"current_steps": 14370, "total_steps": 204665, "loss": 0.0757, "lr": 1.4041139395123858e-06, "epoch": 0.35106149072875187, "percentage": 7.02, "elapsed_time": "0:18:04", "remaining_time": "3:59:19", "throughput": 8929.03, "total_tokens": 9682496} +{"current_steps": 14375, "total_steps": 204665, "loss": 0.1252, "lr": 1.4046025309034055e-06, "epoch": 0.351183641560599, "percentage": 7.02, "elapsed_time": "0:18:04", "remaining_time": "3:59:19", "throughput": 8929.09, "total_tokens": 9685568} +{"current_steps": 14380, "total_steps": 204665, "loss": 0.183, "lr": 1.4050911222944251e-06, "epoch": 0.3513057923924462, "percentage": 7.03, "elapsed_time": "0:18:05", "remaining_time": "3:59:18", "throughput": 8929.62, "total_tokens": 9689344} +{"current_steps": 14385, "total_steps": 204665, "loss": 0.0644, "lr": 1.4055797136854448e-06, "epoch": 0.3514279432242934, "percentage": 7.03, "elapsed_time": "0:18:05", "remaining_time": "3:59:17", "throughput": 8929.58, "total_tokens": 9692288} +{"current_steps": 14390, "total_steps": 204665, "loss": 0.1095, "lr": 1.4060683050764645e-06, "epoch": 0.3515500940561405, "percentage": 7.03, "elapsed_time": "0:18:05", "remaining_time": "3:59:16", "throughput": 8929.9, "total_tokens": 9695744} +{"current_steps": 14395, "total_steps": 204665, "loss": 0.1922, "lr": 1.4065568964674842e-06, "epoch": 0.3516722448879877, "percentage": 7.03, "elapsed_time": "0:18:06", "remaining_time": "3:59:15", "throughput": 8929.95, "total_tokens": 9698880} +{"current_steps": 14400, "total_steps": 204665, "loss": 0.1522, "lr": 1.4070454878585039e-06, "epoch": 0.35179439571983484, "percentage": 7.04, "elapsed_time": "0:18:06", "remaining_time": "3:59:15", "throughput": 8930.29, "total_tokens": 9702400} +{"current_steps": 14405, "total_steps": 204665, "loss": 0.059, "lr": 1.4075340792495235e-06, "epoch": 0.35191654655168203, "percentage": 7.04, "elapsed_time": "0:18:06", "remaining_time": "3:59:14", "throughput": 8930.54, "total_tokens": 9705792} +{"current_steps": 14410, "total_steps": 204665, "loss": 0.0717, "lr": 1.4080226706405432e-06, "epoch": 0.35203869738352916, "percentage": 7.04, "elapsed_time": "0:18:07", "remaining_time": "3:59:13", "throughput": 8930.72, "total_tokens": 9709056} +{"current_steps": 14415, "total_steps": 204665, "loss": 0.113, "lr": 1.408511262031563e-06, "epoch": 0.35216084821537635, "percentage": 7.04, "elapsed_time": "0:18:07", "remaining_time": "3:59:12", "throughput": 8931.05, "total_tokens": 9712512} +{"current_steps": 14420, "total_steps": 204665, "loss": 0.2413, "lr": 1.4089998534225826e-06, "epoch": 0.3522829990472235, "percentage": 7.05, "elapsed_time": "0:18:07", "remaining_time": "3:59:12", "throughput": 8931.76, "total_tokens": 9716544} +{"current_steps": 14425, "total_steps": 204665, "loss": 0.126, "lr": 1.4094884448136025e-06, "epoch": 0.3524051498790707, "percentage": 7.05, "elapsed_time": "0:18:08", "remaining_time": "3:59:11", "throughput": 8932.08, "total_tokens": 9720000} +{"current_steps": 14430, "total_steps": 204665, "loss": 0.186, "lr": 1.409977036204622e-06, "epoch": 0.35252730071091787, "percentage": 7.05, "elapsed_time": "0:18:08", "remaining_time": "3:59:10", "throughput": 8932.42, "total_tokens": 9723520} +{"current_steps": 14435, "total_steps": 204665, "loss": 0.1164, "lr": 1.4104656275956416e-06, "epoch": 0.352649451542765, "percentage": 7.05, "elapsed_time": "0:18:08", "remaining_time": "3:59:10", "throughput": 8932.28, "total_tokens": 9726400} +{"current_steps": 14440, "total_steps": 204665, "loss": 0.1667, "lr": 1.4109542189866615e-06, "epoch": 0.3527716023746122, "percentage": 7.06, "elapsed_time": "0:18:09", "remaining_time": "3:59:09", "throughput": 8932.31, "total_tokens": 9729472} +{"current_steps": 14445, "total_steps": 204665, "loss": 0.0485, "lr": 1.411442810377681e-06, "epoch": 0.3528937532064593, "percentage": 7.06, "elapsed_time": "0:18:09", "remaining_time": "3:59:08", "throughput": 8932.69, "total_tokens": 9733056} +{"current_steps": 14450, "total_steps": 204665, "loss": 0.0319, "lr": 1.4119314017687007e-06, "epoch": 0.3530159040383065, "percentage": 7.06, "elapsed_time": "0:18:09", "remaining_time": "3:59:07", "throughput": 8933.42, "total_tokens": 9737088} +{"current_steps": 14455, "total_steps": 204665, "loss": 0.0899, "lr": 1.4124199931597206e-06, "epoch": 0.35313805487015365, "percentage": 7.06, "elapsed_time": "0:18:10", "remaining_time": "3:59:07", "throughput": 8933.69, "total_tokens": 9740480} +{"current_steps": 14460, "total_steps": 204665, "loss": 0.1556, "lr": 1.4129085845507403e-06, "epoch": 0.35326020570200084, "percentage": 7.07, "elapsed_time": "0:18:10", "remaining_time": "3:59:06", "throughput": 8933.85, "total_tokens": 9743744} +{"current_steps": 14465, "total_steps": 204665, "loss": 0.0558, "lr": 1.4133971759417597e-06, "epoch": 0.353382356533848, "percentage": 7.07, "elapsed_time": "0:18:10", "remaining_time": "3:59:05", "throughput": 8933.97, "total_tokens": 9746944} +{"current_steps": 14470, "total_steps": 204665, "loss": 0.0949, "lr": 1.4138857673327796e-06, "epoch": 0.35350450736569516, "percentage": 7.07, "elapsed_time": "0:18:11", "remaining_time": "3:59:04", "throughput": 8934.09, "total_tokens": 9750144} +{"current_steps": 14475, "total_steps": 204665, "loss": 0.1063, "lr": 1.4143743587237993e-06, "epoch": 0.3536266581975423, "percentage": 7.07, "elapsed_time": "0:18:11", "remaining_time": "3:59:04", "throughput": 8934.66, "total_tokens": 9753984} +{"current_steps": 14480, "total_steps": 204665, "loss": 0.1582, "lr": 1.4148629501148188e-06, "epoch": 0.3537488090293895, "percentage": 7.07, "elapsed_time": "0:18:12", "remaining_time": "3:59:03", "throughput": 8934.92, "total_tokens": 9757376} +{"current_steps": 14485, "total_steps": 204665, "loss": 0.047, "lr": 1.4153515415058387e-06, "epoch": 0.3538709598612367, "percentage": 7.08, "elapsed_time": "0:18:12", "remaining_time": "3:59:02", "throughput": 8935.06, "total_tokens": 9760576} +{"current_steps": 14490, "total_steps": 204665, "loss": 0.0452, "lr": 1.4158401328968583e-06, "epoch": 0.3539931106930838, "percentage": 7.08, "elapsed_time": "0:18:12", "remaining_time": "3:59:01", "throughput": 8934.98, "total_tokens": 9763520} +{"current_steps": 14495, "total_steps": 204665, "loss": 0.1651, "lr": 1.416328724287878e-06, "epoch": 0.354115261524931, "percentage": 7.08, "elapsed_time": "0:18:13", "remaining_time": "3:59:00", "throughput": 8935.33, "total_tokens": 9767040} +{"current_steps": 14500, "total_steps": 204665, "loss": 0.1228, "lr": 1.4168173156788977e-06, "epoch": 0.35423741235677814, "percentage": 7.08, "elapsed_time": "0:18:13", "remaining_time": "3:59:00", "throughput": 8935.54, "total_tokens": 9770368} +{"current_steps": 14505, "total_steps": 204665, "loss": 0.0829, "lr": 1.4173059070699174e-06, "epoch": 0.3543595631886253, "percentage": 7.09, "elapsed_time": "0:18:13", "remaining_time": "3:58:59", "throughput": 8935.92, "total_tokens": 9773888} +{"current_steps": 14510, "total_steps": 204665, "loss": 0.1262, "lr": 1.417794498460937e-06, "epoch": 0.35448171402047246, "percentage": 7.09, "elapsed_time": "0:18:14", "remaining_time": "3:58:58", "throughput": 8936.14, "total_tokens": 9777216} +{"current_steps": 14515, "total_steps": 204665, "loss": 0.0925, "lr": 1.4182830898519568e-06, "epoch": 0.35460386485231965, "percentage": 7.09, "elapsed_time": "0:18:14", "remaining_time": "3:58:57", "throughput": 8936.34, "total_tokens": 9780544} +{"current_steps": 14520, "total_steps": 204665, "loss": 0.1258, "lr": 1.4187716812429764e-06, "epoch": 0.3547260156841668, "percentage": 7.09, "elapsed_time": "0:18:14", "remaining_time": "3:58:56", "throughput": 8936.4, "total_tokens": 9783680} +{"current_steps": 14525, "total_steps": 204665, "loss": 0.2356, "lr": 1.4192602726339961e-06, "epoch": 0.354848166516014, "percentage": 7.1, "elapsed_time": "0:18:15", "remaining_time": "3:58:56", "throughput": 8936.52, "total_tokens": 9786880} +{"current_steps": 14530, "total_steps": 204665, "loss": 0.1121, "lr": 1.419748864025016e-06, "epoch": 0.35497031734786116, "percentage": 7.1, "elapsed_time": "0:18:15", "remaining_time": "3:58:55", "throughput": 8936.7, "total_tokens": 9790144} +{"current_steps": 14535, "total_steps": 204665, "loss": 0.1484, "lr": 1.4202374554160355e-06, "epoch": 0.3550924681797083, "percentage": 7.1, "elapsed_time": "0:18:15", "remaining_time": "3:58:54", "throughput": 8937.46, "total_tokens": 9794240} +{"current_steps": 14540, "total_steps": 204665, "loss": 0.1654, "lr": 1.4207260468070552e-06, "epoch": 0.3552146190115555, "percentage": 7.1, "elapsed_time": "0:18:16", "remaining_time": "3:58:54", "throughput": 8937.71, "total_tokens": 9797632} +{"current_steps": 14545, "total_steps": 204665, "loss": 0.0966, "lr": 1.421214638198075e-06, "epoch": 0.3553367698434026, "percentage": 7.11, "elapsed_time": "0:18:16", "remaining_time": "3:58:53", "throughput": 8938.05, "total_tokens": 9801152} +{"current_steps": 14550, "total_steps": 204665, "loss": 0.1549, "lr": 1.4217032295890945e-06, "epoch": 0.3554589206752498, "percentage": 7.11, "elapsed_time": "0:18:16", "remaining_time": "3:58:52", "throughput": 8938.15, "total_tokens": 9804352} +{"current_steps": 14555, "total_steps": 204665, "loss": 0.0237, "lr": 1.4221918209801142e-06, "epoch": 0.35558107150709695, "percentage": 7.11, "elapsed_time": "0:18:17", "remaining_time": "3:58:52", "throughput": 8938.96, "total_tokens": 9808512} +{"current_steps": 14560, "total_steps": 204665, "loss": 0.1175, "lr": 1.422680412371134e-06, "epoch": 0.35570322233894414, "percentage": 7.11, "elapsed_time": "0:18:17", "remaining_time": "3:58:51", "throughput": 8938.81, "total_tokens": 9811328} +{"current_steps": 14565, "total_steps": 204665, "loss": 0.1035, "lr": 1.4231690037621536e-06, "epoch": 0.35582537317079127, "percentage": 7.12, "elapsed_time": "0:18:17", "remaining_time": "3:58:50", "throughput": 8939.33, "total_tokens": 9815104} +{"current_steps": 14570, "total_steps": 204665, "loss": 0.1708, "lr": 1.4236575951531732e-06, "epoch": 0.35594752400263846, "percentage": 7.12, "elapsed_time": "0:18:18", "remaining_time": "3:58:49", "throughput": 8939.73, "total_tokens": 9818688} +{"current_steps": 14575, "total_steps": 204665, "loss": 0.0521, "lr": 1.4241461865441931e-06, "epoch": 0.35606967483448565, "percentage": 7.12, "elapsed_time": "0:18:18", "remaining_time": "3:58:49", "throughput": 8940.28, "total_tokens": 9822464} +{"current_steps": 14580, "total_steps": 204665, "loss": 0.182, "lr": 1.4246347779352128e-06, "epoch": 0.3561918256663328, "percentage": 7.12, "elapsed_time": "0:18:19", "remaining_time": "3:58:48", "throughput": 8940.44, "total_tokens": 9825728} +{"current_steps": 14585, "total_steps": 204665, "loss": 0.1155, "lr": 1.4251233693262323e-06, "epoch": 0.35631397649818, "percentage": 7.13, "elapsed_time": "0:18:19", "remaining_time": "3:58:47", "throughput": 8940.74, "total_tokens": 9829184} +{"current_steps": 14590, "total_steps": 204665, "loss": 0.1183, "lr": 1.4256119607172522e-06, "epoch": 0.3564361273300271, "percentage": 7.13, "elapsed_time": "0:18:19", "remaining_time": "3:58:46", "throughput": 8941.08, "total_tokens": 9832704} +{"current_steps": 14595, "total_steps": 204665, "loss": 0.2253, "lr": 1.4261005521082719e-06, "epoch": 0.3565582781618743, "percentage": 7.13, "elapsed_time": "0:18:20", "remaining_time": "3:58:46", "throughput": 8941.11, "total_tokens": 9835776} +{"current_steps": 14600, "total_steps": 204665, "loss": 0.1039, "lr": 1.4265891434992913e-06, "epoch": 0.35668042899372143, "percentage": 7.13, "elapsed_time": "0:18:20", "remaining_time": "3:58:45", "throughput": 8941.36, "total_tokens": 9839168} +{"current_steps": 14605, "total_steps": 204665, "loss": 0.1266, "lr": 1.4270777348903112e-06, "epoch": 0.3568025798255686, "percentage": 7.14, "elapsed_time": "0:18:20", "remaining_time": "3:58:44", "throughput": 8941.27, "total_tokens": 9842112} +{"current_steps": 14610, "total_steps": 204665, "loss": 0.0948, "lr": 1.427566326281331e-06, "epoch": 0.35692473065741576, "percentage": 7.14, "elapsed_time": "0:18:21", "remaining_time": "3:58:43", "throughput": 8941.89, "total_tokens": 9846016} +{"current_steps": 14615, "total_steps": 204665, "loss": 0.1952, "lr": 1.4280549176723506e-06, "epoch": 0.35704688148926295, "percentage": 7.14, "elapsed_time": "0:18:21", "remaining_time": "3:58:43", "throughput": 8942.06, "total_tokens": 9849280} +{"current_steps": 14620, "total_steps": 204665, "loss": 0.0812, "lr": 1.4285435090633703e-06, "epoch": 0.3571690323211101, "percentage": 7.14, "elapsed_time": "0:18:21", "remaining_time": "3:58:42", "throughput": 8942.02, "total_tokens": 9852224} +{"current_steps": 14625, "total_steps": 204665, "loss": 0.1073, "lr": 1.42903210045439e-06, "epoch": 0.35729118315295727, "percentage": 7.15, "elapsed_time": "0:18:22", "remaining_time": "3:58:41", "throughput": 8942.23, "total_tokens": 9855552} +{"current_steps": 14630, "total_steps": 204665, "loss": 0.1347, "lr": 1.4295206918454096e-06, "epoch": 0.35741333398480446, "percentage": 7.15, "elapsed_time": "0:18:22", "remaining_time": "3:58:40", "throughput": 8942.2, "total_tokens": 9858560} +{"current_steps": 14635, "total_steps": 204665, "loss": 0.1511, "lr": 1.4300092832364293e-06, "epoch": 0.3575354848166516, "percentage": 7.15, "elapsed_time": "0:18:22", "remaining_time": "3:58:39", "throughput": 8942.87, "total_tokens": 9862528} +{"current_steps": 14640, "total_steps": 204665, "loss": 0.1841, "lr": 1.430497874627449e-06, "epoch": 0.3576576356484988, "percentage": 7.15, "elapsed_time": "0:18:23", "remaining_time": "3:58:39", "throughput": 8943.14, "total_tokens": 9865920} +{"current_steps": 14645, "total_steps": 204665, "loss": 0.0648, "lr": 1.4309864660184687e-06, "epoch": 0.3577797864803459, "percentage": 7.16, "elapsed_time": "0:18:23", "remaining_time": "3:58:38", "throughput": 8943.49, "total_tokens": 9869440} +{"current_steps": 14650, "total_steps": 204665, "loss": 0.1556, "lr": 1.4314750574094886e-06, "epoch": 0.3579019373121931, "percentage": 7.16, "elapsed_time": "0:18:23", "remaining_time": "3:58:37", "throughput": 8943.39, "total_tokens": 9872320} +{"current_steps": 14655, "total_steps": 204665, "loss": 0.1211, "lr": 1.431963648800508e-06, "epoch": 0.35802408814404024, "percentage": 7.16, "elapsed_time": "0:18:24", "remaining_time": "3:58:36", "throughput": 8943.84, "total_tokens": 9875968} +{"current_steps": 14660, "total_steps": 204665, "loss": 0.1172, "lr": 1.4324522401915277e-06, "epoch": 0.35814623897588743, "percentage": 7.16, "elapsed_time": "0:18:24", "remaining_time": "3:58:35", "throughput": 8943.87, "total_tokens": 9879040} +{"current_steps": 14665, "total_steps": 204665, "loss": 0.091, "lr": 1.4329408315825476e-06, "epoch": 0.35826838980773457, "percentage": 7.17, "elapsed_time": "0:18:24", "remaining_time": "3:58:35", "throughput": 8944.08, "total_tokens": 9882368} +{"current_steps": 14670, "total_steps": 204665, "loss": 0.1043, "lr": 1.433429422973567e-06, "epoch": 0.35839054063958176, "percentage": 7.17, "elapsed_time": "0:18:25", "remaining_time": "3:58:34", "throughput": 8944.15, "total_tokens": 9885504} +{"current_steps": 14675, "total_steps": 204665, "loss": 0.0735, "lr": 1.4339180143645868e-06, "epoch": 0.35851269147142895, "percentage": 7.17, "elapsed_time": "0:18:25", "remaining_time": "3:58:33", "throughput": 8944.53, "total_tokens": 9889088} +{"current_steps": 14680, "total_steps": 204665, "loss": 0.0663, "lr": 1.4344066057556067e-06, "epoch": 0.3586348423032761, "percentage": 7.17, "elapsed_time": "0:18:25", "remaining_time": "3:58:32", "throughput": 8944.68, "total_tokens": 9892352} +{"current_steps": 14685, "total_steps": 204665, "loss": 0.0196, "lr": 1.4348951971466261e-06, "epoch": 0.35875699313512327, "percentage": 7.18, "elapsed_time": "0:18:26", "remaining_time": "3:58:32", "throughput": 8945.04, "total_tokens": 9895872} +{"current_steps": 14690, "total_steps": 204665, "loss": 0.0654, "lr": 1.4353837885376458e-06, "epoch": 0.3588791439669704, "percentage": 7.18, "elapsed_time": "0:18:26", "remaining_time": "3:58:31", "throughput": 8945.24, "total_tokens": 9899200} +{"current_steps": 14695, "total_steps": 204665, "loss": 0.143, "lr": 1.4358723799286657e-06, "epoch": 0.3590012947988176, "percentage": 7.18, "elapsed_time": "0:18:27", "remaining_time": "3:58:30", "throughput": 8945.85, "total_tokens": 9903104} +{"current_steps": 14700, "total_steps": 204665, "loss": 0.1784, "lr": 1.4363609713196854e-06, "epoch": 0.35912344563066473, "percentage": 7.18, "elapsed_time": "0:18:27", "remaining_time": "3:58:30", "throughput": 8945.86, "total_tokens": 9906176} +{"current_steps": 14705, "total_steps": 204665, "loss": 0.0943, "lr": 1.4368495627107049e-06, "epoch": 0.3592455964625119, "percentage": 7.18, "elapsed_time": "0:18:27", "remaining_time": "3:58:29", "throughput": 8946.07, "total_tokens": 9909504} +{"current_steps": 14710, "total_steps": 204665, "loss": 0.1623, "lr": 1.4373381541017247e-06, "epoch": 0.35936774729435905, "percentage": 7.19, "elapsed_time": "0:18:28", "remaining_time": "3:58:28", "throughput": 8946.45, "total_tokens": 9913088} +{"current_steps": 14715, "total_steps": 204665, "loss": 0.216, "lr": 1.4378267454927444e-06, "epoch": 0.35948989812620624, "percentage": 7.19, "elapsed_time": "0:18:28", "remaining_time": "3:58:27", "throughput": 8946.8, "total_tokens": 9916608} +{"current_steps": 14720, "total_steps": 204665, "loss": 0.2218, "lr": 1.438315336883764e-06, "epoch": 0.35961204895805343, "percentage": 7.19, "elapsed_time": "0:18:28", "remaining_time": "3:58:27", "throughput": 8947.01, "total_tokens": 9919936} +{"current_steps": 14725, "total_steps": 204665, "loss": 0.1147, "lr": 1.4388039282747838e-06, "epoch": 0.35973419978990057, "percentage": 7.19, "elapsed_time": "0:18:29", "remaining_time": "3:58:26", "throughput": 8946.85, "total_tokens": 9922752} +{"current_steps": 14730, "total_steps": 204665, "loss": 0.1799, "lr": 1.4392925196658035e-06, "epoch": 0.35985635062174776, "percentage": 7.2, "elapsed_time": "0:18:29", "remaining_time": "3:58:25", "throughput": 8947.09, "total_tokens": 9926144} +{"current_steps": 14735, "total_steps": 204665, "loss": 0.0715, "lr": 1.4397811110568232e-06, "epoch": 0.3599785014535949, "percentage": 7.2, "elapsed_time": "0:18:29", "remaining_time": "3:58:24", "throughput": 8947.21, "total_tokens": 9929344} +{"current_steps": 14740, "total_steps": 204665, "loss": 0.101, "lr": 1.4402697024478428e-06, "epoch": 0.3601006522854421, "percentage": 7.2, "elapsed_time": "0:18:30", "remaining_time": "3:58:23", "throughput": 8947.23, "total_tokens": 9932416} +{"current_steps": 14745, "total_steps": 204665, "loss": 0.0896, "lr": 1.4407582938388625e-06, "epoch": 0.3602228031172892, "percentage": 7.2, "elapsed_time": "0:18:30", "remaining_time": "3:58:23", "throughput": 8947.62, "total_tokens": 9936000} +{"current_steps": 14750, "total_steps": 204665, "loss": 0.0944, "lr": 1.4412468852298822e-06, "epoch": 0.3603449539491364, "percentage": 7.21, "elapsed_time": "0:18:30", "remaining_time": "3:58:22", "throughput": 8947.45, "total_tokens": 9938816} +{"current_steps": 14755, "total_steps": 204665, "loss": 0.1834, "lr": 1.4417354766209019e-06, "epoch": 0.36046710478098354, "percentage": 7.21, "elapsed_time": "0:18:31", "remaining_time": "3:58:21", "throughput": 8947.6, "total_tokens": 9942080} +{"current_steps": 14760, "total_steps": 204665, "loss": 0.0603, "lr": 1.4422240680119216e-06, "epoch": 0.36058925561283073, "percentage": 7.21, "elapsed_time": "0:18:31", "remaining_time": "3:58:20", "throughput": 8947.66, "total_tokens": 9945216} +{"current_steps": 14765, "total_steps": 204665, "loss": 0.0395, "lr": 1.4427126594029412e-06, "epoch": 0.36071140644467786, "percentage": 7.21, "elapsed_time": "0:18:31", "remaining_time": "3:58:19", "throughput": 8947.76, "total_tokens": 9948416} +{"current_steps": 14770, "total_steps": 204665, "loss": 0.174, "lr": 1.4432012507939611e-06, "epoch": 0.36083355727652505, "percentage": 7.22, "elapsed_time": "0:18:32", "remaining_time": "3:58:19", "throughput": 8948.25, "total_tokens": 9952128} +{"current_steps": 14775, "total_steps": 204665, "loss": 0.201, "lr": 1.4436898421849806e-06, "epoch": 0.36095570810837224, "percentage": 7.22, "elapsed_time": "0:18:32", "remaining_time": "3:58:18", "throughput": 8948.52, "total_tokens": 9955584} +{"current_steps": 14780, "total_steps": 204665, "loss": 0.1133, "lr": 1.4441784335760003e-06, "epoch": 0.3610778589402194, "percentage": 7.22, "elapsed_time": "0:18:32", "remaining_time": "3:58:17", "throughput": 8948.98, "total_tokens": 9959296} +{"current_steps": 14785, "total_steps": 204665, "loss": 0.0877, "lr": 1.4446670249670202e-06, "epoch": 0.36120000977206657, "percentage": 7.22, "elapsed_time": "0:18:33", "remaining_time": "3:58:17", "throughput": 8949.29, "total_tokens": 9962816} +{"current_steps": 14790, "total_steps": 204665, "loss": 0.0743, "lr": 1.4451556163580396e-06, "epoch": 0.3613221606039137, "percentage": 7.23, "elapsed_time": "0:18:33", "remaining_time": "3:58:16", "throughput": 8949.37, "total_tokens": 9965952} +{"current_steps": 14795, "total_steps": 204665, "loss": 0.1787, "lr": 1.4456442077490593e-06, "epoch": 0.3614443114357609, "percentage": 7.23, "elapsed_time": "0:18:33", "remaining_time": "3:58:15", "throughput": 8949.61, "total_tokens": 9969344} +{"current_steps": 14800, "total_steps": 204665, "loss": 0.1723, "lr": 1.4461327991400792e-06, "epoch": 0.361566462267608, "percentage": 7.23, "elapsed_time": "0:18:34", "remaining_time": "3:58:14", "throughput": 8949.81, "total_tokens": 9972672} +{"current_steps": 14805, "total_steps": 204665, "loss": 0.0897, "lr": 1.446621390531099e-06, "epoch": 0.3616886130994552, "percentage": 7.23, "elapsed_time": "0:18:34", "remaining_time": "3:58:14", "throughput": 8949.75, "total_tokens": 9975616} +{"current_steps": 14810, "total_steps": 204665, "loss": 0.1675, "lr": 1.4471099819221184e-06, "epoch": 0.36181076393130235, "percentage": 7.24, "elapsed_time": "0:18:34", "remaining_time": "3:58:13", "throughput": 8950.44, "total_tokens": 9979648} +{"current_steps": 14815, "total_steps": 204665, "loss": 0.1314, "lr": 1.4475985733131383e-06, "epoch": 0.36193291476314954, "percentage": 7.24, "elapsed_time": "0:18:35", "remaining_time": "3:58:12", "throughput": 8950.77, "total_tokens": 9983168} +{"current_steps": 14820, "total_steps": 204665, "loss": 0.1952, "lr": 1.448087164704158e-06, "epoch": 0.36205506559499673, "percentage": 7.24, "elapsed_time": "0:18:35", "remaining_time": "3:58:12", "throughput": 8951.07, "total_tokens": 9986624} +{"current_steps": 14825, "total_steps": 204665, "loss": 0.0967, "lr": 1.4485757560951774e-06, "epoch": 0.36217721642684386, "percentage": 7.24, "elapsed_time": "0:18:36", "remaining_time": "3:58:11", "throughput": 8951.36, "total_tokens": 9990080} +{"current_steps": 14830, "total_steps": 204665, "loss": 0.1674, "lr": 1.4490643474861973e-06, "epoch": 0.36229936725869105, "percentage": 7.25, "elapsed_time": "0:18:36", "remaining_time": "3:58:10", "throughput": 8951.35, "total_tokens": 9993088} +{"current_steps": 14835, "total_steps": 204665, "loss": 0.1011, "lr": 1.449552938877217e-06, "epoch": 0.3624215180905382, "percentage": 7.25, "elapsed_time": "0:18:36", "remaining_time": "3:58:09", "throughput": 8951.34, "total_tokens": 9996096} +{"current_steps": 14840, "total_steps": 204665, "loss": 0.065, "lr": 1.4500415302682365e-06, "epoch": 0.3625436689223854, "percentage": 7.25, "elapsed_time": "0:18:37", "remaining_time": "3:58:08", "throughput": 8951.77, "total_tokens": 9999744} +{"current_steps": 14845, "total_steps": 204665, "loss": 0.0536, "lr": 1.4505301216592564e-06, "epoch": 0.3626658197542325, "percentage": 7.25, "elapsed_time": "0:18:37", "remaining_time": "3:58:08", "throughput": 8951.88, "total_tokens": 10002944} +{"current_steps": 14850, "total_steps": 204665, "loss": 0.1733, "lr": 1.451018713050276e-06, "epoch": 0.3627879705860797, "percentage": 7.26, "elapsed_time": "0:18:37", "remaining_time": "3:58:07", "throughput": 8952.12, "total_tokens": 10006336} +{"current_steps": 14855, "total_steps": 204665, "loss": 0.1593, "lr": 1.4515073044412957e-06, "epoch": 0.36291012141792683, "percentage": 7.26, "elapsed_time": "0:18:38", "remaining_time": "3:58:06", "throughput": 8952.09, "total_tokens": 10009344} +{"current_steps": 14860, "total_steps": 204665, "loss": 0.1427, "lr": 1.4519958958323154e-06, "epoch": 0.363032272249774, "percentage": 7.26, "elapsed_time": "0:18:38", "remaining_time": "3:58:05", "throughput": 8952.09, "total_tokens": 10012352} +{"current_steps": 14865, "total_steps": 204665, "loss": 0.1268, "lr": 1.452484487223335e-06, "epoch": 0.36315442308162116, "percentage": 7.26, "elapsed_time": "0:18:38", "remaining_time": "3:58:04", "throughput": 8952.08, "total_tokens": 10015360} +{"current_steps": 14870, "total_steps": 204665, "loss": 0.084, "lr": 1.4529730786143548e-06, "epoch": 0.36327657391346835, "percentage": 7.27, "elapsed_time": "0:18:39", "remaining_time": "3:58:04", "throughput": 8952.76, "total_tokens": 10019392} +{"current_steps": 14875, "total_steps": 204665, "loss": 0.156, "lr": 1.4534616700053744e-06, "epoch": 0.36339872474531554, "percentage": 7.27, "elapsed_time": "0:18:39", "remaining_time": "3:58:03", "throughput": 8953.09, "total_tokens": 10022912} +{"current_steps": 14880, "total_steps": 204665, "loss": 0.059, "lr": 1.4539502613963941e-06, "epoch": 0.36352087557716267, "percentage": 7.27, "elapsed_time": "0:18:39", "remaining_time": "3:58:02", "throughput": 8953.33, "total_tokens": 10026304} +{"current_steps": 14885, "total_steps": 204665, "loss": 0.1032, "lr": 1.4544388527874138e-06, "epoch": 0.36364302640900986, "percentage": 7.27, "elapsed_time": "0:18:40", "remaining_time": "3:58:02", "throughput": 8953.48, "total_tokens": 10029568} +{"current_steps": 14890, "total_steps": 204665, "loss": 0.1649, "lr": 1.4549274441784337e-06, "epoch": 0.363765177240857, "percentage": 7.28, "elapsed_time": "0:18:40", "remaining_time": "3:58:01", "throughput": 8953.82, "total_tokens": 10033088} +{"current_steps": 14895, "total_steps": 204665, "loss": 0.0947, "lr": 1.4554160355694532e-06, "epoch": 0.3638873280727042, "percentage": 7.28, "elapsed_time": "0:18:40", "remaining_time": "3:58:00", "throughput": 8954.22, "total_tokens": 10036672} +{"current_steps": 14900, "total_steps": 204665, "loss": 0.2256, "lr": 1.4559046269604728e-06, "epoch": 0.3640094789045513, "percentage": 7.28, "elapsed_time": "0:18:41", "remaining_time": "3:57:59", "throughput": 8954.42, "total_tokens": 10040000} +{"current_steps": 14905, "total_steps": 204665, "loss": 0.0815, "lr": 1.4563932183514927e-06, "epoch": 0.3641316297363985, "percentage": 7.28, "elapsed_time": "0:18:41", "remaining_time": "3:57:59", "throughput": 8954.43, "total_tokens": 10043072} +{"current_steps": 14910, "total_steps": 204665, "loss": 0.2063, "lr": 1.4568818097425122e-06, "epoch": 0.36425378056824564, "percentage": 7.29, "elapsed_time": "0:18:41", "remaining_time": "3:57:58", "throughput": 8954.58, "total_tokens": 10046336} +{"current_steps": 14915, "total_steps": 204665, "loss": 0.1413, "lr": 1.4573704011335319e-06, "epoch": 0.36437593140009283, "percentage": 7.29, "elapsed_time": "0:18:42", "remaining_time": "3:57:57", "throughput": 8954.41, "total_tokens": 10049152} +{"current_steps": 14920, "total_steps": 204665, "loss": 0.0804, "lr": 1.4578589925245518e-06, "epoch": 0.36449808223194, "percentage": 7.29, "elapsed_time": "0:18:42", "remaining_time": "3:57:56", "throughput": 8954.43, "total_tokens": 10052224} +{"current_steps": 14925, "total_steps": 204665, "loss": 0.0736, "lr": 1.4583475839155715e-06, "epoch": 0.36462023306378716, "percentage": 7.29, "elapsed_time": "0:18:42", "remaining_time": "3:57:55", "throughput": 8954.59, "total_tokens": 10055488} +{"current_steps": 14930, "total_steps": 204665, "loss": 0.0469, "lr": 1.458836175306591e-06, "epoch": 0.36474238389563435, "percentage": 7.29, "elapsed_time": "0:18:43", "remaining_time": "3:57:55", "throughput": 8954.74, "total_tokens": 10058752} +{"current_steps": 14935, "total_steps": 204665, "loss": 0.2228, "lr": 1.4593247666976108e-06, "epoch": 0.3648645347274815, "percentage": 7.3, "elapsed_time": "0:18:43", "remaining_time": "3:57:54", "throughput": 8954.93, "total_tokens": 10062080} +{"current_steps": 14940, "total_steps": 204665, "loss": 0.1705, "lr": 1.4598133580886305e-06, "epoch": 0.36498668555932867, "percentage": 7.3, "elapsed_time": "0:18:43", "remaining_time": "3:57:53", "throughput": 8955.13, "total_tokens": 10065408} +{"current_steps": 14945, "total_steps": 204665, "loss": 0.0597, "lr": 1.46030194947965e-06, "epoch": 0.3651088363911758, "percentage": 7.3, "elapsed_time": "0:18:44", "remaining_time": "3:57:52", "throughput": 8955.16, "total_tokens": 10068480} +{"current_steps": 14950, "total_steps": 204665, "loss": 0.2122, "lr": 1.4607905408706699e-06, "epoch": 0.365230987223023, "percentage": 7.3, "elapsed_time": "0:18:44", "remaining_time": "3:57:51", "throughput": 8955.13, "total_tokens": 10071488} +{"current_steps": 14955, "total_steps": 204665, "loss": 0.1656, "lr": 1.4612791322616896e-06, "epoch": 0.36535313805487013, "percentage": 7.31, "elapsed_time": "0:18:45", "remaining_time": "3:57:51", "throughput": 8955.5, "total_tokens": 10075072} +{"current_steps": 14960, "total_steps": 204665, "loss": 0.157, "lr": 1.4617677236527092e-06, "epoch": 0.3654752888867173, "percentage": 7.31, "elapsed_time": "0:18:45", "remaining_time": "3:57:50", "throughput": 8955.72, "total_tokens": 10078400} +{"current_steps": 14965, "total_steps": 204665, "loss": 0.0806, "lr": 1.462256315043729e-06, "epoch": 0.3655974397185645, "percentage": 7.31, "elapsed_time": "0:18:45", "remaining_time": "3:57:49", "throughput": 8955.72, "total_tokens": 10081472} +{"current_steps": 14970, "total_steps": 204665, "loss": 0.1563, "lr": 1.4627449064347486e-06, "epoch": 0.36571959055041164, "percentage": 7.31, "elapsed_time": "0:18:46", "remaining_time": "3:57:48", "throughput": 8955.78, "total_tokens": 10084608} +{"current_steps": 14975, "total_steps": 204665, "loss": 0.1024, "lr": 1.4632334978257683e-06, "epoch": 0.36584174138225883, "percentage": 7.32, "elapsed_time": "0:18:46", "remaining_time": "3:57:48", "throughput": 8956.02, "total_tokens": 10088000} +{"current_steps": 14980, "total_steps": 204665, "loss": 0.185, "lr": 1.463722089216788e-06, "epoch": 0.36596389221410597, "percentage": 7.32, "elapsed_time": "0:18:46", "remaining_time": "3:57:47", "throughput": 8956.34, "total_tokens": 10091520} +{"current_steps": 14985, "total_steps": 204665, "loss": 0.1085, "lr": 1.4642106806078076e-06, "epoch": 0.36608604304595316, "percentage": 7.32, "elapsed_time": "0:18:47", "remaining_time": "3:57:46", "throughput": 8956.4, "total_tokens": 10094656} +{"current_steps": 14990, "total_steps": 204665, "loss": 0.154, "lr": 1.4646992719988273e-06, "epoch": 0.3662081938778003, "percentage": 7.32, "elapsed_time": "0:18:47", "remaining_time": "3:57:45", "throughput": 8956.46, "total_tokens": 10097792} +{"current_steps": 14995, "total_steps": 204665, "loss": 0.0851, "lr": 1.465187863389847e-06, "epoch": 0.3663303447096475, "percentage": 7.33, "elapsed_time": "0:18:47", "remaining_time": "3:57:45", "throughput": 8956.71, "total_tokens": 10101184} +{"current_steps": 15000, "total_steps": 204665, "loss": 0.1695, "lr": 1.4656764547808667e-06, "epoch": 0.3664524955414946, "percentage": 7.33, "elapsed_time": "0:18:48", "remaining_time": "3:57:44", "throughput": 8956.72, "total_tokens": 10104256} +{"current_steps": 15005, "total_steps": 204665, "loss": 0.0803, "lr": 1.4661650461718864e-06, "epoch": 0.3665746463733418, "percentage": 7.33, "elapsed_time": "0:18:48", "remaining_time": "3:57:43", "throughput": 8956.74, "total_tokens": 10107328} +{"current_steps": 15010, "total_steps": 204665, "loss": 0.0945, "lr": 1.4666536375629063e-06, "epoch": 0.36669679720518894, "percentage": 7.33, "elapsed_time": "0:18:48", "remaining_time": "3:57:42", "throughput": 8957.14, "total_tokens": 10110912} +{"current_steps": 15015, "total_steps": 204665, "loss": 0.108, "lr": 1.4671422289539257e-06, "epoch": 0.36681894803703613, "percentage": 7.34, "elapsed_time": "0:18:49", "remaining_time": "3:57:42", "throughput": 8957.3, "total_tokens": 10114176} +{"current_steps": 15020, "total_steps": 204665, "loss": 0.0921, "lr": 1.4676308203449454e-06, "epoch": 0.3669410988688833, "percentage": 7.34, "elapsed_time": "0:18:49", "remaining_time": "3:57:41", "throughput": 8957.32, "total_tokens": 10117248} +{"current_steps": 15025, "total_steps": 204665, "loss": 0.0643, "lr": 1.4681194117359653e-06, "epoch": 0.36706324970073045, "percentage": 7.34, "elapsed_time": "0:18:49", "remaining_time": "3:57:40", "throughput": 8957.66, "total_tokens": 10120768} +{"current_steps": 15030, "total_steps": 204665, "loss": 0.0781, "lr": 1.4686080031269848e-06, "epoch": 0.36718540053257764, "percentage": 7.34, "elapsed_time": "0:18:50", "remaining_time": "3:57:39", "throughput": 8957.68, "total_tokens": 10123840} +{"current_steps": 15035, "total_steps": 204665, "loss": 0.0874, "lr": 1.4690965945180045e-06, "epoch": 0.3673075513644248, "percentage": 7.35, "elapsed_time": "0:18:50", "remaining_time": "3:57:38", "throughput": 8957.83, "total_tokens": 10127104} +{"current_steps": 15040, "total_steps": 204665, "loss": 0.098, "lr": 1.4695851859090243e-06, "epoch": 0.36742970219627197, "percentage": 7.35, "elapsed_time": "0:18:50", "remaining_time": "3:57:38", "throughput": 8958.21, "total_tokens": 10130688} +{"current_steps": 15045, "total_steps": 204665, "loss": 0.1618, "lr": 1.470073777300044e-06, "epoch": 0.3675518530281191, "percentage": 7.35, "elapsed_time": "0:18:51", "remaining_time": "3:57:37", "throughput": 8958.49, "total_tokens": 10134144} +{"current_steps": 15050, "total_steps": 204665, "loss": 0.0887, "lr": 1.4705623686910635e-06, "epoch": 0.3676740038599663, "percentage": 7.35, "elapsed_time": "0:18:51", "remaining_time": "3:57:36", "throughput": 8958.6, "total_tokens": 10137344} +{"current_steps": 15055, "total_steps": 204665, "loss": 0.2011, "lr": 1.4710509600820834e-06, "epoch": 0.3677961546918134, "percentage": 7.36, "elapsed_time": "0:18:51", "remaining_time": "3:57:36", "throughput": 8958.88, "total_tokens": 10140800} +{"current_steps": 15060, "total_steps": 204665, "loss": 0.1338, "lr": 1.471539551473103e-06, "epoch": 0.3679183055236606, "percentage": 7.36, "elapsed_time": "0:18:52", "remaining_time": "3:57:35", "throughput": 8958.94, "total_tokens": 10143936} +{"current_steps": 15065, "total_steps": 204665, "loss": 0.0552, "lr": 1.4720281428641225e-06, "epoch": 0.3680404563555078, "percentage": 7.36, "elapsed_time": "0:18:52", "remaining_time": "3:57:34", "throughput": 8959.22, "total_tokens": 10147392} +{"current_steps": 15070, "total_steps": 204665, "loss": 0.1437, "lr": 1.4725167342551424e-06, "epoch": 0.36816260718735494, "percentage": 7.36, "elapsed_time": "0:18:52", "remaining_time": "3:57:33", "throughput": 8959.18, "total_tokens": 10150400} +{"current_steps": 15075, "total_steps": 204665, "loss": 0.0665, "lr": 1.4730053256461621e-06, "epoch": 0.36828475801920213, "percentage": 7.37, "elapsed_time": "0:18:53", "remaining_time": "3:57:32", "throughput": 8959.38, "total_tokens": 10153728} +{"current_steps": 15080, "total_steps": 204665, "loss": 0.064, "lr": 1.4734939170371818e-06, "epoch": 0.36840690885104926, "percentage": 7.37, "elapsed_time": "0:18:53", "remaining_time": "3:57:32", "throughput": 8959.56, "total_tokens": 10157056} +{"current_steps": 15085, "total_steps": 204665, "loss": 0.1049, "lr": 1.4739825084282015e-06, "epoch": 0.36852905968289645, "percentage": 7.37, "elapsed_time": "0:18:54", "remaining_time": "3:57:31", "throughput": 8960.27, "total_tokens": 10161152} +{"current_steps": 15090, "total_steps": 204665, "loss": 0.2415, "lr": 1.4744710998192212e-06, "epoch": 0.3686512105147436, "percentage": 7.37, "elapsed_time": "0:18:54", "remaining_time": "3:57:31", "throughput": 8960.47, "total_tokens": 10164480} +{"current_steps": 15095, "total_steps": 204665, "loss": 0.154, "lr": 1.4749596912102408e-06, "epoch": 0.3687733613465908, "percentage": 7.38, "elapsed_time": "0:18:54", "remaining_time": "3:57:30", "throughput": 8960.37, "total_tokens": 10167360} +{"current_steps": 15100, "total_steps": 204665, "loss": 0.0262, "lr": 1.4754482826012605e-06, "epoch": 0.3688955121784379, "percentage": 7.38, "elapsed_time": "0:18:55", "remaining_time": "3:57:29", "throughput": 8960.58, "total_tokens": 10170752} +{"current_steps": 15105, "total_steps": 204665, "loss": 0.1336, "lr": 1.4759368739922802e-06, "epoch": 0.3690176630102851, "percentage": 7.38, "elapsed_time": "0:18:55", "remaining_time": "3:57:28", "throughput": 8960.96, "total_tokens": 10174336} +{"current_steps": 15110, "total_steps": 204665, "loss": 0.0717, "lr": 1.4764254653832999e-06, "epoch": 0.3691398138421323, "percentage": 7.38, "elapsed_time": "0:18:55", "remaining_time": "3:57:27", "throughput": 8960.98, "total_tokens": 10177408} +{"current_steps": 15115, "total_steps": 204665, "loss": 0.2487, "lr": 1.4769140567743196e-06, "epoch": 0.3692619646739794, "percentage": 7.39, "elapsed_time": "0:18:56", "remaining_time": "3:57:27", "throughput": 8961.64, "total_tokens": 10181440} +{"current_steps": 15120, "total_steps": 204665, "loss": 0.0709, "lr": 1.4774026481653392e-06, "epoch": 0.3693841155058266, "percentage": 7.39, "elapsed_time": "0:18:56", "remaining_time": "3:57:26", "throughput": 8961.75, "total_tokens": 10184640} +{"current_steps": 15125, "total_steps": 204665, "loss": 0.049, "lr": 1.477891239556359e-06, "epoch": 0.36950626633767375, "percentage": 7.39, "elapsed_time": "0:18:56", "remaining_time": "3:57:25", "throughput": 8961.66, "total_tokens": 10187584} +{"current_steps": 15130, "total_steps": 204665, "loss": 0.1035, "lr": 1.4783798309473788e-06, "epoch": 0.36962841716952094, "percentage": 7.39, "elapsed_time": "0:18:57", "remaining_time": "3:57:25", "throughput": 8961.8, "total_tokens": 10190848} +{"current_steps": 15135, "total_steps": 204665, "loss": 0.0623, "lr": 1.4788684223383983e-06, "epoch": 0.3697505680013681, "percentage": 7.4, "elapsed_time": "0:18:57", "remaining_time": "3:57:24", "throughput": 8962.08, "total_tokens": 10194304} +{"current_steps": 15140, "total_steps": 204665, "loss": 0.1515, "lr": 1.479357013729418e-06, "epoch": 0.36987271883321526, "percentage": 7.4, "elapsed_time": "0:18:57", "remaining_time": "3:57:23", "throughput": 8962.3, "total_tokens": 10197696} +{"current_steps": 15145, "total_steps": 204665, "loss": 0.2384, "lr": 1.4798456051204379e-06, "epoch": 0.3699948696650624, "percentage": 7.4, "elapsed_time": "0:18:58", "remaining_time": "3:57:22", "throughput": 8962.2, "total_tokens": 10200576} +{"current_steps": 15150, "total_steps": 204665, "loss": 0.0572, "lr": 1.4803341965114573e-06, "epoch": 0.3701170204969096, "percentage": 7.4, "elapsed_time": "0:18:58", "remaining_time": "3:57:22", "throughput": 8962.33, "total_tokens": 10203840} +{"current_steps": 15155, "total_steps": 204665, "loss": 0.0828, "lr": 1.480822787902477e-06, "epoch": 0.3702391713287567, "percentage": 7.4, "elapsed_time": "0:18:58", "remaining_time": "3:57:21", "throughput": 8962.74, "total_tokens": 10207488} +{"current_steps": 15160, "total_steps": 204665, "loss": 0.1452, "lr": 1.481311379293497e-06, "epoch": 0.3703613221606039, "percentage": 7.41, "elapsed_time": "0:18:59", "remaining_time": "3:57:20", "throughput": 8963.07, "total_tokens": 10211008} +{"current_steps": 15165, "total_steps": 204665, "loss": 0.17, "lr": 1.4817999706845166e-06, "epoch": 0.3704834729924511, "percentage": 7.41, "elapsed_time": "0:18:59", "remaining_time": "3:57:20", "throughput": 8963.25, "total_tokens": 10214336} +{"current_steps": 15170, "total_steps": 204665, "loss": 0.1446, "lr": 1.482288562075536e-06, "epoch": 0.37060562382429824, "percentage": 7.41, "elapsed_time": "0:18:59", "remaining_time": "3:57:19", "throughput": 8963.56, "total_tokens": 10217856} +{"current_steps": 15175, "total_steps": 204665, "loss": 0.0167, "lr": 1.482777153466556e-06, "epoch": 0.3707277746561454, "percentage": 7.41, "elapsed_time": "0:19:00", "remaining_time": "3:57:18", "throughput": 8964.1, "total_tokens": 10221696} +{"current_steps": 15180, "total_steps": 204665, "loss": 0.2496, "lr": 1.4832657448575756e-06, "epoch": 0.37084992548799256, "percentage": 7.42, "elapsed_time": "0:19:00", "remaining_time": "3:57:18", "throughput": 8964.34, "total_tokens": 10225088} +{"current_steps": 15185, "total_steps": 204665, "loss": 0.0886, "lr": 1.483754336248595e-06, "epoch": 0.37097207631983975, "percentage": 7.42, "elapsed_time": "0:19:00", "remaining_time": "3:57:17", "throughput": 8964.33, "total_tokens": 10228160} +{"current_steps": 15190, "total_steps": 204665, "loss": 0.0719, "lr": 1.484242927639615e-06, "epoch": 0.3710942271516869, "percentage": 7.42, "elapsed_time": "0:19:01", "remaining_time": "3:57:16", "throughput": 8964.7, "total_tokens": 10231744} +{"current_steps": 15195, "total_steps": 204665, "loss": 0.2309, "lr": 1.4847315190306347e-06, "epoch": 0.3712163779835341, "percentage": 7.42, "elapsed_time": "0:19:01", "remaining_time": "3:57:16", "throughput": 8965.18, "total_tokens": 10235520} +{"current_steps": 15200, "total_steps": 204665, "loss": 0.1409, "lr": 1.4852201104216544e-06, "epoch": 0.3713385288153812, "percentage": 7.43, "elapsed_time": "0:19:02", "remaining_time": "3:57:15", "throughput": 8965.37, "total_tokens": 10238848} +{"current_steps": 15205, "total_steps": 204665, "loss": 0.0193, "lr": 1.485708701812674e-06, "epoch": 0.3714606796472284, "percentage": 7.43, "elapsed_time": "0:19:02", "remaining_time": "3:57:14", "throughput": 8965.64, "total_tokens": 10242304} +{"current_steps": 15210, "total_steps": 204665, "loss": 0.1968, "lr": 1.4861972932036937e-06, "epoch": 0.3715828304790756, "percentage": 7.43, "elapsed_time": "0:19:02", "remaining_time": "3:57:13", "throughput": 8965.69, "total_tokens": 10245440} +{"current_steps": 15215, "total_steps": 204665, "loss": 0.1411, "lr": 1.4866858845947134e-06, "epoch": 0.3717049813109227, "percentage": 7.43, "elapsed_time": "0:19:03", "remaining_time": "3:57:13", "throughput": 8965.84, "total_tokens": 10248768} +{"current_steps": 15220, "total_steps": 204665, "loss": 0.0839, "lr": 1.4871744759857329e-06, "epoch": 0.3718271321427699, "percentage": 7.44, "elapsed_time": "0:19:03", "remaining_time": "3:57:12", "throughput": 8966.04, "total_tokens": 10252160} +{"current_steps": 15225, "total_steps": 204665, "loss": 0.2086, "lr": 1.4876630673767528e-06, "epoch": 0.37194928297461705, "percentage": 7.44, "elapsed_time": "0:19:03", "remaining_time": "3:57:11", "throughput": 8965.98, "total_tokens": 10255168} +{"current_steps": 15230, "total_steps": 204665, "loss": 0.2026, "lr": 1.4881516587677724e-06, "epoch": 0.37207143380646424, "percentage": 7.44, "elapsed_time": "0:19:04", "remaining_time": "3:57:11", "throughput": 8966.96, "total_tokens": 10259648} +{"current_steps": 15235, "total_steps": 204665, "loss": 0.0053, "lr": 1.4886402501587923e-06, "epoch": 0.37219358463831137, "percentage": 7.44, "elapsed_time": "0:19:04", "remaining_time": "3:57:10", "throughput": 8967.48, "total_tokens": 10263424} +{"current_steps": 15240, "total_steps": 204665, "loss": 0.1144, "lr": 1.4891288415498118e-06, "epoch": 0.37231573547015856, "percentage": 7.45, "elapsed_time": "0:19:04", "remaining_time": "3:57:10", "throughput": 8967.46, "total_tokens": 10266496} +{"current_steps": 15245, "total_steps": 204665, "loss": 0.0223, "lr": 1.4896174329408315e-06, "epoch": 0.3724378863020057, "percentage": 7.45, "elapsed_time": "0:19:05", "remaining_time": "3:57:09", "throughput": 8967.85, "total_tokens": 10270144} +{"current_steps": 15250, "total_steps": 204665, "loss": 0.2106, "lr": 1.4901060243318514e-06, "epoch": 0.3725600371338529, "percentage": 7.45, "elapsed_time": "0:19:05", "remaining_time": "3:57:08", "throughput": 8968.4, "total_tokens": 10273984} +{"current_steps": 15255, "total_steps": 204665, "loss": 0.1617, "lr": 1.4905946157228709e-06, "epoch": 0.3726821879657001, "percentage": 7.45, "elapsed_time": "0:19:05", "remaining_time": "3:57:07", "throughput": 8968.44, "total_tokens": 10277056} +{"current_steps": 15260, "total_steps": 204665, "loss": 0.0294, "lr": 1.4910832071138905e-06, "epoch": 0.3728043387975472, "percentage": 7.46, "elapsed_time": "0:19:06", "remaining_time": "3:57:07", "throughput": 8968.63, "total_tokens": 10280384} +{"current_steps": 15265, "total_steps": 204665, "loss": 0.118, "lr": 1.4915717985049104e-06, "epoch": 0.3729264896293944, "percentage": 7.46, "elapsed_time": "0:19:06", "remaining_time": "3:57:06", "throughput": 8968.7, "total_tokens": 10283520} +{"current_steps": 15270, "total_steps": 204665, "loss": 0.1248, "lr": 1.49206038989593e-06, "epoch": 0.37304864046124153, "percentage": 7.46, "elapsed_time": "0:19:06", "remaining_time": "3:57:05", "throughput": 8968.83, "total_tokens": 10286784} +{"current_steps": 15275, "total_steps": 204665, "loss": 0.2098, "lr": 1.4925489812869496e-06, "epoch": 0.3731707912930887, "percentage": 7.46, "elapsed_time": "0:19:07", "remaining_time": "3:57:04", "throughput": 8968.97, "total_tokens": 10290048} +{"current_steps": 15280, "total_steps": 204665, "loss": 0.2542, "lr": 1.4930375726779695e-06, "epoch": 0.37329294212493586, "percentage": 7.47, "elapsed_time": "0:19:07", "remaining_time": "3:57:04", "throughput": 8969.09, "total_tokens": 10293312} +{"current_steps": 15285, "total_steps": 204665, "loss": 0.1726, "lr": 1.4935261640689892e-06, "epoch": 0.37341509295678305, "percentage": 7.47, "elapsed_time": "0:19:07", "remaining_time": "3:57:03", "throughput": 8969.16, "total_tokens": 10296448} +{"current_steps": 15290, "total_steps": 204665, "loss": 0.257, "lr": 1.4940147554600086e-06, "epoch": 0.3735372437886302, "percentage": 7.47, "elapsed_time": "0:19:08", "remaining_time": "3:57:02", "throughput": 8969.17, "total_tokens": 10299520} +{"current_steps": 15295, "total_steps": 204665, "loss": 0.0406, "lr": 1.4945033468510285e-06, "epoch": 0.37365939462047737, "percentage": 7.47, "elapsed_time": "0:19:08", "remaining_time": "3:57:01", "throughput": 8969.44, "total_tokens": 10302976} +{"current_steps": 15300, "total_steps": 204665, "loss": 0.1918, "lr": 1.4949919382420482e-06, "epoch": 0.3737815454523245, "percentage": 7.48, "elapsed_time": "0:19:09", "remaining_time": "3:57:01", "throughput": 8969.72, "total_tokens": 10306432} +{"current_steps": 15305, "total_steps": 204665, "loss": 0.1485, "lr": 1.4954805296330677e-06, "epoch": 0.3739036962841717, "percentage": 7.48, "elapsed_time": "0:19:09", "remaining_time": "3:57:00", "throughput": 8970.07, "total_tokens": 10310016} +{"current_steps": 15310, "total_steps": 204665, "loss": 0.1358, "lr": 1.4959691210240873e-06, "epoch": 0.3740258471160189, "percentage": 7.48, "elapsed_time": "0:19:09", "remaining_time": "3:56:59", "throughput": 8970.28, "total_tokens": 10313408} +{"current_steps": 15315, "total_steps": 204665, "loss": 0.0169, "lr": 1.4964577124151072e-06, "epoch": 0.374147997947866, "percentage": 7.48, "elapsed_time": "0:19:10", "remaining_time": "3:56:59", "throughput": 8970.63, "total_tokens": 10316992} +{"current_steps": 15320, "total_steps": 204665, "loss": 0.1957, "lr": 1.496946303806127e-06, "epoch": 0.3742701487797132, "percentage": 7.49, "elapsed_time": "0:19:10", "remaining_time": "3:56:58", "throughput": 8970.85, "total_tokens": 10320384} +{"current_steps": 15325, "total_steps": 204665, "loss": 0.1696, "lr": 1.4974348951971464e-06, "epoch": 0.37439229961156034, "percentage": 7.49, "elapsed_time": "0:19:10", "remaining_time": "3:56:57", "throughput": 8971.04, "total_tokens": 10323712} +{"current_steps": 15330, "total_steps": 204665, "loss": 0.1251, "lr": 1.4979234865881663e-06, "epoch": 0.37451445044340753, "percentage": 7.49, "elapsed_time": "0:19:11", "remaining_time": "3:56:57", "throughput": 8971.1, "total_tokens": 10326848} +{"current_steps": 15335, "total_steps": 204665, "loss": 0.1813, "lr": 1.498412077979186e-06, "epoch": 0.37463660127525467, "percentage": 7.49, "elapsed_time": "0:19:11", "remaining_time": "3:56:56", "throughput": 8971.23, "total_tokens": 10330112} +{"current_steps": 15340, "total_steps": 204665, "loss": 0.0824, "lr": 1.4989006693702054e-06, "epoch": 0.37475875210710186, "percentage": 7.5, "elapsed_time": "0:19:11", "remaining_time": "3:56:55", "throughput": 8971.6, "total_tokens": 10333696} +{"current_steps": 15345, "total_steps": 204665, "loss": 0.0672, "lr": 1.4993892607612253e-06, "epoch": 0.374880902938949, "percentage": 7.5, "elapsed_time": "0:19:12", "remaining_time": "3:56:54", "throughput": 8971.77, "total_tokens": 10337024} +{"current_steps": 15350, "total_steps": 204665, "loss": 0.066, "lr": 1.499877852152245e-06, "epoch": 0.3750030537707962, "percentage": 7.5, "elapsed_time": "0:19:12", "remaining_time": "3:56:54", "throughput": 8972.02, "total_tokens": 10340480} +{"current_steps": 15355, "total_steps": 204665, "loss": 0.2135, "lr": 1.500366443543265e-06, "epoch": 0.37512520460264337, "percentage": 7.5, "elapsed_time": "0:19:12", "remaining_time": "3:56:53", "throughput": 8972.23, "total_tokens": 10343872} +{"current_steps": 15360, "total_steps": 204665, "loss": 0.0905, "lr": 1.5008550349342844e-06, "epoch": 0.3752473554344905, "percentage": 7.5, "elapsed_time": "0:19:13", "remaining_time": "3:56:52", "throughput": 8972.41, "total_tokens": 10347200} +{"current_steps": 15365, "total_steps": 204665, "loss": 0.1545, "lr": 1.501343626325304e-06, "epoch": 0.3753695062663377, "percentage": 7.51, "elapsed_time": "0:19:13", "remaining_time": "3:56:52", "throughput": 8972.56, "total_tokens": 10350464} +{"current_steps": 15370, "total_steps": 204665, "loss": 0.1257, "lr": 1.501832217716324e-06, "epoch": 0.37549165709818483, "percentage": 7.51, "elapsed_time": "0:19:13", "remaining_time": "3:56:51", "throughput": 8972.69, "total_tokens": 10353728} +{"current_steps": 15375, "total_steps": 204665, "loss": 0.0401, "lr": 1.5023208091073434e-06, "epoch": 0.375613807930032, "percentage": 7.51, "elapsed_time": "0:19:14", "remaining_time": "3:56:50", "throughput": 8972.6, "total_tokens": 10356672} +{"current_steps": 15380, "total_steps": 204665, "loss": 0.1308, "lr": 1.502809400498363e-06, "epoch": 0.37573595876187915, "percentage": 7.51, "elapsed_time": "0:19:14", "remaining_time": "3:56:49", "throughput": 8972.64, "total_tokens": 10359808} +{"current_steps": 15385, "total_steps": 204665, "loss": 0.116, "lr": 1.503297991889383e-06, "epoch": 0.37585810959372634, "percentage": 7.52, "elapsed_time": "0:19:14", "remaining_time": "3:56:49", "throughput": 8972.82, "total_tokens": 10363136} +{"current_steps": 15390, "total_steps": 204665, "loss": 0.1752, "lr": 1.5037865832804025e-06, "epoch": 0.3759802604255735, "percentage": 7.52, "elapsed_time": "0:19:15", "remaining_time": "3:56:48", "throughput": 8973.04, "total_tokens": 10366528} +{"current_steps": 15395, "total_steps": 204665, "loss": 0.287, "lr": 1.5042751746714221e-06, "epoch": 0.37610241125742067, "percentage": 7.52, "elapsed_time": "0:19:15", "remaining_time": "3:56:47", "throughput": 8973.49, "total_tokens": 10370240} +{"current_steps": 15400, "total_steps": 204665, "loss": 0.1475, "lr": 1.5047637660624418e-06, "epoch": 0.37622456208926786, "percentage": 7.52, "elapsed_time": "0:19:16", "remaining_time": "3:56:47", "throughput": 8973.88, "total_tokens": 10373888} +{"current_steps": 15405, "total_steps": 204665, "loss": 0.1297, "lr": 1.5052523574534617e-06, "epoch": 0.376346712921115, "percentage": 7.53, "elapsed_time": "0:19:16", "remaining_time": "3:56:46", "throughput": 8974.23, "total_tokens": 10377472} +{"current_steps": 15410, "total_steps": 204665, "loss": 0.1229, "lr": 1.5057409488444812e-06, "epoch": 0.3764688637529622, "percentage": 7.53, "elapsed_time": "0:19:16", "remaining_time": "3:56:45", "throughput": 8974.33, "total_tokens": 10380672} +{"current_steps": 15415, "total_steps": 204665, "loss": 0.1197, "lr": 1.5062295402355009e-06, "epoch": 0.3765910145848093, "percentage": 7.53, "elapsed_time": "0:19:17", "remaining_time": "3:56:45", "throughput": 8974.62, "total_tokens": 10384128} +{"current_steps": 15420, "total_steps": 204665, "loss": 0.142, "lr": 1.5067181316265208e-06, "epoch": 0.3767131654166565, "percentage": 7.53, "elapsed_time": "0:19:17", "remaining_time": "3:56:44", "throughput": 8974.37, "total_tokens": 10386816} +{"current_steps": 15425, "total_steps": 204665, "loss": 0.0599, "lr": 1.5072067230175402e-06, "epoch": 0.37683531624850364, "percentage": 7.54, "elapsed_time": "0:19:17", "remaining_time": "3:56:43", "throughput": 8974.71, "total_tokens": 10390336} +{"current_steps": 15430, "total_steps": 204665, "loss": 0.1482, "lr": 1.50769531440856e-06, "epoch": 0.3769574670803508, "percentage": 7.54, "elapsed_time": "0:19:18", "remaining_time": "3:56:42", "throughput": 8975.2, "total_tokens": 10394112} +{"current_steps": 15435, "total_steps": 204665, "loss": 0.1473, "lr": 1.5081839057995798e-06, "epoch": 0.37707961791219796, "percentage": 7.54, "elapsed_time": "0:19:18", "remaining_time": "3:56:42", "throughput": 8975.77, "total_tokens": 10398016} +{"current_steps": 15440, "total_steps": 204665, "loss": 0.099, "lr": 1.5086724971905995e-06, "epoch": 0.37720176874404515, "percentage": 7.54, "elapsed_time": "0:19:18", "remaining_time": "3:56:41", "throughput": 8975.9, "total_tokens": 10401280} +{"current_steps": 15445, "total_steps": 204665, "loss": 0.1114, "lr": 1.509161088581619e-06, "epoch": 0.3773239195758923, "percentage": 7.55, "elapsed_time": "0:19:19", "remaining_time": "3:56:41", "throughput": 8976.17, "total_tokens": 10404736} +{"current_steps": 15450, "total_steps": 204665, "loss": 0.1868, "lr": 1.5096496799726388e-06, "epoch": 0.3774460704077395, "percentage": 7.55, "elapsed_time": "0:19:19", "remaining_time": "3:56:40", "throughput": 8976.33, "total_tokens": 10408064} +{"current_steps": 15455, "total_steps": 204665, "loss": 0.171, "lr": 1.5101382713636585e-06, "epoch": 0.37756822123958667, "percentage": 7.55, "elapsed_time": "0:19:19", "remaining_time": "3:56:39", "throughput": 8976.54, "total_tokens": 10411456} +{"current_steps": 15460, "total_steps": 204665, "loss": 0.048, "lr": 1.510626862754678e-06, "epoch": 0.3776903720714338, "percentage": 7.55, "elapsed_time": "0:19:20", "remaining_time": "3:56:38", "throughput": 8976.6, "total_tokens": 10414592} +{"current_steps": 15465, "total_steps": 204665, "loss": 0.1467, "lr": 1.5111154541456979e-06, "epoch": 0.377812522903281, "percentage": 7.56, "elapsed_time": "0:19:20", "remaining_time": "3:56:38", "throughput": 8976.83, "total_tokens": 10417984} +{"current_steps": 15470, "total_steps": 204665, "loss": 0.0913, "lr": 1.5116040455367176e-06, "epoch": 0.3779346737351281, "percentage": 7.56, "elapsed_time": "0:19:20", "remaining_time": "3:56:37", "throughput": 8977.19, "total_tokens": 10421568} +{"current_steps": 15475, "total_steps": 204665, "loss": 0.0812, "lr": 1.5120926369277375e-06, "epoch": 0.3780568245669753, "percentage": 7.56, "elapsed_time": "0:19:21", "remaining_time": "3:56:36", "throughput": 8977.37, "total_tokens": 10424896} +{"current_steps": 15480, "total_steps": 204665, "loss": 0.2021, "lr": 1.512581228318757e-06, "epoch": 0.37817897539882245, "percentage": 7.56, "elapsed_time": "0:19:21", "remaining_time": "3:56:36", "throughput": 8977.77, "total_tokens": 10428544} +{"current_steps": 15485, "total_steps": 204665, "loss": 0.1401, "lr": 1.5130698197097766e-06, "epoch": 0.37830112623066964, "percentage": 7.57, "elapsed_time": "0:19:21", "remaining_time": "3:56:35", "throughput": 8977.87, "total_tokens": 10431744} +{"current_steps": 15490, "total_steps": 204665, "loss": 0.0534, "lr": 1.5135584111007965e-06, "epoch": 0.37842327706251677, "percentage": 7.57, "elapsed_time": "0:19:22", "remaining_time": "3:56:34", "throughput": 8978.5, "total_tokens": 10435712} +{"current_steps": 15495, "total_steps": 204665, "loss": 0.1069, "lr": 1.514047002491816e-06, "epoch": 0.37854542789436396, "percentage": 7.57, "elapsed_time": "0:19:22", "remaining_time": "3:56:34", "throughput": 8978.67, "total_tokens": 10439040} +{"current_steps": 15500, "total_steps": 204665, "loss": 0.0527, "lr": 1.5145355938828357e-06, "epoch": 0.37866757872621115, "percentage": 7.57, "elapsed_time": "0:19:22", "remaining_time": "3:56:33", "throughput": 8978.79, "total_tokens": 10442304} +{"current_steps": 15505, "total_steps": 204665, "loss": 0.0106, "lr": 1.5150241852738553e-06, "epoch": 0.3787897295580583, "percentage": 7.58, "elapsed_time": "0:19:23", "remaining_time": "3:56:32", "throughput": 8979.0, "total_tokens": 10445696} +{"current_steps": 15510, "total_steps": 204665, "loss": 0.2094, "lr": 1.5155127766648752e-06, "epoch": 0.3789118803899055, "percentage": 7.58, "elapsed_time": "0:19:23", "remaining_time": "3:56:32", "throughput": 8979.09, "total_tokens": 10448896} +{"current_steps": 15515, "total_steps": 204665, "loss": 0.0971, "lr": 1.5160013680558947e-06, "epoch": 0.3790340312217526, "percentage": 7.58, "elapsed_time": "0:19:24", "remaining_time": "3:56:31", "throughput": 8979.33, "total_tokens": 10452352} +{"current_steps": 15520, "total_steps": 204665, "loss": 0.2383, "lr": 1.5164899594469144e-06, "epoch": 0.3791561820535998, "percentage": 7.58, "elapsed_time": "0:19:24", "remaining_time": "3:56:30", "throughput": 8979.75, "total_tokens": 10456000} +{"current_steps": 15525, "total_steps": 204665, "loss": 0.1404, "lr": 1.5169785508379343e-06, "epoch": 0.37927833288544693, "percentage": 7.59, "elapsed_time": "0:19:24", "remaining_time": "3:56:30", "throughput": 8980.34, "total_tokens": 10459968} +{"current_steps": 15530, "total_steps": 204665, "loss": 0.1199, "lr": 1.5174671422289538e-06, "epoch": 0.3794004837172941, "percentage": 7.59, "elapsed_time": "0:19:25", "remaining_time": "3:56:29", "throughput": 8980.66, "total_tokens": 10463488} +{"current_steps": 15535, "total_steps": 204665, "loss": 0.0706, "lr": 1.5179557336199734e-06, "epoch": 0.37952263454914126, "percentage": 7.59, "elapsed_time": "0:19:25", "remaining_time": "3:56:28", "throughput": 8980.99, "total_tokens": 10467072} +{"current_steps": 15540, "total_steps": 204665, "loss": 0.1237, "lr": 1.5184443250109933e-06, "epoch": 0.37964478538098845, "percentage": 7.59, "elapsed_time": "0:19:25", "remaining_time": "3:56:28", "throughput": 8981.08, "total_tokens": 10470272} +{"current_steps": 15545, "total_steps": 204665, "loss": 0.1375, "lr": 1.5189329164020128e-06, "epoch": 0.3797669362128356, "percentage": 7.6, "elapsed_time": "0:19:26", "remaining_time": "3:56:27", "throughput": 8981.48, "total_tokens": 10473920} +{"current_steps": 15550, "total_steps": 204665, "loss": 0.0761, "lr": 1.5194215077930325e-06, "epoch": 0.37988908704468277, "percentage": 7.6, "elapsed_time": "0:19:26", "remaining_time": "3:56:26", "throughput": 8981.69, "total_tokens": 10477312} +{"current_steps": 15555, "total_steps": 204665, "loss": 0.0856, "lr": 1.5199100991840524e-06, "epoch": 0.38001123787652996, "percentage": 7.6, "elapsed_time": "0:19:26", "remaining_time": "3:56:26", "throughput": 8981.8, "total_tokens": 10480576} +{"current_steps": 15560, "total_steps": 204665, "loss": 0.0892, "lr": 1.520398690575072e-06, "epoch": 0.3801333887083771, "percentage": 7.6, "elapsed_time": "0:19:27", "remaining_time": "3:56:25", "throughput": 8981.88, "total_tokens": 10483776} +{"current_steps": 15565, "total_steps": 204665, "loss": 0.1898, "lr": 1.5208872819660915e-06, "epoch": 0.3802555395402243, "percentage": 7.61, "elapsed_time": "0:19:27", "remaining_time": "3:56:24", "throughput": 8982.01, "total_tokens": 10487040} +{"current_steps": 15570, "total_steps": 204665, "loss": 0.1709, "lr": 1.5213758733571114e-06, "epoch": 0.3803776903720714, "percentage": 7.61, "elapsed_time": "0:19:27", "remaining_time": "3:56:24", "throughput": 8982.44, "total_tokens": 10490752} +{"current_steps": 15575, "total_steps": 204665, "loss": 0.1493, "lr": 1.521864464748131e-06, "epoch": 0.3804998412039186, "percentage": 7.61, "elapsed_time": "0:19:28", "remaining_time": "3:56:23", "throughput": 8982.49, "total_tokens": 10493888} +{"current_steps": 15580, "total_steps": 204665, "loss": 0.1155, "lr": 1.5223530561391506e-06, "epoch": 0.38062199203576574, "percentage": 7.61, "elapsed_time": "0:19:28", "remaining_time": "3:56:22", "throughput": 8983.04, "total_tokens": 10497728} +{"current_steps": 15585, "total_steps": 204665, "loss": 0.1956, "lr": 1.5228416475301705e-06, "epoch": 0.38074414286761293, "percentage": 7.61, "elapsed_time": "0:19:28", "remaining_time": "3:56:22", "throughput": 8983.35, "total_tokens": 10501248} +{"current_steps": 15590, "total_steps": 204665, "loss": 0.1305, "lr": 1.5233302389211901e-06, "epoch": 0.38086629369946007, "percentage": 7.62, "elapsed_time": "0:19:29", "remaining_time": "3:56:21", "throughput": 8983.54, "total_tokens": 10504576} +{"current_steps": 15595, "total_steps": 204665, "loss": 0.0638, "lr": 1.5238188303122098e-06, "epoch": 0.38098844453130726, "percentage": 7.62, "elapsed_time": "0:19:29", "remaining_time": "3:56:20", "throughput": 8983.61, "total_tokens": 10507712} +{"current_steps": 15600, "total_steps": 204665, "loss": 0.2178, "lr": 1.5243074217032295e-06, "epoch": 0.38111059536315445, "percentage": 7.62, "elapsed_time": "0:19:30", "remaining_time": "3:56:20", "throughput": 8984.15, "total_tokens": 10511552} +{"current_steps": 15605, "total_steps": 204665, "loss": 0.1734, "lr": 1.5247960130942492e-06, "epoch": 0.3812327461950016, "percentage": 7.62, "elapsed_time": "0:19:30", "remaining_time": "3:56:19", "throughput": 8984.04, "total_tokens": 10514432} +{"current_steps": 15610, "total_steps": 204665, "loss": 0.1081, "lr": 1.5252846044852689e-06, "epoch": 0.38135489702684877, "percentage": 7.63, "elapsed_time": "0:19:30", "remaining_time": "3:56:18", "throughput": 8984.14, "total_tokens": 10517632} +{"current_steps": 15615, "total_steps": 204665, "loss": 0.2098, "lr": 1.5257731958762885e-06, "epoch": 0.3814770478586959, "percentage": 7.63, "elapsed_time": "0:19:31", "remaining_time": "3:56:17", "throughput": 8984.38, "total_tokens": 10521024} +{"current_steps": 15620, "total_steps": 204665, "loss": 0.1375, "lr": 1.5262617872673082e-06, "epoch": 0.3815991986905431, "percentage": 7.63, "elapsed_time": "0:19:31", "remaining_time": "3:56:17", "throughput": 8984.73, "total_tokens": 10524608} +{"current_steps": 15625, "total_steps": 204665, "loss": 0.0546, "lr": 1.526750378658328e-06, "epoch": 0.38172134952239023, "percentage": 7.63, "elapsed_time": "0:19:31", "remaining_time": "3:56:16", "throughput": 8985.11, "total_tokens": 10528256} +{"current_steps": 15630, "total_steps": 204665, "loss": 0.1676, "lr": 1.5272389700493478e-06, "epoch": 0.3818435003542374, "percentage": 7.64, "elapsed_time": "0:19:32", "remaining_time": "3:56:15", "throughput": 8985.04, "total_tokens": 10531200} +{"current_steps": 15635, "total_steps": 204665, "loss": 0.159, "lr": 1.5277275614403673e-06, "epoch": 0.38196565118608455, "percentage": 7.64, "elapsed_time": "0:19:32", "remaining_time": "3:56:14", "throughput": 8985.31, "total_tokens": 10534656} +{"current_steps": 15640, "total_steps": 204665, "loss": 0.079, "lr": 1.528216152831387e-06, "epoch": 0.38208780201793174, "percentage": 7.64, "elapsed_time": "0:19:32", "remaining_time": "3:56:14", "throughput": 8985.4, "total_tokens": 10537920} +{"current_steps": 15645, "total_steps": 204665, "loss": 0.1409, "lr": 1.5287047442224068e-06, "epoch": 0.38220995284977893, "percentage": 7.64, "elapsed_time": "0:19:33", "remaining_time": "3:56:13", "throughput": 8985.76, "total_tokens": 10541504} +{"current_steps": 15650, "total_steps": 204665, "loss": 0.1738, "lr": 1.5291933356134263e-06, "epoch": 0.38233210368162607, "percentage": 7.65, "elapsed_time": "0:19:33", "remaining_time": "3:56:12", "throughput": 8985.66, "total_tokens": 10544384} +{"current_steps": 15655, "total_steps": 204665, "loss": 0.1478, "lr": 1.529681927004446e-06, "epoch": 0.38245425451347326, "percentage": 7.65, "elapsed_time": "0:19:33", "remaining_time": "3:56:12", "throughput": 8985.83, "total_tokens": 10547712} +{"current_steps": 15660, "total_steps": 204665, "loss": 0.2437, "lr": 1.5301705183954659e-06, "epoch": 0.3825764053453204, "percentage": 7.65, "elapsed_time": "0:19:34", "remaining_time": "3:56:11", "throughput": 8985.84, "total_tokens": 10550784} +{"current_steps": 15665, "total_steps": 204665, "loss": 0.0959, "lr": 1.5306591097864856e-06, "epoch": 0.3826985561771676, "percentage": 7.65, "elapsed_time": "0:19:34", "remaining_time": "3:56:10", "throughput": 8986.15, "total_tokens": 10554304} +{"current_steps": 15670, "total_steps": 204665, "loss": 0.1287, "lr": 1.531147701177505e-06, "epoch": 0.3828207070090147, "percentage": 7.66, "elapsed_time": "0:19:34", "remaining_time": "3:56:09", "throughput": 8986.43, "total_tokens": 10557760} +{"current_steps": 15675, "total_steps": 204665, "loss": 0.1185, "lr": 1.531636292568525e-06, "epoch": 0.3829428578408619, "percentage": 7.66, "elapsed_time": "0:19:35", "remaining_time": "3:56:09", "throughput": 8986.66, "total_tokens": 10561152} +{"current_steps": 15680, "total_steps": 204665, "loss": 0.1117, "lr": 1.5321248839595446e-06, "epoch": 0.38306500867270904, "percentage": 7.66, "elapsed_time": "0:19:35", "remaining_time": "3:56:08", "throughput": 8986.87, "total_tokens": 10564544} +{"current_steps": 15685, "total_steps": 204665, "loss": 0.1692, "lr": 1.532613475350564e-06, "epoch": 0.38318715950455623, "percentage": 7.66, "elapsed_time": "0:19:35", "remaining_time": "3:56:07", "throughput": 8986.84, "total_tokens": 10567552} +{"current_steps": 15690, "total_steps": 204665, "loss": 0.1682, "lr": 1.533102066741584e-06, "epoch": 0.38330931033640336, "percentage": 7.67, "elapsed_time": "0:19:36", "remaining_time": "3:56:06", "throughput": 8986.85, "total_tokens": 10570624} +{"current_steps": 15695, "total_steps": 204665, "loss": 0.0318, "lr": 1.5335906581326037e-06, "epoch": 0.38343146116825055, "percentage": 7.67, "elapsed_time": "0:19:36", "remaining_time": "3:56:06", "throughput": 8987.02, "total_tokens": 10573952} +{"current_steps": 15700, "total_steps": 204665, "loss": 0.0905, "lr": 1.5340792495236231e-06, "epoch": 0.38355361200009774, "percentage": 7.67, "elapsed_time": "0:19:36", "remaining_time": "3:56:05", "throughput": 8987.24, "total_tokens": 10577344} +{"current_steps": 15705, "total_steps": 204665, "loss": 0.0864, "lr": 1.534567840914643e-06, "epoch": 0.3836757628319449, "percentage": 7.67, "elapsed_time": "0:19:37", "remaining_time": "3:56:04", "throughput": 8987.31, "total_tokens": 10580480} +{"current_steps": 15710, "total_steps": 204665, "loss": 0.2148, "lr": 1.5350564323056627e-06, "epoch": 0.38379791366379207, "percentage": 7.68, "elapsed_time": "0:19:37", "remaining_time": "3:56:04", "throughput": 8987.68, "total_tokens": 10584064} +{"current_steps": 15715, "total_steps": 204665, "loss": 0.1423, "lr": 1.5355450236966824e-06, "epoch": 0.3839200644956392, "percentage": 7.68, "elapsed_time": "0:19:37", "remaining_time": "3:56:03", "throughput": 8988.12, "total_tokens": 10587776} +{"current_steps": 15720, "total_steps": 204665, "loss": 0.1419, "lr": 1.536033615087702e-06, "epoch": 0.3840422153274864, "percentage": 7.68, "elapsed_time": "0:19:38", "remaining_time": "3:56:02", "throughput": 8988.42, "total_tokens": 10591296} +{"current_steps": 15725, "total_steps": 204665, "loss": 0.1796, "lr": 1.5365222064787217e-06, "epoch": 0.3841643661593335, "percentage": 7.68, "elapsed_time": "0:19:38", "remaining_time": "3:56:02", "throughput": 8988.39, "total_tokens": 10594368} +{"current_steps": 15730, "total_steps": 204665, "loss": 0.1257, "lr": 1.5370107978697414e-06, "epoch": 0.3842865169911807, "percentage": 7.69, "elapsed_time": "0:19:39", "remaining_time": "3:56:01", "throughput": 8988.34, "total_tokens": 10597376} +{"current_steps": 15735, "total_steps": 204665, "loss": 0.1822, "lr": 1.5374993892607611e-06, "epoch": 0.38440866782302785, "percentage": 7.69, "elapsed_time": "0:19:39", "remaining_time": "3:56:00", "throughput": 8988.53, "total_tokens": 10600704} +{"current_steps": 15740, "total_steps": 204665, "loss": 0.2058, "lr": 1.5379879806517808e-06, "epoch": 0.38453081865487504, "percentage": 7.69, "elapsed_time": "0:19:39", "remaining_time": "3:55:59", "throughput": 8988.93, "total_tokens": 10604352} +{"current_steps": 15745, "total_steps": 204665, "loss": 0.0936, "lr": 1.5384765720428005e-06, "epoch": 0.38465296948672223, "percentage": 7.69, "elapsed_time": "0:19:40", "remaining_time": "3:55:59", "throughput": 8989.1, "total_tokens": 10607680} +{"current_steps": 15750, "total_steps": 204665, "loss": 0.1499, "lr": 1.5389651634338204e-06, "epoch": 0.38477512031856936, "percentage": 7.7, "elapsed_time": "0:19:40", "remaining_time": "3:55:58", "throughput": 8989.34, "total_tokens": 10611072} +{"current_steps": 15755, "total_steps": 204665, "loss": 0.1205, "lr": 1.5394537548248398e-06, "epoch": 0.38489727115041655, "percentage": 7.7, "elapsed_time": "0:19:40", "remaining_time": "3:55:57", "throughput": 8989.41, "total_tokens": 10614272} +{"current_steps": 15760, "total_steps": 204665, "loss": 0.0851, "lr": 1.5399423462158595e-06, "epoch": 0.3850194219822637, "percentage": 7.7, "elapsed_time": "0:19:41", "remaining_time": "3:55:56", "throughput": 8989.24, "total_tokens": 10617088} +{"current_steps": 15765, "total_steps": 204665, "loss": 0.1307, "lr": 1.5404309376068794e-06, "epoch": 0.3851415728141109, "percentage": 7.7, "elapsed_time": "0:19:41", "remaining_time": "3:55:56", "throughput": 8989.07, "total_tokens": 10619904} +{"current_steps": 15770, "total_steps": 204665, "loss": 0.1781, "lr": 1.5409195289978989e-06, "epoch": 0.385263723645958, "percentage": 7.71, "elapsed_time": "0:19:41", "remaining_time": "3:55:55", "throughput": 8989.42, "total_tokens": 10623488} +{"current_steps": 15775, "total_steps": 204665, "loss": 0.1018, "lr": 1.5414081203889186e-06, "epoch": 0.3853858744778052, "percentage": 7.71, "elapsed_time": "0:19:42", "remaining_time": "3:55:54", "throughput": 8989.61, "total_tokens": 10626880} +{"current_steps": 15780, "total_steps": 204665, "loss": 0.1506, "lr": 1.5418967117799385e-06, "epoch": 0.38550802530965234, "percentage": 7.71, "elapsed_time": "0:19:42", "remaining_time": "3:55:54", "throughput": 8989.78, "total_tokens": 10630208} +{"current_steps": 15785, "total_steps": 204665, "loss": 0.043, "lr": 1.5423853031709581e-06, "epoch": 0.3856301761414995, "percentage": 7.71, "elapsed_time": "0:19:42", "remaining_time": "3:55:53", "throughput": 8989.73, "total_tokens": 10633216} +{"current_steps": 15790, "total_steps": 204665, "loss": 0.1396, "lr": 1.5428738945619776e-06, "epoch": 0.3857523269733467, "percentage": 7.72, "elapsed_time": "0:19:43", "remaining_time": "3:55:52", "throughput": 8989.99, "total_tokens": 10636672} +{"current_steps": 15795, "total_steps": 204665, "loss": 0.1323, "lr": 1.5433624859529975e-06, "epoch": 0.38587447780519385, "percentage": 7.72, "elapsed_time": "0:19:43", "remaining_time": "3:55:51", "throughput": 8990.11, "total_tokens": 10639936} +{"current_steps": 15800, "total_steps": 204665, "loss": 0.1178, "lr": 1.5438510773440172e-06, "epoch": 0.38599662863704104, "percentage": 7.72, "elapsed_time": "0:19:43", "remaining_time": "3:55:51", "throughput": 8990.18, "total_tokens": 10643136} +{"current_steps": 15805, "total_steps": 204665, "loss": 0.086, "lr": 1.5443396687350366e-06, "epoch": 0.3861187794688882, "percentage": 7.72, "elapsed_time": "0:19:44", "remaining_time": "3:55:50", "throughput": 8990.38, "total_tokens": 10646528} +{"current_steps": 15810, "total_steps": 204665, "loss": 0.0228, "lr": 1.5448282601260565e-06, "epoch": 0.38624093030073536, "percentage": 7.72, "elapsed_time": "0:19:44", "remaining_time": "3:55:49", "throughput": 8990.63, "total_tokens": 10649984} +{"current_steps": 15815, "total_steps": 204665, "loss": 0.0876, "lr": 1.5453168515170762e-06, "epoch": 0.3863630811325825, "percentage": 7.73, "elapsed_time": "0:19:44", "remaining_time": "3:55:49", "throughput": 8990.68, "total_tokens": 10653120} +{"current_steps": 15820, "total_steps": 204665, "loss": 0.1813, "lr": 1.5458054429080957e-06, "epoch": 0.3864852319644297, "percentage": 7.73, "elapsed_time": "0:19:45", "remaining_time": "3:55:48", "throughput": 8990.7, "total_tokens": 10656256} +{"current_steps": 15825, "total_steps": 204665, "loss": 0.1512, "lr": 1.5462940342991156e-06, "epoch": 0.3866073827962768, "percentage": 7.73, "elapsed_time": "0:19:45", "remaining_time": "3:55:47", "throughput": 8990.7, "total_tokens": 10659328} +{"current_steps": 15830, "total_steps": 204665, "loss": 0.0881, "lr": 1.5467826256901353e-06, "epoch": 0.386729533628124, "percentage": 7.73, "elapsed_time": "0:19:45", "remaining_time": "3:55:47", "throughput": 8991.02, "total_tokens": 10662848} +{"current_steps": 15835, "total_steps": 204665, "loss": 0.3, "lr": 1.547271217081155e-06, "epoch": 0.38685168445997115, "percentage": 7.74, "elapsed_time": "0:19:46", "remaining_time": "3:55:46", "throughput": 8991.16, "total_tokens": 10666112} +{"current_steps": 15840, "total_steps": 204665, "loss": 0.1846, "lr": 1.5477598084721746e-06, "epoch": 0.38697383529181834, "percentage": 7.74, "elapsed_time": "0:19:46", "remaining_time": "3:55:45", "throughput": 8991.14, "total_tokens": 10669184} +{"current_steps": 15845, "total_steps": 204665, "loss": 0.0803, "lr": 1.5482483998631943e-06, "epoch": 0.3870959861236655, "percentage": 7.74, "elapsed_time": "0:19:46", "remaining_time": "3:55:44", "throughput": 8991.29, "total_tokens": 10672512} +{"current_steps": 15850, "total_steps": 204665, "loss": 0.1346, "lr": 1.548736991254214e-06, "epoch": 0.38721813695551266, "percentage": 7.74, "elapsed_time": "0:19:47", "remaining_time": "3:55:44", "throughput": 8991.18, "total_tokens": 10675392} +{"current_steps": 15855, "total_steps": 204665, "loss": 0.1351, "lr": 1.5492255826452337e-06, "epoch": 0.38734028778735985, "percentage": 7.75, "elapsed_time": "0:19:47", "remaining_time": "3:55:43", "throughput": 8991.34, "total_tokens": 10678720} +{"current_steps": 15860, "total_steps": 204665, "loss": 0.0553, "lr": 1.5497141740362534e-06, "epoch": 0.387462438619207, "percentage": 7.75, "elapsed_time": "0:19:48", "remaining_time": "3:55:42", "throughput": 8991.6, "total_tokens": 10682176} +{"current_steps": 15865, "total_steps": 204665, "loss": 0.0425, "lr": 1.550202765427273e-06, "epoch": 0.3875845894510542, "percentage": 7.75, "elapsed_time": "0:19:48", "remaining_time": "3:55:42", "throughput": 8991.92, "total_tokens": 10685696} +{"current_steps": 15870, "total_steps": 204665, "loss": 0.0721, "lr": 1.550691356818293e-06, "epoch": 0.3877067402829013, "percentage": 7.75, "elapsed_time": "0:19:48", "remaining_time": "3:55:41", "throughput": 8992.22, "total_tokens": 10689216} +{"current_steps": 15875, "total_steps": 204665, "loss": 0.1571, "lr": 1.5511799482093124e-06, "epoch": 0.3878288911147485, "percentage": 7.76, "elapsed_time": "0:19:49", "remaining_time": "3:55:40", "throughput": 8992.23, "total_tokens": 10692288} +{"current_steps": 15880, "total_steps": 204665, "loss": 0.0994, "lr": 1.551668539600332e-06, "epoch": 0.38795104194659563, "percentage": 7.76, "elapsed_time": "0:19:49", "remaining_time": "3:55:39", "throughput": 8992.45, "total_tokens": 10695680} +{"current_steps": 15885, "total_steps": 204665, "loss": 0.2294, "lr": 1.552157130991352e-06, "epoch": 0.3880731927784428, "percentage": 7.76, "elapsed_time": "0:19:49", "remaining_time": "3:55:39", "throughput": 8992.4, "total_tokens": 10698624} +{"current_steps": 15890, "total_steps": 204665, "loss": 0.2457, "lr": 1.5526457223823714e-06, "epoch": 0.38819534361029, "percentage": 7.76, "elapsed_time": "0:19:50", "remaining_time": "3:55:38", "throughput": 8992.72, "total_tokens": 10702208} +{"current_steps": 15895, "total_steps": 204665, "loss": 0.0488, "lr": 1.5531343137733911e-06, "epoch": 0.38831749444213715, "percentage": 7.77, "elapsed_time": "0:19:50", "remaining_time": "3:55:37", "throughput": 8992.68, "total_tokens": 10705216} +{"current_steps": 15900, "total_steps": 204665, "loss": 0.081, "lr": 1.553622905164411e-06, "epoch": 0.38843964527398434, "percentage": 7.77, "elapsed_time": "0:19:50", "remaining_time": "3:55:36", "throughput": 8992.61, "total_tokens": 10708160} +{"current_steps": 15905, "total_steps": 204665, "loss": 0.1093, "lr": 1.5541114965554307e-06, "epoch": 0.38856179610583147, "percentage": 7.77, "elapsed_time": "0:19:51", "remaining_time": "3:55:36", "throughput": 8993.37, "total_tokens": 10712384} +{"current_steps": 15910, "total_steps": 204665, "loss": 0.1033, "lr": 1.5546000879464502e-06, "epoch": 0.38868394693767866, "percentage": 7.77, "elapsed_time": "0:19:51", "remaining_time": "3:55:35", "throughput": 8993.8, "total_tokens": 10716096} +{"current_steps": 15915, "total_steps": 204665, "loss": 0.1871, "lr": 1.55508867933747e-06, "epoch": 0.3888060977695258, "percentage": 7.78, "elapsed_time": "0:19:51", "remaining_time": "3:55:35", "throughput": 8993.85, "total_tokens": 10719232} +{"current_steps": 15920, "total_steps": 204665, "loss": 0.0706, "lr": 1.5555772707284897e-06, "epoch": 0.388928248601373, "percentage": 7.78, "elapsed_time": "0:19:52", "remaining_time": "3:55:34", "throughput": 8994.04, "total_tokens": 10722560} +{"current_steps": 15925, "total_steps": 204665, "loss": 0.0899, "lr": 1.5560658621195092e-06, "epoch": 0.3890503994332201, "percentage": 7.78, "elapsed_time": "0:19:52", "remaining_time": "3:55:33", "throughput": 8993.97, "total_tokens": 10725504} +{"current_steps": 15930, "total_steps": 204665, "loss": 0.2236, "lr": 1.556554453510529e-06, "epoch": 0.3891725502650673, "percentage": 7.78, "elapsed_time": "0:19:52", "remaining_time": "3:55:32", "throughput": 8994.05, "total_tokens": 10728704} +{"current_steps": 15935, "total_steps": 204665, "loss": 0.148, "lr": 1.5570430449015488e-06, "epoch": 0.3892947010969145, "percentage": 7.79, "elapsed_time": "0:19:53", "remaining_time": "3:55:32", "throughput": 8994.26, "total_tokens": 10732096} +{"current_steps": 15940, "total_steps": 204665, "loss": 0.0168, "lr": 1.5575316362925685e-06, "epoch": 0.38941685192876163, "percentage": 7.79, "elapsed_time": "0:19:53", "remaining_time": "3:55:31", "throughput": 8994.39, "total_tokens": 10735360} +{"current_steps": 15945, "total_steps": 204665, "loss": 0.2019, "lr": 1.5580202276835881e-06, "epoch": 0.3895390027606088, "percentage": 7.79, "elapsed_time": "0:19:53", "remaining_time": "3:55:30", "throughput": 8994.57, "total_tokens": 10738688} +{"current_steps": 15950, "total_steps": 204665, "loss": 0.0239, "lr": 1.5585088190746078e-06, "epoch": 0.38966115359245596, "percentage": 7.79, "elapsed_time": "0:19:54", "remaining_time": "3:55:29", "throughput": 8994.51, "total_tokens": 10741632} +{"current_steps": 15955, "total_steps": 204665, "loss": 0.1504, "lr": 1.5589974104656275e-06, "epoch": 0.38978330442430315, "percentage": 7.8, "elapsed_time": "0:19:54", "remaining_time": "3:55:29", "throughput": 8995.02, "total_tokens": 10745472} +{"current_steps": 15960, "total_steps": 204665, "loss": 0.1547, "lr": 1.5594860018566472e-06, "epoch": 0.3899054552561503, "percentage": 7.8, "elapsed_time": "0:19:54", "remaining_time": "3:55:28", "throughput": 8995.23, "total_tokens": 10748864} +{"current_steps": 15965, "total_steps": 204665, "loss": 0.1754, "lr": 1.5599745932476669e-06, "epoch": 0.39002760608799747, "percentage": 7.8, "elapsed_time": "0:19:55", "remaining_time": "3:55:27", "throughput": 8995.35, "total_tokens": 10752128} +{"current_steps": 15970, "total_steps": 204665, "loss": 0.1719, "lr": 1.5604631846386866e-06, "epoch": 0.3901497569198446, "percentage": 7.8, "elapsed_time": "0:19:55", "remaining_time": "3:55:27", "throughput": 8995.76, "total_tokens": 10755840} +{"current_steps": 15975, "total_steps": 204665, "loss": 0.1273, "lr": 1.5609517760297062e-06, "epoch": 0.3902719077516918, "percentage": 7.81, "elapsed_time": "0:19:55", "remaining_time": "3:55:26", "throughput": 8995.86, "total_tokens": 10759040} +{"current_steps": 15980, "total_steps": 204665, "loss": 0.1223, "lr": 1.561440367420726e-06, "epoch": 0.3903940585835389, "percentage": 7.81, "elapsed_time": "0:19:56", "remaining_time": "3:55:25", "throughput": 8995.91, "total_tokens": 10762176} +{"current_steps": 15985, "total_steps": 204665, "loss": 0.1119, "lr": 1.5619289588117456e-06, "epoch": 0.3905162094153861, "percentage": 7.81, "elapsed_time": "0:19:56", "remaining_time": "3:55:25", "throughput": 8996.23, "total_tokens": 10765696} +{"current_steps": 15990, "total_steps": 204665, "loss": 0.1843, "lr": 1.5624175502027655e-06, "epoch": 0.3906383602472333, "percentage": 7.81, "elapsed_time": "0:19:57", "remaining_time": "3:55:24", "throughput": 8996.41, "total_tokens": 10769024} +{"current_steps": 15995, "total_steps": 204665, "loss": 0.1047, "lr": 1.562906141593785e-06, "epoch": 0.39076051107908044, "percentage": 7.82, "elapsed_time": "0:19:57", "remaining_time": "3:55:23", "throughput": 8996.91, "total_tokens": 10772800} +{"current_steps": 16000, "total_steps": 204665, "loss": 0.0575, "lr": 1.5633947329848046e-06, "epoch": 0.39088266191092763, "percentage": 7.82, "elapsed_time": "0:19:57", "remaining_time": "3:55:23", "throughput": 8997.47, "total_tokens": 10776704} +{"current_steps": 16005, "total_steps": 204665, "loss": 0.071, "lr": 1.5638833243758245e-06, "epoch": 0.39100481274277477, "percentage": 7.82, "elapsed_time": "0:19:58", "remaining_time": "3:55:22", "throughput": 8997.89, "total_tokens": 10780416} +{"current_steps": 16010, "total_steps": 204665, "loss": 0.1896, "lr": 1.564371915766844e-06, "epoch": 0.39112696357462196, "percentage": 7.82, "elapsed_time": "0:19:58", "remaining_time": "3:55:21", "throughput": 8997.94, "total_tokens": 10783552} +{"current_steps": 16015, "total_steps": 204665, "loss": 0.1082, "lr": 1.5648605071578637e-06, "epoch": 0.3912491144064691, "percentage": 7.82, "elapsed_time": "0:19:58", "remaining_time": "3:55:21", "throughput": 8997.82, "total_tokens": 10786432} +{"current_steps": 16020, "total_steps": 204665, "loss": 0.1499, "lr": 1.5653490985488836e-06, "epoch": 0.3913712652383163, "percentage": 7.83, "elapsed_time": "0:19:59", "remaining_time": "3:55:20", "throughput": 8997.81, "total_tokens": 10789504} +{"current_steps": 16025, "total_steps": 204665, "loss": 0.1097, "lr": 1.5658376899399033e-06, "epoch": 0.3914934160701634, "percentage": 7.83, "elapsed_time": "0:19:59", "remaining_time": "3:55:19", "throughput": 8997.96, "total_tokens": 10792768} +{"current_steps": 16030, "total_steps": 204665, "loss": 0.0895, "lr": 1.5663262813309227e-06, "epoch": 0.3916155669020106, "percentage": 7.83, "elapsed_time": "0:19:59", "remaining_time": "3:55:19", "throughput": 8998.27, "total_tokens": 10796288} +{"current_steps": 16035, "total_steps": 204665, "loss": 0.2037, "lr": 1.5668148727219426e-06, "epoch": 0.3917377177338578, "percentage": 7.83, "elapsed_time": "0:20:00", "remaining_time": "3:55:18", "throughput": 8998.73, "total_tokens": 10800064} +{"current_steps": 16040, "total_steps": 204665, "loss": 0.155, "lr": 1.5673034641129623e-06, "epoch": 0.3918598685657049, "percentage": 7.84, "elapsed_time": "0:20:00", "remaining_time": "3:55:17", "throughput": 8998.96, "total_tokens": 10803456} +{"current_steps": 16045, "total_steps": 204665, "loss": 0.1418, "lr": 1.5677920555039818e-06, "epoch": 0.3919820193975521, "percentage": 7.84, "elapsed_time": "0:20:00", "remaining_time": "3:55:17", "throughput": 8999.14, "total_tokens": 10806784} +{"current_steps": 16050, "total_steps": 204665, "loss": 0.1995, "lr": 1.5682806468950017e-06, "epoch": 0.39210417022939925, "percentage": 7.84, "elapsed_time": "0:20:01", "remaining_time": "3:55:16", "throughput": 8999.57, "total_tokens": 10810496} +{"current_steps": 16055, "total_steps": 204665, "loss": 0.0676, "lr": 1.5687692382860213e-06, "epoch": 0.39222632106124644, "percentage": 7.84, "elapsed_time": "0:20:01", "remaining_time": "3:55:15", "throughput": 8999.7, "total_tokens": 10813760} +{"current_steps": 16060, "total_steps": 204665, "loss": 0.111, "lr": 1.569257829677041e-06, "epoch": 0.3923484718930936, "percentage": 7.85, "elapsed_time": "0:20:01", "remaining_time": "3:55:15", "throughput": 8999.88, "total_tokens": 10817088} +{"current_steps": 16065, "total_steps": 204665, "loss": 0.1503, "lr": 1.5697464210680607e-06, "epoch": 0.39247062272494077, "percentage": 7.85, "elapsed_time": "0:20:02", "remaining_time": "3:55:14", "throughput": 9000.1, "total_tokens": 10820480} +{"current_steps": 16070, "total_steps": 204665, "loss": 0.2075, "lr": 1.5702350124590804e-06, "epoch": 0.3925927735567879, "percentage": 7.85, "elapsed_time": "0:20:02", "remaining_time": "3:55:13", "throughput": 9000.19, "total_tokens": 10823680} +{"current_steps": 16075, "total_steps": 204665, "loss": 0.1805, "lr": 1.5707236038501e-06, "epoch": 0.3927149243886351, "percentage": 7.85, "elapsed_time": "0:20:02", "remaining_time": "3:55:12", "throughput": 9000.67, "total_tokens": 10827456} +{"current_steps": 16080, "total_steps": 204665, "loss": 0.1784, "lr": 1.5712121952411198e-06, "epoch": 0.3928370752204823, "percentage": 7.86, "elapsed_time": "0:20:03", "remaining_time": "3:55:12", "throughput": 9000.85, "total_tokens": 10830784} +{"current_steps": 16085, "total_steps": 204665, "loss": 0.1074, "lr": 1.5717007866321394e-06, "epoch": 0.3929592260523294, "percentage": 7.86, "elapsed_time": "0:20:03", "remaining_time": "3:55:11", "throughput": 9001.05, "total_tokens": 10834112} +{"current_steps": 16090, "total_steps": 204665, "loss": 0.1116, "lr": 1.5721893780231591e-06, "epoch": 0.3930813768841766, "percentage": 7.86, "elapsed_time": "0:20:03", "remaining_time": "3:55:10", "throughput": 9001.15, "total_tokens": 10837312} +{"current_steps": 16095, "total_steps": 204665, "loss": 0.1407, "lr": 1.5726779694141788e-06, "epoch": 0.39320352771602374, "percentage": 7.86, "elapsed_time": "0:20:04", "remaining_time": "3:55:10", "throughput": 9001.19, "total_tokens": 10840448} +{"current_steps": 16100, "total_steps": 204665, "loss": 0.0713, "lr": 1.5731665608051985e-06, "epoch": 0.3933256785478709, "percentage": 7.87, "elapsed_time": "0:20:04", "remaining_time": "3:55:09", "throughput": 9001.28, "total_tokens": 10843648} +{"current_steps": 16105, "total_steps": 204665, "loss": 0.0996, "lr": 1.5736551521962182e-06, "epoch": 0.39344782937971806, "percentage": 7.87, "elapsed_time": "0:20:05", "remaining_time": "3:55:08", "throughput": 9001.56, "total_tokens": 10847168} +{"current_steps": 16110, "total_steps": 204665, "loss": 0.1066, "lr": 1.574143743587238e-06, "epoch": 0.39356998021156525, "percentage": 7.87, "elapsed_time": "0:20:05", "remaining_time": "3:55:07", "throughput": 9001.56, "total_tokens": 10850240} +{"current_steps": 16115, "total_steps": 204665, "loss": 0.07, "lr": 1.5746323349782575e-06, "epoch": 0.3936921310434124, "percentage": 7.87, "elapsed_time": "0:20:05", "remaining_time": "3:55:07", "throughput": 9001.77, "total_tokens": 10853632} +{"current_steps": 16120, "total_steps": 204665, "loss": 0.2055, "lr": 1.5751209263692772e-06, "epoch": 0.3938142818752596, "percentage": 7.88, "elapsed_time": "0:20:06", "remaining_time": "3:55:06", "throughput": 9002.25, "total_tokens": 10857472} +{"current_steps": 16125, "total_steps": 204665, "loss": 0.2077, "lr": 1.575609517760297e-06, "epoch": 0.3939364327071067, "percentage": 7.88, "elapsed_time": "0:20:06", "remaining_time": "3:55:06", "throughput": 9002.47, "total_tokens": 10860864} +{"current_steps": 16130, "total_steps": 204665, "loss": 0.0593, "lr": 1.5760981091513166e-06, "epoch": 0.3940585835389539, "percentage": 7.88, "elapsed_time": "0:20:06", "remaining_time": "3:55:05", "throughput": 9002.69, "total_tokens": 10864256} +{"current_steps": 16135, "total_steps": 204665, "loss": 0.075, "lr": 1.5765867005423362e-06, "epoch": 0.3941807343708011, "percentage": 7.88, "elapsed_time": "0:20:07", "remaining_time": "3:55:04", "throughput": 9002.69, "total_tokens": 10867328} +{"current_steps": 16140, "total_steps": 204665, "loss": 0.1361, "lr": 1.5770752919333561e-06, "epoch": 0.3943028852026482, "percentage": 7.89, "elapsed_time": "0:20:07", "remaining_time": "3:55:03", "throughput": 9002.89, "total_tokens": 10870720} +{"current_steps": 16145, "total_steps": 204665, "loss": 0.1391, "lr": 1.5775638833243758e-06, "epoch": 0.3944250360344954, "percentage": 7.89, "elapsed_time": "0:20:07", "remaining_time": "3:55:03", "throughput": 9003.12, "total_tokens": 10874176} +{"current_steps": 16150, "total_steps": 204665, "loss": 0.1064, "lr": 1.5780524747153953e-06, "epoch": 0.39454718686634255, "percentage": 7.89, "elapsed_time": "0:20:08", "remaining_time": "3:55:02", "throughput": 9003.15, "total_tokens": 10877312} +{"current_steps": 16155, "total_steps": 204665, "loss": 0.0347, "lr": 1.5785410661064152e-06, "epoch": 0.39466933769818974, "percentage": 7.89, "elapsed_time": "0:20:08", "remaining_time": "3:55:01", "throughput": 9003.16, "total_tokens": 10880384} +{"current_steps": 16160, "total_steps": 204665, "loss": 0.1328, "lr": 1.5790296574974349e-06, "epoch": 0.39479148853003687, "percentage": 7.9, "elapsed_time": "0:20:08", "remaining_time": "3:55:01", "throughput": 9003.27, "total_tokens": 10883648} +{"current_steps": 16165, "total_steps": 204665, "loss": 0.043, "lr": 1.5795182488884543e-06, "epoch": 0.39491363936188406, "percentage": 7.9, "elapsed_time": "0:20:09", "remaining_time": "3:55:00", "throughput": 9003.52, "total_tokens": 10887104} +{"current_steps": 16170, "total_steps": 204665, "loss": 0.135, "lr": 1.5800068402794742e-06, "epoch": 0.3950357901937312, "percentage": 7.9, "elapsed_time": "0:20:09", "remaining_time": "3:54:59", "throughput": 9003.76, "total_tokens": 10890560} +{"current_steps": 16175, "total_steps": 204665, "loss": 0.1755, "lr": 1.580495431670494e-06, "epoch": 0.3951579410255784, "percentage": 7.9, "elapsed_time": "0:20:09", "remaining_time": "3:54:59", "throughput": 9003.78, "total_tokens": 10893696} +{"current_steps": 16180, "total_steps": 204665, "loss": 0.0819, "lr": 1.5809840230615136e-06, "epoch": 0.3952800918574256, "percentage": 7.91, "elapsed_time": "0:20:10", "remaining_time": "3:54:58", "throughput": 9003.78, "total_tokens": 10896768} +{"current_steps": 16185, "total_steps": 204665, "loss": 0.0533, "lr": 1.5814726144525333e-06, "epoch": 0.3954022426892727, "percentage": 7.91, "elapsed_time": "0:20:10", "remaining_time": "3:54:57", "throughput": 9003.76, "total_tokens": 10899840} +{"current_steps": 16190, "total_steps": 204665, "loss": 0.1159, "lr": 1.581961205843553e-06, "epoch": 0.3955243935211199, "percentage": 7.91, "elapsed_time": "0:20:10", "remaining_time": "3:54:56", "throughput": 9003.59, "total_tokens": 10902656} +{"current_steps": 16195, "total_steps": 204665, "loss": 0.1456, "lr": 1.5824497972345726e-06, "epoch": 0.39564654435296703, "percentage": 7.91, "elapsed_time": "0:20:11", "remaining_time": "3:54:56", "throughput": 9003.66, "total_tokens": 10905856} +{"current_steps": 16200, "total_steps": 204665, "loss": 0.2502, "lr": 1.5829383886255923e-06, "epoch": 0.3957686951848142, "percentage": 7.92, "elapsed_time": "0:20:11", "remaining_time": "3:54:55", "throughput": 9003.79, "total_tokens": 10909120} +{"current_steps": 16205, "total_steps": 204665, "loss": 0.1608, "lr": 1.583426980016612e-06, "epoch": 0.39589084601666136, "percentage": 7.92, "elapsed_time": "0:20:11", "remaining_time": "3:54:54", "throughput": 9003.93, "total_tokens": 10912384} +{"current_steps": 16210, "total_steps": 204665, "loss": 0.0518, "lr": 1.5839155714076317e-06, "epoch": 0.39601299684850855, "percentage": 7.92, "elapsed_time": "0:20:12", "remaining_time": "3:54:54", "throughput": 9004.16, "total_tokens": 10915840} +{"current_steps": 16215, "total_steps": 204665, "loss": 0.1392, "lr": 1.5844041627986516e-06, "epoch": 0.3961351476803557, "percentage": 7.92, "elapsed_time": "0:20:12", "remaining_time": "3:54:53", "throughput": 9004.31, "total_tokens": 10919168} +{"current_steps": 16220, "total_steps": 204665, "loss": 0.1297, "lr": 1.584892754189671e-06, "epoch": 0.39625729851220287, "percentage": 7.93, "elapsed_time": "0:20:13", "remaining_time": "3:54:52", "throughput": 9004.25, "total_tokens": 10922176} +{"current_steps": 16225, "total_steps": 204665, "loss": 0.0987, "lr": 1.5853813455806907e-06, "epoch": 0.39637944934405, "percentage": 7.93, "elapsed_time": "0:20:13", "remaining_time": "3:54:52", "throughput": 9004.44, "total_tokens": 10925504} +{"current_steps": 16230, "total_steps": 204665, "loss": 0.0924, "lr": 1.5858699369717106e-06, "epoch": 0.3965016001758972, "percentage": 7.93, "elapsed_time": "0:20:13", "remaining_time": "3:54:51", "throughput": 9004.32, "total_tokens": 10928384} +{"current_steps": 16235, "total_steps": 204665, "loss": 0.1234, "lr": 1.58635852836273e-06, "epoch": 0.3966237510077444, "percentage": 7.93, "elapsed_time": "0:20:14", "remaining_time": "3:54:50", "throughput": 9004.27, "total_tokens": 10931392} +{"current_steps": 16240, "total_steps": 204665, "loss": 0.0636, "lr": 1.5868471197537498e-06, "epoch": 0.3967459018395915, "percentage": 7.93, "elapsed_time": "0:20:14", "remaining_time": "3:54:49", "throughput": 9004.56, "total_tokens": 10934912} +{"current_steps": 16245, "total_steps": 204665, "loss": 0.1387, "lr": 1.5873357111447697e-06, "epoch": 0.3968680526714387, "percentage": 7.94, "elapsed_time": "0:20:14", "remaining_time": "3:54:49", "throughput": 9004.65, "total_tokens": 10938112} +{"current_steps": 16250, "total_steps": 204665, "loss": 0.1428, "lr": 1.5878243025357891e-06, "epoch": 0.39699020350328584, "percentage": 7.94, "elapsed_time": "0:20:15", "remaining_time": "3:54:48", "throughput": 9005.27, "total_tokens": 10942144} +{"current_steps": 16255, "total_steps": 204665, "loss": 0.1288, "lr": 1.5883128939268088e-06, "epoch": 0.39711235433513303, "percentage": 7.94, "elapsed_time": "0:20:15", "remaining_time": "3:54:47", "throughput": 9005.33, "total_tokens": 10945280} +{"current_steps": 16260, "total_steps": 204665, "loss": 0.1731, "lr": 1.5888014853178287e-06, "epoch": 0.39723450516698017, "percentage": 7.94, "elapsed_time": "0:20:15", "remaining_time": "3:54:47", "throughput": 9005.79, "total_tokens": 10949056} +{"current_steps": 16265, "total_steps": 204665, "loss": 0.0676, "lr": 1.5892900767088484e-06, "epoch": 0.39735665599882736, "percentage": 7.95, "elapsed_time": "0:20:16", "remaining_time": "3:54:46", "throughput": 9005.96, "total_tokens": 10952384} +{"current_steps": 16270, "total_steps": 204665, "loss": 0.0302, "lr": 1.5897786680998679e-06, "epoch": 0.3974788068306745, "percentage": 7.95, "elapsed_time": "0:20:16", "remaining_time": "3:54:45", "throughput": 9006.1, "total_tokens": 10955712} +{"current_steps": 16275, "total_steps": 204665, "loss": 0.0313, "lr": 1.5902672594908877e-06, "epoch": 0.3976009576625217, "percentage": 7.95, "elapsed_time": "0:20:16", "remaining_time": "3:54:45", "throughput": 9006.26, "total_tokens": 10959040} +{"current_steps": 16280, "total_steps": 204665, "loss": 0.1402, "lr": 1.5907558508819074e-06, "epoch": 0.39772310849436887, "percentage": 7.95, "elapsed_time": "0:20:17", "remaining_time": "3:54:44", "throughput": 9006.32, "total_tokens": 10962240} +{"current_steps": 16285, "total_steps": 204665, "loss": 0.0433, "lr": 1.591244442272927e-06, "epoch": 0.397845259326216, "percentage": 7.96, "elapsed_time": "0:20:17", "remaining_time": "3:54:43", "throughput": 9006.44, "total_tokens": 10965504} +{"current_steps": 16290, "total_steps": 204665, "loss": 0.1955, "lr": 1.5917330336639468e-06, "epoch": 0.3979674101580632, "percentage": 7.96, "elapsed_time": "0:20:17", "remaining_time": "3:54:43", "throughput": 9006.7, "total_tokens": 10968960} +{"current_steps": 16295, "total_steps": 204665, "loss": 0.1945, "lr": 1.5922216250549665e-06, "epoch": 0.39808956098991033, "percentage": 7.96, "elapsed_time": "0:20:18", "remaining_time": "3:54:42", "throughput": 9007.12, "total_tokens": 10972672} +{"current_steps": 16300, "total_steps": 204665, "loss": 0.2091, "lr": 1.5927102164459862e-06, "epoch": 0.3982117118217575, "percentage": 7.96, "elapsed_time": "0:20:18", "remaining_time": "3:54:41", "throughput": 9007.06, "total_tokens": 10975680} +{"current_steps": 16305, "total_steps": 204665, "loss": 0.135, "lr": 1.5931988078370058e-06, "epoch": 0.39833386265360465, "percentage": 7.97, "elapsed_time": "0:20:18", "remaining_time": "3:54:41", "throughput": 9007.02, "total_tokens": 10978688} +{"current_steps": 16310, "total_steps": 204665, "loss": 0.1638, "lr": 1.5936873992280255e-06, "epoch": 0.39845601348545184, "percentage": 7.97, "elapsed_time": "0:20:19", "remaining_time": "3:54:40", "throughput": 9006.99, "total_tokens": 10981696} +{"current_steps": 16315, "total_steps": 204665, "loss": 0.1767, "lr": 1.5941759906190452e-06, "epoch": 0.398578164317299, "percentage": 7.97, "elapsed_time": "0:20:19", "remaining_time": "3:54:40", "throughput": 9006.89, "total_tokens": 10985024} +{"current_steps": 16320, "total_steps": 204665, "loss": 0.128, "lr": 1.5946645820100649e-06, "epoch": 0.39870031514914617, "percentage": 7.97, "elapsed_time": "0:20:19", "remaining_time": "3:54:39", "throughput": 9007.1, "total_tokens": 10988416} +{"current_steps": 16325, "total_steps": 204665, "loss": 0.1098, "lr": 1.5951531734010846e-06, "epoch": 0.39882246598099336, "percentage": 7.98, "elapsed_time": "0:20:20", "remaining_time": "3:54:38", "throughput": 9007.43, "total_tokens": 10992000} +{"current_steps": 16330, "total_steps": 204665, "loss": 0.1098, "lr": 1.5956417647921042e-06, "epoch": 0.3989446168128405, "percentage": 7.98, "elapsed_time": "0:20:20", "remaining_time": "3:54:38", "throughput": 9007.54, "total_tokens": 10995264} +{"current_steps": 16335, "total_steps": 204665, "loss": 0.0463, "lr": 1.5961303561831241e-06, "epoch": 0.3990667676446877, "percentage": 7.98, "elapsed_time": "0:20:21", "remaining_time": "3:54:37", "throughput": 9007.63, "total_tokens": 10998464} +{"current_steps": 16340, "total_steps": 204665, "loss": 0.1157, "lr": 1.5966189475741436e-06, "epoch": 0.3991889184765348, "percentage": 7.98, "elapsed_time": "0:20:21", "remaining_time": "3:54:36", "throughput": 9007.66, "total_tokens": 11001600} +{"current_steps": 16345, "total_steps": 204665, "loss": 0.053, "lr": 1.5971075389651633e-06, "epoch": 0.399311069308382, "percentage": 7.99, "elapsed_time": "0:20:21", "remaining_time": "3:54:35", "throughput": 9007.83, "total_tokens": 11004928} +{"current_steps": 16350, "total_steps": 204665, "loss": 0.1173, "lr": 1.5975961303561832e-06, "epoch": 0.39943322014022914, "percentage": 7.99, "elapsed_time": "0:20:22", "remaining_time": "3:54:35", "throughput": 9007.86, "total_tokens": 11008064} +{"current_steps": 16355, "total_steps": 204665, "loss": 0.2029, "lr": 1.5980847217472026e-06, "epoch": 0.39955537097207633, "percentage": 7.99, "elapsed_time": "0:20:22", "remaining_time": "3:54:34", "throughput": 9008.03, "total_tokens": 11011392} +{"current_steps": 16360, "total_steps": 204665, "loss": 0.2419, "lr": 1.5985733131382223e-06, "epoch": 0.39967752180392346, "percentage": 7.99, "elapsed_time": "0:20:22", "remaining_time": "3:54:33", "throughput": 9008.33, "total_tokens": 11014912} +{"current_steps": 16365, "total_steps": 204665, "loss": 0.1646, "lr": 1.5990619045292422e-06, "epoch": 0.39979967263577065, "percentage": 8.0, "elapsed_time": "0:20:23", "remaining_time": "3:54:33", "throughput": 9008.66, "total_tokens": 11018496} +{"current_steps": 16370, "total_steps": 204665, "loss": 0.0541, "lr": 1.599550495920262e-06, "epoch": 0.3999218234676178, "percentage": 8.0, "elapsed_time": "0:20:23", "remaining_time": "3:54:32", "throughput": 9008.93, "total_tokens": 11022016} +{"current_steps": 16375, "total_steps": 204665, "loss": 0.0528, "lr": 1.6000390873112814e-06, "epoch": 0.400043974299465, "percentage": 8.0, "elapsed_time": "0:20:23", "remaining_time": "3:54:31", "throughput": 9008.92, "total_tokens": 11025088} +{"current_steps": 16380, "total_steps": 204665, "loss": 0.0828, "lr": 1.6005276787023013e-06, "epoch": 0.40016612513131217, "percentage": 8.0, "elapsed_time": "0:20:24", "remaining_time": "3:54:31", "throughput": 9008.8, "total_tokens": 11027968} +{"current_steps": 16385, "total_steps": 204665, "loss": 0.1717, "lr": 1.601016270093321e-06, "epoch": 0.4002882759631593, "percentage": 8.01, "elapsed_time": "0:20:24", "remaining_time": "3:54:30", "throughput": 9008.94, "total_tokens": 11031296} +{"current_steps": 16390, "total_steps": 204665, "loss": 0.0743, "lr": 1.6015048614843404e-06, "epoch": 0.4004104267950065, "percentage": 8.01, "elapsed_time": "0:20:24", "remaining_time": "3:54:29", "throughput": 9009.15, "total_tokens": 11034688} +{"current_steps": 16395, "total_steps": 204665, "loss": 0.134, "lr": 1.6019934528753603e-06, "epoch": 0.4005325776268536, "percentage": 8.01, "elapsed_time": "0:20:25", "remaining_time": "3:54:29", "throughput": 9009.33, "total_tokens": 11038016} +{"current_steps": 16400, "total_steps": 204665, "loss": 0.1923, "lr": 1.60248204426638e-06, "epoch": 0.4006547284587008, "percentage": 8.01, "elapsed_time": "0:20:25", "remaining_time": "3:54:28", "throughput": 9009.55, "total_tokens": 11041472} +{"current_steps": 16405, "total_steps": 204665, "loss": 0.1039, "lr": 1.6029706356573995e-06, "epoch": 0.40077687929054795, "percentage": 8.02, "elapsed_time": "0:20:25", "remaining_time": "3:54:27", "throughput": 9009.75, "total_tokens": 11044864} +{"current_steps": 16410, "total_steps": 204665, "loss": 0.1081, "lr": 1.6034592270484194e-06, "epoch": 0.40089903012239514, "percentage": 8.02, "elapsed_time": "0:20:26", "remaining_time": "3:54:27", "throughput": 9010.03, "total_tokens": 11048384} +{"current_steps": 16415, "total_steps": 204665, "loss": 0.0626, "lr": 1.603947818439439e-06, "epoch": 0.4010211809542423, "percentage": 8.02, "elapsed_time": "0:20:26", "remaining_time": "3:54:26", "throughput": 9010.19, "total_tokens": 11051712} +{"current_steps": 16420, "total_steps": 204665, "loss": 0.0369, "lr": 1.6044364098304587e-06, "epoch": 0.40114333178608946, "percentage": 8.02, "elapsed_time": "0:20:26", "remaining_time": "3:54:26", "throughput": 9010.7, "total_tokens": 11055552} +{"current_steps": 16425, "total_steps": 204665, "loss": 0.1517, "lr": 1.6049250012214784e-06, "epoch": 0.40126548261793665, "percentage": 8.03, "elapsed_time": "0:20:27", "remaining_time": "3:54:25", "throughput": 9010.86, "total_tokens": 11058880} +{"current_steps": 16430, "total_steps": 204665, "loss": 0.1362, "lr": 1.605413592612498e-06, "epoch": 0.4013876334497838, "percentage": 8.03, "elapsed_time": "0:20:27", "remaining_time": "3:54:24", "throughput": 9011.02, "total_tokens": 11062208} +{"current_steps": 16435, "total_steps": 204665, "loss": 0.2636, "lr": 1.6059021840035178e-06, "epoch": 0.401509784281631, "percentage": 8.03, "elapsed_time": "0:20:27", "remaining_time": "3:54:24", "throughput": 9011.53, "total_tokens": 11066048} +{"current_steps": 16440, "total_steps": 204665, "loss": 0.3135, "lr": 1.6063907753945374e-06, "epoch": 0.4016319351134781, "percentage": 8.03, "elapsed_time": "0:20:28", "remaining_time": "3:54:23", "throughput": 9011.97, "total_tokens": 11069824} +{"current_steps": 16445, "total_steps": 204665, "loss": 0.1552, "lr": 1.6068793667855571e-06, "epoch": 0.4017540859453253, "percentage": 8.04, "elapsed_time": "0:20:28", "remaining_time": "3:54:22", "throughput": 9012.07, "total_tokens": 11073088} +{"current_steps": 16450, "total_steps": 204665, "loss": 0.1472, "lr": 1.6073679581765768e-06, "epoch": 0.40187623677717244, "percentage": 8.04, "elapsed_time": "0:20:29", "remaining_time": "3:54:22", "throughput": 9012.51, "total_tokens": 11076864} +{"current_steps": 16455, "total_steps": 204665, "loss": 0.0921, "lr": 1.6078565495675967e-06, "epoch": 0.4019983876090196, "percentage": 8.04, "elapsed_time": "0:20:29", "remaining_time": "3:54:21", "throughput": 9012.72, "total_tokens": 11080256} +{"current_steps": 16460, "total_steps": 204665, "loss": 0.1323, "lr": 1.6083451409586162e-06, "epoch": 0.40212053844086676, "percentage": 8.04, "elapsed_time": "0:20:29", "remaining_time": "3:54:21", "throughput": 9012.95, "total_tokens": 11083712} +{"current_steps": 16465, "total_steps": 204665, "loss": 0.1764, "lr": 1.6088337323496359e-06, "epoch": 0.40224268927271395, "percentage": 8.04, "elapsed_time": "0:20:30", "remaining_time": "3:54:20", "throughput": 9013.09, "total_tokens": 11087040} +{"current_steps": 16470, "total_steps": 204665, "loss": 0.1396, "lr": 1.6093223237406557e-06, "epoch": 0.40236484010456114, "percentage": 8.05, "elapsed_time": "0:20:30", "remaining_time": "3:54:19", "throughput": 9013.16, "total_tokens": 11090240} +{"current_steps": 16475, "total_steps": 204665, "loss": 0.0541, "lr": 1.6098109151316752e-06, "epoch": 0.4024869909364083, "percentage": 8.05, "elapsed_time": "0:20:30", "remaining_time": "3:54:19", "throughput": 9013.36, "total_tokens": 11093632} +{"current_steps": 16480, "total_steps": 204665, "loss": 0.1336, "lr": 1.610299506522695e-06, "epoch": 0.40260914176825546, "percentage": 8.05, "elapsed_time": "0:20:31", "remaining_time": "3:54:18", "throughput": 9013.39, "total_tokens": 11096768} +{"current_steps": 16485, "total_steps": 204665, "loss": 0.0992, "lr": 1.6107880979137148e-06, "epoch": 0.4027312926001026, "percentage": 8.05, "elapsed_time": "0:20:31", "remaining_time": "3:54:17", "throughput": 9013.46, "total_tokens": 11099968} +{"current_steps": 16490, "total_steps": 204665, "loss": 0.1079, "lr": 1.6112766893047345e-06, "epoch": 0.4028534434319498, "percentage": 8.06, "elapsed_time": "0:20:31", "remaining_time": "3:54:17", "throughput": 9013.57, "total_tokens": 11103232} +{"current_steps": 16495, "total_steps": 204665, "loss": 0.1288, "lr": 1.611765280695754e-06, "epoch": 0.4029755942637969, "percentage": 8.06, "elapsed_time": "0:20:32", "remaining_time": "3:54:16", "throughput": 9013.41, "total_tokens": 11106048} +{"current_steps": 16500, "total_steps": 204665, "loss": 0.0708, "lr": 1.6122538720867738e-06, "epoch": 0.4030977450956441, "percentage": 8.06, "elapsed_time": "0:20:32", "remaining_time": "3:54:15", "throughput": 9013.49, "total_tokens": 11109248} +{"current_steps": 16505, "total_steps": 204665, "loss": 0.1343, "lr": 1.6127424634777935e-06, "epoch": 0.40321989592749125, "percentage": 8.06, "elapsed_time": "0:20:32", "remaining_time": "3:54:14", "throughput": 9013.51, "total_tokens": 11112384} +{"current_steps": 16510, "total_steps": 204665, "loss": 0.075, "lr": 1.613231054868813e-06, "epoch": 0.40334204675933844, "percentage": 8.07, "elapsed_time": "0:20:33", "remaining_time": "3:54:14", "throughput": 9013.92, "total_tokens": 11116096} +{"current_steps": 16515, "total_steps": 204665, "loss": 0.1278, "lr": 1.6137196462598329e-06, "epoch": 0.40346419759118557, "percentage": 8.07, "elapsed_time": "0:20:33", "remaining_time": "3:54:13", "throughput": 9013.96, "total_tokens": 11119232} +{"current_steps": 16520, "total_steps": 204665, "loss": 0.1306, "lr": 1.6142082376508526e-06, "epoch": 0.40358634842303276, "percentage": 8.07, "elapsed_time": "0:20:33", "remaining_time": "3:54:13", "throughput": 9014.52, "total_tokens": 11123200} +{"current_steps": 16525, "total_steps": 204665, "loss": 0.0965, "lr": 1.614696829041872e-06, "epoch": 0.40370849925487995, "percentage": 8.07, "elapsed_time": "0:20:34", "remaining_time": "3:54:12", "throughput": 9014.66, "total_tokens": 11126528} +{"current_steps": 16530, "total_steps": 204665, "loss": 0.1082, "lr": 1.615185420432892e-06, "epoch": 0.4038306500867271, "percentage": 8.08, "elapsed_time": "0:20:34", "remaining_time": "3:54:11", "throughput": 9014.93, "total_tokens": 11130048} +{"current_steps": 16535, "total_steps": 204665, "loss": 0.1653, "lr": 1.6156740118239116e-06, "epoch": 0.4039528009185743, "percentage": 8.08, "elapsed_time": "0:20:34", "remaining_time": "3:54:11", "throughput": 9015.0, "total_tokens": 11133248} +{"current_steps": 16540, "total_steps": 204665, "loss": 0.198, "lr": 1.6161626032149313e-06, "epoch": 0.4040749517504214, "percentage": 8.08, "elapsed_time": "0:20:35", "remaining_time": "3:54:10", "throughput": 9015.14, "total_tokens": 11136576} +{"current_steps": 16545, "total_steps": 204665, "loss": 0.0332, "lr": 1.616651194605951e-06, "epoch": 0.4041971025822686, "percentage": 8.08, "elapsed_time": "0:20:35", "remaining_time": "3:54:09", "throughput": 9015.36, "total_tokens": 11139968} +{"current_steps": 16550, "total_steps": 204665, "loss": 0.1053, "lr": 1.6171397859969706e-06, "epoch": 0.40431925341411573, "percentage": 8.09, "elapsed_time": "0:20:36", "remaining_time": "3:54:09", "throughput": 9015.41, "total_tokens": 11143104} +{"current_steps": 16555, "total_steps": 204665, "loss": 0.1145, "lr": 1.6176283773879903e-06, "epoch": 0.4044414042459629, "percentage": 8.09, "elapsed_time": "0:20:36", "remaining_time": "3:54:08", "throughput": 9015.81, "total_tokens": 11146816} +{"current_steps": 16560, "total_steps": 204665, "loss": 0.058, "lr": 1.61811696877901e-06, "epoch": 0.40456355507781006, "percentage": 8.09, "elapsed_time": "0:20:36", "remaining_time": "3:54:07", "throughput": 9015.89, "total_tokens": 11150016} +{"current_steps": 16565, "total_steps": 204665, "loss": 0.3602, "lr": 1.6186055601700297e-06, "epoch": 0.40468570590965725, "percentage": 8.09, "elapsed_time": "0:20:37", "remaining_time": "3:54:07", "throughput": 9016.02, "total_tokens": 11153280} +{"current_steps": 16570, "total_steps": 204665, "loss": 0.2336, "lr": 1.6190941515610494e-06, "epoch": 0.40480785674150443, "percentage": 8.1, "elapsed_time": "0:20:37", "remaining_time": "3:54:06", "throughput": 9016.02, "total_tokens": 11156352} +{"current_steps": 16575, "total_steps": 204665, "loss": 0.1717, "lr": 1.6195827429520693e-06, "epoch": 0.40493000757335157, "percentage": 8.1, "elapsed_time": "0:20:37", "remaining_time": "3:54:05", "throughput": 9016.29, "total_tokens": 11159872} +{"current_steps": 16580, "total_steps": 204665, "loss": 0.0624, "lr": 1.6200713343430887e-06, "epoch": 0.40505215840519876, "percentage": 8.1, "elapsed_time": "0:20:38", "remaining_time": "3:54:04", "throughput": 9016.37, "total_tokens": 11163072} +{"current_steps": 16585, "total_steps": 204665, "loss": 0.1306, "lr": 1.6205599257341084e-06, "epoch": 0.4051743092370459, "percentage": 8.1, "elapsed_time": "0:20:38", "remaining_time": "3:54:04", "throughput": 9016.5, "total_tokens": 11166336} +{"current_steps": 16590, "total_steps": 204665, "loss": 0.1343, "lr": 1.6210485171251283e-06, "epoch": 0.4052964600688931, "percentage": 8.11, "elapsed_time": "0:20:38", "remaining_time": "3:54:03", "throughput": 9016.78, "total_tokens": 11169856} +{"current_steps": 16595, "total_steps": 204665, "loss": 0.1087, "lr": 1.6215371085161478e-06, "epoch": 0.4054186109007402, "percentage": 8.11, "elapsed_time": "0:20:39", "remaining_time": "3:54:02", "throughput": 9016.81, "total_tokens": 11172992} +{"current_steps": 16600, "total_steps": 204665, "loss": 0.0759, "lr": 1.6220256999071675e-06, "epoch": 0.4055407617325874, "percentage": 8.11, "elapsed_time": "0:20:39", "remaining_time": "3:54:02", "throughput": 9016.97, "total_tokens": 11176320} +{"current_steps": 16605, "total_steps": 204665, "loss": 0.0217, "lr": 1.6225142912981874e-06, "epoch": 0.40566291256443454, "percentage": 8.11, "elapsed_time": "0:20:39", "remaining_time": "3:54:01", "throughput": 9017.12, "total_tokens": 11179648} +{"current_steps": 16610, "total_steps": 204665, "loss": 0.1103, "lr": 1.623002882689207e-06, "epoch": 0.40578506339628173, "percentage": 8.12, "elapsed_time": "0:20:40", "remaining_time": "3:54:00", "throughput": 9017.09, "total_tokens": 11182656} +{"current_steps": 16615, "total_steps": 204665, "loss": 0.225, "lr": 1.6234914740802265e-06, "epoch": 0.4059072142281289, "percentage": 8.12, "elapsed_time": "0:20:40", "remaining_time": "3:54:00", "throughput": 9017.41, "total_tokens": 11186240} +{"current_steps": 16620, "total_steps": 204665, "loss": 0.1125, "lr": 1.6239800654712464e-06, "epoch": 0.40602936505997606, "percentage": 8.12, "elapsed_time": "0:20:40", "remaining_time": "3:53:59", "throughput": 9017.86, "total_tokens": 11190016} +{"current_steps": 16625, "total_steps": 204665, "loss": 0.0681, "lr": 1.624468656862266e-06, "epoch": 0.40615151589182324, "percentage": 8.12, "elapsed_time": "0:20:41", "remaining_time": "3:53:59", "throughput": 9018.06, "total_tokens": 11193408} +{"current_steps": 16630, "total_steps": 204665, "loss": 0.1086, "lr": 1.6249572482532855e-06, "epoch": 0.4062736667236704, "percentage": 8.13, "elapsed_time": "0:20:41", "remaining_time": "3:53:58", "throughput": 9018.03, "total_tokens": 11196416} +{"current_steps": 16635, "total_steps": 204665, "loss": 0.163, "lr": 1.6254458396443054e-06, "epoch": 0.40639581755551757, "percentage": 8.13, "elapsed_time": "0:20:41", "remaining_time": "3:53:57", "throughput": 9018.24, "total_tokens": 11199808} +{"current_steps": 16640, "total_steps": 204665, "loss": 0.2066, "lr": 1.6259344310353251e-06, "epoch": 0.4065179683873647, "percentage": 8.13, "elapsed_time": "0:20:42", "remaining_time": "3:53:56", "throughput": 9018.35, "total_tokens": 11203072} +{"current_steps": 16645, "total_steps": 204665, "loss": 0.0939, "lr": 1.6264230224263448e-06, "epoch": 0.4066401192192119, "percentage": 8.13, "elapsed_time": "0:20:42", "remaining_time": "3:53:56", "throughput": 9018.5, "total_tokens": 11206400} +{"current_steps": 16650, "total_steps": 204665, "loss": 0.1402, "lr": 1.6269116138173645e-06, "epoch": 0.406762270051059, "percentage": 8.14, "elapsed_time": "0:20:42", "remaining_time": "3:53:55", "throughput": 9018.56, "total_tokens": 11209600} +{"current_steps": 16655, "total_steps": 204665, "loss": 0.1372, "lr": 1.6274002052083842e-06, "epoch": 0.4068844208829062, "percentage": 8.14, "elapsed_time": "0:20:43", "remaining_time": "3:53:54", "throughput": 9018.59, "total_tokens": 11212736} +{"current_steps": 16660, "total_steps": 204665, "loss": 0.1845, "lr": 1.6278887965994038e-06, "epoch": 0.40700657171475335, "percentage": 8.14, "elapsed_time": "0:20:43", "remaining_time": "3:53:54", "throughput": 9018.75, "total_tokens": 11216064} +{"current_steps": 16665, "total_steps": 204665, "loss": 0.1796, "lr": 1.6283773879904235e-06, "epoch": 0.40712872254660054, "percentage": 8.14, "elapsed_time": "0:20:43", "remaining_time": "3:53:53", "throughput": 9018.85, "total_tokens": 11219264} +{"current_steps": 16670, "total_steps": 204665, "loss": 0.0825, "lr": 1.6288659793814432e-06, "epoch": 0.40725087337844773, "percentage": 8.15, "elapsed_time": "0:20:44", "remaining_time": "3:53:52", "throughput": 9019.13, "total_tokens": 11222784} +{"current_steps": 16675, "total_steps": 204665, "loss": 0.1176, "lr": 1.6293545707724629e-06, "epoch": 0.40737302421029487, "percentage": 8.15, "elapsed_time": "0:20:44", "remaining_time": "3:53:52", "throughput": 9019.24, "total_tokens": 11226048} +{"current_steps": 16680, "total_steps": 204665, "loss": 0.0873, "lr": 1.6298431621634826e-06, "epoch": 0.40749517504214205, "percentage": 8.15, "elapsed_time": "0:20:45", "remaining_time": "3:53:51", "throughput": 9019.52, "total_tokens": 11229568} +{"current_steps": 16685, "total_steps": 204665, "loss": 0.0537, "lr": 1.6303317535545023e-06, "epoch": 0.4076173258739892, "percentage": 8.15, "elapsed_time": "0:20:45", "remaining_time": "3:53:50", "throughput": 9019.76, "total_tokens": 11233024} +{"current_steps": 16690, "total_steps": 204665, "loss": 0.0696, "lr": 1.630820344945522e-06, "epoch": 0.4077394767058364, "percentage": 8.15, "elapsed_time": "0:20:45", "remaining_time": "3:53:50", "throughput": 9019.71, "total_tokens": 11236032} +{"current_steps": 16695, "total_steps": 204665, "loss": 0.1806, "lr": 1.6313089363365418e-06, "epoch": 0.4078616275376835, "percentage": 8.16, "elapsed_time": "0:20:46", "remaining_time": "3:53:49", "throughput": 9019.71, "total_tokens": 11239104} +{"current_steps": 16700, "total_steps": 204665, "loss": 0.149, "lr": 1.6317975277275613e-06, "epoch": 0.4079837783695307, "percentage": 8.16, "elapsed_time": "0:20:46", "remaining_time": "3:53:48", "throughput": 9019.82, "total_tokens": 11242368} +{"current_steps": 16705, "total_steps": 204665, "loss": 0.049, "lr": 1.632286119118581e-06, "epoch": 0.40810592920137784, "percentage": 8.16, "elapsed_time": "0:20:46", "remaining_time": "3:53:48", "throughput": 9019.95, "total_tokens": 11245632} +{"current_steps": 16710, "total_steps": 204665, "loss": 0.088, "lr": 1.6327747105096009e-06, "epoch": 0.408228080033225, "percentage": 8.16, "elapsed_time": "0:20:47", "remaining_time": "3:53:47", "throughput": 9020.33, "total_tokens": 11249344} +{"current_steps": 16715, "total_steps": 204665, "loss": 0.0908, "lr": 1.6332633019006203e-06, "epoch": 0.4083502308650722, "percentage": 8.17, "elapsed_time": "0:20:47", "remaining_time": "3:53:47", "throughput": 9020.71, "total_tokens": 11253056} +{"current_steps": 16720, "total_steps": 204665, "loss": 0.1675, "lr": 1.63375189329164e-06, "epoch": 0.40847238169691935, "percentage": 8.17, "elapsed_time": "0:20:47", "remaining_time": "3:53:46", "throughput": 9020.78, "total_tokens": 11256256} +{"current_steps": 16725, "total_steps": 204665, "loss": 0.0575, "lr": 1.63424048468266e-06, "epoch": 0.40859453252876654, "percentage": 8.17, "elapsed_time": "0:20:48", "remaining_time": "3:53:45", "throughput": 9021.15, "total_tokens": 11259904} +{"current_steps": 16730, "total_steps": 204665, "loss": 0.0636, "lr": 1.6347290760736796e-06, "epoch": 0.4087166833606137, "percentage": 8.17, "elapsed_time": "0:20:48", "remaining_time": "3:53:45", "throughput": 9021.36, "total_tokens": 11263296} +{"current_steps": 16735, "total_steps": 204665, "loss": 0.1156, "lr": 1.635217667464699e-06, "epoch": 0.40883883419246086, "percentage": 8.18, "elapsed_time": "0:20:48", "remaining_time": "3:53:44", "throughput": 9021.48, "total_tokens": 11266560} +{"current_steps": 16740, "total_steps": 204665, "loss": 0.171, "lr": 1.635706258855719e-06, "epoch": 0.408960985024308, "percentage": 8.18, "elapsed_time": "0:20:49", "remaining_time": "3:53:43", "throughput": 9022.08, "total_tokens": 11270592} +{"current_steps": 16745, "total_steps": 204665, "loss": 0.1687, "lr": 1.6361948502467386e-06, "epoch": 0.4090831358561552, "percentage": 8.18, "elapsed_time": "0:20:49", "remaining_time": "3:53:43", "throughput": 9022.31, "total_tokens": 11274048} +{"current_steps": 16750, "total_steps": 204665, "loss": 0.1968, "lr": 1.6366834416377581e-06, "epoch": 0.4092052866880023, "percentage": 8.18, "elapsed_time": "0:20:49", "remaining_time": "3:53:42", "throughput": 9022.71, "total_tokens": 11277760} +{"current_steps": 16755, "total_steps": 204665, "loss": 0.1212, "lr": 1.637172033028778e-06, "epoch": 0.4093274375198495, "percentage": 8.19, "elapsed_time": "0:20:50", "remaining_time": "3:53:42", "throughput": 9023.02, "total_tokens": 11281344} +{"current_steps": 16760, "total_steps": 204665, "loss": 0.1246, "lr": 1.6376606244197977e-06, "epoch": 0.4094495883516967, "percentage": 8.19, "elapsed_time": "0:20:50", "remaining_time": "3:53:41", "throughput": 9023.48, "total_tokens": 11285184} +{"current_steps": 16765, "total_steps": 204665, "loss": 0.1465, "lr": 1.6381492158108174e-06, "epoch": 0.40957173918354384, "percentage": 8.19, "elapsed_time": "0:20:50", "remaining_time": "3:53:40", "throughput": 9023.68, "total_tokens": 11288576} +{"current_steps": 16770, "total_steps": 204665, "loss": 0.1732, "lr": 1.638637807201837e-06, "epoch": 0.409693890015391, "percentage": 8.19, "elapsed_time": "0:20:51", "remaining_time": "3:53:40", "throughput": 9023.87, "total_tokens": 11291968} +{"current_steps": 16775, "total_steps": 204665, "loss": 0.1714, "lr": 1.6391263985928567e-06, "epoch": 0.40981604084723816, "percentage": 8.2, "elapsed_time": "0:20:51", "remaining_time": "3:53:39", "throughput": 9024.28, "total_tokens": 11295680} +{"current_steps": 16780, "total_steps": 204665, "loss": 0.1803, "lr": 1.6396149899838764e-06, "epoch": 0.40993819167908535, "percentage": 8.2, "elapsed_time": "0:20:52", "remaining_time": "3:53:39", "throughput": 9024.6, "total_tokens": 11299264} +{"current_steps": 16785, "total_steps": 204665, "loss": 0.1094, "lr": 1.640103581374896e-06, "epoch": 0.4100603425109325, "percentage": 8.2, "elapsed_time": "0:20:52", "remaining_time": "3:53:38", "throughput": 9024.83, "total_tokens": 11302720} +{"current_steps": 16790, "total_steps": 204665, "loss": 0.1998, "lr": 1.6405921727659158e-06, "epoch": 0.4101824933427797, "percentage": 8.2, "elapsed_time": "0:20:52", "remaining_time": "3:53:37", "throughput": 9024.75, "total_tokens": 11305664} +{"current_steps": 16795, "total_steps": 204665, "loss": 0.067, "lr": 1.6410807641569355e-06, "epoch": 0.4103046441746268, "percentage": 8.21, "elapsed_time": "0:20:53", "remaining_time": "3:53:37", "throughput": 9024.94, "total_tokens": 11309056} +{"current_steps": 16800, "total_steps": 204665, "loss": 0.1192, "lr": 1.6415693555479553e-06, "epoch": 0.410426795006474, "percentage": 8.21, "elapsed_time": "0:20:53", "remaining_time": "3:53:36", "throughput": 9025.22, "total_tokens": 11312576} +{"current_steps": 16805, "total_steps": 204665, "loss": 0.1092, "lr": 1.6420579469389748e-06, "epoch": 0.41054894583832113, "percentage": 8.21, "elapsed_time": "0:20:53", "remaining_time": "3:53:35", "throughput": 9025.19, "total_tokens": 11315648} +{"current_steps": 16810, "total_steps": 204665, "loss": 0.1387, "lr": 1.6425465383299945e-06, "epoch": 0.4106710966701683, "percentage": 8.21, "elapsed_time": "0:20:54", "remaining_time": "3:53:35", "throughput": 9025.24, "total_tokens": 11318848} +{"current_steps": 16815, "total_steps": 204665, "loss": 0.1777, "lr": 1.6430351297210144e-06, "epoch": 0.4107932475020155, "percentage": 8.22, "elapsed_time": "0:20:54", "remaining_time": "3:53:34", "throughput": 9025.32, "total_tokens": 11322048} +{"current_steps": 16820, "total_steps": 204665, "loss": 0.125, "lr": 1.6435237211120339e-06, "epoch": 0.41091539833386265, "percentage": 8.22, "elapsed_time": "0:20:54", "remaining_time": "3:53:33", "throughput": 9025.49, "total_tokens": 11325376} +{"current_steps": 16825, "total_steps": 204665, "loss": 0.151, "lr": 1.6440123125030535e-06, "epoch": 0.41103754916570984, "percentage": 8.22, "elapsed_time": "0:20:55", "remaining_time": "3:53:33", "throughput": 9025.64, "total_tokens": 11328704} +{"current_steps": 16830, "total_steps": 204665, "loss": 0.0885, "lr": 1.6445009038940734e-06, "epoch": 0.41115969999755697, "percentage": 8.22, "elapsed_time": "0:20:55", "remaining_time": "3:53:32", "throughput": 9025.63, "total_tokens": 11331776} +{"current_steps": 16835, "total_steps": 204665, "loss": 0.0768, "lr": 1.644989495285093e-06, "epoch": 0.41128185082940416, "percentage": 8.23, "elapsed_time": "0:20:55", "remaining_time": "3:53:31", "throughput": 9026.14, "total_tokens": 11335680} +{"current_steps": 16840, "total_steps": 204665, "loss": 0.159, "lr": 1.6454780866761126e-06, "epoch": 0.4114040016612513, "percentage": 8.23, "elapsed_time": "0:20:56", "remaining_time": "3:53:31", "throughput": 9026.05, "total_tokens": 11338624} +{"current_steps": 16845, "total_steps": 204665, "loss": 0.0856, "lr": 1.6459666780671325e-06, "epoch": 0.4115261524930985, "percentage": 8.23, "elapsed_time": "0:20:56", "remaining_time": "3:53:30", "throughput": 9026.15, "total_tokens": 11341888} +{"current_steps": 16850, "total_steps": 204665, "loss": 0.139, "lr": 1.6464552694581522e-06, "epoch": 0.4116483033249456, "percentage": 8.23, "elapsed_time": "0:20:56", "remaining_time": "3:53:29", "throughput": 9026.64, "total_tokens": 11345728} +{"current_steps": 16855, "total_steps": 204665, "loss": 0.1357, "lr": 1.6469438608491716e-06, "epoch": 0.4117704541567928, "percentage": 8.24, "elapsed_time": "0:20:57", "remaining_time": "3:53:29", "throughput": 9026.86, "total_tokens": 11349184} +{"current_steps": 16860, "total_steps": 204665, "loss": 0.1509, "lr": 1.6474324522401915e-06, "epoch": 0.41189260498864, "percentage": 8.24, "elapsed_time": "0:20:57", "remaining_time": "3:53:28", "throughput": 9026.91, "total_tokens": 11352384} +{"current_steps": 16865, "total_steps": 204665, "loss": 0.1452, "lr": 1.6479210436312112e-06, "epoch": 0.41201475582048713, "percentage": 8.24, "elapsed_time": "0:20:57", "remaining_time": "3:53:28", "throughput": 9027.18, "total_tokens": 11355904} +{"current_steps": 16870, "total_steps": 204665, "loss": 0.0671, "lr": 1.6484096350222307e-06, "epoch": 0.4121369066523343, "percentage": 8.24, "elapsed_time": "0:20:58", "remaining_time": "3:53:27", "throughput": 9027.22, "total_tokens": 11359040} +{"current_steps": 16875, "total_steps": 204665, "loss": 0.1257, "lr": 1.6488982264132506e-06, "epoch": 0.41225905748418146, "percentage": 8.25, "elapsed_time": "0:20:58", "remaining_time": "3:53:26", "throughput": 9027.14, "total_tokens": 11361984} +{"current_steps": 16880, "total_steps": 204665, "loss": 0.066, "lr": 1.6493868178042702e-06, "epoch": 0.41238120831602865, "percentage": 8.25, "elapsed_time": "0:20:59", "remaining_time": "3:53:26", "throughput": 9027.76, "total_tokens": 11366016} +{"current_steps": 16885, "total_steps": 204665, "loss": 0.1641, "lr": 1.64987540919529e-06, "epoch": 0.4125033591478758, "percentage": 8.25, "elapsed_time": "0:20:59", "remaining_time": "3:53:25", "throughput": 9028.32, "total_tokens": 11369984} +{"current_steps": 16890, "total_steps": 204665, "loss": 0.1858, "lr": 1.6503640005863096e-06, "epoch": 0.41262550997972297, "percentage": 8.25, "elapsed_time": "0:20:59", "remaining_time": "3:53:24", "throughput": 9028.46, "total_tokens": 11373312} +{"current_steps": 16895, "total_steps": 204665, "loss": 0.1818, "lr": 1.6508525919773293e-06, "epoch": 0.4127476608115701, "percentage": 8.25, "elapsed_time": "0:21:00", "remaining_time": "3:53:24", "throughput": 9028.49, "total_tokens": 11376448} +{"current_steps": 16900, "total_steps": 204665, "loss": 0.0363, "lr": 1.651341183368349e-06, "epoch": 0.4128698116434173, "percentage": 8.26, "elapsed_time": "0:21:00", "remaining_time": "3:53:23", "throughput": 9028.74, "total_tokens": 11379904} +{"current_steps": 16905, "total_steps": 204665, "loss": 0.1037, "lr": 1.6518297747593687e-06, "epoch": 0.41299196247526443, "percentage": 8.26, "elapsed_time": "0:21:00", "remaining_time": "3:53:22", "throughput": 9028.97, "total_tokens": 11383360} +{"current_steps": 16910, "total_steps": 204665, "loss": 0.2688, "lr": 1.6523183661503883e-06, "epoch": 0.4131141133071116, "percentage": 8.26, "elapsed_time": "0:21:01", "remaining_time": "3:53:22", "throughput": 9029.07, "total_tokens": 11386624} +{"current_steps": 16915, "total_steps": 204665, "loss": 0.0447, "lr": 1.652806957541408e-06, "epoch": 0.4132362641389588, "percentage": 8.26, "elapsed_time": "0:21:01", "remaining_time": "3:53:21", "throughput": 9029.37, "total_tokens": 11390208} +{"current_steps": 16920, "total_steps": 204665, "loss": 0.1803, "lr": 1.653295548932428e-06, "epoch": 0.41335841497080594, "percentage": 8.27, "elapsed_time": "0:21:01", "remaining_time": "3:53:21", "throughput": 9029.84, "total_tokens": 11394048} +{"current_steps": 16925, "total_steps": 204665, "loss": 0.0391, "lr": 1.6537841403234474e-06, "epoch": 0.41348056580265313, "percentage": 8.27, "elapsed_time": "0:21:02", "remaining_time": "3:53:20", "throughput": 9030.0, "total_tokens": 11397376} +{"current_steps": 16930, "total_steps": 204665, "loss": 0.126, "lr": 1.654272731714467e-06, "epoch": 0.41360271663450027, "percentage": 8.27, "elapsed_time": "0:21:02", "remaining_time": "3:53:19", "throughput": 9030.32, "total_tokens": 11400960} +{"current_steps": 16935, "total_steps": 204665, "loss": 0.0747, "lr": 1.654761323105487e-06, "epoch": 0.41372486746634746, "percentage": 8.27, "elapsed_time": "0:21:02", "remaining_time": "3:53:19", "throughput": 9030.4, "total_tokens": 11404160} +{"current_steps": 16940, "total_steps": 204665, "loss": 0.1278, "lr": 1.6552499144965064e-06, "epoch": 0.4138470182981946, "percentage": 8.28, "elapsed_time": "0:21:03", "remaining_time": "3:53:18", "throughput": 9030.54, "total_tokens": 11407488} +{"current_steps": 16945, "total_steps": 204665, "loss": 0.0899, "lr": 1.655738505887526e-06, "epoch": 0.4139691691300418, "percentage": 8.28, "elapsed_time": "0:21:03", "remaining_time": "3:53:17", "throughput": 9030.78, "total_tokens": 11410944} +{"current_steps": 16950, "total_steps": 204665, "loss": 0.1208, "lr": 1.656227097278546e-06, "epoch": 0.4140913199618889, "percentage": 8.28, "elapsed_time": "0:21:03", "remaining_time": "3:53:17", "throughput": 9031.3, "total_tokens": 11414848} +{"current_steps": 16955, "total_steps": 204665, "loss": 0.0931, "lr": 1.6567156886695655e-06, "epoch": 0.4142134707937361, "percentage": 8.28, "elapsed_time": "0:21:04", "remaining_time": "3:53:16", "throughput": 9031.47, "total_tokens": 11418176} +{"current_steps": 16960, "total_steps": 204665, "loss": 0.3264, "lr": 1.6572042800605851e-06, "epoch": 0.4143356216255833, "percentage": 8.29, "elapsed_time": "0:21:04", "remaining_time": "3:53:16", "throughput": 9031.63, "total_tokens": 11421504} +{"current_steps": 16965, "total_steps": 204665, "loss": 0.0632, "lr": 1.657692871451605e-06, "epoch": 0.41445777245743043, "percentage": 8.29, "elapsed_time": "0:21:04", "remaining_time": "3:53:15", "throughput": 9031.82, "total_tokens": 11424896} +{"current_steps": 16970, "total_steps": 204665, "loss": 0.2217, "lr": 1.6581814628426247e-06, "epoch": 0.4145799232892776, "percentage": 8.29, "elapsed_time": "0:21:05", "remaining_time": "3:53:14", "throughput": 9031.89, "total_tokens": 11428096} +{"current_steps": 16975, "total_steps": 204665, "loss": 0.1892, "lr": 1.6586700542336442e-06, "epoch": 0.41470207412112475, "percentage": 8.29, "elapsed_time": "0:21:05", "remaining_time": "3:53:14", "throughput": 9032.33, "total_tokens": 11431872} +{"current_steps": 16980, "total_steps": 204665, "loss": 0.118, "lr": 1.659158645624664e-06, "epoch": 0.41482422495297194, "percentage": 8.3, "elapsed_time": "0:21:06", "remaining_time": "3:53:13", "throughput": 9032.34, "total_tokens": 11434944} +{"current_steps": 16985, "total_steps": 204665, "loss": 0.1374, "lr": 1.6596472370156838e-06, "epoch": 0.4149463757848191, "percentage": 8.3, "elapsed_time": "0:21:06", "remaining_time": "3:53:12", "throughput": 9032.68, "total_tokens": 11438528} +{"current_steps": 16990, "total_steps": 204665, "loss": 0.1233, "lr": 1.6601358284067032e-06, "epoch": 0.41506852661666627, "percentage": 8.3, "elapsed_time": "0:21:06", "remaining_time": "3:53:12", "throughput": 9032.6, "total_tokens": 11441472} +{"current_steps": 16995, "total_steps": 204665, "loss": 0.0969, "lr": 1.6606244197977231e-06, "epoch": 0.4151906774485134, "percentage": 8.3, "elapsed_time": "0:21:07", "remaining_time": "3:53:11", "throughput": 9032.74, "total_tokens": 11444800} +{"current_steps": 17000, "total_steps": 204665, "loss": 0.1357, "lr": 1.6611130111887428e-06, "epoch": 0.4153128282803606, "percentage": 8.31, "elapsed_time": "0:21:07", "remaining_time": "3:53:10", "throughput": 9033.18, "total_tokens": 11448576} +{"current_steps": 17005, "total_steps": 204665, "loss": 0.1851, "lr": 1.6616016025797625e-06, "epoch": 0.4154349791122078, "percentage": 8.31, "elapsed_time": "0:21:07", "remaining_time": "3:53:10", "throughput": 9033.39, "total_tokens": 11451968} +{"current_steps": 17010, "total_steps": 204665, "loss": 0.0463, "lr": 1.6620901939707822e-06, "epoch": 0.4155571299440549, "percentage": 8.31, "elapsed_time": "0:21:08", "remaining_time": "3:53:09", "throughput": 9033.5, "total_tokens": 11455232} +{"current_steps": 17015, "total_steps": 204665, "loss": 0.0914, "lr": 1.6625787853618019e-06, "epoch": 0.4156792807759021, "percentage": 8.31, "elapsed_time": "0:21:08", "remaining_time": "3:53:08", "throughput": 9033.47, "total_tokens": 11458240} +{"current_steps": 17020, "total_steps": 204665, "loss": 0.1541, "lr": 1.6630673767528215e-06, "epoch": 0.41580143160774924, "percentage": 8.32, "elapsed_time": "0:21:08", "remaining_time": "3:53:08", "throughput": 9033.49, "total_tokens": 11461376} +{"current_steps": 17025, "total_steps": 204665, "loss": 0.1685, "lr": 1.6635559681438412e-06, "epoch": 0.41592358243959643, "percentage": 8.32, "elapsed_time": "0:21:09", "remaining_time": "3:53:07", "throughput": 9033.58, "total_tokens": 11464576} +{"current_steps": 17030, "total_steps": 204665, "loss": 0.1494, "lr": 1.664044559534861e-06, "epoch": 0.41604573327144356, "percentage": 8.32, "elapsed_time": "0:21:09", "remaining_time": "3:53:06", "throughput": 9033.81, "total_tokens": 11468032} +{"current_steps": 17035, "total_steps": 204665, "loss": 0.0575, "lr": 1.6645331509258806e-06, "epoch": 0.41616788410329075, "percentage": 8.32, "elapsed_time": "0:21:09", "remaining_time": "3:53:06", "throughput": 9034.48, "total_tokens": 11472192} +{"current_steps": 17040, "total_steps": 204665, "loss": 0.1208, "lr": 1.6650217423169005e-06, "epoch": 0.4162900349351379, "percentage": 8.33, "elapsed_time": "0:21:10", "remaining_time": "3:53:05", "throughput": 9034.51, "total_tokens": 11475328} +{"current_steps": 17045, "total_steps": 204665, "loss": 0.1505, "lr": 1.66551033370792e-06, "epoch": 0.4164121857669851, "percentage": 8.33, "elapsed_time": "0:21:10", "remaining_time": "3:53:04", "throughput": 9034.45, "total_tokens": 11478336} +{"current_steps": 17050, "total_steps": 204665, "loss": 0.1621, "lr": 1.6659989250989396e-06, "epoch": 0.4165343365988322, "percentage": 8.33, "elapsed_time": "0:21:10", "remaining_time": "3:53:04", "throughput": 9034.76, "total_tokens": 11481920} +{"current_steps": 17055, "total_steps": 204665, "loss": 0.187, "lr": 1.6664875164899595e-06, "epoch": 0.4166564874306794, "percentage": 8.33, "elapsed_time": "0:21:11", "remaining_time": "3:53:03", "throughput": 9034.75, "total_tokens": 11484992} +{"current_steps": 17060, "total_steps": 204665, "loss": 0.0291, "lr": 1.666976107880979e-06, "epoch": 0.4167786382625266, "percentage": 8.34, "elapsed_time": "0:21:11", "remaining_time": "3:53:02", "throughput": 9034.95, "total_tokens": 11488384} +{"current_steps": 17065, "total_steps": 204665, "loss": 0.1093, "lr": 1.6674646992719987e-06, "epoch": 0.4169007890943737, "percentage": 8.34, "elapsed_time": "0:21:11", "remaining_time": "3:53:02", "throughput": 9035.11, "total_tokens": 11491712} +{"current_steps": 17070, "total_steps": 204665, "loss": 0.0532, "lr": 1.6679532906630186e-06, "epoch": 0.4170229399262209, "percentage": 8.34, "elapsed_time": "0:21:12", "remaining_time": "3:53:01", "throughput": 9035.37, "total_tokens": 11495232} +{"current_steps": 17075, "total_steps": 204665, "loss": 0.2184, "lr": 1.6684418820540382e-06, "epoch": 0.41714509075806805, "percentage": 8.34, "elapsed_time": "0:21:12", "remaining_time": "3:53:00", "throughput": 9035.37, "total_tokens": 11498304} +{"current_steps": 17080, "total_steps": 204665, "loss": 0.1244, "lr": 1.6689304734450577e-06, "epoch": 0.41726724158991524, "percentage": 8.35, "elapsed_time": "0:21:12", "remaining_time": "3:53:00", "throughput": 9035.53, "total_tokens": 11501632} +{"current_steps": 17085, "total_steps": 204665, "loss": 0.0564, "lr": 1.6694190648360776e-06, "epoch": 0.4173893924217624, "percentage": 8.35, "elapsed_time": "0:21:13", "remaining_time": "3:52:59", "throughput": 9035.79, "total_tokens": 11505152} +{"current_steps": 17090, "total_steps": 204665, "loss": 0.0455, "lr": 1.6699076562270973e-06, "epoch": 0.41751154325360956, "percentage": 8.35, "elapsed_time": "0:21:13", "remaining_time": "3:52:58", "throughput": 9035.87, "total_tokens": 11508352} +{"current_steps": 17095, "total_steps": 204665, "loss": 0.138, "lr": 1.6703962476181168e-06, "epoch": 0.4176336940854567, "percentage": 8.35, "elapsed_time": "0:21:13", "remaining_time": "3:52:58", "throughput": 9036.33, "total_tokens": 11512192} +{"current_steps": 17100, "total_steps": 204665, "loss": 0.1228, "lr": 1.6708848390091366e-06, "epoch": 0.4177558449173039, "percentage": 8.36, "elapsed_time": "0:21:14", "remaining_time": "3:52:57", "throughput": 9036.46, "total_tokens": 11515456} +{"current_steps": 17105, "total_steps": 204665, "loss": 0.1136, "lr": 1.6713734304001563e-06, "epoch": 0.4178779957491511, "percentage": 8.36, "elapsed_time": "0:21:14", "remaining_time": "3:52:57", "throughput": 9036.32, "total_tokens": 11518336} +{"current_steps": 17110, "total_steps": 204665, "loss": 0.0804, "lr": 1.6718620217911758e-06, "epoch": 0.4180001465809982, "percentage": 8.36, "elapsed_time": "0:21:15", "remaining_time": "3:52:56", "throughput": 9036.65, "total_tokens": 11521920} +{"current_steps": 17115, "total_steps": 204665, "loss": 0.0496, "lr": 1.6723506131821957e-06, "epoch": 0.4181222974128454, "percentage": 8.36, "elapsed_time": "0:21:15", "remaining_time": "3:52:55", "throughput": 9036.81, "total_tokens": 11525248} +{"current_steps": 17120, "total_steps": 204665, "loss": 0.055, "lr": 1.6728392045732154e-06, "epoch": 0.41824444824469253, "percentage": 8.36, "elapsed_time": "0:21:15", "remaining_time": "3:52:55", "throughput": 9036.75, "total_tokens": 11528256} +{"current_steps": 17125, "total_steps": 204665, "loss": 0.1625, "lr": 1.673327795964235e-06, "epoch": 0.4183665990765397, "percentage": 8.37, "elapsed_time": "0:21:16", "remaining_time": "3:52:54", "throughput": 9036.72, "total_tokens": 11531264} +{"current_steps": 17130, "total_steps": 204665, "loss": 0.1851, "lr": 1.6738163873552547e-06, "epoch": 0.41848874990838686, "percentage": 8.37, "elapsed_time": "0:21:16", "remaining_time": "3:52:53", "throughput": 9036.99, "total_tokens": 11534784} +{"current_steps": 17135, "total_steps": 204665, "loss": 0.2029, "lr": 1.6743049787462744e-06, "epoch": 0.41861090074023405, "percentage": 8.37, "elapsed_time": "0:21:16", "remaining_time": "3:52:53", "throughput": 9037.61, "total_tokens": 11538880} +{"current_steps": 17140, "total_steps": 204665, "loss": 0.2042, "lr": 1.674793570137294e-06, "epoch": 0.4187330515720812, "percentage": 8.37, "elapsed_time": "0:21:17", "remaining_time": "3:52:52", "throughput": 9037.67, "total_tokens": 11542080} +{"current_steps": 17145, "total_steps": 204665, "loss": 0.0884, "lr": 1.6752821615283138e-06, "epoch": 0.4188552024039284, "percentage": 8.38, "elapsed_time": "0:21:17", "remaining_time": "3:52:51", "throughput": 9037.97, "total_tokens": 11545664} +{"current_steps": 17150, "total_steps": 204665, "loss": 0.0796, "lr": 1.6757707529193335e-06, "epoch": 0.41897735323577556, "percentage": 8.38, "elapsed_time": "0:21:17", "remaining_time": "3:52:51", "throughput": 9038.16, "total_tokens": 11549056} +{"current_steps": 17155, "total_steps": 204665, "loss": 0.166, "lr": 1.6762593443103531e-06, "epoch": 0.4190995040676227, "percentage": 8.38, "elapsed_time": "0:21:18", "remaining_time": "3:52:50", "throughput": 9038.06, "total_tokens": 11552000} +{"current_steps": 17160, "total_steps": 204665, "loss": 0.2232, "lr": 1.676747935701373e-06, "epoch": 0.4192216548994699, "percentage": 8.38, "elapsed_time": "0:21:18", "remaining_time": "3:52:49", "throughput": 9038.02, "total_tokens": 11555008} +{"current_steps": 17165, "total_steps": 204665, "loss": 0.1723, "lr": 1.6772365270923925e-06, "epoch": 0.419343805731317, "percentage": 8.39, "elapsed_time": "0:21:18", "remaining_time": "3:52:49", "throughput": 9038.43, "total_tokens": 11558720} +{"current_steps": 17170, "total_steps": 204665, "loss": 0.0894, "lr": 1.6777251184834122e-06, "epoch": 0.4194659565631642, "percentage": 8.39, "elapsed_time": "0:21:19", "remaining_time": "3:52:48", "throughput": 9038.58, "total_tokens": 11562048} +{"current_steps": 17175, "total_steps": 204665, "loss": 0.0682, "lr": 1.678213709874432e-06, "epoch": 0.41958810739501134, "percentage": 8.39, "elapsed_time": "0:21:19", "remaining_time": "3:52:48", "throughput": 9038.88, "total_tokens": 11565632} +{"current_steps": 17180, "total_steps": 204665, "loss": 0.1125, "lr": 1.6787023012654515e-06, "epoch": 0.41971025822685853, "percentage": 8.39, "elapsed_time": "0:21:19", "remaining_time": "3:52:47", "throughput": 9039.14, "total_tokens": 11569152} +{"current_steps": 17185, "total_steps": 204665, "loss": 0.18, "lr": 1.6791908926564712e-06, "epoch": 0.41983240905870567, "percentage": 8.4, "elapsed_time": "0:21:20", "remaining_time": "3:52:46", "throughput": 9039.25, "total_tokens": 11572416} +{"current_steps": 17190, "total_steps": 204665, "loss": 0.0782, "lr": 1.6796794840474911e-06, "epoch": 0.41995455989055286, "percentage": 8.4, "elapsed_time": "0:21:20", "remaining_time": "3:52:46", "throughput": 9039.26, "total_tokens": 11575552} +{"current_steps": 17195, "total_steps": 204665, "loss": 0.0783, "lr": 1.6801680754385108e-06, "epoch": 0.4200767107224, "percentage": 8.4, "elapsed_time": "0:21:20", "remaining_time": "3:52:45", "throughput": 9039.28, "total_tokens": 11578688} +{"current_steps": 17200, "total_steps": 204665, "loss": 0.2211, "lr": 1.6806566668295303e-06, "epoch": 0.4201988615542472, "percentage": 8.4, "elapsed_time": "0:21:21", "remaining_time": "3:52:44", "throughput": 9039.2, "total_tokens": 11581632} +{"current_steps": 17205, "total_steps": 204665, "loss": 0.2273, "lr": 1.6811452582205502e-06, "epoch": 0.4203210123860944, "percentage": 8.41, "elapsed_time": "0:21:21", "remaining_time": "3:52:44", "throughput": 9039.32, "total_tokens": 11584896} +{"current_steps": 17210, "total_steps": 204665, "loss": 0.0593, "lr": 1.6816338496115698e-06, "epoch": 0.4204431632179415, "percentage": 8.41, "elapsed_time": "0:21:21", "remaining_time": "3:52:43", "throughput": 9039.6, "total_tokens": 11588416} +{"current_steps": 17215, "total_steps": 204665, "loss": 0.0771, "lr": 1.6821224410025893e-06, "epoch": 0.4205653140497887, "percentage": 8.41, "elapsed_time": "0:21:22", "remaining_time": "3:52:42", "throughput": 9039.59, "total_tokens": 11591488} +{"current_steps": 17220, "total_steps": 204665, "loss": 0.094, "lr": 1.6826110323936092e-06, "epoch": 0.42068746488163583, "percentage": 8.41, "elapsed_time": "0:21:22", "remaining_time": "3:52:42", "throughput": 9039.82, "total_tokens": 11594944} +{"current_steps": 17225, "total_steps": 204665, "loss": 0.1358, "lr": 1.6830996237846289e-06, "epoch": 0.420809615713483, "percentage": 8.42, "elapsed_time": "0:21:22", "remaining_time": "3:52:41", "throughput": 9039.78, "total_tokens": 11597952} +{"current_steps": 17230, "total_steps": 204665, "loss": 0.1798, "lr": 1.6835882151756484e-06, "epoch": 0.42093176654533015, "percentage": 8.42, "elapsed_time": "0:21:23", "remaining_time": "3:52:40", "throughput": 9039.97, "total_tokens": 11601344} +{"current_steps": 17235, "total_steps": 204665, "loss": 0.1284, "lr": 1.6840768065666683e-06, "epoch": 0.42105391737717734, "percentage": 8.42, "elapsed_time": "0:21:23", "remaining_time": "3:52:40", "throughput": 9040.25, "total_tokens": 11604864} +{"current_steps": 17240, "total_steps": 204665, "loss": 0.0769, "lr": 1.684565397957688e-06, "epoch": 0.4211760682090245, "percentage": 8.42, "elapsed_time": "0:21:24", "remaining_time": "3:52:39", "throughput": 9040.46, "total_tokens": 11608320} +{"current_steps": 17245, "total_steps": 204665, "loss": 0.1262, "lr": 1.6850539893487076e-06, "epoch": 0.42129821904087167, "percentage": 8.43, "elapsed_time": "0:21:24", "remaining_time": "3:52:38", "throughput": 9040.82, "total_tokens": 11611968} +{"current_steps": 17250, "total_steps": 204665, "loss": 0.1179, "lr": 1.6855425807397273e-06, "epoch": 0.42142036987271886, "percentage": 8.43, "elapsed_time": "0:21:24", "remaining_time": "3:52:38", "throughput": 9041.31, "total_tokens": 11615872} +{"current_steps": 17255, "total_steps": 204665, "loss": 0.0897, "lr": 1.686031172130747e-06, "epoch": 0.421542520704566, "percentage": 8.43, "elapsed_time": "0:21:25", "remaining_time": "3:52:37", "throughput": 9041.42, "total_tokens": 11619136} +{"current_steps": 17260, "total_steps": 204665, "loss": 0.1211, "lr": 1.6865197635217667e-06, "epoch": 0.4216646715364132, "percentage": 8.43, "elapsed_time": "0:21:25", "remaining_time": "3:52:37", "throughput": 9041.52, "total_tokens": 11622400} +{"current_steps": 17265, "total_steps": 204665, "loss": 0.117, "lr": 1.6870083549127863e-06, "epoch": 0.4217868223682603, "percentage": 8.44, "elapsed_time": "0:21:25", "remaining_time": "3:52:36", "throughput": 9041.6, "total_tokens": 11625600} +{"current_steps": 17270, "total_steps": 204665, "loss": 0.2705, "lr": 1.687496946303806e-06, "epoch": 0.4219089732001075, "percentage": 8.44, "elapsed_time": "0:21:26", "remaining_time": "3:52:35", "throughput": 9041.78, "total_tokens": 11628928} +{"current_steps": 17275, "total_steps": 204665, "loss": 0.1376, "lr": 1.6879855376948257e-06, "epoch": 0.42203112403195464, "percentage": 8.44, "elapsed_time": "0:21:26", "remaining_time": "3:52:35", "throughput": 9041.89, "total_tokens": 11632192} +{"current_steps": 17280, "total_steps": 204665, "loss": 0.109, "lr": 1.6884741290858456e-06, "epoch": 0.42215327486380183, "percentage": 8.44, "elapsed_time": "0:21:26", "remaining_time": "3:52:34", "throughput": 9041.83, "total_tokens": 11635200} +{"current_steps": 17285, "total_steps": 204665, "loss": 0.0928, "lr": 1.688962720476865e-06, "epoch": 0.42227542569564896, "percentage": 8.45, "elapsed_time": "0:21:27", "remaining_time": "3:52:33", "throughput": 9041.86, "total_tokens": 11638336} +{"current_steps": 17290, "total_steps": 204665, "loss": 0.0988, "lr": 1.6894513118678847e-06, "epoch": 0.42239757652749615, "percentage": 8.45, "elapsed_time": "0:21:27", "remaining_time": "3:52:33", "throughput": 9042.25, "total_tokens": 11642048} +{"current_steps": 17295, "total_steps": 204665, "loss": 0.182, "lr": 1.6899399032589046e-06, "epoch": 0.42251972735934334, "percentage": 8.45, "elapsed_time": "0:21:27", "remaining_time": "3:52:32", "throughput": 9042.32, "total_tokens": 11645248} +{"current_steps": 17300, "total_steps": 204665, "loss": 0.1811, "lr": 1.6904284946499241e-06, "epoch": 0.4226418781911905, "percentage": 8.45, "elapsed_time": "0:21:28", "remaining_time": "3:52:31", "throughput": 9042.58, "total_tokens": 11648768} +{"current_steps": 17305, "total_steps": 204665, "loss": 0.0477, "lr": 1.6909170860409438e-06, "epoch": 0.42276402902303767, "percentage": 8.46, "elapsed_time": "0:21:28", "remaining_time": "3:52:31", "throughput": 9042.7, "total_tokens": 11652032} +{"current_steps": 17310, "total_steps": 204665, "loss": 0.082, "lr": 1.6914056774319637e-06, "epoch": 0.4228861798548848, "percentage": 8.46, "elapsed_time": "0:21:28", "remaining_time": "3:52:30", "throughput": 9042.7, "total_tokens": 11655104} +{"current_steps": 17315, "total_steps": 204665, "loss": 0.2205, "lr": 1.6918942688229834e-06, "epoch": 0.423008330686732, "percentage": 8.46, "elapsed_time": "0:21:29", "remaining_time": "3:52:29", "throughput": 9042.99, "total_tokens": 11658624} +{"current_steps": 17320, "total_steps": 204665, "loss": 0.1187, "lr": 1.6923828602140028e-06, "epoch": 0.4231304815185791, "percentage": 8.46, "elapsed_time": "0:21:29", "remaining_time": "3:52:28", "throughput": 9042.83, "total_tokens": 11661440} +{"current_steps": 17325, "total_steps": 204665, "loss": 0.0832, "lr": 1.6928714516050227e-06, "epoch": 0.4232526323504263, "percentage": 8.47, "elapsed_time": "0:21:29", "remaining_time": "3:52:28", "throughput": 9042.81, "total_tokens": 11664512} +{"current_steps": 17330, "total_steps": 204665, "loss": 0.2175, "lr": 1.6933600429960424e-06, "epoch": 0.42337478318227345, "percentage": 8.47, "elapsed_time": "0:21:30", "remaining_time": "3:52:27", "throughput": 9042.85, "total_tokens": 11667648} +{"current_steps": 17335, "total_steps": 204665, "loss": 0.1996, "lr": 1.6938486343870619e-06, "epoch": 0.42349693401412064, "percentage": 8.47, "elapsed_time": "0:21:30", "remaining_time": "3:52:26", "throughput": 9042.89, "total_tokens": 11670784} +{"current_steps": 17340, "total_steps": 204665, "loss": 0.0694, "lr": 1.6943372257780818e-06, "epoch": 0.4236190848459678, "percentage": 8.47, "elapsed_time": "0:21:30", "remaining_time": "3:52:26", "throughput": 9042.84, "total_tokens": 11673792} +{"current_steps": 17345, "total_steps": 204665, "loss": 0.0343, "lr": 1.6948258171691015e-06, "epoch": 0.42374123567781496, "percentage": 8.47, "elapsed_time": "0:21:31", "remaining_time": "3:52:25", "throughput": 9043.28, "total_tokens": 11677568} +{"current_steps": 17350, "total_steps": 204665, "loss": 0.1287, "lr": 1.6953144085601211e-06, "epoch": 0.42386338650966215, "percentage": 8.48, "elapsed_time": "0:21:31", "remaining_time": "3:52:24", "throughput": 9043.63, "total_tokens": 11681216} +{"current_steps": 17355, "total_steps": 204665, "loss": 0.2124, "lr": 1.6958029999511408e-06, "epoch": 0.4239855373415093, "percentage": 8.48, "elapsed_time": "0:21:31", "remaining_time": "3:52:24", "throughput": 9043.78, "total_tokens": 11684544} +{"current_steps": 17360, "total_steps": 204665, "loss": 0.1128, "lr": 1.6962915913421605e-06, "epoch": 0.4241076881733565, "percentage": 8.48, "elapsed_time": "0:21:32", "remaining_time": "3:52:23", "throughput": 9043.86, "total_tokens": 11687744} +{"current_steps": 17365, "total_steps": 204665, "loss": 0.0695, "lr": 1.6967801827331802e-06, "epoch": 0.4242298390052036, "percentage": 8.48, "elapsed_time": "0:21:32", "remaining_time": "3:52:23", "throughput": 9044.02, "total_tokens": 11691136} +{"current_steps": 17370, "total_steps": 204665, "loss": 0.0546, "lr": 1.6972687741241999e-06, "epoch": 0.4243519898370508, "percentage": 8.49, "elapsed_time": "0:21:33", "remaining_time": "3:52:22", "throughput": 9044.25, "total_tokens": 11694592} +{"current_steps": 17375, "total_steps": 204665, "loss": 0.1033, "lr": 1.6977573655152195e-06, "epoch": 0.42447414066889794, "percentage": 8.49, "elapsed_time": "0:21:33", "remaining_time": "3:52:21", "throughput": 9044.37, "total_tokens": 11697856} +{"current_steps": 17380, "total_steps": 204665, "loss": 0.0761, "lr": 1.6982459569062392e-06, "epoch": 0.4245962915007451, "percentage": 8.49, "elapsed_time": "0:21:33", "remaining_time": "3:52:21", "throughput": 9044.84, "total_tokens": 11701696} +{"current_steps": 17385, "total_steps": 204665, "loss": 0.1001, "lr": 1.698734548297259e-06, "epoch": 0.42471844233259226, "percentage": 8.49, "elapsed_time": "0:21:34", "remaining_time": "3:52:20", "throughput": 9044.88, "total_tokens": 11704832} +{"current_steps": 17390, "total_steps": 204665, "loss": 0.0872, "lr": 1.6992231396882786e-06, "epoch": 0.42484059316443945, "percentage": 8.5, "elapsed_time": "0:21:34", "remaining_time": "3:52:19", "throughput": 9045.01, "total_tokens": 11708096} +{"current_steps": 17395, "total_steps": 204665, "loss": 0.1583, "lr": 1.6997117310792983e-06, "epoch": 0.42496274399628664, "percentage": 8.5, "elapsed_time": "0:21:34", "remaining_time": "3:52:19", "throughput": 9044.9, "total_tokens": 11710976} +{"current_steps": 17400, "total_steps": 204665, "loss": 0.0655, "lr": 1.7002003224703182e-06, "epoch": 0.4250848948281338, "percentage": 8.5, "elapsed_time": "0:21:35", "remaining_time": "3:52:18", "throughput": 9045.21, "total_tokens": 11714560} +{"current_steps": 17405, "total_steps": 204665, "loss": 0.2183, "lr": 1.7006889138613376e-06, "epoch": 0.42520704565998096, "percentage": 8.5, "elapsed_time": "0:21:35", "remaining_time": "3:52:17", "throughput": 9045.28, "total_tokens": 11717760} +{"current_steps": 17410, "total_steps": 204665, "loss": 0.1628, "lr": 1.7011775052523573e-06, "epoch": 0.4253291964918281, "percentage": 8.51, "elapsed_time": "0:21:35", "remaining_time": "3:52:17", "throughput": 9045.46, "total_tokens": 11721152} +{"current_steps": 17415, "total_steps": 204665, "loss": 0.0634, "lr": 1.7016660966433772e-06, "epoch": 0.4254513473236753, "percentage": 8.51, "elapsed_time": "0:21:36", "remaining_time": "3:52:16", "throughput": 9045.46, "total_tokens": 11724224} +{"current_steps": 17420, "total_steps": 204665, "loss": 0.0711, "lr": 1.7021546880343967e-06, "epoch": 0.4255734981555224, "percentage": 8.51, "elapsed_time": "0:21:36", "remaining_time": "3:52:15", "throughput": 9045.44, "total_tokens": 11727296} +{"current_steps": 17425, "total_steps": 204665, "loss": 0.0998, "lr": 1.7026432794254164e-06, "epoch": 0.4256956489873696, "percentage": 8.51, "elapsed_time": "0:21:36", "remaining_time": "3:52:15", "throughput": 9045.59, "total_tokens": 11730624} +{"current_steps": 17430, "total_steps": 204665, "loss": 0.1372, "lr": 1.7031318708164362e-06, "epoch": 0.42581779981921675, "percentage": 8.52, "elapsed_time": "0:21:37", "remaining_time": "3:52:14", "throughput": 9045.7, "total_tokens": 11733888} +{"current_steps": 17435, "total_steps": 204665, "loss": 0.1501, "lr": 1.703620462207456e-06, "epoch": 0.42593995065106394, "percentage": 8.52, "elapsed_time": "0:21:37", "remaining_time": "3:52:13", "throughput": 9045.8, "total_tokens": 11737152} +{"current_steps": 17440, "total_steps": 204665, "loss": 0.2104, "lr": 1.7041090535984754e-06, "epoch": 0.42606210148291107, "percentage": 8.52, "elapsed_time": "0:21:37", "remaining_time": "3:52:13", "throughput": 9045.87, "total_tokens": 11740352} +{"current_steps": 17445, "total_steps": 204665, "loss": 0.1267, "lr": 1.7045976449894953e-06, "epoch": 0.42618425231475826, "percentage": 8.52, "elapsed_time": "0:21:38", "remaining_time": "3:52:12", "throughput": 9046.42, "total_tokens": 11744320} +{"current_steps": 17450, "total_steps": 204665, "loss": 0.0531, "lr": 1.705086236380515e-06, "epoch": 0.42630640314660545, "percentage": 8.53, "elapsed_time": "0:21:38", "remaining_time": "3:52:11", "throughput": 9046.39, "total_tokens": 11747328} +{"current_steps": 17455, "total_steps": 204665, "loss": 0.1527, "lr": 1.7055748277715344e-06, "epoch": 0.4264285539784526, "percentage": 8.53, "elapsed_time": "0:21:38", "remaining_time": "3:52:11", "throughput": 9046.32, "total_tokens": 11750272} +{"current_steps": 17460, "total_steps": 204665, "loss": 0.1504, "lr": 1.7060634191625543e-06, "epoch": 0.4265507048102998, "percentage": 8.53, "elapsed_time": "0:21:39", "remaining_time": "3:52:10", "throughput": 9046.6, "total_tokens": 11753856} +{"current_steps": 17465, "total_steps": 204665, "loss": 0.0409, "lr": 1.706552010553574e-06, "epoch": 0.4266728556421469, "percentage": 8.53, "elapsed_time": "0:21:39", "remaining_time": "3:52:09", "throughput": 9046.58, "total_tokens": 11756928} +{"current_steps": 17470, "total_steps": 204665, "loss": 0.2459, "lr": 1.7070406019445937e-06, "epoch": 0.4267950064739941, "percentage": 8.54, "elapsed_time": "0:21:39", "remaining_time": "3:52:09", "throughput": 9046.68, "total_tokens": 11760128} +{"current_steps": 17475, "total_steps": 204665, "loss": 0.0789, "lr": 1.7075291933356134e-06, "epoch": 0.42691715730584123, "percentage": 8.54, "elapsed_time": "0:21:40", "remaining_time": "3:52:08", "throughput": 9046.89, "total_tokens": 11763584} +{"current_steps": 17480, "total_steps": 204665, "loss": 0.1138, "lr": 1.708017784726633e-06, "epoch": 0.4270393081376884, "percentage": 8.54, "elapsed_time": "0:21:40", "remaining_time": "3:52:07", "throughput": 9047.02, "total_tokens": 11766912} +{"current_steps": 17485, "total_steps": 204665, "loss": 0.0573, "lr": 1.7085063761176527e-06, "epoch": 0.42716145896953556, "percentage": 8.54, "elapsed_time": "0:21:41", "remaining_time": "3:52:07", "throughput": 9047.65, "total_tokens": 11771008} +{"current_steps": 17490, "total_steps": 204665, "loss": 0.1585, "lr": 1.7089949675086724e-06, "epoch": 0.42728360980138275, "percentage": 8.55, "elapsed_time": "0:21:41", "remaining_time": "3:52:06", "throughput": 9047.57, "total_tokens": 11773952} +{"current_steps": 17495, "total_steps": 204665, "loss": 0.0502, "lr": 1.7094835588996921e-06, "epoch": 0.42740576063322994, "percentage": 8.55, "elapsed_time": "0:21:41", "remaining_time": "3:52:06", "throughput": 9047.75, "total_tokens": 11777344} +{"current_steps": 17500, "total_steps": 204665, "loss": 0.1001, "lr": 1.7099721502907118e-06, "epoch": 0.42752791146507707, "percentage": 8.55, "elapsed_time": "0:21:42", "remaining_time": "3:52:05", "throughput": 9048.05, "total_tokens": 11780928} +{"current_steps": 17505, "total_steps": 204665, "loss": 0.1082, "lr": 1.7104607416817317e-06, "epoch": 0.42765006229692426, "percentage": 8.55, "elapsed_time": "0:21:42", "remaining_time": "3:52:04", "throughput": 9048.37, "total_tokens": 11784512} +{"current_steps": 17510, "total_steps": 204665, "loss": 0.1218, "lr": 1.7109493330727512e-06, "epoch": 0.4277722131287714, "percentage": 8.56, "elapsed_time": "0:21:42", "remaining_time": "3:52:04", "throughput": 9048.47, "total_tokens": 11787776} +{"current_steps": 17515, "total_steps": 204665, "loss": 0.1005, "lr": 1.7114379244637708e-06, "epoch": 0.4278943639606186, "percentage": 8.56, "elapsed_time": "0:21:43", "remaining_time": "3:52:03", "throughput": 9048.76, "total_tokens": 11791360} +{"current_steps": 17520, "total_steps": 204665, "loss": 0.0767, "lr": 1.7119265158547907e-06, "epoch": 0.4280165147924657, "percentage": 8.56, "elapsed_time": "0:21:43", "remaining_time": "3:52:02", "throughput": 9048.85, "total_tokens": 11794560} +{"current_steps": 17525, "total_steps": 204665, "loss": 0.1079, "lr": 1.7124151072458102e-06, "epoch": 0.4281386656243129, "percentage": 8.56, "elapsed_time": "0:21:43", "remaining_time": "3:52:02", "throughput": 9048.84, "total_tokens": 11797632} +{"current_steps": 17530, "total_steps": 204665, "loss": 0.1193, "lr": 1.7129036986368299e-06, "epoch": 0.42826081645616004, "percentage": 8.57, "elapsed_time": "0:21:44", "remaining_time": "3:52:01", "throughput": 9049.01, "total_tokens": 11801024} +{"current_steps": 17535, "total_steps": 204665, "loss": 0.157, "lr": 1.7133922900278498e-06, "epoch": 0.42838296728800723, "percentage": 8.57, "elapsed_time": "0:21:44", "remaining_time": "3:52:01", "throughput": 9049.4, "total_tokens": 11804736} +{"current_steps": 17540, "total_steps": 204665, "loss": 0.235, "lr": 1.7138808814188692e-06, "epoch": 0.4285051181198544, "percentage": 8.57, "elapsed_time": "0:21:44", "remaining_time": "3:52:00", "throughput": 9049.82, "total_tokens": 11808512} +{"current_steps": 17545, "total_steps": 204665, "loss": 0.1438, "lr": 1.714369472809889e-06, "epoch": 0.42862726895170156, "percentage": 8.57, "elapsed_time": "0:21:45", "remaining_time": "3:51:59", "throughput": 9049.81, "total_tokens": 11811584} +{"current_steps": 17550, "total_steps": 204665, "loss": 0.1372, "lr": 1.7148580642009088e-06, "epoch": 0.42874941978354875, "percentage": 8.57, "elapsed_time": "0:21:45", "remaining_time": "3:51:59", "throughput": 9049.93, "total_tokens": 11814848} +{"current_steps": 17555, "total_steps": 204665, "loss": 0.0802, "lr": 1.7153466555919285e-06, "epoch": 0.4288715706153959, "percentage": 8.58, "elapsed_time": "0:21:45", "remaining_time": "3:51:58", "throughput": 9050.18, "total_tokens": 11818368} +{"current_steps": 17560, "total_steps": 204665, "loss": 0.133, "lr": 1.715835246982948e-06, "epoch": 0.42899372144724307, "percentage": 8.58, "elapsed_time": "0:21:46", "remaining_time": "3:51:58", "throughput": 9050.3, "total_tokens": 11821696} +{"current_steps": 17565, "total_steps": 204665, "loss": 0.1478, "lr": 1.7163238383739679e-06, "epoch": 0.4291158722790902, "percentage": 8.58, "elapsed_time": "0:21:46", "remaining_time": "3:51:57", "throughput": 9050.48, "total_tokens": 11825088} +{"current_steps": 17570, "total_steps": 204665, "loss": 0.0598, "lr": 1.7168124297649875e-06, "epoch": 0.4292380231109374, "percentage": 8.58, "elapsed_time": "0:21:46", "remaining_time": "3:51:56", "throughput": 9050.65, "total_tokens": 11828480} +{"current_steps": 17575, "total_steps": 204665, "loss": 0.1228, "lr": 1.717301021156007e-06, "epoch": 0.42936017394278453, "percentage": 8.59, "elapsed_time": "0:21:47", "remaining_time": "3:51:56", "throughput": 9050.87, "total_tokens": 11831936} +{"current_steps": 17580, "total_steps": 204665, "loss": 0.0906, "lr": 1.717789612547027e-06, "epoch": 0.4294823247746317, "percentage": 8.59, "elapsed_time": "0:21:47", "remaining_time": "3:51:55", "throughput": 9051.07, "total_tokens": 11835328} +{"current_steps": 17585, "total_steps": 204665, "loss": 0.078, "lr": 1.7182782039380466e-06, "epoch": 0.42960447560647885, "percentage": 8.59, "elapsed_time": "0:21:47", "remaining_time": "3:51:54", "throughput": 9051.25, "total_tokens": 11838720} +{"current_steps": 17590, "total_steps": 204665, "loss": 0.0597, "lr": 1.7187667953290663e-06, "epoch": 0.42972662643832604, "percentage": 8.59, "elapsed_time": "0:21:48", "remaining_time": "3:51:54", "throughput": 9051.44, "total_tokens": 11842112} +{"current_steps": 17595, "total_steps": 204665, "loss": 0.2555, "lr": 1.719255386720086e-06, "epoch": 0.42984877727017323, "percentage": 8.6, "elapsed_time": "0:21:48", "remaining_time": "3:51:53", "throughput": 9051.78, "total_tokens": 11845760} +{"current_steps": 17600, "total_steps": 204665, "loss": 0.2048, "lr": 1.7197439781111056e-06, "epoch": 0.42997092810202037, "percentage": 8.6, "elapsed_time": "0:21:49", "remaining_time": "3:51:53", "throughput": 9051.91, "total_tokens": 11849088} +{"current_steps": 17605, "total_steps": 204665, "loss": 0.1001, "lr": 1.7202325695021253e-06, "epoch": 0.43009307893386756, "percentage": 8.6, "elapsed_time": "0:21:49", "remaining_time": "3:51:52", "throughput": 9051.9, "total_tokens": 11852160} +{"current_steps": 17610, "total_steps": 204665, "loss": 0.0159, "lr": 1.720721160893145e-06, "epoch": 0.4302152297657147, "percentage": 8.6, "elapsed_time": "0:21:49", "remaining_time": "3:51:51", "throughput": 9051.9, "total_tokens": 11855232} +{"current_steps": 17615, "total_steps": 204665, "loss": 0.149, "lr": 1.7212097522841647e-06, "epoch": 0.4303373805975619, "percentage": 8.61, "elapsed_time": "0:21:50", "remaining_time": "3:51:51", "throughput": 9051.98, "total_tokens": 11858432} +{"current_steps": 17620, "total_steps": 204665, "loss": 0.1561, "lr": 1.7216983436751844e-06, "epoch": 0.430459531429409, "percentage": 8.61, "elapsed_time": "0:21:50", "remaining_time": "3:51:50", "throughput": 9052.46, "total_tokens": 11862336} +{"current_steps": 17625, "total_steps": 204665, "loss": 0.1548, "lr": 1.7221869350662042e-06, "epoch": 0.4305816822612562, "percentage": 8.61, "elapsed_time": "0:21:50", "remaining_time": "3:51:49", "throughput": 9052.43, "total_tokens": 11865344} +{"current_steps": 17630, "total_steps": 204665, "loss": 0.1538, "lr": 1.7226755264572237e-06, "epoch": 0.43070383309310334, "percentage": 8.61, "elapsed_time": "0:21:51", "remaining_time": "3:51:49", "throughput": 9052.57, "total_tokens": 11868672} +{"current_steps": 17635, "total_steps": 204665, "loss": 0.1825, "lr": 1.7231641178482434e-06, "epoch": 0.43082598392495053, "percentage": 8.62, "elapsed_time": "0:21:51", "remaining_time": "3:51:48", "throughput": 9052.64, "total_tokens": 11871872} +{"current_steps": 17640, "total_steps": 204665, "loss": 0.169, "lr": 1.7236527092392633e-06, "epoch": 0.4309481347567977, "percentage": 8.62, "elapsed_time": "0:21:51", "remaining_time": "3:51:47", "throughput": 9052.87, "total_tokens": 11875328} +{"current_steps": 17645, "total_steps": 204665, "loss": 0.1005, "lr": 1.7241413006302828e-06, "epoch": 0.43107028558864485, "percentage": 8.62, "elapsed_time": "0:21:52", "remaining_time": "3:51:47", "throughput": 9052.97, "total_tokens": 11878592} +{"current_steps": 17650, "total_steps": 204665, "loss": 0.0513, "lr": 1.7246298920213024e-06, "epoch": 0.43119243642049204, "percentage": 8.62, "elapsed_time": "0:21:52", "remaining_time": "3:51:46", "throughput": 9053.16, "total_tokens": 11881984} +{"current_steps": 17655, "total_steps": 204665, "loss": 0.1343, "lr": 1.7251184834123223e-06, "epoch": 0.4313145872523392, "percentage": 8.63, "elapsed_time": "0:21:52", "remaining_time": "3:51:45", "throughput": 9053.11, "total_tokens": 11884992} +{"current_steps": 17660, "total_steps": 204665, "loss": 0.1457, "lr": 1.7256070748033418e-06, "epoch": 0.43143673808418637, "percentage": 8.63, "elapsed_time": "0:21:53", "remaining_time": "3:51:45", "throughput": 9053.3, "total_tokens": 11888384} +{"current_steps": 17665, "total_steps": 204665, "loss": 0.1399, "lr": 1.7260956661943615e-06, "epoch": 0.4315588889160335, "percentage": 8.63, "elapsed_time": "0:21:53", "remaining_time": "3:51:44", "throughput": 9053.37, "total_tokens": 11891584} +{"current_steps": 17670, "total_steps": 204665, "loss": 0.1125, "lr": 1.7265842575853814e-06, "epoch": 0.4316810397478807, "percentage": 8.63, "elapsed_time": "0:21:53", "remaining_time": "3:51:43", "throughput": 9053.35, "total_tokens": 11894656} +{"current_steps": 17675, "total_steps": 204665, "loss": 0.1155, "lr": 1.727072848976401e-06, "epoch": 0.4318031905797278, "percentage": 8.64, "elapsed_time": "0:21:54", "remaining_time": "3:51:43", "throughput": 9053.27, "total_tokens": 11897600} +{"current_steps": 17680, "total_steps": 204665, "loss": 0.0801, "lr": 1.7275614403674205e-06, "epoch": 0.431925341411575, "percentage": 8.64, "elapsed_time": "0:21:54", "remaining_time": "3:51:42", "throughput": 9053.39, "total_tokens": 11900864} +{"current_steps": 17685, "total_steps": 204665, "loss": 0.2049, "lr": 1.7280500317584404e-06, "epoch": 0.4320474922434222, "percentage": 8.64, "elapsed_time": "0:21:54", "remaining_time": "3:51:41", "throughput": 9053.28, "total_tokens": 11903744} +{"current_steps": 17690, "total_steps": 204665, "loss": 0.0588, "lr": 1.72853862314946e-06, "epoch": 0.43216964307526934, "percentage": 8.64, "elapsed_time": "0:21:55", "remaining_time": "3:51:41", "throughput": 9053.67, "total_tokens": 11907456} +{"current_steps": 17695, "total_steps": 204665, "loss": 0.1682, "lr": 1.7290272145404796e-06, "epoch": 0.43229179390711653, "percentage": 8.65, "elapsed_time": "0:21:55", "remaining_time": "3:51:40", "throughput": 9053.9, "total_tokens": 11910912} +{"current_steps": 17700, "total_steps": 204665, "loss": 0.1615, "lr": 1.7295158059314995e-06, "epoch": 0.43241394473896366, "percentage": 8.65, "elapsed_time": "0:21:55", "remaining_time": "3:51:39", "throughput": 9054.01, "total_tokens": 11914176} +{"current_steps": 17705, "total_steps": 204665, "loss": 0.1076, "lr": 1.7300043973225191e-06, "epoch": 0.43253609557081085, "percentage": 8.65, "elapsed_time": "0:21:56", "remaining_time": "3:51:39", "throughput": 9053.98, "total_tokens": 11917184} +{"current_steps": 17710, "total_steps": 204665, "loss": 0.1363, "lr": 1.7304929887135388e-06, "epoch": 0.432658246402658, "percentage": 8.65, "elapsed_time": "0:21:56", "remaining_time": "3:51:38", "throughput": 9053.93, "total_tokens": 11920192} +{"current_steps": 17715, "total_steps": 204665, "loss": 0.134, "lr": 1.7309815801045585e-06, "epoch": 0.4327803972345052, "percentage": 8.66, "elapsed_time": "0:21:56", "remaining_time": "3:51:37", "throughput": 9053.99, "total_tokens": 11923392} +{"current_steps": 17720, "total_steps": 204665, "loss": 0.1268, "lr": 1.7314701714955782e-06, "epoch": 0.4329025480663523, "percentage": 8.66, "elapsed_time": "0:21:57", "remaining_time": "3:51:37", "throughput": 9054.09, "total_tokens": 11926656} +{"current_steps": 17725, "total_steps": 204665, "loss": 0.121, "lr": 1.7319587628865979e-06, "epoch": 0.4330246988981995, "percentage": 8.66, "elapsed_time": "0:21:57", "remaining_time": "3:51:36", "throughput": 9054.47, "total_tokens": 11930432} +{"current_steps": 17730, "total_steps": 204665, "loss": 0.0776, "lr": 1.7324473542776176e-06, "epoch": 0.43314684973004663, "percentage": 8.66, "elapsed_time": "0:21:57", "remaining_time": "3:51:36", "throughput": 9054.71, "total_tokens": 11933952} +{"current_steps": 17735, "total_steps": 204665, "loss": 0.1244, "lr": 1.7329359456686372e-06, "epoch": 0.4332690005618938, "percentage": 8.67, "elapsed_time": "0:21:58", "remaining_time": "3:51:35", "throughput": 9054.75, "total_tokens": 11937088} +{"current_steps": 17740, "total_steps": 204665, "loss": 0.1057, "lr": 1.733424537059657e-06, "epoch": 0.433391151393741, "percentage": 8.67, "elapsed_time": "0:21:58", "remaining_time": "3:51:34", "throughput": 9054.67, "total_tokens": 11940032} +{"current_steps": 17745, "total_steps": 204665, "loss": 0.0861, "lr": 1.7339131284506768e-06, "epoch": 0.43351330222558815, "percentage": 8.67, "elapsed_time": "0:21:59", "remaining_time": "3:51:34", "throughput": 9054.99, "total_tokens": 11943680} +{"current_steps": 17750, "total_steps": 204665, "loss": 0.0573, "lr": 1.7344017198416963e-06, "epoch": 0.43363545305743534, "percentage": 8.67, "elapsed_time": "0:21:59", "remaining_time": "3:51:33", "throughput": 9055.44, "total_tokens": 11947520} +{"current_steps": 17755, "total_steps": 204665, "loss": 0.0298, "lr": 1.734890311232716e-06, "epoch": 0.4337576038892825, "percentage": 8.68, "elapsed_time": "0:21:59", "remaining_time": "3:51:32", "throughput": 9055.51, "total_tokens": 11950720} +{"current_steps": 17760, "total_steps": 204665, "loss": 0.1699, "lr": 1.7353789026237359e-06, "epoch": 0.43387975472112966, "percentage": 8.68, "elapsed_time": "0:22:00", "remaining_time": "3:51:32", "throughput": 9055.65, "total_tokens": 11954048} +{"current_steps": 17765, "total_steps": 204665, "loss": 0.0999, "lr": 1.7358674940147553e-06, "epoch": 0.4340019055529768, "percentage": 8.68, "elapsed_time": "0:22:00", "remaining_time": "3:51:31", "throughput": 9055.72, "total_tokens": 11957248} +{"current_steps": 17770, "total_steps": 204665, "loss": 0.1424, "lr": 1.736356085405775e-06, "epoch": 0.434124056384824, "percentage": 8.68, "elapsed_time": "0:22:00", "remaining_time": "3:51:30", "throughput": 9055.9, "total_tokens": 11960640} +{"current_steps": 17775, "total_steps": 204665, "loss": 0.212, "lr": 1.736844676796795e-06, "epoch": 0.4342462072166711, "percentage": 8.68, "elapsed_time": "0:22:01", "remaining_time": "3:51:30", "throughput": 9056.04, "total_tokens": 11963968} +{"current_steps": 17780, "total_steps": 204665, "loss": 0.2234, "lr": 1.7373332681878146e-06, "epoch": 0.4343683580485183, "percentage": 8.69, "elapsed_time": "0:22:01", "remaining_time": "3:51:29", "throughput": 9056.06, "total_tokens": 11967104} +{"current_steps": 17785, "total_steps": 204665, "loss": 0.055, "lr": 1.737821859578834e-06, "epoch": 0.4344905088803655, "percentage": 8.69, "elapsed_time": "0:22:01", "remaining_time": "3:51:29", "throughput": 9056.13, "total_tokens": 11970304} +{"current_steps": 17790, "total_steps": 204665, "loss": 0.1663, "lr": 1.738310450969854e-06, "epoch": 0.43461265971221263, "percentage": 8.69, "elapsed_time": "0:22:02", "remaining_time": "3:51:28", "throughput": 9056.57, "total_tokens": 11974144} +{"current_steps": 17795, "total_steps": 204665, "loss": 0.1038, "lr": 1.7387990423608736e-06, "epoch": 0.4347348105440598, "percentage": 8.69, "elapsed_time": "0:22:02", "remaining_time": "3:51:27", "throughput": 9056.59, "total_tokens": 11977280} +{"current_steps": 17800, "total_steps": 204665, "loss": 0.0562, "lr": 1.739287633751893e-06, "epoch": 0.43485696137590696, "percentage": 8.7, "elapsed_time": "0:22:02", "remaining_time": "3:51:27", "throughput": 9057.07, "total_tokens": 11981184} +{"current_steps": 17805, "total_steps": 204665, "loss": 0.028, "lr": 1.739776225142913e-06, "epoch": 0.43497911220775415, "percentage": 8.7, "elapsed_time": "0:22:03", "remaining_time": "3:51:26", "throughput": 9057.1, "total_tokens": 11984320} +{"current_steps": 17810, "total_steps": 204665, "loss": 0.054, "lr": 1.7402648165339327e-06, "epoch": 0.4351012630396013, "percentage": 8.7, "elapsed_time": "0:22:03", "remaining_time": "3:51:26", "throughput": 9057.29, "total_tokens": 11987712} +{"current_steps": 17815, "total_steps": 204665, "loss": 0.1537, "lr": 1.7407534079249521e-06, "epoch": 0.4352234138714485, "percentage": 8.7, "elapsed_time": "0:22:03", "remaining_time": "3:51:25", "throughput": 9057.46, "total_tokens": 11991104} +{"current_steps": 17820, "total_steps": 204665, "loss": 0.08, "lr": 1.741241999315972e-06, "epoch": 0.4353455647032956, "percentage": 8.71, "elapsed_time": "0:22:04", "remaining_time": "3:51:24", "throughput": 9057.76, "total_tokens": 11994688} +{"current_steps": 17825, "total_steps": 204665, "loss": 0.0853, "lr": 1.7417305907069917e-06, "epoch": 0.4354677155351428, "percentage": 8.71, "elapsed_time": "0:22:04", "remaining_time": "3:51:24", "throughput": 9058.13, "total_tokens": 11998400} +{"current_steps": 17830, "total_steps": 204665, "loss": 0.2011, "lr": 1.7422191820980114e-06, "epoch": 0.43558986636699, "percentage": 8.71, "elapsed_time": "0:22:04", "remaining_time": "3:51:23", "throughput": 9058.01, "total_tokens": 12001280} +{"current_steps": 17835, "total_steps": 204665, "loss": 0.0902, "lr": 1.742707773489031e-06, "epoch": 0.4357120171988371, "percentage": 8.71, "elapsed_time": "0:22:05", "remaining_time": "3:51:22", "throughput": 9058.07, "total_tokens": 12004480} +{"current_steps": 17840, "total_steps": 204665, "loss": 0.2976, "lr": 1.7431963648800508e-06, "epoch": 0.4358341680306843, "percentage": 8.72, "elapsed_time": "0:22:05", "remaining_time": "3:51:22", "throughput": 9058.41, "total_tokens": 12008128} +{"current_steps": 17845, "total_steps": 204665, "loss": 0.0274, "lr": 1.7436849562710704e-06, "epoch": 0.43595631886253144, "percentage": 8.72, "elapsed_time": "0:22:05", "remaining_time": "3:51:21", "throughput": 9058.54, "total_tokens": 12011456} +{"current_steps": 17850, "total_steps": 204665, "loss": 0.1043, "lr": 1.7441735476620901e-06, "epoch": 0.43607846969437863, "percentage": 8.72, "elapsed_time": "0:22:06", "remaining_time": "3:51:21", "throughput": 9058.84, "total_tokens": 12015040} +{"current_steps": 17855, "total_steps": 204665, "loss": 0.0902, "lr": 1.7446621390531098e-06, "epoch": 0.43620062052622577, "percentage": 8.72, "elapsed_time": "0:22:06", "remaining_time": "3:51:20", "throughput": 9058.83, "total_tokens": 12018112} +{"current_steps": 17860, "total_steps": 204665, "loss": 0.0827, "lr": 1.7451507304441295e-06, "epoch": 0.43632277135807296, "percentage": 8.73, "elapsed_time": "0:22:07", "remaining_time": "3:51:19", "throughput": 9059.02, "total_tokens": 12021504} +{"current_steps": 17865, "total_steps": 204665, "loss": 0.2358, "lr": 1.7456393218351494e-06, "epoch": 0.4364449221899201, "percentage": 8.73, "elapsed_time": "0:22:07", "remaining_time": "3:51:19", "throughput": 9059.08, "total_tokens": 12024704} +{"current_steps": 17870, "total_steps": 204665, "loss": 0.1437, "lr": 1.7461279132261688e-06, "epoch": 0.4365670730217673, "percentage": 8.73, "elapsed_time": "0:22:07", "remaining_time": "3:51:18", "throughput": 9059.56, "total_tokens": 12028544} +{"current_steps": 17875, "total_steps": 204665, "loss": 0.1021, "lr": 1.7466165046171885e-06, "epoch": 0.4366892238536144, "percentage": 8.73, "elapsed_time": "0:22:08", "remaining_time": "3:51:18", "throughput": 9059.89, "total_tokens": 12032192} +{"current_steps": 17880, "total_steps": 204665, "loss": 0.1488, "lr": 1.7471050960082084e-06, "epoch": 0.4368113746854616, "percentage": 8.74, "elapsed_time": "0:22:08", "remaining_time": "3:51:17", "throughput": 9060.04, "total_tokens": 12035520} +{"current_steps": 17885, "total_steps": 204665, "loss": 0.2402, "lr": 1.7475936873992279e-06, "epoch": 0.4369335255173088, "percentage": 8.74, "elapsed_time": "0:22:08", "remaining_time": "3:51:16", "throughput": 9060.19, "total_tokens": 12038848} +{"current_steps": 17890, "total_steps": 204665, "loss": 0.084, "lr": 1.7480822787902476e-06, "epoch": 0.43705567634915593, "percentage": 8.74, "elapsed_time": "0:22:09", "remaining_time": "3:51:16", "throughput": 9060.35, "total_tokens": 12042240} +{"current_steps": 17895, "total_steps": 204665, "loss": 0.0383, "lr": 1.7485708701812675e-06, "epoch": 0.4371778271810031, "percentage": 8.74, "elapsed_time": "0:22:09", "remaining_time": "3:51:15", "throughput": 9060.96, "total_tokens": 12046400} +{"current_steps": 17900, "total_steps": 204665, "loss": 0.0683, "lr": 1.7490594615722871e-06, "epoch": 0.43729997801285025, "percentage": 8.75, "elapsed_time": "0:22:09", "remaining_time": "3:51:15", "throughput": 9061.28, "total_tokens": 12050048} +{"current_steps": 17905, "total_steps": 204665, "loss": 0.1424, "lr": 1.7495480529633066e-06, "epoch": 0.43742212884469744, "percentage": 8.75, "elapsed_time": "0:22:10", "remaining_time": "3:51:14", "throughput": 9061.25, "total_tokens": 12053120} +{"current_steps": 17910, "total_steps": 204665, "loss": 0.0343, "lr": 1.7500366443543265e-06, "epoch": 0.4375442796765446, "percentage": 8.75, "elapsed_time": "0:22:10", "remaining_time": "3:51:14", "throughput": 9061.55, "total_tokens": 12056704} +{"current_steps": 17915, "total_steps": 204665, "loss": 0.0722, "lr": 1.7505252357453462e-06, "epoch": 0.43766643050839177, "percentage": 8.75, "elapsed_time": "0:22:10", "remaining_time": "3:51:13", "throughput": 9061.5, "total_tokens": 12059712} +{"current_steps": 17920, "total_steps": 204665, "loss": 0.0684, "lr": 1.7510138271363657e-06, "epoch": 0.4377885813402389, "percentage": 8.76, "elapsed_time": "0:22:11", "remaining_time": "3:51:12", "throughput": 9061.7, "total_tokens": 12063168} +{"current_steps": 17925, "total_steps": 204665, "loss": 0.0728, "lr": 1.7515024185273855e-06, "epoch": 0.4379107321720861, "percentage": 8.76, "elapsed_time": "0:22:11", "remaining_time": "3:51:12", "throughput": 9061.89, "total_tokens": 12066560} +{"current_steps": 17930, "total_steps": 204665, "loss": 0.1294, "lr": 1.7519910099184052e-06, "epoch": 0.4380328830039333, "percentage": 8.76, "elapsed_time": "0:22:11", "remaining_time": "3:51:11", "throughput": 9061.93, "total_tokens": 12069760} +{"current_steps": 17935, "total_steps": 204665, "loss": 0.1249, "lr": 1.7524796013094247e-06, "epoch": 0.4381550338357804, "percentage": 8.76, "elapsed_time": "0:22:12", "remaining_time": "3:51:10", "throughput": 9062.06, "total_tokens": 12073088} +{"current_steps": 17940, "total_steps": 204665, "loss": 0.1269, "lr": 1.7529681927004446e-06, "epoch": 0.4382771846676276, "percentage": 8.77, "elapsed_time": "0:22:12", "remaining_time": "3:51:10", "throughput": 9061.99, "total_tokens": 12076096} +{"current_steps": 17945, "total_steps": 204665, "loss": 0.0412, "lr": 1.7534567840914643e-06, "epoch": 0.43839933549947474, "percentage": 8.77, "elapsed_time": "0:22:12", "remaining_time": "3:51:09", "throughput": 9062.28, "total_tokens": 12079680} +{"current_steps": 17950, "total_steps": 204665, "loss": 0.0797, "lr": 1.753945375482484e-06, "epoch": 0.43852148633132193, "percentage": 8.77, "elapsed_time": "0:22:13", "remaining_time": "3:51:09", "throughput": 9062.33, "total_tokens": 12082880} +{"current_steps": 17955, "total_steps": 204665, "loss": 0.0568, "lr": 1.7544339668735036e-06, "epoch": 0.43864363716316906, "percentage": 8.77, "elapsed_time": "0:22:13", "remaining_time": "3:51:08", "throughput": 9062.27, "total_tokens": 12085888} +{"current_steps": 17960, "total_steps": 204665, "loss": 0.1215, "lr": 1.7549225582645233e-06, "epoch": 0.43876578799501625, "percentage": 8.78, "elapsed_time": "0:22:13", "remaining_time": "3:51:07", "throughput": 9062.54, "total_tokens": 12089408} +{"current_steps": 17965, "total_steps": 204665, "loss": 0.0904, "lr": 1.755411149655543e-06, "epoch": 0.4388879388268634, "percentage": 8.78, "elapsed_time": "0:22:14", "remaining_time": "3:51:06", "throughput": 9062.35, "total_tokens": 12092160} +{"current_steps": 17970, "total_steps": 204665, "loss": 0.1017, "lr": 1.7558997410465627e-06, "epoch": 0.4390100896587106, "percentage": 8.78, "elapsed_time": "0:22:14", "remaining_time": "3:51:06", "throughput": 9062.54, "total_tokens": 12095616} +{"current_steps": 17975, "total_steps": 204665, "loss": 0.1135, "lr": 1.7563883324375824e-06, "epoch": 0.43913224049055777, "percentage": 8.78, "elapsed_time": "0:22:15", "remaining_time": "3:51:05", "throughput": 9062.43, "total_tokens": 12098496} +{"current_steps": 17980, "total_steps": 204665, "loss": 0.1535, "lr": 1.756876923828602e-06, "epoch": 0.4392543913224049, "percentage": 8.79, "elapsed_time": "0:22:15", "remaining_time": "3:51:04", "throughput": 9062.57, "total_tokens": 12101824} +{"current_steps": 17985, "total_steps": 204665, "loss": 0.0734, "lr": 1.757365515219622e-06, "epoch": 0.4393765421542521, "percentage": 8.79, "elapsed_time": "0:22:15", "remaining_time": "3:51:04", "throughput": 9062.71, "total_tokens": 12105152} +{"current_steps": 17990, "total_steps": 204665, "loss": 0.1458, "lr": 1.7578541066106414e-06, "epoch": 0.4394986929860992, "percentage": 8.79, "elapsed_time": "0:22:16", "remaining_time": "3:51:03", "throughput": 9063.18, "total_tokens": 12108992} +{"current_steps": 17995, "total_steps": 204665, "loss": 0.1572, "lr": 1.758342698001661e-06, "epoch": 0.4396208438179464, "percentage": 8.79, "elapsed_time": "0:22:16", "remaining_time": "3:51:03", "throughput": 9063.25, "total_tokens": 12112192} +{"current_steps": 18000, "total_steps": 204665, "loss": 0.1239, "lr": 1.758831289392681e-06, "epoch": 0.43974299464979355, "percentage": 8.79, "elapsed_time": "0:22:16", "remaining_time": "3:51:02", "throughput": 9063.61, "total_tokens": 12115904} +{"current_steps": 18005, "total_steps": 204665, "loss": 0.1095, "lr": 1.7593198807837004e-06, "epoch": 0.43986514548164074, "percentage": 8.8, "elapsed_time": "0:22:17", "remaining_time": "3:51:02", "throughput": 9064.05, "total_tokens": 12119744} +{"current_steps": 18010, "total_steps": 204665, "loss": 0.0698, "lr": 1.7598084721747201e-06, "epoch": 0.4399872963134879, "percentage": 8.8, "elapsed_time": "0:22:17", "remaining_time": "3:51:01", "throughput": 9064.08, "total_tokens": 12122880} +{"current_steps": 18015, "total_steps": 204665, "loss": 0.1874, "lr": 1.76029706356574e-06, "epoch": 0.44010944714533506, "percentage": 8.8, "elapsed_time": "0:22:17", "remaining_time": "3:51:00", "throughput": 9064.13, "total_tokens": 12126080} +{"current_steps": 18020, "total_steps": 204665, "loss": 0.0898, "lr": 1.7607856549567597e-06, "epoch": 0.4402315979771822, "percentage": 8.8, "elapsed_time": "0:22:18", "remaining_time": "3:51:00", "throughput": 9064.36, "total_tokens": 12129536} +{"current_steps": 18025, "total_steps": 204665, "loss": 0.0822, "lr": 1.7612742463477792e-06, "epoch": 0.4403537488090294, "percentage": 8.81, "elapsed_time": "0:22:18", "remaining_time": "3:50:59", "throughput": 9064.56, "total_tokens": 12132992} +{"current_steps": 18030, "total_steps": 204665, "loss": 0.1154, "lr": 1.761762837738799e-06, "epoch": 0.4404758996408766, "percentage": 8.81, "elapsed_time": "0:22:18", "remaining_time": "3:50:58", "throughput": 9064.68, "total_tokens": 12136320} +{"current_steps": 18035, "total_steps": 204665, "loss": 0.1134, "lr": 1.7622514291298187e-06, "epoch": 0.4405980504727237, "percentage": 8.81, "elapsed_time": "0:22:19", "remaining_time": "3:50:58", "throughput": 9064.86, "total_tokens": 12139712} +{"current_steps": 18040, "total_steps": 204665, "loss": 0.1963, "lr": 1.7627400205208382e-06, "epoch": 0.4407202013045709, "percentage": 8.81, "elapsed_time": "0:22:19", "remaining_time": "3:50:57", "throughput": 9064.79, "total_tokens": 12142656} +{"current_steps": 18045, "total_steps": 204665, "loss": 0.2346, "lr": 1.7632286119118581e-06, "epoch": 0.44084235213641804, "percentage": 8.82, "elapsed_time": "0:22:19", "remaining_time": "3:50:56", "throughput": 9064.94, "total_tokens": 12145984} +{"current_steps": 18050, "total_steps": 204665, "loss": 0.128, "lr": 1.7637172033028778e-06, "epoch": 0.4409645029682652, "percentage": 8.82, "elapsed_time": "0:22:20", "remaining_time": "3:50:56", "throughput": 9065.22, "total_tokens": 12149568} +{"current_steps": 18055, "total_steps": 204665, "loss": 0.054, "lr": 1.7642057946938975e-06, "epoch": 0.44108665380011236, "percentage": 8.82, "elapsed_time": "0:22:20", "remaining_time": "3:50:55", "throughput": 9065.53, "total_tokens": 12153216} +{"current_steps": 18060, "total_steps": 204665, "loss": 0.1968, "lr": 1.7646943860849172e-06, "epoch": 0.44120880463195955, "percentage": 8.82, "elapsed_time": "0:22:20", "remaining_time": "3:50:55", "throughput": 9065.94, "total_tokens": 12156992} +{"current_steps": 18065, "total_steps": 204665, "loss": 0.0838, "lr": 1.7651829774759368e-06, "epoch": 0.4413309554638067, "percentage": 8.83, "elapsed_time": "0:22:21", "remaining_time": "3:50:54", "throughput": 9066.15, "total_tokens": 12160448} +{"current_steps": 18070, "total_steps": 204665, "loss": 0.1253, "lr": 1.7656715688669565e-06, "epoch": 0.4414531062956539, "percentage": 8.83, "elapsed_time": "0:22:21", "remaining_time": "3:50:54", "throughput": 9066.14, "total_tokens": 12163520} +{"current_steps": 18075, "total_steps": 204665, "loss": 0.0315, "lr": 1.7661601602579762e-06, "epoch": 0.44157525712750106, "percentage": 8.83, "elapsed_time": "0:22:22", "remaining_time": "3:50:53", "throughput": 9066.65, "total_tokens": 12167488} +{"current_steps": 18080, "total_steps": 204665, "loss": 0.0407, "lr": 1.7666487516489959e-06, "epoch": 0.4416974079593482, "percentage": 8.83, "elapsed_time": "0:22:22", "remaining_time": "3:50:53", "throughput": 9066.86, "total_tokens": 12170944} +{"current_steps": 18085, "total_steps": 204665, "loss": 0.1787, "lr": 1.7671373430400156e-06, "epoch": 0.4418195587911954, "percentage": 8.84, "elapsed_time": "0:22:22", "remaining_time": "3:50:52", "throughput": 9067.03, "total_tokens": 12174336} +{"current_steps": 18090, "total_steps": 204665, "loss": 0.0386, "lr": 1.7676259344310352e-06, "epoch": 0.4419417096230425, "percentage": 8.84, "elapsed_time": "0:22:23", "remaining_time": "3:50:51", "throughput": 9067.05, "total_tokens": 12177472} +{"current_steps": 18095, "total_steps": 204665, "loss": 0.1516, "lr": 1.768114525822055e-06, "epoch": 0.4420638604548897, "percentage": 8.84, "elapsed_time": "0:22:23", "remaining_time": "3:50:51", "throughput": 9067.01, "total_tokens": 12180480} +{"current_steps": 18100, "total_steps": 204665, "loss": 0.1046, "lr": 1.7686031172130746e-06, "epoch": 0.44218601128673685, "percentage": 8.84, "elapsed_time": "0:22:23", "remaining_time": "3:50:50", "throughput": 9067.14, "total_tokens": 12183808} +{"current_steps": 18105, "total_steps": 204665, "loss": 0.2344, "lr": 1.7690917086040945e-06, "epoch": 0.44230816211858404, "percentage": 8.85, "elapsed_time": "0:22:24", "remaining_time": "3:50:49", "throughput": 9067.19, "total_tokens": 12187008} +{"current_steps": 18110, "total_steps": 204665, "loss": 0.0702, "lr": 1.769580299995114e-06, "epoch": 0.44243031295043117, "percentage": 8.85, "elapsed_time": "0:22:24", "remaining_time": "3:50:49", "throughput": 9067.42, "total_tokens": 12190464} +{"current_steps": 18115, "total_steps": 204665, "loss": 0.2018, "lr": 1.7700688913861336e-06, "epoch": 0.44255246378227836, "percentage": 8.85, "elapsed_time": "0:22:24", "remaining_time": "3:50:48", "throughput": 9068.04, "total_tokens": 12194624} +{"current_steps": 18120, "total_steps": 204665, "loss": 0.2166, "lr": 1.7705574827771535e-06, "epoch": 0.4426746146141255, "percentage": 8.85, "elapsed_time": "0:22:25", "remaining_time": "3:50:48", "throughput": 9068.21, "total_tokens": 12198016} +{"current_steps": 18125, "total_steps": 204665, "loss": 0.0513, "lr": 1.771046074168173e-06, "epoch": 0.4427967654459727, "percentage": 8.86, "elapsed_time": "0:22:25", "remaining_time": "3:50:47", "throughput": 9068.18, "total_tokens": 12201088} +{"current_steps": 18130, "total_steps": 204665, "loss": 0.1119, "lr": 1.7715346655591927e-06, "epoch": 0.4429189162778199, "percentage": 8.86, "elapsed_time": "0:22:25", "remaining_time": "3:50:46", "throughput": 9068.25, "total_tokens": 12204288} +{"current_steps": 18135, "total_steps": 204665, "loss": 0.2426, "lr": 1.7720232569502126e-06, "epoch": 0.443041067109667, "percentage": 8.86, "elapsed_time": "0:22:26", "remaining_time": "3:50:46", "throughput": 9068.32, "total_tokens": 12207488} +{"current_steps": 18140, "total_steps": 204665, "loss": 0.2005, "lr": 1.7725118483412323e-06, "epoch": 0.4431632179415142, "percentage": 8.86, "elapsed_time": "0:22:26", "remaining_time": "3:50:45", "throughput": 9068.31, "total_tokens": 12210560} +{"current_steps": 18145, "total_steps": 204665, "loss": 0.0823, "lr": 1.7730004397322517e-06, "epoch": 0.44328536877336133, "percentage": 8.87, "elapsed_time": "0:22:26", "remaining_time": "3:50:44", "throughput": 9068.41, "total_tokens": 12213824} +{"current_steps": 18150, "total_steps": 204665, "loss": 0.0994, "lr": 1.7734890311232716e-06, "epoch": 0.4434075196052085, "percentage": 8.87, "elapsed_time": "0:22:27", "remaining_time": "3:50:44", "throughput": 9068.62, "total_tokens": 12217280} +{"current_steps": 18155, "total_steps": 204665, "loss": 0.1956, "lr": 1.7739776225142913e-06, "epoch": 0.44352967043705566, "percentage": 8.87, "elapsed_time": "0:22:27", "remaining_time": "3:50:43", "throughput": 9068.72, "total_tokens": 12220544} +{"current_steps": 18160, "total_steps": 204665, "loss": 0.109, "lr": 1.7744662139053108e-06, "epoch": 0.44365182126890285, "percentage": 8.87, "elapsed_time": "0:22:27", "remaining_time": "3:50:42", "throughput": 9068.71, "total_tokens": 12223616} +{"current_steps": 18165, "total_steps": 204665, "loss": 0.168, "lr": 1.7749548052963307e-06, "epoch": 0.44377397210075, "percentage": 8.88, "elapsed_time": "0:22:28", "remaining_time": "3:50:42", "throughput": 9068.97, "total_tokens": 12227136} +{"current_steps": 18170, "total_steps": 204665, "loss": 0.1427, "lr": 1.7754433966873504e-06, "epoch": 0.44389612293259717, "percentage": 8.88, "elapsed_time": "0:22:28", "remaining_time": "3:50:41", "throughput": 9068.82, "total_tokens": 12229952} +{"current_steps": 18175, "total_steps": 204665, "loss": 0.0636, "lr": 1.77593198807837e-06, "epoch": 0.44401827376444436, "percentage": 8.88, "elapsed_time": "0:22:28", "remaining_time": "3:50:41", "throughput": 9069.06, "total_tokens": 12233472} +{"current_steps": 18180, "total_steps": 204665, "loss": 0.1436, "lr": 1.7764205794693897e-06, "epoch": 0.4441404245962915, "percentage": 8.88, "elapsed_time": "0:22:29", "remaining_time": "3:50:40", "throughput": 9069.23, "total_tokens": 12236864} +{"current_steps": 18185, "total_steps": 204665, "loss": 0.1426, "lr": 1.7769091708604094e-06, "epoch": 0.4442625754281387, "percentage": 8.89, "elapsed_time": "0:22:29", "remaining_time": "3:50:39", "throughput": 9069.34, "total_tokens": 12240192} +{"current_steps": 18190, "total_steps": 204665, "loss": 0.1146, "lr": 1.777397762251429e-06, "epoch": 0.4443847262599858, "percentage": 8.89, "elapsed_time": "0:22:29", "remaining_time": "3:50:39", "throughput": 9069.36, "total_tokens": 12243328} +{"current_steps": 18195, "total_steps": 204665, "loss": 0.0868, "lr": 1.7778863536424485e-06, "epoch": 0.444506877091833, "percentage": 8.89, "elapsed_time": "0:22:30", "remaining_time": "3:50:38", "throughput": 9069.5, "total_tokens": 12246656} +{"current_steps": 18200, "total_steps": 204665, "loss": 0.089, "lr": 1.7783749450334684e-06, "epoch": 0.44462902792368014, "percentage": 8.89, "elapsed_time": "0:22:30", "remaining_time": "3:50:38", "throughput": 9069.74, "total_tokens": 12250176} +{"current_steps": 18205, "total_steps": 204665, "loss": 0.2103, "lr": 1.7788635364244881e-06, "epoch": 0.44475117875552733, "percentage": 8.9, "elapsed_time": "0:22:31", "remaining_time": "3:50:37", "throughput": 9069.8, "total_tokens": 12253376} +{"current_steps": 18210, "total_steps": 204665, "loss": 0.1752, "lr": 1.779352127815508e-06, "epoch": 0.44487332958737447, "percentage": 8.9, "elapsed_time": "0:22:31", "remaining_time": "3:50:36", "throughput": 9069.84, "total_tokens": 12256512} +{"current_steps": 18215, "total_steps": 204665, "loss": 0.0764, "lr": 1.7798407192065275e-06, "epoch": 0.44499548041922166, "percentage": 8.9, "elapsed_time": "0:22:31", "remaining_time": "3:50:36", "throughput": 9070.08, "total_tokens": 12260032} +{"current_steps": 18220, "total_steps": 204665, "loss": 0.0446, "lr": 1.7803293105975472e-06, "epoch": 0.44511763125106885, "percentage": 8.9, "elapsed_time": "0:22:32", "remaining_time": "3:50:35", "throughput": 9070.23, "total_tokens": 12263360} +{"current_steps": 18225, "total_steps": 204665, "loss": 0.238, "lr": 1.780817901988567e-06, "epoch": 0.445239782082916, "percentage": 8.9, "elapsed_time": "0:22:32", "remaining_time": "3:50:34", "throughput": 9070.55, "total_tokens": 12267008} +{"current_steps": 18230, "total_steps": 204665, "loss": 0.1511, "lr": 1.7813064933795865e-06, "epoch": 0.44536193291476317, "percentage": 8.91, "elapsed_time": "0:22:32", "remaining_time": "3:50:34", "throughput": 9070.71, "total_tokens": 12270400} +{"current_steps": 18235, "total_steps": 204665, "loss": 0.0897, "lr": 1.7817950847706062e-06, "epoch": 0.4454840837466103, "percentage": 8.91, "elapsed_time": "0:22:33", "remaining_time": "3:50:33", "throughput": 9070.87, "total_tokens": 12273792} +{"current_steps": 18240, "total_steps": 204665, "loss": 0.1574, "lr": 1.782283676161626e-06, "epoch": 0.4456062345784575, "percentage": 8.91, "elapsed_time": "0:22:33", "remaining_time": "3:50:33", "throughput": 9070.94, "total_tokens": 12276992} +{"current_steps": 18245, "total_steps": 204665, "loss": 0.1676, "lr": 1.7827722675526456e-06, "epoch": 0.44572838541030463, "percentage": 8.91, "elapsed_time": "0:22:33", "remaining_time": "3:50:32", "throughput": 9071.0, "total_tokens": 12280192} +{"current_steps": 18250, "total_steps": 204665, "loss": 0.1209, "lr": 1.7832608589436653e-06, "epoch": 0.4458505362421518, "percentage": 8.92, "elapsed_time": "0:22:34", "remaining_time": "3:50:31", "throughput": 9070.99, "total_tokens": 12283264} +{"current_steps": 18255, "total_steps": 204665, "loss": 0.0612, "lr": 1.7837494503346851e-06, "epoch": 0.44597268707399895, "percentage": 8.92, "elapsed_time": "0:22:34", "remaining_time": "3:50:31", "throughput": 9071.08, "total_tokens": 12286528} +{"current_steps": 18260, "total_steps": 204665, "loss": 0.106, "lr": 1.7842380417257048e-06, "epoch": 0.44609483790584614, "percentage": 8.92, "elapsed_time": "0:22:34", "remaining_time": "3:50:30", "throughput": 9071.22, "total_tokens": 12289856} +{"current_steps": 18265, "total_steps": 204665, "loss": 0.08, "lr": 1.7847266331167243e-06, "epoch": 0.4462169887376933, "percentage": 8.92, "elapsed_time": "0:22:35", "remaining_time": "3:50:29", "throughput": 9071.19, "total_tokens": 12292928} +{"current_steps": 18270, "total_steps": 204665, "loss": 0.1225, "lr": 1.7852152245077442e-06, "epoch": 0.44633913956954047, "percentage": 8.93, "elapsed_time": "0:22:35", "remaining_time": "3:50:29", "throughput": 9071.18, "total_tokens": 12296000} +{"current_steps": 18275, "total_steps": 204665, "loss": 0.0658, "lr": 1.7857038158987639e-06, "epoch": 0.44646129040138766, "percentage": 8.93, "elapsed_time": "0:22:35", "remaining_time": "3:50:28", "throughput": 9071.24, "total_tokens": 12299200} +{"current_steps": 18280, "total_steps": 204665, "loss": 0.1892, "lr": 1.7861924072897833e-06, "epoch": 0.4465834412332348, "percentage": 8.93, "elapsed_time": "0:22:36", "remaining_time": "3:50:27", "throughput": 9071.38, "total_tokens": 12302528} +{"current_steps": 18285, "total_steps": 204665, "loss": 0.1002, "lr": 1.7866809986808032e-06, "epoch": 0.446705592065082, "percentage": 8.93, "elapsed_time": "0:22:36", "remaining_time": "3:50:27", "throughput": 9071.25, "total_tokens": 12305408} +{"current_steps": 18290, "total_steps": 204665, "loss": 0.0617, "lr": 1.787169590071823e-06, "epoch": 0.4468277428969291, "percentage": 8.94, "elapsed_time": "0:22:36", "remaining_time": "3:50:26", "throughput": 9071.32, "total_tokens": 12308608} +{"current_steps": 18295, "total_steps": 204665, "loss": 0.071, "lr": 1.7876581814628426e-06, "epoch": 0.4469498937287763, "percentage": 8.94, "elapsed_time": "0:22:37", "remaining_time": "3:50:25", "throughput": 9071.49, "total_tokens": 12312000} +{"current_steps": 18300, "total_steps": 204665, "loss": 0.1079, "lr": 1.788146772853862e-06, "epoch": 0.44707204456062344, "percentage": 8.94, "elapsed_time": "0:22:37", "remaining_time": "3:50:25", "throughput": 9071.41, "total_tokens": 12314944} +{"current_steps": 18305, "total_steps": 204665, "loss": 0.095, "lr": 1.788635364244882e-06, "epoch": 0.44719419539247063, "percentage": 8.94, "elapsed_time": "0:22:37", "remaining_time": "3:50:24", "throughput": 9071.46, "total_tokens": 12318144} +{"current_steps": 18310, "total_steps": 204665, "loss": 0.095, "lr": 1.7891239556359016e-06, "epoch": 0.44731634622431776, "percentage": 8.95, "elapsed_time": "0:22:38", "remaining_time": "3:50:23", "throughput": 9071.51, "total_tokens": 12321344} +{"current_steps": 18315, "total_steps": 204665, "loss": 0.1015, "lr": 1.7896125470269211e-06, "epoch": 0.44743849705616495, "percentage": 8.95, "elapsed_time": "0:22:38", "remaining_time": "3:50:23", "throughput": 9071.65, "total_tokens": 12324672} +{"current_steps": 18320, "total_steps": 204665, "loss": 0.2122, "lr": 1.790101138417941e-06, "epoch": 0.44756064788801214, "percentage": 8.95, "elapsed_time": "0:22:38", "remaining_time": "3:50:22", "throughput": 9071.63, "total_tokens": 12327744} +{"current_steps": 18325, "total_steps": 204665, "loss": 0.1254, "lr": 1.7905897298089607e-06, "epoch": 0.4476827987198593, "percentage": 8.95, "elapsed_time": "0:22:39", "remaining_time": "3:50:22", "throughput": 9071.81, "total_tokens": 12331136} +{"current_steps": 18330, "total_steps": 204665, "loss": 0.1622, "lr": 1.7910783211999806e-06, "epoch": 0.44780494955170647, "percentage": 8.96, "elapsed_time": "0:22:39", "remaining_time": "3:50:21", "throughput": 9071.99, "total_tokens": 12334528} +{"current_steps": 18335, "total_steps": 204665, "loss": 0.2092, "lr": 1.791566912591e-06, "epoch": 0.4479271003835536, "percentage": 8.96, "elapsed_time": "0:22:39", "remaining_time": "3:50:20", "throughput": 9072.27, "total_tokens": 12338112} +{"current_steps": 18340, "total_steps": 204665, "loss": 0.0867, "lr": 1.7920555039820197e-06, "epoch": 0.4480492512154008, "percentage": 8.96, "elapsed_time": "0:22:40", "remaining_time": "3:50:20", "throughput": 9072.51, "total_tokens": 12341632} +{"current_steps": 18345, "total_steps": 204665, "loss": 0.0842, "lr": 1.7925440953730396e-06, "epoch": 0.4481714020472479, "percentage": 8.96, "elapsed_time": "0:22:40", "remaining_time": "3:50:19", "throughput": 9072.43, "total_tokens": 12344576} +{"current_steps": 18350, "total_steps": 204665, "loss": 0.0792, "lr": 1.793032686764059e-06, "epoch": 0.4482935528790951, "percentage": 8.97, "elapsed_time": "0:22:41", "remaining_time": "3:50:18", "throughput": 9072.48, "total_tokens": 12347776} +{"current_steps": 18355, "total_steps": 204665, "loss": 0.1075, "lr": 1.7935212781550788e-06, "epoch": 0.44841570371094225, "percentage": 8.97, "elapsed_time": "0:22:41", "remaining_time": "3:50:18", "throughput": 9072.73, "total_tokens": 12351296} +{"current_steps": 18360, "total_steps": 204665, "loss": 0.1293, "lr": 1.7940098695460987e-06, "epoch": 0.44853785454278944, "percentage": 8.97, "elapsed_time": "0:22:41", "remaining_time": "3:50:17", "throughput": 9073.09, "total_tokens": 12355008} +{"current_steps": 18365, "total_steps": 204665, "loss": 0.0894, "lr": 1.7944984609371181e-06, "epoch": 0.44866000537463663, "percentage": 8.97, "elapsed_time": "0:22:42", "remaining_time": "3:50:17", "throughput": 9073.18, "total_tokens": 12358272} +{"current_steps": 18370, "total_steps": 204665, "loss": 0.1345, "lr": 1.7949870523281378e-06, "epoch": 0.44878215620648376, "percentage": 8.98, "elapsed_time": "0:22:42", "remaining_time": "3:50:16", "throughput": 9073.22, "total_tokens": 12361408} +{"current_steps": 18375, "total_steps": 204665, "loss": 0.1673, "lr": 1.7954756437191577e-06, "epoch": 0.44890430703833095, "percentage": 8.98, "elapsed_time": "0:22:42", "remaining_time": "3:50:15", "throughput": 9073.39, "total_tokens": 12364800} +{"current_steps": 18380, "total_steps": 204665, "loss": 0.0761, "lr": 1.7959642351101774e-06, "epoch": 0.4490264578701781, "percentage": 8.98, "elapsed_time": "0:22:43", "remaining_time": "3:50:15", "throughput": 9073.48, "total_tokens": 12368064} +{"current_steps": 18385, "total_steps": 204665, "loss": 0.2545, "lr": 1.7964528265011969e-06, "epoch": 0.4491486087020253, "percentage": 8.98, "elapsed_time": "0:22:43", "remaining_time": "3:50:14", "throughput": 9073.72, "total_tokens": 12371584} +{"current_steps": 18390, "total_steps": 204665, "loss": 0.0337, "lr": 1.7969414178922165e-06, "epoch": 0.4492707595338724, "percentage": 8.99, "elapsed_time": "0:22:43", "remaining_time": "3:50:14", "throughput": 9073.67, "total_tokens": 12374592} +{"current_steps": 18395, "total_steps": 204665, "loss": 0.1143, "lr": 1.7974300092832364e-06, "epoch": 0.4493929103657196, "percentage": 8.99, "elapsed_time": "0:22:44", "remaining_time": "3:50:13", "throughput": 9073.8, "total_tokens": 12377920} +{"current_steps": 18400, "total_steps": 204665, "loss": 0.1046, "lr": 1.797918600674256e-06, "epoch": 0.44951506119756673, "percentage": 8.99, "elapsed_time": "0:22:44", "remaining_time": "3:50:12", "throughput": 9073.77, "total_tokens": 12380992} +{"current_steps": 18405, "total_steps": 204665, "loss": 0.2225, "lr": 1.7984071920652756e-06, "epoch": 0.4496372120294139, "percentage": 8.99, "elapsed_time": "0:22:44", "remaining_time": "3:50:12", "throughput": 9073.98, "total_tokens": 12384448} +{"current_steps": 18410, "total_steps": 204665, "loss": 0.1052, "lr": 1.7988957834562955e-06, "epoch": 0.44975936286126106, "percentage": 9.0, "elapsed_time": "0:22:45", "remaining_time": "3:50:11", "throughput": 9073.99, "total_tokens": 12387584} +{"current_steps": 18415, "total_steps": 204665, "loss": 0.1601, "lr": 1.7993843748473152e-06, "epoch": 0.44988151369310825, "percentage": 9.0, "elapsed_time": "0:22:45", "remaining_time": "3:50:10", "throughput": 9074.21, "total_tokens": 12391040} +{"current_steps": 18420, "total_steps": 204665, "loss": 0.0576, "lr": 1.7998729662383346e-06, "epoch": 0.45000366452495544, "percentage": 9.0, "elapsed_time": "0:22:45", "remaining_time": "3:50:10", "throughput": 9074.17, "total_tokens": 12394048} +{"current_steps": 18425, "total_steps": 204665, "loss": 0.1271, "lr": 1.8003615576293545e-06, "epoch": 0.45012581535680257, "percentage": 9.0, "elapsed_time": "0:22:46", "remaining_time": "3:50:09", "throughput": 9074.05, "total_tokens": 12396928} +{"current_steps": 18430, "total_steps": 204665, "loss": 0.042, "lr": 1.8008501490203742e-06, "epoch": 0.45024796618864976, "percentage": 9.0, "elapsed_time": "0:22:46", "remaining_time": "3:50:08", "throughput": 9074.12, "total_tokens": 12400128} +{"current_steps": 18435, "total_steps": 204665, "loss": 0.1996, "lr": 1.8013387404113937e-06, "epoch": 0.4503701170204969, "percentage": 9.01, "elapsed_time": "0:22:46", "remaining_time": "3:50:08", "throughput": 9074.28, "total_tokens": 12403520} +{"current_steps": 18440, "total_steps": 204665, "loss": 0.1522, "lr": 1.8018273318024136e-06, "epoch": 0.4504922678523441, "percentage": 9.01, "elapsed_time": "0:22:47", "remaining_time": "3:50:07", "throughput": 9074.44, "total_tokens": 12406912} +{"current_steps": 18445, "total_steps": 204665, "loss": 0.1372, "lr": 1.8023159231934333e-06, "epoch": 0.4506144186841912, "percentage": 9.01, "elapsed_time": "0:22:47", "remaining_time": "3:50:07", "throughput": 9074.57, "total_tokens": 12410240} +{"current_steps": 18450, "total_steps": 204665, "loss": 0.023, "lr": 1.8028045145844531e-06, "epoch": 0.4507365695160384, "percentage": 9.01, "elapsed_time": "0:22:47", "remaining_time": "3:50:06", "throughput": 9074.46, "total_tokens": 12413120} +{"current_steps": 18455, "total_steps": 204665, "loss": 0.1611, "lr": 1.8032931059754726e-06, "epoch": 0.45085872034788554, "percentage": 9.02, "elapsed_time": "0:22:48", "remaining_time": "3:50:05", "throughput": 9074.48, "total_tokens": 12416256} +{"current_steps": 18460, "total_steps": 204665, "loss": 0.0895, "lr": 1.8037816973664923e-06, "epoch": 0.45098087117973273, "percentage": 9.02, "elapsed_time": "0:22:48", "remaining_time": "3:50:05", "throughput": 9074.61, "total_tokens": 12419584} +{"current_steps": 18465, "total_steps": 204665, "loss": 0.0713, "lr": 1.8042702887575122e-06, "epoch": 0.4511030220115799, "percentage": 9.02, "elapsed_time": "0:22:48", "remaining_time": "3:50:04", "throughput": 9074.77, "total_tokens": 12422976} +{"current_steps": 18470, "total_steps": 204665, "loss": 0.1842, "lr": 1.8047588801485317e-06, "epoch": 0.45122517284342706, "percentage": 9.02, "elapsed_time": "0:22:49", "remaining_time": "3:50:03", "throughput": 9074.96, "total_tokens": 12426432} +{"current_steps": 18475, "total_steps": 204665, "loss": 0.122, "lr": 1.8052474715395513e-06, "epoch": 0.45134732367527425, "percentage": 9.03, "elapsed_time": "0:22:49", "remaining_time": "3:50:03", "throughput": 9074.95, "total_tokens": 12429504} +{"current_steps": 18480, "total_steps": 204665, "loss": 0.0757, "lr": 1.805736062930571e-06, "epoch": 0.4514694745071214, "percentage": 9.03, "elapsed_time": "0:22:49", "remaining_time": "3:50:02", "throughput": 9074.93, "total_tokens": 12432576} +{"current_steps": 18485, "total_steps": 204665, "loss": 0.0932, "lr": 1.806224654321591e-06, "epoch": 0.45159162533896857, "percentage": 9.03, "elapsed_time": "0:22:50", "remaining_time": "3:50:02", "throughput": 9075.04, "total_tokens": 12435904} +{"current_steps": 18490, "total_steps": 204665, "loss": 0.0791, "lr": 1.8067132457126104e-06, "epoch": 0.4517137761708157, "percentage": 9.03, "elapsed_time": "0:22:50", "remaining_time": "3:50:01", "throughput": 9075.56, "total_tokens": 12439872} +{"current_steps": 18495, "total_steps": 204665, "loss": 0.2214, "lr": 1.80720183710363e-06, "epoch": 0.4518359270026629, "percentage": 9.04, "elapsed_time": "0:22:51", "remaining_time": "3:50:00", "throughput": 9075.78, "total_tokens": 12443328} +{"current_steps": 18500, "total_steps": 204665, "loss": 0.1355, "lr": 1.80769042849465e-06, "epoch": 0.45195807783451003, "percentage": 9.04, "elapsed_time": "0:22:51", "remaining_time": "3:50:00", "throughput": 9075.92, "total_tokens": 12446656} +{"current_steps": 18505, "total_steps": 204665, "loss": 0.1599, "lr": 1.8081790198856694e-06, "epoch": 0.4520802286663572, "percentage": 9.04, "elapsed_time": "0:22:51", "remaining_time": "3:49:59", "throughput": 9076.13, "total_tokens": 12450112} +{"current_steps": 18510, "total_steps": 204665, "loss": 0.1091, "lr": 1.8086676112766891e-06, "epoch": 0.4522023794982044, "percentage": 9.04, "elapsed_time": "0:22:52", "remaining_time": "3:49:59", "throughput": 9076.34, "total_tokens": 12453568} +{"current_steps": 18515, "total_steps": 204665, "loss": 0.1155, "lr": 1.809156202667709e-06, "epoch": 0.45232453033005154, "percentage": 9.05, "elapsed_time": "0:22:52", "remaining_time": "3:49:58", "throughput": 9076.57, "total_tokens": 12457088} +{"current_steps": 18520, "total_steps": 204665, "loss": 0.1302, "lr": 1.8096447940587285e-06, "epoch": 0.45244668116189873, "percentage": 9.05, "elapsed_time": "0:22:52", "remaining_time": "3:49:57", "throughput": 9076.67, "total_tokens": 12460352} +{"current_steps": 18525, "total_steps": 204665, "loss": 0.152, "lr": 1.8101333854497482e-06, "epoch": 0.45256883199374587, "percentage": 9.05, "elapsed_time": "0:22:53", "remaining_time": "3:49:57", "throughput": 9077.25, "total_tokens": 12464512} +{"current_steps": 18530, "total_steps": 204665, "loss": 0.0808, "lr": 1.810621976840768e-06, "epoch": 0.45269098282559306, "percentage": 9.05, "elapsed_time": "0:22:53", "remaining_time": "3:49:57", "throughput": 9077.57, "total_tokens": 12468160} +{"current_steps": 18535, "total_steps": 204665, "loss": 0.1997, "lr": 1.8111105682317877e-06, "epoch": 0.4528131336574402, "percentage": 9.06, "elapsed_time": "0:22:53", "remaining_time": "3:49:56", "throughput": 9077.85, "total_tokens": 12471744} +{"current_steps": 18540, "total_steps": 204665, "loss": 0.0638, "lr": 1.8115991596228072e-06, "epoch": 0.4529352844892874, "percentage": 9.06, "elapsed_time": "0:22:54", "remaining_time": "3:49:55", "throughput": 9078.28, "total_tokens": 12475584} +{"current_steps": 18545, "total_steps": 204665, "loss": 0.1659, "lr": 1.812087751013827e-06, "epoch": 0.4530574353211345, "percentage": 9.06, "elapsed_time": "0:22:54", "remaining_time": "3:49:55", "throughput": 9078.74, "total_tokens": 12479488} +{"current_steps": 18550, "total_steps": 204665, "loss": 0.1573, "lr": 1.8125763424048468e-06, "epoch": 0.4531795861529817, "percentage": 9.06, "elapsed_time": "0:22:54", "remaining_time": "3:49:54", "throughput": 9078.68, "total_tokens": 12482496} +{"current_steps": 18555, "total_steps": 204665, "loss": 0.0731, "lr": 1.8130649337958662e-06, "epoch": 0.45330173698482884, "percentage": 9.07, "elapsed_time": "0:22:55", "remaining_time": "3:49:54", "throughput": 9079.05, "total_tokens": 12486272} +{"current_steps": 18560, "total_steps": 204665, "loss": 0.2805, "lr": 1.8135535251868861e-06, "epoch": 0.45342388781667603, "percentage": 9.07, "elapsed_time": "0:22:55", "remaining_time": "3:49:53", "throughput": 9079.12, "total_tokens": 12489472} +{"current_steps": 18565, "total_steps": 204665, "loss": 0.1171, "lr": 1.8140421165779058e-06, "epoch": 0.4535460386485232, "percentage": 9.07, "elapsed_time": "0:22:55", "remaining_time": "3:49:53", "throughput": 9079.13, "total_tokens": 12492608} +{"current_steps": 18570, "total_steps": 204665, "loss": 0.1096, "lr": 1.8145307079689255e-06, "epoch": 0.45366818948037035, "percentage": 9.07, "elapsed_time": "0:22:56", "remaining_time": "3:49:52", "throughput": 9079.44, "total_tokens": 12496256} +{"current_steps": 18575, "total_steps": 204665, "loss": 0.1323, "lr": 1.8150192993599452e-06, "epoch": 0.45379034031221754, "percentage": 9.08, "elapsed_time": "0:22:56", "remaining_time": "3:49:51", "throughput": 9079.62, "total_tokens": 12499712} +{"current_steps": 18580, "total_steps": 204665, "loss": 0.1229, "lr": 1.8155078907509649e-06, "epoch": 0.4539124911440647, "percentage": 9.08, "elapsed_time": "0:22:57", "remaining_time": "3:49:51", "throughput": 9079.89, "total_tokens": 12503296} +{"current_steps": 18585, "total_steps": 204665, "loss": 0.1623, "lr": 1.8159964821419845e-06, "epoch": 0.45403464197591187, "percentage": 9.08, "elapsed_time": "0:22:57", "remaining_time": "3:49:50", "throughput": 9080.38, "total_tokens": 12507264} +{"current_steps": 18590, "total_steps": 204665, "loss": 0.0724, "lr": 1.8164850735330042e-06, "epoch": 0.454156792807759, "percentage": 9.08, "elapsed_time": "0:22:57", "remaining_time": "3:49:50", "throughput": 9080.48, "total_tokens": 12510528} +{"current_steps": 18595, "total_steps": 204665, "loss": 0.094, "lr": 1.816973664924024e-06, "epoch": 0.4542789436396062, "percentage": 9.09, "elapsed_time": "0:22:58", "remaining_time": "3:49:49", "throughput": 9080.89, "total_tokens": 12514304} +{"current_steps": 18600, "total_steps": 204665, "loss": 0.1304, "lr": 1.8174622563150436e-06, "epoch": 0.4544010944714533, "percentage": 9.09, "elapsed_time": "0:22:58", "remaining_time": "3:49:49", "throughput": 9081.54, "total_tokens": 12518528} +{"current_steps": 18605, "total_steps": 204665, "loss": 0.1311, "lr": 1.8179508477060635e-06, "epoch": 0.4545232453033005, "percentage": 9.09, "elapsed_time": "0:22:58", "remaining_time": "3:49:48", "throughput": 9081.87, "total_tokens": 12522240} +{"current_steps": 18610, "total_steps": 204665, "loss": 0.1302, "lr": 1.818439439097083e-06, "epoch": 0.4546453961351477, "percentage": 9.09, "elapsed_time": "0:22:59", "remaining_time": "3:49:48", "throughput": 9081.98, "total_tokens": 12525568} +{"current_steps": 18615, "total_steps": 204665, "loss": 0.0112, "lr": 1.8189280304881026e-06, "epoch": 0.45476754696699484, "percentage": 9.1, "elapsed_time": "0:22:59", "remaining_time": "3:49:47", "throughput": 9082.04, "total_tokens": 12528768} +{"current_steps": 18620, "total_steps": 204665, "loss": 0.0795, "lr": 1.8194166218791225e-06, "epoch": 0.45488969779884203, "percentage": 9.1, "elapsed_time": "0:22:59", "remaining_time": "3:49:47", "throughput": 9082.22, "total_tokens": 12532224} +{"current_steps": 18625, "total_steps": 204665, "loss": 0.1319, "lr": 1.819905213270142e-06, "epoch": 0.45501184863068916, "percentage": 9.1, "elapsed_time": "0:23:00", "remaining_time": "3:49:46", "throughput": 9082.25, "total_tokens": 12535360} +{"current_steps": 18630, "total_steps": 204665, "loss": 0.1793, "lr": 1.8203938046611617e-06, "epoch": 0.45513399946253635, "percentage": 9.1, "elapsed_time": "0:23:00", "remaining_time": "3:49:45", "throughput": 9082.33, "total_tokens": 12538624} +{"current_steps": 18635, "total_steps": 204665, "loss": 0.1235, "lr": 1.8208823960521816e-06, "epoch": 0.4552561502943835, "percentage": 9.11, "elapsed_time": "0:23:00", "remaining_time": "3:49:45", "throughput": 9082.51, "total_tokens": 12542080} +{"current_steps": 18640, "total_steps": 204665, "loss": 0.1727, "lr": 1.8213709874432012e-06, "epoch": 0.4553783011262307, "percentage": 9.11, "elapsed_time": "0:23:01", "remaining_time": "3:49:44", "throughput": 9083.0, "total_tokens": 12546048} +{"current_steps": 18645, "total_steps": 204665, "loss": 0.2543, "lr": 1.8218595788342207e-06, "epoch": 0.4555004519580778, "percentage": 9.11, "elapsed_time": "0:23:01", "remaining_time": "3:49:44", "throughput": 9083.05, "total_tokens": 12549248} +{"current_steps": 18650, "total_steps": 204665, "loss": 0.2284, "lr": 1.8223481702252406e-06, "epoch": 0.455622602789925, "percentage": 9.11, "elapsed_time": "0:23:01", "remaining_time": "3:49:43", "throughput": 9083.2, "total_tokens": 12552640} +{"current_steps": 18655, "total_steps": 204665, "loss": 0.0855, "lr": 1.8228367616162603e-06, "epoch": 0.4557447536217722, "percentage": 9.11, "elapsed_time": "0:23:02", "remaining_time": "3:49:43", "throughput": 9083.32, "total_tokens": 12555968} +{"current_steps": 18660, "total_steps": 204665, "loss": 0.0782, "lr": 1.8233253530072798e-06, "epoch": 0.4558669044536193, "percentage": 9.12, "elapsed_time": "0:23:02", "remaining_time": "3:49:42", "throughput": 9083.42, "total_tokens": 12559296} +{"current_steps": 18665, "total_steps": 204665, "loss": 0.1598, "lr": 1.8238139443982997e-06, "epoch": 0.4559890552854665, "percentage": 9.12, "elapsed_time": "0:23:03", "remaining_time": "3:49:41", "throughput": 9083.41, "total_tokens": 12562368} +{"current_steps": 18670, "total_steps": 204665, "loss": 0.0274, "lr": 1.8243025357893193e-06, "epoch": 0.45611120611731365, "percentage": 9.12, "elapsed_time": "0:23:03", "remaining_time": "3:49:41", "throughput": 9083.51, "total_tokens": 12565696} +{"current_steps": 18675, "total_steps": 204665, "loss": 0.0718, "lr": 1.8247911271803388e-06, "epoch": 0.45623335694916084, "percentage": 9.12, "elapsed_time": "0:23:03", "remaining_time": "3:49:40", "throughput": 9083.86, "total_tokens": 12569408} +{"current_steps": 18680, "total_steps": 204665, "loss": 0.1147, "lr": 1.8252797185713587e-06, "epoch": 0.456355507781008, "percentage": 9.13, "elapsed_time": "0:23:04", "remaining_time": "3:49:40", "throughput": 9084.05, "total_tokens": 12572864} +{"current_steps": 18685, "total_steps": 204665, "loss": 0.1923, "lr": 1.8257683099623784e-06, "epoch": 0.45647765861285516, "percentage": 9.13, "elapsed_time": "0:23:04", "remaining_time": "3:49:39", "throughput": 9084.32, "total_tokens": 12576448} +{"current_steps": 18690, "total_steps": 204665, "loss": 0.3076, "lr": 1.826256901353398e-06, "epoch": 0.4565998094447023, "percentage": 9.13, "elapsed_time": "0:23:04", "remaining_time": "3:49:39", "throughput": 9084.34, "total_tokens": 12579584} +{"current_steps": 18695, "total_steps": 204665, "loss": 0.1864, "lr": 1.8267454927444177e-06, "epoch": 0.4567219602765495, "percentage": 9.13, "elapsed_time": "0:23:05", "remaining_time": "3:49:38", "throughput": 9084.49, "total_tokens": 12582976} +{"current_steps": 18700, "total_steps": 204665, "loss": 0.1133, "lr": 1.8272340841354374e-06, "epoch": 0.4568441111083966, "percentage": 9.14, "elapsed_time": "0:23:05", "remaining_time": "3:49:37", "throughput": 9084.69, "total_tokens": 12586432} +{"current_steps": 18705, "total_steps": 204665, "loss": 0.1829, "lr": 1.827722675526457e-06, "epoch": 0.4569662619402438, "percentage": 9.14, "elapsed_time": "0:23:05", "remaining_time": "3:49:37", "throughput": 9084.47, "total_tokens": 12589184} +{"current_steps": 18710, "total_steps": 204665, "loss": 0.1619, "lr": 1.8282112669174768e-06, "epoch": 0.457088412772091, "percentage": 9.14, "elapsed_time": "0:23:06", "remaining_time": "3:49:36", "throughput": 9084.72, "total_tokens": 12592768} +{"current_steps": 18715, "total_steps": 204665, "loss": 0.1941, "lr": 1.8286998583084965e-06, "epoch": 0.45721056360393814, "percentage": 9.14, "elapsed_time": "0:23:06", "remaining_time": "3:49:35", "throughput": 9084.7, "total_tokens": 12595840} +{"current_steps": 18720, "total_steps": 204665, "loss": 0.1251, "lr": 1.8291884496995161e-06, "epoch": 0.4573327144357853, "percentage": 9.15, "elapsed_time": "0:23:06", "remaining_time": "3:49:35", "throughput": 9084.82, "total_tokens": 12599168} +{"current_steps": 18725, "total_steps": 204665, "loss": 0.1294, "lr": 1.829677041090536e-06, "epoch": 0.45745486526763246, "percentage": 9.15, "elapsed_time": "0:23:07", "remaining_time": "3:49:34", "throughput": 9084.99, "total_tokens": 12602560} +{"current_steps": 18730, "total_steps": 204665, "loss": 0.087, "lr": 1.8301656324815555e-06, "epoch": 0.45757701609947965, "percentage": 9.15, "elapsed_time": "0:23:07", "remaining_time": "3:49:34", "throughput": 9085.28, "total_tokens": 12606208} +{"current_steps": 18735, "total_steps": 204665, "loss": 0.0713, "lr": 1.8306542238725752e-06, "epoch": 0.4576991669313268, "percentage": 9.15, "elapsed_time": "0:23:07", "remaining_time": "3:49:33", "throughput": 9085.44, "total_tokens": 12609600} +{"current_steps": 18740, "total_steps": 204665, "loss": 0.1419, "lr": 1.831142815263595e-06, "epoch": 0.457821317763174, "percentage": 9.16, "elapsed_time": "0:23:08", "remaining_time": "3:49:33", "throughput": 9085.53, "total_tokens": 12612864} +{"current_steps": 18745, "total_steps": 204665, "loss": 0.0825, "lr": 1.8316314066546146e-06, "epoch": 0.4579434685950211, "percentage": 9.16, "elapsed_time": "0:23:08", "remaining_time": "3:49:32", "throughput": 9085.65, "total_tokens": 12616192} +{"current_steps": 18750, "total_steps": 204665, "loss": 0.0839, "lr": 1.8321199980456342e-06, "epoch": 0.4580656194268683, "percentage": 9.16, "elapsed_time": "0:23:08", "remaining_time": "3:49:31", "throughput": 9085.8, "total_tokens": 12619584} +{"current_steps": 18755, "total_steps": 204665, "loss": 0.0678, "lr": 1.8326085894366541e-06, "epoch": 0.4581877702587155, "percentage": 9.16, "elapsed_time": "0:23:09", "remaining_time": "3:49:31", "throughput": 9086.1, "total_tokens": 12623232} +{"current_steps": 18760, "total_steps": 204665, "loss": 0.1289, "lr": 1.8330971808276738e-06, "epoch": 0.4583099210905626, "percentage": 9.17, "elapsed_time": "0:23:09", "remaining_time": "3:49:30", "throughput": 9086.59, "total_tokens": 12627200} +{"current_steps": 18765, "total_steps": 204665, "loss": 0.2559, "lr": 1.8335857722186933e-06, "epoch": 0.4584320719224098, "percentage": 9.17, "elapsed_time": "0:23:10", "remaining_time": "3:49:30", "throughput": 9086.7, "total_tokens": 12630528} +{"current_steps": 18770, "total_steps": 204665, "loss": 0.0332, "lr": 1.8340743636097132e-06, "epoch": 0.45855422275425695, "percentage": 9.17, "elapsed_time": "0:23:10", "remaining_time": "3:49:29", "throughput": 9086.63, "total_tokens": 12633536} +{"current_steps": 18775, "total_steps": 204665, "loss": 0.1104, "lr": 1.8345629550007329e-06, "epoch": 0.45867637358610414, "percentage": 9.17, "elapsed_time": "0:23:10", "remaining_time": "3:49:29", "throughput": 9086.75, "total_tokens": 12636864} +{"current_steps": 18780, "total_steps": 204665, "loss": 0.1016, "lr": 1.8350515463917523e-06, "epoch": 0.45879852441795127, "percentage": 9.18, "elapsed_time": "0:23:11", "remaining_time": "3:49:28", "throughput": 9087.07, "total_tokens": 12640576} +{"current_steps": 18785, "total_steps": 204665, "loss": 0.0848, "lr": 1.8355401377827722e-06, "epoch": 0.45892067524979846, "percentage": 9.18, "elapsed_time": "0:23:11", "remaining_time": "3:49:28", "throughput": 9087.24, "total_tokens": 12643968} +{"current_steps": 18790, "total_steps": 204665, "loss": 0.1632, "lr": 1.836028729173792e-06, "epoch": 0.4590428260816456, "percentage": 9.18, "elapsed_time": "0:23:11", "remaining_time": "3:49:27", "throughput": 9087.22, "total_tokens": 12647040} +{"current_steps": 18795, "total_steps": 204665, "loss": 0.2297, "lr": 1.8365173205648114e-06, "epoch": 0.4591649769134928, "percentage": 9.18, "elapsed_time": "0:23:12", "remaining_time": "3:49:26", "throughput": 9087.29, "total_tokens": 12650240} +{"current_steps": 18800, "total_steps": 204665, "loss": 0.1519, "lr": 1.8370059119558313e-06, "epoch": 0.4592871277453399, "percentage": 9.19, "elapsed_time": "0:23:12", "remaining_time": "3:49:26", "throughput": 9087.27, "total_tokens": 12653312} +{"current_steps": 18805, "total_steps": 204665, "loss": 0.0764, "lr": 1.837494503346851e-06, "epoch": 0.4594092785771871, "percentage": 9.19, "elapsed_time": "0:23:12", "remaining_time": "3:49:25", "throughput": 9087.49, "total_tokens": 12656832} +{"current_steps": 18810, "total_steps": 204665, "loss": 0.2496, "lr": 1.8379830947378706e-06, "epoch": 0.4595314294090343, "percentage": 9.19, "elapsed_time": "0:23:13", "remaining_time": "3:49:24", "throughput": 9087.71, "total_tokens": 12660288} +{"current_steps": 18815, "total_steps": 204665, "loss": 0.1129, "lr": 1.8384716861288903e-06, "epoch": 0.45965358024088143, "percentage": 9.19, "elapsed_time": "0:23:13", "remaining_time": "3:49:24", "throughput": 9087.86, "total_tokens": 12663680} +{"current_steps": 18820, "total_steps": 204665, "loss": 0.1077, "lr": 1.83896027751991e-06, "epoch": 0.4597757310727286, "percentage": 9.2, "elapsed_time": "0:23:13", "remaining_time": "3:49:23", "throughput": 9088.08, "total_tokens": 12667200} +{"current_steps": 18825, "total_steps": 204665, "loss": 0.0589, "lr": 1.8394488689109297e-06, "epoch": 0.45989788190457576, "percentage": 9.2, "elapsed_time": "0:23:14", "remaining_time": "3:49:23", "throughput": 9088.51, "total_tokens": 12671040} +{"current_steps": 18830, "total_steps": 204665, "loss": 0.129, "lr": 1.8399374603019493e-06, "epoch": 0.46002003273642295, "percentage": 9.2, "elapsed_time": "0:23:14", "remaining_time": "3:49:22", "throughput": 9088.69, "total_tokens": 12674496} +{"current_steps": 18835, "total_steps": 204665, "loss": 0.098, "lr": 1.840426051692969e-06, "epoch": 0.4601421835682701, "percentage": 9.2, "elapsed_time": "0:23:14", "remaining_time": "3:49:22", "throughput": 9088.64, "total_tokens": 12677504} +{"current_steps": 18840, "total_steps": 204665, "loss": 0.1474, "lr": 1.8409146430839887e-06, "epoch": 0.46026433440011727, "percentage": 9.21, "elapsed_time": "0:23:15", "remaining_time": "3:49:21", "throughput": 9088.63, "total_tokens": 12680640} +{"current_steps": 18845, "total_steps": 204665, "loss": 0.0447, "lr": 1.8414032344750086e-06, "epoch": 0.4603864852319644, "percentage": 9.21, "elapsed_time": "0:23:15", "remaining_time": "3:49:20", "throughput": 9088.86, "total_tokens": 12684160} +{"current_steps": 18850, "total_steps": 204665, "loss": 0.0752, "lr": 1.841891825866028e-06, "epoch": 0.4605086360638116, "percentage": 9.21, "elapsed_time": "0:23:15", "remaining_time": "3:49:20", "throughput": 9088.94, "total_tokens": 12687424} +{"current_steps": 18855, "total_steps": 204665, "loss": 0.0254, "lr": 1.8423804172570478e-06, "epoch": 0.4606307868956588, "percentage": 9.21, "elapsed_time": "0:23:16", "remaining_time": "3:49:19", "throughput": 9089.09, "total_tokens": 12690816} +{"current_steps": 18860, "total_steps": 204665, "loss": 0.1209, "lr": 1.8428690086480676e-06, "epoch": 0.4607529377275059, "percentage": 9.22, "elapsed_time": "0:23:16", "remaining_time": "3:49:19", "throughput": 9089.15, "total_tokens": 12694016} +{"current_steps": 18865, "total_steps": 204665, "loss": 0.0971, "lr": 1.8433576000390871e-06, "epoch": 0.4608750885593531, "percentage": 9.22, "elapsed_time": "0:23:16", "remaining_time": "3:49:18", "throughput": 9089.49, "total_tokens": 12697728} +{"current_steps": 18870, "total_steps": 204665, "loss": 0.1475, "lr": 1.8438461914301068e-06, "epoch": 0.46099723939120024, "percentage": 9.22, "elapsed_time": "0:23:17", "remaining_time": "3:49:18", "throughput": 9089.93, "total_tokens": 12701632} +{"current_steps": 18875, "total_steps": 204665, "loss": 0.1265, "lr": 1.8443347828211267e-06, "epoch": 0.46111939022304743, "percentage": 9.22, "elapsed_time": "0:23:17", "remaining_time": "3:49:17", "throughput": 9090.09, "total_tokens": 12705024} +{"current_steps": 18880, "total_steps": 204665, "loss": 0.2305, "lr": 1.8448233742121464e-06, "epoch": 0.46124154105489457, "percentage": 9.22, "elapsed_time": "0:23:18", "remaining_time": "3:49:17", "throughput": 9090.28, "total_tokens": 12708480} +{"current_steps": 18885, "total_steps": 204665, "loss": 0.3209, "lr": 1.8453119656031658e-06, "epoch": 0.46136369188674176, "percentage": 9.23, "elapsed_time": "0:23:18", "remaining_time": "3:49:16", "throughput": 9090.47, "total_tokens": 12711936} +{"current_steps": 18890, "total_steps": 204665, "loss": 0.1201, "lr": 1.8458005569941857e-06, "epoch": 0.4614858427185889, "percentage": 9.23, "elapsed_time": "0:23:18", "remaining_time": "3:49:16", "throughput": 9090.97, "total_tokens": 12715904} +{"current_steps": 18895, "total_steps": 204665, "loss": 0.0603, "lr": 1.8462891483852054e-06, "epoch": 0.4616079935504361, "percentage": 9.23, "elapsed_time": "0:23:19", "remaining_time": "3:49:15", "throughput": 9091.39, "total_tokens": 12719744} +{"current_steps": 18900, "total_steps": 204665, "loss": 0.1219, "lr": 1.8467777397762249e-06, "epoch": 0.46173014438228327, "percentage": 9.23, "elapsed_time": "0:23:19", "remaining_time": "3:49:14", "throughput": 9091.56, "total_tokens": 12723200} +{"current_steps": 18905, "total_steps": 204665, "loss": 0.1796, "lr": 1.8472663311672448e-06, "epoch": 0.4618522952141304, "percentage": 9.24, "elapsed_time": "0:23:19", "remaining_time": "3:49:14", "throughput": 9091.5, "total_tokens": 12726208} +{"current_steps": 18910, "total_steps": 204665, "loss": 0.1212, "lr": 1.8477549225582645e-06, "epoch": 0.4619744460459776, "percentage": 9.24, "elapsed_time": "0:23:20", "remaining_time": "3:49:13", "throughput": 9091.38, "total_tokens": 12729088} +{"current_steps": 18915, "total_steps": 204665, "loss": 0.0825, "lr": 1.8482435139492841e-06, "epoch": 0.46209659687782473, "percentage": 9.24, "elapsed_time": "0:23:20", "remaining_time": "3:49:13", "throughput": 9091.74, "total_tokens": 12732864} +{"current_steps": 18920, "total_steps": 204665, "loss": 0.1234, "lr": 1.8487321053403038e-06, "epoch": 0.4622187477096719, "percentage": 9.24, "elapsed_time": "0:23:20", "remaining_time": "3:49:12", "throughput": 9091.89, "total_tokens": 12736256} +{"current_steps": 18925, "total_steps": 204665, "loss": 0.0573, "lr": 1.8492206967313235e-06, "epoch": 0.46234089854151905, "percentage": 9.25, "elapsed_time": "0:23:21", "remaining_time": "3:49:11", "throughput": 9091.95, "total_tokens": 12739520} +{"current_steps": 18930, "total_steps": 204665, "loss": 0.1303, "lr": 1.8497092881223432e-06, "epoch": 0.46246304937336624, "percentage": 9.25, "elapsed_time": "0:23:21", "remaining_time": "3:49:11", "throughput": 9092.21, "total_tokens": 12743104} +{"current_steps": 18935, "total_steps": 204665, "loss": 0.1818, "lr": 1.8501978795133629e-06, "epoch": 0.4625852002052134, "percentage": 9.25, "elapsed_time": "0:23:21", "remaining_time": "3:49:10", "throughput": 9092.35, "total_tokens": 12746432} +{"current_steps": 18940, "total_steps": 204665, "loss": 0.0869, "lr": 1.8506864709043825e-06, "epoch": 0.46270735103706057, "percentage": 9.25, "elapsed_time": "0:23:22", "remaining_time": "3:49:10", "throughput": 9092.37, "total_tokens": 12749568} +{"current_steps": 18945, "total_steps": 204665, "loss": 0.109, "lr": 1.8511750622954022e-06, "epoch": 0.4628295018689077, "percentage": 9.26, "elapsed_time": "0:23:22", "remaining_time": "3:49:09", "throughput": 9092.4, "total_tokens": 12752768} +{"current_steps": 18950, "total_steps": 204665, "loss": 0.1561, "lr": 1.851663653686422e-06, "epoch": 0.4629516527007549, "percentage": 9.26, "elapsed_time": "0:23:22", "remaining_time": "3:49:09", "throughput": 9092.74, "total_tokens": 12756480} +{"current_steps": 18955, "total_steps": 204665, "loss": 0.1497, "lr": 1.8521522450774416e-06, "epoch": 0.4630738035326021, "percentage": 9.26, "elapsed_time": "0:23:23", "remaining_time": "3:49:08", "throughput": 9092.89, "total_tokens": 12759872} +{"current_steps": 18960, "total_steps": 204665, "loss": 0.1316, "lr": 1.8526408364684613e-06, "epoch": 0.4631959543644492, "percentage": 9.26, "elapsed_time": "0:23:23", "remaining_time": "3:49:07", "throughput": 9093.04, "total_tokens": 12763264} +{"current_steps": 18965, "total_steps": 204665, "loss": 0.0591, "lr": 1.8531294278594812e-06, "epoch": 0.4633181051962964, "percentage": 9.27, "elapsed_time": "0:23:23", "remaining_time": "3:49:07", "throughput": 9093.24, "total_tokens": 12766720} +{"current_steps": 18970, "total_steps": 204665, "loss": 0.1837, "lr": 1.8536180192505006e-06, "epoch": 0.46344025602814354, "percentage": 9.27, "elapsed_time": "0:23:24", "remaining_time": "3:49:06", "throughput": 9093.44, "total_tokens": 12770240} +{"current_steps": 18975, "total_steps": 204665, "loss": 0.0643, "lr": 1.8541066106415203e-06, "epoch": 0.4635624068599907, "percentage": 9.27, "elapsed_time": "0:23:24", "remaining_time": "3:49:06", "throughput": 9093.83, "total_tokens": 12774080} +{"current_steps": 18980, "total_steps": 204665, "loss": 0.1817, "lr": 1.8545952020325402e-06, "epoch": 0.46368455769183786, "percentage": 9.27, "elapsed_time": "0:23:25", "remaining_time": "3:49:05", "throughput": 9093.88, "total_tokens": 12777280} +{"current_steps": 18985, "total_steps": 204665, "loss": 0.1293, "lr": 1.8550837934235597e-06, "epoch": 0.46380670852368505, "percentage": 9.28, "elapsed_time": "0:23:25", "remaining_time": "3:49:05", "throughput": 9094.24, "total_tokens": 12781056} +{"current_steps": 18990, "total_steps": 204665, "loss": 0.139, "lr": 1.8555723848145794e-06, "epoch": 0.4639288593555322, "percentage": 9.28, "elapsed_time": "0:23:25", "remaining_time": "3:49:04", "throughput": 9094.34, "total_tokens": 12784384} +{"current_steps": 18995, "total_steps": 204665, "loss": 0.1052, "lr": 1.8560609762055993e-06, "epoch": 0.4640510101873794, "percentage": 9.28, "elapsed_time": "0:23:26", "remaining_time": "3:49:04", "throughput": 9094.35, "total_tokens": 12787520} +{"current_steps": 19000, "total_steps": 204665, "loss": 0.1275, "lr": 1.856549567596619e-06, "epoch": 0.46417316101922657, "percentage": 9.28, "elapsed_time": "0:23:26", "remaining_time": "3:49:03", "throughput": 9094.58, "total_tokens": 12791040} +{"current_steps": 19005, "total_steps": 204665, "loss": 0.0531, "lr": 1.8570381589876384e-06, "epoch": 0.4642953118510737, "percentage": 9.29, "elapsed_time": "0:23:26", "remaining_time": "3:49:02", "throughput": 9094.74, "total_tokens": 12794432} +{"current_steps": 19010, "total_steps": 204665, "loss": 0.1065, "lr": 1.8575267503786583e-06, "epoch": 0.4644174626829209, "percentage": 9.29, "elapsed_time": "0:23:27", "remaining_time": "3:49:02", "throughput": 9094.77, "total_tokens": 12797632} +{"current_steps": 19015, "total_steps": 204665, "loss": 0.1797, "lr": 1.858015341769678e-06, "epoch": 0.464539613514768, "percentage": 9.29, "elapsed_time": "0:23:27", "remaining_time": "3:49:01", "throughput": 9094.99, "total_tokens": 12801152} +{"current_steps": 19020, "total_steps": 204665, "loss": 0.0999, "lr": 1.8585039331606974e-06, "epoch": 0.4646617643466152, "percentage": 9.29, "elapsed_time": "0:23:27", "remaining_time": "3:49:01", "throughput": 9095.32, "total_tokens": 12804864} +{"current_steps": 19025, "total_steps": 204665, "loss": 0.0713, "lr": 1.8589925245517173e-06, "epoch": 0.46478391517846235, "percentage": 9.3, "elapsed_time": "0:23:28", "remaining_time": "3:49:00", "throughput": 9095.47, "total_tokens": 12808256} +{"current_steps": 19030, "total_steps": 204665, "loss": 0.1033, "lr": 1.859481115942737e-06, "epoch": 0.46490606601030954, "percentage": 9.3, "elapsed_time": "0:23:28", "remaining_time": "3:49:00", "throughput": 9095.96, "total_tokens": 12812224} +{"current_steps": 19035, "total_steps": 204665, "loss": 0.2061, "lr": 1.8599697073337567e-06, "epoch": 0.46502821684215667, "percentage": 9.3, "elapsed_time": "0:23:28", "remaining_time": "3:48:59", "throughput": 9096.12, "total_tokens": 12815616} +{"current_steps": 19040, "total_steps": 204665, "loss": 0.1223, "lr": 1.8604582987247764e-06, "epoch": 0.46515036767400386, "percentage": 9.3, "elapsed_time": "0:23:29", "remaining_time": "3:48:59", "throughput": 9096.48, "total_tokens": 12819392} +{"current_steps": 19045, "total_steps": 204665, "loss": 0.0222, "lr": 1.860946890115796e-06, "epoch": 0.46527251850585105, "percentage": 9.31, "elapsed_time": "0:23:29", "remaining_time": "3:48:58", "throughput": 9096.49, "total_tokens": 12822528} +{"current_steps": 19050, "total_steps": 204665, "loss": 0.1632, "lr": 1.8614354815068157e-06, "epoch": 0.4653946693376982, "percentage": 9.31, "elapsed_time": "0:23:29", "remaining_time": "3:48:58", "throughput": 9096.49, "total_tokens": 12825664} +{"current_steps": 19055, "total_steps": 204665, "loss": 0.0751, "lr": 1.8619240728978354e-06, "epoch": 0.4655168201695454, "percentage": 9.31, "elapsed_time": "0:23:30", "remaining_time": "3:48:57", "throughput": 9096.73, "total_tokens": 12829184} +{"current_steps": 19060, "total_steps": 204665, "loss": 0.1655, "lr": 1.8624126642888551e-06, "epoch": 0.4656389710013925, "percentage": 9.31, "elapsed_time": "0:23:30", "remaining_time": "3:48:56", "throughput": 9096.71, "total_tokens": 12832256} +{"current_steps": 19065, "total_steps": 204665, "loss": 0.1237, "lr": 1.8629012556798748e-06, "epoch": 0.4657611218332397, "percentage": 9.32, "elapsed_time": "0:23:30", "remaining_time": "3:48:56", "throughput": 9096.79, "total_tokens": 12835520} +{"current_steps": 19070, "total_steps": 204665, "loss": 0.1247, "lr": 1.8633898470708945e-06, "epoch": 0.46588327266508683, "percentage": 9.32, "elapsed_time": "0:23:31", "remaining_time": "3:48:55", "throughput": 9096.79, "total_tokens": 12838656} +{"current_steps": 19075, "total_steps": 204665, "loss": 0.0818, "lr": 1.8638784384619142e-06, "epoch": 0.466005423496934, "percentage": 9.32, "elapsed_time": "0:23:31", "remaining_time": "3:48:54", "throughput": 9096.91, "total_tokens": 12841984} +{"current_steps": 19080, "total_steps": 204665, "loss": 0.1951, "lr": 1.8643670298529338e-06, "epoch": 0.46612757432878116, "percentage": 9.32, "elapsed_time": "0:23:32", "remaining_time": "3:48:54", "throughput": 9097.39, "total_tokens": 12845952} +{"current_steps": 19085, "total_steps": 204665, "loss": 0.1492, "lr": 1.8648556212439537e-06, "epoch": 0.46624972516062835, "percentage": 9.32, "elapsed_time": "0:23:32", "remaining_time": "3:48:53", "throughput": 9097.47, "total_tokens": 12849216} +{"current_steps": 19090, "total_steps": 204665, "loss": 0.0904, "lr": 1.8653442126349732e-06, "epoch": 0.4663718759924755, "percentage": 9.33, "elapsed_time": "0:23:32", "remaining_time": "3:48:53", "throughput": 9097.48, "total_tokens": 12852352} +{"current_steps": 19095, "total_steps": 204665, "loss": 0.1296, "lr": 1.8658328040259929e-06, "epoch": 0.46649402682432267, "percentage": 9.33, "elapsed_time": "0:23:33", "remaining_time": "3:48:52", "throughput": 9097.77, "total_tokens": 12856000} +{"current_steps": 19100, "total_steps": 204665, "loss": 0.0455, "lr": 1.8663213954170128e-06, "epoch": 0.46661617765616986, "percentage": 9.33, "elapsed_time": "0:23:33", "remaining_time": "3:48:52", "throughput": 9097.71, "total_tokens": 12859008} +{"current_steps": 19105, "total_steps": 204665, "loss": 0.1772, "lr": 1.8668099868080322e-06, "epoch": 0.466738328488017, "percentage": 9.33, "elapsed_time": "0:23:33", "remaining_time": "3:48:51", "throughput": 9097.88, "total_tokens": 12862464} +{"current_steps": 19110, "total_steps": 204665, "loss": 0.0789, "lr": 1.867298578199052e-06, "epoch": 0.4668604793198642, "percentage": 9.34, "elapsed_time": "0:23:34", "remaining_time": "3:48:51", "throughput": 9098.0, "total_tokens": 12865792} +{"current_steps": 19115, "total_steps": 204665, "loss": 0.0677, "lr": 1.8677871695900718e-06, "epoch": 0.4669826301517113, "percentage": 9.34, "elapsed_time": "0:23:34", "remaining_time": "3:48:50", "throughput": 9098.27, "total_tokens": 12869376} +{"current_steps": 19120, "total_steps": 204665, "loss": 0.1072, "lr": 1.8682757609810915e-06, "epoch": 0.4671047809835585, "percentage": 9.34, "elapsed_time": "0:23:34", "remaining_time": "3:48:49", "throughput": 9098.49, "total_tokens": 12872896} +{"current_steps": 19125, "total_steps": 204665, "loss": 0.1762, "lr": 1.868764352372111e-06, "epoch": 0.46722693181540564, "percentage": 9.34, "elapsed_time": "0:23:35", "remaining_time": "3:48:49", "throughput": 9098.82, "total_tokens": 12876608} +{"current_steps": 19130, "total_steps": 204665, "loss": 0.1542, "lr": 1.8692529437631309e-06, "epoch": 0.46734908264725283, "percentage": 9.35, "elapsed_time": "0:23:35", "remaining_time": "3:48:48", "throughput": 9098.94, "total_tokens": 12879936} +{"current_steps": 19135, "total_steps": 204665, "loss": 0.1688, "lr": 1.8697415351541505e-06, "epoch": 0.46747123347909997, "percentage": 9.35, "elapsed_time": "0:23:35", "remaining_time": "3:48:48", "throughput": 9099.16, "total_tokens": 12883392} +{"current_steps": 19140, "total_steps": 204665, "loss": 0.1618, "lr": 1.87023012654517e-06, "epoch": 0.46759338431094716, "percentage": 9.35, "elapsed_time": "0:23:36", "remaining_time": "3:48:47", "throughput": 9099.32, "total_tokens": 12886784} +{"current_steps": 19145, "total_steps": 204665, "loss": 0.1077, "lr": 1.87071871793619e-06, "epoch": 0.46771553514279435, "percentage": 9.35, "elapsed_time": "0:23:36", "remaining_time": "3:48:47", "throughput": 9099.4, "total_tokens": 12890048} +{"current_steps": 19150, "total_steps": 204665, "loss": 0.0752, "lr": 1.8712073093272096e-06, "epoch": 0.4678376859746415, "percentage": 9.36, "elapsed_time": "0:23:36", "remaining_time": "3:48:46", "throughput": 9099.49, "total_tokens": 12893312} +{"current_steps": 19155, "total_steps": 204665, "loss": 0.1155, "lr": 1.8716959007182293e-06, "epoch": 0.46795983680648867, "percentage": 9.36, "elapsed_time": "0:23:37", "remaining_time": "3:48:45", "throughput": 9099.64, "total_tokens": 12896704} +{"current_steps": 19160, "total_steps": 204665, "loss": 0.0286, "lr": 1.872184492109249e-06, "epoch": 0.4680819876383358, "percentage": 9.36, "elapsed_time": "0:23:37", "remaining_time": "3:48:45", "throughput": 9099.8, "total_tokens": 12900096} +{"current_steps": 19165, "total_steps": 204665, "loss": 0.1852, "lr": 1.8726730835002686e-06, "epoch": 0.468204138470183, "percentage": 9.36, "elapsed_time": "0:23:37", "remaining_time": "3:48:44", "throughput": 9099.71, "total_tokens": 12903040} +{"current_steps": 19170, "total_steps": 204665, "loss": 0.1419, "lr": 1.8731616748912883e-06, "epoch": 0.46832628930203013, "percentage": 9.37, "elapsed_time": "0:23:38", "remaining_time": "3:48:44", "throughput": 9099.85, "total_tokens": 12906432} +{"current_steps": 19175, "total_steps": 204665, "loss": 0.106, "lr": 1.873650266282308e-06, "epoch": 0.4684484401338773, "percentage": 9.37, "elapsed_time": "0:23:38", "remaining_time": "3:48:43", "throughput": 9099.76, "total_tokens": 12909376} +{"current_steps": 19180, "total_steps": 204665, "loss": 0.1177, "lr": 1.8741388576733277e-06, "epoch": 0.46857059096572445, "percentage": 9.37, "elapsed_time": "0:23:38", "remaining_time": "3:48:42", "throughput": 9100.0, "total_tokens": 12912896} +{"current_steps": 19185, "total_steps": 204665, "loss": 0.1659, "lr": 1.8746274490643474e-06, "epoch": 0.46869274179757164, "percentage": 9.37, "elapsed_time": "0:23:39", "remaining_time": "3:48:42", "throughput": 9100.04, "total_tokens": 12916096} +{"current_steps": 19190, "total_steps": 204665, "loss": 0.126, "lr": 1.8751160404553672e-06, "epoch": 0.46881489262941883, "percentage": 9.38, "elapsed_time": "0:23:39", "remaining_time": "3:48:41", "throughput": 9100.0, "total_tokens": 12919168} +{"current_steps": 19195, "total_steps": 204665, "loss": 0.233, "lr": 1.8756046318463867e-06, "epoch": 0.46893704346126597, "percentage": 9.38, "elapsed_time": "0:23:40", "remaining_time": "3:48:40", "throughput": 9100.14, "total_tokens": 12922496} +{"current_steps": 19200, "total_steps": 204665, "loss": 0.2077, "lr": 1.8760932232374064e-06, "epoch": 0.46905919429311316, "percentage": 9.38, "elapsed_time": "0:23:40", "remaining_time": "3:48:40", "throughput": 9100.37, "total_tokens": 12926016} +{"current_steps": 19205, "total_steps": 204665, "loss": 0.1411, "lr": 1.8765818146284263e-06, "epoch": 0.4691813451249603, "percentage": 9.38, "elapsed_time": "0:23:40", "remaining_time": "3:48:39", "throughput": 9100.54, "total_tokens": 12929472} +{"current_steps": 19210, "total_steps": 204665, "loss": 0.0664, "lr": 1.8770704060194458e-06, "epoch": 0.4693034959568075, "percentage": 9.39, "elapsed_time": "0:23:41", "remaining_time": "3:48:39", "throughput": 9100.8, "total_tokens": 12933056} +{"current_steps": 19215, "total_steps": 204665, "loss": 0.0971, "lr": 1.8775589974104654e-06, "epoch": 0.4694256467886546, "percentage": 9.39, "elapsed_time": "0:23:41", "remaining_time": "3:48:38", "throughput": 9100.91, "total_tokens": 12936384} +{"current_steps": 19220, "total_steps": 204665, "loss": 0.0367, "lr": 1.8780475888014853e-06, "epoch": 0.4695477976205018, "percentage": 9.39, "elapsed_time": "0:23:41", "remaining_time": "3:48:38", "throughput": 9100.98, "total_tokens": 12939648} +{"current_steps": 19225, "total_steps": 204665, "loss": 0.0736, "lr": 1.8785361801925048e-06, "epoch": 0.46966994845234894, "percentage": 9.39, "elapsed_time": "0:23:42", "remaining_time": "3:48:37", "throughput": 9101.15, "total_tokens": 12943104} +{"current_steps": 19230, "total_steps": 204665, "loss": 0.0748, "lr": 1.8790247715835245e-06, "epoch": 0.46979209928419613, "percentage": 9.4, "elapsed_time": "0:23:42", "remaining_time": "3:48:37", "throughput": 9101.2, "total_tokens": 12946304} +{"current_steps": 19235, "total_steps": 204665, "loss": 0.044, "lr": 1.8795133629745444e-06, "epoch": 0.46991425011604326, "percentage": 9.4, "elapsed_time": "0:23:42", "remaining_time": "3:48:36", "throughput": 9101.09, "total_tokens": 12949248} +{"current_steps": 19240, "total_steps": 204665, "loss": 0.2714, "lr": 1.880001954365564e-06, "epoch": 0.47003640094789045, "percentage": 9.4, "elapsed_time": "0:23:43", "remaining_time": "3:48:35", "throughput": 9101.18, "total_tokens": 12952512} +{"current_steps": 19245, "total_steps": 204665, "loss": 0.1009, "lr": 1.8804905457565835e-06, "epoch": 0.47015855177973764, "percentage": 9.4, "elapsed_time": "0:23:43", "remaining_time": "3:48:35", "throughput": 9101.23, "total_tokens": 12955712} +{"current_steps": 19250, "total_steps": 204665, "loss": 0.0945, "lr": 1.8809791371476034e-06, "epoch": 0.4702807026115848, "percentage": 9.41, "elapsed_time": "0:23:43", "remaining_time": "3:48:34", "throughput": 9101.56, "total_tokens": 12959424} +{"current_steps": 19255, "total_steps": 204665, "loss": 0.1379, "lr": 1.881467728538623e-06, "epoch": 0.47040285344343197, "percentage": 9.41, "elapsed_time": "0:23:44", "remaining_time": "3:48:34", "throughput": 9101.73, "total_tokens": 12962880} +{"current_steps": 19260, "total_steps": 204665, "loss": 0.1535, "lr": 1.8819563199296426e-06, "epoch": 0.4705250042752791, "percentage": 9.41, "elapsed_time": "0:23:44", "remaining_time": "3:48:33", "throughput": 9101.97, "total_tokens": 12966464} +{"current_steps": 19265, "total_steps": 204665, "loss": 0.0838, "lr": 1.8824449113206625e-06, "epoch": 0.4706471551071263, "percentage": 9.41, "elapsed_time": "0:23:44", "remaining_time": "3:48:33", "throughput": 9102.23, "total_tokens": 12970048} +{"current_steps": 19270, "total_steps": 204665, "loss": 0.1308, "lr": 1.8829335027116821e-06, "epoch": 0.4707693059389734, "percentage": 9.42, "elapsed_time": "0:23:45", "remaining_time": "3:48:32", "throughput": 9102.56, "total_tokens": 12973760} +{"current_steps": 19275, "total_steps": 204665, "loss": 0.0659, "lr": 1.8834220941027018e-06, "epoch": 0.4708914567708206, "percentage": 9.42, "elapsed_time": "0:23:45", "remaining_time": "3:48:32", "throughput": 9102.96, "total_tokens": 12977600} +{"current_steps": 19280, "total_steps": 204665, "loss": 0.2964, "lr": 1.8839106854937215e-06, "epoch": 0.47101360760266775, "percentage": 9.42, "elapsed_time": "0:23:45", "remaining_time": "3:48:31", "throughput": 9103.05, "total_tokens": 12980864} +{"current_steps": 19285, "total_steps": 204665, "loss": 0.0543, "lr": 1.8843992768847412e-06, "epoch": 0.47113575843451494, "percentage": 9.42, "elapsed_time": "0:23:46", "remaining_time": "3:48:30", "throughput": 9103.22, "total_tokens": 12984256} +{"current_steps": 19290, "total_steps": 204665, "loss": 0.0297, "lr": 1.8848878682757609e-06, "epoch": 0.47125790926636213, "percentage": 9.43, "elapsed_time": "0:23:46", "remaining_time": "3:48:30", "throughput": 9103.27, "total_tokens": 12987456} +{"current_steps": 19295, "total_steps": 204665, "loss": 0.1323, "lr": 1.8853764596667806e-06, "epoch": 0.47138006009820926, "percentage": 9.43, "elapsed_time": "0:23:47", "remaining_time": "3:48:29", "throughput": 9103.5, "total_tokens": 12990976} +{"current_steps": 19300, "total_steps": 204665, "loss": 0.0875, "lr": 1.8858650510578002e-06, "epoch": 0.47150221093005645, "percentage": 9.43, "elapsed_time": "0:23:47", "remaining_time": "3:48:29", "throughput": 9103.65, "total_tokens": 12994368} +{"current_steps": 19305, "total_steps": 204665, "loss": 0.2024, "lr": 1.88635364244882e-06, "epoch": 0.4716243617619036, "percentage": 9.43, "elapsed_time": "0:23:47", "remaining_time": "3:48:28", "throughput": 9103.6, "total_tokens": 12997376} +{"current_steps": 19310, "total_steps": 204665, "loss": 0.259, "lr": 1.8868422338398398e-06, "epoch": 0.4717465125937508, "percentage": 9.43, "elapsed_time": "0:23:48", "remaining_time": "3:48:27", "throughput": 9103.58, "total_tokens": 13000448} +{"current_steps": 19315, "total_steps": 204665, "loss": 0.1971, "lr": 1.8873308252308593e-06, "epoch": 0.4718686634255979, "percentage": 9.44, "elapsed_time": "0:23:48", "remaining_time": "3:48:27", "throughput": 9103.73, "total_tokens": 13003840} +{"current_steps": 19320, "total_steps": 204665, "loss": 0.2406, "lr": 1.887819416621879e-06, "epoch": 0.4719908142574451, "percentage": 9.44, "elapsed_time": "0:23:48", "remaining_time": "3:48:26", "throughput": 9103.99, "total_tokens": 13007424} +{"current_steps": 19325, "total_steps": 204665, "loss": 0.1605, "lr": 1.8883080080128989e-06, "epoch": 0.47211296508929224, "percentage": 9.44, "elapsed_time": "0:23:49", "remaining_time": "3:48:26", "throughput": 9104.14, "total_tokens": 13010816} +{"current_steps": 19330, "total_steps": 204665, "loss": 0.1467, "lr": 1.8887965994039183e-06, "epoch": 0.4722351159211394, "percentage": 9.44, "elapsed_time": "0:23:49", "remaining_time": "3:48:25", "throughput": 9104.22, "total_tokens": 13014080} +{"current_steps": 19335, "total_steps": 204665, "loss": 0.0574, "lr": 1.889285190794938e-06, "epoch": 0.4723572667529866, "percentage": 9.45, "elapsed_time": "0:23:49", "remaining_time": "3:48:24", "throughput": 9104.31, "total_tokens": 13017344} +{"current_steps": 19340, "total_steps": 204665, "loss": 0.1405, "lr": 1.889773782185958e-06, "epoch": 0.47247941758483375, "percentage": 9.45, "elapsed_time": "0:23:50", "remaining_time": "3:48:24", "throughput": 9104.39, "total_tokens": 13020608} +{"current_steps": 19345, "total_steps": 204665, "loss": 0.1287, "lr": 1.8902623735769776e-06, "epoch": 0.47260156841668094, "percentage": 9.45, "elapsed_time": "0:23:50", "remaining_time": "3:48:23", "throughput": 9104.62, "total_tokens": 13024128} +{"current_steps": 19350, "total_steps": 204665, "loss": 0.1658, "lr": 1.890750964967997e-06, "epoch": 0.4727237192485281, "percentage": 9.45, "elapsed_time": "0:23:50", "remaining_time": "3:48:23", "throughput": 9104.79, "total_tokens": 13027584} +{"current_steps": 19355, "total_steps": 204665, "loss": 0.2057, "lr": 1.891239556359017e-06, "epoch": 0.47284587008037526, "percentage": 9.46, "elapsed_time": "0:23:51", "remaining_time": "3:48:22", "throughput": 9104.75, "total_tokens": 13030592} +{"current_steps": 19360, "total_steps": 204665, "loss": 0.1622, "lr": 1.8917281477500366e-06, "epoch": 0.4729680209122224, "percentage": 9.46, "elapsed_time": "0:23:51", "remaining_time": "3:48:21", "throughput": 9104.79, "total_tokens": 13033792} +{"current_steps": 19365, "total_steps": 204665, "loss": 0.086, "lr": 1.892216739141056e-06, "epoch": 0.4730901717440696, "percentage": 9.46, "elapsed_time": "0:23:51", "remaining_time": "3:48:21", "throughput": 9104.73, "total_tokens": 13036800} +{"current_steps": 19370, "total_steps": 204665, "loss": 0.1403, "lr": 1.892705330532076e-06, "epoch": 0.4732123225759167, "percentage": 9.46, "elapsed_time": "0:23:52", "remaining_time": "3:48:20", "throughput": 9104.83, "total_tokens": 13040128} +{"current_steps": 19375, "total_steps": 204665, "loss": 0.1043, "lr": 1.8931939219230957e-06, "epoch": 0.4733344734077639, "percentage": 9.47, "elapsed_time": "0:23:52", "remaining_time": "3:48:20", "throughput": 9105.03, "total_tokens": 13043584} +{"current_steps": 19380, "total_steps": 204665, "loss": 0.1329, "lr": 1.8936825133141151e-06, "epoch": 0.47345662423961105, "percentage": 9.47, "elapsed_time": "0:23:52", "remaining_time": "3:48:19", "throughput": 9105.01, "total_tokens": 13046656} +{"current_steps": 19385, "total_steps": 204665, "loss": 0.0519, "lr": 1.894171104705135e-06, "epoch": 0.47357877507145824, "percentage": 9.47, "elapsed_time": "0:23:53", "remaining_time": "3:48:18", "throughput": 9105.02, "total_tokens": 13049792} +{"current_steps": 19390, "total_steps": 204665, "loss": 0.1164, "lr": 1.8946596960961547e-06, "epoch": 0.4737009259033054, "percentage": 9.47, "elapsed_time": "0:23:53", "remaining_time": "3:48:18", "throughput": 9104.92, "total_tokens": 13052736} +{"current_steps": 19395, "total_steps": 204665, "loss": 0.1841, "lr": 1.8951482874871744e-06, "epoch": 0.47382307673515256, "percentage": 9.48, "elapsed_time": "0:23:53", "remaining_time": "3:48:17", "throughput": 9104.81, "total_tokens": 13055616} +{"current_steps": 19400, "total_steps": 204665, "loss": 0.1359, "lr": 1.895636878878194e-06, "epoch": 0.47394522756699975, "percentage": 9.48, "elapsed_time": "0:23:54", "remaining_time": "3:48:16", "throughput": 9104.89, "total_tokens": 13058880} +{"current_steps": 19405, "total_steps": 204665, "loss": 0.1169, "lr": 1.8961254702692138e-06, "epoch": 0.4740673783988469, "percentage": 9.48, "elapsed_time": "0:23:54", "remaining_time": "3:48:16", "throughput": 9104.92, "total_tokens": 13062080} +{"current_steps": 19410, "total_steps": 204665, "loss": 0.0242, "lr": 1.8966140616602334e-06, "epoch": 0.4741895292306941, "percentage": 9.48, "elapsed_time": "0:23:54", "remaining_time": "3:48:15", "throughput": 9105.21, "total_tokens": 13065728} +{"current_steps": 19415, "total_steps": 204665, "loss": 0.0707, "lr": 1.8971026530512531e-06, "epoch": 0.4743116800625412, "percentage": 9.49, "elapsed_time": "0:23:55", "remaining_time": "3:48:15", "throughput": 9105.29, "total_tokens": 13068992} +{"current_steps": 19420, "total_steps": 204665, "loss": 0.2041, "lr": 1.8975912444422728e-06, "epoch": 0.4744338308943884, "percentage": 9.49, "elapsed_time": "0:23:55", "remaining_time": "3:48:14", "throughput": 9105.47, "total_tokens": 13072384} +{"current_steps": 19425, "total_steps": 204665, "loss": 0.0601, "lr": 1.8980798358332925e-06, "epoch": 0.47455598172623553, "percentage": 9.49, "elapsed_time": "0:23:56", "remaining_time": "3:48:14", "throughput": 9105.54, "total_tokens": 13075648} +{"current_steps": 19430, "total_steps": 204665, "loss": 0.1482, "lr": 1.8985684272243124e-06, "epoch": 0.4746781325580827, "percentage": 9.49, "elapsed_time": "0:23:56", "remaining_time": "3:48:13", "throughput": 9105.7, "total_tokens": 13079040} +{"current_steps": 19435, "total_steps": 204665, "loss": 0.0946, "lr": 1.8990570186153318e-06, "epoch": 0.4748002833899299, "percentage": 9.5, "elapsed_time": "0:23:56", "remaining_time": "3:48:12", "throughput": 9105.78, "total_tokens": 13082304} +{"current_steps": 19440, "total_steps": 204665, "loss": 0.1373, "lr": 1.8995456100063515e-06, "epoch": 0.47492243422177705, "percentage": 9.5, "elapsed_time": "0:23:57", "remaining_time": "3:48:12", "throughput": 9105.91, "total_tokens": 13085632} +{"current_steps": 19445, "total_steps": 204665, "loss": 0.1384, "lr": 1.9000342013973714e-06, "epoch": 0.47504458505362424, "percentage": 9.5, "elapsed_time": "0:23:57", "remaining_time": "3:48:11", "throughput": 9106.05, "total_tokens": 13089024} +{"current_steps": 19450, "total_steps": 204665, "loss": 0.0781, "lr": 1.9005227927883909e-06, "epoch": 0.47516673588547137, "percentage": 9.5, "elapsed_time": "0:23:57", "remaining_time": "3:48:11", "throughput": 9106.22, "total_tokens": 13092480} +{"current_steps": 19455, "total_steps": 204665, "loss": 0.1162, "lr": 1.9010113841794106e-06, "epoch": 0.47528888671731856, "percentage": 9.51, "elapsed_time": "0:23:58", "remaining_time": "3:48:10", "throughput": 9106.47, "total_tokens": 13096064} +{"current_steps": 19460, "total_steps": 204665, "loss": 0.1173, "lr": 1.9014999755704305e-06, "epoch": 0.4754110375491657, "percentage": 9.51, "elapsed_time": "0:23:58", "remaining_time": "3:48:10", "throughput": 9106.71, "total_tokens": 13099648} +{"current_steps": 19465, "total_steps": 204665, "loss": 0.1658, "lr": 1.9019885669614501e-06, "epoch": 0.4755331883810129, "percentage": 9.51, "elapsed_time": "0:23:58", "remaining_time": "3:48:09", "throughput": 9106.75, "total_tokens": 13102848} +{"current_steps": 19470, "total_steps": 204665, "loss": 0.0847, "lr": 1.9024771583524696e-06, "epoch": 0.47565533921286, "percentage": 9.51, "elapsed_time": "0:23:59", "remaining_time": "3:48:09", "throughput": 9107.14, "total_tokens": 13106688} +{"current_steps": 19475, "total_steps": 204665, "loss": 0.1892, "lr": 1.9029657497434895e-06, "epoch": 0.4757774900447072, "percentage": 9.52, "elapsed_time": "0:23:59", "remaining_time": "3:48:08", "throughput": 9107.39, "total_tokens": 13110272} +{"current_steps": 19480, "total_steps": 204665, "loss": 0.1094, "lr": 1.9034543411345092e-06, "epoch": 0.47589964087655434, "percentage": 9.52, "elapsed_time": "0:23:59", "remaining_time": "3:48:07", "throughput": 9107.34, "total_tokens": 13113280} +{"current_steps": 19485, "total_steps": 204665, "loss": 0.0507, "lr": 1.9039429325255287e-06, "epoch": 0.47602179170840153, "percentage": 9.52, "elapsed_time": "0:24:00", "remaining_time": "3:48:07", "throughput": 9107.38, "total_tokens": 13116480} +{"current_steps": 19490, "total_steps": 204665, "loss": 0.1353, "lr": 1.9044315239165486e-06, "epoch": 0.4761439425402487, "percentage": 9.52, "elapsed_time": "0:24:00", "remaining_time": "3:48:06", "throughput": 9107.55, "total_tokens": 13119936} +{"current_steps": 19495, "total_steps": 204665, "loss": 0.0437, "lr": 1.9049201153075682e-06, "epoch": 0.47626609337209586, "percentage": 9.53, "elapsed_time": "0:24:00", "remaining_time": "3:48:06", "throughput": 9107.74, "total_tokens": 13123392} +{"current_steps": 19500, "total_steps": 204665, "loss": 0.1317, "lr": 1.9054087066985877e-06, "epoch": 0.47638824420394305, "percentage": 9.53, "elapsed_time": "0:24:01", "remaining_time": "3:48:05", "throughput": 9107.71, "total_tokens": 13126464} +{"current_steps": 19505, "total_steps": 204665, "loss": 0.2263, "lr": 1.9058972980896076e-06, "epoch": 0.4765103950357902, "percentage": 9.53, "elapsed_time": "0:24:01", "remaining_time": "3:48:05", "throughput": 9107.94, "total_tokens": 13130048} +{"current_steps": 19510, "total_steps": 204665, "loss": 0.1022, "lr": 1.9063858894806273e-06, "epoch": 0.47663254586763737, "percentage": 9.53, "elapsed_time": "0:24:01", "remaining_time": "3:48:04", "throughput": 9108.08, "total_tokens": 13133440} +{"current_steps": 19515, "total_steps": 204665, "loss": 0.1827, "lr": 1.906874480871647e-06, "epoch": 0.4767546966994845, "percentage": 9.54, "elapsed_time": "0:24:02", "remaining_time": "3:48:03", "throughput": 9108.03, "total_tokens": 13136448} +{"current_steps": 19520, "total_steps": 204665, "loss": 0.1598, "lr": 1.9073630722626666e-06, "epoch": 0.4768768475313317, "percentage": 9.54, "elapsed_time": "0:24:02", "remaining_time": "3:48:03", "throughput": 9108.03, "total_tokens": 13139584} +{"current_steps": 19525, "total_steps": 204665, "loss": 0.1595, "lr": 1.9078516636536863e-06, "epoch": 0.4769989983631788, "percentage": 9.54, "elapsed_time": "0:24:02", "remaining_time": "3:48:02", "throughput": 9108.08, "total_tokens": 13142784} +{"current_steps": 19530, "total_steps": 204665, "loss": 0.2117, "lr": 1.908340255044706e-06, "epoch": 0.477121149195026, "percentage": 9.54, "elapsed_time": "0:24:03", "remaining_time": "3:48:02", "throughput": 9108.17, "total_tokens": 13146112} +{"current_steps": 19535, "total_steps": 204665, "loss": 0.2164, "lr": 1.9088288464357257e-06, "epoch": 0.4772433000268732, "percentage": 9.54, "elapsed_time": "0:24:03", "remaining_time": "3:48:01", "throughput": 9108.33, "total_tokens": 13149504} +{"current_steps": 19540, "total_steps": 204665, "loss": 0.0371, "lr": 1.9093174378267454e-06, "epoch": 0.47736545085872034, "percentage": 9.55, "elapsed_time": "0:24:04", "remaining_time": "3:48:00", "throughput": 9108.37, "total_tokens": 13152704} +{"current_steps": 19545, "total_steps": 204665, "loss": 0.1501, "lr": 1.909806029217765e-06, "epoch": 0.47748760169056753, "percentage": 9.55, "elapsed_time": "0:24:04", "remaining_time": "3:48:00", "throughput": 9108.55, "total_tokens": 13156160} +{"current_steps": 19550, "total_steps": 204665, "loss": 0.0894, "lr": 1.9102946206087847e-06, "epoch": 0.47760975252241467, "percentage": 9.55, "elapsed_time": "0:24:04", "remaining_time": "3:47:59", "throughput": 9108.72, "total_tokens": 13159616} +{"current_steps": 19555, "total_steps": 204665, "loss": 0.1337, "lr": 1.9107832119998044e-06, "epoch": 0.47773190335426186, "percentage": 9.55, "elapsed_time": "0:24:05", "remaining_time": "3:47:59", "throughput": 9108.65, "total_tokens": 13162624} +{"current_steps": 19560, "total_steps": 204665, "loss": 0.0509, "lr": 1.911271803390824e-06, "epoch": 0.477854054186109, "percentage": 9.56, "elapsed_time": "0:24:05", "remaining_time": "3:47:58", "throughput": 9108.65, "total_tokens": 13165760} +{"current_steps": 19565, "total_steps": 204665, "loss": 0.115, "lr": 1.9117603947818438e-06, "epoch": 0.4779762050179562, "percentage": 9.56, "elapsed_time": "0:24:05", "remaining_time": "3:47:57", "throughput": 9108.73, "total_tokens": 13169024} +{"current_steps": 19570, "total_steps": 204665, "loss": 0.2129, "lr": 1.9122489861728635e-06, "epoch": 0.4780983558498033, "percentage": 9.56, "elapsed_time": "0:24:06", "remaining_time": "3:47:57", "throughput": 9108.63, "total_tokens": 13171968} +{"current_steps": 19575, "total_steps": 204665, "loss": 0.0831, "lr": 1.912737577563883e-06, "epoch": 0.4782205066816505, "percentage": 9.56, "elapsed_time": "0:24:06", "remaining_time": "3:47:56", "throughput": 9108.82, "total_tokens": 13175424} +{"current_steps": 19580, "total_steps": 204665, "loss": 0.1144, "lr": 1.913226168954903e-06, "epoch": 0.4783426575134977, "percentage": 9.57, "elapsed_time": "0:24:06", "remaining_time": "3:47:56", "throughput": 9109.01, "total_tokens": 13178880} +{"current_steps": 19585, "total_steps": 204665, "loss": 0.0943, "lr": 1.913714760345923e-06, "epoch": 0.4784648083453448, "percentage": 9.57, "elapsed_time": "0:24:07", "remaining_time": "3:47:55", "throughput": 9109.13, "total_tokens": 13182208} +{"current_steps": 19590, "total_steps": 204665, "loss": 0.0779, "lr": 1.914203351736942e-06, "epoch": 0.478586959177192, "percentage": 9.57, "elapsed_time": "0:24:07", "remaining_time": "3:47:55", "throughput": 9109.27, "total_tokens": 13185600} +{"current_steps": 19595, "total_steps": 204665, "loss": 0.0657, "lr": 1.914691943127962e-06, "epoch": 0.47870911000903915, "percentage": 9.57, "elapsed_time": "0:24:07", "remaining_time": "3:47:54", "throughput": 9109.4, "total_tokens": 13188928} +{"current_steps": 19600, "total_steps": 204665, "loss": 0.2389, "lr": 1.915180534518982e-06, "epoch": 0.47883126084088634, "percentage": 9.58, "elapsed_time": "0:24:08", "remaining_time": "3:47:53", "throughput": 9109.41, "total_tokens": 13192064} +{"current_steps": 19605, "total_steps": 204665, "loss": 0.1893, "lr": 1.9156691259100012e-06, "epoch": 0.4789534116727335, "percentage": 9.58, "elapsed_time": "0:24:08", "remaining_time": "3:47:53", "throughput": 9109.7, "total_tokens": 13195712} +{"current_steps": 19610, "total_steps": 204665, "loss": 0.262, "lr": 1.916157717301021e-06, "epoch": 0.47907556250458067, "percentage": 9.58, "elapsed_time": "0:24:08", "remaining_time": "3:47:52", "throughput": 9109.61, "total_tokens": 13198656} +{"current_steps": 19615, "total_steps": 204665, "loss": 0.1524, "lr": 1.916646308692041e-06, "epoch": 0.4791977133364278, "percentage": 9.58, "elapsed_time": "0:24:09", "remaining_time": "3:47:52", "throughput": 9109.71, "total_tokens": 13201984} +{"current_steps": 19620, "total_steps": 204665, "loss": 0.0971, "lr": 1.9171349000830607e-06, "epoch": 0.479319864168275, "percentage": 9.59, "elapsed_time": "0:24:09", "remaining_time": "3:47:51", "throughput": 9109.82, "total_tokens": 13205312} +{"current_steps": 19625, "total_steps": 204665, "loss": 0.1953, "lr": 1.91762349147408e-06, "epoch": 0.4794420150001221, "percentage": 9.59, "elapsed_time": "0:24:09", "remaining_time": "3:47:50", "throughput": 9109.71, "total_tokens": 13208192} +{"current_steps": 19630, "total_steps": 204665, "loss": 0.1565, "lr": 1.9181120828651e-06, "epoch": 0.4795641658319693, "percentage": 9.59, "elapsed_time": "0:24:10", "remaining_time": "3:47:50", "throughput": 9109.83, "total_tokens": 13211520} +{"current_steps": 19635, "total_steps": 204665, "loss": 0.1034, "lr": 1.9186006742561197e-06, "epoch": 0.4796863166638165, "percentage": 9.59, "elapsed_time": "0:24:10", "remaining_time": "3:47:49", "throughput": 9109.93, "total_tokens": 13214848} +{"current_steps": 19640, "total_steps": 204665, "loss": 0.1566, "lr": 1.919089265647139e-06, "epoch": 0.47980846749566364, "percentage": 9.6, "elapsed_time": "0:24:10", "remaining_time": "3:47:49", "throughput": 9110.12, "total_tokens": 13218304} +{"current_steps": 19645, "total_steps": 204665, "loss": 0.139, "lr": 1.919577857038159e-06, "epoch": 0.4799306183275108, "percentage": 9.6, "elapsed_time": "0:24:11", "remaining_time": "3:47:48", "throughput": 9110.28, "total_tokens": 13221696} +{"current_steps": 19650, "total_steps": 204665, "loss": 0.0572, "lr": 1.9200664484291788e-06, "epoch": 0.48005276915935796, "percentage": 9.6, "elapsed_time": "0:24:11", "remaining_time": "3:47:47", "throughput": 9110.33, "total_tokens": 13224896} +{"current_steps": 19655, "total_steps": 204665, "loss": 0.1403, "lr": 1.920555039820198e-06, "epoch": 0.48017491999120515, "percentage": 9.6, "elapsed_time": "0:24:11", "remaining_time": "3:47:47", "throughput": 9110.32, "total_tokens": 13227968} +{"current_steps": 19660, "total_steps": 204665, "loss": 0.0733, "lr": 1.921043631211218e-06, "epoch": 0.4802970708230523, "percentage": 9.61, "elapsed_time": "0:24:12", "remaining_time": "3:47:46", "throughput": 9110.39, "total_tokens": 13231232} +{"current_steps": 19665, "total_steps": 204665, "loss": 0.088, "lr": 1.921532222602238e-06, "epoch": 0.4804192216548995, "percentage": 9.61, "elapsed_time": "0:24:12", "remaining_time": "3:47:46", "throughput": 9110.5, "total_tokens": 13234560} +{"current_steps": 19670, "total_steps": 204665, "loss": 0.0543, "lr": 1.9220208139932575e-06, "epoch": 0.4805413724867466, "percentage": 9.61, "elapsed_time": "0:24:13", "remaining_time": "3:47:45", "throughput": 9110.53, "total_tokens": 13237696} +{"current_steps": 19675, "total_steps": 204665, "loss": 0.1292, "lr": 1.922509405384277e-06, "epoch": 0.4806635233185938, "percentage": 9.61, "elapsed_time": "0:24:13", "remaining_time": "3:47:44", "throughput": 9110.75, "total_tokens": 13241216} +{"current_steps": 19680, "total_steps": 204665, "loss": 0.1134, "lr": 1.922997996775297e-06, "epoch": 0.480785674150441, "percentage": 9.62, "elapsed_time": "0:24:13", "remaining_time": "3:47:44", "throughput": 9110.96, "total_tokens": 13244736} +{"current_steps": 19685, "total_steps": 204665, "loss": 0.2556, "lr": 1.9234865881663165e-06, "epoch": 0.4809078249822881, "percentage": 9.62, "elapsed_time": "0:24:14", "remaining_time": "3:47:43", "throughput": 9111.03, "total_tokens": 13248000} +{"current_steps": 19690, "total_steps": 204665, "loss": 0.2345, "lr": 1.923975179557336e-06, "epoch": 0.4810299758141353, "percentage": 9.62, "elapsed_time": "0:24:14", "remaining_time": "3:47:43", "throughput": 9110.98, "total_tokens": 13251008} +{"current_steps": 19695, "total_steps": 204665, "loss": 0.1163, "lr": 1.924463770948356e-06, "epoch": 0.48115212664598245, "percentage": 9.62, "elapsed_time": "0:24:14", "remaining_time": "3:47:42", "throughput": 9111.1, "total_tokens": 13254336} +{"current_steps": 19700, "total_steps": 204665, "loss": 0.1854, "lr": 1.9249523623393756e-06, "epoch": 0.48127427747782964, "percentage": 9.63, "elapsed_time": "0:24:15", "remaining_time": "3:47:42", "throughput": 9111.41, "total_tokens": 13258048} +{"current_steps": 19705, "total_steps": 204665, "loss": 0.3226, "lr": 1.9254409537303953e-06, "epoch": 0.48139642830967677, "percentage": 9.63, "elapsed_time": "0:24:15", "remaining_time": "3:47:41", "throughput": 9111.68, "total_tokens": 13261632} +{"current_steps": 19710, "total_steps": 204665, "loss": 0.1299, "lr": 1.925929545121415e-06, "epoch": 0.48151857914152396, "percentage": 9.63, "elapsed_time": "0:24:15", "remaining_time": "3:47:40", "throughput": 9111.84, "total_tokens": 13265024} +{"current_steps": 19715, "total_steps": 204665, "loss": 0.1003, "lr": 1.9264181365124346e-06, "epoch": 0.4816407299733711, "percentage": 9.63, "elapsed_time": "0:24:16", "remaining_time": "3:47:40", "throughput": 9111.99, "total_tokens": 13268416} +{"current_steps": 19720, "total_steps": 204665, "loss": 0.16, "lr": 1.9269067279034543e-06, "epoch": 0.4817628808052183, "percentage": 9.64, "elapsed_time": "0:24:16", "remaining_time": "3:47:39", "throughput": 9112.28, "total_tokens": 13272064} +{"current_steps": 19725, "total_steps": 204665, "loss": 0.1596, "lr": 1.927395319294474e-06, "epoch": 0.4818850316370655, "percentage": 9.64, "elapsed_time": "0:24:16", "remaining_time": "3:47:39", "throughput": 9112.56, "total_tokens": 13275712} +{"current_steps": 19730, "total_steps": 204665, "loss": 0.1389, "lr": 1.9278839106854937e-06, "epoch": 0.4820071824689126, "percentage": 9.64, "elapsed_time": "0:24:17", "remaining_time": "3:47:38", "throughput": 9112.64, "total_tokens": 13278976} +{"current_steps": 19735, "total_steps": 204665, "loss": 0.0965, "lr": 1.9283725020765134e-06, "epoch": 0.4821293333007598, "percentage": 9.64, "elapsed_time": "0:24:17", "remaining_time": "3:47:38", "throughput": 9112.84, "total_tokens": 13282496} +{"current_steps": 19740, "total_steps": 204665, "loss": 0.1027, "lr": 1.928861093467533e-06, "epoch": 0.48225148413260693, "percentage": 9.65, "elapsed_time": "0:24:17", "remaining_time": "3:47:37", "throughput": 9112.82, "total_tokens": 13285568} +{"current_steps": 19745, "total_steps": 204665, "loss": 0.0695, "lr": 1.9293496848585527e-06, "epoch": 0.4823736349644541, "percentage": 9.65, "elapsed_time": "0:24:18", "remaining_time": "3:47:37", "throughput": 9112.96, "total_tokens": 13288960} +{"current_steps": 19750, "total_steps": 204665, "loss": 0.109, "lr": 1.9298382762495724e-06, "epoch": 0.48249578579630126, "percentage": 9.65, "elapsed_time": "0:24:18", "remaining_time": "3:47:36", "throughput": 9113.04, "total_tokens": 13292224} +{"current_steps": 19755, "total_steps": 204665, "loss": 0.1414, "lr": 1.930326867640592e-06, "epoch": 0.48261793662814845, "percentage": 9.65, "elapsed_time": "0:24:18", "remaining_time": "3:47:35", "throughput": 9113.45, "total_tokens": 13296064} +{"current_steps": 19760, "total_steps": 204665, "loss": 0.0575, "lr": 1.9308154590316118e-06, "epoch": 0.4827400874599956, "percentage": 9.65, "elapsed_time": "0:24:19", "remaining_time": "3:47:35", "throughput": 9113.72, "total_tokens": 13299648} +{"current_steps": 19765, "total_steps": 204665, "loss": 0.1743, "lr": 1.9313040504226314e-06, "epoch": 0.48286223829184277, "percentage": 9.66, "elapsed_time": "0:24:19", "remaining_time": "3:47:34", "throughput": 9113.8, "total_tokens": 13302912} +{"current_steps": 19770, "total_steps": 204665, "loss": 0.0443, "lr": 1.931792641813651e-06, "epoch": 0.4829843891236899, "percentage": 9.66, "elapsed_time": "0:24:19", "remaining_time": "3:47:34", "throughput": 9113.99, "total_tokens": 13306368} +{"current_steps": 19775, "total_steps": 204665, "loss": 0.0543, "lr": 1.932281233204671e-06, "epoch": 0.4831065399555371, "percentage": 9.66, "elapsed_time": "0:24:20", "remaining_time": "3:47:33", "throughput": 9113.93, "total_tokens": 13309376} +{"current_steps": 19780, "total_steps": 204665, "loss": 0.1372, "lr": 1.9327698245956905e-06, "epoch": 0.4832286907873843, "percentage": 9.66, "elapsed_time": "0:24:20", "remaining_time": "3:47:33", "throughput": 9114.03, "total_tokens": 13312640} +{"current_steps": 19785, "total_steps": 204665, "loss": 0.0512, "lr": 1.93325841598671e-06, "epoch": 0.4833508416192314, "percentage": 9.67, "elapsed_time": "0:24:21", "remaining_time": "3:47:32", "throughput": 9114.04, "total_tokens": 13315776} +{"current_steps": 19790, "total_steps": 204665, "loss": 0.0811, "lr": 1.93374700737773e-06, "epoch": 0.4834729924510786, "percentage": 9.67, "elapsed_time": "0:24:21", "remaining_time": "3:47:31", "throughput": 9114.44, "total_tokens": 13319616} +{"current_steps": 19795, "total_steps": 204665, "loss": 0.1762, "lr": 1.9342355987687495e-06, "epoch": 0.48359514328292574, "percentage": 9.67, "elapsed_time": "0:24:21", "remaining_time": "3:47:31", "throughput": 9114.6, "total_tokens": 13323008} +{"current_steps": 19800, "total_steps": 204665, "loss": 0.0668, "lr": 1.9347241901597692e-06, "epoch": 0.48371729411477293, "percentage": 9.67, "elapsed_time": "0:24:22", "remaining_time": "3:47:30", "throughput": 9114.75, "total_tokens": 13326400} +{"current_steps": 19805, "total_steps": 204665, "loss": 0.2493, "lr": 1.935212781550789e-06, "epoch": 0.48383944494662007, "percentage": 9.68, "elapsed_time": "0:24:22", "remaining_time": "3:47:30", "throughput": 9114.85, "total_tokens": 13329728} +{"current_steps": 19810, "total_steps": 204665, "loss": 0.1694, "lr": 1.9357013729418086e-06, "epoch": 0.48396159577846726, "percentage": 9.68, "elapsed_time": "0:24:22", "remaining_time": "3:47:29", "throughput": 9115.0, "total_tokens": 13333120} +{"current_steps": 19815, "total_steps": 204665, "loss": 0.1478, "lr": 1.9361899643328283e-06, "epoch": 0.4840837466103144, "percentage": 9.68, "elapsed_time": "0:24:23", "remaining_time": "3:47:29", "throughput": 9115.18, "total_tokens": 13336576} +{"current_steps": 19820, "total_steps": 204665, "loss": 0.0925, "lr": 1.936678555723848e-06, "epoch": 0.4842058974421616, "percentage": 9.68, "elapsed_time": "0:24:23", "remaining_time": "3:47:28", "throughput": 9115.19, "total_tokens": 13339712} +{"current_steps": 19825, "total_steps": 204665, "loss": 0.089, "lr": 1.937167147114868e-06, "epoch": 0.48432804827400877, "percentage": 9.69, "elapsed_time": "0:24:23", "remaining_time": "3:47:27", "throughput": 9115.38, "total_tokens": 13343168} +{"current_steps": 19830, "total_steps": 204665, "loss": 0.1069, "lr": 1.9376557385058873e-06, "epoch": 0.4844501991058559, "percentage": 9.69, "elapsed_time": "0:24:24", "remaining_time": "3:47:27", "throughput": 9115.35, "total_tokens": 13346240} +{"current_steps": 19835, "total_steps": 204665, "loss": 0.0559, "lr": 1.938144329896907e-06, "epoch": 0.4845723499377031, "percentage": 9.69, "elapsed_time": "0:24:24", "remaining_time": "3:47:26", "throughput": 9115.66, "total_tokens": 13349888} +{"current_steps": 19840, "total_steps": 204665, "loss": 0.0851, "lr": 1.938632921287927e-06, "epoch": 0.48469450076955023, "percentage": 9.69, "elapsed_time": "0:24:24", "remaining_time": "3:47:26", "throughput": 9115.89, "total_tokens": 13353472} +{"current_steps": 19845, "total_steps": 204665, "loss": 0.1351, "lr": 1.9391215126789463e-06, "epoch": 0.4848166516013974, "percentage": 9.7, "elapsed_time": "0:24:25", "remaining_time": "3:47:25", "throughput": 9115.93, "total_tokens": 13356672} +{"current_steps": 19850, "total_steps": 204665, "loss": 0.2086, "lr": 1.939610104069966e-06, "epoch": 0.48493880243324455, "percentage": 9.7, "elapsed_time": "0:24:25", "remaining_time": "3:47:25", "throughput": 9116.09, "total_tokens": 13360064} +{"current_steps": 19855, "total_steps": 204665, "loss": 0.0905, "lr": 1.940098695460986e-06, "epoch": 0.48506095326509174, "percentage": 9.7, "elapsed_time": "0:24:25", "remaining_time": "3:47:24", "throughput": 9116.21, "total_tokens": 13363392} +{"current_steps": 19860, "total_steps": 204665, "loss": 0.1664, "lr": 1.940587286852006e-06, "epoch": 0.4851831040969389, "percentage": 9.7, "elapsed_time": "0:24:26", "remaining_time": "3:47:23", "throughput": 9116.29, "total_tokens": 13366656} +{"current_steps": 19865, "total_steps": 204665, "loss": 0.0714, "lr": 1.941075878243025e-06, "epoch": 0.48530525492878607, "percentage": 9.71, "elapsed_time": "0:24:26", "remaining_time": "3:47:23", "throughput": 9116.54, "total_tokens": 13370240} +{"current_steps": 19870, "total_steps": 204665, "loss": 0.1149, "lr": 1.941564469634045e-06, "epoch": 0.48542740576063326, "percentage": 9.71, "elapsed_time": "0:24:26", "remaining_time": "3:47:22", "throughput": 9116.66, "total_tokens": 13373568} +{"current_steps": 19875, "total_steps": 204665, "loss": 0.1532, "lr": 1.942053061025065e-06, "epoch": 0.4855495565924804, "percentage": 9.71, "elapsed_time": "0:24:27", "remaining_time": "3:47:22", "throughput": 9116.85, "total_tokens": 13377024} +{"current_steps": 19880, "total_steps": 204665, "loss": 0.133, "lr": 1.942541652416084e-06, "epoch": 0.4856717074243276, "percentage": 9.71, "elapsed_time": "0:24:27", "remaining_time": "3:47:21", "throughput": 9117.18, "total_tokens": 13380800} +{"current_steps": 19885, "total_steps": 204665, "loss": 0.1333, "lr": 1.943030243807104e-06, "epoch": 0.4857938582561747, "percentage": 9.72, "elapsed_time": "0:24:27", "remaining_time": "3:47:21", "throughput": 9117.4, "total_tokens": 13384320} +{"current_steps": 19890, "total_steps": 204665, "loss": 0.1726, "lr": 1.943518835198124e-06, "epoch": 0.4859160090880219, "percentage": 9.72, "elapsed_time": "0:24:28", "remaining_time": "3:47:20", "throughput": 9117.5, "total_tokens": 13387648} +{"current_steps": 19895, "total_steps": 204665, "loss": 0.1649, "lr": 1.9440074265891436e-06, "epoch": 0.48603815991986904, "percentage": 9.72, "elapsed_time": "0:24:28", "remaining_time": "3:47:20", "throughput": 9117.77, "total_tokens": 13391232} +{"current_steps": 19900, "total_steps": 204665, "loss": 0.0477, "lr": 1.944496017980163e-06, "epoch": 0.48616031075171623, "percentage": 9.72, "elapsed_time": "0:24:29", "remaining_time": "3:47:19", "throughput": 9118.2, "total_tokens": 13395136} +{"current_steps": 19905, "total_steps": 204665, "loss": 0.1885, "lr": 1.944984609371183e-06, "epoch": 0.48628246158356336, "percentage": 9.73, "elapsed_time": "0:24:29", "remaining_time": "3:47:19", "throughput": 9118.29, "total_tokens": 13398400} +{"current_steps": 19910, "total_steps": 204665, "loss": 0.22, "lr": 1.9454732007622026e-06, "epoch": 0.48640461241541055, "percentage": 9.73, "elapsed_time": "0:24:29", "remaining_time": "3:47:18", "throughput": 9118.44, "total_tokens": 13401792} +{"current_steps": 19915, "total_steps": 204665, "loss": 0.1027, "lr": 1.945961792153222e-06, "epoch": 0.4865267632472577, "percentage": 9.73, "elapsed_time": "0:24:30", "remaining_time": "3:47:17", "throughput": 9118.56, "total_tokens": 13405120} +{"current_steps": 19920, "total_steps": 204665, "loss": 0.064, "lr": 1.946450383544242e-06, "epoch": 0.4866489140791049, "percentage": 9.73, "elapsed_time": "0:24:30", "remaining_time": "3:47:17", "throughput": 9118.99, "total_tokens": 13409088} +{"current_steps": 19925, "total_steps": 204665, "loss": 0.0144, "lr": 1.9469389749352617e-06, "epoch": 0.48677106491095207, "percentage": 9.74, "elapsed_time": "0:24:30", "remaining_time": "3:47:17", "throughput": 9119.29, "total_tokens": 13412736} +{"current_steps": 19930, "total_steps": 204665, "loss": 0.0679, "lr": 1.947427566326281e-06, "epoch": 0.4868932157427992, "percentage": 9.74, "elapsed_time": "0:24:31", "remaining_time": "3:47:16", "throughput": 9119.29, "total_tokens": 13415872} +{"current_steps": 19935, "total_steps": 204665, "loss": 0.1872, "lr": 1.947916157717301e-06, "epoch": 0.4870153665746464, "percentage": 9.74, "elapsed_time": "0:24:31", "remaining_time": "3:47:15", "throughput": 9119.36, "total_tokens": 13419136} +{"current_steps": 19940, "total_steps": 204665, "loss": 0.3057, "lr": 1.9484047491083207e-06, "epoch": 0.4871375174064935, "percentage": 9.74, "elapsed_time": "0:24:31", "remaining_time": "3:47:15", "throughput": 9119.67, "total_tokens": 13422848} +{"current_steps": 19945, "total_steps": 204665, "loss": 0.1605, "lr": 1.9488933404993404e-06, "epoch": 0.4872596682383407, "percentage": 9.75, "elapsed_time": "0:24:32", "remaining_time": "3:47:14", "throughput": 9120.13, "total_tokens": 13426816} +{"current_steps": 19950, "total_steps": 204665, "loss": 0.0222, "lr": 1.94938193189036e-06, "epoch": 0.48738181907018785, "percentage": 9.75, "elapsed_time": "0:24:32", "remaining_time": "3:47:14", "throughput": 9120.07, "total_tokens": 13429824} +{"current_steps": 19955, "total_steps": 204665, "loss": 0.1204, "lr": 1.9498705232813798e-06, "epoch": 0.48750396990203504, "percentage": 9.75, "elapsed_time": "0:24:32", "remaining_time": "3:47:13", "throughput": 9120.13, "total_tokens": 13433088} +{"current_steps": 19960, "total_steps": 204665, "loss": 0.1232, "lr": 1.9503591146723994e-06, "epoch": 0.4876261207338822, "percentage": 9.75, "elapsed_time": "0:24:33", "remaining_time": "3:47:13", "throughput": 9120.22, "total_tokens": 13436416} +{"current_steps": 19965, "total_steps": 204665, "loss": 0.0839, "lr": 1.950847706063419e-06, "epoch": 0.48774827156572936, "percentage": 9.75, "elapsed_time": "0:24:33", "remaining_time": "3:47:12", "throughput": 9120.44, "total_tokens": 13439936} +{"current_steps": 19970, "total_steps": 204665, "loss": 0.0839, "lr": 1.951336297454439e-06, "epoch": 0.48787042239757655, "percentage": 9.76, "elapsed_time": "0:24:33", "remaining_time": "3:47:12", "throughput": 9120.53, "total_tokens": 13443200} +{"current_steps": 19975, "total_steps": 204665, "loss": 0.0941, "lr": 1.9518248888454585e-06, "epoch": 0.4879925732294237, "percentage": 9.76, "elapsed_time": "0:24:34", "remaining_time": "3:47:11", "throughput": 9120.56, "total_tokens": 13446400} +{"current_steps": 19980, "total_steps": 204665, "loss": 0.0274, "lr": 1.952313480236478e-06, "epoch": 0.4881147240612709, "percentage": 9.76, "elapsed_time": "0:24:34", "remaining_time": "3:47:10", "throughput": 9120.95, "total_tokens": 13450240} +{"current_steps": 19985, "total_steps": 204665, "loss": 0.0952, "lr": 1.952802071627498e-06, "epoch": 0.488236874893118, "percentage": 9.76, "elapsed_time": "0:24:35", "remaining_time": "3:47:12", "throughput": 9119.38, "total_tokens": 13453376} +{"current_steps": 19990, "total_steps": 204665, "loss": 0.1549, "lr": 1.9532906630185175e-06, "epoch": 0.4883590257249652, "percentage": 9.77, "elapsed_time": "0:24:35", "remaining_time": "3:47:12", "throughput": 9119.39, "total_tokens": 13456512} +{"current_steps": 19995, "total_steps": 204665, "loss": 0.1634, "lr": 1.953779254409537e-06, "epoch": 0.48848117655681234, "percentage": 9.77, "elapsed_time": "0:24:35", "remaining_time": "3:47:11", "throughput": 9119.44, "total_tokens": 13459712} +{"current_steps": 20000, "total_steps": 204665, "loss": 0.1312, "lr": 1.954267845800557e-06, "epoch": 0.4886033273886595, "percentage": 9.77, "elapsed_time": "0:24:36", "remaining_time": "3:47:10", "throughput": 9119.51, "total_tokens": 13462976} +{"current_steps": 20005, "total_steps": 204665, "loss": 0.159, "lr": 1.9547564371915766e-06, "epoch": 0.48872547822050666, "percentage": 9.77, "elapsed_time": "0:24:36", "remaining_time": "3:47:10", "throughput": 9119.71, "total_tokens": 13466432} +{"current_steps": 20010, "total_steps": 204665, "loss": 0.1475, "lr": 1.9552450285825963e-06, "epoch": 0.48884762905235385, "percentage": 9.78, "elapsed_time": "0:24:36", "remaining_time": "3:47:09", "throughput": 9119.94, "total_tokens": 13470016} +{"current_steps": 20015, "total_steps": 204665, "loss": 0.1175, "lr": 1.955733619973616e-06, "epoch": 0.48896977988420104, "percentage": 9.78, "elapsed_time": "0:24:37", "remaining_time": "3:47:09", "throughput": 9120.6, "total_tokens": 13474368} +{"current_steps": 20020, "total_steps": 204665, "loss": 0.1288, "lr": 1.9562222113646356e-06, "epoch": 0.4890919307160482, "percentage": 9.78, "elapsed_time": "0:24:37", "remaining_time": "3:47:08", "throughput": 9120.72, "total_tokens": 13477696} +{"current_steps": 20025, "total_steps": 204665, "loss": 0.1885, "lr": 1.9567108027556553e-06, "epoch": 0.48921408154789536, "percentage": 9.78, "elapsed_time": "0:24:38", "remaining_time": "3:47:08", "throughput": 9120.89, "total_tokens": 13481152} +{"current_steps": 20030, "total_steps": 204665, "loss": 0.0811, "lr": 1.957199394146675e-06, "epoch": 0.4893362323797425, "percentage": 9.79, "elapsed_time": "0:24:38", "remaining_time": "3:47:07", "throughput": 9121.07, "total_tokens": 13484608} +{"current_steps": 20035, "total_steps": 204665, "loss": 0.1319, "lr": 1.9576879855376947e-06, "epoch": 0.4894583832115897, "percentage": 9.79, "elapsed_time": "0:24:38", "remaining_time": "3:47:07", "throughput": 9121.12, "total_tokens": 13487808} +{"current_steps": 20040, "total_steps": 204665, "loss": 0.136, "lr": 1.9581765769287143e-06, "epoch": 0.4895805340434368, "percentage": 9.79, "elapsed_time": "0:24:39", "remaining_time": "3:47:06", "throughput": 9121.17, "total_tokens": 13491008} +{"current_steps": 20045, "total_steps": 204665, "loss": 0.0896, "lr": 1.958665168319734e-06, "epoch": 0.489702684875284, "percentage": 9.79, "elapsed_time": "0:24:39", "remaining_time": "3:47:05", "throughput": 9121.13, "total_tokens": 13494016} +{"current_steps": 20050, "total_steps": 204665, "loss": 0.1986, "lr": 1.959153759710754e-06, "epoch": 0.48982483570713115, "percentage": 9.8, "elapsed_time": "0:24:39", "remaining_time": "3:47:05", "throughput": 9121.37, "total_tokens": 13497600} +{"current_steps": 20055, "total_steps": 204665, "loss": 0.1285, "lr": 1.9596423511017734e-06, "epoch": 0.48994698653897834, "percentage": 9.8, "elapsed_time": "0:24:40", "remaining_time": "3:47:04", "throughput": 9121.35, "total_tokens": 13500672} +{"current_steps": 20060, "total_steps": 204665, "loss": 0.0348, "lr": 1.960130942492793e-06, "epoch": 0.49006913737082547, "percentage": 9.8, "elapsed_time": "0:24:40", "remaining_time": "3:47:04", "throughput": 9121.37, "total_tokens": 13503808} +{"current_steps": 20065, "total_steps": 204665, "loss": 0.0635, "lr": 1.9606195338838127e-06, "epoch": 0.49019128820267266, "percentage": 9.8, "elapsed_time": "0:24:40", "remaining_time": "3:47:03", "throughput": 9121.53, "total_tokens": 13507200} +{"current_steps": 20070, "total_steps": 204665, "loss": 0.1066, "lr": 1.9611081252748324e-06, "epoch": 0.49031343903451985, "percentage": 9.81, "elapsed_time": "0:24:41", "remaining_time": "3:47:02", "throughput": 9121.6, "total_tokens": 13510464} +{"current_steps": 20075, "total_steps": 204665, "loss": 0.0995, "lr": 1.961596716665852e-06, "epoch": 0.490435589866367, "percentage": 9.81, "elapsed_time": "0:24:41", "remaining_time": "3:47:02", "throughput": 9121.54, "total_tokens": 13513472} +{"current_steps": 20080, "total_steps": 204665, "loss": 0.2648, "lr": 1.962085308056872e-06, "epoch": 0.4905577406982142, "percentage": 9.81, "elapsed_time": "0:24:41", "remaining_time": "3:47:01", "throughput": 9121.59, "total_tokens": 13516672} +{"current_steps": 20085, "total_steps": 204665, "loss": 0.1242, "lr": 1.9625738994478915e-06, "epoch": 0.4906798915300613, "percentage": 9.81, "elapsed_time": "0:24:42", "remaining_time": "3:47:01", "throughput": 9121.59, "total_tokens": 13519808} +{"current_steps": 20090, "total_steps": 204665, "loss": 0.0692, "lr": 1.963062490838911e-06, "epoch": 0.4908020423619085, "percentage": 9.82, "elapsed_time": "0:24:42", "remaining_time": "3:47:00", "throughput": 9121.53, "total_tokens": 13522816} +{"current_steps": 20095, "total_steps": 204665, "loss": 0.3476, "lr": 1.963551082229931e-06, "epoch": 0.49092419319375563, "percentage": 9.82, "elapsed_time": "0:24:42", "remaining_time": "3:46:59", "throughput": 9121.67, "total_tokens": 13526208} +{"current_steps": 20100, "total_steps": 204665, "loss": 0.3069, "lr": 1.964039673620951e-06, "epoch": 0.4910463440256028, "percentage": 9.82, "elapsed_time": "0:24:43", "remaining_time": "3:46:59", "throughput": 9121.84, "total_tokens": 13529664} +{"current_steps": 20105, "total_steps": 204665, "loss": 0.0861, "lr": 1.96452826501197e-06, "epoch": 0.49116849485744996, "percentage": 9.82, "elapsed_time": "0:24:43", "remaining_time": "3:46:58", "throughput": 9122.04, "total_tokens": 13533184} +{"current_steps": 20110, "total_steps": 204665, "loss": 0.1571, "lr": 1.96501685640299e-06, "epoch": 0.49129064568929715, "percentage": 9.83, "elapsed_time": "0:24:43", "remaining_time": "3:46:58", "throughput": 9122.25, "total_tokens": 13536704} +{"current_steps": 20115, "total_steps": 204665, "loss": 0.0438, "lr": 1.96550544779401e-06, "epoch": 0.49141279652114433, "percentage": 9.83, "elapsed_time": "0:24:44", "remaining_time": "3:46:57", "throughput": 9122.48, "total_tokens": 13540224} +{"current_steps": 20120, "total_steps": 204665, "loss": 0.1241, "lr": 1.9659940391850292e-06, "epoch": 0.49153494735299147, "percentage": 9.83, "elapsed_time": "0:24:44", "remaining_time": "3:46:57", "throughput": 9122.7, "total_tokens": 13543808} +{"current_steps": 20125, "total_steps": 204665, "loss": 0.0822, "lr": 1.966482630576049e-06, "epoch": 0.49165709818483866, "percentage": 9.83, "elapsed_time": "0:24:44", "remaining_time": "3:46:56", "throughput": 9122.87, "total_tokens": 13547264} +{"current_steps": 20130, "total_steps": 204665, "loss": 0.1256, "lr": 1.966971221967069e-06, "epoch": 0.4917792490166858, "percentage": 9.84, "elapsed_time": "0:24:45", "remaining_time": "3:46:56", "throughput": 9122.88, "total_tokens": 13550400} +{"current_steps": 20135, "total_steps": 204665, "loss": 0.0989, "lr": 1.9674598133580887e-06, "epoch": 0.491901399848533, "percentage": 9.84, "elapsed_time": "0:24:45", "remaining_time": "3:46:55", "throughput": 9123.15, "total_tokens": 13554048} +{"current_steps": 20140, "total_steps": 204665, "loss": 0.0538, "lr": 1.967948404749108e-06, "epoch": 0.4920235506803801, "percentage": 9.84, "elapsed_time": "0:24:46", "remaining_time": "3:46:55", "throughput": 9123.17, "total_tokens": 13557248} +{"current_steps": 20145, "total_steps": 204665, "loss": 0.1511, "lr": 1.968436996140128e-06, "epoch": 0.4921457015122273, "percentage": 9.84, "elapsed_time": "0:24:46", "remaining_time": "3:46:54", "throughput": 9123.49, "total_tokens": 13560960} +{"current_steps": 20150, "total_steps": 204665, "loss": 0.1369, "lr": 1.9689255875311478e-06, "epoch": 0.49226785234407444, "percentage": 9.85, "elapsed_time": "0:24:46", "remaining_time": "3:46:53", "throughput": 9123.34, "total_tokens": 13563776} +{"current_steps": 20155, "total_steps": 204665, "loss": 0.1426, "lr": 1.969414178922167e-06, "epoch": 0.49239000317592163, "percentage": 9.85, "elapsed_time": "0:24:47", "remaining_time": "3:46:53", "throughput": 9123.39, "total_tokens": 13566976} +{"current_steps": 20160, "total_steps": 204665, "loss": 0.0422, "lr": 1.969902770313187e-06, "epoch": 0.49251215400776877, "percentage": 9.85, "elapsed_time": "0:24:47", "remaining_time": "3:46:52", "throughput": 9123.5, "total_tokens": 13570304} +{"current_steps": 20165, "total_steps": 204665, "loss": 0.2077, "lr": 1.970391361704207e-06, "epoch": 0.49263430483961596, "percentage": 9.85, "elapsed_time": "0:24:47", "remaining_time": "3:46:52", "throughput": 9123.86, "total_tokens": 13574080} +{"current_steps": 20170, "total_steps": 204665, "loss": 0.1064, "lr": 1.9708799530952265e-06, "epoch": 0.49275645567146314, "percentage": 9.86, "elapsed_time": "0:24:48", "remaining_time": "3:46:51", "throughput": 9123.95, "total_tokens": 13577344} +{"current_steps": 20175, "total_steps": 204665, "loss": 0.0783, "lr": 1.971368544486246e-06, "epoch": 0.4928786065033103, "percentage": 9.86, "elapsed_time": "0:24:48", "remaining_time": "3:46:51", "throughput": 9123.95, "total_tokens": 13580480} +{"current_steps": 20180, "total_steps": 204665, "loss": 0.1319, "lr": 1.971857135877266e-06, "epoch": 0.49300075733515747, "percentage": 9.86, "elapsed_time": "0:24:48", "remaining_time": "3:46:50", "throughput": 9123.85, "total_tokens": 13583424} +{"current_steps": 20185, "total_steps": 204665, "loss": 0.1384, "lr": 1.9723457272682855e-06, "epoch": 0.4931229081670046, "percentage": 9.86, "elapsed_time": "0:24:49", "remaining_time": "3:46:49", "throughput": 9123.89, "total_tokens": 13586624} +{"current_steps": 20190, "total_steps": 204665, "loss": 0.0931, "lr": 1.972834318659305e-06, "epoch": 0.4932450589988518, "percentage": 9.86, "elapsed_time": "0:24:49", "remaining_time": "3:46:49", "throughput": 9123.96, "total_tokens": 13589888} +{"current_steps": 20195, "total_steps": 204665, "loss": 0.1851, "lr": 1.973322910050325e-06, "epoch": 0.4933672098306989, "percentage": 9.87, "elapsed_time": "0:24:49", "remaining_time": "3:46:48", "throughput": 9124.1, "total_tokens": 13593280} +{"current_steps": 20200, "total_steps": 204665, "loss": 0.0802, "lr": 1.9738115014413446e-06, "epoch": 0.4934893606625461, "percentage": 9.87, "elapsed_time": "0:24:50", "remaining_time": "3:46:48", "throughput": 9124.2, "total_tokens": 13596608} +{"current_steps": 20205, "total_steps": 204665, "loss": 0.0851, "lr": 1.9743000928323642e-06, "epoch": 0.49361151149439325, "percentage": 9.87, "elapsed_time": "0:24:50", "remaining_time": "3:46:47", "throughput": 9124.3, "total_tokens": 13599936} +{"current_steps": 20210, "total_steps": 204665, "loss": 0.1579, "lr": 1.974788684223384e-06, "epoch": 0.49373366232624044, "percentage": 9.87, "elapsed_time": "0:24:50", "remaining_time": "3:46:46", "throughput": 9124.33, "total_tokens": 13603072} +{"current_steps": 20215, "total_steps": 204665, "loss": 0.1716, "lr": 1.9752772756144036e-06, "epoch": 0.49385581315808763, "percentage": 9.88, "elapsed_time": "0:24:51", "remaining_time": "3:46:46", "throughput": 9124.57, "total_tokens": 13606656} +{"current_steps": 20220, "total_steps": 204665, "loss": 0.1867, "lr": 1.9757658670054233e-06, "epoch": 0.49397796398993477, "percentage": 9.88, "elapsed_time": "0:24:51", "remaining_time": "3:46:45", "throughput": 9124.72, "total_tokens": 13610048} +{"current_steps": 20225, "total_steps": 204665, "loss": 0.095, "lr": 1.976254458396443e-06, "epoch": 0.49410011482178195, "percentage": 9.88, "elapsed_time": "0:24:51", "remaining_time": "3:46:45", "throughput": 9124.9, "total_tokens": 13613504} +{"current_steps": 20230, "total_steps": 204665, "loss": 0.1053, "lr": 1.9767430497874627e-06, "epoch": 0.4942222656536291, "percentage": 9.88, "elapsed_time": "0:24:52", "remaining_time": "3:46:44", "throughput": 9125.34, "total_tokens": 13617472} +{"current_steps": 20235, "total_steps": 204665, "loss": 0.0849, "lr": 1.9772316411784823e-06, "epoch": 0.4943444164854763, "percentage": 9.89, "elapsed_time": "0:24:52", "remaining_time": "3:46:44", "throughput": 9125.74, "total_tokens": 13621312} +{"current_steps": 20240, "total_steps": 204665, "loss": 0.0733, "lr": 1.977720232569502e-06, "epoch": 0.4944665673173234, "percentage": 9.89, "elapsed_time": "0:24:52", "remaining_time": "3:46:43", "throughput": 9125.85, "total_tokens": 13624640} +{"current_steps": 20245, "total_steps": 204665, "loss": 0.1103, "lr": 1.9782088239605217e-06, "epoch": 0.4945887181491706, "percentage": 9.89, "elapsed_time": "0:24:53", "remaining_time": "3:46:43", "throughput": 9125.99, "total_tokens": 13628032} +{"current_steps": 20250, "total_steps": 204665, "loss": 0.153, "lr": 1.9786974153515414e-06, "epoch": 0.49471086898101774, "percentage": 9.89, "elapsed_time": "0:24:53", "remaining_time": "3:46:42", "throughput": 9126.06, "total_tokens": 13631296} +{"current_steps": 20255, "total_steps": 204665, "loss": 0.0764, "lr": 1.979186006742561e-06, "epoch": 0.4948330198128649, "percentage": 9.9, "elapsed_time": "0:24:54", "remaining_time": "3:46:42", "throughput": 9126.28, "total_tokens": 13634816} +{"current_steps": 20260, "total_steps": 204665, "loss": 0.075, "lr": 1.9796745981335807e-06, "epoch": 0.4949551706447121, "percentage": 9.9, "elapsed_time": "0:24:54", "remaining_time": "3:46:41", "throughput": 9126.29, "total_tokens": 13637952} +{"current_steps": 20265, "total_steps": 204665, "loss": 0.1036, "lr": 1.9801631895246004e-06, "epoch": 0.49507732147655925, "percentage": 9.9, "elapsed_time": "0:24:54", "remaining_time": "3:46:40", "throughput": 9126.45, "total_tokens": 13641344} +{"current_steps": 20270, "total_steps": 204665, "loss": 0.0937, "lr": 1.98065178091562e-06, "epoch": 0.49519947230840644, "percentage": 9.9, "elapsed_time": "0:24:55", "remaining_time": "3:46:40", "throughput": 9126.74, "total_tokens": 13644992} +{"current_steps": 20275, "total_steps": 204665, "loss": 0.0303, "lr": 1.9811403723066398e-06, "epoch": 0.4953216231402536, "percentage": 9.91, "elapsed_time": "0:24:55", "remaining_time": "3:46:39", "throughput": 9126.81, "total_tokens": 13648256} +{"current_steps": 20280, "total_steps": 204665, "loss": 0.0959, "lr": 1.9816289636976595e-06, "epoch": 0.49544377397210076, "percentage": 9.91, "elapsed_time": "0:24:55", "remaining_time": "3:46:39", "throughput": 9127.03, "total_tokens": 13651776} +{"current_steps": 20285, "total_steps": 204665, "loss": 0.1028, "lr": 1.982117555088679e-06, "epoch": 0.4955659248039479, "percentage": 9.91, "elapsed_time": "0:24:56", "remaining_time": "3:46:38", "throughput": 9127.19, "total_tokens": 13655232} +{"current_steps": 20290, "total_steps": 204665, "loss": 0.1079, "lr": 1.982606146479699e-06, "epoch": 0.4956880756357951, "percentage": 9.91, "elapsed_time": "0:24:56", "remaining_time": "3:46:38", "throughput": 9127.67, "total_tokens": 13659264} +{"current_steps": 20295, "total_steps": 204665, "loss": 0.11, "lr": 1.9830947378707185e-06, "epoch": 0.4958102264676422, "percentage": 9.92, "elapsed_time": "0:24:56", "remaining_time": "3:46:37", "throughput": 9128.13, "total_tokens": 13663232} +{"current_steps": 20300, "total_steps": 204665, "loss": 0.1488, "lr": 1.983583329261738e-06, "epoch": 0.4959323772994894, "percentage": 9.92, "elapsed_time": "0:24:57", "remaining_time": "3:46:37", "throughput": 9128.22, "total_tokens": 13666496} +{"current_steps": 20305, "total_steps": 204665, "loss": 0.1363, "lr": 1.984071920652758e-06, "epoch": 0.49605452813133655, "percentage": 9.92, "elapsed_time": "0:24:57", "remaining_time": "3:46:36", "throughput": 9128.41, "total_tokens": 13669952} +{"current_steps": 20310, "total_steps": 204665, "loss": 0.0476, "lr": 1.9845605120437776e-06, "epoch": 0.49617667896318374, "percentage": 9.92, "elapsed_time": "0:24:57", "remaining_time": "3:46:36", "throughput": 9128.89, "total_tokens": 13673984} +{"current_steps": 20315, "total_steps": 204665, "loss": 0.0905, "lr": 1.9850491034347972e-06, "epoch": 0.4962988297950309, "percentage": 9.93, "elapsed_time": "0:24:58", "remaining_time": "3:46:35", "throughput": 9129.19, "total_tokens": 13677696} +{"current_steps": 20320, "total_steps": 204665, "loss": 0.1117, "lr": 1.985537694825817e-06, "epoch": 0.49642098062687806, "percentage": 9.93, "elapsed_time": "0:24:58", "remaining_time": "3:46:35", "throughput": 9129.26, "total_tokens": 13680960} +{"current_steps": 20325, "total_steps": 204665, "loss": 0.0668, "lr": 1.986026286216837e-06, "epoch": 0.49654313145872525, "percentage": 9.93, "elapsed_time": "0:24:58", "remaining_time": "3:46:34", "throughput": 9129.48, "total_tokens": 13684480} +{"current_steps": 20330, "total_steps": 204665, "loss": 0.0343, "lr": 1.9865148776078563e-06, "epoch": 0.4966652822905724, "percentage": 9.93, "elapsed_time": "0:24:59", "remaining_time": "3:46:34", "throughput": 9129.62, "total_tokens": 13687872} +{"current_steps": 20335, "total_steps": 204665, "loss": 0.0929, "lr": 1.987003468998876e-06, "epoch": 0.4967874331224196, "percentage": 9.94, "elapsed_time": "0:24:59", "remaining_time": "3:46:33", "throughput": 9129.66, "total_tokens": 13691072} +{"current_steps": 20340, "total_steps": 204665, "loss": 0.0431, "lr": 1.987492060389896e-06, "epoch": 0.4969095839542667, "percentage": 9.94, "elapsed_time": "0:24:59", "remaining_time": "3:46:33", "throughput": 9129.93, "total_tokens": 13694720} +{"current_steps": 20345, "total_steps": 204665, "loss": 0.1056, "lr": 1.9879806517809153e-06, "epoch": 0.4970317347861139, "percentage": 9.94, "elapsed_time": "0:25:00", "remaining_time": "3:46:32", "throughput": 9130.01, "total_tokens": 13697984} +{"current_steps": 20350, "total_steps": 204665, "loss": 0.1068, "lr": 1.988469243171935e-06, "epoch": 0.49715388561796103, "percentage": 9.94, "elapsed_time": "0:25:00", "remaining_time": "3:46:31", "throughput": 9130.06, "total_tokens": 13701184} +{"current_steps": 20355, "total_steps": 204665, "loss": 0.1515, "lr": 1.988957834562955e-06, "epoch": 0.4972760364498082, "percentage": 9.95, "elapsed_time": "0:25:01", "remaining_time": "3:46:31", "throughput": 9130.28, "total_tokens": 13704704} +{"current_steps": 20360, "total_steps": 204665, "loss": 0.1188, "lr": 1.9894464259539744e-06, "epoch": 0.4973981872816554, "percentage": 9.95, "elapsed_time": "0:25:01", "remaining_time": "3:46:30", "throughput": 9130.43, "total_tokens": 13708096} +{"current_steps": 20365, "total_steps": 204665, "loss": 0.1672, "lr": 1.989935017344994e-06, "epoch": 0.49752033811350255, "percentage": 9.95, "elapsed_time": "0:25:01", "remaining_time": "3:46:30", "throughput": 9130.58, "total_tokens": 13711488} +{"current_steps": 20370, "total_steps": 204665, "loss": 0.087, "lr": 1.990423608736014e-06, "epoch": 0.49764248894534974, "percentage": 9.95, "elapsed_time": "0:25:02", "remaining_time": "3:46:29", "throughput": 9130.75, "total_tokens": 13714944} +{"current_steps": 20375, "total_steps": 204665, "loss": 0.1452, "lr": 1.990912200127034e-06, "epoch": 0.49776463977719687, "percentage": 9.96, "elapsed_time": "0:25:02", "remaining_time": "3:46:29", "throughput": 9131.06, "total_tokens": 13718656} +{"current_steps": 20380, "total_steps": 204665, "loss": 0.2331, "lr": 1.991400791518053e-06, "epoch": 0.49788679060904406, "percentage": 9.96, "elapsed_time": "0:25:02", "remaining_time": "3:46:28", "throughput": 9130.9, "total_tokens": 13721472} +{"current_steps": 20385, "total_steps": 204665, "loss": 0.0584, "lr": 1.991889382909073e-06, "epoch": 0.4980089414408912, "percentage": 9.96, "elapsed_time": "0:25:03", "remaining_time": "3:46:28", "throughput": 9131.25, "total_tokens": 13725248} +{"current_steps": 20390, "total_steps": 204665, "loss": 0.1784, "lr": 1.992377974300093e-06, "epoch": 0.4981310922727384, "percentage": 9.96, "elapsed_time": "0:25:03", "remaining_time": "3:46:27", "throughput": 9131.64, "total_tokens": 13729088} +{"current_steps": 20395, "total_steps": 204665, "loss": 0.1174, "lr": 1.992866565691112e-06, "epoch": 0.4982532431045855, "percentage": 9.97, "elapsed_time": "0:25:03", "remaining_time": "3:46:27", "throughput": 9131.85, "total_tokens": 13732608} +{"current_steps": 20400, "total_steps": 204665, "loss": 0.2099, "lr": 1.9933551570821322e-06, "epoch": 0.4983753939364327, "percentage": 9.97, "elapsed_time": "0:25:04", "remaining_time": "3:46:26", "throughput": 9132.05, "total_tokens": 13736128} +{"current_steps": 20405, "total_steps": 204665, "loss": 0.0965, "lr": 1.993843748473152e-06, "epoch": 0.4984975447682799, "percentage": 9.97, "elapsed_time": "0:25:04", "remaining_time": "3:46:25", "throughput": 9132.14, "total_tokens": 13739456} +{"current_steps": 20410, "total_steps": 204665, "loss": 0.1392, "lr": 1.9943323398641716e-06, "epoch": 0.49861969560012703, "percentage": 9.97, "elapsed_time": "0:25:04", "remaining_time": "3:46:25", "throughput": 9132.12, "total_tokens": 13742528} +{"current_steps": 20415, "total_steps": 204665, "loss": 0.0692, "lr": 1.9948209312551913e-06, "epoch": 0.4987418464319742, "percentage": 9.97, "elapsed_time": "0:25:05", "remaining_time": "3:46:24", "throughput": 9132.09, "total_tokens": 13745600} +{"current_steps": 20420, "total_steps": 204665, "loss": 0.0727, "lr": 1.995309522646211e-06, "epoch": 0.49886399726382136, "percentage": 9.98, "elapsed_time": "0:25:05", "remaining_time": "3:46:24", "throughput": 9132.16, "total_tokens": 13748864} +{"current_steps": 20425, "total_steps": 204665, "loss": 0.0934, "lr": 1.9957981140372307e-06, "epoch": 0.49898614809566855, "percentage": 9.98, "elapsed_time": "0:25:05", "remaining_time": "3:46:23", "throughput": 9132.92, "total_tokens": 13753472} +{"current_steps": 20430, "total_steps": 204665, "loss": 0.0655, "lr": 1.9962867054282503e-06, "epoch": 0.4991082989275157, "percentage": 9.98, "elapsed_time": "0:25:06", "remaining_time": "3:46:23", "throughput": 9132.79, "total_tokens": 13756352} +{"current_steps": 20435, "total_steps": 204665, "loss": 0.1874, "lr": 1.99677529681927e-06, "epoch": 0.49923044975936287, "percentage": 9.98, "elapsed_time": "0:25:06", "remaining_time": "3:46:22", "throughput": 9133.0, "total_tokens": 13759872} +{"current_steps": 20440, "total_steps": 204665, "loss": 0.1828, "lr": 1.9972638882102897e-06, "epoch": 0.49935260059121, "percentage": 9.99, "elapsed_time": "0:25:06", "remaining_time": "3:46:22", "throughput": 9133.02, "total_tokens": 13763008} +{"current_steps": 20445, "total_steps": 204665, "loss": 0.2337, "lr": 1.9977524796013094e-06, "epoch": 0.4994747514230572, "percentage": 9.99, "elapsed_time": "0:25:07", "remaining_time": "3:46:21", "throughput": 9133.04, "total_tokens": 13766144} +{"current_steps": 20450, "total_steps": 204665, "loss": 0.0943, "lr": 1.998241070992329e-06, "epoch": 0.49959690225490433, "percentage": 9.99, "elapsed_time": "0:25:07", "remaining_time": "3:46:20", "throughput": 9133.12, "total_tokens": 13769408} +{"current_steps": 20455, "total_steps": 204665, "loss": 0.1177, "lr": 1.9987296623833487e-06, "epoch": 0.4997190530867515, "percentage": 9.99, "elapsed_time": "0:25:07", "remaining_time": "3:46:20", "throughput": 9133.17, "total_tokens": 13772608} +{"current_steps": 20460, "total_steps": 204665, "loss": 0.1733, "lr": 1.9992182537743684e-06, "epoch": 0.4998412039185987, "percentage": 10.0, "elapsed_time": "0:25:08", "remaining_time": "3:46:19", "throughput": 9133.14, "total_tokens": 13775680} +{"current_steps": 20465, "total_steps": 204665, "loss": 0.1021, "lr": 1.999706845165388e-06, "epoch": 0.49996335475044584, "percentage": 10.0, "elapsed_time": "0:25:08", "remaining_time": "3:46:19", "throughput": 9133.18, "total_tokens": 13778880} +{"current_steps": 20468, "total_steps": 204665, "eval_loss": 0.11281616985797882, "epoch": 0.5000366452495542, "percentage": 10.0, "elapsed_time": "0:25:56", "remaining_time": "3:53:27", "throughput": 8853.66, "total_tokens": 13780928} +{"current_steps": 20470, "total_steps": 204665, "loss": 0.1982, "lr": 1.9999999994182183e-06, "epoch": 0.500085505582293, "percentage": 10.0, "elapsed_time": "0:26:31", "remaining_time": "3:58:40", "throughput": 8660.04, "total_tokens": 13782144} +{"current_steps": 20475, "total_steps": 204665, "loss": 0.0798, "lr": 1.9999999928731765e-06, "epoch": 0.5002076564141402, "percentage": 10.0, "elapsed_time": "0:26:31", "remaining_time": "3:58:39", "throughput": 8660.31, "total_tokens": 13785600} +{"current_steps": 20480, "total_steps": 204665, "loss": 0.1128, "lr": 1.9999999790558656e-06, "epoch": 0.5003298072459873, "percentage": 10.01, "elapsed_time": "0:26:32", "remaining_time": "3:58:38", "throughput": 8660.44, "total_tokens": 13788800} +{"current_steps": 20485, "total_steps": 204665, "loss": 0.1589, "lr": 1.9999999579662855e-06, "epoch": 0.5004519580778345, "percentage": 10.01, "elapsed_time": "0:26:32", "remaining_time": "3:58:38", "throughput": 8660.7, "total_tokens": 13792256} +{"current_steps": 20490, "total_steps": 204665, "loss": 0.1436, "lr": 1.999999929604437e-06, "epoch": 0.5005741089096817, "percentage": 10.01, "elapsed_time": "0:26:32", "remaining_time": "3:58:37", "throughput": 8660.74, "total_tokens": 13795264} +{"current_steps": 20495, "total_steps": 204665, "loss": 0.0407, "lr": 1.99999989397032e-06, "epoch": 0.5006962597415289, "percentage": 10.01, "elapsed_time": "0:26:33", "remaining_time": "3:58:36", "throughput": 8661.17, "total_tokens": 13799040} +{"current_steps": 20500, "total_steps": 204665, "loss": 0.0523, "lr": 1.9999998510639352e-06, "epoch": 0.500818410573376, "percentage": 10.02, "elapsed_time": "0:26:33", "remaining_time": "3:58:35", "throughput": 8661.18, "total_tokens": 13801984} +{"current_steps": 20505, "total_steps": 204665, "loss": 0.0662, "lr": 1.999999800885282e-06, "epoch": 0.5009405614052231, "percentage": 10.02, "elapsed_time": "0:26:33", "remaining_time": "3:58:35", "throughput": 8661.44, "total_tokens": 13805440} +{"current_steps": 20510, "total_steps": 204665, "loss": 0.1458, "lr": 1.9999997434343614e-06, "epoch": 0.5010627122370703, "percentage": 10.02, "elapsed_time": "0:26:34", "remaining_time": "3:58:34", "throughput": 8661.44, "total_tokens": 13808384} +{"current_steps": 20515, "total_steps": 204665, "loss": 0.122, "lr": 1.9999996787111737e-06, "epoch": 0.5011848630689175, "percentage": 10.02, "elapsed_time": "0:26:34", "remaining_time": "3:58:33", "throughput": 8661.52, "total_tokens": 13811456} +{"current_steps": 20520, "total_steps": 204665, "loss": 0.115, "lr": 1.9999996067157195e-06, "epoch": 0.5013070139007647, "percentage": 10.03, "elapsed_time": "0:26:34", "remaining_time": "3:58:32", "throughput": 8661.56, "total_tokens": 13814464} +{"current_steps": 20525, "total_steps": 204665, "loss": 0.0875, "lr": 1.999999527447999e-06, "epoch": 0.5014291647326118, "percentage": 10.03, "elapsed_time": "0:26:35", "remaining_time": "3:58:31", "throughput": 8661.82, "total_tokens": 13817920} +{"current_steps": 20530, "total_steps": 204665, "loss": 0.0648, "lr": 1.9999994409080134e-06, "epoch": 0.501551315564459, "percentage": 10.03, "elapsed_time": "0:26:35", "remaining_time": "3:58:31", "throughput": 8661.88, "total_tokens": 13820992} +{"current_steps": 20535, "total_steps": 204665, "loss": 0.1201, "lr": 1.9999993470957628e-06, "epoch": 0.5016734663963062, "percentage": 10.03, "elapsed_time": "0:26:35", "remaining_time": "3:58:30", "throughput": 8662.2, "total_tokens": 13824576} +{"current_steps": 20540, "total_steps": 204665, "loss": 0.0275, "lr": 1.9999992460112477e-06, "epoch": 0.5017956172281534, "percentage": 10.04, "elapsed_time": "0:26:36", "remaining_time": "3:58:29", "throughput": 8662.2, "total_tokens": 13827520} +{"current_steps": 20545, "total_steps": 204665, "loss": 0.0616, "lr": 1.999999137654469e-06, "epoch": 0.5019177680600004, "percentage": 10.04, "elapsed_time": "0:26:36", "remaining_time": "3:58:28", "throughput": 8662.24, "total_tokens": 13830528} +{"current_steps": 20550, "total_steps": 204665, "loss": 0.0757, "lr": 1.999999022025428e-06, "epoch": 0.5020399188918476, "percentage": 10.04, "elapsed_time": "0:26:36", "remaining_time": "3:58:28", "throughput": 8662.48, "total_tokens": 13833984} +{"current_steps": 20555, "total_steps": 204665, "loss": 0.2078, "lr": 1.999998899124125e-06, "epoch": 0.5021620697236948, "percentage": 10.04, "elapsed_time": "0:26:37", "remaining_time": "3:58:27", "throughput": 8662.58, "total_tokens": 13837120} +{"current_steps": 20560, "total_steps": 204665, "loss": 0.0793, "lr": 1.999998768950561e-06, "epoch": 0.502284220555542, "percentage": 10.05, "elapsed_time": "0:26:37", "remaining_time": "3:58:26", "throughput": 8662.79, "total_tokens": 13840512} +{"current_steps": 20565, "total_steps": 204665, "loss": 0.1634, "lr": 1.999998631504737e-06, "epoch": 0.5024063713873892, "percentage": 10.05, "elapsed_time": "0:26:38", "remaining_time": "3:58:25", "throughput": 8663.13, "total_tokens": 13844096} +{"current_steps": 20570, "total_steps": 204665, "loss": 0.1221, "lr": 1.9999984867866536e-06, "epoch": 0.5025285222192363, "percentage": 10.05, "elapsed_time": "0:26:38", "remaining_time": "3:58:25", "throughput": 8663.22, "total_tokens": 13847168} +{"current_steps": 20575, "total_steps": 204665, "loss": 0.2725, "lr": 1.999998334796313e-06, "epoch": 0.5026506730510835, "percentage": 10.05, "elapsed_time": "0:26:38", "remaining_time": "3:58:24", "throughput": 8663.43, "total_tokens": 13850496} +{"current_steps": 20580, "total_steps": 204665, "loss": 0.2294, "lr": 1.9999981755337148e-06, "epoch": 0.5027728238829307, "percentage": 10.06, "elapsed_time": "0:26:39", "remaining_time": "3:58:23", "throughput": 8663.51, "total_tokens": 13853568} +{"current_steps": 20585, "total_steps": 204665, "loss": 0.1153, "lr": 1.9999980089988614e-06, "epoch": 0.5028949747147778, "percentage": 10.06, "elapsed_time": "0:26:39", "remaining_time": "3:58:22", "throughput": 8663.86, "total_tokens": 13857216} +{"current_steps": 20590, "total_steps": 204665, "loss": 0.0272, "lr": 1.9999978351917536e-06, "epoch": 0.5030171255466249, "percentage": 10.06, "elapsed_time": "0:26:39", "remaining_time": "3:58:22", "throughput": 8664.09, "total_tokens": 13860608} +{"current_steps": 20595, "total_steps": 204665, "loss": 0.0345, "lr": 1.999997654112392e-06, "epoch": 0.5031392763784721, "percentage": 10.06, "elapsed_time": "0:26:40", "remaining_time": "3:58:21", "throughput": 8664.55, "total_tokens": 13864448} +{"current_steps": 20600, "total_steps": 204665, "loss": 0.0895, "lr": 1.9999974657607787e-06, "epoch": 0.5032614272103193, "percentage": 10.07, "elapsed_time": "0:26:40", "remaining_time": "3:58:20", "throughput": 8664.8, "total_tokens": 13867904} +{"current_steps": 20605, "total_steps": 204665, "loss": 0.2186, "lr": 1.999997270136915e-06, "epoch": 0.5033835780421665, "percentage": 10.07, "elapsed_time": "0:26:40", "remaining_time": "3:58:20", "throughput": 8665.26, "total_tokens": 13871744} +{"current_steps": 20610, "total_steps": 204665, "loss": 0.0855, "lr": 1.9999970672408025e-06, "epoch": 0.5035057288740137, "percentage": 10.07, "elapsed_time": "0:26:41", "remaining_time": "3:58:19", "throughput": 8665.5, "total_tokens": 13875136} +{"current_steps": 20615, "total_steps": 204665, "loss": 0.1191, "lr": 1.999996857072442e-06, "epoch": 0.5036278797058608, "percentage": 10.07, "elapsed_time": "0:26:41", "remaining_time": "3:58:18", "throughput": 8665.71, "total_tokens": 13878464} +{"current_steps": 20620, "total_steps": 204665, "loss": 0.1111, "lr": 1.9999966396318354e-06, "epoch": 0.503750030537708, "percentage": 10.08, "elapsed_time": "0:26:41", "remaining_time": "3:58:17", "throughput": 8665.95, "total_tokens": 13881856} +{"current_steps": 20625, "total_steps": 204665, "loss": 0.0529, "lr": 1.9999964149189844e-06, "epoch": 0.5038721813695551, "percentage": 10.08, "elapsed_time": "0:26:42", "remaining_time": "3:58:16", "throughput": 8666.17, "total_tokens": 13885248} +{"current_steps": 20630, "total_steps": 204665, "loss": 0.0501, "lr": 1.99999618293389e-06, "epoch": 0.5039943322014023, "percentage": 10.08, "elapsed_time": "0:26:42", "remaining_time": "3:58:16", "throughput": 8666.4, "total_tokens": 13888640} +{"current_steps": 20635, "total_steps": 204665, "loss": 0.2339, "lr": 1.999995943676555e-06, "epoch": 0.5041164830332494, "percentage": 10.08, "elapsed_time": "0:26:42", "remaining_time": "3:58:15", "throughput": 8666.48, "total_tokens": 13891712} +{"current_steps": 20640, "total_steps": 204665, "loss": 0.0618, "lr": 1.9999956971469804e-06, "epoch": 0.5042386338650966, "percentage": 10.08, "elapsed_time": "0:26:43", "remaining_time": "3:58:14", "throughput": 8666.83, "total_tokens": 13895360} +{"current_steps": 20645, "total_steps": 204665, "loss": 0.1164, "lr": 1.9999954433451676e-06, "epoch": 0.5043607846969438, "percentage": 10.09, "elapsed_time": "0:26:43", "remaining_time": "3:58:13", "throughput": 8666.79, "total_tokens": 13898240} +{"current_steps": 20650, "total_steps": 204665, "loss": 0.1748, "lr": 1.9999951822711196e-06, "epoch": 0.504482935528791, "percentage": 10.09, "elapsed_time": "0:26:43", "remaining_time": "3:58:13", "throughput": 8667.15, "total_tokens": 13901888} +{"current_steps": 20655, "total_steps": 204665, "loss": 0.1279, "lr": 1.9999949139248376e-06, "epoch": 0.5046050863606382, "percentage": 10.09, "elapsed_time": "0:26:44", "remaining_time": "3:58:12", "throughput": 8667.18, "total_tokens": 13904896} +{"current_steps": 20660, "total_steps": 204665, "loss": 0.0989, "lr": 1.9999946383063233e-06, "epoch": 0.5047272371924852, "percentage": 10.09, "elapsed_time": "0:26:44", "remaining_time": "3:58:11", "throughput": 8667.05, "total_tokens": 13907584} +{"current_steps": 20665, "total_steps": 204665, "loss": 0.0806, "lr": 1.9999943554155792e-06, "epoch": 0.5048493880243324, "percentage": 10.1, "elapsed_time": "0:26:45", "remaining_time": "3:58:10", "throughput": 8667.33, "total_tokens": 13911104} +{"current_steps": 20670, "total_steps": 204665, "loss": 0.1024, "lr": 1.999994065252607e-06, "epoch": 0.5049715388561796, "percentage": 10.1, "elapsed_time": "0:26:45", "remaining_time": "3:58:10", "throughput": 8667.56, "total_tokens": 13914496} +{"current_steps": 20675, "total_steps": 204665, "loss": 0.0824, "lr": 1.9999937678174095e-06, "epoch": 0.5050936896880268, "percentage": 10.1, "elapsed_time": "0:26:45", "remaining_time": "3:58:09", "throughput": 8667.66, "total_tokens": 13917632} +{"current_steps": 20680, "total_steps": 204665, "loss": 0.0504, "lr": 1.999993463109988e-06, "epoch": 0.5052158405198739, "percentage": 10.1, "elapsed_time": "0:26:46", "remaining_time": "3:58:08", "throughput": 8668.01, "total_tokens": 13921280} +{"current_steps": 20685, "total_steps": 204665, "loss": 0.089, "lr": 1.9999931511303454e-06, "epoch": 0.5053379913517211, "percentage": 10.11, "elapsed_time": "0:26:46", "remaining_time": "3:58:07", "throughput": 8668.39, "total_tokens": 13924928} +{"current_steps": 20690, "total_steps": 204665, "loss": 0.0856, "lr": 1.999992831878483e-06, "epoch": 0.5054601421835683, "percentage": 10.11, "elapsed_time": "0:26:46", "remaining_time": "3:58:07", "throughput": 8668.34, "total_tokens": 13927744} +{"current_steps": 20695, "total_steps": 204665, "loss": 0.2061, "lr": 1.9999925053544042e-06, "epoch": 0.5055822930154155, "percentage": 10.11, "elapsed_time": "0:26:47", "remaining_time": "3:58:06", "throughput": 8668.47, "total_tokens": 13930944} +{"current_steps": 20700, "total_steps": 204665, "loss": 0.1686, "lr": 1.999992171558111e-06, "epoch": 0.5057044438472627, "percentage": 10.11, "elapsed_time": "0:26:47", "remaining_time": "3:58:05", "throughput": 8668.51, "total_tokens": 13933952} +{"current_steps": 20705, "total_steps": 204665, "loss": 0.1257, "lr": 1.9999918304896055e-06, "epoch": 0.5058265946791097, "percentage": 10.12, "elapsed_time": "0:26:47", "remaining_time": "3:58:04", "throughput": 8668.61, "total_tokens": 13937088} +{"current_steps": 20710, "total_steps": 204665, "loss": 0.0984, "lr": 1.9999914821488907e-06, "epoch": 0.5059487455109569, "percentage": 10.12, "elapsed_time": "0:26:48", "remaining_time": "3:58:03", "throughput": 8668.64, "total_tokens": 13940096} +{"current_steps": 20715, "total_steps": 204665, "loss": 0.153, "lr": 1.9999911265359687e-06, "epoch": 0.5060708963428041, "percentage": 10.12, "elapsed_time": "0:26:48", "remaining_time": "3:58:03", "throughput": 8668.8, "total_tokens": 13943360} +{"current_steps": 20720, "total_steps": 204665, "loss": 0.0386, "lr": 1.9999907636508423e-06, "epoch": 0.5061930471746513, "percentage": 10.12, "elapsed_time": "0:26:48", "remaining_time": "3:58:02", "throughput": 8669.22, "total_tokens": 13947136} +{"current_steps": 20725, "total_steps": 204665, "loss": 0.0095, "lr": 1.999990393493514e-06, "epoch": 0.5063151980064984, "percentage": 10.13, "elapsed_time": "0:26:49", "remaining_time": "3:58:01", "throughput": 8669.6, "total_tokens": 13950784} +{"current_steps": 20730, "total_steps": 204665, "loss": 0.0739, "lr": 1.9999900160639867e-06, "epoch": 0.5064373488383456, "percentage": 10.13, "elapsed_time": "0:26:49", "remaining_time": "3:58:00", "throughput": 8669.76, "total_tokens": 13954048} +{"current_steps": 20735, "total_steps": 204665, "loss": 0.0863, "lr": 1.999989631362263e-06, "epoch": 0.5065594996701928, "percentage": 10.13, "elapsed_time": "0:26:49", "remaining_time": "3:58:00", "throughput": 8670.05, "total_tokens": 13957568} +{"current_steps": 20740, "total_steps": 204665, "loss": 0.1229, "lr": 1.999989239388346e-06, "epoch": 0.50668165050204, "percentage": 10.13, "elapsed_time": "0:26:50", "remaining_time": "3:57:59", "throughput": 8670.25, "total_tokens": 13960896} +{"current_steps": 20745, "total_steps": 204665, "loss": 0.1095, "lr": 1.999988840142238e-06, "epoch": 0.5068038013338871, "percentage": 10.14, "elapsed_time": "0:26:50", "remaining_time": "3:57:58", "throughput": 8670.39, "total_tokens": 13964096} +{"current_steps": 20750, "total_steps": 204665, "loss": 0.18, "lr": 1.999988433623942e-06, "epoch": 0.5069259521657342, "percentage": 10.14, "elapsed_time": "0:26:50", "remaining_time": "3:57:57", "throughput": 8670.63, "total_tokens": 13967488} +{"current_steps": 20755, "total_steps": 204665, "loss": 0.22, "lr": 1.9999880198334615e-06, "epoch": 0.5070481029975814, "percentage": 10.14, "elapsed_time": "0:26:51", "remaining_time": "3:57:57", "throughput": 8670.64, "total_tokens": 13970496} +{"current_steps": 20760, "total_steps": 204665, "loss": 0.1957, "lr": 1.999987598770799e-06, "epoch": 0.5071702538294286, "percentage": 10.14, "elapsed_time": "0:26:51", "remaining_time": "3:57:56", "throughput": 8670.82, "total_tokens": 13973824} +{"current_steps": 20765, "total_steps": 204665, "loss": 0.2626, "lr": 1.999987170435958e-06, "epoch": 0.5072924046612758, "percentage": 10.15, "elapsed_time": "0:26:51", "remaining_time": "3:57:55", "throughput": 8670.82, "total_tokens": 13976768} +{"current_steps": 20770, "total_steps": 204665, "loss": 0.1409, "lr": 1.999986734828941e-06, "epoch": 0.5074145554931229, "percentage": 10.15, "elapsed_time": "0:26:52", "remaining_time": "3:57:54", "throughput": 8670.81, "total_tokens": 13979712} +{"current_steps": 20775, "total_steps": 204665, "loss": 0.2365, "lr": 1.9999862919497516e-06, "epoch": 0.50753670632497, "percentage": 10.15, "elapsed_time": "0:26:52", "remaining_time": "3:57:54", "throughput": 8670.92, "total_tokens": 13982848} +{"current_steps": 20780, "total_steps": 204665, "loss": 0.1471, "lr": 1.9999858417983926e-06, "epoch": 0.5076588571568172, "percentage": 10.15, "elapsed_time": "0:26:52", "remaining_time": "3:57:53", "throughput": 8671.01, "total_tokens": 13985984} +{"current_steps": 20785, "total_steps": 204665, "loss": 0.1118, "lr": 1.999985384374868e-06, "epoch": 0.5077810079886644, "percentage": 10.16, "elapsed_time": "0:26:53", "remaining_time": "3:57:52", "throughput": 8671.34, "total_tokens": 13989632} +{"current_steps": 20790, "total_steps": 204665, "loss": 0.0515, "lr": 1.9999849196791806e-06, "epoch": 0.5079031588205115, "percentage": 10.16, "elapsed_time": "0:26:53", "remaining_time": "3:57:51", "throughput": 8671.7, "total_tokens": 13993280} +{"current_steps": 20795, "total_steps": 204665, "loss": 0.0598, "lr": 1.999984447711334e-06, "epoch": 0.5080253096523587, "percentage": 10.16, "elapsed_time": "0:26:54", "remaining_time": "3:57:51", "throughput": 8671.85, "total_tokens": 13996544} +{"current_steps": 20800, "total_steps": 204665, "loss": 0.1112, "lr": 1.9999839684713317e-06, "epoch": 0.5081474604842059, "percentage": 10.16, "elapsed_time": "0:26:54", "remaining_time": "3:57:50", "throughput": 8671.9, "total_tokens": 13999680} +{"current_steps": 20805, "total_steps": 204665, "loss": 0.1447, "lr": 1.9999834819591767e-06, "epoch": 0.5082696113160531, "percentage": 10.17, "elapsed_time": "0:26:54", "remaining_time": "3:57:49", "throughput": 8671.96, "total_tokens": 14002752} +{"current_steps": 20810, "total_steps": 204665, "loss": 0.164, "lr": 1.9999829881748725e-06, "epoch": 0.5083917621479003, "percentage": 10.17, "elapsed_time": "0:26:55", "remaining_time": "3:57:48", "throughput": 8671.97, "total_tokens": 14005760} +{"current_steps": 20815, "total_steps": 204665, "loss": 0.1718, "lr": 1.9999824871184237e-06, "epoch": 0.5085139129797474, "percentage": 10.17, "elapsed_time": "0:26:55", "remaining_time": "3:57:48", "throughput": 8672.17, "total_tokens": 14009088} +{"current_steps": 20820, "total_steps": 204665, "loss": 0.085, "lr": 1.999981978789833e-06, "epoch": 0.5086360638115945, "percentage": 10.17, "elapsed_time": "0:26:55", "remaining_time": "3:57:47", "throughput": 8672.39, "total_tokens": 14012544} +{"current_steps": 20825, "total_steps": 204665, "loss": 0.1174, "lr": 1.999981463189105e-06, "epoch": 0.5087582146434417, "percentage": 10.18, "elapsed_time": "0:26:56", "remaining_time": "3:57:46", "throughput": 8672.65, "total_tokens": 14016064} +{"current_steps": 20830, "total_steps": 204665, "loss": 0.0498, "lr": 1.999980940316242e-06, "epoch": 0.5088803654752889, "percentage": 10.18, "elapsed_time": "0:26:56", "remaining_time": "3:57:46", "throughput": 8672.88, "total_tokens": 14019520} +{"current_steps": 20835, "total_steps": 204665, "loss": 0.1354, "lr": 1.9999804101712487e-06, "epoch": 0.509002516307136, "percentage": 10.18, "elapsed_time": "0:26:56", "remaining_time": "3:57:45", "throughput": 8672.91, "total_tokens": 14022528} +{"current_steps": 20840, "total_steps": 204665, "loss": 0.1094, "lr": 1.9999798727541293e-06, "epoch": 0.5091246671389832, "percentage": 10.18, "elapsed_time": "0:26:57", "remaining_time": "3:57:44", "throughput": 8673.08, "total_tokens": 14025856} +{"current_steps": 20845, "total_steps": 204665, "loss": 0.1613, "lr": 1.9999793280648873e-06, "epoch": 0.5092468179708304, "percentage": 10.18, "elapsed_time": "0:26:57", "remaining_time": "3:57:43", "throughput": 8673.19, "total_tokens": 14029056} +{"current_steps": 20850, "total_steps": 204665, "loss": 0.0166, "lr": 1.9999787761035265e-06, "epoch": 0.5093689688026776, "percentage": 10.19, "elapsed_time": "0:26:57", "remaining_time": "3:57:43", "throughput": 8673.38, "total_tokens": 14032448} +{"current_steps": 20855, "total_steps": 204665, "loss": 0.1246, "lr": 1.9999782168700507e-06, "epoch": 0.5094911196345248, "percentage": 10.19, "elapsed_time": "0:26:58", "remaining_time": "3:57:42", "throughput": 8673.52, "total_tokens": 14035712} +{"current_steps": 20860, "total_steps": 204665, "loss": 0.1157, "lr": 1.999977650364465e-06, "epoch": 0.5096132704663718, "percentage": 10.19, "elapsed_time": "0:26:58", "remaining_time": "3:57:41", "throughput": 8673.73, "total_tokens": 14039168} +{"current_steps": 20865, "total_steps": 204665, "loss": 0.0137, "lr": 1.9999770765867725e-06, "epoch": 0.509735421298219, "percentage": 10.19, "elapsed_time": "0:26:58", "remaining_time": "3:57:41", "throughput": 8673.98, "total_tokens": 14042624} +{"current_steps": 20870, "total_steps": 204665, "loss": 0.2449, "lr": 1.9999764955369777e-06, "epoch": 0.5098575721300662, "percentage": 10.2, "elapsed_time": "0:26:59", "remaining_time": "3:57:40", "throughput": 8674.2, "total_tokens": 14046016} +{"current_steps": 20875, "total_steps": 204665, "loss": 0.1225, "lr": 1.9999759072150852e-06, "epoch": 0.5099797229619134, "percentage": 10.2, "elapsed_time": "0:26:59", "remaining_time": "3:57:39", "throughput": 8674.44, "total_tokens": 14049472} +{"current_steps": 20880, "total_steps": 204665, "loss": 0.117, "lr": 1.9999753116210986e-06, "epoch": 0.5101018737937605, "percentage": 10.2, "elapsed_time": "0:26:59", "remaining_time": "3:57:39", "throughput": 8674.74, "total_tokens": 14053056} +{"current_steps": 20885, "total_steps": 204665, "loss": 0.148, "lr": 1.999974708755023e-06, "epoch": 0.5102240246256077, "percentage": 10.2, "elapsed_time": "0:27:00", "remaining_time": "3:57:38", "throughput": 8675.0, "total_tokens": 14056512} +{"current_steps": 20890, "total_steps": 204665, "loss": 0.1261, "lr": 1.9999740986168617e-06, "epoch": 0.5103461754574549, "percentage": 10.21, "elapsed_time": "0:27:00", "remaining_time": "3:57:37", "throughput": 8675.19, "total_tokens": 14059904} +{"current_steps": 20895, "total_steps": 204665, "loss": 0.0516, "lr": 1.9999734812066203e-06, "epoch": 0.510468326289302, "percentage": 10.21, "elapsed_time": "0:27:01", "remaining_time": "3:57:37", "throughput": 8675.62, "total_tokens": 14063744} +{"current_steps": 20900, "total_steps": 204665, "loss": 0.1038, "lr": 1.9999728565243025e-06, "epoch": 0.5105904771211492, "percentage": 10.21, "elapsed_time": "0:27:01", "remaining_time": "3:57:36", "throughput": 8675.88, "total_tokens": 14067264} +{"current_steps": 20905, "total_steps": 204665, "loss": 0.1236, "lr": 1.9999722245699134e-06, "epoch": 0.5107126279529963, "percentage": 10.21, "elapsed_time": "0:27:01", "remaining_time": "3:57:35", "throughput": 8676.07, "total_tokens": 14070656} +{"current_steps": 20910, "total_steps": 204665, "loss": 0.1776, "lr": 1.999971585343457e-06, "epoch": 0.5108347787848435, "percentage": 10.22, "elapsed_time": "0:27:02", "remaining_time": "3:57:35", "throughput": 8676.25, "total_tokens": 14074048} +{"current_steps": 20915, "total_steps": 204665, "loss": 0.0515, "lr": 1.999970938844939e-06, "epoch": 0.5109569296166907, "percentage": 10.22, "elapsed_time": "0:27:02", "remaining_time": "3:57:34", "throughput": 8676.37, "total_tokens": 14077312} +{"current_steps": 20920, "total_steps": 204665, "loss": 0.1006, "lr": 1.9999702850743623e-06, "epoch": 0.5110790804485379, "percentage": 10.22, "elapsed_time": "0:27:02", "remaining_time": "3:57:33", "throughput": 8676.29, "total_tokens": 14080192} +{"current_steps": 20925, "total_steps": 204665, "loss": 0.085, "lr": 1.9999696240317335e-06, "epoch": 0.511201231280385, "percentage": 10.22, "elapsed_time": "0:27:03", "remaining_time": "3:57:33", "throughput": 8676.38, "total_tokens": 14083392} +{"current_steps": 20930, "total_steps": 204665, "loss": 0.1012, "lr": 1.9999689557170562e-06, "epoch": 0.5113233821122322, "percentage": 10.23, "elapsed_time": "0:27:03", "remaining_time": "3:57:32", "throughput": 8676.82, "total_tokens": 14087296} +{"current_steps": 20935, "total_steps": 204665, "loss": 0.0761, "lr": 1.999968280130336e-06, "epoch": 0.5114455329440794, "percentage": 10.23, "elapsed_time": "0:27:03", "remaining_time": "3:57:31", "throughput": 8676.98, "total_tokens": 14090624} +{"current_steps": 20940, "total_steps": 204665, "loss": 0.0917, "lr": 1.9999675972715774e-06, "epoch": 0.5115676837759265, "percentage": 10.23, "elapsed_time": "0:27:04", "remaining_time": "3:57:31", "throughput": 8677.3, "total_tokens": 14094272} +{"current_steps": 20945, "total_steps": 204665, "loss": 0.2236, "lr": 1.9999669071407856e-06, "epoch": 0.5116898346077737, "percentage": 10.23, "elapsed_time": "0:27:04", "remaining_time": "3:57:30", "throughput": 8677.29, "total_tokens": 14097280} +{"current_steps": 20950, "total_steps": 204665, "loss": 0.0352, "lr": 1.9999662097379652e-06, "epoch": 0.5118119854396208, "percentage": 10.24, "elapsed_time": "0:27:04", "remaining_time": "3:57:29", "throughput": 8677.39, "total_tokens": 14100480} +{"current_steps": 20955, "total_steps": 204665, "loss": 0.1376, "lr": 1.9999655050631218e-06, "epoch": 0.511934136271468, "percentage": 10.24, "elapsed_time": "0:27:05", "remaining_time": "3:57:28", "throughput": 8677.48, "total_tokens": 14103680} +{"current_steps": 20960, "total_steps": 204665, "loss": 0.3091, "lr": 1.99996479311626e-06, "epoch": 0.5120562871033152, "percentage": 10.24, "elapsed_time": "0:27:05", "remaining_time": "3:57:28", "throughput": 8677.6, "total_tokens": 14106944} +{"current_steps": 20965, "total_steps": 204665, "loss": 0.2194, "lr": 1.9999640738973856e-06, "epoch": 0.5121784379351624, "percentage": 10.24, "elapsed_time": "0:27:06", "remaining_time": "3:57:27", "throughput": 8677.8, "total_tokens": 14110336} +{"current_steps": 20970, "total_steps": 204665, "loss": 0.1982, "lr": 1.9999633474065034e-06, "epoch": 0.5123005887670095, "percentage": 10.25, "elapsed_time": "0:27:06", "remaining_time": "3:57:26", "throughput": 8677.98, "total_tokens": 14113728} +{"current_steps": 20975, "total_steps": 204665, "loss": 0.0321, "lr": 1.9999626136436185e-06, "epoch": 0.5124227395988566, "percentage": 10.25, "elapsed_time": "0:27:06", "remaining_time": "3:57:26", "throughput": 8678.04, "total_tokens": 14116864} +{"current_steps": 20980, "total_steps": 204665, "loss": 0.1371, "lr": 1.9999618726087373e-06, "epoch": 0.5125448904307038, "percentage": 10.25, "elapsed_time": "0:27:07", "remaining_time": "3:57:25", "throughput": 8678.09, "total_tokens": 14120000} +{"current_steps": 20985, "total_steps": 204665, "loss": 0.0286, "lr": 1.999961124301864e-06, "epoch": 0.512667041262551, "percentage": 10.25, "elapsed_time": "0:27:07", "remaining_time": "3:57:24", "throughput": 8678.17, "total_tokens": 14123200} +{"current_steps": 20990, "total_steps": 204665, "loss": 0.1198, "lr": 1.9999603687230037e-06, "epoch": 0.5127891920943982, "percentage": 10.26, "elapsed_time": "0:27:07", "remaining_time": "3:57:24", "throughput": 8678.41, "total_tokens": 14126720} +{"current_steps": 20995, "total_steps": 204665, "loss": 0.1523, "lr": 1.9999596058721634e-06, "epoch": 0.5129113429262453, "percentage": 10.26, "elapsed_time": "0:27:08", "remaining_time": "3:57:23", "throughput": 8678.56, "total_tokens": 14130048} +{"current_steps": 21000, "total_steps": 204665, "loss": 0.1031, "lr": 1.999958835749348e-06, "epoch": 0.5130334937580925, "percentage": 10.26, "elapsed_time": "0:27:08", "remaining_time": "3:57:22", "throughput": 8678.61, "total_tokens": 14133184} +{"current_steps": 21005, "total_steps": 204665, "loss": 0.1792, "lr": 1.9999580583545625e-06, "epoch": 0.5131556445899397, "percentage": 10.26, "elapsed_time": "0:27:08", "remaining_time": "3:57:22", "throughput": 8678.91, "total_tokens": 14136768} +{"current_steps": 21010, "total_steps": 204665, "loss": 0.0683, "lr": 1.9999572736878134e-06, "epoch": 0.5132777954217869, "percentage": 10.27, "elapsed_time": "0:27:09", "remaining_time": "3:57:21", "throughput": 8679.1, "total_tokens": 14140160} +{"current_steps": 21015, "total_steps": 204665, "loss": 0.1488, "lr": 1.999956481749106e-06, "epoch": 0.5133999462536339, "percentage": 10.27, "elapsed_time": "0:27:09", "remaining_time": "3:57:20", "throughput": 8679.38, "total_tokens": 14143744} +{"current_steps": 21020, "total_steps": 204665, "loss": 0.0436, "lr": 1.999955682538446e-06, "epoch": 0.5135220970854811, "percentage": 10.27, "elapsed_time": "0:27:09", "remaining_time": "3:57:20", "throughput": 8679.53, "total_tokens": 14147072} +{"current_steps": 21025, "total_steps": 204665, "loss": 0.0647, "lr": 1.9999548760558395e-06, "epoch": 0.5136442479173283, "percentage": 10.27, "elapsed_time": "0:27:10", "remaining_time": "3:57:19", "throughput": 8679.48, "total_tokens": 14149952} +{"current_steps": 21030, "total_steps": 204665, "loss": 0.1996, "lr": 1.9999540623012917e-06, "epoch": 0.5137663987491755, "percentage": 10.28, "elapsed_time": "0:27:10", "remaining_time": "3:57:18", "throughput": 8679.6, "total_tokens": 14153216} +{"current_steps": 21035, "total_steps": 204665, "loss": 0.0563, "lr": 1.9999532412748093e-06, "epoch": 0.5138885495810227, "percentage": 10.28, "elapsed_time": "0:27:10", "remaining_time": "3:57:18", "throughput": 8679.66, "total_tokens": 14156352} +{"current_steps": 21040, "total_steps": 204665, "loss": 0.1079, "lr": 1.9999524129763983e-06, "epoch": 0.5140107004128698, "percentage": 10.28, "elapsed_time": "0:27:11", "remaining_time": "3:57:17", "throughput": 8679.89, "total_tokens": 14159808} +{"current_steps": 21045, "total_steps": 204665, "loss": 0.1015, "lr": 1.999951577406064e-06, "epoch": 0.514132851244717, "percentage": 10.28, "elapsed_time": "0:27:11", "remaining_time": "3:57:16", "throughput": 8680.14, "total_tokens": 14163328} +{"current_steps": 21050, "total_steps": 204665, "loss": 0.1002, "lr": 1.9999507345638132e-06, "epoch": 0.5142550020765642, "percentage": 10.29, "elapsed_time": "0:27:12", "remaining_time": "3:57:16", "throughput": 8680.46, "total_tokens": 14166976} +{"current_steps": 21055, "total_steps": 204665, "loss": 0.1809, "lr": 1.9999498844496515e-06, "epoch": 0.5143771529084114, "percentage": 10.29, "elapsed_time": "0:27:12", "remaining_time": "3:57:15", "throughput": 8680.5, "total_tokens": 14170048} +{"current_steps": 21060, "total_steps": 204665, "loss": 0.2079, "lr": 1.999949027063585e-06, "epoch": 0.5144993037402584, "percentage": 10.29, "elapsed_time": "0:27:12", "remaining_time": "3:57:14", "throughput": 8680.61, "total_tokens": 14173312} +{"current_steps": 21065, "total_steps": 204665, "loss": 0.2025, "lr": 1.999948162405621e-06, "epoch": 0.5146214545721056, "percentage": 10.29, "elapsed_time": "0:27:13", "remaining_time": "3:57:14", "throughput": 8680.85, "total_tokens": 14176832} +{"current_steps": 21070, "total_steps": 204665, "loss": 0.1187, "lr": 1.9999472904757644e-06, "epoch": 0.5147436054039528, "percentage": 10.29, "elapsed_time": "0:27:13", "remaining_time": "3:57:13", "throughput": 8680.79, "total_tokens": 14179712} +{"current_steps": 21075, "total_steps": 204665, "loss": 0.0518, "lr": 1.999946411274022e-06, "epoch": 0.5148657562358, "percentage": 10.3, "elapsed_time": "0:27:13", "remaining_time": "3:57:12", "throughput": 8680.87, "total_tokens": 14182912} +{"current_steps": 21080, "total_steps": 204665, "loss": 0.057, "lr": 1.9999455248004005e-06, "epoch": 0.5149879070676471, "percentage": 10.3, "elapsed_time": "0:27:14", "remaining_time": "3:57:11", "throughput": 8680.9, "total_tokens": 14185984} +{"current_steps": 21085, "total_steps": 204665, "loss": 0.2056, "lr": 1.9999446310549067e-06, "epoch": 0.5151100578994943, "percentage": 10.3, "elapsed_time": "0:27:14", "remaining_time": "3:57:11", "throughput": 8681.15, "total_tokens": 14189504} +{"current_steps": 21090, "total_steps": 204665, "loss": 0.0752, "lr": 1.9999437300375457e-06, "epoch": 0.5152322087313415, "percentage": 10.3, "elapsed_time": "0:27:14", "remaining_time": "3:57:10", "throughput": 8681.21, "total_tokens": 14192640} +{"current_steps": 21095, "total_steps": 204665, "loss": 0.1698, "lr": 1.9999428217483256e-06, "epoch": 0.5153543595631886, "percentage": 10.31, "elapsed_time": "0:27:15", "remaining_time": "3:57:09", "throughput": 8681.46, "total_tokens": 14196160} +{"current_steps": 21100, "total_steps": 204665, "loss": 0.1077, "lr": 1.9999419061872526e-06, "epoch": 0.5154765103950358, "percentage": 10.31, "elapsed_time": "0:27:15", "remaining_time": "3:57:09", "throughput": 8681.89, "total_tokens": 14200000} +{"current_steps": 21105, "total_steps": 204665, "loss": 0.1557, "lr": 1.9999409833543327e-06, "epoch": 0.5155986612268829, "percentage": 10.31, "elapsed_time": "0:27:15", "remaining_time": "3:57:08", "throughput": 8682.0, "total_tokens": 14203200} +{"current_steps": 21110, "total_steps": 204665, "loss": 0.0347, "lr": 1.9999400532495728e-06, "epoch": 0.5157208120587301, "percentage": 10.31, "elapsed_time": "0:27:16", "remaining_time": "3:57:07", "throughput": 8682.27, "total_tokens": 14206720} +{"current_steps": 21115, "total_steps": 204665, "loss": 0.2105, "lr": 1.99993911587298e-06, "epoch": 0.5158429628905773, "percentage": 10.32, "elapsed_time": "0:27:16", "remaining_time": "3:57:07", "throughput": 8682.47, "total_tokens": 14210048} +{"current_steps": 21120, "total_steps": 204665, "loss": 0.2227, "lr": 1.9999381712245613e-06, "epoch": 0.5159651137224245, "percentage": 10.32, "elapsed_time": "0:27:16", "remaining_time": "3:57:06", "throughput": 8682.73, "total_tokens": 14213504} +{"current_steps": 21125, "total_steps": 204665, "loss": 0.1199, "lr": 1.999937219304323e-06, "epoch": 0.5160872645542716, "percentage": 10.32, "elapsed_time": "0:27:17", "remaining_time": "3:57:05", "throughput": 8683.04, "total_tokens": 14217088} +{"current_steps": 21130, "total_steps": 204665, "loss": 0.0566, "lr": 1.999936260112272e-06, "epoch": 0.5162094153861188, "percentage": 10.32, "elapsed_time": "0:27:17", "remaining_time": "3:57:04", "throughput": 8683.33, "total_tokens": 14220608} +{"current_steps": 21135, "total_steps": 204665, "loss": 0.2041, "lr": 1.999935293648416e-06, "epoch": 0.5163315662179659, "percentage": 10.33, "elapsed_time": "0:27:18", "remaining_time": "3:57:04", "throughput": 8683.61, "total_tokens": 14224128} +{"current_steps": 21140, "total_steps": 204665, "loss": 0.126, "lr": 1.9999343199127616e-06, "epoch": 0.5164537170498131, "percentage": 10.33, "elapsed_time": "0:27:18", "remaining_time": "3:57:03", "throughput": 8683.63, "total_tokens": 14227136} +{"current_steps": 21145, "total_steps": 204665, "loss": 0.1738, "lr": 1.9999333389053157e-06, "epoch": 0.5165758678816603, "percentage": 10.33, "elapsed_time": "0:27:18", "remaining_time": "3:57:02", "throughput": 8683.86, "total_tokens": 14230528} +{"current_steps": 21150, "total_steps": 204665, "loss": 0.1324, "lr": 1.999932350626086e-06, "epoch": 0.5166980187135074, "percentage": 10.33, "elapsed_time": "0:27:19", "remaining_time": "3:57:02", "throughput": 8684.06, "total_tokens": 14233920} +{"current_steps": 21155, "total_steps": 204665, "loss": 0.0723, "lr": 1.999931355075079e-06, "epoch": 0.5168201695453546, "percentage": 10.34, "elapsed_time": "0:27:19", "remaining_time": "3:57:01", "throughput": 8684.09, "total_tokens": 14236928} +{"current_steps": 21160, "total_steps": 204665, "loss": 0.1253, "lr": 1.999930352252302e-06, "epoch": 0.5169423203772018, "percentage": 10.34, "elapsed_time": "0:27:19", "remaining_time": "3:57:00", "throughput": 8684.39, "total_tokens": 14240512} +{"current_steps": 21165, "total_steps": 204665, "loss": 0.0818, "lr": 1.999929342157763e-06, "epoch": 0.517064471209049, "percentage": 10.34, "elapsed_time": "0:27:20", "remaining_time": "3:56:59", "throughput": 8684.64, "total_tokens": 14243968} +{"current_steps": 21170, "total_steps": 204665, "loss": 0.1234, "lr": 1.9999283247914684e-06, "epoch": 0.517186622040896, "percentage": 10.34, "elapsed_time": "0:27:20", "remaining_time": "3:56:59", "throughput": 8684.7, "total_tokens": 14247040} +{"current_steps": 21175, "total_steps": 204665, "loss": 0.058, "lr": 1.9999273001534265e-06, "epoch": 0.5173087728727432, "percentage": 10.35, "elapsed_time": "0:27:20", "remaining_time": "3:56:58", "throughput": 8684.76, "total_tokens": 14250112} +{"current_steps": 21180, "total_steps": 204665, "loss": 0.0567, "lr": 1.999926268243644e-06, "epoch": 0.5174309237045904, "percentage": 10.35, "elapsed_time": "0:27:21", "remaining_time": "3:56:57", "throughput": 8685.31, "total_tokens": 14254144} +{"current_steps": 21185, "total_steps": 204665, "loss": 0.0362, "lr": 1.999925229062129e-06, "epoch": 0.5175530745364376, "percentage": 10.35, "elapsed_time": "0:27:21", "remaining_time": "3:56:56", "throughput": 8685.43, "total_tokens": 14257344} +{"current_steps": 21190, "total_steps": 204665, "loss": 0.0792, "lr": 1.9999241826088883e-06, "epoch": 0.5176752253682848, "percentage": 10.35, "elapsed_time": "0:27:21", "remaining_time": "3:56:56", "throughput": 8685.61, "total_tokens": 14260672} +{"current_steps": 21195, "total_steps": 204665, "loss": 0.0565, "lr": 1.9999231288839303e-06, "epoch": 0.5177973762001319, "percentage": 10.36, "elapsed_time": "0:27:22", "remaining_time": "3:56:55", "throughput": 8685.76, "total_tokens": 14263936} +{"current_steps": 21200, "total_steps": 204665, "loss": 0.1347, "lr": 1.9999220678872626e-06, "epoch": 0.5179195270319791, "percentage": 10.36, "elapsed_time": "0:27:22", "remaining_time": "3:56:54", "throughput": 8685.94, "total_tokens": 14267264} +{"current_steps": 21205, "total_steps": 204665, "loss": 0.2543, "lr": 1.9999209996188924e-06, "epoch": 0.5180416778638263, "percentage": 10.36, "elapsed_time": "0:27:22", "remaining_time": "3:56:54", "throughput": 8686.18, "total_tokens": 14270720} +{"current_steps": 21210, "total_steps": 204665, "loss": 0.22, "lr": 1.9999199240788282e-06, "epoch": 0.5181638286956735, "percentage": 10.36, "elapsed_time": "0:27:23", "remaining_time": "3:56:53", "throughput": 8686.4, "total_tokens": 14274112} +{"current_steps": 21215, "total_steps": 204665, "loss": 0.199, "lr": 1.999918841267077e-06, "epoch": 0.5182859795275205, "percentage": 10.37, "elapsed_time": "0:27:23", "remaining_time": "3:56:52", "throughput": 8686.56, "total_tokens": 14277376} +{"current_steps": 21220, "total_steps": 204665, "loss": 0.134, "lr": 1.9999177511836467e-06, "epoch": 0.5184081303593677, "percentage": 10.37, "elapsed_time": "0:27:23", "remaining_time": "3:56:51", "throughput": 8686.83, "total_tokens": 14280896} +{"current_steps": 21225, "total_steps": 204665, "loss": 0.1493, "lr": 1.9999166538285463e-06, "epoch": 0.5185302811912149, "percentage": 10.37, "elapsed_time": "0:27:24", "remaining_time": "3:56:51", "throughput": 8687.02, "total_tokens": 14284224} +{"current_steps": 21230, "total_steps": 204665, "loss": 0.0893, "lr": 1.9999155492017824e-06, "epoch": 0.5186524320230621, "percentage": 10.37, "elapsed_time": "0:27:24", "remaining_time": "3:56:50", "throughput": 8687.17, "total_tokens": 14287488} +{"current_steps": 21235, "total_steps": 204665, "loss": 0.0955, "lr": 1.999914437303364e-06, "epoch": 0.5187745828549093, "percentage": 10.38, "elapsed_time": "0:27:25", "remaining_time": "3:56:49", "throughput": 8687.24, "total_tokens": 14290560} +{"current_steps": 21240, "total_steps": 204665, "loss": 0.0706, "lr": 1.9999133181332984e-06, "epoch": 0.5188967336867564, "percentage": 10.38, "elapsed_time": "0:27:25", "remaining_time": "3:56:49", "throughput": 8687.59, "total_tokens": 14294272} +{"current_steps": 21245, "total_steps": 204665, "loss": 0.1165, "lr": 1.9999121916915948e-06, "epoch": 0.5190188845186036, "percentage": 10.38, "elapsed_time": "0:27:25", "remaining_time": "3:56:48", "throughput": 8688.0, "total_tokens": 14298048} +{"current_steps": 21250, "total_steps": 204665, "loss": 0.0853, "lr": 1.9999110579782607e-06, "epoch": 0.5191410353504508, "percentage": 10.38, "elapsed_time": "0:27:26", "remaining_time": "3:56:47", "throughput": 8688.1, "total_tokens": 14301184} +{"current_steps": 21255, "total_steps": 204665, "loss": 0.0565, "lr": 1.9999099169933046e-06, "epoch": 0.5192631861822979, "percentage": 10.39, "elapsed_time": "0:27:26", "remaining_time": "3:56:46", "throughput": 8688.11, "total_tokens": 14304128} +{"current_steps": 21260, "total_steps": 204665, "loss": 0.0888, "lr": 1.999908768736734e-06, "epoch": 0.519385337014145, "percentage": 10.39, "elapsed_time": "0:27:26", "remaining_time": "3:56:46", "throughput": 8688.38, "total_tokens": 14307648} +{"current_steps": 21265, "total_steps": 204665, "loss": 0.1576, "lr": 1.9999076132085582e-06, "epoch": 0.5195074878459922, "percentage": 10.39, "elapsed_time": "0:27:27", "remaining_time": "3:56:45", "throughput": 8688.38, "total_tokens": 14310592} +{"current_steps": 21270, "total_steps": 204665, "loss": 0.1492, "lr": 1.9999064504087853e-06, "epoch": 0.5196296386778394, "percentage": 10.39, "elapsed_time": "0:27:27", "remaining_time": "3:56:44", "throughput": 8688.48, "total_tokens": 14313728} +{"current_steps": 21275, "total_steps": 204665, "loss": 0.2074, "lr": 1.9999052803374243e-06, "epoch": 0.5197517895096866, "percentage": 10.4, "elapsed_time": "0:27:27", "remaining_time": "3:56:43", "throughput": 8688.67, "total_tokens": 14317056} +{"current_steps": 21280, "total_steps": 204665, "loss": 0.1367, "lr": 1.9999041029944827e-06, "epoch": 0.5198739403415338, "percentage": 10.4, "elapsed_time": "0:27:28", "remaining_time": "3:56:43", "throughput": 8688.65, "total_tokens": 14319936} +{"current_steps": 21285, "total_steps": 204665, "loss": 0.1315, "lr": 1.9999029183799696e-06, "epoch": 0.5199960911733809, "percentage": 10.4, "elapsed_time": "0:27:28", "remaining_time": "3:56:42", "throughput": 8688.74, "total_tokens": 14323072} +{"current_steps": 21290, "total_steps": 204665, "loss": 0.1303, "lr": 1.9999017264938933e-06, "epoch": 0.520118242005228, "percentage": 10.4, "elapsed_time": "0:27:28", "remaining_time": "3:56:41", "throughput": 8688.89, "total_tokens": 14326336} +{"current_steps": 21295, "total_steps": 204665, "loss": 0.0619, "lr": 1.9999005273362628e-06, "epoch": 0.5202403928370752, "percentage": 10.4, "elapsed_time": "0:27:29", "remaining_time": "3:56:40", "throughput": 8689.14, "total_tokens": 14329792} +{"current_steps": 21300, "total_steps": 204665, "loss": 0.1505, "lr": 1.9998993209070865e-06, "epoch": 0.5203625436689224, "percentage": 10.41, "elapsed_time": "0:27:29", "remaining_time": "3:56:40", "throughput": 8689.19, "total_tokens": 14332864} +{"current_steps": 21305, "total_steps": 204665, "loss": 0.1236, "lr": 1.9998981072063738e-06, "epoch": 0.5204846945007695, "percentage": 10.41, "elapsed_time": "0:27:29", "remaining_time": "3:56:39", "throughput": 8689.39, "total_tokens": 14336256} +{"current_steps": 21310, "total_steps": 204665, "loss": 0.1244, "lr": 1.999896886234133e-06, "epoch": 0.5206068453326167, "percentage": 10.41, "elapsed_time": "0:27:30", "remaining_time": "3:56:38", "throughput": 8689.71, "total_tokens": 14339904} +{"current_steps": 21315, "total_steps": 204665, "loss": 0.2658, "lr": 1.9998956579903733e-06, "epoch": 0.5207289961644639, "percentage": 10.41, "elapsed_time": "0:27:30", "remaining_time": "3:56:38", "throughput": 8689.97, "total_tokens": 14343424} +{"current_steps": 21320, "total_steps": 204665, "loss": 0.0877, "lr": 1.999894422475103e-06, "epoch": 0.5208511469963111, "percentage": 10.42, "elapsed_time": "0:27:30", "remaining_time": "3:56:37", "throughput": 8690.2, "total_tokens": 14346880} +{"current_steps": 21325, "total_steps": 204665, "loss": 0.0885, "lr": 1.9998931796883316e-06, "epoch": 0.5209732978281582, "percentage": 10.42, "elapsed_time": "0:27:31", "remaining_time": "3:56:36", "throughput": 8690.42, "total_tokens": 14350336} +{"current_steps": 21330, "total_steps": 204665, "loss": 0.1958, "lr": 1.9998919296300684e-06, "epoch": 0.5210954486600053, "percentage": 10.42, "elapsed_time": "0:27:31", "remaining_time": "3:56:36", "throughput": 8690.48, "total_tokens": 14353600} +{"current_steps": 21335, "total_steps": 204665, "loss": 0.194, "lr": 1.9998906723003216e-06, "epoch": 0.5212175994918525, "percentage": 10.42, "elapsed_time": "0:27:31", "remaining_time": "3:56:35", "throughput": 8690.69, "total_tokens": 14356928} +{"current_steps": 21340, "total_steps": 204665, "loss": 0.1148, "lr": 1.9998894076991015e-06, "epoch": 0.5213397503236997, "percentage": 10.43, "elapsed_time": "0:27:32", "remaining_time": "3:56:34", "throughput": 8691.25, "total_tokens": 14361024} +{"current_steps": 21345, "total_steps": 204665, "loss": 0.1752, "lr": 1.9998881358264165e-06, "epoch": 0.5214619011555469, "percentage": 10.43, "elapsed_time": "0:27:32", "remaining_time": "3:56:34", "throughput": 8691.48, "total_tokens": 14364416} +{"current_steps": 21350, "total_steps": 204665, "loss": 0.1184, "lr": 1.9998868566822756e-06, "epoch": 0.521584051987394, "percentage": 10.43, "elapsed_time": "0:27:33", "remaining_time": "3:56:33", "throughput": 8691.72, "total_tokens": 14367936} +{"current_steps": 21355, "total_steps": 204665, "loss": 0.0963, "lr": 1.9998855702666892e-06, "epoch": 0.5217062028192412, "percentage": 10.43, "elapsed_time": "0:27:33", "remaining_time": "3:56:32", "throughput": 8691.97, "total_tokens": 14371392} +{"current_steps": 21360, "total_steps": 204665, "loss": 0.0451, "lr": 1.9998842765796656e-06, "epoch": 0.5218283536510884, "percentage": 10.44, "elapsed_time": "0:27:33", "remaining_time": "3:56:32", "throughput": 8692.26, "total_tokens": 14374976} +{"current_steps": 21365, "total_steps": 204665, "loss": 0.0914, "lr": 1.9998829756212147e-06, "epoch": 0.5219505044829356, "percentage": 10.44, "elapsed_time": "0:27:34", "remaining_time": "3:56:31", "throughput": 8692.66, "total_tokens": 14378752} +{"current_steps": 21370, "total_steps": 204665, "loss": 0.1199, "lr": 1.999881667391346e-06, "epoch": 0.5220726553147826, "percentage": 10.44, "elapsed_time": "0:27:34", "remaining_time": "3:56:30", "throughput": 8692.67, "total_tokens": 14381760} +{"current_steps": 21375, "total_steps": 204665, "loss": 0.1328, "lr": 1.9998803518900687e-06, "epoch": 0.5221948061466298, "percentage": 10.44, "elapsed_time": "0:27:34", "remaining_time": "3:56:30", "throughput": 8693.0, "total_tokens": 14385344} +{"current_steps": 21380, "total_steps": 204665, "loss": 0.2202, "lr": 1.999879029117393e-06, "epoch": 0.522316956978477, "percentage": 10.45, "elapsed_time": "0:27:35", "remaining_time": "3:56:29", "throughput": 8693.41, "total_tokens": 14389120} +{"current_steps": 21385, "total_steps": 204665, "loss": 0.0601, "lr": 1.9998776990733274e-06, "epoch": 0.5224391078103242, "percentage": 10.45, "elapsed_time": "0:27:35", "remaining_time": "3:56:28", "throughput": 8693.77, "total_tokens": 14392832} +{"current_steps": 21390, "total_steps": 204665, "loss": 0.2013, "lr": 1.9998763617578825e-06, "epoch": 0.5225612586421714, "percentage": 10.45, "elapsed_time": "0:27:35", "remaining_time": "3:56:28", "throughput": 8694.15, "total_tokens": 14396544} +{"current_steps": 21395, "total_steps": 204665, "loss": 0.0907, "lr": 1.999875017171068e-06, "epoch": 0.5226834094740185, "percentage": 10.45, "elapsed_time": "0:27:36", "remaining_time": "3:56:27", "throughput": 8694.2, "total_tokens": 14399552} +{"current_steps": 21400, "total_steps": 204665, "loss": 0.1445, "lr": 1.999873665312893e-06, "epoch": 0.5228055603058657, "percentage": 10.46, "elapsed_time": "0:27:36", "remaining_time": "3:56:26", "throughput": 8694.31, "total_tokens": 14402752} +{"current_steps": 21405, "total_steps": 204665, "loss": 0.174, "lr": 1.9998723061833676e-06, "epoch": 0.5229277111377129, "percentage": 10.46, "elapsed_time": "0:27:36", "remaining_time": "3:56:25", "throughput": 8694.52, "total_tokens": 14406144} +{"current_steps": 21410, "total_steps": 204665, "loss": 0.0878, "lr": 1.9998709397825024e-06, "epoch": 0.52304986196956, "percentage": 10.46, "elapsed_time": "0:27:37", "remaining_time": "3:56:25", "throughput": 8694.65, "total_tokens": 14409344} +{"current_steps": 21415, "total_steps": 204665, "loss": 0.067, "lr": 1.999869566110307e-06, "epoch": 0.5231720128014071, "percentage": 10.46, "elapsed_time": "0:27:37", "remaining_time": "3:56:24", "throughput": 8694.81, "total_tokens": 14412608} +{"current_steps": 21420, "total_steps": 204665, "loss": 0.1183, "lr": 1.9998681851667903e-06, "epoch": 0.5232941636332543, "percentage": 10.47, "elapsed_time": "0:27:37", "remaining_time": "3:56:23", "throughput": 8694.92, "total_tokens": 14415744} +{"current_steps": 21425, "total_steps": 204665, "loss": 0.1525, "lr": 1.999866796951964e-06, "epoch": 0.5234163144651015, "percentage": 10.47, "elapsed_time": "0:27:38", "remaining_time": "3:56:22", "throughput": 8694.96, "total_tokens": 14418752} +{"current_steps": 21430, "total_steps": 204665, "loss": 0.2345, "lr": 1.999865401465837e-06, "epoch": 0.5235384652969487, "percentage": 10.47, "elapsed_time": "0:27:38", "remaining_time": "3:56:21", "throughput": 8694.97, "total_tokens": 14421696} +{"current_steps": 21435, "total_steps": 204665, "loss": 0.1961, "lr": 1.9998639987084204e-06, "epoch": 0.5236606161287959, "percentage": 10.47, "elapsed_time": "0:27:38", "remaining_time": "3:56:21", "throughput": 8695.13, "total_tokens": 14424960} +{"current_steps": 21440, "total_steps": 204665, "loss": 0.0777, "lr": 1.9998625886797235e-06, "epoch": 0.523782766960643, "percentage": 10.48, "elapsed_time": "0:27:39", "remaining_time": "3:56:20", "throughput": 8695.48, "total_tokens": 14428608} +{"current_steps": 21445, "total_steps": 204665, "loss": 0.1527, "lr": 1.9998611713797566e-06, "epoch": 0.5239049177924902, "percentage": 10.48, "elapsed_time": "0:27:39", "remaining_time": "3:56:19", "throughput": 8695.53, "total_tokens": 14431680} +{"current_steps": 21450, "total_steps": 204665, "loss": 0.148, "lr": 1.9998597468085306e-06, "epoch": 0.5240270686243373, "percentage": 10.48, "elapsed_time": "0:27:40", "remaining_time": "3:56:18", "throughput": 8695.68, "total_tokens": 14434944} +{"current_steps": 21455, "total_steps": 204665, "loss": 0.1219, "lr": 1.9998583149660558e-06, "epoch": 0.5241492194561845, "percentage": 10.48, "elapsed_time": "0:27:40", "remaining_time": "3:56:18", "throughput": 8695.71, "total_tokens": 14437952} +{"current_steps": 21460, "total_steps": 204665, "loss": 0.1344, "lr": 1.999856875852342e-06, "epoch": 0.5242713702880316, "percentage": 10.49, "elapsed_time": "0:27:40", "remaining_time": "3:56:17", "throughput": 8695.71, "total_tokens": 14440896} +{"current_steps": 21465, "total_steps": 204665, "loss": 0.0374, "lr": 1.9998554294674005e-06, "epoch": 0.5243935211198788, "percentage": 10.49, "elapsed_time": "0:27:41", "remaining_time": "3:56:16", "throughput": 8695.84, "total_tokens": 14444096} +{"current_steps": 21470, "total_steps": 204665, "loss": 0.2057, "lr": 1.9998539758112413e-06, "epoch": 0.524515671951726, "percentage": 10.49, "elapsed_time": "0:27:41", "remaining_time": "3:56:15", "throughput": 8695.93, "total_tokens": 14447232} +{"current_steps": 21475, "total_steps": 204665, "loss": 0.1155, "lr": 1.999852514883875e-06, "epoch": 0.5246378227835732, "percentage": 10.49, "elapsed_time": "0:27:41", "remaining_time": "3:56:15", "throughput": 8696.16, "total_tokens": 14450624} +{"current_steps": 21480, "total_steps": 204665, "loss": 0.142, "lr": 1.999851046685312e-06, "epoch": 0.5247599736154204, "percentage": 10.5, "elapsed_time": "0:27:42", "remaining_time": "3:56:14", "throughput": 8696.29, "total_tokens": 14453824} +{"current_steps": 21485, "total_steps": 204665, "loss": 0.0319, "lr": 1.9998495712155638e-06, "epoch": 0.5248821244472675, "percentage": 10.5, "elapsed_time": "0:27:42", "remaining_time": "3:56:13", "throughput": 8696.46, "total_tokens": 14457088} +{"current_steps": 21490, "total_steps": 204665, "loss": 0.0508, "lr": 1.9998480884746403e-06, "epoch": 0.5250042752791146, "percentage": 10.5, "elapsed_time": "0:27:42", "remaining_time": "3:56:12", "throughput": 8696.47, "total_tokens": 14460032} +{"current_steps": 21495, "total_steps": 204665, "loss": 0.1255, "lr": 1.9998465984625526e-06, "epoch": 0.5251264261109618, "percentage": 10.5, "elapsed_time": "0:27:43", "remaining_time": "3:56:12", "throughput": 8696.85, "total_tokens": 14463744} +{"current_steps": 21500, "total_steps": 204665, "loss": 0.1094, "lr": 1.9998451011793113e-06, "epoch": 0.525248576942809, "percentage": 10.5, "elapsed_time": "0:27:43", "remaining_time": "3:56:11", "throughput": 8697.04, "total_tokens": 14467072} +{"current_steps": 21505, "total_steps": 204665, "loss": 0.0689, "lr": 1.999843596624928e-06, "epoch": 0.5253707277746561, "percentage": 10.51, "elapsed_time": "0:27:43", "remaining_time": "3:56:10", "throughput": 8697.17, "total_tokens": 14470272} +{"current_steps": 21510, "total_steps": 204665, "loss": 0.012, "lr": 1.999842084799413e-06, "epoch": 0.5254928786065033, "percentage": 10.51, "elapsed_time": "0:27:44", "remaining_time": "3:56:10", "throughput": 8697.57, "total_tokens": 14474048} +{"current_steps": 21515, "total_steps": 204665, "loss": 0.0521, "lr": 1.999840565702777e-06, "epoch": 0.5256150294383505, "percentage": 10.51, "elapsed_time": "0:27:44", "remaining_time": "3:56:09", "throughput": 8697.94, "total_tokens": 14477760} +{"current_steps": 21520, "total_steps": 204665, "loss": 0.3199, "lr": 1.9998390393350318e-06, "epoch": 0.5257371802701977, "percentage": 10.51, "elapsed_time": "0:27:44", "remaining_time": "3:56:08", "throughput": 8698.18, "total_tokens": 14481216} +{"current_steps": 21525, "total_steps": 204665, "loss": 0.0071, "lr": 1.9998375056961877e-06, "epoch": 0.5258593311020449, "percentage": 10.52, "elapsed_time": "0:27:45", "remaining_time": "3:56:07", "throughput": 8698.3, "total_tokens": 14484416} +{"current_steps": 21530, "total_steps": 204665, "loss": 0.1347, "lr": 1.999835964786257e-06, "epoch": 0.5259814819338919, "percentage": 10.52, "elapsed_time": "0:27:45", "remaining_time": "3:56:07", "throughput": 8698.6, "total_tokens": 14488000} +{"current_steps": 21535, "total_steps": 204665, "loss": 0.2018, "lr": 1.99983441660525e-06, "epoch": 0.5261036327657391, "percentage": 10.52, "elapsed_time": "0:27:45", "remaining_time": "3:56:06", "throughput": 8698.75, "total_tokens": 14491264} +{"current_steps": 21540, "total_steps": 204665, "loss": 0.1217, "lr": 1.9998328611531783e-06, "epoch": 0.5262257835975863, "percentage": 10.52, "elapsed_time": "0:27:46", "remaining_time": "3:56:05", "throughput": 8699.03, "total_tokens": 14494784} +{"current_steps": 21545, "total_steps": 204665, "loss": 0.2129, "lr": 1.9998312984300527e-06, "epoch": 0.5263479344294335, "percentage": 10.53, "elapsed_time": "0:27:46", "remaining_time": "3:56:05", "throughput": 8699.1, "total_tokens": 14497856} +{"current_steps": 21550, "total_steps": 204665, "loss": 0.1674, "lr": 1.9998297284358854e-06, "epoch": 0.5264700852612806, "percentage": 10.53, "elapsed_time": "0:27:46", "remaining_time": "3:56:04", "throughput": 8699.41, "total_tokens": 14501440} +{"current_steps": 21555, "total_steps": 204665, "loss": 0.1212, "lr": 1.9998281511706874e-06, "epoch": 0.5265922360931278, "percentage": 10.53, "elapsed_time": "0:27:47", "remaining_time": "3:56:03", "throughput": 8699.62, "total_tokens": 14504832} +{"current_steps": 21560, "total_steps": 204665, "loss": 0.0934, "lr": 1.99982656663447e-06, "epoch": 0.526714386924975, "percentage": 10.53, "elapsed_time": "0:27:47", "remaining_time": "3:56:02", "throughput": 8699.91, "total_tokens": 14508352} +{"current_steps": 21565, "total_steps": 204665, "loss": 0.1684, "lr": 1.999824974827245e-06, "epoch": 0.5268365377568222, "percentage": 10.54, "elapsed_time": "0:27:47", "remaining_time": "3:56:02", "throughput": 8700.05, "total_tokens": 14511552} +{"current_steps": 21570, "total_steps": 204665, "loss": 0.1697, "lr": 1.9998233757490237e-06, "epoch": 0.5269586885886693, "percentage": 10.54, "elapsed_time": "0:27:48", "remaining_time": "3:56:01", "throughput": 8700.17, "total_tokens": 14514752} +{"current_steps": 21575, "total_steps": 204665, "loss": 0.0283, "lr": 1.9998217693998177e-06, "epoch": 0.5270808394205164, "percentage": 10.54, "elapsed_time": "0:27:48", "remaining_time": "3:56:00", "throughput": 8700.23, "total_tokens": 14517824} +{"current_steps": 21580, "total_steps": 204665, "loss": 0.0999, "lr": 1.9998201557796395e-06, "epoch": 0.5272029902523636, "percentage": 10.54, "elapsed_time": "0:27:49", "remaining_time": "3:56:00", "throughput": 8700.41, "total_tokens": 14521216} +{"current_steps": 21585, "total_steps": 204665, "loss": 0.0718, "lr": 1.9998185348885e-06, "epoch": 0.5273251410842108, "percentage": 10.55, "elapsed_time": "0:27:49", "remaining_time": "3:55:59", "throughput": 8700.72, "total_tokens": 14524800} +{"current_steps": 21590, "total_steps": 204665, "loss": 0.0716, "lr": 1.999816906726411e-06, "epoch": 0.527447291916058, "percentage": 10.55, "elapsed_time": "0:27:49", "remaining_time": "3:55:58", "throughput": 8701.13, "total_tokens": 14528640} +{"current_steps": 21595, "total_steps": 204665, "loss": 0.0889, "lr": 1.9998152712933846e-06, "epoch": 0.5275694427479051, "percentage": 10.55, "elapsed_time": "0:27:50", "remaining_time": "3:55:58", "throughput": 8701.3, "total_tokens": 14531968} +{"current_steps": 21600, "total_steps": 204665, "loss": 0.0779, "lr": 1.9998136285894326e-06, "epoch": 0.5276915935797523, "percentage": 10.55, "elapsed_time": "0:27:50", "remaining_time": "3:55:57", "throughput": 8701.47, "total_tokens": 14535296} +{"current_steps": 21605, "total_steps": 204665, "loss": 0.1281, "lr": 1.999811978614567e-06, "epoch": 0.5278137444115995, "percentage": 10.56, "elapsed_time": "0:27:50", "remaining_time": "3:55:56", "throughput": 8701.79, "total_tokens": 14538944} +{"current_steps": 21610, "total_steps": 204665, "loss": 0.1101, "lr": 1.9998103213687994e-06, "epoch": 0.5279358952434466, "percentage": 10.56, "elapsed_time": "0:27:51", "remaining_time": "3:55:56", "throughput": 8702.05, "total_tokens": 14542464} +{"current_steps": 21615, "total_steps": 204665, "loss": 0.1037, "lr": 1.9998086568521426e-06, "epoch": 0.5280580460752937, "percentage": 10.56, "elapsed_time": "0:27:51", "remaining_time": "3:55:55", "throughput": 8702.16, "total_tokens": 14545664} +{"current_steps": 21620, "total_steps": 204665, "loss": 0.2195, "lr": 1.9998069850646084e-06, "epoch": 0.5281801969071409, "percentage": 10.56, "elapsed_time": "0:27:51", "remaining_time": "3:55:54", "throughput": 8702.39, "total_tokens": 14549120} +{"current_steps": 21625, "total_steps": 204665, "loss": 0.1886, "lr": 1.999805306006209e-06, "epoch": 0.5283023477389881, "percentage": 10.57, "elapsed_time": "0:27:52", "remaining_time": "3:55:53", "throughput": 8702.51, "total_tokens": 14552320} +{"current_steps": 21630, "total_steps": 204665, "loss": 0.1231, "lr": 1.9998036196769564e-06, "epoch": 0.5284244985708353, "percentage": 10.57, "elapsed_time": "0:27:52", "remaining_time": "3:55:53", "throughput": 8702.63, "total_tokens": 14555520} +{"current_steps": 21635, "total_steps": 204665, "loss": 0.0935, "lr": 1.9998019260768626e-06, "epoch": 0.5285466494026825, "percentage": 10.57, "elapsed_time": "0:27:52", "remaining_time": "3:55:52", "throughput": 8702.83, "total_tokens": 14558848} +{"current_steps": 21640, "total_steps": 204665, "loss": 0.247, "lr": 1.9998002252059406e-06, "epoch": 0.5286688002345296, "percentage": 10.57, "elapsed_time": "0:27:53", "remaining_time": "3:55:51", "throughput": 8703.0, "total_tokens": 14562176} +{"current_steps": 21645, "total_steps": 204665, "loss": 0.1526, "lr": 1.9997985170642025e-06, "epoch": 0.5287909510663767, "percentage": 10.58, "elapsed_time": "0:27:53", "remaining_time": "3:55:51", "throughput": 8703.31, "total_tokens": 14565760} +{"current_steps": 21650, "total_steps": 204665, "loss": 0.067, "lr": 1.9997968016516606e-06, "epoch": 0.5289131018982239, "percentage": 10.58, "elapsed_time": "0:27:53", "remaining_time": "3:55:50", "throughput": 8703.65, "total_tokens": 14569408} +{"current_steps": 21655, "total_steps": 204665, "loss": 0.0626, "lr": 1.9997950789683277e-06, "epoch": 0.5290352527300711, "percentage": 10.58, "elapsed_time": "0:27:54", "remaining_time": "3:55:49", "throughput": 8704.2, "total_tokens": 14573504} +{"current_steps": 21660, "total_steps": 204665, "loss": 0.0374, "lr": 1.9997933490142156e-06, "epoch": 0.5291574035619182, "percentage": 10.58, "elapsed_time": "0:27:54", "remaining_time": "3:55:49", "throughput": 8704.31, "total_tokens": 14576704} +{"current_steps": 21665, "total_steps": 204665, "loss": 0.084, "lr": 1.9997916117893374e-06, "epoch": 0.5292795543937654, "percentage": 10.59, "elapsed_time": "0:27:55", "remaining_time": "3:55:48", "throughput": 8704.89, "total_tokens": 14580864} +{"current_steps": 21670, "total_steps": 204665, "loss": 0.0969, "lr": 1.999789867293706e-06, "epoch": 0.5294017052256126, "percentage": 10.59, "elapsed_time": "0:27:55", "remaining_time": "3:55:47", "throughput": 8705.14, "total_tokens": 14584320} +{"current_steps": 21675, "total_steps": 204665, "loss": 0.1203, "lr": 1.9997881155273336e-06, "epoch": 0.5295238560574598, "percentage": 10.59, "elapsed_time": "0:27:55", "remaining_time": "3:55:47", "throughput": 8705.19, "total_tokens": 14587328} +{"current_steps": 21680, "total_steps": 204665, "loss": 0.188, "lr": 1.999786356490233e-06, "epoch": 0.529646006889307, "percentage": 10.59, "elapsed_time": "0:27:56", "remaining_time": "3:55:46", "throughput": 8705.4, "total_tokens": 14590720} +{"current_steps": 21685, "total_steps": 204665, "loss": 0.1405, "lr": 1.999784590182417e-06, "epoch": 0.529768157721154, "percentage": 10.6, "elapsed_time": "0:27:56", "remaining_time": "3:55:45", "throughput": 8705.43, "total_tokens": 14593728} +{"current_steps": 21690, "total_steps": 204665, "loss": 0.1511, "lr": 1.999782816603899e-06, "epoch": 0.5298903085530012, "percentage": 10.6, "elapsed_time": "0:27:56", "remaining_time": "3:55:44", "throughput": 8705.8, "total_tokens": 14597440} +{"current_steps": 21695, "total_steps": 204665, "loss": 0.0933, "lr": 1.9997810357546913e-06, "epoch": 0.5300124593848484, "percentage": 10.6, "elapsed_time": "0:27:57", "remaining_time": "3:55:44", "throughput": 8705.95, "total_tokens": 14600704} +{"current_steps": 21700, "total_steps": 204665, "loss": 0.0149, "lr": 1.999779247634807e-06, "epoch": 0.5301346102166956, "percentage": 10.6, "elapsed_time": "0:27:57", "remaining_time": "3:55:43", "throughput": 8706.11, "total_tokens": 14603968} +{"current_steps": 21705, "total_steps": 204665, "loss": 0.0334, "lr": 1.9997774522442587e-06, "epoch": 0.5302567610485427, "percentage": 10.61, "elapsed_time": "0:27:57", "remaining_time": "3:55:42", "throughput": 8706.4, "total_tokens": 14607488} +{"current_steps": 21710, "total_steps": 204665, "loss": 0.1878, "lr": 1.9997756495830606e-06, "epoch": 0.5303789118803899, "percentage": 10.61, "elapsed_time": "0:27:58", "remaining_time": "3:55:42", "throughput": 8706.62, "total_tokens": 14610880} +{"current_steps": 21715, "total_steps": 204665, "loss": 0.2545, "lr": 1.9997738396512243e-06, "epoch": 0.5305010627122371, "percentage": 10.61, "elapsed_time": "0:27:58", "remaining_time": "3:55:41", "throughput": 8706.85, "total_tokens": 14614336} +{"current_steps": 21720, "total_steps": 204665, "loss": 0.1535, "lr": 1.9997720224487642e-06, "epoch": 0.5306232135440843, "percentage": 10.61, "elapsed_time": "0:27:58", "remaining_time": "3:55:40", "throughput": 8706.99, "total_tokens": 14617600} +{"current_steps": 21725, "total_steps": 204665, "loss": 0.1522, "lr": 1.999770197975693e-06, "epoch": 0.5307453643759314, "percentage": 10.61, "elapsed_time": "0:27:59", "remaining_time": "3:55:39", "throughput": 8706.91, "total_tokens": 14620480} +{"current_steps": 21730, "total_steps": 204665, "loss": 0.0956, "lr": 1.999768366232024e-06, "epoch": 0.5308675152077785, "percentage": 10.62, "elapsed_time": "0:27:59", "remaining_time": "3:55:39", "throughput": 8707.07, "total_tokens": 14623808} +{"current_steps": 21735, "total_steps": 204665, "loss": 0.1534, "lr": 1.9997665272177706e-06, "epoch": 0.5309896660396257, "percentage": 10.62, "elapsed_time": "0:27:59", "remaining_time": "3:55:38", "throughput": 8707.15, "total_tokens": 14626944} +{"current_steps": 21740, "total_steps": 204665, "loss": 0.1231, "lr": 1.999764680932946e-06, "epoch": 0.5311118168714729, "percentage": 10.62, "elapsed_time": "0:28:00", "remaining_time": "3:55:37", "throughput": 8707.12, "total_tokens": 14629824} +{"current_steps": 21745, "total_steps": 204665, "loss": 0.1844, "lr": 1.9997628273775635e-06, "epoch": 0.5312339677033201, "percentage": 10.62, "elapsed_time": "0:28:00", "remaining_time": "3:55:37", "throughput": 8707.49, "total_tokens": 14633536} +{"current_steps": 21750, "total_steps": 204665, "loss": 0.1802, "lr": 1.999760966551637e-06, "epoch": 0.5313561185351672, "percentage": 10.63, "elapsed_time": "0:28:00", "remaining_time": "3:55:36", "throughput": 8707.5, "total_tokens": 14636480} +{"current_steps": 21755, "total_steps": 204665, "loss": 0.1074, "lr": 1.99975909845518e-06, "epoch": 0.5314782693670144, "percentage": 10.63, "elapsed_time": "0:28:01", "remaining_time": "3:55:35", "throughput": 8707.76, "total_tokens": 14640000} +{"current_steps": 21760, "total_steps": 204665, "loss": 0.0523, "lr": 1.999757223088206e-06, "epoch": 0.5316004201988616, "percentage": 10.63, "elapsed_time": "0:28:01", "remaining_time": "3:55:34", "throughput": 8708.06, "total_tokens": 14643520} +{"current_steps": 21765, "total_steps": 204665, "loss": 0.1024, "lr": 1.9997553404507284e-06, "epoch": 0.5317225710307087, "percentage": 10.63, "elapsed_time": "0:28:01", "remaining_time": "3:55:34", "throughput": 8708.17, "total_tokens": 14646720} +{"current_steps": 21770, "total_steps": 204665, "loss": 0.0662, "lr": 1.9997534505427607e-06, "epoch": 0.5318447218625559, "percentage": 10.64, "elapsed_time": "0:28:02", "remaining_time": "3:55:33", "throughput": 8708.29, "total_tokens": 14649920} +{"current_steps": 21775, "total_steps": 204665, "loss": 0.1042, "lr": 1.9997515533643176e-06, "epoch": 0.531966872694403, "percentage": 10.64, "elapsed_time": "0:28:02", "remaining_time": "3:55:32", "throughput": 8708.42, "total_tokens": 14653120} +{"current_steps": 21780, "total_steps": 204665, "loss": 0.1285, "lr": 1.999749648915412e-06, "epoch": 0.5320890235262502, "percentage": 10.64, "elapsed_time": "0:28:02", "remaining_time": "3:55:31", "throughput": 8708.73, "total_tokens": 14656704} +{"current_steps": 21785, "total_steps": 204665, "loss": 0.1138, "lr": 1.999747737196058e-06, "epoch": 0.5322111743580974, "percentage": 10.64, "elapsed_time": "0:28:03", "remaining_time": "3:55:31", "throughput": 8708.88, "total_tokens": 14659968} +{"current_steps": 21790, "total_steps": 204665, "loss": 0.1466, "lr": 1.9997458182062695e-06, "epoch": 0.5323333251899446, "percentage": 10.65, "elapsed_time": "0:28:03", "remaining_time": "3:55:30", "throughput": 8709.01, "total_tokens": 14663168} +{"current_steps": 21795, "total_steps": 204665, "loss": 0.1678, "lr": 1.999743891946061e-06, "epoch": 0.5324554760217917, "percentage": 10.65, "elapsed_time": "0:28:04", "remaining_time": "3:55:29", "throughput": 8709.37, "total_tokens": 14666880} +{"current_steps": 21800, "total_steps": 204665, "loss": 0.0876, "lr": 1.999741958415446e-06, "epoch": 0.5325776268536389, "percentage": 10.65, "elapsed_time": "0:28:04", "remaining_time": "3:55:29", "throughput": 8709.42, "total_tokens": 14670016} +{"current_steps": 21805, "total_steps": 204665, "loss": 0.1508, "lr": 1.999740017614438e-06, "epoch": 0.532699777685486, "percentage": 10.65, "elapsed_time": "0:28:04", "remaining_time": "3:55:28", "throughput": 8709.47, "total_tokens": 14673152} +{"current_steps": 21810, "total_steps": 204665, "loss": 0.0931, "lr": 1.999738069543052e-06, "epoch": 0.5328219285173332, "percentage": 10.66, "elapsed_time": "0:28:05", "remaining_time": "3:55:27", "throughput": 8709.78, "total_tokens": 14676800} +{"current_steps": 21815, "total_steps": 204665, "loss": 0.0399, "lr": 1.9997361142013016e-06, "epoch": 0.5329440793491804, "percentage": 10.66, "elapsed_time": "0:28:05", "remaining_time": "3:55:27", "throughput": 8710.04, "total_tokens": 14680320} +{"current_steps": 21820, "total_steps": 204665, "loss": 0.1401, "lr": 1.9997341515892016e-06, "epoch": 0.5330662301810275, "percentage": 10.66, "elapsed_time": "0:28:05", "remaining_time": "3:55:26", "throughput": 8710.02, "total_tokens": 14683264} +{"current_steps": 21825, "total_steps": 204665, "loss": 0.135, "lr": 1.9997321817067662e-06, "epoch": 0.5331883810128747, "percentage": 10.66, "elapsed_time": "0:28:06", "remaining_time": "3:55:25", "throughput": 8710.22, "total_tokens": 14686656} +{"current_steps": 21830, "total_steps": 204665, "loss": 0.1525, "lr": 1.999730204554009e-06, "epoch": 0.5333105318447219, "percentage": 10.67, "elapsed_time": "0:28:06", "remaining_time": "3:55:25", "throughput": 8710.53, "total_tokens": 14690304} +{"current_steps": 21835, "total_steps": 204665, "loss": 0.1182, "lr": 1.999728220130945e-06, "epoch": 0.5334326826765691, "percentage": 10.67, "elapsed_time": "0:28:06", "remaining_time": "3:55:24", "throughput": 8710.69, "total_tokens": 14693632} +{"current_steps": 21840, "total_steps": 204665, "loss": 0.0625, "lr": 1.9997262284375886e-06, "epoch": 0.5335548335084161, "percentage": 10.67, "elapsed_time": "0:28:07", "remaining_time": "3:55:23", "throughput": 8710.76, "total_tokens": 14696768} +{"current_steps": 21845, "total_steps": 204665, "loss": 0.0942, "lr": 1.999724229473954e-06, "epoch": 0.5336769843402633, "percentage": 10.67, "elapsed_time": "0:28:07", "remaining_time": "3:55:22", "throughput": 8710.83, "total_tokens": 14699904} +{"current_steps": 21850, "total_steps": 204665, "loss": 0.1101, "lr": 1.999722223240056e-06, "epoch": 0.5337991351721105, "percentage": 10.68, "elapsed_time": "0:28:07", "remaining_time": "3:55:22", "throughput": 8711.34, "total_tokens": 14703936} +{"current_steps": 21855, "total_steps": 204665, "loss": 0.0913, "lr": 1.999720209735909e-06, "epoch": 0.5339212860039577, "percentage": 10.68, "elapsed_time": "0:28:08", "remaining_time": "3:55:21", "throughput": 8711.51, "total_tokens": 14707264} +{"current_steps": 21860, "total_steps": 204665, "loss": 0.1847, "lr": 1.9997181889615277e-06, "epoch": 0.5340434368358048, "percentage": 10.68, "elapsed_time": "0:28:08", "remaining_time": "3:55:21", "throughput": 8711.74, "total_tokens": 14710720} +{"current_steps": 21865, "total_steps": 204665, "loss": 0.0553, "lr": 1.999716160916927e-06, "epoch": 0.534165587667652, "percentage": 10.68, "elapsed_time": "0:28:08", "remaining_time": "3:55:20", "throughput": 8711.84, "total_tokens": 14713920} +{"current_steps": 21870, "total_steps": 204665, "loss": 0.1577, "lr": 1.9997141256021214e-06, "epoch": 0.5342877384994992, "percentage": 10.69, "elapsed_time": "0:28:09", "remaining_time": "3:55:19", "throughput": 8711.98, "total_tokens": 14717184} +{"current_steps": 21875, "total_steps": 204665, "loss": 0.1194, "lr": 1.999712083017126e-06, "epoch": 0.5344098893313464, "percentage": 10.69, "elapsed_time": "0:28:09", "remaining_time": "3:55:18", "throughput": 8712.14, "total_tokens": 14720512} +{"current_steps": 21880, "total_steps": 204665, "loss": 0.0821, "lr": 1.999710033161955e-06, "epoch": 0.5345320401631936, "percentage": 10.69, "elapsed_time": "0:28:10", "remaining_time": "3:55:18", "throughput": 8712.27, "total_tokens": 14723776} +{"current_steps": 21885, "total_steps": 204665, "loss": 0.0919, "lr": 1.9997079760366242e-06, "epoch": 0.5346541909950406, "percentage": 10.69, "elapsed_time": "0:28:10", "remaining_time": "3:55:17", "throughput": 8712.29, "total_tokens": 14726784} +{"current_steps": 21890, "total_steps": 204665, "loss": 0.0458, "lr": 1.999705911641148e-06, "epoch": 0.5347763418268878, "percentage": 10.7, "elapsed_time": "0:28:10", "remaining_time": "3:55:16", "throughput": 8712.58, "total_tokens": 14730368} +{"current_steps": 21895, "total_steps": 204665, "loss": 0.1431, "lr": 1.9997038399755416e-06, "epoch": 0.534898492658735, "percentage": 10.7, "elapsed_time": "0:28:11", "remaining_time": "3:55:16", "throughput": 8712.83, "total_tokens": 14733888} +{"current_steps": 21900, "total_steps": 204665, "loss": 0.1114, "lr": 1.99970176103982e-06, "epoch": 0.5350206434905822, "percentage": 10.7, "elapsed_time": "0:28:11", "remaining_time": "3:55:15", "throughput": 8712.87, "total_tokens": 14736960} +{"current_steps": 21905, "total_steps": 204665, "loss": 0.2132, "lr": 1.999699674833998e-06, "epoch": 0.5351427943224293, "percentage": 10.7, "elapsed_time": "0:28:11", "remaining_time": "3:55:14", "throughput": 8712.98, "total_tokens": 14740160} +{"current_steps": 21910, "total_steps": 204665, "loss": 0.1527, "lr": 1.9996975813580913e-06, "epoch": 0.5352649451542765, "percentage": 10.71, "elapsed_time": "0:28:12", "remaining_time": "3:55:14", "throughput": 8713.26, "total_tokens": 14743744} +{"current_steps": 21915, "total_steps": 204665, "loss": 0.0924, "lr": 1.9996954806121145e-06, "epoch": 0.5353870959861237, "percentage": 10.71, "elapsed_time": "0:28:12", "remaining_time": "3:55:13", "throughput": 8713.7, "total_tokens": 14747648} +{"current_steps": 21920, "total_steps": 204665, "loss": 0.2639, "lr": 1.999693372596084e-06, "epoch": 0.5355092468179709, "percentage": 10.71, "elapsed_time": "0:28:12", "remaining_time": "3:55:12", "throughput": 8713.85, "total_tokens": 14750912} +{"current_steps": 21925, "total_steps": 204665, "loss": 0.1055, "lr": 1.999691257310014e-06, "epoch": 0.535631397649818, "percentage": 10.71, "elapsed_time": "0:28:13", "remaining_time": "3:55:12", "throughput": 8714.32, "total_tokens": 14754880} +{"current_steps": 21930, "total_steps": 204665, "loss": 0.1137, "lr": 1.99968913475392e-06, "epoch": 0.5357535484816651, "percentage": 10.72, "elapsed_time": "0:28:13", "remaining_time": "3:55:11", "throughput": 8714.55, "total_tokens": 14758336} +{"current_steps": 21935, "total_steps": 204665, "loss": 0.1668, "lr": 1.9996870049278183e-06, "epoch": 0.5358756993135123, "percentage": 10.72, "elapsed_time": "0:28:13", "remaining_time": "3:55:10", "throughput": 8714.9, "total_tokens": 14762048} +{"current_steps": 21940, "total_steps": 204665, "loss": 0.1627, "lr": 1.9996848678317236e-06, "epoch": 0.5359978501453595, "percentage": 10.72, "elapsed_time": "0:28:14", "remaining_time": "3:55:10", "throughput": 8715.01, "total_tokens": 14765248} +{"current_steps": 21945, "total_steps": 204665, "loss": 0.2253, "lr": 1.9996827234656515e-06, "epoch": 0.5361200009772067, "percentage": 10.72, "elapsed_time": "0:28:14", "remaining_time": "3:55:09", "throughput": 8715.04, "total_tokens": 14768320} +{"current_steps": 21950, "total_steps": 204665, "loss": 0.1673, "lr": 1.999680571829618e-06, "epoch": 0.5362421518090538, "percentage": 10.72, "elapsed_time": "0:28:14", "remaining_time": "3:55:08", "throughput": 8715.31, "total_tokens": 14771840} +{"current_steps": 21955, "total_steps": 204665, "loss": 0.0802, "lr": 1.9996784129236383e-06, "epoch": 0.536364302640901, "percentage": 10.73, "elapsed_time": "0:28:15", "remaining_time": "3:55:08", "throughput": 8715.76, "total_tokens": 14775744} +{"current_steps": 21960, "total_steps": 204665, "loss": 0.0289, "lr": 1.999676246747728e-06, "epoch": 0.5364864534727481, "percentage": 10.73, "elapsed_time": "0:28:15", "remaining_time": "3:55:07", "throughput": 8716.27, "total_tokens": 14779776} +{"current_steps": 21965, "total_steps": 204665, "loss": 0.2489, "lr": 1.9996740733019037e-06, "epoch": 0.5366086043045953, "percentage": 10.73, "elapsed_time": "0:28:16", "remaining_time": "3:55:06", "throughput": 8716.47, "total_tokens": 14783168} +{"current_steps": 21970, "total_steps": 204665, "loss": 0.0224, "lr": 1.9996718925861805e-06, "epoch": 0.5367307551364425, "percentage": 10.73, "elapsed_time": "0:28:16", "remaining_time": "3:55:06", "throughput": 8716.53, "total_tokens": 14786240} +{"current_steps": 21975, "total_steps": 204665, "loss": 0.1144, "lr": 1.9996697046005746e-06, "epoch": 0.5368529059682896, "percentage": 10.74, "elapsed_time": "0:28:16", "remaining_time": "3:55:05", "throughput": 8716.56, "total_tokens": 14789248} +{"current_steps": 21980, "total_steps": 204665, "loss": 0.0216, "lr": 1.9996675093451014e-06, "epoch": 0.5369750568001368, "percentage": 10.74, "elapsed_time": "0:28:17", "remaining_time": "3:55:04", "throughput": 8716.53, "total_tokens": 14792128} +{"current_steps": 21985, "total_steps": 204665, "loss": 0.0429, "lr": 1.9996653068197774e-06, "epoch": 0.537097207631984, "percentage": 10.74, "elapsed_time": "0:28:17", "remaining_time": "3:55:03", "throughput": 8716.58, "total_tokens": 14795200} +{"current_steps": 21990, "total_steps": 204665, "loss": 0.1353, "lr": 1.999663097024618e-06, "epoch": 0.5372193584638312, "percentage": 10.74, "elapsed_time": "0:28:17", "remaining_time": "3:55:03", "throughput": 8716.83, "total_tokens": 14798656} +{"current_steps": 21995, "total_steps": 204665, "loss": 0.0957, "lr": 1.9996608799596402e-06, "epoch": 0.5373415092956783, "percentage": 10.75, "elapsed_time": "0:28:18", "remaining_time": "3:55:02", "throughput": 8717.05, "total_tokens": 14802112} +{"current_steps": 22000, "total_steps": 204665, "loss": 0.1855, "lr": 1.9996586556248593e-06, "epoch": 0.5374636601275254, "percentage": 10.75, "elapsed_time": "0:28:18", "remaining_time": "3:55:01", "throughput": 8717.29, "total_tokens": 14805568} +{"current_steps": 22005, "total_steps": 204665, "loss": 0.1993, "lr": 1.999656424020292e-06, "epoch": 0.5375858109593726, "percentage": 10.75, "elapsed_time": "0:28:18", "remaining_time": "3:55:01", "throughput": 8717.65, "total_tokens": 14809280} +{"current_steps": 22010, "total_steps": 204665, "loss": 0.3984, "lr": 1.999654185145954e-06, "epoch": 0.5377079617912198, "percentage": 10.75, "elapsed_time": "0:28:19", "remaining_time": "3:55:00", "throughput": 8717.68, "total_tokens": 14812288} +{"current_steps": 22015, "total_steps": 204665, "loss": 0.1776, "lr": 1.9996519390018626e-06, "epoch": 0.537830112623067, "percentage": 10.76, "elapsed_time": "0:28:19", "remaining_time": "3:54:59", "throughput": 8717.71, "total_tokens": 14815296} +{"current_steps": 22020, "total_steps": 204665, "loss": 0.1282, "lr": 1.9996496855880327e-06, "epoch": 0.5379522634549141, "percentage": 10.76, "elapsed_time": "0:28:19", "remaining_time": "3:54:58", "throughput": 8717.75, "total_tokens": 14818368} +{"current_steps": 22025, "total_steps": 204665, "loss": 0.0895, "lr": 1.9996474249044816e-06, "epoch": 0.5380744142867613, "percentage": 10.76, "elapsed_time": "0:28:20", "remaining_time": "3:54:58", "throughput": 8718.02, "total_tokens": 14821888} +{"current_steps": 22030, "total_steps": 204665, "loss": 0.0929, "lr": 1.999645156951226e-06, "epoch": 0.5381965651186085, "percentage": 10.76, "elapsed_time": "0:28:20", "remaining_time": "3:54:57", "throughput": 8718.09, "total_tokens": 14824960} +{"current_steps": 22035, "total_steps": 204665, "loss": 0.0822, "lr": 1.999642881728281e-06, "epoch": 0.5383187159504557, "percentage": 10.77, "elapsed_time": "0:28:20", "remaining_time": "3:54:56", "throughput": 8718.44, "total_tokens": 14828672} +{"current_steps": 22040, "total_steps": 204665, "loss": 0.129, "lr": 1.9996405992356648e-06, "epoch": 0.5384408667823027, "percentage": 10.77, "elapsed_time": "0:28:21", "remaining_time": "3:54:56", "throughput": 8718.7, "total_tokens": 14832192} +{"current_steps": 22045, "total_steps": 204665, "loss": 0.1256, "lr": 1.999638309473393e-06, "epoch": 0.5385630176141499, "percentage": 10.77, "elapsed_time": "0:28:21", "remaining_time": "3:54:55", "throughput": 8719.0, "total_tokens": 14835776} +{"current_steps": 22050, "total_steps": 204665, "loss": 0.0424, "lr": 1.999636012441483e-06, "epoch": 0.5386851684459971, "percentage": 10.77, "elapsed_time": "0:28:21", "remaining_time": "3:54:54", "throughput": 8719.26, "total_tokens": 14839296} +{"current_steps": 22055, "total_steps": 204665, "loss": 0.0114, "lr": 1.9996337081399508e-06, "epoch": 0.5388073192778443, "percentage": 10.78, "elapsed_time": "0:28:22", "remaining_time": "3:54:54", "throughput": 8719.37, "total_tokens": 14842496} +{"current_steps": 22060, "total_steps": 204665, "loss": 0.2205, "lr": 1.9996313965688134e-06, "epoch": 0.5389294701096915, "percentage": 10.78, "elapsed_time": "0:28:22", "remaining_time": "3:54:53", "throughput": 8719.53, "total_tokens": 14845760} +{"current_steps": 22065, "total_steps": 204665, "loss": 0.0879, "lr": 1.9996290777280873e-06, "epoch": 0.5390516209415386, "percentage": 10.78, "elapsed_time": "0:28:22", "remaining_time": "3:54:52", "throughput": 8719.69, "total_tokens": 14849088} +{"current_steps": 22070, "total_steps": 204665, "loss": 0.0739, "lr": 1.99962675161779e-06, "epoch": 0.5391737717733858, "percentage": 10.78, "elapsed_time": "0:28:23", "remaining_time": "3:54:52", "throughput": 8719.88, "total_tokens": 14852416} +{"current_steps": 22075, "total_steps": 204665, "loss": 0.2628, "lr": 1.9996244182379376e-06, "epoch": 0.539295922605233, "percentage": 10.79, "elapsed_time": "0:28:23", "remaining_time": "3:54:51", "throughput": 8720.18, "total_tokens": 14856000} +{"current_steps": 22080, "total_steps": 204665, "loss": 0.1335, "lr": 1.9996220775885484e-06, "epoch": 0.5394180734370801, "percentage": 10.79, "elapsed_time": "0:28:23", "remaining_time": "3:54:50", "throughput": 8720.3, "total_tokens": 14859200} +{"current_steps": 22085, "total_steps": 204665, "loss": 0.0093, "lr": 1.999619729669638e-06, "epoch": 0.5395402242689272, "percentage": 10.79, "elapsed_time": "0:28:24", "remaining_time": "3:54:49", "throughput": 8720.46, "total_tokens": 14862464} +{"current_steps": 22090, "total_steps": 204665, "loss": 0.0814, "lr": 1.999617374481224e-06, "epoch": 0.5396623751007744, "percentage": 10.79, "elapsed_time": "0:28:24", "remaining_time": "3:54:49", "throughput": 8720.59, "total_tokens": 14865728} +{"current_steps": 22095, "total_steps": 204665, "loss": 0.1876, "lr": 1.999615012023324e-06, "epoch": 0.5397845259326216, "percentage": 10.8, "elapsed_time": "0:28:25", "remaining_time": "3:54:48", "throughput": 8720.78, "total_tokens": 14869056} +{"current_steps": 22100, "total_steps": 204665, "loss": 0.238, "lr": 1.9996126422959544e-06, "epoch": 0.5399066767644688, "percentage": 10.8, "elapsed_time": "0:28:25", "remaining_time": "3:54:47", "throughput": 8721.11, "total_tokens": 14872704} +{"current_steps": 22105, "total_steps": 204665, "loss": 0.1577, "lr": 1.9996102652991332e-06, "epoch": 0.540028827596316, "percentage": 10.8, "elapsed_time": "0:28:25", "remaining_time": "3:54:47", "throughput": 8721.29, "total_tokens": 14876032} +{"current_steps": 22110, "total_steps": 204665, "loss": 0.1087, "lr": 1.9996078810328767e-06, "epoch": 0.5401509784281631, "percentage": 10.8, "elapsed_time": "0:28:26", "remaining_time": "3:54:46", "throughput": 8721.97, "total_tokens": 14880448} +{"current_steps": 22115, "total_steps": 204665, "loss": 0.156, "lr": 1.9996054894972035e-06, "epoch": 0.5402731292600103, "percentage": 10.81, "elapsed_time": "0:28:26", "remaining_time": "3:54:45", "throughput": 8722.06, "total_tokens": 14883584} +{"current_steps": 22120, "total_steps": 204665, "loss": 0.1056, "lr": 1.99960309069213e-06, "epoch": 0.5403952800918574, "percentage": 10.81, "elapsed_time": "0:28:26", "remaining_time": "3:54:45", "throughput": 8723.16, "total_tokens": 14888960} +{"current_steps": 22125, "total_steps": 204665, "loss": 0.1507, "lr": 1.999600684617674e-06, "epoch": 0.5405174309237046, "percentage": 10.81, "elapsed_time": "0:28:27", "remaining_time": "3:54:44", "throughput": 8723.46, "total_tokens": 14892544} +{"current_steps": 22130, "total_steps": 204665, "loss": 0.1527, "lr": 1.999598271273853e-06, "epoch": 0.5406395817555517, "percentage": 10.81, "elapsed_time": "0:28:27", "remaining_time": "3:54:44", "throughput": 8723.72, "total_tokens": 14896064} +{"current_steps": 22135, "total_steps": 204665, "loss": 0.1882, "lr": 1.9995958506606843e-06, "epoch": 0.5407617325873989, "percentage": 10.82, "elapsed_time": "0:28:27", "remaining_time": "3:54:43", "throughput": 8723.74, "total_tokens": 14899072} +{"current_steps": 22140, "total_steps": 204665, "loss": 0.0974, "lr": 1.999593422778186e-06, "epoch": 0.5408838834192461, "percentage": 10.82, "elapsed_time": "0:28:28", "remaining_time": "3:54:42", "throughput": 8723.97, "total_tokens": 14902528} +{"current_steps": 22145, "total_steps": 204665, "loss": 0.1271, "lr": 1.9995909876263753e-06, "epoch": 0.5410060342510933, "percentage": 10.82, "elapsed_time": "0:28:28", "remaining_time": "3:54:42", "throughput": 8724.12, "total_tokens": 14905792} +{"current_steps": 22150, "total_steps": 204665, "loss": 0.1134, "lr": 1.99958854520527e-06, "epoch": 0.5411281850829404, "percentage": 10.82, "elapsed_time": "0:28:28", "remaining_time": "3:54:41", "throughput": 8724.34, "total_tokens": 14909184} +{"current_steps": 22155, "total_steps": 204665, "loss": 0.097, "lr": 1.9995860955148884e-06, "epoch": 0.5412503359147876, "percentage": 10.83, "elapsed_time": "0:28:29", "remaining_time": "3:54:40", "throughput": 8724.5, "total_tokens": 14912512} +{"current_steps": 22160, "total_steps": 204665, "loss": 0.0944, "lr": 1.999583638555247e-06, "epoch": 0.5413724867466347, "percentage": 10.83, "elapsed_time": "0:28:29", "remaining_time": "3:54:40", "throughput": 8724.61, "total_tokens": 14915712} +{"current_steps": 22165, "total_steps": 204665, "loss": 0.0748, "lr": 1.999581174326365e-06, "epoch": 0.5414946375784819, "percentage": 10.83, "elapsed_time": "0:28:29", "remaining_time": "3:54:39", "throughput": 8724.84, "total_tokens": 14919168} +{"current_steps": 22170, "total_steps": 204665, "loss": 0.1841, "lr": 1.99957870282826e-06, "epoch": 0.5416167884103291, "percentage": 10.83, "elapsed_time": "0:28:30", "remaining_time": "3:54:38", "throughput": 8725.29, "total_tokens": 14923072} +{"current_steps": 22175, "total_steps": 204665, "loss": 0.1218, "lr": 1.99957622406095e-06, "epoch": 0.5417389392421762, "percentage": 10.83, "elapsed_time": "0:28:30", "remaining_time": "3:54:38", "throughput": 8725.62, "total_tokens": 14926720} +{"current_steps": 22180, "total_steps": 204665, "loss": 0.0952, "lr": 1.9995737380244523e-06, "epoch": 0.5418610900740234, "percentage": 10.84, "elapsed_time": "0:28:31", "remaining_time": "3:54:37", "throughput": 8725.72, "total_tokens": 14929856} +{"current_steps": 22185, "total_steps": 204665, "loss": 0.1337, "lr": 1.999571244718786e-06, "epoch": 0.5419832409058706, "percentage": 10.84, "elapsed_time": "0:28:31", "remaining_time": "3:54:36", "throughput": 8725.83, "total_tokens": 14933056} +{"current_steps": 22190, "total_steps": 204665, "loss": 0.104, "lr": 1.9995687441439685e-06, "epoch": 0.5421053917377178, "percentage": 10.84, "elapsed_time": "0:28:31", "remaining_time": "3:54:35", "throughput": 8725.98, "total_tokens": 14936320} +{"current_steps": 22195, "total_steps": 204665, "loss": 0.0855, "lr": 1.9995662363000184e-06, "epoch": 0.5422275425695648, "percentage": 10.84, "elapsed_time": "0:28:32", "remaining_time": "3:54:35", "throughput": 8725.97, "total_tokens": 14939264} +{"current_steps": 22200, "total_steps": 204665, "loss": 0.1127, "lr": 1.999563721186953e-06, "epoch": 0.542349693401412, "percentage": 10.85, "elapsed_time": "0:28:32", "remaining_time": "3:54:34", "throughput": 8726.33, "total_tokens": 14942976} +{"current_steps": 22205, "total_steps": 204665, "loss": 0.0067, "lr": 1.9995611988047926e-06, "epoch": 0.5424718442332592, "percentage": 10.85, "elapsed_time": "0:28:32", "remaining_time": "3:54:33", "throughput": 8726.51, "total_tokens": 14946304} +{"current_steps": 22210, "total_steps": 204665, "loss": 0.1645, "lr": 1.9995586691535537e-06, "epoch": 0.5425939950651064, "percentage": 10.85, "elapsed_time": "0:28:33", "remaining_time": "3:54:33", "throughput": 8727.01, "total_tokens": 14950336} +{"current_steps": 22215, "total_steps": 204665, "loss": 0.0654, "lr": 1.999556132233255e-06, "epoch": 0.5427161458969536, "percentage": 10.85, "elapsed_time": "0:28:33", "remaining_time": "3:54:32", "throughput": 8727.15, "total_tokens": 14953600} +{"current_steps": 22220, "total_steps": 204665, "loss": 0.1185, "lr": 1.9995535880439158e-06, "epoch": 0.5428382967288007, "percentage": 10.86, "elapsed_time": "0:28:33", "remaining_time": "3:54:31", "throughput": 8727.24, "total_tokens": 14956736} +{"current_steps": 22225, "total_steps": 204665, "loss": 0.0389, "lr": 1.999551036585554e-06, "epoch": 0.5429604475606479, "percentage": 10.86, "elapsed_time": "0:28:34", "remaining_time": "3:54:30", "throughput": 8727.3, "total_tokens": 14959808} +{"current_steps": 22230, "total_steps": 204665, "loss": 0.1265, "lr": 1.999548477858188e-06, "epoch": 0.5430825983924951, "percentage": 10.86, "elapsed_time": "0:28:34", "remaining_time": "3:54:30", "throughput": 8727.43, "total_tokens": 14963008} +{"current_steps": 22235, "total_steps": 204665, "loss": 0.08, "lr": 1.9995459118618364e-06, "epoch": 0.5432047492243423, "percentage": 10.86, "elapsed_time": "0:28:34", "remaining_time": "3:54:29", "throughput": 8727.43, "total_tokens": 14965952} +{"current_steps": 22240, "total_steps": 204665, "loss": 0.2909, "lr": 1.9995433385965187e-06, "epoch": 0.5433269000561893, "percentage": 10.87, "elapsed_time": "0:28:35", "remaining_time": "3:54:28", "throughput": 8727.57, "total_tokens": 14969216} +{"current_steps": 22245, "total_steps": 204665, "loss": 0.2292, "lr": 1.9995407580622526e-06, "epoch": 0.5434490508880365, "percentage": 10.87, "elapsed_time": "0:28:35", "remaining_time": "3:54:28", "throughput": 8727.89, "total_tokens": 14972864} +{"current_steps": 22250, "total_steps": 204665, "loss": 0.0804, "lr": 1.9995381702590572e-06, "epoch": 0.5435712017198837, "percentage": 10.87, "elapsed_time": "0:28:35", "remaining_time": "3:54:27", "throughput": 8728.07, "total_tokens": 14976192} +{"current_steps": 22255, "total_steps": 204665, "loss": 0.2389, "lr": 1.9995355751869517e-06, "epoch": 0.5436933525517309, "percentage": 10.87, "elapsed_time": "0:28:36", "remaining_time": "3:54:26", "throughput": 8728.24, "total_tokens": 14979520} +{"current_steps": 22260, "total_steps": 204665, "loss": 0.1369, "lr": 1.9995329728459545e-06, "epoch": 0.5438155033835781, "percentage": 10.88, "elapsed_time": "0:28:36", "remaining_time": "3:54:25", "throughput": 8728.3, "total_tokens": 14982592} +{"current_steps": 22265, "total_steps": 204665, "loss": 0.113, "lr": 1.999530363236085e-06, "epoch": 0.5439376542154252, "percentage": 10.88, "elapsed_time": "0:28:36", "remaining_time": "3:54:25", "throughput": 8728.63, "total_tokens": 14986240} +{"current_steps": 22270, "total_steps": 204665, "loss": 0.0672, "lr": 1.9995277463573612e-06, "epoch": 0.5440598050472724, "percentage": 10.88, "elapsed_time": "0:28:37", "remaining_time": "3:54:24", "throughput": 8728.82, "total_tokens": 14989568} +{"current_steps": 22275, "total_steps": 204665, "loss": 0.0729, "lr": 1.999525122209803e-06, "epoch": 0.5441819558791195, "percentage": 10.88, "elapsed_time": "0:28:37", "remaining_time": "3:54:23", "throughput": 8729.18, "total_tokens": 14993280} +{"current_steps": 22280, "total_steps": 204665, "loss": 0.07, "lr": 1.9995224907934295e-06, "epoch": 0.5443041067109667, "percentage": 10.89, "elapsed_time": "0:28:37", "remaining_time": "3:54:23", "throughput": 8729.73, "total_tokens": 14997440} +{"current_steps": 22285, "total_steps": 204665, "loss": 0.0847, "lr": 1.9995198521082594e-06, "epoch": 0.5444262575428138, "percentage": 10.89, "elapsed_time": "0:28:38", "remaining_time": "3:54:22", "throughput": 8729.99, "total_tokens": 15000960} +{"current_steps": 22290, "total_steps": 204665, "loss": 0.0509, "lr": 1.999517206154312e-06, "epoch": 0.544548408374661, "percentage": 10.89, "elapsed_time": "0:28:38", "remaining_time": "3:54:21", "throughput": 8730.08, "total_tokens": 15004096} +{"current_steps": 22295, "total_steps": 204665, "loss": 0.1495, "lr": 1.999514552931607e-06, "epoch": 0.5446705592065082, "percentage": 10.89, "elapsed_time": "0:28:39", "remaining_time": "3:54:21", "throughput": 8730.37, "total_tokens": 15007680} +{"current_steps": 22300, "total_steps": 204665, "loss": 0.151, "lr": 1.9995118924401632e-06, "epoch": 0.5447927100383554, "percentage": 10.9, "elapsed_time": "0:28:39", "remaining_time": "3:54:20", "throughput": 8730.57, "total_tokens": 15011072} +{"current_steps": 22305, "total_steps": 204665, "loss": 0.1137, "lr": 1.99950922468e-06, "epoch": 0.5449148608702026, "percentage": 10.9, "elapsed_time": "0:28:39", "remaining_time": "3:54:19", "throughput": 8730.65, "total_tokens": 15014208} +{"current_steps": 22310, "total_steps": 204665, "loss": 0.0952, "lr": 1.9995065496511367e-06, "epoch": 0.5450370117020497, "percentage": 10.9, "elapsed_time": "0:28:40", "remaining_time": "3:54:19", "throughput": 8730.73, "total_tokens": 15017344} +{"current_steps": 22315, "total_steps": 204665, "loss": 0.1833, "lr": 1.9995038673535933e-06, "epoch": 0.5451591625338968, "percentage": 10.9, "elapsed_time": "0:28:40", "remaining_time": "3:54:18", "throughput": 8731.04, "total_tokens": 15020992} +{"current_steps": 22320, "total_steps": 204665, "loss": 0.0792, "lr": 1.9995011777873887e-06, "epoch": 0.545281313365744, "percentage": 10.91, "elapsed_time": "0:28:40", "remaining_time": "3:54:17", "throughput": 8731.06, "total_tokens": 15024000} +{"current_steps": 22325, "total_steps": 204665, "loss": 0.1104, "lr": 1.999498480952543e-06, "epoch": 0.5454034641975912, "percentage": 10.91, "elapsed_time": "0:28:41", "remaining_time": "3:54:17", "throughput": 8731.48, "total_tokens": 15027840} +{"current_steps": 22330, "total_steps": 204665, "loss": 0.2498, "lr": 1.999495776849075e-06, "epoch": 0.5455256150294383, "percentage": 10.91, "elapsed_time": "0:28:41", "remaining_time": "3:54:16", "throughput": 8731.61, "total_tokens": 15031104} +{"current_steps": 22335, "total_steps": 204665, "loss": 0.1117, "lr": 1.999493065477005e-06, "epoch": 0.5456477658612855, "percentage": 10.91, "elapsed_time": "0:28:41", "remaining_time": "3:54:15", "throughput": 8731.8, "total_tokens": 15034432} +{"current_steps": 22340, "total_steps": 204665, "loss": 0.0532, "lr": 1.999490346836353e-06, "epoch": 0.5457699166931327, "percentage": 10.92, "elapsed_time": "0:28:42", "remaining_time": "3:54:15", "throughput": 8731.99, "total_tokens": 15037824} +{"current_steps": 22345, "total_steps": 204665, "loss": 0.0565, "lr": 1.999487620927138e-06, "epoch": 0.5458920675249799, "percentage": 10.92, "elapsed_time": "0:28:42", "remaining_time": "3:54:14", "throughput": 8732.07, "total_tokens": 15040960} +{"current_steps": 22350, "total_steps": 204665, "loss": 0.0731, "lr": 1.9994848877493806e-06, "epoch": 0.5460142183568271, "percentage": 10.92, "elapsed_time": "0:28:42", "remaining_time": "3:54:13", "throughput": 8732.08, "total_tokens": 15043904} +{"current_steps": 22355, "total_steps": 204665, "loss": 0.1369, "lr": 1.9994821473031e-06, "epoch": 0.5461363691886741, "percentage": 10.92, "elapsed_time": "0:28:43", "remaining_time": "3:54:12", "throughput": 8732.08, "total_tokens": 15046848} +{"current_steps": 22360, "total_steps": 204665, "loss": 0.1618, "lr": 1.9994793995883165e-06, "epoch": 0.5462585200205213, "percentage": 10.93, "elapsed_time": "0:28:43", "remaining_time": "3:54:12", "throughput": 8732.24, "total_tokens": 15050176} +{"current_steps": 22365, "total_steps": 204665, "loss": 0.1655, "lr": 1.9994766446050497e-06, "epoch": 0.5463806708523685, "percentage": 10.93, "elapsed_time": "0:28:43", "remaining_time": "3:54:11", "throughput": 8732.49, "total_tokens": 15053632} +{"current_steps": 22370, "total_steps": 204665, "loss": 0.0954, "lr": 1.9994738823533203e-06, "epoch": 0.5465028216842157, "percentage": 10.93, "elapsed_time": "0:28:44", "remaining_time": "3:54:10", "throughput": 8732.72, "total_tokens": 15057088} +{"current_steps": 22375, "total_steps": 204665, "loss": 0.0837, "lr": 1.9994711128331474e-06, "epoch": 0.5466249725160628, "percentage": 10.93, "elapsed_time": "0:28:44", "remaining_time": "3:54:10", "throughput": 8732.8, "total_tokens": 15060224} +{"current_steps": 22380, "total_steps": 204665, "loss": 0.1012, "lr": 1.9994683360445522e-06, "epoch": 0.54674712334791, "percentage": 10.93, "elapsed_time": "0:28:44", "remaining_time": "3:54:09", "throughput": 8732.92, "total_tokens": 15063424} +{"current_steps": 22385, "total_steps": 204665, "loss": 0.0936, "lr": 1.9994655519875546e-06, "epoch": 0.5468692741797572, "percentage": 10.94, "elapsed_time": "0:28:45", "remaining_time": "3:54:08", "throughput": 8732.92, "total_tokens": 15066368} +{"current_steps": 22390, "total_steps": 204665, "loss": 0.0797, "lr": 1.9994627606621745e-06, "epoch": 0.5469914250116044, "percentage": 10.94, "elapsed_time": "0:28:45", "remaining_time": "3:54:07", "throughput": 8733.25, "total_tokens": 15070016} +{"current_steps": 22395, "total_steps": 204665, "loss": 0.2556, "lr": 1.999459962068432e-06, "epoch": 0.5471135758434514, "percentage": 10.94, "elapsed_time": "0:28:45", "remaining_time": "3:54:07", "throughput": 8733.33, "total_tokens": 15073152} +{"current_steps": 22400, "total_steps": 204665, "loss": 0.2031, "lr": 1.9994571562063483e-06, "epoch": 0.5472357266752986, "percentage": 10.94, "elapsed_time": "0:28:46", "remaining_time": "3:54:06", "throughput": 8733.51, "total_tokens": 15076480} +{"current_steps": 22405, "total_steps": 204665, "loss": 0.1312, "lr": 1.999454343075943e-06, "epoch": 0.5473578775071458, "percentage": 10.95, "elapsed_time": "0:28:46", "remaining_time": "3:54:05", "throughput": 8733.62, "total_tokens": 15079680} +{"current_steps": 22410, "total_steps": 204665, "loss": 0.089, "lr": 1.9994515226772373e-06, "epoch": 0.547480028338993, "percentage": 10.95, "elapsed_time": "0:28:46", "remaining_time": "3:54:04", "throughput": 8733.68, "total_tokens": 15082752} +{"current_steps": 22415, "total_steps": 204665, "loss": 0.0578, "lr": 1.9994486950102512e-06, "epoch": 0.5476021791708402, "percentage": 10.95, "elapsed_time": "0:28:47", "remaining_time": "3:54:04", "throughput": 8733.84, "total_tokens": 15086016} +{"current_steps": 22420, "total_steps": 204665, "loss": 0.0682, "lr": 1.9994458600750054e-06, "epoch": 0.5477243300026873, "percentage": 10.95, "elapsed_time": "0:28:47", "remaining_time": "3:54:03", "throughput": 8734.02, "total_tokens": 15089408} +{"current_steps": 22425, "total_steps": 204665, "loss": 0.1, "lr": 1.99944301787152e-06, "epoch": 0.5478464808345345, "percentage": 10.96, "elapsed_time": "0:28:48", "remaining_time": "3:54:02", "throughput": 8734.11, "total_tokens": 15092544} +{"current_steps": 22430, "total_steps": 204665, "loss": 0.1168, "lr": 1.999440168399817e-06, "epoch": 0.5479686316663817, "percentage": 10.96, "elapsed_time": "0:28:48", "remaining_time": "3:54:02", "throughput": 8734.34, "total_tokens": 15096000} +{"current_steps": 22435, "total_steps": 204665, "loss": 0.0899, "lr": 1.9994373116599155e-06, "epoch": 0.5480907824982288, "percentage": 10.96, "elapsed_time": "0:28:48", "remaining_time": "3:54:01", "throughput": 8734.42, "total_tokens": 15099136} +{"current_steps": 22440, "total_steps": 204665, "loss": 0.1968, "lr": 1.9994344476518376e-06, "epoch": 0.5482129333300759, "percentage": 10.96, "elapsed_time": "0:28:49", "remaining_time": "3:54:00", "throughput": 8734.73, "total_tokens": 15102720} +{"current_steps": 22445, "total_steps": 204665, "loss": 0.1988, "lr": 1.9994315763756033e-06, "epoch": 0.5483350841619231, "percentage": 10.97, "elapsed_time": "0:28:49", "remaining_time": "3:54:00", "throughput": 8735.07, "total_tokens": 15106432} +{"current_steps": 22450, "total_steps": 204665, "loss": 0.0623, "lr": 1.9994286978312338e-06, "epoch": 0.5484572349937703, "percentage": 10.97, "elapsed_time": "0:28:49", "remaining_time": "3:53:59", "throughput": 8735.3, "total_tokens": 15109888} +{"current_steps": 22455, "total_steps": 204665, "loss": 0.121, "lr": 1.99942581201875e-06, "epoch": 0.5485793858256175, "percentage": 10.97, "elapsed_time": "0:28:50", "remaining_time": "3:53:58", "throughput": 8735.59, "total_tokens": 15113472} +{"current_steps": 22460, "total_steps": 204665, "loss": 0.0837, "lr": 1.9994229189381726e-06, "epoch": 0.5487015366574647, "percentage": 10.97, "elapsed_time": "0:28:50", "remaining_time": "3:53:58", "throughput": 8735.65, "total_tokens": 15116544} +{"current_steps": 22465, "total_steps": 204665, "loss": 0.1636, "lr": 1.9994200185895233e-06, "epoch": 0.5488236874893118, "percentage": 10.98, "elapsed_time": "0:28:50", "remaining_time": "3:53:57", "throughput": 8735.88, "total_tokens": 15120000} +{"current_steps": 22470, "total_steps": 204665, "loss": 0.1474, "lr": 1.9994171109728227e-06, "epoch": 0.548945838321159, "percentage": 10.98, "elapsed_time": "0:28:51", "remaining_time": "3:53:56", "throughput": 8735.96, "total_tokens": 15123136} +{"current_steps": 22475, "total_steps": 204665, "loss": 0.1026, "lr": 1.999414196088092e-06, "epoch": 0.5490679891530061, "percentage": 10.98, "elapsed_time": "0:28:51", "remaining_time": "3:53:56", "throughput": 8736.18, "total_tokens": 15126592} +{"current_steps": 22480, "total_steps": 204665, "loss": 0.0944, "lr": 1.9994112739353526e-06, "epoch": 0.5491901399848533, "percentage": 10.98, "elapsed_time": "0:28:51", "remaining_time": "3:53:55", "throughput": 8736.39, "total_tokens": 15129984} +{"current_steps": 22485, "total_steps": 204665, "loss": 0.1571, "lr": 1.9994083445146255e-06, "epoch": 0.5493122908167004, "percentage": 10.99, "elapsed_time": "0:28:52", "remaining_time": "3:53:54", "throughput": 8736.66, "total_tokens": 15133504} +{"current_steps": 22490, "total_steps": 204665, "loss": 0.0858, "lr": 1.999405407825932e-06, "epoch": 0.5494344416485476, "percentage": 10.99, "elapsed_time": "0:28:52", "remaining_time": "3:53:53", "throughput": 8736.72, "total_tokens": 15136576} +{"current_steps": 22495, "total_steps": 204665, "loss": 0.1534, "lr": 1.999402463869294e-06, "epoch": 0.5495565924803948, "percentage": 10.99, "elapsed_time": "0:28:52", "remaining_time": "3:53:53", "throughput": 8737.05, "total_tokens": 15140224} +{"current_steps": 22500, "total_steps": 204665, "loss": 0.1497, "lr": 1.9993995126447325e-06, "epoch": 0.549678743312242, "percentage": 10.99, "elapsed_time": "0:28:53", "remaining_time": "3:53:52", "throughput": 8737.2, "total_tokens": 15143488} +{"current_steps": 22505, "total_steps": 204665, "loss": 0.1645, "lr": 1.9993965541522684e-06, "epoch": 0.5498008941440892, "percentage": 11.0, "elapsed_time": "0:28:53", "remaining_time": "3:53:51", "throughput": 8737.37, "total_tokens": 15146816} +{"current_steps": 22510, "total_steps": 204665, "loss": 0.1014, "lr": 1.999393588391924e-06, "epoch": 0.5499230449759362, "percentage": 11.0, "elapsed_time": "0:28:53", "remaining_time": "3:53:51", "throughput": 8737.58, "total_tokens": 15150208} +{"current_steps": 22515, "total_steps": 204665, "loss": 0.0376, "lr": 1.9993906153637204e-06, "epoch": 0.5500451958077834, "percentage": 11.0, "elapsed_time": "0:28:54", "remaining_time": "3:53:50", "throughput": 8737.81, "total_tokens": 15153664} +{"current_steps": 22520, "total_steps": 204665, "loss": 0.0523, "lr": 1.9993876350676796e-06, "epoch": 0.5501673466396306, "percentage": 11.0, "elapsed_time": "0:28:54", "remaining_time": "3:53:49", "throughput": 8738.09, "total_tokens": 15157248} +{"current_steps": 22525, "total_steps": 204665, "loss": 0.1262, "lr": 1.999384647503823e-06, "epoch": 0.5502894974714778, "percentage": 11.01, "elapsed_time": "0:28:54", "remaining_time": "3:53:49", "throughput": 8738.11, "total_tokens": 15160256} +{"current_steps": 22530, "total_steps": 204665, "loss": 0.2036, "lr": 1.9993816526721725e-06, "epoch": 0.5504116483033249, "percentage": 11.01, "elapsed_time": "0:28:55", "remaining_time": "3:53:48", "throughput": 8738.35, "total_tokens": 15163712} +{"current_steps": 22535, "total_steps": 204665, "loss": 0.0762, "lr": 1.9993786505727503e-06, "epoch": 0.5505337991351721, "percentage": 11.01, "elapsed_time": "0:28:55", "remaining_time": "3:53:47", "throughput": 8738.45, "total_tokens": 15166912} +{"current_steps": 22540, "total_steps": 204665, "loss": 0.1405, "lr": 1.9993756412055773e-06, "epoch": 0.5506559499670193, "percentage": 11.01, "elapsed_time": "0:28:56", "remaining_time": "3:53:47", "throughput": 8738.7, "total_tokens": 15170432} +{"current_steps": 22545, "total_steps": 204665, "loss": 0.2726, "lr": 1.999372624570676e-06, "epoch": 0.5507781007988665, "percentage": 11.02, "elapsed_time": "0:28:56", "remaining_time": "3:53:46", "throughput": 8738.86, "total_tokens": 15173760} +{"current_steps": 22550, "total_steps": 204665, "loss": 0.0718, "lr": 1.999369600668068e-06, "epoch": 0.5509002516307137, "percentage": 11.02, "elapsed_time": "0:28:56", "remaining_time": "3:53:45", "throughput": 8738.86, "total_tokens": 15176704} +{"current_steps": 22555, "total_steps": 204665, "loss": 0.122, "lr": 1.9993665694977755e-06, "epoch": 0.5510224024625607, "percentage": 11.02, "elapsed_time": "0:28:57", "remaining_time": "3:53:44", "throughput": 8739.05, "total_tokens": 15180096} +{"current_steps": 22560, "total_steps": 204665, "loss": 0.0551, "lr": 1.9993635310598207e-06, "epoch": 0.5511445532944079, "percentage": 11.02, "elapsed_time": "0:28:57", "remaining_time": "3:53:44", "throughput": 8739.11, "total_tokens": 15183168} +{"current_steps": 22565, "total_steps": 204665, "loss": 0.0613, "lr": 1.9993604853542254e-06, "epoch": 0.5512667041262551, "percentage": 11.03, "elapsed_time": "0:28:57", "remaining_time": "3:53:43", "throughput": 8739.31, "total_tokens": 15186560} +{"current_steps": 22570, "total_steps": 204665, "loss": 0.0921, "lr": 1.9993574323810115e-06, "epoch": 0.5513888549581023, "percentage": 11.03, "elapsed_time": "0:28:58", "remaining_time": "3:53:42", "throughput": 8739.46, "total_tokens": 15189824} +{"current_steps": 22575, "total_steps": 204665, "loss": 0.1304, "lr": 1.999354372140202e-06, "epoch": 0.5515110057899494, "percentage": 11.03, "elapsed_time": "0:28:58", "remaining_time": "3:53:42", "throughput": 8739.54, "total_tokens": 15192960} +{"current_steps": 22580, "total_steps": 204665, "loss": 0.1192, "lr": 1.9993513046318186e-06, "epoch": 0.5516331566217966, "percentage": 11.03, "elapsed_time": "0:28:58", "remaining_time": "3:53:41", "throughput": 8739.81, "total_tokens": 15196544} +{"current_steps": 22585, "total_steps": 204665, "loss": 0.1632, "lr": 1.9993482298558836e-06, "epoch": 0.5517553074536438, "percentage": 11.04, "elapsed_time": "0:28:59", "remaining_time": "3:53:40", "throughput": 8739.83, "total_tokens": 15199552} +{"current_steps": 22590, "total_steps": 204665, "loss": 0.0365, "lr": 1.99934514781242e-06, "epoch": 0.551877458285491, "percentage": 11.04, "elapsed_time": "0:28:59", "remaining_time": "3:53:40", "throughput": 8740.0, "total_tokens": 15202880} +{"current_steps": 22595, "total_steps": 204665, "loss": 0.1904, "lr": 1.999342058501449e-06, "epoch": 0.5519996091173381, "percentage": 11.04, "elapsed_time": "0:28:59", "remaining_time": "3:53:39", "throughput": 8740.06, "total_tokens": 15205952} +{"current_steps": 22600, "total_steps": 204665, "loss": 0.0517, "lr": 1.999338961922994e-06, "epoch": 0.5521217599491852, "percentage": 11.04, "elapsed_time": "0:29:00", "remaining_time": "3:53:38", "throughput": 8740.06, "total_tokens": 15208896} +{"current_steps": 22605, "total_steps": 204665, "loss": 0.2134, "lr": 1.9993358580770774e-06, "epoch": 0.5522439107810324, "percentage": 11.04, "elapsed_time": "0:29:00", "remaining_time": "3:53:37", "throughput": 8740.21, "total_tokens": 15212224} +{"current_steps": 22610, "total_steps": 204665, "loss": 0.1293, "lr": 1.9993327469637215e-06, "epoch": 0.5523660616128796, "percentage": 11.05, "elapsed_time": "0:29:00", "remaining_time": "3:53:37", "throughput": 8740.29, "total_tokens": 15215360} +{"current_steps": 22615, "total_steps": 204665, "loss": 0.0241, "lr": 1.9993296285829492e-06, "epoch": 0.5524882124447268, "percentage": 11.05, "elapsed_time": "0:29:01", "remaining_time": "3:53:36", "throughput": 8740.66, "total_tokens": 15219136} +{"current_steps": 22620, "total_steps": 204665, "loss": 0.1111, "lr": 1.999326502934783e-06, "epoch": 0.5526103632765739, "percentage": 11.05, "elapsed_time": "0:29:01", "remaining_time": "3:53:35", "throughput": 8740.94, "total_tokens": 15222720} +{"current_steps": 22625, "total_steps": 204665, "loss": 0.1049, "lr": 1.9993233700192454e-06, "epoch": 0.5527325141084211, "percentage": 11.05, "elapsed_time": "0:29:01", "remaining_time": "3:53:35", "throughput": 8741.02, "total_tokens": 15225856} +{"current_steps": 22630, "total_steps": 204665, "loss": 0.1601, "lr": 1.99932022983636e-06, "epoch": 0.5528546649402682, "percentage": 11.06, "elapsed_time": "0:29:02", "remaining_time": "3:53:34", "throughput": 8741.18, "total_tokens": 15229120} +{"current_steps": 22635, "total_steps": 204665, "loss": 0.2518, "lr": 1.9993170823861488e-06, "epoch": 0.5529768157721154, "percentage": 11.06, "elapsed_time": "0:29:02", "remaining_time": "3:53:33", "throughput": 8741.32, "total_tokens": 15232384} +{"current_steps": 22640, "total_steps": 204665, "loss": 0.0603, "lr": 1.999313927668635e-06, "epoch": 0.5530989666039626, "percentage": 11.06, "elapsed_time": "0:29:02", "remaining_time": "3:53:33", "throughput": 8741.57, "total_tokens": 15235904} +{"current_steps": 22645, "total_steps": 204665, "loss": 0.0919, "lr": 1.9993107656838415e-06, "epoch": 0.5532211174358097, "percentage": 11.06, "elapsed_time": "0:29:03", "remaining_time": "3:53:32", "throughput": 8741.76, "total_tokens": 15239296} +{"current_steps": 22650, "total_steps": 204665, "loss": 0.092, "lr": 1.9993075964317912e-06, "epoch": 0.5533432682676569, "percentage": 11.07, "elapsed_time": "0:29:03", "remaining_time": "3:53:31", "throughput": 8741.94, "total_tokens": 15242624} +{"current_steps": 22655, "total_steps": 204665, "loss": 0.0941, "lr": 1.999304419912508e-06, "epoch": 0.5534654190995041, "percentage": 11.07, "elapsed_time": "0:29:03", "remaining_time": "3:53:31", "throughput": 8742.35, "total_tokens": 15246464} +{"current_steps": 22660, "total_steps": 204665, "loss": 0.1296, "lr": 1.9993012361260134e-06, "epoch": 0.5535875699313513, "percentage": 11.07, "elapsed_time": "0:29:04", "remaining_time": "3:53:30", "throughput": 8742.61, "total_tokens": 15249984} +{"current_steps": 22665, "total_steps": 204665, "loss": 0.1609, "lr": 1.999298045072332e-06, "epoch": 0.5537097207631984, "percentage": 11.07, "elapsed_time": "0:29:04", "remaining_time": "3:53:29", "throughput": 8742.7, "total_tokens": 15253184} +{"current_steps": 22670, "total_steps": 204665, "loss": 0.1717, "lr": 1.999294846751486e-06, "epoch": 0.5538318715950455, "percentage": 11.08, "elapsed_time": "0:29:05", "remaining_time": "3:53:29", "throughput": 8743.24, "total_tokens": 15257280} +{"current_steps": 22675, "total_steps": 204665, "loss": 0.1045, "lr": 1.9992916411634995e-06, "epoch": 0.5539540224268927, "percentage": 11.08, "elapsed_time": "0:29:05", "remaining_time": "3:53:28", "throughput": 8743.26, "total_tokens": 15260288} +{"current_steps": 22680, "total_steps": 204665, "loss": 0.0683, "lr": 1.9992884283083954e-06, "epoch": 0.5540761732587399, "percentage": 11.08, "elapsed_time": "0:29:05", "remaining_time": "3:53:27", "throughput": 8743.34, "total_tokens": 15263424} +{"current_steps": 22685, "total_steps": 204665, "loss": 0.345, "lr": 1.9992852081861967e-06, "epoch": 0.554198324090587, "percentage": 11.08, "elapsed_time": "0:29:06", "remaining_time": "3:53:27", "throughput": 8743.63, "total_tokens": 15267008} +{"current_steps": 22690, "total_steps": 204665, "loss": 0.1971, "lr": 1.9992819807969275e-06, "epoch": 0.5543204749224342, "percentage": 11.09, "elapsed_time": "0:29:06", "remaining_time": "3:53:26", "throughput": 8743.76, "total_tokens": 15270208} +{"current_steps": 22695, "total_steps": 204665, "loss": 0.0953, "lr": 1.9992787461406107e-06, "epoch": 0.5544426257542814, "percentage": 11.09, "elapsed_time": "0:29:06", "remaining_time": "3:53:25", "throughput": 8744.16, "total_tokens": 15274048} +{"current_steps": 22700, "total_steps": 204665, "loss": 0.1193, "lr": 1.9992755042172705e-06, "epoch": 0.5545647765861286, "percentage": 11.09, "elapsed_time": "0:29:07", "remaining_time": "3:53:25", "throughput": 8744.51, "total_tokens": 15277760} +{"current_steps": 22705, "total_steps": 204665, "loss": 0.0082, "lr": 1.9992722550269296e-06, "epoch": 0.5546869274179758, "percentage": 11.09, "elapsed_time": "0:29:07", "remaining_time": "3:53:24", "throughput": 8744.93, "total_tokens": 15281664} +{"current_steps": 22710, "total_steps": 204665, "loss": 0.1653, "lr": 1.9992689985696123e-06, "epoch": 0.5548090782498228, "percentage": 11.1, "elapsed_time": "0:29:07", "remaining_time": "3:53:23", "throughput": 8745.06, "total_tokens": 15284928} +{"current_steps": 22715, "total_steps": 204665, "loss": 0.0878, "lr": 1.999265734845342e-06, "epoch": 0.55493122908167, "percentage": 11.1, "elapsed_time": "0:29:08", "remaining_time": "3:53:23", "throughput": 8745.29, "total_tokens": 15288448} +{"current_steps": 22720, "total_steps": 204665, "loss": 0.2041, "lr": 1.9992624638541425e-06, "epoch": 0.5550533799135172, "percentage": 11.1, "elapsed_time": "0:29:08", "remaining_time": "3:53:22", "throughput": 8745.21, "total_tokens": 15291200} +{"current_steps": 22725, "total_steps": 204665, "loss": 0.1671, "lr": 1.9992591855960377e-06, "epoch": 0.5551755307453644, "percentage": 11.1, "elapsed_time": "0:29:08", "remaining_time": "3:53:21", "throughput": 8745.41, "total_tokens": 15294592} +{"current_steps": 22730, "total_steps": 204665, "loss": 0.1456, "lr": 1.9992559000710514e-06, "epoch": 0.5552976815772115, "percentage": 11.11, "elapsed_time": "0:29:09", "remaining_time": "3:53:20", "throughput": 8745.43, "total_tokens": 15297600} +{"current_steps": 22735, "total_steps": 204665, "loss": 0.1581, "lr": 1.9992526072792077e-06, "epoch": 0.5554198324090587, "percentage": 11.11, "elapsed_time": "0:29:09", "remaining_time": "3:53:20", "throughput": 8745.62, "total_tokens": 15300992} +{"current_steps": 22740, "total_steps": 204665, "loss": 0.2398, "lr": 1.9992493072205298e-06, "epoch": 0.5555419832409059, "percentage": 11.11, "elapsed_time": "0:29:09", "remaining_time": "3:53:19", "throughput": 8745.84, "total_tokens": 15304448} +{"current_steps": 22745, "total_steps": 204665, "loss": 0.1129, "lr": 1.999245999895042e-06, "epoch": 0.5556641340727531, "percentage": 11.11, "elapsed_time": "0:29:10", "remaining_time": "3:53:18", "throughput": 8745.92, "total_tokens": 15307584} +{"current_steps": 22750, "total_steps": 204665, "loss": 0.1458, "lr": 1.999242685302769e-06, "epoch": 0.5557862849046002, "percentage": 11.12, "elapsed_time": "0:29:10", "remaining_time": "3:53:18", "throughput": 8746.06, "total_tokens": 15310848} +{"current_steps": 22755, "total_steps": 204665, "loss": 0.0751, "lr": 1.9992393634437343e-06, "epoch": 0.5559084357364473, "percentage": 11.12, "elapsed_time": "0:29:10", "remaining_time": "3:53:17", "throughput": 8746.43, "total_tokens": 15314624} +{"current_steps": 22760, "total_steps": 204665, "loss": 0.0323, "lr": 1.999236034317962e-06, "epoch": 0.5560305865682945, "percentage": 11.12, "elapsed_time": "0:29:11", "remaining_time": "3:53:16", "throughput": 8746.58, "total_tokens": 15317888} +{"current_steps": 22765, "total_steps": 204665, "loss": 0.1697, "lr": 1.9992326979254764e-06, "epoch": 0.5561527374001417, "percentage": 11.12, "elapsed_time": "0:29:11", "remaining_time": "3:53:16", "throughput": 8746.73, "total_tokens": 15321216} +{"current_steps": 22770, "total_steps": 204665, "loss": 0.097, "lr": 1.9992293542663023e-06, "epoch": 0.5562748882319889, "percentage": 11.13, "elapsed_time": "0:29:11", "remaining_time": "3:53:15", "throughput": 8746.84, "total_tokens": 15324416} +{"current_steps": 22775, "total_steps": 204665, "loss": 0.1083, "lr": 1.999226003340463e-06, "epoch": 0.556397039063836, "percentage": 11.13, "elapsed_time": "0:29:12", "remaining_time": "3:53:14", "throughput": 8747.1, "total_tokens": 15327936} +{"current_steps": 22780, "total_steps": 204665, "loss": 0.1821, "lr": 1.999222645147984e-06, "epoch": 0.5565191898956832, "percentage": 11.13, "elapsed_time": "0:29:12", "remaining_time": "3:53:14", "throughput": 8747.25, "total_tokens": 15331264} +{"current_steps": 22785, "total_steps": 204665, "loss": 0.08, "lr": 1.999219279688889e-06, "epoch": 0.5566413407275304, "percentage": 11.13, "elapsed_time": "0:29:13", "remaining_time": "3:53:13", "throughput": 8747.34, "total_tokens": 15334400} +{"current_steps": 22790, "total_steps": 204665, "loss": 0.112, "lr": 1.999215906963203e-06, "epoch": 0.5567634915593775, "percentage": 11.14, "elapsed_time": "0:29:13", "remaining_time": "3:53:12", "throughput": 8747.53, "total_tokens": 15337792} +{"current_steps": 22795, "total_steps": 204665, "loss": 0.0274, "lr": 1.9992125269709494e-06, "epoch": 0.5568856423912247, "percentage": 11.14, "elapsed_time": "0:29:13", "remaining_time": "3:53:12", "throughput": 8747.87, "total_tokens": 15341504} +{"current_steps": 22800, "total_steps": 204665, "loss": 0.0542, "lr": 1.9992091397121536e-06, "epoch": 0.5570077932230718, "percentage": 11.14, "elapsed_time": "0:29:14", "remaining_time": "3:53:11", "throughput": 8747.98, "total_tokens": 15344704} +{"current_steps": 22805, "total_steps": 204665, "loss": 0.1122, "lr": 1.999205745186841e-06, "epoch": 0.557129944054919, "percentage": 11.14, "elapsed_time": "0:29:14", "remaining_time": "3:53:10", "throughput": 8747.96, "total_tokens": 15347648} +{"current_steps": 22810, "total_steps": 204665, "loss": 0.0918, "lr": 1.9992023433950346e-06, "epoch": 0.5572520948867662, "percentage": 11.15, "elapsed_time": "0:29:14", "remaining_time": "3:53:10", "throughput": 8748.31, "total_tokens": 15351360} +{"current_steps": 22815, "total_steps": 204665, "loss": 0.2024, "lr": 1.9991989343367604e-06, "epoch": 0.5573742457186134, "percentage": 11.15, "elapsed_time": "0:29:15", "remaining_time": "3:53:09", "throughput": 8748.44, "total_tokens": 15354624} +{"current_steps": 22820, "total_steps": 204665, "loss": 0.1199, "lr": 1.9991955180120426e-06, "epoch": 0.5574963965504605, "percentage": 11.15, "elapsed_time": "0:29:15", "remaining_time": "3:53:08", "throughput": 8748.6, "total_tokens": 15357952} +{"current_steps": 22825, "total_steps": 204665, "loss": 0.1254, "lr": 1.9991920944209065e-06, "epoch": 0.5576185473823076, "percentage": 11.15, "elapsed_time": "0:29:15", "remaining_time": "3:53:08", "throughput": 8748.81, "total_tokens": 15361408} +{"current_steps": 22830, "total_steps": 204665, "loss": 0.1779, "lr": 1.9991886635633768e-06, "epoch": 0.5577406982141548, "percentage": 11.15, "elapsed_time": "0:29:16", "remaining_time": "3:53:07", "throughput": 8748.94, "total_tokens": 15364672} +{"current_steps": 22835, "total_steps": 204665, "loss": 0.146, "lr": 1.9991852254394783e-06, "epoch": 0.557862849046002, "percentage": 11.16, "elapsed_time": "0:29:16", "remaining_time": "3:53:06", "throughput": 8749.07, "total_tokens": 15367936} +{"current_steps": 22840, "total_steps": 204665, "loss": 0.1583, "lr": 1.9991817800492357e-06, "epoch": 0.5579849998778492, "percentage": 11.16, "elapsed_time": "0:29:16", "remaining_time": "3:53:06", "throughput": 8749.06, "total_tokens": 15370880} +{"current_steps": 22845, "total_steps": 204665, "loss": 0.0518, "lr": 1.999178327392675e-06, "epoch": 0.5581071507096963, "percentage": 11.16, "elapsed_time": "0:29:17", "remaining_time": "3:53:05", "throughput": 8749.21, "total_tokens": 15374144} +{"current_steps": 22850, "total_steps": 204665, "loss": 0.0418, "lr": 1.9991748674698202e-06, "epoch": 0.5582293015415435, "percentage": 11.16, "elapsed_time": "0:29:17", "remaining_time": "3:53:04", "throughput": 8749.15, "total_tokens": 15376960} +{"current_steps": 22855, "total_steps": 204665, "loss": 0.1065, "lr": 1.9991714002806977e-06, "epoch": 0.5583514523733907, "percentage": 11.17, "elapsed_time": "0:29:17", "remaining_time": "3:53:03", "throughput": 8749.45, "total_tokens": 15380608} +{"current_steps": 22860, "total_steps": 204665, "loss": 0.1124, "lr": 1.9991679258253314e-06, "epoch": 0.5584736032052379, "percentage": 11.17, "elapsed_time": "0:29:18", "remaining_time": "3:53:03", "throughput": 8749.6, "total_tokens": 15383872} +{"current_steps": 22865, "total_steps": 204665, "loss": 0.1763, "lr": 1.9991644441037476e-06, "epoch": 0.558595754037085, "percentage": 11.17, "elapsed_time": "0:29:18", "remaining_time": "3:53:02", "throughput": 8749.72, "total_tokens": 15387072} +{"current_steps": 22870, "total_steps": 204665, "loss": 0.0812, "lr": 1.9991609551159713e-06, "epoch": 0.5587179048689321, "percentage": 11.17, "elapsed_time": "0:29:18", "remaining_time": "3:53:01", "throughput": 8749.94, "total_tokens": 15390528} +{"current_steps": 22875, "total_steps": 204665, "loss": 0.3103, "lr": 1.9991574588620274e-06, "epoch": 0.5588400557007793, "percentage": 11.18, "elapsed_time": "0:29:19", "remaining_time": "3:53:01", "throughput": 8750.09, "total_tokens": 15393856} +{"current_steps": 22880, "total_steps": 204665, "loss": 0.2112, "lr": 1.999153955341942e-06, "epoch": 0.5589622065326265, "percentage": 11.18, "elapsed_time": "0:29:19", "remaining_time": "3:53:00", "throughput": 8750.37, "total_tokens": 15397440} +{"current_steps": 22885, "total_steps": 204665, "loss": 0.0452, "lr": 1.99915044455574e-06, "epoch": 0.5590843573644737, "percentage": 11.18, "elapsed_time": "0:29:19", "remaining_time": "3:52:59", "throughput": 8750.42, "total_tokens": 15400512} +{"current_steps": 22890, "total_steps": 204665, "loss": 0.1835, "lr": 1.999146926503448e-06, "epoch": 0.5592065081963208, "percentage": 11.18, "elapsed_time": "0:29:20", "remaining_time": "3:52:59", "throughput": 8750.53, "total_tokens": 15403712} +{"current_steps": 22895, "total_steps": 204665, "loss": 0.1538, "lr": 1.9991434011850897e-06, "epoch": 0.559328659028168, "percentage": 11.19, "elapsed_time": "0:29:20", "remaining_time": "3:52:58", "throughput": 8750.67, "total_tokens": 15406976} +{"current_steps": 22900, "total_steps": 204665, "loss": 0.1319, "lr": 1.9991398686006927e-06, "epoch": 0.5594508098600152, "percentage": 11.19, "elapsed_time": "0:29:21", "remaining_time": "3:52:57", "throughput": 8750.91, "total_tokens": 15410432} +{"current_steps": 22905, "total_steps": 204665, "loss": 0.1377, "lr": 1.9991363287502816e-06, "epoch": 0.5595729606918624, "percentage": 11.19, "elapsed_time": "0:29:21", "remaining_time": "3:52:57", "throughput": 8751.19, "total_tokens": 15414016} +{"current_steps": 22910, "total_steps": 204665, "loss": 0.2197, "lr": 1.999132781633882e-06, "epoch": 0.5596951115237094, "percentage": 11.19, "elapsed_time": "0:29:21", "remaining_time": "3:52:56", "throughput": 8751.34, "total_tokens": 15417344} +{"current_steps": 22915, "total_steps": 204665, "loss": 0.1525, "lr": 1.9991292272515204e-06, "epoch": 0.5598172623555566, "percentage": 11.2, "elapsed_time": "0:29:22", "remaining_time": "3:52:55", "throughput": 8751.77, "total_tokens": 15421248} +{"current_steps": 22920, "total_steps": 204665, "loss": 0.1281, "lr": 1.9991256656032224e-06, "epoch": 0.5599394131874038, "percentage": 11.2, "elapsed_time": "0:29:22", "remaining_time": "3:52:55", "throughput": 8751.83, "total_tokens": 15424320} +{"current_steps": 22925, "total_steps": 204665, "loss": 0.1147, "lr": 1.999122096689014e-06, "epoch": 0.560061564019251, "percentage": 11.2, "elapsed_time": "0:29:22", "remaining_time": "3:52:54", "throughput": 8751.83, "total_tokens": 15427264} +{"current_steps": 22930, "total_steps": 204665, "loss": 0.0943, "lr": 1.9991185205089206e-06, "epoch": 0.5601837148510982, "percentage": 11.2, "elapsed_time": "0:29:23", "remaining_time": "3:52:53", "throughput": 8752.14, "total_tokens": 15430912} +{"current_steps": 22935, "total_steps": 204665, "loss": 0.0882, "lr": 1.9991149370629684e-06, "epoch": 0.5603058656829453, "percentage": 11.21, "elapsed_time": "0:29:23", "remaining_time": "3:52:53", "throughput": 8752.42, "total_tokens": 15434496} +{"current_steps": 22940, "total_steps": 204665, "loss": 0.1102, "lr": 1.999111346351184e-06, "epoch": 0.5604280165147925, "percentage": 11.21, "elapsed_time": "0:29:23", "remaining_time": "3:52:52", "throughput": 8752.68, "total_tokens": 15438016} +{"current_steps": 22945, "total_steps": 204665, "loss": 0.2042, "lr": 1.9991077483735934e-06, "epoch": 0.5605501673466396, "percentage": 11.21, "elapsed_time": "0:29:24", "remaining_time": "3:52:51", "throughput": 8752.78, "total_tokens": 15441216} +{"current_steps": 22950, "total_steps": 204665, "loss": 0.0349, "lr": 1.9991041431302224e-06, "epoch": 0.5606723181784868, "percentage": 11.21, "elapsed_time": "0:29:24", "remaining_time": "3:52:51", "throughput": 8752.95, "total_tokens": 15444544} +{"current_steps": 22955, "total_steps": 204665, "loss": 0.2182, "lr": 1.9991005306210967e-06, "epoch": 0.5607944690103339, "percentage": 11.22, "elapsed_time": "0:29:24", "remaining_time": "3:52:50", "throughput": 8753.27, "total_tokens": 15448256} +{"current_steps": 22960, "total_steps": 204665, "loss": 0.1111, "lr": 1.999096910846244e-06, "epoch": 0.5609166198421811, "percentage": 11.22, "elapsed_time": "0:29:25", "remaining_time": "3:52:49", "throughput": 8753.42, "total_tokens": 15451584} +{"current_steps": 22965, "total_steps": 204665, "loss": 0.093, "lr": 1.999093283805689e-06, "epoch": 0.5610387706740283, "percentage": 11.22, "elapsed_time": "0:29:25", "remaining_time": "3:52:49", "throughput": 8753.72, "total_tokens": 15455232} +{"current_steps": 22970, "total_steps": 204665, "loss": 0.1077, "lr": 1.99908964949946e-06, "epoch": 0.5611609215058755, "percentage": 11.22, "elapsed_time": "0:29:25", "remaining_time": "3:52:48", "throughput": 8753.82, "total_tokens": 15458432} +{"current_steps": 22975, "total_steps": 204665, "loss": 0.2061, "lr": 1.9990860079275818e-06, "epoch": 0.5612830723377226, "percentage": 11.23, "elapsed_time": "0:29:26", "remaining_time": "3:52:47", "throughput": 8753.78, "total_tokens": 15461312} +{"current_steps": 22980, "total_steps": 204665, "loss": 0.0531, "lr": 1.9990823590900812e-06, "epoch": 0.5614052231695698, "percentage": 11.23, "elapsed_time": "0:29:26", "remaining_time": "3:52:47", "throughput": 8754.15, "total_tokens": 15465088} +{"current_steps": 22985, "total_steps": 204665, "loss": 0.1349, "lr": 1.9990787029869853e-06, "epoch": 0.561527374001417, "percentage": 11.23, "elapsed_time": "0:29:26", "remaining_time": "3:52:46", "throughput": 8754.54, "total_tokens": 15468928} +{"current_steps": 22990, "total_steps": 204665, "loss": 0.113, "lr": 1.9990750396183203e-06, "epoch": 0.5616495248332641, "percentage": 11.23, "elapsed_time": "0:29:27", "remaining_time": "3:52:45", "throughput": 8754.82, "total_tokens": 15472512} +{"current_steps": 22995, "total_steps": 204665, "loss": 0.0624, "lr": 1.999071368984113e-06, "epoch": 0.5617716756651113, "percentage": 11.24, "elapsed_time": "0:29:27", "remaining_time": "3:52:45", "throughput": 8755.08, "total_tokens": 15476096} +{"current_steps": 23000, "total_steps": 204665, "loss": 0.1023, "lr": 1.9990676910843897e-06, "epoch": 0.5618938264969584, "percentage": 11.24, "elapsed_time": "0:29:28", "remaining_time": "3:52:44", "throughput": 8755.12, "total_tokens": 15479168} +{"current_steps": 23005, "total_steps": 204665, "loss": 0.0564, "lr": 1.9990640059191775e-06, "epoch": 0.5620159773288056, "percentage": 11.24, "elapsed_time": "0:29:28", "remaining_time": "3:52:44", "throughput": 8755.54, "total_tokens": 15483072} +{"current_steps": 23010, "total_steps": 204665, "loss": 0.0212, "lr": 1.999060313488503e-06, "epoch": 0.5621381281606528, "percentage": 11.24, "elapsed_time": "0:29:28", "remaining_time": "3:52:43", "throughput": 8756.13, "total_tokens": 15487360} +{"current_steps": 23015, "total_steps": 204665, "loss": 0.1005, "lr": 1.9990566137923935e-06, "epoch": 0.5622602789925, "percentage": 11.25, "elapsed_time": "0:29:29", "remaining_time": "3:52:42", "throughput": 8756.47, "total_tokens": 15491072} +{"current_steps": 23020, "total_steps": 204665, "loss": 0.1663, "lr": 1.9990529068308755e-06, "epoch": 0.562382429824347, "percentage": 11.25, "elapsed_time": "0:29:29", "remaining_time": "3:52:42", "throughput": 8756.61, "total_tokens": 15494336} +{"current_steps": 23025, "total_steps": 204665, "loss": 0.0967, "lr": 1.999049192603976e-06, "epoch": 0.5625045806561942, "percentage": 11.25, "elapsed_time": "0:29:29", "remaining_time": "3:52:41", "throughput": 8756.91, "total_tokens": 15497920} +{"current_steps": 23030, "total_steps": 204665, "loss": 0.0999, "lr": 1.999045471111722e-06, "epoch": 0.5626267314880414, "percentage": 11.25, "elapsed_time": "0:29:30", "remaining_time": "3:52:40", "throughput": 8757.05, "total_tokens": 15501184} +{"current_steps": 23035, "total_steps": 204665, "loss": 0.128, "lr": 1.999041742354141e-06, "epoch": 0.5627488823198886, "percentage": 11.25, "elapsed_time": "0:29:30", "remaining_time": "3:52:40", "throughput": 8757.56, "total_tokens": 15505280} +{"current_steps": 23040, "total_steps": 204665, "loss": 0.1172, "lr": 1.9990380063312596e-06, "epoch": 0.5628710331517358, "percentage": 11.26, "elapsed_time": "0:29:30", "remaining_time": "3:52:39", "throughput": 8757.86, "total_tokens": 15508864} +{"current_steps": 23045, "total_steps": 204665, "loss": 0.2176, "lr": 1.999034263043105e-06, "epoch": 0.5629931839835829, "percentage": 11.26, "elapsed_time": "0:29:31", "remaining_time": "3:52:38", "throughput": 8757.85, "total_tokens": 15511808} +{"current_steps": 23050, "total_steps": 204665, "loss": 0.0788, "lr": 1.999030512489704e-06, "epoch": 0.5631153348154301, "percentage": 11.26, "elapsed_time": "0:29:31", "remaining_time": "3:52:38", "throughput": 8757.98, "total_tokens": 15515072} +{"current_steps": 23055, "total_steps": 204665, "loss": 0.0788, "lr": 1.9990267546710853e-06, "epoch": 0.5632374856472773, "percentage": 11.26, "elapsed_time": "0:29:31", "remaining_time": "3:52:37", "throughput": 8758.11, "total_tokens": 15518336} +{"current_steps": 23060, "total_steps": 204665, "loss": 0.0714, "lr": 1.9990229895872747e-06, "epoch": 0.5633596364791245, "percentage": 11.27, "elapsed_time": "0:29:32", "remaining_time": "3:52:36", "throughput": 8758.15, "total_tokens": 15521344} +{"current_steps": 23065, "total_steps": 204665, "loss": 0.1301, "lr": 1.9990192172383004e-06, "epoch": 0.5634817873109715, "percentage": 11.27, "elapsed_time": "0:29:32", "remaining_time": "3:52:36", "throughput": 8758.31, "total_tokens": 15524672} +{"current_steps": 23070, "total_steps": 204665, "loss": 0.1546, "lr": 1.99901543762419e-06, "epoch": 0.5636039381428187, "percentage": 11.27, "elapsed_time": "0:29:32", "remaining_time": "3:52:35", "throughput": 8758.65, "total_tokens": 15528384} +{"current_steps": 23075, "total_steps": 204665, "loss": 0.0498, "lr": 1.99901165074497e-06, "epoch": 0.5637260889746659, "percentage": 11.27, "elapsed_time": "0:29:33", "remaining_time": "3:52:34", "throughput": 8758.69, "total_tokens": 15531456} +{"current_steps": 23080, "total_steps": 204665, "loss": 0.0804, "lr": 1.999007856600669e-06, "epoch": 0.5638482398065131, "percentage": 11.28, "elapsed_time": "0:29:33", "remaining_time": "3:52:34", "throughput": 8758.78, "total_tokens": 15534656} +{"current_steps": 23085, "total_steps": 204665, "loss": 0.0787, "lr": 1.999004055191314e-06, "epoch": 0.5639703906383603, "percentage": 11.28, "elapsed_time": "0:29:33", "remaining_time": "3:52:33", "throughput": 8758.8, "total_tokens": 15537664} +{"current_steps": 23090, "total_steps": 204665, "loss": 0.1669, "lr": 1.9990002465169333e-06, "epoch": 0.5640925414702074, "percentage": 11.28, "elapsed_time": "0:29:34", "remaining_time": "3:52:32", "throughput": 8758.99, "total_tokens": 15541056} +{"current_steps": 23095, "total_steps": 204665, "loss": 0.116, "lr": 1.9989964305775535e-06, "epoch": 0.5642146923020546, "percentage": 11.28, "elapsed_time": "0:29:34", "remaining_time": "3:52:32", "throughput": 8759.44, "total_tokens": 15545024} +{"current_steps": 23100, "total_steps": 204665, "loss": 0.1408, "lr": 1.998992607373203e-06, "epoch": 0.5643368431339018, "percentage": 11.29, "elapsed_time": "0:29:35", "remaining_time": "3:52:31", "throughput": 8759.72, "total_tokens": 15548608} +{"current_steps": 23105, "total_steps": 204665, "loss": 0.0136, "lr": 1.9989887769039097e-06, "epoch": 0.564458993965749, "percentage": 11.29, "elapsed_time": "0:29:35", "remaining_time": "3:52:30", "throughput": 8759.77, "total_tokens": 15551680} +{"current_steps": 23110, "total_steps": 204665, "loss": 0.1633, "lr": 1.9989849391697013e-06, "epoch": 0.564581144797596, "percentage": 11.29, "elapsed_time": "0:29:35", "remaining_time": "3:52:30", "throughput": 8759.93, "total_tokens": 15555008} +{"current_steps": 23115, "total_steps": 204665, "loss": 0.2074, "lr": 1.9989810941706056e-06, "epoch": 0.5647032956294432, "percentage": 11.29, "elapsed_time": "0:29:36", "remaining_time": "3:52:29", "throughput": 8760.28, "total_tokens": 15558720} +{"current_steps": 23120, "total_steps": 204665, "loss": 0.0749, "lr": 1.998977241906651e-06, "epoch": 0.5648254464612904, "percentage": 11.3, "elapsed_time": "0:29:36", "remaining_time": "3:52:28", "throughput": 8760.45, "total_tokens": 15562048} +{"current_steps": 23125, "total_steps": 204665, "loss": 0.2976, "lr": 1.9989733823778653e-06, "epoch": 0.5649475972931376, "percentage": 11.3, "elapsed_time": "0:29:36", "remaining_time": "3:52:28", "throughput": 8760.43, "total_tokens": 15564992} +{"current_steps": 23130, "total_steps": 204665, "loss": 0.1892, "lr": 1.998969515584276e-06, "epoch": 0.5650697481249848, "percentage": 11.3, "elapsed_time": "0:29:37", "remaining_time": "3:52:27", "throughput": 8760.96, "total_tokens": 15569152} +{"current_steps": 23135, "total_steps": 204665, "loss": 0.2072, "lr": 1.9989656415259118e-06, "epoch": 0.5651918989568319, "percentage": 11.3, "elapsed_time": "0:29:37", "remaining_time": "3:52:26", "throughput": 8761.32, "total_tokens": 15572928} +{"current_steps": 23140, "total_steps": 204665, "loss": 0.1046, "lr": 1.998961760202801e-06, "epoch": 0.565314049788679, "percentage": 11.31, "elapsed_time": "0:29:37", "remaining_time": "3:52:26", "throughput": 8761.48, "total_tokens": 15576256} +{"current_steps": 23145, "total_steps": 204665, "loss": 0.213, "lr": 1.9989578716149713e-06, "epoch": 0.5654362006205262, "percentage": 11.31, "elapsed_time": "0:29:38", "remaining_time": "3:52:25", "throughput": 8761.42, "total_tokens": 15579072} +{"current_steps": 23150, "total_steps": 204665, "loss": 0.0259, "lr": 1.9989539757624515e-06, "epoch": 0.5655583514523734, "percentage": 11.31, "elapsed_time": "0:29:38", "remaining_time": "3:52:24", "throughput": 8761.81, "total_tokens": 15582912} +{"current_steps": 23155, "total_steps": 204665, "loss": 0.1362, "lr": 1.9989500726452697e-06, "epoch": 0.5656805022842205, "percentage": 11.31, "elapsed_time": "0:29:38", "remaining_time": "3:52:24", "throughput": 8762.09, "total_tokens": 15586496} +{"current_steps": 23160, "total_steps": 204665, "loss": 0.0701, "lr": 1.9989461622634543e-06, "epoch": 0.5658026531160677, "percentage": 11.32, "elapsed_time": "0:29:39", "remaining_time": "3:52:23", "throughput": 8762.17, "total_tokens": 15589632} +{"current_steps": 23165, "total_steps": 204665, "loss": 0.0172, "lr": 1.998942244617034e-06, "epoch": 0.5659248039479149, "percentage": 11.32, "elapsed_time": "0:29:39", "remaining_time": "3:52:22", "throughput": 8762.39, "total_tokens": 15593088} +{"current_steps": 23170, "total_steps": 204665, "loss": 0.2101, "lr": 1.998938319706036e-06, "epoch": 0.5660469547797621, "percentage": 11.32, "elapsed_time": "0:29:39", "remaining_time": "3:52:22", "throughput": 8762.52, "total_tokens": 15596352} +{"current_steps": 23175, "total_steps": 204665, "loss": 0.2097, "lr": 1.9989343875304908e-06, "epoch": 0.5661691056116093, "percentage": 11.32, "elapsed_time": "0:29:40", "remaining_time": "3:52:21", "throughput": 8762.85, "total_tokens": 15600064} +{"current_steps": 23180, "total_steps": 204665, "loss": 0.0918, "lr": 1.998930448090426e-06, "epoch": 0.5662912564434563, "percentage": 11.33, "elapsed_time": "0:29:40", "remaining_time": "3:52:21", "throughput": 8763.06, "total_tokens": 15603520} +{"current_steps": 23185, "total_steps": 204665, "loss": 0.0316, "lr": 1.99892650138587e-06, "epoch": 0.5664134072753035, "percentage": 11.33, "elapsed_time": "0:29:40", "remaining_time": "3:52:20", "throughput": 8763.3, "total_tokens": 15607040} +{"current_steps": 23190, "total_steps": 204665, "loss": 0.082, "lr": 1.998922547416852e-06, "epoch": 0.5665355581071507, "percentage": 11.33, "elapsed_time": "0:29:41", "remaining_time": "3:52:19", "throughput": 8763.47, "total_tokens": 15610368} +{"current_steps": 23195, "total_steps": 204665, "loss": 0.1905, "lr": 1.9989185861834003e-06, "epoch": 0.5666577089389979, "percentage": 11.33, "elapsed_time": "0:29:41", "remaining_time": "3:52:19", "throughput": 8763.63, "total_tokens": 15613696} +{"current_steps": 23200, "total_steps": 204665, "loss": 0.0609, "lr": 1.998914617685544e-06, "epoch": 0.566779859770845, "percentage": 11.34, "elapsed_time": "0:29:42", "remaining_time": "3:52:18", "throughput": 8763.96, "total_tokens": 15617408} +{"current_steps": 23205, "total_steps": 204665, "loss": 0.1334, "lr": 1.998910641923312e-06, "epoch": 0.5669020106026922, "percentage": 11.34, "elapsed_time": "0:29:42", "remaining_time": "3:52:17", "throughput": 8764.01, "total_tokens": 15620480} +{"current_steps": 23210, "total_steps": 204665, "loss": 0.1426, "lr": 1.9989066588967333e-06, "epoch": 0.5670241614345394, "percentage": 11.34, "elapsed_time": "0:29:42", "remaining_time": "3:52:16", "throughput": 8764.0, "total_tokens": 15623424} +{"current_steps": 23215, "total_steps": 204665, "loss": 0.1113, "lr": 1.9989026686058365e-06, "epoch": 0.5671463122663866, "percentage": 11.34, "elapsed_time": "0:29:43", "remaining_time": "3:52:16", "throughput": 8764.1, "total_tokens": 15626624} +{"current_steps": 23220, "total_steps": 204665, "loss": 0.0721, "lr": 1.998898671050651e-06, "epoch": 0.5672684630982336, "percentage": 11.35, "elapsed_time": "0:29:43", "remaining_time": "3:52:15", "throughput": 8764.35, "total_tokens": 15630144} +{"current_steps": 23225, "total_steps": 204665, "loss": 0.1699, "lr": 1.9988946662312052e-06, "epoch": 0.5673906139300808, "percentage": 11.35, "elapsed_time": "0:29:43", "remaining_time": "3:52:14", "throughput": 8764.5, "total_tokens": 15633472} +{"current_steps": 23230, "total_steps": 204665, "loss": 0.2271, "lr": 1.9988906541475292e-06, "epoch": 0.567512764761928, "percentage": 11.35, "elapsed_time": "0:29:44", "remaining_time": "3:52:14", "throughput": 8764.73, "total_tokens": 15636928} +{"current_steps": 23235, "total_steps": 204665, "loss": 0.144, "lr": 1.9988866347996517e-06, "epoch": 0.5676349155937752, "percentage": 11.35, "elapsed_time": "0:29:44", "remaining_time": "3:52:13", "throughput": 8764.89, "total_tokens": 15640256} +{"current_steps": 23240, "total_steps": 204665, "loss": 0.0924, "lr": 1.9988826081876018e-06, "epoch": 0.5677570664256224, "percentage": 11.36, "elapsed_time": "0:29:44", "remaining_time": "3:52:12", "throughput": 8765.0, "total_tokens": 15643456} +{"current_steps": 23245, "total_steps": 204665, "loss": 0.1721, "lr": 1.9988785743114087e-06, "epoch": 0.5678792172574695, "percentage": 11.36, "elapsed_time": "0:29:45", "remaining_time": "3:52:12", "throughput": 8765.0, "total_tokens": 15646400} +{"current_steps": 23250, "total_steps": 204665, "loss": 0.0163, "lr": 1.9988745331711022e-06, "epoch": 0.5680013680893167, "percentage": 11.36, "elapsed_time": "0:29:45", "remaining_time": "3:52:11", "throughput": 8765.05, "total_tokens": 15649472} +{"current_steps": 23255, "total_steps": 204665, "loss": 0.062, "lr": 1.9988704847667115e-06, "epoch": 0.5681235189211639, "percentage": 11.36, "elapsed_time": "0:29:45", "remaining_time": "3:52:10", "throughput": 8765.35, "total_tokens": 15653120} +{"current_steps": 23260, "total_steps": 204665, "loss": 0.108, "lr": 1.9988664290982657e-06, "epoch": 0.568245669753011, "percentage": 11.36, "elapsed_time": "0:29:46", "remaining_time": "3:52:10", "throughput": 8765.79, "total_tokens": 15657088} +{"current_steps": 23265, "total_steps": 204665, "loss": 0.2226, "lr": 1.998862366165795e-06, "epoch": 0.5683678205848581, "percentage": 11.37, "elapsed_time": "0:29:46", "remaining_time": "3:52:09", "throughput": 8765.88, "total_tokens": 15660288} +{"current_steps": 23270, "total_steps": 204665, "loss": 0.1452, "lr": 1.998858295969328e-06, "epoch": 0.5684899714167053, "percentage": 11.37, "elapsed_time": "0:29:46", "remaining_time": "3:52:08", "throughput": 8765.97, "total_tokens": 15663424} +{"current_steps": 23275, "total_steps": 204665, "loss": 0.0867, "lr": 1.998854218508895e-06, "epoch": 0.5686121222485525, "percentage": 11.37, "elapsed_time": "0:29:47", "remaining_time": "3:52:08", "throughput": 8766.03, "total_tokens": 15666496} +{"current_steps": 23280, "total_steps": 204665, "loss": 0.0866, "lr": 1.9988501337845256e-06, "epoch": 0.5687342730803997, "percentage": 11.37, "elapsed_time": "0:29:47", "remaining_time": "3:52:07", "throughput": 8766.4, "total_tokens": 15670272} +{"current_steps": 23285, "total_steps": 204665, "loss": 0.0921, "lr": 1.9988460417962494e-06, "epoch": 0.5688564239122469, "percentage": 11.38, "elapsed_time": "0:29:47", "remaining_time": "3:52:06", "throughput": 8766.56, "total_tokens": 15673600} +{"current_steps": 23290, "total_steps": 204665, "loss": 0.0575, "lr": 1.998841942544096e-06, "epoch": 0.568978574744094, "percentage": 11.38, "elapsed_time": "0:29:48", "remaining_time": "3:52:06", "throughput": 8766.69, "total_tokens": 15676864} +{"current_steps": 23295, "total_steps": 204665, "loss": 0.1449, "lr": 1.9988378360280955e-06, "epoch": 0.5691007255759412, "percentage": 11.38, "elapsed_time": "0:29:48", "remaining_time": "3:52:05", "throughput": 8766.91, "total_tokens": 15680320} +{"current_steps": 23300, "total_steps": 204665, "loss": 0.198, "lr": 1.9988337222482776e-06, "epoch": 0.5692228764077883, "percentage": 11.38, "elapsed_time": "0:29:48", "remaining_time": "3:52:04", "throughput": 8766.93, "total_tokens": 15683328} +{"current_steps": 23305, "total_steps": 204665, "loss": 0.1606, "lr": 1.998829601204672e-06, "epoch": 0.5693450272396355, "percentage": 11.39, "elapsed_time": "0:29:49", "remaining_time": "3:52:04", "throughput": 8767.09, "total_tokens": 15686656} +{"current_steps": 23310, "total_steps": 204665, "loss": 0.0729, "lr": 1.998825472897309e-06, "epoch": 0.5694671780714826, "percentage": 11.39, "elapsed_time": "0:29:49", "remaining_time": "3:52:03", "throughput": 8767.41, "total_tokens": 15690368} +{"current_steps": 23315, "total_steps": 204665, "loss": 0.1514, "lr": 1.9988213373262183e-06, "epoch": 0.5695893289033298, "percentage": 11.39, "elapsed_time": "0:29:49", "remaining_time": "3:52:02", "throughput": 8767.49, "total_tokens": 15693504} +{"current_steps": 23320, "total_steps": 204665, "loss": 0.0697, "lr": 1.9988171944914305e-06, "epoch": 0.569711479735177, "percentage": 11.39, "elapsed_time": "0:29:50", "remaining_time": "3:52:02", "throughput": 8767.58, "total_tokens": 15696640} +{"current_steps": 23325, "total_steps": 204665, "loss": 0.2161, "lr": 1.998813044392975e-06, "epoch": 0.5698336305670242, "percentage": 11.4, "elapsed_time": "0:29:50", "remaining_time": "3:52:01", "throughput": 8767.78, "total_tokens": 15700096} +{"current_steps": 23330, "total_steps": 204665, "loss": 0.0429, "lr": 1.9988088870308824e-06, "epoch": 0.5699557813988714, "percentage": 11.4, "elapsed_time": "0:29:51", "remaining_time": "3:52:00", "throughput": 8767.93, "total_tokens": 15703424} +{"current_steps": 23335, "total_steps": 204665, "loss": 0.208, "lr": 1.9988047224051835e-06, "epoch": 0.5700779322307185, "percentage": 11.4, "elapsed_time": "0:29:51", "remaining_time": "3:52:00", "throughput": 8768.21, "total_tokens": 15707008} +{"current_steps": 23340, "total_steps": 204665, "loss": 0.1379, "lr": 1.9988005505159078e-06, "epoch": 0.5702000830625656, "percentage": 11.4, "elapsed_time": "0:29:51", "remaining_time": "3:51:59", "throughput": 8768.48, "total_tokens": 15710592} +{"current_steps": 23345, "total_steps": 204665, "loss": 0.4155, "lr": 1.9987963713630856e-06, "epoch": 0.5703222338944128, "percentage": 11.41, "elapsed_time": "0:29:52", "remaining_time": "3:51:58", "throughput": 8768.84, "total_tokens": 15714304} +{"current_steps": 23350, "total_steps": 204665, "loss": 0.2139, "lr": 1.9987921849467476e-06, "epoch": 0.57044438472626, "percentage": 11.41, "elapsed_time": "0:29:52", "remaining_time": "3:51:58", "throughput": 8769.08, "total_tokens": 15717824} +{"current_steps": 23355, "total_steps": 204665, "loss": 0.0858, "lr": 1.998787991266924e-06, "epoch": 0.5705665355581071, "percentage": 11.41, "elapsed_time": "0:29:52", "remaining_time": "3:51:57", "throughput": 8769.27, "total_tokens": 15721216} +{"current_steps": 23360, "total_steps": 204665, "loss": 0.1141, "lr": 1.998783790323646e-06, "epoch": 0.5706886863899543, "percentage": 11.41, "elapsed_time": "0:29:53", "remaining_time": "3:51:56", "throughput": 8769.34, "total_tokens": 15724352} +{"current_steps": 23365, "total_steps": 204665, "loss": 0.107, "lr": 1.998779582116943e-06, "epoch": 0.5708108372218015, "percentage": 11.42, "elapsed_time": "0:29:53", "remaining_time": "3:51:56", "throughput": 8769.55, "total_tokens": 15727808} +{"current_steps": 23370, "total_steps": 204665, "loss": 0.1105, "lr": 1.9987753666468473e-06, "epoch": 0.5709329880536487, "percentage": 11.42, "elapsed_time": "0:29:53", "remaining_time": "3:51:55", "throughput": 8769.54, "total_tokens": 15730752} +{"current_steps": 23375, "total_steps": 204665, "loss": 0.0375, "lr": 1.9987711439133877e-06, "epoch": 0.5710551388854959, "percentage": 11.42, "elapsed_time": "0:29:54", "remaining_time": "3:51:54", "throughput": 8769.67, "total_tokens": 15734016} +{"current_steps": 23380, "total_steps": 204665, "loss": 0.1535, "lr": 1.9987669139165955e-06, "epoch": 0.5711772897173429, "percentage": 11.42, "elapsed_time": "0:29:54", "remaining_time": "3:51:54", "throughput": 8769.97, "total_tokens": 15737664} +{"current_steps": 23385, "total_steps": 204665, "loss": 0.07, "lr": 1.998762676656502e-06, "epoch": 0.5712994405491901, "percentage": 11.43, "elapsed_time": "0:29:54", "remaining_time": "3:51:53", "throughput": 8770.18, "total_tokens": 15741120} +{"current_steps": 23390, "total_steps": 204665, "loss": 0.1223, "lr": 1.9987584321331377e-06, "epoch": 0.5714215913810373, "percentage": 11.43, "elapsed_time": "0:29:55", "remaining_time": "3:51:52", "throughput": 8770.29, "total_tokens": 15744320} +{"current_steps": 23395, "total_steps": 204665, "loss": 0.0944, "lr": 1.9987541803465335e-06, "epoch": 0.5715437422128845, "percentage": 11.43, "elapsed_time": "0:29:55", "remaining_time": "3:51:52", "throughput": 8770.49, "total_tokens": 15747776} +{"current_steps": 23400, "total_steps": 204665, "loss": 0.0861, "lr": 1.9987499212967205e-06, "epoch": 0.5716658930447316, "percentage": 11.43, "elapsed_time": "0:29:55", "remaining_time": "3:51:51", "throughput": 8770.85, "total_tokens": 15751552} +{"current_steps": 23405, "total_steps": 204665, "loss": 0.1516, "lr": 1.998745654983729e-06, "epoch": 0.5717880438765788, "percentage": 11.44, "elapsed_time": "0:29:56", "remaining_time": "3:51:50", "throughput": 8770.95, "total_tokens": 15754752} +{"current_steps": 23410, "total_steps": 204665, "loss": 0.1224, "lr": 1.9987413814075907e-06, "epoch": 0.571910194708426, "percentage": 11.44, "elapsed_time": "0:29:56", "remaining_time": "3:51:50", "throughput": 8771.04, "total_tokens": 15757888} +{"current_steps": 23415, "total_steps": 204665, "loss": 0.0904, "lr": 1.998737100568336e-06, "epoch": 0.5720323455402732, "percentage": 11.44, "elapsed_time": "0:29:56", "remaining_time": "3:51:49", "throughput": 8771.17, "total_tokens": 15761152} +{"current_steps": 23420, "total_steps": 204665, "loss": 0.1148, "lr": 1.998732812465997e-06, "epoch": 0.5721544963721203, "percentage": 11.44, "elapsed_time": "0:29:57", "remaining_time": "3:51:48", "throughput": 8771.41, "total_tokens": 15764672} +{"current_steps": 23425, "total_steps": 204665, "loss": 0.0455, "lr": 1.9987285171006042e-06, "epoch": 0.5722766472039674, "percentage": 11.45, "elapsed_time": "0:29:57", "remaining_time": "3:51:48", "throughput": 8771.59, "total_tokens": 15768064} +{"current_steps": 23430, "total_steps": 204665, "loss": 0.1486, "lr": 1.998724214472189e-06, "epoch": 0.5723987980358146, "percentage": 11.45, "elapsed_time": "0:29:57", "remaining_time": "3:51:47", "throughput": 8771.71, "total_tokens": 15771328} +{"current_steps": 23435, "total_steps": 204665, "loss": 0.2083, "lr": 1.9987199045807823e-06, "epoch": 0.5725209488676618, "percentage": 11.45, "elapsed_time": "0:29:58", "remaining_time": "3:51:46", "throughput": 8771.8, "total_tokens": 15774528} +{"current_steps": 23440, "total_steps": 204665, "loss": 0.1026, "lr": 1.9987155874264166e-06, "epoch": 0.572643099699509, "percentage": 11.45, "elapsed_time": "0:29:58", "remaining_time": "3:51:46", "throughput": 8771.9, "total_tokens": 15777728} +{"current_steps": 23445, "total_steps": 204665, "loss": 0.0887, "lr": 1.998711263009122e-06, "epoch": 0.5727652505313561, "percentage": 11.46, "elapsed_time": "0:29:59", "remaining_time": "3:51:45", "throughput": 8772.1, "total_tokens": 15781120} +{"current_steps": 23450, "total_steps": 204665, "loss": 0.2051, "lr": 1.9987069313289307e-06, "epoch": 0.5728874013632033, "percentage": 11.46, "elapsed_time": "0:29:59", "remaining_time": "3:51:44", "throughput": 8772.2, "total_tokens": 15784320} +{"current_steps": 23455, "total_steps": 204665, "loss": 0.2077, "lr": 1.9987025923858736e-06, "epoch": 0.5730095521950505, "percentage": 11.46, "elapsed_time": "0:29:59", "remaining_time": "3:51:44", "throughput": 8772.34, "total_tokens": 15787584} +{"current_steps": 23460, "total_steps": 204665, "loss": 0.3206, "lr": 1.998698246179983e-06, "epoch": 0.5731317030268976, "percentage": 11.46, "elapsed_time": "0:30:00", "remaining_time": "3:51:43", "throughput": 8772.53, "total_tokens": 15790976} +{"current_steps": 23465, "total_steps": 204665, "loss": 0.1039, "lr": 1.9986938927112903e-06, "epoch": 0.5732538538587448, "percentage": 11.47, "elapsed_time": "0:30:00", "remaining_time": "3:51:42", "throughput": 8772.72, "total_tokens": 15794368} +{"current_steps": 23470, "total_steps": 204665, "loss": 0.0355, "lr": 1.998689531979827e-06, "epoch": 0.5733760046905919, "percentage": 11.47, "elapsed_time": "0:30:00", "remaining_time": "3:51:42", "throughput": 8773.0, "total_tokens": 15797952} +{"current_steps": 23475, "total_steps": 204665, "loss": 0.1528, "lr": 1.998685163985624e-06, "epoch": 0.5734981555224391, "percentage": 11.47, "elapsed_time": "0:30:01", "remaining_time": "3:51:41", "throughput": 8773.01, "total_tokens": 15800960} +{"current_steps": 23480, "total_steps": 204665, "loss": 0.0738, "lr": 1.9986807887287145e-06, "epoch": 0.5736203063542863, "percentage": 11.47, "elapsed_time": "0:30:01", "remaining_time": "3:51:41", "throughput": 8773.61, "total_tokens": 15805248} +{"current_steps": 23485, "total_steps": 204665, "loss": 0.0482, "lr": 1.99867640620913e-06, "epoch": 0.5737424571861335, "percentage": 11.47, "elapsed_time": "0:30:01", "remaining_time": "3:51:40", "throughput": 8773.68, "total_tokens": 15808384} +{"current_steps": 23490, "total_steps": 204665, "loss": 0.1362, "lr": 1.9986720164269014e-06, "epoch": 0.5738646080179806, "percentage": 11.48, "elapsed_time": "0:30:02", "remaining_time": "3:51:39", "throughput": 8773.7, "total_tokens": 15811392} +{"current_steps": 23495, "total_steps": 204665, "loss": 0.195, "lr": 1.998667619382062e-06, "epoch": 0.5739867588498277, "percentage": 11.48, "elapsed_time": "0:30:02", "remaining_time": "3:51:38", "throughput": 8773.89, "total_tokens": 15814784} +{"current_steps": 23500, "total_steps": 204665, "loss": 0.1152, "lr": 1.998663215074642e-06, "epoch": 0.5741089096816749, "percentage": 11.48, "elapsed_time": "0:30:02", "remaining_time": "3:51:38", "throughput": 8774.13, "total_tokens": 15818304} +{"current_steps": 23505, "total_steps": 204665, "loss": 0.1916, "lr": 1.9986588035046755e-06, "epoch": 0.5742310605135221, "percentage": 11.48, "elapsed_time": "0:30:03", "remaining_time": "3:51:37", "throughput": 8774.29, "total_tokens": 15821632} +{"current_steps": 23510, "total_steps": 204665, "loss": 0.1102, "lr": 1.998654384672193e-06, "epoch": 0.5743532113453692, "percentage": 11.49, "elapsed_time": "0:30:03", "remaining_time": "3:51:37", "throughput": 8774.58, "total_tokens": 15825280} +{"current_steps": 23515, "total_steps": 204665, "loss": 0.2022, "lr": 1.9986499585772275e-06, "epoch": 0.5744753621772164, "percentage": 11.49, "elapsed_time": "0:30:03", "remaining_time": "3:51:36", "throughput": 8774.77, "total_tokens": 15828672} +{"current_steps": 23520, "total_steps": 204665, "loss": 0.1016, "lr": 1.998645525219811e-06, "epoch": 0.5745975130090636, "percentage": 11.49, "elapsed_time": "0:30:04", "remaining_time": "3:51:35", "throughput": 8774.85, "total_tokens": 15831808} +{"current_steps": 23525, "total_steps": 204665, "loss": 0.2264, "lr": 1.9986410845999752e-06, "epoch": 0.5747196638409108, "percentage": 11.49, "elapsed_time": "0:30:04", "remaining_time": "3:51:35", "throughput": 8774.97, "total_tokens": 15835072} +{"current_steps": 23530, "total_steps": 204665, "loss": 0.0441, "lr": 1.998636636717753e-06, "epoch": 0.574841814672758, "percentage": 11.5, "elapsed_time": "0:30:04", "remaining_time": "3:51:34", "throughput": 8775.02, "total_tokens": 15838144} +{"current_steps": 23535, "total_steps": 204665, "loss": 0.1378, "lr": 1.9986321815731766e-06, "epoch": 0.574963965504605, "percentage": 11.5, "elapsed_time": "0:30:05", "remaining_time": "3:51:33", "throughput": 8775.3, "total_tokens": 15841728} +{"current_steps": 23540, "total_steps": 204665, "loss": 0.0454, "lr": 1.998627719166278e-06, "epoch": 0.5750861163364522, "percentage": 11.5, "elapsed_time": "0:30:05", "remaining_time": "3:51:33", "throughput": 8775.65, "total_tokens": 15845504} +{"current_steps": 23545, "total_steps": 204665, "loss": 0.0466, "lr": 1.9986232494970908e-06, "epoch": 0.5752082671682994, "percentage": 11.5, "elapsed_time": "0:30:05", "remaining_time": "3:51:32", "throughput": 8776.13, "total_tokens": 15849600} +{"current_steps": 23550, "total_steps": 204665, "loss": 0.0798, "lr": 1.9986187725656466e-06, "epoch": 0.5753304180001466, "percentage": 11.51, "elapsed_time": "0:30:06", "remaining_time": "3:51:31", "throughput": 8776.2, "total_tokens": 15852736} +{"current_steps": 23555, "total_steps": 204665, "loss": 0.1278, "lr": 1.9986142883719774e-06, "epoch": 0.5754525688319937, "percentage": 11.51, "elapsed_time": "0:30:06", "remaining_time": "3:51:31", "throughput": 8776.36, "total_tokens": 15856064} +{"current_steps": 23560, "total_steps": 204665, "loss": 0.063, "lr": 1.998609796916117e-06, "epoch": 0.5755747196638409, "percentage": 11.51, "elapsed_time": "0:30:07", "remaining_time": "3:51:30", "throughput": 8776.63, "total_tokens": 15859648} +{"current_steps": 23565, "total_steps": 204665, "loss": 0.0897, "lr": 1.998605298198098e-06, "epoch": 0.5756968704956881, "percentage": 11.51, "elapsed_time": "0:30:07", "remaining_time": "3:51:29", "throughput": 8776.79, "total_tokens": 15862976} +{"current_steps": 23570, "total_steps": 204665, "loss": 0.0718, "lr": 1.9986007922179523e-06, "epoch": 0.5758190213275353, "percentage": 11.52, "elapsed_time": "0:30:07", "remaining_time": "3:51:29", "throughput": 8777.05, "total_tokens": 15866496} +{"current_steps": 23575, "total_steps": 204665, "loss": 0.101, "lr": 1.9985962789757126e-06, "epoch": 0.5759411721593825, "percentage": 11.52, "elapsed_time": "0:30:08", "remaining_time": "3:51:28", "throughput": 8777.24, "total_tokens": 15869888} +{"current_steps": 23580, "total_steps": 204665, "loss": 0.0232, "lr": 1.9985917584714126e-06, "epoch": 0.5760633229912295, "percentage": 11.52, "elapsed_time": "0:30:08", "remaining_time": "3:51:27", "throughput": 8777.28, "total_tokens": 15872960} +{"current_steps": 23585, "total_steps": 204665, "loss": 0.1324, "lr": 1.998587230705085e-06, "epoch": 0.5761854738230767, "percentage": 11.52, "elapsed_time": "0:30:08", "remaining_time": "3:51:27", "throughput": 8777.38, "total_tokens": 15876160} +{"current_steps": 23590, "total_steps": 204665, "loss": 0.0985, "lr": 1.9985826956767618e-06, "epoch": 0.5763076246549239, "percentage": 11.53, "elapsed_time": "0:30:09", "remaining_time": "3:51:26", "throughput": 8777.49, "total_tokens": 15879360} +{"current_steps": 23595, "total_steps": 204665, "loss": 0.1614, "lr": 1.998578153386477e-06, "epoch": 0.5764297754867711, "percentage": 11.53, "elapsed_time": "0:30:09", "remaining_time": "3:51:25", "throughput": 8777.79, "total_tokens": 15883008} +{"current_steps": 23600, "total_steps": 204665, "loss": 0.0492, "lr": 1.9985736038342634e-06, "epoch": 0.5765519263186182, "percentage": 11.53, "elapsed_time": "0:30:09", "remaining_time": "3:51:25", "throughput": 8777.96, "total_tokens": 15886400} +{"current_steps": 23605, "total_steps": 204665, "loss": 0.0798, "lr": 1.9985690470201537e-06, "epoch": 0.5766740771504654, "percentage": 11.53, "elapsed_time": "0:30:10", "remaining_time": "3:51:24", "throughput": 8778.2, "total_tokens": 15889920} +{"current_steps": 23610, "total_steps": 204665, "loss": 0.1079, "lr": 1.9985644829441816e-06, "epoch": 0.5767962279823126, "percentage": 11.54, "elapsed_time": "0:30:10", "remaining_time": "3:51:23", "throughput": 8778.35, "total_tokens": 15893248} +{"current_steps": 23615, "total_steps": 204665, "loss": 0.1202, "lr": 1.9985599116063796e-06, "epoch": 0.5769183788141597, "percentage": 11.54, "elapsed_time": "0:30:10", "remaining_time": "3:51:23", "throughput": 8778.26, "total_tokens": 15896000} +{"current_steps": 23620, "total_steps": 204665, "loss": 0.1365, "lr": 1.9985553330067816e-06, "epoch": 0.5770405296460069, "percentage": 11.54, "elapsed_time": "0:30:11", "remaining_time": "3:51:22", "throughput": 8779.35, "total_tokens": 15901504} +{"current_steps": 23625, "total_steps": 204665, "loss": 0.1305, "lr": 1.9985507471454207e-06, "epoch": 0.577162680477854, "percentage": 11.54, "elapsed_time": "0:30:11", "remaining_time": "3:51:22", "throughput": 8779.51, "total_tokens": 15904832} +{"current_steps": 23630, "total_steps": 204665, "loss": 0.212, "lr": 1.9985461540223303e-06, "epoch": 0.5772848313097012, "percentage": 11.55, "elapsed_time": "0:30:11", "remaining_time": "3:51:21", "throughput": 8779.7, "total_tokens": 15908224} +{"current_steps": 23635, "total_steps": 204665, "loss": 0.1236, "lr": 1.9985415536375434e-06, "epoch": 0.5774069821415484, "percentage": 11.55, "elapsed_time": "0:30:12", "remaining_time": "3:51:21", "throughput": 8779.94, "total_tokens": 15911744} +{"current_steps": 23640, "total_steps": 204665, "loss": 0.3236, "lr": 1.998536945991094e-06, "epoch": 0.5775291329733956, "percentage": 11.55, "elapsed_time": "0:30:12", "remaining_time": "3:51:20", "throughput": 8779.99, "total_tokens": 15914816} +{"current_steps": 23645, "total_steps": 204665, "loss": 0.0493, "lr": 1.9985323310830152e-06, "epoch": 0.5776512838052427, "percentage": 11.55, "elapsed_time": "0:30:12", "remaining_time": "3:51:19", "throughput": 8780.03, "total_tokens": 15917888} +{"current_steps": 23650, "total_steps": 204665, "loss": 0.094, "lr": 1.9985277089133405e-06, "epoch": 0.5777734346370899, "percentage": 11.56, "elapsed_time": "0:30:13", "remaining_time": "3:51:18", "throughput": 8780.34, "total_tokens": 15921536} +{"current_steps": 23655, "total_steps": 204665, "loss": 0.0982, "lr": 1.998523079482104e-06, "epoch": 0.577895585468937, "percentage": 11.56, "elapsed_time": "0:30:13", "remaining_time": "3:51:18", "throughput": 8780.46, "total_tokens": 15924800} +{"current_steps": 23660, "total_steps": 204665, "loss": 0.2915, "lr": 1.998518442789339e-06, "epoch": 0.5780177363007842, "percentage": 11.56, "elapsed_time": "0:30:14", "remaining_time": "3:51:17", "throughput": 8780.56, "total_tokens": 15928000} +{"current_steps": 23665, "total_steps": 204665, "loss": 0.2105, "lr": 1.9985137988350795e-06, "epoch": 0.5781398871326314, "percentage": 11.56, "elapsed_time": "0:30:14", "remaining_time": "3:51:16", "throughput": 8780.73, "total_tokens": 15931328} +{"current_steps": 23670, "total_steps": 204665, "loss": 0.0554, "lr": 1.998509147619359e-06, "epoch": 0.5782620379644785, "percentage": 11.57, "elapsed_time": "0:30:14", "remaining_time": "3:51:16", "throughput": 8780.86, "total_tokens": 15934592} +{"current_steps": 23675, "total_steps": 204665, "loss": 0.1021, "lr": 1.998504489142211e-06, "epoch": 0.5783841887963257, "percentage": 11.57, "elapsed_time": "0:30:15", "remaining_time": "3:51:15", "throughput": 8781.04, "total_tokens": 15937984} +{"current_steps": 23680, "total_steps": 204665, "loss": 0.08, "lr": 1.9984998234036704e-06, "epoch": 0.5785063396281729, "percentage": 11.57, "elapsed_time": "0:30:15", "remaining_time": "3:51:14", "throughput": 8781.32, "total_tokens": 15941568} +{"current_steps": 23685, "total_steps": 204665, "loss": 0.1458, "lr": 1.9984951504037704e-06, "epoch": 0.5786284904600201, "percentage": 11.57, "elapsed_time": "0:30:15", "remaining_time": "3:51:14", "throughput": 8781.64, "total_tokens": 15945280} +{"current_steps": 23690, "total_steps": 204665, "loss": 0.0967, "lr": 1.998490470142545e-06, "epoch": 0.5787506412918672, "percentage": 11.58, "elapsed_time": "0:30:16", "remaining_time": "3:51:13", "throughput": 8781.66, "total_tokens": 15948288} +{"current_steps": 23695, "total_steps": 204665, "loss": 0.1066, "lr": 1.9984857826200284e-06, "epoch": 0.5788727921237143, "percentage": 11.58, "elapsed_time": "0:30:16", "remaining_time": "3:51:13", "throughput": 8782.0, "total_tokens": 15952064} +{"current_steps": 23700, "total_steps": 204665, "loss": 0.2611, "lr": 1.998481087836254e-06, "epoch": 0.5789949429555615, "percentage": 11.58, "elapsed_time": "0:30:16", "remaining_time": "3:51:12", "throughput": 8782.43, "total_tokens": 15956032} +{"current_steps": 23705, "total_steps": 204665, "loss": 0.1465, "lr": 1.9984763857912573e-06, "epoch": 0.5791170937874087, "percentage": 11.58, "elapsed_time": "0:30:17", "remaining_time": "3:51:11", "throughput": 8782.57, "total_tokens": 15959360} +{"current_steps": 23710, "total_steps": 204665, "loss": 0.0293, "lr": 1.998471676485072e-06, "epoch": 0.5792392446192559, "percentage": 11.58, "elapsed_time": "0:30:17", "remaining_time": "3:51:11", "throughput": 8782.75, "total_tokens": 15962752} +{"current_steps": 23715, "total_steps": 204665, "loss": 0.1096, "lr": 1.9984669599177315e-06, "epoch": 0.579361395451103, "percentage": 11.59, "elapsed_time": "0:30:17", "remaining_time": "3:51:10", "throughput": 8782.83, "total_tokens": 15965888} +{"current_steps": 23720, "total_steps": 204665, "loss": 0.2157, "lr": 1.9984622360892707e-06, "epoch": 0.5794835462829502, "percentage": 11.59, "elapsed_time": "0:30:18", "remaining_time": "3:51:09", "throughput": 8782.98, "total_tokens": 15969216} +{"current_steps": 23725, "total_steps": 204665, "loss": 0.1427, "lr": 1.998457504999724e-06, "epoch": 0.5796056971147974, "percentage": 11.59, "elapsed_time": "0:30:18", "remaining_time": "3:51:09", "throughput": 8783.19, "total_tokens": 15972672} +{"current_steps": 23730, "total_steps": 204665, "loss": 0.0804, "lr": 1.9984527666491262e-06, "epoch": 0.5797278479466446, "percentage": 11.59, "elapsed_time": "0:30:18", "remaining_time": "3:51:08", "throughput": 8783.49, "total_tokens": 15976320} +{"current_steps": 23735, "total_steps": 204665, "loss": 0.1042, "lr": 1.998448021037511e-06, "epoch": 0.5798499987784916, "percentage": 11.6, "elapsed_time": "0:30:19", "remaining_time": "3:51:07", "throughput": 8783.53, "total_tokens": 15979392} +{"current_steps": 23740, "total_steps": 204665, "loss": 0.141, "lr": 1.998443268164913e-06, "epoch": 0.5799721496103388, "percentage": 11.6, "elapsed_time": "0:30:19", "remaining_time": "3:51:07", "throughput": 8783.71, "total_tokens": 15982784} +{"current_steps": 23745, "total_steps": 204665, "loss": 0.0586, "lr": 1.998438508031368e-06, "epoch": 0.580094300442186, "percentage": 11.6, "elapsed_time": "0:30:19", "remaining_time": "3:51:06", "throughput": 8784.09, "total_tokens": 15986624} +{"current_steps": 23750, "total_steps": 204665, "loss": 0.0323, "lr": 1.9984337406369084e-06, "epoch": 0.5802164512740332, "percentage": 11.6, "elapsed_time": "0:30:20", "remaining_time": "3:51:06", "throughput": 8784.14, "total_tokens": 15989696} +{"current_steps": 23755, "total_steps": 204665, "loss": 0.061, "lr": 1.9984289659815707e-06, "epoch": 0.5803386021058803, "percentage": 11.61, "elapsed_time": "0:30:20", "remaining_time": "3:51:05", "throughput": 8784.39, "total_tokens": 15993280} +{"current_steps": 23760, "total_steps": 204665, "loss": 0.0845, "lr": 1.998424184065389e-06, "epoch": 0.5804607529377275, "percentage": 11.61, "elapsed_time": "0:30:20", "remaining_time": "3:51:04", "throughput": 8784.33, "total_tokens": 15996096} +{"current_steps": 23765, "total_steps": 204665, "loss": 0.1935, "lr": 1.998419394888398e-06, "epoch": 0.5805829037695747, "percentage": 11.61, "elapsed_time": "0:30:21", "remaining_time": "3:51:04", "throughput": 8784.59, "total_tokens": 15999680} +{"current_steps": 23770, "total_steps": 204665, "loss": 0.0199, "lr": 1.998414598450633e-06, "epoch": 0.5807050546014219, "percentage": 11.61, "elapsed_time": "0:30:21", "remaining_time": "3:51:03", "throughput": 8784.78, "total_tokens": 16003072} +{"current_steps": 23775, "total_steps": 204665, "loss": 0.0787, "lr": 1.998409794752128e-06, "epoch": 0.580827205433269, "percentage": 11.62, "elapsed_time": "0:30:22", "remaining_time": "3:51:02", "throughput": 8784.95, "total_tokens": 16006400} +{"current_steps": 23780, "total_steps": 204665, "loss": 0.0284, "lr": 1.9984049837929183e-06, "epoch": 0.5809493562651161, "percentage": 11.62, "elapsed_time": "0:30:22", "remaining_time": "3:51:02", "throughput": 8785.03, "total_tokens": 16009600} +{"current_steps": 23785, "total_steps": 204665, "loss": 0.1318, "lr": 1.9984001655730397e-06, "epoch": 0.5810715070969633, "percentage": 11.62, "elapsed_time": "0:30:22", "remaining_time": "3:51:01", "throughput": 8785.33, "total_tokens": 16013248} +{"current_steps": 23790, "total_steps": 204665, "loss": 0.126, "lr": 1.998395340092526e-06, "epoch": 0.5811936579288105, "percentage": 11.62, "elapsed_time": "0:30:23", "remaining_time": "3:51:00", "throughput": 8785.37, "total_tokens": 16016320} +{"current_steps": 23795, "total_steps": 204665, "loss": 0.0767, "lr": 1.998390507351413e-06, "epoch": 0.5813158087606577, "percentage": 11.63, "elapsed_time": "0:30:23", "remaining_time": "3:51:00", "throughput": 8785.67, "total_tokens": 16019968} +{"current_steps": 23800, "total_steps": 204665, "loss": 0.0845, "lr": 1.9983856673497357e-06, "epoch": 0.5814379595925048, "percentage": 11.63, "elapsed_time": "0:30:23", "remaining_time": "3:50:59", "throughput": 8785.79, "total_tokens": 16023232} +{"current_steps": 23805, "total_steps": 204665, "loss": 0.0591, "lr": 1.9983808200875295e-06, "epoch": 0.581560110424352, "percentage": 11.63, "elapsed_time": "0:30:24", "remaining_time": "3:50:58", "throughput": 8785.83, "total_tokens": 16026304} +{"current_steps": 23810, "total_steps": 204665, "loss": 0.1458, "lr": 1.9983759655648293e-06, "epoch": 0.5816822612561992, "percentage": 11.63, "elapsed_time": "0:30:24", "remaining_time": "3:50:58", "throughput": 8786.06, "total_tokens": 16029824} +{"current_steps": 23815, "total_steps": 204665, "loss": 0.0809, "lr": 1.9983711037816705e-06, "epoch": 0.5818044120880463, "percentage": 11.64, "elapsed_time": "0:30:24", "remaining_time": "3:50:57", "throughput": 8786.67, "total_tokens": 16034176} +{"current_steps": 23820, "total_steps": 204665, "loss": 0.095, "lr": 1.9983662347380883e-06, "epoch": 0.5819265629198935, "percentage": 11.64, "elapsed_time": "0:30:25", "remaining_time": "3:50:57", "throughput": 8786.97, "total_tokens": 16037824} +{"current_steps": 23825, "total_steps": 204665, "loss": 0.1983, "lr": 1.9983613584341184e-06, "epoch": 0.5820487137517406, "percentage": 11.64, "elapsed_time": "0:30:25", "remaining_time": "3:50:56", "throughput": 8787.03, "total_tokens": 16040960} +{"current_steps": 23830, "total_steps": 204665, "loss": 0.0863, "lr": 1.998356474869796e-06, "epoch": 0.5821708645835878, "percentage": 11.64, "elapsed_time": "0:30:25", "remaining_time": "3:50:55", "throughput": 8787.33, "total_tokens": 16044608} +{"current_steps": 23835, "total_steps": 204665, "loss": 0.1203, "lr": 1.9983515840451574e-06, "epoch": 0.582293015415435, "percentage": 11.65, "elapsed_time": "0:30:26", "remaining_time": "3:50:55", "throughput": 8787.6, "total_tokens": 16048192} +{"current_steps": 23840, "total_steps": 204665, "loss": 0.121, "lr": 1.998346685960237e-06, "epoch": 0.5824151662472822, "percentage": 11.65, "elapsed_time": "0:30:26", "remaining_time": "3:50:54", "throughput": 8787.72, "total_tokens": 16051456} +{"current_steps": 23845, "total_steps": 204665, "loss": 0.0554, "lr": 1.9983417806150716e-06, "epoch": 0.5825373170791293, "percentage": 11.65, "elapsed_time": "0:30:26", "remaining_time": "3:50:53", "throughput": 8787.96, "total_tokens": 16054976} +{"current_steps": 23850, "total_steps": 204665, "loss": 0.0686, "lr": 1.998336868009696e-06, "epoch": 0.5826594679109764, "percentage": 11.65, "elapsed_time": "0:30:27", "remaining_time": "3:50:53", "throughput": 8788.08, "total_tokens": 16058240} +{"current_steps": 23855, "total_steps": 204665, "loss": 0.1335, "lr": 1.998331948144146e-06, "epoch": 0.5827816187428236, "percentage": 11.66, "elapsed_time": "0:30:27", "remaining_time": "3:50:52", "throughput": 8788.11, "total_tokens": 16061312} +{"current_steps": 23860, "total_steps": 204665, "loss": 0.0715, "lr": 1.9983270210184573e-06, "epoch": 0.5829037695746708, "percentage": 11.66, "elapsed_time": "0:30:27", "remaining_time": "3:50:51", "throughput": 8788.32, "total_tokens": 16064768} +{"current_steps": 23865, "total_steps": 204665, "loss": 0.082, "lr": 1.998322086632666e-06, "epoch": 0.583025920406518, "percentage": 11.66, "elapsed_time": "0:30:28", "remaining_time": "3:50:51", "throughput": 8788.6, "total_tokens": 16068416} +{"current_steps": 23870, "total_steps": 204665, "loss": 0.2176, "lr": 1.9983171449868086e-06, "epoch": 0.5831480712383651, "percentage": 11.66, "elapsed_time": "0:30:28", "remaining_time": "3:50:50", "throughput": 8788.64, "total_tokens": 16071488} +{"current_steps": 23875, "total_steps": 204665, "loss": 0.1019, "lr": 1.9983121960809198e-06, "epoch": 0.5832702220702123, "percentage": 11.67, "elapsed_time": "0:30:29", "remaining_time": "3:50:49", "throughput": 8788.97, "total_tokens": 16075200} +{"current_steps": 23880, "total_steps": 204665, "loss": 0.0461, "lr": 1.9983072399150367e-06, "epoch": 0.5833923729020595, "percentage": 11.67, "elapsed_time": "0:30:29", "remaining_time": "3:50:49", "throughput": 8789.16, "total_tokens": 16078656} +{"current_steps": 23885, "total_steps": 204665, "loss": 0.119, "lr": 1.9983022764891943e-06, "epoch": 0.5835145237339067, "percentage": 11.67, "elapsed_time": "0:30:29", "remaining_time": "3:50:48", "throughput": 8789.37, "total_tokens": 16082112} +{"current_steps": 23890, "total_steps": 204665, "loss": 0.1487, "lr": 1.9982973058034297e-06, "epoch": 0.5836366745657537, "percentage": 11.67, "elapsed_time": "0:30:30", "remaining_time": "3:50:48", "throughput": 8789.48, "total_tokens": 16085376} +{"current_steps": 23895, "total_steps": 204665, "loss": 0.1018, "lr": 1.998292327857778e-06, "epoch": 0.5837588253976009, "percentage": 11.68, "elapsed_time": "0:30:30", "remaining_time": "3:50:47", "throughput": 8789.58, "total_tokens": 16088576} +{"current_steps": 23900, "total_steps": 204665, "loss": 0.0956, "lr": 1.998287342652277e-06, "epoch": 0.5838809762294481, "percentage": 11.68, "elapsed_time": "0:30:30", "remaining_time": "3:50:46", "throughput": 8789.73, "total_tokens": 16091904} +{"current_steps": 23905, "total_steps": 204665, "loss": 0.1887, "lr": 1.998282350186961e-06, "epoch": 0.5840031270612953, "percentage": 11.68, "elapsed_time": "0:30:31", "remaining_time": "3:50:46", "throughput": 8789.74, "total_tokens": 16094912} +{"current_steps": 23910, "total_steps": 204665, "loss": 0.0914, "lr": 1.998277350461868e-06, "epoch": 0.5841252778931425, "percentage": 11.68, "elapsed_time": "0:30:31", "remaining_time": "3:50:45", "throughput": 8789.72, "total_tokens": 16097856} +{"current_steps": 23915, "total_steps": 204665, "loss": 0.1373, "lr": 1.998272343477033e-06, "epoch": 0.5842474287249896, "percentage": 11.68, "elapsed_time": "0:30:31", "remaining_time": "3:50:44", "throughput": 8789.99, "total_tokens": 16101632} +{"current_steps": 23920, "total_steps": 204665, "loss": 0.193, "lr": 1.998267329232493e-06, "epoch": 0.5843695795568368, "percentage": 11.69, "elapsed_time": "0:30:32", "remaining_time": "3:50:44", "throughput": 8790.03, "total_tokens": 16104704} +{"current_steps": 23925, "total_steps": 204665, "loss": 0.1243, "lr": 1.9982623077282846e-06, "epoch": 0.584491730388684, "percentage": 11.69, "elapsed_time": "0:30:32", "remaining_time": "3:50:43", "throughput": 8790.15, "total_tokens": 16107968} +{"current_steps": 23930, "total_steps": 204665, "loss": 0.1061, "lr": 1.9982572789644442e-06, "epoch": 0.5846138812205312, "percentage": 11.69, "elapsed_time": "0:30:32", "remaining_time": "3:50:42", "throughput": 8790.39, "total_tokens": 16111488} +{"current_steps": 23935, "total_steps": 204665, "loss": 0.0316, "lr": 1.9982522429410085e-06, "epoch": 0.5847360320523782, "percentage": 11.69, "elapsed_time": "0:30:33", "remaining_time": "3:50:42", "throughput": 8790.64, "total_tokens": 16115136} +{"current_steps": 23940, "total_steps": 204665, "loss": 0.0877, "lr": 1.998247199658014e-06, "epoch": 0.5848581828842254, "percentage": 11.7, "elapsed_time": "0:30:33", "remaining_time": "3:50:41", "throughput": 8790.97, "total_tokens": 16118848} +{"current_steps": 23945, "total_steps": 204665, "loss": 0.0641, "lr": 1.9982421491154973e-06, "epoch": 0.5849803337160726, "percentage": 11.7, "elapsed_time": "0:30:33", "remaining_time": "3:50:41", "throughput": 8791.23, "total_tokens": 16122432} +{"current_steps": 23950, "total_steps": 204665, "loss": 0.1244, "lr": 1.998237091313495e-06, "epoch": 0.5851024845479198, "percentage": 11.7, "elapsed_time": "0:30:34", "remaining_time": "3:50:40", "throughput": 8791.43, "total_tokens": 16125888} +{"current_steps": 23955, "total_steps": 204665, "loss": 0.1109, "lr": 1.9982320262520445e-06, "epoch": 0.585224635379767, "percentage": 11.7, "elapsed_time": "0:30:34", "remaining_time": "3:50:39", "throughput": 8791.65, "total_tokens": 16129408} +{"current_steps": 23960, "total_steps": 204665, "loss": 0.0638, "lr": 1.998226953931182e-06, "epoch": 0.5853467862116141, "percentage": 11.71, "elapsed_time": "0:30:34", "remaining_time": "3:50:39", "throughput": 8792.08, "total_tokens": 16133376} +{"current_steps": 23965, "total_steps": 204665, "loss": 0.0532, "lr": 1.9982218743509445e-06, "epoch": 0.5854689370434613, "percentage": 11.71, "elapsed_time": "0:30:35", "remaining_time": "3:50:38", "throughput": 8792.21, "total_tokens": 16136640} +{"current_steps": 23970, "total_steps": 204665, "loss": 0.2682, "lr": 1.9982167875113692e-06, "epoch": 0.5855910878753084, "percentage": 11.71, "elapsed_time": "0:30:35", "remaining_time": "3:50:38", "throughput": 8792.41, "total_tokens": 16140096} +{"current_steps": 23975, "total_steps": 204665, "loss": 0.0938, "lr": 1.9982116934124925e-06, "epoch": 0.5857132387071556, "percentage": 11.71, "elapsed_time": "0:30:36", "remaining_time": "3:50:37", "throughput": 8792.59, "total_tokens": 16143488} +{"current_steps": 23980, "total_steps": 204665, "loss": 0.2205, "lr": 1.9982065920543524e-06, "epoch": 0.5858353895390027, "percentage": 11.72, "elapsed_time": "0:30:36", "remaining_time": "3:50:36", "throughput": 8792.93, "total_tokens": 16147264} +{"current_steps": 23985, "total_steps": 204665, "loss": 0.0564, "lr": 1.9982014834369853e-06, "epoch": 0.5859575403708499, "percentage": 11.72, "elapsed_time": "0:30:36", "remaining_time": "3:50:36", "throughput": 8793.09, "total_tokens": 16150592} +{"current_steps": 23990, "total_steps": 204665, "loss": 0.1804, "lr": 1.9981963675604286e-06, "epoch": 0.5860796912026971, "percentage": 11.72, "elapsed_time": "0:30:37", "remaining_time": "3:50:35", "throughput": 8793.12, "total_tokens": 16153664} +{"current_steps": 23995, "total_steps": 204665, "loss": 0.0559, "lr": 1.9981912444247195e-06, "epoch": 0.5862018420345443, "percentage": 11.72, "elapsed_time": "0:30:37", "remaining_time": "3:50:34", "throughput": 8793.18, "total_tokens": 16156800} +{"current_steps": 24000, "total_steps": 204665, "loss": 0.1858, "lr": 1.9981861140298948e-06, "epoch": 0.5863239928663915, "percentage": 11.73, "elapsed_time": "0:30:37", "remaining_time": "3:50:34", "throughput": 8793.3, "total_tokens": 16160064} +{"current_steps": 24005, "total_steps": 204665, "loss": 0.1304, "lr": 1.9981809763759926e-06, "epoch": 0.5864461436982386, "percentage": 11.73, "elapsed_time": "0:30:38", "remaining_time": "3:50:33", "throughput": 8793.56, "total_tokens": 16163648} +{"current_steps": 24010, "total_steps": 204665, "loss": 0.2359, "lr": 1.9981758314630495e-06, "epoch": 0.5865682945300857, "percentage": 11.73, "elapsed_time": "0:30:38", "remaining_time": "3:50:32", "throughput": 8793.59, "total_tokens": 16166720} +{"current_steps": 24015, "total_steps": 204665, "loss": 0.116, "lr": 1.998170679291104e-06, "epoch": 0.5866904453619329, "percentage": 11.73, "elapsed_time": "0:30:38", "remaining_time": "3:50:32", "throughput": 8793.77, "total_tokens": 16170112} +{"current_steps": 24020, "total_steps": 204665, "loss": 0.1495, "lr": 1.9981655198601918e-06, "epoch": 0.5868125961937801, "percentage": 11.74, "elapsed_time": "0:30:39", "remaining_time": "3:50:31", "throughput": 8794.03, "total_tokens": 16173696} +{"current_steps": 24025, "total_steps": 204665, "loss": 0.0887, "lr": 1.9981603531703526e-06, "epoch": 0.5869347470256272, "percentage": 11.74, "elapsed_time": "0:30:39", "remaining_time": "3:50:31", "throughput": 8794.18, "total_tokens": 16177024} +{"current_steps": 24030, "total_steps": 204665, "loss": 0.0788, "lr": 1.998155179221622e-06, "epoch": 0.5870568978574744, "percentage": 11.74, "elapsed_time": "0:30:39", "remaining_time": "3:50:30", "throughput": 8794.42, "total_tokens": 16180608} +{"current_steps": 24035, "total_steps": 204665, "loss": 0.1616, "lr": 1.9981499980140386e-06, "epoch": 0.5871790486893216, "percentage": 11.74, "elapsed_time": "0:30:40", "remaining_time": "3:50:29", "throughput": 8794.65, "total_tokens": 16184128} +{"current_steps": 24040, "total_steps": 204665, "loss": 0.1168, "lr": 1.99814480954764e-06, "epoch": 0.5873011995211688, "percentage": 11.75, "elapsed_time": "0:30:40", "remaining_time": "3:50:29", "throughput": 8794.77, "total_tokens": 16187392} +{"current_steps": 24045, "total_steps": 204665, "loss": 0.1582, "lr": 1.998139613822464e-06, "epoch": 0.5874233503530158, "percentage": 11.75, "elapsed_time": "0:30:40", "remaining_time": "3:50:28", "throughput": 8795.0, "total_tokens": 16190912} +{"current_steps": 24050, "total_steps": 204665, "loss": 0.0451, "lr": 1.998134410838548e-06, "epoch": 0.587545501184863, "percentage": 11.75, "elapsed_time": "0:30:41", "remaining_time": "3:50:27", "throughput": 8795.11, "total_tokens": 16194176} +{"current_steps": 24055, "total_steps": 204665, "loss": 0.1327, "lr": 1.9981292005959305e-06, "epoch": 0.5876676520167102, "percentage": 11.75, "elapsed_time": "0:30:41", "remaining_time": "3:50:27", "throughput": 8795.44, "total_tokens": 16197952} +{"current_steps": 24060, "total_steps": 204665, "loss": 0.0967, "lr": 1.998123983094649e-06, "epoch": 0.5877898028485574, "percentage": 11.76, "elapsed_time": "0:30:41", "remaining_time": "3:50:26", "throughput": 8795.73, "total_tokens": 16201600} +{"current_steps": 24065, "total_steps": 204665, "loss": 0.0567, "lr": 1.998118758334741e-06, "epoch": 0.5879119536804046, "percentage": 11.76, "elapsed_time": "0:30:42", "remaining_time": "3:50:26", "throughput": 8796.09, "total_tokens": 16205440} +{"current_steps": 24070, "total_steps": 204665, "loss": 0.0901, "lr": 1.998113526316245e-06, "epoch": 0.5880341045122517, "percentage": 11.76, "elapsed_time": "0:30:42", "remaining_time": "3:50:25", "throughput": 8796.18, "total_tokens": 16208640} +{"current_steps": 24075, "total_steps": 204665, "loss": 0.1703, "lr": 1.998108287039199e-06, "epoch": 0.5881562553440989, "percentage": 11.76, "elapsed_time": "0:30:43", "remaining_time": "3:50:24", "throughput": 8796.33, "total_tokens": 16211968} +{"current_steps": 24080, "total_steps": 204665, "loss": 0.0712, "lr": 1.998103040503641e-06, "epoch": 0.5882784061759461, "percentage": 11.77, "elapsed_time": "0:30:43", "remaining_time": "3:50:24", "throughput": 8796.48, "total_tokens": 16215296} +{"current_steps": 24085, "total_steps": 204665, "loss": 0.0626, "lr": 1.9980977867096097e-06, "epoch": 0.5884005570077933, "percentage": 11.77, "elapsed_time": "0:30:43", "remaining_time": "3:50:23", "throughput": 8796.59, "total_tokens": 16218560} +{"current_steps": 24090, "total_steps": 204665, "loss": 0.1004, "lr": 1.9980925256571424e-06, "epoch": 0.5885227078396403, "percentage": 11.77, "elapsed_time": "0:30:44", "remaining_time": "3:50:22", "throughput": 8796.57, "total_tokens": 16221504} +{"current_steps": 24095, "total_steps": 204665, "loss": 0.1884, "lr": 1.9980872573462783e-06, "epoch": 0.5886448586714875, "percentage": 11.77, "elapsed_time": "0:30:44", "remaining_time": "3:50:22", "throughput": 8796.82, "total_tokens": 16225088} +{"current_steps": 24100, "total_steps": 204665, "loss": 0.0542, "lr": 1.9980819817770546e-06, "epoch": 0.5887670095033347, "percentage": 11.78, "elapsed_time": "0:30:44", "remaining_time": "3:50:21", "throughput": 8797.19, "total_tokens": 16228928} +{"current_steps": 24105, "total_steps": 204665, "loss": 0.0811, "lr": 1.9980766989495107e-06, "epoch": 0.5888891603351819, "percentage": 11.78, "elapsed_time": "0:30:45", "remaining_time": "3:50:21", "throughput": 8797.25, "total_tokens": 16232064} +{"current_steps": 24110, "total_steps": 204665, "loss": 0.1497, "lr": 1.9980714088636844e-06, "epoch": 0.5890113111670291, "percentage": 11.78, "elapsed_time": "0:30:45", "remaining_time": "3:50:20", "throughput": 8797.28, "total_tokens": 16235136} +{"current_steps": 24115, "total_steps": 204665, "loss": 0.0565, "lr": 1.9980661115196145e-06, "epoch": 0.5891334619988762, "percentage": 11.78, "elapsed_time": "0:30:45", "remaining_time": "3:50:19", "throughput": 8797.73, "total_tokens": 16239168} +{"current_steps": 24120, "total_steps": 204665, "loss": 0.14, "lr": 1.998060806917339e-06, "epoch": 0.5892556128307234, "percentage": 11.79, "elapsed_time": "0:30:46", "remaining_time": "3:50:19", "throughput": 8797.82, "total_tokens": 16242368} +{"current_steps": 24125, "total_steps": 204665, "loss": 0.0388, "lr": 1.9980554950568973e-06, "epoch": 0.5893777636625706, "percentage": 11.79, "elapsed_time": "0:30:46", "remaining_time": "3:50:18", "throughput": 8798.05, "total_tokens": 16245888} +{"current_steps": 24130, "total_steps": 204665, "loss": 0.1424, "lr": 1.9980501759383276e-06, "epoch": 0.5894999144944177, "percentage": 11.79, "elapsed_time": "0:30:46", "remaining_time": "3:50:17", "throughput": 8798.16, "total_tokens": 16249152} +{"current_steps": 24135, "total_steps": 204665, "loss": 0.0769, "lr": 1.9980448495616685e-06, "epoch": 0.5896220653262648, "percentage": 11.79, "elapsed_time": "0:30:47", "remaining_time": "3:50:17", "throughput": 8798.28, "total_tokens": 16252416} +{"current_steps": 24140, "total_steps": 204665, "loss": 0.206, "lr": 1.9980395159269586e-06, "epoch": 0.589744216158112, "percentage": 11.79, "elapsed_time": "0:30:47", "remaining_time": "3:50:16", "throughput": 8798.49, "total_tokens": 16255872} +{"current_steps": 24145, "total_steps": 204665, "loss": 0.121, "lr": 1.9980341750342372e-06, "epoch": 0.5898663669899592, "percentage": 11.8, "elapsed_time": "0:30:47", "remaining_time": "3:50:16", "throughput": 8798.96, "total_tokens": 16259968} +{"current_steps": 24150, "total_steps": 204665, "loss": 0.2076, "lr": 1.9980288268835425e-06, "epoch": 0.5899885178218064, "percentage": 11.8, "elapsed_time": "0:30:48", "remaining_time": "3:50:15", "throughput": 8799.19, "total_tokens": 16263488} +{"current_steps": 24155, "total_steps": 204665, "loss": 0.0236, "lr": 1.998023471474914e-06, "epoch": 0.5901106686536536, "percentage": 11.8, "elapsed_time": "0:30:48", "remaining_time": "3:50:14", "throughput": 8799.14, "total_tokens": 16266368} +{"current_steps": 24160, "total_steps": 204665, "loss": 0.1343, "lr": 1.9980181088083903e-06, "epoch": 0.5902328194855007, "percentage": 11.8, "elapsed_time": "0:30:48", "remaining_time": "3:50:14", "throughput": 8799.31, "total_tokens": 16269760} +{"current_steps": 24165, "total_steps": 204665, "loss": 0.1414, "lr": 1.9980127388840106e-06, "epoch": 0.5903549703173478, "percentage": 11.81, "elapsed_time": "0:30:49", "remaining_time": "3:50:13", "throughput": 8799.51, "total_tokens": 16273216} +{"current_steps": 24170, "total_steps": 204665, "loss": 0.0678, "lr": 1.9980073617018135e-06, "epoch": 0.590477121149195, "percentage": 11.81, "elapsed_time": "0:30:49", "remaining_time": "3:50:12", "throughput": 8799.56, "total_tokens": 16276288} +{"current_steps": 24175, "total_steps": 204665, "loss": 0.0568, "lr": 1.9980019772618387e-06, "epoch": 0.5905992719810422, "percentage": 11.81, "elapsed_time": "0:30:50", "remaining_time": "3:50:12", "throughput": 8799.82, "total_tokens": 16279872} +{"current_steps": 24180, "total_steps": 204665, "loss": 0.1065, "lr": 1.997996585564125e-06, "epoch": 0.5907214228128893, "percentage": 11.81, "elapsed_time": "0:30:50", "remaining_time": "3:50:11", "throughput": 8799.93, "total_tokens": 16283136} +{"current_steps": 24185, "total_steps": 204665, "loss": 0.1299, "lr": 1.997991186608712e-06, "epoch": 0.5908435736447365, "percentage": 11.82, "elapsed_time": "0:30:50", "remaining_time": "3:50:10", "throughput": 8800.19, "total_tokens": 16286720} +{"current_steps": 24190, "total_steps": 204665, "loss": 0.0945, "lr": 1.9979857803956383e-06, "epoch": 0.5909657244765837, "percentage": 11.82, "elapsed_time": "0:30:51", "remaining_time": "3:50:10", "throughput": 8800.22, "total_tokens": 16289792} +{"current_steps": 24195, "total_steps": 204665, "loss": 0.2258, "lr": 1.9979803669249434e-06, "epoch": 0.5910878753084309, "percentage": 11.82, "elapsed_time": "0:30:51", "remaining_time": "3:50:09", "throughput": 8800.4, "total_tokens": 16293184} +{"current_steps": 24200, "total_steps": 204665, "loss": 0.1589, "lr": 1.9979749461966672e-06, "epoch": 0.5912100261402781, "percentage": 11.82, "elapsed_time": "0:30:51", "remaining_time": "3:50:09", "throughput": 8800.51, "total_tokens": 16296448} +{"current_steps": 24205, "total_steps": 204665, "loss": 0.2786, "lr": 1.997969518210849e-06, "epoch": 0.5913321769721251, "percentage": 11.83, "elapsed_time": "0:30:52", "remaining_time": "3:50:08", "throughput": 8800.72, "total_tokens": 16299904} +{"current_steps": 24210, "total_steps": 204665, "loss": 0.0697, "lr": 1.9979640829675273e-06, "epoch": 0.5914543278039723, "percentage": 11.83, "elapsed_time": "0:30:52", "remaining_time": "3:50:07", "throughput": 8800.94, "total_tokens": 16303424} +{"current_steps": 24215, "total_steps": 204665, "loss": 0.0782, "lr": 1.997958640466743e-06, "epoch": 0.5915764786358195, "percentage": 11.83, "elapsed_time": "0:30:52", "remaining_time": "3:50:07", "throughput": 8801.0, "total_tokens": 16306560} +{"current_steps": 24220, "total_steps": 204665, "loss": 0.2103, "lr": 1.997953190708535e-06, "epoch": 0.5916986294676667, "percentage": 11.83, "elapsed_time": "0:30:53", "remaining_time": "3:50:06", "throughput": 8801.03, "total_tokens": 16309632} +{"current_steps": 24225, "total_steps": 204665, "loss": 0.0855, "lr": 1.9979477336929426e-06, "epoch": 0.5918207802995138, "percentage": 11.84, "elapsed_time": "0:30:53", "remaining_time": "3:50:05", "throughput": 8801.06, "total_tokens": 16312704} +{"current_steps": 24230, "total_steps": 204665, "loss": 0.0863, "lr": 1.9979422694200062e-06, "epoch": 0.591942931131361, "percentage": 11.84, "elapsed_time": "0:30:53", "remaining_time": "3:50:05", "throughput": 8801.26, "total_tokens": 16316160} +{"current_steps": 24235, "total_steps": 204665, "loss": 0.0656, "lr": 1.997936797889765e-06, "epoch": 0.5920650819632082, "percentage": 11.84, "elapsed_time": "0:30:54", "remaining_time": "3:50:04", "throughput": 8801.56, "total_tokens": 16319872} +{"current_steps": 24240, "total_steps": 204665, "loss": 0.0183, "lr": 1.997931319102259e-06, "epoch": 0.5921872327950554, "percentage": 11.84, "elapsed_time": "0:30:54", "remaining_time": "3:50:03", "throughput": 8801.52, "total_tokens": 16322752} +{"current_steps": 24245, "total_steps": 204665, "loss": 0.1014, "lr": 1.9979258330575283e-06, "epoch": 0.5923093836269026, "percentage": 11.85, "elapsed_time": "0:30:54", "remaining_time": "3:50:03", "throughput": 8801.7, "total_tokens": 16326208} +{"current_steps": 24250, "total_steps": 204665, "loss": 0.0631, "lr": 1.9979203397556124e-06, "epoch": 0.5924315344587496, "percentage": 11.85, "elapsed_time": "0:30:55", "remaining_time": "3:50:02", "throughput": 8801.9, "total_tokens": 16329664} +{"current_steps": 24255, "total_steps": 204665, "loss": 0.1876, "lr": 1.997914839196551e-06, "epoch": 0.5925536852905968, "percentage": 11.85, "elapsed_time": "0:30:55", "remaining_time": "3:50:01", "throughput": 8801.95, "total_tokens": 16332800} +{"current_steps": 24260, "total_steps": 204665, "loss": 0.1368, "lr": 1.997909331380385e-06, "epoch": 0.592675836122444, "percentage": 11.85, "elapsed_time": "0:30:55", "remaining_time": "3:50:01", "throughput": 8802.27, "total_tokens": 16336512} +{"current_steps": 24265, "total_steps": 204665, "loss": 0.0687, "lr": 1.997903816307154e-06, "epoch": 0.5927979869542912, "percentage": 11.86, "elapsed_time": "0:30:56", "remaining_time": "3:50:00", "throughput": 8802.55, "total_tokens": 16340160} +{"current_steps": 24270, "total_steps": 204665, "loss": 0.2328, "lr": 1.9978982939768975e-06, "epoch": 0.5929201377861383, "percentage": 11.86, "elapsed_time": "0:30:56", "remaining_time": "3:50:00", "throughput": 8802.76, "total_tokens": 16343680} +{"current_steps": 24275, "total_steps": 204665, "loss": 0.0459, "lr": 1.9978927643896567e-06, "epoch": 0.5930422886179855, "percentage": 11.86, "elapsed_time": "0:30:57", "remaining_time": "3:49:59", "throughput": 8802.98, "total_tokens": 16347200} +{"current_steps": 24280, "total_steps": 204665, "loss": 0.1105, "lr": 1.9978872275454713e-06, "epoch": 0.5931644394498327, "percentage": 11.86, "elapsed_time": "0:30:57", "remaining_time": "3:49:58", "throughput": 8803.01, "total_tokens": 16350272} +{"current_steps": 24285, "total_steps": 204665, "loss": 0.1195, "lr": 1.997881683444381e-06, "epoch": 0.5932865902816798, "percentage": 11.87, "elapsed_time": "0:30:57", "remaining_time": "3:49:58", "throughput": 8803.21, "total_tokens": 16353728} +{"current_steps": 24290, "total_steps": 204665, "loss": 0.1208, "lr": 1.997876132086427e-06, "epoch": 0.593408741113527, "percentage": 11.87, "elapsed_time": "0:30:58", "remaining_time": "3:49:57", "throughput": 8803.69, "total_tokens": 16357888} +{"current_steps": 24295, "total_steps": 204665, "loss": 0.1052, "lr": 1.99787057347165e-06, "epoch": 0.5935308919453741, "percentage": 11.87, "elapsed_time": "0:30:58", "remaining_time": "3:49:57", "throughput": 8803.86, "total_tokens": 16361280} +{"current_steps": 24300, "total_steps": 204665, "loss": 0.1275, "lr": 1.9978650076000887e-06, "epoch": 0.5936530427772213, "percentage": 11.87, "elapsed_time": "0:30:58", "remaining_time": "3:49:56", "throughput": 8804.3, "total_tokens": 16365312} +{"current_steps": 24305, "total_steps": 204665, "loss": 0.0785, "lr": 1.9978594344717855e-06, "epoch": 0.5937751936090685, "percentage": 11.88, "elapsed_time": "0:30:59", "remaining_time": "3:49:56", "throughput": 8804.52, "total_tokens": 16368832} +{"current_steps": 24310, "total_steps": 204665, "loss": 0.0691, "lr": 1.99785385408678e-06, "epoch": 0.5938973444409157, "percentage": 11.88, "elapsed_time": "0:30:59", "remaining_time": "3:49:55", "throughput": 8804.8, "total_tokens": 16372480} +{"current_steps": 24315, "total_steps": 204665, "loss": 0.1628, "lr": 1.9978482664451126e-06, "epoch": 0.5940194952727628, "percentage": 11.88, "elapsed_time": "0:30:59", "remaining_time": "3:49:54", "throughput": 8804.78, "total_tokens": 16375424} +{"current_steps": 24320, "total_steps": 204665, "loss": 0.1075, "lr": 1.997842671546824e-06, "epoch": 0.59414164610461, "percentage": 11.88, "elapsed_time": "0:31:00", "remaining_time": "3:49:54", "throughput": 8804.87, "total_tokens": 16378624} +{"current_steps": 24325, "total_steps": 204665, "loss": 0.0945, "lr": 1.997837069391956e-06, "epoch": 0.5942637969364571, "percentage": 11.89, "elapsed_time": "0:31:00", "remaining_time": "3:49:53", "throughput": 8805.28, "total_tokens": 16382592} +{"current_steps": 24330, "total_steps": 204665, "loss": 0.1419, "lr": 1.997831459980548e-06, "epoch": 0.5943859477683043, "percentage": 11.89, "elapsed_time": "0:31:00", "remaining_time": "3:49:53", "throughput": 8805.53, "total_tokens": 16386176} +{"current_steps": 24335, "total_steps": 204665, "loss": 0.1901, "lr": 1.997825843312641e-06, "epoch": 0.5945080986001514, "percentage": 11.89, "elapsed_time": "0:31:01", "remaining_time": "3:49:52", "throughput": 8805.62, "total_tokens": 16389376} +{"current_steps": 24340, "total_steps": 204665, "loss": 0.2088, "lr": 1.997820219388276e-06, "epoch": 0.5946302494319986, "percentage": 11.89, "elapsed_time": "0:31:01", "remaining_time": "3:49:51", "throughput": 8805.82, "total_tokens": 16392832} +{"current_steps": 24345, "total_steps": 204665, "loss": 0.0537, "lr": 1.997814588207494e-06, "epoch": 0.5947524002638458, "percentage": 11.9, "elapsed_time": "0:31:01", "remaining_time": "3:49:51", "throughput": 8806.13, "total_tokens": 16396544} +{"current_steps": 24350, "total_steps": 204665, "loss": 0.1235, "lr": 1.9978089497703366e-06, "epoch": 0.594874551095693, "percentage": 11.9, "elapsed_time": "0:31:02", "remaining_time": "3:49:50", "throughput": 8806.18, "total_tokens": 16399680} +{"current_steps": 24355, "total_steps": 204665, "loss": 0.1369, "lr": 1.9978033040768435e-06, "epoch": 0.5949967019275402, "percentage": 11.9, "elapsed_time": "0:31:02", "remaining_time": "3:49:49", "throughput": 8806.44, "total_tokens": 16403264} +{"current_steps": 24360, "total_steps": 204665, "loss": 0.1138, "lr": 1.9977976511270564e-06, "epoch": 0.5951188527593873, "percentage": 11.9, "elapsed_time": "0:31:02", "remaining_time": "3:49:49", "throughput": 8806.52, "total_tokens": 16406464} +{"current_steps": 24365, "total_steps": 204665, "loss": 0.1247, "lr": 1.9977919909210167e-06, "epoch": 0.5952410035912344, "percentage": 11.9, "elapsed_time": "0:31:03", "remaining_time": "3:49:48", "throughput": 8806.68, "total_tokens": 16409856} +{"current_steps": 24370, "total_steps": 204665, "loss": 0.0726, "lr": 1.997786323458765e-06, "epoch": 0.5953631544230816, "percentage": 11.91, "elapsed_time": "0:31:03", "remaining_time": "3:49:48", "throughput": 8806.79, "total_tokens": 16413120} +{"current_steps": 24375, "total_steps": 204665, "loss": 0.1003, "lr": 1.997780648740343e-06, "epoch": 0.5954853052549288, "percentage": 11.91, "elapsed_time": "0:31:04", "remaining_time": "3:49:47", "throughput": 8806.96, "total_tokens": 16416512} +{"current_steps": 24380, "total_steps": 204665, "loss": 0.1277, "lr": 1.997774966765792e-06, "epoch": 0.5956074560867759, "percentage": 11.91, "elapsed_time": "0:31:04", "remaining_time": "3:49:46", "throughput": 8807.5, "total_tokens": 16420736} +{"current_steps": 24385, "total_steps": 204665, "loss": 0.0907, "lr": 1.9977692775351525e-06, "epoch": 0.5957296069186231, "percentage": 11.91, "elapsed_time": "0:31:04", "remaining_time": "3:49:46", "throughput": 8807.78, "total_tokens": 16424384} +{"current_steps": 24390, "total_steps": 204665, "loss": 0.0685, "lr": 1.997763581048467e-06, "epoch": 0.5958517577504703, "percentage": 11.92, "elapsed_time": "0:31:05", "remaining_time": "3:49:45", "throughput": 8807.84, "total_tokens": 16427520} +{"current_steps": 24395, "total_steps": 204665, "loss": 0.0535, "lr": 1.997757877305776e-06, "epoch": 0.5959739085823175, "percentage": 11.92, "elapsed_time": "0:31:05", "remaining_time": "3:49:44", "throughput": 8808.01, "total_tokens": 16430912} +{"current_steps": 24400, "total_steps": 204665, "loss": 0.1696, "lr": 1.997752166307121e-06, "epoch": 0.5960960594141647, "percentage": 11.92, "elapsed_time": "0:31:05", "remaining_time": "3:49:44", "throughput": 8808.06, "total_tokens": 16434048} +{"current_steps": 24405, "total_steps": 204665, "loss": 0.1728, "lr": 1.9977464480525447e-06, "epoch": 0.5962182102460117, "percentage": 11.92, "elapsed_time": "0:31:06", "remaining_time": "3:49:43", "throughput": 8808.09, "total_tokens": 16437120} +{"current_steps": 24410, "total_steps": 204665, "loss": 0.0879, "lr": 1.997740722542087e-06, "epoch": 0.5963403610778589, "percentage": 11.93, "elapsed_time": "0:31:06", "remaining_time": "3:49:42", "throughput": 8808.14, "total_tokens": 16440256} +{"current_steps": 24415, "total_steps": 204665, "loss": 0.1947, "lr": 1.9977349897757913e-06, "epoch": 0.5964625119097061, "percentage": 11.93, "elapsed_time": "0:31:06", "remaining_time": "3:49:42", "throughput": 8808.25, "total_tokens": 16443520} +{"current_steps": 24420, "total_steps": 204665, "loss": 0.0704, "lr": 1.9977292497536976e-06, "epoch": 0.5965846627415533, "percentage": 11.93, "elapsed_time": "0:31:07", "remaining_time": "3:49:41", "throughput": 8808.34, "total_tokens": 16446720} +{"current_steps": 24425, "total_steps": 204665, "loss": 0.1163, "lr": 1.997723502475849e-06, "epoch": 0.5967068135734004, "percentage": 11.93, "elapsed_time": "0:31:07", "remaining_time": "3:49:41", "throughput": 8808.51, "total_tokens": 16450112} +{"current_steps": 24430, "total_steps": 204665, "loss": 0.0953, "lr": 1.9977177479422865e-06, "epoch": 0.5968289644052476, "percentage": 11.94, "elapsed_time": "0:31:07", "remaining_time": "3:49:40", "throughput": 8808.77, "total_tokens": 16453760} +{"current_steps": 24435, "total_steps": 204665, "loss": 0.1416, "lr": 1.997711986153052e-06, "epoch": 0.5969511152370948, "percentage": 11.94, "elapsed_time": "0:31:08", "remaining_time": "3:49:39", "throughput": 8808.95, "total_tokens": 16457280} +{"current_steps": 24440, "total_steps": 204665, "loss": 0.073, "lr": 1.997706217108188e-06, "epoch": 0.597073266068942, "percentage": 11.94, "elapsed_time": "0:31:08", "remaining_time": "3:49:39", "throughput": 8809.04, "total_tokens": 16460544} +{"current_steps": 24445, "total_steps": 204665, "loss": 0.1074, "lr": 1.997700440807736e-06, "epoch": 0.5971954169007891, "percentage": 11.94, "elapsed_time": "0:31:08", "remaining_time": "3:49:38", "throughput": 8809.04, "total_tokens": 16463552} +{"current_steps": 24450, "total_steps": 204665, "loss": 0.2375, "lr": 1.9976946572517377e-06, "epoch": 0.5973175677326362, "percentage": 11.95, "elapsed_time": "0:31:09", "remaining_time": "3:49:38", "throughput": 8809.25, "total_tokens": 16467008} +{"current_steps": 24455, "total_steps": 204665, "loss": 0.0157, "lr": 1.997688866440236e-06, "epoch": 0.5974397185644834, "percentage": 11.95, "elapsed_time": "0:31:09", "remaining_time": "3:49:37", "throughput": 8809.59, "total_tokens": 16470784} +{"current_steps": 24460, "total_steps": 204665, "loss": 0.0783, "lr": 1.997683068373272e-06, "epoch": 0.5975618693963306, "percentage": 11.95, "elapsed_time": "0:31:09", "remaining_time": "3:49:36", "throughput": 8809.61, "total_tokens": 16473856} +{"current_steps": 24465, "total_steps": 204665, "loss": 0.0781, "lr": 1.997677263050889e-06, "epoch": 0.5976840202281778, "percentage": 11.95, "elapsed_time": "0:31:10", "remaining_time": "3:49:36", "throughput": 8809.66, "total_tokens": 16476992} +{"current_steps": 24470, "total_steps": 204665, "loss": 0.1661, "lr": 1.997671450473128e-06, "epoch": 0.5978061710600249, "percentage": 11.96, "elapsed_time": "0:31:10", "remaining_time": "3:49:35", "throughput": 8809.69, "total_tokens": 16480064} +{"current_steps": 24475, "total_steps": 204665, "loss": 0.1997, "lr": 1.997665630640032e-06, "epoch": 0.5979283218918721, "percentage": 11.96, "elapsed_time": "0:31:11", "remaining_time": "3:49:34", "throughput": 8809.76, "total_tokens": 16483200} +{"current_steps": 24480, "total_steps": 204665, "loss": 0.1039, "lr": 1.9976598035516433e-06, "epoch": 0.5980504727237193, "percentage": 11.96, "elapsed_time": "0:31:11", "remaining_time": "3:49:34", "throughput": 8809.77, "total_tokens": 16486208} +{"current_steps": 24485, "total_steps": 204665, "loss": 0.1477, "lr": 1.997653969208004e-06, "epoch": 0.5981726235555664, "percentage": 11.96, "elapsed_time": "0:31:11", "remaining_time": "3:49:33", "throughput": 8809.97, "total_tokens": 16489664} +{"current_steps": 24490, "total_steps": 204665, "loss": 0.1914, "lr": 1.9976481276091572e-06, "epoch": 0.5982947743874136, "percentage": 11.97, "elapsed_time": "0:31:12", "remaining_time": "3:49:32", "throughput": 8810.19, "total_tokens": 16493184} +{"current_steps": 24495, "total_steps": 204665, "loss": 0.0836, "lr": 1.9976422787551443e-06, "epoch": 0.5984169252192607, "percentage": 11.97, "elapsed_time": "0:31:12", "remaining_time": "3:49:32", "throughput": 8810.29, "total_tokens": 16496448} +{"current_steps": 24500, "total_steps": 204665, "loss": 0.1352, "lr": 1.9976364226460087e-06, "epoch": 0.5985390760511079, "percentage": 11.97, "elapsed_time": "0:31:12", "remaining_time": "3:49:31", "throughput": 8810.38, "total_tokens": 16499648} +{"current_steps": 24505, "total_steps": 204665, "loss": 0.152, "lr": 1.9976305592817928e-06, "epoch": 0.5986612268829551, "percentage": 11.97, "elapsed_time": "0:31:13", "remaining_time": "3:49:30", "throughput": 8810.38, "total_tokens": 16502656} +{"current_steps": 24510, "total_steps": 204665, "loss": 0.1363, "lr": 1.997624688662539e-06, "epoch": 0.5987833777148023, "percentage": 11.98, "elapsed_time": "0:31:13", "remaining_time": "3:49:30", "throughput": 8810.41, "total_tokens": 16505728} +{"current_steps": 24515, "total_steps": 204665, "loss": 0.1303, "lr": 1.99761881078829e-06, "epoch": 0.5989055285466494, "percentage": 11.98, "elapsed_time": "0:31:13", "remaining_time": "3:49:29", "throughput": 8810.72, "total_tokens": 16509440} +{"current_steps": 24520, "total_steps": 204665, "loss": 0.1228, "lr": 1.9976129256590885e-06, "epoch": 0.5990276793784965, "percentage": 11.98, "elapsed_time": "0:31:14", "remaining_time": "3:49:28", "throughput": 8810.67, "total_tokens": 16512320} +{"current_steps": 24525, "total_steps": 204665, "loss": 0.0758, "lr": 1.997607033274978e-06, "epoch": 0.5991498302103437, "percentage": 11.98, "elapsed_time": "0:31:14", "remaining_time": "3:49:28", "throughput": 8810.72, "total_tokens": 16515456} +{"current_steps": 24530, "total_steps": 204665, "loss": 0.0758, "lr": 1.9976011336360005e-06, "epoch": 0.5992719810421909, "percentage": 11.99, "elapsed_time": "0:31:14", "remaining_time": "3:49:27", "throughput": 8810.8, "total_tokens": 16518656} +{"current_steps": 24535, "total_steps": 204665, "loss": 0.0999, "lr": 1.9975952267421995e-06, "epoch": 0.5993941318740381, "percentage": 11.99, "elapsed_time": "0:31:15", "remaining_time": "3:49:27", "throughput": 8810.93, "total_tokens": 16521984} +{"current_steps": 24540, "total_steps": 204665, "loss": 0.1037, "lr": 1.9975893125936176e-06, "epoch": 0.5995162827058852, "percentage": 11.99, "elapsed_time": "0:31:15", "remaining_time": "3:49:26", "throughput": 8811.1, "total_tokens": 16525376} +{"current_steps": 24545, "total_steps": 204665, "loss": 0.2703, "lr": 1.9975833911902975e-06, "epoch": 0.5996384335377324, "percentage": 11.99, "elapsed_time": "0:31:15", "remaining_time": "3:49:25", "throughput": 8811.15, "total_tokens": 16528512} +{"current_steps": 24550, "total_steps": 204665, "loss": 0.058, "lr": 1.997577462532283e-06, "epoch": 0.5997605843695796, "percentage": 12.0, "elapsed_time": "0:31:16", "remaining_time": "3:49:25", "throughput": 8811.3, "total_tokens": 16531840} +{"current_steps": 24555, "total_steps": 204665, "loss": 0.1103, "lr": 1.997571526619617e-06, "epoch": 0.5998827352014268, "percentage": 12.0, "elapsed_time": "0:31:16", "remaining_time": "3:49:24", "throughput": 8811.66, "total_tokens": 16535680} +{"current_steps": 24560, "total_steps": 204665, "loss": 0.0944, "lr": 1.9975655834523426e-06, "epoch": 0.6000048860332738, "percentage": 12.0, "elapsed_time": "0:31:16", "remaining_time": "3:49:23", "throughput": 8811.82, "total_tokens": 16539072} +{"current_steps": 24565, "total_steps": 204665, "loss": 0.1462, "lr": 1.9975596330305027e-06, "epoch": 0.600127036865121, "percentage": 12.0, "elapsed_time": "0:31:17", "remaining_time": "3:49:23", "throughput": 8811.98, "total_tokens": 16542464} +{"current_steps": 24570, "total_steps": 204665, "loss": 0.0323, "lr": 1.997553675354141e-06, "epoch": 0.6002491876969682, "percentage": 12.0, "elapsed_time": "0:31:17", "remaining_time": "3:49:22", "throughput": 8812.17, "total_tokens": 16545920} +{"current_steps": 24575, "total_steps": 204665, "loss": 0.0757, "lr": 1.9975477104233005e-06, "epoch": 0.6003713385288154, "percentage": 12.01, "elapsed_time": "0:31:17", "remaining_time": "3:49:22", "throughput": 8812.27, "total_tokens": 16549184} +{"current_steps": 24580, "total_steps": 204665, "loss": 0.1313, "lr": 1.9975417382380247e-06, "epoch": 0.6004934893606625, "percentage": 12.01, "elapsed_time": "0:31:18", "remaining_time": "3:49:21", "throughput": 8812.25, "total_tokens": 16552128} +{"current_steps": 24585, "total_steps": 204665, "loss": 0.1199, "lr": 1.997535758798357e-06, "epoch": 0.6006156401925097, "percentage": 12.01, "elapsed_time": "0:31:18", "remaining_time": "3:49:20", "throughput": 8812.51, "total_tokens": 16555712} +{"current_steps": 24590, "total_steps": 204665, "loss": 0.1158, "lr": 1.9975297721043413e-06, "epoch": 0.6007377910243569, "percentage": 12.01, "elapsed_time": "0:31:19", "remaining_time": "3:49:20", "throughput": 8812.67, "total_tokens": 16559104} +{"current_steps": 24595, "total_steps": 204665, "loss": 0.0862, "lr": 1.9975237781560205e-06, "epoch": 0.6008599418562041, "percentage": 12.02, "elapsed_time": "0:31:19", "remaining_time": "3:49:19", "throughput": 8813.0, "total_tokens": 16562880} +{"current_steps": 24600, "total_steps": 204665, "loss": 0.1336, "lr": 1.997517776953439e-06, "epoch": 0.6009820926880513, "percentage": 12.02, "elapsed_time": "0:31:19", "remaining_time": "3:49:18", "throughput": 8813.09, "total_tokens": 16566080} +{"current_steps": 24605, "total_steps": 204665, "loss": 0.1872, "lr": 1.9975117684966394e-06, "epoch": 0.6011042435198983, "percentage": 12.02, "elapsed_time": "0:31:20", "remaining_time": "3:49:18", "throughput": 8813.39, "total_tokens": 16569792} +{"current_steps": 24610, "total_steps": 204665, "loss": 0.0663, "lr": 1.997505752785666e-06, "epoch": 0.6012263943517455, "percentage": 12.02, "elapsed_time": "0:31:20", "remaining_time": "3:49:17", "throughput": 8813.38, "total_tokens": 16572736} +{"current_steps": 24615, "total_steps": 204665, "loss": 0.1085, "lr": 1.9974997298205624e-06, "epoch": 0.6013485451835927, "percentage": 12.03, "elapsed_time": "0:31:20", "remaining_time": "3:49:17", "throughput": 8813.59, "total_tokens": 16576192} +{"current_steps": 24620, "total_steps": 204665, "loss": 0.2155, "lr": 1.9974936996013727e-06, "epoch": 0.6014706960154399, "percentage": 12.03, "elapsed_time": "0:31:21", "remaining_time": "3:49:16", "throughput": 8813.64, "total_tokens": 16579328} +{"current_steps": 24625, "total_steps": 204665, "loss": 0.0528, "lr": 1.9974876621281407e-06, "epoch": 0.601592846847287, "percentage": 12.03, "elapsed_time": "0:31:21", "remaining_time": "3:49:15", "throughput": 8813.71, "total_tokens": 16582464} +{"current_steps": 24630, "total_steps": 204665, "loss": 0.169, "lr": 1.9974816174009096e-06, "epoch": 0.6017149976791342, "percentage": 12.03, "elapsed_time": "0:31:21", "remaining_time": "3:49:15", "throughput": 8813.87, "total_tokens": 16585856} +{"current_steps": 24635, "total_steps": 204665, "loss": 0.126, "lr": 1.9974755654197244e-06, "epoch": 0.6018371485109814, "percentage": 12.04, "elapsed_time": "0:31:22", "remaining_time": "3:49:14", "throughput": 8814.18, "total_tokens": 16589568} +{"current_steps": 24640, "total_steps": 204665, "loss": 0.1085, "lr": 1.9974695061846283e-06, "epoch": 0.6019592993428285, "percentage": 12.04, "elapsed_time": "0:31:22", "remaining_time": "3:49:13", "throughput": 8814.41, "total_tokens": 16593088} +{"current_steps": 24645, "total_steps": 204665, "loss": 0.1587, "lr": 1.9974634396956656e-06, "epoch": 0.6020814501746757, "percentage": 12.04, "elapsed_time": "0:31:22", "remaining_time": "3:49:13", "throughput": 8814.95, "total_tokens": 16597376} +{"current_steps": 24650, "total_steps": 204665, "loss": 0.1954, "lr": 1.9974573659528805e-06, "epoch": 0.6022036010065228, "percentage": 12.04, "elapsed_time": "0:31:23", "remaining_time": "3:49:12", "throughput": 8815.08, "total_tokens": 16600704} +{"current_steps": 24655, "total_steps": 204665, "loss": 0.117, "lr": 1.9974512849563174e-06, "epoch": 0.60232575183837, "percentage": 12.05, "elapsed_time": "0:31:23", "remaining_time": "3:49:12", "throughput": 8815.39, "total_tokens": 16604416} +{"current_steps": 24660, "total_steps": 204665, "loss": 0.1157, "lr": 1.9974451967060204e-06, "epoch": 0.6024479026702172, "percentage": 12.05, "elapsed_time": "0:31:23", "remaining_time": "3:49:11", "throughput": 8815.5, "total_tokens": 16607680} +{"current_steps": 24665, "total_steps": 204665, "loss": 0.0785, "lr": 1.997439101202033e-06, "epoch": 0.6025700535020644, "percentage": 12.05, "elapsed_time": "0:31:24", "remaining_time": "3:49:10", "throughput": 8815.53, "total_tokens": 16610752} +{"current_steps": 24670, "total_steps": 204665, "loss": 0.1308, "lr": 1.9974329984444007e-06, "epoch": 0.6026922043339115, "percentage": 12.05, "elapsed_time": "0:31:24", "remaining_time": "3:49:10", "throughput": 8815.78, "total_tokens": 16614336} +{"current_steps": 24675, "total_steps": 204665, "loss": 0.0228, "lr": 1.997426888433167e-06, "epoch": 0.6028143551657587, "percentage": 12.06, "elapsed_time": "0:31:24", "remaining_time": "3:49:09", "throughput": 8815.99, "total_tokens": 16617856} +{"current_steps": 24680, "total_steps": 204665, "loss": 0.1213, "lr": 1.9974207711683772e-06, "epoch": 0.6029365059976058, "percentage": 12.06, "elapsed_time": "0:31:25", "remaining_time": "3:49:09", "throughput": 8816.1, "total_tokens": 16621120} +{"current_steps": 24685, "total_steps": 204665, "loss": 0.1865, "lr": 1.9974146466500746e-06, "epoch": 0.603058656829453, "percentage": 12.06, "elapsed_time": "0:31:25", "remaining_time": "3:49:08", "throughput": 8816.19, "total_tokens": 16624320} +{"current_steps": 24690, "total_steps": 204665, "loss": 0.202, "lr": 1.997408514878305e-06, "epoch": 0.6031808076613002, "percentage": 12.06, "elapsed_time": "0:31:26", "remaining_time": "3:49:07", "throughput": 8816.25, "total_tokens": 16627456} +{"current_steps": 24695, "total_steps": 204665, "loss": 0.2102, "lr": 1.997402375853112e-06, "epoch": 0.6033029584931473, "percentage": 12.07, "elapsed_time": "0:31:26", "remaining_time": "3:49:07", "throughput": 8816.41, "total_tokens": 16630848} +{"current_steps": 24700, "total_steps": 204665, "loss": 0.0908, "lr": 1.997396229574541e-06, "epoch": 0.6034251093249945, "percentage": 12.07, "elapsed_time": "0:31:26", "remaining_time": "3:49:06", "throughput": 8816.54, "total_tokens": 16634176} +{"current_steps": 24705, "total_steps": 204665, "loss": 0.1419, "lr": 1.9973900760426364e-06, "epoch": 0.6035472601568417, "percentage": 12.07, "elapsed_time": "0:31:27", "remaining_time": "3:49:05", "throughput": 8816.83, "total_tokens": 16637824} +{"current_steps": 24710, "total_steps": 204665, "loss": 0.104, "lr": 1.9973839152574425e-06, "epoch": 0.6036694109886889, "percentage": 12.07, "elapsed_time": "0:31:27", "remaining_time": "3:49:05", "throughput": 8817.04, "total_tokens": 16641344} +{"current_steps": 24715, "total_steps": 204665, "loss": 0.0662, "lr": 1.9973777472190046e-06, "epoch": 0.603791561820536, "percentage": 12.08, "elapsed_time": "0:31:27", "remaining_time": "3:49:04", "throughput": 8817.1, "total_tokens": 16644480} +{"current_steps": 24720, "total_steps": 204665, "loss": 0.0551, "lr": 1.9973715719273677e-06, "epoch": 0.6039137126523831, "percentage": 12.08, "elapsed_time": "0:31:28", "remaining_time": "3:49:04", "throughput": 8817.24, "total_tokens": 16647808} +{"current_steps": 24725, "total_steps": 204665, "loss": 0.0722, "lr": 1.9973653893825762e-06, "epoch": 0.6040358634842303, "percentage": 12.08, "elapsed_time": "0:31:28", "remaining_time": "3:49:03", "throughput": 8817.42, "total_tokens": 16651264} +{"current_steps": 24730, "total_steps": 204665, "loss": 0.1888, "lr": 1.9973591995846755e-06, "epoch": 0.6041580143160775, "percentage": 12.08, "elapsed_time": "0:31:28", "remaining_time": "3:49:02", "throughput": 8817.47, "total_tokens": 16654400} +{"current_steps": 24735, "total_steps": 204665, "loss": 0.066, "lr": 1.9973530025337105e-06, "epoch": 0.6042801651479247, "percentage": 12.09, "elapsed_time": "0:31:29", "remaining_time": "3:49:02", "throughput": 8817.65, "total_tokens": 16657856} +{"current_steps": 24740, "total_steps": 204665, "loss": 0.0497, "lr": 1.997346798229726e-06, "epoch": 0.6044023159797718, "percentage": 12.09, "elapsed_time": "0:31:29", "remaining_time": "3:49:01", "throughput": 8817.66, "total_tokens": 16660864} +{"current_steps": 24745, "total_steps": 204665, "loss": 0.1068, "lr": 1.9973405866727673e-06, "epoch": 0.604524466811619, "percentage": 12.09, "elapsed_time": "0:31:29", "remaining_time": "3:49:00", "throughput": 8817.84, "total_tokens": 16664320} +{"current_steps": 24750, "total_steps": 204665, "loss": 0.1451, "lr": 1.99733436786288e-06, "epoch": 0.6046466176434662, "percentage": 12.09, "elapsed_time": "0:31:30", "remaining_time": "3:49:00", "throughput": 8817.97, "total_tokens": 16667648} +{"current_steps": 24755, "total_steps": 204665, "loss": 0.0484, "lr": 1.997328141800109e-06, "epoch": 0.6047687684753134, "percentage": 12.1, "elapsed_time": "0:31:30", "remaining_time": "3:48:59", "throughput": 8818.3, "total_tokens": 16671424} +{"current_steps": 24760, "total_steps": 204665, "loss": 0.2128, "lr": 1.997321908484499e-06, "epoch": 0.6048909193071604, "percentage": 12.1, "elapsed_time": "0:31:30", "remaining_time": "3:48:59", "throughput": 8818.65, "total_tokens": 16675264} +{"current_steps": 24765, "total_steps": 204665, "loss": 0.2069, "lr": 1.997315667916096e-06, "epoch": 0.6050130701390076, "percentage": 12.1, "elapsed_time": "0:31:31", "remaining_time": "3:48:58", "throughput": 8818.96, "total_tokens": 16678976} +{"current_steps": 24770, "total_steps": 204665, "loss": 0.0918, "lr": 1.997309420094945e-06, "epoch": 0.6051352209708548, "percentage": 12.1, "elapsed_time": "0:31:31", "remaining_time": "3:48:58", "throughput": 8819.24, "total_tokens": 16682624} +{"current_steps": 24775, "total_steps": 204665, "loss": 0.1285, "lr": 1.9973031650210922e-06, "epoch": 0.605257371802702, "percentage": 12.11, "elapsed_time": "0:31:31", "remaining_time": "3:48:57", "throughput": 8819.38, "total_tokens": 16685952} +{"current_steps": 24780, "total_steps": 204665, "loss": 0.081, "lr": 1.997296902694582e-06, "epoch": 0.6053795226345492, "percentage": 12.11, "elapsed_time": "0:31:32", "remaining_time": "3:48:56", "throughput": 8819.87, "total_tokens": 16690112} +{"current_steps": 24785, "total_steps": 204665, "loss": 0.1219, "lr": 1.997290633115461e-06, "epoch": 0.6055016734663963, "percentage": 12.11, "elapsed_time": "0:31:32", "remaining_time": "3:48:56", "throughput": 8820.1, "total_tokens": 16693696} +{"current_steps": 24790, "total_steps": 204665, "loss": 0.1666, "lr": 1.9972843562837737e-06, "epoch": 0.6056238242982435, "percentage": 12.11, "elapsed_time": "0:31:33", "remaining_time": "3:48:55", "throughput": 8820.13, "total_tokens": 16696768} +{"current_steps": 24795, "total_steps": 204665, "loss": 0.0921, "lr": 1.997278072199567e-06, "epoch": 0.6057459751300907, "percentage": 12.11, "elapsed_time": "0:31:33", "remaining_time": "3:48:55", "throughput": 8820.23, "total_tokens": 16700032} +{"current_steps": 24800, "total_steps": 204665, "loss": 0.0925, "lr": 1.997271780862885e-06, "epoch": 0.6058681259619378, "percentage": 12.12, "elapsed_time": "0:31:33", "remaining_time": "3:48:54", "throughput": 8820.53, "total_tokens": 16703744} +{"current_steps": 24805, "total_steps": 204665, "loss": 0.085, "lr": 1.9972654822737753e-06, "epoch": 0.6059902767937849, "percentage": 12.12, "elapsed_time": "0:31:34", "remaining_time": "3:48:53", "throughput": 8820.74, "total_tokens": 16707264} +{"current_steps": 24810, "total_steps": 204665, "loss": 0.0473, "lr": 1.997259176432282e-06, "epoch": 0.6061124276256321, "percentage": 12.12, "elapsed_time": "0:31:34", "remaining_time": "3:48:53", "throughput": 8821.04, "total_tokens": 16710976} +{"current_steps": 24815, "total_steps": 204665, "loss": 0.0786, "lr": 1.997252863338452e-06, "epoch": 0.6062345784574793, "percentage": 12.12, "elapsed_time": "0:31:34", "remaining_time": "3:48:52", "throughput": 8821.29, "total_tokens": 16714560} +{"current_steps": 24820, "total_steps": 204665, "loss": 0.0628, "lr": 1.9972465429923315e-06, "epoch": 0.6063567292893265, "percentage": 12.13, "elapsed_time": "0:31:35", "remaining_time": "3:48:52", "throughput": 8821.54, "total_tokens": 16718144} +{"current_steps": 24825, "total_steps": 204665, "loss": 0.1397, "lr": 1.997240215393965e-06, "epoch": 0.6064788801211737, "percentage": 12.13, "elapsed_time": "0:31:35", "remaining_time": "3:48:51", "throughput": 8821.62, "total_tokens": 16721344} +{"current_steps": 24830, "total_steps": 204665, "loss": 0.1264, "lr": 1.9972338805434002e-06, "epoch": 0.6066010309530208, "percentage": 12.13, "elapsed_time": "0:31:35", "remaining_time": "3:48:50", "throughput": 8821.67, "total_tokens": 16724480} +{"current_steps": 24835, "total_steps": 204665, "loss": 0.0505, "lr": 1.9972275384406823e-06, "epoch": 0.606723181784868, "percentage": 12.13, "elapsed_time": "0:31:36", "remaining_time": "3:48:50", "throughput": 8821.81, "total_tokens": 16727808} +{"current_steps": 24840, "total_steps": 204665, "loss": 0.0473, "lr": 1.997221189085857e-06, "epoch": 0.6068453326167151, "percentage": 12.14, "elapsed_time": "0:31:36", "remaining_time": "3:48:49", "throughput": 8821.79, "total_tokens": 16730752} +{"current_steps": 24845, "total_steps": 204665, "loss": 0.1355, "lr": 1.9972148324789714e-06, "epoch": 0.6069674834485623, "percentage": 12.14, "elapsed_time": "0:31:36", "remaining_time": "3:48:48", "throughput": 8821.95, "total_tokens": 16734144} +{"current_steps": 24850, "total_steps": 204665, "loss": 0.2389, "lr": 1.9972084686200712e-06, "epoch": 0.6070896342804094, "percentage": 12.14, "elapsed_time": "0:31:37", "remaining_time": "3:48:48", "throughput": 8822.22, "total_tokens": 16737792} +{"current_steps": 24855, "total_steps": 204665, "loss": 0.2192, "lr": 1.997202097509203e-06, "epoch": 0.6072117851122566, "percentage": 12.14, "elapsed_time": "0:31:37", "remaining_time": "3:48:47", "throughput": 8822.27, "total_tokens": 16740928} +{"current_steps": 24860, "total_steps": 204665, "loss": 0.1178, "lr": 1.997195719146413e-06, "epoch": 0.6073339359441038, "percentage": 12.15, "elapsed_time": "0:31:37", "remaining_time": "3:48:47", "throughput": 8822.4, "total_tokens": 16744256} +{"current_steps": 24865, "total_steps": 204665, "loss": 0.1032, "lr": 1.9971893335317472e-06, "epoch": 0.607456086775951, "percentage": 12.15, "elapsed_time": "0:31:38", "remaining_time": "3:48:46", "throughput": 8822.61, "total_tokens": 16747776} +{"current_steps": 24870, "total_steps": 204665, "loss": 0.1418, "lr": 1.997182940665252e-06, "epoch": 0.607578237607798, "percentage": 12.15, "elapsed_time": "0:31:38", "remaining_time": "3:48:45", "throughput": 8822.77, "total_tokens": 16751168} +{"current_steps": 24875, "total_steps": 204665, "loss": 0.1081, "lr": 1.997176540546975e-06, "epoch": 0.6077003884396452, "percentage": 12.15, "elapsed_time": "0:31:38", "remaining_time": "3:48:45", "throughput": 8822.95, "total_tokens": 16754624} +{"current_steps": 24880, "total_steps": 204665, "loss": 0.0947, "lr": 1.997170133176962e-06, "epoch": 0.6078225392714924, "percentage": 12.16, "elapsed_time": "0:31:39", "remaining_time": "3:48:44", "throughput": 8823.11, "total_tokens": 16758016} +{"current_steps": 24885, "total_steps": 204665, "loss": 0.0705, "lr": 1.9971637185552593e-06, "epoch": 0.6079446901033396, "percentage": 12.16, "elapsed_time": "0:31:39", "remaining_time": "3:48:44", "throughput": 8823.25, "total_tokens": 16761344} +{"current_steps": 24890, "total_steps": 204665, "loss": 0.1168, "lr": 1.997157296681914e-06, "epoch": 0.6080668409351868, "percentage": 12.16, "elapsed_time": "0:31:40", "remaining_time": "3:48:43", "throughput": 8823.31, "total_tokens": 16764480} +{"current_steps": 24895, "total_steps": 204665, "loss": 0.1466, "lr": 1.997150867556972e-06, "epoch": 0.6081889917670339, "percentage": 12.16, "elapsed_time": "0:31:40", "remaining_time": "3:48:42", "throughput": 8823.5, "total_tokens": 16767936} +{"current_steps": 24900, "total_steps": 204665, "loss": 0.0522, "lr": 1.997144431180481e-06, "epoch": 0.6083111425988811, "percentage": 12.17, "elapsed_time": "0:31:40", "remaining_time": "3:48:42", "throughput": 8823.68, "total_tokens": 16771328} +{"current_steps": 24905, "total_steps": 204665, "loss": 0.1292, "lr": 1.9971379875524876e-06, "epoch": 0.6084332934307283, "percentage": 12.17, "elapsed_time": "0:31:41", "remaining_time": "3:48:41", "throughput": 8823.71, "total_tokens": 16774400} +{"current_steps": 24910, "total_steps": 204665, "loss": 0.13, "lr": 1.9971315366730388e-06, "epoch": 0.6085554442625755, "percentage": 12.17, "elapsed_time": "0:31:41", "remaining_time": "3:48:40", "throughput": 8823.76, "total_tokens": 16777536} +{"current_steps": 24915, "total_steps": 204665, "loss": 0.0675, "lr": 1.997125078542181e-06, "epoch": 0.6086775950944225, "percentage": 12.17, "elapsed_time": "0:31:41", "remaining_time": "3:48:40", "throughput": 8823.7, "total_tokens": 16780416} +{"current_steps": 24920, "total_steps": 204665, "loss": 0.0505, "lr": 1.9971186131599617e-06, "epoch": 0.6087997459262697, "percentage": 12.18, "elapsed_time": "0:31:42", "remaining_time": "3:48:39", "throughput": 8823.68, "total_tokens": 16783360} +{"current_steps": 24925, "total_steps": 204665, "loss": 0.1188, "lr": 1.9971121405264275e-06, "epoch": 0.6089218967581169, "percentage": 12.18, "elapsed_time": "0:31:42", "remaining_time": "3:48:38", "throughput": 8823.75, "total_tokens": 16786496} +{"current_steps": 24930, "total_steps": 204665, "loss": 0.0607, "lr": 1.997105660641625e-06, "epoch": 0.6090440475899641, "percentage": 12.18, "elapsed_time": "0:31:42", "remaining_time": "3:48:38", "throughput": 8823.77, "total_tokens": 16789504} +{"current_steps": 24935, "total_steps": 204665, "loss": 0.0866, "lr": 1.997099173505603e-06, "epoch": 0.6091661984218113, "percentage": 12.18, "elapsed_time": "0:31:43", "remaining_time": "3:48:37", "throughput": 8824.02, "total_tokens": 16793088} +{"current_steps": 24940, "total_steps": 204665, "loss": 0.0902, "lr": 1.997092679118407e-06, "epoch": 0.6092883492536584, "percentage": 12.19, "elapsed_time": "0:31:43", "remaining_time": "3:48:36", "throughput": 8824.35, "total_tokens": 16796864} +{"current_steps": 24945, "total_steps": 204665, "loss": 0.1309, "lr": 1.9970861774800848e-06, "epoch": 0.6094105000855056, "percentage": 12.19, "elapsed_time": "0:31:43", "remaining_time": "3:48:36", "throughput": 8824.52, "total_tokens": 16800256} +{"current_steps": 24950, "total_steps": 204665, "loss": 0.066, "lr": 1.9970796685906838e-06, "epoch": 0.6095326509173528, "percentage": 12.19, "elapsed_time": "0:31:44", "remaining_time": "3:48:35", "throughput": 8824.68, "total_tokens": 16803648} +{"current_steps": 24955, "total_steps": 204665, "loss": 0.0865, "lr": 1.9970731524502517e-06, "epoch": 0.6096548017492, "percentage": 12.19, "elapsed_time": "0:31:44", "remaining_time": "3:48:35", "throughput": 8824.92, "total_tokens": 16807232} +{"current_steps": 24960, "total_steps": 204665, "loss": 0.1032, "lr": 1.9970666290588348e-06, "epoch": 0.609776952581047, "percentage": 12.2, "elapsed_time": "0:31:44", "remaining_time": "3:48:34", "throughput": 8825.08, "total_tokens": 16810624} +{"current_steps": 24965, "total_steps": 204665, "loss": 0.2635, "lr": 1.9970600984164817e-06, "epoch": 0.6098991034128942, "percentage": 12.2, "elapsed_time": "0:31:45", "remaining_time": "3:48:33", "throughput": 8825.11, "total_tokens": 16813696} +{"current_steps": 24970, "total_steps": 204665, "loss": 0.137, "lr": 1.9970535605232394e-06, "epoch": 0.6100212542447414, "percentage": 12.2, "elapsed_time": "0:31:45", "remaining_time": "3:48:33", "throughput": 8825.21, "total_tokens": 16816960} +{"current_steps": 24975, "total_steps": 204665, "loss": 0.0676, "lr": 1.9970470153791553e-06, "epoch": 0.6101434050765886, "percentage": 12.2, "elapsed_time": "0:31:45", "remaining_time": "3:48:32", "throughput": 8825.26, "total_tokens": 16820096} +{"current_steps": 24980, "total_steps": 204665, "loss": 0.1488, "lr": 1.997040462984277e-06, "epoch": 0.6102655559084358, "percentage": 12.21, "elapsed_time": "0:31:46", "remaining_time": "3:48:31", "throughput": 8825.31, "total_tokens": 16823232} +{"current_steps": 24985, "total_steps": 204665, "loss": 0.0996, "lr": 1.997033903338652e-06, "epoch": 0.6103877067402829, "percentage": 12.21, "elapsed_time": "0:31:46", "remaining_time": "3:48:31", "throughput": 8825.34, "total_tokens": 16826304} +{"current_steps": 24990, "total_steps": 204665, "loss": 0.198, "lr": 1.9970273364423292e-06, "epoch": 0.61050985757213, "percentage": 12.21, "elapsed_time": "0:31:46", "remaining_time": "3:48:30", "throughput": 8825.53, "total_tokens": 16829760} +{"current_steps": 24995, "total_steps": 204665, "loss": 0.1193, "lr": 1.9970207622953547e-06, "epoch": 0.6106320084039772, "percentage": 12.21, "elapsed_time": "0:31:47", "remaining_time": "3:48:30", "throughput": 8825.55, "total_tokens": 16832832} +{"current_steps": 25000, "total_steps": 204665, "loss": 0.0615, "lr": 1.9970141808977773e-06, "epoch": 0.6107541592358244, "percentage": 12.22, "elapsed_time": "0:31:47", "remaining_time": "3:48:29", "throughput": 8825.79, "total_tokens": 16836416} +{"current_steps": 25005, "total_steps": 204665, "loss": 0.1696, "lr": 1.9970075922496444e-06, "epoch": 0.6108763100676715, "percentage": 12.22, "elapsed_time": "0:31:47", "remaining_time": "3:48:28", "throughput": 8825.87, "total_tokens": 16839616} +{"current_steps": 25010, "total_steps": 204665, "loss": 0.0412, "lr": 1.9970009963510044e-06, "epoch": 0.6109984608995187, "percentage": 12.22, "elapsed_time": "0:31:48", "remaining_time": "3:48:28", "throughput": 8826.02, "total_tokens": 16843008} +{"current_steps": 25015, "total_steps": 204665, "loss": 0.1899, "lr": 1.9969943932019047e-06, "epoch": 0.6111206117313659, "percentage": 12.22, "elapsed_time": "0:31:48", "remaining_time": "3:48:27", "throughput": 8826.21, "total_tokens": 16846464} +{"current_steps": 25020, "total_steps": 204665, "loss": 0.0507, "lr": 1.996987782802394e-06, "epoch": 0.6112427625632131, "percentage": 12.22, "elapsed_time": "0:31:49", "remaining_time": "3:48:27", "throughput": 8826.41, "total_tokens": 16849984} +{"current_steps": 25025, "total_steps": 204665, "loss": 0.0448, "lr": 1.9969811651525196e-06, "epoch": 0.6113649133950603, "percentage": 12.23, "elapsed_time": "0:31:49", "remaining_time": "3:48:26", "throughput": 8826.5, "total_tokens": 16853184} +{"current_steps": 25030, "total_steps": 204665, "loss": 0.1035, "lr": 1.9969745402523303e-06, "epoch": 0.6114870642269074, "percentage": 12.23, "elapsed_time": "0:31:49", "remaining_time": "3:48:25", "throughput": 8826.68, "total_tokens": 16856640} +{"current_steps": 25035, "total_steps": 204665, "loss": 0.0872, "lr": 1.9969679081018737e-06, "epoch": 0.6116092150587545, "percentage": 12.23, "elapsed_time": "0:31:50", "remaining_time": "3:48:25", "throughput": 8826.71, "total_tokens": 16859712} +{"current_steps": 25040, "total_steps": 204665, "loss": 0.1359, "lr": 1.9969612687011987e-06, "epoch": 0.6117313658906017, "percentage": 12.23, "elapsed_time": "0:31:50", "remaining_time": "3:48:24", "throughput": 8826.85, "total_tokens": 16863040} +{"current_steps": 25045, "total_steps": 204665, "loss": 0.205, "lr": 1.996954622050353e-06, "epoch": 0.6118535167224489, "percentage": 12.24, "elapsed_time": "0:31:50", "remaining_time": "3:48:23", "throughput": 8826.91, "total_tokens": 16866176} +{"current_steps": 25050, "total_steps": 204665, "loss": 0.0858, "lr": 1.996947968149385e-06, "epoch": 0.611975667554296, "percentage": 12.24, "elapsed_time": "0:31:51", "remaining_time": "3:48:23", "throughput": 8827.09, "total_tokens": 16869632} +{"current_steps": 25055, "total_steps": 204665, "loss": 0.0653, "lr": 1.9969413069983435e-06, "epoch": 0.6120978183861432, "percentage": 12.24, "elapsed_time": "0:31:51", "remaining_time": "3:48:22", "throughput": 8827.25, "total_tokens": 16873024} +{"current_steps": 25060, "total_steps": 204665, "loss": 0.14, "lr": 1.9969346385972764e-06, "epoch": 0.6122199692179904, "percentage": 12.24, "elapsed_time": "0:31:51", "remaining_time": "3:48:22", "throughput": 8827.54, "total_tokens": 16876736} +{"current_steps": 25065, "total_steps": 204665, "loss": 0.2184, "lr": 1.9969279629462327e-06, "epoch": 0.6123421200498376, "percentage": 12.25, "elapsed_time": "0:31:52", "remaining_time": "3:48:21", "throughput": 8827.71, "total_tokens": 16880128} +{"current_steps": 25070, "total_steps": 204665, "loss": 0.066, "lr": 1.9969212800452608e-06, "epoch": 0.6124642708816848, "percentage": 12.25, "elapsed_time": "0:31:52", "remaining_time": "3:48:20", "throughput": 8827.7, "total_tokens": 16883072} +{"current_steps": 25075, "total_steps": 204665, "loss": 0.0957, "lr": 1.996914589894409e-06, "epoch": 0.6125864217135318, "percentage": 12.25, "elapsed_time": "0:31:52", "remaining_time": "3:48:20", "throughput": 8827.87, "total_tokens": 16886464} +{"current_steps": 25080, "total_steps": 204665, "loss": 0.0604, "lr": 1.9969078924937263e-06, "epoch": 0.612708572545379, "percentage": 12.25, "elapsed_time": "0:31:53", "remaining_time": "3:48:19", "throughput": 8828.14, "total_tokens": 16890112} +{"current_steps": 25085, "total_steps": 204665, "loss": 0.1052, "lr": 1.9969011878432608e-06, "epoch": 0.6128307233772262, "percentage": 12.26, "elapsed_time": "0:31:53", "remaining_time": "3:48:18", "throughput": 8828.34, "total_tokens": 16893632} +{"current_steps": 25090, "total_steps": 204665, "loss": 0.1961, "lr": 1.996894475943062e-06, "epoch": 0.6129528742090734, "percentage": 12.26, "elapsed_time": "0:31:53", "remaining_time": "3:48:18", "throughput": 8828.42, "total_tokens": 16896832} +{"current_steps": 25095, "total_steps": 204665, "loss": 0.237, "lr": 1.996887756793179e-06, "epoch": 0.6130750250409205, "percentage": 12.26, "elapsed_time": "0:31:54", "remaining_time": "3:48:17", "throughput": 8828.56, "total_tokens": 16900224} +{"current_steps": 25100, "total_steps": 204665, "loss": 0.0974, "lr": 1.9968810303936593e-06, "epoch": 0.6131971758727677, "percentage": 12.26, "elapsed_time": "0:31:54", "remaining_time": "3:48:17", "throughput": 8828.7, "total_tokens": 16903552} +{"current_steps": 25105, "total_steps": 204665, "loss": 0.1542, "lr": 1.996874296744553e-06, "epoch": 0.6133193267046149, "percentage": 12.27, "elapsed_time": "0:31:54", "remaining_time": "3:48:16", "throughput": 8828.75, "total_tokens": 16906688} +{"current_steps": 25110, "total_steps": 204665, "loss": 0.0714, "lr": 1.9968675558459085e-06, "epoch": 0.613441477536462, "percentage": 12.27, "elapsed_time": "0:31:55", "remaining_time": "3:48:15", "throughput": 8828.78, "total_tokens": 16909760} +{"current_steps": 25115, "total_steps": 204665, "loss": 0.0904, "lr": 1.9968608076977753e-06, "epoch": 0.6135636283683091, "percentage": 12.27, "elapsed_time": "0:31:55", "remaining_time": "3:48:15", "throughput": 8828.92, "total_tokens": 16913088} +{"current_steps": 25120, "total_steps": 204665, "loss": 0.1161, "lr": 1.996854052300202e-06, "epoch": 0.6136857792001563, "percentage": 12.27, "elapsed_time": "0:31:55", "remaining_time": "3:48:14", "throughput": 8828.95, "total_tokens": 16916160} +{"current_steps": 25125, "total_steps": 204665, "loss": 0.034, "lr": 1.996847289653238e-06, "epoch": 0.6138079300320035, "percentage": 12.28, "elapsed_time": "0:31:56", "remaining_time": "3:48:13", "throughput": 8829.19, "total_tokens": 16919744} +{"current_steps": 25130, "total_steps": 204665, "loss": 0.1062, "lr": 1.996840519756932e-06, "epoch": 0.6139300808638507, "percentage": 12.28, "elapsed_time": "0:31:56", "remaining_time": "3:48:13", "throughput": 8829.4, "total_tokens": 16923264} +{"current_steps": 25135, "total_steps": 204665, "loss": 0.153, "lr": 1.996833742611334e-06, "epoch": 0.6140522316956979, "percentage": 12.28, "elapsed_time": "0:31:57", "remaining_time": "3:48:12", "throughput": 8829.4, "total_tokens": 16926272} +{"current_steps": 25140, "total_steps": 204665, "loss": 0.1932, "lr": 1.996826958216493e-06, "epoch": 0.614174382527545, "percentage": 12.28, "elapsed_time": "0:31:57", "remaining_time": "3:48:12", "throughput": 8829.58, "total_tokens": 16929728} +{"current_steps": 25145, "total_steps": 204665, "loss": 0.0886, "lr": 1.996820166572458e-06, "epoch": 0.6142965333593922, "percentage": 12.29, "elapsed_time": "0:31:57", "remaining_time": "3:48:11", "throughput": 8829.86, "total_tokens": 16933376} +{"current_steps": 25150, "total_steps": 204665, "loss": 0.0943, "lr": 1.996813367679279e-06, "epoch": 0.6144186841912394, "percentage": 12.29, "elapsed_time": "0:31:58", "remaining_time": "3:48:10", "throughput": 8829.9, "total_tokens": 16936448} +{"current_steps": 25155, "total_steps": 204665, "loss": 0.1077, "lr": 1.9968065615370046e-06, "epoch": 0.6145408350230865, "percentage": 12.29, "elapsed_time": "0:31:58", "remaining_time": "3:48:10", "throughput": 8830.11, "total_tokens": 16939968} +{"current_steps": 25160, "total_steps": 204665, "loss": 0.1043, "lr": 1.996799748145685e-06, "epoch": 0.6146629858549336, "percentage": 12.29, "elapsed_time": "0:31:58", "remaining_time": "3:48:09", "throughput": 8830.21, "total_tokens": 16943232} +{"current_steps": 25165, "total_steps": 204665, "loss": 0.1222, "lr": 1.9967929275053695e-06, "epoch": 0.6147851366867808, "percentage": 12.3, "elapsed_time": "0:31:59", "remaining_time": "3:48:08", "throughput": 8830.11, "total_tokens": 16945984} +{"current_steps": 25170, "total_steps": 204665, "loss": 0.2028, "lr": 1.996786099616108e-06, "epoch": 0.614907287518628, "percentage": 12.3, "elapsed_time": "0:31:59", "remaining_time": "3:48:08", "throughput": 8830.26, "total_tokens": 16949376} +{"current_steps": 25175, "total_steps": 204665, "loss": 0.1709, "lr": 1.9967792644779496e-06, "epoch": 0.6150294383504752, "percentage": 12.3, "elapsed_time": "0:31:59", "remaining_time": "3:48:07", "throughput": 8830.32, "total_tokens": 16952512} +{"current_steps": 25180, "total_steps": 204665, "loss": 0.154, "lr": 1.9967724220909444e-06, "epoch": 0.6151515891823224, "percentage": 12.3, "elapsed_time": "0:32:00", "remaining_time": "3:48:07", "throughput": 8830.45, "total_tokens": 16955840} +{"current_steps": 25185, "total_steps": 204665, "loss": 0.0944, "lr": 1.996765572455142e-06, "epoch": 0.6152737400141695, "percentage": 12.31, "elapsed_time": "0:32:00", "remaining_time": "3:48:06", "throughput": 8830.77, "total_tokens": 16959616} +{"current_steps": 25190, "total_steps": 204665, "loss": 0.147, "lr": 1.996758715570592e-06, "epoch": 0.6153958908460166, "percentage": 12.31, "elapsed_time": "0:32:00", "remaining_time": "3:48:05", "throughput": 8830.88, "total_tokens": 16962880} +{"current_steps": 25195, "total_steps": 204665, "loss": 0.1115, "lr": 1.9967518514373447e-06, "epoch": 0.6155180416778638, "percentage": 12.31, "elapsed_time": "0:32:01", "remaining_time": "3:48:05", "throughput": 8830.91, "total_tokens": 16965952} +{"current_steps": 25200, "total_steps": 204665, "loss": 0.0778, "lr": 1.9967449800554497e-06, "epoch": 0.615640192509711, "percentage": 12.31, "elapsed_time": "0:32:01", "remaining_time": "3:48:04", "throughput": 8831.04, "total_tokens": 16969280} +{"current_steps": 25205, "total_steps": 204665, "loss": 0.1822, "lr": 1.996738101424957e-06, "epoch": 0.6157623433415581, "percentage": 12.32, "elapsed_time": "0:32:01", "remaining_time": "3:48:03", "throughput": 8831.23, "total_tokens": 16972736} +{"current_steps": 25210, "total_steps": 204665, "loss": 0.1052, "lr": 1.9967312155459175e-06, "epoch": 0.6158844941734053, "percentage": 12.32, "elapsed_time": "0:32:02", "remaining_time": "3:48:03", "throughput": 8831.37, "total_tokens": 16976064} +{"current_steps": 25215, "total_steps": 204665, "loss": 0.1949, "lr": 1.99672432241838e-06, "epoch": 0.6160066450052525, "percentage": 12.32, "elapsed_time": "0:32:02", "remaining_time": "3:48:02", "throughput": 8831.47, "total_tokens": 16979328} +{"current_steps": 25220, "total_steps": 204665, "loss": 0.1043, "lr": 1.9967174220423954e-06, "epoch": 0.6161287958370997, "percentage": 12.32, "elapsed_time": "0:32:02", "remaining_time": "3:48:02", "throughput": 8831.53, "total_tokens": 16982464} +{"current_steps": 25225, "total_steps": 204665, "loss": 0.1102, "lr": 1.996710514418013e-06, "epoch": 0.6162509466689469, "percentage": 12.33, "elapsed_time": "0:32:03", "remaining_time": "3:48:01", "throughput": 8831.77, "total_tokens": 16986048} +{"current_steps": 25230, "total_steps": 204665, "loss": 0.134, "lr": 1.996703599545284e-06, "epoch": 0.6163730975007939, "percentage": 12.33, "elapsed_time": "0:32:03", "remaining_time": "3:48:00", "throughput": 8831.88, "total_tokens": 16989312} +{"current_steps": 25235, "total_steps": 204665, "loss": 0.0338, "lr": 1.996696677424259e-06, "epoch": 0.6164952483326411, "percentage": 12.33, "elapsed_time": "0:32:03", "remaining_time": "3:48:00", "throughput": 8832.2, "total_tokens": 16993088} +{"current_steps": 25240, "total_steps": 204665, "loss": 0.1011, "lr": 1.996689748054987e-06, "epoch": 0.6166173991644883, "percentage": 12.33, "elapsed_time": "0:32:04", "remaining_time": "3:47:59", "throughput": 8832.44, "total_tokens": 16996672} +{"current_steps": 25245, "total_steps": 204665, "loss": 0.1098, "lr": 1.996682811437519e-06, "epoch": 0.6167395499963355, "percentage": 12.33, "elapsed_time": "0:32:04", "remaining_time": "3:47:59", "throughput": 8832.63, "total_tokens": 17000128} +{"current_steps": 25250, "total_steps": 204665, "loss": 0.0585, "lr": 1.9966758675719057e-06, "epoch": 0.6168617008281826, "percentage": 12.34, "elapsed_time": "0:32:05", "remaining_time": "3:47:58", "throughput": 8832.87, "total_tokens": 17003712} +{"current_steps": 25255, "total_steps": 204665, "loss": 0.0604, "lr": 1.996668916458197e-06, "epoch": 0.6169838516600298, "percentage": 12.34, "elapsed_time": "0:32:05", "remaining_time": "3:47:58", "throughput": 8833.34, "total_tokens": 17007872} +{"current_steps": 25260, "total_steps": 204665, "loss": 0.0614, "lr": 1.9966619580964446e-06, "epoch": 0.617106002491877, "percentage": 12.34, "elapsed_time": "0:32:05", "remaining_time": "3:47:57", "throughput": 8833.55, "total_tokens": 17011392} +{"current_steps": 25265, "total_steps": 204665, "loss": 0.1322, "lr": 1.996654992486698e-06, "epoch": 0.6172281533237242, "percentage": 12.34, "elapsed_time": "0:32:06", "remaining_time": "3:47:56", "throughput": 8833.69, "total_tokens": 17014720} +{"current_steps": 25270, "total_steps": 204665, "loss": 0.0883, "lr": 1.9966480196290087e-06, "epoch": 0.6173503041555713, "percentage": 12.35, "elapsed_time": "0:32:06", "remaining_time": "3:47:56", "throughput": 8833.92, "total_tokens": 17018304} +{"current_steps": 25275, "total_steps": 204665, "loss": 0.1705, "lr": 1.996641039523426e-06, "epoch": 0.6174724549874184, "percentage": 12.35, "elapsed_time": "0:32:06", "remaining_time": "3:47:55", "throughput": 8834.16, "total_tokens": 17021888} +{"current_steps": 25280, "total_steps": 204665, "loss": 0.1127, "lr": 1.9966340521700024e-06, "epoch": 0.6175946058192656, "percentage": 12.35, "elapsed_time": "0:32:07", "remaining_time": "3:47:55", "throughput": 8834.32, "total_tokens": 17025280} +{"current_steps": 25285, "total_steps": 204665, "loss": 0.021, "lr": 1.9966270575687876e-06, "epoch": 0.6177167566511128, "percentage": 12.35, "elapsed_time": "0:32:07", "remaining_time": "3:47:54", "throughput": 8834.64, "total_tokens": 17029056} +{"current_steps": 25290, "total_steps": 204665, "loss": 0.0255, "lr": 1.996620055719833e-06, "epoch": 0.61783890748296, "percentage": 12.36, "elapsed_time": "0:32:07", "remaining_time": "3:47:53", "throughput": 8834.76, "total_tokens": 17032384} +{"current_steps": 25295, "total_steps": 204665, "loss": 0.081, "lr": 1.9966130466231886e-06, "epoch": 0.6179610583148071, "percentage": 12.36, "elapsed_time": "0:32:08", "remaining_time": "3:47:53", "throughput": 8834.76, "total_tokens": 17035392} +{"current_steps": 25300, "total_steps": 204665, "loss": 0.1271, "lr": 1.996606030278907e-06, "epoch": 0.6180832091466543, "percentage": 12.36, "elapsed_time": "0:32:08", "remaining_time": "3:47:52", "throughput": 8834.95, "total_tokens": 17038848} +{"current_steps": 25305, "total_steps": 204665, "loss": 0.1147, "lr": 1.9965990066870374e-06, "epoch": 0.6182053599785015, "percentage": 12.36, "elapsed_time": "0:32:08", "remaining_time": "3:47:52", "throughput": 8835.35, "total_tokens": 17042816} +{"current_steps": 25310, "total_steps": 204665, "loss": 0.0699, "lr": 1.9965919758476325e-06, "epoch": 0.6183275108103486, "percentage": 12.37, "elapsed_time": "0:32:09", "remaining_time": "3:47:51", "throughput": 8835.36, "total_tokens": 17045824} +{"current_steps": 25315, "total_steps": 204665, "loss": 0.1055, "lr": 1.9965849377607423e-06, "epoch": 0.6184496616421958, "percentage": 12.37, "elapsed_time": "0:32:09", "remaining_time": "3:47:50", "throughput": 8835.48, "total_tokens": 17049152} +{"current_steps": 25320, "total_steps": 204665, "loss": 0.1039, "lr": 1.9965778924264183e-06, "epoch": 0.6185718124740429, "percentage": 12.37, "elapsed_time": "0:32:09", "remaining_time": "3:47:50", "throughput": 8835.58, "total_tokens": 17052416} +{"current_steps": 25325, "total_steps": 204665, "loss": 0.0592, "lr": 1.996570839844712e-06, "epoch": 0.6186939633058901, "percentage": 12.37, "elapsed_time": "0:32:10", "remaining_time": "3:47:49", "throughput": 8835.78, "total_tokens": 17055872} +{"current_steps": 25330, "total_steps": 204665, "loss": 0.1786, "lr": 1.9965637800156747e-06, "epoch": 0.6188161141377373, "percentage": 12.38, "elapsed_time": "0:32:10", "remaining_time": "3:47:48", "throughput": 8835.78, "total_tokens": 17058880} +{"current_steps": 25335, "total_steps": 204665, "loss": 0.0409, "lr": 1.9965567129393576e-06, "epoch": 0.6189382649695845, "percentage": 12.38, "elapsed_time": "0:32:11", "remaining_time": "3:47:48", "throughput": 8835.93, "total_tokens": 17062208} +{"current_steps": 25340, "total_steps": 204665, "loss": 0.1413, "lr": 1.9965496386158117e-06, "epoch": 0.6190604158014316, "percentage": 12.38, "elapsed_time": "0:32:11", "remaining_time": "3:47:47", "throughput": 8836.21, "total_tokens": 17065920} +{"current_steps": 25345, "total_steps": 204665, "loss": 0.1079, "lr": 1.996542557045089e-06, "epoch": 0.6191825666332788, "percentage": 12.38, "elapsed_time": "0:32:11", "remaining_time": "3:47:47", "throughput": 8836.47, "total_tokens": 17069568} +{"current_steps": 25350, "total_steps": 204665, "loss": 0.2241, "lr": 1.9965354682272405e-06, "epoch": 0.6193047174651259, "percentage": 12.39, "elapsed_time": "0:32:12", "remaining_time": "3:47:46", "throughput": 8836.75, "total_tokens": 17073216} +{"current_steps": 25355, "total_steps": 204665, "loss": 0.1075, "lr": 1.9965283721623185e-06, "epoch": 0.6194268682969731, "percentage": 12.39, "elapsed_time": "0:32:12", "remaining_time": "3:47:46", "throughput": 8837.04, "total_tokens": 17076928} +{"current_steps": 25360, "total_steps": 204665, "loss": 0.0357, "lr": 1.9965212688503736e-06, "epoch": 0.6195490191288203, "percentage": 12.39, "elapsed_time": "0:32:12", "remaining_time": "3:47:45", "throughput": 8837.07, "total_tokens": 17080000} +{"current_steps": 25365, "total_steps": 204665, "loss": 0.0264, "lr": 1.9965141582914583e-06, "epoch": 0.6196711699606674, "percentage": 12.39, "elapsed_time": "0:32:13", "remaining_time": "3:47:44", "throughput": 8837.24, "total_tokens": 17083456} +{"current_steps": 25370, "total_steps": 204665, "loss": 0.1875, "lr": 1.996507040485624e-06, "epoch": 0.6197933207925146, "percentage": 12.4, "elapsed_time": "0:32:13", "remaining_time": "3:47:44", "throughput": 8837.7, "total_tokens": 17087616} +{"current_steps": 25375, "total_steps": 204665, "loss": 0.1642, "lr": 1.9964999154329224e-06, "epoch": 0.6199154716243618, "percentage": 12.4, "elapsed_time": "0:32:13", "remaining_time": "3:47:43", "throughput": 8837.72, "total_tokens": 17090688} +{"current_steps": 25380, "total_steps": 204665, "loss": 0.1116, "lr": 1.9964927831334056e-06, "epoch": 0.620037622456209, "percentage": 12.4, "elapsed_time": "0:32:14", "remaining_time": "3:47:43", "throughput": 8837.95, "total_tokens": 17094272} +{"current_steps": 25385, "total_steps": 204665, "loss": 0.1427, "lr": 1.996485643587125e-06, "epoch": 0.620159773288056, "percentage": 12.4, "elapsed_time": "0:32:14", "remaining_time": "3:47:42", "throughput": 8837.98, "total_tokens": 17097344} +{"current_steps": 25390, "total_steps": 204665, "loss": 0.1185, "lr": 1.996478496794133e-06, "epoch": 0.6202819241199032, "percentage": 12.41, "elapsed_time": "0:32:14", "remaining_time": "3:47:41", "throughput": 8838.3, "total_tokens": 17101120} +{"current_steps": 25395, "total_steps": 204665, "loss": 0.0828, "lr": 1.9964713427544813e-06, "epoch": 0.6204040749517504, "percentage": 12.41, "elapsed_time": "0:32:15", "remaining_time": "3:47:41", "throughput": 8838.33, "total_tokens": 17104192} +{"current_steps": 25400, "total_steps": 204665, "loss": 0.1144, "lr": 1.996464181468222e-06, "epoch": 0.6205262257835976, "percentage": 12.41, "elapsed_time": "0:32:15", "remaining_time": "3:47:40", "throughput": 8838.7, "total_tokens": 17108096} +{"current_steps": 25405, "total_steps": 204665, "loss": 0.2225, "lr": 1.9964570129354066e-06, "epoch": 0.6206483766154447, "percentage": 12.41, "elapsed_time": "0:32:15", "remaining_time": "3:47:40", "throughput": 8838.7, "total_tokens": 17111104} +{"current_steps": 25410, "total_steps": 204665, "loss": 0.0343, "lr": 1.9964498371560886e-06, "epoch": 0.6207705274472919, "percentage": 12.42, "elapsed_time": "0:32:16", "remaining_time": "3:47:39", "throughput": 8838.84, "total_tokens": 17114432} +{"current_steps": 25415, "total_steps": 204665, "loss": 0.1387, "lr": 1.9964426541303186e-06, "epoch": 0.6208926782791391, "percentage": 12.42, "elapsed_time": "0:32:16", "remaining_time": "3:47:38", "throughput": 8838.84, "total_tokens": 17117440} +{"current_steps": 25420, "total_steps": 204665, "loss": 0.1161, "lr": 1.9964354638581503e-06, "epoch": 0.6210148291109863, "percentage": 12.42, "elapsed_time": "0:32:16", "remaining_time": "3:47:38", "throughput": 8839.03, "total_tokens": 17120896} +{"current_steps": 25425, "total_steps": 204665, "loss": 0.0561, "lr": 1.996428266339635e-06, "epoch": 0.6211369799428335, "percentage": 12.42, "elapsed_time": "0:32:17", "remaining_time": "3:47:37", "throughput": 8839.25, "total_tokens": 17124416} +{"current_steps": 25430, "total_steps": 204665, "loss": 0.1095, "lr": 1.9964210615748255e-06, "epoch": 0.6212591307746805, "percentage": 12.43, "elapsed_time": "0:32:17", "remaining_time": "3:47:37", "throughput": 8839.39, "total_tokens": 17127808} +{"current_steps": 25435, "total_steps": 204665, "loss": 0.0779, "lr": 1.996413849563774e-06, "epoch": 0.6213812816065277, "percentage": 12.43, "elapsed_time": "0:32:18", "remaining_time": "3:47:36", "throughput": 8839.48, "total_tokens": 17131008} +{"current_steps": 25440, "total_steps": 204665, "loss": 0.0774, "lr": 1.9964066303065325e-06, "epoch": 0.6215034324383749, "percentage": 12.43, "elapsed_time": "0:32:18", "remaining_time": "3:47:35", "throughput": 8839.46, "total_tokens": 17133952} +{"current_steps": 25445, "total_steps": 204665, "loss": 0.0625, "lr": 1.9963994038031546e-06, "epoch": 0.6216255832702221, "percentage": 12.43, "elapsed_time": "0:32:18", "remaining_time": "3:47:35", "throughput": 8839.72, "total_tokens": 17137600} +{"current_steps": 25450, "total_steps": 204665, "loss": 0.2014, "lr": 1.996392170053692e-06, "epoch": 0.6217477341020692, "percentage": 12.43, "elapsed_time": "0:32:19", "remaining_time": "3:47:34", "throughput": 8839.83, "total_tokens": 17140864} +{"current_steps": 25455, "total_steps": 204665, "loss": 0.0841, "lr": 1.9963849290581974e-06, "epoch": 0.6218698849339164, "percentage": 12.44, "elapsed_time": "0:32:19", "remaining_time": "3:47:33", "throughput": 8839.99, "total_tokens": 17144256} +{"current_steps": 25460, "total_steps": 204665, "loss": 0.1522, "lr": 1.996377680816724e-06, "epoch": 0.6219920357657636, "percentage": 12.44, "elapsed_time": "0:32:19", "remaining_time": "3:47:33", "throughput": 8840.18, "total_tokens": 17147776} +{"current_steps": 25465, "total_steps": 204665, "loss": 0.1654, "lr": 1.9963704253293237e-06, "epoch": 0.6221141865976108, "percentage": 12.44, "elapsed_time": "0:32:20", "remaining_time": "3:47:32", "throughput": 8840.36, "total_tokens": 17151232} +{"current_steps": 25470, "total_steps": 204665, "loss": 0.1012, "lr": 1.99636316259605e-06, "epoch": 0.6222363374294579, "percentage": 12.44, "elapsed_time": "0:32:20", "remaining_time": "3:47:32", "throughput": 8840.52, "total_tokens": 17154624} +{"current_steps": 25475, "total_steps": 204665, "loss": 0.1666, "lr": 1.9963558926169552e-06, "epoch": 0.622358488261305, "percentage": 12.45, "elapsed_time": "0:32:20", "remaining_time": "3:47:31", "throughput": 8840.7, "total_tokens": 17158080} +{"current_steps": 25480, "total_steps": 204665, "loss": 0.0974, "lr": 1.9963486153920925e-06, "epoch": 0.6224806390931522, "percentage": 12.45, "elapsed_time": "0:32:21", "remaining_time": "3:47:31", "throughput": 8841.04, "total_tokens": 17161920} +{"current_steps": 25485, "total_steps": 204665, "loss": 0.1068, "lr": 1.9963413309215143e-06, "epoch": 0.6226027899249994, "percentage": 12.45, "elapsed_time": "0:32:21", "remaining_time": "3:47:30", "throughput": 8841.16, "total_tokens": 17165248} +{"current_steps": 25490, "total_steps": 204665, "loss": 0.1319, "lr": 1.9963340392052744e-06, "epoch": 0.6227249407568466, "percentage": 12.45, "elapsed_time": "0:32:21", "remaining_time": "3:47:29", "throughput": 8841.3, "total_tokens": 17168576} +{"current_steps": 25495, "total_steps": 204665, "loss": 0.1814, "lr": 1.9963267402434253e-06, "epoch": 0.6228470915886937, "percentage": 12.46, "elapsed_time": "0:32:22", "remaining_time": "3:47:29", "throughput": 8841.48, "total_tokens": 17172032} +{"current_steps": 25500, "total_steps": 204665, "loss": 0.0794, "lr": 1.99631943403602e-06, "epoch": 0.6229692424205409, "percentage": 12.46, "elapsed_time": "0:32:22", "remaining_time": "3:47:28", "throughput": 8841.69, "total_tokens": 17175552} +{"current_steps": 25505, "total_steps": 204665, "loss": 0.0511, "lr": 1.996312120583112e-06, "epoch": 0.623091393252388, "percentage": 12.46, "elapsed_time": "0:32:22", "remaining_time": "3:47:27", "throughput": 8841.83, "total_tokens": 17178880} +{"current_steps": 25510, "total_steps": 204665, "loss": 0.1083, "lr": 1.996304799884754e-06, "epoch": 0.6232135440842352, "percentage": 12.46, "elapsed_time": "0:32:23", "remaining_time": "3:47:27", "throughput": 8841.96, "total_tokens": 17182208} +{"current_steps": 25515, "total_steps": 204665, "loss": 0.0684, "lr": 1.996297471941e-06, "epoch": 0.6233356949160824, "percentage": 12.47, "elapsed_time": "0:32:23", "remaining_time": "3:47:26", "throughput": 8842.12, "total_tokens": 17185600} +{"current_steps": 25520, "total_steps": 204665, "loss": 0.071, "lr": 1.9962901367519023e-06, "epoch": 0.6234578457479295, "percentage": 12.47, "elapsed_time": "0:32:23", "remaining_time": "3:47:26", "throughput": 8842.36, "total_tokens": 17189184} +{"current_steps": 25525, "total_steps": 204665, "loss": 0.1737, "lr": 1.996282794317515e-06, "epoch": 0.6235799965797767, "percentage": 12.47, "elapsed_time": "0:32:24", "remaining_time": "3:47:25", "throughput": 8842.76, "total_tokens": 17193152} +{"current_steps": 25530, "total_steps": 204665, "loss": 0.1166, "lr": 1.996275444637891e-06, "epoch": 0.6237021474116239, "percentage": 12.47, "elapsed_time": "0:32:24", "remaining_time": "3:47:25", "throughput": 8842.96, "total_tokens": 17196672} +{"current_steps": 25535, "total_steps": 204665, "loss": 0.1957, "lr": 1.9962680877130842e-06, "epoch": 0.6238242982434711, "percentage": 12.48, "elapsed_time": "0:32:25", "remaining_time": "3:47:24", "throughput": 8843.09, "total_tokens": 17200000} +{"current_steps": 25540, "total_steps": 204665, "loss": 0.1698, "lr": 1.996260723543148e-06, "epoch": 0.6239464490753182, "percentage": 12.48, "elapsed_time": "0:32:25", "remaining_time": "3:47:23", "throughput": 8843.24, "total_tokens": 17203392} +{"current_steps": 25545, "total_steps": 204665, "loss": 0.1698, "lr": 1.996253352128136e-06, "epoch": 0.6240685999071653, "percentage": 12.48, "elapsed_time": "0:32:25", "remaining_time": "3:47:23", "throughput": 8843.32, "total_tokens": 17206592} +{"current_steps": 25550, "total_steps": 204665, "loss": 0.1594, "lr": 1.996245973468101e-06, "epoch": 0.6241907507390125, "percentage": 12.48, "elapsed_time": "0:32:26", "remaining_time": "3:47:22", "throughput": 8843.48, "total_tokens": 17210048} +{"current_steps": 25555, "total_steps": 204665, "loss": 0.1584, "lr": 1.9962385875630977e-06, "epoch": 0.6243129015708597, "percentage": 12.49, "elapsed_time": "0:32:26", "remaining_time": "3:47:22", "throughput": 8844.21, "total_tokens": 17214848} +{"current_steps": 25560, "total_steps": 204665, "loss": 0.0985, "lr": 1.9962311944131796e-06, "epoch": 0.6244350524027069, "percentage": 12.49, "elapsed_time": "0:32:26", "remaining_time": "3:47:21", "throughput": 8844.38, "total_tokens": 17218304} +{"current_steps": 25565, "total_steps": 204665, "loss": 0.0956, "lr": 1.9962237940184003e-06, "epoch": 0.624557203234554, "percentage": 12.49, "elapsed_time": "0:32:27", "remaining_time": "3:47:21", "throughput": 8844.52, "total_tokens": 17221632} +{"current_steps": 25570, "total_steps": 204665, "loss": 0.1397, "lr": 1.9962163863788134e-06, "epoch": 0.6246793540664012, "percentage": 12.49, "elapsed_time": "0:32:27", "remaining_time": "3:47:20", "throughput": 8844.72, "total_tokens": 17225152} +{"current_steps": 25575, "total_steps": 204665, "loss": 0.0717, "lr": 1.996208971494473e-06, "epoch": 0.6248015048982484, "percentage": 12.5, "elapsed_time": "0:32:27", "remaining_time": "3:47:20", "throughput": 8845.1, "total_tokens": 17229056} +{"current_steps": 25580, "total_steps": 204665, "loss": 0.0514, "lr": 1.9962015493654334e-06, "epoch": 0.6249236557300956, "percentage": 12.5, "elapsed_time": "0:32:28", "remaining_time": "3:47:19", "throughput": 8845.21, "total_tokens": 17232320} +{"current_steps": 25585, "total_steps": 204665, "loss": 0.1642, "lr": 1.9961941199917477e-06, "epoch": 0.6250458065619426, "percentage": 12.5, "elapsed_time": "0:32:28", "remaining_time": "3:47:18", "throughput": 8845.22, "total_tokens": 17235328} +{"current_steps": 25590, "total_steps": 204665, "loss": 0.0463, "lr": 1.9961866833734705e-06, "epoch": 0.6251679573937898, "percentage": 12.5, "elapsed_time": "0:32:28", "remaining_time": "3:47:18", "throughput": 8845.22, "total_tokens": 17238336} +{"current_steps": 25595, "total_steps": 204665, "loss": 0.1545, "lr": 1.996179239510656e-06, "epoch": 0.625290108225637, "percentage": 12.51, "elapsed_time": "0:32:29", "remaining_time": "3:47:17", "throughput": 8845.39, "total_tokens": 17241728} +{"current_steps": 25600, "total_steps": 204665, "loss": 0.1015, "lr": 1.996171788403358e-06, "epoch": 0.6254122590574842, "percentage": 12.51, "elapsed_time": "0:32:29", "remaining_time": "3:47:16", "throughput": 8845.62, "total_tokens": 17245248} +{"current_steps": 25605, "total_steps": 204665, "loss": 0.1408, "lr": 1.996164330051631e-06, "epoch": 0.6255344098893314, "percentage": 12.51, "elapsed_time": "0:32:29", "remaining_time": "3:47:16", "throughput": 8845.91, "total_tokens": 17248960} +{"current_steps": 25610, "total_steps": 204665, "loss": 0.0738, "lr": 1.996156864455529e-06, "epoch": 0.6256565607211785, "percentage": 12.51, "elapsed_time": "0:32:30", "remaining_time": "3:47:15", "throughput": 8846.06, "total_tokens": 17252352} +{"current_steps": 25615, "total_steps": 204665, "loss": 0.1187, "lr": 1.996149391615106e-06, "epoch": 0.6257787115530257, "percentage": 12.52, "elapsed_time": "0:32:30", "remaining_time": "3:47:14", "throughput": 8846.12, "total_tokens": 17255488} +{"current_steps": 25620, "total_steps": 204665, "loss": 0.0899, "lr": 1.996141911530417e-06, "epoch": 0.6259008623848729, "percentage": 12.52, "elapsed_time": "0:32:30", "remaining_time": "3:47:14", "throughput": 8846.23, "total_tokens": 17258752} +{"current_steps": 25625, "total_steps": 204665, "loss": 0.1487, "lr": 1.996134424201516e-06, "epoch": 0.62602301321672, "percentage": 12.52, "elapsed_time": "0:32:31", "remaining_time": "3:47:13", "throughput": 8846.44, "total_tokens": 17262272} +{"current_steps": 25630, "total_steps": 204665, "loss": 0.1187, "lr": 1.9961269296284574e-06, "epoch": 0.6261451640485671, "percentage": 12.52, "elapsed_time": "0:32:31", "remaining_time": "3:47:13", "throughput": 8846.59, "total_tokens": 17265600} +{"current_steps": 25635, "total_steps": 204665, "loss": 0.072, "lr": 1.9961194278112963e-06, "epoch": 0.6262673148804143, "percentage": 12.53, "elapsed_time": "0:32:32", "remaining_time": "3:47:12", "throughput": 8846.75, "total_tokens": 17268992} +{"current_steps": 25640, "total_steps": 204665, "loss": 0.1307, "lr": 1.9961119187500867e-06, "epoch": 0.6263894657122615, "percentage": 12.53, "elapsed_time": "0:32:32", "remaining_time": "3:47:12", "throughput": 8847.22, "total_tokens": 17273152} +{"current_steps": 25645, "total_steps": 204665, "loss": 0.0832, "lr": 1.996104402444883e-06, "epoch": 0.6265116165441087, "percentage": 12.53, "elapsed_time": "0:32:32", "remaining_time": "3:47:11", "throughput": 8847.36, "total_tokens": 17276480} +{"current_steps": 25650, "total_steps": 204665, "loss": 0.1212, "lr": 1.99609687889574e-06, "epoch": 0.6266337673759559, "percentage": 12.53, "elapsed_time": "0:32:33", "remaining_time": "3:47:10", "throughput": 8847.36, "total_tokens": 17279488} +{"current_steps": 25655, "total_steps": 204665, "loss": 0.1022, "lr": 1.996089348102713e-06, "epoch": 0.626755918207803, "percentage": 12.54, "elapsed_time": "0:32:33", "remaining_time": "3:47:10", "throughput": 8847.69, "total_tokens": 17283328} +{"current_steps": 25660, "total_steps": 204665, "loss": 0.0568, "lr": 1.996081810065856e-06, "epoch": 0.6268780690396502, "percentage": 12.54, "elapsed_time": "0:32:33", "remaining_time": "3:47:09", "throughput": 8847.91, "total_tokens": 17286848} +{"current_steps": 25665, "total_steps": 204665, "loss": 0.1768, "lr": 1.9960742647852246e-06, "epoch": 0.6270002198714973, "percentage": 12.54, "elapsed_time": "0:32:34", "remaining_time": "3:47:09", "throughput": 8848.41, "total_tokens": 17291072} +{"current_steps": 25670, "total_steps": 204665, "loss": 0.1485, "lr": 1.9960667122608732e-06, "epoch": 0.6271223707033445, "percentage": 12.54, "elapsed_time": "0:32:34", "remaining_time": "3:47:08", "throughput": 8848.47, "total_tokens": 17294208} +{"current_steps": 25675, "total_steps": 204665, "loss": 0.1853, "lr": 1.996059152492856e-06, "epoch": 0.6272445215351916, "percentage": 12.54, "elapsed_time": "0:32:34", "remaining_time": "3:47:07", "throughput": 8848.84, "total_tokens": 17298112} +{"current_steps": 25680, "total_steps": 204665, "loss": 0.1721, "lr": 1.9960515854812298e-06, "epoch": 0.6273666723670388, "percentage": 12.55, "elapsed_time": "0:32:35", "remaining_time": "3:47:07", "throughput": 8848.87, "total_tokens": 17301184} +{"current_steps": 25685, "total_steps": 204665, "loss": 0.1373, "lr": 1.996044011226048e-06, "epoch": 0.627488823198886, "percentage": 12.55, "elapsed_time": "0:32:35", "remaining_time": "3:47:06", "throughput": 8849.01, "total_tokens": 17304512} +{"current_steps": 25690, "total_steps": 204665, "loss": 0.0416, "lr": 1.996036429727366e-06, "epoch": 0.6276109740307332, "percentage": 12.55, "elapsed_time": "0:32:35", "remaining_time": "3:47:06", "throughput": 8849.0, "total_tokens": 17307520} +{"current_steps": 25695, "total_steps": 204665, "loss": 0.152, "lr": 1.99602884098524e-06, "epoch": 0.6277331248625803, "percentage": 12.55, "elapsed_time": "0:32:36", "remaining_time": "3:47:05", "throughput": 8849.24, "total_tokens": 17311104} +{"current_steps": 25700, "total_steps": 204665, "loss": 0.0961, "lr": 1.9960212449997238e-06, "epoch": 0.6278552756944275, "percentage": 12.56, "elapsed_time": "0:32:36", "remaining_time": "3:47:04", "throughput": 8849.43, "total_tokens": 17314560} +{"current_steps": 25705, "total_steps": 204665, "loss": 0.1072, "lr": 1.996013641770873e-06, "epoch": 0.6279774265262746, "percentage": 12.56, "elapsed_time": "0:32:36", "remaining_time": "3:47:04", "throughput": 8849.55, "total_tokens": 17317888} +{"current_steps": 25710, "total_steps": 204665, "loss": 0.0653, "lr": 1.9960060312987434e-06, "epoch": 0.6280995773581218, "percentage": 12.56, "elapsed_time": "0:32:37", "remaining_time": "3:47:03", "throughput": 8849.56, "total_tokens": 17320896} +{"current_steps": 25715, "total_steps": 204665, "loss": 0.1914, "lr": 1.9959984135833902e-06, "epoch": 0.628221728189969, "percentage": 12.56, "elapsed_time": "0:32:37", "remaining_time": "3:47:02", "throughput": 8849.66, "total_tokens": 17324160} +{"current_steps": 25720, "total_steps": 204665, "loss": 0.1041, "lr": 1.9959907886248686e-06, "epoch": 0.6283438790218161, "percentage": 12.57, "elapsed_time": "0:32:37", "remaining_time": "3:47:02", "throughput": 8849.66, "total_tokens": 17327168} +{"current_steps": 25725, "total_steps": 204665, "loss": 0.1079, "lr": 1.9959831564232335e-06, "epoch": 0.6284660298536633, "percentage": 12.57, "elapsed_time": "0:32:38", "remaining_time": "3:47:01", "throughput": 8849.84, "total_tokens": 17330624} +{"current_steps": 25730, "total_steps": 204665, "loss": 0.0786, "lr": 1.9959755169785417e-06, "epoch": 0.6285881806855105, "percentage": 12.57, "elapsed_time": "0:32:38", "remaining_time": "3:47:01", "throughput": 8849.84, "total_tokens": 17333632} +{"current_steps": 25735, "total_steps": 204665, "loss": 0.0607, "lr": 1.995967870290848e-06, "epoch": 0.6287103315173577, "percentage": 12.57, "elapsed_time": "0:32:38", "remaining_time": "3:47:00", "throughput": 8849.99, "total_tokens": 17337024} +{"current_steps": 25740, "total_steps": 204665, "loss": 0.0855, "lr": 1.9959602163602077e-06, "epoch": 0.6288324823492047, "percentage": 12.58, "elapsed_time": "0:32:39", "remaining_time": "3:46:59", "throughput": 8850.14, "total_tokens": 17340352} +{"current_steps": 25745, "total_steps": 204665, "loss": 0.2015, "lr": 1.9959525551866767e-06, "epoch": 0.6289546331810519, "percentage": 12.58, "elapsed_time": "0:32:39", "remaining_time": "3:46:59", "throughput": 8850.24, "total_tokens": 17343616} +{"current_steps": 25750, "total_steps": 204665, "loss": 0.1225, "lr": 1.9959448867703115e-06, "epoch": 0.6290767840128991, "percentage": 12.58, "elapsed_time": "0:32:40", "remaining_time": "3:46:58", "throughput": 8850.36, "total_tokens": 17346880} +{"current_steps": 25755, "total_steps": 204665, "loss": 0.1385, "lr": 1.995937211111167e-06, "epoch": 0.6291989348447463, "percentage": 12.58, "elapsed_time": "0:32:40", "remaining_time": "3:46:57", "throughput": 8850.43, "total_tokens": 17350080} +{"current_steps": 25760, "total_steps": 204665, "loss": 0.1036, "lr": 1.9959295282092987e-06, "epoch": 0.6293210856765935, "percentage": 12.59, "elapsed_time": "0:32:40", "remaining_time": "3:46:57", "throughput": 8850.45, "total_tokens": 17353152} +{"current_steps": 25765, "total_steps": 204665, "loss": 0.1537, "lr": 1.9959218380647638e-06, "epoch": 0.6294432365084406, "percentage": 12.59, "elapsed_time": "0:32:41", "remaining_time": "3:46:56", "throughput": 8850.69, "total_tokens": 17356736} +{"current_steps": 25770, "total_steps": 204665, "loss": 0.1194, "lr": 1.995914140677617e-06, "epoch": 0.6295653873402878, "percentage": 12.59, "elapsed_time": "0:32:41", "remaining_time": "3:46:56", "throughput": 8850.87, "total_tokens": 17360192} +{"current_steps": 25775, "total_steps": 204665, "loss": 0.0897, "lr": 1.9959064360479144e-06, "epoch": 0.629687538172135, "percentage": 12.59, "elapsed_time": "0:32:41", "remaining_time": "3:46:55", "throughput": 8852.02, "total_tokens": 17366144} +{"current_steps": 25780, "total_steps": 204665, "loss": 0.1683, "lr": 1.9958987241757126e-06, "epoch": 0.6298096890039822, "percentage": 12.6, "elapsed_time": "0:32:42", "remaining_time": "3:46:55", "throughput": 8852.13, "total_tokens": 17369408} +{"current_steps": 25785, "total_steps": 204665, "loss": 0.0886, "lr": 1.9958910050610674e-06, "epoch": 0.6299318398358292, "percentage": 12.6, "elapsed_time": "0:32:42", "remaining_time": "3:46:54", "throughput": 8852.34, "total_tokens": 17372928} +{"current_steps": 25790, "total_steps": 204665, "loss": 0.0823, "lr": 1.995883278704035e-06, "epoch": 0.6300539906676764, "percentage": 12.6, "elapsed_time": "0:32:42", "remaining_time": "3:46:54", "throughput": 8852.55, "total_tokens": 17376448} +{"current_steps": 25795, "total_steps": 204665, "loss": 0.1744, "lr": 1.9958755451046716e-06, "epoch": 0.6301761414995236, "percentage": 12.6, "elapsed_time": "0:32:43", "remaining_time": "3:46:53", "throughput": 8852.68, "total_tokens": 17379776} +{"current_steps": 25800, "total_steps": 204665, "loss": 0.1318, "lr": 1.9958678042630333e-06, "epoch": 0.6302982923313708, "percentage": 12.61, "elapsed_time": "0:32:43", "remaining_time": "3:46:52", "throughput": 8852.76, "total_tokens": 17382976} +{"current_steps": 25805, "total_steps": 204665, "loss": 0.0919, "lr": 1.9958600561791765e-06, "epoch": 0.630420443163218, "percentage": 12.61, "elapsed_time": "0:32:43", "remaining_time": "3:46:52", "throughput": 8852.91, "total_tokens": 17386368} +{"current_steps": 25810, "total_steps": 204665, "loss": 0.1327, "lr": 1.9958523008531574e-06, "epoch": 0.6305425939950651, "percentage": 12.61, "elapsed_time": "0:32:44", "remaining_time": "3:46:51", "throughput": 8852.91, "total_tokens": 17389376} +{"current_steps": 25815, "total_steps": 204665, "loss": 0.1494, "lr": 1.9958445382850325e-06, "epoch": 0.6306647448269123, "percentage": 12.61, "elapsed_time": "0:32:44", "remaining_time": "3:46:51", "throughput": 8853.06, "total_tokens": 17392768} +{"current_steps": 25820, "total_steps": 204665, "loss": 0.1626, "lr": 1.9958367684748585e-06, "epoch": 0.6307868956587595, "percentage": 12.62, "elapsed_time": "0:32:44", "remaining_time": "3:46:50", "throughput": 8853.01, "total_tokens": 17395648} +{"current_steps": 25825, "total_steps": 204665, "loss": 0.097, "lr": 1.9958289914226917e-06, "epoch": 0.6309090464906066, "percentage": 12.62, "elapsed_time": "0:32:45", "remaining_time": "3:46:49", "throughput": 8853.06, "total_tokens": 17398784} +{"current_steps": 25830, "total_steps": 204665, "loss": 0.1565, "lr": 1.9958212071285885e-06, "epoch": 0.6310311973224537, "percentage": 12.62, "elapsed_time": "0:32:45", "remaining_time": "3:46:49", "throughput": 8853.19, "total_tokens": 17402112} +{"current_steps": 25835, "total_steps": 204665, "loss": 0.1212, "lr": 1.9958134155926055e-06, "epoch": 0.6311533481543009, "percentage": 12.62, "elapsed_time": "0:32:45", "remaining_time": "3:46:48", "throughput": 8853.38, "total_tokens": 17405568} +{"current_steps": 25840, "total_steps": 204665, "loss": 0.0662, "lr": 1.9958056168147996e-06, "epoch": 0.6312754989861481, "percentage": 12.63, "elapsed_time": "0:32:46", "remaining_time": "3:46:47", "throughput": 8853.41, "total_tokens": 17408640} +{"current_steps": 25845, "total_steps": 204665, "loss": 0.1154, "lr": 1.9957978107952275e-06, "epoch": 0.6313976498179953, "percentage": 12.63, "elapsed_time": "0:32:46", "remaining_time": "3:46:47", "throughput": 8853.6, "total_tokens": 17412096} +{"current_steps": 25850, "total_steps": 204665, "loss": 0.124, "lr": 1.995789997533946e-06, "epoch": 0.6315198006498425, "percentage": 12.63, "elapsed_time": "0:32:47", "remaining_time": "3:46:46", "throughput": 8853.63, "total_tokens": 17415168} +{"current_steps": 25855, "total_steps": 204665, "loss": 0.1397, "lr": 1.995782177031011e-06, "epoch": 0.6316419514816896, "percentage": 12.63, "elapsed_time": "0:32:47", "remaining_time": "3:46:45", "throughput": 8853.71, "total_tokens": 17418368} +{"current_steps": 25860, "total_steps": 204665, "loss": 0.1161, "lr": 1.995774349286481e-06, "epoch": 0.6317641023135367, "percentage": 12.64, "elapsed_time": "0:32:47", "remaining_time": "3:46:45", "throughput": 8853.91, "total_tokens": 17421824} +{"current_steps": 25865, "total_steps": 204665, "loss": 0.0884, "lr": 1.995766514300412e-06, "epoch": 0.6318862531453839, "percentage": 12.64, "elapsed_time": "0:32:48", "remaining_time": "3:46:44", "throughput": 8853.98, "total_tokens": 17425024} +{"current_steps": 25870, "total_steps": 204665, "loss": 0.1705, "lr": 1.995758672072861e-06, "epoch": 0.6320084039772311, "percentage": 12.64, "elapsed_time": "0:32:48", "remaining_time": "3:46:44", "throughput": 8854.0, "total_tokens": 17428096} +{"current_steps": 25875, "total_steps": 204665, "loss": 0.0775, "lr": 1.995750822603885e-06, "epoch": 0.6321305548090782, "percentage": 12.64, "elapsed_time": "0:32:48", "remaining_time": "3:46:43", "throughput": 8854.1, "total_tokens": 17431360} +{"current_steps": 25880, "total_steps": 204665, "loss": 0.1135, "lr": 1.9957429658935415e-06, "epoch": 0.6322527056409254, "percentage": 12.65, "elapsed_time": "0:32:49", "remaining_time": "3:46:42", "throughput": 8854.27, "total_tokens": 17434816} +{"current_steps": 25885, "total_steps": 204665, "loss": 0.1094, "lr": 1.995735101941887e-06, "epoch": 0.6323748564727726, "percentage": 12.65, "elapsed_time": "0:32:49", "remaining_time": "3:46:42", "throughput": 8854.47, "total_tokens": 17438336} +{"current_steps": 25890, "total_steps": 204665, "loss": 0.0563, "lr": 1.995727230748979e-06, "epoch": 0.6324970073046198, "percentage": 12.65, "elapsed_time": "0:32:49", "remaining_time": "3:46:41", "throughput": 8854.54, "total_tokens": 17441536} +{"current_steps": 25895, "total_steps": 204665, "loss": 0.1082, "lr": 1.995719352314875e-06, "epoch": 0.632619158136467, "percentage": 12.65, "elapsed_time": "0:32:50", "remaining_time": "3:46:41", "throughput": 8854.72, "total_tokens": 17444992} +{"current_steps": 25900, "total_steps": 204665, "loss": 0.1079, "lr": 1.995711466639632e-06, "epoch": 0.632741308968314, "percentage": 12.65, "elapsed_time": "0:32:50", "remaining_time": "3:46:40", "throughput": 8854.82, "total_tokens": 17448192} +{"current_steps": 25905, "total_steps": 204665, "loss": 0.0583, "lr": 1.9957035737233072e-06, "epoch": 0.6328634598001612, "percentage": 12.66, "elapsed_time": "0:32:50", "remaining_time": "3:46:39", "throughput": 8854.93, "total_tokens": 17451456} +{"current_steps": 25910, "total_steps": 204665, "loss": 0.0693, "lr": 1.9956956735659583e-06, "epoch": 0.6329856106320084, "percentage": 12.66, "elapsed_time": "0:32:51", "remaining_time": "3:46:39", "throughput": 8855.2, "total_tokens": 17455168} +{"current_steps": 25915, "total_steps": 204665, "loss": 0.0966, "lr": 1.9956877661676427e-06, "epoch": 0.6331077614638556, "percentage": 12.66, "elapsed_time": "0:32:51", "remaining_time": "3:46:38", "throughput": 8855.4, "total_tokens": 17458688} +{"current_steps": 25920, "total_steps": 204665, "loss": 0.171, "lr": 1.9956798515284178e-06, "epoch": 0.6332299122957027, "percentage": 12.66, "elapsed_time": "0:32:51", "remaining_time": "3:46:38", "throughput": 8855.56, "total_tokens": 17462080} +{"current_steps": 25925, "total_steps": 204665, "loss": 0.0922, "lr": 1.9956719296483414e-06, "epoch": 0.6333520631275499, "percentage": 12.67, "elapsed_time": "0:32:52", "remaining_time": "3:46:37", "throughput": 8855.73, "total_tokens": 17465472} +{"current_steps": 25930, "total_steps": 204665, "loss": 0.1561, "lr": 1.9956640005274708e-06, "epoch": 0.6334742139593971, "percentage": 12.67, "elapsed_time": "0:32:52", "remaining_time": "3:46:36", "throughput": 8856.02, "total_tokens": 17469184} +{"current_steps": 25935, "total_steps": 204665, "loss": 0.154, "lr": 1.9956560641658635e-06, "epoch": 0.6335963647912443, "percentage": 12.67, "elapsed_time": "0:32:52", "remaining_time": "3:46:36", "throughput": 8856.48, "total_tokens": 17473344} +{"current_steps": 25940, "total_steps": 204665, "loss": 0.0313, "lr": 1.995648120563578e-06, "epoch": 0.6337185156230913, "percentage": 12.67, "elapsed_time": "0:32:53", "remaining_time": "3:46:35", "throughput": 8856.75, "total_tokens": 17476992} +{"current_steps": 25945, "total_steps": 204665, "loss": 0.1835, "lr": 1.9956401697206712e-06, "epoch": 0.6338406664549385, "percentage": 12.68, "elapsed_time": "0:32:53", "remaining_time": "3:46:35", "throughput": 8856.83, "total_tokens": 17480192} +{"current_steps": 25950, "total_steps": 204665, "loss": 0.1739, "lr": 1.9956322116372013e-06, "epoch": 0.6339628172867857, "percentage": 12.68, "elapsed_time": "0:32:53", "remaining_time": "3:46:34", "throughput": 8857.03, "total_tokens": 17483712} +{"current_steps": 25955, "total_steps": 204665, "loss": 0.1891, "lr": 1.9956242463132265e-06, "epoch": 0.6340849681186329, "percentage": 12.68, "elapsed_time": "0:32:54", "remaining_time": "3:46:33", "throughput": 8856.94, "total_tokens": 17486464} +{"current_steps": 25960, "total_steps": 204665, "loss": 0.1866, "lr": 1.9956162737488043e-06, "epoch": 0.6342071189504801, "percentage": 12.68, "elapsed_time": "0:32:54", "remaining_time": "3:46:33", "throughput": 8857.27, "total_tokens": 17490240} +{"current_steps": 25965, "total_steps": 204665, "loss": 0.0477, "lr": 1.9956082939439923e-06, "epoch": 0.6343292697823272, "percentage": 12.69, "elapsed_time": "0:32:55", "remaining_time": "3:46:32", "throughput": 8857.38, "total_tokens": 17493504} +{"current_steps": 25970, "total_steps": 204665, "loss": 0.1017, "lr": 1.99560030689885e-06, "epoch": 0.6344514206141744, "percentage": 12.69, "elapsed_time": "0:32:55", "remaining_time": "3:46:32", "throughput": 8857.45, "total_tokens": 17496704} +{"current_steps": 25975, "total_steps": 204665, "loss": 0.0958, "lr": 1.9955923126134336e-06, "epoch": 0.6345735714460216, "percentage": 12.69, "elapsed_time": "0:32:55", "remaining_time": "3:46:31", "throughput": 8857.45, "total_tokens": 17499712} +{"current_steps": 25980, "total_steps": 204665, "loss": 0.0124, "lr": 1.995584311087802e-06, "epoch": 0.6346957222778687, "percentage": 12.69, "elapsed_time": "0:32:56", "remaining_time": "3:46:30", "throughput": 8857.7, "total_tokens": 17503360} +{"current_steps": 25985, "total_steps": 204665, "loss": 0.0913, "lr": 1.995576302322014e-06, "epoch": 0.6348178731097158, "percentage": 12.7, "elapsed_time": "0:32:56", "remaining_time": "3:46:30", "throughput": 8857.94, "total_tokens": 17506944} +{"current_steps": 25990, "total_steps": 204665, "loss": 0.0773, "lr": 1.995568286316127e-06, "epoch": 0.634940023941563, "percentage": 12.7, "elapsed_time": "0:32:56", "remaining_time": "3:46:29", "throughput": 8858.02, "total_tokens": 17510144} +{"current_steps": 25995, "total_steps": 204665, "loss": 0.1737, "lr": 1.9955602630702004e-06, "epoch": 0.6350621747734102, "percentage": 12.7, "elapsed_time": "0:32:57", "remaining_time": "3:46:29", "throughput": 8858.05, "total_tokens": 17513216} +{"current_steps": 26000, "total_steps": 204665, "loss": 0.1516, "lr": 1.995552232584291e-06, "epoch": 0.6351843256052574, "percentage": 12.7, "elapsed_time": "0:32:57", "remaining_time": "3:46:28", "throughput": 8858.2, "total_tokens": 17516608} +{"current_steps": 26005, "total_steps": 204665, "loss": 0.1055, "lr": 1.9955441948584584e-06, "epoch": 0.6353064764371046, "percentage": 12.71, "elapsed_time": "0:32:57", "remaining_time": "3:46:27", "throughput": 8858.31, "total_tokens": 17519872} +{"current_steps": 26010, "total_steps": 204665, "loss": 0.1741, "lr": 1.9955361498927604e-06, "epoch": 0.6354286272689517, "percentage": 12.71, "elapsed_time": "0:32:58", "remaining_time": "3:46:27", "throughput": 8858.79, "total_tokens": 17524032} +{"current_steps": 26015, "total_steps": 204665, "loss": 0.081, "lr": 1.995528097687256e-06, "epoch": 0.6355507781007989, "percentage": 12.71, "elapsed_time": "0:32:58", "remaining_time": "3:46:26", "throughput": 8858.84, "total_tokens": 17527168} +{"current_steps": 26020, "total_steps": 204665, "loss": 0.1234, "lr": 1.995520038242003e-06, "epoch": 0.635672928932646, "percentage": 12.71, "elapsed_time": "0:32:58", "remaining_time": "3:46:26", "throughput": 8858.85, "total_tokens": 17530240} +{"current_steps": 26025, "total_steps": 204665, "loss": 0.1828, "lr": 1.995511971557061e-06, "epoch": 0.6357950797644932, "percentage": 12.72, "elapsed_time": "0:32:59", "remaining_time": "3:46:25", "throughput": 8858.94, "total_tokens": 17533440} +{"current_steps": 26030, "total_steps": 204665, "loss": 0.1446, "lr": 1.9955038976324882e-06, "epoch": 0.6359172305963403, "percentage": 12.72, "elapsed_time": "0:32:59", "remaining_time": "3:46:24", "throughput": 8859.07, "total_tokens": 17536768} +{"current_steps": 26035, "total_steps": 204665, "loss": 0.022, "lr": 1.995495816468343e-06, "epoch": 0.6360393814281875, "percentage": 12.72, "elapsed_time": "0:32:59", "remaining_time": "3:46:24", "throughput": 8859.3, "total_tokens": 17540352} +{"current_steps": 26040, "total_steps": 204665, "loss": 0.0648, "lr": 1.9954877280646847e-06, "epoch": 0.6361615322600347, "percentage": 12.72, "elapsed_time": "0:33:00", "remaining_time": "3:46:23", "throughput": 8859.4, "total_tokens": 17543616} +{"current_steps": 26045, "total_steps": 204665, "loss": 0.0706, "lr": 1.995479632421572e-06, "epoch": 0.6362836830918819, "percentage": 12.73, "elapsed_time": "0:33:00", "remaining_time": "3:46:23", "throughput": 8859.77, "total_tokens": 17547520} +{"current_steps": 26050, "total_steps": 204665, "loss": 0.1602, "lr": 1.9954715295390634e-06, "epoch": 0.6364058339237291, "percentage": 12.73, "elapsed_time": "0:33:00", "remaining_time": "3:46:22", "throughput": 8859.81, "total_tokens": 17550592} +{"current_steps": 26055, "total_steps": 204665, "loss": 0.0806, "lr": 1.995463419417218e-06, "epoch": 0.6365279847555761, "percentage": 12.73, "elapsed_time": "0:33:01", "remaining_time": "3:46:21", "throughput": 8859.77, "total_tokens": 17553472} +{"current_steps": 26060, "total_steps": 204665, "loss": 0.1826, "lr": 1.9954553020560952e-06, "epoch": 0.6366501355874233, "percentage": 12.73, "elapsed_time": "0:33:01", "remaining_time": "3:46:21", "throughput": 8859.95, "total_tokens": 17556928} +{"current_steps": 26065, "total_steps": 204665, "loss": 0.0413, "lr": 1.9954471774557536e-06, "epoch": 0.6367722864192705, "percentage": 12.74, "elapsed_time": "0:33:01", "remaining_time": "3:46:20", "throughput": 8860.13, "total_tokens": 17560384} +{"current_steps": 26070, "total_steps": 204665, "loss": 0.1716, "lr": 1.995439045616252e-06, "epoch": 0.6368944372511177, "percentage": 12.74, "elapsed_time": "0:33:02", "remaining_time": "3:46:19", "throughput": 8860.21, "total_tokens": 17563584} +{"current_steps": 26075, "total_steps": 204665, "loss": 0.1879, "lr": 1.9954309065376504e-06, "epoch": 0.6370165880829648, "percentage": 12.74, "elapsed_time": "0:33:02", "remaining_time": "3:46:19", "throughput": 8860.29, "total_tokens": 17566784} +{"current_steps": 26080, "total_steps": 204665, "loss": 0.1128, "lr": 1.9954227602200075e-06, "epoch": 0.637138738914812, "percentage": 12.74, "elapsed_time": "0:33:02", "remaining_time": "3:46:18", "throughput": 8860.38, "total_tokens": 17569984} +{"current_steps": 26085, "total_steps": 204665, "loss": 0.099, "lr": 1.995414606663382e-06, "epoch": 0.6372608897466592, "percentage": 12.75, "elapsed_time": "0:33:03", "remaining_time": "3:46:18", "throughput": 8860.59, "total_tokens": 17573504} +{"current_steps": 26090, "total_steps": 204665, "loss": 0.1222, "lr": 1.995406445867834e-06, "epoch": 0.6373830405785064, "percentage": 12.75, "elapsed_time": "0:33:03", "remaining_time": "3:46:17", "throughput": 8860.97, "total_tokens": 17577472} +{"current_steps": 26095, "total_steps": 204665, "loss": 0.047, "lr": 1.9953982778334232e-06, "epoch": 0.6375051914103536, "percentage": 12.75, "elapsed_time": "0:33:04", "remaining_time": "3:46:17", "throughput": 8861.38, "total_tokens": 17581504} +{"current_steps": 26100, "total_steps": 204665, "loss": 0.1131, "lr": 1.995390102560208e-06, "epoch": 0.6376273422422006, "percentage": 12.75, "elapsed_time": "0:33:04", "remaining_time": "3:46:16", "throughput": 8861.38, "total_tokens": 17584512} +{"current_steps": 26105, "total_steps": 204665, "loss": 0.3489, "lr": 1.995381920048248e-06, "epoch": 0.6377494930740478, "percentage": 12.75, "elapsed_time": "0:33:04", "remaining_time": "3:46:15", "throughput": 8861.44, "total_tokens": 17587648} +{"current_steps": 26110, "total_steps": 204665, "loss": 0.12, "lr": 1.995373730297603e-06, "epoch": 0.637871643905895, "percentage": 12.76, "elapsed_time": "0:33:05", "remaining_time": "3:46:15", "throughput": 8861.69, "total_tokens": 17591296} +{"current_steps": 26115, "total_steps": 204665, "loss": 0.2038, "lr": 1.9953655333083325e-06, "epoch": 0.6379937947377422, "percentage": 12.76, "elapsed_time": "0:33:05", "remaining_time": "3:46:14", "throughput": 8861.72, "total_tokens": 17594368} +{"current_steps": 26120, "total_steps": 204665, "loss": 0.0196, "lr": 1.995357329080496e-06, "epoch": 0.6381159455695893, "percentage": 12.76, "elapsed_time": "0:33:05", "remaining_time": "3:46:13", "throughput": 8861.74, "total_tokens": 17597440} +{"current_steps": 26125, "total_steps": 204665, "loss": 0.1449, "lr": 1.995349117614154e-06, "epoch": 0.6382380964014365, "percentage": 12.76, "elapsed_time": "0:33:06", "remaining_time": "3:46:13", "throughput": 8861.77, "total_tokens": 17600512} +{"current_steps": 26130, "total_steps": 204665, "loss": 0.0482, "lr": 1.995340898909365e-06, "epoch": 0.6383602472332837, "percentage": 12.77, "elapsed_time": "0:33:06", "remaining_time": "3:46:12", "throughput": 8861.79, "total_tokens": 17603584} +{"current_steps": 26135, "total_steps": 204665, "loss": 0.132, "lr": 1.9953326729661894e-06, "epoch": 0.6384823980651309, "percentage": 12.77, "elapsed_time": "0:33:06", "remaining_time": "3:46:11", "throughput": 8861.84, "total_tokens": 17606720} +{"current_steps": 26140, "total_steps": 204665, "loss": 0.0638, "lr": 1.9953244397846867e-06, "epoch": 0.638604548896978, "percentage": 12.77, "elapsed_time": "0:33:07", "remaining_time": "3:46:11", "throughput": 8862.03, "total_tokens": 17610176} +{"current_steps": 26145, "total_steps": 204665, "loss": 0.1026, "lr": 1.995316199364917e-06, "epoch": 0.6387266997288251, "percentage": 12.77, "elapsed_time": "0:33:07", "remaining_time": "3:46:10", "throughput": 8862.11, "total_tokens": 17613440} +{"current_steps": 26150, "total_steps": 204665, "loss": 0.1488, "lr": 1.9953079517069404e-06, "epoch": 0.6388488505606723, "percentage": 12.78, "elapsed_time": "0:33:07", "remaining_time": "3:46:10", "throughput": 8862.12, "total_tokens": 17616448} +{"current_steps": 26155, "total_steps": 204665, "loss": 0.1919, "lr": 1.9952996968108163e-06, "epoch": 0.6389710013925195, "percentage": 12.78, "elapsed_time": "0:33:08", "remaining_time": "3:46:09", "throughput": 8862.32, "total_tokens": 17619968} +{"current_steps": 26160, "total_steps": 204665, "loss": 0.0421, "lr": 1.9952914346766055e-06, "epoch": 0.6390931522243667, "percentage": 12.78, "elapsed_time": "0:33:08", "remaining_time": "3:46:08", "throughput": 8862.35, "total_tokens": 17623040} +{"current_steps": 26165, "total_steps": 204665, "loss": 0.1205, "lr": 1.9952831653043673e-06, "epoch": 0.6392153030562138, "percentage": 12.78, "elapsed_time": "0:33:08", "remaining_time": "3:46:08", "throughput": 8862.55, "total_tokens": 17626560} +{"current_steps": 26170, "total_steps": 204665, "loss": 0.1684, "lr": 1.9952748886941623e-06, "epoch": 0.639337453888061, "percentage": 12.79, "elapsed_time": "0:33:09", "remaining_time": "3:46:07", "throughput": 8862.7, "total_tokens": 17629888} +{"current_steps": 26175, "total_steps": 204665, "loss": 0.12, "lr": 1.995266604846051e-06, "epoch": 0.6394596047199081, "percentage": 12.79, "elapsed_time": "0:33:09", "remaining_time": "3:46:07", "throughput": 8862.98, "total_tokens": 17633600} +{"current_steps": 26180, "total_steps": 204665, "loss": 0.0854, "lr": 1.9952583137600927e-06, "epoch": 0.6395817555517553, "percentage": 12.79, "elapsed_time": "0:33:09", "remaining_time": "3:46:06", "throughput": 8863.22, "total_tokens": 17637184} +{"current_steps": 26185, "total_steps": 204665, "loss": 0.1761, "lr": 1.995250015436349e-06, "epoch": 0.6397039063836025, "percentage": 12.79, "elapsed_time": "0:33:10", "remaining_time": "3:46:05", "throughput": 8863.29, "total_tokens": 17640384} +{"current_steps": 26190, "total_steps": 204665, "loss": 0.1808, "lr": 1.9952417098748787e-06, "epoch": 0.6398260572154496, "percentage": 12.8, "elapsed_time": "0:33:10", "remaining_time": "3:46:05", "throughput": 8863.32, "total_tokens": 17643456} +{"current_steps": 26195, "total_steps": 204665, "loss": 0.1093, "lr": 1.9952333970757437e-06, "epoch": 0.6399482080472968, "percentage": 12.8, "elapsed_time": "0:33:10", "remaining_time": "3:46:04", "throughput": 8863.5, "total_tokens": 17646912} +{"current_steps": 26200, "total_steps": 204665, "loss": 0.0242, "lr": 1.995225077039003e-06, "epoch": 0.640070358879144, "percentage": 12.8, "elapsed_time": "0:33:11", "remaining_time": "3:46:04", "throughput": 8863.64, "total_tokens": 17650304} +{"current_steps": 26205, "total_steps": 204665, "loss": 0.1729, "lr": 1.9952167497647183e-06, "epoch": 0.6401925097109912, "percentage": 12.8, "elapsed_time": "0:33:11", "remaining_time": "3:46:03", "throughput": 8863.73, "total_tokens": 17653504} +{"current_steps": 26210, "total_steps": 204665, "loss": 0.0961, "lr": 1.9952084152529496e-06, "epoch": 0.6403146605428383, "percentage": 12.81, "elapsed_time": "0:33:12", "remaining_time": "3:46:02", "throughput": 8863.83, "total_tokens": 17656768} +{"current_steps": 26215, "total_steps": 204665, "loss": 0.0957, "lr": 1.9952000735037577e-06, "epoch": 0.6404368113746854, "percentage": 12.81, "elapsed_time": "0:33:12", "remaining_time": "3:46:02", "throughput": 8863.93, "total_tokens": 17660032} +{"current_steps": 26220, "total_steps": 204665, "loss": 0.1386, "lr": 1.995191724517203e-06, "epoch": 0.6405589622065326, "percentage": 12.81, "elapsed_time": "0:33:12", "remaining_time": "3:46:01", "throughput": 8864.08, "total_tokens": 17663424} +{"current_steps": 26225, "total_steps": 204665, "loss": 0.0311, "lr": 1.9951833682933468e-06, "epoch": 0.6406811130383798, "percentage": 12.81, "elapsed_time": "0:33:13", "remaining_time": "3:46:01", "throughput": 8864.1, "total_tokens": 17666496} +{"current_steps": 26230, "total_steps": 204665, "loss": 0.0999, "lr": 1.995175004832249e-06, "epoch": 0.6408032638702269, "percentage": 12.82, "elapsed_time": "0:33:13", "remaining_time": "3:46:00", "throughput": 8864.16, "total_tokens": 17669632} +{"current_steps": 26235, "total_steps": 204665, "loss": 0.1355, "lr": 1.9951666341339717e-06, "epoch": 0.6409254147020741, "percentage": 12.82, "elapsed_time": "0:33:13", "remaining_time": "3:45:59", "throughput": 8864.47, "total_tokens": 17673408} +{"current_steps": 26240, "total_steps": 204665, "loss": 0.1531, "lr": 1.9951582561985743e-06, "epoch": 0.6410475655339213, "percentage": 12.82, "elapsed_time": "0:33:14", "remaining_time": "3:45:59", "throughput": 8864.48, "total_tokens": 17676416} +{"current_steps": 26245, "total_steps": 204665, "loss": 0.1753, "lr": 1.995149871026118e-06, "epoch": 0.6411697163657685, "percentage": 12.82, "elapsed_time": "0:33:14", "remaining_time": "3:45:58", "throughput": 8864.61, "total_tokens": 17679744} +{"current_steps": 26250, "total_steps": 204665, "loss": 0.1188, "lr": 1.995141478616665e-06, "epoch": 0.6412918671976157, "percentage": 12.83, "elapsed_time": "0:33:14", "remaining_time": "3:45:57", "throughput": 8864.68, "total_tokens": 17682944} +{"current_steps": 26255, "total_steps": 204665, "loss": 0.1189, "lr": 1.995133078970275e-06, "epoch": 0.6414140180294627, "percentage": 12.83, "elapsed_time": "0:33:15", "remaining_time": "3:45:57", "throughput": 8864.84, "total_tokens": 17686336} +{"current_steps": 26260, "total_steps": 204665, "loss": 0.099, "lr": 1.99512467208701e-06, "epoch": 0.6415361688613099, "percentage": 12.83, "elapsed_time": "0:33:15", "remaining_time": "3:45:56", "throughput": 8865.02, "total_tokens": 17689792} +{"current_steps": 26265, "total_steps": 204665, "loss": 0.0971, "lr": 1.9951162579669306e-06, "epoch": 0.6416583196931571, "percentage": 12.83, "elapsed_time": "0:33:15", "remaining_time": "3:45:56", "throughput": 8865.45, "total_tokens": 17693888} +{"current_steps": 26270, "total_steps": 204665, "loss": 0.1654, "lr": 1.995107836610098e-06, "epoch": 0.6417804705250043, "percentage": 12.84, "elapsed_time": "0:33:16", "remaining_time": "3:45:55", "throughput": 8865.55, "total_tokens": 17697152} +{"current_steps": 26275, "total_steps": 204665, "loss": 0.0956, "lr": 1.9950994080165736e-06, "epoch": 0.6419026213568514, "percentage": 12.84, "elapsed_time": "0:33:16", "remaining_time": "3:45:54", "throughput": 8865.58, "total_tokens": 17700224} +{"current_steps": 26280, "total_steps": 204665, "loss": 0.116, "lr": 1.9950909721864184e-06, "epoch": 0.6420247721886986, "percentage": 12.84, "elapsed_time": "0:33:16", "remaining_time": "3:45:54", "throughput": 8865.67, "total_tokens": 17703488} +{"current_steps": 26285, "total_steps": 204665, "loss": 0.0592, "lr": 1.9950825291196944e-06, "epoch": 0.6421469230205458, "percentage": 12.84, "elapsed_time": "0:33:17", "remaining_time": "3:45:53", "throughput": 8865.74, "total_tokens": 17706688} +{"current_steps": 26290, "total_steps": 204665, "loss": 0.0388, "lr": 1.995074078816462e-06, "epoch": 0.642269073852393, "percentage": 12.85, "elapsed_time": "0:33:17", "remaining_time": "3:45:53", "throughput": 8865.82, "total_tokens": 17709888} +{"current_steps": 26295, "total_steps": 204665, "loss": 0.0883, "lr": 1.9950656212767844e-06, "epoch": 0.6423912246842401, "percentage": 12.85, "elapsed_time": "0:33:17", "remaining_time": "3:45:52", "throughput": 8866.09, "total_tokens": 17713536} +{"current_steps": 26300, "total_steps": 204665, "loss": 0.1991, "lr": 1.995057156500721e-06, "epoch": 0.6425133755160872, "percentage": 12.85, "elapsed_time": "0:33:18", "remaining_time": "3:45:51", "throughput": 8866.24, "total_tokens": 17716928} +{"current_steps": 26305, "total_steps": 204665, "loss": 0.134, "lr": 1.9950486844883348e-06, "epoch": 0.6426355263479344, "percentage": 12.85, "elapsed_time": "0:33:18", "remaining_time": "3:45:51", "throughput": 8866.3, "total_tokens": 17720128} +{"current_steps": 26310, "total_steps": 204665, "loss": 0.1259, "lr": 1.9950402052396866e-06, "epoch": 0.6427576771797816, "percentage": 12.86, "elapsed_time": "0:33:18", "remaining_time": "3:45:50", "throughput": 8866.46, "total_tokens": 17723520} +{"current_steps": 26315, "total_steps": 204665, "loss": 0.202, "lr": 1.9950317187548385e-06, "epoch": 0.6428798280116288, "percentage": 12.86, "elapsed_time": "0:33:19", "remaining_time": "3:45:50", "throughput": 8866.74, "total_tokens": 17727232} +{"current_steps": 26320, "total_steps": 204665, "loss": 0.1676, "lr": 1.995023225033852e-06, "epoch": 0.6430019788434759, "percentage": 12.86, "elapsed_time": "0:33:19", "remaining_time": "3:45:49", "throughput": 8866.83, "total_tokens": 17730496} +{"current_steps": 26325, "total_steps": 204665, "loss": 0.3496, "lr": 1.9950147240767895e-06, "epoch": 0.6431241296753231, "percentage": 12.86, "elapsed_time": "0:33:20", "remaining_time": "3:45:49", "throughput": 8867.16, "total_tokens": 17734336} +{"current_steps": 26330, "total_steps": 204665, "loss": 0.0899, "lr": 1.9950062158837118e-06, "epoch": 0.6432462805071703, "percentage": 12.86, "elapsed_time": "0:33:20", "remaining_time": "3:45:48", "throughput": 8867.34, "total_tokens": 17737792} +{"current_steps": 26335, "total_steps": 204665, "loss": 0.1277, "lr": 1.9949977004546814e-06, "epoch": 0.6433684313390174, "percentage": 12.87, "elapsed_time": "0:33:20", "remaining_time": "3:45:47", "throughput": 8867.53, "total_tokens": 17741312} +{"current_steps": 26340, "total_steps": 204665, "loss": 0.1018, "lr": 1.99498917778976e-06, "epoch": 0.6434905821708646, "percentage": 12.87, "elapsed_time": "0:33:21", "remaining_time": "3:45:47", "throughput": 8867.64, "total_tokens": 17744640} +{"current_steps": 26345, "total_steps": 204665, "loss": 0.143, "lr": 1.9949806478890095e-06, "epoch": 0.6436127330027117, "percentage": 12.87, "elapsed_time": "0:33:21", "remaining_time": "3:45:46", "throughput": 8867.81, "total_tokens": 17748096} +{"current_steps": 26350, "total_steps": 204665, "loss": 0.0545, "lr": 1.9949721107524924e-06, "epoch": 0.6437348838345589, "percentage": 12.87, "elapsed_time": "0:33:21", "remaining_time": "3:45:46", "throughput": 8867.92, "total_tokens": 17751424} +{"current_steps": 26355, "total_steps": 204665, "loss": 0.1333, "lr": 1.9949635663802705e-06, "epoch": 0.6438570346664061, "percentage": 12.88, "elapsed_time": "0:33:22", "remaining_time": "3:45:45", "throughput": 8867.97, "total_tokens": 17754560} +{"current_steps": 26360, "total_steps": 204665, "loss": 0.0863, "lr": 1.994955014772406e-06, "epoch": 0.6439791854982533, "percentage": 12.88, "elapsed_time": "0:33:22", "remaining_time": "3:45:45", "throughput": 8868.1, "total_tokens": 17757952} +{"current_steps": 26365, "total_steps": 204665, "loss": 0.2483, "lr": 1.9949464559289607e-06, "epoch": 0.6441013363301004, "percentage": 12.88, "elapsed_time": "0:33:22", "remaining_time": "3:45:44", "throughput": 8868.3, "total_tokens": 17761472} +{"current_steps": 26370, "total_steps": 204665, "loss": 0.0981, "lr": 1.9949378898499974e-06, "epoch": 0.6442234871619476, "percentage": 12.88, "elapsed_time": "0:33:23", "remaining_time": "3:45:43", "throughput": 8868.36, "total_tokens": 17764608} +{"current_steps": 26375, "total_steps": 204665, "loss": 0.1006, "lr": 1.9949293165355783e-06, "epoch": 0.6443456379937947, "percentage": 12.89, "elapsed_time": "0:33:23", "remaining_time": "3:45:43", "throughput": 8868.46, "total_tokens": 17767872} +{"current_steps": 26380, "total_steps": 204665, "loss": 0.0816, "lr": 1.9949207359857656e-06, "epoch": 0.6444677888256419, "percentage": 12.89, "elapsed_time": "0:33:23", "remaining_time": "3:45:42", "throughput": 8868.56, "total_tokens": 17771136} +{"current_steps": 26385, "total_steps": 204665, "loss": 0.1563, "lr": 1.9949121482006216e-06, "epoch": 0.6445899396574891, "percentage": 12.89, "elapsed_time": "0:33:24", "remaining_time": "3:45:41", "throughput": 8868.7, "total_tokens": 17774464} +{"current_steps": 26390, "total_steps": 204665, "loss": 0.0626, "lr": 1.9949035531802086e-06, "epoch": 0.6447120904893362, "percentage": 12.89, "elapsed_time": "0:33:24", "remaining_time": "3:45:41", "throughput": 8868.77, "total_tokens": 17777664} +{"current_steps": 26395, "total_steps": 204665, "loss": 0.1521, "lr": 1.9948949509245897e-06, "epoch": 0.6448342413211834, "percentage": 12.9, "elapsed_time": "0:33:24", "remaining_time": "3:45:40", "throughput": 8868.78, "total_tokens": 17780672} +{"current_steps": 26400, "total_steps": 204665, "loss": 0.1073, "lr": 1.994886341433827e-06, "epoch": 0.6449563921530306, "percentage": 12.9, "elapsed_time": "0:33:25", "remaining_time": "3:45:40", "throughput": 8868.8, "total_tokens": 17783744} +{"current_steps": 26405, "total_steps": 204665, "loss": 0.225, "lr": 1.994877724707983e-06, "epoch": 0.6450785429848778, "percentage": 12.9, "elapsed_time": "0:33:25", "remaining_time": "3:45:39", "throughput": 8869.25, "total_tokens": 17787904} +{"current_steps": 26410, "total_steps": 204665, "loss": 0.044, "lr": 1.994869100747121e-06, "epoch": 0.6452006938167248, "percentage": 12.9, "elapsed_time": "0:33:25", "remaining_time": "3:45:39", "throughput": 8869.46, "total_tokens": 17791424} +{"current_steps": 26415, "total_steps": 204665, "loss": 0.0557, "lr": 1.994860469551303e-06, "epoch": 0.645322844648572, "percentage": 12.91, "elapsed_time": "0:33:26", "remaining_time": "3:45:38", "throughput": 8869.54, "total_tokens": 17794624} +{"current_steps": 26420, "total_steps": 204665, "loss": 0.1954, "lr": 1.9948518311205925e-06, "epoch": 0.6454449954804192, "percentage": 12.91, "elapsed_time": "0:33:26", "remaining_time": "3:45:37", "throughput": 8869.64, "total_tokens": 17797888} +{"current_steps": 26425, "total_steps": 204665, "loss": 0.084, "lr": 1.9948431854550517e-06, "epoch": 0.6455671463122664, "percentage": 12.91, "elapsed_time": "0:33:26", "remaining_time": "3:45:37", "throughput": 8869.76, "total_tokens": 17801216} +{"current_steps": 26430, "total_steps": 204665, "loss": 0.0945, "lr": 1.9948345325547433e-06, "epoch": 0.6456892971441136, "percentage": 12.91, "elapsed_time": "0:33:27", "remaining_time": "3:45:36", "throughput": 8869.81, "total_tokens": 17804352} +{"current_steps": 26435, "total_steps": 204665, "loss": 0.0385, "lr": 1.994825872419731e-06, "epoch": 0.6458114479759607, "percentage": 12.92, "elapsed_time": "0:33:27", "remaining_time": "3:45:35", "throughput": 8869.91, "total_tokens": 17807616} +{"current_steps": 26440, "total_steps": 204665, "loss": 0.1779, "lr": 1.994817205050077e-06, "epoch": 0.6459335988078079, "percentage": 12.92, "elapsed_time": "0:33:27", "remaining_time": "3:45:35", "throughput": 8869.95, "total_tokens": 17810752} +{"current_steps": 26445, "total_steps": 204665, "loss": 0.2423, "lr": 1.9948085304458453e-06, "epoch": 0.6460557496396551, "percentage": 12.92, "elapsed_time": "0:33:28", "remaining_time": "3:45:34", "throughput": 8869.98, "total_tokens": 17813824} +{"current_steps": 26450, "total_steps": 204665, "loss": 0.0702, "lr": 1.994799848607098e-06, "epoch": 0.6461779004715023, "percentage": 12.92, "elapsed_time": "0:33:28", "remaining_time": "3:45:34", "throughput": 8870.02, "total_tokens": 17816960} +{"current_steps": 26455, "total_steps": 204665, "loss": 0.1297, "lr": 1.9947911595338986e-06, "epoch": 0.6463000513033493, "percentage": 12.93, "elapsed_time": "0:33:29", "remaining_time": "3:45:33", "throughput": 8870.05, "total_tokens": 17820032} +{"current_steps": 26460, "total_steps": 204665, "loss": 0.0541, "lr": 1.9947824632263102e-06, "epoch": 0.6464222021351965, "percentage": 12.93, "elapsed_time": "0:33:29", "remaining_time": "3:45:32", "throughput": 8870.3, "total_tokens": 17823680} +{"current_steps": 26465, "total_steps": 204665, "loss": 0.1016, "lr": 1.9947737596843964e-06, "epoch": 0.6465443529670437, "percentage": 12.93, "elapsed_time": "0:33:29", "remaining_time": "3:45:32", "throughput": 8870.36, "total_tokens": 17826880} +{"current_steps": 26470, "total_steps": 204665, "loss": 0.1124, "lr": 1.9947650489082207e-06, "epoch": 0.6466665037988909, "percentage": 12.93, "elapsed_time": "0:33:30", "remaining_time": "3:45:31", "throughput": 8870.68, "total_tokens": 17830656} +{"current_steps": 26475, "total_steps": 204665, "loss": 0.1057, "lr": 1.9947563308978453e-06, "epoch": 0.646788654630738, "percentage": 12.94, "elapsed_time": "0:33:30", "remaining_time": "3:45:31", "throughput": 8870.83, "total_tokens": 17834048} +{"current_steps": 26480, "total_steps": 204665, "loss": 0.1586, "lr": 1.9947476056533347e-06, "epoch": 0.6469108054625852, "percentage": 12.94, "elapsed_time": "0:33:30", "remaining_time": "3:45:30", "throughput": 8871.0, "total_tokens": 17837504} +{"current_steps": 26485, "total_steps": 204665, "loss": 0.1599, "lr": 1.994738873174752e-06, "epoch": 0.6470329562944324, "percentage": 12.94, "elapsed_time": "0:33:31", "remaining_time": "3:45:29", "throughput": 8871.08, "total_tokens": 17840704} +{"current_steps": 26490, "total_steps": 204665, "loss": 0.1022, "lr": 1.9947301334621603e-06, "epoch": 0.6471551071262795, "percentage": 12.94, "elapsed_time": "0:33:31", "remaining_time": "3:45:29", "throughput": 8871.5, "total_tokens": 17844736} +{"current_steps": 26495, "total_steps": 204665, "loss": 0.1502, "lr": 1.9947213865156237e-06, "epoch": 0.6472772579581267, "percentage": 12.95, "elapsed_time": "0:33:31", "remaining_time": "3:45:29", "throughput": 8871.42, "total_tokens": 17848384} +{"current_steps": 26500, "total_steps": 204665, "loss": 0.0923, "lr": 1.994712632335206e-06, "epoch": 0.6473994087899738, "percentage": 12.95, "elapsed_time": "0:33:32", "remaining_time": "3:45:28", "throughput": 8871.49, "total_tokens": 17851584} +{"current_steps": 26505, "total_steps": 204665, "loss": 0.1213, "lr": 1.9947038709209696e-06, "epoch": 0.647521559621821, "percentage": 12.95, "elapsed_time": "0:33:32", "remaining_time": "3:45:28", "throughput": 8871.67, "total_tokens": 17855040} +{"current_steps": 26510, "total_steps": 204665, "loss": 0.1322, "lr": 1.99469510227298e-06, "epoch": 0.6476437104536682, "percentage": 12.95, "elapsed_time": "0:33:32", "remaining_time": "3:45:27", "throughput": 8872.0, "total_tokens": 17858880} +{"current_steps": 26515, "total_steps": 204665, "loss": 0.0871, "lr": 1.9946863263912995e-06, "epoch": 0.6477658612855154, "percentage": 12.96, "elapsed_time": "0:33:33", "remaining_time": "3:45:26", "throughput": 8872.01, "total_tokens": 17861952} +{"current_steps": 26520, "total_steps": 204665, "loss": 0.0217, "lr": 1.9946775432759927e-06, "epoch": 0.6478880121173625, "percentage": 12.96, "elapsed_time": "0:33:33", "remaining_time": "3:45:26", "throughput": 8872.03, "total_tokens": 17865024} +{"current_steps": 26525, "total_steps": 204665, "loss": 0.0965, "lr": 1.994668752927123e-06, "epoch": 0.6480101629492097, "percentage": 12.96, "elapsed_time": "0:33:33", "remaining_time": "3:45:25", "throughput": 8871.99, "total_tokens": 17867968} +{"current_steps": 26530, "total_steps": 204665, "loss": 0.0301, "lr": 1.9946599553447554e-06, "epoch": 0.6481323137810568, "percentage": 12.96, "elapsed_time": "0:33:34", "remaining_time": "3:45:25", "throughput": 8872.31, "total_tokens": 17871744} +{"current_steps": 26535, "total_steps": 204665, "loss": 0.0995, "lr": 1.9946511505289524e-06, "epoch": 0.648254464612904, "percentage": 12.97, "elapsed_time": "0:33:34", "remaining_time": "3:45:24", "throughput": 8873.16, "total_tokens": 17876992} +{"current_steps": 26540, "total_steps": 204665, "loss": 0.0387, "lr": 1.9946423384797785e-06, "epoch": 0.6483766154447512, "percentage": 12.97, "elapsed_time": "0:33:35", "remaining_time": "3:45:24", "throughput": 8873.19, "total_tokens": 17880064} +{"current_steps": 26545, "total_steps": 204665, "loss": 0.0889, "lr": 1.9946335191972986e-06, "epoch": 0.6484987662765983, "percentage": 12.97, "elapsed_time": "0:33:35", "remaining_time": "3:45:23", "throughput": 8873.48, "total_tokens": 17883840} +{"current_steps": 26550, "total_steps": 204665, "loss": 0.1818, "lr": 1.9946246926815758e-06, "epoch": 0.6486209171084455, "percentage": 12.97, "elapsed_time": "0:33:35", "remaining_time": "3:45:23", "throughput": 8873.71, "total_tokens": 17887424} +{"current_steps": 26555, "total_steps": 204665, "loss": 0.2036, "lr": 1.994615858932675e-06, "epoch": 0.6487430679402927, "percentage": 12.97, "elapsed_time": "0:33:36", "remaining_time": "3:45:22", "throughput": 8873.82, "total_tokens": 17890752} +{"current_steps": 26560, "total_steps": 204665, "loss": 0.1904, "lr": 1.99460701795066e-06, "epoch": 0.6488652187721399, "percentage": 12.98, "elapsed_time": "0:33:36", "remaining_time": "3:45:22", "throughput": 8874.13, "total_tokens": 17894528} +{"current_steps": 26565, "total_steps": 204665, "loss": 0.1425, "lr": 1.994598169735595e-06, "epoch": 0.648987369603987, "percentage": 12.98, "elapsed_time": "0:33:36", "remaining_time": "3:45:21", "throughput": 8874.14, "total_tokens": 17897600} +{"current_steps": 26570, "total_steps": 204665, "loss": 0.2702, "lr": 1.994589314287545e-06, "epoch": 0.6491095204358341, "percentage": 12.98, "elapsed_time": "0:33:37", "remaining_time": "3:45:20", "throughput": 8874.38, "total_tokens": 17901248} +{"current_steps": 26575, "total_steps": 204665, "loss": 0.1467, "lr": 1.9945804516065737e-06, "epoch": 0.6492316712676813, "percentage": 12.98, "elapsed_time": "0:33:37", "remaining_time": "3:45:20", "throughput": 8874.46, "total_tokens": 17904448} +{"current_steps": 26580, "total_steps": 204665, "loss": 0.1725, "lr": 1.9945715816927464e-06, "epoch": 0.6493538220995285, "percentage": 12.99, "elapsed_time": "0:33:37", "remaining_time": "3:45:19", "throughput": 8874.66, "total_tokens": 17907968} +{"current_steps": 26585, "total_steps": 204665, "loss": 0.027, "lr": 1.9945627045461263e-06, "epoch": 0.6494759729313757, "percentage": 12.99, "elapsed_time": "0:33:38", "remaining_time": "3:45:19", "throughput": 8874.76, "total_tokens": 17911232} +{"current_steps": 26590, "total_steps": 204665, "loss": 0.1729, "lr": 1.9945538201667792e-06, "epoch": 0.6495981237632228, "percentage": 12.99, "elapsed_time": "0:33:38", "remaining_time": "3:45:18", "throughput": 8874.85, "total_tokens": 17914496} +{"current_steps": 26595, "total_steps": 204665, "loss": 0.0971, "lr": 1.9945449285547694e-06, "epoch": 0.64972027459507, "percentage": 12.99, "elapsed_time": "0:33:38", "remaining_time": "3:45:17", "throughput": 8874.95, "total_tokens": 17917760} +{"current_steps": 26600, "total_steps": 204665, "loss": 0.1367, "lr": 1.9945360297101607e-06, "epoch": 0.6498424254269172, "percentage": 13.0, "elapsed_time": "0:33:39", "remaining_time": "3:45:17", "throughput": 8875.14, "total_tokens": 17921280} +{"current_steps": 26605, "total_steps": 204665, "loss": 0.1497, "lr": 1.994527123633019e-06, "epoch": 0.6499645762587644, "percentage": 13.0, "elapsed_time": "0:33:39", "remaining_time": "3:45:16", "throughput": 8875.14, "total_tokens": 17924288} +{"current_steps": 26610, "total_steps": 204665, "loss": 0.0734, "lr": 1.9945182103234085e-06, "epoch": 0.6500867270906114, "percentage": 13.0, "elapsed_time": "0:33:39", "remaining_time": "3:45:16", "throughput": 8875.49, "total_tokens": 17928192} +{"current_steps": 26615, "total_steps": 204665, "loss": 0.0735, "lr": 1.9945092897813937e-06, "epoch": 0.6502088779224586, "percentage": 13.0, "elapsed_time": "0:33:40", "remaining_time": "3:45:15", "throughput": 8875.64, "total_tokens": 17931584} +{"current_steps": 26620, "total_steps": 204665, "loss": 0.0445, "lr": 1.99450036200704e-06, "epoch": 0.6503310287543058, "percentage": 13.01, "elapsed_time": "0:33:40", "remaining_time": "3:45:15", "throughput": 8875.91, "total_tokens": 17935296} +{"current_steps": 26625, "total_steps": 204665, "loss": 0.132, "lr": 1.9944914270004126e-06, "epoch": 0.650453179586153, "percentage": 13.01, "elapsed_time": "0:33:41", "remaining_time": "3:45:14", "throughput": 8875.98, "total_tokens": 17938496} +{"current_steps": 26630, "total_steps": 204665, "loss": 0.1255, "lr": 1.994482484761576e-06, "epoch": 0.6505753304180002, "percentage": 13.01, "elapsed_time": "0:33:41", "remaining_time": "3:45:13", "throughput": 8875.95, "total_tokens": 17941440} +{"current_steps": 26635, "total_steps": 204665, "loss": 0.1735, "lr": 1.994473535290595e-06, "epoch": 0.6506974812498473, "percentage": 13.01, "elapsed_time": "0:33:41", "remaining_time": "3:45:13", "throughput": 8876.0, "total_tokens": 17944576} +{"current_steps": 26640, "total_steps": 204665, "loss": 0.0996, "lr": 1.994464578587535e-06, "epoch": 0.6508196320816945, "percentage": 13.02, "elapsed_time": "0:33:42", "remaining_time": "3:45:12", "throughput": 8876.28, "total_tokens": 17948288} +{"current_steps": 26645, "total_steps": 204665, "loss": 0.1026, "lr": 1.9944556146524613e-06, "epoch": 0.6509417829135417, "percentage": 13.02, "elapsed_time": "0:33:42", "remaining_time": "3:45:11", "throughput": 8876.31, "total_tokens": 17951360} +{"current_steps": 26650, "total_steps": 204665, "loss": 0.0945, "lr": 1.9944466434854386e-06, "epoch": 0.6510639337453888, "percentage": 13.02, "elapsed_time": "0:33:42", "remaining_time": "3:45:11", "throughput": 8876.54, "total_tokens": 17954944} +{"current_steps": 26655, "total_steps": 204665, "loss": 0.117, "lr": 1.9944376650865325e-06, "epoch": 0.6511860845772359, "percentage": 13.02, "elapsed_time": "0:33:43", "remaining_time": "3:45:10", "throughput": 8876.59, "total_tokens": 17958080} +{"current_steps": 26660, "total_steps": 204665, "loss": 0.1298, "lr": 1.994428679455808e-06, "epoch": 0.6513082354090831, "percentage": 13.03, "elapsed_time": "0:33:43", "remaining_time": "3:45:10", "throughput": 8876.8, "total_tokens": 17961664} +{"current_steps": 26665, "total_steps": 204665, "loss": 0.0816, "lr": 1.9944196865933313e-06, "epoch": 0.6514303862409303, "percentage": 13.03, "elapsed_time": "0:33:43", "remaining_time": "3:45:09", "throughput": 8876.79, "total_tokens": 17964672} +{"current_steps": 26670, "total_steps": 204665, "loss": 0.2047, "lr": 1.994410686499167e-06, "epoch": 0.6515525370727775, "percentage": 13.03, "elapsed_time": "0:33:44", "remaining_time": "3:45:08", "throughput": 8876.83, "total_tokens": 17967808} +{"current_steps": 26675, "total_steps": 204665, "loss": 0.1808, "lr": 1.9944016791733806e-06, "epoch": 0.6516746879046247, "percentage": 13.03, "elapsed_time": "0:33:44", "remaining_time": "3:45:08", "throughput": 8876.98, "total_tokens": 17971200} +{"current_steps": 26680, "total_steps": 204665, "loss": 0.077, "lr": 1.9943926646160377e-06, "epoch": 0.6517968387364718, "percentage": 13.04, "elapsed_time": "0:33:44", "remaining_time": "3:45:07", "throughput": 8877.11, "total_tokens": 17974528} +{"current_steps": 26685, "total_steps": 204665, "loss": 0.0653, "lr": 1.994383642827204e-06, "epoch": 0.651918989568319, "percentage": 13.04, "elapsed_time": "0:33:45", "remaining_time": "3:45:07", "throughput": 8877.29, "total_tokens": 17977984} +{"current_steps": 26690, "total_steps": 204665, "loss": 0.1219, "lr": 1.9943746138069446e-06, "epoch": 0.6520411404001661, "percentage": 13.04, "elapsed_time": "0:33:45", "remaining_time": "3:45:06", "throughput": 8877.21, "total_tokens": 17980800} +{"current_steps": 26695, "total_steps": 204665, "loss": 0.1261, "lr": 1.9943655775553257e-06, "epoch": 0.6521632912320133, "percentage": 13.04, "elapsed_time": "0:33:45", "remaining_time": "3:45:05", "throughput": 8877.33, "total_tokens": 17984128} +{"current_steps": 26700, "total_steps": 204665, "loss": 0.0388, "lr": 1.9943565340724133e-06, "epoch": 0.6522854420638604, "percentage": 13.05, "elapsed_time": "0:33:46", "remaining_time": "3:45:05", "throughput": 8877.57, "total_tokens": 17987776} +{"current_steps": 26705, "total_steps": 204665, "loss": 0.1485, "lr": 1.994347483358272e-06, "epoch": 0.6524075928957076, "percentage": 13.05, "elapsed_time": "0:33:46", "remaining_time": "3:45:04", "throughput": 8877.71, "total_tokens": 17991168} +{"current_steps": 26710, "total_steps": 204665, "loss": 0.1009, "lr": 1.9943384254129693e-06, "epoch": 0.6525297437275548, "percentage": 13.05, "elapsed_time": "0:33:46", "remaining_time": "3:45:04", "throughput": 8877.82, "total_tokens": 17994432} +{"current_steps": 26715, "total_steps": 204665, "loss": 0.3058, "lr": 1.9943293602365694e-06, "epoch": 0.652651894559402, "percentage": 13.05, "elapsed_time": "0:33:47", "remaining_time": "3:45:03", "throughput": 8877.91, "total_tokens": 17997696} +{"current_steps": 26720, "total_steps": 204665, "loss": 0.1395, "lr": 1.994320287829139e-06, "epoch": 0.6527740453912492, "percentage": 13.06, "elapsed_time": "0:33:47", "remaining_time": "3:45:03", "throughput": 8878.18, "total_tokens": 18001408} +{"current_steps": 26725, "total_steps": 204665, "loss": 0.0725, "lr": 1.9943112081907443e-06, "epoch": 0.6528961962230962, "percentage": 13.06, "elapsed_time": "0:33:47", "remaining_time": "3:45:02", "throughput": 8878.21, "total_tokens": 18004480} +{"current_steps": 26730, "total_steps": 204665, "loss": 0.0555, "lr": 1.9943021213214508e-06, "epoch": 0.6530183470549434, "percentage": 13.06, "elapsed_time": "0:33:48", "remaining_time": "3:45:01", "throughput": 8878.26, "total_tokens": 18007616} +{"current_steps": 26735, "total_steps": 204665, "loss": 0.0594, "lr": 1.994293027221325e-06, "epoch": 0.6531404978867906, "percentage": 13.06, "elapsed_time": "0:33:48", "remaining_time": "3:45:01", "throughput": 8878.44, "total_tokens": 18011072} +{"current_steps": 26740, "total_steps": 204665, "loss": 0.1316, "lr": 1.994283925890433e-06, "epoch": 0.6532626487186378, "percentage": 13.07, "elapsed_time": "0:33:48", "remaining_time": "3:45:00", "throughput": 8878.34, "total_tokens": 18013824} +{"current_steps": 26745, "total_steps": 204665, "loss": 0.0942, "lr": 1.9942748173288408e-06, "epoch": 0.6533847995504849, "percentage": 13.07, "elapsed_time": "0:33:49", "remaining_time": "3:44:59", "throughput": 8878.4, "total_tokens": 18017024} +{"current_steps": 26750, "total_steps": 204665, "loss": 0.1295, "lr": 1.9942657015366145e-06, "epoch": 0.6535069503823321, "percentage": 13.07, "elapsed_time": "0:33:49", "remaining_time": "3:44:59", "throughput": 8878.66, "total_tokens": 18020736} +{"current_steps": 26755, "total_steps": 204665, "loss": 0.0743, "lr": 1.9942565785138207e-06, "epoch": 0.6536291012141793, "percentage": 13.07, "elapsed_time": "0:33:50", "remaining_time": "3:44:58", "throughput": 8878.76, "total_tokens": 18024000} +{"current_steps": 26760, "total_steps": 204665, "loss": 0.1127, "lr": 1.994247448260526e-06, "epoch": 0.6537512520460265, "percentage": 13.08, "elapsed_time": "0:33:50", "remaining_time": "3:44:58", "throughput": 8878.77, "total_tokens": 18027072} +{"current_steps": 26765, "total_steps": 204665, "loss": 0.1408, "lr": 1.994238310776796e-06, "epoch": 0.6538734028778735, "percentage": 13.08, "elapsed_time": "0:33:50", "remaining_time": "3:44:57", "throughput": 8878.92, "total_tokens": 18030464} +{"current_steps": 26770, "total_steps": 204665, "loss": 0.1588, "lr": 1.9942291660626974e-06, "epoch": 0.6539955537097207, "percentage": 13.08, "elapsed_time": "0:33:51", "remaining_time": "3:44:56", "throughput": 8879.08, "total_tokens": 18033856} +{"current_steps": 26775, "total_steps": 204665, "loss": 0.0567, "lr": 1.9942200141182973e-06, "epoch": 0.6541177045415679, "percentage": 13.08, "elapsed_time": "0:33:51", "remaining_time": "3:44:56", "throughput": 8879.16, "total_tokens": 18037056} +{"current_steps": 26780, "total_steps": 204665, "loss": 0.0857, "lr": 1.9942108549436617e-06, "epoch": 0.6542398553734151, "percentage": 13.08, "elapsed_time": "0:33:51", "remaining_time": "3:44:55", "throughput": 8879.15, "total_tokens": 18040064} +{"current_steps": 26785, "total_steps": 204665, "loss": 0.0756, "lr": 1.9942016885388575e-06, "epoch": 0.6543620062052623, "percentage": 13.09, "elapsed_time": "0:33:52", "remaining_time": "3:44:55", "throughput": 8879.3, "total_tokens": 18043456} +{"current_steps": 26790, "total_steps": 204665, "loss": 0.0706, "lr": 1.994192514903951e-06, "epoch": 0.6544841570371094, "percentage": 13.09, "elapsed_time": "0:33:52", "remaining_time": "3:44:54", "throughput": 8879.37, "total_tokens": 18046656} +{"current_steps": 26795, "total_steps": 204665, "loss": 0.0439, "lr": 1.994183334039009e-06, "epoch": 0.6546063078689566, "percentage": 13.09, "elapsed_time": "0:33:52", "remaining_time": "3:44:53", "throughput": 8879.75, "total_tokens": 18050624} +{"current_steps": 26800, "total_steps": 204665, "loss": 0.0505, "lr": 1.9941741459440987e-06, "epoch": 0.6547284587008038, "percentage": 13.09, "elapsed_time": "0:33:53", "remaining_time": "3:44:53", "throughput": 8880.18, "total_tokens": 18054720} +{"current_steps": 26805, "total_steps": 204665, "loss": 0.1256, "lr": 1.9941649506192866e-06, "epoch": 0.654850609532651, "percentage": 13.1, "elapsed_time": "0:33:53", "remaining_time": "3:44:52", "throughput": 8880.24, "total_tokens": 18057856} +{"current_steps": 26810, "total_steps": 204665, "loss": 0.1938, "lr": 1.994155748064639e-06, "epoch": 0.654972760364498, "percentage": 13.1, "elapsed_time": "0:33:53", "remaining_time": "3:44:52", "throughput": 8880.52, "total_tokens": 18061568} +{"current_steps": 26815, "total_steps": 204665, "loss": 0.1578, "lr": 1.994146538280224e-06, "epoch": 0.6550949111963452, "percentage": 13.1, "elapsed_time": "0:33:54", "remaining_time": "3:44:51", "throughput": 8880.84, "total_tokens": 18065408} +{"current_steps": 26820, "total_steps": 204665, "loss": 0.0594, "lr": 1.994137321266108e-06, "epoch": 0.6552170620281924, "percentage": 13.1, "elapsed_time": "0:33:54", "remaining_time": "3:44:51", "throughput": 8881.13, "total_tokens": 18069184} +{"current_steps": 26825, "total_steps": 204665, "loss": 0.0316, "lr": 1.994128097022358e-06, "epoch": 0.6553392128600396, "percentage": 13.11, "elapsed_time": "0:33:54", "remaining_time": "3:44:50", "throughput": 8881.58, "total_tokens": 18073344} +{"current_steps": 26830, "total_steps": 204665, "loss": 0.2249, "lr": 1.9941188655490406e-06, "epoch": 0.6554613636918868, "percentage": 13.11, "elapsed_time": "0:33:55", "remaining_time": "3:44:50", "throughput": 8881.86, "total_tokens": 18077056} +{"current_steps": 26835, "total_steps": 204665, "loss": 0.1366, "lr": 1.994109626846224e-06, "epoch": 0.6555835145237339, "percentage": 13.11, "elapsed_time": "0:33:55", "remaining_time": "3:44:49", "throughput": 8882.05, "total_tokens": 18080576} +{"current_steps": 26840, "total_steps": 204665, "loss": 0.0704, "lr": 1.9941003809139746e-06, "epoch": 0.6557056653555811, "percentage": 13.11, "elapsed_time": "0:33:55", "remaining_time": "3:44:49", "throughput": 8882.22, "total_tokens": 18084032} +{"current_steps": 26845, "total_steps": 204665, "loss": 0.0993, "lr": 1.99409112775236e-06, "epoch": 0.6558278161874282, "percentage": 13.12, "elapsed_time": "0:33:56", "remaining_time": "3:44:48", "throughput": 8882.23, "total_tokens": 18087104} +{"current_steps": 26850, "total_steps": 204665, "loss": 0.1199, "lr": 1.994081867361447e-06, "epoch": 0.6559499670192754, "percentage": 13.12, "elapsed_time": "0:33:56", "remaining_time": "3:44:47", "throughput": 8882.34, "total_tokens": 18090432} +{"current_steps": 26855, "total_steps": 204665, "loss": 0.2395, "lr": 1.9940725997413037e-06, "epoch": 0.6560721178511225, "percentage": 13.12, "elapsed_time": "0:33:57", "remaining_time": "3:44:47", "throughput": 8882.42, "total_tokens": 18093632} +{"current_steps": 26860, "total_steps": 204665, "loss": 0.1031, "lr": 1.994063324891997e-06, "epoch": 0.6561942686829697, "percentage": 13.12, "elapsed_time": "0:33:57", "remaining_time": "3:44:46", "throughput": 8882.47, "total_tokens": 18096768} +{"current_steps": 26865, "total_steps": 204665, "loss": 0.107, "lr": 1.9940540428135942e-06, "epoch": 0.6563164195148169, "percentage": 13.13, "elapsed_time": "0:33:57", "remaining_time": "3:44:46", "throughput": 8882.62, "total_tokens": 18100160} +{"current_steps": 26870, "total_steps": 204665, "loss": 0.126, "lr": 1.9940447535061627e-06, "epoch": 0.6564385703466641, "percentage": 13.13, "elapsed_time": "0:33:58", "remaining_time": "3:44:45", "throughput": 8882.8, "total_tokens": 18103616} +{"current_steps": 26875, "total_steps": 204665, "loss": 0.1211, "lr": 1.994035456969771e-06, "epoch": 0.6565607211785113, "percentage": 13.13, "elapsed_time": "0:33:58", "remaining_time": "3:44:44", "throughput": 8883.09, "total_tokens": 18107392} +{"current_steps": 26880, "total_steps": 204665, "loss": 0.0588, "lr": 1.994026153204486e-06, "epoch": 0.6566828720103584, "percentage": 13.13, "elapsed_time": "0:33:59", "remaining_time": "3:44:46", "throughput": 8882.44, "total_tokens": 18111552} +{"current_steps": 26885, "total_steps": 204665, "loss": 0.052, "lr": 1.9940168422103752e-06, "epoch": 0.6568050228422055, "percentage": 13.14, "elapsed_time": "0:33:59", "remaining_time": "3:44:45", "throughput": 8882.59, "total_tokens": 18114944} +{"current_steps": 26890, "total_steps": 204665, "loss": 0.1785, "lr": 1.9940075239875068e-06, "epoch": 0.6569271736740527, "percentage": 13.14, "elapsed_time": "0:33:59", "remaining_time": "3:44:44", "throughput": 8882.64, "total_tokens": 18118080} +{"current_steps": 26895, "total_steps": 204665, "loss": 0.0843, "lr": 1.993998198535948e-06, "epoch": 0.6570493245058999, "percentage": 13.14, "elapsed_time": "0:34:00", "remaining_time": "3:44:44", "throughput": 8882.85, "total_tokens": 18121664} +{"current_steps": 26900, "total_steps": 204665, "loss": 0.0735, "lr": 1.993988865855767e-06, "epoch": 0.657171475337747, "percentage": 13.14, "elapsed_time": "0:34:00", "remaining_time": "3:44:43", "throughput": 8882.94, "total_tokens": 18124928} +{"current_steps": 26905, "total_steps": 204665, "loss": 0.2564, "lr": 1.9939795259470324e-06, "epoch": 0.6572936261695942, "percentage": 13.15, "elapsed_time": "0:34:00", "remaining_time": "3:44:43", "throughput": 8883.0, "total_tokens": 18128128} +{"current_steps": 26910, "total_steps": 204665, "loss": 0.0907, "lr": 1.9939701788098104e-06, "epoch": 0.6574157770014414, "percentage": 13.15, "elapsed_time": "0:34:01", "remaining_time": "3:44:42", "throughput": 8883.08, "total_tokens": 18131328} +{"current_steps": 26915, "total_steps": 204665, "loss": 0.098, "lr": 1.99396082444417e-06, "epoch": 0.6575379278332886, "percentage": 13.15, "elapsed_time": "0:34:01", "remaining_time": "3:44:42", "throughput": 8883.33, "total_tokens": 18134976} +{"current_steps": 26920, "total_steps": 204665, "loss": 0.0583, "lr": 1.99395146285018e-06, "epoch": 0.6576600786651358, "percentage": 13.15, "elapsed_time": "0:34:01", "remaining_time": "3:44:41", "throughput": 8883.5, "total_tokens": 18138432} +{"current_steps": 26925, "total_steps": 204665, "loss": 0.151, "lr": 1.993942094027907e-06, "epoch": 0.6577822294969828, "percentage": 13.16, "elapsed_time": "0:34:02", "remaining_time": "3:44:40", "throughput": 8883.69, "total_tokens": 18141952} +{"current_steps": 26930, "total_steps": 204665, "loss": 0.1185, "lr": 1.9939327179774198e-06, "epoch": 0.65790438032883, "percentage": 13.16, "elapsed_time": "0:34:02", "remaining_time": "3:44:40", "throughput": 8884.06, "total_tokens": 18145920} +{"current_steps": 26935, "total_steps": 204665, "loss": 0.1919, "lr": 1.9939233346987863e-06, "epoch": 0.6580265311606772, "percentage": 13.16, "elapsed_time": "0:34:02", "remaining_time": "3:44:39", "throughput": 8884.25, "total_tokens": 18149440} +{"current_steps": 26940, "total_steps": 204665, "loss": 0.1845, "lr": 1.993913944192075e-06, "epoch": 0.6581486819925244, "percentage": 13.16, "elapsed_time": "0:34:03", "remaining_time": "3:44:39", "throughput": 8884.33, "total_tokens": 18152704} +{"current_steps": 26945, "total_steps": 204665, "loss": 0.2145, "lr": 1.9939045464573544e-06, "epoch": 0.6582708328243715, "percentage": 13.17, "elapsed_time": "0:34:03", "remaining_time": "3:44:38", "throughput": 8884.35, "total_tokens": 18155776} +{"current_steps": 26950, "total_steps": 204665, "loss": 0.1141, "lr": 1.993895141494693e-06, "epoch": 0.6583929836562187, "percentage": 13.17, "elapsed_time": "0:34:03", "remaining_time": "3:44:38", "throughput": 8884.39, "total_tokens": 18158912} +{"current_steps": 26955, "total_steps": 204665, "loss": 0.1262, "lr": 1.993885729304158e-06, "epoch": 0.6585151344880659, "percentage": 13.17, "elapsed_time": "0:34:04", "remaining_time": "3:44:37", "throughput": 8884.7, "total_tokens": 18162752} +{"current_steps": 26960, "total_steps": 204665, "loss": 0.1028, "lr": 1.993876309885819e-06, "epoch": 0.6586372853199131, "percentage": 13.17, "elapsed_time": "0:34:04", "remaining_time": "3:44:36", "throughput": 8884.9, "total_tokens": 18166272} +{"current_steps": 26965, "total_steps": 204665, "loss": 0.1589, "lr": 1.993866883239744e-06, "epoch": 0.6587594361517602, "percentage": 13.18, "elapsed_time": "0:34:04", "remaining_time": "3:44:36", "throughput": 8885.02, "total_tokens": 18169600} +{"current_steps": 26970, "total_steps": 204665, "loss": 0.0782, "lr": 1.9938574493660023e-06, "epoch": 0.6588815869836073, "percentage": 13.18, "elapsed_time": "0:34:05", "remaining_time": "3:44:35", "throughput": 8885.39, "total_tokens": 18173568} +{"current_steps": 26975, "total_steps": 204665, "loss": 0.1405, "lr": 1.9938480082646613e-06, "epoch": 0.6590037378154545, "percentage": 13.18, "elapsed_time": "0:34:05", "remaining_time": "3:44:35", "throughput": 8885.38, "total_tokens": 18176576} +{"current_steps": 26980, "total_steps": 204665, "loss": 0.0897, "lr": 1.9938385599357907e-06, "epoch": 0.6591258886473017, "percentage": 13.18, "elapsed_time": "0:34:06", "remaining_time": "3:44:34", "throughput": 8885.47, "total_tokens": 18179840} +{"current_steps": 26985, "total_steps": 204665, "loss": 0.1164, "lr": 1.9938291043794585e-06, "epoch": 0.6592480394791489, "percentage": 13.18, "elapsed_time": "0:34:06", "remaining_time": "3:44:34", "throughput": 8885.45, "total_tokens": 18182784} +{"current_steps": 26990, "total_steps": 204665, "loss": 0.1907, "lr": 1.9938196415957334e-06, "epoch": 0.659370190310996, "percentage": 13.19, "elapsed_time": "0:34:06", "remaining_time": "3:44:33", "throughput": 8885.52, "total_tokens": 18185984} +{"current_steps": 26995, "total_steps": 204665, "loss": 0.0442, "lr": 1.9938101715846853e-06, "epoch": 0.6594923411428432, "percentage": 13.19, "elapsed_time": "0:34:07", "remaining_time": "3:44:32", "throughput": 8885.55, "total_tokens": 18189056} +{"current_steps": 27000, "total_steps": 204665, "loss": 0.1052, "lr": 1.993800694346382e-06, "epoch": 0.6596144919746904, "percentage": 13.19, "elapsed_time": "0:34:07", "remaining_time": "3:44:32", "throughput": 8885.67, "total_tokens": 18192384} +{"current_steps": 27005, "total_steps": 204665, "loss": 0.0961, "lr": 1.9937912098808927e-06, "epoch": 0.6597366428065375, "percentage": 13.19, "elapsed_time": "0:34:07", "remaining_time": "3:44:31", "throughput": 8885.79, "total_tokens": 18195712} +{"current_steps": 27010, "total_steps": 204665, "loss": 0.1263, "lr": 1.9937817181882864e-06, "epoch": 0.6598587936383846, "percentage": 13.2, "elapsed_time": "0:34:08", "remaining_time": "3:44:30", "throughput": 8885.84, "total_tokens": 18198848} +{"current_steps": 27015, "total_steps": 204665, "loss": 0.0561, "lr": 1.9937722192686323e-06, "epoch": 0.6599809444702318, "percentage": 13.2, "elapsed_time": "0:34:08", "remaining_time": "3:44:30", "throughput": 8885.92, "total_tokens": 18202112} +{"current_steps": 27020, "total_steps": 204665, "loss": 0.071, "lr": 1.9937627131219995e-06, "epoch": 0.660103095302079, "percentage": 13.2, "elapsed_time": "0:34:08", "remaining_time": "3:44:29", "throughput": 8886.14, "total_tokens": 18205696} +{"current_steps": 27025, "total_steps": 204665, "loss": 0.2277, "lr": 1.9937531997484566e-06, "epoch": 0.6602252461339262, "percentage": 13.2, "elapsed_time": "0:34:09", "remaining_time": "3:44:29", "throughput": 8886.3, "total_tokens": 18209152} +{"current_steps": 27030, "total_steps": 204665, "loss": 0.1101, "lr": 1.993743679148073e-06, "epoch": 0.6603473969657734, "percentage": 13.21, "elapsed_time": "0:34:09", "remaining_time": "3:44:28", "throughput": 8886.37, "total_tokens": 18212352} +{"current_steps": 27035, "total_steps": 204665, "loss": 0.1003, "lr": 1.9937341513209183e-06, "epoch": 0.6604695477976205, "percentage": 13.21, "elapsed_time": "0:34:09", "remaining_time": "3:44:28", "throughput": 8886.5, "total_tokens": 18215680} +{"current_steps": 27040, "total_steps": 204665, "loss": 0.0563, "lr": 1.9937246162670614e-06, "epoch": 0.6605916986294676, "percentage": 13.21, "elapsed_time": "0:34:10", "remaining_time": "3:44:27", "throughput": 8886.65, "total_tokens": 18219072} +{"current_steps": 27045, "total_steps": 204665, "loss": 0.105, "lr": 1.9937150739865723e-06, "epoch": 0.6607138494613148, "percentage": 13.21, "elapsed_time": "0:34:10", "remaining_time": "3:44:26", "throughput": 8886.8, "total_tokens": 18222464} +{"current_steps": 27050, "total_steps": 204665, "loss": 0.0846, "lr": 1.9937055244795195e-06, "epoch": 0.660836000293162, "percentage": 13.22, "elapsed_time": "0:34:10", "remaining_time": "3:44:26", "throughput": 8887.04, "total_tokens": 18226112} +{"current_steps": 27055, "total_steps": 204665, "loss": 0.1617, "lr": 1.993695967745973e-06, "epoch": 0.6609581511250091, "percentage": 13.22, "elapsed_time": "0:34:11", "remaining_time": "3:44:25", "throughput": 8887.07, "total_tokens": 18229184} +{"current_steps": 27060, "total_steps": 204665, "loss": 0.0614, "lr": 1.993686403786002e-06, "epoch": 0.6610803019568563, "percentage": 13.22, "elapsed_time": "0:34:11", "remaining_time": "3:44:25", "throughput": 8887.12, "total_tokens": 18232320} +{"current_steps": 27065, "total_steps": 204665, "loss": 0.0612, "lr": 1.9936768325996763e-06, "epoch": 0.6612024527887035, "percentage": 13.22, "elapsed_time": "0:34:11", "remaining_time": "3:44:24", "throughput": 8887.22, "total_tokens": 18235584} +{"current_steps": 27070, "total_steps": 204665, "loss": 0.0812, "lr": 1.9936672541870656e-06, "epoch": 0.6613246036205507, "percentage": 13.23, "elapsed_time": "0:34:12", "remaining_time": "3:44:23", "throughput": 8887.23, "total_tokens": 18238656} +{"current_steps": 27075, "total_steps": 204665, "loss": 0.0975, "lr": 1.993657668548239e-06, "epoch": 0.6614467544523979, "percentage": 13.23, "elapsed_time": "0:34:12", "remaining_time": "3:44:23", "throughput": 8887.46, "total_tokens": 18242240} +{"current_steps": 27080, "total_steps": 204665, "loss": 0.1443, "lr": 1.993648075683267e-06, "epoch": 0.661568905284245, "percentage": 13.23, "elapsed_time": "0:34:12", "remaining_time": "3:44:22", "throughput": 8887.73, "total_tokens": 18245952} +{"current_steps": 27085, "total_steps": 204665, "loss": 0.1316, "lr": 1.9936384755922185e-06, "epoch": 0.6616910561160921, "percentage": 13.23, "elapsed_time": "0:34:13", "remaining_time": "3:44:22", "throughput": 8887.76, "total_tokens": 18249088} +{"current_steps": 27090, "total_steps": 204665, "loss": 0.0605, "lr": 1.993628868275164e-06, "epoch": 0.6618132069479393, "percentage": 13.24, "elapsed_time": "0:34:13", "remaining_time": "3:44:21", "throughput": 8887.98, "total_tokens": 18252672} +{"current_steps": 27095, "total_steps": 204665, "loss": 0.2245, "lr": 1.9936192537321733e-06, "epoch": 0.6619353577797865, "percentage": 13.24, "elapsed_time": "0:34:13", "remaining_time": "3:44:20", "throughput": 8888.0, "total_tokens": 18255744} +{"current_steps": 27100, "total_steps": 204665, "loss": 0.0751, "lr": 1.993609631963316e-06, "epoch": 0.6620575086116336, "percentage": 13.24, "elapsed_time": "0:34:14", "remaining_time": "3:44:20", "throughput": 8888.33, "total_tokens": 18259584} +{"current_steps": 27105, "total_steps": 204665, "loss": 0.1795, "lr": 1.993600002968662e-06, "epoch": 0.6621796594434808, "percentage": 13.24, "elapsed_time": "0:34:14", "remaining_time": "3:44:19", "throughput": 8888.35, "total_tokens": 18262656} +{"current_steps": 27110, "total_steps": 204665, "loss": 0.1977, "lr": 1.9935903667482815e-06, "epoch": 0.662301810275328, "percentage": 13.25, "elapsed_time": "0:34:15", "remaining_time": "3:44:19", "throughput": 8888.52, "total_tokens": 18266112} +{"current_steps": 27115, "total_steps": 204665, "loss": 0.1104, "lr": 1.993580723302245e-06, "epoch": 0.6624239611071752, "percentage": 13.25, "elapsed_time": "0:34:15", "remaining_time": "3:44:18", "throughput": 8888.57, "total_tokens": 18269248} +{"current_steps": 27120, "total_steps": 204665, "loss": 0.1705, "lr": 1.9935710726306215e-06, "epoch": 0.6625461119390224, "percentage": 13.25, "elapsed_time": "0:34:15", "remaining_time": "3:44:18", "throughput": 8888.71, "total_tokens": 18272640} +{"current_steps": 27125, "total_steps": 204665, "loss": 0.116, "lr": 1.9935614147334825e-06, "epoch": 0.6626682627708694, "percentage": 13.25, "elapsed_time": "0:34:16", "remaining_time": "3:44:17", "throughput": 8888.85, "total_tokens": 18276032} +{"current_steps": 27130, "total_steps": 204665, "loss": 0.1621, "lr": 1.9935517496108976e-06, "epoch": 0.6627904136027166, "percentage": 13.26, "elapsed_time": "0:34:16", "remaining_time": "3:44:16", "throughput": 8889.13, "total_tokens": 18279744} +{"current_steps": 27135, "total_steps": 204665, "loss": 0.0469, "lr": 1.9935420772629374e-06, "epoch": 0.6629125644345638, "percentage": 13.26, "elapsed_time": "0:34:16", "remaining_time": "3:44:16", "throughput": 8889.11, "total_tokens": 18282688} +{"current_steps": 27140, "total_steps": 204665, "loss": 0.1928, "lr": 1.9935323976896713e-06, "epoch": 0.663034715266411, "percentage": 13.26, "elapsed_time": "0:34:17", "remaining_time": "3:44:15", "throughput": 8889.25, "total_tokens": 18286080} +{"current_steps": 27145, "total_steps": 204665, "loss": 0.1215, "lr": 1.993522710891171e-06, "epoch": 0.6631568660982581, "percentage": 13.26, "elapsed_time": "0:34:17", "remaining_time": "3:44:15", "throughput": 8889.47, "total_tokens": 18289664} +{"current_steps": 27150, "total_steps": 204665, "loss": 0.0822, "lr": 1.993513016867506e-06, "epoch": 0.6632790169301053, "percentage": 13.27, "elapsed_time": "0:34:17", "remaining_time": "3:44:14", "throughput": 8889.45, "total_tokens": 18292608} +{"current_steps": 27155, "total_steps": 204665, "loss": 0.0923, "lr": 1.993503315618747e-06, "epoch": 0.6634011677619525, "percentage": 13.27, "elapsed_time": "0:34:18", "remaining_time": "3:44:13", "throughput": 8889.53, "total_tokens": 18295808} +{"current_steps": 27160, "total_steps": 204665, "loss": 0.1647, "lr": 1.993493607144965e-06, "epoch": 0.6635233185937996, "percentage": 13.27, "elapsed_time": "0:34:18", "remaining_time": "3:44:13", "throughput": 8889.7, "total_tokens": 18299264} +{"current_steps": 27165, "total_steps": 204665, "loss": 0.1891, "lr": 1.9934838914462303e-06, "epoch": 0.6636454694256468, "percentage": 13.27, "elapsed_time": "0:34:18", "remaining_time": "3:44:12", "throughput": 8889.71, "total_tokens": 18302336} +{"current_steps": 27170, "total_steps": 204665, "loss": 0.0913, "lr": 1.9934741685226133e-06, "epoch": 0.6637676202574939, "percentage": 13.28, "elapsed_time": "0:34:19", "remaining_time": "3:44:12", "throughput": 8889.9, "total_tokens": 18305792} +{"current_steps": 27175, "total_steps": 204665, "loss": 0.0621, "lr": 1.993464438374185e-06, "epoch": 0.6638897710893411, "percentage": 13.28, "elapsed_time": "0:34:19", "remaining_time": "3:44:11", "throughput": 8889.94, "total_tokens": 18308928} +{"current_steps": 27180, "total_steps": 204665, "loss": 0.1885, "lr": 1.993454701001016e-06, "epoch": 0.6640119219211883, "percentage": 13.28, "elapsed_time": "0:34:19", "remaining_time": "3:44:10", "throughput": 8889.99, "total_tokens": 18312064} +{"current_steps": 27185, "total_steps": 204665, "loss": 0.1044, "lr": 1.9934449564031774e-06, "epoch": 0.6641340727530355, "percentage": 13.28, "elapsed_time": "0:34:20", "remaining_time": "3:44:10", "throughput": 8890.24, "total_tokens": 18315712} +{"current_steps": 27190, "total_steps": 204665, "loss": 0.1608, "lr": 1.99343520458074e-06, "epoch": 0.6642562235848826, "percentage": 13.29, "elapsed_time": "0:34:20", "remaining_time": "3:44:09", "throughput": 8890.45, "total_tokens": 18319296} +{"current_steps": 27195, "total_steps": 204665, "loss": 0.0892, "lr": 1.993425445533774e-06, "epoch": 0.6643783744167298, "percentage": 13.29, "elapsed_time": "0:34:20", "remaining_time": "3:44:09", "throughput": 8890.61, "total_tokens": 18322688} +{"current_steps": 27200, "total_steps": 204665, "loss": 0.031, "lr": 1.993415679262351e-06, "epoch": 0.664500525248577, "percentage": 13.29, "elapsed_time": "0:34:21", "remaining_time": "3:44:08", "throughput": 8890.82, "total_tokens": 18326272} +{"current_steps": 27205, "total_steps": 204665, "loss": 0.1511, "lr": 1.9934059057665428e-06, "epoch": 0.6646226760804241, "percentage": 13.29, "elapsed_time": "0:34:21", "remaining_time": "3:44:07", "throughput": 8890.79, "total_tokens": 18329216} +{"current_steps": 27210, "total_steps": 204665, "loss": 0.0901, "lr": 1.993396125046419e-06, "epoch": 0.6647448269122713, "percentage": 13.29, "elapsed_time": "0:34:21", "remaining_time": "3:44:07", "throughput": 8891.04, "total_tokens": 18332864} +{"current_steps": 27215, "total_steps": 204665, "loss": 0.1088, "lr": 1.9933863371020515e-06, "epoch": 0.6648669777441184, "percentage": 13.3, "elapsed_time": "0:34:22", "remaining_time": "3:44:06", "throughput": 8891.11, "total_tokens": 18336064} +{"current_steps": 27220, "total_steps": 204665, "loss": 0.0837, "lr": 1.9933765419335114e-06, "epoch": 0.6649891285759656, "percentage": 13.3, "elapsed_time": "0:34:22", "remaining_time": "3:44:06", "throughput": 8891.3, "total_tokens": 18339584} +{"current_steps": 27225, "total_steps": 204665, "loss": 0.1089, "lr": 1.9933667395408703e-06, "epoch": 0.6651112794078128, "percentage": 13.3, "elapsed_time": "0:34:22", "remaining_time": "3:44:05", "throughput": 8891.32, "total_tokens": 18342656} +{"current_steps": 27230, "total_steps": 204665, "loss": 0.1059, "lr": 1.9933569299241987e-06, "epoch": 0.66523343023966, "percentage": 13.3, "elapsed_time": "0:34:23", "remaining_time": "3:44:05", "throughput": 8891.49, "total_tokens": 18346112} +{"current_steps": 27235, "total_steps": 204665, "loss": 0.0226, "lr": 1.993347113083568e-06, "epoch": 0.665355581071507, "percentage": 13.31, "elapsed_time": "0:34:23", "remaining_time": "3:44:04", "throughput": 8891.66, "total_tokens": 18349568} +{"current_steps": 27240, "total_steps": 204665, "loss": 0.024, "lr": 1.9933372890190503e-06, "epoch": 0.6654777319033542, "percentage": 13.31, "elapsed_time": "0:34:24", "remaining_time": "3:44:03", "throughput": 8891.76, "total_tokens": 18352832} +{"current_steps": 27245, "total_steps": 204665, "loss": 0.1599, "lr": 1.9933274577307167e-06, "epoch": 0.6655998827352014, "percentage": 13.31, "elapsed_time": "0:34:24", "remaining_time": "3:44:03", "throughput": 8891.77, "total_tokens": 18355840} +{"current_steps": 27250, "total_steps": 204665, "loss": 0.176, "lr": 1.993317619218639e-06, "epoch": 0.6657220335670486, "percentage": 13.31, "elapsed_time": "0:34:24", "remaining_time": "3:44:02", "throughput": 8891.99, "total_tokens": 18359424} +{"current_steps": 27255, "total_steps": 204665, "loss": 0.2333, "lr": 1.9933077734828877e-06, "epoch": 0.6658441843988958, "percentage": 13.32, "elapsed_time": "0:34:25", "remaining_time": "3:44:02", "throughput": 8892.13, "total_tokens": 18362816} +{"current_steps": 27260, "total_steps": 204665, "loss": 0.088, "lr": 1.993297920523535e-06, "epoch": 0.6659663352307429, "percentage": 13.32, "elapsed_time": "0:34:25", "remaining_time": "3:44:01", "throughput": 8892.4, "total_tokens": 18366528} +{"current_steps": 27265, "total_steps": 204665, "loss": 0.1244, "lr": 1.9932880603406533e-06, "epoch": 0.6660884860625901, "percentage": 13.32, "elapsed_time": "0:34:25", "remaining_time": "3:44:00", "throughput": 8892.59, "total_tokens": 18370048} +{"current_steps": 27270, "total_steps": 204665, "loss": 0.0695, "lr": 1.9932781929343135e-06, "epoch": 0.6662106368944373, "percentage": 13.32, "elapsed_time": "0:34:26", "remaining_time": "3:44:00", "throughput": 8892.62, "total_tokens": 18373120} +{"current_steps": 27275, "total_steps": 204665, "loss": 0.182, "lr": 1.9932683183045872e-06, "epoch": 0.6663327877262845, "percentage": 13.33, "elapsed_time": "0:34:26", "remaining_time": "3:43:59", "throughput": 8892.76, "total_tokens": 18376512} +{"current_steps": 27280, "total_steps": 204665, "loss": 0.1501, "lr": 1.993258436451547e-06, "epoch": 0.6664549385581315, "percentage": 13.33, "elapsed_time": "0:34:26", "remaining_time": "3:43:59", "throughput": 8892.86, "total_tokens": 18379776} +{"current_steps": 27285, "total_steps": 204665, "loss": 0.1145, "lr": 1.993248547375264e-06, "epoch": 0.6665770893899787, "percentage": 13.33, "elapsed_time": "0:34:27", "remaining_time": "3:43:58", "throughput": 8893.01, "total_tokens": 18383168} +{"current_steps": 27290, "total_steps": 204665, "loss": 0.0978, "lr": 1.9932386510758102e-06, "epoch": 0.6666992402218259, "percentage": 13.33, "elapsed_time": "0:34:27", "remaining_time": "3:43:57", "throughput": 8893.17, "total_tokens": 18386560} +{"current_steps": 27295, "total_steps": 204665, "loss": 0.1649, "lr": 1.9932287475532582e-06, "epoch": 0.6668213910536731, "percentage": 13.34, "elapsed_time": "0:34:27", "remaining_time": "3:43:57", "throughput": 8893.4, "total_tokens": 18390208} +{"current_steps": 27300, "total_steps": 204665, "loss": 0.3528, "lr": 1.9932188368076793e-06, "epoch": 0.6669435418855202, "percentage": 13.34, "elapsed_time": "0:34:28", "remaining_time": "3:43:56", "throughput": 8893.54, "total_tokens": 18393600} +{"current_steps": 27305, "total_steps": 204665, "loss": 0.1349, "lr": 1.993208918839146e-06, "epoch": 0.6670656927173674, "percentage": 13.34, "elapsed_time": "0:34:28", "remaining_time": "3:43:56", "throughput": 8893.75, "total_tokens": 18397184} +{"current_steps": 27310, "total_steps": 204665, "loss": 0.1671, "lr": 1.99319899364773e-06, "epoch": 0.6671878435492146, "percentage": 13.34, "elapsed_time": "0:34:28", "remaining_time": "3:43:55", "throughput": 8894.33, "total_tokens": 18401728} +{"current_steps": 27315, "total_steps": 204665, "loss": 0.2605, "lr": 1.9931890612335044e-06, "epoch": 0.6673099943810618, "percentage": 13.35, "elapsed_time": "0:34:29", "remaining_time": "3:43:55", "throughput": 8894.48, "total_tokens": 18405184} +{"current_steps": 27320, "total_steps": 204665, "loss": 0.0884, "lr": 1.9931791215965405e-06, "epoch": 0.667432145212909, "percentage": 13.35, "elapsed_time": "0:34:29", "remaining_time": "3:43:54", "throughput": 8894.51, "total_tokens": 18408256} +{"current_steps": 27325, "total_steps": 204665, "loss": 0.1059, "lr": 1.993169174736911e-06, "epoch": 0.667554296044756, "percentage": 13.35, "elapsed_time": "0:34:29", "remaining_time": "3:43:54", "throughput": 8894.5, "total_tokens": 18411264} +{"current_steps": 27330, "total_steps": 204665, "loss": 0.0379, "lr": 1.993159220654688e-06, "epoch": 0.6676764468766032, "percentage": 13.35, "elapsed_time": "0:34:30", "remaining_time": "3:43:53", "throughput": 8894.72, "total_tokens": 18414848} +{"current_steps": 27335, "total_steps": 204665, "loss": 0.1058, "lr": 1.993149259349944e-06, "epoch": 0.6677985977084504, "percentage": 13.36, "elapsed_time": "0:34:30", "remaining_time": "3:43:52", "throughput": 8894.88, "total_tokens": 18418304} +{"current_steps": 27340, "total_steps": 204665, "loss": 0.061, "lr": 1.9931392908227515e-06, "epoch": 0.6679207485402976, "percentage": 13.36, "elapsed_time": "0:34:31", "remaining_time": "3:43:52", "throughput": 8895.02, "total_tokens": 18421696} +{"current_steps": 27345, "total_steps": 204665, "loss": 0.1927, "lr": 1.993129315073183e-06, "epoch": 0.6680428993721447, "percentage": 13.36, "elapsed_time": "0:34:31", "remaining_time": "3:43:51", "throughput": 8895.12, "total_tokens": 18424960} +{"current_steps": 27350, "total_steps": 204665, "loss": 0.0656, "lr": 1.993119332101311e-06, "epoch": 0.6681650502039919, "percentage": 13.36, "elapsed_time": "0:34:31", "remaining_time": "3:43:51", "throughput": 8895.13, "total_tokens": 18428032} +{"current_steps": 27355, "total_steps": 204665, "loss": 0.0642, "lr": 1.993109341907208e-06, "epoch": 0.668287201035839, "percentage": 13.37, "elapsed_time": "0:34:32", "remaining_time": "3:43:50", "throughput": 8895.34, "total_tokens": 18431616} +{"current_steps": 27360, "total_steps": 204665, "loss": 0.1862, "lr": 1.993099344490947e-06, "epoch": 0.6684093518676862, "percentage": 13.37, "elapsed_time": "0:34:32", "remaining_time": "3:43:50", "throughput": 8895.3, "total_tokens": 18434560} +{"current_steps": 27365, "total_steps": 204665, "loss": 0.1303, "lr": 1.9930893398526e-06, "epoch": 0.6685315026995334, "percentage": 13.37, "elapsed_time": "0:34:32", "remaining_time": "3:43:49", "throughput": 8895.33, "total_tokens": 18437696} +{"current_steps": 27370, "total_steps": 204665, "loss": 0.102, "lr": 1.9930793279922408e-06, "epoch": 0.6686536535313805, "percentage": 13.37, "elapsed_time": "0:34:33", "remaining_time": "3:43:48", "throughput": 8895.39, "total_tokens": 18440896} +{"current_steps": 27375, "total_steps": 204665, "loss": 0.0309, "lr": 1.993069308909941e-06, "epoch": 0.6687758043632277, "percentage": 13.38, "elapsed_time": "0:34:33", "remaining_time": "3:43:48", "throughput": 8895.41, "total_tokens": 18443968} +{"current_steps": 27380, "total_steps": 204665, "loss": 0.1691, "lr": 1.9930592826057746e-06, "epoch": 0.6688979551950749, "percentage": 13.38, "elapsed_time": "0:34:33", "remaining_time": "3:43:47", "throughput": 8895.58, "total_tokens": 18447424} +{"current_steps": 27385, "total_steps": 204665, "loss": 0.1198, "lr": 1.993049249079814e-06, "epoch": 0.6690201060269221, "percentage": 13.38, "elapsed_time": "0:34:34", "remaining_time": "3:43:47", "throughput": 8895.76, "total_tokens": 18450944} +{"current_steps": 27390, "total_steps": 204665, "loss": 0.2652, "lr": 1.9930392083321315e-06, "epoch": 0.6691422568587692, "percentage": 13.38, "elapsed_time": "0:34:34", "remaining_time": "3:43:46", "throughput": 8895.77, "total_tokens": 18453952} +{"current_steps": 27395, "total_steps": 204665, "loss": 0.0694, "lr": 1.9930291603628013e-06, "epoch": 0.6692644076906163, "percentage": 13.39, "elapsed_time": "0:34:34", "remaining_time": "3:43:45", "throughput": 8895.8, "total_tokens": 18457088} +{"current_steps": 27400, "total_steps": 204665, "loss": 0.0814, "lr": 1.993019105171896e-06, "epoch": 0.6693865585224635, "percentage": 13.39, "elapsed_time": "0:34:35", "remaining_time": "3:43:45", "throughput": 8895.73, "total_tokens": 18459904} +{"current_steps": 27405, "total_steps": 204665, "loss": 0.0772, "lr": 1.9930090427594885e-06, "epoch": 0.6695087093543107, "percentage": 13.39, "elapsed_time": "0:34:35", "remaining_time": "3:43:44", "throughput": 8895.85, "total_tokens": 18463232} +{"current_steps": 27410, "total_steps": 204665, "loss": 0.0064, "lr": 1.992998973125652e-06, "epoch": 0.6696308601861579, "percentage": 13.39, "elapsed_time": "0:34:35", "remaining_time": "3:43:44", "throughput": 8896.07, "total_tokens": 18466816} +{"current_steps": 27415, "total_steps": 204665, "loss": 0.0142, "lr": 1.9929888962704603e-06, "epoch": 0.669753011018005, "percentage": 13.4, "elapsed_time": "0:34:36", "remaining_time": "3:43:43", "throughput": 8896.17, "total_tokens": 18470080} +{"current_steps": 27420, "total_steps": 204665, "loss": 0.0758, "lr": 1.992978812193986e-06, "epoch": 0.6698751618498522, "percentage": 13.4, "elapsed_time": "0:34:36", "remaining_time": "3:43:42", "throughput": 8896.37, "total_tokens": 18473664} +{"current_steps": 27425, "total_steps": 204665, "loss": 0.0811, "lr": 1.9929687208963026e-06, "epoch": 0.6699973126816994, "percentage": 13.4, "elapsed_time": "0:34:36", "remaining_time": "3:43:42", "throughput": 8896.45, "total_tokens": 18476928} +{"current_steps": 27430, "total_steps": 204665, "loss": 0.1545, "lr": 1.992958622377484e-06, "epoch": 0.6701194635135466, "percentage": 13.4, "elapsed_time": "0:34:37", "remaining_time": "3:43:41", "throughput": 8896.59, "total_tokens": 18480320} +{"current_steps": 27435, "total_steps": 204665, "loss": 0.1974, "lr": 1.9929485166376026e-06, "epoch": 0.6702416143453936, "percentage": 13.4, "elapsed_time": "0:34:37", "remaining_time": "3:43:41", "throughput": 8896.77, "total_tokens": 18483840} +{"current_steps": 27440, "total_steps": 204665, "loss": 0.07, "lr": 1.992938403676733e-06, "epoch": 0.6703637651772408, "percentage": 13.41, "elapsed_time": "0:34:37", "remaining_time": "3:43:40", "throughput": 8896.8, "total_tokens": 18486912} +{"current_steps": 27445, "total_steps": 204665, "loss": 0.1578, "lr": 1.9929282834949483e-06, "epoch": 0.670485916009088, "percentage": 13.41, "elapsed_time": "0:34:38", "remaining_time": "3:43:39", "throughput": 8896.82, "total_tokens": 18489984} +{"current_steps": 27450, "total_steps": 204665, "loss": 0.2339, "lr": 1.9929181560923217e-06, "epoch": 0.6706080668409352, "percentage": 13.41, "elapsed_time": "0:34:38", "remaining_time": "3:43:39", "throughput": 8896.93, "total_tokens": 18493312} +{"current_steps": 27455, "total_steps": 204665, "loss": 0.0862, "lr": 1.9929080214689274e-06, "epoch": 0.6707302176727824, "percentage": 13.41, "elapsed_time": "0:34:38", "remaining_time": "3:43:38", "throughput": 8897.12, "total_tokens": 18496832} +{"current_steps": 27460, "total_steps": 204665, "loss": 0.1234, "lr": 1.992897879624839e-06, "epoch": 0.6708523685046295, "percentage": 13.42, "elapsed_time": "0:34:39", "remaining_time": "3:43:38", "throughput": 8897.24, "total_tokens": 18500160} +{"current_steps": 27465, "total_steps": 204665, "loss": 0.1291, "lr": 1.99288773056013e-06, "epoch": 0.6709745193364767, "percentage": 13.42, "elapsed_time": "0:34:39", "remaining_time": "3:43:37", "throughput": 8897.52, "total_tokens": 18503936} +{"current_steps": 27470, "total_steps": 204665, "loss": 0.1425, "lr": 1.9928775742748747e-06, "epoch": 0.6710966701683239, "percentage": 13.42, "elapsed_time": "0:34:40", "remaining_time": "3:43:37", "throughput": 8897.83, "total_tokens": 18507776} +{"current_steps": 27475, "total_steps": 204665, "loss": 0.0539, "lr": 1.9928674107691463e-06, "epoch": 0.671218821000171, "percentage": 13.42, "elapsed_time": "0:34:40", "remaining_time": "3:43:36", "throughput": 8897.95, "total_tokens": 18511104} +{"current_steps": 27480, "total_steps": 204665, "loss": 0.1931, "lr": 1.992857240043019e-06, "epoch": 0.6713409718320181, "percentage": 13.43, "elapsed_time": "0:34:40", "remaining_time": "3:43:36", "throughput": 8898.01, "total_tokens": 18514304} +{"current_steps": 27485, "total_steps": 204665, "loss": 0.0949, "lr": 1.992847062096567e-06, "epoch": 0.6714631226638653, "percentage": 13.43, "elapsed_time": "0:34:41", "remaining_time": "3:43:35", "throughput": 8898.15, "total_tokens": 18517696} +{"current_steps": 27490, "total_steps": 204665, "loss": 0.1095, "lr": 1.9928368769298636e-06, "epoch": 0.6715852734957125, "percentage": 13.43, "elapsed_time": "0:34:41", "remaining_time": "3:43:34", "throughput": 8898.21, "total_tokens": 18520896} +{"current_steps": 27495, "total_steps": 204665, "loss": 0.0394, "lr": 1.992826684542984e-06, "epoch": 0.6717074243275597, "percentage": 13.43, "elapsed_time": "0:34:41", "remaining_time": "3:43:34", "throughput": 8898.36, "total_tokens": 18524288} +{"current_steps": 27500, "total_steps": 204665, "loss": 0.2707, "lr": 1.9928164849360018e-06, "epoch": 0.6718295751594069, "percentage": 13.44, "elapsed_time": "0:34:42", "remaining_time": "3:43:33", "throughput": 8898.5, "total_tokens": 18527680} +{"current_steps": 27505, "total_steps": 204665, "loss": 0.0718, "lr": 1.9928062781089906e-06, "epoch": 0.671951725991254, "percentage": 13.44, "elapsed_time": "0:34:42", "remaining_time": "3:43:33", "throughput": 8898.73, "total_tokens": 18531328} +{"current_steps": 27510, "total_steps": 204665, "loss": 0.2083, "lr": 1.9927960640620256e-06, "epoch": 0.6720738768231012, "percentage": 13.44, "elapsed_time": "0:34:42", "remaining_time": "3:43:32", "throughput": 8898.99, "total_tokens": 18535040} +{"current_steps": 27515, "total_steps": 204665, "loss": 0.0871, "lr": 1.9927858427951804e-06, "epoch": 0.6721960276549483, "percentage": 13.44, "elapsed_time": "0:34:43", "remaining_time": "3:43:32", "throughput": 8899.08, "total_tokens": 18538304} +{"current_steps": 27520, "total_steps": 204665, "loss": 0.099, "lr": 1.9927756143085293e-06, "epoch": 0.6723181784867955, "percentage": 13.45, "elapsed_time": "0:34:43", "remaining_time": "3:43:31", "throughput": 8899.18, "total_tokens": 18541568} +{"current_steps": 27525, "total_steps": 204665, "loss": 0.0368, "lr": 1.9927653786021466e-06, "epoch": 0.6724403293186426, "percentage": 13.45, "elapsed_time": "0:34:43", "remaining_time": "3:43:30", "throughput": 8899.55, "total_tokens": 18545536} +{"current_steps": 27530, "total_steps": 204665, "loss": 0.1327, "lr": 1.992755135676108e-06, "epoch": 0.6725624801504898, "percentage": 13.45, "elapsed_time": "0:34:44", "remaining_time": "3:43:30", "throughput": 8899.67, "total_tokens": 18548864} +{"current_steps": 27535, "total_steps": 204665, "loss": 0.0681, "lr": 1.9927448855304862e-06, "epoch": 0.672684630982337, "percentage": 13.45, "elapsed_time": "0:34:44", "remaining_time": "3:43:29", "throughput": 8899.78, "total_tokens": 18552192} +{"current_steps": 27540, "total_steps": 204665, "loss": 0.06, "lr": 1.992734628165357e-06, "epoch": 0.6728067818141842, "percentage": 13.46, "elapsed_time": "0:34:44", "remaining_time": "3:43:29", "throughput": 8899.8, "total_tokens": 18555264} +{"current_steps": 27545, "total_steps": 204665, "loss": 0.1047, "lr": 1.9927243635807948e-06, "epoch": 0.6729289326460314, "percentage": 13.46, "elapsed_time": "0:34:45", "remaining_time": "3:43:28", "throughput": 8900.06, "total_tokens": 18558976} +{"current_steps": 27550, "total_steps": 204665, "loss": 0.0696, "lr": 1.9927140917768736e-06, "epoch": 0.6730510834778785, "percentage": 13.46, "elapsed_time": "0:34:45", "remaining_time": "3:43:28", "throughput": 8900.18, "total_tokens": 18562304} +{"current_steps": 27555, "total_steps": 204665, "loss": 0.0841, "lr": 1.9927038127536686e-06, "epoch": 0.6731732343097256, "percentage": 13.46, "elapsed_time": "0:34:45", "remaining_time": "3:43:27", "throughput": 8900.38, "total_tokens": 18565888} +{"current_steps": 27560, "total_steps": 204665, "loss": 0.0578, "lr": 1.9926935265112543e-06, "epoch": 0.6732953851415728, "percentage": 13.47, "elapsed_time": "0:34:46", "remaining_time": "3:43:27", "throughput": 8900.62, "total_tokens": 18569536} +{"current_steps": 27565, "total_steps": 204665, "loss": 0.0541, "lr": 1.992683233049706e-06, "epoch": 0.67341753597342, "percentage": 13.47, "elapsed_time": "0:34:46", "remaining_time": "3:43:26", "throughput": 8900.64, "total_tokens": 18572608} +{"current_steps": 27570, "total_steps": 204665, "loss": 0.2431, "lr": 1.992672932369098e-06, "epoch": 0.6735396868052671, "percentage": 13.47, "elapsed_time": "0:34:47", "remaining_time": "3:43:25", "throughput": 8900.96, "total_tokens": 18576448} +{"current_steps": 27575, "total_steps": 204665, "loss": 0.1644, "lr": 1.9926626244695056e-06, "epoch": 0.6736618376371143, "percentage": 13.47, "elapsed_time": "0:34:47", "remaining_time": "3:43:25", "throughput": 8901.08, "total_tokens": 18579776} +{"current_steps": 27580, "total_steps": 204665, "loss": 0.1107, "lr": 1.9926523093510034e-06, "epoch": 0.6737839884689615, "percentage": 13.48, "elapsed_time": "0:34:47", "remaining_time": "3:43:24", "throughput": 8901.07, "total_tokens": 18582784} +{"current_steps": 27585, "total_steps": 204665, "loss": 0.1118, "lr": 1.992641987013667e-06, "epoch": 0.6739061393008087, "percentage": 13.48, "elapsed_time": "0:34:48", "remaining_time": "3:43:24", "throughput": 8901.04, "total_tokens": 18585728} +{"current_steps": 27590, "total_steps": 204665, "loss": 0.064, "lr": 1.9926316574575707e-06, "epoch": 0.6740282901326557, "percentage": 13.48, "elapsed_time": "0:34:48", "remaining_time": "3:43:23", "throughput": 8901.07, "total_tokens": 18588800} +{"current_steps": 27595, "total_steps": 204665, "loss": 0.2027, "lr": 1.99262132068279e-06, "epoch": 0.6741504409645029, "percentage": 13.48, "elapsed_time": "0:34:48", "remaining_time": "3:43:22", "throughput": 8901.13, "total_tokens": 18592000} +{"current_steps": 27600, "total_steps": 204665, "loss": 0.1205, "lr": 1.9926109766894003e-06, "epoch": 0.6742725917963501, "percentage": 13.49, "elapsed_time": "0:34:49", "remaining_time": "3:43:22", "throughput": 8901.34, "total_tokens": 18595584} +{"current_steps": 27605, "total_steps": 204665, "loss": 0.1196, "lr": 1.992600625477476e-06, "epoch": 0.6743947426281973, "percentage": 13.49, "elapsed_time": "0:34:49", "remaining_time": "3:43:21", "throughput": 8901.61, "total_tokens": 18599296} +{"current_steps": 27610, "total_steps": 204665, "loss": 0.1573, "lr": 1.992590267047094e-06, "epoch": 0.6745168934600445, "percentage": 13.49, "elapsed_time": "0:34:49", "remaining_time": "3:43:21", "throughput": 8901.61, "total_tokens": 18602304} +{"current_steps": 27615, "total_steps": 204665, "loss": 0.0645, "lr": 1.992579901398328e-06, "epoch": 0.6746390442918916, "percentage": 13.49, "elapsed_time": "0:34:50", "remaining_time": "3:43:20", "throughput": 8901.73, "total_tokens": 18605632} +{"current_steps": 27620, "total_steps": 204665, "loss": 0.1136, "lr": 1.992569528531254e-06, "epoch": 0.6747611951237388, "percentage": 13.5, "elapsed_time": "0:34:50", "remaining_time": "3:43:19", "throughput": 8901.92, "total_tokens": 18609152} +{"current_steps": 27625, "total_steps": 204665, "loss": 0.1165, "lr": 1.9925591484459474e-06, "epoch": 0.674883345955586, "percentage": 13.5, "elapsed_time": "0:34:50", "remaining_time": "3:43:19", "throughput": 8902.21, "total_tokens": 18612928} +{"current_steps": 27630, "total_steps": 204665, "loss": 0.1876, "lr": 1.992548761142484e-06, "epoch": 0.6750054967874332, "percentage": 13.5, "elapsed_time": "0:34:51", "remaining_time": "3:43:18", "throughput": 8902.53, "total_tokens": 18616768} +{"current_steps": 27635, "total_steps": 204665, "loss": 0.0641, "lr": 1.9925383666209387e-06, "epoch": 0.6751276476192802, "percentage": 13.5, "elapsed_time": "0:34:51", "remaining_time": "3:43:18", "throughput": 8902.55, "total_tokens": 18619840} +{"current_steps": 27640, "total_steps": 204665, "loss": 0.0321, "lr": 1.9925279648813875e-06, "epoch": 0.6752497984511274, "percentage": 13.5, "elapsed_time": "0:34:51", "remaining_time": "3:43:17", "throughput": 8902.63, "total_tokens": 18623104} +{"current_steps": 27645, "total_steps": 204665, "loss": 0.1091, "lr": 1.992517555923906e-06, "epoch": 0.6753719492829746, "percentage": 13.51, "elapsed_time": "0:34:52", "remaining_time": "3:43:17", "throughput": 8902.76, "total_tokens": 18626496} +{"current_steps": 27650, "total_steps": 204665, "loss": 0.075, "lr": 1.99250713974857e-06, "epoch": 0.6754941001148218, "percentage": 13.51, "elapsed_time": "0:34:52", "remaining_time": "3:43:16", "throughput": 8902.8, "total_tokens": 18629632} +{"current_steps": 27655, "total_steps": 204665, "loss": 0.0667, "lr": 1.992496716355455e-06, "epoch": 0.675616250946669, "percentage": 13.51, "elapsed_time": "0:34:52", "remaining_time": "3:43:15", "throughput": 8902.9, "total_tokens": 18632896} +{"current_steps": 27660, "total_steps": 204665, "loss": 0.1238, "lr": 1.9924862857446374e-06, "epoch": 0.6757384017785161, "percentage": 13.51, "elapsed_time": "0:34:53", "remaining_time": "3:43:15", "throughput": 8903.09, "total_tokens": 18636416} +{"current_steps": 27665, "total_steps": 204665, "loss": 0.1344, "lr": 1.9924758479161916e-06, "epoch": 0.6758605526103633, "percentage": 13.52, "elapsed_time": "0:34:53", "remaining_time": "3:43:14", "throughput": 8903.03, "total_tokens": 18639296} +{"current_steps": 27670, "total_steps": 204665, "loss": 0.1151, "lr": 1.992465402870195e-06, "epoch": 0.6759827034422105, "percentage": 13.52, "elapsed_time": "0:34:53", "remaining_time": "3:43:14", "throughput": 8903.32, "total_tokens": 18643072} +{"current_steps": 27675, "total_steps": 204665, "loss": 0.0603, "lr": 1.9924549506067236e-06, "epoch": 0.6761048542740576, "percentage": 13.52, "elapsed_time": "0:34:54", "remaining_time": "3:43:13", "throughput": 8903.4, "total_tokens": 18646336} +{"current_steps": 27680, "total_steps": 204665, "loss": 0.0828, "lr": 1.992444491125852e-06, "epoch": 0.6762270051059047, "percentage": 13.52, "elapsed_time": "0:34:54", "remaining_time": "3:43:13", "throughput": 8903.69, "total_tokens": 18650112} +{"current_steps": 27685, "total_steps": 204665, "loss": 0.1562, "lr": 1.9924340244276576e-06, "epoch": 0.6763491559377519, "percentage": 13.53, "elapsed_time": "0:34:54", "remaining_time": "3:43:12", "throughput": 8903.88, "total_tokens": 18653632} +{"current_steps": 27690, "total_steps": 204665, "loss": 0.1278, "lr": 1.992423550512216e-06, "epoch": 0.6764713067695991, "percentage": 13.53, "elapsed_time": "0:34:55", "remaining_time": "3:43:11", "throughput": 8903.91, "total_tokens": 18656704} +{"current_steps": 27695, "total_steps": 204665, "loss": 0.1062, "lr": 1.9924130693796034e-06, "epoch": 0.6765934576014463, "percentage": 13.53, "elapsed_time": "0:34:55", "remaining_time": "3:43:11", "throughput": 8904.09, "total_tokens": 18660224} +{"current_steps": 27700, "total_steps": 204665, "loss": 0.0639, "lr": 1.9924025810298957e-06, "epoch": 0.6767156084332935, "percentage": 13.53, "elapsed_time": "0:34:56", "remaining_time": "3:43:10", "throughput": 8904.24, "total_tokens": 18663680} +{"current_steps": 27705, "total_steps": 204665, "loss": 0.1237, "lr": 1.99239208546317e-06, "epoch": 0.6768377592651406, "percentage": 13.54, "elapsed_time": "0:34:56", "remaining_time": "3:43:10", "throughput": 8904.31, "total_tokens": 18666880} +{"current_steps": 27710, "total_steps": 204665, "loss": 0.0874, "lr": 1.9923815826795018e-06, "epoch": 0.6769599100969877, "percentage": 13.54, "elapsed_time": "0:34:56", "remaining_time": "3:43:09", "throughput": 8904.52, "total_tokens": 18670464} +{"current_steps": 27715, "total_steps": 204665, "loss": 0.0819, "lr": 1.992371072678968e-06, "epoch": 0.6770820609288349, "percentage": 13.54, "elapsed_time": "0:34:57", "remaining_time": "3:43:09", "throughput": 8904.68, "total_tokens": 18673920} +{"current_steps": 27720, "total_steps": 204665, "loss": 0.1129, "lr": 1.9923605554616447e-06, "epoch": 0.6772042117606821, "percentage": 13.54, "elapsed_time": "0:34:57", "remaining_time": "3:43:08", "throughput": 8904.93, "total_tokens": 18677632} +{"current_steps": 27725, "total_steps": 204665, "loss": 0.0451, "lr": 1.9923500310276085e-06, "epoch": 0.6773263625925292, "percentage": 13.55, "elapsed_time": "0:34:57", "remaining_time": "3:43:08", "throughput": 8905.1, "total_tokens": 18681088} +{"current_steps": 27730, "total_steps": 204665, "loss": 0.0925, "lr": 1.9923394993769362e-06, "epoch": 0.6774485134243764, "percentage": 13.55, "elapsed_time": "0:34:58", "remaining_time": "3:43:07", "throughput": 8905.12, "total_tokens": 18684160} +{"current_steps": 27735, "total_steps": 204665, "loss": 0.143, "lr": 1.992328960509704e-06, "epoch": 0.6775706642562236, "percentage": 13.55, "elapsed_time": "0:34:58", "remaining_time": "3:43:06", "throughput": 8905.17, "total_tokens": 18687296} +{"current_steps": 27740, "total_steps": 204665, "loss": 0.3171, "lr": 1.9923184144259886e-06, "epoch": 0.6776928150880708, "percentage": 13.55, "elapsed_time": "0:34:58", "remaining_time": "3:43:06", "throughput": 8905.24, "total_tokens": 18690496} +{"current_steps": 27745, "total_steps": 204665, "loss": 0.1289, "lr": 1.992307861125867e-06, "epoch": 0.677814965919918, "percentage": 13.56, "elapsed_time": "0:34:59", "remaining_time": "3:43:05", "throughput": 8905.34, "total_tokens": 18693824} +{"current_steps": 27750, "total_steps": 204665, "loss": 0.1006, "lr": 1.9922973006094156e-06, "epoch": 0.677937116751765, "percentage": 13.56, "elapsed_time": "0:34:59", "remaining_time": "3:43:05", "throughput": 8905.42, "total_tokens": 18697024} +{"current_steps": 27755, "total_steps": 204665, "loss": 0.1345, "lr": 1.9922867328767114e-06, "epoch": 0.6780592675836122, "percentage": 13.56, "elapsed_time": "0:34:59", "remaining_time": "3:43:04", "throughput": 8905.39, "total_tokens": 18699968} +{"current_steps": 27760, "total_steps": 204665, "loss": 0.0691, "lr": 1.992276157927831e-06, "epoch": 0.6781814184154594, "percentage": 13.56, "elapsed_time": "0:35:00", "remaining_time": "3:43:03", "throughput": 8905.5, "total_tokens": 18703296} +{"current_steps": 27765, "total_steps": 204665, "loss": 0.067, "lr": 1.9922655757628516e-06, "epoch": 0.6783035692473066, "percentage": 13.57, "elapsed_time": "0:35:00", "remaining_time": "3:43:03", "throughput": 8905.56, "total_tokens": 18706496} +{"current_steps": 27770, "total_steps": 204665, "loss": 0.0486, "lr": 1.9922549863818504e-06, "epoch": 0.6784257200791537, "percentage": 13.57, "elapsed_time": "0:35:00", "remaining_time": "3:43:02", "throughput": 8905.62, "total_tokens": 18709696} +{"current_steps": 27775, "total_steps": 204665, "loss": 0.0436, "lr": 1.9922443897849037e-06, "epoch": 0.6785478709110009, "percentage": 13.57, "elapsed_time": "0:35:01", "remaining_time": "3:43:02", "throughput": 8905.61, "total_tokens": 18712704} +{"current_steps": 27780, "total_steps": 204665, "loss": 0.0707, "lr": 1.9922337859720887e-06, "epoch": 0.6786700217428481, "percentage": 13.57, "elapsed_time": "0:35:01", "remaining_time": "3:43:01", "throughput": 8905.57, "total_tokens": 18715648} +{"current_steps": 27785, "total_steps": 204665, "loss": 0.1024, "lr": 1.992223174943483e-06, "epoch": 0.6787921725746953, "percentage": 13.58, "elapsed_time": "0:35:01", "remaining_time": "3:43:00", "throughput": 8905.72, "total_tokens": 18719040} +{"current_steps": 27790, "total_steps": 204665, "loss": 0.0903, "lr": 1.992212556699164e-06, "epoch": 0.6789143234065425, "percentage": 13.58, "elapsed_time": "0:35:02", "remaining_time": "3:43:00", "throughput": 8905.76, "total_tokens": 18722176} +{"current_steps": 27795, "total_steps": 204665, "loss": 0.0914, "lr": 1.9922019312392077e-06, "epoch": 0.6790364742383895, "percentage": 13.58, "elapsed_time": "0:35:02", "remaining_time": "3:42:59", "throughput": 8906.04, "total_tokens": 18725952} +{"current_steps": 27800, "total_steps": 204665, "loss": 0.0705, "lr": 1.992191298563692e-06, "epoch": 0.6791586250702367, "percentage": 13.58, "elapsed_time": "0:35:02", "remaining_time": "3:42:59", "throughput": 8906.12, "total_tokens": 18729152} +{"current_steps": 27805, "total_steps": 204665, "loss": 0.0881, "lr": 1.9921806586726946e-06, "epoch": 0.6792807759020839, "percentage": 13.59, "elapsed_time": "0:35:03", "remaining_time": "3:42:58", "throughput": 8906.24, "total_tokens": 18732480} +{"current_steps": 27810, "total_steps": 204665, "loss": 0.1584, "lr": 1.9921700115662927e-06, "epoch": 0.6794029267339311, "percentage": 13.59, "elapsed_time": "0:35:03", "remaining_time": "3:42:57", "throughput": 8906.23, "total_tokens": 18735488} +{"current_steps": 27815, "total_steps": 204665, "loss": 0.1024, "lr": 1.992159357244564e-06, "epoch": 0.6795250775657782, "percentage": 13.59, "elapsed_time": "0:35:03", "remaining_time": "3:42:57", "throughput": 8906.29, "total_tokens": 18738688} +{"current_steps": 27820, "total_steps": 204665, "loss": 0.1267, "lr": 1.9921486957075847e-06, "epoch": 0.6796472283976254, "percentage": 13.59, "elapsed_time": "0:35:04", "remaining_time": "3:42:56", "throughput": 8906.52, "total_tokens": 18742336} +{"current_steps": 27825, "total_steps": 204665, "loss": 0.1554, "lr": 1.9921380269554337e-06, "epoch": 0.6797693792294726, "percentage": 13.6, "elapsed_time": "0:35:04", "remaining_time": "3:42:56", "throughput": 8906.7, "total_tokens": 18745856} +{"current_steps": 27830, "total_steps": 204665, "loss": 0.0212, "lr": 1.992127350988188e-06, "epoch": 0.6798915300613197, "percentage": 13.6, "elapsed_time": "0:35:05", "remaining_time": "3:42:55", "throughput": 8906.72, "total_tokens": 18748928} +{"current_steps": 27835, "total_steps": 204665, "loss": 0.1102, "lr": 1.9921166678059255e-06, "epoch": 0.6800136808931668, "percentage": 13.6, "elapsed_time": "0:35:05", "remaining_time": "3:42:55", "throughput": 8906.99, "total_tokens": 18752704} +{"current_steps": 27840, "total_steps": 204665, "loss": 0.0976, "lr": 1.9921059774087234e-06, "epoch": 0.680135831725014, "percentage": 13.6, "elapsed_time": "0:35:05", "remaining_time": "3:42:54", "throughput": 8907.21, "total_tokens": 18756288} +{"current_steps": 27845, "total_steps": 204665, "loss": 0.1522, "lr": 1.9920952797966598e-06, "epoch": 0.6802579825568612, "percentage": 13.61, "elapsed_time": "0:35:06", "remaining_time": "3:42:53", "throughput": 8907.22, "total_tokens": 18759360} +{"current_steps": 27850, "total_steps": 204665, "loss": 0.1346, "lr": 1.992084574969813e-06, "epoch": 0.6803801333887084, "percentage": 13.61, "elapsed_time": "0:35:06", "remaining_time": "3:42:53", "throughput": 8907.19, "total_tokens": 18762304} +{"current_steps": 27855, "total_steps": 204665, "loss": 0.0575, "lr": 1.99207386292826e-06, "epoch": 0.6805022842205556, "percentage": 13.61, "elapsed_time": "0:35:06", "remaining_time": "3:42:52", "throughput": 8907.42, "total_tokens": 18765952} +{"current_steps": 27860, "total_steps": 204665, "loss": 0.0994, "lr": 1.992063143672079e-06, "epoch": 0.6806244350524027, "percentage": 13.61, "elapsed_time": "0:35:07", "remaining_time": "3:42:52", "throughput": 8907.55, "total_tokens": 18769344} +{"current_steps": 27865, "total_steps": 204665, "loss": 0.0941, "lr": 1.9920524172013482e-06, "epoch": 0.6807465858842499, "percentage": 13.61, "elapsed_time": "0:35:07", "remaining_time": "3:42:51", "throughput": 8907.59, "total_tokens": 18772480} +{"current_steps": 27870, "total_steps": 204665, "loss": 0.0403, "lr": 1.9920416835161453e-06, "epoch": 0.680868736716097, "percentage": 13.62, "elapsed_time": "0:35:07", "remaining_time": "3:42:51", "throughput": 8907.72, "total_tokens": 18775872} +{"current_steps": 27875, "total_steps": 204665, "loss": 0.2086, "lr": 1.9920309426165485e-06, "epoch": 0.6809908875479442, "percentage": 13.62, "elapsed_time": "0:35:08", "remaining_time": "3:42:50", "throughput": 8907.99, "total_tokens": 18779584} +{"current_steps": 27880, "total_steps": 204665, "loss": 0.1813, "lr": 1.992020194502635e-06, "epoch": 0.6811130383797913, "percentage": 13.62, "elapsed_time": "0:35:08", "remaining_time": "3:42:50", "throughput": 8908.19, "total_tokens": 18783168} +{"current_steps": 27885, "total_steps": 204665, "loss": 0.0425, "lr": 1.992009439174485e-06, "epoch": 0.6812351892116385, "percentage": 13.62, "elapsed_time": "0:35:08", "remaining_time": "3:42:49", "throughput": 8908.52, "total_tokens": 18787072} +{"current_steps": 27890, "total_steps": 204665, "loss": 0.0671, "lr": 1.9919986766321754e-06, "epoch": 0.6813573400434857, "percentage": 13.63, "elapsed_time": "0:35:09", "remaining_time": "3:42:48", "throughput": 8908.59, "total_tokens": 18790272} +{"current_steps": 27895, "total_steps": 204665, "loss": 0.0871, "lr": 1.991987906875784e-06, "epoch": 0.6814794908753329, "percentage": 13.63, "elapsed_time": "0:35:09", "remaining_time": "3:42:48", "throughput": 8908.66, "total_tokens": 18793472} +{"current_steps": 27900, "total_steps": 204665, "loss": 0.2074, "lr": 1.9919771299053902e-06, "epoch": 0.6816016417071801, "percentage": 13.63, "elapsed_time": "0:35:09", "remaining_time": "3:42:47", "throughput": 8908.86, "total_tokens": 18797056} +{"current_steps": 27905, "total_steps": 204665, "loss": 0.1312, "lr": 1.991966345721072e-06, "epoch": 0.6817237925390272, "percentage": 13.63, "elapsed_time": "0:35:10", "remaining_time": "3:42:47", "throughput": 8909.23, "total_tokens": 18801088} +{"current_steps": 27910, "total_steps": 204665, "loss": 0.0392, "lr": 1.9919555543229072e-06, "epoch": 0.6818459433708743, "percentage": 13.64, "elapsed_time": "0:35:10", "remaining_time": "3:42:46", "throughput": 8909.27, "total_tokens": 18804224} +{"current_steps": 27915, "total_steps": 204665, "loss": 0.1424, "lr": 1.991944755710975e-06, "epoch": 0.6819680942027215, "percentage": 13.64, "elapsed_time": "0:35:10", "remaining_time": "3:42:46", "throughput": 8909.3, "total_tokens": 18807360} +{"current_steps": 27920, "total_steps": 204665, "loss": 0.169, "lr": 1.9919339498853537e-06, "epoch": 0.6820902450345687, "percentage": 13.64, "elapsed_time": "0:35:11", "remaining_time": "3:42:45", "throughput": 8909.27, "total_tokens": 18810304} +{"current_steps": 27925, "total_steps": 204665, "loss": 0.0517, "lr": 1.9919231368461224e-06, "epoch": 0.6822123958664158, "percentage": 13.64, "elapsed_time": "0:35:11", "remaining_time": "3:42:44", "throughput": 8909.35, "total_tokens": 18813568} +{"current_steps": 27930, "total_steps": 204665, "loss": 0.1451, "lr": 1.9919123165933586e-06, "epoch": 0.682334546698263, "percentage": 13.65, "elapsed_time": "0:35:12", "remaining_time": "3:42:44", "throughput": 8909.36, "total_tokens": 18816640} +{"current_steps": 27935, "total_steps": 204665, "loss": 0.0729, "lr": 1.9919014891271423e-06, "epoch": 0.6824566975301102, "percentage": 13.65, "elapsed_time": "0:35:12", "remaining_time": "3:42:43", "throughput": 8909.49, "total_tokens": 18820032} +{"current_steps": 27940, "total_steps": 204665, "loss": 0.082, "lr": 1.9918906544475507e-06, "epoch": 0.6825788483619574, "percentage": 13.65, "elapsed_time": "0:35:12", "remaining_time": "3:42:43", "throughput": 8909.48, "total_tokens": 18823040} +{"current_steps": 27945, "total_steps": 204665, "loss": 0.1848, "lr": 1.9918798125546643e-06, "epoch": 0.6827009991938046, "percentage": 13.65, "elapsed_time": "0:35:13", "remaining_time": "3:42:42", "throughput": 8909.49, "total_tokens": 18826048} +{"current_steps": 27950, "total_steps": 204665, "loss": 0.1456, "lr": 1.991868963448561e-06, "epoch": 0.6828231500256516, "percentage": 13.66, "elapsed_time": "0:35:13", "remaining_time": "3:42:41", "throughput": 8909.65, "total_tokens": 18829504} +{"current_steps": 27955, "total_steps": 204665, "loss": 0.0392, "lr": 1.9918581071293196e-06, "epoch": 0.6829453008574988, "percentage": 13.66, "elapsed_time": "0:35:13", "remaining_time": "3:42:41", "throughput": 8909.74, "total_tokens": 18832768} +{"current_steps": 27960, "total_steps": 204665, "loss": 0.0952, "lr": 1.9918472435970194e-06, "epoch": 0.683067451689346, "percentage": 13.66, "elapsed_time": "0:35:14", "remaining_time": "3:42:40", "throughput": 8909.94, "total_tokens": 18836352} +{"current_steps": 27965, "total_steps": 204665, "loss": 0.0636, "lr": 1.991836372851739e-06, "epoch": 0.6831896025211932, "percentage": 13.66, "elapsed_time": "0:35:14", "remaining_time": "3:42:40", "throughput": 8909.98, "total_tokens": 18839488} +{"current_steps": 27970, "total_steps": 204665, "loss": 0.1515, "lr": 1.9918254948935576e-06, "epoch": 0.6833117533530403, "percentage": 13.67, "elapsed_time": "0:35:14", "remaining_time": "3:42:39", "throughput": 8909.95, "total_tokens": 18842432} +{"current_steps": 27975, "total_steps": 204665, "loss": 0.1336, "lr": 1.991814609722555e-06, "epoch": 0.6834339041848875, "percentage": 13.67, "elapsed_time": "0:35:15", "remaining_time": "3:42:39", "throughput": 8910.06, "total_tokens": 18845760} +{"current_steps": 27980, "total_steps": 204665, "loss": 0.0967, "lr": 1.9918037173388098e-06, "epoch": 0.6835560550167347, "percentage": 13.67, "elapsed_time": "0:35:15", "remaining_time": "3:42:38", "throughput": 8910.2, "total_tokens": 18849152} +{"current_steps": 27985, "total_steps": 204665, "loss": 0.0754, "lr": 1.9917928177424005e-06, "epoch": 0.6836782058485819, "percentage": 13.67, "elapsed_time": "0:35:15", "remaining_time": "3:42:37", "throughput": 8910.57, "total_tokens": 18853184} +{"current_steps": 27990, "total_steps": 204665, "loss": 0.057, "lr": 1.9917819109334074e-06, "epoch": 0.683800356680429, "percentage": 13.68, "elapsed_time": "0:35:16", "remaining_time": "3:42:37", "throughput": 8910.66, "total_tokens": 18856448} +{"current_steps": 27995, "total_steps": 204665, "loss": 0.0696, "lr": 1.9917709969119097e-06, "epoch": 0.6839225075122761, "percentage": 13.68, "elapsed_time": "0:35:16", "remaining_time": "3:42:36", "throughput": 8910.62, "total_tokens": 18859392} +{"current_steps": 28000, "total_steps": 204665, "loss": 0.1231, "lr": 1.9917600756779866e-06, "epoch": 0.6840446583441233, "percentage": 13.68, "elapsed_time": "0:35:16", "remaining_time": "3:42:36", "throughput": 8910.79, "total_tokens": 18862848} +{"current_steps": 28005, "total_steps": 204665, "loss": 0.1157, "lr": 1.9917491472317173e-06, "epoch": 0.6841668091759705, "percentage": 13.68, "elapsed_time": "0:35:17", "remaining_time": "3:42:35", "throughput": 8910.85, "total_tokens": 18866048} +{"current_steps": 28010, "total_steps": 204665, "loss": 0.0812, "lr": 1.9917382115731814e-06, "epoch": 0.6842889600078177, "percentage": 13.69, "elapsed_time": "0:35:17", "remaining_time": "3:42:35", "throughput": 8911.09, "total_tokens": 18869696} +{"current_steps": 28015, "total_steps": 204665, "loss": 0.171, "lr": 1.9917272687024586e-06, "epoch": 0.6844111108396648, "percentage": 13.69, "elapsed_time": "0:35:17", "remaining_time": "3:42:34", "throughput": 8911.17, "total_tokens": 18872960} +{"current_steps": 28020, "total_steps": 204665, "loss": 0.1007, "lr": 1.9917163186196284e-06, "epoch": 0.684533261671512, "percentage": 13.69, "elapsed_time": "0:35:18", "remaining_time": "3:42:33", "throughput": 8911.35, "total_tokens": 18876480} +{"current_steps": 28025, "total_steps": 204665, "loss": 0.1806, "lr": 1.99170536132477e-06, "epoch": 0.6846554125033592, "percentage": 13.69, "elapsed_time": "0:35:18", "remaining_time": "3:42:33", "throughput": 8911.53, "total_tokens": 18880000} +{"current_steps": 28030, "total_steps": 204665, "loss": 0.1536, "lr": 1.991694396817964e-06, "epoch": 0.6847775633352063, "percentage": 13.7, "elapsed_time": "0:35:18", "remaining_time": "3:42:32", "throughput": 8911.64, "total_tokens": 18883328} +{"current_steps": 28035, "total_steps": 204665, "loss": 0.0765, "lr": 1.991683425099289e-06, "epoch": 0.6848997141670535, "percentage": 13.7, "elapsed_time": "0:35:19", "remaining_time": "3:42:32", "throughput": 8911.68, "total_tokens": 18886464} +{"current_steps": 28040, "total_steps": 204665, "loss": 0.2011, "lr": 1.991672446168826e-06, "epoch": 0.6850218649989006, "percentage": 13.7, "elapsed_time": "0:35:19", "remaining_time": "3:42:31", "throughput": 8911.65, "total_tokens": 18889408} +{"current_steps": 28045, "total_steps": 204665, "loss": 0.2166, "lr": 1.9916614600266543e-06, "epoch": 0.6851440158307478, "percentage": 13.7, "elapsed_time": "0:35:19", "remaining_time": "3:42:31", "throughput": 8911.64, "total_tokens": 18892416} +{"current_steps": 28050, "total_steps": 204665, "loss": 0.1019, "lr": 1.991650466672853e-06, "epoch": 0.685266166662595, "percentage": 13.71, "elapsed_time": "0:35:20", "remaining_time": "3:42:30", "throughput": 8911.72, "total_tokens": 18895680} +{"current_steps": 28055, "total_steps": 204665, "loss": 0.234, "lr": 1.9916394661075037e-06, "epoch": 0.6853883174944422, "percentage": 13.71, "elapsed_time": "0:35:20", "remaining_time": "3:42:29", "throughput": 8911.84, "total_tokens": 18899072} +{"current_steps": 28060, "total_steps": 204665, "loss": 0.0497, "lr": 1.991628458330685e-06, "epoch": 0.6855104683262893, "percentage": 13.71, "elapsed_time": "0:35:21", "remaining_time": "3:42:29", "throughput": 8912.09, "total_tokens": 18902784} +{"current_steps": 28065, "total_steps": 204665, "loss": 0.0878, "lr": 1.9916174433424774e-06, "epoch": 0.6856326191581364, "percentage": 13.71, "elapsed_time": "0:35:21", "remaining_time": "3:42:28", "throughput": 8912.15, "total_tokens": 18905984} +{"current_steps": 28070, "total_steps": 204665, "loss": 0.0997, "lr": 1.991606421142961e-06, "epoch": 0.6857547699899836, "percentage": 13.72, "elapsed_time": "0:35:21", "remaining_time": "3:42:28", "throughput": 8912.15, "total_tokens": 18908992} +{"current_steps": 28075, "total_steps": 204665, "loss": 0.0843, "lr": 1.991595391732216e-06, "epoch": 0.6858769208218308, "percentage": 13.72, "elapsed_time": "0:35:22", "remaining_time": "3:42:27", "throughput": 8912.23, "total_tokens": 18912256} +{"current_steps": 28080, "total_steps": 204665, "loss": 0.0455, "lr": 1.991584355110323e-06, "epoch": 0.685999071653678, "percentage": 13.72, "elapsed_time": "0:35:22", "remaining_time": "3:42:27", "throughput": 8912.35, "total_tokens": 18915584} +{"current_steps": 28085, "total_steps": 204665, "loss": 0.1036, "lr": 1.9915733112773613e-06, "epoch": 0.6861212224855251, "percentage": 13.72, "elapsed_time": "0:35:22", "remaining_time": "3:42:26", "throughput": 8912.42, "total_tokens": 18918784} +{"current_steps": 28090, "total_steps": 204665, "loss": 0.0086, "lr": 1.9915622602334122e-06, "epoch": 0.6862433733173723, "percentage": 13.72, "elapsed_time": "0:35:23", "remaining_time": "3:42:25", "throughput": 8912.48, "total_tokens": 18921984} +{"current_steps": 28095, "total_steps": 204665, "loss": 0.2377, "lr": 1.9915512019785556e-06, "epoch": 0.6863655241492195, "percentage": 13.73, "elapsed_time": "0:35:23", "remaining_time": "3:42:25", "throughput": 8912.59, "total_tokens": 18925312} +{"current_steps": 28100, "total_steps": 204665, "loss": 0.1546, "lr": 1.9915401365128715e-06, "epoch": 0.6864876749810667, "percentage": 13.73, "elapsed_time": "0:35:23", "remaining_time": "3:42:24", "throughput": 8912.67, "total_tokens": 18928576} +{"current_steps": 28105, "total_steps": 204665, "loss": 0.1028, "lr": 1.991529063836441e-06, "epoch": 0.6866098258129137, "percentage": 13.73, "elapsed_time": "0:35:24", "remaining_time": "3:42:24", "throughput": 8912.86, "total_tokens": 18932096} +{"current_steps": 28110, "total_steps": 204665, "loss": 0.1427, "lr": 1.991517983949345e-06, "epoch": 0.6867319766447609, "percentage": 13.73, "elapsed_time": "0:35:24", "remaining_time": "3:42:23", "throughput": 8912.94, "total_tokens": 18935360} +{"current_steps": 28115, "total_steps": 204665, "loss": 0.0468, "lr": 1.991506896851663e-06, "epoch": 0.6868541274766081, "percentage": 13.74, "elapsed_time": "0:35:24", "remaining_time": "3:42:22", "throughput": 8913.05, "total_tokens": 18938688} +{"current_steps": 28120, "total_steps": 204665, "loss": 0.1421, "lr": 1.9914958025434764e-06, "epoch": 0.6869762783084553, "percentage": 13.74, "elapsed_time": "0:35:25", "remaining_time": "3:42:22", "throughput": 8913.17, "total_tokens": 18942080} +{"current_steps": 28125, "total_steps": 204665, "loss": 0.0983, "lr": 1.9914847010248657e-06, "epoch": 0.6870984291403024, "percentage": 13.74, "elapsed_time": "0:35:25", "remaining_time": "3:42:21", "throughput": 8913.19, "total_tokens": 18945152} +{"current_steps": 28130, "total_steps": 204665, "loss": 0.0689, "lr": 1.9914735922959116e-06, "epoch": 0.6872205799721496, "percentage": 13.74, "elapsed_time": "0:35:25", "remaining_time": "3:42:21", "throughput": 8913.21, "total_tokens": 18948224} +{"current_steps": 28135, "total_steps": 204665, "loss": 0.0645, "lr": 1.9914624763566946e-06, "epoch": 0.6873427308039968, "percentage": 13.75, "elapsed_time": "0:35:26", "remaining_time": "3:42:20", "throughput": 8913.38, "total_tokens": 18951680} +{"current_steps": 28140, "total_steps": 204665, "loss": 0.0881, "lr": 1.991451353207296e-06, "epoch": 0.687464881635844, "percentage": 13.75, "elapsed_time": "0:35:26", "remaining_time": "3:42:20", "throughput": 8914.31, "total_tokens": 18957248} +{"current_steps": 28145, "total_steps": 204665, "loss": 0.1386, "lr": 1.9914402228477962e-06, "epoch": 0.6875870324676912, "percentage": 13.75, "elapsed_time": "0:35:26", "remaining_time": "3:42:19", "throughput": 8914.46, "total_tokens": 18960640} +{"current_steps": 28150, "total_steps": 204665, "loss": 0.1298, "lr": 1.991429085278277e-06, "epoch": 0.6877091832995382, "percentage": 13.75, "elapsed_time": "0:35:27", "remaining_time": "3:42:19", "throughput": 8914.52, "total_tokens": 18963840} +{"current_steps": 28155, "total_steps": 204665, "loss": 0.0415, "lr": 1.9914179404988185e-06, "epoch": 0.6878313341313854, "percentage": 13.76, "elapsed_time": "0:35:27", "remaining_time": "3:42:18", "throughput": 8914.81, "total_tokens": 18967616} +{"current_steps": 28160, "total_steps": 204665, "loss": 0.0961, "lr": 1.991406788509502e-06, "epoch": 0.6879534849632326, "percentage": 13.76, "elapsed_time": "0:35:28", "remaining_time": "3:42:18", "throughput": 8915.62, "total_tokens": 18972928} +{"current_steps": 28165, "total_steps": 204665, "loss": 0.157, "lr": 1.991395629310409e-06, "epoch": 0.6880756357950798, "percentage": 13.76, "elapsed_time": "0:35:28", "remaining_time": "3:42:17", "throughput": 8915.72, "total_tokens": 18976192} +{"current_steps": 28170, "total_steps": 204665, "loss": 0.0917, "lr": 1.99138446290162e-06, "epoch": 0.6881977866269269, "percentage": 13.76, "elapsed_time": "0:35:28", "remaining_time": "3:42:17", "throughput": 8915.78, "total_tokens": 18979392} +{"current_steps": 28175, "total_steps": 204665, "loss": 0.0725, "lr": 1.9913732892832166e-06, "epoch": 0.6883199374587741, "percentage": 13.77, "elapsed_time": "0:35:29", "remaining_time": "3:42:16", "throughput": 8915.8, "total_tokens": 18982464} +{"current_steps": 28180, "total_steps": 204665, "loss": 0.156, "lr": 1.9913621084552797e-06, "epoch": 0.6884420882906213, "percentage": 13.77, "elapsed_time": "0:35:29", "remaining_time": "3:42:16", "throughput": 8915.93, "total_tokens": 18985856} +{"current_steps": 28185, "total_steps": 204665, "loss": 0.1084, "lr": 1.9913509204178913e-06, "epoch": 0.6885642391224684, "percentage": 13.77, "elapsed_time": "0:35:29", "remaining_time": "3:42:15", "throughput": 8916.06, "total_tokens": 18989248} +{"current_steps": 28190, "total_steps": 204665, "loss": 0.0997, "lr": 1.9913397251711323e-06, "epoch": 0.6886863899543156, "percentage": 13.77, "elapsed_time": "0:35:30", "remaining_time": "3:42:15", "throughput": 8916.31, "total_tokens": 18992960} +{"current_steps": 28195, "total_steps": 204665, "loss": 0.0419, "lr": 1.991328522715084e-06, "epoch": 0.6888085407861627, "percentage": 13.78, "elapsed_time": "0:35:30", "remaining_time": "3:42:14", "throughput": 8916.44, "total_tokens": 18996352} +{"current_steps": 28200, "total_steps": 204665, "loss": 0.2366, "lr": 1.9913173130498283e-06, "epoch": 0.6889306916180099, "percentage": 13.78, "elapsed_time": "0:35:30", "remaining_time": "3:42:13", "throughput": 8916.71, "total_tokens": 19000064} +{"current_steps": 28205, "total_steps": 204665, "loss": 0.069, "lr": 1.9913060961754463e-06, "epoch": 0.6890528424498571, "percentage": 13.78, "elapsed_time": "0:35:31", "remaining_time": "3:42:13", "throughput": 8916.92, "total_tokens": 19003648} +{"current_steps": 28210, "total_steps": 204665, "loss": 0.1106, "lr": 1.99129487209202e-06, "epoch": 0.6891749932817043, "percentage": 13.78, "elapsed_time": "0:35:31", "remaining_time": "3:42:12", "throughput": 8917.16, "total_tokens": 19007360} +{"current_steps": 28215, "total_steps": 204665, "loss": 0.209, "lr": 1.9912836407996307e-06, "epoch": 0.6892971441135514, "percentage": 13.79, "elapsed_time": "0:35:31", "remaining_time": "3:42:12", "throughput": 8917.14, "total_tokens": 19010304} +{"current_steps": 28220, "total_steps": 204665, "loss": 0.1874, "lr": 1.9912724022983597e-06, "epoch": 0.6894192949453986, "percentage": 13.79, "elapsed_time": "0:35:32", "remaining_time": "3:42:11", "throughput": 8917.29, "total_tokens": 19013696} +{"current_steps": 28225, "total_steps": 204665, "loss": 0.0766, "lr": 1.9912611565882894e-06, "epoch": 0.6895414457772457, "percentage": 13.79, "elapsed_time": "0:35:32", "remaining_time": "3:42:11", "throughput": 8917.38, "total_tokens": 19016960} +{"current_steps": 28230, "total_steps": 204665, "loss": 0.0774, "lr": 1.9912499036695016e-06, "epoch": 0.6896635966090929, "percentage": 13.79, "elapsed_time": "0:35:32", "remaining_time": "3:42:10", "throughput": 8917.66, "total_tokens": 19020736} +{"current_steps": 28235, "total_steps": 204665, "loss": 0.064, "lr": 1.991238643542078e-06, "epoch": 0.6897857474409401, "percentage": 13.8, "elapsed_time": "0:35:33", "remaining_time": "3:42:10", "throughput": 8917.84, "total_tokens": 19024256} +{"current_steps": 28240, "total_steps": 204665, "loss": 0.1147, "lr": 1.9912273762061e-06, "epoch": 0.6899078982727872, "percentage": 13.8, "elapsed_time": "0:35:33", "remaining_time": "3:42:09", "throughput": 8917.89, "total_tokens": 19027392} +{"current_steps": 28245, "total_steps": 204665, "loss": 0.105, "lr": 1.9912161016616496e-06, "epoch": 0.6900300491046344, "percentage": 13.8, "elapsed_time": "0:35:33", "remaining_time": "3:42:08", "throughput": 8918.16, "total_tokens": 19031168} +{"current_steps": 28250, "total_steps": 204665, "loss": 0.2081, "lr": 1.99120481990881e-06, "epoch": 0.6901521999364816, "percentage": 13.8, "elapsed_time": "0:35:34", "remaining_time": "3:42:08", "throughput": 8918.14, "total_tokens": 19034112} +{"current_steps": 28255, "total_steps": 204665, "loss": 0.1315, "lr": 1.991193530947662e-06, "epoch": 0.6902743507683288, "percentage": 13.81, "elapsed_time": "0:35:34", "remaining_time": "3:42:07", "throughput": 8918.17, "total_tokens": 19037248} +{"current_steps": 28260, "total_steps": 204665, "loss": 0.1358, "lr": 1.9911822347782876e-06, "epoch": 0.6903965016001758, "percentage": 13.81, "elapsed_time": "0:35:34", "remaining_time": "3:42:07", "throughput": 8918.2, "total_tokens": 19040320} +{"current_steps": 28265, "total_steps": 204665, "loss": 0.073, "lr": 1.9911709314007696e-06, "epoch": 0.690518652432023, "percentage": 13.81, "elapsed_time": "0:35:35", "remaining_time": "3:42:06", "throughput": 8918.31, "total_tokens": 19043648} +{"current_steps": 28270, "total_steps": 204665, "loss": 0.0217, "lr": 1.99115962081519e-06, "epoch": 0.6906408032638702, "percentage": 13.81, "elapsed_time": "0:35:35", "remaining_time": "3:42:06", "throughput": 8918.53, "total_tokens": 19047296} +{"current_steps": 28275, "total_steps": 204665, "loss": 0.1819, "lr": 1.991148303021631e-06, "epoch": 0.6907629540957174, "percentage": 13.82, "elapsed_time": "0:35:36", "remaining_time": "3:42:05", "throughput": 8918.71, "total_tokens": 19050816} +{"current_steps": 28280, "total_steps": 204665, "loss": 0.1465, "lr": 1.9911369780201754e-06, "epoch": 0.6908851049275646, "percentage": 13.82, "elapsed_time": "0:35:36", "remaining_time": "3:42:04", "throughput": 8918.79, "total_tokens": 19054016} +{"current_steps": 28285, "total_steps": 204665, "loss": 0.0116, "lr": 1.991125645810905e-06, "epoch": 0.6910072557594117, "percentage": 13.82, "elapsed_time": "0:35:36", "remaining_time": "3:42:04", "throughput": 8918.8, "total_tokens": 19057088} +{"current_steps": 28290, "total_steps": 204665, "loss": 0.1489, "lr": 1.991114306393902e-06, "epoch": 0.6911294065912589, "percentage": 13.82, "elapsed_time": "0:35:37", "remaining_time": "3:42:03", "throughput": 8918.95, "total_tokens": 19060544} +{"current_steps": 28295, "total_steps": 204665, "loss": 0.0615, "lr": 1.991102959769249e-06, "epoch": 0.6912515574231061, "percentage": 13.83, "elapsed_time": "0:35:37", "remaining_time": "3:42:03", "throughput": 8919.08, "total_tokens": 19063936} +{"current_steps": 28300, "total_steps": 204665, "loss": 0.3578, "lr": 1.991091605937029e-06, "epoch": 0.6913737082549533, "percentage": 13.83, "elapsed_time": "0:35:37", "remaining_time": "3:42:02", "throughput": 8919.2, "total_tokens": 19067264} +{"current_steps": 28305, "total_steps": 204665, "loss": 0.1859, "lr": 1.9910802448973245e-06, "epoch": 0.6914958590868003, "percentage": 13.83, "elapsed_time": "0:35:38", "remaining_time": "3:42:01", "throughput": 8919.19, "total_tokens": 19070272} +{"current_steps": 28310, "total_steps": 204665, "loss": 0.1424, "lr": 1.9910688766502177e-06, "epoch": 0.6916180099186475, "percentage": 13.83, "elapsed_time": "0:35:38", "remaining_time": "3:42:01", "throughput": 8919.23, "total_tokens": 19073408} +{"current_steps": 28315, "total_steps": 204665, "loss": 0.0961, "lr": 1.9910575011957914e-06, "epoch": 0.6917401607504947, "percentage": 13.83, "elapsed_time": "0:35:38", "remaining_time": "3:42:00", "throughput": 8919.27, "total_tokens": 19076544} +{"current_steps": 28320, "total_steps": 204665, "loss": 0.0969, "lr": 1.9910461185341287e-06, "epoch": 0.6918623115823419, "percentage": 13.84, "elapsed_time": "0:35:39", "remaining_time": "3:42:00", "throughput": 8919.53, "total_tokens": 19080256} +{"current_steps": 28325, "total_steps": 204665, "loss": 0.0501, "lr": 1.9910347286653116e-06, "epoch": 0.6919844624141891, "percentage": 13.84, "elapsed_time": "0:35:39", "remaining_time": "3:41:59", "throughput": 8919.7, "total_tokens": 19083776} +{"current_steps": 28330, "total_steps": 204665, "loss": 0.0835, "lr": 1.9910233315894237e-06, "epoch": 0.6921066132460362, "percentage": 13.84, "elapsed_time": "0:35:39", "remaining_time": "3:41:59", "throughput": 8919.98, "total_tokens": 19087552} +{"current_steps": 28335, "total_steps": 204665, "loss": 0.1434, "lr": 1.9910119273065474e-06, "epoch": 0.6922287640778834, "percentage": 13.84, "elapsed_time": "0:35:40", "remaining_time": "3:41:58", "throughput": 8920.01, "total_tokens": 19090688} +{"current_steps": 28340, "total_steps": 204665, "loss": 0.1814, "lr": 1.991000515816766e-06, "epoch": 0.6923509149097306, "percentage": 13.85, "elapsed_time": "0:35:40", "remaining_time": "3:41:58", "throughput": 8920.07, "total_tokens": 19093888} +{"current_steps": 28345, "total_steps": 204665, "loss": 0.2087, "lr": 1.990989097120162e-06, "epoch": 0.6924730657415777, "percentage": 13.85, "elapsed_time": "0:35:40", "remaining_time": "3:41:57", "throughput": 8920.16, "total_tokens": 19097152} +{"current_steps": 28350, "total_steps": 204665, "loss": 0.0672, "lr": 1.990977671216819e-06, "epoch": 0.6925952165734248, "percentage": 13.85, "elapsed_time": "0:35:41", "remaining_time": "3:41:56", "throughput": 8920.32, "total_tokens": 19100608} +{"current_steps": 28355, "total_steps": 204665, "loss": 0.3032, "lr": 1.9909662381068195e-06, "epoch": 0.692717367405272, "percentage": 13.85, "elapsed_time": "0:35:41", "remaining_time": "3:41:56", "throughput": 8920.43, "total_tokens": 19103936} +{"current_steps": 28360, "total_steps": 204665, "loss": 0.1386, "lr": 1.9909547977902473e-06, "epoch": 0.6928395182371192, "percentage": 13.86, "elapsed_time": "0:35:41", "remaining_time": "3:41:55", "throughput": 8920.49, "total_tokens": 19107136} +{"current_steps": 28365, "total_steps": 204665, "loss": 0.0419, "lr": 1.9909433502671853e-06, "epoch": 0.6929616690689664, "percentage": 13.86, "elapsed_time": "0:35:42", "remaining_time": "3:41:55", "throughput": 8920.58, "total_tokens": 19110400} +{"current_steps": 28370, "total_steps": 204665, "loss": 0.1421, "lr": 1.9909318955377165e-06, "epoch": 0.6930838199008135, "percentage": 13.86, "elapsed_time": "0:35:42", "remaining_time": "3:41:54", "throughput": 8920.77, "total_tokens": 19113920} +{"current_steps": 28375, "total_steps": 204665, "loss": 0.1806, "lr": 1.9909204336019247e-06, "epoch": 0.6932059707326607, "percentage": 13.86, "elapsed_time": "0:35:42", "remaining_time": "3:41:54", "throughput": 8921.03, "total_tokens": 19117696} +{"current_steps": 28380, "total_steps": 204665, "loss": 0.127, "lr": 1.990908964459893e-06, "epoch": 0.6933281215645078, "percentage": 13.87, "elapsed_time": "0:35:43", "remaining_time": "3:41:53", "throughput": 8921.07, "total_tokens": 19120832} +{"current_steps": 28385, "total_steps": 204665, "loss": 0.0871, "lr": 1.9908974881117042e-06, "epoch": 0.693450272396355, "percentage": 13.87, "elapsed_time": "0:35:43", "remaining_time": "3:41:52", "throughput": 8921.21, "total_tokens": 19124224} +{"current_steps": 28390, "total_steps": 204665, "loss": 0.1087, "lr": 1.990886004557443e-06, "epoch": 0.6935724232282022, "percentage": 13.87, "elapsed_time": "0:35:44", "remaining_time": "3:41:52", "throughput": 8921.21, "total_tokens": 19127232} +{"current_steps": 28395, "total_steps": 204665, "loss": 0.0849, "lr": 1.990874513797192e-06, "epoch": 0.6936945740600493, "percentage": 13.87, "elapsed_time": "0:35:44", "remaining_time": "3:41:51", "throughput": 8921.41, "total_tokens": 19130816} +{"current_steps": 28400, "total_steps": 204665, "loss": 0.1112, "lr": 1.990863015831035e-06, "epoch": 0.6938167248918965, "percentage": 13.88, "elapsed_time": "0:35:44", "remaining_time": "3:41:51", "throughput": 8921.43, "total_tokens": 19133888} +{"current_steps": 28405, "total_steps": 204665, "loss": 0.2546, "lr": 1.990851510659056e-06, "epoch": 0.6939388757237437, "percentage": 13.88, "elapsed_time": "0:35:45", "remaining_time": "3:41:50", "throughput": 8921.4, "total_tokens": 19136832} +{"current_steps": 28410, "total_steps": 204665, "loss": 0.1442, "lr": 1.990839998281338e-06, "epoch": 0.6940610265555909, "percentage": 13.88, "elapsed_time": "0:35:45", "remaining_time": "3:41:49", "throughput": 8921.47, "total_tokens": 19140032} +{"current_steps": 28415, "total_steps": 204665, "loss": 0.1615, "lr": 1.9908284786979647e-06, "epoch": 0.694183177387438, "percentage": 13.88, "elapsed_time": "0:35:45", "remaining_time": "3:41:49", "throughput": 8921.42, "total_tokens": 19142912} +{"current_steps": 28420, "total_steps": 204665, "loss": 0.1455, "lr": 1.9908169519090208e-06, "epoch": 0.6943053282192851, "percentage": 13.89, "elapsed_time": "0:35:46", "remaining_time": "3:41:48", "throughput": 8921.42, "total_tokens": 19145920} +{"current_steps": 28425, "total_steps": 204665, "loss": 0.1582, "lr": 1.990805417914589e-06, "epoch": 0.6944274790511323, "percentage": 13.89, "elapsed_time": "0:35:46", "remaining_time": "3:41:48", "throughput": 8921.45, "total_tokens": 19148992} +{"current_steps": 28430, "total_steps": 204665, "loss": 0.1379, "lr": 1.9907938767147542e-06, "epoch": 0.6945496298829795, "percentage": 13.89, "elapsed_time": "0:35:46", "remaining_time": "3:41:47", "throughput": 8921.61, "total_tokens": 19152448} +{"current_steps": 28435, "total_steps": 204665, "loss": 0.0703, "lr": 1.9907823283095998e-06, "epoch": 0.6946717807148267, "percentage": 13.89, "elapsed_time": "0:35:47", "remaining_time": "3:41:46", "throughput": 8921.72, "total_tokens": 19155776} +{"current_steps": 28440, "total_steps": 204665, "loss": 0.1394, "lr": 1.9907707726992095e-06, "epoch": 0.6947939315466738, "percentage": 13.9, "elapsed_time": "0:35:47", "remaining_time": "3:41:46", "throughput": 8921.74, "total_tokens": 19158848} +{"current_steps": 28445, "total_steps": 204665, "loss": 0.1629, "lr": 1.9907592098836678e-06, "epoch": 0.694916082378521, "percentage": 13.9, "elapsed_time": "0:35:47", "remaining_time": "3:41:45", "throughput": 8921.87, "total_tokens": 19162240} +{"current_steps": 28450, "total_steps": 204665, "loss": 0.0741, "lr": 1.9907476398630584e-06, "epoch": 0.6950382332103682, "percentage": 13.9, "elapsed_time": "0:35:48", "remaining_time": "3:41:45", "throughput": 8922.03, "total_tokens": 19165696} +{"current_steps": 28455, "total_steps": 204665, "loss": 0.128, "lr": 1.990736062637466e-06, "epoch": 0.6951603840422154, "percentage": 13.9, "elapsed_time": "0:35:48", "remaining_time": "3:41:44", "throughput": 8922.04, "total_tokens": 19168768} +{"current_steps": 28460, "total_steps": 204665, "loss": 0.184, "lr": 1.9907244782069745e-06, "epoch": 0.6952825348740624, "percentage": 13.91, "elapsed_time": "0:35:48", "remaining_time": "3:41:44", "throughput": 8922.33, "total_tokens": 19172544} +{"current_steps": 28465, "total_steps": 204665, "loss": 0.0412, "lr": 1.990712886571668e-06, "epoch": 0.6954046857059096, "percentage": 13.91, "elapsed_time": "0:35:49", "remaining_time": "3:41:43", "throughput": 8922.54, "total_tokens": 19176128} +{"current_steps": 28470, "total_steps": 204665, "loss": 0.1263, "lr": 1.990701287731631e-06, "epoch": 0.6955268365377568, "percentage": 13.91, "elapsed_time": "0:35:49", "remaining_time": "3:41:43", "throughput": 8922.76, "total_tokens": 19179776} +{"current_steps": 28475, "total_steps": 204665, "loss": 0.1573, "lr": 1.9906896816869475e-06, "epoch": 0.695648987369604, "percentage": 13.91, "elapsed_time": "0:35:49", "remaining_time": "3:41:42", "throughput": 8922.96, "total_tokens": 19183360} +{"current_steps": 28480, "total_steps": 204665, "loss": 0.0671, "lr": 1.9906780684377025e-06, "epoch": 0.6957711382014512, "percentage": 13.92, "elapsed_time": "0:35:50", "remaining_time": "3:41:41", "throughput": 8922.97, "total_tokens": 19186432} +{"current_steps": 28485, "total_steps": 204665, "loss": 0.1002, "lr": 1.99066644798398e-06, "epoch": 0.6958932890332983, "percentage": 13.92, "elapsed_time": "0:35:50", "remaining_time": "3:41:41", "throughput": 8923.07, "total_tokens": 19189760} +{"current_steps": 28490, "total_steps": 204665, "loss": 0.0669, "lr": 1.9906548203258644e-06, "epoch": 0.6960154398651455, "percentage": 13.92, "elapsed_time": "0:35:50", "remaining_time": "3:41:40", "throughput": 8923.11, "total_tokens": 19192896} +{"current_steps": 28495, "total_steps": 204665, "loss": 0.158, "lr": 1.990643185463441e-06, "epoch": 0.6961375906969927, "percentage": 13.92, "elapsed_time": "0:35:51", "remaining_time": "3:41:40", "throughput": 8923.32, "total_tokens": 19196480} +{"current_steps": 28500, "total_steps": 204665, "loss": 0.1286, "lr": 1.9906315433967937e-06, "epoch": 0.6962597415288398, "percentage": 13.93, "elapsed_time": "0:35:51", "remaining_time": "3:41:39", "throughput": 8923.54, "total_tokens": 19200128} +{"current_steps": 28505, "total_steps": 204665, "loss": 0.125, "lr": 1.990619894126007e-06, "epoch": 0.6963818923606869, "percentage": 13.93, "elapsed_time": "0:35:51", "remaining_time": "3:41:39", "throughput": 8923.6, "total_tokens": 19203328} +{"current_steps": 28510, "total_steps": 204665, "loss": 0.1438, "lr": 1.9906082376511665e-06, "epoch": 0.6965040431925341, "percentage": 13.93, "elapsed_time": "0:35:52", "remaining_time": "3:41:38", "throughput": 8923.61, "total_tokens": 19206400} +{"current_steps": 28515, "total_steps": 204665, "loss": 0.0225, "lr": 1.9905965739723563e-06, "epoch": 0.6966261940243813, "percentage": 13.93, "elapsed_time": "0:35:52", "remaining_time": "3:41:37", "throughput": 8923.78, "total_tokens": 19209920} +{"current_steps": 28520, "total_steps": 204665, "loss": 0.0699, "lr": 1.9905849030896614e-06, "epoch": 0.6967483448562285, "percentage": 13.93, "elapsed_time": "0:35:53", "remaining_time": "3:41:37", "throughput": 8923.85, "total_tokens": 19213120} +{"current_steps": 28525, "total_steps": 204665, "loss": 0.1218, "lr": 1.9905732250031664e-06, "epoch": 0.6968704956880757, "percentage": 13.94, "elapsed_time": "0:35:53", "remaining_time": "3:41:36", "throughput": 8923.98, "total_tokens": 19216512} +{"current_steps": 28530, "total_steps": 204665, "loss": 0.0611, "lr": 1.9905615397129565e-06, "epoch": 0.6969926465199228, "percentage": 13.94, "elapsed_time": "0:35:53", "remaining_time": "3:41:36", "throughput": 8923.9, "total_tokens": 19219328} +{"current_steps": 28535, "total_steps": 204665, "loss": 0.1589, "lr": 1.9905498472191168e-06, "epoch": 0.69711479735177, "percentage": 13.94, "elapsed_time": "0:35:54", "remaining_time": "3:41:35", "throughput": 8923.96, "total_tokens": 19222528} +{"current_steps": 28540, "total_steps": 204665, "loss": 0.1138, "lr": 1.9905381475217323e-06, "epoch": 0.6972369481836171, "percentage": 13.94, "elapsed_time": "0:35:54", "remaining_time": "3:41:35", "throughput": 8924.14, "total_tokens": 19226048} +{"current_steps": 28545, "total_steps": 204665, "loss": 0.0534, "lr": 1.990526440620888e-06, "epoch": 0.6973590990154643, "percentage": 13.95, "elapsed_time": "0:35:54", "remaining_time": "3:41:34", "throughput": 8924.19, "total_tokens": 19229184} +{"current_steps": 28550, "total_steps": 204665, "loss": 0.2159, "lr": 1.9905147265166686e-06, "epoch": 0.6974812498473114, "percentage": 13.95, "elapsed_time": "0:35:55", "remaining_time": "3:41:33", "throughput": 8924.37, "total_tokens": 19232704} +{"current_steps": 28555, "total_steps": 204665, "loss": 0.1376, "lr": 1.99050300520916e-06, "epoch": 0.6976034006791586, "percentage": 13.95, "elapsed_time": "0:35:55", "remaining_time": "3:41:33", "throughput": 8924.67, "total_tokens": 19236544} +{"current_steps": 28560, "total_steps": 204665, "loss": 0.062, "lr": 1.9904912766984472e-06, "epoch": 0.6977255515110058, "percentage": 13.95, "elapsed_time": "0:35:55", "remaining_time": "3:41:32", "throughput": 8924.69, "total_tokens": 19239616} +{"current_steps": 28565, "total_steps": 204665, "loss": 0.0789, "lr": 1.990479540984615e-06, "epoch": 0.697847702342853, "percentage": 13.96, "elapsed_time": "0:35:56", "remaining_time": "3:41:32", "throughput": 8924.87, "total_tokens": 19243136} +{"current_steps": 28570, "total_steps": 204665, "loss": 0.1022, "lr": 1.9904677980677496e-06, "epoch": 0.6979698531747002, "percentage": 13.96, "elapsed_time": "0:35:56", "remaining_time": "3:41:31", "throughput": 8924.95, "total_tokens": 19246400} +{"current_steps": 28575, "total_steps": 204665, "loss": 0.0678, "lr": 1.990456047947936e-06, "epoch": 0.6980920040065473, "percentage": 13.96, "elapsed_time": "0:35:56", "remaining_time": "3:41:31", "throughput": 8924.96, "total_tokens": 19249472} +{"current_steps": 28580, "total_steps": 204665, "loss": 0.1318, "lr": 1.9904442906252594e-06, "epoch": 0.6982141548383944, "percentage": 13.96, "elapsed_time": "0:35:57", "remaining_time": "3:41:30", "throughput": 8925.15, "total_tokens": 19252992} +{"current_steps": 28585, "total_steps": 204665, "loss": 0.1667, "lr": 1.9904325260998055e-06, "epoch": 0.6983363056702416, "percentage": 13.97, "elapsed_time": "0:35:57", "remaining_time": "3:41:29", "throughput": 8925.17, "total_tokens": 19256064} +{"current_steps": 28590, "total_steps": 204665, "loss": 0.1057, "lr": 1.99042075437166e-06, "epoch": 0.6984584565020888, "percentage": 13.97, "elapsed_time": "0:35:57", "remaining_time": "3:41:29", "throughput": 8925.29, "total_tokens": 19259456} +{"current_steps": 28595, "total_steps": 204665, "loss": 0.1113, "lr": 1.9904089754409083e-06, "epoch": 0.6985806073339359, "percentage": 13.97, "elapsed_time": "0:35:58", "remaining_time": "3:41:28", "throughput": 8925.22, "total_tokens": 19262272} +{"current_steps": 28600, "total_steps": 204665, "loss": 0.0824, "lr": 1.990397189307636e-06, "epoch": 0.6987027581657831, "percentage": 13.97, "elapsed_time": "0:35:58", "remaining_time": "3:41:28", "throughput": 8925.35, "total_tokens": 19265664} +{"current_steps": 28605, "total_steps": 204665, "loss": 0.0588, "lr": 1.9903853959719293e-06, "epoch": 0.6988249089976303, "percentage": 13.98, "elapsed_time": "0:35:58", "remaining_time": "3:41:27", "throughput": 8925.68, "total_tokens": 19269632} +{"current_steps": 28610, "total_steps": 204665, "loss": 0.156, "lr": 1.9903735954338736e-06, "epoch": 0.6989470598294775, "percentage": 13.98, "elapsed_time": "0:35:59", "remaining_time": "3:41:27", "throughput": 8925.71, "total_tokens": 19272768} +{"current_steps": 28615, "total_steps": 204665, "loss": 0.2028, "lr": 1.9903617876935544e-06, "epoch": 0.6990692106613247, "percentage": 13.98, "elapsed_time": "0:35:59", "remaining_time": "3:41:26", "throughput": 8925.74, "total_tokens": 19275904} +{"current_steps": 28620, "total_steps": 204665, "loss": 0.2168, "lr": 1.990349972751058e-06, "epoch": 0.6991913614931717, "percentage": 13.98, "elapsed_time": "0:35:59", "remaining_time": "3:41:25", "throughput": 8925.81, "total_tokens": 19279104} +{"current_steps": 28625, "total_steps": 204665, "loss": 0.1528, "lr": 1.9903381506064704e-06, "epoch": 0.6993135123250189, "percentage": 13.99, "elapsed_time": "0:36:00", "remaining_time": "3:41:25", "throughput": 8925.99, "total_tokens": 19282624} +{"current_steps": 28630, "total_steps": 204665, "loss": 0.0853, "lr": 1.9903263212598772e-06, "epoch": 0.6994356631568661, "percentage": 13.99, "elapsed_time": "0:36:00", "remaining_time": "3:41:24", "throughput": 8926.01, "total_tokens": 19285696} +{"current_steps": 28635, "total_steps": 204665, "loss": 0.0641, "lr": 1.990314484711365e-06, "epoch": 0.6995578139887133, "percentage": 13.99, "elapsed_time": "0:36:00", "remaining_time": "3:41:24", "throughput": 8926.09, "total_tokens": 19288960} +{"current_steps": 28640, "total_steps": 204665, "loss": 0.1388, "lr": 1.990302640961019e-06, "epoch": 0.6996799648205604, "percentage": 13.99, "elapsed_time": "0:36:01", "remaining_time": "3:41:23", "throughput": 8926.1, "total_tokens": 19292032} +{"current_steps": 28645, "total_steps": 204665, "loss": 0.1323, "lr": 1.990290790008926e-06, "epoch": 0.6998021156524076, "percentage": 14.0, "elapsed_time": "0:36:01", "remaining_time": "3:41:23", "throughput": 8926.08, "total_tokens": 19294976} +{"current_steps": 28650, "total_steps": 204665, "loss": 0.0876, "lr": 1.9902789318551727e-06, "epoch": 0.6999242664842548, "percentage": 14.0, "elapsed_time": "0:36:01", "remaining_time": "3:41:22", "throughput": 8926.16, "total_tokens": 19298240} +{"current_steps": 28655, "total_steps": 204665, "loss": 0.1469, "lr": 1.990267066499844e-06, "epoch": 0.700046417316102, "percentage": 14.0, "elapsed_time": "0:36:02", "remaining_time": "3:41:21", "throughput": 8926.21, "total_tokens": 19301440} +{"current_steps": 28660, "total_steps": 204665, "loss": 0.0732, "lr": 1.9902551939430266e-06, "epoch": 0.700168568147949, "percentage": 14.0, "elapsed_time": "0:36:02", "remaining_time": "3:41:21", "throughput": 8926.28, "total_tokens": 19304640} +{"current_steps": 28665, "total_steps": 204665, "loss": 0.0645, "lr": 1.9902433141848076e-06, "epoch": 0.7002907189797962, "percentage": 14.01, "elapsed_time": "0:36:03", "remaining_time": "3:41:20", "throughput": 8926.38, "total_tokens": 19307968} +{"current_steps": 28670, "total_steps": 204665, "loss": 0.0677, "lr": 1.9902314272252724e-06, "epoch": 0.7004128698116434, "percentage": 14.01, "elapsed_time": "0:36:03", "remaining_time": "3:41:20", "throughput": 8926.41, "total_tokens": 19311104} +{"current_steps": 28675, "total_steps": 204665, "loss": 0.0831, "lr": 1.9902195330645084e-06, "epoch": 0.7005350206434906, "percentage": 14.01, "elapsed_time": "0:36:03", "remaining_time": "3:41:19", "throughput": 8926.48, "total_tokens": 19314304} +{"current_steps": 28680, "total_steps": 204665, "loss": 0.1596, "lr": 1.9902076317026014e-06, "epoch": 0.7006571714753378, "percentage": 14.01, "elapsed_time": "0:36:04", "remaining_time": "3:41:19", "throughput": 8926.63, "total_tokens": 19317760} +{"current_steps": 28685, "total_steps": 204665, "loss": 0.1439, "lr": 1.990195723139638e-06, "epoch": 0.7007793223071849, "percentage": 14.02, "elapsed_time": "0:36:04", "remaining_time": "3:41:18", "throughput": 8926.82, "total_tokens": 19321344} +{"current_steps": 28690, "total_steps": 204665, "loss": 0.2093, "lr": 1.990183807375705e-06, "epoch": 0.7009014731390321, "percentage": 14.02, "elapsed_time": "0:36:04", "remaining_time": "3:41:17", "throughput": 8927.18, "total_tokens": 19325312} +{"current_steps": 28695, "total_steps": 204665, "loss": 0.1465, "lr": 1.9901718844108894e-06, "epoch": 0.7010236239708793, "percentage": 14.02, "elapsed_time": "0:36:05", "remaining_time": "3:41:17", "throughput": 8927.37, "total_tokens": 19328896} +{"current_steps": 28700, "total_steps": 204665, "loss": 0.083, "lr": 1.9901599542452773e-06, "epoch": 0.7011457748027264, "percentage": 14.02, "elapsed_time": "0:36:05", "remaining_time": "3:41:16", "throughput": 8927.58, "total_tokens": 19332480} +{"current_steps": 28705, "total_steps": 204665, "loss": 0.0821, "lr": 1.9901480168789554e-06, "epoch": 0.7012679256345735, "percentage": 14.03, "elapsed_time": "0:36:05", "remaining_time": "3:41:16", "throughput": 8927.64, "total_tokens": 19335680} +{"current_steps": 28710, "total_steps": 204665, "loss": 0.0284, "lr": 1.990136072312011e-06, "epoch": 0.7013900764664207, "percentage": 14.03, "elapsed_time": "0:36:06", "remaining_time": "3:41:15", "throughput": 8927.8, "total_tokens": 19339136} +{"current_steps": 28715, "total_steps": 204665, "loss": 0.197, "lr": 1.9901241205445313e-06, "epoch": 0.7015122272982679, "percentage": 14.03, "elapsed_time": "0:36:06", "remaining_time": "3:41:15", "throughput": 8927.96, "total_tokens": 19342592} +{"current_steps": 28720, "total_steps": 204665, "loss": 0.0998, "lr": 1.990112161576602e-06, "epoch": 0.7016343781301151, "percentage": 14.03, "elapsed_time": "0:36:06", "remaining_time": "3:41:14", "throughput": 8928.27, "total_tokens": 19346496} +{"current_steps": 28725, "total_steps": 204665, "loss": 0.1877, "lr": 1.990100195408311e-06, "epoch": 0.7017565289619623, "percentage": 14.04, "elapsed_time": "0:36:07", "remaining_time": "3:41:14", "throughput": 8928.43, "total_tokens": 19349952} +{"current_steps": 28730, "total_steps": 204665, "loss": 0.1481, "lr": 1.9900882220397454e-06, "epoch": 0.7018786797938094, "percentage": 14.04, "elapsed_time": "0:36:07", "remaining_time": "3:41:13", "throughput": 8928.57, "total_tokens": 19353408} +{"current_steps": 28735, "total_steps": 204665, "loss": 0.1623, "lr": 1.9900762414709913e-06, "epoch": 0.7020008306256565, "percentage": 14.04, "elapsed_time": "0:36:07", "remaining_time": "3:41:13", "throughput": 8928.81, "total_tokens": 19357120} +{"current_steps": 28740, "total_steps": 204665, "loss": 0.1476, "lr": 1.990064253702137e-06, "epoch": 0.7021229814575037, "percentage": 14.04, "elapsed_time": "0:36:08", "remaining_time": "3:41:12", "throughput": 8928.98, "total_tokens": 19360640} +{"current_steps": 28745, "total_steps": 204665, "loss": 0.0906, "lr": 1.990052258733269e-06, "epoch": 0.7022451322893509, "percentage": 14.04, "elapsed_time": "0:36:08", "remaining_time": "3:41:12", "throughput": 8929.34, "total_tokens": 19364672} +{"current_steps": 28750, "total_steps": 204665, "loss": 0.0672, "lr": 1.9900402565644745e-06, "epoch": 0.702367283121198, "percentage": 14.05, "elapsed_time": "0:36:08", "remaining_time": "3:41:11", "throughput": 8929.33, "total_tokens": 19367680} +{"current_steps": 28755, "total_steps": 204665, "loss": 0.1317, "lr": 1.9900282471958413e-06, "epoch": 0.7024894339530452, "percentage": 14.05, "elapsed_time": "0:36:09", "remaining_time": "3:41:10", "throughput": 8929.3, "total_tokens": 19370624} +{"current_steps": 28760, "total_steps": 204665, "loss": 0.1801, "lr": 1.990016230627456e-06, "epoch": 0.7026115847848924, "percentage": 14.05, "elapsed_time": "0:36:09", "remaining_time": "3:41:10", "throughput": 8929.27, "total_tokens": 19373568} +{"current_steps": 28765, "total_steps": 204665, "loss": 0.1105, "lr": 1.9900042068594066e-06, "epoch": 0.7027337356167396, "percentage": 14.05, "elapsed_time": "0:36:10", "remaining_time": "3:41:09", "throughput": 8929.43, "total_tokens": 19377024} +{"current_steps": 28770, "total_steps": 204665, "loss": 0.1156, "lr": 1.98999217589178e-06, "epoch": 0.7028558864485868, "percentage": 14.06, "elapsed_time": "0:36:10", "remaining_time": "3:41:09", "throughput": 8929.43, "total_tokens": 19380032} +{"current_steps": 28775, "total_steps": 204665, "loss": 0.1092, "lr": 1.9899801377246645e-06, "epoch": 0.7029780372804338, "percentage": 14.06, "elapsed_time": "0:36:10", "remaining_time": "3:41:08", "throughput": 8929.65, "total_tokens": 19383680} +{"current_steps": 28780, "total_steps": 204665, "loss": 0.2078, "lr": 1.989968092358147e-06, "epoch": 0.703100188112281, "percentage": 14.06, "elapsed_time": "0:36:11", "remaining_time": "3:41:08", "throughput": 8929.72, "total_tokens": 19386944} +{"current_steps": 28785, "total_steps": 204665, "loss": 0.1218, "lr": 1.9899560397923154e-06, "epoch": 0.7032223389441282, "percentage": 14.06, "elapsed_time": "0:36:11", "remaining_time": "3:41:07", "throughput": 8929.81, "total_tokens": 19390272} +{"current_steps": 28790, "total_steps": 204665, "loss": 0.1744, "lr": 1.9899439800272568e-06, "epoch": 0.7033444897759754, "percentage": 14.07, "elapsed_time": "0:36:11", "remaining_time": "3:41:06", "throughput": 8929.83, "total_tokens": 19393344} +{"current_steps": 28795, "total_steps": 204665, "loss": 0.0851, "lr": 1.9899319130630597e-06, "epoch": 0.7034666406078225, "percentage": 14.07, "elapsed_time": "0:36:12", "remaining_time": "3:41:06", "throughput": 8930.05, "total_tokens": 19396992} +{"current_steps": 28800, "total_steps": 204665, "loss": 0.1174, "lr": 1.989919838899811e-06, "epoch": 0.7035887914396697, "percentage": 14.07, "elapsed_time": "0:36:12", "remaining_time": "3:41:05", "throughput": 8930.14, "total_tokens": 19400320} +{"current_steps": 28805, "total_steps": 204665, "loss": 0.1332, "lr": 1.9899077575376e-06, "epoch": 0.7037109422715169, "percentage": 14.07, "elapsed_time": "0:36:12", "remaining_time": "3:41:05", "throughput": 8930.39, "total_tokens": 19404032} +{"current_steps": 28810, "total_steps": 204665, "loss": 0.1948, "lr": 1.9898956689765127e-06, "epoch": 0.7038330931033641, "percentage": 14.08, "elapsed_time": "0:36:13", "remaining_time": "3:41:04", "throughput": 8930.49, "total_tokens": 19407360} +{"current_steps": 28815, "total_steps": 204665, "loss": 0.0506, "lr": 1.989883573216638e-06, "epoch": 0.7039552439352113, "percentage": 14.08, "elapsed_time": "0:36:13", "remaining_time": "3:41:04", "throughput": 8930.54, "total_tokens": 19410560} +{"current_steps": 28820, "total_steps": 204665, "loss": 0.0926, "lr": 1.9898714702580637e-06, "epoch": 0.7040773947670583, "percentage": 14.08, "elapsed_time": "0:36:13", "remaining_time": "3:41:03", "throughput": 8930.6, "total_tokens": 19413760} +{"current_steps": 28825, "total_steps": 204665, "loss": 0.1177, "lr": 1.9898593601008776e-06, "epoch": 0.7041995455989055, "percentage": 14.08, "elapsed_time": "0:36:14", "remaining_time": "3:41:03", "throughput": 8930.75, "total_tokens": 19417216} +{"current_steps": 28830, "total_steps": 204665, "loss": 0.0938, "lr": 1.9898472427451684e-06, "epoch": 0.7043216964307527, "percentage": 14.09, "elapsed_time": "0:36:14", "remaining_time": "3:41:02", "throughput": 8930.82, "total_tokens": 19420480} +{"current_steps": 28835, "total_steps": 204665, "loss": 0.197, "lr": 1.989835118191024e-06, "epoch": 0.7044438472625999, "percentage": 14.09, "elapsed_time": "0:36:14", "remaining_time": "3:41:02", "throughput": 8930.99, "total_tokens": 19424000} +{"current_steps": 28840, "total_steps": 204665, "loss": 0.0714, "lr": 1.989822986438532e-06, "epoch": 0.704565998094447, "percentage": 14.09, "elapsed_time": "0:36:15", "remaining_time": "3:41:01", "throughput": 8931.19, "total_tokens": 19427648} +{"current_steps": 28845, "total_steps": 204665, "loss": 0.0972, "lr": 1.9898108474877805e-06, "epoch": 0.7046881489262942, "percentage": 14.09, "elapsed_time": "0:36:15", "remaining_time": "3:41:01", "throughput": 8931.26, "total_tokens": 19430912} +{"current_steps": 28850, "total_steps": 204665, "loss": 0.1386, "lr": 1.989798701338859e-06, "epoch": 0.7048102997581414, "percentage": 14.1, "elapsed_time": "0:36:15", "remaining_time": "3:41:00", "throughput": 8931.43, "total_tokens": 19434432} +{"current_steps": 28855, "total_steps": 204665, "loss": 0.2156, "lr": 1.989786547991855e-06, "epoch": 0.7049324505899885, "percentage": 14.1, "elapsed_time": "0:36:16", "remaining_time": "3:40:59", "throughput": 8931.56, "total_tokens": 19437824} +{"current_steps": 28860, "total_steps": 204665, "loss": 0.1754, "lr": 1.989774387446857e-06, "epoch": 0.7050546014218357, "percentage": 14.1, "elapsed_time": "0:36:16", "remaining_time": "3:40:59", "throughput": 8931.72, "total_tokens": 19441344} +{"current_steps": 28865, "total_steps": 204665, "loss": 0.0371, "lr": 1.9897622197039533e-06, "epoch": 0.7051767522536828, "percentage": 14.1, "elapsed_time": "0:36:17", "remaining_time": "3:40:58", "throughput": 8931.97, "total_tokens": 19445056} +{"current_steps": 28870, "total_steps": 204665, "loss": 0.1202, "lr": 1.9897500447632326e-06, "epoch": 0.70529890308553, "percentage": 14.11, "elapsed_time": "0:36:17", "remaining_time": "3:40:58", "throughput": 8931.99, "total_tokens": 19448192} +{"current_steps": 28875, "total_steps": 204665, "loss": 0.1468, "lr": 1.9897378626247835e-06, "epoch": 0.7054210539173772, "percentage": 14.11, "elapsed_time": "0:36:17", "remaining_time": "3:40:57", "throughput": 8932.19, "total_tokens": 19451776} +{"current_steps": 28880, "total_steps": 204665, "loss": 0.0516, "lr": 1.9897256732886943e-06, "epoch": 0.7055432047492244, "percentage": 14.11, "elapsed_time": "0:36:18", "remaining_time": "3:40:57", "throughput": 8932.21, "total_tokens": 19454848} +{"current_steps": 28885, "total_steps": 204665, "loss": 0.1063, "lr": 1.989713476755054e-06, "epoch": 0.7056653555810715, "percentage": 14.11, "elapsed_time": "0:36:18", "remaining_time": "3:40:56", "throughput": 8932.47, "total_tokens": 19458624} +{"current_steps": 28890, "total_steps": 204665, "loss": 0.0761, "lr": 1.9897012730239508e-06, "epoch": 0.7057875064129187, "percentage": 14.12, "elapsed_time": "0:36:18", "remaining_time": "3:40:56", "throughput": 8932.49, "total_tokens": 19461760} +{"current_steps": 28895, "total_steps": 204665, "loss": 0.1534, "lr": 1.989689062095474e-06, "epoch": 0.7059096572447658, "percentage": 14.12, "elapsed_time": "0:36:19", "remaining_time": "3:40:55", "throughput": 8932.78, "total_tokens": 19465600} +{"current_steps": 28900, "total_steps": 204665, "loss": 0.0526, "lr": 1.989676843969712e-06, "epoch": 0.706031808076613, "percentage": 14.12, "elapsed_time": "0:36:19", "remaining_time": "3:40:55", "throughput": 8933.09, "total_tokens": 19469504} +{"current_steps": 28905, "total_steps": 204665, "loss": 0.1339, "lr": 1.9896646186467537e-06, "epoch": 0.7061539589084602, "percentage": 14.12, "elapsed_time": "0:36:19", "remaining_time": "3:40:54", "throughput": 8932.96, "total_tokens": 19472192} +{"current_steps": 28910, "total_steps": 204665, "loss": 0.1101, "lr": 1.9896523861266882e-06, "epoch": 0.7062761097403073, "percentage": 14.13, "elapsed_time": "0:36:20", "remaining_time": "3:40:54", "throughput": 8933.27, "total_tokens": 19476096} +{"current_steps": 28915, "total_steps": 204665, "loss": 0.0629, "lr": 1.9896401464096045e-06, "epoch": 0.7063982605721545, "percentage": 14.13, "elapsed_time": "0:36:20", "remaining_time": "3:40:53", "throughput": 8933.3, "total_tokens": 19479232} +{"current_steps": 28920, "total_steps": 204665, "loss": 0.1961, "lr": 1.9896278994955914e-06, "epoch": 0.7065204114040017, "percentage": 14.13, "elapsed_time": "0:36:20", "remaining_time": "3:40:52", "throughput": 8933.23, "total_tokens": 19482112} +{"current_steps": 28925, "total_steps": 204665, "loss": 0.0798, "lr": 1.9896156453847383e-06, "epoch": 0.7066425622358489, "percentage": 14.13, "elapsed_time": "0:36:21", "remaining_time": "3:40:52", "throughput": 8933.3, "total_tokens": 19485376} +{"current_steps": 28930, "total_steps": 204665, "loss": 0.0248, "lr": 1.9896033840771333e-06, "epoch": 0.706764713067696, "percentage": 14.14, "elapsed_time": "0:36:21", "remaining_time": "3:40:51", "throughput": 8933.42, "total_tokens": 19488768} +{"current_steps": 28935, "total_steps": 204665, "loss": 0.1189, "lr": 1.989591115572867e-06, "epoch": 0.7068868638995431, "percentage": 14.14, "elapsed_time": "0:36:21", "remaining_time": "3:40:51", "throughput": 8933.53, "total_tokens": 19492160} +{"current_steps": 28940, "total_steps": 204665, "loss": 0.0783, "lr": 1.9895788398720276e-06, "epoch": 0.7070090147313903, "percentage": 14.14, "elapsed_time": "0:36:22", "remaining_time": "3:40:50", "throughput": 8933.68, "total_tokens": 19495616} +{"current_steps": 28945, "total_steps": 204665, "loss": 0.0378, "lr": 1.9895665569747047e-06, "epoch": 0.7071311655632375, "percentage": 14.14, "elapsed_time": "0:36:22", "remaining_time": "3:40:50", "throughput": 8933.75, "total_tokens": 19498880} +{"current_steps": 28950, "total_steps": 204665, "loss": 0.0813, "lr": 1.989554266880988e-06, "epoch": 0.7072533163950846, "percentage": 14.15, "elapsed_time": "0:36:22", "remaining_time": "3:40:49", "throughput": 8933.77, "total_tokens": 19501952} +{"current_steps": 28955, "total_steps": 204665, "loss": 0.0586, "lr": 1.9895419695909663e-06, "epoch": 0.7073754672269318, "percentage": 14.15, "elapsed_time": "0:36:23", "remaining_time": "3:40:49", "throughput": 8934.05, "total_tokens": 19505792} +{"current_steps": 28960, "total_steps": 204665, "loss": 0.0589, "lr": 1.989529665104729e-06, "epoch": 0.707497618058779, "percentage": 14.15, "elapsed_time": "0:36:23", "remaining_time": "3:40:48", "throughput": 8934.29, "total_tokens": 19509504} +{"current_steps": 28965, "total_steps": 204665, "loss": 0.1812, "lr": 1.989517353422366e-06, "epoch": 0.7076197688906262, "percentage": 14.15, "elapsed_time": "0:36:24", "remaining_time": "3:40:48", "throughput": 8934.32, "total_tokens": 19512640} +{"current_steps": 28970, "total_steps": 204665, "loss": 0.1463, "lr": 1.989505034543967e-06, "epoch": 0.7077419197224734, "percentage": 14.15, "elapsed_time": "0:36:24", "remaining_time": "3:40:47", "throughput": 8934.45, "total_tokens": 19516032} +{"current_steps": 28975, "total_steps": 204665, "loss": 0.191, "lr": 1.989492708469621e-06, "epoch": 0.7078640705543204, "percentage": 14.16, "elapsed_time": "0:36:24", "remaining_time": "3:40:46", "throughput": 8934.51, "total_tokens": 19519296} +{"current_steps": 28980, "total_steps": 204665, "loss": 0.1693, "lr": 1.9894803751994176e-06, "epoch": 0.7079862213861676, "percentage": 14.16, "elapsed_time": "0:36:25", "remaining_time": "3:40:46", "throughput": 8934.59, "total_tokens": 19522688} +{"current_steps": 28985, "total_steps": 204665, "loss": 0.0771, "lr": 1.989468034733447e-06, "epoch": 0.7081083722180148, "percentage": 14.16, "elapsed_time": "0:36:25", "remaining_time": "3:40:46", "throughput": 8934.94, "total_tokens": 19526720} +{"current_steps": 28990, "total_steps": 204665, "loss": 0.1575, "lr": 1.989455687071799e-06, "epoch": 0.708230523049862, "percentage": 14.16, "elapsed_time": "0:36:25", "remaining_time": "3:40:45", "throughput": 8934.97, "total_tokens": 19529856} +{"current_steps": 28995, "total_steps": 204665, "loss": 0.0408, "lr": 1.9894433322145624e-06, "epoch": 0.7083526738817091, "percentage": 14.17, "elapsed_time": "0:36:26", "remaining_time": "3:40:44", "throughput": 8935.31, "total_tokens": 19533824} +{"current_steps": 29000, "total_steps": 204665, "loss": 0.075, "lr": 1.9894309701618285e-06, "epoch": 0.7084748247135563, "percentage": 14.17, "elapsed_time": "0:36:26", "remaining_time": "3:40:44", "throughput": 8935.44, "total_tokens": 19537216} +{"current_steps": 29005, "total_steps": 204665, "loss": 0.1021, "lr": 1.989418600913686e-06, "epoch": 0.7085969755454035, "percentage": 14.17, "elapsed_time": "0:36:26", "remaining_time": "3:40:43", "throughput": 8935.46, "total_tokens": 19540288} +{"current_steps": 29010, "total_steps": 204665, "loss": 0.0327, "lr": 1.9894062244702258e-06, "epoch": 0.7087191263772507, "percentage": 14.17, "elapsed_time": "0:36:27", "remaining_time": "3:40:43", "throughput": 8935.72, "total_tokens": 19544064} +{"current_steps": 29015, "total_steps": 204665, "loss": 0.089, "lr": 1.989393840831537e-06, "epoch": 0.7088412772090978, "percentage": 14.18, "elapsed_time": "0:36:27", "remaining_time": "3:40:42", "throughput": 8935.92, "total_tokens": 19547648} +{"current_steps": 29020, "total_steps": 204665, "loss": 0.1718, "lr": 1.98938144999771e-06, "epoch": 0.7089634280409449, "percentage": 14.18, "elapsed_time": "0:36:27", "remaining_time": "3:40:42", "throughput": 8936.2, "total_tokens": 19551488} +{"current_steps": 29025, "total_steps": 204665, "loss": 0.1411, "lr": 1.989369051968835e-06, "epoch": 0.7090855788727921, "percentage": 14.18, "elapsed_time": "0:36:28", "remaining_time": "3:40:41", "throughput": 8936.21, "total_tokens": 19554560} +{"current_steps": 29030, "total_steps": 204665, "loss": 0.1518, "lr": 1.9893566467450024e-06, "epoch": 0.7092077297046393, "percentage": 14.18, "elapsed_time": "0:36:28", "remaining_time": "3:40:41", "throughput": 8936.34, "total_tokens": 19557952} +{"current_steps": 29035, "total_steps": 204665, "loss": 0.0834, "lr": 1.989344234326302e-06, "epoch": 0.7093298805364865, "percentage": 14.19, "elapsed_time": "0:36:28", "remaining_time": "3:40:40", "throughput": 8936.36, "total_tokens": 19561088} +{"current_steps": 29040, "total_steps": 204665, "loss": 0.2314, "lr": 1.989331814712824e-06, "epoch": 0.7094520313683336, "percentage": 14.19, "elapsed_time": "0:36:29", "remaining_time": "3:40:40", "throughput": 8936.6, "total_tokens": 19564800} +{"current_steps": 29045, "total_steps": 204665, "loss": 0.1122, "lr": 1.9893193879046594e-06, "epoch": 0.7095741822001808, "percentage": 14.19, "elapsed_time": "0:36:29", "remaining_time": "3:40:39", "throughput": 8936.8, "total_tokens": 19568384} +{"current_steps": 29050, "total_steps": 204665, "loss": 0.1931, "lr": 1.989306953901898e-06, "epoch": 0.709696333032028, "percentage": 14.19, "elapsed_time": "0:36:29", "remaining_time": "3:40:39", "throughput": 8936.81, "total_tokens": 19571456} +{"current_steps": 29055, "total_steps": 204665, "loss": 0.1065, "lr": 1.9892945127046304e-06, "epoch": 0.7098184838638751, "percentage": 14.2, "elapsed_time": "0:36:30", "remaining_time": "3:40:38", "throughput": 8936.99, "total_tokens": 19575040} +{"current_steps": 29060, "total_steps": 204665, "loss": 0.0938, "lr": 1.989282064312947e-06, "epoch": 0.7099406346957223, "percentage": 14.2, "elapsed_time": "0:36:30", "remaining_time": "3:40:37", "throughput": 8936.93, "total_tokens": 19577920} +{"current_steps": 29065, "total_steps": 204665, "loss": 0.0894, "lr": 1.989269608726938e-06, "epoch": 0.7100627855275694, "percentage": 14.2, "elapsed_time": "0:36:31", "remaining_time": "3:40:37", "throughput": 8937.06, "total_tokens": 19581312} +{"current_steps": 29070, "total_steps": 204665, "loss": 0.1275, "lr": 1.9892571459466945e-06, "epoch": 0.7101849363594166, "percentage": 14.2, "elapsed_time": "0:36:31", "remaining_time": "3:40:36", "throughput": 8937.17, "total_tokens": 19584640} +{"current_steps": 29075, "total_steps": 204665, "loss": 0.1724, "lr": 1.9892446759723073e-06, "epoch": 0.7103070871912638, "percentage": 14.21, "elapsed_time": "0:36:31", "remaining_time": "3:40:36", "throughput": 8937.37, "total_tokens": 19588224} +{"current_steps": 29080, "total_steps": 204665, "loss": 0.1144, "lr": 1.989232198803866e-06, "epoch": 0.710429238023111, "percentage": 14.21, "elapsed_time": "0:36:32", "remaining_time": "3:40:35", "throughput": 8937.54, "total_tokens": 19591744} +{"current_steps": 29085, "total_steps": 204665, "loss": 0.0349, "lr": 1.9892197144414627e-06, "epoch": 0.710551388854958, "percentage": 14.21, "elapsed_time": "0:36:32", "remaining_time": "3:40:35", "throughput": 8937.68, "total_tokens": 19595136} +{"current_steps": 29090, "total_steps": 204665, "loss": 0.0683, "lr": 1.9892072228851876e-06, "epoch": 0.7106735396868052, "percentage": 14.21, "elapsed_time": "0:36:32", "remaining_time": "3:40:34", "throughput": 8937.77, "total_tokens": 19598464} +{"current_steps": 29095, "total_steps": 204665, "loss": 0.0898, "lr": 1.9891947241351313e-06, "epoch": 0.7107956905186524, "percentage": 14.22, "elapsed_time": "0:36:33", "remaining_time": "3:40:34", "throughput": 8937.96, "total_tokens": 19602048} +{"current_steps": 29100, "total_steps": 204665, "loss": 0.1653, "lr": 1.989182218191385e-06, "epoch": 0.7109178413504996, "percentage": 14.22, "elapsed_time": "0:36:33", "remaining_time": "3:40:33", "throughput": 8938.18, "total_tokens": 19605696} +{"current_steps": 29105, "total_steps": 204665, "loss": 0.0267, "lr": 1.9891697050540395e-06, "epoch": 0.7110399921823468, "percentage": 14.22, "elapsed_time": "0:36:33", "remaining_time": "3:40:33", "throughput": 8938.24, "total_tokens": 19608896} +{"current_steps": 29110, "total_steps": 204665, "loss": 0.1332, "lr": 1.9891571847231858e-06, "epoch": 0.7111621430141939, "percentage": 14.22, "elapsed_time": "0:36:34", "remaining_time": "3:40:32", "throughput": 8938.51, "total_tokens": 19612672} +{"current_steps": 29115, "total_steps": 204665, "loss": 0.185, "lr": 1.989144657198915e-06, "epoch": 0.7112842938460411, "percentage": 14.23, "elapsed_time": "0:36:34", "remaining_time": "3:40:32", "throughput": 8938.9, "total_tokens": 19616768} +{"current_steps": 29120, "total_steps": 204665, "loss": 0.0358, "lr": 1.989132122481318e-06, "epoch": 0.7114064446778883, "percentage": 14.23, "elapsed_time": "0:36:34", "remaining_time": "3:40:31", "throughput": 8938.9, "total_tokens": 19619776} +{"current_steps": 29125, "total_steps": 204665, "loss": 0.1996, "lr": 1.9891195805704865e-06, "epoch": 0.7115285955097355, "percentage": 14.23, "elapsed_time": "0:36:35", "remaining_time": "3:40:30", "throughput": 8938.99, "total_tokens": 19623040} +{"current_steps": 29130, "total_steps": 204665, "loss": 0.1877, "lr": 1.9891070314665114e-06, "epoch": 0.7116507463415825, "percentage": 14.23, "elapsed_time": "0:36:35", "remaining_time": "3:40:30", "throughput": 8938.95, "total_tokens": 19625984} +{"current_steps": 29135, "total_steps": 204665, "loss": 0.1285, "lr": 1.9890944751694838e-06, "epoch": 0.7117728971734297, "percentage": 14.24, "elapsed_time": "0:36:35", "remaining_time": "3:40:29", "throughput": 8939.39, "total_tokens": 19630272} +{"current_steps": 29140, "total_steps": 204665, "loss": 0.1313, "lr": 1.989081911679495e-06, "epoch": 0.7118950480052769, "percentage": 14.24, "elapsed_time": "0:36:36", "remaining_time": "3:40:29", "throughput": 8939.44, "total_tokens": 19633472} +{"current_steps": 29145, "total_steps": 204665, "loss": 0.1579, "lr": 1.9890693409966366e-06, "epoch": 0.7120171988371241, "percentage": 14.24, "elapsed_time": "0:36:36", "remaining_time": "3:40:28", "throughput": 8939.4, "total_tokens": 19636416} +{"current_steps": 29150, "total_steps": 204665, "loss": 0.1654, "lr": 1.9890567631209996e-06, "epoch": 0.7121393496689713, "percentage": 14.24, "elapsed_time": "0:36:36", "remaining_time": "3:40:28", "throughput": 8939.46, "total_tokens": 19639616} +{"current_steps": 29155, "total_steps": 204665, "loss": 0.2201, "lr": 1.9890441780526764e-06, "epoch": 0.7122615005008184, "percentage": 14.25, "elapsed_time": "0:36:37", "remaining_time": "3:40:27", "throughput": 8939.74, "total_tokens": 19643456} +{"current_steps": 29160, "total_steps": 204665, "loss": 0.0386, "lr": 1.9890315857917577e-06, "epoch": 0.7123836513326656, "percentage": 14.25, "elapsed_time": "0:36:37", "remaining_time": "3:40:27", "throughput": 8939.85, "total_tokens": 19646784} +{"current_steps": 29165, "total_steps": 204665, "loss": 0.1972, "lr": 1.9890189863383354e-06, "epoch": 0.7125058021645128, "percentage": 14.25, "elapsed_time": "0:36:38", "remaining_time": "3:40:26", "throughput": 8939.94, "total_tokens": 19650112} +{"current_steps": 29170, "total_steps": 204665, "loss": 0.1494, "lr": 1.9890063796925006e-06, "epoch": 0.71262795299636, "percentage": 14.25, "elapsed_time": "0:36:38", "remaining_time": "3:40:25", "throughput": 8940.0, "total_tokens": 19653312} +{"current_steps": 29175, "total_steps": 204665, "loss": 0.2056, "lr": 1.988993765854346e-06, "epoch": 0.712750103828207, "percentage": 14.26, "elapsed_time": "0:36:38", "remaining_time": "3:40:25", "throughput": 8940.09, "total_tokens": 19656576} +{"current_steps": 29180, "total_steps": 204665, "loss": 0.1209, "lr": 1.9889811448239625e-06, "epoch": 0.7128722546600542, "percentage": 14.26, "elapsed_time": "0:36:39", "remaining_time": "3:40:24", "throughput": 8940.06, "total_tokens": 19659520} +{"current_steps": 29185, "total_steps": 204665, "loss": 0.1168, "lr": 1.9889685166014417e-06, "epoch": 0.7129944054919014, "percentage": 14.26, "elapsed_time": "0:36:39", "remaining_time": "3:40:24", "throughput": 8940.18, "total_tokens": 19662912} +{"current_steps": 29190, "total_steps": 204665, "loss": 0.102, "lr": 1.988955881186876e-06, "epoch": 0.7131165563237486, "percentage": 14.26, "elapsed_time": "0:36:39", "remaining_time": "3:40:23", "throughput": 8940.25, "total_tokens": 19666176} +{"current_steps": 29195, "total_steps": 204665, "loss": 0.0322, "lr": 1.9889432385803574e-06, "epoch": 0.7132387071555957, "percentage": 14.26, "elapsed_time": "0:36:40", "remaining_time": "3:40:23", "throughput": 8940.5, "total_tokens": 19669888} +{"current_steps": 29200, "total_steps": 204665, "loss": 0.1683, "lr": 1.9889305887819776e-06, "epoch": 0.7133608579874429, "percentage": 14.27, "elapsed_time": "0:36:40", "remaining_time": "3:40:22", "throughput": 8940.51, "total_tokens": 19672960} +{"current_steps": 29205, "total_steps": 204665, "loss": 0.1291, "lr": 1.9889179317918285e-06, "epoch": 0.71348300881929, "percentage": 14.27, "elapsed_time": "0:36:40", "remaining_time": "3:40:21", "throughput": 8940.59, "total_tokens": 19676224} +{"current_steps": 29210, "total_steps": 204665, "loss": 0.0831, "lr": 1.988905267610002e-06, "epoch": 0.7136051596511372, "percentage": 14.27, "elapsed_time": "0:36:41", "remaining_time": "3:40:21", "throughput": 8940.86, "total_tokens": 19680000} +{"current_steps": 29215, "total_steps": 204665, "loss": 0.083, "lr": 1.9888925962365907e-06, "epoch": 0.7137273104829844, "percentage": 14.27, "elapsed_time": "0:36:41", "remaining_time": "3:40:20", "throughput": 8940.96, "total_tokens": 19683328} +{"current_steps": 29220, "total_steps": 204665, "loss": 0.1372, "lr": 1.9888799176716866e-06, "epoch": 0.7138494613148315, "percentage": 14.28, "elapsed_time": "0:36:41", "remaining_time": "3:40:20", "throughput": 8941.04, "total_tokens": 19686592} +{"current_steps": 29225, "total_steps": 204665, "loss": 0.1241, "lr": 1.988867231915381e-06, "epoch": 0.7139716121466787, "percentage": 14.28, "elapsed_time": "0:36:42", "remaining_time": "3:40:19", "throughput": 8941.07, "total_tokens": 19689728} +{"current_steps": 29230, "total_steps": 204665, "loss": 0.1089, "lr": 1.9888545389677675e-06, "epoch": 0.7140937629785259, "percentage": 14.28, "elapsed_time": "0:36:42", "remaining_time": "3:40:19", "throughput": 8941.11, "total_tokens": 19692864} +{"current_steps": 29235, "total_steps": 204665, "loss": 0.1428, "lr": 1.9888418388289376e-06, "epoch": 0.7142159138103731, "percentage": 14.28, "elapsed_time": "0:36:42", "remaining_time": "3:40:18", "throughput": 8941.08, "total_tokens": 19695872} +{"current_steps": 29240, "total_steps": 204665, "loss": 0.0693, "lr": 1.988829131498984e-06, "epoch": 0.7143380646422202, "percentage": 14.29, "elapsed_time": "0:36:43", "remaining_time": "3:40:18", "throughput": 8941.24, "total_tokens": 19699328} +{"current_steps": 29245, "total_steps": 204665, "loss": 0.0795, "lr": 1.9888164169779992e-06, "epoch": 0.7144602154740674, "percentage": 14.29, "elapsed_time": "0:36:43", "remaining_time": "3:40:17", "throughput": 8941.43, "total_tokens": 19702848} +{"current_steps": 29250, "total_steps": 204665, "loss": 0.0835, "lr": 1.9888036952660754e-06, "epoch": 0.7145823663059145, "percentage": 14.29, "elapsed_time": "0:36:43", "remaining_time": "3:40:16", "throughput": 8941.49, "total_tokens": 19706048} +{"current_steps": 29255, "total_steps": 204665, "loss": 0.0817, "lr": 1.9887909663633047e-06, "epoch": 0.7147045171377617, "percentage": 14.29, "elapsed_time": "0:36:44", "remaining_time": "3:40:16", "throughput": 8941.62, "total_tokens": 19709440} +{"current_steps": 29260, "total_steps": 204665, "loss": 0.0933, "lr": 1.9887782302697803e-06, "epoch": 0.7148266679696089, "percentage": 14.3, "elapsed_time": "0:36:44", "remaining_time": "3:40:15", "throughput": 8941.7, "total_tokens": 19712704} +{"current_steps": 29265, "total_steps": 204665, "loss": 0.0394, "lr": 1.988765486985595e-06, "epoch": 0.714948818801456, "percentage": 14.3, "elapsed_time": "0:36:44", "remaining_time": "3:40:15", "throughput": 8941.78, "total_tokens": 19715968} +{"current_steps": 29270, "total_steps": 204665, "loss": 0.0985, "lr": 1.988752736510841e-06, "epoch": 0.7150709696333032, "percentage": 14.3, "elapsed_time": "0:36:45", "remaining_time": "3:40:14", "throughput": 8941.69, "total_tokens": 19718784} +{"current_steps": 29275, "total_steps": 204665, "loss": 0.1016, "lr": 1.9887399788456113e-06, "epoch": 0.7151931204651504, "percentage": 14.3, "elapsed_time": "0:36:45", "remaining_time": "3:40:14", "throughput": 8941.89, "total_tokens": 19722368} +{"current_steps": 29280, "total_steps": 204665, "loss": 0.1463, "lr": 1.988727213989998e-06, "epoch": 0.7153152712969976, "percentage": 14.31, "elapsed_time": "0:36:45", "remaining_time": "3:40:13", "throughput": 8942.1, "total_tokens": 19726016} +{"current_steps": 29285, "total_steps": 204665, "loss": 0.2273, "lr": 1.9887144419440948e-06, "epoch": 0.7154374221288446, "percentage": 14.31, "elapsed_time": "0:36:46", "remaining_time": "3:40:13", "throughput": 8942.18, "total_tokens": 19729280} +{"current_steps": 29290, "total_steps": 204665, "loss": 0.0731, "lr": 1.9887016627079946e-06, "epoch": 0.7155595729606918, "percentage": 14.31, "elapsed_time": "0:36:46", "remaining_time": "3:40:12", "throughput": 8942.33, "total_tokens": 19732736} +{"current_steps": 29295, "total_steps": 204665, "loss": 0.1675, "lr": 1.9886888762817897e-06, "epoch": 0.715681723792539, "percentage": 14.31, "elapsed_time": "0:36:47", "remaining_time": "3:40:12", "throughput": 8942.72, "total_tokens": 19736832} +{"current_steps": 29300, "total_steps": 204665, "loss": 0.1408, "lr": 1.988676082665573e-06, "epoch": 0.7158038746243862, "percentage": 14.32, "elapsed_time": "0:36:47", "remaining_time": "3:40:11", "throughput": 8942.84, "total_tokens": 19740224} +{"current_steps": 29305, "total_steps": 204665, "loss": 0.1125, "lr": 1.9886632818594384e-06, "epoch": 0.7159260254562334, "percentage": 14.32, "elapsed_time": "0:36:47", "remaining_time": "3:40:10", "throughput": 8943.06, "total_tokens": 19743872} +{"current_steps": 29310, "total_steps": 204665, "loss": 0.0717, "lr": 1.988650473863478e-06, "epoch": 0.7160481762880805, "percentage": 14.32, "elapsed_time": "0:36:48", "remaining_time": "3:40:10", "throughput": 8943.14, "total_tokens": 19747136} +{"current_steps": 29315, "total_steps": 204665, "loss": 0.0765, "lr": 1.988637658677786e-06, "epoch": 0.7161703271199277, "percentage": 14.32, "elapsed_time": "0:36:48", "remaining_time": "3:40:09", "throughput": 8943.19, "total_tokens": 19750336} +{"current_steps": 29320, "total_steps": 204665, "loss": 0.1244, "lr": 1.9886248363024545e-06, "epoch": 0.7162924779517749, "percentage": 14.33, "elapsed_time": "0:36:48", "remaining_time": "3:40:09", "throughput": 8943.45, "total_tokens": 19754112} +{"current_steps": 29325, "total_steps": 204665, "loss": 0.0769, "lr": 1.9886120067375777e-06, "epoch": 0.716414628783622, "percentage": 14.33, "elapsed_time": "0:36:49", "remaining_time": "3:40:08", "throughput": 8943.53, "total_tokens": 19757376} +{"current_steps": 29330, "total_steps": 204665, "loss": 0.0164, "lr": 1.9885991699832483e-06, "epoch": 0.7165367796154691, "percentage": 14.33, "elapsed_time": "0:36:49", "remaining_time": "3:40:08", "throughput": 8943.58, "total_tokens": 19760576} +{"current_steps": 29335, "total_steps": 204665, "loss": 0.1532, "lr": 1.98858632603956e-06, "epoch": 0.7166589304473163, "percentage": 14.33, "elapsed_time": "0:36:49", "remaining_time": "3:40:07", "throughput": 8943.64, "total_tokens": 19763776} +{"current_steps": 29340, "total_steps": 204665, "loss": 0.1228, "lr": 1.988573474906606e-06, "epoch": 0.7167810812791635, "percentage": 14.34, "elapsed_time": "0:36:50", "remaining_time": "3:40:07", "throughput": 8944.03, "total_tokens": 19767872} +{"current_steps": 29345, "total_steps": 204665, "loss": 0.2179, "lr": 1.9885606165844796e-06, "epoch": 0.7169032321110107, "percentage": 14.34, "elapsed_time": "0:36:50", "remaining_time": "3:40:06", "throughput": 8944.12, "total_tokens": 19771200} +{"current_steps": 29350, "total_steps": 204665, "loss": 0.0435, "lr": 1.9885477510732745e-06, "epoch": 0.7170253829428579, "percentage": 14.34, "elapsed_time": "0:36:50", "remaining_time": "3:40:06", "throughput": 8944.18, "total_tokens": 19774400} +{"current_steps": 29355, "total_steps": 204665, "loss": 0.1557, "lr": 1.9885348783730843e-06, "epoch": 0.717147533774705, "percentage": 14.34, "elapsed_time": "0:36:51", "remaining_time": "3:40:05", "throughput": 8944.51, "total_tokens": 19778368} +{"current_steps": 29360, "total_steps": 204665, "loss": 0.0719, "lr": 1.9885219984840027e-06, "epoch": 0.7172696846065522, "percentage": 14.35, "elapsed_time": "0:36:51", "remaining_time": "3:40:05", "throughput": 8944.65, "total_tokens": 19781824} +{"current_steps": 29365, "total_steps": 204665, "loss": 0.0791, "lr": 1.9885091114061233e-06, "epoch": 0.7173918354383994, "percentage": 14.35, "elapsed_time": "0:36:51", "remaining_time": "3:40:04", "throughput": 8944.75, "total_tokens": 19785152} +{"current_steps": 29370, "total_steps": 204665, "loss": 0.0856, "lr": 1.9884962171395396e-06, "epoch": 0.7175139862702465, "percentage": 14.35, "elapsed_time": "0:36:52", "remaining_time": "3:40:03", "throughput": 8944.78, "total_tokens": 19788288} +{"current_steps": 29375, "total_steps": 204665, "loss": 0.0835, "lr": 1.9884833156843457e-06, "epoch": 0.7176361371020936, "percentage": 14.35, "elapsed_time": "0:36:52", "remaining_time": "3:40:03", "throughput": 8945.08, "total_tokens": 19792192} +{"current_steps": 29380, "total_steps": 204665, "loss": 0.2071, "lr": 1.988470407040635e-06, "epoch": 0.7177582879339408, "percentage": 14.36, "elapsed_time": "0:36:52", "remaining_time": "3:40:02", "throughput": 8945.19, "total_tokens": 19795520} +{"current_steps": 29385, "total_steps": 204665, "loss": 0.0197, "lr": 1.988457491208502e-06, "epoch": 0.717880438765788, "percentage": 14.36, "elapsed_time": "0:36:53", "remaining_time": "3:40:02", "throughput": 8945.24, "total_tokens": 19798720} +{"current_steps": 29390, "total_steps": 204665, "loss": 0.0873, "lr": 1.9884445681880402e-06, "epoch": 0.7180025895976352, "percentage": 14.36, "elapsed_time": "0:36:53", "remaining_time": "3:40:01", "throughput": 8945.26, "total_tokens": 19801792} +{"current_steps": 29395, "total_steps": 204665, "loss": 0.1053, "lr": 1.9884316379793435e-06, "epoch": 0.7181247404294824, "percentage": 14.36, "elapsed_time": "0:36:54", "remaining_time": "3:40:01", "throughput": 8945.29, "total_tokens": 19804928} +{"current_steps": 29400, "total_steps": 204665, "loss": 0.0368, "lr": 1.9884187005825058e-06, "epoch": 0.7182468912613295, "percentage": 14.36, "elapsed_time": "0:36:54", "remaining_time": "3:40:00", "throughput": 8945.5, "total_tokens": 19808576} +{"current_steps": 29405, "total_steps": 204665, "loss": 0.0519, "lr": 1.988405755997622e-06, "epoch": 0.7183690420931766, "percentage": 14.37, "elapsed_time": "0:36:54", "remaining_time": "3:40:00", "throughput": 8945.74, "total_tokens": 19812288} +{"current_steps": 29410, "total_steps": 204665, "loss": 0.1122, "lr": 1.9883928042247856e-06, "epoch": 0.7184911929250238, "percentage": 14.37, "elapsed_time": "0:36:55", "remaining_time": "3:39:59", "throughput": 8945.89, "total_tokens": 19815744} +{"current_steps": 29415, "total_steps": 204665, "loss": 0.1189, "lr": 1.9883798452640904e-06, "epoch": 0.718613343756871, "percentage": 14.37, "elapsed_time": "0:36:55", "remaining_time": "3:39:59", "throughput": 8945.93, "total_tokens": 19818880} +{"current_steps": 29420, "total_steps": 204665, "loss": 0.3849, "lr": 1.9883668791156316e-06, "epoch": 0.7187354945887181, "percentage": 14.37, "elapsed_time": "0:36:55", "remaining_time": "3:39:58", "throughput": 8945.94, "total_tokens": 19821952} +{"current_steps": 29425, "total_steps": 204665, "loss": 0.1492, "lr": 1.988353905779503e-06, "epoch": 0.7188576454205653, "percentage": 14.38, "elapsed_time": "0:36:56", "remaining_time": "3:39:57", "throughput": 8945.95, "total_tokens": 19825024} +{"current_steps": 29430, "total_steps": 204665, "loss": 0.1241, "lr": 1.9883409252557987e-06, "epoch": 0.7189797962524125, "percentage": 14.38, "elapsed_time": "0:36:56", "remaining_time": "3:39:57", "throughput": 8946.03, "total_tokens": 19828288} +{"current_steps": 29435, "total_steps": 204665, "loss": 0.2149, "lr": 1.9883279375446135e-06, "epoch": 0.7191019470842597, "percentage": 14.38, "elapsed_time": "0:36:56", "remaining_time": "3:39:56", "throughput": 8946.11, "total_tokens": 19831552} +{"current_steps": 29440, "total_steps": 204665, "loss": 0.125, "lr": 1.9883149426460416e-06, "epoch": 0.7192240979161069, "percentage": 14.38, "elapsed_time": "0:36:57", "remaining_time": "3:39:56", "throughput": 8946.15, "total_tokens": 19834688} +{"current_steps": 29445, "total_steps": 204665, "loss": 0.0197, "lr": 1.9883019405601775e-06, "epoch": 0.7193462487479539, "percentage": 14.39, "elapsed_time": "0:36:57", "remaining_time": "3:39:55", "throughput": 8946.23, "total_tokens": 19837952} +{"current_steps": 29450, "total_steps": 204665, "loss": 0.1573, "lr": 1.988288931287116e-06, "epoch": 0.7194683995798011, "percentage": 14.39, "elapsed_time": "0:36:57", "remaining_time": "3:39:55", "throughput": 8946.31, "total_tokens": 19841216} +{"current_steps": 29455, "total_steps": 204665, "loss": 0.1007, "lr": 1.9882759148269517e-06, "epoch": 0.7195905504116483, "percentage": 14.39, "elapsed_time": "0:36:58", "remaining_time": "3:39:54", "throughput": 8946.51, "total_tokens": 19844864} +{"current_steps": 29460, "total_steps": 204665, "loss": 0.075, "lr": 1.988262891179779e-06, "epoch": 0.7197127012434955, "percentage": 14.39, "elapsed_time": "0:36:58", "remaining_time": "3:39:53", "throughput": 8946.55, "total_tokens": 19848000} +{"current_steps": 29465, "total_steps": 204665, "loss": 0.0291, "lr": 1.988249860345693e-06, "epoch": 0.7198348520753426, "percentage": 14.4, "elapsed_time": "0:36:58", "remaining_time": "3:39:53", "throughput": 8946.69, "total_tokens": 19851456} +{"current_steps": 29470, "total_steps": 204665, "loss": 0.1826, "lr": 1.9882368223247883e-06, "epoch": 0.7199570029071898, "percentage": 14.4, "elapsed_time": "0:36:59", "remaining_time": "3:39:52", "throughput": 8946.75, "total_tokens": 19854656} +{"current_steps": 29475, "total_steps": 204665, "loss": 0.1084, "lr": 1.988223777117159e-06, "epoch": 0.720079153739037, "percentage": 14.4, "elapsed_time": "0:36:59", "remaining_time": "3:39:52", "throughput": 8946.67, "total_tokens": 19857472} +{"current_steps": 29480, "total_steps": 204665, "loss": 0.104, "lr": 1.988210724722901e-06, "epoch": 0.7202013045708842, "percentage": 14.4, "elapsed_time": "0:36:59", "remaining_time": "3:39:51", "throughput": 8946.91, "total_tokens": 19861184} +{"current_steps": 29485, "total_steps": 204665, "loss": 0.1386, "lr": 1.988197665142109e-06, "epoch": 0.7203234554027312, "percentage": 14.41, "elapsed_time": "0:37:00", "remaining_time": "3:39:51", "throughput": 8947.06, "total_tokens": 19864640} +{"current_steps": 29490, "total_steps": 204665, "loss": 0.1442, "lr": 1.9881845983748774e-06, "epoch": 0.7204456062345784, "percentage": 14.41, "elapsed_time": "0:37:00", "remaining_time": "3:39:50", "throughput": 8947.23, "total_tokens": 19868160} +{"current_steps": 29495, "total_steps": 204665, "loss": 0.16, "lr": 1.988171524421302e-06, "epoch": 0.7205677570664256, "percentage": 14.41, "elapsed_time": "0:37:00", "remaining_time": "3:39:50", "throughput": 8947.36, "total_tokens": 19871552} +{"current_steps": 29500, "total_steps": 204665, "loss": 0.1296, "lr": 1.9881584432814767e-06, "epoch": 0.7206899078982728, "percentage": 14.41, "elapsed_time": "0:37:01", "remaining_time": "3:39:49", "throughput": 8947.4, "total_tokens": 19874688} +{"current_steps": 29505, "total_steps": 204665, "loss": 0.103, "lr": 1.988145354955498e-06, "epoch": 0.72081205873012, "percentage": 14.42, "elapsed_time": "0:37:01", "remaining_time": "3:39:49", "throughput": 8947.71, "total_tokens": 19878592} +{"current_steps": 29510, "total_steps": 204665, "loss": 0.1291, "lr": 1.9881322594434606e-06, "epoch": 0.7209342095619671, "percentage": 14.42, "elapsed_time": "0:37:01", "remaining_time": "3:39:48", "throughput": 8947.77, "total_tokens": 19881792} +{"current_steps": 29515, "total_steps": 204665, "loss": 0.096, "lr": 1.9881191567454594e-06, "epoch": 0.7210563603938143, "percentage": 14.42, "elapsed_time": "0:37:02", "remaining_time": "3:39:47", "throughput": 8947.76, "total_tokens": 19884800} +{"current_steps": 29520, "total_steps": 204665, "loss": 0.195, "lr": 1.98810604686159e-06, "epoch": 0.7211785112256615, "percentage": 14.42, "elapsed_time": "0:37:02", "remaining_time": "3:39:47", "throughput": 8947.74, "total_tokens": 19887808} +{"current_steps": 29525, "total_steps": 204665, "loss": 0.0749, "lr": 1.9880929297919476e-06, "epoch": 0.7213006620575086, "percentage": 14.43, "elapsed_time": "0:37:03", "remaining_time": "3:39:46", "throughput": 8947.73, "total_tokens": 19890816} +{"current_steps": 29530, "total_steps": 204665, "loss": 0.179, "lr": 1.988079805536628e-06, "epoch": 0.7214228128893557, "percentage": 14.43, "elapsed_time": "0:37:03", "remaining_time": "3:39:46", "throughput": 8947.89, "total_tokens": 19894336} +{"current_steps": 29535, "total_steps": 204665, "loss": 0.1218, "lr": 1.988066674095726e-06, "epoch": 0.7215449637212029, "percentage": 14.43, "elapsed_time": "0:37:03", "remaining_time": "3:39:45", "throughput": 8948.02, "total_tokens": 19897728} +{"current_steps": 29540, "total_steps": 204665, "loss": 0.0808, "lr": 1.988053535469337e-06, "epoch": 0.7216671145530501, "percentage": 14.43, "elapsed_time": "0:37:04", "remaining_time": "3:39:45", "throughput": 8948.13, "total_tokens": 19901056} +{"current_steps": 29545, "total_steps": 204665, "loss": 0.1164, "lr": 1.9880403896575573e-06, "epoch": 0.7217892653848973, "percentage": 14.44, "elapsed_time": "0:37:04", "remaining_time": "3:39:44", "throughput": 8948.23, "total_tokens": 19904384} +{"current_steps": 29550, "total_steps": 204665, "loss": 0.141, "lr": 1.9880272366604824e-06, "epoch": 0.7219114162167445, "percentage": 14.44, "elapsed_time": "0:37:04", "remaining_time": "3:39:43", "throughput": 8948.4, "total_tokens": 19907904} +{"current_steps": 29555, "total_steps": 204665, "loss": 0.1567, "lr": 1.9880140764782074e-06, "epoch": 0.7220335670485916, "percentage": 14.44, "elapsed_time": "0:37:05", "remaining_time": "3:39:43", "throughput": 8948.52, "total_tokens": 19911296} +{"current_steps": 29560, "total_steps": 204665, "loss": 0.1494, "lr": 1.9880009091108284e-06, "epoch": 0.7221557178804388, "percentage": 14.44, "elapsed_time": "0:37:05", "remaining_time": "3:39:42", "throughput": 8948.57, "total_tokens": 19914496} +{"current_steps": 29565, "total_steps": 204665, "loss": 0.0898, "lr": 1.9879877345584412e-06, "epoch": 0.7222778687122859, "percentage": 14.45, "elapsed_time": "0:37:05", "remaining_time": "3:39:42", "throughput": 8948.65, "total_tokens": 19917760} +{"current_steps": 29570, "total_steps": 204665, "loss": 0.0657, "lr": 1.987974552821141e-06, "epoch": 0.7224000195441331, "percentage": 14.45, "elapsed_time": "0:37:06", "remaining_time": "3:39:41", "throughput": 8948.8, "total_tokens": 19921216} +{"current_steps": 29575, "total_steps": 204665, "loss": 0.0873, "lr": 1.9879613638990247e-06, "epoch": 0.7225221703759802, "percentage": 14.45, "elapsed_time": "0:37:06", "remaining_time": "3:39:41", "throughput": 8949.02, "total_tokens": 19924864} +{"current_steps": 29580, "total_steps": 204665, "loss": 0.1493, "lr": 1.987948167792187e-06, "epoch": 0.7226443212078274, "percentage": 14.45, "elapsed_time": "0:37:06", "remaining_time": "3:39:40", "throughput": 8949.07, "total_tokens": 19928064} +{"current_steps": 29585, "total_steps": 204665, "loss": 0.0707, "lr": 1.9879349645007246e-06, "epoch": 0.7227664720396746, "percentage": 14.46, "elapsed_time": "0:37:07", "remaining_time": "3:39:40", "throughput": 8949.24, "total_tokens": 19931584} +{"current_steps": 29590, "total_steps": 204665, "loss": 0.195, "lr": 1.9879217540247338e-06, "epoch": 0.7228886228715218, "percentage": 14.46, "elapsed_time": "0:37:07", "remaining_time": "3:39:39", "throughput": 8949.39, "total_tokens": 19935040} +{"current_steps": 29595, "total_steps": 204665, "loss": 0.1682, "lr": 1.9879085363643102e-06, "epoch": 0.723010773703369, "percentage": 14.46, "elapsed_time": "0:37:07", "remaining_time": "3:39:39", "throughput": 8949.56, "total_tokens": 19938560} +{"current_steps": 29600, "total_steps": 204665, "loss": 0.0726, "lr": 1.9878953115195498e-06, "epoch": 0.723132924535216, "percentage": 14.46, "elapsed_time": "0:37:08", "remaining_time": "3:39:38", "throughput": 8949.59, "total_tokens": 19941696} +{"current_steps": 29605, "total_steps": 204665, "loss": 0.1861, "lr": 1.987882079490549e-06, "epoch": 0.7232550753670632, "percentage": 14.47, "elapsed_time": "0:37:08", "remaining_time": "3:39:38", "throughput": 8950.04, "total_tokens": 19945984} +{"current_steps": 29610, "total_steps": 204665, "loss": 0.0521, "lr": 1.9878688402774042e-06, "epoch": 0.7233772261989104, "percentage": 14.47, "elapsed_time": "0:37:08", "remaining_time": "3:39:37", "throughput": 8950.1, "total_tokens": 19949184} +{"current_steps": 29615, "total_steps": 204665, "loss": 0.0757, "lr": 1.9878555938802115e-06, "epoch": 0.7234993770307576, "percentage": 14.47, "elapsed_time": "0:37:09", "remaining_time": "3:39:36", "throughput": 8950.13, "total_tokens": 19952320} +{"current_steps": 29620, "total_steps": 204665, "loss": 0.0802, "lr": 1.987842340299067e-06, "epoch": 0.7236215278626047, "percentage": 14.47, "elapsed_time": "0:37:09", "remaining_time": "3:39:36", "throughput": 8950.13, "total_tokens": 19955392} +{"current_steps": 29625, "total_steps": 204665, "loss": 0.1038, "lr": 1.9878290795340674e-06, "epoch": 0.7237436786944519, "percentage": 14.47, "elapsed_time": "0:37:09", "remaining_time": "3:39:35", "throughput": 8950.3, "total_tokens": 19958912} +{"current_steps": 29630, "total_steps": 204665, "loss": 0.0661, "lr": 1.9878158115853088e-06, "epoch": 0.7238658295262991, "percentage": 14.48, "elapsed_time": "0:37:10", "remaining_time": "3:39:35", "throughput": 8950.45, "total_tokens": 19962368} +{"current_steps": 29635, "total_steps": 204665, "loss": 0.1026, "lr": 1.9878025364528883e-06, "epoch": 0.7239879803581463, "percentage": 14.48, "elapsed_time": "0:37:10", "remaining_time": "3:39:34", "throughput": 8950.57, "total_tokens": 19965760} +{"current_steps": 29640, "total_steps": 204665, "loss": 0.1179, "lr": 1.987789254136902e-06, "epoch": 0.7241101311899935, "percentage": 14.48, "elapsed_time": "0:37:11", "remaining_time": "3:39:34", "throughput": 8950.72, "total_tokens": 19969216} +{"current_steps": 29645, "total_steps": 204665, "loss": 0.1176, "lr": 1.987775964637447e-06, "epoch": 0.7242322820218405, "percentage": 14.48, "elapsed_time": "0:37:11", "remaining_time": "3:39:33", "throughput": 8950.92, "total_tokens": 19972800} +{"current_steps": 29650, "total_steps": 204665, "loss": 0.0449, "lr": 1.9877626679546185e-06, "epoch": 0.7243544328536877, "percentage": 14.49, "elapsed_time": "0:37:11", "remaining_time": "3:39:33", "throughput": 8951.04, "total_tokens": 19976192} +{"current_steps": 29655, "total_steps": 204665, "loss": 0.073, "lr": 1.987749364088515e-06, "epoch": 0.7244765836855349, "percentage": 14.49, "elapsed_time": "0:37:12", "remaining_time": "3:39:32", "throughput": 8951.17, "total_tokens": 19979648} +{"current_steps": 29660, "total_steps": 204665, "loss": 0.1078, "lr": 1.987736053039232e-06, "epoch": 0.7245987345173821, "percentage": 14.49, "elapsed_time": "0:37:12", "remaining_time": "3:39:32", "throughput": 8951.39, "total_tokens": 19983296} +{"current_steps": 29665, "total_steps": 204665, "loss": 0.1702, "lr": 1.987722734806867e-06, "epoch": 0.7247208853492292, "percentage": 14.49, "elapsed_time": "0:37:12", "remaining_time": "3:39:31", "throughput": 8951.6, "total_tokens": 19986944} +{"current_steps": 29670, "total_steps": 204665, "loss": 0.1022, "lr": 1.9877094093915166e-06, "epoch": 0.7248430361810764, "percentage": 14.5, "elapsed_time": "0:37:13", "remaining_time": "3:39:31", "throughput": 8951.8, "total_tokens": 19990528} +{"current_steps": 29675, "total_steps": 204665, "loss": 0.0221, "lr": 1.9876960767932775e-06, "epoch": 0.7249651870129236, "percentage": 14.5, "elapsed_time": "0:37:13", "remaining_time": "3:39:30", "throughput": 8951.84, "total_tokens": 19993664} +{"current_steps": 29680, "total_steps": 204665, "loss": 0.1655, "lr": 1.9876827370122472e-06, "epoch": 0.7250873378447708, "percentage": 14.5, "elapsed_time": "0:37:13", "remaining_time": "3:39:30", "throughput": 8952.09, "total_tokens": 19997440} +{"current_steps": 29685, "total_steps": 204665, "loss": 0.1892, "lr": 1.987669390048522e-06, "epoch": 0.7252094886766179, "percentage": 14.5, "elapsed_time": "0:37:14", "remaining_time": "3:39:29", "throughput": 8952.17, "total_tokens": 20000704} +{"current_steps": 29690, "total_steps": 204665, "loss": 0.0763, "lr": 1.9876560359021997e-06, "epoch": 0.725331639508465, "percentage": 14.51, "elapsed_time": "0:37:14", "remaining_time": "3:39:28", "throughput": 8952.22, "total_tokens": 20003904} +{"current_steps": 29695, "total_steps": 204665, "loss": 0.1608, "lr": 1.9876426745733768e-06, "epoch": 0.7254537903403122, "percentage": 14.51, "elapsed_time": "0:37:14", "remaining_time": "3:39:28", "throughput": 8952.26, "total_tokens": 20007040} +{"current_steps": 29700, "total_steps": 204665, "loss": 0.1339, "lr": 1.9876293060621507e-06, "epoch": 0.7255759411721594, "percentage": 14.51, "elapsed_time": "0:37:15", "remaining_time": "3:39:27", "throughput": 8952.39, "total_tokens": 20010496} +{"current_steps": 29705, "total_steps": 204665, "loss": 0.0026, "lr": 1.987615930368619e-06, "epoch": 0.7256980920040066, "percentage": 14.51, "elapsed_time": "0:37:15", "remaining_time": "3:39:27", "throughput": 8952.45, "total_tokens": 20013696} +{"current_steps": 29710, "total_steps": 204665, "loss": 0.0895, "lr": 1.987602547492878e-06, "epoch": 0.7258202428358537, "percentage": 14.52, "elapsed_time": "0:37:15", "remaining_time": "3:39:26", "throughput": 8952.52, "total_tokens": 20016960} +{"current_steps": 29715, "total_steps": 204665, "loss": 0.2477, "lr": 1.987589157435026e-06, "epoch": 0.7259423936677009, "percentage": 14.52, "elapsed_time": "0:37:16", "remaining_time": "3:39:26", "throughput": 8952.63, "total_tokens": 20020288} +{"current_steps": 29720, "total_steps": 204665, "loss": 0.1913, "lr": 1.98757576019516e-06, "epoch": 0.726064544499548, "percentage": 14.52, "elapsed_time": "0:37:16", "remaining_time": "3:39:25", "throughput": 8952.64, "total_tokens": 20023360} +{"current_steps": 29725, "total_steps": 204665, "loss": 0.0992, "lr": 1.9875623557733777e-06, "epoch": 0.7261866953313952, "percentage": 14.52, "elapsed_time": "0:37:16", "remaining_time": "3:39:24", "throughput": 8952.74, "total_tokens": 20026688} +{"current_steps": 29730, "total_steps": 204665, "loss": 0.1794, "lr": 1.9875489441697764e-06, "epoch": 0.7263088461632423, "percentage": 14.53, "elapsed_time": "0:37:17", "remaining_time": "3:39:24", "throughput": 8952.81, "total_tokens": 20029952} +{"current_steps": 29735, "total_steps": 204665, "loss": 0.0604, "lr": 1.987535525384453e-06, "epoch": 0.7264309969950895, "percentage": 14.53, "elapsed_time": "0:37:17", "remaining_time": "3:39:23", "throughput": 8953.01, "total_tokens": 20033536} +{"current_steps": 29740, "total_steps": 204665, "loss": 0.0791, "lr": 1.9875220994175058e-06, "epoch": 0.7265531478269367, "percentage": 14.53, "elapsed_time": "0:37:17", "remaining_time": "3:39:23", "throughput": 8953.13, "total_tokens": 20036928} +{"current_steps": 29745, "total_steps": 204665, "loss": 0.0879, "lr": 1.987508666269033e-06, "epoch": 0.7266752986587839, "percentage": 14.53, "elapsed_time": "0:37:18", "remaining_time": "3:39:22", "throughput": 8953.11, "total_tokens": 20039936} +{"current_steps": 29750, "total_steps": 204665, "loss": 0.1627, "lr": 1.987495225939131e-06, "epoch": 0.7267974494906311, "percentage": 14.54, "elapsed_time": "0:37:18", "remaining_time": "3:39:22", "throughput": 8953.29, "total_tokens": 20043456} +{"current_steps": 29755, "total_steps": 204665, "loss": 0.0406, "lr": 1.987481778427898e-06, "epoch": 0.7269196003224782, "percentage": 14.54, "elapsed_time": "0:37:19", "remaining_time": "3:39:21", "throughput": 8953.3, "total_tokens": 20046592} +{"current_steps": 29760, "total_steps": 204665, "loss": 0.0608, "lr": 1.9874683237354317e-06, "epoch": 0.7270417511543253, "percentage": 14.54, "elapsed_time": "0:37:19", "remaining_time": "3:39:21", "throughput": 8953.47, "total_tokens": 20050112} +{"current_steps": 29765, "total_steps": 204665, "loss": 0.0841, "lr": 1.9874548618618303e-06, "epoch": 0.7271639019861725, "percentage": 14.54, "elapsed_time": "0:37:19", "remaining_time": "3:39:20", "throughput": 8953.51, "total_tokens": 20053312} +{"current_steps": 29770, "total_steps": 204665, "loss": 0.1837, "lr": 1.987441392807192e-06, "epoch": 0.7272860528180197, "percentage": 14.55, "elapsed_time": "0:37:20", "remaining_time": "3:39:20", "throughput": 8953.66, "total_tokens": 20056768} +{"current_steps": 29775, "total_steps": 204665, "loss": 0.1468, "lr": 1.987427916571614e-06, "epoch": 0.7274082036498668, "percentage": 14.55, "elapsed_time": "0:37:20", "remaining_time": "3:39:19", "throughput": 8953.67, "total_tokens": 20059840} +{"current_steps": 29780, "total_steps": 204665, "loss": 0.1255, "lr": 1.9874144331551946e-06, "epoch": 0.727530354481714, "percentage": 14.55, "elapsed_time": "0:37:20", "remaining_time": "3:39:18", "throughput": 8953.86, "total_tokens": 20063424} +{"current_steps": 29785, "total_steps": 204665, "loss": 0.2606, "lr": 1.9874009425580317e-06, "epoch": 0.7276525053135612, "percentage": 14.55, "elapsed_time": "0:37:21", "remaining_time": "3:39:18", "throughput": 8953.96, "total_tokens": 20066752} +{"current_steps": 29790, "total_steps": 204665, "loss": 0.0847, "lr": 1.9873874447802236e-06, "epoch": 0.7277746561454084, "percentage": 14.56, "elapsed_time": "0:37:21", "remaining_time": "3:39:17", "throughput": 8953.92, "total_tokens": 20069696} +{"current_steps": 29795, "total_steps": 204665, "loss": 0.1515, "lr": 1.9873739398218687e-06, "epoch": 0.7278968069772556, "percentage": 14.56, "elapsed_time": "0:37:21", "remaining_time": "3:39:17", "throughput": 8953.97, "total_tokens": 20072896} +{"current_steps": 29800, "total_steps": 204665, "loss": 0.1143, "lr": 1.9873604276830647e-06, "epoch": 0.7280189578091026, "percentage": 14.56, "elapsed_time": "0:37:22", "remaining_time": "3:39:16", "throughput": 8954.03, "total_tokens": 20076096} +{"current_steps": 29805, "total_steps": 204665, "loss": 0.1678, "lr": 1.9873469083639103e-06, "epoch": 0.7281411086409498, "percentage": 14.56, "elapsed_time": "0:37:22", "remaining_time": "3:39:16", "throughput": 8954.12, "total_tokens": 20079424} +{"current_steps": 29810, "total_steps": 204665, "loss": 0.1527, "lr": 1.9873333818645033e-06, "epoch": 0.728263259472797, "percentage": 14.57, "elapsed_time": "0:37:22", "remaining_time": "3:39:15", "throughput": 8954.22, "total_tokens": 20082752} +{"current_steps": 29815, "total_steps": 204665, "loss": 0.0715, "lr": 1.987319848184943e-06, "epoch": 0.7283854103046442, "percentage": 14.57, "elapsed_time": "0:37:23", "remaining_time": "3:39:15", "throughput": 8954.47, "total_tokens": 20086528} +{"current_steps": 29820, "total_steps": 204665, "loss": 0.0958, "lr": 1.987306307325327e-06, "epoch": 0.7285075611364913, "percentage": 14.57, "elapsed_time": "0:37:23", "remaining_time": "3:39:14", "throughput": 8954.63, "total_tokens": 20089984} +{"current_steps": 29825, "total_steps": 204665, "loss": 0.0715, "lr": 1.9872927592857535e-06, "epoch": 0.7286297119683385, "percentage": 14.57, "elapsed_time": "0:37:23", "remaining_time": "3:39:14", "throughput": 8954.79, "total_tokens": 20093504} +{"current_steps": 29830, "total_steps": 204665, "loss": 0.0669, "lr": 1.987279204066322e-06, "epoch": 0.7287518628001857, "percentage": 14.58, "elapsed_time": "0:37:24", "remaining_time": "3:39:13", "throughput": 8954.83, "total_tokens": 20096640} +{"current_steps": 29835, "total_steps": 204665, "loss": 0.05, "lr": 1.98726564166713e-06, "epoch": 0.7288740136320329, "percentage": 14.58, "elapsed_time": "0:37:24", "remaining_time": "3:39:12", "throughput": 8954.87, "total_tokens": 20099776} +{"current_steps": 29840, "total_steps": 204665, "loss": 0.1161, "lr": 1.987252072088277e-06, "epoch": 0.72899616446388, "percentage": 14.58, "elapsed_time": "0:37:24", "remaining_time": "3:39:12", "throughput": 8954.85, "total_tokens": 20102784} +{"current_steps": 29845, "total_steps": 204665, "loss": 0.2201, "lr": 1.9872384953298615e-06, "epoch": 0.7291183152957271, "percentage": 14.58, "elapsed_time": "0:37:25", "remaining_time": "3:39:11", "throughput": 8954.88, "total_tokens": 20105920} +{"current_steps": 29850, "total_steps": 204665, "loss": 0.0443, "lr": 1.987224911391982e-06, "epoch": 0.7292404661275743, "percentage": 14.58, "elapsed_time": "0:37:25", "remaining_time": "3:39:11", "throughput": 8954.98, "total_tokens": 20109248} +{"current_steps": 29855, "total_steps": 204665, "loss": 0.0409, "lr": 1.987211320274738e-06, "epoch": 0.7293626169594215, "percentage": 14.59, "elapsed_time": "0:37:25", "remaining_time": "3:39:10", "throughput": 8955.03, "total_tokens": 20112448} +{"current_steps": 29860, "total_steps": 204665, "loss": 0.1766, "lr": 1.987197721978227e-06, "epoch": 0.7294847677912687, "percentage": 14.59, "elapsed_time": "0:37:26", "remaining_time": "3:39:10", "throughput": 8955.03, "total_tokens": 20115520} +{"current_steps": 29865, "total_steps": 204665, "loss": 0.2882, "lr": 1.987184116502549e-06, "epoch": 0.7296069186231158, "percentage": 14.59, "elapsed_time": "0:37:26", "remaining_time": "3:39:09", "throughput": 8955.11, "total_tokens": 20118784} +{"current_steps": 29870, "total_steps": 204665, "loss": 0.1965, "lr": 1.9871705038478025e-06, "epoch": 0.729729069454963, "percentage": 14.59, "elapsed_time": "0:37:26", "remaining_time": "3:39:08", "throughput": 8955.02, "total_tokens": 20121600} +{"current_steps": 29875, "total_steps": 204665, "loss": 0.1923, "lr": 1.9871568840140863e-06, "epoch": 0.7298512202868102, "percentage": 14.6, "elapsed_time": "0:37:27", "remaining_time": "3:39:08", "throughput": 8955.03, "total_tokens": 20124672} +{"current_steps": 29880, "total_steps": 204665, "loss": 0.0443, "lr": 1.9871432570015e-06, "epoch": 0.7299733711186573, "percentage": 14.6, "elapsed_time": "0:37:27", "remaining_time": "3:39:07", "throughput": 8955.17, "total_tokens": 20128128} +{"current_steps": 29885, "total_steps": 204665, "loss": 0.1228, "lr": 1.9871296228101426e-06, "epoch": 0.7300955219505045, "percentage": 14.6, "elapsed_time": "0:37:27", "remaining_time": "3:39:07", "throughput": 8955.22, "total_tokens": 20131328} +{"current_steps": 29890, "total_steps": 204665, "loss": 0.0921, "lr": 1.9871159814401127e-06, "epoch": 0.7302176727823516, "percentage": 14.6, "elapsed_time": "0:37:28", "remaining_time": "3:39:06", "throughput": 8955.37, "total_tokens": 20134784} +{"current_steps": 29895, "total_steps": 204665, "loss": 0.1063, "lr": 1.9871023328915102e-06, "epoch": 0.7303398236141988, "percentage": 14.61, "elapsed_time": "0:37:28", "remaining_time": "3:39:06", "throughput": 8955.37, "total_tokens": 20137856} +{"current_steps": 29900, "total_steps": 204665, "loss": 0.0474, "lr": 1.987088677164434e-06, "epoch": 0.730461974446046, "percentage": 14.61, "elapsed_time": "0:37:29", "remaining_time": "3:39:05", "throughput": 8955.53, "total_tokens": 20141376} +{"current_steps": 29905, "total_steps": 204665, "loss": 0.076, "lr": 1.9870750142589835e-06, "epoch": 0.7305841252778932, "percentage": 14.61, "elapsed_time": "0:37:29", "remaining_time": "3:39:05", "throughput": 8955.87, "total_tokens": 20145408} +{"current_steps": 29910, "total_steps": 204665, "loss": 0.0828, "lr": 1.987061344175258e-06, "epoch": 0.7307062761097403, "percentage": 14.61, "elapsed_time": "0:37:29", "remaining_time": "3:39:04", "throughput": 8955.86, "total_tokens": 20148416} +{"current_steps": 29915, "total_steps": 204665, "loss": 0.2602, "lr": 1.9870476669133566e-06, "epoch": 0.7308284269415875, "percentage": 14.62, "elapsed_time": "0:37:30", "remaining_time": "3:39:04", "throughput": 8955.89, "total_tokens": 20151552} +{"current_steps": 29920, "total_steps": 204665, "loss": 0.1584, "lr": 1.987033982473379e-06, "epoch": 0.7309505777734346, "percentage": 14.62, "elapsed_time": "0:37:30", "remaining_time": "3:39:03", "throughput": 8955.95, "total_tokens": 20154752} +{"current_steps": 29925, "total_steps": 204665, "loss": 0.2126, "lr": 1.9870202908554253e-06, "epoch": 0.7310727286052818, "percentage": 14.62, "elapsed_time": "0:37:30", "remaining_time": "3:39:02", "throughput": 8956.05, "total_tokens": 20158080} +{"current_steps": 29930, "total_steps": 204665, "loss": 0.069, "lr": 1.9870065920595942e-06, "epoch": 0.731194879437129, "percentage": 14.62, "elapsed_time": "0:37:31", "remaining_time": "3:39:02", "throughput": 8956.27, "total_tokens": 20161792} +{"current_steps": 29935, "total_steps": 204665, "loss": 0.1502, "lr": 1.986992886085986e-06, "epoch": 0.7313170302689761, "percentage": 14.63, "elapsed_time": "0:37:31", "remaining_time": "3:39:01", "throughput": 8956.27, "total_tokens": 20164800} +{"current_steps": 29940, "total_steps": 204665, "loss": 0.1002, "lr": 1.9869791729347e-06, "epoch": 0.7314391811008233, "percentage": 14.63, "elapsed_time": "0:37:31", "remaining_time": "3:39:01", "throughput": 8956.23, "total_tokens": 20167744} +{"current_steps": 29945, "total_steps": 204665, "loss": 0.1387, "lr": 1.986965452605836e-06, "epoch": 0.7315613319326705, "percentage": 14.63, "elapsed_time": "0:37:32", "remaining_time": "3:39:00", "throughput": 8956.33, "total_tokens": 20171072} +{"current_steps": 29950, "total_steps": 204665, "loss": 0.0676, "lr": 1.9869517250994932e-06, "epoch": 0.7316834827645177, "percentage": 14.63, "elapsed_time": "0:37:32", "remaining_time": "3:39:00", "throughput": 8956.52, "total_tokens": 20174656} +{"current_steps": 29955, "total_steps": 204665, "loss": 0.1438, "lr": 1.9869379904157724e-06, "epoch": 0.7318056335963647, "percentage": 14.64, "elapsed_time": "0:37:32", "remaining_time": "3:38:59", "throughput": 8956.62, "total_tokens": 20177984} +{"current_steps": 29960, "total_steps": 204665, "loss": 0.0504, "lr": 1.9869242485547734e-06, "epoch": 0.7319277844282119, "percentage": 14.64, "elapsed_time": "0:37:33", "remaining_time": "3:38:59", "throughput": 8956.78, "total_tokens": 20181504} +{"current_steps": 29965, "total_steps": 204665, "loss": 0.1732, "lr": 1.9869104995165957e-06, "epoch": 0.7320499352600591, "percentage": 14.64, "elapsed_time": "0:37:33", "remaining_time": "3:38:58", "throughput": 8956.83, "total_tokens": 20184704} +{"current_steps": 29970, "total_steps": 204665, "loss": 0.0326, "lr": 1.986896743301339e-06, "epoch": 0.7321720860919063, "percentage": 14.64, "elapsed_time": "0:37:33", "remaining_time": "3:38:58", "throughput": 8957.12, "total_tokens": 20188608} +{"current_steps": 29975, "total_steps": 204665, "loss": 0.1004, "lr": 1.986882979909104e-06, "epoch": 0.7322942369237535, "percentage": 14.65, "elapsed_time": "0:37:34", "remaining_time": "3:38:57", "throughput": 8957.38, "total_tokens": 20192384} +{"current_steps": 29980, "total_steps": 204665, "loss": 0.1361, "lr": 1.9868692093399905e-06, "epoch": 0.7324163877556006, "percentage": 14.65, "elapsed_time": "0:37:34", "remaining_time": "3:38:56", "throughput": 8957.32, "total_tokens": 20195264} +{"current_steps": 29985, "total_steps": 204665, "loss": 0.0881, "lr": 1.986855431594099e-06, "epoch": 0.7325385385874478, "percentage": 14.65, "elapsed_time": "0:37:34", "remaining_time": "3:38:56", "throughput": 8957.65, "total_tokens": 20199232} +{"current_steps": 29990, "total_steps": 204665, "loss": 0.0377, "lr": 1.986841646671529e-06, "epoch": 0.732660689419295, "percentage": 14.65, "elapsed_time": "0:37:35", "remaining_time": "3:38:56", "throughput": 8958.01, "total_tokens": 20203264} +{"current_steps": 29995, "total_steps": 204665, "loss": 0.1068, "lr": 1.986827854572381e-06, "epoch": 0.7327828402511422, "percentage": 14.66, "elapsed_time": "0:37:35", "remaining_time": "3:38:55", "throughput": 8958.12, "total_tokens": 20206592} +{"current_steps": 30000, "total_steps": 204665, "loss": 0.1614, "lr": 1.9868140552967555e-06, "epoch": 0.7329049910829892, "percentage": 14.66, "elapsed_time": "0:37:36", "remaining_time": "3:38:54", "throughput": 8958.2, "total_tokens": 20209856} +{"current_steps": 30005, "total_steps": 204665, "loss": 0.0859, "lr": 1.986800248844753e-06, "epoch": 0.7330271419148364, "percentage": 14.66, "elapsed_time": "0:37:36", "remaining_time": "3:38:54", "throughput": 8958.37, "total_tokens": 20213376} +{"current_steps": 30010, "total_steps": 204665, "loss": 0.1404, "lr": 1.9867864352164735e-06, "epoch": 0.7331492927466836, "percentage": 14.66, "elapsed_time": "0:37:36", "remaining_time": "3:38:53", "throughput": 8958.46, "total_tokens": 20216704} +{"current_steps": 30015, "total_steps": 204665, "loss": 0.1487, "lr": 1.9867726144120173e-06, "epoch": 0.7332714435785308, "percentage": 14.67, "elapsed_time": "0:37:37", "remaining_time": "3:38:53", "throughput": 8958.53, "total_tokens": 20219968} +{"current_steps": 30020, "total_steps": 204665, "loss": 0.0727, "lr": 1.9867587864314858e-06, "epoch": 0.7333935944103779, "percentage": 14.67, "elapsed_time": "0:37:37", "remaining_time": "3:38:52", "throughput": 8958.68, "total_tokens": 20223424} +{"current_steps": 30025, "total_steps": 204665, "loss": 0.095, "lr": 1.9867449512749787e-06, "epoch": 0.7335157452422251, "percentage": 14.67, "elapsed_time": "0:37:37", "remaining_time": "3:38:52", "throughput": 8958.76, "total_tokens": 20226688} +{"current_steps": 30030, "total_steps": 204665, "loss": 0.0888, "lr": 1.986731108942597e-06, "epoch": 0.7336378960740723, "percentage": 14.67, "elapsed_time": "0:37:38", "remaining_time": "3:38:51", "throughput": 8958.81, "total_tokens": 20229888} +{"current_steps": 30035, "total_steps": 204665, "loss": 0.0408, "lr": 1.9867172594344415e-06, "epoch": 0.7337600469059194, "percentage": 14.68, "elapsed_time": "0:37:38", "remaining_time": "3:38:51", "throughput": 8958.95, "total_tokens": 20233344} +{"current_steps": 30040, "total_steps": 204665, "loss": 0.0662, "lr": 1.986703402750612e-06, "epoch": 0.7338821977377666, "percentage": 14.68, "elapsed_time": "0:37:38", "remaining_time": "3:38:50", "throughput": 8959.06, "total_tokens": 20236736} +{"current_steps": 30045, "total_steps": 204665, "loss": 0.1591, "lr": 1.9866895388912107e-06, "epoch": 0.7340043485696137, "percentage": 14.68, "elapsed_time": "0:37:39", "remaining_time": "3:38:50", "throughput": 8959.18, "total_tokens": 20240128} +{"current_steps": 30050, "total_steps": 204665, "loss": 0.1238, "lr": 1.9866756678563375e-06, "epoch": 0.7341264994014609, "percentage": 14.68, "elapsed_time": "0:37:39", "remaining_time": "3:38:49", "throughput": 8959.15, "total_tokens": 20243072} +{"current_steps": 30055, "total_steps": 204665, "loss": 0.1967, "lr": 1.9866617896460936e-06, "epoch": 0.7342486502333081, "percentage": 14.68, "elapsed_time": "0:37:39", "remaining_time": "3:38:48", "throughput": 8959.24, "total_tokens": 20246400} +{"current_steps": 30060, "total_steps": 204665, "loss": 0.1251, "lr": 1.9866479042605794e-06, "epoch": 0.7343708010651553, "percentage": 14.69, "elapsed_time": "0:37:40", "remaining_time": "3:38:48", "throughput": 8959.36, "total_tokens": 20249792} +{"current_steps": 30065, "total_steps": 204665, "loss": 0.0821, "lr": 1.9866340116998965e-06, "epoch": 0.7344929518970024, "percentage": 14.69, "elapsed_time": "0:37:40", "remaining_time": "3:38:47", "throughput": 8959.54, "total_tokens": 20253376} +{"current_steps": 30070, "total_steps": 204665, "loss": 0.0891, "lr": 1.986620111964146e-06, "epoch": 0.7346151027288496, "percentage": 14.69, "elapsed_time": "0:37:40", "remaining_time": "3:38:47", "throughput": 8959.69, "total_tokens": 20256832} +{"current_steps": 30075, "total_steps": 204665, "loss": 0.3907, "lr": 1.986606205053428e-06, "epoch": 0.7347372535606967, "percentage": 14.69, "elapsed_time": "0:37:41", "remaining_time": "3:38:46", "throughput": 8959.85, "total_tokens": 20260352} +{"current_steps": 30080, "total_steps": 204665, "loss": 0.0612, "lr": 1.9865922909678444e-06, "epoch": 0.7348594043925439, "percentage": 14.7, "elapsed_time": "0:37:41", "remaining_time": "3:38:46", "throughput": 8959.92, "total_tokens": 20263616} +{"current_steps": 30085, "total_steps": 204665, "loss": 0.1698, "lr": 1.9865783697074965e-06, "epoch": 0.7349815552243911, "percentage": 14.7, "elapsed_time": "0:37:41", "remaining_time": "3:38:45", "throughput": 8959.99, "total_tokens": 20266880} +{"current_steps": 30090, "total_steps": 204665, "loss": 0.0698, "lr": 1.9865644412724857e-06, "epoch": 0.7351037060562382, "percentage": 14.7, "elapsed_time": "0:37:42", "remaining_time": "3:38:45", "throughput": 8960.14, "total_tokens": 20270336} +{"current_steps": 30095, "total_steps": 204665, "loss": 0.1139, "lr": 1.9865505056629122e-06, "epoch": 0.7352258568880854, "percentage": 14.7, "elapsed_time": "0:37:42", "remaining_time": "3:38:44", "throughput": 8960.14, "total_tokens": 20273408} +{"current_steps": 30100, "total_steps": 204665, "loss": 0.193, "lr": 1.9865365628788787e-06, "epoch": 0.7353480077199326, "percentage": 14.71, "elapsed_time": "0:37:42", "remaining_time": "3:38:44", "throughput": 8960.24, "total_tokens": 20276736} +{"current_steps": 30105, "total_steps": 204665, "loss": 0.0898, "lr": 1.9865226129204858e-06, "epoch": 0.7354701585517798, "percentage": 14.71, "elapsed_time": "0:37:43", "remaining_time": "3:38:43", "throughput": 8960.41, "total_tokens": 20280256} +{"current_steps": 30110, "total_steps": 204665, "loss": 0.0696, "lr": 1.9865086557878348e-06, "epoch": 0.7355923093836269, "percentage": 14.71, "elapsed_time": "0:37:43", "remaining_time": "3:38:43", "throughput": 8960.45, "total_tokens": 20283456} +{"current_steps": 30115, "total_steps": 204665, "loss": 0.1215, "lr": 1.9864946914810278e-06, "epoch": 0.735714460215474, "percentage": 14.71, "elapsed_time": "0:37:44", "remaining_time": "3:38:42", "throughput": 8960.52, "total_tokens": 20286720} +{"current_steps": 30120, "total_steps": 204665, "loss": 0.1099, "lr": 1.986480720000166e-06, "epoch": 0.7358366110473212, "percentage": 14.72, "elapsed_time": "0:37:44", "remaining_time": "3:38:41", "throughput": 8960.57, "total_tokens": 20289920} +{"current_steps": 30125, "total_steps": 204665, "loss": 0.1411, "lr": 1.986466741345351e-06, "epoch": 0.7359587618791684, "percentage": 14.72, "elapsed_time": "0:37:44", "remaining_time": "3:38:41", "throughput": 8960.68, "total_tokens": 20293312} +{"current_steps": 30130, "total_steps": 204665, "loss": 0.196, "lr": 1.9864527555166844e-06, "epoch": 0.7360809127110156, "percentage": 14.72, "elapsed_time": "0:37:45", "remaining_time": "3:38:40", "throughput": 8960.75, "total_tokens": 20296576} +{"current_steps": 30135, "total_steps": 204665, "loss": 0.0724, "lr": 1.986438762514269e-06, "epoch": 0.7362030635428627, "percentage": 14.72, "elapsed_time": "0:37:45", "remaining_time": "3:38:40", "throughput": 8961.0, "total_tokens": 20300352} +{"current_steps": 30140, "total_steps": 204665, "loss": 0.1201, "lr": 1.9864247623382046e-06, "epoch": 0.7363252143747099, "percentage": 14.73, "elapsed_time": "0:37:45", "remaining_time": "3:38:39", "throughput": 8961.07, "total_tokens": 20303616} +{"current_steps": 30145, "total_steps": 204665, "loss": 0.039, "lr": 1.986410754988594e-06, "epoch": 0.7364473652065571, "percentage": 14.73, "elapsed_time": "0:37:46", "remaining_time": "3:38:39", "throughput": 8961.15, "total_tokens": 20306880} +{"current_steps": 30150, "total_steps": 204665, "loss": 0.1245, "lr": 1.9863967404655397e-06, "epoch": 0.7365695160384043, "percentage": 14.73, "elapsed_time": "0:37:46", "remaining_time": "3:38:38", "throughput": 8961.35, "total_tokens": 20310464} +{"current_steps": 30155, "total_steps": 204665, "loss": 0.2526, "lr": 1.9863827187691423e-06, "epoch": 0.7366916668702513, "percentage": 14.73, "elapsed_time": "0:37:46", "remaining_time": "3:38:38", "throughput": 8961.42, "total_tokens": 20313728} +{"current_steps": 30160, "total_steps": 204665, "loss": 0.0782, "lr": 1.986368689899505e-06, "epoch": 0.7368138177020985, "percentage": 14.74, "elapsed_time": "0:37:47", "remaining_time": "3:38:37", "throughput": 8961.43, "total_tokens": 20316800} +{"current_steps": 30165, "total_steps": 204665, "loss": 0.0681, "lr": 1.9863546538567292e-06, "epoch": 0.7369359685339457, "percentage": 14.74, "elapsed_time": "0:37:47", "remaining_time": "3:38:37", "throughput": 8961.6, "total_tokens": 20320320} +{"current_steps": 30170, "total_steps": 204665, "loss": 0.0784, "lr": 1.9863406106409165e-06, "epoch": 0.7370581193657929, "percentage": 14.74, "elapsed_time": "0:37:47", "remaining_time": "3:38:36", "throughput": 8961.69, "total_tokens": 20323648} +{"current_steps": 30175, "total_steps": 204665, "loss": 0.0968, "lr": 1.9863265602521703e-06, "epoch": 0.7371802701976401, "percentage": 14.74, "elapsed_time": "0:37:48", "remaining_time": "3:38:36", "throughput": 8961.84, "total_tokens": 20327104} +{"current_steps": 30180, "total_steps": 204665, "loss": 0.1154, "lr": 1.9863125026905917e-06, "epoch": 0.7373024210294872, "percentage": 14.75, "elapsed_time": "0:37:48", "remaining_time": "3:38:35", "throughput": 8961.94, "total_tokens": 20330432} +{"current_steps": 30185, "total_steps": 204665, "loss": 0.218, "lr": 1.9862984379562833e-06, "epoch": 0.7374245718613344, "percentage": 14.75, "elapsed_time": "0:37:48", "remaining_time": "3:38:34", "throughput": 8962.01, "total_tokens": 20333696} +{"current_steps": 30190, "total_steps": 204665, "loss": 0.1431, "lr": 1.9862843660493475e-06, "epoch": 0.7375467226931816, "percentage": 14.75, "elapsed_time": "0:37:49", "remaining_time": "3:38:34", "throughput": 8962.14, "total_tokens": 20337088} +{"current_steps": 30195, "total_steps": 204665, "loss": 0.1025, "lr": 1.9862702869698865e-06, "epoch": 0.7376688735250287, "percentage": 14.75, "elapsed_time": "0:37:49", "remaining_time": "3:38:33", "throughput": 8962.1, "total_tokens": 20340032} +{"current_steps": 30200, "total_steps": 204665, "loss": 0.0184, "lr": 1.986256200718003e-06, "epoch": 0.7377910243568758, "percentage": 14.76, "elapsed_time": "0:37:49", "remaining_time": "3:38:33", "throughput": 8962.18, "total_tokens": 20343296} +{"current_steps": 30205, "total_steps": 204665, "loss": 0.2651, "lr": 1.9862421072937986e-06, "epoch": 0.737913175188723, "percentage": 14.76, "elapsed_time": "0:37:50", "remaining_time": "3:38:32", "throughput": 8962.16, "total_tokens": 20346304} +{"current_steps": 30210, "total_steps": 204665, "loss": 0.0288, "lr": 1.9862280066973765e-06, "epoch": 0.7380353260205702, "percentage": 14.76, "elapsed_time": "0:37:50", "remaining_time": "3:38:32", "throughput": 8962.34, "total_tokens": 20349888} +{"current_steps": 30215, "total_steps": 204665, "loss": 0.1911, "lr": 1.9862138989288393e-06, "epoch": 0.7381574768524174, "percentage": 14.76, "elapsed_time": "0:37:50", "remaining_time": "3:38:31", "throughput": 8962.5, "total_tokens": 20353408} +{"current_steps": 30220, "total_steps": 204665, "loss": 0.1025, "lr": 1.986199783988289e-06, "epoch": 0.7382796276842646, "percentage": 14.77, "elapsed_time": "0:37:51", "remaining_time": "3:38:31", "throughput": 8962.48, "total_tokens": 20356416} +{"current_steps": 30225, "total_steps": 204665, "loss": 0.2797, "lr": 1.9861856618758292e-06, "epoch": 0.7384017785161117, "percentage": 14.77, "elapsed_time": "0:37:51", "remaining_time": "3:38:30", "throughput": 8962.45, "total_tokens": 20359360} +{"current_steps": 30230, "total_steps": 204665, "loss": 0.137, "lr": 1.9861715325915612e-06, "epoch": 0.7385239293479589, "percentage": 14.77, "elapsed_time": "0:37:51", "remaining_time": "3:38:29", "throughput": 8962.49, "total_tokens": 20362560} +{"current_steps": 30235, "total_steps": 204665, "loss": 0.1808, "lr": 1.986157396135589e-06, "epoch": 0.738646080179806, "percentage": 14.77, "elapsed_time": "0:37:52", "remaining_time": "3:38:29", "throughput": 8962.54, "total_tokens": 20365760} +{"current_steps": 30240, "total_steps": 204665, "loss": 0.111, "lr": 1.986143252508015e-06, "epoch": 0.7387682310116532, "percentage": 14.78, "elapsed_time": "0:37:52", "remaining_time": "3:38:28", "throughput": 8962.55, "total_tokens": 20368832} +{"current_steps": 30245, "total_steps": 204665, "loss": 0.0594, "lr": 1.986129101708942e-06, "epoch": 0.7388903818435003, "percentage": 14.78, "elapsed_time": "0:37:53", "remaining_time": "3:38:28", "throughput": 8962.56, "total_tokens": 20371904} +{"current_steps": 30250, "total_steps": 204665, "loss": 0.1885, "lr": 1.986114943738473e-06, "epoch": 0.7390125326753475, "percentage": 14.78, "elapsed_time": "0:37:53", "remaining_time": "3:38:27", "throughput": 8962.62, "total_tokens": 20375168} +{"current_steps": 30255, "total_steps": 204665, "loss": 0.0938, "lr": 1.986100778596711e-06, "epoch": 0.7391346835071947, "percentage": 14.78, "elapsed_time": "0:37:53", "remaining_time": "3:38:27", "throughput": 8962.71, "total_tokens": 20378496} +{"current_steps": 30260, "total_steps": 204665, "loss": 0.0634, "lr": 1.9860866062837584e-06, "epoch": 0.7392568343390419, "percentage": 14.79, "elapsed_time": "0:37:54", "remaining_time": "3:38:26", "throughput": 8962.9, "total_tokens": 20382080} +{"current_steps": 30265, "total_steps": 204665, "loss": 0.0541, "lr": 1.986072426799719e-06, "epoch": 0.7393789851708891, "percentage": 14.79, "elapsed_time": "0:37:54", "remaining_time": "3:38:26", "throughput": 8963.08, "total_tokens": 20385664} +{"current_steps": 30270, "total_steps": 204665, "loss": 0.191, "lr": 1.9860582401446957e-06, "epoch": 0.7395011360027361, "percentage": 14.79, "elapsed_time": "0:37:54", "remaining_time": "3:38:25", "throughput": 8963.17, "total_tokens": 20388992} +{"current_steps": 30275, "total_steps": 204665, "loss": 0.0786, "lr": 1.986044046318792e-06, "epoch": 0.7396232868345833, "percentage": 14.79, "elapsed_time": "0:37:55", "remaining_time": "3:38:25", "throughput": 8963.3, "total_tokens": 20392448} +{"current_steps": 30280, "total_steps": 204665, "loss": 0.113, "lr": 1.986029845322111e-06, "epoch": 0.7397454376664305, "percentage": 14.79, "elapsed_time": "0:37:55", "remaining_time": "3:38:24", "throughput": 8963.32, "total_tokens": 20395584} +{"current_steps": 30285, "total_steps": 204665, "loss": 0.142, "lr": 1.986015637154755e-06, "epoch": 0.7398675884982777, "percentage": 14.8, "elapsed_time": "0:37:55", "remaining_time": "3:38:23", "throughput": 8963.45, "total_tokens": 20399040} +{"current_steps": 30290, "total_steps": 204665, "loss": 0.1325, "lr": 1.9860014218168283e-06, "epoch": 0.7399897393301248, "percentage": 14.8, "elapsed_time": "0:37:56", "remaining_time": "3:38:23", "throughput": 8963.59, "total_tokens": 20402496} +{"current_steps": 30295, "total_steps": 204665, "loss": 0.1215, "lr": 1.985987199308434e-06, "epoch": 0.740111890161972, "percentage": 14.8, "elapsed_time": "0:37:56", "remaining_time": "3:38:22", "throughput": 8963.72, "total_tokens": 20405952} +{"current_steps": 30300, "total_steps": 204665, "loss": 0.1274, "lr": 1.985972969629676e-06, "epoch": 0.7402340409938192, "percentage": 14.8, "elapsed_time": "0:37:56", "remaining_time": "3:38:22", "throughput": 8963.94, "total_tokens": 20409600} +{"current_steps": 30305, "total_steps": 204665, "loss": 0.207, "lr": 1.985958732780657e-06, "epoch": 0.7403561918256664, "percentage": 14.81, "elapsed_time": "0:37:57", "remaining_time": "3:38:21", "throughput": 8964.13, "total_tokens": 20413184} +{"current_steps": 30310, "total_steps": 204665, "loss": 0.1524, "lr": 1.985944488761481e-06, "epoch": 0.7404783426575134, "percentage": 14.81, "elapsed_time": "0:37:57", "remaining_time": "3:38:21", "throughput": 8964.32, "total_tokens": 20416768} +{"current_steps": 30315, "total_steps": 204665, "loss": 0.1147, "lr": 1.9859302375722514e-06, "epoch": 0.7406004934893606, "percentage": 14.81, "elapsed_time": "0:37:57", "remaining_time": "3:38:20", "throughput": 8964.41, "total_tokens": 20420096} +{"current_steps": 30320, "total_steps": 204665, "loss": 0.1366, "lr": 1.985915979213072e-06, "epoch": 0.7407226443212078, "percentage": 14.81, "elapsed_time": "0:37:58", "remaining_time": "3:38:20", "throughput": 8964.33, "total_tokens": 20422912} +{"current_steps": 30325, "total_steps": 204665, "loss": 0.1295, "lr": 1.9859017136840465e-06, "epoch": 0.740844795153055, "percentage": 14.82, "elapsed_time": "0:37:58", "remaining_time": "3:38:19", "throughput": 8964.46, "total_tokens": 20426304} +{"current_steps": 30330, "total_steps": 204665, "loss": 0.0794, "lr": 1.9858874409852786e-06, "epoch": 0.7409669459849022, "percentage": 14.82, "elapsed_time": "0:37:58", "remaining_time": "3:38:19", "throughput": 8964.58, "total_tokens": 20429696} +{"current_steps": 30335, "total_steps": 204665, "loss": 0.1226, "lr": 1.9858731611168713e-06, "epoch": 0.7410890968167493, "percentage": 14.82, "elapsed_time": "0:37:59", "remaining_time": "3:38:18", "throughput": 8964.68, "total_tokens": 20433024} +{"current_steps": 30340, "total_steps": 204665, "loss": 0.0475, "lr": 1.9858588740789304e-06, "epoch": 0.7412112476485965, "percentage": 14.82, "elapsed_time": "0:37:59", "remaining_time": "3:38:18", "throughput": 8964.81, "total_tokens": 20436416} +{"current_steps": 30345, "total_steps": 204665, "loss": 0.0868, "lr": 1.985844579871558e-06, "epoch": 0.7413333984804437, "percentage": 14.83, "elapsed_time": "0:37:59", "remaining_time": "3:38:17", "throughput": 8964.95, "total_tokens": 20439872} +{"current_steps": 30350, "total_steps": 204665, "loss": 0.0766, "lr": 1.9858302784948587e-06, "epoch": 0.7414555493122909, "percentage": 14.83, "elapsed_time": "0:38:00", "remaining_time": "3:38:17", "throughput": 8965.05, "total_tokens": 20443200} +{"current_steps": 30355, "total_steps": 204665, "loss": 0.0788, "lr": 1.9858159699489364e-06, "epoch": 0.7415777001441379, "percentage": 14.83, "elapsed_time": "0:38:00", "remaining_time": "3:38:16", "throughput": 8965.16, "total_tokens": 20446592} +{"current_steps": 30360, "total_steps": 204665, "loss": 0.116, "lr": 1.9858016542338954e-06, "epoch": 0.7416998509759851, "percentage": 14.83, "elapsed_time": "0:38:01", "remaining_time": "3:38:15", "throughput": 8965.25, "total_tokens": 20449920} +{"current_steps": 30365, "total_steps": 204665, "loss": 0.0391, "lr": 1.9857873313498394e-06, "epoch": 0.7418220018078323, "percentage": 14.84, "elapsed_time": "0:38:01", "remaining_time": "3:38:15", "throughput": 8965.39, "total_tokens": 20453376} +{"current_steps": 30370, "total_steps": 204665, "loss": 0.1101, "lr": 1.9857730012968727e-06, "epoch": 0.7419441526396795, "percentage": 14.84, "elapsed_time": "0:38:01", "remaining_time": "3:38:14", "throughput": 8965.53, "total_tokens": 20456832} +{"current_steps": 30375, "total_steps": 204665, "loss": 0.1149, "lr": 1.9857586640750997e-06, "epoch": 0.7420663034715267, "percentage": 14.84, "elapsed_time": "0:38:02", "remaining_time": "3:38:14", "throughput": 8965.59, "total_tokens": 20460096} +{"current_steps": 30380, "total_steps": 204665, "loss": 0.0846, "lr": 1.985744319684625e-06, "epoch": 0.7421884543033738, "percentage": 14.84, "elapsed_time": "0:38:02", "remaining_time": "3:38:13", "throughput": 8965.81, "total_tokens": 20463744} +{"current_steps": 30385, "total_steps": 204665, "loss": 0.1214, "lr": 1.985729968125552e-06, "epoch": 0.742310605135221, "percentage": 14.85, "elapsed_time": "0:38:02", "remaining_time": "3:38:13", "throughput": 8965.96, "total_tokens": 20467264} +{"current_steps": 30390, "total_steps": 204665, "loss": 0.1327, "lr": 1.9857156093979857e-06, "epoch": 0.7424327559670681, "percentage": 14.85, "elapsed_time": "0:38:03", "remaining_time": "3:38:12", "throughput": 8966.09, "total_tokens": 20470720} +{"current_steps": 30395, "total_steps": 204665, "loss": 0.0937, "lr": 1.9857012435020303e-06, "epoch": 0.7425549067989153, "percentage": 14.85, "elapsed_time": "0:38:03", "remaining_time": "3:38:12", "throughput": 8966.08, "total_tokens": 20473728} +{"current_steps": 30400, "total_steps": 204665, "loss": 0.0842, "lr": 1.98568687043779e-06, "epoch": 0.7426770576307624, "percentage": 14.85, "elapsed_time": "0:38:03", "remaining_time": "3:38:11", "throughput": 8966.14, "total_tokens": 20476992} +{"current_steps": 30405, "total_steps": 204665, "loss": 0.1288, "lr": 1.98567249020537e-06, "epoch": 0.7427992084626096, "percentage": 14.86, "elapsed_time": "0:38:04", "remaining_time": "3:38:11", "throughput": 8966.29, "total_tokens": 20480512} +{"current_steps": 30410, "total_steps": 204665, "loss": 0.1023, "lr": 1.9856581028048746e-06, "epoch": 0.7429213592944568, "percentage": 14.86, "elapsed_time": "0:38:04", "remaining_time": "3:38:10", "throughput": 8966.37, "total_tokens": 20483776} +{"current_steps": 30415, "total_steps": 204665, "loss": 0.01, "lr": 1.9856437082364084e-06, "epoch": 0.743043510126304, "percentage": 14.86, "elapsed_time": "0:38:04", "remaining_time": "3:38:10", "throughput": 8966.49, "total_tokens": 20487168} +{"current_steps": 30420, "total_steps": 204665, "loss": 0.1419, "lr": 1.9856293065000763e-06, "epoch": 0.7431656609581512, "percentage": 14.86, "elapsed_time": "0:38:05", "remaining_time": "3:38:09", "throughput": 8966.56, "total_tokens": 20490432} +{"current_steps": 30425, "total_steps": 204665, "loss": 0.233, "lr": 1.9856148975959824e-06, "epoch": 0.7432878117899983, "percentage": 14.87, "elapsed_time": "0:38:05", "remaining_time": "3:38:09", "throughput": 8966.71, "total_tokens": 20493888} +{"current_steps": 30430, "total_steps": 204665, "loss": 0.1279, "lr": 1.9856004815242317e-06, "epoch": 0.7434099626218454, "percentage": 14.87, "elapsed_time": "0:38:05", "remaining_time": "3:38:08", "throughput": 8966.72, "total_tokens": 20496960} +{"current_steps": 30435, "total_steps": 204665, "loss": 0.1186, "lr": 1.9855860582849293e-06, "epoch": 0.7435321134536926, "percentage": 14.87, "elapsed_time": "0:38:06", "remaining_time": "3:38:07", "throughput": 8966.86, "total_tokens": 20500480} +{"current_steps": 30440, "total_steps": 204665, "loss": 0.0902, "lr": 1.9855716278781802e-06, "epoch": 0.7436542642855398, "percentage": 14.87, "elapsed_time": "0:38:06", "remaining_time": "3:38:07", "throughput": 8966.97, "total_tokens": 20503808} +{"current_steps": 30445, "total_steps": 204665, "loss": 0.0876, "lr": 1.985557190304089e-06, "epoch": 0.7437764151173869, "percentage": 14.88, "elapsed_time": "0:38:06", "remaining_time": "3:38:06", "throughput": 8967.04, "total_tokens": 20507072} +{"current_steps": 30450, "total_steps": 204665, "loss": 0.1953, "lr": 1.985542745562761e-06, "epoch": 0.7438985659492341, "percentage": 14.88, "elapsed_time": "0:38:07", "remaining_time": "3:38:06", "throughput": 8967.11, "total_tokens": 20510336} +{"current_steps": 30455, "total_steps": 204665, "loss": 0.1771, "lr": 1.9855282936543007e-06, "epoch": 0.7440207167810813, "percentage": 14.88, "elapsed_time": "0:38:07", "remaining_time": "3:38:05", "throughput": 8967.14, "total_tokens": 20513472} +{"current_steps": 30460, "total_steps": 204665, "loss": 0.1286, "lr": 1.985513834578814e-06, "epoch": 0.7441428676129285, "percentage": 14.88, "elapsed_time": "0:38:07", "remaining_time": "3:38:05", "throughput": 8967.42, "total_tokens": 20517312} +{"current_steps": 30465, "total_steps": 204665, "loss": 0.0696, "lr": 1.9854993683364056e-06, "epoch": 0.7442650184447757, "percentage": 14.89, "elapsed_time": "0:38:08", "remaining_time": "3:38:04", "throughput": 8967.59, "total_tokens": 20520832} +{"current_steps": 30470, "total_steps": 204665, "loss": 0.0721, "lr": 1.9854848949271804e-06, "epoch": 0.7443871692766227, "percentage": 14.89, "elapsed_time": "0:38:08", "remaining_time": "3:38:04", "throughput": 8967.61, "total_tokens": 20523904} +{"current_steps": 30475, "total_steps": 204665, "loss": 0.0882, "lr": 1.985470414351244e-06, "epoch": 0.7445093201084699, "percentage": 14.89, "elapsed_time": "0:38:09", "remaining_time": "3:38:03", "throughput": 8967.77, "total_tokens": 20527424} +{"current_steps": 30480, "total_steps": 204665, "loss": 0.1156, "lr": 1.985455926608702e-06, "epoch": 0.7446314709403171, "percentage": 14.89, "elapsed_time": "0:38:09", "remaining_time": "3:38:03", "throughput": 8967.89, "total_tokens": 20530816} +{"current_steps": 30485, "total_steps": 204665, "loss": 0.1396, "lr": 1.985441431699659e-06, "epoch": 0.7447536217721643, "percentage": 14.9, "elapsed_time": "0:38:09", "remaining_time": "3:38:02", "throughput": 8968.01, "total_tokens": 20534208} +{"current_steps": 30490, "total_steps": 204665, "loss": 0.0677, "lr": 1.9854269296242216e-06, "epoch": 0.7448757726040114, "percentage": 14.9, "elapsed_time": "0:38:10", "remaining_time": "3:38:02", "throughput": 8968.17, "total_tokens": 20537664} +{"current_steps": 30495, "total_steps": 204665, "loss": 0.084, "lr": 1.9854124203824936e-06, "epoch": 0.7449979234358586, "percentage": 14.9, "elapsed_time": "0:38:10", "remaining_time": "3:38:01", "throughput": 8968.15, "total_tokens": 20540672} +{"current_steps": 30500, "total_steps": 204665, "loss": 0.0468, "lr": 1.985397903974582e-06, "epoch": 0.7451200742677058, "percentage": 14.9, "elapsed_time": "0:38:10", "remaining_time": "3:38:00", "throughput": 8968.19, "total_tokens": 20543808} +{"current_steps": 30505, "total_steps": 204665, "loss": 0.196, "lr": 1.985383380400592e-06, "epoch": 0.745242225099553, "percentage": 14.9, "elapsed_time": "0:38:11", "remaining_time": "3:38:00", "throughput": 8968.31, "total_tokens": 20547200} +{"current_steps": 30510, "total_steps": 204665, "loss": 0.2718, "lr": 1.9853688496606286e-06, "epoch": 0.7453643759314001, "percentage": 14.91, "elapsed_time": "0:38:11", "remaining_time": "3:37:59", "throughput": 8968.47, "total_tokens": 20550720} +{"current_steps": 30515, "total_steps": 204665, "loss": 0.0283, "lr": 1.985354311754798e-06, "epoch": 0.7454865267632472, "percentage": 14.91, "elapsed_time": "0:38:11", "remaining_time": "3:37:59", "throughput": 8968.47, "total_tokens": 20553792} +{"current_steps": 30520, "total_steps": 204665, "loss": 0.0974, "lr": 1.985339766683206e-06, "epoch": 0.7456086775950944, "percentage": 14.91, "elapsed_time": "0:38:12", "remaining_time": "3:37:58", "throughput": 8968.57, "total_tokens": 20557120} +{"current_steps": 30525, "total_steps": 204665, "loss": 0.0844, "lr": 1.985325214445958e-06, "epoch": 0.7457308284269416, "percentage": 14.91, "elapsed_time": "0:38:12", "remaining_time": "3:37:58", "throughput": 8968.6, "total_tokens": 20560256} +{"current_steps": 30530, "total_steps": 204665, "loss": 0.0386, "lr": 1.98531065504316e-06, "epoch": 0.7458529792587888, "percentage": 14.92, "elapsed_time": "0:38:12", "remaining_time": "3:37:57", "throughput": 8968.69, "total_tokens": 20563584} +{"current_steps": 30535, "total_steps": 204665, "loss": 0.2243, "lr": 1.985296088474918e-06, "epoch": 0.7459751300906359, "percentage": 14.92, "elapsed_time": "0:38:13", "remaining_time": "3:37:57", "throughput": 8968.79, "total_tokens": 20566912} +{"current_steps": 30540, "total_steps": 204665, "loss": 0.2049, "lr": 1.9852815147413376e-06, "epoch": 0.7460972809224831, "percentage": 14.92, "elapsed_time": "0:38:13", "remaining_time": "3:37:56", "throughput": 8968.87, "total_tokens": 20570176} +{"current_steps": 30545, "total_steps": 204665, "loss": 0.0925, "lr": 1.985266933842525e-06, "epoch": 0.7462194317543303, "percentage": 14.92, "elapsed_time": "0:38:13", "remaining_time": "3:37:55", "throughput": 8968.96, "total_tokens": 20573440} +{"current_steps": 30550, "total_steps": 204665, "loss": 0.1478, "lr": 1.9852523457785864e-06, "epoch": 0.7463415825861774, "percentage": 14.93, "elapsed_time": "0:38:14", "remaining_time": "3:37:55", "throughput": 8969.05, "total_tokens": 20576768} +{"current_steps": 30555, "total_steps": 204665, "loss": 0.077, "lr": 1.985237750549628e-06, "epoch": 0.7464637334180245, "percentage": 14.93, "elapsed_time": "0:38:14", "remaining_time": "3:37:54", "throughput": 8969.27, "total_tokens": 20580416} +{"current_steps": 30560, "total_steps": 204665, "loss": 0.1403, "lr": 1.9852231481557556e-06, "epoch": 0.7465858842498717, "percentage": 14.93, "elapsed_time": "0:38:14", "remaining_time": "3:37:54", "throughput": 8969.28, "total_tokens": 20583488} +{"current_steps": 30565, "total_steps": 204665, "loss": 0.1793, "lr": 1.985208538597075e-06, "epoch": 0.7467080350817189, "percentage": 14.93, "elapsed_time": "0:38:15", "remaining_time": "3:37:53", "throughput": 8969.61, "total_tokens": 20587520} +{"current_steps": 30570, "total_steps": 204665, "loss": 0.0606, "lr": 1.9851939218736937e-06, "epoch": 0.7468301859135661, "percentage": 14.94, "elapsed_time": "0:38:15", "remaining_time": "3:37:53", "throughput": 8969.83, "total_tokens": 20591232} +{"current_steps": 30575, "total_steps": 204665, "loss": 0.0999, "lr": 1.9851792979857166e-06, "epoch": 0.7469523367454133, "percentage": 14.94, "elapsed_time": "0:38:15", "remaining_time": "3:37:52", "throughput": 8970.02, "total_tokens": 20594816} +{"current_steps": 30580, "total_steps": 204665, "loss": 0.1943, "lr": 1.9851646669332507e-06, "epoch": 0.7470744875772604, "percentage": 14.94, "elapsed_time": "0:38:16", "remaining_time": "3:37:52", "throughput": 8970.03, "total_tokens": 20597888} +{"current_steps": 30585, "total_steps": 204665, "loss": 0.1239, "lr": 1.9851500287164023e-06, "epoch": 0.7471966384091075, "percentage": 14.94, "elapsed_time": "0:38:16", "remaining_time": "3:37:51", "throughput": 8970.2, "total_tokens": 20601472} +{"current_steps": 30590, "total_steps": 204665, "loss": 0.1193, "lr": 1.985135383335278e-06, "epoch": 0.7473187892409547, "percentage": 14.95, "elapsed_time": "0:38:17", "remaining_time": "3:37:51", "throughput": 8970.33, "total_tokens": 20604864} +{"current_steps": 30595, "total_steps": 204665, "loss": 0.0523, "lr": 1.9851207307899847e-06, "epoch": 0.7474409400728019, "percentage": 14.95, "elapsed_time": "0:38:17", "remaining_time": "3:37:50", "throughput": 8970.48, "total_tokens": 20608384} +{"current_steps": 30600, "total_steps": 204665, "loss": 0.0525, "lr": 1.985106071080628e-06, "epoch": 0.747563090904649, "percentage": 14.95, "elapsed_time": "0:38:17", "remaining_time": "3:37:50", "throughput": 8970.58, "total_tokens": 20611712} +{"current_steps": 30605, "total_steps": 204665, "loss": 0.0778, "lr": 1.985091404207315e-06, "epoch": 0.7476852417364962, "percentage": 14.95, "elapsed_time": "0:38:18", "remaining_time": "3:37:49", "throughput": 8970.68, "total_tokens": 20615104} +{"current_steps": 30610, "total_steps": 204665, "loss": 0.0828, "lr": 1.9850767301701523e-06, "epoch": 0.7478073925683434, "percentage": 14.96, "elapsed_time": "0:38:18", "remaining_time": "3:37:49", "throughput": 8970.77, "total_tokens": 20618432} +{"current_steps": 30615, "total_steps": 204665, "loss": 0.1881, "lr": 1.985062048969247e-06, "epoch": 0.7479295434001906, "percentage": 14.96, "elapsed_time": "0:38:18", "remaining_time": "3:37:48", "throughput": 8970.86, "total_tokens": 20621760} +{"current_steps": 30620, "total_steps": 204665, "loss": 0.1091, "lr": 1.985047360604705e-06, "epoch": 0.7480516942320378, "percentage": 14.96, "elapsed_time": "0:38:19", "remaining_time": "3:37:48", "throughput": 8971.01, "total_tokens": 20625280} +{"current_steps": 30625, "total_steps": 204665, "loss": 0.0786, "lr": 1.9850326650766343e-06, "epoch": 0.7481738450638848, "percentage": 14.96, "elapsed_time": "0:38:19", "remaining_time": "3:37:47", "throughput": 8971.1, "total_tokens": 20628608} +{"current_steps": 30630, "total_steps": 204665, "loss": 0.0865, "lr": 1.985017962385141e-06, "epoch": 0.748295995895732, "percentage": 14.97, "elapsed_time": "0:38:19", "remaining_time": "3:37:47", "throughput": 8971.12, "total_tokens": 20631744} +{"current_steps": 30635, "total_steps": 204665, "loss": 0.0841, "lr": 1.985003252530332e-06, "epoch": 0.7484181467275792, "percentage": 14.97, "elapsed_time": "0:38:20", "remaining_time": "3:37:46", "throughput": 8971.25, "total_tokens": 20635200} +{"current_steps": 30640, "total_steps": 204665, "loss": 0.0835, "lr": 1.984988535512314e-06, "epoch": 0.7485402975594264, "percentage": 14.97, "elapsed_time": "0:38:20", "remaining_time": "3:37:46", "throughput": 8971.49, "total_tokens": 20638912} +{"current_steps": 30645, "total_steps": 204665, "loss": 0.098, "lr": 1.984973811331195e-06, "epoch": 0.7486624483912735, "percentage": 14.97, "elapsed_time": "0:38:20", "remaining_time": "3:37:45", "throughput": 8971.62, "total_tokens": 20642368} +{"current_steps": 30650, "total_steps": 204665, "loss": 0.1273, "lr": 1.9849590799870813e-06, "epoch": 0.7487845992231207, "percentage": 14.98, "elapsed_time": "0:38:21", "remaining_time": "3:37:45", "throughput": 8971.94, "total_tokens": 20646336} +{"current_steps": 30655, "total_steps": 204665, "loss": 0.0843, "lr": 1.98494434148008e-06, "epoch": 0.7489067500549679, "percentage": 14.98, "elapsed_time": "0:38:21", "remaining_time": "3:37:44", "throughput": 8972.01, "total_tokens": 20649600} +{"current_steps": 30660, "total_steps": 204665, "loss": 0.0638, "lr": 1.984929595810299e-06, "epoch": 0.7490289008868151, "percentage": 14.98, "elapsed_time": "0:38:21", "remaining_time": "3:37:44", "throughput": 8972.19, "total_tokens": 20653184} +{"current_steps": 30665, "total_steps": 204665, "loss": 0.1266, "lr": 1.984914842977845e-06, "epoch": 0.7491510517186623, "percentage": 14.98, "elapsed_time": "0:38:22", "remaining_time": "3:37:43", "throughput": 8972.18, "total_tokens": 20656192} +{"current_steps": 30670, "total_steps": 204665, "loss": 0.2753, "lr": 1.984900082982825e-06, "epoch": 0.7492732025505093, "percentage": 14.99, "elapsed_time": "0:38:22", "remaining_time": "3:37:42", "throughput": 8972.14, "total_tokens": 20659136} +{"current_steps": 30675, "total_steps": 204665, "loss": 0.061, "lr": 1.9848853158253472e-06, "epoch": 0.7493953533823565, "percentage": 14.99, "elapsed_time": "0:38:22", "remaining_time": "3:37:42", "throughput": 8972.21, "total_tokens": 20662400} +{"current_steps": 30680, "total_steps": 204665, "loss": 0.2228, "lr": 1.984870541505518e-06, "epoch": 0.7495175042142037, "percentage": 14.99, "elapsed_time": "0:38:23", "remaining_time": "3:37:41", "throughput": 8972.36, "total_tokens": 20665856} +{"current_steps": 30685, "total_steps": 204665, "loss": 0.0521, "lr": 1.9848557600234453e-06, "epoch": 0.7496396550460509, "percentage": 14.99, "elapsed_time": "0:38:23", "remaining_time": "3:37:41", "throughput": 8972.55, "total_tokens": 20669440} +{"current_steps": 30690, "total_steps": 204665, "loss": 0.1166, "lr": 1.984840971379237e-06, "epoch": 0.749761805877898, "percentage": 15.0, "elapsed_time": "0:38:23", "remaining_time": "3:37:40", "throughput": 8972.6, "total_tokens": 20672640} +{"current_steps": 30695, "total_steps": 204665, "loss": 0.0697, "lr": 1.9848261755730002e-06, "epoch": 0.7498839567097452, "percentage": 15.0, "elapsed_time": "0:38:24", "remaining_time": "3:37:40", "throughput": 8972.76, "total_tokens": 20676160} +{"current_steps": 30700, "total_steps": 204665, "loss": 0.0459, "lr": 1.9848113726048427e-06, "epoch": 0.7500061075415924, "percentage": 15.0, "elapsed_time": "0:38:24", "remaining_time": "3:37:39", "throughput": 8972.83, "total_tokens": 20679424} +{"current_steps": 30702, "total_steps": 204665, "eval_loss": 0.1214575320482254, "epoch": 0.7500549678743312, "percentage": 15.0, "elapsed_time": "0:39:12", "remaining_time": "3:42:11", "throughput": 8789.74, "total_tokens": 20680640} +{"current_steps": 30705, "total_steps": 204665, "loss": 0.0963, "lr": 1.9847965624748717e-06, "epoch": 0.7501282583734395, "percentage": 15.0, "elapsed_time": "0:39:50", "remaining_time": "3:45:46", "throughput": 8650.51, "total_tokens": 20683008} +{"current_steps": 30710, "total_steps": 204665, "loss": 0.0733, "lr": 1.9847817451831952e-06, "epoch": 0.7502504092052867, "percentage": 15.01, "elapsed_time": "0:39:51", "remaining_time": "3:45:45", "throughput": 8650.67, "total_tokens": 20686400} +{"current_steps": 30715, "total_steps": 204665, "loss": 0.1472, "lr": 1.9847669207299212e-06, "epoch": 0.7503725600371338, "percentage": 15.01, "elapsed_time": "0:39:51", "remaining_time": "3:45:44", "throughput": 8650.93, "total_tokens": 20690048} +{"current_steps": 30720, "total_steps": 204665, "loss": 0.1717, "lr": 1.984752089115157e-06, "epoch": 0.750494710868981, "percentage": 15.01, "elapsed_time": "0:39:52", "remaining_time": "3:45:44", "throughput": 8651.22, "total_tokens": 20693824} +{"current_steps": 30725, "total_steps": 204665, "loss": 0.0115, "lr": 1.9847372503390106e-06, "epoch": 0.7506168617008282, "percentage": 15.01, "elapsed_time": "0:39:52", "remaining_time": "3:45:43", "throughput": 8651.42, "total_tokens": 20697344} +{"current_steps": 30730, "total_steps": 204665, "loss": 0.0988, "lr": 1.984722404401591e-06, "epoch": 0.7507390125326754, "percentage": 15.01, "elapsed_time": "0:39:52", "remaining_time": "3:45:43", "throughput": 8651.67, "total_tokens": 20700992} +{"current_steps": 30735, "total_steps": 204665, "loss": 0.1538, "lr": 1.9847075513030042e-06, "epoch": 0.7508611633645225, "percentage": 15.02, "elapsed_time": "0:39:53", "remaining_time": "3:45:42", "throughput": 8651.82, "total_tokens": 20704384} +{"current_steps": 30740, "total_steps": 204665, "loss": 0.0825, "lr": 1.9846926910433597e-06, "epoch": 0.7509833141963697, "percentage": 15.02, "elapsed_time": "0:39:53", "remaining_time": "3:45:41", "throughput": 8652.04, "total_tokens": 20707968} +{"current_steps": 30745, "total_steps": 204665, "loss": 0.2069, "lr": 1.984677823622765e-06, "epoch": 0.7511054650282168, "percentage": 15.02, "elapsed_time": "0:39:53", "remaining_time": "3:45:41", "throughput": 8652.3, "total_tokens": 20711680} +{"current_steps": 30750, "total_steps": 204665, "loss": 0.1362, "lr": 1.9846629490413284e-06, "epoch": 0.751227615860064, "percentage": 15.02, "elapsed_time": "0:39:54", "remaining_time": "3:45:40", "throughput": 8652.39, "total_tokens": 20714880} +{"current_steps": 30755, "total_steps": 204665, "loss": 0.1142, "lr": 1.9846480672991576e-06, "epoch": 0.7513497666919112, "percentage": 15.03, "elapsed_time": "0:39:54", "remaining_time": "3:45:40", "throughput": 8652.71, "total_tokens": 20718720} +{"current_steps": 30760, "total_steps": 204665, "loss": 0.1044, "lr": 1.9846331783963618e-06, "epoch": 0.7514719175237583, "percentage": 15.03, "elapsed_time": "0:39:54", "remaining_time": "3:45:39", "throughput": 8652.86, "total_tokens": 20722112} +{"current_steps": 30765, "total_steps": 204665, "loss": 0.1727, "lr": 1.9846182823330483e-06, "epoch": 0.7515940683556055, "percentage": 15.03, "elapsed_time": "0:39:55", "remaining_time": "3:45:38", "throughput": 8652.96, "total_tokens": 20725312} +{"current_steps": 30770, "total_steps": 204665, "loss": 0.0193, "lr": 1.984603379109326e-06, "epoch": 0.7517162191874527, "percentage": 15.03, "elapsed_time": "0:39:55", "remaining_time": "3:45:38", "throughput": 8653.44, "total_tokens": 20729664} +{"current_steps": 30775, "total_steps": 204665, "loss": 0.2256, "lr": 1.984588468725303e-06, "epoch": 0.7518383700192999, "percentage": 15.04, "elapsed_time": "0:39:55", "remaining_time": "3:45:37", "throughput": 8653.57, "total_tokens": 20732992} +{"current_steps": 30780, "total_steps": 204665, "loss": 0.1115, "lr": 1.984573551181088e-06, "epoch": 0.751960520851147, "percentage": 15.04, "elapsed_time": "0:39:56", "remaining_time": "3:45:36", "throughput": 8653.65, "total_tokens": 20736128} +{"current_steps": 30785, "total_steps": 204665, "loss": 0.0376, "lr": 1.984558626476789e-06, "epoch": 0.7520826716829941, "percentage": 15.04, "elapsed_time": "0:39:56", "remaining_time": "3:45:36", "throughput": 8653.79, "total_tokens": 20739456} +{"current_steps": 30790, "total_steps": 204665, "loss": 0.071, "lr": 1.984543694612515e-06, "epoch": 0.7522048225148413, "percentage": 15.04, "elapsed_time": "0:39:56", "remaining_time": "3:45:35", "throughput": 8653.98, "total_tokens": 20742976} +{"current_steps": 30795, "total_steps": 204665, "loss": 0.0298, "lr": 1.9845287555883745e-06, "epoch": 0.7523269733466885, "percentage": 15.05, "elapsed_time": "0:39:57", "remaining_time": "3:45:35", "throughput": 8654.12, "total_tokens": 20746304} +{"current_steps": 30800, "total_steps": 204665, "loss": 0.0712, "lr": 1.984513809404476e-06, "epoch": 0.7524491241785357, "percentage": 15.05, "elapsed_time": "0:39:57", "remaining_time": "3:45:34", "throughput": 8654.25, "total_tokens": 20749568} +{"current_steps": 30805, "total_steps": 204665, "loss": 0.1266, "lr": 1.9844988560609287e-06, "epoch": 0.7525712750103828, "percentage": 15.05, "elapsed_time": "0:39:57", "remaining_time": "3:45:33", "throughput": 8654.43, "total_tokens": 20753024} +{"current_steps": 30810, "total_steps": 204665, "loss": 0.0992, "lr": 1.98448389555784e-06, "epoch": 0.75269342584223, "percentage": 15.05, "elapsed_time": "0:39:58", "remaining_time": "3:45:33", "throughput": 8654.52, "total_tokens": 20756224} +{"current_steps": 30815, "total_steps": 204665, "loss": 0.1103, "lr": 1.9844689278953204e-06, "epoch": 0.7528155766740772, "percentage": 15.06, "elapsed_time": "0:39:58", "remaining_time": "3:45:32", "throughput": 8654.61, "total_tokens": 20759424} +{"current_steps": 30820, "total_steps": 204665, "loss": 0.123, "lr": 1.984453953073478e-06, "epoch": 0.7529377275059244, "percentage": 15.06, "elapsed_time": "0:39:59", "remaining_time": "3:45:31", "throughput": 8654.86, "total_tokens": 20763072} +{"current_steps": 30825, "total_steps": 204665, "loss": 0.0925, "lr": 1.984438971092421e-06, "epoch": 0.7530598783377714, "percentage": 15.06, "elapsed_time": "0:39:59", "remaining_time": "3:45:31", "throughput": 8654.93, "total_tokens": 20766208} +{"current_steps": 30830, "total_steps": 204665, "loss": 0.1563, "lr": 1.9844239819522595e-06, "epoch": 0.7531820291696186, "percentage": 15.06, "elapsed_time": "0:39:59", "remaining_time": "3:45:30", "throughput": 8655.37, "total_tokens": 20770496} +{"current_steps": 30835, "total_steps": 204665, "loss": 0.155, "lr": 1.984408985653102e-06, "epoch": 0.7533041800014658, "percentage": 15.07, "elapsed_time": "0:40:00", "remaining_time": "3:45:30", "throughput": 8655.46, "total_tokens": 20773696} +{"current_steps": 30840, "total_steps": 204665, "loss": 0.0861, "lr": 1.9843939821950577e-06, "epoch": 0.753426330833313, "percentage": 15.07, "elapsed_time": "0:40:00", "remaining_time": "3:45:29", "throughput": 8655.57, "total_tokens": 20776896} +{"current_steps": 30845, "total_steps": 204665, "loss": 0.1297, "lr": 1.9843789715782356e-06, "epoch": 0.7535484816651601, "percentage": 15.07, "elapsed_time": "0:40:00", "remaining_time": "3:45:28", "throughput": 8655.72, "total_tokens": 20780224} +{"current_steps": 30850, "total_steps": 204665, "loss": 0.0626, "lr": 1.984363953802744e-06, "epoch": 0.7536706324970073, "percentage": 15.07, "elapsed_time": "0:40:01", "remaining_time": "3:45:28", "throughput": 8655.83, "total_tokens": 20783488} +{"current_steps": 30855, "total_steps": 204665, "loss": 0.2264, "lr": 1.984348928868694e-06, "epoch": 0.7537927833288545, "percentage": 15.08, "elapsed_time": "0:40:01", "remaining_time": "3:45:27", "throughput": 8656.07, "total_tokens": 20787136} +{"current_steps": 30860, "total_steps": 204665, "loss": 0.1147, "lr": 1.9843338967761934e-06, "epoch": 0.7539149341607017, "percentage": 15.08, "elapsed_time": "0:40:01", "remaining_time": "3:45:27", "throughput": 8656.37, "total_tokens": 20790912} +{"current_steps": 30865, "total_steps": 204665, "loss": 0.1371, "lr": 1.984318857525352e-06, "epoch": 0.7540370849925488, "percentage": 15.08, "elapsed_time": "0:40:02", "remaining_time": "3:45:26", "throughput": 8656.45, "total_tokens": 20794112} +{"current_steps": 30870, "total_steps": 204665, "loss": 0.3025, "lr": 1.9843038111162796e-06, "epoch": 0.7541592358243959, "percentage": 15.08, "elapsed_time": "0:40:02", "remaining_time": "3:45:25", "throughput": 8656.51, "total_tokens": 20797184} +{"current_steps": 30875, "total_steps": 204665, "loss": 0.0441, "lr": 1.9842887575490844e-06, "epoch": 0.7542813866562431, "percentage": 15.09, "elapsed_time": "0:40:02", "remaining_time": "3:45:25", "throughput": 8656.67, "total_tokens": 20800576} +{"current_steps": 30880, "total_steps": 204665, "loss": 0.1573, "lr": 1.9842736968238773e-06, "epoch": 0.7544035374880903, "percentage": 15.09, "elapsed_time": "0:40:03", "remaining_time": "3:45:24", "throughput": 8656.83, "total_tokens": 20803968} +{"current_steps": 30885, "total_steps": 204665, "loss": 0.1318, "lr": 1.9842586289407665e-06, "epoch": 0.7545256883199375, "percentage": 15.09, "elapsed_time": "0:40:03", "remaining_time": "3:45:23", "throughput": 8657.03, "total_tokens": 20807488} +{"current_steps": 30890, "total_steps": 204665, "loss": 0.1698, "lr": 1.9842435538998627e-06, "epoch": 0.7546478391517846, "percentage": 15.09, "elapsed_time": "0:40:03", "remaining_time": "3:45:23", "throughput": 8657.23, "total_tokens": 20811008} +{"current_steps": 30895, "total_steps": 204665, "loss": 0.1536, "lr": 1.9842284717012743e-06, "epoch": 0.7547699899836318, "percentage": 15.1, "elapsed_time": "0:40:04", "remaining_time": "3:45:22", "throughput": 8657.39, "total_tokens": 20814464} +{"current_steps": 30900, "total_steps": 204665, "loss": 0.1537, "lr": 1.984213382345112e-06, "epoch": 0.754892140815479, "percentage": 15.1, "elapsed_time": "0:40:04", "remaining_time": "3:45:22", "throughput": 8657.49, "total_tokens": 20817664} +{"current_steps": 30905, "total_steps": 204665, "loss": 0.1269, "lr": 1.984198285831486e-06, "epoch": 0.7550142916473261, "percentage": 15.1, "elapsed_time": "0:40:04", "remaining_time": "3:45:21", "throughput": 8657.68, "total_tokens": 20821184} +{"current_steps": 30910, "total_steps": 204665, "loss": 0.1899, "lr": 1.9841831821605045e-06, "epoch": 0.7551364424791733, "percentage": 15.1, "elapsed_time": "0:40:05", "remaining_time": "3:45:20", "throughput": 8658.04, "total_tokens": 20825216} +{"current_steps": 30915, "total_steps": 204665, "loss": 0.1374, "lr": 1.9841680713322786e-06, "epoch": 0.7552585933110204, "percentage": 15.11, "elapsed_time": "0:40:05", "remaining_time": "3:45:20", "throughput": 8658.35, "total_tokens": 20829056} +{"current_steps": 30920, "total_steps": 204665, "loss": 0.1459, "lr": 1.984152953346918e-06, "epoch": 0.7553807441428676, "percentage": 15.11, "elapsed_time": "0:40:06", "remaining_time": "3:45:19", "throughput": 8658.52, "total_tokens": 20832512} +{"current_steps": 30925, "total_steps": 204665, "loss": 0.1336, "lr": 1.984137828204532e-06, "epoch": 0.7555028949747148, "percentage": 15.11, "elapsed_time": "0:40:06", "remaining_time": "3:45:19", "throughput": 8658.71, "total_tokens": 20835968} +{"current_steps": 30930, "total_steps": 204665, "loss": 0.0356, "lr": 1.9841226959052314e-06, "epoch": 0.755625045806562, "percentage": 15.11, "elapsed_time": "0:40:06", "remaining_time": "3:45:18", "throughput": 8658.97, "total_tokens": 20839680} +{"current_steps": 30935, "total_steps": 204665, "loss": 0.1557, "lr": 1.9841075564491253e-06, "epoch": 0.7557471966384091, "percentage": 15.11, "elapsed_time": "0:40:07", "remaining_time": "3:45:18", "throughput": 8659.26, "total_tokens": 20843456} +{"current_steps": 30940, "total_steps": 204665, "loss": 0.0466, "lr": 1.984092409836325e-06, "epoch": 0.7558693474702562, "percentage": 15.12, "elapsed_time": "0:40:07", "remaining_time": "3:45:17", "throughput": 8659.47, "total_tokens": 20846976} +{"current_steps": 30945, "total_steps": 204665, "loss": 0.0731, "lr": 1.984077256066939e-06, "epoch": 0.7559914983021034, "percentage": 15.12, "elapsed_time": "0:40:07", "remaining_time": "3:45:16", "throughput": 8659.52, "total_tokens": 20850048} +{"current_steps": 30950, "total_steps": 204665, "loss": 0.1131, "lr": 1.9840620951410797e-06, "epoch": 0.7561136491339506, "percentage": 15.12, "elapsed_time": "0:40:08", "remaining_time": "3:45:16", "throughput": 8659.74, "total_tokens": 20853632} +{"current_steps": 30955, "total_steps": 204665, "loss": 0.0763, "lr": 1.9840469270588557e-06, "epoch": 0.7562357999657978, "percentage": 15.12, "elapsed_time": "0:40:08", "remaining_time": "3:45:15", "throughput": 8659.9, "total_tokens": 20857024} +{"current_steps": 30960, "total_steps": 204665, "loss": 0.0524, "lr": 1.9840317518203773e-06, "epoch": 0.7563579507976449, "percentage": 15.13, "elapsed_time": "0:40:08", "remaining_time": "3:45:14", "throughput": 8660.05, "total_tokens": 20860416} +{"current_steps": 30965, "total_steps": 204665, "loss": 0.1901, "lr": 1.984016569425756e-06, "epoch": 0.7564801016294921, "percentage": 15.13, "elapsed_time": "0:40:09", "remaining_time": "3:45:14", "throughput": 8660.3, "total_tokens": 20864064} +{"current_steps": 30970, "total_steps": 204665, "loss": 0.1206, "lr": 1.984001379875101e-06, "epoch": 0.7566022524613393, "percentage": 15.13, "elapsed_time": "0:40:09", "remaining_time": "3:45:13", "throughput": 8660.39, "total_tokens": 20867264} +{"current_steps": 30975, "total_steps": 204665, "loss": 0.0689, "lr": 1.9839861831685235e-06, "epoch": 0.7567244032931865, "percentage": 15.13, "elapsed_time": "0:40:09", "remaining_time": "3:45:13", "throughput": 8660.6, "total_tokens": 20870784} +{"current_steps": 30980, "total_steps": 204665, "loss": 0.0657, "lr": 1.983970979306134e-06, "epoch": 0.7568465541250335, "percentage": 15.14, "elapsed_time": "0:40:10", "remaining_time": "3:45:12", "throughput": 8660.65, "total_tokens": 20873856} +{"current_steps": 30985, "total_steps": 204665, "loss": 0.1117, "lr": 1.983955768288043e-06, "epoch": 0.7569687049568807, "percentage": 15.14, "elapsed_time": "0:40:10", "remaining_time": "3:45:11", "throughput": 8660.67, "total_tokens": 20876864} +{"current_steps": 30990, "total_steps": 204665, "loss": 0.1582, "lr": 1.9839405501143606e-06, "epoch": 0.7570908557887279, "percentage": 15.14, "elapsed_time": "0:40:10", "remaining_time": "3:45:11", "throughput": 8660.79, "total_tokens": 20880192} +{"current_steps": 30995, "total_steps": 204665, "loss": 0.202, "lr": 1.983925324785198e-06, "epoch": 0.7572130066205751, "percentage": 15.14, "elapsed_time": "0:40:11", "remaining_time": "3:45:10", "throughput": 8660.95, "total_tokens": 20883584} +{"current_steps": 31000, "total_steps": 204665, "loss": 0.166, "lr": 1.983910092300666e-06, "epoch": 0.7573351574524223, "percentage": 15.15, "elapsed_time": "0:40:11", "remaining_time": "3:45:09", "throughput": 8660.89, "total_tokens": 20886336} +{"current_steps": 31005, "total_steps": 204665, "loss": 0.0651, "lr": 1.983894852660875e-06, "epoch": 0.7574573082842694, "percentage": 15.15, "elapsed_time": "0:40:11", "remaining_time": "3:45:09", "throughput": 8661.03, "total_tokens": 20889728} +{"current_steps": 31010, "total_steps": 204665, "loss": 0.0693, "lr": 1.983879605865936e-06, "epoch": 0.7575794591161166, "percentage": 15.15, "elapsed_time": "0:40:12", "remaining_time": "3:45:08", "throughput": 8661.19, "total_tokens": 20893184} +{"current_steps": 31015, "total_steps": 204665, "loss": 0.1108, "lr": 1.9838643519159596e-06, "epoch": 0.7577016099479638, "percentage": 15.15, "elapsed_time": "0:40:12", "remaining_time": "3:45:08", "throughput": 8661.27, "total_tokens": 20896384} +{"current_steps": 31020, "total_steps": 204665, "loss": 0.1034, "lr": 1.9838490908110573e-06, "epoch": 0.757823760779811, "percentage": 15.16, "elapsed_time": "0:40:12", "remaining_time": "3:45:07", "throughput": 8661.41, "total_tokens": 20899776} +{"current_steps": 31025, "total_steps": 204665, "loss": 0.2739, "lr": 1.9838338225513397e-06, "epoch": 0.757945911611658, "percentage": 15.16, "elapsed_time": "0:40:13", "remaining_time": "3:45:06", "throughput": 8661.45, "total_tokens": 20902848} +{"current_steps": 31030, "total_steps": 204665, "loss": 0.1084, "lr": 1.9838185471369182e-06, "epoch": 0.7580680624435052, "percentage": 15.16, "elapsed_time": "0:40:13", "remaining_time": "3:45:06", "throughput": 8661.63, "total_tokens": 20906368} +{"current_steps": 31035, "total_steps": 204665, "loss": 0.1732, "lr": 1.9838032645679033e-06, "epoch": 0.7581902132753524, "percentage": 15.16, "elapsed_time": "0:40:14", "remaining_time": "3:45:05", "throughput": 8661.84, "total_tokens": 20909952} +{"current_steps": 31040, "total_steps": 204665, "loss": 0.0867, "lr": 1.9837879748444065e-06, "epoch": 0.7583123641071996, "percentage": 15.17, "elapsed_time": "0:40:14", "remaining_time": "3:45:05", "throughput": 8661.92, "total_tokens": 20913152} +{"current_steps": 31045, "total_steps": 204665, "loss": 0.1665, "lr": 1.983772677966539e-06, "epoch": 0.7584345149390468, "percentage": 15.17, "elapsed_time": "0:40:14", "remaining_time": "3:45:04", "throughput": 8662.09, "total_tokens": 20916608} +{"current_steps": 31050, "total_steps": 204665, "loss": 0.1466, "lr": 1.983757373934412e-06, "epoch": 0.7585566657708939, "percentage": 15.17, "elapsed_time": "0:40:15", "remaining_time": "3:45:03", "throughput": 8662.39, "total_tokens": 20920512} +{"current_steps": 31055, "total_steps": 204665, "loss": 0.0726, "lr": 1.983742062748137e-06, "epoch": 0.7586788166027411, "percentage": 15.17, "elapsed_time": "0:40:15", "remaining_time": "3:45:03", "throughput": 8662.58, "total_tokens": 20924032} +{"current_steps": 31060, "total_steps": 204665, "loss": 0.0878, "lr": 1.9837267444078245e-06, "epoch": 0.7588009674345882, "percentage": 15.18, "elapsed_time": "0:40:15", "remaining_time": "3:45:02", "throughput": 8662.72, "total_tokens": 20927424} +{"current_steps": 31065, "total_steps": 204665, "loss": 0.0989, "lr": 1.9837114189135867e-06, "epoch": 0.7589231182664354, "percentage": 15.18, "elapsed_time": "0:40:16", "remaining_time": "3:45:02", "throughput": 8662.97, "total_tokens": 20931136} +{"current_steps": 31070, "total_steps": 204665, "loss": 0.1624, "lr": 1.9836960862655352e-06, "epoch": 0.7590452690982825, "percentage": 15.18, "elapsed_time": "0:40:16", "remaining_time": "3:45:01", "throughput": 8663.3, "total_tokens": 20935040} +{"current_steps": 31075, "total_steps": 204665, "loss": 0.0539, "lr": 1.9836807464637814e-06, "epoch": 0.7591674199301297, "percentage": 15.18, "elapsed_time": "0:40:16", "remaining_time": "3:45:01", "throughput": 8663.4, "total_tokens": 20938304} +{"current_steps": 31080, "total_steps": 204665, "loss": 0.1256, "lr": 1.983665399508436e-06, "epoch": 0.7592895707619769, "percentage": 15.19, "elapsed_time": "0:40:17", "remaining_time": "3:45:00", "throughput": 8663.5, "total_tokens": 20941568} +{"current_steps": 31085, "total_steps": 204665, "loss": 0.1151, "lr": 1.9836500453996116e-06, "epoch": 0.7594117215938241, "percentage": 15.19, "elapsed_time": "0:40:17", "remaining_time": "3:44:59", "throughput": 8663.62, "total_tokens": 20944896} +{"current_steps": 31090, "total_steps": 204665, "loss": 0.0971, "lr": 1.9836346841374192e-06, "epoch": 0.7595338724256712, "percentage": 15.19, "elapsed_time": "0:40:17", "remaining_time": "3:44:59", "throughput": 8663.73, "total_tokens": 20948160} +{"current_steps": 31095, "total_steps": 204665, "loss": 0.2196, "lr": 1.9836193157219713e-06, "epoch": 0.7596560232575184, "percentage": 15.19, "elapsed_time": "0:40:18", "remaining_time": "3:44:58", "throughput": 8663.81, "total_tokens": 20951360} +{"current_steps": 31100, "total_steps": 204665, "loss": 0.1416, "lr": 1.983603940153379e-06, "epoch": 0.7597781740893655, "percentage": 15.2, "elapsed_time": "0:40:18", "remaining_time": "3:44:57", "throughput": 8663.96, "total_tokens": 20954752} +{"current_steps": 31105, "total_steps": 204665, "loss": 0.0766, "lr": 1.983588557431754e-06, "epoch": 0.7599003249212127, "percentage": 15.2, "elapsed_time": "0:40:18", "remaining_time": "3:44:57", "throughput": 8664.04, "total_tokens": 20957952} +{"current_steps": 31110, "total_steps": 204665, "loss": 0.1191, "lr": 1.983573167557209e-06, "epoch": 0.7600224757530599, "percentage": 15.2, "elapsed_time": "0:40:19", "remaining_time": "3:44:56", "throughput": 8664.12, "total_tokens": 20961152} +{"current_steps": 31115, "total_steps": 204665, "loss": 0.1919, "lr": 1.9835577705298545e-06, "epoch": 0.760144626584907, "percentage": 15.2, "elapsed_time": "0:40:19", "remaining_time": "3:44:56", "throughput": 8664.15, "total_tokens": 20964224} +{"current_steps": 31120, "total_steps": 204665, "loss": 0.0693, "lr": 1.983542366349804e-06, "epoch": 0.7602667774167542, "percentage": 15.21, "elapsed_time": "0:40:20", "remaining_time": "3:44:55", "throughput": 8664.27, "total_tokens": 20967552} +{"current_steps": 31125, "total_steps": 204665, "loss": 0.1099, "lr": 1.9835269550171687e-06, "epoch": 0.7603889282486014, "percentage": 15.21, "elapsed_time": "0:40:20", "remaining_time": "3:44:54", "throughput": 8664.27, "total_tokens": 20970496} +{"current_steps": 31130, "total_steps": 204665, "loss": 0.1083, "lr": 1.983511536532061e-06, "epoch": 0.7605110790804486, "percentage": 15.21, "elapsed_time": "0:40:20", "remaining_time": "3:44:54", "throughput": 8664.34, "total_tokens": 20973632} +{"current_steps": 31135, "total_steps": 204665, "loss": 0.1891, "lr": 1.983496110894593e-06, "epoch": 0.7606332299122956, "percentage": 15.21, "elapsed_time": "0:40:21", "remaining_time": "3:44:53", "throughput": 8664.52, "total_tokens": 20977152} +{"current_steps": 31140, "total_steps": 204665, "loss": 0.0849, "lr": 1.9834806781048764e-06, "epoch": 0.7607553807441428, "percentage": 15.22, "elapsed_time": "0:40:21", "remaining_time": "3:44:53", "throughput": 8664.74, "total_tokens": 20980736} +{"current_steps": 31145, "total_steps": 204665, "loss": 0.0771, "lr": 1.983465238163024e-06, "epoch": 0.76087753157599, "percentage": 15.22, "elapsed_time": "0:40:21", "remaining_time": "3:44:52", "throughput": 8664.82, "total_tokens": 20983936} +{"current_steps": 31150, "total_steps": 204665, "loss": 0.044, "lr": 1.9834497910691478e-06, "epoch": 0.7609996824078372, "percentage": 15.22, "elapsed_time": "0:40:22", "remaining_time": "3:44:51", "throughput": 8665.24, "total_tokens": 20988224} +{"current_steps": 31155, "total_steps": 204665, "loss": 0.0602, "lr": 1.98343433682336e-06, "epoch": 0.7611218332396844, "percentage": 15.22, "elapsed_time": "0:40:22", "remaining_time": "3:44:51", "throughput": 8665.37, "total_tokens": 20991552} +{"current_steps": 31160, "total_steps": 204665, "loss": 0.2165, "lr": 1.9834188754257733e-06, "epoch": 0.7612439840715315, "percentage": 15.22, "elapsed_time": "0:40:22", "remaining_time": "3:44:50", "throughput": 8665.43, "total_tokens": 20994688} +{"current_steps": 31165, "total_steps": 204665, "loss": 0.0566, "lr": 1.9834034068765e-06, "epoch": 0.7613661349033787, "percentage": 15.23, "elapsed_time": "0:40:23", "remaining_time": "3:44:50", "throughput": 8665.53, "total_tokens": 20997952} +{"current_steps": 31170, "total_steps": 204665, "loss": 0.1226, "lr": 1.983387931175653e-06, "epoch": 0.7614882857352259, "percentage": 15.23, "elapsed_time": "0:40:23", "remaining_time": "3:44:49", "throughput": 8665.63, "total_tokens": 21001216} +{"current_steps": 31175, "total_steps": 204665, "loss": 0.0843, "lr": 1.983372448323344e-06, "epoch": 0.7616104365670731, "percentage": 15.23, "elapsed_time": "0:40:23", "remaining_time": "3:44:48", "throughput": 8665.83, "total_tokens": 21004736} +{"current_steps": 31180, "total_steps": 204665, "loss": 0.0738, "lr": 1.983356958319686e-06, "epoch": 0.7617325873989201, "percentage": 15.23, "elapsed_time": "0:40:24", "remaining_time": "3:44:48", "throughput": 8665.95, "total_tokens": 21008064} +{"current_steps": 31185, "total_steps": 204665, "loss": 0.044, "lr": 1.9833414611647925e-06, "epoch": 0.7618547382307673, "percentage": 15.24, "elapsed_time": "0:40:24", "remaining_time": "3:44:47", "throughput": 8665.99, "total_tokens": 21011136} +{"current_steps": 31190, "total_steps": 204665, "loss": 0.1582, "lr": 1.9833259568587744e-06, "epoch": 0.7619768890626145, "percentage": 15.24, "elapsed_time": "0:40:24", "remaining_time": "3:44:47", "throughput": 8666.09, "total_tokens": 21014400} +{"current_steps": 31195, "total_steps": 204665, "loss": 0.0659, "lr": 1.983310445401746e-06, "epoch": 0.7620990398944617, "percentage": 15.24, "elapsed_time": "0:40:25", "remaining_time": "3:44:46", "throughput": 8666.34, "total_tokens": 21018112} +{"current_steps": 31200, "total_steps": 204665, "loss": 0.0602, "lr": 1.9832949267938195e-06, "epoch": 0.7622211907263089, "percentage": 15.24, "elapsed_time": "0:40:25", "remaining_time": "3:44:45", "throughput": 8666.37, "total_tokens": 21021120} +{"current_steps": 31205, "total_steps": 204665, "loss": 0.1516, "lr": 1.9832794010351077e-06, "epoch": 0.762343341558156, "percentage": 15.25, "elapsed_time": "0:40:25", "remaining_time": "3:44:45", "throughput": 8666.48, "total_tokens": 21024384} +{"current_steps": 31210, "total_steps": 204665, "loss": 0.1687, "lr": 1.9832638681257238e-06, "epoch": 0.7624654923900032, "percentage": 15.25, "elapsed_time": "0:40:26", "remaining_time": "3:44:44", "throughput": 8666.48, "total_tokens": 21027328} +{"current_steps": 31215, "total_steps": 204665, "loss": 0.1582, "lr": 1.9832483280657805e-06, "epoch": 0.7625876432218504, "percentage": 15.25, "elapsed_time": "0:40:26", "remaining_time": "3:44:43", "throughput": 8666.69, "total_tokens": 21030912} +{"current_steps": 31220, "total_steps": 204665, "loss": 0.0989, "lr": 1.983232780855391e-06, "epoch": 0.7627097940536975, "percentage": 15.25, "elapsed_time": "0:40:26", "remaining_time": "3:44:43", "throughput": 8666.98, "total_tokens": 21034688} +{"current_steps": 31225, "total_steps": 204665, "loss": 0.1562, "lr": 1.983217226494668e-06, "epoch": 0.7628319448855446, "percentage": 15.26, "elapsed_time": "0:40:27", "remaining_time": "3:44:42", "throughput": 8667.01, "total_tokens": 21037760} +{"current_steps": 31230, "total_steps": 204665, "loss": 0.1243, "lr": 1.983201664983725e-06, "epoch": 0.7629540957173918, "percentage": 15.26, "elapsed_time": "0:40:27", "remaining_time": "3:44:42", "throughput": 8667.07, "total_tokens": 21040896} +{"current_steps": 31235, "total_steps": 204665, "loss": 0.1733, "lr": 1.9831860963226754e-06, "epoch": 0.763076246549239, "percentage": 15.26, "elapsed_time": "0:40:28", "remaining_time": "3:44:41", "throughput": 8667.39, "total_tokens": 21044864} +{"current_steps": 31240, "total_steps": 204665, "loss": 0.1372, "lr": 1.9831705205116317e-06, "epoch": 0.7631983973810862, "percentage": 15.26, "elapsed_time": "0:40:28", "remaining_time": "3:44:40", "throughput": 8667.48, "total_tokens": 21048128} +{"current_steps": 31245, "total_steps": 204665, "loss": 0.1415, "lr": 1.9831549375507076e-06, "epoch": 0.7633205482129334, "percentage": 15.27, "elapsed_time": "0:40:28", "remaining_time": "3:44:40", "throughput": 8667.76, "total_tokens": 21051968} +{"current_steps": 31250, "total_steps": 204665, "loss": 0.0339, "lr": 1.983139347440016e-06, "epoch": 0.7634426990447805, "percentage": 15.27, "elapsed_time": "0:40:29", "remaining_time": "3:44:39", "throughput": 8667.86, "total_tokens": 21055296} +{"current_steps": 31255, "total_steps": 204665, "loss": 0.04, "lr": 1.983123750179671e-06, "epoch": 0.7635648498766276, "percentage": 15.27, "elapsed_time": "0:40:29", "remaining_time": "3:44:39", "throughput": 8667.9, "total_tokens": 21058432} +{"current_steps": 31260, "total_steps": 204665, "loss": 0.0793, "lr": 1.9831081457697856e-06, "epoch": 0.7636870007084748, "percentage": 15.27, "elapsed_time": "0:40:29", "remaining_time": "3:44:38", "throughput": 8668.01, "total_tokens": 21061760} +{"current_steps": 31265, "total_steps": 204665, "loss": 0.1415, "lr": 1.9830925342104736e-06, "epoch": 0.763809151540322, "percentage": 15.28, "elapsed_time": "0:40:30", "remaining_time": "3:44:38", "throughput": 8668.11, "total_tokens": 21065088} +{"current_steps": 31270, "total_steps": 204665, "loss": 0.1292, "lr": 1.983076915501848e-06, "epoch": 0.7639313023721691, "percentage": 15.28, "elapsed_time": "0:40:30", "remaining_time": "3:44:37", "throughput": 8668.38, "total_tokens": 21068864} +{"current_steps": 31275, "total_steps": 204665, "loss": 0.1785, "lr": 1.9830612896440226e-06, "epoch": 0.7640534532040163, "percentage": 15.28, "elapsed_time": "0:40:30", "remaining_time": "3:44:36", "throughput": 8668.49, "total_tokens": 21072192} +{"current_steps": 31280, "total_steps": 204665, "loss": 0.158, "lr": 1.983045656637111e-06, "epoch": 0.7641756040358635, "percentage": 15.28, "elapsed_time": "0:40:31", "remaining_time": "3:44:36", "throughput": 8668.43, "total_tokens": 21075072} +{"current_steps": 31285, "total_steps": 204665, "loss": 0.1041, "lr": 1.9830300164812273e-06, "epoch": 0.7642977548677107, "percentage": 15.29, "elapsed_time": "0:40:31", "remaining_time": "3:44:35", "throughput": 8668.82, "total_tokens": 21079232} +{"current_steps": 31290, "total_steps": 204665, "loss": 0.0476, "lr": 1.9830143691764846e-06, "epoch": 0.7644199056995579, "percentage": 15.29, "elapsed_time": "0:40:31", "remaining_time": "3:44:35", "throughput": 8668.94, "total_tokens": 21082560} +{"current_steps": 31295, "total_steps": 204665, "loss": 0.0529, "lr": 1.9829987147229974e-06, "epoch": 0.764542056531405, "percentage": 15.29, "elapsed_time": "0:40:32", "remaining_time": "3:44:34", "throughput": 8669.13, "total_tokens": 21086144} +{"current_steps": 31300, "total_steps": 204665, "loss": 0.2246, "lr": 1.982983053120879e-06, "epoch": 0.7646642073632521, "percentage": 15.29, "elapsed_time": "0:40:32", "remaining_time": "3:44:34", "throughput": 8669.29, "total_tokens": 21089600} +{"current_steps": 31305, "total_steps": 204665, "loss": 0.0652, "lr": 1.9829673843702434e-06, "epoch": 0.7647863581950993, "percentage": 15.3, "elapsed_time": "0:40:33", "remaining_time": "3:44:33", "throughput": 8669.44, "total_tokens": 21093056} +{"current_steps": 31310, "total_steps": 204665, "loss": 0.172, "lr": 1.9829517084712045e-06, "epoch": 0.7649085090269465, "percentage": 15.3, "elapsed_time": "0:40:33", "remaining_time": "3:44:32", "throughput": 8669.52, "total_tokens": 21096256} +{"current_steps": 31315, "total_steps": 204665, "loss": 0.1213, "lr": 1.9829360254238767e-06, "epoch": 0.7650306598587936, "percentage": 15.3, "elapsed_time": "0:40:33", "remaining_time": "3:44:32", "throughput": 8669.61, "total_tokens": 21099520} +{"current_steps": 31320, "total_steps": 204665, "loss": 0.1974, "lr": 1.9829203352283735e-06, "epoch": 0.7651528106906408, "percentage": 15.3, "elapsed_time": "0:40:34", "remaining_time": "3:44:31", "throughput": 8669.64, "total_tokens": 21102592} +{"current_steps": 31325, "total_steps": 204665, "loss": 0.1138, "lr": 1.982904637884809e-06, "epoch": 0.765274961522488, "percentage": 15.31, "elapsed_time": "0:40:34", "remaining_time": "3:44:31", "throughput": 8669.75, "total_tokens": 21105920} +{"current_steps": 31330, "total_steps": 204665, "loss": 0.1358, "lr": 1.982888933393298e-06, "epoch": 0.7653971123543352, "percentage": 15.31, "elapsed_time": "0:40:34", "remaining_time": "3:44:30", "throughput": 8669.95, "total_tokens": 21109504} +{"current_steps": 31335, "total_steps": 204665, "loss": 0.1178, "lr": 1.982873221753954e-06, "epoch": 0.7655192631861824, "percentage": 15.31, "elapsed_time": "0:40:35", "remaining_time": "3:44:29", "throughput": 8670.0, "total_tokens": 21112576} +{"current_steps": 31340, "total_steps": 204665, "loss": 0.1292, "lr": 1.982857502966892e-06, "epoch": 0.7656414140180294, "percentage": 15.31, "elapsed_time": "0:40:35", "remaining_time": "3:44:29", "throughput": 8670.11, "total_tokens": 21115840} +{"current_steps": 31345, "total_steps": 204665, "loss": 0.1554, "lr": 1.9828417770322255e-06, "epoch": 0.7657635648498766, "percentage": 15.32, "elapsed_time": "0:40:35", "remaining_time": "3:44:28", "throughput": 8670.28, "total_tokens": 21119296} +{"current_steps": 31350, "total_steps": 204665, "loss": 0.0441, "lr": 1.9828260439500694e-06, "epoch": 0.7658857156817238, "percentage": 15.32, "elapsed_time": "0:40:36", "remaining_time": "3:44:28", "throughput": 8670.52, "total_tokens": 21122944} +{"current_steps": 31355, "total_steps": 204665, "loss": 0.1804, "lr": 1.9828103037205376e-06, "epoch": 0.766007866513571, "percentage": 15.32, "elapsed_time": "0:40:36", "remaining_time": "3:44:27", "throughput": 8670.56, "total_tokens": 21126016} +{"current_steps": 31360, "total_steps": 204665, "loss": 0.1402, "lr": 1.9827945563437455e-06, "epoch": 0.7661300173454181, "percentage": 15.32, "elapsed_time": "0:40:36", "remaining_time": "3:44:26", "throughput": 8670.7, "total_tokens": 21129344} +{"current_steps": 31365, "total_steps": 204665, "loss": 0.0889, "lr": 1.9827788018198067e-06, "epoch": 0.7662521681772653, "percentage": 15.33, "elapsed_time": "0:40:37", "remaining_time": "3:44:26", "throughput": 8670.7, "total_tokens": 21132288} +{"current_steps": 31370, "total_steps": 204665, "loss": 0.093, "lr": 1.9827630401488365e-06, "epoch": 0.7663743190091125, "percentage": 15.33, "elapsed_time": "0:40:37", "remaining_time": "3:44:25", "throughput": 8670.83, "total_tokens": 21135616} +{"current_steps": 31375, "total_steps": 204665, "loss": 0.1495, "lr": 1.9827472713309486e-06, "epoch": 0.7664964698409596, "percentage": 15.33, "elapsed_time": "0:40:37", "remaining_time": "3:44:24", "throughput": 8670.93, "total_tokens": 21138816} +{"current_steps": 31380, "total_steps": 204665, "loss": 0.1836, "lr": 1.9827314953662584e-06, "epoch": 0.7666186206728067, "percentage": 15.33, "elapsed_time": "0:40:38", "remaining_time": "3:44:24", "throughput": 8671.0, "total_tokens": 21141952} +{"current_steps": 31385, "total_steps": 204665, "loss": 0.0087, "lr": 1.9827157122548806e-06, "epoch": 0.7667407715046539, "percentage": 15.33, "elapsed_time": "0:40:38", "remaining_time": "3:44:23", "throughput": 8671.35, "total_tokens": 21145920} +{"current_steps": 31390, "total_steps": 204665, "loss": 0.1052, "lr": 1.98269992199693e-06, "epoch": 0.7668629223365011, "percentage": 15.34, "elapsed_time": "0:40:38", "remaining_time": "3:44:23", "throughput": 8671.44, "total_tokens": 21149120} +{"current_steps": 31395, "total_steps": 204665, "loss": 0.119, "lr": 1.982684124592521e-06, "epoch": 0.7669850731683483, "percentage": 15.34, "elapsed_time": "0:40:39", "remaining_time": "3:44:22", "throughput": 8671.47, "total_tokens": 21152128} +{"current_steps": 31400, "total_steps": 204665, "loss": 0.0412, "lr": 1.9826683200417684e-06, "epoch": 0.7671072240001955, "percentage": 15.34, "elapsed_time": "0:40:39", "remaining_time": "3:44:21", "throughput": 8671.63, "total_tokens": 21155520} +{"current_steps": 31405, "total_steps": 204665, "loss": 0.2221, "lr": 1.982652508344788e-06, "epoch": 0.7672293748320426, "percentage": 15.34, "elapsed_time": "0:40:39", "remaining_time": "3:44:21", "throughput": 8671.82, "total_tokens": 21159040} +{"current_steps": 31410, "total_steps": 204665, "loss": 0.1157, "lr": 1.982636689501694e-06, "epoch": 0.7673515256638898, "percentage": 15.35, "elapsed_time": "0:40:40", "remaining_time": "3:44:20", "throughput": 8671.98, "total_tokens": 21162432} +{"current_steps": 31415, "total_steps": 204665, "loss": 0.0612, "lr": 1.9826208635126017e-06, "epoch": 0.767473676495737, "percentage": 15.35, "elapsed_time": "0:40:40", "remaining_time": "3:44:20", "throughput": 8672.27, "total_tokens": 21166208} +{"current_steps": 31420, "total_steps": 204665, "loss": 0.0213, "lr": 1.9826050303776265e-06, "epoch": 0.7675958273275841, "percentage": 15.35, "elapsed_time": "0:40:41", "remaining_time": "3:44:19", "throughput": 8672.5, "total_tokens": 21169856} +{"current_steps": 31425, "total_steps": 204665, "loss": 0.1161, "lr": 1.982589190096883e-06, "epoch": 0.7677179781594312, "percentage": 15.35, "elapsed_time": "0:40:41", "remaining_time": "3:44:18", "throughput": 8672.74, "total_tokens": 21173504} +{"current_steps": 31430, "total_steps": 204665, "loss": 0.0755, "lr": 1.9825733426704867e-06, "epoch": 0.7678401289912784, "percentage": 15.36, "elapsed_time": "0:40:41", "remaining_time": "3:44:18", "throughput": 8672.96, "total_tokens": 21177088} +{"current_steps": 31435, "total_steps": 204665, "loss": 0.1222, "lr": 1.9825574880985525e-06, "epoch": 0.7679622798231256, "percentage": 15.36, "elapsed_time": "0:40:42", "remaining_time": "3:44:17", "throughput": 8673.14, "total_tokens": 21180544} +{"current_steps": 31440, "total_steps": 204665, "loss": 0.08, "lr": 1.982541626381196e-06, "epoch": 0.7680844306549728, "percentage": 15.36, "elapsed_time": "0:40:42", "remaining_time": "3:44:17", "throughput": 8673.19, "total_tokens": 21183616} +{"current_steps": 31445, "total_steps": 204665, "loss": 0.0965, "lr": 1.9825257575185326e-06, "epoch": 0.76820658148682, "percentage": 15.36, "elapsed_time": "0:40:42", "remaining_time": "3:44:16", "throughput": 8673.49, "total_tokens": 21187456} +{"current_steps": 31450, "total_steps": 204665, "loss": 0.0524, "lr": 1.9825098815106777e-06, "epoch": 0.768328732318667, "percentage": 15.37, "elapsed_time": "0:40:43", "remaining_time": "3:44:15", "throughput": 8673.58, "total_tokens": 21190656} +{"current_steps": 31455, "total_steps": 204665, "loss": 0.1822, "lr": 1.982493998357747e-06, "epoch": 0.7684508831505142, "percentage": 15.37, "elapsed_time": "0:40:43", "remaining_time": "3:44:15", "throughput": 8673.7, "total_tokens": 21193920} +{"current_steps": 31460, "total_steps": 204665, "loss": 0.0697, "lr": 1.982478108059855e-06, "epoch": 0.7685730339823614, "percentage": 15.37, "elapsed_time": "0:40:43", "remaining_time": "3:44:14", "throughput": 8673.7, "total_tokens": 21196864} +{"current_steps": 31465, "total_steps": 204665, "loss": 0.1942, "lr": 1.982462210617118e-06, "epoch": 0.7686951848142086, "percentage": 15.37, "elapsed_time": "0:40:44", "remaining_time": "3:44:13", "throughput": 8673.8, "total_tokens": 21200064} +{"current_steps": 31470, "total_steps": 204665, "loss": 0.0911, "lr": 1.982446306029652e-06, "epoch": 0.7688173356460557, "percentage": 15.38, "elapsed_time": "0:40:44", "remaining_time": "3:44:13", "throughput": 8673.94, "total_tokens": 21203456} +{"current_steps": 31475, "total_steps": 204665, "loss": 0.1893, "lr": 1.982430394297572e-06, "epoch": 0.7689394864779029, "percentage": 15.38, "elapsed_time": "0:40:44", "remaining_time": "3:44:12", "throughput": 8673.96, "total_tokens": 21206464} +{"current_steps": 31480, "total_steps": 204665, "loss": 0.0927, "lr": 1.9824144754209944e-06, "epoch": 0.7690616373097501, "percentage": 15.38, "elapsed_time": "0:40:45", "remaining_time": "3:44:12", "throughput": 8674.12, "total_tokens": 21209856} +{"current_steps": 31485, "total_steps": 204665, "loss": 0.1205, "lr": 1.982398549400034e-06, "epoch": 0.7691837881415973, "percentage": 15.38, "elapsed_time": "0:40:45", "remaining_time": "3:44:11", "throughput": 8674.09, "total_tokens": 21212672} +{"current_steps": 31490, "total_steps": 204665, "loss": 0.0765, "lr": 1.982382616234807e-06, "epoch": 0.7693059389734445, "percentage": 15.39, "elapsed_time": "0:40:45", "remaining_time": "3:44:10", "throughput": 8674.37, "total_tokens": 21216448} +{"current_steps": 31495, "total_steps": 204665, "loss": 0.1077, "lr": 1.98236667592543e-06, "epoch": 0.7694280898052915, "percentage": 15.39, "elapsed_time": "0:40:46", "remaining_time": "3:44:10", "throughput": 8674.38, "total_tokens": 21219392} +{"current_steps": 31500, "total_steps": 204665, "loss": 0.0534, "lr": 1.9823507284720174e-06, "epoch": 0.7695502406371387, "percentage": 15.39, "elapsed_time": "0:40:46", "remaining_time": "3:44:09", "throughput": 8674.43, "total_tokens": 21222464} +{"current_steps": 31505, "total_steps": 204665, "loss": 0.2408, "lr": 1.9823347738746868e-06, "epoch": 0.7696723914689859, "percentage": 15.39, "elapsed_time": "0:40:46", "remaining_time": "3:44:08", "throughput": 8674.5, "total_tokens": 21225600} +{"current_steps": 31510, "total_steps": 204665, "loss": 0.1162, "lr": 1.9823188121335535e-06, "epoch": 0.7697945423008331, "percentage": 15.4, "elapsed_time": "0:40:47", "remaining_time": "3:44:08", "throughput": 8674.63, "total_tokens": 21228928} +{"current_steps": 31515, "total_steps": 204665, "loss": 0.1069, "lr": 1.9823028432487332e-06, "epoch": 0.7699166931326802, "percentage": 15.4, "elapsed_time": "0:40:47", "remaining_time": "3:44:07", "throughput": 8674.87, "total_tokens": 21232576} +{"current_steps": 31520, "total_steps": 204665, "loss": 0.1632, "lr": 1.982286867220343e-06, "epoch": 0.7700388439645274, "percentage": 15.4, "elapsed_time": "0:40:47", "remaining_time": "3:44:06", "throughput": 8675.05, "total_tokens": 21236032} +{"current_steps": 31525, "total_steps": 204665, "loss": 0.1045, "lr": 1.9822708840484976e-06, "epoch": 0.7701609947963746, "percentage": 15.4, "elapsed_time": "0:40:48", "remaining_time": "3:44:06", "throughput": 8675.31, "total_tokens": 21239744} +{"current_steps": 31530, "total_steps": 204665, "loss": 0.1838, "lr": 1.9822548937333148e-06, "epoch": 0.7702831456282218, "percentage": 15.41, "elapsed_time": "0:40:48", "remaining_time": "3:44:05", "throughput": 8675.42, "total_tokens": 21243008} +{"current_steps": 31535, "total_steps": 204665, "loss": 0.0838, "lr": 1.98223889627491e-06, "epoch": 0.770405296460069, "percentage": 15.41, "elapsed_time": "0:40:48", "remaining_time": "3:44:05", "throughput": 8675.53, "total_tokens": 21246272} +{"current_steps": 31540, "total_steps": 204665, "loss": 0.1898, "lr": 1.9822228916733996e-06, "epoch": 0.770527447291916, "percentage": 15.41, "elapsed_time": "0:40:49", "remaining_time": "3:44:04", "throughput": 8675.66, "total_tokens": 21249600} +{"current_steps": 31545, "total_steps": 204665, "loss": 0.1083, "lr": 1.9822068799289003e-06, "epoch": 0.7706495981237632, "percentage": 15.41, "elapsed_time": "0:40:49", "remaining_time": "3:44:03", "throughput": 8675.81, "total_tokens": 21252992} +{"current_steps": 31550, "total_steps": 204665, "loss": 0.0789, "lr": 1.982190861041529e-06, "epoch": 0.7707717489556104, "percentage": 15.42, "elapsed_time": "0:40:50", "remaining_time": "3:44:03", "throughput": 8675.97, "total_tokens": 21256448} +{"current_steps": 31555, "total_steps": 204665, "loss": 0.0737, "lr": 1.9821748350114004e-06, "epoch": 0.7708938997874576, "percentage": 15.42, "elapsed_time": "0:40:50", "remaining_time": "3:44:02", "throughput": 8675.97, "total_tokens": 21259392} +{"current_steps": 31560, "total_steps": 204665, "loss": 0.0581, "lr": 1.982158801838633e-06, "epoch": 0.7710160506193047, "percentage": 15.42, "elapsed_time": "0:40:50", "remaining_time": "3:44:02", "throughput": 8676.15, "total_tokens": 21262848} +{"current_steps": 31565, "total_steps": 204665, "loss": 0.1367, "lr": 1.9821427615233427e-06, "epoch": 0.7711382014511519, "percentage": 15.42, "elapsed_time": "0:40:51", "remaining_time": "3:44:01", "throughput": 8676.23, "total_tokens": 21266048} +{"current_steps": 31570, "total_steps": 204665, "loss": 0.1666, "lr": 1.9821267140656457e-06, "epoch": 0.771260352282999, "percentage": 15.43, "elapsed_time": "0:40:51", "remaining_time": "3:44:00", "throughput": 8676.29, "total_tokens": 21269120} +{"current_steps": 31575, "total_steps": 204665, "loss": 0.1126, "lr": 1.982110659465659e-06, "epoch": 0.7713825031148462, "percentage": 15.43, "elapsed_time": "0:40:51", "remaining_time": "3:44:00", "throughput": 8676.38, "total_tokens": 21272320} +{"current_steps": 31580, "total_steps": 204665, "loss": 0.164, "lr": 1.9820945977235e-06, "epoch": 0.7715046539466934, "percentage": 15.43, "elapsed_time": "0:40:52", "remaining_time": "3:43:59", "throughput": 8676.41, "total_tokens": 21275328} +{"current_steps": 31585, "total_steps": 204665, "loss": 0.0813, "lr": 1.9820785288392844e-06, "epoch": 0.7716268047785405, "percentage": 15.43, "elapsed_time": "0:40:52", "remaining_time": "3:43:58", "throughput": 8676.56, "total_tokens": 21278720} +{"current_steps": 31590, "total_steps": 204665, "loss": 0.0579, "lr": 1.98206245281313e-06, "epoch": 0.7717489556103877, "percentage": 15.43, "elapsed_time": "0:40:52", "remaining_time": "3:43:58", "throughput": 8676.68, "total_tokens": 21282048} +{"current_steps": 31595, "total_steps": 204665, "loss": 0.1305, "lr": 1.982046369645153e-06, "epoch": 0.7718711064422349, "percentage": 15.44, "elapsed_time": "0:40:53", "remaining_time": "3:43:57", "throughput": 8676.83, "total_tokens": 21285440} +{"current_steps": 31600, "total_steps": 204665, "loss": 0.1274, "lr": 1.9820302793354704e-06, "epoch": 0.7719932572740821, "percentage": 15.44, "elapsed_time": "0:40:53", "remaining_time": "3:43:57", "throughput": 8676.86, "total_tokens": 21288448} +{"current_steps": 31605, "total_steps": 204665, "loss": 0.1899, "lr": 1.9820141818842e-06, "epoch": 0.7721154081059292, "percentage": 15.44, "elapsed_time": "0:40:53", "remaining_time": "3:43:56", "throughput": 8677.0, "total_tokens": 21291776} +{"current_steps": 31610, "total_steps": 204665, "loss": 0.1039, "lr": 1.981998077291458e-06, "epoch": 0.7722375589377763, "percentage": 15.44, "elapsed_time": "0:40:54", "remaining_time": "3:43:55", "throughput": 8677.21, "total_tokens": 21295360} +{"current_steps": 31615, "total_steps": 204665, "loss": 0.1433, "lr": 1.981981965557362e-06, "epoch": 0.7723597097696235, "percentage": 15.45, "elapsed_time": "0:40:54", "remaining_time": "3:43:55", "throughput": 8677.2, "total_tokens": 21298240} +{"current_steps": 31620, "total_steps": 204665, "loss": 0.0809, "lr": 1.981965846682029e-06, "epoch": 0.7724818606014707, "percentage": 15.45, "elapsed_time": "0:40:54", "remaining_time": "3:43:54", "throughput": 8677.34, "total_tokens": 21301568} +{"current_steps": 31625, "total_steps": 204665, "loss": 0.1297, "lr": 1.981949720665576e-06, "epoch": 0.7726040114333178, "percentage": 15.45, "elapsed_time": "0:40:55", "remaining_time": "3:43:53", "throughput": 8677.5, "total_tokens": 21305024} +{"current_steps": 31630, "total_steps": 204665, "loss": 0.0577, "lr": 1.981933587508121e-06, "epoch": 0.772726162265165, "percentage": 15.45, "elapsed_time": "0:40:55", "remaining_time": "3:43:53", "throughput": 8677.59, "total_tokens": 21308224} +{"current_steps": 31635, "total_steps": 204665, "loss": 0.171, "lr": 1.9819174472097807e-06, "epoch": 0.7728483130970122, "percentage": 15.46, "elapsed_time": "0:40:55", "remaining_time": "3:43:52", "throughput": 8677.73, "total_tokens": 21311552} +{"current_steps": 31640, "total_steps": 204665, "loss": 0.0992, "lr": 1.9819012997706727e-06, "epoch": 0.7729704639288594, "percentage": 15.46, "elapsed_time": "0:40:56", "remaining_time": "3:43:52", "throughput": 8677.79, "total_tokens": 21314688} +{"current_steps": 31645, "total_steps": 204665, "loss": 0.1189, "lr": 1.981885145190914e-06, "epoch": 0.7730926147607066, "percentage": 15.46, "elapsed_time": "0:40:56", "remaining_time": "3:43:51", "throughput": 8677.91, "total_tokens": 21317952} +{"current_steps": 31650, "total_steps": 204665, "loss": 0.0643, "lr": 1.981868983470623e-06, "epoch": 0.7732147655925536, "percentage": 15.46, "elapsed_time": "0:40:56", "remaining_time": "3:43:50", "throughput": 8677.96, "total_tokens": 21321024} +{"current_steps": 31655, "total_steps": 204665, "loss": 0.1486, "lr": 1.981852814609916e-06, "epoch": 0.7733369164244008, "percentage": 15.47, "elapsed_time": "0:40:57", "remaining_time": "3:43:50", "throughput": 8678.07, "total_tokens": 21324288} +{"current_steps": 31660, "total_steps": 204665, "loss": 0.1112, "lr": 1.981836638608911e-06, "epoch": 0.773459067256248, "percentage": 15.47, "elapsed_time": "0:40:57", "remaining_time": "3:43:49", "throughput": 8678.27, "total_tokens": 21327808} +{"current_steps": 31665, "total_steps": 204665, "loss": 0.1939, "lr": 1.981820455467727e-06, "epoch": 0.7735812180880952, "percentage": 15.47, "elapsed_time": "0:40:57", "remaining_time": "3:43:48", "throughput": 8678.34, "total_tokens": 21330944} +{"current_steps": 31670, "total_steps": 204665, "loss": 0.1189, "lr": 1.9818042651864797e-06, "epoch": 0.7737033689199423, "percentage": 15.47, "elapsed_time": "0:40:58", "remaining_time": "3:43:48", "throughput": 8678.46, "total_tokens": 21334208} +{"current_steps": 31675, "total_steps": 204665, "loss": 0.0309, "lr": 1.981788067765288e-06, "epoch": 0.7738255197517895, "percentage": 15.48, "elapsed_time": "0:40:58", "remaining_time": "3:43:47", "throughput": 8678.45, "total_tokens": 21337088} +{"current_steps": 31680, "total_steps": 204665, "loss": 0.1, "lr": 1.9817718632042695e-06, "epoch": 0.7739476705836367, "percentage": 15.48, "elapsed_time": "0:40:58", "remaining_time": "3:43:46", "throughput": 8678.63, "total_tokens": 21340608} +{"current_steps": 31685, "total_steps": 204665, "loss": 0.0482, "lr": 1.981755651503542e-06, "epoch": 0.7740698214154839, "percentage": 15.48, "elapsed_time": "0:40:59", "remaining_time": "3:43:46", "throughput": 8678.71, "total_tokens": 21343808} +{"current_steps": 31690, "total_steps": 204665, "loss": 0.0957, "lr": 1.981739432663223e-06, "epoch": 0.774191972247331, "percentage": 15.48, "elapsed_time": "0:40:59", "remaining_time": "3:43:45", "throughput": 8678.89, "total_tokens": 21347264} +{"current_steps": 31695, "total_steps": 204665, "loss": 0.0278, "lr": 1.981723206683431e-06, "epoch": 0.7743141230791781, "percentage": 15.49, "elapsed_time": "0:41:00", "remaining_time": "3:43:45", "throughput": 8679.12, "total_tokens": 21350912} +{"current_steps": 31700, "total_steps": 204665, "loss": 0.2443, "lr": 1.981706973564284e-06, "epoch": 0.7744362739110253, "percentage": 15.49, "elapsed_time": "0:41:00", "remaining_time": "3:43:44", "throughput": 8679.29, "total_tokens": 21354368} +{"current_steps": 31705, "total_steps": 204665, "loss": 0.0087, "lr": 1.9816907333058993e-06, "epoch": 0.7745584247428725, "percentage": 15.49, "elapsed_time": "0:41:00", "remaining_time": "3:43:44", "throughput": 8679.48, "total_tokens": 21357888} +{"current_steps": 31710, "total_steps": 204665, "loss": 0.1654, "lr": 1.981674485908396e-06, "epoch": 0.7746805755747197, "percentage": 15.49, "elapsed_time": "0:41:01", "remaining_time": "3:43:43", "throughput": 8679.65, "total_tokens": 21361344} +{"current_steps": 31715, "total_steps": 204665, "loss": 0.0697, "lr": 1.9816582313718917e-06, "epoch": 0.7748027264065668, "percentage": 15.5, "elapsed_time": "0:41:01", "remaining_time": "3:43:42", "throughput": 8679.64, "total_tokens": 21364224} +{"current_steps": 31720, "total_steps": 204665, "loss": 0.1227, "lr": 1.9816419696965045e-06, "epoch": 0.774924877238414, "percentage": 15.5, "elapsed_time": "0:41:01", "remaining_time": "3:43:42", "throughput": 8679.74, "total_tokens": 21367424} +{"current_steps": 31725, "total_steps": 204665, "loss": 0.0791, "lr": 1.9816257008823532e-06, "epoch": 0.7750470280702612, "percentage": 15.5, "elapsed_time": "0:41:02", "remaining_time": "3:43:41", "throughput": 8679.81, "total_tokens": 21370560} +{"current_steps": 31730, "total_steps": 204665, "loss": 0.0378, "lr": 1.9816094249295557e-06, "epoch": 0.7751691789021083, "percentage": 15.5, "elapsed_time": "0:41:02", "remaining_time": "3:43:40", "throughput": 8680.21, "total_tokens": 21374720} +{"current_steps": 31735, "total_steps": 204665, "loss": 0.1237, "lr": 1.98159314183823e-06, "epoch": 0.7752913297339555, "percentage": 15.51, "elapsed_time": "0:41:02", "remaining_time": "3:43:40", "throughput": 8680.36, "total_tokens": 21378112} +{"current_steps": 31740, "total_steps": 204665, "loss": 0.063, "lr": 1.9815768516084956e-06, "epoch": 0.7754134805658026, "percentage": 15.51, "elapsed_time": "0:41:03", "remaining_time": "3:43:39", "throughput": 8680.52, "total_tokens": 21381504} +{"current_steps": 31745, "total_steps": 204665, "loss": 0.2259, "lr": 1.9815605542404698e-06, "epoch": 0.7755356313976498, "percentage": 15.51, "elapsed_time": "0:41:03", "remaining_time": "3:43:39", "throughput": 8680.82, "total_tokens": 21385344} +{"current_steps": 31750, "total_steps": 204665, "loss": 0.0612, "lr": 1.981544249734272e-06, "epoch": 0.775657782229497, "percentage": 15.51, "elapsed_time": "0:41:03", "remaining_time": "3:43:38", "throughput": 8680.97, "total_tokens": 21388736} +{"current_steps": 31755, "total_steps": 204665, "loss": 0.1576, "lr": 1.98152793809002e-06, "epoch": 0.7757799330613442, "percentage": 15.52, "elapsed_time": "0:41:04", "remaining_time": "3:43:37", "throughput": 8681.03, "total_tokens": 21391872} +{"current_steps": 31760, "total_steps": 204665, "loss": 0.0493, "lr": 1.9815116193078333e-06, "epoch": 0.7759020838931913, "percentage": 15.52, "elapsed_time": "0:41:04", "remaining_time": "3:43:37", "throughput": 8681.26, "total_tokens": 21395520} +{"current_steps": 31765, "total_steps": 204665, "loss": 0.0779, "lr": 1.98149529338783e-06, "epoch": 0.7760242347250385, "percentage": 15.52, "elapsed_time": "0:41:04", "remaining_time": "3:43:36", "throughput": 8681.53, "total_tokens": 21399296} +{"current_steps": 31770, "total_steps": 204665, "loss": 0.1058, "lr": 1.981478960330129e-06, "epoch": 0.7761463855568856, "percentage": 15.52, "elapsed_time": "0:41:05", "remaining_time": "3:43:36", "throughput": 8681.74, "total_tokens": 21402880} +{"current_steps": 31775, "total_steps": 204665, "loss": 0.0803, "lr": 1.9814626201348484e-06, "epoch": 0.7762685363887328, "percentage": 15.53, "elapsed_time": "0:41:05", "remaining_time": "3:43:35", "throughput": 8681.92, "total_tokens": 21406336} +{"current_steps": 31780, "total_steps": 204665, "loss": 0.1543, "lr": 1.9814462728021084e-06, "epoch": 0.77639068722058, "percentage": 15.53, "elapsed_time": "0:41:05", "remaining_time": "3:43:34", "throughput": 8682.01, "total_tokens": 21409536} +{"current_steps": 31785, "total_steps": 204665, "loss": 0.133, "lr": 1.981429918332027e-06, "epoch": 0.7765128380524271, "percentage": 15.53, "elapsed_time": "0:41:06", "remaining_time": "3:43:34", "throughput": 8682.2, "total_tokens": 21413056} +{"current_steps": 31790, "total_steps": 204665, "loss": 0.0619, "lr": 1.981413556724723e-06, "epoch": 0.7766349888842743, "percentage": 15.53, "elapsed_time": "0:41:06", "remaining_time": "3:43:33", "throughput": 8682.5, "total_tokens": 21416896} +{"current_steps": 31795, "total_steps": 204665, "loss": 0.1494, "lr": 1.9813971879803155e-06, "epoch": 0.7767571397161215, "percentage": 15.54, "elapsed_time": "0:41:07", "remaining_time": "3:43:33", "throughput": 8682.59, "total_tokens": 21420096} +{"current_steps": 31800, "total_steps": 204665, "loss": 0.0836, "lr": 1.9813808120989238e-06, "epoch": 0.7768792905479687, "percentage": 15.54, "elapsed_time": "0:41:07", "remaining_time": "3:43:32", "throughput": 8682.72, "total_tokens": 21423424} +{"current_steps": 31805, "total_steps": 204665, "loss": 0.0432, "lr": 1.981364429080667e-06, "epoch": 0.7770014413798157, "percentage": 15.54, "elapsed_time": "0:41:07", "remaining_time": "3:43:31", "throughput": 8682.84, "total_tokens": 21426688} +{"current_steps": 31810, "total_steps": 204665, "loss": 0.1327, "lr": 1.9813480389256643e-06, "epoch": 0.7771235922116629, "percentage": 15.54, "elapsed_time": "0:41:08", "remaining_time": "3:43:31", "throughput": 8682.97, "total_tokens": 21430016} +{"current_steps": 31815, "total_steps": 204665, "loss": 0.1589, "lr": 1.9813316416340345e-06, "epoch": 0.7772457430435101, "percentage": 15.54, "elapsed_time": "0:41:08", "remaining_time": "3:43:30", "throughput": 8683.27, "total_tokens": 21433856} +{"current_steps": 31820, "total_steps": 204665, "loss": 0.1383, "lr": 1.981315237205897e-06, "epoch": 0.7773678938753573, "percentage": 15.55, "elapsed_time": "0:41:08", "remaining_time": "3:43:30", "throughput": 8683.37, "total_tokens": 21437120} +{"current_steps": 31825, "total_steps": 204665, "loss": 0.1419, "lr": 1.9812988256413715e-06, "epoch": 0.7774900447072045, "percentage": 15.55, "elapsed_time": "0:41:09", "remaining_time": "3:43:29", "throughput": 8683.58, "total_tokens": 21440704} +{"current_steps": 31830, "total_steps": 204665, "loss": 0.2094, "lr": 1.9812824069405766e-06, "epoch": 0.7776121955390516, "percentage": 15.55, "elapsed_time": "0:41:09", "remaining_time": "3:43:29", "throughput": 8683.84, "total_tokens": 21444416} +{"current_steps": 31835, "total_steps": 204665, "loss": 0.1335, "lr": 1.981265981103632e-06, "epoch": 0.7777343463708988, "percentage": 15.55, "elapsed_time": "0:41:09", "remaining_time": "3:43:28", "throughput": 8683.91, "total_tokens": 21447616} +{"current_steps": 31840, "total_steps": 204665, "loss": 0.0973, "lr": 1.9812495481306577e-06, "epoch": 0.777856497202746, "percentage": 15.56, "elapsed_time": "0:41:10", "remaining_time": "3:43:27", "throughput": 8683.98, "total_tokens": 21450752} +{"current_steps": 31845, "total_steps": 204665, "loss": 0.1563, "lr": 1.9812331080217726e-06, "epoch": 0.7779786480345932, "percentage": 15.56, "elapsed_time": "0:41:10", "remaining_time": "3:43:27", "throughput": 8684.03, "total_tokens": 21453888} +{"current_steps": 31850, "total_steps": 204665, "loss": 0.0927, "lr": 1.9812166607770965e-06, "epoch": 0.7781007988664402, "percentage": 15.56, "elapsed_time": "0:41:10", "remaining_time": "3:43:26", "throughput": 8684.15, "total_tokens": 21457216} +{"current_steps": 31855, "total_steps": 204665, "loss": 0.155, "lr": 1.981200206396749e-06, "epoch": 0.7782229496982874, "percentage": 15.56, "elapsed_time": "0:41:11", "remaining_time": "3:43:26", "throughput": 8684.51, "total_tokens": 21461248} +{"current_steps": 31860, "total_steps": 204665, "loss": 0.0909, "lr": 1.981183744880849e-06, "epoch": 0.7783451005301346, "percentage": 15.57, "elapsed_time": "0:41:11", "remaining_time": "3:43:25", "throughput": 8684.76, "total_tokens": 21464960} +{"current_steps": 31865, "total_steps": 204665, "loss": 0.0728, "lr": 1.9811672762295176e-06, "epoch": 0.7784672513619818, "percentage": 15.57, "elapsed_time": "0:41:11", "remaining_time": "3:43:24", "throughput": 8684.8, "total_tokens": 21468032} +{"current_steps": 31870, "total_steps": 204665, "loss": 0.0325, "lr": 1.9811508004428737e-06, "epoch": 0.778589402193829, "percentage": 15.57, "elapsed_time": "0:41:12", "remaining_time": "3:43:24", "throughput": 8685.06, "total_tokens": 21471744} +{"current_steps": 31875, "total_steps": 204665, "loss": 0.129, "lr": 1.981134317521037e-06, "epoch": 0.7787115530256761, "percentage": 15.57, "elapsed_time": "0:41:12", "remaining_time": "3:43:23", "throughput": 8685.36, "total_tokens": 21475584} +{"current_steps": 31880, "total_steps": 204665, "loss": 0.1179, "lr": 1.981117827464128e-06, "epoch": 0.7788337038575233, "percentage": 15.58, "elapsed_time": "0:41:12", "remaining_time": "3:43:23", "throughput": 8685.42, "total_tokens": 21478720} +{"current_steps": 31885, "total_steps": 204665, "loss": 0.0984, "lr": 1.981101330272266e-06, "epoch": 0.7789558546893705, "percentage": 15.58, "elapsed_time": "0:41:13", "remaining_time": "3:43:22", "throughput": 8685.56, "total_tokens": 21482112} +{"current_steps": 31890, "total_steps": 204665, "loss": 0.0813, "lr": 1.9810848259455716e-06, "epoch": 0.7790780055212176, "percentage": 15.58, "elapsed_time": "0:41:13", "remaining_time": "3:43:21", "throughput": 8685.77, "total_tokens": 21485696} +{"current_steps": 31895, "total_steps": 204665, "loss": 0.0913, "lr": 1.981068314484164e-06, "epoch": 0.7792001563530647, "percentage": 15.58, "elapsed_time": "0:41:14", "remaining_time": "3:43:21", "throughput": 8685.96, "total_tokens": 21489216} +{"current_steps": 31900, "total_steps": 204665, "loss": 0.1576, "lr": 1.981051795888164e-06, "epoch": 0.7793223071849119, "percentage": 15.59, "elapsed_time": "0:41:14", "remaining_time": "3:43:20", "throughput": 8686.17, "total_tokens": 21492800} +{"current_steps": 31905, "total_steps": 204665, "loss": 0.1572, "lr": 1.9810352701576917e-06, "epoch": 0.7794444580167591, "percentage": 15.59, "elapsed_time": "0:41:14", "remaining_time": "3:43:20", "throughput": 8686.17, "total_tokens": 21495744} +{"current_steps": 31910, "total_steps": 204665, "loss": 0.133, "lr": 1.981018737292867e-06, "epoch": 0.7795666088486063, "percentage": 15.59, "elapsed_time": "0:41:15", "remaining_time": "3:43:19", "throughput": 8686.26, "total_tokens": 21498944} +{"current_steps": 31915, "total_steps": 204665, "loss": 0.0848, "lr": 1.98100219729381e-06, "epoch": 0.7796887596804534, "percentage": 15.59, "elapsed_time": "0:41:15", "remaining_time": "3:43:18", "throughput": 8686.42, "total_tokens": 21502400} +{"current_steps": 31920, "total_steps": 204665, "loss": 0.2272, "lr": 1.980985650160641e-06, "epoch": 0.7798109105123006, "percentage": 15.6, "elapsed_time": "0:41:15", "remaining_time": "3:43:18", "throughput": 8686.58, "total_tokens": 21505856} +{"current_steps": 31925, "total_steps": 204665, "loss": 0.0443, "lr": 1.9809690958934804e-06, "epoch": 0.7799330613441477, "percentage": 15.6, "elapsed_time": "0:41:16", "remaining_time": "3:43:17", "throughput": 8686.68, "total_tokens": 21509056} +{"current_steps": 31930, "total_steps": 204665, "loss": 0.1034, "lr": 1.980952534492449e-06, "epoch": 0.7800552121759949, "percentage": 15.6, "elapsed_time": "0:41:16", "remaining_time": "3:43:17", "throughput": 8686.88, "total_tokens": 21512640} +{"current_steps": 31935, "total_steps": 204665, "loss": 0.1021, "lr": 1.980935965957667e-06, "epoch": 0.7801773630078421, "percentage": 15.6, "elapsed_time": "0:41:16", "remaining_time": "3:43:16", "throughput": 8686.98, "total_tokens": 21515904} +{"current_steps": 31940, "total_steps": 204665, "loss": 0.1275, "lr": 1.9809193902892548e-06, "epoch": 0.7802995138396892, "percentage": 15.61, "elapsed_time": "0:41:17", "remaining_time": "3:43:15", "throughput": 8687.05, "total_tokens": 21519040} +{"current_steps": 31945, "total_steps": 204665, "loss": 0.054, "lr": 1.980902807487333e-06, "epoch": 0.7804216646715364, "percentage": 15.61, "elapsed_time": "0:41:17", "remaining_time": "3:43:15", "throughput": 8687.17, "total_tokens": 21522368} +{"current_steps": 31950, "total_steps": 204665, "loss": 0.0054, "lr": 1.980886217552022e-06, "epoch": 0.7805438155033836, "percentage": 15.61, "elapsed_time": "0:41:17", "remaining_time": "3:43:14", "throughput": 8687.3, "total_tokens": 21525696} +{"current_steps": 31955, "total_steps": 204665, "loss": 0.0705, "lr": 1.9808696204834427e-06, "epoch": 0.7806659663352308, "percentage": 15.61, "elapsed_time": "0:41:18", "remaining_time": "3:43:14", "throughput": 8687.52, "total_tokens": 21529344} +{"current_steps": 31960, "total_steps": 204665, "loss": 0.1702, "lr": 1.9808530162817153e-06, "epoch": 0.7807881171670779, "percentage": 15.62, "elapsed_time": "0:41:18", "remaining_time": "3:43:13", "throughput": 8687.74, "total_tokens": 21532992} +{"current_steps": 31965, "total_steps": 204665, "loss": 0.0475, "lr": 1.9808364049469613e-06, "epoch": 0.780910267998925, "percentage": 15.62, "elapsed_time": "0:41:18", "remaining_time": "3:43:12", "throughput": 8687.93, "total_tokens": 21536512} +{"current_steps": 31970, "total_steps": 204665, "loss": 0.2233, "lr": 1.980819786479301e-06, "epoch": 0.7810324188307722, "percentage": 15.62, "elapsed_time": "0:41:19", "remaining_time": "3:43:12", "throughput": 8688.13, "total_tokens": 21540096} +{"current_steps": 31975, "total_steps": 204665, "loss": 0.116, "lr": 1.9808031608788557e-06, "epoch": 0.7811545696626194, "percentage": 15.62, "elapsed_time": "0:41:19", "remaining_time": "3:43:11", "throughput": 8688.41, "total_tokens": 21543936} +{"current_steps": 31980, "total_steps": 204665, "loss": 0.1117, "lr": 1.980786528145746e-06, "epoch": 0.7812767204944666, "percentage": 15.63, "elapsed_time": "0:41:19", "remaining_time": "3:43:11", "throughput": 8688.56, "total_tokens": 21547328} +{"current_steps": 31985, "total_steps": 204665, "loss": 0.1172, "lr": 1.9807698882800924e-06, "epoch": 0.7813988713263137, "percentage": 15.63, "elapsed_time": "0:41:20", "remaining_time": "3:43:10", "throughput": 8688.67, "total_tokens": 21550592} +{"current_steps": 31990, "total_steps": 204665, "loss": 0.1161, "lr": 1.9807532412820165e-06, "epoch": 0.7815210221581609, "percentage": 15.63, "elapsed_time": "0:41:20", "remaining_time": "3:43:10", "throughput": 8688.83, "total_tokens": 21554048} +{"current_steps": 31995, "total_steps": 204665, "loss": 0.1212, "lr": 1.9807365871516394e-06, "epoch": 0.7816431729900081, "percentage": 15.63, "elapsed_time": "0:41:21", "remaining_time": "3:43:09", "throughput": 8688.97, "total_tokens": 21557440} +{"current_steps": 32000, "total_steps": 204665, "loss": 0.0333, "lr": 1.9807199258890823e-06, "epoch": 0.7817653238218553, "percentage": 15.64, "elapsed_time": "0:41:21", "remaining_time": "3:43:08", "throughput": 8689.27, "total_tokens": 21561280} +{"current_steps": 32005, "total_steps": 204665, "loss": 0.0678, "lr": 1.980703257494466e-06, "epoch": 0.7818874746537023, "percentage": 15.64, "elapsed_time": "0:41:21", "remaining_time": "3:43:08", "throughput": 8689.41, "total_tokens": 21564672} +{"current_steps": 32010, "total_steps": 204665, "loss": 0.0862, "lr": 1.9806865819679116e-06, "epoch": 0.7820096254855495, "percentage": 15.64, "elapsed_time": "0:41:22", "remaining_time": "3:43:07", "throughput": 8689.77, "total_tokens": 21568704} +{"current_steps": 32015, "total_steps": 204665, "loss": 0.026, "lr": 1.9806698993095407e-06, "epoch": 0.7821317763173967, "percentage": 15.64, "elapsed_time": "0:41:22", "remaining_time": "3:43:07", "throughput": 8689.94, "total_tokens": 21572160} +{"current_steps": 32020, "total_steps": 204665, "loss": 0.247, "lr": 1.9806532095194742e-06, "epoch": 0.7822539271492439, "percentage": 15.65, "elapsed_time": "0:41:22", "remaining_time": "3:43:06", "throughput": 8690.14, "total_tokens": 21575680} +{"current_steps": 32025, "total_steps": 204665, "loss": 0.1509, "lr": 1.980636512597834e-06, "epoch": 0.7823760779810911, "percentage": 15.65, "elapsed_time": "0:41:23", "remaining_time": "3:43:06", "throughput": 8690.28, "total_tokens": 21579072} +{"current_steps": 32030, "total_steps": 204665, "loss": 0.1559, "lr": 1.9806198085447417e-06, "epoch": 0.7824982288129382, "percentage": 15.65, "elapsed_time": "0:41:23", "remaining_time": "3:43:05", "throughput": 8690.49, "total_tokens": 21582656} +{"current_steps": 32035, "total_steps": 204665, "loss": 0.1118, "lr": 1.9806030973603183e-06, "epoch": 0.7826203796447854, "percentage": 15.65, "elapsed_time": "0:41:23", "remaining_time": "3:43:04", "throughput": 8690.5, "total_tokens": 21585600} +{"current_steps": 32040, "total_steps": 204665, "loss": 0.1132, "lr": 1.980586379044685e-06, "epoch": 0.7827425304766326, "percentage": 15.65, "elapsed_time": "0:41:24", "remaining_time": "3:43:04", "throughput": 8690.63, "total_tokens": 21588928} +{"current_steps": 32045, "total_steps": 204665, "loss": 0.1005, "lr": 1.9805696535979643e-06, "epoch": 0.7828646813084797, "percentage": 15.66, "elapsed_time": "0:41:24", "remaining_time": "3:43:03", "throughput": 8690.63, "total_tokens": 21591872} +{"current_steps": 32050, "total_steps": 204665, "loss": 0.0973, "lr": 1.980552921020277e-06, "epoch": 0.7829868321403268, "percentage": 15.66, "elapsed_time": "0:41:24", "remaining_time": "3:43:02", "throughput": 8690.84, "total_tokens": 21595456} +{"current_steps": 32055, "total_steps": 204665, "loss": 0.0618, "lr": 1.980536181311745e-06, "epoch": 0.783108982972174, "percentage": 15.66, "elapsed_time": "0:41:25", "remaining_time": "3:43:02", "throughput": 8690.79, "total_tokens": 21598208} +{"current_steps": 32060, "total_steps": 204665, "loss": 0.148, "lr": 1.9805194344724906e-06, "epoch": 0.7832311338040212, "percentage": 15.66, "elapsed_time": "0:41:25", "remaining_time": "3:43:01", "throughput": 8690.79, "total_tokens": 21601152} +{"current_steps": 32065, "total_steps": 204665, "loss": 0.0887, "lr": 1.980502680502635e-06, "epoch": 0.7833532846358684, "percentage": 15.67, "elapsed_time": "0:41:25", "remaining_time": "3:43:00", "throughput": 8690.84, "total_tokens": 21604224} +{"current_steps": 32070, "total_steps": 204665, "loss": 0.1157, "lr": 1.9804859194023e-06, "epoch": 0.7834754354677156, "percentage": 15.67, "elapsed_time": "0:41:26", "remaining_time": "3:43:00", "throughput": 8690.85, "total_tokens": 21607168} +{"current_steps": 32075, "total_steps": 204665, "loss": 0.1083, "lr": 1.980469151171608e-06, "epoch": 0.7835975862995627, "percentage": 15.67, "elapsed_time": "0:41:26", "remaining_time": "3:42:59", "throughput": 8690.87, "total_tokens": 21610176} +{"current_steps": 32080, "total_steps": 204665, "loss": 0.1411, "lr": 1.9804523758106805e-06, "epoch": 0.7837197371314099, "percentage": 15.67, "elapsed_time": "0:41:26", "remaining_time": "3:42:58", "throughput": 8690.91, "total_tokens": 21613184} +{"current_steps": 32085, "total_steps": 204665, "loss": 0.1508, "lr": 1.9804355933196397e-06, "epoch": 0.783841887963257, "percentage": 15.68, "elapsed_time": "0:41:27", "remaining_time": "3:42:58", "throughput": 8691.01, "total_tokens": 21616384} +{"current_steps": 32090, "total_steps": 204665, "loss": 0.0559, "lr": 1.9804188036986068e-06, "epoch": 0.7839640387951042, "percentage": 15.68, "elapsed_time": "0:41:27", "remaining_time": "3:42:57", "throughput": 8691.17, "total_tokens": 21619840} +{"current_steps": 32095, "total_steps": 204665, "loss": 0.0879, "lr": 1.9804020069477058e-06, "epoch": 0.7840861896269513, "percentage": 15.68, "elapsed_time": "0:41:27", "remaining_time": "3:42:57", "throughput": 8691.24, "total_tokens": 21622976} +{"current_steps": 32100, "total_steps": 204665, "loss": 0.0968, "lr": 1.980385203067057e-06, "epoch": 0.7842083404587985, "percentage": 15.68, "elapsed_time": "0:41:28", "remaining_time": "3:42:56", "throughput": 8691.4, "total_tokens": 21626432} +{"current_steps": 32105, "total_steps": 204665, "loss": 0.0054, "lr": 1.9803683920567832e-06, "epoch": 0.7843304912906457, "percentage": 15.69, "elapsed_time": "0:41:28", "remaining_time": "3:42:55", "throughput": 8691.48, "total_tokens": 21629632} +{"current_steps": 32110, "total_steps": 204665, "loss": 0.1798, "lr": 1.9803515739170073e-06, "epoch": 0.7844526421224929, "percentage": 15.69, "elapsed_time": "0:41:28", "remaining_time": "3:42:55", "throughput": 8691.54, "total_tokens": 21632768} +{"current_steps": 32115, "total_steps": 204665, "loss": 0.0212, "lr": 1.9803347486478508e-06, "epoch": 0.7845747929543401, "percentage": 15.69, "elapsed_time": "0:41:29", "remaining_time": "3:42:54", "throughput": 8691.61, "total_tokens": 21635904} +{"current_steps": 32120, "total_steps": 204665, "loss": 0.1661, "lr": 1.980317916249436e-06, "epoch": 0.7846969437861872, "percentage": 15.69, "elapsed_time": "0:41:29", "remaining_time": "3:42:54", "throughput": 8691.72, "total_tokens": 21639168} +{"current_steps": 32125, "total_steps": 204665, "loss": 0.1302, "lr": 1.9803010767218864e-06, "epoch": 0.7848190946180343, "percentage": 15.7, "elapsed_time": "0:41:29", "remaining_time": "3:42:53", "throughput": 8691.82, "total_tokens": 21642432} +{"current_steps": 32130, "total_steps": 204665, "loss": 0.0673, "lr": 1.980284230065323e-06, "epoch": 0.7849412454498815, "percentage": 15.7, "elapsed_time": "0:41:30", "remaining_time": "3:42:52", "throughput": 8691.99, "total_tokens": 21645888} +{"current_steps": 32135, "total_steps": 204665, "loss": 0.3088, "lr": 1.9802673762798696e-06, "epoch": 0.7850633962817287, "percentage": 15.7, "elapsed_time": "0:41:30", "remaining_time": "3:42:52", "throughput": 8692.11, "total_tokens": 21649216} +{"current_steps": 32140, "total_steps": 204665, "loss": 0.1599, "lr": 1.9802505153656477e-06, "epoch": 0.7851855471135758, "percentage": 15.7, "elapsed_time": "0:41:31", "remaining_time": "3:42:51", "throughput": 8692.19, "total_tokens": 21652416} +{"current_steps": 32145, "total_steps": 204665, "loss": 0.0481, "lr": 1.9802336473227804e-06, "epoch": 0.785307697945423, "percentage": 15.71, "elapsed_time": "0:41:31", "remaining_time": "3:42:50", "throughput": 8692.34, "total_tokens": 21655808} +{"current_steps": 32150, "total_steps": 204665, "loss": 0.2222, "lr": 1.9802167721513906e-06, "epoch": 0.7854298487772702, "percentage": 15.71, "elapsed_time": "0:41:31", "remaining_time": "3:42:50", "throughput": 8692.39, "total_tokens": 21658880} +{"current_steps": 32155, "total_steps": 204665, "loss": 0.1291, "lr": 1.9801998898516006e-06, "epoch": 0.7855519996091174, "percentage": 15.71, "elapsed_time": "0:41:32", "remaining_time": "3:42:49", "throughput": 8692.6, "total_tokens": 21662464} +{"current_steps": 32160, "total_steps": 204665, "loss": 0.089, "lr": 1.9801830004235333e-06, "epoch": 0.7856741504409646, "percentage": 15.71, "elapsed_time": "0:41:32", "remaining_time": "3:42:49", "throughput": 8692.69, "total_tokens": 21665664} +{"current_steps": 32165, "total_steps": 204665, "loss": 0.11, "lr": 1.9801661038673123e-06, "epoch": 0.7857963012728116, "percentage": 15.72, "elapsed_time": "0:41:32", "remaining_time": "3:42:48", "throughput": 8692.79, "total_tokens": 21668928} +{"current_steps": 32170, "total_steps": 204665, "loss": 0.0739, "lr": 1.9801492001830587e-06, "epoch": 0.7859184521046588, "percentage": 15.72, "elapsed_time": "0:41:33", "remaining_time": "3:42:47", "throughput": 8693.12, "total_tokens": 21672832} +{"current_steps": 32175, "total_steps": 204665, "loss": 0.0536, "lr": 1.980132289370897e-06, "epoch": 0.786040602936506, "percentage": 15.72, "elapsed_time": "0:41:33", "remaining_time": "3:42:47", "throughput": 8693.39, "total_tokens": 21676608} +{"current_steps": 32180, "total_steps": 204665, "loss": 0.1529, "lr": 1.98011537143095e-06, "epoch": 0.7861627537683532, "percentage": 15.72, "elapsed_time": "0:41:33", "remaining_time": "3:42:46", "throughput": 8693.43, "total_tokens": 21679680} +{"current_steps": 32185, "total_steps": 204665, "loss": 0.1392, "lr": 1.98009844636334e-06, "epoch": 0.7862849046002003, "percentage": 15.73, "elapsed_time": "0:41:34", "remaining_time": "3:42:46", "throughput": 8693.43, "total_tokens": 21682624} +{"current_steps": 32190, "total_steps": 204665, "loss": 0.1319, "lr": 1.9800815141681902e-06, "epoch": 0.7864070554320475, "percentage": 15.73, "elapsed_time": "0:41:34", "remaining_time": "3:42:45", "throughput": 8693.45, "total_tokens": 21685632} +{"current_steps": 32195, "total_steps": 204665, "loss": 0.2551, "lr": 1.9800645748456247e-06, "epoch": 0.7865292062638947, "percentage": 15.73, "elapsed_time": "0:41:34", "remaining_time": "3:42:44", "throughput": 8693.6, "total_tokens": 21689024} +{"current_steps": 32200, "total_steps": 204665, "loss": 0.101, "lr": 1.9800476283957656e-06, "epoch": 0.7866513570957419, "percentage": 15.73, "elapsed_time": "0:41:35", "remaining_time": "3:42:44", "throughput": 8693.75, "total_tokens": 21692416} +{"current_steps": 32205, "total_steps": 204665, "loss": 0.105, "lr": 1.9800306748187367e-06, "epoch": 0.7867735079275889, "percentage": 15.74, "elapsed_time": "0:41:35", "remaining_time": "3:42:43", "throughput": 8693.81, "total_tokens": 21695552} +{"current_steps": 32210, "total_steps": 204665, "loss": 0.0228, "lr": 1.9800137141146612e-06, "epoch": 0.7868956587594361, "percentage": 15.74, "elapsed_time": "0:41:35", "remaining_time": "3:42:43", "throughput": 8693.89, "total_tokens": 21698752} +{"current_steps": 32215, "total_steps": 204665, "loss": 0.131, "lr": 1.979996746283662e-06, "epoch": 0.7870178095912833, "percentage": 15.74, "elapsed_time": "0:41:36", "remaining_time": "3:42:42", "throughput": 8694.1, "total_tokens": 21702336} +{"current_steps": 32220, "total_steps": 204665, "loss": 0.0525, "lr": 1.9799797713258634e-06, "epoch": 0.7871399604231305, "percentage": 15.74, "elapsed_time": "0:41:36", "remaining_time": "3:42:41", "throughput": 8694.29, "total_tokens": 21705856} +{"current_steps": 32225, "total_steps": 204665, "loss": 0.11, "lr": 1.9799627892413876e-06, "epoch": 0.7872621112549777, "percentage": 15.75, "elapsed_time": "0:41:36", "remaining_time": "3:42:41", "throughput": 8694.38, "total_tokens": 21709056} +{"current_steps": 32230, "total_steps": 204665, "loss": 0.1699, "lr": 1.9799458000303594e-06, "epoch": 0.7873842620868248, "percentage": 15.75, "elapsed_time": "0:41:37", "remaining_time": "3:42:40", "throughput": 8694.65, "total_tokens": 21712832} +{"current_steps": 32235, "total_steps": 204665, "loss": 0.1034, "lr": 1.9799288036929016e-06, "epoch": 0.787506412918672, "percentage": 15.75, "elapsed_time": "0:41:37", "remaining_time": "3:42:40", "throughput": 8694.81, "total_tokens": 21716288} +{"current_steps": 32240, "total_steps": 204665, "loss": 0.0651, "lr": 1.979911800229138e-06, "epoch": 0.7876285637505192, "percentage": 15.75, "elapsed_time": "0:41:37", "remaining_time": "3:42:39", "throughput": 8695.01, "total_tokens": 21719872} +{"current_steps": 32245, "total_steps": 204665, "loss": 0.1812, "lr": 1.979894789639192e-06, "epoch": 0.7877507145823663, "percentage": 15.76, "elapsed_time": "0:41:38", "remaining_time": "3:42:38", "throughput": 8695.15, "total_tokens": 21723264} +{"current_steps": 32250, "total_steps": 204665, "loss": 0.0791, "lr": 1.9798777719231882e-06, "epoch": 0.7878728654142134, "percentage": 15.76, "elapsed_time": "0:41:38", "remaining_time": "3:42:38", "throughput": 8695.38, "total_tokens": 21726912} +{"current_steps": 32255, "total_steps": 204665, "loss": 0.0353, "lr": 1.979860747081249e-06, "epoch": 0.7879950162460606, "percentage": 15.76, "elapsed_time": "0:41:39", "remaining_time": "3:42:37", "throughput": 8695.71, "total_tokens": 21730880} +{"current_steps": 32260, "total_steps": 204665, "loss": 0.1154, "lr": 1.979843715113499e-06, "epoch": 0.7881171670779078, "percentage": 15.76, "elapsed_time": "0:41:39", "remaining_time": "3:42:37", "throughput": 8695.8, "total_tokens": 21734080} +{"current_steps": 32265, "total_steps": 204665, "loss": 0.2999, "lr": 1.9798266760200623e-06, "epoch": 0.788239317909755, "percentage": 15.76, "elapsed_time": "0:41:39", "remaining_time": "3:42:36", "throughput": 8695.82, "total_tokens": 21737088} +{"current_steps": 32270, "total_steps": 204665, "loss": 0.0086, "lr": 1.979809629801062e-06, "epoch": 0.7883614687416022, "percentage": 15.77, "elapsed_time": "0:41:40", "remaining_time": "3:42:35", "throughput": 8695.9, "total_tokens": 21740288} +{"current_steps": 32275, "total_steps": 204665, "loss": 0.1003, "lr": 1.979792576456623e-06, "epoch": 0.7884836195734493, "percentage": 15.77, "elapsed_time": "0:41:40", "remaining_time": "3:42:35", "throughput": 8695.96, "total_tokens": 21743424} +{"current_steps": 32280, "total_steps": 204665, "loss": 0.2502, "lr": 1.9797755159868687e-06, "epoch": 0.7886057704052964, "percentage": 15.77, "elapsed_time": "0:41:40", "remaining_time": "3:42:34", "throughput": 8695.96, "total_tokens": 21746368} +{"current_steps": 32285, "total_steps": 204665, "loss": 0.0359, "lr": 1.979758448391923e-06, "epoch": 0.7887279212371436, "percentage": 15.77, "elapsed_time": "0:41:41", "remaining_time": "3:42:34", "throughput": 8696.17, "total_tokens": 21749952} +{"current_steps": 32290, "total_steps": 204665, "loss": 0.1076, "lr": 1.9797413736719105e-06, "epoch": 0.7888500720689908, "percentage": 15.78, "elapsed_time": "0:41:41", "remaining_time": "3:42:33", "throughput": 8696.32, "total_tokens": 21753408} +{"current_steps": 32295, "total_steps": 204665, "loss": 0.1068, "lr": 1.9797242918269553e-06, "epoch": 0.7889722229008379, "percentage": 15.78, "elapsed_time": "0:41:41", "remaining_time": "3:42:33", "throughput": 8696.53, "total_tokens": 21756992} +{"current_steps": 32300, "total_steps": 204665, "loss": 0.0652, "lr": 1.9797072028571816e-06, "epoch": 0.7890943737326851, "percentage": 15.78, "elapsed_time": "0:41:42", "remaining_time": "3:42:32", "throughput": 8696.77, "total_tokens": 21760704} +{"current_steps": 32305, "total_steps": 204665, "loss": 0.1639, "lr": 1.9796901067627135e-06, "epoch": 0.7892165245645323, "percentage": 15.78, "elapsed_time": "0:41:42", "remaining_time": "3:42:31", "throughput": 8696.86, "total_tokens": 21763904} +{"current_steps": 32310, "total_steps": 204665, "loss": 0.1575, "lr": 1.9796730035436756e-06, "epoch": 0.7893386753963795, "percentage": 15.79, "elapsed_time": "0:41:42", "remaining_time": "3:42:31", "throughput": 8696.97, "total_tokens": 21767168} +{"current_steps": 32315, "total_steps": 204665, "loss": 0.09, "lr": 1.9796558932001923e-06, "epoch": 0.7894608262282267, "percentage": 15.79, "elapsed_time": "0:41:43", "remaining_time": "3:42:30", "throughput": 8697.16, "total_tokens": 21770688} +{"current_steps": 32320, "total_steps": 204665, "loss": 0.1841, "lr": 1.9796387757323873e-06, "epoch": 0.7895829770600737, "percentage": 15.79, "elapsed_time": "0:41:43", "remaining_time": "3:42:30", "throughput": 8697.33, "total_tokens": 21774144} +{"current_steps": 32325, "total_steps": 204665, "loss": 0.0075, "lr": 1.979621651140386e-06, "epoch": 0.7897051278919209, "percentage": 15.79, "elapsed_time": "0:41:43", "remaining_time": "3:42:29", "throughput": 8697.42, "total_tokens": 21777344} +{"current_steps": 32330, "total_steps": 204665, "loss": 0.166, "lr": 1.9796045194243123e-06, "epoch": 0.7898272787237681, "percentage": 15.8, "elapsed_time": "0:41:44", "remaining_time": "3:42:28", "throughput": 8697.51, "total_tokens": 21780544} +{"current_steps": 32335, "total_steps": 204665, "loss": 0.121, "lr": 1.9795873805842914e-06, "epoch": 0.7899494295556153, "percentage": 15.8, "elapsed_time": "0:41:44", "remaining_time": "3:42:28", "throughput": 8697.64, "total_tokens": 21783872} +{"current_steps": 32340, "total_steps": 204665, "loss": 0.1063, "lr": 1.9795702346204473e-06, "epoch": 0.7900715803874624, "percentage": 15.8, "elapsed_time": "0:41:44", "remaining_time": "3:42:27", "throughput": 8697.78, "total_tokens": 21787264} +{"current_steps": 32345, "total_steps": 204665, "loss": 0.1067, "lr": 1.9795530815329053e-06, "epoch": 0.7901937312193096, "percentage": 15.8, "elapsed_time": "0:41:45", "remaining_time": "3:42:26", "throughput": 8697.83, "total_tokens": 21790336} +{"current_steps": 32350, "total_steps": 204665, "loss": 0.0771, "lr": 1.979535921321789e-06, "epoch": 0.7903158820511568, "percentage": 15.81, "elapsed_time": "0:41:45", "remaining_time": "3:42:26", "throughput": 8697.9, "total_tokens": 21793536} +{"current_steps": 32355, "total_steps": 204665, "loss": 0.0313, "lr": 1.979518753987225e-06, "epoch": 0.790438032883004, "percentage": 15.81, "elapsed_time": "0:41:45", "remaining_time": "3:42:25", "throughput": 8697.86, "total_tokens": 21796352} +{"current_steps": 32360, "total_steps": 204665, "loss": 0.0862, "lr": 1.979501579529337e-06, "epoch": 0.7905601837148512, "percentage": 15.81, "elapsed_time": "0:41:46", "remaining_time": "3:42:25", "throughput": 8698.11, "total_tokens": 21800064} +{"current_steps": 32365, "total_steps": 204665, "loss": 0.1273, "lr": 1.9794843979482495e-06, "epoch": 0.7906823345466982, "percentage": 15.81, "elapsed_time": "0:41:46", "remaining_time": "3:42:24", "throughput": 8698.16, "total_tokens": 21803136} +{"current_steps": 32370, "total_steps": 204665, "loss": 0.2033, "lr": 1.9794672092440884e-06, "epoch": 0.7908044853785454, "percentage": 15.82, "elapsed_time": "0:41:46", "remaining_time": "3:42:23", "throughput": 8698.29, "total_tokens": 21806464} +{"current_steps": 32375, "total_steps": 204665, "loss": 0.0742, "lr": 1.9794500134169783e-06, "epoch": 0.7909266362103926, "percentage": 15.82, "elapsed_time": "0:41:47", "remaining_time": "3:42:23", "throughput": 8698.37, "total_tokens": 21809664} +{"current_steps": 32380, "total_steps": 204665, "loss": 0.1761, "lr": 1.979432810467044e-06, "epoch": 0.7910487870422398, "percentage": 15.82, "elapsed_time": "0:41:47", "remaining_time": "3:42:22", "throughput": 8698.44, "total_tokens": 21812800} +{"current_steps": 32385, "total_steps": 204665, "loss": 0.0591, "lr": 1.9794156003944115e-06, "epoch": 0.7911709378740869, "percentage": 15.82, "elapsed_time": "0:41:48", "remaining_time": "3:42:22", "throughput": 8698.7, "total_tokens": 21816576} +{"current_steps": 32390, "total_steps": 204665, "loss": 0.1364, "lr": 1.979398383199205e-06, "epoch": 0.7912930887059341, "percentage": 15.83, "elapsed_time": "0:41:48", "remaining_time": "3:42:21", "throughput": 8698.84, "total_tokens": 21819968} +{"current_steps": 32395, "total_steps": 204665, "loss": 0.1386, "lr": 1.9793811588815496e-06, "epoch": 0.7914152395377813, "percentage": 15.83, "elapsed_time": "0:41:48", "remaining_time": "3:42:20", "throughput": 8699.03, "total_tokens": 21823488} +{"current_steps": 32400, "total_steps": 204665, "loss": 0.1915, "lr": 1.9793639274415716e-06, "epoch": 0.7915373903696284, "percentage": 15.83, "elapsed_time": "0:41:49", "remaining_time": "3:42:20", "throughput": 8699.15, "total_tokens": 21826816} +{"current_steps": 32405, "total_steps": 204665, "loss": 0.109, "lr": 1.9793466888793953e-06, "epoch": 0.7916595412014756, "percentage": 15.83, "elapsed_time": "0:41:49", "remaining_time": "3:42:19", "throughput": 8699.29, "total_tokens": 21830208} +{"current_steps": 32410, "total_steps": 204665, "loss": 0.0359, "lr": 1.9793294431951467e-06, "epoch": 0.7917816920333227, "percentage": 15.84, "elapsed_time": "0:41:49", "remaining_time": "3:42:19", "throughput": 8699.34, "total_tokens": 21833344} +{"current_steps": 32415, "total_steps": 204665, "loss": 0.1417, "lr": 1.979312190388951e-06, "epoch": 0.7919038428651699, "percentage": 15.84, "elapsed_time": "0:41:50", "remaining_time": "3:42:18", "throughput": 8699.37, "total_tokens": 21836352} +{"current_steps": 32420, "total_steps": 204665, "loss": 0.1282, "lr": 1.9792949304609336e-06, "epoch": 0.7920259936970171, "percentage": 15.84, "elapsed_time": "0:41:50", "remaining_time": "3:42:17", "throughput": 8699.61, "total_tokens": 21840000} +{"current_steps": 32425, "total_steps": 204665, "loss": 0.1626, "lr": 1.97927766341122e-06, "epoch": 0.7921481445288643, "percentage": 15.84, "elapsed_time": "0:41:50", "remaining_time": "3:42:17", "throughput": 8699.71, "total_tokens": 21843264} +{"current_steps": 32430, "total_steps": 204665, "loss": 0.1898, "lr": 1.9792603892399357e-06, "epoch": 0.7922702953607114, "percentage": 15.85, "elapsed_time": "0:41:51", "remaining_time": "3:42:16", "throughput": 8700.03, "total_tokens": 21847168} +{"current_steps": 32435, "total_steps": 204665, "loss": 0.1549, "lr": 1.9792431079472067e-06, "epoch": 0.7923924461925586, "percentage": 15.85, "elapsed_time": "0:41:51", "remaining_time": "3:42:16", "throughput": 8700.35, "total_tokens": 21851136} +{"current_steps": 32440, "total_steps": 204665, "loss": 0.1009, "lr": 1.9792258195331583e-06, "epoch": 0.7925145970244057, "percentage": 15.85, "elapsed_time": "0:41:51", "remaining_time": "3:42:15", "throughput": 8700.56, "total_tokens": 21854720} +{"current_steps": 32445, "total_steps": 204665, "loss": 0.0791, "lr": 1.9792085239979163e-06, "epoch": 0.7926367478562529, "percentage": 15.85, "elapsed_time": "0:41:52", "remaining_time": "3:42:15", "throughput": 8700.72, "total_tokens": 21858176} +{"current_steps": 32450, "total_steps": 204665, "loss": 0.2024, "lr": 1.9791912213416065e-06, "epoch": 0.7927588986881, "percentage": 15.86, "elapsed_time": "0:41:52", "remaining_time": "3:42:14", "throughput": 8700.81, "total_tokens": 21861376} +{"current_steps": 32455, "total_steps": 204665, "loss": 0.0583, "lr": 1.9791739115643547e-06, "epoch": 0.7928810495199472, "percentage": 15.86, "elapsed_time": "0:41:52", "remaining_time": "3:42:13", "throughput": 8700.85, "total_tokens": 21864448} +{"current_steps": 32460, "total_steps": 204665, "loss": 0.1266, "lr": 1.9791565946662875e-06, "epoch": 0.7930032003517944, "percentage": 15.86, "elapsed_time": "0:41:53", "remaining_time": "3:42:13", "throughput": 8701.07, "total_tokens": 21868096} +{"current_steps": 32465, "total_steps": 204665, "loss": 0.1452, "lr": 1.9791392706475298e-06, "epoch": 0.7931253511836416, "percentage": 15.86, "elapsed_time": "0:41:53", "remaining_time": "3:42:12", "throughput": 8701.15, "total_tokens": 21871296} +{"current_steps": 32470, "total_steps": 204665, "loss": 0.0596, "lr": 1.979121939508208e-06, "epoch": 0.7932475020154888, "percentage": 15.86, "elapsed_time": "0:41:53", "remaining_time": "3:42:12", "throughput": 8701.42, "total_tokens": 21875072} +{"current_steps": 32475, "total_steps": 204665, "loss": 0.1029, "lr": 1.9791046012484478e-06, "epoch": 0.7933696528473358, "percentage": 15.87, "elapsed_time": "0:41:54", "remaining_time": "3:42:11", "throughput": 8701.58, "total_tokens": 21878528} +{"current_steps": 32480, "total_steps": 204665, "loss": 0.0529, "lr": 1.9790872558683755e-06, "epoch": 0.793491803679183, "percentage": 15.87, "elapsed_time": "0:41:54", "remaining_time": "3:42:10", "throughput": 8701.82, "total_tokens": 21882176} +{"current_steps": 32485, "total_steps": 204665, "loss": 0.1334, "lr": 1.9790699033681176e-06, "epoch": 0.7936139545110302, "percentage": 15.87, "elapsed_time": "0:41:55", "remaining_time": "3:42:10", "throughput": 8702.03, "total_tokens": 21885760} +{"current_steps": 32490, "total_steps": 204665, "loss": 0.3129, "lr": 1.9790525437478002e-06, "epoch": 0.7937361053428774, "percentage": 15.87, "elapsed_time": "0:41:55", "remaining_time": "3:42:09", "throughput": 8702.17, "total_tokens": 21889152} +{"current_steps": 32495, "total_steps": 204665, "loss": 0.0127, "lr": 1.9790351770075492e-06, "epoch": 0.7938582561747245, "percentage": 15.88, "elapsed_time": "0:41:55", "remaining_time": "3:42:09", "throughput": 8702.24, "total_tokens": 21892352} +{"current_steps": 32500, "total_steps": 204665, "loss": 0.1781, "lr": 1.9790178031474912e-06, "epoch": 0.7939804070065717, "percentage": 15.88, "elapsed_time": "0:41:56", "remaining_time": "3:42:08", "throughput": 8702.4, "total_tokens": 21895744} +{"current_steps": 32505, "total_steps": 204665, "loss": 0.0983, "lr": 1.9790004221677524e-06, "epoch": 0.7941025578384189, "percentage": 15.88, "elapsed_time": "0:41:56", "remaining_time": "3:42:07", "throughput": 8702.48, "total_tokens": 21898944} +{"current_steps": 32510, "total_steps": 204665, "loss": 0.1732, "lr": 1.978983034068459e-06, "epoch": 0.7942247086702661, "percentage": 15.88, "elapsed_time": "0:41:56", "remaining_time": "3:42:07", "throughput": 8702.65, "total_tokens": 21902400} +{"current_steps": 32515, "total_steps": 204665, "loss": 0.1392, "lr": 1.9789656388497376e-06, "epoch": 0.7943468595021133, "percentage": 15.89, "elapsed_time": "0:41:57", "remaining_time": "3:42:06", "throughput": 8702.77, "total_tokens": 21905728} +{"current_steps": 32520, "total_steps": 204665, "loss": 0.1258, "lr": 1.978948236511715e-06, "epoch": 0.7944690103339603, "percentage": 15.89, "elapsed_time": "0:41:57", "remaining_time": "3:42:06", "throughput": 8702.93, "total_tokens": 21909184} +{"current_steps": 32525, "total_steps": 204665, "loss": 0.0554, "lr": 1.9789308270545175e-06, "epoch": 0.7945911611658075, "percentage": 15.89, "elapsed_time": "0:41:57", "remaining_time": "3:42:05", "throughput": 8702.97, "total_tokens": 21912256} +{"current_steps": 32530, "total_steps": 204665, "loss": 0.0334, "lr": 1.9789134104782716e-06, "epoch": 0.7947133119976547, "percentage": 15.89, "elapsed_time": "0:41:58", "remaining_time": "3:42:04", "throughput": 8703.19, "total_tokens": 21915904} +{"current_steps": 32535, "total_steps": 204665, "loss": 0.2204, "lr": 1.9788959867831043e-06, "epoch": 0.7948354628295019, "percentage": 15.9, "elapsed_time": "0:41:58", "remaining_time": "3:42:04", "throughput": 8703.31, "total_tokens": 21919232} +{"current_steps": 32540, "total_steps": 204665, "loss": 0.1402, "lr": 1.9788785559691417e-06, "epoch": 0.794957613661349, "percentage": 15.9, "elapsed_time": "0:41:58", "remaining_time": "3:42:03", "throughput": 8703.28, "total_tokens": 21922048} +{"current_steps": 32545, "total_steps": 204665, "loss": 0.0586, "lr": 1.9788611180365114e-06, "epoch": 0.7950797644931962, "percentage": 15.9, "elapsed_time": "0:41:59", "remaining_time": "3:42:03", "throughput": 8703.45, "total_tokens": 21925568} +{"current_steps": 32550, "total_steps": 204665, "loss": 0.2114, "lr": 1.9788436729853395e-06, "epoch": 0.7952019153250434, "percentage": 15.9, "elapsed_time": "0:41:59", "remaining_time": "3:42:02", "throughput": 8703.51, "total_tokens": 21928704} +{"current_steps": 32555, "total_steps": 204665, "loss": 0.1629, "lr": 1.978826220815753e-06, "epoch": 0.7953240661568906, "percentage": 15.91, "elapsed_time": "0:41:59", "remaining_time": "3:42:01", "throughput": 8703.63, "total_tokens": 21932032} +{"current_steps": 32560, "total_steps": 204665, "loss": 0.1327, "lr": 1.9788087615278793e-06, "epoch": 0.7954462169887377, "percentage": 15.91, "elapsed_time": "0:42:00", "remaining_time": "3:42:01", "throughput": 8703.88, "total_tokens": 21935808} +{"current_steps": 32565, "total_steps": 204665, "loss": 0.0649, "lr": 1.9787912951218447e-06, "epoch": 0.7955683678205848, "percentage": 15.91, "elapsed_time": "0:42:00", "remaining_time": "3:42:00", "throughput": 8704.04, "total_tokens": 21939264} +{"current_steps": 32570, "total_steps": 204665, "loss": 0.1148, "lr": 1.978773821597777e-06, "epoch": 0.795690518652432, "percentage": 15.91, "elapsed_time": "0:42:00", "remaining_time": "3:42:00", "throughput": 8704.24, "total_tokens": 21942848} +{"current_steps": 32575, "total_steps": 204665, "loss": 0.0996, "lr": 1.978756340955802e-06, "epoch": 0.7958126694842792, "percentage": 15.92, "elapsed_time": "0:42:01", "remaining_time": "3:41:59", "throughput": 8704.31, "total_tokens": 21946048} +{"current_steps": 32580, "total_steps": 204665, "loss": 0.0786, "lr": 1.9787388531960488e-06, "epoch": 0.7959348203161264, "percentage": 15.92, "elapsed_time": "0:42:01", "remaining_time": "3:41:59", "throughput": 8704.53, "total_tokens": 21949696} +{"current_steps": 32585, "total_steps": 204665, "loss": 0.0497, "lr": 1.9787213583186423e-06, "epoch": 0.7960569711479735, "percentage": 15.92, "elapsed_time": "0:42:01", "remaining_time": "3:41:58", "throughput": 8704.67, "total_tokens": 21953088} +{"current_steps": 32590, "total_steps": 204665, "loss": 0.1569, "lr": 1.9787038563237117e-06, "epoch": 0.7961791219798207, "percentage": 15.92, "elapsed_time": "0:42:02", "remaining_time": "3:41:57", "throughput": 8704.74, "total_tokens": 21956224} +{"current_steps": 32595, "total_steps": 204665, "loss": 0.0616, "lr": 1.978686347211383e-06, "epoch": 0.7963012728116678, "percentage": 15.93, "elapsed_time": "0:42:02", "remaining_time": "3:41:57", "throughput": 8704.8, "total_tokens": 21959360} +{"current_steps": 32600, "total_steps": 204665, "loss": 0.1849, "lr": 1.9786688309817836e-06, "epoch": 0.796423423643515, "percentage": 15.93, "elapsed_time": "0:42:03", "remaining_time": "3:41:56", "throughput": 8704.87, "total_tokens": 21962496} +{"current_steps": 32605, "total_steps": 204665, "loss": 0.1986, "lr": 1.978651307635042e-06, "epoch": 0.7965455744753622, "percentage": 15.93, "elapsed_time": "0:42:03", "remaining_time": "3:41:56", "throughput": 8705.06, "total_tokens": 21966016} +{"current_steps": 32610, "total_steps": 204665, "loss": 0.114, "lr": 1.9786337771712845e-06, "epoch": 0.7966677253072093, "percentage": 15.93, "elapsed_time": "0:42:03", "remaining_time": "3:41:55", "throughput": 8705.32, "total_tokens": 21969792} +{"current_steps": 32615, "total_steps": 204665, "loss": 0.1428, "lr": 1.9786162395906388e-06, "epoch": 0.7967898761390565, "percentage": 15.94, "elapsed_time": "0:42:04", "remaining_time": "3:41:54", "throughput": 8705.47, "total_tokens": 21973184} +{"current_steps": 32620, "total_steps": 204665, "loss": 0.1996, "lr": 1.9785986948932324e-06, "epoch": 0.7969120269709037, "percentage": 15.94, "elapsed_time": "0:42:04", "remaining_time": "3:41:54", "throughput": 8705.68, "total_tokens": 21976768} +{"current_steps": 32625, "total_steps": 204665, "loss": 0.0833, "lr": 1.9785811430791932e-06, "epoch": 0.7970341778027509, "percentage": 15.94, "elapsed_time": "0:42:04", "remaining_time": "3:41:53", "throughput": 8705.91, "total_tokens": 21980480} +{"current_steps": 32630, "total_steps": 204665, "loss": 0.0975, "lr": 1.9785635841486492e-06, "epoch": 0.797156328634598, "percentage": 15.94, "elapsed_time": "0:42:05", "remaining_time": "3:41:53", "throughput": 8706.2, "total_tokens": 21984320} +{"current_steps": 32635, "total_steps": 204665, "loss": 0.0554, "lr": 1.9785460181017274e-06, "epoch": 0.7972784794664451, "percentage": 15.95, "elapsed_time": "0:42:05", "remaining_time": "3:41:52", "throughput": 8706.36, "total_tokens": 21987776} +{"current_steps": 32640, "total_steps": 204665, "loss": 0.1462, "lr": 1.9785284449385557e-06, "epoch": 0.7974006302982923, "percentage": 15.95, "elapsed_time": "0:42:05", "remaining_time": "3:41:52", "throughput": 8706.56, "total_tokens": 21991360} +{"current_steps": 32645, "total_steps": 204665, "loss": 0.1373, "lr": 1.978510864659262e-06, "epoch": 0.7975227811301395, "percentage": 15.95, "elapsed_time": "0:42:06", "remaining_time": "3:41:51", "throughput": 8706.72, "total_tokens": 21994816} +{"current_steps": 32650, "total_steps": 204665, "loss": 0.1573, "lr": 1.978493277263974e-06, "epoch": 0.7976449319619867, "percentage": 15.95, "elapsed_time": "0:42:06", "remaining_time": "3:41:50", "throughput": 8706.86, "total_tokens": 21998208} +{"current_steps": 32655, "total_steps": 204665, "loss": 0.081, "lr": 1.97847568275282e-06, "epoch": 0.7977670827938338, "percentage": 15.96, "elapsed_time": "0:42:06", "remaining_time": "3:41:50", "throughput": 8707.14, "total_tokens": 22002048} +{"current_steps": 32660, "total_steps": 204665, "loss": 0.0815, "lr": 1.978458081125927e-06, "epoch": 0.797889233625681, "percentage": 15.96, "elapsed_time": "0:42:07", "remaining_time": "3:41:49", "throughput": 8707.16, "total_tokens": 22005056} +{"current_steps": 32665, "total_steps": 204665, "loss": 0.1989, "lr": 1.978440472383424e-06, "epoch": 0.7980113844575282, "percentage": 15.96, "elapsed_time": "0:42:07", "remaining_time": "3:41:49", "throughput": 8707.26, "total_tokens": 22008320} +{"current_steps": 32670, "total_steps": 204665, "loss": 0.1015, "lr": 1.978422856525439e-06, "epoch": 0.7981335352893754, "percentage": 15.96, "elapsed_time": "0:42:07", "remaining_time": "3:41:48", "throughput": 8707.18, "total_tokens": 22011008} +{"current_steps": 32675, "total_steps": 204665, "loss": 0.1529, "lr": 1.9784052335520997e-06, "epoch": 0.7982556861212224, "percentage": 15.97, "elapsed_time": "0:42:08", "remaining_time": "3:41:47", "throughput": 8707.2, "total_tokens": 22014016} +{"current_steps": 32680, "total_steps": 204665, "loss": 0.0387, "lr": 1.978387603463534e-06, "epoch": 0.7983778369530696, "percentage": 15.97, "elapsed_time": "0:42:08", "remaining_time": "3:41:47", "throughput": 8707.37, "total_tokens": 22017472} +{"current_steps": 32685, "total_steps": 204665, "loss": 0.0815, "lr": 1.978369966259871e-06, "epoch": 0.7984999877849168, "percentage": 15.97, "elapsed_time": "0:42:08", "remaining_time": "3:41:46", "throughput": 8707.42, "total_tokens": 22020544} +{"current_steps": 32690, "total_steps": 204665, "loss": 0.0838, "lr": 1.978352321941238e-06, "epoch": 0.798622138616764, "percentage": 15.97, "elapsed_time": "0:42:09", "remaining_time": "3:41:46", "throughput": 8707.5, "total_tokens": 22023744} +{"current_steps": 32695, "total_steps": 204665, "loss": 0.0937, "lr": 1.978334670507764e-06, "epoch": 0.7987442894486112, "percentage": 15.97, "elapsed_time": "0:42:09", "remaining_time": "3:41:45", "throughput": 8707.59, "total_tokens": 22026944} +{"current_steps": 32700, "total_steps": 204665, "loss": 0.1423, "lr": 1.9783170119595775e-06, "epoch": 0.7988664402804583, "percentage": 15.98, "elapsed_time": "0:42:09", "remaining_time": "3:41:44", "throughput": 8707.67, "total_tokens": 22030144} +{"current_steps": 32705, "total_steps": 204665, "loss": 0.1669, "lr": 1.978299346296806e-06, "epoch": 0.7989885911123055, "percentage": 15.98, "elapsed_time": "0:42:10", "remaining_time": "3:41:44", "throughput": 8707.77, "total_tokens": 22033408} +{"current_steps": 32710, "total_steps": 204665, "loss": 0.1384, "lr": 1.9782816735195786e-06, "epoch": 0.7991107419441527, "percentage": 15.98, "elapsed_time": "0:42:10", "remaining_time": "3:41:43", "throughput": 8707.95, "total_tokens": 22036928} +{"current_steps": 32715, "total_steps": 204665, "loss": 0.201, "lr": 1.978263993628024e-06, "epoch": 0.7992328927759998, "percentage": 15.98, "elapsed_time": "0:42:11", "remaining_time": "3:41:43", "throughput": 8708.16, "total_tokens": 22040512} +{"current_steps": 32720, "total_steps": 204665, "loss": 0.0564, "lr": 1.9782463066222702e-06, "epoch": 0.7993550436078469, "percentage": 15.99, "elapsed_time": "0:42:11", "remaining_time": "3:41:42", "throughput": 8708.25, "total_tokens": 22043776} +{"current_steps": 32725, "total_steps": 204665, "loss": 0.1006, "lr": 1.9782286125024464e-06, "epoch": 0.7994771944396941, "percentage": 15.99, "elapsed_time": "0:42:11", "remaining_time": "3:41:41", "throughput": 8708.27, "total_tokens": 22046784} +{"current_steps": 32730, "total_steps": 204665, "loss": 0.125, "lr": 1.9782109112686812e-06, "epoch": 0.7995993452715413, "percentage": 15.99, "elapsed_time": "0:42:12", "remaining_time": "3:41:41", "throughput": 8708.32, "total_tokens": 22049856} +{"current_steps": 32735, "total_steps": 204665, "loss": 0.1607, "lr": 1.9781932029211027e-06, "epoch": 0.7997214961033885, "percentage": 15.99, "elapsed_time": "0:42:12", "remaining_time": "3:41:40", "throughput": 8708.4, "total_tokens": 22053056} +{"current_steps": 32740, "total_steps": 204665, "loss": 0.1071, "lr": 1.9781754874598403e-06, "epoch": 0.7998436469352356, "percentage": 16.0, "elapsed_time": "0:42:12", "remaining_time": "3:41:39", "throughput": 8708.44, "total_tokens": 22056128} +{"current_steps": 32745, "total_steps": 204665, "loss": 0.1883, "lr": 1.978157764885023e-06, "epoch": 0.7999657977670828, "percentage": 16.0, "elapsed_time": "0:42:13", "remaining_time": "3:41:39", "throughput": 8708.58, "total_tokens": 22059520} +{"current_steps": 32750, "total_steps": 204665, "loss": 0.0534, "lr": 1.9781400351967787e-06, "epoch": 0.80008794859893, "percentage": 16.0, "elapsed_time": "0:42:13", "remaining_time": "3:41:38", "throughput": 8708.81, "total_tokens": 22063168} +{"current_steps": 32755, "total_steps": 204665, "loss": 0.199, "lr": 1.9781222983952374e-06, "epoch": 0.8002100994307771, "percentage": 16.0, "elapsed_time": "0:42:13", "remaining_time": "3:41:38", "throughput": 8708.94, "total_tokens": 22066496} +{"current_steps": 32760, "total_steps": 204665, "loss": 0.0776, "lr": 1.9781045544805273e-06, "epoch": 0.8003322502626243, "percentage": 16.01, "elapsed_time": "0:42:14", "remaining_time": "3:41:37", "throughput": 8709.02, "total_tokens": 22069696} +{"current_steps": 32765, "total_steps": 204665, "loss": 0.0598, "lr": 1.9780868034527783e-06, "epoch": 0.8004544010944714, "percentage": 16.01, "elapsed_time": "0:42:14", "remaining_time": "3:41:37", "throughput": 8709.28, "total_tokens": 22073472} +{"current_steps": 32770, "total_steps": 204665, "loss": 0.1173, "lr": 1.9780690453121185e-06, "epoch": 0.8005765519263186, "percentage": 16.01, "elapsed_time": "0:42:14", "remaining_time": "3:41:36", "throughput": 8709.26, "total_tokens": 22076352} +{"current_steps": 32775, "total_steps": 204665, "loss": 0.0787, "lr": 1.978051280058678e-06, "epoch": 0.8006987027581658, "percentage": 16.01, "elapsed_time": "0:42:15", "remaining_time": "3:41:35", "throughput": 8709.43, "total_tokens": 22079808} +{"current_steps": 32780, "total_steps": 204665, "loss": 0.1477, "lr": 1.978033507692585e-06, "epoch": 0.800820853590013, "percentage": 16.02, "elapsed_time": "0:42:15", "remaining_time": "3:41:35", "throughput": 8709.49, "total_tokens": 22082944} +{"current_steps": 32785, "total_steps": 204665, "loss": 0.1523, "lr": 1.9780157282139697e-06, "epoch": 0.8009430044218601, "percentage": 16.02, "elapsed_time": "0:42:15", "remaining_time": "3:41:34", "throughput": 8709.63, "total_tokens": 22086336} +{"current_steps": 32790, "total_steps": 204665, "loss": 0.0947, "lr": 1.977997941622961e-06, "epoch": 0.8010651552537073, "percentage": 16.02, "elapsed_time": "0:42:16", "remaining_time": "3:41:33", "throughput": 8709.63, "total_tokens": 22089280} +{"current_steps": 32795, "total_steps": 204665, "loss": 0.1376, "lr": 1.9779801479196877e-06, "epoch": 0.8011873060855544, "percentage": 16.02, "elapsed_time": "0:42:16", "remaining_time": "3:41:33", "throughput": 8709.91, "total_tokens": 22093120} +{"current_steps": 32800, "total_steps": 204665, "loss": 0.1301, "lr": 1.97796234710428e-06, "epoch": 0.8013094569174016, "percentage": 16.03, "elapsed_time": "0:42:16", "remaining_time": "3:41:32", "throughput": 8710.1, "total_tokens": 22096640} +{"current_steps": 32805, "total_steps": 204665, "loss": 0.1151, "lr": 1.9779445391768673e-06, "epoch": 0.8014316077492488, "percentage": 16.03, "elapsed_time": "0:42:17", "remaining_time": "3:41:32", "throughput": 8710.16, "total_tokens": 22099776} +{"current_steps": 32810, "total_steps": 204665, "loss": 0.0475, "lr": 1.9779267241375786e-06, "epoch": 0.8015537585810959, "percentage": 16.03, "elapsed_time": "0:42:17", "remaining_time": "3:41:31", "throughput": 8710.22, "total_tokens": 22102912} +{"current_steps": 32815, "total_steps": 204665, "loss": 0.0806, "lr": 1.977908901986544e-06, "epoch": 0.8016759094129431, "percentage": 16.03, "elapsed_time": "0:42:17", "remaining_time": "3:41:30", "throughput": 8710.36, "total_tokens": 22106304} +{"current_steps": 32820, "total_steps": 204665, "loss": 0.0678, "lr": 1.9778910727238925e-06, "epoch": 0.8017980602447903, "percentage": 16.04, "elapsed_time": "0:42:18", "remaining_time": "3:41:30", "throughput": 8710.47, "total_tokens": 22109568} +{"current_steps": 32825, "total_steps": 204665, "loss": 0.1423, "lr": 1.9778732363497544e-06, "epoch": 0.8019202110766375, "percentage": 16.04, "elapsed_time": "0:42:18", "remaining_time": "3:41:29", "throughput": 8710.61, "total_tokens": 22112960} +{"current_steps": 32830, "total_steps": 204665, "loss": 0.0479, "lr": 1.977855392864259e-06, "epoch": 0.8020423619084845, "percentage": 16.04, "elapsed_time": "0:42:18", "remaining_time": "3:41:29", "throughput": 8710.67, "total_tokens": 22116096} +{"current_steps": 32835, "total_steps": 204665, "loss": 0.0336, "lr": 1.977837542267536e-06, "epoch": 0.8021645127403317, "percentage": 16.04, "elapsed_time": "0:42:19", "remaining_time": "3:41:28", "throughput": 8710.82, "total_tokens": 22119552} +{"current_steps": 32840, "total_steps": 204665, "loss": 0.0892, "lr": 1.9778196845597157e-06, "epoch": 0.8022866635721789, "percentage": 16.05, "elapsed_time": "0:42:19", "remaining_time": "3:41:27", "throughput": 8710.9, "total_tokens": 22122752} +{"current_steps": 32845, "total_steps": 204665, "loss": 0.0598, "lr": 1.977801819740928e-06, "epoch": 0.8024088144040261, "percentage": 16.05, "elapsed_time": "0:42:20", "remaining_time": "3:41:27", "throughput": 8710.99, "total_tokens": 22125952} +{"current_steps": 32850, "total_steps": 204665, "loss": 0.134, "lr": 1.9777839478113015e-06, "epoch": 0.8025309652358733, "percentage": 16.05, "elapsed_time": "0:42:20", "remaining_time": "3:41:26", "throughput": 8711.06, "total_tokens": 22129152} +{"current_steps": 32855, "total_steps": 204665, "loss": 0.0702, "lr": 1.9777660687709678e-06, "epoch": 0.8026531160677204, "percentage": 16.05, "elapsed_time": "0:42:20", "remaining_time": "3:41:26", "throughput": 8711.17, "total_tokens": 22132416} +{"current_steps": 32860, "total_steps": 204665, "loss": 0.1017, "lr": 1.977748182620056e-06, "epoch": 0.8027752668995676, "percentage": 16.06, "elapsed_time": "0:42:21", "remaining_time": "3:41:25", "throughput": 8711.3, "total_tokens": 22135808} +{"current_steps": 32865, "total_steps": 204665, "loss": 0.1627, "lr": 1.9777302893586966e-06, "epoch": 0.8028974177314148, "percentage": 16.06, "elapsed_time": "0:42:21", "remaining_time": "3:41:24", "throughput": 8711.36, "total_tokens": 22138944} +{"current_steps": 32870, "total_steps": 204665, "loss": 0.0645, "lr": 1.9777123889870197e-06, "epoch": 0.803019568563262, "percentage": 16.06, "elapsed_time": "0:42:21", "remaining_time": "3:41:24", "throughput": 8711.48, "total_tokens": 22142272} +{"current_steps": 32875, "total_steps": 204665, "loss": 0.1231, "lr": 1.9776944815051547e-06, "epoch": 0.803141719395109, "percentage": 16.06, "elapsed_time": "0:42:22", "remaining_time": "3:41:23", "throughput": 8711.56, "total_tokens": 22145472} +{"current_steps": 32880, "total_steps": 204665, "loss": 0.0995, "lr": 1.977676566913233e-06, "epoch": 0.8032638702269562, "percentage": 16.07, "elapsed_time": "0:42:22", "remaining_time": "3:41:23", "throughput": 8711.63, "total_tokens": 22148608} +{"current_steps": 32885, "total_steps": 204665, "loss": 0.1121, "lr": 1.9776586452113842e-06, "epoch": 0.8033860210588034, "percentage": 16.07, "elapsed_time": "0:42:22", "remaining_time": "3:41:22", "throughput": 8711.79, "total_tokens": 22152064} +{"current_steps": 32890, "total_steps": 204665, "loss": 0.098, "lr": 1.977640716399739e-06, "epoch": 0.8035081718906506, "percentage": 16.07, "elapsed_time": "0:42:23", "remaining_time": "3:41:21", "throughput": 8711.89, "total_tokens": 22155328} +{"current_steps": 32895, "total_steps": 204665, "loss": 0.0745, "lr": 1.9776227804784275e-06, "epoch": 0.8036303227224978, "percentage": 16.07, "elapsed_time": "0:42:23", "remaining_time": "3:41:21", "throughput": 8712.09, "total_tokens": 22158912} +{"current_steps": 32900, "total_steps": 204665, "loss": 0.0898, "lr": 1.9776048374475794e-06, "epoch": 0.8037524735543449, "percentage": 16.08, "elapsed_time": "0:42:23", "remaining_time": "3:41:20", "throughput": 8712.17, "total_tokens": 22162112} +{"current_steps": 32905, "total_steps": 204665, "loss": 0.1985, "lr": 1.9775868873073267e-06, "epoch": 0.8038746243861921, "percentage": 16.08, "elapsed_time": "0:42:24", "remaining_time": "3:41:20", "throughput": 8712.3, "total_tokens": 22165504} +{"current_steps": 32910, "total_steps": 204665, "loss": 0.038, "lr": 1.9775689300577995e-06, "epoch": 0.8039967752180393, "percentage": 16.08, "elapsed_time": "0:42:24", "remaining_time": "3:41:19", "throughput": 8712.42, "total_tokens": 22168832} +{"current_steps": 32915, "total_steps": 204665, "loss": 0.1404, "lr": 1.9775509656991277e-06, "epoch": 0.8041189260498864, "percentage": 16.08, "elapsed_time": "0:42:24", "remaining_time": "3:41:19", "throughput": 8712.52, "total_tokens": 22172096} +{"current_steps": 32920, "total_steps": 204665, "loss": 0.0826, "lr": 1.977532994231442e-06, "epoch": 0.8042410768817335, "percentage": 16.08, "elapsed_time": "0:42:25", "remaining_time": "3:41:18", "throughput": 8712.62, "total_tokens": 22175360} +{"current_steps": 32925, "total_steps": 204665, "loss": 0.0411, "lr": 1.9775150156548743e-06, "epoch": 0.8043632277135807, "percentage": 16.09, "elapsed_time": "0:42:25", "remaining_time": "3:41:17", "throughput": 8712.84, "total_tokens": 22179008} +{"current_steps": 32930, "total_steps": 204665, "loss": 0.1473, "lr": 1.977497029969554e-06, "epoch": 0.8044853785454279, "percentage": 16.09, "elapsed_time": "0:42:25", "remaining_time": "3:41:17", "throughput": 8712.96, "total_tokens": 22182336} +{"current_steps": 32935, "total_steps": 204665, "loss": 0.0851, "lr": 1.977479037175612e-06, "epoch": 0.8046075293772751, "percentage": 16.09, "elapsed_time": "0:42:26", "remaining_time": "3:41:16", "throughput": 8713.02, "total_tokens": 22185472} +{"current_steps": 32940, "total_steps": 204665, "loss": 0.2425, "lr": 1.97746103727318e-06, "epoch": 0.8047296802091223, "percentage": 16.09, "elapsed_time": "0:42:26", "remaining_time": "3:41:16", "throughput": 8713.23, "total_tokens": 22189120} +{"current_steps": 32945, "total_steps": 204665, "loss": 0.1671, "lr": 1.9774430302623887e-06, "epoch": 0.8048518310409694, "percentage": 16.1, "elapsed_time": "0:42:26", "remaining_time": "3:41:15", "throughput": 8713.46, "total_tokens": 22192768} +{"current_steps": 32950, "total_steps": 204665, "loss": 0.2252, "lr": 1.977425016143368e-06, "epoch": 0.8049739818728165, "percentage": 16.1, "elapsed_time": "0:42:27", "remaining_time": "3:41:14", "throughput": 8713.64, "total_tokens": 22196288} +{"current_steps": 32955, "total_steps": 204665, "loss": 0.1459, "lr": 1.9774069949162504e-06, "epoch": 0.8050961327046637, "percentage": 16.1, "elapsed_time": "0:42:27", "remaining_time": "3:41:14", "throughput": 8713.79, "total_tokens": 22199744} +{"current_steps": 32960, "total_steps": 204665, "loss": 0.132, "lr": 1.9773889665811657e-06, "epoch": 0.8052182835365109, "percentage": 16.1, "elapsed_time": "0:42:28", "remaining_time": "3:41:13", "throughput": 8714.06, "total_tokens": 22203520} +{"current_steps": 32965, "total_steps": 204665, "loss": 0.0947, "lr": 1.977370931138246e-06, "epoch": 0.805340434368358, "percentage": 16.11, "elapsed_time": "0:42:28", "remaining_time": "3:41:13", "throughput": 8714.21, "total_tokens": 22206976} +{"current_steps": 32970, "total_steps": 204665, "loss": 0.2265, "lr": 1.977352888587622e-06, "epoch": 0.8054625852002052, "percentage": 16.11, "elapsed_time": "0:42:28", "remaining_time": "3:41:12", "throughput": 8714.39, "total_tokens": 22210496} +{"current_steps": 32975, "total_steps": 204665, "loss": 0.09, "lr": 1.9773348389294243e-06, "epoch": 0.8055847360320524, "percentage": 16.11, "elapsed_time": "0:42:29", "remaining_time": "3:41:12", "throughput": 8714.5, "total_tokens": 22213760} +{"current_steps": 32980, "total_steps": 204665, "loss": 0.1072, "lr": 1.9773167821637855e-06, "epoch": 0.8057068868638996, "percentage": 16.11, "elapsed_time": "0:42:29", "remaining_time": "3:41:11", "throughput": 8714.63, "total_tokens": 22217152} +{"current_steps": 32985, "total_steps": 204665, "loss": 0.1145, "lr": 1.977298718290836e-06, "epoch": 0.8058290376957467, "percentage": 16.12, "elapsed_time": "0:42:29", "remaining_time": "3:41:11", "throughput": 8714.93, "total_tokens": 22221056} +{"current_steps": 32990, "total_steps": 204665, "loss": 0.0897, "lr": 1.9772806473107072e-06, "epoch": 0.8059511885275938, "percentage": 16.12, "elapsed_time": "0:42:30", "remaining_time": "3:41:10", "throughput": 8715.03, "total_tokens": 22224320} +{"current_steps": 32995, "total_steps": 204665, "loss": 0.1855, "lr": 1.977262569223531e-06, "epoch": 0.806073339359441, "percentage": 16.12, "elapsed_time": "0:42:30", "remaining_time": "3:41:09", "throughput": 8715.2, "total_tokens": 22227840} +{"current_steps": 33000, "total_steps": 204665, "loss": 0.0983, "lr": 1.977244484029438e-06, "epoch": 0.8061954901912882, "percentage": 16.12, "elapsed_time": "0:42:30", "remaining_time": "3:41:09", "throughput": 8715.26, "total_tokens": 22230976} +{"current_steps": 33005, "total_steps": 204665, "loss": 0.1356, "lr": 1.9772263917285606e-06, "epoch": 0.8063176410231354, "percentage": 16.13, "elapsed_time": "0:42:31", "remaining_time": "3:41:08", "throughput": 8715.42, "total_tokens": 22234432} +{"current_steps": 33010, "total_steps": 204665, "loss": 0.1449, "lr": 1.97720829232103e-06, "epoch": 0.8064397918549825, "percentage": 16.13, "elapsed_time": "0:42:31", "remaining_time": "3:41:08", "throughput": 8715.56, "total_tokens": 22237824} +{"current_steps": 33015, "total_steps": 204665, "loss": 0.1319, "lr": 1.9771901858069778e-06, "epoch": 0.8065619426868297, "percentage": 16.13, "elapsed_time": "0:42:31", "remaining_time": "3:41:07", "throughput": 8715.66, "total_tokens": 22241088} +{"current_steps": 33020, "total_steps": 204665, "loss": 0.1683, "lr": 1.9771720721865355e-06, "epoch": 0.8066840935186769, "percentage": 16.13, "elapsed_time": "0:42:32", "remaining_time": "3:41:06", "throughput": 8715.76, "total_tokens": 22244352} +{"current_steps": 33025, "total_steps": 204665, "loss": 0.1275, "lr": 1.977153951459836e-06, "epoch": 0.8068062443505241, "percentage": 16.14, "elapsed_time": "0:42:32", "remaining_time": "3:41:06", "throughput": 8715.83, "total_tokens": 22247552} +{"current_steps": 33030, "total_steps": 204665, "loss": 0.083, "lr": 1.977135823627009e-06, "epoch": 0.8069283951823711, "percentage": 16.14, "elapsed_time": "0:42:32", "remaining_time": "3:41:05", "throughput": 8716.13, "total_tokens": 22251456} +{"current_steps": 33035, "total_steps": 204665, "loss": 0.1143, "lr": 1.9771176886881883e-06, "epoch": 0.8070505460142183, "percentage": 16.14, "elapsed_time": "0:42:33", "remaining_time": "3:41:05", "throughput": 8716.22, "total_tokens": 22254656} +{"current_steps": 33040, "total_steps": 204665, "loss": 0.0217, "lr": 1.9770995466435044e-06, "epoch": 0.8071726968460655, "percentage": 16.14, "elapsed_time": "0:42:33", "remaining_time": "3:41:04", "throughput": 8716.26, "total_tokens": 22257728} +{"current_steps": 33045, "total_steps": 204665, "loss": 0.1372, "lr": 1.97708139749309e-06, "epoch": 0.8072948476779127, "percentage": 16.15, "elapsed_time": "0:42:33", "remaining_time": "3:41:03", "throughput": 8716.36, "total_tokens": 22260992} +{"current_steps": 33050, "total_steps": 204665, "loss": 0.1286, "lr": 1.977063241237077e-06, "epoch": 0.8074169985097599, "percentage": 16.15, "elapsed_time": "0:42:34", "remaining_time": "3:41:03", "throughput": 8716.42, "total_tokens": 22264128} +{"current_steps": 33055, "total_steps": 204665, "loss": 0.103, "lr": 1.9770450778755972e-06, "epoch": 0.807539149341607, "percentage": 16.15, "elapsed_time": "0:42:34", "remaining_time": "3:41:02", "throughput": 8716.52, "total_tokens": 22267392} +{"current_steps": 33060, "total_steps": 204665, "loss": 0.1014, "lr": 1.9770269074087825e-06, "epoch": 0.8076613001734542, "percentage": 16.15, "elapsed_time": "0:42:34", "remaining_time": "3:41:02", "throughput": 8716.64, "total_tokens": 22270720} +{"current_steps": 33065, "total_steps": 204665, "loss": 0.0469, "lr": 1.9770087298367657e-06, "epoch": 0.8077834510053014, "percentage": 16.16, "elapsed_time": "0:42:35", "remaining_time": "3:41:01", "throughput": 8716.69, "total_tokens": 22273856} +{"current_steps": 33070, "total_steps": 204665, "loss": 0.088, "lr": 1.976990545159679e-06, "epoch": 0.8079056018371485, "percentage": 16.16, "elapsed_time": "0:42:35", "remaining_time": "3:41:00", "throughput": 8716.83, "total_tokens": 22277248} +{"current_steps": 33075, "total_steps": 204665, "loss": 0.1035, "lr": 1.9769723533776537e-06, "epoch": 0.8080277526689956, "percentage": 16.16, "elapsed_time": "0:42:36", "remaining_time": "3:41:00", "throughput": 8716.93, "total_tokens": 22280512} +{"current_steps": 33080, "total_steps": 204665, "loss": 0.1061, "lr": 1.9769541544908228e-06, "epoch": 0.8081499035008428, "percentage": 16.16, "elapsed_time": "0:42:36", "remaining_time": "3:40:59", "throughput": 8717.03, "total_tokens": 22283776} +{"current_steps": 33085, "total_steps": 204665, "loss": 0.1643, "lr": 1.9769359484993183e-06, "epoch": 0.80827205433269, "percentage": 16.17, "elapsed_time": "0:42:36", "remaining_time": "3:40:59", "throughput": 8717.47, "total_tokens": 22288128} +{"current_steps": 33090, "total_steps": 204665, "loss": 0.0042, "lr": 1.976917735403273e-06, "epoch": 0.8083942051645372, "percentage": 16.17, "elapsed_time": "0:42:37", "remaining_time": "3:40:58", "throughput": 8717.58, "total_tokens": 22291456} +{"current_steps": 33095, "total_steps": 204665, "loss": 0.2322, "lr": 1.976899515202819e-06, "epoch": 0.8085163559963844, "percentage": 16.17, "elapsed_time": "0:42:37", "remaining_time": "3:40:58", "throughput": 8717.63, "total_tokens": 22294528} +{"current_steps": 33100, "total_steps": 204665, "loss": 0.0393, "lr": 1.9768812878980896e-06, "epoch": 0.8086385068282315, "percentage": 16.17, "elapsed_time": "0:42:37", "remaining_time": "3:40:57", "throughput": 8717.63, "total_tokens": 22297472} +{"current_steps": 33105, "total_steps": 204665, "loss": 0.1869, "lr": 1.9768630534892164e-06, "epoch": 0.8087606576600787, "percentage": 16.18, "elapsed_time": "0:42:38", "remaining_time": "3:40:56", "throughput": 8717.7, "total_tokens": 22300672} +{"current_steps": 33110, "total_steps": 204665, "loss": 0.0332, "lr": 1.976844811976332e-06, "epoch": 0.8088828084919258, "percentage": 16.18, "elapsed_time": "0:42:38", "remaining_time": "3:40:56", "throughput": 8717.93, "total_tokens": 22304320} +{"current_steps": 33115, "total_steps": 204665, "loss": 0.1254, "lr": 1.97682656335957e-06, "epoch": 0.809004959323773, "percentage": 16.18, "elapsed_time": "0:42:38", "remaining_time": "3:40:55", "throughput": 8717.92, "total_tokens": 22307200} +{"current_steps": 33120, "total_steps": 204665, "loss": 0.1566, "lr": 1.976808307639062e-06, "epoch": 0.8091271101556201, "percentage": 16.18, "elapsed_time": "0:42:39", "remaining_time": "3:40:54", "throughput": 8718.01, "total_tokens": 22310464} +{"current_steps": 33125, "total_steps": 204665, "loss": 0.1316, "lr": 1.976790044814941e-06, "epoch": 0.8092492609874673, "percentage": 16.18, "elapsed_time": "0:42:39", "remaining_time": "3:40:54", "throughput": 8718.13, "total_tokens": 22313792} +{"current_steps": 33130, "total_steps": 204665, "loss": 0.0509, "lr": 1.976771774887341e-06, "epoch": 0.8093714118193145, "percentage": 16.19, "elapsed_time": "0:42:39", "remaining_time": "3:40:53", "throughput": 8718.33, "total_tokens": 22317376} +{"current_steps": 33135, "total_steps": 204665, "loss": 0.1537, "lr": 1.976753497856393e-06, "epoch": 0.8094935626511617, "percentage": 16.19, "elapsed_time": "0:42:40", "remaining_time": "3:40:53", "throughput": 8718.55, "total_tokens": 22321024} +{"current_steps": 33140, "total_steps": 204665, "loss": 0.0427, "lr": 1.9767352137222313e-06, "epoch": 0.8096157134830089, "percentage": 16.19, "elapsed_time": "0:42:40", "remaining_time": "3:40:52", "throughput": 8718.64, "total_tokens": 22324288} +{"current_steps": 33145, "total_steps": 204665, "loss": 0.1251, "lr": 1.9767169224849884e-06, "epoch": 0.809737864314856, "percentage": 16.19, "elapsed_time": "0:42:40", "remaining_time": "3:40:52", "throughput": 8718.68, "total_tokens": 22327360} +{"current_steps": 33150, "total_steps": 204665, "loss": 0.1213, "lr": 1.9766986241447975e-06, "epoch": 0.8098600151467031, "percentage": 16.2, "elapsed_time": "0:42:41", "remaining_time": "3:40:51", "throughput": 8718.81, "total_tokens": 22330688} +{"current_steps": 33155, "total_steps": 204665, "loss": 0.1751, "lr": 1.9766803187017914e-06, "epoch": 0.8099821659785503, "percentage": 16.2, "elapsed_time": "0:42:41", "remaining_time": "3:40:50", "throughput": 8719.01, "total_tokens": 22334272} +{"current_steps": 33160, "total_steps": 204665, "loss": 0.0847, "lr": 1.976662006156103e-06, "epoch": 0.8101043168103975, "percentage": 16.2, "elapsed_time": "0:42:41", "remaining_time": "3:40:50", "throughput": 8719.11, "total_tokens": 22337600} +{"current_steps": 33165, "total_steps": 204665, "loss": 0.1183, "lr": 1.9766436865078663e-06, "epoch": 0.8102264676422446, "percentage": 16.2, "elapsed_time": "0:42:42", "remaining_time": "3:40:49", "throughput": 8719.15, "total_tokens": 22340672} +{"current_steps": 33170, "total_steps": 204665, "loss": 0.2254, "lr": 1.9766253597572136e-06, "epoch": 0.8103486184740918, "percentage": 16.21, "elapsed_time": "0:42:42", "remaining_time": "3:40:49", "throughput": 8719.24, "total_tokens": 22343936} +{"current_steps": 33175, "total_steps": 204665, "loss": 0.1445, "lr": 1.9766070259042786e-06, "epoch": 0.810470769305939, "percentage": 16.21, "elapsed_time": "0:42:42", "remaining_time": "3:40:48", "throughput": 8719.29, "total_tokens": 22347008} +{"current_steps": 33180, "total_steps": 204665, "loss": 0.1202, "lr": 1.976588684949195e-06, "epoch": 0.8105929201377862, "percentage": 16.21, "elapsed_time": "0:42:43", "remaining_time": "3:40:47", "throughput": 8719.3, "total_tokens": 22350016} +{"current_steps": 33185, "total_steps": 204665, "loss": 0.1223, "lr": 1.9765703368920958e-06, "epoch": 0.8107150709696334, "percentage": 16.21, "elapsed_time": "0:42:43", "remaining_time": "3:40:47", "throughput": 8719.44, "total_tokens": 22353408} +{"current_steps": 33190, "total_steps": 204665, "loss": 0.0386, "lr": 1.976551981733114e-06, "epoch": 0.8108372218014804, "percentage": 16.22, "elapsed_time": "0:42:43", "remaining_time": "3:40:46", "throughput": 8719.56, "total_tokens": 22356736} +{"current_steps": 33195, "total_steps": 204665, "loss": 0.1587, "lr": 1.9765336194723836e-06, "epoch": 0.8109593726333276, "percentage": 16.22, "elapsed_time": "0:42:44", "remaining_time": "3:40:46", "throughput": 8719.78, "total_tokens": 22360384} +{"current_steps": 33200, "total_steps": 204665, "loss": 0.1238, "lr": 1.9765152501100386e-06, "epoch": 0.8110815234651748, "percentage": 16.22, "elapsed_time": "0:42:44", "remaining_time": "3:40:45", "throughput": 8719.96, "total_tokens": 22363904} +{"current_steps": 33205, "total_steps": 204665, "loss": 0.0939, "lr": 1.9764968736462116e-06, "epoch": 0.811203674297022, "percentage": 16.22, "elapsed_time": "0:42:45", "remaining_time": "3:40:45", "throughput": 8720.15, "total_tokens": 22367488} +{"current_steps": 33210, "total_steps": 204665, "loss": 0.0102, "lr": 1.9764784900810367e-06, "epoch": 0.8113258251288691, "percentage": 16.23, "elapsed_time": "0:42:45", "remaining_time": "3:40:44", "throughput": 8720.19, "total_tokens": 22370560} +{"current_steps": 33215, "total_steps": 204665, "loss": 0.0797, "lr": 1.9764600994146474e-06, "epoch": 0.8114479759607163, "percentage": 16.23, "elapsed_time": "0:42:45", "remaining_time": "3:40:43", "throughput": 8720.44, "total_tokens": 22374336} +{"current_steps": 33220, "total_steps": 204665, "loss": 0.1435, "lr": 1.976441701647178e-06, "epoch": 0.8115701267925635, "percentage": 16.23, "elapsed_time": "0:42:46", "remaining_time": "3:40:43", "throughput": 8720.58, "total_tokens": 22377792} +{"current_steps": 33225, "total_steps": 204665, "loss": 0.082, "lr": 1.976423296778762e-06, "epoch": 0.8116922776244107, "percentage": 16.23, "elapsed_time": "0:42:46", "remaining_time": "3:40:42", "throughput": 8720.59, "total_tokens": 22380800} +{"current_steps": 33230, "total_steps": 204665, "loss": 0.1396, "lr": 1.976404884809533e-06, "epoch": 0.8118144284562578, "percentage": 16.24, "elapsed_time": "0:42:46", "remaining_time": "3:40:42", "throughput": 8720.71, "total_tokens": 22384128} +{"current_steps": 33235, "total_steps": 204665, "loss": 0.0456, "lr": 1.9763864657396247e-06, "epoch": 0.8119365792881049, "percentage": 16.24, "elapsed_time": "0:42:47", "remaining_time": "3:40:41", "throughput": 8721.0, "total_tokens": 22388032} +{"current_steps": 33240, "total_steps": 204665, "loss": 0.1648, "lr": 1.976368039569172e-06, "epoch": 0.8120587301199521, "percentage": 16.24, "elapsed_time": "0:42:47", "remaining_time": "3:40:41", "throughput": 8721.14, "total_tokens": 22391424} +{"current_steps": 33245, "total_steps": 204665, "loss": 0.1858, "lr": 1.976349606298308e-06, "epoch": 0.8121808809517993, "percentage": 16.24, "elapsed_time": "0:42:47", "remaining_time": "3:40:40", "throughput": 8721.37, "total_tokens": 22395136} +{"current_steps": 33250, "total_steps": 204665, "loss": 0.0211, "lr": 1.9763311659271672e-06, "epoch": 0.8123030317836465, "percentage": 16.25, "elapsed_time": "0:42:48", "remaining_time": "3:40:39", "throughput": 8721.53, "total_tokens": 22398592} +{"current_steps": 33255, "total_steps": 204665, "loss": 0.1305, "lr": 1.976312718455883e-06, "epoch": 0.8124251826154936, "percentage": 16.25, "elapsed_time": "0:42:48", "remaining_time": "3:40:39", "throughput": 8721.79, "total_tokens": 22402368} +{"current_steps": 33260, "total_steps": 204665, "loss": 0.0898, "lr": 1.976294263884591e-06, "epoch": 0.8125473334473408, "percentage": 16.25, "elapsed_time": "0:42:48", "remaining_time": "3:40:38", "throughput": 8722.03, "total_tokens": 22406144} +{"current_steps": 33265, "total_steps": 204665, "loss": 0.0859, "lr": 1.9762758022134236e-06, "epoch": 0.812669484279188, "percentage": 16.25, "elapsed_time": "0:42:49", "remaining_time": "3:40:38", "throughput": 8722.12, "total_tokens": 22409408} +{"current_steps": 33270, "total_steps": 204665, "loss": 0.1138, "lr": 1.9762573334425164e-06, "epoch": 0.8127916351110351, "percentage": 16.26, "elapsed_time": "0:42:49", "remaining_time": "3:40:37", "throughput": 8722.23, "total_tokens": 22412736} +{"current_steps": 33275, "total_steps": 204665, "loss": 0.1897, "lr": 1.9762388575720035e-06, "epoch": 0.8129137859428822, "percentage": 16.26, "elapsed_time": "0:42:49", "remaining_time": "3:40:37", "throughput": 8722.3, "total_tokens": 22415936} +{"current_steps": 33280, "total_steps": 204665, "loss": 0.2099, "lr": 1.976220374602019e-06, "epoch": 0.8130359367747294, "percentage": 16.26, "elapsed_time": "0:42:50", "remaining_time": "3:40:36", "throughput": 8722.49, "total_tokens": 22419456} +{"current_steps": 33285, "total_steps": 204665, "loss": 0.2129, "lr": 1.976201884532697e-06, "epoch": 0.8131580876065766, "percentage": 16.26, "elapsed_time": "0:42:50", "remaining_time": "3:40:35", "throughput": 8722.58, "total_tokens": 22422720} +{"current_steps": 33290, "total_steps": 204665, "loss": 0.0426, "lr": 1.9761833873641727e-06, "epoch": 0.8132802384384238, "percentage": 16.27, "elapsed_time": "0:42:51", "remaining_time": "3:40:35", "throughput": 8723.01, "total_tokens": 22427072} +{"current_steps": 33295, "total_steps": 204665, "loss": 0.0406, "lr": 1.9761648830965804e-06, "epoch": 0.813402389270271, "percentage": 16.27, "elapsed_time": "0:42:51", "remaining_time": "3:40:34", "throughput": 8723.21, "total_tokens": 22430656} +{"current_steps": 33300, "total_steps": 204665, "loss": 0.1687, "lr": 1.976146371730054e-06, "epoch": 0.813524540102118, "percentage": 16.27, "elapsed_time": "0:42:51", "remaining_time": "3:40:34", "throughput": 8723.3, "total_tokens": 22433920} +{"current_steps": 33305, "total_steps": 204665, "loss": 0.0645, "lr": 1.976127853264729e-06, "epoch": 0.8136466909339652, "percentage": 16.27, "elapsed_time": "0:42:52", "remaining_time": "3:40:33", "throughput": 8723.53, "total_tokens": 22437632} +{"current_steps": 33310, "total_steps": 204665, "loss": 0.1122, "lr": 1.9761093277007394e-06, "epoch": 0.8137688417658124, "percentage": 16.28, "elapsed_time": "0:42:52", "remaining_time": "3:40:33", "throughput": 8723.56, "total_tokens": 22440704} +{"current_steps": 33315, "total_steps": 204665, "loss": 0.1018, "lr": 1.9760907950382204e-06, "epoch": 0.8138909925976596, "percentage": 16.28, "elapsed_time": "0:42:52", "remaining_time": "3:40:32", "throughput": 8723.68, "total_tokens": 22444032} +{"current_steps": 33320, "total_steps": 204665, "loss": 0.0939, "lr": 1.9760722552773066e-06, "epoch": 0.8140131434295067, "percentage": 16.28, "elapsed_time": "0:42:53", "remaining_time": "3:40:32", "throughput": 8723.83, "total_tokens": 22447488} +{"current_steps": 33325, "total_steps": 204665, "loss": 0.1864, "lr": 1.976053708418133e-06, "epoch": 0.8141352942613539, "percentage": 16.28, "elapsed_time": "0:42:53", "remaining_time": "3:40:31", "throughput": 8723.96, "total_tokens": 22450880} +{"current_steps": 33330, "total_steps": 204665, "loss": 0.0875, "lr": 1.9760351544608343e-06, "epoch": 0.8142574450932011, "percentage": 16.29, "elapsed_time": "0:42:53", "remaining_time": "3:40:30", "throughput": 8723.97, "total_tokens": 22453888} +{"current_steps": 33335, "total_steps": 204665, "loss": 0.1386, "lr": 1.976016593405545e-06, "epoch": 0.8143795959250483, "percentage": 16.29, "elapsed_time": "0:42:54", "remaining_time": "3:40:30", "throughput": 8724.08, "total_tokens": 22457152} +{"current_steps": 33340, "total_steps": 204665, "loss": 0.1697, "lr": 1.9759980252524007e-06, "epoch": 0.8145017467568955, "percentage": 16.29, "elapsed_time": "0:42:54", "remaining_time": "3:40:29", "throughput": 8724.13, "total_tokens": 22460288} +{"current_steps": 33345, "total_steps": 204665, "loss": 0.0614, "lr": 1.9759794500015365e-06, "epoch": 0.8146238975887425, "percentage": 16.29, "elapsed_time": "0:42:54", "remaining_time": "3:40:29", "throughput": 8724.19, "total_tokens": 22463424} +{"current_steps": 33350, "total_steps": 204665, "loss": 0.1574, "lr": 1.9759608676530872e-06, "epoch": 0.8147460484205897, "percentage": 16.29, "elapsed_time": "0:42:55", "remaining_time": "3:40:28", "throughput": 8724.2, "total_tokens": 22466432} +{"current_steps": 33355, "total_steps": 204665, "loss": 0.0647, "lr": 1.975942278207188e-06, "epoch": 0.8148681992524369, "percentage": 16.3, "elapsed_time": "0:42:55", "remaining_time": "3:40:27", "throughput": 8724.22, "total_tokens": 22469440} +{"current_steps": 33360, "total_steps": 204665, "loss": 0.0556, "lr": 1.9759236816639733e-06, "epoch": 0.8149903500842841, "percentage": 16.3, "elapsed_time": "0:42:55", "remaining_time": "3:40:27", "throughput": 8724.18, "total_tokens": 22472256} +{"current_steps": 33365, "total_steps": 204665, "loss": 0.0245, "lr": 1.97590507802358e-06, "epoch": 0.8151125009161312, "percentage": 16.3, "elapsed_time": "0:42:56", "remaining_time": "3:40:26", "throughput": 8724.39, "total_tokens": 22475904} +{"current_steps": 33370, "total_steps": 204665, "loss": 0.0503, "lr": 1.9758864672861423e-06, "epoch": 0.8152346517479784, "percentage": 16.3, "elapsed_time": "0:42:56", "remaining_time": "3:40:25", "throughput": 8724.45, "total_tokens": 22479040} +{"current_steps": 33375, "total_steps": 204665, "loss": 0.2484, "lr": 1.9758678494517957e-06, "epoch": 0.8153568025798256, "percentage": 16.31, "elapsed_time": "0:42:56", "remaining_time": "3:40:25", "throughput": 8724.7, "total_tokens": 22482816} +{"current_steps": 33380, "total_steps": 204665, "loss": 0.1218, "lr": 1.9758492245206754e-06, "epoch": 0.8154789534116728, "percentage": 16.31, "elapsed_time": "0:42:57", "remaining_time": "3:40:24", "throughput": 8724.71, "total_tokens": 22485824} +{"current_steps": 33385, "total_steps": 204665, "loss": 0.1422, "lr": 1.975830592492917e-06, "epoch": 0.81560110424352, "percentage": 16.31, "elapsed_time": "0:42:57", "remaining_time": "3:40:24", "throughput": 8724.79, "total_tokens": 22489024} +{"current_steps": 33390, "total_steps": 204665, "loss": 0.0978, "lr": 1.9758119533686565e-06, "epoch": 0.815723255075367, "percentage": 16.31, "elapsed_time": "0:42:57", "remaining_time": "3:40:23", "throughput": 8724.95, "total_tokens": 22492480} +{"current_steps": 33395, "total_steps": 204665, "loss": 0.0414, "lr": 1.9757933071480285e-06, "epoch": 0.8158454059072142, "percentage": 16.32, "elapsed_time": "0:42:58", "remaining_time": "3:40:23", "throughput": 8725.0, "total_tokens": 22495616} +{"current_steps": 33400, "total_steps": 204665, "loss": 0.0998, "lr": 1.9757746538311694e-06, "epoch": 0.8159675567390614, "percentage": 16.32, "elapsed_time": "0:42:58", "remaining_time": "3:40:22", "throughput": 8725.16, "total_tokens": 22499072} +{"current_steps": 33405, "total_steps": 204665, "loss": 0.0641, "lr": 1.9757559934182146e-06, "epoch": 0.8160897075709086, "percentage": 16.32, "elapsed_time": "0:42:58", "remaining_time": "3:40:21", "throughput": 8725.36, "total_tokens": 22502656} +{"current_steps": 33410, "total_steps": 204665, "loss": 0.1152, "lr": 1.9757373259092998e-06, "epoch": 0.8162118584027557, "percentage": 16.32, "elapsed_time": "0:42:59", "remaining_time": "3:40:21", "throughput": 8725.5, "total_tokens": 22506112} +{"current_steps": 33415, "total_steps": 204665, "loss": 0.1796, "lr": 1.9757186513045604e-06, "epoch": 0.8163340092346029, "percentage": 16.33, "elapsed_time": "0:42:59", "remaining_time": "3:40:20", "throughput": 8725.6, "total_tokens": 22509376} +{"current_steps": 33420, "total_steps": 204665, "loss": 0.1909, "lr": 1.975699969604133e-06, "epoch": 0.81645616006645, "percentage": 16.33, "elapsed_time": "0:43:00", "remaining_time": "3:40:20", "throughput": 8725.8, "total_tokens": 22513024} +{"current_steps": 33425, "total_steps": 204665, "loss": 0.1162, "lr": 1.9756812808081527e-06, "epoch": 0.8165783108982972, "percentage": 16.33, "elapsed_time": "0:43:00", "remaining_time": "3:40:19", "throughput": 8725.98, "total_tokens": 22516544} +{"current_steps": 33430, "total_steps": 204665, "loss": 0.0691, "lr": 1.975662584916756e-06, "epoch": 0.8167004617301444, "percentage": 16.33, "elapsed_time": "0:43:00", "remaining_time": "3:40:19", "throughput": 8726.07, "total_tokens": 22519808} +{"current_steps": 33435, "total_steps": 204665, "loss": 0.1068, "lr": 1.975643881930078e-06, "epoch": 0.8168226125619915, "percentage": 16.34, "elapsed_time": "0:43:01", "remaining_time": "3:40:18", "throughput": 8726.23, "total_tokens": 22523264} +{"current_steps": 33440, "total_steps": 204665, "loss": 0.0976, "lr": 1.9756251718482558e-06, "epoch": 0.8169447633938387, "percentage": 16.34, "elapsed_time": "0:43:01", "remaining_time": "3:40:17", "throughput": 8726.34, "total_tokens": 22526592} +{"current_steps": 33445, "total_steps": 204665, "loss": 0.0408, "lr": 1.975606454671425e-06, "epoch": 0.8170669142256859, "percentage": 16.34, "elapsed_time": "0:43:01", "remaining_time": "3:40:17", "throughput": 8726.57, "total_tokens": 22530304} +{"current_steps": 33450, "total_steps": 204665, "loss": 0.1699, "lr": 1.975587730399721e-06, "epoch": 0.8171890650575331, "percentage": 16.34, "elapsed_time": "0:43:02", "remaining_time": "3:40:16", "throughput": 8726.67, "total_tokens": 22533568} +{"current_steps": 33455, "total_steps": 204665, "loss": 0.1904, "lr": 1.9755689990332813e-06, "epoch": 0.8173112158893802, "percentage": 16.35, "elapsed_time": "0:43:02", "remaining_time": "3:40:16", "throughput": 8726.72, "total_tokens": 22536704} +{"current_steps": 33460, "total_steps": 204665, "loss": 0.0707, "lr": 1.975550260572241e-06, "epoch": 0.8174333667212274, "percentage": 16.35, "elapsed_time": "0:43:02", "remaining_time": "3:40:15", "throughput": 8726.95, "total_tokens": 22540416} +{"current_steps": 33465, "total_steps": 204665, "loss": 0.0283, "lr": 1.975531515016737e-06, "epoch": 0.8175555175530745, "percentage": 16.35, "elapsed_time": "0:43:03", "remaining_time": "3:40:15", "throughput": 8727.05, "total_tokens": 22543680} +{"current_steps": 33470, "total_steps": 204665, "loss": 0.1277, "lr": 1.9755127623669053e-06, "epoch": 0.8176776683849217, "percentage": 16.35, "elapsed_time": "0:43:03", "remaining_time": "3:40:14", "throughput": 8727.25, "total_tokens": 22547264} +{"current_steps": 33475, "total_steps": 204665, "loss": 0.1602, "lr": 1.9754940026228826e-06, "epoch": 0.8177998192167689, "percentage": 16.36, "elapsed_time": "0:43:03", "remaining_time": "3:40:13", "throughput": 8727.42, "total_tokens": 22550784} +{"current_steps": 33480, "total_steps": 204665, "loss": 0.2102, "lr": 1.975475235784805e-06, "epoch": 0.817921970048616, "percentage": 16.36, "elapsed_time": "0:43:04", "remaining_time": "3:40:13", "throughput": 8727.51, "total_tokens": 22554048} +{"current_steps": 33485, "total_steps": 204665, "loss": 0.0947, "lr": 1.975456461852809e-06, "epoch": 0.8180441208804632, "percentage": 16.36, "elapsed_time": "0:43:04", "remaining_time": "3:40:12", "throughput": 8727.47, "total_tokens": 22556864} +{"current_steps": 33490, "total_steps": 204665, "loss": 0.0851, "lr": 1.9754376808270316e-06, "epoch": 0.8181662717123104, "percentage": 16.36, "elapsed_time": "0:43:04", "remaining_time": "3:40:12", "throughput": 8727.58, "total_tokens": 22560192} +{"current_steps": 33495, "total_steps": 204665, "loss": 0.1027, "lr": 1.975418892707609e-06, "epoch": 0.8182884225441576, "percentage": 16.37, "elapsed_time": "0:43:05", "remaining_time": "3:40:11", "throughput": 8727.71, "total_tokens": 22563584} +{"current_steps": 33500, "total_steps": 204665, "loss": 0.0578, "lr": 1.975400097494678e-06, "epoch": 0.8184105733760046, "percentage": 16.37, "elapsed_time": "0:43:05", "remaining_time": "3:40:10", "throughput": 8727.77, "total_tokens": 22566720} +{"current_steps": 33505, "total_steps": 204665, "loss": 0.1196, "lr": 1.9753812951883744e-06, "epoch": 0.8185327242078518, "percentage": 16.37, "elapsed_time": "0:43:05", "remaining_time": "3:40:10", "throughput": 8727.88, "total_tokens": 22570048} +{"current_steps": 33510, "total_steps": 204665, "loss": 0.0979, "lr": 1.9753624857888362e-06, "epoch": 0.818654875039699, "percentage": 16.37, "elapsed_time": "0:43:06", "remaining_time": "3:40:09", "throughput": 8727.98, "total_tokens": 22573376} +{"current_steps": 33515, "total_steps": 204665, "loss": 0.0704, "lr": 1.9753436692961992e-06, "epoch": 0.8187770258715462, "percentage": 16.38, "elapsed_time": "0:43:06", "remaining_time": "3:40:09", "throughput": 8728.1, "total_tokens": 22576704} +{"current_steps": 33520, "total_steps": 204665, "loss": 0.1453, "lr": 1.975324845710601e-06, "epoch": 0.8188991767033934, "percentage": 16.38, "elapsed_time": "0:43:07", "remaining_time": "3:40:08", "throughput": 8728.15, "total_tokens": 22579840} +{"current_steps": 33525, "total_steps": 204665, "loss": 0.051, "lr": 1.9753060150321786e-06, "epoch": 0.8190213275352405, "percentage": 16.38, "elapsed_time": "0:43:07", "remaining_time": "3:40:08", "throughput": 8728.24, "total_tokens": 22583104} +{"current_steps": 33530, "total_steps": 204665, "loss": 0.1462, "lr": 1.975287177261068e-06, "epoch": 0.8191434783670877, "percentage": 16.38, "elapsed_time": "0:43:07", "remaining_time": "3:40:07", "throughput": 8728.53, "total_tokens": 22587008} +{"current_steps": 33535, "total_steps": 204665, "loss": 0.1047, "lr": 1.975268332397407e-06, "epoch": 0.8192656291989349, "percentage": 16.39, "elapsed_time": "0:43:08", "remaining_time": "3:40:07", "throughput": 8728.7, "total_tokens": 22590528} +{"current_steps": 33540, "total_steps": 204665, "loss": 0.0521, "lr": 1.975249480441332e-06, "epoch": 0.819387780030782, "percentage": 16.39, "elapsed_time": "0:43:08", "remaining_time": "3:40:06", "throughput": 8728.73, "total_tokens": 22593600} +{"current_steps": 33545, "total_steps": 204665, "loss": 0.1804, "lr": 1.975230621392981e-06, "epoch": 0.8195099308626291, "percentage": 16.39, "elapsed_time": "0:43:08", "remaining_time": "3:40:05", "throughput": 8728.76, "total_tokens": 22596672} +{"current_steps": 33550, "total_steps": 204665, "loss": 0.0828, "lr": 1.9752117552524905e-06, "epoch": 0.8196320816944763, "percentage": 16.39, "elapsed_time": "0:43:09", "remaining_time": "3:40:05", "throughput": 8728.88, "total_tokens": 22600000} +{"current_steps": 33555, "total_steps": 204665, "loss": 0.1668, "lr": 1.9751928820199976e-06, "epoch": 0.8197542325263235, "percentage": 16.4, "elapsed_time": "0:43:09", "remaining_time": "3:40:04", "throughput": 8729.01, "total_tokens": 22603392} +{"current_steps": 33560, "total_steps": 204665, "loss": 0.1005, "lr": 1.97517400169564e-06, "epoch": 0.8198763833581707, "percentage": 16.4, "elapsed_time": "0:43:09", "remaining_time": "3:40:04", "throughput": 8729.15, "total_tokens": 22606784} +{"current_steps": 33565, "total_steps": 204665, "loss": 0.1006, "lr": 1.9751551142795545e-06, "epoch": 0.8199985341900178, "percentage": 16.4, "elapsed_time": "0:43:10", "remaining_time": "3:40:03", "throughput": 8729.26, "total_tokens": 22610112} +{"current_steps": 33570, "total_steps": 204665, "loss": 0.1059, "lr": 1.975136219771879e-06, "epoch": 0.820120685021865, "percentage": 16.4, "elapsed_time": "0:43:10", "remaining_time": "3:40:02", "throughput": 8729.33, "total_tokens": 22613312} +{"current_steps": 33575, "total_steps": 204665, "loss": 0.1553, "lr": 1.97511731817275e-06, "epoch": 0.8202428358537122, "percentage": 16.4, "elapsed_time": "0:43:10", "remaining_time": "3:40:02", "throughput": 8729.39, "total_tokens": 22616448} +{"current_steps": 33580, "total_steps": 204665, "loss": 0.1131, "lr": 1.9750984094823065e-06, "epoch": 0.8203649866855593, "percentage": 16.41, "elapsed_time": "0:43:11", "remaining_time": "3:40:01", "throughput": 8729.58, "total_tokens": 22620032} +{"current_steps": 33585, "total_steps": 204665, "loss": 0.0494, "lr": 1.9750794937006847e-06, "epoch": 0.8204871375174065, "percentage": 16.41, "elapsed_time": "0:43:11", "remaining_time": "3:40:01", "throughput": 8729.68, "total_tokens": 22623296} +{"current_steps": 33590, "total_steps": 204665, "loss": 0.1359, "lr": 1.9750605708280224e-06, "epoch": 0.8206092883492536, "percentage": 16.41, "elapsed_time": "0:43:11", "remaining_time": "3:40:00", "throughput": 8729.77, "total_tokens": 22626560} +{"current_steps": 33595, "total_steps": 204665, "loss": 0.0691, "lr": 1.9750416408644573e-06, "epoch": 0.8207314391811008, "percentage": 16.41, "elapsed_time": "0:43:12", "remaining_time": "3:39:59", "throughput": 8729.83, "total_tokens": 22629696} +{"current_steps": 33600, "total_steps": 204665, "loss": 0.0392, "lr": 1.9750227038101273e-06, "epoch": 0.820853590012948, "percentage": 16.42, "elapsed_time": "0:43:12", "remaining_time": "3:39:59", "throughput": 8729.88, "total_tokens": 22632832} +{"current_steps": 33605, "total_steps": 204665, "loss": 0.0865, "lr": 1.9750037596651702e-06, "epoch": 0.8209757408447952, "percentage": 16.42, "elapsed_time": "0:43:12", "remaining_time": "3:39:58", "throughput": 8730.01, "total_tokens": 22636224} +{"current_steps": 33610, "total_steps": 204665, "loss": 0.2274, "lr": 1.974984808429723e-06, "epoch": 0.8210978916766423, "percentage": 16.42, "elapsed_time": "0:43:13", "remaining_time": "3:39:58", "throughput": 8730.18, "total_tokens": 22639744} +{"current_steps": 33615, "total_steps": 204665, "loss": 0.1166, "lr": 1.9749658501039247e-06, "epoch": 0.8212200425084895, "percentage": 16.42, "elapsed_time": "0:43:13", "remaining_time": "3:39:57", "throughput": 8730.41, "total_tokens": 22643456} +{"current_steps": 33620, "total_steps": 204665, "loss": 0.1583, "lr": 1.974946884687912e-06, "epoch": 0.8213421933403366, "percentage": 16.43, "elapsed_time": "0:43:13", "remaining_time": "3:39:57", "throughput": 8730.62, "total_tokens": 22647104} +{"current_steps": 33625, "total_steps": 204665, "loss": 0.1557, "lr": 1.9749279121818236e-06, "epoch": 0.8214643441721838, "percentage": 16.43, "elapsed_time": "0:43:14", "remaining_time": "3:39:56", "throughput": 8730.68, "total_tokens": 22650240} +{"current_steps": 33630, "total_steps": 204665, "loss": 0.1548, "lr": 1.974908932585797e-06, "epoch": 0.821586495004031, "percentage": 16.43, "elapsed_time": "0:43:14", "remaining_time": "3:39:55", "throughput": 8730.75, "total_tokens": 22653440} +{"current_steps": 33635, "total_steps": 204665, "loss": 0.0919, "lr": 1.9748899458999706e-06, "epoch": 0.8217086458358781, "percentage": 16.43, "elapsed_time": "0:43:15", "remaining_time": "3:39:55", "throughput": 8730.93, "total_tokens": 22656960} +{"current_steps": 33640, "total_steps": 204665, "loss": 0.0835, "lr": 1.974870952124482e-06, "epoch": 0.8218307966677253, "percentage": 16.44, "elapsed_time": "0:43:15", "remaining_time": "3:39:54", "throughput": 8731.01, "total_tokens": 22660160} +{"current_steps": 33645, "total_steps": 204665, "loss": 0.1284, "lr": 1.9748519512594697e-06, "epoch": 0.8219529474995725, "percentage": 16.44, "elapsed_time": "0:43:15", "remaining_time": "3:39:54", "throughput": 8731.08, "total_tokens": 22663360} +{"current_steps": 33650, "total_steps": 204665, "loss": 0.0585, "lr": 1.974832943305072e-06, "epoch": 0.8220750983314197, "percentage": 16.44, "elapsed_time": "0:43:16", "remaining_time": "3:39:53", "throughput": 8731.18, "total_tokens": 22666624} +{"current_steps": 33655, "total_steps": 204665, "loss": 0.0665, "lr": 1.974813928261427e-06, "epoch": 0.8221972491632668, "percentage": 16.44, "elapsed_time": "0:43:16", "remaining_time": "3:39:53", "throughput": 8731.34, "total_tokens": 22670080} +{"current_steps": 33660, "total_steps": 204665, "loss": 0.0723, "lr": 1.9747949061286724e-06, "epoch": 0.8223193999951139, "percentage": 16.45, "elapsed_time": "0:43:16", "remaining_time": "3:39:52", "throughput": 8731.37, "total_tokens": 22673088} +{"current_steps": 33665, "total_steps": 204665, "loss": 0.1446, "lr": 1.9747758769069477e-06, "epoch": 0.8224415508269611, "percentage": 16.45, "elapsed_time": "0:43:17", "remaining_time": "3:39:51", "throughput": 8731.45, "total_tokens": 22676288} +{"current_steps": 33670, "total_steps": 204665, "loss": 0.3086, "lr": 1.9747568405963902e-06, "epoch": 0.8225637016588083, "percentage": 16.45, "elapsed_time": "0:43:17", "remaining_time": "3:39:51", "throughput": 8731.55, "total_tokens": 22679552} +{"current_steps": 33675, "total_steps": 204665, "loss": 0.1643, "lr": 1.974737797197139e-06, "epoch": 0.8226858524906555, "percentage": 16.45, "elapsed_time": "0:43:17", "remaining_time": "3:39:50", "throughput": 8731.63, "total_tokens": 22682752} +{"current_steps": 33680, "total_steps": 204665, "loss": 0.0487, "lr": 1.9747187467093324e-06, "epoch": 0.8228080033225026, "percentage": 16.46, "elapsed_time": "0:43:18", "remaining_time": "3:39:49", "throughput": 8731.62, "total_tokens": 22685696} +{"current_steps": 33685, "total_steps": 204665, "loss": 0.1025, "lr": 1.9746996891331086e-06, "epoch": 0.8229301541543498, "percentage": 16.46, "elapsed_time": "0:43:18", "remaining_time": "3:39:49", "throughput": 8731.88, "total_tokens": 22689536} +{"current_steps": 33690, "total_steps": 204665, "loss": 0.1342, "lr": 1.974680624468607e-06, "epoch": 0.823052304986197, "percentage": 16.46, "elapsed_time": "0:43:18", "remaining_time": "3:39:48", "throughput": 8732.02, "total_tokens": 22692928} +{"current_steps": 33695, "total_steps": 204665, "loss": 0.1626, "lr": 1.974661552715965e-06, "epoch": 0.8231744558180442, "percentage": 16.46, "elapsed_time": "0:43:19", "remaining_time": "3:39:48", "throughput": 8732.03, "total_tokens": 22695936} +{"current_steps": 33700, "total_steps": 204665, "loss": 0.0827, "lr": 1.9746424738753225e-06, "epoch": 0.8232966066498912, "percentage": 16.47, "elapsed_time": "0:43:19", "remaining_time": "3:39:47", "throughput": 8732.0, "total_tokens": 22698816} +{"current_steps": 33705, "total_steps": 204665, "loss": 0.0661, "lr": 1.974623387946818e-06, "epoch": 0.8234187574817384, "percentage": 16.47, "elapsed_time": "0:43:19", "remaining_time": "3:39:47", "throughput": 8732.24, "total_tokens": 22702528} +{"current_steps": 33710, "total_steps": 204665, "loss": 0.0431, "lr": 1.97460429493059e-06, "epoch": 0.8235409083135856, "percentage": 16.47, "elapsed_time": "0:43:20", "remaining_time": "3:39:46", "throughput": 8732.35, "total_tokens": 22705856} +{"current_steps": 33715, "total_steps": 204665, "loss": 0.1104, "lr": 1.974585194826777e-06, "epoch": 0.8236630591454328, "percentage": 16.47, "elapsed_time": "0:43:20", "remaining_time": "3:39:45", "throughput": 8732.41, "total_tokens": 22708992} +{"current_steps": 33720, "total_steps": 204665, "loss": 0.1426, "lr": 1.9745660876355187e-06, "epoch": 0.82378520997728, "percentage": 16.48, "elapsed_time": "0:43:20", "remaining_time": "3:39:45", "throughput": 8732.47, "total_tokens": 22712128} +{"current_steps": 33725, "total_steps": 204665, "loss": 0.0338, "lr": 1.9745469733569536e-06, "epoch": 0.8239073608091271, "percentage": 16.48, "elapsed_time": "0:43:21", "remaining_time": "3:39:44", "throughput": 8732.57, "total_tokens": 22715392} +{"current_steps": 33730, "total_steps": 204665, "loss": 0.1035, "lr": 1.9745278519912206e-06, "epoch": 0.8240295116409743, "percentage": 16.48, "elapsed_time": "0:43:21", "remaining_time": "3:39:44", "throughput": 8732.84, "total_tokens": 22719232} +{"current_steps": 33735, "total_steps": 204665, "loss": 0.1042, "lr": 1.9745087235384596e-06, "epoch": 0.8241516624728215, "percentage": 16.48, "elapsed_time": "0:43:21", "remaining_time": "3:39:43", "throughput": 8732.86, "total_tokens": 22722240} +{"current_steps": 33740, "total_steps": 204665, "loss": 0.1429, "lr": 1.9744895879988085e-06, "epoch": 0.8242738133046686, "percentage": 16.49, "elapsed_time": "0:43:22", "remaining_time": "3:39:42", "throughput": 8733.0, "total_tokens": 22725632} +{"current_steps": 33745, "total_steps": 204665, "loss": 0.194, "lr": 1.974470445372407e-06, "epoch": 0.8243959641365157, "percentage": 16.49, "elapsed_time": "0:43:22", "remaining_time": "3:39:42", "throughput": 8733.02, "total_tokens": 22728640} +{"current_steps": 33750, "total_steps": 204665, "loss": 0.0791, "lr": 1.9744512956593943e-06, "epoch": 0.8245181149683629, "percentage": 16.49, "elapsed_time": "0:43:22", "remaining_time": "3:39:41", "throughput": 8733.11, "total_tokens": 22731904} +{"current_steps": 33755, "total_steps": 204665, "loss": 0.1784, "lr": 1.97443213885991e-06, "epoch": 0.8246402658002101, "percentage": 16.49, "elapsed_time": "0:43:23", "remaining_time": "3:39:41", "throughput": 8733.17, "total_tokens": 22735040} +{"current_steps": 33760, "total_steps": 204665, "loss": 0.196, "lr": 1.9744129749740925e-06, "epoch": 0.8247624166320573, "percentage": 16.5, "elapsed_time": "0:43:23", "remaining_time": "3:39:40", "throughput": 8733.4, "total_tokens": 22738688} +{"current_steps": 33765, "total_steps": 204665, "loss": 0.0673, "lr": 1.974393804002082e-06, "epoch": 0.8248845674639045, "percentage": 16.5, "elapsed_time": "0:43:23", "remaining_time": "3:39:39", "throughput": 8733.41, "total_tokens": 22741696} +{"current_steps": 33770, "total_steps": 204665, "loss": 0.1446, "lr": 1.974374625944018e-06, "epoch": 0.8250067182957516, "percentage": 16.5, "elapsed_time": "0:43:24", "remaining_time": "3:39:39", "throughput": 8733.59, "total_tokens": 22745216} +{"current_steps": 33775, "total_steps": 204665, "loss": 0.1043, "lr": 1.9743554408000394e-06, "epoch": 0.8251288691275988, "percentage": 16.5, "elapsed_time": "0:43:24", "remaining_time": "3:39:38", "throughput": 8733.64, "total_tokens": 22748352} +{"current_steps": 33780, "total_steps": 204665, "loss": 0.156, "lr": 1.974336248570286e-06, "epoch": 0.8252510199594459, "percentage": 16.51, "elapsed_time": "0:43:25", "remaining_time": "3:39:38", "throughput": 8733.77, "total_tokens": 22751744} +{"current_steps": 33785, "total_steps": 204665, "loss": 0.1163, "lr": 1.9743170492548974e-06, "epoch": 0.8253731707912931, "percentage": 16.51, "elapsed_time": "0:43:25", "remaining_time": "3:39:37", "throughput": 8734.09, "total_tokens": 22755712} +{"current_steps": 33790, "total_steps": 204665, "loss": 0.126, "lr": 1.9742978428540132e-06, "epoch": 0.8254953216231402, "percentage": 16.51, "elapsed_time": "0:43:25", "remaining_time": "3:39:37", "throughput": 8734.25, "total_tokens": 22759232} +{"current_steps": 33795, "total_steps": 204665, "loss": 0.0943, "lr": 1.974278629367773e-06, "epoch": 0.8256174724549874, "percentage": 16.51, "elapsed_time": "0:43:26", "remaining_time": "3:39:36", "throughput": 8734.41, "total_tokens": 22762688} +{"current_steps": 33800, "total_steps": 204665, "loss": 0.0327, "lr": 1.974259408796317e-06, "epoch": 0.8257396232868346, "percentage": 16.51, "elapsed_time": "0:43:26", "remaining_time": "3:39:37", "throughput": 8733.68, "total_tokens": 22766144} +{"current_steps": 33805, "total_steps": 204665, "loss": 0.0922, "lr": 1.9742401811397834e-06, "epoch": 0.8258617741186818, "percentage": 16.52, "elapsed_time": "0:43:27", "remaining_time": "3:39:36", "throughput": 8733.71, "total_tokens": 22769216} +{"current_steps": 33810, "total_steps": 204665, "loss": 0.0987, "lr": 1.9742209463983143e-06, "epoch": 0.8259839249505289, "percentage": 16.52, "elapsed_time": "0:43:27", "remaining_time": "3:39:36", "throughput": 8733.92, "total_tokens": 22772864} +{"current_steps": 33815, "total_steps": 204665, "loss": 0.0945, "lr": 1.9742017045720474e-06, "epoch": 0.826106075782376, "percentage": 16.52, "elapsed_time": "0:43:27", "remaining_time": "3:39:35", "throughput": 8733.94, "total_tokens": 22775872} +{"current_steps": 33820, "total_steps": 204665, "loss": 0.049, "lr": 1.9741824556611245e-06, "epoch": 0.8262282266142232, "percentage": 16.52, "elapsed_time": "0:43:28", "remaining_time": "3:39:35", "throughput": 8734.14, "total_tokens": 22779456} +{"current_steps": 33825, "total_steps": 204665, "loss": 0.0323, "lr": 1.9741631996656846e-06, "epoch": 0.8263503774460704, "percentage": 16.53, "elapsed_time": "0:43:28", "remaining_time": "3:39:34", "throughput": 8734.17, "total_tokens": 22782528} +{"current_steps": 33830, "total_steps": 204665, "loss": 0.1273, "lr": 1.9741439365858677e-06, "epoch": 0.8264725282779176, "percentage": 16.53, "elapsed_time": "0:43:28", "remaining_time": "3:39:33", "throughput": 8734.22, "total_tokens": 22785664} +{"current_steps": 33835, "total_steps": 204665, "loss": 0.1618, "lr": 1.974124666421814e-06, "epoch": 0.8265946791097647, "percentage": 16.53, "elapsed_time": "0:43:29", "remaining_time": "3:39:33", "throughput": 8734.33, "total_tokens": 22788992} +{"current_steps": 33840, "total_steps": 204665, "loss": 0.1561, "lr": 1.974105389173664e-06, "epoch": 0.8267168299416119, "percentage": 16.53, "elapsed_time": "0:43:29", "remaining_time": "3:39:32", "throughput": 8734.36, "total_tokens": 22792064} +{"current_steps": 33845, "total_steps": 204665, "loss": 0.0267, "lr": 1.974086104841557e-06, "epoch": 0.8268389807734591, "percentage": 16.54, "elapsed_time": "0:43:29", "remaining_time": "3:39:32", "throughput": 8734.53, "total_tokens": 22795584} +{"current_steps": 33850, "total_steps": 204665, "loss": 0.1838, "lr": 1.974066813425635e-06, "epoch": 0.8269611316053063, "percentage": 16.54, "elapsed_time": "0:43:30", "remaining_time": "3:39:31", "throughput": 8734.7, "total_tokens": 22799104} +{"current_steps": 33855, "total_steps": 204665, "loss": 0.0809, "lr": 1.9740475149260364e-06, "epoch": 0.8270832824371533, "percentage": 16.54, "elapsed_time": "0:43:30", "remaining_time": "3:39:30", "throughput": 8734.71, "total_tokens": 22802112} +{"current_steps": 33860, "total_steps": 204665, "loss": 0.0087, "lr": 1.974028209342902e-06, "epoch": 0.8272054332690005, "percentage": 16.54, "elapsed_time": "0:43:30", "remaining_time": "3:39:30", "throughput": 8734.94, "total_tokens": 22805824} +{"current_steps": 33865, "total_steps": 204665, "loss": 0.0718, "lr": 1.974008896676373e-06, "epoch": 0.8273275841008477, "percentage": 16.55, "elapsed_time": "0:43:31", "remaining_time": "3:39:29", "throughput": 8734.93, "total_tokens": 22808768} +{"current_steps": 33870, "total_steps": 204665, "loss": 0.1391, "lr": 1.973989576926589e-06, "epoch": 0.8274497349326949, "percentage": 16.55, "elapsed_time": "0:43:31", "remaining_time": "3:39:29", "throughput": 8735.05, "total_tokens": 22812096} +{"current_steps": 33875, "total_steps": 204665, "loss": 0.1762, "lr": 1.973970250093691e-06, "epoch": 0.8275718857645421, "percentage": 16.55, "elapsed_time": "0:43:31", "remaining_time": "3:39:28", "throughput": 8735.12, "total_tokens": 22815296} +{"current_steps": 33880, "total_steps": 204665, "loss": 0.1399, "lr": 1.9739509161778196e-06, "epoch": 0.8276940365963892, "percentage": 16.55, "elapsed_time": "0:43:32", "remaining_time": "3:39:28", "throughput": 8735.32, "total_tokens": 22818944} +{"current_steps": 33885, "total_steps": 204665, "loss": 0.084, "lr": 1.9739315751791146e-06, "epoch": 0.8278161874282364, "percentage": 16.56, "elapsed_time": "0:43:32", "remaining_time": "3:39:27", "throughput": 8735.43, "total_tokens": 22822272} +{"current_steps": 33890, "total_steps": 204665, "loss": 0.0412, "lr": 1.973912227097718e-06, "epoch": 0.8279383382600836, "percentage": 16.56, "elapsed_time": "0:43:32", "remaining_time": "3:39:26", "throughput": 8735.59, "total_tokens": 22825792} +{"current_steps": 33895, "total_steps": 204665, "loss": 0.1633, "lr": 1.9738928719337695e-06, "epoch": 0.8280604890919308, "percentage": 16.56, "elapsed_time": "0:43:33", "remaining_time": "3:39:26", "throughput": 8735.86, "total_tokens": 22829632} +{"current_steps": 33900, "total_steps": 204665, "loss": 0.1873, "lr": 1.97387350968741e-06, "epoch": 0.8281826399237778, "percentage": 16.56, "elapsed_time": "0:43:33", "remaining_time": "3:39:25", "throughput": 8736.03, "total_tokens": 22833152} +{"current_steps": 33905, "total_steps": 204665, "loss": 0.1141, "lr": 1.97385414035878e-06, "epoch": 0.828304790755625, "percentage": 16.57, "elapsed_time": "0:43:34", "remaining_time": "3:39:25", "throughput": 8736.23, "total_tokens": 22836736} +{"current_steps": 33910, "total_steps": 204665, "loss": 0.0695, "lr": 1.973834763948021e-06, "epoch": 0.8284269415874722, "percentage": 16.57, "elapsed_time": "0:43:34", "remaining_time": "3:39:24", "throughput": 8736.34, "total_tokens": 22840064} +{"current_steps": 33915, "total_steps": 204665, "loss": 0.0851, "lr": 1.9738153804552734e-06, "epoch": 0.8285490924193194, "percentage": 16.57, "elapsed_time": "0:43:34", "remaining_time": "3:39:24", "throughput": 8736.49, "total_tokens": 22843520} +{"current_steps": 33920, "total_steps": 204665, "loss": 0.1975, "lr": 1.973795989880679e-06, "epoch": 0.8286712432511666, "percentage": 16.57, "elapsed_time": "0:43:35", "remaining_time": "3:39:23", "throughput": 8736.55, "total_tokens": 22846720} +{"current_steps": 33925, "total_steps": 204665, "loss": 0.0485, "lr": 1.973776592224378e-06, "epoch": 0.8287933940830137, "percentage": 16.58, "elapsed_time": "0:43:35", "remaining_time": "3:39:23", "throughput": 8736.67, "total_tokens": 22850048} +{"current_steps": 33930, "total_steps": 204665, "loss": 0.0265, "lr": 1.9737571874865115e-06, "epoch": 0.8289155449148609, "percentage": 16.58, "elapsed_time": "0:43:35", "remaining_time": "3:39:22", "throughput": 8736.82, "total_tokens": 22853504} +{"current_steps": 33935, "total_steps": 204665, "loss": 0.0968, "lr": 1.973737775667221e-06, "epoch": 0.829037695746708, "percentage": 16.58, "elapsed_time": "0:43:36", "remaining_time": "3:39:21", "throughput": 8736.83, "total_tokens": 22856512} +{"current_steps": 33940, "total_steps": 204665, "loss": 0.1644, "lr": 1.9737183567666478e-06, "epoch": 0.8291598465785552, "percentage": 16.58, "elapsed_time": "0:43:36", "remaining_time": "3:39:21", "throughput": 8736.99, "total_tokens": 22860032} +{"current_steps": 33945, "total_steps": 204665, "loss": 0.183, "lr": 1.9736989307849323e-06, "epoch": 0.8292819974104023, "percentage": 16.59, "elapsed_time": "0:43:36", "remaining_time": "3:39:20", "throughput": 8737.12, "total_tokens": 22863424} +{"current_steps": 33950, "total_steps": 204665, "loss": 0.155, "lr": 1.9736794977222166e-06, "epoch": 0.8294041482422495, "percentage": 16.59, "elapsed_time": "0:43:37", "remaining_time": "3:39:20", "throughput": 8737.16, "total_tokens": 22866496} +{"current_steps": 33955, "total_steps": 204665, "loss": 0.0301, "lr": 1.9736600575786413e-06, "epoch": 0.8295262990740967, "percentage": 16.59, "elapsed_time": "0:43:37", "remaining_time": "3:39:19", "throughput": 8737.22, "total_tokens": 22869632} +{"current_steps": 33960, "total_steps": 204665, "loss": 0.1194, "lr": 1.973640610354349e-06, "epoch": 0.8296484499059439, "percentage": 16.59, "elapsed_time": "0:43:37", "remaining_time": "3:39:19", "throughput": 8737.4, "total_tokens": 22873216} +{"current_steps": 33965, "total_steps": 204665, "loss": 0.1718, "lr": 1.9736211560494796e-06, "epoch": 0.8297706007377911, "percentage": 16.6, "elapsed_time": "0:43:38", "remaining_time": "3:39:18", "throughput": 8737.49, "total_tokens": 22876480} +{"current_steps": 33970, "total_steps": 204665, "loss": 0.1535, "lr": 1.9736016946641755e-06, "epoch": 0.8298927515696382, "percentage": 16.6, "elapsed_time": "0:43:38", "remaining_time": "3:39:17", "throughput": 8737.53, "total_tokens": 22879552} +{"current_steps": 33975, "total_steps": 204665, "loss": 0.2542, "lr": 1.973582226198578e-06, "epoch": 0.8300149024014853, "percentage": 16.6, "elapsed_time": "0:43:38", "remaining_time": "3:39:17", "throughput": 8737.62, "total_tokens": 22882816} +{"current_steps": 33980, "total_steps": 204665, "loss": 0.1118, "lr": 1.9735627506528284e-06, "epoch": 0.8301370532333325, "percentage": 16.6, "elapsed_time": "0:43:39", "remaining_time": "3:39:16", "throughput": 8737.75, "total_tokens": 22886208} +{"current_steps": 33985, "total_steps": 204665, "loss": 0.2168, "lr": 1.973543268027069e-06, "epoch": 0.8302592040651797, "percentage": 16.61, "elapsed_time": "0:43:39", "remaining_time": "3:39:16", "throughput": 8737.89, "total_tokens": 22889600} +{"current_steps": 33990, "total_steps": 204665, "loss": 0.0727, "lr": 1.9735237783214413e-06, "epoch": 0.8303813548970268, "percentage": 16.61, "elapsed_time": "0:43:39", "remaining_time": "3:39:15", "throughput": 8738.12, "total_tokens": 22893312} +{"current_steps": 33995, "total_steps": 204665, "loss": 0.1407, "lr": 1.973504281536086e-06, "epoch": 0.830503505728874, "percentage": 16.61, "elapsed_time": "0:43:40", "remaining_time": "3:39:14", "throughput": 8738.18, "total_tokens": 22896512} +{"current_steps": 34000, "total_steps": 204665, "loss": 0.1526, "lr": 1.9734847776711465e-06, "epoch": 0.8306256565607212, "percentage": 16.61, "elapsed_time": "0:43:40", "remaining_time": "3:39:14", "throughput": 8738.52, "total_tokens": 22900608} +{"current_steps": 34005, "total_steps": 204665, "loss": 0.0175, "lr": 1.973465266726764e-06, "epoch": 0.8307478073925684, "percentage": 16.61, "elapsed_time": "0:43:41", "remaining_time": "3:39:13", "throughput": 8738.65, "total_tokens": 22904000} +{"current_steps": 34010, "total_steps": 204665, "loss": 0.0464, "lr": 1.9734457487030792e-06, "epoch": 0.8308699582244156, "percentage": 16.62, "elapsed_time": "0:43:41", "remaining_time": "3:39:13", "throughput": 8738.87, "total_tokens": 22907712} +{"current_steps": 34015, "total_steps": 204665, "loss": 0.0883, "lr": 1.973426223600236e-06, "epoch": 0.8309921090562626, "percentage": 16.62, "elapsed_time": "0:43:41", "remaining_time": "3:39:12", "throughput": 8739.04, "total_tokens": 22911232} +{"current_steps": 34020, "total_steps": 204665, "loss": 0.0874, "lr": 1.9734066914183752e-06, "epoch": 0.8311142598881098, "percentage": 16.62, "elapsed_time": "0:43:42", "remaining_time": "3:39:12", "throughput": 8739.17, "total_tokens": 22914624} +{"current_steps": 34025, "total_steps": 204665, "loss": 0.0753, "lr": 1.973387152157639e-06, "epoch": 0.831236410719957, "percentage": 16.62, "elapsed_time": "0:43:42", "remaining_time": "3:39:11", "throughput": 8739.37, "total_tokens": 22918208} +{"current_steps": 34030, "total_steps": 204665, "loss": 0.2246, "lr": 1.97336760581817e-06, "epoch": 0.8313585615518042, "percentage": 16.63, "elapsed_time": "0:43:42", "remaining_time": "3:39:11", "throughput": 8739.51, "total_tokens": 22921664} +{"current_steps": 34035, "total_steps": 204665, "loss": 0.1399, "lr": 1.9733480524001096e-06, "epoch": 0.8314807123836513, "percentage": 16.63, "elapsed_time": "0:43:43", "remaining_time": "3:39:10", "throughput": 8739.68, "total_tokens": 22925184} +{"current_steps": 34040, "total_steps": 204665, "loss": 0.1742, "lr": 1.9733284919036007e-06, "epoch": 0.8316028632154985, "percentage": 16.63, "elapsed_time": "0:43:43", "remaining_time": "3:39:10", "throughput": 8739.83, "total_tokens": 22928640} +{"current_steps": 34045, "total_steps": 204665, "loss": 0.1754, "lr": 1.9733089243287847e-06, "epoch": 0.8317250140473457, "percentage": 16.63, "elapsed_time": "0:43:43", "remaining_time": "3:39:09", "throughput": 8739.94, "total_tokens": 22931968} +{"current_steps": 34050, "total_steps": 204665, "loss": 0.0714, "lr": 1.973289349675805e-06, "epoch": 0.8318471648791929, "percentage": 16.64, "elapsed_time": "0:43:44", "remaining_time": "3:39:08", "throughput": 8740.02, "total_tokens": 22935168} +{"current_steps": 34055, "total_steps": 204665, "loss": 0.2318, "lr": 1.973269767944803e-06, "epoch": 0.83196931571104, "percentage": 16.64, "elapsed_time": "0:43:44", "remaining_time": "3:39:08", "throughput": 8740.26, "total_tokens": 22938944} +{"current_steps": 34060, "total_steps": 204665, "loss": 0.1794, "lr": 1.9732501791359217e-06, "epoch": 0.8320914665428871, "percentage": 16.64, "elapsed_time": "0:43:44", "remaining_time": "3:39:07", "throughput": 8740.29, "total_tokens": 22942016} +{"current_steps": 34065, "total_steps": 204665, "loss": 0.0536, "lr": 1.973230583249303e-06, "epoch": 0.8322136173747343, "percentage": 16.64, "elapsed_time": "0:43:45", "remaining_time": "3:39:07", "throughput": 8740.33, "total_tokens": 22945152} +{"current_steps": 34070, "total_steps": 204665, "loss": 0.081, "lr": 1.97321098028509e-06, "epoch": 0.8323357682065815, "percentage": 16.65, "elapsed_time": "0:43:45", "remaining_time": "3:39:06", "throughput": 8740.46, "total_tokens": 22948544} +{"current_steps": 34075, "total_steps": 204665, "loss": 0.0337, "lr": 1.973191370243425e-06, "epoch": 0.8324579190384287, "percentage": 16.65, "elapsed_time": "0:43:45", "remaining_time": "3:39:06", "throughput": 8740.59, "total_tokens": 22951936} +{"current_steps": 34080, "total_steps": 204665, "loss": 0.0878, "lr": 1.9731717531244507e-06, "epoch": 0.8325800698702758, "percentage": 16.65, "elapsed_time": "0:43:46", "remaining_time": "3:39:05", "throughput": 8740.78, "total_tokens": 22955520} +{"current_steps": 34085, "total_steps": 204665, "loss": 0.0361, "lr": 1.973152128928309e-06, "epoch": 0.832702220702123, "percentage": 16.65, "elapsed_time": "0:43:46", "remaining_time": "3:39:04", "throughput": 8740.83, "total_tokens": 22958656} +{"current_steps": 34090, "total_steps": 204665, "loss": 0.0429, "lr": 1.973132497655144e-06, "epoch": 0.8328243715339702, "percentage": 16.66, "elapsed_time": "0:43:46", "remaining_time": "3:39:04", "throughput": 8741.02, "total_tokens": 22962240} +{"current_steps": 34095, "total_steps": 204665, "loss": 0.0689, "lr": 1.9731128593050974e-06, "epoch": 0.8329465223658173, "percentage": 16.66, "elapsed_time": "0:43:47", "remaining_time": "3:39:03", "throughput": 8741.14, "total_tokens": 22965568} +{"current_steps": 34100, "total_steps": 204665, "loss": 0.0238, "lr": 1.9730932138783122e-06, "epoch": 0.8330686731976644, "percentage": 16.66, "elapsed_time": "0:43:47", "remaining_time": "3:39:03", "throughput": 8741.33, "total_tokens": 22969152} +{"current_steps": 34105, "total_steps": 204665, "loss": 0.2501, "lr": 1.973073561374932e-06, "epoch": 0.8331908240295116, "percentage": 16.66, "elapsed_time": "0:43:48", "remaining_time": "3:39:02", "throughput": 8741.56, "total_tokens": 22972928} +{"current_steps": 34110, "total_steps": 204665, "loss": 0.2352, "lr": 1.9730539017950986e-06, "epoch": 0.8333129748613588, "percentage": 16.67, "elapsed_time": "0:43:48", "remaining_time": "3:39:02", "throughput": 8741.67, "total_tokens": 22976256} +{"current_steps": 34115, "total_steps": 204665, "loss": 0.1129, "lr": 1.9730342351389555e-06, "epoch": 0.833435125693206, "percentage": 16.67, "elapsed_time": "0:43:48", "remaining_time": "3:39:01", "throughput": 8741.81, "total_tokens": 22979648} +{"current_steps": 34120, "total_steps": 204665, "loss": 0.1751, "lr": 1.973014561406646e-06, "epoch": 0.8335572765250532, "percentage": 16.67, "elapsed_time": "0:43:49", "remaining_time": "3:39:00", "throughput": 8741.87, "total_tokens": 22982784} +{"current_steps": 34125, "total_steps": 204665, "loss": 0.1128, "lr": 1.972994880598313e-06, "epoch": 0.8336794273569003, "percentage": 16.67, "elapsed_time": "0:43:49", "remaining_time": "3:39:00", "throughput": 8741.88, "total_tokens": 22985728} +{"current_steps": 34130, "total_steps": 204665, "loss": 0.0816, "lr": 1.9729751927140994e-06, "epoch": 0.8338015781887474, "percentage": 16.68, "elapsed_time": "0:43:49", "remaining_time": "3:38:59", "throughput": 8742.0, "total_tokens": 22989056} +{"current_steps": 34135, "total_steps": 204665, "loss": 0.1597, "lr": 1.9729554977541484e-06, "epoch": 0.8339237290205946, "percentage": 16.68, "elapsed_time": "0:43:50", "remaining_time": "3:38:59", "throughput": 8742.25, "total_tokens": 22992832} +{"current_steps": 34140, "total_steps": 204665, "loss": 0.0803, "lr": 1.9729357957186034e-06, "epoch": 0.8340458798524418, "percentage": 16.68, "elapsed_time": "0:43:50", "remaining_time": "3:38:58", "throughput": 8742.34, "total_tokens": 22996096} +{"current_steps": 34145, "total_steps": 204665, "loss": 0.1056, "lr": 1.972916086607607e-06, "epoch": 0.8341680306842889, "percentage": 16.68, "elapsed_time": "0:43:50", "remaining_time": "3:38:58", "throughput": 8742.68, "total_tokens": 23000128} +{"current_steps": 34150, "total_steps": 204665, "loss": 0.059, "lr": 1.9728963704213044e-06, "epoch": 0.8342901815161361, "percentage": 16.69, "elapsed_time": "0:43:51", "remaining_time": "3:38:57", "throughput": 8742.7, "total_tokens": 23003136} +{"current_steps": 34155, "total_steps": 204665, "loss": 0.0162, "lr": 1.9728766471598367e-06, "epoch": 0.8344123323479833, "percentage": 16.69, "elapsed_time": "0:43:51", "remaining_time": "3:38:56", "throughput": 8742.87, "total_tokens": 23006656} +{"current_steps": 34160, "total_steps": 204665, "loss": 0.0996, "lr": 1.972856916823349e-06, "epoch": 0.8345344831798305, "percentage": 16.69, "elapsed_time": "0:43:51", "remaining_time": "3:38:56", "throughput": 8743.07, "total_tokens": 23010304} +{"current_steps": 34165, "total_steps": 204665, "loss": 0.2583, "lr": 1.9728371794119836e-06, "epoch": 0.8346566340116777, "percentage": 16.69, "elapsed_time": "0:43:52", "remaining_time": "3:38:55", "throughput": 8743.13, "total_tokens": 23013440} +{"current_steps": 34170, "total_steps": 204665, "loss": 0.2494, "lr": 1.9728174349258844e-06, "epoch": 0.8347787848435247, "percentage": 16.7, "elapsed_time": "0:43:52", "remaining_time": "3:38:55", "throughput": 8743.18, "total_tokens": 23016576} +{"current_steps": 34175, "total_steps": 204665, "loss": 0.1302, "lr": 1.972797683365196e-06, "epoch": 0.8349009356753719, "percentage": 16.7, "elapsed_time": "0:43:52", "remaining_time": "3:38:54", "throughput": 8743.22, "total_tokens": 23019648} +{"current_steps": 34180, "total_steps": 204665, "loss": 0.0458, "lr": 1.9727779247300606e-06, "epoch": 0.8350230865072191, "percentage": 16.7, "elapsed_time": "0:43:53", "remaining_time": "3:38:54", "throughput": 8743.31, "total_tokens": 23022912} +{"current_steps": 34185, "total_steps": 204665, "loss": 0.0924, "lr": 1.9727581590206226e-06, "epoch": 0.8351452373390663, "percentage": 16.7, "elapsed_time": "0:43:53", "remaining_time": "3:38:53", "throughput": 8743.54, "total_tokens": 23026624} +{"current_steps": 34190, "total_steps": 204665, "loss": 0.1005, "lr": 1.9727383862370255e-06, "epoch": 0.8352673881709134, "percentage": 16.71, "elapsed_time": "0:43:53", "remaining_time": "3:38:52", "throughput": 8743.61, "total_tokens": 23029824} +{"current_steps": 34195, "total_steps": 204665, "loss": 0.1247, "lr": 1.9727186063794133e-06, "epoch": 0.8353895390027606, "percentage": 16.71, "elapsed_time": "0:43:54", "remaining_time": "3:38:52", "throughput": 8743.81, "total_tokens": 23033408} +{"current_steps": 34200, "total_steps": 204665, "loss": 0.097, "lr": 1.9726988194479303e-06, "epoch": 0.8355116898346078, "percentage": 16.71, "elapsed_time": "0:43:54", "remaining_time": "3:38:51", "throughput": 8743.98, "total_tokens": 23036928} +{"current_steps": 34205, "total_steps": 204665, "loss": 0.1309, "lr": 1.9726790254427194e-06, "epoch": 0.835633840666455, "percentage": 16.71, "elapsed_time": "0:43:54", "remaining_time": "3:38:51", "throughput": 8744.22, "total_tokens": 23040704} +{"current_steps": 34210, "total_steps": 204665, "loss": 0.0704, "lr": 1.972659224363925e-06, "epoch": 0.8357559914983022, "percentage": 16.72, "elapsed_time": "0:43:55", "remaining_time": "3:38:50", "throughput": 8744.41, "total_tokens": 23044288} +{"current_steps": 34215, "total_steps": 204665, "loss": 0.1503, "lr": 1.9726394162116907e-06, "epoch": 0.8358781423301492, "percentage": 16.72, "elapsed_time": "0:43:55", "remaining_time": "3:38:50", "throughput": 8744.5, "total_tokens": 23047552} +{"current_steps": 34220, "total_steps": 204665, "loss": 0.1158, "lr": 1.9726196009861614e-06, "epoch": 0.8360002931619964, "percentage": 16.72, "elapsed_time": "0:43:56", "remaining_time": "3:38:49", "throughput": 8744.63, "total_tokens": 23050944} +{"current_steps": 34225, "total_steps": 204665, "loss": 0.1044, "lr": 1.972599778687481e-06, "epoch": 0.8361224439938436, "percentage": 16.72, "elapsed_time": "0:43:56", "remaining_time": "3:38:49", "throughput": 8744.72, "total_tokens": 23054208} +{"current_steps": 34230, "total_steps": 204665, "loss": 0.0879, "lr": 1.972579949315793e-06, "epoch": 0.8362445948256908, "percentage": 16.72, "elapsed_time": "0:43:56", "remaining_time": "3:38:48", "throughput": 8744.82, "total_tokens": 23057472} +{"current_steps": 34235, "total_steps": 204665, "loss": 0.1846, "lr": 1.972560112871242e-06, "epoch": 0.8363667456575379, "percentage": 16.73, "elapsed_time": "0:43:57", "remaining_time": "3:38:47", "throughput": 8745.04, "total_tokens": 23061184} +{"current_steps": 34240, "total_steps": 204665, "loss": 0.1902, "lr": 1.972540269353972e-06, "epoch": 0.8364888964893851, "percentage": 16.73, "elapsed_time": "0:43:57", "remaining_time": "3:38:47", "throughput": 8745.06, "total_tokens": 23064256} +{"current_steps": 34245, "total_steps": 204665, "loss": 0.0897, "lr": 1.9725204187641282e-06, "epoch": 0.8366110473212323, "percentage": 16.73, "elapsed_time": "0:43:57", "remaining_time": "3:38:46", "throughput": 8745.18, "total_tokens": 23067584} +{"current_steps": 34250, "total_steps": 204665, "loss": 0.1588, "lr": 1.9725005611018544e-06, "epoch": 0.8367331981530794, "percentage": 16.73, "elapsed_time": "0:43:58", "remaining_time": "3:38:46", "throughput": 8745.29, "total_tokens": 23070912} +{"current_steps": 34255, "total_steps": 204665, "loss": 0.0433, "lr": 1.9724806963672947e-06, "epoch": 0.8368553489849266, "percentage": 16.74, "elapsed_time": "0:43:58", "remaining_time": "3:38:45", "throughput": 8745.5, "total_tokens": 23074560} +{"current_steps": 34260, "total_steps": 204665, "loss": 0.0958, "lr": 1.972460824560594e-06, "epoch": 0.8369774998167737, "percentage": 16.74, "elapsed_time": "0:43:58", "remaining_time": "3:38:45", "throughput": 8745.83, "total_tokens": 23078592} +{"current_steps": 34265, "total_steps": 204665, "loss": 0.0886, "lr": 1.972440945681896e-06, "epoch": 0.8370996506486209, "percentage": 16.74, "elapsed_time": "0:43:59", "remaining_time": "3:38:44", "throughput": 8745.94, "total_tokens": 23081920} +{"current_steps": 34270, "total_steps": 204665, "loss": 0.1852, "lr": 1.9724210597313463e-06, "epoch": 0.8372218014804681, "percentage": 16.74, "elapsed_time": "0:43:59", "remaining_time": "3:38:43", "throughput": 8745.96, "total_tokens": 23084928} +{"current_steps": 34275, "total_steps": 204665, "loss": 0.0785, "lr": 1.972401166709089e-06, "epoch": 0.8373439523123153, "percentage": 16.75, "elapsed_time": "0:43:59", "remaining_time": "3:38:43", "throughput": 8745.97, "total_tokens": 23087936} +{"current_steps": 34280, "total_steps": 204665, "loss": 0.1037, "lr": 1.9723812666152695e-06, "epoch": 0.8374661031441624, "percentage": 16.75, "elapsed_time": "0:44:00", "remaining_time": "3:38:42", "throughput": 8746.03, "total_tokens": 23091072} +{"current_steps": 34285, "total_steps": 204665, "loss": 0.0931, "lr": 1.9723613594500312e-06, "epoch": 0.8375882539760096, "percentage": 16.75, "elapsed_time": "0:44:00", "remaining_time": "3:38:42", "throughput": 8746.17, "total_tokens": 23094528} +{"current_steps": 34290, "total_steps": 204665, "loss": 0.1358, "lr": 1.9723414452135197e-06, "epoch": 0.8377104048078567, "percentage": 16.75, "elapsed_time": "0:44:00", "remaining_time": "3:38:41", "throughput": 8746.5, "total_tokens": 23098560} +{"current_steps": 34295, "total_steps": 204665, "loss": 0.0928, "lr": 1.9723215239058797e-06, "epoch": 0.8378325556397039, "percentage": 16.76, "elapsed_time": "0:44:01", "remaining_time": "3:38:41", "throughput": 8746.67, "total_tokens": 23102080} +{"current_steps": 34300, "total_steps": 204665, "loss": 0.0881, "lr": 1.972301595527256e-06, "epoch": 0.8379547064715511, "percentage": 16.76, "elapsed_time": "0:44:01", "remaining_time": "3:38:40", "throughput": 8746.87, "total_tokens": 23105728} +{"current_steps": 34305, "total_steps": 204665, "loss": 0.1869, "lr": 1.9722816600777937e-06, "epoch": 0.8380768573033982, "percentage": 16.76, "elapsed_time": "0:44:01", "remaining_time": "3:38:40", "throughput": 8747.02, "total_tokens": 23109184} +{"current_steps": 34310, "total_steps": 204665, "loss": 0.0709, "lr": 1.972261717557638e-06, "epoch": 0.8381990081352454, "percentage": 16.76, "elapsed_time": "0:44:02", "remaining_time": "3:38:39", "throughput": 8747.16, "total_tokens": 23112576} +{"current_steps": 34315, "total_steps": 204665, "loss": 0.0985, "lr": 1.972241767966933e-06, "epoch": 0.8383211589670926, "percentage": 16.77, "elapsed_time": "0:44:02", "remaining_time": "3:38:38", "throughput": 8747.33, "total_tokens": 23116096} +{"current_steps": 34320, "total_steps": 204665, "loss": 0.1932, "lr": 1.9722218113058246e-06, "epoch": 0.8384433097989398, "percentage": 16.77, "elapsed_time": "0:44:02", "remaining_time": "3:38:38", "throughput": 8747.47, "total_tokens": 23119488} +{"current_steps": 34325, "total_steps": 204665, "loss": 0.1424, "lr": 1.9722018475744573e-06, "epoch": 0.8385654606307869, "percentage": 16.77, "elapsed_time": "0:44:03", "remaining_time": "3:38:37", "throughput": 8747.59, "total_tokens": 23122816} +{"current_steps": 34330, "total_steps": 204665, "loss": 0.0381, "lr": 1.972181876772977e-06, "epoch": 0.838687611462634, "percentage": 16.77, "elapsed_time": "0:44:03", "remaining_time": "3:38:37", "throughput": 8747.63, "total_tokens": 23125888} +{"current_steps": 34335, "total_steps": 204665, "loss": 0.0856, "lr": 1.9721618989015285e-06, "epoch": 0.8388097622944812, "percentage": 16.78, "elapsed_time": "0:44:04", "remaining_time": "3:38:36", "throughput": 8747.76, "total_tokens": 23129280} +{"current_steps": 34340, "total_steps": 204665, "loss": 0.1908, "lr": 1.972141913960257e-06, "epoch": 0.8389319131263284, "percentage": 16.78, "elapsed_time": "0:44:04", "remaining_time": "3:38:35", "throughput": 8747.87, "total_tokens": 23132608} +{"current_steps": 34345, "total_steps": 204665, "loss": 0.0528, "lr": 1.9721219219493087e-06, "epoch": 0.8390540639581755, "percentage": 16.78, "elapsed_time": "0:44:04", "remaining_time": "3:38:35", "throughput": 8748.0, "total_tokens": 23136000} +{"current_steps": 34350, "total_steps": 204665, "loss": 0.1189, "lr": 1.972101922868828e-06, "epoch": 0.8391762147900227, "percentage": 16.78, "elapsed_time": "0:44:05", "remaining_time": "3:38:34", "throughput": 8748.17, "total_tokens": 23139520} +{"current_steps": 34355, "total_steps": 204665, "loss": 0.0453, "lr": 1.9720819167189605e-06, "epoch": 0.8392983656218699, "percentage": 16.79, "elapsed_time": "0:44:05", "remaining_time": "3:38:34", "throughput": 8748.39, "total_tokens": 23143168} +{"current_steps": 34360, "total_steps": 204665, "loss": 0.0662, "lr": 1.972061903499852e-06, "epoch": 0.8394205164537171, "percentage": 16.79, "elapsed_time": "0:44:05", "remaining_time": "3:38:33", "throughput": 8748.56, "total_tokens": 23146688} +{"current_steps": 34365, "total_steps": 204665, "loss": 0.1032, "lr": 1.972041883211648e-06, "epoch": 0.8395426672855643, "percentage": 16.79, "elapsed_time": "0:44:06", "remaining_time": "3:38:33", "throughput": 8748.64, "total_tokens": 23149888} +{"current_steps": 34370, "total_steps": 204665, "loss": 0.1527, "lr": 1.9720218558544937e-06, "epoch": 0.8396648181174113, "percentage": 16.79, "elapsed_time": "0:44:06", "remaining_time": "3:38:32", "throughput": 8748.8, "total_tokens": 23153344} +{"current_steps": 34375, "total_steps": 204665, "loss": 0.1562, "lr": 1.972001821428535e-06, "epoch": 0.8397869689492585, "percentage": 16.8, "elapsed_time": "0:44:06", "remaining_time": "3:38:31", "throughput": 8748.9, "total_tokens": 23156608} +{"current_steps": 34380, "total_steps": 204665, "loss": 0.0158, "lr": 1.9719817799339178e-06, "epoch": 0.8399091197811057, "percentage": 16.8, "elapsed_time": "0:44:07", "remaining_time": "3:38:31", "throughput": 8749.01, "total_tokens": 23159936} +{"current_steps": 34385, "total_steps": 204665, "loss": 0.1794, "lr": 1.9719617313707875e-06, "epoch": 0.8400312706129529, "percentage": 16.8, "elapsed_time": "0:44:07", "remaining_time": "3:38:30", "throughput": 8749.08, "total_tokens": 23163136} +{"current_steps": 34390, "total_steps": 204665, "loss": 0.1996, "lr": 1.9719416757392906e-06, "epoch": 0.8401534214448, "percentage": 16.8, "elapsed_time": "0:44:07", "remaining_time": "3:38:30", "throughput": 8749.16, "total_tokens": 23166336} +{"current_steps": 34395, "total_steps": 204665, "loss": 0.1569, "lr": 1.9719216130395718e-06, "epoch": 0.8402755722766472, "percentage": 16.81, "elapsed_time": "0:44:08", "remaining_time": "3:38:29", "throughput": 8749.23, "total_tokens": 23169536} +{"current_steps": 34400, "total_steps": 204665, "loss": 0.1087, "lr": 1.9719015432717776e-06, "epoch": 0.8403977231084944, "percentage": 16.81, "elapsed_time": "0:44:08", "remaining_time": "3:38:29", "throughput": 8749.24, "total_tokens": 23172544} +{"current_steps": 34405, "total_steps": 204665, "loss": 0.0696, "lr": 1.9718814664360543e-06, "epoch": 0.8405198739403416, "percentage": 16.81, "elapsed_time": "0:44:08", "remaining_time": "3:38:28", "throughput": 8749.37, "total_tokens": 23175936} +{"current_steps": 34410, "total_steps": 204665, "loss": 0.1507, "lr": 1.9718613825325474e-06, "epoch": 0.8406420247721887, "percentage": 16.81, "elapsed_time": "0:44:09", "remaining_time": "3:38:27", "throughput": 8749.45, "total_tokens": 23179136} +{"current_steps": 34415, "total_steps": 204665, "loss": 0.1766, "lr": 1.971841291561403e-06, "epoch": 0.8407641756040358, "percentage": 16.82, "elapsed_time": "0:44:09", "remaining_time": "3:38:27", "throughput": 8749.56, "total_tokens": 23182464} +{"current_steps": 34420, "total_steps": 204665, "loss": 0.0337, "lr": 1.9718211935227676e-06, "epoch": 0.840886326435883, "percentage": 16.82, "elapsed_time": "0:44:09", "remaining_time": "3:38:26", "throughput": 8749.92, "total_tokens": 23186624} +{"current_steps": 34425, "total_steps": 204665, "loss": 0.0837, "lr": 1.971801088416787e-06, "epoch": 0.8410084772677302, "percentage": 16.82, "elapsed_time": "0:44:10", "remaining_time": "3:38:26", "throughput": 8750.04, "total_tokens": 23190016} +{"current_steps": 34430, "total_steps": 204665, "loss": 0.065, "lr": 1.9717809762436075e-06, "epoch": 0.8411306280995774, "percentage": 16.82, "elapsed_time": "0:44:10", "remaining_time": "3:38:25", "throughput": 8750.16, "total_tokens": 23193344} +{"current_steps": 34435, "total_steps": 204665, "loss": 0.0964, "lr": 1.9717608570033755e-06, "epoch": 0.8412527789314245, "percentage": 16.83, "elapsed_time": "0:44:10", "remaining_time": "3:38:25", "throughput": 8750.35, "total_tokens": 23196928} +{"current_steps": 34440, "total_steps": 204665, "loss": 0.0474, "lr": 1.971740730696237e-06, "epoch": 0.8413749297632717, "percentage": 16.83, "elapsed_time": "0:44:11", "remaining_time": "3:38:24", "throughput": 8750.44, "total_tokens": 23200192} +{"current_steps": 34445, "total_steps": 204665, "loss": 0.1256, "lr": 1.9717205973223386e-06, "epoch": 0.8414970805951189, "percentage": 16.83, "elapsed_time": "0:44:11", "remaining_time": "3:38:23", "throughput": 8750.45, "total_tokens": 23203200} +{"current_steps": 34450, "total_steps": 204665, "loss": 0.0673, "lr": 1.9717004568818266e-06, "epoch": 0.841619231426966, "percentage": 16.83, "elapsed_time": "0:44:12", "remaining_time": "3:38:23", "throughput": 8750.54, "total_tokens": 23206464} +{"current_steps": 34455, "total_steps": 204665, "loss": 0.074, "lr": 1.9716803093748474e-06, "epoch": 0.8417413822588132, "percentage": 16.83, "elapsed_time": "0:44:12", "remaining_time": "3:38:22", "throughput": 8750.59, "total_tokens": 23209600} +{"current_steps": 34460, "total_steps": 204665, "loss": 0.1398, "lr": 1.971660154801548e-06, "epoch": 0.8418635330906603, "percentage": 16.84, "elapsed_time": "0:44:12", "remaining_time": "3:38:22", "throughput": 8750.66, "total_tokens": 23212800} +{"current_steps": 34465, "total_steps": 204665, "loss": 0.0769, "lr": 1.9716399931620743e-06, "epoch": 0.8419856839225075, "percentage": 16.84, "elapsed_time": "0:44:13", "remaining_time": "3:38:21", "throughput": 8750.68, "total_tokens": 23215872} +{"current_steps": 34470, "total_steps": 204665, "loss": 0.1463, "lr": 1.9716198244565734e-06, "epoch": 0.8421078347543547, "percentage": 16.84, "elapsed_time": "0:44:13", "remaining_time": "3:38:21", "throughput": 8750.86, "total_tokens": 23219456} +{"current_steps": 34475, "total_steps": 204665, "loss": 0.0977, "lr": 1.9715996486851915e-06, "epoch": 0.8422299855862019, "percentage": 16.84, "elapsed_time": "0:44:13", "remaining_time": "3:38:20", "throughput": 8751.01, "total_tokens": 23222912} +{"current_steps": 34480, "total_steps": 204665, "loss": 0.0233, "lr": 1.971579465848076e-06, "epoch": 0.842352136418049, "percentage": 16.85, "elapsed_time": "0:44:14", "remaining_time": "3:38:19", "throughput": 8751.01, "total_tokens": 23225920} +{"current_steps": 34485, "total_steps": 204665, "loss": 0.0681, "lr": 1.971559275945373e-06, "epoch": 0.8424742872498961, "percentage": 16.85, "elapsed_time": "0:44:14", "remaining_time": "3:38:19", "throughput": 8751.04, "total_tokens": 23228992} +{"current_steps": 34490, "total_steps": 204665, "loss": 0.2153, "lr": 1.9715390789772297e-06, "epoch": 0.8425964380817433, "percentage": 16.85, "elapsed_time": "0:44:14", "remaining_time": "3:38:18", "throughput": 8751.08, "total_tokens": 23232064} +{"current_steps": 34495, "total_steps": 204665, "loss": 0.1866, "lr": 1.971518874943793e-06, "epoch": 0.8427185889135905, "percentage": 16.85, "elapsed_time": "0:44:15", "remaining_time": "3:38:18", "throughput": 8751.18, "total_tokens": 23235392} +{"current_steps": 34500, "total_steps": 204665, "loss": 0.0846, "lr": 1.97149866384521e-06, "epoch": 0.8428407397454377, "percentage": 16.86, "elapsed_time": "0:44:15", "remaining_time": "3:38:17", "throughput": 8751.38, "total_tokens": 23239040} +{"current_steps": 34505, "total_steps": 204665, "loss": 0.1035, "lr": 1.971478445681627e-06, "epoch": 0.8429628905772848, "percentage": 16.86, "elapsed_time": "0:44:15", "remaining_time": "3:38:17", "throughput": 8751.53, "total_tokens": 23242496} +{"current_steps": 34510, "total_steps": 204665, "loss": 0.0301, "lr": 1.9714582204531916e-06, "epoch": 0.843085041409132, "percentage": 16.86, "elapsed_time": "0:44:16", "remaining_time": "3:38:16", "throughput": 8751.6, "total_tokens": 23245696} +{"current_steps": 34515, "total_steps": 204665, "loss": 0.1464, "lr": 1.9714379881600507e-06, "epoch": 0.8432071922409792, "percentage": 16.86, "elapsed_time": "0:44:16", "remaining_time": "3:38:15", "throughput": 8751.74, "total_tokens": 23249152} +{"current_steps": 34520, "total_steps": 204665, "loss": 0.3484, "lr": 1.9714177488023514e-06, "epoch": 0.8433293430728264, "percentage": 16.87, "elapsed_time": "0:44:16", "remaining_time": "3:38:15", "throughput": 8751.69, "total_tokens": 23251968} +{"current_steps": 34525, "total_steps": 204665, "loss": 0.0453, "lr": 1.971397502380241e-06, "epoch": 0.8434514939046734, "percentage": 16.87, "elapsed_time": "0:44:17", "remaining_time": "3:38:14", "throughput": 8751.7, "total_tokens": 23254976} +{"current_steps": 34530, "total_steps": 204665, "loss": 0.0724, "lr": 1.971377248893867e-06, "epoch": 0.8435736447365206, "percentage": 16.87, "elapsed_time": "0:44:17", "remaining_time": "3:38:14", "throughput": 8751.74, "total_tokens": 23258048} +{"current_steps": 34535, "total_steps": 204665, "loss": 0.1655, "lr": 1.971356988343376e-06, "epoch": 0.8436957955683678, "percentage": 16.87, "elapsed_time": "0:44:17", "remaining_time": "3:38:13", "throughput": 8752.08, "total_tokens": 23262144} +{"current_steps": 34540, "total_steps": 204665, "loss": 0.0349, "lr": 1.971336720728916e-06, "epoch": 0.843817946400215, "percentage": 16.88, "elapsed_time": "0:44:18", "remaining_time": "3:38:13", "throughput": 8752.4, "total_tokens": 23266176} +{"current_steps": 34545, "total_steps": 204665, "loss": 0.122, "lr": 1.9713164460506337e-06, "epoch": 0.8439400972320622, "percentage": 16.88, "elapsed_time": "0:44:18", "remaining_time": "3:38:12", "throughput": 8752.48, "total_tokens": 23269440} +{"current_steps": 34550, "total_steps": 204665, "loss": 0.1521, "lr": 1.971296164308677e-06, "epoch": 0.8440622480639093, "percentage": 16.88, "elapsed_time": "0:44:18", "remaining_time": "3:38:11", "throughput": 8752.59, "total_tokens": 23272768} +{"current_steps": 34555, "total_steps": 204665, "loss": 0.1201, "lr": 1.971275875503194e-06, "epoch": 0.8441843988957565, "percentage": 16.88, "elapsed_time": "0:44:19", "remaining_time": "3:38:11", "throughput": 8752.77, "total_tokens": 23276288} +{"current_steps": 34560, "total_steps": 204665, "loss": 0.0251, "lr": 1.9712555796343307e-06, "epoch": 0.8443065497276037, "percentage": 16.89, "elapsed_time": "0:44:19", "remaining_time": "3:38:10", "throughput": 8752.82, "total_tokens": 23279424} +{"current_steps": 34565, "total_steps": 204665, "loss": 0.0856, "lr": 1.9712352767022364e-06, "epoch": 0.8444287005594509, "percentage": 16.89, "elapsed_time": "0:44:19", "remaining_time": "3:38:10", "throughput": 8752.94, "total_tokens": 23282816} +{"current_steps": 34570, "total_steps": 204665, "loss": 0.0897, "lr": 1.971214966707057e-06, "epoch": 0.8445508513912979, "percentage": 16.89, "elapsed_time": "0:44:20", "remaining_time": "3:38:09", "throughput": 8753.06, "total_tokens": 23286208} +{"current_steps": 34575, "total_steps": 204665, "loss": 0.0634, "lr": 1.971194649648942e-06, "epoch": 0.8446730022231451, "percentage": 16.89, "elapsed_time": "0:44:20", "remaining_time": "3:38:09", "throughput": 8753.17, "total_tokens": 23289536} +{"current_steps": 34580, "total_steps": 204665, "loss": 0.0682, "lr": 1.971174325528038e-06, "epoch": 0.8447951530549923, "percentage": 16.9, "elapsed_time": "0:44:21", "remaining_time": "3:38:08", "throughput": 8753.39, "total_tokens": 23293248} +{"current_steps": 34585, "total_steps": 204665, "loss": 0.1036, "lr": 1.971153994344493e-06, "epoch": 0.8449173038868395, "percentage": 16.9, "elapsed_time": "0:44:21", "remaining_time": "3:38:08", "throughput": 8753.36, "total_tokens": 23296128} +{"current_steps": 34590, "total_steps": 204665, "loss": 0.1706, "lr": 1.971133656098455e-06, "epoch": 0.8450394547186867, "percentage": 16.9, "elapsed_time": "0:44:21", "remaining_time": "3:38:07", "throughput": 8753.37, "total_tokens": 23299136} +{"current_steps": 34595, "total_steps": 204665, "loss": 0.2031, "lr": 1.9711133107900715e-06, "epoch": 0.8451616055505338, "percentage": 16.9, "elapsed_time": "0:44:22", "remaining_time": "3:38:06", "throughput": 8753.4, "total_tokens": 23302208} +{"current_steps": 34600, "total_steps": 204665, "loss": 0.0252, "lr": 1.971092958419491e-06, "epoch": 0.845283756382381, "percentage": 16.91, "elapsed_time": "0:44:22", "remaining_time": "3:38:06", "throughput": 8753.46, "total_tokens": 23305344} +{"current_steps": 34605, "total_steps": 204665, "loss": 0.151, "lr": 1.971072598986862e-06, "epoch": 0.8454059072142281, "percentage": 16.91, "elapsed_time": "0:44:22", "remaining_time": "3:38:05", "throughput": 8753.51, "total_tokens": 23308480} +{"current_steps": 34610, "total_steps": 204665, "loss": 0.1835, "lr": 1.971052232492331e-06, "epoch": 0.8455280580460753, "percentage": 16.91, "elapsed_time": "0:44:23", "remaining_time": "3:38:05", "throughput": 8753.6, "total_tokens": 23311744} +{"current_steps": 34615, "total_steps": 204665, "loss": 0.0506, "lr": 1.9710318589360476e-06, "epoch": 0.8456502088779224, "percentage": 16.91, "elapsed_time": "0:44:23", "remaining_time": "3:38:04", "throughput": 8753.65, "total_tokens": 23314880} +{"current_steps": 34620, "total_steps": 204665, "loss": 0.1065, "lr": 1.971011478318159e-06, "epoch": 0.8457723597097696, "percentage": 16.92, "elapsed_time": "0:44:23", "remaining_time": "3:38:03", "throughput": 8753.76, "total_tokens": 23318208} +{"current_steps": 34625, "total_steps": 204665, "loss": 0.1044, "lr": 1.970991090638814e-06, "epoch": 0.8458945105416168, "percentage": 16.92, "elapsed_time": "0:44:24", "remaining_time": "3:38:03", "throughput": 8753.9, "total_tokens": 23321600} +{"current_steps": 34630, "total_steps": 204665, "loss": 0.1925, "lr": 1.9709706958981602e-06, "epoch": 0.846016661373464, "percentage": 16.92, "elapsed_time": "0:44:24", "remaining_time": "3:38:02", "throughput": 8754.12, "total_tokens": 23325312} +{"current_steps": 34635, "total_steps": 204665, "loss": 0.1667, "lr": 1.9709502940963468e-06, "epoch": 0.8461388122053111, "percentage": 16.92, "elapsed_time": "0:44:24", "remaining_time": "3:38:02", "throughput": 8754.41, "total_tokens": 23329216} +{"current_steps": 34640, "total_steps": 204665, "loss": 0.0684, "lr": 1.9709298852335214e-06, "epoch": 0.8462609630371583, "percentage": 16.93, "elapsed_time": "0:44:25", "remaining_time": "3:38:01", "throughput": 8754.7, "total_tokens": 23333120} +{"current_steps": 34645, "total_steps": 204665, "loss": 0.1576, "lr": 1.9709094693098328e-06, "epoch": 0.8463831138690054, "percentage": 16.93, "elapsed_time": "0:44:25", "remaining_time": "3:38:01", "throughput": 8754.9, "total_tokens": 23336768} +{"current_steps": 34650, "total_steps": 204665, "loss": 0.1096, "lr": 1.970889046325429e-06, "epoch": 0.8465052647008526, "percentage": 16.93, "elapsed_time": "0:44:25", "remaining_time": "3:38:00", "throughput": 8755.17, "total_tokens": 23340608} +{"current_steps": 34655, "total_steps": 204665, "loss": 0.1202, "lr": 1.97086861628046e-06, "epoch": 0.8466274155326998, "percentage": 16.93, "elapsed_time": "0:44:26", "remaining_time": "3:38:00", "throughput": 8755.24, "total_tokens": 23343808} +{"current_steps": 34660, "total_steps": 204665, "loss": 0.0725, "lr": 1.9708481791750726e-06, "epoch": 0.8467495663645469, "percentage": 16.93, "elapsed_time": "0:44:26", "remaining_time": "3:37:59", "throughput": 8755.42, "total_tokens": 23347328} +{"current_steps": 34665, "total_steps": 204665, "loss": 0.2058, "lr": 1.970827735009416e-06, "epoch": 0.8468717171963941, "percentage": 16.94, "elapsed_time": "0:44:26", "remaining_time": "3:37:59", "throughput": 8755.7, "total_tokens": 23351232} +{"current_steps": 34670, "total_steps": 204665, "loss": 0.1002, "lr": 1.970807283783639e-06, "epoch": 0.8469938680282413, "percentage": 16.94, "elapsed_time": "0:44:27", "remaining_time": "3:37:58", "throughput": 8755.84, "total_tokens": 23354624} +{"current_steps": 34675, "total_steps": 204665, "loss": 0.1547, "lr": 1.9707868254978904e-06, "epoch": 0.8471160188600885, "percentage": 16.94, "elapsed_time": "0:44:27", "remaining_time": "3:37:57", "throughput": 8755.96, "total_tokens": 23358016} +{"current_steps": 34680, "total_steps": 204665, "loss": 0.0944, "lr": 1.970766360152319e-06, "epoch": 0.8472381696919355, "percentage": 16.94, "elapsed_time": "0:44:28", "remaining_time": "3:37:57", "throughput": 8756.15, "total_tokens": 23361600} +{"current_steps": 34685, "total_steps": 204665, "loss": 0.1293, "lr": 1.9707458877470735e-06, "epoch": 0.8473603205237827, "percentage": 16.95, "elapsed_time": "0:44:28", "remaining_time": "3:37:56", "throughput": 8756.27, "total_tokens": 23364928} +{"current_steps": 34690, "total_steps": 204665, "loss": 0.0862, "lr": 1.970725408282303e-06, "epoch": 0.8474824713556299, "percentage": 16.95, "elapsed_time": "0:44:28", "remaining_time": "3:37:56", "throughput": 8756.38, "total_tokens": 23368256} +{"current_steps": 34695, "total_steps": 204665, "loss": 0.1469, "lr": 1.970704921758156e-06, "epoch": 0.8476046221874771, "percentage": 16.95, "elapsed_time": "0:44:29", "remaining_time": "3:37:55", "throughput": 8756.67, "total_tokens": 23372160} +{"current_steps": 34700, "total_steps": 204665, "loss": 0.1028, "lr": 1.9706844281747817e-06, "epoch": 0.8477267730193243, "percentage": 16.95, "elapsed_time": "0:44:29", "remaining_time": "3:37:55", "throughput": 8756.76, "total_tokens": 23375424} +{"current_steps": 34705, "total_steps": 204665, "loss": 0.0444, "lr": 1.970663927532329e-06, "epoch": 0.8478489238511714, "percentage": 16.96, "elapsed_time": "0:44:29", "remaining_time": "3:37:54", "throughput": 8756.95, "total_tokens": 23379072} +{"current_steps": 34710, "total_steps": 204665, "loss": 0.0764, "lr": 1.9706434198309472e-06, "epoch": 0.8479710746830186, "percentage": 16.96, "elapsed_time": "0:44:30", "remaining_time": "3:37:54", "throughput": 8757.22, "total_tokens": 23382912} +{"current_steps": 34715, "total_steps": 204665, "loss": 0.1532, "lr": 1.9706229050707855e-06, "epoch": 0.8480932255148658, "percentage": 16.96, "elapsed_time": "0:44:30", "remaining_time": "3:37:53", "throughput": 8757.23, "total_tokens": 23385920} +{"current_steps": 34720, "total_steps": 204665, "loss": 0.0107, "lr": 1.9706023832519932e-06, "epoch": 0.848215376346713, "percentage": 16.96, "elapsed_time": "0:44:30", "remaining_time": "3:37:52", "throughput": 8757.34, "total_tokens": 23389248} +{"current_steps": 34725, "total_steps": 204665, "loss": 0.2432, "lr": 1.970581854374719e-06, "epoch": 0.84833752717856, "percentage": 16.97, "elapsed_time": "0:44:31", "remaining_time": "3:37:52", "throughput": 8757.37, "total_tokens": 23392320} +{"current_steps": 34730, "total_steps": 204665, "loss": 0.0913, "lr": 1.9705613184391124e-06, "epoch": 0.8484596780104072, "percentage": 16.97, "elapsed_time": "0:44:31", "remaining_time": "3:37:51", "throughput": 8757.4, "total_tokens": 23395392} +{"current_steps": 34735, "total_steps": 204665, "loss": 0.0944, "lr": 1.970540775445323e-06, "epoch": 0.8485818288422544, "percentage": 16.97, "elapsed_time": "0:44:31", "remaining_time": "3:37:51", "throughput": 8757.47, "total_tokens": 23398592} +{"current_steps": 34740, "total_steps": 204665, "loss": 0.1853, "lr": 1.9705202253935e-06, "epoch": 0.8487039796741016, "percentage": 16.97, "elapsed_time": "0:44:32", "remaining_time": "3:37:50", "throughput": 8757.65, "total_tokens": 23402112} +{"current_steps": 34745, "total_steps": 204665, "loss": 0.1728, "lr": 1.970499668283793e-06, "epoch": 0.8488261305059488, "percentage": 16.98, "elapsed_time": "0:44:32", "remaining_time": "3:37:50", "throughput": 8757.73, "total_tokens": 23405376} +{"current_steps": 34750, "total_steps": 204665, "loss": 0.0532, "lr": 1.9704791041163514e-06, "epoch": 0.8489482813377959, "percentage": 16.98, "elapsed_time": "0:44:32", "remaining_time": "3:37:49", "throughput": 8757.94, "total_tokens": 23409024} +{"current_steps": 34755, "total_steps": 204665, "loss": 0.1421, "lr": 1.9704585328913247e-06, "epoch": 0.8490704321696431, "percentage": 16.98, "elapsed_time": "0:44:33", "remaining_time": "3:37:48", "throughput": 8758.01, "total_tokens": 23412224} +{"current_steps": 34760, "total_steps": 204665, "loss": 0.1233, "lr": 1.9704379546088626e-06, "epoch": 0.8491925830014903, "percentage": 16.98, "elapsed_time": "0:44:33", "remaining_time": "3:37:48", "throughput": 8758.11, "total_tokens": 23415552} +{"current_steps": 34765, "total_steps": 204665, "loss": 0.077, "lr": 1.9704173692691142e-06, "epoch": 0.8493147338333374, "percentage": 16.99, "elapsed_time": "0:44:33", "remaining_time": "3:37:47", "throughput": 8758.36, "total_tokens": 23419328} +{"current_steps": 34770, "total_steps": 204665, "loss": 0.1388, "lr": 1.9703967768722305e-06, "epoch": 0.8494368846651845, "percentage": 16.99, "elapsed_time": "0:44:34", "remaining_time": "3:37:47", "throughput": 8758.46, "total_tokens": 23422656} +{"current_steps": 34775, "total_steps": 204665, "loss": 0.073, "lr": 1.97037617741836e-06, "epoch": 0.8495590354970317, "percentage": 16.99, "elapsed_time": "0:44:34", "remaining_time": "3:37:46", "throughput": 8758.56, "total_tokens": 23425920} +{"current_steps": 34780, "total_steps": 204665, "loss": 0.1236, "lr": 1.9703555709076528e-06, "epoch": 0.8496811863288789, "percentage": 16.99, "elapsed_time": "0:44:34", "remaining_time": "3:37:46", "throughput": 8758.81, "total_tokens": 23429696} +{"current_steps": 34785, "total_steps": 204665, "loss": 0.045, "lr": 1.9703349573402587e-06, "epoch": 0.8498033371607261, "percentage": 17.0, "elapsed_time": "0:44:35", "remaining_time": "3:37:45", "throughput": 8759.08, "total_tokens": 23433600} +{"current_steps": 34790, "total_steps": 204665, "loss": 0.0993, "lr": 1.970314336716328e-06, "epoch": 0.8499254879925733, "percentage": 17.0, "elapsed_time": "0:44:35", "remaining_time": "3:37:45", "throughput": 8759.26, "total_tokens": 23437184} +{"current_steps": 34795, "total_steps": 204665, "loss": 0.0301, "lr": 1.9702937090360107e-06, "epoch": 0.8500476388244204, "percentage": 17.0, "elapsed_time": "0:44:36", "remaining_time": "3:37:44", "throughput": 8759.44, "total_tokens": 23440768} +{"current_steps": 34800, "total_steps": 204665, "loss": 0.113, "lr": 1.9702730742994566e-06, "epoch": 0.8501697896562675, "percentage": 17.0, "elapsed_time": "0:44:36", "remaining_time": "3:37:43", "throughput": 8759.46, "total_tokens": 23443776} +{"current_steps": 34805, "total_steps": 204665, "loss": 0.1212, "lr": 1.9702524325068156e-06, "epoch": 0.8502919404881147, "percentage": 17.01, "elapsed_time": "0:44:36", "remaining_time": "3:37:43", "throughput": 8759.65, "total_tokens": 23447360} +{"current_steps": 34810, "total_steps": 204665, "loss": 0.1604, "lr": 1.9702317836582378e-06, "epoch": 0.8504140913199619, "percentage": 17.01, "elapsed_time": "0:44:37", "remaining_time": "3:37:42", "throughput": 8759.81, "total_tokens": 23450880} +{"current_steps": 34815, "total_steps": 204665, "loss": 0.113, "lr": 1.9702111277538737e-06, "epoch": 0.850536242151809, "percentage": 17.01, "elapsed_time": "0:44:37", "remaining_time": "3:37:42", "throughput": 8760.08, "total_tokens": 23454720} +{"current_steps": 34820, "total_steps": 204665, "loss": 0.1991, "lr": 1.970190464793873e-06, "epoch": 0.8506583929836562, "percentage": 17.01, "elapsed_time": "0:44:37", "remaining_time": "3:37:41", "throughput": 8760.15, "total_tokens": 23457920} +{"current_steps": 34825, "total_steps": 204665, "loss": 0.1783, "lr": 1.9701697947783866e-06, "epoch": 0.8507805438155034, "percentage": 17.02, "elapsed_time": "0:44:38", "remaining_time": "3:37:41", "throughput": 8760.33, "total_tokens": 23461504} +{"current_steps": 34830, "total_steps": 204665, "loss": 0.1332, "lr": 1.9701491177075645e-06, "epoch": 0.8509026946473506, "percentage": 17.02, "elapsed_time": "0:44:38", "remaining_time": "3:37:40", "throughput": 8760.48, "total_tokens": 23464960} +{"current_steps": 34835, "total_steps": 204665, "loss": 0.0566, "lr": 1.9701284335815573e-06, "epoch": 0.8510248454791978, "percentage": 17.02, "elapsed_time": "0:44:38", "remaining_time": "3:37:40", "throughput": 8760.64, "total_tokens": 23468480} +{"current_steps": 34840, "total_steps": 204665, "loss": 0.0771, "lr": 1.9701077424005148e-06, "epoch": 0.8511469963110448, "percentage": 17.02, "elapsed_time": "0:44:39", "remaining_time": "3:37:39", "throughput": 8760.86, "total_tokens": 23472192} +{"current_steps": 34845, "total_steps": 204665, "loss": 0.0344, "lr": 1.970087044164588e-06, "epoch": 0.851269147142892, "percentage": 17.03, "elapsed_time": "0:44:39", "remaining_time": "3:37:39", "throughput": 8760.96, "total_tokens": 23475520} +{"current_steps": 34850, "total_steps": 204665, "loss": 0.1812, "lr": 1.970066338873927e-06, "epoch": 0.8513912979747392, "percentage": 17.03, "elapsed_time": "0:44:39", "remaining_time": "3:37:38", "throughput": 8760.98, "total_tokens": 23478528} +{"current_steps": 34855, "total_steps": 204665, "loss": 0.0944, "lr": 1.9700456265286827e-06, "epoch": 0.8515134488065864, "percentage": 17.03, "elapsed_time": "0:44:40", "remaining_time": "3:37:37", "throughput": 8761.2, "total_tokens": 23482240} +{"current_steps": 34860, "total_steps": 204665, "loss": 0.0797, "lr": 1.970024907129006e-06, "epoch": 0.8516355996384335, "percentage": 17.03, "elapsed_time": "0:44:40", "remaining_time": "3:37:37", "throughput": 8761.32, "total_tokens": 23485568} +{"current_steps": 34865, "total_steps": 204665, "loss": 0.0867, "lr": 1.9700041806750477e-06, "epoch": 0.8517577504702807, "percentage": 17.04, "elapsed_time": "0:44:40", "remaining_time": "3:37:36", "throughput": 8761.41, "total_tokens": 23488832} +{"current_steps": 34870, "total_steps": 204665, "loss": 0.1091, "lr": 1.9699834471669574e-06, "epoch": 0.8518799013021279, "percentage": 17.04, "elapsed_time": "0:44:41", "remaining_time": "3:37:36", "throughput": 8761.67, "total_tokens": 23492672} +{"current_steps": 34875, "total_steps": 204665, "loss": 0.0721, "lr": 1.9699627066048868e-06, "epoch": 0.8520020521339751, "percentage": 17.04, "elapsed_time": "0:44:41", "remaining_time": "3:37:35", "throughput": 8761.77, "total_tokens": 23496000} +{"current_steps": 34880, "total_steps": 204665, "loss": 0.0762, "lr": 1.9699419589889863e-06, "epoch": 0.8521242029658221, "percentage": 17.04, "elapsed_time": "0:44:42", "remaining_time": "3:37:35", "throughput": 8762.06, "total_tokens": 23499968} +{"current_steps": 34885, "total_steps": 204665, "loss": 0.1507, "lr": 1.9699212043194075e-06, "epoch": 0.8522463537976693, "percentage": 17.04, "elapsed_time": "0:44:42", "remaining_time": "3:37:34", "throughput": 8762.12, "total_tokens": 23503168} +{"current_steps": 34890, "total_steps": 204665, "loss": 0.1667, "lr": 1.9699004425963003e-06, "epoch": 0.8523685046295165, "percentage": 17.05, "elapsed_time": "0:44:42", "remaining_time": "3:37:34", "throughput": 8762.2, "total_tokens": 23506368} +{"current_steps": 34895, "total_steps": 204665, "loss": 0.1714, "lr": 1.9698796738198163e-06, "epoch": 0.8524906554613637, "percentage": 17.05, "elapsed_time": "0:44:43", "remaining_time": "3:37:33", "throughput": 8762.25, "total_tokens": 23509440} +{"current_steps": 34900, "total_steps": 204665, "loss": 0.0708, "lr": 1.9698588979901064e-06, "epoch": 0.8526128062932109, "percentage": 17.05, "elapsed_time": "0:44:43", "remaining_time": "3:37:32", "throughput": 8762.32, "total_tokens": 23512640} +{"current_steps": 34905, "total_steps": 204665, "loss": 0.0841, "lr": 1.969838115107322e-06, "epoch": 0.852734957125058, "percentage": 17.05, "elapsed_time": "0:44:43", "remaining_time": "3:37:32", "throughput": 8762.42, "total_tokens": 23515968} +{"current_steps": 34910, "total_steps": 204665, "loss": 0.0816, "lr": 1.9698173251716138e-06, "epoch": 0.8528571079569052, "percentage": 17.06, "elapsed_time": "0:44:44", "remaining_time": "3:37:31", "throughput": 8762.55, "total_tokens": 23519360} +{"current_steps": 34915, "total_steps": 204665, "loss": 0.1015, "lr": 1.9697965281831335e-06, "epoch": 0.8529792587887524, "percentage": 17.06, "elapsed_time": "0:44:44", "remaining_time": "3:37:31", "throughput": 8762.59, "total_tokens": 23522496} +{"current_steps": 34920, "total_steps": 204665, "loss": 0.0559, "lr": 1.9697757241420315e-06, "epoch": 0.8531014096205995, "percentage": 17.06, "elapsed_time": "0:44:44", "remaining_time": "3:37:30", "throughput": 8762.84, "total_tokens": 23526272} +{"current_steps": 34925, "total_steps": 204665, "loss": 0.0682, "lr": 1.96975491304846e-06, "epoch": 0.8532235604524466, "percentage": 17.06, "elapsed_time": "0:44:45", "remaining_time": "3:37:30", "throughput": 8762.93, "total_tokens": 23529536} +{"current_steps": 34930, "total_steps": 204665, "loss": 0.0585, "lr": 1.9697340949025697e-06, "epoch": 0.8533457112842938, "percentage": 17.07, "elapsed_time": "0:44:45", "remaining_time": "3:37:29", "throughput": 8762.99, "total_tokens": 23532672} +{"current_steps": 34935, "total_steps": 204665, "loss": 0.0794, "lr": 1.9697132697045126e-06, "epoch": 0.853467862116141, "percentage": 17.07, "elapsed_time": "0:44:45", "remaining_time": "3:37:28", "throughput": 8763.21, "total_tokens": 23536384} +{"current_steps": 34940, "total_steps": 204665, "loss": 0.1116, "lr": 1.969692437454439e-06, "epoch": 0.8535900129479882, "percentage": 17.07, "elapsed_time": "0:44:46", "remaining_time": "3:37:28", "throughput": 8763.43, "total_tokens": 23540096} +{"current_steps": 34945, "total_steps": 204665, "loss": 0.1717, "lr": 1.969671598152502e-06, "epoch": 0.8537121637798354, "percentage": 17.07, "elapsed_time": "0:44:46", "remaining_time": "3:37:27", "throughput": 8763.77, "total_tokens": 23544256} +{"current_steps": 34950, "total_steps": 204665, "loss": 0.1252, "lr": 1.9696507517988517e-06, "epoch": 0.8538343146116825, "percentage": 17.08, "elapsed_time": "0:44:46", "remaining_time": "3:37:27", "throughput": 8763.83, "total_tokens": 23547392} +{"current_steps": 34955, "total_steps": 204665, "loss": 0.1007, "lr": 1.9696298983936408e-06, "epoch": 0.8539564654435297, "percentage": 17.08, "elapsed_time": "0:44:47", "remaining_time": "3:37:26", "throughput": 8763.89, "total_tokens": 23550592} +{"current_steps": 34960, "total_steps": 204665, "loss": 0.0744, "lr": 1.9696090379370203e-06, "epoch": 0.8540786162753768, "percentage": 17.08, "elapsed_time": "0:44:47", "remaining_time": "3:37:26", "throughput": 8764.23, "total_tokens": 23554688} +{"current_steps": 34965, "total_steps": 204665, "loss": 0.145, "lr": 1.9695881704291423e-06, "epoch": 0.854200767107224, "percentage": 17.08, "elapsed_time": "0:44:47", "remaining_time": "3:37:25", "throughput": 8764.48, "total_tokens": 23558464} +{"current_steps": 34970, "total_steps": 204665, "loss": 0.0465, "lr": 1.969567295870158e-06, "epoch": 0.8543229179390711, "percentage": 17.09, "elapsed_time": "0:44:48", "remaining_time": "3:37:25", "throughput": 8764.43, "total_tokens": 23561216} +{"current_steps": 34975, "total_steps": 204665, "loss": 0.0931, "lr": 1.9695464142602195e-06, "epoch": 0.8544450687709183, "percentage": 17.09, "elapsed_time": "0:44:48", "remaining_time": "3:37:24", "throughput": 8764.69, "total_tokens": 23565056} +{"current_steps": 34980, "total_steps": 204665, "loss": 0.0676, "lr": 1.9695255255994788e-06, "epoch": 0.8545672196027655, "percentage": 17.09, "elapsed_time": "0:44:48", "remaining_time": "3:37:24", "throughput": 8764.78, "total_tokens": 23568320} +{"current_steps": 34985, "total_steps": 204665, "loss": 0.0429, "lr": 1.969504629888088e-06, "epoch": 0.8546893704346127, "percentage": 17.09, "elapsed_time": "0:44:49", "remaining_time": "3:37:23", "throughput": 8764.93, "total_tokens": 23571840} +{"current_steps": 34990, "total_steps": 204665, "loss": 0.1533, "lr": 1.9694837271261985e-06, "epoch": 0.8548115212664599, "percentage": 17.1, "elapsed_time": "0:44:49", "remaining_time": "3:37:22", "throughput": 8765.06, "total_tokens": 23575232} +{"current_steps": 34995, "total_steps": 204665, "loss": 0.1684, "lr": 1.9694628173139626e-06, "epoch": 0.854933672098307, "percentage": 17.1, "elapsed_time": "0:44:50", "remaining_time": "3:37:22", "throughput": 8765.32, "total_tokens": 23579072} +{"current_steps": 35000, "total_steps": 204665, "loss": 0.1663, "lr": 1.969441900451532e-06, "epoch": 0.8550558229301541, "percentage": 17.1, "elapsed_time": "0:44:50", "remaining_time": "3:37:21", "throughput": 8765.44, "total_tokens": 23582464} +{"current_steps": 35005, "total_steps": 204665, "loss": 0.2365, "lr": 1.96942097653906e-06, "epoch": 0.8551779737620013, "percentage": 17.1, "elapsed_time": "0:44:50", "remaining_time": "3:37:21", "throughput": 8765.5, "total_tokens": 23585600} +{"current_steps": 35010, "total_steps": 204665, "loss": 0.1384, "lr": 1.969400045576697e-06, "epoch": 0.8553001245938485, "percentage": 17.11, "elapsed_time": "0:44:51", "remaining_time": "3:37:20", "throughput": 8765.75, "total_tokens": 23589376} +{"current_steps": 35015, "total_steps": 204665, "loss": 0.0798, "lr": 1.969379107564597e-06, "epoch": 0.8554222754256956, "percentage": 17.11, "elapsed_time": "0:44:51", "remaining_time": "3:37:20", "throughput": 8765.76, "total_tokens": 23592384} +{"current_steps": 35020, "total_steps": 204665, "loss": 0.0671, "lr": 1.9693581625029108e-06, "epoch": 0.8555444262575428, "percentage": 17.11, "elapsed_time": "0:44:51", "remaining_time": "3:37:19", "throughput": 8765.92, "total_tokens": 23595904} +{"current_steps": 35025, "total_steps": 204665, "loss": 0.0827, "lr": 1.9693372103917913e-06, "epoch": 0.85566657708939, "percentage": 17.11, "elapsed_time": "0:44:52", "remaining_time": "3:37:18", "throughput": 8765.95, "total_tokens": 23598976} +{"current_steps": 35030, "total_steps": 204665, "loss": 0.1113, "lr": 1.969316251231391e-06, "epoch": 0.8557887279212372, "percentage": 17.12, "elapsed_time": "0:44:52", "remaining_time": "3:37:18", "throughput": 8766.09, "total_tokens": 23602368} +{"current_steps": 35035, "total_steps": 204665, "loss": 0.1937, "lr": 1.9692952850218624e-06, "epoch": 0.8559108787530844, "percentage": 17.12, "elapsed_time": "0:44:52", "remaining_time": "3:37:17", "throughput": 8766.37, "total_tokens": 23606272} +{"current_steps": 35040, "total_steps": 204665, "loss": 0.1756, "lr": 1.9692743117633576e-06, "epoch": 0.8560330295849314, "percentage": 17.12, "elapsed_time": "0:44:53", "remaining_time": "3:37:17", "throughput": 8766.55, "total_tokens": 23609856} +{"current_steps": 35045, "total_steps": 204665, "loss": 0.0429, "lr": 1.969253331456029e-06, "epoch": 0.8561551804167786, "percentage": 17.12, "elapsed_time": "0:44:53", "remaining_time": "3:37:16", "throughput": 8766.68, "total_tokens": 23613248} +{"current_steps": 35050, "total_steps": 204665, "loss": 0.0925, "lr": 1.96923234410003e-06, "epoch": 0.8562773312486258, "percentage": 17.13, "elapsed_time": "0:44:53", "remaining_time": "3:37:16", "throughput": 8766.69, "total_tokens": 23616256} +{"current_steps": 35055, "total_steps": 204665, "loss": 0.2194, "lr": 1.9692113496955124e-06, "epoch": 0.856399482080473, "percentage": 17.13, "elapsed_time": "0:44:54", "remaining_time": "3:37:15", "throughput": 8766.74, "total_tokens": 23619392} +{"current_steps": 35060, "total_steps": 204665, "loss": 0.0587, "lr": 1.9691903482426295e-06, "epoch": 0.8565216329123201, "percentage": 17.13, "elapsed_time": "0:44:54", "remaining_time": "3:37:15", "throughput": 8767.09, "total_tokens": 23623552} +{"current_steps": 35065, "total_steps": 204665, "loss": 0.0937, "lr": 1.9691693397415333e-06, "epoch": 0.8566437837441673, "percentage": 17.13, "elapsed_time": "0:44:54", "remaining_time": "3:37:14", "throughput": 8767.29, "total_tokens": 23627200} +{"current_steps": 35070, "total_steps": 204665, "loss": 0.154, "lr": 1.9691483241923776e-06, "epoch": 0.8567659345760145, "percentage": 17.14, "elapsed_time": "0:44:55", "remaining_time": "3:37:14", "throughput": 8767.33, "total_tokens": 23630272} +{"current_steps": 35075, "total_steps": 204665, "loss": 0.0931, "lr": 1.969127301595314e-06, "epoch": 0.8568880854078617, "percentage": 17.14, "elapsed_time": "0:44:55", "remaining_time": "3:37:13", "throughput": 8767.38, "total_tokens": 23633408} +{"current_steps": 35080, "total_steps": 204665, "loss": 0.2184, "lr": 1.9691062719504962e-06, "epoch": 0.8570102362397088, "percentage": 17.14, "elapsed_time": "0:44:55", "remaining_time": "3:37:12", "throughput": 8767.54, "total_tokens": 23636928} +{"current_steps": 35085, "total_steps": 204665, "loss": 0.0106, "lr": 1.969085235258077e-06, "epoch": 0.8571323870715559, "percentage": 17.14, "elapsed_time": "0:44:56", "remaining_time": "3:37:12", "throughput": 8767.69, "total_tokens": 23640384} +{"current_steps": 35090, "total_steps": 204665, "loss": 0.0371, "lr": 1.969064191518209e-06, "epoch": 0.8572545379034031, "percentage": 17.15, "elapsed_time": "0:44:56", "remaining_time": "3:37:11", "throughput": 8768.01, "total_tokens": 23644416} +{"current_steps": 35095, "total_steps": 204665, "loss": 0.0719, "lr": 1.969043140731046e-06, "epoch": 0.8573766887352503, "percentage": 17.15, "elapsed_time": "0:44:57", "remaining_time": "3:37:11", "throughput": 8768.05, "total_tokens": 23647488} +{"current_steps": 35100, "total_steps": 204665, "loss": 0.105, "lr": 1.96902208289674e-06, "epoch": 0.8574988395670975, "percentage": 17.15, "elapsed_time": "0:44:57", "remaining_time": "3:37:10", "throughput": 8768.3, "total_tokens": 23651264} +{"current_steps": 35105, "total_steps": 204665, "loss": 0.0889, "lr": 1.9690010180154454e-06, "epoch": 0.8576209903989446, "percentage": 17.15, "elapsed_time": "0:44:57", "remaining_time": "3:37:10", "throughput": 8768.47, "total_tokens": 23654784} +{"current_steps": 35110, "total_steps": 204665, "loss": 0.16, "lr": 1.9689799460873147e-06, "epoch": 0.8577431412307918, "percentage": 17.15, "elapsed_time": "0:44:58", "remaining_time": "3:37:09", "throughput": 8768.65, "total_tokens": 23658368} +{"current_steps": 35115, "total_steps": 204665, "loss": 0.02, "lr": 1.968958867112501e-06, "epoch": 0.857865292062639, "percentage": 17.16, "elapsed_time": "0:44:58", "remaining_time": "3:37:09", "throughput": 8768.88, "total_tokens": 23662144} +{"current_steps": 35120, "total_steps": 204665, "loss": 0.1565, "lr": 1.9689377810911577e-06, "epoch": 0.8579874428944861, "percentage": 17.16, "elapsed_time": "0:44:58", "remaining_time": "3:37:08", "throughput": 8768.99, "total_tokens": 23665472} +{"current_steps": 35125, "total_steps": 204665, "loss": 0.046, "lr": 1.9689166880234385e-06, "epoch": 0.8581095937263333, "percentage": 17.16, "elapsed_time": "0:44:59", "remaining_time": "3:37:08", "throughput": 8769.11, "total_tokens": 23668864} +{"current_steps": 35130, "total_steps": 204665, "loss": 0.1591, "lr": 1.9688955879094966e-06, "epoch": 0.8582317445581804, "percentage": 17.16, "elapsed_time": "0:44:59", "remaining_time": "3:37:07", "throughput": 8769.2, "total_tokens": 23672128} +{"current_steps": 35135, "total_steps": 204665, "loss": 0.071, "lr": 1.9688744807494853e-06, "epoch": 0.8583538953900276, "percentage": 17.17, "elapsed_time": "0:44:59", "remaining_time": "3:37:06", "throughput": 8769.36, "total_tokens": 23675648} +{"current_steps": 35140, "total_steps": 204665, "loss": 0.0692, "lr": 1.968853366543558e-06, "epoch": 0.8584760462218748, "percentage": 17.17, "elapsed_time": "0:45:00", "remaining_time": "3:37:06", "throughput": 8769.54, "total_tokens": 23679232} +{"current_steps": 35145, "total_steps": 204665, "loss": 0.148, "lr": 1.9688322452918686e-06, "epoch": 0.858598197053722, "percentage": 17.17, "elapsed_time": "0:45:00", "remaining_time": "3:37:05", "throughput": 8769.53, "total_tokens": 23682176} +{"current_steps": 35150, "total_steps": 204665, "loss": 0.0971, "lr": 1.9688111169945706e-06, "epoch": 0.8587203478855691, "percentage": 17.17, "elapsed_time": "0:45:00", "remaining_time": "3:37:05", "throughput": 8769.63, "total_tokens": 23685440} +{"current_steps": 35155, "total_steps": 204665, "loss": 0.1121, "lr": 1.9687899816518173e-06, "epoch": 0.8588424987174162, "percentage": 17.18, "elapsed_time": "0:45:01", "remaining_time": "3:37:04", "throughput": 8769.66, "total_tokens": 23688512} +{"current_steps": 35160, "total_steps": 204665, "loss": 0.0545, "lr": 1.968768839263763e-06, "epoch": 0.8589646495492634, "percentage": 17.18, "elapsed_time": "0:45:01", "remaining_time": "3:37:03", "throughput": 8769.78, "total_tokens": 23691840} +{"current_steps": 35165, "total_steps": 204665, "loss": 0.182, "lr": 1.968747689830561e-06, "epoch": 0.8590868003811106, "percentage": 17.18, "elapsed_time": "0:45:01", "remaining_time": "3:37:03", "throughput": 8769.97, "total_tokens": 23695488} +{"current_steps": 35170, "total_steps": 204665, "loss": 0.1623, "lr": 1.968726533352365e-06, "epoch": 0.8592089512129577, "percentage": 17.18, "elapsed_time": "0:45:02", "remaining_time": "3:37:02", "throughput": 8770.17, "total_tokens": 23699136} +{"current_steps": 35175, "total_steps": 204665, "loss": 0.315, "lr": 1.9687053698293293e-06, "epoch": 0.8593311020448049, "percentage": 17.19, "elapsed_time": "0:45:02", "remaining_time": "3:37:02", "throughput": 8770.25, "total_tokens": 23702400} +{"current_steps": 35180, "total_steps": 204665, "loss": 0.0951, "lr": 1.9686841992616077e-06, "epoch": 0.8594532528766521, "percentage": 17.19, "elapsed_time": "0:45:02", "remaining_time": "3:37:01", "throughput": 8770.37, "total_tokens": 23705728} +{"current_steps": 35185, "total_steps": 204665, "loss": 0.2311, "lr": 1.968663021649354e-06, "epoch": 0.8595754037084993, "percentage": 17.19, "elapsed_time": "0:45:03", "remaining_time": "3:37:01", "throughput": 8770.49, "total_tokens": 23709120} +{"current_steps": 35190, "total_steps": 204665, "loss": 0.1489, "lr": 1.9686418369927224e-06, "epoch": 0.8596975545403465, "percentage": 17.19, "elapsed_time": "0:45:03", "remaining_time": "3:37:00", "throughput": 8770.62, "total_tokens": 23712512} +{"current_steps": 35195, "total_steps": 204665, "loss": 0.0941, "lr": 1.9686206452918667e-06, "epoch": 0.8598197053721935, "percentage": 17.2, "elapsed_time": "0:45:03", "remaining_time": "3:37:00", "throughput": 8770.74, "total_tokens": 23715904} +{"current_steps": 35200, "total_steps": 204665, "loss": 0.0694, "lr": 1.968599446546941e-06, "epoch": 0.8599418562040407, "percentage": 17.2, "elapsed_time": "0:45:04", "remaining_time": "3:36:59", "throughput": 8771.07, "total_tokens": 23720000} +{"current_steps": 35205, "total_steps": 204665, "loss": 0.1236, "lr": 1.9685782407580995e-06, "epoch": 0.8600640070358879, "percentage": 17.2, "elapsed_time": "0:45:04", "remaining_time": "3:36:59", "throughput": 8771.14, "total_tokens": 23723200} +{"current_steps": 35210, "total_steps": 204665, "loss": 0.0757, "lr": 1.9685570279254967e-06, "epoch": 0.8601861578677351, "percentage": 17.2, "elapsed_time": "0:45:05", "remaining_time": "3:36:58", "throughput": 8771.29, "total_tokens": 23726656} +{"current_steps": 35215, "total_steps": 204665, "loss": 0.0956, "lr": 1.9685358080492865e-06, "epoch": 0.8603083086995822, "percentage": 17.21, "elapsed_time": "0:45:05", "remaining_time": "3:36:57", "throughput": 8771.41, "total_tokens": 23730048} +{"current_steps": 35220, "total_steps": 204665, "loss": 0.074, "lr": 1.9685145811296237e-06, "epoch": 0.8604304595314294, "percentage": 17.21, "elapsed_time": "0:45:05", "remaining_time": "3:36:57", "throughput": 8771.49, "total_tokens": 23733312} +{"current_steps": 35225, "total_steps": 204665, "loss": 0.1863, "lr": 1.9684933471666626e-06, "epoch": 0.8605526103632766, "percentage": 17.21, "elapsed_time": "0:45:06", "remaining_time": "3:36:56", "throughput": 8771.6, "total_tokens": 23736640} +{"current_steps": 35230, "total_steps": 204665, "loss": 0.0825, "lr": 1.968472106160557e-06, "epoch": 0.8606747611951238, "percentage": 17.21, "elapsed_time": "0:45:06", "remaining_time": "3:36:56", "throughput": 8771.84, "total_tokens": 23740416} +{"current_steps": 35235, "total_steps": 204665, "loss": 0.2678, "lr": 1.9684508581114616e-06, "epoch": 0.860796912026971, "percentage": 17.22, "elapsed_time": "0:45:06", "remaining_time": "3:36:55", "throughput": 8771.92, "total_tokens": 23743680} +{"current_steps": 35240, "total_steps": 204665, "loss": 0.1087, "lr": 1.9684296030195317e-06, "epoch": 0.860919062858818, "percentage": 17.22, "elapsed_time": "0:45:07", "remaining_time": "3:36:55", "throughput": 8772.04, "total_tokens": 23747072} +{"current_steps": 35245, "total_steps": 204665, "loss": 0.0148, "lr": 1.9684083408849206e-06, "epoch": 0.8610412136906652, "percentage": 17.22, "elapsed_time": "0:45:07", "remaining_time": "3:36:54", "throughput": 8772.21, "total_tokens": 23750592} +{"current_steps": 35250, "total_steps": 204665, "loss": 0.2289, "lr": 1.968387071707784e-06, "epoch": 0.8611633645225124, "percentage": 17.22, "elapsed_time": "0:45:07", "remaining_time": "3:36:54", "throughput": 8772.16, "total_tokens": 23753408} +{"current_steps": 35255, "total_steps": 204665, "loss": 0.1386, "lr": 1.9683657954882757e-06, "epoch": 0.8612855153543596, "percentage": 17.23, "elapsed_time": "0:45:08", "remaining_time": "3:36:53", "throughput": 8772.19, "total_tokens": 23756480} +{"current_steps": 35260, "total_steps": 204665, "loss": 0.1047, "lr": 1.968344512226551e-06, "epoch": 0.8614076661862067, "percentage": 17.23, "elapsed_time": "0:45:08", "remaining_time": "3:36:52", "throughput": 8772.28, "total_tokens": 23759808} +{"current_steps": 35265, "total_steps": 204665, "loss": 0.1218, "lr": 1.9683232219227646e-06, "epoch": 0.8615298170180539, "percentage": 17.23, "elapsed_time": "0:45:08", "remaining_time": "3:36:52", "throughput": 8772.41, "total_tokens": 23763200} +{"current_steps": 35270, "total_steps": 204665, "loss": 0.1086, "lr": 1.9683019245770717e-06, "epoch": 0.8616519678499011, "percentage": 17.23, "elapsed_time": "0:45:09", "remaining_time": "3:36:51", "throughput": 8772.49, "total_tokens": 23766464} +{"current_steps": 35275, "total_steps": 204665, "loss": 0.234, "lr": 1.9682806201896264e-06, "epoch": 0.8617741186817482, "percentage": 17.24, "elapsed_time": "0:45:09", "remaining_time": "3:36:51", "throughput": 8772.73, "total_tokens": 23770240} +{"current_steps": 35280, "total_steps": 204665, "loss": 0.0527, "lr": 1.968259308760584e-06, "epoch": 0.8618962695135954, "percentage": 17.24, "elapsed_time": "0:45:09", "remaining_time": "3:36:50", "throughput": 8772.86, "total_tokens": 23773632} +{"current_steps": 35285, "total_steps": 204665, "loss": 0.0753, "lr": 1.9682379902900995e-06, "epoch": 0.8620184203454425, "percentage": 17.24, "elapsed_time": "0:45:10", "remaining_time": "3:36:50", "throughput": 8773.09, "total_tokens": 23777408} +{"current_steps": 35290, "total_steps": 204665, "loss": 0.1124, "lr": 1.968216664778328e-06, "epoch": 0.8621405711772897, "percentage": 17.24, "elapsed_time": "0:45:10", "remaining_time": "3:36:49", "throughput": 8773.24, "total_tokens": 23780864} +{"current_steps": 35295, "total_steps": 204665, "loss": 0.184, "lr": 1.9681953322254243e-06, "epoch": 0.8622627220091369, "percentage": 17.25, "elapsed_time": "0:45:10", "remaining_time": "3:36:49", "throughput": 8773.38, "total_tokens": 23784320} +{"current_steps": 35300, "total_steps": 204665, "loss": 0.0935, "lr": 1.968173992631544e-06, "epoch": 0.8623848728409841, "percentage": 17.25, "elapsed_time": "0:45:11", "remaining_time": "3:36:48", "throughput": 8773.77, "total_tokens": 23788608} +{"current_steps": 35305, "total_steps": 204665, "loss": 0.2388, "lr": 1.968152645996842e-06, "epoch": 0.8625070236728312, "percentage": 17.25, "elapsed_time": "0:45:11", "remaining_time": "3:36:48", "throughput": 8773.89, "total_tokens": 23792000} +{"current_steps": 35310, "total_steps": 204665, "loss": 0.1465, "lr": 1.9681312923214734e-06, "epoch": 0.8626291745046784, "percentage": 17.25, "elapsed_time": "0:45:12", "remaining_time": "3:36:47", "throughput": 8774.06, "total_tokens": 23795584} +{"current_steps": 35315, "total_steps": 204665, "loss": 0.1328, "lr": 1.9681099316055934e-06, "epoch": 0.8627513253365255, "percentage": 17.26, "elapsed_time": "0:45:12", "remaining_time": "3:36:46", "throughput": 8774.12, "total_tokens": 23798720} +{"current_steps": 35320, "total_steps": 204665, "loss": 0.2706, "lr": 1.968088563849358e-06, "epoch": 0.8628734761683727, "percentage": 17.26, "elapsed_time": "0:45:12", "remaining_time": "3:36:46", "throughput": 8774.14, "total_tokens": 23801728} +{"current_steps": 35325, "total_steps": 204665, "loss": 0.078, "lr": 1.968067189052922e-06, "epoch": 0.8629956270002199, "percentage": 17.26, "elapsed_time": "0:45:13", "remaining_time": "3:36:45", "throughput": 8774.38, "total_tokens": 23805504} +{"current_steps": 35330, "total_steps": 204665, "loss": 0.17, "lr": 1.968045807216441e-06, "epoch": 0.863117777832067, "percentage": 17.26, "elapsed_time": "0:45:13", "remaining_time": "3:36:45", "throughput": 8774.64, "total_tokens": 23809344} +{"current_steps": 35335, "total_steps": 204665, "loss": 0.2026, "lr": 1.968024418340071e-06, "epoch": 0.8632399286639142, "percentage": 17.26, "elapsed_time": "0:45:13", "remaining_time": "3:36:44", "throughput": 8774.67, "total_tokens": 23812416} +{"current_steps": 35340, "total_steps": 204665, "loss": 0.1147, "lr": 1.968003022423966e-06, "epoch": 0.8633620794957614, "percentage": 17.27, "elapsed_time": "0:45:14", "remaining_time": "3:36:44", "throughput": 8774.78, "total_tokens": 23815744} +{"current_steps": 35345, "total_steps": 204665, "loss": 0.1263, "lr": 1.9679816194682834e-06, "epoch": 0.8634842303276086, "percentage": 17.27, "elapsed_time": "0:45:14", "remaining_time": "3:36:43", "throughput": 8774.91, "total_tokens": 23819200} +{"current_steps": 35350, "total_steps": 204665, "loss": 0.0971, "lr": 1.967960209473178e-06, "epoch": 0.8636063811594556, "percentage": 17.27, "elapsed_time": "0:45:14", "remaining_time": "3:36:43", "throughput": 8775.02, "total_tokens": 23822528} +{"current_steps": 35355, "total_steps": 204665, "loss": 0.1648, "lr": 1.9679387924388058e-06, "epoch": 0.8637285319913028, "percentage": 17.27, "elapsed_time": "0:45:15", "remaining_time": "3:36:42", "throughput": 8775.23, "total_tokens": 23826240} +{"current_steps": 35360, "total_steps": 204665, "loss": 0.0552, "lr": 1.967917368365322e-06, "epoch": 0.86385068282315, "percentage": 17.28, "elapsed_time": "0:45:15", "remaining_time": "3:36:41", "throughput": 8775.22, "total_tokens": 23829184} +{"current_steps": 35365, "total_steps": 204665, "loss": 0.1812, "lr": 1.9678959372528828e-06, "epoch": 0.8639728336549972, "percentage": 17.28, "elapsed_time": "0:45:15", "remaining_time": "3:36:41", "throughput": 8775.5, "total_tokens": 23833088} +{"current_steps": 35370, "total_steps": 204665, "loss": 0.1272, "lr": 1.967874499101644e-06, "epoch": 0.8640949844868444, "percentage": 17.28, "elapsed_time": "0:45:16", "remaining_time": "3:36:40", "throughput": 8775.53, "total_tokens": 23836160} +{"current_steps": 35375, "total_steps": 204665, "loss": 0.0314, "lr": 1.967853053911762e-06, "epoch": 0.8642171353186915, "percentage": 17.28, "elapsed_time": "0:45:16", "remaining_time": "3:36:40", "throughput": 8775.74, "total_tokens": 23839808} +{"current_steps": 35380, "total_steps": 204665, "loss": 0.1877, "lr": 1.967831601683392e-06, "epoch": 0.8643392861505387, "percentage": 17.29, "elapsed_time": "0:45:16", "remaining_time": "3:36:39", "throughput": 8775.74, "total_tokens": 23842816} +{"current_steps": 35385, "total_steps": 204665, "loss": 0.0687, "lr": 1.96781014241669e-06, "epoch": 0.8644614369823859, "percentage": 17.29, "elapsed_time": "0:45:17", "remaining_time": "3:36:39", "throughput": 8775.84, "total_tokens": 23846144} +{"current_steps": 35390, "total_steps": 204665, "loss": 0.1174, "lr": 1.9677886761118126e-06, "epoch": 0.8645835878142331, "percentage": 17.29, "elapsed_time": "0:45:17", "remaining_time": "3:36:38", "throughput": 8775.97, "total_tokens": 23849536} +{"current_steps": 35395, "total_steps": 204665, "loss": 0.177, "lr": 1.9677672027689156e-06, "epoch": 0.8647057386460801, "percentage": 17.29, "elapsed_time": "0:45:17", "remaining_time": "3:36:38", "throughput": 8776.14, "total_tokens": 23853120} +{"current_steps": 35400, "total_steps": 204665, "loss": 0.2031, "lr": 1.9677457223881553e-06, "epoch": 0.8648278894779273, "percentage": 17.3, "elapsed_time": "0:45:18", "remaining_time": "3:36:37", "throughput": 8776.34, "total_tokens": 23856768} +{"current_steps": 35405, "total_steps": 204665, "loss": 0.1071, "lr": 1.967724234969688e-06, "epoch": 0.8649500403097745, "percentage": 17.3, "elapsed_time": "0:45:18", "remaining_time": "3:36:37", "throughput": 8776.45, "total_tokens": 23860096} +{"current_steps": 35410, "total_steps": 204665, "loss": 0.1915, "lr": 1.967702740513669e-06, "epoch": 0.8650721911416217, "percentage": 17.3, "elapsed_time": "0:45:18", "remaining_time": "3:36:36", "throughput": 8776.52, "total_tokens": 23863296} +{"current_steps": 35415, "total_steps": 204665, "loss": 0.0757, "lr": 1.967681239020256e-06, "epoch": 0.8651943419734689, "percentage": 17.3, "elapsed_time": "0:45:19", "remaining_time": "3:36:35", "throughput": 8776.54, "total_tokens": 23866368} +{"current_steps": 35420, "total_steps": 204665, "loss": 0.1594, "lr": 1.9676597304896046e-06, "epoch": 0.865316492805316, "percentage": 17.31, "elapsed_time": "0:45:19", "remaining_time": "3:36:35", "throughput": 8776.61, "total_tokens": 23869568} +{"current_steps": 35425, "total_steps": 204665, "loss": 0.1046, "lr": 1.967638214921871e-06, "epoch": 0.8654386436371632, "percentage": 17.31, "elapsed_time": "0:45:20", "remaining_time": "3:36:34", "throughput": 8776.75, "total_tokens": 23873024} +{"current_steps": 35430, "total_steps": 204665, "loss": 0.0774, "lr": 1.967616692317213e-06, "epoch": 0.8655607944690104, "percentage": 17.31, "elapsed_time": "0:45:20", "remaining_time": "3:36:34", "throughput": 8776.91, "total_tokens": 23876544} +{"current_steps": 35435, "total_steps": 204665, "loss": 0.1239, "lr": 1.9675951626757854e-06, "epoch": 0.8656829453008575, "percentage": 17.31, "elapsed_time": "0:45:20", "remaining_time": "3:36:33", "throughput": 8777.11, "total_tokens": 23880192} +{"current_steps": 35440, "total_steps": 204665, "loss": 0.0905, "lr": 1.9675736259977455e-06, "epoch": 0.8658050961327046, "percentage": 17.32, "elapsed_time": "0:45:21", "remaining_time": "3:36:33", "throughput": 8777.32, "total_tokens": 23883904} +{"current_steps": 35445, "total_steps": 204665, "loss": 0.0623, "lr": 1.9675520822832504e-06, "epoch": 0.8659272469645518, "percentage": 17.32, "elapsed_time": "0:45:21", "remaining_time": "3:36:32", "throughput": 8777.45, "total_tokens": 23887296} +{"current_steps": 35450, "total_steps": 204665, "loss": 0.0776, "lr": 1.967530531532456e-06, "epoch": 0.866049397796399, "percentage": 17.32, "elapsed_time": "0:45:21", "remaining_time": "3:36:32", "throughput": 8777.54, "total_tokens": 23890560} +{"current_steps": 35455, "total_steps": 204665, "loss": 0.0951, "lr": 1.967508973745519e-06, "epoch": 0.8661715486282462, "percentage": 17.32, "elapsed_time": "0:45:22", "remaining_time": "3:36:31", "throughput": 8777.57, "total_tokens": 23893632} +{"current_steps": 35460, "total_steps": 204665, "loss": 0.0601, "lr": 1.967487408922597e-06, "epoch": 0.8662936994600933, "percentage": 17.33, "elapsed_time": "0:45:22", "remaining_time": "3:36:30", "throughput": 8777.57, "total_tokens": 23896640} +{"current_steps": 35465, "total_steps": 204665, "loss": 0.1787, "lr": 1.9674658370638462e-06, "epoch": 0.8664158502919405, "percentage": 17.33, "elapsed_time": "0:45:22", "remaining_time": "3:36:30", "throughput": 8777.81, "total_tokens": 23900416} +{"current_steps": 35470, "total_steps": 204665, "loss": 0.2974, "lr": 1.9674442581694238e-06, "epoch": 0.8665380011237876, "percentage": 17.33, "elapsed_time": "0:45:23", "remaining_time": "3:36:29", "throughput": 8777.94, "total_tokens": 23903808} +{"current_steps": 35475, "total_steps": 204665, "loss": 0.0575, "lr": 1.967422672239487e-06, "epoch": 0.8666601519556348, "percentage": 17.33, "elapsed_time": "0:45:23", "remaining_time": "3:36:29", "throughput": 8778.12, "total_tokens": 23907392} +{"current_steps": 35480, "total_steps": 204665, "loss": 0.1018, "lr": 1.967401079274191e-06, "epoch": 0.866782302787482, "percentage": 17.34, "elapsed_time": "0:45:23", "remaining_time": "3:36:28", "throughput": 8778.36, "total_tokens": 23911168} +{"current_steps": 35485, "total_steps": 204665, "loss": 0.138, "lr": 1.967379479273695e-06, "epoch": 0.8669044536193291, "percentage": 17.34, "elapsed_time": "0:45:24", "remaining_time": "3:36:28", "throughput": 8778.4, "total_tokens": 23914304} +{"current_steps": 35490, "total_steps": 204665, "loss": 0.1295, "lr": 1.9673578722381552e-06, "epoch": 0.8670266044511763, "percentage": 17.34, "elapsed_time": "0:45:24", "remaining_time": "3:36:27", "throughput": 8778.58, "total_tokens": 23917888} +{"current_steps": 35495, "total_steps": 204665, "loss": 0.065, "lr": 1.967336258167729e-06, "epoch": 0.8671487552830235, "percentage": 17.34, "elapsed_time": "0:45:24", "remaining_time": "3:36:27", "throughput": 8778.75, "total_tokens": 23921472} +{"current_steps": 35500, "total_steps": 204665, "loss": 0.0754, "lr": 1.9673146370625727e-06, "epoch": 0.8672709061148707, "percentage": 17.35, "elapsed_time": "0:45:25", "remaining_time": "3:36:26", "throughput": 8778.84, "total_tokens": 23924736} +{"current_steps": 35505, "total_steps": 204665, "loss": 0.1791, "lr": 1.9672930089228448e-06, "epoch": 0.8673930569467178, "percentage": 17.35, "elapsed_time": "0:45:25", "remaining_time": "3:36:25", "throughput": 8778.85, "total_tokens": 23927744} +{"current_steps": 35510, "total_steps": 204665, "loss": 0.1933, "lr": 1.9672713737487018e-06, "epoch": 0.867515207778565, "percentage": 17.35, "elapsed_time": "0:45:25", "remaining_time": "3:36:25", "throughput": 8778.91, "total_tokens": 23930944} +{"current_steps": 35515, "total_steps": 204665, "loss": 0.0915, "lr": 1.967249731540301e-06, "epoch": 0.8676373586104121, "percentage": 17.35, "elapsed_time": "0:45:26", "remaining_time": "3:36:24", "throughput": 8779.03, "total_tokens": 23934336} +{"current_steps": 35520, "total_steps": 204665, "loss": 0.093, "lr": 1.9672280822978e-06, "epoch": 0.8677595094422593, "percentage": 17.36, "elapsed_time": "0:45:26", "remaining_time": "3:36:24", "throughput": 8779.2, "total_tokens": 23937856} +{"current_steps": 35525, "total_steps": 204665, "loss": 0.1593, "lr": 1.9672064260213565e-06, "epoch": 0.8678816602741065, "percentage": 17.36, "elapsed_time": "0:45:27", "remaining_time": "3:36:23", "throughput": 8779.34, "total_tokens": 23941312} +{"current_steps": 35530, "total_steps": 204665, "loss": 0.1556, "lr": 1.9671847627111273e-06, "epoch": 0.8680038111059536, "percentage": 17.36, "elapsed_time": "0:45:27", "remaining_time": "3:36:23", "throughput": 8779.32, "total_tokens": 23944256} +{"current_steps": 35535, "total_steps": 204665, "loss": 0.0886, "lr": 1.967163092367271e-06, "epoch": 0.8681259619378008, "percentage": 17.36, "elapsed_time": "0:45:27", "remaining_time": "3:36:22", "throughput": 8779.5, "total_tokens": 23947840} +{"current_steps": 35540, "total_steps": 204665, "loss": 0.0558, "lr": 1.9671414149899438e-06, "epoch": 0.868248112769648, "percentage": 17.36, "elapsed_time": "0:45:28", "remaining_time": "3:36:22", "throughput": 8779.57, "total_tokens": 23951040} +{"current_steps": 35545, "total_steps": 204665, "loss": 0.1845, "lr": 1.9671197305793044e-06, "epoch": 0.8683702636014952, "percentage": 17.37, "elapsed_time": "0:45:28", "remaining_time": "3:36:21", "throughput": 8779.56, "total_tokens": 23953984} +{"current_steps": 35550, "total_steps": 204665, "loss": 0.0353, "lr": 1.9670980391355104e-06, "epoch": 0.8684924144333422, "percentage": 17.37, "elapsed_time": "0:45:28", "remaining_time": "3:36:20", "throughput": 8779.62, "total_tokens": 23957184} +{"current_steps": 35555, "total_steps": 204665, "loss": 0.1873, "lr": 1.9670763406587192e-06, "epoch": 0.8686145652651894, "percentage": 17.37, "elapsed_time": "0:45:29", "remaining_time": "3:36:20", "throughput": 8779.75, "total_tokens": 23960640} +{"current_steps": 35560, "total_steps": 204665, "loss": 0.0577, "lr": 1.967054635149089e-06, "epoch": 0.8687367160970366, "percentage": 17.37, "elapsed_time": "0:45:29", "remaining_time": "3:36:19", "throughput": 8779.83, "total_tokens": 23963904} +{"current_steps": 35565, "total_steps": 204665, "loss": 0.121, "lr": 1.967032922606777e-06, "epoch": 0.8688588669288838, "percentage": 17.38, "elapsed_time": "0:45:29", "remaining_time": "3:36:19", "throughput": 8779.9, "total_tokens": 23967104} +{"current_steps": 35570, "total_steps": 204665, "loss": 0.0403, "lr": 1.9670112030319412e-06, "epoch": 0.868981017760731, "percentage": 17.38, "elapsed_time": "0:45:30", "remaining_time": "3:36:18", "throughput": 8779.95, "total_tokens": 23970240} +{"current_steps": 35575, "total_steps": 204665, "loss": 0.187, "lr": 1.9669894764247403e-06, "epoch": 0.8691031685925781, "percentage": 17.38, "elapsed_time": "0:45:30", "remaining_time": "3:36:18", "throughput": 8780.12, "total_tokens": 23973760} +{"current_steps": 35580, "total_steps": 204665, "loss": 0.1906, "lr": 1.966967742785332e-06, "epoch": 0.8692253194244253, "percentage": 17.38, "elapsed_time": "0:45:30", "remaining_time": "3:36:17", "throughput": 8780.18, "total_tokens": 23976960} +{"current_steps": 35585, "total_steps": 204665, "loss": 0.0527, "lr": 1.966946002113874e-06, "epoch": 0.8693474702562725, "percentage": 17.39, "elapsed_time": "0:45:31", "remaining_time": "3:36:16", "throughput": 8780.31, "total_tokens": 23980352} +{"current_steps": 35590, "total_steps": 204665, "loss": 0.0576, "lr": 1.9669242544105245e-06, "epoch": 0.8694696210881196, "percentage": 17.39, "elapsed_time": "0:45:31", "remaining_time": "3:36:16", "throughput": 8780.48, "total_tokens": 23983872} +{"current_steps": 35595, "total_steps": 204665, "loss": 0.0556, "lr": 1.966902499675442e-06, "epoch": 0.8695917719199667, "percentage": 17.39, "elapsed_time": "0:45:31", "remaining_time": "3:36:15", "throughput": 8780.67, "total_tokens": 23987520} +{"current_steps": 35600, "total_steps": 204665, "loss": 0.1358, "lr": 1.9668807379087843e-06, "epoch": 0.8697139227518139, "percentage": 17.39, "elapsed_time": "0:45:32", "remaining_time": "3:36:15", "throughput": 8780.88, "total_tokens": 23991232} +{"current_steps": 35605, "total_steps": 204665, "loss": 0.1904, "lr": 1.9668589691107096e-06, "epoch": 0.8698360735836611, "percentage": 17.4, "elapsed_time": "0:45:32", "remaining_time": "3:36:14", "throughput": 8781.07, "total_tokens": 23994816} +{"current_steps": 35610, "total_steps": 204665, "loss": 0.0965, "lr": 1.966837193281377e-06, "epoch": 0.8699582244155083, "percentage": 17.4, "elapsed_time": "0:45:32", "remaining_time": "3:36:14", "throughput": 8781.12, "total_tokens": 23997952} +{"current_steps": 35615, "total_steps": 204665, "loss": 0.0625, "lr": 1.9668154104209438e-06, "epoch": 0.8700803752473555, "percentage": 17.4, "elapsed_time": "0:45:33", "remaining_time": "3:36:13", "throughput": 8781.17, "total_tokens": 24001088} +{"current_steps": 35620, "total_steps": 204665, "loss": 0.1092, "lr": 1.966793620529569e-06, "epoch": 0.8702025260792026, "percentage": 17.4, "elapsed_time": "0:45:33", "remaining_time": "3:36:13", "throughput": 8781.44, "total_tokens": 24004992} +{"current_steps": 35625, "total_steps": 204665, "loss": 0.1368, "lr": 1.9667718236074106e-06, "epoch": 0.8703246769110498, "percentage": 17.41, "elapsed_time": "0:45:33", "remaining_time": "3:36:12", "throughput": 8781.74, "total_tokens": 24008960} +{"current_steps": 35630, "total_steps": 204665, "loss": 0.0791, "lr": 1.966750019654628e-06, "epoch": 0.870446827742897, "percentage": 17.41, "elapsed_time": "0:45:34", "remaining_time": "3:36:12", "throughput": 8781.86, "total_tokens": 24012352} +{"current_steps": 35635, "total_steps": 204665, "loss": 0.0905, "lr": 1.966728208671379e-06, "epoch": 0.8705689785747441, "percentage": 17.41, "elapsed_time": "0:45:34", "remaining_time": "3:36:11", "throughput": 8782.03, "total_tokens": 24015936} +{"current_steps": 35640, "total_steps": 204665, "loss": 0.1447, "lr": 1.9667063906578226e-06, "epoch": 0.8706911294065912, "percentage": 17.41, "elapsed_time": "0:45:35", "remaining_time": "3:36:10", "throughput": 8782.12, "total_tokens": 24019200} +{"current_steps": 35645, "total_steps": 204665, "loss": 0.219, "lr": 1.966684565614117e-06, "epoch": 0.8708132802384384, "percentage": 17.42, "elapsed_time": "0:45:35", "remaining_time": "3:36:10", "throughput": 8782.24, "total_tokens": 24022592} +{"current_steps": 35650, "total_steps": 204665, "loss": 0.1551, "lr": 1.9666627335404214e-06, "epoch": 0.8709354310702856, "percentage": 17.42, "elapsed_time": "0:45:35", "remaining_time": "3:36:09", "throughput": 8782.37, "total_tokens": 24025984} +{"current_steps": 35655, "total_steps": 204665, "loss": 0.1494, "lr": 1.9666408944368948e-06, "epoch": 0.8710575819021328, "percentage": 17.42, "elapsed_time": "0:45:36", "remaining_time": "3:36:09", "throughput": 8782.7, "total_tokens": 24030080} +{"current_steps": 35660, "total_steps": 204665, "loss": 0.0704, "lr": 1.966619048303695e-06, "epoch": 0.87117973273398, "percentage": 17.42, "elapsed_time": "0:45:36", "remaining_time": "3:36:08", "throughput": 8782.79, "total_tokens": 24033344} +{"current_steps": 35665, "total_steps": 204665, "loss": 0.1243, "lr": 1.966597195140982e-06, "epoch": 0.871301883565827, "percentage": 17.43, "elapsed_time": "0:45:36", "remaining_time": "3:36:08", "throughput": 8782.84, "total_tokens": 24036480} +{"current_steps": 35670, "total_steps": 204665, "loss": 0.267, "lr": 1.966575334948914e-06, "epoch": 0.8714240343976742, "percentage": 17.43, "elapsed_time": "0:45:37", "remaining_time": "3:36:07", "throughput": 8782.87, "total_tokens": 24039552} +{"current_steps": 35675, "total_steps": 204665, "loss": 0.0807, "lr": 1.9665534677276502e-06, "epoch": 0.8715461852295214, "percentage": 17.43, "elapsed_time": "0:45:37", "remaining_time": "3:36:07", "throughput": 8782.95, "total_tokens": 24042816} +{"current_steps": 35680, "total_steps": 204665, "loss": 0.1276, "lr": 1.9665315934773495e-06, "epoch": 0.8716683360613686, "percentage": 17.43, "elapsed_time": "0:45:37", "remaining_time": "3:36:06", "throughput": 8783.01, "total_tokens": 24046016} +{"current_steps": 35685, "total_steps": 204665, "loss": 0.083, "lr": 1.966509712198171e-06, "epoch": 0.8717904868932157, "percentage": 17.44, "elapsed_time": "0:45:38", "remaining_time": "3:36:05", "throughput": 8783.21, "total_tokens": 24049664} +{"current_steps": 35690, "total_steps": 204665, "loss": 0.1309, "lr": 1.966487823890274e-06, "epoch": 0.8719126377250629, "percentage": 17.44, "elapsed_time": "0:45:38", "remaining_time": "3:36:05", "throughput": 8783.39, "total_tokens": 24053248} +{"current_steps": 35695, "total_steps": 204665, "loss": 0.0964, "lr": 1.966465928553818e-06, "epoch": 0.8720347885569101, "percentage": 17.44, "elapsed_time": "0:45:38", "remaining_time": "3:36:04", "throughput": 8783.59, "total_tokens": 24056896} +{"current_steps": 35700, "total_steps": 204665, "loss": 0.0599, "lr": 1.9664440261889614e-06, "epoch": 0.8721569393887573, "percentage": 17.44, "elapsed_time": "0:45:39", "remaining_time": "3:36:04", "throughput": 8783.77, "total_tokens": 24060480} +{"current_steps": 35705, "total_steps": 204665, "loss": 0.076, "lr": 1.966422116795864e-06, "epoch": 0.8722790902206043, "percentage": 17.45, "elapsed_time": "0:45:39", "remaining_time": "3:36:03", "throughput": 8783.82, "total_tokens": 24063616} +{"current_steps": 35710, "total_steps": 204665, "loss": 0.0638, "lr": 1.966400200374685e-06, "epoch": 0.8724012410524515, "percentage": 17.45, "elapsed_time": "0:45:39", "remaining_time": "3:36:03", "throughput": 8783.97, "total_tokens": 24067136} +{"current_steps": 35715, "total_steps": 204665, "loss": 0.0867, "lr": 1.9663782769255837e-06, "epoch": 0.8725233918842987, "percentage": 17.45, "elapsed_time": "0:45:40", "remaining_time": "3:36:02", "throughput": 8784.17, "total_tokens": 24070784} +{"current_steps": 35720, "total_steps": 204665, "loss": 0.0867, "lr": 1.9663563464487197e-06, "epoch": 0.8726455427161459, "percentage": 17.45, "elapsed_time": "0:45:40", "remaining_time": "3:36:02", "throughput": 8784.31, "total_tokens": 24074240} +{"current_steps": 35725, "total_steps": 204665, "loss": 0.0729, "lr": 1.9663344089442524e-06, "epoch": 0.8727676935479931, "percentage": 17.46, "elapsed_time": "0:45:40", "remaining_time": "3:36:01", "throughput": 8784.52, "total_tokens": 24077952} +{"current_steps": 35730, "total_steps": 204665, "loss": 0.2075, "lr": 1.9663124644123416e-06, "epoch": 0.8728898443798402, "percentage": 17.46, "elapsed_time": "0:45:41", "remaining_time": "3:36:01", "throughput": 8784.65, "total_tokens": 24081344} +{"current_steps": 35735, "total_steps": 204665, "loss": 0.0498, "lr": 1.9662905128531464e-06, "epoch": 0.8730119952116874, "percentage": 17.46, "elapsed_time": "0:45:41", "remaining_time": "3:36:00", "throughput": 8784.73, "total_tokens": 24084608} +{"current_steps": 35740, "total_steps": 204665, "loss": 0.0882, "lr": 1.966268554266827e-06, "epoch": 0.8731341460435346, "percentage": 17.46, "elapsed_time": "0:45:41", "remaining_time": "3:36:00", "throughput": 8784.87, "total_tokens": 24088064} +{"current_steps": 35745, "total_steps": 204665, "loss": 0.1407, "lr": 1.9662465886535426e-06, "epoch": 0.8732562968753818, "percentage": 17.47, "elapsed_time": "0:45:42", "remaining_time": "3:35:59", "throughput": 8785.09, "total_tokens": 24091776} +{"current_steps": 35750, "total_steps": 204665, "loss": 0.2092, "lr": 1.966224616013453e-06, "epoch": 0.8733784477072288, "percentage": 17.47, "elapsed_time": "0:45:42", "remaining_time": "3:35:58", "throughput": 8785.19, "total_tokens": 24095104} +{"current_steps": 35755, "total_steps": 204665, "loss": 0.1278, "lr": 1.9662026363467183e-06, "epoch": 0.873500598539076, "percentage": 17.47, "elapsed_time": "0:45:43", "remaining_time": "3:35:58", "throughput": 8785.2, "total_tokens": 24098112} +{"current_steps": 35760, "total_steps": 204665, "loss": 0.2807, "lr": 1.966180649653498e-06, "epoch": 0.8736227493709232, "percentage": 17.47, "elapsed_time": "0:45:43", "remaining_time": "3:35:57", "throughput": 8785.33, "total_tokens": 24101568} +{"current_steps": 35765, "total_steps": 204665, "loss": 0.0905, "lr": 1.966158655933952e-06, "epoch": 0.8737449002027704, "percentage": 17.47, "elapsed_time": "0:45:43", "remaining_time": "3:35:57", "throughput": 8785.45, "total_tokens": 24104960} +{"current_steps": 35770, "total_steps": 204665, "loss": 0.1174, "lr": 1.966136655188241e-06, "epoch": 0.8738670510346176, "percentage": 17.48, "elapsed_time": "0:45:44", "remaining_time": "3:35:56", "throughput": 8785.61, "total_tokens": 24108480} +{"current_steps": 35775, "total_steps": 204665, "loss": 0.0826, "lr": 1.966114647416524e-06, "epoch": 0.8739892018664647, "percentage": 17.48, "elapsed_time": "0:45:44", "remaining_time": "3:35:56", "throughput": 8785.65, "total_tokens": 24111552} +{"current_steps": 35780, "total_steps": 204665, "loss": 0.0744, "lr": 1.9660926326189613e-06, "epoch": 0.8741113526983119, "percentage": 17.48, "elapsed_time": "0:45:44", "remaining_time": "3:35:55", "throughput": 8785.71, "total_tokens": 24114752} +{"current_steps": 35785, "total_steps": 204665, "loss": 0.1183, "lr": 1.9660706107957134e-06, "epoch": 0.874233503530159, "percentage": 17.48, "elapsed_time": "0:45:45", "remaining_time": "3:35:55", "throughput": 8785.8, "total_tokens": 24118016} +{"current_steps": 35790, "total_steps": 204665, "loss": 0.174, "lr": 1.9660485819469395e-06, "epoch": 0.8743556543620062, "percentage": 17.49, "elapsed_time": "0:45:45", "remaining_time": "3:35:54", "throughput": 8785.88, "total_tokens": 24121280} +{"current_steps": 35795, "total_steps": 204665, "loss": 0.0505, "lr": 1.9660265460728015e-06, "epoch": 0.8744778051938533, "percentage": 17.49, "elapsed_time": "0:45:45", "remaining_time": "3:35:53", "throughput": 8786.19, "total_tokens": 24125312} +{"current_steps": 35800, "total_steps": 204665, "loss": 0.0931, "lr": 1.9660045031734578e-06, "epoch": 0.8745999560257005, "percentage": 17.49, "elapsed_time": "0:45:46", "remaining_time": "3:35:53", "throughput": 8786.19, "total_tokens": 24128256} +{"current_steps": 35805, "total_steps": 204665, "loss": 0.1435, "lr": 1.96598245324907e-06, "epoch": 0.8747221068575477, "percentage": 17.49, "elapsed_time": "0:45:46", "remaining_time": "3:35:52", "throughput": 8786.28, "total_tokens": 24131584} +{"current_steps": 35810, "total_steps": 204665, "loss": 0.0891, "lr": 1.965960396299798e-06, "epoch": 0.8748442576893949, "percentage": 17.5, "elapsed_time": "0:45:46", "remaining_time": "3:35:52", "throughput": 8786.37, "total_tokens": 24134912} +{"current_steps": 35815, "total_steps": 204665, "loss": 0.1344, "lr": 1.9659383323258017e-06, "epoch": 0.8749664085212421, "percentage": 17.5, "elapsed_time": "0:45:47", "remaining_time": "3:35:51", "throughput": 8786.49, "total_tokens": 24138304} +{"current_steps": 35820, "total_steps": 204665, "loss": 0.0714, "lr": 1.9659162613272424e-06, "epoch": 0.8750885593530892, "percentage": 17.5, "elapsed_time": "0:45:47", "remaining_time": "3:35:51", "throughput": 8786.64, "total_tokens": 24141760} +{"current_steps": 35825, "total_steps": 204665, "loss": 0.1576, "lr": 1.9658941833042804e-06, "epoch": 0.8752107101849363, "percentage": 17.5, "elapsed_time": "0:45:47", "remaining_time": "3:35:50", "throughput": 8786.7, "total_tokens": 24144960} +{"current_steps": 35830, "total_steps": 204665, "loss": 0.0207, "lr": 1.9658720982570757e-06, "epoch": 0.8753328610167835, "percentage": 17.51, "elapsed_time": "0:45:48", "remaining_time": "3:35:50", "throughput": 8786.69, "total_tokens": 24147904} +{"current_steps": 35835, "total_steps": 204665, "loss": 0.1306, "lr": 1.9658500061857897e-06, "epoch": 0.8754550118486307, "percentage": 17.51, "elapsed_time": "0:45:48", "remaining_time": "3:35:49", "throughput": 8786.75, "total_tokens": 24151104} +{"current_steps": 35840, "total_steps": 204665, "loss": 0.1516, "lr": 1.9658279070905825e-06, "epoch": 0.8755771626804778, "percentage": 17.51, "elapsed_time": "0:45:48", "remaining_time": "3:35:48", "throughput": 8786.86, "total_tokens": 24154432} +{"current_steps": 35845, "total_steps": 204665, "loss": 0.0642, "lr": 1.9658058009716147e-06, "epoch": 0.875699313512325, "percentage": 17.51, "elapsed_time": "0:45:49", "remaining_time": "3:35:48", "throughput": 8786.93, "total_tokens": 24157696} +{"current_steps": 35850, "total_steps": 204665, "loss": 0.0856, "lr": 1.965783687829048e-06, "epoch": 0.8758214643441722, "percentage": 17.52, "elapsed_time": "0:45:49", "remaining_time": "3:35:47", "throughput": 8787.06, "total_tokens": 24161152} +{"current_steps": 35855, "total_steps": 204665, "loss": 0.2556, "lr": 1.965761567663042e-06, "epoch": 0.8759436151760194, "percentage": 17.52, "elapsed_time": "0:45:49", "remaining_time": "3:35:47", "throughput": 8787.15, "total_tokens": 24164416} +{"current_steps": 35860, "total_steps": 204665, "loss": 0.0845, "lr": 1.9657394404737582e-06, "epoch": 0.8760657660078666, "percentage": 17.52, "elapsed_time": "0:45:50", "remaining_time": "3:35:46", "throughput": 8787.33, "total_tokens": 24168000} +{"current_steps": 35865, "total_steps": 204665, "loss": 0.2504, "lr": 1.9657173062613575e-06, "epoch": 0.8761879168397136, "percentage": 17.52, "elapsed_time": "0:45:50", "remaining_time": "3:35:46", "throughput": 8787.5, "total_tokens": 24171584} +{"current_steps": 35870, "total_steps": 204665, "loss": 0.1544, "lr": 1.965695165026001e-06, "epoch": 0.8763100676715608, "percentage": 17.53, "elapsed_time": "0:45:51", "remaining_time": "3:35:45", "throughput": 8787.67, "total_tokens": 24175168} +{"current_steps": 35875, "total_steps": 204665, "loss": 0.0603, "lr": 1.9656730167678494e-06, "epoch": 0.876432218503408, "percentage": 17.53, "elapsed_time": "0:45:51", "remaining_time": "3:35:45", "throughput": 8787.76, "total_tokens": 24178432} +{"current_steps": 35880, "total_steps": 204665, "loss": 0.0844, "lr": 1.965650861487064e-06, "epoch": 0.8765543693352552, "percentage": 17.53, "elapsed_time": "0:45:51", "remaining_time": "3:35:44", "throughput": 8787.83, "total_tokens": 24181696} +{"current_steps": 35885, "total_steps": 204665, "loss": 0.2487, "lr": 1.9656286991838056e-06, "epoch": 0.8766765201671023, "percentage": 17.53, "elapsed_time": "0:45:52", "remaining_time": "3:35:43", "throughput": 8787.88, "total_tokens": 24184832} +{"current_steps": 35890, "total_steps": 204665, "loss": 0.0238, "lr": 1.9656065298582355e-06, "epoch": 0.8767986709989495, "percentage": 17.54, "elapsed_time": "0:45:52", "remaining_time": "3:35:43", "throughput": 8788.0, "total_tokens": 24188224} +{"current_steps": 35895, "total_steps": 204665, "loss": 0.0667, "lr": 1.9655843535105154e-06, "epoch": 0.8769208218307967, "percentage": 17.54, "elapsed_time": "0:45:52", "remaining_time": "3:35:42", "throughput": 8788.2, "total_tokens": 24191872} +{"current_steps": 35900, "total_steps": 204665, "loss": 0.063, "lr": 1.9655621701408057e-06, "epoch": 0.8770429726626439, "percentage": 17.54, "elapsed_time": "0:45:53", "remaining_time": "3:35:42", "throughput": 8788.43, "total_tokens": 24195648} +{"current_steps": 35905, "total_steps": 204665, "loss": 0.1216, "lr": 1.9655399797492683e-06, "epoch": 0.877165123494491, "percentage": 17.54, "elapsed_time": "0:45:53", "remaining_time": "3:35:41", "throughput": 8788.53, "total_tokens": 24198976} +{"current_steps": 35910, "total_steps": 204665, "loss": 0.0618, "lr": 1.9655177823360645e-06, "epoch": 0.8772872743263381, "percentage": 17.55, "elapsed_time": "0:45:53", "remaining_time": "3:35:41", "throughput": 8788.62, "total_tokens": 24202304} +{"current_steps": 35915, "total_steps": 204665, "loss": 0.1367, "lr": 1.9654955779013557e-06, "epoch": 0.8774094251581853, "percentage": 17.55, "elapsed_time": "0:45:54", "remaining_time": "3:35:40", "throughput": 8788.68, "total_tokens": 24205504} +{"current_steps": 35920, "total_steps": 204665, "loss": 0.1328, "lr": 1.9654733664453037e-06, "epoch": 0.8775315759900325, "percentage": 17.55, "elapsed_time": "0:45:54", "remaining_time": "3:35:40", "throughput": 8788.93, "total_tokens": 24209344} +{"current_steps": 35925, "total_steps": 204665, "loss": 0.0406, "lr": 1.9654511479680693e-06, "epoch": 0.8776537268218797, "percentage": 17.55, "elapsed_time": "0:45:54", "remaining_time": "3:35:39", "throughput": 8789.05, "total_tokens": 24212736} +{"current_steps": 35930, "total_steps": 204665, "loss": 0.1151, "lr": 1.9654289224698144e-06, "epoch": 0.8777758776537268, "percentage": 17.56, "elapsed_time": "0:45:55", "remaining_time": "3:35:39", "throughput": 8789.12, "total_tokens": 24216000} +{"current_steps": 35935, "total_steps": 204665, "loss": 0.1295, "lr": 1.965406689950701e-06, "epoch": 0.877898028485574, "percentage": 17.56, "elapsed_time": "0:45:55", "remaining_time": "3:35:38", "throughput": 8789.24, "total_tokens": 24219392} +{"current_steps": 35940, "total_steps": 204665, "loss": 0.1675, "lr": 1.9653844504108906e-06, "epoch": 0.8780201793174212, "percentage": 17.56, "elapsed_time": "0:45:55", "remaining_time": "3:35:38", "throughput": 8789.36, "total_tokens": 24222784} +{"current_steps": 35945, "total_steps": 204665, "loss": 0.1002, "lr": 1.965362203850545e-06, "epoch": 0.8781423301492683, "percentage": 17.56, "elapsed_time": "0:45:56", "remaining_time": "3:35:37", "throughput": 8789.33, "total_tokens": 24225664} +{"current_steps": 35950, "total_steps": 204665, "loss": 0.0102, "lr": 1.965339950269825e-06, "epoch": 0.8782644809811155, "percentage": 17.57, "elapsed_time": "0:45:56", "remaining_time": "3:35:36", "throughput": 8789.53, "total_tokens": 24229312} +{"current_steps": 35955, "total_steps": 204665, "loss": 0.0219, "lr": 1.9653176896688936e-06, "epoch": 0.8783866318129626, "percentage": 17.57, "elapsed_time": "0:45:56", "remaining_time": "3:35:36", "throughput": 8789.61, "total_tokens": 24232576} +{"current_steps": 35960, "total_steps": 204665, "loss": 0.0591, "lr": 1.965295422047912e-06, "epoch": 0.8785087826448098, "percentage": 17.57, "elapsed_time": "0:45:57", "remaining_time": "3:35:35", "throughput": 8789.76, "total_tokens": 24236096} +{"current_steps": 35965, "total_steps": 204665, "loss": 0.1248, "lr": 1.965273147407043e-06, "epoch": 0.878630933476657, "percentage": 17.57, "elapsed_time": "0:45:57", "remaining_time": "3:35:35", "throughput": 8789.94, "total_tokens": 24239680} +{"current_steps": 35970, "total_steps": 204665, "loss": 0.0906, "lr": 1.965250865746448e-06, "epoch": 0.8787530843085042, "percentage": 17.58, "elapsed_time": "0:45:58", "remaining_time": "3:35:34", "throughput": 8789.99, "total_tokens": 24242816} +{"current_steps": 35975, "total_steps": 204665, "loss": 0.0539, "lr": 1.9652285770662893e-06, "epoch": 0.8788752351403513, "percentage": 17.58, "elapsed_time": "0:45:58", "remaining_time": "3:35:34", "throughput": 8790.11, "total_tokens": 24246208} +{"current_steps": 35980, "total_steps": 204665, "loss": 0.088, "lr": 1.965206281366728e-06, "epoch": 0.8789973859721985, "percentage": 17.58, "elapsed_time": "0:45:58", "remaining_time": "3:35:33", "throughput": 8790.09, "total_tokens": 24249152} +{"current_steps": 35985, "total_steps": 204665, "loss": 0.1986, "lr": 1.9651839786479276e-06, "epoch": 0.8791195368040456, "percentage": 17.58, "elapsed_time": "0:45:59", "remaining_time": "3:35:33", "throughput": 8790.23, "total_tokens": 24252608} +{"current_steps": 35990, "total_steps": 204665, "loss": 0.1155, "lr": 1.9651616689100498e-06, "epoch": 0.8792416876358928, "percentage": 17.58, "elapsed_time": "0:45:59", "remaining_time": "3:35:32", "throughput": 8790.26, "total_tokens": 24255680} +{"current_steps": 35995, "total_steps": 204665, "loss": 0.1162, "lr": 1.9651393521532563e-06, "epoch": 0.8793638384677399, "percentage": 17.59, "elapsed_time": "0:45:59", "remaining_time": "3:35:31", "throughput": 8790.32, "total_tokens": 24258880} +{"current_steps": 36000, "total_steps": 204665, "loss": 0.19, "lr": 1.9651170283777098e-06, "epoch": 0.8794859892995871, "percentage": 17.59, "elapsed_time": "0:46:00", "remaining_time": "3:35:31", "throughput": 8790.5, "total_tokens": 24262464} +{"current_steps": 36005, "total_steps": 204665, "loss": 0.1447, "lr": 1.9650946975835733e-06, "epoch": 0.8796081401314343, "percentage": 17.59, "elapsed_time": "0:46:00", "remaining_time": "3:35:30", "throughput": 8790.6, "total_tokens": 24265792} +{"current_steps": 36010, "total_steps": 204665, "loss": 0.2048, "lr": 1.9650723597710078e-06, "epoch": 0.8797302909632815, "percentage": 17.59, "elapsed_time": "0:46:00", "remaining_time": "3:35:30", "throughput": 8790.61, "total_tokens": 24268800} +{"current_steps": 36015, "total_steps": 204665, "loss": 0.1781, "lr": 1.965050014940177e-06, "epoch": 0.8798524417951287, "percentage": 17.6, "elapsed_time": "0:46:01", "remaining_time": "3:35:29", "throughput": 8790.73, "total_tokens": 24272192} +{"current_steps": 36020, "total_steps": 204665, "loss": 0.0397, "lr": 1.9650276630912427e-06, "epoch": 0.8799745926269757, "percentage": 17.6, "elapsed_time": "0:46:01", "remaining_time": "3:35:29", "throughput": 8790.93, "total_tokens": 24275840} +{"current_steps": 36025, "total_steps": 204665, "loss": 0.1337, "lr": 1.965005304224368e-06, "epoch": 0.8800967434588229, "percentage": 17.6, "elapsed_time": "0:46:01", "remaining_time": "3:35:28", "throughput": 8791.07, "total_tokens": 24279296} +{"current_steps": 36030, "total_steps": 204665, "loss": 0.3578, "lr": 1.964982938339715e-06, "epoch": 0.8802188942906701, "percentage": 17.6, "elapsed_time": "0:46:02", "remaining_time": "3:35:27", "throughput": 8791.06, "total_tokens": 24282240} +{"current_steps": 36035, "total_steps": 204665, "loss": 0.0886, "lr": 1.9649605654374466e-06, "epoch": 0.8803410451225173, "percentage": 17.61, "elapsed_time": "0:46:02", "remaining_time": "3:35:27", "throughput": 8791.2, "total_tokens": 24285696} +{"current_steps": 36040, "total_steps": 204665, "loss": 0.0541, "lr": 1.964938185517725e-06, "epoch": 0.8804631959543644, "percentage": 17.61, "elapsed_time": "0:46:02", "remaining_time": "3:35:26", "throughput": 8791.21, "total_tokens": 24288704} +{"current_steps": 36045, "total_steps": 204665, "loss": 0.0778, "lr": 1.964915798580714e-06, "epoch": 0.8805853467862116, "percentage": 17.61, "elapsed_time": "0:46:03", "remaining_time": "3:35:26", "throughput": 8791.16, "total_tokens": 24291520} +{"current_steps": 36050, "total_steps": 204665, "loss": 0.1126, "lr": 1.9648934046265755e-06, "epoch": 0.8807074976180588, "percentage": 17.61, "elapsed_time": "0:46:03", "remaining_time": "3:35:25", "throughput": 8791.24, "total_tokens": 24294720} +{"current_steps": 36055, "total_steps": 204665, "loss": 0.1484, "lr": 1.9648710036554726e-06, "epoch": 0.880829648449906, "percentage": 17.62, "elapsed_time": "0:46:03", "remaining_time": "3:35:25", "throughput": 8791.32, "total_tokens": 24297984} +{"current_steps": 36060, "total_steps": 204665, "loss": 0.1794, "lr": 1.9648485956675683e-06, "epoch": 0.8809517992817532, "percentage": 17.62, "elapsed_time": "0:46:04", "remaining_time": "3:35:24", "throughput": 8791.43, "total_tokens": 24301312} +{"current_steps": 36065, "total_steps": 204665, "loss": 0.1554, "lr": 1.9648261806630255e-06, "epoch": 0.8810739501136002, "percentage": 17.62, "elapsed_time": "0:46:04", "remaining_time": "3:35:23", "throughput": 8791.54, "total_tokens": 24304640} +{"current_steps": 36070, "total_steps": 204665, "loss": 0.0985, "lr": 1.964803758642007e-06, "epoch": 0.8811961009454474, "percentage": 17.62, "elapsed_time": "0:46:04", "remaining_time": "3:35:23", "throughput": 8791.79, "total_tokens": 24308480} +{"current_steps": 36075, "total_steps": 204665, "loss": 0.1324, "lr": 1.9647813296046766e-06, "epoch": 0.8813182517772946, "percentage": 17.63, "elapsed_time": "0:46:05", "remaining_time": "3:35:22", "throughput": 8791.85, "total_tokens": 24311680} +{"current_steps": 36080, "total_steps": 204665, "loss": 0.1911, "lr": 1.964758893551196e-06, "epoch": 0.8814404026091418, "percentage": 17.63, "elapsed_time": "0:46:05", "remaining_time": "3:35:22", "throughput": 8791.91, "total_tokens": 24314880} +{"current_steps": 36085, "total_steps": 204665, "loss": 0.0895, "lr": 1.96473645048173e-06, "epoch": 0.8815625534409889, "percentage": 17.63, "elapsed_time": "0:46:05", "remaining_time": "3:35:21", "throughput": 8792.07, "total_tokens": 24318400} +{"current_steps": 36090, "total_steps": 204665, "loss": 0.105, "lr": 1.964714000396441e-06, "epoch": 0.8816847042728361, "percentage": 17.63, "elapsed_time": "0:46:06", "remaining_time": "3:35:21", "throughput": 8792.11, "total_tokens": 24321536} +{"current_steps": 36095, "total_steps": 204665, "loss": 0.0495, "lr": 1.964691543295492e-06, "epoch": 0.8818068551046833, "percentage": 17.64, "elapsed_time": "0:46:06", "remaining_time": "3:35:20", "throughput": 8792.22, "total_tokens": 24324864} +{"current_steps": 36100, "total_steps": 204665, "loss": 0.0606, "lr": 1.964669079179047e-06, "epoch": 0.8819290059365305, "percentage": 17.64, "elapsed_time": "0:46:06", "remaining_time": "3:35:20", "throughput": 8792.27, "total_tokens": 24328000} +{"current_steps": 36105, "total_steps": 204665, "loss": 0.0605, "lr": 1.9646466080472686e-06, "epoch": 0.8820511567683776, "percentage": 17.64, "elapsed_time": "0:46:07", "remaining_time": "3:35:19", "throughput": 8792.28, "total_tokens": 24331008} +{"current_steps": 36110, "total_steps": 204665, "loss": 0.1002, "lr": 1.9646241299003206e-06, "epoch": 0.8821733076002247, "percentage": 17.64, "elapsed_time": "0:46:07", "remaining_time": "3:35:18", "throughput": 8792.3, "total_tokens": 24334080} +{"current_steps": 36115, "total_steps": 204665, "loss": 0.1031, "lr": 1.9646016447383665e-06, "epoch": 0.8822954584320719, "percentage": 17.65, "elapsed_time": "0:46:08", "remaining_time": "3:35:18", "throughput": 8792.42, "total_tokens": 24337472} +{"current_steps": 36120, "total_steps": 204665, "loss": 0.1362, "lr": 1.96457915256157e-06, "epoch": 0.8824176092639191, "percentage": 17.65, "elapsed_time": "0:46:08", "remaining_time": "3:35:17", "throughput": 8792.57, "total_tokens": 24340928} +{"current_steps": 36125, "total_steps": 204665, "loss": 0.1261, "lr": 1.9645566533700945e-06, "epoch": 0.8825397600957663, "percentage": 17.65, "elapsed_time": "0:46:08", "remaining_time": "3:35:17", "throughput": 8792.74, "total_tokens": 24344512} +{"current_steps": 36130, "total_steps": 204665, "loss": 0.0493, "lr": 1.9645341471641036e-06, "epoch": 0.8826619109276134, "percentage": 17.65, "elapsed_time": "0:46:09", "remaining_time": "3:35:16", "throughput": 8792.83, "total_tokens": 24347776} +{"current_steps": 36135, "total_steps": 204665, "loss": 0.0642, "lr": 1.964511633943761e-06, "epoch": 0.8827840617594606, "percentage": 17.66, "elapsed_time": "0:46:09", "remaining_time": "3:35:16", "throughput": 8792.89, "total_tokens": 24350976} +{"current_steps": 36140, "total_steps": 204665, "loss": 0.1568, "lr": 1.9644891137092298e-06, "epoch": 0.8829062125913077, "percentage": 17.66, "elapsed_time": "0:46:09", "remaining_time": "3:35:15", "throughput": 8792.97, "total_tokens": 24354240} +{"current_steps": 36145, "total_steps": 204665, "loss": 0.0523, "lr": 1.9644665864606747e-06, "epoch": 0.8830283634231549, "percentage": 17.66, "elapsed_time": "0:46:10", "remaining_time": "3:35:15", "throughput": 8793.0, "total_tokens": 24357312} +{"current_steps": 36150, "total_steps": 204665, "loss": 0.1478, "lr": 1.9644440521982594e-06, "epoch": 0.8831505142550021, "percentage": 17.66, "elapsed_time": "0:46:10", "remaining_time": "3:35:14", "throughput": 8793.22, "total_tokens": 24361024} +{"current_steps": 36155, "total_steps": 204665, "loss": 0.1884, "lr": 1.9644215109221475e-06, "epoch": 0.8832726650868492, "percentage": 17.67, "elapsed_time": "0:46:10", "remaining_time": "3:35:13", "throughput": 8793.34, "total_tokens": 24364480} +{"current_steps": 36160, "total_steps": 204665, "loss": 0.1435, "lr": 1.9643989626325024e-06, "epoch": 0.8833948159186964, "percentage": 17.67, "elapsed_time": "0:46:11", "remaining_time": "3:35:13", "throughput": 8793.39, "total_tokens": 24367616} +{"current_steps": 36165, "total_steps": 204665, "loss": 0.2141, "lr": 1.9643764073294893e-06, "epoch": 0.8835169667505436, "percentage": 17.67, "elapsed_time": "0:46:11", "remaining_time": "3:35:12", "throughput": 8793.46, "total_tokens": 24370880} +{"current_steps": 36170, "total_steps": 204665, "loss": 0.1681, "lr": 1.9643538450132713e-06, "epoch": 0.8836391175823908, "percentage": 17.67, "elapsed_time": "0:46:11", "remaining_time": "3:35:12", "throughput": 8793.54, "total_tokens": 24374144} +{"current_steps": 36175, "total_steps": 204665, "loss": 0.0899, "lr": 1.964331275684013e-06, "epoch": 0.8837612684142379, "percentage": 17.68, "elapsed_time": "0:46:12", "remaining_time": "3:35:11", "throughput": 8793.61, "total_tokens": 24377344} +{"current_steps": 36180, "total_steps": 204665, "loss": 0.0782, "lr": 1.964308699341878e-06, "epoch": 0.883883419246085, "percentage": 17.68, "elapsed_time": "0:46:12", "remaining_time": "3:35:11", "throughput": 8793.66, "total_tokens": 24380480} +{"current_steps": 36185, "total_steps": 204665, "loss": 0.0687, "lr": 1.964286115987031e-06, "epoch": 0.8840055700779322, "percentage": 17.68, "elapsed_time": "0:46:12", "remaining_time": "3:35:10", "throughput": 8793.82, "total_tokens": 24384000} +{"current_steps": 36190, "total_steps": 204665, "loss": 0.0591, "lr": 1.9642635256196356e-06, "epoch": 0.8841277209097794, "percentage": 17.68, "elapsed_time": "0:46:13", "remaining_time": "3:35:10", "throughput": 8793.97, "total_tokens": 24387520} +{"current_steps": 36195, "total_steps": 204665, "loss": 0.1161, "lr": 1.9642409282398573e-06, "epoch": 0.8842498717416266, "percentage": 17.68, "elapsed_time": "0:46:13", "remaining_time": "3:35:09", "throughput": 8794.24, "total_tokens": 24391424} +{"current_steps": 36200, "total_steps": 204665, "loss": 0.1667, "lr": 1.964218323847859e-06, "epoch": 0.8843720225734737, "percentage": 17.69, "elapsed_time": "0:46:13", "remaining_time": "3:35:09", "throughput": 8794.28, "total_tokens": 24394560} +{"current_steps": 36205, "total_steps": 204665, "loss": 0.0912, "lr": 1.9641957124438058e-06, "epoch": 0.8844941734053209, "percentage": 17.69, "elapsed_time": "0:46:14", "remaining_time": "3:35:08", "throughput": 8794.44, "total_tokens": 24398080} +{"current_steps": 36210, "total_steps": 204665, "loss": 0.097, "lr": 1.9641730940278617e-06, "epoch": 0.8846163242371681, "percentage": 17.69, "elapsed_time": "0:46:14", "remaining_time": "3:35:07", "throughput": 8794.53, "total_tokens": 24401408} +{"current_steps": 36215, "total_steps": 204665, "loss": 0.1731, "lr": 1.964150468600192e-06, "epoch": 0.8847384750690153, "percentage": 17.69, "elapsed_time": "0:46:14", "remaining_time": "3:35:07", "throughput": 8794.63, "total_tokens": 24404736} +{"current_steps": 36220, "total_steps": 204665, "loss": 0.0174, "lr": 1.964127836160961e-06, "epoch": 0.8848606259008623, "percentage": 17.7, "elapsed_time": "0:46:15", "remaining_time": "3:35:06", "throughput": 8794.71, "total_tokens": 24408000} +{"current_steps": 36225, "total_steps": 204665, "loss": 0.1235, "lr": 1.964105196710332e-06, "epoch": 0.8849827767327095, "percentage": 17.7, "elapsed_time": "0:46:15", "remaining_time": "3:35:06", "throughput": 8794.75, "total_tokens": 24411136} +{"current_steps": 36230, "total_steps": 204665, "loss": 0.0469, "lr": 1.9640825502484716e-06, "epoch": 0.8851049275645567, "percentage": 17.7, "elapsed_time": "0:46:15", "remaining_time": "3:35:05", "throughput": 8794.93, "total_tokens": 24414720} +{"current_steps": 36235, "total_steps": 204665, "loss": 0.1697, "lr": 1.9640598967755435e-06, "epoch": 0.8852270783964039, "percentage": 17.7, "elapsed_time": "0:46:16", "remaining_time": "3:35:05", "throughput": 8795.05, "total_tokens": 24418112} +{"current_steps": 36240, "total_steps": 204665, "loss": 0.1409, "lr": 1.9640372362917128e-06, "epoch": 0.885349229228251, "percentage": 17.71, "elapsed_time": "0:46:16", "remaining_time": "3:35:04", "throughput": 8795.08, "total_tokens": 24421184} +{"current_steps": 36245, "total_steps": 204665, "loss": 0.0835, "lr": 1.964014568797143e-06, "epoch": 0.8854713800600982, "percentage": 17.71, "elapsed_time": "0:46:17", "remaining_time": "3:35:04", "throughput": 8795.17, "total_tokens": 24424448} +{"current_steps": 36250, "total_steps": 204665, "loss": 0.1017, "lr": 1.963991894292001e-06, "epoch": 0.8855935308919454, "percentage": 17.71, "elapsed_time": "0:46:17", "remaining_time": "3:35:03", "throughput": 8795.32, "total_tokens": 24427968} +{"current_steps": 36255, "total_steps": 204665, "loss": 0.1615, "lr": 1.9639692127764504e-06, "epoch": 0.8857156817237926, "percentage": 17.71, "elapsed_time": "0:46:17", "remaining_time": "3:35:03", "throughput": 8795.56, "total_tokens": 24431808} +{"current_steps": 36260, "total_steps": 204665, "loss": 0.1469, "lr": 1.9639465242506563e-06, "epoch": 0.8858378325556397, "percentage": 17.72, "elapsed_time": "0:46:18", "remaining_time": "3:35:02", "throughput": 8795.81, "total_tokens": 24435648} +{"current_steps": 36265, "total_steps": 204665, "loss": 0.084, "lr": 1.9639238287147836e-06, "epoch": 0.8859599833874868, "percentage": 17.72, "elapsed_time": "0:46:18", "remaining_time": "3:35:01", "throughput": 8795.83, "total_tokens": 24438720} +{"current_steps": 36270, "total_steps": 204665, "loss": 0.1053, "lr": 1.963901126168998e-06, "epoch": 0.886082134219334, "percentage": 17.72, "elapsed_time": "0:46:18", "remaining_time": "3:35:01", "throughput": 8795.98, "total_tokens": 24442240} +{"current_steps": 36275, "total_steps": 204665, "loss": 0.0492, "lr": 1.9638784166134636e-06, "epoch": 0.8862042850511812, "percentage": 17.72, "elapsed_time": "0:46:19", "remaining_time": "3:35:00", "throughput": 8795.97, "total_tokens": 24445184} +{"current_steps": 36280, "total_steps": 204665, "loss": 0.0825, "lr": 1.9638557000483463e-06, "epoch": 0.8863264358830284, "percentage": 17.73, "elapsed_time": "0:46:19", "remaining_time": "3:35:00", "throughput": 8796.28, "total_tokens": 24449280} +{"current_steps": 36285, "total_steps": 204665, "loss": 0.0363, "lr": 1.963832976473811e-06, "epoch": 0.8864485867148755, "percentage": 17.73, "elapsed_time": "0:46:19", "remaining_time": "3:34:59", "throughput": 8796.38, "total_tokens": 24452608} +{"current_steps": 36290, "total_steps": 204665, "loss": 0.0744, "lr": 1.9638102458900236e-06, "epoch": 0.8865707375467227, "percentage": 17.73, "elapsed_time": "0:46:20", "remaining_time": "3:34:59", "throughput": 8796.39, "total_tokens": 24455616} +{"current_steps": 36295, "total_steps": 204665, "loss": 0.0468, "lr": 1.963787508297148e-06, "epoch": 0.8866928883785699, "percentage": 17.73, "elapsed_time": "0:46:20", "remaining_time": "3:34:58", "throughput": 8796.54, "total_tokens": 24459072} +{"current_steps": 36300, "total_steps": 204665, "loss": 0.1084, "lr": 1.9637647636953513e-06, "epoch": 0.886815039210417, "percentage": 17.74, "elapsed_time": "0:46:20", "remaining_time": "3:34:58", "throughput": 8796.64, "total_tokens": 24462400} +{"current_steps": 36305, "total_steps": 204665, "loss": 0.1197, "lr": 1.9637420120847976e-06, "epoch": 0.8869371900422642, "percentage": 17.74, "elapsed_time": "0:46:21", "remaining_time": "3:34:57", "throughput": 8796.57, "total_tokens": 24465152} +{"current_steps": 36310, "total_steps": 204665, "loss": 0.0815, "lr": 1.9637192534656528e-06, "epoch": 0.8870593408741113, "percentage": 17.74, "elapsed_time": "0:46:21", "remaining_time": "3:34:56", "throughput": 8796.57, "total_tokens": 24468160} +{"current_steps": 36315, "total_steps": 204665, "loss": 0.1247, "lr": 1.9636964878380824e-06, "epoch": 0.8871814917059585, "percentage": 17.74, "elapsed_time": "0:46:21", "remaining_time": "3:34:56", "throughput": 8796.53, "total_tokens": 24470976} +{"current_steps": 36320, "total_steps": 204665, "loss": 0.063, "lr": 1.963673715202252e-06, "epoch": 0.8873036425378057, "percentage": 17.75, "elapsed_time": "0:46:22", "remaining_time": "3:34:55", "throughput": 8796.77, "total_tokens": 24474752} +{"current_steps": 36325, "total_steps": 204665, "loss": 0.1894, "lr": 1.9636509355583268e-06, "epoch": 0.8874257933696529, "percentage": 17.75, "elapsed_time": "0:46:22", "remaining_time": "3:34:55", "throughput": 8796.98, "total_tokens": 24478464} +{"current_steps": 36330, "total_steps": 204665, "loss": 0.0863, "lr": 1.9636281489064732e-06, "epoch": 0.8875479442015, "percentage": 17.75, "elapsed_time": "0:46:22", "remaining_time": "3:34:54", "throughput": 8797.09, "total_tokens": 24481856} +{"current_steps": 36335, "total_steps": 204665, "loss": 0.122, "lr": 1.9636053552468565e-06, "epoch": 0.8876700950333472, "percentage": 17.75, "elapsed_time": "0:46:23", "remaining_time": "3:34:54", "throughput": 8797.16, "total_tokens": 24485120} +{"current_steps": 36340, "total_steps": 204665, "loss": 0.0674, "lr": 1.963582554579642e-06, "epoch": 0.8877922458651943, "percentage": 17.76, "elapsed_time": "0:46:23", "remaining_time": "3:34:53", "throughput": 8797.29, "total_tokens": 24488576} +{"current_steps": 36345, "total_steps": 204665, "loss": 0.1177, "lr": 1.9635597469049963e-06, "epoch": 0.8879143966970415, "percentage": 17.76, "elapsed_time": "0:46:24", "remaining_time": "3:34:53", "throughput": 8797.44, "total_tokens": 24492096} +{"current_steps": 36350, "total_steps": 204665, "loss": 0.2048, "lr": 1.9635369322230852e-06, "epoch": 0.8880365475288887, "percentage": 17.76, "elapsed_time": "0:46:24", "remaining_time": "3:34:52", "throughput": 8797.54, "total_tokens": 24495424} +{"current_steps": 36355, "total_steps": 204665, "loss": 0.1205, "lr": 1.9635141105340742e-06, "epoch": 0.8881586983607358, "percentage": 17.76, "elapsed_time": "0:46:24", "remaining_time": "3:34:52", "throughput": 8797.68, "total_tokens": 24498880} +{"current_steps": 36360, "total_steps": 204665, "loss": 0.1286, "lr": 1.9634912818381293e-06, "epoch": 0.888280849192583, "percentage": 17.77, "elapsed_time": "0:46:25", "remaining_time": "3:34:51", "throughput": 8797.82, "total_tokens": 24502336} +{"current_steps": 36365, "total_steps": 204665, "loss": 0.2572, "lr": 1.963468446135417e-06, "epoch": 0.8884030000244302, "percentage": 17.77, "elapsed_time": "0:46:25", "remaining_time": "3:34:50", "throughput": 8797.84, "total_tokens": 24505408} +{"current_steps": 36370, "total_steps": 204665, "loss": 0.1699, "lr": 1.9634456034261025e-06, "epoch": 0.8885251508562774, "percentage": 17.77, "elapsed_time": "0:46:25", "remaining_time": "3:34:50", "throughput": 8798.03, "total_tokens": 24509056} +{"current_steps": 36375, "total_steps": 204665, "loss": 0.0129, "lr": 1.963422753710353e-06, "epoch": 0.8886473016881244, "percentage": 17.77, "elapsed_time": "0:46:26", "remaining_time": "3:34:49", "throughput": 8798.06, "total_tokens": 24512128} +{"current_steps": 36380, "total_steps": 204665, "loss": 0.1042, "lr": 1.9633998969883335e-06, "epoch": 0.8887694525199716, "percentage": 17.78, "elapsed_time": "0:46:26", "remaining_time": "3:34:49", "throughput": 8798.2, "total_tokens": 24515648} +{"current_steps": 36385, "total_steps": 204665, "loss": 0.0286, "lr": 1.963377033260211e-06, "epoch": 0.8888916033518188, "percentage": 17.78, "elapsed_time": "0:46:26", "remaining_time": "3:34:48", "throughput": 8798.26, "total_tokens": 24518848} +{"current_steps": 36390, "total_steps": 204665, "loss": 0.0204, "lr": 1.963354162526152e-06, "epoch": 0.889013754183666, "percentage": 17.78, "elapsed_time": "0:46:27", "remaining_time": "3:34:48", "throughput": 8798.61, "total_tokens": 24523072} +{"current_steps": 36395, "total_steps": 204665, "loss": 0.1326, "lr": 1.963331284786322e-06, "epoch": 0.8891359050155132, "percentage": 17.78, "elapsed_time": "0:46:27", "remaining_time": "3:34:47", "throughput": 8798.87, "total_tokens": 24526976} +{"current_steps": 36400, "total_steps": 204665, "loss": 0.1465, "lr": 1.9633084000408883e-06, "epoch": 0.8892580558473603, "percentage": 17.79, "elapsed_time": "0:46:27", "remaining_time": "3:34:47", "throughput": 8798.89, "total_tokens": 24529984} +{"current_steps": 36405, "total_steps": 204665, "loss": 0.0548, "lr": 1.9632855082900163e-06, "epoch": 0.8893802066792075, "percentage": 17.79, "elapsed_time": "0:46:28", "remaining_time": "3:34:46", "throughput": 8798.93, "total_tokens": 24533120} +{"current_steps": 36410, "total_steps": 204665, "loss": 0.0965, "lr": 1.9632626095338735e-06, "epoch": 0.8895023575110547, "percentage": 17.79, "elapsed_time": "0:46:28", "remaining_time": "3:34:46", "throughput": 8799.06, "total_tokens": 24536576} +{"current_steps": 36415, "total_steps": 204665, "loss": 0.0375, "lr": 1.963239703772625e-06, "epoch": 0.8896245083429019, "percentage": 17.79, "elapsed_time": "0:46:28", "remaining_time": "3:34:45", "throughput": 8799.17, "total_tokens": 24539968} +{"current_steps": 36420, "total_steps": 204665, "loss": 0.1732, "lr": 1.963216791006439e-06, "epoch": 0.8897466591747489, "percentage": 17.79, "elapsed_time": "0:46:29", "remaining_time": "3:34:45", "throughput": 8799.27, "total_tokens": 24543296} +{"current_steps": 36425, "total_steps": 204665, "loss": 0.1472, "lr": 1.9631938712354815e-06, "epoch": 0.8898688100065961, "percentage": 17.8, "elapsed_time": "0:46:29", "remaining_time": "3:34:44", "throughput": 8799.3, "total_tokens": 24546368} +{"current_steps": 36430, "total_steps": 204665, "loss": 0.2331, "lr": 1.9631709444599187e-06, "epoch": 0.8899909608384433, "percentage": 17.8, "elapsed_time": "0:46:29", "remaining_time": "3:34:44", "throughput": 8799.47, "total_tokens": 24549952} +{"current_steps": 36435, "total_steps": 204665, "loss": 0.1199, "lr": 1.963148010679918e-06, "epoch": 0.8901131116702905, "percentage": 17.8, "elapsed_time": "0:46:30", "remaining_time": "3:34:43", "throughput": 8799.51, "total_tokens": 24553088} +{"current_steps": 36440, "total_steps": 204665, "loss": 0.1703, "lr": 1.963125069895646e-06, "epoch": 0.8902352625021377, "percentage": 17.8, "elapsed_time": "0:46:30", "remaining_time": "3:34:42", "throughput": 8799.57, "total_tokens": 24556288} +{"current_steps": 36445, "total_steps": 204665, "loss": 0.0402, "lr": 1.9631021221072693e-06, "epoch": 0.8903574133339848, "percentage": 17.81, "elapsed_time": "0:46:30", "remaining_time": "3:34:42", "throughput": 8799.58, "total_tokens": 24559360} +{"current_steps": 36450, "total_steps": 204665, "loss": 0.0887, "lr": 1.9630791673149546e-06, "epoch": 0.890479564165832, "percentage": 17.81, "elapsed_time": "0:46:31", "remaining_time": "3:34:41", "throughput": 8799.66, "total_tokens": 24562624} +{"current_steps": 36455, "total_steps": 204665, "loss": 0.0766, "lr": 1.9630562055188697e-06, "epoch": 0.8906017149976792, "percentage": 17.81, "elapsed_time": "0:46:31", "remaining_time": "3:34:41", "throughput": 8799.87, "total_tokens": 24566336} +{"current_steps": 36460, "total_steps": 204665, "loss": 0.1509, "lr": 1.963033236719181e-06, "epoch": 0.8907238658295263, "percentage": 17.81, "elapsed_time": "0:46:32", "remaining_time": "3:34:40", "throughput": 8800.02, "total_tokens": 24569856} +{"current_steps": 36465, "total_steps": 204665, "loss": 0.0133, "lr": 1.963010260916055e-06, "epoch": 0.8908460166613734, "percentage": 17.82, "elapsed_time": "0:46:32", "remaining_time": "3:34:40", "throughput": 8800.19, "total_tokens": 24573440} +{"current_steps": 36470, "total_steps": 204665, "loss": 0.214, "lr": 1.9629872781096597e-06, "epoch": 0.8909681674932206, "percentage": 17.82, "elapsed_time": "0:46:32", "remaining_time": "3:34:39", "throughput": 8800.29, "total_tokens": 24576768} +{"current_steps": 36475, "total_steps": 204665, "loss": 0.2347, "lr": 1.9629642883001624e-06, "epoch": 0.8910903183250678, "percentage": 17.82, "elapsed_time": "0:46:33", "remaining_time": "3:34:39", "throughput": 8800.32, "total_tokens": 24579904} +{"current_steps": 36480, "total_steps": 204665, "loss": 0.0076, "lr": 1.962941291487729e-06, "epoch": 0.891212469156915, "percentage": 17.82, "elapsed_time": "0:46:33", "remaining_time": "3:34:38", "throughput": 8800.37, "total_tokens": 24583040} +{"current_steps": 36485, "total_steps": 204665, "loss": 0.1516, "lr": 1.962918287672528e-06, "epoch": 0.8913346199887622, "percentage": 17.83, "elapsed_time": "0:46:33", "remaining_time": "3:34:38", "throughput": 8800.48, "total_tokens": 24586432} +{"current_steps": 36490, "total_steps": 204665, "loss": 0.1095, "lr": 1.962895276854726e-06, "epoch": 0.8914567708206093, "percentage": 17.83, "elapsed_time": "0:46:34", "remaining_time": "3:34:37", "throughput": 8800.67, "total_tokens": 24590080} +{"current_steps": 36495, "total_steps": 204665, "loss": 0.1351, "lr": 1.9628722590344905e-06, "epoch": 0.8915789216524564, "percentage": 17.83, "elapsed_time": "0:46:34", "remaining_time": "3:34:37", "throughput": 8800.91, "total_tokens": 24593920} +{"current_steps": 36500, "total_steps": 204665, "loss": 0.1019, "lr": 1.9628492342119892e-06, "epoch": 0.8917010724843036, "percentage": 17.83, "elapsed_time": "0:46:34", "remaining_time": "3:34:36", "throughput": 8801.09, "total_tokens": 24597568} +{"current_steps": 36505, "total_steps": 204665, "loss": 0.1333, "lr": 1.9628262023873893e-06, "epoch": 0.8918232233161508, "percentage": 17.84, "elapsed_time": "0:46:35", "remaining_time": "3:34:35", "throughput": 8801.19, "total_tokens": 24600896} +{"current_steps": 36510, "total_steps": 204665, "loss": 0.0895, "lr": 1.962803163560858e-06, "epoch": 0.8919453741479979, "percentage": 17.84, "elapsed_time": "0:46:35", "remaining_time": "3:34:35", "throughput": 8801.31, "total_tokens": 24604288} +{"current_steps": 36515, "total_steps": 204665, "loss": 0.14, "lr": 1.9627801177325635e-06, "epoch": 0.8920675249798451, "percentage": 17.84, "elapsed_time": "0:46:35", "remaining_time": "3:34:34", "throughput": 8801.39, "total_tokens": 24607552} +{"current_steps": 36520, "total_steps": 204665, "loss": 0.1, "lr": 1.9627570649026726e-06, "epoch": 0.8921896758116923, "percentage": 17.84, "elapsed_time": "0:46:36", "remaining_time": "3:34:34", "throughput": 8801.5, "total_tokens": 24610944} +{"current_steps": 36525, "total_steps": 204665, "loss": 0.0558, "lr": 1.9627340050713535e-06, "epoch": 0.8923118266435395, "percentage": 17.85, "elapsed_time": "0:46:36", "remaining_time": "3:34:33", "throughput": 8801.53, "total_tokens": 24614016} +{"current_steps": 36530, "total_steps": 204665, "loss": 0.1489, "lr": 1.9627109382387743e-06, "epoch": 0.8924339774753866, "percentage": 17.85, "elapsed_time": "0:46:36", "remaining_time": "3:34:33", "throughput": 8801.63, "total_tokens": 24617344} +{"current_steps": 36535, "total_steps": 204665, "loss": 0.1632, "lr": 1.9626878644051014e-06, "epoch": 0.8925561283072337, "percentage": 17.85, "elapsed_time": "0:46:37", "remaining_time": "3:34:32", "throughput": 8801.92, "total_tokens": 24621376} +{"current_steps": 36540, "total_steps": 204665, "loss": 0.1333, "lr": 1.962664783570504e-06, "epoch": 0.8926782791390809, "percentage": 17.85, "elapsed_time": "0:46:37", "remaining_time": "3:34:32", "throughput": 8802.07, "total_tokens": 24624896} +{"current_steps": 36545, "total_steps": 204665, "loss": 0.0889, "lr": 1.962641695735149e-06, "epoch": 0.8928004299709281, "percentage": 17.86, "elapsed_time": "0:46:37", "remaining_time": "3:34:31", "throughput": 8802.13, "total_tokens": 24628096} +{"current_steps": 36550, "total_steps": 204665, "loss": 0.0207, "lr": 1.962618600899205e-06, "epoch": 0.8929225808027753, "percentage": 17.86, "elapsed_time": "0:46:38", "remaining_time": "3:34:31", "throughput": 8802.2, "total_tokens": 24631296} +{"current_steps": 36555, "total_steps": 204665, "loss": 0.0303, "lr": 1.9625954990628396e-06, "epoch": 0.8930447316346224, "percentage": 17.86, "elapsed_time": "0:46:38", "remaining_time": "3:34:30", "throughput": 8802.28, "total_tokens": 24634560} +{"current_steps": 36560, "total_steps": 204665, "loss": 0.1629, "lr": 1.9625723902262205e-06, "epoch": 0.8931668824664696, "percentage": 17.86, "elapsed_time": "0:46:39", "remaining_time": "3:34:29", "throughput": 8802.43, "total_tokens": 24638016} +{"current_steps": 36565, "total_steps": 204665, "loss": 0.0578, "lr": 1.9625492743895166e-06, "epoch": 0.8932890332983168, "percentage": 17.87, "elapsed_time": "0:46:39", "remaining_time": "3:34:29", "throughput": 8802.58, "total_tokens": 24641536} +{"current_steps": 36570, "total_steps": 204665, "loss": 0.0397, "lr": 1.962526151552895e-06, "epoch": 0.893411184130164, "percentage": 17.87, "elapsed_time": "0:46:39", "remaining_time": "3:34:28", "throughput": 8802.59, "total_tokens": 24644608} +{"current_steps": 36575, "total_steps": 204665, "loss": 0.1283, "lr": 1.9625030217165243e-06, "epoch": 0.893533334962011, "percentage": 17.87, "elapsed_time": "0:46:40", "remaining_time": "3:34:28", "throughput": 8802.7, "total_tokens": 24648000} +{"current_steps": 36580, "total_steps": 204665, "loss": 0.0591, "lr": 1.962479884880573e-06, "epoch": 0.8936554857938582, "percentage": 17.87, "elapsed_time": "0:46:40", "remaining_time": "3:34:27", "throughput": 8802.9, "total_tokens": 24651712} +{"current_steps": 36585, "total_steps": 204665, "loss": 0.1834, "lr": 1.962456741045209e-06, "epoch": 0.8937776366257054, "percentage": 17.88, "elapsed_time": "0:46:40", "remaining_time": "3:34:27", "throughput": 8802.84, "total_tokens": 24654528} +{"current_steps": 36590, "total_steps": 204665, "loss": 0.0691, "lr": 1.9624335902106005e-06, "epoch": 0.8938997874575526, "percentage": 17.88, "elapsed_time": "0:46:41", "remaining_time": "3:34:26", "throughput": 8802.95, "total_tokens": 24657920} +{"current_steps": 36595, "total_steps": 204665, "loss": 0.096, "lr": 1.9624104323769167e-06, "epoch": 0.8940219382893998, "percentage": 17.88, "elapsed_time": "0:46:41", "remaining_time": "3:34:26", "throughput": 8803.16, "total_tokens": 24661632} +{"current_steps": 36600, "total_steps": 204665, "loss": 0.1534, "lr": 1.962387267544325e-06, "epoch": 0.8941440891212469, "percentage": 17.88, "elapsed_time": "0:46:41", "remaining_time": "3:34:25", "throughput": 8803.24, "total_tokens": 24664960} +{"current_steps": 36605, "total_steps": 204665, "loss": 0.1002, "lr": 1.962364095712994e-06, "epoch": 0.8942662399530941, "percentage": 17.89, "elapsed_time": "0:46:42", "remaining_time": "3:34:25", "throughput": 8803.39, "total_tokens": 24668480} +{"current_steps": 36610, "total_steps": 204665, "loss": 0.1503, "lr": 1.962340916883093e-06, "epoch": 0.8943883907849413, "percentage": 17.89, "elapsed_time": "0:46:42", "remaining_time": "3:34:24", "throughput": 8803.56, "total_tokens": 24672064} +{"current_steps": 36615, "total_steps": 204665, "loss": 0.0608, "lr": 1.9623177310547898e-06, "epoch": 0.8945105416167884, "percentage": 17.89, "elapsed_time": "0:46:42", "remaining_time": "3:34:24", "throughput": 8803.66, "total_tokens": 24675392} +{"current_steps": 36620, "total_steps": 204665, "loss": 0.2064, "lr": 1.9622945382282526e-06, "epoch": 0.8946326924486355, "percentage": 17.89, "elapsed_time": "0:46:43", "remaining_time": "3:34:23", "throughput": 8803.7, "total_tokens": 24678528} +{"current_steps": 36625, "total_steps": 204665, "loss": 0.1047, "lr": 1.9622713384036517e-06, "epoch": 0.8947548432804827, "percentage": 17.9, "elapsed_time": "0:46:43", "remaining_time": "3:34:23", "throughput": 8803.84, "total_tokens": 24681984} +{"current_steps": 36630, "total_steps": 204665, "loss": 0.0891, "lr": 1.962248131581154e-06, "epoch": 0.8948769941123299, "percentage": 17.9, "elapsed_time": "0:46:43", "remaining_time": "3:34:22", "throughput": 8803.96, "total_tokens": 24685376} +{"current_steps": 36635, "total_steps": 204665, "loss": 0.0213, "lr": 1.962224917760929e-06, "epoch": 0.8949991449441771, "percentage": 17.9, "elapsed_time": "0:46:44", "remaining_time": "3:34:21", "throughput": 8804.01, "total_tokens": 24688576} +{"current_steps": 36640, "total_steps": 204665, "loss": 0.1229, "lr": 1.9622016969431465e-06, "epoch": 0.8951212957760243, "percentage": 17.9, "elapsed_time": "0:46:44", "remaining_time": "3:34:21", "throughput": 8804.18, "total_tokens": 24692160} +{"current_steps": 36645, "total_steps": 204665, "loss": 0.0524, "lr": 1.9621784691279737e-06, "epoch": 0.8952434466078714, "percentage": 17.9, "elapsed_time": "0:46:44", "remaining_time": "3:34:20", "throughput": 8804.26, "total_tokens": 24695424} +{"current_steps": 36650, "total_steps": 204665, "loss": 0.0897, "lr": 1.962155234315581e-06, "epoch": 0.8953655974397186, "percentage": 17.91, "elapsed_time": "0:46:45", "remaining_time": "3:34:20", "throughput": 8804.28, "total_tokens": 24698496} +{"current_steps": 36655, "total_steps": 204665, "loss": 0.1008, "lr": 1.962131992506136e-06, "epoch": 0.8954877482715657, "percentage": 17.91, "elapsed_time": "0:46:45", "remaining_time": "3:34:19", "throughput": 8804.27, "total_tokens": 24701440} +{"current_steps": 36660, "total_steps": 204665, "loss": 0.1414, "lr": 1.9621087436998083e-06, "epoch": 0.8956098991034129, "percentage": 17.91, "elapsed_time": "0:46:45", "remaining_time": "3:34:19", "throughput": 8804.31, "total_tokens": 24704576} +{"current_steps": 36665, "total_steps": 204665, "loss": 0.0464, "lr": 1.9620854878967675e-06, "epoch": 0.89573204993526, "percentage": 17.91, "elapsed_time": "0:46:46", "remaining_time": "3:34:18", "throughput": 8804.26, "total_tokens": 24707392} +{"current_steps": 36670, "total_steps": 204665, "loss": 0.1001, "lr": 1.9620622250971817e-06, "epoch": 0.8958542007671072, "percentage": 17.92, "elapsed_time": "0:46:46", "remaining_time": "3:34:18", "throughput": 8804.42, "total_tokens": 24710976} +{"current_steps": 36675, "total_steps": 204665, "loss": 0.1594, "lr": 1.9620389553012214e-06, "epoch": 0.8959763515989544, "percentage": 17.92, "elapsed_time": "0:46:46", "remaining_time": "3:34:17", "throughput": 8804.43, "total_tokens": 24713984} +{"current_steps": 36680, "total_steps": 204665, "loss": 0.0624, "lr": 1.9620156785090544e-06, "epoch": 0.8960985024308016, "percentage": 17.92, "elapsed_time": "0:46:47", "remaining_time": "3:34:16", "throughput": 8804.58, "total_tokens": 24717504} +{"current_steps": 36685, "total_steps": 204665, "loss": 0.1965, "lr": 1.961992394720851e-06, "epoch": 0.8962206532626488, "percentage": 17.92, "elapsed_time": "0:46:47", "remaining_time": "3:34:16", "throughput": 8804.73, "total_tokens": 24721024} +{"current_steps": 36690, "total_steps": 204665, "loss": 0.0845, "lr": 1.9619691039367798e-06, "epoch": 0.8963428040944958, "percentage": 17.93, "elapsed_time": "0:46:48", "remaining_time": "3:34:15", "throughput": 8804.98, "total_tokens": 24724928} +{"current_steps": 36695, "total_steps": 204665, "loss": 0.0655, "lr": 1.961945806157011e-06, "epoch": 0.896464954926343, "percentage": 17.93, "elapsed_time": "0:46:48", "remaining_time": "3:34:15", "throughput": 8805.13, "total_tokens": 24728448} +{"current_steps": 36700, "total_steps": 204665, "loss": 0.1331, "lr": 1.9619225013817133e-06, "epoch": 0.8965871057581902, "percentage": 17.93, "elapsed_time": "0:46:48", "remaining_time": "3:34:14", "throughput": 8805.3, "total_tokens": 24732032} +{"current_steps": 36705, "total_steps": 204665, "loss": 0.0997, "lr": 1.9618991896110565e-06, "epoch": 0.8967092565900374, "percentage": 17.93, "elapsed_time": "0:46:49", "remaining_time": "3:34:14", "throughput": 8805.44, "total_tokens": 24735488} +{"current_steps": 36710, "total_steps": 204665, "loss": 0.0924, "lr": 1.96187587084521e-06, "epoch": 0.8968314074218845, "percentage": 17.94, "elapsed_time": "0:46:49", "remaining_time": "3:34:13", "throughput": 8805.59, "total_tokens": 24739008} +{"current_steps": 36715, "total_steps": 204665, "loss": 0.0832, "lr": 1.9618525450843432e-06, "epoch": 0.8969535582537317, "percentage": 17.94, "elapsed_time": "0:46:49", "remaining_time": "3:34:13", "throughput": 8805.61, "total_tokens": 24742080} +{"current_steps": 36720, "total_steps": 204665, "loss": 0.2628, "lr": 1.9618292123286264e-06, "epoch": 0.8970757090855789, "percentage": 17.94, "elapsed_time": "0:46:50", "remaining_time": "3:34:12", "throughput": 8805.81, "total_tokens": 24745792} +{"current_steps": 36725, "total_steps": 204665, "loss": 0.0718, "lr": 1.961805872578229e-06, "epoch": 0.8971978599174261, "percentage": 17.94, "elapsed_time": "0:46:50", "remaining_time": "3:34:12", "throughput": 8805.85, "total_tokens": 24748928} +{"current_steps": 36730, "total_steps": 204665, "loss": 0.0482, "lr": 1.9617825258333204e-06, "epoch": 0.8973200107492733, "percentage": 17.95, "elapsed_time": "0:46:50", "remaining_time": "3:34:11", "throughput": 8805.96, "total_tokens": 24752256} +{"current_steps": 36735, "total_steps": 204665, "loss": 0.049, "lr": 1.9617591720940703e-06, "epoch": 0.8974421615811203, "percentage": 17.95, "elapsed_time": "0:46:51", "remaining_time": "3:34:11", "throughput": 8806.11, "total_tokens": 24755776} +{"current_steps": 36740, "total_steps": 204665, "loss": 0.0901, "lr": 1.961735811360649e-06, "epoch": 0.8975643124129675, "percentage": 17.95, "elapsed_time": "0:46:51", "remaining_time": "3:34:10", "throughput": 8806.25, "total_tokens": 24759296} +{"current_steps": 36745, "total_steps": 204665, "loss": 0.1186, "lr": 1.9617124436332263e-06, "epoch": 0.8976864632448147, "percentage": 17.95, "elapsed_time": "0:46:51", "remaining_time": "3:34:10", "throughput": 8806.36, "total_tokens": 24762688} +{"current_steps": 36750, "total_steps": 204665, "loss": 0.1298, "lr": 1.961689068911972e-06, "epoch": 0.8978086140766619, "percentage": 17.96, "elapsed_time": "0:46:52", "remaining_time": "3:34:09", "throughput": 8806.43, "total_tokens": 24765952} +{"current_steps": 36755, "total_steps": 204665, "loss": 0.0264, "lr": 1.9616656871970562e-06, "epoch": 0.897930764908509, "percentage": 17.96, "elapsed_time": "0:46:52", "remaining_time": "3:34:09", "throughput": 8806.58, "total_tokens": 24769472} +{"current_steps": 36760, "total_steps": 204665, "loss": 0.1623, "lr": 1.9616422984886485e-06, "epoch": 0.8980529157403562, "percentage": 17.96, "elapsed_time": "0:46:52", "remaining_time": "3:34:08", "throughput": 8806.78, "total_tokens": 24773184} +{"current_steps": 36765, "total_steps": 204665, "loss": 0.2083, "lr": 1.96161890278692e-06, "epoch": 0.8981750665722034, "percentage": 17.96, "elapsed_time": "0:46:53", "remaining_time": "3:34:07", "throughput": 8806.82, "total_tokens": 24776320} +{"current_steps": 36770, "total_steps": 204665, "loss": 0.0645, "lr": 1.9615955000920396e-06, "epoch": 0.8982972174040506, "percentage": 17.97, "elapsed_time": "0:46:53", "remaining_time": "3:34:07", "throughput": 8806.95, "total_tokens": 24779776} +{"current_steps": 36775, "total_steps": 204665, "loss": 0.1415, "lr": 1.9615720904041785e-06, "epoch": 0.8984193682358977, "percentage": 17.97, "elapsed_time": "0:46:54", "remaining_time": "3:34:06", "throughput": 8807.0, "total_tokens": 24782976} +{"current_steps": 36780, "total_steps": 204665, "loss": 0.1298, "lr": 1.9615486737235064e-06, "epoch": 0.8985415190677448, "percentage": 17.97, "elapsed_time": "0:46:54", "remaining_time": "3:34:06", "throughput": 8807.06, "total_tokens": 24786176} +{"current_steps": 36785, "total_steps": 204665, "loss": 0.0636, "lr": 1.9615252500501936e-06, "epoch": 0.898663669899592, "percentage": 17.97, "elapsed_time": "0:46:54", "remaining_time": "3:34:05", "throughput": 8807.25, "total_tokens": 24789824} +{"current_steps": 36790, "total_steps": 204665, "loss": 0.0881, "lr": 1.9615018193844108e-06, "epoch": 0.8987858207314392, "percentage": 17.98, "elapsed_time": "0:46:55", "remaining_time": "3:34:05", "throughput": 8807.38, "total_tokens": 24793280} +{"current_steps": 36795, "total_steps": 204665, "loss": 0.1375, "lr": 1.961478381726328e-06, "epoch": 0.8989079715632864, "percentage": 17.98, "elapsed_time": "0:46:55", "remaining_time": "3:34:04", "throughput": 8807.53, "total_tokens": 24796800} +{"current_steps": 36800, "total_steps": 204665, "loss": 0.1272, "lr": 1.9614549370761153e-06, "epoch": 0.8990301223951335, "percentage": 17.98, "elapsed_time": "0:46:55", "remaining_time": "3:34:04", "throughput": 8807.61, "total_tokens": 24800064} +{"current_steps": 36805, "total_steps": 204665, "loss": 0.1704, "lr": 1.9614314854339445e-06, "epoch": 0.8991522732269807, "percentage": 17.98, "elapsed_time": "0:46:56", "remaining_time": "3:34:03", "throughput": 8807.72, "total_tokens": 24803456} +{"current_steps": 36810, "total_steps": 204665, "loss": 0.0022, "lr": 1.961408026799985e-06, "epoch": 0.8992744240588278, "percentage": 17.99, "elapsed_time": "0:46:56", "remaining_time": "3:34:03", "throughput": 8807.79, "total_tokens": 24806656} +{"current_steps": 36815, "total_steps": 204665, "loss": 0.1131, "lr": 1.961384561174408e-06, "epoch": 0.899396574890675, "percentage": 17.99, "elapsed_time": "0:46:56", "remaining_time": "3:34:02", "throughput": 8807.76, "total_tokens": 24809536} +{"current_steps": 36820, "total_steps": 204665, "loss": 0.0057, "lr": 1.9613610885573835e-06, "epoch": 0.8995187257225221, "percentage": 17.99, "elapsed_time": "0:46:57", "remaining_time": "3:34:01", "throughput": 8807.83, "total_tokens": 24812800} +{"current_steps": 36825, "total_steps": 204665, "loss": 0.1304, "lr": 1.961337608949083e-06, "epoch": 0.8996408765543693, "percentage": 17.99, "elapsed_time": "0:46:57", "remaining_time": "3:34:01", "throughput": 8807.97, "total_tokens": 24816320} +{"current_steps": 36830, "total_steps": 204665, "loss": 0.1914, "lr": 1.9613141223496763e-06, "epoch": 0.8997630273862165, "percentage": 18.0, "elapsed_time": "0:46:57", "remaining_time": "3:34:00", "throughput": 8807.99, "total_tokens": 24819392} +{"current_steps": 36835, "total_steps": 204665, "loss": 0.0568, "lr": 1.961290628759335e-06, "epoch": 0.8998851782180637, "percentage": 18.0, "elapsed_time": "0:46:58", "remaining_time": "3:34:00", "throughput": 8808.05, "total_tokens": 24822592} +{"current_steps": 36840, "total_steps": 204665, "loss": 0.0487, "lr": 1.9612671281782297e-06, "epoch": 0.9000073290499109, "percentage": 18.0, "elapsed_time": "0:46:58", "remaining_time": "3:33:59", "throughput": 8808.07, "total_tokens": 24825664} +{"current_steps": 36845, "total_steps": 204665, "loss": 0.1083, "lr": 1.9612436206065313e-06, "epoch": 0.900129479881758, "percentage": 18.0, "elapsed_time": "0:46:58", "remaining_time": "3:33:59", "throughput": 8808.13, "total_tokens": 24828864} +{"current_steps": 36850, "total_steps": 204665, "loss": 0.1028, "lr": 1.961220106044411e-06, "epoch": 0.9002516307136051, "percentage": 18.01, "elapsed_time": "0:46:59", "remaining_time": "3:33:58", "throughput": 8808.24, "total_tokens": 24832256} +{"current_steps": 36855, "total_steps": 204665, "loss": 0.076, "lr": 1.961196584492039e-06, "epoch": 0.9003737815454523, "percentage": 18.01, "elapsed_time": "0:46:59", "remaining_time": "3:33:58", "throughput": 8808.32, "total_tokens": 24835520} +{"current_steps": 36860, "total_steps": 204665, "loss": 0.071, "lr": 1.9611730559495876e-06, "epoch": 0.9004959323772995, "percentage": 18.01, "elapsed_time": "0:46:59", "remaining_time": "3:33:57", "throughput": 8808.4, "total_tokens": 24838784} +{"current_steps": 36865, "total_steps": 204665, "loss": 0.2169, "lr": 1.9611495204172266e-06, "epoch": 0.9006180832091466, "percentage": 18.01, "elapsed_time": "0:47:00", "remaining_time": "3:33:57", "throughput": 8808.57, "total_tokens": 24842368} +{"current_steps": 36870, "total_steps": 204665, "loss": 0.0943, "lr": 1.961125977895128e-06, "epoch": 0.9007402340409938, "percentage": 18.01, "elapsed_time": "0:47:00", "remaining_time": "3:33:56", "throughput": 8808.74, "total_tokens": 24845952} +{"current_steps": 36875, "total_steps": 204665, "loss": 0.147, "lr": 1.961102428383463e-06, "epoch": 0.900862384872841, "percentage": 18.02, "elapsed_time": "0:47:00", "remaining_time": "3:33:55", "throughput": 8808.81, "total_tokens": 24849216} +{"current_steps": 36880, "total_steps": 204665, "loss": 0.0963, "lr": 1.9610788718824024e-06, "epoch": 0.9009845357046882, "percentage": 18.02, "elapsed_time": "0:47:01", "remaining_time": "3:33:55", "throughput": 8809.05, "total_tokens": 24853056} +{"current_steps": 36885, "total_steps": 204665, "loss": 0.1742, "lr": 1.9610553083921176e-06, "epoch": 0.9011066865365354, "percentage": 18.02, "elapsed_time": "0:47:01", "remaining_time": "3:33:54", "throughput": 8809.16, "total_tokens": 24856448} +{"current_steps": 36890, "total_steps": 204665, "loss": 0.0552, "lr": 1.9610317379127803e-06, "epoch": 0.9012288373683824, "percentage": 18.02, "elapsed_time": "0:47:02", "remaining_time": "3:33:54", "throughput": 8809.18, "total_tokens": 24859520} +{"current_steps": 36895, "total_steps": 204665, "loss": 0.1553, "lr": 1.9610081604445618e-06, "epoch": 0.9013509882002296, "percentage": 18.03, "elapsed_time": "0:47:02", "remaining_time": "3:33:53", "throughput": 8809.22, "total_tokens": 24862656} +{"current_steps": 36900, "total_steps": 204665, "loss": 0.04, "lr": 1.9609845759876332e-06, "epoch": 0.9014731390320768, "percentage": 18.03, "elapsed_time": "0:47:02", "remaining_time": "3:33:53", "throughput": 8809.35, "total_tokens": 24866112} +{"current_steps": 36905, "total_steps": 204665, "loss": 0.1122, "lr": 1.9609609845421666e-06, "epoch": 0.901595289863924, "percentage": 18.03, "elapsed_time": "0:47:03", "remaining_time": "3:33:52", "throughput": 8809.41, "total_tokens": 24869312} +{"current_steps": 36910, "total_steps": 204665, "loss": 0.1537, "lr": 1.960937386108333e-06, "epoch": 0.9017174406957711, "percentage": 18.03, "elapsed_time": "0:47:03", "remaining_time": "3:33:52", "throughput": 8809.48, "total_tokens": 24872576} +{"current_steps": 36915, "total_steps": 204665, "loss": 0.0725, "lr": 1.9609137806863044e-06, "epoch": 0.9018395915276183, "percentage": 18.04, "elapsed_time": "0:47:03", "remaining_time": "3:33:51", "throughput": 8809.62, "total_tokens": 24876032} +{"current_steps": 36920, "total_steps": 204665, "loss": 0.2087, "lr": 1.9608901682762522e-06, "epoch": 0.9019617423594655, "percentage": 18.04, "elapsed_time": "0:47:04", "remaining_time": "3:33:51", "throughput": 8809.65, "total_tokens": 24879168} +{"current_steps": 36925, "total_steps": 204665, "loss": 0.1464, "lr": 1.9608665488783485e-06, "epoch": 0.9020838931913127, "percentage": 18.04, "elapsed_time": "0:47:04", "remaining_time": "3:33:50", "throughput": 8809.88, "total_tokens": 24883008} +{"current_steps": 36930, "total_steps": 204665, "loss": 0.1137, "lr": 1.960842922492765e-06, "epoch": 0.9022060440231598, "percentage": 18.04, "elapsed_time": "0:47:04", "remaining_time": "3:33:50", "throughput": 8809.99, "total_tokens": 24886400} +{"current_steps": 36935, "total_steps": 204665, "loss": 0.0908, "lr": 1.9608192891196725e-06, "epoch": 0.9023281948550069, "percentage": 18.05, "elapsed_time": "0:47:05", "remaining_time": "3:33:49", "throughput": 8810.24, "total_tokens": 24890240} +{"current_steps": 36940, "total_steps": 204665, "loss": 0.113, "lr": 1.9607956487592446e-06, "epoch": 0.9024503456868541, "percentage": 18.05, "elapsed_time": "0:47:05", "remaining_time": "3:33:49", "throughput": 8810.37, "total_tokens": 24893696} +{"current_steps": 36945, "total_steps": 204665, "loss": 0.0707, "lr": 1.960772001411652e-06, "epoch": 0.9025724965187013, "percentage": 18.05, "elapsed_time": "0:47:05", "remaining_time": "3:33:48", "throughput": 8810.48, "total_tokens": 24897088} +{"current_steps": 36950, "total_steps": 204665, "loss": 0.1639, "lr": 1.9607483470770667e-06, "epoch": 0.9026946473505485, "percentage": 18.05, "elapsed_time": "0:47:06", "remaining_time": "3:33:48", "throughput": 8810.53, "total_tokens": 24900288} +{"current_steps": 36955, "total_steps": 204665, "loss": 0.1239, "lr": 1.960724685755661e-06, "epoch": 0.9028167981823956, "percentage": 18.06, "elapsed_time": "0:47:06", "remaining_time": "3:33:47", "throughput": 8810.91, "total_tokens": 24904640} +{"current_steps": 36960, "total_steps": 204665, "loss": 0.2087, "lr": 1.9607010174476073e-06, "epoch": 0.9029389490142428, "percentage": 18.06, "elapsed_time": "0:47:06", "remaining_time": "3:33:47", "throughput": 8811.01, "total_tokens": 24907968} +{"current_steps": 36965, "total_steps": 204665, "loss": 0.0568, "lr": 1.9606773421530774e-06, "epoch": 0.90306109984609, "percentage": 18.06, "elapsed_time": "0:47:07", "remaining_time": "3:33:46", "throughput": 8811.23, "total_tokens": 24911744} +{"current_steps": 36970, "total_steps": 204665, "loss": 0.156, "lr": 1.960653659872243e-06, "epoch": 0.9031832506779371, "percentage": 18.06, "elapsed_time": "0:47:07", "remaining_time": "3:33:46", "throughput": 8811.38, "total_tokens": 24915328} +{"current_steps": 36975, "total_steps": 204665, "loss": 0.2079, "lr": 1.9606299706052775e-06, "epoch": 0.9033054015097843, "percentage": 18.07, "elapsed_time": "0:47:07", "remaining_time": "3:33:45", "throughput": 8811.42, "total_tokens": 24918464} +{"current_steps": 36980, "total_steps": 204665, "loss": 0.0784, "lr": 1.960606274352352e-06, "epoch": 0.9034275523416314, "percentage": 18.07, "elapsed_time": "0:47:08", "remaining_time": "3:33:44", "throughput": 8811.47, "total_tokens": 24921664} +{"current_steps": 36985, "total_steps": 204665, "loss": 0.0685, "lr": 1.9605825711136395e-06, "epoch": 0.9035497031734786, "percentage": 18.07, "elapsed_time": "0:47:08", "remaining_time": "3:33:44", "throughput": 8811.64, "total_tokens": 24925248} +{"current_steps": 36990, "total_steps": 204665, "loss": 0.1453, "lr": 1.9605588608893124e-06, "epoch": 0.9036718540053258, "percentage": 18.07, "elapsed_time": "0:47:09", "remaining_time": "3:33:43", "throughput": 8811.7, "total_tokens": 24928448} +{"current_steps": 36995, "total_steps": 204665, "loss": 0.1158, "lr": 1.9605351436795426e-06, "epoch": 0.903794004837173, "percentage": 18.08, "elapsed_time": "0:47:09", "remaining_time": "3:33:43", "throughput": 8811.91, "total_tokens": 24932160} +{"current_steps": 37000, "total_steps": 204665, "loss": 0.0426, "lr": 1.960511419484503e-06, "epoch": 0.9039161556690201, "percentage": 18.08, "elapsed_time": "0:47:09", "remaining_time": "3:33:42", "throughput": 8811.98, "total_tokens": 24935424} +{"current_steps": 37005, "total_steps": 204665, "loss": 0.0423, "lr": 1.9604876883043655e-06, "epoch": 0.9040383065008673, "percentage": 18.08, "elapsed_time": "0:47:10", "remaining_time": "3:33:42", "throughput": 8812.06, "total_tokens": 24938752} +{"current_steps": 37010, "total_steps": 204665, "loss": 0.1684, "lr": 1.960463950139304e-06, "epoch": 0.9041604573327144, "percentage": 18.08, "elapsed_time": "0:47:10", "remaining_time": "3:33:41", "throughput": 8812.21, "total_tokens": 24942272} +{"current_steps": 37015, "total_steps": 204665, "loss": 0.1104, "lr": 1.96044020498949e-06, "epoch": 0.9042826081645616, "percentage": 18.09, "elapsed_time": "0:47:10", "remaining_time": "3:33:41", "throughput": 8812.34, "total_tokens": 24945728} +{"current_steps": 37020, "total_steps": 204665, "loss": 0.0333, "lr": 1.9604164528550966e-06, "epoch": 0.9044047589964088, "percentage": 18.09, "elapsed_time": "0:47:11", "remaining_time": "3:33:40", "throughput": 8812.49, "total_tokens": 24949248} +{"current_steps": 37025, "total_steps": 204665, "loss": 0.1655, "lr": 1.960392693736296e-06, "epoch": 0.9045269098282559, "percentage": 18.09, "elapsed_time": "0:47:11", "remaining_time": "3:33:40", "throughput": 8812.67, "total_tokens": 24952896} +{"current_steps": 37030, "total_steps": 204665, "loss": 0.1639, "lr": 1.9603689276332617e-06, "epoch": 0.9046490606601031, "percentage": 18.09, "elapsed_time": "0:47:11", "remaining_time": "3:33:39", "throughput": 8812.74, "total_tokens": 24956096} +{"current_steps": 37035, "total_steps": 204665, "loss": 0.0335, "lr": 1.960345154546166e-06, "epoch": 0.9047712114919503, "percentage": 18.1, "elapsed_time": "0:47:12", "remaining_time": "3:33:39", "throughput": 8812.74, "total_tokens": 24959104} +{"current_steps": 37040, "total_steps": 204665, "loss": 0.1253, "lr": 1.9603213744751824e-06, "epoch": 0.9048933623237975, "percentage": 18.1, "elapsed_time": "0:47:12", "remaining_time": "3:33:38", "throughput": 8812.95, "total_tokens": 24962816} +{"current_steps": 37045, "total_steps": 204665, "loss": 0.2109, "lr": 1.9602975874204836e-06, "epoch": 0.9050155131556445, "percentage": 18.1, "elapsed_time": "0:47:12", "remaining_time": "3:33:38", "throughput": 8813.15, "total_tokens": 24966528} +{"current_steps": 37050, "total_steps": 204665, "loss": 0.1559, "lr": 1.960273793382242e-06, "epoch": 0.9051376639874917, "percentage": 18.1, "elapsed_time": "0:47:13", "remaining_time": "3:33:37", "throughput": 8813.47, "total_tokens": 24970624} +{"current_steps": 37055, "total_steps": 204665, "loss": 0.1607, "lr": 1.9602499923606314e-06, "epoch": 0.9052598148193389, "percentage": 18.11, "elapsed_time": "0:47:13", "remaining_time": "3:33:37", "throughput": 8813.65, "total_tokens": 24974272} +{"current_steps": 37060, "total_steps": 204665, "loss": 0.1161, "lr": 1.960226184355824e-06, "epoch": 0.9053819656511861, "percentage": 18.11, "elapsed_time": "0:47:13", "remaining_time": "3:33:36", "throughput": 8813.87, "total_tokens": 24978048} +{"current_steps": 37065, "total_steps": 204665, "loss": 0.1868, "lr": 1.9602023693679942e-06, "epoch": 0.9055041164830332, "percentage": 18.11, "elapsed_time": "0:47:14", "remaining_time": "3:33:36", "throughput": 8813.92, "total_tokens": 24981184} +{"current_steps": 37070, "total_steps": 204665, "loss": 0.0782, "lr": 1.9601785473973145e-06, "epoch": 0.9056262673148804, "percentage": 18.11, "elapsed_time": "0:47:14", "remaining_time": "3:33:35", "throughput": 8814.1, "total_tokens": 24984832} +{"current_steps": 37075, "total_steps": 204665, "loss": 0.1352, "lr": 1.9601547184439577e-06, "epoch": 0.9057484181467276, "percentage": 18.11, "elapsed_time": "0:47:14", "remaining_time": "3:33:35", "throughput": 8814.24, "total_tokens": 24988288} +{"current_steps": 37080, "total_steps": 204665, "loss": 0.1083, "lr": 1.960130882508098e-06, "epoch": 0.9058705689785748, "percentage": 18.12, "elapsed_time": "0:47:15", "remaining_time": "3:33:34", "throughput": 8814.51, "total_tokens": 24992192} +{"current_steps": 37085, "total_steps": 204665, "loss": 0.1329, "lr": 1.960107039589908e-06, "epoch": 0.905992719810422, "percentage": 18.12, "elapsed_time": "0:47:15", "remaining_time": "3:33:33", "throughput": 8814.62, "total_tokens": 24995584} +{"current_steps": 37090, "total_steps": 204665, "loss": 0.1082, "lr": 1.9600831896895615e-06, "epoch": 0.906114870642269, "percentage": 18.12, "elapsed_time": "0:47:16", "remaining_time": "3:33:33", "throughput": 8814.74, "total_tokens": 24998976} +{"current_steps": 37095, "total_steps": 204665, "loss": 0.068, "lr": 1.9600593328072317e-06, "epoch": 0.9062370214741162, "percentage": 18.12, "elapsed_time": "0:47:16", "remaining_time": "3:33:32", "throughput": 8814.88, "total_tokens": 25002496} +{"current_steps": 37100, "total_steps": 204665, "loss": 0.1702, "lr": 1.960035468943092e-06, "epoch": 0.9063591723059634, "percentage": 18.13, "elapsed_time": "0:47:16", "remaining_time": "3:33:32", "throughput": 8815.0, "total_tokens": 25005952} +{"current_steps": 37105, "total_steps": 204665, "loss": 0.1441, "lr": 1.9600115980973167e-06, "epoch": 0.9064813231378106, "percentage": 18.13, "elapsed_time": "0:47:17", "remaining_time": "3:33:31", "throughput": 8815.14, "total_tokens": 25009408} +{"current_steps": 37110, "total_steps": 204665, "loss": 0.0769, "lr": 1.9599877202700784e-06, "epoch": 0.9066034739696577, "percentage": 18.13, "elapsed_time": "0:47:17", "remaining_time": "3:33:31", "throughput": 8815.14, "total_tokens": 25012416} +{"current_steps": 37115, "total_steps": 204665, "loss": 0.0935, "lr": 1.9599638354615517e-06, "epoch": 0.9067256248015049, "percentage": 18.13, "elapsed_time": "0:47:17", "remaining_time": "3:33:30", "throughput": 8815.24, "total_tokens": 25015744} +{"current_steps": 37120, "total_steps": 204665, "loss": 0.131, "lr": 1.959939943671909e-06, "epoch": 0.9068477756333521, "percentage": 18.14, "elapsed_time": "0:47:18", "remaining_time": "3:33:30", "throughput": 8815.57, "total_tokens": 25019904} +{"current_steps": 37125, "total_steps": 204665, "loss": 0.2101, "lr": 1.9599160449013255e-06, "epoch": 0.9069699264651992, "percentage": 18.14, "elapsed_time": "0:47:18", "remaining_time": "3:33:29", "throughput": 8815.63, "total_tokens": 25023104} +{"current_steps": 37130, "total_steps": 204665, "loss": 0.0628, "lr": 1.959892139149974e-06, "epoch": 0.9070920772970464, "percentage": 18.14, "elapsed_time": "0:47:18", "remaining_time": "3:33:29", "throughput": 8815.63, "total_tokens": 25026112} +{"current_steps": 37135, "total_steps": 204665, "loss": 0.0905, "lr": 1.9598682264180288e-06, "epoch": 0.9072142281288935, "percentage": 18.14, "elapsed_time": "0:47:19", "remaining_time": "3:33:28", "throughput": 8815.78, "total_tokens": 25029632} +{"current_steps": 37140, "total_steps": 204665, "loss": 0.156, "lr": 1.959844306705664e-06, "epoch": 0.9073363789607407, "percentage": 18.15, "elapsed_time": "0:47:19", "remaining_time": "3:33:28", "throughput": 8815.86, "total_tokens": 25032896} +{"current_steps": 37145, "total_steps": 204665, "loss": 0.052, "lr": 1.9598203800130524e-06, "epoch": 0.9074585297925879, "percentage": 18.15, "elapsed_time": "0:47:19", "remaining_time": "3:33:27", "throughput": 8815.99, "total_tokens": 25036352} +{"current_steps": 37150, "total_steps": 204665, "loss": 0.0664, "lr": 1.9597964463403695e-06, "epoch": 0.9075806806244351, "percentage": 18.15, "elapsed_time": "0:47:20", "remaining_time": "3:33:26", "throughput": 8816.04, "total_tokens": 25039488} +{"current_steps": 37155, "total_steps": 204665, "loss": 0.1988, "lr": 1.9597725056877886e-06, "epoch": 0.9077028314562822, "percentage": 18.15, "elapsed_time": "0:47:20", "remaining_time": "3:33:26", "throughput": 8816.14, "total_tokens": 25042816} +{"current_steps": 37160, "total_steps": 204665, "loss": 0.0328, "lr": 1.959748558055484e-06, "epoch": 0.9078249822881294, "percentage": 18.16, "elapsed_time": "0:47:20", "remaining_time": "3:33:25", "throughput": 8816.16, "total_tokens": 25045888} +{"current_steps": 37165, "total_steps": 204665, "loss": 0.1926, "lr": 1.9597246034436293e-06, "epoch": 0.9079471331199765, "percentage": 18.16, "elapsed_time": "0:47:21", "remaining_time": "3:33:25", "throughput": 8816.35, "total_tokens": 25049536} +{"current_steps": 37170, "total_steps": 204665, "loss": 0.1487, "lr": 1.9597006418523995e-06, "epoch": 0.9080692839518237, "percentage": 18.16, "elapsed_time": "0:47:21", "remaining_time": "3:33:24", "throughput": 8816.56, "total_tokens": 25053248} +{"current_steps": 37175, "total_steps": 204665, "loss": 0.2229, "lr": 1.9596766732819684e-06, "epoch": 0.9081914347836709, "percentage": 18.16, "elapsed_time": "0:47:21", "remaining_time": "3:33:24", "throughput": 8816.66, "total_tokens": 25056576} +{"current_steps": 37180, "total_steps": 204665, "loss": 0.0712, "lr": 1.9596526977325106e-06, "epoch": 0.908313585615518, "percentage": 18.17, "elapsed_time": "0:47:22", "remaining_time": "3:33:23", "throughput": 8816.77, "total_tokens": 25059968} +{"current_steps": 37185, "total_steps": 204665, "loss": 0.1904, "lr": 1.9596287152042e-06, "epoch": 0.9084357364473652, "percentage": 18.17, "elapsed_time": "0:47:22", "remaining_time": "3:33:23", "throughput": 8816.87, "total_tokens": 25063296} +{"current_steps": 37190, "total_steps": 204665, "loss": 0.0991, "lr": 1.9596047256972114e-06, "epoch": 0.9085578872792124, "percentage": 18.17, "elapsed_time": "0:47:22", "remaining_time": "3:33:22", "throughput": 8816.85, "total_tokens": 25066240} +{"current_steps": 37195, "total_steps": 204665, "loss": 0.0686, "lr": 1.959580729211719e-06, "epoch": 0.9086800381110596, "percentage": 18.17, "elapsed_time": "0:47:23", "remaining_time": "3:33:22", "throughput": 8817.02, "total_tokens": 25069824} +{"current_steps": 37200, "total_steps": 204665, "loss": 0.022, "lr": 1.9595567257478974e-06, "epoch": 0.9088021889429067, "percentage": 18.18, "elapsed_time": "0:47:23", "remaining_time": "3:33:21", "throughput": 8817.26, "total_tokens": 25073664} +{"current_steps": 37205, "total_steps": 204665, "loss": 0.1277, "lr": 1.9595327153059214e-06, "epoch": 0.9089243397747538, "percentage": 18.18, "elapsed_time": "0:47:24", "remaining_time": "3:33:21", "throughput": 8817.26, "total_tokens": 25076672} +{"current_steps": 37210, "total_steps": 204665, "loss": 0.1181, "lr": 1.9595086978859653e-06, "epoch": 0.909046490606601, "percentage": 18.18, "elapsed_time": "0:47:24", "remaining_time": "3:33:20", "throughput": 8817.37, "total_tokens": 25080064} +{"current_steps": 37215, "total_steps": 204665, "loss": 0.1047, "lr": 1.959484673488204e-06, "epoch": 0.9091686414384482, "percentage": 18.18, "elapsed_time": "0:47:24", "remaining_time": "3:33:19", "throughput": 8817.46, "total_tokens": 25083392} +{"current_steps": 37220, "total_steps": 204665, "loss": 0.0515, "lr": 1.9594606421128123e-06, "epoch": 0.9092907922702954, "percentage": 18.19, "elapsed_time": "0:47:25", "remaining_time": "3:33:19", "throughput": 8817.61, "total_tokens": 25086912} +{"current_steps": 37225, "total_steps": 204665, "loss": 0.1087, "lr": 1.9594366037599645e-06, "epoch": 0.9094129431021425, "percentage": 18.19, "elapsed_time": "0:47:25", "remaining_time": "3:33:18", "throughput": 8817.8, "total_tokens": 25090560} +{"current_steps": 37230, "total_steps": 204665, "loss": 0.1335, "lr": 1.959412558429835e-06, "epoch": 0.9095350939339897, "percentage": 18.19, "elapsed_time": "0:47:25", "remaining_time": "3:33:18", "throughput": 8817.85, "total_tokens": 25093760} +{"current_steps": 37235, "total_steps": 204665, "loss": 0.041, "lr": 1.9593885061226002e-06, "epoch": 0.9096572447658369, "percentage": 18.19, "elapsed_time": "0:47:26", "remaining_time": "3:33:17", "throughput": 8817.98, "total_tokens": 25097216} +{"current_steps": 37240, "total_steps": 204665, "loss": 0.1368, "lr": 1.959364446838434e-06, "epoch": 0.9097793955976841, "percentage": 18.2, "elapsed_time": "0:47:26", "remaining_time": "3:33:17", "throughput": 8817.97, "total_tokens": 25100160} +{"current_steps": 37245, "total_steps": 204665, "loss": 0.0885, "lr": 1.9593403805775113e-06, "epoch": 0.9099015464295311, "percentage": 18.2, "elapsed_time": "0:47:26", "remaining_time": "3:33:16", "throughput": 8818.3, "total_tokens": 25104320} +{"current_steps": 37250, "total_steps": 204665, "loss": 0.1744, "lr": 1.9593163073400075e-06, "epoch": 0.9100236972613783, "percentage": 18.2, "elapsed_time": "0:47:27", "remaining_time": "3:33:16", "throughput": 8818.38, "total_tokens": 25107584} +{"current_steps": 37255, "total_steps": 204665, "loss": 0.098, "lr": 1.9592922271260973e-06, "epoch": 0.9101458480932255, "percentage": 18.2, "elapsed_time": "0:47:27", "remaining_time": "3:33:15", "throughput": 8818.56, "total_tokens": 25111232} +{"current_steps": 37260, "total_steps": 204665, "loss": 0.1152, "lr": 1.959268139935956e-06, "epoch": 0.9102679989250727, "percentage": 18.21, "elapsed_time": "0:47:27", "remaining_time": "3:33:15", "throughput": 8818.64, "total_tokens": 25114560} +{"current_steps": 37265, "total_steps": 204665, "loss": 0.0152, "lr": 1.959244045769759e-06, "epoch": 0.9103901497569199, "percentage": 18.21, "elapsed_time": "0:47:28", "remaining_time": "3:33:14", "throughput": 8818.83, "total_tokens": 25118208} +{"current_steps": 37270, "total_steps": 204665, "loss": 0.0609, "lr": 1.9592199446276812e-06, "epoch": 0.910512300588767, "percentage": 18.21, "elapsed_time": "0:47:28", "remaining_time": "3:33:14", "throughput": 8818.85, "total_tokens": 25121280} +{"current_steps": 37275, "total_steps": 204665, "loss": 0.0292, "lr": 1.959195836509898e-06, "epoch": 0.9106344514206142, "percentage": 18.21, "elapsed_time": "0:47:28", "remaining_time": "3:33:13", "throughput": 8819.03, "total_tokens": 25124928} +{"current_steps": 37280, "total_steps": 204665, "loss": 0.2271, "lr": 1.9591717214165844e-06, "epoch": 0.9107566022524614, "percentage": 18.22, "elapsed_time": "0:47:29", "remaining_time": "3:33:13", "throughput": 8819.14, "total_tokens": 25128320} +{"current_steps": 37285, "total_steps": 204665, "loss": 0.1586, "lr": 1.9591475993479162e-06, "epoch": 0.9108787530843085, "percentage": 18.22, "elapsed_time": "0:47:29", "remaining_time": "3:33:12", "throughput": 8819.26, "total_tokens": 25131776} +{"current_steps": 37290, "total_steps": 204665, "loss": 0.1356, "lr": 1.959123470304069e-06, "epoch": 0.9110009039161556, "percentage": 18.22, "elapsed_time": "0:47:30", "remaining_time": "3:33:12", "throughput": 8819.4, "total_tokens": 25135296} +{"current_steps": 37295, "total_steps": 204665, "loss": 0.0101, "lr": 1.9590993342852175e-06, "epoch": 0.9111230547480028, "percentage": 18.22, "elapsed_time": "0:47:30", "remaining_time": "3:33:11", "throughput": 8819.49, "total_tokens": 25138624} +{"current_steps": 37300, "total_steps": 204665, "loss": 0.1217, "lr": 1.959075191291538e-06, "epoch": 0.91124520557985, "percentage": 18.22, "elapsed_time": "0:47:30", "remaining_time": "3:33:11", "throughput": 8819.61, "total_tokens": 25142016} +{"current_steps": 37305, "total_steps": 204665, "loss": 0.0603, "lr": 1.959051041323206e-06, "epoch": 0.9113673564116972, "percentage": 18.23, "elapsed_time": "0:47:31", "remaining_time": "3:33:10", "throughput": 8819.82, "total_tokens": 25145792} +{"current_steps": 37310, "total_steps": 204665, "loss": 0.1354, "lr": 1.959026884380396e-06, "epoch": 0.9114895072435444, "percentage": 18.23, "elapsed_time": "0:47:31", "remaining_time": "3:33:10", "throughput": 8820.04, "total_tokens": 25149568} +{"current_steps": 37315, "total_steps": 204665, "loss": 0.0892, "lr": 1.959002720463285e-06, "epoch": 0.9116116580753915, "percentage": 18.23, "elapsed_time": "0:47:31", "remaining_time": "3:33:09", "throughput": 8820.1, "total_tokens": 25152768} +{"current_steps": 37320, "total_steps": 204665, "loss": 0.1312, "lr": 1.958978549572048e-06, "epoch": 0.9117338089072387, "percentage": 18.23, "elapsed_time": "0:47:32", "remaining_time": "3:33:08", "throughput": 8820.1, "total_tokens": 25155776} +{"current_steps": 37325, "total_steps": 204665, "loss": 0.106, "lr": 1.958954371706862e-06, "epoch": 0.9118559597390858, "percentage": 18.24, "elapsed_time": "0:47:32", "remaining_time": "3:33:08", "throughput": 8820.26, "total_tokens": 25159360} +{"current_steps": 37330, "total_steps": 204665, "loss": 0.0733, "lr": 1.9589301868679013e-06, "epoch": 0.911978110570933, "percentage": 18.24, "elapsed_time": "0:47:32", "remaining_time": "3:33:07", "throughput": 8820.37, "total_tokens": 25162752} +{"current_steps": 37335, "total_steps": 204665, "loss": 0.1728, "lr": 1.958905995055342e-06, "epoch": 0.9121002614027801, "percentage": 18.24, "elapsed_time": "0:47:33", "remaining_time": "3:33:07", "throughput": 8820.4, "total_tokens": 25165824} +{"current_steps": 37340, "total_steps": 204665, "loss": 0.2231, "lr": 1.9588817962693607e-06, "epoch": 0.9122224122346273, "percentage": 18.24, "elapsed_time": "0:47:33", "remaining_time": "3:33:06", "throughput": 8820.48, "total_tokens": 25169152} +{"current_steps": 37345, "total_steps": 204665, "loss": 0.0484, "lr": 1.9588575905101333e-06, "epoch": 0.9123445630664745, "percentage": 18.25, "elapsed_time": "0:47:33", "remaining_time": "3:33:06", "throughput": 8820.76, "total_tokens": 25173120} +{"current_steps": 37350, "total_steps": 204665, "loss": 0.1025, "lr": 1.958833377777835e-06, "epoch": 0.9124667138983217, "percentage": 18.25, "elapsed_time": "0:47:34", "remaining_time": "3:33:05", "throughput": 8820.82, "total_tokens": 25176320} +{"current_steps": 37355, "total_steps": 204665, "loss": 0.094, "lr": 1.958809158072643e-06, "epoch": 0.9125888647301688, "percentage": 18.25, "elapsed_time": "0:47:34", "remaining_time": "3:33:05", "throughput": 8821.0, "total_tokens": 25179968} +{"current_steps": 37360, "total_steps": 204665, "loss": 0.0401, "lr": 1.958784931394733e-06, "epoch": 0.912711015562016, "percentage": 18.25, "elapsed_time": "0:47:34", "remaining_time": "3:33:04", "throughput": 8821.09, "total_tokens": 25183296} +{"current_steps": 37365, "total_steps": 204665, "loss": 0.2674, "lr": 1.958760697744281e-06, "epoch": 0.9128331663938631, "percentage": 18.26, "elapsed_time": "0:47:35", "remaining_time": "3:33:04", "throughput": 8821.29, "total_tokens": 25187008} +{"current_steps": 37370, "total_steps": 204665, "loss": 0.0754, "lr": 1.958736457121463e-06, "epoch": 0.9129553172257103, "percentage": 18.26, "elapsed_time": "0:47:35", "remaining_time": "3:33:03", "throughput": 8821.48, "total_tokens": 25190656} +{"current_steps": 37375, "total_steps": 204665, "loss": 0.1197, "lr": 1.958712209526456e-06, "epoch": 0.9130774680575575, "percentage": 18.26, "elapsed_time": "0:47:35", "remaining_time": "3:33:03", "throughput": 8821.61, "total_tokens": 25194112} +{"current_steps": 37380, "total_steps": 204665, "loss": 0.0632, "lr": 1.9586879549594356e-06, "epoch": 0.9131996188894046, "percentage": 18.26, "elapsed_time": "0:47:36", "remaining_time": "3:33:02", "throughput": 8821.61, "total_tokens": 25197120} +{"current_steps": 37385, "total_steps": 204665, "loss": 0.1644, "lr": 1.958663693420579e-06, "epoch": 0.9133217697212518, "percentage": 18.27, "elapsed_time": "0:47:36", "remaining_time": "3:33:02", "throughput": 8821.69, "total_tokens": 25200448} +{"current_steps": 37390, "total_steps": 204665, "loss": 0.0693, "lr": 1.9586394249100616e-06, "epoch": 0.913443920553099, "percentage": 18.27, "elapsed_time": "0:47:36", "remaining_time": "3:33:01", "throughput": 8821.75, "total_tokens": 25203648} +{"current_steps": 37395, "total_steps": 204665, "loss": 0.0771, "lr": 1.9586151494280604e-06, "epoch": 0.9135660713849462, "percentage": 18.27, "elapsed_time": "0:47:37", "remaining_time": "3:33:01", "throughput": 8821.88, "total_tokens": 25207104} +{"current_steps": 37400, "total_steps": 204665, "loss": 0.2961, "lr": 1.9585908669747523e-06, "epoch": 0.9136882222167932, "percentage": 18.27, "elapsed_time": "0:47:37", "remaining_time": "3:33:00", "throughput": 8822.01, "total_tokens": 25210560} +{"current_steps": 37405, "total_steps": 204665, "loss": 0.0501, "lr": 1.958566577550314e-06, "epoch": 0.9138103730486404, "percentage": 18.28, "elapsed_time": "0:47:38", "remaining_time": "3:32:59", "throughput": 8822.1, "total_tokens": 25213888} +{"current_steps": 37410, "total_steps": 204665, "loss": 0.1711, "lr": 1.958542281154921e-06, "epoch": 0.9139325238804876, "percentage": 18.28, "elapsed_time": "0:47:38", "remaining_time": "3:32:59", "throughput": 8822.23, "total_tokens": 25217344} +{"current_steps": 37415, "total_steps": 204665, "loss": 0.1616, "lr": 1.9585179777887514e-06, "epoch": 0.9140546747123348, "percentage": 18.28, "elapsed_time": "0:47:38", "remaining_time": "3:32:58", "throughput": 8822.26, "total_tokens": 25220416} +{"current_steps": 37420, "total_steps": 204665, "loss": 0.0618, "lr": 1.9584936674519806e-06, "epoch": 0.914176825544182, "percentage": 18.28, "elapsed_time": "0:47:39", "remaining_time": "3:32:58", "throughput": 8822.44, "total_tokens": 25224064} +{"current_steps": 37425, "total_steps": 204665, "loss": 0.0479, "lr": 1.9584693501447863e-06, "epoch": 0.9142989763760291, "percentage": 18.29, "elapsed_time": "0:47:39", "remaining_time": "3:32:57", "throughput": 8822.45, "total_tokens": 25227072} +{"current_steps": 37430, "total_steps": 204665, "loss": 0.0213, "lr": 1.958445025867345e-06, "epoch": 0.9144211272078763, "percentage": 18.29, "elapsed_time": "0:47:39", "remaining_time": "3:32:57", "throughput": 8822.58, "total_tokens": 25230528} +{"current_steps": 37435, "total_steps": 204665, "loss": 0.1662, "lr": 1.9584206946198342e-06, "epoch": 0.9145432780397235, "percentage": 18.29, "elapsed_time": "0:47:40", "remaining_time": "3:32:56", "throughput": 8822.71, "total_tokens": 25233984} +{"current_steps": 37440, "total_steps": 204665, "loss": 0.0253, "lr": 1.9583963564024297e-06, "epoch": 0.9146654288715707, "percentage": 18.29, "elapsed_time": "0:47:40", "remaining_time": "3:32:56", "throughput": 8822.77, "total_tokens": 25237184} +{"current_steps": 37445, "total_steps": 204665, "loss": 0.1111, "lr": 1.9583720112153094e-06, "epoch": 0.9147875797034177, "percentage": 18.3, "elapsed_time": "0:47:40", "remaining_time": "3:32:55", "throughput": 8822.87, "total_tokens": 25240576} +{"current_steps": 37450, "total_steps": 204665, "loss": 0.1968, "lr": 1.95834765905865e-06, "epoch": 0.9149097305352649, "percentage": 18.3, "elapsed_time": "0:47:41", "remaining_time": "3:32:55", "throughput": 8822.91, "total_tokens": 25243712} +{"current_steps": 37455, "total_steps": 204665, "loss": 0.1636, "lr": 1.958323299932629e-06, "epoch": 0.9150318813671121, "percentage": 18.3, "elapsed_time": "0:47:41", "remaining_time": "3:32:54", "throughput": 8823.05, "total_tokens": 25247232} +{"current_steps": 37460, "total_steps": 204665, "loss": 0.0255, "lr": 1.9582989338374227e-06, "epoch": 0.9151540321989593, "percentage": 18.3, "elapsed_time": "0:47:41", "remaining_time": "3:32:54", "throughput": 8823.24, "total_tokens": 25250880} +{"current_steps": 37465, "total_steps": 204665, "loss": 0.081, "lr": 1.958274560773209e-06, "epoch": 0.9152761830308065, "percentage": 18.31, "elapsed_time": "0:47:42", "remaining_time": "3:32:53", "throughput": 8823.45, "total_tokens": 25254592} +{"current_steps": 37470, "total_steps": 204665, "loss": 0.0502, "lr": 1.958250180740165e-06, "epoch": 0.9153983338626536, "percentage": 18.31, "elapsed_time": "0:47:42", "remaining_time": "3:32:53", "throughput": 8823.58, "total_tokens": 25258048} +{"current_steps": 37475, "total_steps": 204665, "loss": 0.1089, "lr": 1.958225793738468e-06, "epoch": 0.9155204846945008, "percentage": 18.31, "elapsed_time": "0:47:42", "remaining_time": "3:32:52", "throughput": 8823.64, "total_tokens": 25261248} +{"current_steps": 37480, "total_steps": 204665, "loss": 0.1609, "lr": 1.958201399768295e-06, "epoch": 0.915642635526348, "percentage": 18.31, "elapsed_time": "0:47:43", "remaining_time": "3:32:51", "throughput": 8823.86, "total_tokens": 25265024} +{"current_steps": 37485, "total_steps": 204665, "loss": 0.2316, "lr": 1.958176998829824e-06, "epoch": 0.9157647863581951, "percentage": 18.32, "elapsed_time": "0:47:43", "remaining_time": "3:32:51", "throughput": 8823.86, "total_tokens": 25268032} +{"current_steps": 37490, "total_steps": 204665, "loss": 0.0842, "lr": 1.958152590923232e-06, "epoch": 0.9158869371900422, "percentage": 18.32, "elapsed_time": "0:47:43", "remaining_time": "3:32:50", "throughput": 8823.96, "total_tokens": 25271360} +{"current_steps": 37495, "total_steps": 204665, "loss": 0.0642, "lr": 1.958128176048697e-06, "epoch": 0.9160090880218894, "percentage": 18.32, "elapsed_time": "0:47:44", "remaining_time": "3:32:50", "throughput": 8824.09, "total_tokens": 25274816} +{"current_steps": 37500, "total_steps": 204665, "loss": 0.0984, "lr": 1.9581037542063955e-06, "epoch": 0.9161312388537366, "percentage": 18.32, "elapsed_time": "0:47:44", "remaining_time": "3:32:49", "throughput": 8824.2, "total_tokens": 25278208} +{"current_steps": 37505, "total_steps": 204665, "loss": 0.1362, "lr": 1.958079325396506e-06, "epoch": 0.9162533896855838, "percentage": 18.33, "elapsed_time": "0:47:44", "remaining_time": "3:32:49", "throughput": 8824.31, "total_tokens": 25281600} +{"current_steps": 37510, "total_steps": 204665, "loss": 0.0776, "lr": 1.9580548896192066e-06, "epoch": 0.916375540517431, "percentage": 18.33, "elapsed_time": "0:47:45", "remaining_time": "3:32:48", "throughput": 8824.39, "total_tokens": 25284864} +{"current_steps": 37515, "total_steps": 204665, "loss": 0.1038, "lr": 1.9580304468746736e-06, "epoch": 0.916497691349278, "percentage": 18.33, "elapsed_time": "0:47:45", "remaining_time": "3:32:48", "throughput": 8824.49, "total_tokens": 25288192} +{"current_steps": 37520, "total_steps": 204665, "loss": 0.1667, "lr": 1.958005997163086e-06, "epoch": 0.9166198421811252, "percentage": 18.33, "elapsed_time": "0:47:46", "remaining_time": "3:32:47", "throughput": 8824.59, "total_tokens": 25291584} +{"current_steps": 37525, "total_steps": 204665, "loss": 0.0361, "lr": 1.9579815404846207e-06, "epoch": 0.9167419930129724, "percentage": 18.33, "elapsed_time": "0:47:46", "remaining_time": "3:32:47", "throughput": 8824.72, "total_tokens": 25295040} +{"current_steps": 37530, "total_steps": 204665, "loss": 0.1552, "lr": 1.957957076839456e-06, "epoch": 0.9168641438448196, "percentage": 18.34, "elapsed_time": "0:47:46", "remaining_time": "3:32:46", "throughput": 8824.83, "total_tokens": 25298432} +{"current_steps": 37535, "total_steps": 204665, "loss": 0.0896, "lr": 1.95793260622777e-06, "epoch": 0.9169862946766667, "percentage": 18.34, "elapsed_time": "0:47:47", "remaining_time": "3:32:46", "throughput": 8824.89, "total_tokens": 25301632} +{"current_steps": 37540, "total_steps": 204665, "loss": 0.134, "lr": 1.95790812864974e-06, "epoch": 0.9171084455085139, "percentage": 18.34, "elapsed_time": "0:47:47", "remaining_time": "3:32:45", "throughput": 8825.14, "total_tokens": 25305536} +{"current_steps": 37545, "total_steps": 204665, "loss": 0.1619, "lr": 1.9578836441055453e-06, "epoch": 0.9172305963403611, "percentage": 18.34, "elapsed_time": "0:47:47", "remaining_time": "3:32:45", "throughput": 8825.42, "total_tokens": 25309504} +{"current_steps": 37550, "total_steps": 204665, "loss": 0.0792, "lr": 1.9578591525953625e-06, "epoch": 0.9173527471722083, "percentage": 18.35, "elapsed_time": "0:47:48", "remaining_time": "3:32:44", "throughput": 8825.59, "total_tokens": 25313152} +{"current_steps": 37555, "total_steps": 204665, "loss": 0.1003, "lr": 1.9578346541193705e-06, "epoch": 0.9174748980040555, "percentage": 18.35, "elapsed_time": "0:47:48", "remaining_time": "3:32:44", "throughput": 8825.65, "total_tokens": 25316352} +{"current_steps": 37560, "total_steps": 204665, "loss": 0.2018, "lr": 1.957810148677747e-06, "epoch": 0.9175970488359025, "percentage": 18.35, "elapsed_time": "0:47:48", "remaining_time": "3:32:43", "throughput": 8825.74, "total_tokens": 25319680} +{"current_steps": 37565, "total_steps": 204665, "loss": 0.0991, "lr": 1.957785636270671e-06, "epoch": 0.9177191996677497, "percentage": 18.35, "elapsed_time": "0:47:49", "remaining_time": "3:32:43", "throughput": 8826.0, "total_tokens": 25323584} +{"current_steps": 37570, "total_steps": 204665, "loss": 0.1829, "lr": 1.95776111689832e-06, "epoch": 0.9178413504995969, "percentage": 18.36, "elapsed_time": "0:47:49", "remaining_time": "3:32:42", "throughput": 8826.1, "total_tokens": 25326912} +{"current_steps": 37575, "total_steps": 204665, "loss": 0.1812, "lr": 1.957736590560872e-06, "epoch": 0.9179635013314441, "percentage": 18.36, "elapsed_time": "0:47:49", "remaining_time": "3:32:41", "throughput": 8826.15, "total_tokens": 25330112} +{"current_steps": 37580, "total_steps": 204665, "loss": 0.1119, "lr": 1.9577120572585067e-06, "epoch": 0.9180856521632912, "percentage": 18.36, "elapsed_time": "0:47:50", "remaining_time": "3:32:41", "throughput": 8826.17, "total_tokens": 25333184} +{"current_steps": 37585, "total_steps": 204665, "loss": 0.0318, "lr": 1.9576875169914016e-06, "epoch": 0.9182078029951384, "percentage": 18.36, "elapsed_time": "0:47:50", "remaining_time": "3:32:40", "throughput": 8826.31, "total_tokens": 25336640} +{"current_steps": 37590, "total_steps": 204665, "loss": 0.0357, "lr": 1.957662969759735e-06, "epoch": 0.9183299538269856, "percentage": 18.37, "elapsed_time": "0:47:50", "remaining_time": "3:32:40", "throughput": 8826.43, "total_tokens": 25340096} +{"current_steps": 37595, "total_steps": 204665, "loss": 0.1304, "lr": 1.957638415563686e-06, "epoch": 0.9184521046588328, "percentage": 18.37, "elapsed_time": "0:47:51", "remaining_time": "3:32:39", "throughput": 8826.51, "total_tokens": 25343360} +{"current_steps": 37600, "total_steps": 204665, "loss": 0.1276, "lr": 1.9576138544034327e-06, "epoch": 0.9185742554906798, "percentage": 18.37, "elapsed_time": "0:47:51", "remaining_time": "3:32:39", "throughput": 8826.75, "total_tokens": 25347200} +{"current_steps": 37605, "total_steps": 204665, "loss": 0.086, "lr": 1.9575892862791537e-06, "epoch": 0.918696406322527, "percentage": 18.37, "elapsed_time": "0:47:51", "remaining_time": "3:32:38", "throughput": 8826.87, "total_tokens": 25350656} +{"current_steps": 37610, "total_steps": 204665, "loss": 0.1472, "lr": 1.9575647111910276e-06, "epoch": 0.9188185571543742, "percentage": 18.38, "elapsed_time": "0:47:52", "remaining_time": "3:32:38", "throughput": 8826.84, "total_tokens": 25353536} +{"current_steps": 37615, "total_steps": 204665, "loss": 0.2079, "lr": 1.957540129139234e-06, "epoch": 0.9189407079862214, "percentage": 18.38, "elapsed_time": "0:47:52", "remaining_time": "3:32:37", "throughput": 8826.95, "total_tokens": 25356928} +{"current_steps": 37620, "total_steps": 204665, "loss": 0.224, "lr": 1.957515540123951e-06, "epoch": 0.9190628588180686, "percentage": 18.38, "elapsed_time": "0:47:53", "remaining_time": "3:32:37", "throughput": 8827.0, "total_tokens": 25360064} +{"current_steps": 37625, "total_steps": 204665, "loss": 0.0315, "lr": 1.9574909441453573e-06, "epoch": 0.9191850096499157, "percentage": 18.38, "elapsed_time": "0:47:53", "remaining_time": "3:32:36", "throughput": 8827.01, "total_tokens": 25363136} +{"current_steps": 37630, "total_steps": 204665, "loss": 0.0691, "lr": 1.957466341203632e-06, "epoch": 0.9193071604817629, "percentage": 18.39, "elapsed_time": "0:47:53", "remaining_time": "3:32:36", "throughput": 8827.14, "total_tokens": 25366592} +{"current_steps": 37635, "total_steps": 204665, "loss": 0.1469, "lr": 1.9574417312989535e-06, "epoch": 0.91942931131361, "percentage": 18.39, "elapsed_time": "0:47:54", "remaining_time": "3:32:35", "throughput": 8827.33, "total_tokens": 25370304} +{"current_steps": 37640, "total_steps": 204665, "loss": 0.1158, "lr": 1.9574171144315016e-06, "epoch": 0.9195514621454572, "percentage": 18.39, "elapsed_time": "0:47:54", "remaining_time": "3:32:34", "throughput": 8827.31, "total_tokens": 25373248} +{"current_steps": 37645, "total_steps": 204665, "loss": 0.0728, "lr": 1.957392490601455e-06, "epoch": 0.9196736129773043, "percentage": 18.39, "elapsed_time": "0:47:54", "remaining_time": "3:32:34", "throughput": 8827.47, "total_tokens": 25376832} +{"current_steps": 37650, "total_steps": 204665, "loss": 0.0135, "lr": 1.9573678598089924e-06, "epoch": 0.9197957638091515, "percentage": 18.4, "elapsed_time": "0:47:55", "remaining_time": "3:32:33", "throughput": 8827.6, "total_tokens": 25380288} +{"current_steps": 37655, "total_steps": 204665, "loss": 0.0112, "lr": 1.9573432220542933e-06, "epoch": 0.9199179146409987, "percentage": 18.4, "elapsed_time": "0:47:55", "remaining_time": "3:32:33", "throughput": 8827.75, "total_tokens": 25383808} +{"current_steps": 37660, "total_steps": 204665, "loss": 0.1431, "lr": 1.957318577337537e-06, "epoch": 0.9200400654728459, "percentage": 18.4, "elapsed_time": "0:47:55", "remaining_time": "3:32:32", "throughput": 8827.78, "total_tokens": 25386880} +{"current_steps": 37665, "total_steps": 204665, "loss": 0.138, "lr": 1.9572939256589025e-06, "epoch": 0.9201622163046931, "percentage": 18.4, "elapsed_time": "0:47:56", "remaining_time": "3:32:32", "throughput": 8828.01, "total_tokens": 25390720} +{"current_steps": 37670, "total_steps": 204665, "loss": 0.0168, "lr": 1.957269267018569e-06, "epoch": 0.9202843671365402, "percentage": 18.41, "elapsed_time": "0:47:56", "remaining_time": "3:32:31", "throughput": 8828.22, "total_tokens": 25394496} +{"current_steps": 37675, "total_steps": 204665, "loss": 0.0043, "lr": 1.957244601416716e-06, "epoch": 0.9204065179683873, "percentage": 18.41, "elapsed_time": "0:47:56", "remaining_time": "3:32:31", "throughput": 8828.39, "total_tokens": 25398080} +{"current_steps": 37680, "total_steps": 204665, "loss": 0.2152, "lr": 1.957219928853523e-06, "epoch": 0.9205286688002345, "percentage": 18.41, "elapsed_time": "0:47:57", "remaining_time": "3:32:30", "throughput": 8828.56, "total_tokens": 25401664} +{"current_steps": 37685, "total_steps": 204665, "loss": 0.1617, "lr": 1.9571952493291685e-06, "epoch": 0.9206508196320817, "percentage": 18.41, "elapsed_time": "0:47:57", "remaining_time": "3:32:30", "throughput": 8828.8, "total_tokens": 25405568} +{"current_steps": 37690, "total_steps": 204665, "loss": 0.1938, "lr": 1.957170562843833e-06, "epoch": 0.9207729704639288, "percentage": 18.42, "elapsed_time": "0:47:57", "remaining_time": "3:32:29", "throughput": 8829.01, "total_tokens": 25409344} +{"current_steps": 37695, "total_steps": 204665, "loss": 0.0743, "lr": 1.957145869397696e-06, "epoch": 0.920895121295776, "percentage": 18.42, "elapsed_time": "0:47:58", "remaining_time": "3:32:29", "throughput": 8829.18, "total_tokens": 25412928} +{"current_steps": 37700, "total_steps": 204665, "loss": 0.1165, "lr": 1.9571211689909366e-06, "epoch": 0.9210172721276232, "percentage": 18.42, "elapsed_time": "0:47:58", "remaining_time": "3:32:28", "throughput": 8829.27, "total_tokens": 25416256} +{"current_steps": 37705, "total_steps": 204665, "loss": 0.0228, "lr": 1.9570964616237348e-06, "epoch": 0.9211394229594704, "percentage": 18.42, "elapsed_time": "0:47:58", "remaining_time": "3:32:28", "throughput": 8829.45, "total_tokens": 25419904} +{"current_steps": 37710, "total_steps": 204665, "loss": 0.1253, "lr": 1.9570717472962697e-06, "epoch": 0.9212615737913176, "percentage": 18.43, "elapsed_time": "0:47:59", "remaining_time": "3:32:27", "throughput": 8829.49, "total_tokens": 25423040} +{"current_steps": 37715, "total_steps": 204665, "loss": 0.2298, "lr": 1.9570470260087217e-06, "epoch": 0.9213837246231646, "percentage": 18.43, "elapsed_time": "0:47:59", "remaining_time": "3:32:27", "throughput": 8829.63, "total_tokens": 25426560} +{"current_steps": 37720, "total_steps": 204665, "loss": 0.059, "lr": 1.9570222977612704e-06, "epoch": 0.9215058754550118, "percentage": 18.43, "elapsed_time": "0:48:00", "remaining_time": "3:32:26", "throughput": 8829.76, "total_tokens": 25430016} +{"current_steps": 37725, "total_steps": 204665, "loss": 0.1113, "lr": 1.9569975625540954e-06, "epoch": 0.921628026286859, "percentage": 18.43, "elapsed_time": "0:48:00", "remaining_time": "3:32:26", "throughput": 8829.91, "total_tokens": 25433536} +{"current_steps": 37730, "total_steps": 204665, "loss": 0.0467, "lr": 1.9569728203873767e-06, "epoch": 0.9217501771187062, "percentage": 18.44, "elapsed_time": "0:48:00", "remaining_time": "3:32:25", "throughput": 8830.05, "total_tokens": 25436992} +{"current_steps": 37735, "total_steps": 204665, "loss": 0.0898, "lr": 1.9569480712612943e-06, "epoch": 0.9218723279505533, "percentage": 18.44, "elapsed_time": "0:48:01", "remaining_time": "3:32:25", "throughput": 8830.09, "total_tokens": 25440128} +{"current_steps": 37740, "total_steps": 204665, "loss": 0.127, "lr": 1.956923315176028e-06, "epoch": 0.9219944787824005, "percentage": 18.44, "elapsed_time": "0:48:01", "remaining_time": "3:32:24", "throughput": 8830.24, "total_tokens": 25443648} +{"current_steps": 37745, "total_steps": 204665, "loss": 0.0535, "lr": 1.956898552131758e-06, "epoch": 0.9221166296142477, "percentage": 18.44, "elapsed_time": "0:48:01", "remaining_time": "3:32:24", "throughput": 8830.28, "total_tokens": 25446784} +{"current_steps": 37750, "total_steps": 204665, "loss": 0.1066, "lr": 1.9568737821286645e-06, "epoch": 0.9222387804460949, "percentage": 18.44, "elapsed_time": "0:48:02", "remaining_time": "3:32:23", "throughput": 8830.41, "total_tokens": 25450240} +{"current_steps": 37755, "total_steps": 204665, "loss": 0.0087, "lr": 1.9568490051669276e-06, "epoch": 0.922360931277942, "percentage": 18.45, "elapsed_time": "0:48:02", "remaining_time": "3:32:23", "throughput": 8830.56, "total_tokens": 25453824} +{"current_steps": 37760, "total_steps": 204665, "loss": 0.0612, "lr": 1.9568242212467273e-06, "epoch": 0.9224830821097891, "percentage": 18.45, "elapsed_time": "0:48:02", "remaining_time": "3:32:22", "throughput": 8830.89, "total_tokens": 25457984} +{"current_steps": 37765, "total_steps": 204665, "loss": 0.1227, "lr": 1.9567994303682437e-06, "epoch": 0.9226052329416363, "percentage": 18.45, "elapsed_time": "0:48:03", "remaining_time": "3:32:22", "throughput": 8831.0, "total_tokens": 25461376} +{"current_steps": 37770, "total_steps": 204665, "loss": 0.096, "lr": 1.9567746325316575e-06, "epoch": 0.9227273837734835, "percentage": 18.45, "elapsed_time": "0:48:03", "remaining_time": "3:32:21", "throughput": 8831.09, "total_tokens": 25464704} +{"current_steps": 37775, "total_steps": 204665, "loss": 0.2156, "lr": 1.956749827737149e-06, "epoch": 0.9228495346053307, "percentage": 18.46, "elapsed_time": "0:48:03", "remaining_time": "3:32:20", "throughput": 8831.18, "total_tokens": 25468032} +{"current_steps": 37780, "total_steps": 204665, "loss": 0.1741, "lr": 1.956725015984898e-06, "epoch": 0.9229716854371778, "percentage": 18.46, "elapsed_time": "0:48:04", "remaining_time": "3:32:20", "throughput": 8831.37, "total_tokens": 25471744} +{"current_steps": 37785, "total_steps": 204665, "loss": 0.165, "lr": 1.956700197275086e-06, "epoch": 0.923093836269025, "percentage": 18.46, "elapsed_time": "0:48:04", "remaining_time": "3:32:19", "throughput": 8831.5, "total_tokens": 25475200} +{"current_steps": 37790, "total_steps": 204665, "loss": 0.1686, "lr": 1.9566753716078922e-06, "epoch": 0.9232159871008722, "percentage": 18.46, "elapsed_time": "0:48:04", "remaining_time": "3:32:19", "throughput": 8831.64, "total_tokens": 25478720} +{"current_steps": 37795, "total_steps": 204665, "loss": 0.094, "lr": 1.9566505389834978e-06, "epoch": 0.9233381379327193, "percentage": 18.47, "elapsed_time": "0:48:05", "remaining_time": "3:32:18", "throughput": 8831.82, "total_tokens": 25482368} +{"current_steps": 37800, "total_steps": 204665, "loss": 0.0531, "lr": 1.9566256994020833e-06, "epoch": 0.9234602887645665, "percentage": 18.47, "elapsed_time": "0:48:05", "remaining_time": "3:32:18", "throughput": 8831.87, "total_tokens": 25485568} +{"current_steps": 37805, "total_steps": 204665, "loss": 0.1456, "lr": 1.95660085286383e-06, "epoch": 0.9235824395964136, "percentage": 18.47, "elapsed_time": "0:48:05", "remaining_time": "3:32:17", "throughput": 8831.91, "total_tokens": 25488704} +{"current_steps": 37810, "total_steps": 204665, "loss": 0.1239, "lr": 1.956575999368918e-06, "epoch": 0.9237045904282608, "percentage": 18.47, "elapsed_time": "0:48:06", "remaining_time": "3:32:17", "throughput": 8831.98, "total_tokens": 25491968} +{"current_steps": 37815, "total_steps": 204665, "loss": 0.1556, "lr": 1.9565511389175273e-06, "epoch": 0.923826741260108, "percentage": 18.48, "elapsed_time": "0:48:06", "remaining_time": "3:32:16", "throughput": 8832.41, "total_tokens": 25496576} +{"current_steps": 37820, "total_steps": 204665, "loss": 0.1001, "lr": 1.9565262715098396e-06, "epoch": 0.9239488920919552, "percentage": 18.48, "elapsed_time": "0:48:07", "remaining_time": "3:32:16", "throughput": 8832.51, "total_tokens": 25499968} +{"current_steps": 37825, "total_steps": 204665, "loss": 0.069, "lr": 1.9565013971460362e-06, "epoch": 0.9240710429238023, "percentage": 18.48, "elapsed_time": "0:48:07", "remaining_time": "3:32:15", "throughput": 8832.58, "total_tokens": 25503232} +{"current_steps": 37830, "total_steps": 204665, "loss": 0.1173, "lr": 1.956476515826297e-06, "epoch": 0.9241931937556495, "percentage": 18.48, "elapsed_time": "0:48:07", "remaining_time": "3:32:15", "throughput": 8832.67, "total_tokens": 25506560} +{"current_steps": 37835, "total_steps": 204665, "loss": 0.15, "lr": 1.9564516275508033e-06, "epoch": 0.9243153445874966, "percentage": 18.49, "elapsed_time": "0:48:08", "remaining_time": "3:32:14", "throughput": 8832.77, "total_tokens": 25509952} +{"current_steps": 37840, "total_steps": 204665, "loss": 0.1084, "lr": 1.956426732319736e-06, "epoch": 0.9244374954193438, "percentage": 18.49, "elapsed_time": "0:48:08", "remaining_time": "3:32:14", "throughput": 8832.9, "total_tokens": 25513408} +{"current_steps": 37845, "total_steps": 204665, "loss": 0.1143, "lr": 1.9564018301332765e-06, "epoch": 0.924559646251191, "percentage": 18.49, "elapsed_time": "0:48:08", "remaining_time": "3:32:13", "throughput": 8833.03, "total_tokens": 25516864} +{"current_steps": 37850, "total_steps": 204665, "loss": 0.206, "lr": 1.9563769209916055e-06, "epoch": 0.9246817970830381, "percentage": 18.49, "elapsed_time": "0:48:09", "remaining_time": "3:32:13", "throughput": 8833.1, "total_tokens": 25520128} +{"current_steps": 37855, "total_steps": 204665, "loss": 0.0725, "lr": 1.9563520048949043e-06, "epoch": 0.9248039479148853, "percentage": 18.5, "elapsed_time": "0:48:09", "remaining_time": "3:32:12", "throughput": 8833.28, "total_tokens": 25523776} +{"current_steps": 37860, "total_steps": 204665, "loss": 0.0724, "lr": 1.956327081843354e-06, "epoch": 0.9249260987467325, "percentage": 18.5, "elapsed_time": "0:48:09", "remaining_time": "3:32:12", "throughput": 8833.29, "total_tokens": 25526784} +{"current_steps": 37865, "total_steps": 204665, "loss": 0.0253, "lr": 1.9563021518371363e-06, "epoch": 0.9250482495785797, "percentage": 18.5, "elapsed_time": "0:48:10", "remaining_time": "3:32:11", "throughput": 8833.42, "total_tokens": 25530240} +{"current_steps": 37870, "total_steps": 204665, "loss": 0.1545, "lr": 1.9562772148764317e-06, "epoch": 0.9251704004104268, "percentage": 18.5, "elapsed_time": "0:48:10", "remaining_time": "3:32:11", "throughput": 8833.7, "total_tokens": 25534208} +{"current_steps": 37875, "total_steps": 204665, "loss": 0.0044, "lr": 1.9562522709614223e-06, "epoch": 0.9252925512422739, "percentage": 18.51, "elapsed_time": "0:48:10", "remaining_time": "3:32:10", "throughput": 8833.73, "total_tokens": 25537344} +{"current_steps": 37880, "total_steps": 204665, "loss": 0.0535, "lr": 1.956227320092289e-06, "epoch": 0.9254147020741211, "percentage": 18.51, "elapsed_time": "0:48:11", "remaining_time": "3:32:10", "throughput": 8834.02, "total_tokens": 25541376} +{"current_steps": 37885, "total_steps": 204665, "loss": 0.0822, "lr": 1.9562023622692132e-06, "epoch": 0.9255368529059683, "percentage": 18.51, "elapsed_time": "0:48:11", "remaining_time": "3:32:09", "throughput": 8834.07, "total_tokens": 25544576} +{"current_steps": 37890, "total_steps": 204665, "loss": 0.0883, "lr": 1.9561773974923774e-06, "epoch": 0.9256590037378154, "percentage": 18.51, "elapsed_time": "0:48:11", "remaining_time": "3:32:09", "throughput": 8834.27, "total_tokens": 25548288} +{"current_steps": 37895, "total_steps": 204665, "loss": 0.0122, "lr": 1.9561524257619617e-06, "epoch": 0.9257811545696626, "percentage": 18.52, "elapsed_time": "0:48:12", "remaining_time": "3:32:08", "throughput": 8834.51, "total_tokens": 25552128} +{"current_steps": 37900, "total_steps": 204665, "loss": 0.1289, "lr": 1.9561274470781485e-06, "epoch": 0.9259033054015098, "percentage": 18.52, "elapsed_time": "0:48:12", "remaining_time": "3:32:08", "throughput": 8834.67, "total_tokens": 25555712} +{"current_steps": 37905, "total_steps": 204665, "loss": 0.3542, "lr": 1.9561024614411197e-06, "epoch": 0.926025456233357, "percentage": 18.52, "elapsed_time": "0:48:13", "remaining_time": "3:32:07", "throughput": 8834.71, "total_tokens": 25558848} +{"current_steps": 37910, "total_steps": 204665, "loss": 0.1869, "lr": 1.956077468851056e-06, "epoch": 0.9261476070652042, "percentage": 18.52, "elapsed_time": "0:48:13", "remaining_time": "3:32:07", "throughput": 8834.8, "total_tokens": 25562176} +{"current_steps": 37915, "total_steps": 204665, "loss": 0.1002, "lr": 1.9560524693081405e-06, "epoch": 0.9262697578970512, "percentage": 18.53, "elapsed_time": "0:48:13", "remaining_time": "3:32:06", "throughput": 8834.82, "total_tokens": 25565248} +{"current_steps": 37920, "total_steps": 204665, "loss": 0.2334, "lr": 1.956027462812554e-06, "epoch": 0.9263919087288984, "percentage": 18.53, "elapsed_time": "0:48:14", "remaining_time": "3:32:05", "throughput": 8834.8, "total_tokens": 25568128} +{"current_steps": 37925, "total_steps": 204665, "loss": 0.0868, "lr": 1.9560024493644786e-06, "epoch": 0.9265140595607456, "percentage": 18.53, "elapsed_time": "0:48:14", "remaining_time": "3:32:05", "throughput": 8834.78, "total_tokens": 25571072} +{"current_steps": 37930, "total_steps": 204665, "loss": 0.1437, "lr": 1.955977428964096e-06, "epoch": 0.9266362103925928, "percentage": 18.53, "elapsed_time": "0:48:14", "remaining_time": "3:32:04", "throughput": 8834.91, "total_tokens": 25574528} +{"current_steps": 37935, "total_steps": 204665, "loss": 0.159, "lr": 1.9559524016115887e-06, "epoch": 0.9267583612244399, "percentage": 18.54, "elapsed_time": "0:48:15", "remaining_time": "3:32:04", "throughput": 8835.01, "total_tokens": 25577920} +{"current_steps": 37940, "total_steps": 204665, "loss": 0.1351, "lr": 1.9559273673071384e-06, "epoch": 0.9268805120562871, "percentage": 18.54, "elapsed_time": "0:48:15", "remaining_time": "3:32:03", "throughput": 8835.1, "total_tokens": 25581312} +{"current_steps": 37945, "total_steps": 204665, "loss": 0.0348, "lr": 1.955902326050927e-06, "epoch": 0.9270026628881343, "percentage": 18.54, "elapsed_time": "0:48:15", "remaining_time": "3:32:03", "throughput": 8835.23, "total_tokens": 25584768} +{"current_steps": 37950, "total_steps": 204665, "loss": 0.0667, "lr": 1.9558772778431373e-06, "epoch": 0.9271248137199815, "percentage": 18.54, "elapsed_time": "0:48:16", "remaining_time": "3:32:02", "throughput": 8835.32, "total_tokens": 25588096} +{"current_steps": 37955, "total_steps": 204665, "loss": 0.0897, "lr": 1.9558522226839506e-06, "epoch": 0.9272469645518286, "percentage": 18.54, "elapsed_time": "0:48:16", "remaining_time": "3:32:02", "throughput": 8835.5, "total_tokens": 25591744} +{"current_steps": 37960, "total_steps": 204665, "loss": 0.1594, "lr": 1.955827160573549e-06, "epoch": 0.9273691153836757, "percentage": 18.55, "elapsed_time": "0:48:16", "remaining_time": "3:32:01", "throughput": 8835.64, "total_tokens": 25595264} +{"current_steps": 37965, "total_steps": 204665, "loss": 0.1122, "lr": 1.9558020915121157e-06, "epoch": 0.9274912662155229, "percentage": 18.55, "elapsed_time": "0:48:17", "remaining_time": "3:32:01", "throughput": 8835.7, "total_tokens": 25598464} +{"current_steps": 37970, "total_steps": 204665, "loss": 0.1329, "lr": 1.9557770154998326e-06, "epoch": 0.9276134170473701, "percentage": 18.55, "elapsed_time": "0:48:17", "remaining_time": "3:32:00", "throughput": 8835.8, "total_tokens": 25601856} +{"current_steps": 37975, "total_steps": 204665, "loss": 0.2071, "lr": 1.9557519325368818e-06, "epoch": 0.9277355678792173, "percentage": 18.55, "elapsed_time": "0:48:17", "remaining_time": "3:32:00", "throughput": 8835.96, "total_tokens": 25605440} +{"current_steps": 37980, "total_steps": 204665, "loss": 0.121, "lr": 1.955726842623446e-06, "epoch": 0.9278577187110644, "percentage": 18.56, "elapsed_time": "0:48:18", "remaining_time": "3:31:59", "throughput": 8836.03, "total_tokens": 25608704} +{"current_steps": 37985, "total_steps": 204665, "loss": 0.1347, "lr": 1.9557017457597073e-06, "epoch": 0.9279798695429116, "percentage": 18.56, "elapsed_time": "0:48:18", "remaining_time": "3:31:59", "throughput": 8836.08, "total_tokens": 25611904} +{"current_steps": 37990, "total_steps": 204665, "loss": 0.1338, "lr": 1.9556766419458487e-06, "epoch": 0.9281020203747588, "percentage": 18.56, "elapsed_time": "0:48:18", "remaining_time": "3:31:58", "throughput": 8836.1, "total_tokens": 25614976} +{"current_steps": 37995, "total_steps": 204665, "loss": 0.0874, "lr": 1.955651531182052e-06, "epoch": 0.9282241712066059, "percentage": 18.56, "elapsed_time": "0:48:19", "remaining_time": "3:31:57", "throughput": 8836.24, "total_tokens": 25618496} +{"current_steps": 38000, "total_steps": 204665, "loss": 0.1874, "lr": 1.955626413468501e-06, "epoch": 0.9283463220384531, "percentage": 18.57, "elapsed_time": "0:48:19", "remaining_time": "3:31:57", "throughput": 8836.24, "total_tokens": 25621504} +{"current_steps": 38005, "total_steps": 204665, "loss": 0.1164, "lr": 1.9556012888053775e-06, "epoch": 0.9284684728703002, "percentage": 18.57, "elapsed_time": "0:48:19", "remaining_time": "3:31:56", "throughput": 8836.39, "total_tokens": 25625088} +{"current_steps": 38010, "total_steps": 204665, "loss": 0.2076, "lr": 1.955576157192864e-06, "epoch": 0.9285906237021474, "percentage": 18.57, "elapsed_time": "0:48:20", "remaining_time": "3:31:56", "throughput": 8836.48, "total_tokens": 25628480} +{"current_steps": 38015, "total_steps": 204665, "loss": 0.135, "lr": 1.9555510186311445e-06, "epoch": 0.9287127745339946, "percentage": 18.57, "elapsed_time": "0:48:20", "remaining_time": "3:31:55", "throughput": 8836.62, "total_tokens": 25632000} +{"current_steps": 38020, "total_steps": 204665, "loss": 0.1191, "lr": 1.9555258731204e-06, "epoch": 0.9288349253658418, "percentage": 18.58, "elapsed_time": "0:48:21", "remaining_time": "3:31:55", "throughput": 8836.69, "total_tokens": 25635264} +{"current_steps": 38025, "total_steps": 204665, "loss": 0.2239, "lr": 1.955500720660815e-06, "epoch": 0.9289570761976889, "percentage": 18.58, "elapsed_time": "0:48:21", "remaining_time": "3:31:54", "throughput": 8836.78, "total_tokens": 25638656} +{"current_steps": 38030, "total_steps": 204665, "loss": 0.0959, "lr": 1.9554755612525716e-06, "epoch": 0.929079227029536, "percentage": 18.58, "elapsed_time": "0:48:21", "remaining_time": "3:31:54", "throughput": 8836.86, "total_tokens": 25641920} +{"current_steps": 38035, "total_steps": 204665, "loss": 0.074, "lr": 1.9554503948958525e-06, "epoch": 0.9292013778613832, "percentage": 18.58, "elapsed_time": "0:48:22", "remaining_time": "3:31:53", "throughput": 8837.08, "total_tokens": 25645760} +{"current_steps": 38040, "total_steps": 204665, "loss": 0.149, "lr": 1.955425221590842e-06, "epoch": 0.9293235286932304, "percentage": 18.59, "elapsed_time": "0:48:22", "remaining_time": "3:31:53", "throughput": 8837.22, "total_tokens": 25649216} +{"current_steps": 38045, "total_steps": 204665, "loss": 0.0588, "lr": 1.9554000413377218e-06, "epoch": 0.9294456795250776, "percentage": 18.59, "elapsed_time": "0:48:22", "remaining_time": "3:31:52", "throughput": 8837.2, "total_tokens": 25652160} +{"current_steps": 38050, "total_steps": 204665, "loss": 0.1236, "lr": 1.9553748541366755e-06, "epoch": 0.9295678303569247, "percentage": 18.59, "elapsed_time": "0:48:23", "remaining_time": "3:31:52", "throughput": 8837.27, "total_tokens": 25655424} +{"current_steps": 38055, "total_steps": 204665, "loss": 0.0575, "lr": 1.9553496599878865e-06, "epoch": 0.9296899811887719, "percentage": 18.59, "elapsed_time": "0:48:23", "remaining_time": "3:31:51", "throughput": 8837.44, "total_tokens": 25659072} +{"current_steps": 38060, "total_steps": 204665, "loss": 0.0297, "lr": 1.9553244588915375e-06, "epoch": 0.9298121320206191, "percentage": 18.6, "elapsed_time": "0:48:23", "remaining_time": "3:31:51", "throughput": 8837.44, "total_tokens": 25662080} +{"current_steps": 38065, "total_steps": 204665, "loss": 0.0378, "lr": 1.9552992508478124e-06, "epoch": 0.9299342828524663, "percentage": 18.6, "elapsed_time": "0:48:24", "remaining_time": "3:31:50", "throughput": 8837.45, "total_tokens": 25665152} +{"current_steps": 38070, "total_steps": 204665, "loss": 0.1424, "lr": 1.955274035856894e-06, "epoch": 0.9300564336843133, "percentage": 18.6, "elapsed_time": "0:48:24", "remaining_time": "3:31:50", "throughput": 8837.63, "total_tokens": 25668800} +{"current_steps": 38075, "total_steps": 204665, "loss": 0.1317, "lr": 1.955248813918966e-06, "epoch": 0.9301785845161605, "percentage": 18.6, "elapsed_time": "0:48:24", "remaining_time": "3:31:49", "throughput": 8837.69, "total_tokens": 25672064} +{"current_steps": 38080, "total_steps": 204665, "loss": 0.073, "lr": 1.9552235850342115e-06, "epoch": 0.9303007353480077, "percentage": 18.61, "elapsed_time": "0:48:25", "remaining_time": "3:31:49", "throughput": 8837.82, "total_tokens": 25675520} +{"current_steps": 38085, "total_steps": 204665, "loss": 0.1656, "lr": 1.955198349202814e-06, "epoch": 0.9304228861798549, "percentage": 18.61, "elapsed_time": "0:48:25", "remaining_time": "3:31:48", "throughput": 8837.92, "total_tokens": 25678912} +{"current_steps": 38090, "total_steps": 204665, "loss": 0.0914, "lr": 1.9551731064249577e-06, "epoch": 0.9305450370117021, "percentage": 18.61, "elapsed_time": "0:48:25", "remaining_time": "3:31:47", "throughput": 8837.97, "total_tokens": 25682112} +{"current_steps": 38095, "total_steps": 204665, "loss": 0.2259, "lr": 1.9551478567008254e-06, "epoch": 0.9306671878435492, "percentage": 18.61, "elapsed_time": "0:48:26", "remaining_time": "3:31:47", "throughput": 8837.97, "total_tokens": 25685120} +{"current_steps": 38100, "total_steps": 204665, "loss": 0.1399, "lr": 1.955122600030601e-06, "epoch": 0.9307893386753964, "percentage": 18.62, "elapsed_time": "0:48:26", "remaining_time": "3:31:46", "throughput": 8837.92, "total_tokens": 25687936} +{"current_steps": 38105, "total_steps": 204665, "loss": 0.2016, "lr": 1.9550973364144683e-06, "epoch": 0.9309114895072436, "percentage": 18.62, "elapsed_time": "0:48:26", "remaining_time": "3:31:46", "throughput": 8838.01, "total_tokens": 25691264} +{"current_steps": 38110, "total_steps": 204665, "loss": 0.2067, "lr": 1.9550720658526106e-06, "epoch": 0.9310336403390908, "percentage": 18.62, "elapsed_time": "0:48:27", "remaining_time": "3:31:45", "throughput": 8838.07, "total_tokens": 25694528} +{"current_steps": 38115, "total_steps": 204665, "loss": 0.0582, "lr": 1.9550467883452123e-06, "epoch": 0.9311557911709378, "percentage": 18.62, "elapsed_time": "0:48:27", "remaining_time": "3:31:45", "throughput": 8838.23, "total_tokens": 25698112} +{"current_steps": 38120, "total_steps": 204665, "loss": 0.1722, "lr": 1.955021503892457e-06, "epoch": 0.931277942002785, "percentage": 18.63, "elapsed_time": "0:48:27", "remaining_time": "3:31:44", "throughput": 8838.31, "total_tokens": 25701440} +{"current_steps": 38125, "total_steps": 204665, "loss": 0.1415, "lr": 1.9549962124945276e-06, "epoch": 0.9314000928346322, "percentage": 18.63, "elapsed_time": "0:48:28", "remaining_time": "3:31:44", "throughput": 8838.37, "total_tokens": 25704640} +{"current_steps": 38130, "total_steps": 204665, "loss": 0.1866, "lr": 1.9549709141516097e-06, "epoch": 0.9315222436664794, "percentage": 18.63, "elapsed_time": "0:48:28", "remaining_time": "3:31:43", "throughput": 8838.44, "total_tokens": 25707904} +{"current_steps": 38135, "total_steps": 204665, "loss": 0.1155, "lr": 1.9549456088638863e-06, "epoch": 0.9316443944983266, "percentage": 18.63, "elapsed_time": "0:48:28", "remaining_time": "3:31:43", "throughput": 8838.44, "total_tokens": 25710912} +{"current_steps": 38140, "total_steps": 204665, "loss": 0.0837, "lr": 1.954920296631541e-06, "epoch": 0.9317665453301737, "percentage": 18.64, "elapsed_time": "0:48:29", "remaining_time": "3:31:42", "throughput": 8838.62, "total_tokens": 25714560} +{"current_steps": 38145, "total_steps": 204665, "loss": 0.0481, "lr": 1.9548949774547593e-06, "epoch": 0.9318886961620209, "percentage": 18.64, "elapsed_time": "0:48:29", "remaining_time": "3:31:42", "throughput": 8838.7, "total_tokens": 25717824} +{"current_steps": 38150, "total_steps": 204665, "loss": 0.1506, "lr": 1.954869651333724e-06, "epoch": 0.932010846993868, "percentage": 18.64, "elapsed_time": "0:48:30", "remaining_time": "3:31:41", "throughput": 8838.71, "total_tokens": 25720896} +{"current_steps": 38155, "total_steps": 204665, "loss": 0.0912, "lr": 1.95484431826862e-06, "epoch": 0.9321329978257152, "percentage": 18.64, "elapsed_time": "0:48:30", "remaining_time": "3:31:40", "throughput": 8838.78, "total_tokens": 25724160} +{"current_steps": 38160, "total_steps": 204665, "loss": 0.0612, "lr": 1.9548189782596308e-06, "epoch": 0.9322551486575623, "percentage": 18.65, "elapsed_time": "0:48:30", "remaining_time": "3:31:40", "throughput": 8838.84, "total_tokens": 25727360} +{"current_steps": 38165, "total_steps": 204665, "loss": 0.134, "lr": 1.9547936313069416e-06, "epoch": 0.9323772994894095, "percentage": 18.65, "elapsed_time": "0:48:31", "remaining_time": "3:31:39", "throughput": 8838.95, "total_tokens": 25730752} +{"current_steps": 38170, "total_steps": 204665, "loss": 0.1049, "lr": 1.9547682774107368e-06, "epoch": 0.9324994503212567, "percentage": 18.65, "elapsed_time": "0:48:31", "remaining_time": "3:31:39", "throughput": 8838.99, "total_tokens": 25733888} +{"current_steps": 38175, "total_steps": 204665, "loss": 0.0668, "lr": 1.954742916571199e-06, "epoch": 0.9326216011531039, "percentage": 18.65, "elapsed_time": "0:48:31", "remaining_time": "3:31:38", "throughput": 8839.09, "total_tokens": 25737216} +{"current_steps": 38180, "total_steps": 204665, "loss": 0.045, "lr": 1.954717548788515e-06, "epoch": 0.932743751984951, "percentage": 18.65, "elapsed_time": "0:48:32", "remaining_time": "3:31:38", "throughput": 8839.05, "total_tokens": 25740096} +{"current_steps": 38185, "total_steps": 204665, "loss": 0.141, "lr": 1.954692174062868e-06, "epoch": 0.9328659028167982, "percentage": 18.66, "elapsed_time": "0:48:32", "remaining_time": "3:31:37", "throughput": 8839.04, "total_tokens": 25743040} +{"current_steps": 38190, "total_steps": 204665, "loss": 0.2554, "lr": 1.9546667923944424e-06, "epoch": 0.9329880536486453, "percentage": 18.66, "elapsed_time": "0:48:32", "remaining_time": "3:31:37", "throughput": 8839.13, "total_tokens": 25746368} +{"current_steps": 38195, "total_steps": 204665, "loss": 0.115, "lr": 1.954641403783423e-06, "epoch": 0.9331102044804925, "percentage": 18.66, "elapsed_time": "0:48:33", "remaining_time": "3:31:36", "throughput": 8839.19, "total_tokens": 25749568} +{"current_steps": 38200, "total_steps": 204665, "loss": 0.1343, "lr": 1.9546160082299952e-06, "epoch": 0.9332323553123397, "percentage": 18.66, "elapsed_time": "0:48:33", "remaining_time": "3:31:36", "throughput": 8839.36, "total_tokens": 25753216} +{"current_steps": 38205, "total_steps": 204665, "loss": 0.1828, "lr": 1.954590605734343e-06, "epoch": 0.9333545061441868, "percentage": 18.67, "elapsed_time": "0:48:33", "remaining_time": "3:31:35", "throughput": 8839.52, "total_tokens": 25756800} +{"current_steps": 38210, "total_steps": 204665, "loss": 0.0638, "lr": 1.9545651962966507e-06, "epoch": 0.933476656976034, "percentage": 18.67, "elapsed_time": "0:48:34", "remaining_time": "3:31:35", "throughput": 8839.54, "total_tokens": 25759872} +{"current_steps": 38215, "total_steps": 204665, "loss": 0.0519, "lr": 1.9545397799171034e-06, "epoch": 0.9335988078078812, "percentage": 18.67, "elapsed_time": "0:48:34", "remaining_time": "3:31:34", "throughput": 8839.69, "total_tokens": 25763456} +{"current_steps": 38220, "total_steps": 204665, "loss": 0.076, "lr": 1.9545143565958865e-06, "epoch": 0.9337209586397284, "percentage": 18.67, "elapsed_time": "0:48:34", "remaining_time": "3:31:33", "throughput": 8839.77, "total_tokens": 25766720} +{"current_steps": 38225, "total_steps": 204665, "loss": 0.0628, "lr": 1.954488926333184e-06, "epoch": 0.9338431094715754, "percentage": 18.68, "elapsed_time": "0:48:35", "remaining_time": "3:31:33", "throughput": 8839.77, "total_tokens": 25769728} +{"current_steps": 38230, "total_steps": 204665, "loss": 0.0699, "lr": 1.954463489129182e-06, "epoch": 0.9339652603034226, "percentage": 18.68, "elapsed_time": "0:48:35", "remaining_time": "3:31:32", "throughput": 8839.86, "total_tokens": 25773056} +{"current_steps": 38235, "total_steps": 204665, "loss": 0.1508, "lr": 1.9544380449840645e-06, "epoch": 0.9340874111352698, "percentage": 18.68, "elapsed_time": "0:48:35", "remaining_time": "3:31:32", "throughput": 8840.02, "total_tokens": 25776640} +{"current_steps": 38240, "total_steps": 204665, "loss": 0.0959, "lr": 1.9544125938980164e-06, "epoch": 0.934209561967117, "percentage": 18.68, "elapsed_time": "0:48:36", "remaining_time": "3:31:31", "throughput": 8840.05, "total_tokens": 25779776} +{"current_steps": 38245, "total_steps": 204665, "loss": 0.0699, "lr": 1.9543871358712237e-06, "epoch": 0.9343317127989642, "percentage": 18.69, "elapsed_time": "0:48:36", "remaining_time": "3:31:31", "throughput": 8840.06, "total_tokens": 25782848} +{"current_steps": 38250, "total_steps": 204665, "loss": 0.0802, "lr": 1.954361670903871e-06, "epoch": 0.9344538636308113, "percentage": 18.69, "elapsed_time": "0:48:36", "remaining_time": "3:31:30", "throughput": 8840.13, "total_tokens": 25786048} +{"current_steps": 38255, "total_steps": 204665, "loss": 0.1007, "lr": 1.9543361989961432e-06, "epoch": 0.9345760144626585, "percentage": 18.69, "elapsed_time": "0:48:37", "remaining_time": "3:31:30", "throughput": 8840.16, "total_tokens": 25789184} +{"current_steps": 38260, "total_steps": 204665, "loss": 0.1763, "lr": 1.954310720148226e-06, "epoch": 0.9346981652945057, "percentage": 18.69, "elapsed_time": "0:48:37", "remaining_time": "3:31:29", "throughput": 8840.3, "total_tokens": 25792704} +{"current_steps": 38265, "total_steps": 204665, "loss": 0.1349, "lr": 1.954285234360305e-06, "epoch": 0.9348203161263529, "percentage": 18.7, "elapsed_time": "0:48:37", "remaining_time": "3:31:29", "throughput": 8840.44, "total_tokens": 25796224} +{"current_steps": 38270, "total_steps": 204665, "loss": 0.262, "lr": 1.9542597416325647e-06, "epoch": 0.9349424669581999, "percentage": 18.7, "elapsed_time": "0:48:38", "remaining_time": "3:31:28", "throughput": 8840.49, "total_tokens": 25799424} +{"current_steps": 38275, "total_steps": 204665, "loss": 0.1736, "lr": 1.954234241965191e-06, "epoch": 0.9350646177900471, "percentage": 18.7, "elapsed_time": "0:48:38", "remaining_time": "3:31:28", "throughput": 8840.61, "total_tokens": 25802880} +{"current_steps": 38280, "total_steps": 204665, "loss": 0.1193, "lr": 1.9542087353583694e-06, "epoch": 0.9351867686218943, "percentage": 18.7, "elapsed_time": "0:48:39", "remaining_time": "3:31:27", "throughput": 8840.75, "total_tokens": 25806400} +{"current_steps": 38285, "total_steps": 204665, "loss": 0.0837, "lr": 1.9541832218122846e-06, "epoch": 0.9353089194537415, "percentage": 18.71, "elapsed_time": "0:48:39", "remaining_time": "3:31:27", "throughput": 8840.87, "total_tokens": 25809856} +{"current_steps": 38290, "total_steps": 204665, "loss": 0.0656, "lr": 1.9541577013271233e-06, "epoch": 0.9354310702855887, "percentage": 18.71, "elapsed_time": "0:48:39", "remaining_time": "3:31:26", "throughput": 8840.91, "total_tokens": 25812992} +{"current_steps": 38295, "total_steps": 204665, "loss": 0.0843, "lr": 1.9541321739030703e-06, "epoch": 0.9355532211174358, "percentage": 18.71, "elapsed_time": "0:48:40", "remaining_time": "3:31:26", "throughput": 8840.94, "total_tokens": 25816128} +{"current_steps": 38300, "total_steps": 204665, "loss": 0.1442, "lr": 1.954106639540312e-06, "epoch": 0.935675371949283, "percentage": 18.71, "elapsed_time": "0:48:40", "remaining_time": "3:31:25", "throughput": 8841.01, "total_tokens": 25819392} +{"current_steps": 38305, "total_steps": 204665, "loss": 0.055, "lr": 1.954081098239033e-06, "epoch": 0.9357975227811302, "percentage": 18.72, "elapsed_time": "0:48:40", "remaining_time": "3:31:24", "throughput": 8841.14, "total_tokens": 25822912} +{"current_steps": 38310, "total_steps": 204665, "loss": 0.2042, "lr": 1.9540555499994197e-06, "epoch": 0.9359196736129773, "percentage": 18.72, "elapsed_time": "0:48:41", "remaining_time": "3:31:24", "throughput": 8841.18, "total_tokens": 25826048} +{"current_steps": 38315, "total_steps": 204665, "loss": 0.2489, "lr": 1.954029994821658e-06, "epoch": 0.9360418244448244, "percentage": 18.72, "elapsed_time": "0:48:41", "remaining_time": "3:31:23", "throughput": 8841.21, "total_tokens": 25829120} +{"current_steps": 38320, "total_steps": 204665, "loss": 0.1075, "lr": 1.9540044327059336e-06, "epoch": 0.9361639752766716, "percentage": 18.72, "elapsed_time": "0:48:41", "remaining_time": "3:31:23", "throughput": 8841.26, "total_tokens": 25832320} +{"current_steps": 38325, "total_steps": 204665, "loss": 0.1052, "lr": 1.9539788636524326e-06, "epoch": 0.9362861261085188, "percentage": 18.73, "elapsed_time": "0:48:42", "remaining_time": "3:31:22", "throughput": 8841.33, "total_tokens": 25835584} +{"current_steps": 38330, "total_steps": 204665, "loss": 0.0676, "lr": 1.9539532876613404e-06, "epoch": 0.936408276940366, "percentage": 18.73, "elapsed_time": "0:48:42", "remaining_time": "3:31:22", "throughput": 8841.38, "total_tokens": 25838720} +{"current_steps": 38335, "total_steps": 204665, "loss": 0.0935, "lr": 1.9539277047328433e-06, "epoch": 0.9365304277722132, "percentage": 18.73, "elapsed_time": "0:48:42", "remaining_time": "3:31:21", "throughput": 8841.55, "total_tokens": 25842304} +{"current_steps": 38340, "total_steps": 204665, "loss": 0.0629, "lr": 1.9539021148671274e-06, "epoch": 0.9366525786040603, "percentage": 18.73, "elapsed_time": "0:48:43", "remaining_time": "3:31:21", "throughput": 8841.6, "total_tokens": 25845504} +{"current_steps": 38345, "total_steps": 204665, "loss": 0.2716, "lr": 1.953876518064379e-06, "epoch": 0.9367747294359074, "percentage": 18.74, "elapsed_time": "0:48:43", "remaining_time": "3:31:20", "throughput": 8841.6, "total_tokens": 25848512} +{"current_steps": 38350, "total_steps": 204665, "loss": 0.0158, "lr": 1.9538509143247834e-06, "epoch": 0.9368968802677546, "percentage": 18.74, "elapsed_time": "0:48:43", "remaining_time": "3:31:20", "throughput": 8841.72, "total_tokens": 25851968} +{"current_steps": 38355, "total_steps": 204665, "loss": 0.0655, "lr": 1.953825303648528e-06, "epoch": 0.9370190310996018, "percentage": 18.74, "elapsed_time": "0:48:44", "remaining_time": "3:31:19", "throughput": 8841.73, "total_tokens": 25854976} +{"current_steps": 38360, "total_steps": 204665, "loss": 0.0877, "lr": 1.9537996860357983e-06, "epoch": 0.9371411819314489, "percentage": 18.74, "elapsed_time": "0:48:44", "remaining_time": "3:31:18", "throughput": 8841.78, "total_tokens": 25858176} +{"current_steps": 38365, "total_steps": 204665, "loss": 0.077, "lr": 1.9537740614867806e-06, "epoch": 0.9372633327632961, "percentage": 18.75, "elapsed_time": "0:48:44", "remaining_time": "3:31:18", "throughput": 8842.03, "total_tokens": 25862080} +{"current_steps": 38370, "total_steps": 204665, "loss": 0.1235, "lr": 1.953748430001661e-06, "epoch": 0.9373854835951433, "percentage": 18.75, "elapsed_time": "0:48:45", "remaining_time": "3:31:18", "throughput": 8842.13, "total_tokens": 25865472} +{"current_steps": 38375, "total_steps": 204665, "loss": 0.2081, "lr": 1.9537227915806273e-06, "epoch": 0.9375076344269905, "percentage": 18.75, "elapsed_time": "0:48:45", "remaining_time": "3:31:17", "throughput": 8842.37, "total_tokens": 25869376} +{"current_steps": 38380, "total_steps": 204665, "loss": 0.0825, "lr": 1.953697146223864e-06, "epoch": 0.9376297852588377, "percentage": 18.75, "elapsed_time": "0:48:45", "remaining_time": "3:31:16", "throughput": 8842.43, "total_tokens": 25872576} +{"current_steps": 38385, "total_steps": 204665, "loss": 0.1068, "lr": 1.953671493931559e-06, "epoch": 0.9377519360906847, "percentage": 18.76, "elapsed_time": "0:48:46", "remaining_time": "3:31:16", "throughput": 8842.49, "total_tokens": 25875776} +{"current_steps": 38390, "total_steps": 204665, "loss": 0.127, "lr": 1.9536458347038986e-06, "epoch": 0.9378740869225319, "percentage": 18.76, "elapsed_time": "0:48:46", "remaining_time": "3:31:15", "throughput": 8842.61, "total_tokens": 25879232} +{"current_steps": 38395, "total_steps": 204665, "loss": 0.1061, "lr": 1.9536201685410687e-06, "epoch": 0.9379962377543791, "percentage": 18.76, "elapsed_time": "0:48:46", "remaining_time": "3:31:15", "throughput": 8842.59, "total_tokens": 25882112} +{"current_steps": 38400, "total_steps": 204665, "loss": 0.0392, "lr": 1.9535944954432564e-06, "epoch": 0.9381183885862263, "percentage": 18.76, "elapsed_time": "0:48:47", "remaining_time": "3:31:14", "throughput": 8842.71, "total_tokens": 25885568} +{"current_steps": 38405, "total_steps": 204665, "loss": 0.0759, "lr": 1.953568815410649e-06, "epoch": 0.9382405394180734, "percentage": 18.76, "elapsed_time": "0:48:47", "remaining_time": "3:31:14", "throughput": 8842.85, "total_tokens": 25889088} +{"current_steps": 38410, "total_steps": 204665, "loss": 0.1701, "lr": 1.953543128443432e-06, "epoch": 0.9383626902499206, "percentage": 18.77, "elapsed_time": "0:48:48", "remaining_time": "3:31:13", "throughput": 8842.95, "total_tokens": 25892480} +{"current_steps": 38415, "total_steps": 204665, "loss": 0.106, "lr": 1.9535174345417936e-06, "epoch": 0.9384848410817678, "percentage": 18.77, "elapsed_time": "0:48:48", "remaining_time": "3:31:13", "throughput": 8842.91, "total_tokens": 25895296} +{"current_steps": 38420, "total_steps": 204665, "loss": 0.152, "lr": 1.9534917337059194e-06, "epoch": 0.938606991913615, "percentage": 18.77, "elapsed_time": "0:48:48", "remaining_time": "3:31:12", "throughput": 8843.08, "total_tokens": 25898944} +{"current_steps": 38425, "total_steps": 204665, "loss": 0.2075, "lr": 1.9534660259359976e-06, "epoch": 0.938729142745462, "percentage": 18.77, "elapsed_time": "0:48:49", "remaining_time": "3:31:12", "throughput": 8843.16, "total_tokens": 25902208} +{"current_steps": 38430, "total_steps": 204665, "loss": 0.0861, "lr": 1.9534403112322137e-06, "epoch": 0.9388512935773092, "percentage": 18.78, "elapsed_time": "0:48:49", "remaining_time": "3:31:11", "throughput": 8843.16, "total_tokens": 25905216} +{"current_steps": 38435, "total_steps": 204665, "loss": 0.1235, "lr": 1.9534145895947557e-06, "epoch": 0.9389734444091564, "percentage": 18.78, "elapsed_time": "0:48:49", "remaining_time": "3:31:11", "throughput": 8843.2, "total_tokens": 25908352} +{"current_steps": 38440, "total_steps": 204665, "loss": 0.1441, "lr": 1.95338886102381e-06, "epoch": 0.9390955952410036, "percentage": 18.78, "elapsed_time": "0:48:50", "remaining_time": "3:31:10", "throughput": 8843.41, "total_tokens": 25912128} +{"current_steps": 38445, "total_steps": 204665, "loss": 0.0406, "lr": 1.9533631255195643e-06, "epoch": 0.9392177460728508, "percentage": 18.78, "elapsed_time": "0:48:50", "remaining_time": "3:31:10", "throughput": 8843.49, "total_tokens": 25915456} +{"current_steps": 38450, "total_steps": 204665, "loss": 0.0632, "lr": 1.9533373830822056e-06, "epoch": 0.9393398969046979, "percentage": 18.79, "elapsed_time": "0:48:50", "remaining_time": "3:31:09", "throughput": 8843.58, "total_tokens": 25918784} +{"current_steps": 38455, "total_steps": 204665, "loss": 0.1228, "lr": 1.953311633711921e-06, "epoch": 0.9394620477365451, "percentage": 18.79, "elapsed_time": "0:48:51", "remaining_time": "3:31:08", "throughput": 8843.65, "total_tokens": 25922048} +{"current_steps": 38460, "total_steps": 204665, "loss": 0.0834, "lr": 1.953285877408898e-06, "epoch": 0.9395841985683923, "percentage": 18.79, "elapsed_time": "0:48:51", "remaining_time": "3:31:08", "throughput": 8843.67, "total_tokens": 25925120} +{"current_steps": 38465, "total_steps": 204665, "loss": 0.0435, "lr": 1.9532601141733232e-06, "epoch": 0.9397063494002394, "percentage": 18.79, "elapsed_time": "0:48:51", "remaining_time": "3:31:07", "throughput": 8843.87, "total_tokens": 25928832} +{"current_steps": 38470, "total_steps": 204665, "loss": 0.0766, "lr": 1.953234344005385e-06, "epoch": 0.9398285002320865, "percentage": 18.8, "elapsed_time": "0:48:52", "remaining_time": "3:31:07", "throughput": 8843.97, "total_tokens": 25932224} +{"current_steps": 38475, "total_steps": 204665, "loss": 0.1421, "lr": 1.95320856690527e-06, "epoch": 0.9399506510639337, "percentage": 18.8, "elapsed_time": "0:48:52", "remaining_time": "3:31:06", "throughput": 8844.08, "total_tokens": 25935616} +{"current_steps": 38480, "total_steps": 204665, "loss": 0.147, "lr": 1.953182782873166e-06, "epoch": 0.9400728018957809, "percentage": 18.8, "elapsed_time": "0:48:52", "remaining_time": "3:31:06", "throughput": 8844.1, "total_tokens": 25938688} +{"current_steps": 38485, "total_steps": 204665, "loss": 0.0807, "lr": 1.95315699190926e-06, "epoch": 0.9401949527276281, "percentage": 18.8, "elapsed_time": "0:48:53", "remaining_time": "3:31:05", "throughput": 8844.21, "total_tokens": 25942080} +{"current_steps": 38490, "total_steps": 204665, "loss": 0.1887, "lr": 1.9531311940137404e-06, "epoch": 0.9403171035594753, "percentage": 18.81, "elapsed_time": "0:48:53", "remaining_time": "3:31:05", "throughput": 8844.28, "total_tokens": 25945344} +{"current_steps": 38495, "total_steps": 204665, "loss": 0.1246, "lr": 1.9531053891867944e-06, "epoch": 0.9404392543913224, "percentage": 18.81, "elapsed_time": "0:48:53", "remaining_time": "3:31:04", "throughput": 8844.47, "total_tokens": 25949056} +{"current_steps": 38500, "total_steps": 204665, "loss": 0.072, "lr": 1.9530795774286096e-06, "epoch": 0.9405614052231696, "percentage": 18.81, "elapsed_time": "0:48:54", "remaining_time": "3:31:04", "throughput": 8844.63, "total_tokens": 25952640} +{"current_steps": 38505, "total_steps": 204665, "loss": 0.1588, "lr": 1.9530537587393735e-06, "epoch": 0.9406835560550167, "percentage": 18.81, "elapsed_time": "0:48:54", "remaining_time": "3:31:03", "throughput": 8844.67, "total_tokens": 25955776} +{"current_steps": 38510, "total_steps": 204665, "loss": 0.0621, "lr": 1.9530279331192747e-06, "epoch": 0.9408057068868639, "percentage": 18.82, "elapsed_time": "0:48:54", "remaining_time": "3:31:03", "throughput": 8844.72, "total_tokens": 25958976} +{"current_steps": 38515, "total_steps": 204665, "loss": 0.0771, "lr": 1.9530021005685e-06, "epoch": 0.940927857718711, "percentage": 18.82, "elapsed_time": "0:48:55", "remaining_time": "3:31:02", "throughput": 8844.89, "total_tokens": 25962624} +{"current_steps": 38520, "total_steps": 204665, "loss": 0.0954, "lr": 1.952976261087238e-06, "epoch": 0.9410500085505582, "percentage": 18.82, "elapsed_time": "0:48:55", "remaining_time": "3:31:02", "throughput": 8844.94, "total_tokens": 25965824} +{"current_steps": 38525, "total_steps": 204665, "loss": 0.0415, "lr": 1.9529504146756757e-06, "epoch": 0.9411721593824054, "percentage": 18.82, "elapsed_time": "0:48:56", "remaining_time": "3:31:01", "throughput": 8845.13, "total_tokens": 25969536} +{"current_steps": 38530, "total_steps": 204665, "loss": 0.0443, "lr": 1.952924561334002e-06, "epoch": 0.9412943102142526, "percentage": 18.83, "elapsed_time": "0:48:56", "remaining_time": "3:31:01", "throughput": 8845.16, "total_tokens": 25972672} +{"current_steps": 38535, "total_steps": 204665, "loss": 0.1361, "lr": 1.952898701062405e-06, "epoch": 0.9414164610460998, "percentage": 18.83, "elapsed_time": "0:48:56", "remaining_time": "3:31:00", "throughput": 8845.24, "total_tokens": 25976000} +{"current_steps": 38540, "total_steps": 204665, "loss": 0.128, "lr": 1.952872833861072e-06, "epoch": 0.9415386118779469, "percentage": 18.83, "elapsed_time": "0:48:57", "remaining_time": "3:31:00", "throughput": 8845.46, "total_tokens": 25979840} +{"current_steps": 38545, "total_steps": 204665, "loss": 0.0814, "lr": 1.9528469597301915e-06, "epoch": 0.941660762709794, "percentage": 18.83, "elapsed_time": "0:48:57", "remaining_time": "3:30:59", "throughput": 8845.43, "total_tokens": 25982720} +{"current_steps": 38550, "total_steps": 204665, "loss": 0.0383, "lr": 1.9528210786699516e-06, "epoch": 0.9417829135416412, "percentage": 18.84, "elapsed_time": "0:48:57", "remaining_time": "3:30:59", "throughput": 8845.53, "total_tokens": 25986112} +{"current_steps": 38555, "total_steps": 204665, "loss": 0.1121, "lr": 1.9527951906805405e-06, "epoch": 0.9419050643734884, "percentage": 18.84, "elapsed_time": "0:48:58", "remaining_time": "3:30:58", "throughput": 8845.6, "total_tokens": 25989376} +{"current_steps": 38560, "total_steps": 204665, "loss": 0.051, "lr": 1.9527692957621466e-06, "epoch": 0.9420272152053355, "percentage": 18.84, "elapsed_time": "0:48:58", "remaining_time": "3:30:58", "throughput": 8845.72, "total_tokens": 25992832} +{"current_steps": 38565, "total_steps": 204665, "loss": 0.1363, "lr": 1.952743393914958e-06, "epoch": 0.9421493660371827, "percentage": 18.84, "elapsed_time": "0:48:58", "remaining_time": "3:30:57", "throughput": 8845.81, "total_tokens": 25996160} +{"current_steps": 38570, "total_steps": 204665, "loss": 0.0882, "lr": 1.952717485139163e-06, "epoch": 0.9422715168690299, "percentage": 18.85, "elapsed_time": "0:48:59", "remaining_time": "3:30:56", "throughput": 8845.88, "total_tokens": 25999424} +{"current_steps": 38575, "total_steps": 204665, "loss": 0.2171, "lr": 1.9526915694349508e-06, "epoch": 0.9423936677008771, "percentage": 18.85, "elapsed_time": "0:48:59", "remaining_time": "3:30:56", "throughput": 8846.1, "total_tokens": 26003200} +{"current_steps": 38580, "total_steps": 204665, "loss": 0.1181, "lr": 1.9526656468025087e-06, "epoch": 0.9425158185327243, "percentage": 18.85, "elapsed_time": "0:48:59", "remaining_time": "3:30:55", "throughput": 8846.15, "total_tokens": 26006400} +{"current_steps": 38585, "total_steps": 204665, "loss": 0.1448, "lr": 1.9526397172420262e-06, "epoch": 0.9426379693645713, "percentage": 18.85, "elapsed_time": "0:49:00", "remaining_time": "3:30:55", "throughput": 8846.47, "total_tokens": 26010560} +{"current_steps": 38590, "total_steps": 204665, "loss": 0.1124, "lr": 1.9526137807536914e-06, "epoch": 0.9427601201964185, "percentage": 18.86, "elapsed_time": "0:49:00", "remaining_time": "3:30:54", "throughput": 8846.58, "total_tokens": 26013952} +{"current_steps": 38595, "total_steps": 204665, "loss": 0.1023, "lr": 1.9525878373376925e-06, "epoch": 0.9428822710282657, "percentage": 18.86, "elapsed_time": "0:49:00", "remaining_time": "3:30:54", "throughput": 8846.73, "total_tokens": 26017536} +{"current_steps": 38600, "total_steps": 204665, "loss": 0.0568, "lr": 1.952561886994219e-06, "epoch": 0.9430044218601129, "percentage": 18.86, "elapsed_time": "0:49:01", "remaining_time": "3:30:53", "throughput": 8846.84, "total_tokens": 26020992} +{"current_steps": 38605, "total_steps": 204665, "loss": 0.1432, "lr": 1.952535929723459e-06, "epoch": 0.94312657269196, "percentage": 18.86, "elapsed_time": "0:49:01", "remaining_time": "3:30:53", "throughput": 8846.88, "total_tokens": 26024192} +{"current_steps": 38610, "total_steps": 204665, "loss": 0.1304, "lr": 1.9525099655256017e-06, "epoch": 0.9432487235238072, "percentage": 18.86, "elapsed_time": "0:49:01", "remaining_time": "3:30:52", "throughput": 8846.9, "total_tokens": 26027264} +{"current_steps": 38615, "total_steps": 204665, "loss": 0.2119, "lr": 1.9524839944008356e-06, "epoch": 0.9433708743556544, "percentage": 18.87, "elapsed_time": "0:49:02", "remaining_time": "3:30:52", "throughput": 8847.05, "total_tokens": 26030784} +{"current_steps": 38620, "total_steps": 204665, "loss": 0.1403, "lr": 1.9524580163493504e-06, "epoch": 0.9434930251875016, "percentage": 18.87, "elapsed_time": "0:49:02", "remaining_time": "3:30:51", "throughput": 8847.34, "total_tokens": 26034880} +{"current_steps": 38625, "total_steps": 204665, "loss": 0.1103, "lr": 1.9524320313713333e-06, "epoch": 0.9436151760193487, "percentage": 18.87, "elapsed_time": "0:49:03", "remaining_time": "3:30:51", "throughput": 8847.49, "total_tokens": 26038464} +{"current_steps": 38630, "total_steps": 204665, "loss": 0.1217, "lr": 1.952406039466975e-06, "epoch": 0.9437373268511958, "percentage": 18.87, "elapsed_time": "0:49:03", "remaining_time": "3:30:50", "throughput": 8847.62, "total_tokens": 26041920} +{"current_steps": 38635, "total_steps": 204665, "loss": 0.0789, "lr": 1.9523800406364637e-06, "epoch": 0.943859477683043, "percentage": 18.88, "elapsed_time": "0:49:03", "remaining_time": "3:30:50", "throughput": 8847.73, "total_tokens": 26045376} +{"current_steps": 38640, "total_steps": 204665, "loss": 0.1065, "lr": 1.952354034879988e-06, "epoch": 0.9439816285148902, "percentage": 18.88, "elapsed_time": "0:49:04", "remaining_time": "3:30:49", "throughput": 8847.78, "total_tokens": 26048576} +{"current_steps": 38645, "total_steps": 204665, "loss": 0.0972, "lr": 1.9523280221977383e-06, "epoch": 0.9441037793467374, "percentage": 18.88, "elapsed_time": "0:49:04", "remaining_time": "3:30:49", "throughput": 8847.81, "total_tokens": 26051712} +{"current_steps": 38650, "total_steps": 204665, "loss": 0.1515, "lr": 1.9523020025899027e-06, "epoch": 0.9442259301785845, "percentage": 18.88, "elapsed_time": "0:49:04", "remaining_time": "3:30:48", "throughput": 8847.84, "total_tokens": 26054784} +{"current_steps": 38655, "total_steps": 204665, "loss": 0.0833, "lr": 1.952275976056671e-06, "epoch": 0.9443480810104317, "percentage": 18.89, "elapsed_time": "0:49:05", "remaining_time": "3:30:48", "throughput": 8847.84, "total_tokens": 26057792} +{"current_steps": 38660, "total_steps": 204665, "loss": 0.1098, "lr": 1.9522499425982325e-06, "epoch": 0.9444702318422789, "percentage": 18.89, "elapsed_time": "0:49:05", "remaining_time": "3:30:47", "throughput": 8847.93, "total_tokens": 26061120} +{"current_steps": 38665, "total_steps": 204665, "loss": 0.0668, "lr": 1.9522239022147756e-06, "epoch": 0.944592382674126, "percentage": 18.89, "elapsed_time": "0:49:05", "remaining_time": "3:30:47", "throughput": 8847.93, "total_tokens": 26064128} +{"current_steps": 38670, "total_steps": 204665, "loss": 0.1106, "lr": 1.952197854906491e-06, "epoch": 0.9447145335059732, "percentage": 18.89, "elapsed_time": "0:49:06", "remaining_time": "3:30:46", "throughput": 8848.12, "total_tokens": 26067840} +{"current_steps": 38675, "total_steps": 204665, "loss": 0.1118, "lr": 1.9521718006735673e-06, "epoch": 0.9448366843378203, "percentage": 18.9, "elapsed_time": "0:49:06", "remaining_time": "3:30:46", "throughput": 8848.18, "total_tokens": 26071040} +{"current_steps": 38680, "total_steps": 204665, "loss": 0.0134, "lr": 1.952145739516194e-06, "epoch": 0.9449588351696675, "percentage": 18.9, "elapsed_time": "0:49:06", "remaining_time": "3:30:45", "throughput": 8848.38, "total_tokens": 26074752} +{"current_steps": 38685, "total_steps": 204665, "loss": 0.1698, "lr": 1.9521196714345607e-06, "epoch": 0.9450809860015147, "percentage": 18.9, "elapsed_time": "0:49:07", "remaining_time": "3:30:45", "throughput": 8848.47, "total_tokens": 26078080} +{"current_steps": 38690, "total_steps": 204665, "loss": 0.0761, "lr": 1.9520935964288574e-06, "epoch": 0.9452031368333619, "percentage": 18.9, "elapsed_time": "0:49:07", "remaining_time": "3:30:44", "throughput": 8848.85, "total_tokens": 26082496} +{"current_steps": 38695, "total_steps": 204665, "loss": 0.0998, "lr": 1.9520675144992734e-06, "epoch": 0.945325287665209, "percentage": 18.91, "elapsed_time": "0:49:07", "remaining_time": "3:30:44", "throughput": 8848.97, "total_tokens": 26085952} +{"current_steps": 38700, "total_steps": 204665, "loss": 0.0951, "lr": 1.952041425645998e-06, "epoch": 0.9454474384970561, "percentage": 18.91, "elapsed_time": "0:49:08", "remaining_time": "3:30:43", "throughput": 8849.07, "total_tokens": 26089280} +{"current_steps": 38705, "total_steps": 204665, "loss": 0.1408, "lr": 1.9520153298692215e-06, "epoch": 0.9455695893289033, "percentage": 18.91, "elapsed_time": "0:49:08", "remaining_time": "3:30:43", "throughput": 8849.09, "total_tokens": 26092352} +{"current_steps": 38710, "total_steps": 204665, "loss": 0.1871, "lr": 1.9519892271691335e-06, "epoch": 0.9456917401607505, "percentage": 18.91, "elapsed_time": "0:49:08", "remaining_time": "3:30:42", "throughput": 8849.23, "total_tokens": 26095872} +{"current_steps": 38715, "total_steps": 204665, "loss": 0.1672, "lr": 1.951963117545924e-06, "epoch": 0.9458138909925976, "percentage": 18.92, "elapsed_time": "0:49:09", "remaining_time": "3:30:41", "throughput": 8849.32, "total_tokens": 26099200} +{"current_steps": 38720, "total_steps": 204665, "loss": 0.1707, "lr": 1.9519370009997825e-06, "epoch": 0.9459360418244448, "percentage": 18.92, "elapsed_time": "0:49:09", "remaining_time": "3:30:41", "throughput": 8849.35, "total_tokens": 26102272} +{"current_steps": 38725, "total_steps": 204665, "loss": 0.0986, "lr": 1.951910877530899e-06, "epoch": 0.946058192656292, "percentage": 18.92, "elapsed_time": "0:49:09", "remaining_time": "3:30:40", "throughput": 8849.48, "total_tokens": 26105792} +{"current_steps": 38730, "total_steps": 204665, "loss": 0.0627, "lr": 1.9518847471394633e-06, "epoch": 0.9461803434881392, "percentage": 18.92, "elapsed_time": "0:49:10", "remaining_time": "3:30:40", "throughput": 8849.74, "total_tokens": 26109760} +{"current_steps": 38735, "total_steps": 204665, "loss": 0.0619, "lr": 1.951858609825666e-06, "epoch": 0.9463024943199864, "percentage": 18.93, "elapsed_time": "0:49:10", "remaining_time": "3:30:39", "throughput": 8849.8, "total_tokens": 26112960} +{"current_steps": 38740, "total_steps": 204665, "loss": 0.0992, "lr": 1.9518324655896967e-06, "epoch": 0.9464246451518334, "percentage": 18.93, "elapsed_time": "0:49:11", "remaining_time": "3:30:39", "throughput": 8849.8, "total_tokens": 26115968} +{"current_steps": 38745, "total_steps": 204665, "loss": 0.1284, "lr": 1.9518063144317457e-06, "epoch": 0.9465467959836806, "percentage": 18.93, "elapsed_time": "0:49:11", "remaining_time": "3:30:38", "throughput": 8849.91, "total_tokens": 26119360} +{"current_steps": 38750, "total_steps": 204665, "loss": 0.0379, "lr": 1.9517801563520037e-06, "epoch": 0.9466689468155278, "percentage": 18.93, "elapsed_time": "0:49:11", "remaining_time": "3:30:38", "throughput": 8850.03, "total_tokens": 26122816} +{"current_steps": 38755, "total_steps": 204665, "loss": 0.0435, "lr": 1.95175399135066e-06, "epoch": 0.946791097647375, "percentage": 18.94, "elapsed_time": "0:49:12", "remaining_time": "3:30:37", "throughput": 8850.18, "total_tokens": 26126400} +{"current_steps": 38760, "total_steps": 204665, "loss": 0.1659, "lr": 1.951727819427905e-06, "epoch": 0.9469132484792221, "percentage": 18.94, "elapsed_time": "0:49:12", "remaining_time": "3:30:37", "throughput": 8850.2, "total_tokens": 26129472} +{"current_steps": 38765, "total_steps": 204665, "loss": 0.2924, "lr": 1.9517016405839296e-06, "epoch": 0.9470353993110693, "percentage": 18.94, "elapsed_time": "0:49:12", "remaining_time": "3:30:36", "throughput": 8850.25, "total_tokens": 26132672} +{"current_steps": 38770, "total_steps": 204665, "loss": 0.1363, "lr": 1.951675454818924e-06, "epoch": 0.9471575501429165, "percentage": 18.94, "elapsed_time": "0:49:13", "remaining_time": "3:30:36", "throughput": 8850.35, "total_tokens": 26136064} +{"current_steps": 38775, "total_steps": 204665, "loss": 0.193, "lr": 1.9516492621330785e-06, "epoch": 0.9472797009747637, "percentage": 18.95, "elapsed_time": "0:49:13", "remaining_time": "3:30:35", "throughput": 8850.48, "total_tokens": 26139520} +{"current_steps": 38780, "total_steps": 204665, "loss": 0.1092, "lr": 1.9516230625265835e-06, "epoch": 0.9474018518066109, "percentage": 18.95, "elapsed_time": "0:49:13", "remaining_time": "3:30:35", "throughput": 8850.71, "total_tokens": 26143360} +{"current_steps": 38785, "total_steps": 204665, "loss": 0.1376, "lr": 1.9515968559996295e-06, "epoch": 0.9475240026384579, "percentage": 18.95, "elapsed_time": "0:49:14", "remaining_time": "3:30:34", "throughput": 8850.78, "total_tokens": 26146624} +{"current_steps": 38790, "total_steps": 204665, "loss": 0.1381, "lr": 1.9515706425524075e-06, "epoch": 0.9476461534703051, "percentage": 18.95, "elapsed_time": "0:49:14", "remaining_time": "3:30:34", "throughput": 8850.86, "total_tokens": 26149952} +{"current_steps": 38795, "total_steps": 204665, "loss": 0.1833, "lr": 1.9515444221851075e-06, "epoch": 0.9477683043021523, "percentage": 18.96, "elapsed_time": "0:49:14", "remaining_time": "3:30:33", "throughput": 8850.91, "total_tokens": 26153088} +{"current_steps": 38800, "total_steps": 204665, "loss": 0.0472, "lr": 1.951518194897921e-06, "epoch": 0.9478904551339995, "percentage": 18.96, "elapsed_time": "0:49:15", "remaining_time": "3:30:33", "throughput": 8851.24, "total_tokens": 26157312} +{"current_steps": 38805, "total_steps": 204665, "loss": 0.062, "lr": 1.9514919606910378e-06, "epoch": 0.9480126059658466, "percentage": 18.96, "elapsed_time": "0:49:15", "remaining_time": "3:30:32", "throughput": 8851.24, "total_tokens": 26160320} +{"current_steps": 38810, "total_steps": 204665, "loss": 0.0837, "lr": 1.9514657195646492e-06, "epoch": 0.9481347567976938, "percentage": 18.96, "elapsed_time": "0:49:15", "remaining_time": "3:30:32", "throughput": 8851.42, "total_tokens": 26163968} +{"current_steps": 38815, "total_steps": 204665, "loss": 0.0822, "lr": 1.9514394715189464e-06, "epoch": 0.948256907629541, "percentage": 18.97, "elapsed_time": "0:49:16", "remaining_time": "3:30:31", "throughput": 8851.51, "total_tokens": 26167296} +{"current_steps": 38820, "total_steps": 204665, "loss": 0.0713, "lr": 1.9514132165541194e-06, "epoch": 0.9483790584613881, "percentage": 18.97, "elapsed_time": "0:49:16", "remaining_time": "3:30:31", "throughput": 8851.65, "total_tokens": 26170816} +{"current_steps": 38825, "total_steps": 204665, "loss": 0.109, "lr": 1.95138695467036e-06, "epoch": 0.9485012092932353, "percentage": 18.97, "elapsed_time": "0:49:16", "remaining_time": "3:30:30", "throughput": 8851.89, "total_tokens": 26174720} +{"current_steps": 38830, "total_steps": 204665, "loss": 0.073, "lr": 1.951360685867858e-06, "epoch": 0.9486233601250824, "percentage": 18.97, "elapsed_time": "0:49:17", "remaining_time": "3:30:30", "throughput": 8851.96, "total_tokens": 26177984} +{"current_steps": 38835, "total_steps": 204665, "loss": 0.0984, "lr": 1.951334410146806e-06, "epoch": 0.9487455109569296, "percentage": 18.97, "elapsed_time": "0:49:17", "remaining_time": "3:30:29", "throughput": 8852.11, "total_tokens": 26181568} +{"current_steps": 38840, "total_steps": 204665, "loss": 0.1195, "lr": 1.951308127507394e-06, "epoch": 0.9488676617887768, "percentage": 18.98, "elapsed_time": "0:49:18", "remaining_time": "3:30:29", "throughput": 8852.24, "total_tokens": 26185024} +{"current_steps": 38845, "total_steps": 204665, "loss": 0.1248, "lr": 1.9512818379498135e-06, "epoch": 0.948989812620624, "percentage": 18.98, "elapsed_time": "0:49:18", "remaining_time": "3:30:28", "throughput": 8852.32, "total_tokens": 26188352} +{"current_steps": 38850, "total_steps": 204665, "loss": 0.0851, "lr": 1.9512555414742557e-06, "epoch": 0.9491119634524711, "percentage": 18.98, "elapsed_time": "0:49:18", "remaining_time": "3:30:27", "throughput": 8852.4, "total_tokens": 26191616} +{"current_steps": 38855, "total_steps": 204665, "loss": 0.1412, "lr": 1.9512292380809116e-06, "epoch": 0.9492341142843183, "percentage": 18.98, "elapsed_time": "0:49:19", "remaining_time": "3:30:27", "throughput": 8852.73, "total_tokens": 26195840} +{"current_steps": 38860, "total_steps": 204665, "loss": 0.1485, "lr": 1.9512029277699726e-06, "epoch": 0.9493562651161654, "percentage": 18.99, "elapsed_time": "0:49:19", "remaining_time": "3:30:27", "throughput": 8852.85, "total_tokens": 26199296} +{"current_steps": 38865, "total_steps": 204665, "loss": 0.2412, "lr": 1.95117661054163e-06, "epoch": 0.9494784159480126, "percentage": 18.99, "elapsed_time": "0:49:19", "remaining_time": "3:30:26", "throughput": 8852.85, "total_tokens": 26202304} +{"current_steps": 38870, "total_steps": 204665, "loss": 0.1503, "lr": 1.9511502863960755e-06, "epoch": 0.9496005667798598, "percentage": 18.99, "elapsed_time": "0:49:20", "remaining_time": "3:30:25", "throughput": 8852.93, "total_tokens": 26205568} +{"current_steps": 38875, "total_steps": 204665, "loss": 0.0839, "lr": 1.9511239553334998e-06, "epoch": 0.9497227176117069, "percentage": 18.99, "elapsed_time": "0:49:20", "remaining_time": "3:30:25", "throughput": 8853.04, "total_tokens": 26208960} +{"current_steps": 38880, "total_steps": 204665, "loss": 0.0943, "lr": 1.9510976173540953e-06, "epoch": 0.9498448684435541, "percentage": 19.0, "elapsed_time": "0:49:20", "remaining_time": "3:30:24", "throughput": 8853.06, "total_tokens": 26212032} +{"current_steps": 38885, "total_steps": 204665, "loss": 0.1021, "lr": 1.951071272458053e-06, "epoch": 0.9499670192754013, "percentage": 19.0, "elapsed_time": "0:49:21", "remaining_time": "3:30:24", "throughput": 8853.34, "total_tokens": 26216064} +{"current_steps": 38890, "total_steps": 204665, "loss": 0.0941, "lr": 1.9510449206455644e-06, "epoch": 0.9500891701072485, "percentage": 19.0, "elapsed_time": "0:49:21", "remaining_time": "3:30:23", "throughput": 8853.35, "total_tokens": 26219136} +{"current_steps": 38895, "total_steps": 204665, "loss": 0.0121, "lr": 1.9510185619168216e-06, "epoch": 0.9502113209390955, "percentage": 19.0, "elapsed_time": "0:49:21", "remaining_time": "3:30:23", "throughput": 8853.41, "total_tokens": 26222336} +{"current_steps": 38900, "total_steps": 204665, "loss": 0.1273, "lr": 1.9509921962720163e-06, "epoch": 0.9503334717709427, "percentage": 19.01, "elapsed_time": "0:49:22", "remaining_time": "3:30:22", "throughput": 8853.43, "total_tokens": 26225408} +{"current_steps": 38905, "total_steps": 204665, "loss": 0.0294, "lr": 1.9509658237113394e-06, "epoch": 0.9504556226027899, "percentage": 19.01, "elapsed_time": "0:49:22", "remaining_time": "3:30:22", "throughput": 8853.44, "total_tokens": 26228480} +{"current_steps": 38910, "total_steps": 204665, "loss": 0.1051, "lr": 1.9509394442349836e-06, "epoch": 0.9505777734346371, "percentage": 19.01, "elapsed_time": "0:49:22", "remaining_time": "3:30:21", "throughput": 8853.44, "total_tokens": 26231488} +{"current_steps": 38915, "total_steps": 204665, "loss": 0.1074, "lr": 1.9509130578431405e-06, "epoch": 0.9506999242664843, "percentage": 19.01, "elapsed_time": "0:49:23", "remaining_time": "3:30:21", "throughput": 8853.56, "total_tokens": 26234944} +{"current_steps": 38920, "total_steps": 204665, "loss": 0.1077, "lr": 1.9508866645360018e-06, "epoch": 0.9508220750983314, "percentage": 19.02, "elapsed_time": "0:49:23", "remaining_time": "3:30:20", "throughput": 8853.62, "total_tokens": 26238208} +{"current_steps": 38925, "total_steps": 204665, "loss": 0.1287, "lr": 1.9508602643137593e-06, "epoch": 0.9509442259301786, "percentage": 19.02, "elapsed_time": "0:49:23", "remaining_time": "3:30:20", "throughput": 8853.65, "total_tokens": 26241344} +{"current_steps": 38930, "total_steps": 204665, "loss": 0.0629, "lr": 1.950833857176605e-06, "epoch": 0.9510663767620258, "percentage": 19.02, "elapsed_time": "0:49:24", "remaining_time": "3:30:19", "throughput": 8853.71, "total_tokens": 26244544} +{"current_steps": 38935, "total_steps": 204665, "loss": 0.0859, "lr": 1.9508074431247316e-06, "epoch": 0.951188527593873, "percentage": 19.02, "elapsed_time": "0:49:24", "remaining_time": "3:30:19", "throughput": 8853.76, "total_tokens": 26247744} +{"current_steps": 38940, "total_steps": 204665, "loss": 0.1654, "lr": 1.950781022158331e-06, "epoch": 0.95131067842572, "percentage": 19.03, "elapsed_time": "0:49:24", "remaining_time": "3:30:18", "throughput": 8853.94, "total_tokens": 26251392} +{"current_steps": 38945, "total_steps": 204665, "loss": 0.0961, "lr": 1.950754594277594e-06, "epoch": 0.9514328292575672, "percentage": 19.03, "elapsed_time": "0:49:25", "remaining_time": "3:30:17", "throughput": 8854.0, "total_tokens": 26254592} +{"current_steps": 38950, "total_steps": 204665, "loss": 0.0185, "lr": 1.9507281594827147e-06, "epoch": 0.9515549800894144, "percentage": 19.03, "elapsed_time": "0:49:25", "remaining_time": "3:30:17", "throughput": 8854.15, "total_tokens": 26258176} +{"current_steps": 38955, "total_steps": 204665, "loss": 0.0591, "lr": 1.9507017177738845e-06, "epoch": 0.9516771309212616, "percentage": 19.03, "elapsed_time": "0:49:25", "remaining_time": "3:30:16", "throughput": 8854.34, "total_tokens": 26261888} +{"current_steps": 38960, "total_steps": 204665, "loss": 0.0263, "lr": 1.9506752691512955e-06, "epoch": 0.9517992817531087, "percentage": 19.04, "elapsed_time": "0:49:26", "remaining_time": "3:30:16", "throughput": 8854.3, "total_tokens": 26264768} +{"current_steps": 38965, "total_steps": 204665, "loss": 0.0116, "lr": 1.9506488136151403e-06, "epoch": 0.9519214325849559, "percentage": 19.04, "elapsed_time": "0:49:26", "remaining_time": "3:30:15", "throughput": 8854.39, "total_tokens": 26268096} +{"current_steps": 38970, "total_steps": 204665, "loss": 0.3299, "lr": 1.9506223511656113e-06, "epoch": 0.9520435834168031, "percentage": 19.04, "elapsed_time": "0:49:27", "remaining_time": "3:30:15", "throughput": 8854.49, "total_tokens": 26271488} +{"current_steps": 38975, "total_steps": 204665, "loss": 0.2123, "lr": 1.9505958818029006e-06, "epoch": 0.9521657342486503, "percentage": 19.04, "elapsed_time": "0:49:27", "remaining_time": "3:30:14", "throughput": 8854.53, "total_tokens": 26274624} +{"current_steps": 38980, "total_steps": 204665, "loss": 0.0553, "lr": 1.9505694055272012e-06, "epoch": 0.9522878850804974, "percentage": 19.05, "elapsed_time": "0:49:27", "remaining_time": "3:30:14", "throughput": 8854.58, "total_tokens": 26277824} +{"current_steps": 38985, "total_steps": 204665, "loss": 0.0606, "lr": 1.9505429223387055e-06, "epoch": 0.9524100359123445, "percentage": 19.05, "elapsed_time": "0:49:28", "remaining_time": "3:30:13", "throughput": 8854.61, "total_tokens": 26280960} +{"current_steps": 38990, "total_steps": 204665, "loss": 0.1311, "lr": 1.9505164322376056e-06, "epoch": 0.9525321867441917, "percentage": 19.05, "elapsed_time": "0:49:28", "remaining_time": "3:30:13", "throughput": 8854.73, "total_tokens": 26284416} +{"current_steps": 38995, "total_steps": 204665, "loss": 0.2482, "lr": 1.950489935224095e-06, "epoch": 0.9526543375760389, "percentage": 19.05, "elapsed_time": "0:49:28", "remaining_time": "3:30:12", "throughput": 8854.85, "total_tokens": 26287872} +{"current_steps": 39000, "total_steps": 204665, "loss": 0.1956, "lr": 1.9504634312983655e-06, "epoch": 0.9527764884078861, "percentage": 19.06, "elapsed_time": "0:49:29", "remaining_time": "3:30:12", "throughput": 8854.91, "total_tokens": 26291136} +{"current_steps": 39005, "total_steps": 204665, "loss": 0.1214, "lr": 1.9504369204606107e-06, "epoch": 0.9528986392397332, "percentage": 19.06, "elapsed_time": "0:49:29", "remaining_time": "3:30:11", "throughput": 8854.93, "total_tokens": 26294208} +{"current_steps": 39010, "total_steps": 204665, "loss": 0.1255, "lr": 1.950410402711023e-06, "epoch": 0.9530207900715804, "percentage": 19.06, "elapsed_time": "0:49:29", "remaining_time": "3:30:11", "throughput": 8854.9, "total_tokens": 26297024} +{"current_steps": 39015, "total_steps": 204665, "loss": 0.0935, "lr": 1.950383878049795e-06, "epoch": 0.9531429409034275, "percentage": 19.06, "elapsed_time": "0:49:30", "remaining_time": "3:30:10", "throughput": 8855.09, "total_tokens": 26300736} +{"current_steps": 39020, "total_steps": 204665, "loss": 0.0614, "lr": 1.9503573464771197e-06, "epoch": 0.9532650917352747, "percentage": 19.07, "elapsed_time": "0:49:30", "remaining_time": "3:30:10", "throughput": 8855.19, "total_tokens": 26304128} +{"current_steps": 39025, "total_steps": 204665, "loss": 0.081, "lr": 1.9503308079931904e-06, "epoch": 0.9533872425671219, "percentage": 19.07, "elapsed_time": "0:49:30", "remaining_time": "3:30:09", "throughput": 8855.22, "total_tokens": 26307264} +{"current_steps": 39030, "total_steps": 204665, "loss": 0.1635, "lr": 1.9503042625981994e-06, "epoch": 0.953509393398969, "percentage": 19.07, "elapsed_time": "0:49:31", "remaining_time": "3:30:09", "throughput": 8855.36, "total_tokens": 26310784} +{"current_steps": 39035, "total_steps": 204665, "loss": 0.1404, "lr": 1.9502777102923407e-06, "epoch": 0.9536315442308162, "percentage": 19.07, "elapsed_time": "0:49:31", "remaining_time": "3:30:08", "throughput": 8855.38, "total_tokens": 26313856} +{"current_steps": 39040, "total_steps": 204665, "loss": 0.038, "lr": 1.950251151075807e-06, "epoch": 0.9537536950626634, "percentage": 19.08, "elapsed_time": "0:49:31", "remaining_time": "3:30:07", "throughput": 8855.5, "total_tokens": 26317312} +{"current_steps": 39045, "total_steps": 204665, "loss": 0.2091, "lr": 1.950224584948791e-06, "epoch": 0.9538758458945106, "percentage": 19.08, "elapsed_time": "0:49:32", "remaining_time": "3:30:07", "throughput": 8855.61, "total_tokens": 26320704} +{"current_steps": 39050, "total_steps": 204665, "loss": 0.0838, "lr": 1.9501980119114863e-06, "epoch": 0.9539979967263577, "percentage": 19.08, "elapsed_time": "0:49:32", "remaining_time": "3:30:06", "throughput": 8855.67, "total_tokens": 26323968} +{"current_steps": 39055, "total_steps": 204665, "loss": 0.0687, "lr": 1.9501714319640863e-06, "epoch": 0.9541201475582048, "percentage": 19.08, "elapsed_time": "0:49:32", "remaining_time": "3:30:06", "throughput": 8855.75, "total_tokens": 26327232} +{"current_steps": 39060, "total_steps": 204665, "loss": 0.1605, "lr": 1.950144845106784e-06, "epoch": 0.954242298390052, "percentage": 19.08, "elapsed_time": "0:49:33", "remaining_time": "3:30:05", "throughput": 8855.8, "total_tokens": 26330432} +{"current_steps": 39065, "total_steps": 204665, "loss": 0.1054, "lr": 1.950118251339773e-06, "epoch": 0.9543644492218992, "percentage": 19.09, "elapsed_time": "0:49:33", "remaining_time": "3:30:05", "throughput": 8855.88, "total_tokens": 26333760} +{"current_steps": 39070, "total_steps": 204665, "loss": 0.0807, "lr": 1.950091650663246e-06, "epoch": 0.9544866000537464, "percentage": 19.09, "elapsed_time": "0:49:33", "remaining_time": "3:30:04", "throughput": 8856.02, "total_tokens": 26337280} +{"current_steps": 39075, "total_steps": 204665, "loss": 0.0553, "lr": 1.950065043077397e-06, "epoch": 0.9546087508855935, "percentage": 19.09, "elapsed_time": "0:49:34", "remaining_time": "3:30:04", "throughput": 8856.16, "total_tokens": 26340800} +{"current_steps": 39080, "total_steps": 204665, "loss": 0.0795, "lr": 1.95003842858242e-06, "epoch": 0.9547309017174407, "percentage": 19.09, "elapsed_time": "0:49:34", "remaining_time": "3:30:03", "throughput": 8856.3, "total_tokens": 26344320} +{"current_steps": 39085, "total_steps": 204665, "loss": 0.1429, "lr": 1.9500118071785072e-06, "epoch": 0.9548530525492879, "percentage": 19.1, "elapsed_time": "0:49:35", "remaining_time": "3:30:03", "throughput": 8856.7, "total_tokens": 26348864} +{"current_steps": 39090, "total_steps": 204665, "loss": 0.0758, "lr": 1.949985178865854e-06, "epoch": 0.9549752033811351, "percentage": 19.1, "elapsed_time": "0:49:35", "remaining_time": "3:30:02", "throughput": 8856.79, "total_tokens": 26352192} +{"current_steps": 39095, "total_steps": 204665, "loss": 0.0745, "lr": 1.9499585436446522e-06, "epoch": 0.9550973542129821, "percentage": 19.1, "elapsed_time": "0:49:35", "remaining_time": "3:30:02", "throughput": 8856.77, "total_tokens": 26355136} +{"current_steps": 39100, "total_steps": 204665, "loss": 0.1496, "lr": 1.949931901515097e-06, "epoch": 0.9552195050448293, "percentage": 19.1, "elapsed_time": "0:49:36", "remaining_time": "3:30:01", "throughput": 8856.86, "total_tokens": 26358464} +{"current_steps": 39105, "total_steps": 204665, "loss": 0.0301, "lr": 1.949905252477381e-06, "epoch": 0.9553416558766765, "percentage": 19.11, "elapsed_time": "0:49:36", "remaining_time": "3:30:01", "throughput": 8857.02, "total_tokens": 26362112} +{"current_steps": 39110, "total_steps": 204665, "loss": 0.2, "lr": 1.949878596531699e-06, "epoch": 0.9554638067085237, "percentage": 19.11, "elapsed_time": "0:49:36", "remaining_time": "3:30:00", "throughput": 8857.21, "total_tokens": 26365824} +{"current_steps": 39115, "total_steps": 204665, "loss": 0.0912, "lr": 1.9498519336782445e-06, "epoch": 0.9555859575403709, "percentage": 19.11, "elapsed_time": "0:49:37", "remaining_time": "3:30:00", "throughput": 8857.32, "total_tokens": 26369280} +{"current_steps": 39120, "total_steps": 204665, "loss": 0.0706, "lr": 1.9498252639172107e-06, "epoch": 0.955708108372218, "percentage": 19.11, "elapsed_time": "0:49:37", "remaining_time": "3:29:59", "throughput": 8857.43, "total_tokens": 26372672} +{"current_steps": 39125, "total_steps": 204665, "loss": 0.1052, "lr": 1.9497985872487926e-06, "epoch": 0.9558302592040652, "percentage": 19.12, "elapsed_time": "0:49:37", "remaining_time": "3:29:59", "throughput": 8857.53, "total_tokens": 26376000} +{"current_steps": 39130, "total_steps": 204665, "loss": 0.1322, "lr": 1.949771903673183e-06, "epoch": 0.9559524100359124, "percentage": 19.12, "elapsed_time": "0:49:38", "remaining_time": "3:29:58", "throughput": 8857.59, "total_tokens": 26379264} +{"current_steps": 39135, "total_steps": 204665, "loss": 0.1274, "lr": 1.949745213190577e-06, "epoch": 0.9560745608677595, "percentage": 19.12, "elapsed_time": "0:49:38", "remaining_time": "3:29:58", "throughput": 8857.68, "total_tokens": 26382592} +{"current_steps": 39140, "total_steps": 204665, "loss": 0.0558, "lr": 1.9497185158011687e-06, "epoch": 0.9561967116996066, "percentage": 19.12, "elapsed_time": "0:49:38", "remaining_time": "3:29:57", "throughput": 8857.67, "total_tokens": 26385536} +{"current_steps": 39145, "total_steps": 204665, "loss": 0.0973, "lr": 1.9496918115051516e-06, "epoch": 0.9563188625314538, "percentage": 19.13, "elapsed_time": "0:49:39", "remaining_time": "3:29:57", "throughput": 8857.85, "total_tokens": 26389248} +{"current_steps": 39150, "total_steps": 204665, "loss": 0.1796, "lr": 1.9496651003027204e-06, "epoch": 0.956441013363301, "percentage": 19.13, "elapsed_time": "0:49:39", "remaining_time": "3:29:56", "throughput": 8857.9, "total_tokens": 26392384} +{"current_steps": 39155, "total_steps": 204665, "loss": 0.1227, "lr": 1.949638382194069e-06, "epoch": 0.9565631641951482, "percentage": 19.13, "elapsed_time": "0:49:39", "remaining_time": "3:29:56", "throughput": 8857.93, "total_tokens": 26395520} +{"current_steps": 39160, "total_steps": 204665, "loss": 0.0913, "lr": 1.949611657179392e-06, "epoch": 0.9566853150269954, "percentage": 19.13, "elapsed_time": "0:49:40", "remaining_time": "3:29:55", "throughput": 8858.09, "total_tokens": 26399104} +{"current_steps": 39165, "total_steps": 204665, "loss": 0.0726, "lr": 1.9495849252588835e-06, "epoch": 0.9568074658588425, "percentage": 19.14, "elapsed_time": "0:49:40", "remaining_time": "3:29:55", "throughput": 8858.21, "total_tokens": 26402560} +{"current_steps": 39170, "total_steps": 204665, "loss": 0.2176, "lr": 1.9495581864327378e-06, "epoch": 0.9569296166906897, "percentage": 19.14, "elapsed_time": "0:49:40", "remaining_time": "3:29:54", "throughput": 8858.39, "total_tokens": 26406272} +{"current_steps": 39175, "total_steps": 204665, "loss": 0.1195, "lr": 1.94953144070115e-06, "epoch": 0.9570517675225368, "percentage": 19.14, "elapsed_time": "0:49:41", "remaining_time": "3:29:54", "throughput": 8858.46, "total_tokens": 26409536} +{"current_steps": 39180, "total_steps": 204665, "loss": 0.0522, "lr": 1.949504688064314e-06, "epoch": 0.957173918354384, "percentage": 19.14, "elapsed_time": "0:49:41", "remaining_time": "3:29:53", "throughput": 8858.53, "total_tokens": 26412736} +{"current_steps": 39185, "total_steps": 204665, "loss": 0.1914, "lr": 1.949477928522424e-06, "epoch": 0.9572960691862311, "percentage": 19.15, "elapsed_time": "0:49:41", "remaining_time": "3:29:52", "throughput": 8858.69, "total_tokens": 26416320} +{"current_steps": 39190, "total_steps": 204665, "loss": 0.0892, "lr": 1.949451162075676e-06, "epoch": 0.9574182200180783, "percentage": 19.15, "elapsed_time": "0:49:42", "remaining_time": "3:29:52", "throughput": 8858.72, "total_tokens": 26419456} +{"current_steps": 39195, "total_steps": 204665, "loss": 0.0619, "lr": 1.9494243887242634e-06, "epoch": 0.9575403708499255, "percentage": 19.15, "elapsed_time": "0:49:42", "remaining_time": "3:29:51", "throughput": 8858.99, "total_tokens": 26423488} +{"current_steps": 39200, "total_steps": 204665, "loss": 0.136, "lr": 1.9493976084683814e-06, "epoch": 0.9576625216817727, "percentage": 19.15, "elapsed_time": "0:49:43", "remaining_time": "3:29:51", "throughput": 8859.19, "total_tokens": 26427264} +{"current_steps": 39205, "total_steps": 204665, "loss": 0.0504, "lr": 1.949370821308224e-06, "epoch": 0.9577846725136199, "percentage": 19.16, "elapsed_time": "0:49:43", "remaining_time": "3:29:50", "throughput": 8859.28, "total_tokens": 26430592} +{"current_steps": 39210, "total_steps": 204665, "loss": 0.0703, "lr": 1.9493440272439873e-06, "epoch": 0.957906823345467, "percentage": 19.16, "elapsed_time": "0:49:43", "remaining_time": "3:29:50", "throughput": 8859.34, "total_tokens": 26433856} +{"current_steps": 39215, "total_steps": 204665, "loss": 0.1319, "lr": 1.9493172262758656e-06, "epoch": 0.9580289741773141, "percentage": 19.16, "elapsed_time": "0:49:44", "remaining_time": "3:29:49", "throughput": 8859.37, "total_tokens": 26436992} +{"current_steps": 39220, "total_steps": 204665, "loss": 0.1383, "lr": 1.9492904184040532e-06, "epoch": 0.9581511250091613, "percentage": 19.16, "elapsed_time": "0:49:44", "remaining_time": "3:29:49", "throughput": 8859.4, "total_tokens": 26440128} +{"current_steps": 39225, "total_steps": 204665, "loss": 0.0417, "lr": 1.9492636036287457e-06, "epoch": 0.9582732758410085, "percentage": 19.17, "elapsed_time": "0:49:44", "remaining_time": "3:29:48", "throughput": 8859.51, "total_tokens": 26443584} +{"current_steps": 39230, "total_steps": 204665, "loss": 0.0836, "lr": 1.9492367819501383e-06, "epoch": 0.9583954266728556, "percentage": 19.17, "elapsed_time": "0:49:45", "remaining_time": "3:29:48", "throughput": 8859.6, "total_tokens": 26446912} +{"current_steps": 39235, "total_steps": 204665, "loss": 0.0688, "lr": 1.9492099533684254e-06, "epoch": 0.9585175775047028, "percentage": 19.17, "elapsed_time": "0:49:45", "remaining_time": "3:29:47", "throughput": 8859.56, "total_tokens": 26449792} +{"current_steps": 39240, "total_steps": 204665, "loss": 0.1123, "lr": 1.949183117883802e-06, "epoch": 0.95863972833655, "percentage": 19.17, "elapsed_time": "0:49:45", "remaining_time": "3:29:47", "throughput": 8859.64, "total_tokens": 26453120} +{"current_steps": 39245, "total_steps": 204665, "loss": 0.1026, "lr": 1.9491562754964644e-06, "epoch": 0.9587618791683972, "percentage": 19.18, "elapsed_time": "0:49:46", "remaining_time": "3:29:46", "throughput": 8859.71, "total_tokens": 26456384} +{"current_steps": 39250, "total_steps": 204665, "loss": 0.1303, "lr": 1.949129426206607e-06, "epoch": 0.9588840300002442, "percentage": 19.18, "elapsed_time": "0:49:46", "remaining_time": "3:29:46", "throughput": 8859.96, "total_tokens": 26460352} +{"current_steps": 39255, "total_steps": 204665, "loss": 0.1797, "lr": 1.949102570014425e-06, "epoch": 0.9590061808320914, "percentage": 19.18, "elapsed_time": "0:49:46", "remaining_time": "3:29:45", "throughput": 8860.05, "total_tokens": 26463680} +{"current_steps": 39260, "total_steps": 204665, "loss": 0.0993, "lr": 1.9490757069201135e-06, "epoch": 0.9591283316639386, "percentage": 19.18, "elapsed_time": "0:49:47", "remaining_time": "3:29:45", "throughput": 8860.08, "total_tokens": 26466816} +{"current_steps": 39265, "total_steps": 204665, "loss": 0.1357, "lr": 1.9490488369238686e-06, "epoch": 0.9592504824957858, "percentage": 19.19, "elapsed_time": "0:49:47", "remaining_time": "3:29:44", "throughput": 8860.12, "total_tokens": 26470016} +{"current_steps": 39270, "total_steps": 204665, "loss": 0.057, "lr": 1.949021960025885e-06, "epoch": 0.959372633327633, "percentage": 19.19, "elapsed_time": "0:49:47", "remaining_time": "3:29:44", "throughput": 8860.21, "total_tokens": 26473344} +{"current_steps": 39275, "total_steps": 204665, "loss": 0.0984, "lr": 1.9489950762263584e-06, "epoch": 0.9594947841594801, "percentage": 19.19, "elapsed_time": "0:49:48", "remaining_time": "3:29:43", "throughput": 8860.33, "total_tokens": 26476800} +{"current_steps": 39280, "total_steps": 204665, "loss": 0.114, "lr": 1.948968185525485e-06, "epoch": 0.9596169349913273, "percentage": 19.19, "elapsed_time": "0:49:48", "remaining_time": "3:29:43", "throughput": 8860.48, "total_tokens": 26480384} +{"current_steps": 39285, "total_steps": 204665, "loss": 0.0964, "lr": 1.9489412879234587e-06, "epoch": 0.9597390858231745, "percentage": 19.19, "elapsed_time": "0:49:48", "remaining_time": "3:29:42", "throughput": 8860.53, "total_tokens": 26483584} +{"current_steps": 39290, "total_steps": 204665, "loss": 0.0955, "lr": 1.9489143834204768e-06, "epoch": 0.9598612366550217, "percentage": 19.2, "elapsed_time": "0:49:49", "remaining_time": "3:29:42", "throughput": 8860.68, "total_tokens": 26487168} +{"current_steps": 39295, "total_steps": 204665, "loss": 0.2023, "lr": 1.948887472016734e-06, "epoch": 0.9599833874868687, "percentage": 19.2, "elapsed_time": "0:49:49", "remaining_time": "3:29:41", "throughput": 8860.8, "total_tokens": 26490624} +{"current_steps": 39300, "total_steps": 204665, "loss": 0.0522, "lr": 1.9488605537124267e-06, "epoch": 0.9601055383187159, "percentage": 19.2, "elapsed_time": "0:49:49", "remaining_time": "3:29:41", "throughput": 8860.87, "total_tokens": 26493952} +{"current_steps": 39305, "total_steps": 204665, "loss": 0.0684, "lr": 1.94883362850775e-06, "epoch": 0.9602276891505631, "percentage": 19.2, "elapsed_time": "0:49:50", "remaining_time": "3:29:40", "throughput": 8860.94, "total_tokens": 26497216} +{"current_steps": 39310, "total_steps": 204665, "loss": 0.1094, "lr": 1.9488066964029e-06, "epoch": 0.9603498399824103, "percentage": 19.21, "elapsed_time": "0:49:50", "remaining_time": "3:29:40", "throughput": 8860.96, "total_tokens": 26500288} +{"current_steps": 39315, "total_steps": 204665, "loss": 0.0557, "lr": 1.948779757398072e-06, "epoch": 0.9604719908142575, "percentage": 19.21, "elapsed_time": "0:49:51", "remaining_time": "3:29:39", "throughput": 8861.06, "total_tokens": 26503680} +{"current_steps": 39320, "total_steps": 204665, "loss": 0.1563, "lr": 1.948752811493463e-06, "epoch": 0.9605941416461046, "percentage": 19.21, "elapsed_time": "0:49:51", "remaining_time": "3:29:39", "throughput": 8861.12, "total_tokens": 26506944} +{"current_steps": 39325, "total_steps": 204665, "loss": 0.1214, "lr": 1.9487258586892685e-06, "epoch": 0.9607162924779518, "percentage": 19.21, "elapsed_time": "0:49:51", "remaining_time": "3:29:38", "throughput": 8861.2, "total_tokens": 26510272} +{"current_steps": 39330, "total_steps": 204665, "loss": 0.1588, "lr": 1.948698898985684e-06, "epoch": 0.960838443309799, "percentage": 19.22, "elapsed_time": "0:49:52", "remaining_time": "3:29:38", "throughput": 8861.24, "total_tokens": 26513408} +{"current_steps": 39335, "total_steps": 204665, "loss": 0.1465, "lr": 1.948671932382906e-06, "epoch": 0.9609605941416461, "percentage": 19.22, "elapsed_time": "0:49:52", "remaining_time": "3:29:37", "throughput": 8861.32, "total_tokens": 26516736} +{"current_steps": 39340, "total_steps": 204665, "loss": 0.0844, "lr": 1.9486449588811304e-06, "epoch": 0.9610827449734932, "percentage": 19.22, "elapsed_time": "0:49:52", "remaining_time": "3:29:37", "throughput": 8861.49, "total_tokens": 26520384} +{"current_steps": 39345, "total_steps": 204665, "loss": 0.0408, "lr": 1.948617978480554e-06, "epoch": 0.9612048958053404, "percentage": 19.22, "elapsed_time": "0:49:53", "remaining_time": "3:29:36", "throughput": 8861.58, "total_tokens": 26523776} +{"current_steps": 39350, "total_steps": 204665, "loss": 0.1468, "lr": 1.9485909911813717e-06, "epoch": 0.9613270466371876, "percentage": 19.23, "elapsed_time": "0:49:53", "remaining_time": "3:29:36", "throughput": 8861.8, "total_tokens": 26527616} +{"current_steps": 39355, "total_steps": 204665, "loss": 0.0336, "lr": 1.9485639969837815e-06, "epoch": 0.9614491974690348, "percentage": 19.23, "elapsed_time": "0:49:53", "remaining_time": "3:29:35", "throughput": 8861.86, "total_tokens": 26530880} +{"current_steps": 39360, "total_steps": 204665, "loss": 0.1567, "lr": 1.948536995887978e-06, "epoch": 0.961571348300882, "percentage": 19.23, "elapsed_time": "0:49:54", "remaining_time": "3:29:34", "throughput": 8861.87, "total_tokens": 26533952} +{"current_steps": 39365, "total_steps": 204665, "loss": 0.0393, "lr": 1.948509987894159e-06, "epoch": 0.9616934991327291, "percentage": 19.23, "elapsed_time": "0:49:54", "remaining_time": "3:29:34", "throughput": 8861.94, "total_tokens": 26537216} +{"current_steps": 39370, "total_steps": 204665, "loss": 0.1268, "lr": 1.9484829730025195e-06, "epoch": 0.9618156499645762, "percentage": 19.24, "elapsed_time": "0:49:54", "remaining_time": "3:29:33", "throughput": 8862.06, "total_tokens": 26540672} +{"current_steps": 39375, "total_steps": 204665, "loss": 0.2364, "lr": 1.9484559512132575e-06, "epoch": 0.9619378007964234, "percentage": 19.24, "elapsed_time": "0:49:55", "remaining_time": "3:29:33", "throughput": 8862.15, "total_tokens": 26544000} +{"current_steps": 39380, "total_steps": 204665, "loss": 0.1005, "lr": 1.948428922526568e-06, "epoch": 0.9620599516282706, "percentage": 19.24, "elapsed_time": "0:49:55", "remaining_time": "3:29:32", "throughput": 8862.21, "total_tokens": 26547264} +{"current_steps": 39385, "total_steps": 204665, "loss": 0.0725, "lr": 1.9484018869426487e-06, "epoch": 0.9621821024601177, "percentage": 19.24, "elapsed_time": "0:49:55", "remaining_time": "3:29:32", "throughput": 8862.28, "total_tokens": 26550528} +{"current_steps": 39390, "total_steps": 204665, "loss": 0.1236, "lr": 1.9483748444616957e-06, "epoch": 0.9623042532919649, "percentage": 19.25, "elapsed_time": "0:49:56", "remaining_time": "3:29:31", "throughput": 8862.36, "total_tokens": 26553856} +{"current_steps": 39395, "total_steps": 204665, "loss": 0.1777, "lr": 1.9483477950839057e-06, "epoch": 0.9624264041238121, "percentage": 19.25, "elapsed_time": "0:49:56", "remaining_time": "3:29:31", "throughput": 8862.46, "total_tokens": 26557248} +{"current_steps": 39400, "total_steps": 204665, "loss": 0.1267, "lr": 1.9483207388094756e-06, "epoch": 0.9625485549556593, "percentage": 19.25, "elapsed_time": "0:49:56", "remaining_time": "3:29:30", "throughput": 8862.65, "total_tokens": 26560960} +{"current_steps": 39405, "total_steps": 204665, "loss": 0.1065, "lr": 1.948293675638602e-06, "epoch": 0.9626707057875065, "percentage": 19.25, "elapsed_time": "0:49:57", "remaining_time": "3:29:30", "throughput": 8862.73, "total_tokens": 26564288} +{"current_steps": 39410, "total_steps": 204665, "loss": 0.0593, "lr": 1.9482666055714816e-06, "epoch": 0.9627928566193535, "percentage": 19.26, "elapsed_time": "0:49:57", "remaining_time": "3:29:29", "throughput": 8862.8, "total_tokens": 26567616} +{"current_steps": 39415, "total_steps": 204665, "loss": 0.091, "lr": 1.9482395286083116e-06, "epoch": 0.9629150074512007, "percentage": 19.26, "elapsed_time": "0:49:57", "remaining_time": "3:29:29", "throughput": 8862.84, "total_tokens": 26570752} +{"current_steps": 39420, "total_steps": 204665, "loss": 0.1468, "lr": 1.948212444749289e-06, "epoch": 0.9630371582830479, "percentage": 19.26, "elapsed_time": "0:49:58", "remaining_time": "3:29:28", "throughput": 8862.84, "total_tokens": 26573760} +{"current_steps": 39425, "total_steps": 204665, "loss": 0.192, "lr": 1.9481853539946098e-06, "epoch": 0.9631593091148951, "percentage": 19.26, "elapsed_time": "0:49:58", "remaining_time": "3:29:28", "throughput": 8862.85, "total_tokens": 26576832} +{"current_steps": 39430, "total_steps": 204665, "loss": 0.109, "lr": 1.948158256344472e-06, "epoch": 0.9632814599467422, "percentage": 19.27, "elapsed_time": "0:49:59", "remaining_time": "3:29:27", "throughput": 8863.06, "total_tokens": 26580608} +{"current_steps": 39435, "total_steps": 204665, "loss": 0.085, "lr": 1.948131151799072e-06, "epoch": 0.9634036107785894, "percentage": 19.27, "elapsed_time": "0:49:59", "remaining_time": "3:29:27", "throughput": 8863.03, "total_tokens": 26583488} +{"current_steps": 39440, "total_steps": 204665, "loss": 0.1276, "lr": 1.9481040403586074e-06, "epoch": 0.9635257616104366, "percentage": 19.27, "elapsed_time": "0:49:59", "remaining_time": "3:29:26", "throughput": 8863.22, "total_tokens": 26587200} +{"current_steps": 39445, "total_steps": 204665, "loss": 0.0837, "lr": 1.948076922023275e-06, "epoch": 0.9636479124422838, "percentage": 19.27, "elapsed_time": "0:50:00", "remaining_time": "3:29:26", "throughput": 8863.33, "total_tokens": 26590656} +{"current_steps": 39450, "total_steps": 204665, "loss": 0.1127, "lr": 1.948049796793273e-06, "epoch": 0.963770063274131, "percentage": 19.28, "elapsed_time": "0:50:00", "remaining_time": "3:29:25", "throughput": 8863.42, "total_tokens": 26593984} +{"current_steps": 39455, "total_steps": 204665, "loss": 0.0991, "lr": 1.9480226646687976e-06, "epoch": 0.963892214105978, "percentage": 19.28, "elapsed_time": "0:50:00", "remaining_time": "3:29:25", "throughput": 8863.51, "total_tokens": 26597376} +{"current_steps": 39460, "total_steps": 204665, "loss": 0.0466, "lr": 1.947995525650046e-06, "epoch": 0.9640143649378252, "percentage": 19.28, "elapsed_time": "0:50:01", "remaining_time": "3:29:24", "throughput": 8863.48, "total_tokens": 26600256} +{"current_steps": 39465, "total_steps": 204665, "loss": 0.1007, "lr": 1.947968379737216e-06, "epoch": 0.9641365157696724, "percentage": 19.28, "elapsed_time": "0:50:01", "remaining_time": "3:29:24", "throughput": 8863.64, "total_tokens": 26603904} +{"current_steps": 39470, "total_steps": 204665, "loss": 0.1234, "lr": 1.947941226930505e-06, "epoch": 0.9642586666015196, "percentage": 19.29, "elapsed_time": "0:50:01", "remaining_time": "3:29:23", "throughput": 8863.81, "total_tokens": 26607552} +{"current_steps": 39475, "total_steps": 204665, "loss": 0.1156, "lr": 1.947914067230111e-06, "epoch": 0.9643808174333667, "percentage": 19.29, "elapsed_time": "0:50:02", "remaining_time": "3:29:23", "throughput": 8863.84, "total_tokens": 26610688} +{"current_steps": 39480, "total_steps": 204665, "loss": 0.0695, "lr": 1.9478869006362305e-06, "epoch": 0.9645029682652139, "percentage": 19.29, "elapsed_time": "0:50:02", "remaining_time": "3:29:22", "throughput": 8863.93, "total_tokens": 26614016} +{"current_steps": 39485, "total_steps": 204665, "loss": 0.0485, "lr": 1.9478597271490614e-06, "epoch": 0.9646251190970611, "percentage": 19.29, "elapsed_time": "0:50:02", "remaining_time": "3:29:22", "throughput": 8864.02, "total_tokens": 26617408} +{"current_steps": 39490, "total_steps": 204665, "loss": 0.0856, "lr": 1.9478325467688013e-06, "epoch": 0.9647472699289082, "percentage": 19.29, "elapsed_time": "0:50:03", "remaining_time": "3:29:21", "throughput": 8864.11, "total_tokens": 26620736} +{"current_steps": 39495, "total_steps": 204665, "loss": 0.1242, "lr": 1.9478053594956484e-06, "epoch": 0.9648694207607553, "percentage": 19.3, "elapsed_time": "0:50:03", "remaining_time": "3:29:21", "throughput": 8864.31, "total_tokens": 26624512} +{"current_steps": 39500, "total_steps": 204665, "loss": 0.1077, "lr": 1.9477781653297996e-06, "epoch": 0.9649915715926025, "percentage": 19.3, "elapsed_time": "0:50:03", "remaining_time": "3:29:20", "throughput": 8864.37, "total_tokens": 26627776} +{"current_steps": 39505, "total_steps": 204665, "loss": 0.1278, "lr": 1.9477509642714535e-06, "epoch": 0.9651137224244497, "percentage": 19.3, "elapsed_time": "0:50:04", "remaining_time": "3:29:20", "throughput": 8864.58, "total_tokens": 26631552} +{"current_steps": 39510, "total_steps": 204665, "loss": 0.1753, "lr": 1.947723756320807e-06, "epoch": 0.9652358732562969, "percentage": 19.3, "elapsed_time": "0:50:04", "remaining_time": "3:29:19", "throughput": 8864.57, "total_tokens": 26634560} +{"current_steps": 39515, "total_steps": 204665, "loss": 0.1075, "lr": 1.9476965414780587e-06, "epoch": 0.9653580240881441, "percentage": 19.31, "elapsed_time": "0:50:04", "remaining_time": "3:29:18", "throughput": 8864.59, "total_tokens": 26637632} +{"current_steps": 39520, "total_steps": 204665, "loss": 0.2478, "lr": 1.9476693197434063e-06, "epoch": 0.9654801749199912, "percentage": 19.31, "elapsed_time": "0:50:05", "remaining_time": "3:29:18", "throughput": 8864.63, "total_tokens": 26640832} +{"current_steps": 39525, "total_steps": 204665, "loss": 0.0597, "lr": 1.9476420911170478e-06, "epoch": 0.9656023257518384, "percentage": 19.31, "elapsed_time": "0:50:05", "remaining_time": "3:29:17", "throughput": 8864.68, "total_tokens": 26644096} +{"current_steps": 39530, "total_steps": 204665, "loss": 0.0845, "lr": 1.947614855599181e-06, "epoch": 0.9657244765836855, "percentage": 19.31, "elapsed_time": "0:50:05", "remaining_time": "3:29:17", "throughput": 8864.68, "total_tokens": 26647104} +{"current_steps": 39535, "total_steps": 204665, "loss": 0.0957, "lr": 1.947587613190004e-06, "epoch": 0.9658466274155327, "percentage": 19.32, "elapsed_time": "0:50:06", "remaining_time": "3:29:16", "throughput": 8864.67, "total_tokens": 26650112} +{"current_steps": 39540, "total_steps": 204665, "loss": 0.0864, "lr": 1.947560363889715e-06, "epoch": 0.9659687782473798, "percentage": 19.32, "elapsed_time": "0:50:06", "remaining_time": "3:29:16", "throughput": 8864.65, "total_tokens": 26653056} +{"current_steps": 39545, "total_steps": 204665, "loss": 0.0976, "lr": 1.9475331076985124e-06, "epoch": 0.966090929079227, "percentage": 19.32, "elapsed_time": "0:50:07", "remaining_time": "3:29:15", "throughput": 8864.78, "total_tokens": 26656576} +{"current_steps": 39550, "total_steps": 204665, "loss": 0.115, "lr": 1.947505844616594e-06, "epoch": 0.9662130799110742, "percentage": 19.32, "elapsed_time": "0:50:07", "remaining_time": "3:29:15", "throughput": 8864.83, "total_tokens": 26659776} +{"current_steps": 39555, "total_steps": 204665, "loss": 0.1513, "lr": 1.9474785746441584e-06, "epoch": 0.9663352307429214, "percentage": 19.33, "elapsed_time": "0:50:07", "remaining_time": "3:29:14", "throughput": 8864.96, "total_tokens": 26663296} +{"current_steps": 39560, "total_steps": 204665, "loss": 0.1737, "lr": 1.9474512977814034e-06, "epoch": 0.9664573815747686, "percentage": 19.33, "elapsed_time": "0:50:08", "remaining_time": "3:29:14", "throughput": 8865.15, "total_tokens": 26667008} +{"current_steps": 39565, "total_steps": 204665, "loss": 0.116, "lr": 1.947424014028528e-06, "epoch": 0.9665795324066156, "percentage": 19.33, "elapsed_time": "0:50:08", "remaining_time": "3:29:13", "throughput": 8865.26, "total_tokens": 26670464} +{"current_steps": 39570, "total_steps": 204665, "loss": 0.0084, "lr": 1.9473967233857306e-06, "epoch": 0.9667016832384628, "percentage": 19.33, "elapsed_time": "0:50:08", "remaining_time": "3:29:13", "throughput": 8865.35, "total_tokens": 26673792} +{"current_steps": 39575, "total_steps": 204665, "loss": 0.2801, "lr": 1.947369425853209e-06, "epoch": 0.96682383407031, "percentage": 19.34, "elapsed_time": "0:50:09", "remaining_time": "3:29:12", "throughput": 8865.38, "total_tokens": 26676928} +{"current_steps": 39580, "total_steps": 204665, "loss": 0.0528, "lr": 1.9473421214311624e-06, "epoch": 0.9669459849021572, "percentage": 19.34, "elapsed_time": "0:50:09", "remaining_time": "3:29:12", "throughput": 8865.46, "total_tokens": 26680256} +{"current_steps": 39585, "total_steps": 204665, "loss": 0.1016, "lr": 1.947314810119789e-06, "epoch": 0.9670681357340043, "percentage": 19.34, "elapsed_time": "0:50:09", "remaining_time": "3:29:11", "throughput": 8865.5, "total_tokens": 26683456} +{"current_steps": 39590, "total_steps": 204665, "loss": 0.1592, "lr": 1.947287491919287e-06, "epoch": 0.9671902865658515, "percentage": 19.34, "elapsed_time": "0:50:10", "remaining_time": "3:29:11", "throughput": 8865.65, "total_tokens": 26687040} +{"current_steps": 39595, "total_steps": 204665, "loss": 0.037, "lr": 1.947260166829856e-06, "epoch": 0.9673124373976987, "percentage": 19.35, "elapsed_time": "0:50:10", "remaining_time": "3:29:10", "throughput": 8865.86, "total_tokens": 26690880} +{"current_steps": 39600, "total_steps": 204665, "loss": 0.1352, "lr": 1.9472328348516942e-06, "epoch": 0.9674345882295459, "percentage": 19.35, "elapsed_time": "0:50:10", "remaining_time": "3:29:10", "throughput": 8865.98, "total_tokens": 26694400} +{"current_steps": 39605, "total_steps": 204665, "loss": 0.1238, "lr": 1.947205495985001e-06, "epoch": 0.9675567390613931, "percentage": 19.35, "elapsed_time": "0:50:11", "remaining_time": "3:29:09", "throughput": 8866.04, "total_tokens": 26697664} +{"current_steps": 39610, "total_steps": 204665, "loss": 0.0828, "lr": 1.947178150229974e-06, "epoch": 0.9676788898932401, "percentage": 19.35, "elapsed_time": "0:50:11", "remaining_time": "3:29:09", "throughput": 8866.04, "total_tokens": 26700672} +{"current_steps": 39615, "total_steps": 204665, "loss": 0.0915, "lr": 1.9471507975868133e-06, "epoch": 0.9678010407250873, "percentage": 19.36, "elapsed_time": "0:50:11", "remaining_time": "3:29:08", "throughput": 8865.98, "total_tokens": 26703424} +{"current_steps": 39620, "total_steps": 204665, "loss": 0.0577, "lr": 1.9471234380557166e-06, "epoch": 0.9679231915569345, "percentage": 19.36, "elapsed_time": "0:50:12", "remaining_time": "3:29:08", "throughput": 8865.99, "total_tokens": 26706496} +{"current_steps": 39625, "total_steps": 204665, "loss": 0.0826, "lr": 1.947096071636884e-06, "epoch": 0.9680453423887817, "percentage": 19.36, "elapsed_time": "0:50:12", "remaining_time": "3:29:07", "throughput": 8866.19, "total_tokens": 26710272} +{"current_steps": 39630, "total_steps": 204665, "loss": 0.2906, "lr": 1.9470686983305137e-06, "epoch": 0.9681674932206288, "percentage": 19.36, "elapsed_time": "0:50:12", "remaining_time": "3:29:07", "throughput": 8866.21, "total_tokens": 26713344} +{"current_steps": 39635, "total_steps": 204665, "loss": 0.0887, "lr": 1.9470413181368055e-06, "epoch": 0.968289644052476, "percentage": 19.37, "elapsed_time": "0:50:13", "remaining_time": "3:29:06", "throughput": 8866.39, "total_tokens": 26717056} +{"current_steps": 39640, "total_steps": 204665, "loss": 0.1129, "lr": 1.9470139310559575e-06, "epoch": 0.9684117948843232, "percentage": 19.37, "elapsed_time": "0:50:13", "remaining_time": "3:29:06", "throughput": 8866.49, "total_tokens": 26720448} +{"current_steps": 39645, "total_steps": 204665, "loss": 0.1292, "lr": 1.9469865370881697e-06, "epoch": 0.9685339457161704, "percentage": 19.37, "elapsed_time": "0:50:13", "remaining_time": "3:29:05", "throughput": 8866.52, "total_tokens": 26723584} +{"current_steps": 39650, "total_steps": 204665, "loss": 0.1745, "lr": 1.946959136233641e-06, "epoch": 0.9686560965480175, "percentage": 19.37, "elapsed_time": "0:50:14", "remaining_time": "3:29:05", "throughput": 8866.53, "total_tokens": 26726656} +{"current_steps": 39655, "total_steps": 204665, "loss": 0.0521, "lr": 1.946931728492571e-06, "epoch": 0.9687782473798646, "percentage": 19.38, "elapsed_time": "0:50:14", "remaining_time": "3:29:04", "throughput": 8866.54, "total_tokens": 26729728} +{"current_steps": 39660, "total_steps": 204665, "loss": 0.1553, "lr": 1.9469043138651593e-06, "epoch": 0.9689003982117118, "percentage": 19.38, "elapsed_time": "0:50:15", "remaining_time": "3:29:03", "throughput": 8866.69, "total_tokens": 26733312} +{"current_steps": 39665, "total_steps": 204665, "loss": 0.1097, "lr": 1.9468768923516038e-06, "epoch": 0.969022549043559, "percentage": 19.38, "elapsed_time": "0:50:15", "remaining_time": "3:29:03", "throughput": 8866.82, "total_tokens": 26736832} +{"current_steps": 39670, "total_steps": 204665, "loss": 0.1189, "lr": 1.9468494639521054e-06, "epoch": 0.9691446998754062, "percentage": 19.38, "elapsed_time": "0:50:15", "remaining_time": "3:29:02", "throughput": 8866.96, "total_tokens": 26740352} +{"current_steps": 39675, "total_steps": 204665, "loss": 0.0273, "lr": 1.9468220286668627e-06, "epoch": 0.9692668507072533, "percentage": 19.39, "elapsed_time": "0:50:16", "remaining_time": "3:29:02", "throughput": 8867.05, "total_tokens": 26743680} +{"current_steps": 39680, "total_steps": 204665, "loss": 0.1094, "lr": 1.9467945864960756e-06, "epoch": 0.9693890015391005, "percentage": 19.39, "elapsed_time": "0:50:16", "remaining_time": "3:29:01", "throughput": 8867.14, "total_tokens": 26747072} +{"current_steps": 39685, "total_steps": 204665, "loss": 0.075, "lr": 1.946767137439944e-06, "epoch": 0.9695111523709476, "percentage": 19.39, "elapsed_time": "0:50:16", "remaining_time": "3:29:01", "throughput": 8867.07, "total_tokens": 26749824} +{"current_steps": 39690, "total_steps": 204665, "loss": 0.0608, "lr": 1.9467396814986667e-06, "epoch": 0.9696333032027948, "percentage": 19.39, "elapsed_time": "0:50:17", "remaining_time": "3:29:00", "throughput": 8867.32, "total_tokens": 26753792} +{"current_steps": 39695, "total_steps": 204665, "loss": 0.0149, "lr": 1.946712218672444e-06, "epoch": 0.969755454034642, "percentage": 19.4, "elapsed_time": "0:50:17", "remaining_time": "3:29:00", "throughput": 8867.38, "total_tokens": 26757056} +{"current_steps": 39700, "total_steps": 204665, "loss": 0.1228, "lr": 1.9466847489614752e-06, "epoch": 0.9698776048664891, "percentage": 19.4, "elapsed_time": "0:50:17", "remaining_time": "3:28:59", "throughput": 8867.5, "total_tokens": 26760512} +{"current_steps": 39705, "total_steps": 204665, "loss": 0.0055, "lr": 1.9466572723659605e-06, "epoch": 0.9699997556983363, "percentage": 19.4, "elapsed_time": "0:50:18", "remaining_time": "3:28:59", "throughput": 8867.54, "total_tokens": 26763712} +{"current_steps": 39710, "total_steps": 204665, "loss": 0.0845, "lr": 1.9466297888860996e-06, "epoch": 0.9701219065301835, "percentage": 19.4, "elapsed_time": "0:50:18", "remaining_time": "3:28:58", "throughput": 8867.57, "total_tokens": 26766848} +{"current_steps": 39715, "total_steps": 204665, "loss": 0.2151, "lr": 1.9466022985220923e-06, "epoch": 0.9702440573620307, "percentage": 19.4, "elapsed_time": "0:50:18", "remaining_time": "3:28:58", "throughput": 8867.67, "total_tokens": 26770240} +{"current_steps": 39720, "total_steps": 204665, "loss": 0.1373, "lr": 1.946574801274138e-06, "epoch": 0.9703662081938778, "percentage": 19.41, "elapsed_time": "0:50:19", "remaining_time": "3:28:57", "throughput": 8867.73, "total_tokens": 26773504} +{"current_steps": 39725, "total_steps": 204665, "loss": 0.0279, "lr": 1.9465472971424373e-06, "epoch": 0.970488359025725, "percentage": 19.41, "elapsed_time": "0:50:19", "remaining_time": "3:28:57", "throughput": 8867.84, "total_tokens": 26776960} +{"current_steps": 39730, "total_steps": 204665, "loss": 0.1373, "lr": 1.9465197861271904e-06, "epoch": 0.9706105098575721, "percentage": 19.41, "elapsed_time": "0:50:19", "remaining_time": "3:28:56", "throughput": 8867.9, "total_tokens": 26780224} +{"current_steps": 39735, "total_steps": 204665, "loss": 0.0407, "lr": 1.9464922682285966e-06, "epoch": 0.9707326606894193, "percentage": 19.41, "elapsed_time": "0:50:20", "remaining_time": "3:28:56", "throughput": 8868.08, "total_tokens": 26783936} +{"current_steps": 39740, "total_steps": 204665, "loss": 0.1103, "lr": 1.946464743446857e-06, "epoch": 0.9708548115212665, "percentage": 19.42, "elapsed_time": "0:50:20", "remaining_time": "3:28:55", "throughput": 8868.13, "total_tokens": 26787136} +{"current_steps": 39745, "total_steps": 204665, "loss": 0.0649, "lr": 1.9464372117821707e-06, "epoch": 0.9709769623531136, "percentage": 19.42, "elapsed_time": "0:50:20", "remaining_time": "3:28:55", "throughput": 8868.26, "total_tokens": 26790656} +{"current_steps": 39750, "total_steps": 204665, "loss": 0.1237, "lr": 1.9464096732347386e-06, "epoch": 0.9710991131849608, "percentage": 19.42, "elapsed_time": "0:50:21", "remaining_time": "3:28:54", "throughput": 8868.31, "total_tokens": 26793856} +{"current_steps": 39755, "total_steps": 204665, "loss": 0.0979, "lr": 1.9463821278047607e-06, "epoch": 0.971221264016808, "percentage": 19.42, "elapsed_time": "0:50:21", "remaining_time": "3:28:54", "throughput": 8868.43, "total_tokens": 26797312} +{"current_steps": 39760, "total_steps": 204665, "loss": 0.0288, "lr": 1.9463545754924376e-06, "epoch": 0.9713434148486552, "percentage": 19.43, "elapsed_time": "0:50:22", "remaining_time": "3:28:53", "throughput": 8868.58, "total_tokens": 26800896} +{"current_steps": 39765, "total_steps": 204665, "loss": 0.2784, "lr": 1.9463270162979697e-06, "epoch": 0.9714655656805022, "percentage": 19.43, "elapsed_time": "0:50:22", "remaining_time": "3:28:53", "throughput": 8868.6, "total_tokens": 26804032} +{"current_steps": 39770, "total_steps": 204665, "loss": 0.0278, "lr": 1.9462994502215565e-06, "epoch": 0.9715877165123494, "percentage": 19.43, "elapsed_time": "0:50:22", "remaining_time": "3:28:52", "throughput": 8868.7, "total_tokens": 26807424} +{"current_steps": 39775, "total_steps": 204665, "loss": 0.1526, "lr": 1.9462718772634e-06, "epoch": 0.9717098673441966, "percentage": 19.43, "elapsed_time": "0:50:23", "remaining_time": "3:28:52", "throughput": 8868.74, "total_tokens": 26810560} +{"current_steps": 39780, "total_steps": 204665, "loss": 0.0776, "lr": 1.9462442974236996e-06, "epoch": 0.9718320181760438, "percentage": 19.44, "elapsed_time": "0:50:23", "remaining_time": "3:28:51", "throughput": 8868.8, "total_tokens": 26813824} +{"current_steps": 39785, "total_steps": 204665, "loss": 0.1508, "lr": 1.946216710702656e-06, "epoch": 0.9719541690078909, "percentage": 19.44, "elapsed_time": "0:50:23", "remaining_time": "3:28:51", "throughput": 8868.87, "total_tokens": 26817088} +{"current_steps": 39790, "total_steps": 204665, "loss": 0.1146, "lr": 1.94618911710047e-06, "epoch": 0.9720763198397381, "percentage": 19.44, "elapsed_time": "0:50:24", "remaining_time": "3:28:50", "throughput": 8868.9, "total_tokens": 26820224} +{"current_steps": 39795, "total_steps": 204665, "loss": 0.0732, "lr": 1.946161516617342e-06, "epoch": 0.9721984706715853, "percentage": 19.44, "elapsed_time": "0:50:24", "remaining_time": "3:28:50", "throughput": 8868.96, "total_tokens": 26823488} +{"current_steps": 39800, "total_steps": 204665, "loss": 0.0899, "lr": 1.9461339092534733e-06, "epoch": 0.9723206215034325, "percentage": 19.45, "elapsed_time": "0:50:24", "remaining_time": "3:28:49", "throughput": 8869.02, "total_tokens": 26826752} +{"current_steps": 39805, "total_steps": 204665, "loss": 0.1212, "lr": 1.9461062950090645e-06, "epoch": 0.9724427723352796, "percentage": 19.45, "elapsed_time": "0:50:25", "remaining_time": "3:28:49", "throughput": 8869.07, "total_tokens": 26829952} +{"current_steps": 39810, "total_steps": 204665, "loss": 0.0809, "lr": 1.946078673884316e-06, "epoch": 0.9725649231671267, "percentage": 19.45, "elapsed_time": "0:50:25", "remaining_time": "3:28:48", "throughput": 8868.98, "total_tokens": 26832640} +{"current_steps": 39815, "total_steps": 204665, "loss": 0.0563, "lr": 1.9460510458794286e-06, "epoch": 0.9726870739989739, "percentage": 19.45, "elapsed_time": "0:50:25", "remaining_time": "3:28:47", "throughput": 8869.02, "total_tokens": 26835776} +{"current_steps": 39820, "total_steps": 204665, "loss": 0.1205, "lr": 1.9460234109946044e-06, "epoch": 0.9728092248308211, "percentage": 19.46, "elapsed_time": "0:50:26", "remaining_time": "3:28:47", "throughput": 8869.06, "total_tokens": 26838912} +{"current_steps": 39825, "total_steps": 204665, "loss": 0.0838, "lr": 1.9459957692300426e-06, "epoch": 0.9729313756626683, "percentage": 19.46, "elapsed_time": "0:50:26", "remaining_time": "3:28:46", "throughput": 8869.11, "total_tokens": 26842112} +{"current_steps": 39830, "total_steps": 204665, "loss": 0.135, "lr": 1.9459681205859457e-06, "epoch": 0.9730535264945154, "percentage": 19.46, "elapsed_time": "0:50:26", "remaining_time": "3:28:46", "throughput": 8869.13, "total_tokens": 26845184} +{"current_steps": 39835, "total_steps": 204665, "loss": 0.0454, "lr": 1.945940465062514e-06, "epoch": 0.9731756773263626, "percentage": 19.46, "elapsed_time": "0:50:27", "remaining_time": "3:28:45", "throughput": 8869.26, "total_tokens": 26848704} +{"current_steps": 39840, "total_steps": 204665, "loss": 0.0868, "lr": 1.945912802659949e-06, "epoch": 0.9732978281582098, "percentage": 19.47, "elapsed_time": "0:50:27", "remaining_time": "3:28:45", "throughput": 8869.35, "total_tokens": 26852032} +{"current_steps": 39845, "total_steps": 204665, "loss": 0.0862, "lr": 1.9458851333784514e-06, "epoch": 0.9734199789900569, "percentage": 19.47, "elapsed_time": "0:50:27", "remaining_time": "3:28:44", "throughput": 8869.44, "total_tokens": 26855424} +{"current_steps": 39850, "total_steps": 204665, "loss": 0.1357, "lr": 1.945857457218223e-06, "epoch": 0.9735421298219041, "percentage": 19.47, "elapsed_time": "0:50:28", "remaining_time": "3:28:44", "throughput": 8869.57, "total_tokens": 26858944} +{"current_steps": 39855, "total_steps": 204665, "loss": 0.2237, "lr": 1.945829774179464e-06, "epoch": 0.9736642806537512, "percentage": 19.47, "elapsed_time": "0:50:28", "remaining_time": "3:28:43", "throughput": 8869.63, "total_tokens": 26862208} +{"current_steps": 39860, "total_steps": 204665, "loss": 0.0235, "lr": 1.9458020842623774e-06, "epoch": 0.9737864314855984, "percentage": 19.48, "elapsed_time": "0:50:28", "remaining_time": "3:28:43", "throughput": 8869.76, "total_tokens": 26865728} +{"current_steps": 39865, "total_steps": 204665, "loss": 0.0422, "lr": 1.9457743874671633e-06, "epoch": 0.9739085823174456, "percentage": 19.48, "elapsed_time": "0:50:29", "remaining_time": "3:28:42", "throughput": 8869.91, "total_tokens": 26869312} +{"current_steps": 39870, "total_steps": 204665, "loss": 0.1107, "lr": 1.9457466837940234e-06, "epoch": 0.9740307331492928, "percentage": 19.48, "elapsed_time": "0:50:29", "remaining_time": "3:28:42", "throughput": 8870.01, "total_tokens": 26872704} +{"current_steps": 39875, "total_steps": 204665, "loss": 0.1688, "lr": 1.9457189732431594e-06, "epoch": 0.9741528839811399, "percentage": 19.48, "elapsed_time": "0:50:29", "remaining_time": "3:28:41", "throughput": 8870.09, "total_tokens": 26876032} +{"current_steps": 39880, "total_steps": 204665, "loss": 0.1856, "lr": 1.9456912558147724e-06, "epoch": 0.974275034812987, "percentage": 19.49, "elapsed_time": "0:50:30", "remaining_time": "3:28:41", "throughput": 8870.17, "total_tokens": 26879360} +{"current_steps": 39885, "total_steps": 204665, "loss": 0.1668, "lr": 1.9456635315090645e-06, "epoch": 0.9743971856448342, "percentage": 19.49, "elapsed_time": "0:50:30", "remaining_time": "3:28:40", "throughput": 8870.28, "total_tokens": 26882752} +{"current_steps": 39890, "total_steps": 204665, "loss": 0.1114, "lr": 1.945635800326237e-06, "epoch": 0.9745193364766814, "percentage": 19.49, "elapsed_time": "0:50:31", "remaining_time": "3:28:40", "throughput": 8870.42, "total_tokens": 26886272} +{"current_steps": 39895, "total_steps": 204665, "loss": 0.1352, "lr": 1.9456080622664913e-06, "epoch": 0.9746414873085286, "percentage": 19.49, "elapsed_time": "0:50:31", "remaining_time": "3:28:39", "throughput": 8870.43, "total_tokens": 26889344} +{"current_steps": 39900, "total_steps": 204665, "loss": 0.1157, "lr": 1.94558031733003e-06, "epoch": 0.9747636381403757, "percentage": 19.5, "elapsed_time": "0:50:31", "remaining_time": "3:28:39", "throughput": 8870.48, "total_tokens": 26892544} +{"current_steps": 39905, "total_steps": 204665, "loss": 0.0424, "lr": 1.9455525655170537e-06, "epoch": 0.9748857889722229, "percentage": 19.5, "elapsed_time": "0:50:32", "remaining_time": "3:28:38", "throughput": 8870.58, "total_tokens": 26895936} +{"current_steps": 39910, "total_steps": 204665, "loss": 0.0701, "lr": 1.9455248068277653e-06, "epoch": 0.9750079398040701, "percentage": 19.5, "elapsed_time": "0:50:32", "remaining_time": "3:28:38", "throughput": 8870.6, "total_tokens": 26899008} +{"current_steps": 39915, "total_steps": 204665, "loss": 0.1555, "lr": 1.945497041262366e-06, "epoch": 0.9751300906359173, "percentage": 19.5, "elapsed_time": "0:50:32", "remaining_time": "3:28:37", "throughput": 8870.65, "total_tokens": 26902208} +{"current_steps": 39920, "total_steps": 204665, "loss": 0.1173, "lr": 1.945469268821058e-06, "epoch": 0.9752522414677643, "percentage": 19.51, "elapsed_time": "0:50:33", "remaining_time": "3:28:37", "throughput": 8870.7, "total_tokens": 26905408} +{"current_steps": 39925, "total_steps": 204665, "loss": 0.1268, "lr": 1.945441489504043e-06, "epoch": 0.9753743922996115, "percentage": 19.51, "elapsed_time": "0:50:33", "remaining_time": "3:28:36", "throughput": 8870.74, "total_tokens": 26908608} +{"current_steps": 39930, "total_steps": 204665, "loss": 0.0952, "lr": 1.9454137033115234e-06, "epoch": 0.9754965431314587, "percentage": 19.51, "elapsed_time": "0:50:33", "remaining_time": "3:28:36", "throughput": 8870.76, "total_tokens": 26911680} +{"current_steps": 39935, "total_steps": 204665, "loss": 0.1024, "lr": 1.9453859102437007e-06, "epoch": 0.9756186939633059, "percentage": 19.51, "elapsed_time": "0:50:34", "remaining_time": "3:28:35", "throughput": 8870.91, "total_tokens": 26915264} +{"current_steps": 39940, "total_steps": 204665, "loss": 0.1325, "lr": 1.945358110300778e-06, "epoch": 0.9757408447951531, "percentage": 19.51, "elapsed_time": "0:50:34", "remaining_time": "3:28:35", "throughput": 8871.06, "total_tokens": 26918848} +{"current_steps": 39945, "total_steps": 204665, "loss": 0.1706, "lr": 1.9453303034829563e-06, "epoch": 0.9758629956270002, "percentage": 19.52, "elapsed_time": "0:50:34", "remaining_time": "3:28:34", "throughput": 8871.17, "total_tokens": 26922304} +{"current_steps": 39950, "total_steps": 204665, "loss": 0.0871, "lr": 1.9453024897904387e-06, "epoch": 0.9759851464588474, "percentage": 19.52, "elapsed_time": "0:50:35", "remaining_time": "3:28:34", "throughput": 8871.34, "total_tokens": 26925952} +{"current_steps": 39955, "total_steps": 204665, "loss": 0.2264, "lr": 1.9452746692234267e-06, "epoch": 0.9761072972906946, "percentage": 19.52, "elapsed_time": "0:50:35", "remaining_time": "3:28:33", "throughput": 8871.43, "total_tokens": 26929280} +{"current_steps": 39960, "total_steps": 204665, "loss": 0.0949, "lr": 1.9452468417821235e-06, "epoch": 0.9762294481225418, "percentage": 19.52, "elapsed_time": "0:50:35", "remaining_time": "3:28:32", "throughput": 8871.42, "total_tokens": 26932224} +{"current_steps": 39965, "total_steps": 204665, "loss": 0.1502, "lr": 1.945219007466731e-06, "epoch": 0.9763515989543888, "percentage": 19.53, "elapsed_time": "0:50:36", "remaining_time": "3:28:32", "throughput": 8871.45, "total_tokens": 26935360} +{"current_steps": 39970, "total_steps": 204665, "loss": 0.0737, "lr": 1.9451911662774515e-06, "epoch": 0.976473749786236, "percentage": 19.53, "elapsed_time": "0:50:36", "remaining_time": "3:28:31", "throughput": 8871.6, "total_tokens": 26938944} +{"current_steps": 39975, "total_steps": 204665, "loss": 0.18, "lr": 1.9451633182144875e-06, "epoch": 0.9765959006180832, "percentage": 19.53, "elapsed_time": "0:50:36", "remaining_time": "3:28:31", "throughput": 8871.7, "total_tokens": 26942336} +{"current_steps": 39980, "total_steps": 204665, "loss": 0.1195, "lr": 1.9451354632780418e-06, "epoch": 0.9767180514499304, "percentage": 19.53, "elapsed_time": "0:50:37", "remaining_time": "3:28:30", "throughput": 8871.77, "total_tokens": 26945664} +{"current_steps": 39985, "total_steps": 204665, "loss": 0.0777, "lr": 1.9451076014683166e-06, "epoch": 0.9768402022817776, "percentage": 19.54, "elapsed_time": "0:50:37", "remaining_time": "3:28:30", "throughput": 8872.0, "total_tokens": 26949568} +{"current_steps": 39990, "total_steps": 204665, "loss": 0.0916, "lr": 1.945079732785515e-06, "epoch": 0.9769623531136247, "percentage": 19.54, "elapsed_time": "0:50:37", "remaining_time": "3:28:29", "throughput": 8872.09, "total_tokens": 26952960} +{"current_steps": 39995, "total_steps": 204665, "loss": 0.09, "lr": 1.9450518572298394e-06, "epoch": 0.9770845039454719, "percentage": 19.54, "elapsed_time": "0:50:38", "remaining_time": "3:28:29", "throughput": 8872.08, "total_tokens": 26955904} +{"current_steps": 40000, "total_steps": 204665, "loss": 0.0635, "lr": 1.945023974801492e-06, "epoch": 0.977206654777319, "percentage": 19.54, "elapsed_time": "0:50:38", "remaining_time": "3:28:28", "throughput": 8872.32, "total_tokens": 26959872} +{"current_steps": 40005, "total_steps": 204665, "loss": 0.1043, "lr": 1.9449960855006766e-06, "epoch": 0.9773288056091662, "percentage": 19.55, "elapsed_time": "0:50:38", "remaining_time": "3:28:28", "throughput": 8872.42, "total_tokens": 26963264} +{"current_steps": 40010, "total_steps": 204665, "loss": 0.0797, "lr": 1.9449681893275956e-06, "epoch": 0.9774509564410133, "percentage": 19.55, "elapsed_time": "0:50:39", "remaining_time": "3:28:27", "throughput": 8872.49, "total_tokens": 26966528} +{"current_steps": 40015, "total_steps": 204665, "loss": 0.0364, "lr": 1.9449402862824512e-06, "epoch": 0.9775731072728605, "percentage": 19.55, "elapsed_time": "0:50:39", "remaining_time": "3:28:27", "throughput": 8872.63, "total_tokens": 26970048} +{"current_steps": 40020, "total_steps": 204665, "loss": 0.2936, "lr": 1.944912376365447e-06, "epoch": 0.9776952581047077, "percentage": 19.55, "elapsed_time": "0:50:40", "remaining_time": "3:28:26", "throughput": 8872.59, "total_tokens": 26972928} +{"current_steps": 40025, "total_steps": 204665, "loss": 0.0958, "lr": 1.9448844595767865e-06, "epoch": 0.9778174089365549, "percentage": 19.56, "elapsed_time": "0:50:40", "remaining_time": "3:28:26", "throughput": 8872.68, "total_tokens": 26976256} +{"current_steps": 40030, "total_steps": 204665, "loss": 0.2362, "lr": 1.9448565359166715e-06, "epoch": 0.9779395597684021, "percentage": 19.56, "elapsed_time": "0:50:40", "remaining_time": "3:28:25", "throughput": 8872.78, "total_tokens": 26979648} +{"current_steps": 40035, "total_steps": 204665, "loss": 0.0897, "lr": 1.9448286053853054e-06, "epoch": 0.9780617106002492, "percentage": 19.56, "elapsed_time": "0:50:41", "remaining_time": "3:28:25", "throughput": 8872.82, "total_tokens": 26982848} +{"current_steps": 40040, "total_steps": 204665, "loss": 0.1079, "lr": 1.944800667982892e-06, "epoch": 0.9781838614320963, "percentage": 19.56, "elapsed_time": "0:50:41", "remaining_time": "3:28:24", "throughput": 8872.9, "total_tokens": 26986176} +{"current_steps": 40045, "total_steps": 204665, "loss": 0.1416, "lr": 1.944772723709634e-06, "epoch": 0.9783060122639435, "percentage": 19.57, "elapsed_time": "0:50:41", "remaining_time": "3:28:24", "throughput": 8872.94, "total_tokens": 26989312} +{"current_steps": 40050, "total_steps": 204665, "loss": 0.0931, "lr": 1.9447447725657346e-06, "epoch": 0.9784281630957907, "percentage": 19.57, "elapsed_time": "0:50:42", "remaining_time": "3:28:23", "throughput": 8873.03, "total_tokens": 26992640} +{"current_steps": 40055, "total_steps": 204665, "loss": 0.081, "lr": 1.944716814551397e-06, "epoch": 0.9785503139276378, "percentage": 19.57, "elapsed_time": "0:50:42", "remaining_time": "3:28:23", "throughput": 8873.13, "total_tokens": 26996032} +{"current_steps": 40060, "total_steps": 204665, "loss": 0.052, "lr": 1.944688849666825e-06, "epoch": 0.978672464759485, "percentage": 19.57, "elapsed_time": "0:50:42", "remaining_time": "3:28:22", "throughput": 8873.2, "total_tokens": 26999360} +{"current_steps": 40065, "total_steps": 204665, "loss": 0.1148, "lr": 1.944660877912221e-06, "epoch": 0.9787946155913322, "percentage": 19.58, "elapsed_time": "0:50:43", "remaining_time": "3:28:22", "throughput": 8873.24, "total_tokens": 27002496} +{"current_steps": 40070, "total_steps": 204665, "loss": 0.0271, "lr": 1.9446328992877896e-06, "epoch": 0.9789167664231794, "percentage": 19.58, "elapsed_time": "0:50:43", "remaining_time": "3:28:21", "throughput": 8873.31, "total_tokens": 27005760} +{"current_steps": 40075, "total_steps": 204665, "loss": 0.224, "lr": 1.944604913793733e-06, "epoch": 0.9790389172550265, "percentage": 19.58, "elapsed_time": "0:50:43", "remaining_time": "3:28:21", "throughput": 8873.47, "total_tokens": 27009344} +{"current_steps": 40080, "total_steps": 204665, "loss": 0.0777, "lr": 1.944576921430256e-06, "epoch": 0.9791610680868736, "percentage": 19.58, "elapsed_time": "0:50:44", "remaining_time": "3:28:20", "throughput": 8873.5, "total_tokens": 27012480} +{"current_steps": 40085, "total_steps": 204665, "loss": 0.0154, "lr": 1.944548922197561e-06, "epoch": 0.9792832189187208, "percentage": 19.59, "elapsed_time": "0:50:44", "remaining_time": "3:28:20", "throughput": 8873.52, "total_tokens": 27015552} +{"current_steps": 40090, "total_steps": 204665, "loss": 0.0631, "lr": 1.9445209160958526e-06, "epoch": 0.979405369750568, "percentage": 19.59, "elapsed_time": "0:50:44", "remaining_time": "3:28:19", "throughput": 8873.51, "total_tokens": 27018560} +{"current_steps": 40095, "total_steps": 204665, "loss": 0.3232, "lr": 1.9444929031253337e-06, "epoch": 0.9795275205824152, "percentage": 19.59, "elapsed_time": "0:50:45", "remaining_time": "3:28:19", "throughput": 8873.6, "total_tokens": 27021888} +{"current_steps": 40100, "total_steps": 204665, "loss": 0.0867, "lr": 1.944464883286209e-06, "epoch": 0.9796496714142623, "percentage": 19.59, "elapsed_time": "0:50:45", "remaining_time": "3:28:18", "throughput": 8873.67, "total_tokens": 27025152} +{"current_steps": 40105, "total_steps": 204665, "loss": 0.0918, "lr": 1.9444368565786813e-06, "epoch": 0.9797718222461095, "percentage": 19.6, "elapsed_time": "0:50:45", "remaining_time": "3:28:17", "throughput": 8873.69, "total_tokens": 27028224} +{"current_steps": 40110, "total_steps": 204665, "loss": 0.0568, "lr": 1.9444088230029548e-06, "epoch": 0.9798939730779567, "percentage": 19.6, "elapsed_time": "0:50:46", "remaining_time": "3:28:17", "throughput": 8873.77, "total_tokens": 27031552} +{"current_steps": 40115, "total_steps": 204665, "loss": 0.0588, "lr": 1.944380782559233e-06, "epoch": 0.9800161239098039, "percentage": 19.6, "elapsed_time": "0:50:46", "remaining_time": "3:28:16", "throughput": 8873.91, "total_tokens": 27035072} +{"current_steps": 40120, "total_steps": 204665, "loss": 0.1429, "lr": 1.944352735247721e-06, "epoch": 0.9801382747416509, "percentage": 19.6, "elapsed_time": "0:50:46", "remaining_time": "3:28:16", "throughput": 8874.1, "total_tokens": 27038784} +{"current_steps": 40125, "total_steps": 204665, "loss": 0.144, "lr": 1.944324681068621e-06, "epoch": 0.9802604255734981, "percentage": 19.61, "elapsed_time": "0:50:47", "remaining_time": "3:28:15", "throughput": 8874.14, "total_tokens": 27041920} +{"current_steps": 40130, "total_steps": 204665, "loss": 0.1506, "lr": 1.944296620022138e-06, "epoch": 0.9803825764053453, "percentage": 19.61, "elapsed_time": "0:50:47", "remaining_time": "3:28:15", "throughput": 8874.2, "total_tokens": 27045184} +{"current_steps": 40135, "total_steps": 204665, "loss": 0.0484, "lr": 1.944268552108476e-06, "epoch": 0.9805047272371925, "percentage": 19.61, "elapsed_time": "0:50:47", "remaining_time": "3:28:14", "throughput": 8874.28, "total_tokens": 27048448} +{"current_steps": 40140, "total_steps": 204665, "loss": 0.1924, "lr": 1.9442404773278396e-06, "epoch": 0.9806268780690397, "percentage": 19.61, "elapsed_time": "0:50:48", "remaining_time": "3:28:14", "throughput": 8874.41, "total_tokens": 27051968} +{"current_steps": 40145, "total_steps": 204665, "loss": 0.1174, "lr": 1.9442123956804323e-06, "epoch": 0.9807490289008868, "percentage": 19.61, "elapsed_time": "0:50:48", "remaining_time": "3:28:13", "throughput": 8874.45, "total_tokens": 27055168} +{"current_steps": 40150, "total_steps": 204665, "loss": 0.0951, "lr": 1.9441843071664584e-06, "epoch": 0.980871179732734, "percentage": 19.62, "elapsed_time": "0:50:49", "remaining_time": "3:28:13", "throughput": 8874.58, "total_tokens": 27058688} +{"current_steps": 40155, "total_steps": 204665, "loss": 0.1467, "lr": 1.9441562117861224e-06, "epoch": 0.9809933305645812, "percentage": 19.62, "elapsed_time": "0:50:49", "remaining_time": "3:28:12", "throughput": 8874.6, "total_tokens": 27061760} +{"current_steps": 40160, "total_steps": 204665, "loss": 0.0662, "lr": 1.944128109539628e-06, "epoch": 0.9811154813964283, "percentage": 19.62, "elapsed_time": "0:50:49", "remaining_time": "3:28:12", "throughput": 8874.75, "total_tokens": 27065344} +{"current_steps": 40165, "total_steps": 204665, "loss": 0.0604, "lr": 1.9441000004271805e-06, "epoch": 0.9812376322282754, "percentage": 19.62, "elapsed_time": "0:50:50", "remaining_time": "3:28:11", "throughput": 8874.75, "total_tokens": 27068352} +{"current_steps": 40170, "total_steps": 204665, "loss": 0.0954, "lr": 1.944071884448984e-06, "epoch": 0.9813597830601226, "percentage": 19.63, "elapsed_time": "0:50:50", "remaining_time": "3:28:11", "throughput": 8874.86, "total_tokens": 27071744} +{"current_steps": 40175, "total_steps": 204665, "loss": 0.1245, "lr": 1.9440437616052425e-06, "epoch": 0.9814819338919698, "percentage": 19.63, "elapsed_time": "0:50:50", "remaining_time": "3:28:10", "throughput": 8874.91, "total_tokens": 27074944} +{"current_steps": 40180, "total_steps": 204665, "loss": 0.0687, "lr": 1.944015631896161e-06, "epoch": 0.981604084723817, "percentage": 19.63, "elapsed_time": "0:50:51", "remaining_time": "3:28:10", "throughput": 8874.99, "total_tokens": 27078272} +{"current_steps": 40185, "total_steps": 204665, "loss": 0.1081, "lr": 1.9439874953219437e-06, "epoch": 0.9817262355556642, "percentage": 19.63, "elapsed_time": "0:50:51", "remaining_time": "3:28:09", "throughput": 8875.08, "total_tokens": 27081600} +{"current_steps": 40190, "total_steps": 204665, "loss": 0.0673, "lr": 1.9439593518827955e-06, "epoch": 0.9818483863875113, "percentage": 19.64, "elapsed_time": "0:50:51", "remaining_time": "3:28:09", "throughput": 8875.04, "total_tokens": 27084480} +{"current_steps": 40195, "total_steps": 204665, "loss": 0.0639, "lr": 1.9439312015789213e-06, "epoch": 0.9819705372193585, "percentage": 19.64, "elapsed_time": "0:50:52", "remaining_time": "3:28:08", "throughput": 8875.06, "total_tokens": 27087552} +{"current_steps": 40200, "total_steps": 204665, "loss": 0.0221, "lr": 1.9439030444105253e-06, "epoch": 0.9820926880512056, "percentage": 19.64, "elapsed_time": "0:50:52", "remaining_time": "3:28:08", "throughput": 8875.04, "total_tokens": 27090496} +{"current_steps": 40205, "total_steps": 204665, "loss": 0.1202, "lr": 1.9438748803778123e-06, "epoch": 0.9822148388830528, "percentage": 19.64, "elapsed_time": "0:50:52", "remaining_time": "3:28:07", "throughput": 8875.15, "total_tokens": 27093888} +{"current_steps": 40210, "total_steps": 204665, "loss": 0.0858, "lr": 1.943846709480988e-06, "epoch": 0.9823369897148999, "percentage": 19.65, "elapsed_time": "0:50:53", "remaining_time": "3:28:06", "throughput": 8875.18, "total_tokens": 27097024} +{"current_steps": 40215, "total_steps": 204665, "loss": 0.0748, "lr": 1.9438185317202557e-06, "epoch": 0.9824591405467471, "percentage": 19.65, "elapsed_time": "0:50:53", "remaining_time": "3:28:06", "throughput": 8875.31, "total_tokens": 27100544} +{"current_steps": 40220, "total_steps": 204665, "loss": 0.1275, "lr": 1.9437903470958216e-06, "epoch": 0.9825812913785943, "percentage": 19.65, "elapsed_time": "0:50:53", "remaining_time": "3:28:05", "throughput": 8875.42, "total_tokens": 27104000} +{"current_steps": 40225, "total_steps": 204665, "loss": 0.0914, "lr": 1.94376215560789e-06, "epoch": 0.9827034422104415, "percentage": 19.65, "elapsed_time": "0:50:54", "remaining_time": "3:28:05", "throughput": 8875.4, "total_tokens": 27106944} +{"current_steps": 40230, "total_steps": 204665, "loss": 0.1515, "lr": 1.9437339572566666e-06, "epoch": 0.9828255930422887, "percentage": 19.66, "elapsed_time": "0:50:54", "remaining_time": "3:28:04", "throughput": 8875.4, "total_tokens": 27109952} +{"current_steps": 40235, "total_steps": 204665, "loss": 0.2121, "lr": 1.9437057520423557e-06, "epoch": 0.9829477438741357, "percentage": 19.66, "elapsed_time": "0:50:54", "remaining_time": "3:28:04", "throughput": 8875.42, "total_tokens": 27113024} +{"current_steps": 40240, "total_steps": 204665, "loss": 0.1038, "lr": 1.9436775399651628e-06, "epoch": 0.9830698947059829, "percentage": 19.66, "elapsed_time": "0:50:55", "remaining_time": "3:28:03", "throughput": 8875.45, "total_tokens": 27116160} +{"current_steps": 40245, "total_steps": 204665, "loss": 0.115, "lr": 1.9436493210252932e-06, "epoch": 0.9831920455378301, "percentage": 19.66, "elapsed_time": "0:50:55", "remaining_time": "3:28:03", "throughput": 8875.51, "total_tokens": 27119424} +{"current_steps": 40250, "total_steps": 204665, "loss": 0.0432, "lr": 1.9436210952229517e-06, "epoch": 0.9833141963696773, "percentage": 19.67, "elapsed_time": "0:50:55", "remaining_time": "3:28:02", "throughput": 8875.49, "total_tokens": 27122368} +{"current_steps": 40255, "total_steps": 204665, "loss": 0.1868, "lr": 1.943592862558344e-06, "epoch": 0.9834363472015244, "percentage": 19.67, "elapsed_time": "0:50:56", "remaining_time": "3:28:02", "throughput": 8875.65, "total_tokens": 27125952} +{"current_steps": 40260, "total_steps": 204665, "loss": 0.1231, "lr": 1.943564623031675e-06, "epoch": 0.9835584980333716, "percentage": 19.67, "elapsed_time": "0:50:56", "remaining_time": "3:28:01", "throughput": 8875.73, "total_tokens": 27129280} +{"current_steps": 40265, "total_steps": 204665, "loss": 0.1181, "lr": 1.9435363766431504e-06, "epoch": 0.9836806488652188, "percentage": 19.67, "elapsed_time": "0:50:56", "remaining_time": "3:28:01", "throughput": 8875.84, "total_tokens": 27132736} +{"current_steps": 40270, "total_steps": 204665, "loss": 0.016, "lr": 1.9435081233929755e-06, "epoch": 0.983802799697066, "percentage": 19.68, "elapsed_time": "0:50:57", "remaining_time": "3:28:00", "throughput": 8876.01, "total_tokens": 27136384} +{"current_steps": 40275, "total_steps": 204665, "loss": 0.0667, "lr": 1.9434798632813556e-06, "epoch": 0.9839249505289132, "percentage": 19.68, "elapsed_time": "0:50:57", "remaining_time": "3:28:00", "throughput": 8876.14, "total_tokens": 27139904} +{"current_steps": 40280, "total_steps": 204665, "loss": 0.078, "lr": 1.9434515963084965e-06, "epoch": 0.9840471013607602, "percentage": 19.68, "elapsed_time": "0:50:57", "remaining_time": "3:27:59", "throughput": 8876.29, "total_tokens": 27143488} +{"current_steps": 40285, "total_steps": 204665, "loss": 0.1724, "lr": 1.943423322474603e-06, "epoch": 0.9841692521926074, "percentage": 19.68, "elapsed_time": "0:50:58", "remaining_time": "3:27:59", "throughput": 8876.33, "total_tokens": 27146688} +{"current_steps": 40290, "total_steps": 204665, "loss": 0.1108, "lr": 1.9433950417798823e-06, "epoch": 0.9842914030244546, "percentage": 19.69, "elapsed_time": "0:50:58", "remaining_time": "3:27:58", "throughput": 8876.37, "total_tokens": 27149824} +{"current_steps": 40295, "total_steps": 204665, "loss": 0.1305, "lr": 1.9433667542245385e-06, "epoch": 0.9844135538563018, "percentage": 19.69, "elapsed_time": "0:50:59", "remaining_time": "3:27:58", "throughput": 8876.49, "total_tokens": 27153280} +{"current_steps": 40300, "total_steps": 204665, "loss": 0.1398, "lr": 1.9433384598087784e-06, "epoch": 0.9845357046881489, "percentage": 19.69, "elapsed_time": "0:50:59", "remaining_time": "3:27:57", "throughput": 8876.53, "total_tokens": 27156416} +{"current_steps": 40305, "total_steps": 204665, "loss": 0.0858, "lr": 1.943310158532807e-06, "epoch": 0.9846578555199961, "percentage": 19.69, "elapsed_time": "0:50:59", "remaining_time": "3:27:57", "throughput": 8876.58, "total_tokens": 27159616} +{"current_steps": 40310, "total_steps": 204665, "loss": 0.1594, "lr": 1.9432818503968304e-06, "epoch": 0.9847800063518433, "percentage": 19.7, "elapsed_time": "0:51:00", "remaining_time": "3:27:56", "throughput": 8876.7, "total_tokens": 27163072} +{"current_steps": 40315, "total_steps": 204665, "loss": 0.062, "lr": 1.9432535354010542e-06, "epoch": 0.9849021571836905, "percentage": 19.7, "elapsed_time": "0:51:00", "remaining_time": "3:27:56", "throughput": 8876.78, "total_tokens": 27166400} +{"current_steps": 40320, "total_steps": 204665, "loss": 0.1924, "lr": 1.943225213545685e-06, "epoch": 0.9850243080155375, "percentage": 19.7, "elapsed_time": "0:51:00", "remaining_time": "3:27:55", "throughput": 8876.83, "total_tokens": 27169600} +{"current_steps": 40325, "total_steps": 204665, "loss": 0.1132, "lr": 1.9431968848309287e-06, "epoch": 0.9851464588473847, "percentage": 19.7, "elapsed_time": "0:51:01", "remaining_time": "3:27:55", "throughput": 8877.03, "total_tokens": 27173376} +{"current_steps": 40330, "total_steps": 204665, "loss": 0.1546, "lr": 1.9431685492569907e-06, "epoch": 0.9852686096792319, "percentage": 19.71, "elapsed_time": "0:51:01", "remaining_time": "3:27:54", "throughput": 8877.1, "total_tokens": 27176640} +{"current_steps": 40335, "total_steps": 204665, "loss": 0.0869, "lr": 1.943140206824077e-06, "epoch": 0.9853907605110791, "percentage": 19.71, "elapsed_time": "0:51:01", "remaining_time": "3:27:54", "throughput": 8877.14, "total_tokens": 27179840} +{"current_steps": 40340, "total_steps": 204665, "loss": 0.0055, "lr": 1.943111857532394e-06, "epoch": 0.9855129113429263, "percentage": 19.71, "elapsed_time": "0:51:02", "remaining_time": "3:27:53", "throughput": 8877.19, "total_tokens": 27183040} +{"current_steps": 40345, "total_steps": 204665, "loss": 0.1628, "lr": 1.943083501382148e-06, "epoch": 0.9856350621747734, "percentage": 19.71, "elapsed_time": "0:51:02", "remaining_time": "3:27:53", "throughput": 8877.24, "total_tokens": 27186240} +{"current_steps": 40350, "total_steps": 204665, "loss": 0.0698, "lr": 1.9430551383735455e-06, "epoch": 0.9857572130066206, "percentage": 19.72, "elapsed_time": "0:51:02", "remaining_time": "3:27:52", "throughput": 8877.34, "total_tokens": 27189632} +{"current_steps": 40355, "total_steps": 204665, "loss": 0.1483, "lr": 1.943026768506792e-06, "epoch": 0.9858793638384677, "percentage": 19.72, "elapsed_time": "0:51:03", "remaining_time": "3:27:52", "throughput": 8877.42, "total_tokens": 27192960} +{"current_steps": 40360, "total_steps": 204665, "loss": 0.1704, "lr": 1.9429983917820944e-06, "epoch": 0.9860015146703149, "percentage": 19.72, "elapsed_time": "0:51:03", "remaining_time": "3:27:51", "throughput": 8877.52, "total_tokens": 27196352} +{"current_steps": 40365, "total_steps": 204665, "loss": 0.1303, "lr": 1.9429700081996587e-06, "epoch": 0.986123665502162, "percentage": 19.72, "elapsed_time": "0:51:03", "remaining_time": "3:27:50", "throughput": 8877.6, "total_tokens": 27199680} +{"current_steps": 40370, "total_steps": 204665, "loss": 0.0881, "lr": 1.9429416177596917e-06, "epoch": 0.9862458163340092, "percentage": 19.72, "elapsed_time": "0:51:04", "remaining_time": "3:27:50", "throughput": 8877.78, "total_tokens": 27203392} +{"current_steps": 40375, "total_steps": 204665, "loss": 0.1059, "lr": 1.9429132204623993e-06, "epoch": 0.9863679671658564, "percentage": 19.73, "elapsed_time": "0:51:04", "remaining_time": "3:27:50", "throughput": 8877.89, "total_tokens": 27206848} +{"current_steps": 40380, "total_steps": 204665, "loss": 0.1589, "lr": 1.9428848163079884e-06, "epoch": 0.9864901179977036, "percentage": 19.73, "elapsed_time": "0:51:04", "remaining_time": "3:27:49", "throughput": 8878.11, "total_tokens": 27210688} +{"current_steps": 40385, "total_steps": 204665, "loss": 0.08, "lr": 1.942856405296666e-06, "epoch": 0.9866122688295508, "percentage": 19.73, "elapsed_time": "0:51:05", "remaining_time": "3:27:49", "throughput": 8878.18, "total_tokens": 27214016} +{"current_steps": 40390, "total_steps": 204665, "loss": 0.1141, "lr": 1.942827987428638e-06, "epoch": 0.9867344196613979, "percentage": 19.73, "elapsed_time": "0:51:05", "remaining_time": "3:27:48", "throughput": 8878.27, "total_tokens": 27217408} +{"current_steps": 40395, "total_steps": 204665, "loss": 0.2218, "lr": 1.9427995627041107e-06, "epoch": 0.986856570493245, "percentage": 19.74, "elapsed_time": "0:51:05", "remaining_time": "3:27:48", "throughput": 8878.33, "total_tokens": 27220672} +{"current_steps": 40400, "total_steps": 204665, "loss": 0.1277, "lr": 1.942771131123292e-06, "epoch": 0.9869787213250922, "percentage": 19.74, "elapsed_time": "0:51:06", "remaining_time": "3:27:47", "throughput": 8878.58, "total_tokens": 27224640} +{"current_steps": 40405, "total_steps": 204665, "loss": 0.0716, "lr": 1.9427426926863876e-06, "epoch": 0.9871008721569394, "percentage": 19.74, "elapsed_time": "0:51:06", "remaining_time": "3:27:47", "throughput": 8878.82, "total_tokens": 27228608} +{"current_steps": 40410, "total_steps": 204665, "loss": 0.1121, "lr": 1.942714247393605e-06, "epoch": 0.9872230229887865, "percentage": 19.74, "elapsed_time": "0:51:07", "remaining_time": "3:27:46", "throughput": 8878.84, "total_tokens": 27231744} +{"current_steps": 40415, "total_steps": 204665, "loss": 0.0523, "lr": 1.942685795245151e-06, "epoch": 0.9873451738206337, "percentage": 19.75, "elapsed_time": "0:51:07", "remaining_time": "3:27:46", "throughput": 8879.09, "total_tokens": 27235712} +{"current_steps": 40420, "total_steps": 204665, "loss": 0.086, "lr": 1.9426573362412323e-06, "epoch": 0.9874673246524809, "percentage": 19.75, "elapsed_time": "0:51:07", "remaining_time": "3:27:45", "throughput": 8879.15, "total_tokens": 27238976} +{"current_steps": 40425, "total_steps": 204665, "loss": 0.2343, "lr": 1.942628870382056e-06, "epoch": 0.9875894754843281, "percentage": 19.75, "elapsed_time": "0:51:08", "remaining_time": "3:27:45", "throughput": 8879.12, "total_tokens": 27241856} +{"current_steps": 40430, "total_steps": 204665, "loss": 0.1004, "lr": 1.942600397667829e-06, "epoch": 0.9877116263161753, "percentage": 19.75, "elapsed_time": "0:51:08", "remaining_time": "3:27:44", "throughput": 8879.27, "total_tokens": 27245440} +{"current_steps": 40435, "total_steps": 204665, "loss": 0.0552, "lr": 1.942571918098758e-06, "epoch": 0.9878337771480223, "percentage": 19.76, "elapsed_time": "0:51:08", "remaining_time": "3:27:44", "throughput": 8879.38, "total_tokens": 27248896} +{"current_steps": 40440, "total_steps": 204665, "loss": 0.2256, "lr": 1.9425434316750507e-06, "epoch": 0.9879559279798695, "percentage": 19.76, "elapsed_time": "0:51:09", "remaining_time": "3:27:43", "throughput": 8879.52, "total_tokens": 27252416} +{"current_steps": 40445, "total_steps": 204665, "loss": 0.039, "lr": 1.9425149383969144e-06, "epoch": 0.9880780788117167, "percentage": 19.76, "elapsed_time": "0:51:09", "remaining_time": "3:27:43", "throughput": 8879.61, "total_tokens": 27255808} +{"current_steps": 40450, "total_steps": 204665, "loss": 0.0572, "lr": 1.9424864382645553e-06, "epoch": 0.9882002296435639, "percentage": 19.76, "elapsed_time": "0:51:09", "remaining_time": "3:27:42", "throughput": 8879.68, "total_tokens": 27259072} +{"current_steps": 40455, "total_steps": 204665, "loss": 0.0936, "lr": 1.9424579312781817e-06, "epoch": 0.988322380475411, "percentage": 19.77, "elapsed_time": "0:51:10", "remaining_time": "3:27:42", "throughput": 8879.78, "total_tokens": 27262528} +{"current_steps": 40460, "total_steps": 204665, "loss": 0.0895, "lr": 1.942429417438001e-06, "epoch": 0.9884445313072582, "percentage": 19.77, "elapsed_time": "0:51:10", "remaining_time": "3:27:41", "throughput": 8879.9, "total_tokens": 27265984} +{"current_steps": 40465, "total_steps": 204665, "loss": 0.0736, "lr": 1.9424008967442193e-06, "epoch": 0.9885666821391054, "percentage": 19.77, "elapsed_time": "0:51:10", "remaining_time": "3:27:41", "throughput": 8880.07, "total_tokens": 27269632} +{"current_steps": 40470, "total_steps": 204665, "loss": 0.073, "lr": 1.942372369197045e-06, "epoch": 0.9886888329709526, "percentage": 19.77, "elapsed_time": "0:51:11", "remaining_time": "3:27:40", "throughput": 8880.07, "total_tokens": 27272640} +{"current_steps": 40475, "total_steps": 204665, "loss": 0.0215, "lr": 1.9423438347966857e-06, "epoch": 0.9888109838027997, "percentage": 19.78, "elapsed_time": "0:51:11", "remaining_time": "3:27:40", "throughput": 8880.1, "total_tokens": 27275776} +{"current_steps": 40480, "total_steps": 204665, "loss": 0.0869, "lr": 1.942315293543348e-06, "epoch": 0.9889331346346468, "percentage": 19.78, "elapsed_time": "0:51:11", "remaining_time": "3:27:39", "throughput": 8880.13, "total_tokens": 27278912} +{"current_steps": 40485, "total_steps": 204665, "loss": 0.0808, "lr": 1.9422867454372406e-06, "epoch": 0.989055285466494, "percentage": 19.78, "elapsed_time": "0:51:12", "remaining_time": "3:27:39", "throughput": 8880.31, "total_tokens": 27282624} +{"current_steps": 40490, "total_steps": 204665, "loss": 0.1726, "lr": 1.9422581904785704e-06, "epoch": 0.9891774362983412, "percentage": 19.78, "elapsed_time": "0:51:12", "remaining_time": "3:27:38", "throughput": 8880.41, "total_tokens": 27286016} +{"current_steps": 40495, "total_steps": 204665, "loss": 0.1635, "lr": 1.9422296286675447e-06, "epoch": 0.9892995871301884, "percentage": 19.79, "elapsed_time": "0:51:12", "remaining_time": "3:27:38", "throughput": 8880.53, "total_tokens": 27289472} +{"current_steps": 40500, "total_steps": 204665, "loss": 0.0267, "lr": 1.9422010600043722e-06, "epoch": 0.9894217379620355, "percentage": 19.79, "elapsed_time": "0:51:13", "remaining_time": "3:27:37", "throughput": 8880.62, "total_tokens": 27292800} +{"current_steps": 40505, "total_steps": 204665, "loss": 0.222, "lr": 1.9421724844892606e-06, "epoch": 0.9895438887938827, "percentage": 19.79, "elapsed_time": "0:51:13", "remaining_time": "3:27:36", "throughput": 8880.65, "total_tokens": 27295936} +{"current_steps": 40510, "total_steps": 204665, "loss": 0.0899, "lr": 1.9421439021224164e-06, "epoch": 0.9896660396257299, "percentage": 19.79, "elapsed_time": "0:51:14", "remaining_time": "3:27:36", "throughput": 8880.84, "total_tokens": 27299712} +{"current_steps": 40515, "total_steps": 204665, "loss": 0.0432, "lr": 1.942115312904049e-06, "epoch": 0.989788190457577, "percentage": 19.8, "elapsed_time": "0:51:14", "remaining_time": "3:27:35", "throughput": 8880.89, "total_tokens": 27302912} +{"current_steps": 40520, "total_steps": 204665, "loss": 0.0669, "lr": 1.9420867168343652e-06, "epoch": 0.9899103412894242, "percentage": 19.8, "elapsed_time": "0:51:14", "remaining_time": "3:27:35", "throughput": 8881.09, "total_tokens": 27306688} +{"current_steps": 40525, "total_steps": 204665, "loss": 0.0034, "lr": 1.9420581139135733e-06, "epoch": 0.9900324921212713, "percentage": 19.8, "elapsed_time": "0:51:15", "remaining_time": "3:27:34", "throughput": 8881.2, "total_tokens": 27310144} +{"current_steps": 40530, "total_steps": 204665, "loss": 0.0698, "lr": 1.942029504141882e-06, "epoch": 0.9901546429531185, "percentage": 19.8, "elapsed_time": "0:51:15", "remaining_time": "3:27:34", "throughput": 8881.3, "total_tokens": 27313536} +{"current_steps": 40535, "total_steps": 204665, "loss": 0.1137, "lr": 1.9420008875194986e-06, "epoch": 0.9902767937849657, "percentage": 19.81, "elapsed_time": "0:51:15", "remaining_time": "3:27:34", "throughput": 8881.47, "total_tokens": 27317184} +{"current_steps": 40540, "total_steps": 204665, "loss": 0.0686, "lr": 1.941972264046631e-06, "epoch": 0.9903989446168129, "percentage": 19.81, "elapsed_time": "0:51:16", "remaining_time": "3:27:33", "throughput": 8881.6, "total_tokens": 27320704} +{"current_steps": 40545, "total_steps": 204665, "loss": 0.3562, "lr": 1.941943633723488e-06, "epoch": 0.99052109544866, "percentage": 19.81, "elapsed_time": "0:51:16", "remaining_time": "3:27:32", "throughput": 8881.67, "total_tokens": 27323968} +{"current_steps": 40550, "total_steps": 204665, "loss": 0.0403, "lr": 1.9419149965502773e-06, "epoch": 0.9906432462805072, "percentage": 19.81, "elapsed_time": "0:51:16", "remaining_time": "3:27:32", "throughput": 8881.73, "total_tokens": 27327232} +{"current_steps": 40555, "total_steps": 204665, "loss": 0.1088, "lr": 1.9418863525272077e-06, "epoch": 0.9907653971123543, "percentage": 19.82, "elapsed_time": "0:51:17", "remaining_time": "3:27:31", "throughput": 8881.73, "total_tokens": 27330176} +{"current_steps": 40560, "total_steps": 204665, "loss": 0.0694, "lr": 1.941857701654487e-06, "epoch": 0.9908875479442015, "percentage": 19.82, "elapsed_time": "0:51:17", "remaining_time": "3:27:31", "throughput": 8882.42, "total_tokens": 27335936} +{"current_steps": 40565, "total_steps": 204665, "loss": 0.0502, "lr": 1.9418290439323243e-06, "epoch": 0.9910096987760487, "percentage": 19.82, "elapsed_time": "0:51:17", "remaining_time": "3:27:31", "throughput": 8882.53, "total_tokens": 27339392} +{"current_steps": 40570, "total_steps": 204665, "loss": 0.1334, "lr": 1.9418003793609267e-06, "epoch": 0.9911318496078958, "percentage": 19.82, "elapsed_time": "0:51:18", "remaining_time": "3:27:30", "throughput": 8882.68, "total_tokens": 27342976} +{"current_steps": 40575, "total_steps": 204665, "loss": 0.0908, "lr": 1.941771707940504e-06, "epoch": 0.991254000439743, "percentage": 19.83, "elapsed_time": "0:51:18", "remaining_time": "3:27:30", "throughput": 8882.71, "total_tokens": 27346112} +{"current_steps": 40580, "total_steps": 204665, "loss": 0.0786, "lr": 1.941743029671264e-06, "epoch": 0.9913761512715902, "percentage": 19.83, "elapsed_time": "0:51:18", "remaining_time": "3:27:29", "throughput": 8882.76, "total_tokens": 27349312} +{"current_steps": 40585, "total_steps": 204665, "loss": 0.1647, "lr": 1.9417143445534152e-06, "epoch": 0.9914983021034374, "percentage": 19.83, "elapsed_time": "0:51:19", "remaining_time": "3:27:29", "throughput": 8882.73, "total_tokens": 27352192} +{"current_steps": 40590, "total_steps": 204665, "loss": 0.1494, "lr": 1.9416856525871666e-06, "epoch": 0.9916204529352844, "percentage": 19.83, "elapsed_time": "0:51:19", "remaining_time": "3:27:28", "throughput": 8882.81, "total_tokens": 27355520} +{"current_steps": 40595, "total_steps": 204665, "loss": 0.1283, "lr": 1.941656953772726e-06, "epoch": 0.9917426037671316, "percentage": 19.83, "elapsed_time": "0:51:19", "remaining_time": "3:27:28", "throughput": 8882.94, "total_tokens": 27359040} +{"current_steps": 40600, "total_steps": 204665, "loss": 0.1341, "lr": 1.9416282481103038e-06, "epoch": 0.9918647545989788, "percentage": 19.84, "elapsed_time": "0:51:20", "remaining_time": "3:27:27", "throughput": 8882.91, "total_tokens": 27361920} +{"current_steps": 40605, "total_steps": 204665, "loss": 0.0558, "lr": 1.941599535600107e-06, "epoch": 0.991986905430826, "percentage": 19.84, "elapsed_time": "0:51:20", "remaining_time": "3:27:26", "throughput": 8882.98, "total_tokens": 27365248} +{"current_steps": 40610, "total_steps": 204665, "loss": 0.1306, "lr": 1.9415708162423452e-06, "epoch": 0.9921090562626731, "percentage": 19.84, "elapsed_time": "0:51:20", "remaining_time": "3:27:26", "throughput": 8883.06, "total_tokens": 27368512} +{"current_steps": 40615, "total_steps": 204665, "loss": 0.1231, "lr": 1.9415420900372275e-06, "epoch": 0.9922312070945203, "percentage": 19.84, "elapsed_time": "0:51:21", "remaining_time": "3:27:25", "throughput": 8883.15, "total_tokens": 27371840} +{"current_steps": 40620, "total_steps": 204665, "loss": 0.205, "lr": 1.9415133569849622e-06, "epoch": 0.9923533579263675, "percentage": 19.85, "elapsed_time": "0:51:21", "remaining_time": "3:27:25", "throughput": 8883.19, "total_tokens": 27375040} +{"current_steps": 40625, "total_steps": 204665, "loss": 0.0851, "lr": 1.9414846170857587e-06, "epoch": 0.9924755087582147, "percentage": 19.85, "elapsed_time": "0:51:22", "remaining_time": "3:27:24", "throughput": 8883.36, "total_tokens": 27378688} +{"current_steps": 40630, "total_steps": 204665, "loss": 0.0645, "lr": 1.941455870339826e-06, "epoch": 0.9925976595900619, "percentage": 19.85, "elapsed_time": "0:51:22", "remaining_time": "3:27:24", "throughput": 8883.42, "total_tokens": 27381952} +{"current_steps": 40635, "total_steps": 204665, "loss": 0.1897, "lr": 1.9414271167473726e-06, "epoch": 0.9927198104219089, "percentage": 19.85, "elapsed_time": "0:51:22", "remaining_time": "3:27:23", "throughput": 8883.48, "total_tokens": 27385152} +{"current_steps": 40640, "total_steps": 204665, "loss": 0.0993, "lr": 1.941398356308608e-06, "epoch": 0.9928419612537561, "percentage": 19.86, "elapsed_time": "0:51:23", "remaining_time": "3:27:23", "throughput": 8883.64, "total_tokens": 27388800} +{"current_steps": 40645, "total_steps": 204665, "loss": 0.0957, "lr": 1.9413695890237418e-06, "epoch": 0.9929641120856033, "percentage": 19.86, "elapsed_time": "0:51:23", "remaining_time": "3:27:22", "throughput": 8883.83, "total_tokens": 27392512} +{"current_steps": 40650, "total_steps": 204665, "loss": 0.1266, "lr": 1.9413408148929823e-06, "epoch": 0.9930862629174505, "percentage": 19.86, "elapsed_time": "0:51:23", "remaining_time": "3:27:22", "throughput": 8884.41, "total_tokens": 27397824} +{"current_steps": 40655, "total_steps": 204665, "loss": 0.17, "lr": 1.941312033916539e-06, "epoch": 0.9932084137492976, "percentage": 19.86, "elapsed_time": "0:51:24", "remaining_time": "3:27:22", "throughput": 8884.42, "total_tokens": 27400896} +{"current_steps": 40660, "total_steps": 204665, "loss": 0.1171, "lr": 1.941283246094622e-06, "epoch": 0.9933305645811448, "percentage": 19.87, "elapsed_time": "0:51:24", "remaining_time": "3:27:21", "throughput": 8884.52, "total_tokens": 27404352} +{"current_steps": 40665, "total_steps": 204665, "loss": 0.0449, "lr": 1.9412544514274395e-06, "epoch": 0.993452715412992, "percentage": 19.87, "elapsed_time": "0:51:24", "remaining_time": "3:27:21", "throughput": 8884.59, "total_tokens": 27407616} +{"current_steps": 40670, "total_steps": 204665, "loss": 0.1234, "lr": 1.941225649915202e-06, "epoch": 0.9935748662448391, "percentage": 19.87, "elapsed_time": "0:51:25", "remaining_time": "3:27:20", "throughput": 8884.65, "total_tokens": 27410880} +{"current_steps": 40675, "total_steps": 204665, "loss": 0.1513, "lr": 1.941196841558118e-06, "epoch": 0.9936970170766863, "percentage": 19.87, "elapsed_time": "0:51:25", "remaining_time": "3:27:20", "throughput": 8884.75, "total_tokens": 27414272} +{"current_steps": 40680, "total_steps": 204665, "loss": 0.154, "lr": 1.9411680263563976e-06, "epoch": 0.9938191679085334, "percentage": 19.88, "elapsed_time": "0:51:25", "remaining_time": "3:27:19", "throughput": 8884.81, "total_tokens": 27417536} +{"current_steps": 40685, "total_steps": 204665, "loss": 0.1063, "lr": 1.9411392043102502e-06, "epoch": 0.9939413187403806, "percentage": 19.88, "elapsed_time": "0:51:26", "remaining_time": "3:27:19", "throughput": 8884.97, "total_tokens": 27421120} +{"current_steps": 40690, "total_steps": 204665, "loss": 0.0305, "lr": 1.9411103754198852e-06, "epoch": 0.9940634695722278, "percentage": 19.88, "elapsed_time": "0:51:26", "remaining_time": "3:27:18", "throughput": 8885.1, "total_tokens": 27424640} +{"current_steps": 40695, "total_steps": 204665, "loss": 0.1007, "lr": 1.9410815396855126e-06, "epoch": 0.994185620404075, "percentage": 19.88, "elapsed_time": "0:51:26", "remaining_time": "3:27:17", "throughput": 8885.13, "total_tokens": 27427776} +{"current_steps": 40700, "total_steps": 204665, "loss": 0.1402, "lr": 1.941052697107342e-06, "epoch": 0.9943077712359221, "percentage": 19.89, "elapsed_time": "0:51:27", "remaining_time": "3:27:17", "throughput": 8885.18, "total_tokens": 27430976} +{"current_steps": 40705, "total_steps": 204665, "loss": 0.0659, "lr": 1.941023847685583e-06, "epoch": 0.9944299220677693, "percentage": 19.89, "elapsed_time": "0:51:27", "remaining_time": "3:27:16", "throughput": 8885.28, "total_tokens": 27434368} +{"current_steps": 40710, "total_steps": 204665, "loss": 0.1137, "lr": 1.9409949914204454e-06, "epoch": 0.9945520728996164, "percentage": 19.89, "elapsed_time": "0:51:27", "remaining_time": "3:27:16", "throughput": 8885.37, "total_tokens": 27437696} +{"current_steps": 40715, "total_steps": 204665, "loss": 0.1326, "lr": 1.9409661283121393e-06, "epoch": 0.9946742237314636, "percentage": 19.89, "elapsed_time": "0:51:28", "remaining_time": "3:27:16", "throughput": 8884.73, "total_tokens": 27441088} +{"current_steps": 40720, "total_steps": 204665, "loss": 0.1082, "lr": 1.9409372583608743e-06, "epoch": 0.9947963745633108, "percentage": 19.9, "elapsed_time": "0:51:28", "remaining_time": "3:27:16", "throughput": 8884.88, "total_tokens": 27444672} +{"current_steps": 40725, "total_steps": 204665, "loss": 0.0943, "lr": 1.9409083815668604e-06, "epoch": 0.9949185253951579, "percentage": 19.9, "elapsed_time": "0:51:29", "remaining_time": "3:27:15", "throughput": 8885.03, "total_tokens": 27448256} +{"current_steps": 40730, "total_steps": 204665, "loss": 0.1296, "lr": 1.9408794979303077e-06, "epoch": 0.9950406762270051, "percentage": 19.9, "elapsed_time": "0:51:29", "remaining_time": "3:27:15", "throughput": 8885.19, "total_tokens": 27451904} +{"current_steps": 40735, "total_steps": 204665, "loss": 0.0584, "lr": 1.940850607451426e-06, "epoch": 0.9951628270588523, "percentage": 19.9, "elapsed_time": "0:51:29", "remaining_time": "3:27:14", "throughput": 8885.33, "total_tokens": 27455424} +{"current_steps": 40740, "total_steps": 204665, "loss": 0.2389, "lr": 1.940821710130426e-06, "epoch": 0.9952849778906995, "percentage": 19.91, "elapsed_time": "0:51:30", "remaining_time": "3:27:14", "throughput": 8885.45, "total_tokens": 27458944} +{"current_steps": 40745, "total_steps": 204665, "loss": 0.1535, "lr": 1.9407928059675176e-06, "epoch": 0.9954071287225466, "percentage": 19.91, "elapsed_time": "0:51:30", "remaining_time": "3:27:13", "throughput": 8885.54, "total_tokens": 27462272} +{"current_steps": 40750, "total_steps": 204665, "loss": 0.1028, "lr": 1.9407638949629102e-06, "epoch": 0.9955292795543937, "percentage": 19.91, "elapsed_time": "0:51:31", "remaining_time": "3:27:13", "throughput": 8885.66, "total_tokens": 27465792} +{"current_steps": 40755, "total_steps": 204665, "loss": 0.0626, "lr": 1.940734977116815e-06, "epoch": 0.9956514303862409, "percentage": 19.91, "elapsed_time": "0:51:31", "remaining_time": "3:27:13", "throughput": 8885.83, "total_tokens": 27469504} +{"current_steps": 40760, "total_steps": 204665, "loss": 0.1577, "lr": 1.9407060524294426e-06, "epoch": 0.9957735812180881, "percentage": 19.92, "elapsed_time": "0:51:31", "remaining_time": "3:27:12", "throughput": 8885.93, "total_tokens": 27472896} +{"current_steps": 40765, "total_steps": 204665, "loss": 0.1161, "lr": 1.9406771209010024e-06, "epoch": 0.9958957320499353, "percentage": 19.92, "elapsed_time": "0:51:32", "remaining_time": "3:27:12", "throughput": 8886.1, "total_tokens": 27476608} +{"current_steps": 40770, "total_steps": 204665, "loss": 0.1274, "lr": 1.9406481825317052e-06, "epoch": 0.9960178828817824, "percentage": 19.92, "elapsed_time": "0:51:32", "remaining_time": "3:27:11", "throughput": 8886.24, "total_tokens": 27480128} +{"current_steps": 40775, "total_steps": 204665, "loss": 0.0476, "lr": 1.940619237321761e-06, "epoch": 0.9961400337136296, "percentage": 19.92, "elapsed_time": "0:51:32", "remaining_time": "3:27:11", "throughput": 8886.28, "total_tokens": 27483264} +{"current_steps": 40780, "total_steps": 204665, "loss": 0.0756, "lr": 1.9405902852713812e-06, "epoch": 0.9962621845454768, "percentage": 19.93, "elapsed_time": "0:51:33", "remaining_time": "3:27:10", "throughput": 8886.44, "total_tokens": 27486912} +{"current_steps": 40785, "total_steps": 204665, "loss": 0.1296, "lr": 1.940561326380776e-06, "epoch": 0.996384335377324, "percentage": 19.93, "elapsed_time": "0:51:33", "remaining_time": "3:27:10", "throughput": 8886.5, "total_tokens": 27490176} +{"current_steps": 40790, "total_steps": 204665, "loss": 0.0765, "lr": 1.940532360650155e-06, "epoch": 0.996506486209171, "percentage": 19.93, "elapsed_time": "0:51:33", "remaining_time": "3:27:09", "throughput": 8886.51, "total_tokens": 27493248} +{"current_steps": 40795, "total_steps": 204665, "loss": 0.0651, "lr": 1.9405033880797303e-06, "epoch": 0.9966286370410182, "percentage": 19.93, "elapsed_time": "0:51:34", "remaining_time": "3:27:08", "throughput": 8886.58, "total_tokens": 27496512} +{"current_steps": 40800, "total_steps": 204665, "loss": 0.0417, "lr": 1.940474408669712e-06, "epoch": 0.9967507878728654, "percentage": 19.94, "elapsed_time": "0:51:34", "remaining_time": "3:27:08", "throughput": 8886.65, "total_tokens": 27499776} +{"current_steps": 40805, "total_steps": 204665, "loss": 0.016, "lr": 1.9404454224203108e-06, "epoch": 0.9968729387047126, "percentage": 19.94, "elapsed_time": "0:51:34", "remaining_time": "3:27:07", "throughput": 8886.74, "total_tokens": 27503168} +{"current_steps": 40810, "total_steps": 204665, "loss": 0.1012, "lr": 1.9404164293317374e-06, "epoch": 0.9969950895365598, "percentage": 19.94, "elapsed_time": "0:51:35", "remaining_time": "3:27:07", "throughput": 8886.77, "total_tokens": 27506304} +{"current_steps": 40815, "total_steps": 204665, "loss": 0.2259, "lr": 1.940387429404203e-06, "epoch": 0.9971172403684069, "percentage": 19.94, "elapsed_time": "0:51:35", "remaining_time": "3:27:06", "throughput": 8886.85, "total_tokens": 27509632} +{"current_steps": 40820, "total_steps": 204665, "loss": 0.0741, "lr": 1.940358422637918e-06, "epoch": 0.9972393912002541, "percentage": 19.94, "elapsed_time": "0:51:35", "remaining_time": "3:27:06", "throughput": 8886.98, "total_tokens": 27513152} +{"current_steps": 40825, "total_steps": 204665, "loss": 0.1664, "lr": 1.940329409033094e-06, "epoch": 0.9973615420321013, "percentage": 19.95, "elapsed_time": "0:51:36", "remaining_time": "3:27:05", "throughput": 8887.04, "total_tokens": 27516352} +{"current_steps": 40830, "total_steps": 204665, "loss": 0.1112, "lr": 1.9403003885899415e-06, "epoch": 0.9974836928639484, "percentage": 19.95, "elapsed_time": "0:51:36", "remaining_time": "3:27:05", "throughput": 8887.11, "total_tokens": 27519616} +{"current_steps": 40835, "total_steps": 204665, "loss": 0.0942, "lr": 1.9402713613086716e-06, "epoch": 0.9976058436957955, "percentage": 19.95, "elapsed_time": "0:51:36", "remaining_time": "3:27:04", "throughput": 8887.17, "total_tokens": 27522880} +{"current_steps": 40840, "total_steps": 204665, "loss": 0.2131, "lr": 1.9402423271894952e-06, "epoch": 0.9977279945276427, "percentage": 19.95, "elapsed_time": "0:51:37", "remaining_time": "3:27:04", "throughput": 8887.17, "total_tokens": 27525824} +{"current_steps": 40845, "total_steps": 204665, "loss": 0.0197, "lr": 1.9402132862326242e-06, "epoch": 0.9978501453594899, "percentage": 19.96, "elapsed_time": "0:51:37", "remaining_time": "3:27:03", "throughput": 8887.27, "total_tokens": 27529216} +{"current_steps": 40850, "total_steps": 204665, "loss": 0.0347, "lr": 1.940184238438269e-06, "epoch": 0.9979722961913371, "percentage": 19.96, "elapsed_time": "0:51:37", "remaining_time": "3:27:03", "throughput": 8887.41, "total_tokens": 27532800} +{"current_steps": 40855, "total_steps": 204665, "loss": 0.0903, "lr": 1.940155183806641e-06, "epoch": 0.9980944470231842, "percentage": 19.96, "elapsed_time": "0:51:38", "remaining_time": "3:27:02", "throughput": 8887.47, "total_tokens": 27536064} +{"current_steps": 40860, "total_steps": 204665, "loss": 0.1097, "lr": 1.940126122337952e-06, "epoch": 0.9982165978550314, "percentage": 19.96, "elapsed_time": "0:51:38", "remaining_time": "3:27:02", "throughput": 8887.43, "total_tokens": 27538880} +{"current_steps": 40865, "total_steps": 204665, "loss": 0.032, "lr": 1.9400970540324125e-06, "epoch": 0.9983387486868786, "percentage": 19.97, "elapsed_time": "0:51:38", "remaining_time": "3:27:01", "throughput": 8887.49, "total_tokens": 27542144} +{"current_steps": 40870, "total_steps": 204665, "loss": 0.09, "lr": 1.940067978890235e-06, "epoch": 0.9984608995187257, "percentage": 19.97, "elapsed_time": "0:51:39", "remaining_time": "3:27:01", "throughput": 8887.57, "total_tokens": 27545472} +{"current_steps": 40875, "total_steps": 204665, "loss": 0.1085, "lr": 1.9400388969116295e-06, "epoch": 0.9985830503505729, "percentage": 19.97, "elapsed_time": "0:51:39", "remaining_time": "3:27:00", "throughput": 8887.65, "total_tokens": 27548800} +{"current_steps": 40880, "total_steps": 204665, "loss": 0.1362, "lr": 1.9400098080968087e-06, "epoch": 0.99870520118242, "percentage": 19.97, "elapsed_time": "0:51:40", "remaining_time": "3:27:00", "throughput": 8887.81, "total_tokens": 27552448} +{"current_steps": 40885, "total_steps": 204665, "loss": 0.0632, "lr": 1.939980712445984e-06, "epoch": 0.9988273520142672, "percentage": 19.98, "elapsed_time": "0:51:40", "remaining_time": "3:26:59", "throughput": 8887.96, "total_tokens": 27556096} +{"current_steps": 40890, "total_steps": 204665, "loss": 0.0939, "lr": 1.9399516099593666e-06, "epoch": 0.9989495028461144, "percentage": 19.98, "elapsed_time": "0:51:40", "remaining_time": "3:26:59", "throughput": 8888.09, "total_tokens": 27559616} +{"current_steps": 40895, "total_steps": 204665, "loss": 0.1567, "lr": 1.9399225006371684e-06, "epoch": 0.9990716536779616, "percentage": 19.98, "elapsed_time": "0:51:41", "remaining_time": "3:26:58", "throughput": 8888.15, "total_tokens": 27562880} +{"current_steps": 40900, "total_steps": 204665, "loss": 0.2327, "lr": 1.9398933844796006e-06, "epoch": 0.9991938045098087, "percentage": 19.98, "elapsed_time": "0:51:41", "remaining_time": "3:26:58", "throughput": 8888.27, "total_tokens": 27566336} +{"current_steps": 40905, "total_steps": 204665, "loss": 0.091, "lr": 1.9398642614868755e-06, "epoch": 0.9993159553416558, "percentage": 19.99, "elapsed_time": "0:51:41", "remaining_time": "3:26:57", "throughput": 8888.42, "total_tokens": 27569920} +{"current_steps": 40910, "total_steps": 204665, "loss": 0.0299, "lr": 1.9398351316592048e-06, "epoch": 0.999438106173503, "percentage": 19.99, "elapsed_time": "0:51:42", "remaining_time": "3:26:57", "throughput": 8888.51, "total_tokens": 27573312} +{"current_steps": 40915, "total_steps": 204665, "loss": 0.0406, "lr": 1.9398059949967998e-06, "epoch": 0.9995602570053502, "percentage": 19.99, "elapsed_time": "0:51:42", "remaining_time": "3:26:56", "throughput": 8888.83, "total_tokens": 27577536} +{"current_steps": 40920, "total_steps": 204665, "loss": 0.0677, "lr": 1.9397768514998736e-06, "epoch": 0.9996824078371974, "percentage": 19.99, "elapsed_time": "0:51:42", "remaining_time": "3:26:56", "throughput": 8888.95, "total_tokens": 27581056} +{"current_steps": 40925, "total_steps": 204665, "loss": 0.1449, "lr": 1.9397477011686366e-06, "epoch": 0.9998045586690445, "percentage": 20.0, "elapsed_time": "0:51:43", "remaining_time": "3:26:55", "throughput": 8888.99, "total_tokens": 27584320} +{"current_steps": 40930, "total_steps": 204665, "loss": 0.0949, "lr": 1.939718544003302e-06, "epoch": 0.9999267095008917, "percentage": 20.0, "elapsed_time": "0:51:43", "remaining_time": "3:26:55", "throughput": 8889.06, "total_tokens": 27587584} +{"current_steps": 40935, "total_steps": 204665, "loss": 0.1214, "lr": 1.9396893800040813e-06, "epoch": 1.0000488603327389, "percentage": 20.0, "elapsed_time": "0:51:44", "remaining_time": "3:26:55", "throughput": 8888.86, "total_tokens": 27591136} +{"current_steps": 40936, "total_steps": 204665, "eval_loss": 0.12240181118249893, "epoch": 1.0000732904991083, "percentage": 20.0, "elapsed_time": "0:52:31", "remaining_time": "3:30:06", "throughput": 8754.23, "total_tokens": 27591776} +{"current_steps": 40940, "total_steps": 204665, "loss": 0.0258, "lr": 1.9396602091711864e-06, "epoch": 1.000171011164586, "percentage": 20.0, "elapsed_time": "0:53:09", "remaining_time": "3:32:36", "throughput": 8650.9, "total_tokens": 27594592} +{"current_steps": 40945, "total_steps": 204665, "loss": 0.0086, "lr": 1.93963103150483e-06, "epoch": 1.0002931619964333, "percentage": 20.01, "elapsed_time": "0:53:10", "remaining_time": "3:32:35", "throughput": 8651.06, "total_tokens": 27598112} +{"current_steps": 40950, "total_steps": 204665, "loss": 0.0999, "lr": 1.939601847005224e-06, "epoch": 1.0004153128282804, "percentage": 20.01, "elapsed_time": "0:53:10", "remaining_time": "3:32:35", "throughput": 8651.23, "total_tokens": 27601696} +{"current_steps": 40955, "total_steps": 204665, "loss": 0.0866, "lr": 1.9395726556725806e-06, "epoch": 1.0005374636601276, "percentage": 20.01, "elapsed_time": "0:53:10", "remaining_time": "3:32:34", "throughput": 8651.45, "total_tokens": 27605536} +{"current_steps": 40960, "total_steps": 204665, "loss": 0.0141, "lr": 1.939543457507112e-06, "epoch": 1.0006596144919746, "percentage": 20.01, "elapsed_time": "0:53:11", "remaining_time": "3:32:34", "throughput": 8651.49, "total_tokens": 27608608} +{"current_steps": 40965, "total_steps": 204665, "loss": 0.0677, "lr": 1.939514252509031e-06, "epoch": 1.0007817653238218, "percentage": 20.02, "elapsed_time": "0:53:11", "remaining_time": "3:32:33", "throughput": 8651.58, "total_tokens": 27611872} +{"current_steps": 40970, "total_steps": 204665, "loss": 0.0574, "lr": 1.93948504067855e-06, "epoch": 1.000903916155669, "percentage": 20.02, "elapsed_time": "0:53:11", "remaining_time": "3:32:33", "throughput": 8651.69, "total_tokens": 27615264} +{"current_steps": 40975, "total_steps": 204665, "loss": 0.0023, "lr": 1.93945582201588e-06, "epoch": 1.0010260669875162, "percentage": 20.02, "elapsed_time": "0:53:12", "remaining_time": "3:32:32", "throughput": 8651.77, "total_tokens": 27618464} +{"current_steps": 40980, "total_steps": 204665, "loss": 0.0609, "lr": 1.939426596521235e-06, "epoch": 1.0011482178193634, "percentage": 20.02, "elapsed_time": "0:53:12", "remaining_time": "3:32:32", "throughput": 8651.95, "total_tokens": 27622112} +{"current_steps": 40985, "total_steps": 204665, "loss": 0.0448, "lr": 1.9393973641948275e-06, "epoch": 1.0012703686512106, "percentage": 20.03, "elapsed_time": "0:53:12", "remaining_time": "3:32:31", "throughput": 8652.02, "total_tokens": 27625312} +{"current_steps": 40990, "total_steps": 204665, "loss": 0.1127, "lr": 1.9393681250368696e-06, "epoch": 1.0013925194830577, "percentage": 20.03, "elapsed_time": "0:53:13", "remaining_time": "3:32:30", "throughput": 8652.05, "total_tokens": 27628320} +{"current_steps": 40995, "total_steps": 204665, "loss": 0.0845, "lr": 1.939338879047574e-06, "epoch": 1.001514670314905, "percentage": 20.03, "elapsed_time": "0:53:13", "remaining_time": "3:32:30", "throughput": 8652.17, "total_tokens": 27631712} +{"current_steps": 41000, "total_steps": 204665, "loss": 0.118, "lr": 1.9393096262271533e-06, "epoch": 1.001636821146752, "percentage": 20.03, "elapsed_time": "0:53:13", "remaining_time": "3:32:29", "throughput": 8652.23, "total_tokens": 27634848} +{"current_steps": 41005, "total_steps": 204665, "loss": 0.0737, "lr": 1.9392803665758206e-06, "epoch": 1.001758971978599, "percentage": 20.04, "elapsed_time": "0:53:14", "remaining_time": "3:32:29", "throughput": 8652.3, "total_tokens": 27638048} +{"current_steps": 41010, "total_steps": 204665, "loss": 0.1221, "lr": 1.939251100093788e-06, "epoch": 1.0018811228104463, "percentage": 20.04, "elapsed_time": "0:53:14", "remaining_time": "3:32:28", "throughput": 8652.35, "total_tokens": 27641184} +{"current_steps": 41015, "total_steps": 204665, "loss": 0.0264, "lr": 1.9392218267812687e-06, "epoch": 1.0020032736422935, "percentage": 20.04, "elapsed_time": "0:53:14", "remaining_time": "3:32:28", "throughput": 8652.5, "total_tokens": 27644704} +{"current_steps": 41020, "total_steps": 204665, "loss": 0.0347, "lr": 1.939192546638476e-06, "epoch": 1.0021254244741407, "percentage": 20.04, "elapsed_time": "0:53:15", "remaining_time": "3:32:27", "throughput": 8652.64, "total_tokens": 27648160} +{"current_steps": 41025, "total_steps": 204665, "loss": 0.0593, "lr": 1.9391632596656224e-06, "epoch": 1.0022475753059878, "percentage": 20.04, "elapsed_time": "0:53:15", "remaining_time": "3:32:26", "throughput": 8652.69, "total_tokens": 27651296} +{"current_steps": 41030, "total_steps": 204665, "loss": 0.0681, "lr": 1.9391339658629212e-06, "epoch": 1.002369726137835, "percentage": 20.05, "elapsed_time": "0:53:16", "remaining_time": "3:32:26", "throughput": 8652.72, "total_tokens": 27654304} +{"current_steps": 41035, "total_steps": 204665, "loss": 0.083, "lr": 1.939104665230585e-06, "epoch": 1.0024918769696822, "percentage": 20.05, "elapsed_time": "0:53:16", "remaining_time": "3:32:25", "throughput": 8652.73, "total_tokens": 27657248} +{"current_steps": 41040, "total_steps": 204665, "loss": 0.1933, "lr": 1.939075357768827e-06, "epoch": 1.0026140278015294, "percentage": 20.05, "elapsed_time": "0:53:16", "remaining_time": "3:32:25", "throughput": 8652.83, "total_tokens": 27660576} +{"current_steps": 41045, "total_steps": 204665, "loss": 0.0077, "lr": 1.9390460434778607e-06, "epoch": 1.0027361786333764, "percentage": 20.05, "elapsed_time": "0:53:17", "remaining_time": "3:32:24", "throughput": 8652.96, "total_tokens": 27664032} +{"current_steps": 41050, "total_steps": 204665, "loss": 0.0751, "lr": 1.9390167223578984e-06, "epoch": 1.0028583294652236, "percentage": 20.06, "elapsed_time": "0:53:17", "remaining_time": "3:32:24", "throughput": 8653.06, "total_tokens": 27667360} +{"current_steps": 41055, "total_steps": 204665, "loss": 0.0029, "lr": 1.9389873944091544e-06, "epoch": 1.0029804802970708, "percentage": 20.06, "elapsed_time": "0:53:17", "remaining_time": "3:32:23", "throughput": 8653.28, "total_tokens": 27671136} +{"current_steps": 41060, "total_steps": 204665, "loss": 0.0609, "lr": 1.9389580596318417e-06, "epoch": 1.003102631128918, "percentage": 20.06, "elapsed_time": "0:53:18", "remaining_time": "3:32:23", "throughput": 8653.51, "total_tokens": 27674976} +{"current_steps": 41065, "total_steps": 204665, "loss": 0.0066, "lr": 1.9389287180261733e-06, "epoch": 1.0032247819607651, "percentage": 20.06, "elapsed_time": "0:53:18", "remaining_time": "3:32:22", "throughput": 8653.7, "total_tokens": 27678624} +{"current_steps": 41070, "total_steps": 204665, "loss": 0.0896, "lr": 1.9388993695923627e-06, "epoch": 1.0033469327926123, "percentage": 20.07, "elapsed_time": "0:53:18", "remaining_time": "3:32:21", "throughput": 8653.79, "total_tokens": 27681888} +{"current_steps": 41075, "total_steps": 204665, "loss": 0.0598, "lr": 1.938870014330623e-06, "epoch": 1.0034690836244595, "percentage": 20.07, "elapsed_time": "0:53:19", "remaining_time": "3:32:21", "throughput": 8654.02, "total_tokens": 27685728} +{"current_steps": 41080, "total_steps": 204665, "loss": 0.1375, "lr": 1.938840652241168e-06, "epoch": 1.0035912344563067, "percentage": 20.07, "elapsed_time": "0:53:19", "remaining_time": "3:32:20", "throughput": 8654.2, "total_tokens": 27689376} +{"current_steps": 41085, "total_steps": 204665, "loss": 0.1238, "lr": 1.938811283324212e-06, "epoch": 1.003713385288154, "percentage": 20.07, "elapsed_time": "0:53:19", "remaining_time": "3:32:20", "throughput": 8654.29, "total_tokens": 27692704} +{"current_steps": 41090, "total_steps": 204665, "loss": 0.0372, "lr": 1.9387819075799674e-06, "epoch": 1.0038355361200009, "percentage": 20.08, "elapsed_time": "0:53:20", "remaining_time": "3:32:19", "throughput": 8654.44, "total_tokens": 27696224} +{"current_steps": 41095, "total_steps": 204665, "loss": 0.0507, "lr": 1.9387525250086482e-06, "epoch": 1.003957686951848, "percentage": 20.08, "elapsed_time": "0:53:20", "remaining_time": "3:32:19", "throughput": 8654.51, "total_tokens": 27699424} +{"current_steps": 41100, "total_steps": 204665, "loss": 0.0971, "lr": 1.938723135610468e-06, "epoch": 1.0040798377836953, "percentage": 20.08, "elapsed_time": "0:53:20", "remaining_time": "3:32:18", "throughput": 8654.56, "total_tokens": 27702560} +{"current_steps": 41105, "total_steps": 204665, "loss": 0.1414, "lr": 1.938693739385641e-06, "epoch": 1.0042019886155424, "percentage": 20.08, "elapsed_time": "0:53:21", "remaining_time": "3:32:18", "throughput": 8654.79, "total_tokens": 27706400} +{"current_steps": 41110, "total_steps": 204665, "loss": 0.0009, "lr": 1.9386643363343806e-06, "epoch": 1.0043241394473896, "percentage": 20.09, "elapsed_time": "0:53:21", "remaining_time": "3:32:17", "throughput": 8654.89, "total_tokens": 27709792} +{"current_steps": 41115, "total_steps": 204665, "loss": 0.0653, "lr": 1.9386349264569004e-06, "epoch": 1.0044462902792368, "percentage": 20.09, "elapsed_time": "0:53:21", "remaining_time": "3:32:17", "throughput": 8654.95, "total_tokens": 27712992} +{"current_steps": 41120, "total_steps": 204665, "loss": 0.0738, "lr": 1.938605509753415e-06, "epoch": 1.004568441111084, "percentage": 20.09, "elapsed_time": "0:53:22", "remaining_time": "3:32:16", "throughput": 8655.08, "total_tokens": 27716448} +{"current_steps": 41125, "total_steps": 204665, "loss": 0.0532, "lr": 1.9385760862241374e-06, "epoch": 1.0046905919429312, "percentage": 20.09, "elapsed_time": "0:53:22", "remaining_time": "3:32:15", "throughput": 8655.13, "total_tokens": 27719584} +{"current_steps": 41130, "total_steps": 204665, "loss": 0.0434, "lr": 1.9385466558692825e-06, "epoch": 1.0048127427747784, "percentage": 20.1, "elapsed_time": "0:53:23", "remaining_time": "3:32:15", "throughput": 8655.38, "total_tokens": 27723552} +{"current_steps": 41135, "total_steps": 204665, "loss": 0.002, "lr": 1.9385172186890636e-06, "epoch": 1.0049348936066254, "percentage": 20.1, "elapsed_time": "0:53:23", "remaining_time": "3:32:14", "throughput": 8655.44, "total_tokens": 27726752} +{"current_steps": 41140, "total_steps": 204665, "loss": 0.1055, "lr": 1.938487774683695e-06, "epoch": 1.0050570444384725, "percentage": 20.1, "elapsed_time": "0:53:23", "remaining_time": "3:32:14", "throughput": 8655.59, "total_tokens": 27730336} +{"current_steps": 41145, "total_steps": 204665, "loss": 0.0844, "lr": 1.938458323853391e-06, "epoch": 1.0051791952703197, "percentage": 20.1, "elapsed_time": "0:53:24", "remaining_time": "3:32:13", "throughput": 8655.64, "total_tokens": 27733472} +{"current_steps": 41150, "total_steps": 204665, "loss": 0.0021, "lr": 1.9384288661983656e-06, "epoch": 1.005301346102167, "percentage": 20.11, "elapsed_time": "0:53:24", "remaining_time": "3:32:13", "throughput": 8655.77, "total_tokens": 27736928} +{"current_steps": 41155, "total_steps": 204665, "loss": 0.1801, "lr": 1.938399401718833e-06, "epoch": 1.0054234969340141, "percentage": 20.11, "elapsed_time": "0:53:24", "remaining_time": "3:32:12", "throughput": 8655.8, "total_tokens": 27740000} +{"current_steps": 41160, "total_steps": 204665, "loss": 0.0933, "lr": 1.938369930415008e-06, "epoch": 1.0055456477658613, "percentage": 20.11, "elapsed_time": "0:53:25", "remaining_time": "3:32:12", "throughput": 8655.82, "total_tokens": 27743008} +{"current_steps": 41165, "total_steps": 204665, "loss": 0.1244, "lr": 1.938340452287104e-06, "epoch": 1.0056677985977085, "percentage": 20.11, "elapsed_time": "0:53:25", "remaining_time": "3:32:11", "throughput": 8655.94, "total_tokens": 27746464} +{"current_steps": 41170, "total_steps": 204665, "loss": 0.0529, "lr": 1.938310967335336e-06, "epoch": 1.0057899494295557, "percentage": 20.12, "elapsed_time": "0:53:25", "remaining_time": "3:32:11", "throughput": 8656.08, "total_tokens": 27749920} +{"current_steps": 41175, "total_steps": 204665, "loss": 0.04, "lr": 1.9382814755599184e-06, "epoch": 1.0059121002614029, "percentage": 20.12, "elapsed_time": "0:53:26", "remaining_time": "3:32:10", "throughput": 8656.16, "total_tokens": 27753248} +{"current_steps": 41180, "total_steps": 204665, "loss": 0.0037, "lr": 1.938251976961065e-06, "epoch": 1.0060342510932498, "percentage": 20.12, "elapsed_time": "0:53:26", "remaining_time": "3:32:09", "throughput": 8656.24, "total_tokens": 27756512} +{"current_steps": 41185, "total_steps": 204665, "loss": 0.1446, "lr": 1.9382224715389914e-06, "epoch": 1.006156401925097, "percentage": 20.12, "elapsed_time": "0:53:26", "remaining_time": "3:32:09", "throughput": 8656.31, "total_tokens": 27759712} +{"current_steps": 41190, "total_steps": 204665, "loss": 0.0467, "lr": 1.938192959293912e-06, "epoch": 1.0062785527569442, "percentage": 20.13, "elapsed_time": "0:53:27", "remaining_time": "3:32:08", "throughput": 8656.48, "total_tokens": 27763360} +{"current_steps": 41195, "total_steps": 204665, "loss": 0.0773, "lr": 1.9381634402260403e-06, "epoch": 1.0064007035887914, "percentage": 20.13, "elapsed_time": "0:53:27", "remaining_time": "3:32:08", "throughput": 8656.66, "total_tokens": 27767008} +{"current_steps": 41200, "total_steps": 204665, "loss": 0.0522, "lr": 1.938133914335592e-06, "epoch": 1.0065228544206386, "percentage": 20.13, "elapsed_time": "0:53:27", "remaining_time": "3:32:07", "throughput": 8656.81, "total_tokens": 27770528} +{"current_steps": 41205, "total_steps": 204665, "loss": 0.081, "lr": 1.9381043816227812e-06, "epoch": 1.0066450052524858, "percentage": 20.13, "elapsed_time": "0:53:28", "remaining_time": "3:32:07", "throughput": 8656.85, "total_tokens": 27773600} +{"current_steps": 41210, "total_steps": 204665, "loss": 0.0333, "lr": 1.9380748420878235e-06, "epoch": 1.006767156084333, "percentage": 20.14, "elapsed_time": "0:53:28", "remaining_time": "3:32:06", "throughput": 8656.92, "total_tokens": 27776800} +{"current_steps": 41215, "total_steps": 204665, "loss": 0.003, "lr": 1.938045295730933e-06, "epoch": 1.0068893069161802, "percentage": 20.14, "elapsed_time": "0:53:28", "remaining_time": "3:32:06", "throughput": 8656.96, "total_tokens": 27779872} +{"current_steps": 41220, "total_steps": 204665, "loss": 0.07, "lr": 1.9380157425523252e-06, "epoch": 1.0070114577480274, "percentage": 20.14, "elapsed_time": "0:53:29", "remaining_time": "3:32:05", "throughput": 8657.08, "total_tokens": 27783264} +{"current_steps": 41225, "total_steps": 204665, "loss": 0.1728, "lr": 1.937986182552214e-06, "epoch": 1.0071336085798743, "percentage": 20.14, "elapsed_time": "0:53:29", "remaining_time": "3:32:04", "throughput": 8657.03, "total_tokens": 27785952} +{"current_steps": 41230, "total_steps": 204665, "loss": 0.0718, "lr": 1.9379566157308156e-06, "epoch": 1.0072557594117215, "percentage": 20.15, "elapsed_time": "0:53:29", "remaining_time": "3:32:04", "throughput": 8657.02, "total_tokens": 27788832} +{"current_steps": 41235, "total_steps": 204665, "loss": 0.0436, "lr": 1.937927042088344e-06, "epoch": 1.0073779102435687, "percentage": 20.15, "elapsed_time": "0:53:30", "remaining_time": "3:32:03", "throughput": 8657.24, "total_tokens": 27792672} +{"current_steps": 41240, "total_steps": 204665, "loss": 0.0749, "lr": 1.937897461625015e-06, "epoch": 1.007500061075416, "percentage": 20.15, "elapsed_time": "0:53:30", "remaining_time": "3:32:03", "throughput": 8657.33, "total_tokens": 27796000} +{"current_steps": 41245, "total_steps": 204665, "loss": 0.0926, "lr": 1.9378678743410432e-06, "epoch": 1.007622211907263, "percentage": 20.15, "elapsed_time": "0:53:31", "remaining_time": "3:32:02", "throughput": 8657.47, "total_tokens": 27799520} +{"current_steps": 41250, "total_steps": 204665, "loss": 0.1212, "lr": 1.937838280236644e-06, "epoch": 1.0077443627391103, "percentage": 20.15, "elapsed_time": "0:53:31", "remaining_time": "3:32:02", "throughput": 8657.54, "total_tokens": 27802720} +{"current_steps": 41255, "total_steps": 204665, "loss": 0.0305, "lr": 1.9378086793120323e-06, "epoch": 1.0078665135709575, "percentage": 20.16, "elapsed_time": "0:53:31", "remaining_time": "3:32:01", "throughput": 8657.69, "total_tokens": 27806304} +{"current_steps": 41260, "total_steps": 204665, "loss": 0.0022, "lr": 1.937779071567424e-06, "epoch": 1.0079886644028047, "percentage": 20.16, "elapsed_time": "0:53:32", "remaining_time": "3:32:01", "throughput": 8657.92, "total_tokens": 27810272} +{"current_steps": 41265, "total_steps": 204665, "loss": 0.0504, "lr": 1.937749457003034e-06, "epoch": 1.0081108152346518, "percentage": 20.16, "elapsed_time": "0:53:32", "remaining_time": "3:32:00", "throughput": 8658.07, "total_tokens": 27813792} +{"current_steps": 41270, "total_steps": 204665, "loss": 0.0024, "lr": 1.9377198356190775e-06, "epoch": 1.0082329660664988, "percentage": 20.16, "elapsed_time": "0:53:32", "remaining_time": "3:32:00", "throughput": 8658.15, "total_tokens": 27817056} +{"current_steps": 41275, "total_steps": 204665, "loss": 0.1432, "lr": 1.93769020741577e-06, "epoch": 1.008355116898346, "percentage": 20.17, "elapsed_time": "0:53:33", "remaining_time": "3:31:59", "throughput": 8658.23, "total_tokens": 27820384} +{"current_steps": 41280, "total_steps": 204665, "loss": 0.2132, "lr": 1.937660572393328e-06, "epoch": 1.0084772677301932, "percentage": 20.17, "elapsed_time": "0:53:33", "remaining_time": "3:31:59", "throughput": 8658.36, "total_tokens": 27823904} +{"current_steps": 41285, "total_steps": 204665, "loss": 0.1263, "lr": 1.9376309305519653e-06, "epoch": 1.0085994185620404, "percentage": 20.17, "elapsed_time": "0:53:33", "remaining_time": "3:31:58", "throughput": 8658.41, "total_tokens": 27827104} +{"current_steps": 41290, "total_steps": 204665, "loss": 0.0712, "lr": 1.9376012818918984e-06, "epoch": 1.0087215693938876, "percentage": 20.17, "elapsed_time": "0:53:34", "remaining_time": "3:31:57", "throughput": 8658.56, "total_tokens": 27830624} +{"current_steps": 41295, "total_steps": 204665, "loss": 0.037, "lr": 1.937571626413343e-06, "epoch": 1.0088437202257348, "percentage": 20.18, "elapsed_time": "0:53:34", "remaining_time": "3:31:57", "throughput": 8658.58, "total_tokens": 27833696} +{"current_steps": 41300, "total_steps": 204665, "loss": 0.0138, "lr": 1.9375419641165143e-06, "epoch": 1.008965871057582, "percentage": 20.18, "elapsed_time": "0:53:34", "remaining_time": "3:31:56", "throughput": 8658.8, "total_tokens": 27837536} +{"current_steps": 41305, "total_steps": 204665, "loss": 0.1085, "lr": 1.9375122950016287e-06, "epoch": 1.0090880218894291, "percentage": 20.18, "elapsed_time": "0:53:35", "remaining_time": "3:31:56", "throughput": 8658.95, "total_tokens": 27841056} +{"current_steps": 41310, "total_steps": 204665, "loss": 0.0351, "lr": 1.9374826190689013e-06, "epoch": 1.0092101727212763, "percentage": 20.18, "elapsed_time": "0:53:35", "remaining_time": "3:31:55", "throughput": 8659.05, "total_tokens": 27844448} +{"current_steps": 41315, "total_steps": 204665, "loss": 0.0918, "lr": 1.937452936318548e-06, "epoch": 1.0093323235531233, "percentage": 20.19, "elapsed_time": "0:53:35", "remaining_time": "3:31:55", "throughput": 8659.07, "total_tokens": 27847520} +{"current_steps": 41320, "total_steps": 204665, "loss": 0.0846, "lr": 1.937423246750785e-06, "epoch": 1.0094544743849705, "percentage": 20.19, "elapsed_time": "0:53:36", "remaining_time": "3:31:54", "throughput": 8659.18, "total_tokens": 27850976} +{"current_steps": 41325, "total_steps": 204665, "loss": 0.0609, "lr": 1.937393550365828e-06, "epoch": 1.0095766252168177, "percentage": 20.19, "elapsed_time": "0:53:36", "remaining_time": "3:31:54", "throughput": 8659.15, "total_tokens": 27853856} +{"current_steps": 41330, "total_steps": 204665, "loss": 0.0724, "lr": 1.9373638471638925e-06, "epoch": 1.0096987760486649, "percentage": 20.19, "elapsed_time": "0:53:37", "remaining_time": "3:31:53", "throughput": 8659.29, "total_tokens": 27857376} +{"current_steps": 41335, "total_steps": 204665, "loss": 0.0478, "lr": 1.9373341371451956e-06, "epoch": 1.009820926880512, "percentage": 20.2, "elapsed_time": "0:53:37", "remaining_time": "3:31:53", "throughput": 8659.37, "total_tokens": 27860704} +{"current_steps": 41340, "total_steps": 204665, "loss": 0.279, "lr": 1.9373044203099527e-06, "epoch": 1.0099430777123592, "percentage": 20.2, "elapsed_time": "0:53:37", "remaining_time": "3:31:52", "throughput": 8659.47, "total_tokens": 27864096} +{"current_steps": 41345, "total_steps": 204665, "loss": 0.0546, "lr": 1.93727469665838e-06, "epoch": 1.0100652285442064, "percentage": 20.2, "elapsed_time": "0:53:38", "remaining_time": "3:31:52", "throughput": 8659.6, "total_tokens": 27867616} +{"current_steps": 41350, "total_steps": 204665, "loss": 0.2221, "lr": 1.937244966190693e-06, "epoch": 1.0101873793760536, "percentage": 20.2, "elapsed_time": "0:53:38", "remaining_time": "3:31:51", "throughput": 8659.84, "total_tokens": 27871520} +{"current_steps": 41355, "total_steps": 204665, "loss": 0.0076, "lr": 1.937215228907109e-06, "epoch": 1.0103095302079008, "percentage": 20.21, "elapsed_time": "0:53:38", "remaining_time": "3:31:51", "throughput": 8659.88, "total_tokens": 27874720} +{"current_steps": 41360, "total_steps": 204665, "loss": 0.0873, "lr": 1.9371854848078434e-06, "epoch": 1.0104316810397478, "percentage": 20.21, "elapsed_time": "0:53:39", "remaining_time": "3:31:50", "throughput": 8659.85, "total_tokens": 27877600} +{"current_steps": 41365, "total_steps": 204665, "loss": 0.1743, "lr": 1.9371557338931133e-06, "epoch": 1.010553831871595, "percentage": 20.21, "elapsed_time": "0:53:39", "remaining_time": "3:31:50", "throughput": 8659.99, "total_tokens": 27881120} +{"current_steps": 41370, "total_steps": 204665, "loss": 0.1012, "lr": 1.9371259761631346e-06, "epoch": 1.0106759827034422, "percentage": 20.21, "elapsed_time": "0:53:39", "remaining_time": "3:31:49", "throughput": 8660.17, "total_tokens": 27884832} +{"current_steps": 41375, "total_steps": 204665, "loss": 0.0264, "lr": 1.9370962116181235e-06, "epoch": 1.0107981335352894, "percentage": 20.22, "elapsed_time": "0:53:40", "remaining_time": "3:31:48", "throughput": 8660.23, "total_tokens": 27888096} +{"current_steps": 41380, "total_steps": 204665, "loss": 0.2076, "lr": 1.9370664402582966e-06, "epoch": 1.0109202843671365, "percentage": 20.22, "elapsed_time": "0:53:40", "remaining_time": "3:31:48", "throughput": 8660.18, "total_tokens": 27890912} +{"current_steps": 41385, "total_steps": 204665, "loss": 0.0556, "lr": 1.937036662083871e-06, "epoch": 1.0110424351989837, "percentage": 20.22, "elapsed_time": "0:53:40", "remaining_time": "3:31:47", "throughput": 8660.23, "total_tokens": 27894176} +{"current_steps": 41390, "total_steps": 204665, "loss": 0.1091, "lr": 1.9370068770950626e-06, "epoch": 1.011164586030831, "percentage": 20.22, "elapsed_time": "0:53:41", "remaining_time": "3:31:47", "throughput": 8660.29, "total_tokens": 27897376} +{"current_steps": 41395, "total_steps": 204665, "loss": 0.0532, "lr": 1.936977085292088e-06, "epoch": 1.0112867368626781, "percentage": 20.23, "elapsed_time": "0:53:41", "remaining_time": "3:31:46", "throughput": 8660.4, "total_tokens": 27900832} +{"current_steps": 41400, "total_steps": 204665, "loss": 0.0966, "lr": 1.936947286675164e-06, "epoch": 1.0114088876945253, "percentage": 20.23, "elapsed_time": "0:53:42", "remaining_time": "3:31:46", "throughput": 8660.51, "total_tokens": 27904288} +{"current_steps": 41405, "total_steps": 204665, "loss": 0.0164, "lr": 1.9369174812445073e-06, "epoch": 1.0115310385263723, "percentage": 20.23, "elapsed_time": "0:53:42", "remaining_time": "3:31:45", "throughput": 8660.62, "total_tokens": 27907744} +{"current_steps": 41410, "total_steps": 204665, "loss": 0.0632, "lr": 1.9368876690003347e-06, "epoch": 1.0116531893582195, "percentage": 20.23, "elapsed_time": "0:53:42", "remaining_time": "3:31:45", "throughput": 8660.79, "total_tokens": 27911456} +{"current_steps": 41415, "total_steps": 204665, "loss": 0.044, "lr": 1.936857849942863e-06, "epoch": 1.0117753401900667, "percentage": 20.24, "elapsed_time": "0:53:43", "remaining_time": "3:31:44", "throughput": 8660.9, "total_tokens": 27914912} +{"current_steps": 41420, "total_steps": 204665, "loss": 0.1014, "lr": 1.9368280240723093e-06, "epoch": 1.0118974910219138, "percentage": 20.24, "elapsed_time": "0:53:43", "remaining_time": "3:31:44", "throughput": 8660.88, "total_tokens": 27917856} +{"current_steps": 41425, "total_steps": 204665, "loss": 0.0542, "lr": 1.93679819138889e-06, "epoch": 1.012019641853761, "percentage": 20.24, "elapsed_time": "0:53:43", "remaining_time": "3:31:43", "throughput": 8661.11, "total_tokens": 27921760} +{"current_steps": 41430, "total_steps": 204665, "loss": 0.0742, "lr": 1.9367683518928226e-06, "epoch": 1.0121417926856082, "percentage": 20.24, "elapsed_time": "0:53:44", "remaining_time": "3:31:43", "throughput": 8661.26, "total_tokens": 27925344} +{"current_steps": 41435, "total_steps": 204665, "loss": 0.1166, "lr": 1.9367385055843234e-06, "epoch": 1.0122639435174554, "percentage": 20.25, "elapsed_time": "0:53:44", "remaining_time": "3:31:42", "throughput": 8661.33, "total_tokens": 27928672} +{"current_steps": 41440, "total_steps": 204665, "loss": 0.0719, "lr": 1.93670865246361e-06, "epoch": 1.0123860943493026, "percentage": 20.25, "elapsed_time": "0:53:44", "remaining_time": "3:31:42", "throughput": 8661.39, "total_tokens": 27931872} +{"current_steps": 41445, "total_steps": 204665, "loss": 0.0874, "lr": 1.9366787925308992e-06, "epoch": 1.0125082451811498, "percentage": 20.25, "elapsed_time": "0:53:45", "remaining_time": "3:31:41", "throughput": 8661.49, "total_tokens": 27935264} +{"current_steps": 41450, "total_steps": 204665, "loss": 0.0223, "lr": 1.9366489257864084e-06, "epoch": 1.0126303960129968, "percentage": 20.25, "elapsed_time": "0:53:45", "remaining_time": "3:31:41", "throughput": 8661.55, "total_tokens": 27938528} +{"current_steps": 41455, "total_steps": 204665, "loss": 0.0771, "lr": 1.9366190522303543e-06, "epoch": 1.012752546844844, "percentage": 20.26, "elapsed_time": "0:53:45", "remaining_time": "3:31:40", "throughput": 8661.71, "total_tokens": 27942112} +{"current_steps": 41460, "total_steps": 204665, "loss": 0.1288, "lr": 1.936589171862955e-06, "epoch": 1.0128746976766911, "percentage": 20.26, "elapsed_time": "0:53:46", "remaining_time": "3:31:40", "throughput": 8661.79, "total_tokens": 27945376} +{"current_steps": 41465, "total_steps": 204665, "loss": 0.0402, "lr": 1.936559284684427e-06, "epoch": 1.0129968485085383, "percentage": 20.26, "elapsed_time": "0:53:46", "remaining_time": "3:31:39", "throughput": 8661.99, "total_tokens": 27949152} +{"current_steps": 41470, "total_steps": 204665, "loss": 0.0106, "lr": 1.9365293906949885e-06, "epoch": 1.0131189993403855, "percentage": 20.26, "elapsed_time": "0:53:46", "remaining_time": "3:31:39", "throughput": 8662.06, "total_tokens": 27952416} +{"current_steps": 41475, "total_steps": 204665, "loss": 0.1945, "lr": 1.9364994898948557e-06, "epoch": 1.0132411501722327, "percentage": 20.26, "elapsed_time": "0:53:47", "remaining_time": "3:31:38", "throughput": 8662.19, "total_tokens": 27955936} +{"current_steps": 41480, "total_steps": 204665, "loss": 0.1181, "lr": 1.9364695822842473e-06, "epoch": 1.01336330100408, "percentage": 20.27, "elapsed_time": "0:53:47", "remaining_time": "3:31:37", "throughput": 8662.3, "total_tokens": 27959328} +{"current_steps": 41485, "total_steps": 204665, "loss": 0.0289, "lr": 1.93643966786338e-06, "epoch": 1.013485451835927, "percentage": 20.27, "elapsed_time": "0:53:48", "remaining_time": "3:31:37", "throughput": 8662.44, "total_tokens": 27962912} +{"current_steps": 41490, "total_steps": 204665, "loss": 0.1157, "lr": 1.9364097466324717e-06, "epoch": 1.0136076026677743, "percentage": 20.27, "elapsed_time": "0:53:48", "remaining_time": "3:31:36", "throughput": 8662.51, "total_tokens": 27966176} +{"current_steps": 41495, "total_steps": 204665, "loss": 0.0885, "lr": 1.9363798185917394e-06, "epoch": 1.0137297534996212, "percentage": 20.27, "elapsed_time": "0:53:48", "remaining_time": "3:31:36", "throughput": 8662.58, "total_tokens": 27969440} +{"current_steps": 41500, "total_steps": 204665, "loss": 0.1333, "lr": 1.936349883741402e-06, "epoch": 1.0138519043314684, "percentage": 20.28, "elapsed_time": "0:53:49", "remaining_time": "3:31:35", "throughput": 8662.64, "total_tokens": 27972704} +{"current_steps": 41505, "total_steps": 204665, "loss": 0.0017, "lr": 1.9363199420816753e-06, "epoch": 1.0139740551633156, "percentage": 20.28, "elapsed_time": "0:53:49", "remaining_time": "3:31:35", "throughput": 8662.69, "total_tokens": 27975904} +{"current_steps": 41510, "total_steps": 204665, "loss": 0.0493, "lr": 1.936289993612779e-06, "epoch": 1.0140962059951628, "percentage": 20.28, "elapsed_time": "0:53:49", "remaining_time": "3:31:34", "throughput": 8662.82, "total_tokens": 27979424} +{"current_steps": 41515, "total_steps": 204665, "loss": 0.0614, "lr": 1.9362600383349296e-06, "epoch": 1.01421835682701, "percentage": 20.28, "elapsed_time": "0:53:50", "remaining_time": "3:31:34", "throughput": 8662.89, "total_tokens": 27982752} +{"current_steps": 41520, "total_steps": 204665, "loss": 0.0367, "lr": 1.936230076248346e-06, "epoch": 1.0143405076588572, "percentage": 20.29, "elapsed_time": "0:53:50", "remaining_time": "3:31:33", "throughput": 8662.98, "total_tokens": 27986080} +{"current_steps": 41525, "total_steps": 204665, "loss": 0.1619, "lr": 1.9362001073532448e-06, "epoch": 1.0144626584907044, "percentage": 20.29, "elapsed_time": "0:53:50", "remaining_time": "3:31:33", "throughput": 8663.04, "total_tokens": 27989344} +{"current_steps": 41530, "total_steps": 204665, "loss": 0.1091, "lr": 1.936170131649845e-06, "epoch": 1.0145848093225516, "percentage": 20.29, "elapsed_time": "0:53:51", "remaining_time": "3:31:32", "throughput": 8663.26, "total_tokens": 27993184} +{"current_steps": 41535, "total_steps": 204665, "loss": 0.009, "lr": 1.936140149138364e-06, "epoch": 1.0147069601543985, "percentage": 20.29, "elapsed_time": "0:53:51", "remaining_time": "3:31:32", "throughput": 8663.35, "total_tokens": 27996512} +{"current_steps": 41540, "total_steps": 204665, "loss": 0.0661, "lr": 1.93611015981902e-06, "epoch": 1.0148291109862457, "percentage": 20.3, "elapsed_time": "0:53:51", "remaining_time": "3:31:31", "throughput": 8663.44, "total_tokens": 27999840} +{"current_steps": 41545, "total_steps": 204665, "loss": 0.0219, "lr": 1.936080163692031e-06, "epoch": 1.014951261818093, "percentage": 20.3, "elapsed_time": "0:53:52", "remaining_time": "3:31:31", "throughput": 8663.56, "total_tokens": 28003296} +{"current_steps": 41550, "total_steps": 204665, "loss": 0.0188, "lr": 1.9360501607576155e-06, "epoch": 1.01507341264994, "percentage": 20.3, "elapsed_time": "0:53:52", "remaining_time": "3:31:30", "throughput": 8663.69, "total_tokens": 28006816} +{"current_steps": 41555, "total_steps": 204665, "loss": 0.0162, "lr": 1.9360201510159917e-06, "epoch": 1.0151955634817873, "percentage": 20.3, "elapsed_time": "0:53:53", "remaining_time": "3:31:30", "throughput": 8663.78, "total_tokens": 28010144} +{"current_steps": 41560, "total_steps": 204665, "loss": 0.0835, "lr": 1.9359901344673773e-06, "epoch": 1.0153177143136345, "percentage": 20.31, "elapsed_time": "0:53:53", "remaining_time": "3:31:29", "throughput": 8663.83, "total_tokens": 28013344} +{"current_steps": 41565, "total_steps": 204665, "loss": 0.08, "lr": 1.935960111111991e-06, "epoch": 1.0154398651454817, "percentage": 20.31, "elapsed_time": "0:53:53", "remaining_time": "3:31:29", "throughput": 8663.95, "total_tokens": 28016864} +{"current_steps": 41570, "total_steps": 204665, "loss": 0.0109, "lr": 1.935930080950051e-06, "epoch": 1.0155620159773289, "percentage": 20.31, "elapsed_time": "0:53:54", "remaining_time": "3:31:28", "throughput": 8664.01, "total_tokens": 28020128} +{"current_steps": 41575, "total_steps": 204665, "loss": 0.0855, "lr": 1.9359000439817758e-06, "epoch": 1.015684166809176, "percentage": 20.31, "elapsed_time": "0:53:54", "remaining_time": "3:31:28", "throughput": 8664.11, "total_tokens": 28023520} +{"current_steps": 41580, "total_steps": 204665, "loss": 0.2021, "lr": 1.9358700002073833e-06, "epoch": 1.015806317641023, "percentage": 20.32, "elapsed_time": "0:53:54", "remaining_time": "3:31:27", "throughput": 8664.14, "total_tokens": 28026592} +{"current_steps": 41585, "total_steps": 204665, "loss": 0.0022, "lr": 1.935839949627093e-06, "epoch": 1.0159284684728702, "percentage": 20.32, "elapsed_time": "0:53:55", "remaining_time": "3:31:26", "throughput": 8664.24, "total_tokens": 28029920} +{"current_steps": 41590, "total_steps": 204665, "loss": 0.0185, "lr": 1.9358098922411224e-06, "epoch": 1.0160506193047174, "percentage": 20.32, "elapsed_time": "0:53:55", "remaining_time": "3:31:26", "throughput": 8664.36, "total_tokens": 28033312} +{"current_steps": 41595, "total_steps": 204665, "loss": 0.1054, "lr": 1.935779828049691e-06, "epoch": 1.0161727701365646, "percentage": 20.32, "elapsed_time": "0:53:55", "remaining_time": "3:31:25", "throughput": 8664.47, "total_tokens": 28036704} +{"current_steps": 41600, "total_steps": 204665, "loss": 0.1035, "lr": 1.935749757053017e-06, "epoch": 1.0162949209684118, "percentage": 20.33, "elapsed_time": "0:53:56", "remaining_time": "3:31:25", "throughput": 8664.69, "total_tokens": 28040544} +{"current_steps": 41605, "total_steps": 204665, "loss": 0.1913, "lr": 1.9357196792513188e-06, "epoch": 1.016417071800259, "percentage": 20.33, "elapsed_time": "0:53:56", "remaining_time": "3:31:24", "throughput": 8664.84, "total_tokens": 28044064} +{"current_steps": 41610, "total_steps": 204665, "loss": 0.0951, "lr": 1.9356895946448154e-06, "epoch": 1.0165392226321062, "percentage": 20.33, "elapsed_time": "0:53:56", "remaining_time": "3:31:24", "throughput": 8665.04, "total_tokens": 28047776} +{"current_steps": 41615, "total_steps": 204665, "loss": 0.1271, "lr": 1.9356595032337257e-06, "epoch": 1.0166613734639534, "percentage": 20.33, "elapsed_time": "0:53:57", "remaining_time": "3:31:23", "throughput": 8665.09, "total_tokens": 28050912} +{"current_steps": 41620, "total_steps": 204665, "loss": 0.1427, "lr": 1.935629405018269e-06, "epoch": 1.0167835242958005, "percentage": 20.34, "elapsed_time": "0:53:57", "remaining_time": "3:31:23", "throughput": 8665.2, "total_tokens": 28054304} +{"current_steps": 41625, "total_steps": 204665, "loss": 0.1011, "lr": 1.9355992999986627e-06, "epoch": 1.0169056751276475, "percentage": 20.34, "elapsed_time": "0:53:57", "remaining_time": "3:31:22", "throughput": 8665.42, "total_tokens": 28058144} +{"current_steps": 41630, "total_steps": 204665, "loss": 0.1637, "lr": 1.9355691881751272e-06, "epoch": 1.0170278259594947, "percentage": 20.34, "elapsed_time": "0:53:58", "remaining_time": "3:31:22", "throughput": 8665.58, "total_tokens": 28061728} +{"current_steps": 41635, "total_steps": 204665, "loss": 0.0612, "lr": 1.9355390695478805e-06, "epoch": 1.017149976791342, "percentage": 20.34, "elapsed_time": "0:53:58", "remaining_time": "3:31:21", "throughput": 8665.76, "total_tokens": 28065376} +{"current_steps": 41640, "total_steps": 204665, "loss": 0.1635, "lr": 1.9355089441171423e-06, "epoch": 1.017272127623189, "percentage": 20.35, "elapsed_time": "0:53:59", "remaining_time": "3:31:21", "throughput": 8665.99, "total_tokens": 28069216} +{"current_steps": 41645, "total_steps": 204665, "loss": 0.0512, "lr": 1.935478811883131e-06, "epoch": 1.0173942784550363, "percentage": 20.35, "elapsed_time": "0:53:59", "remaining_time": "3:31:20", "throughput": 8666.06, "total_tokens": 28072416} +{"current_steps": 41650, "total_steps": 204665, "loss": 0.1672, "lr": 1.935448672846067e-06, "epoch": 1.0175164292868835, "percentage": 20.35, "elapsed_time": "0:53:59", "remaining_time": "3:31:19", "throughput": 8666.09, "total_tokens": 28075488} +{"current_steps": 41655, "total_steps": 204665, "loss": 0.1745, "lr": 1.935418527006168e-06, "epoch": 1.0176385801187307, "percentage": 20.35, "elapsed_time": "0:54:00", "remaining_time": "3:31:19", "throughput": 8666.15, "total_tokens": 28078688} +{"current_steps": 41660, "total_steps": 204665, "loss": 0.0607, "lr": 1.9353883743636542e-06, "epoch": 1.0177607309505778, "percentage": 20.36, "elapsed_time": "0:54:00", "remaining_time": "3:31:18", "throughput": 8666.2, "total_tokens": 28081824} +{"current_steps": 41665, "total_steps": 204665, "loss": 0.2085, "lr": 1.9353582149187444e-06, "epoch": 1.017882881782425, "percentage": 20.36, "elapsed_time": "0:54:00", "remaining_time": "3:31:18", "throughput": 8666.37, "total_tokens": 28085408} +{"current_steps": 41670, "total_steps": 204665, "loss": 0.0043, "lr": 1.935328048671658e-06, "epoch": 1.018005032614272, "percentage": 20.36, "elapsed_time": "0:54:01", "remaining_time": "3:31:17", "throughput": 8666.57, "total_tokens": 28089184} +{"current_steps": 41675, "total_steps": 204665, "loss": 0.0094, "lr": 1.935297875622615e-06, "epoch": 1.0181271834461192, "percentage": 20.36, "elapsed_time": "0:54:01", "remaining_time": "3:31:17", "throughput": 8666.64, "total_tokens": 28092384} +{"current_steps": 41680, "total_steps": 204665, "loss": 0.201, "lr": 1.9352676957718335e-06, "epoch": 1.0182493342779664, "percentage": 20.36, "elapsed_time": "0:54:01", "remaining_time": "3:31:16", "throughput": 8666.7, "total_tokens": 28095520} +{"current_steps": 41685, "total_steps": 204665, "loss": 0.0463, "lr": 1.9352375091195343e-06, "epoch": 1.0183714851098136, "percentage": 20.37, "elapsed_time": "0:54:02", "remaining_time": "3:31:16", "throughput": 8666.74, "total_tokens": 28098656} +{"current_steps": 41690, "total_steps": 204665, "loss": 0.0568, "lr": 1.935207315665936e-06, "epoch": 1.0184936359416608, "percentage": 20.37, "elapsed_time": "0:54:02", "remaining_time": "3:31:15", "throughput": 8667.04, "total_tokens": 28102752} +{"current_steps": 41695, "total_steps": 204665, "loss": 0.048, "lr": 1.935177115411259e-06, "epoch": 1.018615786773508, "percentage": 20.37, "elapsed_time": "0:54:02", "remaining_time": "3:31:15", "throughput": 8667.26, "total_tokens": 28106592} +{"current_steps": 41700, "total_steps": 204665, "loss": 0.0385, "lr": 1.9351469083557223e-06, "epoch": 1.0187379376053551, "percentage": 20.37, "elapsed_time": "0:54:03", "remaining_time": "3:31:14", "throughput": 8667.33, "total_tokens": 28109856} +{"current_steps": 41705, "total_steps": 204665, "loss": 0.0921, "lr": 1.935116694499546e-06, "epoch": 1.0188600884372023, "percentage": 20.38, "elapsed_time": "0:54:03", "remaining_time": "3:31:13", "throughput": 8667.41, "total_tokens": 28113120} +{"current_steps": 41710, "total_steps": 204665, "loss": 0.0033, "lr": 1.9350864738429493e-06, "epoch": 1.0189822392690495, "percentage": 20.38, "elapsed_time": "0:54:03", "remaining_time": "3:31:13", "throughput": 8667.51, "total_tokens": 28116448} +{"current_steps": 41715, "total_steps": 204665, "loss": 0.0259, "lr": 1.9350562463861524e-06, "epoch": 1.0191043901008965, "percentage": 20.38, "elapsed_time": "0:54:04", "remaining_time": "3:31:12", "throughput": 8667.68, "total_tokens": 28120096} +{"current_steps": 41720, "total_steps": 204665, "loss": 0.0799, "lr": 1.9350260121293746e-06, "epoch": 1.0192265409327437, "percentage": 20.38, "elapsed_time": "0:54:04", "remaining_time": "3:31:12", "throughput": 8667.83, "total_tokens": 28123616} +{"current_steps": 41725, "total_steps": 204665, "loss": 0.0821, "lr": 1.9349957710728365e-06, "epoch": 1.0193486917645909, "percentage": 20.39, "elapsed_time": "0:54:04", "remaining_time": "3:31:11", "throughput": 8667.91, "total_tokens": 28126880} +{"current_steps": 41730, "total_steps": 204665, "loss": 0.0694, "lr": 1.9349655232167575e-06, "epoch": 1.019470842596438, "percentage": 20.39, "elapsed_time": "0:54:05", "remaining_time": "3:31:11", "throughput": 8668.1, "total_tokens": 28130528} +{"current_steps": 41735, "total_steps": 204665, "loss": 0.1287, "lr": 1.934935268561358e-06, "epoch": 1.0195929934282852, "percentage": 20.39, "elapsed_time": "0:54:05", "remaining_time": "3:31:10", "throughput": 8668.22, "total_tokens": 28133984} +{"current_steps": 41740, "total_steps": 204665, "loss": 0.0028, "lr": 1.9349050071068574e-06, "epoch": 1.0197151442601324, "percentage": 20.39, "elapsed_time": "0:54:06", "remaining_time": "3:31:10", "throughput": 8668.39, "total_tokens": 28137632} +{"current_steps": 41745, "total_steps": 204665, "loss": 0.1427, "lr": 1.9348747388534763e-06, "epoch": 1.0198372950919796, "percentage": 20.4, "elapsed_time": "0:54:06", "remaining_time": "3:31:09", "throughput": 8668.61, "total_tokens": 28141408} +{"current_steps": 41750, "total_steps": 204665, "loss": 0.0793, "lr": 1.9348444638014343e-06, "epoch": 1.0199594459238268, "percentage": 20.4, "elapsed_time": "0:54:06", "remaining_time": "3:31:09", "throughput": 8668.82, "total_tokens": 28145184} +{"current_steps": 41755, "total_steps": 204665, "loss": 0.0018, "lr": 1.9348141819509522e-06, "epoch": 1.020081596755674, "percentage": 20.4, "elapsed_time": "0:54:07", "remaining_time": "3:31:08", "throughput": 8668.88, "total_tokens": 28148384} +{"current_steps": 41760, "total_steps": 204665, "loss": 0.2052, "lr": 1.93478389330225e-06, "epoch": 1.020203747587521, "percentage": 20.4, "elapsed_time": "0:54:07", "remaining_time": "3:31:08", "throughput": 8669.01, "total_tokens": 28151840} +{"current_steps": 41765, "total_steps": 204665, "loss": 0.0889, "lr": 1.934753597855548e-06, "epoch": 1.0203258984193682, "percentage": 20.41, "elapsed_time": "0:54:07", "remaining_time": "3:31:07", "throughput": 8669.09, "total_tokens": 28155104} +{"current_steps": 41770, "total_steps": 204665, "loss": 0.0625, "lr": 1.9347232956110663e-06, "epoch": 1.0204480492512153, "percentage": 20.41, "elapsed_time": "0:54:08", "remaining_time": "3:31:06", "throughput": 8669.08, "total_tokens": 28158048} +{"current_steps": 41775, "total_steps": 204665, "loss": 0.1216, "lr": 1.9346929865690258e-06, "epoch": 1.0205702000830625, "percentage": 20.41, "elapsed_time": "0:54:08", "remaining_time": "3:31:06", "throughput": 8669.2, "total_tokens": 28161440} +{"current_steps": 41780, "total_steps": 204665, "loss": 0.0483, "lr": 1.934662670729646e-06, "epoch": 1.0206923509149097, "percentage": 20.41, "elapsed_time": "0:54:08", "remaining_time": "3:31:05", "throughput": 8669.41, "total_tokens": 28165216} +{"current_steps": 41785, "total_steps": 204665, "loss": 0.0602, "lr": 1.9346323480931475e-06, "epoch": 1.020814501746757, "percentage": 20.42, "elapsed_time": "0:54:09", "remaining_time": "3:31:05", "throughput": 8669.53, "total_tokens": 28168672} +{"current_steps": 41790, "total_steps": 204665, "loss": 0.1541, "lr": 1.934602018659752e-06, "epoch": 1.020936652578604, "percentage": 20.42, "elapsed_time": "0:54:09", "remaining_time": "3:31:04", "throughput": 8669.66, "total_tokens": 28172128} +{"current_steps": 41795, "total_steps": 204665, "loss": 0.1585, "lr": 1.9345716824296787e-06, "epoch": 1.0210588034104513, "percentage": 20.42, "elapsed_time": "0:54:09", "remaining_time": "3:31:04", "throughput": 8670.29, "total_tokens": 28177696} +{"current_steps": 41800, "total_steps": 204665, "loss": 0.0411, "lr": 1.9345413394031487e-06, "epoch": 1.0211809542422985, "percentage": 20.42, "elapsed_time": "0:54:10", "remaining_time": "3:31:03", "throughput": 8670.42, "total_tokens": 28181216} +{"current_steps": 41805, "total_steps": 204665, "loss": 0.0372, "lr": 1.9345109895803834e-06, "epoch": 1.0213031050741455, "percentage": 20.43, "elapsed_time": "0:54:10", "remaining_time": "3:31:03", "throughput": 8670.47, "total_tokens": 28184352} +{"current_steps": 41810, "total_steps": 204665, "loss": 0.0806, "lr": 1.934480632961602e-06, "epoch": 1.0214252559059926, "percentage": 20.43, "elapsed_time": "0:54:10", "remaining_time": "3:31:02", "throughput": 8670.55, "total_tokens": 28187616} +{"current_steps": 41815, "total_steps": 204665, "loss": 0.1387, "lr": 1.9344502695470268e-06, "epoch": 1.0215474067378398, "percentage": 20.43, "elapsed_time": "0:54:11", "remaining_time": "3:31:02", "throughput": 8670.68, "total_tokens": 28191136} +{"current_steps": 41820, "total_steps": 204665, "loss": 0.0032, "lr": 1.9344198993368776e-06, "epoch": 1.021669557569687, "percentage": 20.43, "elapsed_time": "0:54:11", "remaining_time": "3:31:01", "throughput": 8670.74, "total_tokens": 28194336} +{"current_steps": 41825, "total_steps": 204665, "loss": 0.094, "lr": 1.9343895223313753e-06, "epoch": 1.0217917084015342, "percentage": 20.44, "elapsed_time": "0:54:12", "remaining_time": "3:31:01", "throughput": 8670.98, "total_tokens": 28198240} +{"current_steps": 41830, "total_steps": 204665, "loss": 0.105, "lr": 1.9343591385307414e-06, "epoch": 1.0219138592333814, "percentage": 20.44, "elapsed_time": "0:54:12", "remaining_time": "3:31:00", "throughput": 8671.01, "total_tokens": 28201248} +{"current_steps": 41835, "total_steps": 204665, "loss": 0.1136, "lr": 1.9343287479351964e-06, "epoch": 1.0220360100652286, "percentage": 20.44, "elapsed_time": "0:54:12", "remaining_time": "3:31:00", "throughput": 8671.06, "total_tokens": 28204448} +{"current_steps": 41840, "total_steps": 204665, "loss": 0.1168, "lr": 1.9342983505449615e-06, "epoch": 1.0221581608970758, "percentage": 20.44, "elapsed_time": "0:54:13", "remaining_time": "3:30:59", "throughput": 8671.09, "total_tokens": 28207520} +{"current_steps": 41845, "total_steps": 204665, "loss": 0.1676, "lr": 1.9342679463602578e-06, "epoch": 1.022280311728923, "percentage": 20.45, "elapsed_time": "0:54:13", "remaining_time": "3:30:59", "throughput": 8671.18, "total_tokens": 28210848} +{"current_steps": 41850, "total_steps": 204665, "loss": 0.1082, "lr": 1.9342375353813062e-06, "epoch": 1.02240246256077, "percentage": 20.45, "elapsed_time": "0:54:13", "remaining_time": "3:30:58", "throughput": 8671.36, "total_tokens": 28214496} +{"current_steps": 41855, "total_steps": 204665, "loss": 0.0846, "lr": 1.9342071176083276e-06, "epoch": 1.0225246133926171, "percentage": 20.45, "elapsed_time": "0:54:14", "remaining_time": "3:30:58", "throughput": 8671.45, "total_tokens": 28217824} +{"current_steps": 41860, "total_steps": 204665, "loss": 0.0428, "lr": 1.934176693041544e-06, "epoch": 1.0226467642244643, "percentage": 20.45, "elapsed_time": "0:54:14", "remaining_time": "3:30:57", "throughput": 8671.48, "total_tokens": 28220896} +{"current_steps": 41865, "total_steps": 204665, "loss": 0.1289, "lr": 1.9341462616811765e-06, "epoch": 1.0227689150563115, "percentage": 20.46, "elapsed_time": "0:54:14", "remaining_time": "3:30:56", "throughput": 8671.65, "total_tokens": 28224544} +{"current_steps": 41870, "total_steps": 204665, "loss": 0.0307, "lr": 1.9341158235274455e-06, "epoch": 1.0228910658881587, "percentage": 20.46, "elapsed_time": "0:54:15", "remaining_time": "3:30:56", "throughput": 8671.62, "total_tokens": 28227360} +{"current_steps": 41875, "total_steps": 204665, "loss": 0.0926, "lr": 1.9340853785805733e-06, "epoch": 1.023013216720006, "percentage": 20.46, "elapsed_time": "0:54:15", "remaining_time": "3:30:55", "throughput": 8671.74, "total_tokens": 28230752} +{"current_steps": 41880, "total_steps": 204665, "loss": 0.0647, "lr": 1.934054926840781e-06, "epoch": 1.023135367551853, "percentage": 20.46, "elapsed_time": "0:54:15", "remaining_time": "3:30:55", "throughput": 8671.79, "total_tokens": 28233888} +{"current_steps": 41885, "total_steps": 204665, "loss": 0.0762, "lr": 1.9340244683082898e-06, "epoch": 1.0232575183837003, "percentage": 20.47, "elapsed_time": "0:54:16", "remaining_time": "3:30:54", "throughput": 8671.83, "total_tokens": 28236960} +{"current_steps": 41890, "total_steps": 204665, "loss": 0.0394, "lr": 1.933994002983322e-06, "epoch": 1.0233796692155475, "percentage": 20.47, "elapsed_time": "0:54:16", "remaining_time": "3:30:54", "throughput": 8671.97, "total_tokens": 28240480} +{"current_steps": 41895, "total_steps": 204665, "loss": 0.0261, "lr": 1.933963530866098e-06, "epoch": 1.0235018200473944, "percentage": 20.47, "elapsed_time": "0:54:16", "remaining_time": "3:30:53", "throughput": 8672.03, "total_tokens": 28243680} +{"current_steps": 41900, "total_steps": 204665, "loss": 0.095, "lr": 1.93393305195684e-06, "epoch": 1.0236239708792416, "percentage": 20.47, "elapsed_time": "0:54:17", "remaining_time": "3:30:53", "throughput": 8672.23, "total_tokens": 28247456} +{"current_steps": 41905, "total_steps": 204665, "loss": 0.0239, "lr": 1.93390256625577e-06, "epoch": 1.0237461217110888, "percentage": 20.47, "elapsed_time": "0:54:17", "remaining_time": "3:30:52", "throughput": 8672.31, "total_tokens": 28250720} +{"current_steps": 41910, "total_steps": 204665, "loss": 0.0914, "lr": 1.9338720737631094e-06, "epoch": 1.023868272542936, "percentage": 20.48, "elapsed_time": "0:54:17", "remaining_time": "3:30:51", "throughput": 8672.35, "total_tokens": 28253792} +{"current_steps": 41915, "total_steps": 204665, "loss": 0.0252, "lr": 1.9338415744790796e-06, "epoch": 1.0239904233747832, "percentage": 20.48, "elapsed_time": "0:54:18", "remaining_time": "3:30:51", "throughput": 8672.48, "total_tokens": 28257248} +{"current_steps": 41920, "total_steps": 204665, "loss": 0.1487, "lr": 1.933811068403903e-06, "epoch": 1.0241125742066304, "percentage": 20.48, "elapsed_time": "0:54:18", "remaining_time": "3:30:50", "throughput": 8672.63, "total_tokens": 28260832} +{"current_steps": 41925, "total_steps": 204665, "loss": 0.033, "lr": 1.933780555537801e-06, "epoch": 1.0242347250384776, "percentage": 20.48, "elapsed_time": "0:54:18", "remaining_time": "3:30:50", "throughput": 8672.73, "total_tokens": 28264160} +{"current_steps": 41930, "total_steps": 204665, "loss": 0.1593, "lr": 1.9337500358809953e-06, "epoch": 1.0243568758703248, "percentage": 20.49, "elapsed_time": "0:54:19", "remaining_time": "3:30:49", "throughput": 8672.85, "total_tokens": 28267616} +{"current_steps": 41935, "total_steps": 204665, "loss": 0.0448, "lr": 1.9337195094337086e-06, "epoch": 1.024479026702172, "percentage": 20.49, "elapsed_time": "0:54:19", "remaining_time": "3:30:49", "throughput": 8672.87, "total_tokens": 28270624} +{"current_steps": 41940, "total_steps": 204665, "loss": 0.0556, "lr": 1.9336889761961627e-06, "epoch": 1.024601177534019, "percentage": 20.49, "elapsed_time": "0:54:20", "remaining_time": "3:30:48", "throughput": 8672.87, "total_tokens": 28273568} +{"current_steps": 41945, "total_steps": 204665, "loss": 0.1861, "lr": 1.933658436168579e-06, "epoch": 1.024723328365866, "percentage": 20.49, "elapsed_time": "0:54:20", "remaining_time": "3:30:48", "throughput": 8672.93, "total_tokens": 28276704} +{"current_steps": 41950, "total_steps": 204665, "loss": 0.057, "lr": 1.93362788935118e-06, "epoch": 1.0248454791977133, "percentage": 20.5, "elapsed_time": "0:54:20", "remaining_time": "3:30:47", "throughput": 8672.99, "total_tokens": 28279904} +{"current_steps": 41955, "total_steps": 204665, "loss": 0.1571, "lr": 1.933597335744188e-06, "epoch": 1.0249676300295605, "percentage": 20.5, "elapsed_time": "0:54:21", "remaining_time": "3:30:47", "throughput": 8673.21, "total_tokens": 28283744} +{"current_steps": 41960, "total_steps": 204665, "loss": 0.0467, "lr": 1.933566775347825e-06, "epoch": 1.0250897808614077, "percentage": 20.5, "elapsed_time": "0:54:21", "remaining_time": "3:30:46", "throughput": 8673.27, "total_tokens": 28286944} +{"current_steps": 41965, "total_steps": 204665, "loss": 0.0018, "lr": 1.9335362081623134e-06, "epoch": 1.0252119316932549, "percentage": 20.5, "elapsed_time": "0:54:21", "remaining_time": "3:30:45", "throughput": 8673.42, "total_tokens": 28290464} +{"current_steps": 41970, "total_steps": 204665, "loss": 0.1227, "lr": 1.9335056341878754e-06, "epoch": 1.025334082525102, "percentage": 20.51, "elapsed_time": "0:54:22", "remaining_time": "3:30:45", "throughput": 8673.76, "total_tokens": 28294816} +{"current_steps": 41975, "total_steps": 204665, "loss": 0.0017, "lr": 1.9334750534247335e-06, "epoch": 1.0254562333569492, "percentage": 20.51, "elapsed_time": "0:54:22", "remaining_time": "3:30:45", "throughput": 8674.25, "total_tokens": 28299808} +{"current_steps": 41980, "total_steps": 204665, "loss": 0.1451, "lr": 1.9334444658731095e-06, "epoch": 1.0255783841887964, "percentage": 20.51, "elapsed_time": "0:54:22", "remaining_time": "3:30:44", "throughput": 8674.34, "total_tokens": 28303136} +{"current_steps": 41985, "total_steps": 204665, "loss": 0.1345, "lr": 1.9334138715332267e-06, "epoch": 1.0257005350206434, "percentage": 20.51, "elapsed_time": "0:54:23", "remaining_time": "3:30:43", "throughput": 8674.39, "total_tokens": 28306272} +{"current_steps": 41990, "total_steps": 204665, "loss": 0.0926, "lr": 1.933383270405307e-06, "epoch": 1.0258226858524906, "percentage": 20.52, "elapsed_time": "0:54:23", "remaining_time": "3:30:43", "throughput": 8674.46, "total_tokens": 28309472} +{"current_steps": 41995, "total_steps": 204665, "loss": 0.0502, "lr": 1.933352662489573e-06, "epoch": 1.0259448366843378, "percentage": 20.52, "elapsed_time": "0:54:23", "remaining_time": "3:30:42", "throughput": 8674.47, "total_tokens": 28312480} +{"current_steps": 42000, "total_steps": 204665, "loss": 0.0655, "lr": 1.9333220477862476e-06, "epoch": 1.026066987516185, "percentage": 20.52, "elapsed_time": "0:54:24", "remaining_time": "3:30:42", "throughput": 8674.66, "total_tokens": 28316192} +{"current_steps": 42005, "total_steps": 204665, "loss": 0.0018, "lr": 1.9332914262955533e-06, "epoch": 1.0261891383480322, "percentage": 20.52, "elapsed_time": "0:54:24", "remaining_time": "3:30:41", "throughput": 8674.87, "total_tokens": 28320032} +{"current_steps": 42010, "total_steps": 204665, "loss": 0.0015, "lr": 1.9332607980177124e-06, "epoch": 1.0263112891798793, "percentage": 20.53, "elapsed_time": "0:54:24", "remaining_time": "3:30:41", "throughput": 8675.11, "total_tokens": 28323936} +{"current_steps": 42015, "total_steps": 204665, "loss": 0.1486, "lr": 1.9332301629529484e-06, "epoch": 1.0264334400117265, "percentage": 20.53, "elapsed_time": "0:54:25", "remaining_time": "3:30:40", "throughput": 8675.15, "total_tokens": 28327072} +{"current_steps": 42020, "total_steps": 204665, "loss": 0.1867, "lr": 1.9331995211014833e-06, "epoch": 1.0265555908435737, "percentage": 20.53, "elapsed_time": "0:54:25", "remaining_time": "3:30:40", "throughput": 8675.22, "total_tokens": 28330272} +{"current_steps": 42025, "total_steps": 204665, "loss": 0.0235, "lr": 1.9331688724635406e-06, "epoch": 1.026677741675421, "percentage": 20.53, "elapsed_time": "0:54:26", "remaining_time": "3:30:39", "throughput": 8675.37, "total_tokens": 28333792} +{"current_steps": 42030, "total_steps": 204665, "loss": 0.0892, "lr": 1.9331382170393424e-06, "epoch": 1.0267998925072679, "percentage": 20.54, "elapsed_time": "0:54:26", "remaining_time": "3:30:39", "throughput": 8675.45, "total_tokens": 28337056} +{"current_steps": 42035, "total_steps": 204665, "loss": 0.0346, "lr": 1.9331075548291125e-06, "epoch": 1.026922043339115, "percentage": 20.54, "elapsed_time": "0:54:26", "remaining_time": "3:30:38", "throughput": 8675.6, "total_tokens": 28340576} +{"current_steps": 42040, "total_steps": 204665, "loss": 0.0051, "lr": 1.933076885833073e-06, "epoch": 1.0270441941709623, "percentage": 20.54, "elapsed_time": "0:54:27", "remaining_time": "3:30:38", "throughput": 8675.66, "total_tokens": 28343776} +{"current_steps": 42045, "total_steps": 204665, "loss": 0.0485, "lr": 1.933046210051448e-06, "epoch": 1.0271663450028095, "percentage": 20.54, "elapsed_time": "0:54:27", "remaining_time": "3:30:37", "throughput": 8675.78, "total_tokens": 28347232} +{"current_steps": 42050, "total_steps": 204665, "loss": 0.1192, "lr": 1.9330155274844597e-06, "epoch": 1.0272884958346566, "percentage": 20.55, "elapsed_time": "0:54:27", "remaining_time": "3:30:36", "throughput": 8675.92, "total_tokens": 28350752} +{"current_steps": 42055, "total_steps": 204665, "loss": 0.1301, "lr": 1.9329848381323318e-06, "epoch": 1.0274106466665038, "percentage": 20.55, "elapsed_time": "0:54:28", "remaining_time": "3:30:36", "throughput": 8675.98, "total_tokens": 28353952} +{"current_steps": 42060, "total_steps": 204665, "loss": 0.0456, "lr": 1.932954141995287e-06, "epoch": 1.027532797498351, "percentage": 20.55, "elapsed_time": "0:54:28", "remaining_time": "3:30:35", "throughput": 8676.32, "total_tokens": 28358304} +{"current_steps": 42065, "total_steps": 204665, "loss": 0.1302, "lr": 1.932923439073549e-06, "epoch": 1.0276549483301982, "percentage": 20.55, "elapsed_time": "0:54:28", "remaining_time": "3:30:35", "throughput": 8676.28, "total_tokens": 28361056} +{"current_steps": 42070, "total_steps": 204665, "loss": 0.091, "lr": 1.93289272936734e-06, "epoch": 1.0277770991620452, "percentage": 20.56, "elapsed_time": "0:54:29", "remaining_time": "3:30:34", "throughput": 8676.35, "total_tokens": 28364256} +{"current_steps": 42075, "total_steps": 204665, "loss": 0.1344, "lr": 1.932862012876885e-06, "epoch": 1.0278992499938924, "percentage": 20.56, "elapsed_time": "0:54:29", "remaining_time": "3:30:34", "throughput": 8676.45, "total_tokens": 28367584} +{"current_steps": 42080, "total_steps": 204665, "loss": 0.0863, "lr": 1.9328312896024063e-06, "epoch": 1.0280214008257396, "percentage": 20.56, "elapsed_time": "0:54:29", "remaining_time": "3:30:33", "throughput": 8676.56, "total_tokens": 28370976} +{"current_steps": 42085, "total_steps": 204665, "loss": 0.0371, "lr": 1.932800559544127e-06, "epoch": 1.0281435516575868, "percentage": 20.56, "elapsed_time": "0:54:30", "remaining_time": "3:30:33", "throughput": 8676.7, "total_tokens": 28374496} +{"current_steps": 42090, "total_steps": 204665, "loss": 0.0506, "lr": 1.932769822702272e-06, "epoch": 1.028265702489434, "percentage": 20.57, "elapsed_time": "0:54:30", "remaining_time": "3:30:32", "throughput": 8676.77, "total_tokens": 28377696} +{"current_steps": 42095, "total_steps": 204665, "loss": 0.1733, "lr": 1.9327390790770636e-06, "epoch": 1.0283878533212811, "percentage": 20.57, "elapsed_time": "0:54:30", "remaining_time": "3:30:32", "throughput": 8676.91, "total_tokens": 28381216} +{"current_steps": 42100, "total_steps": 204665, "loss": 0.0751, "lr": 1.9327083286687256e-06, "epoch": 1.0285100041531283, "percentage": 20.57, "elapsed_time": "0:54:31", "remaining_time": "3:30:31", "throughput": 8677.0, "total_tokens": 28384544} +{"current_steps": 42105, "total_steps": 204665, "loss": 0.2169, "lr": 1.932677571477482e-06, "epoch": 1.0286321549849755, "percentage": 20.57, "elapsed_time": "0:54:31", "remaining_time": "3:30:31", "throughput": 8677.15, "total_tokens": 28388064} +{"current_steps": 42110, "total_steps": 204665, "loss": 0.1519, "lr": 1.9326468075035564e-06, "epoch": 1.0287543058168227, "percentage": 20.58, "elapsed_time": "0:54:31", "remaining_time": "3:30:30", "throughput": 8677.19, "total_tokens": 28391200} +{"current_steps": 42115, "total_steps": 204665, "loss": 0.0666, "lr": 1.932616036747172e-06, "epoch": 1.0288764566486697, "percentage": 20.58, "elapsed_time": "0:54:32", "remaining_time": "3:30:29", "throughput": 8677.28, "total_tokens": 28394528} +{"current_steps": 42120, "total_steps": 204665, "loss": 0.0394, "lr": 1.932585259208553e-06, "epoch": 1.0289986074805169, "percentage": 20.58, "elapsed_time": "0:54:32", "remaining_time": "3:30:29", "throughput": 8677.42, "total_tokens": 28398048} +{"current_steps": 42125, "total_steps": 204665, "loss": 0.0299, "lr": 1.932554474887923e-06, "epoch": 1.029120758312364, "percentage": 20.58, "elapsed_time": "0:54:32", "remaining_time": "3:30:28", "throughput": 8677.53, "total_tokens": 28401440} +{"current_steps": 42130, "total_steps": 204665, "loss": 0.0323, "lr": 1.9325236837855068e-06, "epoch": 1.0292429091442112, "percentage": 20.58, "elapsed_time": "0:54:33", "remaining_time": "3:30:28", "throughput": 8677.58, "total_tokens": 28404576} +{"current_steps": 42135, "total_steps": 204665, "loss": 0.1606, "lr": 1.932492885901527e-06, "epoch": 1.0293650599760584, "percentage": 20.59, "elapsed_time": "0:54:33", "remaining_time": "3:30:27", "throughput": 8677.68, "total_tokens": 28407904} +{"current_steps": 42140, "total_steps": 204665, "loss": 0.0915, "lr": 1.932462081236208e-06, "epoch": 1.0294872108079056, "percentage": 20.59, "elapsed_time": "0:54:34", "remaining_time": "3:30:27", "throughput": 8677.69, "total_tokens": 28410912} +{"current_steps": 42145, "total_steps": 204665, "loss": 0.0902, "lr": 1.932431269789774e-06, "epoch": 1.0296093616397528, "percentage": 20.59, "elapsed_time": "0:54:34", "remaining_time": "3:30:26", "throughput": 8677.86, "total_tokens": 28414560} +{"current_steps": 42150, "total_steps": 204665, "loss": 0.0204, "lr": 1.932400451562449e-06, "epoch": 1.0297315124716, "percentage": 20.59, "elapsed_time": "0:54:34", "remaining_time": "3:30:26", "throughput": 8677.99, "total_tokens": 28418080} +{"current_steps": 42155, "total_steps": 204665, "loss": 0.1347, "lr": 1.9323696265544572e-06, "epoch": 1.0298536633034472, "percentage": 20.6, "elapsed_time": "0:54:35", "remaining_time": "3:30:25", "throughput": 8678.03, "total_tokens": 28421152} +{"current_steps": 42160, "total_steps": 204665, "loss": 0.0876, "lr": 1.9323387947660227e-06, "epoch": 1.0299758141352942, "percentage": 20.6, "elapsed_time": "0:54:35", "remaining_time": "3:30:25", "throughput": 8678.13, "total_tokens": 28424480} +{"current_steps": 42165, "total_steps": 204665, "loss": 0.069, "lr": 1.93230795619737e-06, "epoch": 1.0300979649671413, "percentage": 20.6, "elapsed_time": "0:54:35", "remaining_time": "3:30:24", "throughput": 8678.27, "total_tokens": 28428000} +{"current_steps": 42170, "total_steps": 204665, "loss": 0.0107, "lr": 1.9322771108487227e-06, "epoch": 1.0302201157989885, "percentage": 20.6, "elapsed_time": "0:54:36", "remaining_time": "3:30:23", "throughput": 8678.48, "total_tokens": 28431776} +{"current_steps": 42175, "total_steps": 204665, "loss": 0.1202, "lr": 1.9322462587203056e-06, "epoch": 1.0303422666308357, "percentage": 20.61, "elapsed_time": "0:54:36", "remaining_time": "3:30:23", "throughput": 8678.53, "total_tokens": 28434912} +{"current_steps": 42180, "total_steps": 204665, "loss": 0.1102, "lr": 1.932215399812343e-06, "epoch": 1.030464417462683, "percentage": 20.61, "elapsed_time": "0:54:36", "remaining_time": "3:30:22", "throughput": 8678.68, "total_tokens": 28438432} +{"current_steps": 42185, "total_steps": 204665, "loss": 0.0436, "lr": 1.9321845341250592e-06, "epoch": 1.03058656829453, "percentage": 20.61, "elapsed_time": "0:54:37", "remaining_time": "3:30:22", "throughput": 8678.73, "total_tokens": 28441568} +{"current_steps": 42190, "total_steps": 204665, "loss": 0.1289, "lr": 1.932153661658679e-06, "epoch": 1.0307087191263773, "percentage": 20.61, "elapsed_time": "0:54:37", "remaining_time": "3:30:21", "throughput": 8678.78, "total_tokens": 28444704} +{"current_steps": 42195, "total_steps": 204665, "loss": 0.0261, "lr": 1.932122782413426e-06, "epoch": 1.0308308699582245, "percentage": 20.62, "elapsed_time": "0:54:37", "remaining_time": "3:30:21", "throughput": 8678.83, "total_tokens": 28447840} +{"current_steps": 42200, "total_steps": 204665, "loss": 0.0126, "lr": 1.9320918963895262e-06, "epoch": 1.0309530207900717, "percentage": 20.62, "elapsed_time": "0:54:38", "remaining_time": "3:30:20", "throughput": 8678.83, "total_tokens": 28450784} +{"current_steps": 42205, "total_steps": 204665, "loss": 0.0412, "lr": 1.932061003587203e-06, "epoch": 1.0310751716219186, "percentage": 20.62, "elapsed_time": "0:54:38", "remaining_time": "3:30:20", "throughput": 8679.01, "total_tokens": 28454432} +{"current_steps": 42210, "total_steps": 204665, "loss": 0.0308, "lr": 1.9320301040066816e-06, "epoch": 1.0311973224537658, "percentage": 20.62, "elapsed_time": "0:54:38", "remaining_time": "3:30:19", "throughput": 8679.06, "total_tokens": 28457568} +{"current_steps": 42215, "total_steps": 204665, "loss": 0.1595, "lr": 1.9319991976481863e-06, "epoch": 1.031319473285613, "percentage": 20.63, "elapsed_time": "0:54:39", "remaining_time": "3:30:18", "throughput": 8679.15, "total_tokens": 28460896} +{"current_steps": 42220, "total_steps": 204665, "loss": 0.1558, "lr": 1.9319682845119425e-06, "epoch": 1.0314416241174602, "percentage": 20.63, "elapsed_time": "0:54:39", "remaining_time": "3:30:18", "throughput": 8679.2, "total_tokens": 28464032} +{"current_steps": 42225, "total_steps": 204665, "loss": 0.0608, "lr": 1.9319373645981748e-06, "epoch": 1.0315637749493074, "percentage": 20.63, "elapsed_time": "0:54:39", "remaining_time": "3:30:17", "throughput": 8679.23, "total_tokens": 28467104} +{"current_steps": 42230, "total_steps": 204665, "loss": 0.0129, "lr": 1.9319064379071075e-06, "epoch": 1.0316859257811546, "percentage": 20.63, "elapsed_time": "0:54:40", "remaining_time": "3:30:17", "throughput": 8679.33, "total_tokens": 28470432} +{"current_steps": 42235, "total_steps": 204665, "loss": 0.0663, "lr": 1.931875504438966e-06, "epoch": 1.0318080766130018, "percentage": 20.64, "elapsed_time": "0:54:40", "remaining_time": "3:30:16", "throughput": 8679.96, "total_tokens": 28476000} +{"current_steps": 42240, "total_steps": 204665, "loss": 0.1376, "lr": 1.931844564193976e-06, "epoch": 1.031930227444849, "percentage": 20.64, "elapsed_time": "0:54:41", "remaining_time": "3:30:16", "throughput": 8680.48, "total_tokens": 28481120} +{"current_steps": 42245, "total_steps": 204665, "loss": 0.2042, "lr": 1.9318136171723606e-06, "epoch": 1.0320523782766962, "percentage": 20.64, "elapsed_time": "0:54:41", "remaining_time": "3:30:16", "throughput": 8680.68, "total_tokens": 28484896} +{"current_steps": 42250, "total_steps": 204665, "loss": 0.0014, "lr": 1.9317826633743464e-06, "epoch": 1.0321745291085431, "percentage": 20.64, "elapsed_time": "0:54:41", "remaining_time": "3:30:15", "throughput": 8680.82, "total_tokens": 28488416} +{"current_steps": 42255, "total_steps": 204665, "loss": 0.0013, "lr": 1.9317517028001584e-06, "epoch": 1.0322966799403903, "percentage": 20.65, "elapsed_time": "0:54:42", "remaining_time": "3:30:15", "throughput": 8680.97, "total_tokens": 28491936} +{"current_steps": 42260, "total_steps": 204665, "loss": 0.2312, "lr": 1.9317207354500206e-06, "epoch": 1.0324188307722375, "percentage": 20.65, "elapsed_time": "0:54:42", "remaining_time": "3:30:14", "throughput": 8681.09, "total_tokens": 28495392} +{"current_steps": 42265, "total_steps": 204665, "loss": 0.1211, "lr": 1.9316897613241596e-06, "epoch": 1.0325409816040847, "percentage": 20.65, "elapsed_time": "0:54:42", "remaining_time": "3:30:13", "throughput": 8681.16, "total_tokens": 28498592} +{"current_steps": 42270, "total_steps": 204665, "loss": 0.1664, "lr": 1.9316587804228e-06, "epoch": 1.0326631324359319, "percentage": 20.65, "elapsed_time": "0:54:43", "remaining_time": "3:30:13", "throughput": 8681.2, "total_tokens": 28501728} +{"current_steps": 42275, "total_steps": 204665, "loss": 0.0567, "lr": 1.931627792746167e-06, "epoch": 1.032785283267779, "percentage": 20.66, "elapsed_time": "0:54:43", "remaining_time": "3:30:12", "throughput": 8681.25, "total_tokens": 28504864} +{"current_steps": 42280, "total_steps": 204665, "loss": 0.0706, "lr": 1.931596798294486e-06, "epoch": 1.0329074340996263, "percentage": 20.66, "elapsed_time": "0:54:43", "remaining_time": "3:30:12", "throughput": 8681.29, "total_tokens": 28508000} +{"current_steps": 42285, "total_steps": 204665, "loss": 0.1044, "lr": 1.9315657970679826e-06, "epoch": 1.0330295849314735, "percentage": 20.66, "elapsed_time": "0:54:44", "remaining_time": "3:30:11", "throughput": 8681.33, "total_tokens": 28511072} +{"current_steps": 42290, "total_steps": 204665, "loss": 0.2329, "lr": 1.9315347890668825e-06, "epoch": 1.0331517357633206, "percentage": 20.66, "elapsed_time": "0:54:44", "remaining_time": "3:30:11", "throughput": 8681.33, "total_tokens": 28514016} +{"current_steps": 42295, "total_steps": 204665, "loss": 0.1215, "lr": 1.9315037742914107e-06, "epoch": 1.0332738865951676, "percentage": 20.67, "elapsed_time": "0:54:44", "remaining_time": "3:30:10", "throughput": 8681.51, "total_tokens": 28517664} +{"current_steps": 42300, "total_steps": 204665, "loss": 0.0044, "lr": 1.931472752741793e-06, "epoch": 1.0333960374270148, "percentage": 20.67, "elapsed_time": "0:54:45", "remaining_time": "3:30:10", "throughput": 8681.6, "total_tokens": 28520992} +{"current_steps": 42305, "total_steps": 204665, "loss": 0.1998, "lr": 1.9314417244182547e-06, "epoch": 1.033518188258862, "percentage": 20.67, "elapsed_time": "0:54:45", "remaining_time": "3:30:09", "throughput": 8681.75, "total_tokens": 28524512} +{"current_steps": 42310, "total_steps": 204665, "loss": 0.0614, "lr": 1.9314106893210216e-06, "epoch": 1.0336403390907092, "percentage": 20.67, "elapsed_time": "0:54:45", "remaining_time": "3:30:08", "throughput": 8681.9, "total_tokens": 28528032} +{"current_steps": 42315, "total_steps": 204665, "loss": 0.0382, "lr": 1.9313796474503194e-06, "epoch": 1.0337624899225564, "percentage": 20.68, "elapsed_time": "0:54:46", "remaining_time": "3:30:08", "throughput": 8681.87, "total_tokens": 28530848} +{"current_steps": 42320, "total_steps": 204665, "loss": 0.0943, "lr": 1.931348598806374e-06, "epoch": 1.0338846407544036, "percentage": 20.68, "elapsed_time": "0:54:46", "remaining_time": "3:30:07", "throughput": 8681.99, "total_tokens": 28534304} +{"current_steps": 42325, "total_steps": 204665, "loss": 0.1328, "lr": 1.931317543389411e-06, "epoch": 1.0340067915862508, "percentage": 20.68, "elapsed_time": "0:54:46", "remaining_time": "3:30:07", "throughput": 8682.04, "total_tokens": 28537376} +{"current_steps": 42330, "total_steps": 204665, "loss": 0.1759, "lr": 1.9312864811996567e-06, "epoch": 1.034128942418098, "percentage": 20.68, "elapsed_time": "0:54:47", "remaining_time": "3:30:06", "throughput": 8682.0, "total_tokens": 28540128} +{"current_steps": 42335, "total_steps": 204665, "loss": 0.0172, "lr": 1.931255412237336e-06, "epoch": 1.0342510932499451, "percentage": 20.69, "elapsed_time": "0:54:47", "remaining_time": "3:30:06", "throughput": 8682.09, "total_tokens": 28543456} +{"current_steps": 42340, "total_steps": 204665, "loss": 0.1055, "lr": 1.931224336502676e-06, "epoch": 1.034373244081792, "percentage": 20.69, "elapsed_time": "0:54:47", "remaining_time": "3:30:05", "throughput": 8682.22, "total_tokens": 28546912} +{"current_steps": 42345, "total_steps": 204665, "loss": 0.0127, "lr": 1.931193253995902e-06, "epoch": 1.0344953949136393, "percentage": 20.69, "elapsed_time": "0:54:48", "remaining_time": "3:30:05", "throughput": 8682.31, "total_tokens": 28550240} +{"current_steps": 42350, "total_steps": 204665, "loss": 0.0732, "lr": 1.93116216471724e-06, "epoch": 1.0346175457454865, "percentage": 20.69, "elapsed_time": "0:54:48", "remaining_time": "3:30:04", "throughput": 8682.44, "total_tokens": 28553696} +{"current_steps": 42355, "total_steps": 204665, "loss": 0.07, "lr": 1.9311310686669165e-06, "epoch": 1.0347396965773337, "percentage": 20.69, "elapsed_time": "0:54:49", "remaining_time": "3:30:03", "throughput": 8682.63, "total_tokens": 28557408} +{"current_steps": 42360, "total_steps": 204665, "loss": 0.136, "lr": 1.931099965845158e-06, "epoch": 1.0348618474091809, "percentage": 20.7, "elapsed_time": "0:54:49", "remaining_time": "3:30:03", "throughput": 8682.7, "total_tokens": 28560672} +{"current_steps": 42365, "total_steps": 204665, "loss": 0.1912, "lr": 1.9310688562521894e-06, "epoch": 1.034983998241028, "percentage": 20.7, "elapsed_time": "0:54:49", "remaining_time": "3:30:02", "throughput": 8682.74, "total_tokens": 28563744} +{"current_steps": 42370, "total_steps": 204665, "loss": 0.0607, "lr": 1.9310377398882377e-06, "epoch": 1.0351061490728752, "percentage": 20.7, "elapsed_time": "0:54:50", "remaining_time": "3:30:02", "throughput": 8682.83, "total_tokens": 28567072} +{"current_steps": 42375, "total_steps": 204665, "loss": 0.1144, "lr": 1.931006616753529e-06, "epoch": 1.0352282999047224, "percentage": 20.7, "elapsed_time": "0:54:50", "remaining_time": "3:30:01", "throughput": 8683.0, "total_tokens": 28570720} +{"current_steps": 42380, "total_steps": 204665, "loss": 0.2082, "lr": 1.93097548684829e-06, "epoch": 1.0353504507365696, "percentage": 20.71, "elapsed_time": "0:54:50", "remaining_time": "3:30:01", "throughput": 8683.13, "total_tokens": 28574176} +{"current_steps": 42385, "total_steps": 204665, "loss": 0.0518, "lr": 1.930944350172747e-06, "epoch": 1.0354726015684166, "percentage": 20.71, "elapsed_time": "0:54:51", "remaining_time": "3:30:00", "throughput": 8683.16, "total_tokens": 28577248} +{"current_steps": 42390, "total_steps": 204665, "loss": 0.0541, "lr": 1.930913206727126e-06, "epoch": 1.0355947524002638, "percentage": 20.71, "elapsed_time": "0:54:51", "remaining_time": "3:30:00", "throughput": 8683.26, "total_tokens": 28580576} +{"current_steps": 42395, "total_steps": 204665, "loss": 0.002, "lr": 1.9308820565116538e-06, "epoch": 1.035716903232111, "percentage": 20.71, "elapsed_time": "0:54:51", "remaining_time": "3:29:59", "throughput": 8683.39, "total_tokens": 28584032} +{"current_steps": 42400, "total_steps": 204665, "loss": 0.0064, "lr": 1.930850899526557e-06, "epoch": 1.0358390540639582, "percentage": 20.72, "elapsed_time": "0:54:52", "remaining_time": "3:29:59", "throughput": 8683.54, "total_tokens": 28587552} +{"current_steps": 42405, "total_steps": 204665, "loss": 0.065, "lr": 1.930819735772062e-06, "epoch": 1.0359612048958053, "percentage": 20.72, "elapsed_time": "0:54:52", "remaining_time": "3:29:58", "throughput": 8683.69, "total_tokens": 28591136} +{"current_steps": 42410, "total_steps": 204665, "loss": 0.1083, "lr": 1.930788565248396e-06, "epoch": 1.0360833557276525, "percentage": 20.72, "elapsed_time": "0:54:52", "remaining_time": "3:29:58", "throughput": 8683.79, "total_tokens": 28594464} +{"current_steps": 42415, "total_steps": 204665, "loss": 0.095, "lr": 1.9307573879557847e-06, "epoch": 1.0362055065594997, "percentage": 20.72, "elapsed_time": "0:54:53", "remaining_time": "3:29:57", "throughput": 8683.9, "total_tokens": 28597856} +{"current_steps": 42420, "total_steps": 204665, "loss": 0.1319, "lr": 1.9307262038944552e-06, "epoch": 1.036327657391347, "percentage": 20.73, "elapsed_time": "0:54:53", "remaining_time": "3:29:56", "throughput": 8683.93, "total_tokens": 28600928} +{"current_steps": 42425, "total_steps": 204665, "loss": 0.2187, "lr": 1.9306950130646346e-06, "epoch": 1.036449808223194, "percentage": 20.73, "elapsed_time": "0:54:53", "remaining_time": "3:29:56", "throughput": 8683.95, "total_tokens": 28603936} +{"current_steps": 42430, "total_steps": 204665, "loss": 0.0097, "lr": 1.9306638154665497e-06, "epoch": 1.036571959055041, "percentage": 20.73, "elapsed_time": "0:54:54", "remaining_time": "3:29:55", "throughput": 8683.97, "total_tokens": 28606944} +{"current_steps": 42435, "total_steps": 204665, "loss": 0.0701, "lr": 1.930632611100427e-06, "epoch": 1.0366941098868883, "percentage": 20.73, "elapsed_time": "0:54:54", "remaining_time": "3:29:55", "throughput": 8684.03, "total_tokens": 28610208} +{"current_steps": 42440, "total_steps": 204665, "loss": 0.0021, "lr": 1.9306013999664937e-06, "epoch": 1.0368162607187354, "percentage": 20.74, "elapsed_time": "0:54:54", "remaining_time": "3:29:54", "throughput": 8684.13, "total_tokens": 28613536} +{"current_steps": 42445, "total_steps": 204665, "loss": 0.0961, "lr": 1.930570182064977e-06, "epoch": 1.0369384115505826, "percentage": 20.74, "elapsed_time": "0:54:55", "remaining_time": "3:29:54", "throughput": 8684.22, "total_tokens": 28616864} +{"current_steps": 42450, "total_steps": 204665, "loss": 0.132, "lr": 1.9305389573961033e-06, "epoch": 1.0370605623824298, "percentage": 20.74, "elapsed_time": "0:54:55", "remaining_time": "3:29:53", "throughput": 8684.3, "total_tokens": 28620128} +{"current_steps": 42455, "total_steps": 204665, "loss": 0.0375, "lr": 1.9305077259601e-06, "epoch": 1.037182713214277, "percentage": 20.74, "elapsed_time": "0:54:55", "remaining_time": "3:29:53", "throughput": 8684.42, "total_tokens": 28623520} +{"current_steps": 42460, "total_steps": 204665, "loss": 0.2168, "lr": 1.9304764877571944e-06, "epoch": 1.0373048640461242, "percentage": 20.75, "elapsed_time": "0:54:56", "remaining_time": "3:29:52", "throughput": 8684.56, "total_tokens": 28627040} +{"current_steps": 42465, "total_steps": 204665, "loss": 0.0926, "lr": 1.9304452427876138e-06, "epoch": 1.0374270148779714, "percentage": 20.75, "elapsed_time": "0:54:56", "remaining_time": "3:29:51", "throughput": 8684.66, "total_tokens": 28630368} +{"current_steps": 42470, "total_steps": 204665, "loss": 0.0037, "lr": 1.9304139910515845e-06, "epoch": 1.0375491657098186, "percentage": 20.75, "elapsed_time": "0:54:57", "remaining_time": "3:29:51", "throughput": 8684.73, "total_tokens": 28633632} +{"current_steps": 42475, "total_steps": 204665, "loss": 0.0369, "lr": 1.9303827325493346e-06, "epoch": 1.0376713165416656, "percentage": 20.75, "elapsed_time": "0:54:57", "remaining_time": "3:29:50", "throughput": 8684.77, "total_tokens": 28636704} +{"current_steps": 42480, "total_steps": 204665, "loss": 0.0419, "lr": 1.9303514672810913e-06, "epoch": 1.0377934673735127, "percentage": 20.76, "elapsed_time": "0:54:57", "remaining_time": "3:29:50", "throughput": 8684.97, "total_tokens": 28640480} +{"current_steps": 42485, "total_steps": 204665, "loss": 0.119, "lr": 1.930320195247082e-06, "epoch": 1.03791561820536, "percentage": 20.76, "elapsed_time": "0:54:58", "remaining_time": "3:29:49", "throughput": 8685.05, "total_tokens": 28643808} +{"current_steps": 42490, "total_steps": 204665, "loss": 0.0031, "lr": 1.930288916447534e-06, "epoch": 1.0380377690372071, "percentage": 20.76, "elapsed_time": "0:54:58", "remaining_time": "3:29:49", "throughput": 8685.23, "total_tokens": 28647456} +{"current_steps": 42495, "total_steps": 204665, "loss": 0.2149, "lr": 1.930257630882675e-06, "epoch": 1.0381599198690543, "percentage": 20.76, "elapsed_time": "0:54:58", "remaining_time": "3:29:48", "throughput": 8685.37, "total_tokens": 28650976} +{"current_steps": 42500, "total_steps": 204665, "loss": 0.0508, "lr": 1.930226338552732e-06, "epoch": 1.0382820707009015, "percentage": 20.77, "elapsed_time": "0:54:59", "remaining_time": "3:29:48", "throughput": 8685.46, "total_tokens": 28654304} +{"current_steps": 42505, "total_steps": 204665, "loss": 0.1642, "lr": 1.9301950394579328e-06, "epoch": 1.0384042215327487, "percentage": 20.77, "elapsed_time": "0:54:59", "remaining_time": "3:29:47", "throughput": 8685.52, "total_tokens": 28657440} +{"current_steps": 42510, "total_steps": 204665, "loss": 0.09, "lr": 1.9301637335985052e-06, "epoch": 1.0385263723645959, "percentage": 20.77, "elapsed_time": "0:54:59", "remaining_time": "3:29:47", "throughput": 8685.68, "total_tokens": 28661024} +{"current_steps": 42515, "total_steps": 204665, "loss": 0.0374, "lr": 1.930132420974677e-06, "epoch": 1.038648523196443, "percentage": 20.77, "elapsed_time": "0:55:00", "remaining_time": "3:29:46", "throughput": 8685.74, "total_tokens": 28664160} +{"current_steps": 42520, "total_steps": 204665, "loss": 0.1329, "lr": 1.930101101586675e-06, "epoch": 1.03877067402829, "percentage": 20.78, "elapsed_time": "0:55:00", "remaining_time": "3:29:46", "throughput": 8685.85, "total_tokens": 28667552} +{"current_steps": 42525, "total_steps": 204665, "loss": 0.0922, "lr": 1.9300697754347283e-06, "epoch": 1.0388928248601372, "percentage": 20.78, "elapsed_time": "0:55:00", "remaining_time": "3:29:45", "throughput": 8685.96, "total_tokens": 28670944} +{"current_steps": 42530, "total_steps": 204665, "loss": 0.1599, "lr": 1.9300384425190635e-06, "epoch": 1.0390149756919844, "percentage": 20.78, "elapsed_time": "0:55:01", "remaining_time": "3:29:44", "throughput": 8685.97, "total_tokens": 28673888} +{"current_steps": 42535, "total_steps": 204665, "loss": 0.114, "lr": 1.9300071028399093e-06, "epoch": 1.0391371265238316, "percentage": 20.78, "elapsed_time": "0:55:01", "remaining_time": "3:29:44", "throughput": 8686.03, "total_tokens": 28677088} +{"current_steps": 42540, "total_steps": 204665, "loss": 0.2603, "lr": 1.9299757563974934e-06, "epoch": 1.0392592773556788, "percentage": 20.79, "elapsed_time": "0:55:01", "remaining_time": "3:29:43", "throughput": 8686.08, "total_tokens": 28680224} +{"current_steps": 42545, "total_steps": 204665, "loss": 0.0781, "lr": 1.9299444031920437e-06, "epoch": 1.039381428187526, "percentage": 20.79, "elapsed_time": "0:55:02", "remaining_time": "3:29:43", "throughput": 8686.21, "total_tokens": 28683680} +{"current_steps": 42550, "total_steps": 204665, "loss": 0.0054, "lr": 1.9299130432237877e-06, "epoch": 1.0395035790193732, "percentage": 20.79, "elapsed_time": "0:55:02", "remaining_time": "3:29:42", "throughput": 8686.31, "total_tokens": 28687008} +{"current_steps": 42555, "total_steps": 204665, "loss": 0.044, "lr": 1.929881676492954e-06, "epoch": 1.0396257298512204, "percentage": 20.79, "elapsed_time": "0:55:02", "remaining_time": "3:29:42", "throughput": 8686.35, "total_tokens": 28690080} +{"current_steps": 42560, "total_steps": 204665, "loss": 0.1201, "lr": 1.929850302999771e-06, "epoch": 1.0397478806830676, "percentage": 20.79, "elapsed_time": "0:55:03", "remaining_time": "3:29:41", "throughput": 8686.49, "total_tokens": 28693600} +{"current_steps": 42565, "total_steps": 204665, "loss": 0.1339, "lr": 1.9298189227444665e-06, "epoch": 1.0398700315149145, "percentage": 20.8, "elapsed_time": "0:55:03", "remaining_time": "3:29:41", "throughput": 8686.62, "total_tokens": 28697056} +{"current_steps": 42570, "total_steps": 204665, "loss": 0.0307, "lr": 1.9297875357272683e-06, "epoch": 1.0399921823467617, "percentage": 20.8, "elapsed_time": "0:55:03", "remaining_time": "3:29:40", "throughput": 8686.75, "total_tokens": 28700512} +{"current_steps": 42575, "total_steps": 204665, "loss": 0.0128, "lr": 1.9297561419484056e-06, "epoch": 1.040114333178609, "percentage": 20.8, "elapsed_time": "0:55:04", "remaining_time": "3:29:39", "throughput": 8686.69, "total_tokens": 28703200} +{"current_steps": 42580, "total_steps": 204665, "loss": 0.0397, "lr": 1.9297247414081058e-06, "epoch": 1.040236484010456, "percentage": 20.8, "elapsed_time": "0:55:04", "remaining_time": "3:29:39", "throughput": 8686.81, "total_tokens": 28706592} +{"current_steps": 42585, "total_steps": 204665, "loss": 0.0027, "lr": 1.929693334106598e-06, "epoch": 1.0403586348423033, "percentage": 20.81, "elapsed_time": "0:55:04", "remaining_time": "3:29:38", "throughput": 8686.85, "total_tokens": 28709728} +{"current_steps": 42590, "total_steps": 204665, "loss": 0.0827, "lr": 1.9296619200441095e-06, "epoch": 1.0404807856741505, "percentage": 20.81, "elapsed_time": "0:55:05", "remaining_time": "3:29:38", "throughput": 8687.03, "total_tokens": 28713376} +{"current_steps": 42595, "total_steps": 204665, "loss": 0.1676, "lr": 1.9296304992208697e-06, "epoch": 1.0406029365059977, "percentage": 20.81, "elapsed_time": "0:55:05", "remaining_time": "3:29:37", "throughput": 8687.05, "total_tokens": 28716384} +{"current_steps": 42600, "total_steps": 204665, "loss": 0.0847, "lr": 1.9295990716371075e-06, "epoch": 1.0407250873378449, "percentage": 20.81, "elapsed_time": "0:55:05", "remaining_time": "3:29:37", "throughput": 8687.09, "total_tokens": 28719456} +{"current_steps": 42605, "total_steps": 204665, "loss": 0.05, "lr": 1.9295676372930505e-06, "epoch": 1.0408472381696918, "percentage": 20.82, "elapsed_time": "0:55:06", "remaining_time": "3:29:36", "throughput": 8687.26, "total_tokens": 28723104} +{"current_steps": 42610, "total_steps": 204665, "loss": 0.1016, "lr": 1.9295361961889272e-06, "epoch": 1.040969389001539, "percentage": 20.82, "elapsed_time": "0:55:06", "remaining_time": "3:29:36", "throughput": 8687.34, "total_tokens": 28726368} +{"current_steps": 42615, "total_steps": 204665, "loss": 0.1046, "lr": 1.929504748324967e-06, "epoch": 1.0410915398333862, "percentage": 20.82, "elapsed_time": "0:55:07", "remaining_time": "3:29:35", "throughput": 8687.45, "total_tokens": 28729760} +{"current_steps": 42620, "total_steps": 204665, "loss": 0.1763, "lr": 1.929473293701398e-06, "epoch": 1.0412136906652334, "percentage": 20.82, "elapsed_time": "0:55:07", "remaining_time": "3:29:34", "throughput": 8687.53, "total_tokens": 28733024} +{"current_steps": 42625, "total_steps": 204665, "loss": 0.101, "lr": 1.9294418323184495e-06, "epoch": 1.0413358414970806, "percentage": 20.83, "elapsed_time": "0:55:07", "remaining_time": "3:29:34", "throughput": 8687.64, "total_tokens": 28736416} +{"current_steps": 42630, "total_steps": 204665, "loss": 0.1033, "lr": 1.92941036417635e-06, "epoch": 1.0414579923289278, "percentage": 20.83, "elapsed_time": "0:55:08", "remaining_time": "3:29:33", "throughput": 8687.74, "total_tokens": 28739744} +{"current_steps": 42635, "total_steps": 204665, "loss": 0.0464, "lr": 1.929378889275328e-06, "epoch": 1.041580143160775, "percentage": 20.83, "elapsed_time": "0:55:08", "remaining_time": "3:29:33", "throughput": 8687.72, "total_tokens": 28742560} +{"current_steps": 42640, "total_steps": 204665, "loss": 0.005, "lr": 1.929347407615613e-06, "epoch": 1.0417022939926222, "percentage": 20.83, "elapsed_time": "0:55:08", "remaining_time": "3:29:32", "throughput": 8687.83, "total_tokens": 28745952} +{"current_steps": 42645, "total_steps": 204665, "loss": 0.0939, "lr": 1.9293159191974338e-06, "epoch": 1.0418244448244693, "percentage": 20.84, "elapsed_time": "0:55:09", "remaining_time": "3:29:32", "throughput": 8687.92, "total_tokens": 28749280} +{"current_steps": 42650, "total_steps": 204665, "loss": 0.0669, "lr": 1.9292844240210193e-06, "epoch": 1.0419465956563165, "percentage": 20.84, "elapsed_time": "0:55:09", "remaining_time": "3:29:31", "throughput": 8687.97, "total_tokens": 28752416} +{"current_steps": 42655, "total_steps": 204665, "loss": 0.0011, "lr": 1.9292529220865985e-06, "epoch": 1.0420687464881635, "percentage": 20.84, "elapsed_time": "0:55:09", "remaining_time": "3:29:31", "throughput": 8688.18, "total_tokens": 28756192} +{"current_steps": 42660, "total_steps": 204665, "loss": 0.0752, "lr": 1.9292214133944003e-06, "epoch": 1.0421908973200107, "percentage": 20.84, "elapsed_time": "0:55:10", "remaining_time": "3:29:30", "throughput": 8688.23, "total_tokens": 28759328} +{"current_steps": 42665, "total_steps": 204665, "loss": 0.0056, "lr": 1.929189897944654e-06, "epoch": 1.0423130481518579, "percentage": 20.85, "elapsed_time": "0:55:10", "remaining_time": "3:29:30", "throughput": 8688.32, "total_tokens": 28762656} +{"current_steps": 42670, "total_steps": 204665, "loss": 0.0822, "lr": 1.929158375737589e-06, "epoch": 1.042435198983705, "percentage": 20.85, "elapsed_time": "0:55:10", "remaining_time": "3:29:29", "throughput": 8688.45, "total_tokens": 28766112} +{"current_steps": 42675, "total_steps": 204665, "loss": 0.1787, "lr": 1.9291268467734343e-06, "epoch": 1.0425573498155523, "percentage": 20.85, "elapsed_time": "0:55:11", "remaining_time": "3:29:28", "throughput": 8688.58, "total_tokens": 28769568} +{"current_steps": 42680, "total_steps": 204665, "loss": 0.0615, "lr": 1.92909531105242e-06, "epoch": 1.0426795006473994, "percentage": 20.85, "elapsed_time": "0:55:11", "remaining_time": "3:29:28", "throughput": 8688.68, "total_tokens": 28772896} +{"current_steps": 42685, "total_steps": 204665, "loss": 0.0421, "lr": 1.929063768574774e-06, "epoch": 1.0428016514792466, "percentage": 20.86, "elapsed_time": "0:55:11", "remaining_time": "3:29:27", "throughput": 8688.78, "total_tokens": 28776224} +{"current_steps": 42690, "total_steps": 204665, "loss": 0.0861, "lr": 1.9290322193407264e-06, "epoch": 1.0429238023110938, "percentage": 20.86, "elapsed_time": "0:55:12", "remaining_time": "3:29:27", "throughput": 8688.87, "total_tokens": 28779488} +{"current_steps": 42695, "total_steps": 204665, "loss": 0.101, "lr": 1.9290006633505065e-06, "epoch": 1.0430459531429408, "percentage": 20.86, "elapsed_time": "0:55:12", "remaining_time": "3:29:26", "throughput": 8688.95, "total_tokens": 28782752} +{"current_steps": 42700, "total_steps": 204665, "loss": 0.0381, "lr": 1.928969100604344e-06, "epoch": 1.043168103974788, "percentage": 20.86, "elapsed_time": "0:55:12", "remaining_time": "3:29:26", "throughput": 8689.05, "total_tokens": 28786080} +{"current_steps": 42705, "total_steps": 204665, "loss": 0.0541, "lr": 1.9289375311024683e-06, "epoch": 1.0432902548066352, "percentage": 20.87, "elapsed_time": "0:55:13", "remaining_time": "3:29:25", "throughput": 8689.21, "total_tokens": 28789664} +{"current_steps": 42710, "total_steps": 204665, "loss": 0.0356, "lr": 1.9289059548451094e-06, "epoch": 1.0434124056384824, "percentage": 20.87, "elapsed_time": "0:55:13", "remaining_time": "3:29:25", "throughput": 8689.3, "total_tokens": 28792992} +{"current_steps": 42715, "total_steps": 204665, "loss": 0.1014, "lr": 1.9288743718324963e-06, "epoch": 1.0435345564703296, "percentage": 20.87, "elapsed_time": "0:55:13", "remaining_time": "3:29:24", "throughput": 8689.5, "total_tokens": 28796768} +{"current_steps": 42720, "total_steps": 204665, "loss": 0.093, "lr": 1.9288427820648586e-06, "epoch": 1.0436567073021767, "percentage": 20.87, "elapsed_time": "0:55:14", "remaining_time": "3:29:24", "throughput": 8689.62, "total_tokens": 28800224} +{"current_steps": 42725, "total_steps": 204665, "loss": 0.0736, "lr": 1.9288111855424266e-06, "epoch": 1.043778858134024, "percentage": 20.88, "elapsed_time": "0:55:14", "remaining_time": "3:29:23", "throughput": 8689.81, "total_tokens": 28803936} +{"current_steps": 42730, "total_steps": 204665, "loss": 0.0022, "lr": 1.92877958226543e-06, "epoch": 1.0439010089658711, "percentage": 20.88, "elapsed_time": "0:55:15", "remaining_time": "3:29:23", "throughput": 8689.91, "total_tokens": 28807264} +{"current_steps": 42735, "total_steps": 204665, "loss": 0.1002, "lr": 1.9287479722340985e-06, "epoch": 1.0440231597977183, "percentage": 20.88, "elapsed_time": "0:55:15", "remaining_time": "3:29:22", "throughput": 8690.01, "total_tokens": 28810592} +{"current_steps": 42740, "total_steps": 204665, "loss": 0.1476, "lr": 1.928716355448662e-06, "epoch": 1.0441453106295653, "percentage": 20.88, "elapsed_time": "0:55:15", "remaining_time": "3:29:21", "throughput": 8690.07, "total_tokens": 28813792} +{"current_steps": 42745, "total_steps": 204665, "loss": 0.0419, "lr": 1.92868473190935e-06, "epoch": 1.0442674614614125, "percentage": 20.89, "elapsed_time": "0:55:16", "remaining_time": "3:29:21", "throughput": 8690.27, "total_tokens": 28817568} +{"current_steps": 42750, "total_steps": 204665, "loss": 0.0304, "lr": 1.9286531016163934e-06, "epoch": 1.0443896122932597, "percentage": 20.89, "elapsed_time": "0:55:16", "remaining_time": "3:29:20", "throughput": 8690.44, "total_tokens": 28821216} +{"current_steps": 42755, "total_steps": 204665, "loss": 0.1421, "lr": 1.928621464570021e-06, "epoch": 1.0445117631251069, "percentage": 20.89, "elapsed_time": "0:55:16", "remaining_time": "3:29:20", "throughput": 8690.48, "total_tokens": 28824352} +{"current_steps": 42760, "total_steps": 204665, "loss": 0.0645, "lr": 1.9285898207704637e-06, "epoch": 1.044633913956954, "percentage": 20.89, "elapsed_time": "0:55:17", "remaining_time": "3:29:19", "throughput": 8690.56, "total_tokens": 28827616} +{"current_steps": 42765, "total_steps": 204665, "loss": 0.113, "lr": 1.928558170217952e-06, "epoch": 1.0447560647888012, "percentage": 20.9, "elapsed_time": "0:55:17", "remaining_time": "3:29:19", "throughput": 8690.66, "total_tokens": 28830944} +{"current_steps": 42770, "total_steps": 204665, "loss": 0.1068, "lr": 1.9285265129127147e-06, "epoch": 1.0448782156206484, "percentage": 20.9, "elapsed_time": "0:55:17", "remaining_time": "3:29:18", "throughput": 8690.76, "total_tokens": 28834336} +{"current_steps": 42775, "total_steps": 204665, "loss": 0.2002, "lr": 1.9284948488549834e-06, "epoch": 1.0450003664524956, "percentage": 20.9, "elapsed_time": "0:55:18", "remaining_time": "3:29:18", "throughput": 8690.9, "total_tokens": 28837856} +{"current_steps": 42780, "total_steps": 204665, "loss": 0.0059, "lr": 1.928463178044988e-06, "epoch": 1.0451225172843428, "percentage": 20.9, "elapsed_time": "0:55:18", "remaining_time": "3:29:17", "throughput": 8690.86, "total_tokens": 28840672} +{"current_steps": 42785, "total_steps": 204665, "loss": 0.1131, "lr": 1.9284315004829582e-06, "epoch": 1.0452446681161898, "percentage": 20.9, "elapsed_time": "0:55:18", "remaining_time": "3:29:17", "throughput": 8690.99, "total_tokens": 28844128} +{"current_steps": 42790, "total_steps": 204665, "loss": 0.1306, "lr": 1.9283998161691247e-06, "epoch": 1.045366818948037, "percentage": 20.91, "elapsed_time": "0:55:19", "remaining_time": "3:29:16", "throughput": 8691.22, "total_tokens": 28848032} +{"current_steps": 42795, "total_steps": 204665, "loss": 0.1494, "lr": 1.9283681251037187e-06, "epoch": 1.0454889697798841, "percentage": 20.91, "elapsed_time": "0:55:19", "remaining_time": "3:29:16", "throughput": 8691.32, "total_tokens": 28851424} +{"current_steps": 42800, "total_steps": 204665, "loss": 0.0593, "lr": 1.928336427286969e-06, "epoch": 1.0456111206117313, "percentage": 20.91, "elapsed_time": "0:55:19", "remaining_time": "3:29:15", "throughput": 8691.35, "total_tokens": 28854496} +{"current_steps": 42805, "total_steps": 204665, "loss": 0.1006, "lr": 1.928304722719108e-06, "epoch": 1.0457332714435785, "percentage": 20.91, "elapsed_time": "0:55:20", "remaining_time": "3:29:15", "throughput": 8691.45, "total_tokens": 28857888} +{"current_steps": 42810, "total_steps": 204665, "loss": 0.0862, "lr": 1.9282730114003652e-06, "epoch": 1.0458554222754257, "percentage": 20.92, "elapsed_time": "0:55:20", "remaining_time": "3:29:14", "throughput": 8691.56, "total_tokens": 28861280} +{"current_steps": 42815, "total_steps": 204665, "loss": 0.0299, "lr": 1.928241293330971e-06, "epoch": 1.045977573107273, "percentage": 20.92, "elapsed_time": "0:55:20", "remaining_time": "3:29:13", "throughput": 8691.65, "total_tokens": 28864608} +{"current_steps": 42820, "total_steps": 204665, "loss": 0.0559, "lr": 1.928209568511157e-06, "epoch": 1.04609972393912, "percentage": 20.92, "elapsed_time": "0:55:21", "remaining_time": "3:29:13", "throughput": 8691.84, "total_tokens": 28868320} +{"current_steps": 42825, "total_steps": 204665, "loss": 0.0955, "lr": 1.928177836941153e-06, "epoch": 1.0462218747709673, "percentage": 20.92, "elapsed_time": "0:55:21", "remaining_time": "3:29:12", "throughput": 8691.88, "total_tokens": 28871456} +{"current_steps": 42830, "total_steps": 204665, "loss": 0.0599, "lr": 1.92814609862119e-06, "epoch": 1.0463440256028143, "percentage": 20.93, "elapsed_time": "0:55:22", "remaining_time": "3:29:12", "throughput": 8691.92, "total_tokens": 28874592} +{"current_steps": 42835, "total_steps": 204665, "loss": 0.0608, "lr": 1.928114353551499e-06, "epoch": 1.0464661764346614, "percentage": 20.93, "elapsed_time": "0:55:22", "remaining_time": "3:29:11", "throughput": 8692.08, "total_tokens": 28878176} +{"current_steps": 42840, "total_steps": 204665, "loss": 0.0408, "lr": 1.928082601732311e-06, "epoch": 1.0465883272665086, "percentage": 20.93, "elapsed_time": "0:55:22", "remaining_time": "3:29:11", "throughput": 8692.16, "total_tokens": 28881504} +{"current_steps": 42845, "total_steps": 204665, "loss": 0.0309, "lr": 1.9280508431638567e-06, "epoch": 1.0467104780983558, "percentage": 20.93, "elapsed_time": "0:55:23", "remaining_time": "3:29:10", "throughput": 8692.35, "total_tokens": 28885216} +{"current_steps": 42850, "total_steps": 204665, "loss": 0.1235, "lr": 1.928019077846367e-06, "epoch": 1.046832628930203, "percentage": 20.94, "elapsed_time": "0:55:23", "remaining_time": "3:29:10", "throughput": 8692.44, "total_tokens": 28888544} +{"current_steps": 42855, "total_steps": 204665, "loss": 0.1238, "lr": 1.927987305780073e-06, "epoch": 1.0469547797620502, "percentage": 20.94, "elapsed_time": "0:55:23", "remaining_time": "3:29:09", "throughput": 8692.51, "total_tokens": 28891744} +{"current_steps": 42860, "total_steps": 204665, "loss": 0.1013, "lr": 1.9279555269652053e-06, "epoch": 1.0470769305938974, "percentage": 20.94, "elapsed_time": "0:55:24", "remaining_time": "3:29:09", "throughput": 8692.57, "total_tokens": 28894944} +{"current_steps": 42865, "total_steps": 204665, "loss": 0.0614, "lr": 1.927923741401996e-06, "epoch": 1.0471990814257446, "percentage": 20.94, "elapsed_time": "0:55:24", "remaining_time": "3:29:08", "throughput": 8692.58, "total_tokens": 28897952} +{"current_steps": 42870, "total_steps": 204665, "loss": 0.0939, "lr": 1.9278919490906754e-06, "epoch": 1.0473212322575918, "percentage": 20.95, "elapsed_time": "0:55:24", "remaining_time": "3:29:08", "throughput": 8692.68, "total_tokens": 28901344} +{"current_steps": 42875, "total_steps": 204665, "loss": 0.0972, "lr": 1.9278601500314748e-06, "epoch": 1.0474433830894387, "percentage": 20.95, "elapsed_time": "0:55:25", "remaining_time": "3:29:07", "throughput": 8692.71, "total_tokens": 28904416} +{"current_steps": 42880, "total_steps": 204665, "loss": 0.0583, "lr": 1.927828344224626e-06, "epoch": 1.047565533921286, "percentage": 20.95, "elapsed_time": "0:55:25", "remaining_time": "3:29:06", "throughput": 8692.84, "total_tokens": 28907872} +{"current_steps": 42885, "total_steps": 204665, "loss": 0.0871, "lr": 1.9277965316703595e-06, "epoch": 1.0476876847531331, "percentage": 20.95, "elapsed_time": "0:55:25", "remaining_time": "3:29:06", "throughput": 8692.85, "total_tokens": 28910880} +{"current_steps": 42890, "total_steps": 204665, "loss": 0.0423, "lr": 1.9277647123689077e-06, "epoch": 1.0478098355849803, "percentage": 20.96, "elapsed_time": "0:55:26", "remaining_time": "3:29:05", "throughput": 8693.03, "total_tokens": 28914592} +{"current_steps": 42895, "total_steps": 204665, "loss": 0.0861, "lr": 1.9277328863205006e-06, "epoch": 1.0479319864168275, "percentage": 20.96, "elapsed_time": "0:55:26", "remaining_time": "3:29:05", "throughput": 8693.24, "total_tokens": 28918368} +{"current_steps": 42900, "total_steps": 204665, "loss": 0.0989, "lr": 1.927701053525371e-06, "epoch": 1.0480541372486747, "percentage": 20.96, "elapsed_time": "0:55:26", "remaining_time": "3:29:04", "throughput": 8693.55, "total_tokens": 28922592} +{"current_steps": 42905, "total_steps": 204665, "loss": 0.0402, "lr": 1.92766921398375e-06, "epoch": 1.0481762880805219, "percentage": 20.96, "elapsed_time": "0:55:27", "remaining_time": "3:29:04", "throughput": 8693.68, "total_tokens": 28926048} +{"current_steps": 42910, "total_steps": 204665, "loss": 0.0911, "lr": 1.927637367695868e-06, "epoch": 1.048298438912369, "percentage": 20.97, "elapsed_time": "0:55:27", "remaining_time": "3:29:03", "throughput": 8693.83, "total_tokens": 28929632} +{"current_steps": 42915, "total_steps": 204665, "loss": 0.0014, "lr": 1.9276055146619582e-06, "epoch": 1.0484205897442163, "percentage": 20.97, "elapsed_time": "0:55:27", "remaining_time": "3:29:03", "throughput": 8693.95, "total_tokens": 28933088} +{"current_steps": 42920, "total_steps": 204665, "loss": 0.0398, "lr": 1.9275736548822516e-06, "epoch": 1.0485427405760632, "percentage": 20.97, "elapsed_time": "0:55:28", "remaining_time": "3:29:02", "throughput": 8694.03, "total_tokens": 28936352} +{"current_steps": 42925, "total_steps": 204665, "loss": 0.1345, "lr": 1.9275417883569796e-06, "epoch": 1.0486648914079104, "percentage": 20.97, "elapsed_time": "0:55:28", "remaining_time": "3:29:02", "throughput": 8694.12, "total_tokens": 28939680} +{"current_steps": 42930, "total_steps": 204665, "loss": 0.1114, "lr": 1.9275099150863747e-06, "epoch": 1.0487870422397576, "percentage": 20.98, "elapsed_time": "0:55:29", "remaining_time": "3:29:01", "throughput": 8694.36, "total_tokens": 28943648} +{"current_steps": 42935, "total_steps": 204665, "loss": 0.0751, "lr": 1.9274780350706678e-06, "epoch": 1.0489091930716048, "percentage": 20.98, "elapsed_time": "0:55:29", "remaining_time": "3:29:01", "throughput": 8694.44, "total_tokens": 28946912} +{"current_steps": 42940, "total_steps": 204665, "loss": 0.0015, "lr": 1.9274461483100916e-06, "epoch": 1.049031343903452, "percentage": 20.98, "elapsed_time": "0:55:29", "remaining_time": "3:29:00", "throughput": 8694.48, "total_tokens": 28950048} +{"current_steps": 42945, "total_steps": 204665, "loss": 0.0286, "lr": 1.927414254804877e-06, "epoch": 1.0491534947352992, "percentage": 20.98, "elapsed_time": "0:55:30", "remaining_time": "3:29:00", "throughput": 8694.6, "total_tokens": 28953504} +{"current_steps": 42950, "total_steps": 204665, "loss": 0.1131, "lr": 1.9273823545552573e-06, "epoch": 1.0492756455671464, "percentage": 20.99, "elapsed_time": "0:55:30", "remaining_time": "3:28:59", "throughput": 8694.67, "total_tokens": 28956704} +{"current_steps": 42955, "total_steps": 204665, "loss": 0.0012, "lr": 1.927350447561463e-06, "epoch": 1.0493977963989936, "percentage": 20.99, "elapsed_time": "0:55:30", "remaining_time": "3:28:59", "throughput": 8694.74, "total_tokens": 28959968} +{"current_steps": 42960, "total_steps": 204665, "loss": 0.0587, "lr": 1.927318533823727e-06, "epoch": 1.0495199472308407, "percentage": 20.99, "elapsed_time": "0:55:31", "remaining_time": "3:28:58", "throughput": 8694.94, "total_tokens": 28963744} +{"current_steps": 42965, "total_steps": 204665, "loss": 0.0692, "lr": 1.927286613342281e-06, "epoch": 1.0496420980626877, "percentage": 20.99, "elapsed_time": "0:55:31", "remaining_time": "3:28:58", "throughput": 8695.06, "total_tokens": 28967200} +{"current_steps": 42970, "total_steps": 204665, "loss": 0.0739, "lr": 1.9272546861173576e-06, "epoch": 1.049764248894535, "percentage": 21.0, "elapsed_time": "0:55:31", "remaining_time": "3:28:57", "throughput": 8695.28, "total_tokens": 28971040} +{"current_steps": 42975, "total_steps": 204665, "loss": 0.1976, "lr": 1.9272227521491887e-06, "epoch": 1.049886399726382, "percentage": 21.0, "elapsed_time": "0:55:32", "remaining_time": "3:28:56", "throughput": 8695.34, "total_tokens": 28974240} +{"current_steps": 42980, "total_steps": 204665, "loss": 0.1454, "lr": 1.927190811438007e-06, "epoch": 1.0500085505582293, "percentage": 21.0, "elapsed_time": "0:55:32", "remaining_time": "3:28:56", "throughput": 8695.43, "total_tokens": 28977568} +{"current_steps": 42985, "total_steps": 204665, "loss": 0.061, "lr": 1.9271588639840434e-06, "epoch": 1.0501307013900765, "percentage": 21.0, "elapsed_time": "0:55:32", "remaining_time": "3:28:55", "throughput": 8695.6, "total_tokens": 28981216} +{"current_steps": 42990, "total_steps": 204665, "loss": 0.0465, "lr": 1.9271269097875317e-06, "epoch": 1.0502528522219237, "percentage": 21.01, "elapsed_time": "0:55:33", "remaining_time": "3:28:55", "throughput": 8695.72, "total_tokens": 28984672} +{"current_steps": 42995, "total_steps": 204665, "loss": 0.0031, "lr": 1.9270949488487038e-06, "epoch": 1.0503750030537709, "percentage": 21.01, "elapsed_time": "0:55:33", "remaining_time": "3:28:54", "throughput": 8695.92, "total_tokens": 28988384} +{"current_steps": 43000, "total_steps": 204665, "loss": 0.0765, "lr": 1.9270629811677917e-06, "epoch": 1.050497153885618, "percentage": 21.01, "elapsed_time": "0:55:33", "remaining_time": "3:28:54", "throughput": 8695.99, "total_tokens": 28991648} +{"current_steps": 43005, "total_steps": 204665, "loss": 0.0022, "lr": 1.927031006745029e-06, "epoch": 1.0506193047174652, "percentage": 21.01, "elapsed_time": "0:55:34", "remaining_time": "3:28:53", "throughput": 8696.11, "total_tokens": 28995104} +{"current_steps": 43010, "total_steps": 204665, "loss": 0.0963, "lr": 1.9269990255806467e-06, "epoch": 1.0507414555493122, "percentage": 21.01, "elapsed_time": "0:55:34", "remaining_time": "3:28:53", "throughput": 8696.22, "total_tokens": 28998496} +{"current_steps": 43015, "total_steps": 204665, "loss": 0.0566, "lr": 1.9269670376748783e-06, "epoch": 1.0508636063811594, "percentage": 21.02, "elapsed_time": "0:55:34", "remaining_time": "3:28:52", "throughput": 8696.42, "total_tokens": 29002272} +{"current_steps": 43020, "total_steps": 204665, "loss": 0.0036, "lr": 1.9269350430279566e-06, "epoch": 1.0509857572130066, "percentage": 21.02, "elapsed_time": "0:55:35", "remaining_time": "3:28:52", "throughput": 8696.56, "total_tokens": 29005792} +{"current_steps": 43025, "total_steps": 204665, "loss": 0.0948, "lr": 1.926903041640114e-06, "epoch": 1.0511079080448538, "percentage": 21.02, "elapsed_time": "0:55:35", "remaining_time": "3:28:51", "throughput": 8696.63, "total_tokens": 29009056} +{"current_steps": 43030, "total_steps": 204665, "loss": 0.0552, "lr": 1.9268710335115825e-06, "epoch": 1.051230058876701, "percentage": 21.02, "elapsed_time": "0:55:36", "remaining_time": "3:28:51", "throughput": 8696.78, "total_tokens": 29012640} +{"current_steps": 43035, "total_steps": 204665, "loss": 0.1364, "lr": 1.926839018642596e-06, "epoch": 1.0513522097085481, "percentage": 21.03, "elapsed_time": "0:55:36", "remaining_time": "3:28:50", "throughput": 8696.96, "total_tokens": 29016352} +{"current_steps": 43040, "total_steps": 204665, "loss": 0.1925, "lr": 1.926806997033387e-06, "epoch": 1.0514743605403953, "percentage": 21.03, "elapsed_time": "0:55:36", "remaining_time": "3:28:50", "throughput": 8697.24, "total_tokens": 29020512} +{"current_steps": 43045, "total_steps": 204665, "loss": 0.0892, "lr": 1.926774968684188e-06, "epoch": 1.0515965113722425, "percentage": 21.03, "elapsed_time": "0:55:37", "remaining_time": "3:28:49", "throughput": 8697.35, "total_tokens": 29023904} +{"current_steps": 43050, "total_steps": 204665, "loss": 0.1294, "lr": 1.926742933595232e-06, "epoch": 1.0517186622040897, "percentage": 21.03, "elapsed_time": "0:55:37", "remaining_time": "3:28:49", "throughput": 8697.39, "total_tokens": 29027040} +{"current_steps": 43055, "total_steps": 204665, "loss": 0.0738, "lr": 1.9267108917667528e-06, "epoch": 1.0518408130359367, "percentage": 21.04, "elapsed_time": "0:55:37", "remaining_time": "3:28:48", "throughput": 8697.41, "total_tokens": 29030048} +{"current_steps": 43060, "total_steps": 204665, "loss": 0.0333, "lr": 1.926678843198982e-06, "epoch": 1.0519629638677839, "percentage": 21.04, "elapsed_time": "0:55:38", "remaining_time": "3:28:48", "throughput": 8697.55, "total_tokens": 29033568} +{"current_steps": 43065, "total_steps": 204665, "loss": 0.1935, "lr": 1.926646787892154e-06, "epoch": 1.052085114699631, "percentage": 21.04, "elapsed_time": "0:55:38", "remaining_time": "3:28:47", "throughput": 8697.61, "total_tokens": 29036832} +{"current_steps": 43070, "total_steps": 204665, "loss": 0.0139, "lr": 1.926614725846501e-06, "epoch": 1.0522072655314783, "percentage": 21.04, "elapsed_time": "0:55:38", "remaining_time": "3:28:47", "throughput": 8697.81, "total_tokens": 29040608} +{"current_steps": 43075, "total_steps": 204665, "loss": 0.074, "lr": 1.9265826570622565e-06, "epoch": 1.0523294163633254, "percentage": 21.05, "elapsed_time": "0:55:39", "remaining_time": "3:28:46", "throughput": 8697.89, "total_tokens": 29043872} +{"current_steps": 43080, "total_steps": 204665, "loss": 0.1008, "lr": 1.9265505815396533e-06, "epoch": 1.0524515671951726, "percentage": 21.05, "elapsed_time": "0:55:39", "remaining_time": "3:28:45", "throughput": 8697.94, "total_tokens": 29047008} +{"current_steps": 43085, "total_steps": 204665, "loss": 0.1009, "lr": 1.926518499278926e-06, "epoch": 1.0525737180270198, "percentage": 21.05, "elapsed_time": "0:55:39", "remaining_time": "3:28:45", "throughput": 8697.93, "total_tokens": 29049888} +{"current_steps": 43090, "total_steps": 204665, "loss": 0.0497, "lr": 1.9264864102803062e-06, "epoch": 1.052695868858867, "percentage": 21.05, "elapsed_time": "0:55:40", "remaining_time": "3:28:44", "throughput": 8697.99, "total_tokens": 29053088} +{"current_steps": 43095, "total_steps": 204665, "loss": 0.0636, "lr": 1.9264543145440283e-06, "epoch": 1.0528180196907142, "percentage": 21.06, "elapsed_time": "0:55:40", "remaining_time": "3:28:44", "throughput": 8698.13, "total_tokens": 29056608} +{"current_steps": 43100, "total_steps": 204665, "loss": 0.1812, "lr": 1.9264222120703253e-06, "epoch": 1.0529401705225612, "percentage": 21.06, "elapsed_time": "0:55:40", "remaining_time": "3:28:43", "throughput": 8698.18, "total_tokens": 29059744} +{"current_steps": 43105, "total_steps": 204665, "loss": 0.1036, "lr": 1.9263901028594307e-06, "epoch": 1.0530623213544084, "percentage": 21.06, "elapsed_time": "0:55:41", "remaining_time": "3:28:43", "throughput": 8698.41, "total_tokens": 29063648} +{"current_steps": 43110, "total_steps": 204665, "loss": 0.1083, "lr": 1.9263579869115783e-06, "epoch": 1.0531844721862555, "percentage": 21.06, "elapsed_time": "0:55:41", "remaining_time": "3:28:42", "throughput": 8698.44, "total_tokens": 29066720} +{"current_steps": 43115, "total_steps": 204665, "loss": 0.0401, "lr": 1.9263258642270018e-06, "epoch": 1.0533066230181027, "percentage": 21.07, "elapsed_time": "0:55:41", "remaining_time": "3:28:42", "throughput": 8698.54, "total_tokens": 29070048} +{"current_steps": 43120, "total_steps": 204665, "loss": 0.1869, "lr": 1.926293734805934e-06, "epoch": 1.05342877384995, "percentage": 21.07, "elapsed_time": "0:55:42", "remaining_time": "3:28:41", "throughput": 8698.54, "total_tokens": 29072992} +{"current_steps": 43125, "total_steps": 204665, "loss": 0.0173, "lr": 1.926261598648609e-06, "epoch": 1.0535509246817971, "percentage": 21.07, "elapsed_time": "0:55:42", "remaining_time": "3:28:41", "throughput": 8698.72, "total_tokens": 29076704} +{"current_steps": 43130, "total_steps": 204665, "loss": 0.047, "lr": 1.926229455755261e-06, "epoch": 1.0536730755136443, "percentage": 21.07, "elapsed_time": "0:55:42", "remaining_time": "3:28:40", "throughput": 8698.87, "total_tokens": 29080288} +{"current_steps": 43135, "total_steps": 204665, "loss": 0.1349, "lr": 1.926197306126123e-06, "epoch": 1.0537952263454915, "percentage": 21.08, "elapsed_time": "0:55:43", "remaining_time": "3:28:39", "throughput": 8698.87, "total_tokens": 29083168} +{"current_steps": 43140, "total_steps": 204665, "loss": 0.0556, "lr": 1.926165149761429e-06, "epoch": 1.0539173771773385, "percentage": 21.08, "elapsed_time": "0:55:43", "remaining_time": "3:28:39", "throughput": 8698.96, "total_tokens": 29086496} +{"current_steps": 43145, "total_steps": 204665, "loss": 0.1828, "lr": 1.9261329866614125e-06, "epoch": 1.0540395280091857, "percentage": 21.08, "elapsed_time": "0:55:44", "remaining_time": "3:28:38", "throughput": 8699.0, "total_tokens": 29089568} +{"current_steps": 43150, "total_steps": 204665, "loss": 0.0622, "lr": 1.9261008168263082e-06, "epoch": 1.0541616788410328, "percentage": 21.08, "elapsed_time": "0:55:44", "remaining_time": "3:28:38", "throughput": 8699.16, "total_tokens": 29093152} +{"current_steps": 43155, "total_steps": 204665, "loss": 0.0211, "lr": 1.92606864025635e-06, "epoch": 1.05428382967288, "percentage": 21.09, "elapsed_time": "0:55:44", "remaining_time": "3:28:37", "throughput": 8699.45, "total_tokens": 29097312} +{"current_steps": 43160, "total_steps": 204665, "loss": 0.1547, "lr": 1.9260364569517715e-06, "epoch": 1.0544059805047272, "percentage": 21.09, "elapsed_time": "0:55:45", "remaining_time": "3:28:37", "throughput": 8699.53, "total_tokens": 29100576} +{"current_steps": 43165, "total_steps": 204665, "loss": 0.1126, "lr": 1.926004266912806e-06, "epoch": 1.0545281313365744, "percentage": 21.09, "elapsed_time": "0:55:45", "remaining_time": "3:28:36", "throughput": 8699.65, "total_tokens": 29104032} +{"current_steps": 43170, "total_steps": 204665, "loss": 0.0023, "lr": 1.9259720701396893e-06, "epoch": 1.0546502821684216, "percentage": 21.09, "elapsed_time": "0:55:45", "remaining_time": "3:28:36", "throughput": 8699.82, "total_tokens": 29107680} +{"current_steps": 43175, "total_steps": 204665, "loss": 0.0037, "lr": 1.9259398666326545e-06, "epoch": 1.0547724330002688, "percentage": 21.1, "elapsed_time": "0:55:46", "remaining_time": "3:28:35", "throughput": 8699.9, "total_tokens": 29110944} +{"current_steps": 43180, "total_steps": 204665, "loss": 0.0061, "lr": 1.9259076563919356e-06, "epoch": 1.054894583832116, "percentage": 21.1, "elapsed_time": "0:55:46", "remaining_time": "3:28:35", "throughput": 8699.91, "total_tokens": 29113952} +{"current_steps": 43185, "total_steps": 204665, "loss": 0.0589, "lr": 1.9258754394177672e-06, "epoch": 1.0550167346639632, "percentage": 21.1, "elapsed_time": "0:55:46", "remaining_time": "3:28:34", "throughput": 8699.99, "total_tokens": 29117216} +{"current_steps": 43190, "total_steps": 204665, "loss": 0.0329, "lr": 1.925843215710384e-06, "epoch": 1.0551388854958101, "percentage": 21.1, "elapsed_time": "0:55:47", "remaining_time": "3:28:34", "throughput": 8700.17, "total_tokens": 29120864} +{"current_steps": 43195, "total_steps": 204665, "loss": 0.0941, "lr": 1.92581098527002e-06, "epoch": 1.0552610363276573, "percentage": 21.11, "elapsed_time": "0:55:47", "remaining_time": "3:28:33", "throughput": 8700.26, "total_tokens": 29124192} +{"current_steps": 43200, "total_steps": 204665, "loss": 0.0453, "lr": 1.925778748096909e-06, "epoch": 1.0553831871595045, "percentage": 21.11, "elapsed_time": "0:55:47", "remaining_time": "3:28:32", "throughput": 8700.27, "total_tokens": 29127136} +{"current_steps": 43205, "total_steps": 204665, "loss": 0.0303, "lr": 1.925746504191286e-06, "epoch": 1.0555053379913517, "percentage": 21.11, "elapsed_time": "0:55:48", "remaining_time": "3:28:32", "throughput": 8700.32, "total_tokens": 29130272} +{"current_steps": 43210, "total_steps": 204665, "loss": 0.0251, "lr": 1.9257142535533857e-06, "epoch": 1.055627488823199, "percentage": 21.11, "elapsed_time": "0:55:48", "remaining_time": "3:28:31", "throughput": 8700.44, "total_tokens": 29133728} +{"current_steps": 43215, "total_steps": 204665, "loss": 0.1662, "lr": 1.925681996183442e-06, "epoch": 1.055749639655046, "percentage": 21.11, "elapsed_time": "0:55:48", "remaining_time": "3:28:31", "throughput": 8700.51, "total_tokens": 29136992} +{"current_steps": 43220, "total_steps": 204665, "loss": 0.2896, "lr": 1.92564973208169e-06, "epoch": 1.0558717904868933, "percentage": 21.12, "elapsed_time": "0:55:49", "remaining_time": "3:28:30", "throughput": 8700.53, "total_tokens": 29140000} +{"current_steps": 43225, "total_steps": 204665, "loss": 0.0322, "lr": 1.9256174612483644e-06, "epoch": 1.0559939413187405, "percentage": 21.12, "elapsed_time": "0:55:49", "remaining_time": "3:28:30", "throughput": 8700.62, "total_tokens": 29143328} +{"current_steps": 43230, "total_steps": 204665, "loss": 0.1653, "lr": 1.925585183683699e-06, "epoch": 1.0561160921505874, "percentage": 21.12, "elapsed_time": "0:55:49", "remaining_time": "3:28:29", "throughput": 8700.64, "total_tokens": 29146336} +{"current_steps": 43235, "total_steps": 204665, "loss": 0.093, "lr": 1.92555289938793e-06, "epoch": 1.0562382429824346, "percentage": 21.12, "elapsed_time": "0:55:50", "remaining_time": "3:28:29", "throughput": 8700.73, "total_tokens": 29149664} +{"current_steps": 43240, "total_steps": 204665, "loss": 0.0861, "lr": 1.925520608361291e-06, "epoch": 1.0563603938142818, "percentage": 21.13, "elapsed_time": "0:55:50", "remaining_time": "3:28:28", "throughput": 8700.79, "total_tokens": 29152864} +{"current_steps": 43245, "total_steps": 204665, "loss": 0.1102, "lr": 1.9254883106040173e-06, "epoch": 1.056482544646129, "percentage": 21.13, "elapsed_time": "0:55:50", "remaining_time": "3:28:28", "throughput": 8700.87, "total_tokens": 29156128} +{"current_steps": 43250, "total_steps": 204665, "loss": 0.1114, "lr": 1.9254560061163437e-06, "epoch": 1.0566046954779762, "percentage": 21.13, "elapsed_time": "0:55:51", "remaining_time": "3:28:27", "throughput": 8700.9, "total_tokens": 29159200} +{"current_steps": 43255, "total_steps": 204665, "loss": 0.0591, "lr": 1.9254236948985046e-06, "epoch": 1.0567268463098234, "percentage": 21.13, "elapsed_time": "0:55:51", "remaining_time": "3:28:26", "throughput": 8700.95, "total_tokens": 29162336} +{"current_steps": 43260, "total_steps": 204665, "loss": 0.1124, "lr": 1.9253913769507363e-06, "epoch": 1.0568489971416706, "percentage": 21.14, "elapsed_time": "0:55:51", "remaining_time": "3:28:26", "throughput": 8700.93, "total_tokens": 29165216} +{"current_steps": 43265, "total_steps": 204665, "loss": 0.1745, "lr": 1.9253590522732727e-06, "epoch": 1.0569711479735178, "percentage": 21.14, "elapsed_time": "0:55:52", "remaining_time": "3:28:25", "throughput": 8700.97, "total_tokens": 29168352} +{"current_steps": 43270, "total_steps": 204665, "loss": 0.1335, "lr": 1.9253267208663486e-06, "epoch": 1.057093298805365, "percentage": 21.14, "elapsed_time": "0:55:52", "remaining_time": "3:28:25", "throughput": 8701.08, "total_tokens": 29171744} +{"current_steps": 43275, "total_steps": 204665, "loss": 0.0037, "lr": 1.9252943827302006e-06, "epoch": 1.057215449637212, "percentage": 21.14, "elapsed_time": "0:55:52", "remaining_time": "3:28:24", "throughput": 8701.1, "total_tokens": 29174752} +{"current_steps": 43280, "total_steps": 204665, "loss": 0.1023, "lr": 1.9252620378650627e-06, "epoch": 1.0573376004690591, "percentage": 21.15, "elapsed_time": "0:55:53", "remaining_time": "3:28:24", "throughput": 8701.16, "total_tokens": 29177952} +{"current_steps": 43285, "total_steps": 204665, "loss": 0.0288, "lr": 1.92522968627117e-06, "epoch": 1.0574597513009063, "percentage": 21.15, "elapsed_time": "0:55:53", "remaining_time": "3:28:23", "throughput": 8701.2, "total_tokens": 29181024} +{"current_steps": 43290, "total_steps": 204665, "loss": 0.1486, "lr": 1.9251973279487586e-06, "epoch": 1.0575819021327535, "percentage": 21.15, "elapsed_time": "0:55:54", "remaining_time": "3:28:23", "throughput": 8701.22, "total_tokens": 29184096} +{"current_steps": 43295, "total_steps": 204665, "loss": 0.094, "lr": 1.9251649628980633e-06, "epoch": 1.0577040529646007, "percentage": 21.15, "elapsed_time": "0:55:54", "remaining_time": "3:28:22", "throughput": 8701.4, "total_tokens": 29187808} +{"current_steps": 43300, "total_steps": 204665, "loss": 0.0648, "lr": 1.925132591119319e-06, "epoch": 1.0578262037964479, "percentage": 21.16, "elapsed_time": "0:55:54", "remaining_time": "3:28:21", "throughput": 8701.47, "total_tokens": 29191072} +{"current_steps": 43305, "total_steps": 204665, "loss": 0.1138, "lr": 1.9251002126127626e-06, "epoch": 1.057948354628295, "percentage": 21.16, "elapsed_time": "0:55:55", "remaining_time": "3:28:21", "throughput": 8701.69, "total_tokens": 29194912} +{"current_steps": 43310, "total_steps": 204665, "loss": 0.0849, "lr": 1.9250678273786283e-06, "epoch": 1.0580705054601423, "percentage": 21.16, "elapsed_time": "0:55:55", "remaining_time": "3:28:20", "throughput": 8701.73, "total_tokens": 29198048} +{"current_steps": 43315, "total_steps": 204665, "loss": 0.0362, "lr": 1.9250354354171515e-06, "epoch": 1.0581926562919894, "percentage": 21.16, "elapsed_time": "0:55:55", "remaining_time": "3:28:20", "throughput": 8701.77, "total_tokens": 29201120} +{"current_steps": 43320, "total_steps": 204665, "loss": 0.018, "lr": 1.9250030367285684e-06, "epoch": 1.0583148071238364, "percentage": 21.17, "elapsed_time": "0:55:56", "remaining_time": "3:28:19", "throughput": 8701.79, "total_tokens": 29204192} +{"current_steps": 43325, "total_steps": 204665, "loss": 0.0911, "lr": 1.9249706313131147e-06, "epoch": 1.0584369579556836, "percentage": 21.17, "elapsed_time": "0:55:56", "remaining_time": "3:28:19", "throughput": 8701.86, "total_tokens": 29207392} +{"current_steps": 43330, "total_steps": 204665, "loss": 0.0367, "lr": 1.924938219171026e-06, "epoch": 1.0585591087875308, "percentage": 21.17, "elapsed_time": "0:55:56", "remaining_time": "3:28:18", "throughput": 8701.99, "total_tokens": 29210912} +{"current_steps": 43335, "total_steps": 204665, "loss": 0.0325, "lr": 1.9249058003025367e-06, "epoch": 1.058681259619378, "percentage": 21.17, "elapsed_time": "0:55:57", "remaining_time": "3:28:18", "throughput": 8701.93, "total_tokens": 29213536} +{"current_steps": 43340, "total_steps": 204665, "loss": 0.0867, "lr": 1.9248733747078847e-06, "epoch": 1.0588034104512252, "percentage": 21.18, "elapsed_time": "0:55:57", "remaining_time": "3:28:17", "throughput": 8702.07, "total_tokens": 29217056} +{"current_steps": 43345, "total_steps": 204665, "loss": 0.1057, "lr": 1.9248409423873044e-06, "epoch": 1.0589255612830724, "percentage": 21.18, "elapsed_time": "0:55:57", "remaining_time": "3:28:17", "throughput": 8702.16, "total_tokens": 29220384} +{"current_steps": 43350, "total_steps": 204665, "loss": 0.0897, "lr": 1.9248085033410318e-06, "epoch": 1.0590477121149195, "percentage": 21.18, "elapsed_time": "0:55:58", "remaining_time": "3:28:16", "throughput": 8702.23, "total_tokens": 29223648} +{"current_steps": 43355, "total_steps": 204665, "loss": 0.0721, "lr": 1.9247760575693036e-06, "epoch": 1.0591698629467667, "percentage": 21.18, "elapsed_time": "0:55:58", "remaining_time": "3:28:16", "throughput": 8702.42, "total_tokens": 29227360} +{"current_steps": 43360, "total_steps": 204665, "loss": 0.0285, "lr": 1.9247436050723545e-06, "epoch": 1.059292013778614, "percentage": 21.19, "elapsed_time": "0:55:58", "remaining_time": "3:28:15", "throughput": 8702.58, "total_tokens": 29230944} +{"current_steps": 43365, "total_steps": 204665, "loss": 0.1744, "lr": 1.9247111458504216e-06, "epoch": 1.059414164610461, "percentage": 21.19, "elapsed_time": "0:55:59", "remaining_time": "3:28:14", "throughput": 8702.63, "total_tokens": 29234080} +{"current_steps": 43370, "total_steps": 204665, "loss": 0.0446, "lr": 1.9246786799037403e-06, "epoch": 1.059536315442308, "percentage": 21.19, "elapsed_time": "0:55:59", "remaining_time": "3:28:14", "throughput": 8702.62, "total_tokens": 29237024} +{"current_steps": 43375, "total_steps": 204665, "loss": 0.0152, "lr": 1.924646207232547e-06, "epoch": 1.0596584662741553, "percentage": 21.19, "elapsed_time": "0:55:59", "remaining_time": "3:28:13", "throughput": 8702.72, "total_tokens": 29240352} +{"current_steps": 43380, "total_steps": 204665, "loss": 0.0404, "lr": 1.9246137278370783e-06, "epoch": 1.0597806171060025, "percentage": 21.2, "elapsed_time": "0:56:00", "remaining_time": "3:28:13", "throughput": 8702.8, "total_tokens": 29243616} +{"current_steps": 43385, "total_steps": 204665, "loss": 0.1136, "lr": 1.9245812417175692e-06, "epoch": 1.0599027679378497, "percentage": 21.2, "elapsed_time": "0:56:00", "remaining_time": "3:28:12", "throughput": 8702.97, "total_tokens": 29247328} +{"current_steps": 43390, "total_steps": 204665, "loss": 0.0794, "lr": 1.9245487488742568e-06, "epoch": 1.0600249187696968, "percentage": 21.2, "elapsed_time": "0:56:00", "remaining_time": "3:28:12", "throughput": 8702.96, "total_tokens": 29250208} +{"current_steps": 43395, "total_steps": 204665, "loss": 0.0574, "lr": 1.9245162493073776e-06, "epoch": 1.060147069601544, "percentage": 21.2, "elapsed_time": "0:56:01", "remaining_time": "3:28:11", "throughput": 8703.0, "total_tokens": 29253344} +{"current_steps": 43400, "total_steps": 204665, "loss": 0.0906, "lr": 1.924483743017167e-06, "epoch": 1.0602692204333912, "percentage": 21.21, "elapsed_time": "0:56:01", "remaining_time": "3:28:11", "throughput": 8703.14, "total_tokens": 29256864} +{"current_steps": 43405, "total_steps": 204665, "loss": 0.0496, "lr": 1.9244512300038623e-06, "epoch": 1.0603913712652384, "percentage": 21.21, "elapsed_time": "0:56:01", "remaining_time": "3:28:10", "throughput": 8703.21, "total_tokens": 29260128} +{"current_steps": 43410, "total_steps": 204665, "loss": 0.052, "lr": 1.9244187102676993e-06, "epoch": 1.0605135220970854, "percentage": 21.21, "elapsed_time": "0:56:02", "remaining_time": "3:28:10", "throughput": 8703.43, "total_tokens": 29263968} +{"current_steps": 43415, "total_steps": 204665, "loss": 0.0027, "lr": 1.9243861838089153e-06, "epoch": 1.0606356729289326, "percentage": 21.21, "elapsed_time": "0:56:02", "remaining_time": "3:28:09", "throughput": 8703.5, "total_tokens": 29267232} +{"current_steps": 43420, "total_steps": 204665, "loss": 0.0472, "lr": 1.924353650627746e-06, "epoch": 1.0607578237607798, "percentage": 21.22, "elapsed_time": "0:56:03", "remaining_time": "3:28:09", "throughput": 8703.58, "total_tokens": 29270496} +{"current_steps": 43425, "total_steps": 204665, "loss": 0.0737, "lr": 1.9243211107244284e-06, "epoch": 1.060879974592627, "percentage": 21.22, "elapsed_time": "0:56:03", "remaining_time": "3:28:08", "throughput": 8703.71, "total_tokens": 29273952} +{"current_steps": 43430, "total_steps": 204665, "loss": 0.2243, "lr": 1.924288564099199e-06, "epoch": 1.0610021254244741, "percentage": 21.22, "elapsed_time": "0:56:03", "remaining_time": "3:28:07", "throughput": 8703.79, "total_tokens": 29277280} +{"current_steps": 43435, "total_steps": 204665, "loss": 0.1931, "lr": 1.9242560107522947e-06, "epoch": 1.0611242762563213, "percentage": 21.22, "elapsed_time": "0:56:04", "remaining_time": "3:28:07", "throughput": 8703.81, "total_tokens": 29280288} +{"current_steps": 43440, "total_steps": 204665, "loss": 0.0778, "lr": 1.9242234506839523e-06, "epoch": 1.0612464270881685, "percentage": 21.22, "elapsed_time": "0:56:04", "remaining_time": "3:28:06", "throughput": 8703.87, "total_tokens": 29283488} +{"current_steps": 43445, "total_steps": 204665, "loss": 0.0773, "lr": 1.9241908838944077e-06, "epoch": 1.0613685779200157, "percentage": 21.23, "elapsed_time": "0:56:04", "remaining_time": "3:28:06", "throughput": 8704.05, "total_tokens": 29287200} +{"current_steps": 43450, "total_steps": 204665, "loss": 0.0145, "lr": 1.924158310383899e-06, "epoch": 1.061490728751863, "percentage": 21.23, "elapsed_time": "0:56:05", "remaining_time": "3:28:05", "throughput": 8704.1, "total_tokens": 29290336} +{"current_steps": 43455, "total_steps": 204665, "loss": 0.0795, "lr": 1.9241257301526623e-06, "epoch": 1.0616128795837099, "percentage": 21.23, "elapsed_time": "0:56:05", "remaining_time": "3:28:05", "throughput": 8704.14, "total_tokens": 29293408} +{"current_steps": 43460, "total_steps": 204665, "loss": 0.065, "lr": 1.9240931432009346e-06, "epoch": 1.061735030415557, "percentage": 21.23, "elapsed_time": "0:56:05", "remaining_time": "3:28:04", "throughput": 8704.29, "total_tokens": 29296992} +{"current_steps": 43465, "total_steps": 204665, "loss": 0.1063, "lr": 1.9240605495289533e-06, "epoch": 1.0618571812474042, "percentage": 21.24, "elapsed_time": "0:56:06", "remaining_time": "3:28:04", "throughput": 8704.39, "total_tokens": 29300320} +{"current_steps": 43470, "total_steps": 204665, "loss": 0.105, "lr": 1.924027949136955e-06, "epoch": 1.0619793320792514, "percentage": 21.24, "elapsed_time": "0:56:06", "remaining_time": "3:28:03", "throughput": 8704.45, "total_tokens": 29303520} +{"current_steps": 43475, "total_steps": 204665, "loss": 0.024, "lr": 1.9239953420251767e-06, "epoch": 1.0621014829110986, "percentage": 21.24, "elapsed_time": "0:56:06", "remaining_time": "3:28:03", "throughput": 8704.49, "total_tokens": 29306592} +{"current_steps": 43480, "total_steps": 204665, "loss": 0.1404, "lr": 1.9239627281938562e-06, "epoch": 1.0622236337429458, "percentage": 21.24, "elapsed_time": "0:56:07", "remaining_time": "3:28:02", "throughput": 8704.61, "total_tokens": 29310048} +{"current_steps": 43485, "total_steps": 204665, "loss": 0.0554, "lr": 1.92393010764323e-06, "epoch": 1.062345784574793, "percentage": 21.25, "elapsed_time": "0:56:07", "remaining_time": "3:28:01", "throughput": 8704.71, "total_tokens": 29313440} +{"current_steps": 43490, "total_steps": 204665, "loss": 0.0256, "lr": 1.9238974803735357e-06, "epoch": 1.0624679354066402, "percentage": 21.25, "elapsed_time": "0:56:07", "remaining_time": "3:28:01", "throughput": 8704.83, "total_tokens": 29316896} +{"current_steps": 43495, "total_steps": 204665, "loss": 0.102, "lr": 1.92386484638501e-06, "epoch": 1.0625900862384874, "percentage": 21.25, "elapsed_time": "0:56:08", "remaining_time": "3:28:00", "throughput": 8704.93, "total_tokens": 29320288} +{"current_steps": 43500, "total_steps": 204665, "loss": 0.0439, "lr": 1.923832205677891e-06, "epoch": 1.0627122370703344, "percentage": 21.25, "elapsed_time": "0:56:08", "remaining_time": "3:28:00", "throughput": 8704.94, "total_tokens": 29323296} +{"current_steps": 43505, "total_steps": 204665, "loss": 0.0404, "lr": 1.9237995582524154e-06, "epoch": 1.0628343879021815, "percentage": 21.26, "elapsed_time": "0:56:08", "remaining_time": "3:27:59", "throughput": 8704.97, "total_tokens": 29326304} +{"current_steps": 43510, "total_steps": 204665, "loss": 0.2442, "lr": 1.923766904108821e-06, "epoch": 1.0629565387340287, "percentage": 21.26, "elapsed_time": "0:56:09", "remaining_time": "3:27:59", "throughput": 8705.14, "total_tokens": 29330016} +{"current_steps": 43515, "total_steps": 204665, "loss": 0.1451, "lr": 1.9237342432473456e-06, "epoch": 1.063078689565876, "percentage": 21.26, "elapsed_time": "0:56:09", "remaining_time": "3:27:58", "throughput": 8705.27, "total_tokens": 29333536} +{"current_steps": 43520, "total_steps": 204665, "loss": 0.1617, "lr": 1.923701575668226e-06, "epoch": 1.0632008403977231, "percentage": 21.26, "elapsed_time": "0:56:09", "remaining_time": "3:27:58", "throughput": 8705.42, "total_tokens": 29337120} +{"current_steps": 43525, "total_steps": 204665, "loss": 0.0353, "lr": 1.9236689013717006e-06, "epoch": 1.0633229912295703, "percentage": 21.27, "elapsed_time": "0:56:10", "remaining_time": "3:27:57", "throughput": 8705.53, "total_tokens": 29340512} +{"current_steps": 43530, "total_steps": 204665, "loss": 0.0301, "lr": 1.9236362203580063e-06, "epoch": 1.0634451420614175, "percentage": 21.27, "elapsed_time": "0:56:10", "remaining_time": "3:27:57", "throughput": 8705.55, "total_tokens": 29343584} +{"current_steps": 43535, "total_steps": 204665, "loss": 0.0267, "lr": 1.9236035326273806e-06, "epoch": 1.0635672928932647, "percentage": 21.27, "elapsed_time": "0:56:11", "remaining_time": "3:27:56", "throughput": 8705.63, "total_tokens": 29346848} +{"current_steps": 43540, "total_steps": 204665, "loss": 0.1429, "lr": 1.923570838180062e-06, "epoch": 1.0636894437251119, "percentage": 21.27, "elapsed_time": "0:56:11", "remaining_time": "3:27:56", "throughput": 8705.78, "total_tokens": 29350432} +{"current_steps": 43545, "total_steps": 204665, "loss": 0.1278, "lr": 1.9235381370162872e-06, "epoch": 1.0638115945569588, "percentage": 21.28, "elapsed_time": "0:56:11", "remaining_time": "3:27:55", "throughput": 8705.85, "total_tokens": 29353696} +{"current_steps": 43550, "total_steps": 204665, "loss": 0.3489, "lr": 1.923505429136295e-06, "epoch": 1.063933745388806, "percentage": 21.28, "elapsed_time": "0:56:12", "remaining_time": "3:27:55", "throughput": 8705.94, "total_tokens": 29357024} +{"current_steps": 43555, "total_steps": 204665, "loss": 0.1869, "lr": 1.923472714540323e-06, "epoch": 1.0640558962206532, "percentage": 21.28, "elapsed_time": "0:56:12", "remaining_time": "3:27:54", "throughput": 8706.07, "total_tokens": 29360480} +{"current_steps": 43560, "total_steps": 204665, "loss": 0.1381, "lr": 1.9234399932286093e-06, "epoch": 1.0641780470525004, "percentage": 21.28, "elapsed_time": "0:56:12", "remaining_time": "3:27:54", "throughput": 8706.3, "total_tokens": 29364384} +{"current_steps": 43565, "total_steps": 204665, "loss": 0.1341, "lr": 1.9234072652013915e-06, "epoch": 1.0643001978843476, "percentage": 21.29, "elapsed_time": "0:56:13", "remaining_time": "3:27:53", "throughput": 8706.58, "total_tokens": 29368544} +{"current_steps": 43570, "total_steps": 204665, "loss": 0.1317, "lr": 1.9233745304589074e-06, "epoch": 1.0644223487161948, "percentage": 21.29, "elapsed_time": "0:56:13", "remaining_time": "3:27:53", "throughput": 8706.6, "total_tokens": 29371552} +{"current_steps": 43575, "total_steps": 204665, "loss": 0.0815, "lr": 1.9233417890013956e-06, "epoch": 1.064544499548042, "percentage": 21.29, "elapsed_time": "0:56:13", "remaining_time": "3:27:52", "throughput": 8706.72, "total_tokens": 29375008} +{"current_steps": 43580, "total_steps": 204665, "loss": 0.0384, "lr": 1.923309040829094e-06, "epoch": 1.0646666503798892, "percentage": 21.29, "elapsed_time": "0:56:14", "remaining_time": "3:27:52", "throughput": 8706.81, "total_tokens": 29378400} +{"current_steps": 43585, "total_steps": 204665, "loss": 0.0058, "lr": 1.9232762859422404e-06, "epoch": 1.0647888012117361, "percentage": 21.3, "elapsed_time": "0:56:14", "remaining_time": "3:27:51", "throughput": 8706.87, "total_tokens": 29381600} +{"current_steps": 43590, "total_steps": 204665, "loss": 0.0437, "lr": 1.9232435243410735e-06, "epoch": 1.0649109520435833, "percentage": 21.3, "elapsed_time": "0:56:14", "remaining_time": "3:27:50", "throughput": 8707.02, "total_tokens": 29385184} +{"current_steps": 43595, "total_steps": 204665, "loss": 0.0138, "lr": 1.9232107560258317e-06, "epoch": 1.0650331028754305, "percentage": 21.3, "elapsed_time": "0:56:15", "remaining_time": "3:27:50", "throughput": 8707.02, "total_tokens": 29388128} +{"current_steps": 43600, "total_steps": 204665, "loss": 0.1534, "lr": 1.9231779809967526e-06, "epoch": 1.0651552537072777, "percentage": 21.3, "elapsed_time": "0:56:15", "remaining_time": "3:27:49", "throughput": 8707.03, "total_tokens": 29391136} +{"current_steps": 43605, "total_steps": 204665, "loss": 0.0552, "lr": 1.9231451992540747e-06, "epoch": 1.065277404539125, "percentage": 21.31, "elapsed_time": "0:56:15", "remaining_time": "3:27:49", "throughput": 8707.07, "total_tokens": 29394272} +{"current_steps": 43610, "total_steps": 204665, "loss": 0.1975, "lr": 1.923112410798037e-06, "epoch": 1.065399555370972, "percentage": 21.31, "elapsed_time": "0:56:16", "remaining_time": "3:27:48", "throughput": 8707.3, "total_tokens": 29398176} +{"current_steps": 43615, "total_steps": 204665, "loss": 0.0785, "lr": 1.923079615628877e-06, "epoch": 1.0655217062028193, "percentage": 21.31, "elapsed_time": "0:56:16", "remaining_time": "3:27:48", "throughput": 8707.38, "total_tokens": 29401504} +{"current_steps": 43620, "total_steps": 204665, "loss": 0.0434, "lr": 1.9230468137468344e-06, "epoch": 1.0656438570346665, "percentage": 21.31, "elapsed_time": "0:56:16", "remaining_time": "3:27:47", "throughput": 8707.39, "total_tokens": 29404512} +{"current_steps": 43625, "total_steps": 204665, "loss": 0.1786, "lr": 1.923014005152147e-06, "epoch": 1.0657660078665137, "percentage": 21.32, "elapsed_time": "0:56:17", "remaining_time": "3:27:47", "throughput": 8707.43, "total_tokens": 29407584} +{"current_steps": 43630, "total_steps": 204665, "loss": 0.0271, "lr": 1.9229811898450533e-06, "epoch": 1.0658881586983608, "percentage": 21.32, "elapsed_time": "0:56:17", "remaining_time": "3:27:46", "throughput": 8707.53, "total_tokens": 29410976} +{"current_steps": 43635, "total_steps": 204665, "loss": 0.1013, "lr": 1.9229483678257924e-06, "epoch": 1.0660103095302078, "percentage": 21.32, "elapsed_time": "0:56:17", "remaining_time": "3:27:46", "throughput": 8707.65, "total_tokens": 29414432} +{"current_steps": 43640, "total_steps": 204665, "loss": 0.0404, "lr": 1.9229155390946025e-06, "epoch": 1.066132460362055, "percentage": 21.32, "elapsed_time": "0:56:18", "remaining_time": "3:27:45", "throughput": 8707.89, "total_tokens": 29418400} +{"current_steps": 43645, "total_steps": 204665, "loss": 0.1124, "lr": 1.9228827036517227e-06, "epoch": 1.0662546111939022, "percentage": 21.33, "elapsed_time": "0:56:18", "remaining_time": "3:27:45", "throughput": 8707.95, "total_tokens": 29421600} +{"current_steps": 43650, "total_steps": 204665, "loss": 0.1655, "lr": 1.9228498614973917e-06, "epoch": 1.0663767620257494, "percentage": 21.33, "elapsed_time": "0:56:19", "remaining_time": "3:27:44", "throughput": 8708.08, "total_tokens": 29425120} +{"current_steps": 43655, "total_steps": 204665, "loss": 0.0103, "lr": 1.922817012631848e-06, "epoch": 1.0664989128575966, "percentage": 21.33, "elapsed_time": "0:56:19", "remaining_time": "3:27:44", "throughput": 8708.21, "total_tokens": 29428640} +{"current_steps": 43660, "total_steps": 204665, "loss": 0.102, "lr": 1.922784157055331e-06, "epoch": 1.0666210636894438, "percentage": 21.33, "elapsed_time": "0:56:19", "remaining_time": "3:27:43", "throughput": 8708.31, "total_tokens": 29431968} +{"current_steps": 43665, "total_steps": 204665, "loss": 0.0826, "lr": 1.9227512947680795e-06, "epoch": 1.066743214521291, "percentage": 21.33, "elapsed_time": "0:56:20", "remaining_time": "3:27:42", "throughput": 8708.33, "total_tokens": 29435040} +{"current_steps": 43670, "total_steps": 204665, "loss": 0.147, "lr": 1.922718425770332e-06, "epoch": 1.0668653653531381, "percentage": 21.34, "elapsed_time": "0:56:20", "remaining_time": "3:27:42", "throughput": 8708.42, "total_tokens": 29438368} +{"current_steps": 43675, "total_steps": 204665, "loss": 0.019, "lr": 1.922685550062328e-06, "epoch": 1.066987516184985, "percentage": 21.34, "elapsed_time": "0:56:20", "remaining_time": "3:27:41", "throughput": 8708.49, "total_tokens": 29441632} +{"current_steps": 43680, "total_steps": 204665, "loss": 0.0625, "lr": 1.922652667644307e-06, "epoch": 1.0671096670168323, "percentage": 21.34, "elapsed_time": "0:56:21", "remaining_time": "3:27:41", "throughput": 8708.49, "total_tokens": 29444576} +{"current_steps": 43685, "total_steps": 204665, "loss": 0.0015, "lr": 1.922619778516507e-06, "epoch": 1.0672318178486795, "percentage": 21.34, "elapsed_time": "0:56:21", "remaining_time": "3:27:40", "throughput": 8708.6, "total_tokens": 29448032} +{"current_steps": 43690, "total_steps": 204665, "loss": 0.0739, "lr": 1.922586882679168e-06, "epoch": 1.0673539686805267, "percentage": 21.35, "elapsed_time": "0:56:21", "remaining_time": "3:27:40", "throughput": 8708.71, "total_tokens": 29451424} +{"current_steps": 43695, "total_steps": 204665, "loss": 0.0986, "lr": 1.9225539801325293e-06, "epoch": 1.0674761195123739, "percentage": 21.35, "elapsed_time": "0:56:22", "remaining_time": "3:27:39", "throughput": 8708.77, "total_tokens": 29454624} +{"current_steps": 43700, "total_steps": 204665, "loss": 0.0972, "lr": 1.92252107087683e-06, "epoch": 1.067598270344221, "percentage": 21.35, "elapsed_time": "0:56:22", "remaining_time": "3:27:39", "throughput": 8708.88, "total_tokens": 29458016} +{"current_steps": 43705, "total_steps": 204665, "loss": 0.0258, "lr": 1.922488154912309e-06, "epoch": 1.0677204211760682, "percentage": 21.35, "elapsed_time": "0:56:22", "remaining_time": "3:27:38", "throughput": 8709.04, "total_tokens": 29461664} +{"current_steps": 43710, "total_steps": 204665, "loss": 0.0009, "lr": 1.9224552322392064e-06, "epoch": 1.0678425720079154, "percentage": 21.36, "elapsed_time": "0:56:23", "remaining_time": "3:27:38", "throughput": 8709.13, "total_tokens": 29464992} +{"current_steps": 43715, "total_steps": 204665, "loss": 0.0303, "lr": 1.9224223028577613e-06, "epoch": 1.0679647228397626, "percentage": 21.36, "elapsed_time": "0:56:23", "remaining_time": "3:27:37", "throughput": 8709.2, "total_tokens": 29468192} +{"current_steps": 43720, "total_steps": 204665, "loss": 0.2321, "lr": 1.9223893667682125e-06, "epoch": 1.0680868736716098, "percentage": 21.36, "elapsed_time": "0:56:23", "remaining_time": "3:27:37", "throughput": 8709.43, "total_tokens": 29472160} +{"current_steps": 43725, "total_steps": 204665, "loss": 0.1665, "lr": 1.9223564239708e-06, "epoch": 1.0682090245034568, "percentage": 21.36, "elapsed_time": "0:56:24", "remaining_time": "3:27:36", "throughput": 8709.6, "total_tokens": 29475808} +{"current_steps": 43730, "total_steps": 204665, "loss": 0.0035, "lr": 1.9223234744657644e-06, "epoch": 1.068331175335304, "percentage": 21.37, "elapsed_time": "0:56:24", "remaining_time": "3:27:36", "throughput": 8709.87, "total_tokens": 29479904} +{"current_steps": 43735, "total_steps": 204665, "loss": 0.0922, "lr": 1.922290518253344e-06, "epoch": 1.0684533261671512, "percentage": 21.37, "elapsed_time": "0:56:24", "remaining_time": "3:27:35", "throughput": 8709.93, "total_tokens": 29483104} +{"current_steps": 43740, "total_steps": 204665, "loss": 0.0933, "lr": 1.922257555333779e-06, "epoch": 1.0685754769989984, "percentage": 21.37, "elapsed_time": "0:56:25", "remaining_time": "3:27:35", "throughput": 8709.98, "total_tokens": 29486240} +{"current_steps": 43745, "total_steps": 204665, "loss": 0.0801, "lr": 1.9222245857073086e-06, "epoch": 1.0686976278308455, "percentage": 21.37, "elapsed_time": "0:56:25", "remaining_time": "3:27:34", "throughput": 8710.08, "total_tokens": 29489632} +{"current_steps": 43750, "total_steps": 204665, "loss": 0.033, "lr": 1.922191609374173e-06, "epoch": 1.0688197786626927, "percentage": 21.38, "elapsed_time": "0:56:26", "remaining_time": "3:27:34", "throughput": 8710.33, "total_tokens": 29493600} +{"current_steps": 43755, "total_steps": 204665, "loss": 0.0382, "lr": 1.9221586263346124e-06, "epoch": 1.06894192949454, "percentage": 21.38, "elapsed_time": "0:56:26", "remaining_time": "3:27:33", "throughput": 8710.42, "total_tokens": 29496928} +{"current_steps": 43760, "total_steps": 204665, "loss": 0.2569, "lr": 1.922125636588866e-06, "epoch": 1.0690640803263871, "percentage": 21.38, "elapsed_time": "0:56:26", "remaining_time": "3:27:33", "throughput": 8710.55, "total_tokens": 29500384} +{"current_steps": 43765, "total_steps": 204665, "loss": 0.001, "lr": 1.9220926401371738e-06, "epoch": 1.069186231158234, "percentage": 21.38, "elapsed_time": "0:56:27", "remaining_time": "3:27:32", "throughput": 8710.64, "total_tokens": 29503712} +{"current_steps": 43770, "total_steps": 204665, "loss": 0.2355, "lr": 1.9220596369797765e-06, "epoch": 1.0693083819900813, "percentage": 21.39, "elapsed_time": "0:56:27", "remaining_time": "3:27:31", "throughput": 8710.84, "total_tokens": 29507488} +{"current_steps": 43775, "total_steps": 204665, "loss": 0.0427, "lr": 1.9220266271169127e-06, "epoch": 1.0694305328219285, "percentage": 21.39, "elapsed_time": "0:56:27", "remaining_time": "3:27:31", "throughput": 8710.93, "total_tokens": 29510816} +{"current_steps": 43780, "total_steps": 204665, "loss": 0.0012, "lr": 1.921993610548824e-06, "epoch": 1.0695526836537756, "percentage": 21.39, "elapsed_time": "0:56:28", "remaining_time": "3:27:30", "throughput": 8710.99, "total_tokens": 29514016} +{"current_steps": 43785, "total_steps": 204665, "loss": 0.0944, "lr": 1.9219605872757493e-06, "epoch": 1.0696748344856228, "percentage": 21.39, "elapsed_time": "0:56:28", "remaining_time": "3:27:30", "throughput": 8711.05, "total_tokens": 29517216} +{"current_steps": 43790, "total_steps": 204665, "loss": 0.0709, "lr": 1.921927557297929e-06, "epoch": 1.06979698531747, "percentage": 21.4, "elapsed_time": "0:56:28", "remaining_time": "3:27:29", "throughput": 8711.13, "total_tokens": 29520480} +{"current_steps": 43795, "total_steps": 204665, "loss": 0.0458, "lr": 1.9218945206156043e-06, "epoch": 1.0699191361493172, "percentage": 21.4, "elapsed_time": "0:56:29", "remaining_time": "3:27:29", "throughput": 8711.16, "total_tokens": 29523552} +{"current_steps": 43800, "total_steps": 204665, "loss": 0.0711, "lr": 1.921861477229014e-06, "epoch": 1.0700412869811644, "percentage": 21.4, "elapsed_time": "0:56:29", "remaining_time": "3:27:28", "throughput": 8711.22, "total_tokens": 29526752} +{"current_steps": 43805, "total_steps": 204665, "loss": 0.0032, "lr": 1.9218284271384e-06, "epoch": 1.0701634378130116, "percentage": 21.4, "elapsed_time": "0:56:29", "remaining_time": "3:27:28", "throughput": 8711.23, "total_tokens": 29529760} +{"current_steps": 43810, "total_steps": 204665, "loss": 0.0755, "lr": 1.9217953703440007e-06, "epoch": 1.0702855886448588, "percentage": 21.41, "elapsed_time": "0:56:30", "remaining_time": "3:27:27", "throughput": 8711.32, "total_tokens": 29533088} +{"current_steps": 43815, "total_steps": 204665, "loss": 0.1385, "lr": 1.921762306846058e-06, "epoch": 1.0704077394767058, "percentage": 21.41, "elapsed_time": "0:56:30", "remaining_time": "3:27:27", "throughput": 8711.45, "total_tokens": 29536544} +{"current_steps": 43820, "total_steps": 204665, "loss": 0.0231, "lr": 1.921729236644812e-06, "epoch": 1.070529890308553, "percentage": 21.41, "elapsed_time": "0:56:30", "remaining_time": "3:27:26", "throughput": 8711.48, "total_tokens": 29539680} +{"current_steps": 43825, "total_steps": 204665, "loss": 0.0352, "lr": 1.9216961597405028e-06, "epoch": 1.0706520411404001, "percentage": 21.41, "elapsed_time": "0:56:31", "remaining_time": "3:27:26", "throughput": 8711.65, "total_tokens": 29543328} +{"current_steps": 43830, "total_steps": 204665, "loss": 0.1591, "lr": 1.9216630761333713e-06, "epoch": 1.0707741919722473, "percentage": 21.42, "elapsed_time": "0:56:31", "remaining_time": "3:27:25", "throughput": 8711.79, "total_tokens": 29546848} +{"current_steps": 43835, "total_steps": 204665, "loss": 0.0081, "lr": 1.921629985823658e-06, "epoch": 1.0708963428040945, "percentage": 21.42, "elapsed_time": "0:56:31", "remaining_time": "3:27:24", "throughput": 8711.84, "total_tokens": 29550048} +{"current_steps": 43840, "total_steps": 204665, "loss": 0.0762, "lr": 1.9215968888116038e-06, "epoch": 1.0710184936359417, "percentage": 21.42, "elapsed_time": "0:56:32", "remaining_time": "3:27:24", "throughput": 8711.93, "total_tokens": 29553312} +{"current_steps": 43845, "total_steps": 204665, "loss": 0.1954, "lr": 1.9215637850974488e-06, "epoch": 1.071140644467789, "percentage": 21.42, "elapsed_time": "0:56:32", "remaining_time": "3:27:23", "throughput": 8711.93, "total_tokens": 29556256} +{"current_steps": 43850, "total_steps": 204665, "loss": 0.1681, "lr": 1.921530674681434e-06, "epoch": 1.071262795299636, "percentage": 21.43, "elapsed_time": "0:56:32", "remaining_time": "3:27:23", "throughput": 8712.05, "total_tokens": 29559712} +{"current_steps": 43855, "total_steps": 204665, "loss": 0.0359, "lr": 1.921497557563801e-06, "epoch": 1.071384946131483, "percentage": 21.43, "elapsed_time": "0:56:33", "remaining_time": "3:27:22", "throughput": 8712.13, "total_tokens": 29562976} +{"current_steps": 43860, "total_steps": 204665, "loss": 0.0823, "lr": 1.921464433744789e-06, "epoch": 1.0715070969633302, "percentage": 21.43, "elapsed_time": "0:56:33", "remaining_time": "3:27:22", "throughput": 8712.26, "total_tokens": 29566496} +{"current_steps": 43865, "total_steps": 204665, "loss": 0.0272, "lr": 1.9214313032246404e-06, "epoch": 1.0716292477951774, "percentage": 21.43, "elapsed_time": "0:56:34", "remaining_time": "3:27:21", "throughput": 8712.26, "total_tokens": 29569440} +{"current_steps": 43870, "total_steps": 204665, "loss": 0.0815, "lr": 1.921398166003595e-06, "epoch": 1.0717513986270246, "percentage": 21.44, "elapsed_time": "0:56:34", "remaining_time": "3:27:21", "throughput": 8712.4, "total_tokens": 29572960} +{"current_steps": 43875, "total_steps": 204665, "loss": 0.0707, "lr": 1.921365022081895e-06, "epoch": 1.0718735494588718, "percentage": 21.44, "elapsed_time": "0:56:34", "remaining_time": "3:27:20", "throughput": 8712.45, "total_tokens": 29576096} +{"current_steps": 43880, "total_steps": 204665, "loss": 0.1535, "lr": 1.9213318714597803e-06, "epoch": 1.071995700290719, "percentage": 21.44, "elapsed_time": "0:56:35", "remaining_time": "3:27:20", "throughput": 8712.56, "total_tokens": 29579488} +{"current_steps": 43885, "total_steps": 204665, "loss": 0.0779, "lr": 1.9212987141374924e-06, "epoch": 1.0721178511225662, "percentage": 21.44, "elapsed_time": "0:56:35", "remaining_time": "3:27:19", "throughput": 8712.67, "total_tokens": 29582880} +{"current_steps": 43890, "total_steps": 204665, "loss": 0.1816, "lr": 1.9212655501152726e-06, "epoch": 1.0722400019544134, "percentage": 21.44, "elapsed_time": "0:56:35", "remaining_time": "3:27:19", "throughput": 8712.77, "total_tokens": 29586272} +{"current_steps": 43895, "total_steps": 204665, "loss": 0.0386, "lr": 1.921232379393362e-06, "epoch": 1.0723621527862606, "percentage": 21.45, "elapsed_time": "0:56:36", "remaining_time": "3:27:18", "throughput": 8712.93, "total_tokens": 29589856} +{"current_steps": 43900, "total_steps": 204665, "loss": 0.0355, "lr": 1.9211992019720015e-06, "epoch": 1.0724843036181075, "percentage": 21.45, "elapsed_time": "0:56:36", "remaining_time": "3:27:18", "throughput": 8713.17, "total_tokens": 29593824} +{"current_steps": 43905, "total_steps": 204665, "loss": 0.0126, "lr": 1.9211660178514326e-06, "epoch": 1.0726064544499547, "percentage": 21.45, "elapsed_time": "0:56:36", "remaining_time": "3:27:17", "throughput": 8713.22, "total_tokens": 29596960} +{"current_steps": 43910, "total_steps": 204665, "loss": 0.1277, "lr": 1.921132827031897e-06, "epoch": 1.072728605281802, "percentage": 21.45, "elapsed_time": "0:56:37", "remaining_time": "3:27:16", "throughput": 8713.34, "total_tokens": 29600416} +{"current_steps": 43915, "total_steps": 204665, "loss": 0.0773, "lr": 1.9210996295136356e-06, "epoch": 1.072850756113649, "percentage": 21.46, "elapsed_time": "0:56:37", "remaining_time": "3:27:16", "throughput": 8713.41, "total_tokens": 29603616} +{"current_steps": 43920, "total_steps": 204665, "loss": 0.0019, "lr": 1.92106642529689e-06, "epoch": 1.0729729069454963, "percentage": 21.46, "elapsed_time": "0:56:37", "remaining_time": "3:27:15", "throughput": 8713.59, "total_tokens": 29607328} +{"current_steps": 43925, "total_steps": 204665, "loss": 0.0476, "lr": 1.9210332143819016e-06, "epoch": 1.0730950577773435, "percentage": 21.46, "elapsed_time": "0:56:38", "remaining_time": "3:27:15", "throughput": 8713.73, "total_tokens": 29610848} +{"current_steps": 43930, "total_steps": 204665, "loss": 0.0321, "lr": 1.920999996768912e-06, "epoch": 1.0732172086091907, "percentage": 21.46, "elapsed_time": "0:56:38", "remaining_time": "3:27:14", "throughput": 8713.78, "total_tokens": 29613984} +{"current_steps": 43935, "total_steps": 204665, "loss": 0.047, "lr": 1.9209667724581623e-06, "epoch": 1.0733393594410379, "percentage": 21.47, "elapsed_time": "0:56:38", "remaining_time": "3:27:14", "throughput": 8713.83, "total_tokens": 29617184} +{"current_steps": 43940, "total_steps": 204665, "loss": 0.1575, "lr": 1.9209335414498945e-06, "epoch": 1.073461510272885, "percentage": 21.47, "elapsed_time": "0:56:39", "remaining_time": "3:27:13", "throughput": 8713.95, "total_tokens": 29620640} +{"current_steps": 43945, "total_steps": 204665, "loss": 0.0857, "lr": 1.9209003037443506e-06, "epoch": 1.073583661104732, "percentage": 21.47, "elapsed_time": "0:56:39", "remaining_time": "3:27:13", "throughput": 8713.96, "total_tokens": 29623648} +{"current_steps": 43950, "total_steps": 204665, "loss": 0.1343, "lr": 1.920867059341772e-06, "epoch": 1.0737058119365792, "percentage": 21.47, "elapsed_time": "0:56:39", "remaining_time": "3:27:12", "throughput": 8714.0, "total_tokens": 29626784} +{"current_steps": 43955, "total_steps": 204665, "loss": 0.0476, "lr": 1.9208338082424006e-06, "epoch": 1.0738279627684264, "percentage": 21.48, "elapsed_time": "0:56:40", "remaining_time": "3:27:12", "throughput": 8714.11, "total_tokens": 29630176} +{"current_steps": 43960, "total_steps": 204665, "loss": 0.0022, "lr": 1.920800550446478e-06, "epoch": 1.0739501136002736, "percentage": 21.48, "elapsed_time": "0:56:40", "remaining_time": "3:27:11", "throughput": 8714.15, "total_tokens": 29633312} +{"current_steps": 43965, "total_steps": 204665, "loss": 0.0819, "lr": 1.920767285954246e-06, "epoch": 1.0740722644321208, "percentage": 21.48, "elapsed_time": "0:56:40", "remaining_time": "3:27:11", "throughput": 8714.29, "total_tokens": 29636832} +{"current_steps": 43970, "total_steps": 204665, "loss": 0.0907, "lr": 1.9207340147659465e-06, "epoch": 1.074194415263968, "percentage": 21.48, "elapsed_time": "0:56:41", "remaining_time": "3:27:10", "throughput": 8714.44, "total_tokens": 29640416} +{"current_steps": 43975, "total_steps": 204665, "loss": 0.08, "lr": 1.9207007368818217e-06, "epoch": 1.0743165660958152, "percentage": 21.49, "elapsed_time": "0:56:41", "remaining_time": "3:27:10", "throughput": 8714.52, "total_tokens": 29643680} +{"current_steps": 43980, "total_steps": 204665, "loss": 0.1525, "lr": 1.9206674523021135e-06, "epoch": 1.0744387169276624, "percentage": 21.49, "elapsed_time": "0:56:41", "remaining_time": "3:27:09", "throughput": 8714.68, "total_tokens": 29647328} +{"current_steps": 43985, "total_steps": 204665, "loss": 0.0012, "lr": 1.9206341610270644e-06, "epoch": 1.0745608677595095, "percentage": 21.49, "elapsed_time": "0:56:42", "remaining_time": "3:27:09", "throughput": 8714.87, "total_tokens": 29651104} +{"current_steps": 43990, "total_steps": 204665, "loss": 0.059, "lr": 1.9206008630569157e-06, "epoch": 1.0746830185913565, "percentage": 21.49, "elapsed_time": "0:56:42", "remaining_time": "3:27:08", "throughput": 8714.98, "total_tokens": 29654496} +{"current_steps": 43995, "total_steps": 204665, "loss": 0.1075, "lr": 1.9205675583919096e-06, "epoch": 1.0748051694232037, "percentage": 21.5, "elapsed_time": "0:56:43", "remaining_time": "3:27:07", "throughput": 8715.06, "total_tokens": 29657760} +{"current_steps": 44000, "total_steps": 204665, "loss": 0.284, "lr": 1.920534247032289e-06, "epoch": 1.0749273202550509, "percentage": 21.5, "elapsed_time": "0:56:43", "remaining_time": "3:27:07", "throughput": 8715.14, "total_tokens": 29661024} +{"current_steps": 44005, "total_steps": 204665, "loss": 0.1003, "lr": 1.9205009289782956e-06, "epoch": 1.075049471086898, "percentage": 21.5, "elapsed_time": "0:56:43", "remaining_time": "3:27:06", "throughput": 8715.11, "total_tokens": 29663840} +{"current_steps": 44010, "total_steps": 204665, "loss": 0.112, "lr": 1.9204676042301718e-06, "epoch": 1.0751716219187453, "percentage": 21.5, "elapsed_time": "0:56:44", "remaining_time": "3:27:06", "throughput": 8715.27, "total_tokens": 29667488} +{"current_steps": 44015, "total_steps": 204665, "loss": 0.2006, "lr": 1.92043427278816e-06, "epoch": 1.0752937727505925, "percentage": 21.51, "elapsed_time": "0:56:44", "remaining_time": "3:27:05", "throughput": 8715.42, "total_tokens": 29671072} +{"current_steps": 44020, "total_steps": 204665, "loss": 0.0107, "lr": 1.920400934652503e-06, "epoch": 1.0754159235824396, "percentage": 21.51, "elapsed_time": "0:56:44", "remaining_time": "3:27:05", "throughput": 8715.57, "total_tokens": 29674656} +{"current_steps": 44025, "total_steps": 204665, "loss": 0.1733, "lr": 1.9203675898234426e-06, "epoch": 1.0755380744142868, "percentage": 21.51, "elapsed_time": "0:56:45", "remaining_time": "3:27:04", "throughput": 8715.71, "total_tokens": 29678176} +{"current_steps": 44030, "total_steps": 204665, "loss": 0.0602, "lr": 1.9203342383012214e-06, "epoch": 1.075660225246134, "percentage": 21.51, "elapsed_time": "0:56:45", "remaining_time": "3:27:04", "throughput": 8715.84, "total_tokens": 29681696} +{"current_steps": 44035, "total_steps": 204665, "loss": 0.0779, "lr": 1.920300880086082e-06, "epoch": 1.075782376077981, "percentage": 21.52, "elapsed_time": "0:56:45", "remaining_time": "3:27:03", "throughput": 8715.92, "total_tokens": 29684960} +{"current_steps": 44040, "total_steps": 204665, "loss": 0.1817, "lr": 1.9202675151782675e-06, "epoch": 1.0759045269098282, "percentage": 21.52, "elapsed_time": "0:56:46", "remaining_time": "3:27:03", "throughput": 8715.99, "total_tokens": 29688224} +{"current_steps": 44045, "total_steps": 204665, "loss": 0.0289, "lr": 1.9202341435780197e-06, "epoch": 1.0760266777416754, "percentage": 21.52, "elapsed_time": "0:56:46", "remaining_time": "3:27:02", "throughput": 8716.09, "total_tokens": 29691616} +{"current_steps": 44050, "total_steps": 204665, "loss": 0.0737, "lr": 1.9202007652855822e-06, "epoch": 1.0761488285735226, "percentage": 21.52, "elapsed_time": "0:56:46", "remaining_time": "3:27:02", "throughput": 8716.12, "total_tokens": 29694688} +{"current_steps": 44055, "total_steps": 204665, "loss": 0.0577, "lr": 1.920167380301197e-06, "epoch": 1.0762709794053698, "percentage": 21.53, "elapsed_time": "0:56:47", "remaining_time": "3:27:01", "throughput": 8716.19, "total_tokens": 29697952} +{"current_steps": 44060, "total_steps": 204665, "loss": 0.0635, "lr": 1.920133988625107e-06, "epoch": 1.076393130237217, "percentage": 21.53, "elapsed_time": "0:56:47", "remaining_time": "3:27:01", "throughput": 8716.26, "total_tokens": 29701152} +{"current_steps": 44065, "total_steps": 204665, "loss": 0.0122, "lr": 1.920100590257555e-06, "epoch": 1.0765152810690641, "percentage": 21.53, "elapsed_time": "0:56:47", "remaining_time": "3:27:00", "throughput": 8716.39, "total_tokens": 29704672} +{"current_steps": 44070, "total_steps": 204665, "loss": 0.065, "lr": 1.920067185198784e-06, "epoch": 1.0766374319009113, "percentage": 21.53, "elapsed_time": "0:56:48", "remaining_time": "3:27:00", "throughput": 8716.53, "total_tokens": 29708192} +{"current_steps": 44075, "total_steps": 204665, "loss": 0.0481, "lr": 1.9200337734490374e-06, "epoch": 1.0767595827327585, "percentage": 21.54, "elapsed_time": "0:56:48", "remaining_time": "3:26:59", "throughput": 8716.56, "total_tokens": 29711264} +{"current_steps": 44080, "total_steps": 204665, "loss": 0.1977, "lr": 1.9200003550085575e-06, "epoch": 1.0768817335646055, "percentage": 21.54, "elapsed_time": "0:56:48", "remaining_time": "3:26:58", "throughput": 8716.59, "total_tokens": 29714400} +{"current_steps": 44085, "total_steps": 204665, "loss": 0.0458, "lr": 1.919966929877587e-06, "epoch": 1.0770038843964527, "percentage": 21.54, "elapsed_time": "0:56:49", "remaining_time": "3:26:58", "throughput": 8716.73, "total_tokens": 29717920} +{"current_steps": 44090, "total_steps": 204665, "loss": 0.0403, "lr": 1.9199334980563707e-06, "epoch": 1.0771260352282999, "percentage": 21.54, "elapsed_time": "0:56:49", "remaining_time": "3:26:57", "throughput": 8716.8, "total_tokens": 29721184} +{"current_steps": 44095, "total_steps": 204665, "loss": 0.0826, "lr": 1.91990005954515e-06, "epoch": 1.077248186060147, "percentage": 21.54, "elapsed_time": "0:56:49", "remaining_time": "3:26:57", "throughput": 8716.96, "total_tokens": 29724832} +{"current_steps": 44100, "total_steps": 204665, "loss": 0.0805, "lr": 1.919866614344169e-06, "epoch": 1.0773703368919942, "percentage": 21.55, "elapsed_time": "0:56:50", "remaining_time": "3:26:56", "throughput": 8717.1, "total_tokens": 29728416} +{"current_steps": 44105, "total_steps": 204665, "loss": 0.146, "lr": 1.9198331624536696e-06, "epoch": 1.0774924877238414, "percentage": 21.55, "elapsed_time": "0:56:50", "remaining_time": "3:26:56", "throughput": 8717.27, "total_tokens": 29732064} +{"current_steps": 44110, "total_steps": 204665, "loss": 0.1583, "lr": 1.9197997038738967e-06, "epoch": 1.0776146385556886, "percentage": 21.55, "elapsed_time": "0:56:51", "remaining_time": "3:26:55", "throughput": 8717.46, "total_tokens": 29735840} +{"current_steps": 44115, "total_steps": 204665, "loss": 0.0657, "lr": 1.919766238605093e-06, "epoch": 1.0777367893875358, "percentage": 21.55, "elapsed_time": "0:56:51", "remaining_time": "3:26:55", "throughput": 8717.54, "total_tokens": 29739104} +{"current_steps": 44120, "total_steps": 204665, "loss": 0.0568, "lr": 1.9197327666475017e-06, "epoch": 1.0778589402193828, "percentage": 21.56, "elapsed_time": "0:56:51", "remaining_time": "3:26:54", "throughput": 8717.62, "total_tokens": 29742432} +{"current_steps": 44125, "total_steps": 204665, "loss": 0.1157, "lr": 1.9196992880013662e-06, "epoch": 1.07798109105123, "percentage": 21.56, "elapsed_time": "0:56:52", "remaining_time": "3:26:54", "throughput": 8717.64, "total_tokens": 29745440} +{"current_steps": 44130, "total_steps": 204665, "loss": 0.0797, "lr": 1.9196658026669303e-06, "epoch": 1.0781032418830772, "percentage": 21.56, "elapsed_time": "0:56:52", "remaining_time": "3:26:53", "throughput": 8717.68, "total_tokens": 29748512} +{"current_steps": 44135, "total_steps": 204665, "loss": 0.2125, "lr": 1.9196323106444374e-06, "epoch": 1.0782253927149243, "percentage": 21.56, "elapsed_time": "0:56:52", "remaining_time": "3:26:53", "throughput": 8717.72, "total_tokens": 29751648} +{"current_steps": 44140, "total_steps": 204665, "loss": 0.0615, "lr": 1.9195988119341306e-06, "epoch": 1.0783475435467715, "percentage": 21.57, "elapsed_time": "0:56:53", "remaining_time": "3:26:52", "throughput": 8717.72, "total_tokens": 29754592} +{"current_steps": 44145, "total_steps": 204665, "loss": 0.0466, "lr": 1.9195653065362544e-06, "epoch": 1.0784696943786187, "percentage": 21.57, "elapsed_time": "0:56:53", "remaining_time": "3:26:52", "throughput": 8717.73, "total_tokens": 29757600} +{"current_steps": 44150, "total_steps": 204665, "loss": 0.036, "lr": 1.9195317944510517e-06, "epoch": 1.078591845210466, "percentage": 21.57, "elapsed_time": "0:56:53", "remaining_time": "3:26:51", "throughput": 8717.87, "total_tokens": 29761184} +{"current_steps": 44155, "total_steps": 204665, "loss": 0.1806, "lr": 1.9194982756787662e-06, "epoch": 1.078713996042313, "percentage": 21.57, "elapsed_time": "0:56:54", "remaining_time": "3:26:51", "throughput": 8718.02, "total_tokens": 29764768} +{"current_steps": 44160, "total_steps": 204665, "loss": 0.0684, "lr": 1.9194647502196422e-06, "epoch": 1.0788361468741603, "percentage": 21.58, "elapsed_time": "0:56:54", "remaining_time": "3:26:50", "throughput": 8718.18, "total_tokens": 29768416} +{"current_steps": 44165, "total_steps": 204665, "loss": 0.0755, "lr": 1.9194312180739237e-06, "epoch": 1.0789582977060075, "percentage": 21.58, "elapsed_time": "0:56:54", "remaining_time": "3:26:49", "throughput": 8718.23, "total_tokens": 29771552} +{"current_steps": 44170, "total_steps": 204665, "loss": 0.0485, "lr": 1.9193976792418533e-06, "epoch": 1.0790804485378545, "percentage": 21.58, "elapsed_time": "0:56:55", "remaining_time": "3:26:49", "throughput": 8718.49, "total_tokens": 29775584} +{"current_steps": 44175, "total_steps": 204665, "loss": 0.1085, "lr": 1.919364133723676e-06, "epoch": 1.0792025993697016, "percentage": 21.58, "elapsed_time": "0:56:55", "remaining_time": "3:26:48", "throughput": 8718.46, "total_tokens": 29778400} +{"current_steps": 44180, "total_steps": 204665, "loss": 0.0418, "lr": 1.9193305815196355e-06, "epoch": 1.0793247502015488, "percentage": 21.59, "elapsed_time": "0:56:55", "remaining_time": "3:26:48", "throughput": 8718.55, "total_tokens": 29781728} +{"current_steps": 44185, "total_steps": 204665, "loss": 0.1246, "lr": 1.9192970226299757e-06, "epoch": 1.079446901033396, "percentage": 21.59, "elapsed_time": "0:56:56", "remaining_time": "3:26:47", "throughput": 8718.64, "total_tokens": 29785056} +{"current_steps": 44190, "total_steps": 204665, "loss": 0.0421, "lr": 1.919263457054941e-06, "epoch": 1.0795690518652432, "percentage": 21.59, "elapsed_time": "0:56:56", "remaining_time": "3:26:47", "throughput": 8718.71, "total_tokens": 29788256} +{"current_steps": 44195, "total_steps": 204665, "loss": 0.0286, "lr": 1.9192298847947746e-06, "epoch": 1.0796912026970904, "percentage": 21.59, "elapsed_time": "0:56:56", "remaining_time": "3:26:46", "throughput": 8718.75, "total_tokens": 29791392} +{"current_steps": 44200, "total_steps": 204665, "loss": 0.004, "lr": 1.9191963058497212e-06, "epoch": 1.0798133535289376, "percentage": 21.6, "elapsed_time": "0:56:57", "remaining_time": "3:26:46", "throughput": 8718.74, "total_tokens": 29794336} +{"current_steps": 44205, "total_steps": 204665, "loss": 0.0518, "lr": 1.9191627202200258e-06, "epoch": 1.0799355043607848, "percentage": 21.6, "elapsed_time": "0:56:57", "remaining_time": "3:26:45", "throughput": 8718.84, "total_tokens": 29797728} +{"current_steps": 44210, "total_steps": 204665, "loss": 0.0466, "lr": 1.9191291279059312e-06, "epoch": 1.0800576551926317, "percentage": 21.6, "elapsed_time": "0:56:57", "remaining_time": "3:26:45", "throughput": 8718.93, "total_tokens": 29801056} +{"current_steps": 44215, "total_steps": 204665, "loss": 0.1837, "lr": 1.9190955289076825e-06, "epoch": 1.080179806024479, "percentage": 21.6, "elapsed_time": "0:56:58", "remaining_time": "3:26:44", "throughput": 8719.05, "total_tokens": 29804512} +{"current_steps": 44220, "total_steps": 204665, "loss": 0.1528, "lr": 1.9190619232255242e-06, "epoch": 1.0803019568563261, "percentage": 21.61, "elapsed_time": "0:56:58", "remaining_time": "3:26:44", "throughput": 8719.19, "total_tokens": 29808032} +{"current_steps": 44225, "total_steps": 204665, "loss": 0.1278, "lr": 1.9190283108597e-06, "epoch": 1.0804241076881733, "percentage": 21.61, "elapsed_time": "0:56:59", "remaining_time": "3:26:43", "throughput": 8719.26, "total_tokens": 29811296} +{"current_steps": 44230, "total_steps": 204665, "loss": 0.0481, "lr": 1.918994691810455e-06, "epoch": 1.0805462585200205, "percentage": 21.61, "elapsed_time": "0:56:59", "remaining_time": "3:26:43", "throughput": 8719.63, "total_tokens": 29815904} +{"current_steps": 44235, "total_steps": 204665, "loss": 0.0786, "lr": 1.9189610660780335e-06, "epoch": 1.0806684093518677, "percentage": 21.61, "elapsed_time": "0:56:59", "remaining_time": "3:26:42", "throughput": 8719.67, "total_tokens": 29819040} +{"current_steps": 44240, "total_steps": 204665, "loss": 0.0959, "lr": 1.9189274336626795e-06, "epoch": 1.0807905601837149, "percentage": 21.62, "elapsed_time": "0:57:00", "remaining_time": "3:26:42", "throughput": 8719.79, "total_tokens": 29822496} +{"current_steps": 44245, "total_steps": 204665, "loss": 0.095, "lr": 1.9188937945646386e-06, "epoch": 1.080912711015562, "percentage": 21.62, "elapsed_time": "0:57:00", "remaining_time": "3:26:41", "throughput": 8719.84, "total_tokens": 29825696} +{"current_steps": 44250, "total_steps": 204665, "loss": 0.076, "lr": 1.9188601487841545e-06, "epoch": 1.0810348618474093, "percentage": 21.62, "elapsed_time": "0:57:00", "remaining_time": "3:26:41", "throughput": 8719.93, "total_tokens": 29829024} +{"current_steps": 44255, "total_steps": 204665, "loss": 0.0274, "lr": 1.9188264963214724e-06, "epoch": 1.0811570126792565, "percentage": 21.62, "elapsed_time": "0:57:01", "remaining_time": "3:26:40", "throughput": 8720.05, "total_tokens": 29832480} +{"current_steps": 44260, "total_steps": 204665, "loss": 0.0585, "lr": 1.918792837176837e-06, "epoch": 1.0812791635111034, "percentage": 21.63, "elapsed_time": "0:57:01", "remaining_time": "3:26:39", "throughput": 8720.13, "total_tokens": 29835744} +{"current_steps": 44265, "total_steps": 204665, "loss": 0.0317, "lr": 1.9187591713504925e-06, "epoch": 1.0814013143429506, "percentage": 21.63, "elapsed_time": "0:57:01", "remaining_time": "3:26:39", "throughput": 8720.14, "total_tokens": 29838752} +{"current_steps": 44270, "total_steps": 204665, "loss": 0.0282, "lr": 1.9187254988426846e-06, "epoch": 1.0815234651747978, "percentage": 21.63, "elapsed_time": "0:57:02", "remaining_time": "3:26:38", "throughput": 8720.23, "total_tokens": 29842080} +{"current_steps": 44275, "total_steps": 204665, "loss": 0.102, "lr": 1.918691819653658e-06, "epoch": 1.081645616006645, "percentage": 21.63, "elapsed_time": "0:57:02", "remaining_time": "3:26:38", "throughput": 8720.19, "total_tokens": 29844832} +{"current_steps": 44280, "total_steps": 204665, "loss": 0.1044, "lr": 1.9186581337836567e-06, "epoch": 1.0817677668384922, "percentage": 21.64, "elapsed_time": "0:57:02", "remaining_time": "3:26:37", "throughput": 8720.26, "total_tokens": 29848096} +{"current_steps": 44285, "total_steps": 204665, "loss": 0.0015, "lr": 1.918624441232927e-06, "epoch": 1.0818899176703394, "percentage": 21.64, "elapsed_time": "0:57:03", "remaining_time": "3:26:37", "throughput": 8720.38, "total_tokens": 29851552} +{"current_steps": 44290, "total_steps": 204665, "loss": 0.1048, "lr": 1.918590742001713e-06, "epoch": 1.0820120685021866, "percentage": 21.64, "elapsed_time": "0:57:03", "remaining_time": "3:26:36", "throughput": 8720.56, "total_tokens": 29855264} +{"current_steps": 44295, "total_steps": 204665, "loss": 0.1001, "lr": 1.91855703609026e-06, "epoch": 1.0821342193340338, "percentage": 21.64, "elapsed_time": "0:57:03", "remaining_time": "3:26:36", "throughput": 8720.67, "total_tokens": 29858720} +{"current_steps": 44300, "total_steps": 204665, "loss": 0.0587, "lr": 1.918523323498813e-06, "epoch": 1.0822563701658807, "percentage": 21.65, "elapsed_time": "0:57:04", "remaining_time": "3:26:35", "throughput": 8720.77, "total_tokens": 29862112} +{"current_steps": 44305, "total_steps": 204665, "loss": 0.0833, "lr": 1.9184896042276176e-06, "epoch": 1.082378520997728, "percentage": 21.65, "elapsed_time": "0:57:04", "remaining_time": "3:26:35", "throughput": 8720.84, "total_tokens": 29865376} +{"current_steps": 44310, "total_steps": 204665, "loss": 0.0984, "lr": 1.9184558782769185e-06, "epoch": 1.082500671829575, "percentage": 21.65, "elapsed_time": "0:57:04", "remaining_time": "3:26:34", "throughput": 8720.95, "total_tokens": 29868832} +{"current_steps": 44315, "total_steps": 204665, "loss": 0.0488, "lr": 1.9184221456469615e-06, "epoch": 1.0826228226614223, "percentage": 21.65, "elapsed_time": "0:57:05", "remaining_time": "3:26:34", "throughput": 8721.02, "total_tokens": 29872096} +{"current_steps": 44320, "total_steps": 204665, "loss": 0.0499, "lr": 1.9183884063379918e-06, "epoch": 1.0827449734932695, "percentage": 21.65, "elapsed_time": "0:57:05", "remaining_time": "3:26:33", "throughput": 8721.06, "total_tokens": 29875232} +{"current_steps": 44325, "total_steps": 204665, "loss": 0.0383, "lr": 1.9183546603502545e-06, "epoch": 1.0828671243251167, "percentage": 21.66, "elapsed_time": "0:57:05", "remaining_time": "3:26:33", "throughput": 8721.22, "total_tokens": 29878880} +{"current_steps": 44330, "total_steps": 204665, "loss": 0.0354, "lr": 1.9183209076839944e-06, "epoch": 1.0829892751569639, "percentage": 21.66, "elapsed_time": "0:57:06", "remaining_time": "3:26:32", "throughput": 8721.36, "total_tokens": 29882400} +{"current_steps": 44335, "total_steps": 204665, "loss": 0.0176, "lr": 1.9182871483394585e-06, "epoch": 1.083111425988811, "percentage": 21.66, "elapsed_time": "0:57:06", "remaining_time": "3:26:32", "throughput": 8721.49, "total_tokens": 29885920} +{"current_steps": 44340, "total_steps": 204665, "loss": 0.0967, "lr": 1.918253382316891e-06, "epoch": 1.0832335768206582, "percentage": 21.66, "elapsed_time": "0:57:07", "remaining_time": "3:26:31", "throughput": 8721.64, "total_tokens": 29889504} +{"current_steps": 44345, "total_steps": 204665, "loss": 0.0563, "lr": 1.9182196096165383e-06, "epoch": 1.0833557276525054, "percentage": 21.67, "elapsed_time": "0:57:07", "remaining_time": "3:26:31", "throughput": 8721.75, "total_tokens": 29892896} +{"current_steps": 44350, "total_steps": 204665, "loss": 0.1665, "lr": 1.9181858302386454e-06, "epoch": 1.0834778784843524, "percentage": 21.67, "elapsed_time": "0:57:07", "remaining_time": "3:26:30", "throughput": 8721.83, "total_tokens": 29896224} +{"current_steps": 44355, "total_steps": 204665, "loss": 0.1475, "lr": 1.9181520441834582e-06, "epoch": 1.0836000293161996, "percentage": 21.67, "elapsed_time": "0:57:08", "remaining_time": "3:26:29", "throughput": 8721.85, "total_tokens": 29899296} +{"current_steps": 44360, "total_steps": 204665, "loss": 0.1069, "lr": 1.9181182514512222e-06, "epoch": 1.0837221801480468, "percentage": 21.67, "elapsed_time": "0:57:08", "remaining_time": "3:26:29", "throughput": 8721.87, "total_tokens": 29902368} +{"current_steps": 44365, "total_steps": 204665, "loss": 0.0788, "lr": 1.9180844520421838e-06, "epoch": 1.083844330979894, "percentage": 21.68, "elapsed_time": "0:57:08", "remaining_time": "3:26:28", "throughput": 8722.0, "total_tokens": 29905888} +{"current_steps": 44370, "total_steps": 204665, "loss": 0.0217, "lr": 1.918050645956588e-06, "epoch": 1.0839664818117412, "percentage": 21.68, "elapsed_time": "0:57:09", "remaining_time": "3:26:28", "throughput": 8722.12, "total_tokens": 29909344} +{"current_steps": 44375, "total_steps": 204665, "loss": 0.0903, "lr": 1.918016833194681e-06, "epoch": 1.0840886326435883, "percentage": 21.68, "elapsed_time": "0:57:09", "remaining_time": "3:26:27", "throughput": 8722.15, "total_tokens": 29912480} +{"current_steps": 44380, "total_steps": 204665, "loss": 0.2413, "lr": 1.917983013756709e-06, "epoch": 1.0842107834754355, "percentage": 21.68, "elapsed_time": "0:57:09", "remaining_time": "3:26:27", "throughput": 8722.35, "total_tokens": 29916256} +{"current_steps": 44385, "total_steps": 204665, "loss": 0.0461, "lr": 1.917949187642917e-06, "epoch": 1.0843329343072827, "percentage": 21.69, "elapsed_time": "0:57:10", "remaining_time": "3:26:26", "throughput": 8722.41, "total_tokens": 29919456} +{"current_steps": 44390, "total_steps": 204665, "loss": 0.2388, "lr": 1.917915354853552e-06, "epoch": 1.0844550851391297, "percentage": 21.69, "elapsed_time": "0:57:10", "remaining_time": "3:26:26", "throughput": 8722.59, "total_tokens": 29923232} +{"current_steps": 44395, "total_steps": 204665, "loss": 0.1065, "lr": 1.9178815153888595e-06, "epoch": 1.0845772359709769, "percentage": 21.69, "elapsed_time": "0:57:10", "remaining_time": "3:26:25", "throughput": 8722.6, "total_tokens": 29926240} +{"current_steps": 44400, "total_steps": 204665, "loss": 0.0584, "lr": 1.917847669249086e-06, "epoch": 1.084699386802824, "percentage": 21.69, "elapsed_time": "0:57:11", "remaining_time": "3:26:25", "throughput": 8722.71, "total_tokens": 29929696} +{"current_steps": 44405, "total_steps": 204665, "loss": 0.088, "lr": 1.917813816434477e-06, "epoch": 1.0848215376346713, "percentage": 21.7, "elapsed_time": "0:57:11", "remaining_time": "3:26:24", "throughput": 8722.86, "total_tokens": 29933280} +{"current_steps": 44410, "total_steps": 204665, "loss": 0.1019, "lr": 1.9177799569452793e-06, "epoch": 1.0849436884665185, "percentage": 21.7, "elapsed_time": "0:57:11", "remaining_time": "3:26:24", "throughput": 8722.95, "total_tokens": 29936608} +{"current_steps": 44415, "total_steps": 204665, "loss": 0.068, "lr": 1.917746090781739e-06, "epoch": 1.0850658392983656, "percentage": 21.7, "elapsed_time": "0:57:12", "remaining_time": "3:26:23", "throughput": 8723.14, "total_tokens": 29940384} +{"current_steps": 44420, "total_steps": 204665, "loss": 0.0874, "lr": 1.917712217944102e-06, "epoch": 1.0851879901302128, "percentage": 21.7, "elapsed_time": "0:57:12", "remaining_time": "3:26:23", "throughput": 8723.25, "total_tokens": 29943840} +{"current_steps": 44425, "total_steps": 204665, "loss": 0.0362, "lr": 1.917678338432615e-06, "epoch": 1.08531014096206, "percentage": 21.71, "elapsed_time": "0:57:12", "remaining_time": "3:26:22", "throughput": 8723.34, "total_tokens": 29947168} +{"current_steps": 44430, "total_steps": 204665, "loss": 0.1176, "lr": 1.917644452247524e-06, "epoch": 1.0854322917939072, "percentage": 21.71, "elapsed_time": "0:57:13", "remaining_time": "3:26:22", "throughput": 8723.41, "total_tokens": 29950432} +{"current_steps": 44435, "total_steps": 204665, "loss": 0.0287, "lr": 1.9176105593890765e-06, "epoch": 1.0855544426257542, "percentage": 21.71, "elapsed_time": "0:57:13", "remaining_time": "3:26:21", "throughput": 8723.6, "total_tokens": 29954208} +{"current_steps": 44440, "total_steps": 204665, "loss": 0.1624, "lr": 1.9175766598575177e-06, "epoch": 1.0856765934576014, "percentage": 21.71, "elapsed_time": "0:57:14", "remaining_time": "3:26:21", "throughput": 8723.63, "total_tokens": 29957280} +{"current_steps": 44445, "total_steps": 204665, "loss": 0.128, "lr": 1.917542753653095e-06, "epoch": 1.0857987442894486, "percentage": 21.72, "elapsed_time": "0:57:14", "remaining_time": "3:26:20", "throughput": 8723.67, "total_tokens": 29960416} +{"current_steps": 44450, "total_steps": 204665, "loss": 0.0584, "lr": 1.9175088407760543e-06, "epoch": 1.0859208951212957, "percentage": 21.72, "elapsed_time": "0:57:14", "remaining_time": "3:26:20", "throughput": 8723.76, "total_tokens": 29963808} +{"current_steps": 44455, "total_steps": 204665, "loss": 0.1099, "lr": 1.917474921226642e-06, "epoch": 1.086043045953143, "percentage": 21.72, "elapsed_time": "0:57:15", "remaining_time": "3:26:19", "throughput": 8723.83, "total_tokens": 29967072} +{"current_steps": 44460, "total_steps": 204665, "loss": 0.0261, "lr": 1.917440995005106e-06, "epoch": 1.0861651967849901, "percentage": 21.72, "elapsed_time": "0:57:15", "remaining_time": "3:26:19", "throughput": 8723.93, "total_tokens": 29970464} +{"current_steps": 44465, "total_steps": 204665, "loss": 0.1018, "lr": 1.9174070621116924e-06, "epoch": 1.0862873476168373, "percentage": 21.73, "elapsed_time": "0:57:15", "remaining_time": "3:26:18", "throughput": 8724.2, "total_tokens": 29974560} +{"current_steps": 44470, "total_steps": 204665, "loss": 0.0687, "lr": 1.9173731225466477e-06, "epoch": 1.0864094984486845, "percentage": 21.73, "elapsed_time": "0:57:16", "remaining_time": "3:26:18", "throughput": 8724.19, "total_tokens": 29977504} +{"current_steps": 44475, "total_steps": 204665, "loss": 0.1373, "lr": 1.917339176310219e-06, "epoch": 1.0865316492805317, "percentage": 21.73, "elapsed_time": "0:57:16", "remaining_time": "3:26:17", "throughput": 8724.29, "total_tokens": 29980896} +{"current_steps": 44480, "total_steps": 204665, "loss": 0.0765, "lr": 1.917305223402653e-06, "epoch": 1.0866538001123787, "percentage": 21.73, "elapsed_time": "0:57:16", "remaining_time": "3:26:16", "throughput": 8724.37, "total_tokens": 29984160} +{"current_steps": 44485, "total_steps": 204665, "loss": 0.0376, "lr": 1.9172712638241964e-06, "epoch": 1.0867759509442259, "percentage": 21.74, "elapsed_time": "0:57:17", "remaining_time": "3:26:16", "throughput": 8724.44, "total_tokens": 29987424} +{"current_steps": 44490, "total_steps": 204665, "loss": 0.0685, "lr": 1.917237297575097e-06, "epoch": 1.086898101776073, "percentage": 21.74, "elapsed_time": "0:57:17", "remaining_time": "3:26:15", "throughput": 8724.46, "total_tokens": 29990496} +{"current_steps": 44495, "total_steps": 204665, "loss": 0.0267, "lr": 1.9172033246556008e-06, "epoch": 1.0870202526079202, "percentage": 21.74, "elapsed_time": "0:57:17", "remaining_time": "3:26:15", "throughput": 8724.53, "total_tokens": 29993760} +{"current_steps": 44500, "total_steps": 204665, "loss": 0.0543, "lr": 1.9171693450659556e-06, "epoch": 1.0871424034397674, "percentage": 21.74, "elapsed_time": "0:57:18", "remaining_time": "3:26:14", "throughput": 8724.65, "total_tokens": 29997280} +{"current_steps": 44505, "total_steps": 204665, "loss": 0.0768, "lr": 1.917135358806408e-06, "epoch": 1.0872645542716146, "percentage": 21.75, "elapsed_time": "0:57:18", "remaining_time": "3:26:14", "throughput": 8724.72, "total_tokens": 30000480} +{"current_steps": 44510, "total_steps": 204665, "loss": 0.1496, "lr": 1.9171013658772055e-06, "epoch": 1.0873867051034618, "percentage": 21.75, "elapsed_time": "0:57:18", "remaining_time": "3:26:13", "throughput": 8724.81, "total_tokens": 30003872} +{"current_steps": 44515, "total_steps": 204665, "loss": 0.0012, "lr": 1.9170673662785953e-06, "epoch": 1.087508855935309, "percentage": 21.75, "elapsed_time": "0:57:19", "remaining_time": "3:26:13", "throughput": 8724.91, "total_tokens": 30007264} +{"current_steps": 44520, "total_steps": 204665, "loss": 0.1859, "lr": 1.9170333600108246e-06, "epoch": 1.0876310067671562, "percentage": 21.75, "elapsed_time": "0:57:19", "remaining_time": "3:26:12", "throughput": 8725.12, "total_tokens": 30011104} +{"current_steps": 44525, "total_steps": 204665, "loss": 0.0786, "lr": 1.9169993470741407e-06, "epoch": 1.0877531575990032, "percentage": 21.76, "elapsed_time": "0:57:19", "remaining_time": "3:26:12", "throughput": 8725.19, "total_tokens": 30014368} +{"current_steps": 44530, "total_steps": 204665, "loss": 0.1248, "lr": 1.9169653274687905e-06, "epoch": 1.0878753084308503, "percentage": 21.76, "elapsed_time": "0:57:20", "remaining_time": "3:26:11", "throughput": 8725.33, "total_tokens": 30017952} +{"current_steps": 44535, "total_steps": 204665, "loss": 0.0366, "lr": 1.9169313011950223e-06, "epoch": 1.0879974592626975, "percentage": 21.76, "elapsed_time": "0:57:20", "remaining_time": "3:26:11", "throughput": 8725.45, "total_tokens": 30021408} +{"current_steps": 44540, "total_steps": 204665, "loss": 0.0697, "lr": 1.9168972682530825e-06, "epoch": 1.0881196100945447, "percentage": 21.76, "elapsed_time": "0:57:21", "remaining_time": "3:26:10", "throughput": 8725.61, "total_tokens": 30025056} +{"current_steps": 44545, "total_steps": 204665, "loss": 0.0544, "lr": 1.9168632286432193e-06, "epoch": 1.088241760926392, "percentage": 21.76, "elapsed_time": "0:57:21", "remaining_time": "3:26:10", "throughput": 8725.65, "total_tokens": 30028192} +{"current_steps": 44550, "total_steps": 204665, "loss": 0.1594, "lr": 1.9168291823656804e-06, "epoch": 1.088363911758239, "percentage": 21.77, "elapsed_time": "0:57:21", "remaining_time": "3:26:09", "throughput": 8725.66, "total_tokens": 30031200} +{"current_steps": 44555, "total_steps": 204665, "loss": 0.0597, "lr": 1.916795129420713e-06, "epoch": 1.0884860625900863, "percentage": 21.77, "elapsed_time": "0:57:22", "remaining_time": "3:26:09", "throughput": 8725.67, "total_tokens": 30034208} +{"current_steps": 44560, "total_steps": 204665, "loss": 0.0384, "lr": 1.9167610698085647e-06, "epoch": 1.0886082134219335, "percentage": 21.77, "elapsed_time": "0:57:22", "remaining_time": "3:26:08", "throughput": 8725.81, "total_tokens": 30037792} +{"current_steps": 44565, "total_steps": 204665, "loss": 0.1703, "lr": 1.9167270035294833e-06, "epoch": 1.0887303642537807, "percentage": 21.77, "elapsed_time": "0:57:22", "remaining_time": "3:26:08", "throughput": 8725.91, "total_tokens": 30041184} +{"current_steps": 44570, "total_steps": 204665, "loss": 0.0229, "lr": 1.9166929305837164e-06, "epoch": 1.0888525150856276, "percentage": 21.78, "elapsed_time": "0:57:23", "remaining_time": "3:26:07", "throughput": 8725.98, "total_tokens": 30044448} +{"current_steps": 44575, "total_steps": 204665, "loss": 0.1048, "lr": 1.9166588509715123e-06, "epoch": 1.0889746659174748, "percentage": 21.78, "elapsed_time": "0:57:23", "remaining_time": "3:26:07", "throughput": 8726.09, "total_tokens": 30047904} +{"current_steps": 44580, "total_steps": 204665, "loss": 0.062, "lr": 1.916624764693118e-06, "epoch": 1.089096816749322, "percentage": 21.78, "elapsed_time": "0:57:23", "remaining_time": "3:26:06", "throughput": 8726.14, "total_tokens": 30051040} +{"current_steps": 44585, "total_steps": 204665, "loss": 0.001, "lr": 1.9165906717487824e-06, "epoch": 1.0892189675811692, "percentage": 21.78, "elapsed_time": "0:57:24", "remaining_time": "3:26:05", "throughput": 8726.18, "total_tokens": 30054176} +{"current_steps": 44590, "total_steps": 204665, "loss": 0.1293, "lr": 1.916556572138753e-06, "epoch": 1.0893411184130164, "percentage": 21.79, "elapsed_time": "0:57:24", "remaining_time": "3:26:05", "throughput": 8726.36, "total_tokens": 30057888} +{"current_steps": 44595, "total_steps": 204665, "loss": 0.1436, "lr": 1.916522465863277e-06, "epoch": 1.0894632692448636, "percentage": 21.79, "elapsed_time": "0:57:24", "remaining_time": "3:26:04", "throughput": 8726.41, "total_tokens": 30061088} +{"current_steps": 44600, "total_steps": 204665, "loss": 0.1403, "lr": 1.916488352922604e-06, "epoch": 1.0895854200767108, "percentage": 21.79, "elapsed_time": "0:57:25", "remaining_time": "3:26:04", "throughput": 8726.51, "total_tokens": 30064480} +{"current_steps": 44605, "total_steps": 204665, "loss": 0.1873, "lr": 1.9164542333169806e-06, "epoch": 1.089707570908558, "percentage": 21.79, "elapsed_time": "0:57:25", "remaining_time": "3:26:03", "throughput": 8726.54, "total_tokens": 30067552} +{"current_steps": 44610, "total_steps": 204665, "loss": 0.0671, "lr": 1.9164201070466556e-06, "epoch": 1.0898297217404052, "percentage": 21.8, "elapsed_time": "0:57:25", "remaining_time": "3:26:03", "throughput": 8726.61, "total_tokens": 30070816} +{"current_steps": 44615, "total_steps": 204665, "loss": 0.0633, "lr": 1.916385974111877e-06, "epoch": 1.0899518725722521, "percentage": 21.8, "elapsed_time": "0:57:26", "remaining_time": "3:26:02", "throughput": 8726.76, "total_tokens": 30074400} +{"current_steps": 44620, "total_steps": 204665, "loss": 0.0559, "lr": 1.9163518345128937e-06, "epoch": 1.0900740234040993, "percentage": 21.8, "elapsed_time": "0:57:26", "remaining_time": "3:26:02", "throughput": 8726.86, "total_tokens": 30077728} +{"current_steps": 44625, "total_steps": 204665, "loss": 0.0022, "lr": 1.9163176882499526e-06, "epoch": 1.0901961742359465, "percentage": 21.8, "elapsed_time": "0:57:26", "remaining_time": "3:26:01", "throughput": 8727.03, "total_tokens": 30081376} +{"current_steps": 44630, "total_steps": 204665, "loss": 0.0444, "lr": 1.9162835353233034e-06, "epoch": 1.0903183250677937, "percentage": 21.81, "elapsed_time": "0:57:27", "remaining_time": "3:26:01", "throughput": 8727.05, "total_tokens": 30084448} +{"current_steps": 44635, "total_steps": 204665, "loss": 0.1226, "lr": 1.9162493757331934e-06, "epoch": 1.0904404758996409, "percentage": 21.81, "elapsed_time": "0:57:27", "remaining_time": "3:26:00", "throughput": 8727.22, "total_tokens": 30088096} +{"current_steps": 44640, "total_steps": 204665, "loss": 0.0867, "lr": 1.9162152094798717e-06, "epoch": 1.090562626731488, "percentage": 21.81, "elapsed_time": "0:57:27", "remaining_time": "3:26:00", "throughput": 8727.34, "total_tokens": 30091616} +{"current_steps": 44645, "total_steps": 204665, "loss": 0.1927, "lr": 1.9161810365635867e-06, "epoch": 1.0906847775633353, "percentage": 21.81, "elapsed_time": "0:57:28", "remaining_time": "3:25:59", "throughput": 8727.4, "total_tokens": 30094816} +{"current_steps": 44650, "total_steps": 204665, "loss": 0.1116, "lr": 1.9161468569845867e-06, "epoch": 1.0908069283951825, "percentage": 21.82, "elapsed_time": "0:57:28", "remaining_time": "3:25:59", "throughput": 8727.45, "total_tokens": 30097952} +{"current_steps": 44655, "total_steps": 204665, "loss": 0.0607, "lr": 1.91611267074312e-06, "epoch": 1.0909290792270294, "percentage": 21.82, "elapsed_time": "0:57:29", "remaining_time": "3:25:58", "throughput": 8727.64, "total_tokens": 30101728} +{"current_steps": 44660, "total_steps": 204665, "loss": 0.1164, "lr": 1.9160784778394362e-06, "epoch": 1.0910512300588766, "percentage": 21.82, "elapsed_time": "0:57:29", "remaining_time": "3:25:58", "throughput": 8727.77, "total_tokens": 30105248} +{"current_steps": 44665, "total_steps": 204665, "loss": 0.012, "lr": 1.916044278273783e-06, "epoch": 1.0911733808907238, "percentage": 21.82, "elapsed_time": "0:57:29", "remaining_time": "3:25:57", "throughput": 8727.87, "total_tokens": 30108640} +{"current_steps": 44670, "total_steps": 204665, "loss": 0.0913, "lr": 1.916010072046409e-06, "epoch": 1.091295531722571, "percentage": 21.83, "elapsed_time": "0:57:30", "remaining_time": "3:25:57", "throughput": 8728.0, "total_tokens": 30112160} +{"current_steps": 44675, "total_steps": 204665, "loss": 0.1595, "lr": 1.915975859157564e-06, "epoch": 1.0914176825544182, "percentage": 21.83, "elapsed_time": "0:57:30", "remaining_time": "3:25:56", "throughput": 8728.11, "total_tokens": 30115552} +{"current_steps": 44680, "total_steps": 204665, "loss": 0.0373, "lr": 1.915941639607496e-06, "epoch": 1.0915398333862654, "percentage": 21.83, "elapsed_time": "0:57:30", "remaining_time": "3:25:56", "throughput": 8728.16, "total_tokens": 30118688} +{"current_steps": 44685, "total_steps": 204665, "loss": 0.0661, "lr": 1.915907413396454e-06, "epoch": 1.0916619842181126, "percentage": 21.83, "elapsed_time": "0:57:31", "remaining_time": "3:25:55", "throughput": 8728.22, "total_tokens": 30121888} +{"current_steps": 44690, "total_steps": 204665, "loss": 0.0781, "lr": 1.915873180524687e-06, "epoch": 1.0917841350499597, "percentage": 21.84, "elapsed_time": "0:57:31", "remaining_time": "3:25:54", "throughput": 8728.32, "total_tokens": 30125280} +{"current_steps": 44695, "total_steps": 204665, "loss": 0.0918, "lr": 1.9158389409924437e-06, "epoch": 1.091906285881807, "percentage": 21.84, "elapsed_time": "0:57:31", "remaining_time": "3:25:54", "throughput": 8728.49, "total_tokens": 30128992} +{"current_steps": 44700, "total_steps": 204665, "loss": 0.0144, "lr": 1.9158046947999737e-06, "epoch": 1.0920284367136541, "percentage": 21.84, "elapsed_time": "0:57:32", "remaining_time": "3:25:53", "throughput": 8728.61, "total_tokens": 30132448} +{"current_steps": 44705, "total_steps": 204665, "loss": 0.0917, "lr": 1.9157704419475255e-06, "epoch": 1.092150587545501, "percentage": 21.84, "elapsed_time": "0:57:32", "remaining_time": "3:25:53", "throughput": 8728.69, "total_tokens": 30135712} +{"current_steps": 44710, "total_steps": 204665, "loss": 0.0669, "lr": 1.915736182435348e-06, "epoch": 1.0922727383773483, "percentage": 21.85, "elapsed_time": "0:57:32", "remaining_time": "3:25:52", "throughput": 8728.83, "total_tokens": 30139296} +{"current_steps": 44715, "total_steps": 204665, "loss": 0.0049, "lr": 1.9157019162636906e-06, "epoch": 1.0923948892091955, "percentage": 21.85, "elapsed_time": "0:57:33", "remaining_time": "3:25:52", "throughput": 8728.93, "total_tokens": 30142688} +{"current_steps": 44720, "total_steps": 204665, "loss": 0.1349, "lr": 1.915667643432803e-06, "epoch": 1.0925170400410427, "percentage": 21.85, "elapsed_time": "0:57:33", "remaining_time": "3:25:51", "throughput": 8729.15, "total_tokens": 30146592} +{"current_steps": 44725, "total_steps": 204665, "loss": 0.2515, "lr": 1.915633363942934e-06, "epoch": 1.0926391908728899, "percentage": 21.85, "elapsed_time": "0:57:33", "remaining_time": "3:25:51", "throughput": 8729.25, "total_tokens": 30149984} +{"current_steps": 44730, "total_steps": 204665, "loss": 0.0047, "lr": 1.9155990777943325e-06, "epoch": 1.092761341704737, "percentage": 21.86, "elapsed_time": "0:57:34", "remaining_time": "3:25:50", "throughput": 8729.53, "total_tokens": 30154144} +{"current_steps": 44735, "total_steps": 204665, "loss": 0.1377, "lr": 1.9155647849872487e-06, "epoch": 1.0928834925365842, "percentage": 21.86, "elapsed_time": "0:57:34", "remaining_time": "3:25:50", "throughput": 8729.74, "total_tokens": 30157984} +{"current_steps": 44740, "total_steps": 204665, "loss": 0.2204, "lr": 1.9155304855219316e-06, "epoch": 1.0930056433684314, "percentage": 21.86, "elapsed_time": "0:57:34", "remaining_time": "3:25:49", "throughput": 8729.77, "total_tokens": 30161056} +{"current_steps": 44745, "total_steps": 204665, "loss": 0.0376, "lr": 1.91549617939863e-06, "epoch": 1.0931277942002784, "percentage": 21.86, "elapsed_time": "0:57:35", "remaining_time": "3:25:49", "throughput": 8729.93, "total_tokens": 30164704} +{"current_steps": 44750, "total_steps": 204665, "loss": 0.0082, "lr": 1.9154618666175942e-06, "epoch": 1.0932499450321256, "percentage": 21.86, "elapsed_time": "0:57:35", "remaining_time": "3:25:48", "throughput": 8730.09, "total_tokens": 30168352} +{"current_steps": 44755, "total_steps": 204665, "loss": 0.0475, "lr": 1.9154275471790733e-06, "epoch": 1.0933720958639728, "percentage": 21.87, "elapsed_time": "0:57:36", "remaining_time": "3:25:48", "throughput": 8730.21, "total_tokens": 30171808} +{"current_steps": 44760, "total_steps": 204665, "loss": 0.1045, "lr": 1.9153932210833173e-06, "epoch": 1.09349424669582, "percentage": 21.87, "elapsed_time": "0:57:36", "remaining_time": "3:25:47", "throughput": 8730.36, "total_tokens": 30175392} +{"current_steps": 44765, "total_steps": 204665, "loss": 0.1195, "lr": 1.9153588883305756e-06, "epoch": 1.0936163975276672, "percentage": 21.87, "elapsed_time": "0:57:36", "remaining_time": "3:25:47", "throughput": 8730.44, "total_tokens": 30178720} +{"current_steps": 44770, "total_steps": 204665, "loss": 0.0416, "lr": 1.9153245489210977e-06, "epoch": 1.0937385483595143, "percentage": 21.87, "elapsed_time": "0:57:37", "remaining_time": "3:25:46", "throughput": 8730.49, "total_tokens": 30181856} +{"current_steps": 44775, "total_steps": 204665, "loss": 0.0724, "lr": 1.9152902028551335e-06, "epoch": 1.0938606991913615, "percentage": 21.88, "elapsed_time": "0:57:37", "remaining_time": "3:25:46", "throughput": 8730.53, "total_tokens": 30184992} +{"current_steps": 44780, "total_steps": 204665, "loss": 0.0851, "lr": 1.915255850132933e-06, "epoch": 1.0939828500232087, "percentage": 21.88, "elapsed_time": "0:57:37", "remaining_time": "3:25:45", "throughput": 8730.67, "total_tokens": 30188512} +{"current_steps": 44785, "total_steps": 204665, "loss": 0.059, "lr": 1.915221490754746e-06, "epoch": 1.094105000855056, "percentage": 21.88, "elapsed_time": "0:57:38", "remaining_time": "3:25:45", "throughput": 8730.7, "total_tokens": 30191584} +{"current_steps": 44790, "total_steps": 204665, "loss": 0.1301, "lr": 1.9151871247208214e-06, "epoch": 1.094227151686903, "percentage": 21.88, "elapsed_time": "0:57:38", "remaining_time": "3:25:44", "throughput": 8730.72, "total_tokens": 30194592} +{"current_steps": 44795, "total_steps": 204665, "loss": 0.2542, "lr": 1.9151527520314105e-06, "epoch": 1.09434930251875, "percentage": 21.89, "elapsed_time": "0:57:38", "remaining_time": "3:25:44", "throughput": 8730.95, "total_tokens": 30198560} +{"current_steps": 44800, "total_steps": 204665, "loss": 0.0945, "lr": 1.9151183726867623e-06, "epoch": 1.0944714533505973, "percentage": 21.89, "elapsed_time": "0:57:39", "remaining_time": "3:25:43", "throughput": 8731.02, "total_tokens": 30201824} +{"current_steps": 44805, "total_steps": 204665, "loss": 0.0295, "lr": 1.9150839866871273e-06, "epoch": 1.0945936041824444, "percentage": 21.89, "elapsed_time": "0:57:39", "remaining_time": "3:25:43", "throughput": 8731.17, "total_tokens": 30205408} +{"current_steps": 44810, "total_steps": 204665, "loss": 0.0894, "lr": 1.9150495940327556e-06, "epoch": 1.0947157550142916, "percentage": 21.89, "elapsed_time": "0:57:39", "remaining_time": "3:25:42", "throughput": 8731.26, "total_tokens": 30208736} +{"current_steps": 44815, "total_steps": 204665, "loss": 0.1374, "lr": 1.915015194723897e-06, "epoch": 1.0948379058461388, "percentage": 21.9, "elapsed_time": "0:57:40", "remaining_time": "3:25:42", "throughput": 8731.3, "total_tokens": 30211872} +{"current_steps": 44820, "total_steps": 204665, "loss": 0.0371, "lr": 1.9149807887608012e-06, "epoch": 1.094960056677986, "percentage": 21.9, "elapsed_time": "0:57:40", "remaining_time": "3:25:41", "throughput": 8731.39, "total_tokens": 30215200} +{"current_steps": 44825, "total_steps": 204665, "loss": 0.1393, "lr": 1.9149463761437196e-06, "epoch": 1.0950822075098332, "percentage": 21.9, "elapsed_time": "0:57:40", "remaining_time": "3:25:40", "throughput": 8731.42, "total_tokens": 30218272} +{"current_steps": 44830, "total_steps": 204665, "loss": 0.0718, "lr": 1.914911956872902e-06, "epoch": 1.0952043583416804, "percentage": 21.9, "elapsed_time": "0:57:41", "remaining_time": "3:25:40", "throughput": 8731.5, "total_tokens": 30221600} +{"current_steps": 44835, "total_steps": 204665, "loss": 0.0227, "lr": 1.9148775309485982e-06, "epoch": 1.0953265091735274, "percentage": 21.91, "elapsed_time": "0:57:41", "remaining_time": "3:25:39", "throughput": 8731.63, "total_tokens": 30225120} +{"current_steps": 44840, "total_steps": 204665, "loss": 0.0023, "lr": 1.914843098371059e-06, "epoch": 1.0954486600053746, "percentage": 21.91, "elapsed_time": "0:57:41", "remaining_time": "3:25:39", "throughput": 8731.75, "total_tokens": 30228576} +{"current_steps": 44845, "total_steps": 204665, "loss": 0.0347, "lr": 1.914808659140535e-06, "epoch": 1.0955708108372217, "percentage": 21.91, "elapsed_time": "0:57:42", "remaining_time": "3:25:38", "throughput": 8731.91, "total_tokens": 30232224} +{"current_steps": 44850, "total_steps": 204665, "loss": 0.0849, "lr": 1.9147742132572763e-06, "epoch": 1.095692961669069, "percentage": 21.91, "elapsed_time": "0:57:42", "remaining_time": "3:25:38", "throughput": 8731.94, "total_tokens": 30235296} +{"current_steps": 44855, "total_steps": 204665, "loss": 0.0941, "lr": 1.914739760721533e-06, "epoch": 1.0958151125009161, "percentage": 21.92, "elapsed_time": "0:57:42", "remaining_time": "3:25:37", "throughput": 8731.98, "total_tokens": 30238432} +{"current_steps": 44860, "total_steps": 204665, "loss": 0.031, "lr": 1.9147053015335568e-06, "epoch": 1.0959372633327633, "percentage": 21.92, "elapsed_time": "0:57:43", "remaining_time": "3:25:37", "throughput": 8732.26, "total_tokens": 30242592} +{"current_steps": 44865, "total_steps": 204665, "loss": 0.1462, "lr": 1.9146708356935974e-06, "epoch": 1.0960594141646105, "percentage": 21.92, "elapsed_time": "0:57:43", "remaining_time": "3:25:36", "throughput": 8732.3, "total_tokens": 30245728} +{"current_steps": 44870, "total_steps": 204665, "loss": 0.1299, "lr": 1.9146363632019053e-06, "epoch": 1.0961815649964577, "percentage": 21.92, "elapsed_time": "0:57:44", "remaining_time": "3:25:36", "throughput": 8732.47, "total_tokens": 30249376} +{"current_steps": 44875, "total_steps": 204665, "loss": 0.1636, "lr": 1.914601884058732e-06, "epoch": 1.0963037158283049, "percentage": 21.93, "elapsed_time": "0:57:44", "remaining_time": "3:25:35", "throughput": 8732.55, "total_tokens": 30252640} +{"current_steps": 44880, "total_steps": 204665, "loss": 0.003, "lr": 1.9145673982643276e-06, "epoch": 1.096425866660152, "percentage": 21.93, "elapsed_time": "0:57:44", "remaining_time": "3:25:35", "throughput": 8732.7, "total_tokens": 30256288} +{"current_steps": 44885, "total_steps": 204665, "loss": 0.0457, "lr": 1.914532905818943e-06, "epoch": 1.096548017491999, "percentage": 21.93, "elapsed_time": "0:57:45", "remaining_time": "3:25:34", "throughput": 8732.71, "total_tokens": 30259296} +{"current_steps": 44890, "total_steps": 204665, "loss": 0.1412, "lr": 1.914498406722829e-06, "epoch": 1.0966701683238462, "percentage": 21.93, "elapsed_time": "0:57:45", "remaining_time": "3:25:34", "throughput": 8732.8, "total_tokens": 30262624} +{"current_steps": 44895, "total_steps": 204665, "loss": 0.1249, "lr": 1.914463900976237e-06, "epoch": 1.0967923191556934, "percentage": 21.94, "elapsed_time": "0:57:45", "remaining_time": "3:25:33", "throughput": 8732.83, "total_tokens": 30265760} +{"current_steps": 44900, "total_steps": 204665, "loss": 0.164, "lr": 1.9144293885794177e-06, "epoch": 1.0969144699875406, "percentage": 21.94, "elapsed_time": "0:57:46", "remaining_time": "3:25:33", "throughput": 8732.89, "total_tokens": 30268960} +{"current_steps": 44905, "total_steps": 204665, "loss": 0.0011, "lr": 1.9143948695326217e-06, "epoch": 1.0970366208193878, "percentage": 21.94, "elapsed_time": "0:57:46", "remaining_time": "3:25:32", "throughput": 8733.02, "total_tokens": 30272480} +{"current_steps": 44910, "total_steps": 204665, "loss": 0.0457, "lr": 1.9143603438361e-06, "epoch": 1.097158771651235, "percentage": 21.94, "elapsed_time": "0:57:46", "remaining_time": "3:25:32", "throughput": 8733.04, "total_tokens": 30275488} +{"current_steps": 44915, "total_steps": 204665, "loss": 0.0966, "lr": 1.914325811490104e-06, "epoch": 1.0972809224830822, "percentage": 21.95, "elapsed_time": "0:57:47", "remaining_time": "3:25:31", "throughput": 8733.13, "total_tokens": 30278816} +{"current_steps": 44920, "total_steps": 204665, "loss": 0.1837, "lr": 1.914291272494885e-06, "epoch": 1.0974030733149294, "percentage": 21.95, "elapsed_time": "0:57:47", "remaining_time": "3:25:31", "throughput": 8733.19, "total_tokens": 30282016} +{"current_steps": 44925, "total_steps": 204665, "loss": 0.0419, "lr": 1.914256726850694e-06, "epoch": 1.0975252241467763, "percentage": 21.95, "elapsed_time": "0:57:47", "remaining_time": "3:25:30", "throughput": 8733.28, "total_tokens": 30285344} +{"current_steps": 44930, "total_steps": 204665, "loss": 0.1165, "lr": 1.914222174557782e-06, "epoch": 1.0976473749786235, "percentage": 21.95, "elapsed_time": "0:57:48", "remaining_time": "3:25:29", "throughput": 8733.35, "total_tokens": 30288608} +{"current_steps": 44935, "total_steps": 204665, "loss": 0.1182, "lr": 1.9141876156164006e-06, "epoch": 1.0977695258104707, "percentage": 21.96, "elapsed_time": "0:57:48", "remaining_time": "3:25:29", "throughput": 8733.45, "total_tokens": 30292000} +{"current_steps": 44940, "total_steps": 204665, "loss": 0.0429, "lr": 1.914153050026801e-06, "epoch": 1.097891676642318, "percentage": 21.96, "elapsed_time": "0:57:48", "remaining_time": "3:25:28", "throughput": 8733.64, "total_tokens": 30295712} +{"current_steps": 44945, "total_steps": 204665, "loss": 0.085, "lr": 1.914118477789234e-06, "epoch": 1.098013827474165, "percentage": 21.96, "elapsed_time": "0:57:49", "remaining_time": "3:25:28", "throughput": 8733.8, "total_tokens": 30299296} +{"current_steps": 44950, "total_steps": 204665, "loss": 0.0929, "lr": 1.914083898903952e-06, "epoch": 1.0981359783060123, "percentage": 21.96, "elapsed_time": "0:57:49", "remaining_time": "3:25:27", "throughput": 8734.0, "total_tokens": 30303136} +{"current_steps": 44955, "total_steps": 204665, "loss": 0.0196, "lr": 1.914049313371206e-06, "epoch": 1.0982581291378595, "percentage": 21.97, "elapsed_time": "0:57:49", "remaining_time": "3:25:27", "throughput": 8734.09, "total_tokens": 30306464} +{"current_steps": 44960, "total_steps": 204665, "loss": 0.0349, "lr": 1.914014721191248e-06, "epoch": 1.0983802799697067, "percentage": 21.97, "elapsed_time": "0:57:50", "remaining_time": "3:25:26", "throughput": 8734.17, "total_tokens": 30309728} +{"current_steps": 44965, "total_steps": 204665, "loss": 0.1206, "lr": 1.9139801223643283e-06, "epoch": 1.0985024308015539, "percentage": 21.97, "elapsed_time": "0:57:50", "remaining_time": "3:25:26", "throughput": 8734.25, "total_tokens": 30313056} +{"current_steps": 44970, "total_steps": 204665, "loss": 0.0054, "lr": 1.9139455168907e-06, "epoch": 1.0986245816334008, "percentage": 21.97, "elapsed_time": "0:57:50", "remaining_time": "3:25:25", "throughput": 8734.34, "total_tokens": 30316384} +{"current_steps": 44975, "total_steps": 204665, "loss": 0.1733, "lr": 1.9139109047706134e-06, "epoch": 1.098746732465248, "percentage": 21.97, "elapsed_time": "0:57:51", "remaining_time": "3:25:25", "throughput": 8734.45, "total_tokens": 30319776} +{"current_steps": 44980, "total_steps": 204665, "loss": 0.1076, "lr": 1.9138762860043213e-06, "epoch": 1.0988688832970952, "percentage": 21.98, "elapsed_time": "0:57:51", "remaining_time": "3:25:24", "throughput": 8734.49, "total_tokens": 30322912} +{"current_steps": 44985, "total_steps": 204665, "loss": 0.0892, "lr": 1.913841660592075e-06, "epoch": 1.0989910341289424, "percentage": 21.98, "elapsed_time": "0:57:51", "remaining_time": "3:25:24", "throughput": 8734.52, "total_tokens": 30325984} +{"current_steps": 44990, "total_steps": 204665, "loss": 0.2093, "lr": 1.913807028534126e-06, "epoch": 1.0991131849607896, "percentage": 21.98, "elapsed_time": "0:57:52", "remaining_time": "3:25:23", "throughput": 8734.56, "total_tokens": 30329120} +{"current_steps": 44995, "total_steps": 204665, "loss": 0.0831, "lr": 1.9137723898307275e-06, "epoch": 1.0992353357926368, "percentage": 21.98, "elapsed_time": "0:57:52", "remaining_time": "3:25:23", "throughput": 8734.6, "total_tokens": 30332256} +{"current_steps": 45000, "total_steps": 204665, "loss": 0.0393, "lr": 1.9137377444821296e-06, "epoch": 1.099357486624484, "percentage": 21.99, "elapsed_time": "0:57:52", "remaining_time": "3:25:22", "throughput": 8734.6, "total_tokens": 30335200} +{"current_steps": 45005, "total_steps": 204665, "loss": 0.0218, "lr": 1.913703092488585e-06, "epoch": 1.0994796374563311, "percentage": 21.99, "elapsed_time": "0:57:53", "remaining_time": "3:25:22", "throughput": 8734.72, "total_tokens": 30338656} +{"current_steps": 45010, "total_steps": 204665, "loss": 0.0289, "lr": 1.9136684338503463e-06, "epoch": 1.0996017882881783, "percentage": 21.99, "elapsed_time": "0:57:53", "remaining_time": "3:25:21", "throughput": 8734.8, "total_tokens": 30341920} +{"current_steps": 45015, "total_steps": 204665, "loss": 0.0406, "lr": 1.9136337685676644e-06, "epoch": 1.0997239391200253, "percentage": 21.99, "elapsed_time": "0:57:54", "remaining_time": "3:25:20", "throughput": 8734.88, "total_tokens": 30345248} +{"current_steps": 45020, "total_steps": 204665, "loss": 0.0512, "lr": 1.9135990966407926e-06, "epoch": 1.0998460899518725, "percentage": 22.0, "elapsed_time": "0:57:54", "remaining_time": "3:25:20", "throughput": 8735.42, "total_tokens": 30350624} +{"current_steps": 45025, "total_steps": 204665, "loss": 0.0966, "lr": 1.913564418069982e-06, "epoch": 1.0999682407837197, "percentage": 22.0, "elapsed_time": "0:57:54", "remaining_time": "3:25:20", "throughput": 8735.6, "total_tokens": 30354336} +{"current_steps": 45030, "total_steps": 204665, "loss": 0.0896, "lr": 1.9135297328554853e-06, "epoch": 1.1000903916155669, "percentage": 22.0, "elapsed_time": "0:57:55", "remaining_time": "3:25:19", "throughput": 8735.62, "total_tokens": 30357344} +{"current_steps": 45035, "total_steps": 204665, "loss": 0.0729, "lr": 1.9134950409975547e-06, "epoch": 1.100212542447414, "percentage": 22.0, "elapsed_time": "0:57:55", "remaining_time": "3:25:19", "throughput": 8735.7, "total_tokens": 30360608} +{"current_steps": 45040, "total_steps": 204665, "loss": 0.0882, "lr": 1.9134603424964425e-06, "epoch": 1.1003346932792613, "percentage": 22.01, "elapsed_time": "0:57:55", "remaining_time": "3:25:18", "throughput": 8735.81, "total_tokens": 30364000} +{"current_steps": 45045, "total_steps": 204665, "loss": 0.2351, "lr": 1.9134256373524008e-06, "epoch": 1.1004568441111084, "percentage": 22.01, "elapsed_time": "0:57:56", "remaining_time": "3:25:18", "throughput": 8735.96, "total_tokens": 30367648} +{"current_steps": 45050, "total_steps": 204665, "loss": 0.03, "lr": 1.9133909255656822e-06, "epoch": 1.1005789949429556, "percentage": 22.01, "elapsed_time": "0:57:56", "remaining_time": "3:25:17", "throughput": 8736.06, "total_tokens": 30371040} +{"current_steps": 45055, "total_steps": 204665, "loss": 0.0026, "lr": 1.91335620713654e-06, "epoch": 1.1007011457748028, "percentage": 22.01, "elapsed_time": "0:57:56", "remaining_time": "3:25:17", "throughput": 8736.31, "total_tokens": 30375072} +{"current_steps": 45060, "total_steps": 204665, "loss": 0.1178, "lr": 1.9133214820652247e-06, "epoch": 1.1008232966066498, "percentage": 22.02, "elapsed_time": "0:57:57", "remaining_time": "3:25:16", "throughput": 8736.4, "total_tokens": 30378400} +{"current_steps": 45065, "total_steps": 204665, "loss": 0.101, "lr": 1.91328675035199e-06, "epoch": 1.100945447438497, "percentage": 22.02, "elapsed_time": "0:57:57", "remaining_time": "3:25:16", "throughput": 8736.66, "total_tokens": 30382496} +{"current_steps": 45070, "total_steps": 204665, "loss": 0.1579, "lr": 1.913252011997089e-06, "epoch": 1.1010675982703442, "percentage": 22.02, "elapsed_time": "0:57:57", "remaining_time": "3:25:15", "throughput": 8736.77, "total_tokens": 30385952} +{"current_steps": 45075, "total_steps": 204665, "loss": 0.0704, "lr": 1.913217267000773e-06, "epoch": 1.1011897491021914, "percentage": 22.02, "elapsed_time": "0:57:58", "remaining_time": "3:25:14", "throughput": 8736.8, "total_tokens": 30389024} +{"current_steps": 45080, "total_steps": 204665, "loss": 0.0606, "lr": 1.913182515363296e-06, "epoch": 1.1013118999340386, "percentage": 22.03, "elapsed_time": "0:57:58", "remaining_time": "3:25:14", "throughput": 8736.88, "total_tokens": 30392288} +{"current_steps": 45085, "total_steps": 204665, "loss": 0.1839, "lr": 1.9131477570849103e-06, "epoch": 1.1014340507658857, "percentage": 22.03, "elapsed_time": "0:57:58", "remaining_time": "3:25:13", "throughput": 8736.96, "total_tokens": 30395616} +{"current_steps": 45090, "total_steps": 204665, "loss": 0.0551, "lr": 1.913112992165868e-06, "epoch": 1.101556201597733, "percentage": 22.03, "elapsed_time": "0:57:59", "remaining_time": "3:25:13", "throughput": 8737.05, "total_tokens": 30398944} +{"current_steps": 45095, "total_steps": 204665, "loss": 0.1362, "lr": 1.9130782206064228e-06, "epoch": 1.1016783524295801, "percentage": 22.03, "elapsed_time": "0:57:59", "remaining_time": "3:25:12", "throughput": 8737.18, "total_tokens": 30402464} +{"current_steps": 45100, "total_steps": 204665, "loss": 0.0087, "lr": 1.9130434424068265e-06, "epoch": 1.1018005032614273, "percentage": 22.04, "elapsed_time": "0:58:00", "remaining_time": "3:25:12", "throughput": 8737.31, "total_tokens": 30405984} +{"current_steps": 45105, "total_steps": 204665, "loss": 0.1299, "lr": 1.9130086575673335e-06, "epoch": 1.1019226540932743, "percentage": 22.04, "elapsed_time": "0:58:00", "remaining_time": "3:25:11", "throughput": 8737.46, "total_tokens": 30409568} +{"current_steps": 45110, "total_steps": 204665, "loss": 0.0292, "lr": 1.9129738660881956e-06, "epoch": 1.1020448049251215, "percentage": 22.04, "elapsed_time": "0:58:00", "remaining_time": "3:25:11", "throughput": 8737.61, "total_tokens": 30413152} +{"current_steps": 45115, "total_steps": 204665, "loss": 0.0517, "lr": 1.9129390679696663e-06, "epoch": 1.1021669557569687, "percentage": 22.04, "elapsed_time": "0:58:01", "remaining_time": "3:25:10", "throughput": 8737.77, "total_tokens": 30416800} +{"current_steps": 45120, "total_steps": 204665, "loss": 0.0245, "lr": 1.9129042632119986e-06, "epoch": 1.1022891065888158, "percentage": 22.05, "elapsed_time": "0:58:01", "remaining_time": "3:25:10", "throughput": 8737.92, "total_tokens": 30420384} +{"current_steps": 45125, "total_steps": 204665, "loss": 0.0089, "lr": 1.9128694518154456e-06, "epoch": 1.102411257420663, "percentage": 22.05, "elapsed_time": "0:58:01", "remaining_time": "3:25:09", "throughput": 8738.02, "total_tokens": 30423776} +{"current_steps": 45130, "total_steps": 204665, "loss": 0.084, "lr": 1.91283463378026e-06, "epoch": 1.1025334082525102, "percentage": 22.05, "elapsed_time": "0:58:02", "remaining_time": "3:25:09", "throughput": 8738.12, "total_tokens": 30427168} +{"current_steps": 45135, "total_steps": 204665, "loss": 0.0005, "lr": 1.912799809106696e-06, "epoch": 1.1026555590843574, "percentage": 22.05, "elapsed_time": "0:58:02", "remaining_time": "3:25:08", "throughput": 8738.1, "total_tokens": 30430048} +{"current_steps": 45140, "total_steps": 204665, "loss": 0.0656, "lr": 1.912764977795006e-06, "epoch": 1.1027777099162046, "percentage": 22.06, "elapsed_time": "0:58:02", "remaining_time": "3:25:08", "throughput": 8738.11, "total_tokens": 30433056} +{"current_steps": 45145, "total_steps": 204665, "loss": 0.1529, "lr": 1.9127301398454436e-06, "epoch": 1.1028998607480518, "percentage": 22.06, "elapsed_time": "0:58:03", "remaining_time": "3:25:07", "throughput": 8738.14, "total_tokens": 30436128} +{"current_steps": 45150, "total_steps": 204665, "loss": 0.1235, "lr": 1.912695295258262e-06, "epoch": 1.1030220115798988, "percentage": 22.06, "elapsed_time": "0:58:03", "remaining_time": "3:25:07", "throughput": 8738.2, "total_tokens": 30439328} +{"current_steps": 45155, "total_steps": 204665, "loss": 0.0019, "lr": 1.9126604440337145e-06, "epoch": 1.103144162411746, "percentage": 22.06, "elapsed_time": "0:58:03", "remaining_time": "3:25:06", "throughput": 8738.37, "total_tokens": 30443040} +{"current_steps": 45160, "total_steps": 204665, "loss": 0.1519, "lr": 1.9126255861720552e-06, "epoch": 1.1032663132435931, "percentage": 22.07, "elapsed_time": "0:58:04", "remaining_time": "3:25:06", "throughput": 8738.36, "total_tokens": 30445920} +{"current_steps": 45165, "total_steps": 204665, "loss": 0.0032, "lr": 1.912590721673537e-06, "epoch": 1.1033884640754403, "percentage": 22.07, "elapsed_time": "0:58:04", "remaining_time": "3:25:05", "throughput": 8738.41, "total_tokens": 30449120} +{"current_steps": 45170, "total_steps": 204665, "loss": 0.0689, "lr": 1.912555850538414e-06, "epoch": 1.1035106149072875, "percentage": 22.07, "elapsed_time": "0:58:04", "remaining_time": "3:25:05", "throughput": 8738.51, "total_tokens": 30452512} +{"current_steps": 45175, "total_steps": 204665, "loss": 0.0416, "lr": 1.9125209727669385e-06, "epoch": 1.1036327657391347, "percentage": 22.07, "elapsed_time": "0:58:05", "remaining_time": "3:25:04", "throughput": 8738.68, "total_tokens": 30456224} +{"current_steps": 45180, "total_steps": 204665, "loss": 0.0878, "lr": 1.912486088359366e-06, "epoch": 1.103754916570982, "percentage": 22.08, "elapsed_time": "0:58:05", "remaining_time": "3:25:04", "throughput": 8738.74, "total_tokens": 30459424} +{"current_steps": 45185, "total_steps": 204665, "loss": 0.0887, "lr": 1.9124511973159486e-06, "epoch": 1.103877067402829, "percentage": 22.08, "elapsed_time": "0:58:05", "remaining_time": "3:25:03", "throughput": 8738.82, "total_tokens": 30462688} +{"current_steps": 45190, "total_steps": 204665, "loss": 0.0715, "lr": 1.912416299636941e-06, "epoch": 1.103999218234676, "percentage": 22.08, "elapsed_time": "0:58:06", "remaining_time": "3:25:02", "throughput": 8738.96, "total_tokens": 30466272} +{"current_steps": 45195, "total_steps": 204665, "loss": 0.1073, "lr": 1.912381395322597e-06, "epoch": 1.1041213690665233, "percentage": 22.08, "elapsed_time": "0:58:06", "remaining_time": "3:25:02", "throughput": 8739.03, "total_tokens": 30469536} +{"current_steps": 45200, "total_steps": 204665, "loss": 0.0988, "lr": 1.912346484373169e-06, "epoch": 1.1042435198983704, "percentage": 22.08, "elapsed_time": "0:58:06", "remaining_time": "3:25:01", "throughput": 8739.06, "total_tokens": 30472608} +{"current_steps": 45205, "total_steps": 204665, "loss": 0.0773, "lr": 1.912311566788913e-06, "epoch": 1.1043656707302176, "percentage": 22.09, "elapsed_time": "0:58:07", "remaining_time": "3:25:01", "throughput": 8739.11, "total_tokens": 30475808} +{"current_steps": 45210, "total_steps": 204665, "loss": 0.0827, "lr": 1.9122766425700816e-06, "epoch": 1.1044878215620648, "percentage": 22.09, "elapsed_time": "0:58:07", "remaining_time": "3:25:00", "throughput": 8739.22, "total_tokens": 30479200} +{"current_steps": 45215, "total_steps": 204665, "loss": 0.1808, "lr": 1.912241711716929e-06, "epoch": 1.104609972393912, "percentage": 22.09, "elapsed_time": "0:58:07", "remaining_time": "3:25:00", "throughput": 8739.39, "total_tokens": 30482912} +{"current_steps": 45220, "total_steps": 204665, "loss": 0.0685, "lr": 1.9122067742297093e-06, "epoch": 1.1047321232257592, "percentage": 22.09, "elapsed_time": "0:58:08", "remaining_time": "3:24:59", "throughput": 8739.55, "total_tokens": 30486560} +{"current_steps": 45225, "total_steps": 204665, "loss": 0.1057, "lr": 1.9121718301086766e-06, "epoch": 1.1048542740576064, "percentage": 22.1, "elapsed_time": "0:58:08", "remaining_time": "3:24:59", "throughput": 8739.67, "total_tokens": 30490016} +{"current_steps": 45230, "total_steps": 204665, "loss": 0.1027, "lr": 1.912136879354085e-06, "epoch": 1.1049764248894536, "percentage": 22.1, "elapsed_time": "0:58:09", "remaining_time": "3:24:58", "throughput": 8739.77, "total_tokens": 30493408} +{"current_steps": 45235, "total_steps": 204665, "loss": 0.1223, "lr": 1.912101921966189e-06, "epoch": 1.1050985757213008, "percentage": 22.1, "elapsed_time": "0:58:09", "remaining_time": "3:24:58", "throughput": 8739.99, "total_tokens": 30497312} +{"current_steps": 45240, "total_steps": 204665, "loss": 0.0408, "lr": 1.912066957945242e-06, "epoch": 1.1052207265531477, "percentage": 22.1, "elapsed_time": "0:58:09", "remaining_time": "3:24:57", "throughput": 8740.09, "total_tokens": 30500704} +{"current_steps": 45245, "total_steps": 204665, "loss": 0.1212, "lr": 1.912031987291499e-06, "epoch": 1.105342877384995, "percentage": 22.11, "elapsed_time": "0:58:10", "remaining_time": "3:24:57", "throughput": 8740.15, "total_tokens": 30503904} +{"current_steps": 45250, "total_steps": 204665, "loss": 0.1472, "lr": 1.911997010005214e-06, "epoch": 1.1054650282168421, "percentage": 22.11, "elapsed_time": "0:58:10", "remaining_time": "3:24:56", "throughput": 8740.25, "total_tokens": 30507296} +{"current_steps": 45255, "total_steps": 204665, "loss": 0.0507, "lr": 1.9119620260866415e-06, "epoch": 1.1055871790486893, "percentage": 22.11, "elapsed_time": "0:58:10", "remaining_time": "3:24:56", "throughput": 8740.49, "total_tokens": 30511264} +{"current_steps": 45260, "total_steps": 204665, "loss": 0.132, "lr": 1.911927035536036e-06, "epoch": 1.1057093298805365, "percentage": 22.11, "elapsed_time": "0:58:11", "remaining_time": "3:24:55", "throughput": 8740.5, "total_tokens": 30514272} +{"current_steps": 45265, "total_steps": 204665, "loss": 0.0812, "lr": 1.9118920383536515e-06, "epoch": 1.1058314807123837, "percentage": 22.12, "elapsed_time": "0:58:11", "remaining_time": "3:24:55", "throughput": 8740.56, "total_tokens": 30517472} +{"current_steps": 45270, "total_steps": 204665, "loss": 0.056, "lr": 1.911857034539743e-06, "epoch": 1.1059536315442309, "percentage": 22.12, "elapsed_time": "0:58:11", "remaining_time": "3:24:54", "throughput": 8740.65, "total_tokens": 30520800} +{"current_steps": 45275, "total_steps": 204665, "loss": 0.0698, "lr": 1.911822024094565e-06, "epoch": 1.106075782376078, "percentage": 22.12, "elapsed_time": "0:58:12", "remaining_time": "3:24:54", "throughput": 8740.9, "total_tokens": 30524832} +{"current_steps": 45280, "total_steps": 204665, "loss": 0.0387, "lr": 1.9117870070183718e-06, "epoch": 1.106197933207925, "percentage": 22.12, "elapsed_time": "0:58:12", "remaining_time": "3:24:53", "throughput": 8740.99, "total_tokens": 30528160} +{"current_steps": 45285, "total_steps": 204665, "loss": 0.0378, "lr": 1.9117519833114185e-06, "epoch": 1.1063200840397722, "percentage": 22.13, "elapsed_time": "0:58:12", "remaining_time": "3:24:53", "throughput": 8740.97, "total_tokens": 30531040} +{"current_steps": 45290, "total_steps": 204665, "loss": 0.1398, "lr": 1.9117169529739595e-06, "epoch": 1.1064422348716194, "percentage": 22.13, "elapsed_time": "0:58:13", "remaining_time": "3:24:52", "throughput": 8741.04, "total_tokens": 30534304} +{"current_steps": 45295, "total_steps": 204665, "loss": 0.0015, "lr": 1.9116819160062493e-06, "epoch": 1.1065643857034666, "percentage": 22.13, "elapsed_time": "0:58:13", "remaining_time": "3:24:52", "throughput": 8741.08, "total_tokens": 30537440} +{"current_steps": 45300, "total_steps": 204665, "loss": 0.0918, "lr": 1.9116468724085433e-06, "epoch": 1.1066865365353138, "percentage": 22.13, "elapsed_time": "0:58:13", "remaining_time": "3:24:51", "throughput": 8741.35, "total_tokens": 30541536} +{"current_steps": 45305, "total_steps": 204665, "loss": 0.1732, "lr": 1.9116118221810956e-06, "epoch": 1.106808687367161, "percentage": 22.14, "elapsed_time": "0:58:14", "remaining_time": "3:24:51", "throughput": 8741.47, "total_tokens": 30544992} +{"current_steps": 45310, "total_steps": 204665, "loss": 0.2395, "lr": 1.911576765324162e-06, "epoch": 1.1069308381990082, "percentage": 22.14, "elapsed_time": "0:58:14", "remaining_time": "3:24:50", "throughput": 8741.51, "total_tokens": 30548128} +{"current_steps": 45315, "total_steps": 204665, "loss": 0.1195, "lr": 1.911541701837997e-06, "epoch": 1.1070529890308554, "percentage": 22.14, "elapsed_time": "0:58:14", "remaining_time": "3:24:49", "throughput": 8741.49, "total_tokens": 30551008} +{"current_steps": 45320, "total_steps": 204665, "loss": 0.0957, "lr": 1.9115066317228552e-06, "epoch": 1.1071751398627026, "percentage": 22.14, "elapsed_time": "0:58:15", "remaining_time": "3:24:49", "throughput": 8741.64, "total_tokens": 30554592} +{"current_steps": 45325, "total_steps": 204665, "loss": 0.1277, "lr": 1.911471554978992e-06, "epoch": 1.1072972906945497, "percentage": 22.15, "elapsed_time": "0:58:15", "remaining_time": "3:24:48", "throughput": 8741.81, "total_tokens": 30558304} +{"current_steps": 45330, "total_steps": 204665, "loss": 0.0031, "lr": 1.911436471606663e-06, "epoch": 1.1074194415263967, "percentage": 22.15, "elapsed_time": "0:58:16", "remaining_time": "3:24:48", "throughput": 8742.01, "total_tokens": 30562080} +{"current_steps": 45335, "total_steps": 204665, "loss": 0.1244, "lr": 1.9114013816061222e-06, "epoch": 1.107541592358244, "percentage": 22.15, "elapsed_time": "0:58:16", "remaining_time": "3:24:47", "throughput": 8742.08, "total_tokens": 30565344} +{"current_steps": 45340, "total_steps": 204665, "loss": 0.0315, "lr": 1.911366284977626e-06, "epoch": 1.107663743190091, "percentage": 22.15, "elapsed_time": "0:58:16", "remaining_time": "3:24:47", "throughput": 8742.28, "total_tokens": 30569184} +{"current_steps": 45345, "total_steps": 204665, "loss": 0.0021, "lr": 1.9113311817214287e-06, "epoch": 1.1077858940219383, "percentage": 22.16, "elapsed_time": "0:58:17", "remaining_time": "3:24:46", "throughput": 8742.31, "total_tokens": 30572256} +{"current_steps": 45350, "total_steps": 204665, "loss": 0.1518, "lr": 1.911296071837786e-06, "epoch": 1.1079080448537855, "percentage": 22.16, "elapsed_time": "0:58:17", "remaining_time": "3:24:46", "throughput": 8742.43, "total_tokens": 30575712} +{"current_steps": 45355, "total_steps": 204665, "loss": 0.0362, "lr": 1.911260955326953e-06, "epoch": 1.1080301956856327, "percentage": 22.16, "elapsed_time": "0:58:17", "remaining_time": "3:24:45", "throughput": 8742.55, "total_tokens": 30579232} +{"current_steps": 45360, "total_steps": 204665, "loss": 0.1136, "lr": 1.9112258321891858e-06, "epoch": 1.1081523465174798, "percentage": 22.16, "elapsed_time": "0:58:18", "remaining_time": "3:24:45", "throughput": 8742.76, "total_tokens": 30583072} +{"current_steps": 45365, "total_steps": 204665, "loss": 0.0033, "lr": 1.9111907024247387e-06, "epoch": 1.108274497349327, "percentage": 22.17, "elapsed_time": "0:58:18", "remaining_time": "3:24:44", "throughput": 8742.83, "total_tokens": 30586336} +{"current_steps": 45370, "total_steps": 204665, "loss": 0.0824, "lr": 1.9111555660338677e-06, "epoch": 1.108396648181174, "percentage": 22.17, "elapsed_time": "0:58:18", "remaining_time": "3:24:44", "throughput": 8742.89, "total_tokens": 30589600} +{"current_steps": 45375, "total_steps": 204665, "loss": 0.1192, "lr": 1.9111204230168287e-06, "epoch": 1.1085187990130212, "percentage": 22.17, "elapsed_time": "0:58:19", "remaining_time": "3:24:43", "throughput": 8743.05, "total_tokens": 30593248} +{"current_steps": 45380, "total_steps": 204665, "loss": 0.0516, "lr": 1.9110852733738766e-06, "epoch": 1.1086409498448684, "percentage": 22.17, "elapsed_time": "0:58:19", "remaining_time": "3:24:43", "throughput": 8743.09, "total_tokens": 30596384} +{"current_steps": 45385, "total_steps": 204665, "loss": 0.0061, "lr": 1.9110501171052676e-06, "epoch": 1.1087631006767156, "percentage": 22.18, "elapsed_time": "0:58:19", "remaining_time": "3:24:42", "throughput": 8743.2, "total_tokens": 30599840} +{"current_steps": 45390, "total_steps": 204665, "loss": 0.0958, "lr": 1.911014954211257e-06, "epoch": 1.1088852515085628, "percentage": 22.18, "elapsed_time": "0:58:20", "remaining_time": "3:24:42", "throughput": 8743.27, "total_tokens": 30603104} +{"current_steps": 45395, "total_steps": 204665, "loss": 0.0008, "lr": 1.910979784692101e-06, "epoch": 1.10900740234041, "percentage": 22.18, "elapsed_time": "0:58:20", "remaining_time": "3:24:41", "throughput": 8743.4, "total_tokens": 30606624} +{"current_steps": 45400, "total_steps": 204665, "loss": 0.1607, "lr": 1.9109446085480543e-06, "epoch": 1.1091295531722571, "percentage": 22.18, "elapsed_time": "0:58:20", "remaining_time": "3:24:41", "throughput": 8743.49, "total_tokens": 30609952} +{"current_steps": 45405, "total_steps": 204665, "loss": 0.0909, "lr": 1.9109094257793736e-06, "epoch": 1.1092517040041043, "percentage": 22.19, "elapsed_time": "0:58:21", "remaining_time": "3:24:40", "throughput": 8743.52, "total_tokens": 30613024} +{"current_steps": 45410, "total_steps": 204665, "loss": 0.0454, "lr": 1.9108742363863147e-06, "epoch": 1.1093738548359515, "percentage": 22.19, "elapsed_time": "0:58:21", "remaining_time": "3:24:40", "throughput": 8743.69, "total_tokens": 30616736} +{"current_steps": 45415, "total_steps": 204665, "loss": 0.0845, "lr": 1.9108390403691333e-06, "epoch": 1.1094960056677987, "percentage": 22.19, "elapsed_time": "0:58:21", "remaining_time": "3:24:39", "throughput": 8743.72, "total_tokens": 30619808} +{"current_steps": 45420, "total_steps": 204665, "loss": 0.0218, "lr": 1.9108038377280856e-06, "epoch": 1.1096181564996457, "percentage": 22.19, "elapsed_time": "0:58:22", "remaining_time": "3:24:39", "throughput": 8743.82, "total_tokens": 30623200} +{"current_steps": 45425, "total_steps": 204665, "loss": 0.1623, "lr": 1.910768628463427e-06, "epoch": 1.1097403073314929, "percentage": 22.19, "elapsed_time": "0:58:22", "remaining_time": "3:24:38", "throughput": 8743.87, "total_tokens": 30626400} +{"current_steps": 45430, "total_steps": 204665, "loss": 0.0092, "lr": 1.9107334125754143e-06, "epoch": 1.10986245816334, "percentage": 22.2, "elapsed_time": "0:58:22", "remaining_time": "3:24:38", "throughput": 8743.99, "total_tokens": 30629920} +{"current_steps": 45435, "total_steps": 204665, "loss": 0.2519, "lr": 1.910698190064303e-06, "epoch": 1.1099846089951872, "percentage": 22.2, "elapsed_time": "0:58:23", "remaining_time": "3:24:37", "throughput": 8744.0, "total_tokens": 30632928} +{"current_steps": 45440, "total_steps": 204665, "loss": 0.3251, "lr": 1.91066296093035e-06, "epoch": 1.1101067598270344, "percentage": 22.2, "elapsed_time": "0:58:23", "remaining_time": "3:24:37", "throughput": 8744.15, "total_tokens": 30636576} +{"current_steps": 45445, "total_steps": 204665, "loss": 0.1401, "lr": 1.9106277251738104e-06, "epoch": 1.1102289106588816, "percentage": 22.2, "elapsed_time": "0:58:24", "remaining_time": "3:24:36", "throughput": 8744.2, "total_tokens": 30639712} +{"current_steps": 45450, "total_steps": 204665, "loss": 0.0518, "lr": 1.9105924827949417e-06, "epoch": 1.1103510614907288, "percentage": 22.21, "elapsed_time": "0:58:24", "remaining_time": "3:24:36", "throughput": 8744.29, "total_tokens": 30643104} +{"current_steps": 45455, "total_steps": 204665, "loss": 0.134, "lr": 1.910557233793999e-06, "epoch": 1.110473212322576, "percentage": 22.21, "elapsed_time": "0:58:24", "remaining_time": "3:24:35", "throughput": 8744.35, "total_tokens": 30646304} +{"current_steps": 45460, "total_steps": 204665, "loss": 0.002, "lr": 1.9105219781712396e-06, "epoch": 1.110595363154423, "percentage": 22.21, "elapsed_time": "0:58:25", "remaining_time": "3:24:34", "throughput": 8744.37, "total_tokens": 30649376} +{"current_steps": 45465, "total_steps": 204665, "loss": 0.077, "lr": 1.910486715926919e-06, "epoch": 1.1107175139862702, "percentage": 22.21, "elapsed_time": "0:58:25", "remaining_time": "3:24:34", "throughput": 8744.41, "total_tokens": 30652512} +{"current_steps": 45470, "total_steps": 204665, "loss": 0.1008, "lr": 1.9104514470612946e-06, "epoch": 1.1108396648181174, "percentage": 22.22, "elapsed_time": "0:58:25", "remaining_time": "3:24:33", "throughput": 8744.56, "total_tokens": 30656096} +{"current_steps": 45475, "total_steps": 204665, "loss": 0.0885, "lr": 1.910416171574622e-06, "epoch": 1.1109618156499645, "percentage": 22.22, "elapsed_time": "0:58:26", "remaining_time": "3:24:33", "throughput": 8744.72, "total_tokens": 30659744} +{"current_steps": 45480, "total_steps": 204665, "loss": 0.062, "lr": 1.9103808894671586e-06, "epoch": 1.1110839664818117, "percentage": 22.22, "elapsed_time": "0:58:26", "remaining_time": "3:24:32", "throughput": 8744.79, "total_tokens": 30663008} +{"current_steps": 45485, "total_steps": 204665, "loss": 0.039, "lr": 1.91034560073916e-06, "epoch": 1.111206117313659, "percentage": 22.22, "elapsed_time": "0:58:26", "remaining_time": "3:24:32", "throughput": 8744.91, "total_tokens": 30666528} +{"current_steps": 45490, "total_steps": 204665, "loss": 0.0721, "lr": 1.9103103053908834e-06, "epoch": 1.1113282681455061, "percentage": 22.23, "elapsed_time": "0:58:27", "remaining_time": "3:24:31", "throughput": 8745.01, "total_tokens": 30669920} +{"current_steps": 45495, "total_steps": 204665, "loss": 0.0012, "lr": 1.910275003422586e-06, "epoch": 1.1114504189773533, "percentage": 22.23, "elapsed_time": "0:58:27", "remaining_time": "3:24:31", "throughput": 8745.08, "total_tokens": 30673184} +{"current_steps": 45500, "total_steps": 204665, "loss": 0.0413, "lr": 1.910239694834523e-06, "epoch": 1.1115725698092005, "percentage": 22.23, "elapsed_time": "0:58:27", "remaining_time": "3:24:30", "throughput": 8745.14, "total_tokens": 30676384} +{"current_steps": 45505, "total_steps": 204665, "loss": 0.0017, "lr": 1.910204379626953e-06, "epoch": 1.1116947206410475, "percentage": 22.23, "elapsed_time": "0:58:28", "remaining_time": "3:24:30", "throughput": 8745.26, "total_tokens": 30679840} +{"current_steps": 45510, "total_steps": 204665, "loss": 0.0628, "lr": 1.9101690578001313e-06, "epoch": 1.1118168714728947, "percentage": 22.24, "elapsed_time": "0:58:28", "remaining_time": "3:24:29", "throughput": 8745.36, "total_tokens": 30683232} +{"current_steps": 45515, "total_steps": 204665, "loss": 0.0996, "lr": 1.9101337293543156e-06, "epoch": 1.1119390223047418, "percentage": 22.24, "elapsed_time": "0:58:28", "remaining_time": "3:24:29", "throughput": 8745.44, "total_tokens": 30686560} +{"current_steps": 45520, "total_steps": 204665, "loss": 0.0881, "lr": 1.910098394289763e-06, "epoch": 1.112061173136589, "percentage": 22.24, "elapsed_time": "0:58:29", "remaining_time": "3:24:28", "throughput": 8745.47, "total_tokens": 30689632} +{"current_steps": 45525, "total_steps": 204665, "loss": 0.0683, "lr": 1.9100630526067292e-06, "epoch": 1.1121833239684362, "percentage": 22.24, "elapsed_time": "0:58:29", "remaining_time": "3:24:28", "throughput": 8745.56, "total_tokens": 30693024} +{"current_steps": 45530, "total_steps": 204665, "loss": 0.0671, "lr": 1.9100277043054727e-06, "epoch": 1.1123054748002834, "percentage": 22.25, "elapsed_time": "0:58:29", "remaining_time": "3:24:27", "throughput": 8745.6, "total_tokens": 30696160} +{"current_steps": 45535, "total_steps": 204665, "loss": 0.0694, "lr": 1.90999234938625e-06, "epoch": 1.1124276256321306, "percentage": 22.25, "elapsed_time": "0:58:30", "remaining_time": "3:24:27", "throughput": 8745.58, "total_tokens": 30699040} +{"current_steps": 45540, "total_steps": 204665, "loss": 0.0007, "lr": 1.909956987849318e-06, "epoch": 1.1125497764639778, "percentage": 22.25, "elapsed_time": "0:58:30", "remaining_time": "3:24:26", "throughput": 8745.66, "total_tokens": 30702304} +{"current_steps": 45545, "total_steps": 204665, "loss": 0.0649, "lr": 1.909921619694934e-06, "epoch": 1.112671927295825, "percentage": 22.25, "elapsed_time": "0:58:30", "remaining_time": "3:24:26", "throughput": 8745.74, "total_tokens": 30705632} +{"current_steps": 45550, "total_steps": 204665, "loss": 0.0619, "lr": 1.909886244923356e-06, "epoch": 1.112794078127672, "percentage": 22.26, "elapsed_time": "0:58:31", "remaining_time": "3:24:25", "throughput": 8745.85, "total_tokens": 30709088} +{"current_steps": 45555, "total_steps": 204665, "loss": 0.1361, "lr": 1.9098508635348398e-06, "epoch": 1.1129162289595191, "percentage": 22.26, "elapsed_time": "0:58:31", "remaining_time": "3:24:25", "throughput": 8745.95, "total_tokens": 30712480} +{"current_steps": 45560, "total_steps": 204665, "loss": 0.0475, "lr": 1.909815475529643e-06, "epoch": 1.1130383797913663, "percentage": 22.26, "elapsed_time": "0:58:31", "remaining_time": "3:24:24", "throughput": 8746.0, "total_tokens": 30715680} +{"current_steps": 45565, "total_steps": 204665, "loss": 0.0958, "lr": 1.909780080908024e-06, "epoch": 1.1131605306232135, "percentage": 22.26, "elapsed_time": "0:58:32", "remaining_time": "3:24:23", "throughput": 8746.06, "total_tokens": 30718880} +{"current_steps": 45570, "total_steps": 204665, "loss": 0.0781, "lr": 1.9097446796702395e-06, "epoch": 1.1132826814550607, "percentage": 22.27, "elapsed_time": "0:58:32", "remaining_time": "3:24:23", "throughput": 8746.12, "total_tokens": 30722144} +{"current_steps": 45575, "total_steps": 204665, "loss": 0.1832, "lr": 1.909709271816547e-06, "epoch": 1.113404832286908, "percentage": 22.27, "elapsed_time": "0:58:33", "remaining_time": "3:24:22", "throughput": 8746.16, "total_tokens": 30725280} +{"current_steps": 45580, "total_steps": 204665, "loss": 0.0437, "lr": 1.9096738573472035e-06, "epoch": 1.113526983118755, "percentage": 22.27, "elapsed_time": "0:58:33", "remaining_time": "3:24:22", "throughput": 8746.33, "total_tokens": 30728928} +{"current_steps": 45585, "total_steps": 204665, "loss": 0.2124, "lr": 1.9096384362624675e-06, "epoch": 1.1136491339506023, "percentage": 22.27, "elapsed_time": "0:58:33", "remaining_time": "3:24:21", "throughput": 8746.4, "total_tokens": 30732192} +{"current_steps": 45590, "total_steps": 204665, "loss": 0.1358, "lr": 1.909603008562596e-06, "epoch": 1.1137712847824495, "percentage": 22.28, "elapsed_time": "0:58:34", "remaining_time": "3:24:21", "throughput": 8746.42, "total_tokens": 30735264} +{"current_steps": 45595, "total_steps": 204665, "loss": 0.0688, "lr": 1.909567574247847e-06, "epoch": 1.1138934356142964, "percentage": 22.28, "elapsed_time": "0:58:34", "remaining_time": "3:24:20", "throughput": 8746.54, "total_tokens": 30738720} +{"current_steps": 45600, "total_steps": 204665, "loss": 0.0368, "lr": 1.9095321333184777e-06, "epoch": 1.1140155864461436, "percentage": 22.28, "elapsed_time": "0:58:34", "remaining_time": "3:24:20", "throughput": 8746.62, "total_tokens": 30741984} +{"current_steps": 45605, "total_steps": 204665, "loss": 0.06, "lr": 1.909496685774746e-06, "epoch": 1.1141377372779908, "percentage": 22.28, "elapsed_time": "0:58:35", "remaining_time": "3:24:19", "throughput": 8746.69, "total_tokens": 30745248} +{"current_steps": 45610, "total_steps": 204665, "loss": 0.0493, "lr": 1.90946123161691e-06, "epoch": 1.114259888109838, "percentage": 22.29, "elapsed_time": "0:58:35", "remaining_time": "3:24:19", "throughput": 8746.84, "total_tokens": 30748832} +{"current_steps": 45615, "total_steps": 204665, "loss": 0.1171, "lr": 1.9094257708452275e-06, "epoch": 1.1143820389416852, "percentage": 22.29, "elapsed_time": "0:58:35", "remaining_time": "3:24:18", "throughput": 8746.97, "total_tokens": 30752352} +{"current_steps": 45620, "total_steps": 204665, "loss": 0.1067, "lr": 1.909390303459956e-06, "epoch": 1.1145041897735324, "percentage": 22.29, "elapsed_time": "0:58:36", "remaining_time": "3:24:18", "throughput": 8746.96, "total_tokens": 30755296} +{"current_steps": 45625, "total_steps": 204665, "loss": 0.2377, "lr": 1.9093548294613533e-06, "epoch": 1.1146263406053796, "percentage": 22.29, "elapsed_time": "0:58:36", "remaining_time": "3:24:17", "throughput": 8747.0, "total_tokens": 30758432} +{"current_steps": 45630, "total_steps": 204665, "loss": 0.0355, "lr": 1.9093193488496778e-06, "epoch": 1.1147484914372268, "percentage": 22.29, "elapsed_time": "0:58:36", "remaining_time": "3:24:17", "throughput": 8747.1, "total_tokens": 30761824} +{"current_steps": 45635, "total_steps": 204665, "loss": 0.0493, "lr": 1.9092838616251877e-06, "epoch": 1.114870642269074, "percentage": 22.3, "elapsed_time": "0:58:37", "remaining_time": "3:24:16", "throughput": 8747.31, "total_tokens": 30765728} +{"current_steps": 45640, "total_steps": 204665, "loss": 0.1208, "lr": 1.9092483677881405e-06, "epoch": 1.114992793100921, "percentage": 22.3, "elapsed_time": "0:58:37", "remaining_time": "3:24:16", "throughput": 8747.42, "total_tokens": 30769120} +{"current_steps": 45645, "total_steps": 204665, "loss": 0.001, "lr": 1.909212867338795e-06, "epoch": 1.115114943932768, "percentage": 22.3, "elapsed_time": "0:58:37", "remaining_time": "3:24:15", "throughput": 8747.49, "total_tokens": 30772384} +{"current_steps": 45650, "total_steps": 204665, "loss": 0.0624, "lr": 1.9091773602774087e-06, "epoch": 1.1152370947646153, "percentage": 22.3, "elapsed_time": "0:58:38", "remaining_time": "3:24:15", "throughput": 8747.55, "total_tokens": 30775584} +{"current_steps": 45655, "total_steps": 204665, "loss": 0.1317, "lr": 1.90914184660424e-06, "epoch": 1.1153592455964625, "percentage": 22.31, "elapsed_time": "0:58:38", "remaining_time": "3:24:14", "throughput": 8747.53, "total_tokens": 30778464} +{"current_steps": 45660, "total_steps": 204665, "loss": 0.1104, "lr": 1.9091063263195473e-06, "epoch": 1.1154813964283097, "percentage": 22.31, "elapsed_time": "0:58:38", "remaining_time": "3:24:14", "throughput": 8747.66, "total_tokens": 30781984} +{"current_steps": 45665, "total_steps": 204665, "loss": 0.1635, "lr": 1.909070799423589e-06, "epoch": 1.1156035472601569, "percentage": 22.31, "elapsed_time": "0:58:39", "remaining_time": "3:24:13", "throughput": 8747.73, "total_tokens": 30785248} +{"current_steps": 45670, "total_steps": 204665, "loss": 0.1662, "lr": 1.9090352659166232e-06, "epoch": 1.115725698092004, "percentage": 22.31, "elapsed_time": "0:58:39", "remaining_time": "3:24:13", "throughput": 8747.89, "total_tokens": 30788896} +{"current_steps": 45675, "total_steps": 204665, "loss": 0.0861, "lr": 1.9089997257989084e-06, "epoch": 1.1158478489238512, "percentage": 22.32, "elapsed_time": "0:58:39", "remaining_time": "3:24:12", "throughput": 8748.02, "total_tokens": 30792416} +{"current_steps": 45680, "total_steps": 204665, "loss": 0.0014, "lr": 1.9089641790707036e-06, "epoch": 1.1159699997556984, "percentage": 22.32, "elapsed_time": "0:58:40", "remaining_time": "3:24:12", "throughput": 8748.19, "total_tokens": 30796128} +{"current_steps": 45685, "total_steps": 204665, "loss": 0.0489, "lr": 1.9089286257322664e-06, "epoch": 1.1160921505875454, "percentage": 22.32, "elapsed_time": "0:58:40", "remaining_time": "3:24:11", "throughput": 8748.25, "total_tokens": 30799328} +{"current_steps": 45690, "total_steps": 204665, "loss": 0.2035, "lr": 1.908893065783856e-06, "epoch": 1.1162143014193926, "percentage": 22.32, "elapsed_time": "0:58:40", "remaining_time": "3:24:11", "throughput": 8748.48, "total_tokens": 30803296} +{"current_steps": 45695, "total_steps": 204665, "loss": 0.0675, "lr": 1.90885749922573e-06, "epoch": 1.1163364522512398, "percentage": 22.33, "elapsed_time": "0:58:41", "remaining_time": "3:24:10", "throughput": 8748.52, "total_tokens": 30806432} +{"current_steps": 45700, "total_steps": 204665, "loss": 0.1025, "lr": 1.9088219260581488e-06, "epoch": 1.116458603083087, "percentage": 22.33, "elapsed_time": "0:58:41", "remaining_time": "3:24:09", "throughput": 8748.62, "total_tokens": 30809824} +{"current_steps": 45705, "total_steps": 204665, "loss": 0.1207, "lr": 1.90878634628137e-06, "epoch": 1.1165807539149342, "percentage": 22.33, "elapsed_time": "0:58:42", "remaining_time": "3:24:09", "throughput": 8748.68, "total_tokens": 30813088} +{"current_steps": 45710, "total_steps": 204665, "loss": 0.0226, "lr": 1.908750759895652e-06, "epoch": 1.1167029047467814, "percentage": 22.33, "elapsed_time": "0:58:42", "remaining_time": "3:24:08", "throughput": 8748.85, "total_tokens": 30816800} +{"current_steps": 45715, "total_steps": 204665, "loss": 0.0618, "lr": 1.908715166901254e-06, "epoch": 1.1168250555786285, "percentage": 22.34, "elapsed_time": "0:58:42", "remaining_time": "3:24:08", "throughput": 8748.83, "total_tokens": 30819680} +{"current_steps": 45720, "total_steps": 204665, "loss": 0.0726, "lr": 1.908679567298435e-06, "epoch": 1.1169472064104757, "percentage": 22.34, "elapsed_time": "0:58:43", "remaining_time": "3:24:07", "throughput": 8748.87, "total_tokens": 30822816} +{"current_steps": 45725, "total_steps": 204665, "loss": 0.007, "lr": 1.908643961087454e-06, "epoch": 1.1170693572423227, "percentage": 22.34, "elapsed_time": "0:58:43", "remaining_time": "3:24:07", "throughput": 8748.89, "total_tokens": 30825888} +{"current_steps": 45730, "total_steps": 204665, "loss": 0.0397, "lr": 1.9086083482685696e-06, "epoch": 1.11719150807417, "percentage": 22.34, "elapsed_time": "0:58:43", "remaining_time": "3:24:06", "throughput": 8749.01, "total_tokens": 30829408} +{"current_steps": 45735, "total_steps": 204665, "loss": 0.0484, "lr": 1.908572728842041e-06, "epoch": 1.117313658906017, "percentage": 22.35, "elapsed_time": "0:58:44", "remaining_time": "3:24:06", "throughput": 8749.12, "total_tokens": 30832800} +{"current_steps": 45740, "total_steps": 204665, "loss": 0.0684, "lr": 1.908537102808127e-06, "epoch": 1.1174358097378643, "percentage": 22.35, "elapsed_time": "0:58:44", "remaining_time": "3:24:05", "throughput": 8749.21, "total_tokens": 30836192} +{"current_steps": 45745, "total_steps": 204665, "loss": 0.0011, "lr": 1.9085014701670866e-06, "epoch": 1.1175579605697115, "percentage": 22.35, "elapsed_time": "0:58:44", "remaining_time": "3:24:05", "throughput": 8749.38, "total_tokens": 30839904} +{"current_steps": 45750, "total_steps": 204665, "loss": 0.0022, "lr": 1.9084658309191798e-06, "epoch": 1.1176801114015587, "percentage": 22.35, "elapsed_time": "0:58:45", "remaining_time": "3:24:04", "throughput": 8749.38, "total_tokens": 30842912} +{"current_steps": 45755, "total_steps": 204665, "loss": 0.0953, "lr": 1.9084301850646645e-06, "epoch": 1.1178022622334058, "percentage": 22.36, "elapsed_time": "0:58:45", "remaining_time": "3:24:04", "throughput": 8749.54, "total_tokens": 30846560} +{"current_steps": 45760, "total_steps": 204665, "loss": 0.0214, "lr": 1.908394532603801e-06, "epoch": 1.117924413065253, "percentage": 22.36, "elapsed_time": "0:58:45", "remaining_time": "3:24:03", "throughput": 8749.65, "total_tokens": 30850016} +{"current_steps": 45765, "total_steps": 204665, "loss": 0.0006, "lr": 1.908358873536848e-06, "epoch": 1.1180465638971002, "percentage": 22.36, "elapsed_time": "0:58:46", "remaining_time": "3:24:03", "throughput": 8749.69, "total_tokens": 30853152} +{"current_steps": 45770, "total_steps": 204665, "loss": 0.0494, "lr": 1.9083232078640647e-06, "epoch": 1.1181687147289474, "percentage": 22.36, "elapsed_time": "0:58:46", "remaining_time": "3:24:02", "throughput": 8749.86, "total_tokens": 30856864} +{"current_steps": 45775, "total_steps": 204665, "loss": 0.1459, "lr": 1.908287535585711e-06, "epoch": 1.1182908655607944, "percentage": 22.37, "elapsed_time": "0:58:46", "remaining_time": "3:24:02", "throughput": 8749.93, "total_tokens": 30860128} +{"current_steps": 45780, "total_steps": 204665, "loss": 0.1142, "lr": 1.9082518567020457e-06, "epoch": 1.1184130163926416, "percentage": 22.37, "elapsed_time": "0:58:47", "remaining_time": "3:24:01", "throughput": 8750.05, "total_tokens": 30863648} +{"current_steps": 45785, "total_steps": 204665, "loss": 0.0439, "lr": 1.908216171213329e-06, "epoch": 1.1185351672244888, "percentage": 22.37, "elapsed_time": "0:58:47", "remaining_time": "3:24:01", "throughput": 8750.12, "total_tokens": 30866976} +{"current_steps": 45790, "total_steps": 204665, "loss": 0.1414, "lr": 1.90818047911982e-06, "epoch": 1.118657318056336, "percentage": 22.37, "elapsed_time": "0:58:47", "remaining_time": "3:24:00", "throughput": 8750.29, "total_tokens": 30870688} +{"current_steps": 45795, "total_steps": 204665, "loss": 0.0685, "lr": 1.908144780421778e-06, "epoch": 1.1187794688881831, "percentage": 22.38, "elapsed_time": "0:58:48", "remaining_time": "3:24:00", "throughput": 8750.31, "total_tokens": 30873760} +{"current_steps": 45800, "total_steps": 204665, "loss": 0.0432, "lr": 1.908109075119463e-06, "epoch": 1.1189016197200303, "percentage": 22.38, "elapsed_time": "0:58:48", "remaining_time": "3:23:59", "throughput": 8750.41, "total_tokens": 30877152} +{"current_steps": 45805, "total_steps": 204665, "loss": 0.0734, "lr": 1.9080733632131347e-06, "epoch": 1.1190237705518775, "percentage": 22.38, "elapsed_time": "0:58:49", "remaining_time": "3:23:59", "throughput": 8750.67, "total_tokens": 30881312} +{"current_steps": 45810, "total_steps": 204665, "loss": 0.1117, "lr": 1.9080376447030525e-06, "epoch": 1.1191459213837247, "percentage": 22.38, "elapsed_time": "0:58:49", "remaining_time": "3:23:58", "throughput": 8750.73, "total_tokens": 30884512} +{"current_steps": 45815, "total_steps": 204665, "loss": 0.1565, "lr": 1.9080019195894766e-06, "epoch": 1.1192680722155717, "percentage": 22.39, "elapsed_time": "0:58:49", "remaining_time": "3:23:58", "throughput": 8750.77, "total_tokens": 30887648} +{"current_steps": 45820, "total_steps": 204665, "loss": 0.0634, "lr": 1.9079661878726663e-06, "epoch": 1.1193902230474189, "percentage": 22.39, "elapsed_time": "0:58:50", "remaining_time": "3:23:57", "throughput": 8750.93, "total_tokens": 30891296} +{"current_steps": 45825, "total_steps": 204665, "loss": 0.0536, "lr": 1.9079304495528815e-06, "epoch": 1.119512373879266, "percentage": 22.39, "elapsed_time": "0:58:50", "remaining_time": "3:23:57", "throughput": 8751.0, "total_tokens": 30894624} +{"current_steps": 45830, "total_steps": 204665, "loss": 0.0291, "lr": 1.9078947046303825e-06, "epoch": 1.1196345247111132, "percentage": 22.39, "elapsed_time": "0:58:50", "remaining_time": "3:23:56", "throughput": 8751.09, "total_tokens": 30897952} +{"current_steps": 45835, "total_steps": 204665, "loss": 0.1195, "lr": 1.907858953105429e-06, "epoch": 1.1197566755429604, "percentage": 22.4, "elapsed_time": "0:58:51", "remaining_time": "3:23:56", "throughput": 8751.14, "total_tokens": 30901152} +{"current_steps": 45840, "total_steps": 204665, "loss": 0.2208, "lr": 1.907823194978281e-06, "epoch": 1.1198788263748076, "percentage": 22.4, "elapsed_time": "0:58:51", "remaining_time": "3:23:55", "throughput": 8751.27, "total_tokens": 30904672} +{"current_steps": 45845, "total_steps": 204665, "loss": 0.0397, "lr": 1.9077874302491985e-06, "epoch": 1.1200009772066548, "percentage": 22.4, "elapsed_time": "0:58:51", "remaining_time": "3:23:55", "throughput": 8751.31, "total_tokens": 30907808} +{"current_steps": 45850, "total_steps": 204665, "loss": 0.0989, "lr": 1.9077516589184416e-06, "epoch": 1.120123128038502, "percentage": 22.4, "elapsed_time": "0:58:52", "remaining_time": "3:23:54", "throughput": 8751.33, "total_tokens": 30910880} +{"current_steps": 45855, "total_steps": 204665, "loss": 0.1054, "lr": 1.9077158809862707e-06, "epoch": 1.1202452788703492, "percentage": 22.4, "elapsed_time": "0:58:52", "remaining_time": "3:23:54", "throughput": 8751.44, "total_tokens": 30914336} +{"current_steps": 45860, "total_steps": 204665, "loss": 0.0362, "lr": 1.9076800964529455e-06, "epoch": 1.1203674297021964, "percentage": 22.41, "elapsed_time": "0:58:52", "remaining_time": "3:23:53", "throughput": 8751.49, "total_tokens": 30917536} +{"current_steps": 45865, "total_steps": 204665, "loss": 0.0847, "lr": 1.9076443053187265e-06, "epoch": 1.1204895805340434, "percentage": 22.41, "elapsed_time": "0:58:53", "remaining_time": "3:23:53", "throughput": 8751.55, "total_tokens": 30920736} +{"current_steps": 45870, "total_steps": 204665, "loss": 0.0048, "lr": 1.907608507583874e-06, "epoch": 1.1206117313658905, "percentage": 22.41, "elapsed_time": "0:58:53", "remaining_time": "3:23:52", "throughput": 8751.89, "total_tokens": 30925216} +{"current_steps": 45875, "total_steps": 204665, "loss": 0.0274, "lr": 1.9075727032486486e-06, "epoch": 1.1207338821977377, "percentage": 22.41, "elapsed_time": "0:58:53", "remaining_time": "3:23:52", "throughput": 8751.99, "total_tokens": 30928672} +{"current_steps": 45880, "total_steps": 204665, "loss": 0.0966, "lr": 1.9075368923133102e-06, "epoch": 1.120856033029585, "percentage": 22.42, "elapsed_time": "0:58:54", "remaining_time": "3:23:51", "throughput": 8752.31, "total_tokens": 30933024} +{"current_steps": 45885, "total_steps": 204665, "loss": 0.0922, "lr": 1.9075010747781194e-06, "epoch": 1.120978183861432, "percentage": 22.42, "elapsed_time": "0:58:54", "remaining_time": "3:23:51", "throughput": 8752.32, "total_tokens": 30936032} +{"current_steps": 45890, "total_steps": 204665, "loss": 0.0999, "lr": 1.9074652506433367e-06, "epoch": 1.1211003346932793, "percentage": 22.42, "elapsed_time": "0:58:54", "remaining_time": "3:23:50", "throughput": 8752.41, "total_tokens": 30939424} +{"current_steps": 45895, "total_steps": 204665, "loss": 0.1094, "lr": 1.9074294199092224e-06, "epoch": 1.1212224855251265, "percentage": 22.42, "elapsed_time": "0:58:55", "remaining_time": "3:23:50", "throughput": 8752.48, "total_tokens": 30942752} +{"current_steps": 45900, "total_steps": 204665, "loss": 0.132, "lr": 1.907393582576038e-06, "epoch": 1.1213446363569737, "percentage": 22.43, "elapsed_time": "0:58:55", "remaining_time": "3:23:49", "throughput": 8752.6, "total_tokens": 30946208} +{"current_steps": 45905, "total_steps": 204665, "loss": 0.0294, "lr": 1.9073577386440423e-06, "epoch": 1.1214667871888206, "percentage": 22.43, "elapsed_time": "0:58:56", "remaining_time": "3:23:49", "throughput": 8752.57, "total_tokens": 30949088} +{"current_steps": 45910, "total_steps": 204665, "loss": 0.1609, "lr": 1.9073218881134979e-06, "epoch": 1.1215889380206678, "percentage": 22.43, "elapsed_time": "0:58:56", "remaining_time": "3:23:48", "throughput": 8752.64, "total_tokens": 30952352} +{"current_steps": 45915, "total_steps": 204665, "loss": 0.0919, "lr": 1.9072860309846647e-06, "epoch": 1.121711088852515, "percentage": 22.43, "elapsed_time": "0:58:56", "remaining_time": "3:23:47", "throughput": 8752.64, "total_tokens": 30955296} +{"current_steps": 45920, "total_steps": 204665, "loss": 0.0923, "lr": 1.907250167257803e-06, "epoch": 1.1218332396843622, "percentage": 22.44, "elapsed_time": "0:58:57", "remaining_time": "3:23:47", "throughput": 8752.69, "total_tokens": 30958496} +{"current_steps": 45925, "total_steps": 204665, "loss": 0.1738, "lr": 1.9072142969331746e-06, "epoch": 1.1219553905162094, "percentage": 22.44, "elapsed_time": "0:58:57", "remaining_time": "3:23:46", "throughput": 8752.72, "total_tokens": 30961632} +{"current_steps": 45930, "total_steps": 204665, "loss": 0.0542, "lr": 1.9071784200110392e-06, "epoch": 1.1220775413480566, "percentage": 22.44, "elapsed_time": "0:58:57", "remaining_time": "3:23:46", "throughput": 8752.82, "total_tokens": 30965024} +{"current_steps": 45935, "total_steps": 204665, "loss": 0.0523, "lr": 1.9071425364916588e-06, "epoch": 1.1221996921799038, "percentage": 22.44, "elapsed_time": "0:58:58", "remaining_time": "3:23:45", "throughput": 8752.9, "total_tokens": 30968352} +{"current_steps": 45940, "total_steps": 204665, "loss": 0.133, "lr": 1.907106646375294e-06, "epoch": 1.122321843011751, "percentage": 22.45, "elapsed_time": "0:58:58", "remaining_time": "3:23:45", "throughput": 8753.01, "total_tokens": 30971808} +{"current_steps": 45945, "total_steps": 204665, "loss": 0.0629, "lr": 1.907070749662205e-06, "epoch": 1.1224439938435982, "percentage": 22.45, "elapsed_time": "0:58:58", "remaining_time": "3:23:44", "throughput": 8753.27, "total_tokens": 30975968} +{"current_steps": 45950, "total_steps": 204665, "loss": 0.1863, "lr": 1.907034846352654e-06, "epoch": 1.1225661446754454, "percentage": 22.45, "elapsed_time": "0:58:59", "remaining_time": "3:23:44", "throughput": 8753.29, "total_tokens": 30978976} +{"current_steps": 45955, "total_steps": 204665, "loss": 0.1616, "lr": 1.9069989364469016e-06, "epoch": 1.1226882955072923, "percentage": 22.45, "elapsed_time": "0:58:59", "remaining_time": "3:23:43", "throughput": 8753.38, "total_tokens": 30982368} +{"current_steps": 45960, "total_steps": 204665, "loss": 0.1339, "lr": 1.906963019945209e-06, "epoch": 1.1228104463391395, "percentage": 22.46, "elapsed_time": "0:58:59", "remaining_time": "3:23:43", "throughput": 8753.52, "total_tokens": 30985952} +{"current_steps": 45965, "total_steps": 204665, "loss": 0.0615, "lr": 1.9069270968478376e-06, "epoch": 1.1229325971709867, "percentage": 22.46, "elapsed_time": "0:59:00", "remaining_time": "3:23:42", "throughput": 8753.66, "total_tokens": 30989536} +{"current_steps": 45970, "total_steps": 204665, "loss": 0.0056, "lr": 1.906891167155048e-06, "epoch": 1.123054748002834, "percentage": 22.46, "elapsed_time": "0:59:00", "remaining_time": "3:23:42", "throughput": 8753.76, "total_tokens": 30992928} +{"current_steps": 45975, "total_steps": 204665, "loss": 0.0017, "lr": 1.906855230867102e-06, "epoch": 1.123176898834681, "percentage": 22.46, "elapsed_time": "0:59:00", "remaining_time": "3:23:41", "throughput": 8753.83, "total_tokens": 30996256} +{"current_steps": 45980, "total_steps": 204665, "loss": 0.1181, "lr": 1.906819287984261e-06, "epoch": 1.1232990496665283, "percentage": 22.47, "elapsed_time": "0:59:01", "remaining_time": "3:23:41", "throughput": 8753.93, "total_tokens": 30999648} +{"current_steps": 45985, "total_steps": 204665, "loss": 0.1356, "lr": 1.9067833385067862e-06, "epoch": 1.1234212004983755, "percentage": 22.47, "elapsed_time": "0:59:01", "remaining_time": "3:23:40", "throughput": 8753.92, "total_tokens": 31002528} +{"current_steps": 45990, "total_steps": 204665, "loss": 0.1763, "lr": 1.906747382434939e-06, "epoch": 1.1235433513302227, "percentage": 22.47, "elapsed_time": "0:59:01", "remaining_time": "3:23:40", "throughput": 8753.91, "total_tokens": 31005472} +{"current_steps": 45995, "total_steps": 204665, "loss": 0.0701, "lr": 1.9067114197689809e-06, "epoch": 1.1236655021620696, "percentage": 22.47, "elapsed_time": "0:59:02", "remaining_time": "3:23:39", "throughput": 8754.01, "total_tokens": 31008864} +{"current_steps": 46000, "total_steps": 204665, "loss": 0.0851, "lr": 1.9066754505091735e-06, "epoch": 1.1237876529939168, "percentage": 22.48, "elapsed_time": "0:59:02", "remaining_time": "3:23:39", "throughput": 8754.09, "total_tokens": 31012192} +{"current_steps": 46005, "total_steps": 204665, "loss": 0.0815, "lr": 1.9066394746557783e-06, "epoch": 1.123909803825764, "percentage": 22.48, "elapsed_time": "0:59:02", "remaining_time": "3:23:38", "throughput": 8754.2, "total_tokens": 31015648} +{"current_steps": 46010, "total_steps": 204665, "loss": 0.1494, "lr": 1.9066034922090573e-06, "epoch": 1.1240319546576112, "percentage": 22.48, "elapsed_time": "0:59:03", "remaining_time": "3:23:38", "throughput": 8754.23, "total_tokens": 31018784} +{"current_steps": 46015, "total_steps": 204665, "loss": 0.0123, "lr": 1.9065675031692718e-06, "epoch": 1.1241541054894584, "percentage": 22.48, "elapsed_time": "0:59:03", "remaining_time": "3:23:37", "throughput": 8754.34, "total_tokens": 31022240} +{"current_steps": 46020, "total_steps": 204665, "loss": 0.1601, "lr": 1.9065315075366834e-06, "epoch": 1.1242762563213056, "percentage": 22.49, "elapsed_time": "0:59:03", "remaining_time": "3:23:37", "throughput": 8754.41, "total_tokens": 31025504} +{"current_steps": 46025, "total_steps": 204665, "loss": 0.0768, "lr": 1.906495505311554e-06, "epoch": 1.1243984071531528, "percentage": 22.49, "elapsed_time": "0:59:04", "remaining_time": "3:23:36", "throughput": 8754.49, "total_tokens": 31028832} +{"current_steps": 46030, "total_steps": 204665, "loss": 0.1274, "lr": 1.9064594964941456e-06, "epoch": 1.124520557985, "percentage": 22.49, "elapsed_time": "0:59:04", "remaining_time": "3:23:36", "throughput": 8754.56, "total_tokens": 31032096} +{"current_steps": 46035, "total_steps": 204665, "loss": 0.0304, "lr": 1.9064234810847198e-06, "epoch": 1.1246427088168471, "percentage": 22.49, "elapsed_time": "0:59:05", "remaining_time": "3:23:35", "throughput": 8754.65, "total_tokens": 31035488} +{"current_steps": 46040, "total_steps": 204665, "loss": 0.0452, "lr": 1.9063874590835386e-06, "epoch": 1.124764859648694, "percentage": 22.5, "elapsed_time": "0:59:05", "remaining_time": "3:23:35", "throughput": 8754.76, "total_tokens": 31038944} +{"current_steps": 46045, "total_steps": 204665, "loss": 0.2326, "lr": 1.9063514304908641e-06, "epoch": 1.1248870104805413, "percentage": 22.5, "elapsed_time": "0:59:05", "remaining_time": "3:23:34", "throughput": 8754.9, "total_tokens": 31042528} +{"current_steps": 46050, "total_steps": 204665, "loss": 0.1244, "lr": 1.9063153953069583e-06, "epoch": 1.1250091613123885, "percentage": 22.5, "elapsed_time": "0:59:06", "remaining_time": "3:23:34", "throughput": 8754.93, "total_tokens": 31045664} +{"current_steps": 46055, "total_steps": 204665, "loss": 0.0525, "lr": 1.906279353532083e-06, "epoch": 1.1251313121442357, "percentage": 22.5, "elapsed_time": "0:59:06", "remaining_time": "3:23:33", "throughput": 8755.07, "total_tokens": 31049248} +{"current_steps": 46060, "total_steps": 204665, "loss": 0.0353, "lr": 1.9062433051665008e-06, "epoch": 1.1252534629760829, "percentage": 22.51, "elapsed_time": "0:59:06", "remaining_time": "3:23:33", "throughput": 8755.15, "total_tokens": 31052576} +{"current_steps": 46065, "total_steps": 204665, "loss": 0.013, "lr": 1.9062072502104734e-06, "epoch": 1.12537561380793, "percentage": 22.51, "elapsed_time": "0:59:07", "remaining_time": "3:23:32", "throughput": 8755.21, "total_tokens": 31055776} +{"current_steps": 46070, "total_steps": 204665, "loss": 0.179, "lr": 1.906171188664263e-06, "epoch": 1.1254977646397772, "percentage": 22.51, "elapsed_time": "0:59:07", "remaining_time": "3:23:32", "throughput": 8755.27, "total_tokens": 31059040} +{"current_steps": 46075, "total_steps": 204665, "loss": 0.0592, "lr": 1.9061351205281322e-06, "epoch": 1.1256199154716244, "percentage": 22.51, "elapsed_time": "0:59:07", "remaining_time": "3:23:31", "throughput": 8755.37, "total_tokens": 31062432} +{"current_steps": 46080, "total_steps": 204665, "loss": 0.0514, "lr": 1.906099045802343e-06, "epoch": 1.1257420663034716, "percentage": 22.51, "elapsed_time": "0:59:08", "remaining_time": "3:23:31", "throughput": 8755.45, "total_tokens": 31065760} +{"current_steps": 46085, "total_steps": 204665, "loss": 0.0684, "lr": 1.9060629644871576e-06, "epoch": 1.1258642171353186, "percentage": 22.52, "elapsed_time": "0:59:08", "remaining_time": "3:23:30", "throughput": 8755.49, "total_tokens": 31068896} +{"current_steps": 46090, "total_steps": 204665, "loss": 0.1949, "lr": 1.9060268765828388e-06, "epoch": 1.1259863679671658, "percentage": 22.52, "elapsed_time": "0:59:08", "remaining_time": "3:23:29", "throughput": 8755.59, "total_tokens": 31072288} +{"current_steps": 46095, "total_steps": 204665, "loss": 0.0292, "lr": 1.905990782089649e-06, "epoch": 1.126108518799013, "percentage": 22.52, "elapsed_time": "0:59:09", "remaining_time": "3:23:29", "throughput": 8755.7, "total_tokens": 31075744} +{"current_steps": 46100, "total_steps": 204665, "loss": 0.0117, "lr": 1.9059546810078504e-06, "epoch": 1.1262306696308602, "percentage": 22.52, "elapsed_time": "0:59:09", "remaining_time": "3:23:28", "throughput": 8755.88, "total_tokens": 31079456} +{"current_steps": 46105, "total_steps": 204665, "loss": 0.0041, "lr": 1.9059185733377057e-06, "epoch": 1.1263528204627073, "percentage": 22.53, "elapsed_time": "0:59:09", "remaining_time": "3:23:28", "throughput": 8755.99, "total_tokens": 31082912} +{"current_steps": 46110, "total_steps": 204665, "loss": 0.0755, "lr": 1.9058824590794776e-06, "epoch": 1.1264749712945545, "percentage": 22.53, "elapsed_time": "0:59:10", "remaining_time": "3:23:27", "throughput": 8756.12, "total_tokens": 31086432} +{"current_steps": 46115, "total_steps": 204665, "loss": 0.0963, "lr": 1.9058463382334283e-06, "epoch": 1.1265971221264017, "percentage": 22.53, "elapsed_time": "0:59:10", "remaining_time": "3:23:27", "throughput": 8756.33, "total_tokens": 31090336} +{"current_steps": 46120, "total_steps": 204665, "loss": 0.0509, "lr": 1.9058102107998208e-06, "epoch": 1.126719272958249, "percentage": 22.53, "elapsed_time": "0:59:10", "remaining_time": "3:23:27", "throughput": 8756.45, "total_tokens": 31093856} +{"current_steps": 46125, "total_steps": 204665, "loss": 0.0786, "lr": 1.9057740767789182e-06, "epoch": 1.126841423790096, "percentage": 22.54, "elapsed_time": "0:59:11", "remaining_time": "3:23:26", "throughput": 8756.48, "total_tokens": 31096928} +{"current_steps": 46130, "total_steps": 204665, "loss": 0.1037, "lr": 1.9057379361709827e-06, "epoch": 1.1269635746219433, "percentage": 22.54, "elapsed_time": "0:59:11", "remaining_time": "3:23:25", "throughput": 8756.59, "total_tokens": 31100384} +{"current_steps": 46135, "total_steps": 204665, "loss": 0.0674, "lr": 1.9057017889762772e-06, "epoch": 1.1270857254537903, "percentage": 22.54, "elapsed_time": "0:59:11", "remaining_time": "3:23:25", "throughput": 8756.6, "total_tokens": 31103392} +{"current_steps": 46140, "total_steps": 204665, "loss": 0.0368, "lr": 1.905665635195065e-06, "epoch": 1.1272078762856375, "percentage": 22.54, "elapsed_time": "0:59:12", "remaining_time": "3:23:24", "throughput": 8756.77, "total_tokens": 31107040} +{"current_steps": 46145, "total_steps": 204665, "loss": 0.0386, "lr": 1.9056294748276081e-06, "epoch": 1.1273300271174846, "percentage": 22.55, "elapsed_time": "0:59:12", "remaining_time": "3:23:24", "throughput": 8756.79, "total_tokens": 31110112} +{"current_steps": 46150, "total_steps": 204665, "loss": 0.0635, "lr": 1.9055933078741706e-06, "epoch": 1.1274521779493318, "percentage": 22.55, "elapsed_time": "0:59:13", "remaining_time": "3:23:23", "throughput": 8756.84, "total_tokens": 31113312} +{"current_steps": 46155, "total_steps": 204665, "loss": 0.0946, "lr": 1.9055571343350148e-06, "epoch": 1.127574328781179, "percentage": 22.55, "elapsed_time": "0:59:13", "remaining_time": "3:23:23", "throughput": 8756.96, "total_tokens": 31116768} +{"current_steps": 46160, "total_steps": 204665, "loss": 0.0064, "lr": 1.905520954210404e-06, "epoch": 1.1276964796130262, "percentage": 22.55, "elapsed_time": "0:59:13", "remaining_time": "3:23:22", "throughput": 8757.14, "total_tokens": 31120544} +{"current_steps": 46165, "total_steps": 204665, "loss": 0.0645, "lr": 1.9054847675006013e-06, "epoch": 1.1278186304448734, "percentage": 22.56, "elapsed_time": "0:59:14", "remaining_time": "3:23:22", "throughput": 8757.21, "total_tokens": 31123808} +{"current_steps": 46170, "total_steps": 204665, "loss": 0.0966, "lr": 1.9054485742058697e-06, "epoch": 1.1279407812767204, "percentage": 22.56, "elapsed_time": "0:59:14", "remaining_time": "3:23:21", "throughput": 8757.32, "total_tokens": 31127264} +{"current_steps": 46175, "total_steps": 204665, "loss": 0.0776, "lr": 1.9054123743264725e-06, "epoch": 1.1280629321085676, "percentage": 22.56, "elapsed_time": "0:59:14", "remaining_time": "3:23:21", "throughput": 8757.49, "total_tokens": 31130976} +{"current_steps": 46180, "total_steps": 204665, "loss": 0.0974, "lr": 1.9053761678626733e-06, "epoch": 1.1281850829404148, "percentage": 22.56, "elapsed_time": "0:59:15", "remaining_time": "3:23:20", "throughput": 8757.51, "total_tokens": 31133984} +{"current_steps": 46185, "total_steps": 204665, "loss": 0.0061, "lr": 1.9053399548147348e-06, "epoch": 1.128307233772262, "percentage": 22.57, "elapsed_time": "0:59:15", "remaining_time": "3:23:20", "throughput": 8757.55, "total_tokens": 31137184} +{"current_steps": 46190, "total_steps": 204665, "loss": 0.0486, "lr": 1.9053037351829207e-06, "epoch": 1.1284293846041091, "percentage": 22.57, "elapsed_time": "0:59:15", "remaining_time": "3:23:19", "throughput": 8757.63, "total_tokens": 31140448} +{"current_steps": 46195, "total_steps": 204665, "loss": 0.1009, "lr": 1.9052675089674942e-06, "epoch": 1.1285515354359563, "percentage": 22.57, "elapsed_time": "0:59:16", "remaining_time": "3:23:19", "throughput": 8757.69, "total_tokens": 31143712} +{"current_steps": 46200, "total_steps": 204665, "loss": 0.0236, "lr": 1.905231276168719e-06, "epoch": 1.1286736862678035, "percentage": 22.57, "elapsed_time": "0:59:16", "remaining_time": "3:23:18", "throughput": 8757.71, "total_tokens": 31146784} +{"current_steps": 46205, "total_steps": 204665, "loss": 0.1454, "lr": 1.9051950367868589e-06, "epoch": 1.1287958370996507, "percentage": 22.58, "elapsed_time": "0:59:16", "remaining_time": "3:23:18", "throughput": 8757.87, "total_tokens": 31150496} +{"current_steps": 46210, "total_steps": 204665, "loss": 0.1363, "lr": 1.9051587908221766e-06, "epoch": 1.128917987931498, "percentage": 22.58, "elapsed_time": "0:59:17", "remaining_time": "3:23:17", "throughput": 8758.04, "total_tokens": 31154208} +{"current_steps": 46215, "total_steps": 204665, "loss": 0.0925, "lr": 1.905122538274936e-06, "epoch": 1.129040138763345, "percentage": 22.58, "elapsed_time": "0:59:17", "remaining_time": "3:23:17", "throughput": 8758.13, "total_tokens": 31157600} +{"current_steps": 46220, "total_steps": 204665, "loss": 0.0013, "lr": 1.9050862791454011e-06, "epoch": 1.129162289595192, "percentage": 22.58, "elapsed_time": "0:59:17", "remaining_time": "3:23:16", "throughput": 8758.18, "total_tokens": 31160800} +{"current_steps": 46225, "total_steps": 204665, "loss": 0.199, "lr": 1.9050500134338353e-06, "epoch": 1.1292844404270392, "percentage": 22.59, "elapsed_time": "0:59:18", "remaining_time": "3:23:16", "throughput": 8758.26, "total_tokens": 31164064} +{"current_steps": 46230, "total_steps": 204665, "loss": 0.0435, "lr": 1.9050137411405024e-06, "epoch": 1.1294065912588864, "percentage": 22.59, "elapsed_time": "0:59:18", "remaining_time": "3:23:15", "throughput": 8758.29, "total_tokens": 31167136} +{"current_steps": 46235, "total_steps": 204665, "loss": 0.0609, "lr": 1.9049774622656661e-06, "epoch": 1.1295287420907336, "percentage": 22.59, "elapsed_time": "0:59:18", "remaining_time": "3:23:15", "throughput": 8758.29, "total_tokens": 31170144} +{"current_steps": 46240, "total_steps": 204665, "loss": 0.0446, "lr": 1.90494117680959e-06, "epoch": 1.1296508929225808, "percentage": 22.59, "elapsed_time": "0:59:19", "remaining_time": "3:23:14", "throughput": 8758.33, "total_tokens": 31173280} +{"current_steps": 46245, "total_steps": 204665, "loss": 0.0281, "lr": 1.9049048847725388e-06, "epoch": 1.129773043754428, "percentage": 22.6, "elapsed_time": "0:59:19", "remaining_time": "3:23:14", "throughput": 8758.53, "total_tokens": 31177120} +{"current_steps": 46250, "total_steps": 204665, "loss": 0.1817, "lr": 1.9048685861547755e-06, "epoch": 1.1298951945862752, "percentage": 22.6, "elapsed_time": "0:59:19", "remaining_time": "3:23:13", "throughput": 8758.52, "total_tokens": 31180000} +{"current_steps": 46255, "total_steps": 204665, "loss": 0.0721, "lr": 1.9048322809565644e-06, "epoch": 1.1300173454181224, "percentage": 22.6, "elapsed_time": "0:59:20", "remaining_time": "3:23:13", "throughput": 8758.58, "total_tokens": 31183200} +{"current_steps": 46260, "total_steps": 204665, "loss": 0.0749, "lr": 1.90479596917817e-06, "epoch": 1.1301394962499693, "percentage": 22.6, "elapsed_time": "0:59:20", "remaining_time": "3:23:12", "throughput": 8758.69, "total_tokens": 31186720} +{"current_steps": 46265, "total_steps": 204665, "loss": 0.0618, "lr": 1.9047596508198556e-06, "epoch": 1.1302616470818165, "percentage": 22.61, "elapsed_time": "0:59:21", "remaining_time": "3:23:12", "throughput": 8758.76, "total_tokens": 31189984} +{"current_steps": 46270, "total_steps": 204665, "loss": 0.0574, "lr": 1.904723325881886e-06, "epoch": 1.1303837979136637, "percentage": 22.61, "elapsed_time": "0:59:21", "remaining_time": "3:23:11", "throughput": 8758.93, "total_tokens": 31193696} +{"current_steps": 46275, "total_steps": 204665, "loss": 0.0477, "lr": 1.9046869943645246e-06, "epoch": 1.130505948745511, "percentage": 22.61, "elapsed_time": "0:59:21", "remaining_time": "3:23:11", "throughput": 8759.19, "total_tokens": 31197792} +{"current_steps": 46280, "total_steps": 204665, "loss": 0.0344, "lr": 1.9046506562680365e-06, "epoch": 1.130628099577358, "percentage": 22.61, "elapsed_time": "0:59:22", "remaining_time": "3:23:10", "throughput": 8759.24, "total_tokens": 31200992} +{"current_steps": 46285, "total_steps": 204665, "loss": 0.0564, "lr": 1.9046143115926851e-06, "epoch": 1.1307502504092053, "percentage": 22.62, "elapsed_time": "0:59:22", "remaining_time": "3:23:09", "throughput": 8759.29, "total_tokens": 31204128} +{"current_steps": 46290, "total_steps": 204665, "loss": 0.0887, "lr": 1.9045779603387353e-06, "epoch": 1.1308724012410525, "percentage": 22.62, "elapsed_time": "0:59:22", "remaining_time": "3:23:09", "throughput": 8759.36, "total_tokens": 31207392} +{"current_steps": 46295, "total_steps": 204665, "loss": 0.0545, "lr": 1.9045416025064514e-06, "epoch": 1.1309945520728997, "percentage": 22.62, "elapsed_time": "0:59:23", "remaining_time": "3:23:08", "throughput": 8759.42, "total_tokens": 31210656} +{"current_steps": 46300, "total_steps": 204665, "loss": 0.0667, "lr": 1.9045052380960972e-06, "epoch": 1.1311167029047469, "percentage": 22.62, "elapsed_time": "0:59:23", "remaining_time": "3:23:08", "throughput": 8759.46, "total_tokens": 31213792} +{"current_steps": 46305, "total_steps": 204665, "loss": 0.1294, "lr": 1.9044688671079382e-06, "epoch": 1.131238853736594, "percentage": 22.62, "elapsed_time": "0:59:23", "remaining_time": "3:23:07", "throughput": 8759.45, "total_tokens": 31216672} +{"current_steps": 46310, "total_steps": 204665, "loss": 0.0577, "lr": 1.904432489542238e-06, "epoch": 1.131361004568441, "percentage": 22.63, "elapsed_time": "0:59:24", "remaining_time": "3:23:07", "throughput": 8759.59, "total_tokens": 31220256} +{"current_steps": 46315, "total_steps": 204665, "loss": 0.0775, "lr": 1.9043961053992616e-06, "epoch": 1.1314831554002882, "percentage": 22.63, "elapsed_time": "0:59:24", "remaining_time": "3:23:06", "throughput": 8759.72, "total_tokens": 31223776} +{"current_steps": 46320, "total_steps": 204665, "loss": 0.0025, "lr": 1.9043597146792733e-06, "epoch": 1.1316053062321354, "percentage": 22.63, "elapsed_time": "0:59:24", "remaining_time": "3:23:06", "throughput": 8759.82, "total_tokens": 31227168} +{"current_steps": 46325, "total_steps": 204665, "loss": 0.2185, "lr": 1.9043233173825382e-06, "epoch": 1.1317274570639826, "percentage": 22.63, "elapsed_time": "0:59:25", "remaining_time": "3:23:05", "throughput": 8759.88, "total_tokens": 31230368} +{"current_steps": 46330, "total_steps": 204665, "loss": 0.0088, "lr": 1.9042869135093205e-06, "epoch": 1.1318496078958298, "percentage": 22.64, "elapsed_time": "0:59:25", "remaining_time": "3:23:05", "throughput": 8759.98, "total_tokens": 31233760} +{"current_steps": 46335, "total_steps": 204665, "loss": 0.0758, "lr": 1.9042505030598853e-06, "epoch": 1.131971758727677, "percentage": 22.64, "elapsed_time": "0:59:25", "remaining_time": "3:23:04", "throughput": 8760.02, "total_tokens": 31236896} +{"current_steps": 46340, "total_steps": 204665, "loss": 0.0013, "lr": 1.904214086034497e-06, "epoch": 1.1320939095595242, "percentage": 22.64, "elapsed_time": "0:59:26", "remaining_time": "3:23:04", "throughput": 8760.26, "total_tokens": 31240928} +{"current_steps": 46345, "total_steps": 204665, "loss": 0.0048, "lr": 1.9041776624334206e-06, "epoch": 1.1322160603913713, "percentage": 22.64, "elapsed_time": "0:59:26", "remaining_time": "3:23:03", "throughput": 8760.38, "total_tokens": 31244448} +{"current_steps": 46350, "total_steps": 204665, "loss": 0.1378, "lr": 1.9041412322569212e-06, "epoch": 1.1323382112232183, "percentage": 22.65, "elapsed_time": "0:59:26", "remaining_time": "3:23:03", "throughput": 8760.36, "total_tokens": 31247328} +{"current_steps": 46355, "total_steps": 204665, "loss": 0.1222, "lr": 1.9041047955052639e-06, "epoch": 1.1324603620550655, "percentage": 22.65, "elapsed_time": "0:59:27", "remaining_time": "3:23:02", "throughput": 8760.4, "total_tokens": 31250464} +{"current_steps": 46360, "total_steps": 204665, "loss": 0.1001, "lr": 1.9040683521787128e-06, "epoch": 1.1325825128869127, "percentage": 22.65, "elapsed_time": "0:59:27", "remaining_time": "3:23:02", "throughput": 8760.51, "total_tokens": 31253856} +{"current_steps": 46365, "total_steps": 204665, "loss": 0.2089, "lr": 1.9040319022775337e-06, "epoch": 1.1327046637187599, "percentage": 22.65, "elapsed_time": "0:59:27", "remaining_time": "3:23:01", "throughput": 8760.52, "total_tokens": 31256864} +{"current_steps": 46370, "total_steps": 204665, "loss": 0.156, "lr": 1.9039954458019918e-06, "epoch": 1.132826814550607, "percentage": 22.66, "elapsed_time": "0:59:28", "remaining_time": "3:23:01", "throughput": 8760.58, "total_tokens": 31260064} +{"current_steps": 46375, "total_steps": 204665, "loss": 0.1985, "lr": 1.9039589827523512e-06, "epoch": 1.1329489653824543, "percentage": 22.66, "elapsed_time": "0:59:28", "remaining_time": "3:23:00", "throughput": 8760.64, "total_tokens": 31263328} +{"current_steps": 46380, "total_steps": 204665, "loss": 0.0926, "lr": 1.903922513128878e-06, "epoch": 1.1330711162143015, "percentage": 22.66, "elapsed_time": "0:59:28", "remaining_time": "3:23:00", "throughput": 8760.7, "total_tokens": 31266592} +{"current_steps": 46385, "total_steps": 204665, "loss": 0.035, "lr": 1.9038860369318375e-06, "epoch": 1.1331932670461486, "percentage": 22.66, "elapsed_time": "0:59:29", "remaining_time": "3:22:59", "throughput": 8760.93, "total_tokens": 31270560} +{"current_steps": 46390, "total_steps": 204665, "loss": 0.0376, "lr": 1.9038495541614945e-06, "epoch": 1.1333154178779958, "percentage": 22.67, "elapsed_time": "0:59:29", "remaining_time": "3:22:59", "throughput": 8760.99, "total_tokens": 31273760} +{"current_steps": 46395, "total_steps": 204665, "loss": 0.0388, "lr": 1.903813064818114e-06, "epoch": 1.133437568709843, "percentage": 22.67, "elapsed_time": "0:59:30", "remaining_time": "3:22:58", "throughput": 8761.06, "total_tokens": 31277024} +{"current_steps": 46400, "total_steps": 204665, "loss": 0.0996, "lr": 1.9037765689019622e-06, "epoch": 1.13355971954169, "percentage": 22.67, "elapsed_time": "0:59:30", "remaining_time": "3:22:58", "throughput": 8761.1, "total_tokens": 31280160} +{"current_steps": 46405, "total_steps": 204665, "loss": 0.1017, "lr": 1.9037400664133042e-06, "epoch": 1.1336818703735372, "percentage": 22.67, "elapsed_time": "0:59:30", "remaining_time": "3:22:57", "throughput": 8761.21, "total_tokens": 31283616} +{"current_steps": 46410, "total_steps": 204665, "loss": 0.0062, "lr": 1.903703557352405e-06, "epoch": 1.1338040212053844, "percentage": 22.68, "elapsed_time": "0:59:31", "remaining_time": "3:22:57", "throughput": 8761.33, "total_tokens": 31287072} +{"current_steps": 46415, "total_steps": 204665, "loss": 0.1631, "lr": 1.9036670417195306e-06, "epoch": 1.1339261720372316, "percentage": 22.68, "elapsed_time": "0:59:31", "remaining_time": "3:22:56", "throughput": 8761.32, "total_tokens": 31290016} +{"current_steps": 46420, "total_steps": 204665, "loss": 0.1306, "lr": 1.9036305195149464e-06, "epoch": 1.1340483228690788, "percentage": 22.68, "elapsed_time": "0:59:31", "remaining_time": "3:22:55", "throughput": 8761.48, "total_tokens": 31293728} +{"current_steps": 46425, "total_steps": 204665, "loss": 0.061, "lr": 1.9035939907389182e-06, "epoch": 1.134170473700926, "percentage": 22.68, "elapsed_time": "0:59:32", "remaining_time": "3:22:55", "throughput": 8761.48, "total_tokens": 31296672} +{"current_steps": 46430, "total_steps": 204665, "loss": 0.1522, "lr": 1.9035574553917112e-06, "epoch": 1.1342926245327731, "percentage": 22.69, "elapsed_time": "0:59:32", "remaining_time": "3:22:54", "throughput": 8761.71, "total_tokens": 31300640} +{"current_steps": 46435, "total_steps": 204665, "loss": 0.0435, "lr": 1.9035209134735916e-06, "epoch": 1.1344147753646203, "percentage": 22.69, "elapsed_time": "0:59:32", "remaining_time": "3:22:54", "throughput": 8761.78, "total_tokens": 31303968} +{"current_steps": 46440, "total_steps": 204665, "loss": 0.1184, "lr": 1.9034843649848248e-06, "epoch": 1.1345369261964673, "percentage": 22.69, "elapsed_time": "0:59:33", "remaining_time": "3:22:53", "throughput": 8761.81, "total_tokens": 31307040} +{"current_steps": 46445, "total_steps": 204665, "loss": 0.1014, "lr": 1.9034478099256765e-06, "epoch": 1.1346590770283145, "percentage": 22.69, "elapsed_time": "0:59:33", "remaining_time": "3:22:53", "throughput": 8761.89, "total_tokens": 31310368} +{"current_steps": 46450, "total_steps": 204665, "loss": 0.0286, "lr": 1.9034112482964128e-06, "epoch": 1.1347812278601617, "percentage": 22.7, "elapsed_time": "0:59:33", "remaining_time": "3:22:52", "throughput": 8761.9, "total_tokens": 31313376} +{"current_steps": 46455, "total_steps": 204665, "loss": 0.0489, "lr": 1.9033746800972999e-06, "epoch": 1.1349033786920089, "percentage": 22.7, "elapsed_time": "0:59:34", "remaining_time": "3:22:52", "throughput": 8762.05, "total_tokens": 31316960} +{"current_steps": 46460, "total_steps": 204665, "loss": 0.0743, "lr": 1.903338105328603e-06, "epoch": 1.135025529523856, "percentage": 22.7, "elapsed_time": "0:59:34", "remaining_time": "3:22:51", "throughput": 8762.09, "total_tokens": 31320096} +{"current_steps": 46465, "total_steps": 204665, "loss": 0.0019, "lr": 1.9033015239905885e-06, "epoch": 1.1351476803557032, "percentage": 22.7, "elapsed_time": "0:59:34", "remaining_time": "3:22:51", "throughput": 8762.24, "total_tokens": 31323744} +{"current_steps": 46470, "total_steps": 204665, "loss": 0.0281, "lr": 1.9032649360835222e-06, "epoch": 1.1352698311875504, "percentage": 22.71, "elapsed_time": "0:59:35", "remaining_time": "3:22:50", "throughput": 8762.29, "total_tokens": 31326944} +{"current_steps": 46475, "total_steps": 204665, "loss": 0.0533, "lr": 1.9032283416076704e-06, "epoch": 1.1353919820193976, "percentage": 22.71, "elapsed_time": "0:59:35", "remaining_time": "3:22:50", "throughput": 8762.33, "total_tokens": 31330080} +{"current_steps": 46480, "total_steps": 204665, "loss": 0.1661, "lr": 1.9031917405632993e-06, "epoch": 1.1355141328512448, "percentage": 22.71, "elapsed_time": "0:59:35", "remaining_time": "3:22:49", "throughput": 8762.52, "total_tokens": 31333856} +{"current_steps": 46485, "total_steps": 204665, "loss": 0.0009, "lr": 1.903155132950675e-06, "epoch": 1.135636283683092, "percentage": 22.71, "elapsed_time": "0:59:36", "remaining_time": "3:22:49", "throughput": 8762.54, "total_tokens": 31336928} +{"current_steps": 46490, "total_steps": 204665, "loss": 0.1065, "lr": 1.9031185187700634e-06, "epoch": 1.135758434514939, "percentage": 22.72, "elapsed_time": "0:59:36", "remaining_time": "3:22:48", "throughput": 8762.64, "total_tokens": 31340320} +{"current_steps": 46495, "total_steps": 204665, "loss": 0.0969, "lr": 1.9030818980217313e-06, "epoch": 1.1358805853467862, "percentage": 22.72, "elapsed_time": "0:59:36", "remaining_time": "3:22:48", "throughput": 8762.65, "total_tokens": 31343392} +{"current_steps": 46500, "total_steps": 204665, "loss": 0.1355, "lr": 1.9030452707059445e-06, "epoch": 1.1360027361786333, "percentage": 22.72, "elapsed_time": "0:59:37", "remaining_time": "3:22:47", "throughput": 8762.76, "total_tokens": 31346848} +{"current_steps": 46505, "total_steps": 204665, "loss": 0.0748, "lr": 1.9030086368229696e-06, "epoch": 1.1361248870104805, "percentage": 22.72, "elapsed_time": "0:59:37", "remaining_time": "3:22:47", "throughput": 8762.89, "total_tokens": 31350432} +{"current_steps": 46510, "total_steps": 204665, "loss": 0.0875, "lr": 1.9029719963730732e-06, "epoch": 1.1362470378423277, "percentage": 22.72, "elapsed_time": "0:59:38", "remaining_time": "3:22:46", "throughput": 8763.18, "total_tokens": 31354656} +{"current_steps": 46515, "total_steps": 204665, "loss": 0.0497, "lr": 1.9029353493565215e-06, "epoch": 1.136369188674175, "percentage": 22.73, "elapsed_time": "0:59:38", "remaining_time": "3:22:46", "throughput": 8763.3, "total_tokens": 31358176} +{"current_steps": 46520, "total_steps": 204665, "loss": 0.0028, "lr": 1.9028986957735808e-06, "epoch": 1.136491339506022, "percentage": 22.73, "elapsed_time": "0:59:38", "remaining_time": "3:22:45", "throughput": 8763.28, "total_tokens": 31361056} +{"current_steps": 46525, "total_steps": 204665, "loss": 0.0854, "lr": 1.9028620356245185e-06, "epoch": 1.1366134903378693, "percentage": 22.73, "elapsed_time": "0:59:39", "remaining_time": "3:22:45", "throughput": 8763.24, "total_tokens": 31363808} +{"current_steps": 46530, "total_steps": 204665, "loss": 0.114, "lr": 1.9028253689096e-06, "epoch": 1.1367356411697163, "percentage": 22.73, "elapsed_time": "0:59:39", "remaining_time": "3:22:44", "throughput": 8763.38, "total_tokens": 31367392} +{"current_steps": 46535, "total_steps": 204665, "loss": 0.0493, "lr": 1.902788695629093e-06, "epoch": 1.1368577920015634, "percentage": 22.74, "elapsed_time": "0:59:39", "remaining_time": "3:22:44", "throughput": 8763.51, "total_tokens": 31370912} +{"current_steps": 46540, "total_steps": 204665, "loss": 0.0007, "lr": 1.902752015783264e-06, "epoch": 1.1369799428334106, "percentage": 22.74, "elapsed_time": "0:59:40", "remaining_time": "3:22:43", "throughput": 8763.59, "total_tokens": 31374240} +{"current_steps": 46545, "total_steps": 204665, "loss": 0.2546, "lr": 1.902715329372379e-06, "epoch": 1.1371020936652578, "percentage": 22.74, "elapsed_time": "0:59:40", "remaining_time": "3:22:43", "throughput": 8763.68, "total_tokens": 31377632} +{"current_steps": 46550, "total_steps": 204665, "loss": 0.0117, "lr": 1.9026786363967056e-06, "epoch": 1.137224244497105, "percentage": 22.74, "elapsed_time": "0:59:40", "remaining_time": "3:22:42", "throughput": 8763.78, "total_tokens": 31381088} +{"current_steps": 46555, "total_steps": 204665, "loss": 0.0839, "lr": 1.9026419368565103e-06, "epoch": 1.1373463953289522, "percentage": 22.75, "elapsed_time": "0:59:41", "remaining_time": "3:22:42", "throughput": 8763.86, "total_tokens": 31384416} +{"current_steps": 46560, "total_steps": 204665, "loss": 0.1168, "lr": 1.90260523075206e-06, "epoch": 1.1374685461607994, "percentage": 22.75, "elapsed_time": "0:59:41", "remaining_time": "3:22:41", "throughput": 8763.88, "total_tokens": 31387488} +{"current_steps": 46565, "total_steps": 204665, "loss": 0.2021, "lr": 1.9025685180836218e-06, "epoch": 1.1375906969926466, "percentage": 22.75, "elapsed_time": "0:59:41", "remaining_time": "3:22:41", "throughput": 8764.0, "total_tokens": 31391008} +{"current_steps": 46570, "total_steps": 204665, "loss": 0.0107, "lr": 1.9025317988514624e-06, "epoch": 1.1377128478244938, "percentage": 22.75, "elapsed_time": "0:59:42", "remaining_time": "3:22:40", "throughput": 8764.05, "total_tokens": 31394208} +{"current_steps": 46575, "total_steps": 204665, "loss": 0.139, "lr": 1.9024950730558493e-06, "epoch": 1.137834998656341, "percentage": 22.76, "elapsed_time": "0:59:42", "remaining_time": "3:22:40", "throughput": 8764.11, "total_tokens": 31397408} +{"current_steps": 46580, "total_steps": 204665, "loss": 0.0533, "lr": 1.902458340697049e-06, "epoch": 1.137957149488188, "percentage": 22.76, "elapsed_time": "0:59:42", "remaining_time": "3:22:39", "throughput": 8764.25, "total_tokens": 31400992} +{"current_steps": 46585, "total_steps": 204665, "loss": 0.0471, "lr": 1.902421601775329e-06, "epoch": 1.1380793003200351, "percentage": 22.76, "elapsed_time": "0:59:43", "remaining_time": "3:22:39", "throughput": 8764.35, "total_tokens": 31404448} +{"current_steps": 46590, "total_steps": 204665, "loss": 0.1839, "lr": 1.9023848562909566e-06, "epoch": 1.1382014511518823, "percentage": 22.76, "elapsed_time": "0:59:43", "remaining_time": "3:22:38", "throughput": 8764.46, "total_tokens": 31407904} +{"current_steps": 46595, "total_steps": 204665, "loss": 0.1041, "lr": 1.9023481042441985e-06, "epoch": 1.1383236019837295, "percentage": 22.77, "elapsed_time": "0:59:43", "remaining_time": "3:22:38", "throughput": 8764.49, "total_tokens": 31410976} +{"current_steps": 46600, "total_steps": 204665, "loss": 0.0976, "lr": 1.9023113456353225e-06, "epoch": 1.1384457528155767, "percentage": 22.77, "elapsed_time": "0:59:44", "remaining_time": "3:22:37", "throughput": 8764.57, "total_tokens": 31414304} +{"current_steps": 46605, "total_steps": 204665, "loss": 0.1017, "lr": 1.9022745804645958e-06, "epoch": 1.1385679036474239, "percentage": 22.77, "elapsed_time": "0:59:44", "remaining_time": "3:22:37", "throughput": 8764.66, "total_tokens": 31417696} +{"current_steps": 46610, "total_steps": 204665, "loss": 0.0565, "lr": 1.9022378087322855e-06, "epoch": 1.138690054479271, "percentage": 22.77, "elapsed_time": "0:59:44", "remaining_time": "3:22:36", "throughput": 8764.71, "total_tokens": 31420832} +{"current_steps": 46615, "total_steps": 204665, "loss": 0.0414, "lr": 1.9022010304386588e-06, "epoch": 1.1388122053111183, "percentage": 22.78, "elapsed_time": "0:59:45", "remaining_time": "3:22:36", "throughput": 8764.82, "total_tokens": 31424288} +{"current_steps": 46620, "total_steps": 204665, "loss": 0.0571, "lr": 1.902164245583984e-06, "epoch": 1.1389343561429652, "percentage": 22.78, "elapsed_time": "0:59:45", "remaining_time": "3:22:35", "throughput": 8764.91, "total_tokens": 31427680} +{"current_steps": 46625, "total_steps": 204665, "loss": 0.0807, "lr": 1.9021274541685282e-06, "epoch": 1.1390565069748124, "percentage": 22.78, "elapsed_time": "0:59:45", "remaining_time": "3:22:34", "throughput": 8764.96, "total_tokens": 31430880} +{"current_steps": 46630, "total_steps": 204665, "loss": 0.1928, "lr": 1.9020906561925587e-06, "epoch": 1.1391786578066596, "percentage": 22.78, "elapsed_time": "0:59:46", "remaining_time": "3:22:34", "throughput": 8765.03, "total_tokens": 31434144} +{"current_steps": 46635, "total_steps": 204665, "loss": 0.0031, "lr": 1.9020538516563436e-06, "epoch": 1.1393008086385068, "percentage": 22.79, "elapsed_time": "0:59:46", "remaining_time": "3:22:33", "throughput": 8765.11, "total_tokens": 31437472} +{"current_steps": 46640, "total_steps": 204665, "loss": 0.1425, "lr": 1.9020170405601498e-06, "epoch": 1.139422959470354, "percentage": 22.79, "elapsed_time": "0:59:47", "remaining_time": "3:22:33", "throughput": 8765.16, "total_tokens": 31440672} +{"current_steps": 46645, "total_steps": 204665, "loss": 0.0012, "lr": 1.9019802229042458e-06, "epoch": 1.1395451103022012, "percentage": 22.79, "elapsed_time": "0:59:47", "remaining_time": "3:22:32", "throughput": 8765.21, "total_tokens": 31443872} +{"current_steps": 46650, "total_steps": 204665, "loss": 0.0506, "lr": 1.901943398688899e-06, "epoch": 1.1396672611340484, "percentage": 22.79, "elapsed_time": "0:59:47", "remaining_time": "3:22:32", "throughput": 8765.28, "total_tokens": 31447136} +{"current_steps": 46655, "total_steps": 204665, "loss": 0.2119, "lr": 1.901906567914377e-06, "epoch": 1.1397894119658956, "percentage": 22.8, "elapsed_time": "0:59:48", "remaining_time": "3:22:31", "throughput": 8765.41, "total_tokens": 31450656} +{"current_steps": 46660, "total_steps": 204665, "loss": 0.0027, "lr": 1.9018697305809482e-06, "epoch": 1.1399115627977428, "percentage": 22.8, "elapsed_time": "0:59:48", "remaining_time": "3:22:31", "throughput": 8765.39, "total_tokens": 31453536} +{"current_steps": 46665, "total_steps": 204665, "loss": 0.1224, "lr": 1.9018328866888798e-06, "epoch": 1.14003371362959, "percentage": 22.8, "elapsed_time": "0:59:48", "remaining_time": "3:22:30", "throughput": 8765.43, "total_tokens": 31456672} +{"current_steps": 46670, "total_steps": 204665, "loss": 0.1004, "lr": 1.9017960362384402e-06, "epoch": 1.140155864461437, "percentage": 22.8, "elapsed_time": "0:59:49", "remaining_time": "3:22:30", "throughput": 8765.56, "total_tokens": 31460192} +{"current_steps": 46675, "total_steps": 204665, "loss": 0.1509, "lr": 1.9017591792298974e-06, "epoch": 1.140278015293284, "percentage": 22.81, "elapsed_time": "0:59:49", "remaining_time": "3:22:29", "throughput": 8765.6, "total_tokens": 31463328} +{"current_steps": 46680, "total_steps": 204665, "loss": 0.0268, "lr": 1.9017223156635191e-06, "epoch": 1.1404001661251313, "percentage": 22.81, "elapsed_time": "0:59:49", "remaining_time": "3:22:29", "throughput": 8765.68, "total_tokens": 31466656} +{"current_steps": 46685, "total_steps": 204665, "loss": 0.1924, "lr": 1.901685445539574e-06, "epoch": 1.1405223169569785, "percentage": 22.81, "elapsed_time": "0:59:50", "remaining_time": "3:22:28", "throughput": 8765.73, "total_tokens": 31469856} +{"current_steps": 46690, "total_steps": 204665, "loss": 0.0013, "lr": 1.9016485688583295e-06, "epoch": 1.1406444677888257, "percentage": 22.81, "elapsed_time": "0:59:50", "remaining_time": "3:22:28", "throughput": 8765.82, "total_tokens": 31473184} +{"current_steps": 46695, "total_steps": 204665, "loss": 0.0728, "lr": 1.901611685620054e-06, "epoch": 1.1407666186206729, "percentage": 22.82, "elapsed_time": "0:59:50", "remaining_time": "3:22:27", "throughput": 8765.89, "total_tokens": 31476448} +{"current_steps": 46700, "total_steps": 204665, "loss": 0.0809, "lr": 1.901574795825016e-06, "epoch": 1.14088876945252, "percentage": 22.82, "elapsed_time": "0:59:51", "remaining_time": "3:22:27", "throughput": 8765.98, "total_tokens": 31479776} +{"current_steps": 46705, "total_steps": 204665, "loss": 0.2304, "lr": 1.9015378994734834e-06, "epoch": 1.141010920284367, "percentage": 22.82, "elapsed_time": "0:59:51", "remaining_time": "3:22:26", "throughput": 8766.06, "total_tokens": 31483104} +{"current_steps": 46710, "total_steps": 204665, "loss": 0.0266, "lr": 1.901500996565725e-06, "epoch": 1.1411330711162142, "percentage": 22.82, "elapsed_time": "0:59:51", "remaining_time": "3:22:26", "throughput": 8766.14, "total_tokens": 31486432} +{"current_steps": 46715, "total_steps": 204665, "loss": 0.1238, "lr": 1.9014640871020084e-06, "epoch": 1.1412552219480614, "percentage": 22.83, "elapsed_time": "0:59:52", "remaining_time": "3:22:25", "throughput": 8766.26, "total_tokens": 31489952} +{"current_steps": 46720, "total_steps": 204665, "loss": 0.0747, "lr": 1.9014271710826032e-06, "epoch": 1.1413773727799086, "percentage": 22.83, "elapsed_time": "0:59:52", "remaining_time": "3:22:25", "throughput": 8766.34, "total_tokens": 31493280} +{"current_steps": 46725, "total_steps": 204665, "loss": 0.0029, "lr": 1.9013902485077767e-06, "epoch": 1.1414995236117558, "percentage": 22.83, "elapsed_time": "0:59:52", "remaining_time": "3:22:24", "throughput": 8766.42, "total_tokens": 31496608} +{"current_steps": 46730, "total_steps": 204665, "loss": 0.0033, "lr": 1.9013533193777977e-06, "epoch": 1.141621674443603, "percentage": 22.83, "elapsed_time": "0:59:53", "remaining_time": "3:22:24", "throughput": 8766.44, "total_tokens": 31499616} +{"current_steps": 46735, "total_steps": 204665, "loss": 0.1099, "lr": 1.9013163836929351e-06, "epoch": 1.1417438252754502, "percentage": 22.83, "elapsed_time": "0:59:53", "remaining_time": "3:22:23", "throughput": 8766.53, "total_tokens": 31502944} +{"current_steps": 46740, "total_steps": 204665, "loss": 0.0803, "lr": 1.9012794414534574e-06, "epoch": 1.1418659761072973, "percentage": 22.84, "elapsed_time": "0:59:53", "remaining_time": "3:22:23", "throughput": 8766.68, "total_tokens": 31506592} +{"current_steps": 46745, "total_steps": 204665, "loss": 0.0397, "lr": 1.9012424926596333e-06, "epoch": 1.1419881269391445, "percentage": 22.84, "elapsed_time": "0:59:54", "remaining_time": "3:22:22", "throughput": 8766.73, "total_tokens": 31509792} +{"current_steps": 46750, "total_steps": 204665, "loss": 0.0027, "lr": 1.9012055373117312e-06, "epoch": 1.1421102777709917, "percentage": 22.84, "elapsed_time": "0:59:54", "remaining_time": "3:22:22", "throughput": 8766.8, "total_tokens": 31513056} +{"current_steps": 46755, "total_steps": 204665, "loss": 0.1939, "lr": 1.9011685754100202e-06, "epoch": 1.1422324286028387, "percentage": 22.84, "elapsed_time": "0:59:54", "remaining_time": "3:22:21", "throughput": 8766.99, "total_tokens": 31516896} +{"current_steps": 46760, "total_steps": 204665, "loss": 0.1571, "lr": 1.901131606954769e-06, "epoch": 1.1423545794346859, "percentage": 22.85, "elapsed_time": "0:59:55", "remaining_time": "3:22:21", "throughput": 8767.03, "total_tokens": 31520032} +{"current_steps": 46765, "total_steps": 204665, "loss": 0.058, "lr": 1.901094631946246e-06, "epoch": 1.142476730266533, "percentage": 22.85, "elapsed_time": "0:59:55", "remaining_time": "3:22:20", "throughput": 8767.09, "total_tokens": 31523296} +{"current_steps": 46770, "total_steps": 204665, "loss": 0.2385, "lr": 1.9010576503847207e-06, "epoch": 1.1425988810983803, "percentage": 22.85, "elapsed_time": "0:59:55", "remaining_time": "3:22:19", "throughput": 8767.11, "total_tokens": 31526368} +{"current_steps": 46775, "total_steps": 204665, "loss": 0.1712, "lr": 1.901020662270462e-06, "epoch": 1.1427210319302274, "percentage": 22.85, "elapsed_time": "0:59:56", "remaining_time": "3:22:19", "throughput": 8767.28, "total_tokens": 31530080} +{"current_steps": 46780, "total_steps": 204665, "loss": 0.1039, "lr": 1.9009836676037382e-06, "epoch": 1.1428431827620746, "percentage": 22.86, "elapsed_time": "0:59:56", "remaining_time": "3:22:19", "throughput": 8767.46, "total_tokens": 31533856} +{"current_steps": 46785, "total_steps": 204665, "loss": 0.0891, "lr": 1.900946666384819e-06, "epoch": 1.1429653335939218, "percentage": 22.86, "elapsed_time": "0:59:57", "remaining_time": "3:22:18", "throughput": 8767.6, "total_tokens": 31537504} +{"current_steps": 46790, "total_steps": 204665, "loss": 0.1749, "lr": 1.9009096586139737e-06, "epoch": 1.143087484425769, "percentage": 22.86, "elapsed_time": "0:59:57", "remaining_time": "3:22:18", "throughput": 8767.63, "total_tokens": 31540576} +{"current_steps": 46795, "total_steps": 204665, "loss": 0.099, "lr": 1.9008726442914708e-06, "epoch": 1.143209635257616, "percentage": 22.86, "elapsed_time": "0:59:57", "remaining_time": "3:22:17", "throughput": 8767.6, "total_tokens": 31543392} +{"current_steps": 46800, "total_steps": 204665, "loss": 0.0932, "lr": 1.9008356234175794e-06, "epoch": 1.1433317860894632, "percentage": 22.87, "elapsed_time": "0:59:58", "remaining_time": "3:22:16", "throughput": 8767.7, "total_tokens": 31546848} +{"current_steps": 46805, "total_steps": 204665, "loss": 0.0097, "lr": 1.9007985959925697e-06, "epoch": 1.1434539369213104, "percentage": 22.87, "elapsed_time": "0:59:58", "remaining_time": "3:22:16", "throughput": 8767.78, "total_tokens": 31550176} +{"current_steps": 46810, "total_steps": 204665, "loss": 0.073, "lr": 1.9007615620167098e-06, "epoch": 1.1435760877531576, "percentage": 22.87, "elapsed_time": "0:59:58", "remaining_time": "3:22:15", "throughput": 8767.97, "total_tokens": 31554016} +{"current_steps": 46815, "total_steps": 204665, "loss": 0.0379, "lr": 1.9007245214902698e-06, "epoch": 1.1436982385850047, "percentage": 22.87, "elapsed_time": "0:59:59", "remaining_time": "3:22:15", "throughput": 8768.0, "total_tokens": 31557152} +{"current_steps": 46820, "total_steps": 204665, "loss": 0.0507, "lr": 1.900687474413519e-06, "epoch": 1.143820389416852, "percentage": 22.88, "elapsed_time": "0:59:59", "remaining_time": "3:22:14", "throughput": 8768.08, "total_tokens": 31560480} +{"current_steps": 46825, "total_steps": 204665, "loss": 0.0025, "lr": 1.9006504207867261e-06, "epoch": 1.1439425402486991, "percentage": 22.88, "elapsed_time": "0:59:59", "remaining_time": "3:22:14", "throughput": 8768.1, "total_tokens": 31563552} +{"current_steps": 46830, "total_steps": 204665, "loss": 0.0561, "lr": 1.9006133606101615e-06, "epoch": 1.1440646910805463, "percentage": 22.88, "elapsed_time": "1:00:00", "remaining_time": "3:22:13", "throughput": 8768.15, "total_tokens": 31566752} +{"current_steps": 46835, "total_steps": 204665, "loss": 0.1262, "lr": 1.9005762938840942e-06, "epoch": 1.1441868419123935, "percentage": 22.88, "elapsed_time": "1:00:00", "remaining_time": "3:22:13", "throughput": 8768.24, "total_tokens": 31570080} +{"current_steps": 46840, "total_steps": 204665, "loss": 0.0728, "lr": 1.9005392206087937e-06, "epoch": 1.1443089927442407, "percentage": 22.89, "elapsed_time": "1:00:00", "remaining_time": "3:22:12", "throughput": 8768.42, "total_tokens": 31573856} +{"current_steps": 46845, "total_steps": 204665, "loss": 0.082, "lr": 1.9005021407845302e-06, "epoch": 1.1444311435760877, "percentage": 22.89, "elapsed_time": "1:00:01", "remaining_time": "3:22:12", "throughput": 8768.54, "total_tokens": 31577376} +{"current_steps": 46850, "total_steps": 204665, "loss": 0.1102, "lr": 1.9004650544115726e-06, "epoch": 1.1445532944079349, "percentage": 22.89, "elapsed_time": "1:00:01", "remaining_time": "3:22:11", "throughput": 8768.62, "total_tokens": 31580704} +{"current_steps": 46855, "total_steps": 204665, "loss": 0.0784, "lr": 1.9004279614901908e-06, "epoch": 1.144675445239782, "percentage": 22.89, "elapsed_time": "1:00:01", "remaining_time": "3:22:11", "throughput": 8768.62, "total_tokens": 31583648} +{"current_steps": 46860, "total_steps": 204665, "loss": 0.0909, "lr": 1.9003908620206548e-06, "epoch": 1.1447975960716292, "percentage": 22.9, "elapsed_time": "1:00:02", "remaining_time": "3:22:10", "throughput": 8768.77, "total_tokens": 31587296} +{"current_steps": 46865, "total_steps": 204665, "loss": 0.1251, "lr": 1.9003537560032344e-06, "epoch": 1.1449197469034764, "percentage": 22.9, "elapsed_time": "1:00:02", "remaining_time": "3:22:10", "throughput": 8768.81, "total_tokens": 31590496} +{"current_steps": 46870, "total_steps": 204665, "loss": 0.0921, "lr": 1.9003166434381991e-06, "epoch": 1.1450418977353236, "percentage": 22.9, "elapsed_time": "1:00:02", "remaining_time": "3:22:09", "throughput": 8768.96, "total_tokens": 31594144} +{"current_steps": 46875, "total_steps": 204665, "loss": 0.0178, "lr": 1.9002795243258194e-06, "epoch": 1.1451640485671708, "percentage": 22.9, "elapsed_time": "1:00:03", "remaining_time": "3:22:09", "throughput": 8769.21, "total_tokens": 31598240} +{"current_steps": 46880, "total_steps": 204665, "loss": 0.0763, "lr": 1.9002423986663645e-06, "epoch": 1.145286199399018, "percentage": 22.91, "elapsed_time": "1:00:03", "remaining_time": "3:22:08", "throughput": 8769.31, "total_tokens": 31601632} +{"current_steps": 46885, "total_steps": 204665, "loss": 0.0697, "lr": 1.9002052664601048e-06, "epoch": 1.145408350230865, "percentage": 22.91, "elapsed_time": "1:00:04", "remaining_time": "3:22:08", "throughput": 8769.48, "total_tokens": 31605344} +{"current_steps": 46890, "total_steps": 204665, "loss": 0.0721, "lr": 1.9001681277073103e-06, "epoch": 1.1455305010627121, "percentage": 22.91, "elapsed_time": "1:00:04", "remaining_time": "3:22:07", "throughput": 8769.54, "total_tokens": 31608608} +{"current_steps": 46895, "total_steps": 204665, "loss": 0.1639, "lr": 1.9001309824082512e-06, "epoch": 1.1456526518945593, "percentage": 22.91, "elapsed_time": "1:00:04", "remaining_time": "3:22:07", "throughput": 8769.64, "total_tokens": 31612000} +{"current_steps": 46900, "total_steps": 204665, "loss": 0.2635, "lr": 1.9000938305631974e-06, "epoch": 1.1457748027264065, "percentage": 22.92, "elapsed_time": "1:00:05", "remaining_time": "3:22:06", "throughput": 8769.66, "total_tokens": 31615072} +{"current_steps": 46905, "total_steps": 204665, "loss": 0.1127, "lr": 1.9000566721724193e-06, "epoch": 1.1458969535582537, "percentage": 22.92, "elapsed_time": "1:00:05", "remaining_time": "3:22:06", "throughput": 8769.71, "total_tokens": 31618272} +{"current_steps": 46910, "total_steps": 204665, "loss": 0.0044, "lr": 1.9000195072361866e-06, "epoch": 1.146019104390101, "percentage": 22.92, "elapsed_time": "1:00:05", "remaining_time": "3:22:05", "throughput": 8769.78, "total_tokens": 31621536} +{"current_steps": 46915, "total_steps": 204665, "loss": 0.074, "lr": 1.89998233575477e-06, "epoch": 1.146141255221948, "percentage": 22.92, "elapsed_time": "1:00:06", "remaining_time": "3:22:05", "throughput": 8769.89, "total_tokens": 31624992} +{"current_steps": 46920, "total_steps": 204665, "loss": 0.0017, "lr": 1.8999451577284403e-06, "epoch": 1.1462634060537953, "percentage": 22.93, "elapsed_time": "1:00:06", "remaining_time": "3:22:04", "throughput": 8769.91, "total_tokens": 31628064} +{"current_steps": 46925, "total_steps": 204665, "loss": 0.1145, "lr": 1.899907973157467e-06, "epoch": 1.1463855568856425, "percentage": 22.93, "elapsed_time": "1:00:06", "remaining_time": "3:22:04", "throughput": 8770.11, "total_tokens": 31631904} +{"current_steps": 46930, "total_steps": 204665, "loss": 0.0367, "lr": 1.899870782042121e-06, "epoch": 1.1465077077174897, "percentage": 22.93, "elapsed_time": "1:00:07", "remaining_time": "3:22:03", "throughput": 8770.28, "total_tokens": 31635680} +{"current_steps": 46935, "total_steps": 204665, "loss": 0.1674, "lr": 1.8998335843826724e-06, "epoch": 1.1466298585493366, "percentage": 22.93, "elapsed_time": "1:00:07", "remaining_time": "3:22:03", "throughput": 8770.36, "total_tokens": 31639008} +{"current_steps": 46940, "total_steps": 204665, "loss": 0.0481, "lr": 1.899796380179392e-06, "epoch": 1.1467520093811838, "percentage": 22.94, "elapsed_time": "1:00:07", "remaining_time": "3:22:02", "throughput": 8770.42, "total_tokens": 31642208} +{"current_steps": 46945, "total_steps": 204665, "loss": 0.0481, "lr": 1.8997591694325505e-06, "epoch": 1.146874160213031, "percentage": 22.94, "elapsed_time": "1:00:08", "remaining_time": "3:22:02", "throughput": 8770.57, "total_tokens": 31645856} +{"current_steps": 46950, "total_steps": 204665, "loss": 0.0102, "lr": 1.8997219521424184e-06, "epoch": 1.1469963110448782, "percentage": 22.94, "elapsed_time": "1:00:08", "remaining_time": "3:22:01", "throughput": 8770.63, "total_tokens": 31649120} +{"current_steps": 46955, "total_steps": 204665, "loss": 0.1831, "lr": 1.8996847283092658e-06, "epoch": 1.1471184618767254, "percentage": 22.94, "elapsed_time": "1:00:08", "remaining_time": "3:22:01", "throughput": 8770.65, "total_tokens": 31652128} +{"current_steps": 46960, "total_steps": 204665, "loss": 0.0683, "lr": 1.8996474979333645e-06, "epoch": 1.1472406127085726, "percentage": 22.94, "elapsed_time": "1:00:09", "remaining_time": "3:22:00", "throughput": 8770.71, "total_tokens": 31655392} +{"current_steps": 46965, "total_steps": 204665, "loss": 0.1242, "lr": 1.8996102610149843e-06, "epoch": 1.1473627635404198, "percentage": 22.95, "elapsed_time": "1:00:09", "remaining_time": "3:22:00", "throughput": 8770.97, "total_tokens": 31659552} +{"current_steps": 46970, "total_steps": 204665, "loss": 0.0492, "lr": 1.8995730175543962e-06, "epoch": 1.147484914372267, "percentage": 22.95, "elapsed_time": "1:00:09", "remaining_time": "3:21:59", "throughput": 8770.96, "total_tokens": 31662496} +{"current_steps": 46975, "total_steps": 204665, "loss": 0.0566, "lr": 1.899535767551871e-06, "epoch": 1.147607065204114, "percentage": 22.95, "elapsed_time": "1:00:10", "remaining_time": "3:21:59", "throughput": 8771.02, "total_tokens": 31665696} +{"current_steps": 46980, "total_steps": 204665, "loss": 0.0329, "lr": 1.8994985110076802e-06, "epoch": 1.1477292160359611, "percentage": 22.95, "elapsed_time": "1:00:10", "remaining_time": "3:21:58", "throughput": 8771.28, "total_tokens": 31669856} +{"current_steps": 46985, "total_steps": 204665, "loss": 0.0303, "lr": 1.8994612479220942e-06, "epoch": 1.1478513668678083, "percentage": 22.96, "elapsed_time": "1:00:10", "remaining_time": "3:21:58", "throughput": 8771.27, "total_tokens": 31672736} +{"current_steps": 46990, "total_steps": 204665, "loss": 0.0517, "lr": 1.8994239782953838e-06, "epoch": 1.1479735176996555, "percentage": 22.96, "elapsed_time": "1:00:11", "remaining_time": "3:21:57", "throughput": 8771.36, "total_tokens": 31676128} +{"current_steps": 46995, "total_steps": 204665, "loss": 0.0416, "lr": 1.8993867021278205e-06, "epoch": 1.1480956685315027, "percentage": 22.96, "elapsed_time": "1:00:11", "remaining_time": "3:21:57", "throughput": 8771.4, "total_tokens": 31679264} +{"current_steps": 47000, "total_steps": 204665, "loss": 0.0559, "lr": 1.8993494194196754e-06, "epoch": 1.1482178193633499, "percentage": 22.96, "elapsed_time": "1:00:11", "remaining_time": "3:21:56", "throughput": 8771.39, "total_tokens": 31682144} +{"current_steps": 47005, "total_steps": 204665, "loss": 0.1114, "lr": 1.8993121301712192e-06, "epoch": 1.148339970195197, "percentage": 22.97, "elapsed_time": "1:00:12", "remaining_time": "3:21:56", "throughput": 8771.37, "total_tokens": 31685024} +{"current_steps": 47010, "total_steps": 204665, "loss": 0.0433, "lr": 1.8992748343827233e-06, "epoch": 1.1484621210270443, "percentage": 22.97, "elapsed_time": "1:00:12", "remaining_time": "3:21:55", "throughput": 8771.43, "total_tokens": 31688288} +{"current_steps": 47015, "total_steps": 204665, "loss": 0.2048, "lr": 1.8992375320544589e-06, "epoch": 1.1485842718588914, "percentage": 22.97, "elapsed_time": "1:00:13", "remaining_time": "3:21:55", "throughput": 8771.44, "total_tokens": 31691296} +{"current_steps": 47020, "total_steps": 204665, "loss": 0.134, "lr": 1.8992002231866975e-06, "epoch": 1.1487064226907386, "percentage": 22.97, "elapsed_time": "1:00:13", "remaining_time": "3:21:54", "throughput": 8771.54, "total_tokens": 31694688} +{"current_steps": 47025, "total_steps": 204665, "loss": 0.0332, "lr": 1.89916290777971e-06, "epoch": 1.1488285735225856, "percentage": 22.98, "elapsed_time": "1:00:13", "remaining_time": "3:21:54", "throughput": 8771.66, "total_tokens": 31698208} +{"current_steps": 47030, "total_steps": 204665, "loss": 0.124, "lr": 1.899125585833768e-06, "epoch": 1.1489507243544328, "percentage": 22.98, "elapsed_time": "1:00:14", "remaining_time": "3:21:53", "throughput": 8771.74, "total_tokens": 31701600} +{"current_steps": 47035, "total_steps": 204665, "loss": 0.1013, "lr": 1.8990882573491432e-06, "epoch": 1.14907287518628, "percentage": 22.98, "elapsed_time": "1:00:14", "remaining_time": "3:21:53", "throughput": 8771.82, "total_tokens": 31704928} +{"current_steps": 47040, "total_steps": 204665, "loss": 0.1103, "lr": 1.8990509223261064e-06, "epoch": 1.1491950260181272, "percentage": 22.98, "elapsed_time": "1:00:14", "remaining_time": "3:21:52", "throughput": 8771.99, "total_tokens": 31708640} +{"current_steps": 47045, "total_steps": 204665, "loss": 0.1072, "lr": 1.8990135807649295e-06, "epoch": 1.1493171768499744, "percentage": 22.99, "elapsed_time": "1:00:15", "remaining_time": "3:21:52", "throughput": 8772.08, "total_tokens": 31711968} +{"current_steps": 47050, "total_steps": 204665, "loss": 0.0083, "lr": 1.898976232665884e-06, "epoch": 1.1494393276818216, "percentage": 22.99, "elapsed_time": "1:00:15", "remaining_time": "3:21:51", "throughput": 8772.21, "total_tokens": 31715552} +{"current_steps": 47055, "total_steps": 204665, "loss": 0.0794, "lr": 1.8989388780292418e-06, "epoch": 1.1495614785136687, "percentage": 22.99, "elapsed_time": "1:00:15", "remaining_time": "3:21:51", "throughput": 8772.23, "total_tokens": 31718624} +{"current_steps": 47060, "total_steps": 204665, "loss": 0.0679, "lr": 1.8989015168552743e-06, "epoch": 1.149683629345516, "percentage": 22.99, "elapsed_time": "1:00:16", "remaining_time": "3:21:50", "throughput": 8772.33, "total_tokens": 31722080} +{"current_steps": 47065, "total_steps": 204665, "loss": 0.0021, "lr": 1.898864149144253e-06, "epoch": 1.149805780177363, "percentage": 23.0, "elapsed_time": "1:00:16", "remaining_time": "3:21:50", "throughput": 8772.44, "total_tokens": 31725536} +{"current_steps": 47070, "total_steps": 204665, "loss": 0.126, "lr": 1.89882677489645e-06, "epoch": 1.14992793100921, "percentage": 23.0, "elapsed_time": "1:00:16", "remaining_time": "3:21:49", "throughput": 8772.44, "total_tokens": 31728480} +{"current_steps": 47075, "total_steps": 204665, "loss": 0.0025, "lr": 1.898789394112137e-06, "epoch": 1.1500500818410573, "percentage": 23.0, "elapsed_time": "1:00:17", "remaining_time": "3:21:49", "throughput": 8772.46, "total_tokens": 31731552} +{"current_steps": 47080, "total_steps": 204665, "loss": 0.0754, "lr": 1.8987520067915854e-06, "epoch": 1.1501722326729045, "percentage": 23.0, "elapsed_time": "1:00:17", "remaining_time": "3:21:48", "throughput": 8772.56, "total_tokens": 31735008} +{"current_steps": 47085, "total_steps": 204665, "loss": 0.0624, "lr": 1.8987146129350678e-06, "epoch": 1.1502943835047517, "percentage": 23.01, "elapsed_time": "1:00:17", "remaining_time": "3:21:47", "throughput": 8772.56, "total_tokens": 31737952} +{"current_steps": 47090, "total_steps": 204665, "loss": 0.0826, "lr": 1.8986772125428558e-06, "epoch": 1.1504165343365989, "percentage": 23.01, "elapsed_time": "1:00:18", "remaining_time": "3:21:47", "throughput": 8772.67, "total_tokens": 31741408} +{"current_steps": 47095, "total_steps": 204665, "loss": 0.1433, "lr": 1.8986398056152212e-06, "epoch": 1.150538685168446, "percentage": 23.01, "elapsed_time": "1:00:18", "remaining_time": "3:21:46", "throughput": 8772.79, "total_tokens": 31744928} +{"current_steps": 47100, "total_steps": 204665, "loss": 0.1718, "lr": 1.8986023921524364e-06, "epoch": 1.1506608360002932, "percentage": 23.01, "elapsed_time": "1:00:18", "remaining_time": "3:21:46", "throughput": 8772.96, "total_tokens": 31748640} +{"current_steps": 47105, "total_steps": 204665, "loss": 0.1613, "lr": 1.8985649721547732e-06, "epoch": 1.1507829868321404, "percentage": 23.02, "elapsed_time": "1:00:19", "remaining_time": "3:21:46", "throughput": 8773.14, "total_tokens": 31752416} +{"current_steps": 47110, "total_steps": 204665, "loss": 0.1354, "lr": 1.8985275456225038e-06, "epoch": 1.1509051376639876, "percentage": 23.02, "elapsed_time": "1:00:19", "remaining_time": "3:21:45", "throughput": 8773.21, "total_tokens": 31755744} +{"current_steps": 47115, "total_steps": 204665, "loss": 0.0489, "lr": 1.8984901125559006e-06, "epoch": 1.1510272884958346, "percentage": 23.02, "elapsed_time": "1:00:19", "remaining_time": "3:21:44", "throughput": 8773.29, "total_tokens": 31759072} +{"current_steps": 47120, "total_steps": 204665, "loss": 0.051, "lr": 1.8984526729552354e-06, "epoch": 1.1511494393276818, "percentage": 23.02, "elapsed_time": "1:00:20", "remaining_time": "3:21:44", "throughput": 8773.33, "total_tokens": 31762208} +{"current_steps": 47125, "total_steps": 204665, "loss": 0.1468, "lr": 1.898415226820781e-06, "epoch": 1.151271590159529, "percentage": 23.03, "elapsed_time": "1:00:20", "remaining_time": "3:21:43", "throughput": 8773.4, "total_tokens": 31765472} +{"current_steps": 47130, "total_steps": 204665, "loss": 0.1513, "lr": 1.8983777741528094e-06, "epoch": 1.1513937409913761, "percentage": 23.03, "elapsed_time": "1:00:20", "remaining_time": "3:21:43", "throughput": 8773.38, "total_tokens": 31768352} +{"current_steps": 47135, "total_steps": 204665, "loss": 0.0462, "lr": 1.8983403149515928e-06, "epoch": 1.1515158918232233, "percentage": 23.03, "elapsed_time": "1:00:21", "remaining_time": "3:21:42", "throughput": 8773.67, "total_tokens": 31772576} +{"current_steps": 47140, "total_steps": 204665, "loss": 0.0464, "lr": 1.8983028492174037e-06, "epoch": 1.1516380426550705, "percentage": 23.03, "elapsed_time": "1:00:21", "remaining_time": "3:21:42", "throughput": 8773.72, "total_tokens": 31775776} +{"current_steps": 47145, "total_steps": 204665, "loss": 0.0162, "lr": 1.898265376950515e-06, "epoch": 1.1517601934869177, "percentage": 23.04, "elapsed_time": "1:00:22", "remaining_time": "3:21:41", "throughput": 8773.82, "total_tokens": 31779168} +{"current_steps": 47150, "total_steps": 204665, "loss": 0.1282, "lr": 1.8982278981511986e-06, "epoch": 1.151882344318765, "percentage": 23.04, "elapsed_time": "1:00:22", "remaining_time": "3:21:41", "throughput": 8773.86, "total_tokens": 31782304} +{"current_steps": 47155, "total_steps": 204665, "loss": 0.1716, "lr": 1.8981904128197274e-06, "epoch": 1.1520044951506119, "percentage": 23.04, "elapsed_time": "1:00:22", "remaining_time": "3:21:40", "throughput": 8773.98, "total_tokens": 31785824} +{"current_steps": 47160, "total_steps": 204665, "loss": 0.0282, "lr": 1.898152920956374e-06, "epoch": 1.152126645982459, "percentage": 23.04, "elapsed_time": "1:00:23", "remaining_time": "3:21:40", "throughput": 8774.04, "total_tokens": 31789088} +{"current_steps": 47165, "total_steps": 204665, "loss": 0.0165, "lr": 1.8981154225614108e-06, "epoch": 1.1522487968143063, "percentage": 23.04, "elapsed_time": "1:00:23", "remaining_time": "3:21:39", "throughput": 8774.11, "total_tokens": 31792352} +{"current_steps": 47170, "total_steps": 204665, "loss": 0.04, "lr": 1.8980779176351112e-06, "epoch": 1.1523709476461534, "percentage": 23.05, "elapsed_time": "1:00:23", "remaining_time": "3:21:39", "throughput": 8774.18, "total_tokens": 31795680} +{"current_steps": 47175, "total_steps": 204665, "loss": 0.1514, "lr": 1.8980404061777468e-06, "epoch": 1.1524930984780006, "percentage": 23.05, "elapsed_time": "1:00:24", "remaining_time": "3:21:38", "throughput": 8774.22, "total_tokens": 31798816} +{"current_steps": 47180, "total_steps": 204665, "loss": 0.0017, "lr": 1.8980028881895916e-06, "epoch": 1.1526152493098478, "percentage": 23.05, "elapsed_time": "1:00:24", "remaining_time": "3:21:38", "throughput": 8774.24, "total_tokens": 31801888} +{"current_steps": 47185, "total_steps": 204665, "loss": 0.0878, "lr": 1.8979653636709173e-06, "epoch": 1.152737400141695, "percentage": 23.05, "elapsed_time": "1:00:24", "remaining_time": "3:21:37", "throughput": 8774.36, "total_tokens": 31805408} +{"current_steps": 47190, "total_steps": 204665, "loss": 0.0473, "lr": 1.8979278326219977e-06, "epoch": 1.1528595509735422, "percentage": 23.06, "elapsed_time": "1:00:25", "remaining_time": "3:21:37", "throughput": 8774.43, "total_tokens": 31808672} +{"current_steps": 47195, "total_steps": 204665, "loss": 0.0428, "lr": 1.8978902950431052e-06, "epoch": 1.1529817018053894, "percentage": 23.06, "elapsed_time": "1:00:25", "remaining_time": "3:21:36", "throughput": 8774.56, "total_tokens": 31812256} +{"current_steps": 47200, "total_steps": 204665, "loss": 0.0926, "lr": 1.897852750934513e-06, "epoch": 1.1531038526372366, "percentage": 23.06, "elapsed_time": "1:00:25", "remaining_time": "3:21:36", "throughput": 8774.7, "total_tokens": 31815840} +{"current_steps": 47205, "total_steps": 204665, "loss": 0.0025, "lr": 1.8978152002964943e-06, "epoch": 1.1532260034690835, "percentage": 23.06, "elapsed_time": "1:00:26", "remaining_time": "3:21:35", "throughput": 8774.85, "total_tokens": 31819488} +{"current_steps": 47210, "total_steps": 204665, "loss": 0.2126, "lr": 1.8977776431293218e-06, "epoch": 1.1533481543009307, "percentage": 23.07, "elapsed_time": "1:00:26", "remaining_time": "3:21:35", "throughput": 8774.93, "total_tokens": 31822880} +{"current_steps": 47215, "total_steps": 204665, "loss": 0.2088, "lr": 1.897740079433269e-06, "epoch": 1.153470305132778, "percentage": 23.07, "elapsed_time": "1:00:26", "remaining_time": "3:21:34", "throughput": 8774.96, "total_tokens": 31825952} +{"current_steps": 47220, "total_steps": 204665, "loss": 0.0432, "lr": 1.8977025092086087e-06, "epoch": 1.1535924559646251, "percentage": 23.07, "elapsed_time": "1:00:27", "remaining_time": "3:21:34", "throughput": 8774.98, "total_tokens": 31829024} +{"current_steps": 47225, "total_steps": 204665, "loss": 0.1844, "lr": 1.8976649324556143e-06, "epoch": 1.1537146067964723, "percentage": 23.07, "elapsed_time": "1:00:27", "remaining_time": "3:21:33", "throughput": 8775.13, "total_tokens": 31832672} +{"current_steps": 47230, "total_steps": 204665, "loss": 0.0456, "lr": 1.897627349174559e-06, "epoch": 1.1538367576283195, "percentage": 23.08, "elapsed_time": "1:00:27", "remaining_time": "3:21:33", "throughput": 8775.14, "total_tokens": 31835680} +{"current_steps": 47235, "total_steps": 204665, "loss": 0.0201, "lr": 1.8975897593657165e-06, "epoch": 1.1539589084601667, "percentage": 23.08, "elapsed_time": "1:00:28", "remaining_time": "3:21:32", "throughput": 8775.22, "total_tokens": 31839008} +{"current_steps": 47240, "total_steps": 204665, "loss": 0.0469, "lr": 1.8975521630293595e-06, "epoch": 1.1540810592920137, "percentage": 23.08, "elapsed_time": "1:00:28", "remaining_time": "3:21:32", "throughput": 8775.45, "total_tokens": 31842976} +{"current_steps": 47245, "total_steps": 204665, "loss": 0.0565, "lr": 1.897514560165762e-06, "epoch": 1.1542032101238608, "percentage": 23.08, "elapsed_time": "1:00:29", "remaining_time": "3:21:31", "throughput": 8775.67, "total_tokens": 31846944} +{"current_steps": 47250, "total_steps": 204665, "loss": 0.2177, "lr": 1.8974769507751968e-06, "epoch": 1.154325360955708, "percentage": 23.09, "elapsed_time": "1:00:29", "remaining_time": "3:21:31", "throughput": 8775.71, "total_tokens": 31850080} +{"current_steps": 47255, "total_steps": 204665, "loss": 0.0344, "lr": 1.8974393348579383e-06, "epoch": 1.1544475117875552, "percentage": 23.09, "elapsed_time": "1:00:29", "remaining_time": "3:21:30", "throughput": 8775.79, "total_tokens": 31853408} +{"current_steps": 47260, "total_steps": 204665, "loss": 0.0009, "lr": 1.8974017124142594e-06, "epoch": 1.1545696626194024, "percentage": 23.09, "elapsed_time": "1:00:30", "remaining_time": "3:21:30", "throughput": 8775.98, "total_tokens": 31857248} +{"current_steps": 47265, "total_steps": 204665, "loss": 0.115, "lr": 1.897364083444434e-06, "epoch": 1.1546918134512496, "percentage": 23.09, "elapsed_time": "1:00:30", "remaining_time": "3:21:29", "throughput": 8775.96, "total_tokens": 31860128} +{"current_steps": 47270, "total_steps": 204665, "loss": 0.01, "lr": 1.8973264479487355e-06, "epoch": 1.1548139642830968, "percentage": 23.1, "elapsed_time": "1:00:30", "remaining_time": "3:21:29", "throughput": 8776.09, "total_tokens": 31863712} +{"current_steps": 47275, "total_steps": 204665, "loss": 0.0607, "lr": 1.8972888059274377e-06, "epoch": 1.154936115114944, "percentage": 23.1, "elapsed_time": "1:00:31", "remaining_time": "3:21:28", "throughput": 8776.22, "total_tokens": 31867232} +{"current_steps": 47280, "total_steps": 204665, "loss": 0.108, "lr": 1.8972511573808144e-06, "epoch": 1.1550582659467912, "percentage": 23.1, "elapsed_time": "1:00:31", "remaining_time": "3:21:28", "throughput": 8776.44, "total_tokens": 31871136} +{"current_steps": 47285, "total_steps": 204665, "loss": 0.0396, "lr": 1.8972135023091394e-06, "epoch": 1.1551804167786384, "percentage": 23.1, "elapsed_time": "1:00:31", "remaining_time": "3:21:27", "throughput": 8776.38, "total_tokens": 31873824} +{"current_steps": 47290, "total_steps": 204665, "loss": 0.2427, "lr": 1.8971758407126864e-06, "epoch": 1.1553025676104853, "percentage": 23.11, "elapsed_time": "1:00:32", "remaining_time": "3:21:27", "throughput": 8776.5, "total_tokens": 31877344} +{"current_steps": 47295, "total_steps": 204665, "loss": 0.1947, "lr": 1.8971381725917292e-06, "epoch": 1.1554247184423325, "percentage": 23.11, "elapsed_time": "1:00:32", "remaining_time": "3:21:26", "throughput": 8776.55, "total_tokens": 31880544} +{"current_steps": 47300, "total_steps": 204665, "loss": 0.0869, "lr": 1.8971004979465422e-06, "epoch": 1.1555468692741797, "percentage": 23.11, "elapsed_time": "1:00:32", "remaining_time": "3:21:26", "throughput": 8776.64, "total_tokens": 31883872} +{"current_steps": 47305, "total_steps": 204665, "loss": 0.0875, "lr": 1.897062816777399e-06, "epoch": 1.155669020106027, "percentage": 23.11, "elapsed_time": "1:00:33", "remaining_time": "3:21:25", "throughput": 8776.74, "total_tokens": 31887264} +{"current_steps": 47310, "total_steps": 204665, "loss": 0.0316, "lr": 1.8970251290845737e-06, "epoch": 1.155791170937874, "percentage": 23.12, "elapsed_time": "1:00:33", "remaining_time": "3:21:25", "throughput": 8776.83, "total_tokens": 31890656} +{"current_steps": 47315, "total_steps": 204665, "loss": 0.0475, "lr": 1.8969874348683404e-06, "epoch": 1.1559133217697213, "percentage": 23.12, "elapsed_time": "1:00:33", "remaining_time": "3:21:24", "throughput": 8776.91, "total_tokens": 31893984} +{"current_steps": 47320, "total_steps": 204665, "loss": 0.0016, "lr": 1.8969497341289733e-06, "epoch": 1.1560354726015685, "percentage": 23.12, "elapsed_time": "1:00:34", "remaining_time": "3:21:24", "throughput": 8777.0, "total_tokens": 31897376} +{"current_steps": 47325, "total_steps": 204665, "loss": 0.1015, "lr": 1.8969120268667462e-06, "epoch": 1.1561576234334157, "percentage": 23.12, "elapsed_time": "1:00:34", "remaining_time": "3:21:23", "throughput": 8777.05, "total_tokens": 31900576} +{"current_steps": 47330, "total_steps": 204665, "loss": 0.0299, "lr": 1.8968743130819338e-06, "epoch": 1.1562797742652626, "percentage": 23.13, "elapsed_time": "1:00:34", "remaining_time": "3:21:23", "throughput": 8777.13, "total_tokens": 31903904} +{"current_steps": 47335, "total_steps": 204665, "loss": 0.1661, "lr": 1.8968365927748102e-06, "epoch": 1.1564019250971098, "percentage": 23.13, "elapsed_time": "1:00:35", "remaining_time": "3:21:22", "throughput": 8777.17, "total_tokens": 31907040} +{"current_steps": 47340, "total_steps": 204665, "loss": 0.0757, "lr": 1.8967988659456498e-06, "epoch": 1.156524075928957, "percentage": 23.13, "elapsed_time": "1:00:35", "remaining_time": "3:21:22", "throughput": 8777.3, "total_tokens": 31910560} +{"current_steps": 47345, "total_steps": 204665, "loss": 0.1961, "lr": 1.8967611325947266e-06, "epoch": 1.1566462267608042, "percentage": 23.13, "elapsed_time": "1:00:35", "remaining_time": "3:21:21", "throughput": 8777.34, "total_tokens": 31913696} +{"current_steps": 47350, "total_steps": 204665, "loss": 0.0376, "lr": 1.896723392722315e-06, "epoch": 1.1567683775926514, "percentage": 23.14, "elapsed_time": "1:00:36", "remaining_time": "3:21:21", "throughput": 8777.45, "total_tokens": 31917152} +{"current_steps": 47355, "total_steps": 204665, "loss": 0.0034, "lr": 1.8966856463286903e-06, "epoch": 1.1568905284244986, "percentage": 23.14, "elapsed_time": "1:00:36", "remaining_time": "3:21:20", "throughput": 8777.53, "total_tokens": 31920480} +{"current_steps": 47360, "total_steps": 204665, "loss": 0.2035, "lr": 1.8966478934141262e-06, "epoch": 1.1570126792563458, "percentage": 23.14, "elapsed_time": "1:00:36", "remaining_time": "3:21:20", "throughput": 8777.63, "total_tokens": 31923872} +{"current_steps": 47365, "total_steps": 204665, "loss": 0.0391, "lr": 1.8966101339788971e-06, "epoch": 1.157134830088193, "percentage": 23.14, "elapsed_time": "1:00:37", "remaining_time": "3:21:19", "throughput": 8777.79, "total_tokens": 31927584} +{"current_steps": 47370, "total_steps": 204665, "loss": 0.0416, "lr": 1.8965723680232783e-06, "epoch": 1.1572569809200401, "percentage": 23.15, "elapsed_time": "1:00:37", "remaining_time": "3:21:19", "throughput": 8777.93, "total_tokens": 31931168} +{"current_steps": 47375, "total_steps": 204665, "loss": 0.0387, "lr": 1.8965345955475441e-06, "epoch": 1.1573791317518873, "percentage": 23.15, "elapsed_time": "1:00:38", "remaining_time": "3:21:18", "throughput": 8777.97, "total_tokens": 31934304} +{"current_steps": 47380, "total_steps": 204665, "loss": 0.0539, "lr": 1.896496816551969e-06, "epoch": 1.1575012825837343, "percentage": 23.15, "elapsed_time": "1:00:38", "remaining_time": "3:21:18", "throughput": 8778.07, "total_tokens": 31937760} +{"current_steps": 47385, "total_steps": 204665, "loss": 0.0016, "lr": 1.8964590310368283e-06, "epoch": 1.1576234334155815, "percentage": 23.15, "elapsed_time": "1:00:38", "remaining_time": "3:21:17", "throughput": 8778.18, "total_tokens": 31941216} +{"current_steps": 47390, "total_steps": 204665, "loss": 0.0559, "lr": 1.8964212390023959e-06, "epoch": 1.1577455842474287, "percentage": 23.15, "elapsed_time": "1:00:39", "remaining_time": "3:21:17", "throughput": 8778.31, "total_tokens": 31944736} +{"current_steps": 47395, "total_steps": 204665, "loss": 0.1978, "lr": 1.8963834404489474e-06, "epoch": 1.1578677350792759, "percentage": 23.16, "elapsed_time": "1:00:39", "remaining_time": "3:21:16", "throughput": 8778.4, "total_tokens": 31948128} +{"current_steps": 47400, "total_steps": 204665, "loss": 0.0536, "lr": 1.8963456353767575e-06, "epoch": 1.157989885911123, "percentage": 23.16, "elapsed_time": "1:00:39", "remaining_time": "3:21:16", "throughput": 8778.46, "total_tokens": 31951392} +{"current_steps": 47405, "total_steps": 204665, "loss": 0.0924, "lr": 1.8963078237861008e-06, "epoch": 1.1581120367429703, "percentage": 23.16, "elapsed_time": "1:00:40", "remaining_time": "3:21:15", "throughput": 8778.62, "total_tokens": 31955040} +{"current_steps": 47410, "total_steps": 204665, "loss": 0.0955, "lr": 1.8962700056772527e-06, "epoch": 1.1582341875748174, "percentage": 23.16, "elapsed_time": "1:00:40", "remaining_time": "3:21:15", "throughput": 8778.62, "total_tokens": 31957984} +{"current_steps": 47415, "total_steps": 204665, "loss": 0.1105, "lr": 1.896232181050488e-06, "epoch": 1.1583563384066646, "percentage": 23.17, "elapsed_time": "1:00:40", "remaining_time": "3:21:14", "throughput": 8778.72, "total_tokens": 31961440} +{"current_steps": 47420, "total_steps": 204665, "loss": 0.0438, "lr": 1.8961943499060818e-06, "epoch": 1.1584784892385116, "percentage": 23.17, "elapsed_time": "1:00:41", "remaining_time": "3:21:14", "throughput": 8778.82, "total_tokens": 31964832} +{"current_steps": 47425, "total_steps": 204665, "loss": 0.1184, "lr": 1.8961565122443092e-06, "epoch": 1.1586006400703588, "percentage": 23.17, "elapsed_time": "1:00:41", "remaining_time": "3:21:13", "throughput": 8778.98, "total_tokens": 31968544} +{"current_steps": 47430, "total_steps": 204665, "loss": 0.0383, "lr": 1.8961186680654455e-06, "epoch": 1.158722790902206, "percentage": 23.17, "elapsed_time": "1:00:41", "remaining_time": "3:21:13", "throughput": 8779.06, "total_tokens": 31971872} +{"current_steps": 47435, "total_steps": 204665, "loss": 0.0883, "lr": 1.896080817369766e-06, "epoch": 1.1588449417340532, "percentage": 23.18, "elapsed_time": "1:00:42", "remaining_time": "3:21:12", "throughput": 8779.11, "total_tokens": 31975072} +{"current_steps": 47440, "total_steps": 204665, "loss": 0.1452, "lr": 1.8960429601575453e-06, "epoch": 1.1589670925659004, "percentage": 23.18, "elapsed_time": "1:00:42", "remaining_time": "3:21:11", "throughput": 8779.17, "total_tokens": 31978336} +{"current_steps": 47445, "total_steps": 204665, "loss": 0.0778, "lr": 1.8960050964290595e-06, "epoch": 1.1590892433977475, "percentage": 23.18, "elapsed_time": "1:00:42", "remaining_time": "3:21:11", "throughput": 8779.25, "total_tokens": 31981664} +{"current_steps": 47450, "total_steps": 204665, "loss": 0.0991, "lr": 1.8959672261845836e-06, "epoch": 1.1592113942295947, "percentage": 23.18, "elapsed_time": "1:00:43", "remaining_time": "3:21:10", "throughput": 8779.31, "total_tokens": 31984928} +{"current_steps": 47455, "total_steps": 204665, "loss": 0.0803, "lr": 1.8959293494243931e-06, "epoch": 1.159333545061442, "percentage": 23.19, "elapsed_time": "1:00:43", "remaining_time": "3:21:10", "throughput": 8779.43, "total_tokens": 31988448} +{"current_steps": 47460, "total_steps": 204665, "loss": 0.086, "lr": 1.8958914661487632e-06, "epoch": 1.1594556958932891, "percentage": 23.19, "elapsed_time": "1:00:43", "remaining_time": "3:21:10", "throughput": 8779.55, "total_tokens": 31991968} +{"current_steps": 47465, "total_steps": 204665, "loss": 0.0076, "lr": 1.89585357635797e-06, "epoch": 1.1595778467251363, "percentage": 23.19, "elapsed_time": "1:00:44", "remaining_time": "3:21:09", "throughput": 8779.63, "total_tokens": 31995296} +{"current_steps": 47470, "total_steps": 204665, "loss": 0.0947, "lr": 1.8958156800522884e-06, "epoch": 1.1596999975569833, "percentage": 23.19, "elapsed_time": "1:00:44", "remaining_time": "3:21:08", "throughput": 8779.73, "total_tokens": 31998688} +{"current_steps": 47475, "total_steps": 204665, "loss": 0.0526, "lr": 1.8957777772319942e-06, "epoch": 1.1598221483888305, "percentage": 23.2, "elapsed_time": "1:00:44", "remaining_time": "3:21:08", "throughput": 8779.79, "total_tokens": 32001888} +{"current_steps": 47480, "total_steps": 204665, "loss": 0.0028, "lr": 1.895739867897363e-06, "epoch": 1.1599442992206777, "percentage": 23.2, "elapsed_time": "1:00:45", "remaining_time": "3:21:07", "throughput": 8779.86, "total_tokens": 32005152} +{"current_steps": 47485, "total_steps": 204665, "loss": 0.243, "lr": 1.8957019520486705e-06, "epoch": 1.1600664500525248, "percentage": 23.2, "elapsed_time": "1:00:45", "remaining_time": "3:21:07", "throughput": 8779.94, "total_tokens": 32008480} +{"current_steps": 47490, "total_steps": 204665, "loss": 0.0791, "lr": 1.8956640296861928e-06, "epoch": 1.160188600884372, "percentage": 23.2, "elapsed_time": "1:00:45", "remaining_time": "3:21:06", "throughput": 8780.02, "total_tokens": 32011808} +{"current_steps": 47495, "total_steps": 204665, "loss": 0.0302, "lr": 1.895626100810205e-06, "epoch": 1.1603107517162192, "percentage": 23.21, "elapsed_time": "1:00:46", "remaining_time": "3:21:06", "throughput": 8780.04, "total_tokens": 32014880} +{"current_steps": 47500, "total_steps": 204665, "loss": 0.1028, "lr": 1.8955881654209835e-06, "epoch": 1.1604329025480664, "percentage": 23.21, "elapsed_time": "1:00:46", "remaining_time": "3:21:05", "throughput": 8780.13, "total_tokens": 32018272} +{"current_steps": 47505, "total_steps": 204665, "loss": 0.0414, "lr": 1.8955502235188042e-06, "epoch": 1.1605550533799136, "percentage": 23.21, "elapsed_time": "1:00:47", "remaining_time": "3:21:05", "throughput": 8780.23, "total_tokens": 32021728} +{"current_steps": 47510, "total_steps": 204665, "loss": 0.0638, "lr": 1.8955122751039424e-06, "epoch": 1.1606772042117606, "percentage": 23.21, "elapsed_time": "1:00:47", "remaining_time": "3:21:04", "throughput": 8780.23, "total_tokens": 32024672} +{"current_steps": 47515, "total_steps": 204665, "loss": 0.0016, "lr": 1.8954743201766747e-06, "epoch": 1.1607993550436078, "percentage": 23.22, "elapsed_time": "1:00:47", "remaining_time": "3:21:04", "throughput": 8780.3, "total_tokens": 32027936} +{"current_steps": 47520, "total_steps": 204665, "loss": 0.0406, "lr": 1.8954363587372768e-06, "epoch": 1.160921505875455, "percentage": 23.22, "elapsed_time": "1:00:48", "remaining_time": "3:21:03", "throughput": 8780.28, "total_tokens": 32030816} +{"current_steps": 47525, "total_steps": 204665, "loss": 0.0563, "lr": 1.895398390786025e-06, "epoch": 1.1610436567073021, "percentage": 23.22, "elapsed_time": "1:00:48", "remaining_time": "3:21:03", "throughput": 8780.34, "total_tokens": 32034080} +{"current_steps": 47530, "total_steps": 204665, "loss": 0.0666, "lr": 1.895360416323195e-06, "epoch": 1.1611658075391493, "percentage": 23.22, "elapsed_time": "1:00:48", "remaining_time": "3:21:02", "throughput": 8780.37, "total_tokens": 32037152} +{"current_steps": 47535, "total_steps": 204665, "loss": 0.0549, "lr": 1.8953224353490636e-06, "epoch": 1.1612879583709965, "percentage": 23.23, "elapsed_time": "1:00:49", "remaining_time": "3:21:02", "throughput": 8780.47, "total_tokens": 32040608} +{"current_steps": 47540, "total_steps": 204665, "loss": 0.0822, "lr": 1.8952844478639064e-06, "epoch": 1.1614101092028437, "percentage": 23.23, "elapsed_time": "1:00:49", "remaining_time": "3:21:01", "throughput": 8780.52, "total_tokens": 32043808} +{"current_steps": 47545, "total_steps": 204665, "loss": 0.0543, "lr": 1.8952464538679997e-06, "epoch": 1.161532260034691, "percentage": 23.23, "elapsed_time": "1:00:49", "remaining_time": "3:21:01", "throughput": 8780.58, "total_tokens": 32047072} +{"current_steps": 47550, "total_steps": 204665, "loss": 0.095, "lr": 1.8952084533616203e-06, "epoch": 1.161654410866538, "percentage": 23.23, "elapsed_time": "1:00:50", "remaining_time": "3:21:00", "throughput": 8780.69, "total_tokens": 32050528} +{"current_steps": 47555, "total_steps": 204665, "loss": 0.1886, "lr": 1.8951704463450442e-06, "epoch": 1.1617765616983853, "percentage": 23.24, "elapsed_time": "1:00:50", "remaining_time": "3:21:00", "throughput": 8780.85, "total_tokens": 32054176} +{"current_steps": 47560, "total_steps": 204665, "loss": 0.0999, "lr": 1.8951324328185478e-06, "epoch": 1.1618987125302322, "percentage": 23.24, "elapsed_time": "1:00:50", "remaining_time": "3:20:59", "throughput": 8781.0, "total_tokens": 32057824} +{"current_steps": 47565, "total_steps": 204665, "loss": 0.0235, "lr": 1.8950944127824076e-06, "epoch": 1.1620208633620794, "percentage": 23.24, "elapsed_time": "1:00:51", "remaining_time": "3:20:59", "throughput": 8781.18, "total_tokens": 32061664} +{"current_steps": 47570, "total_steps": 204665, "loss": 0.0019, "lr": 1.8950563862369e-06, "epoch": 1.1621430141939266, "percentage": 23.24, "elapsed_time": "1:00:51", "remaining_time": "3:20:58", "throughput": 8781.35, "total_tokens": 32065376} +{"current_steps": 47575, "total_steps": 204665, "loss": 0.1614, "lr": 1.8950183531823019e-06, "epoch": 1.1622651650257738, "percentage": 23.25, "elapsed_time": "1:00:51", "remaining_time": "3:20:58", "throughput": 8781.43, "total_tokens": 32068704} +{"current_steps": 47580, "total_steps": 204665, "loss": 0.0803, "lr": 1.8949803136188894e-06, "epoch": 1.162387315857621, "percentage": 23.25, "elapsed_time": "1:00:52", "remaining_time": "3:20:57", "throughput": 8781.52, "total_tokens": 32072096} +{"current_steps": 47585, "total_steps": 204665, "loss": 0.0427, "lr": 1.894942267546939e-06, "epoch": 1.1625094666894682, "percentage": 23.25, "elapsed_time": "1:00:52", "remaining_time": "3:20:57", "throughput": 8781.79, "total_tokens": 32076320} +{"current_steps": 47590, "total_steps": 204665, "loss": 0.1575, "lr": 1.8949042149667283e-06, "epoch": 1.1626316175213154, "percentage": 23.25, "elapsed_time": "1:00:52", "remaining_time": "3:20:56", "throughput": 8781.8, "total_tokens": 32079328} +{"current_steps": 47595, "total_steps": 204665, "loss": 0.0022, "lr": 1.894866155878533e-06, "epoch": 1.1627537683531626, "percentage": 23.26, "elapsed_time": "1:00:53", "remaining_time": "3:20:57", "throughput": 8781.4, "total_tokens": 32083360} +{"current_steps": 47600, "total_steps": 204665, "loss": 0.061, "lr": 1.8948280902826306e-06, "epoch": 1.1628759191850095, "percentage": 23.26, "elapsed_time": "1:00:53", "remaining_time": "3:20:56", "throughput": 8781.49, "total_tokens": 32086752} +{"current_steps": 47605, "total_steps": 204665, "loss": 0.006, "lr": 1.8947900181792974e-06, "epoch": 1.1629980700168567, "percentage": 23.26, "elapsed_time": "1:00:54", "remaining_time": "3:20:56", "throughput": 8781.46, "total_tokens": 32089568} +{"current_steps": 47610, "total_steps": 204665, "loss": 0.0014, "lr": 1.8947519395688109e-06, "epoch": 1.163120220848704, "percentage": 23.26, "elapsed_time": "1:00:54", "remaining_time": "3:20:55", "throughput": 8781.62, "total_tokens": 32093280} +{"current_steps": 47615, "total_steps": 204665, "loss": 0.1059, "lr": 1.8947138544514473e-06, "epoch": 1.1632423716805511, "percentage": 23.26, "elapsed_time": "1:00:54", "remaining_time": "3:20:55", "throughput": 8781.67, "total_tokens": 32096480} +{"current_steps": 47620, "total_steps": 204665, "loss": 0.0932, "lr": 1.894675762827484e-06, "epoch": 1.1633645225123983, "percentage": 23.27, "elapsed_time": "1:00:55", "remaining_time": "3:20:54", "throughput": 8781.73, "total_tokens": 32099744} +{"current_steps": 47625, "total_steps": 204665, "loss": 0.2224, "lr": 1.894637664697198e-06, "epoch": 1.1634866733442455, "percentage": 23.27, "elapsed_time": "1:00:55", "remaining_time": "3:20:54", "throughput": 8781.84, "total_tokens": 32103200} +{"current_steps": 47630, "total_steps": 204665, "loss": 0.0968, "lr": 1.8945995600608662e-06, "epoch": 1.1636088241760927, "percentage": 23.27, "elapsed_time": "1:00:55", "remaining_time": "3:20:53", "throughput": 8781.93, "total_tokens": 32106592} +{"current_steps": 47635, "total_steps": 204665, "loss": 0.1202, "lr": 1.8945614489187658e-06, "epoch": 1.1637309750079399, "percentage": 23.27, "elapsed_time": "1:00:56", "remaining_time": "3:20:53", "throughput": 8782.01, "total_tokens": 32109920} +{"current_steps": 47640, "total_steps": 204665, "loss": 0.079, "lr": 1.8945233312711739e-06, "epoch": 1.163853125839787, "percentage": 23.28, "elapsed_time": "1:00:56", "remaining_time": "3:20:52", "throughput": 8782.11, "total_tokens": 32113376} +{"current_steps": 47645, "total_steps": 204665, "loss": 0.0071, "lr": 1.8944852071183676e-06, "epoch": 1.1639752766716343, "percentage": 23.28, "elapsed_time": "1:00:57", "remaining_time": "3:20:52", "throughput": 8782.26, "total_tokens": 32117024} +{"current_steps": 47650, "total_steps": 204665, "loss": 0.0531, "lr": 1.8944470764606247e-06, "epoch": 1.1640974275034812, "percentage": 23.28, "elapsed_time": "1:00:57", "remaining_time": "3:20:51", "throughput": 8782.31, "total_tokens": 32120224} +{"current_steps": 47655, "total_steps": 204665, "loss": 0.0014, "lr": 1.8944089392982216e-06, "epoch": 1.1642195783353284, "percentage": 23.28, "elapsed_time": "1:00:57", "remaining_time": "3:20:51", "throughput": 8782.43, "total_tokens": 32123744} +{"current_steps": 47660, "total_steps": 204665, "loss": 0.1023, "lr": 1.8943707956314364e-06, "epoch": 1.1643417291671756, "percentage": 23.29, "elapsed_time": "1:00:58", "remaining_time": "3:20:50", "throughput": 8782.47, "total_tokens": 32126880} +{"current_steps": 47665, "total_steps": 204665, "loss": 0.0043, "lr": 1.8943326454605462e-06, "epoch": 1.1644638799990228, "percentage": 23.29, "elapsed_time": "1:00:58", "remaining_time": "3:20:50", "throughput": 8782.58, "total_tokens": 32130336} +{"current_steps": 47670, "total_steps": 204665, "loss": 0.0538, "lr": 1.8942944887858286e-06, "epoch": 1.16458603083087, "percentage": 23.29, "elapsed_time": "1:00:58", "remaining_time": "3:20:49", "throughput": 8782.67, "total_tokens": 32133728} +{"current_steps": 47675, "total_steps": 204665, "loss": 0.0024, "lr": 1.8942563256075607e-06, "epoch": 1.1647081816627172, "percentage": 23.29, "elapsed_time": "1:00:59", "remaining_time": "3:20:49", "throughput": 8782.7, "total_tokens": 32136864} +{"current_steps": 47680, "total_steps": 204665, "loss": 0.0203, "lr": 1.8942181559260204e-06, "epoch": 1.1648303324945644, "percentage": 23.3, "elapsed_time": "1:00:59", "remaining_time": "3:20:48", "throughput": 8782.83, "total_tokens": 32140448} +{"current_steps": 47685, "total_steps": 204665, "loss": 0.1419, "lr": 1.894179979741485e-06, "epoch": 1.1649524833264115, "percentage": 23.3, "elapsed_time": "1:00:59", "remaining_time": "3:20:48", "throughput": 8782.9, "total_tokens": 32143712} +{"current_steps": 47690, "total_steps": 204665, "loss": 0.1193, "lr": 1.8941417970542324e-06, "epoch": 1.1650746341582585, "percentage": 23.3, "elapsed_time": "1:01:00", "remaining_time": "3:20:47", "throughput": 8783.04, "total_tokens": 32147360} +{"current_steps": 47695, "total_steps": 204665, "loss": 0.036, "lr": 1.8941036078645403e-06, "epoch": 1.1651967849901057, "percentage": 23.3, "elapsed_time": "1:01:00", "remaining_time": "3:20:47", "throughput": 8783.25, "total_tokens": 32151264} +{"current_steps": 47700, "total_steps": 204665, "loss": 0.0627, "lr": 1.894065412172686e-06, "epoch": 1.165318935821953, "percentage": 23.31, "elapsed_time": "1:01:00", "remaining_time": "3:20:46", "throughput": 8783.37, "total_tokens": 32154784} +{"current_steps": 47705, "total_steps": 204665, "loss": 0.0013, "lr": 1.8940272099789476e-06, "epoch": 1.1654410866538, "percentage": 23.31, "elapsed_time": "1:01:01", "remaining_time": "3:20:46", "throughput": 8783.43, "total_tokens": 32158048} +{"current_steps": 47710, "total_steps": 204665, "loss": 0.1186, "lr": 1.8939890012836032e-06, "epoch": 1.1655632374856473, "percentage": 23.31, "elapsed_time": "1:01:01", "remaining_time": "3:20:45", "throughput": 8783.5, "total_tokens": 32161376} +{"current_steps": 47715, "total_steps": 204665, "loss": 0.0024, "lr": 1.89395078608693e-06, "epoch": 1.1656853883174945, "percentage": 23.31, "elapsed_time": "1:01:01", "remaining_time": "3:20:45", "throughput": 8783.53, "total_tokens": 32164448} +{"current_steps": 47720, "total_steps": 204665, "loss": 0.1112, "lr": 1.8939125643892062e-06, "epoch": 1.1658075391493417, "percentage": 23.32, "elapsed_time": "1:01:02", "remaining_time": "3:20:44", "throughput": 8783.58, "total_tokens": 32167648} +{"current_steps": 47725, "total_steps": 204665, "loss": 0.0691, "lr": 1.89387433619071e-06, "epoch": 1.1659296899811888, "percentage": 23.32, "elapsed_time": "1:01:02", "remaining_time": "3:20:44", "throughput": 8783.59, "total_tokens": 32170656} +{"current_steps": 47730, "total_steps": 204665, "loss": 0.0222, "lr": 1.893836101491719e-06, "epoch": 1.166051840813036, "percentage": 23.32, "elapsed_time": "1:01:02", "remaining_time": "3:20:43", "throughput": 8783.68, "total_tokens": 32174048} +{"current_steps": 47735, "total_steps": 204665, "loss": 0.1264, "lr": 1.8937978602925114e-06, "epoch": 1.1661739916448832, "percentage": 23.32, "elapsed_time": "1:01:03", "remaining_time": "3:20:43", "throughput": 8783.81, "total_tokens": 32177632} +{"current_steps": 47740, "total_steps": 204665, "loss": 0.0959, "lr": 1.8937596125933654e-06, "epoch": 1.1662961424767302, "percentage": 23.33, "elapsed_time": "1:01:03", "remaining_time": "3:20:42", "throughput": 8783.83, "total_tokens": 32180704} +{"current_steps": 47745, "total_steps": 204665, "loss": 0.1216, "lr": 1.8937213583945595e-06, "epoch": 1.1664182933085774, "percentage": 23.33, "elapsed_time": "1:01:03", "remaining_time": "3:20:42", "throughput": 8783.89, "total_tokens": 32183904} +{"current_steps": 47750, "total_steps": 204665, "loss": 0.1122, "lr": 1.8936830976963712e-06, "epoch": 1.1665404441404246, "percentage": 23.33, "elapsed_time": "1:01:04", "remaining_time": "3:20:41", "throughput": 8783.87, "total_tokens": 32186784} +{"current_steps": 47755, "total_steps": 204665, "loss": 0.0621, "lr": 1.893644830499079e-06, "epoch": 1.1666625949722718, "percentage": 23.33, "elapsed_time": "1:01:04", "remaining_time": "3:20:41", "throughput": 8783.99, "total_tokens": 32190304} +{"current_steps": 47760, "total_steps": 204665, "loss": 0.0988, "lr": 1.8936065568029614e-06, "epoch": 1.166784745804119, "percentage": 23.34, "elapsed_time": "1:01:04", "remaining_time": "3:20:40", "throughput": 8784.04, "total_tokens": 32193504} +{"current_steps": 47765, "total_steps": 204665, "loss": 0.0803, "lr": 1.8935682766082964e-06, "epoch": 1.1669068966359661, "percentage": 23.34, "elapsed_time": "1:01:05", "remaining_time": "3:20:40", "throughput": 8784.2, "total_tokens": 32197216} +{"current_steps": 47770, "total_steps": 204665, "loss": 0.1822, "lr": 1.8935299899153625e-06, "epoch": 1.1670290474678133, "percentage": 23.34, "elapsed_time": "1:01:05", "remaining_time": "3:20:39", "throughput": 8784.31, "total_tokens": 32200672} +{"current_steps": 47775, "total_steps": 204665, "loss": 0.0734, "lr": 1.8934916967244386e-06, "epoch": 1.1671511982996603, "percentage": 23.34, "elapsed_time": "1:01:06", "remaining_time": "3:20:39", "throughput": 8784.36, "total_tokens": 32203872} +{"current_steps": 47780, "total_steps": 204665, "loss": 0.1033, "lr": 1.8934533970358022e-06, "epoch": 1.1672733491315075, "percentage": 23.35, "elapsed_time": "1:01:06", "remaining_time": "3:20:38", "throughput": 8784.56, "total_tokens": 32207776} +{"current_steps": 47785, "total_steps": 204665, "loss": 0.2013, "lr": 1.8934150908497327e-06, "epoch": 1.1673954999633547, "percentage": 23.35, "elapsed_time": "1:01:06", "remaining_time": "3:20:38", "throughput": 8784.65, "total_tokens": 32211168} +{"current_steps": 47790, "total_steps": 204665, "loss": 0.0334, "lr": 1.8933767781665085e-06, "epoch": 1.1675176507952019, "percentage": 23.35, "elapsed_time": "1:01:07", "remaining_time": "3:20:37", "throughput": 8784.72, "total_tokens": 32214432} +{"current_steps": 47795, "total_steps": 204665, "loss": 0.154, "lr": 1.8933384589864077e-06, "epoch": 1.167639801627049, "percentage": 23.35, "elapsed_time": "1:01:07", "remaining_time": "3:20:37", "throughput": 8784.75, "total_tokens": 32217568} +{"current_steps": 47800, "total_steps": 204665, "loss": 0.1517, "lr": 1.8933001333097094e-06, "epoch": 1.1677619524588962, "percentage": 23.36, "elapsed_time": "1:01:07", "remaining_time": "3:20:36", "throughput": 8784.81, "total_tokens": 32220832} +{"current_steps": 47805, "total_steps": 204665, "loss": 0.155, "lr": 1.8932618011366922e-06, "epoch": 1.1678841032907434, "percentage": 23.36, "elapsed_time": "1:01:08", "remaining_time": "3:20:36", "throughput": 8784.96, "total_tokens": 32224480} +{"current_steps": 47810, "total_steps": 204665, "loss": 0.167, "lr": 1.8932234624676356e-06, "epoch": 1.1680062541225906, "percentage": 23.36, "elapsed_time": "1:01:08", "remaining_time": "3:20:35", "throughput": 8785.01, "total_tokens": 32227680} +{"current_steps": 47815, "total_steps": 204665, "loss": 0.0336, "lr": 1.893185117302817e-06, "epoch": 1.1681284049544378, "percentage": 23.36, "elapsed_time": "1:01:08", "remaining_time": "3:20:35", "throughput": 8785.12, "total_tokens": 32231136} +{"current_steps": 47820, "total_steps": 204665, "loss": 0.0647, "lr": 1.8931467656425163e-06, "epoch": 1.168250555786285, "percentage": 23.37, "elapsed_time": "1:01:09", "remaining_time": "3:20:34", "throughput": 8785.17, "total_tokens": 32234336} +{"current_steps": 47825, "total_steps": 204665, "loss": 0.0296, "lr": 1.8931084074870118e-06, "epoch": 1.168372706618132, "percentage": 23.37, "elapsed_time": "1:01:09", "remaining_time": "3:20:34", "throughput": 8785.3, "total_tokens": 32237856} +{"current_steps": 47830, "total_steps": 204665, "loss": 0.0017, "lr": 1.8930700428365832e-06, "epoch": 1.1684948574499792, "percentage": 23.37, "elapsed_time": "1:01:09", "remaining_time": "3:20:33", "throughput": 8785.42, "total_tokens": 32241440} +{"current_steps": 47835, "total_steps": 204665, "loss": 0.0681, "lr": 1.8930316716915087e-06, "epoch": 1.1686170082818264, "percentage": 23.37, "elapsed_time": "1:01:10", "remaining_time": "3:20:33", "throughput": 8785.44, "total_tokens": 32244512} +{"current_steps": 47840, "total_steps": 204665, "loss": 0.0032, "lr": 1.892993294052068e-06, "epoch": 1.1687391591136735, "percentage": 23.37, "elapsed_time": "1:01:10", "remaining_time": "3:20:32", "throughput": 8785.55, "total_tokens": 32247968} +{"current_steps": 47845, "total_steps": 204665, "loss": 0.0858, "lr": 1.8929549099185396e-06, "epoch": 1.1688613099455207, "percentage": 23.38, "elapsed_time": "1:01:10", "remaining_time": "3:20:32", "throughput": 8785.58, "total_tokens": 32251104} +{"current_steps": 47850, "total_steps": 204665, "loss": 0.0914, "lr": 1.892916519291203e-06, "epoch": 1.168983460777368, "percentage": 23.38, "elapsed_time": "1:01:11", "remaining_time": "3:20:31", "throughput": 8785.66, "total_tokens": 32254432} +{"current_steps": 47855, "total_steps": 204665, "loss": 0.0972, "lr": 1.8928781221703374e-06, "epoch": 1.1691056116092151, "percentage": 23.38, "elapsed_time": "1:01:11", "remaining_time": "3:20:31", "throughput": 8785.8, "total_tokens": 32258080} +{"current_steps": 47860, "total_steps": 204665, "loss": 0.0008, "lr": 1.8928397185562217e-06, "epoch": 1.1692277624410623, "percentage": 23.38, "elapsed_time": "1:01:11", "remaining_time": "3:20:30", "throughput": 8785.82, "total_tokens": 32261152} +{"current_steps": 47865, "total_steps": 204665, "loss": 0.0328, "lr": 1.8928013084491354e-06, "epoch": 1.1693499132729093, "percentage": 23.39, "elapsed_time": "1:01:12", "remaining_time": "3:20:30", "throughput": 8785.86, "total_tokens": 32264288} +{"current_steps": 47870, "total_steps": 204665, "loss": 0.228, "lr": 1.8927628918493581e-06, "epoch": 1.1694720641047565, "percentage": 23.39, "elapsed_time": "1:01:12", "remaining_time": "3:20:29", "throughput": 8786.07, "total_tokens": 32268256} +{"current_steps": 47875, "total_steps": 204665, "loss": 0.0219, "lr": 1.8927244687571688e-06, "epoch": 1.1695942149366036, "percentage": 23.39, "elapsed_time": "1:01:13", "remaining_time": "3:20:29", "throughput": 8786.18, "total_tokens": 32271776} +{"current_steps": 47880, "total_steps": 204665, "loss": 0.1846, "lr": 1.8926860391728472e-06, "epoch": 1.1697163657684508, "percentage": 23.39, "elapsed_time": "1:01:13", "remaining_time": "3:20:28", "throughput": 8786.25, "total_tokens": 32275040} +{"current_steps": 47885, "total_steps": 204665, "loss": 0.1796, "lr": 1.8926476030966724e-06, "epoch": 1.169838516600298, "percentage": 23.4, "elapsed_time": "1:01:13", "remaining_time": "3:20:28", "throughput": 8786.32, "total_tokens": 32278368} +{"current_steps": 47890, "total_steps": 204665, "loss": 0.0011, "lr": 1.8926091605289245e-06, "epoch": 1.1699606674321452, "percentage": 23.4, "elapsed_time": "1:01:14", "remaining_time": "3:20:27", "throughput": 8786.39, "total_tokens": 32281696} +{"current_steps": 47895, "total_steps": 204665, "loss": 0.1402, "lr": 1.8925707114698823e-06, "epoch": 1.1700828182639924, "percentage": 23.4, "elapsed_time": "1:01:14", "remaining_time": "3:20:27", "throughput": 8786.45, "total_tokens": 32284960} +{"current_steps": 47900, "total_steps": 204665, "loss": 0.0995, "lr": 1.892532255919826e-06, "epoch": 1.1702049690958396, "percentage": 23.4, "elapsed_time": "1:01:14", "remaining_time": "3:20:26", "throughput": 8786.54, "total_tokens": 32288352} +{"current_steps": 47905, "total_steps": 204665, "loss": 0.1636, "lr": 1.8924937938790348e-06, "epoch": 1.1703271199276868, "percentage": 23.41, "elapsed_time": "1:01:15", "remaining_time": "3:20:26", "throughput": 8786.62, "total_tokens": 32291744} +{"current_steps": 47910, "total_steps": 204665, "loss": 0.11, "lr": 1.8924553253477891e-06, "epoch": 1.170449270759534, "percentage": 23.41, "elapsed_time": "1:01:15", "remaining_time": "3:20:25", "throughput": 8786.71, "total_tokens": 32295136} +{"current_steps": 47915, "total_steps": 204665, "loss": 0.1014, "lr": 1.8924168503263682e-06, "epoch": 1.170571421591381, "percentage": 23.41, "elapsed_time": "1:01:15", "remaining_time": "3:20:25", "throughput": 8786.8, "total_tokens": 32298528} +{"current_steps": 47920, "total_steps": 204665, "loss": 0.0434, "lr": 1.8923783688150517e-06, "epoch": 1.1706935724232281, "percentage": 23.41, "elapsed_time": "1:01:16", "remaining_time": "3:20:24", "throughput": 8786.82, "total_tokens": 32301600} +{"current_steps": 47925, "total_steps": 204665, "loss": 0.0966, "lr": 1.8923398808141195e-06, "epoch": 1.1708157232550753, "percentage": 23.42, "elapsed_time": "1:01:16", "remaining_time": "3:20:24", "throughput": 8786.81, "total_tokens": 32304544} +{"current_steps": 47930, "total_steps": 204665, "loss": 0.1141, "lr": 1.8923013863238523e-06, "epoch": 1.1709378740869225, "percentage": 23.42, "elapsed_time": "1:01:16", "remaining_time": "3:20:23", "throughput": 8787.0, "total_tokens": 32308384} +{"current_steps": 47935, "total_steps": 204665, "loss": 0.0021, "lr": 1.8922628853445288e-06, "epoch": 1.1710600249187697, "percentage": 23.42, "elapsed_time": "1:01:17", "remaining_time": "3:20:23", "throughput": 8787.17, "total_tokens": 32312160} +{"current_steps": 47940, "total_steps": 204665, "loss": 0.2046, "lr": 1.89222437787643e-06, "epoch": 1.171182175750617, "percentage": 23.42, "elapsed_time": "1:01:17", "remaining_time": "3:20:22", "throughput": 8787.2, "total_tokens": 32315296} +{"current_steps": 47945, "total_steps": 204665, "loss": 0.1025, "lr": 1.8921858639198354e-06, "epoch": 1.171304326582464, "percentage": 23.43, "elapsed_time": "1:01:17", "remaining_time": "3:20:22", "throughput": 8787.2, "total_tokens": 32318304} +{"current_steps": 47950, "total_steps": 204665, "loss": 0.0589, "lr": 1.8921473434750254e-06, "epoch": 1.1714264774143113, "percentage": 23.43, "elapsed_time": "1:01:18", "remaining_time": "3:20:21", "throughput": 8787.28, "total_tokens": 32321632} +{"current_steps": 47955, "total_steps": 204665, "loss": 0.1001, "lr": 1.8921088165422797e-06, "epoch": 1.1715486282461582, "percentage": 23.43, "elapsed_time": "1:01:18", "remaining_time": "3:20:21", "throughput": 8787.46, "total_tokens": 32325472} +{"current_steps": 47960, "total_steps": 204665, "loss": 0.055, "lr": 1.8920702831218787e-06, "epoch": 1.1716707790780054, "percentage": 23.43, "elapsed_time": "1:01:18", "remaining_time": "3:20:20", "throughput": 8787.53, "total_tokens": 32328736} +{"current_steps": 47965, "total_steps": 204665, "loss": 0.1018, "lr": 1.892031743214103e-06, "epoch": 1.1717929299098526, "percentage": 23.44, "elapsed_time": "1:01:19", "remaining_time": "3:20:20", "throughput": 8787.53, "total_tokens": 32331744} +{"current_steps": 47970, "total_steps": 204665, "loss": 0.0292, "lr": 1.8919931968192322e-06, "epoch": 1.1719150807416998, "percentage": 23.44, "elapsed_time": "1:01:19", "remaining_time": "3:20:19", "throughput": 8787.65, "total_tokens": 32335264} +{"current_steps": 47975, "total_steps": 204665, "loss": 0.0015, "lr": 1.8919546439375468e-06, "epoch": 1.172037231573547, "percentage": 23.44, "elapsed_time": "1:01:19", "remaining_time": "3:20:19", "throughput": 8787.78, "total_tokens": 32338784} +{"current_steps": 47980, "total_steps": 204665, "loss": 0.0441, "lr": 1.8919160845693278e-06, "epoch": 1.1721593824053942, "percentage": 23.44, "elapsed_time": "1:01:20", "remaining_time": "3:20:18", "throughput": 8787.78, "total_tokens": 32341728} +{"current_steps": 47985, "total_steps": 204665, "loss": 0.0108, "lr": 1.891877518714855e-06, "epoch": 1.1722815332372414, "percentage": 23.45, "elapsed_time": "1:01:20", "remaining_time": "3:20:18", "throughput": 8787.85, "total_tokens": 32344992} +{"current_steps": 47990, "total_steps": 204665, "loss": 0.0336, "lr": 1.891838946374409e-06, "epoch": 1.1724036840690886, "percentage": 23.45, "elapsed_time": "1:01:20", "remaining_time": "3:20:17", "throughput": 8787.9, "total_tokens": 32348192} +{"current_steps": 47995, "total_steps": 204665, "loss": 0.1033, "lr": 1.8918003675482702e-06, "epoch": 1.1725258349009358, "percentage": 23.45, "elapsed_time": "1:01:21", "remaining_time": "3:20:16", "throughput": 8787.89, "total_tokens": 32351136} +{"current_steps": 48000, "total_steps": 204665, "loss": 0.0012, "lr": 1.8917617822367193e-06, "epoch": 1.172647985732783, "percentage": 23.45, "elapsed_time": "1:01:21", "remaining_time": "3:20:16", "throughput": 8787.97, "total_tokens": 32354464} +{"current_steps": 48005, "total_steps": 204665, "loss": 0.1176, "lr": 1.8917231904400369e-06, "epoch": 1.17277013656463, "percentage": 23.46, "elapsed_time": "1:01:22", "remaining_time": "3:20:15", "throughput": 8788.02, "total_tokens": 32357664} +{"current_steps": 48010, "total_steps": 204665, "loss": 0.0013, "lr": 1.8916845921585036e-06, "epoch": 1.172892287396477, "percentage": 23.46, "elapsed_time": "1:01:22", "remaining_time": "3:20:15", "throughput": 8788.09, "total_tokens": 32360992} +{"current_steps": 48015, "total_steps": 204665, "loss": 0.1414, "lr": 1.8916459873924e-06, "epoch": 1.1730144382283243, "percentage": 23.46, "elapsed_time": "1:01:22", "remaining_time": "3:20:14", "throughput": 8788.15, "total_tokens": 32364256} +{"current_steps": 48020, "total_steps": 204665, "loss": 0.0688, "lr": 1.8916073761420073e-06, "epoch": 1.1731365890601715, "percentage": 23.46, "elapsed_time": "1:01:23", "remaining_time": "3:20:14", "throughput": 8788.3, "total_tokens": 32367904} +{"current_steps": 48025, "total_steps": 204665, "loss": 0.0011, "lr": 1.8915687584076054e-06, "epoch": 1.1732587398920187, "percentage": 23.47, "elapsed_time": "1:01:23", "remaining_time": "3:20:13", "throughput": 8788.38, "total_tokens": 32371232} +{"current_steps": 48030, "total_steps": 204665, "loss": 0.0009, "lr": 1.8915301341894762e-06, "epoch": 1.1733808907238659, "percentage": 23.47, "elapsed_time": "1:01:23", "remaining_time": "3:20:13", "throughput": 8788.44, "total_tokens": 32374496} +{"current_steps": 48035, "total_steps": 204665, "loss": 0.1076, "lr": 1.8914915034878997e-06, "epoch": 1.173503041555713, "percentage": 23.47, "elapsed_time": "1:01:24", "remaining_time": "3:20:12", "throughput": 8788.49, "total_tokens": 32377696} +{"current_steps": 48040, "total_steps": 204665, "loss": 0.048, "lr": 1.8914528663031575e-06, "epoch": 1.1736251923875602, "percentage": 23.47, "elapsed_time": "1:01:24", "remaining_time": "3:20:12", "throughput": 8788.62, "total_tokens": 32381280} +{"current_steps": 48045, "total_steps": 204665, "loss": 0.1592, "lr": 1.89141422263553e-06, "epoch": 1.1737473432194072, "percentage": 23.47, "elapsed_time": "1:01:24", "remaining_time": "3:20:11", "throughput": 8788.78, "total_tokens": 32384992} +{"current_steps": 48050, "total_steps": 204665, "loss": 0.0489, "lr": 1.8913755724852988e-06, "epoch": 1.1738694940512544, "percentage": 23.48, "elapsed_time": "1:01:25", "remaining_time": "3:20:11", "throughput": 8788.91, "total_tokens": 32388576} +{"current_steps": 48055, "total_steps": 204665, "loss": 0.0907, "lr": 1.8913369158527447e-06, "epoch": 1.1739916448831016, "percentage": 23.48, "elapsed_time": "1:01:25", "remaining_time": "3:20:10", "throughput": 8788.91, "total_tokens": 32391584} +{"current_steps": 48060, "total_steps": 204665, "loss": 0.0203, "lr": 1.8912982527381486e-06, "epoch": 1.1741137957149488, "percentage": 23.48, "elapsed_time": "1:01:25", "remaining_time": "3:20:10", "throughput": 8788.97, "total_tokens": 32394848} +{"current_steps": 48065, "total_steps": 204665, "loss": 0.1048, "lr": 1.8912595831417919e-06, "epoch": 1.174235946546796, "percentage": 23.48, "elapsed_time": "1:01:26", "remaining_time": "3:20:09", "throughput": 8788.99, "total_tokens": 32397920} +{"current_steps": 48070, "total_steps": 204665, "loss": 0.0444, "lr": 1.8912209070639558e-06, "epoch": 1.1743580973786432, "percentage": 23.49, "elapsed_time": "1:01:26", "remaining_time": "3:20:09", "throughput": 8789.06, "total_tokens": 32401184} +{"current_steps": 48075, "total_steps": 204665, "loss": 0.1023, "lr": 1.8911822245049213e-06, "epoch": 1.1744802482104904, "percentage": 23.49, "elapsed_time": "1:01:26", "remaining_time": "3:20:08", "throughput": 8789.12, "total_tokens": 32404448} +{"current_steps": 48080, "total_steps": 204665, "loss": 0.0377, "lr": 1.8911435354649705e-06, "epoch": 1.1746023990423375, "percentage": 23.49, "elapsed_time": "1:01:27", "remaining_time": "3:20:08", "throughput": 8789.15, "total_tokens": 32407584} +{"current_steps": 48085, "total_steps": 204665, "loss": 0.0557, "lr": 1.8911048399443838e-06, "epoch": 1.1747245498741847, "percentage": 23.49, "elapsed_time": "1:01:27", "remaining_time": "3:20:07", "throughput": 8789.33, "total_tokens": 32411360} +{"current_steps": 48090, "total_steps": 204665, "loss": 0.1424, "lr": 1.8910661379434432e-06, "epoch": 1.174846700706032, "percentage": 23.5, "elapsed_time": "1:01:27", "remaining_time": "3:20:07", "throughput": 8789.44, "total_tokens": 32414816} +{"current_steps": 48095, "total_steps": 204665, "loss": 0.0771, "lr": 1.89102742946243e-06, "epoch": 1.1749688515378789, "percentage": 23.5, "elapsed_time": "1:01:28", "remaining_time": "3:20:06", "throughput": 8789.46, "total_tokens": 32417888} +{"current_steps": 48100, "total_steps": 204665, "loss": 0.1183, "lr": 1.8909887145016257e-06, "epoch": 1.175091002369726, "percentage": 23.5, "elapsed_time": "1:01:28", "remaining_time": "3:20:06", "throughput": 8789.59, "total_tokens": 32421472} +{"current_steps": 48105, "total_steps": 204665, "loss": 0.1321, "lr": 1.8909499930613118e-06, "epoch": 1.1752131532015733, "percentage": 23.5, "elapsed_time": "1:01:28", "remaining_time": "3:20:05", "throughput": 8789.61, "total_tokens": 32424544} +{"current_steps": 48110, "total_steps": 204665, "loss": 0.0028, "lr": 1.8909112651417699e-06, "epoch": 1.1753353040334205, "percentage": 23.51, "elapsed_time": "1:01:29", "remaining_time": "3:20:05", "throughput": 8789.63, "total_tokens": 32427616} +{"current_steps": 48115, "total_steps": 204665, "loss": 0.1127, "lr": 1.8908725307432816e-06, "epoch": 1.1754574548652676, "percentage": 23.51, "elapsed_time": "1:01:29", "remaining_time": "3:20:04", "throughput": 8789.64, "total_tokens": 32430624} +{"current_steps": 48120, "total_steps": 204665, "loss": 0.0027, "lr": 1.8908337898661287e-06, "epoch": 1.1755796056971148, "percentage": 23.51, "elapsed_time": "1:01:29", "remaining_time": "3:20:04", "throughput": 8789.77, "total_tokens": 32434208} +{"current_steps": 48125, "total_steps": 204665, "loss": 0.0397, "lr": 1.8907950425105927e-06, "epoch": 1.175701756528962, "percentage": 23.51, "elapsed_time": "1:01:30", "remaining_time": "3:20:03", "throughput": 8789.88, "total_tokens": 32437664} +{"current_steps": 48130, "total_steps": 204665, "loss": 0.1019, "lr": 1.8907562886769557e-06, "epoch": 1.1758239073608092, "percentage": 23.52, "elapsed_time": "1:01:30", "remaining_time": "3:20:03", "throughput": 8790.01, "total_tokens": 32441248} +{"current_steps": 48135, "total_steps": 204665, "loss": 0.0376, "lr": 1.8907175283654992e-06, "epoch": 1.1759460581926562, "percentage": 23.52, "elapsed_time": "1:01:31", "remaining_time": "3:20:02", "throughput": 8790.04, "total_tokens": 32444320} +{"current_steps": 48140, "total_steps": 204665, "loss": 0.0544, "lr": 1.8906787615765055e-06, "epoch": 1.1760682090245034, "percentage": 23.52, "elapsed_time": "1:01:31", "remaining_time": "3:20:02", "throughput": 8790.11, "total_tokens": 32447648} +{"current_steps": 48145, "total_steps": 204665, "loss": 0.205, "lr": 1.8906399883102565e-06, "epoch": 1.1761903598563506, "percentage": 23.52, "elapsed_time": "1:01:31", "remaining_time": "3:20:01", "throughput": 8790.16, "total_tokens": 32450848} +{"current_steps": 48150, "total_steps": 204665, "loss": 0.076, "lr": 1.8906012085670336e-06, "epoch": 1.1763125106881978, "percentage": 23.53, "elapsed_time": "1:01:32", "remaining_time": "3:20:01", "throughput": 8790.26, "total_tokens": 32454240} +{"current_steps": 48155, "total_steps": 204665, "loss": 0.1783, "lr": 1.890562422347119e-06, "epoch": 1.176434661520045, "percentage": 23.53, "elapsed_time": "1:01:32", "remaining_time": "3:20:00", "throughput": 8790.3, "total_tokens": 32457440} +{"current_steps": 48160, "total_steps": 204665, "loss": 0.2149, "lr": 1.8905236296507953e-06, "epoch": 1.1765568123518921, "percentage": 23.53, "elapsed_time": "1:01:32", "remaining_time": "3:20:00", "throughput": 8790.36, "total_tokens": 32460704} +{"current_steps": 48165, "total_steps": 204665, "loss": 0.1459, "lr": 1.890484830478344e-06, "epoch": 1.1766789631837393, "percentage": 23.53, "elapsed_time": "1:01:33", "remaining_time": "3:19:59", "throughput": 8790.6, "total_tokens": 32464800} +{"current_steps": 48170, "total_steps": 204665, "loss": 0.0858, "lr": 1.8904460248300478e-06, "epoch": 1.1768011140155865, "percentage": 23.54, "elapsed_time": "1:01:33", "remaining_time": "3:19:59", "throughput": 8790.69, "total_tokens": 32468192} +{"current_steps": 48175, "total_steps": 204665, "loss": 0.0026, "lr": 1.8904072127061884e-06, "epoch": 1.1769232648474337, "percentage": 23.54, "elapsed_time": "1:01:33", "remaining_time": "3:19:58", "throughput": 8790.66, "total_tokens": 32471072} +{"current_steps": 48180, "total_steps": 204665, "loss": 0.0297, "lr": 1.8903683941070483e-06, "epoch": 1.177045415679281, "percentage": 23.54, "elapsed_time": "1:01:34", "remaining_time": "3:19:58", "throughput": 8790.65, "total_tokens": 32473952} +{"current_steps": 48185, "total_steps": 204665, "loss": 0.0313, "lr": 1.8903295690329097e-06, "epoch": 1.1771675665111279, "percentage": 23.54, "elapsed_time": "1:01:34", "remaining_time": "3:19:57", "throughput": 8790.65, "total_tokens": 32476896} +{"current_steps": 48190, "total_steps": 204665, "loss": 0.0741, "lr": 1.890290737484055e-06, "epoch": 1.177289717342975, "percentage": 23.55, "elapsed_time": "1:01:34", "remaining_time": "3:19:57", "throughput": 8790.72, "total_tokens": 32480224} +{"current_steps": 48195, "total_steps": 204665, "loss": 0.0477, "lr": 1.890251899460767e-06, "epoch": 1.1774118681748222, "percentage": 23.55, "elapsed_time": "1:01:35", "remaining_time": "3:19:56", "throughput": 8790.87, "total_tokens": 32483872} +{"current_steps": 48200, "total_steps": 204665, "loss": 0.1153, "lr": 1.8902130549633272e-06, "epoch": 1.1775340190066694, "percentage": 23.55, "elapsed_time": "1:01:35", "remaining_time": "3:19:56", "throughput": 8790.91, "total_tokens": 32487072} +{"current_steps": 48205, "total_steps": 204665, "loss": 0.0391, "lr": 1.8901742039920188e-06, "epoch": 1.1776561698385166, "percentage": 23.55, "elapsed_time": "1:01:35", "remaining_time": "3:19:55", "throughput": 8790.96, "total_tokens": 32490272} +{"current_steps": 48210, "total_steps": 204665, "loss": 0.1538, "lr": 1.8901353465471242e-06, "epoch": 1.1777783206703638, "percentage": 23.56, "elapsed_time": "1:01:36", "remaining_time": "3:19:55", "throughput": 8791.03, "total_tokens": 32493536} +{"current_steps": 48215, "total_steps": 204665, "loss": 0.0322, "lr": 1.8900964826289258e-06, "epoch": 1.177900471502211, "percentage": 23.56, "elapsed_time": "1:01:36", "remaining_time": "3:19:54", "throughput": 8791.18, "total_tokens": 32497248} +{"current_steps": 48220, "total_steps": 204665, "loss": 0.1566, "lr": 1.8900576122377066e-06, "epoch": 1.1780226223340582, "percentage": 23.56, "elapsed_time": "1:01:36", "remaining_time": "3:19:54", "throughput": 8791.23, "total_tokens": 32500448} +{"current_steps": 48225, "total_steps": 204665, "loss": 0.0295, "lr": 1.8900187353737488e-06, "epoch": 1.1781447731659052, "percentage": 23.56, "elapsed_time": "1:01:37", "remaining_time": "3:19:53", "throughput": 8791.28, "total_tokens": 32503648} +{"current_steps": 48230, "total_steps": 204665, "loss": 0.001, "lr": 1.8899798520373356e-06, "epoch": 1.1782669239977523, "percentage": 23.57, "elapsed_time": "1:01:37", "remaining_time": "3:19:53", "throughput": 8791.39, "total_tokens": 32507104} +{"current_steps": 48235, "total_steps": 204665, "loss": 0.2231, "lr": 1.8899409622287491e-06, "epoch": 1.1783890748295995, "percentage": 23.57, "elapsed_time": "1:01:37", "remaining_time": "3:19:52", "throughput": 8791.48, "total_tokens": 32510560} +{"current_steps": 48240, "total_steps": 204665, "loss": 0.1259, "lr": 1.8899020659482732e-06, "epoch": 1.1785112256614467, "percentage": 23.57, "elapsed_time": "1:01:38", "remaining_time": "3:19:52", "throughput": 8791.61, "total_tokens": 32514144} +{"current_steps": 48245, "total_steps": 204665, "loss": 0.1139, "lr": 1.88986316319619e-06, "epoch": 1.178633376493294, "percentage": 23.57, "elapsed_time": "1:01:38", "remaining_time": "3:19:51", "throughput": 8791.69, "total_tokens": 32517472} +{"current_steps": 48250, "total_steps": 204665, "loss": 0.0018, "lr": 1.8898242539727823e-06, "epoch": 1.178755527325141, "percentage": 23.58, "elapsed_time": "1:01:39", "remaining_time": "3:19:51", "throughput": 8791.81, "total_tokens": 32520992} +{"current_steps": 48255, "total_steps": 204665, "loss": 0.1008, "lr": 1.8897853382783332e-06, "epoch": 1.1788776781569883, "percentage": 23.58, "elapsed_time": "1:01:39", "remaining_time": "3:19:50", "throughput": 8791.8, "total_tokens": 32523936} +{"current_steps": 48260, "total_steps": 204665, "loss": 0.0019, "lr": 1.8897464161131258e-06, "epoch": 1.1789998289888355, "percentage": 23.58, "elapsed_time": "1:01:39", "remaining_time": "3:19:50", "throughput": 8791.88, "total_tokens": 32527328} +{"current_steps": 48265, "total_steps": 204665, "loss": 0.0286, "lr": 1.8897074874774435e-06, "epoch": 1.1791219798206827, "percentage": 23.58, "elapsed_time": "1:01:40", "remaining_time": "3:19:49", "throughput": 8791.98, "total_tokens": 32530720} +{"current_steps": 48270, "total_steps": 204665, "loss": 0.187, "lr": 1.8896685523715687e-06, "epoch": 1.1792441306525299, "percentage": 23.58, "elapsed_time": "1:01:40", "remaining_time": "3:19:49", "throughput": 8792.08, "total_tokens": 32534176} +{"current_steps": 48275, "total_steps": 204665, "loss": 0.0253, "lr": 1.8896296107957853e-06, "epoch": 1.1793662814843768, "percentage": 23.59, "elapsed_time": "1:01:40", "remaining_time": "3:19:48", "throughput": 8792.2, "total_tokens": 32537696} +{"current_steps": 48280, "total_steps": 204665, "loss": 0.1362, "lr": 1.8895906627503756e-06, "epoch": 1.179488432316224, "percentage": 23.59, "elapsed_time": "1:01:41", "remaining_time": "3:19:48", "throughput": 8792.28, "total_tokens": 32541024} +{"current_steps": 48285, "total_steps": 204665, "loss": 0.1035, "lr": 1.8895517082356236e-06, "epoch": 1.1796105831480712, "percentage": 23.59, "elapsed_time": "1:01:41", "remaining_time": "3:19:47", "throughput": 8792.36, "total_tokens": 32544416} +{"current_steps": 48290, "total_steps": 204665, "loss": 0.1123, "lr": 1.8895127472518121e-06, "epoch": 1.1797327339799184, "percentage": 23.59, "elapsed_time": "1:01:41", "remaining_time": "3:19:47", "throughput": 8792.48, "total_tokens": 32547936} +{"current_steps": 48295, "total_steps": 204665, "loss": 0.038, "lr": 1.8894737797992249e-06, "epoch": 1.1798548848117656, "percentage": 23.6, "elapsed_time": "1:01:42", "remaining_time": "3:19:46", "throughput": 8792.5, "total_tokens": 32551072} +{"current_steps": 48300, "total_steps": 204665, "loss": 0.0423, "lr": 1.8894348058781451e-06, "epoch": 1.1799770356436128, "percentage": 23.6, "elapsed_time": "1:01:42", "remaining_time": "3:19:46", "throughput": 8792.58, "total_tokens": 32554400} +{"current_steps": 48305, "total_steps": 204665, "loss": 0.0501, "lr": 1.8893958254888562e-06, "epoch": 1.18009918647546, "percentage": 23.6, "elapsed_time": "1:01:42", "remaining_time": "3:19:45", "throughput": 8792.62, "total_tokens": 32557536} +{"current_steps": 48310, "total_steps": 204665, "loss": 0.0472, "lr": 1.8893568386316414e-06, "epoch": 1.180221337307307, "percentage": 23.6, "elapsed_time": "1:01:43", "remaining_time": "3:19:45", "throughput": 8792.71, "total_tokens": 32560928} +{"current_steps": 48315, "total_steps": 204665, "loss": 0.0604, "lr": 1.8893178453067846e-06, "epoch": 1.1803434881391541, "percentage": 23.61, "elapsed_time": "1:01:43", "remaining_time": "3:19:44", "throughput": 8793.01, "total_tokens": 32565280} +{"current_steps": 48320, "total_steps": 204665, "loss": 0.0701, "lr": 1.8892788455145694e-06, "epoch": 1.1804656389710013, "percentage": 23.61, "elapsed_time": "1:01:43", "remaining_time": "3:19:44", "throughput": 8793.11, "total_tokens": 32568736} +{"current_steps": 48325, "total_steps": 204665, "loss": 0.0004, "lr": 1.8892398392552788e-06, "epoch": 1.1805877898028485, "percentage": 23.61, "elapsed_time": "1:01:44", "remaining_time": "3:19:43", "throughput": 8793.28, "total_tokens": 32572512} +{"current_steps": 48330, "total_steps": 204665, "loss": 0.1615, "lr": 1.8892008265291975e-06, "epoch": 1.1807099406346957, "percentage": 23.61, "elapsed_time": "1:01:44", "remaining_time": "3:19:43", "throughput": 8793.37, "total_tokens": 32575904} +{"current_steps": 48335, "total_steps": 204665, "loss": 0.0992, "lr": 1.8891618073366082e-06, "epoch": 1.1808320914665429, "percentage": 23.62, "elapsed_time": "1:01:44", "remaining_time": "3:19:42", "throughput": 8793.41, "total_tokens": 32579104} +{"current_steps": 48340, "total_steps": 204665, "loss": 0.0066, "lr": 1.8891227816777953e-06, "epoch": 1.18095424229839, "percentage": 23.62, "elapsed_time": "1:01:45", "remaining_time": "3:19:42", "throughput": 8793.53, "total_tokens": 32582688} +{"current_steps": 48345, "total_steps": 204665, "loss": 0.1201, "lr": 1.8890837495530423e-06, "epoch": 1.1810763931302373, "percentage": 23.62, "elapsed_time": "1:01:45", "remaining_time": "3:19:41", "throughput": 8793.59, "total_tokens": 32585952} +{"current_steps": 48350, "total_steps": 204665, "loss": 0.1483, "lr": 1.889044710962633e-06, "epoch": 1.1811985439620845, "percentage": 23.62, "elapsed_time": "1:01:45", "remaining_time": "3:19:41", "throughput": 8793.59, "total_tokens": 32588896} +{"current_steps": 48355, "total_steps": 204665, "loss": 0.1178, "lr": 1.8890056659068516e-06, "epoch": 1.1813206947939316, "percentage": 23.63, "elapsed_time": "1:01:46", "remaining_time": "3:19:40", "throughput": 8793.63, "total_tokens": 32592096} +{"current_steps": 48360, "total_steps": 204665, "loss": 0.1299, "lr": 1.888966614385982e-06, "epoch": 1.1814428456257786, "percentage": 23.63, "elapsed_time": "1:01:46", "remaining_time": "3:19:40", "throughput": 8793.74, "total_tokens": 32595552} +{"current_steps": 48365, "total_steps": 204665, "loss": 0.0912, "lr": 1.8889275564003078e-06, "epoch": 1.1815649964576258, "percentage": 23.63, "elapsed_time": "1:01:47", "remaining_time": "3:19:39", "throughput": 8793.95, "total_tokens": 32599520} +{"current_steps": 48370, "total_steps": 204665, "loss": 0.0017, "lr": 1.8888884919501136e-06, "epoch": 1.181687147289473, "percentage": 23.63, "elapsed_time": "1:01:47", "remaining_time": "3:19:39", "throughput": 8793.98, "total_tokens": 32602656} +{"current_steps": 48375, "total_steps": 204665, "loss": 0.0739, "lr": 1.888849421035683e-06, "epoch": 1.1818092981213202, "percentage": 23.64, "elapsed_time": "1:01:47", "remaining_time": "3:19:38", "throughput": 8794.05, "total_tokens": 32605984} +{"current_steps": 48380, "total_steps": 204665, "loss": 0.1791, "lr": 1.8888103436573003e-06, "epoch": 1.1819314489531674, "percentage": 23.64, "elapsed_time": "1:01:48", "remaining_time": "3:19:38", "throughput": 8794.07, "total_tokens": 32609056} +{"current_steps": 48385, "total_steps": 204665, "loss": 0.1844, "lr": 1.8887712598152498e-06, "epoch": 1.1820535997850146, "percentage": 23.64, "elapsed_time": "1:01:48", "remaining_time": "3:19:37", "throughput": 8794.18, "total_tokens": 32612576} +{"current_steps": 48390, "total_steps": 204665, "loss": 0.0289, "lr": 1.8887321695098157e-06, "epoch": 1.1821757506168618, "percentage": 23.64, "elapsed_time": "1:01:48", "remaining_time": "3:19:37", "throughput": 8794.23, "total_tokens": 32615776} +{"current_steps": 48395, "total_steps": 204665, "loss": 0.0107, "lr": 1.8886930727412822e-06, "epoch": 1.182297901448709, "percentage": 23.65, "elapsed_time": "1:01:49", "remaining_time": "3:19:36", "throughput": 8794.22, "total_tokens": 32618720} +{"current_steps": 48400, "total_steps": 204665, "loss": 0.2006, "lr": 1.8886539695099338e-06, "epoch": 1.182420052280556, "percentage": 23.65, "elapsed_time": "1:01:49", "remaining_time": "3:19:36", "throughput": 8794.36, "total_tokens": 32622368} +{"current_steps": 48405, "total_steps": 204665, "loss": 0.0016, "lr": 1.8886148598160542e-06, "epoch": 1.182542203112403, "percentage": 23.65, "elapsed_time": "1:01:49", "remaining_time": "3:19:35", "throughput": 8794.49, "total_tokens": 32625952} +{"current_steps": 48410, "total_steps": 204665, "loss": 0.0473, "lr": 1.888575743659929e-06, "epoch": 1.1826643539442503, "percentage": 23.65, "elapsed_time": "1:01:50", "remaining_time": "3:19:35", "throughput": 8794.63, "total_tokens": 32629600} +{"current_steps": 48415, "total_steps": 204665, "loss": 0.0073, "lr": 1.8885366210418415e-06, "epoch": 1.1827865047760975, "percentage": 23.66, "elapsed_time": "1:01:50", "remaining_time": "3:19:34", "throughput": 8794.64, "total_tokens": 32632608} +{"current_steps": 48420, "total_steps": 204665, "loss": 0.0715, "lr": 1.8884974919620769e-06, "epoch": 1.1829086556079447, "percentage": 23.66, "elapsed_time": "1:01:50", "remaining_time": "3:19:34", "throughput": 8794.71, "total_tokens": 32635936} +{"current_steps": 48425, "total_steps": 204665, "loss": 0.1009, "lr": 1.8884583564209196e-06, "epoch": 1.1830308064397919, "percentage": 23.66, "elapsed_time": "1:01:51", "remaining_time": "3:19:33", "throughput": 8794.82, "total_tokens": 32639456} +{"current_steps": 48430, "total_steps": 204665, "loss": 0.2983, "lr": 1.8884192144186541e-06, "epoch": 1.183152957271639, "percentage": 23.66, "elapsed_time": "1:01:51", "remaining_time": "3:19:33", "throughput": 8794.91, "total_tokens": 32642848} +{"current_steps": 48435, "total_steps": 204665, "loss": 0.0824, "lr": 1.8883800659555652e-06, "epoch": 1.1832751081034862, "percentage": 23.67, "elapsed_time": "1:01:51", "remaining_time": "3:19:32", "throughput": 8794.99, "total_tokens": 32646176} +{"current_steps": 48440, "total_steps": 204665, "loss": 0.0827, "lr": 1.8883409110319372e-06, "epoch": 1.1833972589353334, "percentage": 23.67, "elapsed_time": "1:01:52", "remaining_time": "3:19:32", "throughput": 8795.09, "total_tokens": 32649632} +{"current_steps": 48445, "total_steps": 204665, "loss": 0.1353, "lr": 1.8883017496480553e-06, "epoch": 1.1835194097671806, "percentage": 23.67, "elapsed_time": "1:01:52", "remaining_time": "3:19:31", "throughput": 8795.14, "total_tokens": 32652832} +{"current_steps": 48450, "total_steps": 204665, "loss": 0.0216, "lr": 1.8882625818042043e-06, "epoch": 1.1836415605990276, "percentage": 23.67, "elapsed_time": "1:01:52", "remaining_time": "3:19:31", "throughput": 8795.2, "total_tokens": 32656096} +{"current_steps": 48455, "total_steps": 204665, "loss": 0.0541, "lr": 1.888223407500669e-06, "epoch": 1.1837637114308748, "percentage": 23.68, "elapsed_time": "1:01:53", "remaining_time": "3:19:30", "throughput": 8795.29, "total_tokens": 32659488} +{"current_steps": 48460, "total_steps": 204665, "loss": 0.1097, "lr": 1.8881842267377339e-06, "epoch": 1.183885862262722, "percentage": 23.68, "elapsed_time": "1:01:53", "remaining_time": "3:19:30", "throughput": 8795.34, "total_tokens": 32662688} +{"current_steps": 48465, "total_steps": 204665, "loss": 0.1229, "lr": 1.8881450395156844e-06, "epoch": 1.1840080130945692, "percentage": 23.68, "elapsed_time": "1:01:53", "remaining_time": "3:19:29", "throughput": 8795.36, "total_tokens": 32665760} +{"current_steps": 48470, "total_steps": 204665, "loss": 0.0777, "lr": 1.888105845834805e-06, "epoch": 1.1841301639264163, "percentage": 23.68, "elapsed_time": "1:01:54", "remaining_time": "3:19:29", "throughput": 8795.42, "total_tokens": 32669024} +{"current_steps": 48475, "total_steps": 204665, "loss": 0.0822, "lr": 1.8880666456953812e-06, "epoch": 1.1842523147582635, "percentage": 23.69, "elapsed_time": "1:01:54", "remaining_time": "3:19:28", "throughput": 8795.47, "total_tokens": 32672224} +{"current_steps": 48480, "total_steps": 204665, "loss": 0.0383, "lr": 1.8880274390976983e-06, "epoch": 1.1843744655901107, "percentage": 23.69, "elapsed_time": "1:01:55", "remaining_time": "3:19:28", "throughput": 8795.53, "total_tokens": 32675488} +{"current_steps": 48485, "total_steps": 204665, "loss": 0.0351, "lr": 1.8879882260420406e-06, "epoch": 1.184496616421958, "percentage": 23.69, "elapsed_time": "1:01:55", "remaining_time": "3:19:27", "throughput": 8795.58, "total_tokens": 32678688} +{"current_steps": 48490, "total_steps": 204665, "loss": 0.1213, "lr": 1.8879490065286937e-06, "epoch": 1.1846187672538049, "percentage": 23.69, "elapsed_time": "1:01:55", "remaining_time": "3:19:27", "throughput": 8795.83, "total_tokens": 32682784} +{"current_steps": 48495, "total_steps": 204665, "loss": 0.1368, "lr": 1.8879097805579428e-06, "epoch": 1.184740918085652, "percentage": 23.69, "elapsed_time": "1:01:56", "remaining_time": "3:19:26", "throughput": 8795.85, "total_tokens": 32685856} +{"current_steps": 48500, "total_steps": 204665, "loss": 0.0819, "lr": 1.8878705481300732e-06, "epoch": 1.1848630689174993, "percentage": 23.7, "elapsed_time": "1:01:56", "remaining_time": "3:19:26", "throughput": 8795.89, "total_tokens": 32689056} +{"current_steps": 48505, "total_steps": 204665, "loss": 0.1993, "lr": 1.88783130924537e-06, "epoch": 1.1849852197493465, "percentage": 23.7, "elapsed_time": "1:01:56", "remaining_time": "3:19:25", "throughput": 8795.96, "total_tokens": 32692384} +{"current_steps": 48510, "total_steps": 204665, "loss": 0.0644, "lr": 1.887792063904119e-06, "epoch": 1.1851073705811936, "percentage": 23.7, "elapsed_time": "1:01:57", "remaining_time": "3:19:25", "throughput": 8796.03, "total_tokens": 32695712} +{"current_steps": 48515, "total_steps": 204665, "loss": 0.1064, "lr": 1.887752812106605e-06, "epoch": 1.1852295214130408, "percentage": 23.7, "elapsed_time": "1:01:57", "remaining_time": "3:19:24", "throughput": 8796.09, "total_tokens": 32698976} +{"current_steps": 48520, "total_steps": 204665, "loss": 0.0749, "lr": 1.8877135538531139e-06, "epoch": 1.185351672244888, "percentage": 23.71, "elapsed_time": "1:01:57", "remaining_time": "3:19:24", "throughput": 8796.28, "total_tokens": 32702880} +{"current_steps": 48525, "total_steps": 204665, "loss": 0.0552, "lr": 1.887674289143931e-06, "epoch": 1.1854738230767352, "percentage": 23.71, "elapsed_time": "1:01:58", "remaining_time": "3:19:24", "throughput": 8796.41, "total_tokens": 32706464} +{"current_steps": 48530, "total_steps": 204665, "loss": 0.1513, "lr": 1.8876350179793423e-06, "epoch": 1.1855959739085824, "percentage": 23.71, "elapsed_time": "1:01:58", "remaining_time": "3:19:23", "throughput": 8796.45, "total_tokens": 32709664} +{"current_steps": 48535, "total_steps": 204665, "loss": 0.0842, "lr": 1.8875957403596328e-06, "epoch": 1.1857181247404296, "percentage": 23.71, "elapsed_time": "1:01:58", "remaining_time": "3:19:23", "throughput": 8796.56, "total_tokens": 32713184} +{"current_steps": 48540, "total_steps": 204665, "loss": 0.039, "lr": 1.8875564562850882e-06, "epoch": 1.1858402755722766, "percentage": 23.72, "elapsed_time": "1:01:59", "remaining_time": "3:19:22", "throughput": 8796.62, "total_tokens": 32716448} +{"current_steps": 48545, "total_steps": 204665, "loss": 0.0392, "lr": 1.8875171657559943e-06, "epoch": 1.1859624264041237, "percentage": 23.72, "elapsed_time": "1:01:59", "remaining_time": "3:19:22", "throughput": 8796.77, "total_tokens": 32720096} +{"current_steps": 48550, "total_steps": 204665, "loss": 0.0228, "lr": 1.8874778687726369e-06, "epoch": 1.186084577235971, "percentage": 23.72, "elapsed_time": "1:01:59", "remaining_time": "3:19:21", "throughput": 8796.86, "total_tokens": 32723488} +{"current_steps": 48555, "total_steps": 204665, "loss": 0.0652, "lr": 1.8874385653353018e-06, "epoch": 1.1862067280678181, "percentage": 23.72, "elapsed_time": "1:02:00", "remaining_time": "3:19:21", "throughput": 8796.92, "total_tokens": 32726752} +{"current_steps": 48560, "total_steps": 204665, "loss": 0.0803, "lr": 1.8873992554442748e-06, "epoch": 1.1863288788996653, "percentage": 23.73, "elapsed_time": "1:02:00", "remaining_time": "3:19:20", "throughput": 8797.13, "total_tokens": 32730720} +{"current_steps": 48565, "total_steps": 204665, "loss": 0.0438, "lr": 1.8873599390998419e-06, "epoch": 1.1864510297315125, "percentage": 23.73, "elapsed_time": "1:02:00", "remaining_time": "3:19:20", "throughput": 8797.18, "total_tokens": 32733984} +{"current_steps": 48570, "total_steps": 204665, "loss": 0.0943, "lr": 1.8873206163022886e-06, "epoch": 1.1865731805633597, "percentage": 23.73, "elapsed_time": "1:02:01", "remaining_time": "3:19:19", "throughput": 8797.2, "total_tokens": 32737120} +{"current_steps": 48575, "total_steps": 204665, "loss": 0.0423, "lr": 1.887281287051901e-06, "epoch": 1.1866953313952069, "percentage": 23.73, "elapsed_time": "1:02:01", "remaining_time": "3:19:19", "throughput": 8797.29, "total_tokens": 32740512} +{"current_steps": 48580, "total_steps": 204665, "loss": 0.0007, "lr": 1.8872419513489652e-06, "epoch": 1.1868174822270539, "percentage": 23.74, "elapsed_time": "1:02:01", "remaining_time": "3:19:18", "throughput": 8797.26, "total_tokens": 32743392} +{"current_steps": 48585, "total_steps": 204665, "loss": 0.0018, "lr": 1.8872026091937676e-06, "epoch": 1.186939633058901, "percentage": 23.74, "elapsed_time": "1:02:02", "remaining_time": "3:19:18", "throughput": 8797.27, "total_tokens": 32746400} +{"current_steps": 48590, "total_steps": 204665, "loss": 0.0158, "lr": 1.8871632605865939e-06, "epoch": 1.1870617838907482, "percentage": 23.74, "elapsed_time": "1:02:02", "remaining_time": "3:19:17", "throughput": 8797.3, "total_tokens": 32749536} +{"current_steps": 48595, "total_steps": 204665, "loss": 0.0004, "lr": 1.8871239055277304e-06, "epoch": 1.1871839347225954, "percentage": 23.74, "elapsed_time": "1:02:03", "remaining_time": "3:19:17", "throughput": 8797.4, "total_tokens": 32752992} +{"current_steps": 48600, "total_steps": 204665, "loss": 0.1389, "lr": 1.8870845440174632e-06, "epoch": 1.1873060855544426, "percentage": 23.75, "elapsed_time": "1:02:03", "remaining_time": "3:19:16", "throughput": 8797.48, "total_tokens": 32756384} +{"current_steps": 48605, "total_steps": 204665, "loss": 0.0459, "lr": 1.8870451760560785e-06, "epoch": 1.1874282363862898, "percentage": 23.75, "elapsed_time": "1:02:03", "remaining_time": "3:19:16", "throughput": 8797.6, "total_tokens": 32759904} +{"current_steps": 48610, "total_steps": 204665, "loss": 0.0503, "lr": 1.8870058016438629e-06, "epoch": 1.187550387218137, "percentage": 23.75, "elapsed_time": "1:02:04", "remaining_time": "3:19:15", "throughput": 8797.67, "total_tokens": 32763232} +{"current_steps": 48615, "total_steps": 204665, "loss": 0.1476, "lr": 1.8869664207811025e-06, "epoch": 1.1876725380499842, "percentage": 23.75, "elapsed_time": "1:02:04", "remaining_time": "3:19:15", "throughput": 8797.79, "total_tokens": 32766752} +{"current_steps": 48620, "total_steps": 204665, "loss": 0.05, "lr": 1.8869270334680833e-06, "epoch": 1.1877946888818314, "percentage": 23.76, "elapsed_time": "1:02:04", "remaining_time": "3:19:14", "throughput": 8797.91, "total_tokens": 32770336} +{"current_steps": 48625, "total_steps": 204665, "loss": 0.1422, "lr": 1.8868876397050925e-06, "epoch": 1.1879168397136786, "percentage": 23.76, "elapsed_time": "1:02:05", "remaining_time": "3:19:14", "throughput": 8797.97, "total_tokens": 32773600} +{"current_steps": 48630, "total_steps": 204665, "loss": 0.1041, "lr": 1.8868482394924163e-06, "epoch": 1.1880389905455255, "percentage": 23.76, "elapsed_time": "1:02:05", "remaining_time": "3:19:13", "throughput": 8798.02, "total_tokens": 32776800} +{"current_steps": 48635, "total_steps": 204665, "loss": 0.0052, "lr": 1.886808832830341e-06, "epoch": 1.1881611413773727, "percentage": 23.76, "elapsed_time": "1:02:05", "remaining_time": "3:19:13", "throughput": 8798.09, "total_tokens": 32780064} +{"current_steps": 48640, "total_steps": 204665, "loss": 0.0493, "lr": 1.8867694197191536e-06, "epoch": 1.18828329220922, "percentage": 23.77, "elapsed_time": "1:02:06", "remaining_time": "3:19:12", "throughput": 8798.21, "total_tokens": 32783648} +{"current_steps": 48645, "total_steps": 204665, "loss": 0.0694, "lr": 1.8867300001591402e-06, "epoch": 1.188405443041067, "percentage": 23.77, "elapsed_time": "1:02:06", "remaining_time": "3:19:12", "throughput": 8798.36, "total_tokens": 32787296} +{"current_steps": 48650, "total_steps": 204665, "loss": 0.0524, "lr": 1.8866905741505878e-06, "epoch": 1.1885275938729143, "percentage": 23.77, "elapsed_time": "1:02:06", "remaining_time": "3:19:11", "throughput": 8798.45, "total_tokens": 32790688} +{"current_steps": 48655, "total_steps": 204665, "loss": 0.1508, "lr": 1.8866511416937833e-06, "epoch": 1.1886497447047615, "percentage": 23.77, "elapsed_time": "1:02:07", "remaining_time": "3:19:11", "throughput": 8798.49, "total_tokens": 32793824} +{"current_steps": 48660, "total_steps": 204665, "loss": 0.1519, "lr": 1.8866117027890128e-06, "epoch": 1.1887718955366087, "percentage": 23.78, "elapsed_time": "1:02:07", "remaining_time": "3:19:10", "throughput": 8798.67, "total_tokens": 32797664} +{"current_steps": 48665, "total_steps": 204665, "loss": 0.1448, "lr": 1.8865722574365639e-06, "epoch": 1.1888940463684559, "percentage": 23.78, "elapsed_time": "1:02:07", "remaining_time": "3:19:10", "throughput": 8798.83, "total_tokens": 32801376} +{"current_steps": 48670, "total_steps": 204665, "loss": 0.1891, "lr": 1.8865328056367229e-06, "epoch": 1.1890161972003028, "percentage": 23.78, "elapsed_time": "1:02:08", "remaining_time": "3:19:09", "throughput": 8799.03, "total_tokens": 32805280} +{"current_steps": 48675, "total_steps": 204665, "loss": 0.0652, "lr": 1.886493347389777e-06, "epoch": 1.18913834803215, "percentage": 23.78, "elapsed_time": "1:02:08", "remaining_time": "3:19:09", "throughput": 8799.11, "total_tokens": 32808608} +{"current_steps": 48680, "total_steps": 204665, "loss": 0.098, "lr": 1.886453882696013e-06, "epoch": 1.1892604988639972, "percentage": 23.79, "elapsed_time": "1:02:08", "remaining_time": "3:19:08", "throughput": 8799.15, "total_tokens": 32811808} +{"current_steps": 48685, "total_steps": 204665, "loss": 0.0018, "lr": 1.8864144115557177e-06, "epoch": 1.1893826496958444, "percentage": 23.79, "elapsed_time": "1:02:09", "remaining_time": "3:19:08", "throughput": 8799.3, "total_tokens": 32815456} +{"current_steps": 48690, "total_steps": 204665, "loss": 0.0915, "lr": 1.8863749339691788e-06, "epoch": 1.1895048005276916, "percentage": 23.79, "elapsed_time": "1:02:09", "remaining_time": "3:19:07", "throughput": 8799.36, "total_tokens": 32818720} +{"current_steps": 48695, "total_steps": 204665, "loss": 0.0542, "lr": 1.8863354499366825e-06, "epoch": 1.1896269513595388, "percentage": 23.79, "elapsed_time": "1:02:10", "remaining_time": "3:19:07", "throughput": 8799.42, "total_tokens": 32821984} +{"current_steps": 48700, "total_steps": 204665, "loss": 0.0797, "lr": 1.8862959594585166e-06, "epoch": 1.189749102191386, "percentage": 23.79, "elapsed_time": "1:02:10", "remaining_time": "3:19:06", "throughput": 8799.53, "total_tokens": 32825440} +{"current_steps": 48705, "total_steps": 204665, "loss": 0.102, "lr": 1.8862564625349683e-06, "epoch": 1.1898712530232332, "percentage": 23.8, "elapsed_time": "1:02:10", "remaining_time": "3:19:06", "throughput": 8799.56, "total_tokens": 32828576} +{"current_steps": 48710, "total_steps": 204665, "loss": 0.0545, "lr": 1.8862169591663247e-06, "epoch": 1.1899934038550803, "percentage": 23.8, "elapsed_time": "1:02:11", "remaining_time": "3:19:05", "throughput": 8799.78, "total_tokens": 32832608} +{"current_steps": 48715, "total_steps": 204665, "loss": 0.1036, "lr": 1.8861774493528725e-06, "epoch": 1.1901155546869275, "percentage": 23.8, "elapsed_time": "1:02:11", "remaining_time": "3:19:05", "throughput": 8799.86, "total_tokens": 32835936} +{"current_steps": 48720, "total_steps": 204665, "loss": 0.1614, "lr": 1.8861379330949002e-06, "epoch": 1.1902377055187745, "percentage": 23.8, "elapsed_time": "1:02:11", "remaining_time": "3:19:04", "throughput": 8799.94, "total_tokens": 32839264} +{"current_steps": 48725, "total_steps": 204665, "loss": 0.0346, "lr": 1.886098410392694e-06, "epoch": 1.1903598563506217, "percentage": 23.81, "elapsed_time": "1:02:12", "remaining_time": "3:19:04", "throughput": 8800.03, "total_tokens": 32842656} +{"current_steps": 48730, "total_steps": 204665, "loss": 0.0614, "lr": 1.886058881246542e-06, "epoch": 1.1904820071824689, "percentage": 23.81, "elapsed_time": "1:02:12", "remaining_time": "3:19:03", "throughput": 8800.14, "total_tokens": 32846176} +{"current_steps": 48735, "total_steps": 204665, "loss": 0.0386, "lr": 1.8860193456567313e-06, "epoch": 1.190604158014316, "percentage": 23.81, "elapsed_time": "1:02:12", "remaining_time": "3:19:03", "throughput": 8800.23, "total_tokens": 32849568} +{"current_steps": 48740, "total_steps": 204665, "loss": 0.1608, "lr": 1.8859798036235498e-06, "epoch": 1.1907263088461633, "percentage": 23.81, "elapsed_time": "1:02:13", "remaining_time": "3:19:02", "throughput": 8800.35, "total_tokens": 32853088} +{"current_steps": 48745, "total_steps": 204665, "loss": 0.0523, "lr": 1.8859402551472847e-06, "epoch": 1.1908484596780105, "percentage": 23.82, "elapsed_time": "1:02:13", "remaining_time": "3:19:02", "throughput": 8800.44, "total_tokens": 32856544} +{"current_steps": 48750, "total_steps": 204665, "loss": 0.0688, "lr": 1.8859007002282242e-06, "epoch": 1.1909706105098576, "percentage": 23.82, "elapsed_time": "1:02:13", "remaining_time": "3:19:01", "throughput": 8800.55, "total_tokens": 32860064} +{"current_steps": 48755, "total_steps": 204665, "loss": 0.2705, "lr": 1.8858611388666552e-06, "epoch": 1.1910927613417048, "percentage": 23.82, "elapsed_time": "1:02:14", "remaining_time": "3:19:01", "throughput": 8800.58, "total_tokens": 32863136} +{"current_steps": 48760, "total_steps": 204665, "loss": 0.0421, "lr": 1.8858215710628657e-06, "epoch": 1.1912149121735518, "percentage": 23.82, "elapsed_time": "1:02:14", "remaining_time": "3:19:00", "throughput": 8800.65, "total_tokens": 32866464} +{"current_steps": 48765, "total_steps": 204665, "loss": 0.1451, "lr": 1.8857819968171436e-06, "epoch": 1.191337063005399, "percentage": 23.83, "elapsed_time": "1:02:14", "remaining_time": "3:19:00", "throughput": 8800.81, "total_tokens": 32870176} +{"current_steps": 48770, "total_steps": 204665, "loss": 0.0495, "lr": 1.8857424161297764e-06, "epoch": 1.1914592138372462, "percentage": 23.83, "elapsed_time": "1:02:15", "remaining_time": "3:18:59", "throughput": 8800.94, "total_tokens": 32873760} +{"current_steps": 48775, "total_steps": 204665, "loss": 0.172, "lr": 1.8857028290010524e-06, "epoch": 1.1915813646690934, "percentage": 23.83, "elapsed_time": "1:02:15", "remaining_time": "3:18:59", "throughput": 8800.97, "total_tokens": 32876896} +{"current_steps": 48780, "total_steps": 204665, "loss": 0.0676, "lr": 1.885663235431259e-06, "epoch": 1.1917035155009406, "percentage": 23.83, "elapsed_time": "1:02:15", "remaining_time": "3:18:58", "throughput": 8801.13, "total_tokens": 32880608} +{"current_steps": 48785, "total_steps": 204665, "loss": 0.0525, "lr": 1.8856236354206843e-06, "epoch": 1.1918256663327877, "percentage": 23.84, "elapsed_time": "1:02:16", "remaining_time": "3:18:58", "throughput": 8801.14, "total_tokens": 32883680} +{"current_steps": 48790, "total_steps": 204665, "loss": 0.0794, "lr": 1.8855840289696165e-06, "epoch": 1.191947817164635, "percentage": 23.84, "elapsed_time": "1:02:16", "remaining_time": "3:18:57", "throughput": 8801.18, "total_tokens": 32886816} +{"current_steps": 48795, "total_steps": 204665, "loss": 0.0449, "lr": 1.885544416078343e-06, "epoch": 1.1920699679964821, "percentage": 23.84, "elapsed_time": "1:02:16", "remaining_time": "3:18:57", "throughput": 8801.25, "total_tokens": 32890144} +{"current_steps": 48800, "total_steps": 204665, "loss": 0.3148, "lr": 1.885504796747153e-06, "epoch": 1.1921921188283293, "percentage": 23.84, "elapsed_time": "1:02:17", "remaining_time": "3:18:56", "throughput": 8801.22, "total_tokens": 32893024} +{"current_steps": 48805, "total_steps": 204665, "loss": 0.0766, "lr": 1.8854651709763334e-06, "epoch": 1.1923142696601765, "percentage": 23.85, "elapsed_time": "1:02:17", "remaining_time": "3:18:56", "throughput": 8801.45, "total_tokens": 32897056} +{"current_steps": 48810, "total_steps": 204665, "loss": 0.0641, "lr": 1.8854255387661734e-06, "epoch": 1.1924364204920235, "percentage": 23.85, "elapsed_time": "1:02:18", "remaining_time": "3:18:55", "throughput": 8801.45, "total_tokens": 32900064} +{"current_steps": 48815, "total_steps": 204665, "loss": 0.1346, "lr": 1.8853859001169603e-06, "epoch": 1.1925585713238707, "percentage": 23.85, "elapsed_time": "1:02:18", "remaining_time": "3:18:55", "throughput": 8801.54, "total_tokens": 32903456} +{"current_steps": 48820, "total_steps": 204665, "loss": 0.1509, "lr": 1.8853462550289829e-06, "epoch": 1.1926807221557179, "percentage": 23.85, "elapsed_time": "1:02:18", "remaining_time": "3:18:54", "throughput": 8801.55, "total_tokens": 32906528} +{"current_steps": 48825, "total_steps": 204665, "loss": 0.1062, "lr": 1.8853066035025295e-06, "epoch": 1.192802872987565, "percentage": 23.86, "elapsed_time": "1:02:19", "remaining_time": "3:18:54", "throughput": 8801.63, "total_tokens": 32909920} +{"current_steps": 48830, "total_steps": 204665, "loss": 0.0804, "lr": 1.8852669455378884e-06, "epoch": 1.1929250238194122, "percentage": 23.86, "elapsed_time": "1:02:19", "remaining_time": "3:18:53", "throughput": 8801.82, "total_tokens": 32913824} +{"current_steps": 48835, "total_steps": 204665, "loss": 0.0714, "lr": 1.8852272811353477e-06, "epoch": 1.1930471746512594, "percentage": 23.86, "elapsed_time": "1:02:19", "remaining_time": "3:18:53", "throughput": 8801.91, "total_tokens": 32917216} +{"current_steps": 48840, "total_steps": 204665, "loss": 0.0757, "lr": 1.8851876102951964e-06, "epoch": 1.1931693254831066, "percentage": 23.86, "elapsed_time": "1:02:20", "remaining_time": "3:18:52", "throughput": 8801.97, "total_tokens": 32920544} +{"current_steps": 48845, "total_steps": 204665, "loss": 0.0366, "lr": 1.8851479330177228e-06, "epoch": 1.1932914763149536, "percentage": 23.87, "elapsed_time": "1:02:20", "remaining_time": "3:18:52", "throughput": 8802.06, "total_tokens": 32924000} +{"current_steps": 48850, "total_steps": 204665, "loss": 0.0422, "lr": 1.885108249303215e-06, "epoch": 1.1934136271468008, "percentage": 23.87, "elapsed_time": "1:02:20", "remaining_time": "3:18:52", "throughput": 8802.17, "total_tokens": 32927456} +{"current_steps": 48855, "total_steps": 204665, "loss": 0.0846, "lr": 1.885068559151962e-06, "epoch": 1.193535777978648, "percentage": 23.87, "elapsed_time": "1:02:21", "remaining_time": "3:18:51", "throughput": 8802.26, "total_tokens": 32930912} +{"current_steps": 48860, "total_steps": 204665, "loss": 0.0491, "lr": 1.8850288625642525e-06, "epoch": 1.1936579288104952, "percentage": 23.87, "elapsed_time": "1:02:21", "remaining_time": "3:18:51", "throughput": 8802.31, "total_tokens": 32934176} +{"current_steps": 48865, "total_steps": 204665, "loss": 0.0707, "lr": 1.8849891595403752e-06, "epoch": 1.1937800796423423, "percentage": 23.88, "elapsed_time": "1:02:21", "remaining_time": "3:18:50", "throughput": 8802.47, "total_tokens": 32937952} +{"current_steps": 48870, "total_steps": 204665, "loss": 0.0488, "lr": 1.8849494500806187e-06, "epoch": 1.1939022304741895, "percentage": 23.88, "elapsed_time": "1:02:22", "remaining_time": "3:18:50", "throughput": 8802.5, "total_tokens": 32941088} +{"current_steps": 48875, "total_steps": 204665, "loss": 0.1003, "lr": 1.8849097341852716e-06, "epoch": 1.1940243813060367, "percentage": 23.88, "elapsed_time": "1:02:22", "remaining_time": "3:18:49", "throughput": 8802.49, "total_tokens": 32944032} +{"current_steps": 48880, "total_steps": 204665, "loss": 0.0657, "lr": 1.884870011854623e-06, "epoch": 1.194146532137884, "percentage": 23.88, "elapsed_time": "1:02:22", "remaining_time": "3:18:49", "throughput": 8802.59, "total_tokens": 32947488} +{"current_steps": 48885, "total_steps": 204665, "loss": 0.0476, "lr": 1.8848302830889615e-06, "epoch": 1.194268682969731, "percentage": 23.89, "elapsed_time": "1:02:23", "remaining_time": "3:18:48", "throughput": 8802.64, "total_tokens": 32950688} +{"current_steps": 48890, "total_steps": 204665, "loss": 0.1174, "lr": 1.8847905478885764e-06, "epoch": 1.1943908338015783, "percentage": 23.89, "elapsed_time": "1:02:23", "remaining_time": "3:18:48", "throughput": 8802.69, "total_tokens": 32953952} +{"current_steps": 48895, "total_steps": 204665, "loss": 0.0362, "lr": 1.884750806253756e-06, "epoch": 1.1945129846334253, "percentage": 23.89, "elapsed_time": "1:02:23", "remaining_time": "3:18:47", "throughput": 8802.78, "total_tokens": 32957344} +{"current_steps": 48900, "total_steps": 204665, "loss": 0.0785, "lr": 1.8847110581847902e-06, "epoch": 1.1946351354652724, "percentage": 23.89, "elapsed_time": "1:02:24", "remaining_time": "3:18:47", "throughput": 8802.85, "total_tokens": 32960672} +{"current_steps": 48905, "total_steps": 204665, "loss": 0.0789, "lr": 1.8846713036819677e-06, "epoch": 1.1947572862971196, "percentage": 23.9, "elapsed_time": "1:02:24", "remaining_time": "3:18:46", "throughput": 8802.9, "total_tokens": 32963872} +{"current_steps": 48910, "total_steps": 204665, "loss": 0.099, "lr": 1.8846315427455774e-06, "epoch": 1.1948794371289668, "percentage": 23.9, "elapsed_time": "1:02:25", "remaining_time": "3:18:46", "throughput": 8802.99, "total_tokens": 32967328} +{"current_steps": 48915, "total_steps": 204665, "loss": 0.0887, "lr": 1.8845917753759086e-06, "epoch": 1.195001587960814, "percentage": 23.9, "elapsed_time": "1:02:25", "remaining_time": "3:18:45", "throughput": 8803.11, "total_tokens": 32970848} +{"current_steps": 48920, "total_steps": 204665, "loss": 0.1821, "lr": 1.8845520015732503e-06, "epoch": 1.1951237387926612, "percentage": 23.9, "elapsed_time": "1:02:25", "remaining_time": "3:18:45", "throughput": 8803.18, "total_tokens": 32974176} +{"current_steps": 48925, "total_steps": 204665, "loss": 0.0193, "lr": 1.8845122213378921e-06, "epoch": 1.1952458896245084, "percentage": 23.9, "elapsed_time": "1:02:26", "remaining_time": "3:18:44", "throughput": 8803.22, "total_tokens": 32977376} +{"current_steps": 48930, "total_steps": 204665, "loss": 0.1289, "lr": 1.884472434670123e-06, "epoch": 1.1953680404563556, "percentage": 23.91, "elapsed_time": "1:02:26", "remaining_time": "3:18:44", "throughput": 8803.26, "total_tokens": 32980576} +{"current_steps": 48935, "total_steps": 204665, "loss": 0.0802, "lr": 1.8844326415702328e-06, "epoch": 1.1954901912882026, "percentage": 23.91, "elapsed_time": "1:02:26", "remaining_time": "3:18:43", "throughput": 8803.26, "total_tokens": 32983584} +{"current_steps": 48940, "total_steps": 204665, "loss": 0.109, "lr": 1.8843928420385101e-06, "epoch": 1.1956123421200497, "percentage": 23.91, "elapsed_time": "1:02:27", "remaining_time": "3:18:43", "throughput": 8803.29, "total_tokens": 32986720} +{"current_steps": 48945, "total_steps": 204665, "loss": 0.0378, "lr": 1.884353036075245e-06, "epoch": 1.195734492951897, "percentage": 23.91, "elapsed_time": "1:02:27", "remaining_time": "3:18:42", "throughput": 8803.46, "total_tokens": 32990496} +{"current_steps": 48950, "total_steps": 204665, "loss": 0.1408, "lr": 1.8843132236807268e-06, "epoch": 1.1958566437837441, "percentage": 23.92, "elapsed_time": "1:02:27", "remaining_time": "3:18:42", "throughput": 8803.51, "total_tokens": 32993760} +{"current_steps": 48955, "total_steps": 204665, "loss": 0.0941, "lr": 1.8842734048552451e-06, "epoch": 1.1959787946155913, "percentage": 23.92, "elapsed_time": "1:02:28", "remaining_time": "3:18:41", "throughput": 8803.57, "total_tokens": 32997024} +{"current_steps": 48960, "total_steps": 204665, "loss": 0.0022, "lr": 1.884233579599089e-06, "epoch": 1.1961009454474385, "percentage": 23.92, "elapsed_time": "1:02:28", "remaining_time": "3:18:41", "throughput": 8803.78, "total_tokens": 33000992} +{"current_steps": 48965, "total_steps": 204665, "loss": 0.048, "lr": 1.8841937479125488e-06, "epoch": 1.1962230962792857, "percentage": 23.92, "elapsed_time": "1:02:28", "remaining_time": "3:18:40", "throughput": 8803.84, "total_tokens": 33004256} +{"current_steps": 48970, "total_steps": 204665, "loss": 0.175, "lr": 1.8841539097959135e-06, "epoch": 1.1963452471111329, "percentage": 23.93, "elapsed_time": "1:02:29", "remaining_time": "3:18:40", "throughput": 8804.0, "total_tokens": 33008032} +{"current_steps": 48975, "total_steps": 204665, "loss": 0.1101, "lr": 1.8841140652494736e-06, "epoch": 1.19646739794298, "percentage": 23.93, "elapsed_time": "1:02:29", "remaining_time": "3:18:39", "throughput": 8804.14, "total_tokens": 33011680} +{"current_steps": 48980, "total_steps": 204665, "loss": 0.1273, "lr": 1.8840742142735179e-06, "epoch": 1.1965895487748273, "percentage": 23.93, "elapsed_time": "1:02:29", "remaining_time": "3:18:39", "throughput": 8804.18, "total_tokens": 33014880} +{"current_steps": 48985, "total_steps": 204665, "loss": 0.1129, "lr": 1.8840343568683373e-06, "epoch": 1.1967116996066742, "percentage": 23.93, "elapsed_time": "1:02:30", "remaining_time": "3:18:38", "throughput": 8804.27, "total_tokens": 33018272} +{"current_steps": 48990, "total_steps": 204665, "loss": 0.0782, "lr": 1.8839944930342207e-06, "epoch": 1.1968338504385214, "percentage": 23.94, "elapsed_time": "1:02:30", "remaining_time": "3:18:38", "throughput": 8804.35, "total_tokens": 33021664} +{"current_steps": 48995, "total_steps": 204665, "loss": 0.122, "lr": 1.8839546227714584e-06, "epoch": 1.1969560012703686, "percentage": 23.94, "elapsed_time": "1:02:30", "remaining_time": "3:18:37", "throughput": 8804.34, "total_tokens": 33024608} +{"current_steps": 49000, "total_steps": 204665, "loss": 0.0397, "lr": 1.8839147460803404e-06, "epoch": 1.1970781521022158, "percentage": 23.94, "elapsed_time": "1:02:31", "remaining_time": "3:18:37", "throughput": 8804.39, "total_tokens": 33027872} +{"current_steps": 49005, "total_steps": 204665, "loss": 0.1236, "lr": 1.8838748629611568e-06, "epoch": 1.197200302934063, "percentage": 23.94, "elapsed_time": "1:02:31", "remaining_time": "3:18:36", "throughput": 8804.44, "total_tokens": 33031136} +{"current_steps": 49010, "total_steps": 204665, "loss": 0.0368, "lr": 1.8838349734141972e-06, "epoch": 1.1973224537659102, "percentage": 23.95, "elapsed_time": "1:02:31", "remaining_time": "3:18:36", "throughput": 8804.45, "total_tokens": 33034144} +{"current_steps": 49015, "total_steps": 204665, "loss": 0.0434, "lr": 1.8837950774397519e-06, "epoch": 1.1974446045977574, "percentage": 23.95, "elapsed_time": "1:02:32", "remaining_time": "3:18:35", "throughput": 8804.49, "total_tokens": 33037344} +{"current_steps": 49020, "total_steps": 204665, "loss": 0.0687, "lr": 1.8837551750381114e-06, "epoch": 1.1975667554296046, "percentage": 23.95, "elapsed_time": "1:02:32", "remaining_time": "3:18:35", "throughput": 8804.51, "total_tokens": 33040416} +{"current_steps": 49025, "total_steps": 204665, "loss": 0.0608, "lr": 1.8837152662095654e-06, "epoch": 1.1976889062614515, "percentage": 23.95, "elapsed_time": "1:02:33", "remaining_time": "3:18:34", "throughput": 8804.54, "total_tokens": 33043552} +{"current_steps": 49030, "total_steps": 204665, "loss": 0.0286, "lr": 1.8836753509544043e-06, "epoch": 1.1978110570932987, "percentage": 23.96, "elapsed_time": "1:02:33", "remaining_time": "3:18:34", "throughput": 8804.58, "total_tokens": 33046752} +{"current_steps": 49035, "total_steps": 204665, "loss": 0.0565, "lr": 1.8836354292729184e-06, "epoch": 1.197933207925146, "percentage": 23.96, "elapsed_time": "1:02:33", "remaining_time": "3:18:33", "throughput": 8804.66, "total_tokens": 33050080} +{"current_steps": 49040, "total_steps": 204665, "loss": 0.2166, "lr": 1.8835955011653977e-06, "epoch": 1.198055358756993, "percentage": 23.96, "elapsed_time": "1:02:34", "remaining_time": "3:18:33", "throughput": 8804.71, "total_tokens": 33053344} +{"current_steps": 49045, "total_steps": 204665, "loss": 0.0579, "lr": 1.8835555666321333e-06, "epoch": 1.1981775095888403, "percentage": 23.96, "elapsed_time": "1:02:34", "remaining_time": "3:18:32", "throughput": 8804.75, "total_tokens": 33056544} +{"current_steps": 49050, "total_steps": 204665, "loss": 0.0819, "lr": 1.8835156256734148e-06, "epoch": 1.1982996604206875, "percentage": 23.97, "elapsed_time": "1:02:34", "remaining_time": "3:18:32", "throughput": 8804.77, "total_tokens": 33059616} +{"current_steps": 49055, "total_steps": 204665, "loss": 0.0732, "lr": 1.8834756782895331e-06, "epoch": 1.1984218112525347, "percentage": 23.97, "elapsed_time": "1:02:35", "remaining_time": "3:18:31", "throughput": 8804.83, "total_tokens": 33062944} +{"current_steps": 49060, "total_steps": 204665, "loss": 0.0728, "lr": 1.883435724480779e-06, "epoch": 1.1985439620843819, "percentage": 23.97, "elapsed_time": "1:02:35", "remaining_time": "3:18:31", "throughput": 8804.95, "total_tokens": 33066464} +{"current_steps": 49065, "total_steps": 204665, "loss": 0.0304, "lr": 1.8833957642474424e-06, "epoch": 1.198666112916229, "percentage": 23.97, "elapsed_time": "1:02:35", "remaining_time": "3:18:30", "throughput": 8805.22, "total_tokens": 33070752} +{"current_steps": 49070, "total_steps": 204665, "loss": 0.0928, "lr": 1.8833557975898141e-06, "epoch": 1.1987882637480762, "percentage": 23.98, "elapsed_time": "1:02:36", "remaining_time": "3:18:30", "throughput": 8805.26, "total_tokens": 33073952} +{"current_steps": 49075, "total_steps": 204665, "loss": 0.081, "lr": 1.883315824508185e-06, "epoch": 1.1989104145799232, "percentage": 23.98, "elapsed_time": "1:02:36", "remaining_time": "3:18:29", "throughput": 8805.3, "total_tokens": 33077152} +{"current_steps": 49080, "total_steps": 204665, "loss": 0.0835, "lr": 1.8832758450028456e-06, "epoch": 1.1990325654117704, "percentage": 23.98, "elapsed_time": "1:02:36", "remaining_time": "3:18:29", "throughput": 8805.39, "total_tokens": 33080544} +{"current_steps": 49085, "total_steps": 204665, "loss": 0.0907, "lr": 1.883235859074087e-06, "epoch": 1.1991547162436176, "percentage": 23.98, "elapsed_time": "1:02:37", "remaining_time": "3:18:28", "throughput": 8805.46, "total_tokens": 33083872} +{"current_steps": 49090, "total_steps": 204665, "loss": 0.1061, "lr": 1.8831958667221992e-06, "epoch": 1.1992768670754648, "percentage": 23.99, "elapsed_time": "1:02:37", "remaining_time": "3:18:28", "throughput": 8805.6, "total_tokens": 33087520} +{"current_steps": 49095, "total_steps": 204665, "loss": 0.1422, "lr": 1.8831558679474738e-06, "epoch": 1.199399017907312, "percentage": 23.99, "elapsed_time": "1:02:37", "remaining_time": "3:18:27", "throughput": 8805.68, "total_tokens": 33090912} +{"current_steps": 49100, "total_steps": 204665, "loss": 0.1067, "lr": 1.8831158627502012e-06, "epoch": 1.1995211687391591, "percentage": 23.99, "elapsed_time": "1:02:38", "remaining_time": "3:18:27", "throughput": 8805.76, "total_tokens": 33094304} +{"current_steps": 49105, "total_steps": 204665, "loss": 0.0296, "lr": 1.8830758511306726e-06, "epoch": 1.1996433195710063, "percentage": 23.99, "elapsed_time": "1:02:38", "remaining_time": "3:18:26", "throughput": 8805.8, "total_tokens": 33097504} +{"current_steps": 49110, "total_steps": 204665, "loss": 0.1116, "lr": 1.8830358330891789e-06, "epoch": 1.1997654704028535, "percentage": 24.0, "elapsed_time": "1:02:38", "remaining_time": "3:18:26", "throughput": 8805.85, "total_tokens": 33100704} +{"current_steps": 49115, "total_steps": 204665, "loss": 0.0305, "lr": 1.882995808626011e-06, "epoch": 1.1998876212347005, "percentage": 24.0, "elapsed_time": "1:02:39", "remaining_time": "3:18:25", "throughput": 8805.9, "total_tokens": 33103968} +{"current_steps": 49120, "total_steps": 204665, "loss": 0.0876, "lr": 1.8829557777414602e-06, "epoch": 1.2000097720665477, "percentage": 24.0, "elapsed_time": "1:02:39", "remaining_time": "3:18:25", "throughput": 8806.05, "total_tokens": 33107680} +{"current_steps": 49125, "total_steps": 204665, "loss": 0.2117, "lr": 1.8829157404358176e-06, "epoch": 1.2001319228983949, "percentage": 24.0, "elapsed_time": "1:02:39", "remaining_time": "3:18:24", "throughput": 8806.11, "total_tokens": 33110944} +{"current_steps": 49130, "total_steps": 204665, "loss": 0.1363, "lr": 1.882875696709374e-06, "epoch": 1.200254073730242, "percentage": 24.01, "elapsed_time": "1:02:40", "remaining_time": "3:18:24", "throughput": 8806.16, "total_tokens": 33114208} +{"current_steps": 49135, "total_steps": 204665, "loss": 0.0076, "lr": 1.882835646562421e-06, "epoch": 1.2003762245620893, "percentage": 24.01, "elapsed_time": "1:02:40", "remaining_time": "3:18:23", "throughput": 8806.25, "total_tokens": 33117600} +{"current_steps": 49140, "total_steps": 204665, "loss": 0.1247, "lr": 1.8827955899952497e-06, "epoch": 1.2004983753939364, "percentage": 24.01, "elapsed_time": "1:02:41", "remaining_time": "3:18:23", "throughput": 8806.39, "total_tokens": 33121248} +{"current_steps": 49145, "total_steps": 204665, "loss": 0.0067, "lr": 1.8827555270081513e-06, "epoch": 1.2006205262257836, "percentage": 24.01, "elapsed_time": "1:02:41", "remaining_time": "3:18:22", "throughput": 8806.43, "total_tokens": 33124512} +{"current_steps": 49150, "total_steps": 204665, "loss": 0.196, "lr": 1.8827154576014178e-06, "epoch": 1.2007426770576308, "percentage": 24.01, "elapsed_time": "1:02:41", "remaining_time": "3:18:22", "throughput": 8806.5, "total_tokens": 33127840} +{"current_steps": 49155, "total_steps": 204665, "loss": 0.0318, "lr": 1.8826753817753396e-06, "epoch": 1.200864827889478, "percentage": 24.02, "elapsed_time": "1:02:42", "remaining_time": "3:18:21", "throughput": 8806.48, "total_tokens": 33130784} +{"current_steps": 49160, "total_steps": 204665, "loss": 0.1143, "lr": 1.8826352995302086e-06, "epoch": 1.2009869787213252, "percentage": 24.02, "elapsed_time": "1:02:42", "remaining_time": "3:18:21", "throughput": 8806.57, "total_tokens": 33134176} +{"current_steps": 49165, "total_steps": 204665, "loss": 0.0035, "lr": 1.8825952108663163e-06, "epoch": 1.2011091295531722, "percentage": 24.02, "elapsed_time": "1:02:42", "remaining_time": "3:18:20", "throughput": 8806.59, "total_tokens": 33137248} +{"current_steps": 49170, "total_steps": 204665, "loss": 0.1935, "lr": 1.8825551157839543e-06, "epoch": 1.2012312803850194, "percentage": 24.02, "elapsed_time": "1:02:43", "remaining_time": "3:18:20", "throughput": 8806.64, "total_tokens": 33140512} +{"current_steps": 49175, "total_steps": 204665, "loss": 0.1047, "lr": 1.8825150142834143e-06, "epoch": 1.2013534312168666, "percentage": 24.03, "elapsed_time": "1:02:43", "remaining_time": "3:18:20", "throughput": 8806.74, "total_tokens": 33143968} +{"current_steps": 49180, "total_steps": 204665, "loss": 0.0758, "lr": 1.8824749063649876e-06, "epoch": 1.2014755820487137, "percentage": 24.03, "elapsed_time": "1:02:43", "remaining_time": "3:18:19", "throughput": 8806.88, "total_tokens": 33147616} +{"current_steps": 49185, "total_steps": 204665, "loss": 0.2307, "lr": 1.882434792028966e-06, "epoch": 1.201597732880561, "percentage": 24.03, "elapsed_time": "1:02:44", "remaining_time": "3:18:19", "throughput": 8806.97, "total_tokens": 33151008} +{"current_steps": 49190, "total_steps": 204665, "loss": 0.0385, "lr": 1.8823946712756413e-06, "epoch": 1.2017198837124081, "percentage": 24.03, "elapsed_time": "1:02:44", "remaining_time": "3:18:18", "throughput": 8807.17, "total_tokens": 33154976} +{"current_steps": 49195, "total_steps": 204665, "loss": 0.1566, "lr": 1.8823545441053053e-06, "epoch": 1.2018420345442553, "percentage": 24.04, "elapsed_time": "1:02:44", "remaining_time": "3:18:18", "throughput": 8807.23, "total_tokens": 33158240} +{"current_steps": 49200, "total_steps": 204665, "loss": 0.1751, "lr": 1.8823144105182496e-06, "epoch": 1.2019641853761025, "percentage": 24.04, "elapsed_time": "1:02:45", "remaining_time": "3:18:17", "throughput": 8807.4, "total_tokens": 33162016} +{"current_steps": 49205, "total_steps": 204665, "loss": 0.126, "lr": 1.8822742705147663e-06, "epoch": 1.2020863362079495, "percentage": 24.04, "elapsed_time": "1:02:45", "remaining_time": "3:18:17", "throughput": 8807.51, "total_tokens": 33165536} +{"current_steps": 49210, "total_steps": 204665, "loss": 0.1073, "lr": 1.8822341240951469e-06, "epoch": 1.2022084870397967, "percentage": 24.04, "elapsed_time": "1:02:45", "remaining_time": "3:18:16", "throughput": 8807.58, "total_tokens": 33168864} +{"current_steps": 49215, "total_steps": 204665, "loss": 0.1445, "lr": 1.882193971259684e-06, "epoch": 1.2023306378716438, "percentage": 24.05, "elapsed_time": "1:02:46", "remaining_time": "3:18:16", "throughput": 8807.65, "total_tokens": 33172192} +{"current_steps": 49220, "total_steps": 204665, "loss": 0.0778, "lr": 1.8821538120086693e-06, "epoch": 1.202452788703491, "percentage": 24.05, "elapsed_time": "1:02:46", "remaining_time": "3:18:15", "throughput": 8807.79, "total_tokens": 33175840} +{"current_steps": 49225, "total_steps": 204665, "loss": 0.1007, "lr": 1.8821136463423945e-06, "epoch": 1.2025749395353382, "percentage": 24.05, "elapsed_time": "1:02:46", "remaining_time": "3:18:15", "throughput": 8807.87, "total_tokens": 33179232} +{"current_steps": 49230, "total_steps": 204665, "loss": 0.0891, "lr": 1.8820734742611522e-06, "epoch": 1.2026970903671854, "percentage": 24.05, "elapsed_time": "1:02:47", "remaining_time": "3:18:14", "throughput": 8807.92, "total_tokens": 33182496} +{"current_steps": 49235, "total_steps": 204665, "loss": 0.0656, "lr": 1.8820332957652342e-06, "epoch": 1.2028192411990326, "percentage": 24.06, "elapsed_time": "1:02:47", "remaining_time": "3:18:14", "throughput": 8808.01, "total_tokens": 33185888} +{"current_steps": 49240, "total_steps": 204665, "loss": 0.0315, "lr": 1.881993110854933e-06, "epoch": 1.2029413920308798, "percentage": 24.06, "elapsed_time": "1:02:48", "remaining_time": "3:18:13", "throughput": 8808.17, "total_tokens": 33189664} +{"current_steps": 49245, "total_steps": 204665, "loss": 0.0093, "lr": 1.8819529195305405e-06, "epoch": 1.203063542862727, "percentage": 24.06, "elapsed_time": "1:02:48", "remaining_time": "3:18:13", "throughput": 8808.19, "total_tokens": 33192736} +{"current_steps": 49250, "total_steps": 204665, "loss": 0.0607, "lr": 1.8819127217923492e-06, "epoch": 1.2031856936945742, "percentage": 24.06, "elapsed_time": "1:02:48", "remaining_time": "3:18:12", "throughput": 8808.36, "total_tokens": 33196576} +{"current_steps": 49255, "total_steps": 204665, "loss": 0.0382, "lr": 1.8818725176406515e-06, "epoch": 1.2033078445264211, "percentage": 24.07, "elapsed_time": "1:02:49", "remaining_time": "3:18:12", "throughput": 8808.41, "total_tokens": 33199776} +{"current_steps": 49260, "total_steps": 204665, "loss": 0.106, "lr": 1.8818323070757397e-06, "epoch": 1.2034299953582683, "percentage": 24.07, "elapsed_time": "1:02:49", "remaining_time": "3:18:11", "throughput": 8808.47, "total_tokens": 33203040} +{"current_steps": 49265, "total_steps": 204665, "loss": 0.093, "lr": 1.881792090097906e-06, "epoch": 1.2035521461901155, "percentage": 24.07, "elapsed_time": "1:02:49", "remaining_time": "3:18:11", "throughput": 8808.51, "total_tokens": 33206240} +{"current_steps": 49270, "total_steps": 204665, "loss": 0.1329, "lr": 1.881751866707443e-06, "epoch": 1.2036742970219627, "percentage": 24.07, "elapsed_time": "1:02:50", "remaining_time": "3:18:10", "throughput": 8808.51, "total_tokens": 33209248} +{"current_steps": 49275, "total_steps": 204665, "loss": 0.0011, "lr": 1.8817116369046435e-06, "epoch": 1.20379644785381, "percentage": 24.08, "elapsed_time": "1:02:50", "remaining_time": "3:18:10", "throughput": 8808.54, "total_tokens": 33212384} +{"current_steps": 49280, "total_steps": 204665, "loss": 0.0017, "lr": 1.8816714006897998e-06, "epoch": 1.203918598685657, "percentage": 24.08, "elapsed_time": "1:02:50", "remaining_time": "3:18:09", "throughput": 8808.64, "total_tokens": 33215840} +{"current_steps": 49285, "total_steps": 204665, "loss": 0.114, "lr": 1.8816311580632042e-06, "epoch": 1.2040407495175043, "percentage": 24.08, "elapsed_time": "1:02:51", "remaining_time": "3:18:09", "throughput": 8808.73, "total_tokens": 33219232} +{"current_steps": 49290, "total_steps": 204665, "loss": 0.0705, "lr": 1.88159090902515e-06, "epoch": 1.2041629003493515, "percentage": 24.08, "elapsed_time": "1:02:51", "remaining_time": "3:18:08", "throughput": 8808.84, "total_tokens": 33222752} +{"current_steps": 49295, "total_steps": 204665, "loss": 0.1219, "lr": 1.8815506535759296e-06, "epoch": 1.2042850511811984, "percentage": 24.09, "elapsed_time": "1:02:51", "remaining_time": "3:18:08", "throughput": 8808.88, "total_tokens": 33225952} +{"current_steps": 49300, "total_steps": 204665, "loss": 0.1627, "lr": 1.8815103917158356e-06, "epoch": 1.2044072020130456, "percentage": 24.09, "elapsed_time": "1:02:52", "remaining_time": "3:18:07", "throughput": 8809.04, "total_tokens": 33229664} +{"current_steps": 49305, "total_steps": 204665, "loss": 0.0472, "lr": 1.881470123445161e-06, "epoch": 1.2045293528448928, "percentage": 24.09, "elapsed_time": "1:02:52", "remaining_time": "3:18:07", "throughput": 8809.22, "total_tokens": 33233504} +{"current_steps": 49310, "total_steps": 204665, "loss": 0.0009, "lr": 1.8814298487641986e-06, "epoch": 1.20465150367674, "percentage": 24.09, "elapsed_time": "1:02:52", "remaining_time": "3:18:06", "throughput": 8809.22, "total_tokens": 33236512} +{"current_steps": 49315, "total_steps": 204665, "loss": 0.0599, "lr": 1.8813895676732411e-06, "epoch": 1.2047736545085872, "percentage": 24.1, "elapsed_time": "1:02:53", "remaining_time": "3:18:06", "throughput": 8809.32, "total_tokens": 33239968} +{"current_steps": 49320, "total_steps": 204665, "loss": 0.0182, "lr": 1.8813492801725818e-06, "epoch": 1.2048958053404344, "percentage": 24.1, "elapsed_time": "1:02:53", "remaining_time": "3:18:05", "throughput": 8809.34, "total_tokens": 33243104} +{"current_steps": 49325, "total_steps": 204665, "loss": 0.1218, "lr": 1.8813089862625136e-06, "epoch": 1.2050179561722816, "percentage": 24.1, "elapsed_time": "1:02:53", "remaining_time": "3:18:05", "throughput": 8809.35, "total_tokens": 33246112} +{"current_steps": 49330, "total_steps": 204665, "loss": 0.0602, "lr": 1.881268685943329e-06, "epoch": 1.2051401070041288, "percentage": 24.1, "elapsed_time": "1:02:54", "remaining_time": "3:18:04", "throughput": 8809.45, "total_tokens": 33249568} +{"current_steps": 49335, "total_steps": 204665, "loss": 0.0861, "lr": 1.881228379215322e-06, "epoch": 1.205262257835976, "percentage": 24.11, "elapsed_time": "1:02:54", "remaining_time": "3:18:04", "throughput": 8809.64, "total_tokens": 33253472} +{"current_steps": 49340, "total_steps": 204665, "loss": 0.1753, "lr": 1.8811880660787846e-06, "epoch": 1.2053844086678231, "percentage": 24.11, "elapsed_time": "1:02:55", "remaining_time": "3:18:03", "throughput": 8809.78, "total_tokens": 33257120} +{"current_steps": 49345, "total_steps": 204665, "loss": 0.0815, "lr": 1.881147746534011e-06, "epoch": 1.2055065594996701, "percentage": 24.11, "elapsed_time": "1:02:55", "remaining_time": "3:18:03", "throughput": 8809.83, "total_tokens": 33260384} +{"current_steps": 49350, "total_steps": 204665, "loss": 0.0344, "lr": 1.8811074205812938e-06, "epoch": 1.2056287103315173, "percentage": 24.11, "elapsed_time": "1:02:55", "remaining_time": "3:18:02", "throughput": 8809.9, "total_tokens": 33263712} +{"current_steps": 49355, "total_steps": 204665, "loss": 0.0385, "lr": 1.8810670882209264e-06, "epoch": 1.2057508611633645, "percentage": 24.12, "elapsed_time": "1:02:56", "remaining_time": "3:18:02", "throughput": 8809.89, "total_tokens": 33266656} +{"current_steps": 49360, "total_steps": 204665, "loss": 0.1348, "lr": 1.8810267494532025e-06, "epoch": 1.2058730119952117, "percentage": 24.12, "elapsed_time": "1:02:56", "remaining_time": "3:18:01", "throughput": 8809.92, "total_tokens": 33269792} +{"current_steps": 49365, "total_steps": 204665, "loss": 0.059, "lr": 1.8809864042784147e-06, "epoch": 1.2059951628270589, "percentage": 24.12, "elapsed_time": "1:02:56", "remaining_time": "3:18:01", "throughput": 8809.97, "total_tokens": 33272992} +{"current_steps": 49370, "total_steps": 204665, "loss": 0.0039, "lr": 1.880946052696857e-06, "epoch": 1.206117313658906, "percentage": 24.12, "elapsed_time": "1:02:57", "remaining_time": "3:18:00", "throughput": 8810.04, "total_tokens": 33276320} +{"current_steps": 49375, "total_steps": 204665, "loss": 0.1022, "lr": 1.8809056947088226e-06, "epoch": 1.2062394644907533, "percentage": 24.12, "elapsed_time": "1:02:57", "remaining_time": "3:18:00", "throughput": 8810.12, "total_tokens": 33279712} +{"current_steps": 49380, "total_steps": 204665, "loss": 0.0726, "lr": 1.880865330314605e-06, "epoch": 1.2063616153226002, "percentage": 24.13, "elapsed_time": "1:02:57", "remaining_time": "3:17:59", "throughput": 8810.16, "total_tokens": 33282912} +{"current_steps": 49385, "total_steps": 204665, "loss": 0.1336, "lr": 1.880824959514498e-06, "epoch": 1.2064837661544474, "percentage": 24.13, "elapsed_time": "1:02:58", "remaining_time": "3:17:59", "throughput": 8810.19, "total_tokens": 33286048} +{"current_steps": 49390, "total_steps": 204665, "loss": 0.0506, "lr": 1.8807845823087952e-06, "epoch": 1.2066059169862946, "percentage": 24.13, "elapsed_time": "1:02:58", "remaining_time": "3:17:58", "throughput": 8810.21, "total_tokens": 33289120} +{"current_steps": 49395, "total_steps": 204665, "loss": 0.0213, "lr": 1.8807441986977894e-06, "epoch": 1.2067280678181418, "percentage": 24.13, "elapsed_time": "1:02:58", "remaining_time": "3:17:58", "throughput": 8810.31, "total_tokens": 33292640} +{"current_steps": 49400, "total_steps": 204665, "loss": 0.1098, "lr": 1.8807038086817752e-06, "epoch": 1.206850218649989, "percentage": 24.14, "elapsed_time": "1:02:59", "remaining_time": "3:17:58", "throughput": 8810.44, "total_tokens": 33296224} +{"current_steps": 49405, "total_steps": 204665, "loss": 0.1692, "lr": 1.8806634122610461e-06, "epoch": 1.2069723694818362, "percentage": 24.14, "elapsed_time": "1:02:59", "remaining_time": "3:17:57", "throughput": 8810.49, "total_tokens": 33299488} +{"current_steps": 49410, "total_steps": 204665, "loss": 0.1198, "lr": 1.8806230094358954e-06, "epoch": 1.2070945203136834, "percentage": 24.14, "elapsed_time": "1:02:59", "remaining_time": "3:17:57", "throughput": 8810.55, "total_tokens": 33302752} +{"current_steps": 49415, "total_steps": 204665, "loss": 0.0987, "lr": 1.8805826002066178e-06, "epoch": 1.2072166711455306, "percentage": 24.14, "elapsed_time": "1:03:00", "remaining_time": "3:17:56", "throughput": 8810.53, "total_tokens": 33305696} +{"current_steps": 49420, "total_steps": 204665, "loss": 0.1584, "lr": 1.8805421845735065e-06, "epoch": 1.2073388219773777, "percentage": 24.15, "elapsed_time": "1:03:00", "remaining_time": "3:17:56", "throughput": 8810.55, "total_tokens": 33308768} +{"current_steps": 49425, "total_steps": 204665, "loss": 0.0575, "lr": 1.8805017625368555e-06, "epoch": 1.207460972809225, "percentage": 24.15, "elapsed_time": "1:03:00", "remaining_time": "3:17:55", "throughput": 8810.66, "total_tokens": 33312288} +{"current_steps": 49430, "total_steps": 204665, "loss": 0.1136, "lr": 1.8804613340969592e-06, "epoch": 1.2075831236410721, "percentage": 24.15, "elapsed_time": "1:03:01", "remaining_time": "3:17:55", "throughput": 8810.73, "total_tokens": 33315616} +{"current_steps": 49435, "total_steps": 204665, "loss": 0.0248, "lr": 1.880420899254111e-06, "epoch": 1.207705274472919, "percentage": 24.15, "elapsed_time": "1:03:01", "remaining_time": "3:17:54", "throughput": 8810.78, "total_tokens": 33318880} +{"current_steps": 49440, "total_steps": 204665, "loss": 0.0634, "lr": 1.8803804580086053e-06, "epoch": 1.2078274253047663, "percentage": 24.16, "elapsed_time": "1:03:01", "remaining_time": "3:17:54", "throughput": 8810.88, "total_tokens": 33322336} +{"current_steps": 49445, "total_steps": 204665, "loss": 0.1465, "lr": 1.8803400103607362e-06, "epoch": 1.2079495761366135, "percentage": 24.16, "elapsed_time": "1:03:02", "remaining_time": "3:17:53", "throughput": 8810.92, "total_tokens": 33325536} +{"current_steps": 49450, "total_steps": 204665, "loss": 0.0011, "lr": 1.8802995563107972e-06, "epoch": 1.2080717269684607, "percentage": 24.16, "elapsed_time": "1:03:02", "remaining_time": "3:17:53", "throughput": 8811.03, "total_tokens": 33329056} +{"current_steps": 49455, "total_steps": 204665, "loss": 0.1953, "lr": 1.8802590958590837e-06, "epoch": 1.2081938778003078, "percentage": 24.16, "elapsed_time": "1:03:03", "remaining_time": "3:17:52", "throughput": 8811.13, "total_tokens": 33332512} +{"current_steps": 49460, "total_steps": 204665, "loss": 0.0797, "lr": 1.8802186290058887e-06, "epoch": 1.208316028632155, "percentage": 24.17, "elapsed_time": "1:03:03", "remaining_time": "3:17:52", "throughput": 8811.2, "total_tokens": 33335840} +{"current_steps": 49465, "total_steps": 204665, "loss": 0.0014, "lr": 1.8801781557515078e-06, "epoch": 1.2084381794640022, "percentage": 24.17, "elapsed_time": "1:03:03", "remaining_time": "3:17:51", "throughput": 8811.26, "total_tokens": 33339168} +{"current_steps": 49470, "total_steps": 204665, "loss": 0.0374, "lr": 1.8801376760962343e-06, "epoch": 1.2085603302958492, "percentage": 24.17, "elapsed_time": "1:03:04", "remaining_time": "3:17:51", "throughput": 8811.29, "total_tokens": 33342240} +{"current_steps": 49475, "total_steps": 204665, "loss": 0.0638, "lr": 1.8800971900403626e-06, "epoch": 1.2086824811276964, "percentage": 24.17, "elapsed_time": "1:03:04", "remaining_time": "3:17:50", "throughput": 8811.42, "total_tokens": 33345888} +{"current_steps": 49480, "total_steps": 204665, "loss": 0.023, "lr": 1.8800566975841878e-06, "epoch": 1.2088046319595436, "percentage": 24.18, "elapsed_time": "1:03:04", "remaining_time": "3:17:50", "throughput": 8811.55, "total_tokens": 33349536} +{"current_steps": 49485, "total_steps": 204665, "loss": 0.0884, "lr": 1.8800161987280037e-06, "epoch": 1.2089267827913908, "percentage": 24.18, "elapsed_time": "1:03:05", "remaining_time": "3:17:49", "throughput": 8811.54, "total_tokens": 33352480} +{"current_steps": 49490, "total_steps": 204665, "loss": 0.174, "lr": 1.8799756934721055e-06, "epoch": 1.209048933623238, "percentage": 24.18, "elapsed_time": "1:03:05", "remaining_time": "3:17:49", "throughput": 8811.61, "total_tokens": 33355808} +{"current_steps": 49495, "total_steps": 204665, "loss": 0.143, "lr": 1.879935181816787e-06, "epoch": 1.2091710844550851, "percentage": 24.18, "elapsed_time": "1:03:05", "remaining_time": "3:17:48", "throughput": 8811.72, "total_tokens": 33359328} +{"current_steps": 49500, "total_steps": 204665, "loss": 0.0526, "lr": 1.8798946637623434e-06, "epoch": 1.2092932352869323, "percentage": 24.19, "elapsed_time": "1:03:06", "remaining_time": "3:17:48", "throughput": 8811.77, "total_tokens": 33362592} +{"current_steps": 49505, "total_steps": 204665, "loss": 0.2042, "lr": 1.879854139309069e-06, "epoch": 1.2094153861187795, "percentage": 24.19, "elapsed_time": "1:03:06", "remaining_time": "3:17:47", "throughput": 8811.8, "total_tokens": 33365728} +{"current_steps": 49510, "total_steps": 204665, "loss": 0.1629, "lr": 1.8798136084572587e-06, "epoch": 1.2095375369506267, "percentage": 24.19, "elapsed_time": "1:03:06", "remaining_time": "3:17:47", "throughput": 8811.78, "total_tokens": 33368672} +{"current_steps": 49515, "total_steps": 204665, "loss": 0.0338, "lr": 1.8797730712072072e-06, "epoch": 1.209659687782474, "percentage": 24.19, "elapsed_time": "1:03:07", "remaining_time": "3:17:46", "throughput": 8811.79, "total_tokens": 33371744} +{"current_steps": 49520, "total_steps": 204665, "loss": 0.0987, "lr": 1.8797325275592094e-06, "epoch": 1.2097818386143209, "percentage": 24.2, "elapsed_time": "1:03:07", "remaining_time": "3:17:46", "throughput": 8811.86, "total_tokens": 33375072} +{"current_steps": 49525, "total_steps": 204665, "loss": 0.1201, "lr": 1.8796919775135597e-06, "epoch": 1.209903989446168, "percentage": 24.2, "elapsed_time": "1:03:07", "remaining_time": "3:17:45", "throughput": 8811.83, "total_tokens": 33377952} +{"current_steps": 49530, "total_steps": 204665, "loss": 0.0782, "lr": 1.8796514210705537e-06, "epoch": 1.2100261402780152, "percentage": 24.2, "elapsed_time": "1:03:08", "remaining_time": "3:17:45", "throughput": 8811.95, "total_tokens": 33381472} +{"current_steps": 49535, "total_steps": 204665, "loss": 0.0364, "lr": 1.8796108582304857e-06, "epoch": 1.2101482911098624, "percentage": 24.2, "elapsed_time": "1:03:08", "remaining_time": "3:17:44", "throughput": 8812.02, "total_tokens": 33384800} +{"current_steps": 49540, "total_steps": 204665, "loss": 0.1381, "lr": 1.8795702889936511e-06, "epoch": 1.2102704419417096, "percentage": 24.21, "elapsed_time": "1:03:08", "remaining_time": "3:17:44", "throughput": 8812.1, "total_tokens": 33388192} +{"current_steps": 49545, "total_steps": 204665, "loss": 0.1393, "lr": 1.8795297133603446e-06, "epoch": 1.2103925927735568, "percentage": 24.21, "elapsed_time": "1:03:09", "remaining_time": "3:17:43", "throughput": 8812.2, "total_tokens": 33391648} +{"current_steps": 49550, "total_steps": 204665, "loss": 0.1072, "lr": 1.8794891313308617e-06, "epoch": 1.210514743605404, "percentage": 24.21, "elapsed_time": "1:03:09", "remaining_time": "3:17:43", "throughput": 8812.24, "total_tokens": 33394848} +{"current_steps": 49555, "total_steps": 204665, "loss": 0.0801, "lr": 1.8794485429054973e-06, "epoch": 1.2106368944372512, "percentage": 24.21, "elapsed_time": "1:03:09", "remaining_time": "3:17:42", "throughput": 8812.26, "total_tokens": 33397920} +{"current_steps": 49560, "total_steps": 204665, "loss": 0.051, "lr": 1.8794079480845464e-06, "epoch": 1.2107590452690982, "percentage": 24.22, "elapsed_time": "1:03:10", "remaining_time": "3:17:42", "throughput": 8812.43, "total_tokens": 33401760} +{"current_steps": 49565, "total_steps": 204665, "loss": 0.0797, "lr": 1.8793673468683044e-06, "epoch": 1.2108811961009454, "percentage": 24.22, "elapsed_time": "1:03:10", "remaining_time": "3:17:41", "throughput": 8812.52, "total_tokens": 33405216} +{"current_steps": 49570, "total_steps": 204665, "loss": 0.0243, "lr": 1.8793267392570667e-06, "epoch": 1.2110033469327925, "percentage": 24.22, "elapsed_time": "1:03:11", "remaining_time": "3:17:41", "throughput": 8812.69, "total_tokens": 33408992} +{"current_steps": 49575, "total_steps": 204665, "loss": 0.0737, "lr": 1.8792861252511282e-06, "epoch": 1.2111254977646397, "percentage": 24.22, "elapsed_time": "1:03:11", "remaining_time": "3:17:40", "throughput": 8812.79, "total_tokens": 33412512} +{"current_steps": 49580, "total_steps": 204665, "loss": 0.108, "lr": 1.8792455048507847e-06, "epoch": 1.211247648596487, "percentage": 24.22, "elapsed_time": "1:03:11", "remaining_time": "3:17:40", "throughput": 8812.88, "total_tokens": 33415904} +{"current_steps": 49585, "total_steps": 204665, "loss": 0.0952, "lr": 1.8792048780563311e-06, "epoch": 1.2113697994283341, "percentage": 24.23, "elapsed_time": "1:03:12", "remaining_time": "3:17:39", "throughput": 8813.06, "total_tokens": 33419744} +{"current_steps": 49590, "total_steps": 204665, "loss": 0.0513, "lr": 1.8791642448680633e-06, "epoch": 1.2114919502601813, "percentage": 24.23, "elapsed_time": "1:03:12", "remaining_time": "3:17:39", "throughput": 8813.05, "total_tokens": 33422688} +{"current_steps": 49595, "total_steps": 204665, "loss": 0.0624, "lr": 1.879123605286277e-06, "epoch": 1.2116141010920285, "percentage": 24.23, "elapsed_time": "1:03:12", "remaining_time": "3:17:38", "throughput": 8813.09, "total_tokens": 33425888} +{"current_steps": 49600, "total_steps": 204665, "loss": 0.0268, "lr": 1.8790829593112669e-06, "epoch": 1.2117362519238757, "percentage": 24.23, "elapsed_time": "1:03:13", "remaining_time": "3:17:38", "throughput": 8813.08, "total_tokens": 33428832} +{"current_steps": 49605, "total_steps": 204665, "loss": 0.0843, "lr": 1.8790423069433294e-06, "epoch": 1.2118584027557229, "percentage": 24.24, "elapsed_time": "1:03:13", "remaining_time": "3:17:37", "throughput": 8813.27, "total_tokens": 33432672} +{"current_steps": 49610, "total_steps": 204665, "loss": 0.1263, "lr": 1.8790016481827596e-06, "epoch": 1.2119805535875698, "percentage": 24.24, "elapsed_time": "1:03:13", "remaining_time": "3:17:37", "throughput": 8813.29, "total_tokens": 33435744} +{"current_steps": 49615, "total_steps": 204665, "loss": 0.0523, "lr": 1.8789609830298534e-06, "epoch": 1.212102704419417, "percentage": 24.24, "elapsed_time": "1:03:14", "remaining_time": "3:17:36", "throughput": 8813.41, "total_tokens": 33439328} +{"current_steps": 49620, "total_steps": 204665, "loss": 0.0036, "lr": 1.8789203114849067e-06, "epoch": 1.2122248552512642, "percentage": 24.24, "elapsed_time": "1:03:14", "remaining_time": "3:17:36", "throughput": 8813.53, "total_tokens": 33442912} +{"current_steps": 49625, "total_steps": 204665, "loss": 0.0131, "lr": 1.8788796335482148e-06, "epoch": 1.2123470060831114, "percentage": 24.25, "elapsed_time": "1:03:14", "remaining_time": "3:17:35", "throughput": 8813.58, "total_tokens": 33446112} +{"current_steps": 49630, "total_steps": 204665, "loss": 0.0871, "lr": 1.878838949220074e-06, "epoch": 1.2124691569149586, "percentage": 24.25, "elapsed_time": "1:03:15", "remaining_time": "3:17:35", "throughput": 8813.67, "total_tokens": 33449568} +{"current_steps": 49635, "total_steps": 204665, "loss": 0.1442, "lr": 1.87879825850078e-06, "epoch": 1.2125913077468058, "percentage": 24.25, "elapsed_time": "1:03:15", "remaining_time": "3:17:34", "throughput": 8813.71, "total_tokens": 33452768} +{"current_steps": 49640, "total_steps": 204665, "loss": 0.0307, "lr": 1.8787575613906287e-06, "epoch": 1.212713458578653, "percentage": 24.25, "elapsed_time": "1:03:15", "remaining_time": "3:17:34", "throughput": 8813.74, "total_tokens": 33455904} +{"current_steps": 49645, "total_steps": 204665, "loss": 0.0752, "lr": 1.878716857889916e-06, "epoch": 1.2128356094105002, "percentage": 24.26, "elapsed_time": "1:03:16", "remaining_time": "3:17:33", "throughput": 8813.83, "total_tokens": 33459296} +{"current_steps": 49650, "total_steps": 204665, "loss": 0.1513, "lr": 1.878676147998938e-06, "epoch": 1.2129577602423471, "percentage": 24.26, "elapsed_time": "1:03:16", "remaining_time": "3:17:33", "throughput": 8813.92, "total_tokens": 33462752} +{"current_steps": 49655, "total_steps": 204665, "loss": 0.1706, "lr": 1.8786354317179906e-06, "epoch": 1.2130799110741943, "percentage": 24.26, "elapsed_time": "1:03:16", "remaining_time": "3:17:33", "throughput": 8814.08, "total_tokens": 33466528} +{"current_steps": 49660, "total_steps": 204665, "loss": 0.0216, "lr": 1.8785947090473702e-06, "epoch": 1.2132020619060415, "percentage": 24.26, "elapsed_time": "1:03:17", "remaining_time": "3:17:32", "throughput": 8814.14, "total_tokens": 33469856} +{"current_steps": 49665, "total_steps": 204665, "loss": 0.1069, "lr": 1.8785539799873727e-06, "epoch": 1.2133242127378887, "percentage": 24.27, "elapsed_time": "1:03:17", "remaining_time": "3:17:32", "throughput": 8814.19, "total_tokens": 33473120} +{"current_steps": 49670, "total_steps": 204665, "loss": 0.1485, "lr": 1.8785132445382944e-06, "epoch": 1.213446363569736, "percentage": 24.27, "elapsed_time": "1:03:17", "remaining_time": "3:17:31", "throughput": 8814.25, "total_tokens": 33476384} +{"current_steps": 49675, "total_steps": 204665, "loss": 0.0381, "lr": 1.8784725027004313e-06, "epoch": 1.213568514401583, "percentage": 24.27, "elapsed_time": "1:03:18", "remaining_time": "3:17:31", "throughput": 8814.39, "total_tokens": 33480096} +{"current_steps": 49680, "total_steps": 204665, "loss": 0.0876, "lr": 1.87843175447408e-06, "epoch": 1.2136906652334303, "percentage": 24.27, "elapsed_time": "1:03:18", "remaining_time": "3:17:30", "throughput": 8814.42, "total_tokens": 33483232} +{"current_steps": 49685, "total_steps": 204665, "loss": 0.0839, "lr": 1.8783909998595368e-06, "epoch": 1.2138128160652775, "percentage": 24.28, "elapsed_time": "1:03:19", "remaining_time": "3:17:30", "throughput": 8814.5, "total_tokens": 33486560} +{"current_steps": 49690, "total_steps": 204665, "loss": 0.1646, "lr": 1.8783502388570978e-06, "epoch": 1.2139349668971247, "percentage": 24.28, "elapsed_time": "1:03:19", "remaining_time": "3:17:29", "throughput": 8814.64, "total_tokens": 33490208} +{"current_steps": 49695, "total_steps": 204665, "loss": 0.0009, "lr": 1.8783094714670597e-06, "epoch": 1.2140571177289718, "percentage": 24.28, "elapsed_time": "1:03:19", "remaining_time": "3:17:29", "throughput": 8814.77, "total_tokens": 33493856} +{"current_steps": 49700, "total_steps": 204665, "loss": 0.0665, "lr": 1.8782686976897192e-06, "epoch": 1.2141792685608188, "percentage": 24.28, "elapsed_time": "1:03:20", "remaining_time": "3:17:28", "throughput": 8814.82, "total_tokens": 33497056} +{"current_steps": 49705, "total_steps": 204665, "loss": 0.0379, "lr": 1.878227917525372e-06, "epoch": 1.214301419392666, "percentage": 24.29, "elapsed_time": "1:03:20", "remaining_time": "3:17:28", "throughput": 8815.0, "total_tokens": 33500896} +{"current_steps": 49710, "total_steps": 204665, "loss": 0.1151, "lr": 1.8781871309743153e-06, "epoch": 1.2144235702245132, "percentage": 24.29, "elapsed_time": "1:03:20", "remaining_time": "3:17:27", "throughput": 8815.19, "total_tokens": 33504800} +{"current_steps": 49715, "total_steps": 204665, "loss": 0.1571, "lr": 1.8781463380368455e-06, "epoch": 1.2145457210563604, "percentage": 24.29, "elapsed_time": "1:03:21", "remaining_time": "3:17:27", "throughput": 8815.24, "total_tokens": 33508000} +{"current_steps": 49720, "total_steps": 204665, "loss": 0.1452, "lr": 1.8781055387132598e-06, "epoch": 1.2146678718882076, "percentage": 24.29, "elapsed_time": "1:03:21", "remaining_time": "3:17:26", "throughput": 8815.28, "total_tokens": 33511200} +{"current_steps": 49725, "total_steps": 204665, "loss": 0.2403, "lr": 1.8780647330038541e-06, "epoch": 1.2147900227200548, "percentage": 24.3, "elapsed_time": "1:03:21", "remaining_time": "3:17:26", "throughput": 8815.34, "total_tokens": 33514464} +{"current_steps": 49730, "total_steps": 204665, "loss": 0.1515, "lr": 1.8780239209089254e-06, "epoch": 1.214912173551902, "percentage": 24.3, "elapsed_time": "1:03:22", "remaining_time": "3:17:25", "throughput": 8815.34, "total_tokens": 33517472} +{"current_steps": 49735, "total_steps": 204665, "loss": 0.0479, "lr": 1.8779831024287706e-06, "epoch": 1.2150343243837491, "percentage": 24.3, "elapsed_time": "1:03:22", "remaining_time": "3:17:25", "throughput": 8815.51, "total_tokens": 33521312} +{"current_steps": 49740, "total_steps": 204665, "loss": 0.0816, "lr": 1.8779422775636869e-06, "epoch": 1.215156475215596, "percentage": 24.3, "elapsed_time": "1:03:22", "remaining_time": "3:17:24", "throughput": 8815.63, "total_tokens": 33524896} +{"current_steps": 49745, "total_steps": 204665, "loss": 0.0805, "lr": 1.8779014463139706e-06, "epoch": 1.2152786260474433, "percentage": 24.31, "elapsed_time": "1:03:23", "remaining_time": "3:17:24", "throughput": 8815.67, "total_tokens": 33528096} +{"current_steps": 49750, "total_steps": 204665, "loss": 0.094, "lr": 1.877860608679919e-06, "epoch": 1.2154007768792905, "percentage": 24.31, "elapsed_time": "1:03:23", "remaining_time": "3:17:23", "throughput": 8815.67, "total_tokens": 33531104} +{"current_steps": 49755, "total_steps": 204665, "loss": 0.0978, "lr": 1.8778197646618285e-06, "epoch": 1.2155229277111377, "percentage": 24.31, "elapsed_time": "1:03:23", "remaining_time": "3:17:23", "throughput": 8815.76, "total_tokens": 33534496} +{"current_steps": 49760, "total_steps": 204665, "loss": 0.096, "lr": 1.8777789142599968e-06, "epoch": 1.2156450785429849, "percentage": 24.31, "elapsed_time": "1:03:24", "remaining_time": "3:17:22", "throughput": 8815.8, "total_tokens": 33537696} +{"current_steps": 49765, "total_steps": 204665, "loss": 0.2209, "lr": 1.8777380574747208e-06, "epoch": 1.215767229374832, "percentage": 24.32, "elapsed_time": "1:03:24", "remaining_time": "3:17:22", "throughput": 8815.86, "total_tokens": 33541024} +{"current_steps": 49770, "total_steps": 204665, "loss": 0.1062, "lr": 1.8776971943062975e-06, "epoch": 1.2158893802066792, "percentage": 24.32, "elapsed_time": "1:03:24", "remaining_time": "3:17:21", "throughput": 8815.92, "total_tokens": 33544288} +{"current_steps": 49775, "total_steps": 204665, "loss": 0.0532, "lr": 1.8776563247550242e-06, "epoch": 1.2160115310385264, "percentage": 24.32, "elapsed_time": "1:03:25", "remaining_time": "3:17:21", "throughput": 8815.93, "total_tokens": 33547360} +{"current_steps": 49780, "total_steps": 204665, "loss": 0.0779, "lr": 1.877615448821198e-06, "epoch": 1.2161336818703736, "percentage": 24.32, "elapsed_time": "1:03:25", "remaining_time": "3:17:20", "throughput": 8816.18, "total_tokens": 33551520} +{"current_steps": 49785, "total_steps": 204665, "loss": 0.0497, "lr": 1.8775745665051161e-06, "epoch": 1.2162558327022208, "percentage": 24.33, "elapsed_time": "1:03:26", "remaining_time": "3:17:20", "throughput": 8816.18, "total_tokens": 33554528} +{"current_steps": 49790, "total_steps": 204665, "loss": 0.0029, "lr": 1.8775336778070762e-06, "epoch": 1.2163779835340678, "percentage": 24.33, "elapsed_time": "1:03:26", "remaining_time": "3:17:19", "throughput": 8816.27, "total_tokens": 33557984} +{"current_steps": 49795, "total_steps": 204665, "loss": 0.0614, "lr": 1.877492782727375e-06, "epoch": 1.216500134365915, "percentage": 24.33, "elapsed_time": "1:03:26", "remaining_time": "3:17:19", "throughput": 8816.25, "total_tokens": 33560928} +{"current_steps": 49800, "total_steps": 204665, "loss": 0.1512, "lr": 1.8774518812663104e-06, "epoch": 1.2166222851977622, "percentage": 24.33, "elapsed_time": "1:03:27", "remaining_time": "3:17:18", "throughput": 8816.33, "total_tokens": 33564256} +{"current_steps": 49805, "total_steps": 204665, "loss": 0.0401, "lr": 1.8774109734241798e-06, "epoch": 1.2167444360296094, "percentage": 24.33, "elapsed_time": "1:03:27", "remaining_time": "3:17:18", "throughput": 8816.4, "total_tokens": 33567584} +{"current_steps": 49810, "total_steps": 204665, "loss": 0.0408, "lr": 1.8773700592012806e-06, "epoch": 1.2168665868614565, "percentage": 24.34, "elapsed_time": "1:03:27", "remaining_time": "3:17:17", "throughput": 8816.48, "total_tokens": 33570976} +{"current_steps": 49815, "total_steps": 204665, "loss": 0.024, "lr": 1.8773291385979104e-06, "epoch": 1.2169887376933037, "percentage": 24.34, "elapsed_time": "1:03:28", "remaining_time": "3:17:17", "throughput": 8816.49, "total_tokens": 33573984} +{"current_steps": 49820, "total_steps": 204665, "loss": 0.1288, "lr": 1.8772882116143667e-06, "epoch": 1.217110888525151, "percentage": 24.34, "elapsed_time": "1:03:28", "remaining_time": "3:17:16", "throughput": 8816.61, "total_tokens": 33577504} +{"current_steps": 49825, "total_steps": 204665, "loss": 0.1204, "lr": 1.8772472782509473e-06, "epoch": 1.2172330393569981, "percentage": 24.34, "elapsed_time": "1:03:28", "remaining_time": "3:17:16", "throughput": 8816.7, "total_tokens": 33580960} +{"current_steps": 49830, "total_steps": 204665, "loss": 0.1673, "lr": 1.8772063385079493e-06, "epoch": 1.217355190188845, "percentage": 24.35, "elapsed_time": "1:03:29", "remaining_time": "3:17:15", "throughput": 8816.72, "total_tokens": 33584032} +{"current_steps": 49835, "total_steps": 204665, "loss": 0.0721, "lr": 1.877165392385671e-06, "epoch": 1.2174773410206923, "percentage": 24.35, "elapsed_time": "1:03:29", "remaining_time": "3:17:15", "throughput": 8816.78, "total_tokens": 33587296} +{"current_steps": 49840, "total_steps": 204665, "loss": 0.0028, "lr": 1.8771244398844104e-06, "epoch": 1.2175994918525395, "percentage": 24.35, "elapsed_time": "1:03:29", "remaining_time": "3:17:14", "throughput": 8816.8, "total_tokens": 33590368} +{"current_steps": 49845, "total_steps": 204665, "loss": 0.0344, "lr": 1.8770834810044646e-06, "epoch": 1.2177216426843867, "percentage": 24.35, "elapsed_time": "1:03:30", "remaining_time": "3:17:14", "throughput": 8816.86, "total_tokens": 33593696} +{"current_steps": 49850, "total_steps": 204665, "loss": 0.0828, "lr": 1.8770425157461318e-06, "epoch": 1.2178437935162338, "percentage": 24.36, "elapsed_time": "1:03:30", "remaining_time": "3:17:13", "throughput": 8816.92, "total_tokens": 33597024} +{"current_steps": 49855, "total_steps": 204665, "loss": 0.0105, "lr": 1.8770015441097103e-06, "epoch": 1.217965944348081, "percentage": 24.36, "elapsed_time": "1:03:30", "remaining_time": "3:17:13", "throughput": 8816.99, "total_tokens": 33600288} +{"current_steps": 49860, "total_steps": 204665, "loss": 0.2466, "lr": 1.8769605660954975e-06, "epoch": 1.2180880951799282, "percentage": 24.36, "elapsed_time": "1:03:31", "remaining_time": "3:17:12", "throughput": 8816.97, "total_tokens": 33603232} +{"current_steps": 49865, "total_steps": 204665, "loss": 0.0755, "lr": 1.8769195817037916e-06, "epoch": 1.2182102460117754, "percentage": 24.36, "elapsed_time": "1:03:31", "remaining_time": "3:17:12", "throughput": 8817.07, "total_tokens": 33606688} +{"current_steps": 49870, "total_steps": 204665, "loss": 0.0893, "lr": 1.8768785909348904e-06, "epoch": 1.2183323968436226, "percentage": 24.37, "elapsed_time": "1:03:31", "remaining_time": "3:17:12", "throughput": 8817.15, "total_tokens": 33610080} +{"current_steps": 49875, "total_steps": 204665, "loss": 0.0612, "lr": 1.8768375937890926e-06, "epoch": 1.2184545476754698, "percentage": 24.37, "elapsed_time": "1:03:32", "remaining_time": "3:17:11", "throughput": 8817.2, "total_tokens": 33613280} +{"current_steps": 49880, "total_steps": 204665, "loss": 0.0436, "lr": 1.8767965902666956e-06, "epoch": 1.2185766985073168, "percentage": 24.37, "elapsed_time": "1:03:32", "remaining_time": "3:17:11", "throughput": 8817.18, "total_tokens": 33616224} +{"current_steps": 49885, "total_steps": 204665, "loss": 0.1213, "lr": 1.8767555803679981e-06, "epoch": 1.218698849339164, "percentage": 24.37, "elapsed_time": "1:03:32", "remaining_time": "3:17:10", "throughput": 8817.28, "total_tokens": 33619680} +{"current_steps": 49890, "total_steps": 204665, "loss": 0.1133, "lr": 1.8767145640932984e-06, "epoch": 1.2188210001710111, "percentage": 24.38, "elapsed_time": "1:03:33", "remaining_time": "3:17:10", "throughput": 8817.32, "total_tokens": 33622880} +{"current_steps": 49895, "total_steps": 204665, "loss": 0.0274, "lr": 1.8766735414428943e-06, "epoch": 1.2189431510028583, "percentage": 24.38, "elapsed_time": "1:03:33", "remaining_time": "3:17:09", "throughput": 8817.37, "total_tokens": 33626080} +{"current_steps": 49900, "total_steps": 204665, "loss": 0.1154, "lr": 1.8766325124170845e-06, "epoch": 1.2190653018347055, "percentage": 24.38, "elapsed_time": "1:03:33", "remaining_time": "3:17:09", "throughput": 8817.57, "total_tokens": 33630048} +{"current_steps": 49905, "total_steps": 204665, "loss": 0.1555, "lr": 1.8765914770161676e-06, "epoch": 1.2191874526665527, "percentage": 24.38, "elapsed_time": "1:03:34", "remaining_time": "3:17:08", "throughput": 8817.64, "total_tokens": 33633376} +{"current_steps": 49910, "total_steps": 204665, "loss": 0.1089, "lr": 1.8765504352404414e-06, "epoch": 1.2193096034984, "percentage": 24.39, "elapsed_time": "1:03:34", "remaining_time": "3:17:08", "throughput": 8818.16, "total_tokens": 33638944} +{"current_steps": 49915, "total_steps": 204665, "loss": 0.0729, "lr": 1.8765093870902046e-06, "epoch": 1.2194317543302469, "percentage": 24.39, "elapsed_time": "1:03:35", "remaining_time": "3:17:07", "throughput": 8818.21, "total_tokens": 33642208} +{"current_steps": 49920, "total_steps": 204665, "loss": 0.0803, "lr": 1.8764683325657558e-06, "epoch": 1.219553905162094, "percentage": 24.39, "elapsed_time": "1:03:35", "remaining_time": "3:17:07", "throughput": 8818.42, "total_tokens": 33646240} +{"current_steps": 49925, "total_steps": 204665, "loss": 0.0596, "lr": 1.8764272716673936e-06, "epoch": 1.2196760559939412, "percentage": 24.39, "elapsed_time": "1:03:35", "remaining_time": "3:17:06", "throughput": 8818.57, "total_tokens": 33649888} +{"current_steps": 49930, "total_steps": 204665, "loss": 0.1279, "lr": 1.8763862043954167e-06, "epoch": 1.2197982068257884, "percentage": 24.4, "elapsed_time": "1:03:36", "remaining_time": "3:17:06", "throughput": 8818.54, "total_tokens": 33652768} +{"current_steps": 49935, "total_steps": 204665, "loss": 0.03, "lr": 1.8763451307501234e-06, "epoch": 1.2199203576576356, "percentage": 24.4, "elapsed_time": "1:03:36", "remaining_time": "3:17:05", "throughput": 8818.65, "total_tokens": 33656288} +{"current_steps": 49940, "total_steps": 204665, "loss": 0.014, "lr": 1.8763040507318126e-06, "epoch": 1.2200425084894828, "percentage": 24.4, "elapsed_time": "1:03:36", "remaining_time": "3:17:05", "throughput": 8818.73, "total_tokens": 33659616} +{"current_steps": 49945, "total_steps": 204665, "loss": 0.1982, "lr": 1.8762629643407832e-06, "epoch": 1.22016465932133, "percentage": 24.4, "elapsed_time": "1:03:37", "remaining_time": "3:17:04", "throughput": 8818.74, "total_tokens": 33662688} +{"current_steps": 49950, "total_steps": 204665, "loss": 0.0014, "lr": 1.876221871577334e-06, "epoch": 1.2202868101531772, "percentage": 24.41, "elapsed_time": "1:03:37", "remaining_time": "3:17:04", "throughput": 8818.81, "total_tokens": 33666016} +{"current_steps": 49955, "total_steps": 204665, "loss": 0.073, "lr": 1.8761807724417633e-06, "epoch": 1.2204089609850244, "percentage": 24.41, "elapsed_time": "1:03:37", "remaining_time": "3:17:03", "throughput": 8818.91, "total_tokens": 33669536} +{"current_steps": 49960, "total_steps": 204665, "loss": 0.0847, "lr": 1.8761396669343705e-06, "epoch": 1.2205311118168716, "percentage": 24.41, "elapsed_time": "1:03:38", "remaining_time": "3:17:03", "throughput": 8818.94, "total_tokens": 33672672} +{"current_steps": 49965, "total_steps": 204665, "loss": 0.0568, "lr": 1.8760985550554545e-06, "epoch": 1.2206532626487188, "percentage": 24.41, "elapsed_time": "1:03:38", "remaining_time": "3:17:02", "throughput": 8818.97, "total_tokens": 33675808} +{"current_steps": 49970, "total_steps": 204665, "loss": 0.0013, "lr": 1.876057436805314e-06, "epoch": 1.2207754134805657, "percentage": 24.42, "elapsed_time": "1:03:38", "remaining_time": "3:17:02", "throughput": 8818.97, "total_tokens": 33678816} +{"current_steps": 49975, "total_steps": 204665, "loss": 0.0405, "lr": 1.8760163121842483e-06, "epoch": 1.220897564312413, "percentage": 24.42, "elapsed_time": "1:03:39", "remaining_time": "3:17:01", "throughput": 8819.07, "total_tokens": 33682272} +{"current_steps": 49980, "total_steps": 204665, "loss": 0.2167, "lr": 1.8759751811925564e-06, "epoch": 1.22101971514426, "percentage": 24.42, "elapsed_time": "1:03:39", "remaining_time": "3:17:01", "throughput": 8819.18, "total_tokens": 33685792} +{"current_steps": 49985, "total_steps": 204665, "loss": 0.1503, "lr": 1.875934043830537e-06, "epoch": 1.2211418659761073, "percentage": 24.42, "elapsed_time": "1:03:39", "remaining_time": "3:17:00", "throughput": 8819.23, "total_tokens": 33689056} +{"current_steps": 49990, "total_steps": 204665, "loss": 0.1675, "lr": 1.87589290009849e-06, "epoch": 1.2212640168079545, "percentage": 24.43, "elapsed_time": "1:03:40", "remaining_time": "3:17:00", "throughput": 8819.49, "total_tokens": 33693280} +{"current_steps": 49995, "total_steps": 204665, "loss": 0.0654, "lr": 1.8758517499967144e-06, "epoch": 1.2213861676398017, "percentage": 24.43, "elapsed_time": "1:03:40", "remaining_time": "3:17:00", "throughput": 8819.6, "total_tokens": 33696800} +{"current_steps": 50000, "total_steps": 204665, "loss": 0.1313, "lr": 1.8758105935255089e-06, "epoch": 1.2215083184716489, "percentage": 24.43, "elapsed_time": "1:03:41", "remaining_time": "3:16:59", "throughput": 8819.71, "total_tokens": 33700320} +{"current_steps": 50005, "total_steps": 204665, "loss": 0.1726, "lr": 1.8757694306851732e-06, "epoch": 1.2216304693034958, "percentage": 24.43, "elapsed_time": "1:03:41", "remaining_time": "3:16:59", "throughput": 8819.81, "total_tokens": 33703776} +{"current_steps": 50010, "total_steps": 204665, "loss": 0.0512, "lr": 1.8757282614760071e-06, "epoch": 1.221752620135343, "percentage": 24.44, "elapsed_time": "1:03:41", "remaining_time": "3:16:58", "throughput": 8819.95, "total_tokens": 33707488} +{"current_steps": 50015, "total_steps": 204665, "loss": 0.0313, "lr": 1.8756870858983089e-06, "epoch": 1.2218747709671902, "percentage": 24.44, "elapsed_time": "1:03:42", "remaining_time": "3:16:58", "throughput": 8819.95, "total_tokens": 33710496} +{"current_steps": 50020, "total_steps": 204665, "loss": 0.079, "lr": 1.8756459039523791e-06, "epoch": 1.2219969217990374, "percentage": 24.44, "elapsed_time": "1:03:42", "remaining_time": "3:16:57", "throughput": 8820.21, "total_tokens": 33714720} +{"current_steps": 50025, "total_steps": 204665, "loss": 0.002, "lr": 1.8756047156385169e-06, "epoch": 1.2221190726308846, "percentage": 24.44, "elapsed_time": "1:03:42", "remaining_time": "3:16:57", "throughput": 8820.27, "total_tokens": 33717984} +{"current_steps": 50030, "total_steps": 204665, "loss": 0.1796, "lr": 1.8755635209570213e-06, "epoch": 1.2222412234627318, "percentage": 24.44, "elapsed_time": "1:03:43", "remaining_time": "3:16:56", "throughput": 8820.26, "total_tokens": 33720928} +{"current_steps": 50035, "total_steps": 204665, "loss": 0.1181, "lr": 1.8755223199081924e-06, "epoch": 1.222363374294579, "percentage": 24.45, "elapsed_time": "1:03:43", "remaining_time": "3:16:56", "throughput": 8820.39, "total_tokens": 33724576} +{"current_steps": 50040, "total_steps": 204665, "loss": 0.0019, "lr": 1.8754811124923298e-06, "epoch": 1.2224855251264262, "percentage": 24.45, "elapsed_time": "1:03:43", "remaining_time": "3:16:55", "throughput": 8820.57, "total_tokens": 33728480} +{"current_steps": 50045, "total_steps": 204665, "loss": 0.0013, "lr": 1.8754398987097331e-06, "epoch": 1.2226076759582734, "percentage": 24.45, "elapsed_time": "1:03:44", "remaining_time": "3:16:55", "throughput": 8820.63, "total_tokens": 33731744} +{"current_steps": 50050, "total_steps": 204665, "loss": 0.0598, "lr": 1.8753986785607019e-06, "epoch": 1.2227298267901205, "percentage": 24.45, "elapsed_time": "1:03:44", "remaining_time": "3:16:54", "throughput": 8820.8, "total_tokens": 33735584} +{"current_steps": 50055, "total_steps": 204665, "loss": 0.1249, "lr": 1.8753574520455362e-06, "epoch": 1.2228519776219675, "percentage": 24.46, "elapsed_time": "1:03:44", "remaining_time": "3:16:54", "throughput": 8821.0, "total_tokens": 33739552} +{"current_steps": 50060, "total_steps": 204665, "loss": 0.09, "lr": 1.8753162191645354e-06, "epoch": 1.2229741284538147, "percentage": 24.46, "elapsed_time": "1:03:45", "remaining_time": "3:16:53", "throughput": 8821.03, "total_tokens": 33742688} +{"current_steps": 50065, "total_steps": 204665, "loss": 0.0686, "lr": 1.8752749799179997e-06, "epoch": 1.223096279285662, "percentage": 24.46, "elapsed_time": "1:03:45", "remaining_time": "3:16:53", "throughput": 8821.11, "total_tokens": 33746080} +{"current_steps": 50070, "total_steps": 204665, "loss": 0.1781, "lr": 1.8752337343062291e-06, "epoch": 1.223218430117509, "percentage": 24.46, "elapsed_time": "1:03:45", "remaining_time": "3:16:52", "throughput": 8821.13, "total_tokens": 33749216} +{"current_steps": 50075, "total_steps": 204665, "loss": 0.0901, "lr": 1.8751924823295232e-06, "epoch": 1.2233405809493563, "percentage": 24.47, "elapsed_time": "1:03:46", "remaining_time": "3:16:52", "throughput": 8821.27, "total_tokens": 33752864} +{"current_steps": 50080, "total_steps": 204665, "loss": 0.0491, "lr": 1.8751512239881824e-06, "epoch": 1.2234627317812035, "percentage": 24.47, "elapsed_time": "1:03:46", "remaining_time": "3:16:51", "throughput": 8821.34, "total_tokens": 33756192} +{"current_steps": 50085, "total_steps": 204665, "loss": 0.0625, "lr": 1.8751099592825063e-06, "epoch": 1.2235848826130507, "percentage": 24.47, "elapsed_time": "1:03:46", "remaining_time": "3:16:51", "throughput": 8821.4, "total_tokens": 33759456} +{"current_steps": 50090, "total_steps": 204665, "loss": 0.0798, "lr": 1.8750686882127952e-06, "epoch": 1.2237070334448978, "percentage": 24.47, "elapsed_time": "1:03:47", "remaining_time": "3:16:50", "throughput": 8821.51, "total_tokens": 33762912} +{"current_steps": 50095, "total_steps": 204665, "loss": 0.0012, "lr": 1.8750274107793492e-06, "epoch": 1.2238291842767448, "percentage": 24.48, "elapsed_time": "1:03:47", "remaining_time": "3:16:50", "throughput": 8821.58, "total_tokens": 33766304} +{"current_steps": 50100, "total_steps": 204665, "loss": 0.1302, "lr": 1.8749861269824688e-06, "epoch": 1.223951335108592, "percentage": 24.48, "elapsed_time": "1:03:48", "remaining_time": "3:16:49", "throughput": 8821.67, "total_tokens": 33769696} +{"current_steps": 50105, "total_steps": 204665, "loss": 0.0476, "lr": 1.8749448368224536e-06, "epoch": 1.2240734859404392, "percentage": 24.48, "elapsed_time": "1:03:48", "remaining_time": "3:16:49", "throughput": 8821.75, "total_tokens": 33773088} +{"current_steps": 50110, "total_steps": 204665, "loss": 0.0521, "lr": 1.8749035402996042e-06, "epoch": 1.2241956367722864, "percentage": 24.48, "elapsed_time": "1:03:48", "remaining_time": "3:16:49", "throughput": 8821.91, "total_tokens": 33776864} +{"current_steps": 50115, "total_steps": 204665, "loss": 0.1457, "lr": 1.8748622374142213e-06, "epoch": 1.2243177876041336, "percentage": 24.49, "elapsed_time": "1:03:49", "remaining_time": "3:16:48", "throughput": 8821.99, "total_tokens": 33780192} +{"current_steps": 50120, "total_steps": 204665, "loss": 0.108, "lr": 1.8748209281666047e-06, "epoch": 1.2244399384359808, "percentage": 24.49, "elapsed_time": "1:03:49", "remaining_time": "3:16:48", "throughput": 8822.1, "total_tokens": 33783712} +{"current_steps": 50125, "total_steps": 204665, "loss": 0.0042, "lr": 1.874779612557055e-06, "epoch": 1.224562089267828, "percentage": 24.49, "elapsed_time": "1:03:49", "remaining_time": "3:16:47", "throughput": 8822.22, "total_tokens": 33787296} +{"current_steps": 50130, "total_steps": 204665, "loss": 0.1485, "lr": 1.8747382905858728e-06, "epoch": 1.2246842400996751, "percentage": 24.49, "elapsed_time": "1:03:50", "remaining_time": "3:16:47", "throughput": 8822.31, "total_tokens": 33790752} +{"current_steps": 50135, "total_steps": 204665, "loss": 0.0507, "lr": 1.8746969622533584e-06, "epoch": 1.2248063909315223, "percentage": 24.5, "elapsed_time": "1:03:50", "remaining_time": "3:16:46", "throughput": 8822.42, "total_tokens": 33794272} +{"current_steps": 50140, "total_steps": 204665, "loss": 0.0425, "lr": 1.8746556275598122e-06, "epoch": 1.2249285417633695, "percentage": 24.5, "elapsed_time": "1:03:50", "remaining_time": "3:16:46", "throughput": 8822.51, "total_tokens": 33797664} +{"current_steps": 50145, "total_steps": 204665, "loss": 0.1256, "lr": 1.8746142865055353e-06, "epoch": 1.2250506925952165, "percentage": 24.5, "elapsed_time": "1:03:51", "remaining_time": "3:16:45", "throughput": 8822.65, "total_tokens": 33801312} +{"current_steps": 50150, "total_steps": 204665, "loss": 0.0746, "lr": 1.8745729390908278e-06, "epoch": 1.2251728434270637, "percentage": 24.5, "elapsed_time": "1:03:51", "remaining_time": "3:16:45", "throughput": 8822.77, "total_tokens": 33804896} +{"current_steps": 50155, "total_steps": 204665, "loss": 0.1385, "lr": 1.8745315853159909e-06, "epoch": 1.2252949942589109, "percentage": 24.51, "elapsed_time": "1:03:51", "remaining_time": "3:16:44", "throughput": 8822.99, "total_tokens": 33808928} +{"current_steps": 50160, "total_steps": 204665, "loss": 0.0433, "lr": 1.874490225181325e-06, "epoch": 1.225417145090758, "percentage": 24.51, "elapsed_time": "1:03:52", "remaining_time": "3:16:44", "throughput": 8822.96, "total_tokens": 33811808} +{"current_steps": 50165, "total_steps": 204665, "loss": 0.0743, "lr": 1.874448858687131e-06, "epoch": 1.2255392959226052, "percentage": 24.51, "elapsed_time": "1:03:52", "remaining_time": "3:16:43", "throughput": 8823.02, "total_tokens": 33815072} +{"current_steps": 50170, "total_steps": 204665, "loss": 0.0852, "lr": 1.8744074858337097e-06, "epoch": 1.2256614467544524, "percentage": 24.51, "elapsed_time": "1:03:52", "remaining_time": "3:16:43", "throughput": 8823.15, "total_tokens": 33818656} +{"current_steps": 50175, "total_steps": 204665, "loss": 0.0143, "lr": 1.874366106621362e-06, "epoch": 1.2257835975862996, "percentage": 24.52, "elapsed_time": "1:03:53", "remaining_time": "3:16:42", "throughput": 8823.2, "total_tokens": 33821920} +{"current_steps": 50180, "total_steps": 204665, "loss": 0.0373, "lr": 1.8743247210503887e-06, "epoch": 1.2259057484181468, "percentage": 24.52, "elapsed_time": "1:03:53", "remaining_time": "3:16:42", "throughput": 8823.27, "total_tokens": 33825248} +{"current_steps": 50185, "total_steps": 204665, "loss": 0.1028, "lr": 1.874283329121091e-06, "epoch": 1.2260278992499938, "percentage": 24.52, "elapsed_time": "1:03:53", "remaining_time": "3:16:41", "throughput": 8823.38, "total_tokens": 33828768} +{"current_steps": 50190, "total_steps": 204665, "loss": 0.1173, "lr": 1.8742419308337695e-06, "epoch": 1.226150050081841, "percentage": 24.52, "elapsed_time": "1:03:54", "remaining_time": "3:16:41", "throughput": 8823.49, "total_tokens": 33832288} +{"current_steps": 50195, "total_steps": 204665, "loss": 0.0366, "lr": 1.874200526188726e-06, "epoch": 1.2262722009136882, "percentage": 24.53, "elapsed_time": "1:03:54", "remaining_time": "3:16:40", "throughput": 8823.57, "total_tokens": 33835616} +{"current_steps": 50200, "total_steps": 204665, "loss": 0.027, "lr": 1.8741591151862607e-06, "epoch": 1.2263943517455353, "percentage": 24.53, "elapsed_time": "1:03:55", "remaining_time": "3:16:40", "throughput": 8823.77, "total_tokens": 33839520} +{"current_steps": 50205, "total_steps": 204665, "loss": 0.0038, "lr": 1.8741176978266755e-06, "epoch": 1.2265165025773825, "percentage": 24.53, "elapsed_time": "1:03:55", "remaining_time": "3:16:39", "throughput": 8823.81, "total_tokens": 33842720} +{"current_steps": 50210, "total_steps": 204665, "loss": 0.0015, "lr": 1.8740762741102709e-06, "epoch": 1.2266386534092297, "percentage": 24.53, "elapsed_time": "1:03:55", "remaining_time": "3:16:39", "throughput": 8823.92, "total_tokens": 33846240} +{"current_steps": 50215, "total_steps": 204665, "loss": 0.1725, "lr": 1.874034844037349e-06, "epoch": 1.226760804241077, "percentage": 24.54, "elapsed_time": "1:03:56", "remaining_time": "3:16:38", "throughput": 8824.05, "total_tokens": 33849824} +{"current_steps": 50220, "total_steps": 204665, "loss": 0.1997, "lr": 1.8739934076082102e-06, "epoch": 1.226882955072924, "percentage": 24.54, "elapsed_time": "1:03:56", "remaining_time": "3:16:38", "throughput": 8824.1, "total_tokens": 33853024} +{"current_steps": 50225, "total_steps": 204665, "loss": 0.0583, "lr": 1.8739519648231568e-06, "epoch": 1.2270051059047713, "percentage": 24.54, "elapsed_time": "1:03:56", "remaining_time": "3:16:37", "throughput": 8824.23, "total_tokens": 33856672} +{"current_steps": 50230, "total_steps": 204665, "loss": 0.0894, "lr": 1.8739105156824893e-06, "epoch": 1.2271272567366185, "percentage": 24.54, "elapsed_time": "1:03:57", "remaining_time": "3:16:37", "throughput": 8824.25, "total_tokens": 33859744} +{"current_steps": 50235, "total_steps": 204665, "loss": 0.0098, "lr": 1.8738690601865094e-06, "epoch": 1.2272494075684655, "percentage": 24.54, "elapsed_time": "1:03:57", "remaining_time": "3:16:36", "throughput": 8824.35, "total_tokens": 33863200} +{"current_steps": 50240, "total_steps": 204665, "loss": 0.1171, "lr": 1.8738275983355188e-06, "epoch": 1.2273715584003126, "percentage": 24.55, "elapsed_time": "1:03:57", "remaining_time": "3:16:36", "throughput": 8824.38, "total_tokens": 33866336} +{"current_steps": 50245, "total_steps": 204665, "loss": 0.1202, "lr": 1.8737861301298189e-06, "epoch": 1.2274937092321598, "percentage": 24.55, "elapsed_time": "1:03:58", "remaining_time": "3:16:35", "throughput": 8824.46, "total_tokens": 33869728} +{"current_steps": 50250, "total_steps": 204665, "loss": 0.0908, "lr": 1.8737446555697112e-06, "epoch": 1.227615860064007, "percentage": 24.55, "elapsed_time": "1:03:58", "remaining_time": "3:16:35", "throughput": 8824.64, "total_tokens": 33873568} +{"current_steps": 50255, "total_steps": 204665, "loss": 0.1399, "lr": 1.8737031746554972e-06, "epoch": 1.2277380108958542, "percentage": 24.55, "elapsed_time": "1:03:58", "remaining_time": "3:16:35", "throughput": 8824.8, "total_tokens": 33877344} +{"current_steps": 50260, "total_steps": 204665, "loss": 0.0458, "lr": 1.8736616873874788e-06, "epoch": 1.2278601617277014, "percentage": 24.56, "elapsed_time": "1:03:59", "remaining_time": "3:16:34", "throughput": 8824.87, "total_tokens": 33880672} +{"current_steps": 50265, "total_steps": 204665, "loss": 0.0731, "lr": 1.8736201937659577e-06, "epoch": 1.2279823125595486, "percentage": 24.56, "elapsed_time": "1:03:59", "remaining_time": "3:16:34", "throughput": 8825.05, "total_tokens": 33884512} +{"current_steps": 50270, "total_steps": 204665, "loss": 0.0707, "lr": 1.8735786937912358e-06, "epoch": 1.2281044633913958, "percentage": 24.56, "elapsed_time": "1:03:59", "remaining_time": "3:16:33", "throughput": 8825.27, "total_tokens": 33888544} +{"current_steps": 50275, "total_steps": 204665, "loss": 0.0657, "lr": 1.8735371874636142e-06, "epoch": 1.2282266142232428, "percentage": 24.56, "elapsed_time": "1:04:00", "remaining_time": "3:16:33", "throughput": 8825.3, "total_tokens": 33891680} +{"current_steps": 50280, "total_steps": 204665, "loss": 0.0042, "lr": 1.8734956747833955e-06, "epoch": 1.22834876505509, "percentage": 24.57, "elapsed_time": "1:04:00", "remaining_time": "3:16:32", "throughput": 8825.49, "total_tokens": 33895584} +{"current_steps": 50285, "total_steps": 204665, "loss": 0.044, "lr": 1.8734541557508811e-06, "epoch": 1.2284709158869371, "percentage": 24.57, "elapsed_time": "1:04:00", "remaining_time": "3:16:32", "throughput": 8825.54, "total_tokens": 33898784} +{"current_steps": 50290, "total_steps": 204665, "loss": 0.1212, "lr": 1.8734126303663733e-06, "epoch": 1.2285930667187843, "percentage": 24.57, "elapsed_time": "1:04:01", "remaining_time": "3:16:31", "throughput": 8825.6, "total_tokens": 33902112} +{"current_steps": 50295, "total_steps": 204665, "loss": 0.0733, "lr": 1.873371098630174e-06, "epoch": 1.2287152175506315, "percentage": 24.57, "elapsed_time": "1:04:01", "remaining_time": "3:16:31", "throughput": 8825.65, "total_tokens": 33905312} +{"current_steps": 50300, "total_steps": 204665, "loss": 0.1261, "lr": 1.8733295605425852e-06, "epoch": 1.2288373683824787, "percentage": 24.58, "elapsed_time": "1:04:02", "remaining_time": "3:16:30", "throughput": 8825.76, "total_tokens": 33908832} +{"current_steps": 50305, "total_steps": 204665, "loss": 0.1365, "lr": 1.8732880161039088e-06, "epoch": 1.228959519214326, "percentage": 24.58, "elapsed_time": "1:04:02", "remaining_time": "3:16:30", "throughput": 8825.86, "total_tokens": 33912352} +{"current_steps": 50310, "total_steps": 204665, "loss": 0.0527, "lr": 1.873246465314447e-06, "epoch": 1.229081670046173, "percentage": 24.58, "elapsed_time": "1:04:02", "remaining_time": "3:16:29", "throughput": 8825.86, "total_tokens": 33915360} +{"current_steps": 50315, "total_steps": 204665, "loss": 0.1051, "lr": 1.873204908174502e-06, "epoch": 1.2292038208780203, "percentage": 24.58, "elapsed_time": "1:04:03", "remaining_time": "3:16:29", "throughput": 8826.09, "total_tokens": 33919456} +{"current_steps": 50320, "total_steps": 204665, "loss": 0.2591, "lr": 1.8731633446843765e-06, "epoch": 1.2293259717098675, "percentage": 24.59, "elapsed_time": "1:04:03", "remaining_time": "3:16:28", "throughput": 8826.09, "total_tokens": 33922464} +{"current_steps": 50325, "total_steps": 204665, "loss": 0.1767, "lr": 1.873121774844372e-06, "epoch": 1.2294481225417144, "percentage": 24.59, "elapsed_time": "1:04:03", "remaining_time": "3:16:28", "throughput": 8826.09, "total_tokens": 33925408} +{"current_steps": 50330, "total_steps": 204665, "loss": 0.0073, "lr": 1.873080198654791e-06, "epoch": 1.2295702733735616, "percentage": 24.59, "elapsed_time": "1:04:04", "remaining_time": "3:16:27", "throughput": 8826.21, "total_tokens": 33928992} +{"current_steps": 50335, "total_steps": 204665, "loss": 0.0729, "lr": 1.873038616115936e-06, "epoch": 1.2296924242054088, "percentage": 24.59, "elapsed_time": "1:04:04", "remaining_time": "3:16:27", "throughput": 8826.31, "total_tokens": 33932448} +{"current_steps": 50340, "total_steps": 204665, "loss": 0.0924, "lr": 1.8729970272281092e-06, "epoch": 1.229814575037256, "percentage": 24.6, "elapsed_time": "1:04:04", "remaining_time": "3:16:26", "throughput": 8826.38, "total_tokens": 33935776} +{"current_steps": 50345, "total_steps": 204665, "loss": 0.0866, "lr": 1.8729554319916137e-06, "epoch": 1.2299367258691032, "percentage": 24.6, "elapsed_time": "1:04:05", "remaining_time": "3:16:26", "throughput": 8826.47, "total_tokens": 33939168} +{"current_steps": 50350, "total_steps": 204665, "loss": 0.0694, "lr": 1.872913830406751e-06, "epoch": 1.2300588767009504, "percentage": 24.6, "elapsed_time": "1:04:05", "remaining_time": "3:16:25", "throughput": 8826.52, "total_tokens": 33942432} +{"current_steps": 50355, "total_steps": 204665, "loss": 0.1368, "lr": 1.8728722224738244e-06, "epoch": 1.2301810275327976, "percentage": 24.6, "elapsed_time": "1:04:05", "remaining_time": "3:16:25", "throughput": 8826.51, "total_tokens": 33945376} +{"current_steps": 50360, "total_steps": 204665, "loss": 0.048, "lr": 1.8728306081931362e-06, "epoch": 1.2303031783646448, "percentage": 24.61, "elapsed_time": "1:04:06", "remaining_time": "3:16:24", "throughput": 8826.52, "total_tokens": 33948384} +{"current_steps": 50365, "total_steps": 204665, "loss": 0.0403, "lr": 1.8727889875649892e-06, "epoch": 1.2304253291964917, "percentage": 24.61, "elapsed_time": "1:04:06", "remaining_time": "3:16:24", "throughput": 8826.55, "total_tokens": 33951520} +{"current_steps": 50370, "total_steps": 204665, "loss": 0.1404, "lr": 1.8727473605896856e-06, "epoch": 1.230547480028339, "percentage": 24.61, "elapsed_time": "1:04:06", "remaining_time": "3:16:23", "throughput": 8826.66, "total_tokens": 33955040} +{"current_steps": 50375, "total_steps": 204665, "loss": 0.0039, "lr": 1.8727057272675286e-06, "epoch": 1.230669630860186, "percentage": 24.61, "elapsed_time": "1:04:07", "remaining_time": "3:16:23", "throughput": 8826.71, "total_tokens": 33958304} +{"current_steps": 50380, "total_steps": 204665, "loss": 0.1388, "lr": 1.8726640875988209e-06, "epoch": 1.2307917816920333, "percentage": 24.62, "elapsed_time": "1:04:07", "remaining_time": "3:16:22", "throughput": 8826.84, "total_tokens": 33961888} +{"current_steps": 50385, "total_steps": 204665, "loss": 0.1215, "lr": 1.8726224415838652e-06, "epoch": 1.2309139325238805, "percentage": 24.62, "elapsed_time": "1:04:07", "remaining_time": "3:16:22", "throughput": 8826.89, "total_tokens": 33965152} +{"current_steps": 50390, "total_steps": 204665, "loss": 0.0899, "lr": 1.8725807892229644e-06, "epoch": 1.2310360833557277, "percentage": 24.62, "elapsed_time": "1:04:08", "remaining_time": "3:16:21", "throughput": 8827.11, "total_tokens": 33969184} +{"current_steps": 50395, "total_steps": 204665, "loss": 0.0357, "lr": 1.8725391305164213e-06, "epoch": 1.2311582341875749, "percentage": 24.62, "elapsed_time": "1:04:08", "remaining_time": "3:16:21", "throughput": 8827.14, "total_tokens": 33972320} +{"current_steps": 50400, "total_steps": 204665, "loss": 0.0211, "lr": 1.8724974654645392e-06, "epoch": 1.231280385019422, "percentage": 24.63, "elapsed_time": "1:04:08", "remaining_time": "3:16:20", "throughput": 8827.23, "total_tokens": 33975776} +{"current_steps": 50405, "total_steps": 204665, "loss": 0.0521, "lr": 1.8724557940676206e-06, "epoch": 1.2314025358512692, "percentage": 24.63, "elapsed_time": "1:04:09", "remaining_time": "3:16:20", "throughput": 8827.22, "total_tokens": 33978720} +{"current_steps": 50410, "total_steps": 204665, "loss": 0.0016, "lr": 1.872414116325969e-06, "epoch": 1.2315246866831164, "percentage": 24.63, "elapsed_time": "1:04:09", "remaining_time": "3:16:19", "throughput": 8827.29, "total_tokens": 33982048} +{"current_steps": 50415, "total_steps": 204665, "loss": 0.1, "lr": 1.8723724322398874e-06, "epoch": 1.2316468375149634, "percentage": 24.63, "elapsed_time": "1:04:09", "remaining_time": "3:16:19", "throughput": 8827.27, "total_tokens": 33984928} +{"current_steps": 50420, "total_steps": 204665, "loss": 0.0651, "lr": 1.8723307418096782e-06, "epoch": 1.2317689883468106, "percentage": 24.64, "elapsed_time": "1:04:10", "remaining_time": "3:16:18", "throughput": 8827.31, "total_tokens": 33988128} +{"current_steps": 50425, "total_steps": 204665, "loss": 0.1015, "lr": 1.8722890450356457e-06, "epoch": 1.2318911391786578, "percentage": 24.64, "elapsed_time": "1:04:10", "remaining_time": "3:16:18", "throughput": 8827.38, "total_tokens": 33991456} +{"current_steps": 50430, "total_steps": 204665, "loss": 0.0219, "lr": 1.8722473419180926e-06, "epoch": 1.232013290010505, "percentage": 24.64, "elapsed_time": "1:04:11", "remaining_time": "3:16:17", "throughput": 8827.47, "total_tokens": 33994848} +{"current_steps": 50435, "total_steps": 204665, "loss": 0.11, "lr": 1.8722056324573226e-06, "epoch": 1.2321354408423522, "percentage": 24.64, "elapsed_time": "1:04:11", "remaining_time": "3:16:17", "throughput": 8827.5, "total_tokens": 33997984} +{"current_steps": 50440, "total_steps": 204665, "loss": 0.0329, "lr": 1.872163916653638e-06, "epoch": 1.2322575916741993, "percentage": 24.65, "elapsed_time": "1:04:11", "remaining_time": "3:16:16", "throughput": 8827.56, "total_tokens": 34001248} +{"current_steps": 50445, "total_steps": 204665, "loss": 0.0817, "lr": 1.8721221945073432e-06, "epoch": 1.2323797425060465, "percentage": 24.65, "elapsed_time": "1:04:12", "remaining_time": "3:16:16", "throughput": 8827.74, "total_tokens": 34005088} +{"current_steps": 50450, "total_steps": 204665, "loss": 0.1316, "lr": 1.872080466018741e-06, "epoch": 1.2325018933378937, "percentage": 24.65, "elapsed_time": "1:04:12", "remaining_time": "3:16:16", "throughput": 8827.83, "total_tokens": 34008544} +{"current_steps": 50455, "total_steps": 204665, "loss": 0.1039, "lr": 1.8720387311881352e-06, "epoch": 1.2326240441697407, "percentage": 24.65, "elapsed_time": "1:04:12", "remaining_time": "3:16:15", "throughput": 8827.98, "total_tokens": 34012192} +{"current_steps": 50460, "total_steps": 204665, "loss": 0.115, "lr": 1.8719969900158293e-06, "epoch": 1.2327461950015879, "percentage": 24.65, "elapsed_time": "1:04:13", "remaining_time": "3:16:15", "throughput": 8828.07, "total_tokens": 34015584} +{"current_steps": 50465, "total_steps": 204665, "loss": 0.0018, "lr": 1.8719552425021265e-06, "epoch": 1.232868345833435, "percentage": 24.66, "elapsed_time": "1:04:13", "remaining_time": "3:16:14", "throughput": 8828.1, "total_tokens": 34018720} +{"current_steps": 50470, "total_steps": 204665, "loss": 0.0622, "lr": 1.8719134886473308e-06, "epoch": 1.2329904966652823, "percentage": 24.66, "elapsed_time": "1:04:13", "remaining_time": "3:16:14", "throughput": 8828.19, "total_tokens": 34022176} +{"current_steps": 50475, "total_steps": 204665, "loss": 0.029, "lr": 1.8718717284517455e-06, "epoch": 1.2331126474971295, "percentage": 24.66, "elapsed_time": "1:04:14", "remaining_time": "3:16:13", "throughput": 8828.2, "total_tokens": 34025248} +{"current_steps": 50480, "total_steps": 204665, "loss": 0.1482, "lr": 1.871829961915675e-06, "epoch": 1.2332347983289766, "percentage": 24.66, "elapsed_time": "1:04:14", "remaining_time": "3:16:13", "throughput": 8828.3, "total_tokens": 34028704} +{"current_steps": 50485, "total_steps": 204665, "loss": 0.046, "lr": 1.871788189039422e-06, "epoch": 1.2333569491608238, "percentage": 24.67, "elapsed_time": "1:04:14", "remaining_time": "3:16:12", "throughput": 8828.45, "total_tokens": 34032416} +{"current_steps": 50490, "total_steps": 204665, "loss": 0.055, "lr": 1.8717464098232912e-06, "epoch": 1.233479099992671, "percentage": 24.67, "elapsed_time": "1:04:15", "remaining_time": "3:16:12", "throughput": 8828.72, "total_tokens": 34036704} +{"current_steps": 50495, "total_steps": 204665, "loss": 0.1558, "lr": 1.8717046242675858e-06, "epoch": 1.2336012508245182, "percentage": 24.67, "elapsed_time": "1:04:15", "remaining_time": "3:16:11", "throughput": 8828.74, "total_tokens": 34039776} +{"current_steps": 50500, "total_steps": 204665, "loss": 0.0559, "lr": 1.8716628323726099e-06, "epoch": 1.2337234016563654, "percentage": 24.67, "elapsed_time": "1:04:15", "remaining_time": "3:16:11", "throughput": 8828.8, "total_tokens": 34043040} +{"current_steps": 50505, "total_steps": 204665, "loss": 0.0497, "lr": 1.8716210341386676e-06, "epoch": 1.2338455524882124, "percentage": 24.68, "elapsed_time": "1:04:16", "remaining_time": "3:16:10", "throughput": 8828.8, "total_tokens": 34046048} +{"current_steps": 50510, "total_steps": 204665, "loss": 0.1009, "lr": 1.8715792295660623e-06, "epoch": 1.2339677033200596, "percentage": 24.68, "elapsed_time": "1:04:16", "remaining_time": "3:16:10", "throughput": 8828.94, "total_tokens": 34049696} +{"current_steps": 50515, "total_steps": 204665, "loss": 0.1194, "lr": 1.8715374186550989e-06, "epoch": 1.2340898541519068, "percentage": 24.68, "elapsed_time": "1:04:16", "remaining_time": "3:16:09", "throughput": 8828.95, "total_tokens": 34052768} +{"current_steps": 50520, "total_steps": 204665, "loss": 0.1266, "lr": 1.8714956014060808e-06, "epoch": 1.234212004983754, "percentage": 24.68, "elapsed_time": "1:04:17", "remaining_time": "3:16:09", "throughput": 8829.02, "total_tokens": 34056096} +{"current_steps": 50525, "total_steps": 204665, "loss": 0.1359, "lr": 1.8714537778193122e-06, "epoch": 1.2343341558156011, "percentage": 24.69, "elapsed_time": "1:04:17", "remaining_time": "3:16:08", "throughput": 8829.12, "total_tokens": 34059552} +{"current_steps": 50530, "total_steps": 204665, "loss": 0.0233, "lr": 1.8714119478950974e-06, "epoch": 1.2344563066474483, "percentage": 24.69, "elapsed_time": "1:04:17", "remaining_time": "3:16:08", "throughput": 8829.24, "total_tokens": 34063136} +{"current_steps": 50535, "total_steps": 204665, "loss": 0.1906, "lr": 1.8713701116337406e-06, "epoch": 1.2345784574792955, "percentage": 24.69, "elapsed_time": "1:04:18", "remaining_time": "3:16:07", "throughput": 8829.3, "total_tokens": 34066464} +{"current_steps": 50540, "total_steps": 204665, "loss": 0.0603, "lr": 1.8713282690355459e-06, "epoch": 1.2347006083111425, "percentage": 24.69, "elapsed_time": "1:04:18", "remaining_time": "3:16:07", "throughput": 8829.44, "total_tokens": 34070112} +{"current_steps": 50545, "total_steps": 204665, "loss": 0.0033, "lr": 1.8712864201008175e-06, "epoch": 1.2348227591429897, "percentage": 24.7, "elapsed_time": "1:04:19", "remaining_time": "3:16:06", "throughput": 8829.53, "total_tokens": 34073568} +{"current_steps": 50550, "total_steps": 204665, "loss": 0.1586, "lr": 1.87124456482986e-06, "epoch": 1.2349449099748369, "percentage": 24.7, "elapsed_time": "1:04:19", "remaining_time": "3:16:06", "throughput": 8829.61, "total_tokens": 34076896} +{"current_steps": 50555, "total_steps": 204665, "loss": 0.0601, "lr": 1.8712027032229778e-06, "epoch": 1.235067060806684, "percentage": 24.7, "elapsed_time": "1:04:19", "remaining_time": "3:16:05", "throughput": 8829.66, "total_tokens": 34080160} +{"current_steps": 50560, "total_steps": 204665, "loss": 0.1422, "lr": 1.8711608352804754e-06, "epoch": 1.2351892116385312, "percentage": 24.7, "elapsed_time": "1:04:20", "remaining_time": "3:16:05", "throughput": 8829.8, "total_tokens": 34083808} +{"current_steps": 50565, "total_steps": 204665, "loss": 0.1348, "lr": 1.8711189610026568e-06, "epoch": 1.2353113624703784, "percentage": 24.71, "elapsed_time": "1:04:20", "remaining_time": "3:16:04", "throughput": 8829.97, "total_tokens": 34087584} +{"current_steps": 50570, "total_steps": 204665, "loss": 0.0303, "lr": 1.8710770803898268e-06, "epoch": 1.2354335133022256, "percentage": 24.71, "elapsed_time": "1:04:20", "remaining_time": "3:16:04", "throughput": 8830.06, "total_tokens": 34091040} +{"current_steps": 50575, "total_steps": 204665, "loss": 0.0039, "lr": 1.8710351934422901e-06, "epoch": 1.2355556641340728, "percentage": 24.71, "elapsed_time": "1:04:21", "remaining_time": "3:16:04", "throughput": 8830.27, "total_tokens": 34095008} +{"current_steps": 50580, "total_steps": 204665, "loss": 0.0217, "lr": 1.870993300160351e-06, "epoch": 1.23567781496592, "percentage": 24.71, "elapsed_time": "1:04:21", "remaining_time": "3:16:03", "throughput": 8830.31, "total_tokens": 34098208} +{"current_steps": 50585, "total_steps": 204665, "loss": 0.0245, "lr": 1.8709514005443149e-06, "epoch": 1.2357999657977672, "percentage": 24.72, "elapsed_time": "1:04:21", "remaining_time": "3:16:03", "throughput": 8830.34, "total_tokens": 34101344} +{"current_steps": 50590, "total_steps": 204665, "loss": 0.0373, "lr": 1.8709094945944855e-06, "epoch": 1.2359221166296142, "percentage": 24.72, "elapsed_time": "1:04:22", "remaining_time": "3:16:02", "throughput": 8830.37, "total_tokens": 34104480} +{"current_steps": 50595, "total_steps": 204665, "loss": 0.0477, "lr": 1.870867582311168e-06, "epoch": 1.2360442674614613, "percentage": 24.72, "elapsed_time": "1:04:22", "remaining_time": "3:16:02", "throughput": 8830.43, "total_tokens": 34107744} +{"current_steps": 50600, "total_steps": 204665, "loss": 0.0901, "lr": 1.8708256636946671e-06, "epoch": 1.2361664182933085, "percentage": 24.72, "elapsed_time": "1:04:22", "remaining_time": "3:16:01", "throughput": 8830.5, "total_tokens": 34111072} +{"current_steps": 50605, "total_steps": 204665, "loss": 0.0008, "lr": 1.870783738745288e-06, "epoch": 1.2362885691251557, "percentage": 24.73, "elapsed_time": "1:04:23", "remaining_time": "3:16:01", "throughput": 8830.68, "total_tokens": 34114912} +{"current_steps": 50610, "total_steps": 204665, "loss": 0.1406, "lr": 1.8707418074633354e-06, "epoch": 1.236410719957003, "percentage": 24.73, "elapsed_time": "1:04:23", "remaining_time": "3:16:00", "throughput": 8830.8, "total_tokens": 34118496} +{"current_steps": 50615, "total_steps": 204665, "loss": 0.0007, "lr": 1.870699869849114e-06, "epoch": 1.23653287078885, "percentage": 24.73, "elapsed_time": "1:04:23", "remaining_time": "3:16:00", "throughput": 8830.85, "total_tokens": 34121696} +{"current_steps": 50620, "total_steps": 204665, "loss": 0.0469, "lr": 1.870657925902929e-06, "epoch": 1.2366550216206973, "percentage": 24.73, "elapsed_time": "1:04:24", "remaining_time": "3:15:59", "throughput": 8830.86, "total_tokens": 34124704} +{"current_steps": 50625, "total_steps": 204665, "loss": 0.076, "lr": 1.8706159756250855e-06, "epoch": 1.2367771724525445, "percentage": 24.74, "elapsed_time": "1:04:24", "remaining_time": "3:15:59", "throughput": 8830.92, "total_tokens": 34128032} +{"current_steps": 50630, "total_steps": 204665, "loss": 0.1679, "lr": 1.8705740190158882e-06, "epoch": 1.2368993232843914, "percentage": 24.74, "elapsed_time": "1:04:24", "remaining_time": "3:15:58", "throughput": 8831.06, "total_tokens": 34131680} +{"current_steps": 50635, "total_steps": 204665, "loss": 0.1274, "lr": 1.8705320560756425e-06, "epoch": 1.2370214741162386, "percentage": 24.74, "elapsed_time": "1:04:25", "remaining_time": "3:15:58", "throughput": 8831.09, "total_tokens": 34134816} +{"current_steps": 50640, "total_steps": 204665, "loss": 0.1526, "lr": 1.8704900868046537e-06, "epoch": 1.2371436249480858, "percentage": 24.74, "elapsed_time": "1:04:25", "remaining_time": "3:15:57", "throughput": 8831.18, "total_tokens": 34138272} +{"current_steps": 50645, "total_steps": 204665, "loss": 0.2716, "lr": 1.8704481112032272e-06, "epoch": 1.237265775779933, "percentage": 24.75, "elapsed_time": "1:04:26", "remaining_time": "3:15:57", "throughput": 8831.29, "total_tokens": 34141792} +{"current_steps": 50650, "total_steps": 204665, "loss": 0.0997, "lr": 1.8704061292716672e-06, "epoch": 1.2373879266117802, "percentage": 24.75, "elapsed_time": "1:04:26", "remaining_time": "3:15:56", "throughput": 8831.4, "total_tokens": 34145312} +{"current_steps": 50655, "total_steps": 204665, "loss": 0.0765, "lr": 1.8703641410102802e-06, "epoch": 1.2375100774436274, "percentage": 24.75, "elapsed_time": "1:04:26", "remaining_time": "3:15:56", "throughput": 8831.49, "total_tokens": 34148704} +{"current_steps": 50660, "total_steps": 204665, "loss": 0.0349, "lr": 1.8703221464193709e-06, "epoch": 1.2376322282754746, "percentage": 24.75, "elapsed_time": "1:04:27", "remaining_time": "3:15:55", "throughput": 8831.52, "total_tokens": 34151840} +{"current_steps": 50665, "total_steps": 204665, "loss": 0.0264, "lr": 1.8702801454992448e-06, "epoch": 1.2377543791073218, "percentage": 24.76, "elapsed_time": "1:04:27", "remaining_time": "3:15:55", "throughput": 8831.53, "total_tokens": 34154848} +{"current_steps": 50670, "total_steps": 204665, "loss": 0.0021, "lr": 1.8702381382502076e-06, "epoch": 1.237876529939169, "percentage": 24.76, "elapsed_time": "1:04:27", "remaining_time": "3:15:54", "throughput": 8831.71, "total_tokens": 34158752} +{"current_steps": 50675, "total_steps": 204665, "loss": 0.0724, "lr": 1.8701961246725643e-06, "epoch": 1.2379986807710162, "percentage": 24.76, "elapsed_time": "1:04:28", "remaining_time": "3:15:54", "throughput": 8831.74, "total_tokens": 34161888} +{"current_steps": 50680, "total_steps": 204665, "loss": 0.0506, "lr": 1.870154104766621e-06, "epoch": 1.2381208316028631, "percentage": 24.76, "elapsed_time": "1:04:28", "remaining_time": "3:15:53", "throughput": 8831.83, "total_tokens": 34165280} +{"current_steps": 50685, "total_steps": 204665, "loss": 0.1808, "lr": 1.870112078532683e-06, "epoch": 1.2382429824347103, "percentage": 24.76, "elapsed_time": "1:04:28", "remaining_time": "3:15:53", "throughput": 8831.9, "total_tokens": 34168608} +{"current_steps": 50690, "total_steps": 204665, "loss": 0.1062, "lr": 1.870070045971056e-06, "epoch": 1.2383651332665575, "percentage": 24.77, "elapsed_time": "1:04:29", "remaining_time": "3:15:52", "throughput": 8831.96, "total_tokens": 34171872} +{"current_steps": 50695, "total_steps": 204665, "loss": 0.1096, "lr": 1.870028007082045e-06, "epoch": 1.2384872840984047, "percentage": 24.77, "elapsed_time": "1:04:29", "remaining_time": "3:15:52", "throughput": 8832.09, "total_tokens": 34175520} +{"current_steps": 50700, "total_steps": 204665, "loss": 0.1077, "lr": 1.869985961865957e-06, "epoch": 1.2386094349302519, "percentage": 24.77, "elapsed_time": "1:04:29", "remaining_time": "3:15:51", "throughput": 8832.12, "total_tokens": 34178592} +{"current_steps": 50705, "total_steps": 204665, "loss": 0.1536, "lr": 1.869943910323097e-06, "epoch": 1.238731585762099, "percentage": 24.77, "elapsed_time": "1:04:30", "remaining_time": "3:15:51", "throughput": 8832.21, "total_tokens": 34182048} +{"current_steps": 50710, "total_steps": 204665, "loss": 0.201, "lr": 1.8699018524537706e-06, "epoch": 1.2388537365939463, "percentage": 24.78, "elapsed_time": "1:04:30", "remaining_time": "3:15:50", "throughput": 8832.41, "total_tokens": 34186016} +{"current_steps": 50715, "total_steps": 204665, "loss": 0.079, "lr": 1.8698597882582842e-06, "epoch": 1.2389758874257935, "percentage": 24.78, "elapsed_time": "1:04:30", "remaining_time": "3:15:50", "throughput": 8832.57, "total_tokens": 34189792} +{"current_steps": 50720, "total_steps": 204665, "loss": 0.1121, "lr": 1.8698177177369433e-06, "epoch": 1.2390980382576404, "percentage": 24.78, "elapsed_time": "1:04:31", "remaining_time": "3:15:49", "throughput": 8832.68, "total_tokens": 34193312} +{"current_steps": 50725, "total_steps": 204665, "loss": 0.0983, "lr": 1.869775640890054e-06, "epoch": 1.2392201890894876, "percentage": 24.78, "elapsed_time": "1:04:31", "remaining_time": "3:15:49", "throughput": 8832.82, "total_tokens": 34196960} +{"current_steps": 50730, "total_steps": 204665, "loss": 0.0083, "lr": 1.8697335577179226e-06, "epoch": 1.2393423399213348, "percentage": 24.79, "elapsed_time": "1:04:31", "remaining_time": "3:15:48", "throughput": 8832.89, "total_tokens": 34200288} +{"current_steps": 50735, "total_steps": 204665, "loss": 0.0796, "lr": 1.8696914682208544e-06, "epoch": 1.239464490753182, "percentage": 24.79, "elapsed_time": "1:04:32", "remaining_time": "3:15:48", "throughput": 8832.89, "total_tokens": 34203296} +{"current_steps": 50740, "total_steps": 204665, "loss": 0.0923, "lr": 1.8696493723991562e-06, "epoch": 1.2395866415850292, "percentage": 24.79, "elapsed_time": "1:04:32", "remaining_time": "3:15:47", "throughput": 8832.98, "total_tokens": 34206688} +{"current_steps": 50745, "total_steps": 204665, "loss": 0.0365, "lr": 1.8696072702531339e-06, "epoch": 1.2397087924168764, "percentage": 24.79, "elapsed_time": "1:04:32", "remaining_time": "3:15:47", "throughput": 8833.0, "total_tokens": 34209760} +{"current_steps": 50750, "total_steps": 204665, "loss": 0.0933, "lr": 1.8695651617830934e-06, "epoch": 1.2398309432487236, "percentage": 24.8, "elapsed_time": "1:04:33", "remaining_time": "3:15:46", "throughput": 8833.08, "total_tokens": 34213152} +{"current_steps": 50755, "total_steps": 204665, "loss": 0.1318, "lr": 1.8695230469893413e-06, "epoch": 1.2399530940805708, "percentage": 24.8, "elapsed_time": "1:04:33", "remaining_time": "3:15:46", "throughput": 8833.09, "total_tokens": 34216224} +{"current_steps": 50760, "total_steps": 204665, "loss": 0.0015, "lr": 1.8694809258721835e-06, "epoch": 1.240075244912418, "percentage": 24.8, "elapsed_time": "1:04:33", "remaining_time": "3:15:45", "throughput": 8833.16, "total_tokens": 34219552} +{"current_steps": 50765, "total_steps": 204665, "loss": 0.0057, "lr": 1.8694387984319268e-06, "epoch": 1.2401973957442651, "percentage": 24.8, "elapsed_time": "1:04:34", "remaining_time": "3:15:45", "throughput": 8833.17, "total_tokens": 34222624} +{"current_steps": 50770, "total_steps": 204665, "loss": 0.0006, "lr": 1.8693966646688774e-06, "epoch": 1.240319546576112, "percentage": 24.81, "elapsed_time": "1:04:34", "remaining_time": "3:15:44", "throughput": 8833.15, "total_tokens": 34225504} +{"current_steps": 50775, "total_steps": 204665, "loss": 0.0929, "lr": 1.8693545245833415e-06, "epoch": 1.2404416974079593, "percentage": 24.81, "elapsed_time": "1:04:35", "remaining_time": "3:15:44", "throughput": 8833.17, "total_tokens": 34228576} +{"current_steps": 50780, "total_steps": 204665, "loss": 0.1348, "lr": 1.8693123781756258e-06, "epoch": 1.2405638482398065, "percentage": 24.81, "elapsed_time": "1:04:35", "remaining_time": "3:15:43", "throughput": 8833.24, "total_tokens": 34231904} +{"current_steps": 50785, "total_steps": 204665, "loss": 0.0009, "lr": 1.8692702254460363e-06, "epoch": 1.2406859990716537, "percentage": 24.81, "elapsed_time": "1:04:35", "remaining_time": "3:15:43", "throughput": 8833.27, "total_tokens": 34235040} +{"current_steps": 50790, "total_steps": 204665, "loss": 0.0466, "lr": 1.8692280663948802e-06, "epoch": 1.2408081499035009, "percentage": 24.82, "elapsed_time": "1:04:36", "remaining_time": "3:15:43", "throughput": 8833.43, "total_tokens": 34238816} +{"current_steps": 50795, "total_steps": 204665, "loss": 0.2587, "lr": 1.8691859010224636e-06, "epoch": 1.240930300735348, "percentage": 24.82, "elapsed_time": "1:04:36", "remaining_time": "3:15:42", "throughput": 8833.52, "total_tokens": 34242208} +{"current_steps": 50800, "total_steps": 204665, "loss": 0.0009, "lr": 1.8691437293290936e-06, "epoch": 1.2410524515671952, "percentage": 24.82, "elapsed_time": "1:04:36", "remaining_time": "3:15:42", "throughput": 8833.55, "total_tokens": 34245344} +{"current_steps": 50805, "total_steps": 204665, "loss": 0.0756, "lr": 1.8691015513150766e-06, "epoch": 1.2411746023990424, "percentage": 24.82, "elapsed_time": "1:04:37", "remaining_time": "3:15:41", "throughput": 8833.59, "total_tokens": 34248544} +{"current_steps": 50810, "total_steps": 204665, "loss": 0.1529, "lr": 1.8690593669807191e-06, "epoch": 1.2412967532308894, "percentage": 24.83, "elapsed_time": "1:04:37", "remaining_time": "3:15:41", "throughput": 8833.65, "total_tokens": 34251808} +{"current_steps": 50815, "total_steps": 204665, "loss": 0.093, "lr": 1.8690171763263284e-06, "epoch": 1.2414189040627366, "percentage": 24.83, "elapsed_time": "1:04:37", "remaining_time": "3:15:40", "throughput": 8833.76, "total_tokens": 34255328} +{"current_steps": 50820, "total_steps": 204665, "loss": 0.1919, "lr": 1.868974979352211e-06, "epoch": 1.2415410548945838, "percentage": 24.83, "elapsed_time": "1:04:38", "remaining_time": "3:15:40", "throughput": 8833.8, "total_tokens": 34258528} +{"current_steps": 50825, "total_steps": 204665, "loss": 0.0842, "lr": 1.8689327760586737e-06, "epoch": 1.241663205726431, "percentage": 24.83, "elapsed_time": "1:04:38", "remaining_time": "3:15:39", "throughput": 8833.89, "total_tokens": 34261920} +{"current_steps": 50830, "total_steps": 204665, "loss": 0.0521, "lr": 1.8688905664460237e-06, "epoch": 1.2417853565582782, "percentage": 24.84, "elapsed_time": "1:04:38", "remaining_time": "3:15:39", "throughput": 8833.91, "total_tokens": 34264992} +{"current_steps": 50835, "total_steps": 204665, "loss": 0.0758, "lr": 1.8688483505145677e-06, "epoch": 1.2419075073901253, "percentage": 24.84, "elapsed_time": "1:04:39", "remaining_time": "3:15:38", "throughput": 8833.94, "total_tokens": 34268128} +{"current_steps": 50840, "total_steps": 204665, "loss": 0.0313, "lr": 1.8688061282646129e-06, "epoch": 1.2420296582219725, "percentage": 24.84, "elapsed_time": "1:04:39", "remaining_time": "3:15:38", "throughput": 8833.96, "total_tokens": 34271200} +{"current_steps": 50845, "total_steps": 204665, "loss": 0.104, "lr": 1.868763899696466e-06, "epoch": 1.2421518090538197, "percentage": 24.84, "elapsed_time": "1:04:39", "remaining_time": "3:15:37", "throughput": 8834.01, "total_tokens": 34274400} +{"current_steps": 50850, "total_steps": 204665, "loss": 0.1647, "lr": 1.8687216648104344e-06, "epoch": 1.242273959885667, "percentage": 24.85, "elapsed_time": "1:04:40", "remaining_time": "3:15:37", "throughput": 8834.18, "total_tokens": 34278240} +{"current_steps": 50855, "total_steps": 204665, "loss": 0.1633, "lr": 1.8686794236068254e-06, "epoch": 1.242396110717514, "percentage": 24.85, "elapsed_time": "1:04:40", "remaining_time": "3:15:36", "throughput": 8834.23, "total_tokens": 34281440} +{"current_steps": 50860, "total_steps": 204665, "loss": 0.0966, "lr": 1.8686371760859458e-06, "epoch": 1.242518261549361, "percentage": 24.85, "elapsed_time": "1:04:40", "remaining_time": "3:15:36", "throughput": 8834.26, "total_tokens": 34284576} +{"current_steps": 50865, "total_steps": 204665, "loss": 0.1385, "lr": 1.8685949222481034e-06, "epoch": 1.2426404123812083, "percentage": 24.85, "elapsed_time": "1:04:41", "remaining_time": "3:15:35", "throughput": 8834.36, "total_tokens": 34288032} +{"current_steps": 50870, "total_steps": 204665, "loss": 0.0023, "lr": 1.8685526620936048e-06, "epoch": 1.2427625632130554, "percentage": 24.86, "elapsed_time": "1:04:41", "remaining_time": "3:15:35", "throughput": 8834.42, "total_tokens": 34291296} +{"current_steps": 50875, "total_steps": 204665, "loss": 0.0586, "lr": 1.8685103956227578e-06, "epoch": 1.2428847140449026, "percentage": 24.86, "elapsed_time": "1:04:41", "remaining_time": "3:15:34", "throughput": 8834.47, "total_tokens": 34294496} +{"current_steps": 50880, "total_steps": 204665, "loss": 0.1438, "lr": 1.8684681228358694e-06, "epoch": 1.2430068648767498, "percentage": 24.86, "elapsed_time": "1:04:42", "remaining_time": "3:15:34", "throughput": 8834.5, "total_tokens": 34297632} +{"current_steps": 50885, "total_steps": 204665, "loss": 0.2076, "lr": 1.8684258437332472e-06, "epoch": 1.243129015708597, "percentage": 24.86, "elapsed_time": "1:04:42", "remaining_time": "3:15:33", "throughput": 8834.64, "total_tokens": 34301280} +{"current_steps": 50890, "total_steps": 204665, "loss": 0.0412, "lr": 1.8683835583151986e-06, "epoch": 1.2432511665404442, "percentage": 24.87, "elapsed_time": "1:04:42", "remaining_time": "3:15:33", "throughput": 8834.79, "total_tokens": 34304992} +{"current_steps": 50895, "total_steps": 204665, "loss": 0.0577, "lr": 1.8683412665820314e-06, "epoch": 1.2433733173722914, "percentage": 24.87, "elapsed_time": "1:04:43", "remaining_time": "3:15:32", "throughput": 8834.89, "total_tokens": 34308448} +{"current_steps": 50900, "total_steps": 204665, "loss": 0.2242, "lr": 1.868298968534053e-06, "epoch": 1.2434954682041384, "percentage": 24.87, "elapsed_time": "1:04:43", "remaining_time": "3:15:32", "throughput": 8835.04, "total_tokens": 34312160} +{"current_steps": 50905, "total_steps": 204665, "loss": 0.1221, "lr": 1.8682566641715709e-06, "epoch": 1.2436176190359856, "percentage": 24.87, "elapsed_time": "1:04:43", "remaining_time": "3:15:31", "throughput": 8835.15, "total_tokens": 34315680} +{"current_steps": 50910, "total_steps": 204665, "loss": 0.0311, "lr": 1.8682143534948928e-06, "epoch": 1.2437397698678327, "percentage": 24.87, "elapsed_time": "1:04:44", "remaining_time": "3:15:31", "throughput": 8835.22, "total_tokens": 34319008} +{"current_steps": 50915, "total_steps": 204665, "loss": 0.0355, "lr": 1.8681720365043263e-06, "epoch": 1.24386192069968, "percentage": 24.88, "elapsed_time": "1:04:44", "remaining_time": "3:15:30", "throughput": 8835.3, "total_tokens": 34322400} +{"current_steps": 50920, "total_steps": 204665, "loss": 0.0827, "lr": 1.8681297132001794e-06, "epoch": 1.2439840715315271, "percentage": 24.88, "elapsed_time": "1:04:45", "remaining_time": "3:15:30", "throughput": 8835.43, "total_tokens": 34325984} +{"current_steps": 50925, "total_steps": 204665, "loss": 0.0321, "lr": 1.8680873835827598e-06, "epoch": 1.2441062223633743, "percentage": 24.88, "elapsed_time": "1:04:45", "remaining_time": "3:15:29", "throughput": 8835.53, "total_tokens": 34329440} +{"current_steps": 50930, "total_steps": 204665, "loss": 0.05, "lr": 1.8680450476523748e-06, "epoch": 1.2442283731952215, "percentage": 24.88, "elapsed_time": "1:04:45", "remaining_time": "3:15:29", "throughput": 8835.59, "total_tokens": 34332704} +{"current_steps": 50935, "total_steps": 204665, "loss": 0.0462, "lr": 1.8680027054093332e-06, "epoch": 1.2443505240270687, "percentage": 24.89, "elapsed_time": "1:04:46", "remaining_time": "3:15:28", "throughput": 8835.65, "total_tokens": 34336032} +{"current_steps": 50940, "total_steps": 204665, "loss": 0.0424, "lr": 1.8679603568539423e-06, "epoch": 1.2444726748589159, "percentage": 24.89, "elapsed_time": "1:04:46", "remaining_time": "3:15:28", "throughput": 8835.66, "total_tokens": 34339040} +{"current_steps": 50945, "total_steps": 204665, "loss": 0.2089, "lr": 1.8679180019865102e-06, "epoch": 1.244594825690763, "percentage": 24.89, "elapsed_time": "1:04:46", "remaining_time": "3:15:27", "throughput": 8835.71, "total_tokens": 34342240} +{"current_steps": 50950, "total_steps": 204665, "loss": 0.0468, "lr": 1.867875640807345e-06, "epoch": 1.24471697652261, "percentage": 24.89, "elapsed_time": "1:04:47", "remaining_time": "3:15:27", "throughput": 8835.8, "total_tokens": 34345696} +{"current_steps": 50955, "total_steps": 204665, "loss": 0.0874, "lr": 1.8678332733167546e-06, "epoch": 1.2448391273544572, "percentage": 24.9, "elapsed_time": "1:04:47", "remaining_time": "3:15:26", "throughput": 8835.91, "total_tokens": 34349216} +{"current_steps": 50960, "total_steps": 204665, "loss": 0.1386, "lr": 1.8677908995150475e-06, "epoch": 1.2449612781863044, "percentage": 24.9, "elapsed_time": "1:04:47", "remaining_time": "3:15:26", "throughput": 8835.96, "total_tokens": 34352416} +{"current_steps": 50965, "total_steps": 204665, "loss": 0.0869, "lr": 1.8677485194025313e-06, "epoch": 1.2450834290181516, "percentage": 24.9, "elapsed_time": "1:04:48", "remaining_time": "3:15:25", "throughput": 8836.09, "total_tokens": 34356000} +{"current_steps": 50970, "total_steps": 204665, "loss": 0.0388, "lr": 1.8677061329795145e-06, "epoch": 1.2452055798499988, "percentage": 24.9, "elapsed_time": "1:04:48", "remaining_time": "3:15:25", "throughput": 8836.24, "total_tokens": 34359712} +{"current_steps": 50975, "total_steps": 204665, "loss": 0.1767, "lr": 1.8676637402463054e-06, "epoch": 1.245327730681846, "percentage": 24.91, "elapsed_time": "1:04:48", "remaining_time": "3:15:24", "throughput": 8836.31, "total_tokens": 34363040} +{"current_steps": 50980, "total_steps": 204665, "loss": 0.0821, "lr": 1.867621341203212e-06, "epoch": 1.2454498815136932, "percentage": 24.91, "elapsed_time": "1:04:49", "remaining_time": "3:15:24", "throughput": 8836.34, "total_tokens": 34366176} +{"current_steps": 50985, "total_steps": 204665, "loss": 0.0614, "lr": 1.867578935850543e-06, "epoch": 1.2455720323455404, "percentage": 24.91, "elapsed_time": "1:04:49", "remaining_time": "3:15:23", "throughput": 8836.37, "total_tokens": 34369312} +{"current_steps": 50990, "total_steps": 204665, "loss": 0.003, "lr": 1.867536524188607e-06, "epoch": 1.2456941831773873, "percentage": 24.91, "elapsed_time": "1:04:49", "remaining_time": "3:15:23", "throughput": 8836.43, "total_tokens": 34372576} +{"current_steps": 50995, "total_steps": 204665, "loss": 0.1066, "lr": 1.8674941062177117e-06, "epoch": 1.2458163340092345, "percentage": 24.92, "elapsed_time": "1:04:50", "remaining_time": "3:15:22", "throughput": 8836.54, "total_tokens": 34376096} +{"current_steps": 51000, "total_steps": 204665, "loss": 0.0379, "lr": 1.8674516819381657e-06, "epoch": 1.2459384848410817, "percentage": 24.92, "elapsed_time": "1:04:50", "remaining_time": "3:15:22", "throughput": 8836.65, "total_tokens": 34379616} +{"current_steps": 51005, "total_steps": 204665, "loss": 0.0926, "lr": 1.867409251350278e-06, "epoch": 1.246060635672929, "percentage": 24.92, "elapsed_time": "1:04:50", "remaining_time": "3:15:21", "throughput": 8836.69, "total_tokens": 34382816} +{"current_steps": 51010, "total_steps": 204665, "loss": 0.116, "lr": 1.8673668144543567e-06, "epoch": 1.246182786504776, "percentage": 24.92, "elapsed_time": "1:04:51", "remaining_time": "3:15:21", "throughput": 8836.78, "total_tokens": 34386208} +{"current_steps": 51015, "total_steps": 204665, "loss": 0.1146, "lr": 1.867324371250711e-06, "epoch": 1.2463049373366233, "percentage": 24.93, "elapsed_time": "1:04:51", "remaining_time": "3:15:20", "throughput": 8836.8, "total_tokens": 34389344} +{"current_steps": 51020, "total_steps": 204665, "loss": 0.0864, "lr": 1.8672819217396491e-06, "epoch": 1.2464270881684705, "percentage": 24.93, "elapsed_time": "1:04:51", "remaining_time": "3:15:20", "throughput": 8836.9, "total_tokens": 34392800} +{"current_steps": 51025, "total_steps": 204665, "loss": 0.056, "lr": 1.8672394659214797e-06, "epoch": 1.2465492390003177, "percentage": 24.93, "elapsed_time": "1:04:52", "remaining_time": "3:15:19", "throughput": 8836.95, "total_tokens": 34396064} +{"current_steps": 51030, "total_steps": 204665, "loss": 0.1655, "lr": 1.8671970037965116e-06, "epoch": 1.2466713898321649, "percentage": 24.93, "elapsed_time": "1:04:52", "remaining_time": "3:15:19", "throughput": 8836.92, "total_tokens": 34398880} +{"current_steps": 51035, "total_steps": 204665, "loss": 0.1223, "lr": 1.8671545353650537e-06, "epoch": 1.246793540664012, "percentage": 24.94, "elapsed_time": "1:04:52", "remaining_time": "3:15:18", "throughput": 8837.03, "total_tokens": 34402400} +{"current_steps": 51040, "total_steps": 204665, "loss": 0.0025, "lr": 1.8671120606274149e-06, "epoch": 1.246915691495859, "percentage": 24.94, "elapsed_time": "1:04:53", "remaining_time": "3:15:18", "throughput": 8837.15, "total_tokens": 34405984} +{"current_steps": 51045, "total_steps": 204665, "loss": 0.0859, "lr": 1.8670695795839038e-06, "epoch": 1.2470378423277062, "percentage": 24.94, "elapsed_time": "1:04:53", "remaining_time": "3:15:18", "throughput": 8837.25, "total_tokens": 34409440} +{"current_steps": 51050, "total_steps": 204665, "loss": 0.0481, "lr": 1.8670270922348296e-06, "epoch": 1.2471599931595534, "percentage": 24.94, "elapsed_time": "1:04:54", "remaining_time": "3:15:17", "throughput": 8837.28, "total_tokens": 34412576} +{"current_steps": 51055, "total_steps": 204665, "loss": 0.1475, "lr": 1.866984598580501e-06, "epoch": 1.2472821439914006, "percentage": 24.95, "elapsed_time": "1:04:54", "remaining_time": "3:15:17", "throughput": 8837.32, "total_tokens": 34415776} +{"current_steps": 51060, "total_steps": 204665, "loss": 0.0036, "lr": 1.8669420986212274e-06, "epoch": 1.2474042948232478, "percentage": 24.95, "elapsed_time": "1:04:54", "remaining_time": "3:15:16", "throughput": 8837.44, "total_tokens": 34419296} +{"current_steps": 51065, "total_steps": 204665, "loss": 0.1043, "lr": 1.866899592357318e-06, "epoch": 1.247526445655095, "percentage": 24.95, "elapsed_time": "1:04:55", "remaining_time": "3:15:16", "throughput": 8837.47, "total_tokens": 34422432} +{"current_steps": 51070, "total_steps": 204665, "loss": 0.0421, "lr": 1.866857079789081e-06, "epoch": 1.2476485964869422, "percentage": 24.95, "elapsed_time": "1:04:55", "remaining_time": "3:15:15", "throughput": 8837.5, "total_tokens": 34425568} +{"current_steps": 51075, "total_steps": 204665, "loss": 0.1972, "lr": 1.8668145609168265e-06, "epoch": 1.2477707473187891, "percentage": 24.96, "elapsed_time": "1:04:55", "remaining_time": "3:15:15", "throughput": 8837.58, "total_tokens": 34428960} +{"current_steps": 51080, "total_steps": 204665, "loss": 0.0566, "lr": 1.8667720357408632e-06, "epoch": 1.2478928981506363, "percentage": 24.96, "elapsed_time": "1:04:56", "remaining_time": "3:15:14", "throughput": 8837.66, "total_tokens": 34432352} +{"current_steps": 51085, "total_steps": 204665, "loss": 0.0437, "lr": 1.8667295042615006e-06, "epoch": 1.2480150489824835, "percentage": 24.96, "elapsed_time": "1:04:56", "remaining_time": "3:15:14", "throughput": 8837.73, "total_tokens": 34435680} +{"current_steps": 51090, "total_steps": 204665, "loss": 0.0038, "lr": 1.866686966479048e-06, "epoch": 1.2481371998143307, "percentage": 24.96, "elapsed_time": "1:04:56", "remaining_time": "3:15:13", "throughput": 8837.99, "total_tokens": 34439904} +{"current_steps": 51095, "total_steps": 204665, "loss": 0.2072, "lr": 1.8666444223938145e-06, "epoch": 1.2482593506461779, "percentage": 24.97, "elapsed_time": "1:04:57", "remaining_time": "3:15:13", "throughput": 8838.04, "total_tokens": 34443104} +{"current_steps": 51100, "total_steps": 204665, "loss": 0.2199, "lr": 1.8666018720061097e-06, "epoch": 1.248381501478025, "percentage": 24.97, "elapsed_time": "1:04:57", "remaining_time": "3:15:12", "throughput": 8838.07, "total_tokens": 34446240} +{"current_steps": 51105, "total_steps": 204665, "loss": 0.0821, "lr": 1.8665593153162429e-06, "epoch": 1.2485036523098723, "percentage": 24.97, "elapsed_time": "1:04:57", "remaining_time": "3:15:12", "throughput": 8838.2, "total_tokens": 34449888} +{"current_steps": 51110, "total_steps": 204665, "loss": 0.0439, "lr": 1.8665167523245238e-06, "epoch": 1.2486258031417194, "percentage": 24.97, "elapsed_time": "1:04:58", "remaining_time": "3:15:11", "throughput": 8838.33, "total_tokens": 34453536} +{"current_steps": 51115, "total_steps": 204665, "loss": 0.0081, "lr": 1.8664741830312618e-06, "epoch": 1.2487479539735666, "percentage": 24.97, "elapsed_time": "1:04:58", "remaining_time": "3:15:11", "throughput": 8838.42, "total_tokens": 34456928} +{"current_steps": 51120, "total_steps": 204665, "loss": 0.0491, "lr": 1.8664316074367666e-06, "epoch": 1.2488701048054138, "percentage": 24.98, "elapsed_time": "1:04:58", "remaining_time": "3:15:10", "throughput": 8838.43, "total_tokens": 34459936} +{"current_steps": 51125, "total_steps": 204665, "loss": 0.0583, "lr": 1.8663890255413474e-06, "epoch": 1.2489922556372608, "percentage": 24.98, "elapsed_time": "1:04:59", "remaining_time": "3:15:10", "throughput": 8838.51, "total_tokens": 34463328} +{"current_steps": 51130, "total_steps": 204665, "loss": 0.1613, "lr": 1.8663464373453146e-06, "epoch": 1.249114406469108, "percentage": 24.98, "elapsed_time": "1:04:59", "remaining_time": "3:15:09", "throughput": 8838.55, "total_tokens": 34466528} +{"current_steps": 51135, "total_steps": 204665, "loss": 0.1676, "lr": 1.8663038428489775e-06, "epoch": 1.2492365573009552, "percentage": 24.98, "elapsed_time": "1:04:59", "remaining_time": "3:15:09", "throughput": 8838.66, "total_tokens": 34470048} +{"current_steps": 51140, "total_steps": 204665, "loss": 0.0033, "lr": 1.8662612420526455e-06, "epoch": 1.2493587081328024, "percentage": 24.99, "elapsed_time": "1:05:00", "remaining_time": "3:15:08", "throughput": 8838.78, "total_tokens": 34473632} +{"current_steps": 51145, "total_steps": 204665, "loss": 0.0473, "lr": 1.866218634956629e-06, "epoch": 1.2494808589646496, "percentage": 24.99, "elapsed_time": "1:05:00", "remaining_time": "3:15:08", "throughput": 8838.78, "total_tokens": 34476640} +{"current_steps": 51150, "total_steps": 204665, "loss": 0.0546, "lr": 1.8661760215612374e-06, "epoch": 1.2496030097964967, "percentage": 24.99, "elapsed_time": "1:05:00", "remaining_time": "3:15:07", "throughput": 8838.84, "total_tokens": 34479904} +{"current_steps": 51155, "total_steps": 204665, "loss": 0.0025, "lr": 1.8661334018667806e-06, "epoch": 1.249725160628344, "percentage": 24.99, "elapsed_time": "1:05:01", "remaining_time": "3:15:07", "throughput": 8838.93, "total_tokens": 34483360} +{"current_steps": 51160, "total_steps": 204665, "loss": 0.1366, "lr": 1.8660907758735693e-06, "epoch": 1.2498473114601911, "percentage": 25.0, "elapsed_time": "1:05:01", "remaining_time": "3:15:06", "throughput": 8838.91, "total_tokens": 34486240} +{"current_steps": 51165, "total_steps": 204665, "loss": 0.1577, "lr": 1.8660481435819127e-06, "epoch": 1.249969462292038, "percentage": 25.0, "elapsed_time": "1:05:01", "remaining_time": "3:15:06", "throughput": 8838.92, "total_tokens": 34489248} +{"current_steps": 51170, "total_steps": 204665, "loss": 0.1395, "lr": 1.8660055049921209e-06, "epoch": 1.2500916131238853, "percentage": 25.0, "elapsed_time": "1:05:02", "remaining_time": "3:15:05", "throughput": 8838.93, "total_tokens": 34492320} +{"current_steps": 51170, "total_steps": 204665, "eval_loss": 0.1528467833995819, "epoch": 1.2500916131238853, "percentage": 25.0, "elapsed_time": "1:05:49", "remaining_time": "3:17:27", "throughput": 8732.89, "total_tokens": 34492320} +{"current_steps": 51175, "total_steps": 204665, "loss": 0.0012, "lr": 1.8659628601045043e-06, "epoch": 1.2502137639557325, "percentage": 25.0, "elapsed_time": "1:06:24", "remaining_time": "3:19:10", "throughput": 8657.44, "total_tokens": 34495200} +{"current_steps": 51180, "total_steps": 204665, "loss": 0.1282, "lr": 1.8659202089193728e-06, "epoch": 1.2503359147875797, "percentage": 25.01, "elapsed_time": "1:06:24", "remaining_time": "3:19:10", "throughput": 8657.47, "total_tokens": 34498272} +{"current_steps": 51185, "total_steps": 204665, "loss": 0.0379, "lr": 1.8658775514370366e-06, "epoch": 1.2504580656194269, "percentage": 25.01, "elapsed_time": "1:06:25", "remaining_time": "3:19:09", "throughput": 8657.5, "total_tokens": 34501344} +{"current_steps": 51190, "total_steps": 204665, "loss": 0.0774, "lr": 1.865834887657806e-06, "epoch": 1.250580216451274, "percentage": 25.01, "elapsed_time": "1:06:25", "remaining_time": "3:19:09", "throughput": 8657.57, "total_tokens": 34504608} +{"current_steps": 51195, "total_steps": 204665, "loss": 0.0997, "lr": 1.8657922175819913e-06, "epoch": 1.2507023672831212, "percentage": 25.01, "elapsed_time": "1:06:25", "remaining_time": "3:19:08", "throughput": 8657.71, "total_tokens": 34508256} +{"current_steps": 51200, "total_steps": 204665, "loss": 0.0893, "lr": 1.8657495412099026e-06, "epoch": 1.2508245181149684, "percentage": 25.02, "elapsed_time": "1:06:26", "remaining_time": "3:19:08", "throughput": 8657.88, "total_tokens": 34511968} +{"current_steps": 51205, "total_steps": 204665, "loss": 0.1052, "lr": 1.8657068585418502e-06, "epoch": 1.2509466689468156, "percentage": 25.02, "elapsed_time": "1:06:26", "remaining_time": "3:19:07", "throughput": 8657.94, "total_tokens": 34515232} +{"current_steps": 51210, "total_steps": 204665, "loss": 0.1418, "lr": 1.865664169578145e-06, "epoch": 1.2510688197786628, "percentage": 25.02, "elapsed_time": "1:06:26", "remaining_time": "3:19:07", "throughput": 8657.96, "total_tokens": 34518240} +{"current_steps": 51215, "total_steps": 204665, "loss": 0.0726, "lr": 1.8656214743190972e-06, "epoch": 1.25119097061051, "percentage": 25.02, "elapsed_time": "1:06:27", "remaining_time": "3:19:06", "throughput": 8658.06, "total_tokens": 34521696} +{"current_steps": 51220, "total_steps": 204665, "loss": 0.2161, "lr": 1.865578772765017e-06, "epoch": 1.251313121442357, "percentage": 25.03, "elapsed_time": "1:06:27", "remaining_time": "3:19:05", "throughput": 8658.1, "total_tokens": 34524768} +{"current_steps": 51225, "total_steps": 204665, "loss": 0.0474, "lr": 1.8655360649162151e-06, "epoch": 1.2514352722742041, "percentage": 25.03, "elapsed_time": "1:06:27", "remaining_time": "3:19:05", "throughput": 8658.11, "total_tokens": 34527776} +{"current_steps": 51230, "total_steps": 204665, "loss": 0.065, "lr": 1.8654933507730025e-06, "epoch": 1.2515574231060513, "percentage": 25.03, "elapsed_time": "1:06:28", "remaining_time": "3:19:04", "throughput": 8658.19, "total_tokens": 34531040} +{"current_steps": 51235, "total_steps": 204665, "loss": 0.0676, "lr": 1.865450630335689e-06, "epoch": 1.2516795739378985, "percentage": 25.03, "elapsed_time": "1:06:28", "remaining_time": "3:19:04", "throughput": 8658.3, "total_tokens": 34534560} +{"current_steps": 51240, "total_steps": 204665, "loss": 0.1243, "lr": 1.865407903604586e-06, "epoch": 1.2518017247697457, "percentage": 25.04, "elapsed_time": "1:06:28", "remaining_time": "3:19:03", "throughput": 8658.31, "total_tokens": 34537568} +{"current_steps": 51245, "total_steps": 204665, "loss": 0.1417, "lr": 1.865365170580004e-06, "epoch": 1.251923875601593, "percentage": 25.04, "elapsed_time": "1:06:29", "remaining_time": "3:19:03", "throughput": 8658.4, "total_tokens": 34540960} +{"current_steps": 51250, "total_steps": 204665, "loss": 0.1178, "lr": 1.8653224312622534e-06, "epoch": 1.2520460264334399, "percentage": 25.04, "elapsed_time": "1:06:29", "remaining_time": "3:19:02", "throughput": 8658.5, "total_tokens": 34544416} +{"current_steps": 51255, "total_steps": 204665, "loss": 0.0324, "lr": 1.8652796856516458e-06, "epoch": 1.252168177265287, "percentage": 25.04, "elapsed_time": "1:06:29", "remaining_time": "3:19:02", "throughput": 8658.48, "total_tokens": 34547232} +{"current_steps": 51260, "total_steps": 204665, "loss": 0.002, "lr": 1.8652369337484912e-06, "epoch": 1.2522903280971343, "percentage": 25.05, "elapsed_time": "1:06:30", "remaining_time": "3:19:01", "throughput": 8658.5, "total_tokens": 34550240} +{"current_steps": 51265, "total_steps": 204665, "loss": 0.1198, "lr": 1.8651941755531012e-06, "epoch": 1.2524124789289814, "percentage": 25.05, "elapsed_time": "1:06:30", "remaining_time": "3:19:01", "throughput": 8658.52, "total_tokens": 34553248} +{"current_steps": 51270, "total_steps": 204665, "loss": 0.0548, "lr": 1.8651514110657863e-06, "epoch": 1.2525346297608286, "percentage": 25.05, "elapsed_time": "1:06:31", "remaining_time": "3:19:00", "throughput": 8658.56, "total_tokens": 34556384} +{"current_steps": 51275, "total_steps": 204665, "loss": 0.1616, "lr": 1.8651086402868574e-06, "epoch": 1.2526567805926758, "percentage": 25.05, "elapsed_time": "1:06:31", "remaining_time": "3:19:00", "throughput": 8658.74, "total_tokens": 34560224} +{"current_steps": 51280, "total_steps": 204665, "loss": 0.1452, "lr": 1.865065863216626e-06, "epoch": 1.252778931424523, "percentage": 25.06, "elapsed_time": "1:06:31", "remaining_time": "3:18:59", "throughput": 8658.77, "total_tokens": 34563296} +{"current_steps": 51285, "total_steps": 204665, "loss": 0.0967, "lr": 1.865023079855403e-06, "epoch": 1.2529010822563702, "percentage": 25.06, "elapsed_time": "1:06:32", "remaining_time": "3:18:59", "throughput": 8658.87, "total_tokens": 34566752} +{"current_steps": 51290, "total_steps": 204665, "loss": 0.0183, "lr": 1.8649802902034995e-06, "epoch": 1.2530232330882174, "percentage": 25.06, "elapsed_time": "1:06:32", "remaining_time": "3:18:58", "throughput": 8659.04, "total_tokens": 34570528} +{"current_steps": 51295, "total_steps": 204665, "loss": 0.0374, "lr": 1.8649374942612266e-06, "epoch": 1.2531453839200646, "percentage": 25.06, "elapsed_time": "1:06:32", "remaining_time": "3:18:58", "throughput": 8659.11, "total_tokens": 34573856} +{"current_steps": 51300, "total_steps": 204665, "loss": 0.1031, "lr": 1.8648946920288956e-06, "epoch": 1.2532675347519118, "percentage": 25.07, "elapsed_time": "1:06:33", "remaining_time": "3:18:57", "throughput": 8659.24, "total_tokens": 34577376} +{"current_steps": 51305, "total_steps": 204665, "loss": 0.0986, "lr": 1.864851883506818e-06, "epoch": 1.253389685583759, "percentage": 25.07, "elapsed_time": "1:06:33", "remaining_time": "3:18:57", "throughput": 8659.3, "total_tokens": 34580640} +{"current_steps": 51310, "total_steps": 204665, "loss": 0.0321, "lr": 1.8648090686953046e-06, "epoch": 1.253511836415606, "percentage": 25.07, "elapsed_time": "1:06:33", "remaining_time": "3:18:56", "throughput": 8659.42, "total_tokens": 34584160} +{"current_steps": 51315, "total_steps": 204665, "loss": 0.1095, "lr": 1.8647662475946673e-06, "epoch": 1.2536339872474531, "percentage": 25.07, "elapsed_time": "1:06:34", "remaining_time": "3:18:56", "throughput": 8659.5, "total_tokens": 34587488} +{"current_steps": 51320, "total_steps": 204665, "loss": 0.0014, "lr": 1.864723420205217e-06, "epoch": 1.2537561380793003, "percentage": 25.08, "elapsed_time": "1:06:34", "remaining_time": "3:18:55", "throughput": 8659.67, "total_tokens": 34591264} +{"current_steps": 51325, "total_steps": 204665, "loss": 0.0824, "lr": 1.8646805865272655e-06, "epoch": 1.2538782889111475, "percentage": 25.08, "elapsed_time": "1:06:34", "remaining_time": "3:18:55", "throughput": 8659.8, "total_tokens": 34594848} +{"current_steps": 51330, "total_steps": 204665, "loss": 0.0936, "lr": 1.864637746561124e-06, "epoch": 1.2540004397429947, "percentage": 25.08, "elapsed_time": "1:06:35", "remaining_time": "3:18:54", "throughput": 8659.91, "total_tokens": 34598304} +{"current_steps": 51335, "total_steps": 204665, "loss": 0.1095, "lr": 1.8645949003071047e-06, "epoch": 1.2541225905748419, "percentage": 25.08, "elapsed_time": "1:06:35", "remaining_time": "3:18:54", "throughput": 8660.09, "total_tokens": 34602144} +{"current_steps": 51340, "total_steps": 204665, "loss": 0.0402, "lr": 1.8645520477655184e-06, "epoch": 1.2542447414066888, "percentage": 25.08, "elapsed_time": "1:06:35", "remaining_time": "3:18:53", "throughput": 8660.13, "total_tokens": 34605280} +{"current_steps": 51345, "total_steps": 204665, "loss": 0.198, "lr": 1.8645091889366774e-06, "epoch": 1.254366892238536, "percentage": 25.09, "elapsed_time": "1:06:36", "remaining_time": "3:18:53", "throughput": 8660.13, "total_tokens": 34608224} +{"current_steps": 51350, "total_steps": 204665, "loss": 0.093, "lr": 1.8644663238208927e-06, "epoch": 1.2544890430703832, "percentage": 25.09, "elapsed_time": "1:06:36", "remaining_time": "3:18:52", "throughput": 8660.29, "total_tokens": 34612000} +{"current_steps": 51355, "total_steps": 204665, "loss": 0.0053, "lr": 1.8644234524184762e-06, "epoch": 1.2546111939022304, "percentage": 25.09, "elapsed_time": "1:06:36", "remaining_time": "3:18:52", "throughput": 8660.32, "total_tokens": 34615072} +{"current_steps": 51360, "total_steps": 204665, "loss": 0.0437, "lr": 1.8643805747297402e-06, "epoch": 1.2547333447340776, "percentage": 25.09, "elapsed_time": "1:06:37", "remaining_time": "3:18:51", "throughput": 8660.37, "total_tokens": 34618272} +{"current_steps": 51365, "total_steps": 204665, "loss": 0.2466, "lr": 1.8643376907549963e-06, "epoch": 1.2548554955659248, "percentage": 25.1, "elapsed_time": "1:06:37", "remaining_time": "3:18:51", "throughput": 8660.5, "total_tokens": 34621792} +{"current_steps": 51370, "total_steps": 204665, "loss": 0.0016, "lr": 1.864294800494556e-06, "epoch": 1.254977646397772, "percentage": 25.1, "elapsed_time": "1:06:38", "remaining_time": "3:18:50", "throughput": 8660.8, "total_tokens": 34626208} +{"current_steps": 51375, "total_steps": 204665, "loss": 0.0824, "lr": 1.8642519039487317e-06, "epoch": 1.2550997972296192, "percentage": 25.1, "elapsed_time": "1:06:38", "remaining_time": "3:18:50", "throughput": 8660.89, "total_tokens": 34629600} +{"current_steps": 51380, "total_steps": 204665, "loss": 0.1086, "lr": 1.8642090011178348e-06, "epoch": 1.2552219480614664, "percentage": 25.1, "elapsed_time": "1:06:38", "remaining_time": "3:18:49", "throughput": 8660.99, "total_tokens": 34633056} +{"current_steps": 51385, "total_steps": 204665, "loss": 0.0059, "lr": 1.8641660920021778e-06, "epoch": 1.2553440988933136, "percentage": 25.11, "elapsed_time": "1:06:39", "remaining_time": "3:18:49", "throughput": 8661.18, "total_tokens": 34636896} +{"current_steps": 51390, "total_steps": 204665, "loss": 0.1896, "lr": 1.8641231766020724e-06, "epoch": 1.2554662497251607, "percentage": 25.11, "elapsed_time": "1:06:39", "remaining_time": "3:18:48", "throughput": 8661.24, "total_tokens": 34640160} +{"current_steps": 51395, "total_steps": 204665, "loss": 0.1344, "lr": 1.864080254917831e-06, "epoch": 1.2555884005570077, "percentage": 25.11, "elapsed_time": "1:06:39", "remaining_time": "3:18:48", "throughput": 8661.32, "total_tokens": 34643488} +{"current_steps": 51400, "total_steps": 204665, "loss": 0.0929, "lr": 1.8640373269497653e-06, "epoch": 1.255710551388855, "percentage": 25.11, "elapsed_time": "1:06:40", "remaining_time": "3:18:47", "throughput": 8661.45, "total_tokens": 34647072} +{"current_steps": 51405, "total_steps": 204665, "loss": 0.105, "lr": 1.8639943926981881e-06, "epoch": 1.255832702220702, "percentage": 25.12, "elapsed_time": "1:06:40", "remaining_time": "3:18:47", "throughput": 8661.59, "total_tokens": 34650656} +{"current_steps": 51410, "total_steps": 204665, "loss": 0.0657, "lr": 1.863951452163411e-06, "epoch": 1.2559548530525493, "percentage": 25.12, "elapsed_time": "1:06:40", "remaining_time": "3:18:46", "throughput": 8661.62, "total_tokens": 34653792} +{"current_steps": 51415, "total_steps": 204665, "loss": 0.0307, "lr": 1.8639085053457469e-06, "epoch": 1.2560770038843965, "percentage": 25.12, "elapsed_time": "1:06:41", "remaining_time": "3:18:46", "throughput": 8661.65, "total_tokens": 34656864} +{"current_steps": 51420, "total_steps": 204665, "loss": 0.0582, "lr": 1.8638655522455072e-06, "epoch": 1.2561991547162437, "percentage": 25.12, "elapsed_time": "1:06:41", "remaining_time": "3:18:45", "throughput": 8661.74, "total_tokens": 34660256} +{"current_steps": 51425, "total_steps": 204665, "loss": 0.1335, "lr": 1.8638225928630053e-06, "epoch": 1.2563213055480908, "percentage": 25.13, "elapsed_time": "1:06:41", "remaining_time": "3:18:45", "throughput": 8661.76, "total_tokens": 34663328} +{"current_steps": 51430, "total_steps": 204665, "loss": 0.0228, "lr": 1.8637796271985532e-06, "epoch": 1.2564434563799378, "percentage": 25.13, "elapsed_time": "1:06:42", "remaining_time": "3:18:44", "throughput": 8661.85, "total_tokens": 34666720} +{"current_steps": 51435, "total_steps": 204665, "loss": 0.1212, "lr": 1.8637366552524632e-06, "epoch": 1.256565607211785, "percentage": 25.13, "elapsed_time": "1:06:42", "remaining_time": "3:18:44", "throughput": 8661.95, "total_tokens": 34670176} +{"current_steps": 51440, "total_steps": 204665, "loss": 0.046, "lr": 1.863693677025048e-06, "epoch": 1.2566877580436322, "percentage": 25.13, "elapsed_time": "1:06:42", "remaining_time": "3:18:43", "throughput": 8661.99, "total_tokens": 34673312} +{"current_steps": 51445, "total_steps": 204665, "loss": 0.1023, "lr": 1.86365069251662e-06, "epoch": 1.2568099088754794, "percentage": 25.14, "elapsed_time": "1:06:43", "remaining_time": "3:18:43", "throughput": 8662.03, "total_tokens": 34676448} +{"current_steps": 51450, "total_steps": 204665, "loss": 0.0908, "lr": 1.8636077017274917e-06, "epoch": 1.2569320597073266, "percentage": 25.14, "elapsed_time": "1:06:43", "remaining_time": "3:18:42", "throughput": 8662.03, "total_tokens": 34679456} +{"current_steps": 51455, "total_steps": 204665, "loss": 0.0017, "lr": 1.8635647046579762e-06, "epoch": 1.2570542105391738, "percentage": 25.14, "elapsed_time": "1:06:43", "remaining_time": "3:18:42", "throughput": 8662.08, "total_tokens": 34682656} +{"current_steps": 51460, "total_steps": 204665, "loss": 0.1742, "lr": 1.863521701308386e-06, "epoch": 1.257176361371021, "percentage": 25.14, "elapsed_time": "1:06:44", "remaining_time": "3:18:41", "throughput": 8662.21, "total_tokens": 34686240} +{"current_steps": 51465, "total_steps": 204665, "loss": 0.1492, "lr": 1.8634786916790332e-06, "epoch": 1.2572985122028681, "percentage": 25.15, "elapsed_time": "1:06:44", "remaining_time": "3:18:41", "throughput": 8662.28, "total_tokens": 34689568} +{"current_steps": 51470, "total_steps": 204665, "loss": 0.0579, "lr": 1.8634356757702316e-06, "epoch": 1.2574206630347153, "percentage": 25.15, "elapsed_time": "1:06:45", "remaining_time": "3:18:40", "throughput": 8662.29, "total_tokens": 34692640} +{"current_steps": 51475, "total_steps": 204665, "loss": 0.0578, "lr": 1.8633926535822932e-06, "epoch": 1.2575428138665625, "percentage": 25.15, "elapsed_time": "1:06:45", "remaining_time": "3:18:39", "throughput": 8662.34, "total_tokens": 34695840} +{"current_steps": 51480, "total_steps": 204665, "loss": 0.0701, "lr": 1.8633496251155314e-06, "epoch": 1.2576649646984097, "percentage": 25.15, "elapsed_time": "1:06:45", "remaining_time": "3:18:39", "throughput": 8662.81, "total_tokens": 34701216} +{"current_steps": 51485, "total_steps": 204665, "loss": 0.1401, "lr": 1.8633065903702588e-06, "epoch": 1.2577871155302567, "percentage": 25.16, "elapsed_time": "1:06:46", "remaining_time": "3:18:39", "throughput": 8662.87, "total_tokens": 34704480} +{"current_steps": 51490, "total_steps": 204665, "loss": 0.0023, "lr": 1.8632635493467887e-06, "epoch": 1.2579092663621039, "percentage": 25.16, "elapsed_time": "1:06:46", "remaining_time": "3:18:38", "throughput": 8662.98, "total_tokens": 34708000} +{"current_steps": 51495, "total_steps": 204665, "loss": 0.0715, "lr": 1.8632205020454336e-06, "epoch": 1.258031417193951, "percentage": 25.16, "elapsed_time": "1:06:46", "remaining_time": "3:18:38", "throughput": 8663.01, "total_tokens": 34711136} +{"current_steps": 51500, "total_steps": 204665, "loss": 0.0838, "lr": 1.8631774484665067e-06, "epoch": 1.2581535680257983, "percentage": 25.16, "elapsed_time": "1:06:47", "remaining_time": "3:18:37", "throughput": 8663.11, "total_tokens": 34714592} +{"current_steps": 51505, "total_steps": 204665, "loss": 0.1563, "lr": 1.8631343886103218e-06, "epoch": 1.2582757188576454, "percentage": 25.17, "elapsed_time": "1:06:47", "remaining_time": "3:18:37", "throughput": 8663.19, "total_tokens": 34717984} +{"current_steps": 51510, "total_steps": 204665, "loss": 0.1384, "lr": 1.863091322477191e-06, "epoch": 1.2583978696894926, "percentage": 25.17, "elapsed_time": "1:06:47", "remaining_time": "3:18:36", "throughput": 8663.22, "total_tokens": 34721184} +{"current_steps": 51515, "total_steps": 204665, "loss": 0.1524, "lr": 1.863048250067428e-06, "epoch": 1.2585200205213398, "percentage": 25.17, "elapsed_time": "1:06:48", "remaining_time": "3:18:36", "throughput": 8663.23, "total_tokens": 34724192} +{"current_steps": 51520, "total_steps": 204665, "loss": 0.0591, "lr": 1.863005171381346e-06, "epoch": 1.2586421713531868, "percentage": 25.17, "elapsed_time": "1:06:48", "remaining_time": "3:18:35", "throughput": 8663.28, "total_tokens": 34727392} +{"current_steps": 51525, "total_steps": 204665, "loss": 0.1017, "lr": 1.8629620864192588e-06, "epoch": 1.258764322185034, "percentage": 25.18, "elapsed_time": "1:06:48", "remaining_time": "3:18:35", "throughput": 8663.57, "total_tokens": 34731808} +{"current_steps": 51530, "total_steps": 204665, "loss": 0.0119, "lr": 1.8629189951814785e-06, "epoch": 1.2588864730168812, "percentage": 25.18, "elapsed_time": "1:06:49", "remaining_time": "3:18:34", "throughput": 8663.63, "total_tokens": 34735072} +{"current_steps": 51535, "total_steps": 204665, "loss": 0.046, "lr": 1.8628758976683195e-06, "epoch": 1.2590086238487284, "percentage": 25.18, "elapsed_time": "1:06:49", "remaining_time": "3:18:34", "throughput": 8663.79, "total_tokens": 34738784} +{"current_steps": 51540, "total_steps": 204665, "loss": 0.0241, "lr": 1.862832793880095e-06, "epoch": 1.2591307746805755, "percentage": 25.18, "elapsed_time": "1:06:50", "remaining_time": "3:18:33", "throughput": 8663.86, "total_tokens": 34742176} +{"current_steps": 51545, "total_steps": 204665, "loss": 0.1454, "lr": 1.8627896838171182e-06, "epoch": 1.2592529255124227, "percentage": 25.19, "elapsed_time": "1:06:50", "remaining_time": "3:18:33", "throughput": 8663.96, "total_tokens": 34745696} +{"current_steps": 51550, "total_steps": 204665, "loss": 0.0427, "lr": 1.8627465674797027e-06, "epoch": 1.25937507634427, "percentage": 25.19, "elapsed_time": "1:06:50", "remaining_time": "3:18:32", "throughput": 8663.96, "total_tokens": 34748704} +{"current_steps": 51555, "total_steps": 204665, "loss": 0.0989, "lr": 1.862703444868162e-06, "epoch": 1.2594972271761171, "percentage": 25.19, "elapsed_time": "1:06:51", "remaining_time": "3:18:32", "throughput": 8664.09, "total_tokens": 34752352} +{"current_steps": 51560, "total_steps": 204665, "loss": 0.0028, "lr": 1.8626603159828101e-06, "epoch": 1.2596193780079643, "percentage": 25.19, "elapsed_time": "1:06:51", "remaining_time": "3:18:31", "throughput": 8664.18, "total_tokens": 34755808} +{"current_steps": 51565, "total_steps": 204665, "loss": 0.1658, "lr": 1.86261718082396e-06, "epoch": 1.2597415288398115, "percentage": 25.19, "elapsed_time": "1:06:51", "remaining_time": "3:18:31", "throughput": 8664.32, "total_tokens": 34759520} +{"current_steps": 51570, "total_steps": 204665, "loss": 0.003, "lr": 1.862574039391926e-06, "epoch": 1.2598636796716587, "percentage": 25.2, "elapsed_time": "1:06:52", "remaining_time": "3:18:30", "throughput": 8664.41, "total_tokens": 34762976} +{"current_steps": 51575, "total_steps": 204665, "loss": 0.0021, "lr": 1.8625308916870215e-06, "epoch": 1.2599858305035057, "percentage": 25.2, "elapsed_time": "1:06:52", "remaining_time": "3:18:30", "throughput": 8664.45, "total_tokens": 34766176} +{"current_steps": 51580, "total_steps": 204665, "loss": 0.157, "lr": 1.8624877377095604e-06, "epoch": 1.2601079813353528, "percentage": 25.2, "elapsed_time": "1:06:52", "remaining_time": "3:18:29", "throughput": 8664.51, "total_tokens": 34769440} +{"current_steps": 51585, "total_steps": 204665, "loss": 0.0354, "lr": 1.8624445774598565e-06, "epoch": 1.2602301321672, "percentage": 25.2, "elapsed_time": "1:06:53", "remaining_time": "3:18:29", "throughput": 8664.54, "total_tokens": 34772576} +{"current_steps": 51590, "total_steps": 204665, "loss": 0.0539, "lr": 1.8624014109382236e-06, "epoch": 1.2603522829990472, "percentage": 25.21, "elapsed_time": "1:06:53", "remaining_time": "3:18:28", "throughput": 8664.71, "total_tokens": 34776352} +{"current_steps": 51595, "total_steps": 204665, "loss": 0.1369, "lr": 1.8623582381449757e-06, "epoch": 1.2604744338308944, "percentage": 25.21, "elapsed_time": "1:06:53", "remaining_time": "3:18:28", "throughput": 8664.76, "total_tokens": 34779616} +{"current_steps": 51600, "total_steps": 204665, "loss": 0.1751, "lr": 1.8623150590804269e-06, "epoch": 1.2605965846627416, "percentage": 25.21, "elapsed_time": "1:06:54", "remaining_time": "3:18:27", "throughput": 8664.86, "total_tokens": 34783136} +{"current_steps": 51605, "total_steps": 204665, "loss": 0.0823, "lr": 1.8622718737448908e-06, "epoch": 1.2607187354945888, "percentage": 25.21, "elapsed_time": "1:06:54", "remaining_time": "3:18:27", "throughput": 8664.91, "total_tokens": 34786400} +{"current_steps": 51610, "total_steps": 204665, "loss": 0.0585, "lr": 1.862228682138682e-06, "epoch": 1.2608408863264358, "percentage": 25.22, "elapsed_time": "1:06:54", "remaining_time": "3:18:26", "throughput": 8664.9, "total_tokens": 34789408} +{"current_steps": 51615, "total_steps": 204665, "loss": 0.0389, "lr": 1.8621854842621141e-06, "epoch": 1.260963037158283, "percentage": 25.22, "elapsed_time": "1:06:55", "remaining_time": "3:18:26", "throughput": 8664.99, "total_tokens": 34792864} +{"current_steps": 51620, "total_steps": 204665, "loss": 0.1365, "lr": 1.8621422801155014e-06, "epoch": 1.2610851879901301, "percentage": 25.22, "elapsed_time": "1:06:55", "remaining_time": "3:18:25", "throughput": 8665.01, "total_tokens": 34796000} +{"current_steps": 51625, "total_steps": 204665, "loss": 0.1355, "lr": 1.8620990696991586e-06, "epoch": 1.2612073388219773, "percentage": 25.22, "elapsed_time": "1:06:56", "remaining_time": "3:18:25", "throughput": 8665.07, "total_tokens": 34799264} +{"current_steps": 51630, "total_steps": 204665, "loss": 0.1614, "lr": 1.862055853013399e-06, "epoch": 1.2613294896538245, "percentage": 25.23, "elapsed_time": "1:06:56", "remaining_time": "3:18:24", "throughput": 8665.13, "total_tokens": 34802528} +{"current_steps": 51635, "total_steps": 204665, "loss": 0.1865, "lr": 1.8620126300585372e-06, "epoch": 1.2614516404856717, "percentage": 25.23, "elapsed_time": "1:06:56", "remaining_time": "3:18:24", "throughput": 8665.23, "total_tokens": 34805984} +{"current_steps": 51640, "total_steps": 204665, "loss": 0.0727, "lr": 1.861969400834888e-06, "epoch": 1.261573791317519, "percentage": 25.23, "elapsed_time": "1:06:57", "remaining_time": "3:18:23", "throughput": 8665.34, "total_tokens": 34809568} +{"current_steps": 51645, "total_steps": 204665, "loss": 0.0692, "lr": 1.8619261653427655e-06, "epoch": 1.261695942149366, "percentage": 25.23, "elapsed_time": "1:06:57", "remaining_time": "3:18:23", "throughput": 8665.47, "total_tokens": 34813216} +{"current_steps": 51650, "total_steps": 204665, "loss": 0.0024, "lr": 1.8618829235824841e-06, "epoch": 1.2618180929812133, "percentage": 25.24, "elapsed_time": "1:06:57", "remaining_time": "3:18:22", "throughput": 8665.52, "total_tokens": 34816480} +{"current_steps": 51655, "total_steps": 204665, "loss": 0.0105, "lr": 1.8618396755543584e-06, "epoch": 1.2619402438130605, "percentage": 25.24, "elapsed_time": "1:06:58", "remaining_time": "3:18:22", "throughput": 8665.6, "total_tokens": 34819872} +{"current_steps": 51660, "total_steps": 204665, "loss": 0.0565, "lr": 1.8617964212587027e-06, "epoch": 1.2620623946449077, "percentage": 25.24, "elapsed_time": "1:06:58", "remaining_time": "3:18:21", "throughput": 8665.66, "total_tokens": 34823200} +{"current_steps": 51665, "total_steps": 204665, "loss": 0.0384, "lr": 1.8617531606958315e-06, "epoch": 1.2621845454767546, "percentage": 25.24, "elapsed_time": "1:06:58", "remaining_time": "3:18:21", "throughput": 8665.73, "total_tokens": 34826528} +{"current_steps": 51670, "total_steps": 204665, "loss": 0.0596, "lr": 1.8617098938660595e-06, "epoch": 1.2623066963086018, "percentage": 25.25, "elapsed_time": "1:06:59", "remaining_time": "3:18:20", "throughput": 8665.77, "total_tokens": 34829728} +{"current_steps": 51675, "total_steps": 204665, "loss": 0.0799, "lr": 1.8616666207697015e-06, "epoch": 1.262428847140449, "percentage": 25.25, "elapsed_time": "1:06:59", "remaining_time": "3:18:20", "throughput": 8665.87, "total_tokens": 34833184} +{"current_steps": 51680, "total_steps": 204665, "loss": 0.0877, "lr": 1.8616233414070721e-06, "epoch": 1.2625509979722962, "percentage": 25.25, "elapsed_time": "1:06:59", "remaining_time": "3:18:19", "throughput": 8665.95, "total_tokens": 34836576} +{"current_steps": 51685, "total_steps": 204665, "loss": 0.098, "lr": 1.861580055778486e-06, "epoch": 1.2626731488041434, "percentage": 25.25, "elapsed_time": "1:07:00", "remaining_time": "3:18:19", "throughput": 8666.1, "total_tokens": 34840352} +{"current_steps": 51690, "total_steps": 204665, "loss": 0.0007, "lr": 1.861536763884258e-06, "epoch": 1.2627952996359906, "percentage": 25.26, "elapsed_time": "1:07:00", "remaining_time": "3:18:19", "throughput": 8666.17, "total_tokens": 34843680} +{"current_steps": 51695, "total_steps": 204665, "loss": 0.1131, "lr": 1.8614934657247028e-06, "epoch": 1.2629174504678378, "percentage": 25.26, "elapsed_time": "1:07:01", "remaining_time": "3:18:18", "throughput": 8666.24, "total_tokens": 34847008} +{"current_steps": 51700, "total_steps": 204665, "loss": 0.0387, "lr": 1.8614501613001354e-06, "epoch": 1.2630396012996847, "percentage": 25.26, "elapsed_time": "1:07:01", "remaining_time": "3:18:18", "throughput": 8666.39, "total_tokens": 34850720} +{"current_steps": 51705, "total_steps": 204665, "loss": 0.162, "lr": 1.8614068506108708e-06, "epoch": 1.263161752131532, "percentage": 25.26, "elapsed_time": "1:07:01", "remaining_time": "3:18:17", "throughput": 8666.53, "total_tokens": 34854432} +{"current_steps": 51710, "total_steps": 204665, "loss": 0.0343, "lr": 1.861363533657224e-06, "epoch": 1.2632839029633791, "percentage": 25.27, "elapsed_time": "1:07:02", "remaining_time": "3:18:17", "throughput": 8666.56, "total_tokens": 34857632} +{"current_steps": 51715, "total_steps": 204665, "loss": 0.0383, "lr": 1.8613202104395098e-06, "epoch": 1.2634060537952263, "percentage": 25.27, "elapsed_time": "1:07:02", "remaining_time": "3:18:16", "throughput": 8666.64, "total_tokens": 34861024} +{"current_steps": 51720, "total_steps": 204665, "loss": 0.0816, "lr": 1.8612768809580435e-06, "epoch": 1.2635282046270735, "percentage": 25.27, "elapsed_time": "1:07:02", "remaining_time": "3:18:16", "throughput": 8666.68, "total_tokens": 34864160} +{"current_steps": 51725, "total_steps": 204665, "loss": 0.1188, "lr": 1.8612335452131398e-06, "epoch": 1.2636503554589207, "percentage": 25.27, "elapsed_time": "1:07:03", "remaining_time": "3:18:15", "throughput": 8666.73, "total_tokens": 34867424} +{"current_steps": 51730, "total_steps": 204665, "loss": 0.1256, "lr": 1.8611902032051141e-06, "epoch": 1.2637725062907679, "percentage": 25.28, "elapsed_time": "1:07:03", "remaining_time": "3:18:15", "throughput": 8666.83, "total_tokens": 34870880} +{"current_steps": 51735, "total_steps": 204665, "loss": 0.0437, "lr": 1.861146854934282e-06, "epoch": 1.263894657122615, "percentage": 25.28, "elapsed_time": "1:07:03", "remaining_time": "3:18:14", "throughput": 8666.96, "total_tokens": 34874464} +{"current_steps": 51740, "total_steps": 204665, "loss": 0.0037, "lr": 1.861103500400958e-06, "epoch": 1.2640168079544623, "percentage": 25.28, "elapsed_time": "1:07:04", "remaining_time": "3:18:14", "throughput": 8667.01, "total_tokens": 34877664} +{"current_steps": 51745, "total_steps": 204665, "loss": 0.1053, "lr": 1.8610601396054579e-06, "epoch": 1.2641389587863094, "percentage": 25.28, "elapsed_time": "1:07:04", "remaining_time": "3:18:13", "throughput": 8667.14, "total_tokens": 34881312} +{"current_steps": 51750, "total_steps": 204665, "loss": 0.0473, "lr": 1.8610167725480967e-06, "epoch": 1.2642611096181566, "percentage": 25.29, "elapsed_time": "1:07:04", "remaining_time": "3:18:13", "throughput": 8667.16, "total_tokens": 34884384} +{"current_steps": 51755, "total_steps": 204665, "loss": 0.0283, "lr": 1.86097339922919e-06, "epoch": 1.2643832604500036, "percentage": 25.29, "elapsed_time": "1:07:05", "remaining_time": "3:18:12", "throughput": 8667.24, "total_tokens": 34887712} +{"current_steps": 51760, "total_steps": 204665, "loss": 0.0492, "lr": 1.8609300196490532e-06, "epoch": 1.2645054112818508, "percentage": 25.29, "elapsed_time": "1:07:05", "remaining_time": "3:18:12", "throughput": 8667.34, "total_tokens": 34891168} +{"current_steps": 51765, "total_steps": 204665, "loss": 0.1076, "lr": 1.8608866338080018e-06, "epoch": 1.264627562113698, "percentage": 25.29, "elapsed_time": "1:07:05", "remaining_time": "3:18:11", "throughput": 8667.47, "total_tokens": 34894688} +{"current_steps": 51770, "total_steps": 204665, "loss": 0.1494, "lr": 1.8608432417063512e-06, "epoch": 1.2647497129455452, "percentage": 25.29, "elapsed_time": "1:07:06", "remaining_time": "3:18:11", "throughput": 8667.49, "total_tokens": 34897760} +{"current_steps": 51775, "total_steps": 204665, "loss": 0.0673, "lr": 1.860799843344417e-06, "epoch": 1.2648718637773924, "percentage": 25.3, "elapsed_time": "1:07:06", "remaining_time": "3:18:10", "throughput": 8667.6, "total_tokens": 34901216} +{"current_steps": 51780, "total_steps": 204665, "loss": 0.0012, "lr": 1.860756438722515e-06, "epoch": 1.2649940146092395, "percentage": 25.3, "elapsed_time": "1:07:06", "remaining_time": "3:18:10", "throughput": 8667.68, "total_tokens": 34904544} +{"current_steps": 51785, "total_steps": 204665, "loss": 0.0723, "lr": 1.8607130278409603e-06, "epoch": 1.2651161654410865, "percentage": 25.3, "elapsed_time": "1:07:07", "remaining_time": "3:18:09", "throughput": 8667.7, "total_tokens": 34907552} +{"current_steps": 51790, "total_steps": 204665, "loss": 0.1587, "lr": 1.8606696107000692e-06, "epoch": 1.2652383162729337, "percentage": 25.3, "elapsed_time": "1:07:07", "remaining_time": "3:18:08", "throughput": 8667.85, "total_tokens": 34911200} +{"current_steps": 51795, "total_steps": 204665, "loss": 0.0537, "lr": 1.860626187300157e-06, "epoch": 1.265360467104781, "percentage": 25.31, "elapsed_time": "1:07:08", "remaining_time": "3:18:08", "throughput": 8668.07, "total_tokens": 34915232} +{"current_steps": 51800, "total_steps": 204665, "loss": 0.1449, "lr": 1.86058275764154e-06, "epoch": 1.265482617936628, "percentage": 25.31, "elapsed_time": "1:07:08", "remaining_time": "3:18:07", "throughput": 8668.09, "total_tokens": 34918304} +{"current_steps": 51805, "total_steps": 204665, "loss": 0.1132, "lr": 1.8605393217245336e-06, "epoch": 1.2656047687684753, "percentage": 25.31, "elapsed_time": "1:07:08", "remaining_time": "3:18:07", "throughput": 8668.27, "total_tokens": 34922144} +{"current_steps": 51810, "total_steps": 204665, "loss": 0.0423, "lr": 1.8604958795494535e-06, "epoch": 1.2657269196003225, "percentage": 25.31, "elapsed_time": "1:07:09", "remaining_time": "3:18:06", "throughput": 8668.28, "total_tokens": 34925152} +{"current_steps": 51815, "total_steps": 204665, "loss": 0.1276, "lr": 1.8604524311166163e-06, "epoch": 1.2658490704321697, "percentage": 25.32, "elapsed_time": "1:07:09", "remaining_time": "3:18:06", "throughput": 8668.39, "total_tokens": 34928608} +{"current_steps": 51820, "total_steps": 204665, "loss": 0.0426, "lr": 1.8604089764263375e-06, "epoch": 1.2659712212640168, "percentage": 25.32, "elapsed_time": "1:07:09", "remaining_time": "3:18:05", "throughput": 8668.43, "total_tokens": 34931744} +{"current_steps": 51825, "total_steps": 204665, "loss": 0.0938, "lr": 1.8603655154789331e-06, "epoch": 1.266093372095864, "percentage": 25.32, "elapsed_time": "1:07:10", "remaining_time": "3:18:05", "throughput": 8668.58, "total_tokens": 34935392} +{"current_steps": 51830, "total_steps": 204665, "loss": 0.2035, "lr": 1.8603220482747192e-06, "epoch": 1.2662155229277112, "percentage": 25.32, "elapsed_time": "1:07:10", "remaining_time": "3:18:04", "throughput": 8668.59, "total_tokens": 34938400} +{"current_steps": 51835, "total_steps": 204665, "loss": 0.227, "lr": 1.8602785748140122e-06, "epoch": 1.2663376737595584, "percentage": 25.33, "elapsed_time": "1:07:10", "remaining_time": "3:18:04", "throughput": 8668.77, "total_tokens": 34942240} +{"current_steps": 51840, "total_steps": 204665, "loss": 0.0861, "lr": 1.8602350950971277e-06, "epoch": 1.2664598245914056, "percentage": 25.33, "elapsed_time": "1:07:11", "remaining_time": "3:18:03", "throughput": 8668.84, "total_tokens": 34945504} +{"current_steps": 51845, "total_steps": 204665, "loss": 0.1287, "lr": 1.8601916091243825e-06, "epoch": 1.2665819754232526, "percentage": 25.33, "elapsed_time": "1:07:11", "remaining_time": "3:18:03", "throughput": 8668.91, "total_tokens": 34948768} +{"current_steps": 51850, "total_steps": 204665, "loss": 0.0453, "lr": 1.8601481168960925e-06, "epoch": 1.2667041262550998, "percentage": 25.33, "elapsed_time": "1:07:11", "remaining_time": "3:18:02", "throughput": 8669.01, "total_tokens": 34952224} +{"current_steps": 51855, "total_steps": 204665, "loss": 0.0872, "lr": 1.860104618412574e-06, "epoch": 1.266826277086947, "percentage": 25.34, "elapsed_time": "1:07:12", "remaining_time": "3:18:02", "throughput": 8669.2, "total_tokens": 34956128} +{"current_steps": 51860, "total_steps": 204665, "loss": 0.0027, "lr": 1.8600611136741432e-06, "epoch": 1.2669484279187941, "percentage": 25.34, "elapsed_time": "1:07:12", "remaining_time": "3:18:01", "throughput": 8669.36, "total_tokens": 34959840} +{"current_steps": 51865, "total_steps": 204665, "loss": 0.1355, "lr": 1.8600176026811169e-06, "epoch": 1.2670705787506413, "percentage": 25.34, "elapsed_time": "1:07:12", "remaining_time": "3:18:01", "throughput": 8669.39, "total_tokens": 34962912} +{"current_steps": 51870, "total_steps": 204665, "loss": 0.1379, "lr": 1.8599740854338112e-06, "epoch": 1.2671927295824885, "percentage": 25.34, "elapsed_time": "1:07:13", "remaining_time": "3:18:00", "throughput": 8669.53, "total_tokens": 34966560} +{"current_steps": 51875, "total_steps": 204665, "loss": 0.009, "lr": 1.8599305619325428e-06, "epoch": 1.2673148804143355, "percentage": 25.35, "elapsed_time": "1:07:13", "remaining_time": "3:18:00", "throughput": 8669.62, "total_tokens": 34969952} +{"current_steps": 51880, "total_steps": 204665, "loss": 0.1323, "lr": 1.8598870321776278e-06, "epoch": 1.2674370312461827, "percentage": 25.35, "elapsed_time": "1:07:13", "remaining_time": "3:17:59", "throughput": 8669.71, "total_tokens": 34973344} +{"current_steps": 51885, "total_steps": 204665, "loss": 0.0956, "lr": 1.8598434961693833e-06, "epoch": 1.2675591820780299, "percentage": 25.35, "elapsed_time": "1:07:14", "remaining_time": "3:17:59", "throughput": 8669.81, "total_tokens": 34976800} +{"current_steps": 51890, "total_steps": 204665, "loss": 0.0017, "lr": 1.8597999539081255e-06, "epoch": 1.267681332909877, "percentage": 25.35, "elapsed_time": "1:07:14", "remaining_time": "3:17:58", "throughput": 8669.95, "total_tokens": 34980384} +{"current_steps": 51895, "total_steps": 204665, "loss": 0.0439, "lr": 1.859756405394171e-06, "epoch": 1.2678034837417242, "percentage": 25.36, "elapsed_time": "1:07:15", "remaining_time": "3:17:58", "throughput": 8670.07, "total_tokens": 34983904} +{"current_steps": 51900, "total_steps": 204665, "loss": 0.1104, "lr": 1.8597128506278365e-06, "epoch": 1.2679256345735714, "percentage": 25.36, "elapsed_time": "1:07:15", "remaining_time": "3:17:57", "throughput": 8670.15, "total_tokens": 34987232} +{"current_steps": 51905, "total_steps": 204665, "loss": 0.2373, "lr": 1.8596692896094394e-06, "epoch": 1.2680477854054186, "percentage": 25.36, "elapsed_time": "1:07:15", "remaining_time": "3:17:57", "throughput": 8670.24, "total_tokens": 34990624} +{"current_steps": 51910, "total_steps": 204665, "loss": 0.0839, "lr": 1.8596257223392959e-06, "epoch": 1.2681699362372658, "percentage": 25.36, "elapsed_time": "1:07:16", "remaining_time": "3:17:56", "throughput": 8670.25, "total_tokens": 34993568} +{"current_steps": 51915, "total_steps": 204665, "loss": 0.0244, "lr": 1.8595821488177228e-06, "epoch": 1.268292087069113, "percentage": 25.37, "elapsed_time": "1:07:16", "remaining_time": "3:17:56", "throughput": 8670.35, "total_tokens": 34996960} +{"current_steps": 51920, "total_steps": 204665, "loss": 0.1089, "lr": 1.8595385690450374e-06, "epoch": 1.2684142379009602, "percentage": 25.37, "elapsed_time": "1:07:16", "remaining_time": "3:17:55", "throughput": 8670.34, "total_tokens": 34999840} +{"current_steps": 51925, "total_steps": 204665, "loss": 0.0253, "lr": 1.8594949830215558e-06, "epoch": 1.2685363887328074, "percentage": 25.37, "elapsed_time": "1:07:17", "remaining_time": "3:17:55", "throughput": 8670.44, "total_tokens": 35003296} +{"current_steps": 51930, "total_steps": 204665, "loss": 0.0893, "lr": 1.859451390747596e-06, "epoch": 1.2686585395646544, "percentage": 25.37, "elapsed_time": "1:07:17", "remaining_time": "3:17:54", "throughput": 8670.5, "total_tokens": 35006496} +{"current_steps": 51935, "total_steps": 204665, "loss": 0.1692, "lr": 1.8594077922234742e-06, "epoch": 1.2687806903965015, "percentage": 25.38, "elapsed_time": "1:07:17", "remaining_time": "3:17:54", "throughput": 8670.53, "total_tokens": 35009568} +{"current_steps": 51940, "total_steps": 204665, "loss": 0.1384, "lr": 1.859364187449508e-06, "epoch": 1.2689028412283487, "percentage": 25.38, "elapsed_time": "1:07:18", "remaining_time": "3:17:53", "throughput": 8670.59, "total_tokens": 35012832} +{"current_steps": 51945, "total_steps": 204665, "loss": 0.1384, "lr": 1.8593205764260142e-06, "epoch": 1.269024992060196, "percentage": 25.38, "elapsed_time": "1:07:18", "remaining_time": "3:17:53", "throughput": 8670.68, "total_tokens": 35016224} +{"current_steps": 51950, "total_steps": 204665, "loss": 0.0345, "lr": 1.8592769591533099e-06, "epoch": 1.2691471428920431, "percentage": 25.38, "elapsed_time": "1:07:18", "remaining_time": "3:17:52", "throughput": 8670.88, "total_tokens": 35020128} +{"current_steps": 51955, "total_steps": 204665, "loss": 0.1079, "lr": 1.8592333356317128e-06, "epoch": 1.2692692937238903, "percentage": 25.39, "elapsed_time": "1:07:19", "remaining_time": "3:17:52", "throughput": 8670.95, "total_tokens": 35023392} +{"current_steps": 51960, "total_steps": 204665, "loss": 0.1429, "lr": 1.8591897058615396e-06, "epoch": 1.2693914445557375, "percentage": 25.39, "elapsed_time": "1:07:19", "remaining_time": "3:17:51", "throughput": 8671.01, "total_tokens": 35026656} +{"current_steps": 51965, "total_steps": 204665, "loss": 0.0967, "lr": 1.8591460698431076e-06, "epoch": 1.2695135953875845, "percentage": 25.39, "elapsed_time": "1:07:19", "remaining_time": "3:17:51", "throughput": 8671.05, "total_tokens": 35029728} +{"current_steps": 51970, "total_steps": 204665, "loss": 0.0196, "lr": 1.8591024275767345e-06, "epoch": 1.2696357462194316, "percentage": 25.39, "elapsed_time": "1:07:20", "remaining_time": "3:17:50", "throughput": 8671.23, "total_tokens": 35033568} +{"current_steps": 51975, "total_steps": 204665, "loss": 0.1069, "lr": 1.8590587790627372e-06, "epoch": 1.2697578970512788, "percentage": 25.4, "elapsed_time": "1:07:20", "remaining_time": "3:17:50", "throughput": 8671.34, "total_tokens": 35037024} +{"current_steps": 51980, "total_steps": 204665, "loss": 0.0019, "lr": 1.8590151243014337e-06, "epoch": 1.269880047883126, "percentage": 25.4, "elapsed_time": "1:07:20", "remaining_time": "3:17:49", "throughput": 8671.48, "total_tokens": 35040608} +{"current_steps": 51985, "total_steps": 204665, "loss": 0.0299, "lr": 1.858971463293141e-06, "epoch": 1.2700021987149732, "percentage": 25.4, "elapsed_time": "1:07:21", "remaining_time": "3:17:49", "throughput": 8671.6, "total_tokens": 35044128} +{"current_steps": 51990, "total_steps": 204665, "loss": 0.0489, "lr": 1.858927796038177e-06, "epoch": 1.2701243495468204, "percentage": 25.4, "elapsed_time": "1:07:21", "remaining_time": "3:17:48", "throughput": 8671.68, "total_tokens": 35047520} +{"current_steps": 51995, "total_steps": 204665, "loss": 0.1065, "lr": 1.8588841225368587e-06, "epoch": 1.2702465003786676, "percentage": 25.4, "elapsed_time": "1:07:21", "remaining_time": "3:17:48", "throughput": 8671.7, "total_tokens": 35050528} +{"current_steps": 52000, "total_steps": 204665, "loss": 0.032, "lr": 1.8588404427895044e-06, "epoch": 1.2703686512105148, "percentage": 25.41, "elapsed_time": "1:07:22", "remaining_time": "3:17:47", "throughput": 8671.75, "total_tokens": 35053728} +{"current_steps": 52005, "total_steps": 204665, "loss": 0.161, "lr": 1.8587967567964312e-06, "epoch": 1.270490802042362, "percentage": 25.41, "elapsed_time": "1:07:22", "remaining_time": "3:17:47", "throughput": 8671.82, "total_tokens": 35056992} +{"current_steps": 52010, "total_steps": 204665, "loss": 0.1344, "lr": 1.858753064557957e-06, "epoch": 1.2706129528742092, "percentage": 25.41, "elapsed_time": "1:07:22", "remaining_time": "3:17:46", "throughput": 8671.93, "total_tokens": 35060448} +{"current_steps": 52015, "total_steps": 204665, "loss": 0.1726, "lr": 1.8587093660743997e-06, "epoch": 1.2707351037060564, "percentage": 25.41, "elapsed_time": "1:07:23", "remaining_time": "3:17:46", "throughput": 8672.06, "total_tokens": 35064032} +{"current_steps": 52020, "total_steps": 204665, "loss": 0.0458, "lr": 1.8586656613460766e-06, "epoch": 1.2708572545379033, "percentage": 25.42, "elapsed_time": "1:07:23", "remaining_time": "3:17:45", "throughput": 8672.13, "total_tokens": 35067296} +{"current_steps": 52025, "total_steps": 204665, "loss": 0.0534, "lr": 1.8586219503733061e-06, "epoch": 1.2709794053697505, "percentage": 25.42, "elapsed_time": "1:07:24", "remaining_time": "3:17:45", "throughput": 8672.13, "total_tokens": 35070240} +{"current_steps": 52030, "total_steps": 204665, "loss": 0.0728, "lr": 1.8585782331564057e-06, "epoch": 1.2711015562015977, "percentage": 25.42, "elapsed_time": "1:07:24", "remaining_time": "3:17:44", "throughput": 8672.23, "total_tokens": 35073632} +{"current_steps": 52035, "total_steps": 204665, "loss": 0.0394, "lr": 1.8585345096956938e-06, "epoch": 1.271223707033445, "percentage": 25.42, "elapsed_time": "1:07:24", "remaining_time": "3:17:43", "throughput": 8672.29, "total_tokens": 35076832} +{"current_steps": 52040, "total_steps": 204665, "loss": 0.0483, "lr": 1.8584907799914874e-06, "epoch": 1.271345857865292, "percentage": 25.43, "elapsed_time": "1:07:25", "remaining_time": "3:17:43", "throughput": 8672.43, "total_tokens": 35080480} +{"current_steps": 52045, "total_steps": 204665, "loss": 0.1918, "lr": 1.858447044044106e-06, "epoch": 1.2714680086971393, "percentage": 25.43, "elapsed_time": "1:07:25", "remaining_time": "3:17:42", "throughput": 8672.5, "total_tokens": 35083744} +{"current_steps": 52050, "total_steps": 204665, "loss": 0.1091, "lr": 1.858403301853866e-06, "epoch": 1.2715901595289865, "percentage": 25.43, "elapsed_time": "1:07:25", "remaining_time": "3:17:42", "throughput": 8672.62, "total_tokens": 35087264} +{"current_steps": 52055, "total_steps": 204665, "loss": 0.1953, "lr": 1.8583595534210868e-06, "epoch": 1.2717123103608334, "percentage": 25.43, "elapsed_time": "1:07:26", "remaining_time": "3:17:41", "throughput": 8672.65, "total_tokens": 35090336} +{"current_steps": 52060, "total_steps": 204665, "loss": 0.0675, "lr": 1.8583157987460859e-06, "epoch": 1.2718344611926806, "percentage": 25.44, "elapsed_time": "1:07:26", "remaining_time": "3:17:41", "throughput": 8672.77, "total_tokens": 35093856} +{"current_steps": 52065, "total_steps": 204665, "loss": 0.0433, "lr": 1.8582720378291817e-06, "epoch": 1.2719566120245278, "percentage": 25.44, "elapsed_time": "1:07:26", "remaining_time": "3:17:41", "throughput": 8673.05, "total_tokens": 35098208} +{"current_steps": 52070, "total_steps": 204665, "loss": 0.002, "lr": 1.8582282706706922e-06, "epoch": 1.272078762856375, "percentage": 25.44, "elapsed_time": "1:07:27", "remaining_time": "3:17:40", "throughput": 8673.09, "total_tokens": 35101280} +{"current_steps": 52075, "total_steps": 204665, "loss": 0.1375, "lr": 1.858184497270936e-06, "epoch": 1.2722009136882222, "percentage": 25.44, "elapsed_time": "1:07:27", "remaining_time": "3:17:39", "throughput": 8673.14, "total_tokens": 35104480} +{"current_steps": 52080, "total_steps": 204665, "loss": 0.0917, "lr": 1.8581407176302313e-06, "epoch": 1.2723230645200694, "percentage": 25.45, "elapsed_time": "1:07:27", "remaining_time": "3:17:39", "throughput": 8673.27, "total_tokens": 35108064} +{"current_steps": 52085, "total_steps": 204665, "loss": 0.0431, "lr": 1.8580969317488964e-06, "epoch": 1.2724452153519166, "percentage": 25.45, "elapsed_time": "1:07:28", "remaining_time": "3:17:38", "throughput": 8673.29, "total_tokens": 35111072} +{"current_steps": 52090, "total_steps": 204665, "loss": 0.0015, "lr": 1.8580531396272501e-06, "epoch": 1.2725673661837638, "percentage": 25.45, "elapsed_time": "1:07:28", "remaining_time": "3:17:38", "throughput": 8673.38, "total_tokens": 35114464} +{"current_steps": 52095, "total_steps": 204665, "loss": 0.083, "lr": 1.8580093412656104e-06, "epoch": 1.272689517015611, "percentage": 25.45, "elapsed_time": "1:07:28", "remaining_time": "3:17:37", "throughput": 8673.52, "total_tokens": 35118112} +{"current_steps": 52100, "total_steps": 204665, "loss": 0.0866, "lr": 1.857965536664296e-06, "epoch": 1.2728116678474581, "percentage": 25.46, "elapsed_time": "1:07:29", "remaining_time": "3:17:37", "throughput": 8673.59, "total_tokens": 35121440} +{"current_steps": 52105, "total_steps": 204665, "loss": 0.1703, "lr": 1.8579217258236254e-06, "epoch": 1.2729338186793053, "percentage": 25.46, "elapsed_time": "1:07:29", "remaining_time": "3:17:36", "throughput": 8673.58, "total_tokens": 35124320} +{"current_steps": 52110, "total_steps": 204665, "loss": 0.0437, "lr": 1.8578779087439172e-06, "epoch": 1.2730559695111523, "percentage": 25.46, "elapsed_time": "1:07:29", "remaining_time": "3:17:36", "throughput": 8673.59, "total_tokens": 35127328} +{"current_steps": 52115, "total_steps": 204665, "loss": 0.0514, "lr": 1.8578340854254902e-06, "epoch": 1.2731781203429995, "percentage": 25.46, "elapsed_time": "1:07:30", "remaining_time": "3:17:35", "throughput": 8673.65, "total_tokens": 35130528} +{"current_steps": 52120, "total_steps": 204665, "loss": 0.0906, "lr": 1.8577902558686631e-06, "epoch": 1.2733002711748467, "percentage": 25.47, "elapsed_time": "1:07:30", "remaining_time": "3:17:35", "throughput": 8673.74, "total_tokens": 35133920} +{"current_steps": 52125, "total_steps": 204665, "loss": 0.1203, "lr": 1.8577464200737544e-06, "epoch": 1.2734224220066939, "percentage": 25.47, "elapsed_time": "1:07:30", "remaining_time": "3:17:34", "throughput": 8673.8, "total_tokens": 35137184} +{"current_steps": 52130, "total_steps": 204665, "loss": 0.0019, "lr": 1.857702578041083e-06, "epoch": 1.273544572838541, "percentage": 25.47, "elapsed_time": "1:07:31", "remaining_time": "3:17:34", "throughput": 8673.91, "total_tokens": 35140704} +{"current_steps": 52135, "total_steps": 204665, "loss": 0.1035, "lr": 1.8576587297709678e-06, "epoch": 1.2736667236703882, "percentage": 25.47, "elapsed_time": "1:07:31", "remaining_time": "3:17:33", "throughput": 8673.97, "total_tokens": 35143968} +{"current_steps": 52140, "total_steps": 204665, "loss": 0.1608, "lr": 1.857614875263728e-06, "epoch": 1.2737888745022354, "percentage": 25.48, "elapsed_time": "1:07:32", "remaining_time": "3:17:33", "throughput": 8674.08, "total_tokens": 35147424} +{"current_steps": 52145, "total_steps": 204665, "loss": 0.0427, "lr": 1.8575710145196817e-06, "epoch": 1.2739110253340824, "percentage": 25.48, "elapsed_time": "1:07:32", "remaining_time": "3:17:32", "throughput": 8674.1, "total_tokens": 35150496} +{"current_steps": 52150, "total_steps": 204665, "loss": 0.2302, "lr": 1.8575271475391484e-06, "epoch": 1.2740331761659296, "percentage": 25.48, "elapsed_time": "1:07:32", "remaining_time": "3:17:32", "throughput": 8674.2, "total_tokens": 35153952} +{"current_steps": 52155, "total_steps": 204665, "loss": 0.0811, "lr": 1.8574832743224471e-06, "epoch": 1.2741553269977768, "percentage": 25.48, "elapsed_time": "1:07:33", "remaining_time": "3:17:31", "throughput": 8674.33, "total_tokens": 35157536} +{"current_steps": 52160, "total_steps": 204665, "loss": 0.0011, "lr": 1.8574393948698967e-06, "epoch": 1.274277477829624, "percentage": 25.49, "elapsed_time": "1:07:33", "remaining_time": "3:17:31", "throughput": 8674.41, "total_tokens": 35160928} +{"current_steps": 52165, "total_steps": 204665, "loss": 0.1059, "lr": 1.8573955091818166e-06, "epoch": 1.2743996286614712, "percentage": 25.49, "elapsed_time": "1:07:33", "remaining_time": "3:17:30", "throughput": 8674.47, "total_tokens": 35164128} +{"current_steps": 52170, "total_steps": 204665, "loss": 0.0718, "lr": 1.8573516172585256e-06, "epoch": 1.2745217794933184, "percentage": 25.49, "elapsed_time": "1:07:34", "remaining_time": "3:17:30", "throughput": 8674.69, "total_tokens": 35168224} +{"current_steps": 52175, "total_steps": 204665, "loss": 0.0203, "lr": 1.8573077191003433e-06, "epoch": 1.2746439303251655, "percentage": 25.49, "elapsed_time": "1:07:34", "remaining_time": "3:17:29", "throughput": 8674.77, "total_tokens": 35171616} +{"current_steps": 52180, "total_steps": 204665, "loss": 0.0592, "lr": 1.857263814707588e-06, "epoch": 1.2747660811570127, "percentage": 25.5, "elapsed_time": "1:07:34", "remaining_time": "3:17:29", "throughput": 8674.86, "total_tokens": 35175008} +{"current_steps": 52185, "total_steps": 204665, "loss": 0.1161, "lr": 1.8572199040805803e-06, "epoch": 1.27488823198886, "percentage": 25.5, "elapsed_time": "1:07:35", "remaining_time": "3:17:28", "throughput": 8674.87, "total_tokens": 35177952} +{"current_steps": 52190, "total_steps": 204665, "loss": 0.1294, "lr": 1.8571759872196386e-06, "epoch": 1.2750103828207071, "percentage": 25.5, "elapsed_time": "1:07:35", "remaining_time": "3:17:28", "throughput": 8675.03, "total_tokens": 35181728} +{"current_steps": 52195, "total_steps": 204665, "loss": 0.1537, "lr": 1.8571320641250829e-06, "epoch": 1.2751325336525543, "percentage": 25.5, "elapsed_time": "1:07:35", "remaining_time": "3:17:27", "throughput": 8675.17, "total_tokens": 35185376} +{"current_steps": 52200, "total_steps": 204665, "loss": 0.0996, "lr": 1.857088134797232e-06, "epoch": 1.2752546844844013, "percentage": 25.51, "elapsed_time": "1:07:36", "remaining_time": "3:17:27", "throughput": 8675.31, "total_tokens": 35189024} +{"current_steps": 52205, "total_steps": 204665, "loss": 0.0464, "lr": 1.8570441992364057e-06, "epoch": 1.2753768353162485, "percentage": 25.51, "elapsed_time": "1:07:36", "remaining_time": "3:17:26", "throughput": 8675.5, "total_tokens": 35192928} +{"current_steps": 52210, "total_steps": 204665, "loss": 0.0576, "lr": 1.8570002574429236e-06, "epoch": 1.2754989861480956, "percentage": 25.51, "elapsed_time": "1:07:36", "remaining_time": "3:17:26", "throughput": 8675.54, "total_tokens": 35196064} +{"current_steps": 52215, "total_steps": 204665, "loss": 0.1012, "lr": 1.8569563094171048e-06, "epoch": 1.2756211369799428, "percentage": 25.51, "elapsed_time": "1:07:37", "remaining_time": "3:17:25", "throughput": 8675.66, "total_tokens": 35199584} +{"current_steps": 52220, "total_steps": 204665, "loss": 0.0752, "lr": 1.8569123551592693e-06, "epoch": 1.27574328781179, "percentage": 25.51, "elapsed_time": "1:07:37", "remaining_time": "3:17:25", "throughput": 8675.69, "total_tokens": 35202720} +{"current_steps": 52225, "total_steps": 204665, "loss": 0.2353, "lr": 1.8568683946697368e-06, "epoch": 1.2758654386436372, "percentage": 25.52, "elapsed_time": "1:07:37", "remaining_time": "3:17:24", "throughput": 8675.77, "total_tokens": 35206112} +{"current_steps": 52230, "total_steps": 204665, "loss": 0.1563, "lr": 1.856824427948827e-06, "epoch": 1.2759875894754844, "percentage": 25.52, "elapsed_time": "1:07:38", "remaining_time": "3:17:24", "throughput": 8675.92, "total_tokens": 35209824} +{"current_steps": 52235, "total_steps": 204665, "loss": 0.0032, "lr": 1.8567804549968593e-06, "epoch": 1.2761097403073314, "percentage": 25.52, "elapsed_time": "1:07:38", "remaining_time": "3:17:23", "throughput": 8676.08, "total_tokens": 35213600} +{"current_steps": 52240, "total_steps": 204665, "loss": 0.0018, "lr": 1.8567364758141539e-06, "epoch": 1.2762318911391786, "percentage": 25.52, "elapsed_time": "1:07:39", "remaining_time": "3:17:23", "throughput": 8676.14, "total_tokens": 35216800} +{"current_steps": 52245, "total_steps": 204665, "loss": 0.1648, "lr": 1.85669249040103e-06, "epoch": 1.2763540419710258, "percentage": 25.53, "elapsed_time": "1:07:39", "remaining_time": "3:17:22", "throughput": 8676.25, "total_tokens": 35220320} +{"current_steps": 52250, "total_steps": 204665, "loss": 0.1565, "lr": 1.8566484987578083e-06, "epoch": 1.276476192802873, "percentage": 25.53, "elapsed_time": "1:07:39", "remaining_time": "3:17:22", "throughput": 8676.25, "total_tokens": 35223264} +{"current_steps": 52255, "total_steps": 204665, "loss": 0.1069, "lr": 1.856604500884808e-06, "epoch": 1.2765983436347201, "percentage": 25.53, "elapsed_time": "1:07:40", "remaining_time": "3:17:21", "throughput": 8676.37, "total_tokens": 35226784} +{"current_steps": 52260, "total_steps": 204665, "loss": 0.0816, "lr": 1.85656049678235e-06, "epoch": 1.2767204944665673, "percentage": 25.53, "elapsed_time": "1:07:40", "remaining_time": "3:17:21", "throughput": 8676.51, "total_tokens": 35230432} +{"current_steps": 52265, "total_steps": 204665, "loss": 0.0797, "lr": 1.856516486450753e-06, "epoch": 1.2768426452984145, "percentage": 25.54, "elapsed_time": "1:07:40", "remaining_time": "3:17:20", "throughput": 8676.54, "total_tokens": 35233504} +{"current_steps": 52270, "total_steps": 204665, "loss": 0.0503, "lr": 1.8564724698903378e-06, "epoch": 1.2769647961302617, "percentage": 25.54, "elapsed_time": "1:07:41", "remaining_time": "3:17:20", "throughput": 8676.58, "total_tokens": 35236640} +{"current_steps": 52275, "total_steps": 204665, "loss": 0.0032, "lr": 1.8564284471014247e-06, "epoch": 1.277086946962109, "percentage": 25.54, "elapsed_time": "1:07:41", "remaining_time": "3:17:19", "throughput": 8676.63, "total_tokens": 35239840} +{"current_steps": 52280, "total_steps": 204665, "loss": 0.0338, "lr": 1.8563844180843335e-06, "epoch": 1.277209097793956, "percentage": 25.54, "elapsed_time": "1:07:41", "remaining_time": "3:17:19", "throughput": 8676.7, "total_tokens": 35243168} +{"current_steps": 52285, "total_steps": 204665, "loss": 0.1058, "lr": 1.8563403828393845e-06, "epoch": 1.2773312486258033, "percentage": 25.55, "elapsed_time": "1:07:42", "remaining_time": "3:17:18", "throughput": 8676.81, "total_tokens": 35246624} +{"current_steps": 52290, "total_steps": 204665, "loss": 0.001, "lr": 1.8562963413668977e-06, "epoch": 1.2774533994576502, "percentage": 25.55, "elapsed_time": "1:07:42", "remaining_time": "3:17:18", "throughput": 8676.84, "total_tokens": 35249696} +{"current_steps": 52295, "total_steps": 204665, "loss": 0.0666, "lr": 1.8562522936671936e-06, "epoch": 1.2775755502894974, "percentage": 25.55, "elapsed_time": "1:07:42", "remaining_time": "3:17:17", "throughput": 8676.88, "total_tokens": 35252896} +{"current_steps": 52300, "total_steps": 204665, "loss": 0.001, "lr": 1.8562082397405927e-06, "epoch": 1.2776977011213446, "percentage": 25.55, "elapsed_time": "1:07:43", "remaining_time": "3:17:17", "throughput": 8676.99, "total_tokens": 35256352} +{"current_steps": 52305, "total_steps": 204665, "loss": 0.0386, "lr": 1.8561641795874153e-06, "epoch": 1.2778198519531918, "percentage": 25.56, "elapsed_time": "1:07:43", "remaining_time": "3:17:16", "throughput": 8677.14, "total_tokens": 35260064} +{"current_steps": 52310, "total_steps": 204665, "loss": 0.1596, "lr": 1.8561201132079814e-06, "epoch": 1.277942002785039, "percentage": 25.56, "elapsed_time": "1:07:43", "remaining_time": "3:17:16", "throughput": 8677.26, "total_tokens": 35263584} +{"current_steps": 52315, "total_steps": 204665, "loss": 0.0415, "lr": 1.8560760406026119e-06, "epoch": 1.2780641536168862, "percentage": 25.56, "elapsed_time": "1:07:44", "remaining_time": "3:17:15", "throughput": 8677.3, "total_tokens": 35266784} +{"current_steps": 52320, "total_steps": 204665, "loss": 0.0582, "lr": 1.8560319617716272e-06, "epoch": 1.2781863044487332, "percentage": 25.56, "elapsed_time": "1:07:44", "remaining_time": "3:17:15", "throughput": 8677.4, "total_tokens": 35270240} +{"current_steps": 52325, "total_steps": 204665, "loss": 0.1099, "lr": 1.8559878767153479e-06, "epoch": 1.2783084552805803, "percentage": 25.57, "elapsed_time": "1:07:44", "remaining_time": "3:17:14", "throughput": 8677.43, "total_tokens": 35273312} +{"current_steps": 52330, "total_steps": 204665, "loss": 0.0055, "lr": 1.8559437854340944e-06, "epoch": 1.2784306061124275, "percentage": 25.57, "elapsed_time": "1:07:45", "remaining_time": "3:17:14", "throughput": 8677.58, "total_tokens": 35277024} +{"current_steps": 52335, "total_steps": 204665, "loss": 0.0301, "lr": 1.8558996879281875e-06, "epoch": 1.2785527569442747, "percentage": 25.57, "elapsed_time": "1:07:45", "remaining_time": "3:17:13", "throughput": 8677.68, "total_tokens": 35280480} +{"current_steps": 52340, "total_steps": 204665, "loss": 0.1198, "lr": 1.8558555841979477e-06, "epoch": 1.278674907776122, "percentage": 25.57, "elapsed_time": "1:07:46", "remaining_time": "3:17:13", "throughput": 8677.92, "total_tokens": 35284640} +{"current_steps": 52345, "total_steps": 204665, "loss": 0.0244, "lr": 1.855811474243696e-06, "epoch": 1.278797058607969, "percentage": 25.58, "elapsed_time": "1:07:46", "remaining_time": "3:17:12", "throughput": 8677.99, "total_tokens": 35287904} +{"current_steps": 52350, "total_steps": 204665, "loss": 0.1092, "lr": 1.855767358065753e-06, "epoch": 1.2789192094398163, "percentage": 25.58, "elapsed_time": "1:07:46", "remaining_time": "3:17:12", "throughput": 8678.05, "total_tokens": 35291168} +{"current_steps": 52355, "total_steps": 204665, "loss": 0.0594, "lr": 1.8557232356644402e-06, "epoch": 1.2790413602716635, "percentage": 25.58, "elapsed_time": "1:07:47", "remaining_time": "3:17:11", "throughput": 8678.2, "total_tokens": 35294880} +{"current_steps": 52360, "total_steps": 204665, "loss": 0.0507, "lr": 1.8556791070400771e-06, "epoch": 1.2791635111035107, "percentage": 25.58, "elapsed_time": "1:07:47", "remaining_time": "3:17:11", "throughput": 8678.29, "total_tokens": 35298272} +{"current_steps": 52365, "total_steps": 204665, "loss": 0.0006, "lr": 1.8556349721929857e-06, "epoch": 1.2792856619353579, "percentage": 25.59, "elapsed_time": "1:07:47", "remaining_time": "3:17:10", "throughput": 8678.37, "total_tokens": 35301600} +{"current_steps": 52370, "total_steps": 204665, "loss": 0.1232, "lr": 1.8555908311234868e-06, "epoch": 1.279407812767205, "percentage": 25.59, "elapsed_time": "1:07:48", "remaining_time": "3:17:10", "throughput": 8678.42, "total_tokens": 35304736} +{"current_steps": 52375, "total_steps": 204665, "loss": 0.0618, "lr": 1.8555466838319012e-06, "epoch": 1.2795299635990522, "percentage": 25.59, "elapsed_time": "1:07:48", "remaining_time": "3:17:09", "throughput": 8678.53, "total_tokens": 35308256} +{"current_steps": 52380, "total_steps": 204665, "loss": 0.0386, "lr": 1.8555025303185497e-06, "epoch": 1.2796521144308992, "percentage": 25.59, "elapsed_time": "1:07:48", "remaining_time": "3:17:09", "throughput": 8678.5, "total_tokens": 35311008} +{"current_steps": 52385, "total_steps": 204665, "loss": 0.0499, "lr": 1.855458370583754e-06, "epoch": 1.2797742652627464, "percentage": 25.6, "elapsed_time": "1:07:49", "remaining_time": "3:17:08", "throughput": 8678.57, "total_tokens": 35314272} +{"current_steps": 52390, "total_steps": 204665, "loss": 0.187, "lr": 1.8554142046278347e-06, "epoch": 1.2798964160945936, "percentage": 25.6, "elapsed_time": "1:07:49", "remaining_time": "3:17:08", "throughput": 8678.65, "total_tokens": 35317600} +{"current_steps": 52395, "total_steps": 204665, "loss": 0.0216, "lr": 1.8553700324511132e-06, "epoch": 1.2800185669264408, "percentage": 25.6, "elapsed_time": "1:07:49", "remaining_time": "3:17:07", "throughput": 8678.71, "total_tokens": 35320864} +{"current_steps": 52400, "total_steps": 204665, "loss": 0.0869, "lr": 1.8553258540539111e-06, "epoch": 1.280140717758288, "percentage": 25.6, "elapsed_time": "1:07:50", "remaining_time": "3:17:07", "throughput": 8678.74, "total_tokens": 35323936} +{"current_steps": 52405, "total_steps": 204665, "loss": 0.1137, "lr": 1.8552816694365489e-06, "epoch": 1.2802628685901352, "percentage": 25.61, "elapsed_time": "1:07:50", "remaining_time": "3:17:06", "throughput": 8678.8, "total_tokens": 35327136} +{"current_steps": 52410, "total_steps": 204665, "loss": 0.0907, "lr": 1.8552374785993487e-06, "epoch": 1.2803850194219821, "percentage": 25.61, "elapsed_time": "1:07:50", "remaining_time": "3:17:06", "throughput": 8678.93, "total_tokens": 35330720} +{"current_steps": 52415, "total_steps": 204665, "loss": 0.0615, "lr": 1.8551932815426315e-06, "epoch": 1.2805071702538293, "percentage": 25.61, "elapsed_time": "1:07:51", "remaining_time": "3:17:05", "throughput": 8678.98, "total_tokens": 35333920} +{"current_steps": 52420, "total_steps": 204665, "loss": 0.0016, "lr": 1.8551490782667188e-06, "epoch": 1.2806293210856765, "percentage": 25.61, "elapsed_time": "1:07:51", "remaining_time": "3:17:05", "throughput": 8679.11, "total_tokens": 35337504} +{"current_steps": 52425, "total_steps": 204665, "loss": 0.0842, "lr": 1.8551048687719315e-06, "epoch": 1.2807514719175237, "percentage": 25.62, "elapsed_time": "1:07:51", "remaining_time": "3:17:04", "throughput": 8679.12, "total_tokens": 35340512} +{"current_steps": 52430, "total_steps": 204665, "loss": 0.0873, "lr": 1.8550606530585922e-06, "epoch": 1.2808736227493709, "percentage": 25.62, "elapsed_time": "1:07:52", "remaining_time": "3:17:04", "throughput": 8679.11, "total_tokens": 35343392} +{"current_steps": 52435, "total_steps": 204665, "loss": 0.0441, "lr": 1.8550164311270215e-06, "epoch": 1.280995773581218, "percentage": 25.62, "elapsed_time": "1:07:52", "remaining_time": "3:17:03", "throughput": 8679.2, "total_tokens": 35346784} +{"current_steps": 52440, "total_steps": 204665, "loss": 0.0698, "lr": 1.8549722029775414e-06, "epoch": 1.2811179244130653, "percentage": 25.62, "elapsed_time": "1:07:52", "remaining_time": "3:17:03", "throughput": 8679.26, "total_tokens": 35350048} +{"current_steps": 52445, "total_steps": 204665, "loss": 0.0022, "lr": 1.8549279686104734e-06, "epoch": 1.2812400752449125, "percentage": 25.62, "elapsed_time": "1:07:53", "remaining_time": "3:17:02", "throughput": 8679.37, "total_tokens": 35353568} +{"current_steps": 52450, "total_steps": 204665, "loss": 0.0998, "lr": 1.8548837280261393e-06, "epoch": 1.2813622260767596, "percentage": 25.63, "elapsed_time": "1:07:53", "remaining_time": "3:17:02", "throughput": 8679.45, "total_tokens": 35356896} +{"current_steps": 52455, "total_steps": 204665, "loss": 0.095, "lr": 1.8548394812248612e-06, "epoch": 1.2814843769086068, "percentage": 25.63, "elapsed_time": "1:07:53", "remaining_time": "3:17:01", "throughput": 8679.55, "total_tokens": 35360288} +{"current_steps": 52460, "total_steps": 204665, "loss": 0.1571, "lr": 1.85479522820696e-06, "epoch": 1.281606527740454, "percentage": 25.63, "elapsed_time": "1:07:54", "remaining_time": "3:17:01", "throughput": 8679.56, "total_tokens": 35363296} +{"current_steps": 52465, "total_steps": 204665, "loss": 0.1396, "lr": 1.854750968972758e-06, "epoch": 1.281728678572301, "percentage": 25.63, "elapsed_time": "1:07:54", "remaining_time": "3:17:00", "throughput": 8679.7, "total_tokens": 35366880} +{"current_steps": 52470, "total_steps": 204665, "loss": 0.1012, "lr": 1.8547067035225775e-06, "epoch": 1.2818508294041482, "percentage": 25.64, "elapsed_time": "1:07:55", "remaining_time": "3:17:00", "throughput": 8679.81, "total_tokens": 35370400} +{"current_steps": 52475, "total_steps": 204665, "loss": 0.0727, "lr": 1.8546624318567395e-06, "epoch": 1.2819729802359954, "percentage": 25.64, "elapsed_time": "1:07:55", "remaining_time": "3:16:59", "throughput": 8679.9, "total_tokens": 35373792} +{"current_steps": 52480, "total_steps": 204665, "loss": 0.0043, "lr": 1.8546181539755665e-06, "epoch": 1.2820951310678426, "percentage": 25.64, "elapsed_time": "1:07:55", "remaining_time": "3:16:59", "throughput": 8679.96, "total_tokens": 35377056} +{"current_steps": 52485, "total_steps": 204665, "loss": 0.074, "lr": 1.8545738698793807e-06, "epoch": 1.2822172818996898, "percentage": 25.64, "elapsed_time": "1:07:56", "remaining_time": "3:16:58", "throughput": 8680.08, "total_tokens": 35380576} +{"current_steps": 52490, "total_steps": 204665, "loss": 0.0824, "lr": 1.8545295795685033e-06, "epoch": 1.282339432731537, "percentage": 25.65, "elapsed_time": "1:07:56", "remaining_time": "3:16:58", "throughput": 8680.25, "total_tokens": 35384352} +{"current_steps": 52495, "total_steps": 204665, "loss": 0.0362, "lr": 1.8544852830432576e-06, "epoch": 1.2824615835633841, "percentage": 25.65, "elapsed_time": "1:07:56", "remaining_time": "3:16:57", "throughput": 8680.32, "total_tokens": 35387616} +{"current_steps": 52500, "total_steps": 204665, "loss": 0.0025, "lr": 1.8544409803039647e-06, "epoch": 1.282583734395231, "percentage": 25.65, "elapsed_time": "1:07:57", "remaining_time": "3:16:57", "throughput": 8680.42, "total_tokens": 35391072} +{"current_steps": 52505, "total_steps": 204665, "loss": 0.2443, "lr": 1.8543966713509472e-06, "epoch": 1.2827058852270783, "percentage": 25.65, "elapsed_time": "1:07:57", "remaining_time": "3:16:56", "throughput": 8680.59, "total_tokens": 35394848} +{"current_steps": 52510, "total_steps": 204665, "loss": 0.0889, "lr": 1.8543523561845276e-06, "epoch": 1.2828280360589255, "percentage": 25.66, "elapsed_time": "1:07:57", "remaining_time": "3:16:56", "throughput": 8680.68, "total_tokens": 35398240} +{"current_steps": 52515, "total_steps": 204665, "loss": 0.0338, "lr": 1.8543080348050274e-06, "epoch": 1.2829501868907727, "percentage": 25.66, "elapsed_time": "1:07:58", "remaining_time": "3:16:55", "throughput": 8680.77, "total_tokens": 35401632} +{"current_steps": 52520, "total_steps": 204665, "loss": 0.0603, "lr": 1.8542637072127695e-06, "epoch": 1.2830723377226199, "percentage": 25.66, "elapsed_time": "1:07:58", "remaining_time": "3:16:55", "throughput": 8680.78, "total_tokens": 35404640} +{"current_steps": 52525, "total_steps": 204665, "loss": 0.0614, "lr": 1.8542193734080764e-06, "epoch": 1.283194488554467, "percentage": 25.66, "elapsed_time": "1:07:58", "remaining_time": "3:16:54", "throughput": 8680.77, "total_tokens": 35407456} +{"current_steps": 52530, "total_steps": 204665, "loss": 0.1687, "lr": 1.8541750333912703e-06, "epoch": 1.2833166393863142, "percentage": 25.67, "elapsed_time": "1:07:59", "remaining_time": "3:16:53", "throughput": 8680.85, "total_tokens": 35410784} +{"current_steps": 52535, "total_steps": 204665, "loss": 0.0623, "lr": 1.8541306871626733e-06, "epoch": 1.2834387902181614, "percentage": 25.67, "elapsed_time": "1:07:59", "remaining_time": "3:16:53", "throughput": 8680.88, "total_tokens": 35413856} +{"current_steps": 52540, "total_steps": 204665, "loss": 0.1718, "lr": 1.8540863347226084e-06, "epoch": 1.2835609410500086, "percentage": 25.67, "elapsed_time": "1:07:59", "remaining_time": "3:16:52", "throughput": 8680.93, "total_tokens": 35417056} +{"current_steps": 52545, "total_steps": 204665, "loss": 0.1321, "lr": 1.8540419760713979e-06, "epoch": 1.2836830918818558, "percentage": 25.67, "elapsed_time": "1:08:00", "remaining_time": "3:16:52", "throughput": 8680.97, "total_tokens": 35420192} +{"current_steps": 52550, "total_steps": 204665, "loss": 0.0411, "lr": 1.8539976112093644e-06, "epoch": 1.283805242713703, "percentage": 25.68, "elapsed_time": "1:08:00", "remaining_time": "3:16:51", "throughput": 8681.09, "total_tokens": 35423712} +{"current_steps": 52555, "total_steps": 204665, "loss": 0.1122, "lr": 1.853953240136831e-06, "epoch": 1.28392739354555, "percentage": 25.68, "elapsed_time": "1:08:00", "remaining_time": "3:16:51", "throughput": 8681.14, "total_tokens": 35426848} +{"current_steps": 52560, "total_steps": 204665, "loss": 0.0471, "lr": 1.8539088628541193e-06, "epoch": 1.2840495443773972, "percentage": 25.68, "elapsed_time": "1:08:01", "remaining_time": "3:16:50", "throughput": 8681.18, "total_tokens": 35429984} +{"current_steps": 52565, "total_steps": 204665, "loss": 0.0956, "lr": 1.8538644793615532e-06, "epoch": 1.2841716952092443, "percentage": 25.68, "elapsed_time": "1:08:01", "remaining_time": "3:16:50", "throughput": 8681.23, "total_tokens": 35433184} +{"current_steps": 52570, "total_steps": 204665, "loss": 0.0697, "lr": 1.8538200896594546e-06, "epoch": 1.2842938460410915, "percentage": 25.69, "elapsed_time": "1:08:01", "remaining_time": "3:16:49", "throughput": 8681.32, "total_tokens": 35436576} +{"current_steps": 52575, "total_steps": 204665, "loss": 0.0124, "lr": 1.8537756937481465e-06, "epoch": 1.2844159968729387, "percentage": 25.69, "elapsed_time": "1:08:02", "remaining_time": "3:16:49", "throughput": 8681.47, "total_tokens": 35440288} +{"current_steps": 52580, "total_steps": 204665, "loss": 0.1329, "lr": 1.8537312916279524e-06, "epoch": 1.284538147704786, "percentage": 25.69, "elapsed_time": "1:08:02", "remaining_time": "3:16:48", "throughput": 8681.53, "total_tokens": 35443488} +{"current_steps": 52585, "total_steps": 204665, "loss": 0.0242, "lr": 1.8536868832991946e-06, "epoch": 1.284660298536633, "percentage": 25.69, "elapsed_time": "1:08:02", "remaining_time": "3:16:48", "throughput": 8681.66, "total_tokens": 35447072} +{"current_steps": 52590, "total_steps": 204665, "loss": 0.0297, "lr": 1.8536424687621958e-06, "epoch": 1.28478244936848, "percentage": 25.7, "elapsed_time": "1:08:03", "remaining_time": "3:16:47", "throughput": 8681.78, "total_tokens": 35450592} +{"current_steps": 52595, "total_steps": 204665, "loss": 0.0983, "lr": 1.8535980480172797e-06, "epoch": 1.2849046002003273, "percentage": 25.7, "elapsed_time": "1:08:03", "remaining_time": "3:16:47", "throughput": 8681.86, "total_tokens": 35453920} +{"current_steps": 52600, "total_steps": 204665, "loss": 0.046, "lr": 1.8535536210647691e-06, "epoch": 1.2850267510321745, "percentage": 25.7, "elapsed_time": "1:08:04", "remaining_time": "3:16:46", "throughput": 8681.87, "total_tokens": 35456928} +{"current_steps": 52605, "total_steps": 204665, "loss": 0.1515, "lr": 1.8535091879049868e-06, "epoch": 1.2851489018640216, "percentage": 25.7, "elapsed_time": "1:08:04", "remaining_time": "3:16:46", "throughput": 8681.94, "total_tokens": 35460192} +{"current_steps": 52610, "total_steps": 204665, "loss": 0.1838, "lr": 1.8534647485382561e-06, "epoch": 1.2852710526958688, "percentage": 25.71, "elapsed_time": "1:08:04", "remaining_time": "3:16:45", "throughput": 8682.06, "total_tokens": 35463712} +{"current_steps": 52615, "total_steps": 204665, "loss": 0.0022, "lr": 1.8534203029649002e-06, "epoch": 1.285393203527716, "percentage": 25.71, "elapsed_time": "1:08:05", "remaining_time": "3:16:45", "throughput": 8682.08, "total_tokens": 35466720} +{"current_steps": 52620, "total_steps": 204665, "loss": 0.0768, "lr": 1.8533758511852424e-06, "epoch": 1.2855153543595632, "percentage": 25.71, "elapsed_time": "1:08:05", "remaining_time": "3:16:44", "throughput": 8682.18, "total_tokens": 35470176} +{"current_steps": 52625, "total_steps": 204665, "loss": 0.0344, "lr": 1.853331393199606e-06, "epoch": 1.2856375051914104, "percentage": 25.71, "elapsed_time": "1:08:05", "remaining_time": "3:16:44", "throughput": 8682.27, "total_tokens": 35473568} +{"current_steps": 52630, "total_steps": 204665, "loss": 0.0547, "lr": 1.8532869290083139e-06, "epoch": 1.2857596560232576, "percentage": 25.72, "elapsed_time": "1:08:06", "remaining_time": "3:16:43", "throughput": 8682.48, "total_tokens": 35477536} +{"current_steps": 52635, "total_steps": 204665, "loss": 0.1291, "lr": 1.8532424586116899e-06, "epoch": 1.2858818068551048, "percentage": 25.72, "elapsed_time": "1:08:06", "remaining_time": "3:16:43", "throughput": 8682.48, "total_tokens": 35480480} +{"current_steps": 52640, "total_steps": 204665, "loss": 0.1439, "lr": 1.8531979820100574e-06, "epoch": 1.286003957686952, "percentage": 25.72, "elapsed_time": "1:08:06", "remaining_time": "3:16:42", "throughput": 8682.59, "total_tokens": 35484000} +{"current_steps": 52645, "total_steps": 204665, "loss": 0.0509, "lr": 1.8531534992037395e-06, "epoch": 1.286126108518799, "percentage": 25.72, "elapsed_time": "1:08:07", "remaining_time": "3:16:42", "throughput": 8682.61, "total_tokens": 35487008} +{"current_steps": 52650, "total_steps": 204665, "loss": 0.0082, "lr": 1.8531090101930595e-06, "epoch": 1.2862482593506461, "percentage": 25.72, "elapsed_time": "1:08:07", "remaining_time": "3:16:41", "throughput": 8682.72, "total_tokens": 35490528} +{"current_steps": 52655, "total_steps": 204665, "loss": 0.1251, "lr": 1.853064514978342e-06, "epoch": 1.2863704101824933, "percentage": 25.73, "elapsed_time": "1:08:07", "remaining_time": "3:16:41", "throughput": 8682.78, "total_tokens": 35493728} +{"current_steps": 52660, "total_steps": 204665, "loss": 0.0998, "lr": 1.8530200135599095e-06, "epoch": 1.2864925610143405, "percentage": 25.73, "elapsed_time": "1:08:08", "remaining_time": "3:16:40", "throughput": 8682.79, "total_tokens": 35496736} +{"current_steps": 52665, "total_steps": 204665, "loss": 0.0935, "lr": 1.8529755059380863e-06, "epoch": 1.2866147118461877, "percentage": 25.73, "elapsed_time": "1:08:08", "remaining_time": "3:16:40", "throughput": 8682.85, "total_tokens": 35500000} +{"current_steps": 52670, "total_steps": 204665, "loss": 0.0905, "lr": 1.8529309921131954e-06, "epoch": 1.2867368626780349, "percentage": 25.73, "elapsed_time": "1:08:08", "remaining_time": "3:16:39", "throughput": 8682.86, "total_tokens": 35503008} +{"current_steps": 52675, "total_steps": 204665, "loss": 0.0017, "lr": 1.8528864720855613e-06, "epoch": 1.286859013509882, "percentage": 25.74, "elapsed_time": "1:08:09", "remaining_time": "3:16:39", "throughput": 8682.94, "total_tokens": 35506336} +{"current_steps": 52680, "total_steps": 204665, "loss": 0.0016, "lr": 1.8528419458555072e-06, "epoch": 1.286981164341729, "percentage": 25.74, "elapsed_time": "1:08:09", "remaining_time": "3:16:38", "throughput": 8683.11, "total_tokens": 35510112} +{"current_steps": 52685, "total_steps": 204665, "loss": 0.0415, "lr": 1.8527974134233571e-06, "epoch": 1.2871033151735762, "percentage": 25.74, "elapsed_time": "1:08:09", "remaining_time": "3:16:38", "throughput": 8683.11, "total_tokens": 35513056} +{"current_steps": 52690, "total_steps": 204665, "loss": 0.0571, "lr": 1.8527528747894347e-06, "epoch": 1.2872254660054234, "percentage": 25.74, "elapsed_time": "1:08:10", "remaining_time": "3:16:37", "throughput": 8683.27, "total_tokens": 35516832} +{"current_steps": 52695, "total_steps": 204665, "loss": 0.0006, "lr": 1.8527083299540641e-06, "epoch": 1.2873476168372706, "percentage": 25.75, "elapsed_time": "1:08:10", "remaining_time": "3:16:37", "throughput": 8683.33, "total_tokens": 35520096} +{"current_steps": 52700, "total_steps": 204665, "loss": 0.1375, "lr": 1.8526637789175696e-06, "epoch": 1.2874697676691178, "percentage": 25.75, "elapsed_time": "1:08:10", "remaining_time": "3:16:36", "throughput": 8683.5, "total_tokens": 35523872} +{"current_steps": 52705, "total_steps": 204665, "loss": 0.173, "lr": 1.8526192216802742e-06, "epoch": 1.287591918500965, "percentage": 25.75, "elapsed_time": "1:08:11", "remaining_time": "3:16:36", "throughput": 8683.7, "total_tokens": 35527840} +{"current_steps": 52710, "total_steps": 204665, "loss": 0.1101, "lr": 1.8525746582425028e-06, "epoch": 1.2877140693328122, "percentage": 25.75, "elapsed_time": "1:08:11", "remaining_time": "3:16:35", "throughput": 8683.74, "total_tokens": 35530976} +{"current_steps": 52715, "total_steps": 204665, "loss": 0.2526, "lr": 1.8525300886045792e-06, "epoch": 1.2878362201646594, "percentage": 25.76, "elapsed_time": "1:08:12", "remaining_time": "3:16:35", "throughput": 8683.85, "total_tokens": 35534432} +{"current_steps": 52720, "total_steps": 204665, "loss": 0.1337, "lr": 1.8524855127668272e-06, "epoch": 1.2879583709965066, "percentage": 25.76, "elapsed_time": "1:08:12", "remaining_time": "3:16:34", "throughput": 8683.85, "total_tokens": 35537312} +{"current_steps": 52725, "total_steps": 204665, "loss": 0.0739, "lr": 1.8524409307295716e-06, "epoch": 1.2880805218283538, "percentage": 25.76, "elapsed_time": "1:08:12", "remaining_time": "3:16:34", "throughput": 8683.92, "total_tokens": 35540640} +{"current_steps": 52730, "total_steps": 204665, "loss": 0.0419, "lr": 1.8523963424931361e-06, "epoch": 1.288202672660201, "percentage": 25.76, "elapsed_time": "1:08:13", "remaining_time": "3:16:33", "throughput": 8683.96, "total_tokens": 35543776} +{"current_steps": 52735, "total_steps": 204665, "loss": 0.0466, "lr": 1.852351748057845e-06, "epoch": 1.288324823492048, "percentage": 25.77, "elapsed_time": "1:08:13", "remaining_time": "3:16:33", "throughput": 8684.04, "total_tokens": 35547104} +{"current_steps": 52740, "total_steps": 204665, "loss": 0.0577, "lr": 1.8523071474240228e-06, "epoch": 1.288446974323895, "percentage": 25.77, "elapsed_time": "1:08:13", "remaining_time": "3:16:32", "throughput": 8684.1, "total_tokens": 35550368} +{"current_steps": 52745, "total_steps": 204665, "loss": 0.1469, "lr": 1.8522625405919938e-06, "epoch": 1.2885691251557423, "percentage": 25.77, "elapsed_time": "1:08:14", "remaining_time": "3:16:32", "throughput": 8684.1, "total_tokens": 35553248} +{"current_steps": 52750, "total_steps": 204665, "loss": 0.0592, "lr": 1.8522179275620825e-06, "epoch": 1.2886912759875895, "percentage": 25.77, "elapsed_time": "1:08:14", "remaining_time": "3:16:31", "throughput": 8684.12, "total_tokens": 35556256} +{"current_steps": 52755, "total_steps": 204665, "loss": 0.1206, "lr": 1.8521733083346131e-06, "epoch": 1.2888134268194367, "percentage": 25.78, "elapsed_time": "1:08:14", "remaining_time": "3:16:30", "throughput": 8684.22, "total_tokens": 35559712} +{"current_steps": 52760, "total_steps": 204665, "loss": 0.0855, "lr": 1.8521286829099104e-06, "epoch": 1.2889355776512839, "percentage": 25.78, "elapsed_time": "1:08:15", "remaining_time": "3:16:30", "throughput": 8684.22, "total_tokens": 35562656} +{"current_steps": 52765, "total_steps": 204665, "loss": 0.1328, "lr": 1.8520840512882985e-06, "epoch": 1.289057728483131, "percentage": 25.78, "elapsed_time": "1:08:15", "remaining_time": "3:16:29", "throughput": 8684.39, "total_tokens": 35566432} +{"current_steps": 52770, "total_steps": 204665, "loss": 0.0984, "lr": 1.8520394134701022e-06, "epoch": 1.289179879314978, "percentage": 25.78, "elapsed_time": "1:08:15", "remaining_time": "3:16:29", "throughput": 8684.52, "total_tokens": 35569952} +{"current_steps": 52775, "total_steps": 204665, "loss": 0.0838, "lr": 1.8519947694556461e-06, "epoch": 1.2893020301468252, "percentage": 25.79, "elapsed_time": "1:08:16", "remaining_time": "3:16:28", "throughput": 8684.55, "total_tokens": 35573024} +{"current_steps": 52780, "total_steps": 204665, "loss": 0.1193, "lr": 1.8519501192452548e-06, "epoch": 1.2894241809786724, "percentage": 25.79, "elapsed_time": "1:08:16", "remaining_time": "3:16:28", "throughput": 8684.63, "total_tokens": 35576352} +{"current_steps": 52785, "total_steps": 204665, "loss": 0.0345, "lr": 1.8519054628392535e-06, "epoch": 1.2895463318105196, "percentage": 25.79, "elapsed_time": "1:08:16", "remaining_time": "3:16:27", "throughput": 8684.68, "total_tokens": 35579552} +{"current_steps": 52790, "total_steps": 204665, "loss": 0.1339, "lr": 1.8518608002379664e-06, "epoch": 1.2896684826423668, "percentage": 25.79, "elapsed_time": "1:08:17", "remaining_time": "3:16:27", "throughput": 8684.83, "total_tokens": 35583264} +{"current_steps": 52795, "total_steps": 204665, "loss": 0.0316, "lr": 1.8518161314417181e-06, "epoch": 1.289790633474214, "percentage": 25.8, "elapsed_time": "1:08:17", "remaining_time": "3:16:26", "throughput": 8684.91, "total_tokens": 35586592} +{"current_steps": 52800, "total_steps": 204665, "loss": 0.0029, "lr": 1.851771456450834e-06, "epoch": 1.2899127843060612, "percentage": 25.8, "elapsed_time": "1:08:17", "remaining_time": "3:16:26", "throughput": 8684.97, "total_tokens": 35589792} +{"current_steps": 52805, "total_steps": 204665, "loss": 0.0027, "lr": 1.8517267752656387e-06, "epoch": 1.2900349351379083, "percentage": 25.8, "elapsed_time": "1:08:18", "remaining_time": "3:16:25", "throughput": 8685.02, "total_tokens": 35592992} +{"current_steps": 52810, "total_steps": 204665, "loss": 0.0313, "lr": 1.8516820878864574e-06, "epoch": 1.2901570859697555, "percentage": 25.8, "elapsed_time": "1:08:18", "remaining_time": "3:16:25", "throughput": 8685.17, "total_tokens": 35596704} +{"current_steps": 52815, "total_steps": 204665, "loss": 0.1372, "lr": 1.8516373943136147e-06, "epoch": 1.2902792368016027, "percentage": 25.81, "elapsed_time": "1:08:18", "remaining_time": "3:16:24", "throughput": 8685.27, "total_tokens": 35600160} +{"current_steps": 52820, "total_steps": 204665, "loss": 0.0029, "lr": 1.8515926945474357e-06, "epoch": 1.29040138763345, "percentage": 25.81, "elapsed_time": "1:08:19", "remaining_time": "3:16:24", "throughput": 8685.27, "total_tokens": 35603104} +{"current_steps": 52825, "total_steps": 204665, "loss": 0.1577, "lr": 1.851547988588246e-06, "epoch": 1.2905235384652969, "percentage": 25.81, "elapsed_time": "1:08:19", "remaining_time": "3:16:23", "throughput": 8685.35, "total_tokens": 35606432} +{"current_steps": 52830, "total_steps": 204665, "loss": 0.2623, "lr": 1.8515032764363698e-06, "epoch": 1.290645689297144, "percentage": 25.81, "elapsed_time": "1:08:19", "remaining_time": "3:16:23", "throughput": 8685.5, "total_tokens": 35610080} +{"current_steps": 52835, "total_steps": 204665, "loss": 0.1614, "lr": 1.8514585580921328e-06, "epoch": 1.2907678401289913, "percentage": 25.82, "elapsed_time": "1:08:20", "remaining_time": "3:16:22", "throughput": 8685.56, "total_tokens": 35613344} +{"current_steps": 52840, "total_steps": 204665, "loss": 0.1081, "lr": 1.8514138335558604e-06, "epoch": 1.2908899909608385, "percentage": 25.82, "elapsed_time": "1:08:20", "remaining_time": "3:16:22", "throughput": 8685.63, "total_tokens": 35616608} +{"current_steps": 52845, "total_steps": 204665, "loss": 0.001, "lr": 1.8513691028278776e-06, "epoch": 1.2910121417926856, "percentage": 25.82, "elapsed_time": "1:08:20", "remaining_time": "3:16:21", "throughput": 8685.66, "total_tokens": 35619744} +{"current_steps": 52850, "total_steps": 204665, "loss": 0.0517, "lr": 1.8513243659085097e-06, "epoch": 1.2911342926245328, "percentage": 25.82, "elapsed_time": "1:08:21", "remaining_time": "3:16:21", "throughput": 8685.78, "total_tokens": 35623264} +{"current_steps": 52855, "total_steps": 204665, "loss": 0.0561, "lr": 1.8512796227980818e-06, "epoch": 1.29125644345638, "percentage": 25.83, "elapsed_time": "1:08:21", "remaining_time": "3:16:20", "throughput": 8685.88, "total_tokens": 35626720} +{"current_steps": 52860, "total_steps": 204665, "loss": 0.0614, "lr": 1.8512348734969196e-06, "epoch": 1.291378594288227, "percentage": 25.83, "elapsed_time": "1:08:22", "remaining_time": "3:16:20", "throughput": 8685.91, "total_tokens": 35629792} +{"current_steps": 52865, "total_steps": 204665, "loss": 0.1245, "lr": 1.8511901180053485e-06, "epoch": 1.2915007451200742, "percentage": 25.83, "elapsed_time": "1:08:22", "remaining_time": "3:16:19", "throughput": 8686.0, "total_tokens": 35633184} +{"current_steps": 52870, "total_steps": 204665, "loss": 0.04, "lr": 1.8511453563236938e-06, "epoch": 1.2916228959519214, "percentage": 25.83, "elapsed_time": "1:08:22", "remaining_time": "3:16:19", "throughput": 8686.11, "total_tokens": 35636704} +{"current_steps": 52875, "total_steps": 204665, "loss": 0.0387, "lr": 1.8511005884522813e-06, "epoch": 1.2917450467837686, "percentage": 25.83, "elapsed_time": "1:08:23", "remaining_time": "3:16:18", "throughput": 8686.25, "total_tokens": 35640352} +{"current_steps": 52880, "total_steps": 204665, "loss": 0.0359, "lr": 1.8510558143914363e-06, "epoch": 1.2918671976156157, "percentage": 25.84, "elapsed_time": "1:08:23", "remaining_time": "3:16:18", "throughput": 8686.41, "total_tokens": 35644064} +{"current_steps": 52885, "total_steps": 204665, "loss": 0.0565, "lr": 1.8510110341414847e-06, "epoch": 1.291989348447463, "percentage": 25.84, "elapsed_time": "1:08:23", "remaining_time": "3:16:17", "throughput": 8686.44, "total_tokens": 35647200} +{"current_steps": 52890, "total_steps": 204665, "loss": 0.1693, "lr": 1.8509662477027517e-06, "epoch": 1.2921114992793101, "percentage": 25.84, "elapsed_time": "1:08:24", "remaining_time": "3:16:17", "throughput": 8686.5, "total_tokens": 35650400} +{"current_steps": 52895, "total_steps": 204665, "loss": 0.1691, "lr": 1.8509214550755633e-06, "epoch": 1.2922336501111573, "percentage": 25.84, "elapsed_time": "1:08:24", "remaining_time": "3:16:16", "throughput": 8686.55, "total_tokens": 35653600} +{"current_steps": 52900, "total_steps": 204665, "loss": 0.1415, "lr": 1.8508766562602455e-06, "epoch": 1.2923558009430045, "percentage": 25.85, "elapsed_time": "1:08:24", "remaining_time": "3:16:16", "throughput": 8686.73, "total_tokens": 35657440} +{"current_steps": 52905, "total_steps": 204665, "loss": 0.0871, "lr": 1.8508318512571238e-06, "epoch": 1.2924779517748517, "percentage": 25.85, "elapsed_time": "1:08:25", "remaining_time": "3:16:15", "throughput": 8686.88, "total_tokens": 35661152} +{"current_steps": 52910, "total_steps": 204665, "loss": 0.0049, "lr": 1.8507870400665236e-06, "epoch": 1.2926001026066989, "percentage": 25.85, "elapsed_time": "1:08:25", "remaining_time": "3:16:15", "throughput": 8687.05, "total_tokens": 35664928} +{"current_steps": 52915, "total_steps": 204665, "loss": 0.0415, "lr": 1.8507422226887712e-06, "epoch": 1.2927222534385459, "percentage": 25.85, "elapsed_time": "1:08:25", "remaining_time": "3:16:14", "throughput": 8687.15, "total_tokens": 35668384} +{"current_steps": 52920, "total_steps": 204665, "loss": 0.1226, "lr": 1.850697399124193e-06, "epoch": 1.292844404270393, "percentage": 25.86, "elapsed_time": "1:08:26", "remaining_time": "3:16:14", "throughput": 8687.21, "total_tokens": 35671648} +{"current_steps": 52925, "total_steps": 204665, "loss": 0.2342, "lr": 1.8506525693731141e-06, "epoch": 1.2929665551022402, "percentage": 25.86, "elapsed_time": "1:08:26", "remaining_time": "3:16:13", "throughput": 8687.28, "total_tokens": 35674912} +{"current_steps": 52930, "total_steps": 204665, "loss": 0.0255, "lr": 1.8506077334358615e-06, "epoch": 1.2930887059340874, "percentage": 25.86, "elapsed_time": "1:08:26", "remaining_time": "3:16:13", "throughput": 8687.45, "total_tokens": 35678688} +{"current_steps": 52935, "total_steps": 204665, "loss": 0.0016, "lr": 1.85056289131276e-06, "epoch": 1.2932108567659346, "percentage": 25.86, "elapsed_time": "1:08:27", "remaining_time": "3:16:12", "throughput": 8687.6, "total_tokens": 35682400} +{"current_steps": 52940, "total_steps": 204665, "loss": 0.0977, "lr": 1.8505180430041367e-06, "epoch": 1.2933330075977818, "percentage": 25.87, "elapsed_time": "1:08:27", "remaining_time": "3:16:12", "throughput": 8687.68, "total_tokens": 35685792} +{"current_steps": 52945, "total_steps": 204665, "loss": 0.0011, "lr": 1.8504731885103175e-06, "epoch": 1.2934551584296288, "percentage": 25.87, "elapsed_time": "1:08:27", "remaining_time": "3:16:11", "throughput": 8687.76, "total_tokens": 35689120} +{"current_steps": 52950, "total_steps": 204665, "loss": 0.1232, "lr": 1.8504283278316284e-06, "epoch": 1.293577309261476, "percentage": 25.87, "elapsed_time": "1:08:28", "remaining_time": "3:16:11", "throughput": 8687.93, "total_tokens": 35692896} +{"current_steps": 52955, "total_steps": 204665, "loss": 0.1634, "lr": 1.8503834609683957e-06, "epoch": 1.2936994600933232, "percentage": 25.87, "elapsed_time": "1:08:28", "remaining_time": "3:16:10", "throughput": 8687.94, "total_tokens": 35695840} +{"current_steps": 52960, "total_steps": 204665, "loss": 0.0694, "lr": 1.8503385879209457e-06, "epoch": 1.2938216109251703, "percentage": 25.88, "elapsed_time": "1:08:29", "remaining_time": "3:16:10", "throughput": 8688.04, "total_tokens": 35699296} +{"current_steps": 52965, "total_steps": 204665, "loss": 0.1277, "lr": 1.8502937086896048e-06, "epoch": 1.2939437617570175, "percentage": 25.88, "elapsed_time": "1:08:29", "remaining_time": "3:16:09", "throughput": 8688.07, "total_tokens": 35702368} +{"current_steps": 52970, "total_steps": 204665, "loss": 0.0476, "lr": 1.8502488232746996e-06, "epoch": 1.2940659125888647, "percentage": 25.88, "elapsed_time": "1:08:29", "remaining_time": "3:16:09", "throughput": 8688.15, "total_tokens": 35705696} +{"current_steps": 52975, "total_steps": 204665, "loss": 0.0302, "lr": 1.8502039316765562e-06, "epoch": 1.294188063420712, "percentage": 25.88, "elapsed_time": "1:08:30", "remaining_time": "3:16:08", "throughput": 8688.19, "total_tokens": 35708832} +{"current_steps": 52980, "total_steps": 204665, "loss": 0.0026, "lr": 1.8501590338955008e-06, "epoch": 1.294310214252559, "percentage": 25.89, "elapsed_time": "1:08:30", "remaining_time": "3:16:08", "throughput": 8688.23, "total_tokens": 35711968} +{"current_steps": 52985, "total_steps": 204665, "loss": 0.0961, "lr": 1.8501141299318605e-06, "epoch": 1.2944323650844063, "percentage": 25.89, "elapsed_time": "1:08:30", "remaining_time": "3:16:07", "throughput": 8688.31, "total_tokens": 35715296} +{"current_steps": 52990, "total_steps": 204665, "loss": 0.0009, "lr": 1.8500692197859616e-06, "epoch": 1.2945545159162535, "percentage": 25.89, "elapsed_time": "1:08:31", "remaining_time": "3:16:07", "throughput": 8688.33, "total_tokens": 35718368} +{"current_steps": 52995, "total_steps": 204665, "loss": 0.0488, "lr": 1.850024303458131e-06, "epoch": 1.2946766667481007, "percentage": 25.89, "elapsed_time": "1:08:31", "remaining_time": "3:16:06", "throughput": 8688.44, "total_tokens": 35721824} +{"current_steps": 53000, "total_steps": 204665, "loss": 0.1413, "lr": 1.8499793809486945e-06, "epoch": 1.2947988175799476, "percentage": 25.9, "elapsed_time": "1:08:31", "remaining_time": "3:16:06", "throughput": 8688.49, "total_tokens": 35725024} +{"current_steps": 53005, "total_steps": 204665, "loss": 0.0305, "lr": 1.8499344522579794e-06, "epoch": 1.2949209684117948, "percentage": 25.9, "elapsed_time": "1:08:32", "remaining_time": "3:16:05", "throughput": 8688.58, "total_tokens": 35728416} +{"current_steps": 53010, "total_steps": 204665, "loss": 0.0607, "lr": 1.8498895173863125e-06, "epoch": 1.295043119243642, "percentage": 25.9, "elapsed_time": "1:08:32", "remaining_time": "3:16:05", "throughput": 8688.67, "total_tokens": 35731808} +{"current_steps": 53015, "total_steps": 204665, "loss": 0.1866, "lr": 1.8498445763340204e-06, "epoch": 1.2951652700754892, "percentage": 25.9, "elapsed_time": "1:08:32", "remaining_time": "3:16:04", "throughput": 8688.7, "total_tokens": 35734880} +{"current_steps": 53020, "total_steps": 204665, "loss": 0.0943, "lr": 1.84979962910143e-06, "epoch": 1.2952874209073364, "percentage": 25.91, "elapsed_time": "1:08:33", "remaining_time": "3:16:04", "throughput": 8688.83, "total_tokens": 35738464} +{"current_steps": 53025, "total_steps": 204665, "loss": 0.1724, "lr": 1.8497546756888683e-06, "epoch": 1.2954095717391836, "percentage": 25.91, "elapsed_time": "1:08:33", "remaining_time": "3:16:03", "throughput": 8688.98, "total_tokens": 35742176} +{"current_steps": 53030, "total_steps": 204665, "loss": 0.1477, "lr": 1.8497097160966616e-06, "epoch": 1.2955317225710308, "percentage": 25.91, "elapsed_time": "1:08:33", "remaining_time": "3:16:03", "throughput": 8689.1, "total_tokens": 35745696} +{"current_steps": 53035, "total_steps": 204665, "loss": 0.1135, "lr": 1.8496647503251377e-06, "epoch": 1.2956538734028777, "percentage": 25.91, "elapsed_time": "1:08:34", "remaining_time": "3:16:02", "throughput": 8689.27, "total_tokens": 35749472} +{"current_steps": 53040, "total_steps": 204665, "loss": 0.1194, "lr": 1.849619778374623e-06, "epoch": 1.295776024234725, "percentage": 25.92, "elapsed_time": "1:08:34", "remaining_time": "3:16:02", "throughput": 8689.32, "total_tokens": 35752736} +{"current_steps": 53045, "total_steps": 204665, "loss": 0.0285, "lr": 1.8495748002454446e-06, "epoch": 1.2958981750665721, "percentage": 25.92, "elapsed_time": "1:08:34", "remaining_time": "3:16:01", "throughput": 8689.41, "total_tokens": 35756128} +{"current_steps": 53050, "total_steps": 204665, "loss": 0.1519, "lr": 1.84952981593793e-06, "epoch": 1.2960203258984193, "percentage": 25.92, "elapsed_time": "1:08:35", "remaining_time": "3:16:01", "throughput": 8689.42, "total_tokens": 35759136} +{"current_steps": 53055, "total_steps": 204665, "loss": 0.0572, "lr": 1.8494848254524062e-06, "epoch": 1.2961424767302665, "percentage": 25.92, "elapsed_time": "1:08:35", "remaining_time": "3:16:00", "throughput": 8689.47, "total_tokens": 35762336} +{"current_steps": 53060, "total_steps": 204665, "loss": 0.0026, "lr": 1.8494398287892002e-06, "epoch": 1.2962646275621137, "percentage": 25.93, "elapsed_time": "1:08:35", "remaining_time": "3:16:00", "throughput": 8689.53, "total_tokens": 35765600} +{"current_steps": 53065, "total_steps": 204665, "loss": 0.0017, "lr": 1.849394825948639e-06, "epoch": 1.2963867783939609, "percentage": 25.93, "elapsed_time": "1:08:36", "remaining_time": "3:15:59", "throughput": 8689.63, "total_tokens": 35769056} +{"current_steps": 53070, "total_steps": 204665, "loss": 0.0598, "lr": 1.8493498169310505e-06, "epoch": 1.296508929225808, "percentage": 25.93, "elapsed_time": "1:08:36", "remaining_time": "3:15:59", "throughput": 8689.7, "total_tokens": 35772384} +{"current_steps": 53075, "total_steps": 204665, "loss": 0.1647, "lr": 1.8493048017367613e-06, "epoch": 1.2966310800576553, "percentage": 25.93, "elapsed_time": "1:08:36", "remaining_time": "3:15:58", "throughput": 8689.75, "total_tokens": 35775584} +{"current_steps": 53080, "total_steps": 204665, "loss": 0.1373, "lr": 1.8492597803660995e-06, "epoch": 1.2967532308895025, "percentage": 25.94, "elapsed_time": "1:08:37", "remaining_time": "3:15:58", "throughput": 8689.88, "total_tokens": 35779232} +{"current_steps": 53085, "total_steps": 204665, "loss": 0.0436, "lr": 1.8492147528193919e-06, "epoch": 1.2968753817213496, "percentage": 25.94, "elapsed_time": "1:08:37", "remaining_time": "3:15:57", "throughput": 8690.01, "total_tokens": 35782816} +{"current_steps": 53090, "total_steps": 204665, "loss": 0.0809, "lr": 1.8491697190969664e-06, "epoch": 1.2969975325531966, "percentage": 25.94, "elapsed_time": "1:08:38", "remaining_time": "3:15:57", "throughput": 8690.04, "total_tokens": 35785952} +{"current_steps": 53095, "total_steps": 204665, "loss": 0.0008, "lr": 1.8491246791991502e-06, "epoch": 1.2971196833850438, "percentage": 25.94, "elapsed_time": "1:08:38", "remaining_time": "3:15:56", "throughput": 8690.22, "total_tokens": 35789792} +{"current_steps": 53100, "total_steps": 204665, "loss": 0.0711, "lr": 1.849079633126271e-06, "epoch": 1.297241834216891, "percentage": 25.94, "elapsed_time": "1:08:38", "remaining_time": "3:15:56", "throughput": 8690.3, "total_tokens": 35793184} +{"current_steps": 53105, "total_steps": 204665, "loss": 0.2188, "lr": 1.8490345808786564e-06, "epoch": 1.2973639850487382, "percentage": 25.95, "elapsed_time": "1:08:39", "remaining_time": "3:15:55", "throughput": 8690.45, "total_tokens": 35796832} +{"current_steps": 53110, "total_steps": 204665, "loss": 0.0716, "lr": 1.8489895224566339e-06, "epoch": 1.2974861358805854, "percentage": 25.95, "elapsed_time": "1:08:39", "remaining_time": "3:15:55", "throughput": 8690.52, "total_tokens": 35800160} +{"current_steps": 53115, "total_steps": 204665, "loss": 0.1335, "lr": 1.848944457860531e-06, "epoch": 1.2976082867124326, "percentage": 25.95, "elapsed_time": "1:08:39", "remaining_time": "3:15:54", "throughput": 8690.63, "total_tokens": 35803616} +{"current_steps": 53120, "total_steps": 204665, "loss": 0.1742, "lr": 1.8488993870906761e-06, "epoch": 1.2977304375442797, "percentage": 25.95, "elapsed_time": "1:08:40", "remaining_time": "3:15:54", "throughput": 8690.83, "total_tokens": 35807584} +{"current_steps": 53125, "total_steps": 204665, "loss": 0.1373, "lr": 1.8488543101473963e-06, "epoch": 1.2978525883761267, "percentage": 25.96, "elapsed_time": "1:08:40", "remaining_time": "3:15:53", "throughput": 8691.02, "total_tokens": 35811488} +{"current_steps": 53130, "total_steps": 204665, "loss": 0.2268, "lr": 1.8488092270310197e-06, "epoch": 1.297974739207974, "percentage": 25.96, "elapsed_time": "1:08:40", "remaining_time": "3:15:53", "throughput": 8691.11, "total_tokens": 35814880} +{"current_steps": 53135, "total_steps": 204665, "loss": 0.1117, "lr": 1.848764137741874e-06, "epoch": 1.298096890039821, "percentage": 25.96, "elapsed_time": "1:08:41", "remaining_time": "3:15:52", "throughput": 8691.15, "total_tokens": 35818016} +{"current_steps": 53140, "total_steps": 204665, "loss": 0.0394, "lr": 1.8487190422802872e-06, "epoch": 1.2982190408716683, "percentage": 25.96, "elapsed_time": "1:08:41", "remaining_time": "3:15:52", "throughput": 8691.24, "total_tokens": 35821408} +{"current_steps": 53145, "total_steps": 204665, "loss": 0.0022, "lr": 1.8486739406465874e-06, "epoch": 1.2983411917035155, "percentage": 25.97, "elapsed_time": "1:08:41", "remaining_time": "3:15:51", "throughput": 8691.3, "total_tokens": 35824672} +{"current_steps": 53150, "total_steps": 204665, "loss": 0.1339, "lr": 1.8486288328411024e-06, "epoch": 1.2984633425353627, "percentage": 25.97, "elapsed_time": "1:08:42", "remaining_time": "3:15:51", "throughput": 8691.36, "total_tokens": 35827936} +{"current_steps": 53155, "total_steps": 204665, "loss": 0.0335, "lr": 1.8485837188641602e-06, "epoch": 1.2985854933672099, "percentage": 25.97, "elapsed_time": "1:08:42", "remaining_time": "3:15:50", "throughput": 8691.42, "total_tokens": 35831136} +{"current_steps": 53160, "total_steps": 204665, "loss": 0.1079, "lr": 1.848538598716089e-06, "epoch": 1.298707644199057, "percentage": 25.97, "elapsed_time": "1:08:42", "remaining_time": "3:15:50", "throughput": 8691.56, "total_tokens": 35834784} +{"current_steps": 53165, "total_steps": 204665, "loss": 0.029, "lr": 1.8484934723972167e-06, "epoch": 1.2988297950309042, "percentage": 25.98, "elapsed_time": "1:08:43", "remaining_time": "3:15:49", "throughput": 8691.65, "total_tokens": 35838176} +{"current_steps": 53170, "total_steps": 204665, "loss": 0.1262, "lr": 1.8484483399078718e-06, "epoch": 1.2989519458627514, "percentage": 25.98, "elapsed_time": "1:08:43", "remaining_time": "3:15:49", "throughput": 8691.65, "total_tokens": 35841120} +{"current_steps": 53175, "total_steps": 204665, "loss": 0.0012, "lr": 1.8484032012483825e-06, "epoch": 1.2990740966945986, "percentage": 25.98, "elapsed_time": "1:08:43", "remaining_time": "3:15:48", "throughput": 8691.69, "total_tokens": 35844256} +{"current_steps": 53180, "total_steps": 204665, "loss": 0.0595, "lr": 1.8483580564190768e-06, "epoch": 1.2991962475264456, "percentage": 25.98, "elapsed_time": "1:08:44", "remaining_time": "3:15:48", "throughput": 8691.89, "total_tokens": 35848224} +{"current_steps": 53185, "total_steps": 204665, "loss": 0.1228, "lr": 1.848312905420283e-06, "epoch": 1.2993183983582928, "percentage": 25.99, "elapsed_time": "1:08:44", "remaining_time": "3:15:47", "throughput": 8692.02, "total_tokens": 35851808} +{"current_steps": 53190, "total_steps": 204665, "loss": 0.0527, "lr": 1.84826774825233e-06, "epoch": 1.29944054919014, "percentage": 25.99, "elapsed_time": "1:08:45", "remaining_time": "3:15:47", "throughput": 8692.11, "total_tokens": 35855200} +{"current_steps": 53195, "total_steps": 204665, "loss": 0.0597, "lr": 1.8482225849155455e-06, "epoch": 1.2995627000219871, "percentage": 25.99, "elapsed_time": "1:08:45", "remaining_time": "3:15:46", "throughput": 8692.22, "total_tokens": 35858720} +{"current_steps": 53200, "total_steps": 204665, "loss": 0.0569, "lr": 1.8481774154102584e-06, "epoch": 1.2996848508538343, "percentage": 25.99, "elapsed_time": "1:08:45", "remaining_time": "3:15:46", "throughput": 8692.35, "total_tokens": 35862304} +{"current_steps": 53205, "total_steps": 204665, "loss": 0.1173, "lr": 1.8481322397367966e-06, "epoch": 1.2998070016856815, "percentage": 26.0, "elapsed_time": "1:08:46", "remaining_time": "3:15:45", "throughput": 8692.44, "total_tokens": 35865696} +{"current_steps": 53210, "total_steps": 204665, "loss": 0.0653, "lr": 1.8480870578954893e-06, "epoch": 1.2999291525175287, "percentage": 26.0, "elapsed_time": "1:08:46", "remaining_time": "3:15:45", "throughput": 8692.56, "total_tokens": 35869216} +{"current_steps": 53215, "total_steps": 204665, "loss": 0.0502, "lr": 1.8480418698866646e-06, "epoch": 1.3000513033493757, "percentage": 26.0, "elapsed_time": "1:08:46", "remaining_time": "3:15:44", "throughput": 8692.62, "total_tokens": 35872480} +{"current_steps": 53220, "total_steps": 204665, "loss": 0.0023, "lr": 1.8479966757106516e-06, "epoch": 1.3001734541812229, "percentage": 26.0, "elapsed_time": "1:08:47", "remaining_time": "3:15:44", "throughput": 8692.71, "total_tokens": 35875872} +{"current_steps": 53225, "total_steps": 204665, "loss": 0.1281, "lr": 1.8479514753677785e-06, "epoch": 1.30029560501307, "percentage": 26.01, "elapsed_time": "1:08:47", "remaining_time": "3:15:43", "throughput": 8692.69, "total_tokens": 35878688} +{"current_steps": 53230, "total_steps": 204665, "loss": 0.1363, "lr": 1.8479062688583743e-06, "epoch": 1.3004177558449173, "percentage": 26.01, "elapsed_time": "1:08:47", "remaining_time": "3:15:43", "throughput": 8692.72, "total_tokens": 35881760} +{"current_steps": 53235, "total_steps": 204665, "loss": 0.0084, "lr": 1.8478610561827676e-06, "epoch": 1.3005399066767644, "percentage": 26.01, "elapsed_time": "1:08:48", "remaining_time": "3:15:42", "throughput": 8692.79, "total_tokens": 35885024} +{"current_steps": 53240, "total_steps": 204665, "loss": 0.2536, "lr": 1.8478158373412872e-06, "epoch": 1.3006620575086116, "percentage": 26.01, "elapsed_time": "1:08:48", "remaining_time": "3:15:42", "throughput": 8692.85, "total_tokens": 35888288} +{"current_steps": 53245, "total_steps": 204665, "loss": 0.0507, "lr": 1.8477706123342623e-06, "epoch": 1.3007842083404588, "percentage": 26.02, "elapsed_time": "1:08:48", "remaining_time": "3:15:41", "throughput": 8692.9, "total_tokens": 35891424} +{"current_steps": 53250, "total_steps": 204665, "loss": 0.0091, "lr": 1.847725381162021e-06, "epoch": 1.300906359172306, "percentage": 26.02, "elapsed_time": "1:08:49", "remaining_time": "3:15:41", "throughput": 8692.95, "total_tokens": 35894624} +{"current_steps": 53255, "total_steps": 204665, "loss": 0.1495, "lr": 1.8476801438248932e-06, "epoch": 1.3010285100041532, "percentage": 26.02, "elapsed_time": "1:08:49", "remaining_time": "3:15:40", "throughput": 8693.01, "total_tokens": 35897888} +{"current_steps": 53260, "total_steps": 204665, "loss": 0.1332, "lr": 1.8476349003232073e-06, "epoch": 1.3011506608360004, "percentage": 26.02, "elapsed_time": "1:08:49", "remaining_time": "3:15:40", "throughput": 8693.13, "total_tokens": 35901472} +{"current_steps": 53265, "total_steps": 204665, "loss": 0.004, "lr": 1.847589650657292e-06, "epoch": 1.3012728116678476, "percentage": 26.03, "elapsed_time": "1:08:50", "remaining_time": "3:15:39", "throughput": 8693.14, "total_tokens": 35904480} +{"current_steps": 53270, "total_steps": 204665, "loss": 0.0764, "lr": 1.847544394827477e-06, "epoch": 1.3013949624996946, "percentage": 26.03, "elapsed_time": "1:08:50", "remaining_time": "3:15:39", "throughput": 8693.21, "total_tokens": 35907808} +{"current_steps": 53275, "total_steps": 204665, "loss": 0.2005, "lr": 1.8474991328340915e-06, "epoch": 1.3015171133315417, "percentage": 26.03, "elapsed_time": "1:08:50", "remaining_time": "3:15:38", "throughput": 8693.21, "total_tokens": 35910752} +{"current_steps": 53280, "total_steps": 204665, "loss": 0.0881, "lr": 1.847453864677464e-06, "epoch": 1.301639264163389, "percentage": 26.03, "elapsed_time": "1:08:51", "remaining_time": "3:15:38", "throughput": 8693.26, "total_tokens": 35913952} +{"current_steps": 53285, "total_steps": 204665, "loss": 0.1268, "lr": 1.8474085903579245e-06, "epoch": 1.3017614149952361, "percentage": 26.04, "elapsed_time": "1:08:51", "remaining_time": "3:15:37", "throughput": 8693.36, "total_tokens": 35917408} +{"current_steps": 53290, "total_steps": 204665, "loss": 0.0852, "lr": 1.8473633098758014e-06, "epoch": 1.3018835658270833, "percentage": 26.04, "elapsed_time": "1:08:51", "remaining_time": "3:15:37", "throughput": 8693.4, "total_tokens": 35920544} +{"current_steps": 53295, "total_steps": 204665, "loss": 0.0741, "lr": 1.8473180232314244e-06, "epoch": 1.3020057166589305, "percentage": 26.04, "elapsed_time": "1:08:52", "remaining_time": "3:15:36", "throughput": 8693.41, "total_tokens": 35923552} +{"current_steps": 53300, "total_steps": 204665, "loss": 0.0429, "lr": 1.8472727304251227e-06, "epoch": 1.3021278674907777, "percentage": 26.04, "elapsed_time": "1:08:52", "remaining_time": "3:15:36", "throughput": 8693.54, "total_tokens": 35927200} +{"current_steps": 53305, "total_steps": 204665, "loss": 0.0497, "lr": 1.8472274314572262e-06, "epoch": 1.3022500183226247, "percentage": 26.05, "elapsed_time": "1:08:52", "remaining_time": "3:15:35", "throughput": 8693.66, "total_tokens": 35930784} +{"current_steps": 53310, "total_steps": 204665, "loss": 0.0659, "lr": 1.847182126328064e-06, "epoch": 1.3023721691544718, "percentage": 26.05, "elapsed_time": "1:08:53", "remaining_time": "3:15:35", "throughput": 8693.7, "total_tokens": 35933920} +{"current_steps": 53315, "total_steps": 204665, "loss": 0.0328, "lr": 1.8471368150379652e-06, "epoch": 1.302494319986319, "percentage": 26.05, "elapsed_time": "1:08:53", "remaining_time": "3:15:34", "throughput": 8693.83, "total_tokens": 35937504} +{"current_steps": 53320, "total_steps": 204665, "loss": 0.0522, "lr": 1.8470914975872596e-06, "epoch": 1.3026164708181662, "percentage": 26.05, "elapsed_time": "1:08:54", "remaining_time": "3:15:34", "throughput": 8693.94, "total_tokens": 35941024} +{"current_steps": 53325, "total_steps": 204665, "loss": 0.0972, "lr": 1.847046173976277e-06, "epoch": 1.3027386216500134, "percentage": 26.05, "elapsed_time": "1:08:54", "remaining_time": "3:15:33", "throughput": 8694.17, "total_tokens": 35945120} +{"current_steps": 53330, "total_steps": 204665, "loss": 0.0486, "lr": 1.8470008442053468e-06, "epoch": 1.3028607724818606, "percentage": 26.06, "elapsed_time": "1:08:54", "remaining_time": "3:15:33", "throughput": 8694.25, "total_tokens": 35948448} +{"current_steps": 53335, "total_steps": 204665, "loss": 0.1491, "lr": 1.8469555082747985e-06, "epoch": 1.3029829233137078, "percentage": 26.06, "elapsed_time": "1:08:55", "remaining_time": "3:15:32", "throughput": 8694.29, "total_tokens": 35951584} +{"current_steps": 53340, "total_steps": 204665, "loss": 0.1864, "lr": 1.846910166184962e-06, "epoch": 1.303105074145555, "percentage": 26.06, "elapsed_time": "1:08:55", "remaining_time": "3:15:32", "throughput": 8694.36, "total_tokens": 35954912} +{"current_steps": 53345, "total_steps": 204665, "loss": 0.1499, "lr": 1.846864817936167e-06, "epoch": 1.3032272249774022, "percentage": 26.06, "elapsed_time": "1:08:55", "remaining_time": "3:15:31", "throughput": 8694.47, "total_tokens": 35958368} +{"current_steps": 53350, "total_steps": 204665, "loss": 0.0402, "lr": 1.8468194635287432e-06, "epoch": 1.3033493758092494, "percentage": 26.07, "elapsed_time": "1:08:56", "remaining_time": "3:15:31", "throughput": 8694.54, "total_tokens": 35961632} +{"current_steps": 53355, "total_steps": 204665, "loss": 0.0991, "lr": 1.8467741029630207e-06, "epoch": 1.3034715266410966, "percentage": 26.07, "elapsed_time": "1:08:56", "remaining_time": "3:15:30", "throughput": 8694.69, "total_tokens": 35965344} +{"current_steps": 53360, "total_steps": 204665, "loss": 0.0474, "lr": 1.8467287362393288e-06, "epoch": 1.3035936774729435, "percentage": 26.07, "elapsed_time": "1:08:56", "remaining_time": "3:15:30", "throughput": 8694.78, "total_tokens": 35968736} +{"current_steps": 53365, "total_steps": 204665, "loss": 0.0616, "lr": 1.846683363357998e-06, "epoch": 1.3037158283047907, "percentage": 26.07, "elapsed_time": "1:08:57", "remaining_time": "3:15:29", "throughput": 8694.81, "total_tokens": 35971808} +{"current_steps": 53370, "total_steps": 204665, "loss": 0.0982, "lr": 1.8466379843193583e-06, "epoch": 1.303837979136638, "percentage": 26.08, "elapsed_time": "1:08:57", "remaining_time": "3:15:29", "throughput": 8694.92, "total_tokens": 35975328} +{"current_steps": 53375, "total_steps": 204665, "loss": 0.1391, "lr": 1.846592599123739e-06, "epoch": 1.303960129968485, "percentage": 26.08, "elapsed_time": "1:08:57", "remaining_time": "3:15:28", "throughput": 8694.94, "total_tokens": 35978336} +{"current_steps": 53380, "total_steps": 204665, "loss": 0.1133, "lr": 1.8465472077714707e-06, "epoch": 1.3040822808003323, "percentage": 26.08, "elapsed_time": "1:08:58", "remaining_time": "3:15:28", "throughput": 8695.03, "total_tokens": 35981728} +{"current_steps": 53385, "total_steps": 204665, "loss": 0.0989, "lr": 1.8465018102628837e-06, "epoch": 1.3042044316321795, "percentage": 26.08, "elapsed_time": "1:08:58", "remaining_time": "3:15:27", "throughput": 8695.1, "total_tokens": 35984992} +{"current_steps": 53390, "total_steps": 204665, "loss": 0.0663, "lr": 1.8464564065983077e-06, "epoch": 1.3043265824640267, "percentage": 26.09, "elapsed_time": "1:08:58", "remaining_time": "3:15:27", "throughput": 8695.15, "total_tokens": 35988192} +{"current_steps": 53395, "total_steps": 204665, "loss": 0.1115, "lr": 1.846410996778073e-06, "epoch": 1.3044487332958736, "percentage": 26.09, "elapsed_time": "1:08:59", "remaining_time": "3:15:26", "throughput": 8695.18, "total_tokens": 35991264} +{"current_steps": 53400, "total_steps": 204665, "loss": 0.1054, "lr": 1.8463655808025098e-06, "epoch": 1.3045708841277208, "percentage": 26.09, "elapsed_time": "1:08:59", "remaining_time": "3:15:26", "throughput": 8695.32, "total_tokens": 35994912} +{"current_steps": 53405, "total_steps": 204665, "loss": 0.1084, "lr": 1.8463201586719486e-06, "epoch": 1.304693034959568, "percentage": 26.09, "elapsed_time": "1:08:59", "remaining_time": "3:15:25", "throughput": 8695.37, "total_tokens": 35998112} +{"current_steps": 53410, "total_steps": 204665, "loss": 0.002, "lr": 1.8462747303867197e-06, "epoch": 1.3048151857914152, "percentage": 26.1, "elapsed_time": "1:09:00", "remaining_time": "3:15:25", "throughput": 8695.41, "total_tokens": 36001248} +{"current_steps": 53415, "total_steps": 204665, "loss": 0.0767, "lr": 1.846229295947153e-06, "epoch": 1.3049373366232624, "percentage": 26.1, "elapsed_time": "1:09:00", "remaining_time": "3:15:24", "throughput": 8695.45, "total_tokens": 36004384} +{"current_steps": 53420, "total_steps": 204665, "loss": 0.0331, "lr": 1.8461838553535793e-06, "epoch": 1.3050594874551096, "percentage": 26.1, "elapsed_time": "1:09:00", "remaining_time": "3:15:24", "throughput": 8695.68, "total_tokens": 36008480} +{"current_steps": 53425, "total_steps": 204665, "loss": 0.0761, "lr": 1.8461384086063292e-06, "epoch": 1.3051816382869568, "percentage": 26.1, "elapsed_time": "1:09:01", "remaining_time": "3:15:23", "throughput": 8695.87, "total_tokens": 36012384} +{"current_steps": 53430, "total_steps": 204665, "loss": 0.0968, "lr": 1.846092955705733e-06, "epoch": 1.305303789118804, "percentage": 26.11, "elapsed_time": "1:09:01", "remaining_time": "3:15:23", "throughput": 8695.93, "total_tokens": 36015648} +{"current_steps": 53435, "total_steps": 204665, "loss": 0.0418, "lr": 1.846047496652121e-06, "epoch": 1.3054259399506511, "percentage": 26.11, "elapsed_time": "1:09:02", "remaining_time": "3:15:22", "throughput": 8696.0, "total_tokens": 36018912} +{"current_steps": 53440, "total_steps": 204665, "loss": 0.0844, "lr": 1.8460020314458244e-06, "epoch": 1.3055480907824983, "percentage": 26.11, "elapsed_time": "1:09:02", "remaining_time": "3:15:22", "throughput": 8696.14, "total_tokens": 36022560} +{"current_steps": 53445, "total_steps": 204665, "loss": 0.0424, "lr": 1.8459565600871732e-06, "epoch": 1.3056702416143455, "percentage": 26.11, "elapsed_time": "1:09:02", "remaining_time": "3:15:21", "throughput": 8696.19, "total_tokens": 36025760} +{"current_steps": 53450, "total_steps": 204665, "loss": 0.002, "lr": 1.8459110825764986e-06, "epoch": 1.3057923924461925, "percentage": 26.12, "elapsed_time": "1:09:03", "remaining_time": "3:15:21", "throughput": 8696.26, "total_tokens": 36029088} +{"current_steps": 53455, "total_steps": 204665, "loss": 0.0551, "lr": 1.845865598914131e-06, "epoch": 1.3059145432780397, "percentage": 26.12, "elapsed_time": "1:09:03", "remaining_time": "3:15:20", "throughput": 8696.29, "total_tokens": 36032160} +{"current_steps": 53460, "total_steps": 204665, "loss": 0.0005, "lr": 1.8458201091004011e-06, "epoch": 1.3060366941098869, "percentage": 26.12, "elapsed_time": "1:09:03", "remaining_time": "3:15:20", "throughput": 8696.43, "total_tokens": 36035808} +{"current_steps": 53465, "total_steps": 204665, "loss": 0.0771, "lr": 1.84577461313564e-06, "epoch": 1.306158844941734, "percentage": 26.12, "elapsed_time": "1:09:04", "remaining_time": "3:15:19", "throughput": 8696.54, "total_tokens": 36039328} +{"current_steps": 53470, "total_steps": 204665, "loss": 0.1988, "lr": 1.8457291110201782e-06, "epoch": 1.3062809957735813, "percentage": 26.13, "elapsed_time": "1:09:04", "remaining_time": "3:15:19", "throughput": 8696.6, "total_tokens": 36042592} +{"current_steps": 53475, "total_steps": 204665, "loss": 0.1351, "lr": 1.8456836027543472e-06, "epoch": 1.3064031466054284, "percentage": 26.13, "elapsed_time": "1:09:04", "remaining_time": "3:15:18", "throughput": 8696.67, "total_tokens": 36045856} +{"current_steps": 53480, "total_steps": 204665, "loss": 0.0866, "lr": 1.8456380883384774e-06, "epoch": 1.3065252974372754, "percentage": 26.13, "elapsed_time": "1:09:05", "remaining_time": "3:15:18", "throughput": 8696.8, "total_tokens": 36049504} +{"current_steps": 53485, "total_steps": 204665, "loss": 0.092, "lr": 1.8455925677729e-06, "epoch": 1.3066474482691226, "percentage": 26.13, "elapsed_time": "1:09:05", "remaining_time": "3:15:17", "throughput": 8696.83, "total_tokens": 36052576} +{"current_steps": 53490, "total_steps": 204665, "loss": 0.1593, "lr": 1.8455470410579462e-06, "epoch": 1.3067695991009698, "percentage": 26.14, "elapsed_time": "1:09:05", "remaining_time": "3:15:17", "throughput": 8696.92, "total_tokens": 36055968} +{"current_steps": 53495, "total_steps": 204665, "loss": 0.108, "lr": 1.8455015081939465e-06, "epoch": 1.306891749932817, "percentage": 26.14, "elapsed_time": "1:09:06", "remaining_time": "3:15:16", "throughput": 8697.11, "total_tokens": 36059872} +{"current_steps": 53500, "total_steps": 204665, "loss": 0.1167, "lr": 1.8454559691812326e-06, "epoch": 1.3070139007646642, "percentage": 26.14, "elapsed_time": "1:09:06", "remaining_time": "3:15:16", "throughput": 8697.25, "total_tokens": 36063520} +{"current_steps": 53505, "total_steps": 204665, "loss": 0.0282, "lr": 1.8454104240201355e-06, "epoch": 1.3071360515965114, "percentage": 26.14, "elapsed_time": "1:09:06", "remaining_time": "3:15:15", "throughput": 8697.32, "total_tokens": 36066848} +{"current_steps": 53510, "total_steps": 204665, "loss": 0.1318, "lr": 1.8453648727109865e-06, "epoch": 1.3072582024283586, "percentage": 26.15, "elapsed_time": "1:09:07", "remaining_time": "3:15:15", "throughput": 8697.45, "total_tokens": 36070432} +{"current_steps": 53515, "total_steps": 204665, "loss": 0.1191, "lr": 1.8453193152541167e-06, "epoch": 1.3073803532602057, "percentage": 26.15, "elapsed_time": "1:09:07", "remaining_time": "3:15:14", "throughput": 8697.5, "total_tokens": 36073632} +{"current_steps": 53520, "total_steps": 204665, "loss": 0.0371, "lr": 1.8452737516498576e-06, "epoch": 1.307502504092053, "percentage": 26.15, "elapsed_time": "1:09:07", "remaining_time": "3:15:14", "throughput": 8697.53, "total_tokens": 36076704} +{"current_steps": 53525, "total_steps": 204665, "loss": 0.074, "lr": 1.8452281818985402e-06, "epoch": 1.3076246549239001, "percentage": 26.15, "elapsed_time": "1:09:08", "remaining_time": "3:15:13", "throughput": 8697.54, "total_tokens": 36079712} +{"current_steps": 53530, "total_steps": 204665, "loss": 0.0565, "lr": 1.845182606000496e-06, "epoch": 1.3077468057557473, "percentage": 26.15, "elapsed_time": "1:09:08", "remaining_time": "3:15:13", "throughput": 8697.54, "total_tokens": 36082656} +{"current_steps": 53535, "total_steps": 204665, "loss": 0.0552, "lr": 1.845137023956057e-06, "epoch": 1.3078689565875945, "percentage": 26.16, "elapsed_time": "1:09:08", "remaining_time": "3:15:12", "throughput": 8697.57, "total_tokens": 36085728} +{"current_steps": 53540, "total_steps": 204665, "loss": 0.02, "lr": 1.8450914357655538e-06, "epoch": 1.3079911074194415, "percentage": 26.16, "elapsed_time": "1:09:09", "remaining_time": "3:15:12", "throughput": 8697.62, "total_tokens": 36088928} +{"current_steps": 53545, "total_steps": 204665, "loss": 0.1131, "lr": 1.8450458414293187e-06, "epoch": 1.3081132582512887, "percentage": 26.16, "elapsed_time": "1:09:09", "remaining_time": "3:15:11", "throughput": 8697.63, "total_tokens": 36091936} +{"current_steps": 53550, "total_steps": 204665, "loss": 0.0659, "lr": 1.8450002409476828e-06, "epoch": 1.3082354090831358, "percentage": 26.16, "elapsed_time": "1:09:09", "remaining_time": "3:15:10", "throughput": 8697.72, "total_tokens": 36095328} +{"current_steps": 53555, "total_steps": 204665, "loss": 0.0435, "lr": 1.844954634320978e-06, "epoch": 1.308357559914983, "percentage": 26.17, "elapsed_time": "1:09:10", "remaining_time": "3:15:10", "throughput": 8697.82, "total_tokens": 36098784} +{"current_steps": 53560, "total_steps": 204665, "loss": 0.0249, "lr": 1.8449090215495358e-06, "epoch": 1.3084797107468302, "percentage": 26.17, "elapsed_time": "1:09:10", "remaining_time": "3:15:10", "throughput": 8697.89, "total_tokens": 36102112} +{"current_steps": 53565, "total_steps": 204665, "loss": 0.1132, "lr": 1.8448634026336877e-06, "epoch": 1.3086018615786774, "percentage": 26.17, "elapsed_time": "1:09:11", "remaining_time": "3:15:09", "throughput": 8697.95, "total_tokens": 36105376} +{"current_steps": 53570, "total_steps": 204665, "loss": 0.0413, "lr": 1.844817777573766e-06, "epoch": 1.3087240124105244, "percentage": 26.17, "elapsed_time": "1:09:11", "remaining_time": "3:15:09", "throughput": 8698.07, "total_tokens": 36108896} +{"current_steps": 53575, "total_steps": 204665, "loss": 0.0455, "lr": 1.844772146370102e-06, "epoch": 1.3088461632423716, "percentage": 26.18, "elapsed_time": "1:09:11", "remaining_time": "3:15:08", "throughput": 8698.18, "total_tokens": 36112416} +{"current_steps": 53580, "total_steps": 204665, "loss": 0.0558, "lr": 1.8447265090230277e-06, "epoch": 1.3089683140742188, "percentage": 26.18, "elapsed_time": "1:09:12", "remaining_time": "3:15:07", "throughput": 8698.22, "total_tokens": 36115552} +{"current_steps": 53585, "total_steps": 204665, "loss": 0.0438, "lr": 1.8446808655328755e-06, "epoch": 1.309090464906066, "percentage": 26.18, "elapsed_time": "1:09:12", "remaining_time": "3:15:07", "throughput": 8698.37, "total_tokens": 36119264} +{"current_steps": 53590, "total_steps": 204665, "loss": 0.1691, "lr": 1.8446352158999764e-06, "epoch": 1.3092126157379131, "percentage": 26.18, "elapsed_time": "1:09:12", "remaining_time": "3:15:07", "throughput": 8698.5, "total_tokens": 36122848} +{"current_steps": 53595, "total_steps": 204665, "loss": 0.0925, "lr": 1.8445895601246628e-06, "epoch": 1.3093347665697603, "percentage": 26.19, "elapsed_time": "1:09:13", "remaining_time": "3:15:06", "throughput": 8698.69, "total_tokens": 36126752} +{"current_steps": 53600, "total_steps": 204665, "loss": 0.1044, "lr": 1.844543898207267e-06, "epoch": 1.3094569174016075, "percentage": 26.19, "elapsed_time": "1:09:13", "remaining_time": "3:15:06", "throughput": 8698.69, "total_tokens": 36129696} +{"current_steps": 53605, "total_steps": 204665, "loss": 0.0014, "lr": 1.8444982301481207e-06, "epoch": 1.3095790682334547, "percentage": 26.19, "elapsed_time": "1:09:13", "remaining_time": "3:15:05", "throughput": 8698.8, "total_tokens": 36133216} +{"current_steps": 53610, "total_steps": 204665, "loss": 0.0781, "lr": 1.8444525559475559e-06, "epoch": 1.309701219065302, "percentage": 26.19, "elapsed_time": "1:09:14", "remaining_time": "3:15:05", "throughput": 8698.85, "total_tokens": 36136416} +{"current_steps": 53615, "total_steps": 204665, "loss": 0.0889, "lr": 1.8444068756059052e-06, "epoch": 1.309823369897149, "percentage": 26.2, "elapsed_time": "1:09:14", "remaining_time": "3:15:04", "throughput": 8699.0, "total_tokens": 36140128} +{"current_steps": 53620, "total_steps": 204665, "loss": 0.1508, "lr": 1.8443611891235008e-06, "epoch": 1.3099455207289963, "percentage": 26.2, "elapsed_time": "1:09:14", "remaining_time": "3:15:04", "throughput": 8699.17, "total_tokens": 36143968} +{"current_steps": 53625, "total_steps": 204665, "loss": 0.1148, "lr": 1.8443154965006741e-06, "epoch": 1.3100676715608432, "percentage": 26.2, "elapsed_time": "1:09:15", "remaining_time": "3:15:03", "throughput": 8699.21, "total_tokens": 36147104} +{"current_steps": 53630, "total_steps": 204665, "loss": 0.0471, "lr": 1.8442697977377586e-06, "epoch": 1.3101898223926904, "percentage": 26.2, "elapsed_time": "1:09:15", "remaining_time": "3:15:03", "throughput": 8699.31, "total_tokens": 36150560} +{"current_steps": 53635, "total_steps": 204665, "loss": 0.063, "lr": 1.8442240928350858e-06, "epoch": 1.3103119732245376, "percentage": 26.21, "elapsed_time": "1:09:15", "remaining_time": "3:15:02", "throughput": 8699.36, "total_tokens": 36153760} +{"current_steps": 53640, "total_steps": 204665, "loss": 0.1102, "lr": 1.8441783817929885e-06, "epoch": 1.3104341240563848, "percentage": 26.21, "elapsed_time": "1:09:16", "remaining_time": "3:15:02", "throughput": 8699.4, "total_tokens": 36156896} +{"current_steps": 53645, "total_steps": 204665, "loss": 0.0036, "lr": 1.844132664611799e-06, "epoch": 1.310556274888232, "percentage": 26.21, "elapsed_time": "1:09:16", "remaining_time": "3:15:01", "throughput": 8699.52, "total_tokens": 36160480} +{"current_steps": 53650, "total_steps": 204665, "loss": 0.126, "lr": 1.8440869412918497e-06, "epoch": 1.3106784257200792, "percentage": 26.21, "elapsed_time": "1:09:16", "remaining_time": "3:15:01", "throughput": 8699.67, "total_tokens": 36164192} +{"current_steps": 53655, "total_steps": 204665, "loss": 0.0899, "lr": 1.8440412118334727e-06, "epoch": 1.3108005765519264, "percentage": 26.22, "elapsed_time": "1:09:17", "remaining_time": "3:15:00", "throughput": 8699.82, "total_tokens": 36167968} +{"current_steps": 53660, "total_steps": 204665, "loss": 0.0861, "lr": 1.8439954762370015e-06, "epoch": 1.3109227273837734, "percentage": 26.22, "elapsed_time": "1:09:17", "remaining_time": "3:15:00", "throughput": 8699.9, "total_tokens": 36171296} +{"current_steps": 53665, "total_steps": 204665, "loss": 0.0879, "lr": 1.8439497345027677e-06, "epoch": 1.3110448782156205, "percentage": 26.22, "elapsed_time": "1:09:18", "remaining_time": "3:14:59", "throughput": 8699.91, "total_tokens": 36174304} +{"current_steps": 53670, "total_steps": 204665, "loss": 0.0018, "lr": 1.8439039866311049e-06, "epoch": 1.3111670290474677, "percentage": 26.22, "elapsed_time": "1:09:18", "remaining_time": "3:14:59", "throughput": 8699.91, "total_tokens": 36177248} +{"current_steps": 53675, "total_steps": 204665, "loss": 0.1782, "lr": 1.8438582326223451e-06, "epoch": 1.311289179879315, "percentage": 26.23, "elapsed_time": "1:09:18", "remaining_time": "3:14:58", "throughput": 8699.97, "total_tokens": 36180512} +{"current_steps": 53680, "total_steps": 204665, "loss": 0.1314, "lr": 1.8438124724768213e-06, "epoch": 1.3114113307111621, "percentage": 26.23, "elapsed_time": "1:09:19", "remaining_time": "3:14:58", "throughput": 8700.0, "total_tokens": 36183648} +{"current_steps": 53685, "total_steps": 204665, "loss": 0.0956, "lr": 1.843766706194866e-06, "epoch": 1.3115334815430093, "percentage": 26.23, "elapsed_time": "1:09:19", "remaining_time": "3:14:57", "throughput": 8700.01, "total_tokens": 36186656} +{"current_steps": 53690, "total_steps": 204665, "loss": 0.0805, "lr": 1.8437209337768127e-06, "epoch": 1.3116556323748565, "percentage": 26.23, "elapsed_time": "1:09:19", "remaining_time": "3:14:57", "throughput": 8700.07, "total_tokens": 36189920} +{"current_steps": 53695, "total_steps": 204665, "loss": 0.0928, "lr": 1.8436751552229937e-06, "epoch": 1.3117777832067037, "percentage": 26.24, "elapsed_time": "1:09:20", "remaining_time": "3:14:56", "throughput": 8700.13, "total_tokens": 36193120} +{"current_steps": 53700, "total_steps": 204665, "loss": 0.0605, "lr": 1.843629370533742e-06, "epoch": 1.3118999340385509, "percentage": 26.24, "elapsed_time": "1:09:20", "remaining_time": "3:14:56", "throughput": 8700.29, "total_tokens": 36196960} +{"current_steps": 53705, "total_steps": 204665, "loss": 0.2534, "lr": 1.8435835797093906e-06, "epoch": 1.312022084870398, "percentage": 26.24, "elapsed_time": "1:09:20", "remaining_time": "3:14:55", "throughput": 8700.35, "total_tokens": 36200224} +{"current_steps": 53710, "total_steps": 204665, "loss": 0.0969, "lr": 1.8435377827502724e-06, "epoch": 1.3121442357022453, "percentage": 26.24, "elapsed_time": "1:09:21", "remaining_time": "3:14:55", "throughput": 8700.55, "total_tokens": 36204192} +{"current_steps": 53715, "total_steps": 204665, "loss": 0.0439, "lr": 1.8434919796567208e-06, "epoch": 1.3122663865340922, "percentage": 26.25, "elapsed_time": "1:09:21", "remaining_time": "3:14:54", "throughput": 8701.01, "total_tokens": 36209568} +{"current_steps": 53720, "total_steps": 204665, "loss": 0.0879, "lr": 1.8434461704290685e-06, "epoch": 1.3123885373659394, "percentage": 26.25, "elapsed_time": "1:09:21", "remaining_time": "3:14:54", "throughput": 8701.03, "total_tokens": 36212640} +{"current_steps": 53725, "total_steps": 204665, "loss": 0.0398, "lr": 1.8434003550676488e-06, "epoch": 1.3125106881977866, "percentage": 26.25, "elapsed_time": "1:09:22", "remaining_time": "3:14:53", "throughput": 8701.22, "total_tokens": 36216544} +{"current_steps": 53730, "total_steps": 204665, "loss": 0.1991, "lr": 1.843354533572795e-06, "epoch": 1.3126328390296338, "percentage": 26.25, "elapsed_time": "1:09:22", "remaining_time": "3:14:53", "throughput": 8701.29, "total_tokens": 36219808} +{"current_steps": 53735, "total_steps": 204665, "loss": 0.0356, "lr": 1.84330870594484e-06, "epoch": 1.312754989861481, "percentage": 26.26, "elapsed_time": "1:09:22", "remaining_time": "3:14:52", "throughput": 8701.33, "total_tokens": 36222944} +{"current_steps": 53740, "total_steps": 204665, "loss": 0.0385, "lr": 1.8432628721841174e-06, "epoch": 1.3128771406933282, "percentage": 26.26, "elapsed_time": "1:09:23", "remaining_time": "3:14:52", "throughput": 8701.36, "total_tokens": 36226080} +{"current_steps": 53745, "total_steps": 204665, "loss": 0.0696, "lr": 1.8432170322909602e-06, "epoch": 1.3129992915251754, "percentage": 26.26, "elapsed_time": "1:09:23", "remaining_time": "3:14:51", "throughput": 8701.45, "total_tokens": 36229472} +{"current_steps": 53750, "total_steps": 204665, "loss": 0.1067, "lr": 1.8431711862657022e-06, "epoch": 1.3131214423570223, "percentage": 26.26, "elapsed_time": "1:09:23", "remaining_time": "3:14:51", "throughput": 8701.53, "total_tokens": 36232800} +{"current_steps": 53755, "total_steps": 204665, "loss": 0.1298, "lr": 1.8431253341086764e-06, "epoch": 1.3132435931888695, "percentage": 26.26, "elapsed_time": "1:09:24", "remaining_time": "3:14:50", "throughput": 8701.71, "total_tokens": 36236704} +{"current_steps": 53760, "total_steps": 204665, "loss": 0.1319, "lr": 1.8430794758202165e-06, "epoch": 1.3133657440207167, "percentage": 26.27, "elapsed_time": "1:09:24", "remaining_time": "3:14:50", "throughput": 8701.73, "total_tokens": 36239712} +{"current_steps": 53765, "total_steps": 204665, "loss": 0.0364, "lr": 1.8430336114006555e-06, "epoch": 1.313487894852564, "percentage": 26.27, "elapsed_time": "1:09:25", "remaining_time": "3:14:49", "throughput": 8701.82, "total_tokens": 36243168} +{"current_steps": 53770, "total_steps": 204665, "loss": 0.0531, "lr": 1.8429877408503279e-06, "epoch": 1.313610045684411, "percentage": 26.27, "elapsed_time": "1:09:25", "remaining_time": "3:14:49", "throughput": 8701.95, "total_tokens": 36246816} +{"current_steps": 53775, "total_steps": 204665, "loss": 0.0306, "lr": 1.8429418641695665e-06, "epoch": 1.3137321965162583, "percentage": 26.27, "elapsed_time": "1:09:25", "remaining_time": "3:14:48", "throughput": 8702.03, "total_tokens": 36250144} +{"current_steps": 53780, "total_steps": 204665, "loss": 0.0013, "lr": 1.8428959813587048e-06, "epoch": 1.3138543473481055, "percentage": 26.28, "elapsed_time": "1:09:26", "remaining_time": "3:14:48", "throughput": 8702.07, "total_tokens": 36253280} +{"current_steps": 53785, "total_steps": 204665, "loss": 0.2003, "lr": 1.8428500924180774e-06, "epoch": 1.3139764981799527, "percentage": 26.28, "elapsed_time": "1:09:26", "remaining_time": "3:14:47", "throughput": 8702.19, "total_tokens": 36256864} +{"current_steps": 53790, "total_steps": 204665, "loss": 0.0865, "lr": 1.842804197348017e-06, "epoch": 1.3140986490117998, "percentage": 26.28, "elapsed_time": "1:09:26", "remaining_time": "3:14:47", "throughput": 8702.24, "total_tokens": 36260064} +{"current_steps": 53795, "total_steps": 204665, "loss": 0.1247, "lr": 1.8427582961488579e-06, "epoch": 1.314220799843647, "percentage": 26.28, "elapsed_time": "1:09:27", "remaining_time": "3:14:46", "throughput": 8702.35, "total_tokens": 36263584} +{"current_steps": 53800, "total_steps": 204665, "loss": 0.1986, "lr": 1.8427123888209337e-06, "epoch": 1.3143429506754942, "percentage": 26.29, "elapsed_time": "1:09:27", "remaining_time": "3:14:46", "throughput": 8702.52, "total_tokens": 36267424} +{"current_steps": 53805, "total_steps": 204665, "loss": 0.1484, "lr": 1.8426664753645786e-06, "epoch": 1.3144651015073412, "percentage": 26.29, "elapsed_time": "1:09:27", "remaining_time": "3:14:45", "throughput": 8702.75, "total_tokens": 36271584} +{"current_steps": 53810, "total_steps": 204665, "loss": 0.0482, "lr": 1.8426205557801259e-06, "epoch": 1.3145872523391884, "percentage": 26.29, "elapsed_time": "1:09:28", "remaining_time": "3:14:45", "throughput": 8702.82, "total_tokens": 36274912} +{"current_steps": 53815, "total_steps": 204665, "loss": 0.0452, "lr": 1.84257463006791e-06, "epoch": 1.3147094031710356, "percentage": 26.29, "elapsed_time": "1:09:28", "remaining_time": "3:14:44", "throughput": 8703.13, "total_tokens": 36279392} +{"current_steps": 53820, "total_steps": 204665, "loss": 0.0494, "lr": 1.842528698228265e-06, "epoch": 1.3148315540028828, "percentage": 26.3, "elapsed_time": "1:09:28", "remaining_time": "3:14:44", "throughput": 8703.26, "total_tokens": 36283040} +{"current_steps": 53825, "total_steps": 204665, "loss": 0.0014, "lr": 1.8424827602615247e-06, "epoch": 1.31495370483473, "percentage": 26.3, "elapsed_time": "1:09:29", "remaining_time": "3:14:43", "throughput": 8703.33, "total_tokens": 36286304} +{"current_steps": 53830, "total_steps": 204665, "loss": 0.1553, "lr": 1.842436816168023e-06, "epoch": 1.3150758556665771, "percentage": 26.3, "elapsed_time": "1:09:29", "remaining_time": "3:14:43", "throughput": 8703.4, "total_tokens": 36289632} +{"current_steps": 53835, "total_steps": 204665, "loss": 0.0241, "lr": 1.8423908659480943e-06, "epoch": 1.3151980064984243, "percentage": 26.3, "elapsed_time": "1:09:29", "remaining_time": "3:14:42", "throughput": 8703.48, "total_tokens": 36292960} +{"current_steps": 53840, "total_steps": 204665, "loss": 0.113, "lr": 1.8423449096020724e-06, "epoch": 1.3153201573302713, "percentage": 26.31, "elapsed_time": "1:09:30", "remaining_time": "3:14:42", "throughput": 8703.46, "total_tokens": 36295840} +{"current_steps": 53845, "total_steps": 204665, "loss": 0.1004, "lr": 1.842298947130292e-06, "epoch": 1.3154423081621185, "percentage": 26.31, "elapsed_time": "1:09:30", "remaining_time": "3:14:41", "throughput": 8703.58, "total_tokens": 36299360} +{"current_steps": 53850, "total_steps": 204665, "loss": 0.1731, "lr": 1.8422529785330872e-06, "epoch": 1.3155644589939657, "percentage": 26.31, "elapsed_time": "1:09:30", "remaining_time": "3:14:41", "throughput": 8703.64, "total_tokens": 36302624} +{"current_steps": 53855, "total_steps": 204665, "loss": 0.1034, "lr": 1.8422070038107918e-06, "epoch": 1.3156866098258129, "percentage": 26.31, "elapsed_time": "1:09:31", "remaining_time": "3:14:40", "throughput": 8703.68, "total_tokens": 36305760} +{"current_steps": 53860, "total_steps": 204665, "loss": 0.1065, "lr": 1.8421610229637405e-06, "epoch": 1.31580876065766, "percentage": 26.32, "elapsed_time": "1:09:31", "remaining_time": "3:14:40", "throughput": 8703.79, "total_tokens": 36309280} +{"current_steps": 53865, "total_steps": 204665, "loss": 0.1159, "lr": 1.842115035992268e-06, "epoch": 1.3159309114895072, "percentage": 26.32, "elapsed_time": "1:09:32", "remaining_time": "3:14:39", "throughput": 8703.87, "total_tokens": 36312672} +{"current_steps": 53870, "total_steps": 204665, "loss": 0.0485, "lr": 1.8420690428967087e-06, "epoch": 1.3160530623213544, "percentage": 26.32, "elapsed_time": "1:09:32", "remaining_time": "3:14:39", "throughput": 8703.91, "total_tokens": 36315808} +{"current_steps": 53875, "total_steps": 204665, "loss": 0.0246, "lr": 1.8420230436773965e-06, "epoch": 1.3161752131532016, "percentage": 26.32, "elapsed_time": "1:09:32", "remaining_time": "3:14:38", "throughput": 8703.96, "total_tokens": 36319008} +{"current_steps": 53880, "total_steps": 204665, "loss": 0.0456, "lr": 1.8419770383346664e-06, "epoch": 1.3162973639850488, "percentage": 26.33, "elapsed_time": "1:09:33", "remaining_time": "3:14:38", "throughput": 8703.99, "total_tokens": 36322080} +{"current_steps": 53885, "total_steps": 204665, "loss": 0.1091, "lr": 1.8419310268688525e-06, "epoch": 1.316419514816896, "percentage": 26.33, "elapsed_time": "1:09:33", "remaining_time": "3:14:37", "throughput": 8704.1, "total_tokens": 36325600} +{"current_steps": 53890, "total_steps": 204665, "loss": 0.091, "lr": 1.84188500928029e-06, "epoch": 1.3165416656487432, "percentage": 26.33, "elapsed_time": "1:09:33", "remaining_time": "3:14:37", "throughput": 8704.15, "total_tokens": 36328800} +{"current_steps": 53895, "total_steps": 204665, "loss": 0.1389, "lr": 1.8418389855693132e-06, "epoch": 1.3166638164805902, "percentage": 26.33, "elapsed_time": "1:09:34", "remaining_time": "3:14:36", "throughput": 8704.15, "total_tokens": 36331744} +{"current_steps": 53900, "total_steps": 204665, "loss": 0.1134, "lr": 1.841792955736257e-06, "epoch": 1.3167859673124374, "percentage": 26.34, "elapsed_time": "1:09:34", "remaining_time": "3:14:36", "throughput": 8704.2, "total_tokens": 36334944} +{"current_steps": 53905, "total_steps": 204665, "loss": 0.1849, "lr": 1.841746919781456e-06, "epoch": 1.3169081181442845, "percentage": 26.34, "elapsed_time": "1:09:34", "remaining_time": "3:14:35", "throughput": 8704.27, "total_tokens": 36338272} +{"current_steps": 53910, "total_steps": 204665, "loss": 0.0561, "lr": 1.8417008777052447e-06, "epoch": 1.3170302689761317, "percentage": 26.34, "elapsed_time": "1:09:35", "remaining_time": "3:14:35", "throughput": 8704.36, "total_tokens": 36341664} +{"current_steps": 53915, "total_steps": 204665, "loss": 0.012, "lr": 1.8416548295079583e-06, "epoch": 1.317152419807979, "percentage": 26.34, "elapsed_time": "1:09:35", "remaining_time": "3:14:34", "throughput": 8704.44, "total_tokens": 36344992} +{"current_steps": 53920, "total_steps": 204665, "loss": 0.0688, "lr": 1.841608775189932e-06, "epoch": 1.3172745706398261, "percentage": 26.35, "elapsed_time": "1:09:35", "remaining_time": "3:14:34", "throughput": 8704.52, "total_tokens": 36348320} +{"current_steps": 53925, "total_steps": 204665, "loss": 0.0525, "lr": 1.8415627147514998e-06, "epoch": 1.3173967214716733, "percentage": 26.35, "elapsed_time": "1:09:36", "remaining_time": "3:14:33", "throughput": 8704.66, "total_tokens": 36352032} +{"current_steps": 53930, "total_steps": 204665, "loss": 0.0305, "lr": 1.8415166481929976e-06, "epoch": 1.3175188723035203, "percentage": 26.35, "elapsed_time": "1:09:36", "remaining_time": "3:14:33", "throughput": 8704.81, "total_tokens": 36355744} +{"current_steps": 53935, "total_steps": 204665, "loss": 0.0499, "lr": 1.8414705755147597e-06, "epoch": 1.3176410231353675, "percentage": 26.35, "elapsed_time": "1:09:36", "remaining_time": "3:14:32", "throughput": 8704.99, "total_tokens": 36359584} +{"current_steps": 53940, "total_steps": 204665, "loss": 0.0467, "lr": 1.8414244967171216e-06, "epoch": 1.3177631739672147, "percentage": 26.36, "elapsed_time": "1:09:37", "remaining_time": "3:14:32", "throughput": 8705.1, "total_tokens": 36363104} +{"current_steps": 53945, "total_steps": 204665, "loss": 0.1175, "lr": 1.8413784118004184e-06, "epoch": 1.3178853247990618, "percentage": 26.36, "elapsed_time": "1:09:37", "remaining_time": "3:14:31", "throughput": 8705.18, "total_tokens": 36366432} +{"current_steps": 53950, "total_steps": 204665, "loss": 0.0013, "lr": 1.8413323207649847e-06, "epoch": 1.318007475630909, "percentage": 26.36, "elapsed_time": "1:09:37", "remaining_time": "3:14:31", "throughput": 8705.34, "total_tokens": 36370208} +{"current_steps": 53955, "total_steps": 204665, "loss": 0.1004, "lr": 1.8412862236111565e-06, "epoch": 1.3181296264627562, "percentage": 26.36, "elapsed_time": "1:09:38", "remaining_time": "3:14:30", "throughput": 8705.41, "total_tokens": 36373536} +{"current_steps": 53960, "total_steps": 204665, "loss": 0.0384, "lr": 1.8412401203392681e-06, "epoch": 1.3182517772946034, "percentage": 26.37, "elapsed_time": "1:09:38", "remaining_time": "3:14:30", "throughput": 8705.55, "total_tokens": 36377184} +{"current_steps": 53965, "total_steps": 204665, "loss": 0.0789, "lr": 1.8411940109496556e-06, "epoch": 1.3183739281264506, "percentage": 26.37, "elapsed_time": "1:09:38", "remaining_time": "3:14:29", "throughput": 8705.63, "total_tokens": 36380576} +{"current_steps": 53970, "total_steps": 204665, "loss": 0.0648, "lr": 1.841147895442654e-06, "epoch": 1.3184960789582978, "percentage": 26.37, "elapsed_time": "1:09:39", "remaining_time": "3:14:29", "throughput": 8705.7, "total_tokens": 36383840} +{"current_steps": 53975, "total_steps": 204665, "loss": 0.0579, "lr": 1.8411017738185985e-06, "epoch": 1.318618229790145, "percentage": 26.37, "elapsed_time": "1:09:39", "remaining_time": "3:14:28", "throughput": 8705.8, "total_tokens": 36387296} +{"current_steps": 53980, "total_steps": 204665, "loss": 0.0875, "lr": 1.8410556460778248e-06, "epoch": 1.3187403806219922, "percentage": 26.37, "elapsed_time": "1:09:40", "remaining_time": "3:14:28", "throughput": 8705.88, "total_tokens": 36390624} +{"current_steps": 53985, "total_steps": 204665, "loss": 0.0979, "lr": 1.8410095122206682e-06, "epoch": 1.3188625314538391, "percentage": 26.38, "elapsed_time": "1:09:40", "remaining_time": "3:14:27", "throughput": 8705.91, "total_tokens": 36393760} +{"current_steps": 53990, "total_steps": 204665, "loss": 0.0856, "lr": 1.8409633722474642e-06, "epoch": 1.3189846822856863, "percentage": 26.38, "elapsed_time": "1:09:40", "remaining_time": "3:14:27", "throughput": 8705.92, "total_tokens": 36396704} +{"current_steps": 53995, "total_steps": 204665, "loss": 0.0566, "lr": 1.8409172261585483e-06, "epoch": 1.3191068331175335, "percentage": 26.38, "elapsed_time": "1:09:41", "remaining_time": "3:14:26", "throughput": 8705.98, "total_tokens": 36399968} +{"current_steps": 54000, "total_steps": 204665, "loss": 0.213, "lr": 1.8408710739542563e-06, "epoch": 1.3192289839493807, "percentage": 26.38, "elapsed_time": "1:09:41", "remaining_time": "3:14:26", "throughput": 8706.16, "total_tokens": 36403808} +{"current_steps": 54005, "total_steps": 204665, "loss": 0.0025, "lr": 1.840824915634924e-06, "epoch": 1.319351134781228, "percentage": 26.39, "elapsed_time": "1:09:41", "remaining_time": "3:14:25", "throughput": 8706.15, "total_tokens": 36406688} +{"current_steps": 54010, "total_steps": 204665, "loss": 0.2569, "lr": 1.840778751200886e-06, "epoch": 1.319473285613075, "percentage": 26.39, "elapsed_time": "1:09:42", "remaining_time": "3:14:25", "throughput": 8706.21, "total_tokens": 36409952} +{"current_steps": 54015, "total_steps": 204665, "loss": 0.1203, "lr": 1.8407325806524795e-06, "epoch": 1.319595436444922, "percentage": 26.39, "elapsed_time": "1:09:42", "remaining_time": "3:14:24", "throughput": 8706.22, "total_tokens": 36412960} +{"current_steps": 54020, "total_steps": 204665, "loss": 0.0017, "lr": 1.840686403990039e-06, "epoch": 1.3197175872767692, "percentage": 26.39, "elapsed_time": "1:09:42", "remaining_time": "3:14:24", "throughput": 8706.39, "total_tokens": 36416736} +{"current_steps": 54025, "total_steps": 204665, "loss": 0.0513, "lr": 1.8406402212139011e-06, "epoch": 1.3198397381086164, "percentage": 26.4, "elapsed_time": "1:09:43", "remaining_time": "3:14:23", "throughput": 8706.43, "total_tokens": 36419872} +{"current_steps": 54030, "total_steps": 204665, "loss": 0.0788, "lr": 1.8405940323244013e-06, "epoch": 1.3199618889404636, "percentage": 26.4, "elapsed_time": "1:09:43", "remaining_time": "3:14:23", "throughput": 8706.5, "total_tokens": 36423200} +{"current_steps": 54035, "total_steps": 204665, "loss": 0.1123, "lr": 1.8405478373218757e-06, "epoch": 1.3200840397723108, "percentage": 26.4, "elapsed_time": "1:09:43", "remaining_time": "3:14:22", "throughput": 8706.61, "total_tokens": 36426720} +{"current_steps": 54040, "total_steps": 204665, "loss": 0.1202, "lr": 1.8405016362066604e-06, "epoch": 1.320206190604158, "percentage": 26.4, "elapsed_time": "1:09:44", "remaining_time": "3:14:22", "throughput": 8706.66, "total_tokens": 36429920} +{"current_steps": 54045, "total_steps": 204665, "loss": 0.0792, "lr": 1.8404554289790906e-06, "epoch": 1.3203283414360052, "percentage": 26.41, "elapsed_time": "1:09:44", "remaining_time": "3:14:21", "throughput": 8706.84, "total_tokens": 36433760} +{"current_steps": 54050, "total_steps": 204665, "loss": 0.109, "lr": 1.8404092156395032e-06, "epoch": 1.3204504922678524, "percentage": 26.41, "elapsed_time": "1:09:44", "remaining_time": "3:14:21", "throughput": 8706.91, "total_tokens": 36437024} +{"current_steps": 54055, "total_steps": 204665, "loss": 0.0546, "lr": 1.8403629961882338e-06, "epoch": 1.3205726430996996, "percentage": 26.41, "elapsed_time": "1:09:45", "remaining_time": "3:14:20", "throughput": 8707.06, "total_tokens": 36440736} +{"current_steps": 54060, "total_steps": 204665, "loss": 0.1726, "lr": 1.8403167706256188e-06, "epoch": 1.3206947939315468, "percentage": 26.41, "elapsed_time": "1:09:45", "remaining_time": "3:14:20", "throughput": 8707.24, "total_tokens": 36444640} +{"current_steps": 54065, "total_steps": 204665, "loss": 0.0416, "lr": 1.8402705389519941e-06, "epoch": 1.320816944763394, "percentage": 26.42, "elapsed_time": "1:09:45", "remaining_time": "3:14:19", "throughput": 8707.36, "total_tokens": 36448160} +{"current_steps": 54070, "total_steps": 204665, "loss": 0.07, "lr": 1.8402243011676961e-06, "epoch": 1.3209390955952411, "percentage": 26.42, "elapsed_time": "1:09:46", "remaining_time": "3:14:19", "throughput": 8707.39, "total_tokens": 36451296} +{"current_steps": 54075, "total_steps": 204665, "loss": 0.0016, "lr": 1.8401780572730609e-06, "epoch": 1.321061246427088, "percentage": 26.42, "elapsed_time": "1:09:46", "remaining_time": "3:14:18", "throughput": 8707.55, "total_tokens": 36455008} +{"current_steps": 54080, "total_steps": 204665, "loss": 0.1247, "lr": 1.8401318072684248e-06, "epoch": 1.3211833972589353, "percentage": 26.42, "elapsed_time": "1:09:46", "remaining_time": "3:14:18", "throughput": 8707.57, "total_tokens": 36458080} +{"current_steps": 54085, "total_steps": 204665, "loss": 0.0699, "lr": 1.8400855511541246e-06, "epoch": 1.3213055480907825, "percentage": 26.43, "elapsed_time": "1:09:47", "remaining_time": "3:14:18", "throughput": 8707.72, "total_tokens": 36461792} +{"current_steps": 54090, "total_steps": 204665, "loss": 0.044, "lr": 1.8400392889304961e-06, "epoch": 1.3214276989226297, "percentage": 26.43, "elapsed_time": "1:09:47", "remaining_time": "3:14:17", "throughput": 8707.73, "total_tokens": 36464800} +{"current_steps": 54095, "total_steps": 204665, "loss": 0.0501, "lr": 1.839993020597876e-06, "epoch": 1.3215498497544769, "percentage": 26.43, "elapsed_time": "1:09:48", "remaining_time": "3:14:17", "throughput": 8707.99, "total_tokens": 36469088} +{"current_steps": 54100, "total_steps": 204665, "loss": 0.042, "lr": 1.8399467461566006e-06, "epoch": 1.321672000586324, "percentage": 26.43, "elapsed_time": "1:09:48", "remaining_time": "3:14:16", "throughput": 8708.04, "total_tokens": 36472288} +{"current_steps": 54105, "total_steps": 204665, "loss": 0.046, "lr": 1.8399004656070067e-06, "epoch": 1.321794151418171, "percentage": 26.44, "elapsed_time": "1:09:48", "remaining_time": "3:14:16", "throughput": 8708.1, "total_tokens": 36475552} +{"current_steps": 54110, "total_steps": 204665, "loss": 0.042, "lr": 1.8398541789494307e-06, "epoch": 1.3219163022500182, "percentage": 26.44, "elapsed_time": "1:09:49", "remaining_time": "3:14:15", "throughput": 8708.19, "total_tokens": 36478944} +{"current_steps": 54115, "total_steps": 204665, "loss": 0.0808, "lr": 1.839807886184209e-06, "epoch": 1.3220384530818654, "percentage": 26.44, "elapsed_time": "1:09:49", "remaining_time": "3:14:15", "throughput": 8708.26, "total_tokens": 36482208} +{"current_steps": 54120, "total_steps": 204665, "loss": 0.1379, "lr": 1.8397615873116785e-06, "epoch": 1.3221606039137126, "percentage": 26.44, "elapsed_time": "1:09:49", "remaining_time": "3:14:14", "throughput": 8708.25, "total_tokens": 36485152} +{"current_steps": 54125, "total_steps": 204665, "loss": 0.0595, "lr": 1.8397152823321761e-06, "epoch": 1.3222827547455598, "percentage": 26.45, "elapsed_time": "1:09:50", "remaining_time": "3:14:13", "throughput": 8708.27, "total_tokens": 36488160} +{"current_steps": 54130, "total_steps": 204665, "loss": 0.0372, "lr": 1.8396689712460382e-06, "epoch": 1.322404905577407, "percentage": 26.45, "elapsed_time": "1:09:50", "remaining_time": "3:14:13", "throughput": 8708.38, "total_tokens": 36491616} +{"current_steps": 54135, "total_steps": 204665, "loss": 0.0994, "lr": 1.8396226540536017e-06, "epoch": 1.3225270564092542, "percentage": 26.45, "elapsed_time": "1:09:50", "remaining_time": "3:14:12", "throughput": 8708.44, "total_tokens": 36494880} +{"current_steps": 54140, "total_steps": 204665, "loss": 0.1139, "lr": 1.8395763307552034e-06, "epoch": 1.3226492072411014, "percentage": 26.45, "elapsed_time": "1:09:51", "remaining_time": "3:14:12", "throughput": 8708.5, "total_tokens": 36498144} +{"current_steps": 54145, "total_steps": 204665, "loss": 0.067, "lr": 1.8395300013511803e-06, "epoch": 1.3227713580729485, "percentage": 26.46, "elapsed_time": "1:09:51", "remaining_time": "3:14:11", "throughput": 8708.54, "total_tokens": 36501344} +{"current_steps": 54150, "total_steps": 204665, "loss": 0.0782, "lr": 1.839483665841869e-06, "epoch": 1.3228935089047957, "percentage": 26.46, "elapsed_time": "1:09:51", "remaining_time": "3:14:11", "throughput": 8708.58, "total_tokens": 36504480} +{"current_steps": 54155, "total_steps": 204665, "loss": 0.0399, "lr": 1.8394373242276069e-06, "epoch": 1.323015659736643, "percentage": 26.46, "elapsed_time": "1:09:52", "remaining_time": "3:14:12", "throughput": 8707.78, "total_tokens": 36507936} +{"current_steps": 54160, "total_steps": 204665, "loss": 0.0857, "lr": 1.8393909765087307e-06, "epoch": 1.32313781056849, "percentage": 26.46, "elapsed_time": "1:09:52", "remaining_time": "3:14:11", "throughput": 8707.81, "total_tokens": 36511008} +{"current_steps": 54165, "total_steps": 204665, "loss": 0.078, "lr": 1.8393446226855779e-06, "epoch": 1.323259961400337, "percentage": 26.47, "elapsed_time": "1:09:53", "remaining_time": "3:14:11", "throughput": 8707.81, "total_tokens": 36513952} +{"current_steps": 54170, "total_steps": 204665, "loss": 0.0536, "lr": 1.8392982627584845e-06, "epoch": 1.3233821122321843, "percentage": 26.47, "elapsed_time": "1:09:53", "remaining_time": "3:14:10", "throughput": 8707.85, "total_tokens": 36517088} +{"current_steps": 54175, "total_steps": 204665, "loss": 0.0804, "lr": 1.839251896727789e-06, "epoch": 1.3235042630640315, "percentage": 26.47, "elapsed_time": "1:09:53", "remaining_time": "3:14:10", "throughput": 8707.94, "total_tokens": 36520544} +{"current_steps": 54180, "total_steps": 204665, "loss": 0.0014, "lr": 1.8392055245938277e-06, "epoch": 1.3236264138958787, "percentage": 26.47, "elapsed_time": "1:09:54", "remaining_time": "3:14:09", "throughput": 8708.0, "total_tokens": 36523808} +{"current_steps": 54185, "total_steps": 204665, "loss": 0.0521, "lr": 1.8391591463569383e-06, "epoch": 1.3237485647277258, "percentage": 26.47, "elapsed_time": "1:09:54", "remaining_time": "3:14:09", "throughput": 8708.06, "total_tokens": 36527072} +{"current_steps": 54190, "total_steps": 204665, "loss": 0.091, "lr": 1.8391127620174578e-06, "epoch": 1.323870715559573, "percentage": 26.48, "elapsed_time": "1:09:54", "remaining_time": "3:14:08", "throughput": 8708.13, "total_tokens": 36530336} +{"current_steps": 54195, "total_steps": 204665, "loss": 0.151, "lr": 1.8390663715757236e-06, "epoch": 1.32399286639142, "percentage": 26.48, "elapsed_time": "1:09:55", "remaining_time": "3:14:08", "throughput": 8708.14, "total_tokens": 36533344} +{"current_steps": 54200, "total_steps": 204665, "loss": 0.2122, "lr": 1.839019975032073e-06, "epoch": 1.3241150172232672, "percentage": 26.48, "elapsed_time": "1:09:55", "remaining_time": "3:14:07", "throughput": 8708.28, "total_tokens": 36536992} +{"current_steps": 54205, "total_steps": 204665, "loss": 0.0159, "lr": 1.8389735723868433e-06, "epoch": 1.3242371680551144, "percentage": 26.48, "elapsed_time": "1:09:56", "remaining_time": "3:14:07", "throughput": 8708.38, "total_tokens": 36540448} +{"current_steps": 54210, "total_steps": 204665, "loss": 0.0922, "lr": 1.8389271636403726e-06, "epoch": 1.3243593188869616, "percentage": 26.49, "elapsed_time": "1:09:56", "remaining_time": "3:14:06", "throughput": 8708.5, "total_tokens": 36544032} +{"current_steps": 54215, "total_steps": 204665, "loss": 0.1983, "lr": 1.8388807487929977e-06, "epoch": 1.3244814697188088, "percentage": 26.49, "elapsed_time": "1:09:56", "remaining_time": "3:14:06", "throughput": 8708.6, "total_tokens": 36547488} +{"current_steps": 54220, "total_steps": 204665, "loss": 0.0692, "lr": 1.8388343278450562e-06, "epoch": 1.324603620550656, "percentage": 26.49, "elapsed_time": "1:09:57", "remaining_time": "3:14:05", "throughput": 8708.61, "total_tokens": 36550496} +{"current_steps": 54225, "total_steps": 204665, "loss": 0.1026, "lr": 1.838787900796886e-06, "epoch": 1.3247257713825031, "percentage": 26.49, "elapsed_time": "1:09:57", "remaining_time": "3:14:05", "throughput": 8708.82, "total_tokens": 36554528} +{"current_steps": 54230, "total_steps": 204665, "loss": 0.0733, "lr": 1.8387414676488247e-06, "epoch": 1.3248479222143503, "percentage": 26.5, "elapsed_time": "1:09:57", "remaining_time": "3:14:04", "throughput": 8708.88, "total_tokens": 36557792} +{"current_steps": 54235, "total_steps": 204665, "loss": 0.043, "lr": 1.8386950284012097e-06, "epoch": 1.3249700730461975, "percentage": 26.5, "elapsed_time": "1:09:58", "remaining_time": "3:14:04", "throughput": 8708.92, "total_tokens": 36560928} +{"current_steps": 54240, "total_steps": 204665, "loss": 0.029, "lr": 1.8386485830543787e-06, "epoch": 1.3250922238780447, "percentage": 26.5, "elapsed_time": "1:09:58", "remaining_time": "3:14:03", "throughput": 8708.87, "total_tokens": 36563616} +{"current_steps": 54245, "total_steps": 204665, "loss": 0.0231, "lr": 1.83860213160867e-06, "epoch": 1.325214374709892, "percentage": 26.5, "elapsed_time": "1:09:58", "remaining_time": "3:14:03", "throughput": 8708.97, "total_tokens": 36567072} +{"current_steps": 54250, "total_steps": 204665, "loss": 0.0864, "lr": 1.8385556740644207e-06, "epoch": 1.3253365255417389, "percentage": 26.51, "elapsed_time": "1:09:59", "remaining_time": "3:14:02", "throughput": 8709.01, "total_tokens": 36570208} +{"current_steps": 54255, "total_steps": 204665, "loss": 0.0398, "lr": 1.8385092104219692e-06, "epoch": 1.325458676373586, "percentage": 26.51, "elapsed_time": "1:09:59", "remaining_time": "3:14:02", "throughput": 8709.14, "total_tokens": 36573856} +{"current_steps": 54260, "total_steps": 204665, "loss": 0.1231, "lr": 1.8384627406816532e-06, "epoch": 1.3255808272054332, "percentage": 26.51, "elapsed_time": "1:09:59", "remaining_time": "3:14:01", "throughput": 8709.23, "total_tokens": 36577248} +{"current_steps": 54265, "total_steps": 204665, "loss": 0.0889, "lr": 1.8384162648438104e-06, "epoch": 1.3257029780372804, "percentage": 26.51, "elapsed_time": "1:10:00", "remaining_time": "3:14:01", "throughput": 8709.34, "total_tokens": 36580768} +{"current_steps": 54270, "total_steps": 204665, "loss": 0.1318, "lr": 1.8383697829087792e-06, "epoch": 1.3258251288691276, "percentage": 26.52, "elapsed_time": "1:10:00", "remaining_time": "3:14:00", "throughput": 8709.47, "total_tokens": 36584416} +{"current_steps": 54275, "total_steps": 204665, "loss": 0.0856, "lr": 1.8383232948768975e-06, "epoch": 1.3259472797009748, "percentage": 26.52, "elapsed_time": "1:10:00", "remaining_time": "3:14:00", "throughput": 8709.6, "total_tokens": 36588000} +{"current_steps": 54280, "total_steps": 204665, "loss": 0.1556, "lr": 1.8382768007485033e-06, "epoch": 1.326069430532822, "percentage": 26.52, "elapsed_time": "1:10:01", "remaining_time": "3:13:59", "throughput": 8709.62, "total_tokens": 36591072} +{"current_steps": 54285, "total_steps": 204665, "loss": 0.1886, "lr": 1.8382303005239346e-06, "epoch": 1.326191581364669, "percentage": 26.52, "elapsed_time": "1:10:01", "remaining_time": "3:13:59", "throughput": 8709.71, "total_tokens": 36594464} +{"current_steps": 54290, "total_steps": 204665, "loss": 0.0695, "lr": 1.8381837942035299e-06, "epoch": 1.3263137321965162, "percentage": 26.53, "elapsed_time": "1:10:01", "remaining_time": "3:13:58", "throughput": 8709.81, "total_tokens": 36597920} +{"current_steps": 54295, "total_steps": 204665, "loss": 0.1333, "lr": 1.838137281787627e-06, "epoch": 1.3264358830283633, "percentage": 26.53, "elapsed_time": "1:10:02", "remaining_time": "3:13:58", "throughput": 8709.85, "total_tokens": 36601056} +{"current_steps": 54300, "total_steps": 204665, "loss": 0.0037, "lr": 1.8380907632765644e-06, "epoch": 1.3265580338602105, "percentage": 26.53, "elapsed_time": "1:10:02", "remaining_time": "3:13:57", "throughput": 8709.9, "total_tokens": 36604256} +{"current_steps": 54305, "total_steps": 204665, "loss": 0.1421, "lr": 1.8380442386706805e-06, "epoch": 1.3266801846920577, "percentage": 26.53, "elapsed_time": "1:10:02", "remaining_time": "3:13:57", "throughput": 8709.98, "total_tokens": 36607584} +{"current_steps": 54310, "total_steps": 204665, "loss": 0.0126, "lr": 1.8379977079703134e-06, "epoch": 1.326802335523905, "percentage": 26.54, "elapsed_time": "1:10:03", "remaining_time": "3:13:56", "throughput": 8710.14, "total_tokens": 36611360} +{"current_steps": 54315, "total_steps": 204665, "loss": 0.1882, "lr": 1.8379511711758013e-06, "epoch": 1.326924486355752, "percentage": 26.54, "elapsed_time": "1:10:03", "remaining_time": "3:13:56", "throughput": 8710.16, "total_tokens": 36614432} +{"current_steps": 54320, "total_steps": 204665, "loss": 0.1443, "lr": 1.8379046282874833e-06, "epoch": 1.3270466371875993, "percentage": 26.54, "elapsed_time": "1:10:03", "remaining_time": "3:13:55", "throughput": 8710.19, "total_tokens": 36617504} +{"current_steps": 54325, "total_steps": 204665, "loss": 0.1574, "lr": 1.8378580793056972e-06, "epoch": 1.3271687880194465, "percentage": 26.54, "elapsed_time": "1:10:04", "remaining_time": "3:13:55", "throughput": 8710.2, "total_tokens": 36620512} +{"current_steps": 54330, "total_steps": 204665, "loss": 0.0449, "lr": 1.837811524230782e-06, "epoch": 1.3272909388512937, "percentage": 26.55, "elapsed_time": "1:10:04", "remaining_time": "3:13:54", "throughput": 8710.31, "total_tokens": 36624032} +{"current_steps": 54335, "total_steps": 204665, "loss": 0.0106, "lr": 1.837764963063076e-06, "epoch": 1.3274130896831409, "percentage": 26.55, "elapsed_time": "1:10:05", "remaining_time": "3:13:54", "throughput": 8710.42, "total_tokens": 36627552} +{"current_steps": 54340, "total_steps": 204665, "loss": 0.0677, "lr": 1.837718395802918e-06, "epoch": 1.3275352405149878, "percentage": 26.55, "elapsed_time": "1:10:05", "remaining_time": "3:13:53", "throughput": 8710.48, "total_tokens": 36630880} +{"current_steps": 54345, "total_steps": 204665, "loss": 0.0778, "lr": 1.8376718224506462e-06, "epoch": 1.327657391346835, "percentage": 26.55, "elapsed_time": "1:10:05", "remaining_time": "3:13:53", "throughput": 8710.58, "total_tokens": 36634336} +{"current_steps": 54350, "total_steps": 204665, "loss": 0.0222, "lr": 1.8376252430065996e-06, "epoch": 1.3277795421786822, "percentage": 26.56, "elapsed_time": "1:10:06", "remaining_time": "3:13:52", "throughput": 8710.61, "total_tokens": 36637472} +{"current_steps": 54355, "total_steps": 204665, "loss": 0.0163, "lr": 1.8375786574711172e-06, "epoch": 1.3279016930105294, "percentage": 26.56, "elapsed_time": "1:10:06", "remaining_time": "3:13:52", "throughput": 8710.7, "total_tokens": 36640864} +{"current_steps": 54360, "total_steps": 204665, "loss": 0.1653, "lr": 1.8375320658445373e-06, "epoch": 1.3280238438423766, "percentage": 26.56, "elapsed_time": "1:10:06", "remaining_time": "3:13:51", "throughput": 8710.78, "total_tokens": 36644256} +{"current_steps": 54365, "total_steps": 204665, "loss": 0.0613, "lr": 1.8374854681271991e-06, "epoch": 1.3281459946742238, "percentage": 26.56, "elapsed_time": "1:10:07", "remaining_time": "3:13:51", "throughput": 8710.85, "total_tokens": 36647584} +{"current_steps": 54370, "total_steps": 204665, "loss": 0.1438, "lr": 1.8374388643194415e-06, "epoch": 1.328268145506071, "percentage": 26.57, "elapsed_time": "1:10:07", "remaining_time": "3:13:50", "throughput": 8710.95, "total_tokens": 36651040} +{"current_steps": 54375, "total_steps": 204665, "loss": 0.003, "lr": 1.8373922544216026e-06, "epoch": 1.328390296337918, "percentage": 26.57, "elapsed_time": "1:10:07", "remaining_time": "3:13:50", "throughput": 8711.01, "total_tokens": 36654304} +{"current_steps": 54380, "total_steps": 204665, "loss": 0.1305, "lr": 1.8373456384340224e-06, "epoch": 1.3285124471697651, "percentage": 26.57, "elapsed_time": "1:10:08", "remaining_time": "3:13:49", "throughput": 8711.06, "total_tokens": 36657504} +{"current_steps": 54385, "total_steps": 204665, "loss": 0.033, "lr": 1.8372990163570396e-06, "epoch": 1.3286345980016123, "percentage": 26.57, "elapsed_time": "1:10:08", "remaining_time": "3:13:49", "throughput": 8711.11, "total_tokens": 36660704} +{"current_steps": 54390, "total_steps": 204665, "loss": 0.0023, "lr": 1.8372523881909929e-06, "epoch": 1.3287567488334595, "percentage": 26.58, "elapsed_time": "1:10:08", "remaining_time": "3:13:48", "throughput": 8711.24, "total_tokens": 36664288} +{"current_steps": 54395, "total_steps": 204665, "loss": 0.1235, "lr": 1.837205753936222e-06, "epoch": 1.3288788996653067, "percentage": 26.58, "elapsed_time": "1:10:09", "remaining_time": "3:13:48", "throughput": 8711.36, "total_tokens": 36667872} +{"current_steps": 54400, "total_steps": 204665, "loss": 0.1215, "lr": 1.8371591135930653e-06, "epoch": 1.329001050497154, "percentage": 26.58, "elapsed_time": "1:10:09", "remaining_time": "3:13:47", "throughput": 8711.46, "total_tokens": 36671328} +{"current_steps": 54405, "total_steps": 204665, "loss": 0.0267, "lr": 1.8371124671618627e-06, "epoch": 1.329123201329001, "percentage": 26.58, "elapsed_time": "1:10:09", "remaining_time": "3:13:47", "throughput": 8711.48, "total_tokens": 36674400} +{"current_steps": 54410, "total_steps": 204665, "loss": 0.0422, "lr": 1.8370658146429529e-06, "epoch": 1.3292453521608483, "percentage": 26.58, "elapsed_time": "1:10:10", "remaining_time": "3:13:46", "throughput": 8711.71, "total_tokens": 36678560} +{"current_steps": 54415, "total_steps": 204665, "loss": 0.0718, "lr": 1.8370191560366752e-06, "epoch": 1.3293675029926955, "percentage": 26.59, "elapsed_time": "1:10:10", "remaining_time": "3:13:46", "throughput": 8711.72, "total_tokens": 36681568} +{"current_steps": 54420, "total_steps": 204665, "loss": 0.1318, "lr": 1.8369724913433694e-06, "epoch": 1.3294896538245426, "percentage": 26.59, "elapsed_time": "1:10:10", "remaining_time": "3:13:45", "throughput": 8711.78, "total_tokens": 36684832} +{"current_steps": 54425, "total_steps": 204665, "loss": 0.08, "lr": 1.8369258205633741e-06, "epoch": 1.3296118046563898, "percentage": 26.59, "elapsed_time": "1:10:11", "remaining_time": "3:13:45", "throughput": 8711.94, "total_tokens": 36688608} +{"current_steps": 54430, "total_steps": 204665, "loss": 0.26, "lr": 1.8368791436970295e-06, "epoch": 1.3297339554882368, "percentage": 26.59, "elapsed_time": "1:10:11", "remaining_time": "3:13:44", "throughput": 8712.02, "total_tokens": 36692000} +{"current_steps": 54435, "total_steps": 204665, "loss": 0.0638, "lr": 1.8368324607446747e-06, "epoch": 1.329856106320084, "percentage": 26.6, "elapsed_time": "1:10:12", "remaining_time": "3:13:44", "throughput": 8712.2, "total_tokens": 36695904} +{"current_steps": 54440, "total_steps": 204665, "loss": 0.015, "lr": 1.8367857717066485e-06, "epoch": 1.3299782571519312, "percentage": 26.6, "elapsed_time": "1:10:12", "remaining_time": "3:13:43", "throughput": 8712.3, "total_tokens": 36699360} +{"current_steps": 54445, "total_steps": 204665, "loss": 0.0694, "lr": 1.8367390765832917e-06, "epoch": 1.3301004079837784, "percentage": 26.6, "elapsed_time": "1:10:12", "remaining_time": "3:13:43", "throughput": 8712.36, "total_tokens": 36702624} +{"current_steps": 54450, "total_steps": 204665, "loss": 0.1328, "lr": 1.8366923753749433e-06, "epoch": 1.3302225588156256, "percentage": 26.6, "elapsed_time": "1:10:13", "remaining_time": "3:13:42", "throughput": 8712.49, "total_tokens": 36706272} +{"current_steps": 54455, "total_steps": 204665, "loss": 0.068, "lr": 1.8366456680819428e-06, "epoch": 1.3303447096474728, "percentage": 26.61, "elapsed_time": "1:10:13", "remaining_time": "3:13:42", "throughput": 8712.52, "total_tokens": 36709344} +{"current_steps": 54460, "total_steps": 204665, "loss": 0.1217, "lr": 1.83659895470463e-06, "epoch": 1.33046686047932, "percentage": 26.61, "elapsed_time": "1:10:13", "remaining_time": "3:13:41", "throughput": 8712.6, "total_tokens": 36712672} +{"current_steps": 54465, "total_steps": 204665, "loss": 0.1103, "lr": 1.8365522352433445e-06, "epoch": 1.330589011311167, "percentage": 26.61, "elapsed_time": "1:10:14", "remaining_time": "3:13:41", "throughput": 8712.67, "total_tokens": 36716000} +{"current_steps": 54470, "total_steps": 204665, "loss": 0.036, "lr": 1.8365055096984264e-06, "epoch": 1.330711162143014, "percentage": 26.61, "elapsed_time": "1:10:14", "remaining_time": "3:13:40", "throughput": 8712.86, "total_tokens": 36719968} +{"current_steps": 54475, "total_steps": 204665, "loss": 0.0812, "lr": 1.8364587780702147e-06, "epoch": 1.3308333129748613, "percentage": 26.62, "elapsed_time": "1:10:15", "remaining_time": "3:13:41", "throughput": 8712.4, "total_tokens": 36723360} +{"current_steps": 54480, "total_steps": 204665, "loss": 0.1162, "lr": 1.8364120403590502e-06, "epoch": 1.3309554638067085, "percentage": 26.62, "elapsed_time": "1:10:15", "remaining_time": "3:13:40", "throughput": 8712.43, "total_tokens": 36726432} +{"current_steps": 54485, "total_steps": 204665, "loss": 0.1438, "lr": 1.8363652965652723e-06, "epoch": 1.3310776146385557, "percentage": 26.62, "elapsed_time": "1:10:15", "remaining_time": "3:13:40", "throughput": 8712.52, "total_tokens": 36729888} +{"current_steps": 54490, "total_steps": 204665, "loss": 0.0286, "lr": 1.836318546689221e-06, "epoch": 1.3311997654704029, "percentage": 26.62, "elapsed_time": "1:10:16", "remaining_time": "3:13:39", "throughput": 8712.54, "total_tokens": 36732896} +{"current_steps": 54495, "total_steps": 204665, "loss": 0.0012, "lr": 1.8362717907312364e-06, "epoch": 1.33132191630225, "percentage": 26.63, "elapsed_time": "1:10:16", "remaining_time": "3:13:39", "throughput": 8712.64, "total_tokens": 36736352} +{"current_steps": 54500, "total_steps": 204665, "loss": 0.0319, "lr": 1.8362250286916581e-06, "epoch": 1.3314440671340972, "percentage": 26.63, "elapsed_time": "1:10:16", "remaining_time": "3:13:38", "throughput": 8712.78, "total_tokens": 36740064} +{"current_steps": 54505, "total_steps": 204665, "loss": 0.0768, "lr": 1.8361782605708267e-06, "epoch": 1.3315662179659444, "percentage": 26.63, "elapsed_time": "1:10:17", "remaining_time": "3:13:38", "throughput": 8712.85, "total_tokens": 36743328} +{"current_steps": 54510, "total_steps": 204665, "loss": 0.0441, "lr": 1.836131486369082e-06, "epoch": 1.3316883687977916, "percentage": 26.63, "elapsed_time": "1:10:17", "remaining_time": "3:13:37", "throughput": 8712.88, "total_tokens": 36746464} +{"current_steps": 54515, "total_steps": 204665, "loss": 0.0483, "lr": 1.8360847060867642e-06, "epoch": 1.3318105196296388, "percentage": 26.64, "elapsed_time": "1:10:17", "remaining_time": "3:13:37", "throughput": 8713.04, "total_tokens": 36750240} +{"current_steps": 54520, "total_steps": 204665, "loss": 0.0849, "lr": 1.8360379197242137e-06, "epoch": 1.3319326704614858, "percentage": 26.64, "elapsed_time": "1:10:18", "remaining_time": "3:13:36", "throughput": 8713.16, "total_tokens": 36753760} +{"current_steps": 54525, "total_steps": 204665, "loss": 0.0359, "lr": 1.8359911272817706e-06, "epoch": 1.332054821293333, "percentage": 26.64, "elapsed_time": "1:10:18", "remaining_time": "3:13:36", "throughput": 8713.25, "total_tokens": 36757216} +{"current_steps": 54530, "total_steps": 204665, "loss": 0.0463, "lr": 1.835944328759775e-06, "epoch": 1.3321769721251802, "percentage": 26.64, "elapsed_time": "1:10:18", "remaining_time": "3:13:35", "throughput": 8713.39, "total_tokens": 36760864} +{"current_steps": 54535, "total_steps": 204665, "loss": 0.1228, "lr": 1.8358975241585675e-06, "epoch": 1.3322991229570273, "percentage": 26.65, "elapsed_time": "1:10:19", "remaining_time": "3:13:35", "throughput": 8713.44, "total_tokens": 36764064} +{"current_steps": 54540, "total_steps": 204665, "loss": 0.0567, "lr": 1.8358507134784882e-06, "epoch": 1.3324212737888745, "percentage": 26.65, "elapsed_time": "1:10:19", "remaining_time": "3:13:34", "throughput": 8713.53, "total_tokens": 36767520} +{"current_steps": 54545, "total_steps": 204665, "loss": 0.1073, "lr": 1.8358038967198776e-06, "epoch": 1.3325434246207217, "percentage": 26.65, "elapsed_time": "1:10:19", "remaining_time": "3:13:34", "throughput": 8713.69, "total_tokens": 36771296} +{"current_steps": 54550, "total_steps": 204665, "loss": 0.0462, "lr": 1.8357570738830768e-06, "epoch": 1.3326655754525687, "percentage": 26.65, "elapsed_time": "1:10:20", "remaining_time": "3:13:33", "throughput": 8713.81, "total_tokens": 36774880} +{"current_steps": 54555, "total_steps": 204665, "loss": 0.053, "lr": 1.8357102449684254e-06, "epoch": 1.3327877262844159, "percentage": 26.66, "elapsed_time": "1:10:20", "remaining_time": "3:13:33", "throughput": 8713.94, "total_tokens": 36778464} +{"current_steps": 54560, "total_steps": 204665, "loss": 0.0801, "lr": 1.8356634099762643e-06, "epoch": 1.332909877116263, "percentage": 26.66, "elapsed_time": "1:10:20", "remaining_time": "3:13:32", "throughput": 8714.0, "total_tokens": 36781792} +{"current_steps": 54565, "total_steps": 204665, "loss": 0.0478, "lr": 1.8356165689069343e-06, "epoch": 1.3330320279481103, "percentage": 26.66, "elapsed_time": "1:10:21", "remaining_time": "3:13:32", "throughput": 8714.05, "total_tokens": 36784992} +{"current_steps": 54570, "total_steps": 204665, "loss": 0.0293, "lr": 1.8355697217607758e-06, "epoch": 1.3331541787799575, "percentage": 26.66, "elapsed_time": "1:10:21", "remaining_time": "3:13:31", "throughput": 8714.12, "total_tokens": 36788320} +{"current_steps": 54575, "total_steps": 204665, "loss": 0.0018, "lr": 1.8355228685381293e-06, "epoch": 1.3332763296118046, "percentage": 26.67, "elapsed_time": "1:10:22", "remaining_time": "3:13:31", "throughput": 8714.21, "total_tokens": 36791776} +{"current_steps": 54580, "total_steps": 204665, "loss": 0.1309, "lr": 1.8354760092393363e-06, "epoch": 1.3333984804436518, "percentage": 26.67, "elapsed_time": "1:10:22", "remaining_time": "3:13:30", "throughput": 8714.27, "total_tokens": 36795040} +{"current_steps": 54585, "total_steps": 204665, "loss": 0.0681, "lr": 1.8354291438647366e-06, "epoch": 1.333520631275499, "percentage": 26.67, "elapsed_time": "1:10:22", "remaining_time": "3:13:30", "throughput": 8714.33, "total_tokens": 36798304} +{"current_steps": 54590, "total_steps": 204665, "loss": 0.1073, "lr": 1.8353822724146714e-06, "epoch": 1.3336427821073462, "percentage": 26.67, "elapsed_time": "1:10:23", "remaining_time": "3:13:29", "throughput": 8714.5, "total_tokens": 36802144} +{"current_steps": 54595, "total_steps": 204665, "loss": 0.1152, "lr": 1.8353353948894819e-06, "epoch": 1.3337649329391934, "percentage": 26.68, "elapsed_time": "1:10:23", "remaining_time": "3:13:29", "throughput": 8714.56, "total_tokens": 36805408} +{"current_steps": 54600, "total_steps": 204665, "loss": 0.0011, "lr": 1.8352885112895086e-06, "epoch": 1.3338870837710406, "percentage": 26.68, "elapsed_time": "1:10:23", "remaining_time": "3:13:28", "throughput": 8714.69, "total_tokens": 36809056} +{"current_steps": 54605, "total_steps": 204665, "loss": 0.1124, "lr": 1.8352416216150926e-06, "epoch": 1.3340092346028878, "percentage": 26.68, "elapsed_time": "1:10:24", "remaining_time": "3:13:28", "throughput": 8714.71, "total_tokens": 36812128} +{"current_steps": 54610, "total_steps": 204665, "loss": 0.1089, "lr": 1.8351947258665747e-06, "epoch": 1.3341313854347348, "percentage": 26.68, "elapsed_time": "1:10:24", "remaining_time": "3:13:27", "throughput": 8714.93, "total_tokens": 36816224} +{"current_steps": 54615, "total_steps": 204665, "loss": 0.0894, "lr": 1.8351478240442963e-06, "epoch": 1.334253536266582, "percentage": 26.69, "elapsed_time": "1:10:24", "remaining_time": "3:13:27", "throughput": 8714.99, "total_tokens": 36819424} +{"current_steps": 54620, "total_steps": 204665, "loss": 0.0718, "lr": 1.8351009161485983e-06, "epoch": 1.3343756870984291, "percentage": 26.69, "elapsed_time": "1:10:25", "remaining_time": "3:13:26", "throughput": 8715.13, "total_tokens": 36823136} +{"current_steps": 54625, "total_steps": 204665, "loss": 0.032, "lr": 1.835054002179822e-06, "epoch": 1.3344978379302763, "percentage": 26.69, "elapsed_time": "1:10:25", "remaining_time": "3:13:26", "throughput": 8715.2, "total_tokens": 36826464} +{"current_steps": 54630, "total_steps": 204665, "loss": 0.0474, "lr": 1.835007082138308e-06, "epoch": 1.3346199887621235, "percentage": 26.69, "elapsed_time": "1:10:25", "remaining_time": "3:13:25", "throughput": 8715.3, "total_tokens": 36829920} +{"current_steps": 54635, "total_steps": 204665, "loss": 0.0734, "lr": 1.8349601560243983e-06, "epoch": 1.3347421395939707, "percentage": 26.69, "elapsed_time": "1:10:26", "remaining_time": "3:13:25", "throughput": 8715.31, "total_tokens": 36832928} +{"current_steps": 54640, "total_steps": 204665, "loss": 0.0744, "lr": 1.8349132238384334e-06, "epoch": 1.3348642904258177, "percentage": 26.7, "elapsed_time": "1:10:26", "remaining_time": "3:13:24", "throughput": 8715.36, "total_tokens": 36836128} +{"current_steps": 54645, "total_steps": 204665, "loss": 0.0449, "lr": 1.8348662855807552e-06, "epoch": 1.3349864412576649, "percentage": 26.7, "elapsed_time": "1:10:26", "remaining_time": "3:13:24", "throughput": 8715.5, "total_tokens": 36839776} +{"current_steps": 54650, "total_steps": 204665, "loss": 0.236, "lr": 1.8348193412517051e-06, "epoch": 1.335108592089512, "percentage": 26.7, "elapsed_time": "1:10:27", "remaining_time": "3:13:23", "throughput": 8715.5, "total_tokens": 36842720} +{"current_steps": 54655, "total_steps": 204665, "loss": 0.0411, "lr": 1.8347723908516234e-06, "epoch": 1.3352307429213592, "percentage": 26.7, "elapsed_time": "1:10:27", "remaining_time": "3:13:23", "throughput": 8715.66, "total_tokens": 36846496} +{"current_steps": 54660, "total_steps": 204665, "loss": 0.1155, "lr": 1.834725434380853e-06, "epoch": 1.3353528937532064, "percentage": 26.71, "elapsed_time": "1:10:27", "remaining_time": "3:13:22", "throughput": 8715.7, "total_tokens": 36849632} +{"current_steps": 54665, "total_steps": 204665, "loss": 0.0413, "lr": 1.8346784718397346e-06, "epoch": 1.3354750445850536, "percentage": 26.71, "elapsed_time": "1:10:28", "remaining_time": "3:13:22", "throughput": 8715.71, "total_tokens": 36852640} +{"current_steps": 54670, "total_steps": 204665, "loss": 0.0515, "lr": 1.8346315032286098e-06, "epoch": 1.3355971954169008, "percentage": 26.71, "elapsed_time": "1:10:28", "remaining_time": "3:13:21", "throughput": 8715.81, "total_tokens": 36856096} +{"current_steps": 54675, "total_steps": 204665, "loss": 0.1245, "lr": 1.83458452854782e-06, "epoch": 1.335719346248748, "percentage": 26.71, "elapsed_time": "1:10:28", "remaining_time": "3:13:21", "throughput": 8715.87, "total_tokens": 36859360} +{"current_steps": 54680, "total_steps": 204665, "loss": 0.1605, "lr": 1.8345375477977076e-06, "epoch": 1.3358414970805952, "percentage": 26.72, "elapsed_time": "1:10:29", "remaining_time": "3:13:20", "throughput": 8715.92, "total_tokens": 36862560} +{"current_steps": 54685, "total_steps": 204665, "loss": 0.1335, "lr": 1.8344905609786132e-06, "epoch": 1.3359636479124424, "percentage": 26.72, "elapsed_time": "1:10:29", "remaining_time": "3:13:20", "throughput": 8715.95, "total_tokens": 36865696} +{"current_steps": 54690, "total_steps": 204665, "loss": 0.0543, "lr": 1.8344435680908793e-06, "epoch": 1.3360857987442896, "percentage": 26.72, "elapsed_time": "1:10:30", "remaining_time": "3:13:19", "throughput": 8716.0, "total_tokens": 36868896} +{"current_steps": 54695, "total_steps": 204665, "loss": 0.1462, "lr": 1.8343965691348471e-06, "epoch": 1.3362079495761365, "percentage": 26.72, "elapsed_time": "1:10:30", "remaining_time": "3:13:19", "throughput": 8716.05, "total_tokens": 36872096} +{"current_steps": 54700, "total_steps": 204665, "loss": 0.103, "lr": 1.8343495641108586e-06, "epoch": 1.3363301004079837, "percentage": 26.73, "elapsed_time": "1:10:30", "remaining_time": "3:13:18", "throughput": 8716.16, "total_tokens": 36875616} +{"current_steps": 54705, "total_steps": 204665, "loss": 0.1377, "lr": 1.8343025530192558e-06, "epoch": 1.336452251239831, "percentage": 26.73, "elapsed_time": "1:10:31", "remaining_time": "3:13:18", "throughput": 8716.24, "total_tokens": 36878944} +{"current_steps": 54710, "total_steps": 204665, "loss": 0.018, "lr": 1.8342555358603804e-06, "epoch": 1.336574402071678, "percentage": 26.73, "elapsed_time": "1:10:31", "remaining_time": "3:13:17", "throughput": 8716.33, "total_tokens": 36882336} +{"current_steps": 54715, "total_steps": 204665, "loss": 0.1011, "lr": 1.8342085126345743e-06, "epoch": 1.3366965529035253, "percentage": 26.73, "elapsed_time": "1:10:31", "remaining_time": "3:13:17", "throughput": 8716.42, "total_tokens": 36885792} +{"current_steps": 54720, "total_steps": 204665, "loss": 0.0022, "lr": 1.8341614833421794e-06, "epoch": 1.3368187037353725, "percentage": 26.74, "elapsed_time": "1:10:32", "remaining_time": "3:13:16", "throughput": 8716.5, "total_tokens": 36889120} +{"current_steps": 54725, "total_steps": 204665, "loss": 0.054, "lr": 1.8341144479835382e-06, "epoch": 1.3369408545672197, "percentage": 26.74, "elapsed_time": "1:10:32", "remaining_time": "3:13:16", "throughput": 8716.58, "total_tokens": 36892512} +{"current_steps": 54730, "total_steps": 204665, "loss": 0.0393, "lr": 1.8340674065589923e-06, "epoch": 1.3370630053990666, "percentage": 26.74, "elapsed_time": "1:10:32", "remaining_time": "3:13:15", "throughput": 8716.67, "total_tokens": 36895904} +{"current_steps": 54735, "total_steps": 204665, "loss": 0.0437, "lr": 1.8340203590688837e-06, "epoch": 1.3371851562309138, "percentage": 26.74, "elapsed_time": "1:10:33", "remaining_time": "3:13:15", "throughput": 8716.76, "total_tokens": 36899360} +{"current_steps": 54740, "total_steps": 204665, "loss": 0.046, "lr": 1.8339733055135546e-06, "epoch": 1.337307307062761, "percentage": 26.75, "elapsed_time": "1:10:33", "remaining_time": "3:13:14", "throughput": 8716.78, "total_tokens": 36902368} +{"current_steps": 54745, "total_steps": 204665, "loss": 0.097, "lr": 1.8339262458933476e-06, "epoch": 1.3374294578946082, "percentage": 26.75, "elapsed_time": "1:10:33", "remaining_time": "3:13:14", "throughput": 8716.85, "total_tokens": 36905696} +{"current_steps": 54750, "total_steps": 204665, "loss": 0.1131, "lr": 1.8338791802086045e-06, "epoch": 1.3375516087264554, "percentage": 26.75, "elapsed_time": "1:10:34", "remaining_time": "3:13:13", "throughput": 8717.14, "total_tokens": 36910176} +{"current_steps": 54755, "total_steps": 204665, "loss": 0.068, "lr": 1.8338321084596678e-06, "epoch": 1.3376737595583026, "percentage": 26.75, "elapsed_time": "1:10:34", "remaining_time": "3:13:13", "throughput": 8717.17, "total_tokens": 36913312} +{"current_steps": 54760, "total_steps": 204665, "loss": 0.068, "lr": 1.8337850306468795e-06, "epoch": 1.3377959103901498, "percentage": 26.76, "elapsed_time": "1:10:34", "remaining_time": "3:13:12", "throughput": 8717.23, "total_tokens": 36916576} +{"current_steps": 54765, "total_steps": 204665, "loss": 0.0019, "lr": 1.8337379467705824e-06, "epoch": 1.337918061221997, "percentage": 26.76, "elapsed_time": "1:10:35", "remaining_time": "3:13:12", "throughput": 8717.39, "total_tokens": 36920352} +{"current_steps": 54770, "total_steps": 204665, "loss": 0.1602, "lr": 1.8336908568311187e-06, "epoch": 1.3380402120538442, "percentage": 26.76, "elapsed_time": "1:10:35", "remaining_time": "3:13:11", "throughput": 8717.39, "total_tokens": 36923296} +{"current_steps": 54775, "total_steps": 204665, "loss": 0.0468, "lr": 1.8336437608288309e-06, "epoch": 1.3381623628856913, "percentage": 26.76, "elapsed_time": "1:10:35", "remaining_time": "3:13:11", "throughput": 8717.44, "total_tokens": 36926496} +{"current_steps": 54780, "total_steps": 204665, "loss": 0.2092, "lr": 1.8335966587640615e-06, "epoch": 1.3382845137175385, "percentage": 26.77, "elapsed_time": "1:10:36", "remaining_time": "3:13:11", "throughput": 8717.53, "total_tokens": 36929952} +{"current_steps": 54785, "total_steps": 204665, "loss": 0.002, "lr": 1.8335495506371529e-06, "epoch": 1.3384066645493855, "percentage": 26.77, "elapsed_time": "1:10:36", "remaining_time": "3:13:10", "throughput": 8717.56, "total_tokens": 36933024} +{"current_steps": 54790, "total_steps": 204665, "loss": 0.0973, "lr": 1.8335024364484477e-06, "epoch": 1.3385288153812327, "percentage": 26.77, "elapsed_time": "1:10:36", "remaining_time": "3:13:10", "throughput": 8717.67, "total_tokens": 36936608} +{"current_steps": 54795, "total_steps": 204665, "loss": 0.1195, "lr": 1.8334553161982887e-06, "epoch": 1.3386509662130799, "percentage": 26.77, "elapsed_time": "1:10:37", "remaining_time": "3:13:09", "throughput": 8717.78, "total_tokens": 36940128} +{"current_steps": 54800, "total_steps": 204665, "loss": 0.0434, "lr": 1.8334081898870185e-06, "epoch": 1.338773117044927, "percentage": 26.78, "elapsed_time": "1:10:37", "remaining_time": "3:13:09", "throughput": 8718.07, "total_tokens": 36944672} +{"current_steps": 54805, "total_steps": 204665, "loss": 0.053, "lr": 1.8333610575149795e-06, "epoch": 1.3388952678767743, "percentage": 26.78, "elapsed_time": "1:10:38", "remaining_time": "3:13:08", "throughput": 8718.17, "total_tokens": 36948192} +{"current_steps": 54810, "total_steps": 204665, "loss": 0.0582, "lr": 1.8333139190825149e-06, "epoch": 1.3390174187086215, "percentage": 26.78, "elapsed_time": "1:10:38", "remaining_time": "3:13:08", "throughput": 8718.23, "total_tokens": 36951456} +{"current_steps": 54815, "total_steps": 204665, "loss": 0.0817, "lr": 1.8332667745899672e-06, "epoch": 1.3391395695404686, "percentage": 26.78, "elapsed_time": "1:10:38", "remaining_time": "3:13:07", "throughput": 8718.23, "total_tokens": 36954400} +{"current_steps": 54820, "total_steps": 204665, "loss": 0.1396, "lr": 1.8332196240376797e-06, "epoch": 1.3392617203723156, "percentage": 26.79, "elapsed_time": "1:10:39", "remaining_time": "3:13:07", "throughput": 8718.31, "total_tokens": 36957792} +{"current_steps": 54825, "total_steps": 204665, "loss": 0.0484, "lr": 1.833172467425995e-06, "epoch": 1.3393838712041628, "percentage": 26.79, "elapsed_time": "1:10:39", "remaining_time": "3:13:06", "throughput": 8718.33, "total_tokens": 36960864} +{"current_steps": 54830, "total_steps": 204665, "loss": 0.0858, "lr": 1.8331253047552558e-06, "epoch": 1.33950602203601, "percentage": 26.79, "elapsed_time": "1:10:39", "remaining_time": "3:13:06", "throughput": 8718.4, "total_tokens": 36964192} +{"current_steps": 54835, "total_steps": 204665, "loss": 0.1635, "lr": 1.8330781360258052e-06, "epoch": 1.3396281728678572, "percentage": 26.79, "elapsed_time": "1:10:40", "remaining_time": "3:13:05", "throughput": 8718.7, "total_tokens": 36968736} +{"current_steps": 54840, "total_steps": 204665, "loss": 0.0901, "lr": 1.8330309612379867e-06, "epoch": 1.3397503236997044, "percentage": 26.8, "elapsed_time": "1:10:40", "remaining_time": "3:13:05", "throughput": 8718.79, "total_tokens": 36972192} +{"current_steps": 54845, "total_steps": 204665, "loss": 0.0333, "lr": 1.832983780392143e-06, "epoch": 1.3398724745315516, "percentage": 26.8, "elapsed_time": "1:10:40", "remaining_time": "3:13:04", "throughput": 8718.86, "total_tokens": 36975520} +{"current_steps": 54850, "total_steps": 204665, "loss": 0.0752, "lr": 1.8329365934886168e-06, "epoch": 1.3399946253633988, "percentage": 26.8, "elapsed_time": "1:10:41", "remaining_time": "3:13:04", "throughput": 8719.0, "total_tokens": 36979232} +{"current_steps": 54855, "total_steps": 204665, "loss": 0.0017, "lr": 1.8328894005277519e-06, "epoch": 1.340116776195246, "percentage": 26.8, "elapsed_time": "1:10:41", "remaining_time": "3:13:03", "throughput": 8719.13, "total_tokens": 36982880} +{"current_steps": 54860, "total_steps": 204665, "loss": 0.0017, "lr": 1.8328422015098913e-06, "epoch": 1.3402389270270931, "percentage": 26.8, "elapsed_time": "1:10:41", "remaining_time": "3:13:03", "throughput": 8719.18, "total_tokens": 36986080} +{"current_steps": 54865, "total_steps": 204665, "loss": 0.2505, "lr": 1.832794996435378e-06, "epoch": 1.3403610778589403, "percentage": 26.81, "elapsed_time": "1:10:42", "remaining_time": "3:13:02", "throughput": 8719.22, "total_tokens": 36989216} +{"current_steps": 54870, "total_steps": 204665, "loss": 0.0796, "lr": 1.8327477853045554e-06, "epoch": 1.3404832286907875, "percentage": 26.81, "elapsed_time": "1:10:42", "remaining_time": "3:13:02", "throughput": 8719.26, "total_tokens": 36992352} +{"current_steps": 54875, "total_steps": 204665, "loss": 0.0009, "lr": 1.8327005681177674e-06, "epoch": 1.3406053795226345, "percentage": 26.81, "elapsed_time": "1:10:42", "remaining_time": "3:13:01", "throughput": 8719.41, "total_tokens": 36996128} +{"current_steps": 54880, "total_steps": 204665, "loss": 0.0478, "lr": 1.8326533448753565e-06, "epoch": 1.3407275303544817, "percentage": 26.81, "elapsed_time": "1:10:43", "remaining_time": "3:13:01", "throughput": 8719.43, "total_tokens": 36999136} +{"current_steps": 54885, "total_steps": 204665, "loss": 0.1097, "lr": 1.8326061155776666e-06, "epoch": 1.3408496811863289, "percentage": 26.82, "elapsed_time": "1:10:43", "remaining_time": "3:13:00", "throughput": 8719.53, "total_tokens": 37002656} +{"current_steps": 54890, "total_steps": 204665, "loss": 0.1133, "lr": 1.8325588802250411e-06, "epoch": 1.340971832018176, "percentage": 26.82, "elapsed_time": "1:10:43", "remaining_time": "3:13:00", "throughput": 8719.56, "total_tokens": 37005728} +{"current_steps": 54895, "total_steps": 204665, "loss": 0.0501, "lr": 1.8325116388178238e-06, "epoch": 1.3410939828500232, "percentage": 26.82, "elapsed_time": "1:10:44", "remaining_time": "3:12:59", "throughput": 8719.6, "total_tokens": 37008864} +{"current_steps": 54900, "total_steps": 204665, "loss": 0.1672, "lr": 1.8324643913563573e-06, "epoch": 1.3412161336818704, "percentage": 26.82, "elapsed_time": "1:10:44", "remaining_time": "3:12:59", "throughput": 8719.7, "total_tokens": 37012384} +{"current_steps": 54905, "total_steps": 204665, "loss": 0.2182, "lr": 1.8324171378409862e-06, "epoch": 1.3413382845137176, "percentage": 26.83, "elapsed_time": "1:10:45", "remaining_time": "3:12:58", "throughput": 8719.76, "total_tokens": 37015648} +{"current_steps": 54910, "total_steps": 204665, "loss": 0.0635, "lr": 1.832369878272054e-06, "epoch": 1.3414604353455646, "percentage": 26.83, "elapsed_time": "1:10:45", "remaining_time": "3:12:58", "throughput": 8719.83, "total_tokens": 37018976} +{"current_steps": 54915, "total_steps": 204665, "loss": 0.0387, "lr": 1.832322612649904e-06, "epoch": 1.3415825861774118, "percentage": 26.83, "elapsed_time": "1:10:45", "remaining_time": "3:12:57", "throughput": 8719.87, "total_tokens": 37022112} +{"current_steps": 54920, "total_steps": 204665, "loss": 0.1057, "lr": 1.83227534097488e-06, "epoch": 1.341704737009259, "percentage": 26.83, "elapsed_time": "1:10:46", "remaining_time": "3:12:57", "throughput": 8719.89, "total_tokens": 37025184} +{"current_steps": 54925, "total_steps": 204665, "loss": 0.0704, "lr": 1.8322280632473256e-06, "epoch": 1.3418268878411062, "percentage": 26.84, "elapsed_time": "1:10:46", "remaining_time": "3:12:56", "throughput": 8719.91, "total_tokens": 37028192} +{"current_steps": 54930, "total_steps": 204665, "loss": 0.0933, "lr": 1.8321807794675853e-06, "epoch": 1.3419490386729533, "percentage": 26.84, "elapsed_time": "1:10:46", "remaining_time": "3:12:56", "throughput": 8719.89, "total_tokens": 37031008} +{"current_steps": 54935, "total_steps": 204665, "loss": 0.1536, "lr": 1.8321334896360026e-06, "epoch": 1.3420711895048005, "percentage": 26.84, "elapsed_time": "1:10:47", "remaining_time": "3:12:55", "throughput": 8719.99, "total_tokens": 37034528} +{"current_steps": 54940, "total_steps": 204665, "loss": 0.0302, "lr": 1.832086193752921e-06, "epoch": 1.3421933403366477, "percentage": 26.84, "elapsed_time": "1:10:47", "remaining_time": "3:12:55", "throughput": 8720.04, "total_tokens": 37037728} +{"current_steps": 54945, "total_steps": 204665, "loss": 0.0906, "lr": 1.832038891818685e-06, "epoch": 1.342315491168495, "percentage": 26.85, "elapsed_time": "1:10:47", "remaining_time": "3:12:54", "throughput": 8720.17, "total_tokens": 37041312} +{"current_steps": 54950, "total_steps": 204665, "loss": 0.072, "lr": 1.8319915838336387e-06, "epoch": 1.342437642000342, "percentage": 26.85, "elapsed_time": "1:10:48", "remaining_time": "3:12:54", "throughput": 8720.19, "total_tokens": 37044384} +{"current_steps": 54955, "total_steps": 204665, "loss": 0.0005, "lr": 1.831944269798125e-06, "epoch": 1.3425597928321893, "percentage": 26.85, "elapsed_time": "1:10:48", "remaining_time": "3:12:53", "throughput": 8720.32, "total_tokens": 37048032} +{"current_steps": 54960, "total_steps": 204665, "loss": 0.1263, "lr": 1.8318969497124894e-06, "epoch": 1.3426819436640365, "percentage": 26.85, "elapsed_time": "1:10:48", "remaining_time": "3:12:53", "throughput": 8720.41, "total_tokens": 37051424} +{"current_steps": 54965, "total_steps": 204665, "loss": 0.1187, "lr": 1.8318496235770756e-06, "epoch": 1.3428040944958834, "percentage": 26.86, "elapsed_time": "1:10:49", "remaining_time": "3:12:52", "throughput": 8720.49, "total_tokens": 37054816} +{"current_steps": 54970, "total_steps": 204665, "loss": 0.1211, "lr": 1.8318022913922272e-06, "epoch": 1.3429262453277306, "percentage": 26.86, "elapsed_time": "1:10:49", "remaining_time": "3:12:52", "throughput": 8720.63, "total_tokens": 37058528} +{"current_steps": 54975, "total_steps": 204665, "loss": 0.0724, "lr": 1.8317549531582888e-06, "epoch": 1.3430483961595778, "percentage": 26.86, "elapsed_time": "1:10:49", "remaining_time": "3:12:51", "throughput": 8720.75, "total_tokens": 37062112} +{"current_steps": 54980, "total_steps": 204665, "loss": 0.0456, "lr": 1.8317076088756047e-06, "epoch": 1.343170546991425, "percentage": 26.86, "elapsed_time": "1:10:50", "remaining_time": "3:12:51", "throughput": 8720.77, "total_tokens": 37065184} +{"current_steps": 54985, "total_steps": 204665, "loss": 0.0368, "lr": 1.8316602585445194e-06, "epoch": 1.3432926978232722, "percentage": 26.87, "elapsed_time": "1:10:50", "remaining_time": "3:12:50", "throughput": 8720.85, "total_tokens": 37068576} +{"current_steps": 54990, "total_steps": 204665, "loss": 0.0752, "lr": 1.831612902165377e-06, "epoch": 1.3434148486551194, "percentage": 26.87, "elapsed_time": "1:10:50", "remaining_time": "3:12:50", "throughput": 8720.93, "total_tokens": 37071968} +{"current_steps": 54995, "total_steps": 204665, "loss": 0.0529, "lr": 1.8315655397385217e-06, "epoch": 1.3435369994869666, "percentage": 26.87, "elapsed_time": "1:10:51", "remaining_time": "3:12:49", "throughput": 8720.96, "total_tokens": 37075104} +{"current_steps": 55000, "total_steps": 204665, "loss": 0.0326, "lr": 1.8315181712642981e-06, "epoch": 1.3436591503188136, "percentage": 26.87, "elapsed_time": "1:10:51", "remaining_time": "3:12:49", "throughput": 8721.02, "total_tokens": 37078304} +{"current_steps": 55005, "total_steps": 204665, "loss": 0.2934, "lr": 1.8314707967430509e-06, "epoch": 1.3437813011506607, "percentage": 26.88, "elapsed_time": "1:10:51", "remaining_time": "3:12:48", "throughput": 8721.06, "total_tokens": 37081440} +{"current_steps": 55010, "total_steps": 204665, "loss": 0.1698, "lr": 1.8314234161751242e-06, "epoch": 1.343903451982508, "percentage": 26.88, "elapsed_time": "1:10:52", "remaining_time": "3:12:48", "throughput": 8721.01, "total_tokens": 37084128} +{"current_steps": 55015, "total_steps": 204665, "loss": 0.1783, "lr": 1.8313760295608632e-06, "epoch": 1.3440256028143551, "percentage": 26.88, "elapsed_time": "1:10:52", "remaining_time": "3:12:47", "throughput": 8721.19, "total_tokens": 37088032} +{"current_steps": 55020, "total_steps": 204665, "loss": 0.0167, "lr": 1.8313286369006119e-06, "epoch": 1.3441477536462023, "percentage": 26.88, "elapsed_time": "1:10:52", "remaining_time": "3:12:47", "throughput": 8721.21, "total_tokens": 37091104} +{"current_steps": 55025, "total_steps": 204665, "loss": 0.0025, "lr": 1.8312812381947147e-06, "epoch": 1.3442699044780495, "percentage": 26.89, "elapsed_time": "1:10:53", "remaining_time": "3:12:46", "throughput": 8721.32, "total_tokens": 37094624} +{"current_steps": 55030, "total_steps": 204665, "loss": 0.0877, "lr": 1.8312338334435174e-06, "epoch": 1.3443920553098967, "percentage": 26.89, "elapsed_time": "1:10:53", "remaining_time": "3:12:46", "throughput": 8721.45, "total_tokens": 37098272} +{"current_steps": 55035, "total_steps": 204665, "loss": 0.1341, "lr": 1.8311864226473636e-06, "epoch": 1.3445142061417439, "percentage": 26.89, "elapsed_time": "1:10:54", "remaining_time": "3:12:45", "throughput": 8721.47, "total_tokens": 37101344} +{"current_steps": 55040, "total_steps": 204665, "loss": 0.0253, "lr": 1.831139005806599e-06, "epoch": 1.344636356973591, "percentage": 26.89, "elapsed_time": "1:10:54", "remaining_time": "3:12:45", "throughput": 8721.57, "total_tokens": 37104800} +{"current_steps": 55045, "total_steps": 204665, "loss": 0.0942, "lr": 1.8310915829215677e-06, "epoch": 1.3447585078054383, "percentage": 26.9, "elapsed_time": "1:10:54", "remaining_time": "3:12:44", "throughput": 8721.69, "total_tokens": 37108384} +{"current_steps": 55050, "total_steps": 204665, "loss": 0.2179, "lr": 1.831044153992615e-06, "epoch": 1.3448806586372855, "percentage": 26.9, "elapsed_time": "1:10:55", "remaining_time": "3:12:44", "throughput": 8721.78, "total_tokens": 37111776} +{"current_steps": 55055, "total_steps": 204665, "loss": 0.1414, "lr": 1.8309967190200855e-06, "epoch": 1.3450028094691324, "percentage": 26.9, "elapsed_time": "1:10:55", "remaining_time": "3:12:43", "throughput": 8721.83, "total_tokens": 37114976} +{"current_steps": 55060, "total_steps": 204665, "loss": 0.1777, "lr": 1.8309492780043243e-06, "epoch": 1.3451249603009796, "percentage": 26.9, "elapsed_time": "1:10:55", "remaining_time": "3:12:43", "throughput": 8721.9, "total_tokens": 37118304} +{"current_steps": 55065, "total_steps": 204665, "loss": 0.0744, "lr": 1.8309018309456767e-06, "epoch": 1.3452471111328268, "percentage": 26.9, "elapsed_time": "1:10:56", "remaining_time": "3:12:42", "throughput": 8721.97, "total_tokens": 37121568} +{"current_steps": 55070, "total_steps": 204665, "loss": 0.0465, "lr": 1.8308543778444875e-06, "epoch": 1.345369261964674, "percentage": 26.91, "elapsed_time": "1:10:56", "remaining_time": "3:12:42", "throughput": 8722.0, "total_tokens": 37124704} +{"current_steps": 55075, "total_steps": 204665, "loss": 0.1673, "lr": 1.8308069187011017e-06, "epoch": 1.3454914127965212, "percentage": 26.91, "elapsed_time": "1:10:56", "remaining_time": "3:12:41", "throughput": 8722.12, "total_tokens": 37128288} +{"current_steps": 55080, "total_steps": 204665, "loss": 0.0441, "lr": 1.8307594535158645e-06, "epoch": 1.3456135636283684, "percentage": 26.91, "elapsed_time": "1:10:57", "remaining_time": "3:12:41", "throughput": 8722.13, "total_tokens": 37131296} +{"current_steps": 55085, "total_steps": 204665, "loss": 0.1084, "lr": 1.8307119822891213e-06, "epoch": 1.3457357144602153, "percentage": 26.91, "elapsed_time": "1:10:57", "remaining_time": "3:12:40", "throughput": 8722.17, "total_tokens": 37134432} +{"current_steps": 55090, "total_steps": 204665, "loss": 0.0891, "lr": 1.830664505021217e-06, "epoch": 1.3458578652920625, "percentage": 26.92, "elapsed_time": "1:10:57", "remaining_time": "3:12:40", "throughput": 8722.23, "total_tokens": 37137696} +{"current_steps": 55095, "total_steps": 204665, "loss": 0.0556, "lr": 1.830617021712497e-06, "epoch": 1.3459800161239097, "percentage": 26.92, "elapsed_time": "1:10:58", "remaining_time": "3:12:39", "throughput": 8722.3, "total_tokens": 37141024} +{"current_steps": 55100, "total_steps": 204665, "loss": 0.0951, "lr": 1.8305695323633065e-06, "epoch": 1.346102166955757, "percentage": 26.92, "elapsed_time": "1:10:58", "remaining_time": "3:12:39", "throughput": 8722.35, "total_tokens": 37144224} +{"current_steps": 55105, "total_steps": 204665, "loss": 0.0283, "lr": 1.830522036973991e-06, "epoch": 1.346224317787604, "percentage": 26.92, "elapsed_time": "1:10:58", "remaining_time": "3:12:38", "throughput": 8722.46, "total_tokens": 37147744} +{"current_steps": 55110, "total_steps": 204665, "loss": 0.1267, "lr": 1.830474535544896e-06, "epoch": 1.3463464686194513, "percentage": 26.93, "elapsed_time": "1:10:59", "remaining_time": "3:12:38", "throughput": 8722.48, "total_tokens": 37150816} +{"current_steps": 55115, "total_steps": 204665, "loss": 0.1288, "lr": 1.8304270280763667e-06, "epoch": 1.3464686194512985, "percentage": 26.93, "elapsed_time": "1:10:59", "remaining_time": "3:12:37", "throughput": 8722.53, "total_tokens": 37154016} +{"current_steps": 55120, "total_steps": 204665, "loss": 0.0154, "lr": 1.8303795145687488e-06, "epoch": 1.3465907702831457, "percentage": 26.93, "elapsed_time": "1:10:59", "remaining_time": "3:12:37", "throughput": 8722.58, "total_tokens": 37157216} +{"current_steps": 55125, "total_steps": 204665, "loss": 0.0831, "lr": 1.8303319950223877e-06, "epoch": 1.3467129211149929, "percentage": 26.93, "elapsed_time": "1:11:00", "remaining_time": "3:12:36", "throughput": 8722.65, "total_tokens": 37160608} +{"current_steps": 55130, "total_steps": 204665, "loss": 0.0366, "lr": 1.8302844694376289e-06, "epoch": 1.34683507194684, "percentage": 26.94, "elapsed_time": "1:11:00", "remaining_time": "3:12:36", "throughput": 8722.77, "total_tokens": 37164192} +{"current_steps": 55135, "total_steps": 204665, "loss": 0.0646, "lr": 1.830236937814818e-06, "epoch": 1.3469572227786872, "percentage": 26.94, "elapsed_time": "1:11:00", "remaining_time": "3:12:35", "throughput": 8722.8, "total_tokens": 37167328} +{"current_steps": 55140, "total_steps": 204665, "loss": 0.0703, "lr": 1.830189400154301e-06, "epoch": 1.3470793736105344, "percentage": 26.94, "elapsed_time": "1:11:01", "remaining_time": "3:12:35", "throughput": 8722.9, "total_tokens": 37170784} +{"current_steps": 55145, "total_steps": 204665, "loss": 0.0018, "lr": 1.8301418564564238e-06, "epoch": 1.3472015244423814, "percentage": 26.94, "elapsed_time": "1:11:01", "remaining_time": "3:12:35", "throughput": 8723.05, "total_tokens": 37174560} +{"current_steps": 55150, "total_steps": 204665, "loss": 0.0507, "lr": 1.830094306721531e-06, "epoch": 1.3473236752742286, "percentage": 26.95, "elapsed_time": "1:11:01", "remaining_time": "3:12:34", "throughput": 8723.15, "total_tokens": 37178016} +{"current_steps": 55155, "total_steps": 204665, "loss": 0.0356, "lr": 1.8300467509499695e-06, "epoch": 1.3474458261060758, "percentage": 26.95, "elapsed_time": "1:11:02", "remaining_time": "3:12:34", "throughput": 8723.27, "total_tokens": 37181600} +{"current_steps": 55160, "total_steps": 204665, "loss": 0.0041, "lr": 1.8299991891420845e-06, "epoch": 1.347567976937923, "percentage": 26.95, "elapsed_time": "1:11:02", "remaining_time": "3:12:33", "throughput": 8723.29, "total_tokens": 37184608} +{"current_steps": 55165, "total_steps": 204665, "loss": 0.0263, "lr": 1.8299516212982225e-06, "epoch": 1.3476901277697702, "percentage": 26.95, "elapsed_time": "1:11:03", "remaining_time": "3:12:33", "throughput": 8723.29, "total_tokens": 37187616} +{"current_steps": 55170, "total_steps": 204665, "loss": 0.0432, "lr": 1.8299040474187288e-06, "epoch": 1.3478122786016173, "percentage": 26.96, "elapsed_time": "1:11:03", "remaining_time": "3:12:32", "throughput": 8723.35, "total_tokens": 37190880} +{"current_steps": 55175, "total_steps": 204665, "loss": 0.1084, "lr": 1.8298564675039499e-06, "epoch": 1.3479344294334643, "percentage": 26.96, "elapsed_time": "1:11:03", "remaining_time": "3:12:32", "throughput": 8723.32, "total_tokens": 37193696} +{"current_steps": 55180, "total_steps": 204665, "loss": 0.1264, "lr": 1.8298088815542312e-06, "epoch": 1.3480565802653115, "percentage": 26.96, "elapsed_time": "1:11:04", "remaining_time": "3:12:31", "throughput": 8723.33, "total_tokens": 37196768} +{"current_steps": 55185, "total_steps": 204665, "loss": 0.2703, "lr": 1.8297612895699195e-06, "epoch": 1.3481787310971587, "percentage": 26.96, "elapsed_time": "1:11:04", "remaining_time": "3:12:31", "throughput": 8723.39, "total_tokens": 37200032} +{"current_steps": 55190, "total_steps": 204665, "loss": 0.2105, "lr": 1.8297136915513605e-06, "epoch": 1.3483008819290059, "percentage": 26.97, "elapsed_time": "1:11:04", "remaining_time": "3:12:30", "throughput": 8723.48, "total_tokens": 37203488} +{"current_steps": 55195, "total_steps": 204665, "loss": 0.0619, "lr": 1.8296660874989e-06, "epoch": 1.348423032760853, "percentage": 26.97, "elapsed_time": "1:11:05", "remaining_time": "3:12:30", "throughput": 8723.57, "total_tokens": 37206880} +{"current_steps": 55200, "total_steps": 204665, "loss": 0.0022, "lr": 1.829618477412885e-06, "epoch": 1.3485451835927003, "percentage": 26.97, "elapsed_time": "1:11:05", "remaining_time": "3:12:29", "throughput": 8723.63, "total_tokens": 37210208} +{"current_steps": 55205, "total_steps": 204665, "loss": 0.0777, "lr": 1.8295708612936611e-06, "epoch": 1.3486673344245474, "percentage": 26.97, "elapsed_time": "1:11:05", "remaining_time": "3:12:29", "throughput": 8723.68, "total_tokens": 37213408} +{"current_steps": 55210, "total_steps": 204665, "loss": 0.067, "lr": 1.8295232391415747e-06, "epoch": 1.3487894852563946, "percentage": 26.98, "elapsed_time": "1:11:06", "remaining_time": "3:12:28", "throughput": 8723.75, "total_tokens": 37216800} +{"current_steps": 55215, "total_steps": 204665, "loss": 0.1029, "lr": 1.8294756109569722e-06, "epoch": 1.3489116360882418, "percentage": 26.98, "elapsed_time": "1:11:06", "remaining_time": "3:12:28", "throughput": 8723.88, "total_tokens": 37220448} +{"current_steps": 55220, "total_steps": 204665, "loss": 0.0033, "lr": 1.8294279767402001e-06, "epoch": 1.349033786920089, "percentage": 26.98, "elapsed_time": "1:11:06", "remaining_time": "3:12:27", "throughput": 8723.92, "total_tokens": 37223584} +{"current_steps": 55225, "total_steps": 204665, "loss": 0.1111, "lr": 1.8293803364916044e-06, "epoch": 1.3491559377519362, "percentage": 26.98, "elapsed_time": "1:11:07", "remaining_time": "3:12:27", "throughput": 8723.91, "total_tokens": 37226464} +{"current_steps": 55230, "total_steps": 204665, "loss": 0.2226, "lr": 1.8293326902115323e-06, "epoch": 1.3492780885837832, "percentage": 26.99, "elapsed_time": "1:11:07", "remaining_time": "3:12:26", "throughput": 8723.96, "total_tokens": 37229728} +{"current_steps": 55235, "total_steps": 204665, "loss": 0.1753, "lr": 1.8292850379003294e-06, "epoch": 1.3494002394156304, "percentage": 26.99, "elapsed_time": "1:11:07", "remaining_time": "3:12:26", "throughput": 8723.99, "total_tokens": 37232864} +{"current_steps": 55240, "total_steps": 204665, "loss": 0.0801, "lr": 1.8292373795583425e-06, "epoch": 1.3495223902474776, "percentage": 26.99, "elapsed_time": "1:11:08", "remaining_time": "3:12:25", "throughput": 8724.09, "total_tokens": 37236320} +{"current_steps": 55245, "total_steps": 204665, "loss": 0.0025, "lr": 1.8291897151859187e-06, "epoch": 1.3496445410793247, "percentage": 26.99, "elapsed_time": "1:11:08", "remaining_time": "3:12:25", "throughput": 8724.19, "total_tokens": 37239840} +{"current_steps": 55250, "total_steps": 204665, "loss": 0.2231, "lr": 1.8291420447834043e-06, "epoch": 1.349766691911172, "percentage": 27.0, "elapsed_time": "1:11:08", "remaining_time": "3:12:24", "throughput": 8724.24, "total_tokens": 37243040} +{"current_steps": 55255, "total_steps": 204665, "loss": 0.0016, "lr": 1.8290943683511457e-06, "epoch": 1.3498888427430191, "percentage": 27.0, "elapsed_time": "1:11:09", "remaining_time": "3:12:24", "throughput": 8724.26, "total_tokens": 37246112} +{"current_steps": 55260, "total_steps": 204665, "loss": 0.0456, "lr": 1.8290466858894899e-06, "epoch": 1.3500109935748663, "percentage": 27.0, "elapsed_time": "1:11:09", "remaining_time": "3:12:23", "throughput": 8724.43, "total_tokens": 37249952} +{"current_steps": 55265, "total_steps": 204665, "loss": 0.0938, "lr": 1.8289989973987838e-06, "epoch": 1.3501331444067133, "percentage": 27.0, "elapsed_time": "1:11:09", "remaining_time": "3:12:23", "throughput": 8724.48, "total_tokens": 37253152} +{"current_steps": 55270, "total_steps": 204665, "loss": 0.1001, "lr": 1.8289513028793739e-06, "epoch": 1.3502552952385605, "percentage": 27.01, "elapsed_time": "1:11:10", "remaining_time": "3:12:22", "throughput": 8724.53, "total_tokens": 37256352} +{"current_steps": 55275, "total_steps": 204665, "loss": 0.0461, "lr": 1.8289036023316072e-06, "epoch": 1.3503774460704077, "percentage": 27.01, "elapsed_time": "1:11:10", "remaining_time": "3:12:22", "throughput": 8724.56, "total_tokens": 37259488} +{"current_steps": 55280, "total_steps": 204665, "loss": 0.0754, "lr": 1.8288558957558301e-06, "epoch": 1.3504995969022549, "percentage": 27.01, "elapsed_time": "1:11:10", "remaining_time": "3:12:21", "throughput": 8724.62, "total_tokens": 37262752} +{"current_steps": 55285, "total_steps": 204665, "loss": 0.1319, "lr": 1.8288081831523907e-06, "epoch": 1.350621747734102, "percentage": 27.01, "elapsed_time": "1:11:11", "remaining_time": "3:12:21", "throughput": 8724.69, "total_tokens": 37266080} +{"current_steps": 55290, "total_steps": 204665, "loss": 0.0835, "lr": 1.8287604645216348e-06, "epoch": 1.3507438985659492, "percentage": 27.01, "elapsed_time": "1:11:11", "remaining_time": "3:12:20", "throughput": 8724.77, "total_tokens": 37269472} +{"current_steps": 55295, "total_steps": 204665, "loss": 0.0482, "lr": 1.8287127398639102e-06, "epoch": 1.3508660493977964, "percentage": 27.02, "elapsed_time": "1:11:12", "remaining_time": "3:12:20", "throughput": 8724.79, "total_tokens": 37272544} +{"current_steps": 55300, "total_steps": 204665, "loss": 0.1202, "lr": 1.8286650091795638e-06, "epoch": 1.3509882002296436, "percentage": 27.02, "elapsed_time": "1:11:12", "remaining_time": "3:12:19", "throughput": 8724.97, "total_tokens": 37276448} +{"current_steps": 55305, "total_steps": 204665, "loss": 0.0391, "lr": 1.828617272468942e-06, "epoch": 1.3511103510614908, "percentage": 27.02, "elapsed_time": "1:11:12", "remaining_time": "3:12:19", "throughput": 8725.07, "total_tokens": 37279904} +{"current_steps": 55310, "total_steps": 204665, "loss": 0.0494, "lr": 1.8285695297323928e-06, "epoch": 1.351232501893338, "percentage": 27.02, "elapsed_time": "1:11:13", "remaining_time": "3:12:18", "throughput": 8725.06, "total_tokens": 37282784} +{"current_steps": 55315, "total_steps": 204665, "loss": 0.0919, "lr": 1.828521780970263e-06, "epoch": 1.3513546527251852, "percentage": 27.03, "elapsed_time": "1:11:13", "remaining_time": "3:12:18", "throughput": 8725.07, "total_tokens": 37285856} +{"current_steps": 55320, "total_steps": 204665, "loss": 0.144, "lr": 1.8284740261829002e-06, "epoch": 1.3514768035570321, "percentage": 27.03, "elapsed_time": "1:11:13", "remaining_time": "3:12:17", "throughput": 8725.11, "total_tokens": 37288992} +{"current_steps": 55325, "total_steps": 204665, "loss": 0.0438, "lr": 1.8284262653706515e-06, "epoch": 1.3515989543888793, "percentage": 27.03, "elapsed_time": "1:11:14", "remaining_time": "3:12:17", "throughput": 8725.17, "total_tokens": 37292320} +{"current_steps": 55330, "total_steps": 204665, "loss": 0.0744, "lr": 1.8283784985338638e-06, "epoch": 1.3517211052207265, "percentage": 27.03, "elapsed_time": "1:11:14", "remaining_time": "3:12:16", "throughput": 8725.25, "total_tokens": 37295712} +{"current_steps": 55335, "total_steps": 204665, "loss": 0.1685, "lr": 1.828330725672885e-06, "epoch": 1.3518432560525737, "percentage": 27.04, "elapsed_time": "1:11:14", "remaining_time": "3:12:16", "throughput": 8725.32, "total_tokens": 37299040} +{"current_steps": 55340, "total_steps": 204665, "loss": 0.0024, "lr": 1.8282829467880624e-06, "epoch": 1.351965406884421, "percentage": 27.04, "elapsed_time": "1:11:15", "remaining_time": "3:12:15", "throughput": 8725.35, "total_tokens": 37302176} +{"current_steps": 55345, "total_steps": 204665, "loss": 0.0802, "lr": 1.8282351618797435e-06, "epoch": 1.352087557716268, "percentage": 27.04, "elapsed_time": "1:11:15", "remaining_time": "3:12:15", "throughput": 8725.39, "total_tokens": 37305312} +{"current_steps": 55350, "total_steps": 204665, "loss": 0.0617, "lr": 1.8281873709482759e-06, "epoch": 1.3522097085481153, "percentage": 27.04, "elapsed_time": "1:11:15", "remaining_time": "3:12:14", "throughput": 8725.43, "total_tokens": 37308512} +{"current_steps": 55355, "total_steps": 204665, "loss": 0.0833, "lr": 1.8281395739940067e-06, "epoch": 1.3523318593799623, "percentage": 27.05, "elapsed_time": "1:11:16", "remaining_time": "3:12:14", "throughput": 8725.47, "total_tokens": 37311712} +{"current_steps": 55360, "total_steps": 204665, "loss": 0.0447, "lr": 1.828091771017284e-06, "epoch": 1.3524540102118094, "percentage": 27.05, "elapsed_time": "1:11:16", "remaining_time": "3:12:13", "throughput": 8725.61, "total_tokens": 37315424} +{"current_steps": 55365, "total_steps": 204665, "loss": 0.0523, "lr": 1.8280439620184549e-06, "epoch": 1.3525761610436566, "percentage": 27.05, "elapsed_time": "1:11:16", "remaining_time": "3:12:13", "throughput": 8725.7, "total_tokens": 37318880} +{"current_steps": 55370, "total_steps": 204665, "loss": 0.0028, "lr": 1.8279961469978676e-06, "epoch": 1.3526983118755038, "percentage": 27.05, "elapsed_time": "1:11:17", "remaining_time": "3:12:12", "throughput": 8725.77, "total_tokens": 37322208} +{"current_steps": 55375, "total_steps": 204665, "loss": 0.1267, "lr": 1.8279483259558694e-06, "epoch": 1.352820462707351, "percentage": 27.06, "elapsed_time": "1:11:17", "remaining_time": "3:12:12", "throughput": 8725.93, "total_tokens": 37326048} +{"current_steps": 55380, "total_steps": 204665, "loss": 0.0956, "lr": 1.8279004988928085e-06, "epoch": 1.3529426135391982, "percentage": 27.06, "elapsed_time": "1:11:17", "remaining_time": "3:12:11", "throughput": 8726.03, "total_tokens": 37329504} +{"current_steps": 55385, "total_steps": 204665, "loss": 0.0289, "lr": 1.8278526658090325e-06, "epoch": 1.3530647643710454, "percentage": 27.06, "elapsed_time": "1:11:18", "remaining_time": "3:12:11", "throughput": 8726.11, "total_tokens": 37332896} +{"current_steps": 55390, "total_steps": 204665, "loss": 0.0558, "lr": 1.8278048267048894e-06, "epoch": 1.3531869152028926, "percentage": 27.06, "elapsed_time": "1:11:18", "remaining_time": "3:12:10", "throughput": 8726.21, "total_tokens": 37336416} +{"current_steps": 55395, "total_steps": 204665, "loss": 0.044, "lr": 1.8277569815807266e-06, "epoch": 1.3533090660347398, "percentage": 27.07, "elapsed_time": "1:11:18", "remaining_time": "3:12:10", "throughput": 8726.24, "total_tokens": 37339488} +{"current_steps": 55400, "total_steps": 204665, "loss": 0.1294, "lr": 1.8277091304368926e-06, "epoch": 1.353431216866587, "percentage": 27.07, "elapsed_time": "1:11:19", "remaining_time": "3:12:09", "throughput": 8726.3, "total_tokens": 37342816} +{"current_steps": 55405, "total_steps": 204665, "loss": 0.1302, "lr": 1.8276612732737351e-06, "epoch": 1.3535533676984342, "percentage": 27.07, "elapsed_time": "1:11:19", "remaining_time": "3:12:09", "throughput": 8726.35, "total_tokens": 37346016} +{"current_steps": 55410, "total_steps": 204665, "loss": 0.1187, "lr": 1.8276134100916024e-06, "epoch": 1.3536755185302811, "percentage": 27.07, "elapsed_time": "1:11:20", "remaining_time": "3:12:08", "throughput": 8726.47, "total_tokens": 37349600} +{"current_steps": 55415, "total_steps": 204665, "loss": 0.1065, "lr": 1.8275655408908421e-06, "epoch": 1.3537976693621283, "percentage": 27.08, "elapsed_time": "1:11:20", "remaining_time": "3:12:08", "throughput": 8726.48, "total_tokens": 37352608} +{"current_steps": 55420, "total_steps": 204665, "loss": 0.0017, "lr": 1.8275176656718025e-06, "epoch": 1.3539198201939755, "percentage": 27.08, "elapsed_time": "1:11:20", "remaining_time": "3:12:07", "throughput": 8726.53, "total_tokens": 37355808} +{"current_steps": 55425, "total_steps": 204665, "loss": 0.0723, "lr": 1.8274697844348321e-06, "epoch": 1.3540419710258227, "percentage": 27.08, "elapsed_time": "1:11:21", "remaining_time": "3:12:07", "throughput": 8726.65, "total_tokens": 37359392} +{"current_steps": 55430, "total_steps": 204665, "loss": 0.0567, "lr": 1.827421897180279e-06, "epoch": 1.3541641218576699, "percentage": 27.08, "elapsed_time": "1:11:21", "remaining_time": "3:12:06", "throughput": 8726.72, "total_tokens": 37362720} +{"current_steps": 55435, "total_steps": 204665, "loss": 0.074, "lr": 1.827374003908491e-06, "epoch": 1.354286272689517, "percentage": 27.09, "elapsed_time": "1:11:21", "remaining_time": "3:12:06", "throughput": 8726.8, "total_tokens": 37366112} +{"current_steps": 55440, "total_steps": 204665, "loss": 0.1579, "lr": 1.8273261046198169e-06, "epoch": 1.3544084235213643, "percentage": 27.09, "elapsed_time": "1:11:22", "remaining_time": "3:12:05", "throughput": 8726.8, "total_tokens": 37369056} +{"current_steps": 55445, "total_steps": 204665, "loss": 0.0733, "lr": 1.8272781993146046e-06, "epoch": 1.3545305743532112, "percentage": 27.09, "elapsed_time": "1:11:22", "remaining_time": "3:12:05", "throughput": 8726.83, "total_tokens": 37372192} +{"current_steps": 55450, "total_steps": 204665, "loss": 0.0467, "lr": 1.827230287993203e-06, "epoch": 1.3546527251850584, "percentage": 27.09, "elapsed_time": "1:11:22", "remaining_time": "3:12:04", "throughput": 8726.93, "total_tokens": 37375712} +{"current_steps": 55455, "total_steps": 204665, "loss": 0.1188, "lr": 1.8271823706559602e-06, "epoch": 1.3547748760169056, "percentage": 27.1, "elapsed_time": "1:11:23", "remaining_time": "3:12:04", "throughput": 8727.01, "total_tokens": 37379104} +{"current_steps": 55460, "total_steps": 204665, "loss": 0.0039, "lr": 1.8271344473032246e-06, "epoch": 1.3548970268487528, "percentage": 27.1, "elapsed_time": "1:11:23", "remaining_time": "3:12:03", "throughput": 8727.1, "total_tokens": 37382496} +{"current_steps": 55465, "total_steps": 204665, "loss": 0.1075, "lr": 1.827086517935345e-06, "epoch": 1.3550191776806, "percentage": 27.1, "elapsed_time": "1:11:23", "remaining_time": "3:12:03", "throughput": 8727.17, "total_tokens": 37385888} +{"current_steps": 55470, "total_steps": 204665, "loss": 0.0014, "lr": 1.8270385825526698e-06, "epoch": 1.3551413285124472, "percentage": 27.1, "elapsed_time": "1:11:24", "remaining_time": "3:12:03", "throughput": 8727.28, "total_tokens": 37389408} +{"current_steps": 55475, "total_steps": 204665, "loss": 0.1457, "lr": 1.8269906411555473e-06, "epoch": 1.3552634793442944, "percentage": 27.11, "elapsed_time": "1:11:24", "remaining_time": "3:12:02", "throughput": 8727.48, "total_tokens": 37393376} +{"current_steps": 55480, "total_steps": 204665, "loss": 0.0955, "lr": 1.8269426937443266e-06, "epoch": 1.3553856301761416, "percentage": 27.11, "elapsed_time": "1:11:24", "remaining_time": "3:12:02", "throughput": 8727.57, "total_tokens": 37396832} +{"current_steps": 55485, "total_steps": 204665, "loss": 0.0859, "lr": 1.8268947403193562e-06, "epoch": 1.3555077810079887, "percentage": 27.11, "elapsed_time": "1:11:25", "remaining_time": "3:12:01", "throughput": 8727.67, "total_tokens": 37400288} +{"current_steps": 55490, "total_steps": 204665, "loss": 0.0015, "lr": 1.8268467808809849e-06, "epoch": 1.355629931839836, "percentage": 27.11, "elapsed_time": "1:11:25", "remaining_time": "3:12:01", "throughput": 8727.79, "total_tokens": 37403872} +{"current_steps": 55495, "total_steps": 204665, "loss": 0.1744, "lr": 1.8267988154295612e-06, "epoch": 1.3557520826716831, "percentage": 27.12, "elapsed_time": "1:11:25", "remaining_time": "3:12:00", "throughput": 8727.84, "total_tokens": 37407136} +{"current_steps": 55500, "total_steps": 204665, "loss": 0.1595, "lr": 1.8267508439654345e-06, "epoch": 1.35587423350353, "percentage": 27.12, "elapsed_time": "1:11:26", "remaining_time": "3:12:00", "throughput": 8728.0, "total_tokens": 37410976} +{"current_steps": 55505, "total_steps": 204665, "loss": 0.0964, "lr": 1.826702866488953e-06, "epoch": 1.3559963843353773, "percentage": 27.12, "elapsed_time": "1:11:26", "remaining_time": "3:11:59", "throughput": 8728.17, "total_tokens": 37414816} +{"current_steps": 55510, "total_steps": 204665, "loss": 0.1445, "lr": 1.826654883000466e-06, "epoch": 1.3561185351672245, "percentage": 27.12, "elapsed_time": "1:11:27", "remaining_time": "3:11:59", "throughput": 8728.26, "total_tokens": 37418272} +{"current_steps": 55515, "total_steps": 204665, "loss": 0.0936, "lr": 1.8266068935003226e-06, "epoch": 1.3562406859990717, "percentage": 27.12, "elapsed_time": "1:11:27", "remaining_time": "3:11:58", "throughput": 8728.36, "total_tokens": 37421792} +{"current_steps": 55520, "total_steps": 204665, "loss": 0.0016, "lr": 1.826558897988871e-06, "epoch": 1.3563628368309188, "percentage": 27.13, "elapsed_time": "1:11:27", "remaining_time": "3:11:58", "throughput": 8728.42, "total_tokens": 37425056} +{"current_steps": 55525, "total_steps": 204665, "loss": 0.0576, "lr": 1.8265108964664608e-06, "epoch": 1.356484987662766, "percentage": 27.13, "elapsed_time": "1:11:28", "remaining_time": "3:11:57", "throughput": 8728.46, "total_tokens": 37428256} +{"current_steps": 55530, "total_steps": 204665, "loss": 0.0034, "lr": 1.8264628889334414e-06, "epoch": 1.3566071384946132, "percentage": 27.13, "elapsed_time": "1:11:28", "remaining_time": "3:11:57", "throughput": 8728.6, "total_tokens": 37431968} +{"current_steps": 55535, "total_steps": 204665, "loss": 0.0277, "lr": 1.8264148753901616e-06, "epoch": 1.3567292893264602, "percentage": 27.13, "elapsed_time": "1:11:28", "remaining_time": "3:11:56", "throughput": 8728.84, "total_tokens": 37436192} +{"current_steps": 55540, "total_steps": 204665, "loss": 0.0666, "lr": 1.8263668558369703e-06, "epoch": 1.3568514401583074, "percentage": 27.14, "elapsed_time": "1:11:29", "remaining_time": "3:11:56", "throughput": 8728.9, "total_tokens": 37439456} +{"current_steps": 55545, "total_steps": 204665, "loss": 0.0879, "lr": 1.8263188302742173e-06, "epoch": 1.3569735909901546, "percentage": 27.14, "elapsed_time": "1:11:29", "remaining_time": "3:11:55", "throughput": 8728.99, "total_tokens": 37442912} +{"current_steps": 55550, "total_steps": 204665, "loss": 0.105, "lr": 1.8262707987022512e-06, "epoch": 1.3570957418220018, "percentage": 27.14, "elapsed_time": "1:11:29", "remaining_time": "3:11:55", "throughput": 8728.99, "total_tokens": 37445856} +{"current_steps": 55555, "total_steps": 204665, "loss": 0.1269, "lr": 1.8262227611214218e-06, "epoch": 1.357217892653849, "percentage": 27.14, "elapsed_time": "1:11:30", "remaining_time": "3:11:54", "throughput": 8729.05, "total_tokens": 37449184} +{"current_steps": 55560, "total_steps": 204665, "loss": 0.061, "lr": 1.826174717532078e-06, "epoch": 1.3573400434856961, "percentage": 27.15, "elapsed_time": "1:11:30", "remaining_time": "3:11:54", "throughput": 8729.06, "total_tokens": 37452192} +{"current_steps": 55565, "total_steps": 204665, "loss": 0.1319, "lr": 1.8261266679345696e-06, "epoch": 1.3574621943175433, "percentage": 27.15, "elapsed_time": "1:11:30", "remaining_time": "3:11:53", "throughput": 8729.1, "total_tokens": 37455392} +{"current_steps": 55570, "total_steps": 204665, "loss": 0.0028, "lr": 1.8260786123292458e-06, "epoch": 1.3575843451493905, "percentage": 27.15, "elapsed_time": "1:11:31", "remaining_time": "3:11:53", "throughput": 8729.19, "total_tokens": 37458784} +{"current_steps": 55575, "total_steps": 204665, "loss": 0.0717, "lr": 1.8260305507164565e-06, "epoch": 1.3577064959812377, "percentage": 27.15, "elapsed_time": "1:11:31", "remaining_time": "3:11:52", "throughput": 8729.24, "total_tokens": 37461984} +{"current_steps": 55580, "total_steps": 204665, "loss": 0.2017, "lr": 1.8259824830965504e-06, "epoch": 1.357828646813085, "percentage": 27.16, "elapsed_time": "1:11:31", "remaining_time": "3:11:52", "throughput": 8729.41, "total_tokens": 37465888} +{"current_steps": 55585, "total_steps": 204665, "loss": 0.0853, "lr": 1.8259344094698777e-06, "epoch": 1.357950797644932, "percentage": 27.16, "elapsed_time": "1:11:32", "remaining_time": "3:11:51", "throughput": 8729.56, "total_tokens": 37469664} +{"current_steps": 55590, "total_steps": 204665, "loss": 0.0399, "lr": 1.8258863298367877e-06, "epoch": 1.358072948476779, "percentage": 27.16, "elapsed_time": "1:11:32", "remaining_time": "3:11:51", "throughput": 8729.63, "total_tokens": 37473056} +{"current_steps": 55595, "total_steps": 204665, "loss": 0.0738, "lr": 1.8258382441976306e-06, "epoch": 1.3581950993086263, "percentage": 27.16, "elapsed_time": "1:11:32", "remaining_time": "3:11:50", "throughput": 8729.66, "total_tokens": 37476192} +{"current_steps": 55600, "total_steps": 204665, "loss": 0.0733, "lr": 1.8257901525527553e-06, "epoch": 1.3583172501404734, "percentage": 27.17, "elapsed_time": "1:11:33", "remaining_time": "3:11:50", "throughput": 8729.73, "total_tokens": 37479520} +{"current_steps": 55605, "total_steps": 204665, "loss": 0.0521, "lr": 1.8257420549025117e-06, "epoch": 1.3584394009723206, "percentage": 27.17, "elapsed_time": "1:11:33", "remaining_time": "3:11:49", "throughput": 8729.73, "total_tokens": 37482528} +{"current_steps": 55610, "total_steps": 204665, "loss": 0.002, "lr": 1.82569395124725e-06, "epoch": 1.3585615518041678, "percentage": 27.17, "elapsed_time": "1:11:34", "remaining_time": "3:11:49", "throughput": 8729.83, "total_tokens": 37486048} +{"current_steps": 55615, "total_steps": 204665, "loss": 0.1526, "lr": 1.82564584158732e-06, "epoch": 1.358683702636015, "percentage": 27.17, "elapsed_time": "1:11:34", "remaining_time": "3:11:49", "throughput": 8729.86, "total_tokens": 37489120} +{"current_steps": 55620, "total_steps": 204665, "loss": 0.0569, "lr": 1.8255977259230714e-06, "epoch": 1.358805853467862, "percentage": 27.18, "elapsed_time": "1:11:34", "remaining_time": "3:11:48", "throughput": 8729.89, "total_tokens": 37492256} +{"current_steps": 55625, "total_steps": 204665, "loss": 0.0938, "lr": 1.8255496042548537e-06, "epoch": 1.3589280042997092, "percentage": 27.18, "elapsed_time": "1:11:35", "remaining_time": "3:11:48", "throughput": 8729.92, "total_tokens": 37495392} +{"current_steps": 55630, "total_steps": 204665, "loss": 0.0346, "lr": 1.8255014765830174e-06, "epoch": 1.3590501551315564, "percentage": 27.18, "elapsed_time": "1:11:35", "remaining_time": "3:11:47", "throughput": 8730.31, "total_tokens": 37500512} +{"current_steps": 55635, "total_steps": 204665, "loss": 0.12, "lr": 1.8254533429079125e-06, "epoch": 1.3591723059634035, "percentage": 27.18, "elapsed_time": "1:11:35", "remaining_time": "3:11:47", "throughput": 8730.47, "total_tokens": 37504288} +{"current_steps": 55640, "total_steps": 204665, "loss": 0.0534, "lr": 1.8254052032298886e-06, "epoch": 1.3592944567952507, "percentage": 27.19, "elapsed_time": "1:11:36", "remaining_time": "3:11:46", "throughput": 8730.55, "total_tokens": 37507744} +{"current_steps": 55645, "total_steps": 204665, "loss": 0.133, "lr": 1.8253570575492963e-06, "epoch": 1.359416607627098, "percentage": 27.19, "elapsed_time": "1:11:36", "remaining_time": "3:11:46", "throughput": 8730.64, "total_tokens": 37511136} +{"current_steps": 55650, "total_steps": 204665, "loss": 0.0398, "lr": 1.8253089058664852e-06, "epoch": 1.3595387584589451, "percentage": 27.19, "elapsed_time": "1:11:36", "remaining_time": "3:11:45", "throughput": 8730.67, "total_tokens": 37514272} +{"current_steps": 55655, "total_steps": 204665, "loss": 0.0322, "lr": 1.825260748181806e-06, "epoch": 1.3596609092907923, "percentage": 27.19, "elapsed_time": "1:11:37", "remaining_time": "3:11:45", "throughput": 8730.66, "total_tokens": 37517152} +{"current_steps": 55660, "total_steps": 204665, "loss": 0.1159, "lr": 1.8252125844956083e-06, "epoch": 1.3597830601226395, "percentage": 27.2, "elapsed_time": "1:11:37", "remaining_time": "3:11:44", "throughput": 8730.73, "total_tokens": 37520480} +{"current_steps": 55665, "total_steps": 204665, "loss": 0.0401, "lr": 1.8251644148082433e-06, "epoch": 1.3599052109544867, "percentage": 27.2, "elapsed_time": "1:11:37", "remaining_time": "3:11:44", "throughput": 8730.75, "total_tokens": 37523552} +{"current_steps": 55670, "total_steps": 204665, "loss": 0.0273, "lr": 1.8251162391200604e-06, "epoch": 1.3600273617863339, "percentage": 27.2, "elapsed_time": "1:11:38", "remaining_time": "3:11:43", "throughput": 8730.88, "total_tokens": 37527200} +{"current_steps": 55675, "total_steps": 204665, "loss": 0.0507, "lr": 1.8250680574314101e-06, "epoch": 1.360149512618181, "percentage": 27.2, "elapsed_time": "1:11:38", "remaining_time": "3:11:43", "throughput": 8730.95, "total_tokens": 37530528} +{"current_steps": 55680, "total_steps": 204665, "loss": 0.1472, "lr": 1.8250198697426434e-06, "epoch": 1.360271663450028, "percentage": 27.21, "elapsed_time": "1:11:38", "remaining_time": "3:11:42", "throughput": 8730.98, "total_tokens": 37533664} +{"current_steps": 55685, "total_steps": 204665, "loss": 0.0013, "lr": 1.82497167605411e-06, "epoch": 1.3603938142818752, "percentage": 27.21, "elapsed_time": "1:11:39", "remaining_time": "3:11:42", "throughput": 8731.07, "total_tokens": 37537120} +{"current_steps": 55690, "total_steps": 204665, "loss": 0.2504, "lr": 1.8249234763661608e-06, "epoch": 1.3605159651137224, "percentage": 27.21, "elapsed_time": "1:11:39", "remaining_time": "3:11:41", "throughput": 8731.11, "total_tokens": 37540256} +{"current_steps": 55695, "total_steps": 204665, "loss": 0.096, "lr": 1.8248752706791461e-06, "epoch": 1.3606381159455696, "percentage": 27.21, "elapsed_time": "1:11:39", "remaining_time": "3:11:41", "throughput": 8731.27, "total_tokens": 37544096} +{"current_steps": 55700, "total_steps": 204665, "loss": 0.1089, "lr": 1.8248270589934167e-06, "epoch": 1.3607602667774168, "percentage": 27.22, "elapsed_time": "1:11:40", "remaining_time": "3:11:40", "throughput": 8731.32, "total_tokens": 37547424} +{"current_steps": 55705, "total_steps": 204665, "loss": 0.0008, "lr": 1.824778841309323e-06, "epoch": 1.360882417609264, "percentage": 27.22, "elapsed_time": "1:11:40", "remaining_time": "3:11:40", "throughput": 8731.46, "total_tokens": 37551264} +{"current_steps": 55710, "total_steps": 204665, "loss": 0.0011, "lr": 1.8247306176272157e-06, "epoch": 1.361004568441111, "percentage": 27.22, "elapsed_time": "1:11:41", "remaining_time": "3:11:39", "throughput": 8731.48, "total_tokens": 37554400} +{"current_steps": 55715, "total_steps": 204665, "loss": 0.1135, "lr": 1.8246823879474458e-06, "epoch": 1.3611267192729581, "percentage": 27.22, "elapsed_time": "1:11:41", "remaining_time": "3:11:39", "throughput": 8731.55, "total_tokens": 37558048} +{"current_steps": 55720, "total_steps": 204665, "loss": 0.0316, "lr": 1.8246341522703635e-06, "epoch": 1.3612488701048053, "percentage": 27.22, "elapsed_time": "1:11:41", "remaining_time": "3:11:39", "throughput": 8731.61, "total_tokens": 37561376} +{"current_steps": 55725, "total_steps": 204665, "loss": 0.0492, "lr": 1.8245859105963197e-06, "epoch": 1.3613710209366525, "percentage": 27.23, "elapsed_time": "1:11:42", "remaining_time": "3:11:38", "throughput": 8731.69, "total_tokens": 37564768} +{"current_steps": 55730, "total_steps": 204665, "loss": 0.1975, "lr": 1.8245376629256657e-06, "epoch": 1.3614931717684997, "percentage": 27.23, "elapsed_time": "1:11:42", "remaining_time": "3:11:38", "throughput": 8731.74, "total_tokens": 37567968} +{"current_steps": 55735, "total_steps": 204665, "loss": 0.041, "lr": 1.8244894092587517e-06, "epoch": 1.361615322600347, "percentage": 27.23, "elapsed_time": "1:11:42", "remaining_time": "3:11:37", "throughput": 8731.82, "total_tokens": 37571360} +{"current_steps": 55740, "total_steps": 204665, "loss": 0.0009, "lr": 1.8244411495959291e-06, "epoch": 1.361737473432194, "percentage": 27.23, "elapsed_time": "1:11:43", "remaining_time": "3:11:37", "throughput": 8731.9, "total_tokens": 37574752} +{"current_steps": 55745, "total_steps": 204665, "loss": 0.142, "lr": 1.8243928839375488e-06, "epoch": 1.3618596242640413, "percentage": 27.24, "elapsed_time": "1:11:43", "remaining_time": "3:11:36", "throughput": 8731.96, "total_tokens": 37578016} +{"current_steps": 55750, "total_steps": 204665, "loss": 0.0089, "lr": 1.8243446122839615e-06, "epoch": 1.3619817750958885, "percentage": 27.24, "elapsed_time": "1:11:43", "remaining_time": "3:11:36", "throughput": 8732.04, "total_tokens": 37581408} +{"current_steps": 55755, "total_steps": 204665, "loss": 0.076, "lr": 1.8242963346355187e-06, "epoch": 1.3621039259277357, "percentage": 27.24, "elapsed_time": "1:11:44", "remaining_time": "3:11:35", "throughput": 8732.13, "total_tokens": 37584864} +{"current_steps": 55760, "total_steps": 204665, "loss": 0.0738, "lr": 1.8242480509925713e-06, "epoch": 1.3622260767595828, "percentage": 27.24, "elapsed_time": "1:11:44", "remaining_time": "3:11:35", "throughput": 8732.19, "total_tokens": 37588192} +{"current_steps": 55765, "total_steps": 204665, "loss": 0.1124, "lr": 1.8241997613554702e-06, "epoch": 1.3623482275914298, "percentage": 27.25, "elapsed_time": "1:11:44", "remaining_time": "3:11:34", "throughput": 8732.27, "total_tokens": 37591584} +{"current_steps": 55770, "total_steps": 204665, "loss": 0.1641, "lr": 1.8241514657245669e-06, "epoch": 1.362470378423277, "percentage": 27.25, "elapsed_time": "1:11:45", "remaining_time": "3:11:34", "throughput": 8732.34, "total_tokens": 37594912} +{"current_steps": 55775, "total_steps": 204665, "loss": 0.0662, "lr": 1.8241031641002125e-06, "epoch": 1.3625925292551242, "percentage": 27.25, "elapsed_time": "1:11:45", "remaining_time": "3:11:33", "throughput": 8732.41, "total_tokens": 37598240} +{"current_steps": 55780, "total_steps": 204665, "loss": 0.0031, "lr": 1.8240548564827577e-06, "epoch": 1.3627146800869714, "percentage": 27.25, "elapsed_time": "1:11:45", "remaining_time": "3:11:33", "throughput": 8732.43, "total_tokens": 37601312} +{"current_steps": 55785, "total_steps": 204665, "loss": 0.0013, "lr": 1.8240065428725552e-06, "epoch": 1.3628368309188186, "percentage": 27.26, "elapsed_time": "1:11:46", "remaining_time": "3:11:32", "throughput": 8732.5, "total_tokens": 37604640} +{"current_steps": 55790, "total_steps": 204665, "loss": 0.0404, "lr": 1.823958223269955e-06, "epoch": 1.3629589817506658, "percentage": 27.26, "elapsed_time": "1:11:46", "remaining_time": "3:11:32", "throughput": 8732.58, "total_tokens": 37608032} +{"current_steps": 55795, "total_steps": 204665, "loss": 0.0571, "lr": 1.823909897675309e-06, "epoch": 1.363081132582513, "percentage": 27.26, "elapsed_time": "1:11:46", "remaining_time": "3:11:31", "throughput": 8732.63, "total_tokens": 37611296} +{"current_steps": 55800, "total_steps": 204665, "loss": 0.0555, "lr": 1.8238615660889685e-06, "epoch": 1.36320328341436, "percentage": 27.26, "elapsed_time": "1:11:47", "remaining_time": "3:11:31", "throughput": 8732.66, "total_tokens": 37614432} +{"current_steps": 55805, "total_steps": 204665, "loss": 0.0917, "lr": 1.8238132285112853e-06, "epoch": 1.3633254342462071, "percentage": 27.27, "elapsed_time": "1:11:47", "remaining_time": "3:11:30", "throughput": 8732.79, "total_tokens": 37618080} +{"current_steps": 55810, "total_steps": 204665, "loss": 0.0064, "lr": 1.8237648849426103e-06, "epoch": 1.3634475850780543, "percentage": 27.27, "elapsed_time": "1:11:48", "remaining_time": "3:11:30", "throughput": 8732.82, "total_tokens": 37621152} +{"current_steps": 55815, "total_steps": 204665, "loss": 0.3132, "lr": 1.823716535383296e-06, "epoch": 1.3635697359099015, "percentage": 27.27, "elapsed_time": "1:11:48", "remaining_time": "3:11:29", "throughput": 8732.86, "total_tokens": 37624352} +{"current_steps": 55820, "total_steps": 204665, "loss": 0.1327, "lr": 1.8236681798336935e-06, "epoch": 1.3636918867417487, "percentage": 27.27, "elapsed_time": "1:11:48", "remaining_time": "3:11:29", "throughput": 8732.93, "total_tokens": 37627680} +{"current_steps": 55825, "total_steps": 204665, "loss": 0.0672, "lr": 1.8236198182941543e-06, "epoch": 1.3638140375735959, "percentage": 27.28, "elapsed_time": "1:11:49", "remaining_time": "3:11:28", "throughput": 8733.0, "total_tokens": 37631008} +{"current_steps": 55830, "total_steps": 204665, "loss": 0.0468, "lr": 1.8235714507650302e-06, "epoch": 1.363936188405443, "percentage": 27.28, "elapsed_time": "1:11:49", "remaining_time": "3:11:28", "throughput": 8733.15, "total_tokens": 37634784} +{"current_steps": 55835, "total_steps": 204665, "loss": 0.0416, "lr": 1.823523077246673e-06, "epoch": 1.3640583392372903, "percentage": 27.28, "elapsed_time": "1:11:49", "remaining_time": "3:11:27", "throughput": 8733.18, "total_tokens": 37637856} +{"current_steps": 55840, "total_steps": 204665, "loss": 0.0454, "lr": 1.8234746977394346e-06, "epoch": 1.3641804900691374, "percentage": 27.28, "elapsed_time": "1:11:50", "remaining_time": "3:11:27", "throughput": 8733.27, "total_tokens": 37641312} +{"current_steps": 55845, "total_steps": 204665, "loss": 0.1019, "lr": 1.8234263122436667e-06, "epoch": 1.3643026409009846, "percentage": 27.29, "elapsed_time": "1:11:50", "remaining_time": "3:11:26", "throughput": 8733.38, "total_tokens": 37644960} +{"current_steps": 55850, "total_steps": 204665, "loss": 0.0462, "lr": 1.8233779207597211e-06, "epoch": 1.3644247917328318, "percentage": 27.29, "elapsed_time": "1:11:50", "remaining_time": "3:11:26", "throughput": 8733.4, "total_tokens": 37648096} +{"current_steps": 55855, "total_steps": 204665, "loss": 0.0809, "lr": 1.8233295232879497e-06, "epoch": 1.3645469425646788, "percentage": 27.29, "elapsed_time": "1:11:51", "remaining_time": "3:11:25", "throughput": 8733.48, "total_tokens": 37651488} +{"current_steps": 55860, "total_steps": 204665, "loss": 0.1134, "lr": 1.8232811198287048e-06, "epoch": 1.364669093396526, "percentage": 27.29, "elapsed_time": "1:11:51", "remaining_time": "3:11:25", "throughput": 8733.46, "total_tokens": 37654368} +{"current_steps": 55865, "total_steps": 204665, "loss": 0.1791, "lr": 1.823232710382338e-06, "epoch": 1.3647912442283732, "percentage": 27.3, "elapsed_time": "1:11:51", "remaining_time": "3:11:24", "throughput": 8733.52, "total_tokens": 37657696} +{"current_steps": 55870, "total_steps": 204665, "loss": 0.1053, "lr": 1.8231842949492016e-06, "epoch": 1.3649133950602204, "percentage": 27.3, "elapsed_time": "1:11:52", "remaining_time": "3:11:24", "throughput": 8733.55, "total_tokens": 37660896} +{"current_steps": 55875, "total_steps": 204665, "loss": 0.0018, "lr": 1.8231358735296475e-06, "epoch": 1.3650355458920675, "percentage": 27.3, "elapsed_time": "1:11:52", "remaining_time": "3:11:23", "throughput": 8733.62, "total_tokens": 37664224} +{"current_steps": 55880, "total_steps": 204665, "loss": 0.0786, "lr": 1.823087446124028e-06, "epoch": 1.3651576967239147, "percentage": 27.3, "elapsed_time": "1:11:52", "remaining_time": "3:11:23", "throughput": 8733.66, "total_tokens": 37667424} +{"current_steps": 55885, "total_steps": 204665, "loss": 0.1927, "lr": 1.8230390127326954e-06, "epoch": 1.365279847555762, "percentage": 27.31, "elapsed_time": "1:11:53", "remaining_time": "3:11:22", "throughput": 8733.69, "total_tokens": 37670560} +{"current_steps": 55890, "total_steps": 204665, "loss": 0.0488, "lr": 1.8229905733560011e-06, "epoch": 1.365401998387609, "percentage": 27.31, "elapsed_time": "1:11:53", "remaining_time": "3:11:22", "throughput": 8733.72, "total_tokens": 37673696} +{"current_steps": 55895, "total_steps": 204665, "loss": 0.0618, "lr": 1.8229421279942985e-06, "epoch": 1.365524149219456, "percentage": 27.31, "elapsed_time": "1:11:53", "remaining_time": "3:11:21", "throughput": 8733.73, "total_tokens": 37676640} +{"current_steps": 55900, "total_steps": 204665, "loss": 0.1043, "lr": 1.8228936766479394e-06, "epoch": 1.3656463000513033, "percentage": 27.31, "elapsed_time": "1:11:54", "remaining_time": "3:11:21", "throughput": 8733.76, "total_tokens": 37679776} +{"current_steps": 55905, "total_steps": 204665, "loss": 0.0794, "lr": 1.822845219317276e-06, "epoch": 1.3657684508831505, "percentage": 27.32, "elapsed_time": "1:11:54", "remaining_time": "3:11:20", "throughput": 8733.83, "total_tokens": 37683104} +{"current_steps": 55910, "total_steps": 204665, "loss": 0.1115, "lr": 1.822796756002661e-06, "epoch": 1.3658906017149977, "percentage": 27.32, "elapsed_time": "1:11:54", "remaining_time": "3:11:20", "throughput": 8733.91, "total_tokens": 37686496} +{"current_steps": 55915, "total_steps": 204665, "loss": 0.0018, "lr": 1.8227482867044466e-06, "epoch": 1.3660127525468448, "percentage": 27.32, "elapsed_time": "1:11:55", "remaining_time": "3:11:19", "throughput": 8733.98, "total_tokens": 37689824} +{"current_steps": 55920, "total_steps": 204665, "loss": 0.1187, "lr": 1.8226998114229852e-06, "epoch": 1.366134903378692, "percentage": 27.32, "elapsed_time": "1:11:55", "remaining_time": "3:11:19", "throughput": 8734.04, "total_tokens": 37693088} +{"current_steps": 55925, "total_steps": 204665, "loss": 0.001, "lr": 1.8226513301586298e-06, "epoch": 1.3662570542105392, "percentage": 27.33, "elapsed_time": "1:11:56", "remaining_time": "3:11:18", "throughput": 8734.14, "total_tokens": 37696544} +{"current_steps": 55930, "total_steps": 204665, "loss": 0.1704, "lr": 1.8226028429117326e-06, "epoch": 1.3663792050423864, "percentage": 27.33, "elapsed_time": "1:11:56", "remaining_time": "3:11:18", "throughput": 8734.26, "total_tokens": 37700192} +{"current_steps": 55935, "total_steps": 204665, "loss": 0.107, "lr": 1.8225543496826461e-06, "epoch": 1.3665013558742336, "percentage": 27.33, "elapsed_time": "1:11:56", "remaining_time": "3:11:18", "throughput": 8734.32, "total_tokens": 37703456} +{"current_steps": 55940, "total_steps": 204665, "loss": 0.0997, "lr": 1.8225058504717232e-06, "epoch": 1.3666235067060808, "percentage": 27.33, "elapsed_time": "1:11:57", "remaining_time": "3:11:17", "throughput": 8734.41, "total_tokens": 37706912} +{"current_steps": 55945, "total_steps": 204665, "loss": 0.1256, "lr": 1.8224573452793166e-06, "epoch": 1.3667456575379278, "percentage": 27.33, "elapsed_time": "1:11:57", "remaining_time": "3:11:17", "throughput": 8734.46, "total_tokens": 37710240} +{"current_steps": 55950, "total_steps": 204665, "loss": 0.0811, "lr": 1.822408834105779e-06, "epoch": 1.366867808369775, "percentage": 27.34, "elapsed_time": "1:11:57", "remaining_time": "3:11:16", "throughput": 8734.63, "total_tokens": 37714144} +{"current_steps": 55955, "total_steps": 204665, "loss": 0.0367, "lr": 1.822360316951463e-06, "epoch": 1.3669899592016221, "percentage": 27.34, "elapsed_time": "1:11:58", "remaining_time": "3:11:16", "throughput": 8734.7, "total_tokens": 37717472} +{"current_steps": 55960, "total_steps": 204665, "loss": 0.055, "lr": 1.8223117938167217e-06, "epoch": 1.3671121100334693, "percentage": 27.34, "elapsed_time": "1:11:58", "remaining_time": "3:11:15", "throughput": 8734.77, "total_tokens": 37720864} +{"current_steps": 55965, "total_steps": 204665, "loss": 0.0378, "lr": 1.8222632647019079e-06, "epoch": 1.3672342608653165, "percentage": 27.34, "elapsed_time": "1:11:58", "remaining_time": "3:11:15", "throughput": 8734.8, "total_tokens": 37724064} +{"current_steps": 55970, "total_steps": 204665, "loss": 0.0034, "lr": 1.8222147296073741e-06, "epoch": 1.3673564116971637, "percentage": 27.35, "elapsed_time": "1:11:59", "remaining_time": "3:11:14", "throughput": 8734.79, "total_tokens": 37727008} +{"current_steps": 55975, "total_steps": 204665, "loss": 0.1093, "lr": 1.8221661885334741e-06, "epoch": 1.367478562529011, "percentage": 27.35, "elapsed_time": "1:11:59", "remaining_time": "3:11:14", "throughput": 8734.81, "total_tokens": 37730080} +{"current_steps": 55980, "total_steps": 204665, "loss": 0.0725, "lr": 1.8221176414805602e-06, "epoch": 1.3676007133608579, "percentage": 27.35, "elapsed_time": "1:11:59", "remaining_time": "3:11:13", "throughput": 8734.84, "total_tokens": 37733344} +{"current_steps": 55985, "total_steps": 204665, "loss": 0.0012, "lr": 1.8220690884489857e-06, "epoch": 1.367722864192705, "percentage": 27.35, "elapsed_time": "1:12:00", "remaining_time": "3:11:13", "throughput": 8734.87, "total_tokens": 37736480} +{"current_steps": 55990, "total_steps": 204665, "loss": 0.1238, "lr": 1.8220205294391037e-06, "epoch": 1.3678450150245522, "percentage": 27.36, "elapsed_time": "1:12:00", "remaining_time": "3:11:12", "throughput": 8734.96, "total_tokens": 37739936} +{"current_steps": 55995, "total_steps": 204665, "loss": 0.0506, "lr": 1.8219719644512672e-06, "epoch": 1.3679671658563994, "percentage": 27.36, "elapsed_time": "1:12:00", "remaining_time": "3:11:12", "throughput": 8734.96, "total_tokens": 37743008} +{"current_steps": 56000, "total_steps": 204665, "loss": 0.0047, "lr": 1.82192339348583e-06, "epoch": 1.3680893166882466, "percentage": 27.36, "elapsed_time": "1:12:01", "remaining_time": "3:11:11", "throughput": 8735.02, "total_tokens": 37746400} +{"current_steps": 56005, "total_steps": 204665, "loss": 0.0508, "lr": 1.8218748165431444e-06, "epoch": 1.3682114675200938, "percentage": 27.36, "elapsed_time": "1:12:01", "remaining_time": "3:11:11", "throughput": 8735.22, "total_tokens": 37750432} +{"current_steps": 56010, "total_steps": 204665, "loss": 0.2266, "lr": 1.821826233623564e-06, "epoch": 1.368333618351941, "percentage": 27.37, "elapsed_time": "1:12:01", "remaining_time": "3:11:10", "throughput": 8735.36, "total_tokens": 37754208} +{"current_steps": 56015, "total_steps": 204665, "loss": 0.0979, "lr": 1.8217776447274424e-06, "epoch": 1.3684557691837882, "percentage": 27.37, "elapsed_time": "1:12:02", "remaining_time": "3:11:10", "throughput": 8735.39, "total_tokens": 37757408} +{"current_steps": 56020, "total_steps": 204665, "loss": 0.2237, "lr": 1.8217290498551326e-06, "epoch": 1.3685779200156354, "percentage": 27.37, "elapsed_time": "1:12:02", "remaining_time": "3:11:09", "throughput": 8735.45, "total_tokens": 37760736} +{"current_steps": 56025, "total_steps": 204665, "loss": 0.0844, "lr": 1.8216804490069882e-06, "epoch": 1.3687000708474826, "percentage": 27.37, "elapsed_time": "1:12:03", "remaining_time": "3:11:09", "throughput": 8735.45, "total_tokens": 37763744} +{"current_steps": 56030, "total_steps": 204665, "loss": 0.1457, "lr": 1.8216318421833625e-06, "epoch": 1.3688222216793298, "percentage": 27.38, "elapsed_time": "1:12:03", "remaining_time": "3:11:08", "throughput": 8735.48, "total_tokens": 37766880} +{"current_steps": 56035, "total_steps": 204665, "loss": 0.13, "lr": 1.821583229384609e-06, "epoch": 1.3689443725111767, "percentage": 27.38, "elapsed_time": "1:12:03", "remaining_time": "3:11:08", "throughput": 8735.5, "total_tokens": 37770016} +{"current_steps": 56040, "total_steps": 204665, "loss": 0.2275, "lr": 1.8215346106110814e-06, "epoch": 1.369066523343024, "percentage": 27.38, "elapsed_time": "1:12:04", "remaining_time": "3:11:08", "throughput": 8735.5, "total_tokens": 37773024} +{"current_steps": 56045, "total_steps": 204665, "loss": 0.0569, "lr": 1.8214859858631333e-06, "epoch": 1.3691886741748711, "percentage": 27.38, "elapsed_time": "1:12:04", "remaining_time": "3:11:07", "throughput": 8735.49, "total_tokens": 37775968} +{"current_steps": 56050, "total_steps": 204665, "loss": 0.1, "lr": 1.8214373551411177e-06, "epoch": 1.3693108250067183, "percentage": 27.39, "elapsed_time": "1:12:04", "remaining_time": "3:11:07", "throughput": 8735.55, "total_tokens": 37779296} +{"current_steps": 56055, "total_steps": 204665, "loss": 0.19, "lr": 1.8213887184453892e-06, "epoch": 1.3694329758385655, "percentage": 27.39, "elapsed_time": "1:12:05", "remaining_time": "3:11:06", "throughput": 8735.67, "total_tokens": 37782944} +{"current_steps": 56060, "total_steps": 204665, "loss": 0.153, "lr": 1.8213400757763009e-06, "epoch": 1.3695551266704127, "percentage": 27.39, "elapsed_time": "1:12:05", "remaining_time": "3:11:06", "throughput": 8735.85, "total_tokens": 37786912} +{"current_steps": 56065, "total_steps": 204665, "loss": 0.0899, "lr": 1.8212914271342064e-06, "epoch": 1.3696772775022599, "percentage": 27.39, "elapsed_time": "1:12:05", "remaining_time": "3:11:05", "throughput": 8735.93, "total_tokens": 37790368} +{"current_steps": 56070, "total_steps": 204665, "loss": 0.0811, "lr": 1.8212427725194599e-06, "epoch": 1.3697994283341068, "percentage": 27.4, "elapsed_time": "1:12:06", "remaining_time": "3:11:05", "throughput": 8735.99, "total_tokens": 37793696} +{"current_steps": 56075, "total_steps": 204665, "loss": 0.0796, "lr": 1.821194111932415e-06, "epoch": 1.369921579165954, "percentage": 27.4, "elapsed_time": "1:12:06", "remaining_time": "3:11:04", "throughput": 8736.06, "total_tokens": 37797088} +{"current_steps": 56080, "total_steps": 204665, "loss": 0.0422, "lr": 1.821145445373426e-06, "epoch": 1.3700437299978012, "percentage": 27.4, "elapsed_time": "1:12:06", "remaining_time": "3:11:04", "throughput": 8736.12, "total_tokens": 37800480} +{"current_steps": 56085, "total_steps": 204665, "loss": 0.1484, "lr": 1.8210967728428458e-06, "epoch": 1.3701658808296484, "percentage": 27.4, "elapsed_time": "1:12:07", "remaining_time": "3:11:03", "throughput": 8736.13, "total_tokens": 37803488} +{"current_steps": 56090, "total_steps": 204665, "loss": 0.0048, "lr": 1.8210480943410296e-06, "epoch": 1.3702880316614956, "percentage": 27.41, "elapsed_time": "1:12:07", "remaining_time": "3:11:03", "throughput": 8736.14, "total_tokens": 37806496} +{"current_steps": 56095, "total_steps": 204665, "loss": 0.1011, "lr": 1.8209994098683306e-06, "epoch": 1.3704101824933428, "percentage": 27.41, "elapsed_time": "1:12:07", "remaining_time": "3:11:02", "throughput": 8736.22, "total_tokens": 37809888} +{"current_steps": 56100, "total_steps": 204665, "loss": 0.1184, "lr": 1.8209507194251033e-06, "epoch": 1.37053233332519, "percentage": 27.41, "elapsed_time": "1:12:08", "remaining_time": "3:11:02", "throughput": 8736.3, "total_tokens": 37813344} +{"current_steps": 56105, "total_steps": 204665, "loss": 0.1454, "lr": 1.8209020230117012e-06, "epoch": 1.3706544841570372, "percentage": 27.41, "elapsed_time": "1:12:08", "remaining_time": "3:11:01", "throughput": 8736.33, "total_tokens": 37816480} +{"current_steps": 56110, "total_steps": 204665, "loss": 0.0406, "lr": 1.8208533206284788e-06, "epoch": 1.3707766349888844, "percentage": 27.42, "elapsed_time": "1:12:08", "remaining_time": "3:11:01", "throughput": 8736.47, "total_tokens": 37820192} +{"current_steps": 56115, "total_steps": 204665, "loss": 0.0023, "lr": 1.8208046122757903e-06, "epoch": 1.3708987858207315, "percentage": 27.42, "elapsed_time": "1:12:09", "remaining_time": "3:11:00", "throughput": 8736.62, "total_tokens": 37823904} +{"current_steps": 56120, "total_steps": 204665, "loss": 0.0481, "lr": 1.8207558979539903e-06, "epoch": 1.3710209366525787, "percentage": 27.42, "elapsed_time": "1:12:09", "remaining_time": "3:11:00", "throughput": 8736.73, "total_tokens": 37827488} +{"current_steps": 56125, "total_steps": 204665, "loss": 0.1668, "lr": 1.820707177663432e-06, "epoch": 1.3711430874844257, "percentage": 27.42, "elapsed_time": "1:12:10", "remaining_time": "3:10:59", "throughput": 8736.8, "total_tokens": 37830816} +{"current_steps": 56130, "total_steps": 204665, "loss": 0.1436, "lr": 1.8206584514044709e-06, "epoch": 1.371265238316273, "percentage": 27.43, "elapsed_time": "1:12:10", "remaining_time": "3:10:59", "throughput": 8736.84, "total_tokens": 37834016} +{"current_steps": 56135, "total_steps": 204665, "loss": 0.0594, "lr": 1.8206097191774608e-06, "epoch": 1.37138738914812, "percentage": 27.43, "elapsed_time": "1:12:10", "remaining_time": "3:10:58", "throughput": 8736.85, "total_tokens": 37837024} +{"current_steps": 56140, "total_steps": 204665, "loss": 0.0926, "lr": 1.820560980982756e-06, "epoch": 1.3715095399799673, "percentage": 27.43, "elapsed_time": "1:12:11", "remaining_time": "3:10:58", "throughput": 8736.9, "total_tokens": 37840288} +{"current_steps": 56145, "total_steps": 204665, "loss": 0.0034, "lr": 1.8205122368207107e-06, "epoch": 1.3716316908118145, "percentage": 27.43, "elapsed_time": "1:12:11", "remaining_time": "3:10:57", "throughput": 8736.98, "total_tokens": 37843680} +{"current_steps": 56150, "total_steps": 204665, "loss": 0.0444, "lr": 1.82046348669168e-06, "epoch": 1.3717538416436617, "percentage": 27.44, "elapsed_time": "1:12:11", "remaining_time": "3:10:57", "throughput": 8737.0, "total_tokens": 37846752} +{"current_steps": 56155, "total_steps": 204665, "loss": 0.0316, "lr": 1.8204147305960182e-06, "epoch": 1.3718759924755086, "percentage": 27.44, "elapsed_time": "1:12:12", "remaining_time": "3:10:56", "throughput": 8737.02, "total_tokens": 37849824} +{"current_steps": 56160, "total_steps": 204665, "loss": 0.0528, "lr": 1.8203659685340797e-06, "epoch": 1.3719981433073558, "percentage": 27.44, "elapsed_time": "1:12:12", "remaining_time": "3:10:56", "throughput": 8737.08, "total_tokens": 37853088} +{"current_steps": 56165, "total_steps": 204665, "loss": 0.0712, "lr": 1.8203172005062194e-06, "epoch": 1.372120294139203, "percentage": 27.44, "elapsed_time": "1:12:12", "remaining_time": "3:10:55", "throughput": 8737.15, "total_tokens": 37856416} +{"current_steps": 56170, "total_steps": 204665, "loss": 0.068, "lr": 1.8202684265127916e-06, "epoch": 1.3722424449710502, "percentage": 27.44, "elapsed_time": "1:12:13", "remaining_time": "3:10:55", "throughput": 8737.18, "total_tokens": 37859488} +{"current_steps": 56175, "total_steps": 204665, "loss": 0.0865, "lr": 1.8202196465541513e-06, "epoch": 1.3723645958028974, "percentage": 27.45, "elapsed_time": "1:12:13", "remaining_time": "3:10:54", "throughput": 8737.21, "total_tokens": 37862624} +{"current_steps": 56180, "total_steps": 204665, "loss": 0.1067, "lr": 1.820170860630653e-06, "epoch": 1.3724867466347446, "percentage": 27.45, "elapsed_time": "1:12:13", "remaining_time": "3:10:54", "throughput": 8737.25, "total_tokens": 37865760} +{"current_steps": 56185, "total_steps": 204665, "loss": 0.1904, "lr": 1.8201220687426515e-06, "epoch": 1.3726088974665918, "percentage": 27.45, "elapsed_time": "1:12:14", "remaining_time": "3:10:53", "throughput": 8737.31, "total_tokens": 37869024} +{"current_steps": 56190, "total_steps": 204665, "loss": 0.0204, "lr": 1.8200732708905018e-06, "epoch": 1.372731048298439, "percentage": 27.45, "elapsed_time": "1:12:14", "remaining_time": "3:10:53", "throughput": 8737.43, "total_tokens": 37872672} +{"current_steps": 56195, "total_steps": 204665, "loss": 0.0023, "lr": 1.820024467074559e-06, "epoch": 1.3728531991302861, "percentage": 27.46, "elapsed_time": "1:12:14", "remaining_time": "3:10:52", "throughput": 8737.49, "total_tokens": 37875936} +{"current_steps": 56200, "total_steps": 204665, "loss": 0.0177, "lr": 1.8199756572951775e-06, "epoch": 1.3729753499621333, "percentage": 27.46, "elapsed_time": "1:12:15", "remaining_time": "3:10:52", "throughput": 8737.52, "total_tokens": 37879008} +{"current_steps": 56205, "total_steps": 204665, "loss": 0.029, "lr": 1.8199268415527125e-06, "epoch": 1.3730975007939805, "percentage": 27.46, "elapsed_time": "1:12:15", "remaining_time": "3:10:51", "throughput": 8737.52, "total_tokens": 37881952} +{"current_steps": 56210, "total_steps": 204665, "loss": 0.0261, "lr": 1.8198780198475189e-06, "epoch": 1.3732196516258277, "percentage": 27.46, "elapsed_time": "1:12:15", "remaining_time": "3:10:51", "throughput": 8737.55, "total_tokens": 37885088} +{"current_steps": 56215, "total_steps": 204665, "loss": 0.0503, "lr": 1.8198291921799519e-06, "epoch": 1.3733418024576747, "percentage": 27.47, "elapsed_time": "1:12:16", "remaining_time": "3:10:50", "throughput": 8737.56, "total_tokens": 37888096} +{"current_steps": 56220, "total_steps": 204665, "loss": 0.0466, "lr": 1.8197803585503665e-06, "epoch": 1.3734639532895219, "percentage": 27.47, "elapsed_time": "1:12:16", "remaining_time": "3:10:50", "throughput": 8737.65, "total_tokens": 37891616} +{"current_steps": 56225, "total_steps": 204665, "loss": 0.1277, "lr": 1.8197315189591175e-06, "epoch": 1.373586104121369, "percentage": 27.47, "elapsed_time": "1:12:16", "remaining_time": "3:10:49", "throughput": 8737.76, "total_tokens": 37895136} +{"current_steps": 56230, "total_steps": 204665, "loss": 0.1435, "lr": 1.8196826734065608e-06, "epoch": 1.3737082549532162, "percentage": 27.47, "elapsed_time": "1:12:17", "remaining_time": "3:10:49", "throughput": 8737.79, "total_tokens": 37898336} +{"current_steps": 56235, "total_steps": 204665, "loss": 0.1152, "lr": 1.8196338218930513e-06, "epoch": 1.3738304057850634, "percentage": 27.48, "elapsed_time": "1:12:17", "remaining_time": "3:10:49", "throughput": 8737.89, "total_tokens": 37901856} +{"current_steps": 56240, "total_steps": 204665, "loss": 0.0549, "lr": 1.819584964418944e-06, "epoch": 1.3739525566169106, "percentage": 27.48, "elapsed_time": "1:12:17", "remaining_time": "3:10:48", "throughput": 8737.9, "total_tokens": 37904928} +{"current_steps": 56245, "total_steps": 204665, "loss": 0.1053, "lr": 1.8195361009845945e-06, "epoch": 1.3740747074487576, "percentage": 27.48, "elapsed_time": "1:12:18", "remaining_time": "3:10:48", "throughput": 8738.0, "total_tokens": 37908448} +{"current_steps": 56250, "total_steps": 204665, "loss": 0.0571, "lr": 1.819487231590358e-06, "epoch": 1.3741968582806048, "percentage": 27.48, "elapsed_time": "1:12:18", "remaining_time": "3:10:47", "throughput": 8738.16, "total_tokens": 37912352} +{"current_steps": 56255, "total_steps": 204665, "loss": 0.1285, "lr": 1.8194383562365898e-06, "epoch": 1.374319009112452, "percentage": 27.49, "elapsed_time": "1:12:19", "remaining_time": "3:10:47", "throughput": 8738.29, "total_tokens": 37916128} +{"current_steps": 56260, "total_steps": 204665, "loss": 0.0941, "lr": 1.8193894749236458e-06, "epoch": 1.3744411599442992, "percentage": 27.49, "elapsed_time": "1:12:19", "remaining_time": "3:10:46", "throughput": 8738.44, "total_tokens": 37919904} +{"current_steps": 56265, "total_steps": 204665, "loss": 0.0929, "lr": 1.8193405876518808e-06, "epoch": 1.3745633107761464, "percentage": 27.49, "elapsed_time": "1:12:19", "remaining_time": "3:10:46", "throughput": 8738.5, "total_tokens": 37923232} +{"current_steps": 56270, "total_steps": 204665, "loss": 0.085, "lr": 1.8192916944216507e-06, "epoch": 1.3746854616079935, "percentage": 27.49, "elapsed_time": "1:12:20", "remaining_time": "3:10:45", "throughput": 8738.59, "total_tokens": 37926688} +{"current_steps": 56275, "total_steps": 204665, "loss": 0.0989, "lr": 1.8192427952333112e-06, "epoch": 1.3748076124398407, "percentage": 27.5, "elapsed_time": "1:12:20", "remaining_time": "3:10:45", "throughput": 8738.61, "total_tokens": 37929888} +{"current_steps": 56280, "total_steps": 204665, "loss": 0.0382, "lr": 1.8191938900872177e-06, "epoch": 1.374929763271688, "percentage": 27.5, "elapsed_time": "1:12:20", "remaining_time": "3:10:44", "throughput": 8738.6, "total_tokens": 37932896} +{"current_steps": 56285, "total_steps": 204665, "loss": 0.002, "lr": 1.8191449789837258e-06, "epoch": 1.3750519141035351, "percentage": 27.5, "elapsed_time": "1:12:21", "remaining_time": "3:10:44", "throughput": 8738.74, "total_tokens": 37936736} +{"current_steps": 56290, "total_steps": 204665, "loss": 0.0336, "lr": 1.8190960619231915e-06, "epoch": 1.3751740649353823, "percentage": 27.5, "elapsed_time": "1:12:21", "remaining_time": "3:10:43", "throughput": 8738.88, "total_tokens": 37940576} +{"current_steps": 56295, "total_steps": 204665, "loss": 0.0731, "lr": 1.81904713890597e-06, "epoch": 1.3752962157672295, "percentage": 27.51, "elapsed_time": "1:12:21", "remaining_time": "3:10:43", "throughput": 8738.91, "total_tokens": 37943840} +{"current_steps": 56300, "total_steps": 204665, "loss": 0.081, "lr": 1.8189982099324177e-06, "epoch": 1.3754183665990765, "percentage": 27.51, "elapsed_time": "1:12:22", "remaining_time": "3:10:43", "throughput": 8738.94, "total_tokens": 37947040} +{"current_steps": 56305, "total_steps": 204665, "loss": 0.1332, "lr": 1.81894927500289e-06, "epoch": 1.3755405174309236, "percentage": 27.51, "elapsed_time": "1:12:22", "remaining_time": "3:10:42", "throughput": 8738.98, "total_tokens": 37950368} +{"current_steps": 56310, "total_steps": 204665, "loss": 0.1287, "lr": 1.818900334117743e-06, "epoch": 1.3756626682627708, "percentage": 27.51, "elapsed_time": "1:12:23", "remaining_time": "3:10:42", "throughput": 8739.04, "total_tokens": 37953696} +{"current_steps": 56315, "total_steps": 204665, "loss": 0.0293, "lr": 1.8188513872773326e-06, "epoch": 1.375784819094618, "percentage": 27.52, "elapsed_time": "1:12:23", "remaining_time": "3:10:41", "throughput": 8739.04, "total_tokens": 37956768} +{"current_steps": 56320, "total_steps": 204665, "loss": 0.2067, "lr": 1.8188024344820145e-06, "epoch": 1.3759069699264652, "percentage": 27.52, "elapsed_time": "1:12:23", "remaining_time": "3:10:41", "throughput": 8739.07, "total_tokens": 37960032} +{"current_steps": 56325, "total_steps": 204665, "loss": 0.0548, "lr": 1.8187534757321447e-06, "epoch": 1.3760291207583124, "percentage": 27.52, "elapsed_time": "1:12:24", "remaining_time": "3:10:40", "throughput": 8739.37, "total_tokens": 37964640} +{"current_steps": 56330, "total_steps": 204665, "loss": 0.0159, "lr": 1.8187045110280796e-06, "epoch": 1.3761512715901596, "percentage": 27.52, "elapsed_time": "1:12:24", "remaining_time": "3:10:40", "throughput": 8739.49, "total_tokens": 37968288} +{"current_steps": 56335, "total_steps": 204665, "loss": 0.1024, "lr": 1.8186555403701753e-06, "epoch": 1.3762734224220066, "percentage": 27.53, "elapsed_time": "1:12:24", "remaining_time": "3:10:39", "throughput": 8739.58, "total_tokens": 37971744} +{"current_steps": 56340, "total_steps": 204665, "loss": 0.0608, "lr": 1.8186065637587876e-06, "epoch": 1.3763955732538538, "percentage": 27.53, "elapsed_time": "1:12:25", "remaining_time": "3:10:39", "throughput": 8739.72, "total_tokens": 37975456} +{"current_steps": 56345, "total_steps": 204665, "loss": 0.162, "lr": 1.8185575811942723e-06, "epoch": 1.376517724085701, "percentage": 27.53, "elapsed_time": "1:12:25", "remaining_time": "3:10:38", "throughput": 8739.85, "total_tokens": 37979168} +{"current_steps": 56350, "total_steps": 204665, "loss": 0.0232, "lr": 1.8185085926769867e-06, "epoch": 1.3766398749175481, "percentage": 27.53, "elapsed_time": "1:12:25", "remaining_time": "3:10:38", "throughput": 8739.93, "total_tokens": 37982560} +{"current_steps": 56355, "total_steps": 204665, "loss": 0.2073, "lr": 1.8184595982072863e-06, "epoch": 1.3767620257493953, "percentage": 27.54, "elapsed_time": "1:12:26", "remaining_time": "3:10:37", "throughput": 8740.0, "total_tokens": 37985888} +{"current_steps": 56360, "total_steps": 204665, "loss": 0.1076, "lr": 1.8184105977855276e-06, "epoch": 1.3768841765812425, "percentage": 27.54, "elapsed_time": "1:12:26", "remaining_time": "3:10:37", "throughput": 8740.02, "total_tokens": 37988960} +{"current_steps": 56365, "total_steps": 204665, "loss": 0.0922, "lr": 1.8183615914120666e-06, "epoch": 1.3770063274130897, "percentage": 27.54, "elapsed_time": "1:12:26", "remaining_time": "3:10:36", "throughput": 8740.09, "total_tokens": 37992288} +{"current_steps": 56370, "total_steps": 204665, "loss": 0.0049, "lr": 1.8183125790872605e-06, "epoch": 1.377128478244937, "percentage": 27.54, "elapsed_time": "1:12:27", "remaining_time": "3:10:36", "throughput": 8740.13, "total_tokens": 37995488} +{"current_steps": 56375, "total_steps": 204665, "loss": 0.0121, "lr": 1.8182635608114647e-06, "epoch": 1.377250629076784, "percentage": 27.55, "elapsed_time": "1:12:27", "remaining_time": "3:10:35", "throughput": 8740.15, "total_tokens": 37998560} +{"current_steps": 56380, "total_steps": 204665, "loss": 0.1404, "lr": 1.8182145365850366e-06, "epoch": 1.3773727799086313, "percentage": 27.55, "elapsed_time": "1:12:27", "remaining_time": "3:10:35", "throughput": 8740.24, "total_tokens": 38001952} +{"current_steps": 56385, "total_steps": 204665, "loss": 0.0923, "lr": 1.8181655064083322e-06, "epoch": 1.3774949307404785, "percentage": 27.55, "elapsed_time": "1:12:28", "remaining_time": "3:10:35", "throughput": 8740.42, "total_tokens": 38005920} +{"current_steps": 56390, "total_steps": 204665, "loss": 0.0437, "lr": 1.818116470281708e-06, "epoch": 1.3776170815723254, "percentage": 27.55, "elapsed_time": "1:12:28", "remaining_time": "3:10:34", "throughput": 8740.47, "total_tokens": 38009120} +{"current_steps": 56395, "total_steps": 204665, "loss": 0.063, "lr": 1.818067428205521e-06, "epoch": 1.3777392324041726, "percentage": 27.55, "elapsed_time": "1:12:28", "remaining_time": "3:10:34", "throughput": 8740.52, "total_tokens": 38012384} +{"current_steps": 56400, "total_steps": 204665, "loss": 0.0565, "lr": 1.8180183801801277e-06, "epoch": 1.3778613832360198, "percentage": 27.56, "elapsed_time": "1:12:29", "remaining_time": "3:10:33", "throughput": 8740.6, "total_tokens": 38015776} +{"current_steps": 56405, "total_steps": 204665, "loss": 0.0022, "lr": 1.8179693262058844e-06, "epoch": 1.377983534067867, "percentage": 27.56, "elapsed_time": "1:12:29", "remaining_time": "3:10:33", "throughput": 8740.79, "total_tokens": 38019808} +{"current_steps": 56410, "total_steps": 204665, "loss": 0.112, "lr": 1.8179202662831483e-06, "epoch": 1.3781056848997142, "percentage": 27.56, "elapsed_time": "1:12:30", "remaining_time": "3:10:32", "throughput": 8740.83, "total_tokens": 38022944} +{"current_steps": 56415, "total_steps": 204665, "loss": 0.0337, "lr": 1.8178712004122763e-06, "epoch": 1.3782278357315614, "percentage": 27.56, "elapsed_time": "1:12:30", "remaining_time": "3:10:32", "throughput": 8740.9, "total_tokens": 38026336} +{"current_steps": 56420, "total_steps": 204665, "loss": 0.081, "lr": 1.8178221285936246e-06, "epoch": 1.3783499865634086, "percentage": 27.57, "elapsed_time": "1:12:30", "remaining_time": "3:10:31", "throughput": 8741.04, "total_tokens": 38030048} +{"current_steps": 56425, "total_steps": 204665, "loss": 0.0586, "lr": 1.8177730508275504e-06, "epoch": 1.3784721373952555, "percentage": 27.57, "elapsed_time": "1:12:31", "remaining_time": "3:10:31", "throughput": 8741.16, "total_tokens": 38033632} +{"current_steps": 56430, "total_steps": 204665, "loss": 0.0415, "lr": 1.8177239671144106e-06, "epoch": 1.3785942882271027, "percentage": 27.57, "elapsed_time": "1:12:31", "remaining_time": "3:10:30", "throughput": 8741.22, "total_tokens": 38036896} +{"current_steps": 56435, "total_steps": 204665, "loss": 0.1464, "lr": 1.8176748774545626e-06, "epoch": 1.37871643905895, "percentage": 27.57, "elapsed_time": "1:12:31", "remaining_time": "3:10:30", "throughput": 8741.32, "total_tokens": 38040416} +{"current_steps": 56440, "total_steps": 204665, "loss": 0.0029, "lr": 1.8176257818483624e-06, "epoch": 1.378838589890797, "percentage": 27.58, "elapsed_time": "1:12:32", "remaining_time": "3:10:29", "throughput": 8741.44, "total_tokens": 38044000} +{"current_steps": 56445, "total_steps": 204665, "loss": 0.0743, "lr": 1.8175766802961681e-06, "epoch": 1.3789607407226443, "percentage": 27.58, "elapsed_time": "1:12:32", "remaining_time": "3:10:29", "throughput": 8741.52, "total_tokens": 38047392} +{"current_steps": 56450, "total_steps": 204665, "loss": 0.0954, "lr": 1.817527572798336e-06, "epoch": 1.3790828915544915, "percentage": 27.58, "elapsed_time": "1:12:32", "remaining_time": "3:10:28", "throughput": 8741.56, "total_tokens": 38050592} +{"current_steps": 56455, "total_steps": 204665, "loss": 0.0879, "lr": 1.8174784593552235e-06, "epoch": 1.3792050423863387, "percentage": 27.58, "elapsed_time": "1:12:33", "remaining_time": "3:10:28", "throughput": 8741.56, "total_tokens": 38053536} +{"current_steps": 56460, "total_steps": 204665, "loss": 0.1678, "lr": 1.817429339967188e-06, "epoch": 1.3793271932181859, "percentage": 27.59, "elapsed_time": "1:12:33", "remaining_time": "3:10:27", "throughput": 8741.63, "total_tokens": 38056864} +{"current_steps": 56465, "total_steps": 204665, "loss": 0.1658, "lr": 1.817380214634586e-06, "epoch": 1.379449344050033, "percentage": 27.59, "elapsed_time": "1:12:33", "remaining_time": "3:10:27", "throughput": 8741.7, "total_tokens": 38060256} +{"current_steps": 56470, "total_steps": 204665, "loss": 0.1639, "lr": 1.8173310833577754e-06, "epoch": 1.3795714948818802, "percentage": 27.59, "elapsed_time": "1:12:34", "remaining_time": "3:10:26", "throughput": 8741.73, "total_tokens": 38063392} +{"current_steps": 56475, "total_steps": 204665, "loss": 0.0794, "lr": 1.8172819461371138e-06, "epoch": 1.3796936457137274, "percentage": 27.59, "elapsed_time": "1:12:34", "remaining_time": "3:10:26", "throughput": 8741.84, "total_tokens": 38066912} +{"current_steps": 56480, "total_steps": 204665, "loss": 0.1965, "lr": 1.8172328029729577e-06, "epoch": 1.3798157965455744, "percentage": 27.6, "elapsed_time": "1:12:34", "remaining_time": "3:10:25", "throughput": 8741.9, "total_tokens": 38070240} +{"current_steps": 56485, "total_steps": 204665, "loss": 0.1164, "lr": 1.8171836538656645e-06, "epoch": 1.3799379473774216, "percentage": 27.6, "elapsed_time": "1:12:35", "remaining_time": "3:10:25", "throughput": 8742.01, "total_tokens": 38073760} +{"current_steps": 56490, "total_steps": 204665, "loss": 0.0277, "lr": 1.8171344988155925e-06, "epoch": 1.3800600982092688, "percentage": 27.6, "elapsed_time": "1:12:35", "remaining_time": "3:10:24", "throughput": 8742.03, "total_tokens": 38076832} +{"current_steps": 56495, "total_steps": 204665, "loss": 0.1765, "lr": 1.8170853378230985e-06, "epoch": 1.380182249041116, "percentage": 27.6, "elapsed_time": "1:12:35", "remaining_time": "3:10:24", "throughput": 8742.04, "total_tokens": 38079840} +{"current_steps": 56500, "total_steps": 204665, "loss": 0.143, "lr": 1.8170361708885402e-06, "epoch": 1.3803043998729632, "percentage": 27.61, "elapsed_time": "1:12:36", "remaining_time": "3:10:23", "throughput": 8742.05, "total_tokens": 38082848} +{"current_steps": 56505, "total_steps": 204665, "loss": 0.098, "lr": 1.816986998012275e-06, "epoch": 1.3804265507048104, "percentage": 27.61, "elapsed_time": "1:12:36", "remaining_time": "3:10:23", "throughput": 8742.14, "total_tokens": 38086304} +{"current_steps": 56510, "total_steps": 204665, "loss": 0.0048, "lr": 1.8169378191946607e-06, "epoch": 1.3805487015366575, "percentage": 27.61, "elapsed_time": "1:12:36", "remaining_time": "3:10:22", "throughput": 8742.28, "total_tokens": 38090016} +{"current_steps": 56515, "total_steps": 204665, "loss": 0.0051, "lr": 1.8168886344360549e-06, "epoch": 1.3806708523685045, "percentage": 27.61, "elapsed_time": "1:12:37", "remaining_time": "3:10:22", "throughput": 8742.31, "total_tokens": 38093152} +{"current_steps": 56520, "total_steps": 204665, "loss": 0.1642, "lr": 1.816839443736815e-06, "epoch": 1.3807930032003517, "percentage": 27.62, "elapsed_time": "1:12:37", "remaining_time": "3:10:21", "throughput": 8742.45, "total_tokens": 38096864} +{"current_steps": 56525, "total_steps": 204665, "loss": 0.0691, "lr": 1.816790247097299e-06, "epoch": 1.3809151540321989, "percentage": 27.62, "elapsed_time": "1:12:38", "remaining_time": "3:10:21", "throughput": 8742.47, "total_tokens": 38099936} +{"current_steps": 56530, "total_steps": 204665, "loss": 0.1108, "lr": 1.8167410445178649e-06, "epoch": 1.381037304864046, "percentage": 27.62, "elapsed_time": "1:12:38", "remaining_time": "3:10:20", "throughput": 8742.54, "total_tokens": 38103264} +{"current_steps": 56535, "total_steps": 204665, "loss": 0.1469, "lr": 1.8166918359988702e-06, "epoch": 1.3811594556958933, "percentage": 27.62, "elapsed_time": "1:12:38", "remaining_time": "3:10:20", "throughput": 8742.6, "total_tokens": 38106592} +{"current_steps": 56540, "total_steps": 204665, "loss": 0.0857, "lr": 1.8166426215406726e-06, "epoch": 1.3812816065277405, "percentage": 27.63, "elapsed_time": "1:12:39", "remaining_time": "3:10:19", "throughput": 8742.61, "total_tokens": 38109600} +{"current_steps": 56545, "total_steps": 204665, "loss": 0.1107, "lr": 1.8165934011436303e-06, "epoch": 1.3814037573595876, "percentage": 27.63, "elapsed_time": "1:12:39", "remaining_time": "3:10:19", "throughput": 8742.63, "total_tokens": 38112672} +{"current_steps": 56550, "total_steps": 204665, "loss": 0.0608, "lr": 1.8165441748081012e-06, "epoch": 1.3815259081914348, "percentage": 27.63, "elapsed_time": "1:12:39", "remaining_time": "3:10:18", "throughput": 8742.68, "total_tokens": 38115872} +{"current_steps": 56555, "total_steps": 204665, "loss": 0.0424, "lr": 1.8164949425344428e-06, "epoch": 1.381648059023282, "percentage": 27.63, "elapsed_time": "1:12:40", "remaining_time": "3:10:18", "throughput": 8742.98, "total_tokens": 38120480} +{"current_steps": 56560, "total_steps": 204665, "loss": 0.025, "lr": 1.8164457043230144e-06, "epoch": 1.3817702098551292, "percentage": 27.64, "elapsed_time": "1:12:40", "remaining_time": "3:10:18", "throughput": 8743.01, "total_tokens": 38123616} +{"current_steps": 56565, "total_steps": 204665, "loss": 0.1618, "lr": 1.8163964601741726e-06, "epoch": 1.3818923606869764, "percentage": 27.64, "elapsed_time": "1:12:40", "remaining_time": "3:10:17", "throughput": 8743.21, "total_tokens": 38127712} +{"current_steps": 56570, "total_steps": 204665, "loss": 0.063, "lr": 1.8163472100882763e-06, "epoch": 1.3820145115188234, "percentage": 27.64, "elapsed_time": "1:12:41", "remaining_time": "3:10:17", "throughput": 8743.37, "total_tokens": 38131488} +{"current_steps": 56575, "total_steps": 204665, "loss": 0.1056, "lr": 1.8162979540656837e-06, "epoch": 1.3821366623506706, "percentage": 27.64, "elapsed_time": "1:12:41", "remaining_time": "3:10:16", "throughput": 8743.37, "total_tokens": 38134496} +{"current_steps": 56580, "total_steps": 204665, "loss": 0.1367, "lr": 1.8162486921067525e-06, "epoch": 1.3822588131825178, "percentage": 27.65, "elapsed_time": "1:12:41", "remaining_time": "3:10:16", "throughput": 8743.46, "total_tokens": 38137888} +{"current_steps": 56585, "total_steps": 204665, "loss": 0.0409, "lr": 1.8161994242118416e-06, "epoch": 1.382380964014365, "percentage": 27.65, "elapsed_time": "1:12:42", "remaining_time": "3:10:15", "throughput": 8743.57, "total_tokens": 38141472} +{"current_steps": 56590, "total_steps": 204665, "loss": 0.1291, "lr": 1.8161501503813085e-06, "epoch": 1.3825031148462121, "percentage": 27.65, "elapsed_time": "1:12:42", "remaining_time": "3:10:15", "throughput": 8743.67, "total_tokens": 38144992} +{"current_steps": 56595, "total_steps": 204665, "loss": 0.0197, "lr": 1.8161008706155126e-06, "epoch": 1.3826252656780593, "percentage": 27.65, "elapsed_time": "1:12:42", "remaining_time": "3:10:14", "throughput": 8743.72, "total_tokens": 38148256} +{"current_steps": 56600, "total_steps": 204665, "loss": 0.1117, "lr": 1.8160515849148112e-06, "epoch": 1.3827474165099065, "percentage": 27.65, "elapsed_time": "1:12:43", "remaining_time": "3:10:14", "throughput": 8743.76, "total_tokens": 38151392} +{"current_steps": 56605, "total_steps": 204665, "loss": 0.0846, "lr": 1.8160022932795632e-06, "epoch": 1.3828695673417535, "percentage": 27.66, "elapsed_time": "1:12:43", "remaining_time": "3:10:13", "throughput": 8743.79, "total_tokens": 38154528} +{"current_steps": 56610, "total_steps": 204665, "loss": 0.0342, "lr": 1.8159529957101273e-06, "epoch": 1.3829917181736007, "percentage": 27.66, "elapsed_time": "1:12:43", "remaining_time": "3:10:13", "throughput": 8743.87, "total_tokens": 38157920} +{"current_steps": 56615, "total_steps": 204665, "loss": 0.0348, "lr": 1.8159036922068616e-06, "epoch": 1.3831138690054479, "percentage": 27.66, "elapsed_time": "1:12:44", "remaining_time": "3:10:12", "throughput": 8744.0, "total_tokens": 38161568} +{"current_steps": 56620, "total_steps": 204665, "loss": 0.1753, "lr": 1.8158543827701249e-06, "epoch": 1.383236019837295, "percentage": 27.66, "elapsed_time": "1:12:44", "remaining_time": "3:10:12", "throughput": 8744.09, "total_tokens": 38165024} +{"current_steps": 56625, "total_steps": 204665, "loss": 0.0462, "lr": 1.8158050674002757e-06, "epoch": 1.3833581706691422, "percentage": 27.67, "elapsed_time": "1:12:45", "remaining_time": "3:10:11", "throughput": 8744.14, "total_tokens": 38168288} +{"current_steps": 56630, "total_steps": 204665, "loss": 0.2128, "lr": 1.8157557460976725e-06, "epoch": 1.3834803215009894, "percentage": 27.67, "elapsed_time": "1:12:45", "remaining_time": "3:10:11", "throughput": 8744.18, "total_tokens": 38171488} +{"current_steps": 56635, "total_steps": 204665, "loss": 0.0025, "lr": 1.815706418862674e-06, "epoch": 1.3836024723328366, "percentage": 27.67, "elapsed_time": "1:12:45", "remaining_time": "3:10:10", "throughput": 8744.21, "total_tokens": 38174624} +{"current_steps": 56640, "total_steps": 204665, "loss": 0.0736, "lr": 1.8156570856956393e-06, "epoch": 1.3837246231646838, "percentage": 27.67, "elapsed_time": "1:12:46", "remaining_time": "3:10:10", "throughput": 8744.27, "total_tokens": 38177888} +{"current_steps": 56645, "total_steps": 204665, "loss": 0.0017, "lr": 1.8156077465969267e-06, "epoch": 1.383846773996531, "percentage": 27.68, "elapsed_time": "1:12:46", "remaining_time": "3:10:09", "throughput": 8744.29, "total_tokens": 38180960} +{"current_steps": 56650, "total_steps": 204665, "loss": 0.0432, "lr": 1.8155584015668954e-06, "epoch": 1.3839689248283782, "percentage": 27.68, "elapsed_time": "1:12:46", "remaining_time": "3:10:09", "throughput": 8744.34, "total_tokens": 38184224} +{"current_steps": 56655, "total_steps": 204665, "loss": 0.0316, "lr": 1.8155090506059039e-06, "epoch": 1.3840910756602254, "percentage": 27.68, "elapsed_time": "1:12:47", "remaining_time": "3:10:08", "throughput": 8744.4, "total_tokens": 38187552} +{"current_steps": 56660, "total_steps": 204665, "loss": 0.1797, "lr": 1.815459693714311e-06, "epoch": 1.3842132264920723, "percentage": 27.68, "elapsed_time": "1:12:47", "remaining_time": "3:10:08", "throughput": 8744.44, "total_tokens": 38190752} +{"current_steps": 56665, "total_steps": 204665, "loss": 0.2378, "lr": 1.8154103308924763e-06, "epoch": 1.3843353773239195, "percentage": 27.69, "elapsed_time": "1:12:47", "remaining_time": "3:10:07", "throughput": 8744.63, "total_tokens": 38194720} +{"current_steps": 56670, "total_steps": 204665, "loss": 0.0175, "lr": 1.815360962140758e-06, "epoch": 1.3844575281557667, "percentage": 27.69, "elapsed_time": "1:12:48", "remaining_time": "3:10:07", "throughput": 8744.79, "total_tokens": 38198624} +{"current_steps": 56675, "total_steps": 204665, "loss": 0.1147, "lr": 1.8153115874595158e-06, "epoch": 1.384579678987614, "percentage": 27.69, "elapsed_time": "1:12:48", "remaining_time": "3:10:07", "throughput": 8744.82, "total_tokens": 38201760} +{"current_steps": 56680, "total_steps": 204665, "loss": 0.0694, "lr": 1.815262206849108e-06, "epoch": 1.384701829819461, "percentage": 27.69, "elapsed_time": "1:12:48", "remaining_time": "3:10:06", "throughput": 8744.89, "total_tokens": 38205088} +{"current_steps": 56685, "total_steps": 204665, "loss": 0.0993, "lr": 1.8152128203098943e-06, "epoch": 1.3848239806513083, "percentage": 27.7, "elapsed_time": "1:12:49", "remaining_time": "3:10:06", "throughput": 8745.01, "total_tokens": 38208736} +{"current_steps": 56690, "total_steps": 204665, "loss": 0.0757, "lr": 1.815163427842234e-06, "epoch": 1.3849461314831553, "percentage": 27.7, "elapsed_time": "1:12:49", "remaining_time": "3:10:05", "throughput": 8745.09, "total_tokens": 38212128} +{"current_steps": 56695, "total_steps": 204665, "loss": 0.114, "lr": 1.8151140294464858e-06, "epoch": 1.3850682823150025, "percentage": 27.7, "elapsed_time": "1:12:49", "remaining_time": "3:10:05", "throughput": 8745.14, "total_tokens": 38215392} +{"current_steps": 56700, "total_steps": 204665, "loss": 0.0642, "lr": 1.8150646251230092e-06, "epoch": 1.3851904331468496, "percentage": 27.7, "elapsed_time": "1:12:50", "remaining_time": "3:10:04", "throughput": 8745.16, "total_tokens": 38218464} +{"current_steps": 56705, "total_steps": 204665, "loss": 0.1127, "lr": 1.8150152148721637e-06, "epoch": 1.3853125839786968, "percentage": 27.71, "elapsed_time": "1:12:50", "remaining_time": "3:10:04", "throughput": 8745.22, "total_tokens": 38221728} +{"current_steps": 56710, "total_steps": 204665, "loss": 0.0424, "lr": 1.8149657986943078e-06, "epoch": 1.385434734810544, "percentage": 27.71, "elapsed_time": "1:12:50", "remaining_time": "3:10:03", "throughput": 8745.3, "total_tokens": 38225120} +{"current_steps": 56715, "total_steps": 204665, "loss": 0.0771, "lr": 1.8149163765898016e-06, "epoch": 1.3855568856423912, "percentage": 27.71, "elapsed_time": "1:12:51", "remaining_time": "3:10:03", "throughput": 8745.33, "total_tokens": 38228256} +{"current_steps": 56720, "total_steps": 204665, "loss": 0.0552, "lr": 1.8148669485590044e-06, "epoch": 1.3856790364742384, "percentage": 27.71, "elapsed_time": "1:12:51", "remaining_time": "3:10:02", "throughput": 8745.52, "total_tokens": 38232224} +{"current_steps": 56725, "total_steps": 204665, "loss": 0.0013, "lr": 1.8148175146022758e-06, "epoch": 1.3858011873060856, "percentage": 27.72, "elapsed_time": "1:12:51", "remaining_time": "3:10:02", "throughput": 8745.55, "total_tokens": 38235424} +{"current_steps": 56730, "total_steps": 204665, "loss": 0.0982, "lr": 1.8147680747199748e-06, "epoch": 1.3859233381379328, "percentage": 27.72, "elapsed_time": "1:12:52", "remaining_time": "3:10:01", "throughput": 8745.63, "total_tokens": 38238816} +{"current_steps": 56735, "total_steps": 204665, "loss": 0.0017, "lr": 1.8147186289124611e-06, "epoch": 1.38604548896978, "percentage": 27.72, "elapsed_time": "1:12:52", "remaining_time": "3:10:01", "throughput": 8745.89, "total_tokens": 38243168} +{"current_steps": 56740, "total_steps": 204665, "loss": 0.045, "lr": 1.8146691771800945e-06, "epoch": 1.3861676398016272, "percentage": 27.72, "elapsed_time": "1:12:53", "remaining_time": "3:10:00", "throughput": 8745.93, "total_tokens": 38246368} +{"current_steps": 56745, "total_steps": 204665, "loss": 0.0683, "lr": 1.8146197195232347e-06, "epoch": 1.3862897906334744, "percentage": 27.73, "elapsed_time": "1:12:53", "remaining_time": "3:10:00", "throughput": 8745.98, "total_tokens": 38249568} +{"current_steps": 56750, "total_steps": 204665, "loss": 0.0041, "lr": 1.814570255942241e-06, "epoch": 1.3864119414653213, "percentage": 27.73, "elapsed_time": "1:12:53", "remaining_time": "3:09:59", "throughput": 8746.06, "total_tokens": 38252960} +{"current_steps": 56755, "total_steps": 204665, "loss": 0.0906, "lr": 1.8145207864374734e-06, "epoch": 1.3865340922971685, "percentage": 27.73, "elapsed_time": "1:12:54", "remaining_time": "3:09:59", "throughput": 8746.16, "total_tokens": 38256480} +{"current_steps": 56760, "total_steps": 204665, "loss": 0.0711, "lr": 1.8144713110092915e-06, "epoch": 1.3866562431290157, "percentage": 27.73, "elapsed_time": "1:12:54", "remaining_time": "3:09:58", "throughput": 8746.27, "total_tokens": 38260064} +{"current_steps": 56765, "total_steps": 204665, "loss": 0.0695, "lr": 1.8144218296580553e-06, "epoch": 1.3867783939608629, "percentage": 27.74, "elapsed_time": "1:12:54", "remaining_time": "3:09:58", "throughput": 8746.31, "total_tokens": 38263328} +{"current_steps": 56770, "total_steps": 204665, "loss": 0.023, "lr": 1.8143723423841241e-06, "epoch": 1.38690054479271, "percentage": 27.74, "elapsed_time": "1:12:55", "remaining_time": "3:09:57", "throughput": 8746.45, "total_tokens": 38267040} +{"current_steps": 56775, "total_steps": 204665, "loss": 0.0352, "lr": 1.814322849187859e-06, "epoch": 1.3870226956245573, "percentage": 27.74, "elapsed_time": "1:12:55", "remaining_time": "3:09:57", "throughput": 8746.52, "total_tokens": 38270368} +{"current_steps": 56780, "total_steps": 204665, "loss": 0.0862, "lr": 1.814273350069618e-06, "epoch": 1.3871448464564042, "percentage": 27.74, "elapsed_time": "1:12:55", "remaining_time": "3:09:57", "throughput": 8746.57, "total_tokens": 38273632} +{"current_steps": 56785, "total_steps": 204665, "loss": 0.1411, "lr": 1.8142238450297632e-06, "epoch": 1.3872669972882514, "percentage": 27.75, "elapsed_time": "1:12:56", "remaining_time": "3:09:56", "throughput": 8746.6, "total_tokens": 38276768} +{"current_steps": 56790, "total_steps": 204665, "loss": 0.0709, "lr": 1.814174334068653e-06, "epoch": 1.3873891481200986, "percentage": 27.75, "elapsed_time": "1:12:56", "remaining_time": "3:09:56", "throughput": 8746.65, "total_tokens": 38279968} +{"current_steps": 56795, "total_steps": 204665, "loss": 0.0428, "lr": 1.8141248171866482e-06, "epoch": 1.3875112989519458, "percentage": 27.75, "elapsed_time": "1:12:56", "remaining_time": "3:09:55", "throughput": 8746.74, "total_tokens": 38283424} +{"current_steps": 56800, "total_steps": 204665, "loss": 0.181, "lr": 1.814075294384109e-06, "epoch": 1.387633449783793, "percentage": 27.75, "elapsed_time": "1:12:57", "remaining_time": "3:09:55", "throughput": 8746.83, "total_tokens": 38286880} +{"current_steps": 56805, "total_steps": 204665, "loss": 0.1029, "lr": 1.8140257656613952e-06, "epoch": 1.3877556006156402, "percentage": 27.76, "elapsed_time": "1:12:57", "remaining_time": "3:09:54", "throughput": 8746.89, "total_tokens": 38290208} +{"current_steps": 56810, "total_steps": 204665, "loss": 0.0015, "lr": 1.8139762310188666e-06, "epoch": 1.3878777514474874, "percentage": 27.76, "elapsed_time": "1:12:57", "remaining_time": "3:09:54", "throughput": 8746.97, "total_tokens": 38293600} +{"current_steps": 56815, "total_steps": 204665, "loss": 0.0907, "lr": 1.8139266904568844e-06, "epoch": 1.3879999022793346, "percentage": 27.76, "elapsed_time": "1:12:58", "remaining_time": "3:09:53", "throughput": 8747.08, "total_tokens": 38297184} +{"current_steps": 56820, "total_steps": 204665, "loss": 0.0459, "lr": 1.8138771439758083e-06, "epoch": 1.3881220531111818, "percentage": 27.76, "elapsed_time": "1:12:58", "remaining_time": "3:09:53", "throughput": 8747.14, "total_tokens": 38300448} +{"current_steps": 56825, "total_steps": 204665, "loss": 0.0013, "lr": 1.8138275915759986e-06, "epoch": 1.388244203943029, "percentage": 27.76, "elapsed_time": "1:12:58", "remaining_time": "3:09:52", "throughput": 8747.19, "total_tokens": 38303712} +{"current_steps": 56830, "total_steps": 204665, "loss": 0.1807, "lr": 1.8137780332578158e-06, "epoch": 1.3883663547748761, "percentage": 27.77, "elapsed_time": "1:12:59", "remaining_time": "3:09:52", "throughput": 8747.3, "total_tokens": 38307296} +{"current_steps": 56835, "total_steps": 204665, "loss": 0.2124, "lr": 1.8137284690216204e-06, "epoch": 1.388488505606723, "percentage": 27.77, "elapsed_time": "1:12:59", "remaining_time": "3:09:51", "throughput": 8747.35, "total_tokens": 38310496} +{"current_steps": 56840, "total_steps": 204665, "loss": 0.099, "lr": 1.8136788988677725e-06, "epoch": 1.3886106564385703, "percentage": 27.77, "elapsed_time": "1:13:00", "remaining_time": "3:09:51", "throughput": 8747.43, "total_tokens": 38313888} +{"current_steps": 56845, "total_steps": 204665, "loss": 0.0729, "lr": 1.813629322796633e-06, "epoch": 1.3887328072704175, "percentage": 27.77, "elapsed_time": "1:13:00", "remaining_time": "3:09:50", "throughput": 8747.55, "total_tokens": 38317536} +{"current_steps": 56850, "total_steps": 204665, "loss": 0.1225, "lr": 1.8135797408085623e-06, "epoch": 1.3888549581022647, "percentage": 27.78, "elapsed_time": "1:13:00", "remaining_time": "3:09:50", "throughput": 8747.66, "total_tokens": 38321120} +{"current_steps": 56855, "total_steps": 204665, "loss": 0.0755, "lr": 1.8135301529039207e-06, "epoch": 1.3889771089341119, "percentage": 27.78, "elapsed_time": "1:13:01", "remaining_time": "3:09:49", "throughput": 8747.7, "total_tokens": 38324320} +{"current_steps": 56860, "total_steps": 204665, "loss": 0.0337, "lr": 1.813480559083069e-06, "epoch": 1.389099259765959, "percentage": 27.78, "elapsed_time": "1:13:01", "remaining_time": "3:09:49", "throughput": 8747.8, "total_tokens": 38327840} +{"current_steps": 56865, "total_steps": 204665, "loss": 0.1575, "lr": 1.813430959346368e-06, "epoch": 1.3892214105978062, "percentage": 27.78, "elapsed_time": "1:13:01", "remaining_time": "3:09:48", "throughput": 8747.85, "total_tokens": 38331104} +{"current_steps": 56870, "total_steps": 204665, "loss": 0.1537, "lr": 1.813381353694178e-06, "epoch": 1.3893435614296532, "percentage": 27.79, "elapsed_time": "1:13:02", "remaining_time": "3:09:48", "throughput": 8747.92, "total_tokens": 38334432} +{"current_steps": 56875, "total_steps": 204665, "loss": 0.0716, "lr": 1.8133317421268601e-06, "epoch": 1.3894657122615004, "percentage": 27.79, "elapsed_time": "1:13:02", "remaining_time": "3:09:47", "throughput": 8748.03, "total_tokens": 38338016} +{"current_steps": 56880, "total_steps": 204665, "loss": 0.0679, "lr": 1.8132821246447753e-06, "epoch": 1.3895878630933476, "percentage": 27.79, "elapsed_time": "1:13:02", "remaining_time": "3:09:47", "throughput": 8748.1, "total_tokens": 38341344} +{"current_steps": 56885, "total_steps": 204665, "loss": 0.1301, "lr": 1.813232501248284e-06, "epoch": 1.3897100139251948, "percentage": 27.79, "elapsed_time": "1:13:03", "remaining_time": "3:09:46", "throughput": 8748.21, "total_tokens": 38344928} +{"current_steps": 56890, "total_steps": 204665, "loss": 0.0464, "lr": 1.813182871937747e-06, "epoch": 1.389832164757042, "percentage": 27.8, "elapsed_time": "1:13:03", "remaining_time": "3:09:46", "throughput": 8748.26, "total_tokens": 38348128} +{"current_steps": 56895, "total_steps": 204665, "loss": 0.1033, "lr": 1.8131332367135256e-06, "epoch": 1.3899543155888892, "percentage": 27.8, "elapsed_time": "1:13:03", "remaining_time": "3:09:45", "throughput": 8748.32, "total_tokens": 38351392} +{"current_steps": 56900, "total_steps": 204665, "loss": 0.0761, "lr": 1.8130835955759807e-06, "epoch": 1.3900764664207363, "percentage": 27.8, "elapsed_time": "1:13:04", "remaining_time": "3:09:45", "throughput": 8748.29, "total_tokens": 38354208} +{"current_steps": 56905, "total_steps": 204665, "loss": 0.0616, "lr": 1.8130339485254731e-06, "epoch": 1.3901986172525835, "percentage": 27.8, "elapsed_time": "1:13:04", "remaining_time": "3:09:44", "throughput": 8748.39, "total_tokens": 38357728} +{"current_steps": 56910, "total_steps": 204665, "loss": 0.0903, "lr": 1.812984295562364e-06, "epoch": 1.3903207680844307, "percentage": 27.81, "elapsed_time": "1:13:04", "remaining_time": "3:09:44", "throughput": 8748.46, "total_tokens": 38361056} +{"current_steps": 56915, "total_steps": 204665, "loss": 0.0795, "lr": 1.8129346366870143e-06, "epoch": 1.390442918916278, "percentage": 27.81, "elapsed_time": "1:13:05", "remaining_time": "3:09:43", "throughput": 8748.5, "total_tokens": 38364256} +{"current_steps": 56920, "total_steps": 204665, "loss": 0.0247, "lr": 1.8128849718997854e-06, "epoch": 1.390565069748125, "percentage": 27.81, "elapsed_time": "1:13:05", "remaining_time": "3:09:43", "throughput": 8748.62, "total_tokens": 38367840} +{"current_steps": 56925, "total_steps": 204665, "loss": 0.035, "lr": 1.8128353012010385e-06, "epoch": 1.390687220579972, "percentage": 27.81, "elapsed_time": "1:13:05", "remaining_time": "3:09:43", "throughput": 8748.78, "total_tokens": 38371680} +{"current_steps": 56930, "total_steps": 204665, "loss": 0.1329, "lr": 1.8127856245911343e-06, "epoch": 1.3908093714118193, "percentage": 27.82, "elapsed_time": "1:13:06", "remaining_time": "3:09:42", "throughput": 8748.84, "total_tokens": 38374944} +{"current_steps": 56935, "total_steps": 204665, "loss": 0.0395, "lr": 1.8127359420704344e-06, "epoch": 1.3909315222436665, "percentage": 27.82, "elapsed_time": "1:13:06", "remaining_time": "3:09:42", "throughput": 8748.91, "total_tokens": 38378336} +{"current_steps": 56940, "total_steps": 204665, "loss": 0.0486, "lr": 1.8126862536393005e-06, "epoch": 1.3910536730755136, "percentage": 27.82, "elapsed_time": "1:13:06", "remaining_time": "3:09:41", "throughput": 8748.98, "total_tokens": 38381664} +{"current_steps": 56945, "total_steps": 204665, "loss": 0.0424, "lr": 1.8126365592980935e-06, "epoch": 1.3911758239073608, "percentage": 27.82, "elapsed_time": "1:13:07", "remaining_time": "3:09:41", "throughput": 8749.05, "total_tokens": 38385056} +{"current_steps": 56950, "total_steps": 204665, "loss": 0.1111, "lr": 1.8125868590471748e-06, "epoch": 1.391297974739208, "percentage": 27.83, "elapsed_time": "1:13:07", "remaining_time": "3:09:40", "throughput": 8749.15, "total_tokens": 38388512} +{"current_steps": 56955, "total_steps": 204665, "loss": 0.003, "lr": 1.8125371528869059e-06, "epoch": 1.3914201255710552, "percentage": 27.83, "elapsed_time": "1:13:08", "remaining_time": "3:09:40", "throughput": 8749.19, "total_tokens": 38391712} +{"current_steps": 56960, "total_steps": 204665, "loss": 0.154, "lr": 1.812487440817648e-06, "epoch": 1.3915422764029022, "percentage": 27.83, "elapsed_time": "1:13:08", "remaining_time": "3:09:39", "throughput": 8749.25, "total_tokens": 38395040} +{"current_steps": 56965, "total_steps": 204665, "loss": 0.0939, "lr": 1.8124377228397631e-06, "epoch": 1.3916644272347494, "percentage": 27.83, "elapsed_time": "1:13:08", "remaining_time": "3:09:39", "throughput": 8749.26, "total_tokens": 38398048} +{"current_steps": 56970, "total_steps": 204665, "loss": 0.1104, "lr": 1.8123879989536129e-06, "epoch": 1.3917865780665966, "percentage": 27.84, "elapsed_time": "1:13:09", "remaining_time": "3:09:38", "throughput": 8749.29, "total_tokens": 38401248} +{"current_steps": 56975, "total_steps": 204665, "loss": 0.0908, "lr": 1.8123382691595581e-06, "epoch": 1.3919087288984437, "percentage": 27.84, "elapsed_time": "1:13:09", "remaining_time": "3:09:38", "throughput": 8749.4, "total_tokens": 38404832} +{"current_steps": 56980, "total_steps": 204665, "loss": 0.0896, "lr": 1.8122885334579615e-06, "epoch": 1.392030879730291, "percentage": 27.84, "elapsed_time": "1:13:09", "remaining_time": "3:09:37", "throughput": 8749.45, "total_tokens": 38408096} +{"current_steps": 56985, "total_steps": 204665, "loss": 0.1319, "lr": 1.8122387918491838e-06, "epoch": 1.3921530305621381, "percentage": 27.84, "elapsed_time": "1:13:10", "remaining_time": "3:09:37", "throughput": 8749.64, "total_tokens": 38412128} +{"current_steps": 56990, "total_steps": 204665, "loss": 0.1685, "lr": 1.8121890443335873e-06, "epoch": 1.3922751813939853, "percentage": 27.85, "elapsed_time": "1:13:10", "remaining_time": "3:09:36", "throughput": 8749.72, "total_tokens": 38415520} +{"current_steps": 56995, "total_steps": 204665, "loss": 0.0021, "lr": 1.8121392909115334e-06, "epoch": 1.3923973322258325, "percentage": 27.85, "elapsed_time": "1:13:10", "remaining_time": "3:09:36", "throughput": 8749.89, "total_tokens": 38419424} +{"current_steps": 57000, "total_steps": 204665, "loss": 0.1008, "lr": 1.8120895315833842e-06, "epoch": 1.3925194830576797, "percentage": 27.85, "elapsed_time": "1:13:11", "remaining_time": "3:09:35", "throughput": 8750.07, "total_tokens": 38423392} +{"current_steps": 57005, "total_steps": 204665, "loss": 0.151, "lr": 1.8120397663495015e-06, "epoch": 1.3926416338895269, "percentage": 27.85, "elapsed_time": "1:13:11", "remaining_time": "3:09:35", "throughput": 8750.21, "total_tokens": 38427104} +{"current_steps": 57010, "total_steps": 204665, "loss": 0.0376, "lr": 1.8119899952102476e-06, "epoch": 1.392763784721374, "percentage": 27.86, "elapsed_time": "1:13:11", "remaining_time": "3:09:34", "throughput": 8750.32, "total_tokens": 38430688} +{"current_steps": 57015, "total_steps": 204665, "loss": 0.0085, "lr": 1.8119402181659837e-06, "epoch": 1.392885935553221, "percentage": 27.86, "elapsed_time": "1:13:12", "remaining_time": "3:09:34", "throughput": 8750.46, "total_tokens": 38434400} +{"current_steps": 57020, "total_steps": 204665, "loss": 0.0804, "lr": 1.811890435217072e-06, "epoch": 1.3930080863850682, "percentage": 27.86, "elapsed_time": "1:13:12", "remaining_time": "3:09:34", "throughput": 8750.55, "total_tokens": 38437920} +{"current_steps": 57025, "total_steps": 204665, "loss": 0.0872, "lr": 1.811840646363875e-06, "epoch": 1.3931302372169154, "percentage": 27.86, "elapsed_time": "1:13:12", "remaining_time": "3:09:33", "throughput": 8750.62, "total_tokens": 38441248} +{"current_steps": 57030, "total_steps": 204665, "loss": 0.221, "lr": 1.8117908516067542e-06, "epoch": 1.3932523880487626, "percentage": 27.87, "elapsed_time": "1:13:13", "remaining_time": "3:09:33", "throughput": 8750.74, "total_tokens": 38444896} +{"current_steps": 57035, "total_steps": 204665, "loss": 0.0527, "lr": 1.8117410509460723e-06, "epoch": 1.3933745388806098, "percentage": 27.87, "elapsed_time": "1:13:13", "remaining_time": "3:09:32", "throughput": 8750.78, "total_tokens": 38448096} +{"current_steps": 57040, "total_steps": 204665, "loss": 0.059, "lr": 1.811691244382191e-06, "epoch": 1.393496689712457, "percentage": 27.87, "elapsed_time": "1:13:14", "remaining_time": "3:09:32", "throughput": 8750.88, "total_tokens": 38451616} +{"current_steps": 57045, "total_steps": 204665, "loss": 0.0346, "lr": 1.8116414319154726e-06, "epoch": 1.3936188405443042, "percentage": 27.87, "elapsed_time": "1:13:14", "remaining_time": "3:09:31", "throughput": 8751.03, "total_tokens": 38455392} +{"current_steps": 57050, "total_steps": 204665, "loss": 0.0558, "lr": 1.8115916135462794e-06, "epoch": 1.3937409913761512, "percentage": 27.87, "elapsed_time": "1:13:14", "remaining_time": "3:09:31", "throughput": 8751.1, "total_tokens": 38458720} +{"current_steps": 57055, "total_steps": 204665, "loss": 0.1203, "lr": 1.8115417892749738e-06, "epoch": 1.3938631422079983, "percentage": 27.88, "elapsed_time": "1:13:15", "remaining_time": "3:09:30", "throughput": 8751.13, "total_tokens": 38461856} +{"current_steps": 57060, "total_steps": 204665, "loss": 0.137, "lr": 1.811491959101918e-06, "epoch": 1.3939852930398455, "percentage": 27.88, "elapsed_time": "1:13:15", "remaining_time": "3:09:30", "throughput": 8751.17, "total_tokens": 38465056} +{"current_steps": 57065, "total_steps": 204665, "loss": 0.0427, "lr": 1.8114421230274743e-06, "epoch": 1.3941074438716927, "percentage": 27.88, "elapsed_time": "1:13:15", "remaining_time": "3:09:29", "throughput": 8751.23, "total_tokens": 38468320} +{"current_steps": 57070, "total_steps": 204665, "loss": 0.047, "lr": 1.8113922810520053e-06, "epoch": 1.39422959470354, "percentage": 27.88, "elapsed_time": "1:13:16", "remaining_time": "3:09:29", "throughput": 8751.28, "total_tokens": 38471520} +{"current_steps": 57075, "total_steps": 204665, "loss": 0.085, "lr": 1.811342433175873e-06, "epoch": 1.394351745535387, "percentage": 27.89, "elapsed_time": "1:13:16", "remaining_time": "3:09:28", "throughput": 8751.32, "total_tokens": 38474720} +{"current_steps": 57080, "total_steps": 204665, "loss": 0.0732, "lr": 1.8112925793994408e-06, "epoch": 1.3944738963672343, "percentage": 27.89, "elapsed_time": "1:13:16", "remaining_time": "3:09:28", "throughput": 8751.35, "total_tokens": 38477856} +{"current_steps": 57085, "total_steps": 204665, "loss": 0.1524, "lr": 1.811242719723071e-06, "epoch": 1.3945960471990815, "percentage": 27.89, "elapsed_time": "1:13:17", "remaining_time": "3:09:27", "throughput": 8751.4, "total_tokens": 38481056} +{"current_steps": 57090, "total_steps": 204665, "loss": 0.0897, "lr": 1.8111928541471254e-06, "epoch": 1.3947181980309287, "percentage": 27.89, "elapsed_time": "1:13:17", "remaining_time": "3:09:27", "throughput": 8751.45, "total_tokens": 38484256} +{"current_steps": 57095, "total_steps": 204665, "loss": 0.0368, "lr": 1.8111429826719673e-06, "epoch": 1.3948403488627759, "percentage": 27.9, "elapsed_time": "1:13:17", "remaining_time": "3:09:26", "throughput": 8751.51, "total_tokens": 38487520} +{"current_steps": 57100, "total_steps": 204665, "loss": 0.1924, "lr": 1.8110931052979593e-06, "epoch": 1.394962499694623, "percentage": 27.9, "elapsed_time": "1:13:18", "remaining_time": "3:09:26", "throughput": 8751.55, "total_tokens": 38490720} +{"current_steps": 57105, "total_steps": 204665, "loss": 0.2088, "lr": 1.8110432220254641e-06, "epoch": 1.39508465052647, "percentage": 27.9, "elapsed_time": "1:13:18", "remaining_time": "3:09:25", "throughput": 8751.69, "total_tokens": 38494432} +{"current_steps": 57110, "total_steps": 204665, "loss": 0.0304, "lr": 1.8109933328548443e-06, "epoch": 1.3952068013583172, "percentage": 27.9, "elapsed_time": "1:13:18", "remaining_time": "3:09:25", "throughput": 8751.77, "total_tokens": 38497824} +{"current_steps": 57115, "total_steps": 204665, "loss": 0.0008, "lr": 1.8109434377864631e-06, "epoch": 1.3953289521901644, "percentage": 27.91, "elapsed_time": "1:13:19", "remaining_time": "3:09:24", "throughput": 8751.78, "total_tokens": 38500896} +{"current_steps": 57120, "total_steps": 204665, "loss": 0.1938, "lr": 1.810893536820683e-06, "epoch": 1.3954511030220116, "percentage": 27.91, "elapsed_time": "1:13:19", "remaining_time": "3:09:24", "throughput": 8751.77, "total_tokens": 38503776} +{"current_steps": 57125, "total_steps": 204665, "loss": 0.0298, "lr": 1.8108436299578669e-06, "epoch": 1.3955732538538588, "percentage": 27.91, "elapsed_time": "1:13:19", "remaining_time": "3:09:23", "throughput": 8751.86, "total_tokens": 38507232} +{"current_steps": 57130, "total_steps": 204665, "loss": 0.0875, "lr": 1.810793717198378e-06, "epoch": 1.395695404685706, "percentage": 27.91, "elapsed_time": "1:13:20", "remaining_time": "3:09:23", "throughput": 8751.99, "total_tokens": 38510944} +{"current_steps": 57135, "total_steps": 204665, "loss": 0.0342, "lr": 1.8107437985425792e-06, "epoch": 1.3958175555175532, "percentage": 27.92, "elapsed_time": "1:13:20", "remaining_time": "3:09:22", "throughput": 8752.05, "total_tokens": 38514208} +{"current_steps": 57140, "total_steps": 204665, "loss": 0.1204, "lr": 1.810693873990833e-06, "epoch": 1.3959397063494001, "percentage": 27.92, "elapsed_time": "1:13:20", "remaining_time": "3:09:22", "throughput": 8752.15, "total_tokens": 38517728} +{"current_steps": 57145, "total_steps": 204665, "loss": 0.1372, "lr": 1.8106439435435035e-06, "epoch": 1.3960618571812473, "percentage": 27.92, "elapsed_time": "1:13:21", "remaining_time": "3:09:21", "throughput": 8752.24, "total_tokens": 38521120} +{"current_steps": 57150, "total_steps": 204665, "loss": 0.002, "lr": 1.8105940072009527e-06, "epoch": 1.3961840080130945, "percentage": 27.92, "elapsed_time": "1:13:21", "remaining_time": "3:09:21", "throughput": 8752.28, "total_tokens": 38524320} +{"current_steps": 57155, "total_steps": 204665, "loss": 0.1249, "lr": 1.8105440649635445e-06, "epoch": 1.3963061588449417, "percentage": 27.93, "elapsed_time": "1:13:21", "remaining_time": "3:09:21", "throughput": 8752.48, "total_tokens": 38528352} +{"current_steps": 57160, "total_steps": 204665, "loss": 0.1006, "lr": 1.8104941168316416e-06, "epoch": 1.3964283096767889, "percentage": 27.93, "elapsed_time": "1:13:22", "remaining_time": "3:09:20", "throughput": 8752.47, "total_tokens": 38531296} +{"current_steps": 57165, "total_steps": 204665, "loss": 0.0017, "lr": 1.810444162805608e-06, "epoch": 1.396550460508636, "percentage": 27.93, "elapsed_time": "1:13:22", "remaining_time": "3:09:20", "throughput": 8752.6, "total_tokens": 38534944} +{"current_steps": 57170, "total_steps": 204665, "loss": 0.1503, "lr": 1.8103942028858059e-06, "epoch": 1.3966726113404833, "percentage": 27.93, "elapsed_time": "1:13:23", "remaining_time": "3:09:19", "throughput": 8752.75, "total_tokens": 38538720} +{"current_steps": 57175, "total_steps": 204665, "loss": 0.0017, "lr": 1.8103442370725995e-06, "epoch": 1.3967947621723305, "percentage": 27.94, "elapsed_time": "1:13:23", "remaining_time": "3:09:19", "throughput": 8752.8, "total_tokens": 38541984} +{"current_steps": 57180, "total_steps": 204665, "loss": 0.0414, "lr": 1.8102942653663518e-06, "epoch": 1.3969169130041776, "percentage": 27.94, "elapsed_time": "1:13:23", "remaining_time": "3:09:18", "throughput": 8753.02, "total_tokens": 38546144} +{"current_steps": 57185, "total_steps": 204665, "loss": 0.1137, "lr": 1.8102442877674261e-06, "epoch": 1.3970390638360248, "percentage": 27.94, "elapsed_time": "1:13:24", "remaining_time": "3:09:18", "throughput": 8753.12, "total_tokens": 38549600} +{"current_steps": 57190, "total_steps": 204665, "loss": 0.0352, "lr": 1.810194304276186e-06, "epoch": 1.397161214667872, "percentage": 27.94, "elapsed_time": "1:13:24", "remaining_time": "3:09:17", "throughput": 8753.18, "total_tokens": 38552928} +{"current_steps": 57195, "total_steps": 204665, "loss": 0.0021, "lr": 1.8101443148929954e-06, "epoch": 1.397283365499719, "percentage": 27.95, "elapsed_time": "1:13:24", "remaining_time": "3:09:17", "throughput": 8753.26, "total_tokens": 38556256} +{"current_steps": 57200, "total_steps": 204665, "loss": 0.0396, "lr": 1.810094319618217e-06, "epoch": 1.3974055163315662, "percentage": 27.95, "elapsed_time": "1:13:25", "remaining_time": "3:09:16", "throughput": 8753.33, "total_tokens": 38559648} +{"current_steps": 57205, "total_steps": 204665, "loss": 0.0853, "lr": 1.810044318452215e-06, "epoch": 1.3975276671634134, "percentage": 27.95, "elapsed_time": "1:13:25", "remaining_time": "3:09:16", "throughput": 8753.37, "total_tokens": 38562784} +{"current_steps": 57210, "total_steps": 204665, "loss": 0.049, "lr": 1.8099943113953529e-06, "epoch": 1.3976498179952606, "percentage": 27.95, "elapsed_time": "1:13:25", "remaining_time": "3:09:15", "throughput": 8753.48, "total_tokens": 38566368} +{"current_steps": 57215, "total_steps": 204665, "loss": 0.1038, "lr": 1.8099442984479942e-06, "epoch": 1.3977719688271077, "percentage": 27.96, "elapsed_time": "1:13:26", "remaining_time": "3:09:15", "throughput": 8753.57, "total_tokens": 38569824} +{"current_steps": 57220, "total_steps": 204665, "loss": 0.0255, "lr": 1.8098942796105027e-06, "epoch": 1.397894119658955, "percentage": 27.96, "elapsed_time": "1:13:26", "remaining_time": "3:09:14", "throughput": 8753.68, "total_tokens": 38573408} +{"current_steps": 57225, "total_steps": 204665, "loss": 0.1684, "lr": 1.8098442548832424e-06, "epoch": 1.398016270490802, "percentage": 27.96, "elapsed_time": "1:13:26", "remaining_time": "3:09:14", "throughput": 8753.77, "total_tokens": 38576864} +{"current_steps": 57230, "total_steps": 204665, "loss": 0.2394, "lr": 1.8097942242665765e-06, "epoch": 1.398138421322649, "percentage": 27.96, "elapsed_time": "1:13:27", "remaining_time": "3:09:13", "throughput": 8753.87, "total_tokens": 38580384} +{"current_steps": 57235, "total_steps": 204665, "loss": 0.0485, "lr": 1.8097441877608695e-06, "epoch": 1.3982605721544963, "percentage": 27.97, "elapsed_time": "1:13:27", "remaining_time": "3:09:13", "throughput": 8753.92, "total_tokens": 38583648} +{"current_steps": 57240, "total_steps": 204665, "loss": 0.0928, "lr": 1.809694145366485e-06, "epoch": 1.3983827229863435, "percentage": 27.97, "elapsed_time": "1:13:27", "remaining_time": "3:09:12", "throughput": 8754.13, "total_tokens": 38587744} +{"current_steps": 57245, "total_steps": 204665, "loss": 0.0088, "lr": 1.8096440970837866e-06, "epoch": 1.3985048738181907, "percentage": 27.97, "elapsed_time": "1:13:28", "remaining_time": "3:09:12", "throughput": 8754.28, "total_tokens": 38591520} +{"current_steps": 57250, "total_steps": 204665, "loss": 0.1195, "lr": 1.8095940429131386e-06, "epoch": 1.3986270246500379, "percentage": 27.97, "elapsed_time": "1:13:28", "remaining_time": "3:09:11", "throughput": 8754.33, "total_tokens": 38594784} +{"current_steps": 57255, "total_steps": 204665, "loss": 0.137, "lr": 1.8095439828549051e-06, "epoch": 1.398749175481885, "percentage": 27.97, "elapsed_time": "1:13:29", "remaining_time": "3:09:11", "throughput": 8754.44, "total_tokens": 38598368} +{"current_steps": 57260, "total_steps": 204665, "loss": 0.1184, "lr": 1.80949391690945e-06, "epoch": 1.3988713263137322, "percentage": 27.98, "elapsed_time": "1:13:29", "remaining_time": "3:09:11", "throughput": 8754.54, "total_tokens": 38601888} +{"current_steps": 57265, "total_steps": 204665, "loss": 0.0366, "lr": 1.8094438450771375e-06, "epoch": 1.3989934771455794, "percentage": 27.98, "elapsed_time": "1:13:29", "remaining_time": "3:09:10", "throughput": 8754.64, "total_tokens": 38605408} +{"current_steps": 57270, "total_steps": 204665, "loss": 0.1262, "lr": 1.8093937673583315e-06, "epoch": 1.3991156279774266, "percentage": 27.98, "elapsed_time": "1:13:30", "remaining_time": "3:09:10", "throughput": 8754.72, "total_tokens": 38608800} +{"current_steps": 57275, "total_steps": 204665, "loss": 0.1281, "lr": 1.8093436837533961e-06, "epoch": 1.3992377788092738, "percentage": 27.98, "elapsed_time": "1:13:30", "remaining_time": "3:09:09", "throughput": 8754.8, "total_tokens": 38612192} +{"current_steps": 57280, "total_steps": 204665, "loss": 0.0841, "lr": 1.809293594262696e-06, "epoch": 1.399359929641121, "percentage": 27.99, "elapsed_time": "1:13:30", "remaining_time": "3:09:09", "throughput": 8754.81, "total_tokens": 38615200} +{"current_steps": 57285, "total_steps": 204665, "loss": 0.1067, "lr": 1.8092434988865953e-06, "epoch": 1.399482080472968, "percentage": 27.99, "elapsed_time": "1:13:31", "remaining_time": "3:09:08", "throughput": 8754.82, "total_tokens": 38618208} +{"current_steps": 57290, "total_steps": 204665, "loss": 0.0774, "lr": 1.809193397625458e-06, "epoch": 1.3996042313048151, "percentage": 27.99, "elapsed_time": "1:13:31", "remaining_time": "3:09:08", "throughput": 8754.95, "total_tokens": 38621856} +{"current_steps": 57295, "total_steps": 204665, "loss": 0.0382, "lr": 1.8091432904796488e-06, "epoch": 1.3997263821366623, "percentage": 27.99, "elapsed_time": "1:13:31", "remaining_time": "3:09:07", "throughput": 8754.96, "total_tokens": 38624864} +{"current_steps": 57300, "total_steps": 204665, "loss": 0.1126, "lr": 1.8090931774495321e-06, "epoch": 1.3998485329685095, "percentage": 28.0, "elapsed_time": "1:13:32", "remaining_time": "3:09:07", "throughput": 8755.15, "total_tokens": 38628832} +{"current_steps": 57305, "total_steps": 204665, "loss": 0.0725, "lr": 1.8090430585354719e-06, "epoch": 1.3999706838003567, "percentage": 28.0, "elapsed_time": "1:13:32", "remaining_time": "3:09:06", "throughput": 8755.21, "total_tokens": 38632160} +{"current_steps": 57310, "total_steps": 204665, "loss": 0.112, "lr": 1.808992933737833e-06, "epoch": 1.400092834632204, "percentage": 28.0, "elapsed_time": "1:13:32", "remaining_time": "3:09:06", "throughput": 8755.27, "total_tokens": 38635488} +{"current_steps": 57315, "total_steps": 204665, "loss": 0.1026, "lr": 1.80894280305698e-06, "epoch": 1.4002149854640509, "percentage": 28.0, "elapsed_time": "1:13:33", "remaining_time": "3:09:05", "throughput": 8755.35, "total_tokens": 38638880} +{"current_steps": 57320, "total_steps": 204665, "loss": 0.0514, "lr": 1.8088926664932775e-06, "epoch": 1.400337136295898, "percentage": 28.01, "elapsed_time": "1:13:33", "remaining_time": "3:09:05", "throughput": 8755.44, "total_tokens": 38642336} +{"current_steps": 57325, "total_steps": 204665, "loss": 0.0423, "lr": 1.80884252404709e-06, "epoch": 1.4004592871277453, "percentage": 28.01, "elapsed_time": "1:13:33", "remaining_time": "3:09:04", "throughput": 8755.57, "total_tokens": 38645984} +{"current_steps": 57330, "total_steps": 204665, "loss": 0.0354, "lr": 1.8087923757187817e-06, "epoch": 1.4005814379595924, "percentage": 28.01, "elapsed_time": "1:13:34", "remaining_time": "3:09:04", "throughput": 8755.59, "total_tokens": 38649056} +{"current_steps": 57335, "total_steps": 204665, "loss": 0.1126, "lr": 1.808742221508718e-06, "epoch": 1.4007035887914396, "percentage": 28.01, "elapsed_time": "1:13:34", "remaining_time": "3:09:03", "throughput": 8755.7, "total_tokens": 38652640} +{"current_steps": 57340, "total_steps": 204665, "loss": 0.119, "lr": 1.8086920614172633e-06, "epoch": 1.4008257396232868, "percentage": 28.02, "elapsed_time": "1:13:34", "remaining_time": "3:09:03", "throughput": 8755.75, "total_tokens": 38655904} +{"current_steps": 57345, "total_steps": 204665, "loss": 0.0705, "lr": 1.8086418954447825e-06, "epoch": 1.400947890455134, "percentage": 28.02, "elapsed_time": "1:13:35", "remaining_time": "3:09:02", "throughput": 8755.82, "total_tokens": 38659232} +{"current_steps": 57350, "total_steps": 204665, "loss": 0.0021, "lr": 1.80859172359164e-06, "epoch": 1.4010700412869812, "percentage": 28.02, "elapsed_time": "1:13:35", "remaining_time": "3:09:02", "throughput": 8755.9, "total_tokens": 38662688} +{"current_steps": 57355, "total_steps": 204665, "loss": 0.0354, "lr": 1.8085415458582012e-06, "epoch": 1.4011921921188284, "percentage": 28.02, "elapsed_time": "1:13:35", "remaining_time": "3:09:01", "throughput": 8756.01, "total_tokens": 38666208} +{"current_steps": 57360, "total_steps": 204665, "loss": 0.0081, "lr": 1.808491362244831e-06, "epoch": 1.4013143429506756, "percentage": 28.03, "elapsed_time": "1:13:36", "remaining_time": "3:09:01", "throughput": 8756.09, "total_tokens": 38669664} +{"current_steps": 57365, "total_steps": 204665, "loss": 0.2416, "lr": 1.8084411727518938e-06, "epoch": 1.4014364937825228, "percentage": 28.03, "elapsed_time": "1:13:36", "remaining_time": "3:09:00", "throughput": 8756.1, "total_tokens": 38672672} +{"current_steps": 57370, "total_steps": 204665, "loss": 0.157, "lr": 1.8083909773797555e-06, "epoch": 1.4015586446143697, "percentage": 28.03, "elapsed_time": "1:13:37", "remaining_time": "3:09:00", "throughput": 8756.17, "total_tokens": 38676000} +{"current_steps": 57375, "total_steps": 204665, "loss": 0.0918, "lr": 1.8083407761287802e-06, "epoch": 1.401680795446217, "percentage": 28.03, "elapsed_time": "1:13:37", "remaining_time": "3:09:00", "throughput": 8756.34, "total_tokens": 38679904} +{"current_steps": 57380, "total_steps": 204665, "loss": 0.0186, "lr": 1.8082905689993333e-06, "epoch": 1.4018029462780641, "percentage": 28.04, "elapsed_time": "1:13:37", "remaining_time": "3:08:59", "throughput": 8756.43, "total_tokens": 38683360} +{"current_steps": 57385, "total_steps": 204665, "loss": 0.0348, "lr": 1.8082403559917801e-06, "epoch": 1.4019250971099113, "percentage": 28.04, "elapsed_time": "1:13:38", "remaining_time": "3:08:59", "throughput": 8756.51, "total_tokens": 38686752} +{"current_steps": 57390, "total_steps": 204665, "loss": 0.0465, "lr": 1.8081901371064854e-06, "epoch": 1.4020472479417585, "percentage": 28.04, "elapsed_time": "1:13:38", "remaining_time": "3:08:58", "throughput": 8756.74, "total_tokens": 38690976} +{"current_steps": 57395, "total_steps": 204665, "loss": 0.2443, "lr": 1.8081399123438147e-06, "epoch": 1.4021693987736057, "percentage": 28.04, "elapsed_time": "1:13:38", "remaining_time": "3:08:58", "throughput": 8756.75, "total_tokens": 38693984} +{"current_steps": 57400, "total_steps": 204665, "loss": 0.0703, "lr": 1.8080896817041337e-06, "epoch": 1.4022915496054529, "percentage": 28.05, "elapsed_time": "1:13:39", "remaining_time": "3:08:57", "throughput": 8756.83, "total_tokens": 38697376} +{"current_steps": 57405, "total_steps": 204665, "loss": 0.003, "lr": 1.8080394451878066e-06, "epoch": 1.4024137004372998, "percentage": 28.05, "elapsed_time": "1:13:39", "remaining_time": "3:08:57", "throughput": 8756.93, "total_tokens": 38700896} +{"current_steps": 57410, "total_steps": 204665, "loss": 0.1784, "lr": 1.8079892027951997e-06, "epoch": 1.402535851269147, "percentage": 28.05, "elapsed_time": "1:13:39", "remaining_time": "3:08:56", "throughput": 8757.04, "total_tokens": 38704480} +{"current_steps": 57415, "total_steps": 204665, "loss": 0.0667, "lr": 1.8079389545266776e-06, "epoch": 1.4026580021009942, "percentage": 28.05, "elapsed_time": "1:13:40", "remaining_time": "3:08:56", "throughput": 8757.08, "total_tokens": 38707680} +{"current_steps": 57420, "total_steps": 204665, "loss": 0.1021, "lr": 1.8078887003826067e-06, "epoch": 1.4027801529328414, "percentage": 28.06, "elapsed_time": "1:13:40", "remaining_time": "3:08:55", "throughput": 8757.1, "total_tokens": 38710752} +{"current_steps": 57425, "total_steps": 204665, "loss": 0.0199, "lr": 1.8078384403633513e-06, "epoch": 1.4029023037646886, "percentage": 28.06, "elapsed_time": "1:13:40", "remaining_time": "3:08:55", "throughput": 8757.11, "total_tokens": 38713760} +{"current_steps": 57430, "total_steps": 204665, "loss": 0.0928, "lr": 1.8077881744692778e-06, "epoch": 1.4030244545965358, "percentage": 28.06, "elapsed_time": "1:13:41", "remaining_time": "3:08:54", "throughput": 8757.21, "total_tokens": 38717280} +{"current_steps": 57435, "total_steps": 204665, "loss": 0.085, "lr": 1.8077379027007513e-06, "epoch": 1.403146605428383, "percentage": 28.06, "elapsed_time": "1:13:41", "remaining_time": "3:08:54", "throughput": 8757.33, "total_tokens": 38720928} +{"current_steps": 57440, "total_steps": 204665, "loss": 0.0015, "lr": 1.8076876250581376e-06, "epoch": 1.4032687562602302, "percentage": 28.07, "elapsed_time": "1:13:41", "remaining_time": "3:08:53", "throughput": 8757.41, "total_tokens": 38724320} +{"current_steps": 57445, "total_steps": 204665, "loss": 0.0119, "lr": 1.807637341541802e-06, "epoch": 1.4033909070920774, "percentage": 28.07, "elapsed_time": "1:13:42", "remaining_time": "3:08:53", "throughput": 8757.42, "total_tokens": 38727328} +{"current_steps": 57450, "total_steps": 204665, "loss": 0.0021, "lr": 1.807587052152111e-06, "epoch": 1.4035130579239246, "percentage": 28.07, "elapsed_time": "1:13:42", "remaining_time": "3:08:52", "throughput": 8757.41, "total_tokens": 38730208} +{"current_steps": 57455, "total_steps": 204665, "loss": 0.1549, "lr": 1.807536756889429e-06, "epoch": 1.4036352087557717, "percentage": 28.07, "elapsed_time": "1:13:42", "remaining_time": "3:08:52", "throughput": 8757.55, "total_tokens": 38733920} +{"current_steps": 57460, "total_steps": 204665, "loss": 0.0534, "lr": 1.807486455754123e-06, "epoch": 1.4037573595876187, "percentage": 28.08, "elapsed_time": "1:13:43", "remaining_time": "3:08:51", "throughput": 8757.59, "total_tokens": 38737120} +{"current_steps": 57465, "total_steps": 204665, "loss": 0.0009, "lr": 1.8074361487465582e-06, "epoch": 1.403879510419466, "percentage": 28.08, "elapsed_time": "1:13:43", "remaining_time": "3:08:51", "throughput": 8757.68, "total_tokens": 38740576} +{"current_steps": 57470, "total_steps": 204665, "loss": 0.0514, "lr": 1.8073858358671004e-06, "epoch": 1.404001661251313, "percentage": 28.08, "elapsed_time": "1:13:43", "remaining_time": "3:08:50", "throughput": 8757.71, "total_tokens": 38743712} +{"current_steps": 57475, "total_steps": 204665, "loss": 0.1495, "lr": 1.8073355171161157e-06, "epoch": 1.4041238120831603, "percentage": 28.08, "elapsed_time": "1:13:44", "remaining_time": "3:08:50", "throughput": 8757.79, "total_tokens": 38747104} +{"current_steps": 57480, "total_steps": 204665, "loss": 0.116, "lr": 1.8072851924939702e-06, "epoch": 1.4042459629150075, "percentage": 28.08, "elapsed_time": "1:13:44", "remaining_time": "3:08:49", "throughput": 8757.89, "total_tokens": 38750624} +{"current_steps": 57485, "total_steps": 204665, "loss": 0.1466, "lr": 1.8072348620010294e-06, "epoch": 1.4043681137468547, "percentage": 28.09, "elapsed_time": "1:13:44", "remaining_time": "3:08:49", "throughput": 8757.91, "total_tokens": 38753696} +{"current_steps": 57490, "total_steps": 204665, "loss": 0.0367, "lr": 1.8071845256376597e-06, "epoch": 1.4044902645787019, "percentage": 28.09, "elapsed_time": "1:13:45", "remaining_time": "3:08:48", "throughput": 8758.02, "total_tokens": 38757280} +{"current_steps": 57495, "total_steps": 204665, "loss": 0.1259, "lr": 1.8071341834042268e-06, "epoch": 1.4046124154105488, "percentage": 28.09, "elapsed_time": "1:13:45", "remaining_time": "3:08:48", "throughput": 8758.01, "total_tokens": 38760160} +{"current_steps": 57500, "total_steps": 204665, "loss": 0.0536, "lr": 1.8070838353010973e-06, "epoch": 1.404734566242396, "percentage": 28.09, "elapsed_time": "1:13:46", "remaining_time": "3:08:47", "throughput": 8758.01, "total_tokens": 38763104} +{"current_steps": 57505, "total_steps": 204665, "loss": 0.0231, "lr": 1.807033481328637e-06, "epoch": 1.4048567170742432, "percentage": 28.1, "elapsed_time": "1:13:46", "remaining_time": "3:08:47", "throughput": 8758.08, "total_tokens": 38766432} +{"current_steps": 57510, "total_steps": 204665, "loss": 0.0559, "lr": 1.806983121487212e-06, "epoch": 1.4049788679060904, "percentage": 28.1, "elapsed_time": "1:13:46", "remaining_time": "3:08:46", "throughput": 8758.21, "total_tokens": 38770144} +{"current_steps": 57515, "total_steps": 204665, "loss": 0.0498, "lr": 1.8069327557771889e-06, "epoch": 1.4051010187379376, "percentage": 28.1, "elapsed_time": "1:13:47", "remaining_time": "3:08:46", "throughput": 8758.24, "total_tokens": 38773280} +{"current_steps": 57520, "total_steps": 204665, "loss": 0.0375, "lr": 1.8068823841989338e-06, "epoch": 1.4052231695697848, "percentage": 28.1, "elapsed_time": "1:13:47", "remaining_time": "3:08:45", "throughput": 8758.31, "total_tokens": 38776608} +{"current_steps": 57525, "total_steps": 204665, "loss": 0.0967, "lr": 1.8068320067528129e-06, "epoch": 1.405345320401632, "percentage": 28.11, "elapsed_time": "1:13:47", "remaining_time": "3:08:45", "throughput": 8758.35, "total_tokens": 38779808} +{"current_steps": 57530, "total_steps": 204665, "loss": 0.1562, "lr": 1.8067816234391925e-06, "epoch": 1.4054674712334791, "percentage": 28.11, "elapsed_time": "1:13:48", "remaining_time": "3:08:45", "throughput": 8758.44, "total_tokens": 38783200} +{"current_steps": 57535, "total_steps": 204665, "loss": 0.0736, "lr": 1.8067312342584393e-06, "epoch": 1.4055896220653263, "percentage": 28.11, "elapsed_time": "1:13:48", "remaining_time": "3:08:44", "throughput": 8758.43, "total_tokens": 38786144} +{"current_steps": 57540, "total_steps": 204665, "loss": 0.1177, "lr": 1.8066808392109193e-06, "epoch": 1.4057117728971735, "percentage": 28.11, "elapsed_time": "1:13:48", "remaining_time": "3:08:44", "throughput": 8758.47, "total_tokens": 38789344} +{"current_steps": 57545, "total_steps": 204665, "loss": 0.0773, "lr": 1.8066304382969995e-06, "epoch": 1.4058339237290207, "percentage": 28.12, "elapsed_time": "1:13:49", "remaining_time": "3:08:43", "throughput": 8758.46, "total_tokens": 38792224} +{"current_steps": 57550, "total_steps": 204665, "loss": 0.0862, "lr": 1.8065800315170461e-06, "epoch": 1.4059560745608677, "percentage": 28.12, "elapsed_time": "1:13:49", "remaining_time": "3:08:43", "throughput": 8758.67, "total_tokens": 38796384} +{"current_steps": 57555, "total_steps": 204665, "loss": 0.0964, "lr": 1.8065296188714259e-06, "epoch": 1.4060782253927149, "percentage": 28.12, "elapsed_time": "1:13:49", "remaining_time": "3:08:42", "throughput": 8758.73, "total_tokens": 38799712} +{"current_steps": 57560, "total_steps": 204665, "loss": 0.1511, "lr": 1.8064792003605054e-06, "epoch": 1.406200376224562, "percentage": 28.12, "elapsed_time": "1:13:50", "remaining_time": "3:08:42", "throughput": 8758.86, "total_tokens": 38803360} +{"current_steps": 57565, "total_steps": 204665, "loss": 0.073, "lr": 1.806428775984651e-06, "epoch": 1.4063225270564093, "percentage": 28.13, "elapsed_time": "1:13:50", "remaining_time": "3:08:41", "throughput": 8758.94, "total_tokens": 38806752} +{"current_steps": 57570, "total_steps": 204665, "loss": 0.0458, "lr": 1.80637834574423e-06, "epoch": 1.4064446778882564, "percentage": 28.13, "elapsed_time": "1:13:50", "remaining_time": "3:08:41", "throughput": 8758.95, "total_tokens": 38809760} +{"current_steps": 57575, "total_steps": 204665, "loss": 0.1079, "lr": 1.8063279096396084e-06, "epoch": 1.4065668287201036, "percentage": 28.13, "elapsed_time": "1:13:51", "remaining_time": "3:08:40", "throughput": 8759.07, "total_tokens": 38813408} +{"current_steps": 57580, "total_steps": 204665, "loss": 0.0348, "lr": 1.8062774676711534e-06, "epoch": 1.4066889795519508, "percentage": 28.13, "elapsed_time": "1:13:51", "remaining_time": "3:08:40", "throughput": 8759.11, "total_tokens": 38816608} +{"current_steps": 57585, "total_steps": 204665, "loss": 0.0337, "lr": 1.8062270198392322e-06, "epoch": 1.4068111303837978, "percentage": 28.14, "elapsed_time": "1:13:51", "remaining_time": "3:08:39", "throughput": 8759.1, "total_tokens": 38819552} +{"current_steps": 57590, "total_steps": 204665, "loss": 0.0309, "lr": 1.8061765661442108e-06, "epoch": 1.406933281215645, "percentage": 28.14, "elapsed_time": "1:13:52", "remaining_time": "3:08:39", "throughput": 8759.14, "total_tokens": 38822688} +{"current_steps": 57595, "total_steps": 204665, "loss": 0.1429, "lr": 1.8061261065864568e-06, "epoch": 1.4070554320474922, "percentage": 28.14, "elapsed_time": "1:13:52", "remaining_time": "3:08:38", "throughput": 8759.24, "total_tokens": 38826208} +{"current_steps": 57600, "total_steps": 204665, "loss": 0.2824, "lr": 1.806075641166337e-06, "epoch": 1.4071775828793394, "percentage": 28.14, "elapsed_time": "1:13:52", "remaining_time": "3:08:38", "throughput": 8759.32, "total_tokens": 38829664} +{"current_steps": 57605, "total_steps": 204665, "loss": 0.0616, "lr": 1.8060251698842182e-06, "epoch": 1.4072997337111866, "percentage": 28.15, "elapsed_time": "1:13:53", "remaining_time": "3:08:37", "throughput": 8759.43, "total_tokens": 38833184} +{"current_steps": 57610, "total_steps": 204665, "loss": 0.1233, "lr": 1.8059746927404676e-06, "epoch": 1.4074218845430337, "percentage": 28.15, "elapsed_time": "1:13:53", "remaining_time": "3:08:37", "throughput": 8759.47, "total_tokens": 38836384} +{"current_steps": 57615, "total_steps": 204665, "loss": 0.0892, "lr": 1.8059242097354522e-06, "epoch": 1.407544035374881, "percentage": 28.15, "elapsed_time": "1:13:54", "remaining_time": "3:08:36", "throughput": 8759.59, "total_tokens": 38840032} +{"current_steps": 57620, "total_steps": 204665, "loss": 0.0226, "lr": 1.8058737208695391e-06, "epoch": 1.4076661862067281, "percentage": 28.15, "elapsed_time": "1:13:54", "remaining_time": "3:08:36", "throughput": 8759.66, "total_tokens": 38843360} +{"current_steps": 57625, "total_steps": 204665, "loss": 0.2279, "lr": 1.8058232261430957e-06, "epoch": 1.4077883370385753, "percentage": 28.16, "elapsed_time": "1:13:54", "remaining_time": "3:08:35", "throughput": 8759.71, "total_tokens": 38846560} +{"current_steps": 57630, "total_steps": 204665, "loss": 0.0297, "lr": 1.8057727255564892e-06, "epoch": 1.4079104878704225, "percentage": 28.16, "elapsed_time": "1:13:55", "remaining_time": "3:08:35", "throughput": 8759.77, "total_tokens": 38849888} +{"current_steps": 57635, "total_steps": 204665, "loss": 0.1256, "lr": 1.8057222191100863e-06, "epoch": 1.4080326387022697, "percentage": 28.16, "elapsed_time": "1:13:55", "remaining_time": "3:08:34", "throughput": 8759.88, "total_tokens": 38853472} +{"current_steps": 57640, "total_steps": 204665, "loss": 0.114, "lr": 1.805671706804255e-06, "epoch": 1.4081547895341167, "percentage": 28.16, "elapsed_time": "1:13:55", "remaining_time": "3:08:34", "throughput": 8760.01, "total_tokens": 38857120} +{"current_steps": 57645, "total_steps": 204665, "loss": 0.1426, "lr": 1.8056211886393622e-06, "epoch": 1.4082769403659638, "percentage": 28.17, "elapsed_time": "1:13:56", "remaining_time": "3:08:33", "throughput": 8760.09, "total_tokens": 38860512} +{"current_steps": 57650, "total_steps": 204665, "loss": 0.1036, "lr": 1.8055706646157756e-06, "epoch": 1.408399091197811, "percentage": 28.17, "elapsed_time": "1:13:56", "remaining_time": "3:08:33", "throughput": 8760.2, "total_tokens": 38864160} +{"current_steps": 57655, "total_steps": 204665, "loss": 0.0018, "lr": 1.8055201347338625e-06, "epoch": 1.4085212420296582, "percentage": 28.17, "elapsed_time": "1:13:56", "remaining_time": "3:08:33", "throughput": 8760.26, "total_tokens": 38867424} +{"current_steps": 57660, "total_steps": 204665, "loss": 0.063, "lr": 1.8054695989939904e-06, "epoch": 1.4086433928615054, "percentage": 28.17, "elapsed_time": "1:13:57", "remaining_time": "3:08:32", "throughput": 8760.32, "total_tokens": 38870688} +{"current_steps": 57665, "total_steps": 204665, "loss": 0.0545, "lr": 1.8054190573965263e-06, "epoch": 1.4087655436933526, "percentage": 28.18, "elapsed_time": "1:13:57", "remaining_time": "3:08:32", "throughput": 8760.39, "total_tokens": 38874080} +{"current_steps": 57670, "total_steps": 204665, "loss": 0.0517, "lr": 1.8053685099418385e-06, "epoch": 1.4088876945251998, "percentage": 28.18, "elapsed_time": "1:13:57", "remaining_time": "3:08:31", "throughput": 8760.4, "total_tokens": 38877088} +{"current_steps": 57675, "total_steps": 204665, "loss": 0.0338, "lr": 1.8053179566302942e-06, "epoch": 1.4090098453570468, "percentage": 28.18, "elapsed_time": "1:13:58", "remaining_time": "3:08:31", "throughput": 8760.43, "total_tokens": 38880224} +{"current_steps": 57680, "total_steps": 204665, "loss": 0.1188, "lr": 1.805267397462261e-06, "epoch": 1.409131996188894, "percentage": 28.18, "elapsed_time": "1:13:58", "remaining_time": "3:08:30", "throughput": 8760.48, "total_tokens": 38883424} +{"current_steps": 57685, "total_steps": 204665, "loss": 0.0563, "lr": 1.805216832438107e-06, "epoch": 1.4092541470207411, "percentage": 28.19, "elapsed_time": "1:13:58", "remaining_time": "3:08:30", "throughput": 8760.54, "total_tokens": 38886752} +{"current_steps": 57690, "total_steps": 204665, "loss": 0.0523, "lr": 1.8051662615581994e-06, "epoch": 1.4093762978525883, "percentage": 28.19, "elapsed_time": "1:13:59", "remaining_time": "3:08:29", "throughput": 8760.65, "total_tokens": 38890336} +{"current_steps": 57695, "total_steps": 204665, "loss": 0.0014, "lr": 1.805115684822906e-06, "epoch": 1.4094984486844355, "percentage": 28.19, "elapsed_time": "1:13:59", "remaining_time": "3:08:29", "throughput": 8760.74, "total_tokens": 38893792} +{"current_steps": 57700, "total_steps": 204665, "loss": 0.001, "lr": 1.8050651022325952e-06, "epoch": 1.4096205995162827, "percentage": 28.19, "elapsed_time": "1:13:59", "remaining_time": "3:08:28", "throughput": 8760.81, "total_tokens": 38897120} +{"current_steps": 57705, "total_steps": 204665, "loss": 0.0374, "lr": 1.805014513787634e-06, "epoch": 1.40974275034813, "percentage": 28.19, "elapsed_time": "1:14:00", "remaining_time": "3:08:28", "throughput": 8760.9, "total_tokens": 38900576} +{"current_steps": 57710, "total_steps": 204665, "loss": 0.142, "lr": 1.804963919488391e-06, "epoch": 1.409864901179977, "percentage": 28.2, "elapsed_time": "1:14:00", "remaining_time": "3:08:27", "throughput": 8761.08, "total_tokens": 38904544} +{"current_steps": 57715, "total_steps": 204665, "loss": 0.0544, "lr": 1.804913319335234e-06, "epoch": 1.4099870520118243, "percentage": 28.2, "elapsed_time": "1:14:00", "remaining_time": "3:08:27", "throughput": 8761.09, "total_tokens": 38907616} +{"current_steps": 57720, "total_steps": 204665, "loss": 0.198, "lr": 1.8048627133285306e-06, "epoch": 1.4101092028436715, "percentage": 28.2, "elapsed_time": "1:14:01", "remaining_time": "3:08:26", "throughput": 8761.12, "total_tokens": 38910752} +{"current_steps": 57725, "total_steps": 204665, "loss": 0.0391, "lr": 1.804812101468649e-06, "epoch": 1.4102313536755187, "percentage": 28.2, "elapsed_time": "1:14:01", "remaining_time": "3:08:26", "throughput": 8761.12, "total_tokens": 38913760} +{"current_steps": 57730, "total_steps": 204665, "loss": 0.059, "lr": 1.8047614837559574e-06, "epoch": 1.4103535045073656, "percentage": 28.21, "elapsed_time": "1:14:01", "remaining_time": "3:08:25", "throughput": 8761.11, "total_tokens": 38916640} +{"current_steps": 57735, "total_steps": 204665, "loss": 0.0686, "lr": 1.8047108601908243e-06, "epoch": 1.4104756553392128, "percentage": 28.21, "elapsed_time": "1:14:02", "remaining_time": "3:08:25", "throughput": 8761.09, "total_tokens": 38919520} +{"current_steps": 57740, "total_steps": 204665, "loss": 0.0847, "lr": 1.8046602307736168e-06, "epoch": 1.41059780617106, "percentage": 28.21, "elapsed_time": "1:14:02", "remaining_time": "3:08:24", "throughput": 8761.15, "total_tokens": 38922784} +{"current_steps": 57745, "total_steps": 204665, "loss": 0.0265, "lr": 1.8046095955047038e-06, "epoch": 1.4107199570029072, "percentage": 28.21, "elapsed_time": "1:14:03", "remaining_time": "3:08:24", "throughput": 8761.32, "total_tokens": 38926688} +{"current_steps": 57750, "total_steps": 204665, "loss": 0.1017, "lr": 1.8045589543844537e-06, "epoch": 1.4108421078347544, "percentage": 28.22, "elapsed_time": "1:14:03", "remaining_time": "3:08:23", "throughput": 8761.35, "total_tokens": 38929824} +{"current_steps": 57755, "total_steps": 204665, "loss": 0.0632, "lr": 1.8045083074132341e-06, "epoch": 1.4109642586666016, "percentage": 28.22, "elapsed_time": "1:14:03", "remaining_time": "3:08:23", "throughput": 8761.47, "total_tokens": 38933408} +{"current_steps": 57760, "total_steps": 204665, "loss": 0.102, "lr": 1.804457654591414e-06, "epoch": 1.4110864094984485, "percentage": 28.22, "elapsed_time": "1:14:04", "remaining_time": "3:08:22", "throughput": 8761.61, "total_tokens": 38937184} +{"current_steps": 57765, "total_steps": 204665, "loss": 0.0575, "lr": 1.8044069959193612e-06, "epoch": 1.4112085603302957, "percentage": 28.22, "elapsed_time": "1:14:04", "remaining_time": "3:08:22", "throughput": 8761.68, "total_tokens": 38940512} +{"current_steps": 57770, "total_steps": 204665, "loss": 0.0731, "lr": 1.8043563313974445e-06, "epoch": 1.411330711162143, "percentage": 28.23, "elapsed_time": "1:14:04", "remaining_time": "3:08:21", "throughput": 8761.77, "total_tokens": 38943968} +{"current_steps": 57775, "total_steps": 204665, "loss": 0.0789, "lr": 1.8043056610260324e-06, "epoch": 1.4114528619939901, "percentage": 28.23, "elapsed_time": "1:14:05", "remaining_time": "3:08:21", "throughput": 8761.91, "total_tokens": 38947680} +{"current_steps": 57780, "total_steps": 204665, "loss": 0.0939, "lr": 1.804254984805493e-06, "epoch": 1.4115750128258373, "percentage": 28.23, "elapsed_time": "1:14:05", "remaining_time": "3:08:21", "throughput": 8761.97, "total_tokens": 38951008} +{"current_steps": 57785, "total_steps": 204665, "loss": 0.1177, "lr": 1.804204302736195e-06, "epoch": 1.4116971636576845, "percentage": 28.23, "elapsed_time": "1:14:05", "remaining_time": "3:08:20", "throughput": 8762.01, "total_tokens": 38954208} +{"current_steps": 57790, "total_steps": 204665, "loss": 0.0814, "lr": 1.804153614818507e-06, "epoch": 1.4118193144895317, "percentage": 28.24, "elapsed_time": "1:14:06", "remaining_time": "3:08:20", "throughput": 8762.04, "total_tokens": 38957344} +{"current_steps": 57795, "total_steps": 204665, "loss": 0.0297, "lr": 1.8041029210527976e-06, "epoch": 1.4119414653213789, "percentage": 28.24, "elapsed_time": "1:14:06", "remaining_time": "3:08:19", "throughput": 8762.15, "total_tokens": 38960928} +{"current_steps": 57800, "total_steps": 204665, "loss": 0.0021, "lr": 1.8040522214394356e-06, "epoch": 1.412063616153226, "percentage": 28.24, "elapsed_time": "1:14:06", "remaining_time": "3:08:19", "throughput": 8762.16, "total_tokens": 38963936} +{"current_steps": 57805, "total_steps": 204665, "loss": 0.0705, "lr": 1.8040015159787894e-06, "epoch": 1.4121857669850733, "percentage": 28.24, "elapsed_time": "1:14:07", "remaining_time": "3:08:18", "throughput": 8762.28, "total_tokens": 38967584} +{"current_steps": 57810, "total_steps": 204665, "loss": 0.0421, "lr": 1.8039508046712281e-06, "epoch": 1.4123079178169204, "percentage": 28.25, "elapsed_time": "1:14:07", "remaining_time": "3:08:18", "throughput": 8762.39, "total_tokens": 38971104} +{"current_steps": 57815, "total_steps": 204665, "loss": 0.1292, "lr": 1.8039000875171202e-06, "epoch": 1.4124300686487676, "percentage": 28.25, "elapsed_time": "1:14:07", "remaining_time": "3:08:17", "throughput": 8762.39, "total_tokens": 38974112} +{"current_steps": 57820, "total_steps": 204665, "loss": 0.0562, "lr": 1.8038493645168349e-06, "epoch": 1.4125522194806146, "percentage": 28.25, "elapsed_time": "1:14:08", "remaining_time": "3:08:17", "throughput": 8762.46, "total_tokens": 38977440} +{"current_steps": 57825, "total_steps": 204665, "loss": 0.0516, "lr": 1.8037986356707404e-06, "epoch": 1.4126743703124618, "percentage": 28.25, "elapsed_time": "1:14:08", "remaining_time": "3:08:16", "throughput": 8762.54, "total_tokens": 38980896} +{"current_steps": 57830, "total_steps": 204665, "loss": 0.0619, "lr": 1.8037479009792062e-06, "epoch": 1.412796521144309, "percentage": 28.26, "elapsed_time": "1:14:08", "remaining_time": "3:08:16", "throughput": 8762.57, "total_tokens": 38984032} +{"current_steps": 57835, "total_steps": 204665, "loss": 0.1209, "lr": 1.8036971604426015e-06, "epoch": 1.4129186719761562, "percentage": 28.26, "elapsed_time": "1:14:09", "remaining_time": "3:08:15", "throughput": 8762.61, "total_tokens": 38987168} +{"current_steps": 57840, "total_steps": 204665, "loss": 0.0015, "lr": 1.8036464140612943e-06, "epoch": 1.4130408228080034, "percentage": 28.26, "elapsed_time": "1:14:09", "remaining_time": "3:08:15", "throughput": 8762.68, "total_tokens": 38990560} +{"current_steps": 57845, "total_steps": 204665, "loss": 0.0598, "lr": 1.8035956618356546e-06, "epoch": 1.4131629736398506, "percentage": 28.26, "elapsed_time": "1:14:09", "remaining_time": "3:08:14", "throughput": 8762.81, "total_tokens": 38994208} +{"current_steps": 57850, "total_steps": 204665, "loss": 0.124, "lr": 1.8035449037660508e-06, "epoch": 1.4132851244716975, "percentage": 28.27, "elapsed_time": "1:14:10", "remaining_time": "3:08:14", "throughput": 8762.88, "total_tokens": 38997600} +{"current_steps": 57855, "total_steps": 204665, "loss": 0.0878, "lr": 1.8034941398528525e-06, "epoch": 1.4134072753035447, "percentage": 28.27, "elapsed_time": "1:14:10", "remaining_time": "3:08:13", "throughput": 8762.96, "total_tokens": 39000992} +{"current_steps": 57860, "total_steps": 204665, "loss": 0.0991, "lr": 1.8034433700964287e-06, "epoch": 1.413529426135392, "percentage": 28.27, "elapsed_time": "1:14:11", "remaining_time": "3:08:13", "throughput": 8763.0, "total_tokens": 39004192} +{"current_steps": 57865, "total_steps": 204665, "loss": 0.0457, "lr": 1.8033925944971484e-06, "epoch": 1.413651576967239, "percentage": 28.27, "elapsed_time": "1:14:11", "remaining_time": "3:08:12", "throughput": 8763.09, "total_tokens": 39007648} +{"current_steps": 57870, "total_steps": 204665, "loss": 0.1027, "lr": 1.8033418130553812e-06, "epoch": 1.4137737277990863, "percentage": 28.28, "elapsed_time": "1:14:11", "remaining_time": "3:08:12", "throughput": 8763.18, "total_tokens": 39011104} +{"current_steps": 57875, "total_steps": 204665, "loss": 0.0397, "lr": 1.8032910257714966e-06, "epoch": 1.4138958786309335, "percentage": 28.28, "elapsed_time": "1:14:12", "remaining_time": "3:08:11", "throughput": 8763.28, "total_tokens": 39014624} +{"current_steps": 57880, "total_steps": 204665, "loss": 0.1179, "lr": 1.803240232645863e-06, "epoch": 1.4140180294627807, "percentage": 28.28, "elapsed_time": "1:14:12", "remaining_time": "3:08:11", "throughput": 8763.3, "total_tokens": 39017696} +{"current_steps": 57885, "total_steps": 204665, "loss": 0.0424, "lr": 1.803189433678851e-06, "epoch": 1.4141401802946278, "percentage": 28.28, "elapsed_time": "1:14:12", "remaining_time": "3:08:10", "throughput": 8763.34, "total_tokens": 39020896} +{"current_steps": 57890, "total_steps": 204665, "loss": 0.0841, "lr": 1.803138628870829e-06, "epoch": 1.414262331126475, "percentage": 28.29, "elapsed_time": "1:14:13", "remaining_time": "3:08:10", "throughput": 8763.52, "total_tokens": 39024864} +{"current_steps": 57895, "total_steps": 204665, "loss": 0.1301, "lr": 1.803087818222167e-06, "epoch": 1.4143844819583222, "percentage": 28.29, "elapsed_time": "1:14:13", "remaining_time": "3:08:10", "throughput": 8763.71, "total_tokens": 39028896} +{"current_steps": 57900, "total_steps": 204665, "loss": 0.11, "lr": 1.803037001733234e-06, "epoch": 1.4145066327901694, "percentage": 28.29, "elapsed_time": "1:14:13", "remaining_time": "3:08:09", "throughput": 8763.78, "total_tokens": 39032224} +{"current_steps": 57905, "total_steps": 204665, "loss": 0.0814, "lr": 1.8029861794044005e-06, "epoch": 1.4146287836220164, "percentage": 28.29, "elapsed_time": "1:14:14", "remaining_time": "3:08:09", "throughput": 8763.91, "total_tokens": 39035936} +{"current_steps": 57910, "total_steps": 204665, "loss": 0.0946, "lr": 1.8029353512360354e-06, "epoch": 1.4147509344538636, "percentage": 28.3, "elapsed_time": "1:14:14", "remaining_time": "3:08:08", "throughput": 8763.96, "total_tokens": 39039200} +{"current_steps": 57915, "total_steps": 204665, "loss": 0.1305, "lr": 1.8028845172285083e-06, "epoch": 1.4148730852857108, "percentage": 28.3, "elapsed_time": "1:14:14", "remaining_time": "3:08:08", "throughput": 8764.02, "total_tokens": 39042464} +{"current_steps": 57920, "total_steps": 204665, "loss": 0.0931, "lr": 1.802833677382189e-06, "epoch": 1.414995236117558, "percentage": 28.3, "elapsed_time": "1:14:15", "remaining_time": "3:08:07", "throughput": 8764.02, "total_tokens": 39045472} +{"current_steps": 57925, "total_steps": 204665, "loss": 0.1595, "lr": 1.8027828316974476e-06, "epoch": 1.4151173869494051, "percentage": 28.3, "elapsed_time": "1:14:15", "remaining_time": "3:08:07", "throughput": 8764.04, "total_tokens": 39048544} +{"current_steps": 57930, "total_steps": 204665, "loss": 0.21, "lr": 1.8027319801746532e-06, "epoch": 1.4152395377812523, "percentage": 28.3, "elapsed_time": "1:14:15", "remaining_time": "3:08:06", "throughput": 8764.11, "total_tokens": 39051936} +{"current_steps": 57935, "total_steps": 204665, "loss": 0.13, "lr": 1.8026811228141762e-06, "epoch": 1.4153616886130995, "percentage": 28.31, "elapsed_time": "1:14:16", "remaining_time": "3:08:06", "throughput": 8764.25, "total_tokens": 39055648} +{"current_steps": 57940, "total_steps": 204665, "loss": 0.0517, "lr": 1.8026302596163857e-06, "epoch": 1.4154838394449465, "percentage": 28.31, "elapsed_time": "1:14:16", "remaining_time": "3:08:05", "throughput": 8764.26, "total_tokens": 39058656} +{"current_steps": 57945, "total_steps": 204665, "loss": 0.045, "lr": 1.8025793905816523e-06, "epoch": 1.4156059902767937, "percentage": 28.31, "elapsed_time": "1:14:16", "remaining_time": "3:08:05", "throughput": 8764.31, "total_tokens": 39061920} +{"current_steps": 57950, "total_steps": 204665, "loss": 0.1386, "lr": 1.802528515710346e-06, "epoch": 1.4157281411086409, "percentage": 28.31, "elapsed_time": "1:14:17", "remaining_time": "3:08:04", "throughput": 8764.3, "total_tokens": 39064864} +{"current_steps": 57955, "total_steps": 204665, "loss": 0.1239, "lr": 1.8024776350028363e-06, "epoch": 1.415850291940488, "percentage": 28.32, "elapsed_time": "1:14:17", "remaining_time": "3:08:04", "throughput": 8764.41, "total_tokens": 39068448} +{"current_steps": 57960, "total_steps": 204665, "loss": 0.1009, "lr": 1.8024267484594933e-06, "epoch": 1.4159724427723352, "percentage": 28.32, "elapsed_time": "1:14:17", "remaining_time": "3:08:03", "throughput": 8764.6, "total_tokens": 39072480} +{"current_steps": 57965, "total_steps": 204665, "loss": 0.0409, "lr": 1.8023758560806873e-06, "epoch": 1.4160945936041824, "percentage": 28.32, "elapsed_time": "1:14:18", "remaining_time": "3:08:03", "throughput": 8764.65, "total_tokens": 39075744} +{"current_steps": 57970, "total_steps": 204665, "loss": 0.0018, "lr": 1.802324957866788e-06, "epoch": 1.4162167444360296, "percentage": 28.32, "elapsed_time": "1:14:18", "remaining_time": "3:08:02", "throughput": 8764.76, "total_tokens": 39079328} +{"current_steps": 57975, "total_steps": 204665, "loss": 0.016, "lr": 1.8022740538181662e-06, "epoch": 1.4163388952678768, "percentage": 28.33, "elapsed_time": "1:14:19", "remaining_time": "3:08:02", "throughput": 8764.82, "total_tokens": 39082592} +{"current_steps": 57980, "total_steps": 204665, "loss": 0.2262, "lr": 1.8022231439351914e-06, "epoch": 1.416461046099724, "percentage": 28.33, "elapsed_time": "1:14:19", "remaining_time": "3:08:01", "throughput": 8764.89, "total_tokens": 39085920} +{"current_steps": 57985, "total_steps": 204665, "loss": 0.0397, "lr": 1.8021722282182342e-06, "epoch": 1.4165831969315712, "percentage": 28.33, "elapsed_time": "1:14:19", "remaining_time": "3:08:01", "throughput": 8764.9, "total_tokens": 39088928} +{"current_steps": 57990, "total_steps": 204665, "loss": 0.1332, "lr": 1.802121306667665e-06, "epoch": 1.4167053477634184, "percentage": 28.33, "elapsed_time": "1:14:20", "remaining_time": "3:08:00", "throughput": 8764.92, "total_tokens": 39092064} +{"current_steps": 57995, "total_steps": 204665, "loss": 0.1581, "lr": 1.8020703792838535e-06, "epoch": 1.4168274985952654, "percentage": 28.34, "elapsed_time": "1:14:20", "remaining_time": "3:08:00", "throughput": 8765.0, "total_tokens": 39095456} +{"current_steps": 58000, "total_steps": 204665, "loss": 0.0014, "lr": 1.8020194460671707e-06, "epoch": 1.4169496494271125, "percentage": 28.34, "elapsed_time": "1:14:20", "remaining_time": "3:07:59", "throughput": 8764.98, "total_tokens": 39098336} +{"current_steps": 58005, "total_steps": 204665, "loss": 0.0703, "lr": 1.8019685070179868e-06, "epoch": 1.4170718002589597, "percentage": 28.34, "elapsed_time": "1:14:21", "remaining_time": "3:07:59", "throughput": 8765.08, "total_tokens": 39101856} +{"current_steps": 58010, "total_steps": 204665, "loss": 0.078, "lr": 1.8019175621366722e-06, "epoch": 1.417193951090807, "percentage": 28.34, "elapsed_time": "1:14:21", "remaining_time": "3:07:58", "throughput": 8765.07, "total_tokens": 39104800} +{"current_steps": 58015, "total_steps": 204665, "loss": 0.0502, "lr": 1.8018666114235973e-06, "epoch": 1.4173161019226541, "percentage": 28.35, "elapsed_time": "1:14:21", "remaining_time": "3:07:58", "throughput": 8765.14, "total_tokens": 39108192} +{"current_steps": 58020, "total_steps": 204665, "loss": 0.0477, "lr": 1.801815654879133e-06, "epoch": 1.4174382527545013, "percentage": 28.35, "elapsed_time": "1:14:22", "remaining_time": "3:07:58", "throughput": 8765.23, "total_tokens": 39111648} +{"current_steps": 58025, "total_steps": 204665, "loss": 0.0822, "lr": 1.8017646925036495e-06, "epoch": 1.4175604035863485, "percentage": 28.35, "elapsed_time": "1:14:22", "remaining_time": "3:07:57", "throughput": 8765.3, "total_tokens": 39114976} +{"current_steps": 58030, "total_steps": 204665, "loss": 0.1607, "lr": 1.8017137242975174e-06, "epoch": 1.4176825544181955, "percentage": 28.35, "elapsed_time": "1:14:22", "remaining_time": "3:07:57", "throughput": 8765.43, "total_tokens": 39118688} +{"current_steps": 58035, "total_steps": 204665, "loss": 0.1481, "lr": 1.8016627502611072e-06, "epoch": 1.4178047052500427, "percentage": 28.36, "elapsed_time": "1:14:23", "remaining_time": "3:07:56", "throughput": 8765.46, "total_tokens": 39121824} +{"current_steps": 58040, "total_steps": 204665, "loss": 0.1372, "lr": 1.8016117703947902e-06, "epoch": 1.4179268560818898, "percentage": 28.36, "elapsed_time": "1:14:23", "remaining_time": "3:07:56", "throughput": 8765.55, "total_tokens": 39125280} +{"current_steps": 58045, "total_steps": 204665, "loss": 0.0425, "lr": 1.8015607846989367e-06, "epoch": 1.418049006913737, "percentage": 28.36, "elapsed_time": "1:14:23", "remaining_time": "3:07:55", "throughput": 8765.57, "total_tokens": 39128416} +{"current_steps": 58050, "total_steps": 204665, "loss": 0.067, "lr": 1.8015097931739175e-06, "epoch": 1.4181711577455842, "percentage": 28.36, "elapsed_time": "1:14:24", "remaining_time": "3:07:55", "throughput": 8765.55, "total_tokens": 39131296} +{"current_steps": 58055, "total_steps": 204665, "loss": 0.118, "lr": 1.8014587958201038e-06, "epoch": 1.4182933085774314, "percentage": 28.37, "elapsed_time": "1:14:24", "remaining_time": "3:07:54", "throughput": 8765.56, "total_tokens": 39134304} +{"current_steps": 58060, "total_steps": 204665, "loss": 0.0955, "lr": 1.801407792637866e-06, "epoch": 1.4184154594092786, "percentage": 28.37, "elapsed_time": "1:14:24", "remaining_time": "3:07:54", "throughput": 8765.72, "total_tokens": 39138208} +{"current_steps": 58065, "total_steps": 204665, "loss": 0.0852, "lr": 1.801356783627575e-06, "epoch": 1.4185376102411258, "percentage": 28.37, "elapsed_time": "1:14:25", "remaining_time": "3:07:53", "throughput": 8765.75, "total_tokens": 39141344} +{"current_steps": 58070, "total_steps": 204665, "loss": 0.131, "lr": 1.8013057687896022e-06, "epoch": 1.418659761072973, "percentage": 28.37, "elapsed_time": "1:14:25", "remaining_time": "3:07:53", "throughput": 8765.82, "total_tokens": 39144672} +{"current_steps": 58075, "total_steps": 204665, "loss": 0.1219, "lr": 1.8012547481243182e-06, "epoch": 1.4187819119048202, "percentage": 28.38, "elapsed_time": "1:14:25", "remaining_time": "3:07:52", "throughput": 8765.84, "total_tokens": 39147744} +{"current_steps": 58080, "total_steps": 204665, "loss": 0.0932, "lr": 1.8012037216320942e-06, "epoch": 1.4189040627366674, "percentage": 28.38, "elapsed_time": "1:14:26", "remaining_time": "3:07:52", "throughput": 8765.96, "total_tokens": 39151392} +{"current_steps": 58085, "total_steps": 204665, "loss": 0.1037, "lr": 1.8011526893133012e-06, "epoch": 1.4190262135685143, "percentage": 28.38, "elapsed_time": "1:14:26", "remaining_time": "3:07:51", "throughput": 8765.97, "total_tokens": 39154464} +{"current_steps": 58090, "total_steps": 204665, "loss": 0.0417, "lr": 1.8011016511683103e-06, "epoch": 1.4191483644003615, "percentage": 28.38, "elapsed_time": "1:14:26", "remaining_time": "3:07:51", "throughput": 8766.06, "total_tokens": 39157920} +{"current_steps": 58095, "total_steps": 204665, "loss": 0.2024, "lr": 1.8010506071974926e-06, "epoch": 1.4192705152322087, "percentage": 28.39, "elapsed_time": "1:14:27", "remaining_time": "3:07:50", "throughput": 8766.13, "total_tokens": 39161312} +{"current_steps": 58100, "total_steps": 204665, "loss": 0.1039, "lr": 1.8009995574012198e-06, "epoch": 1.419392666064056, "percentage": 28.39, "elapsed_time": "1:14:27", "remaining_time": "3:07:50", "throughput": 8766.15, "total_tokens": 39164448} +{"current_steps": 58105, "total_steps": 204665, "loss": 0.1066, "lr": 1.8009485017798624e-06, "epoch": 1.419514816895903, "percentage": 28.39, "elapsed_time": "1:14:28", "remaining_time": "3:07:49", "throughput": 8766.22, "total_tokens": 39167776} +{"current_steps": 58110, "total_steps": 204665, "loss": 0.1594, "lr": 1.8008974403337924e-06, "epoch": 1.4196369677277503, "percentage": 28.39, "elapsed_time": "1:14:28", "remaining_time": "3:07:49", "throughput": 8766.28, "total_tokens": 39171104} +{"current_steps": 58115, "total_steps": 204665, "loss": 0.0865, "lr": 1.8008463730633807e-06, "epoch": 1.4197591185595975, "percentage": 28.4, "elapsed_time": "1:14:28", "remaining_time": "3:07:48", "throughput": 8766.37, "total_tokens": 39174560} +{"current_steps": 58120, "total_steps": 204665, "loss": 0.0939, "lr": 1.8007952999689989e-06, "epoch": 1.4198812693914444, "percentage": 28.4, "elapsed_time": "1:14:29", "remaining_time": "3:07:48", "throughput": 8766.41, "total_tokens": 39177760} +{"current_steps": 58125, "total_steps": 204665, "loss": 0.049, "lr": 1.800744221051018e-06, "epoch": 1.4200034202232916, "percentage": 28.4, "elapsed_time": "1:14:29", "remaining_time": "3:07:47", "throughput": 8766.49, "total_tokens": 39181152} +{"current_steps": 58130, "total_steps": 204665, "loss": 0.1063, "lr": 1.80069313630981e-06, "epoch": 1.4201255710551388, "percentage": 28.4, "elapsed_time": "1:14:29", "remaining_time": "3:07:47", "throughput": 8766.56, "total_tokens": 39184544} +{"current_steps": 58135, "total_steps": 204665, "loss": 0.0017, "lr": 1.8006420457457457e-06, "epoch": 1.420247721886986, "percentage": 28.4, "elapsed_time": "1:14:30", "remaining_time": "3:07:47", "throughput": 8766.65, "total_tokens": 39188000} +{"current_steps": 58140, "total_steps": 204665, "loss": 0.1047, "lr": 1.8005909493591975e-06, "epoch": 1.4203698727188332, "percentage": 28.41, "elapsed_time": "1:14:30", "remaining_time": "3:07:46", "throughput": 8766.62, "total_tokens": 39190816} +{"current_steps": 58145, "total_steps": 204665, "loss": 0.0524, "lr": 1.8005398471505364e-06, "epoch": 1.4204920235506804, "percentage": 28.41, "elapsed_time": "1:14:30", "remaining_time": "3:07:46", "throughput": 8766.7, "total_tokens": 39194208} +{"current_steps": 58150, "total_steps": 204665, "loss": 0.2904, "lr": 1.8004887391201343e-06, "epoch": 1.4206141743825276, "percentage": 28.41, "elapsed_time": "1:14:31", "remaining_time": "3:07:45", "throughput": 8766.86, "total_tokens": 39198048} +{"current_steps": 58155, "total_steps": 204665, "loss": 0.0442, "lr": 1.8004376252683629e-06, "epoch": 1.4207363252143748, "percentage": 28.41, "elapsed_time": "1:14:31", "remaining_time": "3:07:45", "throughput": 8766.93, "total_tokens": 39201376} +{"current_steps": 58160, "total_steps": 204665, "loss": 0.061, "lr": 1.8003865055955938e-06, "epoch": 1.420858476046222, "percentage": 28.42, "elapsed_time": "1:14:31", "remaining_time": "3:07:44", "throughput": 8767.04, "total_tokens": 39204960} +{"current_steps": 58165, "total_steps": 204665, "loss": 0.0012, "lr": 1.8003353801021985e-06, "epoch": 1.4209806268780691, "percentage": 28.42, "elapsed_time": "1:14:32", "remaining_time": "3:07:44", "throughput": 8767.15, "total_tokens": 39208544} +{"current_steps": 58170, "total_steps": 204665, "loss": 0.1808, "lr": 1.8002842487885493e-06, "epoch": 1.4211027777099163, "percentage": 28.42, "elapsed_time": "1:14:32", "remaining_time": "3:07:43", "throughput": 8767.2, "total_tokens": 39211808} +{"current_steps": 58175, "total_steps": 204665, "loss": 0.0492, "lr": 1.8002331116550176e-06, "epoch": 1.4212249285417633, "percentage": 28.42, "elapsed_time": "1:14:32", "remaining_time": "3:07:43", "throughput": 8767.28, "total_tokens": 39215264} +{"current_steps": 58180, "total_steps": 204665, "loss": 0.0165, "lr": 1.8001819687019758e-06, "epoch": 1.4213470793736105, "percentage": 28.43, "elapsed_time": "1:14:33", "remaining_time": "3:07:42", "throughput": 8767.32, "total_tokens": 39218400} +{"current_steps": 58185, "total_steps": 204665, "loss": 0.11, "lr": 1.800130819929795e-06, "epoch": 1.4214692302054577, "percentage": 28.43, "elapsed_time": "1:14:33", "remaining_time": "3:07:42", "throughput": 8767.4, "total_tokens": 39221856} +{"current_steps": 58190, "total_steps": 204665, "loss": 0.0348, "lr": 1.800079665338848e-06, "epoch": 1.4215913810373049, "percentage": 28.43, "elapsed_time": "1:14:33", "remaining_time": "3:07:41", "throughput": 8767.47, "total_tokens": 39225184} +{"current_steps": 58195, "total_steps": 204665, "loss": 0.1837, "lr": 1.8000285049295066e-06, "epoch": 1.421713531869152, "percentage": 28.43, "elapsed_time": "1:14:34", "remaining_time": "3:07:41", "throughput": 8767.46, "total_tokens": 39228128} +{"current_steps": 58200, "total_steps": 204665, "loss": 0.0519, "lr": 1.7999773387021423e-06, "epoch": 1.4218356827009992, "percentage": 28.44, "elapsed_time": "1:14:34", "remaining_time": "3:07:40", "throughput": 8767.55, "total_tokens": 39231584} +{"current_steps": 58205, "total_steps": 204665, "loss": 0.2783, "lr": 1.7999261666571281e-06, "epoch": 1.4219578335328464, "percentage": 28.44, "elapsed_time": "1:14:34", "remaining_time": "3:07:40", "throughput": 8767.58, "total_tokens": 39234720} +{"current_steps": 58210, "total_steps": 204665, "loss": 0.1263, "lr": 1.7998749887948352e-06, "epoch": 1.4220799843646934, "percentage": 28.44, "elapsed_time": "1:14:35", "remaining_time": "3:07:39", "throughput": 8767.73, "total_tokens": 39238560} +{"current_steps": 58215, "total_steps": 204665, "loss": 0.0268, "lr": 1.7998238051156367e-06, "epoch": 1.4222021351965406, "percentage": 28.44, "elapsed_time": "1:14:35", "remaining_time": "3:07:39", "throughput": 8767.87, "total_tokens": 39242272} +{"current_steps": 58220, "total_steps": 204665, "loss": 0.0969, "lr": 1.799772615619904e-06, "epoch": 1.4223242860283878, "percentage": 28.45, "elapsed_time": "1:14:36", "remaining_time": "3:07:38", "throughput": 8767.93, "total_tokens": 39245600} +{"current_steps": 58225, "total_steps": 204665, "loss": 0.0824, "lr": 1.79972142030801e-06, "epoch": 1.422446436860235, "percentage": 28.45, "elapsed_time": "1:14:36", "remaining_time": "3:07:38", "throughput": 8768.05, "total_tokens": 39249248} +{"current_steps": 58230, "total_steps": 204665, "loss": 0.0967, "lr": 1.7996702191803265e-06, "epoch": 1.4225685876920822, "percentage": 28.45, "elapsed_time": "1:14:36", "remaining_time": "3:07:37", "throughput": 8768.09, "total_tokens": 39252512} +{"current_steps": 58235, "total_steps": 204665, "loss": 0.0322, "lr": 1.7996190122372262e-06, "epoch": 1.4226907385239294, "percentage": 28.45, "elapsed_time": "1:14:37", "remaining_time": "3:07:37", "throughput": 8768.17, "total_tokens": 39255904} +{"current_steps": 58240, "total_steps": 204665, "loss": 0.0035, "lr": 1.7995677994790813e-06, "epoch": 1.4228128893557765, "percentage": 28.46, "elapsed_time": "1:14:37", "remaining_time": "3:07:37", "throughput": 8768.21, "total_tokens": 39259104} +{"current_steps": 58245, "total_steps": 204665, "loss": 0.1021, "lr": 1.7995165809062644e-06, "epoch": 1.4229350401876237, "percentage": 28.46, "elapsed_time": "1:14:37", "remaining_time": "3:07:36", "throughput": 8768.26, "total_tokens": 39262368} +{"current_steps": 58250, "total_steps": 204665, "loss": 0.1678, "lr": 1.7994653565191478e-06, "epoch": 1.423057191019471, "percentage": 28.46, "elapsed_time": "1:14:38", "remaining_time": "3:07:36", "throughput": 8768.38, "total_tokens": 39266016} +{"current_steps": 58255, "total_steps": 204665, "loss": 0.2146, "lr": 1.799414126318104e-06, "epoch": 1.4231793418513181, "percentage": 28.46, "elapsed_time": "1:14:38", "remaining_time": "3:07:35", "throughput": 8768.47, "total_tokens": 39269472} +{"current_steps": 58260, "total_steps": 204665, "loss": 0.0499, "lr": 1.7993628903035058e-06, "epoch": 1.4233014926831653, "percentage": 28.47, "elapsed_time": "1:14:38", "remaining_time": "3:07:35", "throughput": 8768.54, "total_tokens": 39272800} +{"current_steps": 58265, "total_steps": 204665, "loss": 0.1058, "lr": 1.7993116484757259e-06, "epoch": 1.4234236435150123, "percentage": 28.47, "elapsed_time": "1:14:39", "remaining_time": "3:07:34", "throughput": 8768.64, "total_tokens": 39276320} +{"current_steps": 58270, "total_steps": 204665, "loss": 0.0026, "lr": 1.7992604008351364e-06, "epoch": 1.4235457943468595, "percentage": 28.47, "elapsed_time": "1:14:39", "remaining_time": "3:07:34", "throughput": 8768.72, "total_tokens": 39279776} +{"current_steps": 58275, "total_steps": 204665, "loss": 0.0919, "lr": 1.7992091473821102e-06, "epoch": 1.4236679451787067, "percentage": 28.47, "elapsed_time": "1:14:39", "remaining_time": "3:07:33", "throughput": 8768.83, "total_tokens": 39283360} +{"current_steps": 58280, "total_steps": 204665, "loss": 0.2179, "lr": 1.7991578881170203e-06, "epoch": 1.4237900960105538, "percentage": 28.48, "elapsed_time": "1:14:40", "remaining_time": "3:07:33", "throughput": 8768.93, "total_tokens": 39286880} +{"current_steps": 58285, "total_steps": 204665, "loss": 0.1943, "lr": 1.7991066230402392e-06, "epoch": 1.423912246842401, "percentage": 28.48, "elapsed_time": "1:14:40", "remaining_time": "3:07:32", "throughput": 8768.96, "total_tokens": 39290016} +{"current_steps": 58290, "total_steps": 204665, "loss": 0.0399, "lr": 1.79905535215214e-06, "epoch": 1.4240343976742482, "percentage": 28.48, "elapsed_time": "1:14:40", "remaining_time": "3:07:32", "throughput": 8769.04, "total_tokens": 39293408} +{"current_steps": 58295, "total_steps": 204665, "loss": 0.1184, "lr": 1.799004075453095e-06, "epoch": 1.4241565485060952, "percentage": 28.48, "elapsed_time": "1:14:41", "remaining_time": "3:07:31", "throughput": 8769.15, "total_tokens": 39297056} +{"current_steps": 58300, "total_steps": 204665, "loss": 0.0444, "lr": 1.7989527929434777e-06, "epoch": 1.4242786993379424, "percentage": 28.49, "elapsed_time": "1:14:41", "remaining_time": "3:07:31", "throughput": 8769.17, "total_tokens": 39300128} +{"current_steps": 58305, "total_steps": 204665, "loss": 0.1591, "lr": 1.7989015046236608e-06, "epoch": 1.4244008501697896, "percentage": 28.49, "elapsed_time": "1:14:41", "remaining_time": "3:07:30", "throughput": 8769.23, "total_tokens": 39303456} +{"current_steps": 58310, "total_steps": 204665, "loss": 0.101, "lr": 1.798850210494017e-06, "epoch": 1.4245230010016368, "percentage": 28.49, "elapsed_time": "1:14:42", "remaining_time": "3:07:30", "throughput": 8769.31, "total_tokens": 39306848} +{"current_steps": 58315, "total_steps": 204665, "loss": 0.0354, "lr": 1.79879891055492e-06, "epoch": 1.424645151833484, "percentage": 28.49, "elapsed_time": "1:14:42", "remaining_time": "3:07:29", "throughput": 8769.34, "total_tokens": 39309984} +{"current_steps": 58320, "total_steps": 204665, "loss": 0.0058, "lr": 1.7987476048067425e-06, "epoch": 1.4247673026653311, "percentage": 28.5, "elapsed_time": "1:14:43", "remaining_time": "3:07:29", "throughput": 8769.37, "total_tokens": 39313120} +{"current_steps": 58325, "total_steps": 204665, "loss": 0.113, "lr": 1.7986962932498572e-06, "epoch": 1.4248894534971783, "percentage": 28.5, "elapsed_time": "1:14:43", "remaining_time": "3:07:28", "throughput": 8769.44, "total_tokens": 39316512} +{"current_steps": 58330, "total_steps": 204665, "loss": 0.1018, "lr": 1.7986449758846378e-06, "epoch": 1.4250116043290255, "percentage": 28.5, "elapsed_time": "1:14:43", "remaining_time": "3:07:28", "throughput": 8769.48, "total_tokens": 39319712} +{"current_steps": 58335, "total_steps": 204665, "loss": 0.1428, "lr": 1.7985936527114576e-06, "epoch": 1.4251337551608727, "percentage": 28.5, "elapsed_time": "1:14:44", "remaining_time": "3:07:27", "throughput": 8769.52, "total_tokens": 39322912} +{"current_steps": 58340, "total_steps": 204665, "loss": 0.0386, "lr": 1.798542323730689e-06, "epoch": 1.42525590599272, "percentage": 28.51, "elapsed_time": "1:14:44", "remaining_time": "3:07:27", "throughput": 8769.6, "total_tokens": 39326304} +{"current_steps": 58345, "total_steps": 204665, "loss": 0.1189, "lr": 1.7984909889427065e-06, "epoch": 1.425378056824567, "percentage": 28.51, "elapsed_time": "1:14:44", "remaining_time": "3:07:27", "throughput": 8769.67, "total_tokens": 39329696} +{"current_steps": 58350, "total_steps": 204665, "loss": 0.0391, "lr": 1.798439648347882e-06, "epoch": 1.4255002076564143, "percentage": 28.51, "elapsed_time": "1:14:45", "remaining_time": "3:07:26", "throughput": 8769.72, "total_tokens": 39332896} +{"current_steps": 58355, "total_steps": 204665, "loss": 0.0033, "lr": 1.7983883019465905e-06, "epoch": 1.4256223584882612, "percentage": 28.51, "elapsed_time": "1:14:45", "remaining_time": "3:07:26", "throughput": 8769.82, "total_tokens": 39336416} +{"current_steps": 58360, "total_steps": 204665, "loss": 0.1611, "lr": 1.7983369497392038e-06, "epoch": 1.4257445093201084, "percentage": 28.51, "elapsed_time": "1:14:45", "remaining_time": "3:07:25", "throughput": 8769.85, "total_tokens": 39339552} +{"current_steps": 58365, "total_steps": 204665, "loss": 0.0557, "lr": 1.7982855917260965e-06, "epoch": 1.4258666601519556, "percentage": 28.52, "elapsed_time": "1:14:46", "remaining_time": "3:07:25", "throughput": 8769.93, "total_tokens": 39343008} +{"current_steps": 58370, "total_steps": 204665, "loss": 0.0352, "lr": 1.7982342279076415e-06, "epoch": 1.4259888109838028, "percentage": 28.52, "elapsed_time": "1:14:46", "remaining_time": "3:07:24", "throughput": 8770.09, "total_tokens": 39346848} +{"current_steps": 58375, "total_steps": 204665, "loss": 0.0015, "lr": 1.7981828582842122e-06, "epoch": 1.42611096181565, "percentage": 28.52, "elapsed_time": "1:14:46", "remaining_time": "3:07:24", "throughput": 8770.41, "total_tokens": 39351584} +{"current_steps": 58380, "total_steps": 204665, "loss": 0.1958, "lr": 1.7981314828561829e-06, "epoch": 1.4262331126474972, "percentage": 28.52, "elapsed_time": "1:14:47", "remaining_time": "3:07:23", "throughput": 8770.48, "total_tokens": 39354976} +{"current_steps": 58385, "total_steps": 204665, "loss": 0.0402, "lr": 1.7980801016239267e-06, "epoch": 1.4263552634793442, "percentage": 28.53, "elapsed_time": "1:14:47", "remaining_time": "3:07:23", "throughput": 8770.49, "total_tokens": 39357984} +{"current_steps": 58390, "total_steps": 204665, "loss": 0.1442, "lr": 1.7980287145878173e-06, "epoch": 1.4264774143111913, "percentage": 28.53, "elapsed_time": "1:14:47", "remaining_time": "3:07:22", "throughput": 8770.58, "total_tokens": 39361440} +{"current_steps": 58395, "total_steps": 204665, "loss": 0.1693, "lr": 1.7979773217482284e-06, "epoch": 1.4265995651430385, "percentage": 28.53, "elapsed_time": "1:14:48", "remaining_time": "3:07:22", "throughput": 8770.55, "total_tokens": 39364256} +{"current_steps": 58400, "total_steps": 204665, "loss": 0.0666, "lr": 1.7979259231055338e-06, "epoch": 1.4267217159748857, "percentage": 28.53, "elapsed_time": "1:14:48", "remaining_time": "3:07:21", "throughput": 8770.64, "total_tokens": 39367712} +{"current_steps": 58405, "total_steps": 204665, "loss": 0.1043, "lr": 1.7978745186601075e-06, "epoch": 1.426843866806733, "percentage": 28.54, "elapsed_time": "1:14:48", "remaining_time": "3:07:21", "throughput": 8770.64, "total_tokens": 39370656} +{"current_steps": 58410, "total_steps": 204665, "loss": 0.0842, "lr": 1.7978231084123229e-06, "epoch": 1.42696601763858, "percentage": 28.54, "elapsed_time": "1:14:49", "remaining_time": "3:07:20", "throughput": 8770.71, "total_tokens": 39374048} +{"current_steps": 58415, "total_steps": 204665, "loss": 0.0402, "lr": 1.7977716923625538e-06, "epoch": 1.4270881684704273, "percentage": 28.54, "elapsed_time": "1:14:49", "remaining_time": "3:07:20", "throughput": 8770.81, "total_tokens": 39377568} +{"current_steps": 58420, "total_steps": 204665, "loss": 0.0249, "lr": 1.7977202705111746e-06, "epoch": 1.4272103193022745, "percentage": 28.54, "elapsed_time": "1:14:49", "remaining_time": "3:07:19", "throughput": 8770.94, "total_tokens": 39381280} +{"current_steps": 58425, "total_steps": 204665, "loss": 0.2239, "lr": 1.7976688428585592e-06, "epoch": 1.4273324701341217, "percentage": 28.55, "elapsed_time": "1:14:50", "remaining_time": "3:07:19", "throughput": 8771.0, "total_tokens": 39384608} +{"current_steps": 58430, "total_steps": 204665, "loss": 0.0534, "lr": 1.7976174094050813e-06, "epoch": 1.4274546209659689, "percentage": 28.55, "elapsed_time": "1:14:50", "remaining_time": "3:07:18", "throughput": 8771.1, "total_tokens": 39388192} +{"current_steps": 58435, "total_steps": 204665, "loss": 0.103, "lr": 1.797565970151115e-06, "epoch": 1.427576771797816, "percentage": 28.55, "elapsed_time": "1:14:51", "remaining_time": "3:07:18", "throughput": 8771.16, "total_tokens": 39391520} +{"current_steps": 58440, "total_steps": 204665, "loss": 0.1048, "lr": 1.7975145250970346e-06, "epoch": 1.427698922629663, "percentage": 28.55, "elapsed_time": "1:14:51", "remaining_time": "3:07:18", "throughput": 8771.21, "total_tokens": 39394784} +{"current_steps": 58445, "total_steps": 204665, "loss": 0.1401, "lr": 1.797463074243214e-06, "epoch": 1.4278210734615102, "percentage": 28.56, "elapsed_time": "1:14:51", "remaining_time": "3:07:17", "throughput": 8771.26, "total_tokens": 39398048} +{"current_steps": 58450, "total_steps": 204665, "loss": 0.0479, "lr": 1.7974116175900273e-06, "epoch": 1.4279432242933574, "percentage": 28.56, "elapsed_time": "1:14:52", "remaining_time": "3:07:17", "throughput": 8771.35, "total_tokens": 39401568} +{"current_steps": 58455, "total_steps": 204665, "loss": 0.0029, "lr": 1.797360155137849e-06, "epoch": 1.4280653751252046, "percentage": 28.56, "elapsed_time": "1:14:52", "remaining_time": "3:07:16", "throughput": 8771.36, "total_tokens": 39404576} +{"current_steps": 58460, "total_steps": 204665, "loss": 0.1259, "lr": 1.797308686887053e-06, "epoch": 1.4281875259570518, "percentage": 28.56, "elapsed_time": "1:14:52", "remaining_time": "3:07:16", "throughput": 8771.39, "total_tokens": 39407712} +{"current_steps": 58465, "total_steps": 204665, "loss": 0.0671, "lr": 1.797257212838014e-06, "epoch": 1.428309676788899, "percentage": 28.57, "elapsed_time": "1:14:53", "remaining_time": "3:07:15", "throughput": 8771.44, "total_tokens": 39410976} +{"current_steps": 58470, "total_steps": 204665, "loss": 0.0049, "lr": 1.797205732991106e-06, "epoch": 1.4284318276207462, "percentage": 28.57, "elapsed_time": "1:14:53", "remaining_time": "3:07:15", "throughput": 8771.52, "total_tokens": 39414432} +{"current_steps": 58475, "total_steps": 204665, "loss": 0.1524, "lr": 1.7971542473467036e-06, "epoch": 1.4285539784525931, "percentage": 28.57, "elapsed_time": "1:14:53", "remaining_time": "3:07:14", "throughput": 8771.58, "total_tokens": 39417760} +{"current_steps": 58480, "total_steps": 204665, "loss": 0.097, "lr": 1.797102755905181e-06, "epoch": 1.4286761292844403, "percentage": 28.57, "elapsed_time": "1:14:54", "remaining_time": "3:07:14", "throughput": 8771.62, "total_tokens": 39420960} +{"current_steps": 58485, "total_steps": 204665, "loss": 0.0374, "lr": 1.7970512586669128e-06, "epoch": 1.4287982801162875, "percentage": 28.58, "elapsed_time": "1:14:54", "remaining_time": "3:07:13", "throughput": 8771.67, "total_tokens": 39424224} +{"current_steps": 58490, "total_steps": 204665, "loss": 0.0026, "lr": 1.7969997556322736e-06, "epoch": 1.4289204309481347, "percentage": 28.58, "elapsed_time": "1:14:54", "remaining_time": "3:07:13", "throughput": 8771.79, "total_tokens": 39427872} +{"current_steps": 58495, "total_steps": 204665, "loss": 0.132, "lr": 1.7969482468016377e-06, "epoch": 1.429042581779982, "percentage": 28.58, "elapsed_time": "1:14:55", "remaining_time": "3:07:12", "throughput": 8771.91, "total_tokens": 39431520} +{"current_steps": 58500, "total_steps": 204665, "loss": 0.0397, "lr": 1.7968967321753796e-06, "epoch": 1.429164732611829, "percentage": 28.58, "elapsed_time": "1:14:55", "remaining_time": "3:07:12", "throughput": 8772.1, "total_tokens": 39435552} +{"current_steps": 58505, "total_steps": 204665, "loss": 0.0854, "lr": 1.7968452117538742e-06, "epoch": 1.4292868834436763, "percentage": 28.59, "elapsed_time": "1:14:55", "remaining_time": "3:07:11", "throughput": 8772.22, "total_tokens": 39439200} +{"current_steps": 58510, "total_steps": 204665, "loss": 0.0433, "lr": 1.7967936855374964e-06, "epoch": 1.4294090342755235, "percentage": 28.59, "elapsed_time": "1:14:56", "remaining_time": "3:07:11", "throughput": 8772.26, "total_tokens": 39442400} +{"current_steps": 58515, "total_steps": 204665, "loss": 0.1447, "lr": 1.7967421535266203e-06, "epoch": 1.4295311851073706, "percentage": 28.59, "elapsed_time": "1:14:56", "remaining_time": "3:07:10", "throughput": 8772.3, "total_tokens": 39445600} +{"current_steps": 58520, "total_steps": 204665, "loss": 0.0557, "lr": 1.796690615721621e-06, "epoch": 1.4296533359392178, "percentage": 28.59, "elapsed_time": "1:14:56", "remaining_time": "3:07:10", "throughput": 8772.39, "total_tokens": 39449056} +{"current_steps": 58525, "total_steps": 204665, "loss": 0.0828, "lr": 1.7966390721228733e-06, "epoch": 1.429775486771065, "percentage": 28.6, "elapsed_time": "1:14:57", "remaining_time": "3:07:10", "throughput": 8772.52, "total_tokens": 39452768} +{"current_steps": 58530, "total_steps": 204665, "loss": 0.1749, "lr": 1.7965875227307522e-06, "epoch": 1.429897637602912, "percentage": 28.6, "elapsed_time": "1:14:57", "remaining_time": "3:07:09", "throughput": 8772.63, "total_tokens": 39456352} +{"current_steps": 58535, "total_steps": 204665, "loss": 0.1555, "lr": 1.796535967545632e-06, "epoch": 1.4300197884347592, "percentage": 28.6, "elapsed_time": "1:14:58", "remaining_time": "3:07:09", "throughput": 8772.63, "total_tokens": 39459360} +{"current_steps": 58540, "total_steps": 204665, "loss": 0.0359, "lr": 1.7964844065678882e-06, "epoch": 1.4301419392666064, "percentage": 28.6, "elapsed_time": "1:14:58", "remaining_time": "3:07:08", "throughput": 8772.75, "total_tokens": 39462944} +{"current_steps": 58545, "total_steps": 204665, "loss": 0.0913, "lr": 1.7964328397978954e-06, "epoch": 1.4302640900984536, "percentage": 28.61, "elapsed_time": "1:14:58", "remaining_time": "3:07:08", "throughput": 8772.82, "total_tokens": 39466336} +{"current_steps": 58550, "total_steps": 204665, "loss": 0.0912, "lr": 1.796381267236029e-06, "epoch": 1.4303862409303008, "percentage": 28.61, "elapsed_time": "1:14:59", "remaining_time": "3:07:07", "throughput": 8772.88, "total_tokens": 39469664} +{"current_steps": 58555, "total_steps": 204665, "loss": 0.1042, "lr": 1.7963296888826638e-06, "epoch": 1.430508391762148, "percentage": 28.61, "elapsed_time": "1:14:59", "remaining_time": "3:07:07", "throughput": 8772.96, "total_tokens": 39473056} +{"current_steps": 58560, "total_steps": 204665, "loss": 0.0864, "lr": 1.796278104738175e-06, "epoch": 1.4306305425939951, "percentage": 28.61, "elapsed_time": "1:14:59", "remaining_time": "3:07:06", "throughput": 8773.02, "total_tokens": 39476384} +{"current_steps": 58565, "total_steps": 204665, "loss": 0.0593, "lr": 1.7962265148029374e-06, "epoch": 1.430752693425842, "percentage": 28.62, "elapsed_time": "1:15:00", "remaining_time": "3:07:06", "throughput": 8773.05, "total_tokens": 39479520} +{"current_steps": 58570, "total_steps": 204665, "loss": 0.1574, "lr": 1.7961749190773263e-06, "epoch": 1.4308748442576893, "percentage": 28.62, "elapsed_time": "1:15:00", "remaining_time": "3:07:05", "throughput": 8773.15, "total_tokens": 39483040} +{"current_steps": 58575, "total_steps": 204665, "loss": 0.1276, "lr": 1.7961233175617173e-06, "epoch": 1.4309969950895365, "percentage": 28.62, "elapsed_time": "1:15:00", "remaining_time": "3:07:05", "throughput": 8773.25, "total_tokens": 39486560} +{"current_steps": 58580, "total_steps": 204665, "loss": 0.0025, "lr": 1.7960717102564855e-06, "epoch": 1.4311191459213837, "percentage": 28.62, "elapsed_time": "1:15:01", "remaining_time": "3:07:04", "throughput": 8773.32, "total_tokens": 39489888} +{"current_steps": 58585, "total_steps": 204665, "loss": 0.1429, "lr": 1.796020097162006e-06, "epoch": 1.4312412967532309, "percentage": 28.62, "elapsed_time": "1:15:01", "remaining_time": "3:07:04", "throughput": 8773.4, "total_tokens": 39493344} +{"current_steps": 58590, "total_steps": 204665, "loss": 0.0477, "lr": 1.7959684782786542e-06, "epoch": 1.431363447585078, "percentage": 28.63, "elapsed_time": "1:15:01", "remaining_time": "3:07:03", "throughput": 8773.46, "total_tokens": 39496608} +{"current_steps": 58595, "total_steps": 204665, "loss": 0.0153, "lr": 1.7959168536068056e-06, "epoch": 1.4314855984169252, "percentage": 28.63, "elapsed_time": "1:15:02", "remaining_time": "3:07:03", "throughput": 8773.45, "total_tokens": 39499552} +{"current_steps": 58600, "total_steps": 204665, "loss": 0.1024, "lr": 1.7958652231468357e-06, "epoch": 1.4316077492487724, "percentage": 28.63, "elapsed_time": "1:15:02", "remaining_time": "3:07:02", "throughput": 8773.54, "total_tokens": 39503008} +{"current_steps": 58605, "total_steps": 204665, "loss": 0.0562, "lr": 1.7958135868991195e-06, "epoch": 1.4317299000806196, "percentage": 28.63, "elapsed_time": "1:15:02", "remaining_time": "3:07:02", "throughput": 8773.61, "total_tokens": 39506400} +{"current_steps": 58610, "total_steps": 204665, "loss": 0.1308, "lr": 1.7957619448640332e-06, "epoch": 1.4318520509124668, "percentage": 28.64, "elapsed_time": "1:15:03", "remaining_time": "3:07:01", "throughput": 8773.77, "total_tokens": 39510240} +{"current_steps": 58615, "total_steps": 204665, "loss": 0.0285, "lr": 1.7957102970419516e-06, "epoch": 1.431974201744314, "percentage": 28.64, "elapsed_time": "1:15:03", "remaining_time": "3:07:01", "throughput": 8773.82, "total_tokens": 39513504} +{"current_steps": 58620, "total_steps": 204665, "loss": 0.0449, "lr": 1.795658643433251e-06, "epoch": 1.432096352576161, "percentage": 28.64, "elapsed_time": "1:15:03", "remaining_time": "3:07:00", "throughput": 8773.91, "total_tokens": 39516960} +{"current_steps": 58625, "total_steps": 204665, "loss": 0.0842, "lr": 1.7956069840383066e-06, "epoch": 1.4322185034080082, "percentage": 28.64, "elapsed_time": "1:15:04", "remaining_time": "3:07:00", "throughput": 8774.01, "total_tokens": 39520544} +{"current_steps": 58630, "total_steps": 204665, "loss": 0.0956, "lr": 1.7955553188574944e-06, "epoch": 1.4323406542398553, "percentage": 28.65, "elapsed_time": "1:15:04", "remaining_time": "3:07:00", "throughput": 8774.14, "total_tokens": 39524256} +{"current_steps": 58635, "total_steps": 204665, "loss": 0.1319, "lr": 1.7955036478911896e-06, "epoch": 1.4324628050717025, "percentage": 28.65, "elapsed_time": "1:15:04", "remaining_time": "3:06:59", "throughput": 8774.29, "total_tokens": 39528032} +{"current_steps": 58640, "total_steps": 204665, "loss": 0.0841, "lr": 1.7954519711397689e-06, "epoch": 1.4325849559035497, "percentage": 28.65, "elapsed_time": "1:15:05", "remaining_time": "3:06:59", "throughput": 8774.35, "total_tokens": 39531424} +{"current_steps": 58645, "total_steps": 204665, "loss": 0.0688, "lr": 1.795400288603607e-06, "epoch": 1.432707106735397, "percentage": 28.65, "elapsed_time": "1:15:05", "remaining_time": "3:06:58", "throughput": 8774.5, "total_tokens": 39535200} +{"current_steps": 58650, "total_steps": 204665, "loss": 0.0019, "lr": 1.7953486002830802e-06, "epoch": 1.432829257567244, "percentage": 28.66, "elapsed_time": "1:15:06", "remaining_time": "3:06:58", "throughput": 8774.6, "total_tokens": 39538720} +{"current_steps": 58655, "total_steps": 204665, "loss": 0.008, "lr": 1.7952969061785647e-06, "epoch": 1.432951408399091, "percentage": 28.66, "elapsed_time": "1:15:06", "remaining_time": "3:06:57", "throughput": 8774.64, "total_tokens": 39541920} +{"current_steps": 58660, "total_steps": 204665, "loss": 0.0869, "lr": 1.7952452062904362e-06, "epoch": 1.4330735592309383, "percentage": 28.66, "elapsed_time": "1:15:06", "remaining_time": "3:06:57", "throughput": 8774.72, "total_tokens": 39545312} +{"current_steps": 58665, "total_steps": 204665, "loss": 0.0162, "lr": 1.7951935006190709e-06, "epoch": 1.4331957100627855, "percentage": 28.66, "elapsed_time": "1:15:07", "remaining_time": "3:06:56", "throughput": 8774.74, "total_tokens": 39548384} +{"current_steps": 58670, "total_steps": 204665, "loss": 0.0794, "lr": 1.795141789164844e-06, "epoch": 1.4333178608946326, "percentage": 28.67, "elapsed_time": "1:15:07", "remaining_time": "3:06:56", "throughput": 8774.87, "total_tokens": 39552096} +{"current_steps": 58675, "total_steps": 204665, "loss": 0.0993, "lr": 1.7950900719281326e-06, "epoch": 1.4334400117264798, "percentage": 28.67, "elapsed_time": "1:15:07", "remaining_time": "3:06:55", "throughput": 8774.99, "total_tokens": 39555744} +{"current_steps": 58680, "total_steps": 204665, "loss": 0.1404, "lr": 1.7950383489093118e-06, "epoch": 1.433562162558327, "percentage": 28.67, "elapsed_time": "1:15:08", "remaining_time": "3:06:55", "throughput": 8775.07, "total_tokens": 39559200} +{"current_steps": 58685, "total_steps": 204665, "loss": 0.0497, "lr": 1.7949866201087592e-06, "epoch": 1.4336843133901742, "percentage": 28.67, "elapsed_time": "1:15:08", "remaining_time": "3:06:54", "throughput": 8775.15, "total_tokens": 39562592} +{"current_steps": 58690, "total_steps": 204665, "loss": 0.0657, "lr": 1.7949348855268494e-06, "epoch": 1.4338064642220214, "percentage": 28.68, "elapsed_time": "1:15:08", "remaining_time": "3:06:54", "throughput": 8775.18, "total_tokens": 39565728} +{"current_steps": 58695, "total_steps": 204665, "loss": 0.0124, "lr": 1.7948831451639594e-06, "epoch": 1.4339286150538686, "percentage": 28.68, "elapsed_time": "1:15:09", "remaining_time": "3:06:53", "throughput": 8775.25, "total_tokens": 39569056} +{"current_steps": 58700, "total_steps": 204665, "loss": 0.0726, "lr": 1.7948313990204654e-06, "epoch": 1.4340507658857158, "percentage": 28.68, "elapsed_time": "1:15:09", "remaining_time": "3:06:53", "throughput": 8775.24, "total_tokens": 39572000} +{"current_steps": 58705, "total_steps": 204665, "loss": 0.0827, "lr": 1.7947796470967438e-06, "epoch": 1.434172916717563, "percentage": 28.68, "elapsed_time": "1:15:09", "remaining_time": "3:06:52", "throughput": 8775.31, "total_tokens": 39575328} +{"current_steps": 58710, "total_steps": 204665, "loss": 0.085, "lr": 1.7947278893931705e-06, "epoch": 1.43429506754941, "percentage": 28.69, "elapsed_time": "1:15:10", "remaining_time": "3:06:52", "throughput": 8775.37, "total_tokens": 39578656} +{"current_steps": 58715, "total_steps": 204665, "loss": 0.0675, "lr": 1.7946761259101226e-06, "epoch": 1.4344172183812571, "percentage": 28.69, "elapsed_time": "1:15:10", "remaining_time": "3:06:52", "throughput": 8775.47, "total_tokens": 39582176} +{"current_steps": 58720, "total_steps": 204665, "loss": 0.0015, "lr": 1.7946243566479762e-06, "epoch": 1.4345393692131043, "percentage": 28.69, "elapsed_time": "1:15:10", "remaining_time": "3:06:51", "throughput": 8775.59, "total_tokens": 39585824} +{"current_steps": 58725, "total_steps": 204665, "loss": 0.0597, "lr": 1.7945725816071074e-06, "epoch": 1.4346615200449515, "percentage": 28.69, "elapsed_time": "1:15:11", "remaining_time": "3:06:51", "throughput": 8775.56, "total_tokens": 39588640} +{"current_steps": 58730, "total_steps": 204665, "loss": 0.1699, "lr": 1.794520800787893e-06, "epoch": 1.4347836708767987, "percentage": 28.7, "elapsed_time": "1:15:11", "remaining_time": "3:06:50", "throughput": 8775.6, "total_tokens": 39591840} +{"current_steps": 58735, "total_steps": 204665, "loss": 0.0763, "lr": 1.79446901419071e-06, "epoch": 1.434905821708646, "percentage": 28.7, "elapsed_time": "1:15:11", "remaining_time": "3:06:50", "throughput": 8775.68, "total_tokens": 39595232} +{"current_steps": 58740, "total_steps": 204665, "loss": 0.0246, "lr": 1.7944172218159348e-06, "epoch": 1.435027972540493, "percentage": 28.7, "elapsed_time": "1:15:12", "remaining_time": "3:06:49", "throughput": 8775.74, "total_tokens": 39598560} +{"current_steps": 58745, "total_steps": 204665, "loss": 0.1222, "lr": 1.7943654236639436e-06, "epoch": 1.43515012337234, "percentage": 28.7, "elapsed_time": "1:15:12", "remaining_time": "3:06:49", "throughput": 8775.76, "total_tokens": 39601632} +{"current_steps": 58750, "total_steps": 204665, "loss": 0.0024, "lr": 1.7943136197351135e-06, "epoch": 1.4352722742041872, "percentage": 28.71, "elapsed_time": "1:15:12", "remaining_time": "3:06:48", "throughput": 8775.92, "total_tokens": 39605472} +{"current_steps": 58755, "total_steps": 204665, "loss": 0.1361, "lr": 1.794261810029821e-06, "epoch": 1.4353944250360344, "percentage": 28.71, "elapsed_time": "1:15:13", "remaining_time": "3:06:48", "throughput": 8776.02, "total_tokens": 39609056} +{"current_steps": 58760, "total_steps": 204665, "loss": 0.0786, "lr": 1.794209994548443e-06, "epoch": 1.4355165758678816, "percentage": 28.71, "elapsed_time": "1:15:13", "remaining_time": "3:06:47", "throughput": 8776.01, "total_tokens": 39611936} +{"current_steps": 58765, "total_steps": 204665, "loss": 0.1309, "lr": 1.7941581732913562e-06, "epoch": 1.4356387266997288, "percentage": 28.71, "elapsed_time": "1:15:14", "remaining_time": "3:06:47", "throughput": 8776.12, "total_tokens": 39615520} +{"current_steps": 58770, "total_steps": 204665, "loss": 0.0376, "lr": 1.7941063462589376e-06, "epoch": 1.435760877531576, "percentage": 28.72, "elapsed_time": "1:15:14", "remaining_time": "3:06:46", "throughput": 8776.26, "total_tokens": 39619296} +{"current_steps": 58775, "total_steps": 204665, "loss": 0.1143, "lr": 1.7940545134515642e-06, "epoch": 1.4358830283634232, "percentage": 28.72, "elapsed_time": "1:15:14", "remaining_time": "3:06:46", "throughput": 8776.32, "total_tokens": 39622560} +{"current_steps": 58780, "total_steps": 204665, "loss": 0.0009, "lr": 1.7940026748696128e-06, "epoch": 1.4360051791952704, "percentage": 28.72, "elapsed_time": "1:15:15", "remaining_time": "3:06:45", "throughput": 8776.39, "total_tokens": 39625888} +{"current_steps": 58785, "total_steps": 204665, "loss": 0.0466, "lr": 1.7939508305134604e-06, "epoch": 1.4361273300271176, "percentage": 28.72, "elapsed_time": "1:15:15", "remaining_time": "3:06:45", "throughput": 8776.58, "total_tokens": 39629984} +{"current_steps": 58790, "total_steps": 204665, "loss": 0.1889, "lr": 1.7938989803834838e-06, "epoch": 1.4362494808589648, "percentage": 28.72, "elapsed_time": "1:15:15", "remaining_time": "3:06:44", "throughput": 8776.64, "total_tokens": 39633312} +{"current_steps": 58795, "total_steps": 204665, "loss": 0.118, "lr": 1.7938471244800603e-06, "epoch": 1.436371631690812, "percentage": 28.73, "elapsed_time": "1:15:16", "remaining_time": "3:06:44", "throughput": 8776.71, "total_tokens": 39636640} +{"current_steps": 58800, "total_steps": 204665, "loss": 0.0903, "lr": 1.7937952628035673e-06, "epoch": 1.436493782522659, "percentage": 28.73, "elapsed_time": "1:15:16", "remaining_time": "3:06:43", "throughput": 8776.76, "total_tokens": 39639968} +{"current_steps": 58805, "total_steps": 204665, "loss": 0.0207, "lr": 1.7937433953543815e-06, "epoch": 1.436615933354506, "percentage": 28.73, "elapsed_time": "1:15:16", "remaining_time": "3:06:43", "throughput": 8776.83, "total_tokens": 39643296} +{"current_steps": 58810, "total_steps": 204665, "loss": 0.1242, "lr": 1.79369152213288e-06, "epoch": 1.4367380841863533, "percentage": 28.73, "elapsed_time": "1:15:17", "remaining_time": "3:06:43", "throughput": 8776.96, "total_tokens": 39647008} +{"current_steps": 58815, "total_steps": 204665, "loss": 0.0988, "lr": 1.7936396431394405e-06, "epoch": 1.4368602350182005, "percentage": 28.74, "elapsed_time": "1:15:17", "remaining_time": "3:06:42", "throughput": 8777.05, "total_tokens": 39650464} +{"current_steps": 58820, "total_steps": 204665, "loss": 0.064, "lr": 1.7935877583744402e-06, "epoch": 1.4369823858500477, "percentage": 28.74, "elapsed_time": "1:15:17", "remaining_time": "3:06:42", "throughput": 8777.13, "total_tokens": 39653856} +{"current_steps": 58825, "total_steps": 204665, "loss": 0.0035, "lr": 1.7935358678382563e-06, "epoch": 1.4371045366818949, "percentage": 28.74, "elapsed_time": "1:15:18", "remaining_time": "3:06:41", "throughput": 8777.19, "total_tokens": 39657184} +{"current_steps": 58830, "total_steps": 204665, "loss": 0.0999, "lr": 1.7934839715312657e-06, "epoch": 1.437226687513742, "percentage": 28.74, "elapsed_time": "1:15:18", "remaining_time": "3:06:41", "throughput": 8777.23, "total_tokens": 39660384} +{"current_steps": 58835, "total_steps": 204665, "loss": 0.1024, "lr": 1.7934320694538462e-06, "epoch": 1.437348838345589, "percentage": 28.75, "elapsed_time": "1:15:18", "remaining_time": "3:06:40", "throughput": 8777.31, "total_tokens": 39663840} +{"current_steps": 58840, "total_steps": 204665, "loss": 0.0798, "lr": 1.7933801616063756e-06, "epoch": 1.4374709891774362, "percentage": 28.75, "elapsed_time": "1:15:19", "remaining_time": "3:06:40", "throughput": 8777.33, "total_tokens": 39666912} +{"current_steps": 58845, "total_steps": 204665, "loss": 0.0659, "lr": 1.793328247989231e-06, "epoch": 1.4375931400092834, "percentage": 28.75, "elapsed_time": "1:15:19", "remaining_time": "3:06:39", "throughput": 8777.38, "total_tokens": 39670176} +{"current_steps": 58850, "total_steps": 204665, "loss": 0.0839, "lr": 1.7932763286027903e-06, "epoch": 1.4377152908411306, "percentage": 28.75, "elapsed_time": "1:15:19", "remaining_time": "3:06:39", "throughput": 8777.4, "total_tokens": 39673248} +{"current_steps": 58855, "total_steps": 204665, "loss": 0.0521, "lr": 1.7932244034474305e-06, "epoch": 1.4378374416729778, "percentage": 28.76, "elapsed_time": "1:15:20", "remaining_time": "3:06:38", "throughput": 8777.54, "total_tokens": 39677024} +{"current_steps": 58860, "total_steps": 204665, "loss": 0.0915, "lr": 1.7931724725235294e-06, "epoch": 1.437959592504825, "percentage": 28.76, "elapsed_time": "1:15:20", "remaining_time": "3:06:38", "throughput": 8777.55, "total_tokens": 39680032} +{"current_steps": 58865, "total_steps": 204665, "loss": 0.0043, "lr": 1.7931205358314648e-06, "epoch": 1.4380817433366722, "percentage": 28.76, "elapsed_time": "1:15:20", "remaining_time": "3:06:37", "throughput": 8777.62, "total_tokens": 39683424} +{"current_steps": 58870, "total_steps": 204665, "loss": 0.0966, "lr": 1.7930685933716142e-06, "epoch": 1.4382038941685193, "percentage": 28.76, "elapsed_time": "1:15:21", "remaining_time": "3:06:37", "throughput": 8777.69, "total_tokens": 39686816} +{"current_steps": 58875, "total_steps": 204665, "loss": 0.1053, "lr": 1.7930166451443558e-06, "epoch": 1.4383260450003665, "percentage": 28.77, "elapsed_time": "1:15:21", "remaining_time": "3:06:36", "throughput": 8777.73, "total_tokens": 39690016} +{"current_steps": 58880, "total_steps": 204665, "loss": 0.1942, "lr": 1.7929646911500669e-06, "epoch": 1.4384481958322137, "percentage": 28.77, "elapsed_time": "1:15:22", "remaining_time": "3:06:36", "throughput": 8777.87, "total_tokens": 39693792} +{"current_steps": 58885, "total_steps": 204665, "loss": 0.069, "lr": 1.7929127313891254e-06, "epoch": 1.438570346664061, "percentage": 28.77, "elapsed_time": "1:15:22", "remaining_time": "3:06:35", "throughput": 8777.91, "total_tokens": 39696992} +{"current_steps": 58890, "total_steps": 204665, "loss": 0.1596, "lr": 1.7928607658619095e-06, "epoch": 1.4386924974959079, "percentage": 28.77, "elapsed_time": "1:15:22", "remaining_time": "3:06:35", "throughput": 8777.95, "total_tokens": 39700192} +{"current_steps": 58895, "total_steps": 204665, "loss": 0.0381, "lr": 1.7928087945687963e-06, "epoch": 1.438814648327755, "percentage": 28.78, "elapsed_time": "1:15:23", "remaining_time": "3:06:34", "throughput": 8777.96, "total_tokens": 39703264} +{"current_steps": 58900, "total_steps": 204665, "loss": 0.0723, "lr": 1.7927568175101652e-06, "epoch": 1.4389367991596023, "percentage": 28.78, "elapsed_time": "1:15:23", "remaining_time": "3:06:34", "throughput": 8777.96, "total_tokens": 39706208} +{"current_steps": 58905, "total_steps": 204665, "loss": 0.1089, "lr": 1.7927048346863925e-06, "epoch": 1.4390589499914495, "percentage": 28.78, "elapsed_time": "1:15:23", "remaining_time": "3:06:33", "throughput": 8777.97, "total_tokens": 39709280} +{"current_steps": 58910, "total_steps": 204665, "loss": 0.0524, "lr": 1.7926528460978573e-06, "epoch": 1.4391811008232966, "percentage": 28.78, "elapsed_time": "1:15:24", "remaining_time": "3:06:33", "throughput": 8778.07, "total_tokens": 39712800} +{"current_steps": 58915, "total_steps": 204665, "loss": 0.0369, "lr": 1.7926008517449373e-06, "epoch": 1.4393032516551438, "percentage": 28.79, "elapsed_time": "1:15:24", "remaining_time": "3:06:33", "throughput": 8778.09, "total_tokens": 39715872} +{"current_steps": 58920, "total_steps": 204665, "loss": 0.0941, "lr": 1.7925488516280113e-06, "epoch": 1.4394254024869908, "percentage": 28.79, "elapsed_time": "1:15:24", "remaining_time": "3:06:32", "throughput": 8778.1, "total_tokens": 39718944} +{"current_steps": 58925, "total_steps": 204665, "loss": 0.0318, "lr": 1.7924968457474563e-06, "epoch": 1.439547553318838, "percentage": 28.79, "elapsed_time": "1:15:25", "remaining_time": "3:06:32", "throughput": 8778.14, "total_tokens": 39722144} +{"current_steps": 58930, "total_steps": 204665, "loss": 0.0685, "lr": 1.7924448341036512e-06, "epoch": 1.4396697041506852, "percentage": 28.79, "elapsed_time": "1:15:25", "remaining_time": "3:06:31", "throughput": 8778.18, "total_tokens": 39725280} +{"current_steps": 58935, "total_steps": 204665, "loss": 0.0387, "lr": 1.792392816696974e-06, "epoch": 1.4397918549825324, "percentage": 28.8, "elapsed_time": "1:15:25", "remaining_time": "3:06:31", "throughput": 8778.38, "total_tokens": 39729440} +{"current_steps": 58940, "total_steps": 204665, "loss": 0.0736, "lr": 1.7923407935278032e-06, "epoch": 1.4399140058143796, "percentage": 28.8, "elapsed_time": "1:15:26", "remaining_time": "3:06:30", "throughput": 8778.47, "total_tokens": 39732896} +{"current_steps": 58945, "total_steps": 204665, "loss": 0.0424, "lr": 1.7922887645965173e-06, "epoch": 1.4400361566462268, "percentage": 28.8, "elapsed_time": "1:15:26", "remaining_time": "3:06:30", "throughput": 8778.49, "total_tokens": 39736032} +{"current_steps": 58950, "total_steps": 204665, "loss": 0.1568, "lr": 1.792236729903494e-06, "epoch": 1.440158307478074, "percentage": 28.8, "elapsed_time": "1:15:26", "remaining_time": "3:06:29", "throughput": 8778.49, "total_tokens": 39739040} +{"current_steps": 58955, "total_steps": 204665, "loss": 0.0775, "lr": 1.7921846894491121e-06, "epoch": 1.4402804583099211, "percentage": 28.81, "elapsed_time": "1:15:27", "remaining_time": "3:06:29", "throughput": 8778.58, "total_tokens": 39742496} +{"current_steps": 58960, "total_steps": 204665, "loss": 0.1183, "lr": 1.7921326432337505e-06, "epoch": 1.4404026091417683, "percentage": 28.81, "elapsed_time": "1:15:27", "remaining_time": "3:06:28", "throughput": 8778.59, "total_tokens": 39745568} +{"current_steps": 58965, "total_steps": 204665, "loss": 0.0617, "lr": 1.792080591257787e-06, "epoch": 1.4405247599736155, "percentage": 28.81, "elapsed_time": "1:15:27", "remaining_time": "3:06:28", "throughput": 8778.66, "total_tokens": 39748960} +{"current_steps": 58970, "total_steps": 204665, "loss": 0.0014, "lr": 1.7920285335216004e-06, "epoch": 1.4406469108054627, "percentage": 28.81, "elapsed_time": "1:15:28", "remaining_time": "3:06:27", "throughput": 8778.72, "total_tokens": 39752224} +{"current_steps": 58975, "total_steps": 204665, "loss": 0.02, "lr": 1.7919764700255693e-06, "epoch": 1.4407690616373097, "percentage": 28.82, "elapsed_time": "1:15:28", "remaining_time": "3:06:27", "throughput": 8778.78, "total_tokens": 39755616} +{"current_steps": 58980, "total_steps": 204665, "loss": 0.1716, "lr": 1.7919244007700725e-06, "epoch": 1.4408912124691569, "percentage": 28.82, "elapsed_time": "1:15:28", "remaining_time": "3:06:26", "throughput": 8778.83, "total_tokens": 39758816} +{"current_steps": 58985, "total_steps": 204665, "loss": 0.0015, "lr": 1.791872325755488e-06, "epoch": 1.441013363301004, "percentage": 28.82, "elapsed_time": "1:15:29", "remaining_time": "3:06:26", "throughput": 8778.85, "total_tokens": 39761952} +{"current_steps": 58990, "total_steps": 204665, "loss": 0.3353, "lr": 1.7918202449821954e-06, "epoch": 1.4411355141328512, "percentage": 28.82, "elapsed_time": "1:15:29", "remaining_time": "3:06:25", "throughput": 8778.87, "total_tokens": 39765024} +{"current_steps": 58995, "total_steps": 204665, "loss": 0.1574, "lr": 1.7917681584505727e-06, "epoch": 1.4412576649646984, "percentage": 28.83, "elapsed_time": "1:15:29", "remaining_time": "3:06:25", "throughput": 8778.94, "total_tokens": 39768352} +{"current_steps": 59000, "total_steps": 204665, "loss": 0.0455, "lr": 1.791716066160999e-06, "epoch": 1.4413798157965456, "percentage": 28.83, "elapsed_time": "1:15:30", "remaining_time": "3:06:24", "throughput": 8779.15, "total_tokens": 39772576} +{"current_steps": 59005, "total_steps": 204665, "loss": 0.0619, "lr": 1.7916639681138532e-06, "epoch": 1.4415019666283928, "percentage": 28.83, "elapsed_time": "1:15:30", "remaining_time": "3:06:24", "throughput": 8779.14, "total_tokens": 39775456} +{"current_steps": 59010, "total_steps": 204665, "loss": 0.0899, "lr": 1.791611864309514e-06, "epoch": 1.4416241174602398, "percentage": 28.83, "elapsed_time": "1:15:31", "remaining_time": "3:06:24", "throughput": 8779.25, "total_tokens": 39779104} +{"current_steps": 59015, "total_steps": 204665, "loss": 0.058, "lr": 1.7915597547483606e-06, "epoch": 1.441746268292087, "percentage": 28.83, "elapsed_time": "1:15:31", "remaining_time": "3:06:23", "throughput": 8779.32, "total_tokens": 39782432} +{"current_steps": 59020, "total_steps": 204665, "loss": 0.1444, "lr": 1.7915076394307717e-06, "epoch": 1.4418684191239342, "percentage": 28.84, "elapsed_time": "1:15:31", "remaining_time": "3:06:23", "throughput": 8779.37, "total_tokens": 39785696} +{"current_steps": 59025, "total_steps": 204665, "loss": 0.0745, "lr": 1.7914555183571266e-06, "epoch": 1.4419905699557813, "percentage": 28.84, "elapsed_time": "1:15:32", "remaining_time": "3:06:22", "throughput": 8779.4, "total_tokens": 39788832} +{"current_steps": 59030, "total_steps": 204665, "loss": 0.0778, "lr": 1.7914033915278036e-06, "epoch": 1.4421127207876285, "percentage": 28.84, "elapsed_time": "1:15:32", "remaining_time": "3:06:22", "throughput": 8779.44, "total_tokens": 39792032} +{"current_steps": 59035, "total_steps": 204665, "loss": 0.0806, "lr": 1.7913512589431825e-06, "epoch": 1.4422348716194757, "percentage": 28.84, "elapsed_time": "1:15:32", "remaining_time": "3:06:21", "throughput": 8779.48, "total_tokens": 39795232} +{"current_steps": 59040, "total_steps": 204665, "loss": 0.0474, "lr": 1.7912991206036421e-06, "epoch": 1.442357022451323, "percentage": 28.85, "elapsed_time": "1:15:33", "remaining_time": "3:06:21", "throughput": 8779.59, "total_tokens": 39798880} +{"current_steps": 59045, "total_steps": 204665, "loss": 0.0484, "lr": 1.791246976509562e-06, "epoch": 1.44247917328317, "percentage": 28.85, "elapsed_time": "1:15:33", "remaining_time": "3:06:20", "throughput": 8779.65, "total_tokens": 39802144} +{"current_steps": 59050, "total_steps": 204665, "loss": 0.2378, "lr": 1.7911948266613205e-06, "epoch": 1.4426013241150173, "percentage": 28.85, "elapsed_time": "1:15:33", "remaining_time": "3:06:20", "throughput": 8779.71, "total_tokens": 39805472} +{"current_steps": 59055, "total_steps": 204665, "loss": 0.1259, "lr": 1.791142671059298e-06, "epoch": 1.4427234749468645, "percentage": 28.85, "elapsed_time": "1:15:34", "remaining_time": "3:06:19", "throughput": 8779.78, "total_tokens": 39808864} +{"current_steps": 59060, "total_steps": 204665, "loss": 0.0342, "lr": 1.7910905097038728e-06, "epoch": 1.4428456257787117, "percentage": 28.86, "elapsed_time": "1:15:34", "remaining_time": "3:06:19", "throughput": 8779.87, "total_tokens": 39812320} +{"current_steps": 59065, "total_steps": 204665, "loss": 0.1464, "lr": 1.7910383425954248e-06, "epoch": 1.4429677766105586, "percentage": 28.86, "elapsed_time": "1:15:34", "remaining_time": "3:06:18", "throughput": 8779.98, "total_tokens": 39815904} +{"current_steps": 59070, "total_steps": 204665, "loss": 0.0687, "lr": 1.7909861697343333e-06, "epoch": 1.4430899274424058, "percentage": 28.86, "elapsed_time": "1:15:35", "remaining_time": "3:06:18", "throughput": 8780.0, "total_tokens": 39818976} +{"current_steps": 59075, "total_steps": 204665, "loss": 0.0014, "lr": 1.7909339911209775e-06, "epoch": 1.443212078274253, "percentage": 28.86, "elapsed_time": "1:15:35", "remaining_time": "3:06:17", "throughput": 8780.12, "total_tokens": 39822624} +{"current_steps": 59080, "total_steps": 204665, "loss": 0.1436, "lr": 1.790881806755737e-06, "epoch": 1.4433342291061002, "percentage": 28.87, "elapsed_time": "1:15:35", "remaining_time": "3:06:17", "throughput": 8780.16, "total_tokens": 39825824} +{"current_steps": 59085, "total_steps": 204665, "loss": 0.0311, "lr": 1.7908296166389914e-06, "epoch": 1.4434563799379474, "percentage": 28.87, "elapsed_time": "1:15:36", "remaining_time": "3:06:16", "throughput": 8780.28, "total_tokens": 39829472} +{"current_steps": 59090, "total_steps": 204665, "loss": 0.0496, "lr": 1.7907774207711199e-06, "epoch": 1.4435785307697946, "percentage": 28.87, "elapsed_time": "1:15:36", "remaining_time": "3:06:16", "throughput": 8780.34, "total_tokens": 39832800} +{"current_steps": 59095, "total_steps": 204665, "loss": 0.1983, "lr": 1.7907252191525023e-06, "epoch": 1.4437006816016418, "percentage": 28.87, "elapsed_time": "1:15:36", "remaining_time": "3:06:15", "throughput": 8780.5, "total_tokens": 39836640} +{"current_steps": 59100, "total_steps": 204665, "loss": 0.0758, "lr": 1.7906730117835185e-06, "epoch": 1.4438228324334887, "percentage": 28.88, "elapsed_time": "1:15:37", "remaining_time": "3:06:15", "throughput": 8780.61, "total_tokens": 39840224} +{"current_steps": 59105, "total_steps": 204665, "loss": 0.0342, "lr": 1.7906207986645477e-06, "epoch": 1.443944983265336, "percentage": 28.88, "elapsed_time": "1:15:37", "remaining_time": "3:06:15", "throughput": 8780.73, "total_tokens": 39843872} +{"current_steps": 59110, "total_steps": 204665, "loss": 0.103, "lr": 1.7905685797959697e-06, "epoch": 1.4440671340971831, "percentage": 28.88, "elapsed_time": "1:15:37", "remaining_time": "3:06:14", "throughput": 8780.75, "total_tokens": 39846944} +{"current_steps": 59115, "total_steps": 204665, "loss": 0.1281, "lr": 1.7905163551781643e-06, "epoch": 1.4441892849290303, "percentage": 28.88, "elapsed_time": "1:15:38", "remaining_time": "3:06:14", "throughput": 8781.0, "total_tokens": 39851296} +{"current_steps": 59120, "total_steps": 204665, "loss": 0.0785, "lr": 1.7904641248115117e-06, "epoch": 1.4443114357608775, "percentage": 28.89, "elapsed_time": "1:15:38", "remaining_time": "3:06:13", "throughput": 8781.1, "total_tokens": 39854880} +{"current_steps": 59125, "total_steps": 204665, "loss": 0.0942, "lr": 1.7904118886963913e-06, "epoch": 1.4444335865927247, "percentage": 28.89, "elapsed_time": "1:15:39", "remaining_time": "3:06:13", "throughput": 8781.13, "total_tokens": 39858016} +{"current_steps": 59130, "total_steps": 204665, "loss": 0.0749, "lr": 1.7903596468331829e-06, "epoch": 1.4445557374245719, "percentage": 28.89, "elapsed_time": "1:15:39", "remaining_time": "3:06:12", "throughput": 8781.2, "total_tokens": 39861408} +{"current_steps": 59135, "total_steps": 204665, "loss": 0.0521, "lr": 1.7903073992222666e-06, "epoch": 1.444677888256419, "percentage": 28.89, "elapsed_time": "1:15:39", "remaining_time": "3:06:12", "throughput": 8781.24, "total_tokens": 39864608} +{"current_steps": 59140, "total_steps": 204665, "loss": 0.2063, "lr": 1.7902551458640222e-06, "epoch": 1.4448000390882663, "percentage": 28.9, "elapsed_time": "1:15:40", "remaining_time": "3:06:11", "throughput": 8781.31, "total_tokens": 39868000} +{"current_steps": 59145, "total_steps": 204665, "loss": 0.1936, "lr": 1.79020288675883e-06, "epoch": 1.4449221899201135, "percentage": 28.9, "elapsed_time": "1:15:40", "remaining_time": "3:06:11", "throughput": 8781.4, "total_tokens": 39871520} +{"current_steps": 59150, "total_steps": 204665, "loss": 0.1798, "lr": 1.79015062190707e-06, "epoch": 1.4450443407519606, "percentage": 28.9, "elapsed_time": "1:15:40", "remaining_time": "3:06:10", "throughput": 8781.43, "total_tokens": 39874656} +{"current_steps": 59155, "total_steps": 204665, "loss": 0.0061, "lr": 1.7900983513091218e-06, "epoch": 1.4451664915838076, "percentage": 28.9, "elapsed_time": "1:15:41", "remaining_time": "3:06:10", "throughput": 8781.44, "total_tokens": 39877728} +{"current_steps": 59160, "total_steps": 204665, "loss": 0.0904, "lr": 1.790046074965366e-06, "epoch": 1.4452886424156548, "percentage": 28.91, "elapsed_time": "1:15:41", "remaining_time": "3:06:09", "throughput": 8781.5, "total_tokens": 39880992} +{"current_steps": 59165, "total_steps": 204665, "loss": 0.1306, "lr": 1.7899937928761829e-06, "epoch": 1.445410793247502, "percentage": 28.91, "elapsed_time": "1:15:41", "remaining_time": "3:06:09", "throughput": 8781.54, "total_tokens": 39884192} +{"current_steps": 59170, "total_steps": 204665, "loss": 0.161, "lr": 1.789941505041952e-06, "epoch": 1.4455329440793492, "percentage": 28.91, "elapsed_time": "1:15:42", "remaining_time": "3:06:08", "throughput": 8781.54, "total_tokens": 39887136} +{"current_steps": 59175, "total_steps": 204665, "loss": 0.0333, "lr": 1.7898892114630542e-06, "epoch": 1.4456550949111964, "percentage": 28.91, "elapsed_time": "1:15:42", "remaining_time": "3:06:08", "throughput": 8781.65, "total_tokens": 39890720} +{"current_steps": 59180, "total_steps": 204665, "loss": 0.1145, "lr": 1.78983691213987e-06, "epoch": 1.4457772457430436, "percentage": 28.92, "elapsed_time": "1:15:42", "remaining_time": "3:06:07", "throughput": 8781.73, "total_tokens": 39894176} +{"current_steps": 59185, "total_steps": 204665, "loss": 0.0209, "lr": 1.789784607072779e-06, "epoch": 1.4458993965748907, "percentage": 28.92, "elapsed_time": "1:15:43", "remaining_time": "3:06:07", "throughput": 8781.77, "total_tokens": 39897376} +{"current_steps": 59190, "total_steps": 204665, "loss": 0.044, "lr": 1.7897322962621616e-06, "epoch": 1.4460215474067377, "percentage": 28.92, "elapsed_time": "1:15:43", "remaining_time": "3:06:06", "throughput": 8781.76, "total_tokens": 39900320} +{"current_steps": 59195, "total_steps": 204665, "loss": 0.0499, "lr": 1.789679979708399e-06, "epoch": 1.446143698238585, "percentage": 28.92, "elapsed_time": "1:15:43", "remaining_time": "3:06:06", "throughput": 8781.8, "total_tokens": 39903520} +{"current_steps": 59200, "total_steps": 204665, "loss": 0.2074, "lr": 1.7896276574118709e-06, "epoch": 1.446265849070432, "percentage": 28.93, "elapsed_time": "1:15:44", "remaining_time": "3:06:06", "throughput": 8781.99, "total_tokens": 39907552} +{"current_steps": 59205, "total_steps": 204665, "loss": 0.104, "lr": 1.7895753293729583e-06, "epoch": 1.4463879999022793, "percentage": 28.93, "elapsed_time": "1:15:44", "remaining_time": "3:06:05", "throughput": 8782.14, "total_tokens": 39911392} +{"current_steps": 59210, "total_steps": 204665, "loss": 0.1042, "lr": 1.7895229955920414e-06, "epoch": 1.4465101507341265, "percentage": 28.93, "elapsed_time": "1:15:44", "remaining_time": "3:06:05", "throughput": 8782.24, "total_tokens": 39914912} +{"current_steps": 59215, "total_steps": 204665, "loss": 0.0912, "lr": 1.789470656069501e-06, "epoch": 1.4466323015659737, "percentage": 28.93, "elapsed_time": "1:15:45", "remaining_time": "3:06:04", "throughput": 8782.29, "total_tokens": 39918176} +{"current_steps": 59220, "total_steps": 204665, "loss": 0.0032, "lr": 1.7894183108057175e-06, "epoch": 1.4467544523978209, "percentage": 28.94, "elapsed_time": "1:15:45", "remaining_time": "3:06:04", "throughput": 8782.36, "total_tokens": 39921504} +{"current_steps": 59225, "total_steps": 204665, "loss": 0.0899, "lr": 1.789365959801072e-06, "epoch": 1.446876603229668, "percentage": 28.94, "elapsed_time": "1:15:45", "remaining_time": "3:06:03", "throughput": 8782.37, "total_tokens": 39924576} +{"current_steps": 59230, "total_steps": 204665, "loss": 0.0011, "lr": 1.7893136030559445e-06, "epoch": 1.4469987540615152, "percentage": 28.94, "elapsed_time": "1:15:46", "remaining_time": "3:06:03", "throughput": 8782.44, "total_tokens": 39927968} +{"current_steps": 59235, "total_steps": 204665, "loss": 0.1049, "lr": 1.7892612405707168e-06, "epoch": 1.4471209048933624, "percentage": 28.94, "elapsed_time": "1:15:46", "remaining_time": "3:06:02", "throughput": 8782.52, "total_tokens": 39931424} +{"current_steps": 59240, "total_steps": 204665, "loss": 0.0006, "lr": 1.7892088723457685e-06, "epoch": 1.4472430557252096, "percentage": 28.94, "elapsed_time": "1:15:47", "remaining_time": "3:06:02", "throughput": 8782.61, "total_tokens": 39934944} +{"current_steps": 59245, "total_steps": 204665, "loss": 0.0215, "lr": 1.7891564983814813e-06, "epoch": 1.4473652065570566, "percentage": 28.95, "elapsed_time": "1:15:47", "remaining_time": "3:06:01", "throughput": 8782.67, "total_tokens": 39938272} +{"current_steps": 59250, "total_steps": 204665, "loss": 0.1159, "lr": 1.7891041186782356e-06, "epoch": 1.4474873573889038, "percentage": 28.95, "elapsed_time": "1:15:47", "remaining_time": "3:06:01", "throughput": 8782.68, "total_tokens": 39941280} +{"current_steps": 59255, "total_steps": 204665, "loss": 0.0907, "lr": 1.7890517332364125e-06, "epoch": 1.447609508220751, "percentage": 28.95, "elapsed_time": "1:15:48", "remaining_time": "3:06:00", "throughput": 8782.76, "total_tokens": 39944672} +{"current_steps": 59260, "total_steps": 204665, "loss": 0.0762, "lr": 1.7889993420563934e-06, "epoch": 1.4477316590525982, "percentage": 28.95, "elapsed_time": "1:15:48", "remaining_time": "3:06:00", "throughput": 8782.84, "total_tokens": 39948064} +{"current_steps": 59265, "total_steps": 204665, "loss": 0.0961, "lr": 1.7889469451385586e-06, "epoch": 1.4478538098844453, "percentage": 28.96, "elapsed_time": "1:15:48", "remaining_time": "3:05:59", "throughput": 8782.83, "total_tokens": 39951008} +{"current_steps": 59270, "total_steps": 204665, "loss": 0.1437, "lr": 1.7888945424832893e-06, "epoch": 1.4479759607162925, "percentage": 28.96, "elapsed_time": "1:15:49", "remaining_time": "3:05:59", "throughput": 8783.0, "total_tokens": 39954912} +{"current_steps": 59275, "total_steps": 204665, "loss": 0.1528, "lr": 1.7888421340909666e-06, "epoch": 1.4480981115481397, "percentage": 28.96, "elapsed_time": "1:15:49", "remaining_time": "3:05:58", "throughput": 8783.02, "total_tokens": 39957984} +{"current_steps": 59280, "total_steps": 204665, "loss": 0.1918, "lr": 1.788789719961972e-06, "epoch": 1.4482202623799867, "percentage": 28.96, "elapsed_time": "1:15:49", "remaining_time": "3:05:58", "throughput": 8783.01, "total_tokens": 39960928} +{"current_steps": 59285, "total_steps": 204665, "loss": 0.0038, "lr": 1.7887373000966864e-06, "epoch": 1.4483424132118339, "percentage": 28.97, "elapsed_time": "1:15:50", "remaining_time": "3:05:57", "throughput": 8783.1, "total_tokens": 39964448} +{"current_steps": 59290, "total_steps": 204665, "loss": 0.0225, "lr": 1.7886848744954909e-06, "epoch": 1.448464564043681, "percentage": 28.97, "elapsed_time": "1:15:50", "remaining_time": "3:05:57", "throughput": 8783.17, "total_tokens": 39967840} +{"current_steps": 59295, "total_steps": 204665, "loss": 0.1449, "lr": 1.7886324431587668e-06, "epoch": 1.4485867148755283, "percentage": 28.97, "elapsed_time": "1:15:50", "remaining_time": "3:05:57", "throughput": 8783.18, "total_tokens": 39970848} +{"current_steps": 59300, "total_steps": 204665, "loss": 0.1448, "lr": 1.7885800060868954e-06, "epoch": 1.4487088657073754, "percentage": 28.97, "elapsed_time": "1:15:51", "remaining_time": "3:05:56", "throughput": 8783.22, "total_tokens": 39974048} +{"current_steps": 59305, "total_steps": 204665, "loss": 0.0019, "lr": 1.788527563280258e-06, "epoch": 1.4488310165392226, "percentage": 28.98, "elapsed_time": "1:15:51", "remaining_time": "3:05:56", "throughput": 8783.23, "total_tokens": 39977056} +{"current_steps": 59310, "total_steps": 204665, "loss": 0.0009, "lr": 1.7884751147392364e-06, "epoch": 1.4489531673710698, "percentage": 28.98, "elapsed_time": "1:15:51", "remaining_time": "3:05:55", "throughput": 8783.3, "total_tokens": 39980384} +{"current_steps": 59315, "total_steps": 204665, "loss": 0.0523, "lr": 1.7884226604642117e-06, "epoch": 1.449075318202917, "percentage": 28.98, "elapsed_time": "1:15:52", "remaining_time": "3:05:55", "throughput": 8783.36, "total_tokens": 39983712} +{"current_steps": 59320, "total_steps": 204665, "loss": 0.1469, "lr": 1.7883702004555652e-06, "epoch": 1.4491974690347642, "percentage": 28.98, "elapsed_time": "1:15:52", "remaining_time": "3:05:54", "throughput": 8783.41, "total_tokens": 39986976} +{"current_steps": 59325, "total_steps": 204665, "loss": 0.0092, "lr": 1.7883177347136785e-06, "epoch": 1.4493196198666114, "percentage": 28.99, "elapsed_time": "1:15:52", "remaining_time": "3:05:54", "throughput": 8783.46, "total_tokens": 39990240} +{"current_steps": 59330, "total_steps": 204665, "loss": 0.0009, "lr": 1.7882652632389332e-06, "epoch": 1.4494417706984586, "percentage": 28.99, "elapsed_time": "1:15:53", "remaining_time": "3:05:53", "throughput": 8783.5, "total_tokens": 39993440} +{"current_steps": 59335, "total_steps": 204665, "loss": 0.1739, "lr": 1.788212786031711e-06, "epoch": 1.4495639215303056, "percentage": 28.99, "elapsed_time": "1:15:53", "remaining_time": "3:05:53", "throughput": 8783.56, "total_tokens": 39996768} +{"current_steps": 59340, "total_steps": 204665, "loss": 0.1538, "lr": 1.7881603030923935e-06, "epoch": 1.4496860723621527, "percentage": 28.99, "elapsed_time": "1:15:53", "remaining_time": "3:05:52", "throughput": 8783.71, "total_tokens": 40000608} +{"current_steps": 59345, "total_steps": 204665, "loss": 0.0392, "lr": 1.788107814421362e-06, "epoch": 1.449808223194, "percentage": 29.0, "elapsed_time": "1:15:54", "remaining_time": "3:05:52", "throughput": 8783.76, "total_tokens": 40003872} +{"current_steps": 59350, "total_steps": 204665, "loss": 0.085, "lr": 1.7880553200189987e-06, "epoch": 1.4499303740258471, "percentage": 29.0, "elapsed_time": "1:15:54", "remaining_time": "3:05:51", "throughput": 8783.85, "total_tokens": 40007328} +{"current_steps": 59355, "total_steps": 204665, "loss": 0.0583, "lr": 1.7880028198856852e-06, "epoch": 1.4500525248576943, "percentage": 29.0, "elapsed_time": "1:15:55", "remaining_time": "3:05:51", "throughput": 8784.0, "total_tokens": 40011168} +{"current_steps": 59360, "total_steps": 204665, "loss": 0.0058, "lr": 1.787950314021803e-06, "epoch": 1.4501746756895415, "percentage": 29.0, "elapsed_time": "1:15:55", "remaining_time": "3:05:50", "throughput": 8784.04, "total_tokens": 40014432} +{"current_steps": 59365, "total_steps": 204665, "loss": 0.0448, "lr": 1.7878978024277344e-06, "epoch": 1.4502968265213887, "percentage": 29.01, "elapsed_time": "1:15:55", "remaining_time": "3:05:50", "throughput": 8784.13, "total_tokens": 40017888} +{"current_steps": 59370, "total_steps": 204665, "loss": 0.0014, "lr": 1.7878452851038612e-06, "epoch": 1.4504189773532357, "percentage": 29.01, "elapsed_time": "1:15:56", "remaining_time": "3:05:49", "throughput": 8784.24, "total_tokens": 40021472} +{"current_steps": 59375, "total_steps": 204665, "loss": 0.0793, "lr": 1.7877927620505648e-06, "epoch": 1.4505411281850829, "percentage": 29.01, "elapsed_time": "1:15:56", "remaining_time": "3:05:49", "throughput": 8784.24, "total_tokens": 40024480} +{"current_steps": 59380, "total_steps": 204665, "loss": 0.0604, "lr": 1.7877402332682278e-06, "epoch": 1.45066327901693, "percentage": 29.01, "elapsed_time": "1:15:56", "remaining_time": "3:05:49", "throughput": 8784.39, "total_tokens": 40028320} +{"current_steps": 59385, "total_steps": 204665, "loss": 0.0002, "lr": 1.787687698757232e-06, "epoch": 1.4507854298487772, "percentage": 29.02, "elapsed_time": "1:15:57", "remaining_time": "3:05:48", "throughput": 8784.45, "total_tokens": 40031584} +{"current_steps": 59390, "total_steps": 204665, "loss": 0.0494, "lr": 1.7876351585179593e-06, "epoch": 1.4509075806806244, "percentage": 29.02, "elapsed_time": "1:15:57", "remaining_time": "3:05:48", "throughput": 8784.52, "total_tokens": 40034976} +{"current_steps": 59395, "total_steps": 204665, "loss": 0.1674, "lr": 1.7875826125507917e-06, "epoch": 1.4510297315124716, "percentage": 29.02, "elapsed_time": "1:15:57", "remaining_time": "3:05:47", "throughput": 8784.56, "total_tokens": 40038176} +{"current_steps": 59400, "total_steps": 204665, "loss": 0.1429, "lr": 1.787530060856112e-06, "epoch": 1.4511518823443188, "percentage": 29.02, "elapsed_time": "1:15:58", "remaining_time": "3:05:47", "throughput": 8784.59, "total_tokens": 40041312} +{"current_steps": 59405, "total_steps": 204665, "loss": 0.005, "lr": 1.7874775034343012e-06, "epoch": 1.451274033176166, "percentage": 29.03, "elapsed_time": "1:15:58", "remaining_time": "3:05:46", "throughput": 8784.64, "total_tokens": 40044576} +{"current_steps": 59410, "total_steps": 204665, "loss": 0.0218, "lr": 1.7874249402857426e-06, "epoch": 1.4513961840080132, "percentage": 29.03, "elapsed_time": "1:15:58", "remaining_time": "3:05:46", "throughput": 8784.65, "total_tokens": 40047584} +{"current_steps": 59415, "total_steps": 204665, "loss": 0.016, "lr": 1.787372371410818e-06, "epoch": 1.4515183348398604, "percentage": 29.03, "elapsed_time": "1:15:59", "remaining_time": "3:05:45", "throughput": 8784.76, "total_tokens": 40051168} +{"current_steps": 59420, "total_steps": 204665, "loss": 0.056, "lr": 1.7873197968099097e-06, "epoch": 1.4516404856717076, "percentage": 29.03, "elapsed_time": "1:15:59", "remaining_time": "3:05:45", "throughput": 8784.88, "total_tokens": 40054816} +{"current_steps": 59425, "total_steps": 204665, "loss": 0.0509, "lr": 1.7872672164834e-06, "epoch": 1.4517626365035545, "percentage": 29.04, "elapsed_time": "1:15:59", "remaining_time": "3:05:44", "throughput": 8784.92, "total_tokens": 40058016} +{"current_steps": 59430, "total_steps": 204665, "loss": 0.3248, "lr": 1.7872146304316714e-06, "epoch": 1.4518847873354017, "percentage": 29.04, "elapsed_time": "1:16:00", "remaining_time": "3:05:44", "throughput": 8785.06, "total_tokens": 40061792} +{"current_steps": 59435, "total_steps": 204665, "loss": 0.1667, "lr": 1.7871620386551065e-06, "epoch": 1.452006938167249, "percentage": 29.04, "elapsed_time": "1:16:00", "remaining_time": "3:05:43", "throughput": 8785.08, "total_tokens": 40064928} +{"current_steps": 59440, "total_steps": 204665, "loss": 0.0771, "lr": 1.7871094411540872e-06, "epoch": 1.452129088999096, "percentage": 29.04, "elapsed_time": "1:16:00", "remaining_time": "3:05:43", "throughput": 8785.17, "total_tokens": 40068384} +{"current_steps": 59445, "total_steps": 204665, "loss": 0.0294, "lr": 1.7870568379289965e-06, "epoch": 1.4522512398309433, "percentage": 29.05, "elapsed_time": "1:16:01", "remaining_time": "3:05:42", "throughput": 8785.22, "total_tokens": 40071648} +{"current_steps": 59450, "total_steps": 204665, "loss": 0.0534, "lr": 1.787004228980217e-06, "epoch": 1.4523733906627905, "percentage": 29.05, "elapsed_time": "1:16:01", "remaining_time": "3:05:42", "throughput": 8785.28, "total_tokens": 40074976} +{"current_steps": 59455, "total_steps": 204665, "loss": 0.1416, "lr": 1.7869516143081307e-06, "epoch": 1.4524955414946374, "percentage": 29.05, "elapsed_time": "1:16:01", "remaining_time": "3:05:41", "throughput": 8785.41, "total_tokens": 40078688} +{"current_steps": 59460, "total_steps": 204665, "loss": 0.0022, "lr": 1.7868989939131204e-06, "epoch": 1.4526176923264846, "percentage": 29.05, "elapsed_time": "1:16:02", "remaining_time": "3:05:41", "throughput": 8785.45, "total_tokens": 40081888} +{"current_steps": 59465, "total_steps": 204665, "loss": 0.0479, "lr": 1.7868463677955697e-06, "epoch": 1.4527398431583318, "percentage": 29.05, "elapsed_time": "1:16:02", "remaining_time": "3:05:40", "throughput": 8785.49, "total_tokens": 40085088} +{"current_steps": 59470, "total_steps": 204665, "loss": 0.2927, "lr": 1.78679373595586e-06, "epoch": 1.452861993990179, "percentage": 29.06, "elapsed_time": "1:16:02", "remaining_time": "3:05:40", "throughput": 8785.53, "total_tokens": 40088352} +{"current_steps": 59475, "total_steps": 204665, "loss": 0.1888, "lr": 1.786741098394375e-06, "epoch": 1.4529841448220262, "percentage": 29.06, "elapsed_time": "1:16:03", "remaining_time": "3:05:40", "throughput": 8785.6, "total_tokens": 40091744} +{"current_steps": 59480, "total_steps": 204665, "loss": 0.1454, "lr": 1.7866884551114968e-06, "epoch": 1.4531062956538734, "percentage": 29.06, "elapsed_time": "1:16:03", "remaining_time": "3:05:39", "throughput": 8785.67, "total_tokens": 40095072} +{"current_steps": 59485, "total_steps": 204665, "loss": 0.0011, "lr": 1.7866358061076086e-06, "epoch": 1.4532284464857206, "percentage": 29.06, "elapsed_time": "1:16:04", "remaining_time": "3:05:39", "throughput": 8785.74, "total_tokens": 40098464} +{"current_steps": 59490, "total_steps": 204665, "loss": 0.1509, "lr": 1.7865831513830933e-06, "epoch": 1.4533505973175678, "percentage": 29.07, "elapsed_time": "1:16:04", "remaining_time": "3:05:38", "throughput": 8785.79, "total_tokens": 40101728} +{"current_steps": 59495, "total_steps": 204665, "loss": 0.1103, "lr": 1.7865304909383338e-06, "epoch": 1.453472748149415, "percentage": 29.07, "elapsed_time": "1:16:04", "remaining_time": "3:05:38", "throughput": 8785.79, "total_tokens": 40104736} +{"current_steps": 59500, "total_steps": 204665, "loss": 0.0494, "lr": 1.786477824773713e-06, "epoch": 1.4535948989812622, "percentage": 29.07, "elapsed_time": "1:16:05", "remaining_time": "3:05:37", "throughput": 8785.94, "total_tokens": 40108576} +{"current_steps": 59505, "total_steps": 204665, "loss": 0.0026, "lr": 1.7864251528896139e-06, "epoch": 1.4537170498131093, "percentage": 29.07, "elapsed_time": "1:16:05", "remaining_time": "3:05:37", "throughput": 8785.94, "total_tokens": 40111584} +{"current_steps": 59510, "total_steps": 204665, "loss": 0.0802, "lr": 1.7863724752864195e-06, "epoch": 1.4538392006449565, "percentage": 29.08, "elapsed_time": "1:16:05", "remaining_time": "3:05:36", "throughput": 8786.06, "total_tokens": 40115232} +{"current_steps": 59515, "total_steps": 204665, "loss": 0.0333, "lr": 1.7863197919645133e-06, "epoch": 1.4539613514768035, "percentage": 29.08, "elapsed_time": "1:16:06", "remaining_time": "3:05:36", "throughput": 8786.1, "total_tokens": 40118432} +{"current_steps": 59520, "total_steps": 204665, "loss": 0.0861, "lr": 1.7862671029242775e-06, "epoch": 1.4540835023086507, "percentage": 29.08, "elapsed_time": "1:16:06", "remaining_time": "3:05:35", "throughput": 8786.17, "total_tokens": 40121760} +{"current_steps": 59525, "total_steps": 204665, "loss": 0.0021, "lr": 1.7862144081660963e-06, "epoch": 1.4542056531404979, "percentage": 29.08, "elapsed_time": "1:16:06", "remaining_time": "3:05:35", "throughput": 8786.31, "total_tokens": 40125600} +{"current_steps": 59530, "total_steps": 204665, "loss": 0.0012, "lr": 1.7861617076903524e-06, "epoch": 1.454327803972345, "percentage": 29.09, "elapsed_time": "1:16:07", "remaining_time": "3:05:34", "throughput": 8786.34, "total_tokens": 40128736} +{"current_steps": 59535, "total_steps": 204665, "loss": 0.001, "lr": 1.7861090014974289e-06, "epoch": 1.4544499548041923, "percentage": 29.09, "elapsed_time": "1:16:07", "remaining_time": "3:05:34", "throughput": 8786.44, "total_tokens": 40132320} +{"current_steps": 59540, "total_steps": 204665, "loss": 0.0529, "lr": 1.7860562895877097e-06, "epoch": 1.4545721056360394, "percentage": 29.09, "elapsed_time": "1:16:07", "remaining_time": "3:05:33", "throughput": 8786.54, "total_tokens": 40135840} +{"current_steps": 59545, "total_steps": 204665, "loss": 0.0624, "lr": 1.786003571961577e-06, "epoch": 1.4546942564678864, "percentage": 29.09, "elapsed_time": "1:16:08", "remaining_time": "3:05:33", "throughput": 8786.61, "total_tokens": 40139232} +{"current_steps": 59550, "total_steps": 204665, "loss": 0.0007, "lr": 1.7859508486194156e-06, "epoch": 1.4548164072997336, "percentage": 29.1, "elapsed_time": "1:16:08", "remaining_time": "3:05:32", "throughput": 8786.59, "total_tokens": 40142048} +{"current_steps": 59555, "total_steps": 204665, "loss": 0.049, "lr": 1.785898119561608e-06, "epoch": 1.4549385581315808, "percentage": 29.1, "elapsed_time": "1:16:08", "remaining_time": "3:05:32", "throughput": 8786.66, "total_tokens": 40145440} +{"current_steps": 59560, "total_steps": 204665, "loss": 0.0465, "lr": 1.785845384788538e-06, "epoch": 1.455060708963428, "percentage": 29.1, "elapsed_time": "1:16:09", "remaining_time": "3:05:31", "throughput": 8786.69, "total_tokens": 40148576} +{"current_steps": 59565, "total_steps": 204665, "loss": 0.08, "lr": 1.7857926443005888e-06, "epoch": 1.4551828597952752, "percentage": 29.1, "elapsed_time": "1:16:09", "remaining_time": "3:05:31", "throughput": 8786.72, "total_tokens": 40151712} +{"current_steps": 59570, "total_steps": 204665, "loss": 0.0449, "lr": 1.7857398980981442e-06, "epoch": 1.4553050106271224, "percentage": 29.11, "elapsed_time": "1:16:09", "remaining_time": "3:05:31", "throughput": 8786.73, "total_tokens": 40154784} +{"current_steps": 59575, "total_steps": 204665, "loss": 0.0793, "lr": 1.7856871461815878e-06, "epoch": 1.4554271614589696, "percentage": 29.11, "elapsed_time": "1:16:10", "remaining_time": "3:05:30", "throughput": 8786.75, "total_tokens": 40157856} +{"current_steps": 59580, "total_steps": 204665, "loss": 0.2216, "lr": 1.785634388551303e-06, "epoch": 1.4555493122908167, "percentage": 29.11, "elapsed_time": "1:16:10", "remaining_time": "3:05:30", "throughput": 8786.81, "total_tokens": 40161184} +{"current_steps": 59585, "total_steps": 204665, "loss": 0.0674, "lr": 1.7855816252076739e-06, "epoch": 1.455671463122664, "percentage": 29.11, "elapsed_time": "1:16:10", "remaining_time": "3:05:29", "throughput": 8786.84, "total_tokens": 40164384} +{"current_steps": 59590, "total_steps": 204665, "loss": 0.0298, "lr": 1.7855288561510837e-06, "epoch": 1.4557936139545111, "percentage": 29.12, "elapsed_time": "1:16:11", "remaining_time": "3:05:29", "throughput": 8786.89, "total_tokens": 40167648} +{"current_steps": 59595, "total_steps": 204665, "loss": 0.0039, "lr": 1.7854760813819166e-06, "epoch": 1.4559157647863583, "percentage": 29.12, "elapsed_time": "1:16:11", "remaining_time": "3:05:28", "throughput": 8786.94, "total_tokens": 40170912} +{"current_steps": 59600, "total_steps": 204665, "loss": 0.092, "lr": 1.785423300900556e-06, "epoch": 1.4560379156182053, "percentage": 29.12, "elapsed_time": "1:16:12", "remaining_time": "3:05:28", "throughput": 8787.03, "total_tokens": 40174368} +{"current_steps": 59605, "total_steps": 204665, "loss": 0.147, "lr": 1.7853705147073859e-06, "epoch": 1.4561600664500525, "percentage": 29.12, "elapsed_time": "1:16:12", "remaining_time": "3:05:27", "throughput": 8787.11, "total_tokens": 40177824} +{"current_steps": 59610, "total_steps": 204665, "loss": 0.1516, "lr": 1.78531772280279e-06, "epoch": 1.4562822172818997, "percentage": 29.13, "elapsed_time": "1:16:12", "remaining_time": "3:05:27", "throughput": 8787.13, "total_tokens": 40180896} +{"current_steps": 59615, "total_steps": 204665, "loss": 0.0996, "lr": 1.7852649251871528e-06, "epoch": 1.4564043681137468, "percentage": 29.13, "elapsed_time": "1:16:13", "remaining_time": "3:05:26", "throughput": 8787.15, "total_tokens": 40183968} +{"current_steps": 59620, "total_steps": 204665, "loss": 0.0661, "lr": 1.7852121218608573e-06, "epoch": 1.456526518945594, "percentage": 29.13, "elapsed_time": "1:16:13", "remaining_time": "3:05:26", "throughput": 8787.19, "total_tokens": 40187168} +{"current_steps": 59625, "total_steps": 204665, "loss": 0.1033, "lr": 1.7851593128242885e-06, "epoch": 1.4566486697774412, "percentage": 29.13, "elapsed_time": "1:16:13", "remaining_time": "3:05:25", "throughput": 8787.26, "total_tokens": 40190560} +{"current_steps": 59630, "total_steps": 204665, "loss": 0.1114, "lr": 1.78510649807783e-06, "epoch": 1.4567708206092884, "percentage": 29.14, "elapsed_time": "1:16:14", "remaining_time": "3:05:25", "throughput": 8787.34, "total_tokens": 40193952} +{"current_steps": 59635, "total_steps": 204665, "loss": 0.1104, "lr": 1.7850536776218656e-06, "epoch": 1.4568929714411354, "percentage": 29.14, "elapsed_time": "1:16:14", "remaining_time": "3:05:24", "throughput": 8787.33, "total_tokens": 40196896} +{"current_steps": 59640, "total_steps": 204665, "loss": 0.0943, "lr": 1.7850008514567797e-06, "epoch": 1.4570151222729826, "percentage": 29.14, "elapsed_time": "1:16:14", "remaining_time": "3:05:24", "throughput": 8787.29, "total_tokens": 40199648} +{"current_steps": 59645, "total_steps": 204665, "loss": 0.0485, "lr": 1.784948019582957e-06, "epoch": 1.4571372731048298, "percentage": 29.14, "elapsed_time": "1:16:15", "remaining_time": "3:05:23", "throughput": 8787.3, "total_tokens": 40202656} +{"current_steps": 59650, "total_steps": 204665, "loss": 0.001, "lr": 1.7848951820007807e-06, "epoch": 1.457259423936677, "percentage": 29.15, "elapsed_time": "1:16:15", "remaining_time": "3:05:23", "throughput": 8787.33, "total_tokens": 40205792} +{"current_steps": 59655, "total_steps": 204665, "loss": 0.0638, "lr": 1.7848423387106355e-06, "epoch": 1.4573815747685241, "percentage": 29.15, "elapsed_time": "1:16:15", "remaining_time": "3:05:22", "throughput": 8787.34, "total_tokens": 40208800} +{"current_steps": 59660, "total_steps": 204665, "loss": 0.0392, "lr": 1.7847894897129058e-06, "epoch": 1.4575037256003713, "percentage": 29.15, "elapsed_time": "1:16:16", "remaining_time": "3:05:22", "throughput": 8787.5, "total_tokens": 40212640} +{"current_steps": 59665, "total_steps": 204665, "loss": 0.0841, "lr": 1.784736635007976e-06, "epoch": 1.4576258764322185, "percentage": 29.15, "elapsed_time": "1:16:16", "remaining_time": "3:05:21", "throughput": 8787.58, "total_tokens": 40216032} +{"current_steps": 59670, "total_steps": 204665, "loss": 0.1628, "lr": 1.7846837745962301e-06, "epoch": 1.4577480272640657, "percentage": 29.15, "elapsed_time": "1:16:16", "remaining_time": "3:05:21", "throughput": 8787.67, "total_tokens": 40219552} +{"current_steps": 59675, "total_steps": 204665, "loss": 0.0269, "lr": 1.784630908478053e-06, "epoch": 1.457870178095913, "percentage": 29.16, "elapsed_time": "1:16:17", "remaining_time": "3:05:20", "throughput": 8787.71, "total_tokens": 40222752} +{"current_steps": 59680, "total_steps": 204665, "loss": 0.0403, "lr": 1.7845780366538285e-06, "epoch": 1.45799232892776, "percentage": 29.16, "elapsed_time": "1:16:17", "remaining_time": "3:05:20", "throughput": 8787.77, "total_tokens": 40226080} +{"current_steps": 59685, "total_steps": 204665, "loss": 0.2522, "lr": 1.7845251591239418e-06, "epoch": 1.4581144797596073, "percentage": 29.16, "elapsed_time": "1:16:17", "remaining_time": "3:05:19", "throughput": 8787.83, "total_tokens": 40229408} +{"current_steps": 59690, "total_steps": 204665, "loss": 0.0425, "lr": 1.7844722758887772e-06, "epoch": 1.4582366305914543, "percentage": 29.16, "elapsed_time": "1:16:18", "remaining_time": "3:05:19", "throughput": 8787.93, "total_tokens": 40232928} +{"current_steps": 59695, "total_steps": 204665, "loss": 0.0396, "lr": 1.7844193869487189e-06, "epoch": 1.4583587814233014, "percentage": 29.17, "elapsed_time": "1:16:18", "remaining_time": "3:05:19", "throughput": 8788.02, "total_tokens": 40236384} +{"current_steps": 59700, "total_steps": 204665, "loss": 0.0483, "lr": 1.7843664923041522e-06, "epoch": 1.4584809322551486, "percentage": 29.17, "elapsed_time": "1:16:18", "remaining_time": "3:05:18", "throughput": 8788.07, "total_tokens": 40239648} +{"current_steps": 59705, "total_steps": 204665, "loss": 0.1454, "lr": 1.784313591955461e-06, "epoch": 1.4586030830869958, "percentage": 29.17, "elapsed_time": "1:16:19", "remaining_time": "3:05:18", "throughput": 8788.09, "total_tokens": 40242784} +{"current_steps": 59710, "total_steps": 204665, "loss": 0.0302, "lr": 1.784260685903031e-06, "epoch": 1.458725233918843, "percentage": 29.17, "elapsed_time": "1:16:19", "remaining_time": "3:05:17", "throughput": 8788.18, "total_tokens": 40246304} +{"current_steps": 59715, "total_steps": 204665, "loss": 0.1307, "lr": 1.7842077741472457e-06, "epoch": 1.4588473847506902, "percentage": 29.18, "elapsed_time": "1:16:19", "remaining_time": "3:05:17", "throughput": 8788.26, "total_tokens": 40249760} +{"current_steps": 59720, "total_steps": 204665, "loss": 0.0339, "lr": 1.7841548566884908e-06, "epoch": 1.4589695355825374, "percentage": 29.18, "elapsed_time": "1:16:20", "remaining_time": "3:05:16", "throughput": 8788.28, "total_tokens": 40252832} +{"current_steps": 59725, "total_steps": 204665, "loss": 0.1555, "lr": 1.784101933527151e-06, "epoch": 1.4590916864143844, "percentage": 29.18, "elapsed_time": "1:16:20", "remaining_time": "3:05:16", "throughput": 8788.45, "total_tokens": 40256736} +{"current_steps": 59730, "total_steps": 204665, "loss": 0.1282, "lr": 1.7840490046636108e-06, "epoch": 1.4592138372462315, "percentage": 29.18, "elapsed_time": "1:16:20", "remaining_time": "3:05:15", "throughput": 8788.49, "total_tokens": 40259936} +{"current_steps": 59735, "total_steps": 204665, "loss": 0.2229, "lr": 1.7839960700982555e-06, "epoch": 1.4593359880780787, "percentage": 29.19, "elapsed_time": "1:16:21", "remaining_time": "3:05:15", "throughput": 8788.55, "total_tokens": 40263264} +{"current_steps": 59740, "total_steps": 204665, "loss": 0.1385, "lr": 1.7839431298314698e-06, "epoch": 1.459458138909926, "percentage": 29.19, "elapsed_time": "1:16:21", "remaining_time": "3:05:14", "throughput": 8788.54, "total_tokens": 40266208} +{"current_steps": 59745, "total_steps": 204665, "loss": 0.0454, "lr": 1.7838901838636389e-06, "epoch": 1.4595802897417731, "percentage": 29.19, "elapsed_time": "1:16:22", "remaining_time": "3:05:14", "throughput": 8788.54, "total_tokens": 40269152} +{"current_steps": 59750, "total_steps": 204665, "loss": 0.0643, "lr": 1.7838372321951478e-06, "epoch": 1.4597024405736203, "percentage": 29.19, "elapsed_time": "1:16:22", "remaining_time": "3:05:13", "throughput": 8788.59, "total_tokens": 40272416} +{"current_steps": 59755, "total_steps": 204665, "loss": 0.1077, "lr": 1.7837842748263813e-06, "epoch": 1.4598245914054675, "percentage": 29.2, "elapsed_time": "1:16:22", "remaining_time": "3:05:13", "throughput": 8788.62, "total_tokens": 40275488} +{"current_steps": 59760, "total_steps": 204665, "loss": 0.0021, "lr": 1.7837313117577251e-06, "epoch": 1.4599467422373147, "percentage": 29.2, "elapsed_time": "1:16:23", "remaining_time": "3:05:12", "throughput": 8788.73, "total_tokens": 40279136} +{"current_steps": 59765, "total_steps": 204665, "loss": 0.0646, "lr": 1.7836783429895636e-06, "epoch": 1.4600688930691619, "percentage": 29.2, "elapsed_time": "1:16:23", "remaining_time": "3:05:12", "throughput": 8788.82, "total_tokens": 40282592} +{"current_steps": 59770, "total_steps": 204665, "loss": 0.0802, "lr": 1.7836253685222827e-06, "epoch": 1.460191043901009, "percentage": 29.2, "elapsed_time": "1:16:23", "remaining_time": "3:05:11", "throughput": 8788.89, "total_tokens": 40285984} +{"current_steps": 59775, "total_steps": 204665, "loss": 0.0581, "lr": 1.7835723883562673e-06, "epoch": 1.4603131947328563, "percentage": 29.21, "elapsed_time": "1:16:24", "remaining_time": "3:05:11", "throughput": 8788.99, "total_tokens": 40289504} +{"current_steps": 59780, "total_steps": 204665, "loss": 0.0379, "lr": 1.7835194024919026e-06, "epoch": 1.4604353455647032, "percentage": 29.21, "elapsed_time": "1:16:24", "remaining_time": "3:05:11", "throughput": 8789.1, "total_tokens": 40293152} +{"current_steps": 59785, "total_steps": 204665, "loss": 0.0462, "lr": 1.783466410929574e-06, "epoch": 1.4605574963965504, "percentage": 29.21, "elapsed_time": "1:16:24", "remaining_time": "3:05:10", "throughput": 8789.24, "total_tokens": 40296928} +{"current_steps": 59790, "total_steps": 204665, "loss": 0.0274, "lr": 1.7834134136696672e-06, "epoch": 1.4606796472283976, "percentage": 29.21, "elapsed_time": "1:16:25", "remaining_time": "3:05:10", "throughput": 8789.35, "total_tokens": 40300512} +{"current_steps": 59795, "total_steps": 204665, "loss": 0.0352, "lr": 1.783360410712567e-06, "epoch": 1.4608017980602448, "percentage": 29.22, "elapsed_time": "1:16:25", "remaining_time": "3:05:09", "throughput": 8789.43, "total_tokens": 40303968} +{"current_steps": 59800, "total_steps": 204665, "loss": 0.0287, "lr": 1.7833074020586597e-06, "epoch": 1.460923948892092, "percentage": 29.22, "elapsed_time": "1:16:25", "remaining_time": "3:05:09", "throughput": 8789.49, "total_tokens": 40307296} +{"current_steps": 59805, "total_steps": 204665, "loss": 0.1239, "lr": 1.7832543877083302e-06, "epoch": 1.4610460997239392, "percentage": 29.22, "elapsed_time": "1:16:26", "remaining_time": "3:05:08", "throughput": 8789.53, "total_tokens": 40310496} +{"current_steps": 59810, "total_steps": 204665, "loss": 0.0801, "lr": 1.7832013676619636e-06, "epoch": 1.4611682505557864, "percentage": 29.22, "elapsed_time": "1:16:26", "remaining_time": "3:05:08", "throughput": 8789.55, "total_tokens": 40313568} +{"current_steps": 59815, "total_steps": 204665, "loss": 0.0944, "lr": 1.7831483419199462e-06, "epoch": 1.4612904013876333, "percentage": 29.23, "elapsed_time": "1:16:26", "remaining_time": "3:05:07", "throughput": 8789.61, "total_tokens": 40316832} +{"current_steps": 59820, "total_steps": 204665, "loss": 0.002, "lr": 1.7830953104826638e-06, "epoch": 1.4614125522194805, "percentage": 29.23, "elapsed_time": "1:16:27", "remaining_time": "3:05:07", "throughput": 8789.62, "total_tokens": 40319904} +{"current_steps": 59825, "total_steps": 204665, "loss": 0.0462, "lr": 1.7830422733505012e-06, "epoch": 1.4615347030513277, "percentage": 29.23, "elapsed_time": "1:16:27", "remaining_time": "3:05:06", "throughput": 8789.81, "total_tokens": 40323936} +{"current_steps": 59830, "total_steps": 204665, "loss": 0.1381, "lr": 1.782989230523845e-06, "epoch": 1.461656853883175, "percentage": 29.23, "elapsed_time": "1:16:27", "remaining_time": "3:05:06", "throughput": 8790.01, "total_tokens": 40328032} +{"current_steps": 59835, "total_steps": 204665, "loss": 0.0352, "lr": 1.7829361820030803e-06, "epoch": 1.461779004715022, "percentage": 29.24, "elapsed_time": "1:16:28", "remaining_time": "3:05:05", "throughput": 8790.08, "total_tokens": 40331424} +{"current_steps": 59840, "total_steps": 204665, "loss": 0.0854, "lr": 1.782883127788593e-06, "epoch": 1.4619011555468693, "percentage": 29.24, "elapsed_time": "1:16:28", "remaining_time": "3:05:05", "throughput": 8790.11, "total_tokens": 40334624} +{"current_steps": 59845, "total_steps": 204665, "loss": 0.0596, "lr": 1.782830067880769e-06, "epoch": 1.4620233063787165, "percentage": 29.24, "elapsed_time": "1:16:28", "remaining_time": "3:05:04", "throughput": 8790.22, "total_tokens": 40338208} +{"current_steps": 59850, "total_steps": 204665, "loss": 0.2008, "lr": 1.7827770022799947e-06, "epoch": 1.4621454572105637, "percentage": 29.24, "elapsed_time": "1:16:29", "remaining_time": "3:05:04", "throughput": 8790.38, "total_tokens": 40342112} +{"current_steps": 59855, "total_steps": 204665, "loss": 0.0595, "lr": 1.7827239309866548e-06, "epoch": 1.4622676080424108, "percentage": 29.25, "elapsed_time": "1:16:29", "remaining_time": "3:05:04", "throughput": 8790.4, "total_tokens": 40345184} +{"current_steps": 59860, "total_steps": 204665, "loss": 0.1361, "lr": 1.7826708540011363e-06, "epoch": 1.462389758874258, "percentage": 29.25, "elapsed_time": "1:16:30", "remaining_time": "3:05:03", "throughput": 8790.47, "total_tokens": 40348576} +{"current_steps": 59865, "total_steps": 204665, "loss": 0.0892, "lr": 1.7826177713238248e-06, "epoch": 1.4625119097061052, "percentage": 29.25, "elapsed_time": "1:16:30", "remaining_time": "3:05:03", "throughput": 8790.58, "total_tokens": 40352224} +{"current_steps": 59870, "total_steps": 204665, "loss": 0.002, "lr": 1.7825646829551064e-06, "epoch": 1.4626340605379522, "percentage": 29.25, "elapsed_time": "1:16:30", "remaining_time": "3:05:02", "throughput": 8790.68, "total_tokens": 40355744} +{"current_steps": 59875, "total_steps": 204665, "loss": 0.0888, "lr": 1.782511588895367e-06, "epoch": 1.4627562113697994, "percentage": 29.26, "elapsed_time": "1:16:31", "remaining_time": "3:05:02", "throughput": 8790.72, "total_tokens": 40359008} +{"current_steps": 59880, "total_steps": 204665, "loss": 0.1662, "lr": 1.782458489144993e-06, "epoch": 1.4628783622016466, "percentage": 29.26, "elapsed_time": "1:16:31", "remaining_time": "3:05:01", "throughput": 8790.76, "total_tokens": 40362208} +{"current_steps": 59885, "total_steps": 204665, "loss": 0.1133, "lr": 1.7824053837043706e-06, "epoch": 1.4630005130334938, "percentage": 29.26, "elapsed_time": "1:16:31", "remaining_time": "3:05:01", "throughput": 8790.81, "total_tokens": 40365472} +{"current_steps": 59890, "total_steps": 204665, "loss": 0.0614, "lr": 1.7823522725738855e-06, "epoch": 1.463122663865341, "percentage": 29.26, "elapsed_time": "1:16:32", "remaining_time": "3:05:00", "throughput": 8790.85, "total_tokens": 40368736} +{"current_steps": 59895, "total_steps": 204665, "loss": 0.0025, "lr": 1.7822991557539244e-06, "epoch": 1.4632448146971881, "percentage": 29.26, "elapsed_time": "1:16:32", "remaining_time": "3:05:00", "throughput": 8790.93, "total_tokens": 40372128} +{"current_steps": 59900, "total_steps": 204665, "loss": 0.1934, "lr": 1.7822460332448733e-06, "epoch": 1.4633669655290353, "percentage": 29.27, "elapsed_time": "1:16:32", "remaining_time": "3:04:59", "throughput": 8790.97, "total_tokens": 40375392} +{"current_steps": 59905, "total_steps": 204665, "loss": 0.0993, "lr": 1.7821929050471188e-06, "epoch": 1.4634891163608823, "percentage": 29.27, "elapsed_time": "1:16:33", "remaining_time": "3:04:59", "throughput": 8790.99, "total_tokens": 40378464} +{"current_steps": 59910, "total_steps": 204665, "loss": 0.0423, "lr": 1.7821397711610468e-06, "epoch": 1.4636112671927295, "percentage": 29.27, "elapsed_time": "1:16:33", "remaining_time": "3:04:58", "throughput": 8791.14, "total_tokens": 40382304} +{"current_steps": 59915, "total_steps": 204665, "loss": 0.0396, "lr": 1.7820866315870444e-06, "epoch": 1.4637334180245767, "percentage": 29.27, "elapsed_time": "1:16:33", "remaining_time": "3:04:58", "throughput": 8791.3, "total_tokens": 40386272} +{"current_steps": 59920, "total_steps": 204665, "loss": 0.0489, "lr": 1.7820334863254974e-06, "epoch": 1.4638555688564239, "percentage": 29.28, "elapsed_time": "1:16:34", "remaining_time": "3:04:58", "throughput": 8791.45, "total_tokens": 40390112} +{"current_steps": 59925, "total_steps": 204665, "loss": 0.1565, "lr": 1.7819803353767926e-06, "epoch": 1.463977719688271, "percentage": 29.28, "elapsed_time": "1:16:34", "remaining_time": "3:04:57", "throughput": 8791.59, "total_tokens": 40393888} +{"current_steps": 59930, "total_steps": 204665, "loss": 0.1662, "lr": 1.7819271787413164e-06, "epoch": 1.4640998705201183, "percentage": 29.28, "elapsed_time": "1:16:34", "remaining_time": "3:04:57", "throughput": 8791.63, "total_tokens": 40397088} +{"current_steps": 59935, "total_steps": 204665, "loss": 0.1424, "lr": 1.7818740164194556e-06, "epoch": 1.4642220213519654, "percentage": 29.28, "elapsed_time": "1:16:35", "remaining_time": "3:04:56", "throughput": 8791.75, "total_tokens": 40400736} +{"current_steps": 59940, "total_steps": 204665, "loss": 0.0331, "lr": 1.7818208484115967e-06, "epoch": 1.4643441721838126, "percentage": 29.29, "elapsed_time": "1:16:35", "remaining_time": "3:04:56", "throughput": 8791.93, "total_tokens": 40404768} +{"current_steps": 59945, "total_steps": 204665, "loss": 0.1021, "lr": 1.781767674718126e-06, "epoch": 1.4644663230156598, "percentage": 29.29, "elapsed_time": "1:16:36", "remaining_time": "3:04:55", "throughput": 8791.97, "total_tokens": 40407968} +{"current_steps": 59950, "total_steps": 204665, "loss": 0.0847, "lr": 1.7817144953394307e-06, "epoch": 1.464588473847507, "percentage": 29.29, "elapsed_time": "1:16:36", "remaining_time": "3:04:55", "throughput": 8791.99, "total_tokens": 40411104} +{"current_steps": 59955, "total_steps": 204665, "loss": 0.1078, "lr": 1.7816613102758976e-06, "epoch": 1.4647106246793542, "percentage": 29.29, "elapsed_time": "1:16:36", "remaining_time": "3:04:54", "throughput": 8792.04, "total_tokens": 40414368} +{"current_steps": 59960, "total_steps": 204665, "loss": 0.0459, "lr": 1.781608119527913e-06, "epoch": 1.4648327755112012, "percentage": 29.3, "elapsed_time": "1:16:37", "remaining_time": "3:04:54", "throughput": 8792.08, "total_tokens": 40417568} +{"current_steps": 59965, "total_steps": 204665, "loss": 0.1725, "lr": 1.7815549230958637e-06, "epoch": 1.4649549263430484, "percentage": 29.3, "elapsed_time": "1:16:37", "remaining_time": "3:04:53", "throughput": 8792.09, "total_tokens": 40420640} +{"current_steps": 59970, "total_steps": 204665, "loss": 0.1199, "lr": 1.7815017209801369e-06, "epoch": 1.4650770771748955, "percentage": 29.3, "elapsed_time": "1:16:37", "remaining_time": "3:04:53", "throughput": 8792.1, "total_tokens": 40423648} +{"current_steps": 59975, "total_steps": 204665, "loss": 0.0028, "lr": 1.7814485131811195e-06, "epoch": 1.4651992280067427, "percentage": 29.3, "elapsed_time": "1:16:38", "remaining_time": "3:04:52", "throughput": 8792.21, "total_tokens": 40427232} +{"current_steps": 59980, "total_steps": 204665, "loss": 0.2917, "lr": 1.7813952996991984e-06, "epoch": 1.46532137883859, "percentage": 29.31, "elapsed_time": "1:16:38", "remaining_time": "3:04:52", "throughput": 8792.21, "total_tokens": 40430176} +{"current_steps": 59985, "total_steps": 204665, "loss": 0.0253, "lr": 1.7813420805347602e-06, "epoch": 1.4654435296704371, "percentage": 29.31, "elapsed_time": "1:16:38", "remaining_time": "3:04:51", "throughput": 8792.3, "total_tokens": 40433696} +{"current_steps": 59990, "total_steps": 204665, "loss": 0.1689, "lr": 1.7812888556881926e-06, "epoch": 1.465565680502284, "percentage": 29.31, "elapsed_time": "1:16:39", "remaining_time": "3:04:51", "throughput": 8792.34, "total_tokens": 40436896} +{"current_steps": 59995, "total_steps": 204665, "loss": 0.0497, "lr": 1.781235625159882e-06, "epoch": 1.4656878313341313, "percentage": 29.31, "elapsed_time": "1:16:39", "remaining_time": "3:04:50", "throughput": 8792.46, "total_tokens": 40440544} +{"current_steps": 60000, "total_steps": 204665, "loss": 0.048, "lr": 1.781182388950216e-06, "epoch": 1.4658099821659785, "percentage": 29.32, "elapsed_time": "1:16:39", "remaining_time": "3:04:50", "throughput": 8792.53, "total_tokens": 40443936} +{"current_steps": 60005, "total_steps": 204665, "loss": 0.1678, "lr": 1.7811291470595815e-06, "epoch": 1.4659321329978257, "percentage": 29.32, "elapsed_time": "1:16:40", "remaining_time": "3:04:50", "throughput": 8792.72, "total_tokens": 40447968} +{"current_steps": 60010, "total_steps": 204665, "loss": 0.0026, "lr": 1.7810758994883656e-06, "epoch": 1.4660542838296728, "percentage": 29.32, "elapsed_time": "1:16:40", "remaining_time": "3:04:49", "throughput": 8792.86, "total_tokens": 40451744} +{"current_steps": 60015, "total_steps": 204665, "loss": 0.0326, "lr": 1.781022646236956e-06, "epoch": 1.46617643466152, "percentage": 29.32, "elapsed_time": "1:16:40", "remaining_time": "3:04:49", "throughput": 8792.89, "total_tokens": 40454880} +{"current_steps": 60020, "total_steps": 204665, "loss": 0.1341, "lr": 1.7809693873057393e-06, "epoch": 1.4662985854933672, "percentage": 29.33, "elapsed_time": "1:16:41", "remaining_time": "3:04:48", "throughput": 8792.92, "total_tokens": 40458080} +{"current_steps": 60025, "total_steps": 204665, "loss": 0.0014, "lr": 1.7809161226951032e-06, "epoch": 1.4664207363252144, "percentage": 29.33, "elapsed_time": "1:16:41", "remaining_time": "3:04:48", "throughput": 8793.07, "total_tokens": 40461856} +{"current_steps": 60030, "total_steps": 204665, "loss": 0.1523, "lr": 1.7808628524054352e-06, "epoch": 1.4665428871570616, "percentage": 29.33, "elapsed_time": "1:16:41", "remaining_time": "3:04:47", "throughput": 8793.11, "total_tokens": 40465056} +{"current_steps": 60035, "total_steps": 204665, "loss": 0.0853, "lr": 1.7808095764371225e-06, "epoch": 1.4666650379889088, "percentage": 29.33, "elapsed_time": "1:16:42", "remaining_time": "3:04:47", "throughput": 8793.15, "total_tokens": 40468256} +{"current_steps": 60040, "total_steps": 204665, "loss": 0.1113, "lr": 1.7807562947905526e-06, "epoch": 1.466787188820756, "percentage": 29.34, "elapsed_time": "1:16:42", "remaining_time": "3:04:46", "throughput": 8793.23, "total_tokens": 40471712} +{"current_steps": 60045, "total_steps": 204665, "loss": 0.0828, "lr": 1.7807030074661127e-06, "epoch": 1.4669093396526032, "percentage": 29.34, "elapsed_time": "1:16:42", "remaining_time": "3:04:46", "throughput": 8793.35, "total_tokens": 40475360} +{"current_steps": 60050, "total_steps": 204665, "loss": 0.0282, "lr": 1.7806497144641909e-06, "epoch": 1.4670314904844501, "percentage": 29.34, "elapsed_time": "1:16:43", "remaining_time": "3:04:45", "throughput": 8793.42, "total_tokens": 40478752} +{"current_steps": 60055, "total_steps": 204665, "loss": 0.0005, "lr": 1.7805964157851739e-06, "epoch": 1.4671536413162973, "percentage": 29.34, "elapsed_time": "1:16:43", "remaining_time": "3:04:45", "throughput": 8793.46, "total_tokens": 40482016} +{"current_steps": 60060, "total_steps": 204665, "loss": 0.0425, "lr": 1.7805431114294503e-06, "epoch": 1.4672757921481445, "percentage": 29.35, "elapsed_time": "1:16:43", "remaining_time": "3:04:44", "throughput": 8793.52, "total_tokens": 40485280} +{"current_steps": 60065, "total_steps": 204665, "loss": 0.1388, "lr": 1.7804898013974068e-06, "epoch": 1.4673979429799917, "percentage": 29.35, "elapsed_time": "1:16:44", "remaining_time": "3:04:44", "throughput": 8793.65, "total_tokens": 40489056} +{"current_steps": 60070, "total_steps": 204665, "loss": 0.0005, "lr": 1.780436485689432e-06, "epoch": 1.467520093811839, "percentage": 29.35, "elapsed_time": "1:16:44", "remaining_time": "3:04:44", "throughput": 8793.78, "total_tokens": 40492768} +{"current_steps": 60075, "total_steps": 204665, "loss": 0.0839, "lr": 1.7803831643059128e-06, "epoch": 1.467642244643686, "percentage": 29.35, "elapsed_time": "1:16:45", "remaining_time": "3:04:43", "throughput": 8793.85, "total_tokens": 40496160} +{"current_steps": 60080, "total_steps": 204665, "loss": 0.1656, "lr": 1.7803298372472373e-06, "epoch": 1.467764395475533, "percentage": 29.36, "elapsed_time": "1:16:45", "remaining_time": "3:04:43", "throughput": 8793.92, "total_tokens": 40499552} +{"current_steps": 60085, "total_steps": 204665, "loss": 0.0605, "lr": 1.7802765045137935e-06, "epoch": 1.4678865463073802, "percentage": 29.36, "elapsed_time": "1:16:45", "remaining_time": "3:04:42", "throughput": 8793.91, "total_tokens": 40502432} +{"current_steps": 60090, "total_steps": 204665, "loss": 0.0969, "lr": 1.7802231661059692e-06, "epoch": 1.4680086971392274, "percentage": 29.36, "elapsed_time": "1:16:46", "remaining_time": "3:04:42", "throughput": 8793.98, "total_tokens": 40505824} +{"current_steps": 60095, "total_steps": 204665, "loss": 0.0764, "lr": 1.780169822024152e-06, "epoch": 1.4681308479710746, "percentage": 29.36, "elapsed_time": "1:16:46", "remaining_time": "3:04:41", "throughput": 8794.17, "total_tokens": 40509920} +{"current_steps": 60100, "total_steps": 204665, "loss": 0.2528, "lr": 1.78011647226873e-06, "epoch": 1.4682529988029218, "percentage": 29.37, "elapsed_time": "1:16:46", "remaining_time": "3:04:41", "throughput": 8794.2, "total_tokens": 40513120} +{"current_steps": 60105, "total_steps": 204665, "loss": 0.0569, "lr": 1.7800631168400915e-06, "epoch": 1.468375149634769, "percentage": 29.37, "elapsed_time": "1:16:47", "remaining_time": "3:04:40", "throughput": 8794.28, "total_tokens": 40516512} +{"current_steps": 60110, "total_steps": 204665, "loss": 0.2744, "lr": 1.7800097557386238e-06, "epoch": 1.4684973004666162, "percentage": 29.37, "elapsed_time": "1:16:47", "remaining_time": "3:04:40", "throughput": 8794.33, "total_tokens": 40519776} +{"current_steps": 60115, "total_steps": 204665, "loss": 0.1327, "lr": 1.7799563889647156e-06, "epoch": 1.4686194512984634, "percentage": 29.37, "elapsed_time": "1:16:47", "remaining_time": "3:04:39", "throughput": 8794.37, "total_tokens": 40522976} +{"current_steps": 60120, "total_steps": 204665, "loss": 0.0038, "lr": 1.7799030165187548e-06, "epoch": 1.4687416021303106, "percentage": 29.37, "elapsed_time": "1:16:48", "remaining_time": "3:04:39", "throughput": 8794.47, "total_tokens": 40526496} +{"current_steps": 60125, "total_steps": 204665, "loss": 0.0437, "lr": 1.7798496384011291e-06, "epoch": 1.4688637529621578, "percentage": 29.38, "elapsed_time": "1:16:48", "remaining_time": "3:04:38", "throughput": 8794.54, "total_tokens": 40529888} +{"current_steps": 60130, "total_steps": 204665, "loss": 0.0683, "lr": 1.7797962546122274e-06, "epoch": 1.468985903794005, "percentage": 29.38, "elapsed_time": "1:16:48", "remaining_time": "3:04:38", "throughput": 8794.68, "total_tokens": 40533664} +{"current_steps": 60135, "total_steps": 204665, "loss": 0.0433, "lr": 1.7797428651524378e-06, "epoch": 1.469108054625852, "percentage": 29.38, "elapsed_time": "1:16:49", "remaining_time": "3:04:38", "throughput": 8794.92, "total_tokens": 40538016} +{"current_steps": 60140, "total_steps": 204665, "loss": 0.0481, "lr": 1.779689470022148e-06, "epoch": 1.4692302054576991, "percentage": 29.38, "elapsed_time": "1:16:49", "remaining_time": "3:04:37", "throughput": 8795.04, "total_tokens": 40541728} +{"current_steps": 60145, "total_steps": 204665, "loss": 0.0712, "lr": 1.7796360692217468e-06, "epoch": 1.4693523562895463, "percentage": 29.39, "elapsed_time": "1:16:49", "remaining_time": "3:04:37", "throughput": 8795.05, "total_tokens": 40544736} +{"current_steps": 60150, "total_steps": 204665, "loss": 0.0833, "lr": 1.7795826627516224e-06, "epoch": 1.4694745071213935, "percentage": 29.39, "elapsed_time": "1:16:50", "remaining_time": "3:04:36", "throughput": 8795.07, "total_tokens": 40547808} +{"current_steps": 60155, "total_steps": 204665, "loss": 0.0974, "lr": 1.779529250612163e-06, "epoch": 1.4695966579532407, "percentage": 29.39, "elapsed_time": "1:16:50", "remaining_time": "3:04:36", "throughput": 8795.22, "total_tokens": 40551712} +{"current_steps": 60160, "total_steps": 204665, "loss": 0.0931, "lr": 1.7794758328037575e-06, "epoch": 1.4697188087850879, "percentage": 29.39, "elapsed_time": "1:16:51", "remaining_time": "3:04:35", "throughput": 8795.35, "total_tokens": 40555424} +{"current_steps": 60165, "total_steps": 204665, "loss": 0.1147, "lr": 1.779422409326794e-06, "epoch": 1.469840959616935, "percentage": 29.4, "elapsed_time": "1:16:51", "remaining_time": "3:04:35", "throughput": 8795.46, "total_tokens": 40559072} +{"current_steps": 60170, "total_steps": 204665, "loss": 0.1045, "lr": 1.779368980181661e-06, "epoch": 1.469963110448782, "percentage": 29.4, "elapsed_time": "1:16:51", "remaining_time": "3:04:34", "throughput": 8795.49, "total_tokens": 40562208} +{"current_steps": 60175, "total_steps": 204665, "loss": 0.1029, "lr": 1.7793155453687473e-06, "epoch": 1.4700852612806292, "percentage": 29.4, "elapsed_time": "1:16:52", "remaining_time": "3:04:34", "throughput": 8795.57, "total_tokens": 40565664} +{"current_steps": 60180, "total_steps": 204665, "loss": 0.1246, "lr": 1.7792621048884412e-06, "epoch": 1.4702074121124764, "percentage": 29.4, "elapsed_time": "1:16:52", "remaining_time": "3:04:33", "throughput": 8795.65, "total_tokens": 40569056} +{"current_steps": 60185, "total_steps": 204665, "loss": 0.2031, "lr": 1.7792086587411315e-06, "epoch": 1.4703295629443236, "percentage": 29.41, "elapsed_time": "1:16:52", "remaining_time": "3:04:33", "throughput": 8795.63, "total_tokens": 40571936} +{"current_steps": 60190, "total_steps": 204665, "loss": 0.1175, "lr": 1.7791552069272071e-06, "epoch": 1.4704517137761708, "percentage": 29.41, "elapsed_time": "1:16:53", "remaining_time": "3:04:32", "throughput": 8795.74, "total_tokens": 40575520} +{"current_steps": 60195, "total_steps": 204665, "loss": 0.0451, "lr": 1.779101749447056e-06, "epoch": 1.470573864608018, "percentage": 29.41, "elapsed_time": "1:16:53", "remaining_time": "3:04:32", "throughput": 8795.85, "total_tokens": 40579168} +{"current_steps": 60200, "total_steps": 204665, "loss": 0.1231, "lr": 1.779048286301068e-06, "epoch": 1.4706960154398652, "percentage": 29.41, "elapsed_time": "1:16:53", "remaining_time": "3:04:31", "throughput": 8795.87, "total_tokens": 40582240} +{"current_steps": 60205, "total_steps": 204665, "loss": 0.0335, "lr": 1.778994817489631e-06, "epoch": 1.4708181662717124, "percentage": 29.42, "elapsed_time": "1:16:54", "remaining_time": "3:04:31", "throughput": 8795.93, "total_tokens": 40585568} +{"current_steps": 60210, "total_steps": 204665, "loss": 0.0749, "lr": 1.778941343013134e-06, "epoch": 1.4709403171035595, "percentage": 29.42, "elapsed_time": "1:16:54", "remaining_time": "3:04:30", "throughput": 8795.97, "total_tokens": 40588768} +{"current_steps": 60215, "total_steps": 204665, "loss": 0.1077, "lr": 1.7788878628719663e-06, "epoch": 1.4710624679354067, "percentage": 29.42, "elapsed_time": "1:16:54", "remaining_time": "3:04:30", "throughput": 8796.01, "total_tokens": 40591968} +{"current_steps": 60220, "total_steps": 204665, "loss": 0.043, "lr": 1.7788343770665165e-06, "epoch": 1.471184618767254, "percentage": 29.42, "elapsed_time": "1:16:55", "remaining_time": "3:04:30", "throughput": 8796.09, "total_tokens": 40595424} +{"current_steps": 60225, "total_steps": 204665, "loss": 0.0018, "lr": 1.7787808855971737e-06, "epoch": 1.471306769599101, "percentage": 29.43, "elapsed_time": "1:16:55", "remaining_time": "3:04:29", "throughput": 8796.11, "total_tokens": 40598560} +{"current_steps": 60230, "total_steps": 204665, "loss": 0.1062, "lr": 1.7787273884643268e-06, "epoch": 1.471428920430948, "percentage": 29.43, "elapsed_time": "1:16:55", "remaining_time": "3:04:29", "throughput": 8796.17, "total_tokens": 40601888} +{"current_steps": 60235, "total_steps": 204665, "loss": 0.039, "lr": 1.7786738856683647e-06, "epoch": 1.4715510712627953, "percentage": 29.43, "elapsed_time": "1:16:56", "remaining_time": "3:04:28", "throughput": 8796.3, "total_tokens": 40605600} +{"current_steps": 60240, "total_steps": 204665, "loss": 0.0543, "lr": 1.7786203772096768e-06, "epoch": 1.4716732220946425, "percentage": 29.43, "elapsed_time": "1:16:56", "remaining_time": "3:04:28", "throughput": 8796.36, "total_tokens": 40608928} +{"current_steps": 60245, "total_steps": 204665, "loss": 0.2321, "lr": 1.7785668630886521e-06, "epoch": 1.4717953729264897, "percentage": 29.44, "elapsed_time": "1:16:56", "remaining_time": "3:04:27", "throughput": 8796.41, "total_tokens": 40612192} +{"current_steps": 60250, "total_steps": 204665, "loss": 0.039, "lr": 1.77851334330568e-06, "epoch": 1.4719175237583368, "percentage": 29.44, "elapsed_time": "1:16:57", "remaining_time": "3:04:27", "throughput": 8796.4, "total_tokens": 40615136} +{"current_steps": 60255, "total_steps": 204665, "loss": 0.0719, "lr": 1.7784598178611492e-06, "epoch": 1.472039674590184, "percentage": 29.44, "elapsed_time": "1:16:57", "remaining_time": "3:04:26", "throughput": 8796.5, "total_tokens": 40618720} +{"current_steps": 60260, "total_steps": 204665, "loss": 0.2505, "lr": 1.7784062867554493e-06, "epoch": 1.472161825422031, "percentage": 29.44, "elapsed_time": "1:16:57", "remaining_time": "3:04:26", "throughput": 8796.55, "total_tokens": 40621984} +{"current_steps": 60265, "total_steps": 204665, "loss": 0.1049, "lr": 1.7783527499889694e-06, "epoch": 1.4722839762538782, "percentage": 29.45, "elapsed_time": "1:16:58", "remaining_time": "3:04:25", "throughput": 8796.63, "total_tokens": 40625440} +{"current_steps": 60270, "total_steps": 204665, "loss": 0.0771, "lr": 1.778299207562099e-06, "epoch": 1.4724061270857254, "percentage": 29.45, "elapsed_time": "1:16:58", "remaining_time": "3:04:25", "throughput": 8796.6, "total_tokens": 40628192} +{"current_steps": 60275, "total_steps": 204665, "loss": 0.0427, "lr": 1.7782456594752275e-06, "epoch": 1.4725282779175726, "percentage": 29.45, "elapsed_time": "1:16:58", "remaining_time": "3:04:24", "throughput": 8796.73, "total_tokens": 40631968} +{"current_steps": 60280, "total_steps": 204665, "loss": 0.0634, "lr": 1.7781921057287442e-06, "epoch": 1.4726504287494198, "percentage": 29.45, "elapsed_time": "1:16:59", "remaining_time": "3:04:24", "throughput": 8796.93, "total_tokens": 40636128} +{"current_steps": 60285, "total_steps": 204665, "loss": 0.0008, "lr": 1.7781385463230385e-06, "epoch": 1.472772579581267, "percentage": 29.46, "elapsed_time": "1:16:59", "remaining_time": "3:04:23", "throughput": 8796.94, "total_tokens": 40639136} +{"current_steps": 60290, "total_steps": 204665, "loss": 0.1044, "lr": 1.7780849812585e-06, "epoch": 1.4728947304131141, "percentage": 29.46, "elapsed_time": "1:17:00", "remaining_time": "3:04:23", "throughput": 8796.91, "total_tokens": 40641888} +{"current_steps": 60295, "total_steps": 204665, "loss": 0.1011, "lr": 1.7780314105355183e-06, "epoch": 1.4730168812449613, "percentage": 29.46, "elapsed_time": "1:17:00", "remaining_time": "3:04:22", "throughput": 8796.9, "total_tokens": 40644832} +{"current_steps": 60300, "total_steps": 204665, "loss": 0.1502, "lr": 1.7779778341544832e-06, "epoch": 1.4731390320768085, "percentage": 29.46, "elapsed_time": "1:17:00", "remaining_time": "3:04:22", "throughput": 8796.94, "total_tokens": 40648032} +{"current_steps": 60305, "total_steps": 204665, "loss": 0.1569, "lr": 1.7779242521157837e-06, "epoch": 1.4732611829086557, "percentage": 29.47, "elapsed_time": "1:17:01", "remaining_time": "3:04:22", "throughput": 8796.99, "total_tokens": 40651296} +{"current_steps": 60310, "total_steps": 204665, "loss": 0.1258, "lr": 1.77787066441981e-06, "epoch": 1.473383333740503, "percentage": 29.47, "elapsed_time": "1:17:01", "remaining_time": "3:04:21", "throughput": 8797.05, "total_tokens": 40654624} +{"current_steps": 60315, "total_steps": 204665, "loss": 0.0015, "lr": 1.7778170710669513e-06, "epoch": 1.4735054845723499, "percentage": 29.47, "elapsed_time": "1:17:01", "remaining_time": "3:04:21", "throughput": 8797.15, "total_tokens": 40658144} +{"current_steps": 60320, "total_steps": 204665, "loss": 0.0685, "lr": 1.7777634720575978e-06, "epoch": 1.473627635404197, "percentage": 29.47, "elapsed_time": "1:17:02", "remaining_time": "3:04:20", "throughput": 8797.19, "total_tokens": 40661408} +{"current_steps": 60325, "total_steps": 204665, "loss": 0.1081, "lr": 1.777709867392139e-06, "epoch": 1.4737497862360442, "percentage": 29.47, "elapsed_time": "1:17:02", "remaining_time": "3:04:20", "throughput": 8797.24, "total_tokens": 40664672} +{"current_steps": 60330, "total_steps": 204665, "loss": 0.0795, "lr": 1.7776562570709652e-06, "epoch": 1.4738719370678914, "percentage": 29.48, "elapsed_time": "1:17:02", "remaining_time": "3:04:19", "throughput": 8797.33, "total_tokens": 40668128} +{"current_steps": 60335, "total_steps": 204665, "loss": 0.1111, "lr": 1.7776026410944659e-06, "epoch": 1.4739940878997386, "percentage": 29.48, "elapsed_time": "1:17:03", "remaining_time": "3:04:19", "throughput": 8797.35, "total_tokens": 40671200} +{"current_steps": 60340, "total_steps": 204665, "loss": 0.1285, "lr": 1.7775490194630307e-06, "epoch": 1.4741162387315858, "percentage": 29.48, "elapsed_time": "1:17:03", "remaining_time": "3:04:18", "throughput": 8797.37, "total_tokens": 40674272} +{"current_steps": 60345, "total_steps": 204665, "loss": 0.0018, "lr": 1.7774953921770504e-06, "epoch": 1.474238389563433, "percentage": 29.48, "elapsed_time": "1:17:03", "remaining_time": "3:04:18", "throughput": 8797.45, "total_tokens": 40677728} +{"current_steps": 60350, "total_steps": 204665, "loss": 0.1822, "lr": 1.7774417592369142e-06, "epoch": 1.47436054039528, "percentage": 29.49, "elapsed_time": "1:17:04", "remaining_time": "3:04:17", "throughput": 8797.52, "total_tokens": 40681120} +{"current_steps": 60355, "total_steps": 204665, "loss": 0.0315, "lr": 1.7773881206430122e-06, "epoch": 1.4744826912271272, "percentage": 29.49, "elapsed_time": "1:17:04", "remaining_time": "3:04:17", "throughput": 8797.59, "total_tokens": 40684512} +{"current_steps": 60360, "total_steps": 204665, "loss": 0.0442, "lr": 1.7773344763957349e-06, "epoch": 1.4746048420589744, "percentage": 29.49, "elapsed_time": "1:17:04", "remaining_time": "3:04:16", "throughput": 8797.76, "total_tokens": 40688480} +{"current_steps": 60365, "total_steps": 204665, "loss": 0.1618, "lr": 1.7772808264954724e-06, "epoch": 1.4747269928908215, "percentage": 29.49, "elapsed_time": "1:17:05", "remaining_time": "3:04:16", "throughput": 8797.83, "total_tokens": 40691872} +{"current_steps": 60370, "total_steps": 204665, "loss": 0.0412, "lr": 1.7772271709426145e-06, "epoch": 1.4748491437226687, "percentage": 29.5, "elapsed_time": "1:17:05", "remaining_time": "3:04:15", "throughput": 8797.9, "total_tokens": 40695264} +{"current_steps": 60375, "total_steps": 204665, "loss": 0.0019, "lr": 1.7771735097375514e-06, "epoch": 1.474971294554516, "percentage": 29.5, "elapsed_time": "1:17:05", "remaining_time": "3:04:15", "throughput": 8798.03, "total_tokens": 40698976} +{"current_steps": 60380, "total_steps": 204665, "loss": 0.0681, "lr": 1.777119842880674e-06, "epoch": 1.4750934453863631, "percentage": 29.5, "elapsed_time": "1:17:06", "remaining_time": "3:04:15", "throughput": 8798.11, "total_tokens": 40702432} +{"current_steps": 60385, "total_steps": 204665, "loss": 0.1681, "lr": 1.7770661703723716e-06, "epoch": 1.4752155962182103, "percentage": 29.5, "elapsed_time": "1:17:06", "remaining_time": "3:04:14", "throughput": 8798.18, "total_tokens": 40705824} +{"current_steps": 60390, "total_steps": 204665, "loss": 0.2188, "lr": 1.7770124922130352e-06, "epoch": 1.4753377470500575, "percentage": 29.51, "elapsed_time": "1:17:06", "remaining_time": "3:04:14", "throughput": 8798.23, "total_tokens": 40709088} +{"current_steps": 60395, "total_steps": 204665, "loss": 0.1257, "lr": 1.7769588084030547e-06, "epoch": 1.4754598978819047, "percentage": 29.51, "elapsed_time": "1:17:07", "remaining_time": "3:04:13", "throughput": 8798.28, "total_tokens": 40712352} +{"current_steps": 60400, "total_steps": 204665, "loss": 0.1547, "lr": 1.776905118942821e-06, "epoch": 1.4755820487137519, "percentage": 29.51, "elapsed_time": "1:17:07", "remaining_time": "3:04:13", "throughput": 8798.44, "total_tokens": 40716192} +{"current_steps": 60405, "total_steps": 204665, "loss": 0.0813, "lr": 1.7768514238327244e-06, "epoch": 1.4757041995455988, "percentage": 29.51, "elapsed_time": "1:17:08", "remaining_time": "3:04:12", "throughput": 8798.49, "total_tokens": 40719520} +{"current_steps": 60410, "total_steps": 204665, "loss": 0.0564, "lr": 1.7767977230731552e-06, "epoch": 1.475826350377446, "percentage": 29.52, "elapsed_time": "1:17:08", "remaining_time": "3:04:12", "throughput": 8798.5, "total_tokens": 40722528} +{"current_steps": 60415, "total_steps": 204665, "loss": 0.2374, "lr": 1.776744016664504e-06, "epoch": 1.4759485012092932, "percentage": 29.52, "elapsed_time": "1:17:08", "remaining_time": "3:04:11", "throughput": 8798.58, "total_tokens": 40725984} +{"current_steps": 60420, "total_steps": 204665, "loss": 0.0318, "lr": 1.7766903046071613e-06, "epoch": 1.4760706520411404, "percentage": 29.52, "elapsed_time": "1:17:09", "remaining_time": "3:04:11", "throughput": 8798.63, "total_tokens": 40729248} +{"current_steps": 60425, "total_steps": 204665, "loss": 0.0308, "lr": 1.776636586901518e-06, "epoch": 1.4761928028729876, "percentage": 29.52, "elapsed_time": "1:17:09", "remaining_time": "3:04:10", "throughput": 8798.69, "total_tokens": 40732576} +{"current_steps": 60430, "total_steps": 204665, "loss": 0.0675, "lr": 1.7765828635479645e-06, "epoch": 1.4763149537048348, "percentage": 29.53, "elapsed_time": "1:17:09", "remaining_time": "3:04:10", "throughput": 8798.73, "total_tokens": 40735840} +{"current_steps": 60435, "total_steps": 204665, "loss": 0.1742, "lr": 1.7765291345468913e-06, "epoch": 1.476437104536682, "percentage": 29.53, "elapsed_time": "1:17:10", "remaining_time": "3:04:09", "throughput": 8798.78, "total_tokens": 40739104} +{"current_steps": 60440, "total_steps": 204665, "loss": 0.0036, "lr": 1.7764753998986898e-06, "epoch": 1.476559255368529, "percentage": 29.53, "elapsed_time": "1:17:10", "remaining_time": "3:04:09", "throughput": 8798.94, "total_tokens": 40742944} +{"current_steps": 60445, "total_steps": 204665, "loss": 0.1498, "lr": 1.77642165960375e-06, "epoch": 1.4766814062003761, "percentage": 29.53, "elapsed_time": "1:17:10", "remaining_time": "3:04:08", "throughput": 8798.98, "total_tokens": 40746144} +{"current_steps": 60450, "total_steps": 204665, "loss": 0.042, "lr": 1.7763679136624632e-06, "epoch": 1.4768035570322233, "percentage": 29.54, "elapsed_time": "1:17:11", "remaining_time": "3:04:08", "throughput": 8799.01, "total_tokens": 40749280} +{"current_steps": 60455, "total_steps": 204665, "loss": 0.1468, "lr": 1.77631416207522e-06, "epoch": 1.4769257078640705, "percentage": 29.54, "elapsed_time": "1:17:11", "remaining_time": "3:04:07", "throughput": 8799.07, "total_tokens": 40752608} +{"current_steps": 60460, "total_steps": 204665, "loss": 0.1179, "lr": 1.7762604048424117e-06, "epoch": 1.4770478586959177, "percentage": 29.54, "elapsed_time": "1:17:11", "remaining_time": "3:04:07", "throughput": 8799.18, "total_tokens": 40756192} +{"current_steps": 60465, "total_steps": 204665, "loss": 0.0883, "lr": 1.7762066419644286e-06, "epoch": 1.477170009527765, "percentage": 29.54, "elapsed_time": "1:17:12", "remaining_time": "3:04:07", "throughput": 8799.26, "total_tokens": 40759648} +{"current_steps": 60470, "total_steps": 204665, "loss": 0.0364, "lr": 1.7761528734416621e-06, "epoch": 1.477292160359612, "percentage": 29.55, "elapsed_time": "1:17:12", "remaining_time": "3:04:06", "throughput": 8799.37, "total_tokens": 40763232} +{"current_steps": 60475, "total_steps": 204665, "loss": 0.0974, "lr": 1.7760990992745033e-06, "epoch": 1.4774143111914593, "percentage": 29.55, "elapsed_time": "1:17:12", "remaining_time": "3:04:06", "throughput": 8799.42, "total_tokens": 40766496} +{"current_steps": 60480, "total_steps": 204665, "loss": 0.1396, "lr": 1.776045319463343e-06, "epoch": 1.4775364620233065, "percentage": 29.55, "elapsed_time": "1:17:13", "remaining_time": "3:04:05", "throughput": 8799.47, "total_tokens": 40769760} +{"current_steps": 60485, "total_steps": 204665, "loss": 0.1247, "lr": 1.7759915340085724e-06, "epoch": 1.4776586128551537, "percentage": 29.55, "elapsed_time": "1:17:13", "remaining_time": "3:04:05", "throughput": 8799.53, "total_tokens": 40773088} +{"current_steps": 60490, "total_steps": 204665, "loss": 0.2395, "lr": 1.7759377429105826e-06, "epoch": 1.4777807636870008, "percentage": 29.56, "elapsed_time": "1:17:13", "remaining_time": "3:04:04", "throughput": 8799.64, "total_tokens": 40776736} +{"current_steps": 60495, "total_steps": 204665, "loss": 0.0034, "lr": 1.775883946169765e-06, "epoch": 1.4779029145188478, "percentage": 29.56, "elapsed_time": "1:17:14", "remaining_time": "3:04:04", "throughput": 8799.68, "total_tokens": 40779936} +{"current_steps": 60500, "total_steps": 204665, "loss": 0.0059, "lr": 1.7758301437865107e-06, "epoch": 1.478025065350695, "percentage": 29.56, "elapsed_time": "1:17:14", "remaining_time": "3:04:03", "throughput": 8799.79, "total_tokens": 40783584} +{"current_steps": 60505, "total_steps": 204665, "loss": 0.084, "lr": 1.7757763357612108e-06, "epoch": 1.4781472161825422, "percentage": 29.56, "elapsed_time": "1:17:14", "remaining_time": "3:04:03", "throughput": 8799.82, "total_tokens": 40786720} +{"current_steps": 60510, "total_steps": 204665, "loss": 0.0243, "lr": 1.7757225220942567e-06, "epoch": 1.4782693670143894, "percentage": 29.57, "elapsed_time": "1:17:15", "remaining_time": "3:04:02", "throughput": 8799.95, "total_tokens": 40790496} +{"current_steps": 60515, "total_steps": 204665, "loss": 0.0969, "lr": 1.7756687027860396e-06, "epoch": 1.4783915178462366, "percentage": 29.57, "elapsed_time": "1:17:15", "remaining_time": "3:04:02", "throughput": 8800.02, "total_tokens": 40793888} +{"current_steps": 60520, "total_steps": 204665, "loss": 0.0723, "lr": 1.7756148778369512e-06, "epoch": 1.4785136686780838, "percentage": 29.57, "elapsed_time": "1:17:15", "remaining_time": "3:04:01", "throughput": 8800.05, "total_tokens": 40797024} +{"current_steps": 60525, "total_steps": 204665, "loss": 0.1222, "lr": 1.775561047247383e-06, "epoch": 1.4786358195099307, "percentage": 29.57, "elapsed_time": "1:17:16", "remaining_time": "3:04:01", "throughput": 8800.1, "total_tokens": 40800288} +{"current_steps": 60530, "total_steps": 204665, "loss": 0.219, "lr": 1.775507211017726e-06, "epoch": 1.478757970341778, "percentage": 29.58, "elapsed_time": "1:17:16", "remaining_time": "3:04:00", "throughput": 8800.13, "total_tokens": 40803424} +{"current_steps": 60535, "total_steps": 204665, "loss": 0.0009, "lr": 1.7754533691483721e-06, "epoch": 1.478880121173625, "percentage": 29.58, "elapsed_time": "1:17:17", "remaining_time": "3:04:00", "throughput": 8800.21, "total_tokens": 40806880} +{"current_steps": 60540, "total_steps": 204665, "loss": 0.0022, "lr": 1.7753995216397128e-06, "epoch": 1.4790022720054723, "percentage": 29.58, "elapsed_time": "1:17:17", "remaining_time": "3:04:00", "throughput": 8800.25, "total_tokens": 40810080} +{"current_steps": 60545, "total_steps": 204665, "loss": 0.1097, "lr": 1.7753456684921395e-06, "epoch": 1.4791244228373195, "percentage": 29.58, "elapsed_time": "1:17:17", "remaining_time": "3:03:59", "throughput": 8800.32, "total_tokens": 40813472} +{"current_steps": 60550, "total_steps": 204665, "loss": 0.0336, "lr": 1.775291809706044e-06, "epoch": 1.4792465736691667, "percentage": 29.58, "elapsed_time": "1:17:18", "remaining_time": "3:03:59", "throughput": 8800.43, "total_tokens": 40817120} +{"current_steps": 60555, "total_steps": 204665, "loss": 0.0927, "lr": 1.7752379452818179e-06, "epoch": 1.4793687245010139, "percentage": 29.59, "elapsed_time": "1:17:18", "remaining_time": "3:03:58", "throughput": 8800.47, "total_tokens": 40820320} +{"current_steps": 60560, "total_steps": 204665, "loss": 0.0943, "lr": 1.7751840752198528e-06, "epoch": 1.479490875332861, "percentage": 29.59, "elapsed_time": "1:17:18", "remaining_time": "3:03:58", "throughput": 8800.57, "total_tokens": 40823904} +{"current_steps": 60565, "total_steps": 204665, "loss": 0.0115, "lr": 1.7751301995205408e-06, "epoch": 1.4796130261647082, "percentage": 29.59, "elapsed_time": "1:17:19", "remaining_time": "3:03:57", "throughput": 8800.69, "total_tokens": 40827552} +{"current_steps": 60570, "total_steps": 204665, "loss": 0.0008, "lr": 1.7750763181842735e-06, "epoch": 1.4797351769965554, "percentage": 29.59, "elapsed_time": "1:17:19", "remaining_time": "3:03:57", "throughput": 8800.8, "total_tokens": 40831136} +{"current_steps": 60575, "total_steps": 204665, "loss": 0.0518, "lr": 1.7750224312114428e-06, "epoch": 1.4798573278284026, "percentage": 29.6, "elapsed_time": "1:17:19", "remaining_time": "3:03:56", "throughput": 8800.88, "total_tokens": 40834592} +{"current_steps": 60580, "total_steps": 204665, "loss": 0.0595, "lr": 1.7749685386024405e-06, "epoch": 1.4799794786602498, "percentage": 29.6, "elapsed_time": "1:17:20", "remaining_time": "3:03:56", "throughput": 8800.86, "total_tokens": 40837408} +{"current_steps": 60585, "total_steps": 204665, "loss": 0.1728, "lr": 1.7749146403576585e-06, "epoch": 1.4801016294920968, "percentage": 29.6, "elapsed_time": "1:17:20", "remaining_time": "3:03:55", "throughput": 8800.92, "total_tokens": 40840736} +{"current_steps": 60590, "total_steps": 204665, "loss": 0.135, "lr": 1.7748607364774886e-06, "epoch": 1.480223780323944, "percentage": 29.6, "elapsed_time": "1:17:20", "remaining_time": "3:03:55", "throughput": 8801.04, "total_tokens": 40844384} +{"current_steps": 60595, "total_steps": 204665, "loss": 0.0297, "lr": 1.7748068269623234e-06, "epoch": 1.4803459311557912, "percentage": 29.61, "elapsed_time": "1:17:21", "remaining_time": "3:03:54", "throughput": 8801.09, "total_tokens": 40847712} +{"current_steps": 60600, "total_steps": 204665, "loss": 0.3403, "lr": 1.7747529118125542e-06, "epoch": 1.4804680819876384, "percentage": 29.61, "elapsed_time": "1:17:21", "remaining_time": "3:03:54", "throughput": 8801.15, "total_tokens": 40851040} +{"current_steps": 60605, "total_steps": 204665, "loss": 0.0287, "lr": 1.7746989910285738e-06, "epoch": 1.4805902328194855, "percentage": 29.61, "elapsed_time": "1:17:21", "remaining_time": "3:03:53", "throughput": 8801.24, "total_tokens": 40854560} +{"current_steps": 60610, "total_steps": 204665, "loss": 0.0652, "lr": 1.7746450646107736e-06, "epoch": 1.4807123836513327, "percentage": 29.61, "elapsed_time": "1:17:22", "remaining_time": "3:03:53", "throughput": 8801.28, "total_tokens": 40857760} +{"current_steps": 60615, "total_steps": 204665, "loss": 0.0025, "lr": 1.7745911325595463e-06, "epoch": 1.4808345344831797, "percentage": 29.62, "elapsed_time": "1:17:22", "remaining_time": "3:03:53", "throughput": 8801.38, "total_tokens": 40861280} +{"current_steps": 60620, "total_steps": 204665, "loss": 0.0533, "lr": 1.7745371948752838e-06, "epoch": 1.4809566853150269, "percentage": 29.62, "elapsed_time": "1:17:22", "remaining_time": "3:03:52", "throughput": 8801.45, "total_tokens": 40864672} +{"current_steps": 60625, "total_steps": 204665, "loss": 0.1157, "lr": 1.774483251558379e-06, "epoch": 1.481078836146874, "percentage": 29.62, "elapsed_time": "1:17:23", "remaining_time": "3:03:52", "throughput": 8801.55, "total_tokens": 40868192} +{"current_steps": 60630, "total_steps": 204665, "loss": 0.1239, "lr": 1.7744293026092233e-06, "epoch": 1.4812009869787213, "percentage": 29.62, "elapsed_time": "1:17:23", "remaining_time": "3:03:51", "throughput": 8801.63, "total_tokens": 40871648} +{"current_steps": 60635, "total_steps": 204665, "loss": 0.123, "lr": 1.7743753480282094e-06, "epoch": 1.4813231378105685, "percentage": 29.63, "elapsed_time": "1:17:23", "remaining_time": "3:03:51", "throughput": 8801.69, "total_tokens": 40874976} +{"current_steps": 60640, "total_steps": 204665, "loss": 0.0655, "lr": 1.7743213878157297e-06, "epoch": 1.4814452886424156, "percentage": 29.63, "elapsed_time": "1:17:24", "remaining_time": "3:03:50", "throughput": 8801.83, "total_tokens": 40878752} +{"current_steps": 60645, "total_steps": 204665, "loss": 0.0573, "lr": 1.7742674219721768e-06, "epoch": 1.4815674394742628, "percentage": 29.63, "elapsed_time": "1:17:24", "remaining_time": "3:03:50", "throughput": 8801.94, "total_tokens": 40882400} +{"current_steps": 60650, "total_steps": 204665, "loss": 0.0266, "lr": 1.7742134504979425e-06, "epoch": 1.48168959030611, "percentage": 29.63, "elapsed_time": "1:17:25", "remaining_time": "3:03:49", "throughput": 8802.05, "total_tokens": 40885984} +{"current_steps": 60655, "total_steps": 204665, "loss": 0.0397, "lr": 1.77415947339342e-06, "epoch": 1.4818117411379572, "percentage": 29.64, "elapsed_time": "1:17:25", "remaining_time": "3:03:49", "throughput": 8802.12, "total_tokens": 40889376} +{"current_steps": 60660, "total_steps": 204665, "loss": 0.0456, "lr": 1.774105490659002e-06, "epoch": 1.4819338919698044, "percentage": 29.64, "elapsed_time": "1:17:25", "remaining_time": "3:03:48", "throughput": 8802.14, "total_tokens": 40892512} +{"current_steps": 60665, "total_steps": 204665, "loss": 0.1718, "lr": 1.77405150229508e-06, "epoch": 1.4820560428016516, "percentage": 29.64, "elapsed_time": "1:17:26", "remaining_time": "3:03:48", "throughput": 8802.15, "total_tokens": 40895520} +{"current_steps": 60670, "total_steps": 204665, "loss": 0.0013, "lr": 1.7739975083020474e-06, "epoch": 1.4821781936334986, "percentage": 29.64, "elapsed_time": "1:17:26", "remaining_time": "3:03:47", "throughput": 8802.16, "total_tokens": 40898592} +{"current_steps": 60675, "total_steps": 204665, "loss": 0.1473, "lr": 1.773943508680297e-06, "epoch": 1.4823003444653458, "percentage": 29.65, "elapsed_time": "1:17:26", "remaining_time": "3:03:47", "throughput": 8802.19, "total_tokens": 40901728} +{"current_steps": 60680, "total_steps": 204665, "loss": 0.0816, "lr": 1.7738895034302212e-06, "epoch": 1.482422495297193, "percentage": 29.65, "elapsed_time": "1:17:27", "remaining_time": "3:03:46", "throughput": 8802.27, "total_tokens": 40905184} +{"current_steps": 60685, "total_steps": 204665, "loss": 0.1157, "lr": 1.7738354925522128e-06, "epoch": 1.4825446461290401, "percentage": 29.65, "elapsed_time": "1:17:27", "remaining_time": "3:03:46", "throughput": 8802.41, "total_tokens": 40908960} +{"current_steps": 60690, "total_steps": 204665, "loss": 0.0656, "lr": 1.7737814760466643e-06, "epoch": 1.4826667969608873, "percentage": 29.65, "elapsed_time": "1:17:27", "remaining_time": "3:03:46", "throughput": 8802.44, "total_tokens": 40912096} +{"current_steps": 60695, "total_steps": 204665, "loss": 0.1437, "lr": 1.773727453913969e-06, "epoch": 1.4827889477927345, "percentage": 29.66, "elapsed_time": "1:17:28", "remaining_time": "3:03:45", "throughput": 8802.5, "total_tokens": 40915424} +{"current_steps": 60700, "total_steps": 204665, "loss": 0.0724, "lr": 1.7736734261545196e-06, "epoch": 1.4829110986245817, "percentage": 29.66, "elapsed_time": "1:17:28", "remaining_time": "3:03:45", "throughput": 8802.61, "total_tokens": 40919008} +{"current_steps": 60705, "total_steps": 204665, "loss": 0.1297, "lr": 1.773619392768709e-06, "epoch": 1.4830332494564287, "percentage": 29.66, "elapsed_time": "1:17:28", "remaining_time": "3:03:44", "throughput": 8802.67, "total_tokens": 40922336} +{"current_steps": 60710, "total_steps": 204665, "loss": 0.128, "lr": 1.7735653537569299e-06, "epoch": 1.4831554002882759, "percentage": 29.66, "elapsed_time": "1:17:29", "remaining_time": "3:03:44", "throughput": 8802.75, "total_tokens": 40925792} +{"current_steps": 60715, "total_steps": 204665, "loss": 0.1701, "lr": 1.7735113091195755e-06, "epoch": 1.483277551120123, "percentage": 29.67, "elapsed_time": "1:17:29", "remaining_time": "3:03:43", "throughput": 8802.73, "total_tokens": 40928672} +{"current_steps": 60720, "total_steps": 204665, "loss": 0.1278, "lr": 1.773457258857039e-06, "epoch": 1.4833997019519702, "percentage": 29.67, "elapsed_time": "1:17:29", "remaining_time": "3:03:43", "throughput": 8802.84, "total_tokens": 40932256} +{"current_steps": 60725, "total_steps": 204665, "loss": 0.0683, "lr": 1.773403202969713e-06, "epoch": 1.4835218527838174, "percentage": 29.67, "elapsed_time": "1:17:30", "remaining_time": "3:03:42", "throughput": 8802.9, "total_tokens": 40935584} +{"current_steps": 60730, "total_steps": 204665, "loss": 0.2014, "lr": 1.773349141457991e-06, "epoch": 1.4836440036156646, "percentage": 29.67, "elapsed_time": "1:17:30", "remaining_time": "3:03:42", "throughput": 8802.94, "total_tokens": 40938784} +{"current_steps": 60735, "total_steps": 204665, "loss": 0.0533, "lr": 1.7732950743222661e-06, "epoch": 1.4837661544475118, "percentage": 29.68, "elapsed_time": "1:17:30", "remaining_time": "3:03:41", "throughput": 8803.04, "total_tokens": 40942368} +{"current_steps": 60740, "total_steps": 204665, "loss": 0.1076, "lr": 1.7732410015629315e-06, "epoch": 1.483888305279359, "percentage": 29.68, "elapsed_time": "1:17:31", "remaining_time": "3:03:41", "throughput": 8803.06, "total_tokens": 40945440} +{"current_steps": 60745, "total_steps": 204665, "loss": 0.0818, "lr": 1.77318692318038e-06, "epoch": 1.4840104561112062, "percentage": 29.68, "elapsed_time": "1:17:31", "remaining_time": "3:03:40", "throughput": 8803.07, "total_tokens": 40948512} +{"current_steps": 60750, "total_steps": 204665, "loss": 0.04, "lr": 1.7731328391750055e-06, "epoch": 1.4841326069430534, "percentage": 29.68, "elapsed_time": "1:17:31", "remaining_time": "3:03:40", "throughput": 8803.13, "total_tokens": 40951840} +{"current_steps": 60755, "total_steps": 204665, "loss": 0.0523, "lr": 1.773078749547201e-06, "epoch": 1.4842547577749006, "percentage": 29.69, "elapsed_time": "1:17:32", "remaining_time": "3:03:39", "throughput": 8803.19, "total_tokens": 40955168} +{"current_steps": 60760, "total_steps": 204665, "loss": 0.051, "lr": 1.77302465429736e-06, "epoch": 1.4843769086067475, "percentage": 29.69, "elapsed_time": "1:17:32", "remaining_time": "3:03:39", "throughput": 8803.33, "total_tokens": 40958944} +{"current_steps": 60765, "total_steps": 204665, "loss": 0.0605, "lr": 1.7729705534258757e-06, "epoch": 1.4844990594385947, "percentage": 29.69, "elapsed_time": "1:17:33", "remaining_time": "3:03:38", "throughput": 8803.41, "total_tokens": 40962336} +{"current_steps": 60770, "total_steps": 204665, "loss": 0.0227, "lr": 1.7729164469331418e-06, "epoch": 1.484621210270442, "percentage": 29.69, "elapsed_time": "1:17:33", "remaining_time": "3:03:38", "throughput": 8803.55, "total_tokens": 40966112} +{"current_steps": 60775, "total_steps": 204665, "loss": 0.0049, "lr": 1.7728623348195515e-06, "epoch": 1.484743361102289, "percentage": 29.69, "elapsed_time": "1:17:33", "remaining_time": "3:03:38", "throughput": 8803.57, "total_tokens": 40969184} +{"current_steps": 60780, "total_steps": 204665, "loss": 0.1831, "lr": 1.7728082170854983e-06, "epoch": 1.4848655119341363, "percentage": 29.7, "elapsed_time": "1:17:34", "remaining_time": "3:03:37", "throughput": 8803.67, "total_tokens": 40972768} +{"current_steps": 60785, "total_steps": 204665, "loss": 0.073, "lr": 1.772754093731376e-06, "epoch": 1.4849876627659835, "percentage": 29.7, "elapsed_time": "1:17:34", "remaining_time": "3:03:37", "throughput": 8803.83, "total_tokens": 40976672} +{"current_steps": 60790, "total_steps": 204665, "loss": 0.0274, "lr": 1.772699964757578e-06, "epoch": 1.4851098135978307, "percentage": 29.7, "elapsed_time": "1:17:34", "remaining_time": "3:03:36", "throughput": 8803.93, "total_tokens": 40980192} +{"current_steps": 60795, "total_steps": 204665, "loss": 0.0567, "lr": 1.7726458301644982e-06, "epoch": 1.4852319644296776, "percentage": 29.7, "elapsed_time": "1:17:35", "remaining_time": "3:03:36", "throughput": 8804.02, "total_tokens": 40983712} +{"current_steps": 60800, "total_steps": 204665, "loss": 0.0805, "lr": 1.7725916899525298e-06, "epoch": 1.4853541152615248, "percentage": 29.71, "elapsed_time": "1:17:35", "remaining_time": "3:03:35", "throughput": 8804.09, "total_tokens": 40987104} +{"current_steps": 60805, "total_steps": 204665, "loss": 0.0451, "lr": 1.7725375441220672e-06, "epoch": 1.485476266093372, "percentage": 29.71, "elapsed_time": "1:17:35", "remaining_time": "3:03:35", "throughput": 8804.21, "total_tokens": 40990816} +{"current_steps": 60810, "total_steps": 204665, "loss": 0.1048, "lr": 1.7724833926735037e-06, "epoch": 1.4855984169252192, "percentage": 29.71, "elapsed_time": "1:17:36", "remaining_time": "3:03:34", "throughput": 8804.31, "total_tokens": 40994336} +{"current_steps": 60815, "total_steps": 204665, "loss": 0.0065, "lr": 1.772429235607233e-06, "epoch": 1.4857205677570664, "percentage": 29.71, "elapsed_time": "1:17:36", "remaining_time": "3:03:34", "throughput": 8804.46, "total_tokens": 40998240} +{"current_steps": 60820, "total_steps": 204665, "loss": 0.08, "lr": 1.7723750729236492e-06, "epoch": 1.4858427185889136, "percentage": 29.72, "elapsed_time": "1:17:36", "remaining_time": "3:03:33", "throughput": 8804.53, "total_tokens": 41001632} +{"current_steps": 60825, "total_steps": 204665, "loss": 0.0097, "lr": 1.7723209046231462e-06, "epoch": 1.4859648694207608, "percentage": 29.72, "elapsed_time": "1:17:37", "remaining_time": "3:03:33", "throughput": 8804.66, "total_tokens": 41005344} +{"current_steps": 60830, "total_steps": 204665, "loss": 0.1109, "lr": 1.772266730706118e-06, "epoch": 1.486087020252608, "percentage": 29.72, "elapsed_time": "1:17:37", "remaining_time": "3:03:33", "throughput": 8804.73, "total_tokens": 41008736} +{"current_steps": 60835, "total_steps": 204665, "loss": 0.0916, "lr": 1.772212551172958e-06, "epoch": 1.4862091710844552, "percentage": 29.72, "elapsed_time": "1:17:37", "remaining_time": "3:03:32", "throughput": 8804.79, "total_tokens": 41012064} +{"current_steps": 60840, "total_steps": 204665, "loss": 0.1368, "lr": 1.772158366024061e-06, "epoch": 1.4863313219163024, "percentage": 29.73, "elapsed_time": "1:17:38", "remaining_time": "3:03:32", "throughput": 8804.83, "total_tokens": 41015264} +{"current_steps": 60845, "total_steps": 204665, "loss": 0.1338, "lr": 1.7721041752598205e-06, "epoch": 1.4864534727481495, "percentage": 29.73, "elapsed_time": "1:17:38", "remaining_time": "3:03:31", "throughput": 8804.83, "total_tokens": 41018272} +{"current_steps": 60850, "total_steps": 204665, "loss": 0.0726, "lr": 1.7720499788806307e-06, "epoch": 1.4865756235799965, "percentage": 29.73, "elapsed_time": "1:17:38", "remaining_time": "3:03:31", "throughput": 8804.87, "total_tokens": 41021472} +{"current_steps": 60855, "total_steps": 204665, "loss": 0.1353, "lr": 1.771995776886886e-06, "epoch": 1.4866977744118437, "percentage": 29.73, "elapsed_time": "1:17:39", "remaining_time": "3:03:30", "throughput": 8804.93, "total_tokens": 41024800} +{"current_steps": 60860, "total_steps": 204665, "loss": 0.0378, "lr": 1.7719415692789803e-06, "epoch": 1.4868199252436909, "percentage": 29.74, "elapsed_time": "1:17:39", "remaining_time": "3:03:30", "throughput": 8805.01, "total_tokens": 41028256} +{"current_steps": 60865, "total_steps": 204665, "loss": 0.0713, "lr": 1.771887356057308e-06, "epoch": 1.486942076075538, "percentage": 29.74, "elapsed_time": "1:17:40", "remaining_time": "3:03:29", "throughput": 8805.11, "total_tokens": 41031840} +{"current_steps": 60870, "total_steps": 204665, "loss": 0.1079, "lr": 1.7718331372222629e-06, "epoch": 1.4870642269073853, "percentage": 29.74, "elapsed_time": "1:17:40", "remaining_time": "3:03:29", "throughput": 8805.3, "total_tokens": 41035936} +{"current_steps": 60875, "total_steps": 204665, "loss": 0.1966, "lr": 1.7717789127742399e-06, "epoch": 1.4871863777392325, "percentage": 29.74, "elapsed_time": "1:17:40", "remaining_time": "3:03:28", "throughput": 8805.41, "total_tokens": 41039520} +{"current_steps": 60880, "total_steps": 204665, "loss": 0.0857, "lr": 1.771724682713633e-06, "epoch": 1.4873085285710796, "percentage": 29.75, "elapsed_time": "1:17:41", "remaining_time": "3:03:28", "throughput": 8805.48, "total_tokens": 41042912} +{"current_steps": 60885, "total_steps": 204665, "loss": 0.1083, "lr": 1.7716704470408365e-06, "epoch": 1.4874306794029266, "percentage": 29.75, "elapsed_time": "1:17:41", "remaining_time": "3:03:27", "throughput": 8805.51, "total_tokens": 41046112} +{"current_steps": 60890, "total_steps": 204665, "loss": 0.0346, "lr": 1.7716162057562451e-06, "epoch": 1.4875528302347738, "percentage": 29.75, "elapsed_time": "1:17:41", "remaining_time": "3:03:27", "throughput": 8805.64, "total_tokens": 41049824} +{"current_steps": 60895, "total_steps": 204665, "loss": 0.0555, "lr": 1.771561958860253e-06, "epoch": 1.487674981066621, "percentage": 29.75, "elapsed_time": "1:17:42", "remaining_time": "3:03:27", "throughput": 8805.78, "total_tokens": 41053664} +{"current_steps": 60900, "total_steps": 204665, "loss": 0.0507, "lr": 1.771507706353255e-06, "epoch": 1.4877971318984682, "percentage": 29.76, "elapsed_time": "1:17:42", "remaining_time": "3:03:26", "throughput": 8805.88, "total_tokens": 41057184} +{"current_steps": 60905, "total_steps": 204665, "loss": 0.002, "lr": 1.7714534482356454e-06, "epoch": 1.4879192827303154, "percentage": 29.76, "elapsed_time": "1:17:42", "remaining_time": "3:03:26", "throughput": 8805.89, "total_tokens": 41060256} +{"current_steps": 60910, "total_steps": 204665, "loss": 0.1203, "lr": 1.7713991845078186e-06, "epoch": 1.4880414335621626, "percentage": 29.76, "elapsed_time": "1:17:43", "remaining_time": "3:03:25", "throughput": 8805.94, "total_tokens": 41063520} +{"current_steps": 60915, "total_steps": 204665, "loss": 0.0725, "lr": 1.7713449151701698e-06, "epoch": 1.4881635843940098, "percentage": 29.76, "elapsed_time": "1:17:43", "remaining_time": "3:03:25", "throughput": 8805.98, "total_tokens": 41066720} +{"current_steps": 60920, "total_steps": 204665, "loss": 0.0618, "lr": 1.7712906402230933e-06, "epoch": 1.488285735225857, "percentage": 29.77, "elapsed_time": "1:17:43", "remaining_time": "3:03:24", "throughput": 8806.01, "total_tokens": 41069920} +{"current_steps": 60925, "total_steps": 204665, "loss": 0.0585, "lr": 1.7712363596669835e-06, "epoch": 1.4884078860577041, "percentage": 29.77, "elapsed_time": "1:17:44", "remaining_time": "3:03:24", "throughput": 8806.05, "total_tokens": 41073120} +{"current_steps": 60930, "total_steps": 204665, "loss": 0.0559, "lr": 1.7711820735022354e-06, "epoch": 1.4885300368895513, "percentage": 29.77, "elapsed_time": "1:17:44", "remaining_time": "3:03:23", "throughput": 8806.08, "total_tokens": 41076256} +{"current_steps": 60935, "total_steps": 204665, "loss": 0.0344, "lr": 1.7711277817292443e-06, "epoch": 1.4886521877213985, "percentage": 29.77, "elapsed_time": "1:17:44", "remaining_time": "3:03:23", "throughput": 8806.11, "total_tokens": 41079392} +{"current_steps": 60940, "total_steps": 204665, "loss": 0.0011, "lr": 1.7710734843484044e-06, "epoch": 1.4887743385532455, "percentage": 29.78, "elapsed_time": "1:17:45", "remaining_time": "3:03:22", "throughput": 8806.26, "total_tokens": 41083232} +{"current_steps": 60945, "total_steps": 204665, "loss": 0.0877, "lr": 1.7710191813601102e-06, "epoch": 1.4888964893850927, "percentage": 29.78, "elapsed_time": "1:17:45", "remaining_time": "3:03:22", "throughput": 8806.32, "total_tokens": 41086560} +{"current_steps": 60950, "total_steps": 204665, "loss": 0.0294, "lr": 1.770964872764758e-06, "epoch": 1.4890186402169399, "percentage": 29.78, "elapsed_time": "1:17:45", "remaining_time": "3:03:21", "throughput": 8806.38, "total_tokens": 41089888} +{"current_steps": 60955, "total_steps": 204665, "loss": 0.0886, "lr": 1.770910558562741e-06, "epoch": 1.489140791048787, "percentage": 29.78, "elapsed_time": "1:17:46", "remaining_time": "3:03:21", "throughput": 8806.45, "total_tokens": 41093280} +{"current_steps": 60960, "total_steps": 204665, "loss": 0.0311, "lr": 1.7708562387544558e-06, "epoch": 1.4892629418806342, "percentage": 29.79, "elapsed_time": "1:17:46", "remaining_time": "3:03:20", "throughput": 8806.52, "total_tokens": 41096736} +{"current_steps": 60965, "total_steps": 204665, "loss": 0.0838, "lr": 1.7708019133402962e-06, "epoch": 1.4893850927124814, "percentage": 29.79, "elapsed_time": "1:17:46", "remaining_time": "3:03:20", "throughput": 8806.57, "total_tokens": 41100000} +{"current_steps": 60970, "total_steps": 204665, "loss": 0.0983, "lr": 1.7707475823206582e-06, "epoch": 1.4895072435443286, "percentage": 29.79, "elapsed_time": "1:17:47", "remaining_time": "3:03:20", "throughput": 8806.65, "total_tokens": 41103456} +{"current_steps": 60975, "total_steps": 204665, "loss": 0.0961, "lr": 1.7706932456959362e-06, "epoch": 1.4896293943761756, "percentage": 29.79, "elapsed_time": "1:17:47", "remaining_time": "3:03:19", "throughput": 8806.7, "total_tokens": 41106720} +{"current_steps": 60980, "total_steps": 204665, "loss": 0.0471, "lr": 1.7706389034665257e-06, "epoch": 1.4897515452080228, "percentage": 29.8, "elapsed_time": "1:17:48", "remaining_time": "3:03:19", "throughput": 8806.84, "total_tokens": 41110560} +{"current_steps": 60985, "total_steps": 204665, "loss": 0.0978, "lr": 1.7705845556328217e-06, "epoch": 1.48987369603987, "percentage": 29.8, "elapsed_time": "1:17:48", "remaining_time": "3:03:18", "throughput": 8806.93, "total_tokens": 41114016} +{"current_steps": 60990, "total_steps": 204665, "loss": 0.2061, "lr": 1.7705302021952198e-06, "epoch": 1.4899958468717172, "percentage": 29.8, "elapsed_time": "1:17:48", "remaining_time": "3:03:18", "throughput": 8806.96, "total_tokens": 41117152} +{"current_steps": 60995, "total_steps": 204665, "loss": 0.0534, "lr": 1.7704758431541146e-06, "epoch": 1.4901179977035643, "percentage": 29.8, "elapsed_time": "1:17:49", "remaining_time": "3:03:17", "throughput": 8807.01, "total_tokens": 41120416} +{"current_steps": 61000, "total_steps": 204665, "loss": 0.0008, "lr": 1.7704214785099024e-06, "epoch": 1.4902401485354115, "percentage": 29.8, "elapsed_time": "1:17:49", "remaining_time": "3:03:17", "throughput": 8807.09, "total_tokens": 41123872} +{"current_steps": 61005, "total_steps": 204665, "loss": 0.2285, "lr": 1.7703671082629776e-06, "epoch": 1.4903622993672587, "percentage": 29.81, "elapsed_time": "1:17:49", "remaining_time": "3:03:16", "throughput": 8807.21, "total_tokens": 41127520} +{"current_steps": 61010, "total_steps": 204665, "loss": 0.0022, "lr": 1.7703127324137358e-06, "epoch": 1.490484450199106, "percentage": 29.81, "elapsed_time": "1:17:50", "remaining_time": "3:03:16", "throughput": 8807.35, "total_tokens": 41131296} +{"current_steps": 61015, "total_steps": 204665, "loss": 0.2344, "lr": 1.7702583509625732e-06, "epoch": 1.490606601030953, "percentage": 29.81, "elapsed_time": "1:17:50", "remaining_time": "3:03:15", "throughput": 8807.35, "total_tokens": 41134304} +{"current_steps": 61020, "total_steps": 204665, "loss": 0.0826, "lr": 1.7702039639098842e-06, "epoch": 1.4907287518628003, "percentage": 29.81, "elapsed_time": "1:17:50", "remaining_time": "3:03:15", "throughput": 8807.4, "total_tokens": 41137568} +{"current_steps": 61025, "total_steps": 204665, "loss": 0.1647, "lr": 1.770149571256065e-06, "epoch": 1.4908509026946475, "percentage": 29.82, "elapsed_time": "1:17:51", "remaining_time": "3:03:14", "throughput": 8807.47, "total_tokens": 41140960} +{"current_steps": 61030, "total_steps": 204665, "loss": 0.1514, "lr": 1.7700951730015113e-06, "epoch": 1.4909730535264945, "percentage": 29.82, "elapsed_time": "1:17:51", "remaining_time": "3:03:14", "throughput": 8807.52, "total_tokens": 41144288} +{"current_steps": 61035, "total_steps": 204665, "loss": 0.068, "lr": 1.770040769146618e-06, "epoch": 1.4910952043583416, "percentage": 29.82, "elapsed_time": "1:17:51", "remaining_time": "3:03:13", "throughput": 8807.65, "total_tokens": 41148000} +{"current_steps": 61040, "total_steps": 204665, "loss": 0.0308, "lr": 1.769986359691781e-06, "epoch": 1.4912173551901888, "percentage": 29.82, "elapsed_time": "1:17:52", "remaining_time": "3:03:13", "throughput": 8807.76, "total_tokens": 41151584} +{"current_steps": 61045, "total_steps": 204665, "loss": 0.16, "lr": 1.7699319446373963e-06, "epoch": 1.491339506022036, "percentage": 29.83, "elapsed_time": "1:17:52", "remaining_time": "3:03:13", "throughput": 8807.8, "total_tokens": 41154848} +{"current_steps": 61050, "total_steps": 204665, "loss": 0.1018, "lr": 1.7698775239838596e-06, "epoch": 1.4914616568538832, "percentage": 29.83, "elapsed_time": "1:17:52", "remaining_time": "3:03:12", "throughput": 8807.88, "total_tokens": 41158304} +{"current_steps": 61055, "total_steps": 204665, "loss": 0.1217, "lr": 1.769823097731566e-06, "epoch": 1.4915838076857304, "percentage": 29.83, "elapsed_time": "1:17:53", "remaining_time": "3:03:12", "throughput": 8807.95, "total_tokens": 41161696} +{"current_steps": 61060, "total_steps": 204665, "loss": 0.1014, "lr": 1.769768665880912e-06, "epoch": 1.4917059585175774, "percentage": 29.83, "elapsed_time": "1:17:53", "remaining_time": "3:03:11", "throughput": 8808.14, "total_tokens": 41165792} +{"current_steps": 61065, "total_steps": 204665, "loss": 0.1832, "lr": 1.7697142284322931e-06, "epoch": 1.4918281093494246, "percentage": 29.84, "elapsed_time": "1:17:53", "remaining_time": "3:03:11", "throughput": 8808.2, "total_tokens": 41169120} +{"current_steps": 61070, "total_steps": 204665, "loss": 0.1397, "lr": 1.7696597853861057e-06, "epoch": 1.4919502601812717, "percentage": 29.84, "elapsed_time": "1:17:54", "remaining_time": "3:03:10", "throughput": 8808.22, "total_tokens": 41172192} +{"current_steps": 61075, "total_steps": 204665, "loss": 0.0759, "lr": 1.769605336742745e-06, "epoch": 1.492072411013119, "percentage": 29.84, "elapsed_time": "1:17:54", "remaining_time": "3:03:10", "throughput": 8808.29, "total_tokens": 41175584} +{"current_steps": 61080, "total_steps": 204665, "loss": 0.0608, "lr": 1.7695508825026074e-06, "epoch": 1.4921945618449661, "percentage": 29.84, "elapsed_time": "1:17:54", "remaining_time": "3:03:09", "throughput": 8808.3, "total_tokens": 41178656} +{"current_steps": 61085, "total_steps": 204665, "loss": 0.0982, "lr": 1.7694964226660884e-06, "epoch": 1.4923167126768133, "percentage": 29.85, "elapsed_time": "1:17:55", "remaining_time": "3:03:09", "throughput": 8808.33, "total_tokens": 41181792} +{"current_steps": 61090, "total_steps": 204665, "loss": 0.0757, "lr": 1.769441957233585e-06, "epoch": 1.4924388635086605, "percentage": 29.85, "elapsed_time": "1:17:55", "remaining_time": "3:03:08", "throughput": 8808.35, "total_tokens": 41184864} +{"current_steps": 61095, "total_steps": 204665, "loss": 0.0755, "lr": 1.7693874862054928e-06, "epoch": 1.4925610143405077, "percentage": 29.85, "elapsed_time": "1:17:56", "remaining_time": "3:03:08", "throughput": 8808.48, "total_tokens": 41188640} +{"current_steps": 61100, "total_steps": 204665, "loss": 0.0749, "lr": 1.7693330095822074e-06, "epoch": 1.4926831651723549, "percentage": 29.85, "elapsed_time": "1:17:56", "remaining_time": "3:03:07", "throughput": 8808.56, "total_tokens": 41192096} +{"current_steps": 61105, "total_steps": 204665, "loss": 0.0377, "lr": 1.7692785273641256e-06, "epoch": 1.492805316004202, "percentage": 29.86, "elapsed_time": "1:17:56", "remaining_time": "3:03:07", "throughput": 8808.56, "total_tokens": 41195104} +{"current_steps": 61110, "total_steps": 204665, "loss": 0.0257, "lr": 1.7692240395516435e-06, "epoch": 1.4929274668360493, "percentage": 29.86, "elapsed_time": "1:17:57", "remaining_time": "3:03:07", "throughput": 8808.69, "total_tokens": 41198816} +{"current_steps": 61115, "total_steps": 204665, "loss": 0.0178, "lr": 1.7691695461451573e-06, "epoch": 1.4930496176678965, "percentage": 29.86, "elapsed_time": "1:17:57", "remaining_time": "3:03:06", "throughput": 8808.75, "total_tokens": 41202144} +{"current_steps": 61120, "total_steps": 204665, "loss": 0.1223, "lr": 1.769115047145063e-06, "epoch": 1.4931717684997434, "percentage": 29.86, "elapsed_time": "1:17:57", "remaining_time": "3:03:06", "throughput": 8808.78, "total_tokens": 41205344} +{"current_steps": 61125, "total_steps": 204665, "loss": 0.1508, "lr": 1.7690605425517578e-06, "epoch": 1.4932939193315906, "percentage": 29.87, "elapsed_time": "1:17:58", "remaining_time": "3:03:05", "throughput": 8808.82, "total_tokens": 41208544} +{"current_steps": 61130, "total_steps": 204665, "loss": 0.026, "lr": 1.7690060323656368e-06, "epoch": 1.4934160701634378, "percentage": 29.87, "elapsed_time": "1:17:58", "remaining_time": "3:03:05", "throughput": 8808.82, "total_tokens": 41211552} +{"current_steps": 61135, "total_steps": 204665, "loss": 0.0044, "lr": 1.7689515165870974e-06, "epoch": 1.493538220995285, "percentage": 29.87, "elapsed_time": "1:17:58", "remaining_time": "3:03:04", "throughput": 8808.88, "total_tokens": 41214944} +{"current_steps": 61140, "total_steps": 204665, "loss": 0.0931, "lr": 1.7688969952165358e-06, "epoch": 1.4936603718271322, "percentage": 29.87, "elapsed_time": "1:17:59", "remaining_time": "3:03:04", "throughput": 8808.96, "total_tokens": 41218400} +{"current_steps": 61145, "total_steps": 204665, "loss": 0.0658, "lr": 1.7688424682543483e-06, "epoch": 1.4937825226589794, "percentage": 29.88, "elapsed_time": "1:17:59", "remaining_time": "3:03:03", "throughput": 8808.99, "total_tokens": 41221472} +{"current_steps": 61150, "total_steps": 204665, "loss": 0.0199, "lr": 1.768787935700932e-06, "epoch": 1.4939046734908263, "percentage": 29.88, "elapsed_time": "1:17:59", "remaining_time": "3:03:03", "throughput": 8809.11, "total_tokens": 41225184} +{"current_steps": 61155, "total_steps": 204665, "loss": 0.0259, "lr": 1.7687333975566828e-06, "epoch": 1.4940268243226735, "percentage": 29.88, "elapsed_time": "1:18:00", "remaining_time": "3:03:02", "throughput": 8809.15, "total_tokens": 41228448} +{"current_steps": 61160, "total_steps": 204665, "loss": 0.091, "lr": 1.7686788538219971e-06, "epoch": 1.4941489751545207, "percentage": 29.88, "elapsed_time": "1:18:00", "remaining_time": "3:03:02", "throughput": 8809.23, "total_tokens": 41231904} +{"current_steps": 61165, "total_steps": 204665, "loss": 0.0311, "lr": 1.7686243044972727e-06, "epoch": 1.494271125986368, "percentage": 29.89, "elapsed_time": "1:18:00", "remaining_time": "3:03:01", "throughput": 8809.24, "total_tokens": 41234912} +{"current_steps": 61170, "total_steps": 204665, "loss": 0.2079, "lr": 1.7685697495829054e-06, "epoch": 1.494393276818215, "percentage": 29.89, "elapsed_time": "1:18:01", "remaining_time": "3:03:01", "throughput": 8809.23, "total_tokens": 41237856} +{"current_steps": 61175, "total_steps": 204665, "loss": 0.1181, "lr": 1.768515189079292e-06, "epoch": 1.4945154276500623, "percentage": 29.89, "elapsed_time": "1:18:01", "remaining_time": "3:03:00", "throughput": 8809.27, "total_tokens": 41241056} +{"current_steps": 61180, "total_steps": 204665, "loss": 0.2076, "lr": 1.7684606229868294e-06, "epoch": 1.4946375784819095, "percentage": 29.89, "elapsed_time": "1:18:01", "remaining_time": "3:03:00", "throughput": 8809.29, "total_tokens": 41244128} +{"current_steps": 61185, "total_steps": 204665, "loss": 0.0468, "lr": 1.7684060513059147e-06, "epoch": 1.4947597293137567, "percentage": 29.9, "elapsed_time": "1:18:02", "remaining_time": "3:02:59", "throughput": 8809.32, "total_tokens": 41247328} +{"current_steps": 61190, "total_steps": 204665, "loss": 0.0097, "lr": 1.7683514740369442e-06, "epoch": 1.4948818801456039, "percentage": 29.9, "elapsed_time": "1:18:02", "remaining_time": "3:02:59", "throughput": 8809.45, "total_tokens": 41251040} +{"current_steps": 61195, "total_steps": 204665, "loss": 0.0571, "lr": 1.7682968911803157e-06, "epoch": 1.495004030977451, "percentage": 29.9, "elapsed_time": "1:18:02", "remaining_time": "3:02:59", "throughput": 8809.48, "total_tokens": 41254240} +{"current_steps": 61200, "total_steps": 204665, "loss": 0.0489, "lr": 1.768242302736425e-06, "epoch": 1.4951261818092982, "percentage": 29.9, "elapsed_time": "1:18:03", "remaining_time": "3:02:58", "throughput": 8809.51, "total_tokens": 41257376} +{"current_steps": 61205, "total_steps": 204665, "loss": 0.1054, "lr": 1.7681877087056699e-06, "epoch": 1.4952483326411452, "percentage": 29.9, "elapsed_time": "1:18:03", "remaining_time": "3:02:58", "throughput": 8809.59, "total_tokens": 41260832} +{"current_steps": 61210, "total_steps": 204665, "loss": 0.1017, "lr": 1.768133109088447e-06, "epoch": 1.4953704834729924, "percentage": 29.91, "elapsed_time": "1:18:03", "remaining_time": "3:02:57", "throughput": 8809.6, "total_tokens": 41263904} +{"current_steps": 61215, "total_steps": 204665, "loss": 0.1413, "lr": 1.7680785038851536e-06, "epoch": 1.4954926343048396, "percentage": 29.91, "elapsed_time": "1:18:04", "remaining_time": "3:02:57", "throughput": 8809.65, "total_tokens": 41267168} +{"current_steps": 61220, "total_steps": 204665, "loss": 0.073, "lr": 1.768023893096187e-06, "epoch": 1.4956147851366868, "percentage": 29.91, "elapsed_time": "1:18:04", "remaining_time": "3:02:56", "throughput": 8809.7, "total_tokens": 41270496} +{"current_steps": 61225, "total_steps": 204665, "loss": 0.1188, "lr": 1.7679692767219437e-06, "epoch": 1.495736935968534, "percentage": 29.91, "elapsed_time": "1:18:05", "remaining_time": "3:02:56", "throughput": 8809.76, "total_tokens": 41273824} +{"current_steps": 61230, "total_steps": 204665, "loss": 0.0501, "lr": 1.7679146547628214e-06, "epoch": 1.4958590868003812, "percentage": 29.92, "elapsed_time": "1:18:05", "remaining_time": "3:02:55", "throughput": 8809.85, "total_tokens": 41277408} +{"current_steps": 61235, "total_steps": 204665, "loss": 0.0318, "lr": 1.7678600272192172e-06, "epoch": 1.4959812376322283, "percentage": 29.92, "elapsed_time": "1:18:05", "remaining_time": "3:02:55", "throughput": 8809.93, "total_tokens": 41280864} +{"current_steps": 61240, "total_steps": 204665, "loss": 0.0297, "lr": 1.7678053940915284e-06, "epoch": 1.4961033884640753, "percentage": 29.92, "elapsed_time": "1:18:06", "remaining_time": "3:02:54", "throughput": 8810.0, "total_tokens": 41284256} +{"current_steps": 61245, "total_steps": 204665, "loss": 0.0066, "lr": 1.767750755380152e-06, "epoch": 1.4962255392959225, "percentage": 29.92, "elapsed_time": "1:18:06", "remaining_time": "3:02:54", "throughput": 8810.0, "total_tokens": 41287264} +{"current_steps": 61250, "total_steps": 204665, "loss": 0.0994, "lr": 1.767696111085486e-06, "epoch": 1.4963476901277697, "percentage": 29.93, "elapsed_time": "1:18:06", "remaining_time": "3:02:53", "throughput": 8810.03, "total_tokens": 41290400} +{"current_steps": 61255, "total_steps": 204665, "loss": 0.1207, "lr": 1.767641461207927e-06, "epoch": 1.4964698409596169, "percentage": 29.93, "elapsed_time": "1:18:07", "remaining_time": "3:02:53", "throughput": 8810.08, "total_tokens": 41293664} +{"current_steps": 61260, "total_steps": 204665, "loss": 0.0529, "lr": 1.7675868057478733e-06, "epoch": 1.496591991791464, "percentage": 29.93, "elapsed_time": "1:18:07", "remaining_time": "3:02:52", "throughput": 8810.17, "total_tokens": 41297184} +{"current_steps": 61265, "total_steps": 204665, "loss": 0.0674, "lr": 1.7675321447057217e-06, "epoch": 1.4967141426233113, "percentage": 29.93, "elapsed_time": "1:18:07", "remaining_time": "3:02:52", "throughput": 8810.26, "total_tokens": 41300640} +{"current_steps": 61270, "total_steps": 204665, "loss": 0.0671, "lr": 1.7674774780818698e-06, "epoch": 1.4968362934551585, "percentage": 29.94, "elapsed_time": "1:18:08", "remaining_time": "3:02:51", "throughput": 8810.26, "total_tokens": 41303648} +{"current_steps": 61275, "total_steps": 204665, "loss": 0.0012, "lr": 1.7674228058767151e-06, "epoch": 1.4969584442870056, "percentage": 29.94, "elapsed_time": "1:18:08", "remaining_time": "3:02:51", "throughput": 8810.37, "total_tokens": 41307232} +{"current_steps": 61280, "total_steps": 204665, "loss": 0.1176, "lr": 1.7673681280906556e-06, "epoch": 1.4970805951188528, "percentage": 29.94, "elapsed_time": "1:18:08", "remaining_time": "3:02:51", "throughput": 8810.4, "total_tokens": 41310432} +{"current_steps": 61285, "total_steps": 204665, "loss": 0.0818, "lr": 1.7673134447240887e-06, "epoch": 1.4972027459507, "percentage": 29.94, "elapsed_time": "1:18:09", "remaining_time": "3:02:50", "throughput": 8810.5, "total_tokens": 41314016} +{"current_steps": 61290, "total_steps": 204665, "loss": 0.0908, "lr": 1.7672587557774117e-06, "epoch": 1.4973248967825472, "percentage": 29.95, "elapsed_time": "1:18:09", "remaining_time": "3:02:50", "throughput": 8810.52, "total_tokens": 41317152} +{"current_steps": 61295, "total_steps": 204665, "loss": 0.163, "lr": 1.767204061251023e-06, "epoch": 1.4974470476143942, "percentage": 29.95, "elapsed_time": "1:18:09", "remaining_time": "3:02:49", "throughput": 8810.54, "total_tokens": 41320288} +{"current_steps": 61300, "total_steps": 204665, "loss": 0.0073, "lr": 1.7671493611453202e-06, "epoch": 1.4975691984462414, "percentage": 29.95, "elapsed_time": "1:18:10", "remaining_time": "3:02:49", "throughput": 8810.58, "total_tokens": 41323488} +{"current_steps": 61305, "total_steps": 204665, "loss": 0.006, "lr": 1.7670946554607006e-06, "epoch": 1.4976913492780886, "percentage": 29.95, "elapsed_time": "1:18:10", "remaining_time": "3:02:48", "throughput": 8810.69, "total_tokens": 41327136} +{"current_steps": 61310, "total_steps": 204665, "loss": 0.1182, "lr": 1.7670399441975622e-06, "epoch": 1.4978135001099357, "percentage": 29.96, "elapsed_time": "1:18:10", "remaining_time": "3:02:48", "throughput": 8810.8, "total_tokens": 41330784} +{"current_steps": 61315, "total_steps": 204665, "loss": 0.1859, "lr": 1.766985227356303e-06, "epoch": 1.497935650941783, "percentage": 29.96, "elapsed_time": "1:18:11", "remaining_time": "3:02:47", "throughput": 8810.85, "total_tokens": 41334112} +{"current_steps": 61320, "total_steps": 204665, "loss": 0.0557, "lr": 1.766930504937321e-06, "epoch": 1.4980578017736301, "percentage": 29.96, "elapsed_time": "1:18:11", "remaining_time": "3:02:47", "throughput": 8810.95, "total_tokens": 41337696} +{"current_steps": 61325, "total_steps": 204665, "loss": 0.178, "lr": 1.7668757769410144e-06, "epoch": 1.4981799526054773, "percentage": 29.96, "elapsed_time": "1:18:11", "remaining_time": "3:02:46", "throughput": 8811.08, "total_tokens": 41341472} +{"current_steps": 61330, "total_steps": 204665, "loss": 0.0019, "lr": 1.7668210433677808e-06, "epoch": 1.4983021034373243, "percentage": 29.97, "elapsed_time": "1:18:12", "remaining_time": "3:02:46", "throughput": 8811.21, "total_tokens": 41345248} +{"current_steps": 61335, "total_steps": 204665, "loss": 0.0774, "lr": 1.7667663042180182e-06, "epoch": 1.4984242542691715, "percentage": 29.97, "elapsed_time": "1:18:12", "remaining_time": "3:02:46", "throughput": 8811.23, "total_tokens": 41348320} +{"current_steps": 61340, "total_steps": 204665, "loss": 0.0294, "lr": 1.766711559492125e-06, "epoch": 1.4985464051010187, "percentage": 29.97, "elapsed_time": "1:18:13", "remaining_time": "3:02:45", "throughput": 8811.3, "total_tokens": 41351776} +{"current_steps": 61345, "total_steps": 204665, "loss": 0.1105, "lr": 1.7666568091904989e-06, "epoch": 1.4986685559328659, "percentage": 29.97, "elapsed_time": "1:18:13", "remaining_time": "3:02:45", "throughput": 8811.35, "total_tokens": 41355104} +{"current_steps": 61350, "total_steps": 204665, "loss": 0.0619, "lr": 1.7666020533135382e-06, "epoch": 1.498790706764713, "percentage": 29.98, "elapsed_time": "1:18:13", "remaining_time": "3:02:44", "throughput": 8811.38, "total_tokens": 41358240} +{"current_steps": 61355, "total_steps": 204665, "loss": 0.1805, "lr": 1.7665472918616412e-06, "epoch": 1.4989128575965602, "percentage": 29.98, "elapsed_time": "1:18:14", "remaining_time": "3:02:44", "throughput": 8811.42, "total_tokens": 41361504} +{"current_steps": 61360, "total_steps": 204665, "loss": 0.0039, "lr": 1.7664925248352062e-06, "epoch": 1.4990350084284074, "percentage": 29.98, "elapsed_time": "1:18:14", "remaining_time": "3:02:43", "throughput": 8811.51, "total_tokens": 41364960} +{"current_steps": 61365, "total_steps": 204665, "loss": 0.1081, "lr": 1.7664377522346312e-06, "epoch": 1.4991571592602546, "percentage": 29.98, "elapsed_time": "1:18:14", "remaining_time": "3:02:43", "throughput": 8811.55, "total_tokens": 41368224} +{"current_steps": 61370, "total_steps": 204665, "loss": 0.0004, "lr": 1.766382974060315e-06, "epoch": 1.4992793100921018, "percentage": 29.99, "elapsed_time": "1:18:15", "remaining_time": "3:02:42", "throughput": 8811.59, "total_tokens": 41371424} +{"current_steps": 61375, "total_steps": 204665, "loss": 0.036, "lr": 1.7663281903126557e-06, "epoch": 1.499401460923949, "percentage": 29.99, "elapsed_time": "1:18:15", "remaining_time": "3:02:42", "throughput": 8811.6, "total_tokens": 41374496} +{"current_steps": 61380, "total_steps": 204665, "loss": 0.12, "lr": 1.7662734009920516e-06, "epoch": 1.4995236117557962, "percentage": 29.99, "elapsed_time": "1:18:15", "remaining_time": "3:02:41", "throughput": 8811.67, "total_tokens": 41377888} +{"current_steps": 61385, "total_steps": 204665, "loss": 0.1095, "lr": 1.7662186060989011e-06, "epoch": 1.4996457625876431, "percentage": 29.99, "elapsed_time": "1:18:16", "remaining_time": "3:02:41", "throughput": 8811.71, "total_tokens": 41381152} +{"current_steps": 61390, "total_steps": 204665, "loss": 0.1075, "lr": 1.7661638056336031e-06, "epoch": 1.4997679134194903, "percentage": 30.0, "elapsed_time": "1:18:16", "remaining_time": "3:02:40", "throughput": 8811.78, "total_tokens": 41384544} +{"current_steps": 61395, "total_steps": 204665, "loss": 0.0568, "lr": 1.7661089995965556e-06, "epoch": 1.4998900642513375, "percentage": 30.0, "elapsed_time": "1:18:16", "remaining_time": "3:02:40", "throughput": 8811.84, "total_tokens": 41387872} +{"current_steps": 61400, "total_steps": 204665, "loss": 0.0425, "lr": 1.7660541879881574e-06, "epoch": 1.5000122150831847, "percentage": 30.0, "elapsed_time": "1:18:17", "remaining_time": "3:02:39", "throughput": 8811.9, "total_tokens": 41391200} +{"current_steps": 61404, "total_steps": 204665, "eval_loss": 0.15240158140659332, "epoch": 1.5001099357486625, "percentage": 30.0, "elapsed_time": "1:19:05", "remaining_time": "3:04:30", "throughput": 8723.28, "total_tokens": 41393504} +{"current_steps": 61405, "total_steps": 204665, "loss": 0.1068, "lr": 1.765999370808807e-06, "epoch": 1.500134365915032, "percentage": 30.0, "elapsed_time": "1:19:49", "remaining_time": "3:06:13", "throughput": 8643.38, "total_tokens": 41394336} +{"current_steps": 61410, "total_steps": 204665, "loss": 0.0767, "lr": 1.7659445480589034e-06, "epoch": 1.500256516746879, "percentage": 30.01, "elapsed_time": "1:19:49", "remaining_time": "3:06:13", "throughput": 8643.01, "total_tokens": 41397856} +{"current_steps": 61415, "total_steps": 204665, "loss": 0.0235, "lr": 1.765889719738845e-06, "epoch": 1.500378667578726, "percentage": 30.01, "elapsed_time": "1:19:50", "remaining_time": "3:06:12", "throughput": 8643.07, "total_tokens": 41401120} +{"current_steps": 61420, "total_steps": 204665, "loss": 0.0499, "lr": 1.7658348858490304e-06, "epoch": 1.5005008184105733, "percentage": 30.01, "elapsed_time": "1:19:50", "remaining_time": "3:06:12", "throughput": 8643.14, "total_tokens": 41404512} +{"current_steps": 61425, "total_steps": 204665, "loss": 0.0994, "lr": 1.7657800463898587e-06, "epoch": 1.5006229692424204, "percentage": 30.01, "elapsed_time": "1:19:50", "remaining_time": "3:06:11", "throughput": 8643.23, "total_tokens": 41407968} +{"current_steps": 61430, "total_steps": 204665, "loss": 0.048, "lr": 1.7657252013617283e-06, "epoch": 1.5007451200742676, "percentage": 30.01, "elapsed_time": "1:19:51", "remaining_time": "3:06:11", "throughput": 8643.33, "total_tokens": 41411424} +{"current_steps": 61435, "total_steps": 204665, "loss": 0.1001, "lr": 1.7656703507650386e-06, "epoch": 1.5008672709061148, "percentage": 30.02, "elapsed_time": "1:19:51", "remaining_time": "3:06:10", "throughput": 8643.37, "total_tokens": 41414624} +{"current_steps": 61440, "total_steps": 204665, "loss": 0.0904, "lr": 1.765615494600188e-06, "epoch": 1.500989421737962, "percentage": 30.02, "elapsed_time": "1:19:51", "remaining_time": "3:06:10", "throughput": 8643.49, "total_tokens": 41418272} +{"current_steps": 61445, "total_steps": 204665, "loss": 0.1318, "lr": 1.7655606328675754e-06, "epoch": 1.5011115725698092, "percentage": 30.02, "elapsed_time": "1:19:52", "remaining_time": "3:06:09", "throughput": 8643.56, "total_tokens": 41421600} +{"current_steps": 61450, "total_steps": 204665, "loss": 0.0189, "lr": 1.7655057655676003e-06, "epoch": 1.5012337234016564, "percentage": 30.02, "elapsed_time": "1:19:52", "remaining_time": "3:06:09", "throughput": 8643.67, "total_tokens": 41425184} +{"current_steps": 61455, "total_steps": 204665, "loss": 0.0023, "lr": 1.7654508927006612e-06, "epoch": 1.5013558742335036, "percentage": 30.03, "elapsed_time": "1:19:52", "remaining_time": "3:06:08", "throughput": 8643.73, "total_tokens": 41428448} +{"current_steps": 61460, "total_steps": 204665, "loss": 0.137, "lr": 1.7653960142671574e-06, "epoch": 1.5014780250653508, "percentage": 30.03, "elapsed_time": "1:19:53", "remaining_time": "3:06:08", "throughput": 8643.81, "total_tokens": 41431840} +{"current_steps": 61465, "total_steps": 204665, "loss": 0.1144, "lr": 1.7653411302674877e-06, "epoch": 1.501600175897198, "percentage": 30.03, "elapsed_time": "1:19:53", "remaining_time": "3:06:07", "throughput": 8643.82, "total_tokens": 41434848} +{"current_steps": 61470, "total_steps": 204665, "loss": 0.1333, "lr": 1.7652862407020517e-06, "epoch": 1.5017223267290452, "percentage": 30.03, "elapsed_time": "1:19:53", "remaining_time": "3:06:07", "throughput": 8643.84, "total_tokens": 41437856} +{"current_steps": 61475, "total_steps": 204665, "loss": 0.0135, "lr": 1.7652313455712483e-06, "epoch": 1.5018444775608923, "percentage": 30.04, "elapsed_time": "1:19:54", "remaining_time": "3:06:07", "throughput": 8644.22, "total_tokens": 41443104} +{"current_steps": 61480, "total_steps": 204665, "loss": 0.085, "lr": 1.7651764448754767e-06, "epoch": 1.5019666283927393, "percentage": 30.04, "elapsed_time": "1:19:54", "remaining_time": "3:06:06", "throughput": 8644.25, "total_tokens": 41446240} +{"current_steps": 61485, "total_steps": 204665, "loss": 0.1033, "lr": 1.7651215386151361e-06, "epoch": 1.5020887792245865, "percentage": 30.04, "elapsed_time": "1:19:55", "remaining_time": "3:06:06", "throughput": 8644.37, "total_tokens": 41449888} +{"current_steps": 61490, "total_steps": 204665, "loss": 0.0637, "lr": 1.765066626790626e-06, "epoch": 1.5022109300564337, "percentage": 30.04, "elapsed_time": "1:19:55", "remaining_time": "3:06:05", "throughput": 8644.53, "total_tokens": 41453728} +{"current_steps": 61495, "total_steps": 204665, "loss": 0.1901, "lr": 1.7650117094023456e-06, "epoch": 1.5023330808882809, "percentage": 30.05, "elapsed_time": "1:19:55", "remaining_time": "3:06:05", "throughput": 8644.62, "total_tokens": 41457184} +{"current_steps": 61500, "total_steps": 204665, "loss": 0.0401, "lr": 1.764956786450694e-06, "epoch": 1.5024552317201278, "percentage": 30.05, "elapsed_time": "1:19:56", "remaining_time": "3:06:04", "throughput": 8644.77, "total_tokens": 41461024} +{"current_steps": 61505, "total_steps": 204665, "loss": 0.147, "lr": 1.7649018579360712e-06, "epoch": 1.502577382551975, "percentage": 30.05, "elapsed_time": "1:19:56", "remaining_time": "3:06:04", "throughput": 8644.88, "total_tokens": 41464544} +{"current_steps": 61510, "total_steps": 204665, "loss": 0.0493, "lr": 1.7648469238588763e-06, "epoch": 1.5026995333838222, "percentage": 30.05, "elapsed_time": "1:19:56", "remaining_time": "3:06:03", "throughput": 8644.91, "total_tokens": 41467616} +{"current_steps": 61515, "total_steps": 204665, "loss": 0.0026, "lr": 1.764791984219509e-06, "epoch": 1.5028216842156694, "percentage": 30.06, "elapsed_time": "1:19:57", "remaining_time": "3:06:03", "throughput": 8645.06, "total_tokens": 41471456} +{"current_steps": 61520, "total_steps": 204665, "loss": 0.1136, "lr": 1.7647370390183686e-06, "epoch": 1.5029438350475166, "percentage": 30.06, "elapsed_time": "1:19:57", "remaining_time": "3:06:02", "throughput": 8645.19, "total_tokens": 41475168} +{"current_steps": 61525, "total_steps": 204665, "loss": 0.0544, "lr": 1.7646820882558546e-06, "epoch": 1.5030659858793638, "percentage": 30.06, "elapsed_time": "1:19:57", "remaining_time": "3:06:02", "throughput": 8645.32, "total_tokens": 41478880} +{"current_steps": 61530, "total_steps": 204665, "loss": 0.0734, "lr": 1.7646271319323667e-06, "epoch": 1.503188136711211, "percentage": 30.06, "elapsed_time": "1:19:58", "remaining_time": "3:06:01", "throughput": 8645.36, "total_tokens": 41482016} +{"current_steps": 61535, "total_steps": 204665, "loss": 0.1048, "lr": 1.7645721700483049e-06, "epoch": 1.5033102875430582, "percentage": 30.07, "elapsed_time": "1:19:58", "remaining_time": "3:06:01", "throughput": 8645.45, "total_tokens": 41485536} +{"current_steps": 61540, "total_steps": 204665, "loss": 0.0583, "lr": 1.7645172026040687e-06, "epoch": 1.5034324383749054, "percentage": 30.07, "elapsed_time": "1:19:58", "remaining_time": "3:06:00", "throughput": 8645.51, "total_tokens": 41488800} +{"current_steps": 61545, "total_steps": 204665, "loss": 0.1607, "lr": 1.7644622296000575e-06, "epoch": 1.5035545892067526, "percentage": 30.07, "elapsed_time": "1:19:59", "remaining_time": "3:06:00", "throughput": 8645.55, "total_tokens": 41491936} +{"current_steps": 61550, "total_steps": 204665, "loss": 0.1701, "lr": 1.7644072510366714e-06, "epoch": 1.5036767400385997, "percentage": 30.07, "elapsed_time": "1:19:59", "remaining_time": "3:05:59", "throughput": 8645.66, "total_tokens": 41495520} +{"current_steps": 61555, "total_steps": 204665, "loss": 0.0329, "lr": 1.7643522669143103e-06, "epoch": 1.503798890870447, "percentage": 30.08, "elapsed_time": "1:19:59", "remaining_time": "3:05:59", "throughput": 8645.72, "total_tokens": 41498848} +{"current_steps": 61560, "total_steps": 204665, "loss": 0.0945, "lr": 1.764297277233374e-06, "epoch": 1.5039210417022941, "percentage": 30.08, "elapsed_time": "1:20:00", "remaining_time": "3:05:58", "throughput": 8645.81, "total_tokens": 41502304} +{"current_steps": 61565, "total_steps": 204665, "loss": 0.0717, "lr": 1.764242281994262e-06, "epoch": 1.5040431925341413, "percentage": 30.08, "elapsed_time": "1:20:00", "remaining_time": "3:05:58", "throughput": 8645.86, "total_tokens": 41505504} +{"current_steps": 61570, "total_steps": 204665, "loss": 0.0826, "lr": 1.7641872811973749e-06, "epoch": 1.5041653433659883, "percentage": 30.08, "elapsed_time": "1:20:00", "remaining_time": "3:05:57", "throughput": 8645.91, "total_tokens": 41508768} +{"current_steps": 61575, "total_steps": 204665, "loss": 0.1313, "lr": 1.7641322748431122e-06, "epoch": 1.5042874941978355, "percentage": 30.09, "elapsed_time": "1:20:01", "remaining_time": "3:05:57", "throughput": 8645.94, "total_tokens": 41511904} +{"current_steps": 61580, "total_steps": 204665, "loss": 0.0715, "lr": 1.764077262931874e-06, "epoch": 1.5044096450296827, "percentage": 30.09, "elapsed_time": "1:20:01", "remaining_time": "3:05:56", "throughput": 8646.04, "total_tokens": 41515424} +{"current_steps": 61585, "total_steps": 204665, "loss": 0.0735, "lr": 1.7640222454640602e-06, "epoch": 1.5045317958615299, "percentage": 30.09, "elapsed_time": "1:20:02", "remaining_time": "3:05:56", "throughput": 8646.18, "total_tokens": 41519264} +{"current_steps": 61590, "total_steps": 204665, "loss": 0.0869, "lr": 1.7639672224400716e-06, "epoch": 1.5046539466933768, "percentage": 30.09, "elapsed_time": "1:20:02", "remaining_time": "3:05:56", "throughput": 8646.21, "total_tokens": 41522400} +{"current_steps": 61595, "total_steps": 204665, "loss": 0.0572, "lr": 1.763912193860308e-06, "epoch": 1.504776097525224, "percentage": 30.1, "elapsed_time": "1:20:02", "remaining_time": "3:05:55", "throughput": 8646.25, "total_tokens": 41525600} +{"current_steps": 61600, "total_steps": 204665, "loss": 0.0358, "lr": 1.763857159725169e-06, "epoch": 1.5048982483570712, "percentage": 30.1, "elapsed_time": "1:20:03", "remaining_time": "3:05:55", "throughput": 8646.33, "total_tokens": 41528992} +{"current_steps": 61605, "total_steps": 204665, "loss": 0.0787, "lr": 1.7638021200350555e-06, "epoch": 1.5050203991889184, "percentage": 30.1, "elapsed_time": "1:20:03", "remaining_time": "3:05:54", "throughput": 8646.39, "total_tokens": 41532320} +{"current_steps": 61610, "total_steps": 204665, "loss": 0.0351, "lr": 1.7637470747903675e-06, "epoch": 1.5051425500207656, "percentage": 30.1, "elapsed_time": "1:20:03", "remaining_time": "3:05:54", "throughput": 8646.47, "total_tokens": 41535776} +{"current_steps": 61615, "total_steps": 204665, "loss": 0.0176, "lr": 1.7636920239915053e-06, "epoch": 1.5052647008526128, "percentage": 30.11, "elapsed_time": "1:20:04", "remaining_time": "3:05:53", "throughput": 8646.53, "total_tokens": 41539040} +{"current_steps": 61620, "total_steps": 204665, "loss": 0.0372, "lr": 1.7636369676388694e-06, "epoch": 1.50538685168446, "percentage": 30.11, "elapsed_time": "1:20:04", "remaining_time": "3:05:53", "throughput": 8646.53, "total_tokens": 41541984} +{"current_steps": 61625, "total_steps": 204665, "loss": 0.2492, "lr": 1.76358190573286e-06, "epoch": 1.5055090025163071, "percentage": 30.11, "elapsed_time": "1:20:04", "remaining_time": "3:05:52", "throughput": 8646.6, "total_tokens": 41545312} +{"current_steps": 61630, "total_steps": 204665, "loss": 0.0623, "lr": 1.7635268382738774e-06, "epoch": 1.5056311533481543, "percentage": 30.11, "elapsed_time": "1:20:05", "remaining_time": "3:05:52", "throughput": 8646.63, "total_tokens": 41548448} +{"current_steps": 61635, "total_steps": 204665, "loss": 0.0943, "lr": 1.7634717652623228e-06, "epoch": 1.5057533041800015, "percentage": 30.12, "elapsed_time": "1:20:05", "remaining_time": "3:05:51", "throughput": 8646.72, "total_tokens": 41551904} +{"current_steps": 61640, "total_steps": 204665, "loss": 0.1596, "lr": 1.7634166866985958e-06, "epoch": 1.5058754550118487, "percentage": 30.12, "elapsed_time": "1:20:05", "remaining_time": "3:05:51", "throughput": 8646.78, "total_tokens": 41555232} +{"current_steps": 61645, "total_steps": 204665, "loss": 0.0694, "lr": 1.7633616025830972e-06, "epoch": 1.505997605843696, "percentage": 30.12, "elapsed_time": "1:20:06", "remaining_time": "3:05:50", "throughput": 8646.86, "total_tokens": 41558624} +{"current_steps": 61650, "total_steps": 204665, "loss": 0.1143, "lr": 1.7633065129162282e-06, "epoch": 1.506119756675543, "percentage": 30.12, "elapsed_time": "1:20:06", "remaining_time": "3:05:50", "throughput": 8646.98, "total_tokens": 41562208} +{"current_steps": 61655, "total_steps": 204665, "loss": 0.0379, "lr": 1.7632514176983886e-06, "epoch": 1.5062419075073903, "percentage": 30.12, "elapsed_time": "1:20:06", "remaining_time": "3:05:49", "throughput": 8647.06, "total_tokens": 41565664} +{"current_steps": 61660, "total_steps": 204665, "loss": 0.0141, "lr": 1.7631963169299794e-06, "epoch": 1.5063640583392373, "percentage": 30.13, "elapsed_time": "1:20:07", "remaining_time": "3:05:49", "throughput": 8647.07, "total_tokens": 41568672} +{"current_steps": 61665, "total_steps": 204665, "loss": 0.209, "lr": 1.7631412106114014e-06, "epoch": 1.5064862091710844, "percentage": 30.13, "elapsed_time": "1:20:07", "remaining_time": "3:05:48", "throughput": 8647.13, "total_tokens": 41572000} +{"current_steps": 61670, "total_steps": 204665, "loss": 0.0995, "lr": 1.763086098743055e-06, "epoch": 1.5066083600029316, "percentage": 30.13, "elapsed_time": "1:20:07", "remaining_time": "3:05:48", "throughput": 8647.17, "total_tokens": 41575200} +{"current_steps": 61675, "total_steps": 204665, "loss": 0.3212, "lr": 1.7630309813253417e-06, "epoch": 1.5067305108347788, "percentage": 30.13, "elapsed_time": "1:20:08", "remaining_time": "3:05:47", "throughput": 8647.18, "total_tokens": 41578208} +{"current_steps": 61680, "total_steps": 204665, "loss": 0.07, "lr": 1.7629758583586613e-06, "epoch": 1.5068526616666258, "percentage": 30.14, "elapsed_time": "1:20:08", "remaining_time": "3:05:47", "throughput": 8647.25, "total_tokens": 41581600} +{"current_steps": 61685, "total_steps": 204665, "loss": 0.0053, "lr": 1.7629207298434157e-06, "epoch": 1.506974812498473, "percentage": 30.14, "elapsed_time": "1:20:09", "remaining_time": "3:05:46", "throughput": 8647.31, "total_tokens": 41584928} +{"current_steps": 61690, "total_steps": 204665, "loss": 0.0005, "lr": 1.7628655957800054e-06, "epoch": 1.5070969633303202, "percentage": 30.14, "elapsed_time": "1:20:09", "remaining_time": "3:05:46", "throughput": 8647.4, "total_tokens": 41588448} +{"current_steps": 61695, "total_steps": 204665, "loss": 0.0389, "lr": 1.7628104561688311e-06, "epoch": 1.5072191141621674, "percentage": 30.14, "elapsed_time": "1:20:09", "remaining_time": "3:05:45", "throughput": 8647.51, "total_tokens": 41592096} +{"current_steps": 61700, "total_steps": 204665, "loss": 0.0391, "lr": 1.7627553110102936e-06, "epoch": 1.5073412649940146, "percentage": 30.15, "elapsed_time": "1:20:10", "remaining_time": "3:05:45", "throughput": 8647.52, "total_tokens": 41595168} +{"current_steps": 61705, "total_steps": 204665, "loss": 0.0008, "lr": 1.762700160304795e-06, "epoch": 1.5074634158258617, "percentage": 30.15, "elapsed_time": "1:20:10", "remaining_time": "3:05:44", "throughput": 8647.59, "total_tokens": 41598560} +{"current_steps": 61710, "total_steps": 204665, "loss": 0.0621, "lr": 1.7626450040527355e-06, "epoch": 1.507585566657709, "percentage": 30.15, "elapsed_time": "1:20:10", "remaining_time": "3:05:44", "throughput": 8647.61, "total_tokens": 41601632} +{"current_steps": 61715, "total_steps": 204665, "loss": 0.1054, "lr": 1.7625898422545163e-06, "epoch": 1.5077077174895561, "percentage": 30.15, "elapsed_time": "1:20:11", "remaining_time": "3:05:43", "throughput": 8647.62, "total_tokens": 41604704} +{"current_steps": 61720, "total_steps": 204665, "loss": 0.1185, "lr": 1.7625346749105385e-06, "epoch": 1.5078298683214033, "percentage": 30.16, "elapsed_time": "1:20:11", "remaining_time": "3:05:43", "throughput": 8647.73, "total_tokens": 41608352} +{"current_steps": 61725, "total_steps": 204665, "loss": 0.1244, "lr": 1.7624795020212036e-06, "epoch": 1.5079520191532505, "percentage": 30.16, "elapsed_time": "1:20:11", "remaining_time": "3:05:43", "throughput": 8647.82, "total_tokens": 41611808} +{"current_steps": 61730, "total_steps": 204665, "loss": 0.0329, "lr": 1.762424323586913e-06, "epoch": 1.5080741699850977, "percentage": 30.16, "elapsed_time": "1:20:12", "remaining_time": "3:05:42", "throughput": 8647.86, "total_tokens": 41615072} +{"current_steps": 61735, "total_steps": 204665, "loss": 0.0861, "lr": 1.7623691396080674e-06, "epoch": 1.5081963208169449, "percentage": 30.16, "elapsed_time": "1:20:12", "remaining_time": "3:05:42", "throughput": 8647.89, "total_tokens": 41618272} +{"current_steps": 61740, "total_steps": 204665, "loss": 0.0107, "lr": 1.7623139500850682e-06, "epoch": 1.508318471648792, "percentage": 30.17, "elapsed_time": "1:20:12", "remaining_time": "3:05:41", "throughput": 8647.88, "total_tokens": 41621216} +{"current_steps": 61745, "total_steps": 204665, "loss": 0.0473, "lr": 1.762258755018317e-06, "epoch": 1.5084406224806393, "percentage": 30.17, "elapsed_time": "1:20:13", "remaining_time": "3:05:41", "throughput": 8647.94, "total_tokens": 41624544} +{"current_steps": 61750, "total_steps": 204665, "loss": 0.0818, "lr": 1.7622035544082153e-06, "epoch": 1.5085627733124862, "percentage": 30.17, "elapsed_time": "1:20:13", "remaining_time": "3:05:40", "throughput": 8648.0, "total_tokens": 41627872} +{"current_steps": 61755, "total_steps": 204665, "loss": 0.0524, "lr": 1.762148348255164e-06, "epoch": 1.5086849241443334, "percentage": 30.17, "elapsed_time": "1:20:13", "remaining_time": "3:05:40", "throughput": 8648.06, "total_tokens": 41631200} +{"current_steps": 61760, "total_steps": 204665, "loss": 0.1926, "lr": 1.7620931365595651e-06, "epoch": 1.5088070749761806, "percentage": 30.18, "elapsed_time": "1:20:14", "remaining_time": "3:05:39", "throughput": 8648.03, "total_tokens": 41634016} +{"current_steps": 61765, "total_steps": 204665, "loss": 0.0595, "lr": 1.7620379193218198e-06, "epoch": 1.5089292258080278, "percentage": 30.18, "elapsed_time": "1:20:14", "remaining_time": "3:05:39", "throughput": 8648.09, "total_tokens": 41637408} +{"current_steps": 61770, "total_steps": 204665, "loss": 0.1163, "lr": 1.7619826965423301e-06, "epoch": 1.5090513766398748, "percentage": 30.18, "elapsed_time": "1:20:14", "remaining_time": "3:05:38", "throughput": 8648.18, "total_tokens": 41640928} +{"current_steps": 61775, "total_steps": 204665, "loss": 0.1921, "lr": 1.7619274682214971e-06, "epoch": 1.509173527471722, "percentage": 30.18, "elapsed_time": "1:20:15", "remaining_time": "3:05:38", "throughput": 8648.25, "total_tokens": 41644320} +{"current_steps": 61780, "total_steps": 204665, "loss": 0.0986, "lr": 1.7618722343597225e-06, "epoch": 1.5092956783035691, "percentage": 30.19, "elapsed_time": "1:20:15", "remaining_time": "3:05:37", "throughput": 8648.27, "total_tokens": 41647328} +{"current_steps": 61785, "total_steps": 204665, "loss": 0.1232, "lr": 1.7618169949574082e-06, "epoch": 1.5094178291354163, "percentage": 30.19, "elapsed_time": "1:20:16", "remaining_time": "3:05:37", "throughput": 8648.28, "total_tokens": 41650400} +{"current_steps": 61790, "total_steps": 204665, "loss": 0.0418, "lr": 1.7617617500149558e-06, "epoch": 1.5095399799672635, "percentage": 30.19, "elapsed_time": "1:20:16", "remaining_time": "3:05:36", "throughput": 8648.31, "total_tokens": 41653536} +{"current_steps": 61795, "total_steps": 204665, "loss": 0.0038, "lr": 1.7617064995327674e-06, "epoch": 1.5096621307991107, "percentage": 30.19, "elapsed_time": "1:20:16", "remaining_time": "3:05:36", "throughput": 8648.34, "total_tokens": 41656736} +{"current_steps": 61800, "total_steps": 204665, "loss": 0.0024, "lr": 1.761651243511244e-06, "epoch": 1.509784281630958, "percentage": 30.2, "elapsed_time": "1:20:17", "remaining_time": "3:05:35", "throughput": 8648.32, "total_tokens": 41659616} +{"current_steps": 61805, "total_steps": 204665, "loss": 0.0471, "lr": 1.761595981950788e-06, "epoch": 1.509906432462805, "percentage": 30.2, "elapsed_time": "1:20:17", "remaining_time": "3:05:35", "throughput": 8648.36, "total_tokens": 41662816} +{"current_steps": 61810, "total_steps": 204665, "loss": 0.0431, "lr": 1.7615407148518014e-06, "epoch": 1.5100285832946523, "percentage": 30.2, "elapsed_time": "1:20:17", "remaining_time": "3:05:34", "throughput": 8648.4, "total_tokens": 41666016} +{"current_steps": 61815, "total_steps": 204665, "loss": 0.0486, "lr": 1.7614854422146855e-06, "epoch": 1.5101507341264995, "percentage": 30.2, "elapsed_time": "1:20:18", "remaining_time": "3:05:34", "throughput": 8648.46, "total_tokens": 41669280} +{"current_steps": 61820, "total_steps": 204665, "loss": 0.1103, "lr": 1.7614301640398429e-06, "epoch": 1.5102728849583467, "percentage": 30.21, "elapsed_time": "1:20:18", "remaining_time": "3:05:33", "throughput": 8648.52, "total_tokens": 41672672} +{"current_steps": 61825, "total_steps": 204665, "loss": 0.0483, "lr": 1.7613748803276752e-06, "epoch": 1.5103950357901939, "percentage": 30.21, "elapsed_time": "1:20:18", "remaining_time": "3:05:33", "throughput": 8648.63, "total_tokens": 41676320} +{"current_steps": 61830, "total_steps": 204665, "loss": 0.0515, "lr": 1.761319591078585e-06, "epoch": 1.510517186622041, "percentage": 30.21, "elapsed_time": "1:20:19", "remaining_time": "3:05:32", "throughput": 8648.7, "total_tokens": 41679648} +{"current_steps": 61835, "total_steps": 204665, "loss": 0.1682, "lr": 1.7612642962929733e-06, "epoch": 1.510639337453888, "percentage": 30.21, "elapsed_time": "1:20:19", "remaining_time": "3:05:32", "throughput": 8648.76, "total_tokens": 41683040} +{"current_steps": 61840, "total_steps": 204665, "loss": 0.1369, "lr": 1.7612089959712434e-06, "epoch": 1.5107614882857352, "percentage": 30.22, "elapsed_time": "1:20:19", "remaining_time": "3:05:31", "throughput": 8648.82, "total_tokens": 41686368} +{"current_steps": 61845, "total_steps": 204665, "loss": 0.1149, "lr": 1.7611536901137969e-06, "epoch": 1.5108836391175824, "percentage": 30.22, "elapsed_time": "1:20:20", "remaining_time": "3:05:31", "throughput": 8648.86, "total_tokens": 41689568} +{"current_steps": 61850, "total_steps": 204665, "loss": 0.0051, "lr": 1.7610983787210357e-06, "epoch": 1.5110057899494296, "percentage": 30.22, "elapsed_time": "1:20:20", "remaining_time": "3:05:31", "throughput": 8648.91, "total_tokens": 41692832} +{"current_steps": 61855, "total_steps": 204665, "loss": 0.087, "lr": 1.7610430617933628e-06, "epoch": 1.5111279407812768, "percentage": 30.22, "elapsed_time": "1:20:20", "remaining_time": "3:05:30", "throughput": 8649.01, "total_tokens": 41696480} +{"current_steps": 61860, "total_steps": 204665, "loss": 0.002, "lr": 1.7609877393311798e-06, "epoch": 1.5112500916131237, "percentage": 30.23, "elapsed_time": "1:20:21", "remaining_time": "3:05:30", "throughput": 8649.03, "total_tokens": 41699616} +{"current_steps": 61865, "total_steps": 204665, "loss": 0.1238, "lr": 1.7609324113348892e-06, "epoch": 1.511372242444971, "percentage": 30.23, "elapsed_time": "1:20:21", "remaining_time": "3:05:29", "throughput": 8649.11, "total_tokens": 41703072} +{"current_steps": 61870, "total_steps": 204665, "loss": 0.1283, "lr": 1.7608770778048936e-06, "epoch": 1.5114943932768181, "percentage": 30.23, "elapsed_time": "1:20:22", "remaining_time": "3:05:29", "throughput": 8649.18, "total_tokens": 41706464} +{"current_steps": 61875, "total_steps": 204665, "loss": 0.1226, "lr": 1.7608217387415954e-06, "epoch": 1.5116165441086653, "percentage": 30.23, "elapsed_time": "1:20:22", "remaining_time": "3:05:28", "throughput": 8649.22, "total_tokens": 41709664} +{"current_steps": 61880, "total_steps": 204665, "loss": 0.2157, "lr": 1.7607663941453966e-06, "epoch": 1.5117386949405125, "percentage": 30.23, "elapsed_time": "1:20:22", "remaining_time": "3:05:28", "throughput": 8649.36, "total_tokens": 41713504} +{"current_steps": 61885, "total_steps": 204665, "loss": 0.1551, "lr": 1.7607110440167e-06, "epoch": 1.5118608457723597, "percentage": 30.24, "elapsed_time": "1:20:23", "remaining_time": "3:05:27", "throughput": 8649.48, "total_tokens": 41717216} +{"current_steps": 61890, "total_steps": 204665, "loss": 0.0153, "lr": 1.7606556883559081e-06, "epoch": 1.5119829966042069, "percentage": 30.24, "elapsed_time": "1:20:23", "remaining_time": "3:05:27", "throughput": 8649.54, "total_tokens": 41720544} +{"current_steps": 61895, "total_steps": 204665, "loss": 0.156, "lr": 1.7606003271634235e-06, "epoch": 1.512105147436054, "percentage": 30.24, "elapsed_time": "1:20:23", "remaining_time": "3:05:26", "throughput": 8649.64, "total_tokens": 41724128} +{"current_steps": 61900, "total_steps": 204665, "loss": 0.1273, "lr": 1.760544960439649e-06, "epoch": 1.5122272982679013, "percentage": 30.24, "elapsed_time": "1:20:24", "remaining_time": "3:05:26", "throughput": 8649.77, "total_tokens": 41727840} +{"current_steps": 61905, "total_steps": 204665, "loss": 0.0023, "lr": 1.7604895881849865e-06, "epoch": 1.5123494490997484, "percentage": 30.25, "elapsed_time": "1:20:24", "remaining_time": "3:05:25", "throughput": 8649.87, "total_tokens": 41731424} +{"current_steps": 61910, "total_steps": 204665, "loss": 0.0992, "lr": 1.7604342103998393e-06, "epoch": 1.5124715999315956, "percentage": 30.25, "elapsed_time": "1:20:24", "remaining_time": "3:05:25", "throughput": 8649.92, "total_tokens": 41734752} +{"current_steps": 61915, "total_steps": 204665, "loss": 0.0758, "lr": 1.76037882708461e-06, "epoch": 1.5125937507634428, "percentage": 30.25, "elapsed_time": "1:20:25", "remaining_time": "3:05:24", "throughput": 8649.99, "total_tokens": 41738080} +{"current_steps": 61920, "total_steps": 204665, "loss": 0.0601, "lr": 1.7603234382397014e-06, "epoch": 1.51271590159529, "percentage": 30.25, "elapsed_time": "1:20:25", "remaining_time": "3:05:24", "throughput": 8650.01, "total_tokens": 41741216} +{"current_steps": 61925, "total_steps": 204665, "loss": 0.1144, "lr": 1.7602680438655164e-06, "epoch": 1.512838052427137, "percentage": 30.26, "elapsed_time": "1:20:25", "remaining_time": "3:05:23", "throughput": 8650.15, "total_tokens": 41744992} +{"current_steps": 61930, "total_steps": 204665, "loss": 0.048, "lr": 1.7602126439624576e-06, "epoch": 1.5129602032589842, "percentage": 30.26, "elapsed_time": "1:20:26", "remaining_time": "3:05:23", "throughput": 8650.23, "total_tokens": 41748448} +{"current_steps": 61935, "total_steps": 204665, "loss": 0.072, "lr": 1.7601572385309279e-06, "epoch": 1.5130823540908314, "percentage": 30.26, "elapsed_time": "1:20:26", "remaining_time": "3:05:23", "throughput": 8650.27, "total_tokens": 41751648} +{"current_steps": 61940, "total_steps": 204665, "loss": 0.0386, "lr": 1.7601018275713301e-06, "epoch": 1.5132045049226786, "percentage": 30.26, "elapsed_time": "1:20:26", "remaining_time": "3:05:22", "throughput": 8650.34, "total_tokens": 41755040} +{"current_steps": 61945, "total_steps": 204665, "loss": 0.0827, "lr": 1.760046411084068e-06, "epoch": 1.5133266557545257, "percentage": 30.27, "elapsed_time": "1:20:27", "remaining_time": "3:05:22", "throughput": 8650.37, "total_tokens": 41758240} +{"current_steps": 61950, "total_steps": 204665, "loss": 0.1892, "lr": 1.7599909890695434e-06, "epoch": 1.5134488065863727, "percentage": 30.27, "elapsed_time": "1:20:27", "remaining_time": "3:05:21", "throughput": 8650.42, "total_tokens": 41761504} +{"current_steps": 61955, "total_steps": 204665, "loss": 0.0432, "lr": 1.7599355615281602e-06, "epoch": 1.51357095741822, "percentage": 30.27, "elapsed_time": "1:20:28", "remaining_time": "3:05:21", "throughput": 8650.47, "total_tokens": 41764768} +{"current_steps": 61960, "total_steps": 204665, "loss": 0.0385, "lr": 1.7598801284603211e-06, "epoch": 1.513693108250067, "percentage": 30.27, "elapsed_time": "1:20:28", "remaining_time": "3:05:20", "throughput": 8650.64, "total_tokens": 41768800} +{"current_steps": 61965, "total_steps": 204665, "loss": 0.0589, "lr": 1.7598246898664293e-06, "epoch": 1.5138152590819143, "percentage": 30.28, "elapsed_time": "1:20:28", "remaining_time": "3:05:20", "throughput": 8650.64, "total_tokens": 41771808} +{"current_steps": 61970, "total_steps": 204665, "loss": 0.0018, "lr": 1.759769245746888e-06, "epoch": 1.5139374099137615, "percentage": 30.28, "elapsed_time": "1:20:29", "remaining_time": "3:05:19", "throughput": 8650.68, "total_tokens": 41775072} +{"current_steps": 61975, "total_steps": 204665, "loss": 0.2485, "lr": 1.7597137961021004e-06, "epoch": 1.5140595607456087, "percentage": 30.28, "elapsed_time": "1:20:29", "remaining_time": "3:05:19", "throughput": 8650.8, "total_tokens": 41778784} +{"current_steps": 61980, "total_steps": 204665, "loss": 0.0541, "lr": 1.7596583409324697e-06, "epoch": 1.5141817115774558, "percentage": 30.28, "elapsed_time": "1:20:29", "remaining_time": "3:05:18", "throughput": 8650.98, "total_tokens": 41782816} +{"current_steps": 61985, "total_steps": 204665, "loss": 0.1162, "lr": 1.7596028802383995e-06, "epoch": 1.514303862409303, "percentage": 30.29, "elapsed_time": "1:20:30", "remaining_time": "3:05:18", "throughput": 8651.15, "total_tokens": 41786848} +{"current_steps": 61990, "total_steps": 204665, "loss": 0.0587, "lr": 1.7595474140202927e-06, "epoch": 1.5144260132411502, "percentage": 30.29, "elapsed_time": "1:20:30", "remaining_time": "3:05:17", "throughput": 8651.23, "total_tokens": 41790304} +{"current_steps": 61995, "total_steps": 204665, "loss": 0.1085, "lr": 1.7594919422785525e-06, "epoch": 1.5145481640729974, "percentage": 30.29, "elapsed_time": "1:20:30", "remaining_time": "3:05:17", "throughput": 8651.26, "total_tokens": 41793440} +{"current_steps": 62000, "total_steps": 204665, "loss": 0.0604, "lr": 1.7594364650135827e-06, "epoch": 1.5146703149048446, "percentage": 30.29, "elapsed_time": "1:20:31", "remaining_time": "3:05:16", "throughput": 8651.38, "total_tokens": 41797088} +{"current_steps": 62005, "total_steps": 204665, "loss": 0.0746, "lr": 1.759380982225787e-06, "epoch": 1.5147924657366918, "percentage": 30.3, "elapsed_time": "1:20:31", "remaining_time": "3:05:16", "throughput": 8651.42, "total_tokens": 41800224} +{"current_steps": 62010, "total_steps": 204665, "loss": 0.0012, "lr": 1.7593254939155684e-06, "epoch": 1.514914616568539, "percentage": 30.3, "elapsed_time": "1:20:31", "remaining_time": "3:05:15", "throughput": 8651.47, "total_tokens": 41803488} +{"current_steps": 62015, "total_steps": 204665, "loss": 0.1087, "lr": 1.7592700000833305e-06, "epoch": 1.515036767400386, "percentage": 30.3, "elapsed_time": "1:20:32", "remaining_time": "3:05:15", "throughput": 8651.56, "total_tokens": 41806944} +{"current_steps": 62020, "total_steps": 204665, "loss": 0.0663, "lr": 1.759214500729477e-06, "epoch": 1.5151589182322331, "percentage": 30.3, "elapsed_time": "1:20:32", "remaining_time": "3:05:15", "throughput": 8651.57, "total_tokens": 41810016} +{"current_steps": 62025, "total_steps": 204665, "loss": 0.1765, "lr": 1.7591589958544113e-06, "epoch": 1.5152810690640803, "percentage": 30.31, "elapsed_time": "1:20:33", "remaining_time": "3:05:14", "throughput": 8651.68, "total_tokens": 41813664} +{"current_steps": 62030, "total_steps": 204665, "loss": 0.0018, "lr": 1.7591034854585373e-06, "epoch": 1.5154032198959275, "percentage": 30.31, "elapsed_time": "1:20:33", "remaining_time": "3:05:14", "throughput": 8651.77, "total_tokens": 41817120} +{"current_steps": 62035, "total_steps": 204665, "loss": 0.086, "lr": 1.7590479695422587e-06, "epoch": 1.5155253707277745, "percentage": 30.31, "elapsed_time": "1:20:33", "remaining_time": "3:05:13", "throughput": 8651.85, "total_tokens": 41820512} +{"current_steps": 62040, "total_steps": 204665, "loss": 0.1341, "lr": 1.758992448105979e-06, "epoch": 1.5156475215596217, "percentage": 30.31, "elapsed_time": "1:20:34", "remaining_time": "3:05:13", "throughput": 8651.97, "total_tokens": 41824160} +{"current_steps": 62045, "total_steps": 204665, "loss": 0.0295, "lr": 1.7589369211501019e-06, "epoch": 1.5157696723914689, "percentage": 30.32, "elapsed_time": "1:20:34", "remaining_time": "3:05:12", "throughput": 8651.98, "total_tokens": 41827168} +{"current_steps": 62050, "total_steps": 204665, "loss": 0.0363, "lr": 1.7588813886750315e-06, "epoch": 1.515891823223316, "percentage": 30.32, "elapsed_time": "1:20:34", "remaining_time": "3:05:12", "throughput": 8652.06, "total_tokens": 41830560} +{"current_steps": 62055, "total_steps": 204665, "loss": 0.0621, "lr": 1.7588258506811716e-06, "epoch": 1.5160139740551632, "percentage": 30.32, "elapsed_time": "1:20:35", "remaining_time": "3:05:11", "throughput": 8652.08, "total_tokens": 41833568} +{"current_steps": 62060, "total_steps": 204665, "loss": 0.1542, "lr": 1.7587703071689259e-06, "epoch": 1.5161361248870104, "percentage": 30.32, "elapsed_time": "1:20:35", "remaining_time": "3:05:11", "throughput": 8652.17, "total_tokens": 41837024} +{"current_steps": 62065, "total_steps": 204665, "loss": 0.0697, "lr": 1.7587147581386988e-06, "epoch": 1.5162582757188576, "percentage": 30.33, "elapsed_time": "1:20:35", "remaining_time": "3:05:10", "throughput": 8652.3, "total_tokens": 41840864} +{"current_steps": 62070, "total_steps": 204665, "loss": 0.0945, "lr": 1.7586592035908935e-06, "epoch": 1.5163804265507048, "percentage": 30.33, "elapsed_time": "1:20:36", "remaining_time": "3:05:10", "throughput": 8652.48, "total_tokens": 41844896} +{"current_steps": 62075, "total_steps": 204665, "loss": 0.14, "lr": 1.7586036435259147e-06, "epoch": 1.516502577382552, "percentage": 30.33, "elapsed_time": "1:20:36", "remaining_time": "3:05:09", "throughput": 8652.6, "total_tokens": 41848544} +{"current_steps": 62080, "total_steps": 204665, "loss": 0.0913, "lr": 1.758548077944166e-06, "epoch": 1.5166247282143992, "percentage": 30.33, "elapsed_time": "1:20:36", "remaining_time": "3:05:09", "throughput": 8652.63, "total_tokens": 41851680} +{"current_steps": 62085, "total_steps": 204665, "loss": 0.0311, "lr": 1.7584925068460516e-06, "epoch": 1.5167468790462464, "percentage": 30.33, "elapsed_time": "1:20:37", "remaining_time": "3:05:08", "throughput": 8652.68, "total_tokens": 41854880} +{"current_steps": 62090, "total_steps": 204665, "loss": 0.0375, "lr": 1.7584369302319757e-06, "epoch": 1.5168690298780936, "percentage": 30.34, "elapsed_time": "1:20:37", "remaining_time": "3:05:08", "throughput": 8652.67, "total_tokens": 41857760} +{"current_steps": 62095, "total_steps": 204665, "loss": 0.0032, "lr": 1.7583813481023424e-06, "epoch": 1.5169911807099408, "percentage": 30.34, "elapsed_time": "1:20:37", "remaining_time": "3:05:07", "throughput": 8652.71, "total_tokens": 41860960} +{"current_steps": 62100, "total_steps": 204665, "loss": 0.0452, "lr": 1.758325760457556e-06, "epoch": 1.517113331541788, "percentage": 30.34, "elapsed_time": "1:20:38", "remaining_time": "3:05:07", "throughput": 8652.86, "total_tokens": 41864736} +{"current_steps": 62105, "total_steps": 204665, "loss": 0.0375, "lr": 1.7582701672980208e-06, "epoch": 1.517235482373635, "percentage": 30.34, "elapsed_time": "1:20:38", "remaining_time": "3:05:06", "throughput": 8652.94, "total_tokens": 41868128} +{"current_steps": 62110, "total_steps": 204665, "loss": 0.1857, "lr": 1.7582145686241412e-06, "epoch": 1.5173576332054821, "percentage": 30.35, "elapsed_time": "1:20:38", "remaining_time": "3:05:06", "throughput": 8652.99, "total_tokens": 41871328} +{"current_steps": 62115, "total_steps": 204665, "loss": 0.0529, "lr": 1.7581589644363208e-06, "epoch": 1.5174797840373293, "percentage": 30.35, "elapsed_time": "1:20:39", "remaining_time": "3:05:05", "throughput": 8653.01, "total_tokens": 41874400} +{"current_steps": 62120, "total_steps": 204665, "loss": 0.0553, "lr": 1.7581033547349648e-06, "epoch": 1.5176019348691765, "percentage": 30.35, "elapsed_time": "1:20:39", "remaining_time": "3:05:05", "throughput": 8653.08, "total_tokens": 41877728} +{"current_steps": 62125, "total_steps": 204665, "loss": 0.0991, "lr": 1.7580477395204774e-06, "epoch": 1.5177240857010235, "percentage": 30.35, "elapsed_time": "1:20:39", "remaining_time": "3:05:04", "throughput": 8653.1, "total_tokens": 41880736} +{"current_steps": 62130, "total_steps": 204665, "loss": 0.1013, "lr": 1.7579921187932628e-06, "epoch": 1.5178462365328707, "percentage": 30.36, "elapsed_time": "1:20:40", "remaining_time": "3:05:04", "throughput": 8653.3, "total_tokens": 41884896} +{"current_steps": 62135, "total_steps": 204665, "loss": 0.2171, "lr": 1.7579364925537257e-06, "epoch": 1.5179683873647178, "percentage": 30.36, "elapsed_time": "1:20:40", "remaining_time": "3:05:03", "throughput": 8653.36, "total_tokens": 41888160} +{"current_steps": 62140, "total_steps": 204665, "loss": 0.2605, "lr": 1.7578808608022704e-06, "epoch": 1.518090538196565, "percentage": 30.36, "elapsed_time": "1:20:41", "remaining_time": "3:05:03", "throughput": 8653.38, "total_tokens": 41891168} +{"current_steps": 62145, "total_steps": 204665, "loss": 0.125, "lr": 1.7578252235393017e-06, "epoch": 1.5182126890284122, "percentage": 30.36, "elapsed_time": "1:20:41", "remaining_time": "3:05:02", "throughput": 8653.43, "total_tokens": 41894368} +{"current_steps": 62150, "total_steps": 204665, "loss": 0.0043, "lr": 1.7577695807652243e-06, "epoch": 1.5183348398602594, "percentage": 30.37, "elapsed_time": "1:20:41", "remaining_time": "3:05:02", "throughput": 8653.43, "total_tokens": 41897312} +{"current_steps": 62155, "total_steps": 204665, "loss": 0.0465, "lr": 1.7577139324804424e-06, "epoch": 1.5184569906921066, "percentage": 30.37, "elapsed_time": "1:20:42", "remaining_time": "3:05:01", "throughput": 8653.51, "total_tokens": 41900704} +{"current_steps": 62160, "total_steps": 204665, "loss": 0.1147, "lr": 1.757658278685361e-06, "epoch": 1.5185791415239538, "percentage": 30.37, "elapsed_time": "1:20:42", "remaining_time": "3:05:01", "throughput": 8653.56, "total_tokens": 41903904} +{"current_steps": 62165, "total_steps": 204665, "loss": 0.109, "lr": 1.7576026193803853e-06, "epoch": 1.518701292355801, "percentage": 30.37, "elapsed_time": "1:20:42", "remaining_time": "3:05:00", "throughput": 8653.6, "total_tokens": 41907104} +{"current_steps": 62170, "total_steps": 204665, "loss": 0.2651, "lr": 1.7575469545659192e-06, "epoch": 1.5188234431876482, "percentage": 30.38, "elapsed_time": "1:20:43", "remaining_time": "3:05:00", "throughput": 8653.6, "total_tokens": 41909984} +{"current_steps": 62175, "total_steps": 204665, "loss": 0.0926, "lr": 1.757491284242368e-06, "epoch": 1.5189455940194954, "percentage": 30.38, "elapsed_time": "1:20:43", "remaining_time": "3:04:59", "throughput": 8653.61, "total_tokens": 41912992} +{"current_steps": 62180, "total_steps": 204665, "loss": 0.0986, "lr": 1.7574356084101362e-06, "epoch": 1.5190677448513425, "percentage": 30.38, "elapsed_time": "1:20:43", "remaining_time": "3:04:59", "throughput": 8653.63, "total_tokens": 41916064} +{"current_steps": 62185, "total_steps": 204665, "loss": 0.0296, "lr": 1.7573799270696293e-06, "epoch": 1.5191898956831897, "percentage": 30.38, "elapsed_time": "1:20:44", "remaining_time": "3:04:58", "throughput": 8653.7, "total_tokens": 41919392} +{"current_steps": 62190, "total_steps": 204665, "loss": 0.1071, "lr": 1.7573242402212515e-06, "epoch": 1.519312046515037, "percentage": 30.39, "elapsed_time": "1:20:44", "remaining_time": "3:04:58", "throughput": 8653.77, "total_tokens": 41922720} +{"current_steps": 62195, "total_steps": 204665, "loss": 0.1806, "lr": 1.7572685478654083e-06, "epoch": 1.519434197346884, "percentage": 30.39, "elapsed_time": "1:20:44", "remaining_time": "3:04:57", "throughput": 8653.94, "total_tokens": 41926624} +{"current_steps": 62200, "total_steps": 204665, "loss": 0.0039, "lr": 1.7572128500025048e-06, "epoch": 1.519556348178731, "percentage": 30.39, "elapsed_time": "1:20:45", "remaining_time": "3:04:57", "throughput": 8653.97, "total_tokens": 41929760} +{"current_steps": 62205, "total_steps": 204665, "loss": 0.1229, "lr": 1.7571571466329454e-06, "epoch": 1.5196784990105783, "percentage": 30.39, "elapsed_time": "1:20:45", "remaining_time": "3:04:56", "throughput": 8654.02, "total_tokens": 41932960} +{"current_steps": 62210, "total_steps": 204665, "loss": 0.1609, "lr": 1.7571014377571358e-06, "epoch": 1.5198006498424255, "percentage": 30.4, "elapsed_time": "1:20:45", "remaining_time": "3:04:56", "throughput": 8654.11, "total_tokens": 41936416} +{"current_steps": 62215, "total_steps": 204665, "loss": 0.0593, "lr": 1.757045723375481e-06, "epoch": 1.5199228006742724, "percentage": 30.4, "elapsed_time": "1:20:46", "remaining_time": "3:04:56", "throughput": 8654.13, "total_tokens": 41939488} +{"current_steps": 62220, "total_steps": 204665, "loss": 0.1467, "lr": 1.7569900034883856e-06, "epoch": 1.5200449515061196, "percentage": 30.4, "elapsed_time": "1:20:46", "remaining_time": "3:04:55", "throughput": 8654.2, "total_tokens": 41942816} +{"current_steps": 62225, "total_steps": 204665, "loss": 0.0073, "lr": 1.7569342780962555e-06, "epoch": 1.5201671023379668, "percentage": 30.4, "elapsed_time": "1:20:46", "remaining_time": "3:04:55", "throughput": 8654.33, "total_tokens": 41946528} +{"current_steps": 62230, "total_steps": 204665, "loss": 0.0223, "lr": 1.756878547199496e-06, "epoch": 1.520289253169814, "percentage": 30.41, "elapsed_time": "1:20:47", "remaining_time": "3:04:54", "throughput": 8654.54, "total_tokens": 41950688} +{"current_steps": 62235, "total_steps": 204665, "loss": 0.1032, "lr": 1.756822810798512e-06, "epoch": 1.5204114040016612, "percentage": 30.41, "elapsed_time": "1:20:47", "remaining_time": "3:04:54", "throughput": 8654.6, "total_tokens": 41953952} +{"current_steps": 62240, "total_steps": 204665, "loss": 0.0415, "lr": 1.756767068893709e-06, "epoch": 1.5205335548335084, "percentage": 30.41, "elapsed_time": "1:20:47", "remaining_time": "3:04:53", "throughput": 8654.65, "total_tokens": 41957216} +{"current_steps": 62245, "total_steps": 204665, "loss": 0.086, "lr": 1.7567113214854921e-06, "epoch": 1.5206557056653556, "percentage": 30.41, "elapsed_time": "1:20:48", "remaining_time": "3:04:53", "throughput": 8654.62, "total_tokens": 41959904} +{"current_steps": 62250, "total_steps": 204665, "loss": 0.032, "lr": 1.756655568574267e-06, "epoch": 1.5207778564972028, "percentage": 30.42, "elapsed_time": "1:20:48", "remaining_time": "3:04:52", "throughput": 8654.74, "total_tokens": 41963488} +{"current_steps": 62255, "total_steps": 204665, "loss": 0.0492, "lr": 1.756599810160439e-06, "epoch": 1.52090000732905, "percentage": 30.42, "elapsed_time": "1:20:48", "remaining_time": "3:04:52", "throughput": 8654.72, "total_tokens": 41966304} +{"current_steps": 62260, "total_steps": 204665, "loss": 0.159, "lr": 1.756544046244414e-06, "epoch": 1.5210221581608971, "percentage": 30.42, "elapsed_time": "1:20:49", "remaining_time": "3:04:51", "throughput": 8654.83, "total_tokens": 41969888} +{"current_steps": 62265, "total_steps": 204665, "loss": 0.0011, "lr": 1.756488276826597e-06, "epoch": 1.5211443089927443, "percentage": 30.42, "elapsed_time": "1:20:49", "remaining_time": "3:04:51", "throughput": 8654.84, "total_tokens": 41972832} +{"current_steps": 62270, "total_steps": 204665, "loss": 0.0602, "lr": 1.756432501907394e-06, "epoch": 1.5212664598245915, "percentage": 30.43, "elapsed_time": "1:20:49", "remaining_time": "3:04:50", "throughput": 8654.86, "total_tokens": 41975840} +{"current_steps": 62275, "total_steps": 204665, "loss": 0.0315, "lr": 1.7563767214872104e-06, "epoch": 1.5213886106564387, "percentage": 30.43, "elapsed_time": "1:20:50", "remaining_time": "3:04:50", "throughput": 8654.93, "total_tokens": 41979168} +{"current_steps": 62280, "total_steps": 204665, "loss": 0.228, "lr": 1.7563209355664514e-06, "epoch": 1.521510761488286, "percentage": 30.43, "elapsed_time": "1:20:50", "remaining_time": "3:04:49", "throughput": 8654.96, "total_tokens": 41982304} +{"current_steps": 62285, "total_steps": 204665, "loss": 0.0013, "lr": 1.7562651441455237e-06, "epoch": 1.5216329123201329, "percentage": 30.43, "elapsed_time": "1:20:51", "remaining_time": "3:04:49", "throughput": 8655.08, "total_tokens": 41985888} +{"current_steps": 62290, "total_steps": 204665, "loss": 0.174, "lr": 1.7562093472248321e-06, "epoch": 1.52175506315198, "percentage": 30.44, "elapsed_time": "1:20:51", "remaining_time": "3:04:48", "throughput": 8655.13, "total_tokens": 41989152} +{"current_steps": 62295, "total_steps": 204665, "loss": 0.0824, "lr": 1.7561535448047828e-06, "epoch": 1.5218772139838272, "percentage": 30.44, "elapsed_time": "1:20:51", "remaining_time": "3:04:48", "throughput": 8655.16, "total_tokens": 41992224} +{"current_steps": 62300, "total_steps": 204665, "loss": 0.0686, "lr": 1.7560977368857814e-06, "epoch": 1.5219993648156744, "percentage": 30.44, "elapsed_time": "1:20:52", "remaining_time": "3:04:47", "throughput": 8655.21, "total_tokens": 41995488} +{"current_steps": 62305, "total_steps": 204665, "loss": 0.1924, "lr": 1.756041923468234e-06, "epoch": 1.5221215156475214, "percentage": 30.44, "elapsed_time": "1:20:52", "remaining_time": "3:04:47", "throughput": 8655.31, "total_tokens": 41999008} +{"current_steps": 62310, "total_steps": 204665, "loss": 0.0175, "lr": 1.7559861045525467e-06, "epoch": 1.5222436664793686, "percentage": 30.44, "elapsed_time": "1:20:52", "remaining_time": "3:04:46", "throughput": 8655.42, "total_tokens": 42002592} +{"current_steps": 62315, "total_steps": 204665, "loss": 0.0774, "lr": 1.7559302801391247e-06, "epoch": 1.5223658173112158, "percentage": 30.45, "elapsed_time": "1:20:53", "remaining_time": "3:04:46", "throughput": 8655.53, "total_tokens": 42006176} +{"current_steps": 62320, "total_steps": 204665, "loss": 0.039, "lr": 1.7558744502283745e-06, "epoch": 1.522487968143063, "percentage": 30.45, "elapsed_time": "1:20:53", "remaining_time": "3:04:45", "throughput": 8655.64, "total_tokens": 42009760} +{"current_steps": 62325, "total_steps": 204665, "loss": 0.079, "lr": 1.7558186148207018e-06, "epoch": 1.5226101189749102, "percentage": 30.45, "elapsed_time": "1:20:53", "remaining_time": "3:04:45", "throughput": 8655.7, "total_tokens": 42013088} +{"current_steps": 62330, "total_steps": 204665, "loss": 0.0347, "lr": 1.7557627739165133e-06, "epoch": 1.5227322698067574, "percentage": 30.45, "elapsed_time": "1:20:54", "remaining_time": "3:04:44", "throughput": 8655.84, "total_tokens": 42016864} +{"current_steps": 62335, "total_steps": 204665, "loss": 0.119, "lr": 1.7557069275162145e-06, "epoch": 1.5228544206386045, "percentage": 30.46, "elapsed_time": "1:20:54", "remaining_time": "3:04:44", "throughput": 8655.91, "total_tokens": 42020192} +{"current_steps": 62340, "total_steps": 204665, "loss": 0.038, "lr": 1.7556510756202114e-06, "epoch": 1.5229765714704517, "percentage": 30.46, "elapsed_time": "1:20:54", "remaining_time": "3:04:43", "throughput": 8656.0, "total_tokens": 42023712} +{"current_steps": 62345, "total_steps": 204665, "loss": 0.0948, "lr": 1.7555952182289104e-06, "epoch": 1.523098722302299, "percentage": 30.46, "elapsed_time": "1:20:55", "remaining_time": "3:04:43", "throughput": 8656.03, "total_tokens": 42026784} +{"current_steps": 62350, "total_steps": 204665, "loss": 0.1393, "lr": 1.755539355342718e-06, "epoch": 1.5232208731341461, "percentage": 30.46, "elapsed_time": "1:20:55", "remaining_time": "3:04:42", "throughput": 8656.04, "total_tokens": 42029792} +{"current_steps": 62355, "total_steps": 204665, "loss": 0.0318, "lr": 1.75548348696204e-06, "epoch": 1.5233430239659933, "percentage": 30.47, "elapsed_time": "1:20:55", "remaining_time": "3:04:42", "throughput": 8656.05, "total_tokens": 42032800} +{"current_steps": 62360, "total_steps": 204665, "loss": 0.1744, "lr": 1.7554276130872832e-06, "epoch": 1.5234651747978405, "percentage": 30.47, "elapsed_time": "1:20:56", "remaining_time": "3:04:41", "throughput": 8656.1, "total_tokens": 42036000} +{"current_steps": 62365, "total_steps": 204665, "loss": 0.1032, "lr": 1.7553717337188534e-06, "epoch": 1.5235873256296877, "percentage": 30.47, "elapsed_time": "1:20:56", "remaining_time": "3:04:41", "throughput": 8656.29, "total_tokens": 42040032} +{"current_steps": 62370, "total_steps": 204665, "loss": 0.0543, "lr": 1.7553158488571572e-06, "epoch": 1.5237094764615347, "percentage": 30.47, "elapsed_time": "1:20:56", "remaining_time": "3:04:40", "throughput": 8656.35, "total_tokens": 42043296} +{"current_steps": 62375, "total_steps": 204665, "loss": 0.1868, "lr": 1.755259958502601e-06, "epoch": 1.5238316272933818, "percentage": 30.48, "elapsed_time": "1:20:57", "remaining_time": "3:04:40", "throughput": 8656.46, "total_tokens": 42046880} +{"current_steps": 62380, "total_steps": 204665, "loss": 0.0293, "lr": 1.755204062655591e-06, "epoch": 1.523953778125229, "percentage": 30.48, "elapsed_time": "1:20:57", "remaining_time": "3:04:39", "throughput": 8656.5, "total_tokens": 42050080} +{"current_steps": 62385, "total_steps": 204665, "loss": 0.0824, "lr": 1.7551481613165341e-06, "epoch": 1.5240759289570762, "percentage": 30.48, "elapsed_time": "1:20:57", "remaining_time": "3:04:39", "throughput": 8656.6, "total_tokens": 42053600} +{"current_steps": 62390, "total_steps": 204665, "loss": 0.0032, "lr": 1.755092254485837e-06, "epoch": 1.5241980797889234, "percentage": 30.48, "elapsed_time": "1:20:58", "remaining_time": "3:04:38", "throughput": 8656.64, "total_tokens": 42056736} +{"current_steps": 62395, "total_steps": 204665, "loss": 0.1097, "lr": 1.7550363421639056e-06, "epoch": 1.5243202306207704, "percentage": 30.49, "elapsed_time": "1:20:58", "remaining_time": "3:04:38", "throughput": 8656.83, "total_tokens": 42060768} +{"current_steps": 62400, "total_steps": 204665, "loss": 0.1254, "lr": 1.7549804243511469e-06, "epoch": 1.5244423814526176, "percentage": 30.49, "elapsed_time": "1:20:59", "remaining_time": "3:04:38", "throughput": 8656.93, "total_tokens": 42064288} +{"current_steps": 62405, "total_steps": 204665, "loss": 0.1479, "lr": 1.7549245010479674e-06, "epoch": 1.5245645322844648, "percentage": 30.49, "elapsed_time": "1:20:59", "remaining_time": "3:04:37", "throughput": 8656.99, "total_tokens": 42067616} +{"current_steps": 62410, "total_steps": 204665, "loss": 0.0592, "lr": 1.7548685722547738e-06, "epoch": 1.524686683116312, "percentage": 30.49, "elapsed_time": "1:20:59", "remaining_time": "3:04:37", "throughput": 8657.09, "total_tokens": 42071136} +{"current_steps": 62415, "total_steps": 204665, "loss": 0.0017, "lr": 1.7548126379719732e-06, "epoch": 1.5248088339481591, "percentage": 30.5, "elapsed_time": "1:21:00", "remaining_time": "3:04:36", "throughput": 8657.16, "total_tokens": 42074464} +{"current_steps": 62420, "total_steps": 204665, "loss": 0.1663, "lr": 1.754756698199972e-06, "epoch": 1.5249309847800063, "percentage": 30.5, "elapsed_time": "1:21:00", "remaining_time": "3:04:36", "throughput": 8657.23, "total_tokens": 42077792} +{"current_steps": 62425, "total_steps": 204665, "loss": 0.1711, "lr": 1.7547007529391769e-06, "epoch": 1.5250531356118535, "percentage": 30.5, "elapsed_time": "1:21:00", "remaining_time": "3:04:35", "throughput": 8657.28, "total_tokens": 42080992} +{"current_steps": 62430, "total_steps": 204665, "loss": 0.0016, "lr": 1.7546448021899952e-06, "epoch": 1.5251752864437007, "percentage": 30.5, "elapsed_time": "1:21:01", "remaining_time": "3:04:35", "throughput": 8657.33, "total_tokens": 42084256} +{"current_steps": 62435, "total_steps": 204665, "loss": 0.0055, "lr": 1.754588845952833e-06, "epoch": 1.525297437275548, "percentage": 30.51, "elapsed_time": "1:21:01", "remaining_time": "3:04:34", "throughput": 8657.37, "total_tokens": 42087456} +{"current_steps": 62440, "total_steps": 204665, "loss": 0.0075, "lr": 1.7545328842280985e-06, "epoch": 1.525419588107395, "percentage": 30.51, "elapsed_time": "1:21:01", "remaining_time": "3:04:34", "throughput": 8657.4, "total_tokens": 42090528} +{"current_steps": 62445, "total_steps": 204665, "loss": 0.0679, "lr": 1.7544769170161973e-06, "epoch": 1.5255417389392423, "percentage": 30.51, "elapsed_time": "1:21:02", "remaining_time": "3:04:33", "throughput": 8657.53, "total_tokens": 42094240} +{"current_steps": 62450, "total_steps": 204665, "loss": 0.0625, "lr": 1.7544209443175372e-06, "epoch": 1.5256638897710895, "percentage": 30.51, "elapsed_time": "1:21:02", "remaining_time": "3:04:33", "throughput": 8657.6, "total_tokens": 42097568} +{"current_steps": 62455, "total_steps": 204665, "loss": 0.1018, "lr": 1.7543649661325254e-06, "epoch": 1.5257860406029367, "percentage": 30.52, "elapsed_time": "1:21:02", "remaining_time": "3:04:32", "throughput": 8657.65, "total_tokens": 42100832} +{"current_steps": 62460, "total_steps": 204665, "loss": 0.0524, "lr": 1.7543089824615682e-06, "epoch": 1.5259081914347836, "percentage": 30.52, "elapsed_time": "1:21:03", "remaining_time": "3:04:32", "throughput": 8657.7, "total_tokens": 42104032} +{"current_steps": 62465, "total_steps": 204665, "loss": 0.0736, "lr": 1.7542529933050735e-06, "epoch": 1.5260303422666308, "percentage": 30.52, "elapsed_time": "1:21:03", "remaining_time": "3:04:31", "throughput": 8657.83, "total_tokens": 42107744} +{"current_steps": 62470, "total_steps": 204665, "loss": 0.0697, "lr": 1.754196998663448e-06, "epoch": 1.526152493098478, "percentage": 30.52, "elapsed_time": "1:21:03", "remaining_time": "3:04:31", "throughput": 8657.94, "total_tokens": 42111392} +{"current_steps": 62475, "total_steps": 204665, "loss": 0.0861, "lr": 1.7541409985370993e-06, "epoch": 1.5262746439303252, "percentage": 30.53, "elapsed_time": "1:21:04", "remaining_time": "3:04:30", "throughput": 8657.98, "total_tokens": 42114528} +{"current_steps": 62480, "total_steps": 204665, "loss": 0.2115, "lr": 1.754084992926434e-06, "epoch": 1.5263967947621724, "percentage": 30.53, "elapsed_time": "1:21:04", "remaining_time": "3:04:30", "throughput": 8658.06, "total_tokens": 42117920} +{"current_steps": 62485, "total_steps": 204665, "loss": 0.0692, "lr": 1.75402898183186e-06, "epoch": 1.5265189455940193, "percentage": 30.53, "elapsed_time": "1:21:04", "remaining_time": "3:04:29", "throughput": 8658.15, "total_tokens": 42121376} +{"current_steps": 62490, "total_steps": 204665, "loss": 0.1354, "lr": 1.7539729652537848e-06, "epoch": 1.5266410964258665, "percentage": 30.53, "elapsed_time": "1:21:05", "remaining_time": "3:04:29", "throughput": 8658.19, "total_tokens": 42124512} +{"current_steps": 62495, "total_steps": 204665, "loss": 0.063, "lr": 1.753916943192615e-06, "epoch": 1.5267632472577137, "percentage": 30.54, "elapsed_time": "1:21:05", "remaining_time": "3:04:28", "throughput": 8658.26, "total_tokens": 42127840} +{"current_steps": 62500, "total_steps": 204665, "loss": 0.0009, "lr": 1.7538609156487585e-06, "epoch": 1.526885398089561, "percentage": 30.54, "elapsed_time": "1:21:05", "remaining_time": "3:04:28", "throughput": 8658.36, "total_tokens": 42131360} +{"current_steps": 62505, "total_steps": 204665, "loss": 0.1256, "lr": 1.7538048826226225e-06, "epoch": 1.527007548921408, "percentage": 30.54, "elapsed_time": "1:21:06", "remaining_time": "3:04:27", "throughput": 8658.45, "total_tokens": 42134816} +{"current_steps": 62510, "total_steps": 204665, "loss": 0.0323, "lr": 1.753748844114615e-06, "epoch": 1.5271296997532553, "percentage": 30.54, "elapsed_time": "1:21:06", "remaining_time": "3:04:27", "throughput": 8658.47, "total_tokens": 42137824} +{"current_steps": 62515, "total_steps": 204665, "loss": 0.1573, "lr": 1.753692800125143e-06, "epoch": 1.5272518505851025, "percentage": 30.55, "elapsed_time": "1:21:07", "remaining_time": "3:04:26", "throughput": 8658.55, "total_tokens": 42141280} +{"current_steps": 62520, "total_steps": 204665, "loss": 0.0461, "lr": 1.753636750654614e-06, "epoch": 1.5273740014169497, "percentage": 30.55, "elapsed_time": "1:21:07", "remaining_time": "3:04:26", "throughput": 8658.65, "total_tokens": 42144736} +{"current_steps": 62525, "total_steps": 204665, "loss": 0.0953, "lr": 1.7535806957034365e-06, "epoch": 1.5274961522487969, "percentage": 30.55, "elapsed_time": "1:21:07", "remaining_time": "3:04:25", "throughput": 8658.77, "total_tokens": 42148384} +{"current_steps": 62530, "total_steps": 204665, "loss": 0.0707, "lr": 1.7535246352720167e-06, "epoch": 1.527618303080644, "percentage": 30.55, "elapsed_time": "1:21:08", "remaining_time": "3:04:25", "throughput": 8658.86, "total_tokens": 42151904} +{"current_steps": 62535, "total_steps": 204665, "loss": 0.0573, "lr": 1.7534685693607637e-06, "epoch": 1.5277404539124912, "percentage": 30.55, "elapsed_time": "1:21:08", "remaining_time": "3:04:24", "throughput": 8658.95, "total_tokens": 42155360} +{"current_steps": 62540, "total_steps": 204665, "loss": 0.1038, "lr": 1.753412497970084e-06, "epoch": 1.5278626047443384, "percentage": 30.56, "elapsed_time": "1:21:08", "remaining_time": "3:04:24", "throughput": 8658.99, "total_tokens": 42158496} +{"current_steps": 62545, "total_steps": 204665, "loss": 0.1688, "lr": 1.7533564211003865e-06, "epoch": 1.5279847555761856, "percentage": 30.56, "elapsed_time": "1:21:09", "remaining_time": "3:04:23", "throughput": 8659.07, "total_tokens": 42161888} +{"current_steps": 62550, "total_steps": 204665, "loss": 0.1191, "lr": 1.7533003387520784e-06, "epoch": 1.5281069064080326, "percentage": 30.56, "elapsed_time": "1:21:09", "remaining_time": "3:04:23", "throughput": 8659.2, "total_tokens": 42165600} +{"current_steps": 62555, "total_steps": 204665, "loss": 0.0018, "lr": 1.7532442509255673e-06, "epoch": 1.5282290572398798, "percentage": 30.56, "elapsed_time": "1:21:09", "remaining_time": "3:04:23", "throughput": 8659.27, "total_tokens": 42168928} +{"current_steps": 62560, "total_steps": 204665, "loss": 0.0421, "lr": 1.753188157621262e-06, "epoch": 1.528351208071727, "percentage": 30.57, "elapsed_time": "1:21:10", "remaining_time": "3:04:22", "throughput": 8659.33, "total_tokens": 42172192} +{"current_steps": 62565, "total_steps": 204665, "loss": 0.1245, "lr": 1.7531320588395693e-06, "epoch": 1.5284733589035742, "percentage": 30.57, "elapsed_time": "1:21:10", "remaining_time": "3:04:22", "throughput": 8659.71, "total_tokens": 42177504} +{"current_steps": 62570, "total_steps": 204665, "loss": 0.0582, "lr": 1.7530759545808977e-06, "epoch": 1.5285955097354211, "percentage": 30.57, "elapsed_time": "1:21:10", "remaining_time": "3:04:21", "throughput": 8659.74, "total_tokens": 42180640} +{"current_steps": 62575, "total_steps": 204665, "loss": 0.0592, "lr": 1.7530198448456556e-06, "epoch": 1.5287176605672683, "percentage": 30.57, "elapsed_time": "1:21:11", "remaining_time": "3:04:21", "throughput": 8659.81, "total_tokens": 42183968} +{"current_steps": 62580, "total_steps": 204665, "loss": 0.001, "lr": 1.7529637296342502e-06, "epoch": 1.5288398113991155, "percentage": 30.58, "elapsed_time": "1:21:11", "remaining_time": "3:04:20", "throughput": 8659.95, "total_tokens": 42187744} +{"current_steps": 62585, "total_steps": 204665, "loss": 0.0325, "lr": 1.7529076089470905e-06, "epoch": 1.5289619622309627, "percentage": 30.58, "elapsed_time": "1:21:11", "remaining_time": "3:04:20", "throughput": 8660.06, "total_tokens": 42191264} +{"current_steps": 62590, "total_steps": 204665, "loss": 0.0993, "lr": 1.752851482784584e-06, "epoch": 1.52908411306281, "percentage": 30.58, "elapsed_time": "1:21:12", "remaining_time": "3:04:19", "throughput": 8660.19, "total_tokens": 42195040} +{"current_steps": 62595, "total_steps": 204665, "loss": 0.0747, "lr": 1.7527953511471387e-06, "epoch": 1.529206263894657, "percentage": 30.58, "elapsed_time": "1:21:12", "remaining_time": "3:04:19", "throughput": 8660.27, "total_tokens": 42198368} +{"current_steps": 62600, "total_steps": 204665, "loss": 0.0597, "lr": 1.7527392140351634e-06, "epoch": 1.5293284147265043, "percentage": 30.59, "elapsed_time": "1:21:12", "remaining_time": "3:04:18", "throughput": 8660.33, "total_tokens": 42201696} +{"current_steps": 62605, "total_steps": 204665, "loss": 0.0322, "lr": 1.7526830714490662e-06, "epoch": 1.5294505655583515, "percentage": 30.59, "elapsed_time": "1:21:13", "remaining_time": "3:04:18", "throughput": 8660.39, "total_tokens": 42204960} +{"current_steps": 62610, "total_steps": 204665, "loss": 0.1846, "lr": 1.752626923389255e-06, "epoch": 1.5295727163901987, "percentage": 30.59, "elapsed_time": "1:21:13", "remaining_time": "3:04:17", "throughput": 8660.45, "total_tokens": 42208288} +{"current_steps": 62615, "total_steps": 204665, "loss": 0.0035, "lr": 1.7525707698561382e-06, "epoch": 1.5296948672220458, "percentage": 30.59, "elapsed_time": "1:21:14", "remaining_time": "3:04:17", "throughput": 8660.57, "total_tokens": 42211936} +{"current_steps": 62620, "total_steps": 204665, "loss": 0.0012, "lr": 1.7525146108501248e-06, "epoch": 1.529817018053893, "percentage": 30.6, "elapsed_time": "1:21:14", "remaining_time": "3:04:16", "throughput": 8660.64, "total_tokens": 42215328} +{"current_steps": 62625, "total_steps": 204665, "loss": 0.0523, "lr": 1.7524584463716226e-06, "epoch": 1.5299391688857402, "percentage": 30.6, "elapsed_time": "1:21:14", "remaining_time": "3:04:16", "throughput": 8660.69, "total_tokens": 42218528} +{"current_steps": 62630, "total_steps": 204665, "loss": 0.1023, "lr": 1.7524022764210401e-06, "epoch": 1.5300613197175874, "percentage": 30.6, "elapsed_time": "1:21:15", "remaining_time": "3:04:15", "throughput": 8660.71, "total_tokens": 42221536} +{"current_steps": 62635, "total_steps": 204665, "loss": 0.0021, "lr": 1.7523461009987862e-06, "epoch": 1.5301834705494346, "percentage": 30.6, "elapsed_time": "1:21:15", "remaining_time": "3:04:15", "throughput": 8660.82, "total_tokens": 42225120} +{"current_steps": 62640, "total_steps": 204665, "loss": 0.2342, "lr": 1.7522899201052686e-06, "epoch": 1.5303056213812816, "percentage": 30.61, "elapsed_time": "1:21:15", "remaining_time": "3:04:14", "throughput": 8660.93, "total_tokens": 42228704} +{"current_steps": 62645, "total_steps": 204665, "loss": 0.1148, "lr": 1.7522337337408968e-06, "epoch": 1.5304277722131288, "percentage": 30.61, "elapsed_time": "1:21:16", "remaining_time": "3:04:14", "throughput": 8660.97, "total_tokens": 42231904} +{"current_steps": 62650, "total_steps": 204665, "loss": 0.0011, "lr": 1.7521775419060786e-06, "epoch": 1.530549923044976, "percentage": 30.61, "elapsed_time": "1:21:16", "remaining_time": "3:04:13", "throughput": 8661.01, "total_tokens": 42235040} +{"current_steps": 62655, "total_steps": 204665, "loss": 0.0391, "lr": 1.7521213446012232e-06, "epoch": 1.5306720738768231, "percentage": 30.61, "elapsed_time": "1:21:16", "remaining_time": "3:04:13", "throughput": 8661.12, "total_tokens": 42238624} +{"current_steps": 62660, "total_steps": 204665, "loss": 0.0433, "lr": 1.752065141826739e-06, "epoch": 1.53079422470867, "percentage": 30.62, "elapsed_time": "1:21:17", "remaining_time": "3:04:12", "throughput": 8661.2, "total_tokens": 42242016} +{"current_steps": 62665, "total_steps": 204665, "loss": 0.1549, "lr": 1.7520089335830348e-06, "epoch": 1.5309163755405173, "percentage": 30.62, "elapsed_time": "1:21:17", "remaining_time": "3:04:12", "throughput": 8661.38, "total_tokens": 42245984} +{"current_steps": 62670, "total_steps": 204665, "loss": 0.0822, "lr": 1.7519527198705193e-06, "epoch": 1.5310385263723645, "percentage": 30.62, "elapsed_time": "1:21:17", "remaining_time": "3:04:12", "throughput": 8661.44, "total_tokens": 42249312} +{"current_steps": 62675, "total_steps": 204665, "loss": 0.0554, "lr": 1.7518965006896016e-06, "epoch": 1.5311606772042117, "percentage": 30.62, "elapsed_time": "1:21:18", "remaining_time": "3:04:11", "throughput": 8661.47, "total_tokens": 42252384} +{"current_steps": 62680, "total_steps": 204665, "loss": 0.1014, "lr": 1.7518402760406903e-06, "epoch": 1.5312828280360589, "percentage": 30.63, "elapsed_time": "1:21:18", "remaining_time": "3:04:11", "throughput": 8661.54, "total_tokens": 42255712} +{"current_steps": 62685, "total_steps": 204665, "loss": 0.0966, "lr": 1.7517840459241944e-06, "epoch": 1.531404978867906, "percentage": 30.63, "elapsed_time": "1:21:18", "remaining_time": "3:04:10", "throughput": 8661.59, "total_tokens": 42258912} +{"current_steps": 62690, "total_steps": 204665, "loss": 0.0772, "lr": 1.7517278103405225e-06, "epoch": 1.5315271296997532, "percentage": 30.63, "elapsed_time": "1:21:19", "remaining_time": "3:04:10", "throughput": 8661.62, "total_tokens": 42262048} +{"current_steps": 62695, "total_steps": 204665, "loss": 0.1186, "lr": 1.7516715692900834e-06, "epoch": 1.5316492805316004, "percentage": 30.63, "elapsed_time": "1:21:19", "remaining_time": "3:04:09", "throughput": 8661.73, "total_tokens": 42265632} +{"current_steps": 62700, "total_steps": 204665, "loss": 0.0012, "lr": 1.751615322773287e-06, "epoch": 1.5317714313634476, "percentage": 30.64, "elapsed_time": "1:21:19", "remaining_time": "3:04:09", "throughput": 8661.78, "total_tokens": 42268832} +{"current_steps": 62705, "total_steps": 204665, "loss": 0.0519, "lr": 1.7515590707905416e-06, "epoch": 1.5318935821952948, "percentage": 30.64, "elapsed_time": "1:21:20", "remaining_time": "3:04:08", "throughput": 8661.82, "total_tokens": 42271968} +{"current_steps": 62710, "total_steps": 204665, "loss": 0.0301, "lr": 1.7515028133422566e-06, "epoch": 1.532015733027142, "percentage": 30.64, "elapsed_time": "1:21:20", "remaining_time": "3:04:08", "throughput": 8661.89, "total_tokens": 42275360} +{"current_steps": 62715, "total_steps": 204665, "loss": 0.0958, "lr": 1.751446550428841e-06, "epoch": 1.5321378838589892, "percentage": 30.64, "elapsed_time": "1:21:20", "remaining_time": "3:04:07", "throughput": 8661.94, "total_tokens": 42278560} +{"current_steps": 62720, "total_steps": 204665, "loss": 0.0023, "lr": 1.7513902820507038e-06, "epoch": 1.5322600346908364, "percentage": 30.65, "elapsed_time": "1:21:21", "remaining_time": "3:04:07", "throughput": 8662.07, "total_tokens": 42282272} +{"current_steps": 62725, "total_steps": 204665, "loss": 0.1115, "lr": 1.7513340082082547e-06, "epoch": 1.5323821855226836, "percentage": 30.65, "elapsed_time": "1:21:21", "remaining_time": "3:04:06", "throughput": 8662.22, "total_tokens": 42286112} +{"current_steps": 62730, "total_steps": 204665, "loss": 0.0755, "lr": 1.7512777289019022e-06, "epoch": 1.5325043363545305, "percentage": 30.65, "elapsed_time": "1:21:22", "remaining_time": "3:04:06", "throughput": 8662.28, "total_tokens": 42289376} +{"current_steps": 62735, "total_steps": 204665, "loss": 0.0407, "lr": 1.7512214441320564e-06, "epoch": 1.5326264871863777, "percentage": 30.65, "elapsed_time": "1:21:22", "remaining_time": "3:04:05", "throughput": 8662.36, "total_tokens": 42292768} +{"current_steps": 62740, "total_steps": 204665, "loss": 0.0592, "lr": 1.751165153899126e-06, "epoch": 1.532748638018225, "percentage": 30.65, "elapsed_time": "1:21:22", "remaining_time": "3:04:05", "throughput": 8662.37, "total_tokens": 42295776} +{"current_steps": 62745, "total_steps": 204665, "loss": 0.0503, "lr": 1.7511088582035204e-06, "epoch": 1.532870788850072, "percentage": 30.66, "elapsed_time": "1:21:23", "remaining_time": "3:04:04", "throughput": 8662.39, "total_tokens": 42298784} +{"current_steps": 62750, "total_steps": 204665, "loss": 0.0556, "lr": 1.7510525570456496e-06, "epoch": 1.532992939681919, "percentage": 30.66, "elapsed_time": "1:21:23", "remaining_time": "3:04:04", "throughput": 8662.51, "total_tokens": 42302432} +{"current_steps": 62755, "total_steps": 204665, "loss": 0.0758, "lr": 1.7509962504259223e-06, "epoch": 1.5331150905137663, "percentage": 30.66, "elapsed_time": "1:21:23", "remaining_time": "3:04:03", "throughput": 8662.6, "total_tokens": 42305952} +{"current_steps": 62760, "total_steps": 204665, "loss": 0.1926, "lr": 1.7509399383447482e-06, "epoch": 1.5332372413456135, "percentage": 30.66, "elapsed_time": "1:21:24", "remaining_time": "3:04:03", "throughput": 8662.62, "total_tokens": 42308960} +{"current_steps": 62765, "total_steps": 204665, "loss": 0.1107, "lr": 1.7508836208025367e-06, "epoch": 1.5333593921774606, "percentage": 30.67, "elapsed_time": "1:21:24", "remaining_time": "3:04:02", "throughput": 8662.73, "total_tokens": 42312544} +{"current_steps": 62770, "total_steps": 204665, "loss": 0.0015, "lr": 1.750827297799698e-06, "epoch": 1.5334815430093078, "percentage": 30.67, "elapsed_time": "1:21:24", "remaining_time": "3:04:02", "throughput": 8662.83, "total_tokens": 42316064} +{"current_steps": 62775, "total_steps": 204665, "loss": 0.0993, "lr": 1.7507709693366412e-06, "epoch": 1.533603693841155, "percentage": 30.67, "elapsed_time": "1:21:25", "remaining_time": "3:04:01", "throughput": 8662.88, "total_tokens": 42319328} +{"current_steps": 62780, "total_steps": 204665, "loss": 0.0441, "lr": 1.7507146354137759e-06, "epoch": 1.5337258446730022, "percentage": 30.67, "elapsed_time": "1:21:25", "remaining_time": "3:04:01", "throughput": 8662.95, "total_tokens": 42322656} +{"current_steps": 62785, "total_steps": 204665, "loss": 0.1276, "lr": 1.7506582960315117e-06, "epoch": 1.5338479955048494, "percentage": 30.68, "elapsed_time": "1:21:25", "remaining_time": "3:04:00", "throughput": 8663.07, "total_tokens": 42326304} +{"current_steps": 62790, "total_steps": 204665, "loss": 0.1918, "lr": 1.7506019511902586e-06, "epoch": 1.5339701463366966, "percentage": 30.68, "elapsed_time": "1:21:26", "remaining_time": "3:04:00", "throughput": 8663.11, "total_tokens": 42329504} +{"current_steps": 62795, "total_steps": 204665, "loss": 0.1111, "lr": 1.750545600890426e-06, "epoch": 1.5340922971685438, "percentage": 30.68, "elapsed_time": "1:21:26", "remaining_time": "3:03:59", "throughput": 8663.2, "total_tokens": 42332960} +{"current_steps": 62800, "total_steps": 204665, "loss": 0.1264, "lr": 1.7504892451324241e-06, "epoch": 1.534214448000391, "percentage": 30.68, "elapsed_time": "1:21:26", "remaining_time": "3:03:59", "throughput": 8663.28, "total_tokens": 42336352} +{"current_steps": 62805, "total_steps": 204665, "loss": 0.0363, "lr": 1.7504328839166628e-06, "epoch": 1.5343365988322382, "percentage": 30.69, "elapsed_time": "1:21:27", "remaining_time": "3:03:58", "throughput": 8663.29, "total_tokens": 42339296} +{"current_steps": 62810, "total_steps": 204665, "loss": 0.1388, "lr": 1.7503765172435515e-06, "epoch": 1.5344587496640854, "percentage": 30.69, "elapsed_time": "1:21:27", "remaining_time": "3:03:58", "throughput": 8663.3, "total_tokens": 42342304} +{"current_steps": 62815, "total_steps": 204665, "loss": 0.0102, "lr": 1.7503201451135002e-06, "epoch": 1.5345809004959325, "percentage": 30.69, "elapsed_time": "1:21:27", "remaining_time": "3:03:57", "throughput": 8663.44, "total_tokens": 42346016} +{"current_steps": 62820, "total_steps": 204665, "loss": 0.1099, "lr": 1.7502637675269192e-06, "epoch": 1.5347030513277795, "percentage": 30.69, "elapsed_time": "1:21:28", "remaining_time": "3:03:57", "throughput": 8663.5, "total_tokens": 42349344} +{"current_steps": 62825, "total_steps": 204665, "loss": 0.0517, "lr": 1.7502073844842183e-06, "epoch": 1.5348252021596267, "percentage": 30.7, "elapsed_time": "1:21:28", "remaining_time": "3:03:56", "throughput": 8663.54, "total_tokens": 42352480} +{"current_steps": 62830, "total_steps": 204665, "loss": 0.0806, "lr": 1.7501509959858074e-06, "epoch": 1.534947352991474, "percentage": 30.7, "elapsed_time": "1:21:28", "remaining_time": "3:03:56", "throughput": 8663.59, "total_tokens": 42355680} +{"current_steps": 62835, "total_steps": 204665, "loss": 0.112, "lr": 1.7500946020320967e-06, "epoch": 1.535069503823321, "percentage": 30.7, "elapsed_time": "1:21:29", "remaining_time": "3:03:55", "throughput": 8663.61, "total_tokens": 42358752} +{"current_steps": 62840, "total_steps": 204665, "loss": 0.2533, "lr": 1.7500382026234964e-06, "epoch": 1.535191654655168, "percentage": 30.7, "elapsed_time": "1:21:29", "remaining_time": "3:03:55", "throughput": 8663.72, "total_tokens": 42362336} +{"current_steps": 62845, "total_steps": 204665, "loss": 0.0778, "lr": 1.7499817977604163e-06, "epoch": 1.5353138054870152, "percentage": 30.71, "elapsed_time": "1:21:29", "remaining_time": "3:03:55", "throughput": 8663.8, "total_tokens": 42365728} +{"current_steps": 62850, "total_steps": 204665, "loss": 0.0658, "lr": 1.7499253874432672e-06, "epoch": 1.5354359563188624, "percentage": 30.71, "elapsed_time": "1:21:30", "remaining_time": "3:03:54", "throughput": 8664.0, "total_tokens": 42369888} +{"current_steps": 62855, "total_steps": 204665, "loss": 0.1032, "lr": 1.7498689716724586e-06, "epoch": 1.5355581071507096, "percentage": 30.71, "elapsed_time": "1:21:30", "remaining_time": "3:03:54", "throughput": 8664.11, "total_tokens": 42373472} +{"current_steps": 62860, "total_steps": 204665, "loss": 0.0364, "lr": 1.7498125504484014e-06, "epoch": 1.5356802579825568, "percentage": 30.71, "elapsed_time": "1:21:31", "remaining_time": "3:03:53", "throughput": 8664.22, "total_tokens": 42377056} +{"current_steps": 62865, "total_steps": 204665, "loss": 0.1149, "lr": 1.7497561237715055e-06, "epoch": 1.535802408814404, "percentage": 30.72, "elapsed_time": "1:21:31", "remaining_time": "3:03:53", "throughput": 8664.29, "total_tokens": 42380384} +{"current_steps": 62870, "total_steps": 204665, "loss": 0.0337, "lr": 1.7496996916421818e-06, "epoch": 1.5359245596462512, "percentage": 30.72, "elapsed_time": "1:21:31", "remaining_time": "3:03:52", "throughput": 8664.37, "total_tokens": 42383776} +{"current_steps": 62875, "total_steps": 204665, "loss": 0.2404, "lr": 1.7496432540608398e-06, "epoch": 1.5360467104780984, "percentage": 30.72, "elapsed_time": "1:21:32", "remaining_time": "3:03:52", "throughput": 8664.46, "total_tokens": 42387232} +{"current_steps": 62880, "total_steps": 204665, "loss": 0.0842, "lr": 1.7495868110278905e-06, "epoch": 1.5361688613099456, "percentage": 30.72, "elapsed_time": "1:21:32", "remaining_time": "3:03:51", "throughput": 8664.53, "total_tokens": 42390624} +{"current_steps": 62885, "total_steps": 204665, "loss": 0.2111, "lr": 1.7495303625437447e-06, "epoch": 1.5362910121417928, "percentage": 30.73, "elapsed_time": "1:21:32", "remaining_time": "3:03:51", "throughput": 8664.62, "total_tokens": 42394080} +{"current_steps": 62890, "total_steps": 204665, "loss": 0.04, "lr": 1.749473908608812e-06, "epoch": 1.53641316297364, "percentage": 30.73, "elapsed_time": "1:21:33", "remaining_time": "3:03:50", "throughput": 8664.62, "total_tokens": 42397024} +{"current_steps": 62895, "total_steps": 204665, "loss": 0.0856, "lr": 1.7494174492235038e-06, "epoch": 1.5365353138054871, "percentage": 30.73, "elapsed_time": "1:21:33", "remaining_time": "3:03:50", "throughput": 8664.71, "total_tokens": 42400480} +{"current_steps": 62900, "total_steps": 204665, "loss": 0.0542, "lr": 1.7493609843882302e-06, "epoch": 1.5366574646373343, "percentage": 30.73, "elapsed_time": "1:21:33", "remaining_time": "3:03:49", "throughput": 8664.83, "total_tokens": 42404128} +{"current_steps": 62905, "total_steps": 204665, "loss": 0.0997, "lr": 1.749304514103402e-06, "epoch": 1.5367796154691813, "percentage": 30.74, "elapsed_time": "1:21:34", "remaining_time": "3:03:49", "throughput": 8664.88, "total_tokens": 42407392} +{"current_steps": 62910, "total_steps": 204665, "loss": 0.1064, "lr": 1.74924803836943e-06, "epoch": 1.5369017663010285, "percentage": 30.74, "elapsed_time": "1:21:34", "remaining_time": "3:03:48", "throughput": 8664.96, "total_tokens": 42410784} +{"current_steps": 62915, "total_steps": 204665, "loss": 0.0706, "lr": 1.7491915571867245e-06, "epoch": 1.5370239171328757, "percentage": 30.74, "elapsed_time": "1:21:34", "remaining_time": "3:03:48", "throughput": 8665.16, "total_tokens": 42414880} +{"current_steps": 62920, "total_steps": 204665, "loss": 0.0611, "lr": 1.7491350705556967e-06, "epoch": 1.5371460679647229, "percentage": 30.74, "elapsed_time": "1:21:35", "remaining_time": "3:03:47", "throughput": 8665.17, "total_tokens": 42417888} +{"current_steps": 62925, "total_steps": 204665, "loss": 0.0141, "lr": 1.749078578476757e-06, "epoch": 1.53726821879657, "percentage": 30.75, "elapsed_time": "1:21:35", "remaining_time": "3:03:47", "throughput": 8665.26, "total_tokens": 42421344} +{"current_steps": 62930, "total_steps": 204665, "loss": 0.0567, "lr": 1.7490220809503163e-06, "epoch": 1.537390369628417, "percentage": 30.75, "elapsed_time": "1:21:35", "remaining_time": "3:03:46", "throughput": 8665.44, "total_tokens": 42425312} +{"current_steps": 62935, "total_steps": 204665, "loss": 0.1468, "lr": 1.7489655779767856e-06, "epoch": 1.5375125204602642, "percentage": 30.75, "elapsed_time": "1:21:36", "remaining_time": "3:03:46", "throughput": 8665.43, "total_tokens": 42428192} +{"current_steps": 62940, "total_steps": 204665, "loss": 0.1312, "lr": 1.748909069556576e-06, "epoch": 1.5376346712921114, "percentage": 30.75, "elapsed_time": "1:21:36", "remaining_time": "3:03:45", "throughput": 8665.54, "total_tokens": 42431840} +{"current_steps": 62945, "total_steps": 204665, "loss": 0.073, "lr": 1.7488525556900981e-06, "epoch": 1.5377568221239586, "percentage": 30.76, "elapsed_time": "1:21:36", "remaining_time": "3:03:45", "throughput": 8665.68, "total_tokens": 42435616} +{"current_steps": 62950, "total_steps": 204665, "loss": 0.0668, "lr": 1.748796036377763e-06, "epoch": 1.5378789729558058, "percentage": 30.76, "elapsed_time": "1:21:37", "remaining_time": "3:03:45", "throughput": 8666.02, "total_tokens": 42440672} +{"current_steps": 62955, "total_steps": 204665, "loss": 0.0608, "lr": 1.7487395116199815e-06, "epoch": 1.538001123787653, "percentage": 30.76, "elapsed_time": "1:21:37", "remaining_time": "3:03:44", "throughput": 8666.11, "total_tokens": 42444192} +{"current_steps": 62960, "total_steps": 204665, "loss": 0.1059, "lr": 1.7486829814171653e-06, "epoch": 1.5381232746195002, "percentage": 30.76, "elapsed_time": "1:21:38", "remaining_time": "3:03:44", "throughput": 8666.13, "total_tokens": 42447200} +{"current_steps": 62965, "total_steps": 204665, "loss": 0.0915, "lr": 1.7486264457697249e-06, "epoch": 1.5382454254513473, "percentage": 30.76, "elapsed_time": "1:21:38", "remaining_time": "3:03:43", "throughput": 8666.19, "total_tokens": 42450528} +{"current_steps": 62970, "total_steps": 204665, "loss": 0.1535, "lr": 1.7485699046780714e-06, "epoch": 1.5383675762831945, "percentage": 30.77, "elapsed_time": "1:21:38", "remaining_time": "3:03:43", "throughput": 8666.27, "total_tokens": 42453920} +{"current_steps": 62975, "total_steps": 204665, "loss": 0.1188, "lr": 1.7485133581426165e-06, "epoch": 1.5384897271150417, "percentage": 30.77, "elapsed_time": "1:21:39", "remaining_time": "3:03:42", "throughput": 8666.27, "total_tokens": 42456800} +{"current_steps": 62980, "total_steps": 204665, "loss": 0.0455, "lr": 1.7484568061637712e-06, "epoch": 1.538611877946889, "percentage": 30.77, "elapsed_time": "1:21:39", "remaining_time": "3:03:42", "throughput": 8666.4, "total_tokens": 42460512} +{"current_steps": 62985, "total_steps": 204665, "loss": 0.1353, "lr": 1.7484002487419466e-06, "epoch": 1.538734028778736, "percentage": 30.77, "elapsed_time": "1:21:39", "remaining_time": "3:03:41", "throughput": 8666.47, "total_tokens": 42463840} +{"current_steps": 62990, "total_steps": 204665, "loss": 0.0933, "lr": 1.748343685877554e-06, "epoch": 1.5388561796105833, "percentage": 30.78, "elapsed_time": "1:21:40", "remaining_time": "3:03:41", "throughput": 8666.6, "total_tokens": 42467552} +{"current_steps": 62995, "total_steps": 204665, "loss": 0.0359, "lr": 1.7482871175710048e-06, "epoch": 1.5389783304424303, "percentage": 30.78, "elapsed_time": "1:21:40", "remaining_time": "3:03:40", "throughput": 8666.66, "total_tokens": 42470880} +{"current_steps": 63000, "total_steps": 204665, "loss": 0.0564, "lr": 1.7482305438227104e-06, "epoch": 1.5391004812742775, "percentage": 30.78, "elapsed_time": "1:21:40", "remaining_time": "3:03:40", "throughput": 8666.7, "total_tokens": 42474016} +{"current_steps": 63005, "total_steps": 204665, "loss": 0.0762, "lr": 1.7481739646330822e-06, "epoch": 1.5392226321061246, "percentage": 30.78, "elapsed_time": "1:21:41", "remaining_time": "3:03:39", "throughput": 8666.78, "total_tokens": 42477408} +{"current_steps": 63010, "total_steps": 204665, "loss": 0.0955, "lr": 1.748117380002532e-06, "epoch": 1.5393447829379718, "percentage": 30.79, "elapsed_time": "1:21:41", "remaining_time": "3:03:39", "throughput": 8666.84, "total_tokens": 42480672} +{"current_steps": 63015, "total_steps": 204665, "loss": 0.0247, "lr": 1.7480607899314707e-06, "epoch": 1.539466933769819, "percentage": 30.79, "elapsed_time": "1:21:41", "remaining_time": "3:03:38", "throughput": 8666.94, "total_tokens": 42484192} +{"current_steps": 63020, "total_steps": 204665, "loss": 0.1316, "lr": 1.7480041944203102e-06, "epoch": 1.539589084601666, "percentage": 30.79, "elapsed_time": "1:21:42", "remaining_time": "3:03:38", "throughput": 8666.98, "total_tokens": 42487392} +{"current_steps": 63025, "total_steps": 204665, "loss": 0.1054, "lr": 1.7479475934694623e-06, "epoch": 1.5397112354335132, "percentage": 30.79, "elapsed_time": "1:21:42", "remaining_time": "3:03:37", "throughput": 8667.03, "total_tokens": 42490656} +{"current_steps": 63030, "total_steps": 204665, "loss": 0.1108, "lr": 1.7478909870793378e-06, "epoch": 1.5398333862653604, "percentage": 30.8, "elapsed_time": "1:21:42", "remaining_time": "3:03:37", "throughput": 8667.07, "total_tokens": 42493792} +{"current_steps": 63035, "total_steps": 204665, "loss": 0.0703, "lr": 1.7478343752503494e-06, "epoch": 1.5399555370972076, "percentage": 30.8, "elapsed_time": "1:21:43", "remaining_time": "3:03:36", "throughput": 8667.21, "total_tokens": 42497504} +{"current_steps": 63040, "total_steps": 204665, "loss": 0.0612, "lr": 1.747777757982908e-06, "epoch": 1.5400776879290548, "percentage": 30.8, "elapsed_time": "1:21:43", "remaining_time": "3:03:36", "throughput": 8667.27, "total_tokens": 42500832} +{"current_steps": 63045, "total_steps": 204665, "loss": 0.0314, "lr": 1.7477211352774254e-06, "epoch": 1.540199838760902, "percentage": 30.8, "elapsed_time": "1:21:43", "remaining_time": "3:03:35", "throughput": 8667.37, "total_tokens": 42504352} +{"current_steps": 63050, "total_steps": 204665, "loss": 0.0689, "lr": 1.7476645071343141e-06, "epoch": 1.5403219895927491, "percentage": 30.81, "elapsed_time": "1:21:44", "remaining_time": "3:03:35", "throughput": 8667.44, "total_tokens": 42507744} +{"current_steps": 63055, "total_steps": 204665, "loss": 0.1989, "lr": 1.7476078735539853e-06, "epoch": 1.5404441404245963, "percentage": 30.81, "elapsed_time": "1:21:44", "remaining_time": "3:03:34", "throughput": 8667.47, "total_tokens": 42510816} +{"current_steps": 63060, "total_steps": 204665, "loss": 0.1072, "lr": 1.7475512345368509e-06, "epoch": 1.5405662912564435, "percentage": 30.81, "elapsed_time": "1:21:44", "remaining_time": "3:03:34", "throughput": 8667.52, "total_tokens": 42514016} +{"current_steps": 63065, "total_steps": 204665, "loss": 0.0739, "lr": 1.7474945900833227e-06, "epoch": 1.5406884420882907, "percentage": 30.81, "elapsed_time": "1:21:45", "remaining_time": "3:03:33", "throughput": 8667.71, "total_tokens": 42518112} +{"current_steps": 63070, "total_steps": 204665, "loss": 0.0852, "lr": 1.7474379401938125e-06, "epoch": 1.540810592920138, "percentage": 30.82, "elapsed_time": "1:21:45", "remaining_time": "3:03:33", "throughput": 8667.75, "total_tokens": 42521312} +{"current_steps": 63075, "total_steps": 204665, "loss": 0.0435, "lr": 1.7473812848687334e-06, "epoch": 1.540932743751985, "percentage": 30.82, "elapsed_time": "1:21:46", "remaining_time": "3:03:32", "throughput": 8667.79, "total_tokens": 42524448} +{"current_steps": 63080, "total_steps": 204665, "loss": 0.1233, "lr": 1.7473246241084958e-06, "epoch": 1.5410548945838323, "percentage": 30.82, "elapsed_time": "1:21:46", "remaining_time": "3:03:32", "throughput": 8667.84, "total_tokens": 42527712} +{"current_steps": 63085, "total_steps": 204665, "loss": 0.0974, "lr": 1.7472679579135129e-06, "epoch": 1.5411770454156792, "percentage": 30.82, "elapsed_time": "1:21:46", "remaining_time": "3:03:32", "throughput": 8667.98, "total_tokens": 42531424} +{"current_steps": 63090, "total_steps": 204665, "loss": 0.1039, "lr": 1.7472112862841963e-06, "epoch": 1.5412991962475264, "percentage": 30.83, "elapsed_time": "1:21:47", "remaining_time": "3:03:31", "throughput": 8668.01, "total_tokens": 42534560} +{"current_steps": 63095, "total_steps": 204665, "loss": 0.101, "lr": 1.7471546092209585e-06, "epoch": 1.5414213470793736, "percentage": 30.83, "elapsed_time": "1:21:47", "remaining_time": "3:03:31", "throughput": 8668.03, "total_tokens": 42537632} +{"current_steps": 63100, "total_steps": 204665, "loss": 0.1874, "lr": 1.7470979267242111e-06, "epoch": 1.5415434979112208, "percentage": 30.83, "elapsed_time": "1:21:47", "remaining_time": "3:03:30", "throughput": 8668.05, "total_tokens": 42540640} +{"current_steps": 63105, "total_steps": 204665, "loss": 0.127, "lr": 1.7470412387943668e-06, "epoch": 1.541665648743068, "percentage": 30.83, "elapsed_time": "1:21:48", "remaining_time": "3:03:30", "throughput": 8668.06, "total_tokens": 42543648} +{"current_steps": 63110, "total_steps": 204665, "loss": 0.0244, "lr": 1.7469845454318374e-06, "epoch": 1.541787799574915, "percentage": 30.84, "elapsed_time": "1:21:48", "remaining_time": "3:03:29", "throughput": 8668.09, "total_tokens": 42546784} +{"current_steps": 63115, "total_steps": 204665, "loss": 0.0402, "lr": 1.7469278466370359e-06, "epoch": 1.5419099504067622, "percentage": 30.84, "elapsed_time": "1:21:48", "remaining_time": "3:03:29", "throughput": 8668.16, "total_tokens": 42550112} +{"current_steps": 63120, "total_steps": 204665, "loss": 0.0152, "lr": 1.7468711424103742e-06, "epoch": 1.5420321012386093, "percentage": 30.84, "elapsed_time": "1:21:49", "remaining_time": "3:03:28", "throughput": 8668.17, "total_tokens": 42553120} +{"current_steps": 63125, "total_steps": 204665, "loss": 0.164, "lr": 1.7468144327522644e-06, "epoch": 1.5421542520704565, "percentage": 30.84, "elapsed_time": "1:21:49", "remaining_time": "3:03:28", "throughput": 8668.25, "total_tokens": 42556512} +{"current_steps": 63130, "total_steps": 204665, "loss": 0.0329, "lr": 1.7467577176631192e-06, "epoch": 1.5422764029023037, "percentage": 30.85, "elapsed_time": "1:21:49", "remaining_time": "3:03:27", "throughput": 8668.33, "total_tokens": 42559904} +{"current_steps": 63135, "total_steps": 204665, "loss": 0.0034, "lr": 1.746700997143351e-06, "epoch": 1.542398553734151, "percentage": 30.85, "elapsed_time": "1:21:50", "remaining_time": "3:03:27", "throughput": 8668.38, "total_tokens": 42563168} +{"current_steps": 63140, "total_steps": 204665, "loss": 0.042, "lr": 1.7466442711933724e-06, "epoch": 1.542520704565998, "percentage": 30.85, "elapsed_time": "1:21:50", "remaining_time": "3:03:26", "throughput": 8668.4, "total_tokens": 42566240} +{"current_steps": 63145, "total_steps": 204665, "loss": 0.0024, "lr": 1.7465875398135958e-06, "epoch": 1.5426428553978453, "percentage": 30.85, "elapsed_time": "1:21:50", "remaining_time": "3:03:26", "throughput": 8668.44, "total_tokens": 42569376} +{"current_steps": 63150, "total_steps": 204665, "loss": 0.0882, "lr": 1.746530803004434e-06, "epoch": 1.5427650062296925, "percentage": 30.86, "elapsed_time": "1:21:51", "remaining_time": "3:03:25", "throughput": 8668.56, "total_tokens": 42573024} +{"current_steps": 63155, "total_steps": 204665, "loss": 0.0602, "lr": 1.7464740607662991e-06, "epoch": 1.5428871570615397, "percentage": 30.86, "elapsed_time": "1:21:51", "remaining_time": "3:03:25", "throughput": 8668.64, "total_tokens": 42576480} +{"current_steps": 63160, "total_steps": 204665, "loss": 0.0833, "lr": 1.746417313099604e-06, "epoch": 1.5430093078933869, "percentage": 30.86, "elapsed_time": "1:21:51", "remaining_time": "3:03:24", "throughput": 8668.64, "total_tokens": 42579360} +{"current_steps": 63165, "total_steps": 204665, "loss": 0.2162, "lr": 1.7463605600047618e-06, "epoch": 1.543131458725234, "percentage": 30.86, "elapsed_time": "1:21:52", "remaining_time": "3:03:24", "throughput": 8668.69, "total_tokens": 42582624} +{"current_steps": 63170, "total_steps": 204665, "loss": 0.0718, "lr": 1.7463038014821848e-06, "epoch": 1.5432536095570812, "percentage": 30.87, "elapsed_time": "1:21:52", "remaining_time": "3:03:23", "throughput": 8668.76, "total_tokens": 42586016} +{"current_steps": 63175, "total_steps": 204665, "loss": 0.0469, "lr": 1.7462470375322856e-06, "epoch": 1.5433757603889282, "percentage": 30.87, "elapsed_time": "1:21:52", "remaining_time": "3:03:23", "throughput": 8668.78, "total_tokens": 42589088} +{"current_steps": 63180, "total_steps": 204665, "loss": 0.0732, "lr": 1.7461902681554773e-06, "epoch": 1.5434979112207754, "percentage": 30.87, "elapsed_time": "1:21:53", "remaining_time": "3:03:22", "throughput": 8668.91, "total_tokens": 42592800} +{"current_steps": 63185, "total_steps": 204665, "loss": 0.1156, "lr": 1.7461334933521725e-06, "epoch": 1.5436200620526226, "percentage": 30.87, "elapsed_time": "1:21:53", "remaining_time": "3:03:22", "throughput": 8669.0, "total_tokens": 42596320} +{"current_steps": 63190, "total_steps": 204665, "loss": 0.0074, "lr": 1.7460767131227844e-06, "epoch": 1.5437422128844698, "percentage": 30.87, "elapsed_time": "1:21:53", "remaining_time": "3:03:21", "throughput": 8669.04, "total_tokens": 42599456} +{"current_steps": 63195, "total_steps": 204665, "loss": 0.0416, "lr": 1.7460199274677262e-06, "epoch": 1.5438643637163167, "percentage": 30.88, "elapsed_time": "1:21:54", "remaining_time": "3:03:21", "throughput": 8669.08, "total_tokens": 42602656} +{"current_steps": 63200, "total_steps": 204665, "loss": 0.1269, "lr": 1.7459631363874098e-06, "epoch": 1.543986514548164, "percentage": 30.88, "elapsed_time": "1:21:54", "remaining_time": "3:03:20", "throughput": 8669.12, "total_tokens": 42605856} +{"current_steps": 63205, "total_steps": 204665, "loss": 0.0604, "lr": 1.745906339882249e-06, "epoch": 1.5441086653800111, "percentage": 30.88, "elapsed_time": "1:21:55", "remaining_time": "3:03:20", "throughput": 8669.2, "total_tokens": 42609248} +{"current_steps": 63210, "total_steps": 204665, "loss": 0.1087, "lr": 1.7458495379526568e-06, "epoch": 1.5442308162118583, "percentage": 30.88, "elapsed_time": "1:21:55", "remaining_time": "3:03:19", "throughput": 8669.25, "total_tokens": 42612512} +{"current_steps": 63215, "total_steps": 204665, "loss": 0.0718, "lr": 1.745792730599046e-06, "epoch": 1.5443529670437055, "percentage": 30.89, "elapsed_time": "1:21:55", "remaining_time": "3:03:19", "throughput": 8669.27, "total_tokens": 42615584} +{"current_steps": 63220, "total_steps": 204665, "loss": 0.2104, "lr": 1.7457359178218304e-06, "epoch": 1.5444751178755527, "percentage": 30.89, "elapsed_time": "1:21:56", "remaining_time": "3:03:18", "throughput": 8669.32, "total_tokens": 42618784} +{"current_steps": 63225, "total_steps": 204665, "loss": 0.0914, "lr": 1.745679099621422e-06, "epoch": 1.5445972687073999, "percentage": 30.89, "elapsed_time": "1:21:56", "remaining_time": "3:03:18", "throughput": 8669.35, "total_tokens": 42621920} +{"current_steps": 63230, "total_steps": 204665, "loss": 0.0784, "lr": 1.7456222759982348e-06, "epoch": 1.544719419539247, "percentage": 30.89, "elapsed_time": "1:21:56", "remaining_time": "3:03:17", "throughput": 8669.47, "total_tokens": 42625568} +{"current_steps": 63235, "total_steps": 204665, "loss": 0.0009, "lr": 1.745565446952682e-06, "epoch": 1.5448415703710943, "percentage": 30.9, "elapsed_time": "1:21:57", "remaining_time": "3:03:17", "throughput": 8669.53, "total_tokens": 42628896} +{"current_steps": 63240, "total_steps": 204665, "loss": 0.0905, "lr": 1.7455086124851764e-06, "epoch": 1.5449637212029415, "percentage": 30.9, "elapsed_time": "1:21:57", "remaining_time": "3:03:16", "throughput": 8669.52, "total_tokens": 42631776} +{"current_steps": 63245, "total_steps": 204665, "loss": 0.0461, "lr": 1.7454517725961319e-06, "epoch": 1.5450858720347886, "percentage": 30.9, "elapsed_time": "1:21:57", "remaining_time": "3:03:16", "throughput": 8669.62, "total_tokens": 42635232} +{"current_steps": 63250, "total_steps": 204665, "loss": 0.0771, "lr": 1.7453949272859619e-06, "epoch": 1.5452080228666358, "percentage": 30.9, "elapsed_time": "1:21:58", "remaining_time": "3:03:15", "throughput": 8669.69, "total_tokens": 42638624} +{"current_steps": 63255, "total_steps": 204665, "loss": 0.0015, "lr": 1.745338076555079e-06, "epoch": 1.545330173698483, "percentage": 30.91, "elapsed_time": "1:21:58", "remaining_time": "3:03:15", "throughput": 8669.73, "total_tokens": 42641824} +{"current_steps": 63260, "total_steps": 204665, "loss": 0.0622, "lr": 1.7452812204038972e-06, "epoch": 1.5454523245303302, "percentage": 30.91, "elapsed_time": "1:21:58", "remaining_time": "3:03:15", "throughput": 8669.83, "total_tokens": 42645408} +{"current_steps": 63265, "total_steps": 204665, "loss": 0.1353, "lr": 1.74522435883283e-06, "epoch": 1.5455744753621772, "percentage": 30.91, "elapsed_time": "1:21:59", "remaining_time": "3:03:14", "throughput": 8669.95, "total_tokens": 42649056} +{"current_steps": 63270, "total_steps": 204665, "loss": 0.1149, "lr": 1.745167491842291e-06, "epoch": 1.5456966261940244, "percentage": 30.91, "elapsed_time": "1:21:59", "remaining_time": "3:03:14", "throughput": 8670.0, "total_tokens": 42652320} +{"current_steps": 63275, "total_steps": 204665, "loss": 0.0959, "lr": 1.7451106194326933e-06, "epoch": 1.5458187770258716, "percentage": 30.92, "elapsed_time": "1:21:59", "remaining_time": "3:03:13", "throughput": 8670.09, "total_tokens": 42655776} +{"current_steps": 63280, "total_steps": 204665, "loss": 0.0981, "lr": 1.745053741604451e-06, "epoch": 1.5459409278577187, "percentage": 30.92, "elapsed_time": "1:22:00", "remaining_time": "3:03:13", "throughput": 8670.12, "total_tokens": 42658912} +{"current_steps": 63285, "total_steps": 204665, "loss": 0.1071, "lr": 1.7449968583579776e-06, "epoch": 1.5460630786895657, "percentage": 30.92, "elapsed_time": "1:22:00", "remaining_time": "3:03:12", "throughput": 8670.23, "total_tokens": 42662496} +{"current_steps": 63290, "total_steps": 204665, "loss": 0.1257, "lr": 1.7449399696936862e-06, "epoch": 1.546185229521413, "percentage": 30.92, "elapsed_time": "1:22:00", "remaining_time": "3:03:12", "throughput": 8670.42, "total_tokens": 42666592} +{"current_steps": 63295, "total_steps": 204665, "loss": 0.0495, "lr": 1.7448830756119912e-06, "epoch": 1.54630738035326, "percentage": 30.93, "elapsed_time": "1:22:01", "remaining_time": "3:03:11", "throughput": 8670.47, "total_tokens": 42669856} +{"current_steps": 63300, "total_steps": 204665, "loss": 0.0459, "lr": 1.7448261761133062e-06, "epoch": 1.5464295311851073, "percentage": 30.93, "elapsed_time": "1:22:01", "remaining_time": "3:03:11", "throughput": 8670.52, "total_tokens": 42673120} +{"current_steps": 63305, "total_steps": 204665, "loss": 0.0671, "lr": 1.7447692711980448e-06, "epoch": 1.5465516820169545, "percentage": 30.93, "elapsed_time": "1:22:01", "remaining_time": "3:03:10", "throughput": 8670.6, "total_tokens": 42676512} +{"current_steps": 63310, "total_steps": 204665, "loss": 0.0339, "lr": 1.744712360866621e-06, "epoch": 1.5466738328488017, "percentage": 30.93, "elapsed_time": "1:22:02", "remaining_time": "3:03:10", "throughput": 8670.71, "total_tokens": 42680096} +{"current_steps": 63315, "total_steps": 204665, "loss": 0.1685, "lr": 1.7446554451194486e-06, "epoch": 1.5467959836806489, "percentage": 30.94, "elapsed_time": "1:22:02", "remaining_time": "3:03:09", "throughput": 8670.74, "total_tokens": 42683232} +{"current_steps": 63320, "total_steps": 204665, "loss": 0.1446, "lr": 1.7445985239569416e-06, "epoch": 1.546918134512496, "percentage": 30.94, "elapsed_time": "1:22:03", "remaining_time": "3:03:09", "throughput": 8670.8, "total_tokens": 42686560} +{"current_steps": 63325, "total_steps": 204665, "loss": 0.0934, "lr": 1.7445415973795137e-06, "epoch": 1.5470402853443432, "percentage": 30.94, "elapsed_time": "1:22:03", "remaining_time": "3:03:08", "throughput": 8670.86, "total_tokens": 42689888} +{"current_steps": 63330, "total_steps": 204665, "loss": 0.1435, "lr": 1.7444846653875791e-06, "epoch": 1.5471624361761904, "percentage": 30.94, "elapsed_time": "1:22:03", "remaining_time": "3:03:08", "throughput": 8670.93, "total_tokens": 42693216} +{"current_steps": 63335, "total_steps": 204665, "loss": 0.1261, "lr": 1.7444277279815518e-06, "epoch": 1.5472845870080376, "percentage": 30.95, "elapsed_time": "1:22:04", "remaining_time": "3:03:07", "throughput": 8671.04, "total_tokens": 42696800} +{"current_steps": 63340, "total_steps": 204665, "loss": 0.1507, "lr": 1.744370785161846e-06, "epoch": 1.5474067378398848, "percentage": 30.95, "elapsed_time": "1:22:04", "remaining_time": "3:03:07", "throughput": 8671.18, "total_tokens": 42700640} +{"current_steps": 63345, "total_steps": 204665, "loss": 0.0884, "lr": 1.7443138369288754e-06, "epoch": 1.547528888671732, "percentage": 30.95, "elapsed_time": "1:22:04", "remaining_time": "3:03:06", "throughput": 8671.26, "total_tokens": 42704096} +{"current_steps": 63350, "total_steps": 204665, "loss": 0.0721, "lr": 1.7442568832830546e-06, "epoch": 1.5476510395035792, "percentage": 30.95, "elapsed_time": "1:22:05", "remaining_time": "3:03:06", "throughput": 8671.36, "total_tokens": 42707616} +{"current_steps": 63355, "total_steps": 204665, "loss": 0.1018, "lr": 1.7441999242247974e-06, "epoch": 1.5477731903354262, "percentage": 30.96, "elapsed_time": "1:22:05", "remaining_time": "3:03:06", "throughput": 8671.38, "total_tokens": 42710688} +{"current_steps": 63360, "total_steps": 204665, "loss": 0.0021, "lr": 1.7441429597545181e-06, "epoch": 1.5478953411672733, "percentage": 30.96, "elapsed_time": "1:22:05", "remaining_time": "3:03:05", "throughput": 8671.45, "total_tokens": 42714080} +{"current_steps": 63365, "total_steps": 204665, "loss": 0.032, "lr": 1.7440859898726312e-06, "epoch": 1.5480174919991205, "percentage": 30.96, "elapsed_time": "1:22:06", "remaining_time": "3:03:05", "throughput": 8671.52, "total_tokens": 42717472} +{"current_steps": 63370, "total_steps": 204665, "loss": 0.1066, "lr": 1.7440290145795507e-06, "epoch": 1.5481396428309677, "percentage": 30.96, "elapsed_time": "1:22:06", "remaining_time": "3:03:04", "throughput": 8671.56, "total_tokens": 42720672} +{"current_steps": 63375, "total_steps": 204665, "loss": 0.129, "lr": 1.7439720338756913e-06, "epoch": 1.5482617936628147, "percentage": 30.97, "elapsed_time": "1:22:06", "remaining_time": "3:03:04", "throughput": 8671.59, "total_tokens": 42723808} +{"current_steps": 63380, "total_steps": 204665, "loss": 0.046, "lr": 1.743915047761467e-06, "epoch": 1.5483839444946619, "percentage": 30.97, "elapsed_time": "1:22:07", "remaining_time": "3:03:03", "throughput": 8671.63, "total_tokens": 42726944} +{"current_steps": 63385, "total_steps": 204665, "loss": 0.1086, "lr": 1.7438580562372925e-06, "epoch": 1.548506095326509, "percentage": 30.97, "elapsed_time": "1:22:07", "remaining_time": "3:03:03", "throughput": 8671.7, "total_tokens": 42730336} +{"current_steps": 63390, "total_steps": 204665, "loss": 0.1548, "lr": 1.7438010593035822e-06, "epoch": 1.5486282461583563, "percentage": 30.97, "elapsed_time": "1:22:07", "remaining_time": "3:03:02", "throughput": 8671.74, "total_tokens": 42733472} +{"current_steps": 63395, "total_steps": 204665, "loss": 0.1006, "lr": 1.7437440569607502e-06, "epoch": 1.5487503969902034, "percentage": 30.98, "elapsed_time": "1:22:08", "remaining_time": "3:03:02", "throughput": 8671.87, "total_tokens": 42737248} +{"current_steps": 63400, "total_steps": 204665, "loss": 0.0539, "lr": 1.7436870492092117e-06, "epoch": 1.5488725478220506, "percentage": 30.98, "elapsed_time": "1:22:08", "remaining_time": "3:03:01", "throughput": 8671.91, "total_tokens": 42740448} +{"current_steps": 63405, "total_steps": 204665, "loss": 0.074, "lr": 1.7436300360493808e-06, "epoch": 1.5489946986538978, "percentage": 30.98, "elapsed_time": "1:22:08", "remaining_time": "3:03:01", "throughput": 8671.99, "total_tokens": 42743840} +{"current_steps": 63410, "total_steps": 204665, "loss": 0.31, "lr": 1.7435730174816725e-06, "epoch": 1.549116849485745, "percentage": 30.98, "elapsed_time": "1:22:09", "remaining_time": "3:03:00", "throughput": 8672.04, "total_tokens": 42747104} +{"current_steps": 63415, "total_steps": 204665, "loss": 0.0478, "lr": 1.743515993506501e-06, "epoch": 1.5492390003175922, "percentage": 30.98, "elapsed_time": "1:22:09", "remaining_time": "3:03:00", "throughput": 8672.18, "total_tokens": 42750880} +{"current_steps": 63420, "total_steps": 204665, "loss": 0.0756, "lr": 1.7434589641242812e-06, "epoch": 1.5493611511494394, "percentage": 30.99, "elapsed_time": "1:22:10", "remaining_time": "3:02:59", "throughput": 8672.42, "total_tokens": 42755296} +{"current_steps": 63425, "total_steps": 204665, "loss": 0.1218, "lr": 1.7434019293354278e-06, "epoch": 1.5494833019812866, "percentage": 30.99, "elapsed_time": "1:22:10", "remaining_time": "3:02:59", "throughput": 8672.48, "total_tokens": 42758624} +{"current_steps": 63430, "total_steps": 204665, "loss": 0.1732, "lr": 1.7433448891403559e-06, "epoch": 1.5496054528131338, "percentage": 30.99, "elapsed_time": "1:22:10", "remaining_time": "3:02:58", "throughput": 8672.51, "total_tokens": 42761760} +{"current_steps": 63435, "total_steps": 204665, "loss": 0.0417, "lr": 1.7432878435394795e-06, "epoch": 1.549727603644981, "percentage": 30.99, "elapsed_time": "1:22:11", "remaining_time": "3:02:58", "throughput": 8672.57, "total_tokens": 42765088} +{"current_steps": 63440, "total_steps": 204665, "loss": 0.0317, "lr": 1.7432307925332146e-06, "epoch": 1.549849754476828, "percentage": 31.0, "elapsed_time": "1:22:11", "remaining_time": "3:02:57", "throughput": 8672.6, "total_tokens": 42768160} +{"current_steps": 63445, "total_steps": 204665, "loss": 0.0716, "lr": 1.743173736121975e-06, "epoch": 1.5499719053086751, "percentage": 31.0, "elapsed_time": "1:22:11", "remaining_time": "3:02:57", "throughput": 8672.75, "total_tokens": 42772000} +{"current_steps": 63450, "total_steps": 204665, "loss": 0.0256, "lr": 1.7431166743061762e-06, "epoch": 1.5500940561405223, "percentage": 31.0, "elapsed_time": "1:22:12", "remaining_time": "3:02:56", "throughput": 8672.76, "total_tokens": 42775072} +{"current_steps": 63455, "total_steps": 204665, "loss": 0.0631, "lr": 1.7430596070862332e-06, "epoch": 1.5502162069723695, "percentage": 31.0, "elapsed_time": "1:22:12", "remaining_time": "3:02:56", "throughput": 8672.8, "total_tokens": 42778272} +{"current_steps": 63460, "total_steps": 204665, "loss": 0.0389, "lr": 1.743002534462561e-06, "epoch": 1.5503383578042167, "percentage": 31.01, "elapsed_time": "1:22:12", "remaining_time": "3:02:56", "throughput": 8672.93, "total_tokens": 42781984} +{"current_steps": 63465, "total_steps": 204665, "loss": 0.0364, "lr": 1.7429454564355744e-06, "epoch": 1.5504605086360637, "percentage": 31.01, "elapsed_time": "1:22:13", "remaining_time": "3:02:55", "throughput": 8672.96, "total_tokens": 42785056} +{"current_steps": 63470, "total_steps": 204665, "loss": 0.1069, "lr": 1.7428883730056884e-06, "epoch": 1.5505826594679109, "percentage": 31.01, "elapsed_time": "1:22:13", "remaining_time": "3:02:55", "throughput": 8672.99, "total_tokens": 42788192} +{"current_steps": 63475, "total_steps": 204665, "loss": 0.0492, "lr": 1.7428312841733187e-06, "epoch": 1.550704810299758, "percentage": 31.01, "elapsed_time": "1:22:13", "remaining_time": "3:02:54", "throughput": 8673.08, "total_tokens": 42791712} +{"current_steps": 63480, "total_steps": 204665, "loss": 0.1006, "lr": 1.7427741899388798e-06, "epoch": 1.5508269611316052, "percentage": 31.02, "elapsed_time": "1:22:14", "remaining_time": "3:02:54", "throughput": 8673.2, "total_tokens": 42795360} +{"current_steps": 63485, "total_steps": 204665, "loss": 0.1221, "lr": 1.7427170903027874e-06, "epoch": 1.5509491119634524, "percentage": 31.02, "elapsed_time": "1:22:14", "remaining_time": "3:02:53", "throughput": 8673.23, "total_tokens": 42798496} +{"current_steps": 63490, "total_steps": 204665, "loss": 0.1871, "lr": 1.7426599852654564e-06, "epoch": 1.5510712627952996, "percentage": 31.02, "elapsed_time": "1:22:14", "remaining_time": "3:02:53", "throughput": 8673.34, "total_tokens": 42802080} +{"current_steps": 63495, "total_steps": 204665, "loss": 0.0024, "lr": 1.7426028748273023e-06, "epoch": 1.5511934136271468, "percentage": 31.02, "elapsed_time": "1:22:15", "remaining_time": "3:02:52", "throughput": 8673.42, "total_tokens": 42805536} +{"current_steps": 63500, "total_steps": 204665, "loss": 0.0444, "lr": 1.7425457589887405e-06, "epoch": 1.551315564458994, "percentage": 31.03, "elapsed_time": "1:22:15", "remaining_time": "3:02:52", "throughput": 8673.46, "total_tokens": 42808736} +{"current_steps": 63505, "total_steps": 204665, "loss": 0.0526, "lr": 1.7424886377501862e-06, "epoch": 1.5514377152908412, "percentage": 31.03, "elapsed_time": "1:22:15", "remaining_time": "3:02:51", "throughput": 8673.55, "total_tokens": 42812192} +{"current_steps": 63510, "total_steps": 204665, "loss": 0.0499, "lr": 1.7424315111120547e-06, "epoch": 1.5515598661226884, "percentage": 31.03, "elapsed_time": "1:22:16", "remaining_time": "3:02:51", "throughput": 8673.64, "total_tokens": 42815712} +{"current_steps": 63515, "total_steps": 204665, "loss": 0.0718, "lr": 1.7423743790747616e-06, "epoch": 1.5516820169545356, "percentage": 31.03, "elapsed_time": "1:22:16", "remaining_time": "3:02:50", "throughput": 8673.72, "total_tokens": 42819168} +{"current_steps": 63520, "total_steps": 204665, "loss": 0.0942, "lr": 1.7423172416387221e-06, "epoch": 1.5518041677863827, "percentage": 31.04, "elapsed_time": "1:22:17", "remaining_time": "3:02:50", "throughput": 8673.79, "total_tokens": 42822496} +{"current_steps": 63525, "total_steps": 204665, "loss": 0.0979, "lr": 1.7422600988043521e-06, "epoch": 1.55192631861823, "percentage": 31.04, "elapsed_time": "1:22:17", "remaining_time": "3:02:49", "throughput": 8673.8, "total_tokens": 42825504} +{"current_steps": 63530, "total_steps": 204665, "loss": 0.0011, "lr": 1.7422029505720671e-06, "epoch": 1.552048469450077, "percentage": 31.04, "elapsed_time": "1:22:17", "remaining_time": "3:02:49", "throughput": 8673.81, "total_tokens": 42828448} +{"current_steps": 63535, "total_steps": 204665, "loss": 0.0825, "lr": 1.7421457969422828e-06, "epoch": 1.552170620281924, "percentage": 31.04, "elapsed_time": "1:22:18", "remaining_time": "3:02:48", "throughput": 8673.94, "total_tokens": 42832224} +{"current_steps": 63540, "total_steps": 204665, "loss": 0.044, "lr": 1.7420886379154145e-06, "epoch": 1.5522927711137713, "percentage": 31.05, "elapsed_time": "1:22:18", "remaining_time": "3:02:48", "throughput": 8674.05, "total_tokens": 42835808} +{"current_steps": 63545, "total_steps": 204665, "loss": 0.23, "lr": 1.742031473491878e-06, "epoch": 1.5524149219456185, "percentage": 31.05, "elapsed_time": "1:22:18", "remaining_time": "3:02:47", "throughput": 8674.1, "total_tokens": 42839072} +{"current_steps": 63550, "total_steps": 204665, "loss": 0.0474, "lr": 1.7419743036720892e-06, "epoch": 1.5525370727774657, "percentage": 31.05, "elapsed_time": "1:22:19", "remaining_time": "3:02:47", "throughput": 8674.13, "total_tokens": 42842144} +{"current_steps": 63555, "total_steps": 204665, "loss": 0.0748, "lr": 1.7419171284564634e-06, "epoch": 1.5526592236093126, "percentage": 31.05, "elapsed_time": "1:22:19", "remaining_time": "3:02:46", "throughput": 8674.26, "total_tokens": 42845920} +{"current_steps": 63560, "total_steps": 204665, "loss": 0.1036, "lr": 1.7418599478454165e-06, "epoch": 1.5527813744411598, "percentage": 31.06, "elapsed_time": "1:22:19", "remaining_time": "3:02:46", "throughput": 8674.32, "total_tokens": 42849184} +{"current_steps": 63565, "total_steps": 204665, "loss": 0.1284, "lr": 1.7418027618393651e-06, "epoch": 1.552903525273007, "percentage": 31.06, "elapsed_time": "1:22:20", "remaining_time": "3:02:45", "throughput": 8674.33, "total_tokens": 42852192} +{"current_steps": 63570, "total_steps": 204665, "loss": 0.0016, "lr": 1.741745570438724e-06, "epoch": 1.5530256761048542, "percentage": 31.06, "elapsed_time": "1:22:20", "remaining_time": "3:02:45", "throughput": 8674.44, "total_tokens": 42855840} +{"current_steps": 63575, "total_steps": 204665, "loss": 0.1145, "lr": 1.7416883736439098e-06, "epoch": 1.5531478269367014, "percentage": 31.06, "elapsed_time": "1:22:20", "remaining_time": "3:02:44", "throughput": 8674.47, "total_tokens": 42858912} +{"current_steps": 63580, "total_steps": 204665, "loss": 0.0483, "lr": 1.7416311714553385e-06, "epoch": 1.5532699777685486, "percentage": 31.07, "elapsed_time": "1:22:21", "remaining_time": "3:02:44", "throughput": 8674.51, "total_tokens": 42862112} +{"current_steps": 63585, "total_steps": 204665, "loss": 0.0595, "lr": 1.7415739638734257e-06, "epoch": 1.5533921286003958, "percentage": 31.07, "elapsed_time": "1:22:21", "remaining_time": "3:02:44", "throughput": 8674.55, "total_tokens": 42865312} +{"current_steps": 63590, "total_steps": 204665, "loss": 0.0916, "lr": 1.7415167508985876e-06, "epoch": 1.553514279432243, "percentage": 31.07, "elapsed_time": "1:22:21", "remaining_time": "3:02:43", "throughput": 8674.62, "total_tokens": 42868640} +{"current_steps": 63595, "total_steps": 204665, "loss": 0.1145, "lr": 1.74145953253124e-06, "epoch": 1.5536364302640902, "percentage": 31.07, "elapsed_time": "1:22:22", "remaining_time": "3:02:43", "throughput": 8674.63, "total_tokens": 42871712} +{"current_steps": 63600, "total_steps": 204665, "loss": 0.0532, "lr": 1.7414023087717996e-06, "epoch": 1.5537585810959373, "percentage": 31.08, "elapsed_time": "1:22:22", "remaining_time": "3:02:42", "throughput": 8674.73, "total_tokens": 42875232} +{"current_steps": 63605, "total_steps": 204665, "loss": 0.0717, "lr": 1.741345079620682e-06, "epoch": 1.5538807319277845, "percentage": 31.08, "elapsed_time": "1:22:22", "remaining_time": "3:02:42", "throughput": 8674.87, "total_tokens": 42879072} +{"current_steps": 63610, "total_steps": 204665, "loss": 0.0011, "lr": 1.7412878450783036e-06, "epoch": 1.5540028827596317, "percentage": 31.08, "elapsed_time": "1:22:23", "remaining_time": "3:02:41", "throughput": 8674.92, "total_tokens": 42882272} +{"current_steps": 63615, "total_steps": 204665, "loss": 0.0703, "lr": 1.7412306051450806e-06, "epoch": 1.554125033591479, "percentage": 31.08, "elapsed_time": "1:22:23", "remaining_time": "3:02:41", "throughput": 8674.97, "total_tokens": 42885536} +{"current_steps": 63620, "total_steps": 204665, "loss": 0.077, "lr": 1.741173359821429e-06, "epoch": 1.5542471844233259, "percentage": 31.08, "elapsed_time": "1:22:23", "remaining_time": "3:02:40", "throughput": 8675.03, "total_tokens": 42888864} +{"current_steps": 63625, "total_steps": 204665, "loss": 0.1488, "lr": 1.7411161091077657e-06, "epoch": 1.554369335255173, "percentage": 31.09, "elapsed_time": "1:22:24", "remaining_time": "3:02:40", "throughput": 8675.08, "total_tokens": 42892128} +{"current_steps": 63630, "total_steps": 204665, "loss": 0.1056, "lr": 1.7410588530045067e-06, "epoch": 1.5544914860870203, "percentage": 31.09, "elapsed_time": "1:22:24", "remaining_time": "3:02:39", "throughput": 8675.13, "total_tokens": 42895328} +{"current_steps": 63635, "total_steps": 204665, "loss": 0.1505, "lr": 1.7410015915120684e-06, "epoch": 1.5546136369188674, "percentage": 31.09, "elapsed_time": "1:22:24", "remaining_time": "3:02:39", "throughput": 8675.19, "total_tokens": 42898656} +{"current_steps": 63640, "total_steps": 204665, "loss": 0.1335, "lr": 1.7409443246308674e-06, "epoch": 1.5547357877507146, "percentage": 31.09, "elapsed_time": "1:22:25", "remaining_time": "3:02:38", "throughput": 8675.2, "total_tokens": 42901664} +{"current_steps": 63645, "total_steps": 204665, "loss": 0.0452, "lr": 1.7408870523613194e-06, "epoch": 1.5548579385825616, "percentage": 31.1, "elapsed_time": "1:22:25", "remaining_time": "3:02:38", "throughput": 8675.29, "total_tokens": 42905184} +{"current_steps": 63650, "total_steps": 204665, "loss": 0.0443, "lr": 1.7408297747038422e-06, "epoch": 1.5549800894144088, "percentage": 31.1, "elapsed_time": "1:22:26", "remaining_time": "3:02:37", "throughput": 8675.31, "total_tokens": 42908256} +{"current_steps": 63655, "total_steps": 204665, "loss": 0.0963, "lr": 1.740772491658851e-06, "epoch": 1.555102240246256, "percentage": 31.1, "elapsed_time": "1:22:26", "remaining_time": "3:02:37", "throughput": 8675.43, "total_tokens": 42911904} +{"current_steps": 63660, "total_steps": 204665, "loss": 0.0008, "lr": 1.7407152032267635e-06, "epoch": 1.5552243910781032, "percentage": 31.1, "elapsed_time": "1:22:26", "remaining_time": "3:02:36", "throughput": 8675.46, "total_tokens": 42915040} +{"current_steps": 63665, "total_steps": 204665, "loss": 0.0613, "lr": 1.7406579094079957e-06, "epoch": 1.5553465419099504, "percentage": 31.11, "elapsed_time": "1:22:27", "remaining_time": "3:02:36", "throughput": 8675.48, "total_tokens": 42918112} +{"current_steps": 63670, "total_steps": 204665, "loss": 0.1516, "lr": 1.740600610202964e-06, "epoch": 1.5554686927417976, "percentage": 31.11, "elapsed_time": "1:22:27", "remaining_time": "3:02:35", "throughput": 8675.57, "total_tokens": 42921632} +{"current_steps": 63675, "total_steps": 204665, "loss": 0.1109, "lr": 1.7405433056120857e-06, "epoch": 1.5555908435736447, "percentage": 31.11, "elapsed_time": "1:22:27", "remaining_time": "3:02:35", "throughput": 8675.62, "total_tokens": 42924832} +{"current_steps": 63680, "total_steps": 204665, "loss": 0.078, "lr": 1.7404859956357774e-06, "epoch": 1.555712994405492, "percentage": 31.11, "elapsed_time": "1:22:28", "remaining_time": "3:02:34", "throughput": 8675.71, "total_tokens": 42928352} +{"current_steps": 63685, "total_steps": 204665, "loss": 0.0986, "lr": 1.7404286802744556e-06, "epoch": 1.5558351452373391, "percentage": 31.12, "elapsed_time": "1:22:28", "remaining_time": "3:02:34", "throughput": 8675.76, "total_tokens": 42931616} +{"current_steps": 63690, "total_steps": 204665, "loss": 0.074, "lr": 1.7403713595285374e-06, "epoch": 1.5559572960691863, "percentage": 31.12, "elapsed_time": "1:22:28", "remaining_time": "3:02:33", "throughput": 8675.81, "total_tokens": 42934880} +{"current_steps": 63695, "total_steps": 204665, "loss": 0.0027, "lr": 1.7403140333984397e-06, "epoch": 1.5560794469010335, "percentage": 31.12, "elapsed_time": "1:22:29", "remaining_time": "3:02:33", "throughput": 8675.9, "total_tokens": 42938336} +{"current_steps": 63700, "total_steps": 204665, "loss": 0.0601, "lr": 1.7402567018845788e-06, "epoch": 1.5562015977328807, "percentage": 31.12, "elapsed_time": "1:22:29", "remaining_time": "3:02:33", "throughput": 8675.95, "total_tokens": 42941600} +{"current_steps": 63705, "total_steps": 204665, "loss": 0.0707, "lr": 1.7401993649873722e-06, "epoch": 1.5563237485647279, "percentage": 31.13, "elapsed_time": "1:22:29", "remaining_time": "3:02:32", "throughput": 8676.05, "total_tokens": 42945184} +{"current_steps": 63710, "total_steps": 204665, "loss": 0.0307, "lr": 1.740142022707237e-06, "epoch": 1.5564458993965749, "percentage": 31.13, "elapsed_time": "1:22:30", "remaining_time": "3:02:32", "throughput": 8676.14, "total_tokens": 42948640} +{"current_steps": 63715, "total_steps": 204665, "loss": 0.0839, "lr": 1.7400846750445898e-06, "epoch": 1.556568050228422, "percentage": 31.13, "elapsed_time": "1:22:30", "remaining_time": "3:02:31", "throughput": 8676.26, "total_tokens": 42952352} +{"current_steps": 63720, "total_steps": 204665, "loss": 0.0931, "lr": 1.7400273219998476e-06, "epoch": 1.5566902010602692, "percentage": 31.13, "elapsed_time": "1:22:30", "remaining_time": "3:02:31", "throughput": 8676.36, "total_tokens": 42955872} +{"current_steps": 63725, "total_steps": 204665, "loss": 0.0014, "lr": 1.739969963573428e-06, "epoch": 1.5568123518921164, "percentage": 31.14, "elapsed_time": "1:22:31", "remaining_time": "3:02:30", "throughput": 8676.38, "total_tokens": 42958944} +{"current_steps": 63730, "total_steps": 204665, "loss": 0.0107, "lr": 1.7399125997657476e-06, "epoch": 1.5569345027239634, "percentage": 31.14, "elapsed_time": "1:22:31", "remaining_time": "3:02:30", "throughput": 8676.43, "total_tokens": 42962208} +{"current_steps": 63735, "total_steps": 204665, "loss": 0.0131, "lr": 1.7398552305772238e-06, "epoch": 1.5570566535558106, "percentage": 31.14, "elapsed_time": "1:22:31", "remaining_time": "3:02:29", "throughput": 8676.47, "total_tokens": 42965408} +{"current_steps": 63740, "total_steps": 204665, "loss": 0.0463, "lr": 1.7397978560082737e-06, "epoch": 1.5571788043876578, "percentage": 31.14, "elapsed_time": "1:22:32", "remaining_time": "3:02:29", "throughput": 8676.56, "total_tokens": 42968928} +{"current_steps": 63745, "total_steps": 204665, "loss": 0.2915, "lr": 1.7397404760593147e-06, "epoch": 1.557300955219505, "percentage": 31.15, "elapsed_time": "1:22:32", "remaining_time": "3:02:28", "throughput": 8676.6, "total_tokens": 42972064} +{"current_steps": 63750, "total_steps": 204665, "loss": 0.0011, "lr": 1.739683090730764e-06, "epoch": 1.5574231060513521, "percentage": 31.15, "elapsed_time": "1:22:32", "remaining_time": "3:02:28", "throughput": 8676.65, "total_tokens": 42975328} +{"current_steps": 63755, "total_steps": 204665, "loss": 0.0585, "lr": 1.7396257000230388e-06, "epoch": 1.5575452568831993, "percentage": 31.15, "elapsed_time": "1:22:33", "remaining_time": "3:02:27", "throughput": 8676.74, "total_tokens": 42978848} +{"current_steps": 63760, "total_steps": 204665, "loss": 0.1041, "lr": 1.7395683039365564e-06, "epoch": 1.5576674077150465, "percentage": 31.15, "elapsed_time": "1:22:33", "remaining_time": "3:02:27", "throughput": 8676.81, "total_tokens": 42982176} +{"current_steps": 63765, "total_steps": 204665, "loss": 0.1248, "lr": 1.7395109024717347e-06, "epoch": 1.5577895585468937, "percentage": 31.16, "elapsed_time": "1:22:34", "remaining_time": "3:02:26", "throughput": 8676.93, "total_tokens": 42985824} +{"current_steps": 63770, "total_steps": 204665, "loss": 0.0651, "lr": 1.7394534956289908e-06, "epoch": 1.557911709378741, "percentage": 31.16, "elapsed_time": "1:22:34", "remaining_time": "3:02:26", "throughput": 8677.04, "total_tokens": 42989472} +{"current_steps": 63775, "total_steps": 204665, "loss": 0.1488, "lr": 1.7393960834087422e-06, "epoch": 1.558033860210588, "percentage": 31.16, "elapsed_time": "1:22:34", "remaining_time": "3:02:25", "throughput": 8677.05, "total_tokens": 42992480} +{"current_steps": 63780, "total_steps": 204665, "loss": 0.0823, "lr": 1.7393386658114063e-06, "epoch": 1.5581560110424353, "percentage": 31.16, "elapsed_time": "1:22:35", "remaining_time": "3:02:25", "throughput": 8677.12, "total_tokens": 42995872} +{"current_steps": 63785, "total_steps": 204665, "loss": 0.0981, "lr": 1.7392812428374009e-06, "epoch": 1.5582781618742825, "percentage": 31.17, "elapsed_time": "1:22:35", "remaining_time": "3:02:24", "throughput": 8677.18, "total_tokens": 42999200} +{"current_steps": 63790, "total_steps": 204665, "loss": 0.0597, "lr": 1.7392238144871433e-06, "epoch": 1.5584003127061297, "percentage": 31.17, "elapsed_time": "1:22:35", "remaining_time": "3:02:24", "throughput": 8677.33, "total_tokens": 43003040} +{"current_steps": 63795, "total_steps": 204665, "loss": 0.2189, "lr": 1.7391663807610513e-06, "epoch": 1.5585224635379769, "percentage": 31.17, "elapsed_time": "1:22:36", "remaining_time": "3:02:23", "throughput": 8677.38, "total_tokens": 43006368} +{"current_steps": 63800, "total_steps": 204665, "loss": 0.0449, "lr": 1.7391089416595426e-06, "epoch": 1.5586446143698238, "percentage": 31.17, "elapsed_time": "1:22:36", "remaining_time": "3:02:23", "throughput": 8677.58, "total_tokens": 43010528} +{"current_steps": 63805, "total_steps": 204665, "loss": 0.1947, "lr": 1.7390514971830348e-06, "epoch": 1.558766765201671, "percentage": 31.18, "elapsed_time": "1:22:36", "remaining_time": "3:02:23", "throughput": 8677.6, "total_tokens": 43013600} +{"current_steps": 63810, "total_steps": 204665, "loss": 0.1185, "lr": 1.7389940473319458e-06, "epoch": 1.5588889160335182, "percentage": 31.18, "elapsed_time": "1:22:37", "remaining_time": "3:02:22", "throughput": 8677.69, "total_tokens": 43017056} +{"current_steps": 63815, "total_steps": 204665, "loss": 0.1306, "lr": 1.7389365921066935e-06, "epoch": 1.5590110668653654, "percentage": 31.18, "elapsed_time": "1:22:37", "remaining_time": "3:02:22", "throughput": 8677.75, "total_tokens": 43020384} +{"current_steps": 63820, "total_steps": 204665, "loss": 0.0493, "lr": 1.7388791315076952e-06, "epoch": 1.5591332176972124, "percentage": 31.18, "elapsed_time": "1:22:37", "remaining_time": "3:02:21", "throughput": 8677.82, "total_tokens": 43023776} +{"current_steps": 63825, "total_steps": 204665, "loss": 0.0326, "lr": 1.7388216655353694e-06, "epoch": 1.5592553685290595, "percentage": 31.19, "elapsed_time": "1:22:38", "remaining_time": "3:02:21", "throughput": 8677.9, "total_tokens": 43027232} +{"current_steps": 63830, "total_steps": 204665, "loss": 0.0376, "lr": 1.7387641941901334e-06, "epoch": 1.5593775193609067, "percentage": 31.19, "elapsed_time": "1:22:38", "remaining_time": "3:02:20", "throughput": 8677.97, "total_tokens": 43030624} +{"current_steps": 63835, "total_steps": 204665, "loss": 0.032, "lr": 1.738706717472406e-06, "epoch": 1.559499670192754, "percentage": 31.19, "elapsed_time": "1:22:38", "remaining_time": "3:02:20", "throughput": 8678.08, "total_tokens": 43034272} +{"current_steps": 63840, "total_steps": 204665, "loss": 0.0396, "lr": 1.7386492353826043e-06, "epoch": 1.5596218210246011, "percentage": 31.19, "elapsed_time": "1:22:39", "remaining_time": "3:02:19", "throughput": 8678.12, "total_tokens": 43037472} +{"current_steps": 63845, "total_steps": 204665, "loss": 0.0999, "lr": 1.7385917479211466e-06, "epoch": 1.5597439718564483, "percentage": 31.19, "elapsed_time": "1:22:39", "remaining_time": "3:02:19", "throughput": 8678.18, "total_tokens": 43040800} +{"current_steps": 63850, "total_steps": 204665, "loss": 0.1058, "lr": 1.7385342550884514e-06, "epoch": 1.5598661226882955, "percentage": 31.2, "elapsed_time": "1:22:39", "remaining_time": "3:02:18", "throughput": 8678.23, "total_tokens": 43044000} +{"current_steps": 63855, "total_steps": 204665, "loss": 0.1929, "lr": 1.7384767568849363e-06, "epoch": 1.5599882735201427, "percentage": 31.2, "elapsed_time": "1:22:40", "remaining_time": "3:02:18", "throughput": 8678.3, "total_tokens": 43047392} +{"current_steps": 63860, "total_steps": 204665, "loss": 0.0006, "lr": 1.7384192533110195e-06, "epoch": 1.5601104243519899, "percentage": 31.2, "elapsed_time": "1:22:40", "remaining_time": "3:02:17", "throughput": 8678.35, "total_tokens": 43050656} +{"current_steps": 63865, "total_steps": 204665, "loss": 0.0021, "lr": 1.7383617443671192e-06, "epoch": 1.560232575183837, "percentage": 31.2, "elapsed_time": "1:22:41", "remaining_time": "3:02:17", "throughput": 8678.44, "total_tokens": 43054176} +{"current_steps": 63870, "total_steps": 204665, "loss": 0.2202, "lr": 1.738304230053654e-06, "epoch": 1.5603547260156843, "percentage": 31.21, "elapsed_time": "1:22:41", "remaining_time": "3:02:16", "throughput": 8678.47, "total_tokens": 43057312} +{"current_steps": 63875, "total_steps": 204665, "loss": 0.0036, "lr": 1.7382467103710417e-06, "epoch": 1.5604768768475314, "percentage": 31.21, "elapsed_time": "1:22:41", "remaining_time": "3:02:16", "throughput": 8678.56, "total_tokens": 43060768} +{"current_steps": 63880, "total_steps": 204665, "loss": 0.1038, "lr": 1.738189185319701e-06, "epoch": 1.5605990276793786, "percentage": 31.21, "elapsed_time": "1:22:42", "remaining_time": "3:02:15", "throughput": 8678.64, "total_tokens": 43064224} +{"current_steps": 63885, "total_steps": 204665, "loss": 0.0009, "lr": 1.7381316549000496e-06, "epoch": 1.5607211785112258, "percentage": 31.21, "elapsed_time": "1:22:42", "remaining_time": "3:02:15", "throughput": 8678.73, "total_tokens": 43067680} +{"current_steps": 63890, "total_steps": 204665, "loss": 0.1021, "lr": 1.7380741191125063e-06, "epoch": 1.5608433293430728, "percentage": 31.22, "elapsed_time": "1:22:42", "remaining_time": "3:02:14", "throughput": 8678.77, "total_tokens": 43070880} +{"current_steps": 63895, "total_steps": 204665, "loss": 0.0362, "lr": 1.7380165779574899e-06, "epoch": 1.56096548017492, "percentage": 31.22, "elapsed_time": "1:22:43", "remaining_time": "3:02:14", "throughput": 8678.84, "total_tokens": 43074272} +{"current_steps": 63900, "total_steps": 204665, "loss": 0.1742, "lr": 1.7379590314354178e-06, "epoch": 1.5610876310067672, "percentage": 31.22, "elapsed_time": "1:22:43", "remaining_time": "3:02:14", "throughput": 8678.89, "total_tokens": 43077472} +{"current_steps": 63905, "total_steps": 204665, "loss": 0.0004, "lr": 1.7379014795467097e-06, "epoch": 1.5612097818386144, "percentage": 31.22, "elapsed_time": "1:22:43", "remaining_time": "3:02:13", "throughput": 8678.93, "total_tokens": 43080672} +{"current_steps": 63910, "total_steps": 204665, "loss": 0.0006, "lr": 1.7378439222917834e-06, "epoch": 1.5613319326704613, "percentage": 31.23, "elapsed_time": "1:22:44", "remaining_time": "3:02:13", "throughput": 8678.97, "total_tokens": 43083872} +{"current_steps": 63915, "total_steps": 204665, "loss": 0.0017, "lr": 1.7377863596710575e-06, "epoch": 1.5614540835023085, "percentage": 31.23, "elapsed_time": "1:22:44", "remaining_time": "3:02:12", "throughput": 8679.05, "total_tokens": 43087264} +{"current_steps": 63920, "total_steps": 204665, "loss": 0.0293, "lr": 1.737728791684951e-06, "epoch": 1.5615762343341557, "percentage": 31.23, "elapsed_time": "1:22:44", "remaining_time": "3:02:12", "throughput": 8679.09, "total_tokens": 43090464} +{"current_steps": 63925, "total_steps": 204665, "loss": 0.1468, "lr": 1.7376712183338823e-06, "epoch": 1.561698385166003, "percentage": 31.23, "elapsed_time": "1:22:45", "remaining_time": "3:02:11", "throughput": 8679.13, "total_tokens": 43093664} +{"current_steps": 63930, "total_steps": 204665, "loss": 0.232, "lr": 1.7376136396182696e-06, "epoch": 1.56182053599785, "percentage": 31.24, "elapsed_time": "1:22:45", "remaining_time": "3:02:11", "throughput": 8679.14, "total_tokens": 43096672} +{"current_steps": 63935, "total_steps": 204665, "loss": 0.1323, "lr": 1.7375560555385324e-06, "epoch": 1.5619426868296973, "percentage": 31.24, "elapsed_time": "1:22:45", "remaining_time": "3:02:10", "throughput": 8679.22, "total_tokens": 43100064} +{"current_steps": 63940, "total_steps": 204665, "loss": 0.0674, "lr": 1.7374984660950896e-06, "epoch": 1.5620648376615445, "percentage": 31.24, "elapsed_time": "1:22:46", "remaining_time": "3:02:10", "throughput": 8679.26, "total_tokens": 43103264} +{"current_steps": 63945, "total_steps": 204665, "loss": 0.0028, "lr": 1.737440871288359e-06, "epoch": 1.5621869884933917, "percentage": 31.24, "elapsed_time": "1:22:46", "remaining_time": "3:02:09", "throughput": 8679.26, "total_tokens": 43106208} +{"current_steps": 63950, "total_steps": 204665, "loss": 0.1143, "lr": 1.7373832711187604e-06, "epoch": 1.5623091393252388, "percentage": 31.25, "elapsed_time": "1:22:46", "remaining_time": "3:02:09", "throughput": 8679.36, "total_tokens": 43109792} +{"current_steps": 63955, "total_steps": 204665, "loss": 0.0738, "lr": 1.737325665586712e-06, "epoch": 1.562431290157086, "percentage": 31.25, "elapsed_time": "1:22:47", "remaining_time": "3:02:08", "throughput": 8679.41, "total_tokens": 43112992} +{"current_steps": 63960, "total_steps": 204665, "loss": 0.0492, "lr": 1.7372680546926333e-06, "epoch": 1.5625534409889332, "percentage": 31.25, "elapsed_time": "1:22:47", "remaining_time": "3:02:08", "throughput": 8679.43, "total_tokens": 43116064} +{"current_steps": 63965, "total_steps": 204665, "loss": 0.0771, "lr": 1.737210438436943e-06, "epoch": 1.5626755918207804, "percentage": 31.25, "elapsed_time": "1:22:47", "remaining_time": "3:02:07", "throughput": 8679.44, "total_tokens": 43119072} +{"current_steps": 63970, "total_steps": 204665, "loss": 0.0857, "lr": 1.7371528168200603e-06, "epoch": 1.5627977426526276, "percentage": 31.26, "elapsed_time": "1:22:48", "remaining_time": "3:02:07", "throughput": 8679.53, "total_tokens": 43122528} +{"current_steps": 63975, "total_steps": 204665, "loss": 0.0913, "lr": 1.7370951898424036e-06, "epoch": 1.5629198934844746, "percentage": 31.26, "elapsed_time": "1:22:48", "remaining_time": "3:02:06", "throughput": 8679.62, "total_tokens": 43126048} +{"current_steps": 63980, "total_steps": 204665, "loss": 0.0444, "lr": 1.7370375575043927e-06, "epoch": 1.5630420443163218, "percentage": 31.26, "elapsed_time": "1:22:49", "remaining_time": "3:02:06", "throughput": 8679.69, "total_tokens": 43129376} +{"current_steps": 63985, "total_steps": 204665, "loss": 0.0455, "lr": 1.7369799198064463e-06, "epoch": 1.563164195148169, "percentage": 31.26, "elapsed_time": "1:22:49", "remaining_time": "3:02:05", "throughput": 8679.75, "total_tokens": 43132704} +{"current_steps": 63990, "total_steps": 204665, "loss": 0.1218, "lr": 1.736922276748984e-06, "epoch": 1.5632863459800161, "percentage": 31.27, "elapsed_time": "1:22:49", "remaining_time": "3:02:05", "throughput": 8679.81, "total_tokens": 43135968} +{"current_steps": 63995, "total_steps": 204665, "loss": 0.0014, "lr": 1.7368646283324245e-06, "epoch": 1.5634084968118633, "percentage": 31.27, "elapsed_time": "1:22:50", "remaining_time": "3:02:04", "throughput": 8679.84, "total_tokens": 43139104} +{"current_steps": 64000, "total_steps": 204665, "loss": 0.1232, "lr": 1.7368069745571869e-06, "epoch": 1.5635306476437103, "percentage": 31.27, "elapsed_time": "1:22:50", "remaining_time": "3:02:04", "throughput": 8679.88, "total_tokens": 43142304} +{"current_steps": 64005, "total_steps": 204665, "loss": 0.1279, "lr": 1.7367493154236913e-06, "epoch": 1.5636527984755575, "percentage": 31.27, "elapsed_time": "1:22:50", "remaining_time": "3:02:03", "throughput": 8679.9, "total_tokens": 43145312} +{"current_steps": 64010, "total_steps": 204665, "loss": 0.1902, "lr": 1.736691650932356e-06, "epoch": 1.5637749493074047, "percentage": 31.28, "elapsed_time": "1:22:51", "remaining_time": "3:02:03", "throughput": 8679.93, "total_tokens": 43148384} +{"current_steps": 64015, "total_steps": 204665, "loss": 0.0904, "lr": 1.7366339810836012e-06, "epoch": 1.5638971001392519, "percentage": 31.28, "elapsed_time": "1:22:51", "remaining_time": "3:02:02", "throughput": 8679.94, "total_tokens": 43151392} +{"current_steps": 64020, "total_steps": 204665, "loss": 0.0997, "lr": 1.736576305877846e-06, "epoch": 1.564019250971099, "percentage": 31.28, "elapsed_time": "1:22:51", "remaining_time": "3:02:02", "throughput": 8679.98, "total_tokens": 43154592} +{"current_steps": 64025, "total_steps": 204665, "loss": 0.2109, "lr": 1.7365186253155097e-06, "epoch": 1.5641414018029463, "percentage": 31.28, "elapsed_time": "1:22:52", "remaining_time": "3:02:01", "throughput": 8680.04, "total_tokens": 43157920} +{"current_steps": 64030, "total_steps": 204665, "loss": 0.0488, "lr": 1.736460939397012e-06, "epoch": 1.5642635526347934, "percentage": 31.29, "elapsed_time": "1:22:52", "remaining_time": "3:02:01", "throughput": 8680.16, "total_tokens": 43161568} +{"current_steps": 64035, "total_steps": 204665, "loss": 0.1417, "lr": 1.736403248122772e-06, "epoch": 1.5643857034666406, "percentage": 31.29, "elapsed_time": "1:22:52", "remaining_time": "3:02:00", "throughput": 8680.22, "total_tokens": 43164832} +{"current_steps": 64040, "total_steps": 204665, "loss": 0.1029, "lr": 1.7363455514932097e-06, "epoch": 1.5645078542984878, "percentage": 31.29, "elapsed_time": "1:22:53", "remaining_time": "3:02:00", "throughput": 8680.28, "total_tokens": 43168160} +{"current_steps": 64045, "total_steps": 204665, "loss": 0.1021, "lr": 1.7362878495087446e-06, "epoch": 1.564630005130335, "percentage": 31.29, "elapsed_time": "1:22:53", "remaining_time": "3:01:59", "throughput": 8680.33, "total_tokens": 43171424} +{"current_steps": 64050, "total_steps": 204665, "loss": 0.0037, "lr": 1.7362301421697963e-06, "epoch": 1.5647521559621822, "percentage": 31.3, "elapsed_time": "1:22:53", "remaining_time": "3:01:59", "throughput": 8680.33, "total_tokens": 43174304} +{"current_steps": 64055, "total_steps": 204665, "loss": 0.0322, "lr": 1.7361724294767839e-06, "epoch": 1.5648743067940294, "percentage": 31.3, "elapsed_time": "1:22:54", "remaining_time": "3:01:59", "throughput": 8680.44, "total_tokens": 43177952} +{"current_steps": 64060, "total_steps": 204665, "loss": 0.1224, "lr": 1.7361147114301279e-06, "epoch": 1.5649964576258766, "percentage": 31.3, "elapsed_time": "1:22:54", "remaining_time": "3:01:58", "throughput": 8680.49, "total_tokens": 43181152} +{"current_steps": 64065, "total_steps": 204665, "loss": 0.038, "lr": 1.7360569880302478e-06, "epoch": 1.5651186084577235, "percentage": 31.3, "elapsed_time": "1:22:54", "remaining_time": "3:01:58", "throughput": 8680.54, "total_tokens": 43184416} +{"current_steps": 64070, "total_steps": 204665, "loss": 0.0331, "lr": 1.735999259277563e-06, "epoch": 1.5652407592895707, "percentage": 31.3, "elapsed_time": "1:22:55", "remaining_time": "3:01:57", "throughput": 8680.59, "total_tokens": 43187680} +{"current_steps": 64075, "total_steps": 204665, "loss": 0.107, "lr": 1.7359415251724938e-06, "epoch": 1.565362910121418, "percentage": 31.31, "elapsed_time": "1:22:55", "remaining_time": "3:01:57", "throughput": 8680.7, "total_tokens": 43191264} +{"current_steps": 64080, "total_steps": 204665, "loss": 0.1043, "lr": 1.73588378571546e-06, "epoch": 1.5654850609532651, "percentage": 31.31, "elapsed_time": "1:22:55", "remaining_time": "3:01:56", "throughput": 8680.73, "total_tokens": 43194336} +{"current_steps": 64085, "total_steps": 204665, "loss": 0.0851, "lr": 1.7358260409068813e-06, "epoch": 1.5656072117851123, "percentage": 31.31, "elapsed_time": "1:22:56", "remaining_time": "3:01:56", "throughput": 8680.7, "total_tokens": 43197088} +{"current_steps": 64090, "total_steps": 204665, "loss": 0.0457, "lr": 1.7357682907471776e-06, "epoch": 1.5657293626169593, "percentage": 31.31, "elapsed_time": "1:22:56", "remaining_time": "3:01:55", "throughput": 8680.8, "total_tokens": 43200672} +{"current_steps": 64095, "total_steps": 204665, "loss": 0.0474, "lr": 1.7357105352367692e-06, "epoch": 1.5658515134488065, "percentage": 31.32, "elapsed_time": "1:22:56", "remaining_time": "3:01:55", "throughput": 8680.85, "total_tokens": 43203872} +{"current_steps": 64100, "total_steps": 204665, "loss": 0.0015, "lr": 1.7356527743760756e-06, "epoch": 1.5659736642806537, "percentage": 31.32, "elapsed_time": "1:22:57", "remaining_time": "3:01:54", "throughput": 8680.91, "total_tokens": 43207136} +{"current_steps": 64105, "total_steps": 204665, "loss": 0.0337, "lr": 1.7355950081655175e-06, "epoch": 1.5660958151125008, "percentage": 31.32, "elapsed_time": "1:22:57", "remaining_time": "3:01:54", "throughput": 8680.93, "total_tokens": 43210208} +{"current_steps": 64110, "total_steps": 204665, "loss": 0.1464, "lr": 1.7355372366055145e-06, "epoch": 1.566217965944348, "percentage": 31.32, "elapsed_time": "1:22:57", "remaining_time": "3:01:53", "throughput": 8681.04, "total_tokens": 43213792} +{"current_steps": 64115, "total_steps": 204665, "loss": 0.1498, "lr": 1.7354794596964869e-06, "epoch": 1.5663401167761952, "percentage": 31.33, "elapsed_time": "1:22:58", "remaining_time": "3:01:53", "throughput": 8681.08, "total_tokens": 43216928} +{"current_steps": 64120, "total_steps": 204665, "loss": 0.0867, "lr": 1.7354216774388549e-06, "epoch": 1.5664622676080424, "percentage": 31.33, "elapsed_time": "1:22:58", "remaining_time": "3:01:52", "throughput": 8681.08, "total_tokens": 43219872} +{"current_steps": 64125, "total_steps": 204665, "loss": 0.1274, "lr": 1.7353638898330384e-06, "epoch": 1.5665844184398896, "percentage": 31.33, "elapsed_time": "1:22:58", "remaining_time": "3:01:52", "throughput": 8681.16, "total_tokens": 43223264} +{"current_steps": 64130, "total_steps": 204665, "loss": 0.0383, "lr": 1.7353060968794582e-06, "epoch": 1.5667065692717368, "percentage": 31.33, "elapsed_time": "1:22:59", "remaining_time": "3:01:51", "throughput": 8681.23, "total_tokens": 43226656} +{"current_steps": 64135, "total_steps": 204665, "loss": 0.0377, "lr": 1.735248298578534e-06, "epoch": 1.566828720103584, "percentage": 31.34, "elapsed_time": "1:22:59", "remaining_time": "3:01:51", "throughput": 8681.26, "total_tokens": 43229728} +{"current_steps": 64140, "total_steps": 204665, "loss": 0.0776, "lr": 1.7351904949306867e-06, "epoch": 1.5669508709354312, "percentage": 31.34, "elapsed_time": "1:23:00", "remaining_time": "3:01:50", "throughput": 8681.32, "total_tokens": 43233056} +{"current_steps": 64145, "total_steps": 204665, "loss": 0.0346, "lr": 1.7351326859363363e-06, "epoch": 1.5670730217672784, "percentage": 31.34, "elapsed_time": "1:23:00", "remaining_time": "3:01:50", "throughput": 8681.46, "total_tokens": 43236832} +{"current_steps": 64150, "total_steps": 204665, "loss": 0.1614, "lr": 1.7350748715959035e-06, "epoch": 1.5671951725991256, "percentage": 31.34, "elapsed_time": "1:23:00", "remaining_time": "3:01:49", "throughput": 8681.53, "total_tokens": 43240224} +{"current_steps": 64155, "total_steps": 204665, "loss": 0.0474, "lr": 1.7350170519098079e-06, "epoch": 1.5673173234309725, "percentage": 31.35, "elapsed_time": "1:23:01", "remaining_time": "3:01:49", "throughput": 8681.49, "total_tokens": 43242848} +{"current_steps": 64160, "total_steps": 204665, "loss": 0.0847, "lr": 1.7349592268784712e-06, "epoch": 1.5674394742628197, "percentage": 31.35, "elapsed_time": "1:23:01", "remaining_time": "3:01:48", "throughput": 8681.57, "total_tokens": 43246240} +{"current_steps": 64165, "total_steps": 204665, "loss": 0.0447, "lr": 1.7349013965023129e-06, "epoch": 1.567561625094667, "percentage": 31.35, "elapsed_time": "1:23:01", "remaining_time": "3:01:48", "throughput": 8681.67, "total_tokens": 43249824} +{"current_steps": 64170, "total_steps": 204665, "loss": 0.1189, "lr": 1.7348435607817544e-06, "epoch": 1.567683775926514, "percentage": 31.35, "elapsed_time": "1:23:02", "remaining_time": "3:01:47", "throughput": 8681.8, "total_tokens": 43253536} +{"current_steps": 64175, "total_steps": 204665, "loss": 0.0759, "lr": 1.7347857197172155e-06, "epoch": 1.5678059267583613, "percentage": 31.36, "elapsed_time": "1:23:02", "remaining_time": "3:01:47", "throughput": 8681.88, "total_tokens": 43256992} +{"current_steps": 64180, "total_steps": 204665, "loss": 0.1785, "lr": 1.7347278733091174e-06, "epoch": 1.5679280775902082, "percentage": 31.36, "elapsed_time": "1:23:02", "remaining_time": "3:01:46", "throughput": 8681.92, "total_tokens": 43260256} +{"current_steps": 64185, "total_steps": 204665, "loss": 0.0056, "lr": 1.7346700215578808e-06, "epoch": 1.5680502284220554, "percentage": 31.36, "elapsed_time": "1:23:03", "remaining_time": "3:01:46", "throughput": 8681.97, "total_tokens": 43263456} +{"current_steps": 64190, "total_steps": 204665, "loss": 0.1561, "lr": 1.7346121644639258e-06, "epoch": 1.5681723792539026, "percentage": 31.36, "elapsed_time": "1:23:03", "remaining_time": "3:01:46", "throughput": 8682.09, "total_tokens": 43267168} +{"current_steps": 64195, "total_steps": 204665, "loss": 0.1017, "lr": 1.7345543020276735e-06, "epoch": 1.5682945300857498, "percentage": 31.37, "elapsed_time": "1:23:03", "remaining_time": "3:01:45", "throughput": 8682.16, "total_tokens": 43270560} +{"current_steps": 64200, "total_steps": 204665, "loss": 0.168, "lr": 1.734496434249545e-06, "epoch": 1.568416680917597, "percentage": 31.37, "elapsed_time": "1:23:04", "remaining_time": "3:01:45", "throughput": 8682.17, "total_tokens": 43273504} +{"current_steps": 64205, "total_steps": 204665, "loss": 0.021, "lr": 1.734438561129961e-06, "epoch": 1.5685388317494442, "percentage": 31.37, "elapsed_time": "1:23:04", "remaining_time": "3:01:44", "throughput": 8682.24, "total_tokens": 43276896} +{"current_steps": 64210, "total_steps": 204665, "loss": 0.0446, "lr": 1.734380682669342e-06, "epoch": 1.5686609825812914, "percentage": 31.37, "elapsed_time": "1:23:04", "remaining_time": "3:01:44", "throughput": 8682.29, "total_tokens": 43280160} +{"current_steps": 64215, "total_steps": 204665, "loss": 0.1164, "lr": 1.734322798868109e-06, "epoch": 1.5687831334131386, "percentage": 31.38, "elapsed_time": "1:23:05", "remaining_time": "3:01:43", "throughput": 8682.31, "total_tokens": 43283232} +{"current_steps": 64220, "total_steps": 204665, "loss": 0.0437, "lr": 1.7342649097266837e-06, "epoch": 1.5689052842449858, "percentage": 31.38, "elapsed_time": "1:23:05", "remaining_time": "3:01:43", "throughput": 8682.38, "total_tokens": 43286688} +{"current_steps": 64225, "total_steps": 204665, "loss": 0.1511, "lr": 1.734207015245486e-06, "epoch": 1.569027435076833, "percentage": 31.38, "elapsed_time": "1:23:05", "remaining_time": "3:01:42", "throughput": 8682.45, "total_tokens": 43290080} +{"current_steps": 64230, "total_steps": 204665, "loss": 0.0503, "lr": 1.7341491154249374e-06, "epoch": 1.5691495859086801, "percentage": 31.38, "elapsed_time": "1:23:06", "remaining_time": "3:01:42", "throughput": 8682.57, "total_tokens": 43293728} +{"current_steps": 64235, "total_steps": 204665, "loss": 0.0409, "lr": 1.734091210265459e-06, "epoch": 1.5692717367405273, "percentage": 31.39, "elapsed_time": "1:23:06", "remaining_time": "3:01:41", "throughput": 8682.64, "total_tokens": 43297120} +{"current_steps": 64240, "total_steps": 204665, "loss": 0.0017, "lr": 1.7340332997674722e-06, "epoch": 1.5693938875723745, "percentage": 31.39, "elapsed_time": "1:23:06", "remaining_time": "3:01:41", "throughput": 8682.64, "total_tokens": 43300064} +{"current_steps": 64245, "total_steps": 204665, "loss": 0.1356, "lr": 1.7339753839313972e-06, "epoch": 1.5695160384042215, "percentage": 31.39, "elapsed_time": "1:23:07", "remaining_time": "3:01:40", "throughput": 8682.69, "total_tokens": 43303328} +{"current_steps": 64250, "total_steps": 204665, "loss": 0.0647, "lr": 1.7339174627576564e-06, "epoch": 1.5696381892360687, "percentage": 31.39, "elapsed_time": "1:23:07", "remaining_time": "3:01:40", "throughput": 8682.69, "total_tokens": 43306272} +{"current_steps": 64255, "total_steps": 204665, "loss": 0.1772, "lr": 1.7338595362466702e-06, "epoch": 1.5697603400679159, "percentage": 31.4, "elapsed_time": "1:23:07", "remaining_time": "3:01:39", "throughput": 8682.73, "total_tokens": 43309472} +{"current_steps": 64260, "total_steps": 204665, "loss": 0.0525, "lr": 1.73380160439886e-06, "epoch": 1.569882490899763, "percentage": 31.4, "elapsed_time": "1:23:08", "remaining_time": "3:01:39", "throughput": 8682.76, "total_tokens": 43312608} +{"current_steps": 64265, "total_steps": 204665, "loss": 0.0366, "lr": 1.7337436672146472e-06, "epoch": 1.57000464173161, "percentage": 31.4, "elapsed_time": "1:23:08", "remaining_time": "3:01:38", "throughput": 8682.83, "total_tokens": 43316000} +{"current_steps": 64270, "total_steps": 204665, "loss": 0.074, "lr": 1.7336857246944532e-06, "epoch": 1.5701267925634572, "percentage": 31.4, "elapsed_time": "1:23:09", "remaining_time": "3:01:38", "throughput": 8682.81, "total_tokens": 43318752} +{"current_steps": 64275, "total_steps": 204665, "loss": 0.0944, "lr": 1.7336277768386992e-06, "epoch": 1.5702489433953044, "percentage": 31.4, "elapsed_time": "1:23:09", "remaining_time": "3:01:37", "throughput": 8682.82, "total_tokens": 43321760} +{"current_steps": 64280, "total_steps": 204665, "loss": 0.1143, "lr": 1.7335698236478065e-06, "epoch": 1.5703710942271516, "percentage": 31.41, "elapsed_time": "1:23:09", "remaining_time": "3:01:37", "throughput": 8682.85, "total_tokens": 43324896} +{"current_steps": 64285, "total_steps": 204665, "loss": 0.0621, "lr": 1.733511865122197e-06, "epoch": 1.5704932450589988, "percentage": 31.41, "elapsed_time": "1:23:10", "remaining_time": "3:01:36", "throughput": 8682.95, "total_tokens": 43328480} +{"current_steps": 64290, "total_steps": 204665, "loss": 0.0431, "lr": 1.7334539012622918e-06, "epoch": 1.570615395890846, "percentage": 31.41, "elapsed_time": "1:23:10", "remaining_time": "3:01:36", "throughput": 8682.99, "total_tokens": 43331680} +{"current_steps": 64295, "total_steps": 204665, "loss": 0.2625, "lr": 1.7333959320685125e-06, "epoch": 1.5707375467226932, "percentage": 31.41, "elapsed_time": "1:23:10", "remaining_time": "3:01:35", "throughput": 8683.03, "total_tokens": 43334880} +{"current_steps": 64300, "total_steps": 204665, "loss": 0.0224, "lr": 1.7333379575412809e-06, "epoch": 1.5708596975545404, "percentage": 31.42, "elapsed_time": "1:23:11", "remaining_time": "3:01:35", "throughput": 8683.1, "total_tokens": 43338208} +{"current_steps": 64305, "total_steps": 204665, "loss": 0.0158, "lr": 1.7332799776810184e-06, "epoch": 1.5709818483863875, "percentage": 31.42, "elapsed_time": "1:23:11", "remaining_time": "3:01:34", "throughput": 8683.11, "total_tokens": 43341280} +{"current_steps": 64310, "total_steps": 204665, "loss": 0.1509, "lr": 1.7332219924881465e-06, "epoch": 1.5711039992182347, "percentage": 31.42, "elapsed_time": "1:23:11", "remaining_time": "3:01:34", "throughput": 8683.22, "total_tokens": 43344864} +{"current_steps": 64315, "total_steps": 204665, "loss": 0.0489, "lr": 1.7331640019630874e-06, "epoch": 1.571226150050082, "percentage": 31.42, "elapsed_time": "1:23:12", "remaining_time": "3:01:34", "throughput": 8683.28, "total_tokens": 43348192} +{"current_steps": 64320, "total_steps": 204665, "loss": 0.1044, "lr": 1.733106006106262e-06, "epoch": 1.5713483008819291, "percentage": 31.43, "elapsed_time": "1:23:12", "remaining_time": "3:01:33", "throughput": 8683.38, "total_tokens": 43351776} +{"current_steps": 64325, "total_steps": 204665, "loss": 0.1271, "lr": 1.7330480049180927e-06, "epoch": 1.5714704517137763, "percentage": 31.43, "elapsed_time": "1:23:12", "remaining_time": "3:01:33", "throughput": 8683.54, "total_tokens": 43355744} +{"current_steps": 64330, "total_steps": 204665, "loss": 0.0984, "lr": 1.7329899983990013e-06, "epoch": 1.5715926025456235, "percentage": 31.43, "elapsed_time": "1:23:13", "remaining_time": "3:01:32", "throughput": 8683.65, "total_tokens": 43359392} +{"current_steps": 64335, "total_steps": 204665, "loss": 0.1279, "lr": 1.7329319865494094e-06, "epoch": 1.5717147533774705, "percentage": 31.43, "elapsed_time": "1:23:13", "remaining_time": "3:01:32", "throughput": 8683.74, "total_tokens": 43362912} +{"current_steps": 64340, "total_steps": 204665, "loss": 0.0015, "lr": 1.7328739693697389e-06, "epoch": 1.5718369042093177, "percentage": 31.44, "elapsed_time": "1:23:13", "remaining_time": "3:01:31", "throughput": 8683.77, "total_tokens": 43366048} +{"current_steps": 64345, "total_steps": 204665, "loss": 0.062, "lr": 1.7328159468604118e-06, "epoch": 1.5719590550411648, "percentage": 31.44, "elapsed_time": "1:23:14", "remaining_time": "3:01:31", "throughput": 8683.77, "total_tokens": 43368992} +{"current_steps": 64350, "total_steps": 204665, "loss": 0.0844, "lr": 1.73275791902185e-06, "epoch": 1.572081205873012, "percentage": 31.44, "elapsed_time": "1:23:14", "remaining_time": "3:01:30", "throughput": 8683.87, "total_tokens": 43372576} +{"current_steps": 64355, "total_steps": 204665, "loss": 0.0953, "lr": 1.7326998858544757e-06, "epoch": 1.572203356704859, "percentage": 31.44, "elapsed_time": "1:23:14", "remaining_time": "3:01:30", "throughput": 8683.95, "total_tokens": 43375968} +{"current_steps": 64360, "total_steps": 204665, "loss": 0.0172, "lr": 1.7326418473587108e-06, "epoch": 1.5723255075367062, "percentage": 31.45, "elapsed_time": "1:23:15", "remaining_time": "3:01:29", "throughput": 8683.97, "total_tokens": 43379040} +{"current_steps": 64365, "total_steps": 204665, "loss": 0.128, "lr": 1.732583803534977e-06, "epoch": 1.5724476583685534, "percentage": 31.45, "elapsed_time": "1:23:15", "remaining_time": "3:01:29", "throughput": 8684.05, "total_tokens": 43382496} +{"current_steps": 64370, "total_steps": 204665, "loss": 0.0183, "lr": 1.732525754383697e-06, "epoch": 1.5725698092004006, "percentage": 31.45, "elapsed_time": "1:23:15", "remaining_time": "3:01:28", "throughput": 8684.1, "total_tokens": 43385696} +{"current_steps": 64375, "total_steps": 204665, "loss": 0.0768, "lr": 1.7324676999052925e-06, "epoch": 1.5726919600322478, "percentage": 31.45, "elapsed_time": "1:23:16", "remaining_time": "3:01:28", "throughput": 8684.23, "total_tokens": 43389472} +{"current_steps": 64380, "total_steps": 204665, "loss": 0.0202, "lr": 1.7324096401001862e-06, "epoch": 1.572814110864095, "percentage": 31.46, "elapsed_time": "1:23:16", "remaining_time": "3:01:27", "throughput": 8684.34, "total_tokens": 43393120} +{"current_steps": 64385, "total_steps": 204665, "loss": 0.0321, "lr": 1.7323515749687997e-06, "epoch": 1.5729362616959421, "percentage": 31.46, "elapsed_time": "1:23:17", "remaining_time": "3:01:27", "throughput": 8684.35, "total_tokens": 43396128} +{"current_steps": 64390, "total_steps": 204665, "loss": 0.0721, "lr": 1.7322935045115557e-06, "epoch": 1.5730584125277893, "percentage": 31.46, "elapsed_time": "1:23:17", "remaining_time": "3:01:26", "throughput": 8684.47, "total_tokens": 43399840} +{"current_steps": 64395, "total_steps": 204665, "loss": 0.0916, "lr": 1.732235428728876e-06, "epoch": 1.5731805633596365, "percentage": 31.46, "elapsed_time": "1:23:17", "remaining_time": "3:01:26", "throughput": 8684.49, "total_tokens": 43402912} +{"current_steps": 64400, "total_steps": 204665, "loss": 0.18, "lr": 1.732177347621184e-06, "epoch": 1.5733027141914837, "percentage": 31.47, "elapsed_time": "1:23:18", "remaining_time": "3:01:25", "throughput": 8684.49, "total_tokens": 43405856} +{"current_steps": 64405, "total_steps": 204665, "loss": 0.0016, "lr": 1.7321192611889008e-06, "epoch": 1.573424865023331, "percentage": 31.47, "elapsed_time": "1:23:18", "remaining_time": "3:01:25", "throughput": 8684.56, "total_tokens": 43409248} +{"current_steps": 64410, "total_steps": 204665, "loss": 0.0976, "lr": 1.7320611694324497e-06, "epoch": 1.573547015855178, "percentage": 31.47, "elapsed_time": "1:23:18", "remaining_time": "3:01:25", "throughput": 8684.73, "total_tokens": 43413216} +{"current_steps": 64415, "total_steps": 204665, "loss": 0.0812, "lr": 1.7320030723522527e-06, "epoch": 1.5736691666870253, "percentage": 31.47, "elapsed_time": "1:23:19", "remaining_time": "3:01:24", "throughput": 8684.76, "total_tokens": 43416352} +{"current_steps": 64420, "total_steps": 204665, "loss": 0.0454, "lr": 1.7319449699487327e-06, "epoch": 1.5737913175188725, "percentage": 31.48, "elapsed_time": "1:23:19", "remaining_time": "3:01:24", "throughput": 8684.79, "total_tokens": 43419488} +{"current_steps": 64425, "total_steps": 204665, "loss": 0.1577, "lr": 1.731886862222312e-06, "epoch": 1.5739134683507194, "percentage": 31.48, "elapsed_time": "1:23:19", "remaining_time": "3:01:23", "throughput": 8684.96, "total_tokens": 43423456} +{"current_steps": 64430, "total_steps": 204665, "loss": 0.0902, "lr": 1.7318287491734131e-06, "epoch": 1.5740356191825666, "percentage": 31.48, "elapsed_time": "1:23:20", "remaining_time": "3:01:23", "throughput": 8685.06, "total_tokens": 43427040} +{"current_steps": 64435, "total_steps": 204665, "loss": 0.2328, "lr": 1.7317706308024587e-06, "epoch": 1.5741577700144138, "percentage": 31.48, "elapsed_time": "1:23:20", "remaining_time": "3:01:22", "throughput": 8685.14, "total_tokens": 43430432} +{"current_steps": 64440, "total_steps": 204665, "loss": 0.006, "lr": 1.7317125071098712e-06, "epoch": 1.574279920846261, "percentage": 31.49, "elapsed_time": "1:23:20", "remaining_time": "3:01:22", "throughput": 8685.23, "total_tokens": 43433952} +{"current_steps": 64445, "total_steps": 204665, "loss": 0.1223, "lr": 1.731654378096074e-06, "epoch": 1.574402071678108, "percentage": 31.49, "elapsed_time": "1:23:21", "remaining_time": "3:01:21", "throughput": 8685.24, "total_tokens": 43436896} +{"current_steps": 64450, "total_steps": 204665, "loss": 0.0534, "lr": 1.731596243761489e-06, "epoch": 1.5745242225099552, "percentage": 31.49, "elapsed_time": "1:23:21", "remaining_time": "3:01:21", "throughput": 8685.28, "total_tokens": 43440096} +{"current_steps": 64455, "total_steps": 204665, "loss": 0.0694, "lr": 1.7315381041065396e-06, "epoch": 1.5746463733418024, "percentage": 31.49, "elapsed_time": "1:23:21", "remaining_time": "3:01:20", "throughput": 8685.35, "total_tokens": 43443424} +{"current_steps": 64460, "total_steps": 204665, "loss": 0.0658, "lr": 1.7314799591316483e-06, "epoch": 1.5747685241736495, "percentage": 31.5, "elapsed_time": "1:23:22", "remaining_time": "3:01:20", "throughput": 8685.39, "total_tokens": 43446560} +{"current_steps": 64465, "total_steps": 204665, "loss": 0.1655, "lr": 1.7314218088372378e-06, "epoch": 1.5748906750054967, "percentage": 31.5, "elapsed_time": "1:23:22", "remaining_time": "3:01:19", "throughput": 8685.52, "total_tokens": 43450336} +{"current_steps": 64470, "total_steps": 204665, "loss": 0.1515, "lr": 1.7313636532237315e-06, "epoch": 1.575012825837344, "percentage": 31.5, "elapsed_time": "1:23:22", "remaining_time": "3:01:19", "throughput": 8685.64, "total_tokens": 43453984} +{"current_steps": 64475, "total_steps": 204665, "loss": 0.0834, "lr": 1.7313054922915518e-06, "epoch": 1.5751349766691911, "percentage": 31.5, "elapsed_time": "1:23:23", "remaining_time": "3:01:18", "throughput": 8685.69, "total_tokens": 43457248} +{"current_steps": 64480, "total_steps": 204665, "loss": 0.0865, "lr": 1.7312473260411217e-06, "epoch": 1.5752571275010383, "percentage": 31.51, "elapsed_time": "1:23:23", "remaining_time": "3:01:18", "throughput": 8685.72, "total_tokens": 43460384} +{"current_steps": 64485, "total_steps": 204665, "loss": 0.146, "lr": 1.7311891544728645e-06, "epoch": 1.5753792783328855, "percentage": 31.51, "elapsed_time": "1:23:23", "remaining_time": "3:01:17", "throughput": 8685.73, "total_tokens": 43463392} +{"current_steps": 64490, "total_steps": 204665, "loss": 0.0538, "lr": 1.7311309775872031e-06, "epoch": 1.5755014291647327, "percentage": 31.51, "elapsed_time": "1:23:24", "remaining_time": "3:01:17", "throughput": 8685.77, "total_tokens": 43466528} +{"current_steps": 64495, "total_steps": 204665, "loss": 0.1977, "lr": 1.7310727953845607e-06, "epoch": 1.5756235799965799, "percentage": 31.51, "elapsed_time": "1:23:24", "remaining_time": "3:01:16", "throughput": 8685.85, "total_tokens": 43469984} +{"current_steps": 64500, "total_steps": 204665, "loss": 0.0642, "lr": 1.7310146078653602e-06, "epoch": 1.575745730828427, "percentage": 31.51, "elapsed_time": "1:23:25", "remaining_time": "3:01:16", "throughput": 8685.94, "total_tokens": 43473440} +{"current_steps": 64505, "total_steps": 204665, "loss": 0.0194, "lr": 1.7309564150300248e-06, "epoch": 1.5758678816602743, "percentage": 31.52, "elapsed_time": "1:23:25", "remaining_time": "3:01:15", "throughput": 8686.04, "total_tokens": 43477024} +{"current_steps": 64510, "total_steps": 204665, "loss": 0.0588, "lr": 1.7308982168789779e-06, "epoch": 1.5759900324921212, "percentage": 31.52, "elapsed_time": "1:23:25", "remaining_time": "3:01:15", "throughput": 8686.16, "total_tokens": 43480672} +{"current_steps": 64515, "total_steps": 204665, "loss": 0.1166, "lr": 1.7308400134126427e-06, "epoch": 1.5761121833239684, "percentage": 31.52, "elapsed_time": "1:23:26", "remaining_time": "3:01:15", "throughput": 8686.25, "total_tokens": 43484128} +{"current_steps": 64520, "total_steps": 204665, "loss": 0.0807, "lr": 1.730781804631442e-06, "epoch": 1.5762343341558156, "percentage": 31.52, "elapsed_time": "1:23:26", "remaining_time": "3:01:14", "throughput": 8686.32, "total_tokens": 43487520} +{"current_steps": 64525, "total_steps": 204665, "loss": 0.1008, "lr": 1.7307235905357996e-06, "epoch": 1.5763564849876628, "percentage": 31.53, "elapsed_time": "1:23:26", "remaining_time": "3:01:14", "throughput": 8686.44, "total_tokens": 43491232} +{"current_steps": 64530, "total_steps": 204665, "loss": 0.1098, "lr": 1.7306653711261387e-06, "epoch": 1.57647863581951, "percentage": 31.53, "elapsed_time": "1:23:27", "remaining_time": "3:01:13", "throughput": 8686.45, "total_tokens": 43494240} +{"current_steps": 64535, "total_steps": 204665, "loss": 0.0432, "lr": 1.7306071464028826e-06, "epoch": 1.576600786651357, "percentage": 31.53, "elapsed_time": "1:23:27", "remaining_time": "3:01:13", "throughput": 8686.53, "total_tokens": 43497632} +{"current_steps": 64540, "total_steps": 204665, "loss": 0.0777, "lr": 1.730548916366455e-06, "epoch": 1.5767229374832041, "percentage": 31.53, "elapsed_time": "1:23:27", "remaining_time": "3:01:12", "throughput": 8686.58, "total_tokens": 43500896} +{"current_steps": 64545, "total_steps": 204665, "loss": 0.0021, "lr": 1.730490681017279e-06, "epoch": 1.5768450883150513, "percentage": 31.54, "elapsed_time": "1:23:28", "remaining_time": "3:01:12", "throughput": 8686.66, "total_tokens": 43504288} +{"current_steps": 64550, "total_steps": 204665, "loss": 0.0614, "lr": 1.7304324403557783e-06, "epoch": 1.5769672391468985, "percentage": 31.54, "elapsed_time": "1:23:28", "remaining_time": "3:01:11", "throughput": 8686.81, "total_tokens": 43508192} +{"current_steps": 64555, "total_steps": 204665, "loss": 0.1901, "lr": 1.7303741943823767e-06, "epoch": 1.5770893899787457, "percentage": 31.54, "elapsed_time": "1:23:28", "remaining_time": "3:01:11", "throughput": 8686.92, "total_tokens": 43511776} +{"current_steps": 64560, "total_steps": 204665, "loss": 0.0285, "lr": 1.7303159430974974e-06, "epoch": 1.577211540810593, "percentage": 31.54, "elapsed_time": "1:23:29", "remaining_time": "3:01:10", "throughput": 8686.96, "total_tokens": 43514976} +{"current_steps": 64565, "total_steps": 204665, "loss": 0.1113, "lr": 1.7302576865015642e-06, "epoch": 1.57733369164244, "percentage": 31.55, "elapsed_time": "1:23:29", "remaining_time": "3:01:10", "throughput": 8687.04, "total_tokens": 43518432} +{"current_steps": 64570, "total_steps": 204665, "loss": 0.0049, "lr": 1.7301994245950004e-06, "epoch": 1.5774558424742873, "percentage": 31.55, "elapsed_time": "1:23:29", "remaining_time": "3:01:09", "throughput": 8687.15, "total_tokens": 43522080} +{"current_steps": 64575, "total_steps": 204665, "loss": 0.0845, "lr": 1.7301411573782301e-06, "epoch": 1.5775779933061345, "percentage": 31.55, "elapsed_time": "1:23:30", "remaining_time": "3:01:09", "throughput": 8687.2, "total_tokens": 43525344} +{"current_steps": 64580, "total_steps": 204665, "loss": 0.0425, "lr": 1.7300828848516771e-06, "epoch": 1.5777001441379817, "percentage": 31.55, "elapsed_time": "1:23:30", "remaining_time": "3:01:08", "throughput": 8687.36, "total_tokens": 43529248} +{"current_steps": 64585, "total_steps": 204665, "loss": 0.1888, "lr": 1.730024607015765e-06, "epoch": 1.5778222949698288, "percentage": 31.56, "elapsed_time": "1:23:30", "remaining_time": "3:01:08", "throughput": 8687.39, "total_tokens": 43532384} +{"current_steps": 64590, "total_steps": 204665, "loss": 0.0429, "lr": 1.7299663238709172e-06, "epoch": 1.577944445801676, "percentage": 31.56, "elapsed_time": "1:23:31", "remaining_time": "3:01:07", "throughput": 8687.47, "total_tokens": 43535776} +{"current_steps": 64595, "total_steps": 204665, "loss": 0.0589, "lr": 1.7299080354175584e-06, "epoch": 1.5780665966335232, "percentage": 31.56, "elapsed_time": "1:23:31", "remaining_time": "3:01:07", "throughput": 8687.56, "total_tokens": 43539296} +{"current_steps": 64600, "total_steps": 204665, "loss": 0.0614, "lr": 1.7298497416561118e-06, "epoch": 1.5781887474653702, "percentage": 31.56, "elapsed_time": "1:23:32", "remaining_time": "3:01:07", "throughput": 8687.6, "total_tokens": 43542496} +{"current_steps": 64605, "total_steps": 204665, "loss": 0.0759, "lr": 1.7297914425870017e-06, "epoch": 1.5783108982972174, "percentage": 31.57, "elapsed_time": "1:23:32", "remaining_time": "3:01:06", "throughput": 8687.69, "total_tokens": 43545952} +{"current_steps": 64610, "total_steps": 204665, "loss": 0.057, "lr": 1.7297331382106517e-06, "epoch": 1.5784330491290646, "percentage": 31.57, "elapsed_time": "1:23:32", "remaining_time": "3:01:06", "throughput": 8687.72, "total_tokens": 43549088} +{"current_steps": 64615, "total_steps": 204665, "loss": 0.0591, "lr": 1.7296748285274863e-06, "epoch": 1.5785551999609118, "percentage": 31.57, "elapsed_time": "1:23:33", "remaining_time": "3:01:05", "throughput": 8687.77, "total_tokens": 43552288} +{"current_steps": 64620, "total_steps": 204665, "loss": 0.131, "lr": 1.7296165135379292e-06, "epoch": 1.578677350792759, "percentage": 31.57, "elapsed_time": "1:23:33", "remaining_time": "3:01:05", "throughput": 8687.83, "total_tokens": 43555680} +{"current_steps": 64625, "total_steps": 204665, "loss": 0.0414, "lr": 1.7295581932424045e-06, "epoch": 1.578799501624606, "percentage": 31.58, "elapsed_time": "1:23:33", "remaining_time": "3:01:04", "throughput": 8688.05, "total_tokens": 43559968} +{"current_steps": 64630, "total_steps": 204665, "loss": 0.0777, "lr": 1.7294998676413367e-06, "epoch": 1.578921652456453, "percentage": 31.58, "elapsed_time": "1:23:34", "remaining_time": "3:01:04", "throughput": 8688.13, "total_tokens": 43563360} +{"current_steps": 64635, "total_steps": 204665, "loss": 0.0833, "lr": 1.7294415367351492e-06, "epoch": 1.5790438032883003, "percentage": 31.58, "elapsed_time": "1:23:34", "remaining_time": "3:01:03", "throughput": 8688.22, "total_tokens": 43566880} +{"current_steps": 64640, "total_steps": 204665, "loss": 0.0345, "lr": 1.7293832005242668e-06, "epoch": 1.5791659541201475, "percentage": 31.58, "elapsed_time": "1:23:34", "remaining_time": "3:01:03", "throughput": 8688.29, "total_tokens": 43570272} +{"current_steps": 64645, "total_steps": 204665, "loss": 0.0457, "lr": 1.7293248590091138e-06, "epoch": 1.5792881049519947, "percentage": 31.59, "elapsed_time": "1:23:35", "remaining_time": "3:01:02", "throughput": 8688.34, "total_tokens": 43573536} +{"current_steps": 64650, "total_steps": 204665, "loss": 0.1409, "lr": 1.7292665121901142e-06, "epoch": 1.5794102557838419, "percentage": 31.59, "elapsed_time": "1:23:35", "remaining_time": "3:01:02", "throughput": 8688.37, "total_tokens": 43576672} +{"current_steps": 64655, "total_steps": 204665, "loss": 0.0689, "lr": 1.7292081600676922e-06, "epoch": 1.579532406615689, "percentage": 31.59, "elapsed_time": "1:23:35", "remaining_time": "3:01:01", "throughput": 8688.47, "total_tokens": 43580192} +{"current_steps": 64660, "total_steps": 204665, "loss": 0.0539, "lr": 1.7291498026422724e-06, "epoch": 1.5796545574475362, "percentage": 31.59, "elapsed_time": "1:23:36", "remaining_time": "3:01:01", "throughput": 8688.55, "total_tokens": 43583584} +{"current_steps": 64665, "total_steps": 204665, "loss": 0.238, "lr": 1.7290914399142792e-06, "epoch": 1.5797767082793834, "percentage": 31.6, "elapsed_time": "1:23:36", "remaining_time": "3:01:00", "throughput": 8688.7, "total_tokens": 43587488} +{"current_steps": 64670, "total_steps": 204665, "loss": 0.0774, "lr": 1.729033071884137e-06, "epoch": 1.5798988591112306, "percentage": 31.6, "elapsed_time": "1:23:36", "remaining_time": "3:01:00", "throughput": 8688.71, "total_tokens": 43590432} +{"current_steps": 64675, "total_steps": 204665, "loss": 0.0615, "lr": 1.72897469855227e-06, "epoch": 1.5800210099430778, "percentage": 31.6, "elapsed_time": "1:23:37", "remaining_time": "3:00:59", "throughput": 8688.73, "total_tokens": 43593440} +{"current_steps": 64680, "total_steps": 204665, "loss": 0.0023, "lr": 1.728916319919103e-06, "epoch": 1.580143160774925, "percentage": 31.6, "elapsed_time": "1:23:37", "remaining_time": "3:00:59", "throughput": 8688.79, "total_tokens": 43596768} +{"current_steps": 64685, "total_steps": 204665, "loss": 0.0012, "lr": 1.7288579359850606e-06, "epoch": 1.5802653116067722, "percentage": 31.61, "elapsed_time": "1:23:37", "remaining_time": "3:00:58", "throughput": 8688.88, "total_tokens": 43600288} +{"current_steps": 64690, "total_steps": 204665, "loss": 0.0733, "lr": 1.728799546750567e-06, "epoch": 1.5803874624386192, "percentage": 31.61, "elapsed_time": "1:23:38", "remaining_time": "3:00:58", "throughput": 8689.0, "total_tokens": 43603936} +{"current_steps": 64695, "total_steps": 204665, "loss": 0.0009, "lr": 1.728741152216047e-06, "epoch": 1.5805096132704664, "percentage": 31.61, "elapsed_time": "1:23:38", "remaining_time": "3:00:58", "throughput": 8689.2, "total_tokens": 43608096} +{"current_steps": 64700, "total_steps": 204665, "loss": 0.1137, "lr": 1.7286827523819256e-06, "epoch": 1.5806317641023135, "percentage": 31.61, "elapsed_time": "1:23:39", "remaining_time": "3:00:57", "throughput": 8689.35, "total_tokens": 43612000} +{"current_steps": 64705, "total_steps": 204665, "loss": 0.2981, "lr": 1.7286243472486274e-06, "epoch": 1.5807539149341607, "percentage": 31.62, "elapsed_time": "1:23:39", "remaining_time": "3:00:57", "throughput": 8689.5, "total_tokens": 43615840} +{"current_steps": 64710, "total_steps": 204665, "loss": 0.0367, "lr": 1.7285659368165766e-06, "epoch": 1.580876065766008, "percentage": 31.62, "elapsed_time": "1:23:39", "remaining_time": "3:00:56", "throughput": 8689.59, "total_tokens": 43619296} +{"current_steps": 64715, "total_steps": 204665, "loss": 0.1975, "lr": 1.7285075210861986e-06, "epoch": 1.5809982165978549, "percentage": 31.62, "elapsed_time": "1:23:40", "remaining_time": "3:00:56", "throughput": 8689.63, "total_tokens": 43622496} +{"current_steps": 64720, "total_steps": 204665, "loss": 0.0018, "lr": 1.7284491000579178e-06, "epoch": 1.581120367429702, "percentage": 31.62, "elapsed_time": "1:23:40", "remaining_time": "3:00:55", "throughput": 8689.71, "total_tokens": 43625952} +{"current_steps": 64725, "total_steps": 204665, "loss": 0.0131, "lr": 1.7283906737321592e-06, "epoch": 1.5812425182615493, "percentage": 31.62, "elapsed_time": "1:23:40", "remaining_time": "3:00:55", "throughput": 8689.78, "total_tokens": 43629344} +{"current_steps": 64730, "total_steps": 204665, "loss": 0.1193, "lr": 1.7283322421093478e-06, "epoch": 1.5813646690933965, "percentage": 31.63, "elapsed_time": "1:23:41", "remaining_time": "3:00:54", "throughput": 8689.86, "total_tokens": 43632736} +{"current_steps": 64735, "total_steps": 204665, "loss": 0.0439, "lr": 1.7282738051899084e-06, "epoch": 1.5814868199252436, "percentage": 31.63, "elapsed_time": "1:23:41", "remaining_time": "3:00:54", "throughput": 8689.89, "total_tokens": 43635872} +{"current_steps": 64740, "total_steps": 204665, "loss": 0.0818, "lr": 1.728215362974266e-06, "epoch": 1.5816089707570908, "percentage": 31.63, "elapsed_time": "1:23:41", "remaining_time": "3:00:53", "throughput": 8689.95, "total_tokens": 43639136} +{"current_steps": 64745, "total_steps": 204665, "loss": 0.068, "lr": 1.7281569154628456e-06, "epoch": 1.581731121588938, "percentage": 31.63, "elapsed_time": "1:23:42", "remaining_time": "3:00:53", "throughput": 8689.97, "total_tokens": 43642208} +{"current_steps": 64750, "total_steps": 204665, "loss": 0.0308, "lr": 1.7280984626560725e-06, "epoch": 1.5818532724207852, "percentage": 31.64, "elapsed_time": "1:23:42", "remaining_time": "3:00:52", "throughput": 8690.09, "total_tokens": 43645920} +{"current_steps": 64755, "total_steps": 204665, "loss": 0.106, "lr": 1.728040004554371e-06, "epoch": 1.5819754232526324, "percentage": 31.64, "elapsed_time": "1:23:42", "remaining_time": "3:00:52", "throughput": 8690.22, "total_tokens": 43649696} +{"current_steps": 64760, "total_steps": 204665, "loss": 0.0527, "lr": 1.7279815411581674e-06, "epoch": 1.5820975740844796, "percentage": 31.64, "elapsed_time": "1:23:43", "remaining_time": "3:00:51", "throughput": 8690.35, "total_tokens": 43653408} +{"current_steps": 64765, "total_steps": 204665, "loss": 0.143, "lr": 1.727923072467886e-06, "epoch": 1.5822197249163268, "percentage": 31.64, "elapsed_time": "1:23:43", "remaining_time": "3:00:51", "throughput": 8690.37, "total_tokens": 43656480} +{"current_steps": 64770, "total_steps": 204665, "loss": 0.1607, "lr": 1.727864598483952e-06, "epoch": 1.582341875748174, "percentage": 31.65, "elapsed_time": "1:23:43", "remaining_time": "3:00:50", "throughput": 8690.45, "total_tokens": 43659936} +{"current_steps": 64775, "total_steps": 204665, "loss": 0.0015, "lr": 1.7278061192067913e-06, "epoch": 1.5824640265800212, "percentage": 31.65, "elapsed_time": "1:23:44", "remaining_time": "3:00:50", "throughput": 8690.48, "total_tokens": 43663072} +{"current_steps": 64780, "total_steps": 204665, "loss": 0.1688, "lr": 1.7277476346368284e-06, "epoch": 1.5825861774118681, "percentage": 31.65, "elapsed_time": "1:23:44", "remaining_time": "3:00:50", "throughput": 8690.52, "total_tokens": 43666272} +{"current_steps": 64785, "total_steps": 204665, "loss": 0.0072, "lr": 1.7276891447744888e-06, "epoch": 1.5827083282437153, "percentage": 31.65, "elapsed_time": "1:23:44", "remaining_time": "3:00:49", "throughput": 8690.55, "total_tokens": 43669408} +{"current_steps": 64790, "total_steps": 204665, "loss": 0.0011, "lr": 1.7276306496201983e-06, "epoch": 1.5828304790755625, "percentage": 31.66, "elapsed_time": "1:23:45", "remaining_time": "3:00:49", "throughput": 8690.6, "total_tokens": 43672672} +{"current_steps": 64795, "total_steps": 204665, "loss": 0.1677, "lr": 1.727572149174382e-06, "epoch": 1.5829526299074097, "percentage": 31.66, "elapsed_time": "1:23:45", "remaining_time": "3:00:48", "throughput": 8690.6, "total_tokens": 43675680} +{"current_steps": 64800, "total_steps": 204665, "loss": 0.0625, "lr": 1.727513643437465e-06, "epoch": 1.5830747807392567, "percentage": 31.66, "elapsed_time": "1:23:45", "remaining_time": "3:00:48", "throughput": 8690.63, "total_tokens": 43678816} +{"current_steps": 64805, "total_steps": 204665, "loss": 0.0381, "lr": 1.7274551324098736e-06, "epoch": 1.5831969315711039, "percentage": 31.66, "elapsed_time": "1:23:46", "remaining_time": "3:00:47", "throughput": 8690.71, "total_tokens": 43682208} +{"current_steps": 64810, "total_steps": 204665, "loss": 0.1027, "lr": 1.7273966160920326e-06, "epoch": 1.583319082402951, "percentage": 31.67, "elapsed_time": "1:23:46", "remaining_time": "3:00:47", "throughput": 8690.76, "total_tokens": 43685536} +{"current_steps": 64815, "total_steps": 204665, "loss": 0.0851, "lr": 1.7273380944843678e-06, "epoch": 1.5834412332347982, "percentage": 31.67, "elapsed_time": "1:23:47", "remaining_time": "3:00:46", "throughput": 8690.81, "total_tokens": 43688800} +{"current_steps": 64820, "total_steps": 204665, "loss": 0.048, "lr": 1.727279567587305e-06, "epoch": 1.5835633840666454, "percentage": 31.67, "elapsed_time": "1:23:47", "remaining_time": "3:00:46", "throughput": 8690.85, "total_tokens": 43692000} +{"current_steps": 64825, "total_steps": 204665, "loss": 0.0625, "lr": 1.727221035401269e-06, "epoch": 1.5836855348984926, "percentage": 31.67, "elapsed_time": "1:23:47", "remaining_time": "3:00:45", "throughput": 8690.96, "total_tokens": 43695648} +{"current_steps": 64830, "total_steps": 204665, "loss": 0.0375, "lr": 1.7271624979266864e-06, "epoch": 1.5838076857303398, "percentage": 31.68, "elapsed_time": "1:23:48", "remaining_time": "3:00:45", "throughput": 8690.99, "total_tokens": 43698784} +{"current_steps": 64835, "total_steps": 204665, "loss": 0.0834, "lr": 1.7271039551639826e-06, "epoch": 1.583929836562187, "percentage": 31.68, "elapsed_time": "1:23:48", "remaining_time": "3:00:44", "throughput": 8691.05, "total_tokens": 43702112} +{"current_steps": 64840, "total_steps": 204665, "loss": 0.0592, "lr": 1.727045407113583e-06, "epoch": 1.5840519873940342, "percentage": 31.68, "elapsed_time": "1:23:48", "remaining_time": "3:00:44", "throughput": 8691.15, "total_tokens": 43705696} +{"current_steps": 64845, "total_steps": 204665, "loss": 0.0441, "lr": 1.7269868537759137e-06, "epoch": 1.5841741382258814, "percentage": 31.68, "elapsed_time": "1:23:49", "remaining_time": "3:00:43", "throughput": 8691.24, "total_tokens": 43709216} +{"current_steps": 64850, "total_steps": 204665, "loss": 0.0017, "lr": 1.7269282951514006e-06, "epoch": 1.5842962890577286, "percentage": 31.69, "elapsed_time": "1:23:49", "remaining_time": "3:00:43", "throughput": 8691.31, "total_tokens": 43712544} +{"current_steps": 64855, "total_steps": 204665, "loss": 0.1, "lr": 1.7268697312404694e-06, "epoch": 1.5844184398895758, "percentage": 31.69, "elapsed_time": "1:23:49", "remaining_time": "3:00:42", "throughput": 8691.41, "total_tokens": 43716128} +{"current_steps": 64860, "total_steps": 204665, "loss": 0.096, "lr": 1.726811162043546e-06, "epoch": 1.584540590721423, "percentage": 31.69, "elapsed_time": "1:23:50", "remaining_time": "3:00:42", "throughput": 8691.43, "total_tokens": 43719200} +{"current_steps": 64865, "total_steps": 204665, "loss": 0.0464, "lr": 1.7267525875610562e-06, "epoch": 1.5846627415532701, "percentage": 31.69, "elapsed_time": "1:23:50", "remaining_time": "3:00:41", "throughput": 8691.55, "total_tokens": 43722912} +{"current_steps": 64870, "total_steps": 204665, "loss": 0.0985, "lr": 1.7266940077934262e-06, "epoch": 1.584784892385117, "percentage": 31.7, "elapsed_time": "1:23:50", "remaining_time": "3:00:41", "throughput": 8691.61, "total_tokens": 43726240} +{"current_steps": 64875, "total_steps": 204665, "loss": 0.0261, "lr": 1.726635422741082e-06, "epoch": 1.5849070432169643, "percentage": 31.7, "elapsed_time": "1:23:51", "remaining_time": "3:00:41", "throughput": 8691.66, "total_tokens": 43729504} +{"current_steps": 64880, "total_steps": 204665, "loss": 0.0808, "lr": 1.7265768324044495e-06, "epoch": 1.5850291940488115, "percentage": 31.7, "elapsed_time": "1:23:51", "remaining_time": "3:00:40", "throughput": 8691.74, "total_tokens": 43732960} +{"current_steps": 64885, "total_steps": 204665, "loss": 0.0526, "lr": 1.7265182367839548e-06, "epoch": 1.5851513448806587, "percentage": 31.7, "elapsed_time": "1:23:51", "remaining_time": "3:00:40", "throughput": 8691.77, "total_tokens": 43736096} +{"current_steps": 64890, "total_steps": 204665, "loss": 0.1818, "lr": 1.7264596358800244e-06, "epoch": 1.5852734957125056, "percentage": 31.71, "elapsed_time": "1:23:52", "remaining_time": "3:00:39", "throughput": 8691.87, "total_tokens": 43739680} +{"current_steps": 64895, "total_steps": 204665, "loss": 0.1195, "lr": 1.7264010296930836e-06, "epoch": 1.5853956465443528, "percentage": 31.71, "elapsed_time": "1:23:52", "remaining_time": "3:00:39", "throughput": 8691.95, "total_tokens": 43743136} +{"current_steps": 64900, "total_steps": 204665, "loss": 0.1465, "lr": 1.7263424182235595e-06, "epoch": 1.5855177973762, "percentage": 31.71, "elapsed_time": "1:23:52", "remaining_time": "3:00:38", "throughput": 8691.98, "total_tokens": 43746272} +{"current_steps": 64905, "total_steps": 204665, "loss": 0.1043, "lr": 1.7262838014718777e-06, "epoch": 1.5856399482080472, "percentage": 31.71, "elapsed_time": "1:23:53", "remaining_time": "3:00:38", "throughput": 8692.06, "total_tokens": 43749664} +{"current_steps": 64910, "total_steps": 204665, "loss": 0.0007, "lr": 1.7262251794384648e-06, "epoch": 1.5857620990398944, "percentage": 31.72, "elapsed_time": "1:23:53", "remaining_time": "3:00:37", "throughput": 8692.09, "total_tokens": 43752800} +{"current_steps": 64915, "total_steps": 204665, "loss": 0.0954, "lr": 1.7261665521237472e-06, "epoch": 1.5858842498717416, "percentage": 31.72, "elapsed_time": "1:23:53", "remaining_time": "3:00:37", "throughput": 8692.18, "total_tokens": 43756320} +{"current_steps": 64920, "total_steps": 204665, "loss": 0.1966, "lr": 1.7261079195281512e-06, "epoch": 1.5860064007035888, "percentage": 31.72, "elapsed_time": "1:23:54", "remaining_time": "3:00:36", "throughput": 8692.29, "total_tokens": 43759968} +{"current_steps": 64925, "total_steps": 204665, "loss": 0.0482, "lr": 1.7260492816521032e-06, "epoch": 1.586128551535436, "percentage": 31.72, "elapsed_time": "1:23:54", "remaining_time": "3:00:36", "throughput": 8692.32, "total_tokens": 43763104} +{"current_steps": 64930, "total_steps": 204665, "loss": 0.1171, "lr": 1.7259906384960293e-06, "epoch": 1.5862507023672832, "percentage": 31.73, "elapsed_time": "1:23:55", "remaining_time": "3:00:35", "throughput": 8692.36, "total_tokens": 43766368} +{"current_steps": 64935, "total_steps": 204665, "loss": 0.0357, "lr": 1.7259319900603562e-06, "epoch": 1.5863728531991304, "percentage": 31.73, "elapsed_time": "1:23:55", "remaining_time": "3:00:35", "throughput": 8692.37, "total_tokens": 43769376} +{"current_steps": 64940, "total_steps": 204665, "loss": 0.0418, "lr": 1.7258733363455104e-06, "epoch": 1.5864950040309775, "percentage": 31.73, "elapsed_time": "1:23:55", "remaining_time": "3:00:34", "throughput": 8692.45, "total_tokens": 43772768} +{"current_steps": 64945, "total_steps": 204665, "loss": 0.1548, "lr": 1.7258146773519187e-06, "epoch": 1.5866171548628247, "percentage": 31.73, "elapsed_time": "1:23:56", "remaining_time": "3:00:34", "throughput": 8692.48, "total_tokens": 43775904} +{"current_steps": 64950, "total_steps": 204665, "loss": 0.1765, "lr": 1.725756013080007e-06, "epoch": 1.586739305694672, "percentage": 31.73, "elapsed_time": "1:23:56", "remaining_time": "3:00:33", "throughput": 8692.51, "total_tokens": 43779040} +{"current_steps": 64955, "total_steps": 204665, "loss": 0.003, "lr": 1.7256973435302027e-06, "epoch": 1.586861456526519, "percentage": 31.74, "elapsed_time": "1:23:56", "remaining_time": "3:00:33", "throughput": 8692.57, "total_tokens": 43782368} +{"current_steps": 64960, "total_steps": 204665, "loss": 0.0608, "lr": 1.725638668702932e-06, "epoch": 1.586983607358366, "percentage": 31.74, "elapsed_time": "1:23:57", "remaining_time": "3:00:32", "throughput": 8692.6, "total_tokens": 43785504} +{"current_steps": 64965, "total_steps": 204665, "loss": 0.0023, "lr": 1.725579988598622e-06, "epoch": 1.5871057581902133, "percentage": 31.74, "elapsed_time": "1:23:57", "remaining_time": "3:00:32", "throughput": 8692.64, "total_tokens": 43788640} +{"current_steps": 64970, "total_steps": 204665, "loss": 0.1631, "lr": 1.725521303217699e-06, "epoch": 1.5872279090220605, "percentage": 31.74, "elapsed_time": "1:23:57", "remaining_time": "3:00:31", "throughput": 8692.71, "total_tokens": 43792032} +{"current_steps": 64975, "total_steps": 204665, "loss": 0.0757, "lr": 1.7254626125605898e-06, "epoch": 1.5873500598539076, "percentage": 31.75, "elapsed_time": "1:23:58", "remaining_time": "3:00:31", "throughput": 8692.8, "total_tokens": 43795552} +{"current_steps": 64980, "total_steps": 204665, "loss": 0.0851, "lr": 1.7254039166277213e-06, "epoch": 1.5874722106857546, "percentage": 31.75, "elapsed_time": "1:23:58", "remaining_time": "3:00:31", "throughput": 8693.02, "total_tokens": 43799840} +{"current_steps": 64985, "total_steps": 204665, "loss": 0.0863, "lr": 1.7253452154195206e-06, "epoch": 1.5875943615176018, "percentage": 31.75, "elapsed_time": "1:23:58", "remaining_time": "3:00:30", "throughput": 8693.01, "total_tokens": 43802720} +{"current_steps": 64990, "total_steps": 204665, "loss": 0.001, "lr": 1.7252865089364144e-06, "epoch": 1.587716512349449, "percentage": 31.75, "elapsed_time": "1:23:59", "remaining_time": "3:00:30", "throughput": 8693.29, "total_tokens": 43807456} +{"current_steps": 64995, "total_steps": 204665, "loss": 0.1012, "lr": 1.7252277971788298e-06, "epoch": 1.5878386631812962, "percentage": 31.76, "elapsed_time": "1:23:59", "remaining_time": "3:00:29", "throughput": 8693.32, "total_tokens": 43810656} +{"current_steps": 65000, "total_steps": 204665, "loss": 0.0888, "lr": 1.7251690801471934e-06, "epoch": 1.5879608140131434, "percentage": 31.76, "elapsed_time": "1:23:59", "remaining_time": "3:00:29", "throughput": 8693.39, "total_tokens": 43813984} +{"current_steps": 65005, "total_steps": 204665, "loss": 0.0607, "lr": 1.7251103578419323e-06, "epoch": 1.5880829648449906, "percentage": 31.76, "elapsed_time": "1:24:00", "remaining_time": "3:00:28", "throughput": 8693.47, "total_tokens": 43817504} +{"current_steps": 65010, "total_steps": 204665, "loss": 0.0944, "lr": 1.725051630263474e-06, "epoch": 1.5882051156768378, "percentage": 31.76, "elapsed_time": "1:24:00", "remaining_time": "3:00:28", "throughput": 8693.54, "total_tokens": 43820896} +{"current_steps": 65015, "total_steps": 204665, "loss": 0.1087, "lr": 1.7249928974122448e-06, "epoch": 1.588327266508685, "percentage": 31.77, "elapsed_time": "1:24:00", "remaining_time": "3:00:27", "throughput": 8693.56, "total_tokens": 43823968} +{"current_steps": 65020, "total_steps": 204665, "loss": 0.0211, "lr": 1.7249341592886721e-06, "epoch": 1.5884494173405321, "percentage": 31.77, "elapsed_time": "1:24:01", "remaining_time": "3:00:27", "throughput": 8693.66, "total_tokens": 43827552} +{"current_steps": 65025, "total_steps": 204665, "loss": 0.1244, "lr": 1.7248754158931838e-06, "epoch": 1.5885715681723793, "percentage": 31.77, "elapsed_time": "1:24:01", "remaining_time": "3:00:26", "throughput": 8693.71, "total_tokens": 43830816} +{"current_steps": 65030, "total_steps": 204665, "loss": 0.1491, "lr": 1.724816667226206e-06, "epoch": 1.5886937190042265, "percentage": 31.77, "elapsed_time": "1:24:02", "remaining_time": "3:00:26", "throughput": 8693.79, "total_tokens": 43834272} +{"current_steps": 65035, "total_steps": 204665, "loss": 0.0019, "lr": 1.7247579132881668e-06, "epoch": 1.5888158698360737, "percentage": 31.78, "elapsed_time": "1:24:02", "remaining_time": "3:00:25", "throughput": 8693.86, "total_tokens": 43837664} +{"current_steps": 65040, "total_steps": 204665, "loss": 0.1295, "lr": 1.724699154079493e-06, "epoch": 1.588938020667921, "percentage": 31.78, "elapsed_time": "1:24:02", "remaining_time": "3:00:25", "throughput": 8693.89, "total_tokens": 43840800} +{"current_steps": 65045, "total_steps": 204665, "loss": 0.1518, "lr": 1.724640389600612e-06, "epoch": 1.5890601714997679, "percentage": 31.78, "elapsed_time": "1:24:03", "remaining_time": "3:00:25", "throughput": 8694.05, "total_tokens": 43844768} +{"current_steps": 65050, "total_steps": 204665, "loss": 0.0872, "lr": 1.7245816198519511e-06, "epoch": 1.589182322331615, "percentage": 31.78, "elapsed_time": "1:24:03", "remaining_time": "3:00:24", "throughput": 8694.1, "total_tokens": 43848032} +{"current_steps": 65055, "total_steps": 204665, "loss": 0.0401, "lr": 1.7245228448339383e-06, "epoch": 1.5893044731634622, "percentage": 31.79, "elapsed_time": "1:24:03", "remaining_time": "3:00:24", "throughput": 8694.13, "total_tokens": 43851168} +{"current_steps": 65060, "total_steps": 204665, "loss": 0.1346, "lr": 1.724464064547e-06, "epoch": 1.5894266239953094, "percentage": 31.79, "elapsed_time": "1:24:04", "remaining_time": "3:00:23", "throughput": 8694.13, "total_tokens": 43854112} +{"current_steps": 65065, "total_steps": 204665, "loss": 0.0352, "lr": 1.724405278991564e-06, "epoch": 1.5895487748271566, "percentage": 31.79, "elapsed_time": "1:24:04", "remaining_time": "3:00:23", "throughput": 8694.19, "total_tokens": 43857440} +{"current_steps": 65070, "total_steps": 204665, "loss": 0.0423, "lr": 1.7243464881680583e-06, "epoch": 1.5896709256590036, "percentage": 31.79, "elapsed_time": "1:24:04", "remaining_time": "3:00:22", "throughput": 8694.25, "total_tokens": 43860768} +{"current_steps": 65075, "total_steps": 204665, "loss": 0.0743, "lr": 1.7242876920769102e-06, "epoch": 1.5897930764908508, "percentage": 31.8, "elapsed_time": "1:24:05", "remaining_time": "3:00:22", "throughput": 8694.25, "total_tokens": 43863776} +{"current_steps": 65080, "total_steps": 204665, "loss": 0.0569, "lr": 1.7242288907185469e-06, "epoch": 1.589915227322698, "percentage": 31.8, "elapsed_time": "1:24:05", "remaining_time": "3:00:21", "throughput": 8694.36, "total_tokens": 43867424} +{"current_steps": 65085, "total_steps": 204665, "loss": 0.1019, "lr": 1.7241700840933964e-06, "epoch": 1.5900373781545452, "percentage": 31.8, "elapsed_time": "1:24:05", "remaining_time": "3:00:21", "throughput": 8694.41, "total_tokens": 43870688} +{"current_steps": 65090, "total_steps": 204665, "loss": 0.084, "lr": 1.7241112722018864e-06, "epoch": 1.5901595289863923, "percentage": 31.8, "elapsed_time": "1:24:06", "remaining_time": "3:00:20", "throughput": 8694.43, "total_tokens": 43873696} +{"current_steps": 65095, "total_steps": 204665, "loss": 0.0284, "lr": 1.7240524550444442e-06, "epoch": 1.5902816798182395, "percentage": 31.81, "elapsed_time": "1:24:06", "remaining_time": "3:00:20", "throughput": 8694.44, "total_tokens": 43876704} +{"current_steps": 65100, "total_steps": 204665, "loss": 0.01, "lr": 1.7239936326214978e-06, "epoch": 1.5904038306500867, "percentage": 31.81, "elapsed_time": "1:24:06", "remaining_time": "3:00:19", "throughput": 8694.47, "total_tokens": 43879904} +{"current_steps": 65105, "total_steps": 204665, "loss": 0.0773, "lr": 1.7239348049334754e-06, "epoch": 1.590525981481934, "percentage": 31.81, "elapsed_time": "1:24:07", "remaining_time": "3:00:19", "throughput": 8694.58, "total_tokens": 43883488} +{"current_steps": 65110, "total_steps": 204665, "loss": 0.0556, "lr": 1.7238759719808043e-06, "epoch": 1.590648132313781, "percentage": 31.81, "elapsed_time": "1:24:07", "remaining_time": "3:00:18", "throughput": 8694.6, "total_tokens": 43886560} +{"current_steps": 65115, "total_steps": 204665, "loss": 0.089, "lr": 1.7238171337639122e-06, "epoch": 1.5907702831456283, "percentage": 31.82, "elapsed_time": "1:24:07", "remaining_time": "3:00:18", "throughput": 8694.61, "total_tokens": 43889568} +{"current_steps": 65120, "total_steps": 204665, "loss": 0.1761, "lr": 1.7237582902832273e-06, "epoch": 1.5908924339774755, "percentage": 31.82, "elapsed_time": "1:24:08", "remaining_time": "3:00:17", "throughput": 8694.74, "total_tokens": 43893344} +{"current_steps": 65125, "total_steps": 204665, "loss": 0.1145, "lr": 1.7236994415391774e-06, "epoch": 1.5910145848093227, "percentage": 31.82, "elapsed_time": "1:24:08", "remaining_time": "3:00:17", "throughput": 8694.78, "total_tokens": 43896544} +{"current_steps": 65130, "total_steps": 204665, "loss": 0.1264, "lr": 1.7236405875321904e-06, "epoch": 1.5911367356411699, "percentage": 31.82, "elapsed_time": "1:24:08", "remaining_time": "3:00:16", "throughput": 8694.87, "total_tokens": 43900064} +{"current_steps": 65135, "total_steps": 204665, "loss": 0.0535, "lr": 1.7235817282626947e-06, "epoch": 1.5912588864730168, "percentage": 31.83, "elapsed_time": "1:24:09", "remaining_time": "3:00:16", "throughput": 8694.92, "total_tokens": 43903264} +{"current_steps": 65140, "total_steps": 204665, "loss": 0.0065, "lr": 1.7235228637311179e-06, "epoch": 1.591381037304864, "percentage": 31.83, "elapsed_time": "1:24:09", "remaining_time": "3:00:15", "throughput": 8694.94, "total_tokens": 43906336} +{"current_steps": 65145, "total_steps": 204665, "loss": 0.0023, "lr": 1.723463993937888e-06, "epoch": 1.5915031881367112, "percentage": 31.83, "elapsed_time": "1:24:09", "remaining_time": "3:00:15", "throughput": 8695.0, "total_tokens": 43909664} +{"current_steps": 65150, "total_steps": 204665, "loss": 0.0792, "lr": 1.7234051188834338e-06, "epoch": 1.5916253389685584, "percentage": 31.83, "elapsed_time": "1:24:10", "remaining_time": "3:00:14", "throughput": 8694.99, "total_tokens": 43912608} +{"current_steps": 65155, "total_steps": 204665, "loss": 0.0474, "lr": 1.7233462385681828e-06, "epoch": 1.5917474898004056, "percentage": 31.83, "elapsed_time": "1:24:10", "remaining_time": "3:00:14", "throughput": 8695.05, "total_tokens": 43915872} +{"current_steps": 65160, "total_steps": 204665, "loss": 0.2247, "lr": 1.723287352992563e-06, "epoch": 1.5918696406322526, "percentage": 31.84, "elapsed_time": "1:24:11", "remaining_time": "3:00:14", "throughput": 8695.09, "total_tokens": 43919072} +{"current_steps": 65165, "total_steps": 204665, "loss": 0.0595, "lr": 1.7232284621570037e-06, "epoch": 1.5919917914640997, "percentage": 31.84, "elapsed_time": "1:24:11", "remaining_time": "3:00:13", "throughput": 8695.13, "total_tokens": 43922272} +{"current_steps": 65170, "total_steps": 204665, "loss": 0.0626, "lr": 1.7231695660619323e-06, "epoch": 1.592113942295947, "percentage": 31.84, "elapsed_time": "1:24:11", "remaining_time": "3:00:13", "throughput": 8695.15, "total_tokens": 43925344} +{"current_steps": 65175, "total_steps": 204665, "loss": 0.0018, "lr": 1.723110664707777e-06, "epoch": 1.5922360931277941, "percentage": 31.84, "elapsed_time": "1:24:12", "remaining_time": "3:00:12", "throughput": 8695.2, "total_tokens": 43928608} +{"current_steps": 65180, "total_steps": 204665, "loss": 0.1544, "lr": 1.7230517580949666e-06, "epoch": 1.5923582439596413, "percentage": 31.85, "elapsed_time": "1:24:12", "remaining_time": "3:00:12", "throughput": 8695.26, "total_tokens": 43931936} +{"current_steps": 65185, "total_steps": 204665, "loss": 0.0791, "lr": 1.7229928462239296e-06, "epoch": 1.5924803947914885, "percentage": 31.85, "elapsed_time": "1:24:12", "remaining_time": "3:00:11", "throughput": 8695.35, "total_tokens": 43935456} +{"current_steps": 65190, "total_steps": 204665, "loss": 0.0047, "lr": 1.7229339290950938e-06, "epoch": 1.5926025456233357, "percentage": 31.85, "elapsed_time": "1:24:13", "remaining_time": "3:00:11", "throughput": 8695.42, "total_tokens": 43938848} +{"current_steps": 65195, "total_steps": 204665, "loss": 0.0549, "lr": 1.7228750067088882e-06, "epoch": 1.5927246964551829, "percentage": 31.85, "elapsed_time": "1:24:13", "remaining_time": "3:00:10", "throughput": 8695.52, "total_tokens": 43942432} +{"current_steps": 65200, "total_steps": 204665, "loss": 0.1257, "lr": 1.7228160790657414e-06, "epoch": 1.59284684728703, "percentage": 31.86, "elapsed_time": "1:24:13", "remaining_time": "3:00:10", "throughput": 8695.59, "total_tokens": 43945824} +{"current_steps": 65205, "total_steps": 204665, "loss": 0.008, "lr": 1.722757146166081e-06, "epoch": 1.5929689981188773, "percentage": 31.86, "elapsed_time": "1:24:14", "remaining_time": "3:00:09", "throughput": 8695.68, "total_tokens": 43949344} +{"current_steps": 65210, "total_steps": 204665, "loss": 0.1465, "lr": 1.7226982080103367e-06, "epoch": 1.5930911489507245, "percentage": 31.86, "elapsed_time": "1:24:14", "remaining_time": "3:00:09", "throughput": 8695.79, "total_tokens": 43952992} +{"current_steps": 65215, "total_steps": 204665, "loss": 0.1148, "lr": 1.7226392645989365e-06, "epoch": 1.5932132997825716, "percentage": 31.86, "elapsed_time": "1:24:14", "remaining_time": "3:00:08", "throughput": 8695.86, "total_tokens": 43956320} +{"current_steps": 65220, "total_steps": 204665, "loss": 0.0726, "lr": 1.7225803159323094e-06, "epoch": 1.5933354506144188, "percentage": 31.87, "elapsed_time": "1:24:15", "remaining_time": "3:00:08", "throughput": 8695.87, "total_tokens": 43959328} +{"current_steps": 65225, "total_steps": 204665, "loss": 0.0435, "lr": 1.7225213620108835e-06, "epoch": 1.5934576014462658, "percentage": 31.87, "elapsed_time": "1:24:15", "remaining_time": "3:00:07", "throughput": 8695.91, "total_tokens": 43962528} +{"current_steps": 65230, "total_steps": 204665, "loss": 0.0547, "lr": 1.7224624028350885e-06, "epoch": 1.593579752278113, "percentage": 31.87, "elapsed_time": "1:24:15", "remaining_time": "3:00:07", "throughput": 8695.93, "total_tokens": 43965600} +{"current_steps": 65235, "total_steps": 204665, "loss": 0.0571, "lr": 1.722403438405352e-06, "epoch": 1.5937019031099602, "percentage": 31.87, "elapsed_time": "1:24:16", "remaining_time": "3:00:06", "throughput": 8696.0, "total_tokens": 43968992} +{"current_steps": 65240, "total_steps": 204665, "loss": 0.0006, "lr": 1.7223444687221038e-06, "epoch": 1.5938240539418074, "percentage": 31.88, "elapsed_time": "1:24:16", "remaining_time": "3:00:06", "throughput": 8696.12, "total_tokens": 43972704} +{"current_steps": 65245, "total_steps": 204665, "loss": 0.1366, "lr": 1.722285493785772e-06, "epoch": 1.5939462047736546, "percentage": 31.88, "elapsed_time": "1:24:16", "remaining_time": "3:00:06", "throughput": 8696.16, "total_tokens": 43975904} +{"current_steps": 65250, "total_steps": 204665, "loss": 0.1517, "lr": 1.722226513596786e-06, "epoch": 1.5940683556055015, "percentage": 31.88, "elapsed_time": "1:24:17", "remaining_time": "3:00:05", "throughput": 8696.19, "total_tokens": 43979040} +{"current_steps": 65255, "total_steps": 204665, "loss": 0.0374, "lr": 1.7221675281555745e-06, "epoch": 1.5941905064373487, "percentage": 31.88, "elapsed_time": "1:24:17", "remaining_time": "3:00:05", "throughput": 8696.28, "total_tokens": 43982624} +{"current_steps": 65260, "total_steps": 204665, "loss": 0.0302, "lr": 1.7221085374625665e-06, "epoch": 1.594312657269196, "percentage": 31.89, "elapsed_time": "1:24:17", "remaining_time": "3:00:04", "throughput": 8696.33, "total_tokens": 43985888} +{"current_steps": 65265, "total_steps": 204665, "loss": 0.212, "lr": 1.7220495415181913e-06, "epoch": 1.594434808101043, "percentage": 31.89, "elapsed_time": "1:24:18", "remaining_time": "3:00:04", "throughput": 8696.41, "total_tokens": 43989344} +{"current_steps": 65270, "total_steps": 204665, "loss": 0.0446, "lr": 1.721990540322877e-06, "epoch": 1.5945569589328903, "percentage": 31.89, "elapsed_time": "1:24:18", "remaining_time": "3:00:03", "throughput": 8696.47, "total_tokens": 43992608} +{"current_steps": 65275, "total_steps": 204665, "loss": 0.0013, "lr": 1.7219315338770536e-06, "epoch": 1.5946791097647375, "percentage": 31.89, "elapsed_time": "1:24:19", "remaining_time": "3:00:03", "throughput": 8696.49, "total_tokens": 43995680} +{"current_steps": 65280, "total_steps": 204665, "loss": 0.086, "lr": 1.7218725221811501e-06, "epoch": 1.5948012605965847, "percentage": 31.9, "elapsed_time": "1:24:19", "remaining_time": "3:00:02", "throughput": 8696.59, "total_tokens": 43999264} +{"current_steps": 65285, "total_steps": 204665, "loss": 0.1713, "lr": 1.7218135052355954e-06, "epoch": 1.5949234114284319, "percentage": 31.9, "elapsed_time": "1:24:19", "remaining_time": "3:00:02", "throughput": 8696.81, "total_tokens": 44003552} +{"current_steps": 65290, "total_steps": 204665, "loss": 0.0031, "lr": 1.7217544830408187e-06, "epoch": 1.595045562260279, "percentage": 31.9, "elapsed_time": "1:24:20", "remaining_time": "3:00:01", "throughput": 8696.84, "total_tokens": 44006688} +{"current_steps": 65295, "total_steps": 204665, "loss": 0.0326, "lr": 1.7216954555972492e-06, "epoch": 1.5951677130921262, "percentage": 31.9, "elapsed_time": "1:24:20", "remaining_time": "3:00:01", "throughput": 8696.94, "total_tokens": 44010336} +{"current_steps": 65300, "total_steps": 204665, "loss": 0.0907, "lr": 1.7216364229053162e-06, "epoch": 1.5952898639239734, "percentage": 31.91, "elapsed_time": "1:24:20", "remaining_time": "3:00:00", "throughput": 8696.97, "total_tokens": 44013472} +{"current_steps": 65305, "total_steps": 204665, "loss": 0.1265, "lr": 1.721577384965449e-06, "epoch": 1.5954120147558206, "percentage": 31.91, "elapsed_time": "1:24:21", "remaining_time": "3:00:00", "throughput": 8697.05, "total_tokens": 44016928} +{"current_steps": 65310, "total_steps": 204665, "loss": 0.0761, "lr": 1.7215183417780771e-06, "epoch": 1.5955341655876678, "percentage": 31.91, "elapsed_time": "1:24:21", "remaining_time": "2:59:59", "throughput": 8697.15, "total_tokens": 44020512} +{"current_steps": 65315, "total_steps": 204665, "loss": 0.0912, "lr": 1.7214592933436298e-06, "epoch": 1.5956563164195148, "percentage": 31.91, "elapsed_time": "1:24:21", "remaining_time": "2:59:59", "throughput": 8697.21, "total_tokens": 44023840} +{"current_steps": 65320, "total_steps": 204665, "loss": 0.2207, "lr": 1.7214002396625365e-06, "epoch": 1.595778467251362, "percentage": 31.92, "elapsed_time": "1:24:22", "remaining_time": "2:59:58", "throughput": 8697.29, "total_tokens": 44027296} +{"current_steps": 65325, "total_steps": 204665, "loss": 0.0872, "lr": 1.7213411807352265e-06, "epoch": 1.5959006180832092, "percentage": 31.92, "elapsed_time": "1:24:22", "remaining_time": "2:59:58", "throughput": 8697.33, "total_tokens": 44030496} +{"current_steps": 65330, "total_steps": 204665, "loss": 0.0273, "lr": 1.7212821165621295e-06, "epoch": 1.5960227689150563, "percentage": 31.92, "elapsed_time": "1:24:22", "remaining_time": "2:59:58", "throughput": 8697.41, "total_tokens": 44033952} +{"current_steps": 65335, "total_steps": 204665, "loss": 0.0352, "lr": 1.7212230471436748e-06, "epoch": 1.5961449197469033, "percentage": 31.92, "elapsed_time": "1:24:23", "remaining_time": "2:59:57", "throughput": 8697.49, "total_tokens": 44037344} +{"current_steps": 65340, "total_steps": 204665, "loss": 0.0852, "lr": 1.7211639724802921e-06, "epoch": 1.5962670705787505, "percentage": 31.93, "elapsed_time": "1:24:23", "remaining_time": "2:59:57", "throughput": 8697.57, "total_tokens": 44040800} +{"current_steps": 65345, "total_steps": 204665, "loss": 0.084, "lr": 1.7211048925724112e-06, "epoch": 1.5963892214105977, "percentage": 31.93, "elapsed_time": "1:24:23", "remaining_time": "2:59:56", "throughput": 8697.57, "total_tokens": 44043744} +{"current_steps": 65350, "total_steps": 204665, "loss": 0.0648, "lr": 1.7210458074204614e-06, "epoch": 1.5965113722424449, "percentage": 31.93, "elapsed_time": "1:24:24", "remaining_time": "2:59:56", "throughput": 8697.62, "total_tokens": 44047008} +{"current_steps": 65355, "total_steps": 204665, "loss": 0.2229, "lr": 1.7209867170248726e-06, "epoch": 1.596633523074292, "percentage": 31.93, "elapsed_time": "1:24:24", "remaining_time": "2:59:55", "throughput": 8697.66, "total_tokens": 44050208} +{"current_steps": 65360, "total_steps": 204665, "loss": 0.0578, "lr": 1.7209276213860747e-06, "epoch": 1.5967556739061393, "percentage": 31.94, "elapsed_time": "1:24:24", "remaining_time": "2:59:55", "throughput": 8697.7, "total_tokens": 44053408} +{"current_steps": 65365, "total_steps": 204665, "loss": 0.0661, "lr": 1.7208685205044971e-06, "epoch": 1.5968778247379865, "percentage": 31.94, "elapsed_time": "1:24:25", "remaining_time": "2:59:54", "throughput": 8697.75, "total_tokens": 44056672} +{"current_steps": 65370, "total_steps": 204665, "loss": 0.1116, "lr": 1.7208094143805695e-06, "epoch": 1.5969999755698336, "percentage": 31.94, "elapsed_time": "1:24:25", "remaining_time": "2:59:54", "throughput": 8697.85, "total_tokens": 44060256} +{"current_steps": 65375, "total_steps": 204665, "loss": 0.1478, "lr": 1.7207503030147222e-06, "epoch": 1.5971221264016808, "percentage": 31.94, "elapsed_time": "1:24:25", "remaining_time": "2:59:53", "throughput": 8697.89, "total_tokens": 44063456} +{"current_steps": 65380, "total_steps": 204665, "loss": 0.0438, "lr": 1.7206911864073848e-06, "epoch": 1.597244277233528, "percentage": 31.94, "elapsed_time": "1:24:26", "remaining_time": "2:59:53", "throughput": 8697.97, "total_tokens": 44066912} +{"current_steps": 65385, "total_steps": 204665, "loss": 0.0284, "lr": 1.720632064558987e-06, "epoch": 1.5973664280653752, "percentage": 31.95, "elapsed_time": "1:24:26", "remaining_time": "2:59:52", "throughput": 8698.07, "total_tokens": 44070432} +{"current_steps": 65390, "total_steps": 204665, "loss": 0.0805, "lr": 1.7205729374699594e-06, "epoch": 1.5974885788972224, "percentage": 31.95, "elapsed_time": "1:24:27", "remaining_time": "2:59:52", "throughput": 8698.15, "total_tokens": 44073888} +{"current_steps": 65395, "total_steps": 204665, "loss": 0.1458, "lr": 1.7205138051407312e-06, "epoch": 1.5976107297290696, "percentage": 31.95, "elapsed_time": "1:24:27", "remaining_time": "2:59:51", "throughput": 8698.16, "total_tokens": 44076896} +{"current_steps": 65400, "total_steps": 204665, "loss": 0.0646, "lr": 1.7204546675717333e-06, "epoch": 1.5977328805609168, "percentage": 31.95, "elapsed_time": "1:24:27", "remaining_time": "2:59:51", "throughput": 8698.22, "total_tokens": 44080224} +{"current_steps": 65405, "total_steps": 204665, "loss": 0.1099, "lr": 1.720395524763395e-06, "epoch": 1.5978550313927637, "percentage": 31.96, "elapsed_time": "1:24:28", "remaining_time": "2:59:50", "throughput": 8698.33, "total_tokens": 44083808} +{"current_steps": 65410, "total_steps": 204665, "loss": 0.084, "lr": 1.7203363767161468e-06, "epoch": 1.597977182224611, "percentage": 31.96, "elapsed_time": "1:24:28", "remaining_time": "2:59:50", "throughput": 8698.36, "total_tokens": 44086944} +{"current_steps": 65415, "total_steps": 204665, "loss": 0.0325, "lr": 1.7202772234304184e-06, "epoch": 1.5980993330564581, "percentage": 31.96, "elapsed_time": "1:24:28", "remaining_time": "2:59:49", "throughput": 8698.36, "total_tokens": 44089888} +{"current_steps": 65420, "total_steps": 204665, "loss": 0.1516, "lr": 1.7202180649066405e-06, "epoch": 1.5982214838883053, "percentage": 31.96, "elapsed_time": "1:24:29", "remaining_time": "2:59:49", "throughput": 8698.42, "total_tokens": 44093216} +{"current_steps": 65425, "total_steps": 204665, "loss": 0.0362, "lr": 1.720158901145243e-06, "epoch": 1.5983436347201523, "percentage": 31.97, "elapsed_time": "1:24:29", "remaining_time": "2:59:49", "throughput": 8698.57, "total_tokens": 44097120} +{"current_steps": 65430, "total_steps": 204665, "loss": 0.1402, "lr": 1.7200997321466563e-06, "epoch": 1.5984657855519995, "percentage": 31.97, "elapsed_time": "1:24:29", "remaining_time": "2:59:48", "throughput": 8698.61, "total_tokens": 44100320} +{"current_steps": 65435, "total_steps": 204665, "loss": 0.0018, "lr": 1.7200405579113108e-06, "epoch": 1.5985879363838467, "percentage": 31.97, "elapsed_time": "1:24:30", "remaining_time": "2:59:48", "throughput": 8698.64, "total_tokens": 44103456} +{"current_steps": 65440, "total_steps": 204665, "loss": 0.0501, "lr": 1.7199813784396366e-06, "epoch": 1.5987100872156939, "percentage": 31.97, "elapsed_time": "1:24:30", "remaining_time": "2:59:47", "throughput": 8698.67, "total_tokens": 44106592} +{"current_steps": 65445, "total_steps": 204665, "loss": 0.0764, "lr": 1.7199221937320645e-06, "epoch": 1.598832238047541, "percentage": 31.98, "elapsed_time": "1:24:30", "remaining_time": "2:59:47", "throughput": 8698.71, "total_tokens": 44109792} +{"current_steps": 65450, "total_steps": 204665, "loss": 0.1109, "lr": 1.7198630037890243e-06, "epoch": 1.5989543888793882, "percentage": 31.98, "elapsed_time": "1:24:31", "remaining_time": "2:59:46", "throughput": 8698.8, "total_tokens": 44113248} +{"current_steps": 65455, "total_steps": 204665, "loss": 0.2167, "lr": 1.7198038086109467e-06, "epoch": 1.5990765397112354, "percentage": 31.98, "elapsed_time": "1:24:31", "remaining_time": "2:59:46", "throughput": 8698.86, "total_tokens": 44116640} +{"current_steps": 65460, "total_steps": 204665, "loss": 0.0713, "lr": 1.7197446081982623e-06, "epoch": 1.5991986905430826, "percentage": 31.98, "elapsed_time": "1:24:31", "remaining_time": "2:59:45", "throughput": 8698.93, "total_tokens": 44120032} +{"current_steps": 65465, "total_steps": 204665, "loss": 0.07, "lr": 1.719685402551401e-06, "epoch": 1.5993208413749298, "percentage": 31.99, "elapsed_time": "1:24:32", "remaining_time": "2:59:45", "throughput": 8699.07, "total_tokens": 44123872} +{"current_steps": 65470, "total_steps": 204665, "loss": 0.0373, "lr": 1.7196261916707947e-06, "epoch": 1.599442992206777, "percentage": 31.99, "elapsed_time": "1:24:32", "remaining_time": "2:59:44", "throughput": 8699.12, "total_tokens": 44127136} +{"current_steps": 65475, "total_steps": 204665, "loss": 0.0014, "lr": 1.7195669755568727e-06, "epoch": 1.5995651430386242, "percentage": 31.99, "elapsed_time": "1:24:32", "remaining_time": "2:59:44", "throughput": 8699.13, "total_tokens": 44130144} +{"current_steps": 65480, "total_steps": 204665, "loss": 0.0558, "lr": 1.7195077542100663e-06, "epoch": 1.5996872938704714, "percentage": 31.99, "elapsed_time": "1:24:33", "remaining_time": "2:59:43", "throughput": 8699.12, "total_tokens": 44133024} +{"current_steps": 65485, "total_steps": 204665, "loss": 0.1076, "lr": 1.7194485276308057e-06, "epoch": 1.5998094447023186, "percentage": 32.0, "elapsed_time": "1:24:33", "remaining_time": "2:59:43", "throughput": 8699.16, "total_tokens": 44136224} +{"current_steps": 65490, "total_steps": 204665, "loss": 0.1035, "lr": 1.7193892958195222e-06, "epoch": 1.5999315955341658, "percentage": 32.0, "elapsed_time": "1:24:33", "remaining_time": "2:59:42", "throughput": 8699.22, "total_tokens": 44139552} +{"current_steps": 65495, "total_steps": 204665, "loss": 0.0076, "lr": 1.719330058776646e-06, "epoch": 1.6000537463660127, "percentage": 32.0, "elapsed_time": "1:24:34", "remaining_time": "2:59:42", "throughput": 8699.27, "total_tokens": 44142816} +{"current_steps": 65500, "total_steps": 204665, "loss": 0.0379, "lr": 1.7192708165026084e-06, "epoch": 1.60017589719786, "percentage": 32.0, "elapsed_time": "1:24:34", "remaining_time": "2:59:41", "throughput": 8699.31, "total_tokens": 44146080} +{"current_steps": 65505, "total_steps": 204665, "loss": 0.1149, "lr": 1.7192115689978398e-06, "epoch": 1.600298048029707, "percentage": 32.01, "elapsed_time": "1:24:35", "remaining_time": "2:59:41", "throughput": 8699.42, "total_tokens": 44149728} +{"current_steps": 65510, "total_steps": 204665, "loss": 0.1016, "lr": 1.7191523162627712e-06, "epoch": 1.6004201988615543, "percentage": 32.01, "elapsed_time": "1:24:35", "remaining_time": "2:59:41", "throughput": 8699.52, "total_tokens": 44153312} +{"current_steps": 65515, "total_steps": 204665, "loss": 0.001, "lr": 1.7190930582978335e-06, "epoch": 1.6005423496934013, "percentage": 32.01, "elapsed_time": "1:24:35", "remaining_time": "2:59:40", "throughput": 8699.63, "total_tokens": 44156960} +{"current_steps": 65520, "total_steps": 204665, "loss": 0.1725, "lr": 1.7190337951034577e-06, "epoch": 1.6006645005252484, "percentage": 32.01, "elapsed_time": "1:24:36", "remaining_time": "2:59:40", "throughput": 8699.71, "total_tokens": 44160416} +{"current_steps": 65525, "total_steps": 204665, "loss": 0.098, "lr": 1.7189745266800748e-06, "epoch": 1.6007866513570956, "percentage": 32.02, "elapsed_time": "1:24:36", "remaining_time": "2:59:39", "throughput": 8699.73, "total_tokens": 44163488} +{"current_steps": 65530, "total_steps": 204665, "loss": 0.1148, "lr": 1.718915253028116e-06, "epoch": 1.6009088021889428, "percentage": 32.02, "elapsed_time": "1:24:36", "remaining_time": "2:59:39", "throughput": 8699.79, "total_tokens": 44166816} +{"current_steps": 65535, "total_steps": 204665, "loss": 0.0477, "lr": 1.7188559741480117e-06, "epoch": 1.60103095302079, "percentage": 32.02, "elapsed_time": "1:24:37", "remaining_time": "2:59:38", "throughput": 8699.94, "total_tokens": 44170720} +{"current_steps": 65540, "total_steps": 204665, "loss": 0.0029, "lr": 1.7187966900401936e-06, "epoch": 1.6011531038526372, "percentage": 32.02, "elapsed_time": "1:24:37", "remaining_time": "2:59:38", "throughput": 8700.0, "total_tokens": 44174112} +{"current_steps": 65545, "total_steps": 204665, "loss": 0.0929, "lr": 1.7187374007050926e-06, "epoch": 1.6012752546844844, "percentage": 32.03, "elapsed_time": "1:24:37", "remaining_time": "2:59:37", "throughput": 8700.05, "total_tokens": 44177376} +{"current_steps": 65550, "total_steps": 204665, "loss": 0.0586, "lr": 1.7186781061431398e-06, "epoch": 1.6013974055163316, "percentage": 32.03, "elapsed_time": "1:24:38", "remaining_time": "2:59:37", "throughput": 8700.09, "total_tokens": 44180512} +{"current_steps": 65555, "total_steps": 204665, "loss": 0.1894, "lr": 1.7186188063547666e-06, "epoch": 1.6015195563481788, "percentage": 32.03, "elapsed_time": "1:24:38", "remaining_time": "2:59:36", "throughput": 8700.18, "total_tokens": 44184032} +{"current_steps": 65560, "total_steps": 204665, "loss": 0.0665, "lr": 1.7185595013404044e-06, "epoch": 1.601641707180026, "percentage": 32.03, "elapsed_time": "1:24:38", "remaining_time": "2:59:36", "throughput": 8700.21, "total_tokens": 44187168} +{"current_steps": 65565, "total_steps": 204665, "loss": 0.0818, "lr": 1.718500191100484e-06, "epoch": 1.6017638580118732, "percentage": 32.04, "elapsed_time": "1:24:39", "remaining_time": "2:59:35", "throughput": 8700.24, "total_tokens": 44190304} +{"current_steps": 65570, "total_steps": 204665, "loss": 0.1852, "lr": 1.718440875635437e-06, "epoch": 1.6018860088437203, "percentage": 32.04, "elapsed_time": "1:24:39", "remaining_time": "2:59:35", "throughput": 8700.3, "total_tokens": 44193632} +{"current_steps": 65575, "total_steps": 204665, "loss": 0.0989, "lr": 1.7183815549456946e-06, "epoch": 1.6020081596755675, "percentage": 32.04, "elapsed_time": "1:24:39", "remaining_time": "2:59:34", "throughput": 8700.36, "total_tokens": 44196960} +{"current_steps": 65580, "total_steps": 204665, "loss": 0.0721, "lr": 1.7183222290316883e-06, "epoch": 1.6021303105074145, "percentage": 32.04, "elapsed_time": "1:24:40", "remaining_time": "2:59:34", "throughput": 8700.42, "total_tokens": 44200288} +{"current_steps": 65585, "total_steps": 204665, "loss": 0.1025, "lr": 1.7182628978938498e-06, "epoch": 1.6022524613392617, "percentage": 32.05, "elapsed_time": "1:24:40", "remaining_time": "2:59:33", "throughput": 8700.48, "total_tokens": 44203616} +{"current_steps": 65590, "total_steps": 204665, "loss": 0.0754, "lr": 1.71820356153261e-06, "epoch": 1.6023746121711089, "percentage": 32.05, "elapsed_time": "1:24:40", "remaining_time": "2:59:33", "throughput": 8700.49, "total_tokens": 44206624} +{"current_steps": 65595, "total_steps": 204665, "loss": 0.0047, "lr": 1.7181442199484009e-06, "epoch": 1.602496763002956, "percentage": 32.05, "elapsed_time": "1:24:41", "remaining_time": "2:59:32", "throughput": 8700.54, "total_tokens": 44209888} +{"current_steps": 65600, "total_steps": 204665, "loss": 0.114, "lr": 1.7180848731416542e-06, "epoch": 1.6026189138348033, "percentage": 32.05, "elapsed_time": "1:24:41", "remaining_time": "2:59:32", "throughput": 8700.75, "total_tokens": 44214176} +{"current_steps": 65605, "total_steps": 204665, "loss": 0.0821, "lr": 1.7180255211128007e-06, "epoch": 1.6027410646666502, "percentage": 32.05, "elapsed_time": "1:24:41", "remaining_time": "2:59:32", "throughput": 8700.79, "total_tokens": 44217376} +{"current_steps": 65610, "total_steps": 204665, "loss": 0.0539, "lr": 1.7179661638622726e-06, "epoch": 1.6028632154984974, "percentage": 32.06, "elapsed_time": "1:24:42", "remaining_time": "2:59:31", "throughput": 8700.82, "total_tokens": 44220512} +{"current_steps": 65615, "total_steps": 204665, "loss": 0.1417, "lr": 1.7179068013905014e-06, "epoch": 1.6029853663303446, "percentage": 32.06, "elapsed_time": "1:24:42", "remaining_time": "2:59:31", "throughput": 8700.88, "total_tokens": 44223840} +{"current_steps": 65620, "total_steps": 204665, "loss": 0.0381, "lr": 1.717847433697919e-06, "epoch": 1.6031075171621918, "percentage": 32.06, "elapsed_time": "1:24:43", "remaining_time": "2:59:30", "throughput": 8700.94, "total_tokens": 44227168} +{"current_steps": 65625, "total_steps": 204665, "loss": 0.1578, "lr": 1.7177880607849568e-06, "epoch": 1.603229667994039, "percentage": 32.06, "elapsed_time": "1:24:43", "remaining_time": "2:59:30", "throughput": 8701.05, "total_tokens": 44230752} +{"current_steps": 65630, "total_steps": 204665, "loss": 0.09, "lr": 1.717728682652047e-06, "epoch": 1.6033518188258862, "percentage": 32.07, "elapsed_time": "1:24:43", "remaining_time": "2:59:29", "throughput": 8701.08, "total_tokens": 44233888} +{"current_steps": 65635, "total_steps": 204665, "loss": 0.004, "lr": 1.717669299299621e-06, "epoch": 1.6034739696577334, "percentage": 32.07, "elapsed_time": "1:24:44", "remaining_time": "2:59:29", "throughput": 8701.15, "total_tokens": 44237280} +{"current_steps": 65640, "total_steps": 204665, "loss": 0.1614, "lr": 1.7176099107281106e-06, "epoch": 1.6035961204895806, "percentage": 32.07, "elapsed_time": "1:24:44", "remaining_time": "2:59:28", "throughput": 8701.16, "total_tokens": 44240288} +{"current_steps": 65645, "total_steps": 204665, "loss": 0.0475, "lr": 1.7175505169379483e-06, "epoch": 1.6037182713214277, "percentage": 32.07, "elapsed_time": "1:24:44", "remaining_time": "2:59:28", "throughput": 8701.27, "total_tokens": 44243936} +{"current_steps": 65650, "total_steps": 204665, "loss": 0.0748, "lr": 1.7174911179295654e-06, "epoch": 1.603840422153275, "percentage": 32.08, "elapsed_time": "1:24:45", "remaining_time": "2:59:27", "throughput": 8701.32, "total_tokens": 44247200} +{"current_steps": 65655, "total_steps": 204665, "loss": 0.0416, "lr": 1.7174317137033944e-06, "epoch": 1.6039625729851221, "percentage": 32.08, "elapsed_time": "1:24:45", "remaining_time": "2:59:27", "throughput": 8701.41, "total_tokens": 44250720} +{"current_steps": 65660, "total_steps": 204665, "loss": 0.0533, "lr": 1.7173723042598667e-06, "epoch": 1.6040847238169693, "percentage": 32.08, "elapsed_time": "1:24:45", "remaining_time": "2:59:26", "throughput": 8701.44, "total_tokens": 44253792} +{"current_steps": 65665, "total_steps": 204665, "loss": 0.0099, "lr": 1.7173128895994148e-06, "epoch": 1.6042068746488165, "percentage": 32.08, "elapsed_time": "1:24:46", "remaining_time": "2:59:26", "throughput": 8701.5, "total_tokens": 44257184} +{"current_steps": 65670, "total_steps": 204665, "loss": 0.2448, "lr": 1.7172534697224708e-06, "epoch": 1.6043290254806635, "percentage": 32.09, "elapsed_time": "1:24:46", "remaining_time": "2:59:25", "throughput": 8701.59, "total_tokens": 44260640} +{"current_steps": 65675, "total_steps": 204665, "loss": 0.0923, "lr": 1.7171940446294664e-06, "epoch": 1.6044511763125107, "percentage": 32.09, "elapsed_time": "1:24:46", "remaining_time": "2:59:25", "throughput": 8701.66, "total_tokens": 44264032} +{"current_steps": 65680, "total_steps": 204665, "loss": 0.0069, "lr": 1.717134614320834e-06, "epoch": 1.6045733271443579, "percentage": 32.09, "elapsed_time": "1:24:47", "remaining_time": "2:59:24", "throughput": 8701.71, "total_tokens": 44267360} +{"current_steps": 65685, "total_steps": 204665, "loss": 0.1055, "lr": 1.717075178797006e-06, "epoch": 1.604695477976205, "percentage": 32.09, "elapsed_time": "1:24:47", "remaining_time": "2:59:24", "throughput": 8701.72, "total_tokens": 44270368} +{"current_steps": 65690, "total_steps": 204665, "loss": 0.0554, "lr": 1.7170157380584143e-06, "epoch": 1.6048176288080522, "percentage": 32.1, "elapsed_time": "1:24:47", "remaining_time": "2:59:24", "throughput": 8701.75, "total_tokens": 44273504} +{"current_steps": 65695, "total_steps": 204665, "loss": 0.0332, "lr": 1.7169562921054913e-06, "epoch": 1.6049397796398992, "percentage": 32.1, "elapsed_time": "1:24:48", "remaining_time": "2:59:23", "throughput": 8701.84, "total_tokens": 44277024} +{"current_steps": 65700, "total_steps": 204665, "loss": 0.0248, "lr": 1.716896840938669e-06, "epoch": 1.6050619304717464, "percentage": 32.1, "elapsed_time": "1:24:48", "remaining_time": "2:59:23", "throughput": 8701.88, "total_tokens": 44280224} +{"current_steps": 65705, "total_steps": 204665, "loss": 0.0531, "lr": 1.7168373845583805e-06, "epoch": 1.6051840813035936, "percentage": 32.1, "elapsed_time": "1:24:48", "remaining_time": "2:59:22", "throughput": 8701.97, "total_tokens": 44283680} +{"current_steps": 65710, "total_steps": 204665, "loss": 0.0277, "lr": 1.7167779229650576e-06, "epoch": 1.6053062321354408, "percentage": 32.11, "elapsed_time": "1:24:49", "remaining_time": "2:59:22", "throughput": 8702.1, "total_tokens": 44287456} +{"current_steps": 65715, "total_steps": 204665, "loss": 0.0948, "lr": 1.7167184561591328e-06, "epoch": 1.605428382967288, "percentage": 32.11, "elapsed_time": "1:24:49", "remaining_time": "2:59:21", "throughput": 8702.26, "total_tokens": 44291360} +{"current_steps": 65720, "total_steps": 204665, "loss": 0.1253, "lr": 1.7166589841410387e-06, "epoch": 1.6055505337991351, "percentage": 32.11, "elapsed_time": "1:24:49", "remaining_time": "2:59:21", "throughput": 8702.34, "total_tokens": 44294816} +{"current_steps": 65725, "total_steps": 204665, "loss": 0.093, "lr": 1.7165995069112077e-06, "epoch": 1.6056726846309823, "percentage": 32.11, "elapsed_time": "1:24:50", "remaining_time": "2:59:20", "throughput": 8702.38, "total_tokens": 44298016} +{"current_steps": 65730, "total_steps": 204665, "loss": 0.0925, "lr": 1.7165400244700723e-06, "epoch": 1.6057948354628295, "percentage": 32.12, "elapsed_time": "1:24:50", "remaining_time": "2:59:20", "throughput": 8702.44, "total_tokens": 44301344} +{"current_steps": 65735, "total_steps": 204665, "loss": 0.0401, "lr": 1.7164805368180652e-06, "epoch": 1.6059169862946767, "percentage": 32.12, "elapsed_time": "1:24:51", "remaining_time": "2:59:19", "throughput": 8702.55, "total_tokens": 44304992} +{"current_steps": 65740, "total_steps": 204665, "loss": 0.0368, "lr": 1.7164210439556187e-06, "epoch": 1.606039137126524, "percentage": 32.12, "elapsed_time": "1:24:51", "remaining_time": "2:59:19", "throughput": 8702.59, "total_tokens": 44308192} +{"current_steps": 65745, "total_steps": 204665, "loss": 0.1424, "lr": 1.716361545883166e-06, "epoch": 1.606161287958371, "percentage": 32.12, "elapsed_time": "1:24:51", "remaining_time": "2:59:18", "throughput": 8702.68, "total_tokens": 44311712} +{"current_steps": 65750, "total_steps": 204665, "loss": 0.1523, "lr": 1.7163020426011393e-06, "epoch": 1.6062834387902183, "percentage": 32.13, "elapsed_time": "1:24:52", "remaining_time": "2:59:18", "throughput": 8702.77, "total_tokens": 44315232} +{"current_steps": 65755, "total_steps": 204665, "loss": 0.1506, "lr": 1.7162425341099715e-06, "epoch": 1.6064055896220655, "percentage": 32.13, "elapsed_time": "1:24:52", "remaining_time": "2:59:17", "throughput": 8702.82, "total_tokens": 44318560} +{"current_steps": 65760, "total_steps": 204665, "loss": 0.0675, "lr": 1.7161830204100952e-06, "epoch": 1.6065277404539124, "percentage": 32.13, "elapsed_time": "1:24:52", "remaining_time": "2:59:17", "throughput": 8702.92, "total_tokens": 44322144} +{"current_steps": 65765, "total_steps": 204665, "loss": 0.1056, "lr": 1.7161235015019435e-06, "epoch": 1.6066498912857596, "percentage": 32.13, "elapsed_time": "1:24:53", "remaining_time": "2:59:17", "throughput": 8702.98, "total_tokens": 44325472} +{"current_steps": 65770, "total_steps": 204665, "loss": 0.1892, "lr": 1.7160639773859491e-06, "epoch": 1.6067720421176068, "percentage": 32.14, "elapsed_time": "1:24:53", "remaining_time": "2:59:16", "throughput": 8703.02, "total_tokens": 44328672} +{"current_steps": 65775, "total_steps": 204665, "loss": 0.1966, "lr": 1.7160044480625447e-06, "epoch": 1.606894192949454, "percentage": 32.14, "elapsed_time": "1:24:53", "remaining_time": "2:59:16", "throughput": 8703.11, "total_tokens": 44332192} +{"current_steps": 65780, "total_steps": 204665, "loss": 0.0466, "lr": 1.7159449135321636e-06, "epoch": 1.6070163437813012, "percentage": 32.14, "elapsed_time": "1:24:54", "remaining_time": "2:59:15", "throughput": 8703.3, "total_tokens": 44336352} +{"current_steps": 65785, "total_steps": 204665, "loss": 0.092, "lr": 1.7158853737952383e-06, "epoch": 1.6071384946131482, "percentage": 32.14, "elapsed_time": "1:24:54", "remaining_time": "2:59:15", "throughput": 8703.35, "total_tokens": 44339616} +{"current_steps": 65790, "total_steps": 204665, "loss": 0.1559, "lr": 1.715825828852202e-06, "epoch": 1.6072606454449954, "percentage": 32.15, "elapsed_time": "1:24:54", "remaining_time": "2:59:14", "throughput": 8703.36, "total_tokens": 44342624} +{"current_steps": 65795, "total_steps": 204665, "loss": 0.0751, "lr": 1.715766278703488e-06, "epoch": 1.6073827962768426, "percentage": 32.15, "elapsed_time": "1:24:55", "remaining_time": "2:59:14", "throughput": 8703.33, "total_tokens": 44345376} +{"current_steps": 65800, "total_steps": 204665, "loss": 0.0654, "lr": 1.7157067233495289e-06, "epoch": 1.6075049471086897, "percentage": 32.15, "elapsed_time": "1:24:55", "remaining_time": "2:59:13", "throughput": 8703.35, "total_tokens": 44348448} +{"current_steps": 65805, "total_steps": 204665, "loss": 0.0446, "lr": 1.715647162790758e-06, "epoch": 1.607627097940537, "percentage": 32.15, "elapsed_time": "1:24:55", "remaining_time": "2:59:13", "throughput": 8703.45, "total_tokens": 44352032} +{"current_steps": 65810, "total_steps": 204665, "loss": 0.0896, "lr": 1.7155875970276086e-06, "epoch": 1.6077492487723841, "percentage": 32.15, "elapsed_time": "1:24:56", "remaining_time": "2:59:12", "throughput": 8703.47, "total_tokens": 44355104} +{"current_steps": 65815, "total_steps": 204665, "loss": 0.0153, "lr": 1.7155280260605137e-06, "epoch": 1.6078713996042313, "percentage": 32.16, "elapsed_time": "1:24:56", "remaining_time": "2:59:12", "throughput": 8703.5, "total_tokens": 44358240} +{"current_steps": 65820, "total_steps": 204665, "loss": 0.044, "lr": 1.7154684498899063e-06, "epoch": 1.6079935504360785, "percentage": 32.16, "elapsed_time": "1:24:56", "remaining_time": "2:59:11", "throughput": 8703.6, "total_tokens": 44361824} +{"current_steps": 65825, "total_steps": 204665, "loss": 0.0039, "lr": 1.7154088685162203e-06, "epoch": 1.6081157012679257, "percentage": 32.16, "elapsed_time": "1:24:57", "remaining_time": "2:59:11", "throughput": 8703.72, "total_tokens": 44365536} +{"current_steps": 65830, "total_steps": 204665, "loss": 0.0479, "lr": 1.7153492819398881e-06, "epoch": 1.6082378520997729, "percentage": 32.16, "elapsed_time": "1:24:57", "remaining_time": "2:59:10", "throughput": 8703.78, "total_tokens": 44368864} +{"current_steps": 65835, "total_steps": 204665, "loss": 0.0011, "lr": 1.7152896901613439e-06, "epoch": 1.60836000293162, "percentage": 32.17, "elapsed_time": "1:24:58", "remaining_time": "2:59:10", "throughput": 8703.88, "total_tokens": 44372448} +{"current_steps": 65840, "total_steps": 204665, "loss": 0.0603, "lr": 1.7152300931810206e-06, "epoch": 1.6084821537634673, "percentage": 32.17, "elapsed_time": "1:24:58", "remaining_time": "2:59:09", "throughput": 8703.88, "total_tokens": 44375392} +{"current_steps": 65845, "total_steps": 204665, "loss": 0.0437, "lr": 1.7151704909993515e-06, "epoch": 1.6086043045953144, "percentage": 32.17, "elapsed_time": "1:24:58", "remaining_time": "2:59:09", "throughput": 8703.9, "total_tokens": 44378464} +{"current_steps": 65850, "total_steps": 204665, "loss": 0.1134, "lr": 1.7151108836167705e-06, "epoch": 1.6087264554271614, "percentage": 32.17, "elapsed_time": "1:24:59", "remaining_time": "2:59:09", "throughput": 8703.97, "total_tokens": 44381856} +{"current_steps": 65855, "total_steps": 204665, "loss": 0.0011, "lr": 1.7150512710337105e-06, "epoch": 1.6088486062590086, "percentage": 32.18, "elapsed_time": "1:24:59", "remaining_time": "2:59:08", "throughput": 8704.05, "total_tokens": 44385312} +{"current_steps": 65860, "total_steps": 204665, "loss": 0.0016, "lr": 1.7149916532506055e-06, "epoch": 1.6089707570908558, "percentage": 32.18, "elapsed_time": "1:24:59", "remaining_time": "2:59:08", "throughput": 8704.06, "total_tokens": 44388320} +{"current_steps": 65865, "total_steps": 204665, "loss": 0.173, "lr": 1.7149320302678892e-06, "epoch": 1.609092907922703, "percentage": 32.18, "elapsed_time": "1:25:00", "remaining_time": "2:59:07", "throughput": 8704.17, "total_tokens": 44391968} +{"current_steps": 65870, "total_steps": 204665, "loss": 0.0882, "lr": 1.7148724020859943e-06, "epoch": 1.60921505875455, "percentage": 32.18, "elapsed_time": "1:25:00", "remaining_time": "2:59:07", "throughput": 8704.23, "total_tokens": 44395296} +{"current_steps": 65875, "total_steps": 204665, "loss": 0.1272, "lr": 1.7148127687053553e-06, "epoch": 1.6093372095863971, "percentage": 32.19, "elapsed_time": "1:25:00", "remaining_time": "2:59:06", "throughput": 8704.3, "total_tokens": 44398624} +{"current_steps": 65880, "total_steps": 204665, "loss": 0.1108, "lr": 1.7147531301264056e-06, "epoch": 1.6094593604182443, "percentage": 32.19, "elapsed_time": "1:25:01", "remaining_time": "2:59:06", "throughput": 8704.36, "total_tokens": 44402016} +{"current_steps": 65885, "total_steps": 204665, "loss": 0.0632, "lr": 1.7146934863495787e-06, "epoch": 1.6095815112500915, "percentage": 32.19, "elapsed_time": "1:25:01", "remaining_time": "2:59:05", "throughput": 8704.48, "total_tokens": 44405664} +{"current_steps": 65890, "total_steps": 204665, "loss": 0.1824, "lr": 1.714633837375309e-06, "epoch": 1.6097036620819387, "percentage": 32.19, "elapsed_time": "1:25:01", "remaining_time": "2:59:05", "throughput": 8704.54, "total_tokens": 44409056} +{"current_steps": 65895, "total_steps": 204665, "loss": 0.0606, "lr": 1.7145741832040294e-06, "epoch": 1.609825812913786, "percentage": 32.2, "elapsed_time": "1:25:02", "remaining_time": "2:59:04", "throughput": 8704.61, "total_tokens": 44412384} +{"current_steps": 65900, "total_steps": 204665, "loss": 0.2215, "lr": 1.7145145238361743e-06, "epoch": 1.609947963745633, "percentage": 32.2, "elapsed_time": "1:25:02", "remaining_time": "2:59:04", "throughput": 8704.65, "total_tokens": 44415648} +{"current_steps": 65905, "total_steps": 204665, "loss": 0.0994, "lr": 1.7144548592721772e-06, "epoch": 1.6100701145774803, "percentage": 32.2, "elapsed_time": "1:25:02", "remaining_time": "2:59:03", "throughput": 8704.69, "total_tokens": 44418848} +{"current_steps": 65910, "total_steps": 204665, "loss": 0.091, "lr": 1.7143951895124724e-06, "epoch": 1.6101922654093275, "percentage": 32.2, "elapsed_time": "1:25:03", "remaining_time": "2:59:03", "throughput": 8704.68, "total_tokens": 44421664} +{"current_steps": 65915, "total_steps": 204665, "loss": 0.0934, "lr": 1.714335514557494e-06, "epoch": 1.6103144162411747, "percentage": 32.21, "elapsed_time": "1:25:03", "remaining_time": "2:59:02", "throughput": 8704.8, "total_tokens": 44425440} +{"current_steps": 65920, "total_steps": 204665, "loss": 0.0027, "lr": 1.714275834407675e-06, "epoch": 1.6104365670730219, "percentage": 32.21, "elapsed_time": "1:25:03", "remaining_time": "2:59:02", "throughput": 8704.92, "total_tokens": 44429152} +{"current_steps": 65925, "total_steps": 204665, "loss": 0.0914, "lr": 1.71421614906345e-06, "epoch": 1.610558717904869, "percentage": 32.21, "elapsed_time": "1:25:04", "remaining_time": "2:59:01", "throughput": 8704.97, "total_tokens": 44432416} +{"current_steps": 65930, "total_steps": 204665, "loss": 0.0421, "lr": 1.7141564585252534e-06, "epoch": 1.6106808687367162, "percentage": 32.21, "elapsed_time": "1:25:04", "remaining_time": "2:59:01", "throughput": 8705.01, "total_tokens": 44435616} +{"current_steps": 65935, "total_steps": 204665, "loss": 0.1913, "lr": 1.714096762793519e-06, "epoch": 1.6108030195685634, "percentage": 32.22, "elapsed_time": "1:25:04", "remaining_time": "2:59:01", "throughput": 8705.04, "total_tokens": 44438752} +{"current_steps": 65940, "total_steps": 204665, "loss": 0.0242, "lr": 1.7140370618686807e-06, "epoch": 1.6109251704004104, "percentage": 32.22, "elapsed_time": "1:25:05", "remaining_time": "2:59:00", "throughput": 8705.12, "total_tokens": 44442208} +{"current_steps": 65945, "total_steps": 204665, "loss": 0.1016, "lr": 1.7139773557511727e-06, "epoch": 1.6110473212322576, "percentage": 32.22, "elapsed_time": "1:25:05", "remaining_time": "2:59:00", "throughput": 8705.2, "total_tokens": 44445664} +{"current_steps": 65950, "total_steps": 204665, "loss": 0.05, "lr": 1.7139176444414296e-06, "epoch": 1.6111694720641048, "percentage": 32.22, "elapsed_time": "1:25:05", "remaining_time": "2:58:59", "throughput": 8705.29, "total_tokens": 44449184} +{"current_steps": 65955, "total_steps": 204665, "loss": 0.1257, "lr": 1.7138579279398853e-06, "epoch": 1.611291622895952, "percentage": 32.23, "elapsed_time": "1:25:06", "remaining_time": "2:58:59", "throughput": 8705.44, "total_tokens": 44453088} +{"current_steps": 65960, "total_steps": 204665, "loss": 0.0661, "lr": 1.7137982062469737e-06, "epoch": 1.611413773727799, "percentage": 32.23, "elapsed_time": "1:25:06", "remaining_time": "2:58:58", "throughput": 8705.52, "total_tokens": 44456544} +{"current_steps": 65965, "total_steps": 204665, "loss": 0.1086, "lr": 1.7137384793631302e-06, "epoch": 1.6115359245596461, "percentage": 32.23, "elapsed_time": "1:25:07", "remaining_time": "2:58:58", "throughput": 8705.6, "total_tokens": 44460000} +{"current_steps": 65970, "total_steps": 204665, "loss": 0.0199, "lr": 1.7136787472887884e-06, "epoch": 1.6116580753914933, "percentage": 32.23, "elapsed_time": "1:25:07", "remaining_time": "2:58:57", "throughput": 8705.75, "total_tokens": 44463904} +{"current_steps": 65975, "total_steps": 204665, "loss": 0.154, "lr": 1.7136190100243826e-06, "epoch": 1.6117802262233405, "percentage": 32.24, "elapsed_time": "1:25:07", "remaining_time": "2:58:57", "throughput": 8705.78, "total_tokens": 44467040} +{"current_steps": 65980, "total_steps": 204665, "loss": 0.0465, "lr": 1.7135592675703475e-06, "epoch": 1.6119023770551877, "percentage": 32.24, "elapsed_time": "1:25:08", "remaining_time": "2:58:56", "throughput": 8705.78, "total_tokens": 44469984} +{"current_steps": 65985, "total_steps": 204665, "loss": 0.1811, "lr": 1.7134995199271174e-06, "epoch": 1.6120245278870349, "percentage": 32.24, "elapsed_time": "1:25:08", "remaining_time": "2:58:56", "throughput": 8705.83, "total_tokens": 44473248} +{"current_steps": 65990, "total_steps": 204665, "loss": 0.0015, "lr": 1.7134397670951268e-06, "epoch": 1.612146678718882, "percentage": 32.24, "elapsed_time": "1:25:08", "remaining_time": "2:58:55", "throughput": 8705.9, "total_tokens": 44476640} +{"current_steps": 65995, "total_steps": 204665, "loss": 0.1421, "lr": 1.7133800090748106e-06, "epoch": 1.6122688295507293, "percentage": 32.25, "elapsed_time": "1:25:09", "remaining_time": "2:58:55", "throughput": 8705.89, "total_tokens": 44479520} +{"current_steps": 66000, "total_steps": 204665, "loss": 0.1589, "lr": 1.713320245866603e-06, "epoch": 1.6123909803825764, "percentage": 32.25, "elapsed_time": "1:25:09", "remaining_time": "2:58:54", "throughput": 8705.91, "total_tokens": 44482592} +{"current_steps": 66005, "total_steps": 204665, "loss": 0.2063, "lr": 1.7132604774709385e-06, "epoch": 1.6125131312144236, "percentage": 32.25, "elapsed_time": "1:25:09", "remaining_time": "2:58:54", "throughput": 8705.98, "total_tokens": 44485984} +{"current_steps": 66010, "total_steps": 204665, "loss": 0.0966, "lr": 1.7132007038882522e-06, "epoch": 1.6126352820462708, "percentage": 32.25, "elapsed_time": "1:25:10", "remaining_time": "2:58:53", "throughput": 8706.02, "total_tokens": 44489184} +{"current_steps": 66015, "total_steps": 204665, "loss": 0.0427, "lr": 1.7131409251189783e-06, "epoch": 1.612757432878118, "percentage": 32.26, "elapsed_time": "1:25:10", "remaining_time": "2:58:53", "throughput": 8706.11, "total_tokens": 44492704} +{"current_steps": 66020, "total_steps": 204665, "loss": 0.0581, "lr": 1.7130811411635522e-06, "epoch": 1.6128795837099652, "percentage": 32.26, "elapsed_time": "1:25:10", "remaining_time": "2:58:53", "throughput": 8706.15, "total_tokens": 44495904} +{"current_steps": 66025, "total_steps": 204665, "loss": 0.004, "lr": 1.713021352022408e-06, "epoch": 1.6130017345418124, "percentage": 32.26, "elapsed_time": "1:25:11", "remaining_time": "2:58:52", "throughput": 8706.27, "total_tokens": 44499616} +{"current_steps": 66030, "total_steps": 204665, "loss": 0.1521, "lr": 1.7129615576959804e-06, "epoch": 1.6131238853736594, "percentage": 32.26, "elapsed_time": "1:25:11", "remaining_time": "2:58:52", "throughput": 8706.34, "total_tokens": 44503008} +{"current_steps": 66035, "total_steps": 204665, "loss": 0.1051, "lr": 1.7129017581847052e-06, "epoch": 1.6132460362055066, "percentage": 32.26, "elapsed_time": "1:25:11", "remaining_time": "2:58:51", "throughput": 8706.38, "total_tokens": 44506208} +{"current_steps": 66040, "total_steps": 204665, "loss": 0.0537, "lr": 1.7128419534890162e-06, "epoch": 1.6133681870373537, "percentage": 32.27, "elapsed_time": "1:25:12", "remaining_time": "2:58:51", "throughput": 8706.39, "total_tokens": 44509216} +{"current_steps": 66045, "total_steps": 204665, "loss": 0.0643, "lr": 1.712782143609349e-06, "epoch": 1.613490337869201, "percentage": 32.27, "elapsed_time": "1:25:12", "remaining_time": "2:58:50", "throughput": 8706.48, "total_tokens": 44512800} +{"current_steps": 66050, "total_steps": 204665, "loss": 0.1507, "lr": 1.7127223285461385e-06, "epoch": 1.613612488701048, "percentage": 32.27, "elapsed_time": "1:25:12", "remaining_time": "2:58:50", "throughput": 8706.62, "total_tokens": 44516640} +{"current_steps": 66055, "total_steps": 204665, "loss": 0.0514, "lr": 1.7126625082998195e-06, "epoch": 1.613734639532895, "percentage": 32.27, "elapsed_time": "1:25:13", "remaining_time": "2:58:49", "throughput": 8706.71, "total_tokens": 44520160} +{"current_steps": 66060, "total_steps": 204665, "loss": 0.1484, "lr": 1.7126026828708266e-06, "epoch": 1.6138567903647423, "percentage": 32.28, "elapsed_time": "1:25:13", "remaining_time": "2:58:49", "throughput": 8706.75, "total_tokens": 44523360} +{"current_steps": 66065, "total_steps": 204665, "loss": 0.0504, "lr": 1.7125428522595956e-06, "epoch": 1.6139789411965895, "percentage": 32.28, "elapsed_time": "1:25:14", "remaining_time": "2:58:48", "throughput": 8706.9, "total_tokens": 44527264} +{"current_steps": 66070, "total_steps": 204665, "loss": 0.0785, "lr": 1.7124830164665616e-06, "epoch": 1.6141010920284367, "percentage": 32.28, "elapsed_time": "1:25:14", "remaining_time": "2:58:48", "throughput": 8706.95, "total_tokens": 44530528} +{"current_steps": 66075, "total_steps": 204665, "loss": 0.1119, "lr": 1.7124231754921592e-06, "epoch": 1.6142232428602838, "percentage": 32.28, "elapsed_time": "1:25:14", "remaining_time": "2:58:47", "throughput": 8706.93, "total_tokens": 44533344} +{"current_steps": 66080, "total_steps": 204665, "loss": 0.0351, "lr": 1.7123633293368239e-06, "epoch": 1.614345393692131, "percentage": 32.29, "elapsed_time": "1:25:15", "remaining_time": "2:58:47", "throughput": 8706.95, "total_tokens": 44536416} +{"current_steps": 66085, "total_steps": 204665, "loss": 0.0275, "lr": 1.7123034780009906e-06, "epoch": 1.6144675445239782, "percentage": 32.29, "elapsed_time": "1:25:15", "remaining_time": "2:58:46", "throughput": 8706.99, "total_tokens": 44539552} +{"current_steps": 66090, "total_steps": 204665, "loss": 0.0778, "lr": 1.7122436214850952e-06, "epoch": 1.6145896953558254, "percentage": 32.29, "elapsed_time": "1:25:15", "remaining_time": "2:58:46", "throughput": 8707.05, "total_tokens": 44542880} +{"current_steps": 66095, "total_steps": 204665, "loss": 0.0436, "lr": 1.7121837597895725e-06, "epoch": 1.6147118461876726, "percentage": 32.29, "elapsed_time": "1:25:16", "remaining_time": "2:58:45", "throughput": 8707.04, "total_tokens": 44545760} +{"current_steps": 66100, "total_steps": 204665, "loss": 0.1088, "lr": 1.712123892914858e-06, "epoch": 1.6148339970195198, "percentage": 32.3, "elapsed_time": "1:25:16", "remaining_time": "2:58:45", "throughput": 8707.06, "total_tokens": 44548832} +{"current_steps": 66105, "total_steps": 204665, "loss": 0.0025, "lr": 1.712064020861387e-06, "epoch": 1.614956147851367, "percentage": 32.3, "elapsed_time": "1:25:16", "remaining_time": "2:58:45", "throughput": 8707.12, "total_tokens": 44552224} +{"current_steps": 66110, "total_steps": 204665, "loss": 0.1381, "lr": 1.7120041436295947e-06, "epoch": 1.6150782986832142, "percentage": 32.3, "elapsed_time": "1:25:17", "remaining_time": "2:58:44", "throughput": 8707.16, "total_tokens": 44555424} +{"current_steps": 66115, "total_steps": 204665, "loss": 0.066, "lr": 1.7119442612199169e-06, "epoch": 1.6152004495150611, "percentage": 32.3, "elapsed_time": "1:25:17", "remaining_time": "2:58:44", "throughput": 8707.18, "total_tokens": 44558496} +{"current_steps": 66120, "total_steps": 204665, "loss": 0.1894, "lr": 1.7118843736327891e-06, "epoch": 1.6153226003469083, "percentage": 32.31, "elapsed_time": "1:25:17", "remaining_time": "2:58:43", "throughput": 8707.26, "total_tokens": 44561888} +{"current_steps": 66125, "total_steps": 204665, "loss": 0.2398, "lr": 1.7118244808686464e-06, "epoch": 1.6154447511787555, "percentage": 32.31, "elapsed_time": "1:25:18", "remaining_time": "2:58:43", "throughput": 8707.32, "total_tokens": 44565216} +{"current_steps": 66130, "total_steps": 204665, "loss": 0.0939, "lr": 1.7117645829279245e-06, "epoch": 1.6155669020106027, "percentage": 32.31, "elapsed_time": "1:25:18", "remaining_time": "2:58:42", "throughput": 8707.36, "total_tokens": 44568416} +{"current_steps": 66135, "total_steps": 204665, "loss": 0.0843, "lr": 1.7117046798110594e-06, "epoch": 1.61568905284245, "percentage": 32.31, "elapsed_time": "1:25:18", "remaining_time": "2:58:42", "throughput": 8707.36, "total_tokens": 44571360} +{"current_steps": 66140, "total_steps": 204665, "loss": 0.0584, "lr": 1.7116447715184866e-06, "epoch": 1.6158112036742969, "percentage": 32.32, "elapsed_time": "1:25:19", "remaining_time": "2:58:41", "throughput": 8707.47, "total_tokens": 44575008} +{"current_steps": 66145, "total_steps": 204665, "loss": 0.0668, "lr": 1.7115848580506413e-06, "epoch": 1.615933354506144, "percentage": 32.32, "elapsed_time": "1:25:19", "remaining_time": "2:58:41", "throughput": 8707.61, "total_tokens": 44578848} +{"current_steps": 66150, "total_steps": 204665, "loss": 0.0433, "lr": 1.7115249394079596e-06, "epoch": 1.6160555053379912, "percentage": 32.32, "elapsed_time": "1:25:19", "remaining_time": "2:58:40", "throughput": 8707.78, "total_tokens": 44582880} +{"current_steps": 66155, "total_steps": 204665, "loss": 0.0462, "lr": 1.7114650155908771e-06, "epoch": 1.6161776561698384, "percentage": 32.32, "elapsed_time": "1:25:20", "remaining_time": "2:58:40", "throughput": 8707.84, "total_tokens": 44586144} +{"current_steps": 66160, "total_steps": 204665, "loss": 0.0029, "lr": 1.71140508659983e-06, "epoch": 1.6162998070016856, "percentage": 32.33, "elapsed_time": "1:25:20", "remaining_time": "2:58:39", "throughput": 8707.95, "total_tokens": 44589792} +{"current_steps": 66165, "total_steps": 204665, "loss": 0.0599, "lr": 1.7113451524352533e-06, "epoch": 1.6164219578335328, "percentage": 32.33, "elapsed_time": "1:25:20", "remaining_time": "2:58:39", "throughput": 8708.0, "total_tokens": 44593056} +{"current_steps": 66170, "total_steps": 204665, "loss": 0.0373, "lr": 1.7112852130975838e-06, "epoch": 1.61654410866538, "percentage": 32.33, "elapsed_time": "1:25:21", "remaining_time": "2:58:38", "throughput": 8708.05, "total_tokens": 44596320} +{"current_steps": 66175, "total_steps": 204665, "loss": 0.1563, "lr": 1.7112252685872566e-06, "epoch": 1.6166662594972272, "percentage": 32.33, "elapsed_time": "1:25:21", "remaining_time": "2:58:38", "throughput": 8708.09, "total_tokens": 44599520} +{"current_steps": 66180, "total_steps": 204665, "loss": 0.0564, "lr": 1.7111653189047076e-06, "epoch": 1.6167884103290744, "percentage": 32.34, "elapsed_time": "1:25:21", "remaining_time": "2:58:37", "throughput": 8708.18, "total_tokens": 44603040} +{"current_steps": 66185, "total_steps": 204665, "loss": 0.0227, "lr": 1.7111053640503737e-06, "epoch": 1.6169105611609216, "percentage": 32.34, "elapsed_time": "1:25:22", "remaining_time": "2:58:37", "throughput": 8708.32, "total_tokens": 44606880} +{"current_steps": 66190, "total_steps": 204665, "loss": 0.0364, "lr": 1.71104540402469e-06, "epoch": 1.6170327119927688, "percentage": 32.34, "elapsed_time": "1:25:22", "remaining_time": "2:58:37", "throughput": 8708.43, "total_tokens": 44610464} +{"current_steps": 66195, "total_steps": 204665, "loss": 0.1687, "lr": 1.7109854388280932e-06, "epoch": 1.617154862824616, "percentage": 32.34, "elapsed_time": "1:25:23", "remaining_time": "2:58:36", "throughput": 8708.48, "total_tokens": 44613728} +{"current_steps": 66200, "total_steps": 204665, "loss": 0.2382, "lr": 1.710925468461019e-06, "epoch": 1.6172770136564631, "percentage": 32.35, "elapsed_time": "1:25:23", "remaining_time": "2:58:36", "throughput": 8708.64, "total_tokens": 44617696} +{"current_steps": 66205, "total_steps": 204665, "loss": 0.1017, "lr": 1.7108654929239033e-06, "epoch": 1.6173991644883101, "percentage": 32.35, "elapsed_time": "1:25:23", "remaining_time": "2:58:35", "throughput": 8708.7, "total_tokens": 44621024} +{"current_steps": 66210, "total_steps": 204665, "loss": 0.1305, "lr": 1.7108055122171825e-06, "epoch": 1.6175213153201573, "percentage": 32.35, "elapsed_time": "1:25:24", "remaining_time": "2:58:35", "throughput": 8708.7, "total_tokens": 44623968} +{"current_steps": 66215, "total_steps": 204665, "loss": 0.073, "lr": 1.710745526341293e-06, "epoch": 1.6176434661520045, "percentage": 32.35, "elapsed_time": "1:25:24", "remaining_time": "2:58:34", "throughput": 8708.75, "total_tokens": 44627232} +{"current_steps": 66220, "total_steps": 204665, "loss": 0.0392, "lr": 1.710685535296671e-06, "epoch": 1.6177656169838517, "percentage": 32.36, "elapsed_time": "1:25:24", "remaining_time": "2:58:34", "throughput": 8708.79, "total_tokens": 44630432} +{"current_steps": 66225, "total_steps": 204665, "loss": 0.0633, "lr": 1.7106255390837525e-06, "epoch": 1.6178877678156989, "percentage": 32.36, "elapsed_time": "1:25:25", "remaining_time": "2:58:33", "throughput": 8708.9, "total_tokens": 44634080} +{"current_steps": 66230, "total_steps": 204665, "loss": 0.1125, "lr": 1.710565537702974e-06, "epoch": 1.6180099186475458, "percentage": 32.36, "elapsed_time": "1:25:25", "remaining_time": "2:58:33", "throughput": 8708.96, "total_tokens": 44637472} +{"current_steps": 66235, "total_steps": 204665, "loss": 0.0341, "lr": 1.7105055311547716e-06, "epoch": 1.618132069479393, "percentage": 32.36, "elapsed_time": "1:25:25", "remaining_time": "2:58:32", "throughput": 8709.06, "total_tokens": 44640992} +{"current_steps": 66240, "total_steps": 204665, "loss": 0.1151, "lr": 1.7104455194395822e-06, "epoch": 1.6182542203112402, "percentage": 32.37, "elapsed_time": "1:25:26", "remaining_time": "2:58:32", "throughput": 8709.07, "total_tokens": 44644000} +{"current_steps": 66245, "total_steps": 204665, "loss": 0.1159, "lr": 1.7103855025578416e-06, "epoch": 1.6183763711430874, "percentage": 32.37, "elapsed_time": "1:25:26", "remaining_time": "2:58:31", "throughput": 8709.26, "total_tokens": 44648224} +{"current_steps": 66250, "total_steps": 204665, "loss": 0.1734, "lr": 1.7103254805099867e-06, "epoch": 1.6184985219749346, "percentage": 32.37, "elapsed_time": "1:25:26", "remaining_time": "2:58:31", "throughput": 8709.33, "total_tokens": 44651616} +{"current_steps": 66255, "total_steps": 204665, "loss": 0.0259, "lr": 1.7102654532964538e-06, "epoch": 1.6186206728067818, "percentage": 32.37, "elapsed_time": "1:25:27", "remaining_time": "2:58:31", "throughput": 8709.41, "total_tokens": 44655072} +{"current_steps": 66260, "total_steps": 204665, "loss": 0.0601, "lr": 1.7102054209176794e-06, "epoch": 1.618742823638629, "percentage": 32.37, "elapsed_time": "1:25:27", "remaining_time": "2:58:30", "throughput": 8709.51, "total_tokens": 44658656} +{"current_steps": 66265, "total_steps": 204665, "loss": 0.0548, "lr": 1.7101453833741005e-06, "epoch": 1.6188649744704762, "percentage": 32.38, "elapsed_time": "1:25:27", "remaining_time": "2:58:30", "throughput": 8709.61, "total_tokens": 44662304} +{"current_steps": 66270, "total_steps": 204665, "loss": 0.0321, "lr": 1.710085340666153e-06, "epoch": 1.6189871253023234, "percentage": 32.38, "elapsed_time": "1:25:28", "remaining_time": "2:58:29", "throughput": 8709.62, "total_tokens": 44665312} +{"current_steps": 66275, "total_steps": 204665, "loss": 0.0417, "lr": 1.710025292794274e-06, "epoch": 1.6191092761341705, "percentage": 32.38, "elapsed_time": "1:25:28", "remaining_time": "2:58:29", "throughput": 8709.74, "total_tokens": 44669024} +{"current_steps": 66280, "total_steps": 204665, "loss": 0.0017, "lr": 1.7099652397589002e-06, "epoch": 1.6192314269660177, "percentage": 32.38, "elapsed_time": "1:25:28", "remaining_time": "2:58:28", "throughput": 8709.77, "total_tokens": 44672160} +{"current_steps": 66285, "total_steps": 204665, "loss": 0.1311, "lr": 1.7099051815604681e-06, "epoch": 1.619353577797865, "percentage": 32.39, "elapsed_time": "1:25:29", "remaining_time": "2:58:28", "throughput": 8709.88, "total_tokens": 44675872} +{"current_steps": 66290, "total_steps": 204665, "loss": 0.1039, "lr": 1.7098451181994147e-06, "epoch": 1.6194757286297121, "percentage": 32.39, "elapsed_time": "1:25:29", "remaining_time": "2:58:27", "throughput": 8710.04, "total_tokens": 44679840} +{"current_steps": 66295, "total_steps": 204665, "loss": 0.2029, "lr": 1.7097850496761764e-06, "epoch": 1.619597879461559, "percentage": 32.39, "elapsed_time": "1:25:30", "remaining_time": "2:58:27", "throughput": 8710.11, "total_tokens": 44683232} +{"current_steps": 66300, "total_steps": 204665, "loss": 0.2378, "lr": 1.709724975991191e-06, "epoch": 1.6197200302934063, "percentage": 32.39, "elapsed_time": "1:25:30", "remaining_time": "2:58:26", "throughput": 8710.12, "total_tokens": 44686240} +{"current_steps": 66305, "total_steps": 204665, "loss": 0.0358, "lr": 1.7096648971448938e-06, "epoch": 1.6198421811252535, "percentage": 32.4, "elapsed_time": "1:25:30", "remaining_time": "2:58:26", "throughput": 8710.29, "total_tokens": 44690272} +{"current_steps": 66310, "total_steps": 204665, "loss": 0.0322, "lr": 1.709604813137723e-06, "epoch": 1.6199643319571007, "percentage": 32.4, "elapsed_time": "1:25:31", "remaining_time": "2:58:25", "throughput": 8710.31, "total_tokens": 44693344} +{"current_steps": 66315, "total_steps": 204665, "loss": 0.0937, "lr": 1.7095447239701153e-06, "epoch": 1.6200864827889478, "percentage": 32.4, "elapsed_time": "1:25:31", "remaining_time": "2:58:25", "throughput": 8710.41, "total_tokens": 44696928} +{"current_steps": 66320, "total_steps": 204665, "loss": 0.0822, "lr": 1.7094846296425072e-06, "epoch": 1.6202086336207948, "percentage": 32.4, "elapsed_time": "1:25:31", "remaining_time": "2:58:25", "throughput": 8710.51, "total_tokens": 44700512} +{"current_steps": 66325, "total_steps": 204665, "loss": 0.045, "lr": 1.7094245301553362e-06, "epoch": 1.620330784452642, "percentage": 32.41, "elapsed_time": "1:25:32", "remaining_time": "2:58:24", "throughput": 8710.5, "total_tokens": 44703392} +{"current_steps": 66330, "total_steps": 204665, "loss": 0.0862, "lr": 1.7093644255090394e-06, "epoch": 1.6204529352844892, "percentage": 32.41, "elapsed_time": "1:25:32", "remaining_time": "2:58:24", "throughput": 8710.56, "total_tokens": 44706720} +{"current_steps": 66335, "total_steps": 204665, "loss": 0.0469, "lr": 1.7093043157040533e-06, "epoch": 1.6205750861163364, "percentage": 32.41, "elapsed_time": "1:25:32", "remaining_time": "2:58:23", "throughput": 8710.58, "total_tokens": 44709856} +{"current_steps": 66340, "total_steps": 204665, "loss": 0.0538, "lr": 1.709244200740816e-06, "epoch": 1.6206972369481836, "percentage": 32.41, "elapsed_time": "1:25:33", "remaining_time": "2:58:23", "throughput": 8710.64, "total_tokens": 44713184} +{"current_steps": 66345, "total_steps": 204665, "loss": 0.0821, "lr": 1.7091840806197636e-06, "epoch": 1.6208193877800308, "percentage": 32.42, "elapsed_time": "1:25:33", "remaining_time": "2:58:22", "throughput": 8710.66, "total_tokens": 44716256} +{"current_steps": 66350, "total_steps": 204665, "loss": 0.0016, "lr": 1.709123955341334e-06, "epoch": 1.620941538611878, "percentage": 32.42, "elapsed_time": "1:25:33", "remaining_time": "2:58:22", "throughput": 8710.77, "total_tokens": 44719904} +{"current_steps": 66355, "total_steps": 204665, "loss": 0.0014, "lr": 1.7090638249059641e-06, "epoch": 1.6210636894437251, "percentage": 32.42, "elapsed_time": "1:25:34", "remaining_time": "2:58:21", "throughput": 8710.83, "total_tokens": 44723232} +{"current_steps": 66360, "total_steps": 204665, "loss": 0.0441, "lr": 1.7090036893140915e-06, "epoch": 1.6211858402755723, "percentage": 32.42, "elapsed_time": "1:25:34", "remaining_time": "2:58:21", "throughput": 8710.97, "total_tokens": 44727072} +{"current_steps": 66365, "total_steps": 204665, "loss": 0.0897, "lr": 1.7089435485661535e-06, "epoch": 1.6213079911074195, "percentage": 32.43, "elapsed_time": "1:25:34", "remaining_time": "2:58:20", "throughput": 8711.07, "total_tokens": 44730720} +{"current_steps": 66370, "total_steps": 204665, "loss": 0.0022, "lr": 1.7088834026625869e-06, "epoch": 1.6214301419392667, "percentage": 32.43, "elapsed_time": "1:25:35", "remaining_time": "2:58:20", "throughput": 8711.11, "total_tokens": 44733920} +{"current_steps": 66375, "total_steps": 204665, "loss": 0.1949, "lr": 1.70882325160383e-06, "epoch": 1.621552292771114, "percentage": 32.43, "elapsed_time": "1:25:35", "remaining_time": "2:58:19", "throughput": 8711.18, "total_tokens": 44737312} +{"current_steps": 66380, "total_steps": 204665, "loss": 0.1604, "lr": 1.7087630953903197e-06, "epoch": 1.621674443602961, "percentage": 32.43, "elapsed_time": "1:25:35", "remaining_time": "2:58:19", "throughput": 8711.25, "total_tokens": 44740704} +{"current_steps": 66385, "total_steps": 204665, "loss": 0.2189, "lr": 1.7087029340224933e-06, "epoch": 1.621796594434808, "percentage": 32.44, "elapsed_time": "1:25:36", "remaining_time": "2:58:18", "throughput": 8711.35, "total_tokens": 44744288} +{"current_steps": 66390, "total_steps": 204665, "loss": 0.0373, "lr": 1.7086427675007886e-06, "epoch": 1.6219187452666552, "percentage": 32.44, "elapsed_time": "1:25:36", "remaining_time": "2:58:18", "throughput": 8711.45, "total_tokens": 44747808} +{"current_steps": 66395, "total_steps": 204665, "loss": 0.04, "lr": 1.7085825958256431e-06, "epoch": 1.6220408960985024, "percentage": 32.44, "elapsed_time": "1:25:37", "remaining_time": "2:58:18", "throughput": 8711.5, "total_tokens": 44751136} +{"current_steps": 66400, "total_steps": 204665, "loss": 0.0008, "lr": 1.7085224189974944e-06, "epoch": 1.6221630469303496, "percentage": 32.44, "elapsed_time": "1:25:37", "remaining_time": "2:58:17", "throughput": 8711.56, "total_tokens": 44754464} +{"current_steps": 66405, "total_steps": 204665, "loss": 0.0907, "lr": 1.7084622370167803e-06, "epoch": 1.6222851977621966, "percentage": 32.45, "elapsed_time": "1:25:37", "remaining_time": "2:58:17", "throughput": 8711.64, "total_tokens": 44757920} +{"current_steps": 66410, "total_steps": 204665, "loss": 0.1325, "lr": 1.708402049883938e-06, "epoch": 1.6224073485940438, "percentage": 32.45, "elapsed_time": "1:25:38", "remaining_time": "2:58:16", "throughput": 8711.74, "total_tokens": 44761504} +{"current_steps": 66415, "total_steps": 204665, "loss": 0.0319, "lr": 1.7083418575994055e-06, "epoch": 1.622529499425891, "percentage": 32.45, "elapsed_time": "1:25:38", "remaining_time": "2:58:16", "throughput": 8711.82, "total_tokens": 44764960} +{"current_steps": 66420, "total_steps": 204665, "loss": 0.0828, "lr": 1.7082816601636205e-06, "epoch": 1.6226516502577382, "percentage": 32.45, "elapsed_time": "1:25:38", "remaining_time": "2:58:15", "throughput": 8711.86, "total_tokens": 44768224} +{"current_steps": 66425, "total_steps": 204665, "loss": 0.0197, "lr": 1.7082214575770209e-06, "epoch": 1.6227738010895854, "percentage": 32.46, "elapsed_time": "1:25:39", "remaining_time": "2:58:15", "throughput": 8711.92, "total_tokens": 44771488} +{"current_steps": 66430, "total_steps": 204665, "loss": 0.0376, "lr": 1.7081612498400442e-06, "epoch": 1.6228959519214325, "percentage": 32.46, "elapsed_time": "1:25:39", "remaining_time": "2:58:14", "throughput": 8711.92, "total_tokens": 44774432} +{"current_steps": 66435, "total_steps": 204665, "loss": 0.1097, "lr": 1.7081010369531286e-06, "epoch": 1.6230181027532797, "percentage": 32.46, "elapsed_time": "1:25:39", "remaining_time": "2:58:14", "throughput": 8712.0, "total_tokens": 44777824} +{"current_steps": 66440, "total_steps": 204665, "loss": 0.0008, "lr": 1.7080408189167116e-06, "epoch": 1.623140253585127, "percentage": 32.46, "elapsed_time": "1:25:40", "remaining_time": "2:58:13", "throughput": 8712.06, "total_tokens": 44781152} +{"current_steps": 66445, "total_steps": 204665, "loss": 0.0708, "lr": 1.7079805957312315e-06, "epoch": 1.6232624044169741, "percentage": 32.47, "elapsed_time": "1:25:40", "remaining_time": "2:58:13", "throughput": 8712.07, "total_tokens": 44784224} +{"current_steps": 66450, "total_steps": 204665, "loss": 0.0751, "lr": 1.707920367397126e-06, "epoch": 1.6233845552488213, "percentage": 32.47, "elapsed_time": "1:25:40", "remaining_time": "2:58:12", "throughput": 8712.17, "total_tokens": 44787808} +{"current_steps": 66455, "total_steps": 204665, "loss": 0.0875, "lr": 1.7078601339148332e-06, "epoch": 1.6235067060806685, "percentage": 32.47, "elapsed_time": "1:25:41", "remaining_time": "2:58:12", "throughput": 8712.22, "total_tokens": 44791072} +{"current_steps": 66460, "total_steps": 204665, "loss": 0.0368, "lr": 1.7077998952847912e-06, "epoch": 1.6236288569125157, "percentage": 32.47, "elapsed_time": "1:25:41", "remaining_time": "2:58:11", "throughput": 8712.23, "total_tokens": 44794080} +{"current_steps": 66465, "total_steps": 204665, "loss": 0.124, "lr": 1.7077396515074379e-06, "epoch": 1.6237510077443629, "percentage": 32.48, "elapsed_time": "1:25:41", "remaining_time": "2:58:11", "throughput": 8712.22, "total_tokens": 44796960} +{"current_steps": 66470, "total_steps": 204665, "loss": 0.121, "lr": 1.7076794025832112e-06, "epoch": 1.62387315857621, "percentage": 32.48, "elapsed_time": "1:25:42", "remaining_time": "2:58:10", "throughput": 8712.26, "total_tokens": 44800160} +{"current_steps": 66475, "total_steps": 204665, "loss": 0.1473, "lr": 1.70761914851255e-06, "epoch": 1.623995309408057, "percentage": 32.48, "elapsed_time": "1:25:42", "remaining_time": "2:58:10", "throughput": 8712.32, "total_tokens": 44803488} +{"current_steps": 66480, "total_steps": 204665, "loss": 0.0533, "lr": 1.7075588892958917e-06, "epoch": 1.6241174602399042, "percentage": 32.48, "elapsed_time": "1:25:42", "remaining_time": "2:58:09", "throughput": 8712.41, "total_tokens": 44807008} +{"current_steps": 66485, "total_steps": 204665, "loss": 0.0263, "lr": 1.7074986249336751e-06, "epoch": 1.6242396110717514, "percentage": 32.48, "elapsed_time": "1:25:43", "remaining_time": "2:58:09", "throughput": 8712.47, "total_tokens": 44810336} +{"current_steps": 66490, "total_steps": 204665, "loss": 0.1167, "lr": 1.707438355426338e-06, "epoch": 1.6243617619035986, "percentage": 32.49, "elapsed_time": "1:25:43", "remaining_time": "2:58:09", "throughput": 8712.45, "total_tokens": 44813152} +{"current_steps": 66495, "total_steps": 204665, "loss": 0.115, "lr": 1.707378080774319e-06, "epoch": 1.6244839127354456, "percentage": 32.49, "elapsed_time": "1:25:43", "remaining_time": "2:58:08", "throughput": 8712.59, "total_tokens": 44816992} +{"current_steps": 66500, "total_steps": 204665, "loss": 0.156, "lr": 1.7073178009780564e-06, "epoch": 1.6246060635672928, "percentage": 32.49, "elapsed_time": "1:25:44", "remaining_time": "2:58:08", "throughput": 8712.61, "total_tokens": 44820064} +{"current_steps": 66505, "total_steps": 204665, "loss": 0.0938, "lr": 1.7072575160379886e-06, "epoch": 1.62472821439914, "percentage": 32.49, "elapsed_time": "1:25:44", "remaining_time": "2:58:07", "throughput": 8712.73, "total_tokens": 44823776} +{"current_steps": 66510, "total_steps": 204665, "loss": 0.0759, "lr": 1.7071972259545535e-06, "epoch": 1.6248503652309871, "percentage": 32.5, "elapsed_time": "1:25:44", "remaining_time": "2:58:07", "throughput": 8712.82, "total_tokens": 44827296} +{"current_steps": 66515, "total_steps": 204665, "loss": 0.0549, "lr": 1.7071369307281903e-06, "epoch": 1.6249725160628343, "percentage": 32.5, "elapsed_time": "1:25:45", "remaining_time": "2:58:06", "throughput": 8712.83, "total_tokens": 44830304} +{"current_steps": 66520, "total_steps": 204665, "loss": 0.0036, "lr": 1.7070766303593369e-06, "epoch": 1.6250946668946815, "percentage": 32.5, "elapsed_time": "1:25:45", "remaining_time": "2:58:06", "throughput": 8712.98, "total_tokens": 44834144} +{"current_steps": 66525, "total_steps": 204665, "loss": 0.296, "lr": 1.7070163248484323e-06, "epoch": 1.6252168177265287, "percentage": 32.5, "elapsed_time": "1:25:46", "remaining_time": "2:58:05", "throughput": 8713.02, "total_tokens": 44837344} +{"current_steps": 66530, "total_steps": 204665, "loss": 0.0014, "lr": 1.706956014195915e-06, "epoch": 1.625338968558376, "percentage": 32.51, "elapsed_time": "1:25:46", "remaining_time": "2:58:05", "throughput": 8713.1, "total_tokens": 44840800} +{"current_steps": 66535, "total_steps": 204665, "loss": 0.1023, "lr": 1.7068956984022229e-06, "epoch": 1.625461119390223, "percentage": 32.51, "elapsed_time": "1:25:46", "remaining_time": "2:58:04", "throughput": 8713.08, "total_tokens": 44843616} +{"current_steps": 66540, "total_steps": 204665, "loss": 0.0053, "lr": 1.7068353774677956e-06, "epoch": 1.6255832702220703, "percentage": 32.51, "elapsed_time": "1:25:47", "remaining_time": "2:58:04", "throughput": 8713.08, "total_tokens": 44846560} +{"current_steps": 66545, "total_steps": 204665, "loss": 0.1193, "lr": 1.706775051393071e-06, "epoch": 1.6257054210539175, "percentage": 32.51, "elapsed_time": "1:25:47", "remaining_time": "2:58:03", "throughput": 8713.15, "total_tokens": 44850016} +{"current_steps": 66550, "total_steps": 204665, "loss": 0.0211, "lr": 1.7067147201784882e-06, "epoch": 1.6258275718857647, "percentage": 32.52, "elapsed_time": "1:25:47", "remaining_time": "2:58:03", "throughput": 8713.24, "total_tokens": 44853536} +{"current_steps": 66555, "total_steps": 204665, "loss": 0.0221, "lr": 1.7066543838244857e-06, "epoch": 1.6259497227176118, "percentage": 32.52, "elapsed_time": "1:25:48", "remaining_time": "2:58:02", "throughput": 8713.31, "total_tokens": 44856928} +{"current_steps": 66560, "total_steps": 204665, "loss": 0.0409, "lr": 1.7065940423315032e-06, "epoch": 1.626071873549459, "percentage": 32.52, "elapsed_time": "1:25:48", "remaining_time": "2:58:02", "throughput": 8713.39, "total_tokens": 44860384} +{"current_steps": 66565, "total_steps": 204665, "loss": 0.0624, "lr": 1.706533695699978e-06, "epoch": 1.626194024381306, "percentage": 32.52, "elapsed_time": "1:25:48", "remaining_time": "2:58:01", "throughput": 8713.39, "total_tokens": 44863328} +{"current_steps": 66570, "total_steps": 204665, "loss": 0.0517, "lr": 1.7064733439303497e-06, "epoch": 1.6263161752131532, "percentage": 32.53, "elapsed_time": "1:25:49", "remaining_time": "2:58:01", "throughput": 8713.45, "total_tokens": 44866656} +{"current_steps": 66575, "total_steps": 204665, "loss": 0.0994, "lr": 1.7064129870230576e-06, "epoch": 1.6264383260450004, "percentage": 32.53, "elapsed_time": "1:25:49", "remaining_time": "2:58:01", "throughput": 8713.57, "total_tokens": 44870368} +{"current_steps": 66580, "total_steps": 204665, "loss": 0.0459, "lr": 1.7063526249785403e-06, "epoch": 1.6265604768768476, "percentage": 32.53, "elapsed_time": "1:25:49", "remaining_time": "2:58:00", "throughput": 8713.58, "total_tokens": 44873376} +{"current_steps": 66585, "total_steps": 204665, "loss": 0.0376, "lr": 1.7062922577972366e-06, "epoch": 1.6266826277086945, "percentage": 32.53, "elapsed_time": "1:25:50", "remaining_time": "2:58:00", "throughput": 8713.63, "total_tokens": 44876704} +{"current_steps": 66590, "total_steps": 204665, "loss": 0.3196, "lr": 1.7062318854795854e-06, "epoch": 1.6268047785405417, "percentage": 32.54, "elapsed_time": "1:25:50", "remaining_time": "2:57:59", "throughput": 8713.71, "total_tokens": 44880096} +{"current_steps": 66595, "total_steps": 204665, "loss": 0.0331, "lr": 1.7061715080260264e-06, "epoch": 1.626926929372389, "percentage": 32.54, "elapsed_time": "1:25:50", "remaining_time": "2:57:59", "throughput": 8713.79, "total_tokens": 44883552} +{"current_steps": 66600, "total_steps": 204665, "loss": 0.099, "lr": 1.706111125436998e-06, "epoch": 1.627049080204236, "percentage": 32.54, "elapsed_time": "1:25:51", "remaining_time": "2:57:58", "throughput": 8713.84, "total_tokens": 44886816} +{"current_steps": 66605, "total_steps": 204665, "loss": 0.0374, "lr": 1.7060507377129396e-06, "epoch": 1.6271712310360833, "percentage": 32.54, "elapsed_time": "1:25:51", "remaining_time": "2:57:58", "throughput": 8713.95, "total_tokens": 44890464} +{"current_steps": 66610, "total_steps": 204665, "loss": 0.0543, "lr": 1.7059903448542903e-06, "epoch": 1.6272933818679305, "percentage": 32.55, "elapsed_time": "1:25:51", "remaining_time": "2:57:57", "throughput": 8713.99, "total_tokens": 44893728} +{"current_steps": 66615, "total_steps": 204665, "loss": 0.0874, "lr": 1.7059299468614893e-06, "epoch": 1.6274155326997777, "percentage": 32.55, "elapsed_time": "1:25:52", "remaining_time": "2:57:57", "throughput": 8714.12, "total_tokens": 44897440} +{"current_steps": 66620, "total_steps": 204665, "loss": 0.049, "lr": 1.705869543734976e-06, "epoch": 1.6275376835316249, "percentage": 32.55, "elapsed_time": "1:25:52", "remaining_time": "2:57:56", "throughput": 8714.26, "total_tokens": 44901344} +{"current_steps": 66625, "total_steps": 204665, "loss": 0.0819, "lr": 1.7058091354751895e-06, "epoch": 1.627659834363472, "percentage": 32.55, "elapsed_time": "1:25:52", "remaining_time": "2:57:56", "throughput": 8714.32, "total_tokens": 44904608} +{"current_steps": 66630, "total_steps": 204665, "loss": 0.0016, "lr": 1.705748722082569e-06, "epoch": 1.6277819851953192, "percentage": 32.56, "elapsed_time": "1:25:53", "remaining_time": "2:57:55", "throughput": 8714.37, "total_tokens": 44907936} +{"current_steps": 66635, "total_steps": 204665, "loss": 0.0496, "lr": 1.7056883035575542e-06, "epoch": 1.6279041360271664, "percentage": 32.56, "elapsed_time": "1:25:53", "remaining_time": "2:57:55", "throughput": 8714.44, "total_tokens": 44911264} +{"current_steps": 66640, "total_steps": 204665, "loss": 0.0025, "lr": 1.7056278799005841e-06, "epoch": 1.6280262868590136, "percentage": 32.56, "elapsed_time": "1:25:54", "remaining_time": "2:57:55", "throughput": 8714.58, "total_tokens": 44915104} +{"current_steps": 66645, "total_steps": 204665, "loss": 0.1403, "lr": 1.705567451112098e-06, "epoch": 1.6281484376908608, "percentage": 32.56, "elapsed_time": "1:25:54", "remaining_time": "2:57:54", "throughput": 8714.67, "total_tokens": 44918624} +{"current_steps": 66650, "total_steps": 204665, "loss": 0.0961, "lr": 1.705507017192536e-06, "epoch": 1.6282705885227078, "percentage": 32.57, "elapsed_time": "1:25:54", "remaining_time": "2:57:54", "throughput": 8714.79, "total_tokens": 44922336} +{"current_steps": 66655, "total_steps": 204665, "loss": 0.0438, "lr": 1.7054465781423373e-06, "epoch": 1.628392739354555, "percentage": 32.57, "elapsed_time": "1:25:55", "remaining_time": "2:57:53", "throughput": 8714.82, "total_tokens": 44925536} +{"current_steps": 66660, "total_steps": 204665, "loss": 0.0817, "lr": 1.7053861339619408e-06, "epoch": 1.6285148901864022, "percentage": 32.57, "elapsed_time": "1:25:55", "remaining_time": "2:57:53", "throughput": 8714.88, "total_tokens": 44928864} +{"current_steps": 66665, "total_steps": 204665, "loss": 0.0014, "lr": 1.7053256846517874e-06, "epoch": 1.6286370410182494, "percentage": 32.57, "elapsed_time": "1:25:55", "remaining_time": "2:57:52", "throughput": 8714.91, "total_tokens": 44932000} +{"current_steps": 66670, "total_steps": 204665, "loss": 0.1552, "lr": 1.7052652302123152e-06, "epoch": 1.6287591918500965, "percentage": 32.58, "elapsed_time": "1:25:56", "remaining_time": "2:57:52", "throughput": 8714.98, "total_tokens": 44935456} +{"current_steps": 66675, "total_steps": 204665, "loss": 0.0643, "lr": 1.7052047706439648e-06, "epoch": 1.6288813426819435, "percentage": 32.58, "elapsed_time": "1:25:56", "remaining_time": "2:57:51", "throughput": 8715.03, "total_tokens": 44938720} +{"current_steps": 66680, "total_steps": 204665, "loss": 0.1296, "lr": 1.7051443059471758e-06, "epoch": 1.6290034935137907, "percentage": 32.58, "elapsed_time": "1:25:56", "remaining_time": "2:57:51", "throughput": 8715.05, "total_tokens": 44941792} +{"current_steps": 66685, "total_steps": 204665, "loss": 0.1599, "lr": 1.7050838361223874e-06, "epoch": 1.629125644345638, "percentage": 32.58, "elapsed_time": "1:25:57", "remaining_time": "2:57:50", "throughput": 8715.13, "total_tokens": 44945248} +{"current_steps": 66690, "total_steps": 204665, "loss": 0.1362, "lr": 1.7050233611700399e-06, "epoch": 1.629247795177485, "percentage": 32.58, "elapsed_time": "1:25:57", "remaining_time": "2:57:50", "throughput": 8715.14, "total_tokens": 44948256} +{"current_steps": 66695, "total_steps": 204665, "loss": 0.0916, "lr": 1.704962881090573e-06, "epoch": 1.6293699460093323, "percentage": 32.59, "elapsed_time": "1:25:57", "remaining_time": "2:57:49", "throughput": 8715.17, "total_tokens": 44951392} +{"current_steps": 66700, "total_steps": 204665, "loss": 0.1602, "lr": 1.7049023958844261e-06, "epoch": 1.6294920968411795, "percentage": 32.59, "elapsed_time": "1:25:58", "remaining_time": "2:57:49", "throughput": 8715.21, "total_tokens": 44954592} +{"current_steps": 66705, "total_steps": 204665, "loss": 0.0779, "lr": 1.7048419055520396e-06, "epoch": 1.6296142476730267, "percentage": 32.59, "elapsed_time": "1:25:58", "remaining_time": "2:57:48", "throughput": 8715.26, "total_tokens": 44957856} +{"current_steps": 66710, "total_steps": 204665, "loss": 0.0228, "lr": 1.704781410093853e-06, "epoch": 1.6297363985048738, "percentage": 32.59, "elapsed_time": "1:25:58", "remaining_time": "2:57:48", "throughput": 8715.33, "total_tokens": 44961312} +{"current_steps": 66715, "total_steps": 204665, "loss": 0.099, "lr": 1.704720909510307e-06, "epoch": 1.629858549336721, "percentage": 32.6, "elapsed_time": "1:25:59", "remaining_time": "2:57:47", "throughput": 8715.37, "total_tokens": 44964512} +{"current_steps": 66720, "total_steps": 204665, "loss": 0.0452, "lr": 1.7046604038018404e-06, "epoch": 1.6299807001685682, "percentage": 32.6, "elapsed_time": "1:25:59", "remaining_time": "2:57:47", "throughput": 8715.46, "total_tokens": 44967968} +{"current_steps": 66725, "total_steps": 204665, "loss": 0.0016, "lr": 1.704599892968894e-06, "epoch": 1.6301028510004154, "percentage": 32.6, "elapsed_time": "1:25:59", "remaining_time": "2:57:47", "throughput": 8715.46, "total_tokens": 44970912} +{"current_steps": 66730, "total_steps": 204665, "loss": 0.1082, "lr": 1.7045393770119075e-06, "epoch": 1.6302250018322626, "percentage": 32.6, "elapsed_time": "1:26:00", "remaining_time": "2:57:46", "throughput": 8715.52, "total_tokens": 44974240} +{"current_steps": 66735, "total_steps": 204665, "loss": 0.1062, "lr": 1.7044788559313214e-06, "epoch": 1.6303471526641098, "percentage": 32.61, "elapsed_time": "1:26:00", "remaining_time": "2:57:46", "throughput": 8715.56, "total_tokens": 44977504} +{"current_steps": 66740, "total_steps": 204665, "loss": 0.1591, "lr": 1.7044183297275753e-06, "epoch": 1.6304693034959568, "percentage": 32.61, "elapsed_time": "1:26:00", "remaining_time": "2:57:45", "throughput": 8715.6, "total_tokens": 44980704} +{"current_steps": 66745, "total_steps": 204665, "loss": 0.1346, "lr": 1.7043577984011099e-06, "epoch": 1.630591454327804, "percentage": 32.61, "elapsed_time": "1:26:01", "remaining_time": "2:57:45", "throughput": 8715.64, "total_tokens": 44983904} +{"current_steps": 66750, "total_steps": 204665, "loss": 0.1032, "lr": 1.7042972619523651e-06, "epoch": 1.6307136051596511, "percentage": 32.61, "elapsed_time": "1:26:01", "remaining_time": "2:57:44", "throughput": 8715.71, "total_tokens": 44987296} +{"current_steps": 66755, "total_steps": 204665, "loss": 0.1449, "lr": 1.7042367203817812e-06, "epoch": 1.6308357559914983, "percentage": 32.62, "elapsed_time": "1:26:01", "remaining_time": "2:57:44", "throughput": 8715.78, "total_tokens": 44990688} +{"current_steps": 66760, "total_steps": 204665, "loss": 0.0012, "lr": 1.7041761736897984e-06, "epoch": 1.6309579068233455, "percentage": 32.62, "elapsed_time": "1:26:02", "remaining_time": "2:57:43", "throughput": 8715.85, "total_tokens": 44994080} +{"current_steps": 66765, "total_steps": 204665, "loss": 0.0847, "lr": 1.7041156218768571e-06, "epoch": 1.6310800576551925, "percentage": 32.62, "elapsed_time": "1:26:02", "remaining_time": "2:57:43", "throughput": 8715.88, "total_tokens": 44997216} +{"current_steps": 66770, "total_steps": 204665, "loss": 0.0459, "lr": 1.7040550649433975e-06, "epoch": 1.6312022084870397, "percentage": 32.62, "elapsed_time": "1:26:03", "remaining_time": "2:57:42", "throughput": 8715.96, "total_tokens": 45000672} +{"current_steps": 66775, "total_steps": 204665, "loss": 0.0595, "lr": 1.70399450288986e-06, "epoch": 1.6313243593188869, "percentage": 32.63, "elapsed_time": "1:26:03", "remaining_time": "2:57:42", "throughput": 8716.02, "total_tokens": 45004000} +{"current_steps": 66780, "total_steps": 204665, "loss": 0.1191, "lr": 1.7039339357166854e-06, "epoch": 1.631446510150734, "percentage": 32.63, "elapsed_time": "1:26:03", "remaining_time": "2:57:41", "throughput": 8716.21, "total_tokens": 45008224} +{"current_steps": 66785, "total_steps": 204665, "loss": 0.1004, "lr": 1.703873363424314e-06, "epoch": 1.6315686609825812, "percentage": 32.63, "elapsed_time": "1:26:04", "remaining_time": "2:57:41", "throughput": 8716.23, "total_tokens": 45011232} +{"current_steps": 66790, "total_steps": 204665, "loss": 0.2868, "lr": 1.7038127860131859e-06, "epoch": 1.6316908118144284, "percentage": 32.63, "elapsed_time": "1:26:04", "remaining_time": "2:57:40", "throughput": 8716.34, "total_tokens": 45014880} +{"current_steps": 66795, "total_steps": 204665, "loss": 0.0023, "lr": 1.7037522034837418e-06, "epoch": 1.6318129626462756, "percentage": 32.64, "elapsed_time": "1:26:04", "remaining_time": "2:57:40", "throughput": 8716.37, "total_tokens": 45018016} +{"current_steps": 66800, "total_steps": 204665, "loss": 0.0195, "lr": 1.7036916158364227e-06, "epoch": 1.6319351134781228, "percentage": 32.64, "elapsed_time": "1:26:05", "remaining_time": "2:57:39", "throughput": 8716.37, "total_tokens": 45020960} +{"current_steps": 66805, "total_steps": 204665, "loss": 0.0265, "lr": 1.7036310230716686e-06, "epoch": 1.63205726430997, "percentage": 32.64, "elapsed_time": "1:26:05", "remaining_time": "2:57:39", "throughput": 8716.38, "total_tokens": 45024032} +{"current_steps": 66810, "total_steps": 204665, "loss": 0.0013, "lr": 1.7035704251899207e-06, "epoch": 1.6321794151418172, "percentage": 32.64, "elapsed_time": "1:26:05", "remaining_time": "2:57:39", "throughput": 8716.46, "total_tokens": 45027488} +{"current_steps": 66815, "total_steps": 204665, "loss": 0.0521, "lr": 1.7035098221916195e-06, "epoch": 1.6323015659736644, "percentage": 32.65, "elapsed_time": "1:26:06", "remaining_time": "2:57:38", "throughput": 8716.54, "total_tokens": 45030944} +{"current_steps": 66820, "total_steps": 204665, "loss": 0.0854, "lr": 1.7034492140772057e-06, "epoch": 1.6324237168055116, "percentage": 32.65, "elapsed_time": "1:26:06", "remaining_time": "2:57:38", "throughput": 8716.59, "total_tokens": 45034208} +{"current_steps": 66825, "total_steps": 204665, "loss": 0.1261, "lr": 1.7033886008471196e-06, "epoch": 1.6325458676373588, "percentage": 32.65, "elapsed_time": "1:26:06", "remaining_time": "2:57:37", "throughput": 8716.69, "total_tokens": 45037792} +{"current_steps": 66830, "total_steps": 204665, "loss": 0.0007, "lr": 1.7033279825018026e-06, "epoch": 1.6326680184692057, "percentage": 32.65, "elapsed_time": "1:26:07", "remaining_time": "2:57:37", "throughput": 8716.76, "total_tokens": 45041184} +{"current_steps": 66835, "total_steps": 204665, "loss": 0.1032, "lr": 1.7032673590416953e-06, "epoch": 1.632790169301053, "percentage": 32.66, "elapsed_time": "1:26:07", "remaining_time": "2:57:36", "throughput": 8716.8, "total_tokens": 45044384} +{"current_steps": 66840, "total_steps": 204665, "loss": 0.0113, "lr": 1.7032067304672387e-06, "epoch": 1.6329123201329, "percentage": 32.66, "elapsed_time": "1:26:07", "remaining_time": "2:57:36", "throughput": 8716.87, "total_tokens": 45047776} +{"current_steps": 66845, "total_steps": 204665, "loss": 0.0011, "lr": 1.7031460967788735e-06, "epoch": 1.6330344709647473, "percentage": 32.66, "elapsed_time": "1:26:08", "remaining_time": "2:57:35", "throughput": 8716.91, "total_tokens": 45050976} +{"current_steps": 66850, "total_steps": 204665, "loss": 0.1239, "lr": 1.7030854579770408e-06, "epoch": 1.6331566217965945, "percentage": 32.66, "elapsed_time": "1:26:08", "remaining_time": "2:57:35", "throughput": 8716.92, "total_tokens": 45053984} +{"current_steps": 66855, "total_steps": 204665, "loss": 0.0526, "lr": 1.7030248140621816e-06, "epoch": 1.6332787726284415, "percentage": 32.67, "elapsed_time": "1:26:08", "remaining_time": "2:57:34", "throughput": 8716.98, "total_tokens": 45057312} +{"current_steps": 66860, "total_steps": 204665, "loss": 0.0331, "lr": 1.7029641650347368e-06, "epoch": 1.6334009234602886, "percentage": 32.67, "elapsed_time": "1:26:09", "remaining_time": "2:57:34", "throughput": 8717.03, "total_tokens": 45060576} +{"current_steps": 66865, "total_steps": 204665, "loss": 0.1633, "lr": 1.7029035108951474e-06, "epoch": 1.6335230742921358, "percentage": 32.67, "elapsed_time": "1:26:09", "remaining_time": "2:57:33", "throughput": 8717.06, "total_tokens": 45063776} +{"current_steps": 66870, "total_steps": 204665, "loss": 0.0636, "lr": 1.7028428516438549e-06, "epoch": 1.633645225123983, "percentage": 32.67, "elapsed_time": "1:26:09", "remaining_time": "2:57:33", "throughput": 8717.12, "total_tokens": 45067104} +{"current_steps": 66875, "total_steps": 204665, "loss": 0.1001, "lr": 1.7027821872813002e-06, "epoch": 1.6337673759558302, "percentage": 32.68, "elapsed_time": "1:26:10", "remaining_time": "2:57:32", "throughput": 8717.13, "total_tokens": 45070112} +{"current_steps": 66880, "total_steps": 204665, "loss": 0.0053, "lr": 1.7027215178079242e-06, "epoch": 1.6338895267876774, "percentage": 32.68, "elapsed_time": "1:26:10", "remaining_time": "2:57:32", "throughput": 8717.27, "total_tokens": 45073952} +{"current_steps": 66885, "total_steps": 204665, "loss": 0.111, "lr": 1.7026608432241683e-06, "epoch": 1.6340116776195246, "percentage": 32.68, "elapsed_time": "1:26:10", "remaining_time": "2:57:32", "throughput": 8717.33, "total_tokens": 45077280} +{"current_steps": 66890, "total_steps": 204665, "loss": 0.0501, "lr": 1.702600163530474e-06, "epoch": 1.6341338284513718, "percentage": 32.68, "elapsed_time": "1:26:11", "remaining_time": "2:57:31", "throughput": 8717.4, "total_tokens": 45080672} +{"current_steps": 66895, "total_steps": 204665, "loss": 0.2267, "lr": 1.702539478727282e-06, "epoch": 1.634255979283219, "percentage": 32.69, "elapsed_time": "1:26:11", "remaining_time": "2:57:31", "throughput": 8717.42, "total_tokens": 45083744} +{"current_steps": 66900, "total_steps": 204665, "loss": 0.1211, "lr": 1.7024787888150339e-06, "epoch": 1.6343781301150662, "percentage": 32.69, "elapsed_time": "1:26:12", "remaining_time": "2:57:30", "throughput": 8717.44, "total_tokens": 45086880} +{"current_steps": 66905, "total_steps": 204665, "loss": 0.0809, "lr": 1.7024180937941712e-06, "epoch": 1.6345002809469134, "percentage": 32.69, "elapsed_time": "1:26:12", "remaining_time": "2:57:30", "throughput": 8717.52, "total_tokens": 45090336} +{"current_steps": 66910, "total_steps": 204665, "loss": 0.0365, "lr": 1.7023573936651355e-06, "epoch": 1.6346224317787605, "percentage": 32.69, "elapsed_time": "1:26:12", "remaining_time": "2:57:29", "throughput": 8717.6, "total_tokens": 45093792} +{"current_steps": 66915, "total_steps": 204665, "loss": 0.0009, "lr": 1.7022966884283677e-06, "epoch": 1.6347445826106077, "percentage": 32.69, "elapsed_time": "1:26:13", "remaining_time": "2:57:29", "throughput": 8717.62, "total_tokens": 45096928} +{"current_steps": 66920, "total_steps": 204665, "loss": 0.0279, "lr": 1.7022359780843095e-06, "epoch": 1.6348667334424547, "percentage": 32.7, "elapsed_time": "1:26:13", "remaining_time": "2:57:28", "throughput": 8717.64, "total_tokens": 45100000} +{"current_steps": 66925, "total_steps": 204665, "loss": 0.0012, "lr": 1.702175262633402e-06, "epoch": 1.634988884274302, "percentage": 32.7, "elapsed_time": "1:26:13", "remaining_time": "2:57:28", "throughput": 8717.69, "total_tokens": 45103264} +{"current_steps": 66930, "total_steps": 204665, "loss": 0.096, "lr": 1.7021145420760877e-06, "epoch": 1.635111035106149, "percentage": 32.7, "elapsed_time": "1:26:14", "remaining_time": "2:57:27", "throughput": 8717.77, "total_tokens": 45106720} +{"current_steps": 66935, "total_steps": 204665, "loss": 0.2329, "lr": 1.7020538164128074e-06, "epoch": 1.6352331859379963, "percentage": 32.7, "elapsed_time": "1:26:14", "remaining_time": "2:57:27", "throughput": 8717.81, "total_tokens": 45109920} +{"current_steps": 66940, "total_steps": 204665, "loss": 0.1619, "lr": 1.7019930856440027e-06, "epoch": 1.6353553367698432, "percentage": 32.71, "elapsed_time": "1:26:14", "remaining_time": "2:57:26", "throughput": 8717.92, "total_tokens": 45113568} +{"current_steps": 66945, "total_steps": 204665, "loss": 0.0393, "lr": 1.7019323497701159e-06, "epoch": 1.6354774876016904, "percentage": 32.71, "elapsed_time": "1:26:15", "remaining_time": "2:57:26", "throughput": 8717.95, "total_tokens": 45116704} +{"current_steps": 66950, "total_steps": 204665, "loss": 0.0604, "lr": 1.7018716087915882e-06, "epoch": 1.6355996384335376, "percentage": 32.71, "elapsed_time": "1:26:15", "remaining_time": "2:57:25", "throughput": 8717.96, "total_tokens": 45119776} +{"current_steps": 66955, "total_steps": 204665, "loss": 0.0023, "lr": 1.701810862708861e-06, "epoch": 1.6357217892653848, "percentage": 32.71, "elapsed_time": "1:26:15", "remaining_time": "2:57:25", "throughput": 8718.0, "total_tokens": 45122976} +{"current_steps": 66960, "total_steps": 204665, "loss": 0.0294, "lr": 1.7017501115223766e-06, "epoch": 1.635843940097232, "percentage": 32.72, "elapsed_time": "1:26:16", "remaining_time": "2:57:24", "throughput": 8718.01, "total_tokens": 45126048} +{"current_steps": 66965, "total_steps": 204665, "loss": 0.0043, "lr": 1.7016893552325766e-06, "epoch": 1.6359660909290792, "percentage": 32.72, "elapsed_time": "1:26:16", "remaining_time": "2:57:24", "throughput": 8718.06, "total_tokens": 45129248} +{"current_steps": 66970, "total_steps": 204665, "loss": 0.0391, "lr": 1.701628593839903e-06, "epoch": 1.6360882417609264, "percentage": 32.72, "elapsed_time": "1:26:16", "remaining_time": "2:57:24", "throughput": 8718.14, "total_tokens": 45132768} +{"current_steps": 66975, "total_steps": 204665, "loss": 0.1749, "lr": 1.7015678273447977e-06, "epoch": 1.6362103925927736, "percentage": 32.72, "elapsed_time": "1:26:17", "remaining_time": "2:57:23", "throughput": 8718.24, "total_tokens": 45136352} +{"current_steps": 66980, "total_steps": 204665, "loss": 0.0005, "lr": 1.7015070557477022e-06, "epoch": 1.6363325434246208, "percentage": 32.73, "elapsed_time": "1:26:17", "remaining_time": "2:57:23", "throughput": 8718.28, "total_tokens": 45139552} +{"current_steps": 66985, "total_steps": 204665, "loss": 0.01, "lr": 1.7014462790490586e-06, "epoch": 1.636454694256468, "percentage": 32.73, "elapsed_time": "1:26:17", "remaining_time": "2:57:22", "throughput": 8718.4, "total_tokens": 45143328} +{"current_steps": 66990, "total_steps": 204665, "loss": 0.0569, "lr": 1.7013854972493093e-06, "epoch": 1.6365768450883151, "percentage": 32.73, "elapsed_time": "1:26:18", "remaining_time": "2:57:22", "throughput": 8718.4, "total_tokens": 45146272} +{"current_steps": 66995, "total_steps": 204665, "loss": 0.0698, "lr": 1.7013247103488962e-06, "epoch": 1.6366989959201623, "percentage": 32.73, "elapsed_time": "1:26:18", "remaining_time": "2:57:21", "throughput": 8718.46, "total_tokens": 45149600} +{"current_steps": 67000, "total_steps": 204665, "loss": 0.0003, "lr": 1.7012639183482609e-06, "epoch": 1.6368211467520095, "percentage": 32.74, "elapsed_time": "1:26:18", "remaining_time": "2:57:21", "throughput": 8718.57, "total_tokens": 45153248} +{"current_steps": 67005, "total_steps": 204665, "loss": 0.0504, "lr": 1.7012031212478456e-06, "epoch": 1.6369432975838567, "percentage": 32.74, "elapsed_time": "1:26:19", "remaining_time": "2:57:20", "throughput": 8718.58, "total_tokens": 45156192} +{"current_steps": 67010, "total_steps": 204665, "loss": 0.1407, "lr": 1.7011423190480926e-06, "epoch": 1.6370654484157037, "percentage": 32.74, "elapsed_time": "1:26:19", "remaining_time": "2:57:20", "throughput": 8718.6, "total_tokens": 45159264} +{"current_steps": 67015, "total_steps": 204665, "loss": 0.0457, "lr": 1.7010815117494444e-06, "epoch": 1.6371875992475509, "percentage": 32.74, "elapsed_time": "1:26:19", "remaining_time": "2:57:19", "throughput": 8718.64, "total_tokens": 45162528} +{"current_steps": 67020, "total_steps": 204665, "loss": 0.084, "lr": 1.7010206993523425e-06, "epoch": 1.637309750079398, "percentage": 32.75, "elapsed_time": "1:26:20", "remaining_time": "2:57:19", "throughput": 8718.69, "total_tokens": 45165792} +{"current_steps": 67025, "total_steps": 204665, "loss": 0.0945, "lr": 1.70095988185723e-06, "epoch": 1.6374319009112452, "percentage": 32.75, "elapsed_time": "1:26:20", "remaining_time": "2:57:18", "throughput": 8718.74, "total_tokens": 45169056} +{"current_steps": 67030, "total_steps": 204665, "loss": 0.2022, "lr": 1.7008990592645483e-06, "epoch": 1.6375540517430922, "percentage": 32.75, "elapsed_time": "1:26:21", "remaining_time": "2:57:18", "throughput": 8718.8, "total_tokens": 45172384} +{"current_steps": 67035, "total_steps": 204665, "loss": 0.1716, "lr": 1.7008382315747402e-06, "epoch": 1.6376762025749394, "percentage": 32.75, "elapsed_time": "1:26:21", "remaining_time": "2:57:17", "throughput": 8718.85, "total_tokens": 45175712} +{"current_steps": 67040, "total_steps": 204665, "loss": 0.1536, "lr": 1.700777398788248e-06, "epoch": 1.6377983534067866, "percentage": 32.76, "elapsed_time": "1:26:21", "remaining_time": "2:57:17", "throughput": 8718.85, "total_tokens": 45178656} +{"current_steps": 67045, "total_steps": 204665, "loss": 0.0731, "lr": 1.700716560905514e-06, "epoch": 1.6379205042386338, "percentage": 32.76, "elapsed_time": "1:26:22", "remaining_time": "2:57:16", "throughput": 8718.97, "total_tokens": 45182368} +{"current_steps": 67050, "total_steps": 204665, "loss": 0.0579, "lr": 1.7006557179269806e-06, "epoch": 1.638042655070481, "percentage": 32.76, "elapsed_time": "1:26:22", "remaining_time": "2:57:16", "throughput": 8718.99, "total_tokens": 45185504} +{"current_steps": 67055, "total_steps": 204665, "loss": 0.0516, "lr": 1.7005948698530907e-06, "epoch": 1.6381648059023282, "percentage": 32.76, "elapsed_time": "1:26:22", "remaining_time": "2:57:16", "throughput": 8719.02, "total_tokens": 45188640} +{"current_steps": 67060, "total_steps": 204665, "loss": 0.0013, "lr": 1.7005340166842866e-06, "epoch": 1.6382869567341753, "percentage": 32.77, "elapsed_time": "1:26:23", "remaining_time": "2:57:15", "throughput": 8719.07, "total_tokens": 45191904} +{"current_steps": 67065, "total_steps": 204665, "loss": 0.0958, "lr": 1.7004731584210102e-06, "epoch": 1.6384091075660225, "percentage": 32.77, "elapsed_time": "1:26:23", "remaining_time": "2:57:15", "throughput": 8719.1, "total_tokens": 45195040} +{"current_steps": 67070, "total_steps": 204665, "loss": 0.0385, "lr": 1.700412295063705e-06, "epoch": 1.6385312583978697, "percentage": 32.77, "elapsed_time": "1:26:23", "remaining_time": "2:57:14", "throughput": 8719.17, "total_tokens": 45198432} +{"current_steps": 67075, "total_steps": 204665, "loss": 0.066, "lr": 1.700351426612813e-06, "epoch": 1.638653409229717, "percentage": 32.77, "elapsed_time": "1:26:24", "remaining_time": "2:57:14", "throughput": 8719.18, "total_tokens": 45201504} +{"current_steps": 67080, "total_steps": 204665, "loss": 0.1887, "lr": 1.7002905530687767e-06, "epoch": 1.638775560061564, "percentage": 32.78, "elapsed_time": "1:26:24", "remaining_time": "2:57:13", "throughput": 8719.22, "total_tokens": 45204704} +{"current_steps": 67085, "total_steps": 204665, "loss": 0.109, "lr": 1.7002296744320396e-06, "epoch": 1.6388977108934113, "percentage": 32.78, "elapsed_time": "1:26:24", "remaining_time": "2:57:13", "throughput": 8719.26, "total_tokens": 45207840} +{"current_steps": 67090, "total_steps": 204665, "loss": 0.0866, "lr": 1.700168790703044e-06, "epoch": 1.6390198617252585, "percentage": 32.78, "elapsed_time": "1:26:25", "remaining_time": "2:57:12", "throughput": 8719.31, "total_tokens": 45211168} +{"current_steps": 67095, "total_steps": 204665, "loss": 0.0852, "lr": 1.7001079018822325e-06, "epoch": 1.6391420125571057, "percentage": 32.78, "elapsed_time": "1:26:25", "remaining_time": "2:57:12", "throughput": 8719.41, "total_tokens": 45214752} +{"current_steps": 67100, "total_steps": 204665, "loss": 0.067, "lr": 1.7000470079700482e-06, "epoch": 1.6392641633889526, "percentage": 32.79, "elapsed_time": "1:26:25", "remaining_time": "2:57:11", "throughput": 8719.43, "total_tokens": 45217824} +{"current_steps": 67105, "total_steps": 204665, "loss": 0.0357, "lr": 1.6999861089669337e-06, "epoch": 1.6393863142207998, "percentage": 32.79, "elapsed_time": "1:26:26", "remaining_time": "2:57:11", "throughput": 8719.53, "total_tokens": 45221408} +{"current_steps": 67110, "total_steps": 204665, "loss": 0.0021, "lr": 1.6999252048733314e-06, "epoch": 1.639508465052647, "percentage": 32.79, "elapsed_time": "1:26:26", "remaining_time": "2:57:10", "throughput": 8719.58, "total_tokens": 45224672} +{"current_steps": 67115, "total_steps": 204665, "loss": 0.1089, "lr": 1.6998642956896853e-06, "epoch": 1.6396306158844942, "percentage": 32.79, "elapsed_time": "1:26:26", "remaining_time": "2:57:10", "throughput": 8719.65, "total_tokens": 45228128} +{"current_steps": 67120, "total_steps": 204665, "loss": 0.0386, "lr": 1.699803381416438e-06, "epoch": 1.6397527667163412, "percentage": 32.8, "elapsed_time": "1:26:27", "remaining_time": "2:57:09", "throughput": 8719.77, "total_tokens": 45231840} +{"current_steps": 67125, "total_steps": 204665, "loss": 0.0364, "lr": 1.699742462054032e-06, "epoch": 1.6398749175481884, "percentage": 32.8, "elapsed_time": "1:26:27", "remaining_time": "2:57:09", "throughput": 8719.88, "total_tokens": 45235424} +{"current_steps": 67130, "total_steps": 204665, "loss": 0.0412, "lr": 1.6996815376029105e-06, "epoch": 1.6399970683800356, "percentage": 32.8, "elapsed_time": "1:26:27", "remaining_time": "2:57:09", "throughput": 8719.94, "total_tokens": 45238752} +{"current_steps": 67135, "total_steps": 204665, "loss": 0.0014, "lr": 1.6996206080635167e-06, "epoch": 1.6401192192118828, "percentage": 32.8, "elapsed_time": "1:26:28", "remaining_time": "2:57:08", "throughput": 8720.03, "total_tokens": 45242272} +{"current_steps": 67140, "total_steps": 204665, "loss": 0.1312, "lr": 1.6995596734362937e-06, "epoch": 1.64024137004373, "percentage": 32.8, "elapsed_time": "1:26:28", "remaining_time": "2:57:08", "throughput": 8720.09, "total_tokens": 45245600} +{"current_steps": 67145, "total_steps": 204665, "loss": 0.1014, "lr": 1.6994987337216845e-06, "epoch": 1.6403635208755771, "percentage": 32.81, "elapsed_time": "1:26:29", "remaining_time": "2:57:07", "throughput": 8720.19, "total_tokens": 45249184} +{"current_steps": 67150, "total_steps": 204665, "loss": 0.1057, "lr": 1.6994377889201328e-06, "epoch": 1.6404856717074243, "percentage": 32.81, "elapsed_time": "1:26:29", "remaining_time": "2:57:07", "throughput": 8720.27, "total_tokens": 45252704} +{"current_steps": 67155, "total_steps": 204665, "loss": 0.1044, "lr": 1.699376839032081e-06, "epoch": 1.6406078225392715, "percentage": 32.81, "elapsed_time": "1:26:29", "remaining_time": "2:57:06", "throughput": 8720.36, "total_tokens": 45256224} +{"current_steps": 67160, "total_steps": 204665, "loss": 0.0037, "lr": 1.6993158840579728e-06, "epoch": 1.6407299733711187, "percentage": 32.81, "elapsed_time": "1:26:30", "remaining_time": "2:57:06", "throughput": 8720.5, "total_tokens": 45260064} +{"current_steps": 67165, "total_steps": 204665, "loss": 0.0722, "lr": 1.6992549239982515e-06, "epoch": 1.640852124202966, "percentage": 32.82, "elapsed_time": "1:26:30", "remaining_time": "2:57:05", "throughput": 8720.56, "total_tokens": 45263392} +{"current_steps": 67170, "total_steps": 204665, "loss": 0.159, "lr": 1.6991939588533601e-06, "epoch": 1.640974275034813, "percentage": 32.82, "elapsed_time": "1:26:30", "remaining_time": "2:57:05", "throughput": 8720.54, "total_tokens": 45266208} +{"current_steps": 67175, "total_steps": 204665, "loss": 0.1023, "lr": 1.6991329886237421e-06, "epoch": 1.6410964258666603, "percentage": 32.82, "elapsed_time": "1:26:31", "remaining_time": "2:57:04", "throughput": 8720.67, "total_tokens": 45269984} +{"current_steps": 67180, "total_steps": 204665, "loss": 0.0088, "lr": 1.6990720133098412e-06, "epoch": 1.6412185766985075, "percentage": 32.82, "elapsed_time": "1:26:31", "remaining_time": "2:57:04", "throughput": 8720.67, "total_tokens": 45272992} +{"current_steps": 67185, "total_steps": 204665, "loss": 0.002, "lr": 1.6990110329121005e-06, "epoch": 1.6413407275303544, "percentage": 32.83, "elapsed_time": "1:26:31", "remaining_time": "2:57:03", "throughput": 8720.73, "total_tokens": 45276320} +{"current_steps": 67190, "total_steps": 204665, "loss": 0.0477, "lr": 1.6989500474309637e-06, "epoch": 1.6414628783622016, "percentage": 32.83, "elapsed_time": "1:26:32", "remaining_time": "2:57:03", "throughput": 8720.76, "total_tokens": 45279520} +{"current_steps": 67195, "total_steps": 204665, "loss": 0.1012, "lr": 1.6988890568668741e-06, "epoch": 1.6415850291940488, "percentage": 32.83, "elapsed_time": "1:26:32", "remaining_time": "2:57:03", "throughput": 8720.82, "total_tokens": 45282848} +{"current_steps": 67200, "total_steps": 204665, "loss": 0.0607, "lr": 1.6988280612202751e-06, "epoch": 1.641707180025896, "percentage": 32.83, "elapsed_time": "1:26:32", "remaining_time": "2:57:02", "throughput": 8720.83, "total_tokens": 45285856} +{"current_steps": 67205, "total_steps": 204665, "loss": 0.302, "lr": 1.6987670604916106e-06, "epoch": 1.6418293308577432, "percentage": 32.84, "elapsed_time": "1:26:33", "remaining_time": "2:57:02", "throughput": 8720.84, "total_tokens": 45288864} +{"current_steps": 67210, "total_steps": 204665, "loss": 0.1141, "lr": 1.6987060546813242e-06, "epoch": 1.6419514816895902, "percentage": 32.84, "elapsed_time": "1:26:33", "remaining_time": "2:57:01", "throughput": 8720.93, "total_tokens": 45292448} +{"current_steps": 67215, "total_steps": 204665, "loss": 0.1661, "lr": 1.6986450437898592e-06, "epoch": 1.6420736325214373, "percentage": 32.84, "elapsed_time": "1:26:33", "remaining_time": "2:57:01", "throughput": 8720.94, "total_tokens": 45295456} +{"current_steps": 67220, "total_steps": 204665, "loss": 0.0886, "lr": 1.6985840278176596e-06, "epoch": 1.6421957833532845, "percentage": 32.84, "elapsed_time": "1:26:34", "remaining_time": "2:57:00", "throughput": 8720.97, "total_tokens": 45298592} +{"current_steps": 67225, "total_steps": 204665, "loss": 0.2015, "lr": 1.6985230067651695e-06, "epoch": 1.6423179341851317, "percentage": 32.85, "elapsed_time": "1:26:34", "remaining_time": "2:57:00", "throughput": 8721.01, "total_tokens": 45301792} +{"current_steps": 67230, "total_steps": 204665, "loss": 0.159, "lr": 1.6984619806328317e-06, "epoch": 1.642440085016979, "percentage": 32.85, "elapsed_time": "1:26:34", "remaining_time": "2:56:59", "throughput": 8721.24, "total_tokens": 45306208} +{"current_steps": 67235, "total_steps": 204665, "loss": 0.0794, "lr": 1.6984009494210904e-06, "epoch": 1.642562235848826, "percentage": 32.85, "elapsed_time": "1:26:35", "remaining_time": "2:56:59", "throughput": 8721.27, "total_tokens": 45309344} +{"current_steps": 67240, "total_steps": 204665, "loss": 0.0459, "lr": 1.69833991313039e-06, "epoch": 1.6426843866806733, "percentage": 32.85, "elapsed_time": "1:26:35", "remaining_time": "2:56:58", "throughput": 8721.35, "total_tokens": 45312800} +{"current_steps": 67245, "total_steps": 204665, "loss": 0.1316, "lr": 1.6982788717611735e-06, "epoch": 1.6428065375125205, "percentage": 32.86, "elapsed_time": "1:26:35", "remaining_time": "2:56:58", "throughput": 8721.38, "total_tokens": 45316000} +{"current_steps": 67250, "total_steps": 204665, "loss": 0.0026, "lr": 1.6982178253138857e-06, "epoch": 1.6429286883443677, "percentage": 32.86, "elapsed_time": "1:26:36", "remaining_time": "2:56:57", "throughput": 8721.45, "total_tokens": 45319392} +{"current_steps": 67255, "total_steps": 204665, "loss": 0.0425, "lr": 1.6981567737889698e-06, "epoch": 1.6430508391762149, "percentage": 32.86, "elapsed_time": "1:26:36", "remaining_time": "2:56:57", "throughput": 8721.63, "total_tokens": 45323488} +{"current_steps": 67260, "total_steps": 204665, "loss": 0.0061, "lr": 1.6980957171868702e-06, "epoch": 1.643172990008062, "percentage": 32.86, "elapsed_time": "1:26:37", "remaining_time": "2:56:56", "throughput": 8721.66, "total_tokens": 45326688} +{"current_steps": 67265, "total_steps": 204665, "loss": 0.1156, "lr": 1.6980346555080306e-06, "epoch": 1.6432951408399092, "percentage": 32.87, "elapsed_time": "1:26:37", "remaining_time": "2:56:56", "throughput": 8721.75, "total_tokens": 45330208} +{"current_steps": 67270, "total_steps": 204665, "loss": 0.0174, "lr": 1.6979735887528954e-06, "epoch": 1.6434172916717564, "percentage": 32.87, "elapsed_time": "1:26:37", "remaining_time": "2:56:56", "throughput": 8721.81, "total_tokens": 45333536} +{"current_steps": 67275, "total_steps": 204665, "loss": 0.1974, "lr": 1.6979125169219085e-06, "epoch": 1.6435394425036034, "percentage": 32.87, "elapsed_time": "1:26:38", "remaining_time": "2:56:55", "throughput": 8721.88, "total_tokens": 45336928} +{"current_steps": 67280, "total_steps": 204665, "loss": 0.0702, "lr": 1.6978514400155137e-06, "epoch": 1.6436615933354506, "percentage": 32.87, "elapsed_time": "1:26:38", "remaining_time": "2:56:55", "throughput": 8721.93, "total_tokens": 45340192} +{"current_steps": 67285, "total_steps": 204665, "loss": 0.0019, "lr": 1.697790358034156e-06, "epoch": 1.6437837441672978, "percentage": 32.88, "elapsed_time": "1:26:38", "remaining_time": "2:56:54", "throughput": 8721.99, "total_tokens": 45343520} +{"current_steps": 67290, "total_steps": 204665, "loss": 0.0007, "lr": 1.6977292709782792e-06, "epoch": 1.643905894999145, "percentage": 32.88, "elapsed_time": "1:26:39", "remaining_time": "2:56:54", "throughput": 8722.02, "total_tokens": 45346656} +{"current_steps": 67295, "total_steps": 204665, "loss": 0.1003, "lr": 1.6976681788483268e-06, "epoch": 1.6440280458309922, "percentage": 32.88, "elapsed_time": "1:26:39", "remaining_time": "2:56:53", "throughput": 8722.07, "total_tokens": 45349984} +{"current_steps": 67300, "total_steps": 204665, "loss": 0.0783, "lr": 1.6976070816447443e-06, "epoch": 1.6441501966628391, "percentage": 32.88, "elapsed_time": "1:26:39", "remaining_time": "2:56:53", "throughput": 8722.12, "total_tokens": 45353248} +{"current_steps": 67305, "total_steps": 204665, "loss": 0.0014, "lr": 1.6975459793679753e-06, "epoch": 1.6442723474946863, "percentage": 32.89, "elapsed_time": "1:26:40", "remaining_time": "2:56:52", "throughput": 8722.16, "total_tokens": 45356448} +{"current_steps": 67310, "total_steps": 204665, "loss": 0.0959, "lr": 1.6974848720184647e-06, "epoch": 1.6443944983265335, "percentage": 32.89, "elapsed_time": "1:26:40", "remaining_time": "2:56:52", "throughput": 8722.21, "total_tokens": 45359712} +{"current_steps": 67315, "total_steps": 204665, "loss": 0.15, "lr": 1.697423759596656e-06, "epoch": 1.6445166491583807, "percentage": 32.89, "elapsed_time": "1:26:40", "remaining_time": "2:56:51", "throughput": 8722.25, "total_tokens": 45362912} +{"current_steps": 67320, "total_steps": 204665, "loss": 0.0369, "lr": 1.6973626421029944e-06, "epoch": 1.6446387999902279, "percentage": 32.89, "elapsed_time": "1:26:41", "remaining_time": "2:56:51", "throughput": 8722.33, "total_tokens": 45366432} +{"current_steps": 67325, "total_steps": 204665, "loss": 0.149, "lr": 1.697301519537924e-06, "epoch": 1.644760950822075, "percentage": 32.9, "elapsed_time": "1:26:41", "remaining_time": "2:56:50", "throughput": 8722.39, "total_tokens": 45369760} +{"current_steps": 67330, "total_steps": 204665, "loss": 0.0926, "lr": 1.6972403919018895e-06, "epoch": 1.6448831016539223, "percentage": 32.9, "elapsed_time": "1:26:41", "remaining_time": "2:56:50", "throughput": 8722.49, "total_tokens": 45373408} +{"current_steps": 67335, "total_steps": 204665, "loss": 0.0958, "lr": 1.6971792591953352e-06, "epoch": 1.6450052524857695, "percentage": 32.9, "elapsed_time": "1:26:42", "remaining_time": "2:56:49", "throughput": 8722.57, "total_tokens": 45376864} +{"current_steps": 67340, "total_steps": 204665, "loss": 0.0568, "lr": 1.6971181214187058e-06, "epoch": 1.6451274033176166, "percentage": 32.9, "elapsed_time": "1:26:42", "remaining_time": "2:56:49", "throughput": 8722.63, "total_tokens": 45380192} +{"current_steps": 67345, "total_steps": 204665, "loss": 0.0958, "lr": 1.697056978572446e-06, "epoch": 1.6452495541494638, "percentage": 32.9, "elapsed_time": "1:26:42", "remaining_time": "2:56:49", "throughput": 8722.62, "total_tokens": 45383072} +{"current_steps": 67350, "total_steps": 204665, "loss": 0.1171, "lr": 1.6969958306570002e-06, "epoch": 1.645371704981311, "percentage": 32.91, "elapsed_time": "1:26:43", "remaining_time": "2:56:48", "throughput": 8722.65, "total_tokens": 45386208} +{"current_steps": 67355, "total_steps": 204665, "loss": 0.1764, "lr": 1.6969346776728134e-06, "epoch": 1.6454938558131582, "percentage": 32.91, "elapsed_time": "1:26:43", "remaining_time": "2:56:48", "throughput": 8722.78, "total_tokens": 45390048} +{"current_steps": 67360, "total_steps": 204665, "loss": 0.1151, "lr": 1.6968735196203303e-06, "epoch": 1.6456160066450054, "percentage": 32.91, "elapsed_time": "1:26:43", "remaining_time": "2:56:47", "throughput": 8722.91, "total_tokens": 45393824} +{"current_steps": 67365, "total_steps": 204665, "loss": 0.1273, "lr": 1.6968123564999952e-06, "epoch": 1.6457381574768524, "percentage": 32.91, "elapsed_time": "1:26:44", "remaining_time": "2:56:47", "throughput": 8722.96, "total_tokens": 45397088} +{"current_steps": 67370, "total_steps": 204665, "loss": 0.1678, "lr": 1.6967511883122536e-06, "epoch": 1.6458603083086996, "percentage": 32.92, "elapsed_time": "1:26:44", "remaining_time": "2:56:46", "throughput": 8723.03, "total_tokens": 45400480} +{"current_steps": 67375, "total_steps": 204665, "loss": 0.1328, "lr": 1.6966900150575498e-06, "epoch": 1.6459824591405467, "percentage": 32.92, "elapsed_time": "1:26:45", "remaining_time": "2:56:46", "throughput": 8723.11, "total_tokens": 45403936} +{"current_steps": 67380, "total_steps": 204665, "loss": 0.0541, "lr": 1.696628836736329e-06, "epoch": 1.646104609972394, "percentage": 32.92, "elapsed_time": "1:26:45", "remaining_time": "2:56:45", "throughput": 8723.18, "total_tokens": 45407392} +{"current_steps": 67385, "total_steps": 204665, "loss": 0.0406, "lr": 1.6965676533490357e-06, "epoch": 1.6462267608042411, "percentage": 32.92, "elapsed_time": "1:26:45", "remaining_time": "2:56:45", "throughput": 8723.28, "total_tokens": 45411040} +{"current_steps": 67390, "total_steps": 204665, "loss": 0.0364, "lr": 1.6965064648961146e-06, "epoch": 1.646348911636088, "percentage": 32.93, "elapsed_time": "1:26:46", "remaining_time": "2:56:44", "throughput": 8723.32, "total_tokens": 45414240} +{"current_steps": 67395, "total_steps": 204665, "loss": 0.0662, "lr": 1.696445271378012e-06, "epoch": 1.6464710624679353, "percentage": 32.93, "elapsed_time": "1:26:46", "remaining_time": "2:56:44", "throughput": 8723.41, "total_tokens": 45417760} +{"current_steps": 67400, "total_steps": 204665, "loss": 0.1032, "lr": 1.6963840727951717e-06, "epoch": 1.6465932132997825, "percentage": 32.93, "elapsed_time": "1:26:46", "remaining_time": "2:56:43", "throughput": 8723.45, "total_tokens": 45421024} +{"current_steps": 67405, "total_steps": 204665, "loss": 0.0662, "lr": 1.6963228691480391e-06, "epoch": 1.6467153641316297, "percentage": 32.93, "elapsed_time": "1:26:47", "remaining_time": "2:56:43", "throughput": 8723.56, "total_tokens": 45424672} +{"current_steps": 67410, "total_steps": 204665, "loss": 0.2042, "lr": 1.6962616604370595e-06, "epoch": 1.6468375149634769, "percentage": 32.94, "elapsed_time": "1:26:47", "remaining_time": "2:56:43", "throughput": 8723.6, "total_tokens": 45427936} +{"current_steps": 67415, "total_steps": 204665, "loss": 0.1869, "lr": 1.6962004466626776e-06, "epoch": 1.646959665795324, "percentage": 32.94, "elapsed_time": "1:26:47", "remaining_time": "2:56:42", "throughput": 8723.59, "total_tokens": 45430816} +{"current_steps": 67420, "total_steps": 204665, "loss": 0.1323, "lr": 1.6961392278253386e-06, "epoch": 1.6470818166271712, "percentage": 32.94, "elapsed_time": "1:26:48", "remaining_time": "2:56:42", "throughput": 8723.67, "total_tokens": 45434336} +{"current_steps": 67425, "total_steps": 204665, "loss": 0.084, "lr": 1.6960780039254882e-06, "epoch": 1.6472039674590184, "percentage": 32.94, "elapsed_time": "1:26:48", "remaining_time": "2:56:41", "throughput": 8723.72, "total_tokens": 45437664} +{"current_steps": 67430, "total_steps": 204665, "loss": 0.1477, "lr": 1.6960167749635714e-06, "epoch": 1.6473261182908656, "percentage": 32.95, "elapsed_time": "1:26:48", "remaining_time": "2:56:41", "throughput": 8723.79, "total_tokens": 45441056} +{"current_steps": 67435, "total_steps": 204665, "loss": 0.0439, "lr": 1.6959555409400332e-06, "epoch": 1.6474482691227128, "percentage": 32.95, "elapsed_time": "1:26:49", "remaining_time": "2:56:40", "throughput": 8723.85, "total_tokens": 45444384} +{"current_steps": 67440, "total_steps": 204665, "loss": 0.0756, "lr": 1.6958943018553194e-06, "epoch": 1.64757041995456, "percentage": 32.95, "elapsed_time": "1:26:49", "remaining_time": "2:56:40", "throughput": 8723.9, "total_tokens": 45447712} +{"current_steps": 67445, "total_steps": 204665, "loss": 0.0039, "lr": 1.695833057709875e-06, "epoch": 1.6476925707864072, "percentage": 32.95, "elapsed_time": "1:26:49", "remaining_time": "2:56:39", "throughput": 8723.99, "total_tokens": 45451232} +{"current_steps": 67450, "total_steps": 204665, "loss": 0.0736, "lr": 1.6957718085041453e-06, "epoch": 1.6478147216182544, "percentage": 32.96, "elapsed_time": "1:26:50", "remaining_time": "2:56:39", "throughput": 8724.01, "total_tokens": 45454304} +{"current_steps": 67455, "total_steps": 204665, "loss": 0.1157, "lr": 1.6957105542385758e-06, "epoch": 1.6479368724501013, "percentage": 32.96, "elapsed_time": "1:26:50", "remaining_time": "2:56:38", "throughput": 8724.02, "total_tokens": 45457376} +{"current_steps": 67460, "total_steps": 204665, "loss": 0.0474, "lr": 1.695649294913612e-06, "epoch": 1.6480590232819485, "percentage": 32.96, "elapsed_time": "1:26:50", "remaining_time": "2:56:38", "throughput": 8724.04, "total_tokens": 45460448} +{"current_steps": 67465, "total_steps": 204665, "loss": 0.2013, "lr": 1.6955880305296996e-06, "epoch": 1.6481811741137957, "percentage": 32.96, "elapsed_time": "1:26:51", "remaining_time": "2:56:37", "throughput": 8724.12, "total_tokens": 45463904} +{"current_steps": 67470, "total_steps": 204665, "loss": 0.0781, "lr": 1.695526761087284e-06, "epoch": 1.648303324945643, "percentage": 32.97, "elapsed_time": "1:26:51", "remaining_time": "2:56:37", "throughput": 8724.21, "total_tokens": 45467424} +{"current_steps": 67475, "total_steps": 204665, "loss": 0.0341, "lr": 1.6954654865868107e-06, "epoch": 1.6484254757774899, "percentage": 32.97, "elapsed_time": "1:26:51", "remaining_time": "2:56:37", "throughput": 8724.27, "total_tokens": 45470816} +{"current_steps": 67480, "total_steps": 204665, "loss": 0.0845, "lr": 1.695404207028725e-06, "epoch": 1.648547626609337, "percentage": 32.97, "elapsed_time": "1:26:52", "remaining_time": "2:56:36", "throughput": 8724.37, "total_tokens": 45474400} +{"current_steps": 67485, "total_steps": 204665, "loss": 0.2447, "lr": 1.6953429224134731e-06, "epoch": 1.6486697774411843, "percentage": 32.97, "elapsed_time": "1:26:52", "remaining_time": "2:56:36", "throughput": 8724.49, "total_tokens": 45478112} +{"current_steps": 67490, "total_steps": 204665, "loss": 0.0339, "lr": 1.6952816327415004e-06, "epoch": 1.6487919282730314, "percentage": 32.98, "elapsed_time": "1:26:53", "remaining_time": "2:56:35", "throughput": 8724.51, "total_tokens": 45481184} +{"current_steps": 67495, "total_steps": 204665, "loss": 0.1602, "lr": 1.6952203380132529e-06, "epoch": 1.6489140791048786, "percentage": 32.98, "elapsed_time": "1:26:53", "remaining_time": "2:56:35", "throughput": 8724.59, "total_tokens": 45484704} +{"current_steps": 67500, "total_steps": 204665, "loss": 0.0029, "lr": 1.6951590382291761e-06, "epoch": 1.6490362299367258, "percentage": 32.98, "elapsed_time": "1:26:53", "remaining_time": "2:56:34", "throughput": 8724.73, "total_tokens": 45488608} +{"current_steps": 67505, "total_steps": 204665, "loss": 0.1515, "lr": 1.6950977333897156e-06, "epoch": 1.649158380768573, "percentage": 32.98, "elapsed_time": "1:26:54", "remaining_time": "2:56:34", "throughput": 8724.75, "total_tokens": 45491680} +{"current_steps": 67510, "total_steps": 204665, "loss": 0.0019, "lr": 1.6950364234953173e-06, "epoch": 1.6492805316004202, "percentage": 32.99, "elapsed_time": "1:26:54", "remaining_time": "2:56:33", "throughput": 8724.84, "total_tokens": 45495200} +{"current_steps": 67515, "total_steps": 204665, "loss": 0.0996, "lr": 1.6949751085464273e-06, "epoch": 1.6494026824322674, "percentage": 32.99, "elapsed_time": "1:26:54", "remaining_time": "2:56:33", "throughput": 8724.87, "total_tokens": 45498336} +{"current_steps": 67520, "total_steps": 204665, "loss": 0.0285, "lr": 1.6949137885434914e-06, "epoch": 1.6495248332641146, "percentage": 32.99, "elapsed_time": "1:26:55", "remaining_time": "2:56:32", "throughput": 8724.9, "total_tokens": 45501536} +{"current_steps": 67525, "total_steps": 204665, "loss": 0.1134, "lr": 1.6948524634869555e-06, "epoch": 1.6496469840959618, "percentage": 32.99, "elapsed_time": "1:26:55", "remaining_time": "2:56:32", "throughput": 8724.98, "total_tokens": 45505056} +{"current_steps": 67530, "total_steps": 204665, "loss": 0.0424, "lr": 1.6947911333772657e-06, "epoch": 1.649769134927809, "percentage": 33.0, "elapsed_time": "1:26:55", "remaining_time": "2:56:31", "throughput": 8725.02, "total_tokens": 45508256} +{"current_steps": 67535, "total_steps": 204665, "loss": 0.2835, "lr": 1.6947297982148678e-06, "epoch": 1.6498912857596562, "percentage": 33.0, "elapsed_time": "1:26:56", "remaining_time": "2:56:31", "throughput": 8725.05, "total_tokens": 45511456} +{"current_steps": 67540, "total_steps": 204665, "loss": 0.1688, "lr": 1.694668458000208e-06, "epoch": 1.6500134365915033, "percentage": 33.0, "elapsed_time": "1:26:56", "remaining_time": "2:56:31", "throughput": 8725.14, "total_tokens": 45514976} +{"current_steps": 67545, "total_steps": 204665, "loss": 0.019, "lr": 1.6946071127337323e-06, "epoch": 1.6501355874233503, "percentage": 33.0, "elapsed_time": "1:26:56", "remaining_time": "2:56:30", "throughput": 8725.22, "total_tokens": 45518432} +{"current_steps": 67550, "total_steps": 204665, "loss": 0.0685, "lr": 1.694545762415887e-06, "epoch": 1.6502577382551975, "percentage": 33.01, "elapsed_time": "1:26:57", "remaining_time": "2:56:30", "throughput": 8725.35, "total_tokens": 45522272} +{"current_steps": 67555, "total_steps": 204665, "loss": 0.1469, "lr": 1.6944844070471178e-06, "epoch": 1.6503798890870447, "percentage": 33.01, "elapsed_time": "1:26:57", "remaining_time": "2:56:29", "throughput": 8725.44, "total_tokens": 45525792} +{"current_steps": 67560, "total_steps": 204665, "loss": 0.1414, "lr": 1.6944230466278712e-06, "epoch": 1.6505020399188919, "percentage": 33.01, "elapsed_time": "1:26:57", "remaining_time": "2:56:29", "throughput": 8725.57, "total_tokens": 45529568} +{"current_steps": 67565, "total_steps": 204665, "loss": 0.0635, "lr": 1.6943616811585936e-06, "epoch": 1.6506241907507389, "percentage": 33.01, "elapsed_time": "1:26:58", "remaining_time": "2:56:28", "throughput": 8725.58, "total_tokens": 45532640} +{"current_steps": 67570, "total_steps": 204665, "loss": 0.0022, "lr": 1.6943003106397313e-06, "epoch": 1.650746341582586, "percentage": 33.01, "elapsed_time": "1:26:58", "remaining_time": "2:56:28", "throughput": 8725.66, "total_tokens": 45536096} +{"current_steps": 67575, "total_steps": 204665, "loss": 0.0309, "lr": 1.69423893507173e-06, "epoch": 1.6508684924144332, "percentage": 33.02, "elapsed_time": "1:26:58", "remaining_time": "2:56:27", "throughput": 8725.72, "total_tokens": 45539424} +{"current_steps": 67580, "total_steps": 204665, "loss": 0.1266, "lr": 1.6941775544550368e-06, "epoch": 1.6509906432462804, "percentage": 33.02, "elapsed_time": "1:26:59", "remaining_time": "2:56:27", "throughput": 8725.8, "total_tokens": 45542944} +{"current_steps": 67585, "total_steps": 204665, "loss": 0.0458, "lr": 1.6941161687900975e-06, "epoch": 1.6511127940781276, "percentage": 33.02, "elapsed_time": "1:26:59", "remaining_time": "2:56:26", "throughput": 8725.87, "total_tokens": 45546336} +{"current_steps": 67590, "total_steps": 204665, "loss": 0.21, "lr": 1.694054778077359e-06, "epoch": 1.6512349449099748, "percentage": 33.02, "elapsed_time": "1:27:00", "remaining_time": "2:56:26", "throughput": 8725.92, "total_tokens": 45549664} +{"current_steps": 67595, "total_steps": 204665, "loss": 0.0357, "lr": 1.693993382317267e-06, "epoch": 1.651357095741822, "percentage": 33.03, "elapsed_time": "1:27:00", "remaining_time": "2:56:25", "throughput": 8725.92, "total_tokens": 45552608} +{"current_steps": 67600, "total_steps": 204665, "loss": 0.0687, "lr": 1.6939319815102686e-06, "epoch": 1.6514792465736692, "percentage": 33.03, "elapsed_time": "1:27:00", "remaining_time": "2:56:25", "throughput": 8725.93, "total_tokens": 45555616} +{"current_steps": 67605, "total_steps": 204665, "loss": 0.0286, "lr": 1.6938705756568106e-06, "epoch": 1.6516013974055164, "percentage": 33.03, "elapsed_time": "1:27:01", "remaining_time": "2:56:25", "throughput": 8726.02, "total_tokens": 45559136} +{"current_steps": 67610, "total_steps": 204665, "loss": 0.1458, "lr": 1.6938091647573385e-06, "epoch": 1.6517235482373636, "percentage": 33.03, "elapsed_time": "1:27:01", "remaining_time": "2:56:24", "throughput": 8726.07, "total_tokens": 45562400} +{"current_steps": 67615, "total_steps": 204665, "loss": 0.0453, "lr": 1.6937477488122997e-06, "epoch": 1.6518456990692107, "percentage": 33.04, "elapsed_time": "1:27:01", "remaining_time": "2:56:24", "throughput": 8726.13, "total_tokens": 45565728} +{"current_steps": 67620, "total_steps": 204665, "loss": 0.1273, "lr": 1.693686327822141e-06, "epoch": 1.651967849901058, "percentage": 33.04, "elapsed_time": "1:27:02", "remaining_time": "2:56:23", "throughput": 8726.2, "total_tokens": 45569184} +{"current_steps": 67625, "total_steps": 204665, "loss": 0.0947, "lr": 1.6936249017873086e-06, "epoch": 1.6520900007329051, "percentage": 33.04, "elapsed_time": "1:27:02", "remaining_time": "2:56:23", "throughput": 8726.24, "total_tokens": 45572384} +{"current_steps": 67630, "total_steps": 204665, "loss": 0.0361, "lr": 1.6935634707082494e-06, "epoch": 1.6522121515647523, "percentage": 33.04, "elapsed_time": "1:27:02", "remaining_time": "2:56:22", "throughput": 8726.31, "total_tokens": 45575776} +{"current_steps": 67635, "total_steps": 204665, "loss": 0.1171, "lr": 1.69350203458541e-06, "epoch": 1.6523343023965993, "percentage": 33.05, "elapsed_time": "1:27:03", "remaining_time": "2:56:22", "throughput": 8726.39, "total_tokens": 45579232} +{"current_steps": 67640, "total_steps": 204665, "loss": 0.0348, "lr": 1.6934405934192372e-06, "epoch": 1.6524564532284465, "percentage": 33.05, "elapsed_time": "1:27:03", "remaining_time": "2:56:21", "throughput": 8726.43, "total_tokens": 45582496} +{"current_steps": 67645, "total_steps": 204665, "loss": 0.0035, "lr": 1.693379147210178e-06, "epoch": 1.6525786040602937, "percentage": 33.05, "elapsed_time": "1:27:03", "remaining_time": "2:56:21", "throughput": 8726.53, "total_tokens": 45586080} +{"current_steps": 67650, "total_steps": 204665, "loss": 0.2543, "lr": 1.6933176959586792e-06, "epoch": 1.6527007548921409, "percentage": 33.05, "elapsed_time": "1:27:04", "remaining_time": "2:56:20", "throughput": 8726.56, "total_tokens": 45589216} +{"current_steps": 67655, "total_steps": 204665, "loss": 0.0465, "lr": 1.6932562396651874e-06, "epoch": 1.6528229057239878, "percentage": 33.06, "elapsed_time": "1:27:04", "remaining_time": "2:56:20", "throughput": 8726.58, "total_tokens": 45592288} +{"current_steps": 67660, "total_steps": 204665, "loss": 0.049, "lr": 1.6931947783301502e-06, "epoch": 1.652945056555835, "percentage": 33.06, "elapsed_time": "1:27:04", "remaining_time": "2:56:19", "throughput": 8726.59, "total_tokens": 45595296} +{"current_steps": 67665, "total_steps": 204665, "loss": 0.1545, "lr": 1.6931333119540138e-06, "epoch": 1.6530672073876822, "percentage": 33.06, "elapsed_time": "1:27:05", "remaining_time": "2:56:19", "throughput": 8726.66, "total_tokens": 45598688} +{"current_steps": 67670, "total_steps": 204665, "loss": 0.1369, "lr": 1.6930718405372254e-06, "epoch": 1.6531893582195294, "percentage": 33.06, "elapsed_time": "1:27:05", "remaining_time": "2:56:18", "throughput": 8726.7, "total_tokens": 45601952} +{"current_steps": 67675, "total_steps": 204665, "loss": 0.1289, "lr": 1.6930103640802327e-06, "epoch": 1.6533115090513766, "percentage": 33.07, "elapsed_time": "1:27:05", "remaining_time": "2:56:18", "throughput": 8726.79, "total_tokens": 45605472} +{"current_steps": 67680, "total_steps": 204665, "loss": 0.194, "lr": 1.6929488825834816e-06, "epoch": 1.6534336598832238, "percentage": 33.07, "elapsed_time": "1:27:06", "remaining_time": "2:56:17", "throughput": 8726.8, "total_tokens": 45608480} +{"current_steps": 67685, "total_steps": 204665, "loss": 0.0867, "lr": 1.6928873960474204e-06, "epoch": 1.653555810715071, "percentage": 33.07, "elapsed_time": "1:27:06", "remaining_time": "2:56:17", "throughput": 8726.86, "total_tokens": 45611872} +{"current_steps": 67690, "total_steps": 204665, "loss": 0.0908, "lr": 1.6928259044724954e-06, "epoch": 1.6536779615469182, "percentage": 33.07, "elapsed_time": "1:27:06", "remaining_time": "2:56:17", "throughput": 8726.91, "total_tokens": 45615136} +{"current_steps": 67695, "total_steps": 204665, "loss": 0.0776, "lr": 1.6927644078591539e-06, "epoch": 1.6538001123787653, "percentage": 33.08, "elapsed_time": "1:27:07", "remaining_time": "2:56:16", "throughput": 8727.02, "total_tokens": 45618848} +{"current_steps": 67700, "total_steps": 204665, "loss": 0.0745, "lr": 1.6927029062078435e-06, "epoch": 1.6539222632106125, "percentage": 33.08, "elapsed_time": "1:27:07", "remaining_time": "2:56:16", "throughput": 8727.14, "total_tokens": 45622560} +{"current_steps": 67705, "total_steps": 204665, "loss": 0.0492, "lr": 1.6926413995190112e-06, "epoch": 1.6540444140424597, "percentage": 33.08, "elapsed_time": "1:27:08", "remaining_time": "2:56:15", "throughput": 8727.27, "total_tokens": 45626336} +{"current_steps": 67710, "total_steps": 204665, "loss": 0.0027, "lr": 1.6925798877931046e-06, "epoch": 1.654166564874307, "percentage": 33.08, "elapsed_time": "1:27:08", "remaining_time": "2:56:15", "throughput": 8727.32, "total_tokens": 45629600} +{"current_steps": 67715, "total_steps": 204665, "loss": 0.1117, "lr": 1.6925183710305704e-06, "epoch": 1.654288715706154, "percentage": 33.09, "elapsed_time": "1:27:08", "remaining_time": "2:56:14", "throughput": 8727.35, "total_tokens": 45632736} +{"current_steps": 67720, "total_steps": 204665, "loss": 0.0551, "lr": 1.6924568492318566e-06, "epoch": 1.6544108665380013, "percentage": 33.09, "elapsed_time": "1:27:09", "remaining_time": "2:56:14", "throughput": 8727.45, "total_tokens": 45636384} +{"current_steps": 67725, "total_steps": 204665, "loss": 0.0821, "lr": 1.6923953223974103e-06, "epoch": 1.6545330173698483, "percentage": 33.09, "elapsed_time": "1:27:09", "remaining_time": "2:56:13", "throughput": 8727.51, "total_tokens": 45639712} +{"current_steps": 67730, "total_steps": 204665, "loss": 0.0315, "lr": 1.692333790527679e-06, "epoch": 1.6546551682016954, "percentage": 33.09, "elapsed_time": "1:27:09", "remaining_time": "2:56:13", "throughput": 8727.52, "total_tokens": 45642720} +{"current_steps": 67735, "total_steps": 204665, "loss": 0.0022, "lr": 1.69227225362311e-06, "epoch": 1.6547773190335426, "percentage": 33.1, "elapsed_time": "1:27:10", "remaining_time": "2:56:12", "throughput": 8727.62, "total_tokens": 45646304} +{"current_steps": 67740, "total_steps": 204665, "loss": 0.1646, "lr": 1.692210711684151e-06, "epoch": 1.6548994698653898, "percentage": 33.1, "elapsed_time": "1:27:10", "remaining_time": "2:56:12", "throughput": 8727.71, "total_tokens": 45649888} +{"current_steps": 67745, "total_steps": 204665, "loss": 0.0147, "lr": 1.6921491647112497e-06, "epoch": 1.6550216206972368, "percentage": 33.1, "elapsed_time": "1:27:10", "remaining_time": "2:56:12", "throughput": 8727.76, "total_tokens": 45653216} +{"current_steps": 67750, "total_steps": 204665, "loss": 0.0367, "lr": 1.6920876127048534e-06, "epoch": 1.655143771529084, "percentage": 33.1, "elapsed_time": "1:27:11", "remaining_time": "2:56:11", "throughput": 8727.8, "total_tokens": 45656416} +{"current_steps": 67755, "total_steps": 204665, "loss": 0.0492, "lr": 1.6920260556654098e-06, "epoch": 1.6552659223609312, "percentage": 33.11, "elapsed_time": "1:27:11", "remaining_time": "2:56:11", "throughput": 8727.8, "total_tokens": 45659360} +{"current_steps": 67760, "total_steps": 204665, "loss": 0.0991, "lr": 1.6919644935933666e-06, "epoch": 1.6553880731927784, "percentage": 33.11, "elapsed_time": "1:27:11", "remaining_time": "2:56:10", "throughput": 8727.86, "total_tokens": 45662752} +{"current_steps": 67765, "total_steps": 204665, "loss": 0.062, "lr": 1.6919029264891713e-06, "epoch": 1.6555102240246256, "percentage": 33.11, "elapsed_time": "1:27:12", "remaining_time": "2:56:10", "throughput": 8727.93, "total_tokens": 45666144} +{"current_steps": 67770, "total_steps": 204665, "loss": 0.0816, "lr": 1.6918413543532722e-06, "epoch": 1.6556323748564727, "percentage": 33.11, "elapsed_time": "1:27:12", "remaining_time": "2:56:09", "throughput": 8727.94, "total_tokens": 45669088} +{"current_steps": 67775, "total_steps": 204665, "loss": 0.0582, "lr": 1.6917797771861165e-06, "epoch": 1.65575452568832, "percentage": 33.12, "elapsed_time": "1:27:12", "remaining_time": "2:56:09", "throughput": 8727.94, "total_tokens": 45672032} +{"current_steps": 67780, "total_steps": 204665, "loss": 0.0879, "lr": 1.691718194988152e-06, "epoch": 1.6558766765201671, "percentage": 33.12, "elapsed_time": "1:27:13", "remaining_time": "2:56:08", "throughput": 8727.96, "total_tokens": 45675104} +{"current_steps": 67785, "total_steps": 204665, "loss": 0.1022, "lr": 1.6916566077598272e-06, "epoch": 1.6559988273520143, "percentage": 33.12, "elapsed_time": "1:27:13", "remaining_time": "2:56:08", "throughput": 8727.95, "total_tokens": 45677984} +{"current_steps": 67790, "total_steps": 204665, "loss": 0.0397, "lr": 1.6915950155015892e-06, "epoch": 1.6561209781838615, "percentage": 33.12, "elapsed_time": "1:27:13", "remaining_time": "2:56:07", "throughput": 8728.01, "total_tokens": 45681376} +{"current_steps": 67795, "total_steps": 204665, "loss": 0.001, "lr": 1.6915334182138863e-06, "epoch": 1.6562431290157087, "percentage": 33.12, "elapsed_time": "1:27:14", "remaining_time": "2:56:07", "throughput": 8728.04, "total_tokens": 45684512} +{"current_steps": 67800, "total_steps": 204665, "loss": 0.1561, "lr": 1.6914718158971662e-06, "epoch": 1.6563652798475559, "percentage": 33.13, "elapsed_time": "1:27:14", "remaining_time": "2:56:06", "throughput": 8728.14, "total_tokens": 45688096} +{"current_steps": 67805, "total_steps": 204665, "loss": 0.0419, "lr": 1.6914102085518773e-06, "epoch": 1.656487430679403, "percentage": 33.13, "elapsed_time": "1:27:14", "remaining_time": "2:56:06", "throughput": 8728.14, "total_tokens": 45691040} +{"current_steps": 67810, "total_steps": 204665, "loss": 0.0523, "lr": 1.6913485961784672e-06, "epoch": 1.65660958151125, "percentage": 33.13, "elapsed_time": "1:27:15", "remaining_time": "2:56:05", "throughput": 8728.21, "total_tokens": 45694432} +{"current_steps": 67815, "total_steps": 204665, "loss": 0.0485, "lr": 1.6912869787773842e-06, "epoch": 1.6567317323430972, "percentage": 33.13, "elapsed_time": "1:27:15", "remaining_time": "2:56:05", "throughput": 8728.26, "total_tokens": 45697696} +{"current_steps": 67820, "total_steps": 204665, "loss": 0.044, "lr": 1.6912253563490765e-06, "epoch": 1.6568538831749444, "percentage": 33.14, "elapsed_time": "1:27:15", "remaining_time": "2:56:04", "throughput": 8728.32, "total_tokens": 45701024} +{"current_steps": 67825, "total_steps": 204665, "loss": 0.1131, "lr": 1.6911637288939922e-06, "epoch": 1.6569760340067916, "percentage": 33.14, "elapsed_time": "1:27:16", "remaining_time": "2:56:04", "throughput": 8728.39, "total_tokens": 45704480} +{"current_steps": 67830, "total_steps": 204665, "loss": 0.0481, "lr": 1.6911020964125791e-06, "epoch": 1.6570981848386388, "percentage": 33.14, "elapsed_time": "1:27:16", "remaining_time": "2:56:03", "throughput": 8728.41, "total_tokens": 45707552} +{"current_steps": 67835, "total_steps": 204665, "loss": 0.0008, "lr": 1.6910404589052857e-06, "epoch": 1.6572203356704858, "percentage": 33.14, "elapsed_time": "1:27:16", "remaining_time": "2:56:03", "throughput": 8728.5, "total_tokens": 45711072} +{"current_steps": 67840, "total_steps": 204665, "loss": 0.0647, "lr": 1.6909788163725605e-06, "epoch": 1.657342486502333, "percentage": 33.15, "elapsed_time": "1:27:17", "remaining_time": "2:56:03", "throughput": 8728.55, "total_tokens": 45714400} +{"current_steps": 67845, "total_steps": 204665, "loss": 0.1721, "lr": 1.6909171688148512e-06, "epoch": 1.6574646373341801, "percentage": 33.15, "elapsed_time": "1:27:17", "remaining_time": "2:56:02", "throughput": 8728.6, "total_tokens": 45717728} +{"current_steps": 67850, "total_steps": 204665, "loss": 0.0284, "lr": 1.6908555162326064e-06, "epoch": 1.6575867881660273, "percentage": 33.15, "elapsed_time": "1:27:18", "remaining_time": "2:56:02", "throughput": 8728.69, "total_tokens": 45721248} +{"current_steps": 67855, "total_steps": 204665, "loss": 0.0729, "lr": 1.6907938586262747e-06, "epoch": 1.6577089389978745, "percentage": 33.15, "elapsed_time": "1:27:18", "remaining_time": "2:56:01", "throughput": 8728.76, "total_tokens": 45724640} +{"current_steps": 67860, "total_steps": 204665, "loss": 0.0603, "lr": 1.690732195996304e-06, "epoch": 1.6578310898297217, "percentage": 33.16, "elapsed_time": "1:27:18", "remaining_time": "2:56:01", "throughput": 8728.8, "total_tokens": 45727840} +{"current_steps": 67865, "total_steps": 204665, "loss": 0.24, "lr": 1.6906705283431432e-06, "epoch": 1.657953240661569, "percentage": 33.16, "elapsed_time": "1:27:19", "remaining_time": "2:56:00", "throughput": 8728.86, "total_tokens": 45731168} +{"current_steps": 67870, "total_steps": 204665, "loss": 0.0306, "lr": 1.6906088556672405e-06, "epoch": 1.658075391493416, "percentage": 33.16, "elapsed_time": "1:27:19", "remaining_time": "2:56:00", "throughput": 8728.91, "total_tokens": 45734432} +{"current_steps": 67875, "total_steps": 204665, "loss": 0.0692, "lr": 1.6905471779690443e-06, "epoch": 1.6581975423252633, "percentage": 33.16, "elapsed_time": "1:27:19", "remaining_time": "2:55:59", "throughput": 8728.99, "total_tokens": 45737952} +{"current_steps": 67880, "total_steps": 204665, "loss": 0.1682, "lr": 1.6904854952490035e-06, "epoch": 1.6583196931571105, "percentage": 33.17, "elapsed_time": "1:27:20", "remaining_time": "2:55:59", "throughput": 8729.03, "total_tokens": 45741216} +{"current_steps": 67885, "total_steps": 204665, "loss": 0.075, "lr": 1.6904238075075665e-06, "epoch": 1.6584418439889577, "percentage": 33.17, "elapsed_time": "1:27:20", "remaining_time": "2:55:58", "throughput": 8729.14, "total_tokens": 45744864} +{"current_steps": 67890, "total_steps": 204665, "loss": 0.0613, "lr": 1.6903621147451816e-06, "epoch": 1.6585639948208049, "percentage": 33.17, "elapsed_time": "1:27:20", "remaining_time": "2:55:58", "throughput": 8729.16, "total_tokens": 45747936} +{"current_steps": 67895, "total_steps": 204665, "loss": 0.0063, "lr": 1.6903004169622976e-06, "epoch": 1.658686145652652, "percentage": 33.17, "elapsed_time": "1:27:21", "remaining_time": "2:55:57", "throughput": 8729.24, "total_tokens": 45751456} +{"current_steps": 67900, "total_steps": 204665, "loss": 0.1953, "lr": 1.6902387141593637e-06, "epoch": 1.658808296484499, "percentage": 33.18, "elapsed_time": "1:27:21", "remaining_time": "2:55:57", "throughput": 8729.35, "total_tokens": 45755104} +{"current_steps": 67905, "total_steps": 204665, "loss": 0.089, "lr": 1.6901770063368281e-06, "epoch": 1.6589304473163462, "percentage": 33.18, "elapsed_time": "1:27:21", "remaining_time": "2:55:57", "throughput": 8729.47, "total_tokens": 45758816} +{"current_steps": 67910, "total_steps": 204665, "loss": 0.1403, "lr": 1.6901152934951397e-06, "epoch": 1.6590525981481934, "percentage": 33.18, "elapsed_time": "1:27:22", "remaining_time": "2:55:56", "throughput": 8729.54, "total_tokens": 45762272} +{"current_steps": 67915, "total_steps": 204665, "loss": 0.1207, "lr": 1.6900535756347472e-06, "epoch": 1.6591747489800406, "percentage": 33.18, "elapsed_time": "1:27:22", "remaining_time": "2:55:56", "throughput": 8729.54, "total_tokens": 45765216} +{"current_steps": 67920, "total_steps": 204665, "loss": 0.1671, "lr": 1.6899918527560995e-06, "epoch": 1.6592968998118878, "percentage": 33.19, "elapsed_time": "1:27:22", "remaining_time": "2:55:55", "throughput": 8729.62, "total_tokens": 45768672} +{"current_steps": 67925, "total_steps": 204665, "loss": 0.0019, "lr": 1.6899301248596454e-06, "epoch": 1.6594190506437347, "percentage": 33.19, "elapsed_time": "1:27:23", "remaining_time": "2:55:55", "throughput": 8729.67, "total_tokens": 45772000} +{"current_steps": 67930, "total_steps": 204665, "loss": 0.1282, "lr": 1.6898683919458342e-06, "epoch": 1.659541201475582, "percentage": 33.19, "elapsed_time": "1:27:23", "remaining_time": "2:55:54", "throughput": 8729.78, "total_tokens": 45775648} +{"current_steps": 67935, "total_steps": 204665, "loss": 0.1585, "lr": 1.689806654015114e-06, "epoch": 1.6596633523074291, "percentage": 33.19, "elapsed_time": "1:27:23", "remaining_time": "2:55:54", "throughput": 8729.83, "total_tokens": 45778912} +{"current_steps": 67940, "total_steps": 204665, "loss": 0.0189, "lr": 1.6897449110679344e-06, "epoch": 1.6597855031392763, "percentage": 33.2, "elapsed_time": "1:27:24", "remaining_time": "2:55:53", "throughput": 8729.9, "total_tokens": 45782304} +{"current_steps": 67945, "total_steps": 204665, "loss": 0.0686, "lr": 1.6896831631047444e-06, "epoch": 1.6599076539711235, "percentage": 33.2, "elapsed_time": "1:27:24", "remaining_time": "2:55:53", "throughput": 8729.95, "total_tokens": 45785632} +{"current_steps": 67950, "total_steps": 204665, "loss": 0.2004, "lr": 1.6896214101259928e-06, "epoch": 1.6600298048029707, "percentage": 33.2, "elapsed_time": "1:27:24", "remaining_time": "2:55:52", "throughput": 8729.96, "total_tokens": 45788640} +{"current_steps": 67955, "total_steps": 204665, "loss": 0.0817, "lr": 1.6895596521321292e-06, "epoch": 1.6601519556348179, "percentage": 33.2, "elapsed_time": "1:27:25", "remaining_time": "2:55:52", "throughput": 8730.04, "total_tokens": 45792160} +{"current_steps": 67960, "total_steps": 204665, "loss": 0.2003, "lr": 1.689497889123602e-06, "epoch": 1.660274106466665, "percentage": 33.21, "elapsed_time": "1:27:25", "remaining_time": "2:55:52", "throughput": 8730.13, "total_tokens": 45795680} +{"current_steps": 67965, "total_steps": 204665, "loss": 0.0544, "lr": 1.6894361211008608e-06, "epoch": 1.6603962572985123, "percentage": 33.21, "elapsed_time": "1:27:26", "remaining_time": "2:55:51", "throughput": 8730.13, "total_tokens": 45798624} +{"current_steps": 67970, "total_steps": 204665, "loss": 0.0039, "lr": 1.6893743480643546e-06, "epoch": 1.6605184081303594, "percentage": 33.21, "elapsed_time": "1:27:26", "remaining_time": "2:55:51", "throughput": 8730.14, "total_tokens": 45801632} +{"current_steps": 67975, "total_steps": 204665, "loss": 0.0492, "lr": 1.689312570014533e-06, "epoch": 1.6606405589622066, "percentage": 33.21, "elapsed_time": "1:27:26", "remaining_time": "2:55:50", "throughput": 8730.24, "total_tokens": 45805280} +{"current_steps": 67980, "total_steps": 204665, "loss": 0.1252, "lr": 1.6892507869518447e-06, "epoch": 1.6607627097940538, "percentage": 33.22, "elapsed_time": "1:27:27", "remaining_time": "2:55:50", "throughput": 8730.28, "total_tokens": 45808480} +{"current_steps": 67985, "total_steps": 204665, "loss": 0.0444, "lr": 1.6891889988767392e-06, "epoch": 1.660884860625901, "percentage": 33.22, "elapsed_time": "1:27:27", "remaining_time": "2:55:49", "throughput": 8730.32, "total_tokens": 45811680} +{"current_steps": 67990, "total_steps": 204665, "loss": 0.0777, "lr": 1.6891272057896661e-06, "epoch": 1.661007011457748, "percentage": 33.22, "elapsed_time": "1:27:27", "remaining_time": "2:55:49", "throughput": 8730.37, "total_tokens": 45814944} +{"current_steps": 67995, "total_steps": 204665, "loss": 0.0692, "lr": 1.689065407691075e-06, "epoch": 1.6611291622895952, "percentage": 33.22, "elapsed_time": "1:27:28", "remaining_time": "2:55:48", "throughput": 8730.43, "total_tokens": 45818272} +{"current_steps": 68000, "total_steps": 204665, "loss": 0.2293, "lr": 1.6890036045814142e-06, "epoch": 1.6612513131214424, "percentage": 33.23, "elapsed_time": "1:27:28", "remaining_time": "2:55:48", "throughput": 8730.47, "total_tokens": 45821472} +{"current_steps": 68005, "total_steps": 204665, "loss": 0.0691, "lr": 1.6889417964611343e-06, "epoch": 1.6613734639532896, "percentage": 33.23, "elapsed_time": "1:27:28", "remaining_time": "2:55:47", "throughput": 8730.52, "total_tokens": 45824736} +{"current_steps": 68010, "total_steps": 204665, "loss": 0.0022, "lr": 1.6888799833306842e-06, "epoch": 1.6614956147851365, "percentage": 33.23, "elapsed_time": "1:27:29", "remaining_time": "2:55:47", "throughput": 8730.54, "total_tokens": 45827872} +{"current_steps": 68015, "total_steps": 204665, "loss": 0.0388, "lr": 1.6888181651905136e-06, "epoch": 1.6616177656169837, "percentage": 33.23, "elapsed_time": "1:27:29", "remaining_time": "2:55:46", "throughput": 8730.64, "total_tokens": 45831456} +{"current_steps": 68020, "total_steps": 204665, "loss": 0.0825, "lr": 1.688756342041072e-06, "epoch": 1.661739916448831, "percentage": 33.23, "elapsed_time": "1:27:29", "remaining_time": "2:55:46", "throughput": 8730.83, "total_tokens": 45835616} +{"current_steps": 68025, "total_steps": 204665, "loss": 0.0442, "lr": 1.688694513882809e-06, "epoch": 1.661862067280678, "percentage": 33.24, "elapsed_time": "1:27:30", "remaining_time": "2:55:45", "throughput": 8731.02, "total_tokens": 45839776} +{"current_steps": 68030, "total_steps": 204665, "loss": 0.0012, "lr": 1.6886326807161746e-06, "epoch": 1.6619842181125253, "percentage": 33.24, "elapsed_time": "1:27:30", "remaining_time": "2:55:45", "throughput": 8731.17, "total_tokens": 45843680} +{"current_steps": 68035, "total_steps": 204665, "loss": 0.1546, "lr": 1.6885708425416178e-06, "epoch": 1.6621063689443725, "percentage": 33.24, "elapsed_time": "1:27:30", "remaining_time": "2:55:45", "throughput": 8731.18, "total_tokens": 45846688} +{"current_steps": 68040, "total_steps": 204665, "loss": 0.0749, "lr": 1.688508999359589e-06, "epoch": 1.6622285197762197, "percentage": 33.24, "elapsed_time": "1:27:31", "remaining_time": "2:55:44", "throughput": 8731.27, "total_tokens": 45850272} +{"current_steps": 68045, "total_steps": 204665, "loss": 0.1484, "lr": 1.688447151170537e-06, "epoch": 1.6623506706080668, "percentage": 33.25, "elapsed_time": "1:27:31", "remaining_time": "2:55:44", "throughput": 8731.35, "total_tokens": 45853728} +{"current_steps": 68050, "total_steps": 204665, "loss": 0.0673, "lr": 1.6883852979749124e-06, "epoch": 1.662472821439914, "percentage": 33.25, "elapsed_time": "1:27:31", "remaining_time": "2:55:43", "throughput": 8731.37, "total_tokens": 45856864} +{"current_steps": 68055, "total_steps": 204665, "loss": 0.0156, "lr": 1.6883234397731647e-06, "epoch": 1.6625949722717612, "percentage": 33.25, "elapsed_time": "1:27:32", "remaining_time": "2:55:43", "throughput": 8731.43, "total_tokens": 45860192} +{"current_steps": 68060, "total_steps": 204665, "loss": 0.0551, "lr": 1.688261576565744e-06, "epoch": 1.6627171231036084, "percentage": 33.25, "elapsed_time": "1:27:32", "remaining_time": "2:55:42", "throughput": 8731.5, "total_tokens": 45863584} +{"current_steps": 68065, "total_steps": 204665, "loss": 0.0017, "lr": 1.6881997083530999e-06, "epoch": 1.6628392739354556, "percentage": 33.26, "elapsed_time": "1:27:33", "remaining_time": "2:55:42", "throughput": 8731.57, "total_tokens": 45866976} +{"current_steps": 68070, "total_steps": 204665, "loss": 0.0667, "lr": 1.6881378351356825e-06, "epoch": 1.6629614247673028, "percentage": 33.26, "elapsed_time": "1:27:33", "remaining_time": "2:55:41", "throughput": 8731.61, "total_tokens": 45870240} +{"current_steps": 68075, "total_steps": 204665, "loss": 0.0887, "lr": 1.6880759569139414e-06, "epoch": 1.66308357559915, "percentage": 33.26, "elapsed_time": "1:27:33", "remaining_time": "2:55:41", "throughput": 8731.69, "total_tokens": 45873696} +{"current_steps": 68080, "total_steps": 204665, "loss": 0.1637, "lr": 1.688014073688327e-06, "epoch": 1.663205726430997, "percentage": 33.26, "elapsed_time": "1:27:34", "remaining_time": "2:55:40", "throughput": 8731.83, "total_tokens": 45877536} +{"current_steps": 68085, "total_steps": 204665, "loss": 0.0667, "lr": 1.6879521854592893e-06, "epoch": 1.6633278772628441, "percentage": 33.27, "elapsed_time": "1:27:34", "remaining_time": "2:55:40", "throughput": 8731.92, "total_tokens": 45881120} +{"current_steps": 68090, "total_steps": 204665, "loss": 0.0385, "lr": 1.6878902922272781e-06, "epoch": 1.6634500280946913, "percentage": 33.27, "elapsed_time": "1:27:34", "remaining_time": "2:55:40", "throughput": 8732.0, "total_tokens": 45884576} +{"current_steps": 68095, "total_steps": 204665, "loss": 0.0879, "lr": 1.687828393992744e-06, "epoch": 1.6635721789265385, "percentage": 33.27, "elapsed_time": "1:27:35", "remaining_time": "2:55:39", "throughput": 8732.04, "total_tokens": 45887776} +{"current_steps": 68100, "total_steps": 204665, "loss": 0.18, "lr": 1.6877664907561367e-06, "epoch": 1.6636943297583855, "percentage": 33.27, "elapsed_time": "1:27:35", "remaining_time": "2:55:39", "throughput": 8732.1, "total_tokens": 45891104} +{"current_steps": 68105, "total_steps": 204665, "loss": 0.0007, "lr": 1.6877045825179063e-06, "epoch": 1.6638164805902327, "percentage": 33.28, "elapsed_time": "1:27:35", "remaining_time": "2:55:38", "throughput": 8732.22, "total_tokens": 45894880} +{"current_steps": 68110, "total_steps": 204665, "loss": 0.0989, "lr": 1.6876426692785032e-06, "epoch": 1.6639386314220799, "percentage": 33.28, "elapsed_time": "1:27:36", "remaining_time": "2:55:38", "throughput": 8732.33, "total_tokens": 45898592} +{"current_steps": 68115, "total_steps": 204665, "loss": 0.0914, "lr": 1.6875807510383777e-06, "epoch": 1.664060782253927, "percentage": 33.28, "elapsed_time": "1:27:36", "remaining_time": "2:55:37", "throughput": 8732.39, "total_tokens": 45901920} +{"current_steps": 68120, "total_steps": 204665, "loss": 0.0416, "lr": 1.6875188277979802e-06, "epoch": 1.6641829330857743, "percentage": 33.28, "elapsed_time": "1:27:36", "remaining_time": "2:55:37", "throughput": 8732.46, "total_tokens": 45905312} +{"current_steps": 68125, "total_steps": 204665, "loss": 0.1037, "lr": 1.6874568995577608e-06, "epoch": 1.6643050839176214, "percentage": 33.29, "elapsed_time": "1:27:37", "remaining_time": "2:55:36", "throughput": 8732.49, "total_tokens": 45908448} +{"current_steps": 68130, "total_steps": 204665, "loss": 0.1828, "lr": 1.6873949663181698e-06, "epoch": 1.6644272347494686, "percentage": 33.29, "elapsed_time": "1:27:37", "remaining_time": "2:55:36", "throughput": 8732.49, "total_tokens": 45911392} +{"current_steps": 68135, "total_steps": 204665, "loss": 0.1889, "lr": 1.6873330280796578e-06, "epoch": 1.6645493855813158, "percentage": 33.29, "elapsed_time": "1:27:37", "remaining_time": "2:55:35", "throughput": 8732.54, "total_tokens": 45914656} +{"current_steps": 68140, "total_steps": 204665, "loss": 0.1231, "lr": 1.6872710848426752e-06, "epoch": 1.664671536413163, "percentage": 33.29, "elapsed_time": "1:27:38", "remaining_time": "2:55:35", "throughput": 8732.63, "total_tokens": 45918240} +{"current_steps": 68145, "total_steps": 204665, "loss": 0.0487, "lr": 1.6872091366076725e-06, "epoch": 1.6647936872450102, "percentage": 33.3, "elapsed_time": "1:27:38", "remaining_time": "2:55:34", "throughput": 8732.75, "total_tokens": 45922016} +{"current_steps": 68150, "total_steps": 204665, "loss": 0.1104, "lr": 1.6871471833751e-06, "epoch": 1.6649158380768574, "percentage": 33.3, "elapsed_time": "1:27:38", "remaining_time": "2:55:34", "throughput": 8732.75, "total_tokens": 45924960} +{"current_steps": 68155, "total_steps": 204665, "loss": 0.1008, "lr": 1.6870852251454082e-06, "epoch": 1.6650379889087046, "percentage": 33.3, "elapsed_time": "1:27:39", "remaining_time": "2:55:34", "throughput": 8732.86, "total_tokens": 45928608} +{"current_steps": 68160, "total_steps": 204665, "loss": 0.0479, "lr": 1.687023261919048e-06, "epoch": 1.6651601397405518, "percentage": 33.3, "elapsed_time": "1:27:39", "remaining_time": "2:55:33", "throughput": 8732.99, "total_tokens": 45932448} +{"current_steps": 68165, "total_steps": 204665, "loss": 0.0546, "lr": 1.6869612936964699e-06, "epoch": 1.665282290572399, "percentage": 33.31, "elapsed_time": "1:27:39", "remaining_time": "2:55:33", "throughput": 8733.03, "total_tokens": 45935712} +{"current_steps": 68170, "total_steps": 204665, "loss": 0.0772, "lr": 1.6868993204781242e-06, "epoch": 1.665404441404246, "percentage": 33.31, "elapsed_time": "1:27:40", "remaining_time": "2:55:32", "throughput": 8733.07, "total_tokens": 45938912} +{"current_steps": 68175, "total_steps": 204665, "loss": 0.095, "lr": 1.6868373422644623e-06, "epoch": 1.6655265922360931, "percentage": 33.31, "elapsed_time": "1:27:40", "remaining_time": "2:55:32", "throughput": 8733.18, "total_tokens": 45942560} +{"current_steps": 68180, "total_steps": 204665, "loss": 0.092, "lr": 1.6867753590559346e-06, "epoch": 1.6656487430679403, "percentage": 33.31, "elapsed_time": "1:27:41", "remaining_time": "2:55:31", "throughput": 8733.2, "total_tokens": 45945632} +{"current_steps": 68185, "total_steps": 204665, "loss": 0.1687, "lr": 1.6867133708529915e-06, "epoch": 1.6657708938997875, "percentage": 33.32, "elapsed_time": "1:27:41", "remaining_time": "2:55:31", "throughput": 8733.28, "total_tokens": 45949152} +{"current_steps": 68190, "total_steps": 204665, "loss": 0.1097, "lr": 1.686651377656084e-06, "epoch": 1.6658930447316345, "percentage": 33.32, "elapsed_time": "1:27:41", "remaining_time": "2:55:30", "throughput": 8733.39, "total_tokens": 45952800} +{"current_steps": 68195, "total_steps": 204665, "loss": 0.1661, "lr": 1.6865893794656631e-06, "epoch": 1.6660151955634817, "percentage": 33.32, "elapsed_time": "1:27:42", "remaining_time": "2:55:30", "throughput": 8733.4, "total_tokens": 45955808} +{"current_steps": 68200, "total_steps": 204665, "loss": 0.0467, "lr": 1.6865273762821794e-06, "epoch": 1.6661373463953288, "percentage": 33.32, "elapsed_time": "1:27:42", "remaining_time": "2:55:29", "throughput": 8733.53, "total_tokens": 45959648} +{"current_steps": 68205, "total_steps": 204665, "loss": 0.0033, "lr": 1.6864653681060841e-06, "epoch": 1.666259497227176, "percentage": 33.33, "elapsed_time": "1:27:42", "remaining_time": "2:55:29", "throughput": 8733.6, "total_tokens": 45963040} +{"current_steps": 68210, "total_steps": 204665, "loss": 0.1121, "lr": 1.686403354937828e-06, "epoch": 1.6663816480590232, "percentage": 33.33, "elapsed_time": "1:27:43", "remaining_time": "2:55:28", "throughput": 8733.67, "total_tokens": 45966432} +{"current_steps": 68215, "total_steps": 204665, "loss": 0.0397, "lr": 1.6863413367778622e-06, "epoch": 1.6665037988908704, "percentage": 33.33, "elapsed_time": "1:27:43", "remaining_time": "2:55:28", "throughput": 8733.7, "total_tokens": 45969568} +{"current_steps": 68220, "total_steps": 204665, "loss": 0.1143, "lr": 1.6862793136266376e-06, "epoch": 1.6666259497227176, "percentage": 33.33, "elapsed_time": "1:27:43", "remaining_time": "2:55:28", "throughput": 8733.72, "total_tokens": 45972640} +{"current_steps": 68225, "total_steps": 204665, "loss": 0.0426, "lr": 1.686217285484605e-06, "epoch": 1.6667481005545648, "percentage": 33.33, "elapsed_time": "1:27:44", "remaining_time": "2:55:27", "throughput": 8733.84, "total_tokens": 45976352} +{"current_steps": 68230, "total_steps": 204665, "loss": 0.0423, "lr": 1.6861552523522157e-06, "epoch": 1.666870251386412, "percentage": 33.34, "elapsed_time": "1:27:44", "remaining_time": "2:55:27", "throughput": 8733.89, "total_tokens": 45979680} +{"current_steps": 68235, "total_steps": 204665, "loss": 0.0519, "lr": 1.6860932142299212e-06, "epoch": 1.6669924022182592, "percentage": 33.34, "elapsed_time": "1:27:44", "remaining_time": "2:55:26", "throughput": 8733.96, "total_tokens": 45983072} +{"current_steps": 68240, "total_steps": 204665, "loss": 0.1198, "lr": 1.6860311711181722e-06, "epoch": 1.6671145530501064, "percentage": 33.34, "elapsed_time": "1:27:45", "remaining_time": "2:55:26", "throughput": 8734.02, "total_tokens": 45986400} +{"current_steps": 68245, "total_steps": 204665, "loss": 0.0444, "lr": 1.6859691230174198e-06, "epoch": 1.6672367038819536, "percentage": 33.34, "elapsed_time": "1:27:45", "remaining_time": "2:55:25", "throughput": 8734.02, "total_tokens": 45989408} +{"current_steps": 68250, "total_steps": 204665, "loss": 0.0011, "lr": 1.6859070699281155e-06, "epoch": 1.6673588547138007, "percentage": 33.35, "elapsed_time": "1:27:45", "remaining_time": "2:55:25", "throughput": 8734.15, "total_tokens": 45993184} +{"current_steps": 68255, "total_steps": 204665, "loss": 0.0943, "lr": 1.6858450118507107e-06, "epoch": 1.667481005545648, "percentage": 33.35, "elapsed_time": "1:27:46", "remaining_time": "2:55:24", "throughput": 8734.25, "total_tokens": 45996832} +{"current_steps": 68260, "total_steps": 204665, "loss": 0.0301, "lr": 1.6857829487856563e-06, "epoch": 1.667603156377495, "percentage": 33.35, "elapsed_time": "1:27:46", "remaining_time": "2:55:24", "throughput": 8734.34, "total_tokens": 46000352} +{"current_steps": 68265, "total_steps": 204665, "loss": 0.0598, "lr": 1.6857208807334038e-06, "epoch": 1.667725307209342, "percentage": 33.35, "elapsed_time": "1:27:46", "remaining_time": "2:55:23", "throughput": 8734.36, "total_tokens": 46003424} +{"current_steps": 68270, "total_steps": 204665, "loss": 0.0956, "lr": 1.6856588076944048e-06, "epoch": 1.6678474580411893, "percentage": 33.36, "elapsed_time": "1:27:47", "remaining_time": "2:55:23", "throughput": 8734.42, "total_tokens": 46006816} +{"current_steps": 68275, "total_steps": 204665, "loss": 0.0745, "lr": 1.6855967296691104e-06, "epoch": 1.6679696088730365, "percentage": 33.36, "elapsed_time": "1:27:47", "remaining_time": "2:55:22", "throughput": 8734.44, "total_tokens": 46009888} +{"current_steps": 68280, "total_steps": 204665, "loss": 0.0364, "lr": 1.6855346466579725e-06, "epoch": 1.6680917597048834, "percentage": 33.36, "elapsed_time": "1:27:47", "remaining_time": "2:55:22", "throughput": 8734.59, "total_tokens": 46013792} +{"current_steps": 68285, "total_steps": 204665, "loss": 0.0034, "lr": 1.6854725586614419e-06, "epoch": 1.6682139105367306, "percentage": 33.36, "elapsed_time": "1:27:48", "remaining_time": "2:55:22", "throughput": 8734.64, "total_tokens": 46017120} +{"current_steps": 68290, "total_steps": 204665, "loss": 0.0019, "lr": 1.6854104656799707e-06, "epoch": 1.6683360613685778, "percentage": 33.37, "elapsed_time": "1:27:48", "remaining_time": "2:55:21", "throughput": 8734.67, "total_tokens": 46020256} +{"current_steps": 68295, "total_steps": 204665, "loss": 0.0043, "lr": 1.6853483677140098e-06, "epoch": 1.668458212200425, "percentage": 33.37, "elapsed_time": "1:27:49", "remaining_time": "2:55:21", "throughput": 8734.78, "total_tokens": 46023904} +{"current_steps": 68300, "total_steps": 204665, "loss": 0.1332, "lr": 1.6852862647640116e-06, "epoch": 1.6685803630322722, "percentage": 33.37, "elapsed_time": "1:27:49", "remaining_time": "2:55:20", "throughput": 8734.83, "total_tokens": 46027232} +{"current_steps": 68305, "total_steps": 204665, "loss": 0.046, "lr": 1.6852241568304274e-06, "epoch": 1.6687025138641194, "percentage": 33.37, "elapsed_time": "1:27:49", "remaining_time": "2:55:20", "throughput": 8734.89, "total_tokens": 46030560} +{"current_steps": 68310, "total_steps": 204665, "loss": 0.0841, "lr": 1.6851620439137087e-06, "epoch": 1.6688246646959666, "percentage": 33.38, "elapsed_time": "1:27:50", "remaining_time": "2:55:19", "throughput": 8735.0, "total_tokens": 46034208} +{"current_steps": 68315, "total_steps": 204665, "loss": 0.0816, "lr": 1.6850999260143076e-06, "epoch": 1.6689468155278138, "percentage": 33.38, "elapsed_time": "1:27:50", "remaining_time": "2:55:19", "throughput": 8735.06, "total_tokens": 46037536} +{"current_steps": 68320, "total_steps": 204665, "loss": 0.0013, "lr": 1.6850378031326752e-06, "epoch": 1.669068966359661, "percentage": 33.38, "elapsed_time": "1:27:50", "remaining_time": "2:55:18", "throughput": 8735.13, "total_tokens": 46040992} +{"current_steps": 68325, "total_steps": 204665, "loss": 0.0786, "lr": 1.6849756752692636e-06, "epoch": 1.6691911171915081, "percentage": 33.38, "elapsed_time": "1:27:51", "remaining_time": "2:55:18", "throughput": 8735.19, "total_tokens": 46044384} +{"current_steps": 68330, "total_steps": 204665, "loss": 0.067, "lr": 1.684913542424525e-06, "epoch": 1.6693132680233553, "percentage": 33.39, "elapsed_time": "1:27:51", "remaining_time": "2:55:17", "throughput": 8735.28, "total_tokens": 46047904} +{"current_steps": 68335, "total_steps": 204665, "loss": 0.0374, "lr": 1.6848514045989108e-06, "epoch": 1.6694354188552025, "percentage": 33.39, "elapsed_time": "1:27:51", "remaining_time": "2:55:17", "throughput": 8735.31, "total_tokens": 46051040} +{"current_steps": 68340, "total_steps": 204665, "loss": 0.1782, "lr": 1.6847892617928729e-06, "epoch": 1.6695575696870497, "percentage": 33.39, "elapsed_time": "1:27:52", "remaining_time": "2:55:16", "throughput": 8735.37, "total_tokens": 46054432} +{"current_steps": 68345, "total_steps": 204665, "loss": 0.175, "lr": 1.6847271140068633e-06, "epoch": 1.6696797205188967, "percentage": 33.39, "elapsed_time": "1:27:52", "remaining_time": "2:55:16", "throughput": 8735.48, "total_tokens": 46058080} +{"current_steps": 68350, "total_steps": 204665, "loss": 0.0545, "lr": 1.684664961241334e-06, "epoch": 1.6698018713507439, "percentage": 33.4, "elapsed_time": "1:27:52", "remaining_time": "2:55:16", "throughput": 8735.52, "total_tokens": 46061280} +{"current_steps": 68355, "total_steps": 204665, "loss": 0.2261, "lr": 1.684602803496737e-06, "epoch": 1.669924022182591, "percentage": 33.4, "elapsed_time": "1:27:53", "remaining_time": "2:55:15", "throughput": 8735.62, "total_tokens": 46064864} +{"current_steps": 68360, "total_steps": 204665, "loss": 0.0986, "lr": 1.684540640773524e-06, "epoch": 1.6700461730144383, "percentage": 33.4, "elapsed_time": "1:27:53", "remaining_time": "2:55:15", "throughput": 8735.64, "total_tokens": 46068000} +{"current_steps": 68365, "total_steps": 204665, "loss": 0.0021, "lr": 1.6844784730721476e-06, "epoch": 1.6701683238462854, "percentage": 33.4, "elapsed_time": "1:27:54", "remaining_time": "2:55:15", "throughput": 8735.34, "total_tokens": 46071712} +{"current_steps": 68370, "total_steps": 204665, "loss": 0.0439, "lr": 1.6844163003930599e-06, "epoch": 1.6702904746781324, "percentage": 33.41, "elapsed_time": "1:27:54", "remaining_time": "2:55:14", "throughput": 8735.44, "total_tokens": 46075296} +{"current_steps": 68375, "total_steps": 204665, "loss": 0.0014, "lr": 1.6843541227367121e-06, "epoch": 1.6704126255099796, "percentage": 33.41, "elapsed_time": "1:27:54", "remaining_time": "2:55:14", "throughput": 8735.49, "total_tokens": 46078624} +{"current_steps": 68380, "total_steps": 204665, "loss": 0.1804, "lr": 1.6842919401035575e-06, "epoch": 1.6705347763418268, "percentage": 33.41, "elapsed_time": "1:27:55", "remaining_time": "2:55:13", "throughput": 8735.51, "total_tokens": 46081696} +{"current_steps": 68385, "total_steps": 204665, "loss": 0.112, "lr": 1.6842297524940477e-06, "epoch": 1.670656927173674, "percentage": 33.41, "elapsed_time": "1:27:55", "remaining_time": "2:55:13", "throughput": 8735.6, "total_tokens": 46085216} +{"current_steps": 68390, "total_steps": 204665, "loss": 0.0388, "lr": 1.6841675599086354e-06, "epoch": 1.6707790780055212, "percentage": 33.42, "elapsed_time": "1:27:55", "remaining_time": "2:55:12", "throughput": 8735.67, "total_tokens": 46088608} +{"current_steps": 68395, "total_steps": 204665, "loss": 0.1337, "lr": 1.6841053623477723e-06, "epoch": 1.6709012288373684, "percentage": 33.42, "elapsed_time": "1:27:56", "remaining_time": "2:55:12", "throughput": 8735.67, "total_tokens": 46091616} +{"current_steps": 68400, "total_steps": 204665, "loss": 0.0462, "lr": 1.6840431598119112e-06, "epoch": 1.6710233796692155, "percentage": 33.42, "elapsed_time": "1:27:56", "remaining_time": "2:55:12", "throughput": 8736.01, "total_tokens": 46096864} +{"current_steps": 68405, "total_steps": 204665, "loss": 0.0923, "lr": 1.683980952301504e-06, "epoch": 1.6711455305010627, "percentage": 33.42, "elapsed_time": "1:27:57", "remaining_time": "2:55:11", "throughput": 8736.1, "total_tokens": 46100448} +{"current_steps": 68410, "total_steps": 204665, "loss": 0.0824, "lr": 1.6839187398170033e-06, "epoch": 1.67126768133291, "percentage": 33.43, "elapsed_time": "1:27:57", "remaining_time": "2:55:11", "throughput": 8736.13, "total_tokens": 46103584} +{"current_steps": 68415, "total_steps": 204665, "loss": 0.0901, "lr": 1.683856522358862e-06, "epoch": 1.6713898321647571, "percentage": 33.43, "elapsed_time": "1:27:57", "remaining_time": "2:55:10", "throughput": 8736.14, "total_tokens": 46106592} +{"current_steps": 68420, "total_steps": 204665, "loss": 0.0017, "lr": 1.6837942999275318e-06, "epoch": 1.6715119829966043, "percentage": 33.43, "elapsed_time": "1:27:58", "remaining_time": "2:55:10", "throughput": 8736.19, "total_tokens": 46109920} +{"current_steps": 68425, "total_steps": 204665, "loss": 0.0331, "lr": 1.6837320725234657e-06, "epoch": 1.6716341338284515, "percentage": 33.43, "elapsed_time": "1:27:58", "remaining_time": "2:55:09", "throughput": 8736.24, "total_tokens": 46113248} +{"current_steps": 68430, "total_steps": 204665, "loss": 0.0887, "lr": 1.6836698401471158e-06, "epoch": 1.6717562846602987, "percentage": 33.44, "elapsed_time": "1:27:58", "remaining_time": "2:55:09", "throughput": 8736.35, "total_tokens": 46116896} +{"current_steps": 68435, "total_steps": 204665, "loss": 0.0547, "lr": 1.6836076027989351e-06, "epoch": 1.6718784354921457, "percentage": 33.44, "elapsed_time": "1:27:59", "remaining_time": "2:55:08", "throughput": 8736.4, "total_tokens": 46120224} +{"current_steps": 68440, "total_steps": 204665, "loss": 0.0575, "lr": 1.683545360479376e-06, "epoch": 1.6720005863239928, "percentage": 33.44, "elapsed_time": "1:27:59", "remaining_time": "2:55:08", "throughput": 8736.47, "total_tokens": 46123616} +{"current_steps": 68445, "total_steps": 204665, "loss": 0.0708, "lr": 1.6834831131888914e-06, "epoch": 1.67212273715584, "percentage": 33.44, "elapsed_time": "1:27:59", "remaining_time": "2:55:07", "throughput": 8736.46, "total_tokens": 46126560} +{"current_steps": 68450, "total_steps": 204665, "loss": 0.026, "lr": 1.6834208609279336e-06, "epoch": 1.6722448879876872, "percentage": 33.44, "elapsed_time": "1:28:00", "remaining_time": "2:55:07", "throughput": 8736.51, "total_tokens": 46129888} +{"current_steps": 68455, "total_steps": 204665, "loss": 0.1095, "lr": 1.6833586036969556e-06, "epoch": 1.6723670388195344, "percentage": 33.45, "elapsed_time": "1:28:00", "remaining_time": "2:55:06", "throughput": 8736.64, "total_tokens": 46133664} +{"current_steps": 68460, "total_steps": 204665, "loss": 0.0005, "lr": 1.6832963414964098e-06, "epoch": 1.6724891896513814, "percentage": 33.45, "elapsed_time": "1:28:00", "remaining_time": "2:55:06", "throughput": 8736.64, "total_tokens": 46136608} +{"current_steps": 68465, "total_steps": 204665, "loss": 0.0005, "lr": 1.6832340743267493e-06, "epoch": 1.6726113404832286, "percentage": 33.45, "elapsed_time": "1:28:01", "remaining_time": "2:55:06", "throughput": 8736.68, "total_tokens": 46139872} +{"current_steps": 68470, "total_steps": 204665, "loss": 0.1449, "lr": 1.683171802188427e-06, "epoch": 1.6727334913150758, "percentage": 33.45, "elapsed_time": "1:28:01", "remaining_time": "2:55:05", "throughput": 8736.73, "total_tokens": 46143200} +{"current_steps": 68475, "total_steps": 204665, "loss": 0.0376, "lr": 1.6831095250818956e-06, "epoch": 1.672855642146923, "percentage": 33.46, "elapsed_time": "1:28:01", "remaining_time": "2:55:05", "throughput": 8736.82, "total_tokens": 46146720} +{"current_steps": 68480, "total_steps": 204665, "loss": 0.1879, "lr": 1.6830472430076076e-06, "epoch": 1.6729777929787701, "percentage": 33.46, "elapsed_time": "1:28:02", "remaining_time": "2:55:04", "throughput": 8736.86, "total_tokens": 46149984} +{"current_steps": 68485, "total_steps": 204665, "loss": 0.0003, "lr": 1.6829849559660167e-06, "epoch": 1.6730999438106173, "percentage": 33.46, "elapsed_time": "1:28:02", "remaining_time": "2:55:04", "throughput": 8736.97, "total_tokens": 46153632} +{"current_steps": 68490, "total_steps": 204665, "loss": 0.0528, "lr": 1.6829226639575756e-06, "epoch": 1.6732220946424645, "percentage": 33.46, "elapsed_time": "1:28:02", "remaining_time": "2:55:03", "throughput": 8737.0, "total_tokens": 46156768} +{"current_steps": 68495, "total_steps": 204665, "loss": 0.0007, "lr": 1.6828603669827368e-06, "epoch": 1.6733442454743117, "percentage": 33.47, "elapsed_time": "1:28:03", "remaining_time": "2:55:03", "throughput": 8737.04, "total_tokens": 46159968} +{"current_steps": 68500, "total_steps": 204665, "loss": 0.114, "lr": 1.682798065041954e-06, "epoch": 1.673466396306159, "percentage": 33.47, "elapsed_time": "1:28:03", "remaining_time": "2:55:02", "throughput": 8737.25, "total_tokens": 46164320} +{"current_steps": 68505, "total_steps": 204665, "loss": 0.1651, "lr": 1.68273575813568e-06, "epoch": 1.673588547138006, "percentage": 33.47, "elapsed_time": "1:28:03", "remaining_time": "2:55:02", "throughput": 8737.35, "total_tokens": 46167968} +{"current_steps": 68510, "total_steps": 204665, "loss": 0.1471, "lr": 1.682673446264368e-06, "epoch": 1.6737106979698533, "percentage": 33.47, "elapsed_time": "1:28:04", "remaining_time": "2:55:01", "throughput": 8737.36, "total_tokens": 46170976} +{"current_steps": 68515, "total_steps": 204665, "loss": 0.0332, "lr": 1.682611129428471e-06, "epoch": 1.6738328488017005, "percentage": 33.48, "elapsed_time": "1:28:04", "remaining_time": "2:55:01", "throughput": 8737.43, "total_tokens": 46174432} +{"current_steps": 68520, "total_steps": 204665, "loss": 0.0901, "lr": 1.6825488076284424e-06, "epoch": 1.6739549996335477, "percentage": 33.48, "elapsed_time": "1:28:05", "remaining_time": "2:55:01", "throughput": 8737.51, "total_tokens": 46177888} +{"current_steps": 68525, "total_steps": 204665, "loss": 0.0046, "lr": 1.682486480864735e-06, "epoch": 1.6740771504653946, "percentage": 33.48, "elapsed_time": "1:28:05", "remaining_time": "2:55:00", "throughput": 8737.59, "total_tokens": 46181408} +{"current_steps": 68530, "total_steps": 204665, "loss": 0.1017, "lr": 1.6824241491378025e-06, "epoch": 1.6741993012972418, "percentage": 33.48, "elapsed_time": "1:28:05", "remaining_time": "2:55:00", "throughput": 8737.61, "total_tokens": 46184480} +{"current_steps": 68535, "total_steps": 204665, "loss": 0.0012, "lr": 1.6823618124480984e-06, "epoch": 1.674321452129089, "percentage": 33.49, "elapsed_time": "1:28:06", "remaining_time": "2:54:59", "throughput": 8737.64, "total_tokens": 46187616} +{"current_steps": 68540, "total_steps": 204665, "loss": 0.0433, "lr": 1.682299470796075e-06, "epoch": 1.6744436029609362, "percentage": 33.49, "elapsed_time": "1:28:06", "remaining_time": "2:54:59", "throughput": 8737.7, "total_tokens": 46191008} +{"current_steps": 68545, "total_steps": 204665, "loss": 0.0571, "lr": 1.6822371241821864e-06, "epoch": 1.6745657537927832, "percentage": 33.49, "elapsed_time": "1:28:06", "remaining_time": "2:54:58", "throughput": 8737.8, "total_tokens": 46194592} +{"current_steps": 68550, "total_steps": 204665, "loss": 0.1668, "lr": 1.6821747726068865e-06, "epoch": 1.6746879046246304, "percentage": 33.49, "elapsed_time": "1:28:07", "remaining_time": "2:54:58", "throughput": 8737.85, "total_tokens": 46197856} +{"current_steps": 68555, "total_steps": 204665, "loss": 0.0703, "lr": 1.6821124160706276e-06, "epoch": 1.6748100554564775, "percentage": 33.5, "elapsed_time": "1:28:07", "remaining_time": "2:54:57", "throughput": 8738.04, "total_tokens": 46202080} +{"current_steps": 68560, "total_steps": 204665, "loss": 0.0012, "lr": 1.6820500545738642e-06, "epoch": 1.6749322062883247, "percentage": 33.5, "elapsed_time": "1:28:07", "remaining_time": "2:54:57", "throughput": 8738.12, "total_tokens": 46205600} +{"current_steps": 68565, "total_steps": 204665, "loss": 0.0521, "lr": 1.6819876881170491e-06, "epoch": 1.675054357120172, "percentage": 33.5, "elapsed_time": "1:28:08", "remaining_time": "2:54:56", "throughput": 8738.23, "total_tokens": 46209248} +{"current_steps": 68570, "total_steps": 204665, "loss": 0.0331, "lr": 1.6819253167006359e-06, "epoch": 1.6751765079520191, "percentage": 33.5, "elapsed_time": "1:28:08", "remaining_time": "2:54:56", "throughput": 8738.26, "total_tokens": 46212448} +{"current_steps": 68575, "total_steps": 204665, "loss": 0.0933, "lr": 1.6818629403250787e-06, "epoch": 1.6752986587838663, "percentage": 33.51, "elapsed_time": "1:28:08", "remaining_time": "2:54:55", "throughput": 8738.26, "total_tokens": 46215392} +{"current_steps": 68580, "total_steps": 204665, "loss": 0.0253, "lr": 1.6818005589908308e-06, "epoch": 1.6754208096157135, "percentage": 33.51, "elapsed_time": "1:28:09", "remaining_time": "2:54:55", "throughput": 8738.28, "total_tokens": 46218464} +{"current_steps": 68585, "total_steps": 204665, "loss": 0.1125, "lr": 1.681738172698346e-06, "epoch": 1.6755429604475607, "percentage": 33.51, "elapsed_time": "1:28:09", "remaining_time": "2:54:55", "throughput": 8738.36, "total_tokens": 46221984} +{"current_steps": 68590, "total_steps": 204665, "loss": 0.0936, "lr": 1.6816757814480775e-06, "epoch": 1.6756651112794079, "percentage": 33.51, "elapsed_time": "1:28:09", "remaining_time": "2:54:54", "throughput": 8738.36, "total_tokens": 46224928} +{"current_steps": 68595, "total_steps": 204665, "loss": 0.0543, "lr": 1.6816133852404795e-06, "epoch": 1.675787262111255, "percentage": 33.52, "elapsed_time": "1:28:10", "remaining_time": "2:54:54", "throughput": 8738.41, "total_tokens": 46228192} +{"current_steps": 68600, "total_steps": 204665, "loss": 0.0585, "lr": 1.6815509840760055e-06, "epoch": 1.6759094129431023, "percentage": 33.52, "elapsed_time": "1:28:10", "remaining_time": "2:54:53", "throughput": 8738.49, "total_tokens": 46231712} +{"current_steps": 68605, "total_steps": 204665, "loss": 0.1653, "lr": 1.6814885779551096e-06, "epoch": 1.6760315637749494, "percentage": 33.52, "elapsed_time": "1:28:10", "remaining_time": "2:54:53", "throughput": 8738.52, "total_tokens": 46234848} +{"current_steps": 68610, "total_steps": 204665, "loss": 0.111, "lr": 1.6814261668782454e-06, "epoch": 1.6761537146067966, "percentage": 33.52, "elapsed_time": "1:28:11", "remaining_time": "2:54:52", "throughput": 8738.67, "total_tokens": 46238752} +{"current_steps": 68615, "total_steps": 204665, "loss": 0.115, "lr": 1.681363750845867e-06, "epoch": 1.6762758654386436, "percentage": 33.53, "elapsed_time": "1:28:11", "remaining_time": "2:54:52", "throughput": 8738.71, "total_tokens": 46241952} +{"current_steps": 68620, "total_steps": 204665, "loss": 0.0271, "lr": 1.681301329858428e-06, "epoch": 1.6763980162704908, "percentage": 33.53, "elapsed_time": "1:28:11", "remaining_time": "2:54:51", "throughput": 8738.78, "total_tokens": 46245408} +{"current_steps": 68625, "total_steps": 204665, "loss": 0.1538, "lr": 1.6812389039163824e-06, "epoch": 1.676520167102338, "percentage": 33.53, "elapsed_time": "1:28:12", "remaining_time": "2:54:51", "throughput": 8738.9, "total_tokens": 46249184} +{"current_steps": 68630, "total_steps": 204665, "loss": 0.121, "lr": 1.6811764730201844e-06, "epoch": 1.6766423179341852, "percentage": 33.53, "elapsed_time": "1:28:12", "remaining_time": "2:54:50", "throughput": 8738.95, "total_tokens": 46252512} +{"current_steps": 68635, "total_steps": 204665, "loss": 0.2316, "lr": 1.6811140371702876e-06, "epoch": 1.6767644687660321, "percentage": 33.54, "elapsed_time": "1:28:13", "remaining_time": "2:54:50", "throughput": 8738.98, "total_tokens": 46255648} +{"current_steps": 68640, "total_steps": 204665, "loss": 0.1531, "lr": 1.6810515963671465e-06, "epoch": 1.6768866195978793, "percentage": 33.54, "elapsed_time": "1:28:13", "remaining_time": "2:54:49", "throughput": 8739.05, "total_tokens": 46259104} +{"current_steps": 68645, "total_steps": 204665, "loss": 0.1858, "lr": 1.680989150611215e-06, "epoch": 1.6770087704297265, "percentage": 33.54, "elapsed_time": "1:28:13", "remaining_time": "2:54:49", "throughput": 8739.09, "total_tokens": 46262368} +{"current_steps": 68650, "total_steps": 204665, "loss": 0.0424, "lr": 1.6809266999029475e-06, "epoch": 1.6771309212615737, "percentage": 33.54, "elapsed_time": "1:28:14", "remaining_time": "2:54:49", "throughput": 8739.07, "total_tokens": 46265184} +{"current_steps": 68655, "total_steps": 204665, "loss": 0.072, "lr": 1.6808642442427975e-06, "epoch": 1.677253072093421, "percentage": 33.55, "elapsed_time": "1:28:14", "remaining_time": "2:54:48", "throughput": 8739.14, "total_tokens": 46268576} +{"current_steps": 68660, "total_steps": 204665, "loss": 0.0042, "lr": 1.6808017836312198e-06, "epoch": 1.677375222925268, "percentage": 33.55, "elapsed_time": "1:28:14", "remaining_time": "2:54:48", "throughput": 8739.18, "total_tokens": 46271840} +{"current_steps": 68665, "total_steps": 204665, "loss": 0.1212, "lr": 1.6807393180686683e-06, "epoch": 1.6774973737571153, "percentage": 33.55, "elapsed_time": "1:28:15", "remaining_time": "2:54:47", "throughput": 8739.21, "total_tokens": 46274976} +{"current_steps": 68670, "total_steps": 204665, "loss": 0.1753, "lr": 1.6806768475555973e-06, "epoch": 1.6776195245889625, "percentage": 33.55, "elapsed_time": "1:28:15", "remaining_time": "2:54:47", "throughput": 8739.19, "total_tokens": 46277856} +{"current_steps": 68675, "total_steps": 204665, "loss": 0.2848, "lr": 1.6806143720924616e-06, "epoch": 1.6777416754208097, "percentage": 33.55, "elapsed_time": "1:28:15", "remaining_time": "2:54:46", "throughput": 8739.33, "total_tokens": 46281696} +{"current_steps": 68680, "total_steps": 204665, "loss": 0.1093, "lr": 1.6805518916797149e-06, "epoch": 1.6778638262526568, "percentage": 33.56, "elapsed_time": "1:28:16", "remaining_time": "2:54:46", "throughput": 8739.38, "total_tokens": 46284960} +{"current_steps": 68685, "total_steps": 204665, "loss": 0.0304, "lr": 1.6804894063178114e-06, "epoch": 1.677985977084504, "percentage": 33.56, "elapsed_time": "1:28:16", "remaining_time": "2:54:45", "throughput": 8739.43, "total_tokens": 46288288} +{"current_steps": 68690, "total_steps": 204665, "loss": 0.048, "lr": 1.6804269160072064e-06, "epoch": 1.6781081279163512, "percentage": 33.56, "elapsed_time": "1:28:16", "remaining_time": "2:54:45", "throughput": 8739.48, "total_tokens": 46291616} +{"current_steps": 68695, "total_steps": 204665, "loss": 0.143, "lr": 1.6803644207483535e-06, "epoch": 1.6782302787481984, "percentage": 33.56, "elapsed_time": "1:28:17", "remaining_time": "2:54:44", "throughput": 8739.49, "total_tokens": 46294624} +{"current_steps": 68700, "total_steps": 204665, "loss": 0.0056, "lr": 1.6803019205417076e-06, "epoch": 1.6783524295800456, "percentage": 33.57, "elapsed_time": "1:28:17", "remaining_time": "2:54:44", "throughput": 8739.56, "total_tokens": 46298080} +{"current_steps": 68705, "total_steps": 204665, "loss": 0.1018, "lr": 1.6802394153877236e-06, "epoch": 1.6784745804118926, "percentage": 33.57, "elapsed_time": "1:28:17", "remaining_time": "2:54:43", "throughput": 8739.63, "total_tokens": 46301472} +{"current_steps": 68710, "total_steps": 204665, "loss": 0.1373, "lr": 1.6801769052868553e-06, "epoch": 1.6785967312437398, "percentage": 33.57, "elapsed_time": "1:28:18", "remaining_time": "2:54:43", "throughput": 8739.64, "total_tokens": 46304544} +{"current_steps": 68715, "total_steps": 204665, "loss": 0.0873, "lr": 1.6801143902395576e-06, "epoch": 1.678718882075587, "percentage": 33.57, "elapsed_time": "1:28:18", "remaining_time": "2:54:43", "throughput": 8739.66, "total_tokens": 46307616} +{"current_steps": 68720, "total_steps": 204665, "loss": 0.038, "lr": 1.6800518702462851e-06, "epoch": 1.6788410329074341, "percentage": 33.58, "elapsed_time": "1:28:18", "remaining_time": "2:54:42", "throughput": 8739.76, "total_tokens": 46311264} +{"current_steps": 68725, "total_steps": 204665, "loss": 0.0029, "lr": 1.6799893453074924e-06, "epoch": 1.678963183739281, "percentage": 33.58, "elapsed_time": "1:28:19", "remaining_time": "2:54:42", "throughput": 8739.83, "total_tokens": 46314656} +{"current_steps": 68730, "total_steps": 204665, "loss": 0.0564, "lr": 1.6799268154236346e-06, "epoch": 1.6790853345711283, "percentage": 33.58, "elapsed_time": "1:28:19", "remaining_time": "2:54:41", "throughput": 8739.87, "total_tokens": 46317856} +{"current_steps": 68735, "total_steps": 204665, "loss": 0.0444, "lr": 1.679864280595166e-06, "epoch": 1.6792074854029755, "percentage": 33.58, "elapsed_time": "1:28:19", "remaining_time": "2:54:41", "throughput": 8739.94, "total_tokens": 46321248} +{"current_steps": 68740, "total_steps": 204665, "loss": 0.1281, "lr": 1.6798017408225414e-06, "epoch": 1.6793296362348227, "percentage": 33.59, "elapsed_time": "1:28:20", "remaining_time": "2:54:40", "throughput": 8739.95, "total_tokens": 46324256} +{"current_steps": 68745, "total_steps": 204665, "loss": 0.0977, "lr": 1.6797391961062157e-06, "epoch": 1.6794517870666699, "percentage": 33.59, "elapsed_time": "1:28:20", "remaining_time": "2:54:40", "throughput": 8740.03, "total_tokens": 46327712} +{"current_steps": 68750, "total_steps": 204665, "loss": 0.1268, "lr": 1.6796766464466436e-06, "epoch": 1.679573937898517, "percentage": 33.59, "elapsed_time": "1:28:20", "remaining_time": "2:54:39", "throughput": 8740.08, "total_tokens": 46330976} +{"current_steps": 68755, "total_steps": 204665, "loss": 0.0773, "lr": 1.6796140918442803e-06, "epoch": 1.6796960887303642, "percentage": 33.59, "elapsed_time": "1:28:21", "remaining_time": "2:54:39", "throughput": 8740.18, "total_tokens": 46334624} +{"current_steps": 68760, "total_steps": 204665, "loss": 0.0809, "lr": 1.6795515322995804e-06, "epoch": 1.6798182395622114, "percentage": 33.6, "elapsed_time": "1:28:21", "remaining_time": "2:54:38", "throughput": 8740.26, "total_tokens": 46338144} +{"current_steps": 68765, "total_steps": 204665, "loss": 0.1333, "lr": 1.679488967812999e-06, "epoch": 1.6799403903940586, "percentage": 33.6, "elapsed_time": "1:28:22", "remaining_time": "2:54:38", "throughput": 8740.28, "total_tokens": 46341216} +{"current_steps": 68770, "total_steps": 204665, "loss": 0.1215, "lr": 1.6794263983849913e-06, "epoch": 1.6800625412259058, "percentage": 33.6, "elapsed_time": "1:28:22", "remaining_time": "2:54:37", "throughput": 8740.37, "total_tokens": 46344800} +{"current_steps": 68775, "total_steps": 204665, "loss": 0.0794, "lr": 1.6793638240160117e-06, "epoch": 1.680184692057753, "percentage": 33.6, "elapsed_time": "1:28:22", "remaining_time": "2:54:37", "throughput": 8740.39, "total_tokens": 46347872} +{"current_steps": 68780, "total_steps": 204665, "loss": 0.1, "lr": 1.679301244706516e-06, "epoch": 1.6803068428896002, "percentage": 33.61, "elapsed_time": "1:28:23", "remaining_time": "2:54:36", "throughput": 8740.46, "total_tokens": 46351264} +{"current_steps": 68785, "total_steps": 204665, "loss": 0.1632, "lr": 1.6792386604569588e-06, "epoch": 1.6804289937214474, "percentage": 33.61, "elapsed_time": "1:28:23", "remaining_time": "2:54:36", "throughput": 8740.51, "total_tokens": 46354592} +{"current_steps": 68790, "total_steps": 204665, "loss": 0.0026, "lr": 1.6791760712677955e-06, "epoch": 1.6805511445532946, "percentage": 33.61, "elapsed_time": "1:28:23", "remaining_time": "2:54:36", "throughput": 8740.56, "total_tokens": 46357856} +{"current_steps": 68795, "total_steps": 204665, "loss": 0.0588, "lr": 1.6791134771394807e-06, "epoch": 1.6806732953851415, "percentage": 33.61, "elapsed_time": "1:28:24", "remaining_time": "2:54:35", "throughput": 8740.58, "total_tokens": 46360928} +{"current_steps": 68800, "total_steps": 204665, "loss": 0.105, "lr": 1.6790508780724705e-06, "epoch": 1.6807954462169887, "percentage": 33.62, "elapsed_time": "1:28:24", "remaining_time": "2:54:35", "throughput": 8740.62, "total_tokens": 46364192} +{"current_steps": 68805, "total_steps": 204665, "loss": 0.0054, "lr": 1.6789882740672194e-06, "epoch": 1.680917597048836, "percentage": 33.62, "elapsed_time": "1:28:24", "remaining_time": "2:54:34", "throughput": 8740.61, "total_tokens": 46367072} +{"current_steps": 68810, "total_steps": 204665, "loss": 0.056, "lr": 1.6789256651241832e-06, "epoch": 1.681039747880683, "percentage": 33.62, "elapsed_time": "1:28:25", "remaining_time": "2:54:34", "throughput": 8740.64, "total_tokens": 46370208} +{"current_steps": 68815, "total_steps": 204665, "loss": 0.0143, "lr": 1.6788630512438168e-06, "epoch": 1.68116189871253, "percentage": 33.62, "elapsed_time": "1:28:25", "remaining_time": "2:54:33", "throughput": 8740.79, "total_tokens": 46374112} +{"current_steps": 68820, "total_steps": 204665, "loss": 0.0289, "lr": 1.6788004324265757e-06, "epoch": 1.6812840495443773, "percentage": 33.63, "elapsed_time": "1:28:25", "remaining_time": "2:54:33", "throughput": 8740.8, "total_tokens": 46377120} +{"current_steps": 68825, "total_steps": 204665, "loss": 0.1079, "lr": 1.6787378086729152e-06, "epoch": 1.6814062003762245, "percentage": 33.63, "elapsed_time": "1:28:26", "remaining_time": "2:54:32", "throughput": 8740.82, "total_tokens": 46380256} +{"current_steps": 68830, "total_steps": 204665, "loss": 0.0743, "lr": 1.678675179983291e-06, "epoch": 1.6815283512080716, "percentage": 33.63, "elapsed_time": "1:28:26", "remaining_time": "2:54:32", "throughput": 8740.85, "total_tokens": 46383392} +{"current_steps": 68835, "total_steps": 204665, "loss": 0.1769, "lr": 1.6786125463581585e-06, "epoch": 1.6816505020399188, "percentage": 33.63, "elapsed_time": "1:28:26", "remaining_time": "2:54:31", "throughput": 8740.89, "total_tokens": 46386592} +{"current_steps": 68840, "total_steps": 204665, "loss": 0.0417, "lr": 1.6785499077979726e-06, "epoch": 1.681772652871766, "percentage": 33.64, "elapsed_time": "1:28:27", "remaining_time": "2:54:31", "throughput": 8740.94, "total_tokens": 46389856} +{"current_steps": 68845, "total_steps": 204665, "loss": 0.1182, "lr": 1.6784872643031896e-06, "epoch": 1.6818948037036132, "percentage": 33.64, "elapsed_time": "1:28:27", "remaining_time": "2:54:30", "throughput": 8741.02, "total_tokens": 46393376} +{"current_steps": 68850, "total_steps": 204665, "loss": 0.0869, "lr": 1.6784246158742643e-06, "epoch": 1.6820169545354604, "percentage": 33.64, "elapsed_time": "1:28:27", "remaining_time": "2:54:30", "throughput": 8741.05, "total_tokens": 46396512} +{"current_steps": 68855, "total_steps": 204665, "loss": 0.0886, "lr": 1.678361962511653e-06, "epoch": 1.6821391053673076, "percentage": 33.64, "elapsed_time": "1:28:28", "remaining_time": "2:54:30", "throughput": 8741.09, "total_tokens": 46399776} +{"current_steps": 68860, "total_steps": 204665, "loss": 0.0896, "lr": 1.6782993042158112e-06, "epoch": 1.6822612561991548, "percentage": 33.65, "elapsed_time": "1:28:28", "remaining_time": "2:54:29", "throughput": 8741.14, "total_tokens": 46403104} +{"current_steps": 68865, "total_steps": 204665, "loss": 0.1263, "lr": 1.678236640987194e-06, "epoch": 1.682383407031002, "percentage": 33.65, "elapsed_time": "1:28:28", "remaining_time": "2:54:29", "throughput": 8741.18, "total_tokens": 46406368} +{"current_steps": 68870, "total_steps": 204665, "loss": 0.1121, "lr": 1.6781739728262579e-06, "epoch": 1.6825055578628492, "percentage": 33.65, "elapsed_time": "1:28:29", "remaining_time": "2:54:28", "throughput": 8741.23, "total_tokens": 46409632} +{"current_steps": 68875, "total_steps": 204665, "loss": 0.0018, "lr": 1.6781112997334582e-06, "epoch": 1.6826277086946964, "percentage": 33.65, "elapsed_time": "1:28:29", "remaining_time": "2:54:28", "throughput": 8741.34, "total_tokens": 46413344} +{"current_steps": 68880, "total_steps": 204665, "loss": 0.1148, "lr": 1.6780486217092507e-06, "epoch": 1.6827498595265433, "percentage": 33.65, "elapsed_time": "1:28:29", "remaining_time": "2:54:27", "throughput": 8741.4, "total_tokens": 46416672} +{"current_steps": 68885, "total_steps": 204665, "loss": 0.0273, "lr": 1.677985938754091e-06, "epoch": 1.6828720103583905, "percentage": 33.66, "elapsed_time": "1:28:30", "remaining_time": "2:54:27", "throughput": 8741.46, "total_tokens": 46420000} +{"current_steps": 68890, "total_steps": 204665, "loss": 0.0751, "lr": 1.6779232508684355e-06, "epoch": 1.6829941611902377, "percentage": 33.66, "elapsed_time": "1:28:30", "remaining_time": "2:54:26", "throughput": 8741.59, "total_tokens": 46423840} +{"current_steps": 68895, "total_steps": 204665, "loss": 0.1406, "lr": 1.6778605580527398e-06, "epoch": 1.683116312022085, "percentage": 33.66, "elapsed_time": "1:28:31", "remaining_time": "2:54:26", "throughput": 8741.7, "total_tokens": 46427552} +{"current_steps": 68900, "total_steps": 204665, "loss": 0.1186, "lr": 1.6777978603074595e-06, "epoch": 1.683238462853932, "percentage": 33.66, "elapsed_time": "1:28:31", "remaining_time": "2:54:25", "throughput": 8741.77, "total_tokens": 46431008} +{"current_steps": 68905, "total_steps": 204665, "loss": 0.1768, "lr": 1.6777351576330512e-06, "epoch": 1.683360613685779, "percentage": 33.67, "elapsed_time": "1:28:31", "remaining_time": "2:54:25", "throughput": 8741.84, "total_tokens": 46434400} +{"current_steps": 68910, "total_steps": 204665, "loss": 0.0808, "lr": 1.6776724500299704e-06, "epoch": 1.6834827645176262, "percentage": 33.67, "elapsed_time": "1:28:32", "remaining_time": "2:54:24", "throughput": 8741.87, "total_tokens": 46437536} +{"current_steps": 68915, "total_steps": 204665, "loss": 0.0464, "lr": 1.6776097374986732e-06, "epoch": 1.6836049153494734, "percentage": 33.67, "elapsed_time": "1:28:32", "remaining_time": "2:54:24", "throughput": 8741.89, "total_tokens": 46440608} +{"current_steps": 68920, "total_steps": 204665, "loss": 0.1128, "lr": 1.6775470200396159e-06, "epoch": 1.6837270661813206, "percentage": 33.67, "elapsed_time": "1:28:32", "remaining_time": "2:54:24", "throughput": 8741.93, "total_tokens": 46443872} +{"current_steps": 68925, "total_steps": 204665, "loss": 0.1164, "lr": 1.6774842976532542e-06, "epoch": 1.6838492170131678, "percentage": 33.68, "elapsed_time": "1:28:33", "remaining_time": "2:54:23", "throughput": 8741.97, "total_tokens": 46447072} +{"current_steps": 68930, "total_steps": 204665, "loss": 0.0688, "lr": 1.6774215703400447e-06, "epoch": 1.683971367845015, "percentage": 33.68, "elapsed_time": "1:28:33", "remaining_time": "2:54:23", "throughput": 8742.07, "total_tokens": 46450656} +{"current_steps": 68935, "total_steps": 204665, "loss": 0.1232, "lr": 1.677358838100443e-06, "epoch": 1.6840935186768622, "percentage": 33.68, "elapsed_time": "1:28:33", "remaining_time": "2:54:22", "throughput": 8742.13, "total_tokens": 46454048} +{"current_steps": 68940, "total_steps": 204665, "loss": 0.1119, "lr": 1.6772961009349063e-06, "epoch": 1.6842156695087094, "percentage": 33.68, "elapsed_time": "1:28:34", "remaining_time": "2:54:22", "throughput": 8742.19, "total_tokens": 46457440} +{"current_steps": 68945, "total_steps": 204665, "loss": 0.0454, "lr": 1.6772333588438893e-06, "epoch": 1.6843378203405566, "percentage": 33.69, "elapsed_time": "1:28:34", "remaining_time": "2:54:21", "throughput": 8742.29, "total_tokens": 46461024} +{"current_steps": 68950, "total_steps": 204665, "loss": 0.1713, "lr": 1.67717061182785e-06, "epoch": 1.6844599711724038, "percentage": 33.69, "elapsed_time": "1:28:34", "remaining_time": "2:54:21", "throughput": 8742.3, "total_tokens": 46464096} +{"current_steps": 68955, "total_steps": 204665, "loss": 0.0426, "lr": 1.6771078598872435e-06, "epoch": 1.684582122004251, "percentage": 33.69, "elapsed_time": "1:28:35", "remaining_time": "2:54:20", "throughput": 8742.37, "total_tokens": 46467488} +{"current_steps": 68960, "total_steps": 204665, "loss": 0.0021, "lr": 1.6770451030225267e-06, "epoch": 1.6847042728360981, "percentage": 33.69, "elapsed_time": "1:28:35", "remaining_time": "2:54:20", "throughput": 8742.48, "total_tokens": 46471200} +{"current_steps": 68965, "total_steps": 204665, "loss": 0.0053, "lr": 1.6769823412341553e-06, "epoch": 1.6848264236679453, "percentage": 33.7, "elapsed_time": "1:28:35", "remaining_time": "2:54:19", "throughput": 8742.55, "total_tokens": 46474592} +{"current_steps": 68970, "total_steps": 204665, "loss": 0.0448, "lr": 1.6769195745225866e-06, "epoch": 1.6849485744997923, "percentage": 33.7, "elapsed_time": "1:28:36", "remaining_time": "2:54:19", "throughput": 8742.62, "total_tokens": 46478048} +{"current_steps": 68975, "total_steps": 204665, "loss": 0.0328, "lr": 1.6768568028882767e-06, "epoch": 1.6850707253316395, "percentage": 33.7, "elapsed_time": "1:28:36", "remaining_time": "2:54:19", "throughput": 8742.79, "total_tokens": 46482080} +{"current_steps": 68980, "total_steps": 204665, "loss": 0.0858, "lr": 1.6767940263316817e-06, "epoch": 1.6851928761634867, "percentage": 33.7, "elapsed_time": "1:28:36", "remaining_time": "2:54:18", "throughput": 8742.85, "total_tokens": 46485472} +{"current_steps": 68985, "total_steps": 204665, "loss": 0.0376, "lr": 1.676731244853259e-06, "epoch": 1.6853150269953339, "percentage": 33.71, "elapsed_time": "1:28:37", "remaining_time": "2:54:18", "throughput": 8742.9, "total_tokens": 46488736} +{"current_steps": 68990, "total_steps": 204665, "loss": 0.0457, "lr": 1.6766684584534647e-06, "epoch": 1.685437177827181, "percentage": 33.71, "elapsed_time": "1:28:37", "remaining_time": "2:54:17", "throughput": 8742.99, "total_tokens": 46492320} +{"current_steps": 68995, "total_steps": 204665, "loss": 0.0196, "lr": 1.6766056671327551e-06, "epoch": 1.685559328659028, "percentage": 33.71, "elapsed_time": "1:28:38", "remaining_time": "2:54:17", "throughput": 8743.02, "total_tokens": 46495520} +{"current_steps": 69000, "total_steps": 204665, "loss": 0.1296, "lr": 1.6765428708915871e-06, "epoch": 1.6856814794908752, "percentage": 33.71, "elapsed_time": "1:28:38", "remaining_time": "2:54:16", "throughput": 8743.08, "total_tokens": 46498848} +{"current_steps": 69005, "total_steps": 204665, "loss": 0.0889, "lr": 1.6764800697304172e-06, "epoch": 1.6858036303227224, "percentage": 33.72, "elapsed_time": "1:28:38", "remaining_time": "2:54:16", "throughput": 8743.18, "total_tokens": 46502496} +{"current_steps": 69010, "total_steps": 204665, "loss": 0.1051, "lr": 1.6764172636497026e-06, "epoch": 1.6859257811545696, "percentage": 33.72, "elapsed_time": "1:28:39", "remaining_time": "2:54:15", "throughput": 8743.31, "total_tokens": 46506336} +{"current_steps": 69015, "total_steps": 204665, "loss": 0.0354, "lr": 1.6763544526499e-06, "epoch": 1.6860479319864168, "percentage": 33.72, "elapsed_time": "1:28:39", "remaining_time": "2:54:15", "throughput": 8743.4, "total_tokens": 46509856} +{"current_steps": 69020, "total_steps": 204665, "loss": 0.1313, "lr": 1.6762916367314651e-06, "epoch": 1.686170082818264, "percentage": 33.72, "elapsed_time": "1:28:39", "remaining_time": "2:54:14", "throughput": 8743.44, "total_tokens": 46513120} +{"current_steps": 69025, "total_steps": 204665, "loss": 0.0722, "lr": 1.6762288158948562e-06, "epoch": 1.6862922336501112, "percentage": 33.73, "elapsed_time": "1:28:40", "remaining_time": "2:54:14", "throughput": 8743.54, "total_tokens": 46516704} +{"current_steps": 69030, "total_steps": 204665, "loss": 0.0932, "lr": 1.6761659901405291e-06, "epoch": 1.6864143844819584, "percentage": 33.73, "elapsed_time": "1:28:40", "remaining_time": "2:54:14", "throughput": 8743.63, "total_tokens": 46520288} +{"current_steps": 69035, "total_steps": 204665, "loss": 0.1082, "lr": 1.6761031594689414e-06, "epoch": 1.6865365353138055, "percentage": 33.73, "elapsed_time": "1:28:40", "remaining_time": "2:54:13", "throughput": 8743.67, "total_tokens": 46523552} +{"current_steps": 69040, "total_steps": 204665, "loss": 0.0745, "lr": 1.6760403238805494e-06, "epoch": 1.6866586861456527, "percentage": 33.73, "elapsed_time": "1:28:41", "remaining_time": "2:54:13", "throughput": 8743.77, "total_tokens": 46527200} +{"current_steps": 69045, "total_steps": 204665, "loss": 0.0354, "lr": 1.6759774833758104e-06, "epoch": 1.6867808369775, "percentage": 33.74, "elapsed_time": "1:28:41", "remaining_time": "2:54:12", "throughput": 8743.83, "total_tokens": 46530528} +{"current_steps": 69050, "total_steps": 204665, "loss": 0.0381, "lr": 1.6759146379551812e-06, "epoch": 1.686902987809347, "percentage": 33.74, "elapsed_time": "1:28:41", "remaining_time": "2:54:12", "throughput": 8743.88, "total_tokens": 46533856} +{"current_steps": 69055, "total_steps": 204665, "loss": 0.0202, "lr": 1.675851787619119e-06, "epoch": 1.6870251386411943, "percentage": 33.74, "elapsed_time": "1:28:42", "remaining_time": "2:54:11", "throughput": 8743.94, "total_tokens": 46537184} +{"current_steps": 69060, "total_steps": 204665, "loss": 0.0344, "lr": 1.6757889323680811e-06, "epoch": 1.6871472894730413, "percentage": 33.74, "elapsed_time": "1:28:42", "remaining_time": "2:54:11", "throughput": 8744.01, "total_tokens": 46540640} +{"current_steps": 69065, "total_steps": 204665, "loss": 0.1578, "lr": 1.675726072202524e-06, "epoch": 1.6872694403048885, "percentage": 33.75, "elapsed_time": "1:28:42", "remaining_time": "2:54:10", "throughput": 8744.08, "total_tokens": 46544096} +{"current_steps": 69070, "total_steps": 204665, "loss": 0.1053, "lr": 1.6756632071229053e-06, "epoch": 1.6873915911367356, "percentage": 33.75, "elapsed_time": "1:28:43", "remaining_time": "2:54:10", "throughput": 8744.09, "total_tokens": 46547168} +{"current_steps": 69075, "total_steps": 204665, "loss": 0.0037, "lr": 1.6756003371296822e-06, "epoch": 1.6875137419685828, "percentage": 33.75, "elapsed_time": "1:28:43", "remaining_time": "2:54:09", "throughput": 8744.16, "total_tokens": 46550624} +{"current_steps": 69080, "total_steps": 204665, "loss": 0.1352, "lr": 1.6755374622233114e-06, "epoch": 1.68763589280043, "percentage": 33.75, "elapsed_time": "1:28:43", "remaining_time": "2:54:09", "throughput": 8744.35, "total_tokens": 46554848} +{"current_steps": 69085, "total_steps": 204665, "loss": 0.078, "lr": 1.6754745824042505e-06, "epoch": 1.687758043632277, "percentage": 33.76, "elapsed_time": "1:28:44", "remaining_time": "2:54:09", "throughput": 8744.39, "total_tokens": 46558112} +{"current_steps": 69090, "total_steps": 204665, "loss": 0.0805, "lr": 1.675411697672957e-06, "epoch": 1.6878801944641242, "percentage": 33.76, "elapsed_time": "1:28:44", "remaining_time": "2:54:08", "throughput": 8744.39, "total_tokens": 46561056} +{"current_steps": 69095, "total_steps": 204665, "loss": 0.0913, "lr": 1.6753488080298877e-06, "epoch": 1.6880023452959714, "percentage": 33.76, "elapsed_time": "1:28:45", "remaining_time": "2:54:08", "throughput": 8744.41, "total_tokens": 46564128} +{"current_steps": 69100, "total_steps": 204665, "loss": 0.1935, "lr": 1.6752859134755003e-06, "epoch": 1.6881244961278186, "percentage": 33.76, "elapsed_time": "1:28:45", "remaining_time": "2:54:07", "throughput": 8744.49, "total_tokens": 46567584} +{"current_steps": 69105, "total_steps": 204665, "loss": 0.0271, "lr": 1.6752230140102522e-06, "epoch": 1.6882466469596658, "percentage": 33.76, "elapsed_time": "1:28:45", "remaining_time": "2:54:07", "throughput": 8744.57, "total_tokens": 46571104} +{"current_steps": 69110, "total_steps": 204665, "loss": 0.0652, "lr": 1.6751601096346006e-06, "epoch": 1.688368797791513, "percentage": 33.77, "elapsed_time": "1:28:46", "remaining_time": "2:54:06", "throughput": 8744.65, "total_tokens": 46574624} +{"current_steps": 69115, "total_steps": 204665, "loss": 0.0684, "lr": 1.675097200349003e-06, "epoch": 1.6884909486233601, "percentage": 33.77, "elapsed_time": "1:28:46", "remaining_time": "2:54:06", "throughput": 8744.74, "total_tokens": 46578144} +{"current_steps": 69120, "total_steps": 204665, "loss": 0.0776, "lr": 1.6750342861539174e-06, "epoch": 1.6886130994552073, "percentage": 33.77, "elapsed_time": "1:28:46", "remaining_time": "2:54:05", "throughput": 8744.81, "total_tokens": 46581600} +{"current_steps": 69125, "total_steps": 204665, "loss": 0.0025, "lr": 1.6749713670498007e-06, "epoch": 1.6887352502870545, "percentage": 33.77, "elapsed_time": "1:28:47", "remaining_time": "2:54:05", "throughput": 8744.8, "total_tokens": 46584480} +{"current_steps": 69130, "total_steps": 204665, "loss": 0.0582, "lr": 1.6749084430371103e-06, "epoch": 1.6888574011189017, "percentage": 33.78, "elapsed_time": "1:28:47", "remaining_time": "2:54:04", "throughput": 8744.91, "total_tokens": 46588192} +{"current_steps": 69135, "total_steps": 204665, "loss": 0.0433, "lr": 1.6748455141163048e-06, "epoch": 1.688979551950749, "percentage": 33.78, "elapsed_time": "1:28:47", "remaining_time": "2:54:04", "throughput": 8745.0, "total_tokens": 46591712} +{"current_steps": 69140, "total_steps": 204665, "loss": 0.0615, "lr": 1.6747825802878408e-06, "epoch": 1.689101702782596, "percentage": 33.78, "elapsed_time": "1:28:48", "remaining_time": "2:54:03", "throughput": 8745.04, "total_tokens": 46594912} +{"current_steps": 69145, "total_steps": 204665, "loss": 0.0818, "lr": 1.6747196415521768e-06, "epoch": 1.6892238536144433, "percentage": 33.78, "elapsed_time": "1:28:48", "remaining_time": "2:54:03", "throughput": 8745.12, "total_tokens": 46598432} +{"current_steps": 69150, "total_steps": 204665, "loss": 0.1306, "lr": 1.6746566979097697e-06, "epoch": 1.6893460044462902, "percentage": 33.79, "elapsed_time": "1:28:48", "remaining_time": "2:54:03", "throughput": 8745.14, "total_tokens": 46601504} +{"current_steps": 69155, "total_steps": 204665, "loss": 0.0447, "lr": 1.6745937493610776e-06, "epoch": 1.6894681552781374, "percentage": 33.79, "elapsed_time": "1:28:49", "remaining_time": "2:54:02", "throughput": 8745.22, "total_tokens": 46605024} +{"current_steps": 69160, "total_steps": 204665, "loss": 0.002, "lr": 1.6745307959065584e-06, "epoch": 1.6895903061099846, "percentage": 33.79, "elapsed_time": "1:28:49", "remaining_time": "2:54:02", "throughput": 8745.29, "total_tokens": 46608416} +{"current_steps": 69165, "total_steps": 204665, "loss": 0.1776, "lr": 1.6744678375466697e-06, "epoch": 1.6897124569418318, "percentage": 33.79, "elapsed_time": "1:28:49", "remaining_time": "2:54:01", "throughput": 8745.42, "total_tokens": 46612256} +{"current_steps": 69170, "total_steps": 204665, "loss": 0.0016, "lr": 1.6744048742818698e-06, "epoch": 1.6898346077736788, "percentage": 33.8, "elapsed_time": "1:28:50", "remaining_time": "2:54:01", "throughput": 8745.51, "total_tokens": 46615776} +{"current_steps": 69175, "total_steps": 204665, "loss": 0.1415, "lr": 1.674341906112616e-06, "epoch": 1.689956758605526, "percentage": 33.8, "elapsed_time": "1:28:50", "remaining_time": "2:54:00", "throughput": 8745.54, "total_tokens": 46618912} +{"current_steps": 69180, "total_steps": 204665, "loss": 0.0341, "lr": 1.6742789330393668e-06, "epoch": 1.6900789094373732, "percentage": 33.8, "elapsed_time": "1:28:50", "remaining_time": "2:54:00", "throughput": 8745.63, "total_tokens": 46622496} +{"current_steps": 69185, "total_steps": 204665, "loss": 0.0437, "lr": 1.6742159550625794e-06, "epoch": 1.6902010602692203, "percentage": 33.8, "elapsed_time": "1:28:51", "remaining_time": "2:53:59", "throughput": 8745.67, "total_tokens": 46625696} +{"current_steps": 69190, "total_steps": 204665, "loss": 0.1078, "lr": 1.6741529721827123e-06, "epoch": 1.6903232111010675, "percentage": 33.81, "elapsed_time": "1:28:51", "remaining_time": "2:53:59", "throughput": 8745.73, "total_tokens": 46629088} +{"current_steps": 69195, "total_steps": 204665, "loss": 0.3351, "lr": 1.6740899844002238e-06, "epoch": 1.6904453619329147, "percentage": 33.81, "elapsed_time": "1:28:51", "remaining_time": "2:53:58", "throughput": 8745.79, "total_tokens": 46632416} +{"current_steps": 69200, "total_steps": 204665, "loss": 0.1243, "lr": 1.6740269917155715e-06, "epoch": 1.690567512764762, "percentage": 33.81, "elapsed_time": "1:28:52", "remaining_time": "2:53:58", "throughput": 8745.86, "total_tokens": 46635872} +{"current_steps": 69205, "total_steps": 204665, "loss": 0.1398, "lr": 1.6739639941292134e-06, "epoch": 1.690689663596609, "percentage": 33.81, "elapsed_time": "1:28:52", "remaining_time": "2:53:58", "throughput": 8745.92, "total_tokens": 46639264} +{"current_steps": 69210, "total_steps": 204665, "loss": 0.1354, "lr": 1.673900991641608e-06, "epoch": 1.6908118144284563, "percentage": 33.82, "elapsed_time": "1:28:53", "remaining_time": "2:53:57", "throughput": 8746.0, "total_tokens": 46642720} +{"current_steps": 69215, "total_steps": 204665, "loss": 0.1438, "lr": 1.6738379842532134e-06, "epoch": 1.6909339652603035, "percentage": 33.82, "elapsed_time": "1:28:53", "remaining_time": "2:53:57", "throughput": 8746.1, "total_tokens": 46646368} +{"current_steps": 69220, "total_steps": 204665, "loss": 0.0384, "lr": 1.6737749719644877e-06, "epoch": 1.6910561160921507, "percentage": 33.82, "elapsed_time": "1:28:53", "remaining_time": "2:53:56", "throughput": 8746.23, "total_tokens": 46650208} +{"current_steps": 69225, "total_steps": 204665, "loss": 0.1072, "lr": 1.673711954775889e-06, "epoch": 1.6911782669239979, "percentage": 33.82, "elapsed_time": "1:28:54", "remaining_time": "2:53:56", "throughput": 8746.23, "total_tokens": 46653216} +{"current_steps": 69230, "total_steps": 204665, "loss": 0.1493, "lr": 1.673648932687876e-06, "epoch": 1.691300417755845, "percentage": 33.83, "elapsed_time": "1:28:54", "remaining_time": "2:53:55", "throughput": 8746.29, "total_tokens": 46656544} +{"current_steps": 69235, "total_steps": 204665, "loss": 0.0677, "lr": 1.6735859057009068e-06, "epoch": 1.6914225685876922, "percentage": 33.83, "elapsed_time": "1:28:54", "remaining_time": "2:53:55", "throughput": 8746.33, "total_tokens": 46659808} +{"current_steps": 69240, "total_steps": 204665, "loss": 0.1851, "lr": 1.6735228738154397e-06, "epoch": 1.6915447194195392, "percentage": 33.83, "elapsed_time": "1:28:55", "remaining_time": "2:53:54", "throughput": 8746.36, "total_tokens": 46663008} +{"current_steps": 69245, "total_steps": 204665, "loss": 0.0098, "lr": 1.6734598370319328e-06, "epoch": 1.6916668702513864, "percentage": 33.83, "elapsed_time": "1:28:55", "remaining_time": "2:53:54", "throughput": 8746.39, "total_tokens": 46666144} +{"current_steps": 69250, "total_steps": 204665, "loss": 0.1005, "lr": 1.673396795350845e-06, "epoch": 1.6917890210832336, "percentage": 33.84, "elapsed_time": "1:28:55", "remaining_time": "2:53:53", "throughput": 8746.52, "total_tokens": 46669984} +{"current_steps": 69255, "total_steps": 204665, "loss": 0.0059, "lr": 1.6733337487726346e-06, "epoch": 1.6919111719150808, "percentage": 33.84, "elapsed_time": "1:28:56", "remaining_time": "2:53:53", "throughput": 8746.57, "total_tokens": 46673248} +{"current_steps": 69260, "total_steps": 204665, "loss": 0.0398, "lr": 1.67327069729776e-06, "epoch": 1.6920333227469277, "percentage": 33.84, "elapsed_time": "1:28:56", "remaining_time": "2:53:53", "throughput": 8746.64, "total_tokens": 46676640} +{"current_steps": 69265, "total_steps": 204665, "loss": 0.0572, "lr": 1.6732076409266802e-06, "epoch": 1.692155473578775, "percentage": 33.84, "elapsed_time": "1:28:56", "remaining_time": "2:53:52", "throughput": 8746.75, "total_tokens": 46680352} +{"current_steps": 69270, "total_steps": 204665, "loss": 0.095, "lr": 1.673144579659853e-06, "epoch": 1.6922776244106221, "percentage": 33.85, "elapsed_time": "1:28:57", "remaining_time": "2:53:52", "throughput": 8746.89, "total_tokens": 46684192} +{"current_steps": 69275, "total_steps": 204665, "loss": 0.0931, "lr": 1.6730815134977374e-06, "epoch": 1.6923997752424693, "percentage": 33.85, "elapsed_time": "1:28:57", "remaining_time": "2:53:51", "throughput": 8746.9, "total_tokens": 46687200} +{"current_steps": 69280, "total_steps": 204665, "loss": 0.0697, "lr": 1.6730184424407922e-06, "epoch": 1.6925219260743165, "percentage": 33.85, "elapsed_time": "1:28:57", "remaining_time": "2:53:51", "throughput": 8747.01, "total_tokens": 46690912} +{"current_steps": 69285, "total_steps": 204665, "loss": 0.0688, "lr": 1.6729553664894756e-06, "epoch": 1.6926440769061637, "percentage": 33.85, "elapsed_time": "1:28:58", "remaining_time": "2:53:50", "throughput": 8747.08, "total_tokens": 46694304} +{"current_steps": 69290, "total_steps": 204665, "loss": 0.0371, "lr": 1.6728922856442465e-06, "epoch": 1.6927662277380109, "percentage": 33.86, "elapsed_time": "1:28:58", "remaining_time": "2:53:50", "throughput": 8747.13, "total_tokens": 46697632} +{"current_steps": 69295, "total_steps": 204665, "loss": 0.0344, "lr": 1.672829199905564e-06, "epoch": 1.692888378569858, "percentage": 33.86, "elapsed_time": "1:28:58", "remaining_time": "2:53:49", "throughput": 8747.19, "total_tokens": 46700960} +{"current_steps": 69300, "total_steps": 204665, "loss": 0.1221, "lr": 1.6727661092738865e-06, "epoch": 1.6930105294017053, "percentage": 33.86, "elapsed_time": "1:28:59", "remaining_time": "2:53:49", "throughput": 8747.2, "total_tokens": 46704032} +{"current_steps": 69305, "total_steps": 204665, "loss": 0.0472, "lr": 1.6727030137496728e-06, "epoch": 1.6931326802335525, "percentage": 33.86, "elapsed_time": "1:28:59", "remaining_time": "2:53:48", "throughput": 8747.27, "total_tokens": 46707424} +{"current_steps": 69310, "total_steps": 204665, "loss": 0.1567, "lr": 1.672639913333382e-06, "epoch": 1.6932548310653996, "percentage": 33.87, "elapsed_time": "1:29:00", "remaining_time": "2:53:48", "throughput": 8747.31, "total_tokens": 46710624} +{"current_steps": 69315, "total_steps": 204665, "loss": 0.0425, "lr": 1.6725768080254726e-06, "epoch": 1.6933769818972468, "percentage": 33.87, "elapsed_time": "1:29:00", "remaining_time": "2:53:48", "throughput": 8747.4, "total_tokens": 46714208} +{"current_steps": 69320, "total_steps": 204665, "loss": 0.0015, "lr": 1.6725136978264038e-06, "epoch": 1.693499132729094, "percentage": 33.87, "elapsed_time": "1:29:00", "remaining_time": "2:53:47", "throughput": 8747.45, "total_tokens": 46717536} +{"current_steps": 69325, "total_steps": 204665, "loss": 0.0483, "lr": 1.6724505827366349e-06, "epoch": 1.6936212835609412, "percentage": 33.87, "elapsed_time": "1:29:01", "remaining_time": "2:53:47", "throughput": 8747.48, "total_tokens": 46720736} +{"current_steps": 69330, "total_steps": 204665, "loss": 0.0668, "lr": 1.6723874627566242e-06, "epoch": 1.6937434343927882, "percentage": 33.87, "elapsed_time": "1:29:01", "remaining_time": "2:53:46", "throughput": 8747.56, "total_tokens": 46724192} +{"current_steps": 69335, "total_steps": 204665, "loss": 0.0038, "lr": 1.672324337886831e-06, "epoch": 1.6938655852246354, "percentage": 33.88, "elapsed_time": "1:29:01", "remaining_time": "2:53:46", "throughput": 8747.67, "total_tokens": 46727904} +{"current_steps": 69340, "total_steps": 204665, "loss": 0.1054, "lr": 1.6722612081277143e-06, "epoch": 1.6939877360564826, "percentage": 33.88, "elapsed_time": "1:29:02", "remaining_time": "2:53:45", "throughput": 8747.68, "total_tokens": 46730912} +{"current_steps": 69345, "total_steps": 204665, "loss": 0.0426, "lr": 1.6721980734797334e-06, "epoch": 1.6941098868883298, "percentage": 33.88, "elapsed_time": "1:29:02", "remaining_time": "2:53:45", "throughput": 8747.71, "total_tokens": 46734048} +{"current_steps": 69350, "total_steps": 204665, "loss": 0.0033, "lr": 1.6721349339433472e-06, "epoch": 1.6942320377201767, "percentage": 33.88, "elapsed_time": "1:29:02", "remaining_time": "2:53:44", "throughput": 8747.74, "total_tokens": 46737184} +{"current_steps": 69355, "total_steps": 204665, "loss": 0.0835, "lr": 1.672071789519015e-06, "epoch": 1.694354188552024, "percentage": 33.89, "elapsed_time": "1:29:03", "remaining_time": "2:53:44", "throughput": 8747.78, "total_tokens": 46740384} +{"current_steps": 69360, "total_steps": 204665, "loss": 0.1602, "lr": 1.672008640207196e-06, "epoch": 1.694476339383871, "percentage": 33.89, "elapsed_time": "1:29:03", "remaining_time": "2:53:43", "throughput": 8747.78, "total_tokens": 46743328} +{"current_steps": 69365, "total_steps": 204665, "loss": 0.0006, "lr": 1.6719454860083495e-06, "epoch": 1.6945984902157183, "percentage": 33.89, "elapsed_time": "1:29:03", "remaining_time": "2:53:43", "throughput": 8747.8, "total_tokens": 46746464} +{"current_steps": 69370, "total_steps": 204665, "loss": 0.1186, "lr": 1.6718823269229348e-06, "epoch": 1.6947206410475655, "percentage": 33.89, "elapsed_time": "1:29:04", "remaining_time": "2:53:42", "throughput": 8747.82, "total_tokens": 46749536} +{"current_steps": 69375, "total_steps": 204665, "loss": 0.0287, "lr": 1.6718191629514112e-06, "epoch": 1.6948427918794127, "percentage": 33.9, "elapsed_time": "1:29:04", "remaining_time": "2:53:42", "throughput": 8747.86, "total_tokens": 46752800} +{"current_steps": 69380, "total_steps": 204665, "loss": 0.0619, "lr": 1.6717559940942373e-06, "epoch": 1.6949649427112599, "percentage": 33.9, "elapsed_time": "1:29:04", "remaining_time": "2:53:41", "throughput": 8747.93, "total_tokens": 46756256} +{"current_steps": 69385, "total_steps": 204665, "loss": 0.1257, "lr": 1.6716928203518736e-06, "epoch": 1.695087093543107, "percentage": 33.9, "elapsed_time": "1:29:05", "remaining_time": "2:53:41", "throughput": 8748.06, "total_tokens": 46760096} +{"current_steps": 69390, "total_steps": 204665, "loss": 0.1073, "lr": 1.671629641724779e-06, "epoch": 1.6952092443749542, "percentage": 33.9, "elapsed_time": "1:29:05", "remaining_time": "2:53:41", "throughput": 8748.15, "total_tokens": 46763616} +{"current_steps": 69395, "total_steps": 204665, "loss": 0.0021, "lr": 1.671566458213413e-06, "epoch": 1.6953313952068014, "percentage": 33.91, "elapsed_time": "1:29:05", "remaining_time": "2:53:40", "throughput": 8748.26, "total_tokens": 46767328} +{"current_steps": 69400, "total_steps": 204665, "loss": 0.0018, "lr": 1.6715032698182352e-06, "epoch": 1.6954535460386486, "percentage": 33.91, "elapsed_time": "1:29:06", "remaining_time": "2:53:40", "throughput": 8748.36, "total_tokens": 46770912} +{"current_steps": 69405, "total_steps": 204665, "loss": 0.2174, "lr": 1.6714400765397047e-06, "epoch": 1.6955756968704958, "percentage": 33.91, "elapsed_time": "1:29:06", "remaining_time": "2:53:39", "throughput": 8748.5, "total_tokens": 46774816} +{"current_steps": 69410, "total_steps": 204665, "loss": 0.0606, "lr": 1.6713768783782815e-06, "epoch": 1.695697847702343, "percentage": 33.91, "elapsed_time": "1:29:06", "remaining_time": "2:53:39", "throughput": 8748.51, "total_tokens": 46777888} +{"current_steps": 69415, "total_steps": 204665, "loss": 0.1072, "lr": 1.6713136753344253e-06, "epoch": 1.69581999853419, "percentage": 33.92, "elapsed_time": "1:29:07", "remaining_time": "2:53:38", "throughput": 8748.58, "total_tokens": 46781280} +{"current_steps": 69420, "total_steps": 204665, "loss": 0.0504, "lr": 1.6712504674085951e-06, "epoch": 1.6959421493660372, "percentage": 33.92, "elapsed_time": "1:29:07", "remaining_time": "2:53:38", "throughput": 8748.66, "total_tokens": 46784800} +{"current_steps": 69425, "total_steps": 204665, "loss": 0.0243, "lr": 1.6711872546012512e-06, "epoch": 1.6960643001978843, "percentage": 33.92, "elapsed_time": "1:29:08", "remaining_time": "2:53:37", "throughput": 8748.7, "total_tokens": 46788064} +{"current_steps": 69430, "total_steps": 204665, "loss": 0.2521, "lr": 1.671124036912853e-06, "epoch": 1.6961864510297315, "percentage": 33.92, "elapsed_time": "1:29:08", "remaining_time": "2:53:37", "throughput": 8748.83, "total_tokens": 46791904} +{"current_steps": 69435, "total_steps": 204665, "loss": 0.0017, "lr": 1.6710608143438606e-06, "epoch": 1.6963086018615787, "percentage": 33.93, "elapsed_time": "1:29:08", "remaining_time": "2:53:37", "throughput": 8748.88, "total_tokens": 46795168} +{"current_steps": 69440, "total_steps": 204665, "loss": 0.1179, "lr": 1.670997586894733e-06, "epoch": 1.6964307526934257, "percentage": 33.93, "elapsed_time": "1:29:09", "remaining_time": "2:53:36", "throughput": 8749.0, "total_tokens": 46798944} +{"current_steps": 69445, "total_steps": 204665, "loss": 0.0248, "lr": 1.6709343545659307e-06, "epoch": 1.6965529035252729, "percentage": 33.93, "elapsed_time": "1:29:09", "remaining_time": "2:53:36", "throughput": 8749.01, "total_tokens": 46801952} +{"current_steps": 69450, "total_steps": 204665, "loss": 0.0537, "lr": 1.670871117357913e-06, "epoch": 1.69667505435712, "percentage": 33.93, "elapsed_time": "1:29:09", "remaining_time": "2:53:35", "throughput": 8749.05, "total_tokens": 46805216} +{"current_steps": 69455, "total_steps": 204665, "loss": 0.0364, "lr": 1.6708078752711408e-06, "epoch": 1.6967972051889673, "percentage": 33.94, "elapsed_time": "1:29:10", "remaining_time": "2:53:35", "throughput": 8749.19, "total_tokens": 46809120} +{"current_steps": 69460, "total_steps": 204665, "loss": 0.0009, "lr": 1.6707446283060727e-06, "epoch": 1.6969193560208145, "percentage": 33.94, "elapsed_time": "1:29:10", "remaining_time": "2:53:34", "throughput": 8749.24, "total_tokens": 46812384} +{"current_steps": 69465, "total_steps": 204665, "loss": 0.0731, "lr": 1.6706813764631696e-06, "epoch": 1.6970415068526616, "percentage": 33.94, "elapsed_time": "1:29:10", "remaining_time": "2:53:34", "throughput": 8749.25, "total_tokens": 46815456} +{"current_steps": 69470, "total_steps": 204665, "loss": 0.0398, "lr": 1.6706181197428908e-06, "epoch": 1.6971636576845088, "percentage": 33.94, "elapsed_time": "1:29:11", "remaining_time": "2:53:33", "throughput": 8749.28, "total_tokens": 46818592} +{"current_steps": 69475, "total_steps": 204665, "loss": 0.1567, "lr": 1.6705548581456967e-06, "epoch": 1.697285808516356, "percentage": 33.95, "elapsed_time": "1:29:11", "remaining_time": "2:53:33", "throughput": 8749.35, "total_tokens": 46822048} +{"current_steps": 69480, "total_steps": 204665, "loss": 0.0009, "lr": 1.6704915916720474e-06, "epoch": 1.6974079593482032, "percentage": 33.95, "elapsed_time": "1:29:11", "remaining_time": "2:53:32", "throughput": 8749.43, "total_tokens": 46825568} +{"current_steps": 69485, "total_steps": 204665, "loss": 0.0305, "lr": 1.670428320322403e-06, "epoch": 1.6975301101800504, "percentage": 33.95, "elapsed_time": "1:29:12", "remaining_time": "2:53:32", "throughput": 8749.47, "total_tokens": 46828768} +{"current_steps": 69490, "total_steps": 204665, "loss": 0.031, "lr": 1.6703650440972235e-06, "epoch": 1.6976522610118976, "percentage": 33.95, "elapsed_time": "1:29:12", "remaining_time": "2:53:31", "throughput": 8749.49, "total_tokens": 46831840} +{"current_steps": 69495, "total_steps": 204665, "loss": 0.0762, "lr": 1.670301762996969e-06, "epoch": 1.6977744118437448, "percentage": 33.96, "elapsed_time": "1:29:12", "remaining_time": "2:53:31", "throughput": 8749.53, "total_tokens": 46835104} +{"current_steps": 69500, "total_steps": 204665, "loss": 0.1448, "lr": 1.6702384770220998e-06, "epoch": 1.697896562675592, "percentage": 33.96, "elapsed_time": "1:29:13", "remaining_time": "2:53:31", "throughput": 8749.58, "total_tokens": 46838432} +{"current_steps": 69505, "total_steps": 204665, "loss": 0.1469, "lr": 1.6701751861730763e-06, "epoch": 1.698018713507439, "percentage": 33.96, "elapsed_time": "1:29:13", "remaining_time": "2:53:30", "throughput": 8749.62, "total_tokens": 46841632} +{"current_steps": 69510, "total_steps": 204665, "loss": 0.1473, "lr": 1.6701118904503581e-06, "epoch": 1.6981408643392861, "percentage": 33.96, "elapsed_time": "1:29:13", "remaining_time": "2:53:30", "throughput": 8749.69, "total_tokens": 46845088} +{"current_steps": 69515, "total_steps": 204665, "loss": 0.1444, "lr": 1.6700485898544067e-06, "epoch": 1.6982630151711333, "percentage": 33.97, "elapsed_time": "1:29:14", "remaining_time": "2:53:29", "throughput": 8749.73, "total_tokens": 46848288} +{"current_steps": 69520, "total_steps": 204665, "loss": 0.0402, "lr": 1.6699852843856813e-06, "epoch": 1.6983851660029805, "percentage": 33.97, "elapsed_time": "1:29:14", "remaining_time": "2:53:29", "throughput": 8749.78, "total_tokens": 46851552} +{"current_steps": 69525, "total_steps": 204665, "loss": 0.1171, "lr": 1.6699219740446426e-06, "epoch": 1.6985073168348277, "percentage": 33.97, "elapsed_time": "1:29:14", "remaining_time": "2:53:28", "throughput": 8749.83, "total_tokens": 46854880} +{"current_steps": 69530, "total_steps": 204665, "loss": 0.0676, "lr": 1.6698586588317515e-06, "epoch": 1.6986294676666747, "percentage": 33.97, "elapsed_time": "1:29:15", "remaining_time": "2:53:28", "throughput": 8749.89, "total_tokens": 46858208} +{"current_steps": 69535, "total_steps": 204665, "loss": 0.0055, "lr": 1.669795338747468e-06, "epoch": 1.6987516184985219, "percentage": 33.98, "elapsed_time": "1:29:15", "remaining_time": "2:53:27", "throughput": 8749.99, "total_tokens": 46861792} +{"current_steps": 69540, "total_steps": 204665, "loss": 0.0251, "lr": 1.6697320137922524e-06, "epoch": 1.698873769330369, "percentage": 33.98, "elapsed_time": "1:29:15", "remaining_time": "2:53:27", "throughput": 8750.05, "total_tokens": 46865184} +{"current_steps": 69545, "total_steps": 204665, "loss": 0.1008, "lr": 1.6696686839665655e-06, "epoch": 1.6989959201622162, "percentage": 33.98, "elapsed_time": "1:29:16", "remaining_time": "2:53:26", "throughput": 8750.1, "total_tokens": 46868512} +{"current_steps": 69550, "total_steps": 204665, "loss": 0.0005, "lr": 1.669605349270868e-06, "epoch": 1.6991180709940634, "percentage": 33.98, "elapsed_time": "1:29:16", "remaining_time": "2:53:26", "throughput": 8750.14, "total_tokens": 46871712} +{"current_steps": 69555, "total_steps": 204665, "loss": 0.0801, "lr": 1.66954200970562e-06, "epoch": 1.6992402218259106, "percentage": 33.98, "elapsed_time": "1:29:17", "remaining_time": "2:53:25", "throughput": 8750.19, "total_tokens": 46875040} +{"current_steps": 69560, "total_steps": 204665, "loss": 0.0201, "lr": 1.6694786652712827e-06, "epoch": 1.6993623726577578, "percentage": 33.99, "elapsed_time": "1:29:17", "remaining_time": "2:53:25", "throughput": 8750.26, "total_tokens": 46878432} +{"current_steps": 69565, "total_steps": 204665, "loss": 0.1146, "lr": 1.6694153159683162e-06, "epoch": 1.699484523489605, "percentage": 33.99, "elapsed_time": "1:29:17", "remaining_time": "2:53:25", "throughput": 8750.24, "total_tokens": 46881248} +{"current_steps": 69570, "total_steps": 204665, "loss": 0.1317, "lr": 1.6693519617971816e-06, "epoch": 1.6996066743214522, "percentage": 33.99, "elapsed_time": "1:29:18", "remaining_time": "2:53:24", "throughput": 8750.42, "total_tokens": 46885408} +{"current_steps": 69575, "total_steps": 204665, "loss": 0.0317, "lr": 1.6692886027583397e-06, "epoch": 1.6997288251532994, "percentage": 33.99, "elapsed_time": "1:29:18", "remaining_time": "2:53:24", "throughput": 8750.5, "total_tokens": 46888928} +{"current_steps": 69580, "total_steps": 204665, "loss": 0.1065, "lr": 1.669225238852251e-06, "epoch": 1.6998509759851466, "percentage": 34.0, "elapsed_time": "1:29:18", "remaining_time": "2:53:23", "throughput": 8750.57, "total_tokens": 46892320} +{"current_steps": 69585, "total_steps": 204665, "loss": 0.2065, "lr": 1.6691618700793763e-06, "epoch": 1.6999731268169938, "percentage": 34.0, "elapsed_time": "1:29:19", "remaining_time": "2:53:23", "throughput": 8750.62, "total_tokens": 46895648} +{"current_steps": 69590, "total_steps": 204665, "loss": 0.1437, "lr": 1.6690984964401764e-06, "epoch": 1.700095277648841, "percentage": 34.0, "elapsed_time": "1:29:19", "remaining_time": "2:53:22", "throughput": 8750.65, "total_tokens": 46898784} +{"current_steps": 69595, "total_steps": 204665, "loss": 0.1236, "lr": 1.6690351179351123e-06, "epoch": 1.700217428480688, "percentage": 34.0, "elapsed_time": "1:29:19", "remaining_time": "2:53:22", "throughput": 8750.65, "total_tokens": 46901728} +{"current_steps": 69600, "total_steps": 204665, "loss": 0.161, "lr": 1.668971734564645e-06, "epoch": 1.700339579312535, "percentage": 34.01, "elapsed_time": "1:29:20", "remaining_time": "2:53:21", "throughput": 8750.73, "total_tokens": 46905248} +{"current_steps": 69605, "total_steps": 204665, "loss": 0.0781, "lr": 1.668908346329235e-06, "epoch": 1.7004617301443823, "percentage": 34.01, "elapsed_time": "1:29:20", "remaining_time": "2:53:21", "throughput": 8750.75, "total_tokens": 46908384} +{"current_steps": 69610, "total_steps": 204665, "loss": 0.2057, "lr": 1.668844953229344e-06, "epoch": 1.7005838809762295, "percentage": 34.01, "elapsed_time": "1:29:20", "remaining_time": "2:53:20", "throughput": 8750.8, "total_tokens": 46911648} +{"current_steps": 69615, "total_steps": 204665, "loss": 0.0927, "lr": 1.6687815552654325e-06, "epoch": 1.7007060318080767, "percentage": 34.01, "elapsed_time": "1:29:21", "remaining_time": "2:53:20", "throughput": 8750.86, "total_tokens": 46914976} +{"current_steps": 69620, "total_steps": 204665, "loss": 0.0305, "lr": 1.6687181524379613e-06, "epoch": 1.7008281826399236, "percentage": 34.02, "elapsed_time": "1:29:21", "remaining_time": "2:53:20", "throughput": 8750.98, "total_tokens": 46918752} +{"current_steps": 69625, "total_steps": 204665, "loss": 0.0596, "lr": 1.6686547447473924e-06, "epoch": 1.7009503334717708, "percentage": 34.02, "elapsed_time": "1:29:21", "remaining_time": "2:53:19", "throughput": 8751.07, "total_tokens": 46922336} +{"current_steps": 69630, "total_steps": 204665, "loss": 0.0261, "lr": 1.668591332194186e-06, "epoch": 1.701072484303618, "percentage": 34.02, "elapsed_time": "1:29:22", "remaining_time": "2:53:19", "throughput": 8751.15, "total_tokens": 46925792} +{"current_steps": 69635, "total_steps": 204665, "loss": 0.0987, "lr": 1.6685279147788036e-06, "epoch": 1.7011946351354652, "percentage": 34.02, "elapsed_time": "1:29:22", "remaining_time": "2:53:18", "throughput": 8751.37, "total_tokens": 46930272} +{"current_steps": 69640, "total_steps": 204665, "loss": 0.1937, "lr": 1.6684644925017067e-06, "epoch": 1.7013167859673124, "percentage": 34.03, "elapsed_time": "1:29:22", "remaining_time": "2:53:18", "throughput": 8751.42, "total_tokens": 46933536} +{"current_steps": 69645, "total_steps": 204665, "loss": 0.149, "lr": 1.6684010653633559e-06, "epoch": 1.7014389367991596, "percentage": 34.03, "elapsed_time": "1:29:23", "remaining_time": "2:53:17", "throughput": 8751.47, "total_tokens": 46936864} +{"current_steps": 69650, "total_steps": 204665, "loss": 0.0415, "lr": 1.6683376333642127e-06, "epoch": 1.7015610876310068, "percentage": 34.03, "elapsed_time": "1:29:23", "remaining_time": "2:53:17", "throughput": 8751.52, "total_tokens": 46940128} +{"current_steps": 69655, "total_steps": 204665, "loss": 0.0025, "lr": 1.6682741965047386e-06, "epoch": 1.701683238462854, "percentage": 34.03, "elapsed_time": "1:29:23", "remaining_time": "2:53:16", "throughput": 8751.55, "total_tokens": 46943264} +{"current_steps": 69660, "total_steps": 204665, "loss": 0.1639, "lr": 1.6682107547853948e-06, "epoch": 1.7018053892947012, "percentage": 34.04, "elapsed_time": "1:29:24", "remaining_time": "2:53:16", "throughput": 8751.61, "total_tokens": 46946656} +{"current_steps": 69665, "total_steps": 204665, "loss": 0.0259, "lr": 1.6681473082066426e-06, "epoch": 1.7019275401265483, "percentage": 34.04, "elapsed_time": "1:29:24", "remaining_time": "2:53:15", "throughput": 8751.66, "total_tokens": 46949920} +{"current_steps": 69670, "total_steps": 204665, "loss": 0.0954, "lr": 1.6680838567689436e-06, "epoch": 1.7020496909583955, "percentage": 34.04, "elapsed_time": "1:29:25", "remaining_time": "2:53:15", "throughput": 8751.85, "total_tokens": 46954144} +{"current_steps": 69675, "total_steps": 204665, "loss": 0.1621, "lr": 1.6680204004727592e-06, "epoch": 1.7021718417902427, "percentage": 34.04, "elapsed_time": "1:29:25", "remaining_time": "2:53:15", "throughput": 8751.9, "total_tokens": 46957472} +{"current_steps": 69680, "total_steps": 204665, "loss": 0.0807, "lr": 1.6679569393185506e-06, "epoch": 1.70229399262209, "percentage": 34.05, "elapsed_time": "1:29:25", "remaining_time": "2:53:14", "throughput": 8752.04, "total_tokens": 46961376} +{"current_steps": 69685, "total_steps": 204665, "loss": 0.0841, "lr": 1.6678934733067793e-06, "epoch": 1.7024161434539369, "percentage": 34.05, "elapsed_time": "1:29:26", "remaining_time": "2:53:14", "throughput": 8752.09, "total_tokens": 46964640} +{"current_steps": 69690, "total_steps": 204665, "loss": 0.0441, "lr": 1.6678300024379073e-06, "epoch": 1.702538294285784, "percentage": 34.05, "elapsed_time": "1:29:26", "remaining_time": "2:53:13", "throughput": 8752.22, "total_tokens": 46968480} +{"current_steps": 69695, "total_steps": 204665, "loss": 0.0706, "lr": 1.6677665267123956e-06, "epoch": 1.7026604451176313, "percentage": 34.05, "elapsed_time": "1:29:26", "remaining_time": "2:53:13", "throughput": 8752.27, "total_tokens": 46971744} +{"current_steps": 69700, "total_steps": 204665, "loss": 0.0741, "lr": 1.6677030461307065e-06, "epoch": 1.7027825959494785, "percentage": 34.06, "elapsed_time": "1:29:27", "remaining_time": "2:53:12", "throughput": 8752.34, "total_tokens": 46975200} +{"current_steps": 69705, "total_steps": 204665, "loss": 0.1458, "lr": 1.667639560693301e-06, "epoch": 1.7029047467813254, "percentage": 34.06, "elapsed_time": "1:29:27", "remaining_time": "2:53:12", "throughput": 8752.5, "total_tokens": 46979232} +{"current_steps": 69710, "total_steps": 204665, "loss": 0.0283, "lr": 1.6675760704006412e-06, "epoch": 1.7030268976131726, "percentage": 34.06, "elapsed_time": "1:29:27", "remaining_time": "2:53:11", "throughput": 8752.7, "total_tokens": 46983520} +{"current_steps": 69715, "total_steps": 204665, "loss": 0.1395, "lr": 1.6675125752531884e-06, "epoch": 1.7031490484450198, "percentage": 34.06, "elapsed_time": "1:29:28", "remaining_time": "2:53:11", "throughput": 8752.73, "total_tokens": 46986720} +{"current_steps": 69720, "total_steps": 204665, "loss": 0.1001, "lr": 1.667449075251405e-06, "epoch": 1.703271199276867, "percentage": 34.07, "elapsed_time": "1:29:28", "remaining_time": "2:53:11", "throughput": 8752.76, "total_tokens": 46989920} +{"current_steps": 69725, "total_steps": 204665, "loss": 0.0372, "lr": 1.6673855703957523e-06, "epoch": 1.7033933501087142, "percentage": 34.07, "elapsed_time": "1:29:28", "remaining_time": "2:53:10", "throughput": 8752.81, "total_tokens": 46993184} +{"current_steps": 69730, "total_steps": 204665, "loss": 0.1044, "lr": 1.667322060686692e-06, "epoch": 1.7035155009405614, "percentage": 34.07, "elapsed_time": "1:29:29", "remaining_time": "2:53:10", "throughput": 8752.89, "total_tokens": 46996640} +{"current_steps": 69735, "total_steps": 204665, "loss": 0.0357, "lr": 1.667258546124686e-06, "epoch": 1.7036376517724086, "percentage": 34.07, "elapsed_time": "1:29:29", "remaining_time": "2:53:09", "throughput": 8752.94, "total_tokens": 46999968} +{"current_steps": 69740, "total_steps": 204665, "loss": 0.0337, "lr": 1.6671950267101972e-06, "epoch": 1.7037598026042557, "percentage": 34.08, "elapsed_time": "1:29:29", "remaining_time": "2:53:09", "throughput": 8753.03, "total_tokens": 47003488} +{"current_steps": 69745, "total_steps": 204665, "loss": 0.0329, "lr": 1.667131502443686e-06, "epoch": 1.703881953436103, "percentage": 34.08, "elapsed_time": "1:29:30", "remaining_time": "2:53:08", "throughput": 8753.13, "total_tokens": 47007136} +{"current_steps": 69750, "total_steps": 204665, "loss": 0.1352, "lr": 1.6670679733256154e-06, "epoch": 1.7040041042679501, "percentage": 34.08, "elapsed_time": "1:29:30", "remaining_time": "2:53:08", "throughput": 8753.2, "total_tokens": 47010528} +{"current_steps": 69755, "total_steps": 204665, "loss": 0.0414, "lr": 1.6670044393564467e-06, "epoch": 1.7041262550997973, "percentage": 34.08, "elapsed_time": "1:29:31", "remaining_time": "2:53:07", "throughput": 8753.24, "total_tokens": 47013792} +{"current_steps": 69760, "total_steps": 204665, "loss": 0.0764, "lr": 1.6669409005366426e-06, "epoch": 1.7042484059316445, "percentage": 34.08, "elapsed_time": "1:29:31", "remaining_time": "2:53:07", "throughput": 8753.27, "total_tokens": 47016928} +{"current_steps": 69765, "total_steps": 204665, "loss": 0.1261, "lr": 1.666877356866665e-06, "epoch": 1.7043705567634917, "percentage": 34.09, "elapsed_time": "1:29:31", "remaining_time": "2:53:06", "throughput": 8753.34, "total_tokens": 47020320} +{"current_steps": 69770, "total_steps": 204665, "loss": 0.03, "lr": 1.6668138083469756e-06, "epoch": 1.7044927075953389, "percentage": 34.09, "elapsed_time": "1:29:32", "remaining_time": "2:53:06", "throughput": 8753.39, "total_tokens": 47023648} +{"current_steps": 69775, "total_steps": 204665, "loss": 0.2081, "lr": 1.666750254978037e-06, "epoch": 1.7046148584271859, "percentage": 34.09, "elapsed_time": "1:29:32", "remaining_time": "2:53:06", "throughput": 8753.51, "total_tokens": 47027360} +{"current_steps": 69780, "total_steps": 204665, "loss": 0.004, "lr": 1.6666866967603113e-06, "epoch": 1.704737009259033, "percentage": 34.09, "elapsed_time": "1:29:32", "remaining_time": "2:53:05", "throughput": 8753.63, "total_tokens": 47031200} +{"current_steps": 69785, "total_steps": 204665, "loss": 0.0831, "lr": 1.6666231336942604e-06, "epoch": 1.7048591600908802, "percentage": 34.1, "elapsed_time": "1:29:33", "remaining_time": "2:53:05", "throughput": 8753.74, "total_tokens": 47034848} +{"current_steps": 69790, "total_steps": 204665, "loss": 0.0354, "lr": 1.666559565780347e-06, "epoch": 1.7049813109227274, "percentage": 34.1, "elapsed_time": "1:29:33", "remaining_time": "2:53:04", "throughput": 8753.84, "total_tokens": 47038496} +{"current_steps": 69795, "total_steps": 204665, "loss": 0.0632, "lr": 1.666495993019033e-06, "epoch": 1.7051034617545744, "percentage": 34.1, "elapsed_time": "1:29:33", "remaining_time": "2:53:04", "throughput": 8753.89, "total_tokens": 47041824} +{"current_steps": 69800, "total_steps": 204665, "loss": 0.0357, "lr": 1.6664324154107807e-06, "epoch": 1.7052256125864216, "percentage": 34.1, "elapsed_time": "1:29:34", "remaining_time": "2:53:03", "throughput": 8753.93, "total_tokens": 47045088} +{"current_steps": 69805, "total_steps": 204665, "loss": 0.1022, "lr": 1.666368832956053e-06, "epoch": 1.7053477634182688, "percentage": 34.11, "elapsed_time": "1:29:34", "remaining_time": "2:53:03", "throughput": 8753.98, "total_tokens": 47048416} +{"current_steps": 69810, "total_steps": 204665, "loss": 0.0911, "lr": 1.666305245655312e-06, "epoch": 1.705469914250116, "percentage": 34.11, "elapsed_time": "1:29:34", "remaining_time": "2:53:02", "throughput": 8754.0, "total_tokens": 47051488} +{"current_steps": 69815, "total_steps": 204665, "loss": 0.2176, "lr": 1.6662416535090196e-06, "epoch": 1.7055920650819631, "percentage": 34.11, "elapsed_time": "1:29:35", "remaining_time": "2:53:02", "throughput": 8754.07, "total_tokens": 47054944} +{"current_steps": 69820, "total_steps": 204665, "loss": 0.1212, "lr": 1.6661780565176388e-06, "epoch": 1.7057142159138103, "percentage": 34.11, "elapsed_time": "1:29:35", "remaining_time": "2:53:01", "throughput": 8754.11, "total_tokens": 47058208} +{"current_steps": 69825, "total_steps": 204665, "loss": 0.0909, "lr": 1.6661144546816321e-06, "epoch": 1.7058363667456575, "percentage": 34.12, "elapsed_time": "1:29:35", "remaining_time": "2:53:01", "throughput": 8754.2, "total_tokens": 47061728} +{"current_steps": 69830, "total_steps": 204665, "loss": 0.0594, "lr": 1.6660508480014618e-06, "epoch": 1.7059585175775047, "percentage": 34.12, "elapsed_time": "1:29:36", "remaining_time": "2:53:01", "throughput": 8754.23, "total_tokens": 47064864} +{"current_steps": 69835, "total_steps": 204665, "loss": 0.0653, "lr": 1.665987236477591e-06, "epoch": 1.706080668409352, "percentage": 34.12, "elapsed_time": "1:29:36", "remaining_time": "2:53:00", "throughput": 8754.24, "total_tokens": 47067936} +{"current_steps": 69840, "total_steps": 204665, "loss": 0.1373, "lr": 1.6659236201104814e-06, "epoch": 1.706202819241199, "percentage": 34.12, "elapsed_time": "1:29:36", "remaining_time": "2:53:00", "throughput": 8754.29, "total_tokens": 47071200} +{"current_steps": 69845, "total_steps": 204665, "loss": 0.1086, "lr": 1.665859998900596e-06, "epoch": 1.7063249700730463, "percentage": 34.13, "elapsed_time": "1:29:37", "remaining_time": "2:52:59", "throughput": 8754.42, "total_tokens": 47075040} +{"current_steps": 69850, "total_steps": 204665, "loss": 0.1358, "lr": 1.6657963728483981e-06, "epoch": 1.7064471209048935, "percentage": 34.13, "elapsed_time": "1:29:37", "remaining_time": "2:52:59", "throughput": 8754.56, "total_tokens": 47078944} +{"current_steps": 69855, "total_steps": 204665, "loss": 0.1373, "lr": 1.6657327419543496e-06, "epoch": 1.7065692717367407, "percentage": 34.13, "elapsed_time": "1:29:37", "remaining_time": "2:52:58", "throughput": 8754.56, "total_tokens": 47081952} +{"current_steps": 69860, "total_steps": 204665, "loss": 0.0671, "lr": 1.665669106218914e-06, "epoch": 1.7066914225685879, "percentage": 34.13, "elapsed_time": "1:29:38", "remaining_time": "2:52:58", "throughput": 8754.64, "total_tokens": 47085408} +{"current_steps": 69865, "total_steps": 204665, "loss": 0.0399, "lr": 1.665605465642553e-06, "epoch": 1.7068135734004348, "percentage": 34.14, "elapsed_time": "1:29:38", "remaining_time": "2:52:57", "throughput": 8754.82, "total_tokens": 47089568} +{"current_steps": 69870, "total_steps": 204665, "loss": 0.033, "lr": 1.6655418202257305e-06, "epoch": 1.706935724232282, "percentage": 34.14, "elapsed_time": "1:29:39", "remaining_time": "2:52:57", "throughput": 8754.89, "total_tokens": 47093024} +{"current_steps": 69875, "total_steps": 204665, "loss": 0.0416, "lr": 1.6654781699689086e-06, "epoch": 1.7070578750641292, "percentage": 34.14, "elapsed_time": "1:29:39", "remaining_time": "2:52:56", "throughput": 8754.97, "total_tokens": 47096544} +{"current_steps": 69880, "total_steps": 204665, "loss": 0.0335, "lr": 1.6654145148725506e-06, "epoch": 1.7071800258959764, "percentage": 34.14, "elapsed_time": "1:29:39", "remaining_time": "2:52:56", "throughput": 8755.07, "total_tokens": 47100128} +{"current_steps": 69885, "total_steps": 204665, "loss": 0.0633, "lr": 1.665350854937119e-06, "epoch": 1.7073021767278234, "percentage": 34.15, "elapsed_time": "1:29:40", "remaining_time": "2:52:56", "throughput": 8755.18, "total_tokens": 47103840} +{"current_steps": 69890, "total_steps": 204665, "loss": 0.2042, "lr": 1.6652871901630772e-06, "epoch": 1.7074243275596706, "percentage": 34.15, "elapsed_time": "1:29:40", "remaining_time": "2:52:55", "throughput": 8755.21, "total_tokens": 47107040} +{"current_steps": 69895, "total_steps": 204665, "loss": 0.111, "lr": 1.665223520550888e-06, "epoch": 1.7075464783915177, "percentage": 34.15, "elapsed_time": "1:29:40", "remaining_time": "2:52:55", "throughput": 8755.29, "total_tokens": 47110560} +{"current_steps": 69900, "total_steps": 204665, "loss": 0.0022, "lr": 1.6651598461010146e-06, "epoch": 1.707668629223365, "percentage": 34.15, "elapsed_time": "1:29:41", "remaining_time": "2:52:54", "throughput": 8755.36, "total_tokens": 47114016} +{"current_steps": 69905, "total_steps": 204665, "loss": 0.0567, "lr": 1.6650961668139197e-06, "epoch": 1.7077907800552121, "percentage": 34.16, "elapsed_time": "1:29:41", "remaining_time": "2:52:54", "throughput": 8755.33, "total_tokens": 47116704} +{"current_steps": 69910, "total_steps": 204665, "loss": 0.0021, "lr": 1.6650324826900666e-06, "epoch": 1.7079129308870593, "percentage": 34.16, "elapsed_time": "1:29:41", "remaining_time": "2:52:53", "throughput": 8755.41, "total_tokens": 47120288} +{"current_steps": 69915, "total_steps": 204665, "loss": 0.0011, "lr": 1.6649687937299183e-06, "epoch": 1.7080350817189065, "percentage": 34.16, "elapsed_time": "1:29:42", "remaining_time": "2:52:53", "throughput": 8755.4, "total_tokens": 47123168} +{"current_steps": 69920, "total_steps": 204665, "loss": 0.0612, "lr": 1.6649050999339382e-06, "epoch": 1.7081572325507537, "percentage": 34.16, "elapsed_time": "1:29:42", "remaining_time": "2:52:52", "throughput": 8755.43, "total_tokens": 47126304} +{"current_steps": 69925, "total_steps": 204665, "loss": 0.1222, "lr": 1.6648414013025895e-06, "epoch": 1.7082793833826009, "percentage": 34.17, "elapsed_time": "1:29:42", "remaining_time": "2:52:52", "throughput": 8755.54, "total_tokens": 47130016} +{"current_steps": 69930, "total_steps": 204665, "loss": 0.0545, "lr": 1.6647776978363354e-06, "epoch": 1.708401534214448, "percentage": 34.17, "elapsed_time": "1:29:43", "remaining_time": "2:52:51", "throughput": 8755.65, "total_tokens": 47133728} +{"current_steps": 69935, "total_steps": 204665, "loss": 0.0524, "lr": 1.6647139895356388e-06, "epoch": 1.7085236850462953, "percentage": 34.17, "elapsed_time": "1:29:43", "remaining_time": "2:52:51", "throughput": 8755.7, "total_tokens": 47137056} +{"current_steps": 69940, "total_steps": 204665, "loss": 0.0917, "lr": 1.6646502764009633e-06, "epoch": 1.7086458358781424, "percentage": 34.17, "elapsed_time": "1:29:43", "remaining_time": "2:52:51", "throughput": 8755.77, "total_tokens": 47140512} +{"current_steps": 69945, "total_steps": 204665, "loss": 0.0343, "lr": 1.6645865584327723e-06, "epoch": 1.7087679867099896, "percentage": 34.18, "elapsed_time": "1:29:44", "remaining_time": "2:52:50", "throughput": 8755.84, "total_tokens": 47143904} +{"current_steps": 69950, "total_steps": 204665, "loss": 0.1482, "lr": 1.664522835631529e-06, "epoch": 1.7088901375418366, "percentage": 34.18, "elapsed_time": "1:29:44", "remaining_time": "2:52:50", "throughput": 8755.9, "total_tokens": 47147296} +{"current_steps": 69955, "total_steps": 204665, "loss": 0.1285, "lr": 1.6644591079976971e-06, "epoch": 1.7090122883736838, "percentage": 34.18, "elapsed_time": "1:29:44", "remaining_time": "2:52:49", "throughput": 8755.91, "total_tokens": 47150304} +{"current_steps": 69960, "total_steps": 204665, "loss": 0.0495, "lr": 1.6643953755317397e-06, "epoch": 1.709134439205531, "percentage": 34.18, "elapsed_time": "1:29:45", "remaining_time": "2:52:49", "throughput": 8756.0, "total_tokens": 47153888} +{"current_steps": 69965, "total_steps": 204665, "loss": 0.0723, "lr": 1.6643316382341204e-06, "epoch": 1.7092565900373782, "percentage": 34.19, "elapsed_time": "1:29:45", "remaining_time": "2:52:48", "throughput": 8756.06, "total_tokens": 47157280} +{"current_steps": 69970, "total_steps": 204665, "loss": 0.1263, "lr": 1.664267896105303e-06, "epoch": 1.7093787408692254, "percentage": 34.19, "elapsed_time": "1:29:46", "remaining_time": "2:52:48", "throughput": 8756.12, "total_tokens": 47160608} +{"current_steps": 69975, "total_steps": 204665, "loss": 0.1161, "lr": 1.6642041491457507e-06, "epoch": 1.7095008917010723, "percentage": 34.19, "elapsed_time": "1:29:46", "remaining_time": "2:52:47", "throughput": 8756.16, "total_tokens": 47163808} +{"current_steps": 69980, "total_steps": 204665, "loss": 0.1276, "lr": 1.6641403973559268e-06, "epoch": 1.7096230425329195, "percentage": 34.19, "elapsed_time": "1:29:46", "remaining_time": "2:52:47", "throughput": 8756.21, "total_tokens": 47167072} +{"current_steps": 69985, "total_steps": 204665, "loss": 0.0363, "lr": 1.6640766407362955e-06, "epoch": 1.7097451933647667, "percentage": 34.19, "elapsed_time": "1:29:47", "remaining_time": "2:52:46", "throughput": 8756.29, "total_tokens": 47170592} +{"current_steps": 69990, "total_steps": 204665, "loss": 0.0511, "lr": 1.6640128792873205e-06, "epoch": 1.709867344196614, "percentage": 34.2, "elapsed_time": "1:29:47", "remaining_time": "2:52:46", "throughput": 8756.4, "total_tokens": 47174304} +{"current_steps": 69995, "total_steps": 204665, "loss": 0.1105, "lr": 1.663949113009465e-06, "epoch": 1.709989495028461, "percentage": 34.2, "elapsed_time": "1:29:47", "remaining_time": "2:52:46", "throughput": 8756.45, "total_tokens": 47177632} +{"current_steps": 70000, "total_steps": 204665, "loss": 0.0849, "lr": 1.663885341903193e-06, "epoch": 1.7101116458603083, "percentage": 34.2, "elapsed_time": "1:29:48", "remaining_time": "2:52:45", "throughput": 8756.56, "total_tokens": 47181280} +{"current_steps": 70005, "total_steps": 204665, "loss": 0.0892, "lr": 1.6638215659689683e-06, "epoch": 1.7102337966921555, "percentage": 34.2, "elapsed_time": "1:29:48", "remaining_time": "2:52:45", "throughput": 8756.56, "total_tokens": 47184288} +{"current_steps": 70010, "total_steps": 204665, "loss": 0.0731, "lr": 1.6637577852072547e-06, "epoch": 1.7103559475240027, "percentage": 34.21, "elapsed_time": "1:29:48", "remaining_time": "2:52:44", "throughput": 8756.65, "total_tokens": 47187872} +{"current_steps": 70015, "total_steps": 204665, "loss": 0.0885, "lr": 1.6636939996185157e-06, "epoch": 1.7104780983558499, "percentage": 34.21, "elapsed_time": "1:29:49", "remaining_time": "2:52:44", "throughput": 8756.71, "total_tokens": 47191200} +{"current_steps": 70020, "total_steps": 204665, "loss": 0.256, "lr": 1.6636302092032155e-06, "epoch": 1.710600249187697, "percentage": 34.21, "elapsed_time": "1:29:49", "remaining_time": "2:52:43", "throughput": 8756.77, "total_tokens": 47194592} +{"current_steps": 70025, "total_steps": 204665, "loss": 0.0816, "lr": 1.6635664139618183e-06, "epoch": 1.7107224000195442, "percentage": 34.21, "elapsed_time": "1:29:49", "remaining_time": "2:52:43", "throughput": 8756.8, "total_tokens": 47197792} +{"current_steps": 70030, "total_steps": 204665, "loss": 0.0045, "lr": 1.6635026138947873e-06, "epoch": 1.7108445508513914, "percentage": 34.22, "elapsed_time": "1:29:50", "remaining_time": "2:52:42", "throughput": 8756.86, "total_tokens": 47201120} +{"current_steps": 70035, "total_steps": 204665, "loss": 0.001, "lr": 1.6634388090025867e-06, "epoch": 1.7109667016832386, "percentage": 34.22, "elapsed_time": "1:29:50", "remaining_time": "2:52:42", "throughput": 8756.96, "total_tokens": 47204768} +{"current_steps": 70040, "total_steps": 204665, "loss": 0.0746, "lr": 1.663374999285681e-06, "epoch": 1.7110888525150856, "percentage": 34.22, "elapsed_time": "1:29:50", "remaining_time": "2:52:41", "throughput": 8757.02, "total_tokens": 47208160} +{"current_steps": 70045, "total_steps": 204665, "loss": 0.0009, "lr": 1.6633111847445336e-06, "epoch": 1.7112110033469328, "percentage": 34.22, "elapsed_time": "1:29:51", "remaining_time": "2:52:41", "throughput": 8757.08, "total_tokens": 47211552} +{"current_steps": 70050, "total_steps": 204665, "loss": 0.0466, "lr": 1.6632473653796088e-06, "epoch": 1.71133315417878, "percentage": 34.23, "elapsed_time": "1:29:51", "remaining_time": "2:52:41", "throughput": 8757.12, "total_tokens": 47214752} +{"current_steps": 70055, "total_steps": 204665, "loss": 0.1216, "lr": 1.6631835411913713e-06, "epoch": 1.7114553050106271, "percentage": 34.23, "elapsed_time": "1:29:51", "remaining_time": "2:52:40", "throughput": 8757.21, "total_tokens": 47218336} +{"current_steps": 70060, "total_steps": 204665, "loss": 0.084, "lr": 1.6631197121802843e-06, "epoch": 1.7115774558424743, "percentage": 34.23, "elapsed_time": "1:29:52", "remaining_time": "2:52:40", "throughput": 8757.3, "total_tokens": 47221920} +{"current_steps": 70065, "total_steps": 204665, "loss": 0.0601, "lr": 1.6630558783468122e-06, "epoch": 1.7116996066743213, "percentage": 34.23, "elapsed_time": "1:29:52", "remaining_time": "2:52:39", "throughput": 8757.33, "total_tokens": 47225056} +{"current_steps": 70070, "total_steps": 204665, "loss": 0.0522, "lr": 1.66299203969142e-06, "epoch": 1.7118217575061685, "percentage": 34.24, "elapsed_time": "1:29:52", "remaining_time": "2:52:39", "throughput": 8757.44, "total_tokens": 47228768} +{"current_steps": 70075, "total_steps": 204665, "loss": 0.0836, "lr": 1.6629281962145706e-06, "epoch": 1.7119439083380157, "percentage": 34.24, "elapsed_time": "1:29:53", "remaining_time": "2:52:38", "throughput": 8757.46, "total_tokens": 47231904} +{"current_steps": 70080, "total_steps": 204665, "loss": 0.0383, "lr": 1.6628643479167297e-06, "epoch": 1.7120660591698629, "percentage": 34.24, "elapsed_time": "1:29:53", "remaining_time": "2:52:38", "throughput": 8757.6, "total_tokens": 47235808} +{"current_steps": 70085, "total_steps": 204665, "loss": 0.0637, "lr": 1.6628004947983606e-06, "epoch": 1.71218821000171, "percentage": 34.24, "elapsed_time": "1:29:54", "remaining_time": "2:52:37", "throughput": 8757.63, "total_tokens": 47239008} +{"current_steps": 70090, "total_steps": 204665, "loss": 0.0009, "lr": 1.6627366368599285e-06, "epoch": 1.7123103608335573, "percentage": 34.25, "elapsed_time": "1:29:54", "remaining_time": "2:52:37", "throughput": 8757.68, "total_tokens": 47242336} +{"current_steps": 70095, "total_steps": 204665, "loss": 0.071, "lr": 1.6626727741018967e-06, "epoch": 1.7124325116654044, "percentage": 34.25, "elapsed_time": "1:29:54", "remaining_time": "2:52:36", "throughput": 8757.7, "total_tokens": 47245472} +{"current_steps": 70100, "total_steps": 204665, "loss": 0.0495, "lr": 1.6626089065247306e-06, "epoch": 1.7125546624972516, "percentage": 34.25, "elapsed_time": "1:29:55", "remaining_time": "2:52:36", "throughput": 8757.73, "total_tokens": 47248608} +{"current_steps": 70105, "total_steps": 204665, "loss": 0.157, "lr": 1.6625450341288943e-06, "epoch": 1.7126768133290988, "percentage": 34.25, "elapsed_time": "1:29:55", "remaining_time": "2:52:36", "throughput": 8757.85, "total_tokens": 47252384} +{"current_steps": 70110, "total_steps": 204665, "loss": 0.0948, "lr": 1.6624811569148523e-06, "epoch": 1.712798964160946, "percentage": 34.26, "elapsed_time": "1:29:55", "remaining_time": "2:52:35", "throughput": 8757.87, "total_tokens": 47255456} +{"current_steps": 70115, "total_steps": 204665, "loss": 0.0011, "lr": 1.662417274883069e-06, "epoch": 1.7129211149927932, "percentage": 34.26, "elapsed_time": "1:29:56", "remaining_time": "2:52:35", "throughput": 8757.9, "total_tokens": 47258656} +{"current_steps": 70120, "total_steps": 204665, "loss": 0.0389, "lr": 1.6623533880340093e-06, "epoch": 1.7130432658246404, "percentage": 34.26, "elapsed_time": "1:29:56", "remaining_time": "2:52:34", "throughput": 8757.95, "total_tokens": 47261984} +{"current_steps": 70125, "total_steps": 204665, "loss": 0.1023, "lr": 1.6622894963681376e-06, "epoch": 1.7131654166564876, "percentage": 34.26, "elapsed_time": "1:29:56", "remaining_time": "2:52:34", "throughput": 8758.03, "total_tokens": 47265440} +{"current_steps": 70130, "total_steps": 204665, "loss": 0.1753, "lr": 1.6622255998859183e-06, "epoch": 1.7132875674883346, "percentage": 34.27, "elapsed_time": "1:29:57", "remaining_time": "2:52:33", "throughput": 8758.04, "total_tokens": 47268512} +{"current_steps": 70135, "total_steps": 204665, "loss": 0.1303, "lr": 1.6621616985878166e-06, "epoch": 1.7134097183201817, "percentage": 34.27, "elapsed_time": "1:29:57", "remaining_time": "2:52:33", "throughput": 8758.13, "total_tokens": 47272096} +{"current_steps": 70140, "total_steps": 204665, "loss": 0.1832, "lr": 1.6620977924742967e-06, "epoch": 1.713531869152029, "percentage": 34.27, "elapsed_time": "1:29:57", "remaining_time": "2:52:32", "throughput": 8758.14, "total_tokens": 47275168} +{"current_steps": 70145, "total_steps": 204665, "loss": 0.0486, "lr": 1.6620338815458237e-06, "epoch": 1.7136540199838761, "percentage": 34.27, "elapsed_time": "1:29:58", "remaining_time": "2:52:32", "throughput": 8758.22, "total_tokens": 47278624} +{"current_steps": 70150, "total_steps": 204665, "loss": 0.0282, "lr": 1.661969965802862e-06, "epoch": 1.7137761708157233, "percentage": 34.28, "elapsed_time": "1:29:58", "remaining_time": "2:52:31", "throughput": 8758.24, "total_tokens": 47281760} +{"current_steps": 70155, "total_steps": 204665, "loss": 0.0466, "lr": 1.6619060452458773e-06, "epoch": 1.7138983216475703, "percentage": 34.28, "elapsed_time": "1:29:58", "remaining_time": "2:52:31", "throughput": 8758.34, "total_tokens": 47285408} +{"current_steps": 70160, "total_steps": 204665, "loss": 0.1555, "lr": 1.661842119875333e-06, "epoch": 1.7140204724794175, "percentage": 34.28, "elapsed_time": "1:29:59", "remaining_time": "2:52:31", "throughput": 8758.51, "total_tokens": 47289504} +{"current_steps": 70165, "total_steps": 204665, "loss": 0.038, "lr": 1.6617781896916955e-06, "epoch": 1.7141426233112647, "percentage": 34.28, "elapsed_time": "1:29:59", "remaining_time": "2:52:30", "throughput": 8758.57, "total_tokens": 47292832} +{"current_steps": 70170, "total_steps": 204665, "loss": 0.0519, "lr": 1.6617142546954286e-06, "epoch": 1.7142647741431118, "percentage": 34.29, "elapsed_time": "1:29:59", "remaining_time": "2:52:30", "throughput": 8758.61, "total_tokens": 47296096} +{"current_steps": 70175, "total_steps": 204665, "loss": 0.1749, "lr": 1.6616503148869977e-06, "epoch": 1.714386924974959, "percentage": 34.29, "elapsed_time": "1:30:00", "remaining_time": "2:52:29", "throughput": 8758.66, "total_tokens": 47299424} +{"current_steps": 70180, "total_steps": 204665, "loss": 0.0288, "lr": 1.661586370266868e-06, "epoch": 1.7145090758068062, "percentage": 34.29, "elapsed_time": "1:30:00", "remaining_time": "2:52:29", "throughput": 8758.72, "total_tokens": 47302752} +{"current_steps": 70185, "total_steps": 204665, "loss": 0.0801, "lr": 1.661522420835504e-06, "epoch": 1.7146312266386534, "percentage": 34.29, "elapsed_time": "1:30:00", "remaining_time": "2:52:28", "throughput": 8758.77, "total_tokens": 47306080} +{"current_steps": 70190, "total_steps": 204665, "loss": 0.1098, "lr": 1.6614584665933711e-06, "epoch": 1.7147533774705006, "percentage": 34.3, "elapsed_time": "1:30:01", "remaining_time": "2:52:28", "throughput": 8758.84, "total_tokens": 47309536} +{"current_steps": 70195, "total_steps": 204665, "loss": 0.007, "lr": 1.661394507540934e-06, "epoch": 1.7148755283023478, "percentage": 34.3, "elapsed_time": "1:30:01", "remaining_time": "2:52:27", "throughput": 8758.89, "total_tokens": 47312864} +{"current_steps": 70200, "total_steps": 204665, "loss": 0.173, "lr": 1.661330543678659e-06, "epoch": 1.714997679134195, "percentage": 34.3, "elapsed_time": "1:30:02", "remaining_time": "2:52:27", "throughput": 8758.94, "total_tokens": 47316128} +{"current_steps": 70205, "total_steps": 204665, "loss": 0.1017, "lr": 1.6612665750070097e-06, "epoch": 1.7151198299660422, "percentage": 34.3, "elapsed_time": "1:30:02", "remaining_time": "2:52:26", "throughput": 8758.99, "total_tokens": 47319456} +{"current_steps": 70210, "total_steps": 204665, "loss": 0.0431, "lr": 1.6612026015264522e-06, "epoch": 1.7152419807978894, "percentage": 34.3, "elapsed_time": "1:30:02", "remaining_time": "2:52:26", "throughput": 8759.03, "total_tokens": 47322720} +{"current_steps": 70215, "total_steps": 204665, "loss": 0.0533, "lr": 1.6611386232374516e-06, "epoch": 1.7153641316297366, "percentage": 34.31, "elapsed_time": "1:30:03", "remaining_time": "2:52:25", "throughput": 8759.05, "total_tokens": 47325792} +{"current_steps": 70220, "total_steps": 204665, "loss": 0.0897, "lr": 1.6610746401404728e-06, "epoch": 1.7154862824615835, "percentage": 34.31, "elapsed_time": "1:30:03", "remaining_time": "2:52:25", "throughput": 8759.1, "total_tokens": 47329056} +{"current_steps": 70225, "total_steps": 204665, "loss": 0.0649, "lr": 1.6610106522359816e-06, "epoch": 1.7156084332934307, "percentage": 34.31, "elapsed_time": "1:30:03", "remaining_time": "2:52:25", "throughput": 8759.09, "total_tokens": 47331936} +{"current_steps": 70230, "total_steps": 204665, "loss": 0.0611, "lr": 1.6609466595244432e-06, "epoch": 1.715730584125278, "percentage": 34.31, "elapsed_time": "1:30:04", "remaining_time": "2:52:24", "throughput": 8759.09, "total_tokens": 47334944} +{"current_steps": 70235, "total_steps": 204665, "loss": 0.034, "lr": 1.660882662006323e-06, "epoch": 1.715852734957125, "percentage": 34.32, "elapsed_time": "1:30:04", "remaining_time": "2:52:24", "throughput": 8759.14, "total_tokens": 47338208} +{"current_steps": 70240, "total_steps": 204665, "loss": 0.0561, "lr": 1.6608186596820863e-06, "epoch": 1.715974885788972, "percentage": 34.32, "elapsed_time": "1:30:04", "remaining_time": "2:52:23", "throughput": 8759.18, "total_tokens": 47341472} +{"current_steps": 70245, "total_steps": 204665, "loss": 0.0466, "lr": 1.6607546525521984e-06, "epoch": 1.7160970366208192, "percentage": 34.32, "elapsed_time": "1:30:05", "remaining_time": "2:52:23", "throughput": 8759.19, "total_tokens": 47344480} +{"current_steps": 70250, "total_steps": 204665, "loss": 0.0434, "lr": 1.660690640617125e-06, "epoch": 1.7162191874526664, "percentage": 34.32, "elapsed_time": "1:30:05", "remaining_time": "2:52:22", "throughput": 8759.25, "total_tokens": 47347872} +{"current_steps": 70255, "total_steps": 204665, "loss": 0.1255, "lr": 1.6606266238773317e-06, "epoch": 1.7163413382845136, "percentage": 34.33, "elapsed_time": "1:30:05", "remaining_time": "2:52:22", "throughput": 8759.32, "total_tokens": 47351328} +{"current_steps": 70260, "total_steps": 204665, "loss": 0.0415, "lr": 1.6605626023332836e-06, "epoch": 1.7164634891163608, "percentage": 34.33, "elapsed_time": "1:30:06", "remaining_time": "2:52:21", "throughput": 8759.36, "total_tokens": 47354592} +{"current_steps": 70265, "total_steps": 204665, "loss": 0.1191, "lr": 1.660498575985447e-06, "epoch": 1.716585639948208, "percentage": 34.33, "elapsed_time": "1:30:06", "remaining_time": "2:52:21", "throughput": 8759.57, "total_tokens": 47358944} +{"current_steps": 70270, "total_steps": 204665, "loss": 0.139, "lr": 1.660434544834287e-06, "epoch": 1.7167077907800552, "percentage": 34.33, "elapsed_time": "1:30:06", "remaining_time": "2:52:20", "throughput": 8759.59, "total_tokens": 47362080} +{"current_steps": 70275, "total_steps": 204665, "loss": 0.0686, "lr": 1.6603705088802692e-06, "epoch": 1.7168299416119024, "percentage": 34.34, "elapsed_time": "1:30:07", "remaining_time": "2:52:20", "throughput": 8759.62, "total_tokens": 47365280} +{"current_steps": 70280, "total_steps": 204665, "loss": 0.1732, "lr": 1.6603064681238595e-06, "epoch": 1.7169520924437496, "percentage": 34.34, "elapsed_time": "1:30:07", "remaining_time": "2:52:20", "throughput": 8759.67, "total_tokens": 47368544} +{"current_steps": 70285, "total_steps": 204665, "loss": 0.096, "lr": 1.6602424225655236e-06, "epoch": 1.7170742432755968, "percentage": 34.34, "elapsed_time": "1:30:07", "remaining_time": "2:52:19", "throughput": 8759.68, "total_tokens": 47371616} +{"current_steps": 70290, "total_steps": 204665, "loss": 0.0898, "lr": 1.6601783722057273e-06, "epoch": 1.717196394107444, "percentage": 34.34, "elapsed_time": "1:30:08", "remaining_time": "2:52:19", "throughput": 8759.79, "total_tokens": 47375328} +{"current_steps": 70295, "total_steps": 204665, "loss": 0.0325, "lr": 1.660114317044936e-06, "epoch": 1.7173185449392911, "percentage": 34.35, "elapsed_time": "1:30:08", "remaining_time": "2:52:18", "throughput": 8759.83, "total_tokens": 47378592} +{"current_steps": 70300, "total_steps": 204665, "loss": 0.1035, "lr": 1.6600502570836162e-06, "epoch": 1.7174406957711383, "percentage": 34.35, "elapsed_time": "1:30:08", "remaining_time": "2:52:18", "throughput": 8759.85, "total_tokens": 47381664} +{"current_steps": 70305, "total_steps": 204665, "loss": 0.0312, "lr": 1.6599861923222332e-06, "epoch": 1.7175628466029855, "percentage": 34.35, "elapsed_time": "1:30:09", "remaining_time": "2:52:17", "throughput": 8759.88, "total_tokens": 47384864} +{"current_steps": 70310, "total_steps": 204665, "loss": 0.1744, "lr": 1.659922122761253e-06, "epoch": 1.7176849974348325, "percentage": 34.35, "elapsed_time": "1:30:09", "remaining_time": "2:52:17", "throughput": 8759.95, "total_tokens": 47388320} +{"current_steps": 70315, "total_steps": 204665, "loss": 0.0006, "lr": 1.6598580484011415e-06, "epoch": 1.7178071482666797, "percentage": 34.36, "elapsed_time": "1:30:10", "remaining_time": "2:52:16", "throughput": 8760.12, "total_tokens": 47392416} +{"current_steps": 70320, "total_steps": 204665, "loss": 0.1155, "lr": 1.659793969242365e-06, "epoch": 1.7179292990985269, "percentage": 34.36, "elapsed_time": "1:30:10", "remaining_time": "2:52:16", "throughput": 8760.19, "total_tokens": 47395872} +{"current_steps": 70325, "total_steps": 204665, "loss": 0.0345, "lr": 1.6597298852853894e-06, "epoch": 1.718051449930374, "percentage": 34.36, "elapsed_time": "1:30:10", "remaining_time": "2:52:15", "throughput": 8760.25, "total_tokens": 47399264} +{"current_steps": 70330, "total_steps": 204665, "loss": 0.1316, "lr": 1.6596657965306807e-06, "epoch": 1.718173600762221, "percentage": 34.36, "elapsed_time": "1:30:11", "remaining_time": "2:52:15", "throughput": 8760.27, "total_tokens": 47402400} +{"current_steps": 70335, "total_steps": 204665, "loss": 0.0012, "lr": 1.6596017029787048e-06, "epoch": 1.7182957515940682, "percentage": 34.37, "elapsed_time": "1:30:11", "remaining_time": "2:52:15", "throughput": 8760.3, "total_tokens": 47405600} +{"current_steps": 70340, "total_steps": 204665, "loss": 0.151, "lr": 1.6595376046299276e-06, "epoch": 1.7184179024259154, "percentage": 34.37, "elapsed_time": "1:30:11", "remaining_time": "2:52:14", "throughput": 8760.35, "total_tokens": 47408928} +{"current_steps": 70345, "total_steps": 204665, "loss": 0.1138, "lr": 1.6594735014848161e-06, "epoch": 1.7185400532577626, "percentage": 34.37, "elapsed_time": "1:30:12", "remaining_time": "2:52:14", "throughput": 8760.45, "total_tokens": 47412576} +{"current_steps": 70350, "total_steps": 204665, "loss": 0.0559, "lr": 1.6594093935438354e-06, "epoch": 1.7186622040896098, "percentage": 34.37, "elapsed_time": "1:30:12", "remaining_time": "2:52:13", "throughput": 8760.53, "total_tokens": 47416032} +{"current_steps": 70355, "total_steps": 204665, "loss": 0.0006, "lr": 1.6593452808074524e-06, "epoch": 1.718784354921457, "percentage": 34.38, "elapsed_time": "1:30:12", "remaining_time": "2:52:13", "throughput": 8760.53, "total_tokens": 47419040} +{"current_steps": 70360, "total_steps": 204665, "loss": 0.0244, "lr": 1.6592811632761335e-06, "epoch": 1.7189065057533042, "percentage": 34.38, "elapsed_time": "1:30:13", "remaining_time": "2:52:12", "throughput": 8760.59, "total_tokens": 47422432} +{"current_steps": 70365, "total_steps": 204665, "loss": 0.0547, "lr": 1.6592170409503444e-06, "epoch": 1.7190286565851514, "percentage": 34.38, "elapsed_time": "1:30:13", "remaining_time": "2:52:12", "throughput": 8760.67, "total_tokens": 47425888} +{"current_steps": 70370, "total_steps": 204665, "loss": 0.0812, "lr": 1.6591529138305515e-06, "epoch": 1.7191508074169985, "percentage": 34.38, "elapsed_time": "1:30:13", "remaining_time": "2:52:11", "throughput": 8760.72, "total_tokens": 47429216} +{"current_steps": 70375, "total_steps": 204665, "loss": 0.1318, "lr": 1.6590887819172215e-06, "epoch": 1.7192729582488457, "percentage": 34.39, "elapsed_time": "1:30:14", "remaining_time": "2:52:11", "throughput": 8760.81, "total_tokens": 47432800} +{"current_steps": 70380, "total_steps": 204665, "loss": 0.156, "lr": 1.6590246452108206e-06, "epoch": 1.719395109080693, "percentage": 34.39, "elapsed_time": "1:30:14", "remaining_time": "2:52:10", "throughput": 8760.92, "total_tokens": 47436512} +{"current_steps": 70385, "total_steps": 204665, "loss": 0.2372, "lr": 1.6589605037118153e-06, "epoch": 1.7195172599125401, "percentage": 34.39, "elapsed_time": "1:30:14", "remaining_time": "2:52:10", "throughput": 8761.02, "total_tokens": 47440160} +{"current_steps": 70390, "total_steps": 204665, "loss": 0.0403, "lr": 1.6588963574206719e-06, "epoch": 1.7196394107443873, "percentage": 34.39, "elapsed_time": "1:30:15", "remaining_time": "2:52:10", "throughput": 8761.06, "total_tokens": 47443424} +{"current_steps": 70395, "total_steps": 204665, "loss": 0.0508, "lr": 1.6588322063378567e-06, "epoch": 1.7197615615762345, "percentage": 34.4, "elapsed_time": "1:30:15", "remaining_time": "2:52:09", "throughput": 8761.13, "total_tokens": 47446880} +{"current_steps": 70400, "total_steps": 204665, "loss": 0.1788, "lr": 1.6587680504638368e-06, "epoch": 1.7198837124080815, "percentage": 34.4, "elapsed_time": "1:30:15", "remaining_time": "2:52:09", "throughput": 8761.2, "total_tokens": 47450272} +{"current_steps": 70405, "total_steps": 204665, "loss": 0.051, "lr": 1.6587038897990783e-06, "epoch": 1.7200058632399287, "percentage": 34.4, "elapsed_time": "1:30:16", "remaining_time": "2:52:08", "throughput": 8761.27, "total_tokens": 47453728} +{"current_steps": 70410, "total_steps": 204665, "loss": 0.0631, "lr": 1.6586397243440483e-06, "epoch": 1.7201280140717758, "percentage": 34.4, "elapsed_time": "1:30:16", "remaining_time": "2:52:08", "throughput": 8761.27, "total_tokens": 47456672} +{"current_steps": 70415, "total_steps": 204665, "loss": 0.1466, "lr": 1.6585755540992125e-06, "epoch": 1.720250164903623, "percentage": 34.41, "elapsed_time": "1:30:16", "remaining_time": "2:52:07", "throughput": 8761.31, "total_tokens": 47459936} +{"current_steps": 70420, "total_steps": 204665, "loss": 0.1025, "lr": 1.6585113790650388e-06, "epoch": 1.72037231573547, "percentage": 34.41, "elapsed_time": "1:30:17", "remaining_time": "2:52:07", "throughput": 8761.38, "total_tokens": 47463328} +{"current_steps": 70425, "total_steps": 204665, "loss": 0.0013, "lr": 1.6584471992419927e-06, "epoch": 1.7204944665673172, "percentage": 34.41, "elapsed_time": "1:30:17", "remaining_time": "2:52:06", "throughput": 8761.51, "total_tokens": 47467232} +{"current_steps": 70430, "total_steps": 204665, "loss": 0.0816, "lr": 1.6583830146305418e-06, "epoch": 1.7206166173991644, "percentage": 34.41, "elapsed_time": "1:30:18", "remaining_time": "2:52:06", "throughput": 8761.55, "total_tokens": 47470496} +{"current_steps": 70435, "total_steps": 204665, "loss": 0.0989, "lr": 1.6583188252311522e-06, "epoch": 1.7207387682310116, "percentage": 34.41, "elapsed_time": "1:30:18", "remaining_time": "2:52:05", "throughput": 8761.65, "total_tokens": 47474080} +{"current_steps": 70440, "total_steps": 204665, "loss": 0.1682, "lr": 1.6582546310442913e-06, "epoch": 1.7208609190628588, "percentage": 34.42, "elapsed_time": "1:30:18", "remaining_time": "2:52:05", "throughput": 8761.73, "total_tokens": 47477664} +{"current_steps": 70445, "total_steps": 204665, "loss": 0.0531, "lr": 1.6581904320704254e-06, "epoch": 1.720983069894706, "percentage": 34.42, "elapsed_time": "1:30:19", "remaining_time": "2:52:05", "throughput": 8761.84, "total_tokens": 47481376} +{"current_steps": 70450, "total_steps": 204665, "loss": 0.0985, "lr": 1.658126228310022e-06, "epoch": 1.7211052207265531, "percentage": 34.42, "elapsed_time": "1:30:19", "remaining_time": "2:52:04", "throughput": 8761.85, "total_tokens": 47484384} +{"current_steps": 70455, "total_steps": 204665, "loss": 0.0026, "lr": 1.6580620197635473e-06, "epoch": 1.7212273715584003, "percentage": 34.42, "elapsed_time": "1:30:19", "remaining_time": "2:52:04", "throughput": 8761.93, "total_tokens": 47487840} +{"current_steps": 70460, "total_steps": 204665, "loss": 0.0616, "lr": 1.6579978064314688e-06, "epoch": 1.7213495223902475, "percentage": 34.43, "elapsed_time": "1:30:20", "remaining_time": "2:52:03", "throughput": 8761.98, "total_tokens": 47491168} +{"current_steps": 70465, "total_steps": 204665, "loss": 0.0845, "lr": 1.6579335883142534e-06, "epoch": 1.7214716732220947, "percentage": 34.43, "elapsed_time": "1:30:20", "remaining_time": "2:52:03", "throughput": 8762.06, "total_tokens": 47494688} +{"current_steps": 70470, "total_steps": 204665, "loss": 0.0646, "lr": 1.6578693654123676e-06, "epoch": 1.721593824053942, "percentage": 34.43, "elapsed_time": "1:30:20", "remaining_time": "2:52:02", "throughput": 8762.13, "total_tokens": 47498080} +{"current_steps": 70475, "total_steps": 204665, "loss": 0.085, "lr": 1.6578051377262792e-06, "epoch": 1.721715974885789, "percentage": 34.43, "elapsed_time": "1:30:21", "remaining_time": "2:52:02", "throughput": 8762.13, "total_tokens": 47501088} +{"current_steps": 70480, "total_steps": 204665, "loss": 0.0571, "lr": 1.6577409052564545e-06, "epoch": 1.7218381257176363, "percentage": 34.44, "elapsed_time": "1:30:21", "remaining_time": "2:52:01", "throughput": 8762.19, "total_tokens": 47504480} +{"current_steps": 70485, "total_steps": 204665, "loss": 0.0518, "lr": 1.6576766680033613e-06, "epoch": 1.7219602765494832, "percentage": 34.44, "elapsed_time": "1:30:21", "remaining_time": "2:52:01", "throughput": 8762.36, "total_tokens": 47508576} +{"current_steps": 70490, "total_steps": 204665, "loss": 0.0763, "lr": 1.6576124259674667e-06, "epoch": 1.7220824273813304, "percentage": 34.44, "elapsed_time": "1:30:22", "remaining_time": "2:52:01", "throughput": 8762.4, "total_tokens": 47511840} +{"current_steps": 70495, "total_steps": 204665, "loss": 0.008, "lr": 1.6575481791492374e-06, "epoch": 1.7222045782131776, "percentage": 34.44, "elapsed_time": "1:30:22", "remaining_time": "2:52:00", "throughput": 8762.45, "total_tokens": 47515168} +{"current_steps": 70500, "total_steps": 204665, "loss": 0.1826, "lr": 1.657483927549141e-06, "epoch": 1.7223267290450248, "percentage": 34.45, "elapsed_time": "1:30:22", "remaining_time": "2:52:00", "throughput": 8762.53, "total_tokens": 47518624} +{"current_steps": 70505, "total_steps": 204665, "loss": 0.0811, "lr": 1.6574196711676444e-06, "epoch": 1.722448879876872, "percentage": 34.45, "elapsed_time": "1:30:23", "remaining_time": "2:51:59", "throughput": 8762.55, "total_tokens": 47521760} +{"current_steps": 70510, "total_steps": 204665, "loss": 0.1212, "lr": 1.6573554100052154e-06, "epoch": 1.722571030708719, "percentage": 34.45, "elapsed_time": "1:30:23", "remaining_time": "2:51:59", "throughput": 8762.58, "total_tokens": 47524896} +{"current_steps": 70515, "total_steps": 204665, "loss": 0.0577, "lr": 1.657291144062321e-06, "epoch": 1.7226931815405662, "percentage": 34.45, "elapsed_time": "1:30:23", "remaining_time": "2:51:58", "throughput": 8762.71, "total_tokens": 47528736} +{"current_steps": 70520, "total_steps": 204665, "loss": 0.0864, "lr": 1.6572268733394283e-06, "epoch": 1.7228153323724134, "percentage": 34.46, "elapsed_time": "1:30:24", "remaining_time": "2:51:58", "throughput": 8762.78, "total_tokens": 47532192} +{"current_steps": 70525, "total_steps": 204665, "loss": 0.1231, "lr": 1.6571625978370055e-06, "epoch": 1.7229374832042605, "percentage": 34.46, "elapsed_time": "1:30:24", "remaining_time": "2:51:57", "throughput": 8762.81, "total_tokens": 47535328} +{"current_steps": 70530, "total_steps": 204665, "loss": 0.2692, "lr": 1.657098317555519e-06, "epoch": 1.7230596340361077, "percentage": 34.46, "elapsed_time": "1:30:25", "remaining_time": "2:51:57", "throughput": 8762.82, "total_tokens": 47538400} +{"current_steps": 70535, "total_steps": 204665, "loss": 0.0742, "lr": 1.6570340324954374e-06, "epoch": 1.723181784867955, "percentage": 34.46, "elapsed_time": "1:30:25", "remaining_time": "2:51:56", "throughput": 8762.89, "total_tokens": 47541792} +{"current_steps": 70540, "total_steps": 204665, "loss": 0.0373, "lr": 1.656969742657227e-06, "epoch": 1.7233039356998021, "percentage": 34.47, "elapsed_time": "1:30:25", "remaining_time": "2:51:56", "throughput": 8762.95, "total_tokens": 47545184} +{"current_steps": 70545, "total_steps": 204665, "loss": 0.1764, "lr": 1.6569054480413564e-06, "epoch": 1.7234260865316493, "percentage": 34.47, "elapsed_time": "1:30:26", "remaining_time": "2:51:55", "throughput": 8762.96, "total_tokens": 47548192} +{"current_steps": 70550, "total_steps": 204665, "loss": 0.051, "lr": 1.6568411486482923e-06, "epoch": 1.7235482373634965, "percentage": 34.47, "elapsed_time": "1:30:26", "remaining_time": "2:51:55", "throughput": 8763.0, "total_tokens": 47551456} +{"current_steps": 70555, "total_steps": 204665, "loss": 0.1628, "lr": 1.656776844478503e-06, "epoch": 1.7236703881953437, "percentage": 34.47, "elapsed_time": "1:30:26", "remaining_time": "2:51:55", "throughput": 8763.13, "total_tokens": 47555296} +{"current_steps": 70560, "total_steps": 204665, "loss": 0.0636, "lr": 1.6567125355324555e-06, "epoch": 1.7237925390271909, "percentage": 34.48, "elapsed_time": "1:30:27", "remaining_time": "2:51:54", "throughput": 8763.19, "total_tokens": 47558688} +{"current_steps": 70565, "total_steps": 204665, "loss": 0.1408, "lr": 1.6566482218106184e-06, "epoch": 1.723914689859038, "percentage": 34.48, "elapsed_time": "1:30:27", "remaining_time": "2:51:54", "throughput": 8763.26, "total_tokens": 47562080} +{"current_steps": 70570, "total_steps": 204665, "loss": 0.0796, "lr": 1.6565839033134584e-06, "epoch": 1.7240368406908853, "percentage": 34.48, "elapsed_time": "1:30:27", "remaining_time": "2:51:53", "throughput": 8763.28, "total_tokens": 47565216} +{"current_steps": 70575, "total_steps": 204665, "loss": 0.0569, "lr": 1.6565195800414434e-06, "epoch": 1.7241589915227322, "percentage": 34.48, "elapsed_time": "1:30:28", "remaining_time": "2:51:53", "throughput": 8763.34, "total_tokens": 47568544} +{"current_steps": 70580, "total_steps": 204665, "loss": 0.0396, "lr": 1.656455251995042e-06, "epoch": 1.7242811423545794, "percentage": 34.49, "elapsed_time": "1:30:28", "remaining_time": "2:51:52", "throughput": 8763.38, "total_tokens": 47571808} +{"current_steps": 70585, "total_steps": 204665, "loss": 0.0013, "lr": 1.6563909191747212e-06, "epoch": 1.7244032931864266, "percentage": 34.49, "elapsed_time": "1:30:28", "remaining_time": "2:51:52", "throughput": 8763.43, "total_tokens": 47575072} +{"current_steps": 70590, "total_steps": 204665, "loss": 0.1305, "lr": 1.656326581580949e-06, "epoch": 1.7245254440182738, "percentage": 34.49, "elapsed_time": "1:30:29", "remaining_time": "2:51:51", "throughput": 8763.51, "total_tokens": 47578592} +{"current_steps": 70595, "total_steps": 204665, "loss": 0.0725, "lr": 1.656262239214193e-06, "epoch": 1.724647594850121, "percentage": 34.49, "elapsed_time": "1:30:29", "remaining_time": "2:51:51", "throughput": 8763.54, "total_tokens": 47581728} +{"current_steps": 70600, "total_steps": 204665, "loss": 0.0006, "lr": 1.6561978920749223e-06, "epoch": 1.724769745681968, "percentage": 34.5, "elapsed_time": "1:30:29", "remaining_time": "2:51:50", "throughput": 8763.61, "total_tokens": 47585184} +{"current_steps": 70605, "total_steps": 204665, "loss": 0.0488, "lr": 1.6561335401636036e-06, "epoch": 1.7248918965138151, "percentage": 34.5, "elapsed_time": "1:30:30", "remaining_time": "2:51:50", "throughput": 8763.64, "total_tokens": 47588384} +{"current_steps": 70610, "total_steps": 204665, "loss": 0.032, "lr": 1.6560691834807052e-06, "epoch": 1.7250140473456623, "percentage": 34.5, "elapsed_time": "1:30:30", "remaining_time": "2:51:50", "throughput": 8763.71, "total_tokens": 47591840} +{"current_steps": 70615, "total_steps": 204665, "loss": 0.0013, "lr": 1.6560048220266955e-06, "epoch": 1.7251361981775095, "percentage": 34.5, "elapsed_time": "1:30:30", "remaining_time": "2:51:49", "throughput": 8763.81, "total_tokens": 47595424} +{"current_steps": 70620, "total_steps": 204665, "loss": 0.1268, "lr": 1.6559404558020424e-06, "epoch": 1.7252583490093567, "percentage": 34.51, "elapsed_time": "1:30:31", "remaining_time": "2:51:49", "throughput": 8763.94, "total_tokens": 47599264} +{"current_steps": 70625, "total_steps": 204665, "loss": 0.1171, "lr": 1.6558760848072135e-06, "epoch": 1.725380499841204, "percentage": 34.51, "elapsed_time": "1:30:31", "remaining_time": "2:51:48", "throughput": 8763.93, "total_tokens": 47602144} +{"current_steps": 70630, "total_steps": 204665, "loss": 0.2168, "lr": 1.6558117090426772e-06, "epoch": 1.725502650673051, "percentage": 34.51, "elapsed_time": "1:30:31", "remaining_time": "2:51:48", "throughput": 8763.99, "total_tokens": 47605536} +{"current_steps": 70635, "total_steps": 204665, "loss": 0.0798, "lr": 1.6557473285089023e-06, "epoch": 1.7256248015048983, "percentage": 34.51, "elapsed_time": "1:30:32", "remaining_time": "2:51:47", "throughput": 8763.98, "total_tokens": 47608416} +{"current_steps": 70640, "total_steps": 204665, "loss": 0.1222, "lr": 1.6556829432063562e-06, "epoch": 1.7257469523367455, "percentage": 34.51, "elapsed_time": "1:30:32", "remaining_time": "2:51:47", "throughput": 8764.06, "total_tokens": 47611936} +{"current_steps": 70645, "total_steps": 204665, "loss": 0.064, "lr": 1.6556185531355074e-06, "epoch": 1.7258691031685927, "percentage": 34.52, "elapsed_time": "1:30:32", "remaining_time": "2:51:46", "throughput": 8764.12, "total_tokens": 47615328} +{"current_steps": 70650, "total_steps": 204665, "loss": 0.0015, "lr": 1.655554158296824e-06, "epoch": 1.7259912540004398, "percentage": 34.52, "elapsed_time": "1:30:33", "remaining_time": "2:51:46", "throughput": 8764.23, "total_tokens": 47619040} +{"current_steps": 70655, "total_steps": 204665, "loss": 0.1151, "lr": 1.6554897586907746e-06, "epoch": 1.726113404832287, "percentage": 34.52, "elapsed_time": "1:30:33", "remaining_time": "2:51:45", "throughput": 8764.28, "total_tokens": 47622368} +{"current_steps": 70660, "total_steps": 204665, "loss": 0.029, "lr": 1.6554253543178272e-06, "epoch": 1.7262355556641342, "percentage": 34.52, "elapsed_time": "1:30:34", "remaining_time": "2:51:45", "throughput": 8764.37, "total_tokens": 47625952} +{"current_steps": 70665, "total_steps": 204665, "loss": 0.1943, "lr": 1.6553609451784505e-06, "epoch": 1.7263577064959812, "percentage": 34.53, "elapsed_time": "1:30:34", "remaining_time": "2:51:45", "throughput": 8764.4, "total_tokens": 47629088} +{"current_steps": 70670, "total_steps": 204665, "loss": 0.0506, "lr": 1.655296531273113e-06, "epoch": 1.7264798573278284, "percentage": 34.53, "elapsed_time": "1:30:34", "remaining_time": "2:51:44", "throughput": 8764.45, "total_tokens": 47632416} +{"current_steps": 70675, "total_steps": 204665, "loss": 0.0964, "lr": 1.6552321126022824e-06, "epoch": 1.7266020081596756, "percentage": 34.53, "elapsed_time": "1:30:35", "remaining_time": "2:51:44", "throughput": 8764.53, "total_tokens": 47635936} +{"current_steps": 70680, "total_steps": 204665, "loss": 0.1082, "lr": 1.6551676891664278e-06, "epoch": 1.7267241589915228, "percentage": 34.53, "elapsed_time": "1:30:35", "remaining_time": "2:51:43", "throughput": 8764.58, "total_tokens": 47639264} +{"current_steps": 70685, "total_steps": 204665, "loss": 0.1175, "lr": 1.6551032609660174e-06, "epoch": 1.72684630982337, "percentage": 34.54, "elapsed_time": "1:30:35", "remaining_time": "2:51:43", "throughput": 8764.58, "total_tokens": 47642208} +{"current_steps": 70690, "total_steps": 204665, "loss": 0.1192, "lr": 1.6550388280015199e-06, "epoch": 1.726968460655217, "percentage": 34.54, "elapsed_time": "1:30:36", "remaining_time": "2:51:42", "throughput": 8764.69, "total_tokens": 47645920} +{"current_steps": 70695, "total_steps": 204665, "loss": 0.0551, "lr": 1.654974390273404e-06, "epoch": 1.727090611487064, "percentage": 34.54, "elapsed_time": "1:30:36", "remaining_time": "2:51:42", "throughput": 8764.75, "total_tokens": 47649312} +{"current_steps": 70700, "total_steps": 204665, "loss": 0.0847, "lr": 1.6549099477821384e-06, "epoch": 1.7272127623189113, "percentage": 34.54, "elapsed_time": "1:30:36", "remaining_time": "2:51:41", "throughput": 8764.78, "total_tokens": 47652448} +{"current_steps": 70705, "total_steps": 204665, "loss": 0.1452, "lr": 1.6548455005281912e-06, "epoch": 1.7273349131507585, "percentage": 34.55, "elapsed_time": "1:30:37", "remaining_time": "2:51:41", "throughput": 8764.84, "total_tokens": 47655840} +{"current_steps": 70710, "total_steps": 204665, "loss": 0.0222, "lr": 1.6547810485120315e-06, "epoch": 1.7274570639826057, "percentage": 34.55, "elapsed_time": "1:30:37", "remaining_time": "2:51:40", "throughput": 8764.85, "total_tokens": 47658848} +{"current_steps": 70715, "total_steps": 204665, "loss": 0.0349, "lr": 1.6547165917341274e-06, "epoch": 1.7275792148144529, "percentage": 34.55, "elapsed_time": "1:30:37", "remaining_time": "2:51:40", "throughput": 8764.88, "total_tokens": 47662048} +{"current_steps": 70720, "total_steps": 204665, "loss": 0.1226, "lr": 1.6546521301949489e-06, "epoch": 1.7277013656463, "percentage": 34.55, "elapsed_time": "1:30:38", "remaining_time": "2:51:40", "throughput": 8764.9, "total_tokens": 47665120} +{"current_steps": 70725, "total_steps": 204665, "loss": 0.134, "lr": 1.6545876638949636e-06, "epoch": 1.7278235164781472, "percentage": 34.56, "elapsed_time": "1:30:38", "remaining_time": "2:51:39", "throughput": 8764.97, "total_tokens": 47668576} +{"current_steps": 70730, "total_steps": 204665, "loss": 0.1153, "lr": 1.6545231928346411e-06, "epoch": 1.7279456673099944, "percentage": 34.56, "elapsed_time": "1:30:38", "remaining_time": "2:51:39", "throughput": 8765.07, "total_tokens": 47672224} +{"current_steps": 70735, "total_steps": 204665, "loss": 0.0368, "lr": 1.6544587170144496e-06, "epoch": 1.7280678181418416, "percentage": 34.56, "elapsed_time": "1:30:39", "remaining_time": "2:51:38", "throughput": 8765.12, "total_tokens": 47675552} +{"current_steps": 70740, "total_steps": 204665, "loss": 0.0445, "lr": 1.6543942364348583e-06, "epoch": 1.7281899689736888, "percentage": 34.56, "elapsed_time": "1:30:39", "remaining_time": "2:51:38", "throughput": 8765.27, "total_tokens": 47679520} +{"current_steps": 70745, "total_steps": 204665, "loss": 0.0499, "lr": 1.6543297510963362e-06, "epoch": 1.728312119805536, "percentage": 34.57, "elapsed_time": "1:30:39", "remaining_time": "2:51:37", "throughput": 8765.29, "total_tokens": 47682592} +{"current_steps": 70750, "total_steps": 204665, "loss": 0.0015, "lr": 1.6542652609993519e-06, "epoch": 1.7284342706373832, "percentage": 34.57, "elapsed_time": "1:30:40", "remaining_time": "2:51:37", "throughput": 8765.29, "total_tokens": 47685600} +{"current_steps": 70755, "total_steps": 204665, "loss": 0.1076, "lr": 1.6542007661443749e-06, "epoch": 1.7285564214692302, "percentage": 34.57, "elapsed_time": "1:30:40", "remaining_time": "2:51:36", "throughput": 8765.33, "total_tokens": 47688800} +{"current_steps": 70760, "total_steps": 204665, "loss": 0.1117, "lr": 1.654136266531874e-06, "epoch": 1.7286785723010774, "percentage": 34.57, "elapsed_time": "1:30:40", "remaining_time": "2:51:36", "throughput": 8765.43, "total_tokens": 47692448} +{"current_steps": 70765, "total_steps": 204665, "loss": 0.2228, "lr": 1.6540717621623182e-06, "epoch": 1.7288007231329245, "percentage": 34.58, "elapsed_time": "1:30:41", "remaining_time": "2:51:35", "throughput": 8765.47, "total_tokens": 47695712} +{"current_steps": 70770, "total_steps": 204665, "loss": 0.0448, "lr": 1.6540072530361767e-06, "epoch": 1.7289228739647717, "percentage": 34.58, "elapsed_time": "1:30:41", "remaining_time": "2:51:35", "throughput": 8765.49, "total_tokens": 47698784} +{"current_steps": 70775, "total_steps": 204665, "loss": 0.0365, "lr": 1.6539427391539183e-06, "epoch": 1.7290450247966187, "percentage": 34.58, "elapsed_time": "1:30:41", "remaining_time": "2:51:34", "throughput": 8765.47, "total_tokens": 47701600} +{"current_steps": 70780, "total_steps": 204665, "loss": 0.0411, "lr": 1.6538782205160124e-06, "epoch": 1.729167175628466, "percentage": 34.58, "elapsed_time": "1:30:42", "remaining_time": "2:51:34", "throughput": 8765.53, "total_tokens": 47704928} +{"current_steps": 70785, "total_steps": 204665, "loss": 0.0804, "lr": 1.6538136971229284e-06, "epoch": 1.729289326460313, "percentage": 34.59, "elapsed_time": "1:30:42", "remaining_time": "2:51:34", "throughput": 8765.57, "total_tokens": 47708128} +{"current_steps": 70790, "total_steps": 204665, "loss": 0.0767, "lr": 1.6537491689751352e-06, "epoch": 1.7294114772921603, "percentage": 34.59, "elapsed_time": "1:30:43", "remaining_time": "2:51:33", "throughput": 8765.61, "total_tokens": 47711456} +{"current_steps": 70795, "total_steps": 204665, "loss": 0.0489, "lr": 1.6536846360731022e-06, "epoch": 1.7295336281240075, "percentage": 34.59, "elapsed_time": "1:30:43", "remaining_time": "2:51:33", "throughput": 8765.67, "total_tokens": 47714784} +{"current_steps": 70800, "total_steps": 204665, "loss": 0.1632, "lr": 1.653620098417299e-06, "epoch": 1.7296557789558547, "percentage": 34.59, "elapsed_time": "1:30:43", "remaining_time": "2:51:32", "throughput": 8765.68, "total_tokens": 47717856} +{"current_steps": 70805, "total_steps": 204665, "loss": 0.0536, "lr": 1.6535555560081945e-06, "epoch": 1.7297779297877018, "percentage": 34.6, "elapsed_time": "1:30:44", "remaining_time": "2:51:32", "throughput": 8765.77, "total_tokens": 47721440} +{"current_steps": 70810, "total_steps": 204665, "loss": 0.0379, "lr": 1.653491008846258e-06, "epoch": 1.729900080619549, "percentage": 34.6, "elapsed_time": "1:30:44", "remaining_time": "2:51:31", "throughput": 8765.83, "total_tokens": 47724832} +{"current_steps": 70815, "total_steps": 204665, "loss": 0.1593, "lr": 1.6534264569319594e-06, "epoch": 1.7300222314513962, "percentage": 34.6, "elapsed_time": "1:30:44", "remaining_time": "2:51:31", "throughput": 8765.86, "total_tokens": 47727968} +{"current_steps": 70820, "total_steps": 204665, "loss": 0.121, "lr": 1.6533619002657676e-06, "epoch": 1.7301443822832434, "percentage": 34.6, "elapsed_time": "1:30:45", "remaining_time": "2:51:30", "throughput": 8765.92, "total_tokens": 47731360} +{"current_steps": 70825, "total_steps": 204665, "loss": 0.0291, "lr": 1.6532973388481523e-06, "epoch": 1.7302665331150906, "percentage": 34.61, "elapsed_time": "1:30:45", "remaining_time": "2:51:30", "throughput": 8766.05, "total_tokens": 47735200} +{"current_steps": 70830, "total_steps": 204665, "loss": 0.088, "lr": 1.6532327726795834e-06, "epoch": 1.7303886839469378, "percentage": 34.61, "elapsed_time": "1:30:45", "remaining_time": "2:51:29", "throughput": 8766.09, "total_tokens": 47738464} +{"current_steps": 70835, "total_steps": 204665, "loss": 0.2023, "lr": 1.65316820176053e-06, "epoch": 1.730510834778785, "percentage": 34.61, "elapsed_time": "1:30:46", "remaining_time": "2:51:29", "throughput": 8766.12, "total_tokens": 47741664} +{"current_steps": 70840, "total_steps": 204665, "loss": 0.0257, "lr": 1.6531036260914615e-06, "epoch": 1.7306329856106322, "percentage": 34.61, "elapsed_time": "1:30:46", "remaining_time": "2:51:29", "throughput": 8766.27, "total_tokens": 47745632} +{"current_steps": 70845, "total_steps": 204665, "loss": 0.0009, "lr": 1.6530390456728478e-06, "epoch": 1.7307551364424791, "percentage": 34.62, "elapsed_time": "1:30:46", "remaining_time": "2:51:28", "throughput": 8766.3, "total_tokens": 47748832} +{"current_steps": 70850, "total_steps": 204665, "loss": 0.1395, "lr": 1.6529744605051586e-06, "epoch": 1.7308772872743263, "percentage": 34.62, "elapsed_time": "1:30:47", "remaining_time": "2:51:28", "throughput": 8766.42, "total_tokens": 47752608} +{"current_steps": 70855, "total_steps": 204665, "loss": 0.0413, "lr": 1.6529098705888636e-06, "epoch": 1.7309994381061735, "percentage": 34.62, "elapsed_time": "1:30:47", "remaining_time": "2:51:27", "throughput": 8766.48, "total_tokens": 47755936} +{"current_steps": 70860, "total_steps": 204665, "loss": 0.1253, "lr": 1.6528452759244322e-06, "epoch": 1.7311215889380207, "percentage": 34.62, "elapsed_time": "1:30:47", "remaining_time": "2:51:27", "throughput": 8766.52, "total_tokens": 47759200} +{"current_steps": 70865, "total_steps": 204665, "loss": 0.0245, "lr": 1.6527806765123345e-06, "epoch": 1.7312437397698677, "percentage": 34.62, "elapsed_time": "1:30:48", "remaining_time": "2:51:26", "throughput": 8766.62, "total_tokens": 47762848} +{"current_steps": 70870, "total_steps": 204665, "loss": 0.0019, "lr": 1.6527160723530403e-06, "epoch": 1.7313658906017149, "percentage": 34.63, "elapsed_time": "1:30:48", "remaining_time": "2:51:26", "throughput": 8766.66, "total_tokens": 47766048} +{"current_steps": 70875, "total_steps": 204665, "loss": 0.0652, "lr": 1.6526514634470188e-06, "epoch": 1.731488041433562, "percentage": 34.63, "elapsed_time": "1:30:48", "remaining_time": "2:51:25", "throughput": 8766.75, "total_tokens": 47769696} +{"current_steps": 70880, "total_steps": 204665, "loss": 0.0015, "lr": 1.6525868497947406e-06, "epoch": 1.7316101922654092, "percentage": 34.63, "elapsed_time": "1:30:49", "remaining_time": "2:51:25", "throughput": 8766.79, "total_tokens": 47772896} +{"current_steps": 70885, "total_steps": 204665, "loss": 0.0574, "lr": 1.6525222313966754e-06, "epoch": 1.7317323430972564, "percentage": 34.63, "elapsed_time": "1:30:49", "remaining_time": "2:51:25", "throughput": 8766.82, "total_tokens": 47776096} +{"current_steps": 70890, "total_steps": 204665, "loss": 0.1191, "lr": 1.6524576082532927e-06, "epoch": 1.7318544939291036, "percentage": 34.64, "elapsed_time": "1:30:49", "remaining_time": "2:51:24", "throughput": 8766.87, "total_tokens": 47779360} +{"current_steps": 70895, "total_steps": 204665, "loss": 0.1471, "lr": 1.6523929803650632e-06, "epoch": 1.7319766447609508, "percentage": 34.64, "elapsed_time": "1:30:50", "remaining_time": "2:51:24", "throughput": 8766.98, "total_tokens": 47783008} +{"current_steps": 70900, "total_steps": 204665, "loss": 0.0375, "lr": 1.6523283477324561e-06, "epoch": 1.732098795592798, "percentage": 34.64, "elapsed_time": "1:30:50", "remaining_time": "2:51:23", "throughput": 8766.99, "total_tokens": 47786080} +{"current_steps": 70905, "total_steps": 204665, "loss": 0.1278, "lr": 1.652263710355942e-06, "epoch": 1.7322209464246452, "percentage": 34.64, "elapsed_time": "1:30:51", "remaining_time": "2:51:23", "throughput": 8767.0, "total_tokens": 47789088} +{"current_steps": 70910, "total_steps": 204665, "loss": 0.1328, "lr": 1.6521990682359906e-06, "epoch": 1.7323430972564924, "percentage": 34.65, "elapsed_time": "1:30:51", "remaining_time": "2:51:22", "throughput": 8767.14, "total_tokens": 47792992} +{"current_steps": 70915, "total_steps": 204665, "loss": 0.089, "lr": 1.6521344213730723e-06, "epoch": 1.7324652480883396, "percentage": 34.65, "elapsed_time": "1:30:51", "remaining_time": "2:51:22", "throughput": 8767.22, "total_tokens": 47796512} +{"current_steps": 70920, "total_steps": 204665, "loss": 0.091, "lr": 1.652069769767657e-06, "epoch": 1.7325873989201868, "percentage": 34.65, "elapsed_time": "1:30:52", "remaining_time": "2:51:21", "throughput": 8767.3, "total_tokens": 47800032} +{"current_steps": 70925, "total_steps": 204665, "loss": 0.0469, "lr": 1.6520051134202154e-06, "epoch": 1.732709549752034, "percentage": 34.65, "elapsed_time": "1:30:52", "remaining_time": "2:51:21", "throughput": 8767.43, "total_tokens": 47803872} +{"current_steps": 70930, "total_steps": 204665, "loss": 0.1223, "lr": 1.6519404523312166e-06, "epoch": 1.7328317005838811, "percentage": 34.66, "elapsed_time": "1:30:52", "remaining_time": "2:51:20", "throughput": 8767.54, "total_tokens": 47807584} +{"current_steps": 70935, "total_steps": 204665, "loss": 0.0338, "lr": 1.6518757865011316e-06, "epoch": 1.732953851415728, "percentage": 34.66, "elapsed_time": "1:30:53", "remaining_time": "2:51:20", "throughput": 8767.61, "total_tokens": 47811040} +{"current_steps": 70940, "total_steps": 204665, "loss": 0.0911, "lr": 1.651811115930431e-06, "epoch": 1.7330760022475753, "percentage": 34.66, "elapsed_time": "1:30:53", "remaining_time": "2:51:20", "throughput": 8767.62, "total_tokens": 47814048} +{"current_steps": 70945, "total_steps": 204665, "loss": 0.1733, "lr": 1.651746440619584e-06, "epoch": 1.7331981530794225, "percentage": 34.66, "elapsed_time": "1:30:53", "remaining_time": "2:51:19", "throughput": 8767.68, "total_tokens": 47817440} +{"current_steps": 70950, "total_steps": 204665, "loss": 0.0495, "lr": 1.651681760569062e-06, "epoch": 1.7333203039112697, "percentage": 34.67, "elapsed_time": "1:30:54", "remaining_time": "2:51:19", "throughput": 8767.7, "total_tokens": 47820512} +{"current_steps": 70955, "total_steps": 204665, "loss": 0.0826, "lr": 1.651617075779335e-06, "epoch": 1.7334424547431166, "percentage": 34.67, "elapsed_time": "1:30:54", "remaining_time": "2:51:18", "throughput": 8767.72, "total_tokens": 47823648} +{"current_steps": 70960, "total_steps": 204665, "loss": 0.0421, "lr": 1.651552386250873e-06, "epoch": 1.7335646055749638, "percentage": 34.67, "elapsed_time": "1:30:54", "remaining_time": "2:51:18", "throughput": 8767.81, "total_tokens": 47827168} +{"current_steps": 70965, "total_steps": 204665, "loss": 0.0399, "lr": 1.6514876919841472e-06, "epoch": 1.733686756406811, "percentage": 34.67, "elapsed_time": "1:30:55", "remaining_time": "2:51:17", "throughput": 8767.87, "total_tokens": 47830560} +{"current_steps": 70970, "total_steps": 204665, "loss": 0.1524, "lr": 1.6514229929796274e-06, "epoch": 1.7338089072386582, "percentage": 34.68, "elapsed_time": "1:30:55", "remaining_time": "2:51:17", "throughput": 8767.89, "total_tokens": 47833696} +{"current_steps": 70975, "total_steps": 204665, "loss": 0.0509, "lr": 1.6513582892377846e-06, "epoch": 1.7339310580705054, "percentage": 34.68, "elapsed_time": "1:30:55", "remaining_time": "2:51:16", "throughput": 8767.94, "total_tokens": 47836960} +{"current_steps": 70980, "total_steps": 204665, "loss": 0.0013, "lr": 1.651293580759089e-06, "epoch": 1.7340532089023526, "percentage": 34.68, "elapsed_time": "1:30:56", "remaining_time": "2:51:16", "throughput": 8767.96, "total_tokens": 47840032} +{"current_steps": 70985, "total_steps": 204665, "loss": 0.0849, "lr": 1.6512288675440113e-06, "epoch": 1.7341753597341998, "percentage": 34.68, "elapsed_time": "1:30:56", "remaining_time": "2:51:15", "throughput": 8767.94, "total_tokens": 47842912} +{"current_steps": 70990, "total_steps": 204665, "loss": 0.0021, "lr": 1.6511641495930224e-06, "epoch": 1.734297510566047, "percentage": 34.69, "elapsed_time": "1:30:56", "remaining_time": "2:51:15", "throughput": 8768.05, "total_tokens": 47846624} +{"current_steps": 70995, "total_steps": 204665, "loss": 0.1011, "lr": 1.651099426906592e-06, "epoch": 1.7344196613978942, "percentage": 34.69, "elapsed_time": "1:30:57", "remaining_time": "2:51:14", "throughput": 8768.06, "total_tokens": 47849632} +{"current_steps": 71000, "total_steps": 204665, "loss": 0.2572, "lr": 1.651034699485192e-06, "epoch": 1.7345418122297414, "percentage": 34.69, "elapsed_time": "1:30:57", "remaining_time": "2:51:14", "throughput": 8768.24, "total_tokens": 47853792} +{"current_steps": 71005, "total_steps": 204665, "loss": 0.1634, "lr": 1.6509699673292925e-06, "epoch": 1.7346639630615885, "percentage": 34.69, "elapsed_time": "1:30:57", "remaining_time": "2:51:14", "throughput": 8768.31, "total_tokens": 47857248} +{"current_steps": 71010, "total_steps": 204665, "loss": 0.2716, "lr": 1.6509052304393643e-06, "epoch": 1.7347861138934357, "percentage": 34.7, "elapsed_time": "1:30:58", "remaining_time": "2:51:13", "throughput": 8768.35, "total_tokens": 47860448} +{"current_steps": 71015, "total_steps": 204665, "loss": 0.0248, "lr": 1.650840488815878e-06, "epoch": 1.734908264725283, "percentage": 34.7, "elapsed_time": "1:30:58", "remaining_time": "2:51:13", "throughput": 8768.53, "total_tokens": 47864672} +{"current_steps": 71020, "total_steps": 204665, "loss": 0.0304, "lr": 1.6507757424593047e-06, "epoch": 1.73503041555713, "percentage": 34.7, "elapsed_time": "1:30:59", "remaining_time": "2:51:12", "throughput": 8768.65, "total_tokens": 47868448} +{"current_steps": 71025, "total_steps": 204665, "loss": 0.187, "lr": 1.6507109913701154e-06, "epoch": 1.735152566388977, "percentage": 34.7, "elapsed_time": "1:30:59", "remaining_time": "2:51:12", "throughput": 8768.78, "total_tokens": 47872288} +{"current_steps": 71030, "total_steps": 204665, "loss": 0.038, "lr": 1.6506462355487804e-06, "epoch": 1.7352747172208243, "percentage": 34.71, "elapsed_time": "1:30:59", "remaining_time": "2:51:11", "throughput": 8768.86, "total_tokens": 47875744} +{"current_steps": 71035, "total_steps": 204665, "loss": 0.0689, "lr": 1.650581474995771e-06, "epoch": 1.7353968680526715, "percentage": 34.71, "elapsed_time": "1:31:00", "remaining_time": "2:51:11", "throughput": 8768.96, "total_tokens": 47879392} +{"current_steps": 71040, "total_steps": 204665, "loss": 0.0723, "lr": 1.6505167097115581e-06, "epoch": 1.7355190188845186, "percentage": 34.71, "elapsed_time": "1:31:00", "remaining_time": "2:51:10", "throughput": 8768.98, "total_tokens": 47882528} +{"current_steps": 71045, "total_steps": 204665, "loss": 0.0519, "lr": 1.650451939696613e-06, "epoch": 1.7356411697163656, "percentage": 34.71, "elapsed_time": "1:31:00", "remaining_time": "2:51:10", "throughput": 8769.12, "total_tokens": 47886432} +{"current_steps": 71050, "total_steps": 204665, "loss": 0.0505, "lr": 1.6503871649514064e-06, "epoch": 1.7357633205482128, "percentage": 34.72, "elapsed_time": "1:31:01", "remaining_time": "2:51:10", "throughput": 8769.13, "total_tokens": 47889504} +{"current_steps": 71055, "total_steps": 204665, "loss": 0.0962, "lr": 1.6503223854764093e-06, "epoch": 1.73588547138006, "percentage": 34.72, "elapsed_time": "1:31:01", "remaining_time": "2:51:09", "throughput": 8769.17, "total_tokens": 47892768} +{"current_steps": 71060, "total_steps": 204665, "loss": 0.057, "lr": 1.6502576012720928e-06, "epoch": 1.7360076222119072, "percentage": 34.72, "elapsed_time": "1:31:01", "remaining_time": "2:51:09", "throughput": 8769.28, "total_tokens": 47896480} +{"current_steps": 71065, "total_steps": 204665, "loss": 0.0938, "lr": 1.6501928123389282e-06, "epoch": 1.7361297730437544, "percentage": 34.72, "elapsed_time": "1:31:02", "remaining_time": "2:51:08", "throughput": 8769.4, "total_tokens": 47900256} +{"current_steps": 71070, "total_steps": 204665, "loss": 0.079, "lr": 1.6501280186773867e-06, "epoch": 1.7362519238756016, "percentage": 34.73, "elapsed_time": "1:31:02", "remaining_time": "2:51:08", "throughput": 8769.46, "total_tokens": 47903648} +{"current_steps": 71075, "total_steps": 204665, "loss": 0.1451, "lr": 1.6500632202879392e-06, "epoch": 1.7363740747074488, "percentage": 34.73, "elapsed_time": "1:31:02", "remaining_time": "2:51:07", "throughput": 8769.5, "total_tokens": 47906912} +{"current_steps": 71080, "total_steps": 204665, "loss": 0.0872, "lr": 1.6499984171710572e-06, "epoch": 1.736496225539296, "percentage": 34.73, "elapsed_time": "1:31:03", "remaining_time": "2:51:07", "throughput": 8769.6, "total_tokens": 47910560} +{"current_steps": 71085, "total_steps": 204665, "loss": 0.0756, "lr": 1.6499336093272121e-06, "epoch": 1.7366183763711431, "percentage": 34.73, "elapsed_time": "1:31:03", "remaining_time": "2:51:06", "throughput": 8769.62, "total_tokens": 47913632} +{"current_steps": 71090, "total_steps": 204665, "loss": 0.0782, "lr": 1.6498687967568745e-06, "epoch": 1.7367405272029903, "percentage": 34.73, "elapsed_time": "1:31:03", "remaining_time": "2:51:06", "throughput": 8769.7, "total_tokens": 47917152} +{"current_steps": 71095, "total_steps": 204665, "loss": 0.0485, "lr": 1.6498039794605166e-06, "epoch": 1.7368626780348375, "percentage": 34.74, "elapsed_time": "1:31:04", "remaining_time": "2:51:06", "throughput": 8769.72, "total_tokens": 47920288} +{"current_steps": 71100, "total_steps": 204665, "loss": 0.0867, "lr": 1.649739157438609e-06, "epoch": 1.7369848288666847, "percentage": 34.74, "elapsed_time": "1:31:04", "remaining_time": "2:51:05", "throughput": 8769.81, "total_tokens": 47923872} +{"current_steps": 71105, "total_steps": 204665, "loss": 0.036, "lr": 1.649674330691624e-06, "epoch": 1.737106979698532, "percentage": 34.74, "elapsed_time": "1:31:04", "remaining_time": "2:51:05", "throughput": 8769.86, "total_tokens": 47927200} +{"current_steps": 71110, "total_steps": 204665, "loss": 0.0798, "lr": 1.6496094992200322e-06, "epoch": 1.7372291305303789, "percentage": 34.74, "elapsed_time": "1:31:05", "remaining_time": "2:51:04", "throughput": 8769.9, "total_tokens": 47930464} +{"current_steps": 71115, "total_steps": 204665, "loss": 0.0412, "lr": 1.6495446630243056e-06, "epoch": 1.737351281362226, "percentage": 34.75, "elapsed_time": "1:31:05", "remaining_time": "2:51:04", "throughput": 8769.98, "total_tokens": 47933984} +{"current_steps": 71120, "total_steps": 204665, "loss": 0.059, "lr": 1.649479822104915e-06, "epoch": 1.7374734321940732, "percentage": 34.75, "elapsed_time": "1:31:06", "remaining_time": "2:51:03", "throughput": 8770.01, "total_tokens": 47937184} +{"current_steps": 71125, "total_steps": 204665, "loss": 0.0557, "lr": 1.649414976462333e-06, "epoch": 1.7375955830259204, "percentage": 34.75, "elapsed_time": "1:31:06", "remaining_time": "2:51:03", "throughput": 8770.06, "total_tokens": 47940512} +{"current_steps": 71130, "total_steps": 204665, "loss": 0.0509, "lr": 1.6493501260970306e-06, "epoch": 1.7377177338577676, "percentage": 34.75, "elapsed_time": "1:31:06", "remaining_time": "2:51:02", "throughput": 8770.15, "total_tokens": 47944096} +{"current_steps": 71135, "total_steps": 204665, "loss": 0.0978, "lr": 1.6492852710094792e-06, "epoch": 1.7378398846896146, "percentage": 34.76, "elapsed_time": "1:31:07", "remaining_time": "2:51:02", "throughput": 8770.27, "total_tokens": 47947872} +{"current_steps": 71140, "total_steps": 204665, "loss": 0.0629, "lr": 1.649220411200151e-06, "epoch": 1.7379620355214618, "percentage": 34.76, "elapsed_time": "1:31:07", "remaining_time": "2:51:02", "throughput": 8770.33, "total_tokens": 47951200} +{"current_steps": 71145, "total_steps": 204665, "loss": 0.0905, "lr": 1.649155546669517e-06, "epoch": 1.738084186353309, "percentage": 34.76, "elapsed_time": "1:31:07", "remaining_time": "2:51:01", "throughput": 8770.36, "total_tokens": 47954400} +{"current_steps": 71150, "total_steps": 204665, "loss": 0.12, "lr": 1.6490906774180493e-06, "epoch": 1.7382063371851562, "percentage": 34.76, "elapsed_time": "1:31:08", "remaining_time": "2:51:01", "throughput": 8770.4, "total_tokens": 47957664} +{"current_steps": 71155, "total_steps": 204665, "loss": 0.0773, "lr": 1.6490258034462196e-06, "epoch": 1.7383284880170033, "percentage": 34.77, "elapsed_time": "1:31:08", "remaining_time": "2:51:00", "throughput": 8770.45, "total_tokens": 47960928} +{"current_steps": 71160, "total_steps": 204665, "loss": 0.0025, "lr": 1.6489609247544998e-06, "epoch": 1.7384506388488505, "percentage": 34.77, "elapsed_time": "1:31:08", "remaining_time": "2:51:00", "throughput": 8770.53, "total_tokens": 47964512} +{"current_steps": 71165, "total_steps": 204665, "loss": 0.1051, "lr": 1.6488960413433617e-06, "epoch": 1.7385727896806977, "percentage": 34.77, "elapsed_time": "1:31:09", "remaining_time": "2:50:59", "throughput": 8770.52, "total_tokens": 47967456} +{"current_steps": 71170, "total_steps": 204665, "loss": 0.0377, "lr": 1.6488311532132768e-06, "epoch": 1.738694940512545, "percentage": 34.77, "elapsed_time": "1:31:09", "remaining_time": "2:50:59", "throughput": 8770.51, "total_tokens": 47970336} +{"current_steps": 71175, "total_steps": 204665, "loss": 0.1525, "lr": 1.6487662603647174e-06, "epoch": 1.738817091344392, "percentage": 34.78, "elapsed_time": "1:31:09", "remaining_time": "2:50:58", "throughput": 8770.61, "total_tokens": 47973984} +{"current_steps": 71180, "total_steps": 204665, "loss": 0.045, "lr": 1.6487013627981554e-06, "epoch": 1.7389392421762393, "percentage": 34.78, "elapsed_time": "1:31:10", "remaining_time": "2:50:58", "throughput": 8770.65, "total_tokens": 47977248} +{"current_steps": 71185, "total_steps": 204665, "loss": 0.1077, "lr": 1.648636460514062e-06, "epoch": 1.7390613930080865, "percentage": 34.78, "elapsed_time": "1:31:10", "remaining_time": "2:50:57", "throughput": 8770.67, "total_tokens": 47980384} +{"current_steps": 71190, "total_steps": 204665, "loss": 0.0564, "lr": 1.6485715535129107e-06, "epoch": 1.7391835438399337, "percentage": 34.78, "elapsed_time": "1:31:10", "remaining_time": "2:50:57", "throughput": 8770.7, "total_tokens": 47983584} +{"current_steps": 71195, "total_steps": 204665, "loss": 0.0383, "lr": 1.648506641795172e-06, "epoch": 1.7393056946717809, "percentage": 34.79, "elapsed_time": "1:31:11", "remaining_time": "2:50:56", "throughput": 8770.71, "total_tokens": 47986656} +{"current_steps": 71200, "total_steps": 204665, "loss": 0.0527, "lr": 1.6484417253613184e-06, "epoch": 1.7394278455036278, "percentage": 34.79, "elapsed_time": "1:31:11", "remaining_time": "2:50:56", "throughput": 8770.8, "total_tokens": 47990240} +{"current_steps": 71205, "total_steps": 204665, "loss": 0.1255, "lr": 1.6483768042118227e-06, "epoch": 1.739549996335475, "percentage": 34.79, "elapsed_time": "1:31:11", "remaining_time": "2:50:56", "throughput": 8770.85, "total_tokens": 47993568} +{"current_steps": 71210, "total_steps": 204665, "loss": 0.1047, "lr": 1.6483118783471563e-06, "epoch": 1.7396721471673222, "percentage": 34.79, "elapsed_time": "1:31:12", "remaining_time": "2:50:55", "throughput": 8770.9, "total_tokens": 47996832} +{"current_steps": 71215, "total_steps": 204665, "loss": 0.0045, "lr": 1.6482469477677916e-06, "epoch": 1.7397942979991694, "percentage": 34.8, "elapsed_time": "1:31:12", "remaining_time": "2:50:55", "throughput": 8770.96, "total_tokens": 48000224} +{"current_steps": 71220, "total_steps": 204665, "loss": 0.1856, "lr": 1.6481820124742005e-06, "epoch": 1.7399164488310166, "percentage": 34.8, "elapsed_time": "1:31:12", "remaining_time": "2:50:54", "throughput": 8771.04, "total_tokens": 48003744} +{"current_steps": 71225, "total_steps": 204665, "loss": 0.0031, "lr": 1.6481170724668556e-06, "epoch": 1.7400385996628636, "percentage": 34.8, "elapsed_time": "1:31:13", "remaining_time": "2:50:54", "throughput": 8771.06, "total_tokens": 48006880} +{"current_steps": 71230, "total_steps": 204665, "loss": 0.0432, "lr": 1.648052127746229e-06, "epoch": 1.7401607504947108, "percentage": 34.8, "elapsed_time": "1:31:13", "remaining_time": "2:50:53", "throughput": 8771.1, "total_tokens": 48010144} +{"current_steps": 71235, "total_steps": 204665, "loss": 0.0296, "lr": 1.6479871783127932e-06, "epoch": 1.740282901326558, "percentage": 34.81, "elapsed_time": "1:31:14", "remaining_time": "2:50:53", "throughput": 8771.11, "total_tokens": 48013216} +{"current_steps": 71240, "total_steps": 204665, "loss": 0.1239, "lr": 1.6479222241670204e-06, "epoch": 1.7404050521584051, "percentage": 34.81, "elapsed_time": "1:31:14", "remaining_time": "2:50:52", "throughput": 8771.16, "total_tokens": 48016544} +{"current_steps": 71245, "total_steps": 204665, "loss": 0.0498, "lr": 1.6478572653093826e-06, "epoch": 1.7405272029902523, "percentage": 34.81, "elapsed_time": "1:31:14", "remaining_time": "2:50:52", "throughput": 8771.2, "total_tokens": 48019744} +{"current_steps": 71250, "total_steps": 204665, "loss": 0.0319, "lr": 1.6477923017403526e-06, "epoch": 1.7406493538220995, "percentage": 34.81, "elapsed_time": "1:31:15", "remaining_time": "2:50:52", "throughput": 8771.27, "total_tokens": 48023200} +{"current_steps": 71255, "total_steps": 204665, "loss": 0.1408, "lr": 1.647727333460403e-06, "epoch": 1.7407715046539467, "percentage": 34.82, "elapsed_time": "1:31:15", "remaining_time": "2:50:51", "throughput": 8771.4, "total_tokens": 48027104} +{"current_steps": 71260, "total_steps": 204665, "loss": 0.0516, "lr": 1.6476623604700058e-06, "epoch": 1.740893655485794, "percentage": 34.82, "elapsed_time": "1:31:15", "remaining_time": "2:50:51", "throughput": 8771.54, "total_tokens": 48031072} +{"current_steps": 71265, "total_steps": 204665, "loss": 0.0052, "lr": 1.6475973827696336e-06, "epoch": 1.741015806317641, "percentage": 34.82, "elapsed_time": "1:31:16", "remaining_time": "2:50:50", "throughput": 8771.55, "total_tokens": 48034144} +{"current_steps": 71270, "total_steps": 204665, "loss": 0.0012, "lr": 1.6475324003597591e-06, "epoch": 1.7411379571494883, "percentage": 34.82, "elapsed_time": "1:31:16", "remaining_time": "2:50:50", "throughput": 8771.56, "total_tokens": 48037216} +{"current_steps": 71275, "total_steps": 204665, "loss": 0.1353, "lr": 1.6474674132408548e-06, "epoch": 1.7412601079813355, "percentage": 34.83, "elapsed_time": "1:31:16", "remaining_time": "2:50:49", "throughput": 8771.59, "total_tokens": 48040416} +{"current_steps": 71280, "total_steps": 204665, "loss": 0.0746, "lr": 1.6474024214133935e-06, "epoch": 1.7413822588131826, "percentage": 34.83, "elapsed_time": "1:31:17", "remaining_time": "2:50:49", "throughput": 8771.69, "total_tokens": 48044064} +{"current_steps": 71285, "total_steps": 204665, "loss": 0.145, "lr": 1.6473374248778475e-06, "epoch": 1.7415044096450298, "percentage": 34.83, "elapsed_time": "1:31:17", "remaining_time": "2:50:48", "throughput": 8771.76, "total_tokens": 48047520} +{"current_steps": 71290, "total_steps": 204665, "loss": 0.1019, "lr": 1.6472724236346897e-06, "epoch": 1.7416265604768768, "percentage": 34.83, "elapsed_time": "1:31:17", "remaining_time": "2:50:48", "throughput": 8771.81, "total_tokens": 48050848} +{"current_steps": 71295, "total_steps": 204665, "loss": 0.2008, "lr": 1.647207417684393e-06, "epoch": 1.741748711308724, "percentage": 34.83, "elapsed_time": "1:31:18", "remaining_time": "2:50:47", "throughput": 8771.92, "total_tokens": 48054560} +{"current_steps": 71300, "total_steps": 204665, "loss": 0.0145, "lr": 1.6471424070274295e-06, "epoch": 1.7418708621405712, "percentage": 34.84, "elapsed_time": "1:31:18", "remaining_time": "2:50:47", "throughput": 8772.01, "total_tokens": 48058144} +{"current_steps": 71305, "total_steps": 204665, "loss": 0.0253, "lr": 1.6470773916642726e-06, "epoch": 1.7419930129724184, "percentage": 34.84, "elapsed_time": "1:31:18", "remaining_time": "2:50:47", "throughput": 8772.0, "total_tokens": 48061088} +{"current_steps": 71310, "total_steps": 204665, "loss": 0.1064, "lr": 1.6470123715953944e-06, "epoch": 1.7421151638042653, "percentage": 34.84, "elapsed_time": "1:31:19", "remaining_time": "2:50:46", "throughput": 8772.03, "total_tokens": 48064224} +{"current_steps": 71315, "total_steps": 204665, "loss": 0.0435, "lr": 1.6469473468212688e-06, "epoch": 1.7422373146361125, "percentage": 34.84, "elapsed_time": "1:31:19", "remaining_time": "2:50:46", "throughput": 8772.04, "total_tokens": 48067296} +{"current_steps": 71320, "total_steps": 204665, "loss": 0.0313, "lr": 1.646882317342368e-06, "epoch": 1.7423594654679597, "percentage": 34.85, "elapsed_time": "1:31:19", "remaining_time": "2:50:45", "throughput": 8772.26, "total_tokens": 48071712} +{"current_steps": 71325, "total_steps": 204665, "loss": 0.0538, "lr": 1.6468172831591647e-06, "epoch": 1.742481616299807, "percentage": 34.85, "elapsed_time": "1:31:20", "remaining_time": "2:50:45", "throughput": 8772.31, "total_tokens": 48075104} +{"current_steps": 71330, "total_steps": 204665, "loss": 0.0011, "lr": 1.6467522442721325e-06, "epoch": 1.742603767131654, "percentage": 34.85, "elapsed_time": "1:31:20", "remaining_time": "2:50:44", "throughput": 8772.41, "total_tokens": 48078688} +{"current_steps": 71335, "total_steps": 204665, "loss": 0.152, "lr": 1.6466872006817436e-06, "epoch": 1.7427259179635013, "percentage": 34.85, "elapsed_time": "1:31:21", "remaining_time": "2:50:44", "throughput": 8772.49, "total_tokens": 48082272} +{"current_steps": 71340, "total_steps": 204665, "loss": 0.0009, "lr": 1.6466221523884715e-06, "epoch": 1.7428480687953485, "percentage": 34.86, "elapsed_time": "1:31:21", "remaining_time": "2:50:43", "throughput": 8772.58, "total_tokens": 48085856} +{"current_steps": 71345, "total_steps": 204665, "loss": 0.1235, "lr": 1.6465570993927895e-06, "epoch": 1.7429702196271957, "percentage": 34.86, "elapsed_time": "1:31:21", "remaining_time": "2:50:43", "throughput": 8772.64, "total_tokens": 48089312} +{"current_steps": 71350, "total_steps": 204665, "loss": 0.0428, "lr": 1.6464920416951702e-06, "epoch": 1.7430923704590429, "percentage": 34.86, "elapsed_time": "1:31:22", "remaining_time": "2:50:43", "throughput": 8772.7, "total_tokens": 48092704} +{"current_steps": 71355, "total_steps": 204665, "loss": 0.0574, "lr": 1.6464269792960867e-06, "epoch": 1.74321452129089, "percentage": 34.86, "elapsed_time": "1:31:22", "remaining_time": "2:50:42", "throughput": 8772.78, "total_tokens": 48096224} +{"current_steps": 71360, "total_steps": 204665, "loss": 0.0495, "lr": 1.6463619121960127e-06, "epoch": 1.7433366721227372, "percentage": 34.87, "elapsed_time": "1:31:22", "remaining_time": "2:50:42", "throughput": 8772.81, "total_tokens": 48099424} +{"current_steps": 71365, "total_steps": 204665, "loss": 0.0956, "lr": 1.646296840395421e-06, "epoch": 1.7434588229545844, "percentage": 34.87, "elapsed_time": "1:31:23", "remaining_time": "2:50:41", "throughput": 8772.83, "total_tokens": 48102560} +{"current_steps": 71370, "total_steps": 204665, "loss": 0.1285, "lr": 1.6462317638947846e-06, "epoch": 1.7435809737864316, "percentage": 34.87, "elapsed_time": "1:31:23", "remaining_time": "2:50:41", "throughput": 8772.88, "total_tokens": 48105824} +{"current_steps": 71375, "total_steps": 204665, "loss": 0.0709, "lr": 1.646166682694577e-06, "epoch": 1.7437031246182788, "percentage": 34.87, "elapsed_time": "1:31:23", "remaining_time": "2:50:40", "throughput": 8772.97, "total_tokens": 48109472} +{"current_steps": 71380, "total_steps": 204665, "loss": 0.08, "lr": 1.6461015967952717e-06, "epoch": 1.7438252754501258, "percentage": 34.88, "elapsed_time": "1:31:24", "remaining_time": "2:50:40", "throughput": 8772.99, "total_tokens": 48112608} +{"current_steps": 71385, "total_steps": 204665, "loss": 0.0549, "lr": 1.6460365061973418e-06, "epoch": 1.743947426281973, "percentage": 34.88, "elapsed_time": "1:31:24", "remaining_time": "2:50:39", "throughput": 8773.04, "total_tokens": 48115936} +{"current_steps": 71390, "total_steps": 204665, "loss": 0.0378, "lr": 1.6459714109012603e-06, "epoch": 1.7440695771138202, "percentage": 34.88, "elapsed_time": "1:31:24", "remaining_time": "2:50:39", "throughput": 8773.2, "total_tokens": 48119968} +{"current_steps": 71395, "total_steps": 204665, "loss": 0.1648, "lr": 1.6459063109075014e-06, "epoch": 1.7441917279456673, "percentage": 34.88, "elapsed_time": "1:31:25", "remaining_time": "2:50:39", "throughput": 8773.24, "total_tokens": 48123232} +{"current_steps": 71400, "total_steps": 204665, "loss": 0.1554, "lr": 1.6458412062165378e-06, "epoch": 1.7443138787775143, "percentage": 34.89, "elapsed_time": "1:31:25", "remaining_time": "2:50:38", "throughput": 8773.33, "total_tokens": 48126816} +{"current_steps": 71405, "total_steps": 204665, "loss": 0.1034, "lr": 1.6457760968288432e-06, "epoch": 1.7444360296093615, "percentage": 34.89, "elapsed_time": "1:31:25", "remaining_time": "2:50:38", "throughput": 8773.34, "total_tokens": 48129888} +{"current_steps": 71410, "total_steps": 204665, "loss": 0.1703, "lr": 1.6457109827448914e-06, "epoch": 1.7445581804412087, "percentage": 34.89, "elapsed_time": "1:31:26", "remaining_time": "2:50:37", "throughput": 8773.38, "total_tokens": 48133152} +{"current_steps": 71415, "total_steps": 204665, "loss": 0.1, "lr": 1.6456458639651553e-06, "epoch": 1.7446803312730559, "percentage": 34.89, "elapsed_time": "1:31:26", "remaining_time": "2:50:37", "throughput": 8773.44, "total_tokens": 48136480} +{"current_steps": 71420, "total_steps": 204665, "loss": 0.0925, "lr": 1.6455807404901093e-06, "epoch": 1.744802482104903, "percentage": 34.9, "elapsed_time": "1:31:26", "remaining_time": "2:50:36", "throughput": 8773.49, "total_tokens": 48139808} +{"current_steps": 71425, "total_steps": 204665, "loss": 0.0316, "lr": 1.6455156123202264e-06, "epoch": 1.7449246329367503, "percentage": 34.9, "elapsed_time": "1:31:27", "remaining_time": "2:50:36", "throughput": 8773.47, "total_tokens": 48142688} +{"current_steps": 71430, "total_steps": 204665, "loss": 0.0916, "lr": 1.64545047945598e-06, "epoch": 1.7450467837685975, "percentage": 34.9, "elapsed_time": "1:31:27", "remaining_time": "2:50:35", "throughput": 8773.55, "total_tokens": 48146144} +{"current_steps": 71435, "total_steps": 204665, "loss": 0.0066, "lr": 1.6453853418978444e-06, "epoch": 1.7451689346004446, "percentage": 34.9, "elapsed_time": "1:31:27", "remaining_time": "2:50:35", "throughput": 8773.6, "total_tokens": 48149472} +{"current_steps": 71440, "total_steps": 204665, "loss": 0.1033, "lr": 1.6453201996462928e-06, "epoch": 1.7452910854322918, "percentage": 34.91, "elapsed_time": "1:31:28", "remaining_time": "2:50:34", "throughput": 8773.62, "total_tokens": 48152608} +{"current_steps": 71445, "total_steps": 204665, "loss": 0.0029, "lr": 1.6452550527017994e-06, "epoch": 1.745413236264139, "percentage": 34.91, "elapsed_time": "1:31:28", "remaining_time": "2:50:34", "throughput": 8773.62, "total_tokens": 48155552} +{"current_steps": 71450, "total_steps": 204665, "loss": 0.0464, "lr": 1.6451899010648377e-06, "epoch": 1.7455353870959862, "percentage": 34.91, "elapsed_time": "1:31:29", "remaining_time": "2:50:34", "throughput": 8773.65, "total_tokens": 48158752} +{"current_steps": 71455, "total_steps": 204665, "loss": 0.1674, "lr": 1.6451247447358812e-06, "epoch": 1.7456575379278334, "percentage": 34.91, "elapsed_time": "1:31:29", "remaining_time": "2:50:33", "throughput": 8773.71, "total_tokens": 48162144} +{"current_steps": 71460, "total_steps": 204665, "loss": 0.0626, "lr": 1.6450595837154042e-06, "epoch": 1.7457796887596806, "percentage": 34.92, "elapsed_time": "1:31:29", "remaining_time": "2:50:33", "throughput": 8773.81, "total_tokens": 48165792} +{"current_steps": 71465, "total_steps": 204665, "loss": 0.0679, "lr": 1.6449944180038805e-06, "epoch": 1.7459018395915278, "percentage": 34.92, "elapsed_time": "1:31:30", "remaining_time": "2:50:32", "throughput": 8773.82, "total_tokens": 48168864} +{"current_steps": 71470, "total_steps": 204665, "loss": 0.0022, "lr": 1.6449292476017835e-06, "epoch": 1.7460239904233747, "percentage": 34.92, "elapsed_time": "1:31:30", "remaining_time": "2:50:32", "throughput": 8773.87, "total_tokens": 48172128} +{"current_steps": 71475, "total_steps": 204665, "loss": 0.1354, "lr": 1.6448640725095882e-06, "epoch": 1.746146141255222, "percentage": 34.92, "elapsed_time": "1:31:30", "remaining_time": "2:50:31", "throughput": 8773.9, "total_tokens": 48175328} +{"current_steps": 71480, "total_steps": 204665, "loss": 0.0472, "lr": 1.6447988927277674e-06, "epoch": 1.7462682920870691, "percentage": 34.93, "elapsed_time": "1:31:31", "remaining_time": "2:50:31", "throughput": 8773.97, "total_tokens": 48178848} +{"current_steps": 71485, "total_steps": 204665, "loss": 0.1125, "lr": 1.6447337082567958e-06, "epoch": 1.7463904429189163, "percentage": 34.93, "elapsed_time": "1:31:31", "remaining_time": "2:50:30", "throughput": 8774.06, "total_tokens": 48182432} +{"current_steps": 71490, "total_steps": 204665, "loss": 0.042, "lr": 1.6446685190971472e-06, "epoch": 1.7465125937507633, "percentage": 34.93, "elapsed_time": "1:31:31", "remaining_time": "2:50:30", "throughput": 8774.18, "total_tokens": 48186208} +{"current_steps": 71495, "total_steps": 204665, "loss": 0.1288, "lr": 1.6446033252492958e-06, "epoch": 1.7466347445826105, "percentage": 34.93, "elapsed_time": "1:31:32", "remaining_time": "2:50:29", "throughput": 8774.27, "total_tokens": 48189792} +{"current_steps": 71500, "total_steps": 204665, "loss": 0.0895, "lr": 1.6445381267137158e-06, "epoch": 1.7467568954144577, "percentage": 34.94, "elapsed_time": "1:31:32", "remaining_time": "2:50:29", "throughput": 8774.35, "total_tokens": 48193312} +{"current_steps": 71505, "total_steps": 204665, "loss": 0.1459, "lr": 1.644472923490881e-06, "epoch": 1.7468790462463049, "percentage": 34.94, "elapsed_time": "1:31:32", "remaining_time": "2:50:29", "throughput": 8774.36, "total_tokens": 48196384} +{"current_steps": 71510, "total_steps": 204665, "loss": 0.0364, "lr": 1.6444077155812656e-06, "epoch": 1.747001197078152, "percentage": 34.94, "elapsed_time": "1:31:33", "remaining_time": "2:50:28", "throughput": 8774.43, "total_tokens": 48199840} +{"current_steps": 71515, "total_steps": 204665, "loss": 0.003, "lr": 1.6443425029853442e-06, "epoch": 1.7471233479099992, "percentage": 34.94, "elapsed_time": "1:31:33", "remaining_time": "2:50:28", "throughput": 8774.55, "total_tokens": 48203616} +{"current_steps": 71520, "total_steps": 204665, "loss": 0.0822, "lr": 1.6442772857035906e-06, "epoch": 1.7472454987418464, "percentage": 34.94, "elapsed_time": "1:31:33", "remaining_time": "2:50:27", "throughput": 8774.58, "total_tokens": 48206816} +{"current_steps": 71525, "total_steps": 204665, "loss": 0.0369, "lr": 1.6442120637364796e-06, "epoch": 1.7473676495736936, "percentage": 34.95, "elapsed_time": "1:31:34", "remaining_time": "2:50:27", "throughput": 8774.68, "total_tokens": 48210464} +{"current_steps": 71530, "total_steps": 204665, "loss": 0.0411, "lr": 1.6441468370844848e-06, "epoch": 1.7474898004055408, "percentage": 34.95, "elapsed_time": "1:31:34", "remaining_time": "2:50:26", "throughput": 8774.78, "total_tokens": 48214112} +{"current_steps": 71535, "total_steps": 204665, "loss": 0.2124, "lr": 1.6440816057480812e-06, "epoch": 1.747611951237388, "percentage": 34.95, "elapsed_time": "1:31:34", "remaining_time": "2:50:26", "throughput": 8774.88, "total_tokens": 48217824} +{"current_steps": 71540, "total_steps": 204665, "loss": 0.0437, "lr": 1.6440163697277432e-06, "epoch": 1.7477341020692352, "percentage": 34.95, "elapsed_time": "1:31:35", "remaining_time": "2:50:25", "throughput": 8774.95, "total_tokens": 48221216} +{"current_steps": 71545, "total_steps": 204665, "loss": 0.1492, "lr": 1.6439511290239447e-06, "epoch": 1.7478562529010824, "percentage": 34.96, "elapsed_time": "1:31:35", "remaining_time": "2:50:25", "throughput": 8775.06, "total_tokens": 48224992} +{"current_steps": 71550, "total_steps": 204665, "loss": 0.0295, "lr": 1.6438858836371604e-06, "epoch": 1.7479784037329296, "percentage": 34.96, "elapsed_time": "1:31:36", "remaining_time": "2:50:25", "throughput": 8775.12, "total_tokens": 48228384} +{"current_steps": 71555, "total_steps": 204665, "loss": 0.0376, "lr": 1.6438206335678647e-06, "epoch": 1.7481005545647765, "percentage": 34.96, "elapsed_time": "1:31:36", "remaining_time": "2:50:24", "throughput": 8775.14, "total_tokens": 48231584} +{"current_steps": 71560, "total_steps": 204665, "loss": 0.0459, "lr": 1.6437553788165319e-06, "epoch": 1.7482227053966237, "percentage": 34.96, "elapsed_time": "1:31:36", "remaining_time": "2:50:24", "throughput": 8775.17, "total_tokens": 48234720} +{"current_steps": 71565, "total_steps": 204665, "loss": 0.0754, "lr": 1.6436901193836372e-06, "epoch": 1.748344856228471, "percentage": 34.97, "elapsed_time": "1:31:37", "remaining_time": "2:50:23", "throughput": 8775.26, "total_tokens": 48238368} +{"current_steps": 71570, "total_steps": 204665, "loss": 0.06, "lr": 1.6436248552696547e-06, "epoch": 1.748467007060318, "percentage": 34.97, "elapsed_time": "1:31:37", "remaining_time": "2:50:23", "throughput": 8775.32, "total_tokens": 48241760} +{"current_steps": 71575, "total_steps": 204665, "loss": 0.2301, "lr": 1.6435595864750592e-06, "epoch": 1.7485891578921653, "percentage": 34.97, "elapsed_time": "1:31:37", "remaining_time": "2:50:22", "throughput": 8775.41, "total_tokens": 48245344} +{"current_steps": 71580, "total_steps": 204665, "loss": 0.0589, "lr": 1.6434943130003253e-06, "epoch": 1.7487113087240123, "percentage": 34.97, "elapsed_time": "1:31:38", "remaining_time": "2:50:22", "throughput": 8775.51, "total_tokens": 48248992} +{"current_steps": 71585, "total_steps": 204665, "loss": 0.0801, "lr": 1.6434290348459279e-06, "epoch": 1.7488334595558594, "percentage": 34.98, "elapsed_time": "1:31:38", "remaining_time": "2:50:21", "throughput": 8775.53, "total_tokens": 48252128} +{"current_steps": 71590, "total_steps": 204665, "loss": 0.0718, "lr": 1.643363752012341e-06, "epoch": 1.7489556103877066, "percentage": 34.98, "elapsed_time": "1:31:38", "remaining_time": "2:50:21", "throughput": 8775.58, "total_tokens": 48255456} +{"current_steps": 71595, "total_steps": 204665, "loss": 0.2395, "lr": 1.6432984645000403e-06, "epoch": 1.7490777612195538, "percentage": 34.98, "elapsed_time": "1:31:39", "remaining_time": "2:50:21", "throughput": 8775.58, "total_tokens": 48258464} +{"current_steps": 71600, "total_steps": 204665, "loss": 0.0602, "lr": 1.6432331723095e-06, "epoch": 1.749199912051401, "percentage": 34.98, "elapsed_time": "1:31:39", "remaining_time": "2:50:20", "throughput": 8775.62, "total_tokens": 48261728} +{"current_steps": 71605, "total_steps": 204665, "loss": 0.1416, "lr": 1.6431678754411951e-06, "epoch": 1.7493220628832482, "percentage": 34.99, "elapsed_time": "1:31:39", "remaining_time": "2:50:20", "throughput": 8775.7, "total_tokens": 48265248} +{"current_steps": 71610, "total_steps": 204665, "loss": 0.1015, "lr": 1.6431025738956002e-06, "epoch": 1.7494442137150954, "percentage": 34.99, "elapsed_time": "1:31:40", "remaining_time": "2:50:19", "throughput": 8775.77, "total_tokens": 48268768} +{"current_steps": 71615, "total_steps": 204665, "loss": 0.15, "lr": 1.6430372676731904e-06, "epoch": 1.7495663645469426, "percentage": 34.99, "elapsed_time": "1:31:40", "remaining_time": "2:50:19", "throughput": 8775.82, "total_tokens": 48272096} +{"current_steps": 71620, "total_steps": 204665, "loss": 0.0779, "lr": 1.6429719567744406e-06, "epoch": 1.7496885153787898, "percentage": 34.99, "elapsed_time": "1:31:40", "remaining_time": "2:50:18", "throughput": 8775.89, "total_tokens": 48275552} +{"current_steps": 71625, "total_steps": 204665, "loss": 0.1512, "lr": 1.6429066411998261e-06, "epoch": 1.749810666210637, "percentage": 35.0, "elapsed_time": "1:31:41", "remaining_time": "2:50:18", "throughput": 8775.95, "total_tokens": 48278944} +{"current_steps": 71630, "total_steps": 204665, "loss": 0.1058, "lr": 1.6428413209498216e-06, "epoch": 1.7499328170424842, "percentage": 35.0, "elapsed_time": "1:31:41", "remaining_time": "2:50:17", "throughput": 8775.98, "total_tokens": 48282080} +{"current_steps": 71635, "total_steps": 204665, "loss": 0.1097, "lr": 1.6427759960249018e-06, "epoch": 1.7500549678743313, "percentage": 35.0, "elapsed_time": "1:31:41", "remaining_time": "2:50:17", "throughput": 8776.01, "total_tokens": 48285280} +{"current_steps": 71638, "total_steps": 204665, "eval_loss": 0.12312835454940796, "epoch": 1.7501282583734397, "percentage": 35.0, "elapsed_time": "1:32:29", "remaining_time": "2:51:45", "throughput": 8700.87, "total_tokens": 48287456} +{"current_steps": 71640, "total_steps": 204665, "loss": 0.0614, "lr": 1.6427106664255423e-06, "epoch": 1.7501771187061785, "percentage": 35.0, "elapsed_time": "1:33:20", "remaining_time": "2:53:18", "throughput": 8622.65, "total_tokens": 48288800} +{"current_steps": 71645, "total_steps": 204665, "loss": 0.0532, "lr": 1.642645332152218e-06, "epoch": 1.7502992695380255, "percentage": 35.01, "elapsed_time": "1:33:20", "remaining_time": "2:53:18", "throughput": 8622.65, "total_tokens": 48291744} +{"current_steps": 71650, "total_steps": 204665, "loss": 0.0864, "lr": 1.6425799932054037e-06, "epoch": 1.7504214203698727, "percentage": 35.01, "elapsed_time": "1:33:20", "remaining_time": "2:53:17", "throughput": 8622.74, "total_tokens": 48295264} +{"current_steps": 71655, "total_steps": 204665, "loss": 0.1333, "lr": 1.642514649585575e-06, "epoch": 1.7505435712017199, "percentage": 35.01, "elapsed_time": "1:33:21", "remaining_time": "2:53:17", "throughput": 8622.77, "total_tokens": 48298400} +{"current_steps": 71660, "total_steps": 204665, "loss": 0.1662, "lr": 1.6424493012932072e-06, "epoch": 1.750665722033567, "percentage": 35.01, "elapsed_time": "1:33:21", "remaining_time": "2:53:16", "throughput": 8622.82, "total_tokens": 48301600} +{"current_steps": 71665, "total_steps": 204665, "loss": 0.0255, "lr": 1.6423839483287751e-06, "epoch": 1.7507878728654143, "percentage": 35.02, "elapsed_time": "1:33:21", "remaining_time": "2:53:16", "throughput": 8622.93, "total_tokens": 48305312} +{"current_steps": 71670, "total_steps": 204665, "loss": 0.0415, "lr": 1.6423185906927542e-06, "epoch": 1.7509100236972612, "percentage": 35.02, "elapsed_time": "1:33:22", "remaining_time": "2:53:15", "throughput": 8622.97, "total_tokens": 48308448} +{"current_steps": 71675, "total_steps": 204665, "loss": 0.0536, "lr": 1.6422532283856195e-06, "epoch": 1.7510321745291084, "percentage": 35.02, "elapsed_time": "1:33:22", "remaining_time": "2:53:15", "throughput": 8623.05, "total_tokens": 48311968} +{"current_steps": 71680, "total_steps": 204665, "loss": 0.0186, "lr": 1.6421878614078466e-06, "epoch": 1.7511543253609556, "percentage": 35.02, "elapsed_time": "1:33:23", "remaining_time": "2:53:15", "throughput": 8623.13, "total_tokens": 48315424} +{"current_steps": 71685, "total_steps": 204665, "loss": 0.0714, "lr": 1.642122489759911e-06, "epoch": 1.7512764761928028, "percentage": 35.03, "elapsed_time": "1:33:23", "remaining_time": "2:53:14", "throughput": 8623.23, "total_tokens": 48319008} +{"current_steps": 71690, "total_steps": 204665, "loss": 0.1259, "lr": 1.642057113442288e-06, "epoch": 1.75139862702465, "percentage": 35.03, "elapsed_time": "1:33:23", "remaining_time": "2:53:14", "throughput": 8623.41, "total_tokens": 48323168} +{"current_steps": 71695, "total_steps": 204665, "loss": 0.0668, "lr": 1.641991732455453e-06, "epoch": 1.7515207778564972, "percentage": 35.03, "elapsed_time": "1:33:24", "remaining_time": "2:53:13", "throughput": 8623.5, "total_tokens": 48326752} +{"current_steps": 71700, "total_steps": 204665, "loss": 0.0899, "lr": 1.6419263467998813e-06, "epoch": 1.7516429286883444, "percentage": 35.03, "elapsed_time": "1:33:24", "remaining_time": "2:53:13", "throughput": 8623.57, "total_tokens": 48330144} +{"current_steps": 71705, "total_steps": 204665, "loss": 0.1817, "lr": 1.6418609564760485e-06, "epoch": 1.7517650795201916, "percentage": 35.04, "elapsed_time": "1:33:24", "remaining_time": "2:53:12", "throughput": 8623.61, "total_tokens": 48333280} +{"current_steps": 71710, "total_steps": 204665, "loss": 0.1945, "lr": 1.6417955614844304e-06, "epoch": 1.7518872303520387, "percentage": 35.04, "elapsed_time": "1:33:25", "remaining_time": "2:53:12", "throughput": 8623.65, "total_tokens": 48336544} +{"current_steps": 71715, "total_steps": 204665, "loss": 0.0362, "lr": 1.6417301618255021e-06, "epoch": 1.752009381183886, "percentage": 35.04, "elapsed_time": "1:33:25", "remaining_time": "2:53:11", "throughput": 8623.69, "total_tokens": 48339744} +{"current_steps": 71720, "total_steps": 204665, "loss": 0.0275, "lr": 1.6416647574997397e-06, "epoch": 1.7521315320157331, "percentage": 35.04, "elapsed_time": "1:33:25", "remaining_time": "2:53:11", "throughput": 8623.76, "total_tokens": 48343200} +{"current_steps": 71725, "total_steps": 204665, "loss": 0.0569, "lr": 1.6415993485076184e-06, "epoch": 1.7522536828475803, "percentage": 35.05, "elapsed_time": "1:33:26", "remaining_time": "2:53:10", "throughput": 8623.87, "total_tokens": 48346848} +{"current_steps": 71730, "total_steps": 204665, "loss": 0.002, "lr": 1.6415339348496144e-06, "epoch": 1.7523758336794275, "percentage": 35.05, "elapsed_time": "1:33:26", "remaining_time": "2:53:10", "throughput": 8623.98, "total_tokens": 48350560} +{"current_steps": 71735, "total_steps": 204665, "loss": 0.1212, "lr": 1.6414685165262027e-06, "epoch": 1.7524979845112745, "percentage": 35.05, "elapsed_time": "1:33:26", "remaining_time": "2:53:09", "throughput": 8624.06, "total_tokens": 48354016} +{"current_steps": 71740, "total_steps": 204665, "loss": 0.1804, "lr": 1.6414030935378597e-06, "epoch": 1.7526201353431217, "percentage": 35.05, "elapsed_time": "1:33:27", "remaining_time": "2:53:09", "throughput": 8624.17, "total_tokens": 48357728} +{"current_steps": 71745, "total_steps": 204665, "loss": 0.0572, "lr": 1.6413376658850607e-06, "epoch": 1.7527422861749689, "percentage": 35.05, "elapsed_time": "1:33:27", "remaining_time": "2:53:09", "throughput": 8624.26, "total_tokens": 48361248} +{"current_steps": 71750, "total_steps": 204665, "loss": 0.0051, "lr": 1.6412722335682818e-06, "epoch": 1.752864437006816, "percentage": 35.06, "elapsed_time": "1:33:27", "remaining_time": "2:53:08", "throughput": 8624.33, "total_tokens": 48364704} +{"current_steps": 71755, "total_steps": 204665, "loss": 0.096, "lr": 1.6412067965879986e-06, "epoch": 1.7529865878386632, "percentage": 35.06, "elapsed_time": "1:33:28", "remaining_time": "2:53:08", "throughput": 8624.37, "total_tokens": 48367840} +{"current_steps": 71760, "total_steps": 204665, "loss": 0.0214, "lr": 1.6411413549446873e-06, "epoch": 1.7531087386705102, "percentage": 35.06, "elapsed_time": "1:33:28", "remaining_time": "2:53:07", "throughput": 8624.48, "total_tokens": 48371552} +{"current_steps": 71765, "total_steps": 204665, "loss": 0.0951, "lr": 1.6410759086388235e-06, "epoch": 1.7532308895023574, "percentage": 35.06, "elapsed_time": "1:33:28", "remaining_time": "2:53:07", "throughput": 8624.5, "total_tokens": 48374624} +{"current_steps": 71770, "total_steps": 204665, "loss": 0.0808, "lr": 1.6410104576708835e-06, "epoch": 1.7533530403342046, "percentage": 35.07, "elapsed_time": "1:33:29", "remaining_time": "2:53:06", "throughput": 8624.6, "total_tokens": 48378208} +{"current_steps": 71775, "total_steps": 204665, "loss": 0.1468, "lr": 1.6409450020413424e-06, "epoch": 1.7534751911660518, "percentage": 35.07, "elapsed_time": "1:33:29", "remaining_time": "2:53:06", "throughput": 8624.65, "total_tokens": 48381536} +{"current_steps": 71780, "total_steps": 204665, "loss": 0.1083, "lr": 1.6408795417506773e-06, "epoch": 1.753597341997899, "percentage": 35.07, "elapsed_time": "1:33:30", "remaining_time": "2:53:05", "throughput": 8624.79, "total_tokens": 48385440} +{"current_steps": 71785, "total_steps": 204665, "loss": 0.0812, "lr": 1.6408140767993639e-06, "epoch": 1.7537194928297462, "percentage": 35.07, "elapsed_time": "1:33:30", "remaining_time": "2:53:05", "throughput": 8624.83, "total_tokens": 48388640} +{"current_steps": 71790, "total_steps": 204665, "loss": 0.155, "lr": 1.640748607187878e-06, "epoch": 1.7538416436615933, "percentage": 35.08, "elapsed_time": "1:33:30", "remaining_time": "2:53:04", "throughput": 8624.91, "total_tokens": 48392096} +{"current_steps": 71795, "total_steps": 204665, "loss": 0.0755, "lr": 1.640683132916696e-06, "epoch": 1.7539637944934405, "percentage": 35.08, "elapsed_time": "1:33:31", "remaining_time": "2:53:04", "throughput": 8624.99, "total_tokens": 48395552} +{"current_steps": 71800, "total_steps": 204665, "loss": 0.1561, "lr": 1.6406176539862936e-06, "epoch": 1.7540859453252877, "percentage": 35.08, "elapsed_time": "1:33:31", "remaining_time": "2:53:03", "throughput": 8625.03, "total_tokens": 48398816} +{"current_steps": 71805, "total_steps": 204665, "loss": 0.1175, "lr": 1.6405521703971476e-06, "epoch": 1.754208096157135, "percentage": 35.08, "elapsed_time": "1:33:31", "remaining_time": "2:53:03", "throughput": 8625.13, "total_tokens": 48402400} +{"current_steps": 71810, "total_steps": 204665, "loss": 0.0259, "lr": 1.640486682149734e-06, "epoch": 1.754330246988982, "percentage": 35.09, "elapsed_time": "1:33:32", "remaining_time": "2:53:02", "throughput": 8625.19, "total_tokens": 48405728} +{"current_steps": 71815, "total_steps": 204665, "loss": 0.1611, "lr": 1.6404211892445288e-06, "epoch": 1.7544523978208293, "percentage": 35.09, "elapsed_time": "1:33:32", "remaining_time": "2:53:02", "throughput": 8625.21, "total_tokens": 48408800} +{"current_steps": 71820, "total_steps": 204665, "loss": 0.0316, "lr": 1.6403556916820088e-06, "epoch": 1.7545745486526765, "percentage": 35.09, "elapsed_time": "1:33:32", "remaining_time": "2:53:01", "throughput": 8625.25, "total_tokens": 48412000} +{"current_steps": 71825, "total_steps": 204665, "loss": 0.0616, "lr": 1.6402901894626497e-06, "epoch": 1.7546966994845234, "percentage": 35.09, "elapsed_time": "1:33:33", "remaining_time": "2:53:01", "throughput": 8625.35, "total_tokens": 48415584} +{"current_steps": 71830, "total_steps": 204665, "loss": 0.0016, "lr": 1.6402246825869281e-06, "epoch": 1.7548188503163706, "percentage": 35.1, "elapsed_time": "1:33:33", "remaining_time": "2:53:01", "throughput": 8625.41, "total_tokens": 48418976} +{"current_steps": 71835, "total_steps": 204665, "loss": 0.0401, "lr": 1.6401591710553201e-06, "epoch": 1.7549410011482178, "percentage": 35.1, "elapsed_time": "1:33:33", "remaining_time": "2:53:00", "throughput": 8625.47, "total_tokens": 48422240} +{"current_steps": 71840, "total_steps": 204665, "loss": 0.0016, "lr": 1.6400936548683028e-06, "epoch": 1.755063151980065, "percentage": 35.1, "elapsed_time": "1:33:34", "remaining_time": "2:53:00", "throughput": 8625.53, "total_tokens": 48425632} +{"current_steps": 71845, "total_steps": 204665, "loss": 0.0936, "lr": 1.6400281340263524e-06, "epoch": 1.755185302811912, "percentage": 35.1, "elapsed_time": "1:33:34", "remaining_time": "2:52:59", "throughput": 8625.52, "total_tokens": 48428448} +{"current_steps": 71850, "total_steps": 204665, "loss": 0.0006, "lr": 1.6399626085299452e-06, "epoch": 1.7553074536437592, "percentage": 35.11, "elapsed_time": "1:33:34", "remaining_time": "2:52:59", "throughput": 8625.67, "total_tokens": 48432416} +{"current_steps": 71855, "total_steps": 204665, "loss": 0.1226, "lr": 1.6398970783795577e-06, "epoch": 1.7554296044756064, "percentage": 35.11, "elapsed_time": "1:33:35", "remaining_time": "2:52:58", "throughput": 8625.78, "total_tokens": 48436128} +{"current_steps": 71860, "total_steps": 204665, "loss": 0.1049, "lr": 1.6398315435756666e-06, "epoch": 1.7555517553074536, "percentage": 35.11, "elapsed_time": "1:33:35", "remaining_time": "2:52:58", "throughput": 8625.85, "total_tokens": 48439520} +{"current_steps": 71865, "total_steps": 204665, "loss": 0.0826, "lr": 1.6397660041187482e-06, "epoch": 1.7556739061393007, "percentage": 35.11, "elapsed_time": "1:33:35", "remaining_time": "2:52:57", "throughput": 8625.83, "total_tokens": 48442272} +{"current_steps": 71870, "total_steps": 204665, "loss": 0.0228, "lr": 1.6397004600092794e-06, "epoch": 1.755796056971148, "percentage": 35.12, "elapsed_time": "1:33:36", "remaining_time": "2:52:57", "throughput": 8625.87, "total_tokens": 48445472} +{"current_steps": 71875, "total_steps": 204665, "loss": 0.0483, "lr": 1.639634911247737e-06, "epoch": 1.7559182078029951, "percentage": 35.12, "elapsed_time": "1:33:36", "remaining_time": "2:52:56", "throughput": 8625.94, "total_tokens": 48448928} +{"current_steps": 71880, "total_steps": 204665, "loss": 0.1246, "lr": 1.6395693578345973e-06, "epoch": 1.7560403586348423, "percentage": 35.12, "elapsed_time": "1:33:37", "remaining_time": "2:52:56", "throughput": 8626.05, "total_tokens": 48452576} +{"current_steps": 71885, "total_steps": 204665, "loss": 0.0114, "lr": 1.6395037997703373e-06, "epoch": 1.7561625094666895, "percentage": 35.12, "elapsed_time": "1:33:37", "remaining_time": "2:52:55", "throughput": 8626.09, "total_tokens": 48455840} +{"current_steps": 71890, "total_steps": 204665, "loss": 0.0011, "lr": 1.6394382370554337e-06, "epoch": 1.7562846602985367, "percentage": 35.13, "elapsed_time": "1:33:37", "remaining_time": "2:52:55", "throughput": 8626.11, "total_tokens": 48458848} +{"current_steps": 71895, "total_steps": 204665, "loss": 0.002, "lr": 1.6393726696903634e-06, "epoch": 1.7564068111303839, "percentage": 35.13, "elapsed_time": "1:33:38", "remaining_time": "2:52:54", "throughput": 8626.18, "total_tokens": 48462304} +{"current_steps": 71900, "total_steps": 204665, "loss": 0.1411, "lr": 1.6393070976756027e-06, "epoch": 1.756528961962231, "percentage": 35.13, "elapsed_time": "1:33:38", "remaining_time": "2:52:54", "throughput": 8626.27, "total_tokens": 48465888} +{"current_steps": 71905, "total_steps": 204665, "loss": 0.0123, "lr": 1.639241521011629e-06, "epoch": 1.7566511127940783, "percentage": 35.13, "elapsed_time": "1:33:38", "remaining_time": "2:52:54", "throughput": 8626.33, "total_tokens": 48469152} +{"current_steps": 71910, "total_steps": 204665, "loss": 0.0587, "lr": 1.6391759396989188e-06, "epoch": 1.7567732636259255, "percentage": 35.14, "elapsed_time": "1:33:39", "remaining_time": "2:52:53", "throughput": 8626.38, "total_tokens": 48472480} +{"current_steps": 71915, "total_steps": 204665, "loss": 0.0357, "lr": 1.6391103537379496e-06, "epoch": 1.7568954144577724, "percentage": 35.14, "elapsed_time": "1:33:39", "remaining_time": "2:52:53", "throughput": 8626.49, "total_tokens": 48476128} +{"current_steps": 71920, "total_steps": 204665, "loss": 0.3049, "lr": 1.639044763129198e-06, "epoch": 1.7570175652896196, "percentage": 35.14, "elapsed_time": "1:33:39", "remaining_time": "2:52:52", "throughput": 8626.54, "total_tokens": 48479392} +{"current_steps": 71925, "total_steps": 204665, "loss": 0.0465, "lr": 1.638979167873141e-06, "epoch": 1.7571397161214668, "percentage": 35.14, "elapsed_time": "1:33:40", "remaining_time": "2:52:52", "throughput": 8626.63, "total_tokens": 48482976} +{"current_steps": 71930, "total_steps": 204665, "loss": 0.0277, "lr": 1.6389135679702554e-06, "epoch": 1.757261866953314, "percentage": 35.15, "elapsed_time": "1:33:40", "remaining_time": "2:52:51", "throughput": 8626.64, "total_tokens": 48486048} +{"current_steps": 71935, "total_steps": 204665, "loss": 0.0618, "lr": 1.6388479634210187e-06, "epoch": 1.757384017785161, "percentage": 35.15, "elapsed_time": "1:33:40", "remaining_time": "2:52:51", "throughput": 8626.66, "total_tokens": 48489120} +{"current_steps": 71940, "total_steps": 204665, "loss": 0.0703, "lr": 1.6387823542259075e-06, "epoch": 1.7575061686170081, "percentage": 35.15, "elapsed_time": "1:33:41", "remaining_time": "2:52:50", "throughput": 8626.71, "total_tokens": 48492448} +{"current_steps": 71945, "total_steps": 204665, "loss": 0.0019, "lr": 1.6387167403853994e-06, "epoch": 1.7576283194488553, "percentage": 35.15, "elapsed_time": "1:33:41", "remaining_time": "2:52:50", "throughput": 8626.82, "total_tokens": 48496096} +{"current_steps": 71950, "total_steps": 204665, "loss": 0.1197, "lr": 1.6386511218999714e-06, "epoch": 1.7577504702807025, "percentage": 35.16, "elapsed_time": "1:33:41", "remaining_time": "2:52:49", "throughput": 8626.9, "total_tokens": 48499680} +{"current_steps": 71955, "total_steps": 204665, "loss": 0.2407, "lr": 1.6385854987701007e-06, "epoch": 1.7578726211125497, "percentage": 35.16, "elapsed_time": "1:33:42", "remaining_time": "2:52:49", "throughput": 8627.03, "total_tokens": 48503584} +{"current_steps": 71960, "total_steps": 204665, "loss": 0.0942, "lr": 1.6385198709962642e-06, "epoch": 1.757994771944397, "percentage": 35.16, "elapsed_time": "1:33:42", "remaining_time": "2:52:48", "throughput": 8627.08, "total_tokens": 48506912} +{"current_steps": 71965, "total_steps": 204665, "loss": 0.2849, "lr": 1.6384542385789397e-06, "epoch": 1.758116922776244, "percentage": 35.16, "elapsed_time": "1:33:42", "remaining_time": "2:52:48", "throughput": 8627.07, "total_tokens": 48509856} +{"current_steps": 71970, "total_steps": 204665, "loss": 0.0458, "lr": 1.638388601518604e-06, "epoch": 1.7582390736080913, "percentage": 35.16, "elapsed_time": "1:33:43", "remaining_time": "2:52:48", "throughput": 8627.11, "total_tokens": 48513120} +{"current_steps": 71975, "total_steps": 204665, "loss": 0.0368, "lr": 1.6383229598157353e-06, "epoch": 1.7583612244399385, "percentage": 35.17, "elapsed_time": "1:33:43", "remaining_time": "2:52:47", "throughput": 8627.13, "total_tokens": 48516320} +{"current_steps": 71980, "total_steps": 204665, "loss": 0.0925, "lr": 1.63825731347081e-06, "epoch": 1.7584833752717857, "percentage": 35.17, "elapsed_time": "1:33:44", "remaining_time": "2:52:47", "throughput": 8627.24, "total_tokens": 48520032} +{"current_steps": 71985, "total_steps": 204665, "loss": 0.1507, "lr": 1.6381916624843058e-06, "epoch": 1.7586055261036329, "percentage": 35.17, "elapsed_time": "1:33:44", "remaining_time": "2:52:46", "throughput": 8627.26, "total_tokens": 48523168} +{"current_steps": 71990, "total_steps": 204665, "loss": 0.0392, "lr": 1.6381260068567e-06, "epoch": 1.75872767693548, "percentage": 35.17, "elapsed_time": "1:33:44", "remaining_time": "2:52:46", "throughput": 8627.32, "total_tokens": 48526560} +{"current_steps": 71995, "total_steps": 204665, "loss": 0.0382, "lr": 1.6380603465884706e-06, "epoch": 1.7588498277673272, "percentage": 35.18, "elapsed_time": "1:33:45", "remaining_time": "2:52:45", "throughput": 8627.33, "total_tokens": 48529632} +{"current_steps": 72000, "total_steps": 204665, "loss": 0.1274, "lr": 1.6379946816800945e-06, "epoch": 1.7589719785991744, "percentage": 35.18, "elapsed_time": "1:33:45", "remaining_time": "2:52:45", "throughput": 8627.42, "total_tokens": 48533216} +{"current_steps": 72005, "total_steps": 204665, "loss": 0.1549, "lr": 1.6379290121320495e-06, "epoch": 1.7590941294310214, "percentage": 35.18, "elapsed_time": "1:33:45", "remaining_time": "2:52:44", "throughput": 8627.42, "total_tokens": 48536224} +{"current_steps": 72010, "total_steps": 204665, "loss": 0.1014, "lr": 1.6378633379448133e-06, "epoch": 1.7592162802628686, "percentage": 35.18, "elapsed_time": "1:33:46", "remaining_time": "2:52:44", "throughput": 8627.47, "total_tokens": 48539552} +{"current_steps": 72015, "total_steps": 204665, "loss": 0.1593, "lr": 1.637797659118863e-06, "epoch": 1.7593384310947158, "percentage": 35.19, "elapsed_time": "1:33:46", "remaining_time": "2:52:43", "throughput": 8627.52, "total_tokens": 48542880} +{"current_steps": 72020, "total_steps": 204665, "loss": 0.0546, "lr": 1.6377319756546771e-06, "epoch": 1.759460581926563, "percentage": 35.19, "elapsed_time": "1:33:46", "remaining_time": "2:52:43", "throughput": 8627.54, "total_tokens": 48546016} +{"current_steps": 72025, "total_steps": 204665, "loss": 0.0447, "lr": 1.637666287552732e-06, "epoch": 1.75958273275841, "percentage": 35.19, "elapsed_time": "1:33:47", "remaining_time": "2:52:42", "throughput": 8627.6, "total_tokens": 48549408} +{"current_steps": 72030, "total_steps": 204665, "loss": 0.0023, "lr": 1.6376005948135068e-06, "epoch": 1.7597048835902571, "percentage": 35.19, "elapsed_time": "1:33:47", "remaining_time": "2:52:42", "throughput": 8627.63, "total_tokens": 48552672} +{"current_steps": 72035, "total_steps": 204665, "loss": 0.04, "lr": 1.6375348974374784e-06, "epoch": 1.7598270344221043, "percentage": 35.2, "elapsed_time": "1:33:47", "remaining_time": "2:52:42", "throughput": 8627.73, "total_tokens": 48556384} +{"current_steps": 72040, "total_steps": 204665, "loss": 0.1235, "lr": 1.6374691954251247e-06, "epoch": 1.7599491852539515, "percentage": 35.2, "elapsed_time": "1:33:48", "remaining_time": "2:52:41", "throughput": 8627.73, "total_tokens": 48559392} +{"current_steps": 72045, "total_steps": 204665, "loss": 0.0937, "lr": 1.6374034887769238e-06, "epoch": 1.7600713360857987, "percentage": 35.2, "elapsed_time": "1:33:48", "remaining_time": "2:52:41", "throughput": 8627.76, "total_tokens": 48562592} +{"current_steps": 72050, "total_steps": 204665, "loss": 0.0362, "lr": 1.6373377774933528e-06, "epoch": 1.7601934869176459, "percentage": 35.2, "elapsed_time": "1:33:48", "remaining_time": "2:52:40", "throughput": 8627.75, "total_tokens": 48565536} +{"current_steps": 72055, "total_steps": 204665, "loss": 0.089, "lr": 1.6372720615748903e-06, "epoch": 1.760315637749493, "percentage": 35.21, "elapsed_time": "1:33:49", "remaining_time": "2:52:40", "throughput": 8627.83, "total_tokens": 48569120} +{"current_steps": 72060, "total_steps": 204665, "loss": 0.0921, "lr": 1.637206341022014e-06, "epoch": 1.7604377885813403, "percentage": 35.21, "elapsed_time": "1:33:49", "remaining_time": "2:52:39", "throughput": 8627.87, "total_tokens": 48572384} +{"current_steps": 72065, "total_steps": 204665, "loss": 0.0427, "lr": 1.6371406158352016e-06, "epoch": 1.7605599394131874, "percentage": 35.21, "elapsed_time": "1:33:50", "remaining_time": "2:52:39", "throughput": 8628.02, "total_tokens": 48576416} +{"current_steps": 72070, "total_steps": 204665, "loss": 0.1243, "lr": 1.6370748860149316e-06, "epoch": 1.7606820902450346, "percentage": 35.21, "elapsed_time": "1:33:50", "remaining_time": "2:52:38", "throughput": 8627.99, "total_tokens": 48579232} +{"current_steps": 72075, "total_steps": 204665, "loss": 0.0553, "lr": 1.6370091515616817e-06, "epoch": 1.7608042410768818, "percentage": 35.22, "elapsed_time": "1:33:50", "remaining_time": "2:52:38", "throughput": 8628.02, "total_tokens": 48582432} +{"current_steps": 72080, "total_steps": 204665, "loss": 0.1632, "lr": 1.63694341247593e-06, "epoch": 1.760926391908729, "percentage": 35.22, "elapsed_time": "1:33:51", "remaining_time": "2:52:37", "throughput": 8628.11, "total_tokens": 48586016} +{"current_steps": 72085, "total_steps": 204665, "loss": 0.1723, "lr": 1.6368776687581538e-06, "epoch": 1.7610485427405762, "percentage": 35.22, "elapsed_time": "1:33:51", "remaining_time": "2:52:37", "throughput": 8628.15, "total_tokens": 48589344} +{"current_steps": 72090, "total_steps": 204665, "loss": 0.1579, "lr": 1.6368119204088323e-06, "epoch": 1.7611706935724232, "percentage": 35.22, "elapsed_time": "1:33:51", "remaining_time": "2:52:37", "throughput": 8628.2, "total_tokens": 48592672} +{"current_steps": 72095, "total_steps": 204665, "loss": 0.0904, "lr": 1.6367461674284432e-06, "epoch": 1.7612928444042704, "percentage": 35.23, "elapsed_time": "1:33:52", "remaining_time": "2:52:36", "throughput": 8628.19, "total_tokens": 48595552} +{"current_steps": 72100, "total_steps": 204665, "loss": 0.1016, "lr": 1.6366804098174648e-06, "epoch": 1.7614149952361176, "percentage": 35.23, "elapsed_time": "1:33:52", "remaining_time": "2:52:36", "throughput": 8628.22, "total_tokens": 48598688} +{"current_steps": 72105, "total_steps": 204665, "loss": 0.0079, "lr": 1.6366146475763754e-06, "epoch": 1.7615371460679647, "percentage": 35.23, "elapsed_time": "1:33:52", "remaining_time": "2:52:35", "throughput": 8628.24, "total_tokens": 48601824} +{"current_steps": 72110, "total_steps": 204665, "loss": 0.0842, "lr": 1.6365488807056528e-06, "epoch": 1.761659296899812, "percentage": 35.23, "elapsed_time": "1:33:53", "remaining_time": "2:52:35", "throughput": 8628.26, "total_tokens": 48604896} +{"current_steps": 72115, "total_steps": 204665, "loss": 0.0733, "lr": 1.6364831092057752e-06, "epoch": 1.761781447731659, "percentage": 35.24, "elapsed_time": "1:33:53", "remaining_time": "2:52:34", "throughput": 8628.46, "total_tokens": 48609248} +{"current_steps": 72120, "total_steps": 204665, "loss": 0.0018, "lr": 1.6364173330772217e-06, "epoch": 1.761903598563506, "percentage": 35.24, "elapsed_time": "1:33:53", "remaining_time": "2:52:34", "throughput": 8628.46, "total_tokens": 48612256} +{"current_steps": 72125, "total_steps": 204665, "loss": 0.0627, "lr": 1.63635155232047e-06, "epoch": 1.7620257493953533, "percentage": 35.24, "elapsed_time": "1:33:54", "remaining_time": "2:52:33", "throughput": 8628.51, "total_tokens": 48615584} +{"current_steps": 72130, "total_steps": 204665, "loss": 0.0468, "lr": 1.636285766935999e-06, "epoch": 1.7621479002272005, "percentage": 35.24, "elapsed_time": "1:33:54", "remaining_time": "2:52:33", "throughput": 8628.57, "total_tokens": 48618976} +{"current_steps": 72135, "total_steps": 204665, "loss": 0.0157, "lr": 1.6362199769242863e-06, "epoch": 1.7622700510590477, "percentage": 35.25, "elapsed_time": "1:33:55", "remaining_time": "2:52:32", "throughput": 8628.69, "total_tokens": 48622816} +{"current_steps": 72140, "total_steps": 204665, "loss": 0.0765, "lr": 1.636154182285811e-06, "epoch": 1.7623922018908948, "percentage": 35.25, "elapsed_time": "1:33:55", "remaining_time": "2:52:32", "throughput": 8628.68, "total_tokens": 48625760} +{"current_steps": 72145, "total_steps": 204665, "loss": 0.1066, "lr": 1.6360883830210515e-06, "epoch": 1.762514352722742, "percentage": 35.25, "elapsed_time": "1:33:55", "remaining_time": "2:52:31", "throughput": 8628.7, "total_tokens": 48628832} +{"current_steps": 72150, "total_steps": 204665, "loss": 0.1817, "lr": 1.636022579130486e-06, "epoch": 1.7626365035545892, "percentage": 35.25, "elapsed_time": "1:33:56", "remaining_time": "2:52:31", "throughput": 8628.71, "total_tokens": 48631968} +{"current_steps": 72155, "total_steps": 204665, "loss": 0.1552, "lr": 1.6359567706145931e-06, "epoch": 1.7627586543864364, "percentage": 35.26, "elapsed_time": "1:33:56", "remaining_time": "2:52:31", "throughput": 8628.78, "total_tokens": 48635424} +{"current_steps": 72160, "total_steps": 204665, "loss": 0.0411, "lr": 1.635890957473852e-06, "epoch": 1.7628808052182836, "percentage": 35.26, "elapsed_time": "1:33:56", "remaining_time": "2:52:30", "throughput": 8628.81, "total_tokens": 48638560} +{"current_steps": 72165, "total_steps": 204665, "loss": 0.0705, "lr": 1.6358251397087405e-06, "epoch": 1.7630029560501308, "percentage": 35.26, "elapsed_time": "1:33:57", "remaining_time": "2:52:30", "throughput": 8628.9, "total_tokens": 48642208} +{"current_steps": 72170, "total_steps": 204665, "loss": 0.0825, "lr": 1.6357593173197378e-06, "epoch": 1.763125106881978, "percentage": 35.26, "elapsed_time": "1:33:57", "remaining_time": "2:52:29", "throughput": 8628.97, "total_tokens": 48645664} +{"current_steps": 72175, "total_steps": 204665, "loss": 0.1601, "lr": 1.6356934903073221e-06, "epoch": 1.7632472577138252, "percentage": 35.26, "elapsed_time": "1:33:57", "remaining_time": "2:52:29", "throughput": 8629.03, "total_tokens": 48649056} +{"current_steps": 72180, "total_steps": 204665, "loss": 0.1951, "lr": 1.6356276586719722e-06, "epoch": 1.7633694085456721, "percentage": 35.27, "elapsed_time": "1:33:58", "remaining_time": "2:52:28", "throughput": 8629.06, "total_tokens": 48652320} +{"current_steps": 72185, "total_steps": 204665, "loss": 0.0517, "lr": 1.6355618224141672e-06, "epoch": 1.7634915593775193, "percentage": 35.27, "elapsed_time": "1:33:58", "remaining_time": "2:52:28", "throughput": 8629.12, "total_tokens": 48655712} +{"current_steps": 72190, "total_steps": 204665, "loss": 0.1116, "lr": 1.6354959815343859e-06, "epoch": 1.7636137102093665, "percentage": 35.27, "elapsed_time": "1:33:58", "remaining_time": "2:52:27", "throughput": 8629.28, "total_tokens": 48659808} +{"current_steps": 72195, "total_steps": 204665, "loss": 0.0629, "lr": 1.6354301360331064e-06, "epoch": 1.7637358610412137, "percentage": 35.27, "elapsed_time": "1:33:59", "remaining_time": "2:52:27", "throughput": 8629.38, "total_tokens": 48663456} +{"current_steps": 72200, "total_steps": 204665, "loss": 0.058, "lr": 1.6353642859108084e-06, "epoch": 1.763858011873061, "percentage": 35.28, "elapsed_time": "1:33:59", "remaining_time": "2:52:26", "throughput": 8629.36, "total_tokens": 48666272} +{"current_steps": 72205, "total_steps": 204665, "loss": 0.161, "lr": 1.6352984311679704e-06, "epoch": 1.7639801627049079, "percentage": 35.28, "elapsed_time": "1:33:59", "remaining_time": "2:52:26", "throughput": 8629.4, "total_tokens": 48669536} +{"current_steps": 72210, "total_steps": 204665, "loss": 0.0984, "lr": 1.6352325718050713e-06, "epoch": 1.764102313536755, "percentage": 35.28, "elapsed_time": "1:34:00", "remaining_time": "2:52:26", "throughput": 8629.47, "total_tokens": 48672992} +{"current_steps": 72215, "total_steps": 204665, "loss": 0.0499, "lr": 1.6351667078225902e-06, "epoch": 1.7642244643686023, "percentage": 35.28, "elapsed_time": "1:34:00", "remaining_time": "2:52:25", "throughput": 8629.54, "total_tokens": 48676448} +{"current_steps": 72220, "total_steps": 204665, "loss": 0.0657, "lr": 1.6351008392210055e-06, "epoch": 1.7643466152004494, "percentage": 35.29, "elapsed_time": "1:34:01", "remaining_time": "2:52:25", "throughput": 8629.6, "total_tokens": 48679776} +{"current_steps": 72225, "total_steps": 204665, "loss": 0.0016, "lr": 1.635034966000797e-06, "epoch": 1.7644687660322966, "percentage": 35.29, "elapsed_time": "1:34:01", "remaining_time": "2:52:24", "throughput": 8629.68, "total_tokens": 48683360} +{"current_steps": 72230, "total_steps": 204665, "loss": 0.0697, "lr": 1.6349690881624437e-06, "epoch": 1.7645909168641438, "percentage": 35.29, "elapsed_time": "1:34:01", "remaining_time": "2:52:24", "throughput": 8629.7, "total_tokens": 48686496} +{"current_steps": 72235, "total_steps": 204665, "loss": 0.0867, "lr": 1.634903205706424e-06, "epoch": 1.764713067695991, "percentage": 35.29, "elapsed_time": "1:34:02", "remaining_time": "2:52:23", "throughput": 8629.77, "total_tokens": 48689952} +{"current_steps": 72240, "total_steps": 204665, "loss": 0.053, "lr": 1.6348373186332175e-06, "epoch": 1.7648352185278382, "percentage": 35.3, "elapsed_time": "1:34:02", "remaining_time": "2:52:23", "throughput": 8629.86, "total_tokens": 48693472} +{"current_steps": 72245, "total_steps": 204665, "loss": 0.0437, "lr": 1.6347714269433032e-06, "epoch": 1.7649573693596854, "percentage": 35.3, "elapsed_time": "1:34:02", "remaining_time": "2:52:22", "throughput": 8629.95, "total_tokens": 48697120} +{"current_steps": 72250, "total_steps": 204665, "loss": 0.0846, "lr": 1.6347055306371606e-06, "epoch": 1.7650795201915326, "percentage": 35.3, "elapsed_time": "1:34:03", "remaining_time": "2:52:22", "throughput": 8629.97, "total_tokens": 48700256} +{"current_steps": 72255, "total_steps": 204665, "loss": 0.1, "lr": 1.6346396297152688e-06, "epoch": 1.7652016710233798, "percentage": 35.3, "elapsed_time": "1:34:03", "remaining_time": "2:52:21", "throughput": 8630.12, "total_tokens": 48704160} +{"current_steps": 72260, "total_steps": 204665, "loss": 0.0857, "lr": 1.6345737241781064e-06, "epoch": 1.765323821855227, "percentage": 35.31, "elapsed_time": "1:34:03", "remaining_time": "2:52:21", "throughput": 8630.24, "total_tokens": 48707936} +{"current_steps": 72265, "total_steps": 204665, "loss": 0.0014, "lr": 1.6345078140261536e-06, "epoch": 1.7654459726870741, "percentage": 35.31, "elapsed_time": "1:34:04", "remaining_time": "2:52:21", "throughput": 8630.31, "total_tokens": 48711392} +{"current_steps": 72270, "total_steps": 204665, "loss": 0.1794, "lr": 1.634441899259889e-06, "epoch": 1.7655681235189211, "percentage": 35.31, "elapsed_time": "1:34:04", "remaining_time": "2:52:20", "throughput": 8630.39, "total_tokens": 48714848} +{"current_steps": 72275, "total_steps": 204665, "loss": 0.0416, "lr": 1.6343759798797926e-06, "epoch": 1.7656902743507683, "percentage": 35.31, "elapsed_time": "1:34:04", "remaining_time": "2:52:20", "throughput": 8630.43, "total_tokens": 48718048} +{"current_steps": 72280, "total_steps": 204665, "loss": 0.0587, "lr": 1.6343100558863432e-06, "epoch": 1.7658124251826155, "percentage": 35.32, "elapsed_time": "1:34:05", "remaining_time": "2:52:19", "throughput": 8630.48, "total_tokens": 48721312} +{"current_steps": 72285, "total_steps": 204665, "loss": 0.164, "lr": 1.6342441272800205e-06, "epoch": 1.7659345760144627, "percentage": 35.32, "elapsed_time": "1:34:05", "remaining_time": "2:52:19", "throughput": 8630.51, "total_tokens": 48724448} +{"current_steps": 72290, "total_steps": 204665, "loss": 0.0512, "lr": 1.634178194061304e-06, "epoch": 1.7660567268463099, "percentage": 35.32, "elapsed_time": "1:34:05", "remaining_time": "2:52:18", "throughput": 8630.56, "total_tokens": 48727712} +{"current_steps": 72295, "total_steps": 204665, "loss": 0.0474, "lr": 1.634112256230673e-06, "epoch": 1.7661788776781568, "percentage": 35.32, "elapsed_time": "1:34:06", "remaining_time": "2:52:18", "throughput": 8630.6, "total_tokens": 48730912} +{"current_steps": 72300, "total_steps": 204665, "loss": 0.0715, "lr": 1.634046313788607e-06, "epoch": 1.766301028510004, "percentage": 35.33, "elapsed_time": "1:34:06", "remaining_time": "2:52:17", "throughput": 8630.64, "total_tokens": 48734112} +{"current_steps": 72305, "total_steps": 204665, "loss": 0.0877, "lr": 1.633980366735586e-06, "epoch": 1.7664231793418512, "percentage": 35.33, "elapsed_time": "1:34:06", "remaining_time": "2:52:17", "throughput": 8630.75, "total_tokens": 48737760} +{"current_steps": 72310, "total_steps": 204665, "loss": 0.1014, "lr": 1.6339144150720889e-06, "epoch": 1.7665453301736984, "percentage": 35.33, "elapsed_time": "1:34:07", "remaining_time": "2:52:16", "throughput": 8630.78, "total_tokens": 48740896} +{"current_steps": 72315, "total_steps": 204665, "loss": 0.0031, "lr": 1.633848458798596e-06, "epoch": 1.7666674810055456, "percentage": 35.33, "elapsed_time": "1:34:07", "remaining_time": "2:52:16", "throughput": 8630.87, "total_tokens": 48744480} +{"current_steps": 72320, "total_steps": 204665, "loss": 0.1545, "lr": 1.6337824979155866e-06, "epoch": 1.7667896318373928, "percentage": 35.34, "elapsed_time": "1:34:08", "remaining_time": "2:52:15", "throughput": 8630.91, "total_tokens": 48747680} +{"current_steps": 72325, "total_steps": 204665, "loss": 0.1265, "lr": 1.6337165324235402e-06, "epoch": 1.76691178266924, "percentage": 35.34, "elapsed_time": "1:34:08", "remaining_time": "2:52:15", "throughput": 8631.01, "total_tokens": 48751200} +{"current_steps": 72330, "total_steps": 204665, "loss": 0.1546, "lr": 1.6336505623229368e-06, "epoch": 1.7670339335010872, "percentage": 35.34, "elapsed_time": "1:34:08", "remaining_time": "2:52:14", "throughput": 8631.08, "total_tokens": 48754656} +{"current_steps": 72335, "total_steps": 204665, "loss": 0.0522, "lr": 1.633584587614256e-06, "epoch": 1.7671560843329344, "percentage": 35.34, "elapsed_time": "1:34:09", "remaining_time": "2:52:14", "throughput": 8631.23, "total_tokens": 48758560} +{"current_steps": 72340, "total_steps": 204665, "loss": 0.1567, "lr": 1.6335186082979778e-06, "epoch": 1.7672782351647816, "percentage": 35.35, "elapsed_time": "1:34:09", "remaining_time": "2:52:14", "throughput": 8631.25, "total_tokens": 48761760} +{"current_steps": 72345, "total_steps": 204665, "loss": 0.0445, "lr": 1.6334526243745819e-06, "epoch": 1.7674003859966287, "percentage": 35.35, "elapsed_time": "1:34:09", "remaining_time": "2:52:13", "throughput": 8631.33, "total_tokens": 48765280} +{"current_steps": 72350, "total_steps": 204665, "loss": 0.0527, "lr": 1.633386635844548e-06, "epoch": 1.767522536828476, "percentage": 35.35, "elapsed_time": "1:34:10", "remaining_time": "2:52:13", "throughput": 8631.38, "total_tokens": 48768544} +{"current_steps": 72355, "total_steps": 204665, "loss": 0.0989, "lr": 1.633320642708356e-06, "epoch": 1.7676446876603231, "percentage": 35.35, "elapsed_time": "1:34:10", "remaining_time": "2:52:12", "throughput": 8631.52, "total_tokens": 48772384} +{"current_steps": 72360, "total_steps": 204665, "loss": 0.1298, "lr": 1.6332546449664865e-06, "epoch": 1.76776683849217, "percentage": 35.36, "elapsed_time": "1:34:10", "remaining_time": "2:52:12", "throughput": 8631.55, "total_tokens": 48775520} +{"current_steps": 72365, "total_steps": 204665, "loss": 0.0787, "lr": 1.6331886426194184e-06, "epoch": 1.7678889893240173, "percentage": 35.36, "elapsed_time": "1:34:11", "remaining_time": "2:52:11", "throughput": 8631.74, "total_tokens": 48779808} +{"current_steps": 72370, "total_steps": 204665, "loss": 0.081, "lr": 1.6331226356676324e-06, "epoch": 1.7680111401558645, "percentage": 35.36, "elapsed_time": "1:34:11", "remaining_time": "2:52:11", "throughput": 8631.84, "total_tokens": 48783392} +{"current_steps": 72375, "total_steps": 204665, "loss": 0.135, "lr": 1.633056624111608e-06, "epoch": 1.7681332909877117, "percentage": 35.36, "elapsed_time": "1:34:11", "remaining_time": "2:52:10", "throughput": 8631.84, "total_tokens": 48786336} +{"current_steps": 72380, "total_steps": 204665, "loss": 0.0253, "lr": 1.6329906079518262e-06, "epoch": 1.7682554418195586, "percentage": 35.37, "elapsed_time": "1:34:12", "remaining_time": "2:52:10", "throughput": 8631.89, "total_tokens": 48789600} +{"current_steps": 72385, "total_steps": 204665, "loss": 0.0988, "lr": 1.632924587188766e-06, "epoch": 1.7683775926514058, "percentage": 35.37, "elapsed_time": "1:34:12", "remaining_time": "2:52:09", "throughput": 8631.92, "total_tokens": 48792672} +{"current_steps": 72390, "total_steps": 204665, "loss": 0.0365, "lr": 1.6328585618229077e-06, "epoch": 1.768499743483253, "percentage": 35.37, "elapsed_time": "1:34:12", "remaining_time": "2:52:09", "throughput": 8632.0, "total_tokens": 48796128} +{"current_steps": 72395, "total_steps": 204665, "loss": 0.045, "lr": 1.632792531854732e-06, "epoch": 1.7686218943151002, "percentage": 35.37, "elapsed_time": "1:34:13", "remaining_time": "2:52:08", "throughput": 8632.06, "total_tokens": 48799520} +{"current_steps": 72400, "total_steps": 204665, "loss": 0.0014, "lr": 1.632726497284719e-06, "epoch": 1.7687440451469474, "percentage": 35.37, "elapsed_time": "1:34:13", "remaining_time": "2:52:08", "throughput": 8632.06, "total_tokens": 48802464} +{"current_steps": 72405, "total_steps": 204665, "loss": 0.1229, "lr": 1.6326604581133484e-06, "epoch": 1.7688661959787946, "percentage": 35.38, "elapsed_time": "1:34:13", "remaining_time": "2:52:07", "throughput": 8632.11, "total_tokens": 48805728} +{"current_steps": 72410, "total_steps": 204665, "loss": 0.0568, "lr": 1.632594414341101e-06, "epoch": 1.7689883468106418, "percentage": 35.38, "elapsed_time": "1:34:14", "remaining_time": "2:52:07", "throughput": 8632.15, "total_tokens": 48808928} +{"current_steps": 72415, "total_steps": 204665, "loss": 0.0917, "lr": 1.632528365968457e-06, "epoch": 1.769110497642489, "percentage": 35.38, "elapsed_time": "1:34:14", "remaining_time": "2:52:07", "throughput": 8632.24, "total_tokens": 48812448} +{"current_steps": 72420, "total_steps": 204665, "loss": 0.25, "lr": 1.6324623129958966e-06, "epoch": 1.7692326484743361, "percentage": 35.38, "elapsed_time": "1:34:15", "remaining_time": "2:52:06", "throughput": 8632.33, "total_tokens": 48815968} +{"current_steps": 72425, "total_steps": 204665, "loss": 0.1087, "lr": 1.6323962554238997e-06, "epoch": 1.7693547993061833, "percentage": 35.39, "elapsed_time": "1:34:15", "remaining_time": "2:52:06", "throughput": 8632.37, "total_tokens": 48819168} +{"current_steps": 72430, "total_steps": 204665, "loss": 0.0829, "lr": 1.6323301932529475e-06, "epoch": 1.7694769501380305, "percentage": 35.39, "elapsed_time": "1:34:15", "remaining_time": "2:52:05", "throughput": 8632.53, "total_tokens": 48823264} +{"current_steps": 72435, "total_steps": 204665, "loss": 0.058, "lr": 1.6322641264835198e-06, "epoch": 1.7695991009698777, "percentage": 35.39, "elapsed_time": "1:34:16", "remaining_time": "2:52:05", "throughput": 8632.57, "total_tokens": 48826464} +{"current_steps": 72440, "total_steps": 204665, "loss": 0.0264, "lr": 1.6321980551160976e-06, "epoch": 1.769721251801725, "percentage": 35.39, "elapsed_time": "1:34:16", "remaining_time": "2:52:04", "throughput": 8632.68, "total_tokens": 48830176} +{"current_steps": 72445, "total_steps": 204665, "loss": 0.0425, "lr": 1.6321319791511607e-06, "epoch": 1.769843402633572, "percentage": 35.4, "elapsed_time": "1:34:16", "remaining_time": "2:52:04", "throughput": 8632.78, "total_tokens": 48833760} +{"current_steps": 72450, "total_steps": 204665, "loss": 0.1903, "lr": 1.6320658985891904e-06, "epoch": 1.769965553465419, "percentage": 35.4, "elapsed_time": "1:34:17", "remaining_time": "2:52:03", "throughput": 8632.83, "total_tokens": 48837088} +{"current_steps": 72455, "total_steps": 204665, "loss": 0.0487, "lr": 1.6319998134306668e-06, "epoch": 1.7700877042972663, "percentage": 35.4, "elapsed_time": "1:34:17", "remaining_time": "2:52:03", "throughput": 8632.9, "total_tokens": 48840544} +{"current_steps": 72460, "total_steps": 204665, "loss": 0.0658, "lr": 1.6319337236760706e-06, "epoch": 1.7702098551291134, "percentage": 35.4, "elapsed_time": "1:34:17", "remaining_time": "2:52:02", "throughput": 8633.01, "total_tokens": 48844192} +{"current_steps": 72465, "total_steps": 204665, "loss": 0.2244, "lr": 1.6318676293258822e-06, "epoch": 1.7703320059609606, "percentage": 35.41, "elapsed_time": "1:34:18", "remaining_time": "2:52:02", "throughput": 8633.09, "total_tokens": 48847712} +{"current_steps": 72470, "total_steps": 204665, "loss": 0.1484, "lr": 1.6318015303805827e-06, "epoch": 1.7704541567928076, "percentage": 35.41, "elapsed_time": "1:34:18", "remaining_time": "2:52:01", "throughput": 8633.13, "total_tokens": 48850912} +{"current_steps": 72475, "total_steps": 204665, "loss": 0.1223, "lr": 1.6317354268406524e-06, "epoch": 1.7705763076246548, "percentage": 35.41, "elapsed_time": "1:34:18", "remaining_time": "2:52:01", "throughput": 8633.17, "total_tokens": 48854112} +{"current_steps": 72480, "total_steps": 204665, "loss": 0.0031, "lr": 1.6316693187065723e-06, "epoch": 1.770698458456502, "percentage": 35.41, "elapsed_time": "1:34:19", "remaining_time": "2:52:00", "throughput": 8633.22, "total_tokens": 48857440} +{"current_steps": 72485, "total_steps": 204665, "loss": 0.0671, "lr": 1.6316032059788229e-06, "epoch": 1.7708206092883492, "percentage": 35.42, "elapsed_time": "1:34:19", "remaining_time": "2:52:00", "throughput": 8633.4, "total_tokens": 48861600} +{"current_steps": 72490, "total_steps": 204665, "loss": 0.036, "lr": 1.6315370886578848e-06, "epoch": 1.7709427601201964, "percentage": 35.42, "elapsed_time": "1:34:19", "remaining_time": "2:52:00", "throughput": 8633.43, "total_tokens": 48864736} +{"current_steps": 72495, "total_steps": 204665, "loss": 0.0519, "lr": 1.6314709667442395e-06, "epoch": 1.7710649109520435, "percentage": 35.42, "elapsed_time": "1:34:20", "remaining_time": "2:51:59", "throughput": 8633.47, "total_tokens": 48867936} +{"current_steps": 72500, "total_steps": 204665, "loss": 0.002, "lr": 1.6314048402383675e-06, "epoch": 1.7711870617838907, "percentage": 35.42, "elapsed_time": "1:34:20", "remaining_time": "2:51:59", "throughput": 8633.52, "total_tokens": 48871200} +{"current_steps": 72505, "total_steps": 204665, "loss": 0.0519, "lr": 1.6313387091407496e-06, "epoch": 1.771309212615738, "percentage": 35.43, "elapsed_time": "1:34:20", "remaining_time": "2:51:58", "throughput": 8633.55, "total_tokens": 48874272} +{"current_steps": 72510, "total_steps": 204665, "loss": 0.0645, "lr": 1.6312725734518668e-06, "epoch": 1.7714313634475851, "percentage": 35.43, "elapsed_time": "1:34:21", "remaining_time": "2:51:58", "throughput": 8633.68, "total_tokens": 48878176} +{"current_steps": 72515, "total_steps": 204665, "loss": 0.1158, "lr": 1.6312064331722e-06, "epoch": 1.7715535142794323, "percentage": 35.43, "elapsed_time": "1:34:21", "remaining_time": "2:51:57", "throughput": 8633.78, "total_tokens": 48881760} +{"current_steps": 72520, "total_steps": 204665, "loss": 0.0697, "lr": 1.6311402883022302e-06, "epoch": 1.7716756651112795, "percentage": 35.43, "elapsed_time": "1:34:22", "remaining_time": "2:51:57", "throughput": 8633.89, "total_tokens": 48885472} +{"current_steps": 72525, "total_steps": 204665, "loss": 0.0456, "lr": 1.6310741388424388e-06, "epoch": 1.7717978159431267, "percentage": 35.44, "elapsed_time": "1:34:22", "remaining_time": "2:51:56", "throughput": 8633.95, "total_tokens": 48888800} +{"current_steps": 72530, "total_steps": 204665, "loss": 0.2052, "lr": 1.631007984793306e-06, "epoch": 1.7719199667749739, "percentage": 35.44, "elapsed_time": "1:34:22", "remaining_time": "2:51:56", "throughput": 8633.99, "total_tokens": 48892064} +{"current_steps": 72535, "total_steps": 204665, "loss": 0.0733, "lr": 1.6309418261553139e-06, "epoch": 1.772042117606821, "percentage": 35.44, "elapsed_time": "1:34:23", "remaining_time": "2:51:55", "throughput": 8634.12, "total_tokens": 48895904} +{"current_steps": 72540, "total_steps": 204665, "loss": 0.1274, "lr": 1.6308756629289429e-06, "epoch": 1.772164268438668, "percentage": 35.44, "elapsed_time": "1:34:23", "remaining_time": "2:51:55", "throughput": 8634.19, "total_tokens": 48899296} +{"current_steps": 72545, "total_steps": 204665, "loss": 0.1346, "lr": 1.6308094951146742e-06, "epoch": 1.7722864192705152, "percentage": 35.45, "elapsed_time": "1:34:23", "remaining_time": "2:51:54", "throughput": 8634.27, "total_tokens": 48902816} +{"current_steps": 72550, "total_steps": 204665, "loss": 0.0313, "lr": 1.6307433227129895e-06, "epoch": 1.7724085701023624, "percentage": 35.45, "elapsed_time": "1:34:24", "remaining_time": "2:51:54", "throughput": 8634.36, "total_tokens": 48906400} +{"current_steps": 72555, "total_steps": 204665, "loss": 0.0037, "lr": 1.6306771457243696e-06, "epoch": 1.7725307209342096, "percentage": 35.45, "elapsed_time": "1:34:24", "remaining_time": "2:51:54", "throughput": 8634.4, "total_tokens": 48909600} +{"current_steps": 72560, "total_steps": 204665, "loss": 0.1203, "lr": 1.6306109641492958e-06, "epoch": 1.7726528717660566, "percentage": 35.45, "elapsed_time": "1:34:24", "remaining_time": "2:51:53", "throughput": 8634.43, "total_tokens": 48912672} +{"current_steps": 72565, "total_steps": 204665, "loss": 0.0387, "lr": 1.6305447779882497e-06, "epoch": 1.7727750225979038, "percentage": 35.46, "elapsed_time": "1:34:25", "remaining_time": "2:51:53", "throughput": 8634.48, "total_tokens": 48916000} +{"current_steps": 72570, "total_steps": 204665, "loss": 0.1363, "lr": 1.6304785872417121e-06, "epoch": 1.772897173429751, "percentage": 35.46, "elapsed_time": "1:34:25", "remaining_time": "2:51:52", "throughput": 8634.48, "total_tokens": 48918880} +{"current_steps": 72575, "total_steps": 204665, "loss": 0.0632, "lr": 1.630412391910165e-06, "epoch": 1.7730193242615981, "percentage": 35.46, "elapsed_time": "1:34:25", "remaining_time": "2:51:52", "throughput": 8634.54, "total_tokens": 48922272} +{"current_steps": 72580, "total_steps": 204665, "loss": 0.0572, "lr": 1.630346191994089e-06, "epoch": 1.7731414750934453, "percentage": 35.46, "elapsed_time": "1:34:26", "remaining_time": "2:51:51", "throughput": 8634.58, "total_tokens": 48925408} +{"current_steps": 72585, "total_steps": 204665, "loss": 0.167, "lr": 1.630279987493966e-06, "epoch": 1.7732636259252925, "percentage": 35.47, "elapsed_time": "1:34:26", "remaining_time": "2:51:51", "throughput": 8634.62, "total_tokens": 48928608} +{"current_steps": 72590, "total_steps": 204665, "loss": 0.0463, "lr": 1.6302137784102774e-06, "epoch": 1.7733857767571397, "percentage": 35.47, "elapsed_time": "1:34:26", "remaining_time": "2:51:50", "throughput": 8634.96, "total_tokens": 48934048} +{"current_steps": 72595, "total_steps": 204665, "loss": 0.0021, "lr": 1.630147564743505e-06, "epoch": 1.773507927588987, "percentage": 35.47, "elapsed_time": "1:34:27", "remaining_time": "2:51:50", "throughput": 8634.94, "total_tokens": 48936800} +{"current_steps": 72600, "total_steps": 204665, "loss": 0.0102, "lr": 1.63008134649413e-06, "epoch": 1.773630078420834, "percentage": 35.47, "elapsed_time": "1:34:27", "remaining_time": "2:51:49", "throughput": 8635.0, "total_tokens": 48940128} +{"current_steps": 72605, "total_steps": 204665, "loss": 0.1173, "lr": 1.6300151236626336e-06, "epoch": 1.7737522292526813, "percentage": 35.48, "elapsed_time": "1:34:28", "remaining_time": "2:51:49", "throughput": 8635.07, "total_tokens": 48943584} +{"current_steps": 72610, "total_steps": 204665, "loss": 0.1184, "lr": 1.629948896249498e-06, "epoch": 1.7738743800845285, "percentage": 35.48, "elapsed_time": "1:34:28", "remaining_time": "2:51:48", "throughput": 8635.15, "total_tokens": 48947040} +{"current_steps": 72615, "total_steps": 204665, "loss": 0.0622, "lr": 1.6298826642552043e-06, "epoch": 1.7739965309163757, "percentage": 35.48, "elapsed_time": "1:34:28", "remaining_time": "2:51:48", "throughput": 8635.22, "total_tokens": 48950496} +{"current_steps": 72620, "total_steps": 204665, "loss": 0.1365, "lr": 1.629816427680235e-06, "epoch": 1.7741186817482228, "percentage": 35.48, "elapsed_time": "1:34:29", "remaining_time": "2:51:48", "throughput": 8635.28, "total_tokens": 48953824} +{"current_steps": 72625, "total_steps": 204665, "loss": 0.0999, "lr": 1.6297501865250708e-06, "epoch": 1.7742408325800698, "percentage": 35.48, "elapsed_time": "1:34:29", "remaining_time": "2:51:47", "throughput": 8635.34, "total_tokens": 48957152} +{"current_steps": 72630, "total_steps": 204665, "loss": 0.0882, "lr": 1.629683940790194e-06, "epoch": 1.774362983411917, "percentage": 35.49, "elapsed_time": "1:34:29", "remaining_time": "2:51:47", "throughput": 8635.36, "total_tokens": 48960224} +{"current_steps": 72635, "total_steps": 204665, "loss": 0.0013, "lr": 1.6296176904760866e-06, "epoch": 1.7744851342437642, "percentage": 35.49, "elapsed_time": "1:34:30", "remaining_time": "2:51:46", "throughput": 8635.37, "total_tokens": 48963232} +{"current_steps": 72640, "total_steps": 204665, "loss": 0.1249, "lr": 1.6295514355832296e-06, "epoch": 1.7746072850756114, "percentage": 35.49, "elapsed_time": "1:34:30", "remaining_time": "2:51:46", "throughput": 8635.42, "total_tokens": 48966496} +{"current_steps": 72645, "total_steps": 204665, "loss": 0.1268, "lr": 1.629485176112105e-06, "epoch": 1.7747294359074586, "percentage": 35.49, "elapsed_time": "1:34:30", "remaining_time": "2:51:45", "throughput": 8635.47, "total_tokens": 48969760} +{"current_steps": 72650, "total_steps": 204665, "loss": 0.1543, "lr": 1.6294189120631954e-06, "epoch": 1.7748515867393055, "percentage": 35.5, "elapsed_time": "1:34:31", "remaining_time": "2:51:45", "throughput": 8635.51, "total_tokens": 48972896} +{"current_steps": 72655, "total_steps": 204665, "loss": 0.0008, "lr": 1.6293526434369818e-06, "epoch": 1.7749737375711527, "percentage": 35.5, "elapsed_time": "1:34:31", "remaining_time": "2:51:44", "throughput": 8635.57, "total_tokens": 48976288} +{"current_steps": 72660, "total_steps": 204665, "loss": 0.1141, "lr": 1.6292863702339466e-06, "epoch": 1.775095888403, "percentage": 35.5, "elapsed_time": "1:34:31", "remaining_time": "2:51:44", "throughput": 8635.64, "total_tokens": 48979680} +{"current_steps": 72665, "total_steps": 204665, "loss": 0.003, "lr": 1.6292200924545715e-06, "epoch": 1.7752180392348471, "percentage": 35.5, "elapsed_time": "1:34:32", "remaining_time": "2:51:43", "throughput": 8635.71, "total_tokens": 48983136} +{"current_steps": 72670, "total_steps": 204665, "loss": 0.1314, "lr": 1.6291538100993391e-06, "epoch": 1.7753401900666943, "percentage": 35.51, "elapsed_time": "1:34:32", "remaining_time": "2:51:43", "throughput": 8635.73, "total_tokens": 48986144} +{"current_steps": 72675, "total_steps": 204665, "loss": 0.1202, "lr": 1.6290875231687306e-06, "epoch": 1.7754623408985415, "percentage": 35.51, "elapsed_time": "1:34:32", "remaining_time": "2:51:42", "throughput": 8635.86, "total_tokens": 48990048} +{"current_steps": 72680, "total_steps": 204665, "loss": 0.038, "lr": 1.6290212316632285e-06, "epoch": 1.7755844917303887, "percentage": 35.51, "elapsed_time": "1:34:33", "remaining_time": "2:51:42", "throughput": 8635.96, "total_tokens": 48993696} +{"current_steps": 72685, "total_steps": 204665, "loss": 0.0612, "lr": 1.628954935583315e-06, "epoch": 1.7757066425622359, "percentage": 35.51, "elapsed_time": "1:34:33", "remaining_time": "2:51:41", "throughput": 8636.03, "total_tokens": 48997088} +{"current_steps": 72690, "total_steps": 204665, "loss": 0.0965, "lr": 1.628888634929472e-06, "epoch": 1.775828793394083, "percentage": 35.52, "elapsed_time": "1:34:33", "remaining_time": "2:51:41", "throughput": 8636.11, "total_tokens": 49000608} +{"current_steps": 72695, "total_steps": 204665, "loss": 0.0807, "lr": 1.6288223297021814e-06, "epoch": 1.7759509442259303, "percentage": 35.52, "elapsed_time": "1:34:34", "remaining_time": "2:51:41", "throughput": 8636.2, "total_tokens": 49004192} +{"current_steps": 72700, "total_steps": 204665, "loss": 0.0388, "lr": 1.628756019901926e-06, "epoch": 1.7760730950577774, "percentage": 35.52, "elapsed_time": "1:34:34", "remaining_time": "2:51:40", "throughput": 8636.22, "total_tokens": 49007200} +{"current_steps": 72705, "total_steps": 204665, "loss": 0.1639, "lr": 1.6286897055291874e-06, "epoch": 1.7761952458896246, "percentage": 35.52, "elapsed_time": "1:34:34", "remaining_time": "2:51:40", "throughput": 8636.22, "total_tokens": 49010144} +{"current_steps": 72710, "total_steps": 204665, "loss": 0.1141, "lr": 1.6286233865844486e-06, "epoch": 1.7763173967214718, "percentage": 35.53, "elapsed_time": "1:34:35", "remaining_time": "2:51:39", "throughput": 8636.34, "total_tokens": 49013920} +{"current_steps": 72715, "total_steps": 204665, "loss": 0.0585, "lr": 1.6285570630681914e-06, "epoch": 1.7764395475533188, "percentage": 35.53, "elapsed_time": "1:34:35", "remaining_time": "2:51:39", "throughput": 8636.38, "total_tokens": 49017120} +{"current_steps": 72720, "total_steps": 204665, "loss": 0.1279, "lr": 1.6284907349808976e-06, "epoch": 1.776561698385166, "percentage": 35.53, "elapsed_time": "1:34:36", "remaining_time": "2:51:38", "throughput": 8636.46, "total_tokens": 49020576} +{"current_steps": 72725, "total_steps": 204665, "loss": 0.032, "lr": 1.6284244023230507e-06, "epoch": 1.7766838492170132, "percentage": 35.53, "elapsed_time": "1:34:36", "remaining_time": "2:51:38", "throughput": 8636.51, "total_tokens": 49023840} +{"current_steps": 72730, "total_steps": 204665, "loss": 0.0013, "lr": 1.6283580650951324e-06, "epoch": 1.7768060000488604, "percentage": 35.54, "elapsed_time": "1:34:36", "remaining_time": "2:51:37", "throughput": 8636.61, "total_tokens": 49027424} +{"current_steps": 72735, "total_steps": 204665, "loss": 0.0744, "lr": 1.6282917232976252e-06, "epoch": 1.7769281508807075, "percentage": 35.54, "elapsed_time": "1:34:37", "remaining_time": "2:51:37", "throughput": 8636.62, "total_tokens": 49030432} +{"current_steps": 72740, "total_steps": 204665, "loss": 0.0605, "lr": 1.6282253769310115e-06, "epoch": 1.7770503017125545, "percentage": 35.54, "elapsed_time": "1:34:37", "remaining_time": "2:51:36", "throughput": 8636.72, "total_tokens": 49034080} +{"current_steps": 72745, "total_steps": 204665, "loss": 0.0023, "lr": 1.628159025995774e-06, "epoch": 1.7771724525444017, "percentage": 35.54, "elapsed_time": "1:34:37", "remaining_time": "2:51:36", "throughput": 8636.81, "total_tokens": 49037600} +{"current_steps": 72750, "total_steps": 204665, "loss": 0.014, "lr": 1.6280926704923949e-06, "epoch": 1.777294603376249, "percentage": 35.55, "elapsed_time": "1:34:38", "remaining_time": "2:51:35", "throughput": 8636.85, "total_tokens": 49040800} +{"current_steps": 72755, "total_steps": 204665, "loss": 0.1248, "lr": 1.6280263104213572e-06, "epoch": 1.777416754208096, "percentage": 35.55, "elapsed_time": "1:34:38", "remaining_time": "2:51:35", "throughput": 8636.9, "total_tokens": 49044064} +{"current_steps": 72760, "total_steps": 204665, "loss": 0.0014, "lr": 1.6279599457831431e-06, "epoch": 1.7775389050399433, "percentage": 35.55, "elapsed_time": "1:34:38", "remaining_time": "2:51:34", "throughput": 8636.93, "total_tokens": 49047200} +{"current_steps": 72765, "total_steps": 204665, "loss": 0.1021, "lr": 1.6278935765782356e-06, "epoch": 1.7776610558717905, "percentage": 35.55, "elapsed_time": "1:34:39", "remaining_time": "2:51:34", "throughput": 8636.99, "total_tokens": 49050528} +{"current_steps": 72770, "total_steps": 204665, "loss": 0.0831, "lr": 1.6278272028071168e-06, "epoch": 1.7777832067036377, "percentage": 35.56, "elapsed_time": "1:34:39", "remaining_time": "2:51:34", "throughput": 8637.08, "total_tokens": 49054112} +{"current_steps": 72775, "total_steps": 204665, "loss": 0.1695, "lr": 1.62776082447027e-06, "epoch": 1.7779053575354848, "percentage": 35.56, "elapsed_time": "1:34:39", "remaining_time": "2:51:33", "throughput": 8637.1, "total_tokens": 49057184} +{"current_steps": 72780, "total_steps": 204665, "loss": 0.0281, "lr": 1.6276944415681776e-06, "epoch": 1.778027508367332, "percentage": 35.56, "elapsed_time": "1:34:40", "remaining_time": "2:51:33", "throughput": 8637.21, "total_tokens": 49060896} +{"current_steps": 72785, "total_steps": 204665, "loss": 0.1584, "lr": 1.6276280541013223e-06, "epoch": 1.7781496591991792, "percentage": 35.56, "elapsed_time": "1:34:40", "remaining_time": "2:51:32", "throughput": 8637.32, "total_tokens": 49064544} +{"current_steps": 72790, "total_steps": 204665, "loss": 0.1369, "lr": 1.627561662070187e-06, "epoch": 1.7782718100310264, "percentage": 35.57, "elapsed_time": "1:34:40", "remaining_time": "2:51:32", "throughput": 8637.44, "total_tokens": 49068320} +{"current_steps": 72795, "total_steps": 204665, "loss": 0.0878, "lr": 1.6274952654752547e-06, "epoch": 1.7783939608628736, "percentage": 35.57, "elapsed_time": "1:34:41", "remaining_time": "2:51:31", "throughput": 8637.51, "total_tokens": 49071712} +{"current_steps": 72800, "total_steps": 204665, "loss": 0.0296, "lr": 1.6274288643170078e-06, "epoch": 1.7785161116947208, "percentage": 35.57, "elapsed_time": "1:34:41", "remaining_time": "2:51:31", "throughput": 8637.58, "total_tokens": 49075168} +{"current_steps": 72805, "total_steps": 204665, "loss": 0.0897, "lr": 1.6273624585959295e-06, "epoch": 1.7786382625265678, "percentage": 35.57, "elapsed_time": "1:34:41", "remaining_time": "2:51:30", "throughput": 8637.69, "total_tokens": 49078816} +{"current_steps": 72810, "total_steps": 204665, "loss": 0.11, "lr": 1.6272960483125026e-06, "epoch": 1.778760413358415, "percentage": 35.58, "elapsed_time": "1:34:42", "remaining_time": "2:51:30", "throughput": 8637.76, "total_tokens": 49082272} +{"current_steps": 72815, "total_steps": 204665, "loss": 0.1619, "lr": 1.6272296334672101e-06, "epoch": 1.7788825641902621, "percentage": 35.58, "elapsed_time": "1:34:42", "remaining_time": "2:51:29", "throughput": 8637.83, "total_tokens": 49085664} +{"current_steps": 72820, "total_steps": 204665, "loss": 0.1086, "lr": 1.6271632140605351e-06, "epoch": 1.7790047150221093, "percentage": 35.58, "elapsed_time": "1:34:42", "remaining_time": "2:51:29", "throughput": 8637.94, "total_tokens": 49089376} +{"current_steps": 72825, "total_steps": 204665, "loss": 0.1185, "lr": 1.6270967900929607e-06, "epoch": 1.7791268658539565, "percentage": 35.58, "elapsed_time": "1:34:43", "remaining_time": "2:51:28", "throughput": 8637.98, "total_tokens": 49092640} +{"current_steps": 72830, "total_steps": 204665, "loss": 0.1189, "lr": 1.6270303615649695e-06, "epoch": 1.7792490166858035, "percentage": 35.58, "elapsed_time": "1:34:43", "remaining_time": "2:51:28", "throughput": 8638.04, "total_tokens": 49095968} +{"current_steps": 72835, "total_steps": 204665, "loss": 0.0279, "lr": 1.6269639284770448e-06, "epoch": 1.7793711675176507, "percentage": 35.59, "elapsed_time": "1:34:44", "remaining_time": "2:51:28", "throughput": 8638.11, "total_tokens": 49099360} +{"current_steps": 72840, "total_steps": 204665, "loss": 0.0224, "lr": 1.62689749082967e-06, "epoch": 1.7794933183494979, "percentage": 35.59, "elapsed_time": "1:34:44", "remaining_time": "2:51:27", "throughput": 8638.15, "total_tokens": 49102560} +{"current_steps": 72845, "total_steps": 204665, "loss": 0.2352, "lr": 1.6268310486233282e-06, "epoch": 1.779615469181345, "percentage": 35.59, "elapsed_time": "1:34:44", "remaining_time": "2:51:27", "throughput": 8638.19, "total_tokens": 49105824} +{"current_steps": 72850, "total_steps": 204665, "loss": 0.0645, "lr": 1.626764601858502e-06, "epoch": 1.7797376200131922, "percentage": 35.59, "elapsed_time": "1:34:45", "remaining_time": "2:51:26", "throughput": 8638.22, "total_tokens": 49108896} +{"current_steps": 72855, "total_steps": 204665, "loss": 0.0024, "lr": 1.6266981505356752e-06, "epoch": 1.7798597708450394, "percentage": 35.6, "elapsed_time": "1:34:45", "remaining_time": "2:51:26", "throughput": 8638.3, "total_tokens": 49112352} +{"current_steps": 72860, "total_steps": 204665, "loss": 0.128, "lr": 1.626631694655331e-06, "epoch": 1.7799819216768866, "percentage": 35.6, "elapsed_time": "1:34:45", "remaining_time": "2:51:25", "throughput": 8638.36, "total_tokens": 49115744} +{"current_steps": 72865, "total_steps": 204665, "loss": 0.0014, "lr": 1.6265652342179523e-06, "epoch": 1.7801040725087338, "percentage": 35.6, "elapsed_time": "1:34:46", "remaining_time": "2:51:25", "throughput": 8638.45, "total_tokens": 49119264} +{"current_steps": 72870, "total_steps": 204665, "loss": 0.0843, "lr": 1.626498769224023e-06, "epoch": 1.780226223340581, "percentage": 35.6, "elapsed_time": "1:34:46", "remaining_time": "2:51:24", "throughput": 8638.5, "total_tokens": 49122592} +{"current_steps": 72875, "total_steps": 204665, "loss": 0.0023, "lr": 1.6264322996740258e-06, "epoch": 1.7803483741724282, "percentage": 35.61, "elapsed_time": "1:34:46", "remaining_time": "2:51:24", "throughput": 8638.59, "total_tokens": 49126176} +{"current_steps": 72880, "total_steps": 204665, "loss": 0.0575, "lr": 1.6263658255684447e-06, "epoch": 1.7804705250042754, "percentage": 35.61, "elapsed_time": "1:34:47", "remaining_time": "2:51:23", "throughput": 8638.65, "total_tokens": 49129504} +{"current_steps": 72885, "total_steps": 204665, "loss": 0.001, "lr": 1.6262993469077628e-06, "epoch": 1.7805926758361226, "percentage": 35.61, "elapsed_time": "1:34:47", "remaining_time": "2:51:23", "throughput": 8638.67, "total_tokens": 49132576} +{"current_steps": 72890, "total_steps": 204665, "loss": 0.143, "lr": 1.6262328636924635e-06, "epoch": 1.7807148266679698, "percentage": 35.61, "elapsed_time": "1:34:47", "remaining_time": "2:51:22", "throughput": 8638.74, "total_tokens": 49136032} +{"current_steps": 72895, "total_steps": 204665, "loss": 0.0331, "lr": 1.6261663759230303e-06, "epoch": 1.7808369774998167, "percentage": 35.62, "elapsed_time": "1:34:48", "remaining_time": "2:51:22", "throughput": 8638.77, "total_tokens": 49139168} +{"current_steps": 72900, "total_steps": 204665, "loss": 0.0545, "lr": 1.6260998835999472e-06, "epoch": 1.780959128331664, "percentage": 35.62, "elapsed_time": "1:34:48", "remaining_time": "2:51:21", "throughput": 8638.79, "total_tokens": 49142176} +{"current_steps": 72905, "total_steps": 204665, "loss": 0.1411, "lr": 1.626033386723697e-06, "epoch": 1.781081279163511, "percentage": 35.62, "elapsed_time": "1:34:48", "remaining_time": "2:51:21", "throughput": 8638.84, "total_tokens": 49145504} +{"current_steps": 72910, "total_steps": 204665, "loss": 0.0998, "lr": 1.6259668852947637e-06, "epoch": 1.7812034299953583, "percentage": 35.62, "elapsed_time": "1:34:49", "remaining_time": "2:51:20", "throughput": 8638.92, "total_tokens": 49149024} +{"current_steps": 72915, "total_steps": 204665, "loss": 0.1419, "lr": 1.6259003793136309e-06, "epoch": 1.7813255808272053, "percentage": 35.63, "elapsed_time": "1:34:49", "remaining_time": "2:51:20", "throughput": 8638.98, "total_tokens": 49152416} +{"current_steps": 72920, "total_steps": 204665, "loss": 0.0397, "lr": 1.625833868780782e-06, "epoch": 1.7814477316590525, "percentage": 35.63, "elapsed_time": "1:34:49", "remaining_time": "2:51:20", "throughput": 8639.04, "total_tokens": 49155744} +{"current_steps": 72925, "total_steps": 204665, "loss": 0.0564, "lr": 1.625767353696701e-06, "epoch": 1.7815698824908996, "percentage": 35.63, "elapsed_time": "1:34:50", "remaining_time": "2:51:19", "throughput": 8639.2, "total_tokens": 49159776} +{"current_steps": 72930, "total_steps": 204665, "loss": 0.001, "lr": 1.6257008340618715e-06, "epoch": 1.7816920333227468, "percentage": 35.63, "elapsed_time": "1:34:50", "remaining_time": "2:51:19", "throughput": 8639.25, "total_tokens": 49163104} +{"current_steps": 72935, "total_steps": 204665, "loss": 0.1116, "lr": 1.6256343098767773e-06, "epoch": 1.781814184154594, "percentage": 35.64, "elapsed_time": "1:34:51", "remaining_time": "2:51:18", "throughput": 8639.29, "total_tokens": 49166304} +{"current_steps": 72940, "total_steps": 204665, "loss": 0.0013, "lr": 1.6255677811419022e-06, "epoch": 1.7819363349864412, "percentage": 35.64, "elapsed_time": "1:34:51", "remaining_time": "2:51:18", "throughput": 8639.3, "total_tokens": 49169312} +{"current_steps": 72945, "total_steps": 204665, "loss": 0.1343, "lr": 1.6255012478577296e-06, "epoch": 1.7820584858182884, "percentage": 35.64, "elapsed_time": "1:34:51", "remaining_time": "2:51:17", "throughput": 8639.47, "total_tokens": 49173408} +{"current_steps": 72950, "total_steps": 204665, "loss": 0.1763, "lr": 1.625434710024744e-06, "epoch": 1.7821806366501356, "percentage": 35.64, "elapsed_time": "1:34:52", "remaining_time": "2:51:17", "throughput": 8639.58, "total_tokens": 49177120} +{"current_steps": 72955, "total_steps": 204665, "loss": 0.0555, "lr": 1.6253681676434289e-06, "epoch": 1.7823027874819828, "percentage": 35.65, "elapsed_time": "1:34:52", "remaining_time": "2:51:16", "throughput": 8639.71, "total_tokens": 49181024} +{"current_steps": 72960, "total_steps": 204665, "loss": 0.1329, "lr": 1.6253016207142682e-06, "epoch": 1.78242493831383, "percentage": 35.65, "elapsed_time": "1:34:52", "remaining_time": "2:51:16", "throughput": 8639.73, "total_tokens": 49184096} +{"current_steps": 72965, "total_steps": 204665, "loss": 0.2357, "lr": 1.625235069237746e-06, "epoch": 1.7825470891456772, "percentage": 35.65, "elapsed_time": "1:34:53", "remaining_time": "2:51:15", "throughput": 8639.8, "total_tokens": 49187488} +{"current_steps": 72970, "total_steps": 204665, "loss": 0.0376, "lr": 1.6251685132143463e-06, "epoch": 1.7826692399775244, "percentage": 35.65, "elapsed_time": "1:34:53", "remaining_time": "2:51:15", "throughput": 8639.83, "total_tokens": 49190624} +{"current_steps": 72975, "total_steps": 204665, "loss": 0.0964, "lr": 1.625101952644553e-06, "epoch": 1.7827913908093715, "percentage": 35.66, "elapsed_time": "1:34:53", "remaining_time": "2:51:14", "throughput": 8639.84, "total_tokens": 49193568} +{"current_steps": 72980, "total_steps": 204665, "loss": 0.0367, "lr": 1.6250353875288501e-06, "epoch": 1.7829135416412187, "percentage": 35.66, "elapsed_time": "1:34:54", "remaining_time": "2:51:14", "throughput": 8639.89, "total_tokens": 49196832} +{"current_steps": 72985, "total_steps": 204665, "loss": 0.1076, "lr": 1.6249688178677215e-06, "epoch": 1.7830356924730657, "percentage": 35.66, "elapsed_time": "1:34:54", "remaining_time": "2:51:14", "throughput": 8639.91, "total_tokens": 49199840} +{"current_steps": 72990, "total_steps": 204665, "loss": 0.1231, "lr": 1.6249022436616518e-06, "epoch": 1.783157843304913, "percentage": 35.66, "elapsed_time": "1:34:54", "remaining_time": "2:51:13", "throughput": 8640.06, "total_tokens": 49203808} +{"current_steps": 72995, "total_steps": 204665, "loss": 0.1205, "lr": 1.624835664911125e-06, "epoch": 1.78327999413676, "percentage": 35.67, "elapsed_time": "1:34:55", "remaining_time": "2:51:13", "throughput": 8640.12, "total_tokens": 49207136} +{"current_steps": 73000, "total_steps": 204665, "loss": 0.0732, "lr": 1.624769081616625e-06, "epoch": 1.7834021449686073, "percentage": 35.67, "elapsed_time": "1:34:55", "remaining_time": "2:51:12", "throughput": 8640.22, "total_tokens": 49210784} +{"current_steps": 73005, "total_steps": 204665, "loss": 0.0901, "lr": 1.6247024937786364e-06, "epoch": 1.7835242958004542, "percentage": 35.67, "elapsed_time": "1:34:55", "remaining_time": "2:51:12", "throughput": 8640.31, "total_tokens": 49214368} +{"current_steps": 73010, "total_steps": 204665, "loss": 0.108, "lr": 1.6246359013976432e-06, "epoch": 1.7836464466323014, "percentage": 35.67, "elapsed_time": "1:34:56", "remaining_time": "2:51:11", "throughput": 8640.43, "total_tokens": 49218144} +{"current_steps": 73015, "total_steps": 204665, "loss": 0.0013, "lr": 1.6245693044741296e-06, "epoch": 1.7837685974641486, "percentage": 35.68, "elapsed_time": "1:34:56", "remaining_time": "2:51:11", "throughput": 8640.51, "total_tokens": 49221600} +{"current_steps": 73020, "total_steps": 204665, "loss": 0.0907, "lr": 1.6245027030085798e-06, "epoch": 1.7838907482959958, "percentage": 35.68, "elapsed_time": "1:34:56", "remaining_time": "2:51:10", "throughput": 8640.59, "total_tokens": 49225120} +{"current_steps": 73025, "total_steps": 204665, "loss": 0.0778, "lr": 1.624436097001479e-06, "epoch": 1.784012899127843, "percentage": 35.68, "elapsed_time": "1:34:57", "remaining_time": "2:51:10", "throughput": 8640.66, "total_tokens": 49228512} +{"current_steps": 73030, "total_steps": 204665, "loss": 0.106, "lr": 1.6243694864533103e-06, "epoch": 1.7841350499596902, "percentage": 35.68, "elapsed_time": "1:34:57", "remaining_time": "2:51:09", "throughput": 8640.73, "total_tokens": 49231904} +{"current_steps": 73035, "total_steps": 204665, "loss": 0.0837, "lr": 1.6243028713645592e-06, "epoch": 1.7842572007915374, "percentage": 35.69, "elapsed_time": "1:34:58", "remaining_time": "2:51:09", "throughput": 8640.81, "total_tokens": 49235360} +{"current_steps": 73040, "total_steps": 204665, "loss": 0.0717, "lr": 1.6242362517357095e-06, "epoch": 1.7843793516233846, "percentage": 35.69, "elapsed_time": "1:34:58", "remaining_time": "2:51:08", "throughput": 8640.82, "total_tokens": 49238304} +{"current_steps": 73045, "total_steps": 204665, "loss": 0.0952, "lr": 1.6241696275672458e-06, "epoch": 1.7845015024552318, "percentage": 35.69, "elapsed_time": "1:34:58", "remaining_time": "2:51:08", "throughput": 8640.83, "total_tokens": 49241312} +{"current_steps": 73050, "total_steps": 204665, "loss": 0.0681, "lr": 1.6241029988596528e-06, "epoch": 1.784623653287079, "percentage": 35.69, "elapsed_time": "1:34:59", "remaining_time": "2:51:07", "throughput": 8640.85, "total_tokens": 49244384} +{"current_steps": 73055, "total_steps": 204665, "loss": 0.0327, "lr": 1.624036365613415e-06, "epoch": 1.7847458041189261, "percentage": 35.69, "elapsed_time": "1:34:59", "remaining_time": "2:51:07", "throughput": 8640.98, "total_tokens": 49248160} +{"current_steps": 73060, "total_steps": 204665, "loss": 0.1776, "lr": 1.623969727829017e-06, "epoch": 1.7848679549507733, "percentage": 35.7, "elapsed_time": "1:34:59", "remaining_time": "2:51:07", "throughput": 8641.02, "total_tokens": 49251360} +{"current_steps": 73065, "total_steps": 204665, "loss": 0.0851, "lr": 1.6239030855069432e-06, "epoch": 1.7849901057826205, "percentage": 35.7, "elapsed_time": "1:35:00", "remaining_time": "2:51:06", "throughput": 8641.09, "total_tokens": 49254816} +{"current_steps": 73070, "total_steps": 204665, "loss": 0.0367, "lr": 1.6238364386476783e-06, "epoch": 1.7851122566144677, "percentage": 35.7, "elapsed_time": "1:35:00", "remaining_time": "2:51:06", "throughput": 8641.15, "total_tokens": 49258144} +{"current_steps": 73075, "total_steps": 204665, "loss": 0.1875, "lr": 1.623769787251707e-06, "epoch": 1.7852344074463147, "percentage": 35.7, "elapsed_time": "1:35:00", "remaining_time": "2:51:05", "throughput": 8641.19, "total_tokens": 49261408} +{"current_steps": 73080, "total_steps": 204665, "loss": 0.1415, "lr": 1.623703131319514e-06, "epoch": 1.7853565582781619, "percentage": 35.71, "elapsed_time": "1:35:01", "remaining_time": "2:51:05", "throughput": 8641.21, "total_tokens": 49264416} +{"current_steps": 73085, "total_steps": 204665, "loss": 0.0578, "lr": 1.6236364708515842e-06, "epoch": 1.785478709110009, "percentage": 35.71, "elapsed_time": "1:35:01", "remaining_time": "2:51:04", "throughput": 8641.25, "total_tokens": 49267616} +{"current_steps": 73090, "total_steps": 204665, "loss": 0.0612, "lr": 1.623569805848402e-06, "epoch": 1.7856008599418562, "percentage": 35.71, "elapsed_time": "1:35:01", "remaining_time": "2:51:04", "throughput": 8641.31, "total_tokens": 49271008} +{"current_steps": 73095, "total_steps": 204665, "loss": 0.1369, "lr": 1.6235031363104528e-06, "epoch": 1.7857230107737032, "percentage": 35.71, "elapsed_time": "1:35:02", "remaining_time": "2:51:03", "throughput": 8641.36, "total_tokens": 49274272} +{"current_steps": 73100, "total_steps": 204665, "loss": 0.0156, "lr": 1.623436462238221e-06, "epoch": 1.7858451616055504, "percentage": 35.72, "elapsed_time": "1:35:02", "remaining_time": "2:51:03", "throughput": 8641.41, "total_tokens": 49277536} +{"current_steps": 73105, "total_steps": 204665, "loss": 0.1298, "lr": 1.6233697836321913e-06, "epoch": 1.7859673124373976, "percentage": 35.72, "elapsed_time": "1:35:02", "remaining_time": "2:51:02", "throughput": 8641.48, "total_tokens": 49280992} +{"current_steps": 73110, "total_steps": 204665, "loss": 0.071, "lr": 1.623303100492849e-06, "epoch": 1.7860894632692448, "percentage": 35.72, "elapsed_time": "1:35:03", "remaining_time": "2:51:02", "throughput": 8641.52, "total_tokens": 49284192} +{"current_steps": 73115, "total_steps": 204665, "loss": 0.1023, "lr": 1.623236412820679e-06, "epoch": 1.786211614101092, "percentage": 35.72, "elapsed_time": "1:35:03", "remaining_time": "2:51:01", "throughput": 8641.54, "total_tokens": 49287264} +{"current_steps": 73120, "total_steps": 204665, "loss": 0.11, "lr": 1.6231697206161661e-06, "epoch": 1.7863337649329392, "percentage": 35.73, "elapsed_time": "1:35:03", "remaining_time": "2:51:01", "throughput": 8641.65, "total_tokens": 49290912} +{"current_steps": 73125, "total_steps": 204665, "loss": 0.1004, "lr": 1.6231030238797956e-06, "epoch": 1.7864559157647864, "percentage": 35.73, "elapsed_time": "1:35:04", "remaining_time": "2:51:00", "throughput": 8641.68, "total_tokens": 49294048} +{"current_steps": 73130, "total_steps": 204665, "loss": 0.0276, "lr": 1.623036322612052e-06, "epoch": 1.7865780665966335, "percentage": 35.73, "elapsed_time": "1:35:04", "remaining_time": "2:51:00", "throughput": 8641.71, "total_tokens": 49297120} +{"current_steps": 73135, "total_steps": 204665, "loss": 0.1215, "lr": 1.622969616813421e-06, "epoch": 1.7867002174284807, "percentage": 35.73, "elapsed_time": "1:35:04", "remaining_time": "2:51:00", "throughput": 8641.77, "total_tokens": 49300512} +{"current_steps": 73140, "total_steps": 204665, "loss": 0.2217, "lr": 1.6229029064843871e-06, "epoch": 1.786822368260328, "percentage": 35.74, "elapsed_time": "1:35:05", "remaining_time": "2:50:59", "throughput": 8641.77, "total_tokens": 49303392} +{"current_steps": 73145, "total_steps": 204665, "loss": 0.0755, "lr": 1.6228361916254358e-06, "epoch": 1.786944519092175, "percentage": 35.74, "elapsed_time": "1:35:05", "remaining_time": "2:50:59", "throughput": 8641.9, "total_tokens": 49307232} +{"current_steps": 73150, "total_steps": 204665, "loss": 0.003, "lr": 1.6227694722370525e-06, "epoch": 1.7870666699240223, "percentage": 35.74, "elapsed_time": "1:35:05", "remaining_time": "2:50:58", "throughput": 8641.96, "total_tokens": 49310560} +{"current_steps": 73155, "total_steps": 204665, "loss": 0.025, "lr": 1.6227027483197214e-06, "epoch": 1.7871888207558695, "percentage": 35.74, "elapsed_time": "1:35:06", "remaining_time": "2:50:58", "throughput": 8642.07, "total_tokens": 49314272} +{"current_steps": 73160, "total_steps": 204665, "loss": 0.0363, "lr": 1.622636019873929e-06, "epoch": 1.7873109715877165, "percentage": 35.75, "elapsed_time": "1:35:06", "remaining_time": "2:50:57", "throughput": 8642.11, "total_tokens": 49317472} +{"current_steps": 73165, "total_steps": 204665, "loss": 0.1551, "lr": 1.62256928690016e-06, "epoch": 1.7874331224195636, "percentage": 35.75, "elapsed_time": "1:35:06", "remaining_time": "2:50:57", "throughput": 8642.14, "total_tokens": 49320544} +{"current_steps": 73170, "total_steps": 204665, "loss": 0.0316, "lr": 1.6225025493988995e-06, "epoch": 1.7875552732514108, "percentage": 35.75, "elapsed_time": "1:35:07", "remaining_time": "2:50:56", "throughput": 8642.18, "total_tokens": 49323744} +{"current_steps": 73175, "total_steps": 204665, "loss": 0.1097, "lr": 1.6224358073706327e-06, "epoch": 1.787677424083258, "percentage": 35.75, "elapsed_time": "1:35:07", "remaining_time": "2:50:56", "throughput": 8642.29, "total_tokens": 49327456} +{"current_steps": 73180, "total_steps": 204665, "loss": 0.124, "lr": 1.622369060815846e-06, "epoch": 1.7877995749151052, "percentage": 35.76, "elapsed_time": "1:35:08", "remaining_time": "2:50:55", "throughput": 8642.38, "total_tokens": 49331040} +{"current_steps": 73185, "total_steps": 204665, "loss": 0.0979, "lr": 1.6223023097350238e-06, "epoch": 1.7879217257469522, "percentage": 35.76, "elapsed_time": "1:35:08", "remaining_time": "2:50:55", "throughput": 8642.5, "total_tokens": 49334816} +{"current_steps": 73190, "total_steps": 204665, "loss": 0.1201, "lr": 1.6222355541286517e-06, "epoch": 1.7880438765787994, "percentage": 35.76, "elapsed_time": "1:35:08", "remaining_time": "2:50:54", "throughput": 8642.54, "total_tokens": 49338016} +{"current_steps": 73195, "total_steps": 204665, "loss": 0.1417, "lr": 1.6221687939972154e-06, "epoch": 1.7881660274106466, "percentage": 35.76, "elapsed_time": "1:35:09", "remaining_time": "2:50:54", "throughput": 8642.57, "total_tokens": 49341152} +{"current_steps": 73200, "total_steps": 204665, "loss": 0.0307, "lr": 1.6221020293412003e-06, "epoch": 1.7882881782424938, "percentage": 35.77, "elapsed_time": "1:35:09", "remaining_time": "2:50:53", "throughput": 8642.65, "total_tokens": 49344608} +{"current_steps": 73205, "total_steps": 204665, "loss": 0.0766, "lr": 1.6220352601610916e-06, "epoch": 1.788410329074341, "percentage": 35.77, "elapsed_time": "1:35:09", "remaining_time": "2:50:53", "throughput": 8642.71, "total_tokens": 49347936} +{"current_steps": 73210, "total_steps": 204665, "loss": 0.0262, "lr": 1.6219684864573755e-06, "epoch": 1.7885324799061881, "percentage": 35.77, "elapsed_time": "1:35:10", "remaining_time": "2:50:53", "throughput": 8642.76, "total_tokens": 49351264} +{"current_steps": 73215, "total_steps": 204665, "loss": 0.0033, "lr": 1.6219017082305373e-06, "epoch": 1.7886546307380353, "percentage": 35.77, "elapsed_time": "1:35:10", "remaining_time": "2:50:52", "throughput": 8642.8, "total_tokens": 49354528} +{"current_steps": 73220, "total_steps": 204665, "loss": 0.0614, "lr": 1.6218349254810627e-06, "epoch": 1.7887767815698825, "percentage": 35.78, "elapsed_time": "1:35:10", "remaining_time": "2:50:52", "throughput": 8642.84, "total_tokens": 49357728} +{"current_steps": 73225, "total_steps": 204665, "loss": 0.0965, "lr": 1.621768138209437e-06, "epoch": 1.7888989324017297, "percentage": 35.78, "elapsed_time": "1:35:11", "remaining_time": "2:50:51", "throughput": 8642.94, "total_tokens": 49361312} +{"current_steps": 73230, "total_steps": 204665, "loss": 0.0449, "lr": 1.621701346416146e-06, "epoch": 1.789021083233577, "percentage": 35.78, "elapsed_time": "1:35:11", "remaining_time": "2:50:51", "throughput": 8643.05, "total_tokens": 49365024} +{"current_steps": 73235, "total_steps": 204665, "loss": 0.054, "lr": 1.621634550101676e-06, "epoch": 1.789143234065424, "percentage": 35.78, "elapsed_time": "1:35:11", "remaining_time": "2:50:50", "throughput": 8643.11, "total_tokens": 49368416} +{"current_steps": 73240, "total_steps": 204665, "loss": 0.0255, "lr": 1.621567749266512e-06, "epoch": 1.7892653848972713, "percentage": 35.79, "elapsed_time": "1:35:12", "remaining_time": "2:50:50", "throughput": 8643.2, "total_tokens": 49371936} +{"current_steps": 73245, "total_steps": 204665, "loss": 0.0013, "lr": 1.6215009439111404e-06, "epoch": 1.7893875357291185, "percentage": 35.79, "elapsed_time": "1:35:12", "remaining_time": "2:50:49", "throughput": 8643.27, "total_tokens": 49375392} +{"current_steps": 73250, "total_steps": 204665, "loss": 0.0731, "lr": 1.621434134036047e-06, "epoch": 1.7895096865609654, "percentage": 35.79, "elapsed_time": "1:35:12", "remaining_time": "2:50:49", "throughput": 8643.31, "total_tokens": 49378592} +{"current_steps": 73255, "total_steps": 204665, "loss": 0.2222, "lr": 1.621367319641717e-06, "epoch": 1.7896318373928126, "percentage": 35.79, "elapsed_time": "1:35:13", "remaining_time": "2:50:48", "throughput": 8643.36, "total_tokens": 49381856} +{"current_steps": 73260, "total_steps": 204665, "loss": 0.0916, "lr": 1.621300500728637e-06, "epoch": 1.7897539882246598, "percentage": 35.8, "elapsed_time": "1:35:13", "remaining_time": "2:50:48", "throughput": 8643.47, "total_tokens": 49385568} +{"current_steps": 73265, "total_steps": 204665, "loss": 0.0386, "lr": 1.6212336772972926e-06, "epoch": 1.789876139056507, "percentage": 35.8, "elapsed_time": "1:35:13", "remaining_time": "2:50:47", "throughput": 8643.56, "total_tokens": 49389152} +{"current_steps": 73270, "total_steps": 204665, "loss": 0.0428, "lr": 1.6211668493481697e-06, "epoch": 1.7899982898883542, "percentage": 35.8, "elapsed_time": "1:35:14", "remaining_time": "2:50:47", "throughput": 8643.57, "total_tokens": 49392096} +{"current_steps": 73275, "total_steps": 204665, "loss": 0.0436, "lr": 1.6211000168817544e-06, "epoch": 1.7901204407202012, "percentage": 35.8, "elapsed_time": "1:35:14", "remaining_time": "2:50:47", "throughput": 8643.66, "total_tokens": 49395680} +{"current_steps": 73280, "total_steps": 204665, "loss": 0.1954, "lr": 1.6210331798985325e-06, "epoch": 1.7902425915520483, "percentage": 35.8, "elapsed_time": "1:35:15", "remaining_time": "2:50:46", "throughput": 8643.68, "total_tokens": 49398752} +{"current_steps": 73285, "total_steps": 204665, "loss": 0.0974, "lr": 1.6209663383989907e-06, "epoch": 1.7903647423838955, "percentage": 35.81, "elapsed_time": "1:35:15", "remaining_time": "2:50:46", "throughput": 8643.72, "total_tokens": 49401952} +{"current_steps": 73290, "total_steps": 204665, "loss": 0.0597, "lr": 1.6208994923836145e-06, "epoch": 1.7904868932157427, "percentage": 35.81, "elapsed_time": "1:35:15", "remaining_time": "2:50:45", "throughput": 8643.75, "total_tokens": 49405088} +{"current_steps": 73295, "total_steps": 204665, "loss": 0.0971, "lr": 1.6208326418528903e-06, "epoch": 1.79060904404759, "percentage": 35.81, "elapsed_time": "1:35:16", "remaining_time": "2:50:45", "throughput": 8643.8, "total_tokens": 49408352} +{"current_steps": 73300, "total_steps": 204665, "loss": 0.1478, "lr": 1.6207657868073037e-06, "epoch": 1.790731194879437, "percentage": 35.81, "elapsed_time": "1:35:16", "remaining_time": "2:50:44", "throughput": 8643.86, "total_tokens": 49411680} +{"current_steps": 73305, "total_steps": 204665, "loss": 0.067, "lr": 1.620698927247342e-06, "epoch": 1.7908533457112843, "percentage": 35.82, "elapsed_time": "1:35:16", "remaining_time": "2:50:44", "throughput": 8643.92, "total_tokens": 49415072} +{"current_steps": 73310, "total_steps": 204665, "loss": 0.0026, "lr": 1.6206320631734903e-06, "epoch": 1.7909754965431315, "percentage": 35.82, "elapsed_time": "1:35:17", "remaining_time": "2:50:43", "throughput": 8643.95, "total_tokens": 49418208} +{"current_steps": 73315, "total_steps": 204665, "loss": 0.1575, "lr": 1.6205651945862355e-06, "epoch": 1.7910976473749787, "percentage": 35.82, "elapsed_time": "1:35:17", "remaining_time": "2:50:43", "throughput": 8644.06, "total_tokens": 49421856} +{"current_steps": 73320, "total_steps": 204665, "loss": 0.1557, "lr": 1.6204983214860634e-06, "epoch": 1.7912197982068259, "percentage": 35.82, "elapsed_time": "1:35:17", "remaining_time": "2:50:42", "throughput": 8644.12, "total_tokens": 49425248} +{"current_steps": 73325, "total_steps": 204665, "loss": 0.0876, "lr": 1.620431443873461e-06, "epoch": 1.791341949038673, "percentage": 35.83, "elapsed_time": "1:35:18", "remaining_time": "2:50:42", "throughput": 8644.14, "total_tokens": 49428256} +{"current_steps": 73330, "total_steps": 204665, "loss": 0.0589, "lr": 1.620364561748914e-06, "epoch": 1.7914640998705202, "percentage": 35.83, "elapsed_time": "1:35:18", "remaining_time": "2:50:41", "throughput": 8644.14, "total_tokens": 49431200} +{"current_steps": 73335, "total_steps": 204665, "loss": 0.0266, "lr": 1.6202976751129092e-06, "epoch": 1.7915862507023674, "percentage": 35.83, "elapsed_time": "1:35:18", "remaining_time": "2:50:41", "throughput": 8644.2, "total_tokens": 49434528} +{"current_steps": 73340, "total_steps": 204665, "loss": 0.1441, "lr": 1.6202307839659328e-06, "epoch": 1.7917084015342144, "percentage": 35.83, "elapsed_time": "1:35:19", "remaining_time": "2:50:40", "throughput": 8644.24, "total_tokens": 49437728} +{"current_steps": 73345, "total_steps": 204665, "loss": 0.1626, "lr": 1.6201638883084714e-06, "epoch": 1.7918305523660616, "percentage": 35.84, "elapsed_time": "1:35:19", "remaining_time": "2:50:40", "throughput": 8644.28, "total_tokens": 49440928} +{"current_steps": 73350, "total_steps": 204665, "loss": 0.1022, "lr": 1.6200969881410113e-06, "epoch": 1.7919527031979088, "percentage": 35.84, "elapsed_time": "1:35:19", "remaining_time": "2:50:39", "throughput": 8644.29, "total_tokens": 49443872} +{"current_steps": 73355, "total_steps": 204665, "loss": 0.0422, "lr": 1.620030083464039e-06, "epoch": 1.792074854029756, "percentage": 35.84, "elapsed_time": "1:35:20", "remaining_time": "2:50:39", "throughput": 8644.34, "total_tokens": 49447136} +{"current_steps": 73360, "total_steps": 204665, "loss": 0.1179, "lr": 1.6199631742780415e-06, "epoch": 1.7921970048616032, "percentage": 35.84, "elapsed_time": "1:35:20", "remaining_time": "2:50:39", "throughput": 8644.39, "total_tokens": 49450464} +{"current_steps": 73365, "total_steps": 204665, "loss": 0.0342, "lr": 1.6198962605835046e-06, "epoch": 1.7923191556934501, "percentage": 35.85, "elapsed_time": "1:35:20", "remaining_time": "2:50:38", "throughput": 8644.54, "total_tokens": 49454368} +{"current_steps": 73370, "total_steps": 204665, "loss": 0.1297, "lr": 1.6198293423809157e-06, "epoch": 1.7924413065252973, "percentage": 35.85, "elapsed_time": "1:35:21", "remaining_time": "2:50:38", "throughput": 8644.56, "total_tokens": 49457440} +{"current_steps": 73375, "total_steps": 204665, "loss": 0.0668, "lr": 1.619762419670761e-06, "epoch": 1.7925634573571445, "percentage": 35.85, "elapsed_time": "1:35:21", "remaining_time": "2:50:37", "throughput": 8644.63, "total_tokens": 49460896} +{"current_steps": 73380, "total_steps": 204665, "loss": 0.0754, "lr": 1.6196954924535274e-06, "epoch": 1.7926856081889917, "percentage": 35.85, "elapsed_time": "1:35:21", "remaining_time": "2:50:37", "throughput": 8644.67, "total_tokens": 49464032} +{"current_steps": 73385, "total_steps": 204665, "loss": 0.0775, "lr": 1.6196285607297013e-06, "epoch": 1.7928077590208389, "percentage": 35.86, "elapsed_time": "1:35:22", "remaining_time": "2:50:36", "throughput": 8644.69, "total_tokens": 49467104} +{"current_steps": 73390, "total_steps": 204665, "loss": 0.1124, "lr": 1.6195616244997698e-06, "epoch": 1.792929909852686, "percentage": 35.86, "elapsed_time": "1:35:22", "remaining_time": "2:50:36", "throughput": 8644.75, "total_tokens": 49470432} +{"current_steps": 73395, "total_steps": 204665, "loss": 0.0208, "lr": 1.6194946837642194e-06, "epoch": 1.7930520606845333, "percentage": 35.86, "elapsed_time": "1:35:22", "remaining_time": "2:50:35", "throughput": 8644.81, "total_tokens": 49473824} +{"current_steps": 73400, "total_steps": 204665, "loss": 0.0871, "lr": 1.6194277385235372e-06, "epoch": 1.7931742115163805, "percentage": 35.86, "elapsed_time": "1:35:23", "remaining_time": "2:50:35", "throughput": 8644.93, "total_tokens": 49477536} +{"current_steps": 73405, "total_steps": 204665, "loss": 0.001, "lr": 1.6193607887782098e-06, "epoch": 1.7932963623482276, "percentage": 35.87, "elapsed_time": "1:35:23", "remaining_time": "2:50:34", "throughput": 8645.02, "total_tokens": 49481120} +{"current_steps": 73410, "total_steps": 204665, "loss": 0.0447, "lr": 1.619293834528724e-06, "epoch": 1.7934185131800748, "percentage": 35.87, "elapsed_time": "1:35:24", "remaining_time": "2:50:34", "throughput": 8645.09, "total_tokens": 49484512} +{"current_steps": 73415, "total_steps": 204665, "loss": 0.1016, "lr": 1.6192268757755674e-06, "epoch": 1.793540664011922, "percentage": 35.87, "elapsed_time": "1:35:24", "remaining_time": "2:50:33", "throughput": 8645.12, "total_tokens": 49487648} +{"current_steps": 73420, "total_steps": 204665, "loss": 0.0026, "lr": 1.6191599125192256e-06, "epoch": 1.7936628148437692, "percentage": 35.87, "elapsed_time": "1:35:24", "remaining_time": "2:50:33", "throughput": 8645.19, "total_tokens": 49491104} +{"current_steps": 73425, "total_steps": 204665, "loss": 0.094, "lr": 1.6190929447601872e-06, "epoch": 1.7937849656756164, "percentage": 35.88, "elapsed_time": "1:35:25", "remaining_time": "2:50:32", "throughput": 8645.26, "total_tokens": 49494496} +{"current_steps": 73430, "total_steps": 204665, "loss": 0.0112, "lr": 1.6190259724989378e-06, "epoch": 1.7939071165074634, "percentage": 35.88, "elapsed_time": "1:35:25", "remaining_time": "2:50:32", "throughput": 8645.35, "total_tokens": 49498016} +{"current_steps": 73435, "total_steps": 204665, "loss": 0.1724, "lr": 1.6189589957359652e-06, "epoch": 1.7940292673393106, "percentage": 35.88, "elapsed_time": "1:35:25", "remaining_time": "2:50:32", "throughput": 8645.4, "total_tokens": 49501280} +{"current_steps": 73440, "total_steps": 204665, "loss": 0.1396, "lr": 1.6188920144717564e-06, "epoch": 1.7941514181711578, "percentage": 35.88, "elapsed_time": "1:35:26", "remaining_time": "2:50:31", "throughput": 8645.4, "total_tokens": 49504224} +{"current_steps": 73445, "total_steps": 204665, "loss": 0.0372, "lr": 1.6188250287067984e-06, "epoch": 1.794273569003005, "percentage": 35.89, "elapsed_time": "1:35:26", "remaining_time": "2:50:31", "throughput": 8645.49, "total_tokens": 49507808} +{"current_steps": 73450, "total_steps": 204665, "loss": 0.0516, "lr": 1.6187580384415785e-06, "epoch": 1.794395719834852, "percentage": 35.89, "elapsed_time": "1:35:26", "remaining_time": "2:50:30", "throughput": 8645.56, "total_tokens": 49511264} +{"current_steps": 73455, "total_steps": 204665, "loss": 0.0438, "lr": 1.6186910436765833e-06, "epoch": 1.794517870666699, "percentage": 35.89, "elapsed_time": "1:35:27", "remaining_time": "2:50:30", "throughput": 8645.63, "total_tokens": 49514656} +{"current_steps": 73460, "total_steps": 204665, "loss": 0.0227, "lr": 1.6186240444123005e-06, "epoch": 1.7946400214985463, "percentage": 35.89, "elapsed_time": "1:35:27", "remaining_time": "2:50:29", "throughput": 8645.65, "total_tokens": 49517728} +{"current_steps": 73465, "total_steps": 204665, "loss": 0.1171, "lr": 1.6185570406492174e-06, "epoch": 1.7947621723303935, "percentage": 35.9, "elapsed_time": "1:35:27", "remaining_time": "2:50:29", "throughput": 8645.69, "total_tokens": 49520864} +{"current_steps": 73470, "total_steps": 204665, "loss": 0.0544, "lr": 1.6184900323878211e-06, "epoch": 1.7948843231622407, "percentage": 35.9, "elapsed_time": "1:35:28", "remaining_time": "2:50:28", "throughput": 8645.7, "total_tokens": 49523872} +{"current_steps": 73475, "total_steps": 204665, "loss": 0.1353, "lr": 1.618423019628599e-06, "epoch": 1.7950064739940879, "percentage": 35.9, "elapsed_time": "1:35:28", "remaining_time": "2:50:28", "throughput": 8645.81, "total_tokens": 49527584} +{"current_steps": 73480, "total_steps": 204665, "loss": 0.001, "lr": 1.6183560023720384e-06, "epoch": 1.795128624825935, "percentage": 35.9, "elapsed_time": "1:35:28", "remaining_time": "2:50:27", "throughput": 8645.88, "total_tokens": 49530976} +{"current_steps": 73485, "total_steps": 204665, "loss": 0.0426, "lr": 1.6182889806186264e-06, "epoch": 1.7952507756577822, "percentage": 35.91, "elapsed_time": "1:35:29", "remaining_time": "2:50:27", "throughput": 8645.87, "total_tokens": 49533792} +{"current_steps": 73490, "total_steps": 204665, "loss": 0.1981, "lr": 1.6182219543688507e-06, "epoch": 1.7953729264896294, "percentage": 35.91, "elapsed_time": "1:35:29", "remaining_time": "2:50:26", "throughput": 8645.92, "total_tokens": 49537056} +{"current_steps": 73495, "total_steps": 204665, "loss": 0.0319, "lr": 1.6181549236231989e-06, "epoch": 1.7954950773214766, "percentage": 35.91, "elapsed_time": "1:35:29", "remaining_time": "2:50:26", "throughput": 8645.92, "total_tokens": 49540000} +{"current_steps": 73500, "total_steps": 204665, "loss": 0.042, "lr": 1.618087888382158e-06, "epoch": 1.7956172281533238, "percentage": 35.91, "elapsed_time": "1:35:30", "remaining_time": "2:50:25", "throughput": 8646.05, "total_tokens": 49543904} +{"current_steps": 73505, "total_steps": 204665, "loss": 0.0453, "lr": 1.6180208486462159e-06, "epoch": 1.795739378985171, "percentage": 35.91, "elapsed_time": "1:35:30", "remaining_time": "2:50:25", "throughput": 8646.13, "total_tokens": 49547360} +{"current_steps": 73510, "total_steps": 204665, "loss": 0.116, "lr": 1.61795380441586e-06, "epoch": 1.7958615298170182, "percentage": 35.92, "elapsed_time": "1:35:30", "remaining_time": "2:50:25", "throughput": 8646.18, "total_tokens": 49550624} +{"current_steps": 73515, "total_steps": 204665, "loss": 0.0884, "lr": 1.6178867556915775e-06, "epoch": 1.7959836806488654, "percentage": 35.92, "elapsed_time": "1:35:31", "remaining_time": "2:50:24", "throughput": 8646.2, "total_tokens": 49553696} +{"current_steps": 73520, "total_steps": 204665, "loss": 0.0022, "lr": 1.6178197024738566e-06, "epoch": 1.7961058314807123, "percentage": 35.92, "elapsed_time": "1:35:31", "remaining_time": "2:50:24", "throughput": 8646.27, "total_tokens": 49557152} +{"current_steps": 73525, "total_steps": 204665, "loss": 0.0508, "lr": 1.6177526447631845e-06, "epoch": 1.7962279823125595, "percentage": 35.92, "elapsed_time": "1:35:31", "remaining_time": "2:50:23", "throughput": 8646.3, "total_tokens": 49560224} +{"current_steps": 73530, "total_steps": 204665, "loss": 0.07, "lr": 1.617685582560049e-06, "epoch": 1.7963501331444067, "percentage": 35.93, "elapsed_time": "1:35:32", "remaining_time": "2:50:23", "throughput": 8646.36, "total_tokens": 49563552} +{"current_steps": 73535, "total_steps": 204665, "loss": 0.0935, "lr": 1.617618515864938e-06, "epoch": 1.796472283976254, "percentage": 35.93, "elapsed_time": "1:35:32", "remaining_time": "2:50:22", "throughput": 8646.46, "total_tokens": 49567200} +{"current_steps": 73540, "total_steps": 204665, "loss": 0.0344, "lr": 1.617551444678339e-06, "epoch": 1.7965944348081009, "percentage": 35.93, "elapsed_time": "1:35:33", "remaining_time": "2:50:22", "throughput": 8646.54, "total_tokens": 49570720} +{"current_steps": 73545, "total_steps": 204665, "loss": 0.001, "lr": 1.6174843690007396e-06, "epoch": 1.796716585639948, "percentage": 35.93, "elapsed_time": "1:35:33", "remaining_time": "2:50:21", "throughput": 8646.56, "total_tokens": 49573728} +{"current_steps": 73550, "total_steps": 204665, "loss": 0.0986, "lr": 1.6174172888326279e-06, "epoch": 1.7968387364717953, "percentage": 35.94, "elapsed_time": "1:35:33", "remaining_time": "2:50:21", "throughput": 8646.62, "total_tokens": 49577120} +{"current_steps": 73555, "total_steps": 204665, "loss": 0.0424, "lr": 1.6173502041744915e-06, "epoch": 1.7969608873036425, "percentage": 35.94, "elapsed_time": "1:35:34", "remaining_time": "2:50:20", "throughput": 8646.67, "total_tokens": 49580448} +{"current_steps": 73560, "total_steps": 204665, "loss": 0.1051, "lr": 1.6172831150268188e-06, "epoch": 1.7970830381354896, "percentage": 35.94, "elapsed_time": "1:35:34", "remaining_time": "2:50:20", "throughput": 8646.84, "total_tokens": 49584544} +{"current_steps": 73565, "total_steps": 204665, "loss": 0.0408, "lr": 1.6172160213900967e-06, "epoch": 1.7972051889673368, "percentage": 35.94, "elapsed_time": "1:35:34", "remaining_time": "2:50:19", "throughput": 8646.86, "total_tokens": 49587616} +{"current_steps": 73570, "total_steps": 204665, "loss": 0.11, "lr": 1.617148923264814e-06, "epoch": 1.797327339799184, "percentage": 35.95, "elapsed_time": "1:35:35", "remaining_time": "2:50:19", "throughput": 8646.93, "total_tokens": 49591072} +{"current_steps": 73575, "total_steps": 204665, "loss": 0.184, "lr": 1.617081820651458e-06, "epoch": 1.7974494906310312, "percentage": 35.95, "elapsed_time": "1:35:35", "remaining_time": "2:50:18", "throughput": 8646.98, "total_tokens": 49594336} +{"current_steps": 73580, "total_steps": 204665, "loss": 0.0354, "lr": 1.6170147135505175e-06, "epoch": 1.7975716414628784, "percentage": 35.95, "elapsed_time": "1:35:35", "remaining_time": "2:50:18", "throughput": 8647.11, "total_tokens": 49598176} +{"current_steps": 73585, "total_steps": 204665, "loss": 0.0947, "lr": 1.6169476019624796e-06, "epoch": 1.7976937922947256, "percentage": 35.95, "elapsed_time": "1:35:36", "remaining_time": "2:50:18", "throughput": 8647.11, "total_tokens": 49601120} +{"current_steps": 73590, "total_steps": 204665, "loss": 0.1298, "lr": 1.616880485887833e-06, "epoch": 1.7978159431265728, "percentage": 35.96, "elapsed_time": "1:35:36", "remaining_time": "2:50:17", "throughput": 8647.21, "total_tokens": 49604704} +{"current_steps": 73595, "total_steps": 204665, "loss": 0.0295, "lr": 1.6168133653270657e-06, "epoch": 1.79793809395842, "percentage": 35.96, "elapsed_time": "1:35:36", "remaining_time": "2:50:17", "throughput": 8647.25, "total_tokens": 49607968} +{"current_steps": 73600, "total_steps": 204665, "loss": 0.1576, "lr": 1.6167462402806658e-06, "epoch": 1.7980602447902672, "percentage": 35.96, "elapsed_time": "1:35:37", "remaining_time": "2:50:16", "throughput": 8647.32, "total_tokens": 49611360} +{"current_steps": 73605, "total_steps": 204665, "loss": 0.1108, "lr": 1.6166791107491212e-06, "epoch": 1.7981823956221143, "percentage": 35.96, "elapsed_time": "1:35:37", "remaining_time": "2:50:16", "throughput": 8647.42, "total_tokens": 49614944} +{"current_steps": 73610, "total_steps": 204665, "loss": 0.1571, "lr": 1.61661197673292e-06, "epoch": 1.7983045464539613, "percentage": 35.97, "elapsed_time": "1:35:37", "remaining_time": "2:50:15", "throughput": 8647.51, "total_tokens": 49618528} +{"current_steps": 73615, "total_steps": 204665, "loss": 0.0018, "lr": 1.616544838232551e-06, "epoch": 1.7984266972858085, "percentage": 35.97, "elapsed_time": "1:35:38", "remaining_time": "2:50:15", "throughput": 8647.63, "total_tokens": 49622304} +{"current_steps": 73620, "total_steps": 204665, "loss": 0.0008, "lr": 1.6164776952485017e-06, "epoch": 1.7985488481176557, "percentage": 35.97, "elapsed_time": "1:35:38", "remaining_time": "2:50:14", "throughput": 8647.66, "total_tokens": 49625376} +{"current_steps": 73625, "total_steps": 204665, "loss": 0.0985, "lr": 1.6164105477812612e-06, "epoch": 1.7986709989495029, "percentage": 35.97, "elapsed_time": "1:35:38", "remaining_time": "2:50:14", "throughput": 8647.7, "total_tokens": 49628576} +{"current_steps": 73630, "total_steps": 204665, "loss": 0.0531, "lr": 1.6163433958313174e-06, "epoch": 1.7987931497813499, "percentage": 35.98, "elapsed_time": "1:35:39", "remaining_time": "2:50:13", "throughput": 8647.82, "total_tokens": 49632352} +{"current_steps": 73635, "total_steps": 204665, "loss": 0.1582, "lr": 1.6162762393991585e-06, "epoch": 1.798915300613197, "percentage": 35.98, "elapsed_time": "1:35:39", "remaining_time": "2:50:13", "throughput": 8647.92, "total_tokens": 49636000} +{"current_steps": 73640, "total_steps": 204665, "loss": 0.1563, "lr": 1.6162090784852728e-06, "epoch": 1.7990374514450442, "percentage": 35.98, "elapsed_time": "1:35:39", "remaining_time": "2:50:12", "throughput": 8647.94, "total_tokens": 49639136} +{"current_steps": 73645, "total_steps": 204665, "loss": 0.0032, "lr": 1.616141913090149e-06, "epoch": 1.7991596022768914, "percentage": 35.98, "elapsed_time": "1:35:40", "remaining_time": "2:50:12", "throughput": 8648.05, "total_tokens": 49642848} +{"current_steps": 73650, "total_steps": 204665, "loss": 0.1117, "lr": 1.616074743214276e-06, "epoch": 1.7992817531087386, "percentage": 35.99, "elapsed_time": "1:35:40", "remaining_time": "2:50:12", "throughput": 8648.07, "total_tokens": 49645856} +{"current_steps": 73655, "total_steps": 204665, "loss": 0.0952, "lr": 1.6160075688581414e-06, "epoch": 1.7994039039405858, "percentage": 35.99, "elapsed_time": "1:35:41", "remaining_time": "2:50:11", "throughput": 8648.12, "total_tokens": 49649120} +{"current_steps": 73660, "total_steps": 204665, "loss": 0.0019, "lr": 1.6159403900222342e-06, "epoch": 1.799526054772433, "percentage": 35.99, "elapsed_time": "1:35:41", "remaining_time": "2:50:11", "throughput": 8648.29, "total_tokens": 49653280} +{"current_steps": 73665, "total_steps": 204665, "loss": 0.0895, "lr": 1.6158732067070426e-06, "epoch": 1.7996482056042802, "percentage": 35.99, "elapsed_time": "1:35:41", "remaining_time": "2:50:10", "throughput": 8648.33, "total_tokens": 49656480} +{"current_steps": 73670, "total_steps": 204665, "loss": 0.0036, "lr": 1.6158060189130556e-06, "epoch": 1.7997703564361274, "percentage": 36.0, "elapsed_time": "1:35:42", "remaining_time": "2:50:10", "throughput": 8648.38, "total_tokens": 49659744} +{"current_steps": 73675, "total_steps": 204665, "loss": 0.1183, "lr": 1.6157388266407614e-06, "epoch": 1.7998925072679746, "percentage": 36.0, "elapsed_time": "1:35:42", "remaining_time": "2:50:09", "throughput": 8648.45, "total_tokens": 49663136} +{"current_steps": 73680, "total_steps": 204665, "loss": 0.1008, "lr": 1.6156716298906487e-06, "epoch": 1.8000146580998218, "percentage": 36.0, "elapsed_time": "1:35:42", "remaining_time": "2:50:09", "throughput": 8648.49, "total_tokens": 49666336} +{"current_steps": 73685, "total_steps": 204665, "loss": 0.0426, "lr": 1.615604428663207e-06, "epoch": 1.800136808931669, "percentage": 36.0, "elapsed_time": "1:35:43", "remaining_time": "2:50:08", "throughput": 8648.51, "total_tokens": 49669408} +{"current_steps": 73690, "total_steps": 204665, "loss": 0.0701, "lr": 1.6155372229589234e-06, "epoch": 1.8002589597635161, "percentage": 36.01, "elapsed_time": "1:35:43", "remaining_time": "2:50:08", "throughput": 8648.58, "total_tokens": 49672800} +{"current_steps": 73695, "total_steps": 204665, "loss": 0.0654, "lr": 1.6154700127782883e-06, "epoch": 1.8003811105953633, "percentage": 36.01, "elapsed_time": "1:35:43", "remaining_time": "2:50:07", "throughput": 8648.62, "total_tokens": 49676000} +{"current_steps": 73700, "total_steps": 204665, "loss": 0.1242, "lr": 1.6154027981217894e-06, "epoch": 1.8005032614272103, "percentage": 36.01, "elapsed_time": "1:35:44", "remaining_time": "2:50:07", "throughput": 8648.6, "total_tokens": 49678752} +{"current_steps": 73705, "total_steps": 204665, "loss": 0.1069, "lr": 1.6153355789899159e-06, "epoch": 1.8006254122590575, "percentage": 36.01, "elapsed_time": "1:35:44", "remaining_time": "2:50:06", "throughput": 8648.63, "total_tokens": 49681952} +{"current_steps": 73710, "total_steps": 204665, "loss": 0.0015, "lr": 1.6152683553831565e-06, "epoch": 1.8007475630909047, "percentage": 36.01, "elapsed_time": "1:35:44", "remaining_time": "2:50:06", "throughput": 8648.73, "total_tokens": 49685536} +{"current_steps": 73715, "total_steps": 204665, "loss": 0.1863, "lr": 1.6152011273020002e-06, "epoch": 1.8008697139227519, "percentage": 36.02, "elapsed_time": "1:35:45", "remaining_time": "2:50:05", "throughput": 8648.82, "total_tokens": 49689056} +{"current_steps": 73720, "total_steps": 204665, "loss": 0.0266, "lr": 1.6151338947469358e-06, "epoch": 1.8009918647545988, "percentage": 36.02, "elapsed_time": "1:35:45", "remaining_time": "2:50:05", "throughput": 8648.84, "total_tokens": 49692128} +{"current_steps": 73725, "total_steps": 204665, "loss": 0.0509, "lr": 1.6150666577184521e-06, "epoch": 1.801114015586446, "percentage": 36.02, "elapsed_time": "1:35:45", "remaining_time": "2:50:05", "throughput": 8648.84, "total_tokens": 49695072} +{"current_steps": 73730, "total_steps": 204665, "loss": 0.0198, "lr": 1.6149994162170386e-06, "epoch": 1.8012361664182932, "percentage": 36.02, "elapsed_time": "1:35:46", "remaining_time": "2:50:04", "throughput": 8648.94, "total_tokens": 49698720} +{"current_steps": 73735, "total_steps": 204665, "loss": 0.1982, "lr": 1.6149321702431836e-06, "epoch": 1.8013583172501404, "percentage": 36.03, "elapsed_time": "1:35:46", "remaining_time": "2:50:04", "throughput": 8649.01, "total_tokens": 49702176} +{"current_steps": 73740, "total_steps": 204665, "loss": 0.0983, "lr": 1.6148649197973768e-06, "epoch": 1.8014804680819876, "percentage": 36.03, "elapsed_time": "1:35:46", "remaining_time": "2:50:03", "throughput": 8649.11, "total_tokens": 49705824} +{"current_steps": 73745, "total_steps": 204665, "loss": 0.1295, "lr": 1.6147976648801068e-06, "epoch": 1.8016026189138348, "percentage": 36.03, "elapsed_time": "1:35:47", "remaining_time": "2:50:03", "throughput": 8649.14, "total_tokens": 49708960} +{"current_steps": 73750, "total_steps": 204665, "loss": 0.0654, "lr": 1.6147304054918626e-06, "epoch": 1.801724769745682, "percentage": 36.03, "elapsed_time": "1:35:47", "remaining_time": "2:50:02", "throughput": 8649.23, "total_tokens": 49712544} +{"current_steps": 73755, "total_steps": 204665, "loss": 0.0359, "lr": 1.6146631416331338e-06, "epoch": 1.8018469205775292, "percentage": 36.04, "elapsed_time": "1:35:47", "remaining_time": "2:50:02", "throughput": 8649.33, "total_tokens": 49716128} +{"current_steps": 73760, "total_steps": 204665, "loss": 0.1364, "lr": 1.6145958733044092e-06, "epoch": 1.8019690714093763, "percentage": 36.04, "elapsed_time": "1:35:48", "remaining_time": "2:50:01", "throughput": 8649.39, "total_tokens": 49719456} +{"current_steps": 73765, "total_steps": 204665, "loss": 0.1124, "lr": 1.614528600506178e-06, "epoch": 1.8020912222412235, "percentage": 36.04, "elapsed_time": "1:35:48", "remaining_time": "2:50:01", "throughput": 8649.39, "total_tokens": 49722400} +{"current_steps": 73770, "total_steps": 204665, "loss": 0.0819, "lr": 1.6144613232389295e-06, "epoch": 1.8022133730730707, "percentage": 36.04, "elapsed_time": "1:35:49", "remaining_time": "2:50:00", "throughput": 8649.51, "total_tokens": 49726112} +{"current_steps": 73775, "total_steps": 204665, "loss": 0.075, "lr": 1.614394041503153e-06, "epoch": 1.802335523904918, "percentage": 36.05, "elapsed_time": "1:35:49", "remaining_time": "2:50:00", "throughput": 8649.54, "total_tokens": 49729248} +{"current_steps": 73780, "total_steps": 204665, "loss": 0.182, "lr": 1.6143267552993382e-06, "epoch": 1.802457674736765, "percentage": 36.05, "elapsed_time": "1:35:49", "remaining_time": "2:49:59", "throughput": 8649.6, "total_tokens": 49732640} +{"current_steps": 73785, "total_steps": 204665, "loss": 0.165, "lr": 1.6142594646279738e-06, "epoch": 1.802579825568612, "percentage": 36.05, "elapsed_time": "1:35:50", "remaining_time": "2:49:59", "throughput": 8649.73, "total_tokens": 49736480} +{"current_steps": 73790, "total_steps": 204665, "loss": 0.1129, "lr": 1.614192169489549e-06, "epoch": 1.8027019764004593, "percentage": 36.05, "elapsed_time": "1:35:50", "remaining_time": "2:49:59", "throughput": 8649.82, "total_tokens": 49740064} +{"current_steps": 73795, "total_steps": 204665, "loss": 0.0624, "lr": 1.6141248698845538e-06, "epoch": 1.8028241272323065, "percentage": 36.06, "elapsed_time": "1:35:50", "remaining_time": "2:49:58", "throughput": 8649.86, "total_tokens": 49743264} +{"current_steps": 73800, "total_steps": 204665, "loss": 0.101, "lr": 1.6140575658134772e-06, "epoch": 1.8029462780641536, "percentage": 36.06, "elapsed_time": "1:35:51", "remaining_time": "2:49:58", "throughput": 8650.04, "total_tokens": 49747488} +{"current_steps": 73805, "total_steps": 204665, "loss": 0.1085, "lr": 1.6139902572768094e-06, "epoch": 1.8030684288960008, "percentage": 36.06, "elapsed_time": "1:35:51", "remaining_time": "2:49:57", "throughput": 8650.08, "total_tokens": 49750752} +{"current_steps": 73810, "total_steps": 204665, "loss": 0.0637, "lr": 1.6139229442750385e-06, "epoch": 1.8031905797278478, "percentage": 36.06, "elapsed_time": "1:35:51", "remaining_time": "2:49:57", "throughput": 8650.1, "total_tokens": 49753824} +{"current_steps": 73815, "total_steps": 204665, "loss": 0.0009, "lr": 1.6138556268086557e-06, "epoch": 1.803312730559695, "percentage": 36.07, "elapsed_time": "1:35:52", "remaining_time": "2:49:56", "throughput": 8650.16, "total_tokens": 49757216} +{"current_steps": 73820, "total_steps": 204665, "loss": 0.0381, "lr": 1.613788304878149e-06, "epoch": 1.8034348813915422, "percentage": 36.07, "elapsed_time": "1:35:52", "remaining_time": "2:49:56", "throughput": 8650.24, "total_tokens": 49760736} +{"current_steps": 73825, "total_steps": 204665, "loss": 0.0885, "lr": 1.6137209784840086e-06, "epoch": 1.8035570322233894, "percentage": 36.07, "elapsed_time": "1:35:52", "remaining_time": "2:49:55", "throughput": 8650.37, "total_tokens": 49764576} +{"current_steps": 73830, "total_steps": 204665, "loss": 0.0437, "lr": 1.6136536476267243e-06, "epoch": 1.8036791830552366, "percentage": 36.07, "elapsed_time": "1:35:53", "remaining_time": "2:49:55", "throughput": 8650.4, "total_tokens": 49767712} +{"current_steps": 73835, "total_steps": 204665, "loss": 0.0381, "lr": 1.6135863123067858e-06, "epoch": 1.8038013338870837, "percentage": 36.08, "elapsed_time": "1:35:53", "remaining_time": "2:49:54", "throughput": 8650.49, "total_tokens": 49771296} +{"current_steps": 73840, "total_steps": 204665, "loss": 0.0836, "lr": 1.6135189725246828e-06, "epoch": 1.803923484718931, "percentage": 36.08, "elapsed_time": "1:35:53", "remaining_time": "2:49:54", "throughput": 8650.55, "total_tokens": 49774624} +{"current_steps": 73845, "total_steps": 204665, "loss": 0.1184, "lr": 1.6134516282809045e-06, "epoch": 1.8040456355507781, "percentage": 36.08, "elapsed_time": "1:35:54", "remaining_time": "2:49:54", "throughput": 8650.73, "total_tokens": 49778784} +{"current_steps": 73850, "total_steps": 204665, "loss": 0.082, "lr": 1.6133842795759408e-06, "epoch": 1.8041677863826253, "percentage": 36.08, "elapsed_time": "1:35:54", "remaining_time": "2:49:53", "throughput": 8650.8, "total_tokens": 49782240} +{"current_steps": 73855, "total_steps": 204665, "loss": 0.0021, "lr": 1.613316926410282e-06, "epoch": 1.8042899372144725, "percentage": 36.09, "elapsed_time": "1:35:54", "remaining_time": "2:49:53", "throughput": 8650.8, "total_tokens": 49785184} +{"current_steps": 73860, "total_steps": 204665, "loss": 0.0884, "lr": 1.6132495687844174e-06, "epoch": 1.8044120880463197, "percentage": 36.09, "elapsed_time": "1:35:55", "remaining_time": "2:49:52", "throughput": 8650.84, "total_tokens": 49788384} +{"current_steps": 73865, "total_steps": 204665, "loss": 0.1359, "lr": 1.6131822066988372e-06, "epoch": 1.8045342388781669, "percentage": 36.09, "elapsed_time": "1:35:55", "remaining_time": "2:49:52", "throughput": 8650.9, "total_tokens": 49791776} +{"current_steps": 73870, "total_steps": 204665, "loss": 0.1146, "lr": 1.6131148401540307e-06, "epoch": 1.804656389710014, "percentage": 36.09, "elapsed_time": "1:35:56", "remaining_time": "2:49:51", "throughput": 8651.04, "total_tokens": 49795680} +{"current_steps": 73875, "total_steps": 204665, "loss": 0.0008, "lr": 1.6130474691504885e-06, "epoch": 1.804778540541861, "percentage": 36.1, "elapsed_time": "1:35:56", "remaining_time": "2:49:51", "throughput": 8651.13, "total_tokens": 49799264} +{"current_steps": 73880, "total_steps": 204665, "loss": 0.0959, "lr": 1.6129800936887002e-06, "epoch": 1.8049006913737082, "percentage": 36.1, "elapsed_time": "1:35:56", "remaining_time": "2:49:50", "throughput": 8651.21, "total_tokens": 49802720} +{"current_steps": 73885, "total_steps": 204665, "loss": 0.0705, "lr": 1.6129127137691554e-06, "epoch": 1.8050228422055554, "percentage": 36.1, "elapsed_time": "1:35:57", "remaining_time": "2:49:50", "throughput": 8651.34, "total_tokens": 49806560} +{"current_steps": 73890, "total_steps": 204665, "loss": 0.1185, "lr": 1.6128453293923446e-06, "epoch": 1.8051449930374026, "percentage": 36.1, "elapsed_time": "1:35:57", "remaining_time": "2:49:49", "throughput": 8651.38, "total_tokens": 49809760} +{"current_steps": 73895, "total_steps": 204665, "loss": 0.038, "lr": 1.6127779405587578e-06, "epoch": 1.8052671438692498, "percentage": 36.11, "elapsed_time": "1:35:57", "remaining_time": "2:49:49", "throughput": 8651.47, "total_tokens": 49813344} +{"current_steps": 73900, "total_steps": 204665, "loss": 0.0756, "lr": 1.6127105472688852e-06, "epoch": 1.8053892947010968, "percentage": 36.11, "elapsed_time": "1:35:58", "remaining_time": "2:49:48", "throughput": 8651.56, "total_tokens": 49816928} +{"current_steps": 73905, "total_steps": 204665, "loss": 0.0234, "lr": 1.6126431495232167e-06, "epoch": 1.805511445532944, "percentage": 36.11, "elapsed_time": "1:35:58", "remaining_time": "2:49:48", "throughput": 8651.63, "total_tokens": 49820320} +{"current_steps": 73910, "total_steps": 204665, "loss": 0.1326, "lr": 1.6125757473222423e-06, "epoch": 1.8056335963647911, "percentage": 36.11, "elapsed_time": "1:35:58", "remaining_time": "2:49:48", "throughput": 8651.69, "total_tokens": 49823712} +{"current_steps": 73915, "total_steps": 204665, "loss": 0.1187, "lr": 1.6125083406664523e-06, "epoch": 1.8057557471966383, "percentage": 36.12, "elapsed_time": "1:35:59", "remaining_time": "2:49:47", "throughput": 8651.73, "total_tokens": 49826848} +{"current_steps": 73920, "total_steps": 204665, "loss": 0.1636, "lr": 1.6124409295563369e-06, "epoch": 1.8058778980284855, "percentage": 36.12, "elapsed_time": "1:35:59", "remaining_time": "2:49:47", "throughput": 8651.74, "total_tokens": 49829856} +{"current_steps": 73925, "total_steps": 204665, "loss": 0.0391, "lr": 1.612373513992386e-06, "epoch": 1.8060000488603327, "percentage": 36.12, "elapsed_time": "1:35:59", "remaining_time": "2:49:46", "throughput": 8651.78, "total_tokens": 49833056} +{"current_steps": 73930, "total_steps": 204665, "loss": 0.0296, "lr": 1.6123060939750908e-06, "epoch": 1.80612219969218, "percentage": 36.12, "elapsed_time": "1:36:00", "remaining_time": "2:49:46", "throughput": 8651.9, "total_tokens": 49836832} +{"current_steps": 73935, "total_steps": 204665, "loss": 0.0689, "lr": 1.6122386695049409e-06, "epoch": 1.806244350524027, "percentage": 36.12, "elapsed_time": "1:36:00", "remaining_time": "2:49:45", "throughput": 8651.95, "total_tokens": 49840160} +{"current_steps": 73940, "total_steps": 204665, "loss": 0.0879, "lr": 1.6121712405824263e-06, "epoch": 1.8063665013558743, "percentage": 36.13, "elapsed_time": "1:36:00", "remaining_time": "2:49:45", "throughput": 8652.05, "total_tokens": 49843808} +{"current_steps": 73945, "total_steps": 204665, "loss": 0.0351, "lr": 1.6121038072080382e-06, "epoch": 1.8064886521877215, "percentage": 36.13, "elapsed_time": "1:36:01", "remaining_time": "2:49:44", "throughput": 8652.09, "total_tokens": 49847008} +{"current_steps": 73950, "total_steps": 204665, "loss": 0.1674, "lr": 1.6120363693822663e-06, "epoch": 1.8066108030195687, "percentage": 36.13, "elapsed_time": "1:36:01", "remaining_time": "2:49:44", "throughput": 8652.17, "total_tokens": 49850528} +{"current_steps": 73955, "total_steps": 204665, "loss": 0.1243, "lr": 1.6119689271056013e-06, "epoch": 1.8067329538514159, "percentage": 36.13, "elapsed_time": "1:36:01", "remaining_time": "2:49:43", "throughput": 8652.22, "total_tokens": 49853792} +{"current_steps": 73960, "total_steps": 204665, "loss": 0.0015, "lr": 1.6119014803785338e-06, "epoch": 1.806855104683263, "percentage": 36.14, "elapsed_time": "1:36:02", "remaining_time": "2:49:43", "throughput": 8652.24, "total_tokens": 49856928} +{"current_steps": 73965, "total_steps": 204665, "loss": 0.0041, "lr": 1.6118340292015545e-06, "epoch": 1.80697725551511, "percentage": 36.14, "elapsed_time": "1:36:02", "remaining_time": "2:49:42", "throughput": 8652.34, "total_tokens": 49860512} +{"current_steps": 73970, "total_steps": 204665, "loss": 0.0398, "lr": 1.6117665735751529e-06, "epoch": 1.8070994063469572, "percentage": 36.14, "elapsed_time": "1:36:03", "remaining_time": "2:49:42", "throughput": 8652.47, "total_tokens": 49864416} +{"current_steps": 73975, "total_steps": 204665, "loss": 0.0755, "lr": 1.6116991134998208e-06, "epoch": 1.8072215571788044, "percentage": 36.14, "elapsed_time": "1:36:03", "remaining_time": "2:49:42", "throughput": 8652.5, "total_tokens": 49867552} +{"current_steps": 73980, "total_steps": 204665, "loss": 0.1772, "lr": 1.6116316489760477e-06, "epoch": 1.8073437080106516, "percentage": 36.15, "elapsed_time": "1:36:03", "remaining_time": "2:49:41", "throughput": 8652.54, "total_tokens": 49870752} +{"current_steps": 73985, "total_steps": 204665, "loss": 0.1199, "lr": 1.6115641800043252e-06, "epoch": 1.8074658588424986, "percentage": 36.15, "elapsed_time": "1:36:04", "remaining_time": "2:49:41", "throughput": 8652.57, "total_tokens": 49873952} +{"current_steps": 73990, "total_steps": 204665, "loss": 0.0553, "lr": 1.6114967065851431e-06, "epoch": 1.8075880096743457, "percentage": 36.15, "elapsed_time": "1:36:04", "remaining_time": "2:49:40", "throughput": 8652.61, "total_tokens": 49877152} +{"current_steps": 73995, "total_steps": 204665, "loss": 0.0014, "lr": 1.6114292287189928e-06, "epoch": 1.807710160506193, "percentage": 36.15, "elapsed_time": "1:36:04", "remaining_time": "2:49:40", "throughput": 8652.69, "total_tokens": 49880672} +{"current_steps": 74000, "total_steps": 204665, "loss": 0.1862, "lr": 1.6113617464063646e-06, "epoch": 1.8078323113380401, "percentage": 36.16, "elapsed_time": "1:36:05", "remaining_time": "2:49:39", "throughput": 8652.77, "total_tokens": 49884128} +{"current_steps": 74005, "total_steps": 204665, "loss": 0.1815, "lr": 1.6112942596477491e-06, "epoch": 1.8079544621698873, "percentage": 36.16, "elapsed_time": "1:36:05", "remaining_time": "2:49:39", "throughput": 8652.8, "total_tokens": 49887264} +{"current_steps": 74010, "total_steps": 204665, "loss": 0.0047, "lr": 1.6112267684436378e-06, "epoch": 1.8080766130017345, "percentage": 36.16, "elapsed_time": "1:36:05", "remaining_time": "2:49:38", "throughput": 8652.83, "total_tokens": 49890400} +{"current_steps": 74015, "total_steps": 204665, "loss": 0.0378, "lr": 1.6111592727945205e-06, "epoch": 1.8081987638335817, "percentage": 36.16, "elapsed_time": "1:36:06", "remaining_time": "2:49:38", "throughput": 8652.89, "total_tokens": 49893792} +{"current_steps": 74020, "total_steps": 204665, "loss": 0.0564, "lr": 1.611091772700889e-06, "epoch": 1.8083209146654289, "percentage": 36.17, "elapsed_time": "1:36:06", "remaining_time": "2:49:37", "throughput": 8652.94, "total_tokens": 49897056} +{"current_steps": 74025, "total_steps": 204665, "loss": 0.1873, "lr": 1.6110242681632335e-06, "epoch": 1.808443065497276, "percentage": 36.17, "elapsed_time": "1:36:06", "remaining_time": "2:49:37", "throughput": 8652.96, "total_tokens": 49900128} +{"current_steps": 74030, "total_steps": 204665, "loss": 0.1264, "lr": 1.6109567591820454e-06, "epoch": 1.8085652163291233, "percentage": 36.17, "elapsed_time": "1:36:07", "remaining_time": "2:49:36", "throughput": 8653.03, "total_tokens": 49903584} +{"current_steps": 74035, "total_steps": 204665, "loss": 0.0015, "lr": 1.6108892457578151e-06, "epoch": 1.8086873671609704, "percentage": 36.17, "elapsed_time": "1:36:07", "remaining_time": "2:49:36", "throughput": 8653.07, "total_tokens": 49906848} +{"current_steps": 74040, "total_steps": 204665, "loss": 0.0855, "lr": 1.6108217278910342e-06, "epoch": 1.8088095179928176, "percentage": 36.18, "elapsed_time": "1:36:07", "remaining_time": "2:49:35", "throughput": 8653.2, "total_tokens": 49910688} +{"current_steps": 74045, "total_steps": 204665, "loss": 0.0939, "lr": 1.6107542055821934e-06, "epoch": 1.8089316688246648, "percentage": 36.18, "elapsed_time": "1:36:08", "remaining_time": "2:49:35", "throughput": 8653.23, "total_tokens": 49913824} +{"current_steps": 74050, "total_steps": 204665, "loss": 0.0009, "lr": 1.6106866788317837e-06, "epoch": 1.809053819656512, "percentage": 36.18, "elapsed_time": "1:36:08", "remaining_time": "2:49:35", "throughput": 8653.29, "total_tokens": 49917216} +{"current_steps": 74055, "total_steps": 204665, "loss": 0.0365, "lr": 1.6106191476402961e-06, "epoch": 1.809175970488359, "percentage": 36.18, "elapsed_time": "1:36:08", "remaining_time": "2:49:34", "throughput": 8653.39, "total_tokens": 49920800} +{"current_steps": 74060, "total_steps": 204665, "loss": 0.2032, "lr": 1.6105516120082218e-06, "epoch": 1.8092981213202062, "percentage": 36.19, "elapsed_time": "1:36:09", "remaining_time": "2:49:34", "throughput": 8653.45, "total_tokens": 49924192} +{"current_steps": 74065, "total_steps": 204665, "loss": 0.0806, "lr": 1.610484071936052e-06, "epoch": 1.8094202721520534, "percentage": 36.19, "elapsed_time": "1:36:09", "remaining_time": "2:49:33", "throughput": 8653.5, "total_tokens": 49927456} +{"current_steps": 74070, "total_steps": 204665, "loss": 0.0486, "lr": 1.6104165274242782e-06, "epoch": 1.8095424229839006, "percentage": 36.19, "elapsed_time": "1:36:09", "remaining_time": "2:49:33", "throughput": 8653.52, "total_tokens": 49930528} +{"current_steps": 74075, "total_steps": 204665, "loss": 0.0669, "lr": 1.610348978473391e-06, "epoch": 1.8096645738157475, "percentage": 36.19, "elapsed_time": "1:36:10", "remaining_time": "2:49:32", "throughput": 8653.58, "total_tokens": 49933920} +{"current_steps": 74080, "total_steps": 204665, "loss": 0.0025, "lr": 1.6102814250838814e-06, "epoch": 1.8097867246475947, "percentage": 36.2, "elapsed_time": "1:36:10", "remaining_time": "2:49:32", "throughput": 8653.62, "total_tokens": 49937120} +{"current_steps": 74085, "total_steps": 204665, "loss": 0.1609, "lr": 1.6102138672562417e-06, "epoch": 1.809908875479442, "percentage": 36.2, "elapsed_time": "1:36:11", "remaining_time": "2:49:31", "throughput": 8653.68, "total_tokens": 49940448} +{"current_steps": 74090, "total_steps": 204665, "loss": 0.0982, "lr": 1.6101463049909626e-06, "epoch": 1.810031026311289, "percentage": 36.2, "elapsed_time": "1:36:11", "remaining_time": "2:49:31", "throughput": 8653.72, "total_tokens": 49943712} +{"current_steps": 74095, "total_steps": 204665, "loss": 0.1006, "lr": 1.6100787382885352e-06, "epoch": 1.8101531771431363, "percentage": 36.2, "elapsed_time": "1:36:11", "remaining_time": "2:49:30", "throughput": 8653.74, "total_tokens": 49946784} +{"current_steps": 74100, "total_steps": 204665, "loss": 0.1143, "lr": 1.6100111671494511e-06, "epoch": 1.8102753279749835, "percentage": 36.21, "elapsed_time": "1:36:12", "remaining_time": "2:49:30", "throughput": 8653.77, "total_tokens": 49949920} +{"current_steps": 74105, "total_steps": 204665, "loss": 0.0009, "lr": 1.6099435915742018e-06, "epoch": 1.8103974788068307, "percentage": 36.21, "elapsed_time": "1:36:12", "remaining_time": "2:49:29", "throughput": 8653.87, "total_tokens": 49953568} +{"current_steps": 74110, "total_steps": 204665, "loss": 0.0568, "lr": 1.6098760115632785e-06, "epoch": 1.8105196296386779, "percentage": 36.21, "elapsed_time": "1:36:12", "remaining_time": "2:49:29", "throughput": 8653.97, "total_tokens": 49957152} +{"current_steps": 74115, "total_steps": 204665, "loss": 0.0408, "lr": 1.6098084271171732e-06, "epoch": 1.810641780470525, "percentage": 36.21, "elapsed_time": "1:36:13", "remaining_time": "2:49:29", "throughput": 8654.06, "total_tokens": 49960736} +{"current_steps": 74120, "total_steps": 204665, "loss": 0.0006, "lr": 1.6097408382363768e-06, "epoch": 1.8107639313023722, "percentage": 36.22, "elapsed_time": "1:36:13", "remaining_time": "2:49:28", "throughput": 8654.14, "total_tokens": 49964256} +{"current_steps": 74125, "total_steps": 204665, "loss": 0.166, "lr": 1.6096732449213812e-06, "epoch": 1.8108860821342194, "percentage": 36.22, "elapsed_time": "1:36:13", "remaining_time": "2:49:28", "throughput": 8654.18, "total_tokens": 49967456} +{"current_steps": 74130, "total_steps": 204665, "loss": 0.0413, "lr": 1.6096056471726775e-06, "epoch": 1.8110082329660666, "percentage": 36.22, "elapsed_time": "1:36:14", "remaining_time": "2:49:27", "throughput": 8654.25, "total_tokens": 49970912} +{"current_steps": 74135, "total_steps": 204665, "loss": 0.0854, "lr": 1.6095380449907577e-06, "epoch": 1.8111303837979138, "percentage": 36.22, "elapsed_time": "1:36:14", "remaining_time": "2:49:27", "throughput": 8654.27, "total_tokens": 49973984} +{"current_steps": 74140, "total_steps": 204665, "loss": 0.0806, "lr": 1.609470438376113e-06, "epoch": 1.811252534629761, "percentage": 36.23, "elapsed_time": "1:36:14", "remaining_time": "2:49:26", "throughput": 8654.32, "total_tokens": 49977248} +{"current_steps": 74145, "total_steps": 204665, "loss": 0.1203, "lr": 1.609402827329236e-06, "epoch": 1.811374685461608, "percentage": 36.23, "elapsed_time": "1:36:15", "remaining_time": "2:49:26", "throughput": 8654.35, "total_tokens": 49980384} +{"current_steps": 74150, "total_steps": 204665, "loss": 0.0476, "lr": 1.609335211850617e-06, "epoch": 1.8114968362934551, "percentage": 36.23, "elapsed_time": "1:36:15", "remaining_time": "2:49:25", "throughput": 8654.4, "total_tokens": 49983584} +{"current_steps": 74155, "total_steps": 204665, "loss": 0.1712, "lr": 1.6092675919407487e-06, "epoch": 1.8116189871253023, "percentage": 36.23, "elapsed_time": "1:36:15", "remaining_time": "2:49:25", "throughput": 8654.48, "total_tokens": 49987104} +{"current_steps": 74160, "total_steps": 204665, "loss": 0.0908, "lr": 1.6091999676001228e-06, "epoch": 1.8117411379571495, "percentage": 36.23, "elapsed_time": "1:36:16", "remaining_time": "2:49:24", "throughput": 8654.58, "total_tokens": 49990752} +{"current_steps": 74165, "total_steps": 204665, "loss": 0.0425, "lr": 1.609132338829231e-06, "epoch": 1.8118632887889965, "percentage": 36.24, "elapsed_time": "1:36:16", "remaining_time": "2:49:24", "throughput": 8654.65, "total_tokens": 49994208} +{"current_steps": 74170, "total_steps": 204665, "loss": 0.0904, "lr": 1.6090647056285645e-06, "epoch": 1.8119854396208437, "percentage": 36.24, "elapsed_time": "1:36:16", "remaining_time": "2:49:23", "throughput": 8654.71, "total_tokens": 49997536} +{"current_steps": 74175, "total_steps": 204665, "loss": 0.06, "lr": 1.608997067998616e-06, "epoch": 1.8121075904526909, "percentage": 36.24, "elapsed_time": "1:36:17", "remaining_time": "2:49:23", "throughput": 8654.72, "total_tokens": 50000544} +{"current_steps": 74180, "total_steps": 204665, "loss": 0.096, "lr": 1.608929425939877e-06, "epoch": 1.812229741284538, "percentage": 36.24, "elapsed_time": "1:36:17", "remaining_time": "2:49:22", "throughput": 8654.75, "total_tokens": 50003680} +{"current_steps": 74185, "total_steps": 204665, "loss": 0.1466, "lr": 1.6088617794528392e-06, "epoch": 1.8123518921163853, "percentage": 36.25, "elapsed_time": "1:36:17", "remaining_time": "2:49:22", "throughput": 8654.78, "total_tokens": 50006880} +{"current_steps": 74190, "total_steps": 204665, "loss": 0.1236, "lr": 1.608794128537995e-06, "epoch": 1.8124740429482324, "percentage": 36.25, "elapsed_time": "1:36:18", "remaining_time": "2:49:22", "throughput": 8654.88, "total_tokens": 50010528} +{"current_steps": 74195, "total_steps": 204665, "loss": 0.0713, "lr": 1.608726473195836e-06, "epoch": 1.8125961937800796, "percentage": 36.25, "elapsed_time": "1:36:18", "remaining_time": "2:49:21", "throughput": 8654.99, "total_tokens": 50014176} +{"current_steps": 74200, "total_steps": 204665, "loss": 0.1177, "lr": 1.6086588134268544e-06, "epoch": 1.8127183446119268, "percentage": 36.25, "elapsed_time": "1:36:18", "remaining_time": "2:49:21", "throughput": 8655.02, "total_tokens": 50017312} +{"current_steps": 74205, "total_steps": 204665, "loss": 0.0225, "lr": 1.6085911492315423e-06, "epoch": 1.812840495443774, "percentage": 36.26, "elapsed_time": "1:36:19", "remaining_time": "2:49:20", "throughput": 8655.13, "total_tokens": 50021024} +{"current_steps": 74210, "total_steps": 204665, "loss": 0.1781, "lr": 1.6085234806103918e-06, "epoch": 1.8129626462756212, "percentage": 36.26, "elapsed_time": "1:36:19", "remaining_time": "2:49:20", "throughput": 8655.19, "total_tokens": 50024416} +{"current_steps": 74215, "total_steps": 204665, "loss": 0.0969, "lr": 1.6084558075638946e-06, "epoch": 1.8130847971074684, "percentage": 36.26, "elapsed_time": "1:36:20", "remaining_time": "2:49:19", "throughput": 8655.24, "total_tokens": 50027680} +{"current_steps": 74220, "total_steps": 204665, "loss": 0.2283, "lr": 1.608388130092543e-06, "epoch": 1.8132069479393156, "percentage": 36.26, "elapsed_time": "1:36:20", "remaining_time": "2:49:19", "throughput": 8655.26, "total_tokens": 50030752} +{"current_steps": 74225, "total_steps": 204665, "loss": 0.004, "lr": 1.6083204481968297e-06, "epoch": 1.8133290987711628, "percentage": 36.27, "elapsed_time": "1:36:20", "remaining_time": "2:49:18", "throughput": 8655.28, "total_tokens": 50033824} +{"current_steps": 74230, "total_steps": 204665, "loss": 0.0022, "lr": 1.6082527618772462e-06, "epoch": 1.81345124960301, "percentage": 36.27, "elapsed_time": "1:36:21", "remaining_time": "2:49:18", "throughput": 8655.3, "total_tokens": 50036896} +{"current_steps": 74235, "total_steps": 204665, "loss": 0.1087, "lr": 1.608185071134285e-06, "epoch": 1.813573400434857, "percentage": 36.27, "elapsed_time": "1:36:21", "remaining_time": "2:49:17", "throughput": 8655.4, "total_tokens": 50040544} +{"current_steps": 74240, "total_steps": 204665, "loss": 0.0011, "lr": 1.6081173759684385e-06, "epoch": 1.8136955512667041, "percentage": 36.27, "elapsed_time": "1:36:21", "remaining_time": "2:49:17", "throughput": 8655.49, "total_tokens": 50044128} +{"current_steps": 74245, "total_steps": 204665, "loss": 0.0458, "lr": 1.6080496763801989e-06, "epoch": 1.8138177020985513, "percentage": 36.28, "elapsed_time": "1:36:22", "remaining_time": "2:49:16", "throughput": 8655.56, "total_tokens": 50047520} +{"current_steps": 74250, "total_steps": 204665, "loss": 0.1124, "lr": 1.6079819723700585e-06, "epoch": 1.8139398529303985, "percentage": 36.28, "elapsed_time": "1:36:22", "remaining_time": "2:49:16", "throughput": 8655.57, "total_tokens": 50050528} +{"current_steps": 74255, "total_steps": 204665, "loss": 0.0276, "lr": 1.6079142639385096e-06, "epoch": 1.8140620037622455, "percentage": 36.28, "elapsed_time": "1:36:22", "remaining_time": "2:49:16", "throughput": 8655.68, "total_tokens": 50054176} +{"current_steps": 74260, "total_steps": 204665, "loss": 0.1728, "lr": 1.6078465510860446e-06, "epoch": 1.8141841545940927, "percentage": 36.28, "elapsed_time": "1:36:23", "remaining_time": "2:49:15", "throughput": 8655.72, "total_tokens": 50057440} +{"current_steps": 74265, "total_steps": 204665, "loss": 0.0582, "lr": 1.607778833813156e-06, "epoch": 1.8143063054259398, "percentage": 36.29, "elapsed_time": "1:36:23", "remaining_time": "2:49:15", "throughput": 8655.79, "total_tokens": 50060832} +{"current_steps": 74270, "total_steps": 204665, "loss": 0.1701, "lr": 1.6077111121203364e-06, "epoch": 1.814428456257787, "percentage": 36.29, "elapsed_time": "1:36:23", "remaining_time": "2:49:14", "throughput": 8655.89, "total_tokens": 50064480} +{"current_steps": 74275, "total_steps": 204665, "loss": 0.0996, "lr": 1.607643386008078e-06, "epoch": 1.8145506070896342, "percentage": 36.29, "elapsed_time": "1:36:24", "remaining_time": "2:49:14", "throughput": 8655.96, "total_tokens": 50067936} +{"current_steps": 74280, "total_steps": 204665, "loss": 0.0023, "lr": 1.6075756554768736e-06, "epoch": 1.8146727579214814, "percentage": 36.29, "elapsed_time": "1:36:24", "remaining_time": "2:49:13", "throughput": 8656.03, "total_tokens": 50071328} +{"current_steps": 74285, "total_steps": 204665, "loss": 0.0529, "lr": 1.6075079205272155e-06, "epoch": 1.8147949087533286, "percentage": 36.3, "elapsed_time": "1:36:24", "remaining_time": "2:49:13", "throughput": 8656.07, "total_tokens": 50074592} +{"current_steps": 74290, "total_steps": 204665, "loss": 0.2788, "lr": 1.6074401811595965e-06, "epoch": 1.8149170595851758, "percentage": 36.3, "elapsed_time": "1:36:25", "remaining_time": "2:49:12", "throughput": 8656.18, "total_tokens": 50078304} +{"current_steps": 74295, "total_steps": 204665, "loss": 0.038, "lr": 1.6073724373745088e-06, "epoch": 1.815039210417023, "percentage": 36.3, "elapsed_time": "1:36:25", "remaining_time": "2:49:12", "throughput": 8656.28, "total_tokens": 50081952} +{"current_steps": 74300, "total_steps": 204665, "loss": 0.045, "lr": 1.6073046891724458e-06, "epoch": 1.8151613612488702, "percentage": 36.3, "elapsed_time": "1:36:25", "remaining_time": "2:49:11", "throughput": 8656.36, "total_tokens": 50085472} +{"current_steps": 74305, "total_steps": 204665, "loss": 0.1823, "lr": 1.6072369365538996e-06, "epoch": 1.8152835120807174, "percentage": 36.31, "elapsed_time": "1:36:26", "remaining_time": "2:49:11", "throughput": 8656.37, "total_tokens": 50088416} +{"current_steps": 74310, "total_steps": 204665, "loss": 0.0917, "lr": 1.607169179519363e-06, "epoch": 1.8154056629125646, "percentage": 36.31, "elapsed_time": "1:36:26", "remaining_time": "2:49:11", "throughput": 8656.47, "total_tokens": 50092128} +{"current_steps": 74315, "total_steps": 204665, "loss": 0.1018, "lr": 1.607101418069329e-06, "epoch": 1.8155278137444117, "percentage": 36.31, "elapsed_time": "1:36:27", "remaining_time": "2:49:10", "throughput": 8656.45, "total_tokens": 50094880} +{"current_steps": 74320, "total_steps": 204665, "loss": 0.1279, "lr": 1.60703365220429e-06, "epoch": 1.8156499645762587, "percentage": 36.31, "elapsed_time": "1:36:27", "remaining_time": "2:49:10", "throughput": 8656.56, "total_tokens": 50098592} +{"current_steps": 74325, "total_steps": 204665, "loss": 0.0388, "lr": 1.606965881924739e-06, "epoch": 1.815772115408106, "percentage": 36.32, "elapsed_time": "1:36:27", "remaining_time": "2:49:09", "throughput": 8656.64, "total_tokens": 50102112} +{"current_steps": 74330, "total_steps": 204665, "loss": 0.0739, "lr": 1.6068981072311689e-06, "epoch": 1.815894266239953, "percentage": 36.32, "elapsed_time": "1:36:28", "remaining_time": "2:49:09", "throughput": 8656.69, "total_tokens": 50105376} +{"current_steps": 74335, "total_steps": 204665, "loss": 0.0028, "lr": 1.6068303281240725e-06, "epoch": 1.8160164170718003, "percentage": 36.32, "elapsed_time": "1:36:28", "remaining_time": "2:49:08", "throughput": 8656.79, "total_tokens": 50109024} +{"current_steps": 74340, "total_steps": 204665, "loss": 0.034, "lr": 1.6067625446039428e-06, "epoch": 1.8161385679036475, "percentage": 36.32, "elapsed_time": "1:36:28", "remaining_time": "2:49:08", "throughput": 8656.89, "total_tokens": 50112608} +{"current_steps": 74345, "total_steps": 204665, "loss": 0.0482, "lr": 1.6066947566712728e-06, "epoch": 1.8162607187354944, "percentage": 36.33, "elapsed_time": "1:36:29", "remaining_time": "2:49:07", "throughput": 8656.92, "total_tokens": 50115808} +{"current_steps": 74350, "total_steps": 204665, "loss": 0.0401, "lr": 1.6066269643265551e-06, "epoch": 1.8163828695673416, "percentage": 36.33, "elapsed_time": "1:36:29", "remaining_time": "2:49:07", "throughput": 8657.0, "total_tokens": 50119264} +{"current_steps": 74355, "total_steps": 204665, "loss": 0.1096, "lr": 1.606559167570283e-06, "epoch": 1.8165050203991888, "percentage": 36.33, "elapsed_time": "1:36:29", "remaining_time": "2:49:06", "throughput": 8657.09, "total_tokens": 50122848} +{"current_steps": 74360, "total_steps": 204665, "loss": 0.094, "lr": 1.6064913664029497e-06, "epoch": 1.816627171231036, "percentage": 36.33, "elapsed_time": "1:36:30", "remaining_time": "2:49:06", "throughput": 8657.12, "total_tokens": 50125984} +{"current_steps": 74365, "total_steps": 204665, "loss": 0.1342, "lr": 1.6064235608250479e-06, "epoch": 1.8167493220628832, "percentage": 36.33, "elapsed_time": "1:36:30", "remaining_time": "2:49:05", "throughput": 8657.17, "total_tokens": 50129248} +{"current_steps": 74370, "total_steps": 204665, "loss": 0.0858, "lr": 1.6063557508370708e-06, "epoch": 1.8168714728947304, "percentage": 36.34, "elapsed_time": "1:36:30", "remaining_time": "2:49:05", "throughput": 8657.2, "total_tokens": 50132384} +{"current_steps": 74375, "total_steps": 204665, "loss": 0.0504, "lr": 1.6062879364395117e-06, "epoch": 1.8169936237265776, "percentage": 36.34, "elapsed_time": "1:36:31", "remaining_time": "2:49:04", "throughput": 8657.25, "total_tokens": 50135712} +{"current_steps": 74380, "total_steps": 204665, "loss": 0.1098, "lr": 1.6062201176328636e-06, "epoch": 1.8171157745584248, "percentage": 36.34, "elapsed_time": "1:36:31", "remaining_time": "2:49:04", "throughput": 8657.29, "total_tokens": 50138912} +{"current_steps": 74385, "total_steps": 204665, "loss": 0.0416, "lr": 1.6061522944176198e-06, "epoch": 1.817237925390272, "percentage": 36.34, "elapsed_time": "1:36:31", "remaining_time": "2:49:04", "throughput": 8657.33, "total_tokens": 50142176} +{"current_steps": 74390, "total_steps": 204665, "loss": 0.12, "lr": 1.6060844667942733e-06, "epoch": 1.8173600762221191, "percentage": 36.35, "elapsed_time": "1:36:32", "remaining_time": "2:49:03", "throughput": 8657.39, "total_tokens": 50145504} +{"current_steps": 74395, "total_steps": 204665, "loss": 0.078, "lr": 1.6060166347633177e-06, "epoch": 1.8174822270539663, "percentage": 36.35, "elapsed_time": "1:36:32", "remaining_time": "2:49:03", "throughput": 8657.4, "total_tokens": 50148512} +{"current_steps": 74400, "total_steps": 204665, "loss": 0.0009, "lr": 1.6059487983252462e-06, "epoch": 1.8176043778858135, "percentage": 36.35, "elapsed_time": "1:36:32", "remaining_time": "2:49:02", "throughput": 8657.46, "total_tokens": 50151904} +{"current_steps": 74405, "total_steps": 204665, "loss": 0.0013, "lr": 1.605880957480552e-06, "epoch": 1.8177265287176607, "percentage": 36.35, "elapsed_time": "1:36:33", "remaining_time": "2:49:02", "throughput": 8657.47, "total_tokens": 50154912} +{"current_steps": 74410, "total_steps": 204665, "loss": 0.0562, "lr": 1.6058131122297285e-06, "epoch": 1.8178486795495077, "percentage": 36.36, "elapsed_time": "1:36:33", "remaining_time": "2:49:01", "throughput": 8657.53, "total_tokens": 50158304} +{"current_steps": 74415, "total_steps": 204665, "loss": 0.1851, "lr": 1.605745262573269e-06, "epoch": 1.8179708303813549, "percentage": 36.36, "elapsed_time": "1:36:33", "remaining_time": "2:49:01", "throughput": 8657.53, "total_tokens": 50161248} +{"current_steps": 74420, "total_steps": 204665, "loss": 0.0014, "lr": 1.6056774085116671e-06, "epoch": 1.818092981213202, "percentage": 36.36, "elapsed_time": "1:36:34", "remaining_time": "2:49:00", "throughput": 8657.55, "total_tokens": 50164320} +{"current_steps": 74425, "total_steps": 204665, "loss": 0.0429, "lr": 1.605609550045416e-06, "epoch": 1.8182151320450493, "percentage": 36.36, "elapsed_time": "1:36:34", "remaining_time": "2:49:00", "throughput": 8657.62, "total_tokens": 50167712} +{"current_steps": 74430, "total_steps": 204665, "loss": 0.0007, "lr": 1.6055416871750098e-06, "epoch": 1.8183372828768964, "percentage": 36.37, "elapsed_time": "1:36:34", "remaining_time": "2:48:59", "throughput": 8657.77, "total_tokens": 50171680} +{"current_steps": 74435, "total_steps": 204665, "loss": 0.1016, "lr": 1.6054738199009412e-06, "epoch": 1.8184594337087434, "percentage": 36.37, "elapsed_time": "1:36:35", "remaining_time": "2:48:59", "throughput": 8657.88, "total_tokens": 50175392} +{"current_steps": 74440, "total_steps": 204665, "loss": 0.0608, "lr": 1.6054059482237043e-06, "epoch": 1.8185815845405906, "percentage": 36.37, "elapsed_time": "1:36:35", "remaining_time": "2:48:58", "throughput": 8657.93, "total_tokens": 50178720} +{"current_steps": 74445, "total_steps": 204665, "loss": 0.0404, "lr": 1.6053380721437927e-06, "epoch": 1.8187037353724378, "percentage": 36.37, "elapsed_time": "1:36:36", "remaining_time": "2:48:58", "throughput": 8657.92, "total_tokens": 50181536} +{"current_steps": 74450, "total_steps": 204665, "loss": 0.1494, "lr": 1.6052701916616993e-06, "epoch": 1.818825886204285, "percentage": 36.38, "elapsed_time": "1:36:36", "remaining_time": "2:48:58", "throughput": 8657.96, "total_tokens": 50184800} +{"current_steps": 74455, "total_steps": 204665, "loss": 0.0619, "lr": 1.6052023067779189e-06, "epoch": 1.8189480370361322, "percentage": 36.38, "elapsed_time": "1:36:36", "remaining_time": "2:48:57", "throughput": 8657.99, "total_tokens": 50187936} +{"current_steps": 74460, "total_steps": 204665, "loss": 0.0656, "lr": 1.605134417492944e-06, "epoch": 1.8190701878679794, "percentage": 36.38, "elapsed_time": "1:36:37", "remaining_time": "2:48:57", "throughput": 8658.01, "total_tokens": 50191008} +{"current_steps": 74465, "total_steps": 204665, "loss": 0.0818, "lr": 1.6050665238072689e-06, "epoch": 1.8191923386998265, "percentage": 36.38, "elapsed_time": "1:36:37", "remaining_time": "2:48:56", "throughput": 8658.07, "total_tokens": 50194400} +{"current_steps": 74470, "total_steps": 204665, "loss": 0.053, "lr": 1.6049986257213878e-06, "epoch": 1.8193144895316737, "percentage": 36.39, "elapsed_time": "1:36:37", "remaining_time": "2:48:56", "throughput": 8658.06, "total_tokens": 50197280} +{"current_steps": 74475, "total_steps": 204665, "loss": 0.0762, "lr": 1.6049307232357935e-06, "epoch": 1.819436640363521, "percentage": 36.39, "elapsed_time": "1:36:38", "remaining_time": "2:48:55", "throughput": 8658.1, "total_tokens": 50200480} +{"current_steps": 74480, "total_steps": 204665, "loss": 0.0584, "lr": 1.6048628163509803e-06, "epoch": 1.8195587911953681, "percentage": 36.39, "elapsed_time": "1:36:38", "remaining_time": "2:48:55", "throughput": 8658.18, "total_tokens": 50203936} +{"current_steps": 74485, "total_steps": 204665, "loss": 0.042, "lr": 1.6047949050674422e-06, "epoch": 1.8196809420272153, "percentage": 36.39, "elapsed_time": "1:36:38", "remaining_time": "2:48:54", "throughput": 8658.21, "total_tokens": 50207136} +{"current_steps": 74490, "total_steps": 204665, "loss": 0.1156, "lr": 1.6047269893856728e-06, "epoch": 1.8198030928590625, "percentage": 36.4, "elapsed_time": "1:36:39", "remaining_time": "2:48:54", "throughput": 8658.23, "total_tokens": 50210144} +{"current_steps": 74495, "total_steps": 204665, "loss": 0.1282, "lr": 1.604659069306166e-06, "epoch": 1.8199252436909097, "percentage": 36.4, "elapsed_time": "1:36:39", "remaining_time": "2:48:53", "throughput": 8658.33, "total_tokens": 50213792} +{"current_steps": 74500, "total_steps": 204665, "loss": 0.1632, "lr": 1.604591144829416e-06, "epoch": 1.8200473945227567, "percentage": 36.4, "elapsed_time": "1:36:39", "remaining_time": "2:48:53", "throughput": 8658.41, "total_tokens": 50217312} +{"current_steps": 74505, "total_steps": 204665, "loss": 0.1738, "lr": 1.6045232159559166e-06, "epoch": 1.8201695453546038, "percentage": 36.4, "elapsed_time": "1:36:40", "remaining_time": "2:48:52", "throughput": 8658.43, "total_tokens": 50220384} +{"current_steps": 74510, "total_steps": 204665, "loss": 0.0321, "lr": 1.6044552826861613e-06, "epoch": 1.820291696186451, "percentage": 36.41, "elapsed_time": "1:36:40", "remaining_time": "2:48:52", "throughput": 8658.5, "total_tokens": 50223840} +{"current_steps": 74515, "total_steps": 204665, "loss": 0.0891, "lr": 1.604387345020645e-06, "epoch": 1.8204138470182982, "percentage": 36.41, "elapsed_time": "1:36:40", "remaining_time": "2:48:51", "throughput": 8658.57, "total_tokens": 50227232} +{"current_steps": 74520, "total_steps": 204665, "loss": 0.0439, "lr": 1.6043194029598612e-06, "epoch": 1.8205359978501452, "percentage": 36.41, "elapsed_time": "1:36:41", "remaining_time": "2:48:51", "throughput": 8658.65, "total_tokens": 50230752} +{"current_steps": 74525, "total_steps": 204665, "loss": 0.1376, "lr": 1.6042514565043047e-06, "epoch": 1.8206581486819924, "percentage": 36.41, "elapsed_time": "1:36:41", "remaining_time": "2:48:51", "throughput": 8658.7, "total_tokens": 50234016} +{"current_steps": 74530, "total_steps": 204665, "loss": 0.0032, "lr": 1.6041835056544683e-06, "epoch": 1.8207802995138396, "percentage": 36.42, "elapsed_time": "1:36:41", "remaining_time": "2:48:50", "throughput": 8658.73, "total_tokens": 50237216} +{"current_steps": 74535, "total_steps": 204665, "loss": 0.0703, "lr": 1.6041155504108477e-06, "epoch": 1.8209024503456868, "percentage": 36.42, "elapsed_time": "1:36:42", "remaining_time": "2:48:50", "throughput": 8658.85, "total_tokens": 50240992} +{"current_steps": 74540, "total_steps": 204665, "loss": 0.104, "lr": 1.6040475907739356e-06, "epoch": 1.821024601177534, "percentage": 36.42, "elapsed_time": "1:36:42", "remaining_time": "2:48:49", "throughput": 8658.91, "total_tokens": 50244320} +{"current_steps": 74545, "total_steps": 204665, "loss": 0.0845, "lr": 1.6039796267442273e-06, "epoch": 1.8211467520093811, "percentage": 36.42, "elapsed_time": "1:36:42", "remaining_time": "2:48:49", "throughput": 8658.98, "total_tokens": 50247776} +{"current_steps": 74550, "total_steps": 204665, "loss": 0.1112, "lr": 1.6039116583222168e-06, "epoch": 1.8212689028412283, "percentage": 36.43, "elapsed_time": "1:36:43", "remaining_time": "2:48:48", "throughput": 8659.11, "total_tokens": 50251616} +{"current_steps": 74555, "total_steps": 204665, "loss": 0.0036, "lr": 1.603843685508398e-06, "epoch": 1.8213910536730755, "percentage": 36.43, "elapsed_time": "1:36:43", "remaining_time": "2:48:48", "throughput": 8659.2, "total_tokens": 50255264} +{"current_steps": 74560, "total_steps": 204665, "loss": 0.0749, "lr": 1.603775708303266e-06, "epoch": 1.8215132045049227, "percentage": 36.43, "elapsed_time": "1:36:44", "remaining_time": "2:48:47", "throughput": 8659.26, "total_tokens": 50258592} +{"current_steps": 74565, "total_steps": 204665, "loss": 0.0439, "lr": 1.6037077267073143e-06, "epoch": 1.82163535533677, "percentage": 36.43, "elapsed_time": "1:36:44", "remaining_time": "2:48:47", "throughput": 8659.25, "total_tokens": 50261472} +{"current_steps": 74570, "total_steps": 204665, "loss": 0.0977, "lr": 1.6036397407210376e-06, "epoch": 1.821757506168617, "percentage": 36.44, "elapsed_time": "1:36:44", "remaining_time": "2:48:46", "throughput": 8659.33, "total_tokens": 50264992} +{"current_steps": 74575, "total_steps": 204665, "loss": 0.0846, "lr": 1.6035717503449302e-06, "epoch": 1.8218796570004643, "percentage": 36.44, "elapsed_time": "1:36:45", "remaining_time": "2:48:46", "throughput": 8659.42, "total_tokens": 50268576} +{"current_steps": 74580, "total_steps": 204665, "loss": 0.0368, "lr": 1.603503755579487e-06, "epoch": 1.8220018078323115, "percentage": 36.44, "elapsed_time": "1:36:45", "remaining_time": "2:48:46", "throughput": 8659.48, "total_tokens": 50271904} +{"current_steps": 74585, "total_steps": 204665, "loss": 0.303, "lr": 1.6034357564252021e-06, "epoch": 1.8221239586641587, "percentage": 36.44, "elapsed_time": "1:36:45", "remaining_time": "2:48:45", "throughput": 8659.54, "total_tokens": 50275296} +{"current_steps": 74590, "total_steps": 204665, "loss": 0.0453, "lr": 1.6033677528825699e-06, "epoch": 1.8222461094960056, "percentage": 36.44, "elapsed_time": "1:36:46", "remaining_time": "2:48:45", "throughput": 8659.6, "total_tokens": 50278624} +{"current_steps": 74595, "total_steps": 204665, "loss": 0.0841, "lr": 1.6032997449520855e-06, "epoch": 1.8223682603278528, "percentage": 36.45, "elapsed_time": "1:36:46", "remaining_time": "2:48:44", "throughput": 8659.66, "total_tokens": 50282016} +{"current_steps": 74600, "total_steps": 204665, "loss": 0.0473, "lr": 1.6032317326342427e-06, "epoch": 1.8224904111597, "percentage": 36.45, "elapsed_time": "1:36:46", "remaining_time": "2:48:44", "throughput": 8659.69, "total_tokens": 50285152} +{"current_steps": 74605, "total_steps": 204665, "loss": 0.0353, "lr": 1.6031637159295366e-06, "epoch": 1.8226125619915472, "percentage": 36.45, "elapsed_time": "1:36:47", "remaining_time": "2:48:43", "throughput": 8659.77, "total_tokens": 50288672} +{"current_steps": 74610, "total_steps": 204665, "loss": 0.0296, "lr": 1.6030956948384618e-06, "epoch": 1.8227347128233942, "percentage": 36.45, "elapsed_time": "1:36:47", "remaining_time": "2:48:43", "throughput": 8659.79, "total_tokens": 50291744} +{"current_steps": 74615, "total_steps": 204665, "loss": 0.221, "lr": 1.6030276693615129e-06, "epoch": 1.8228568636552414, "percentage": 36.46, "elapsed_time": "1:36:47", "remaining_time": "2:48:42", "throughput": 8659.85, "total_tokens": 50295136} +{"current_steps": 74620, "total_steps": 204665, "loss": 0.1465, "lr": 1.6029596394991844e-06, "epoch": 1.8229790144870885, "percentage": 36.46, "elapsed_time": "1:36:48", "remaining_time": "2:48:42", "throughput": 8659.93, "total_tokens": 50298656} +{"current_steps": 74625, "total_steps": 204665, "loss": 0.1616, "lr": 1.6028916052519714e-06, "epoch": 1.8231011653189357, "percentage": 36.46, "elapsed_time": "1:36:48", "remaining_time": "2:48:41", "throughput": 8660.01, "total_tokens": 50302112} +{"current_steps": 74630, "total_steps": 204665, "loss": 0.1432, "lr": 1.6028235666203687e-06, "epoch": 1.823223316150783, "percentage": 36.46, "elapsed_time": "1:36:48", "remaining_time": "2:48:41", "throughput": 8660.1, "total_tokens": 50305696} +{"current_steps": 74635, "total_steps": 204665, "loss": 0.0013, "lr": 1.6027555236048705e-06, "epoch": 1.8233454669826301, "percentage": 36.47, "elapsed_time": "1:36:49", "remaining_time": "2:48:40", "throughput": 8660.22, "total_tokens": 50309472} +{"current_steps": 74640, "total_steps": 204665, "loss": 0.0759, "lr": 1.6026874762059722e-06, "epoch": 1.8234676178144773, "percentage": 36.47, "elapsed_time": "1:36:49", "remaining_time": "2:48:40", "throughput": 8660.25, "total_tokens": 50312672} +{"current_steps": 74645, "total_steps": 204665, "loss": 0.1408, "lr": 1.6026194244241683e-06, "epoch": 1.8235897686463245, "percentage": 36.47, "elapsed_time": "1:36:49", "remaining_time": "2:48:40", "throughput": 8660.28, "total_tokens": 50315744} +{"current_steps": 74650, "total_steps": 204665, "loss": 0.1048, "lr": 1.602551368259954e-06, "epoch": 1.8237119194781717, "percentage": 36.47, "elapsed_time": "1:36:50", "remaining_time": "2:48:39", "throughput": 8660.38, "total_tokens": 50319392} +{"current_steps": 74655, "total_steps": 204665, "loss": 0.1153, "lr": 1.602483307713824e-06, "epoch": 1.8238340703100189, "percentage": 36.48, "elapsed_time": "1:36:50", "remaining_time": "2:48:39", "throughput": 8660.41, "total_tokens": 50322528} +{"current_steps": 74660, "total_steps": 204665, "loss": 0.1064, "lr": 1.6024152427862733e-06, "epoch": 1.823956221141866, "percentage": 36.48, "elapsed_time": "1:36:50", "remaining_time": "2:48:38", "throughput": 8660.47, "total_tokens": 50325920} +{"current_steps": 74665, "total_steps": 204665, "loss": 0.0453, "lr": 1.6023471734777971e-06, "epoch": 1.8240783719737133, "percentage": 36.48, "elapsed_time": "1:36:51", "remaining_time": "2:48:38", "throughput": 8660.49, "total_tokens": 50328992} +{"current_steps": 74670, "total_steps": 204665, "loss": 0.0076, "lr": 1.6022790997888903e-06, "epoch": 1.8242005228055604, "percentage": 36.48, "elapsed_time": "1:36:51", "remaining_time": "2:48:37", "throughput": 8660.54, "total_tokens": 50332320} +{"current_steps": 74675, "total_steps": 204665, "loss": 0.0306, "lr": 1.6022110217200478e-06, "epoch": 1.8243226736374076, "percentage": 36.49, "elapsed_time": "1:36:52", "remaining_time": "2:48:37", "throughput": 8660.66, "total_tokens": 50336096} +{"current_steps": 74680, "total_steps": 204665, "loss": 0.1109, "lr": 1.6021429392717645e-06, "epoch": 1.8244448244692546, "percentage": 36.49, "elapsed_time": "1:36:52", "remaining_time": "2:48:36", "throughput": 8660.67, "total_tokens": 50339104} +{"current_steps": 74685, "total_steps": 204665, "loss": 0.0014, "lr": 1.6020748524445361e-06, "epoch": 1.8245669753011018, "percentage": 36.49, "elapsed_time": "1:36:52", "remaining_time": "2:48:36", "throughput": 8660.73, "total_tokens": 50342432} +{"current_steps": 74690, "total_steps": 204665, "loss": 0.0579, "lr": 1.6020067612388575e-06, "epoch": 1.824689126132949, "percentage": 36.49, "elapsed_time": "1:36:53", "remaining_time": "2:48:35", "throughput": 8660.8, "total_tokens": 50345952} +{"current_steps": 74695, "total_steps": 204665, "loss": 0.1848, "lr": 1.6019386656552234e-06, "epoch": 1.8248112769647962, "percentage": 36.5, "elapsed_time": "1:36:53", "remaining_time": "2:48:35", "throughput": 8660.84, "total_tokens": 50349088} +{"current_steps": 74700, "total_steps": 204665, "loss": 0.0851, "lr": 1.6018705656941299e-06, "epoch": 1.8249334277966431, "percentage": 36.5, "elapsed_time": "1:36:53", "remaining_time": "2:48:34", "throughput": 8660.88, "total_tokens": 50352352} +{"current_steps": 74705, "total_steps": 204665, "loss": 0.0545, "lr": 1.6018024613560717e-06, "epoch": 1.8250555786284903, "percentage": 36.5, "elapsed_time": "1:36:54", "remaining_time": "2:48:34", "throughput": 8660.99, "total_tokens": 50356128} +{"current_steps": 74710, "total_steps": 204665, "loss": 0.0043, "lr": 1.601734352641544e-06, "epoch": 1.8251777294603375, "percentage": 36.5, "elapsed_time": "1:36:54", "remaining_time": "2:48:34", "throughput": 8661.11, "total_tokens": 50359904} +{"current_steps": 74715, "total_steps": 204665, "loss": 0.1019, "lr": 1.6016662395510422e-06, "epoch": 1.8252998802921847, "percentage": 36.51, "elapsed_time": "1:36:54", "remaining_time": "2:48:33", "throughput": 8661.26, "total_tokens": 50363872} +{"current_steps": 74720, "total_steps": 204665, "loss": 0.0392, "lr": 1.6015981220850616e-06, "epoch": 1.825422031124032, "percentage": 36.51, "elapsed_time": "1:36:55", "remaining_time": "2:48:33", "throughput": 8661.31, "total_tokens": 50367200} +{"current_steps": 74725, "total_steps": 204665, "loss": 0.166, "lr": 1.601530000244098e-06, "epoch": 1.825544181955879, "percentage": 36.51, "elapsed_time": "1:36:55", "remaining_time": "2:48:32", "throughput": 8661.42, "total_tokens": 50370912} +{"current_steps": 74730, "total_steps": 204665, "loss": 0.0225, "lr": 1.6014618740286458e-06, "epoch": 1.8256663327877263, "percentage": 36.51, "elapsed_time": "1:36:55", "remaining_time": "2:48:32", "throughput": 8661.47, "total_tokens": 50374240} +{"current_steps": 74735, "total_steps": 204665, "loss": 0.1129, "lr": 1.6013937434392015e-06, "epoch": 1.8257884836195735, "percentage": 36.52, "elapsed_time": "1:36:56", "remaining_time": "2:48:31", "throughput": 8661.51, "total_tokens": 50377440} +{"current_steps": 74740, "total_steps": 204665, "loss": 0.0816, "lr": 1.6013256084762603e-06, "epoch": 1.8259106344514207, "percentage": 36.52, "elapsed_time": "1:36:56", "remaining_time": "2:48:31", "throughput": 8661.56, "total_tokens": 50380704} +{"current_steps": 74745, "total_steps": 204665, "loss": 0.0287, "lr": 1.6012574691403174e-06, "epoch": 1.8260327852832678, "percentage": 36.52, "elapsed_time": "1:36:56", "remaining_time": "2:48:30", "throughput": 8661.58, "total_tokens": 50383776} +{"current_steps": 74750, "total_steps": 204665, "loss": 0.0916, "lr": 1.6011893254318682e-06, "epoch": 1.826154936115115, "percentage": 36.52, "elapsed_time": "1:36:57", "remaining_time": "2:48:30", "throughput": 8661.6, "total_tokens": 50386848} +{"current_steps": 74755, "total_steps": 204665, "loss": 0.0883, "lr": 1.601121177351409e-06, "epoch": 1.8262770869469622, "percentage": 36.53, "elapsed_time": "1:36:57", "remaining_time": "2:48:29", "throughput": 8661.64, "total_tokens": 50390112} +{"current_steps": 74760, "total_steps": 204665, "loss": 0.0841, "lr": 1.6010530248994345e-06, "epoch": 1.8263992377788094, "percentage": 36.53, "elapsed_time": "1:36:57", "remaining_time": "2:48:29", "throughput": 8661.71, "total_tokens": 50393504} +{"current_steps": 74765, "total_steps": 204665, "loss": 0.0606, "lr": 1.6009848680764409e-06, "epoch": 1.8265213886106566, "percentage": 36.53, "elapsed_time": "1:36:58", "remaining_time": "2:48:28", "throughput": 8661.73, "total_tokens": 50396576} +{"current_steps": 74770, "total_steps": 204665, "loss": 0.0721, "lr": 1.6009167068829239e-06, "epoch": 1.8266435394425036, "percentage": 36.53, "elapsed_time": "1:36:58", "remaining_time": "2:48:28", "throughput": 8661.8, "total_tokens": 50400032} +{"current_steps": 74775, "total_steps": 204665, "loss": 0.1413, "lr": 1.6008485413193786e-06, "epoch": 1.8267656902743508, "percentage": 36.54, "elapsed_time": "1:36:59", "remaining_time": "2:48:28", "throughput": 8661.85, "total_tokens": 50403360} +{"current_steps": 74780, "total_steps": 204665, "loss": 0.0292, "lr": 1.6007803713863015e-06, "epoch": 1.826887841106198, "percentage": 36.54, "elapsed_time": "1:36:59", "remaining_time": "2:48:27", "throughput": 8661.89, "total_tokens": 50406560} +{"current_steps": 74785, "total_steps": 204665, "loss": 0.0033, "lr": 1.6007121970841877e-06, "epoch": 1.8270099919380451, "percentage": 36.54, "elapsed_time": "1:36:59", "remaining_time": "2:48:27", "throughput": 8662.02, "total_tokens": 50410464} +{"current_steps": 74790, "total_steps": 204665, "loss": 0.1076, "lr": 1.6006440184135333e-06, "epoch": 1.827132142769892, "percentage": 36.54, "elapsed_time": "1:37:00", "remaining_time": "2:48:26", "throughput": 8662.03, "total_tokens": 50413472} +{"current_steps": 74795, "total_steps": 204665, "loss": 0.0026, "lr": 1.6005758353748338e-06, "epoch": 1.8272542936017393, "percentage": 36.55, "elapsed_time": "1:37:00", "remaining_time": "2:48:26", "throughput": 8662.05, "total_tokens": 50416544} +{"current_steps": 74800, "total_steps": 204665, "loss": 0.0919, "lr": 1.6005076479685854e-06, "epoch": 1.8273764444335865, "percentage": 36.55, "elapsed_time": "1:37:00", "remaining_time": "2:48:25", "throughput": 8662.07, "total_tokens": 50419680} +{"current_steps": 74805, "total_steps": 204665, "loss": 0.0169, "lr": 1.600439456195284e-06, "epoch": 1.8274985952654337, "percentage": 36.55, "elapsed_time": "1:37:01", "remaining_time": "2:48:25", "throughput": 8662.13, "total_tokens": 50423072} +{"current_steps": 74810, "total_steps": 204665, "loss": 0.1338, "lr": 1.6003712600554255e-06, "epoch": 1.8276207460972809, "percentage": 36.55, "elapsed_time": "1:37:01", "remaining_time": "2:48:24", "throughput": 8662.14, "total_tokens": 50426016} +{"current_steps": 74815, "total_steps": 204665, "loss": 0.0809, "lr": 1.6003030595495056e-06, "epoch": 1.827742896929128, "percentage": 36.55, "elapsed_time": "1:37:01", "remaining_time": "2:48:24", "throughput": 8662.21, "total_tokens": 50429472} +{"current_steps": 74820, "total_steps": 204665, "loss": 0.1086, "lr": 1.6002348546780202e-06, "epoch": 1.8278650477609752, "percentage": 36.56, "elapsed_time": "1:37:02", "remaining_time": "2:48:23", "throughput": 8662.25, "total_tokens": 50432736} +{"current_steps": 74825, "total_steps": 204665, "loss": 0.3054, "lr": 1.6001666454414657e-06, "epoch": 1.8279871985928224, "percentage": 36.56, "elapsed_time": "1:37:02", "remaining_time": "2:48:23", "throughput": 8662.34, "total_tokens": 50436320} +{"current_steps": 74830, "total_steps": 204665, "loss": 0.0274, "lr": 1.6000984318403376e-06, "epoch": 1.8281093494246696, "percentage": 36.56, "elapsed_time": "1:37:02", "remaining_time": "2:48:22", "throughput": 8662.42, "total_tokens": 50439776} +{"current_steps": 74835, "total_steps": 204665, "loss": 0.1356, "lr": 1.6000302138751328e-06, "epoch": 1.8282315002565168, "percentage": 36.56, "elapsed_time": "1:37:03", "remaining_time": "2:48:22", "throughput": 8662.48, "total_tokens": 50443168} +{"current_steps": 74840, "total_steps": 204665, "loss": 0.0999, "lr": 1.5999619915463466e-06, "epoch": 1.828353651088364, "percentage": 36.57, "elapsed_time": "1:37:03", "remaining_time": "2:48:22", "throughput": 8662.57, "total_tokens": 50446752} +{"current_steps": 74845, "total_steps": 204665, "loss": 0.0559, "lr": 1.5998937648544756e-06, "epoch": 1.8284758019202112, "percentage": 36.57, "elapsed_time": "1:37:03", "remaining_time": "2:48:21", "throughput": 8662.63, "total_tokens": 50450144} +{"current_steps": 74850, "total_steps": 204665, "loss": 0.0009, "lr": 1.5998255338000157e-06, "epoch": 1.8285979527520584, "percentage": 36.57, "elapsed_time": "1:37:04", "remaining_time": "2:48:21", "throughput": 8662.66, "total_tokens": 50453280} +{"current_steps": 74855, "total_steps": 204665, "loss": 0.053, "lr": 1.599757298383463e-06, "epoch": 1.8287201035839054, "percentage": 36.57, "elapsed_time": "1:37:04", "remaining_time": "2:48:20", "throughput": 8662.69, "total_tokens": 50456416} +{"current_steps": 74860, "total_steps": 204665, "loss": 0.0388, "lr": 1.599689058605314e-06, "epoch": 1.8288422544157525, "percentage": 36.58, "elapsed_time": "1:37:04", "remaining_time": "2:48:20", "throughput": 8662.71, "total_tokens": 50459488} +{"current_steps": 74865, "total_steps": 204665, "loss": 0.075, "lr": 1.599620814466065e-06, "epoch": 1.8289644052475997, "percentage": 36.58, "elapsed_time": "1:37:05", "remaining_time": "2:48:19", "throughput": 8662.9, "total_tokens": 50463776} +{"current_steps": 74870, "total_steps": 204665, "loss": 0.071, "lr": 1.599552565966212e-06, "epoch": 1.829086556079447, "percentage": 36.58, "elapsed_time": "1:37:05", "remaining_time": "2:48:19", "throughput": 8662.94, "total_tokens": 50467040} +{"current_steps": 74875, "total_steps": 204665, "loss": 0.0455, "lr": 1.5994843131062519e-06, "epoch": 1.8292087069112941, "percentage": 36.58, "elapsed_time": "1:37:05", "remaining_time": "2:48:18", "throughput": 8663.12, "total_tokens": 50471200} +{"current_steps": 74880, "total_steps": 204665, "loss": 0.0644, "lr": 1.5994160558866802e-06, "epoch": 1.829330857743141, "percentage": 36.59, "elapsed_time": "1:37:06", "remaining_time": "2:48:18", "throughput": 8663.14, "total_tokens": 50474272} +{"current_steps": 74885, "total_steps": 204665, "loss": 0.1855, "lr": 1.5993477943079937e-06, "epoch": 1.8294530085749883, "percentage": 36.59, "elapsed_time": "1:37:06", "remaining_time": "2:48:17", "throughput": 8663.22, "total_tokens": 50477792} +{"current_steps": 74890, "total_steps": 204665, "loss": 0.0278, "lr": 1.599279528370689e-06, "epoch": 1.8295751594068355, "percentage": 36.59, "elapsed_time": "1:37:07", "remaining_time": "2:48:17", "throughput": 8663.3, "total_tokens": 50481248} +{"current_steps": 74895, "total_steps": 204665, "loss": 0.0006, "lr": 1.5992112580752623e-06, "epoch": 1.8296973102386827, "percentage": 36.59, "elapsed_time": "1:37:07", "remaining_time": "2:48:17", "throughput": 8663.4, "total_tokens": 50484896} +{"current_steps": 74900, "total_steps": 204665, "loss": 0.1846, "lr": 1.5991429834222104e-06, "epoch": 1.8298194610705298, "percentage": 36.6, "elapsed_time": "1:37:07", "remaining_time": "2:48:16", "throughput": 8663.45, "total_tokens": 50488160} +{"current_steps": 74905, "total_steps": 204665, "loss": 0.005, "lr": 1.5990747044120294e-06, "epoch": 1.829941611902377, "percentage": 36.6, "elapsed_time": "1:37:08", "remaining_time": "2:48:16", "throughput": 8663.56, "total_tokens": 50491872} +{"current_steps": 74910, "total_steps": 204665, "loss": 0.1366, "lr": 1.5990064210452158e-06, "epoch": 1.8300637627342242, "percentage": 36.6, "elapsed_time": "1:37:08", "remaining_time": "2:48:15", "throughput": 8663.64, "total_tokens": 50495392} +{"current_steps": 74915, "total_steps": 204665, "loss": 0.0999, "lr": 1.5989381333222664e-06, "epoch": 1.8301859135660714, "percentage": 36.6, "elapsed_time": "1:37:08", "remaining_time": "2:48:15", "throughput": 8663.71, "total_tokens": 50498848} +{"current_steps": 74920, "total_steps": 204665, "loss": 0.034, "lr": 1.5988698412436783e-06, "epoch": 1.8303080643979186, "percentage": 36.61, "elapsed_time": "1:37:09", "remaining_time": "2:48:14", "throughput": 8663.8, "total_tokens": 50502368} +{"current_steps": 74925, "total_steps": 204665, "loss": 0.0404, "lr": 1.5988015448099472e-06, "epoch": 1.8304302152297658, "percentage": 36.61, "elapsed_time": "1:37:09", "remaining_time": "2:48:14", "throughput": 8663.86, "total_tokens": 50505760} +{"current_steps": 74930, "total_steps": 204665, "loss": 0.0499, "lr": 1.5987332440215705e-06, "epoch": 1.830552366061613, "percentage": 36.61, "elapsed_time": "1:37:09", "remaining_time": "2:48:13", "throughput": 8663.94, "total_tokens": 50509280} +{"current_steps": 74935, "total_steps": 204665, "loss": 0.1397, "lr": 1.5986649388790443e-06, "epoch": 1.8306745168934602, "percentage": 36.61, "elapsed_time": "1:37:10", "remaining_time": "2:48:13", "throughput": 8663.97, "total_tokens": 50512416} +{"current_steps": 74940, "total_steps": 204665, "loss": 0.0294, "lr": 1.5985966293828659e-06, "epoch": 1.8307966677253074, "percentage": 36.62, "elapsed_time": "1:37:10", "remaining_time": "2:48:12", "throughput": 8664.07, "total_tokens": 50516064} +{"current_steps": 74945, "total_steps": 204665, "loss": 0.0787, "lr": 1.5985283155335316e-06, "epoch": 1.8309188185571543, "percentage": 36.62, "elapsed_time": "1:37:10", "remaining_time": "2:48:12", "throughput": 8664.14, "total_tokens": 50519456} +{"current_steps": 74950, "total_steps": 204665, "loss": 0.1047, "lr": 1.5984599973315385e-06, "epoch": 1.8310409693890015, "percentage": 36.62, "elapsed_time": "1:37:11", "remaining_time": "2:48:12", "throughput": 8664.17, "total_tokens": 50522592} +{"current_steps": 74955, "total_steps": 204665, "loss": 0.0326, "lr": 1.5983916747773834e-06, "epoch": 1.8311631202208487, "percentage": 36.62, "elapsed_time": "1:37:11", "remaining_time": "2:48:11", "throughput": 8664.29, "total_tokens": 50526368} +{"current_steps": 74960, "total_steps": 204665, "loss": 0.0006, "lr": 1.598323347871563e-06, "epoch": 1.831285271052696, "percentage": 36.63, "elapsed_time": "1:37:11", "remaining_time": "2:48:11", "throughput": 8664.34, "total_tokens": 50529696} +{"current_steps": 74965, "total_steps": 204665, "loss": 0.0736, "lr": 1.5982550166145744e-06, "epoch": 1.831407421884543, "percentage": 36.63, "elapsed_time": "1:37:12", "remaining_time": "2:48:10", "throughput": 8664.39, "total_tokens": 50532960} +{"current_steps": 74970, "total_steps": 204665, "loss": 0.082, "lr": 1.5981866810069142e-06, "epoch": 1.83152957271639, "percentage": 36.63, "elapsed_time": "1:37:12", "remaining_time": "2:48:10", "throughput": 8664.46, "total_tokens": 50536416} +{"current_steps": 74975, "total_steps": 204665, "loss": 0.0615, "lr": 1.5981183410490796e-06, "epoch": 1.8316517235482372, "percentage": 36.63, "elapsed_time": "1:37:12", "remaining_time": "2:48:09", "throughput": 8664.55, "total_tokens": 50540000} +{"current_steps": 74980, "total_steps": 204665, "loss": 0.1137, "lr": 1.5980499967415677e-06, "epoch": 1.8317738743800844, "percentage": 36.64, "elapsed_time": "1:37:13", "remaining_time": "2:48:09", "throughput": 8664.67, "total_tokens": 50543776} +{"current_steps": 74985, "total_steps": 204665, "loss": 0.0009, "lr": 1.5979816480848754e-06, "epoch": 1.8318960252119316, "percentage": 36.64, "elapsed_time": "1:37:13", "remaining_time": "2:48:08", "throughput": 8664.7, "total_tokens": 50546976} +{"current_steps": 74990, "total_steps": 204665, "loss": 0.1536, "lr": 1.5979132950794996e-06, "epoch": 1.8320181760437788, "percentage": 36.64, "elapsed_time": "1:37:14", "remaining_time": "2:48:08", "throughput": 8664.88, "total_tokens": 50551264} +{"current_steps": 74995, "total_steps": 204665, "loss": 0.1114, "lr": 1.5978449377259376e-06, "epoch": 1.832140326875626, "percentage": 36.64, "elapsed_time": "1:37:14", "remaining_time": "2:48:07", "throughput": 8665.01, "total_tokens": 50555104} +{"current_steps": 75000, "total_steps": 204665, "loss": 0.1184, "lr": 1.5977765760246863e-06, "epoch": 1.8322624777074732, "percentage": 36.65, "elapsed_time": "1:37:14", "remaining_time": "2:48:07", "throughput": 8665.08, "total_tokens": 50558560} +{"current_steps": 75005, "total_steps": 204665, "loss": 0.0198, "lr": 1.597708209976243e-06, "epoch": 1.8323846285393204, "percentage": 36.65, "elapsed_time": "1:37:15", "remaining_time": "2:48:07", "throughput": 8665.12, "total_tokens": 50561760} +{"current_steps": 75010, "total_steps": 204665, "loss": 0.0595, "lr": 1.5976398395811046e-06, "epoch": 1.8325067793711676, "percentage": 36.65, "elapsed_time": "1:37:15", "remaining_time": "2:48:06", "throughput": 8665.16, "total_tokens": 50564960} +{"current_steps": 75015, "total_steps": 204665, "loss": 0.0956, "lr": 1.5975714648397686e-06, "epoch": 1.8326289302030148, "percentage": 36.65, "elapsed_time": "1:37:15", "remaining_time": "2:48:06", "throughput": 8665.2, "total_tokens": 50568160} +{"current_steps": 75020, "total_steps": 204665, "loss": 0.0812, "lr": 1.5975030857527326e-06, "epoch": 1.832751081034862, "percentage": 36.66, "elapsed_time": "1:37:16", "remaining_time": "2:48:05", "throughput": 8665.19, "total_tokens": 50571040} +{"current_steps": 75025, "total_steps": 204665, "loss": 0.1012, "lr": 1.5974347023204932e-06, "epoch": 1.8328732318667091, "percentage": 36.66, "elapsed_time": "1:37:16", "remaining_time": "2:48:05", "throughput": 8665.22, "total_tokens": 50574176} +{"current_steps": 75030, "total_steps": 204665, "loss": 0.0047, "lr": 1.5973663145435482e-06, "epoch": 1.8329953826985563, "percentage": 36.66, "elapsed_time": "1:37:16", "remaining_time": "2:48:04", "throughput": 8665.23, "total_tokens": 50577120} +{"current_steps": 75035, "total_steps": 204665, "loss": 0.1798, "lr": 1.5972979224223942e-06, "epoch": 1.8331175335304033, "percentage": 36.66, "elapsed_time": "1:37:17", "remaining_time": "2:48:04", "throughput": 8665.31, "total_tokens": 50580640} +{"current_steps": 75040, "total_steps": 204665, "loss": 0.0024, "lr": 1.597229525957529e-06, "epoch": 1.8332396843622505, "percentage": 36.66, "elapsed_time": "1:37:17", "remaining_time": "2:48:03", "throughput": 8665.42, "total_tokens": 50584352} +{"current_steps": 75045, "total_steps": 204665, "loss": 0.0988, "lr": 1.5971611251494505e-06, "epoch": 1.8333618351940977, "percentage": 36.67, "elapsed_time": "1:37:17", "remaining_time": "2:48:03", "throughput": 8665.43, "total_tokens": 50587360} +{"current_steps": 75050, "total_steps": 204665, "loss": 0.1133, "lr": 1.5970927199986557e-06, "epoch": 1.8334839860259449, "percentage": 36.67, "elapsed_time": "1:37:18", "remaining_time": "2:48:02", "throughput": 8665.49, "total_tokens": 50590688} +{"current_steps": 75055, "total_steps": 204665, "loss": 0.1702, "lr": 1.5970243105056418e-06, "epoch": 1.8336061368577918, "percentage": 36.67, "elapsed_time": "1:37:18", "remaining_time": "2:48:02", "throughput": 8665.49, "total_tokens": 50593632} +{"current_steps": 75060, "total_steps": 204665, "loss": 0.0866, "lr": 1.5969558966709066e-06, "epoch": 1.833728287689639, "percentage": 36.67, "elapsed_time": "1:37:18", "remaining_time": "2:48:01", "throughput": 8665.54, "total_tokens": 50596896} +{"current_steps": 75065, "total_steps": 204665, "loss": 0.1055, "lr": 1.5968874784949476e-06, "epoch": 1.8338504385214862, "percentage": 36.68, "elapsed_time": "1:37:19", "remaining_time": "2:48:01", "throughput": 8665.56, "total_tokens": 50599968} +{"current_steps": 75070, "total_steps": 204665, "loss": 0.0348, "lr": 1.5968190559782622e-06, "epoch": 1.8339725893533334, "percentage": 36.68, "elapsed_time": "1:37:19", "remaining_time": "2:48:00", "throughput": 8665.58, "total_tokens": 50603104} +{"current_steps": 75075, "total_steps": 204665, "loss": 0.1494, "lr": 1.5967506291213481e-06, "epoch": 1.8340947401851806, "percentage": 36.68, "elapsed_time": "1:37:19", "remaining_time": "2:48:00", "throughput": 8665.65, "total_tokens": 50606496} +{"current_steps": 75080, "total_steps": 204665, "loss": 0.0829, "lr": 1.5966821979247031e-06, "epoch": 1.8342168910170278, "percentage": 36.68, "elapsed_time": "1:37:20", "remaining_time": "2:48:00", "throughput": 8665.73, "total_tokens": 50610016} +{"current_steps": 75085, "total_steps": 204665, "loss": 0.072, "lr": 1.5966137623888246e-06, "epoch": 1.834339041848875, "percentage": 36.69, "elapsed_time": "1:37:20", "remaining_time": "2:47:59", "throughput": 8665.73, "total_tokens": 50612960} +{"current_steps": 75090, "total_steps": 204665, "loss": 0.0437, "lr": 1.5965453225142102e-06, "epoch": 1.8344611926807222, "percentage": 36.69, "elapsed_time": "1:37:20", "remaining_time": "2:47:59", "throughput": 8665.84, "total_tokens": 50616672} +{"current_steps": 75095, "total_steps": 204665, "loss": 0.0035, "lr": 1.5964768783013579e-06, "epoch": 1.8345833435125694, "percentage": 36.69, "elapsed_time": "1:37:21", "remaining_time": "2:47:58", "throughput": 8665.88, "total_tokens": 50619872} +{"current_steps": 75100, "total_steps": 204665, "loss": 0.1989, "lr": 1.5964084297507652e-06, "epoch": 1.8347054943444165, "percentage": 36.69, "elapsed_time": "1:37:21", "remaining_time": "2:47:58", "throughput": 8665.98, "total_tokens": 50623584} +{"current_steps": 75105, "total_steps": 204665, "loss": 0.0274, "lr": 1.5963399768629299e-06, "epoch": 1.8348276451762637, "percentage": 36.7, "elapsed_time": "1:37:21", "remaining_time": "2:47:57", "throughput": 8665.99, "total_tokens": 50626592} +{"current_steps": 75110, "total_steps": 204665, "loss": 0.0848, "lr": 1.5962715196383503e-06, "epoch": 1.834949796008111, "percentage": 36.7, "elapsed_time": "1:37:22", "remaining_time": "2:47:57", "throughput": 8666.1, "total_tokens": 50630304} +{"current_steps": 75115, "total_steps": 204665, "loss": 0.0054, "lr": 1.5962030580775236e-06, "epoch": 1.8350719468399581, "percentage": 36.7, "elapsed_time": "1:37:22", "remaining_time": "2:47:56", "throughput": 8666.16, "total_tokens": 50633696} +{"current_steps": 75120, "total_steps": 204665, "loss": 0.1591, "lr": 1.596134592180948e-06, "epoch": 1.8351940976718053, "percentage": 36.7, "elapsed_time": "1:37:23", "remaining_time": "2:47:56", "throughput": 8666.22, "total_tokens": 50637024} +{"current_steps": 75125, "total_steps": 204665, "loss": 0.0389, "lr": 1.5960661219491208e-06, "epoch": 1.8353162485036523, "percentage": 36.71, "elapsed_time": "1:37:23", "remaining_time": "2:47:55", "throughput": 8666.29, "total_tokens": 50640416} +{"current_steps": 75130, "total_steps": 204665, "loss": 0.1209, "lr": 1.595997647382541e-06, "epoch": 1.8354383993354995, "percentage": 36.71, "elapsed_time": "1:37:23", "remaining_time": "2:47:55", "throughput": 8666.33, "total_tokens": 50643616} +{"current_steps": 75135, "total_steps": 204665, "loss": 0.1069, "lr": 1.5959291684817057e-06, "epoch": 1.8355605501673466, "percentage": 36.71, "elapsed_time": "1:37:24", "remaining_time": "2:47:54", "throughput": 8666.35, "total_tokens": 50646752} +{"current_steps": 75140, "total_steps": 204665, "loss": 0.0722, "lr": 1.5958606852471132e-06, "epoch": 1.8356827009991938, "percentage": 36.71, "elapsed_time": "1:37:24", "remaining_time": "2:47:54", "throughput": 8666.44, "total_tokens": 50650272} +{"current_steps": 75145, "total_steps": 204665, "loss": 0.0127, "lr": 1.595792197679262e-06, "epoch": 1.8358048518310408, "percentage": 36.72, "elapsed_time": "1:37:24", "remaining_time": "2:47:54", "throughput": 8666.57, "total_tokens": 50654176} +{"current_steps": 75150, "total_steps": 204665, "loss": 0.0315, "lr": 1.5957237057786492e-06, "epoch": 1.835927002662888, "percentage": 36.72, "elapsed_time": "1:37:25", "remaining_time": "2:47:53", "throughput": 8666.61, "total_tokens": 50657440} +{"current_steps": 75155, "total_steps": 204665, "loss": 0.0672, "lr": 1.595655209545774e-06, "epoch": 1.8360491534947352, "percentage": 36.72, "elapsed_time": "1:37:25", "remaining_time": "2:47:53", "throughput": 8666.65, "total_tokens": 50660640} +{"current_steps": 75160, "total_steps": 204665, "loss": 0.089, "lr": 1.5955867089811332e-06, "epoch": 1.8361713043265824, "percentage": 36.72, "elapsed_time": "1:37:25", "remaining_time": "2:47:52", "throughput": 8666.76, "total_tokens": 50664288} +{"current_steps": 75165, "total_steps": 204665, "loss": 0.0342, "lr": 1.5955182040852257e-06, "epoch": 1.8362934551584296, "percentage": 36.73, "elapsed_time": "1:37:26", "remaining_time": "2:47:52", "throughput": 8666.87, "total_tokens": 50668000} +{"current_steps": 75170, "total_steps": 204665, "loss": 0.0254, "lr": 1.59544969485855e-06, "epoch": 1.8364156059902768, "percentage": 36.73, "elapsed_time": "1:37:26", "remaining_time": "2:47:51", "throughput": 8666.93, "total_tokens": 50671328} +{"current_steps": 75175, "total_steps": 204665, "loss": 0.003, "lr": 1.5953811813016037e-06, "epoch": 1.836537756822124, "percentage": 36.73, "elapsed_time": "1:37:26", "remaining_time": "2:47:51", "throughput": 8666.99, "total_tokens": 50674720} +{"current_steps": 75180, "total_steps": 204665, "loss": 0.1863, "lr": 1.5953126634148855e-06, "epoch": 1.8366599076539711, "percentage": 36.73, "elapsed_time": "1:37:27", "remaining_time": "2:47:50", "throughput": 8667.06, "total_tokens": 50678112} +{"current_steps": 75185, "total_steps": 204665, "loss": 0.1438, "lr": 1.5952441411988934e-06, "epoch": 1.8367820584858183, "percentage": 36.74, "elapsed_time": "1:37:27", "remaining_time": "2:47:50", "throughput": 8667.11, "total_tokens": 50681440} +{"current_steps": 75190, "total_steps": 204665, "loss": 0.1104, "lr": 1.5951756146541257e-06, "epoch": 1.8369042093176655, "percentage": 36.74, "elapsed_time": "1:37:27", "remaining_time": "2:47:49", "throughput": 8667.15, "total_tokens": 50684704} +{"current_steps": 75195, "total_steps": 204665, "loss": 0.089, "lr": 1.5951070837810808e-06, "epoch": 1.8370263601495127, "percentage": 36.74, "elapsed_time": "1:37:28", "remaining_time": "2:47:49", "throughput": 8667.3, "total_tokens": 50688736} +{"current_steps": 75200, "total_steps": 204665, "loss": 0.0431, "lr": 1.5950385485802574e-06, "epoch": 1.83714851098136, "percentage": 36.74, "elapsed_time": "1:37:28", "remaining_time": "2:47:49", "throughput": 8667.32, "total_tokens": 50691808} +{"current_steps": 75205, "total_steps": 204665, "loss": 0.0347, "lr": 1.5949700090521536e-06, "epoch": 1.837270661813207, "percentage": 36.75, "elapsed_time": "1:37:28", "remaining_time": "2:47:48", "throughput": 8667.37, "total_tokens": 50695136} +{"current_steps": 75210, "total_steps": 204665, "loss": 0.1796, "lr": 1.594901465197268e-06, "epoch": 1.8373928126450543, "percentage": 36.75, "elapsed_time": "1:37:29", "remaining_time": "2:47:48", "throughput": 8667.48, "total_tokens": 50698848} +{"current_steps": 75215, "total_steps": 204665, "loss": 0.0567, "lr": 1.5948329170160983e-06, "epoch": 1.8375149634769012, "percentage": 36.75, "elapsed_time": "1:37:29", "remaining_time": "2:47:47", "throughput": 8667.51, "total_tokens": 50701984} +{"current_steps": 75220, "total_steps": 204665, "loss": 0.2215, "lr": 1.5947643645091442e-06, "epoch": 1.8376371143087484, "percentage": 36.75, "elapsed_time": "1:37:30", "remaining_time": "2:47:47", "throughput": 8667.58, "total_tokens": 50705440} +{"current_steps": 75225, "total_steps": 204665, "loss": 0.1647, "lr": 1.5946958076769035e-06, "epoch": 1.8377592651405956, "percentage": 36.76, "elapsed_time": "1:37:30", "remaining_time": "2:47:46", "throughput": 8667.73, "total_tokens": 50709472} +{"current_steps": 75230, "total_steps": 204665, "loss": 0.0869, "lr": 1.5946272465198748e-06, "epoch": 1.8378814159724428, "percentage": 36.76, "elapsed_time": "1:37:30", "remaining_time": "2:47:46", "throughput": 8667.76, "total_tokens": 50712608} +{"current_steps": 75235, "total_steps": 204665, "loss": 0.1903, "lr": 1.5945586810385572e-06, "epoch": 1.8380035668042898, "percentage": 36.76, "elapsed_time": "1:37:31", "remaining_time": "2:47:45", "throughput": 8667.83, "total_tokens": 50716000} +{"current_steps": 75240, "total_steps": 204665, "loss": 0.2179, "lr": 1.5944901112334486e-06, "epoch": 1.838125717636137, "percentage": 36.76, "elapsed_time": "1:37:31", "remaining_time": "2:47:45", "throughput": 8667.89, "total_tokens": 50719392} +{"current_steps": 75245, "total_steps": 204665, "loss": 0.0641, "lr": 1.5944215371050482e-06, "epoch": 1.8382478684679842, "percentage": 36.76, "elapsed_time": "1:37:31", "remaining_time": "2:47:44", "throughput": 8668.02, "total_tokens": 50723232} +{"current_steps": 75250, "total_steps": 204665, "loss": 0.1439, "lr": 1.5943529586538543e-06, "epoch": 1.8383700192998313, "percentage": 36.77, "elapsed_time": "1:37:32", "remaining_time": "2:47:44", "throughput": 8668.07, "total_tokens": 50726560} +{"current_steps": 75255, "total_steps": 204665, "loss": 0.0482, "lr": 1.594284375880366e-06, "epoch": 1.8384921701316785, "percentage": 36.77, "elapsed_time": "1:37:32", "remaining_time": "2:47:44", "throughput": 8668.12, "total_tokens": 50729824} +{"current_steps": 75260, "total_steps": 204665, "loss": 0.0023, "lr": 1.5942157887850818e-06, "epoch": 1.8386143209635257, "percentage": 36.77, "elapsed_time": "1:37:32", "remaining_time": "2:47:43", "throughput": 8668.16, "total_tokens": 50733024} +{"current_steps": 75265, "total_steps": 204665, "loss": 0.0956, "lr": 1.5941471973685007e-06, "epoch": 1.838736471795373, "percentage": 36.77, "elapsed_time": "1:37:33", "remaining_time": "2:47:43", "throughput": 8668.24, "total_tokens": 50736544} +{"current_steps": 75270, "total_steps": 204665, "loss": 0.0363, "lr": 1.5940786016311214e-06, "epoch": 1.83885862262722, "percentage": 36.78, "elapsed_time": "1:37:33", "remaining_time": "2:47:43", "throughput": 8667.93, "total_tokens": 50740064} +{"current_steps": 75275, "total_steps": 204665, "loss": 0.0902, "lr": 1.5940100015734426e-06, "epoch": 1.8389807734590673, "percentage": 36.78, "elapsed_time": "1:37:34", "remaining_time": "2:47:42", "throughput": 8667.95, "total_tokens": 50743136} +{"current_steps": 75280, "total_steps": 204665, "loss": 0.0455, "lr": 1.5939413971959632e-06, "epoch": 1.8391029242909145, "percentage": 36.78, "elapsed_time": "1:37:34", "remaining_time": "2:47:42", "throughput": 8668.0, "total_tokens": 50746400} +{"current_steps": 75285, "total_steps": 204665, "loss": 0.1632, "lr": 1.5938727884991824e-06, "epoch": 1.8392250751227617, "percentage": 36.78, "elapsed_time": "1:37:34", "remaining_time": "2:47:41", "throughput": 8668.1, "total_tokens": 50750048} +{"current_steps": 75290, "total_steps": 204665, "loss": 0.1416, "lr": 1.5938041754835987e-06, "epoch": 1.8393472259546089, "percentage": 36.79, "elapsed_time": "1:37:35", "remaining_time": "2:47:41", "throughput": 8668.16, "total_tokens": 50753440} +{"current_steps": 75295, "total_steps": 204665, "loss": 0.0531, "lr": 1.5937355581497115e-06, "epoch": 1.839469376786456, "percentage": 36.79, "elapsed_time": "1:37:35", "remaining_time": "2:47:40", "throughput": 8668.21, "total_tokens": 50756704} +{"current_steps": 75300, "total_steps": 204665, "loss": 0.1068, "lr": 1.5936669364980198e-06, "epoch": 1.8395915276183032, "percentage": 36.79, "elapsed_time": "1:37:35", "remaining_time": "2:47:40", "throughput": 8668.25, "total_tokens": 50759904} +{"current_steps": 75305, "total_steps": 204665, "loss": 0.1172, "lr": 1.5935983105290221e-06, "epoch": 1.8397136784501502, "percentage": 36.79, "elapsed_time": "1:37:36", "remaining_time": "2:47:39", "throughput": 8668.28, "total_tokens": 50763040} +{"current_steps": 75310, "total_steps": 204665, "loss": 0.0771, "lr": 1.593529680243218e-06, "epoch": 1.8398358292819974, "percentage": 36.8, "elapsed_time": "1:37:36", "remaining_time": "2:47:39", "throughput": 8668.34, "total_tokens": 50766432} +{"current_steps": 75315, "total_steps": 204665, "loss": 0.0495, "lr": 1.5934610456411064e-06, "epoch": 1.8399579801138446, "percentage": 36.8, "elapsed_time": "1:37:36", "remaining_time": "2:47:38", "throughput": 8668.42, "total_tokens": 50770016} +{"current_steps": 75320, "total_steps": 204665, "loss": 0.1261, "lr": 1.5933924067231864e-06, "epoch": 1.8400801309456918, "percentage": 36.8, "elapsed_time": "1:37:37", "remaining_time": "2:47:38", "throughput": 8668.52, "total_tokens": 50773664} +{"current_steps": 75325, "total_steps": 204665, "loss": 0.1299, "lr": 1.5933237634899573e-06, "epoch": 1.8402022817775388, "percentage": 36.8, "elapsed_time": "1:37:37", "remaining_time": "2:47:38", "throughput": 8668.6, "total_tokens": 50777120} +{"current_steps": 75330, "total_steps": 204665, "loss": 0.0197, "lr": 1.5932551159419184e-06, "epoch": 1.840324432609386, "percentage": 36.81, "elapsed_time": "1:37:37", "remaining_time": "2:47:37", "throughput": 8668.64, "total_tokens": 50780384} +{"current_steps": 75335, "total_steps": 204665, "loss": 0.0031, "lr": 1.5931864640795684e-06, "epoch": 1.8404465834412331, "percentage": 36.81, "elapsed_time": "1:37:38", "remaining_time": "2:47:37", "throughput": 8668.64, "total_tokens": 50783328} +{"current_steps": 75340, "total_steps": 204665, "loss": 0.0049, "lr": 1.5931178079034072e-06, "epoch": 1.8405687342730803, "percentage": 36.81, "elapsed_time": "1:37:38", "remaining_time": "2:47:36", "throughput": 8668.65, "total_tokens": 50786336} +{"current_steps": 75345, "total_steps": 204665, "loss": 0.0367, "lr": 1.5930491474139337e-06, "epoch": 1.8406908851049275, "percentage": 36.81, "elapsed_time": "1:37:38", "remaining_time": "2:47:36", "throughput": 8668.68, "total_tokens": 50789472} +{"current_steps": 75350, "total_steps": 204665, "loss": 0.0835, "lr": 1.592980482611647e-06, "epoch": 1.8408130359367747, "percentage": 36.82, "elapsed_time": "1:37:39", "remaining_time": "2:47:35", "throughput": 8668.72, "total_tokens": 50792672} +{"current_steps": 75355, "total_steps": 204665, "loss": 0.002, "lr": 1.5929118134970468e-06, "epoch": 1.840935186768622, "percentage": 36.82, "elapsed_time": "1:37:39", "remaining_time": "2:47:35", "throughput": 8668.74, "total_tokens": 50795808} +{"current_steps": 75360, "total_steps": 204665, "loss": 0.1351, "lr": 1.5928431400706326e-06, "epoch": 1.841057337600469, "percentage": 36.82, "elapsed_time": "1:37:40", "remaining_time": "2:47:34", "throughput": 8668.84, "total_tokens": 50799392} +{"current_steps": 75365, "total_steps": 204665, "loss": 0.0389, "lr": 1.5927744623329034e-06, "epoch": 1.8411794884323163, "percentage": 36.82, "elapsed_time": "1:37:40", "remaining_time": "2:47:34", "throughput": 8668.87, "total_tokens": 50802592} +{"current_steps": 75370, "total_steps": 204665, "loss": 0.0911, "lr": 1.5927057802843591e-06, "epoch": 1.8413016392641635, "percentage": 36.83, "elapsed_time": "1:37:40", "remaining_time": "2:47:33", "throughput": 8668.99, "total_tokens": 50806432} +{"current_steps": 75375, "total_steps": 204665, "loss": 0.236, "lr": 1.5926370939254987e-06, "epoch": 1.8414237900960106, "percentage": 36.83, "elapsed_time": "1:37:41", "remaining_time": "2:47:33", "throughput": 8669.05, "total_tokens": 50809824} +{"current_steps": 75380, "total_steps": 204665, "loss": 0.002, "lr": 1.5925684032568221e-06, "epoch": 1.8415459409278578, "percentage": 36.83, "elapsed_time": "1:37:41", "remaining_time": "2:47:32", "throughput": 8669.15, "total_tokens": 50813408} +{"current_steps": 75385, "total_steps": 204665, "loss": 0.1323, "lr": 1.592499708278829e-06, "epoch": 1.841668091759705, "percentage": 36.83, "elapsed_time": "1:37:41", "remaining_time": "2:47:32", "throughput": 8669.17, "total_tokens": 50816544} +{"current_steps": 75390, "total_steps": 204665, "loss": 0.0376, "lr": 1.5924310089920181e-06, "epoch": 1.841790242591552, "percentage": 36.84, "elapsed_time": "1:37:42", "remaining_time": "2:47:32", "throughput": 8669.22, "total_tokens": 50819808} +{"current_steps": 75395, "total_steps": 204665, "loss": 0.1633, "lr": 1.59236230539689e-06, "epoch": 1.8419123934233992, "percentage": 36.84, "elapsed_time": "1:37:42", "remaining_time": "2:47:31", "throughput": 8669.29, "total_tokens": 50823264} +{"current_steps": 75400, "total_steps": 204665, "loss": 0.0342, "lr": 1.5922935974939438e-06, "epoch": 1.8420345442552464, "percentage": 36.84, "elapsed_time": "1:37:42", "remaining_time": "2:47:31", "throughput": 8669.34, "total_tokens": 50826592} +{"current_steps": 75405, "total_steps": 204665, "loss": 0.0632, "lr": 1.592224885283679e-06, "epoch": 1.8421566950870936, "percentage": 36.84, "elapsed_time": "1:37:43", "remaining_time": "2:47:30", "throughput": 8669.39, "total_tokens": 50829856} +{"current_steps": 75410, "total_steps": 204665, "loss": 0.1241, "lr": 1.592156168766596e-06, "epoch": 1.8422788459189408, "percentage": 36.85, "elapsed_time": "1:37:43", "remaining_time": "2:47:30", "throughput": 8669.43, "total_tokens": 50833056} +{"current_steps": 75415, "total_steps": 204665, "loss": 0.03, "lr": 1.5920874479431935e-06, "epoch": 1.8424009967507877, "percentage": 36.85, "elapsed_time": "1:37:43", "remaining_time": "2:47:29", "throughput": 8669.46, "total_tokens": 50836256} +{"current_steps": 75420, "total_steps": 204665, "loss": 0.2169, "lr": 1.592018722813972e-06, "epoch": 1.842523147582635, "percentage": 36.85, "elapsed_time": "1:37:44", "remaining_time": "2:47:29", "throughput": 8669.59, "total_tokens": 50840096} +{"current_steps": 75425, "total_steps": 204665, "loss": 0.1498, "lr": 1.5919499933794313e-06, "epoch": 1.842645298414482, "percentage": 36.85, "elapsed_time": "1:37:44", "remaining_time": "2:47:28", "throughput": 8669.62, "total_tokens": 50843232} +{"current_steps": 75430, "total_steps": 204665, "loss": 0.0988, "lr": 1.591881259640071e-06, "epoch": 1.8427674492463293, "percentage": 36.86, "elapsed_time": "1:37:44", "remaining_time": "2:47:28", "throughput": 8669.7, "total_tokens": 50846752} +{"current_steps": 75435, "total_steps": 204665, "loss": 0.0021, "lr": 1.591812521596391e-06, "epoch": 1.8428896000781765, "percentage": 36.86, "elapsed_time": "1:37:45", "remaining_time": "2:47:27", "throughput": 8669.77, "total_tokens": 50850208} +{"current_steps": 75440, "total_steps": 204665, "loss": 0.0425, "lr": 1.5917437792488913e-06, "epoch": 1.8430117509100237, "percentage": 36.86, "elapsed_time": "1:37:45", "remaining_time": "2:47:27", "throughput": 8669.84, "total_tokens": 50853664} +{"current_steps": 75445, "total_steps": 204665, "loss": 0.1005, "lr": 1.5916750325980713e-06, "epoch": 1.8431339017418709, "percentage": 36.86, "elapsed_time": "1:37:45", "remaining_time": "2:47:26", "throughput": 8669.91, "total_tokens": 50857120} +{"current_steps": 75450, "total_steps": 204665, "loss": 0.0014, "lr": 1.5916062816444313e-06, "epoch": 1.843256052573718, "percentage": 36.87, "elapsed_time": "1:37:46", "remaining_time": "2:47:26", "throughput": 8669.94, "total_tokens": 50860256} +{"current_steps": 75455, "total_steps": 204665, "loss": 0.0911, "lr": 1.5915375263884716e-06, "epoch": 1.8433782034055652, "percentage": 36.87, "elapsed_time": "1:37:46", "remaining_time": "2:47:26", "throughput": 8670.01, "total_tokens": 50863712} +{"current_steps": 75460, "total_steps": 204665, "loss": 0.0649, "lr": 1.591468766830692e-06, "epoch": 1.8435003542374124, "percentage": 36.87, "elapsed_time": "1:37:46", "remaining_time": "2:47:25", "throughput": 8670.12, "total_tokens": 50867488} +{"current_steps": 75465, "total_steps": 204665, "loss": 0.0338, "lr": 1.5914000029715922e-06, "epoch": 1.8436225050692596, "percentage": 36.87, "elapsed_time": "1:37:47", "remaining_time": "2:47:25", "throughput": 8670.17, "total_tokens": 50870816} +{"current_steps": 75470, "total_steps": 204665, "loss": 0.0766, "lr": 1.5913312348116726e-06, "epoch": 1.8437446559011068, "percentage": 36.87, "elapsed_time": "1:37:47", "remaining_time": "2:47:24", "throughput": 8670.22, "total_tokens": 50874144} +{"current_steps": 75475, "total_steps": 204665, "loss": 0.0731, "lr": 1.591262462351433e-06, "epoch": 1.843866806732954, "percentage": 36.88, "elapsed_time": "1:37:48", "remaining_time": "2:47:24", "throughput": 8670.3, "total_tokens": 50877664} +{"current_steps": 75480, "total_steps": 204665, "loss": 0.0005, "lr": 1.5911936855913738e-06, "epoch": 1.843988957564801, "percentage": 36.88, "elapsed_time": "1:37:48", "remaining_time": "2:47:23", "throughput": 8670.36, "total_tokens": 50880992} +{"current_steps": 75485, "total_steps": 204665, "loss": 0.1547, "lr": 1.5911249045319954e-06, "epoch": 1.8441111083966482, "percentage": 36.88, "elapsed_time": "1:37:48", "remaining_time": "2:47:23", "throughput": 8670.36, "total_tokens": 50883936} +{"current_steps": 75490, "total_steps": 204665, "loss": 0.001, "lr": 1.5910561191737975e-06, "epoch": 1.8442332592284953, "percentage": 36.88, "elapsed_time": "1:37:49", "remaining_time": "2:47:22", "throughput": 8670.48, "total_tokens": 50887776} +{"current_steps": 75495, "total_steps": 204665, "loss": 0.0598, "lr": 1.5909873295172807e-06, "epoch": 1.8443554100603425, "percentage": 36.89, "elapsed_time": "1:37:49", "remaining_time": "2:47:22", "throughput": 8670.58, "total_tokens": 50891424} +{"current_steps": 75500, "total_steps": 204665, "loss": 0.0868, "lr": 1.590918535562945e-06, "epoch": 1.8444775608921897, "percentage": 36.89, "elapsed_time": "1:37:49", "remaining_time": "2:47:22", "throughput": 8670.66, "total_tokens": 50895008} +{"current_steps": 75505, "total_steps": 204665, "loss": 0.1014, "lr": 1.5908497373112903e-06, "epoch": 1.8445997117240367, "percentage": 36.89, "elapsed_time": "1:37:50", "remaining_time": "2:47:21", "throughput": 8670.71, "total_tokens": 50898272} +{"current_steps": 75510, "total_steps": 204665, "loss": 0.0017, "lr": 1.590780934762818e-06, "epoch": 1.8447218625558839, "percentage": 36.89, "elapsed_time": "1:37:50", "remaining_time": "2:47:21", "throughput": 8670.73, "total_tokens": 50901344} +{"current_steps": 75515, "total_steps": 204665, "loss": 0.0018, "lr": 1.5907121279180276e-06, "epoch": 1.844844013387731, "percentage": 36.9, "elapsed_time": "1:37:50", "remaining_time": "2:47:20", "throughput": 8670.81, "total_tokens": 50904800} +{"current_steps": 75520, "total_steps": 204665, "loss": 0.084, "lr": 1.5906433167774198e-06, "epoch": 1.8449661642195783, "percentage": 36.9, "elapsed_time": "1:37:51", "remaining_time": "2:47:20", "throughput": 8670.87, "total_tokens": 50908192} +{"current_steps": 75525, "total_steps": 204665, "loss": 0.0587, "lr": 1.5905745013414949e-06, "epoch": 1.8450883150514255, "percentage": 36.9, "elapsed_time": "1:37:51", "remaining_time": "2:47:19", "throughput": 8670.89, "total_tokens": 50911264} +{"current_steps": 75530, "total_steps": 204665, "loss": 0.1477, "lr": 1.5905056816107533e-06, "epoch": 1.8452104658832726, "percentage": 36.9, "elapsed_time": "1:37:51", "remaining_time": "2:47:19", "throughput": 8670.94, "total_tokens": 50914528} +{"current_steps": 75535, "total_steps": 204665, "loss": 0.085, "lr": 1.5904368575856958e-06, "epoch": 1.8453326167151198, "percentage": 36.91, "elapsed_time": "1:37:52", "remaining_time": "2:47:18", "throughput": 8671.01, "total_tokens": 50917984} +{"current_steps": 75540, "total_steps": 204665, "loss": 0.0291, "lr": 1.5903680292668224e-06, "epoch": 1.845454767546967, "percentage": 36.91, "elapsed_time": "1:37:52", "remaining_time": "2:47:18", "throughput": 8671.02, "total_tokens": 50920928} +{"current_steps": 75545, "total_steps": 204665, "loss": 0.2256, "lr": 1.590299196654634e-06, "epoch": 1.8455769183788142, "percentage": 36.91, "elapsed_time": "1:37:52", "remaining_time": "2:47:17", "throughput": 8671.09, "total_tokens": 50924384} +{"current_steps": 75550, "total_steps": 204665, "loss": 0.0007, "lr": 1.5902303597496309e-06, "epoch": 1.8456990692106614, "percentage": 36.91, "elapsed_time": "1:37:53", "remaining_time": "2:47:17", "throughput": 8671.23, "total_tokens": 50928352} +{"current_steps": 75555, "total_steps": 204665, "loss": 0.1029, "lr": 1.590161518552314e-06, "epoch": 1.8458212200425086, "percentage": 36.92, "elapsed_time": "1:37:53", "remaining_time": "2:47:16", "throughput": 8671.28, "total_tokens": 50931616} +{"current_steps": 75560, "total_steps": 204665, "loss": 0.0211, "lr": 1.590092673063184e-06, "epoch": 1.8459433708743558, "percentage": 36.92, "elapsed_time": "1:37:53", "remaining_time": "2:47:16", "throughput": 8671.37, "total_tokens": 50935200} +{"current_steps": 75565, "total_steps": 204665, "loss": 0.1201, "lr": 1.5900238232827412e-06, "epoch": 1.846065521706203, "percentage": 36.92, "elapsed_time": "1:37:54", "remaining_time": "2:47:16", "throughput": 8671.44, "total_tokens": 50938656} +{"current_steps": 75570, "total_steps": 204665, "loss": 0.1135, "lr": 1.5899549692114864e-06, "epoch": 1.84618767253805, "percentage": 36.92, "elapsed_time": "1:37:54", "remaining_time": "2:47:15", "throughput": 8671.5, "total_tokens": 50942048} +{"current_steps": 75575, "total_steps": 204665, "loss": 0.1495, "lr": 1.5898861108499205e-06, "epoch": 1.8463098233698971, "percentage": 36.93, "elapsed_time": "1:37:54", "remaining_time": "2:47:15", "throughput": 8671.55, "total_tokens": 50945312} +{"current_steps": 75580, "total_steps": 204665, "loss": 0.0314, "lr": 1.5898172481985442e-06, "epoch": 1.8464319742017443, "percentage": 36.93, "elapsed_time": "1:37:55", "remaining_time": "2:47:14", "throughput": 8671.58, "total_tokens": 50948448} +{"current_steps": 75585, "total_steps": 204665, "loss": 0.0422, "lr": 1.589748381257858e-06, "epoch": 1.8465541250335915, "percentage": 36.93, "elapsed_time": "1:37:55", "remaining_time": "2:47:14", "throughput": 8671.65, "total_tokens": 50951904} +{"current_steps": 75590, "total_steps": 204665, "loss": 0.0283, "lr": 1.5896795100283631e-06, "epoch": 1.8466762758654387, "percentage": 36.93, "elapsed_time": "1:37:56", "remaining_time": "2:47:13", "throughput": 8671.72, "total_tokens": 50955360} +{"current_steps": 75595, "total_steps": 204665, "loss": 0.0983, "lr": 1.5896106345105601e-06, "epoch": 1.8467984266972857, "percentage": 36.94, "elapsed_time": "1:37:56", "remaining_time": "2:47:13", "throughput": 8671.78, "total_tokens": 50958688} +{"current_steps": 75600, "total_steps": 204665, "loss": 0.0621, "lr": 1.5895417547049502e-06, "epoch": 1.8469205775291329, "percentage": 36.94, "elapsed_time": "1:37:56", "remaining_time": "2:47:12", "throughput": 8671.95, "total_tokens": 50962848} +{"current_steps": 75605, "total_steps": 204665, "loss": 0.0487, "lr": 1.5894728706120336e-06, "epoch": 1.84704272836098, "percentage": 36.94, "elapsed_time": "1:37:57", "remaining_time": "2:47:12", "throughput": 8671.99, "total_tokens": 50966048} +{"current_steps": 75610, "total_steps": 204665, "loss": 0.0017, "lr": 1.5894039822323121e-06, "epoch": 1.8471648791928272, "percentage": 36.94, "elapsed_time": "1:37:57", "remaining_time": "2:47:11", "throughput": 8672.05, "total_tokens": 50969440} +{"current_steps": 75615, "total_steps": 204665, "loss": 0.0604, "lr": 1.5893350895662865e-06, "epoch": 1.8472870300246744, "percentage": 36.95, "elapsed_time": "1:37:57", "remaining_time": "2:47:11", "throughput": 8672.06, "total_tokens": 50972448} +{"current_steps": 75620, "total_steps": 204665, "loss": 0.0731, "lr": 1.5892661926144575e-06, "epoch": 1.8474091808565216, "percentage": 36.95, "elapsed_time": "1:37:58", "remaining_time": "2:47:10", "throughput": 8672.13, "total_tokens": 50975904} +{"current_steps": 75625, "total_steps": 204665, "loss": 0.0026, "lr": 1.5891972913773263e-06, "epoch": 1.8475313316883688, "percentage": 36.95, "elapsed_time": "1:37:58", "remaining_time": "2:47:10", "throughput": 8672.15, "total_tokens": 50978912} +{"current_steps": 75630, "total_steps": 204665, "loss": 0.1608, "lr": 1.5891283858553935e-06, "epoch": 1.847653482520216, "percentage": 36.95, "elapsed_time": "1:37:58", "remaining_time": "2:47:10", "throughput": 8672.17, "total_tokens": 50982048} +{"current_steps": 75635, "total_steps": 204665, "loss": 0.1126, "lr": 1.5890594760491606e-06, "epoch": 1.8477756333520632, "percentage": 36.96, "elapsed_time": "1:37:59", "remaining_time": "2:47:09", "throughput": 8672.24, "total_tokens": 50985440} +{"current_steps": 75640, "total_steps": 204665, "loss": 0.0486, "lr": 1.5889905619591292e-06, "epoch": 1.8478977841839104, "percentage": 36.96, "elapsed_time": "1:37:59", "remaining_time": "2:47:09", "throughput": 8672.26, "total_tokens": 50988576} +{"current_steps": 75645, "total_steps": 204665, "loss": 0.1633, "lr": 1.5889216435858001e-06, "epoch": 1.8480199350157576, "percentage": 36.96, "elapsed_time": "1:37:59", "remaining_time": "2:47:08", "throughput": 8672.29, "total_tokens": 50991648} +{"current_steps": 75650, "total_steps": 204665, "loss": 0.1638, "lr": 1.5888527209296743e-06, "epoch": 1.8481420858476048, "percentage": 36.96, "elapsed_time": "1:38:00", "remaining_time": "2:47:08", "throughput": 8672.4, "total_tokens": 50995424} +{"current_steps": 75655, "total_steps": 204665, "loss": 0.0348, "lr": 1.588783793991253e-06, "epoch": 1.848264236679452, "percentage": 36.97, "elapsed_time": "1:38:00", "remaining_time": "2:47:07", "throughput": 8672.47, "total_tokens": 50998816} +{"current_steps": 75660, "total_steps": 204665, "loss": 0.0945, "lr": 1.5887148627710372e-06, "epoch": 1.848386387511299, "percentage": 36.97, "elapsed_time": "1:38:00", "remaining_time": "2:47:07", "throughput": 8672.53, "total_tokens": 51002208} +{"current_steps": 75665, "total_steps": 204665, "loss": 0.0817, "lr": 1.5886459272695292e-06, "epoch": 1.848508538343146, "percentage": 36.97, "elapsed_time": "1:38:01", "remaining_time": "2:47:06", "throughput": 8672.58, "total_tokens": 51005472} +{"current_steps": 75670, "total_steps": 204665, "loss": 0.2804, "lr": 1.5885769874872294e-06, "epoch": 1.8486306891749933, "percentage": 36.97, "elapsed_time": "1:38:01", "remaining_time": "2:47:06", "throughput": 8672.68, "total_tokens": 51009120} +{"current_steps": 75675, "total_steps": 204665, "loss": 0.1359, "lr": 1.5885080434246394e-06, "epoch": 1.8487528400068405, "percentage": 36.98, "elapsed_time": "1:38:01", "remaining_time": "2:47:05", "throughput": 8672.69, "total_tokens": 51012128} +{"current_steps": 75680, "total_steps": 204665, "loss": 0.0364, "lr": 1.5884390950822608e-06, "epoch": 1.8488749908386874, "percentage": 36.98, "elapsed_time": "1:38:02", "remaining_time": "2:47:05", "throughput": 8672.78, "total_tokens": 51015712} +{"current_steps": 75685, "total_steps": 204665, "loss": 0.067, "lr": 1.5883701424605947e-06, "epoch": 1.8489971416705346, "percentage": 36.98, "elapsed_time": "1:38:02", "remaining_time": "2:47:04", "throughput": 8672.81, "total_tokens": 51018848} +{"current_steps": 75690, "total_steps": 204665, "loss": 0.1145, "lr": 1.5883011855601427e-06, "epoch": 1.8491192925023818, "percentage": 36.98, "elapsed_time": "1:38:02", "remaining_time": "2:47:04", "throughput": 8672.88, "total_tokens": 51022240} +{"current_steps": 75695, "total_steps": 204665, "loss": 0.002, "lr": 1.5882322243814063e-06, "epoch": 1.849241443334229, "percentage": 36.98, "elapsed_time": "1:38:03", "remaining_time": "2:47:04", "throughput": 8672.92, "total_tokens": 51025504} +{"current_steps": 75700, "total_steps": 204665, "loss": 0.0279, "lr": 1.588163258924887e-06, "epoch": 1.8493635941660762, "percentage": 36.99, "elapsed_time": "1:38:03", "remaining_time": "2:47:03", "throughput": 8672.99, "total_tokens": 51028896} +{"current_steps": 75705, "total_steps": 204665, "loss": 0.0345, "lr": 1.588094289191086e-06, "epoch": 1.8494857449979234, "percentage": 36.99, "elapsed_time": "1:38:04", "remaining_time": "2:47:03", "throughput": 8673.11, "total_tokens": 51032736} +{"current_steps": 75710, "total_steps": 204665, "loss": 0.1506, "lr": 1.5880253151805054e-06, "epoch": 1.8496078958297706, "percentage": 36.99, "elapsed_time": "1:38:04", "remaining_time": "2:47:02", "throughput": 8673.17, "total_tokens": 51036128} +{"current_steps": 75715, "total_steps": 204665, "loss": 0.0747, "lr": 1.5879563368936463e-06, "epoch": 1.8497300466616178, "percentage": 36.99, "elapsed_time": "1:38:04", "remaining_time": "2:47:02", "throughput": 8673.21, "total_tokens": 51039392} +{"current_steps": 75720, "total_steps": 204665, "loss": 0.0353, "lr": 1.5878873543310109e-06, "epoch": 1.849852197493465, "percentage": 37.0, "elapsed_time": "1:38:05", "remaining_time": "2:47:01", "throughput": 8673.23, "total_tokens": 51042464} +{"current_steps": 75725, "total_steps": 204665, "loss": 0.0627, "lr": 1.5878183674931005e-06, "epoch": 1.8499743483253122, "percentage": 37.0, "elapsed_time": "1:38:05", "remaining_time": "2:47:01", "throughput": 8673.33, "total_tokens": 51046112} +{"current_steps": 75730, "total_steps": 204665, "loss": 0.0591, "lr": 1.5877493763804167e-06, "epoch": 1.8500964991571593, "percentage": 37.0, "elapsed_time": "1:38:05", "remaining_time": "2:47:00", "throughput": 8673.39, "total_tokens": 51049504} +{"current_steps": 75735, "total_steps": 204665, "loss": 0.1982, "lr": 1.5876803809934613e-06, "epoch": 1.8502186499890065, "percentage": 37.0, "elapsed_time": "1:38:06", "remaining_time": "2:47:00", "throughput": 8673.48, "total_tokens": 51053088} +{"current_steps": 75740, "total_steps": 204665, "loss": 0.0013, "lr": 1.5876113813327363e-06, "epoch": 1.8503408008208537, "percentage": 37.01, "elapsed_time": "1:38:06", "remaining_time": "2:46:59", "throughput": 8673.54, "total_tokens": 51056416} +{"current_steps": 75745, "total_steps": 204665, "loss": 0.1587, "lr": 1.587542377398743e-06, "epoch": 1.850462951652701, "percentage": 37.01, "elapsed_time": "1:38:06", "remaining_time": "2:46:59", "throughput": 8673.59, "total_tokens": 51059744} +{"current_steps": 75750, "total_steps": 204665, "loss": 0.1871, "lr": 1.587473369191984e-06, "epoch": 1.8505851024845479, "percentage": 37.01, "elapsed_time": "1:38:07", "remaining_time": "2:46:59", "throughput": 8673.64, "total_tokens": 51063072} +{"current_steps": 75755, "total_steps": 204665, "loss": 0.0596, "lr": 1.58740435671296e-06, "epoch": 1.850707253316395, "percentage": 37.01, "elapsed_time": "1:38:07", "remaining_time": "2:46:58", "throughput": 8673.68, "total_tokens": 51066336} +{"current_steps": 75760, "total_steps": 204665, "loss": 0.0658, "lr": 1.5873353399621737e-06, "epoch": 1.8508294041482423, "percentage": 37.02, "elapsed_time": "1:38:07", "remaining_time": "2:46:58", "throughput": 8673.75, "total_tokens": 51069792} +{"current_steps": 75765, "total_steps": 204665, "loss": 0.0011, "lr": 1.5872663189401272e-06, "epoch": 1.8509515549800895, "percentage": 37.02, "elapsed_time": "1:38:08", "remaining_time": "2:46:57", "throughput": 8673.76, "total_tokens": 51072864} +{"current_steps": 75770, "total_steps": 204665, "loss": 0.1394, "lr": 1.5871972936473217e-06, "epoch": 1.8510737058119364, "percentage": 37.02, "elapsed_time": "1:38:08", "remaining_time": "2:46:57", "throughput": 8673.77, "total_tokens": 51075872} +{"current_steps": 75775, "total_steps": 204665, "loss": 0.037, "lr": 1.5871282640842601e-06, "epoch": 1.8511958566437836, "percentage": 37.02, "elapsed_time": "1:38:08", "remaining_time": "2:46:56", "throughput": 8673.8, "total_tokens": 51079008} +{"current_steps": 75780, "total_steps": 204665, "loss": 0.1345, "lr": 1.5870592302514431e-06, "epoch": 1.8513180074756308, "percentage": 37.03, "elapsed_time": "1:38:09", "remaining_time": "2:46:56", "throughput": 8673.88, "total_tokens": 51082464} +{"current_steps": 75785, "total_steps": 204665, "loss": 0.1049, "lr": 1.5869901921493738e-06, "epoch": 1.851440158307478, "percentage": 37.03, "elapsed_time": "1:38:09", "remaining_time": "2:46:55", "throughput": 8673.99, "total_tokens": 51086304} +{"current_steps": 75790, "total_steps": 204665, "loss": 0.1019, "lr": 1.5869211497785539e-06, "epoch": 1.8515623091393252, "percentage": 37.03, "elapsed_time": "1:38:09", "remaining_time": "2:46:55", "throughput": 8674.05, "total_tokens": 51089696} +{"current_steps": 75795, "total_steps": 204665, "loss": 0.0703, "lr": 1.5868521031394858e-06, "epoch": 1.8516844599711724, "percentage": 37.03, "elapsed_time": "1:38:10", "remaining_time": "2:46:54", "throughput": 8674.11, "total_tokens": 51093024} +{"current_steps": 75800, "total_steps": 204665, "loss": 0.0664, "lr": 1.586783052232671e-06, "epoch": 1.8518066108030196, "percentage": 37.04, "elapsed_time": "1:38:10", "remaining_time": "2:46:54", "throughput": 8674.16, "total_tokens": 51096416} +{"current_steps": 75805, "total_steps": 204665, "loss": 0.1082, "lr": 1.5867139970586124e-06, "epoch": 1.8519287616348667, "percentage": 37.04, "elapsed_time": "1:38:10", "remaining_time": "2:46:54", "throughput": 8674.2, "total_tokens": 51099680} +{"current_steps": 75810, "total_steps": 204665, "loss": 0.0262, "lr": 1.5866449376178115e-06, "epoch": 1.852050912466714, "percentage": 37.04, "elapsed_time": "1:38:11", "remaining_time": "2:46:53", "throughput": 8674.31, "total_tokens": 51103392} +{"current_steps": 75815, "total_steps": 204665, "loss": 0.0365, "lr": 1.5865758739107707e-06, "epoch": 1.8521730632985611, "percentage": 37.04, "elapsed_time": "1:38:11", "remaining_time": "2:46:53", "throughput": 8674.34, "total_tokens": 51106592} +{"current_steps": 75820, "total_steps": 204665, "loss": 0.0021, "lr": 1.5865068059379926e-06, "epoch": 1.8522952141304083, "percentage": 37.05, "elapsed_time": "1:38:12", "remaining_time": "2:46:52", "throughput": 8674.38, "total_tokens": 51109792} +{"current_steps": 75825, "total_steps": 204665, "loss": 0.0016, "lr": 1.5864377336999795e-06, "epoch": 1.8524173649622555, "percentage": 37.05, "elapsed_time": "1:38:12", "remaining_time": "2:46:52", "throughput": 8674.43, "total_tokens": 51113120} +{"current_steps": 75830, "total_steps": 204665, "loss": 0.0932, "lr": 1.5863686571972332e-06, "epoch": 1.8525395157941027, "percentage": 37.05, "elapsed_time": "1:38:12", "remaining_time": "2:46:51", "throughput": 8674.5, "total_tokens": 51116512} +{"current_steps": 75835, "total_steps": 204665, "loss": 0.0318, "lr": 1.5862995764302562e-06, "epoch": 1.8526616666259499, "percentage": 37.05, "elapsed_time": "1:38:13", "remaining_time": "2:46:51", "throughput": 8674.57, "total_tokens": 51119968} +{"current_steps": 75840, "total_steps": 204665, "loss": 0.0396, "lr": 1.5862304913995513e-06, "epoch": 1.8527838174577969, "percentage": 37.06, "elapsed_time": "1:38:13", "remaining_time": "2:46:50", "throughput": 8674.61, "total_tokens": 51123232} +{"current_steps": 75845, "total_steps": 204665, "loss": 0.1938, "lr": 1.58616140210562e-06, "epoch": 1.852905968289644, "percentage": 37.06, "elapsed_time": "1:38:13", "remaining_time": "2:46:50", "throughput": 8674.65, "total_tokens": 51126496} +{"current_steps": 75850, "total_steps": 204665, "loss": 0.0655, "lr": 1.5860923085489656e-06, "epoch": 1.8530281191214912, "percentage": 37.06, "elapsed_time": "1:38:14", "remaining_time": "2:46:49", "throughput": 8674.7, "total_tokens": 51129760} +{"current_steps": 75855, "total_steps": 204665, "loss": 0.0028, "lr": 1.5860232107300906e-06, "epoch": 1.8531502699533384, "percentage": 37.06, "elapsed_time": "1:38:14", "remaining_time": "2:46:49", "throughput": 8674.79, "total_tokens": 51133408} +{"current_steps": 75860, "total_steps": 204665, "loss": 0.2307, "lr": 1.585954108649497e-06, "epoch": 1.8532724207851854, "percentage": 37.07, "elapsed_time": "1:38:14", "remaining_time": "2:46:49", "throughput": 8674.83, "total_tokens": 51136672} +{"current_steps": 75865, "total_steps": 204665, "loss": 0.0909, "lr": 1.5858850023076874e-06, "epoch": 1.8533945716170326, "percentage": 37.07, "elapsed_time": "1:38:15", "remaining_time": "2:46:48", "throughput": 8674.89, "total_tokens": 51140000} +{"current_steps": 75870, "total_steps": 204665, "loss": 0.0877, "lr": 1.585815891705165e-06, "epoch": 1.8535167224488798, "percentage": 37.07, "elapsed_time": "1:38:15", "remaining_time": "2:46:48", "throughput": 8674.91, "total_tokens": 51143072} +{"current_steps": 75875, "total_steps": 204665, "loss": 0.001, "lr": 1.5857467768424312e-06, "epoch": 1.853638873280727, "percentage": 37.07, "elapsed_time": "1:38:15", "remaining_time": "2:46:47", "throughput": 8674.99, "total_tokens": 51146592} +{"current_steps": 75880, "total_steps": 204665, "loss": 0.1578, "lr": 1.5856776577199895e-06, "epoch": 1.8537610241125742, "percentage": 37.08, "elapsed_time": "1:38:16", "remaining_time": "2:46:47", "throughput": 8675.09, "total_tokens": 51150304} +{"current_steps": 75885, "total_steps": 204665, "loss": 0.0734, "lr": 1.5856085343383426e-06, "epoch": 1.8538831749444213, "percentage": 37.08, "elapsed_time": "1:38:16", "remaining_time": "2:46:46", "throughput": 8675.13, "total_tokens": 51153568} +{"current_steps": 75890, "total_steps": 204665, "loss": 0.0787, "lr": 1.5855394066979925e-06, "epoch": 1.8540053257762685, "percentage": 37.08, "elapsed_time": "1:38:16", "remaining_time": "2:46:46", "throughput": 8675.19, "total_tokens": 51156960} +{"current_steps": 75895, "total_steps": 204665, "loss": 0.0606, "lr": 1.5854702747994427e-06, "epoch": 1.8541274766081157, "percentage": 37.08, "elapsed_time": "1:38:17", "remaining_time": "2:46:45", "throughput": 8675.24, "total_tokens": 51160288} +{"current_steps": 75900, "total_steps": 204665, "loss": 0.0024, "lr": 1.5854011386431955e-06, "epoch": 1.854249627439963, "percentage": 37.08, "elapsed_time": "1:38:17", "remaining_time": "2:46:45", "throughput": 8675.3, "total_tokens": 51163616} +{"current_steps": 75905, "total_steps": 204665, "loss": 0.0779, "lr": 1.5853319982297538e-06, "epoch": 1.85437177827181, "percentage": 37.09, "elapsed_time": "1:38:17", "remaining_time": "2:46:44", "throughput": 8675.34, "total_tokens": 51166816} +{"current_steps": 75910, "total_steps": 204665, "loss": 0.1529, "lr": 1.58526285355962e-06, "epoch": 1.8544939291036573, "percentage": 37.09, "elapsed_time": "1:38:18", "remaining_time": "2:46:44", "throughput": 8675.4, "total_tokens": 51170208} +{"current_steps": 75915, "total_steps": 204665, "loss": 0.0861, "lr": 1.5851937046332976e-06, "epoch": 1.8546160799355045, "percentage": 37.09, "elapsed_time": "1:38:18", "remaining_time": "2:46:43", "throughput": 8675.5, "total_tokens": 51173856} +{"current_steps": 75920, "total_steps": 204665, "loss": 0.0991, "lr": 1.5851245514512895e-06, "epoch": 1.8547382307673517, "percentage": 37.09, "elapsed_time": "1:38:19", "remaining_time": "2:46:43", "throughput": 8675.51, "total_tokens": 51176864} +{"current_steps": 75925, "total_steps": 204665, "loss": 0.0879, "lr": 1.5850553940140979e-06, "epoch": 1.8548603815991986, "percentage": 37.1, "elapsed_time": "1:38:19", "remaining_time": "2:46:43", "throughput": 8675.59, "total_tokens": 51180448} +{"current_steps": 75930, "total_steps": 204665, "loss": 0.132, "lr": 1.584986232322226e-06, "epoch": 1.8549825324310458, "percentage": 37.1, "elapsed_time": "1:38:19", "remaining_time": "2:46:42", "throughput": 8675.65, "total_tokens": 51183776} +{"current_steps": 75935, "total_steps": 204665, "loss": 0.0056, "lr": 1.5849170663761772e-06, "epoch": 1.855104683262893, "percentage": 37.1, "elapsed_time": "1:38:20", "remaining_time": "2:46:42", "throughput": 8675.72, "total_tokens": 51187232} +{"current_steps": 75940, "total_steps": 204665, "loss": 0.002, "lr": 1.584847896176454e-06, "epoch": 1.8552268340947402, "percentage": 37.1, "elapsed_time": "1:38:20", "remaining_time": "2:46:41", "throughput": 8675.73, "total_tokens": 51190304} +{"current_steps": 75945, "total_steps": 204665, "loss": 0.0393, "lr": 1.5847787217235595e-06, "epoch": 1.8553489849265874, "percentage": 37.11, "elapsed_time": "1:38:20", "remaining_time": "2:46:41", "throughput": 8675.82, "total_tokens": 51193888} +{"current_steps": 75950, "total_steps": 204665, "loss": 0.0694, "lr": 1.5847095430179972e-06, "epoch": 1.8554711357584344, "percentage": 37.11, "elapsed_time": "1:38:21", "remaining_time": "2:46:40", "throughput": 8675.92, "total_tokens": 51197536} +{"current_steps": 75955, "total_steps": 204665, "loss": 0.1415, "lr": 1.5846403600602695e-06, "epoch": 1.8555932865902816, "percentage": 37.11, "elapsed_time": "1:38:21", "remaining_time": "2:46:40", "throughput": 8675.99, "total_tokens": 51200992} +{"current_steps": 75960, "total_steps": 204665, "loss": 0.0605, "lr": 1.5845711728508802e-06, "epoch": 1.8557154374221287, "percentage": 37.11, "elapsed_time": "1:38:21", "remaining_time": "2:46:39", "throughput": 8676.06, "total_tokens": 51204448} +{"current_steps": 75965, "total_steps": 204665, "loss": 0.1219, "lr": 1.5845019813903318e-06, "epoch": 1.855837588253976, "percentage": 37.12, "elapsed_time": "1:38:22", "remaining_time": "2:46:39", "throughput": 8676.1, "total_tokens": 51207648} +{"current_steps": 75970, "total_steps": 204665, "loss": 0.1198, "lr": 1.5844327856791276e-06, "epoch": 1.8559597390858231, "percentage": 37.12, "elapsed_time": "1:38:22", "remaining_time": "2:46:38", "throughput": 8676.16, "total_tokens": 51211040} +{"current_steps": 75975, "total_steps": 204665, "loss": 0.0704, "lr": 1.5843635857177712e-06, "epoch": 1.8560818899176703, "percentage": 37.12, "elapsed_time": "1:38:22", "remaining_time": "2:46:38", "throughput": 8676.2, "total_tokens": 51214304} +{"current_steps": 75980, "total_steps": 204665, "loss": 0.2049, "lr": 1.584294381506766e-06, "epoch": 1.8562040407495175, "percentage": 37.12, "elapsed_time": "1:38:23", "remaining_time": "2:46:38", "throughput": 8676.2, "total_tokens": 51217248} +{"current_steps": 75985, "total_steps": 204665, "loss": 0.0761, "lr": 1.5842251730466143e-06, "epoch": 1.8563261915813647, "percentage": 37.13, "elapsed_time": "1:38:23", "remaining_time": "2:46:37", "throughput": 8676.32, "total_tokens": 51221088} +{"current_steps": 75990, "total_steps": 204665, "loss": 0.1281, "lr": 1.5841559603378204e-06, "epoch": 1.8564483424132119, "percentage": 37.13, "elapsed_time": "1:38:23", "remaining_time": "2:46:37", "throughput": 8676.34, "total_tokens": 51224224} +{"current_steps": 75995, "total_steps": 204665, "loss": 0.0007, "lr": 1.584086743380887e-06, "epoch": 1.856570493245059, "percentage": 37.13, "elapsed_time": "1:38:24", "remaining_time": "2:46:36", "throughput": 8676.32, "total_tokens": 51226976} +{"current_steps": 76000, "total_steps": 204665, "loss": 0.1139, "lr": 1.584017522176318e-06, "epoch": 1.8566926440769063, "percentage": 37.13, "elapsed_time": "1:38:24", "remaining_time": "2:46:36", "throughput": 8676.43, "total_tokens": 51230688} +{"current_steps": 76005, "total_steps": 204665, "loss": 0.0939, "lr": 1.5839482967246162e-06, "epoch": 1.8568147949087535, "percentage": 37.14, "elapsed_time": "1:38:24", "remaining_time": "2:46:35", "throughput": 8676.54, "total_tokens": 51234464} +{"current_steps": 76010, "total_steps": 204665, "loss": 0.0364, "lr": 1.5838790670262853e-06, "epoch": 1.8569369457406006, "percentage": 37.14, "elapsed_time": "1:38:25", "remaining_time": "2:46:35", "throughput": 8676.59, "total_tokens": 51237728} +{"current_steps": 76015, "total_steps": 204665, "loss": 0.0908, "lr": 1.583809833081829e-06, "epoch": 1.8570590965724476, "percentage": 37.14, "elapsed_time": "1:38:25", "remaining_time": "2:46:34", "throughput": 8676.59, "total_tokens": 51240672} +{"current_steps": 76020, "total_steps": 204665, "loss": 0.0514, "lr": 1.5837405948917506e-06, "epoch": 1.8571812474042948, "percentage": 37.14, "elapsed_time": "1:38:25", "remaining_time": "2:46:34", "throughput": 8676.62, "total_tokens": 51243808} +{"current_steps": 76025, "total_steps": 204665, "loss": 0.0146, "lr": 1.5836713524565535e-06, "epoch": 1.857303398236142, "percentage": 37.15, "elapsed_time": "1:38:26", "remaining_time": "2:46:33", "throughput": 8676.73, "total_tokens": 51247520} +{"current_steps": 76030, "total_steps": 204665, "loss": 0.0418, "lr": 1.583602105776741e-06, "epoch": 1.8574255490679892, "percentage": 37.15, "elapsed_time": "1:38:26", "remaining_time": "2:46:33", "throughput": 8676.79, "total_tokens": 51250912} +{"current_steps": 76035, "total_steps": 204665, "loss": 0.1308, "lr": 1.5835328548528173e-06, "epoch": 1.8575476998998364, "percentage": 37.15, "elapsed_time": "1:38:27", "remaining_time": "2:46:33", "throughput": 8676.96, "total_tokens": 51255200} +{"current_steps": 76040, "total_steps": 204665, "loss": 0.0411, "lr": 1.5834635996852858e-06, "epoch": 1.8576698507316833, "percentage": 37.15, "elapsed_time": "1:38:27", "remaining_time": "2:46:32", "throughput": 8677.01, "total_tokens": 51258464} +{"current_steps": 76045, "total_steps": 204665, "loss": 0.0802, "lr": 1.58339434027465e-06, "epoch": 1.8577920015635305, "percentage": 37.16, "elapsed_time": "1:38:27", "remaining_time": "2:46:32", "throughput": 8677.07, "total_tokens": 51261856} +{"current_steps": 76050, "total_steps": 204665, "loss": 0.0297, "lr": 1.583325076621414e-06, "epoch": 1.8579141523953777, "percentage": 37.16, "elapsed_time": "1:38:28", "remaining_time": "2:46:31", "throughput": 8677.07, "total_tokens": 51264864} +{"current_steps": 76055, "total_steps": 204665, "loss": 0.0735, "lr": 1.5832558087260806e-06, "epoch": 1.858036303227225, "percentage": 37.16, "elapsed_time": "1:38:28", "remaining_time": "2:46:31", "throughput": 8677.08, "total_tokens": 51267872} +{"current_steps": 76060, "total_steps": 204665, "loss": 0.0784, "lr": 1.5831865365891544e-06, "epoch": 1.858158454059072, "percentage": 37.16, "elapsed_time": "1:38:28", "remaining_time": "2:46:30", "throughput": 8677.05, "total_tokens": 51270624} +{"current_steps": 76065, "total_steps": 204665, "loss": 0.0406, "lr": 1.5831172602111385e-06, "epoch": 1.8582806048909193, "percentage": 37.17, "elapsed_time": "1:38:29", "remaining_time": "2:46:30", "throughput": 8677.15, "total_tokens": 51274272} +{"current_steps": 76070, "total_steps": 204665, "loss": 0.0465, "lr": 1.5830479795925372e-06, "epoch": 1.8584027557227665, "percentage": 37.17, "elapsed_time": "1:38:29", "remaining_time": "2:46:29", "throughput": 8677.21, "total_tokens": 51277664} +{"current_steps": 76075, "total_steps": 204665, "loss": 0.2475, "lr": 1.5829786947338544e-06, "epoch": 1.8585249065546137, "percentage": 37.17, "elapsed_time": "1:38:29", "remaining_time": "2:46:29", "throughput": 8677.29, "total_tokens": 51281184} +{"current_steps": 76080, "total_steps": 204665, "loss": 0.1517, "lr": 1.5829094056355934e-06, "epoch": 1.8586470573864609, "percentage": 37.17, "elapsed_time": "1:38:30", "remaining_time": "2:46:28", "throughput": 8677.35, "total_tokens": 51284576} +{"current_steps": 76085, "total_steps": 204665, "loss": 0.127, "lr": 1.5828401122982589e-06, "epoch": 1.858769208218308, "percentage": 37.18, "elapsed_time": "1:38:30", "remaining_time": "2:46:28", "throughput": 8677.4, "total_tokens": 51287904} +{"current_steps": 76090, "total_steps": 204665, "loss": 0.1128, "lr": 1.582770814722354e-06, "epoch": 1.8588913590501552, "percentage": 37.18, "elapsed_time": "1:38:30", "remaining_time": "2:46:28", "throughput": 8677.47, "total_tokens": 51291360} +{"current_steps": 76095, "total_steps": 204665, "loss": 0.1007, "lr": 1.582701512908383e-06, "epoch": 1.8590135098820024, "percentage": 37.18, "elapsed_time": "1:38:31", "remaining_time": "2:46:27", "throughput": 8677.57, "total_tokens": 51295008} +{"current_steps": 76100, "total_steps": 204665, "loss": 0.0684, "lr": 1.5826322068568497e-06, "epoch": 1.8591356607138496, "percentage": 37.18, "elapsed_time": "1:38:31", "remaining_time": "2:46:27", "throughput": 8677.65, "total_tokens": 51298592} +{"current_steps": 76105, "total_steps": 204665, "loss": 0.1603, "lr": 1.5825628965682585e-06, "epoch": 1.8592578115456966, "percentage": 37.19, "elapsed_time": "1:38:31", "remaining_time": "2:46:26", "throughput": 8677.73, "total_tokens": 51302048} +{"current_steps": 76110, "total_steps": 204665, "loss": 0.0359, "lr": 1.5824935820431132e-06, "epoch": 1.8593799623775438, "percentage": 37.19, "elapsed_time": "1:38:32", "remaining_time": "2:46:26", "throughput": 8677.81, "total_tokens": 51305568} +{"current_steps": 76115, "total_steps": 204665, "loss": 0.0026, "lr": 1.582424263281918e-06, "epoch": 1.859502113209391, "percentage": 37.19, "elapsed_time": "1:38:32", "remaining_time": "2:46:25", "throughput": 8677.84, "total_tokens": 51308768} +{"current_steps": 76120, "total_steps": 204665, "loss": 0.0414, "lr": 1.5823549402851768e-06, "epoch": 1.8596242640412382, "percentage": 37.19, "elapsed_time": "1:38:32", "remaining_time": "2:46:25", "throughput": 8677.89, "total_tokens": 51312096} +{"current_steps": 76125, "total_steps": 204665, "loss": 0.0744, "lr": 1.5822856130533937e-06, "epoch": 1.8597464148730853, "percentage": 37.19, "elapsed_time": "1:38:33", "remaining_time": "2:46:24", "throughput": 8677.9, "total_tokens": 51315104} +{"current_steps": 76130, "total_steps": 204665, "loss": 0.0508, "lr": 1.5822162815870734e-06, "epoch": 1.8598685657049323, "percentage": 37.2, "elapsed_time": "1:38:33", "remaining_time": "2:46:24", "throughput": 8677.95, "total_tokens": 51318432} +{"current_steps": 76135, "total_steps": 204665, "loss": 0.1015, "lr": 1.5821469458867194e-06, "epoch": 1.8599907165367795, "percentage": 37.2, "elapsed_time": "1:38:34", "remaining_time": "2:46:23", "throughput": 8678.11, "total_tokens": 51322528} +{"current_steps": 76140, "total_steps": 204665, "loss": 0.1237, "lr": 1.5820776059528363e-06, "epoch": 1.8601128673686267, "percentage": 37.2, "elapsed_time": "1:38:34", "remaining_time": "2:46:23", "throughput": 8678.15, "total_tokens": 51325728} +{"current_steps": 76145, "total_steps": 204665, "loss": 0.0658, "lr": 1.5820082617859283e-06, "epoch": 1.8602350182004739, "percentage": 37.2, "elapsed_time": "1:38:34", "remaining_time": "2:46:23", "throughput": 8678.21, "total_tokens": 51329120} +{"current_steps": 76150, "total_steps": 204665, "loss": 0.1557, "lr": 1.5819389133864997e-06, "epoch": 1.860357169032321, "percentage": 37.21, "elapsed_time": "1:38:35", "remaining_time": "2:46:22", "throughput": 8678.24, "total_tokens": 51332320} +{"current_steps": 76155, "total_steps": 204665, "loss": 0.0885, "lr": 1.5818695607550544e-06, "epoch": 1.8604793198641683, "percentage": 37.21, "elapsed_time": "1:38:35", "remaining_time": "2:46:22", "throughput": 8678.24, "total_tokens": 51335264} +{"current_steps": 76160, "total_steps": 204665, "loss": 0.0928, "lr": 1.5818002038920977e-06, "epoch": 1.8606014706960154, "percentage": 37.21, "elapsed_time": "1:38:35", "remaining_time": "2:46:21", "throughput": 8678.26, "total_tokens": 51338336} +{"current_steps": 76165, "total_steps": 204665, "loss": 0.1215, "lr": 1.5817308427981332e-06, "epoch": 1.8607236215278626, "percentage": 37.21, "elapsed_time": "1:38:36", "remaining_time": "2:46:21", "throughput": 8678.32, "total_tokens": 51341664} +{"current_steps": 76170, "total_steps": 204665, "loss": 0.118, "lr": 1.5816614774736656e-06, "epoch": 1.8608457723597098, "percentage": 37.22, "elapsed_time": "1:38:36", "remaining_time": "2:46:20", "throughput": 8678.36, "total_tokens": 51344864} +{"current_steps": 76175, "total_steps": 204665, "loss": 0.0739, "lr": 1.5815921079191994e-06, "epoch": 1.860967923191557, "percentage": 37.22, "elapsed_time": "1:38:36", "remaining_time": "2:46:20", "throughput": 8678.35, "total_tokens": 51347744} +{"current_steps": 76180, "total_steps": 204665, "loss": 0.0379, "lr": 1.5815227341352389e-06, "epoch": 1.8610900740234042, "percentage": 37.22, "elapsed_time": "1:38:37", "remaining_time": "2:46:19", "throughput": 8678.41, "total_tokens": 51351136} +{"current_steps": 76185, "total_steps": 204665, "loss": 0.0019, "lr": 1.5814533561222885e-06, "epoch": 1.8612122248552514, "percentage": 37.22, "elapsed_time": "1:38:37", "remaining_time": "2:46:19", "throughput": 8678.47, "total_tokens": 51354592} +{"current_steps": 76190, "total_steps": 204665, "loss": 0.0707, "lr": 1.581383973880853e-06, "epoch": 1.8613343756870986, "percentage": 37.23, "elapsed_time": "1:38:37", "remaining_time": "2:46:18", "throughput": 8678.5, "total_tokens": 51357728} +{"current_steps": 76195, "total_steps": 204665, "loss": 0.1285, "lr": 1.5813145874114366e-06, "epoch": 1.8614565265189456, "percentage": 37.23, "elapsed_time": "1:38:38", "remaining_time": "2:46:18", "throughput": 8678.52, "total_tokens": 51360800} +{"current_steps": 76200, "total_steps": 204665, "loss": 0.0436, "lr": 1.5812451967145445e-06, "epoch": 1.8615786773507927, "percentage": 37.23, "elapsed_time": "1:38:38", "remaining_time": "2:46:17", "throughput": 8678.51, "total_tokens": 51363680} +{"current_steps": 76205, "total_steps": 204665, "loss": 0.0765, "lr": 1.5811758017906809e-06, "epoch": 1.86170082818264, "percentage": 37.23, "elapsed_time": "1:38:38", "remaining_time": "2:46:17", "throughput": 8678.55, "total_tokens": 51366880} +{"current_steps": 76210, "total_steps": 204665, "loss": 0.0246, "lr": 1.5811064026403507e-06, "epoch": 1.8618229790144871, "percentage": 37.24, "elapsed_time": "1:38:39", "remaining_time": "2:46:17", "throughput": 8678.6, "total_tokens": 51370208} +{"current_steps": 76215, "total_steps": 204665, "loss": 0.0803, "lr": 1.5810369992640583e-06, "epoch": 1.861945129846334, "percentage": 37.24, "elapsed_time": "1:38:39", "remaining_time": "2:46:16", "throughput": 8678.62, "total_tokens": 51373344} +{"current_steps": 76220, "total_steps": 204665, "loss": 0.0507, "lr": 1.5809675916623087e-06, "epoch": 1.8620672806781813, "percentage": 37.24, "elapsed_time": "1:38:39", "remaining_time": "2:46:16", "throughput": 8678.66, "total_tokens": 51376608} +{"current_steps": 76225, "total_steps": 204665, "loss": 0.057, "lr": 1.5808981798356063e-06, "epoch": 1.8621894315100285, "percentage": 37.24, "elapsed_time": "1:38:40", "remaining_time": "2:46:15", "throughput": 8678.74, "total_tokens": 51380128} +{"current_steps": 76230, "total_steps": 204665, "loss": 0.002, "lr": 1.5808287637844559e-06, "epoch": 1.8623115823418757, "percentage": 37.25, "elapsed_time": "1:38:40", "remaining_time": "2:46:15", "throughput": 8678.83, "total_tokens": 51383712} +{"current_steps": 76235, "total_steps": 204665, "loss": 0.087, "lr": 1.580759343509363e-06, "epoch": 1.8624337331737228, "percentage": 37.25, "elapsed_time": "1:38:40", "remaining_time": "2:46:14", "throughput": 8678.84, "total_tokens": 51386720} +{"current_steps": 76240, "total_steps": 204665, "loss": 0.1132, "lr": 1.5806899190108318e-06, "epoch": 1.86255588400557, "percentage": 37.25, "elapsed_time": "1:38:41", "remaining_time": "2:46:14", "throughput": 8678.87, "total_tokens": 51389856} +{"current_steps": 76245, "total_steps": 204665, "loss": 0.093, "lr": 1.5806204902893674e-06, "epoch": 1.8626780348374172, "percentage": 37.25, "elapsed_time": "1:38:41", "remaining_time": "2:46:13", "throughput": 8678.9, "total_tokens": 51393056} +{"current_steps": 76250, "total_steps": 204665, "loss": 0.2589, "lr": 1.5805510573454744e-06, "epoch": 1.8628001856692644, "percentage": 37.26, "elapsed_time": "1:38:41", "remaining_time": "2:46:13", "throughput": 8678.92, "total_tokens": 51396128} +{"current_steps": 76255, "total_steps": 204665, "loss": 0.0593, "lr": 1.580481620179658e-06, "epoch": 1.8629223365011116, "percentage": 37.26, "elapsed_time": "1:38:42", "remaining_time": "2:46:12", "throughput": 8678.97, "total_tokens": 51399456} +{"current_steps": 76260, "total_steps": 204665, "loss": 0.1307, "lr": 1.580412178792423e-06, "epoch": 1.8630444873329588, "percentage": 37.26, "elapsed_time": "1:38:42", "remaining_time": "2:46:12", "throughput": 8679.07, "total_tokens": 51403168} +{"current_steps": 76265, "total_steps": 204665, "loss": 0.1434, "lr": 1.5803427331842748e-06, "epoch": 1.863166638164806, "percentage": 37.26, "elapsed_time": "1:38:43", "remaining_time": "2:46:12", "throughput": 8679.14, "total_tokens": 51406688} +{"current_steps": 76270, "total_steps": 204665, "loss": 0.0023, "lr": 1.5802732833557182e-06, "epoch": 1.8632887889966532, "percentage": 37.27, "elapsed_time": "1:38:43", "remaining_time": "2:46:11", "throughput": 8679.23, "total_tokens": 51410272} +{"current_steps": 76275, "total_steps": 204665, "loss": 0.111, "lr": 1.580203829307258e-06, "epoch": 1.8634109398285004, "percentage": 37.27, "elapsed_time": "1:38:43", "remaining_time": "2:46:11", "throughput": 8679.33, "total_tokens": 51413984} +{"current_steps": 76280, "total_steps": 204665, "loss": 0.0689, "lr": 1.5801343710393997e-06, "epoch": 1.8635330906603476, "percentage": 37.27, "elapsed_time": "1:38:44", "remaining_time": "2:46:10", "throughput": 8679.39, "total_tokens": 51417376} +{"current_steps": 76285, "total_steps": 204665, "loss": 0.0344, "lr": 1.5800649085526478e-06, "epoch": 1.8636552414921945, "percentage": 37.27, "elapsed_time": "1:38:44", "remaining_time": "2:46:10", "throughput": 8679.44, "total_tokens": 51420704} +{"current_steps": 76290, "total_steps": 204665, "loss": 0.1379, "lr": 1.5799954418475081e-06, "epoch": 1.8637773923240417, "percentage": 37.28, "elapsed_time": "1:38:44", "remaining_time": "2:46:09", "throughput": 8679.48, "total_tokens": 51423904} +{"current_steps": 76295, "total_steps": 204665, "loss": 0.1141, "lr": 1.579925970924486e-06, "epoch": 1.863899543155889, "percentage": 37.28, "elapsed_time": "1:38:45", "remaining_time": "2:46:09", "throughput": 8679.54, "total_tokens": 51427296} +{"current_steps": 76300, "total_steps": 204665, "loss": 0.0843, "lr": 1.5798564957840856e-06, "epoch": 1.864021693987736, "percentage": 37.28, "elapsed_time": "1:38:45", "remaining_time": "2:46:08", "throughput": 8679.69, "total_tokens": 51431328} +{"current_steps": 76305, "total_steps": 204665, "loss": 0.0035, "lr": 1.579787016426813e-06, "epoch": 1.864143844819583, "percentage": 37.28, "elapsed_time": "1:38:45", "remaining_time": "2:46:08", "throughput": 8679.77, "total_tokens": 51434912} +{"current_steps": 76310, "total_steps": 204665, "loss": 0.001, "lr": 1.5797175328531733e-06, "epoch": 1.8642659956514303, "percentage": 37.29, "elapsed_time": "1:38:46", "remaining_time": "2:46:07", "throughput": 8679.84, "total_tokens": 51438304} +{"current_steps": 76315, "total_steps": 204665, "loss": 0.0259, "lr": 1.5796480450636719e-06, "epoch": 1.8643881464832774, "percentage": 37.29, "elapsed_time": "1:38:46", "remaining_time": "2:46:07", "throughput": 8679.9, "total_tokens": 51441696} +{"current_steps": 76320, "total_steps": 204665, "loss": 0.1345, "lr": 1.5795785530588138e-06, "epoch": 1.8645102973151246, "percentage": 37.29, "elapsed_time": "1:38:46", "remaining_time": "2:46:07", "throughput": 8679.95, "total_tokens": 51445024} +{"current_steps": 76325, "total_steps": 204665, "loss": 0.1112, "lr": 1.5795090568391048e-06, "epoch": 1.8646324481469718, "percentage": 37.29, "elapsed_time": "1:38:47", "remaining_time": "2:46:06", "throughput": 8680.01, "total_tokens": 51448352} +{"current_steps": 76330, "total_steps": 204665, "loss": 0.0865, "lr": 1.5794395564050499e-06, "epoch": 1.864754598978819, "percentage": 37.3, "elapsed_time": "1:38:47", "remaining_time": "2:46:06", "throughput": 8680.08, "total_tokens": 51451808} +{"current_steps": 76335, "total_steps": 204665, "loss": 0.0862, "lr": 1.5793700517571547e-06, "epoch": 1.8648767498106662, "percentage": 37.3, "elapsed_time": "1:38:47", "remaining_time": "2:46:05", "throughput": 8680.08, "total_tokens": 51454688} +{"current_steps": 76340, "total_steps": 204665, "loss": 0.1357, "lr": 1.5793005428959245e-06, "epoch": 1.8649989006425134, "percentage": 37.3, "elapsed_time": "1:38:48", "remaining_time": "2:46:05", "throughput": 8680.16, "total_tokens": 51458272} +{"current_steps": 76345, "total_steps": 204665, "loss": 0.0021, "lr": 1.5792310298218651e-06, "epoch": 1.8651210514743606, "percentage": 37.3, "elapsed_time": "1:38:48", "remaining_time": "2:46:04", "throughput": 8680.21, "total_tokens": 51461536} +{"current_steps": 76350, "total_steps": 204665, "loss": 0.0451, "lr": 1.579161512535482e-06, "epoch": 1.8652432023062078, "percentage": 37.3, "elapsed_time": "1:38:48", "remaining_time": "2:46:04", "throughput": 8680.22, "total_tokens": 51464608} +{"current_steps": 76355, "total_steps": 204665, "loss": 0.1823, "lr": 1.5790919910372806e-06, "epoch": 1.865365353138055, "percentage": 37.31, "elapsed_time": "1:38:49", "remaining_time": "2:46:03", "throughput": 8680.25, "total_tokens": 51467744} +{"current_steps": 76360, "total_steps": 204665, "loss": 0.0786, "lr": 1.579022465327766e-06, "epoch": 1.8654875039699021, "percentage": 37.31, "elapsed_time": "1:38:49", "remaining_time": "2:46:03", "throughput": 8680.26, "total_tokens": 51470752} +{"current_steps": 76365, "total_steps": 204665, "loss": 0.0775, "lr": 1.578952935407445e-06, "epoch": 1.8656096548017493, "percentage": 37.31, "elapsed_time": "1:38:49", "remaining_time": "2:46:02", "throughput": 8680.26, "total_tokens": 51473760} +{"current_steps": 76370, "total_steps": 204665, "loss": 0.0017, "lr": 1.578883401276822e-06, "epoch": 1.8657318056335965, "percentage": 37.31, "elapsed_time": "1:38:50", "remaining_time": "2:46:02", "throughput": 8680.28, "total_tokens": 51476832} +{"current_steps": 76375, "total_steps": 204665, "loss": 0.1384, "lr": 1.5788138629364033e-06, "epoch": 1.8658539564654435, "percentage": 37.32, "elapsed_time": "1:38:50", "remaining_time": "2:46:01", "throughput": 8680.34, "total_tokens": 51480224} +{"current_steps": 76380, "total_steps": 204665, "loss": 0.0977, "lr": 1.5787443203866947e-06, "epoch": 1.8659761072972907, "percentage": 37.32, "elapsed_time": "1:38:51", "remaining_time": "2:46:01", "throughput": 8680.37, "total_tokens": 51483360} +{"current_steps": 76385, "total_steps": 204665, "loss": 0.0297, "lr": 1.5786747736282019e-06, "epoch": 1.8660982581291379, "percentage": 37.32, "elapsed_time": "1:38:51", "remaining_time": "2:46:01", "throughput": 8680.41, "total_tokens": 51486624} +{"current_steps": 76390, "total_steps": 204665, "loss": 0.1164, "lr": 1.5786052226614301e-06, "epoch": 1.866220408960985, "percentage": 37.32, "elapsed_time": "1:38:51", "remaining_time": "2:46:00", "throughput": 8680.46, "total_tokens": 51489952} +{"current_steps": 76395, "total_steps": 204665, "loss": 0.0587, "lr": 1.5785356674868857e-06, "epoch": 1.866342559792832, "percentage": 37.33, "elapsed_time": "1:38:52", "remaining_time": "2:46:00", "throughput": 8680.55, "total_tokens": 51493536} +{"current_steps": 76400, "total_steps": 204665, "loss": 0.0442, "lr": 1.5784661081050743e-06, "epoch": 1.8664647106246792, "percentage": 37.33, "elapsed_time": "1:38:52", "remaining_time": "2:45:59", "throughput": 8680.58, "total_tokens": 51496672} +{"current_steps": 76405, "total_steps": 204665, "loss": 0.1155, "lr": 1.5783965445165018e-06, "epoch": 1.8665868614565264, "percentage": 37.33, "elapsed_time": "1:38:52", "remaining_time": "2:45:59", "throughput": 8680.62, "total_tokens": 51499872} +{"current_steps": 76410, "total_steps": 204665, "loss": 0.1668, "lr": 1.5783269767216738e-06, "epoch": 1.8667090122883736, "percentage": 37.33, "elapsed_time": "1:38:53", "remaining_time": "2:45:58", "throughput": 8680.78, "total_tokens": 51503968} +{"current_steps": 76415, "total_steps": 204665, "loss": 0.1493, "lr": 1.5782574047210968e-06, "epoch": 1.8668311631202208, "percentage": 37.34, "elapsed_time": "1:38:53", "remaining_time": "2:45:58", "throughput": 8680.83, "total_tokens": 51507232} +{"current_steps": 76420, "total_steps": 204665, "loss": 0.0012, "lr": 1.5781878285152765e-06, "epoch": 1.866953313952068, "percentage": 37.34, "elapsed_time": "1:38:53", "remaining_time": "2:45:57", "throughput": 8680.9, "total_tokens": 51510752} +{"current_steps": 76425, "total_steps": 204665, "loss": 0.0636, "lr": 1.5781182481047184e-06, "epoch": 1.8670754647839152, "percentage": 37.34, "elapsed_time": "1:38:54", "remaining_time": "2:45:57", "throughput": 8680.91, "total_tokens": 51513760} +{"current_steps": 76430, "total_steps": 204665, "loss": 0.0952, "lr": 1.5780486634899291e-06, "epoch": 1.8671976156157624, "percentage": 37.34, "elapsed_time": "1:38:54", "remaining_time": "2:45:56", "throughput": 8681.0, "total_tokens": 51517408} +{"current_steps": 76435, "total_steps": 204665, "loss": 0.1567, "lr": 1.5779790746714145e-06, "epoch": 1.8673197664476096, "percentage": 37.35, "elapsed_time": "1:38:54", "remaining_time": "2:45:56", "throughput": 8681.05, "total_tokens": 51520736} +{"current_steps": 76440, "total_steps": 204665, "loss": 0.0368, "lr": 1.5779094816496806e-06, "epoch": 1.8674419172794567, "percentage": 37.35, "elapsed_time": "1:38:55", "remaining_time": "2:45:56", "throughput": 8681.12, "total_tokens": 51524192} +{"current_steps": 76445, "total_steps": 204665, "loss": 0.1951, "lr": 1.5778398844252334e-06, "epoch": 1.867564068111304, "percentage": 37.35, "elapsed_time": "1:38:55", "remaining_time": "2:45:55", "throughput": 8681.22, "total_tokens": 51527904} +{"current_steps": 76450, "total_steps": 204665, "loss": 0.0265, "lr": 1.5777702829985794e-06, "epoch": 1.8676862189431511, "percentage": 37.35, "elapsed_time": "1:38:55", "remaining_time": "2:45:55", "throughput": 8681.28, "total_tokens": 51531296} +{"current_steps": 76455, "total_steps": 204665, "loss": 0.0533, "lr": 1.577700677370224e-06, "epoch": 1.8678083697749983, "percentage": 37.36, "elapsed_time": "1:38:56", "remaining_time": "2:45:54", "throughput": 8681.31, "total_tokens": 51534496} +{"current_steps": 76460, "total_steps": 204665, "loss": 0.1515, "lr": 1.5776310675406743e-06, "epoch": 1.8679305206068453, "percentage": 37.36, "elapsed_time": "1:38:56", "remaining_time": "2:45:54", "throughput": 8681.41, "total_tokens": 51538144} +{"current_steps": 76465, "total_steps": 204665, "loss": 0.0012, "lr": 1.577561453510436e-06, "epoch": 1.8680526714386925, "percentage": 37.36, "elapsed_time": "1:38:56", "remaining_time": "2:45:53", "throughput": 8681.47, "total_tokens": 51541536} +{"current_steps": 76470, "total_steps": 204665, "loss": 0.0686, "lr": 1.5774918352800156e-06, "epoch": 1.8681748222705397, "percentage": 37.36, "elapsed_time": "1:38:57", "remaining_time": "2:45:53", "throughput": 8681.58, "total_tokens": 51545376} +{"current_steps": 76475, "total_steps": 204665, "loss": 0.0261, "lr": 1.5774222128499188e-06, "epoch": 1.8682969731023868, "percentage": 37.37, "elapsed_time": "1:38:57", "remaining_time": "2:45:52", "throughput": 8681.7, "total_tokens": 51549152} +{"current_steps": 76480, "total_steps": 204665, "loss": 0.0014, "lr": 1.5773525862206528e-06, "epoch": 1.868419123934234, "percentage": 37.37, "elapsed_time": "1:38:58", "remaining_time": "2:45:52", "throughput": 8681.79, "total_tokens": 51552736} +{"current_steps": 76485, "total_steps": 204665, "loss": 0.078, "lr": 1.5772829553927235e-06, "epoch": 1.868541274766081, "percentage": 37.37, "elapsed_time": "1:38:58", "remaining_time": "2:45:52", "throughput": 8681.9, "total_tokens": 51556576} +{"current_steps": 76490, "total_steps": 204665, "loss": 0.0085, "lr": 1.577213320366637e-06, "epoch": 1.8686634255979282, "percentage": 37.37, "elapsed_time": "1:38:58", "remaining_time": "2:45:51", "throughput": 8681.93, "total_tokens": 51559776} +{"current_steps": 76495, "total_steps": 204665, "loss": 0.0239, "lr": 1.5771436811429002e-06, "epoch": 1.8687855764297754, "percentage": 37.38, "elapsed_time": "1:38:59", "remaining_time": "2:45:51", "throughput": 8681.96, "total_tokens": 51562912} +{"current_steps": 76500, "total_steps": 204665, "loss": 0.1157, "lr": 1.5770740377220192e-06, "epoch": 1.8689077272616226, "percentage": 37.38, "elapsed_time": "1:38:59", "remaining_time": "2:45:50", "throughput": 8682.03, "total_tokens": 51566368} +{"current_steps": 76505, "total_steps": 204665, "loss": 0.0966, "lr": 1.5770043901045007e-06, "epoch": 1.8690298780934698, "percentage": 37.38, "elapsed_time": "1:38:59", "remaining_time": "2:45:50", "throughput": 8682.12, "total_tokens": 51569952} +{"current_steps": 76510, "total_steps": 204665, "loss": 0.2293, "lr": 1.5769347382908511e-06, "epoch": 1.869152028925317, "percentage": 37.38, "elapsed_time": "1:39:00", "remaining_time": "2:45:49", "throughput": 8682.13, "total_tokens": 51573024} +{"current_steps": 76515, "total_steps": 204665, "loss": 0.1675, "lr": 1.5768650822815767e-06, "epoch": 1.8692741797571641, "percentage": 37.39, "elapsed_time": "1:39:00", "remaining_time": "2:45:49", "throughput": 8682.21, "total_tokens": 51576544} +{"current_steps": 76520, "total_steps": 204665, "loss": 0.0407, "lr": 1.5767954220771844e-06, "epoch": 1.8693963305890113, "percentage": 37.39, "elapsed_time": "1:39:00", "remaining_time": "2:45:48", "throughput": 8682.22, "total_tokens": 51579616} +{"current_steps": 76525, "total_steps": 204665, "loss": 0.1259, "lr": 1.5767257576781808e-06, "epoch": 1.8695184814208585, "percentage": 37.39, "elapsed_time": "1:39:01", "remaining_time": "2:45:48", "throughput": 8682.3, "total_tokens": 51583136} +{"current_steps": 76530, "total_steps": 204665, "loss": 0.0756, "lr": 1.576656089085072e-06, "epoch": 1.8696406322527057, "percentage": 37.39, "elapsed_time": "1:39:01", "remaining_time": "2:45:47", "throughput": 8682.34, "total_tokens": 51586400} +{"current_steps": 76535, "total_steps": 204665, "loss": 0.1691, "lr": 1.5765864162983654e-06, "epoch": 1.869762783084553, "percentage": 37.4, "elapsed_time": "1:39:01", "remaining_time": "2:45:47", "throughput": 8682.38, "total_tokens": 51589600} +{"current_steps": 76540, "total_steps": 204665, "loss": 0.1084, "lr": 1.576516739318567e-06, "epoch": 1.8698849339164, "percentage": 37.4, "elapsed_time": "1:39:02", "remaining_time": "2:45:47", "throughput": 8682.41, "total_tokens": 51592800} +{"current_steps": 76545, "total_steps": 204665, "loss": 0.0868, "lr": 1.5764470581461842e-06, "epoch": 1.8700070847482473, "percentage": 37.4, "elapsed_time": "1:39:02", "remaining_time": "2:45:46", "throughput": 8682.46, "total_tokens": 51596128} +{"current_steps": 76550, "total_steps": 204665, "loss": 0.1062, "lr": 1.576377372781723e-06, "epoch": 1.8701292355800943, "percentage": 37.4, "elapsed_time": "1:39:02", "remaining_time": "2:45:46", "throughput": 8682.51, "total_tokens": 51599392} +{"current_steps": 76555, "total_steps": 204665, "loss": 0.0709, "lr": 1.5763076832256905e-06, "epoch": 1.8702513864119414, "percentage": 37.41, "elapsed_time": "1:39:03", "remaining_time": "2:45:45", "throughput": 8682.61, "total_tokens": 51603104} +{"current_steps": 76560, "total_steps": 204665, "loss": 0.0825, "lr": 1.5762379894785938e-06, "epoch": 1.8703735372437886, "percentage": 37.41, "elapsed_time": "1:39:03", "remaining_time": "2:45:45", "throughput": 8682.64, "total_tokens": 51606304} +{"current_steps": 76565, "total_steps": 204665, "loss": 0.0386, "lr": 1.5761682915409389e-06, "epoch": 1.8704956880756358, "percentage": 37.41, "elapsed_time": "1:39:03", "remaining_time": "2:45:44", "throughput": 8682.7, "total_tokens": 51609632} +{"current_steps": 76570, "total_steps": 204665, "loss": 0.0324, "lr": 1.5760985894132336e-06, "epoch": 1.870617838907483, "percentage": 37.41, "elapsed_time": "1:39:04", "remaining_time": "2:45:44", "throughput": 8682.72, "total_tokens": 51612768} +{"current_steps": 76575, "total_steps": 204665, "loss": 0.0845, "lr": 1.5760288830959846e-06, "epoch": 1.87073998973933, "percentage": 37.41, "elapsed_time": "1:39:04", "remaining_time": "2:45:43", "throughput": 8682.75, "total_tokens": 51615968} +{"current_steps": 76580, "total_steps": 204665, "loss": 0.0019, "lr": 1.5759591725896986e-06, "epoch": 1.8708621405711772, "percentage": 37.42, "elapsed_time": "1:39:05", "remaining_time": "2:45:43", "throughput": 8682.79, "total_tokens": 51619168} +{"current_steps": 76585, "total_steps": 204665, "loss": 0.047, "lr": 1.5758894578948823e-06, "epoch": 1.8709842914030244, "percentage": 37.42, "elapsed_time": "1:39:05", "remaining_time": "2:45:42", "throughput": 8682.86, "total_tokens": 51622624} +{"current_steps": 76590, "total_steps": 204665, "loss": 0.102, "lr": 1.575819739012043e-06, "epoch": 1.8711064422348715, "percentage": 37.42, "elapsed_time": "1:39:05", "remaining_time": "2:45:42", "throughput": 8682.92, "total_tokens": 51626016} +{"current_steps": 76595, "total_steps": 204665, "loss": 0.0222, "lr": 1.5757500159416877e-06, "epoch": 1.8712285930667187, "percentage": 37.42, "elapsed_time": "1:39:06", "remaining_time": "2:45:42", "throughput": 8682.98, "total_tokens": 51629408} +{"current_steps": 76600, "total_steps": 204665, "loss": 0.072, "lr": 1.5756802886843237e-06, "epoch": 1.871350743898566, "percentage": 37.43, "elapsed_time": "1:39:06", "remaining_time": "2:45:41", "throughput": 8683.05, "total_tokens": 51632864} +{"current_steps": 76605, "total_steps": 204665, "loss": 0.0833, "lr": 1.5756105572404575e-06, "epoch": 1.8714728947304131, "percentage": 37.43, "elapsed_time": "1:39:06", "remaining_time": "2:45:41", "throughput": 8683.06, "total_tokens": 51635936} +{"current_steps": 76610, "total_steps": 204665, "loss": 0.1141, "lr": 1.5755408216105966e-06, "epoch": 1.8715950455622603, "percentage": 37.43, "elapsed_time": "1:39:07", "remaining_time": "2:45:40", "throughput": 8683.15, "total_tokens": 51639520} +{"current_steps": 76615, "total_steps": 204665, "loss": 0.1466, "lr": 1.5754710817952481e-06, "epoch": 1.8717171963941075, "percentage": 37.43, "elapsed_time": "1:39:07", "remaining_time": "2:45:40", "throughput": 8683.19, "total_tokens": 51642656} +{"current_steps": 76620, "total_steps": 204665, "loss": 0.1726, "lr": 1.5754013377949189e-06, "epoch": 1.8718393472259547, "percentage": 37.44, "elapsed_time": "1:39:07", "remaining_time": "2:45:39", "throughput": 8683.29, "total_tokens": 51646368} +{"current_steps": 76625, "total_steps": 204665, "loss": 0.0896, "lr": 1.5753315896101165e-06, "epoch": 1.8719614980578019, "percentage": 37.44, "elapsed_time": "1:39:08", "remaining_time": "2:45:39", "throughput": 8683.34, "total_tokens": 51649632} +{"current_steps": 76630, "total_steps": 204665, "loss": 0.0933, "lr": 1.575261837241348e-06, "epoch": 1.872083648889649, "percentage": 37.44, "elapsed_time": "1:39:08", "remaining_time": "2:45:38", "throughput": 8683.61, "total_tokens": 51654624} +{"current_steps": 76635, "total_steps": 204665, "loss": 0.0481, "lr": 1.575192080689121e-06, "epoch": 1.8722057997214963, "percentage": 37.44, "elapsed_time": "1:39:08", "remaining_time": "2:45:38", "throughput": 8683.7, "total_tokens": 51658208} +{"current_steps": 76640, "total_steps": 204665, "loss": 0.1073, "lr": 1.5751223199539422e-06, "epoch": 1.8723279505533432, "percentage": 37.45, "elapsed_time": "1:39:09", "remaining_time": "2:45:37", "throughput": 8683.73, "total_tokens": 51661344} +{"current_steps": 76645, "total_steps": 204665, "loss": 0.0008, "lr": 1.5750525550363192e-06, "epoch": 1.8724501013851904, "percentage": 37.45, "elapsed_time": "1:39:09", "remaining_time": "2:45:37", "throughput": 8683.77, "total_tokens": 51664608} +{"current_steps": 76650, "total_steps": 204665, "loss": 0.0017, "lr": 1.5749827859367594e-06, "epoch": 1.8725722522170376, "percentage": 37.45, "elapsed_time": "1:39:09", "remaining_time": "2:45:37", "throughput": 8683.86, "total_tokens": 51668192} +{"current_steps": 76655, "total_steps": 204665, "loss": 0.1194, "lr": 1.57491301265577e-06, "epoch": 1.8726944030488848, "percentage": 37.45, "elapsed_time": "1:39:10", "remaining_time": "2:45:36", "throughput": 8683.99, "total_tokens": 51672096} +{"current_steps": 76660, "total_steps": 204665, "loss": 0.0222, "lr": 1.5748432351938587e-06, "epoch": 1.872816553880732, "percentage": 37.46, "elapsed_time": "1:39:10", "remaining_time": "2:45:36", "throughput": 8684.06, "total_tokens": 51675552} +{"current_steps": 76665, "total_steps": 204665, "loss": 0.0675, "lr": 1.5747734535515327e-06, "epoch": 1.872938704712579, "percentage": 37.46, "elapsed_time": "1:39:10", "remaining_time": "2:45:35", "throughput": 8684.17, "total_tokens": 51679264} +{"current_steps": 76670, "total_steps": 204665, "loss": 0.0698, "lr": 1.5747036677292998e-06, "epoch": 1.8730608555444261, "percentage": 37.46, "elapsed_time": "1:39:11", "remaining_time": "2:45:35", "throughput": 8684.2, "total_tokens": 51682464} +{"current_steps": 76675, "total_steps": 204665, "loss": 0.1214, "lr": 1.5746338777276668e-06, "epoch": 1.8731830063762733, "percentage": 37.46, "elapsed_time": "1:39:11", "remaining_time": "2:45:34", "throughput": 8684.23, "total_tokens": 51685600} +{"current_steps": 76680, "total_steps": 204665, "loss": 0.0604, "lr": 1.5745640835471422e-06, "epoch": 1.8733051572081205, "percentage": 37.47, "elapsed_time": "1:39:12", "remaining_time": "2:45:34", "throughput": 8684.29, "total_tokens": 51688992} +{"current_steps": 76685, "total_steps": 204665, "loss": 0.0427, "lr": 1.5744942851882326e-06, "epoch": 1.8734273080399677, "percentage": 37.47, "elapsed_time": "1:39:12", "remaining_time": "2:45:33", "throughput": 8684.32, "total_tokens": 51692128} +{"current_steps": 76690, "total_steps": 204665, "loss": 0.1883, "lr": 1.5744244826514463e-06, "epoch": 1.873549458871815, "percentage": 37.47, "elapsed_time": "1:39:12", "remaining_time": "2:45:33", "throughput": 8684.38, "total_tokens": 51695520} +{"current_steps": 76695, "total_steps": 204665, "loss": 0.0018, "lr": 1.5743546759372906e-06, "epoch": 1.873671609703662, "percentage": 37.47, "elapsed_time": "1:39:13", "remaining_time": "2:45:32", "throughput": 8684.46, "total_tokens": 51698976} +{"current_steps": 76700, "total_steps": 204665, "loss": 0.1953, "lr": 1.5742848650462731e-06, "epoch": 1.8737937605355093, "percentage": 37.48, "elapsed_time": "1:39:13", "remaining_time": "2:45:32", "throughput": 8684.52, "total_tokens": 51702368} +{"current_steps": 76705, "total_steps": 204665, "loss": 0.2132, "lr": 1.574215049978902e-06, "epoch": 1.8739159113673565, "percentage": 37.48, "elapsed_time": "1:39:13", "remaining_time": "2:45:32", "throughput": 8684.65, "total_tokens": 51706208} +{"current_steps": 76710, "total_steps": 204665, "loss": 0.0495, "lr": 1.5741452307356842e-06, "epoch": 1.8740380621992037, "percentage": 37.48, "elapsed_time": "1:39:14", "remaining_time": "2:45:31", "throughput": 8684.68, "total_tokens": 51709408} +{"current_steps": 76715, "total_steps": 204665, "loss": 0.0024, "lr": 1.574075407317128e-06, "epoch": 1.8741602130310508, "percentage": 37.48, "elapsed_time": "1:39:14", "remaining_time": "2:45:31", "throughput": 8684.75, "total_tokens": 51712864} +{"current_steps": 76720, "total_steps": 204665, "loss": 0.0219, "lr": 1.5740055797237408e-06, "epoch": 1.874282363862898, "percentage": 37.49, "elapsed_time": "1:39:14", "remaining_time": "2:45:30", "throughput": 8684.77, "total_tokens": 51715936} +{"current_steps": 76725, "total_steps": 204665, "loss": 0.0642, "lr": 1.573935747956031e-06, "epoch": 1.8744045146947452, "percentage": 37.49, "elapsed_time": "1:39:15", "remaining_time": "2:45:30", "throughput": 8684.81, "total_tokens": 51719136} +{"current_steps": 76730, "total_steps": 204665, "loss": 0.1189, "lr": 1.5738659120145057e-06, "epoch": 1.8745266655265922, "percentage": 37.49, "elapsed_time": "1:39:15", "remaining_time": "2:45:29", "throughput": 8684.88, "total_tokens": 51722592} +{"current_steps": 76735, "total_steps": 204665, "loss": 0.1064, "lr": 1.5737960718996734e-06, "epoch": 1.8746488163584394, "percentage": 37.49, "elapsed_time": "1:39:15", "remaining_time": "2:45:29", "throughput": 8684.92, "total_tokens": 51725792} +{"current_steps": 76740, "total_steps": 204665, "loss": 0.0906, "lr": 1.5737262276120417e-06, "epoch": 1.8747709671902866, "percentage": 37.5, "elapsed_time": "1:39:16", "remaining_time": "2:45:28", "throughput": 8685.04, "total_tokens": 51729696} +{"current_steps": 76745, "total_steps": 204665, "loss": 0.0356, "lr": 1.5736563791521188e-06, "epoch": 1.8748931180221338, "percentage": 37.5, "elapsed_time": "1:39:16", "remaining_time": "2:45:28", "throughput": 8685.1, "total_tokens": 51733024} +{"current_steps": 76750, "total_steps": 204665, "loss": 0.0008, "lr": 1.5735865265204118e-06, "epoch": 1.8750152688539807, "percentage": 37.5, "elapsed_time": "1:39:16", "remaining_time": "2:45:28", "throughput": 8685.28, "total_tokens": 51737312} +{"current_steps": 76755, "total_steps": 204665, "loss": 0.1585, "lr": 1.5735166697174296e-06, "epoch": 1.875137419685828, "percentage": 37.5, "elapsed_time": "1:39:17", "remaining_time": "2:45:27", "throughput": 8685.31, "total_tokens": 51740448} +{"current_steps": 76760, "total_steps": 204665, "loss": 0.0831, "lr": 1.5734468087436801e-06, "epoch": 1.8752595705176751, "percentage": 37.51, "elapsed_time": "1:39:17", "remaining_time": "2:45:27", "throughput": 8685.35, "total_tokens": 51743712} +{"current_steps": 76765, "total_steps": 204665, "loss": 0.0483, "lr": 1.573376943599671e-06, "epoch": 1.8753817213495223, "percentage": 37.51, "elapsed_time": "1:39:17", "remaining_time": "2:45:26", "throughput": 8685.45, "total_tokens": 51747360} +{"current_steps": 76770, "total_steps": 204665, "loss": 0.0009, "lr": 1.5733070742859105e-06, "epoch": 1.8755038721813695, "percentage": 37.51, "elapsed_time": "1:39:18", "remaining_time": "2:45:26", "throughput": 8685.49, "total_tokens": 51750560} +{"current_steps": 76775, "total_steps": 204665, "loss": 0.0011, "lr": 1.5732372008029069e-06, "epoch": 1.8756260230132167, "percentage": 37.51, "elapsed_time": "1:39:18", "remaining_time": "2:45:25", "throughput": 8685.58, "total_tokens": 51754144} +{"current_steps": 76780, "total_steps": 204665, "loss": 0.0656, "lr": 1.5731673231511683e-06, "epoch": 1.8757481738450639, "percentage": 37.51, "elapsed_time": "1:39:18", "remaining_time": "2:45:25", "throughput": 8685.67, "total_tokens": 51757728} +{"current_steps": 76785, "total_steps": 204665, "loss": 0.1484, "lr": 1.5730974413312023e-06, "epoch": 1.875870324676911, "percentage": 37.52, "elapsed_time": "1:39:19", "remaining_time": "2:45:24", "throughput": 8685.77, "total_tokens": 51761440} +{"current_steps": 76790, "total_steps": 204665, "loss": 0.0709, "lr": 1.573027555343518e-06, "epoch": 1.8759924755087583, "percentage": 37.52, "elapsed_time": "1:39:19", "remaining_time": "2:45:24", "throughput": 8685.87, "total_tokens": 51765088} +{"current_steps": 76795, "total_steps": 204665, "loss": 0.0387, "lr": 1.5729576651886229e-06, "epoch": 1.8761146263406054, "percentage": 37.52, "elapsed_time": "1:39:20", "remaining_time": "2:45:23", "throughput": 8686.01, "total_tokens": 51769056} +{"current_steps": 76800, "total_steps": 204665, "loss": 0.117, "lr": 1.5728877708670258e-06, "epoch": 1.8762367771724526, "percentage": 37.52, "elapsed_time": "1:39:20", "remaining_time": "2:45:23", "throughput": 8686.06, "total_tokens": 51772384} +{"current_steps": 76805, "total_steps": 204665, "loss": 0.0016, "lr": 1.5728178723792347e-06, "epoch": 1.8763589280042998, "percentage": 37.53, "elapsed_time": "1:39:20", "remaining_time": "2:45:23", "throughput": 8686.09, "total_tokens": 51775520} +{"current_steps": 76810, "total_steps": 204665, "loss": 0.2674, "lr": 1.5727479697257578e-06, "epoch": 1.876481078836147, "percentage": 37.53, "elapsed_time": "1:39:21", "remaining_time": "2:45:22", "throughput": 8686.13, "total_tokens": 51778720} +{"current_steps": 76815, "total_steps": 204665, "loss": 0.1556, "lr": 1.5726780629071037e-06, "epoch": 1.8766032296679942, "percentage": 37.53, "elapsed_time": "1:39:21", "remaining_time": "2:45:22", "throughput": 8686.16, "total_tokens": 51781920} +{"current_steps": 76820, "total_steps": 204665, "loss": 0.0942, "lr": 1.572608151923781e-06, "epoch": 1.8767253804998412, "percentage": 37.53, "elapsed_time": "1:39:21", "remaining_time": "2:45:21", "throughput": 8686.18, "total_tokens": 51784992} +{"current_steps": 76825, "total_steps": 204665, "loss": 0.0206, "lr": 1.5725382367762972e-06, "epoch": 1.8768475313316884, "percentage": 37.54, "elapsed_time": "1:39:22", "remaining_time": "2:45:21", "throughput": 8686.27, "total_tokens": 51788576} +{"current_steps": 76830, "total_steps": 204665, "loss": 0.021, "lr": 1.5724683174651616e-06, "epoch": 1.8769696821635355, "percentage": 37.54, "elapsed_time": "1:39:22", "remaining_time": "2:45:20", "throughput": 8686.33, "total_tokens": 51791968} +{"current_steps": 76835, "total_steps": 204665, "loss": 0.0357, "lr": 1.5723983939908826e-06, "epoch": 1.8770918329953827, "percentage": 37.54, "elapsed_time": "1:39:22", "remaining_time": "2:45:20", "throughput": 8686.43, "total_tokens": 51795616} +{"current_steps": 76840, "total_steps": 204665, "loss": 0.0541, "lr": 1.5723284663539684e-06, "epoch": 1.8772139838272297, "percentage": 37.54, "elapsed_time": "1:39:23", "remaining_time": "2:45:19", "throughput": 8686.45, "total_tokens": 51798688} +{"current_steps": 76845, "total_steps": 204665, "loss": 0.2528, "lr": 1.5722585345549276e-06, "epoch": 1.877336134659077, "percentage": 37.55, "elapsed_time": "1:39:23", "remaining_time": "2:45:19", "throughput": 8686.45, "total_tokens": 51801632} +{"current_steps": 76850, "total_steps": 204665, "loss": 0.0017, "lr": 1.5721885985942689e-06, "epoch": 1.877458285490924, "percentage": 37.55, "elapsed_time": "1:39:23", "remaining_time": "2:45:18", "throughput": 8686.51, "total_tokens": 51805024} +{"current_steps": 76855, "total_steps": 204665, "loss": 0.0908, "lr": 1.5721186584725007e-06, "epoch": 1.8775804363227713, "percentage": 37.55, "elapsed_time": "1:39:24", "remaining_time": "2:45:18", "throughput": 8686.55, "total_tokens": 51808224} +{"current_steps": 76860, "total_steps": 204665, "loss": 0.0377, "lr": 1.572048714190132e-06, "epoch": 1.8777025871546185, "percentage": 37.55, "elapsed_time": "1:39:24", "remaining_time": "2:45:18", "throughput": 8686.69, "total_tokens": 51812256} +{"current_steps": 76865, "total_steps": 204665, "loss": 0.1122, "lr": 1.571978765747671e-06, "epoch": 1.8778247379864657, "percentage": 37.56, "elapsed_time": "1:39:24", "remaining_time": "2:45:17", "throughput": 8686.74, "total_tokens": 51815584} +{"current_steps": 76870, "total_steps": 204665, "loss": 0.1408, "lr": 1.5719088131456264e-06, "epoch": 1.8779468888183128, "percentage": 37.56, "elapsed_time": "1:39:25", "remaining_time": "2:45:17", "throughput": 8686.79, "total_tokens": 51818848} +{"current_steps": 76875, "total_steps": 204665, "loss": 0.0538, "lr": 1.5718388563845073e-06, "epoch": 1.87806903965016, "percentage": 37.56, "elapsed_time": "1:39:25", "remaining_time": "2:45:16", "throughput": 8686.87, "total_tokens": 51822368} +{"current_steps": 76880, "total_steps": 204665, "loss": 0.0606, "lr": 1.5717688954648223e-06, "epoch": 1.8781911904820072, "percentage": 37.56, "elapsed_time": "1:39:25", "remaining_time": "2:45:16", "throughput": 8686.97, "total_tokens": 51826016} +{"current_steps": 76885, "total_steps": 204665, "loss": 0.093, "lr": 1.5716989303870797e-06, "epoch": 1.8783133413138544, "percentage": 37.57, "elapsed_time": "1:39:26", "remaining_time": "2:45:15", "throughput": 8687.09, "total_tokens": 51829856} +{"current_steps": 76890, "total_steps": 204665, "loss": 0.0452, "lr": 1.5716289611517892e-06, "epoch": 1.8784354921457016, "percentage": 37.57, "elapsed_time": "1:39:26", "remaining_time": "2:45:15", "throughput": 8687.13, "total_tokens": 51833056} +{"current_steps": 76895, "total_steps": 204665, "loss": 0.0483, "lr": 1.571558987759459e-06, "epoch": 1.8785576429775488, "percentage": 37.57, "elapsed_time": "1:39:27", "remaining_time": "2:45:14", "throughput": 8687.23, "total_tokens": 51836704} +{"current_steps": 76900, "total_steps": 204665, "loss": 0.1351, "lr": 1.5714890102105983e-06, "epoch": 1.878679793809396, "percentage": 37.57, "elapsed_time": "1:39:27", "remaining_time": "2:45:14", "throughput": 8687.2, "total_tokens": 51839392} +{"current_steps": 76905, "total_steps": 204665, "loss": 0.0565, "lr": 1.5714190285057152e-06, "epoch": 1.8788019446412432, "percentage": 37.58, "elapsed_time": "1:39:27", "remaining_time": "2:45:13", "throughput": 8687.33, "total_tokens": 51843296} +{"current_steps": 76910, "total_steps": 204665, "loss": 0.009, "lr": 1.5713490426453198e-06, "epoch": 1.8789240954730901, "percentage": 37.58, "elapsed_time": "1:39:28", "remaining_time": "2:45:13", "throughput": 8687.38, "total_tokens": 51846624} +{"current_steps": 76915, "total_steps": 204665, "loss": 0.2575, "lr": 1.5712790526299203e-06, "epoch": 1.8790462463049373, "percentage": 37.58, "elapsed_time": "1:39:28", "remaining_time": "2:45:13", "throughput": 8687.42, "total_tokens": 51849888} +{"current_steps": 76920, "total_steps": 204665, "loss": 0.1942, "lr": 1.5712090584600256e-06, "epoch": 1.8791683971367845, "percentage": 37.58, "elapsed_time": "1:39:28", "remaining_time": "2:45:12", "throughput": 8687.54, "total_tokens": 51853728} +{"current_steps": 76925, "total_steps": 204665, "loss": 0.0867, "lr": 1.5711390601361454e-06, "epoch": 1.8792905479686317, "percentage": 37.59, "elapsed_time": "1:39:29", "remaining_time": "2:45:12", "throughput": 8687.65, "total_tokens": 51857440} +{"current_steps": 76930, "total_steps": 204665, "loss": 0.0122, "lr": 1.5710690576587883e-06, "epoch": 1.8794126988004787, "percentage": 37.59, "elapsed_time": "1:39:29", "remaining_time": "2:45:11", "throughput": 8687.73, "total_tokens": 51860960} +{"current_steps": 76935, "total_steps": 204665, "loss": 0.0533, "lr": 1.5709990510284632e-06, "epoch": 1.8795348496323259, "percentage": 37.59, "elapsed_time": "1:39:29", "remaining_time": "2:45:11", "throughput": 8687.81, "total_tokens": 51864480} +{"current_steps": 76940, "total_steps": 204665, "loss": 0.0375, "lr": 1.5709290402456795e-06, "epoch": 1.879657000464173, "percentage": 37.59, "elapsed_time": "1:39:30", "remaining_time": "2:45:10", "throughput": 8687.82, "total_tokens": 51867552} +{"current_steps": 76945, "total_steps": 204665, "loss": 0.041, "lr": 1.5708590253109462e-06, "epoch": 1.8797791512960202, "percentage": 37.6, "elapsed_time": "1:39:30", "remaining_time": "2:45:10", "throughput": 8687.86, "total_tokens": 51870816} +{"current_steps": 76950, "total_steps": 204665, "loss": 0.0976, "lr": 1.5707890062247727e-06, "epoch": 1.8799013021278674, "percentage": 37.6, "elapsed_time": "1:39:30", "remaining_time": "2:45:09", "throughput": 8687.91, "total_tokens": 51874080} +{"current_steps": 76955, "total_steps": 204665, "loss": 0.0668, "lr": 1.5707189829876678e-06, "epoch": 1.8800234529597146, "percentage": 37.6, "elapsed_time": "1:39:31", "remaining_time": "2:45:09", "throughput": 8687.99, "total_tokens": 51877600} +{"current_steps": 76960, "total_steps": 204665, "loss": 0.0864, "lr": 1.5706489556001411e-06, "epoch": 1.8801456037915618, "percentage": 37.6, "elapsed_time": "1:39:31", "remaining_time": "2:45:08", "throughput": 8688.11, "total_tokens": 51881376} +{"current_steps": 76965, "total_steps": 204665, "loss": 0.106, "lr": 1.5705789240627017e-06, "epoch": 1.880267754623409, "percentage": 37.61, "elapsed_time": "1:39:31", "remaining_time": "2:45:08", "throughput": 8688.14, "total_tokens": 51884576} +{"current_steps": 76970, "total_steps": 204665, "loss": 0.0831, "lr": 1.570508888375859e-06, "epoch": 1.8803899054552562, "percentage": 37.61, "elapsed_time": "1:39:32", "remaining_time": "2:45:08", "throughput": 8688.18, "total_tokens": 51887776} +{"current_steps": 76975, "total_steps": 204665, "loss": 0.0864, "lr": 1.5704388485401221e-06, "epoch": 1.8805120562871034, "percentage": 37.61, "elapsed_time": "1:39:32", "remaining_time": "2:45:07", "throughput": 8688.21, "total_tokens": 51890976} +{"current_steps": 76980, "total_steps": 204665, "loss": 0.0714, "lr": 1.5703688045560004e-06, "epoch": 1.8806342071189506, "percentage": 37.61, "elapsed_time": "1:39:32", "remaining_time": "2:45:07", "throughput": 8688.27, "total_tokens": 51894368} +{"current_steps": 76985, "total_steps": 204665, "loss": 0.0956, "lr": 1.5702987564240035e-06, "epoch": 1.8807563579507978, "percentage": 37.62, "elapsed_time": "1:39:33", "remaining_time": "2:45:06", "throughput": 8688.34, "total_tokens": 51897824} +{"current_steps": 76990, "total_steps": 204665, "loss": 0.0912, "lr": 1.5702287041446406e-06, "epoch": 1.880878508782645, "percentage": 37.62, "elapsed_time": "1:39:33", "remaining_time": "2:45:06", "throughput": 8688.4, "total_tokens": 51901280} +{"current_steps": 76995, "total_steps": 204665, "loss": 0.1558, "lr": 1.5701586477184212e-06, "epoch": 1.881000659614492, "percentage": 37.62, "elapsed_time": "1:39:33", "remaining_time": "2:45:05", "throughput": 8688.48, "total_tokens": 51904800} +{"current_steps": 77000, "total_steps": 204665, "loss": 0.258, "lr": 1.5700885871458546e-06, "epoch": 1.881122810446339, "percentage": 37.62, "elapsed_time": "1:39:34", "remaining_time": "2:45:05", "throughput": 8688.54, "total_tokens": 51908192} +{"current_steps": 77005, "total_steps": 204665, "loss": 0.085, "lr": 1.5700185224274504e-06, "epoch": 1.8812449612781863, "percentage": 37.62, "elapsed_time": "1:39:34", "remaining_time": "2:45:04", "throughput": 8688.62, "total_tokens": 51911712} +{"current_steps": 77010, "total_steps": 204665, "loss": 0.0468, "lr": 1.5699484535637183e-06, "epoch": 1.8813671121100335, "percentage": 37.63, "elapsed_time": "1:39:35", "remaining_time": "2:45:04", "throughput": 8688.69, "total_tokens": 51915104} +{"current_steps": 77015, "total_steps": 204665, "loss": 0.1079, "lr": 1.5698783805551682e-06, "epoch": 1.8814892629418807, "percentage": 37.63, "elapsed_time": "1:39:35", "remaining_time": "2:45:03", "throughput": 8688.71, "total_tokens": 51918176} +{"current_steps": 77020, "total_steps": 204665, "loss": 0.1028, "lr": 1.5698083034023086e-06, "epoch": 1.8816114137737276, "percentage": 37.63, "elapsed_time": "1:39:35", "remaining_time": "2:45:03", "throughput": 8688.71, "total_tokens": 51921184} +{"current_steps": 77025, "total_steps": 204665, "loss": 0.0429, "lr": 1.5697382221056501e-06, "epoch": 1.8817335646055748, "percentage": 37.63, "elapsed_time": "1:39:36", "remaining_time": "2:45:03", "throughput": 8688.86, "total_tokens": 51925216} +{"current_steps": 77030, "total_steps": 204665, "loss": 0.0533, "lr": 1.5696681366657018e-06, "epoch": 1.881855715437422, "percentage": 37.64, "elapsed_time": "1:39:36", "remaining_time": "2:45:02", "throughput": 8688.87, "total_tokens": 51928224} +{"current_steps": 77035, "total_steps": 204665, "loss": 0.0829, "lr": 1.5695980470829736e-06, "epoch": 1.8819778662692692, "percentage": 37.64, "elapsed_time": "1:39:36", "remaining_time": "2:45:02", "throughput": 8688.97, "total_tokens": 51931872} +{"current_steps": 77040, "total_steps": 204665, "loss": 0.0499, "lr": 1.5695279533579754e-06, "epoch": 1.8821000171011164, "percentage": 37.64, "elapsed_time": "1:39:37", "remaining_time": "2:45:01", "throughput": 8689.01, "total_tokens": 51935136} +{"current_steps": 77045, "total_steps": 204665, "loss": 0.0416, "lr": 1.5694578554912167e-06, "epoch": 1.8822221679329636, "percentage": 37.64, "elapsed_time": "1:39:37", "remaining_time": "2:45:01", "throughput": 8689.08, "total_tokens": 51938592} +{"current_steps": 77050, "total_steps": 204665, "loss": 0.1148, "lr": 1.5693877534832072e-06, "epoch": 1.8823443187648108, "percentage": 37.65, "elapsed_time": "1:39:37", "remaining_time": "2:45:00", "throughput": 8689.15, "total_tokens": 51942048} +{"current_steps": 77055, "total_steps": 204665, "loss": 0.0878, "lr": 1.569317647334457e-06, "epoch": 1.882466469596658, "percentage": 37.65, "elapsed_time": "1:39:38", "remaining_time": "2:45:00", "throughput": 8689.21, "total_tokens": 51945376} +{"current_steps": 77060, "total_steps": 204665, "loss": 0.0298, "lr": 1.5692475370454754e-06, "epoch": 1.8825886204285052, "percentage": 37.65, "elapsed_time": "1:39:38", "remaining_time": "2:44:59", "throughput": 8689.23, "total_tokens": 51948512} +{"current_steps": 77065, "total_steps": 204665, "loss": 0.3041, "lr": 1.569177422616773e-06, "epoch": 1.8827107712603524, "percentage": 37.65, "elapsed_time": "1:39:38", "remaining_time": "2:44:59", "throughput": 8689.29, "total_tokens": 51951840} +{"current_steps": 77070, "total_steps": 204665, "loss": 0.0568, "lr": 1.569107304048859e-06, "epoch": 1.8828329220921995, "percentage": 37.66, "elapsed_time": "1:39:39", "remaining_time": "2:44:58", "throughput": 8689.34, "total_tokens": 51955168} +{"current_steps": 77075, "total_steps": 204665, "loss": 0.0458, "lr": 1.5690371813422437e-06, "epoch": 1.8829550729240467, "percentage": 37.66, "elapsed_time": "1:39:39", "remaining_time": "2:44:58", "throughput": 8689.41, "total_tokens": 51958624} +{"current_steps": 77080, "total_steps": 204665, "loss": 0.1906, "lr": 1.5689670544974369e-06, "epoch": 1.883077223755894, "percentage": 37.66, "elapsed_time": "1:39:39", "remaining_time": "2:44:58", "throughput": 8689.48, "total_tokens": 51962080} +{"current_steps": 77085, "total_steps": 204665, "loss": 0.0247, "lr": 1.5688969235149487e-06, "epoch": 1.883199374587741, "percentage": 37.66, "elapsed_time": "1:39:40", "remaining_time": "2:44:57", "throughput": 8689.54, "total_tokens": 51965472} +{"current_steps": 77090, "total_steps": 204665, "loss": 0.0012, "lr": 1.568826788395289e-06, "epoch": 1.883321525419588, "percentage": 37.67, "elapsed_time": "1:39:40", "remaining_time": "2:44:57", "throughput": 8689.6, "total_tokens": 51968928} +{"current_steps": 77095, "total_steps": 204665, "loss": 0.0317, "lr": 1.568756649138968e-06, "epoch": 1.8834436762514353, "percentage": 37.67, "elapsed_time": "1:39:40", "remaining_time": "2:44:56", "throughput": 8689.62, "total_tokens": 51972000} +{"current_steps": 77100, "total_steps": 204665, "loss": 0.0484, "lr": 1.5686865057464958e-06, "epoch": 1.8835658270832825, "percentage": 37.67, "elapsed_time": "1:39:41", "remaining_time": "2:44:56", "throughput": 8689.71, "total_tokens": 51975648} +{"current_steps": 77105, "total_steps": 204665, "loss": 0.1971, "lr": 1.568616358218382e-06, "epoch": 1.8836879779151297, "percentage": 37.67, "elapsed_time": "1:39:41", "remaining_time": "2:44:55", "throughput": 8689.7, "total_tokens": 51978528} +{"current_steps": 77110, "total_steps": 204665, "loss": 0.183, "lr": 1.5685462065551373e-06, "epoch": 1.8838101287469766, "percentage": 37.68, "elapsed_time": "1:39:41", "remaining_time": "2:44:55", "throughput": 8689.74, "total_tokens": 51981728} +{"current_steps": 77115, "total_steps": 204665, "loss": 0.0013, "lr": 1.5684760507572716e-06, "epoch": 1.8839322795788238, "percentage": 37.68, "elapsed_time": "1:39:42", "remaining_time": "2:44:54", "throughput": 8689.81, "total_tokens": 51985184} +{"current_steps": 77120, "total_steps": 204665, "loss": 0.0107, "lr": 1.5684058908252952e-06, "epoch": 1.884054430410671, "percentage": 37.68, "elapsed_time": "1:39:42", "remaining_time": "2:44:54", "throughput": 8689.97, "total_tokens": 51989344} +{"current_steps": 77125, "total_steps": 204665, "loss": 0.0973, "lr": 1.5683357267597183e-06, "epoch": 1.8841765812425182, "percentage": 37.68, "elapsed_time": "1:39:43", "remaining_time": "2:44:54", "throughput": 8690.03, "total_tokens": 51992736} +{"current_steps": 77130, "total_steps": 204665, "loss": 0.0648, "lr": 1.5682655585610514e-06, "epoch": 1.8842987320743654, "percentage": 37.69, "elapsed_time": "1:39:43", "remaining_time": "2:44:53", "throughput": 8690.08, "total_tokens": 51996064} +{"current_steps": 77135, "total_steps": 204665, "loss": 0.1465, "lr": 1.5681953862298043e-06, "epoch": 1.8844208829062126, "percentage": 37.69, "elapsed_time": "1:39:43", "remaining_time": "2:44:53", "throughput": 8690.18, "total_tokens": 51999712} +{"current_steps": 77140, "total_steps": 204665, "loss": 0.036, "lr": 1.5681252097664875e-06, "epoch": 1.8845430337380598, "percentage": 37.69, "elapsed_time": "1:39:44", "remaining_time": "2:44:52", "throughput": 8690.27, "total_tokens": 52003360} +{"current_steps": 77145, "total_steps": 204665, "loss": 0.0753, "lr": 1.5680550291716113e-06, "epoch": 1.884665184569907, "percentage": 37.69, "elapsed_time": "1:39:44", "remaining_time": "2:44:52", "throughput": 8690.34, "total_tokens": 52006816} +{"current_steps": 77150, "total_steps": 204665, "loss": 0.0653, "lr": 1.5679848444456862e-06, "epoch": 1.8847873354017541, "percentage": 37.7, "elapsed_time": "1:39:44", "remaining_time": "2:44:51", "throughput": 8690.41, "total_tokens": 52010208} +{"current_steps": 77155, "total_steps": 204665, "loss": 0.1015, "lr": 1.5679146555892223e-06, "epoch": 1.8849094862336013, "percentage": 37.7, "elapsed_time": "1:39:45", "remaining_time": "2:44:51", "throughput": 8690.45, "total_tokens": 52013472} +{"current_steps": 77160, "total_steps": 204665, "loss": 0.0014, "lr": 1.5678444626027308e-06, "epoch": 1.8850316370654485, "percentage": 37.7, "elapsed_time": "1:39:45", "remaining_time": "2:44:50", "throughput": 8690.49, "total_tokens": 52016736} +{"current_steps": 77165, "total_steps": 204665, "loss": 0.1694, "lr": 1.567774265486721e-06, "epoch": 1.8851537878972957, "percentage": 37.7, "elapsed_time": "1:39:45", "remaining_time": "2:44:50", "throughput": 8690.62, "total_tokens": 52020640} +{"current_steps": 77170, "total_steps": 204665, "loss": 0.0343, "lr": 1.5677040642417048e-06, "epoch": 1.885275938729143, "percentage": 37.71, "elapsed_time": "1:39:46", "remaining_time": "2:44:49", "throughput": 8690.68, "total_tokens": 52024032} +{"current_steps": 77175, "total_steps": 204665, "loss": 0.1471, "lr": 1.5676338588681914e-06, "epoch": 1.8853980895609899, "percentage": 37.71, "elapsed_time": "1:39:46", "remaining_time": "2:44:49", "throughput": 8690.72, "total_tokens": 52027296} +{"current_steps": 77180, "total_steps": 204665, "loss": 0.1674, "lr": 1.567563649366692e-06, "epoch": 1.885520240392837, "percentage": 37.71, "elapsed_time": "1:39:46", "remaining_time": "2:44:49", "throughput": 8690.76, "total_tokens": 52030496} +{"current_steps": 77185, "total_steps": 204665, "loss": 0.1197, "lr": 1.5674934357377168e-06, "epoch": 1.8856423912246842, "percentage": 37.71, "elapsed_time": "1:39:47", "remaining_time": "2:44:48", "throughput": 8690.78, "total_tokens": 52033568} +{"current_steps": 77190, "total_steps": 204665, "loss": 0.0036, "lr": 1.5674232179817773e-06, "epoch": 1.8857645420565314, "percentage": 37.72, "elapsed_time": "1:39:47", "remaining_time": "2:44:48", "throughput": 8690.89, "total_tokens": 52037344} +{"current_steps": 77195, "total_steps": 204665, "loss": 0.1402, "lr": 1.5673529960993832e-06, "epoch": 1.8858866928883786, "percentage": 37.72, "elapsed_time": "1:39:47", "remaining_time": "2:44:47", "throughput": 8691.01, "total_tokens": 52041120} +{"current_steps": 77200, "total_steps": 204665, "loss": 0.0463, "lr": 1.5672827700910456e-06, "epoch": 1.8860088437202256, "percentage": 37.72, "elapsed_time": "1:39:48", "remaining_time": "2:44:47", "throughput": 8691.03, "total_tokens": 52044192} +{"current_steps": 77205, "total_steps": 204665, "loss": 0.071, "lr": 1.5672125399572748e-06, "epoch": 1.8861309945520728, "percentage": 37.72, "elapsed_time": "1:39:48", "remaining_time": "2:44:46", "throughput": 8691.12, "total_tokens": 52047840} +{"current_steps": 77210, "total_steps": 204665, "loss": 0.0378, "lr": 1.5671423056985824e-06, "epoch": 1.88625314538392, "percentage": 37.73, "elapsed_time": "1:39:48", "remaining_time": "2:44:46", "throughput": 8691.19, "total_tokens": 52051296} +{"current_steps": 77215, "total_steps": 204665, "loss": 0.0576, "lr": 1.5670720673154783e-06, "epoch": 1.8863752962157672, "percentage": 37.73, "elapsed_time": "1:39:49", "remaining_time": "2:44:45", "throughput": 8691.36, "total_tokens": 52055456} +{"current_steps": 77220, "total_steps": 204665, "loss": 0.0263, "lr": 1.5670018248084735e-06, "epoch": 1.8864974470476144, "percentage": 37.73, "elapsed_time": "1:39:49", "remaining_time": "2:44:45", "throughput": 8691.39, "total_tokens": 52058592} +{"current_steps": 77225, "total_steps": 204665, "loss": 0.1117, "lr": 1.566931578178079e-06, "epoch": 1.8866195978794615, "percentage": 37.73, "elapsed_time": "1:39:50", "remaining_time": "2:44:44", "throughput": 8691.43, "total_tokens": 52061856} +{"current_steps": 77230, "total_steps": 204665, "loss": 0.0423, "lr": 1.5668613274248056e-06, "epoch": 1.8867417487113087, "percentage": 37.73, "elapsed_time": "1:39:50", "remaining_time": "2:44:44", "throughput": 8691.52, "total_tokens": 52065504} +{"current_steps": 77235, "total_steps": 204665, "loss": 0.2192, "lr": 1.5667910725491645e-06, "epoch": 1.886863899543156, "percentage": 37.74, "elapsed_time": "1:39:50", "remaining_time": "2:44:44", "throughput": 8691.6, "total_tokens": 52069024} +{"current_steps": 77240, "total_steps": 204665, "loss": 0.0861, "lr": 1.5667208135516658e-06, "epoch": 1.886986050375003, "percentage": 37.74, "elapsed_time": "1:39:51", "remaining_time": "2:44:43", "throughput": 8691.79, "total_tokens": 52073376} +{"current_steps": 77245, "total_steps": 204665, "loss": 0.0812, "lr": 1.566650550432821e-06, "epoch": 1.8871082012068503, "percentage": 37.74, "elapsed_time": "1:39:51", "remaining_time": "2:44:43", "throughput": 8691.78, "total_tokens": 52076256} +{"current_steps": 77250, "total_steps": 204665, "loss": 0.1082, "lr": 1.5665802831931412e-06, "epoch": 1.8872303520386975, "percentage": 37.74, "elapsed_time": "1:39:51", "remaining_time": "2:44:42", "throughput": 8691.82, "total_tokens": 52079456} +{"current_steps": 77255, "total_steps": 204665, "loss": 0.0032, "lr": 1.5665100118331371e-06, "epoch": 1.8873525028705447, "percentage": 37.75, "elapsed_time": "1:39:52", "remaining_time": "2:44:42", "throughput": 8691.84, "total_tokens": 52082528} +{"current_steps": 77260, "total_steps": 204665, "loss": 0.0648, "lr": 1.5664397363533198e-06, "epoch": 1.8874746537023919, "percentage": 37.75, "elapsed_time": "1:39:52", "remaining_time": "2:44:41", "throughput": 8691.88, "total_tokens": 52085792} +{"current_steps": 77265, "total_steps": 204665, "loss": 0.167, "lr": 1.5663694567542004e-06, "epoch": 1.8875968045342388, "percentage": 37.75, "elapsed_time": "1:39:52", "remaining_time": "2:44:41", "throughput": 8691.94, "total_tokens": 52089184} +{"current_steps": 77270, "total_steps": 204665, "loss": 0.0372, "lr": 1.5662991730362899e-06, "epoch": 1.887718955366086, "percentage": 37.75, "elapsed_time": "1:39:53", "remaining_time": "2:44:40", "throughput": 8691.99, "total_tokens": 52092448} +{"current_steps": 77275, "total_steps": 204665, "loss": 0.0688, "lr": 1.5662288852000995e-06, "epoch": 1.8878411061979332, "percentage": 37.76, "elapsed_time": "1:39:53", "remaining_time": "2:44:40", "throughput": 8692.08, "total_tokens": 52096096} +{"current_steps": 77280, "total_steps": 204665, "loss": 0.0309, "lr": 1.5661585932461403e-06, "epoch": 1.8879632570297804, "percentage": 37.76, "elapsed_time": "1:39:53", "remaining_time": "2:44:40", "throughput": 8692.14, "total_tokens": 52099488} +{"current_steps": 77285, "total_steps": 204665, "loss": 0.0414, "lr": 1.5660882971749237e-06, "epoch": 1.8880854078616274, "percentage": 37.76, "elapsed_time": "1:39:54", "remaining_time": "2:44:39", "throughput": 8692.22, "total_tokens": 52103008} +{"current_steps": 77290, "total_steps": 204665, "loss": 0.0011, "lr": 1.5660179969869604e-06, "epoch": 1.8882075586934746, "percentage": 37.76, "elapsed_time": "1:39:54", "remaining_time": "2:44:39", "throughput": 8692.29, "total_tokens": 52106464} +{"current_steps": 77295, "total_steps": 204665, "loss": 0.0005, "lr": 1.5659476926827625e-06, "epoch": 1.8883297095253218, "percentage": 37.77, "elapsed_time": "1:39:54", "remaining_time": "2:44:38", "throughput": 8692.35, "total_tokens": 52109856} +{"current_steps": 77300, "total_steps": 204665, "loss": 0.1511, "lr": 1.5658773842628405e-06, "epoch": 1.888451860357169, "percentage": 37.77, "elapsed_time": "1:39:55", "remaining_time": "2:44:38", "throughput": 8692.38, "total_tokens": 52113056} +{"current_steps": 77305, "total_steps": 204665, "loss": 0.0981, "lr": 1.565807071727706e-06, "epoch": 1.8885740111890161, "percentage": 37.77, "elapsed_time": "1:39:55", "remaining_time": "2:44:37", "throughput": 8692.46, "total_tokens": 52116512} +{"current_steps": 77310, "total_steps": 204665, "loss": 0.1299, "lr": 1.5657367550778702e-06, "epoch": 1.8886961620208633, "percentage": 37.77, "elapsed_time": "1:39:55", "remaining_time": "2:44:37", "throughput": 8692.52, "total_tokens": 52119968} +{"current_steps": 77315, "total_steps": 204665, "loss": 0.1116, "lr": 1.5656664343138447e-06, "epoch": 1.8888183128527105, "percentage": 37.78, "elapsed_time": "1:39:56", "remaining_time": "2:44:36", "throughput": 8692.57, "total_tokens": 52123232} +{"current_steps": 77320, "total_steps": 204665, "loss": 0.0592, "lr": 1.5655961094361403e-06, "epoch": 1.8889404636845577, "percentage": 37.78, "elapsed_time": "1:39:56", "remaining_time": "2:44:36", "throughput": 8692.61, "total_tokens": 52126496} +{"current_steps": 77325, "total_steps": 204665, "loss": 0.1957, "lr": 1.5655257804452696e-06, "epoch": 1.889062614516405, "percentage": 37.78, "elapsed_time": "1:39:56", "remaining_time": "2:44:35", "throughput": 8692.68, "total_tokens": 52129952} +{"current_steps": 77330, "total_steps": 204665, "loss": 0.1567, "lr": 1.5654554473417428e-06, "epoch": 1.889184765348252, "percentage": 37.78, "elapsed_time": "1:39:57", "remaining_time": "2:44:35", "throughput": 8692.74, "total_tokens": 52133344} +{"current_steps": 77335, "total_steps": 204665, "loss": 0.0769, "lr": 1.565385110126072e-06, "epoch": 1.8893069161800993, "percentage": 37.79, "elapsed_time": "1:39:57", "remaining_time": "2:44:35", "throughput": 8692.77, "total_tokens": 52136480} +{"current_steps": 77340, "total_steps": 204665, "loss": 0.0255, "lr": 1.5653147687987684e-06, "epoch": 1.8894290670119465, "percentage": 37.79, "elapsed_time": "1:39:58", "remaining_time": "2:44:34", "throughput": 8692.79, "total_tokens": 52139616} +{"current_steps": 77345, "total_steps": 204665, "loss": 0.0013, "lr": 1.565244423360344e-06, "epoch": 1.8895512178437937, "percentage": 37.79, "elapsed_time": "1:39:58", "remaining_time": "2:44:34", "throughput": 8692.82, "total_tokens": 52142816} +{"current_steps": 77350, "total_steps": 204665, "loss": 0.0546, "lr": 1.5651740738113101e-06, "epoch": 1.8896733686756408, "percentage": 37.79, "elapsed_time": "1:39:58", "remaining_time": "2:44:33", "throughput": 8692.83, "total_tokens": 52145760} +{"current_steps": 77355, "total_steps": 204665, "loss": 0.0848, "lr": 1.5651037201521784e-06, "epoch": 1.8897955195074878, "percentage": 37.8, "elapsed_time": "1:39:59", "remaining_time": "2:44:33", "throughput": 8692.87, "total_tokens": 52149024} +{"current_steps": 77360, "total_steps": 204665, "loss": 0.0013, "lr": 1.5650333623834607e-06, "epoch": 1.889917670339335, "percentage": 37.8, "elapsed_time": "1:39:59", "remaining_time": "2:44:32", "throughput": 8693.0, "total_tokens": 52152992} +{"current_steps": 77365, "total_steps": 204665, "loss": 0.1025, "lr": 1.564963000505668e-06, "epoch": 1.8900398211711822, "percentage": 37.8, "elapsed_time": "1:39:59", "remaining_time": "2:44:32", "throughput": 8693.04, "total_tokens": 52156256} +{"current_steps": 77370, "total_steps": 204665, "loss": 0.0752, "lr": 1.5648926345193123e-06, "epoch": 1.8901619720030294, "percentage": 37.8, "elapsed_time": "1:40:00", "remaining_time": "2:44:31", "throughput": 8693.11, "total_tokens": 52159712} +{"current_steps": 77375, "total_steps": 204665, "loss": 0.1883, "lr": 1.564822264424906e-06, "epoch": 1.8902841228348763, "percentage": 37.81, "elapsed_time": "1:40:00", "remaining_time": "2:44:31", "throughput": 8693.11, "total_tokens": 52162592} +{"current_steps": 77380, "total_steps": 204665, "loss": 0.0896, "lr": 1.5647518902229594e-06, "epoch": 1.8904062736667235, "percentage": 37.81, "elapsed_time": "1:40:00", "remaining_time": "2:44:30", "throughput": 8693.19, "total_tokens": 52166112} +{"current_steps": 77385, "total_steps": 204665, "loss": 0.0747, "lr": 1.564681511913986e-06, "epoch": 1.8905284244985707, "percentage": 37.81, "elapsed_time": "1:40:01", "remaining_time": "2:44:30", "throughput": 8693.3, "total_tokens": 52169888} +{"current_steps": 77390, "total_steps": 204665, "loss": 0.1649, "lr": 1.5646111294984963e-06, "epoch": 1.890650575330418, "percentage": 37.81, "elapsed_time": "1:40:01", "remaining_time": "2:44:30", "throughput": 8693.36, "total_tokens": 52173280} +{"current_steps": 77395, "total_steps": 204665, "loss": 0.0928, "lr": 1.5645407429770025e-06, "epoch": 1.890772726162265, "percentage": 37.82, "elapsed_time": "1:40:01", "remaining_time": "2:44:29", "throughput": 8693.44, "total_tokens": 52176800} +{"current_steps": 77400, "total_steps": 204665, "loss": 0.0702, "lr": 1.564470352350017e-06, "epoch": 1.8908948769941123, "percentage": 37.82, "elapsed_time": "1:40:02", "remaining_time": "2:44:29", "throughput": 8693.47, "total_tokens": 52180000} +{"current_steps": 77405, "total_steps": 204665, "loss": 0.0429, "lr": 1.5643999576180509e-06, "epoch": 1.8910170278259595, "percentage": 37.82, "elapsed_time": "1:40:02", "remaining_time": "2:44:28", "throughput": 8693.54, "total_tokens": 52183392} +{"current_steps": 77410, "total_steps": 204665, "loss": 0.143, "lr": 1.5643295587816167e-06, "epoch": 1.8911391786578067, "percentage": 37.82, "elapsed_time": "1:40:02", "remaining_time": "2:44:28", "throughput": 8693.59, "total_tokens": 52186720} +{"current_steps": 77415, "total_steps": 204665, "loss": 0.0799, "lr": 1.5642591558412263e-06, "epoch": 1.8912613294896539, "percentage": 37.83, "elapsed_time": "1:40:03", "remaining_time": "2:44:27", "throughput": 8693.6, "total_tokens": 52189728} +{"current_steps": 77420, "total_steps": 204665, "loss": 0.0017, "lr": 1.5641887487973914e-06, "epoch": 1.891383480321501, "percentage": 37.83, "elapsed_time": "1:40:03", "remaining_time": "2:44:27", "throughput": 8693.67, "total_tokens": 52193248} +{"current_steps": 77425, "total_steps": 204665, "loss": 0.0022, "lr": 1.564118337650624e-06, "epoch": 1.8915056311533482, "percentage": 37.83, "elapsed_time": "1:40:03", "remaining_time": "2:44:26", "throughput": 8693.72, "total_tokens": 52196512} +{"current_steps": 77430, "total_steps": 204665, "loss": 0.0443, "lr": 1.5640479224014364e-06, "epoch": 1.8916277819851954, "percentage": 37.83, "elapsed_time": "1:40:04", "remaining_time": "2:44:26", "throughput": 8693.74, "total_tokens": 52199648} +{"current_steps": 77435, "total_steps": 204665, "loss": 0.0391, "lr": 1.5639775030503409e-06, "epoch": 1.8917499328170426, "percentage": 37.83, "elapsed_time": "1:40:04", "remaining_time": "2:44:25", "throughput": 8693.82, "total_tokens": 52203104} +{"current_steps": 77440, "total_steps": 204665, "loss": 0.0855, "lr": 1.5639070795978491e-06, "epoch": 1.8918720836488898, "percentage": 37.84, "elapsed_time": "1:40:04", "remaining_time": "2:44:25", "throughput": 8693.9, "total_tokens": 52206688} +{"current_steps": 77445, "total_steps": 204665, "loss": 0.1721, "lr": 1.5638366520444732e-06, "epoch": 1.8919942344807368, "percentage": 37.84, "elapsed_time": "1:40:05", "remaining_time": "2:44:25", "throughput": 8694.08, "total_tokens": 52210976} +{"current_steps": 77450, "total_steps": 204665, "loss": 0.0824, "lr": 1.5637662203907255e-06, "epoch": 1.892116385312584, "percentage": 37.84, "elapsed_time": "1:40:05", "remaining_time": "2:44:24", "throughput": 8694.13, "total_tokens": 52214240} +{"current_steps": 77455, "total_steps": 204665, "loss": 0.0363, "lr": 1.5636957846371184e-06, "epoch": 1.8922385361444312, "percentage": 37.84, "elapsed_time": "1:40:06", "remaining_time": "2:44:24", "throughput": 8694.13, "total_tokens": 52217184} +{"current_steps": 77460, "total_steps": 204665, "loss": 0.0547, "lr": 1.563625344784164e-06, "epoch": 1.8923606869762784, "percentage": 37.85, "elapsed_time": "1:40:06", "remaining_time": "2:44:23", "throughput": 8694.17, "total_tokens": 52220384} +{"current_steps": 77465, "total_steps": 204665, "loss": 0.166, "lr": 1.5635549008323742e-06, "epoch": 1.8924828378081253, "percentage": 37.85, "elapsed_time": "1:40:06", "remaining_time": "2:44:23", "throughput": 8694.21, "total_tokens": 52223584} +{"current_steps": 77470, "total_steps": 204665, "loss": 0.0522, "lr": 1.5634844527822617e-06, "epoch": 1.8926049886399725, "percentage": 37.85, "elapsed_time": "1:40:07", "remaining_time": "2:44:22", "throughput": 8694.22, "total_tokens": 52226592} +{"current_steps": 77475, "total_steps": 204665, "loss": 0.0969, "lr": 1.563414000634339e-06, "epoch": 1.8927271394718197, "percentage": 37.85, "elapsed_time": "1:40:07", "remaining_time": "2:44:22", "throughput": 8694.24, "total_tokens": 52229664} +{"current_steps": 77480, "total_steps": 204665, "loss": 0.0367, "lr": 1.563343544389118e-06, "epoch": 1.8928492903036669, "percentage": 37.86, "elapsed_time": "1:40:07", "remaining_time": "2:44:21", "throughput": 8694.27, "total_tokens": 52232800} +{"current_steps": 77485, "total_steps": 204665, "loss": 0.0511, "lr": 1.563273084047111e-06, "epoch": 1.892971441135514, "percentage": 37.86, "elapsed_time": "1:40:08", "remaining_time": "2:44:21", "throughput": 8694.34, "total_tokens": 52236320} +{"current_steps": 77490, "total_steps": 204665, "loss": 0.0613, "lr": 1.5632026196088308e-06, "epoch": 1.8930935919673613, "percentage": 37.86, "elapsed_time": "1:40:08", "remaining_time": "2:44:20", "throughput": 8694.45, "total_tokens": 52240032} +{"current_steps": 77495, "total_steps": 204665, "loss": 0.0434, "lr": 1.5631321510747894e-06, "epoch": 1.8932157427992085, "percentage": 37.86, "elapsed_time": "1:40:08", "remaining_time": "2:44:20", "throughput": 8694.51, "total_tokens": 52243424} +{"current_steps": 77500, "total_steps": 204665, "loss": 0.1223, "lr": 1.5630616784455e-06, "epoch": 1.8933378936310556, "percentage": 37.87, "elapsed_time": "1:40:09", "remaining_time": "2:44:20", "throughput": 8694.58, "total_tokens": 52246880} +{"current_steps": 77505, "total_steps": 204665, "loss": 0.0921, "lr": 1.5629912017214744e-06, "epoch": 1.8934600444629028, "percentage": 37.87, "elapsed_time": "1:40:09", "remaining_time": "2:44:19", "throughput": 8694.65, "total_tokens": 52250336} +{"current_steps": 77510, "total_steps": 204665, "loss": 0.1228, "lr": 1.5629207209032252e-06, "epoch": 1.89358219529475, "percentage": 37.87, "elapsed_time": "1:40:09", "remaining_time": "2:44:19", "throughput": 8694.71, "total_tokens": 52253792} +{"current_steps": 77515, "total_steps": 204665, "loss": 0.164, "lr": 1.5628502359912652e-06, "epoch": 1.8937043461265972, "percentage": 37.87, "elapsed_time": "1:40:10", "remaining_time": "2:44:18", "throughput": 8694.77, "total_tokens": 52257184} +{"current_steps": 77520, "total_steps": 204665, "loss": 0.1079, "lr": 1.562779746986107e-06, "epoch": 1.8938264969584444, "percentage": 37.88, "elapsed_time": "1:40:10", "remaining_time": "2:44:18", "throughput": 8694.84, "total_tokens": 52260576} +{"current_steps": 77525, "total_steps": 204665, "loss": 0.0671, "lr": 1.5627092538882632e-06, "epoch": 1.8939486477902916, "percentage": 37.88, "elapsed_time": "1:40:10", "remaining_time": "2:44:17", "throughput": 8694.95, "total_tokens": 52264352} +{"current_steps": 77530, "total_steps": 204665, "loss": 0.0792, "lr": 1.562638756698246e-06, "epoch": 1.8940707986221386, "percentage": 37.88, "elapsed_time": "1:40:11", "remaining_time": "2:44:17", "throughput": 8695.02, "total_tokens": 52267808} +{"current_steps": 77535, "total_steps": 204665, "loss": 0.1215, "lr": 1.562568255416569e-06, "epoch": 1.8941929494539858, "percentage": 37.88, "elapsed_time": "1:40:11", "remaining_time": "2:44:16", "throughput": 8695.12, "total_tokens": 52271520} +{"current_steps": 77540, "total_steps": 204665, "loss": 0.0386, "lr": 1.5624977500437437e-06, "epoch": 1.894315100285833, "percentage": 37.89, "elapsed_time": "1:40:11", "remaining_time": "2:44:16", "throughput": 8695.15, "total_tokens": 52274656} +{"current_steps": 77545, "total_steps": 204665, "loss": 0.0021, "lr": 1.5624272405802838e-06, "epoch": 1.8944372511176801, "percentage": 37.89, "elapsed_time": "1:40:12", "remaining_time": "2:44:15", "throughput": 8695.18, "total_tokens": 52277792} +{"current_steps": 77550, "total_steps": 204665, "loss": 0.0528, "lr": 1.5623567270267018e-06, "epoch": 1.8945594019495273, "percentage": 37.89, "elapsed_time": "1:40:12", "remaining_time": "2:44:15", "throughput": 8695.23, "total_tokens": 52281120} +{"current_steps": 77555, "total_steps": 204665, "loss": 0.026, "lr": 1.5622862093835102e-06, "epoch": 1.8946815527813743, "percentage": 37.89, "elapsed_time": "1:40:12", "remaining_time": "2:44:15", "throughput": 8695.28, "total_tokens": 52284448} +{"current_steps": 77560, "total_steps": 204665, "loss": 0.0388, "lr": 1.5622156876512223e-06, "epoch": 1.8948037036132215, "percentage": 37.9, "elapsed_time": "1:40:13", "remaining_time": "2:44:14", "throughput": 8695.29, "total_tokens": 52287456} +{"current_steps": 77565, "total_steps": 204665, "loss": 0.0014, "lr": 1.5621451618303505e-06, "epoch": 1.8949258544450687, "percentage": 37.9, "elapsed_time": "1:40:13", "remaining_time": "2:44:14", "throughput": 8695.32, "total_tokens": 52290592} +{"current_steps": 77570, "total_steps": 204665, "loss": 0.1395, "lr": 1.5620746319214078e-06, "epoch": 1.8950480052769159, "percentage": 37.9, "elapsed_time": "1:40:14", "remaining_time": "2:44:13", "throughput": 8695.39, "total_tokens": 52294112} +{"current_steps": 77575, "total_steps": 204665, "loss": 0.0677, "lr": 1.5620040979249074e-06, "epoch": 1.895170156108763, "percentage": 37.9, "elapsed_time": "1:40:14", "remaining_time": "2:44:13", "throughput": 8695.41, "total_tokens": 52297184} +{"current_steps": 77580, "total_steps": 204665, "loss": 0.0555, "lr": 1.561933559841362e-06, "epoch": 1.8952923069406102, "percentage": 37.91, "elapsed_time": "1:40:14", "remaining_time": "2:44:12", "throughput": 8695.55, "total_tokens": 52301152} +{"current_steps": 77585, "total_steps": 204665, "loss": 0.231, "lr": 1.5618630176712846e-06, "epoch": 1.8954144577724574, "percentage": 37.91, "elapsed_time": "1:40:15", "remaining_time": "2:44:12", "throughput": 8695.59, "total_tokens": 52304416} +{"current_steps": 77590, "total_steps": 204665, "loss": 0.1493, "lr": 1.561792471415188e-06, "epoch": 1.8955366086043046, "percentage": 37.91, "elapsed_time": "1:40:15", "remaining_time": "2:44:11", "throughput": 8695.68, "total_tokens": 52308000} +{"current_steps": 77595, "total_steps": 204665, "loss": 0.1698, "lr": 1.5617219210735858e-06, "epoch": 1.8956587594361518, "percentage": 37.91, "elapsed_time": "1:40:15", "remaining_time": "2:44:11", "throughput": 8695.71, "total_tokens": 52311200} +{"current_steps": 77600, "total_steps": 204665, "loss": 0.0042, "lr": 1.5616513666469904e-06, "epoch": 1.895780910267999, "percentage": 37.92, "elapsed_time": "1:40:16", "remaining_time": "2:44:10", "throughput": 8695.82, "total_tokens": 52314976} +{"current_steps": 77605, "total_steps": 204665, "loss": 0.1115, "lr": 1.5615808081359154e-06, "epoch": 1.8959030610998462, "percentage": 37.92, "elapsed_time": "1:40:16", "remaining_time": "2:44:10", "throughput": 8695.81, "total_tokens": 52317856} +{"current_steps": 77610, "total_steps": 204665, "loss": 0.0836, "lr": 1.5615102455408735e-06, "epoch": 1.8960252119316934, "percentage": 37.92, "elapsed_time": "1:40:16", "remaining_time": "2:44:10", "throughput": 8695.85, "total_tokens": 52321056} +{"current_steps": 77615, "total_steps": 204665, "loss": 0.0015, "lr": 1.5614396788623786e-06, "epoch": 1.8961473627635406, "percentage": 37.92, "elapsed_time": "1:40:17", "remaining_time": "2:44:09", "throughput": 8695.89, "total_tokens": 52324256} +{"current_steps": 77620, "total_steps": 204665, "loss": 0.0791, "lr": 1.5613691081009428e-06, "epoch": 1.8962695135953875, "percentage": 37.93, "elapsed_time": "1:40:17", "remaining_time": "2:44:09", "throughput": 8695.91, "total_tokens": 52327328} +{"current_steps": 77625, "total_steps": 204665, "loss": 0.0862, "lr": 1.56129853325708e-06, "epoch": 1.8963916644272347, "percentage": 37.93, "elapsed_time": "1:40:17", "remaining_time": "2:44:08", "throughput": 8695.94, "total_tokens": 52330528} +{"current_steps": 77630, "total_steps": 204665, "loss": 0.0818, "lr": 1.5612279543313033e-06, "epoch": 1.896513815259082, "percentage": 37.93, "elapsed_time": "1:40:18", "remaining_time": "2:44:08", "throughput": 8696.12, "total_tokens": 52334880} +{"current_steps": 77635, "total_steps": 204665, "loss": 0.0512, "lr": 1.561157371324126e-06, "epoch": 1.896635966090929, "percentage": 37.93, "elapsed_time": "1:40:18", "remaining_time": "2:44:07", "throughput": 8696.25, "total_tokens": 52338720} +{"current_steps": 77640, "total_steps": 204665, "loss": 0.0703, "lr": 1.5610867842360614e-06, "epoch": 1.8967581169227763, "percentage": 37.94, "elapsed_time": "1:40:18", "remaining_time": "2:44:07", "throughput": 8696.3, "total_tokens": 52341984} +{"current_steps": 77645, "total_steps": 204665, "loss": 0.064, "lr": 1.5610161930676226e-06, "epoch": 1.8968802677546233, "percentage": 37.94, "elapsed_time": "1:40:19", "remaining_time": "2:44:06", "throughput": 8696.3, "total_tokens": 52344928} +{"current_steps": 77650, "total_steps": 204665, "loss": 0.0177, "lr": 1.5609455978193232e-06, "epoch": 1.8970024185864705, "percentage": 37.94, "elapsed_time": "1:40:19", "remaining_time": "2:44:06", "throughput": 8696.37, "total_tokens": 52348448} +{"current_steps": 77655, "total_steps": 204665, "loss": 0.071, "lr": 1.5608749984916767e-06, "epoch": 1.8971245694183176, "percentage": 37.94, "elapsed_time": "1:40:19", "remaining_time": "2:44:05", "throughput": 8696.41, "total_tokens": 52351648} +{"current_steps": 77660, "total_steps": 204665, "loss": 0.0611, "lr": 1.5608043950851964e-06, "epoch": 1.8972467202501648, "percentage": 37.94, "elapsed_time": "1:40:20", "remaining_time": "2:44:05", "throughput": 8696.45, "total_tokens": 52354912} +{"current_steps": 77665, "total_steps": 204665, "loss": 0.0366, "lr": 1.5607337876003954e-06, "epoch": 1.897368871082012, "percentage": 37.95, "elapsed_time": "1:40:20", "remaining_time": "2:44:05", "throughput": 8696.49, "total_tokens": 52358112} +{"current_steps": 77670, "total_steps": 204665, "loss": 0.0577, "lr": 1.5606631760377878e-06, "epoch": 1.8974910219138592, "percentage": 37.95, "elapsed_time": "1:40:20", "remaining_time": "2:44:04", "throughput": 8696.47, "total_tokens": 52360928} +{"current_steps": 77675, "total_steps": 204665, "loss": 0.044, "lr": 1.5605925603978866e-06, "epoch": 1.8976131727457064, "percentage": 37.95, "elapsed_time": "1:40:21", "remaining_time": "2:44:04", "throughput": 8696.53, "total_tokens": 52364256} +{"current_steps": 77680, "total_steps": 204665, "loss": 0.1288, "lr": 1.5605219406812054e-06, "epoch": 1.8977353235775536, "percentage": 37.95, "elapsed_time": "1:40:21", "remaining_time": "2:44:03", "throughput": 8696.55, "total_tokens": 52367392} +{"current_steps": 77685, "total_steps": 204665, "loss": 0.0254, "lr": 1.5604513168882582e-06, "epoch": 1.8978574744094008, "percentage": 37.96, "elapsed_time": "1:40:21", "remaining_time": "2:44:03", "throughput": 8696.64, "total_tokens": 52370976} +{"current_steps": 77690, "total_steps": 204665, "loss": 0.1965, "lr": 1.560380689019558e-06, "epoch": 1.897979625241248, "percentage": 37.96, "elapsed_time": "1:40:22", "remaining_time": "2:44:02", "throughput": 8696.68, "total_tokens": 52374240} +{"current_steps": 77695, "total_steps": 204665, "loss": 0.0589, "lr": 1.5603100570756192e-06, "epoch": 1.8981017760730952, "percentage": 37.96, "elapsed_time": "1:40:22", "remaining_time": "2:44:02", "throughput": 8696.69, "total_tokens": 52377248} +{"current_steps": 77700, "total_steps": 204665, "loss": 0.1434, "lr": 1.5602394210569544e-06, "epoch": 1.8982239269049423, "percentage": 37.96, "elapsed_time": "1:40:23", "remaining_time": "2:44:01", "throughput": 8696.71, "total_tokens": 52380320} +{"current_steps": 77705, "total_steps": 204665, "loss": 0.0915, "lr": 1.560168780964078e-06, "epoch": 1.8983460777367895, "percentage": 37.97, "elapsed_time": "1:40:23", "remaining_time": "2:44:01", "throughput": 8696.79, "total_tokens": 52383904} +{"current_steps": 77710, "total_steps": 204665, "loss": 0.0484, "lr": 1.5600981367975037e-06, "epoch": 1.8984682285686365, "percentage": 37.97, "elapsed_time": "1:40:23", "remaining_time": "2:44:00", "throughput": 8696.84, "total_tokens": 52387168} +{"current_steps": 77715, "total_steps": 204665, "loss": 0.0511, "lr": 1.5600274885577446e-06, "epoch": 1.8985903794004837, "percentage": 37.97, "elapsed_time": "1:40:24", "remaining_time": "2:44:00", "throughput": 8696.82, "total_tokens": 52389984} +{"current_steps": 77720, "total_steps": 204665, "loss": 0.0516, "lr": 1.5599568362453158e-06, "epoch": 1.8987125302323309, "percentage": 37.97, "elapsed_time": "1:40:24", "remaining_time": "2:43:59", "throughput": 8696.84, "total_tokens": 52393056} +{"current_steps": 77725, "total_steps": 204665, "loss": 0.0975, "lr": 1.5598861798607297e-06, "epoch": 1.898834681064178, "percentage": 37.98, "elapsed_time": "1:40:24", "remaining_time": "2:43:59", "throughput": 8696.91, "total_tokens": 52396512} +{"current_steps": 77730, "total_steps": 204665, "loss": 0.1994, "lr": 1.5598155194045007e-06, "epoch": 1.8989568318960253, "percentage": 37.98, "elapsed_time": "1:40:25", "remaining_time": "2:43:59", "throughput": 8696.97, "total_tokens": 52399840} +{"current_steps": 77735, "total_steps": 204665, "loss": 0.0337, "lr": 1.559744854877143e-06, "epoch": 1.8990789827278722, "percentage": 37.98, "elapsed_time": "1:40:25", "remaining_time": "2:43:58", "throughput": 8697.05, "total_tokens": 52403360} +{"current_steps": 77740, "total_steps": 204665, "loss": 0.1266, "lr": 1.55967418627917e-06, "epoch": 1.8992011335597194, "percentage": 37.98, "elapsed_time": "1:40:25", "remaining_time": "2:43:58", "throughput": 8697.16, "total_tokens": 52407136} +{"current_steps": 77745, "total_steps": 204665, "loss": 0.1016, "lr": 1.5596035136110957e-06, "epoch": 1.8993232843915666, "percentage": 37.99, "elapsed_time": "1:40:26", "remaining_time": "2:43:57", "throughput": 8697.21, "total_tokens": 52410464} +{"current_steps": 77750, "total_steps": 204665, "loss": 0.0045, "lr": 1.559532836873434e-06, "epoch": 1.8994454352234138, "percentage": 37.99, "elapsed_time": "1:40:26", "remaining_time": "2:43:57", "throughput": 8697.25, "total_tokens": 52413728} +{"current_steps": 77755, "total_steps": 204665, "loss": 0.004, "lr": 1.5594621560666994e-06, "epoch": 1.899567586055261, "percentage": 37.99, "elapsed_time": "1:40:26", "remaining_time": "2:43:56", "throughput": 8697.27, "total_tokens": 52416800} +{"current_steps": 77760, "total_steps": 204665, "loss": 0.09, "lr": 1.5593914711914054e-06, "epoch": 1.8996897368871082, "percentage": 37.99, "elapsed_time": "1:40:27", "remaining_time": "2:43:56", "throughput": 8697.31, "total_tokens": 52420064} +{"current_steps": 77765, "total_steps": 204665, "loss": 0.0992, "lr": 1.5593207822480661e-06, "epoch": 1.8998118877189554, "percentage": 38.0, "elapsed_time": "1:40:27", "remaining_time": "2:43:55", "throughput": 8697.41, "total_tokens": 52423776} +{"current_steps": 77770, "total_steps": 204665, "loss": 0.0913, "lr": 1.5592500892371958e-06, "epoch": 1.8999340385508026, "percentage": 38.0, "elapsed_time": "1:40:27", "remaining_time": "2:43:55", "throughput": 8697.46, "total_tokens": 52427104} +{"current_steps": 77775, "total_steps": 204665, "loss": 0.0226, "lr": 1.5591793921593079e-06, "epoch": 1.9000561893826498, "percentage": 38.0, "elapsed_time": "1:40:28", "remaining_time": "2:43:55", "throughput": 8697.46, "total_tokens": 52430048} +{"current_steps": 77780, "total_steps": 204665, "loss": 0.144, "lr": 1.5591086910149174e-06, "epoch": 1.900178340214497, "percentage": 38.0, "elapsed_time": "1:40:28", "remaining_time": "2:43:54", "throughput": 8697.55, "total_tokens": 52433632} +{"current_steps": 77785, "total_steps": 204665, "loss": 0.0424, "lr": 1.5590379858045384e-06, "epoch": 1.9003004910463441, "percentage": 38.01, "elapsed_time": "1:40:28", "remaining_time": "2:43:54", "throughput": 8697.6, "total_tokens": 52436896} +{"current_steps": 77790, "total_steps": 204665, "loss": 0.1, "lr": 1.5589672765286846e-06, "epoch": 1.9004226418781913, "percentage": 38.01, "elapsed_time": "1:40:29", "remaining_time": "2:43:53", "throughput": 8697.63, "total_tokens": 52440096} +{"current_steps": 77795, "total_steps": 204665, "loss": 0.0882, "lr": 1.5588965631878704e-06, "epoch": 1.9005447927100385, "percentage": 38.01, "elapsed_time": "1:40:29", "remaining_time": "2:43:53", "throughput": 8697.7, "total_tokens": 52443552} +{"current_steps": 77800, "total_steps": 204665, "loss": 0.001, "lr": 1.5588258457826098e-06, "epoch": 1.9006669435418855, "percentage": 38.01, "elapsed_time": "1:40:29", "remaining_time": "2:43:52", "throughput": 8697.83, "total_tokens": 52447456} +{"current_steps": 77805, "total_steps": 204665, "loss": 0.0901, "lr": 1.5587551243134173e-06, "epoch": 1.9007890943737327, "percentage": 38.02, "elapsed_time": "1:40:30", "remaining_time": "2:43:52", "throughput": 8697.91, "total_tokens": 52451040} +{"current_steps": 77810, "total_steps": 204665, "loss": 0.0213, "lr": 1.5586843987808078e-06, "epoch": 1.9009112452055799, "percentage": 38.02, "elapsed_time": "1:40:30", "remaining_time": "2:43:51", "throughput": 8697.99, "total_tokens": 52454560} +{"current_steps": 77815, "total_steps": 204665, "loss": 0.0803, "lr": 1.558613669185295e-06, "epoch": 1.901033396037427, "percentage": 38.02, "elapsed_time": "1:40:30", "remaining_time": "2:43:51", "throughput": 8698.07, "total_tokens": 52458016} +{"current_steps": 77820, "total_steps": 204665, "loss": 0.0008, "lr": 1.558542935527393e-06, "epoch": 1.901155546869274, "percentage": 38.02, "elapsed_time": "1:40:31", "remaining_time": "2:43:50", "throughput": 8698.08, "total_tokens": 52461088} +{"current_steps": 77825, "total_steps": 204665, "loss": 0.2709, "lr": 1.5584721978076167e-06, "epoch": 1.9012776977011212, "percentage": 38.03, "elapsed_time": "1:40:31", "remaining_time": "2:43:50", "throughput": 8698.24, "total_tokens": 52465184} +{"current_steps": 77830, "total_steps": 204665, "loss": 0.0012, "lr": 1.5584014560264803e-06, "epoch": 1.9013998485329684, "percentage": 38.03, "elapsed_time": "1:40:32", "remaining_time": "2:43:50", "throughput": 8698.27, "total_tokens": 52468384} +{"current_steps": 77835, "total_steps": 204665, "loss": 0.0785, "lr": 1.5583307101844984e-06, "epoch": 1.9015219993648156, "percentage": 38.03, "elapsed_time": "1:40:32", "remaining_time": "2:43:49", "throughput": 8698.29, "total_tokens": 52471456} +{"current_steps": 77840, "total_steps": 204665, "loss": 0.1133, "lr": 1.5582599602821854e-06, "epoch": 1.9016441501966628, "percentage": 38.03, "elapsed_time": "1:40:32", "remaining_time": "2:43:49", "throughput": 8698.36, "total_tokens": 52474912} +{"current_steps": 77845, "total_steps": 204665, "loss": 0.128, "lr": 1.5581892063200556e-06, "epoch": 1.90176630102851, "percentage": 38.04, "elapsed_time": "1:40:33", "remaining_time": "2:43:48", "throughput": 8698.44, "total_tokens": 52478496} +{"current_steps": 77850, "total_steps": 204665, "loss": 0.0018, "lr": 1.5581184482986242e-06, "epoch": 1.9018884518603572, "percentage": 38.04, "elapsed_time": "1:40:33", "remaining_time": "2:43:48", "throughput": 8698.54, "total_tokens": 52482144} +{"current_steps": 77855, "total_steps": 204665, "loss": 0.0545, "lr": 1.558047686218405e-06, "epoch": 1.9020106026922043, "percentage": 38.04, "elapsed_time": "1:40:33", "remaining_time": "2:43:47", "throughput": 8698.61, "total_tokens": 52485664} +{"current_steps": 77860, "total_steps": 204665, "loss": 0.0012, "lr": 1.5579769200799132e-06, "epoch": 1.9021327535240515, "percentage": 38.04, "elapsed_time": "1:40:34", "remaining_time": "2:43:47", "throughput": 8698.68, "total_tokens": 52489120} +{"current_steps": 77865, "total_steps": 204665, "loss": 0.0627, "lr": 1.557906149883663e-06, "epoch": 1.9022549043558987, "percentage": 38.05, "elapsed_time": "1:40:34", "remaining_time": "2:43:46", "throughput": 8698.7, "total_tokens": 52492192} +{"current_steps": 77870, "total_steps": 204665, "loss": 0.0922, "lr": 1.557835375630169e-06, "epoch": 1.902377055187746, "percentage": 38.05, "elapsed_time": "1:40:34", "remaining_time": "2:43:46", "throughput": 8698.79, "total_tokens": 52495840} +{"current_steps": 77875, "total_steps": 204665, "loss": 0.0438, "lr": 1.5577645973199465e-06, "epoch": 1.902499206019593, "percentage": 38.05, "elapsed_time": "1:40:35", "remaining_time": "2:43:46", "throughput": 8698.8, "total_tokens": 52498784} +{"current_steps": 77880, "total_steps": 204665, "loss": 0.1144, "lr": 1.5576938149535096e-06, "epoch": 1.9026213568514403, "percentage": 38.05, "elapsed_time": "1:40:35", "remaining_time": "2:43:45", "throughput": 8698.86, "total_tokens": 52502176} +{"current_steps": 77885, "total_steps": 204665, "loss": 0.0658, "lr": 1.5576230285313732e-06, "epoch": 1.9027435076832875, "percentage": 38.05, "elapsed_time": "1:40:35", "remaining_time": "2:43:45", "throughput": 8698.94, "total_tokens": 52505696} +{"current_steps": 77890, "total_steps": 204665, "loss": 0.0224, "lr": 1.5575522380540522e-06, "epoch": 1.9028656585151345, "percentage": 38.06, "elapsed_time": "1:40:36", "remaining_time": "2:43:44", "throughput": 8698.97, "total_tokens": 52508832} +{"current_steps": 77895, "total_steps": 204665, "loss": 0.037, "lr": 1.5574814435220616e-06, "epoch": 1.9029878093469816, "percentage": 38.06, "elapsed_time": "1:40:36", "remaining_time": "2:43:44", "throughput": 8698.98, "total_tokens": 52511840} +{"current_steps": 77900, "total_steps": 204665, "loss": 0.1035, "lr": 1.5574106449359157e-06, "epoch": 1.9031099601788288, "percentage": 38.06, "elapsed_time": "1:40:36", "remaining_time": "2:43:43", "throughput": 8699.02, "total_tokens": 52515104} +{"current_steps": 77905, "total_steps": 204665, "loss": 0.1224, "lr": 1.55733984229613e-06, "epoch": 1.903232111010676, "percentage": 38.06, "elapsed_time": "1:40:37", "remaining_time": "2:43:43", "throughput": 8699.12, "total_tokens": 52518752} +{"current_steps": 77910, "total_steps": 204665, "loss": 0.1126, "lr": 1.5572690356032187e-06, "epoch": 1.903354261842523, "percentage": 38.07, "elapsed_time": "1:40:37", "remaining_time": "2:43:42", "throughput": 8699.19, "total_tokens": 52522208} +{"current_steps": 77915, "total_steps": 204665, "loss": 0.0724, "lr": 1.557198224857697e-06, "epoch": 1.9034764126743702, "percentage": 38.07, "elapsed_time": "1:40:37", "remaining_time": "2:43:42", "throughput": 8699.22, "total_tokens": 52525472} +{"current_steps": 77920, "total_steps": 204665, "loss": 0.1327, "lr": 1.5571274100600805e-06, "epoch": 1.9035985635062174, "percentage": 38.07, "elapsed_time": "1:40:38", "remaining_time": "2:43:41", "throughput": 8699.29, "total_tokens": 52528928} +{"current_steps": 77925, "total_steps": 204665, "loss": 0.0025, "lr": 1.5570565912108833e-06, "epoch": 1.9037207143380646, "percentage": 38.07, "elapsed_time": "1:40:38", "remaining_time": "2:43:41", "throughput": 8699.42, "total_tokens": 52532832} +{"current_steps": 77930, "total_steps": 204665, "loss": 0.0865, "lr": 1.5569857683106205e-06, "epoch": 1.9038428651699117, "percentage": 38.08, "elapsed_time": "1:40:39", "remaining_time": "2:43:41", "throughput": 8699.48, "total_tokens": 52536224} +{"current_steps": 77935, "total_steps": 204665, "loss": 0.1001, "lr": 1.5569149413598077e-06, "epoch": 1.903965016001759, "percentage": 38.08, "elapsed_time": "1:40:39", "remaining_time": "2:43:40", "throughput": 8699.55, "total_tokens": 52539680} +{"current_steps": 77940, "total_steps": 204665, "loss": 0.0035, "lr": 1.5568441103589596e-06, "epoch": 1.9040871668336061, "percentage": 38.08, "elapsed_time": "1:40:39", "remaining_time": "2:43:40", "throughput": 8699.55, "total_tokens": 52542624} +{"current_steps": 77945, "total_steps": 204665, "loss": 0.0354, "lr": 1.5567732753085915e-06, "epoch": 1.9042093176654533, "percentage": 38.08, "elapsed_time": "1:40:40", "remaining_time": "2:43:39", "throughput": 8699.6, "total_tokens": 52545952} +{"current_steps": 77950, "total_steps": 204665, "loss": 0.0308, "lr": 1.556702436209218e-06, "epoch": 1.9043314684973005, "percentage": 38.09, "elapsed_time": "1:40:40", "remaining_time": "2:43:39", "throughput": 8699.67, "total_tokens": 52549408} +{"current_steps": 77955, "total_steps": 204665, "loss": 0.0018, "lr": 1.556631593061355e-06, "epoch": 1.9044536193291477, "percentage": 38.09, "elapsed_time": "1:40:40", "remaining_time": "2:43:38", "throughput": 8699.7, "total_tokens": 52552544} +{"current_steps": 77960, "total_steps": 204665, "loss": 0.1904, "lr": 1.556560745865517e-06, "epoch": 1.9045757701609949, "percentage": 38.09, "elapsed_time": "1:40:41", "remaining_time": "2:43:38", "throughput": 8699.77, "total_tokens": 52556000} +{"current_steps": 77965, "total_steps": 204665, "loss": 0.1821, "lr": 1.5564898946222198e-06, "epoch": 1.904697920992842, "percentage": 38.09, "elapsed_time": "1:40:41", "remaining_time": "2:43:37", "throughput": 8699.87, "total_tokens": 52559712} +{"current_steps": 77970, "total_steps": 204665, "loss": 0.0012, "lr": 1.5564190393319784e-06, "epoch": 1.9048200718246893, "percentage": 38.1, "elapsed_time": "1:40:41", "remaining_time": "2:43:37", "throughput": 8699.94, "total_tokens": 52563168} +{"current_steps": 77975, "total_steps": 204665, "loss": 0.0939, "lr": 1.5563481799953082e-06, "epoch": 1.9049422226565365, "percentage": 38.1, "elapsed_time": "1:40:42", "remaining_time": "2:43:36", "throughput": 8700.05, "total_tokens": 52566944} +{"current_steps": 77980, "total_steps": 204665, "loss": 0.1157, "lr": 1.556277316612724e-06, "epoch": 1.9050643734883834, "percentage": 38.1, "elapsed_time": "1:40:42", "remaining_time": "2:43:36", "throughput": 8700.09, "total_tokens": 52570144} +{"current_steps": 77985, "total_steps": 204665, "loss": 0.0557, "lr": 1.556206449184742e-06, "epoch": 1.9051865243202306, "percentage": 38.1, "elapsed_time": "1:40:42", "remaining_time": "2:43:36", "throughput": 8700.11, "total_tokens": 52573216} +{"current_steps": 77990, "total_steps": 204665, "loss": 0.158, "lr": 1.5561355777118768e-06, "epoch": 1.9053086751520778, "percentage": 38.11, "elapsed_time": "1:40:43", "remaining_time": "2:43:35", "throughput": 8700.08, "total_tokens": 52575904} +{"current_steps": 77995, "total_steps": 204665, "loss": 0.0497, "lr": 1.5560647021946442e-06, "epoch": 1.905430825983925, "percentage": 38.11, "elapsed_time": "1:40:43", "remaining_time": "2:43:35", "throughput": 8700.13, "total_tokens": 52579232} +{"current_steps": 78000, "total_steps": 204665, "loss": 0.0926, "lr": 1.5559938226335593e-06, "epoch": 1.905552976815772, "percentage": 38.11, "elapsed_time": "1:40:43", "remaining_time": "2:43:34", "throughput": 8700.17, "total_tokens": 52582432} +{"current_steps": 78005, "total_steps": 204665, "loss": 0.0307, "lr": 1.5559229390291382e-06, "epoch": 1.9056751276476191, "percentage": 38.11, "elapsed_time": "1:40:44", "remaining_time": "2:43:34", "throughput": 8700.22, "total_tokens": 52585696} +{"current_steps": 78010, "total_steps": 204665, "loss": 0.3789, "lr": 1.5558520513818958e-06, "epoch": 1.9057972784794663, "percentage": 38.12, "elapsed_time": "1:40:44", "remaining_time": "2:43:33", "throughput": 8700.19, "total_tokens": 52588448} +{"current_steps": 78015, "total_steps": 204665, "loss": 0.047, "lr": 1.5557811596923477e-06, "epoch": 1.9059194293113135, "percentage": 38.12, "elapsed_time": "1:40:44", "remaining_time": "2:43:33", "throughput": 8700.24, "total_tokens": 52591712} +{"current_steps": 78020, "total_steps": 204665, "loss": 0.0008, "lr": 1.5557102639610095e-06, "epoch": 1.9060415801431607, "percentage": 38.12, "elapsed_time": "1:40:45", "remaining_time": "2:43:32", "throughput": 8700.28, "total_tokens": 52594976} +{"current_steps": 78025, "total_steps": 204665, "loss": 0.3379, "lr": 1.555639364188397e-06, "epoch": 1.906163730975008, "percentage": 38.12, "elapsed_time": "1:40:45", "remaining_time": "2:43:32", "throughput": 8700.37, "total_tokens": 52598560} +{"current_steps": 78030, "total_steps": 204665, "loss": 0.0404, "lr": 1.5555684603750252e-06, "epoch": 1.906285881806855, "percentage": 38.13, "elapsed_time": "1:40:45", "remaining_time": "2:43:31", "throughput": 8700.36, "total_tokens": 52601440} +{"current_steps": 78035, "total_steps": 204665, "loss": 0.0938, "lr": 1.5554975525214104e-06, "epoch": 1.9064080326387023, "percentage": 38.13, "elapsed_time": "1:40:46", "remaining_time": "2:43:31", "throughput": 8700.4, "total_tokens": 52604640} +{"current_steps": 78040, "total_steps": 204665, "loss": 0.1132, "lr": 1.555426640628068e-06, "epoch": 1.9065301834705495, "percentage": 38.13, "elapsed_time": "1:40:46", "remaining_time": "2:43:30", "throughput": 8700.44, "total_tokens": 52607904} +{"current_steps": 78045, "total_steps": 204665, "loss": 0.0164, "lr": 1.5553557246955137e-06, "epoch": 1.9066523343023967, "percentage": 38.13, "elapsed_time": "1:40:46", "remaining_time": "2:43:30", "throughput": 8700.45, "total_tokens": 52610848} +{"current_steps": 78050, "total_steps": 204665, "loss": 0.0401, "lr": 1.555284804724263e-06, "epoch": 1.9067744851342439, "percentage": 38.14, "elapsed_time": "1:40:47", "remaining_time": "2:43:30", "throughput": 8700.51, "total_tokens": 52614304} +{"current_steps": 78055, "total_steps": 204665, "loss": 0.1338, "lr": 1.5552138807148318e-06, "epoch": 1.906896635966091, "percentage": 38.14, "elapsed_time": "1:40:47", "remaining_time": "2:43:29", "throughput": 8700.65, "total_tokens": 52618208} +{"current_steps": 78060, "total_steps": 204665, "loss": 0.0733, "lr": 1.5551429526677363e-06, "epoch": 1.9070187867979382, "percentage": 38.14, "elapsed_time": "1:40:47", "remaining_time": "2:43:29", "throughput": 8700.66, "total_tokens": 52621216} +{"current_steps": 78065, "total_steps": 204665, "loss": 0.0397, "lr": 1.5550720205834917e-06, "epoch": 1.9071409376297852, "percentage": 38.14, "elapsed_time": "1:40:48", "remaining_time": "2:43:28", "throughput": 8700.73, "total_tokens": 52624736} +{"current_steps": 78070, "total_steps": 204665, "loss": 0.154, "lr": 1.555001084462614e-06, "epoch": 1.9072630884616324, "percentage": 38.15, "elapsed_time": "1:40:48", "remaining_time": "2:43:28", "throughput": 8700.77, "total_tokens": 52627936} +{"current_steps": 78075, "total_steps": 204665, "loss": 0.1156, "lr": 1.5549301443056192e-06, "epoch": 1.9073852392934796, "percentage": 38.15, "elapsed_time": "1:40:48", "remaining_time": "2:43:27", "throughput": 8700.81, "total_tokens": 52631200} +{"current_steps": 78080, "total_steps": 204665, "loss": 0.0882, "lr": 1.5548592001130234e-06, "epoch": 1.9075073901253268, "percentage": 38.15, "elapsed_time": "1:40:49", "remaining_time": "2:43:27", "throughput": 8700.94, "total_tokens": 52635104} +{"current_steps": 78085, "total_steps": 204665, "loss": 0.2286, "lr": 1.5547882518853417e-06, "epoch": 1.907629540957174, "percentage": 38.15, "elapsed_time": "1:40:49", "remaining_time": "2:43:26", "throughput": 8700.94, "total_tokens": 52638048} +{"current_steps": 78090, "total_steps": 204665, "loss": 0.203, "lr": 1.554717299623091e-06, "epoch": 1.907751691789021, "percentage": 38.16, "elapsed_time": "1:40:50", "remaining_time": "2:43:26", "throughput": 8700.98, "total_tokens": 52641312} +{"current_steps": 78095, "total_steps": 204665, "loss": 0.0695, "lr": 1.554646343326787e-06, "epoch": 1.9078738426208681, "percentage": 38.16, "elapsed_time": "1:40:50", "remaining_time": "2:43:25", "throughput": 8701.04, "total_tokens": 52644704} +{"current_steps": 78100, "total_steps": 204665, "loss": 0.0021, "lr": 1.5545753829969455e-06, "epoch": 1.9079959934527153, "percentage": 38.16, "elapsed_time": "1:40:50", "remaining_time": "2:43:25", "throughput": 8701.08, "total_tokens": 52647904} +{"current_steps": 78105, "total_steps": 204665, "loss": 0.0808, "lr": 1.5545044186340826e-06, "epoch": 1.9081181442845625, "percentage": 38.16, "elapsed_time": "1:40:51", "remaining_time": "2:43:25", "throughput": 8701.15, "total_tokens": 52651424} +{"current_steps": 78110, "total_steps": 204665, "loss": 0.0022, "lr": 1.554433450238714e-06, "epoch": 1.9082402951164097, "percentage": 38.16, "elapsed_time": "1:40:51", "remaining_time": "2:43:24", "throughput": 8701.19, "total_tokens": 52654624} +{"current_steps": 78115, "total_steps": 204665, "loss": 0.0401, "lr": 1.5543624778113568e-06, "epoch": 1.9083624459482569, "percentage": 38.17, "elapsed_time": "1:40:51", "remaining_time": "2:43:24", "throughput": 8701.22, "total_tokens": 52657824} +{"current_steps": 78120, "total_steps": 204665, "loss": 0.0846, "lr": 1.5542915013525265e-06, "epoch": 1.908484596780104, "percentage": 38.17, "elapsed_time": "1:40:52", "remaining_time": "2:43:23", "throughput": 8701.23, "total_tokens": 52660832} +{"current_steps": 78125, "total_steps": 204665, "loss": 0.1013, "lr": 1.5542205208627393e-06, "epoch": 1.9086067476119513, "percentage": 38.17, "elapsed_time": "1:40:52", "remaining_time": "2:43:23", "throughput": 8701.3, "total_tokens": 52664288} +{"current_steps": 78130, "total_steps": 204665, "loss": 0.0475, "lr": 1.5541495363425113e-06, "epoch": 1.9087288984437984, "percentage": 38.17, "elapsed_time": "1:40:52", "remaining_time": "2:43:22", "throughput": 8701.34, "total_tokens": 52667552} +{"current_steps": 78135, "total_steps": 204665, "loss": 0.0094, "lr": 1.5540785477923587e-06, "epoch": 1.9088510492756456, "percentage": 38.18, "elapsed_time": "1:40:53", "remaining_time": "2:43:22", "throughput": 8701.41, "total_tokens": 52671072} +{"current_steps": 78140, "total_steps": 204665, "loss": 0.1499, "lr": 1.5540075552127982e-06, "epoch": 1.9089732001074928, "percentage": 38.18, "elapsed_time": "1:40:53", "remaining_time": "2:43:21", "throughput": 8701.45, "total_tokens": 52674272} +{"current_steps": 78145, "total_steps": 204665, "loss": 0.1028, "lr": 1.5539365586043456e-06, "epoch": 1.90909535093934, "percentage": 38.18, "elapsed_time": "1:40:53", "remaining_time": "2:43:21", "throughput": 8701.5, "total_tokens": 52677600} +{"current_steps": 78150, "total_steps": 204665, "loss": 0.0684, "lr": 1.553865557967517e-06, "epoch": 1.9092175017711872, "percentage": 38.18, "elapsed_time": "1:40:54", "remaining_time": "2:43:20", "throughput": 8701.55, "total_tokens": 52680928} +{"current_steps": 78155, "total_steps": 204665, "loss": 0.1008, "lr": 1.5537945533028296e-06, "epoch": 1.9093396526030342, "percentage": 38.19, "elapsed_time": "1:40:54", "remaining_time": "2:43:20", "throughput": 8701.61, "total_tokens": 52684256} +{"current_steps": 78160, "total_steps": 204665, "loss": 0.1565, "lr": 1.553723544610799e-06, "epoch": 1.9094618034348814, "percentage": 38.19, "elapsed_time": "1:40:54", "remaining_time": "2:43:20", "throughput": 8701.68, "total_tokens": 52687776} +{"current_steps": 78165, "total_steps": 204665, "loss": 0.0459, "lr": 1.553652531891942e-06, "epoch": 1.9095839542667286, "percentage": 38.19, "elapsed_time": "1:40:55", "remaining_time": "2:43:19", "throughput": 8701.75, "total_tokens": 52691168} +{"current_steps": 78170, "total_steps": 204665, "loss": 0.1741, "lr": 1.5535815151467747e-06, "epoch": 1.9097061050985757, "percentage": 38.19, "elapsed_time": "1:40:55", "remaining_time": "2:43:19", "throughput": 8701.8, "total_tokens": 52694496} +{"current_steps": 78175, "total_steps": 204665, "loss": 0.1741, "lr": 1.5535104943758137e-06, "epoch": 1.909828255930423, "percentage": 38.2, "elapsed_time": "1:40:55", "remaining_time": "2:43:18", "throughput": 8701.81, "total_tokens": 52697568} +{"current_steps": 78180, "total_steps": 204665, "loss": 0.1526, "lr": 1.5534394695795757e-06, "epoch": 1.90995040676227, "percentage": 38.2, "elapsed_time": "1:40:56", "remaining_time": "2:43:18", "throughput": 8701.87, "total_tokens": 52700896} +{"current_steps": 78185, "total_steps": 204665, "loss": 0.0386, "lr": 1.553368440758577e-06, "epoch": 1.910072557594117, "percentage": 38.2, "elapsed_time": "1:40:56", "remaining_time": "2:43:17", "throughput": 8701.89, "total_tokens": 52703968} +{"current_steps": 78190, "total_steps": 204665, "loss": 0.0855, "lr": 1.5532974079133339e-06, "epoch": 1.9101947084259643, "percentage": 38.2, "elapsed_time": "1:40:56", "remaining_time": "2:43:17", "throughput": 8701.94, "total_tokens": 52707296} +{"current_steps": 78195, "total_steps": 204665, "loss": 0.0046, "lr": 1.5532263710443636e-06, "epoch": 1.9103168592578115, "percentage": 38.21, "elapsed_time": "1:40:57", "remaining_time": "2:43:16", "throughput": 8701.97, "total_tokens": 52710496} +{"current_steps": 78200, "total_steps": 204665, "loss": 0.0676, "lr": 1.5531553301521824e-06, "epoch": 1.9104390100896587, "percentage": 38.21, "elapsed_time": "1:40:57", "remaining_time": "2:43:16", "throughput": 8702.02, "total_tokens": 52713824} +{"current_steps": 78205, "total_steps": 204665, "loss": 0.0933, "lr": 1.5530842852373063e-06, "epoch": 1.9105611609215059, "percentage": 38.21, "elapsed_time": "1:40:58", "remaining_time": "2:43:15", "throughput": 8702.06, "total_tokens": 52717088} +{"current_steps": 78210, "total_steps": 204665, "loss": 0.0364, "lr": 1.5530132363002528e-06, "epoch": 1.910683311753353, "percentage": 38.21, "elapsed_time": "1:40:58", "remaining_time": "2:43:15", "throughput": 8702.18, "total_tokens": 52720928} +{"current_steps": 78215, "total_steps": 204665, "loss": 0.0951, "lr": 1.5529421833415383e-06, "epoch": 1.9108054625852002, "percentage": 38.22, "elapsed_time": "1:40:58", "remaining_time": "2:43:15", "throughput": 8702.22, "total_tokens": 52724192} +{"current_steps": 78220, "total_steps": 204665, "loss": 0.0037, "lr": 1.5528711263616795e-06, "epoch": 1.9109276134170474, "percentage": 38.22, "elapsed_time": "1:40:59", "remaining_time": "2:43:14", "throughput": 8702.32, "total_tokens": 52727904} +{"current_steps": 78225, "total_steps": 204665, "loss": 0.0388, "lr": 1.5528000653611932e-06, "epoch": 1.9110497642488946, "percentage": 38.22, "elapsed_time": "1:40:59", "remaining_time": "2:43:14", "throughput": 8702.34, "total_tokens": 52730976} +{"current_steps": 78230, "total_steps": 204665, "loss": 0.1138, "lr": 1.5527290003405961e-06, "epoch": 1.9111719150807418, "percentage": 38.22, "elapsed_time": "1:40:59", "remaining_time": "2:43:13", "throughput": 8702.37, "total_tokens": 52734176} +{"current_steps": 78235, "total_steps": 204665, "loss": 0.0956, "lr": 1.5526579313004053e-06, "epoch": 1.911294065912589, "percentage": 38.23, "elapsed_time": "1:41:00", "remaining_time": "2:43:13", "throughput": 8702.4, "total_tokens": 52737312} +{"current_steps": 78240, "total_steps": 204665, "loss": 0.1298, "lr": 1.552586858241137e-06, "epoch": 1.9114162167444362, "percentage": 38.23, "elapsed_time": "1:41:00", "remaining_time": "2:43:12", "throughput": 8702.44, "total_tokens": 52740576} +{"current_steps": 78245, "total_steps": 204665, "loss": 0.1594, "lr": 1.5525157811633087e-06, "epoch": 1.9115383675762831, "percentage": 38.23, "elapsed_time": "1:41:00", "remaining_time": "2:43:12", "throughput": 8702.47, "total_tokens": 52743712} +{"current_steps": 78250, "total_steps": 204665, "loss": 0.1102, "lr": 1.552444700067437e-06, "epoch": 1.9116605184081303, "percentage": 38.23, "elapsed_time": "1:41:01", "remaining_time": "2:43:11", "throughput": 8702.48, "total_tokens": 52746720} +{"current_steps": 78255, "total_steps": 204665, "loss": 0.0974, "lr": 1.5523736149540388e-06, "epoch": 1.9117826692399775, "percentage": 38.24, "elapsed_time": "1:41:01", "remaining_time": "2:43:11", "throughput": 8702.53, "total_tokens": 52750048} +{"current_steps": 78260, "total_steps": 204665, "loss": 0.0353, "lr": 1.5523025258236312e-06, "epoch": 1.9119048200718247, "percentage": 38.24, "elapsed_time": "1:41:01", "remaining_time": "2:43:10", "throughput": 8702.59, "total_tokens": 52753440} +{"current_steps": 78265, "total_steps": 204665, "loss": 0.0222, "lr": 1.5522314326767309e-06, "epoch": 1.912026970903672, "percentage": 38.24, "elapsed_time": "1:41:02", "remaining_time": "2:43:10", "throughput": 8702.66, "total_tokens": 52756896} +{"current_steps": 78270, "total_steps": 204665, "loss": 0.0442, "lr": 1.5521603355138552e-06, "epoch": 1.9121491217355189, "percentage": 38.24, "elapsed_time": "1:41:02", "remaining_time": "2:43:10", "throughput": 8702.78, "total_tokens": 52760736} +{"current_steps": 78275, "total_steps": 204665, "loss": 0.0553, "lr": 1.5520892343355208e-06, "epoch": 1.912271272567366, "percentage": 38.25, "elapsed_time": "1:41:02", "remaining_time": "2:43:09", "throughput": 8702.92, "total_tokens": 52764768} +{"current_steps": 78280, "total_steps": 204665, "loss": 0.0684, "lr": 1.5520181291422454e-06, "epoch": 1.9123934233992133, "percentage": 38.25, "elapsed_time": "1:41:03", "remaining_time": "2:43:09", "throughput": 8702.97, "total_tokens": 52768096} +{"current_steps": 78285, "total_steps": 204665, "loss": 0.0949, "lr": 1.5519470199345455e-06, "epoch": 1.9125155742310604, "percentage": 38.25, "elapsed_time": "1:41:03", "remaining_time": "2:43:08", "throughput": 8703.05, "total_tokens": 52771616} +{"current_steps": 78290, "total_steps": 204665, "loss": 0.261, "lr": 1.5518759067129383e-06, "epoch": 1.9126377250629076, "percentage": 38.25, "elapsed_time": "1:41:03", "remaining_time": "2:43:08", "throughput": 8703.05, "total_tokens": 52774624} +{"current_steps": 78295, "total_steps": 204665, "loss": 0.0024, "lr": 1.5518047894779413e-06, "epoch": 1.9127598758947548, "percentage": 38.26, "elapsed_time": "1:41:04", "remaining_time": "2:43:07", "throughput": 8703.16, "total_tokens": 52778336} +{"current_steps": 78300, "total_steps": 204665, "loss": 0.0392, "lr": 1.5517336682300711e-06, "epoch": 1.912882026726602, "percentage": 38.26, "elapsed_time": "1:41:04", "remaining_time": "2:43:07", "throughput": 8703.21, "total_tokens": 52781664} +{"current_steps": 78305, "total_steps": 204665, "loss": 0.2153, "lr": 1.5516625429698455e-06, "epoch": 1.9130041775584492, "percentage": 38.26, "elapsed_time": "1:41:04", "remaining_time": "2:43:06", "throughput": 8703.24, "total_tokens": 52784800} +{"current_steps": 78310, "total_steps": 204665, "loss": 0.0306, "lr": 1.5515914136977815e-06, "epoch": 1.9131263283902964, "percentage": 38.26, "elapsed_time": "1:41:05", "remaining_time": "2:43:06", "throughput": 8703.33, "total_tokens": 52788448} +{"current_steps": 78315, "total_steps": 204665, "loss": 0.0718, "lr": 1.5515202804143964e-06, "epoch": 1.9132484792221436, "percentage": 38.26, "elapsed_time": "1:41:05", "remaining_time": "2:43:06", "throughput": 8703.36, "total_tokens": 52791648} +{"current_steps": 78320, "total_steps": 204665, "loss": 0.0966, "lr": 1.5514491431202075e-06, "epoch": 1.9133706300539908, "percentage": 38.27, "elapsed_time": "1:41:06", "remaining_time": "2:43:05", "throughput": 8703.44, "total_tokens": 52795168} +{"current_steps": 78325, "total_steps": 204665, "loss": 0.0456, "lr": 1.5513780018157321e-06, "epoch": 1.913492780885838, "percentage": 38.27, "elapsed_time": "1:41:06", "remaining_time": "2:43:05", "throughput": 8703.53, "total_tokens": 52798752} +{"current_steps": 78330, "total_steps": 204665, "loss": 0.0781, "lr": 1.5513068565014875e-06, "epoch": 1.9136149317176852, "percentage": 38.27, "elapsed_time": "1:41:06", "remaining_time": "2:43:04", "throughput": 8703.61, "total_tokens": 52802336} +{"current_steps": 78335, "total_steps": 204665, "loss": 0.2495, "lr": 1.5512357071779912e-06, "epoch": 1.9137370825495321, "percentage": 38.27, "elapsed_time": "1:41:07", "remaining_time": "2:43:04", "throughput": 8703.65, "total_tokens": 52805600} +{"current_steps": 78340, "total_steps": 204665, "loss": 0.167, "lr": 1.5511645538457604e-06, "epoch": 1.9138592333813793, "percentage": 38.28, "elapsed_time": "1:41:07", "remaining_time": "2:43:03", "throughput": 8703.7, "total_tokens": 52808928} +{"current_steps": 78345, "total_steps": 204665, "loss": 0.0669, "lr": 1.551093396505313e-06, "epoch": 1.9139813842132265, "percentage": 38.28, "elapsed_time": "1:41:07", "remaining_time": "2:43:03", "throughput": 8703.72, "total_tokens": 52812064} +{"current_steps": 78350, "total_steps": 204665, "loss": 0.0021, "lr": 1.551022235157166e-06, "epoch": 1.9141035350450737, "percentage": 38.28, "elapsed_time": "1:41:08", "remaining_time": "2:43:02", "throughput": 8703.82, "total_tokens": 52815712} +{"current_steps": 78355, "total_steps": 204665, "loss": 0.051, "lr": 1.550951069801837e-06, "epoch": 1.9142256858769207, "percentage": 38.28, "elapsed_time": "1:41:08", "remaining_time": "2:43:02", "throughput": 8703.91, "total_tokens": 52819360} +{"current_steps": 78360, "total_steps": 204665, "loss": 0.0383, "lr": 1.550879900439844e-06, "epoch": 1.9143478367087678, "percentage": 38.29, "elapsed_time": "1:41:08", "remaining_time": "2:43:02", "throughput": 8703.96, "total_tokens": 52822688} +{"current_steps": 78365, "total_steps": 204665, "loss": 0.0459, "lr": 1.5508087270717041e-06, "epoch": 1.914469987540615, "percentage": 38.29, "elapsed_time": "1:41:09", "remaining_time": "2:43:01", "throughput": 8704.0, "total_tokens": 52825952} +{"current_steps": 78370, "total_steps": 204665, "loss": 0.1156, "lr": 1.550737549697935e-06, "epoch": 1.9145921383724622, "percentage": 38.29, "elapsed_time": "1:41:09", "remaining_time": "2:43:01", "throughput": 8704.04, "total_tokens": 52829152} +{"current_steps": 78375, "total_steps": 204665, "loss": 0.1214, "lr": 1.550666368319054e-06, "epoch": 1.9147142892043094, "percentage": 38.29, "elapsed_time": "1:41:09", "remaining_time": "2:43:00", "throughput": 8704.09, "total_tokens": 52832480} +{"current_steps": 78380, "total_steps": 204665, "loss": 0.1197, "lr": 1.5505951829355791e-06, "epoch": 1.9148364400361566, "percentage": 38.3, "elapsed_time": "1:41:10", "remaining_time": "2:43:00", "throughput": 8704.15, "total_tokens": 52835872} +{"current_steps": 78385, "total_steps": 204665, "loss": 0.1606, "lr": 1.5505239935480283e-06, "epoch": 1.9149585908680038, "percentage": 38.3, "elapsed_time": "1:41:10", "remaining_time": "2:42:59", "throughput": 8704.27, "total_tokens": 52839712} +{"current_steps": 78390, "total_steps": 204665, "loss": 0.002, "lr": 1.550452800156919e-06, "epoch": 1.915080741699851, "percentage": 38.3, "elapsed_time": "1:41:10", "remaining_time": "2:42:59", "throughput": 8704.36, "total_tokens": 52843360} +{"current_steps": 78395, "total_steps": 204665, "loss": 0.0617, "lr": 1.5503816027627684e-06, "epoch": 1.9152028925316982, "percentage": 38.3, "elapsed_time": "1:41:11", "remaining_time": "2:42:58", "throughput": 8704.36, "total_tokens": 52846304} +{"current_steps": 78400, "total_steps": 204665, "loss": 0.174, "lr": 1.5503104013660946e-06, "epoch": 1.9153250433635454, "percentage": 38.31, "elapsed_time": "1:41:11", "remaining_time": "2:42:58", "throughput": 8704.42, "total_tokens": 52849696} +{"current_steps": 78405, "total_steps": 204665, "loss": 0.0981, "lr": 1.550239195967416e-06, "epoch": 1.9154471941953926, "percentage": 38.31, "elapsed_time": "1:41:11", "remaining_time": "2:42:57", "throughput": 8704.47, "total_tokens": 52853024} +{"current_steps": 78410, "total_steps": 204665, "loss": 0.3347, "lr": 1.55016798656725e-06, "epoch": 1.9155693450272397, "percentage": 38.31, "elapsed_time": "1:41:12", "remaining_time": "2:42:57", "throughput": 8704.5, "total_tokens": 52856160} +{"current_steps": 78415, "total_steps": 204665, "loss": 0.0996, "lr": 1.5500967731661146e-06, "epoch": 1.915691495859087, "percentage": 38.31, "elapsed_time": "1:41:12", "remaining_time": "2:42:57", "throughput": 8704.53, "total_tokens": 52859360} +{"current_steps": 78420, "total_steps": 204665, "loss": 0.0661, "lr": 1.550025555764527e-06, "epoch": 1.9158136466909341, "percentage": 38.32, "elapsed_time": "1:41:12", "remaining_time": "2:42:56", "throughput": 8704.6, "total_tokens": 52862752} +{"current_steps": 78425, "total_steps": 204665, "loss": 0.0543, "lr": 1.5499543343630056e-06, "epoch": 1.915935797522781, "percentage": 38.32, "elapsed_time": "1:41:13", "remaining_time": "2:42:56", "throughput": 8704.68, "total_tokens": 52866336} +{"current_steps": 78430, "total_steps": 204665, "loss": 0.0118, "lr": 1.5498831089620686e-06, "epoch": 1.9160579483546283, "percentage": 38.32, "elapsed_time": "1:41:13", "remaining_time": "2:42:55", "throughput": 8704.66, "total_tokens": 52869152} +{"current_steps": 78435, "total_steps": 204665, "loss": 0.0841, "lr": 1.549811879562234e-06, "epoch": 1.9161800991864755, "percentage": 38.32, "elapsed_time": "1:41:14", "remaining_time": "2:42:55", "throughput": 8704.75, "total_tokens": 52872736} +{"current_steps": 78440, "total_steps": 204665, "loss": 0.0809, "lr": 1.549740646164019e-06, "epoch": 1.9163022500183227, "percentage": 38.33, "elapsed_time": "1:41:14", "remaining_time": "2:42:54", "throughput": 8704.82, "total_tokens": 52876256} +{"current_steps": 78445, "total_steps": 204665, "loss": 0.2138, "lr": 1.5496694087679427e-06, "epoch": 1.9164244008501696, "percentage": 38.33, "elapsed_time": "1:41:14", "remaining_time": "2:42:54", "throughput": 8704.9, "total_tokens": 52879776} +{"current_steps": 78450, "total_steps": 204665, "loss": 0.0528, "lr": 1.5495981673745222e-06, "epoch": 1.9165465516820168, "percentage": 38.33, "elapsed_time": "1:41:15", "remaining_time": "2:42:53", "throughput": 8704.91, "total_tokens": 52882848} +{"current_steps": 78455, "total_steps": 204665, "loss": 0.1183, "lr": 1.549526921984276e-06, "epoch": 1.916668702513864, "percentage": 38.33, "elapsed_time": "1:41:15", "remaining_time": "2:42:53", "throughput": 8704.98, "total_tokens": 52886304} +{"current_steps": 78460, "total_steps": 204665, "loss": 0.0998, "lr": 1.5494556725977224e-06, "epoch": 1.9167908533457112, "percentage": 38.34, "elapsed_time": "1:41:15", "remaining_time": "2:42:53", "throughput": 8705.07, "total_tokens": 52889952} +{"current_steps": 78465, "total_steps": 204665, "loss": 0.0358, "lr": 1.5493844192153794e-06, "epoch": 1.9169130041775584, "percentage": 38.34, "elapsed_time": "1:41:16", "remaining_time": "2:42:52", "throughput": 8705.12, "total_tokens": 52893280} +{"current_steps": 78470, "total_steps": 204665, "loss": 0.0706, "lr": 1.549313161837765e-06, "epoch": 1.9170351550094056, "percentage": 38.34, "elapsed_time": "1:41:16", "remaining_time": "2:42:52", "throughput": 8705.17, "total_tokens": 52896608} +{"current_steps": 78475, "total_steps": 204665, "loss": 0.1005, "lr": 1.5492419004653977e-06, "epoch": 1.9171573058412528, "percentage": 38.34, "elapsed_time": "1:41:16", "remaining_time": "2:42:51", "throughput": 8705.21, "total_tokens": 52899808} +{"current_steps": 78480, "total_steps": 204665, "loss": 0.0561, "lr": 1.5491706350987954e-06, "epoch": 1.9172794566731, "percentage": 38.35, "elapsed_time": "1:41:17", "remaining_time": "2:42:51", "throughput": 8705.26, "total_tokens": 52903136} +{"current_steps": 78485, "total_steps": 204665, "loss": 0.1255, "lr": 1.5490993657384766e-06, "epoch": 1.9174016075049471, "percentage": 38.35, "elapsed_time": "1:41:17", "remaining_time": "2:42:50", "throughput": 8705.34, "total_tokens": 52906720} +{"current_steps": 78490, "total_steps": 204665, "loss": 0.1982, "lr": 1.5490280923849595e-06, "epoch": 1.9175237583367943, "percentage": 38.35, "elapsed_time": "1:41:17", "remaining_time": "2:42:50", "throughput": 8705.47, "total_tokens": 52910624} +{"current_steps": 78495, "total_steps": 204665, "loss": 0.0295, "lr": 1.5489568150387624e-06, "epoch": 1.9176459091686415, "percentage": 38.35, "elapsed_time": "1:41:18", "remaining_time": "2:42:49", "throughput": 8705.55, "total_tokens": 52914208} +{"current_steps": 78500, "total_steps": 204665, "loss": 0.1131, "lr": 1.5488855337004035e-06, "epoch": 1.9177680600004887, "percentage": 38.36, "elapsed_time": "1:41:18", "remaining_time": "2:42:49", "throughput": 8705.58, "total_tokens": 52917344} +{"current_steps": 78505, "total_steps": 204665, "loss": 0.0533, "lr": 1.548814248370402e-06, "epoch": 1.917890210832336, "percentage": 38.36, "elapsed_time": "1:41:18", "remaining_time": "2:42:49", "throughput": 8705.66, "total_tokens": 52920928} +{"current_steps": 78510, "total_steps": 204665, "loss": 0.151, "lr": 1.548742959049275e-06, "epoch": 1.918012361664183, "percentage": 38.36, "elapsed_time": "1:41:19", "remaining_time": "2:42:48", "throughput": 8705.77, "total_tokens": 52924704} +{"current_steps": 78515, "total_steps": 204665, "loss": 0.0807, "lr": 1.548671665737542e-06, "epoch": 1.91813451249603, "percentage": 38.36, "elapsed_time": "1:41:19", "remaining_time": "2:42:48", "throughput": 8705.78, "total_tokens": 52927712} +{"current_steps": 78520, "total_steps": 204665, "loss": 0.16, "lr": 1.5486003684357209e-06, "epoch": 1.9182566633278773, "percentage": 38.37, "elapsed_time": "1:41:19", "remaining_time": "2:42:47", "throughput": 8705.82, "total_tokens": 52930976} +{"current_steps": 78525, "total_steps": 204665, "loss": 0.1026, "lr": 1.5485290671443306e-06, "epoch": 1.9183788141597244, "percentage": 38.37, "elapsed_time": "1:41:20", "remaining_time": "2:42:47", "throughput": 8705.89, "total_tokens": 52934496} +{"current_steps": 78530, "total_steps": 204665, "loss": 0.1407, "lr": 1.5484577618638892e-06, "epoch": 1.9185009649915716, "percentage": 38.37, "elapsed_time": "1:41:20", "remaining_time": "2:42:46", "throughput": 8705.96, "total_tokens": 52937952} +{"current_steps": 78535, "total_steps": 204665, "loss": 0.1558, "lr": 1.5483864525949156e-06, "epoch": 1.9186231158234186, "percentage": 38.37, "elapsed_time": "1:41:21", "remaining_time": "2:42:46", "throughput": 8706.01, "total_tokens": 52941280} +{"current_steps": 78540, "total_steps": 204665, "loss": 0.0748, "lr": 1.5483151393379278e-06, "epoch": 1.9187452666552658, "percentage": 38.37, "elapsed_time": "1:41:21", "remaining_time": "2:42:45", "throughput": 8706.05, "total_tokens": 52944544} +{"current_steps": 78545, "total_steps": 204665, "loss": 0.1002, "lr": 1.5482438220934453e-06, "epoch": 1.918867417487113, "percentage": 38.38, "elapsed_time": "1:41:21", "remaining_time": "2:42:45", "throughput": 8706.12, "total_tokens": 52948000} +{"current_steps": 78550, "total_steps": 204665, "loss": 0.1008, "lr": 1.5481725008619857e-06, "epoch": 1.9189895683189602, "percentage": 38.38, "elapsed_time": "1:41:22", "remaining_time": "2:42:44", "throughput": 8706.19, "total_tokens": 52951520} +{"current_steps": 78555, "total_steps": 204665, "loss": 0.0848, "lr": 1.5481011756440688e-06, "epoch": 1.9191117191508074, "percentage": 38.38, "elapsed_time": "1:41:22", "remaining_time": "2:42:44", "throughput": 8706.22, "total_tokens": 52954656} +{"current_steps": 78560, "total_steps": 204665, "loss": 0.07, "lr": 1.5480298464402127e-06, "epoch": 1.9192338699826546, "percentage": 38.38, "elapsed_time": "1:41:22", "remaining_time": "2:42:44", "throughput": 8706.32, "total_tokens": 52958368} +{"current_steps": 78565, "total_steps": 204665, "loss": 0.09, "lr": 1.5479585132509358e-06, "epoch": 1.9193560208145017, "percentage": 38.39, "elapsed_time": "1:41:23", "remaining_time": "2:42:43", "throughput": 8706.34, "total_tokens": 52961504} +{"current_steps": 78570, "total_steps": 204665, "loss": 0.0319, "lr": 1.5478871760767574e-06, "epoch": 1.919478171646349, "percentage": 38.39, "elapsed_time": "1:41:23", "remaining_time": "2:42:43", "throughput": 8706.38, "total_tokens": 52964704} +{"current_steps": 78575, "total_steps": 204665, "loss": 0.0083, "lr": 1.5478158349181963e-06, "epoch": 1.9196003224781961, "percentage": 38.39, "elapsed_time": "1:41:23", "remaining_time": "2:42:42", "throughput": 8706.41, "total_tokens": 52967904} +{"current_steps": 78580, "total_steps": 204665, "loss": 0.055, "lr": 1.5477444897757707e-06, "epoch": 1.9197224733100433, "percentage": 38.39, "elapsed_time": "1:41:24", "remaining_time": "2:42:42", "throughput": 8706.47, "total_tokens": 52971360} +{"current_steps": 78585, "total_steps": 204665, "loss": 0.0767, "lr": 1.54767314065e-06, "epoch": 1.9198446241418905, "percentage": 38.4, "elapsed_time": "1:41:24", "remaining_time": "2:42:41", "throughput": 8706.52, "total_tokens": 52974624} +{"current_steps": 78590, "total_steps": 204665, "loss": 0.0667, "lr": 1.547601787541403e-06, "epoch": 1.9199667749737377, "percentage": 38.4, "elapsed_time": "1:41:24", "remaining_time": "2:42:41", "throughput": 8706.53, "total_tokens": 52977696} +{"current_steps": 78595, "total_steps": 204665, "loss": 0.0052, "lr": 1.5475304304504983e-06, "epoch": 1.9200889258055849, "percentage": 38.4, "elapsed_time": "1:41:25", "remaining_time": "2:42:40", "throughput": 8706.57, "total_tokens": 52980960} +{"current_steps": 78600, "total_steps": 204665, "loss": 0.0763, "lr": 1.5474590693778054e-06, "epoch": 1.9202110766374318, "percentage": 38.4, "elapsed_time": "1:41:25", "remaining_time": "2:42:40", "throughput": 8706.69, "total_tokens": 52984800} +{"current_steps": 78605, "total_steps": 204665, "loss": 0.0017, "lr": 1.5473877043238428e-06, "epoch": 1.920333227469279, "percentage": 38.41, "elapsed_time": "1:41:25", "remaining_time": "2:42:39", "throughput": 8706.71, "total_tokens": 52987872} +{"current_steps": 78610, "total_steps": 204665, "loss": 0.0367, "lr": 1.5473163352891295e-06, "epoch": 1.9204553783011262, "percentage": 38.41, "elapsed_time": "1:41:26", "remaining_time": "2:42:39", "throughput": 8706.74, "total_tokens": 52991008} +{"current_steps": 78615, "total_steps": 204665, "loss": 0.0514, "lr": 1.5472449622741844e-06, "epoch": 1.9205775291329734, "percentage": 38.41, "elapsed_time": "1:41:26", "remaining_time": "2:42:39", "throughput": 8706.84, "total_tokens": 52994720} +{"current_steps": 78620, "total_steps": 204665, "loss": 0.2303, "lr": 1.547173585279527e-06, "epoch": 1.9206996799648206, "percentage": 38.41, "elapsed_time": "1:41:26", "remaining_time": "2:42:38", "throughput": 8706.91, "total_tokens": 52998176} +{"current_steps": 78625, "total_steps": 204665, "loss": 0.1555, "lr": 1.5471022043056761e-06, "epoch": 1.9208218307966676, "percentage": 38.42, "elapsed_time": "1:41:27", "remaining_time": "2:42:38", "throughput": 8706.97, "total_tokens": 53001568} +{"current_steps": 78630, "total_steps": 204665, "loss": 0.0836, "lr": 1.5470308193531505e-06, "epoch": 1.9209439816285148, "percentage": 38.42, "elapsed_time": "1:41:27", "remaining_time": "2:42:37", "throughput": 8706.99, "total_tokens": 53004704} +{"current_steps": 78635, "total_steps": 204665, "loss": 0.057, "lr": 1.54695943042247e-06, "epoch": 1.921066132460362, "percentage": 38.42, "elapsed_time": "1:41:27", "remaining_time": "2:42:37", "throughput": 8707.03, "total_tokens": 53007968} +{"current_steps": 78640, "total_steps": 204665, "loss": 0.0948, "lr": 1.5468880375141535e-06, "epoch": 1.9211882832922091, "percentage": 38.42, "elapsed_time": "1:41:28", "remaining_time": "2:42:36", "throughput": 8707.11, "total_tokens": 53011488} +{"current_steps": 78645, "total_steps": 204665, "loss": 0.0349, "lr": 1.5468166406287197e-06, "epoch": 1.9213104341240563, "percentage": 38.43, "elapsed_time": "1:41:28", "remaining_time": "2:42:36", "throughput": 8707.18, "total_tokens": 53014944} +{"current_steps": 78650, "total_steps": 204665, "loss": 0.05, "lr": 1.5467452397666885e-06, "epoch": 1.9214325849559035, "percentage": 38.43, "elapsed_time": "1:41:28", "remaining_time": "2:42:35", "throughput": 8707.22, "total_tokens": 53018208} +{"current_steps": 78655, "total_steps": 204665, "loss": 0.0014, "lr": 1.5466738349285788e-06, "epoch": 1.9215547357877507, "percentage": 38.43, "elapsed_time": "1:41:29", "remaining_time": "2:42:35", "throughput": 8707.25, "total_tokens": 53021408} +{"current_steps": 78660, "total_steps": 204665, "loss": 0.0496, "lr": 1.54660242611491e-06, "epoch": 1.921676886619598, "percentage": 38.43, "elapsed_time": "1:41:29", "remaining_time": "2:42:35", "throughput": 8707.3, "total_tokens": 53024672} +{"current_steps": 78665, "total_steps": 204665, "loss": 0.1131, "lr": 1.5465310133262014e-06, "epoch": 1.921799037451445, "percentage": 38.44, "elapsed_time": "1:41:30", "remaining_time": "2:42:34", "throughput": 8707.34, "total_tokens": 53027936} +{"current_steps": 78670, "total_steps": 204665, "loss": 0.1438, "lr": 1.5464595965629719e-06, "epoch": 1.9219211882832923, "percentage": 38.44, "elapsed_time": "1:41:30", "remaining_time": "2:42:34", "throughput": 8707.44, "total_tokens": 53031648} +{"current_steps": 78675, "total_steps": 204665, "loss": 0.0672, "lr": 1.5463881758257414e-06, "epoch": 1.9220433391151395, "percentage": 38.44, "elapsed_time": "1:41:30", "remaining_time": "2:42:33", "throughput": 8707.47, "total_tokens": 53034720} +{"current_steps": 78680, "total_steps": 204665, "loss": 0.0052, "lr": 1.5463167511150292e-06, "epoch": 1.9221654899469867, "percentage": 38.44, "elapsed_time": "1:41:31", "remaining_time": "2:42:33", "throughput": 8707.57, "total_tokens": 53038432} +{"current_steps": 78685, "total_steps": 204665, "loss": 0.1965, "lr": 1.5462453224313547e-06, "epoch": 1.9222876407788339, "percentage": 38.45, "elapsed_time": "1:41:31", "remaining_time": "2:42:32", "throughput": 8707.66, "total_tokens": 53042080} +{"current_steps": 78690, "total_steps": 204665, "loss": 0.0696, "lr": 1.5461738897752371e-06, "epoch": 1.9224097916106808, "percentage": 38.45, "elapsed_time": "1:41:31", "remaining_time": "2:42:32", "throughput": 8707.69, "total_tokens": 53045280} +{"current_steps": 78695, "total_steps": 204665, "loss": 0.0011, "lr": 1.5461024531471961e-06, "epoch": 1.922531942442528, "percentage": 38.45, "elapsed_time": "1:41:32", "remaining_time": "2:42:31", "throughput": 8707.72, "total_tokens": 53048416} +{"current_steps": 78700, "total_steps": 204665, "loss": 0.0433, "lr": 1.5460310125477516e-06, "epoch": 1.9226540932743752, "percentage": 38.45, "elapsed_time": "1:41:32", "remaining_time": "2:42:31", "throughput": 8707.69, "total_tokens": 53051104} +{"current_steps": 78705, "total_steps": 204665, "loss": 0.1933, "lr": 1.5459595679774223e-06, "epoch": 1.9227762441062224, "percentage": 38.46, "elapsed_time": "1:41:32", "remaining_time": "2:42:30", "throughput": 8707.69, "total_tokens": 53054048} +{"current_steps": 78710, "total_steps": 204665, "loss": 0.0407, "lr": 1.5458881194367282e-06, "epoch": 1.9228983949380696, "percentage": 38.46, "elapsed_time": "1:41:33", "remaining_time": "2:42:30", "throughput": 8707.77, "total_tokens": 53057568} +{"current_steps": 78715, "total_steps": 204665, "loss": 0.2197, "lr": 1.5458166669261888e-06, "epoch": 1.9230205457699165, "percentage": 38.46, "elapsed_time": "1:41:33", "remaining_time": "2:42:30", "throughput": 8707.77, "total_tokens": 53060512} +{"current_steps": 78720, "total_steps": 204665, "loss": 0.1353, "lr": 1.545745210446324e-06, "epoch": 1.9231426966017637, "percentage": 38.46, "elapsed_time": "1:41:33", "remaining_time": "2:42:29", "throughput": 8707.77, "total_tokens": 53063456} +{"current_steps": 78725, "total_steps": 204665, "loss": 0.17, "lr": 1.5456737499976532e-06, "epoch": 1.923264847433611, "percentage": 38.47, "elapsed_time": "1:41:34", "remaining_time": "2:42:29", "throughput": 8707.9, "total_tokens": 53067424} +{"current_steps": 78730, "total_steps": 204665, "loss": 0.0516, "lr": 1.5456022855806961e-06, "epoch": 1.9233869982654581, "percentage": 38.47, "elapsed_time": "1:41:34", "remaining_time": "2:42:28", "throughput": 8707.98, "total_tokens": 53070944} +{"current_steps": 78735, "total_steps": 204665, "loss": 0.0501, "lr": 1.5455308171959724e-06, "epoch": 1.9235091490973053, "percentage": 38.47, "elapsed_time": "1:41:34", "remaining_time": "2:42:28", "throughput": 8708.0, "total_tokens": 53074016} +{"current_steps": 78740, "total_steps": 204665, "loss": 0.1306, "lr": 1.5454593448440018e-06, "epoch": 1.9236312999291525, "percentage": 38.47, "elapsed_time": "1:41:35", "remaining_time": "2:42:27", "throughput": 8708.11, "total_tokens": 53077728} +{"current_steps": 78745, "total_steps": 204665, "loss": 0.062, "lr": 1.5453878685253043e-06, "epoch": 1.9237534507609997, "percentage": 38.48, "elapsed_time": "1:41:35", "remaining_time": "2:42:27", "throughput": 8708.14, "total_tokens": 53080928} +{"current_steps": 78750, "total_steps": 204665, "loss": 0.0918, "lr": 1.5453163882403994e-06, "epoch": 1.9238756015928469, "percentage": 38.48, "elapsed_time": "1:41:35", "remaining_time": "2:42:26", "throughput": 8708.26, "total_tokens": 53084768} +{"current_steps": 78755, "total_steps": 204665, "loss": 0.0855, "lr": 1.5452449039898073e-06, "epoch": 1.923997752424694, "percentage": 38.48, "elapsed_time": "1:41:36", "remaining_time": "2:42:26", "throughput": 8708.37, "total_tokens": 53088544} +{"current_steps": 78760, "total_steps": 204665, "loss": 0.0711, "lr": 1.5451734157740471e-06, "epoch": 1.9241199032565413, "percentage": 38.48, "elapsed_time": "1:41:36", "remaining_time": "2:42:25", "throughput": 8708.42, "total_tokens": 53091872} +{"current_steps": 78765, "total_steps": 204665, "loss": 0.0743, "lr": 1.5451019235936396e-06, "epoch": 1.9242420540883884, "percentage": 38.48, "elapsed_time": "1:41:36", "remaining_time": "2:42:25", "throughput": 8708.45, "total_tokens": 53095008} +{"current_steps": 78770, "total_steps": 204665, "loss": 0.0544, "lr": 1.5450304274491043e-06, "epoch": 1.9243642049202356, "percentage": 38.49, "elapsed_time": "1:41:37", "remaining_time": "2:42:25", "throughput": 8708.53, "total_tokens": 53098592} +{"current_steps": 78775, "total_steps": 204665, "loss": 0.0351, "lr": 1.5449589273409608e-06, "epoch": 1.9244863557520828, "percentage": 38.49, "elapsed_time": "1:41:37", "remaining_time": "2:42:24", "throughput": 8708.59, "total_tokens": 53101984} +{"current_steps": 78780, "total_steps": 204665, "loss": 0.1972, "lr": 1.5448874232697298e-06, "epoch": 1.9246085065839298, "percentage": 38.49, "elapsed_time": "1:41:38", "remaining_time": "2:42:24", "throughput": 8708.66, "total_tokens": 53105440} +{"current_steps": 78785, "total_steps": 204665, "loss": 0.1779, "lr": 1.5448159152359307e-06, "epoch": 1.924730657415777, "percentage": 38.49, "elapsed_time": "1:41:38", "remaining_time": "2:42:23", "throughput": 8708.63, "total_tokens": 53108128} +{"current_steps": 78790, "total_steps": 204665, "loss": 0.0853, "lr": 1.544744403240084e-06, "epoch": 1.9248528082476242, "percentage": 38.5, "elapsed_time": "1:41:38", "remaining_time": "2:42:23", "throughput": 8708.7, "total_tokens": 53111584} +{"current_steps": 78795, "total_steps": 204665, "loss": 0.0016, "lr": 1.5446728872827091e-06, "epoch": 1.9249749590794714, "percentage": 38.5, "elapsed_time": "1:41:39", "remaining_time": "2:42:22", "throughput": 8708.78, "total_tokens": 53115168} +{"current_steps": 78800, "total_steps": 204665, "loss": 0.0082, "lr": 1.5446013673643266e-06, "epoch": 1.9250971099113185, "percentage": 38.5, "elapsed_time": "1:41:39", "remaining_time": "2:42:22", "throughput": 8708.89, "total_tokens": 53118944} +{"current_steps": 78805, "total_steps": 204665, "loss": 0.1288, "lr": 1.5445298434854563e-06, "epoch": 1.9252192607431655, "percentage": 38.5, "elapsed_time": "1:41:39", "remaining_time": "2:42:21", "throughput": 8708.92, "total_tokens": 53122144} +{"current_steps": 78810, "total_steps": 204665, "loss": 0.1123, "lr": 1.5444583156466187e-06, "epoch": 1.9253414115750127, "percentage": 38.51, "elapsed_time": "1:41:40", "remaining_time": "2:42:21", "throughput": 8709.01, "total_tokens": 53125728} +{"current_steps": 78815, "total_steps": 204665, "loss": 0.0863, "lr": 1.544386783848334e-06, "epoch": 1.92546356240686, "percentage": 38.51, "elapsed_time": "1:41:40", "remaining_time": "2:42:21", "throughput": 8709.15, "total_tokens": 53129696} +{"current_steps": 78820, "total_steps": 204665, "loss": 0.0478, "lr": 1.544315248091122e-06, "epoch": 1.925585713238707, "percentage": 38.51, "elapsed_time": "1:41:40", "remaining_time": "2:42:20", "throughput": 8709.17, "total_tokens": 53132832} +{"current_steps": 78825, "total_steps": 204665, "loss": 0.1206, "lr": 1.544243708375503e-06, "epoch": 1.9257078640705543, "percentage": 38.51, "elapsed_time": "1:41:41", "remaining_time": "2:42:20", "throughput": 8709.25, "total_tokens": 53136352} +{"current_steps": 78830, "total_steps": 204665, "loss": 0.0627, "lr": 1.5441721647019974e-06, "epoch": 1.9258300149024015, "percentage": 38.52, "elapsed_time": "1:41:41", "remaining_time": "2:42:19", "throughput": 8709.33, "total_tokens": 53139936} +{"current_steps": 78835, "total_steps": 204665, "loss": 0.0372, "lr": 1.5441006170711255e-06, "epoch": 1.9259521657342487, "percentage": 38.52, "elapsed_time": "1:41:41", "remaining_time": "2:42:19", "throughput": 8709.39, "total_tokens": 53143328} +{"current_steps": 78840, "total_steps": 204665, "loss": 0.0459, "lr": 1.5440290654834075e-06, "epoch": 1.9260743165660958, "percentage": 38.52, "elapsed_time": "1:41:42", "remaining_time": "2:42:18", "throughput": 8709.44, "total_tokens": 53146656} +{"current_steps": 78845, "total_steps": 204665, "loss": 0.1206, "lr": 1.5439575099393639e-06, "epoch": 1.926196467397943, "percentage": 38.52, "elapsed_time": "1:41:42", "remaining_time": "2:42:18", "throughput": 8709.47, "total_tokens": 53149920} +{"current_steps": 78850, "total_steps": 204665, "loss": 0.0809, "lr": 1.543885950439515e-06, "epoch": 1.9263186182297902, "percentage": 38.53, "elapsed_time": "1:41:42", "remaining_time": "2:42:17", "throughput": 8709.53, "total_tokens": 53153312} +{"current_steps": 78855, "total_steps": 204665, "loss": 0.0791, "lr": 1.543814386984381e-06, "epoch": 1.9264407690616374, "percentage": 38.53, "elapsed_time": "1:41:43", "remaining_time": "2:42:17", "throughput": 8709.61, "total_tokens": 53156896} +{"current_steps": 78860, "total_steps": 204665, "loss": 0.1982, "lr": 1.5437428195744829e-06, "epoch": 1.9265629198934846, "percentage": 38.53, "elapsed_time": "1:41:43", "remaining_time": "2:42:17", "throughput": 8709.68, "total_tokens": 53160288} +{"current_steps": 78865, "total_steps": 204665, "loss": 0.1215, "lr": 1.5436712482103401e-06, "epoch": 1.9266850707253318, "percentage": 38.53, "elapsed_time": "1:41:43", "remaining_time": "2:42:16", "throughput": 8709.75, "total_tokens": 53163808} +{"current_steps": 78870, "total_steps": 204665, "loss": 0.0611, "lr": 1.5435996728924744e-06, "epoch": 1.9268072215571788, "percentage": 38.54, "elapsed_time": "1:41:44", "remaining_time": "2:42:16", "throughput": 8709.8, "total_tokens": 53167136} +{"current_steps": 78875, "total_steps": 204665, "loss": 0.1073, "lr": 1.5435280936214055e-06, "epoch": 1.926929372389026, "percentage": 38.54, "elapsed_time": "1:41:44", "remaining_time": "2:42:15", "throughput": 8709.86, "total_tokens": 53170528} +{"current_steps": 78880, "total_steps": 204665, "loss": 0.0009, "lr": 1.543456510397654e-06, "epoch": 1.9270515232208731, "percentage": 38.54, "elapsed_time": "1:41:44", "remaining_time": "2:42:15", "throughput": 8709.86, "total_tokens": 53173472} +{"current_steps": 78885, "total_steps": 204665, "loss": 0.103, "lr": 1.5433849232217407e-06, "epoch": 1.9271736740527203, "percentage": 38.54, "elapsed_time": "1:41:45", "remaining_time": "2:42:14", "throughput": 8709.95, "total_tokens": 53177056} +{"current_steps": 78890, "total_steps": 204665, "loss": 0.0019, "lr": 1.543313332094186e-06, "epoch": 1.9272958248845673, "percentage": 38.55, "elapsed_time": "1:41:45", "remaining_time": "2:42:14", "throughput": 8710.05, "total_tokens": 53180768} +{"current_steps": 78895, "total_steps": 204665, "loss": 0.0486, "lr": 1.5432417370155104e-06, "epoch": 1.9274179757164145, "percentage": 38.55, "elapsed_time": "1:41:46", "remaining_time": "2:42:13", "throughput": 8710.08, "total_tokens": 53183968} +{"current_steps": 78900, "total_steps": 204665, "loss": 0.0708, "lr": 1.5431701379862353e-06, "epoch": 1.9275401265482617, "percentage": 38.55, "elapsed_time": "1:41:46", "remaining_time": "2:42:13", "throughput": 8710.13, "total_tokens": 53187296} +{"current_steps": 78905, "total_steps": 204665, "loss": 0.0107, "lr": 1.5430985350068804e-06, "epoch": 1.9276622773801089, "percentage": 38.55, "elapsed_time": "1:41:46", "remaining_time": "2:42:12", "throughput": 8710.2, "total_tokens": 53190752} +{"current_steps": 78910, "total_steps": 204665, "loss": 0.0949, "lr": 1.543026928077967e-06, "epoch": 1.927784428211956, "percentage": 38.56, "elapsed_time": "1:41:47", "remaining_time": "2:42:12", "throughput": 8710.29, "total_tokens": 53194400} +{"current_steps": 78915, "total_steps": 204665, "loss": 0.1189, "lr": 1.5429553172000157e-06, "epoch": 1.9279065790438032, "percentage": 38.56, "elapsed_time": "1:41:47", "remaining_time": "2:42:12", "throughput": 8710.35, "total_tokens": 53197856} +{"current_steps": 78920, "total_steps": 204665, "loss": 0.0013, "lr": 1.5428837023735475e-06, "epoch": 1.9280287298756504, "percentage": 38.56, "elapsed_time": "1:41:47", "remaining_time": "2:42:11", "throughput": 8710.36, "total_tokens": 53200928} +{"current_steps": 78925, "total_steps": 204665, "loss": 0.0833, "lr": 1.5428120835990829e-06, "epoch": 1.9281508807074976, "percentage": 38.56, "elapsed_time": "1:41:48", "remaining_time": "2:42:11", "throughput": 8710.43, "total_tokens": 53204384} +{"current_steps": 78930, "total_steps": 204665, "loss": 0.1537, "lr": 1.5427404608771427e-06, "epoch": 1.9282730315393448, "percentage": 38.57, "elapsed_time": "1:41:48", "remaining_time": "2:42:10", "throughput": 8710.44, "total_tokens": 53207392} +{"current_steps": 78935, "total_steps": 204665, "loss": 0.058, "lr": 1.542668834208248e-06, "epoch": 1.928395182371192, "percentage": 38.57, "elapsed_time": "1:41:48", "remaining_time": "2:42:10", "throughput": 8710.51, "total_tokens": 53210912} +{"current_steps": 78940, "total_steps": 204665, "loss": 0.0759, "lr": 1.5425972035929196e-06, "epoch": 1.9285173332030392, "percentage": 38.57, "elapsed_time": "1:41:49", "remaining_time": "2:42:09", "throughput": 8710.59, "total_tokens": 53214432} +{"current_steps": 78945, "total_steps": 204665, "loss": 0.0417, "lr": 1.5425255690316783e-06, "epoch": 1.9286394840348864, "percentage": 38.57, "elapsed_time": "1:41:49", "remaining_time": "2:42:09", "throughput": 8710.64, "total_tokens": 53217760} +{"current_steps": 78950, "total_steps": 204665, "loss": 0.1116, "lr": 1.5424539305250452e-06, "epoch": 1.9287616348667336, "percentage": 38.58, "elapsed_time": "1:41:49", "remaining_time": "2:42:08", "throughput": 8710.66, "total_tokens": 53220896} +{"current_steps": 78955, "total_steps": 204665, "loss": 0.2375, "lr": 1.542382288073541e-06, "epoch": 1.9288837856985808, "percentage": 38.58, "elapsed_time": "1:41:50", "remaining_time": "2:42:08", "throughput": 8710.7, "total_tokens": 53224160} +{"current_steps": 78960, "total_steps": 204665, "loss": 0.1627, "lr": 1.5423106416776873e-06, "epoch": 1.9290059365304277, "percentage": 38.58, "elapsed_time": "1:41:50", "remaining_time": "2:42:08", "throughput": 8710.74, "total_tokens": 53227360} +{"current_steps": 78965, "total_steps": 204665, "loss": 0.0888, "lr": 1.5422389913380046e-06, "epoch": 1.929128087362275, "percentage": 38.58, "elapsed_time": "1:41:50", "remaining_time": "2:42:07", "throughput": 8710.77, "total_tokens": 53230560} +{"current_steps": 78970, "total_steps": 204665, "loss": 0.0429, "lr": 1.5421673370550142e-06, "epoch": 1.9292502381941221, "percentage": 38.59, "elapsed_time": "1:41:51", "remaining_time": "2:42:07", "throughput": 8710.81, "total_tokens": 53233824} +{"current_steps": 78975, "total_steps": 204665, "loss": 0.0709, "lr": 1.542095678829237e-06, "epoch": 1.9293723890259693, "percentage": 38.59, "elapsed_time": "1:41:51", "remaining_time": "2:42:06", "throughput": 8710.97, "total_tokens": 53237984} +{"current_steps": 78980, "total_steps": 204665, "loss": 0.264, "lr": 1.5420240166611942e-06, "epoch": 1.9294945398578163, "percentage": 38.59, "elapsed_time": "1:41:51", "remaining_time": "2:42:06", "throughput": 8711.01, "total_tokens": 53241312} +{"current_steps": 78985, "total_steps": 204665, "loss": 0.1324, "lr": 1.5419523505514068e-06, "epoch": 1.9296166906896635, "percentage": 38.59, "elapsed_time": "1:41:52", "remaining_time": "2:42:05", "throughput": 8711.11, "total_tokens": 53245024} +{"current_steps": 78990, "total_steps": 204665, "loss": 0.0684, "lr": 1.5418806805003964e-06, "epoch": 1.9297388415215107, "percentage": 38.59, "elapsed_time": "1:41:52", "remaining_time": "2:42:05", "throughput": 8711.18, "total_tokens": 53248480} +{"current_steps": 78995, "total_steps": 204665, "loss": 0.066, "lr": 1.5418090065086838e-06, "epoch": 1.9298609923533578, "percentage": 38.6, "elapsed_time": "1:41:53", "remaining_time": "2:42:04", "throughput": 8711.22, "total_tokens": 53251744} +{"current_steps": 79000, "total_steps": 204665, "loss": 0.0335, "lr": 1.5417373285767903e-06, "epoch": 1.929983143185205, "percentage": 38.6, "elapsed_time": "1:41:53", "remaining_time": "2:42:04", "throughput": 8711.3, "total_tokens": 53255328} +{"current_steps": 79005, "total_steps": 204665, "loss": 0.0654, "lr": 1.5416656467052374e-06, "epoch": 1.9301052940170522, "percentage": 38.6, "elapsed_time": "1:41:53", "remaining_time": "2:42:04", "throughput": 8711.37, "total_tokens": 53258784} +{"current_steps": 79010, "total_steps": 204665, "loss": 0.1352, "lr": 1.5415939608945463e-06, "epoch": 1.9302274448488994, "percentage": 38.6, "elapsed_time": "1:41:54", "remaining_time": "2:42:03", "throughput": 8711.45, "total_tokens": 53262368} +{"current_steps": 79015, "total_steps": 204665, "loss": 0.093, "lr": 1.5415222711452382e-06, "epoch": 1.9303495956807466, "percentage": 38.61, "elapsed_time": "1:41:54", "remaining_time": "2:42:03", "throughput": 8711.56, "total_tokens": 53266144} +{"current_steps": 79020, "total_steps": 204665, "loss": 0.0653, "lr": 1.5414505774578342e-06, "epoch": 1.9304717465125938, "percentage": 38.61, "elapsed_time": "1:41:54", "remaining_time": "2:42:02", "throughput": 8711.61, "total_tokens": 53269472} +{"current_steps": 79025, "total_steps": 204665, "loss": 0.0632, "lr": 1.5413788798328563e-06, "epoch": 1.930593897344441, "percentage": 38.61, "elapsed_time": "1:41:55", "remaining_time": "2:42:02", "throughput": 8711.67, "total_tokens": 53272864} +{"current_steps": 79030, "total_steps": 204665, "loss": 0.1579, "lr": 1.5413071782708254e-06, "epoch": 1.9307160481762882, "percentage": 38.61, "elapsed_time": "1:41:55", "remaining_time": "2:42:01", "throughput": 8711.66, "total_tokens": 53275680} +{"current_steps": 79035, "total_steps": 204665, "loss": 0.0986, "lr": 1.5412354727722631e-06, "epoch": 1.9308381990081354, "percentage": 38.62, "elapsed_time": "1:41:55", "remaining_time": "2:42:01", "throughput": 8711.77, "total_tokens": 53279456} +{"current_steps": 79040, "total_steps": 204665, "loss": 0.0759, "lr": 1.541163763337691e-06, "epoch": 1.9309603498399825, "percentage": 38.62, "elapsed_time": "1:41:56", "remaining_time": "2:42:00", "throughput": 8711.79, "total_tokens": 53282592} +{"current_steps": 79045, "total_steps": 204665, "loss": 0.0426, "lr": 1.5410920499676303e-06, "epoch": 1.9310825006718297, "percentage": 38.62, "elapsed_time": "1:41:56", "remaining_time": "2:42:00", "throughput": 8711.89, "total_tokens": 53286240} +{"current_steps": 79050, "total_steps": 204665, "loss": 0.0322, "lr": 1.5410203326626028e-06, "epoch": 1.9312046515036767, "percentage": 38.62, "elapsed_time": "1:41:56", "remaining_time": "2:42:00", "throughput": 8711.95, "total_tokens": 53289632} +{"current_steps": 79055, "total_steps": 204665, "loss": 0.0042, "lr": 1.54094861142313e-06, "epoch": 1.931326802335524, "percentage": 38.63, "elapsed_time": "1:41:57", "remaining_time": "2:41:59", "throughput": 8711.99, "total_tokens": 53292896} +{"current_steps": 79060, "total_steps": 204665, "loss": 0.1168, "lr": 1.5408768862497332e-06, "epoch": 1.931448953167371, "percentage": 38.63, "elapsed_time": "1:41:57", "remaining_time": "2:41:59", "throughput": 8712.08, "total_tokens": 53296480} +{"current_steps": 79065, "total_steps": 204665, "loss": 0.11, "lr": 1.5408051571429344e-06, "epoch": 1.9315711039992183, "percentage": 38.63, "elapsed_time": "1:41:57", "remaining_time": "2:41:58", "throughput": 8712.12, "total_tokens": 53299744} +{"current_steps": 79070, "total_steps": 204665, "loss": 0.046, "lr": 1.540733424103255e-06, "epoch": 1.9316932548310652, "percentage": 38.63, "elapsed_time": "1:41:58", "remaining_time": "2:41:58", "throughput": 8712.11, "total_tokens": 53302624} +{"current_steps": 79075, "total_steps": 204665, "loss": 0.0843, "lr": 1.5406616871312166e-06, "epoch": 1.9318154056629124, "percentage": 38.64, "elapsed_time": "1:41:58", "remaining_time": "2:41:57", "throughput": 8712.14, "total_tokens": 53305824} +{"current_steps": 79080, "total_steps": 204665, "loss": 0.1187, "lr": 1.540589946227341e-06, "epoch": 1.9319375564947596, "percentage": 38.64, "elapsed_time": "1:41:58", "remaining_time": "2:41:57", "throughput": 8712.13, "total_tokens": 53308704} +{"current_steps": 79085, "total_steps": 204665, "loss": 0.0444, "lr": 1.5405182013921498e-06, "epoch": 1.9320597073266068, "percentage": 38.64, "elapsed_time": "1:41:59", "remaining_time": "2:41:56", "throughput": 8712.14, "total_tokens": 53311712} +{"current_steps": 79090, "total_steps": 204665, "loss": 0.175, "lr": 1.5404464526261651e-06, "epoch": 1.932181858158454, "percentage": 38.64, "elapsed_time": "1:41:59", "remaining_time": "2:41:56", "throughput": 8712.2, "total_tokens": 53315104} +{"current_steps": 79095, "total_steps": 204665, "loss": 0.0021, "lr": 1.5403746999299083e-06, "epoch": 1.9323040089903012, "percentage": 38.65, "elapsed_time": "1:41:59", "remaining_time": "2:41:55", "throughput": 8712.18, "total_tokens": 53317920} +{"current_steps": 79100, "total_steps": 204665, "loss": 0.103, "lr": 1.540302943303901e-06, "epoch": 1.9324261598221484, "percentage": 38.65, "elapsed_time": "1:42:00", "remaining_time": "2:41:55", "throughput": 8712.25, "total_tokens": 53321376} +{"current_steps": 79105, "total_steps": 204665, "loss": 0.1041, "lr": 1.5402311827486663e-06, "epoch": 1.9325483106539956, "percentage": 38.65, "elapsed_time": "1:42:00", "remaining_time": "2:41:54", "throughput": 8712.26, "total_tokens": 53324448} +{"current_steps": 79110, "total_steps": 204665, "loss": 0.1032, "lr": 1.5401594182647241e-06, "epoch": 1.9326704614858428, "percentage": 38.65, "elapsed_time": "1:42:00", "remaining_time": "2:41:54", "throughput": 8712.34, "total_tokens": 53328032} +{"current_steps": 79115, "total_steps": 204665, "loss": 0.0018, "lr": 1.5400876498525978e-06, "epoch": 1.93279261231769, "percentage": 38.66, "elapsed_time": "1:42:01", "remaining_time": "2:41:54", "throughput": 8712.42, "total_tokens": 53331552} +{"current_steps": 79120, "total_steps": 204665, "loss": 0.0707, "lr": 1.540015877512809e-06, "epoch": 1.9329147631495371, "percentage": 38.66, "elapsed_time": "1:42:01", "remaining_time": "2:41:53", "throughput": 8712.55, "total_tokens": 53335456} +{"current_steps": 79125, "total_steps": 204665, "loss": 0.1359, "lr": 1.5399441012458793e-06, "epoch": 1.9330369139813843, "percentage": 38.66, "elapsed_time": "1:42:02", "remaining_time": "2:41:53", "throughput": 8712.58, "total_tokens": 53338720} +{"current_steps": 79130, "total_steps": 204665, "loss": 0.1184, "lr": 1.5398723210523313e-06, "epoch": 1.9331590648132315, "percentage": 38.66, "elapsed_time": "1:42:02", "remaining_time": "2:41:52", "throughput": 8712.62, "total_tokens": 53341920} +{"current_steps": 79135, "total_steps": 204665, "loss": 0.035, "lr": 1.5398005369326859e-06, "epoch": 1.9332812156450785, "percentage": 38.67, "elapsed_time": "1:42:02", "remaining_time": "2:41:52", "throughput": 8712.7, "total_tokens": 53345568} +{"current_steps": 79140, "total_steps": 204665, "loss": 0.2139, "lr": 1.5397287488874662e-06, "epoch": 1.9334033664769257, "percentage": 38.67, "elapsed_time": "1:42:03", "remaining_time": "2:41:51", "throughput": 8712.74, "total_tokens": 53348768} +{"current_steps": 79145, "total_steps": 204665, "loss": 0.088, "lr": 1.5396569569171935e-06, "epoch": 1.9335255173087729, "percentage": 38.67, "elapsed_time": "1:42:03", "remaining_time": "2:41:51", "throughput": 8712.8, "total_tokens": 53352224} +{"current_steps": 79150, "total_steps": 204665, "loss": 0.0501, "lr": 1.5395851610223906e-06, "epoch": 1.93364766814062, "percentage": 38.67, "elapsed_time": "1:42:03", "remaining_time": "2:41:50", "throughput": 8712.79, "total_tokens": 53355104} +{"current_steps": 79155, "total_steps": 204665, "loss": 0.1531, "lr": 1.5395133612035794e-06, "epoch": 1.9337698189724672, "percentage": 38.68, "elapsed_time": "1:42:04", "remaining_time": "2:41:50", "throughput": 8712.82, "total_tokens": 53358304} +{"current_steps": 79160, "total_steps": 204665, "loss": 0.0222, "lr": 1.5394415574612816e-06, "epoch": 1.9338919698043142, "percentage": 38.68, "elapsed_time": "1:42:04", "remaining_time": "2:41:50", "throughput": 8712.92, "total_tokens": 53362016} +{"current_steps": 79165, "total_steps": 204665, "loss": 0.0414, "lr": 1.5393697497960196e-06, "epoch": 1.9340141206361614, "percentage": 38.68, "elapsed_time": "1:42:04", "remaining_time": "2:41:49", "throughput": 8713.0, "total_tokens": 53365536} +{"current_steps": 79170, "total_steps": 204665, "loss": 0.2279, "lr": 1.5392979382083163e-06, "epoch": 1.9341362714680086, "percentage": 38.68, "elapsed_time": "1:42:05", "remaining_time": "2:41:49", "throughput": 8713.12, "total_tokens": 53369440} +{"current_steps": 79175, "total_steps": 204665, "loss": 0.0723, "lr": 1.5392261226986926e-06, "epoch": 1.9342584222998558, "percentage": 38.69, "elapsed_time": "1:42:05", "remaining_time": "2:41:48", "throughput": 8713.14, "total_tokens": 53372576} +{"current_steps": 79180, "total_steps": 204665, "loss": 0.2502, "lr": 1.5391543032676721e-06, "epoch": 1.934380573131703, "percentage": 38.69, "elapsed_time": "1:42:05", "remaining_time": "2:41:48", "throughput": 8713.15, "total_tokens": 53375584} +{"current_steps": 79185, "total_steps": 204665, "loss": 0.0783, "lr": 1.5390824799157763e-06, "epoch": 1.9345027239635502, "percentage": 38.69, "elapsed_time": "1:42:06", "remaining_time": "2:41:47", "throughput": 8713.15, "total_tokens": 53378528} +{"current_steps": 79190, "total_steps": 204665, "loss": 0.0312, "lr": 1.5390106526435277e-06, "epoch": 1.9346248747953974, "percentage": 38.69, "elapsed_time": "1:42:06", "remaining_time": "2:41:47", "throughput": 8713.21, "total_tokens": 53381984} +{"current_steps": 79195, "total_steps": 204665, "loss": 0.0512, "lr": 1.5389388214514485e-06, "epoch": 1.9347470256272445, "percentage": 38.69, "elapsed_time": "1:42:06", "remaining_time": "2:41:46", "throughput": 8713.28, "total_tokens": 53385440} +{"current_steps": 79200, "total_steps": 204665, "loss": 0.0864, "lr": 1.5388669863400614e-06, "epoch": 1.9348691764590917, "percentage": 38.7, "elapsed_time": "1:42:07", "remaining_time": "2:41:46", "throughput": 8713.34, "total_tokens": 53388896} +{"current_steps": 79205, "total_steps": 204665, "loss": 0.036, "lr": 1.5387951473098883e-06, "epoch": 1.934991327290939, "percentage": 38.7, "elapsed_time": "1:42:07", "remaining_time": "2:41:46", "throughput": 8713.4, "total_tokens": 53392288} +{"current_steps": 79210, "total_steps": 204665, "loss": 0.2508, "lr": 1.5387233043614525e-06, "epoch": 1.9351134781227861, "percentage": 38.7, "elapsed_time": "1:42:07", "remaining_time": "2:41:45", "throughput": 8713.49, "total_tokens": 53395936} +{"current_steps": 79215, "total_steps": 204665, "loss": 0.0236, "lr": 1.5386514574952756e-06, "epoch": 1.9352356289546333, "percentage": 38.7, "elapsed_time": "1:42:08", "remaining_time": "2:41:45", "throughput": 8713.53, "total_tokens": 53399200} +{"current_steps": 79220, "total_steps": 204665, "loss": 0.0322, "lr": 1.5385796067118805e-06, "epoch": 1.9353577797864805, "percentage": 38.71, "elapsed_time": "1:42:08", "remaining_time": "2:41:44", "throughput": 8713.6, "total_tokens": 53402784} +{"current_steps": 79225, "total_steps": 204665, "loss": 0.1729, "lr": 1.5385077520117898e-06, "epoch": 1.9354799306183275, "percentage": 38.71, "elapsed_time": "1:42:09", "remaining_time": "2:41:44", "throughput": 8713.72, "total_tokens": 53406624} +{"current_steps": 79230, "total_steps": 204665, "loss": 0.2673, "lr": 1.5384358933955257e-06, "epoch": 1.9356020814501746, "percentage": 38.71, "elapsed_time": "1:42:09", "remaining_time": "2:41:43", "throughput": 8713.75, "total_tokens": 53409824} +{"current_steps": 79235, "total_steps": 204665, "loss": 0.0014, "lr": 1.5383640308636108e-06, "epoch": 1.9357242322820218, "percentage": 38.71, "elapsed_time": "1:42:09", "remaining_time": "2:41:43", "throughput": 8713.82, "total_tokens": 53413280} +{"current_steps": 79240, "total_steps": 204665, "loss": 0.0711, "lr": 1.5382921644165682e-06, "epoch": 1.935846383113869, "percentage": 38.72, "elapsed_time": "1:42:10", "remaining_time": "2:41:43", "throughput": 8713.95, "total_tokens": 53417312} +{"current_steps": 79245, "total_steps": 204665, "loss": 0.0745, "lr": 1.53822029405492e-06, "epoch": 1.9359685339457162, "percentage": 38.72, "elapsed_time": "1:42:10", "remaining_time": "2:41:42", "throughput": 8714.03, "total_tokens": 53420832} +{"current_steps": 79250, "total_steps": 204665, "loss": 0.0018, "lr": 1.5381484197791891e-06, "epoch": 1.9360906847775632, "percentage": 38.72, "elapsed_time": "1:42:10", "remaining_time": "2:41:42", "throughput": 8714.03, "total_tokens": 53423840} +{"current_steps": 79255, "total_steps": 204665, "loss": 0.1126, "lr": 1.5380765415898984e-06, "epoch": 1.9362128356094104, "percentage": 38.72, "elapsed_time": "1:42:11", "remaining_time": "2:41:41", "throughput": 8714.06, "total_tokens": 53426976} +{"current_steps": 79260, "total_steps": 204665, "loss": 0.0283, "lr": 1.53800465948757e-06, "epoch": 1.9363349864412576, "percentage": 38.73, "elapsed_time": "1:42:11", "remaining_time": "2:41:41", "throughput": 8714.09, "total_tokens": 53430176} +{"current_steps": 79265, "total_steps": 204665, "loss": 0.0509, "lr": 1.537932773472727e-06, "epoch": 1.9364571372731048, "percentage": 38.73, "elapsed_time": "1:42:11", "remaining_time": "2:41:40", "throughput": 8714.18, "total_tokens": 53433824} +{"current_steps": 79270, "total_steps": 204665, "loss": 0.0054, "lr": 1.5378608835458922e-06, "epoch": 1.936579288104952, "percentage": 38.73, "elapsed_time": "1:42:12", "remaining_time": "2:41:40", "throughput": 8714.21, "total_tokens": 53437024} +{"current_steps": 79275, "total_steps": 204665, "loss": 0.0606, "lr": 1.5377889897075886e-06, "epoch": 1.9367014389367991, "percentage": 38.73, "elapsed_time": "1:42:12", "remaining_time": "2:41:39", "throughput": 8714.24, "total_tokens": 53440224} +{"current_steps": 79280, "total_steps": 204665, "loss": 0.0886, "lr": 1.537717091958339e-06, "epoch": 1.9368235897686463, "percentage": 38.74, "elapsed_time": "1:42:12", "remaining_time": "2:41:39", "throughput": 8714.32, "total_tokens": 53443808} +{"current_steps": 79285, "total_steps": 204665, "loss": 0.1185, "lr": 1.5376451902986659e-06, "epoch": 1.9369457406004935, "percentage": 38.74, "elapsed_time": "1:42:13", "remaining_time": "2:41:38", "throughput": 8714.33, "total_tokens": 53446880} +{"current_steps": 79290, "total_steps": 204665, "loss": 0.0366, "lr": 1.5375732847290923e-06, "epoch": 1.9370678914323407, "percentage": 38.74, "elapsed_time": "1:42:13", "remaining_time": "2:41:38", "throughput": 8714.41, "total_tokens": 53450464} +{"current_steps": 79295, "total_steps": 204665, "loss": 0.0236, "lr": 1.5375013752501412e-06, "epoch": 1.937190042264188, "percentage": 38.74, "elapsed_time": "1:42:13", "remaining_time": "2:41:38", "throughput": 8714.46, "total_tokens": 53453792} +{"current_steps": 79300, "total_steps": 204665, "loss": 0.0492, "lr": 1.5374294618623354e-06, "epoch": 1.937312193096035, "percentage": 38.75, "elapsed_time": "1:42:14", "remaining_time": "2:41:37", "throughput": 8714.51, "total_tokens": 53457120} +{"current_steps": 79305, "total_steps": 204665, "loss": 0.043, "lr": 1.537357544566198e-06, "epoch": 1.9374343439278823, "percentage": 38.75, "elapsed_time": "1:42:14", "remaining_time": "2:41:37", "throughput": 8714.56, "total_tokens": 53460448} +{"current_steps": 79310, "total_steps": 204665, "loss": 0.0276, "lr": 1.537285623362252e-06, "epoch": 1.9375564947597295, "percentage": 38.75, "elapsed_time": "1:42:14", "remaining_time": "2:41:36", "throughput": 8714.61, "total_tokens": 53463776} +{"current_steps": 79315, "total_steps": 204665, "loss": 0.0755, "lr": 1.5372136982510203e-06, "epoch": 1.9376786455915764, "percentage": 38.75, "elapsed_time": "1:42:15", "remaining_time": "2:41:36", "throughput": 8714.67, "total_tokens": 53467232} +{"current_steps": 79320, "total_steps": 204665, "loss": 0.0909, "lr": 1.5371417692330267e-06, "epoch": 1.9378007964234236, "percentage": 38.76, "elapsed_time": "1:42:15", "remaining_time": "2:41:35", "throughput": 8714.74, "total_tokens": 53470752} +{"current_steps": 79325, "total_steps": 204665, "loss": 0.0852, "lr": 1.537069836308793e-06, "epoch": 1.9379229472552708, "percentage": 38.76, "elapsed_time": "1:42:16", "remaining_time": "2:41:35", "throughput": 8714.76, "total_tokens": 53473824} +{"current_steps": 79330, "total_steps": 204665, "loss": 0.1114, "lr": 1.5369978994788436e-06, "epoch": 1.938045098087118, "percentage": 38.76, "elapsed_time": "1:42:16", "remaining_time": "2:41:34", "throughput": 8714.73, "total_tokens": 53476512} +{"current_steps": 79335, "total_steps": 204665, "loss": 0.0344, "lr": 1.5369259587437006e-06, "epoch": 1.9381672489189652, "percentage": 38.76, "elapsed_time": "1:42:16", "remaining_time": "2:41:34", "throughput": 8714.74, "total_tokens": 53479520} +{"current_steps": 79340, "total_steps": 204665, "loss": 0.1046, "lr": 1.5368540141038876e-06, "epoch": 1.9382893997508122, "percentage": 38.77, "elapsed_time": "1:42:17", "remaining_time": "2:41:33", "throughput": 8714.77, "total_tokens": 53482656} +{"current_steps": 79345, "total_steps": 204665, "loss": 0.0904, "lr": 1.5367820655599283e-06, "epoch": 1.9384115505826593, "percentage": 38.77, "elapsed_time": "1:42:17", "remaining_time": "2:41:33", "throughput": 8714.85, "total_tokens": 53486240} +{"current_steps": 79350, "total_steps": 204665, "loss": 0.1344, "lr": 1.536710113112345e-06, "epoch": 1.9385337014145065, "percentage": 38.77, "elapsed_time": "1:42:17", "remaining_time": "2:41:33", "throughput": 8714.93, "total_tokens": 53489760} +{"current_steps": 79355, "total_steps": 204665, "loss": 0.2347, "lr": 1.5366381567616615e-06, "epoch": 1.9386558522463537, "percentage": 38.77, "elapsed_time": "1:42:18", "remaining_time": "2:41:32", "throughput": 8714.96, "total_tokens": 53492960} +{"current_steps": 79360, "total_steps": 204665, "loss": 0.0645, "lr": 1.5365661965084008e-06, "epoch": 1.938778003078201, "percentage": 38.78, "elapsed_time": "1:42:18", "remaining_time": "2:41:32", "throughput": 8715.0, "total_tokens": 53496224} +{"current_steps": 79365, "total_steps": 204665, "loss": 0.1529, "lr": 1.5364942323530868e-06, "epoch": 1.938900153910048, "percentage": 38.78, "elapsed_time": "1:42:18", "remaining_time": "2:41:31", "throughput": 8715.0, "total_tokens": 53499168} +{"current_steps": 79370, "total_steps": 204665, "loss": 0.1296, "lr": 1.536422264296242e-06, "epoch": 1.9390223047418953, "percentage": 38.78, "elapsed_time": "1:42:19", "remaining_time": "2:41:31", "throughput": 8715.01, "total_tokens": 53502176} +{"current_steps": 79375, "total_steps": 204665, "loss": 0.1098, "lr": 1.5363502923383906e-06, "epoch": 1.9391444555737425, "percentage": 38.78, "elapsed_time": "1:42:19", "remaining_time": "2:41:30", "throughput": 8715.02, "total_tokens": 53505120} +{"current_steps": 79380, "total_steps": 204665, "loss": 0.0626, "lr": 1.5362783164800554e-06, "epoch": 1.9392666064055897, "percentage": 38.79, "elapsed_time": "1:42:19", "remaining_time": "2:41:30", "throughput": 8715.04, "total_tokens": 53508256} +{"current_steps": 79385, "total_steps": 204665, "loss": 0.0417, "lr": 1.5362063367217603e-06, "epoch": 1.9393887572374369, "percentage": 38.79, "elapsed_time": "1:42:20", "remaining_time": "2:41:29", "throughput": 8715.1, "total_tokens": 53511648} +{"current_steps": 79390, "total_steps": 204665, "loss": 0.1514, "lr": 1.5361343530640283e-06, "epoch": 1.939510908069284, "percentage": 38.79, "elapsed_time": "1:42:20", "remaining_time": "2:41:29", "throughput": 8715.11, "total_tokens": 53514656} +{"current_steps": 79395, "total_steps": 204665, "loss": 0.0348, "lr": 1.536062365507383e-06, "epoch": 1.9396330589011312, "percentage": 38.79, "elapsed_time": "1:42:20", "remaining_time": "2:41:28", "throughput": 8715.12, "total_tokens": 53517664} +{"current_steps": 79400, "total_steps": 204665, "loss": 0.1085, "lr": 1.5359903740523481e-06, "epoch": 1.9397552097329784, "percentage": 38.8, "elapsed_time": "1:42:21", "remaining_time": "2:41:28", "throughput": 8715.17, "total_tokens": 53520992} +{"current_steps": 79405, "total_steps": 204665, "loss": 0.1096, "lr": 1.535918378699447e-06, "epoch": 1.9398773605648254, "percentage": 38.8, "elapsed_time": "1:42:21", "remaining_time": "2:41:28", "throughput": 8715.18, "total_tokens": 53524000} +{"current_steps": 79410, "total_steps": 204665, "loss": 0.1842, "lr": 1.5358463794492034e-06, "epoch": 1.9399995113966726, "percentage": 38.8, "elapsed_time": "1:42:21", "remaining_time": "2:41:27", "throughput": 8715.21, "total_tokens": 53527200} +{"current_steps": 79415, "total_steps": 204665, "loss": 0.0709, "lr": 1.5357743763021407e-06, "epoch": 1.9401216622285198, "percentage": 38.8, "elapsed_time": "1:42:22", "remaining_time": "2:41:27", "throughput": 8715.29, "total_tokens": 53530784} +{"current_steps": 79420, "total_steps": 204665, "loss": 0.0449, "lr": 1.5357023692587827e-06, "epoch": 1.940243813060367, "percentage": 38.8, "elapsed_time": "1:42:22", "remaining_time": "2:41:26", "throughput": 8715.33, "total_tokens": 53534048} +{"current_steps": 79425, "total_steps": 204665, "loss": 0.0918, "lr": 1.5356303583196528e-06, "epoch": 1.940365963892214, "percentage": 38.81, "elapsed_time": "1:42:22", "remaining_time": "2:41:26", "throughput": 8715.39, "total_tokens": 53537440} +{"current_steps": 79430, "total_steps": 204665, "loss": 0.068, "lr": 1.5355583434852749e-06, "epoch": 1.9404881147240611, "percentage": 38.81, "elapsed_time": "1:42:23", "remaining_time": "2:41:25", "throughput": 8715.35, "total_tokens": 53540064} +{"current_steps": 79435, "total_steps": 204665, "loss": 0.0767, "lr": 1.535486324756173e-06, "epoch": 1.9406102655559083, "percentage": 38.81, "elapsed_time": "1:42:23", "remaining_time": "2:41:25", "throughput": 8715.44, "total_tokens": 53543648} +{"current_steps": 79440, "total_steps": 204665, "loss": 0.0317, "lr": 1.5354143021328704e-06, "epoch": 1.9407324163877555, "percentage": 38.81, "elapsed_time": "1:42:23", "remaining_time": "2:41:24", "throughput": 8715.55, "total_tokens": 53547488} +{"current_steps": 79445, "total_steps": 204665, "loss": 0.0711, "lr": 1.5353422756158909e-06, "epoch": 1.9408545672196027, "percentage": 38.82, "elapsed_time": "1:42:24", "remaining_time": "2:41:24", "throughput": 8715.65, "total_tokens": 53551136} +{"current_steps": 79450, "total_steps": 204665, "loss": 0.0091, "lr": 1.5352702452057584e-06, "epoch": 1.94097671805145, "percentage": 38.82, "elapsed_time": "1:42:24", "remaining_time": "2:41:24", "throughput": 8715.65, "total_tokens": 53554144} +{"current_steps": 79455, "total_steps": 204665, "loss": 0.0815, "lr": 1.5351982109029964e-06, "epoch": 1.941098868883297, "percentage": 38.82, "elapsed_time": "1:42:24", "remaining_time": "2:41:23", "throughput": 8715.68, "total_tokens": 53557280} +{"current_steps": 79460, "total_steps": 204665, "loss": 0.0037, "lr": 1.5351261727081295e-06, "epoch": 1.9412210197151443, "percentage": 38.82, "elapsed_time": "1:42:25", "remaining_time": "2:41:23", "throughput": 8715.68, "total_tokens": 53560288} +{"current_steps": 79465, "total_steps": 204665, "loss": 0.1259, "lr": 1.5350541306216809e-06, "epoch": 1.9413431705469915, "percentage": 38.83, "elapsed_time": "1:42:25", "remaining_time": "2:41:22", "throughput": 8715.76, "total_tokens": 53563808} +{"current_steps": 79470, "total_steps": 204665, "loss": 0.001, "lr": 1.5349820846441748e-06, "epoch": 1.9414653213788386, "percentage": 38.83, "elapsed_time": "1:42:25", "remaining_time": "2:41:22", "throughput": 8715.85, "total_tokens": 53567392} +{"current_steps": 79475, "total_steps": 204665, "loss": 0.1426, "lr": 1.5349100347761353e-06, "epoch": 1.9415874722106858, "percentage": 38.83, "elapsed_time": "1:42:26", "remaining_time": "2:41:21", "throughput": 8715.85, "total_tokens": 53570400} +{"current_steps": 79480, "total_steps": 204665, "loss": 0.0804, "lr": 1.5348379810180858e-06, "epoch": 1.941709623042533, "percentage": 38.83, "elapsed_time": "1:42:26", "remaining_time": "2:41:21", "throughput": 8715.91, "total_tokens": 53573792} +{"current_steps": 79485, "total_steps": 204665, "loss": 0.0546, "lr": 1.5347659233705507e-06, "epoch": 1.9418317738743802, "percentage": 38.84, "elapsed_time": "1:42:27", "remaining_time": "2:41:20", "throughput": 8715.94, "total_tokens": 53576992} +{"current_steps": 79490, "total_steps": 204665, "loss": 0.1037, "lr": 1.534693861834054e-06, "epoch": 1.9419539247062274, "percentage": 38.84, "elapsed_time": "1:42:27", "remaining_time": "2:41:20", "throughput": 8715.99, "total_tokens": 53580256} +{"current_steps": 79495, "total_steps": 204665, "loss": 0.0331, "lr": 1.5346217964091198e-06, "epoch": 1.9420760755380744, "percentage": 38.84, "elapsed_time": "1:42:27", "remaining_time": "2:41:19", "throughput": 8716.12, "total_tokens": 53584224} +{"current_steps": 79500, "total_steps": 204665, "loss": 0.0263, "lr": 1.5345497270962724e-06, "epoch": 1.9421982263699216, "percentage": 38.84, "elapsed_time": "1:42:28", "remaining_time": "2:41:19", "throughput": 8716.19, "total_tokens": 53587744} +{"current_steps": 79505, "total_steps": 204665, "loss": 0.0533, "lr": 1.5344776538960353e-06, "epoch": 1.9423203772017688, "percentage": 38.85, "elapsed_time": "1:42:28", "remaining_time": "2:41:19", "throughput": 8716.22, "total_tokens": 53590880} +{"current_steps": 79510, "total_steps": 204665, "loss": 0.144, "lr": 1.534405576808933e-06, "epoch": 1.942442528033616, "percentage": 38.85, "elapsed_time": "1:42:28", "remaining_time": "2:41:18", "throughput": 8716.27, "total_tokens": 53594208} +{"current_steps": 79515, "total_steps": 204665, "loss": 0.0418, "lr": 1.5343334958354893e-06, "epoch": 1.942564678865463, "percentage": 38.85, "elapsed_time": "1:42:29", "remaining_time": "2:41:18", "throughput": 8716.35, "total_tokens": 53597792} +{"current_steps": 79520, "total_steps": 204665, "loss": 0.0889, "lr": 1.534261410976229e-06, "epoch": 1.94268682969731, "percentage": 38.85, "elapsed_time": "1:42:29", "remaining_time": "2:41:17", "throughput": 8716.36, "total_tokens": 53600864} +{"current_steps": 79525, "total_steps": 204665, "loss": 0.1442, "lr": 1.5341893222316759e-06, "epoch": 1.9428089805291573, "percentage": 38.86, "elapsed_time": "1:42:29", "remaining_time": "2:41:17", "throughput": 8716.43, "total_tokens": 53604320} +{"current_steps": 79530, "total_steps": 204665, "loss": 0.0782, "lr": 1.5341172296023545e-06, "epoch": 1.9429311313610045, "percentage": 38.86, "elapsed_time": "1:42:30", "remaining_time": "2:41:16", "throughput": 8716.51, "total_tokens": 53607776} +{"current_steps": 79535, "total_steps": 204665, "loss": 0.0684, "lr": 1.5340451330887891e-06, "epoch": 1.9430532821928517, "percentage": 38.86, "elapsed_time": "1:42:30", "remaining_time": "2:41:16", "throughput": 8716.61, "total_tokens": 53611552} +{"current_steps": 79540, "total_steps": 204665, "loss": 0.1107, "lr": 1.5339730326915038e-06, "epoch": 1.9431754330246989, "percentage": 38.86, "elapsed_time": "1:42:30", "remaining_time": "2:41:15", "throughput": 8716.76, "total_tokens": 53615648} +{"current_steps": 79545, "total_steps": 204665, "loss": 0.1097, "lr": 1.5339009284110228e-06, "epoch": 1.943297583856546, "percentage": 38.87, "elapsed_time": "1:42:31", "remaining_time": "2:41:15", "throughput": 8716.79, "total_tokens": 53618848} +{"current_steps": 79550, "total_steps": 204665, "loss": 0.0017, "lr": 1.5338288202478706e-06, "epoch": 1.9434197346883932, "percentage": 38.87, "elapsed_time": "1:42:31", "remaining_time": "2:41:15", "throughput": 8716.87, "total_tokens": 53622432} +{"current_steps": 79555, "total_steps": 204665, "loss": 0.0393, "lr": 1.5337567082025714e-06, "epoch": 1.9435418855202404, "percentage": 38.87, "elapsed_time": "1:42:31", "remaining_time": "2:41:14", "throughput": 8716.95, "total_tokens": 53626016} +{"current_steps": 79560, "total_steps": 204665, "loss": 0.1167, "lr": 1.5336845922756502e-06, "epoch": 1.9436640363520876, "percentage": 38.87, "elapsed_time": "1:42:32", "remaining_time": "2:41:14", "throughput": 8717.06, "total_tokens": 53629792} +{"current_steps": 79565, "total_steps": 204665, "loss": 0.012, "lr": 1.5336124724676314e-06, "epoch": 1.9437861871839348, "percentage": 38.88, "elapsed_time": "1:42:32", "remaining_time": "2:41:13", "throughput": 8717.06, "total_tokens": 53632736} +{"current_steps": 79570, "total_steps": 204665, "loss": 0.0345, "lr": 1.533540348779039e-06, "epoch": 1.943908338015782, "percentage": 38.88, "elapsed_time": "1:42:32", "remaining_time": "2:41:13", "throughput": 8717.13, "total_tokens": 53636192} +{"current_steps": 79575, "total_steps": 204665, "loss": 0.0412, "lr": 1.5334682212103973e-06, "epoch": 1.9440304888476292, "percentage": 38.88, "elapsed_time": "1:42:33", "remaining_time": "2:41:12", "throughput": 8717.23, "total_tokens": 53639968} +{"current_steps": 79580, "total_steps": 204665, "loss": 0.0968, "lr": 1.5333960897622313e-06, "epoch": 1.9441526396794764, "percentage": 38.88, "elapsed_time": "1:42:33", "remaining_time": "2:41:12", "throughput": 8717.26, "total_tokens": 53643104} +{"current_steps": 79585, "total_steps": 204665, "loss": 0.0289, "lr": 1.5333239544350656e-06, "epoch": 1.9442747905113233, "percentage": 38.89, "elapsed_time": "1:42:34", "remaining_time": "2:41:11", "throughput": 8717.23, "total_tokens": 53645856} +{"current_steps": 79590, "total_steps": 204665, "loss": 0.0682, "lr": 1.533251815229425e-06, "epoch": 1.9443969413431705, "percentage": 38.89, "elapsed_time": "1:42:34", "remaining_time": "2:41:11", "throughput": 8717.35, "total_tokens": 53649696} +{"current_steps": 79595, "total_steps": 204665, "loss": 0.2259, "lr": 1.5331796721458332e-06, "epoch": 1.9445190921750177, "percentage": 38.89, "elapsed_time": "1:42:34", "remaining_time": "2:41:11", "throughput": 8717.42, "total_tokens": 53653152} +{"current_steps": 79600, "total_steps": 204665, "loss": 0.1408, "lr": 1.5331075251848159e-06, "epoch": 1.944641243006865, "percentage": 38.89, "elapsed_time": "1:42:35", "remaining_time": "2:41:10", "throughput": 8717.49, "total_tokens": 53656672} +{"current_steps": 79605, "total_steps": 204665, "loss": 0.1304, "lr": 1.5330353743468968e-06, "epoch": 1.9447633938387119, "percentage": 38.9, "elapsed_time": "1:42:35", "remaining_time": "2:41:10", "throughput": 8717.5, "total_tokens": 53659744} +{"current_steps": 79610, "total_steps": 204665, "loss": 0.0999, "lr": 1.5329632196326015e-06, "epoch": 1.944885544670559, "percentage": 38.9, "elapsed_time": "1:42:35", "remaining_time": "2:41:09", "throughput": 8717.55, "total_tokens": 53663136} +{"current_steps": 79615, "total_steps": 204665, "loss": 0.0075, "lr": 1.532891061042454e-06, "epoch": 1.9450076955024063, "percentage": 38.9, "elapsed_time": "1:42:36", "remaining_time": "2:41:09", "throughput": 8717.57, "total_tokens": 53666208} +{"current_steps": 79620, "total_steps": 204665, "loss": 0.0496, "lr": 1.5328188985769795e-06, "epoch": 1.9451298463342535, "percentage": 38.9, "elapsed_time": "1:42:36", "remaining_time": "2:41:08", "throughput": 8717.65, "total_tokens": 53669792} +{"current_steps": 79625, "total_steps": 204665, "loss": 0.0075, "lr": 1.5327467322367028e-06, "epoch": 1.9452519971661006, "percentage": 38.91, "elapsed_time": "1:42:36", "remaining_time": "2:41:08", "throughput": 8717.73, "total_tokens": 53673312} +{"current_steps": 79630, "total_steps": 204665, "loss": 0.0013, "lr": 1.5326745620221484e-06, "epoch": 1.9453741479979478, "percentage": 38.91, "elapsed_time": "1:42:37", "remaining_time": "2:41:07", "throughput": 8717.8, "total_tokens": 53676832} +{"current_steps": 79635, "total_steps": 204665, "loss": 0.1459, "lr": 1.5326023879338411e-06, "epoch": 1.945496298829795, "percentage": 38.91, "elapsed_time": "1:42:37", "remaining_time": "2:41:07", "throughput": 8717.83, "total_tokens": 53679968} +{"current_steps": 79640, "total_steps": 204665, "loss": 0.1007, "lr": 1.5325302099723065e-06, "epoch": 1.9456184496616422, "percentage": 38.91, "elapsed_time": "1:42:37", "remaining_time": "2:41:07", "throughput": 8717.91, "total_tokens": 53683488} +{"current_steps": 79645, "total_steps": 204665, "loss": 0.079, "lr": 1.5324580281380689e-06, "epoch": 1.9457406004934894, "percentage": 38.91, "elapsed_time": "1:42:38", "remaining_time": "2:41:06", "throughput": 8717.92, "total_tokens": 53686496} +{"current_steps": 79650, "total_steps": 204665, "loss": 0.0798, "lr": 1.5323858424316529e-06, "epoch": 1.9458627513253366, "percentage": 38.92, "elapsed_time": "1:42:38", "remaining_time": "2:41:06", "throughput": 8718.07, "total_tokens": 53690592} +{"current_steps": 79655, "total_steps": 204665, "loss": 0.0389, "lr": 1.5323136528535842e-06, "epoch": 1.9459849021571838, "percentage": 38.92, "elapsed_time": "1:42:38", "remaining_time": "2:41:05", "throughput": 8718.12, "total_tokens": 53693920} +{"current_steps": 79660, "total_steps": 204665, "loss": 0.2223, "lr": 1.5322414594043874e-06, "epoch": 1.946107052989031, "percentage": 38.92, "elapsed_time": "1:42:39", "remaining_time": "2:41:05", "throughput": 8718.18, "total_tokens": 53697376} +{"current_steps": 79665, "total_steps": 204665, "loss": 0.1392, "lr": 1.5321692620845875e-06, "epoch": 1.9462292038208782, "percentage": 38.92, "elapsed_time": "1:42:39", "remaining_time": "2:41:04", "throughput": 8718.24, "total_tokens": 53700768} +{"current_steps": 79670, "total_steps": 204665, "loss": 0.1499, "lr": 1.5320970608947093e-06, "epoch": 1.9463513546527251, "percentage": 38.93, "elapsed_time": "1:42:39", "remaining_time": "2:41:04", "throughput": 8718.28, "total_tokens": 53704032} +{"current_steps": 79675, "total_steps": 204665, "loss": 0.1676, "lr": 1.5320248558352784e-06, "epoch": 1.9464735054845723, "percentage": 38.93, "elapsed_time": "1:42:40", "remaining_time": "2:41:03", "throughput": 8718.33, "total_tokens": 53707360} +{"current_steps": 79680, "total_steps": 204665, "loss": 0.0718, "lr": 1.5319526469068196e-06, "epoch": 1.9465956563164195, "percentage": 38.93, "elapsed_time": "1:42:40", "remaining_time": "2:41:03", "throughput": 8718.37, "total_tokens": 53710560} +{"current_steps": 79685, "total_steps": 204665, "loss": 0.1137, "lr": 1.5318804341098583e-06, "epoch": 1.9467178071482667, "percentage": 38.93, "elapsed_time": "1:42:40", "remaining_time": "2:41:03", "throughput": 8718.37, "total_tokens": 53713504} +{"current_steps": 79690, "total_steps": 204665, "loss": 0.1201, "lr": 1.5318082174449192e-06, "epoch": 1.9468399579801139, "percentage": 38.94, "elapsed_time": "1:42:41", "remaining_time": "2:41:02", "throughput": 8718.4, "total_tokens": 53716704} +{"current_steps": 79695, "total_steps": 204665, "loss": 0.0778, "lr": 1.5317359969125279e-06, "epoch": 1.9469621088119609, "percentage": 38.94, "elapsed_time": "1:42:41", "remaining_time": "2:41:02", "throughput": 8718.52, "total_tokens": 53720480} +{"current_steps": 79700, "total_steps": 204665, "loss": 0.1615, "lr": 1.5316637725132094e-06, "epoch": 1.947084259643808, "percentage": 38.94, "elapsed_time": "1:42:41", "remaining_time": "2:41:01", "throughput": 8718.54, "total_tokens": 53723616} +{"current_steps": 79705, "total_steps": 204665, "loss": 0.0031, "lr": 1.5315915442474887e-06, "epoch": 1.9472064104756552, "percentage": 38.94, "elapsed_time": "1:42:42", "remaining_time": "2:41:01", "throughput": 8718.57, "total_tokens": 53726816} +{"current_steps": 79710, "total_steps": 204665, "loss": 0.1203, "lr": 1.5315193121158915e-06, "epoch": 1.9473285613075024, "percentage": 38.95, "elapsed_time": "1:42:42", "remaining_time": "2:41:00", "throughput": 8718.62, "total_tokens": 53730080} +{"current_steps": 79715, "total_steps": 204665, "loss": 0.0737, "lr": 1.5314470761189429e-06, "epoch": 1.9474507121393496, "percentage": 38.95, "elapsed_time": "1:42:43", "remaining_time": "2:41:00", "throughput": 8718.63, "total_tokens": 53733088} +{"current_steps": 79720, "total_steps": 204665, "loss": 0.2403, "lr": 1.5313748362571681e-06, "epoch": 1.9475728629711968, "percentage": 38.95, "elapsed_time": "1:42:43", "remaining_time": "2:40:59", "throughput": 8718.69, "total_tokens": 53736544} +{"current_steps": 79725, "total_steps": 204665, "loss": 0.0776, "lr": 1.5313025925310928e-06, "epoch": 1.947695013803044, "percentage": 38.95, "elapsed_time": "1:42:43", "remaining_time": "2:40:59", "throughput": 8718.71, "total_tokens": 53739680} +{"current_steps": 79730, "total_steps": 204665, "loss": 0.0806, "lr": 1.5312303449412419e-06, "epoch": 1.9478171646348912, "percentage": 38.96, "elapsed_time": "1:42:44", "remaining_time": "2:40:58", "throughput": 8718.77, "total_tokens": 53743072} +{"current_steps": 79735, "total_steps": 204665, "loss": 0.0024, "lr": 1.531158093488141e-06, "epoch": 1.9479393154667384, "percentage": 38.96, "elapsed_time": "1:42:44", "remaining_time": "2:40:58", "throughput": 8718.78, "total_tokens": 53746144} +{"current_steps": 79740, "total_steps": 204665, "loss": 0.0449, "lr": 1.5310858381723154e-06, "epoch": 1.9480614662985856, "percentage": 38.96, "elapsed_time": "1:42:44", "remaining_time": "2:40:58", "throughput": 8718.82, "total_tokens": 53749344} +{"current_steps": 79745, "total_steps": 204665, "loss": 0.0016, "lr": 1.5310135789942915e-06, "epoch": 1.9481836171304328, "percentage": 38.96, "elapsed_time": "1:42:45", "remaining_time": "2:40:57", "throughput": 8718.9, "total_tokens": 53752928} +{"current_steps": 79750, "total_steps": 204665, "loss": 0.0685, "lr": 1.5309413159545935e-06, "epoch": 1.94830576796228, "percentage": 38.97, "elapsed_time": "1:42:45", "remaining_time": "2:40:57", "throughput": 8718.99, "total_tokens": 53756576} +{"current_steps": 79755, "total_steps": 204665, "loss": 0.1998, "lr": 1.5308690490537477e-06, "epoch": 1.9484279187941271, "percentage": 38.97, "elapsed_time": "1:42:45", "remaining_time": "2:40:56", "throughput": 8719.03, "total_tokens": 53759776} +{"current_steps": 79760, "total_steps": 204665, "loss": 0.1284, "lr": 1.530796778292279e-06, "epoch": 1.948550069625974, "percentage": 38.97, "elapsed_time": "1:42:46", "remaining_time": "2:40:56", "throughput": 8719.07, "total_tokens": 53763104} +{"current_steps": 79765, "total_steps": 204665, "loss": 0.0915, "lr": 1.5307245036707136e-06, "epoch": 1.9486722204578213, "percentage": 38.97, "elapsed_time": "1:42:46", "remaining_time": "2:40:55", "throughput": 8719.11, "total_tokens": 53766304} +{"current_steps": 79770, "total_steps": 204665, "loss": 0.2308, "lr": 1.5306522251895766e-06, "epoch": 1.9487943712896685, "percentage": 38.98, "elapsed_time": "1:42:46", "remaining_time": "2:40:55", "throughput": 8719.16, "total_tokens": 53769632} +{"current_steps": 79775, "total_steps": 204665, "loss": 0.0016, "lr": 1.5305799428493944e-06, "epoch": 1.9489165221215157, "percentage": 38.98, "elapsed_time": "1:42:47", "remaining_time": "2:40:54", "throughput": 8719.24, "total_tokens": 53773216} +{"current_steps": 79780, "total_steps": 204665, "loss": 0.0651, "lr": 1.5305076566506918e-06, "epoch": 1.9490386729533629, "percentage": 38.98, "elapsed_time": "1:42:47", "remaining_time": "2:40:54", "throughput": 8719.29, "total_tokens": 53776544} +{"current_steps": 79785, "total_steps": 204665, "loss": 0.0341, "lr": 1.530435366593995e-06, "epoch": 1.9491608237852098, "percentage": 38.98, "elapsed_time": "1:42:47", "remaining_time": "2:40:54", "throughput": 8719.38, "total_tokens": 53780192} +{"current_steps": 79790, "total_steps": 204665, "loss": 0.0921, "lr": 1.5303630726798294e-06, "epoch": 1.949282974617057, "percentage": 38.99, "elapsed_time": "1:42:48", "remaining_time": "2:40:53", "throughput": 8719.43, "total_tokens": 53783520} +{"current_steps": 79795, "total_steps": 204665, "loss": 0.0783, "lr": 1.5302907749087209e-06, "epoch": 1.9494051254489042, "percentage": 38.99, "elapsed_time": "1:42:48", "remaining_time": "2:40:53", "throughput": 8719.58, "total_tokens": 53787616} +{"current_steps": 79800, "total_steps": 204665, "loss": 0.0736, "lr": 1.5302184732811952e-06, "epoch": 1.9495272762807514, "percentage": 38.99, "elapsed_time": "1:42:48", "remaining_time": "2:40:52", "throughput": 8719.65, "total_tokens": 53791072} +{"current_steps": 79805, "total_steps": 204665, "loss": 0.0424, "lr": 1.5301461677977782e-06, "epoch": 1.9496494271125986, "percentage": 38.99, "elapsed_time": "1:42:49", "remaining_time": "2:40:52", "throughput": 8719.71, "total_tokens": 53794528} +{"current_steps": 79810, "total_steps": 204665, "loss": 0.0995, "lr": 1.530073858458996e-06, "epoch": 1.9497715779444458, "percentage": 39.0, "elapsed_time": "1:42:49", "remaining_time": "2:40:51", "throughput": 8719.79, "total_tokens": 53798048} +{"current_steps": 79815, "total_steps": 204665, "loss": 0.045, "lr": 1.5300015452653737e-06, "epoch": 1.949893728776293, "percentage": 39.0, "elapsed_time": "1:42:49", "remaining_time": "2:40:51", "throughput": 8719.87, "total_tokens": 53801568} +{"current_steps": 79820, "total_steps": 204665, "loss": 0.0998, "lr": 1.529929228217438e-06, "epoch": 1.9500158796081402, "percentage": 39.0, "elapsed_time": "1:42:50", "remaining_time": "2:40:50", "throughput": 8719.9, "total_tokens": 53804768} +{"current_steps": 79825, "total_steps": 204665, "loss": 0.1129, "lr": 1.5298569073157138e-06, "epoch": 1.9501380304399873, "percentage": 39.0, "elapsed_time": "1:42:50", "remaining_time": "2:40:50", "throughput": 8719.93, "total_tokens": 53807968} +{"current_steps": 79830, "total_steps": 204665, "loss": 0.0385, "lr": 1.529784582560728e-06, "epoch": 1.9502601812718345, "percentage": 39.01, "elapsed_time": "1:42:51", "remaining_time": "2:40:50", "throughput": 8720.03, "total_tokens": 53811680} +{"current_steps": 79835, "total_steps": 204665, "loss": 0.1016, "lr": 1.5297122539530061e-06, "epoch": 1.9503823321036817, "percentage": 39.01, "elapsed_time": "1:42:51", "remaining_time": "2:40:49", "throughput": 8720.15, "total_tokens": 53815520} +{"current_steps": 79840, "total_steps": 204665, "loss": 0.1642, "lr": 1.5296399214930746e-06, "epoch": 1.950504482935529, "percentage": 39.01, "elapsed_time": "1:42:51", "remaining_time": "2:40:49", "throughput": 8720.18, "total_tokens": 53818784} +{"current_steps": 79845, "total_steps": 204665, "loss": 0.1013, "lr": 1.529567585181459e-06, "epoch": 1.950626633767376, "percentage": 39.01, "elapsed_time": "1:42:52", "remaining_time": "2:40:48", "throughput": 8720.23, "total_tokens": 53822048} +{"current_steps": 79850, "total_steps": 204665, "loss": 0.0947, "lr": 1.529495245018685e-06, "epoch": 1.950748784599223, "percentage": 39.01, "elapsed_time": "1:42:52", "remaining_time": "2:40:48", "throughput": 8720.23, "total_tokens": 53825056} +{"current_steps": 79855, "total_steps": 204665, "loss": 0.0969, "lr": 1.5294229010052799e-06, "epoch": 1.9508709354310703, "percentage": 39.02, "elapsed_time": "1:42:52", "remaining_time": "2:40:47", "throughput": 8720.29, "total_tokens": 53828448} +{"current_steps": 79860, "total_steps": 204665, "loss": 0.0795, "lr": 1.5293505531417686e-06, "epoch": 1.9509930862629175, "percentage": 39.02, "elapsed_time": "1:42:53", "remaining_time": "2:40:47", "throughput": 8720.33, "total_tokens": 53831648} +{"current_steps": 79865, "total_steps": 204665, "loss": 0.1232, "lr": 1.5292782014286778e-06, "epoch": 1.9511152370947646, "percentage": 39.02, "elapsed_time": "1:42:53", "remaining_time": "2:40:46", "throughput": 8720.35, "total_tokens": 53834720} +{"current_steps": 79870, "total_steps": 204665, "loss": 0.0317, "lr": 1.5292058458665336e-06, "epoch": 1.9512373879266118, "percentage": 39.02, "elapsed_time": "1:42:53", "remaining_time": "2:40:46", "throughput": 8720.35, "total_tokens": 53837664} +{"current_steps": 79875, "total_steps": 204665, "loss": 0.1238, "lr": 1.5291334864558621e-06, "epoch": 1.9513595387584588, "percentage": 39.03, "elapsed_time": "1:42:54", "remaining_time": "2:40:45", "throughput": 8720.39, "total_tokens": 53840928} +{"current_steps": 79880, "total_steps": 204665, "loss": 0.1142, "lr": 1.5290611231971895e-06, "epoch": 1.951481689590306, "percentage": 39.03, "elapsed_time": "1:42:54", "remaining_time": "2:40:45", "throughput": 8720.47, "total_tokens": 53844576} +{"current_steps": 79885, "total_steps": 204665, "loss": 0.0476, "lr": 1.5289887560910422e-06, "epoch": 1.9516038404221532, "percentage": 39.03, "elapsed_time": "1:42:54", "remaining_time": "2:40:45", "throughput": 8720.54, "total_tokens": 53848032} +{"current_steps": 79890, "total_steps": 204665, "loss": 0.1525, "lr": 1.528916385137946e-06, "epoch": 1.9517259912540004, "percentage": 39.03, "elapsed_time": "1:42:55", "remaining_time": "2:40:44", "throughput": 8720.58, "total_tokens": 53851296} +{"current_steps": 79895, "total_steps": 204665, "loss": 0.1233, "lr": 1.528844010338428e-06, "epoch": 1.9518481420858476, "percentage": 39.04, "elapsed_time": "1:42:55", "remaining_time": "2:40:44", "throughput": 8720.61, "total_tokens": 53854496} +{"current_steps": 79900, "total_steps": 204665, "loss": 0.1436, "lr": 1.5287716316930146e-06, "epoch": 1.9519702929176947, "percentage": 39.04, "elapsed_time": "1:42:55", "remaining_time": "2:40:43", "throughput": 8720.69, "total_tokens": 53858080} +{"current_steps": 79905, "total_steps": 204665, "loss": 0.1101, "lr": 1.528699249202231e-06, "epoch": 1.952092443749542, "percentage": 39.04, "elapsed_time": "1:42:56", "remaining_time": "2:40:43", "throughput": 8720.75, "total_tokens": 53861472} +{"current_steps": 79910, "total_steps": 204665, "loss": 0.1607, "lr": 1.5286268628666044e-06, "epoch": 1.9522145945813891, "percentage": 39.04, "elapsed_time": "1:42:56", "remaining_time": "2:40:42", "throughput": 8720.75, "total_tokens": 53864416} +{"current_steps": 79915, "total_steps": 204665, "loss": 0.0703, "lr": 1.5285544726866611e-06, "epoch": 1.9523367454132363, "percentage": 39.05, "elapsed_time": "1:42:56", "remaining_time": "2:40:42", "throughput": 8720.8, "total_tokens": 53867808} +{"current_steps": 79920, "total_steps": 204665, "loss": 0.1685, "lr": 1.5284820786629274e-06, "epoch": 1.9524588962450835, "percentage": 39.05, "elapsed_time": "1:42:57", "remaining_time": "2:40:41", "throughput": 8720.8, "total_tokens": 53870752} +{"current_steps": 79925, "total_steps": 204665, "loss": 0.0704, "lr": 1.52840968079593e-06, "epoch": 1.9525810470769307, "percentage": 39.05, "elapsed_time": "1:42:57", "remaining_time": "2:40:41", "throughput": 8720.84, "total_tokens": 53873952} +{"current_steps": 79930, "total_steps": 204665, "loss": 0.0026, "lr": 1.528337279086195e-06, "epoch": 1.9527031979087779, "percentage": 39.05, "elapsed_time": "1:42:57", "remaining_time": "2:40:41", "throughput": 8720.94, "total_tokens": 53877664} +{"current_steps": 79935, "total_steps": 204665, "loss": 0.0327, "lr": 1.5282648735342495e-06, "epoch": 1.952825348740625, "percentage": 39.06, "elapsed_time": "1:42:58", "remaining_time": "2:40:40", "throughput": 8720.96, "total_tokens": 53880800} +{"current_steps": 79940, "total_steps": 204665, "loss": 0.0877, "lr": 1.5281924641406198e-06, "epoch": 1.952947499572472, "percentage": 39.06, "elapsed_time": "1:42:58", "remaining_time": "2:40:40", "throughput": 8721.12, "total_tokens": 53884960} +{"current_steps": 79945, "total_steps": 204665, "loss": 0.0775, "lr": 1.5281200509058322e-06, "epoch": 1.9530696504043192, "percentage": 39.06, "elapsed_time": "1:42:59", "remaining_time": "2:40:39", "throughput": 8721.13, "total_tokens": 53887968} +{"current_steps": 79950, "total_steps": 204665, "loss": 0.1576, "lr": 1.5280476338304139e-06, "epoch": 1.9531918012361664, "percentage": 39.06, "elapsed_time": "1:42:59", "remaining_time": "2:40:39", "throughput": 8721.15, "total_tokens": 53891168} +{"current_steps": 79955, "total_steps": 204665, "loss": 0.0053, "lr": 1.527975212914891e-06, "epoch": 1.9533139520680136, "percentage": 39.07, "elapsed_time": "1:42:59", "remaining_time": "2:40:38", "throughput": 8721.21, "total_tokens": 53894560} +{"current_steps": 79960, "total_steps": 204665, "loss": 0.0029, "lr": 1.5279027881597904e-06, "epoch": 1.9534361028998606, "percentage": 39.07, "elapsed_time": "1:43:00", "remaining_time": "2:40:38", "throughput": 8721.25, "total_tokens": 53897824} +{"current_steps": 79965, "total_steps": 204665, "loss": 0.0022, "lr": 1.5278303595656384e-06, "epoch": 1.9535582537317078, "percentage": 39.07, "elapsed_time": "1:43:00", "remaining_time": "2:40:37", "throughput": 8721.36, "total_tokens": 53901600} +{"current_steps": 79970, "total_steps": 204665, "loss": 0.0011, "lr": 1.5277579271329623e-06, "epoch": 1.953680404563555, "percentage": 39.07, "elapsed_time": "1:43:00", "remaining_time": "2:40:37", "throughput": 8721.42, "total_tokens": 53904992} +{"current_steps": 79975, "total_steps": 204665, "loss": 0.0596, "lr": 1.5276854908622887e-06, "epoch": 1.9538025553954022, "percentage": 39.08, "elapsed_time": "1:43:01", "remaining_time": "2:40:37", "throughput": 8721.43, "total_tokens": 53908000} +{"current_steps": 79980, "total_steps": 204665, "loss": 0.0008, "lr": 1.527613050754144e-06, "epoch": 1.9539247062272493, "percentage": 39.08, "elapsed_time": "1:43:01", "remaining_time": "2:40:36", "throughput": 8721.47, "total_tokens": 53911328} +{"current_steps": 79985, "total_steps": 204665, "loss": 0.2629, "lr": 1.5275406068090555e-06, "epoch": 1.9540468570590965, "percentage": 39.08, "elapsed_time": "1:43:01", "remaining_time": "2:40:36", "throughput": 8721.49, "total_tokens": 53914464} +{"current_steps": 79990, "total_steps": 204665, "loss": 0.0013, "lr": 1.5274681590275495e-06, "epoch": 1.9541690078909437, "percentage": 39.08, "elapsed_time": "1:43:02", "remaining_time": "2:40:35", "throughput": 8721.55, "total_tokens": 53917856} +{"current_steps": 79995, "total_steps": 204665, "loss": 0.0438, "lr": 1.5273957074101539e-06, "epoch": 1.954291158722791, "percentage": 39.09, "elapsed_time": "1:43:02", "remaining_time": "2:40:35", "throughput": 8721.62, "total_tokens": 53921376} +{"current_steps": 80000, "total_steps": 204665, "loss": 0.074, "lr": 1.5273232519573943e-06, "epoch": 1.954413309554638, "percentage": 39.09, "elapsed_time": "1:43:02", "remaining_time": "2:40:34", "throughput": 8721.64, "total_tokens": 53924512} +{"current_steps": 80005, "total_steps": 204665, "loss": 0.0981, "lr": 1.5272507926697983e-06, "epoch": 1.9545354603864853, "percentage": 39.09, "elapsed_time": "1:43:03", "remaining_time": "2:40:34", "throughput": 8721.69, "total_tokens": 53927904} +{"current_steps": 80010, "total_steps": 204665, "loss": 0.21, "lr": 1.527178329547893e-06, "epoch": 1.9546576112183325, "percentage": 39.09, "elapsed_time": "1:43:03", "remaining_time": "2:40:33", "throughput": 8721.75, "total_tokens": 53931296} +{"current_steps": 80015, "total_steps": 204665, "loss": 0.0498, "lr": 1.5271058625922044e-06, "epoch": 1.9547797620501797, "percentage": 39.1, "elapsed_time": "1:43:03", "remaining_time": "2:40:33", "throughput": 8721.8, "total_tokens": 53934688} +{"current_steps": 80020, "total_steps": 204665, "loss": 0.1497, "lr": 1.5270333918032607e-06, "epoch": 1.9549019128820269, "percentage": 39.1, "elapsed_time": "1:43:04", "remaining_time": "2:40:33", "throughput": 8721.85, "total_tokens": 53938016} +{"current_steps": 80025, "total_steps": 204665, "loss": 0.0401, "lr": 1.5269609171815884e-06, "epoch": 1.955024063713874, "percentage": 39.1, "elapsed_time": "1:43:04", "remaining_time": "2:40:32", "throughput": 8721.85, "total_tokens": 53940960} +{"current_steps": 80030, "total_steps": 204665, "loss": 0.1183, "lr": 1.5268884387277143e-06, "epoch": 1.955146214545721, "percentage": 39.1, "elapsed_time": "1:43:04", "remaining_time": "2:40:32", "throughput": 8721.94, "total_tokens": 53944608} +{"current_steps": 80035, "total_steps": 204665, "loss": 0.1696, "lr": 1.5268159564421658e-06, "epoch": 1.9552683653775682, "percentage": 39.11, "elapsed_time": "1:43:05", "remaining_time": "2:40:31", "throughput": 8721.94, "total_tokens": 53947552} +{"current_steps": 80040, "total_steps": 204665, "loss": 0.0395, "lr": 1.5267434703254701e-06, "epoch": 1.9553905162094154, "percentage": 39.11, "elapsed_time": "1:43:05", "remaining_time": "2:40:31", "throughput": 8721.99, "total_tokens": 53950880} +{"current_steps": 80045, "total_steps": 204665, "loss": 0.065, "lr": 1.5266709803781544e-06, "epoch": 1.9555126670412626, "percentage": 39.11, "elapsed_time": "1:43:05", "remaining_time": "2:40:30", "throughput": 8722.06, "total_tokens": 53954336} +{"current_steps": 80050, "total_steps": 204665, "loss": 0.0143, "lr": 1.5265984866007453e-06, "epoch": 1.9556348178731096, "percentage": 39.11, "elapsed_time": "1:43:06", "remaining_time": "2:40:30", "throughput": 8722.11, "total_tokens": 53957728} +{"current_steps": 80055, "total_steps": 204665, "loss": 0.0829, "lr": 1.5265259889937708e-06, "epoch": 1.9557569687049567, "percentage": 39.12, "elapsed_time": "1:43:06", "remaining_time": "2:40:29", "throughput": 8722.16, "total_tokens": 53961056} +{"current_steps": 80060, "total_steps": 204665, "loss": 0.1419, "lr": 1.5264534875577575e-06, "epoch": 1.955879119536804, "percentage": 39.12, "elapsed_time": "1:43:07", "remaining_time": "2:40:29", "throughput": 8722.2, "total_tokens": 53964320} +{"current_steps": 80065, "total_steps": 204665, "loss": 0.1244, "lr": 1.526380982293233e-06, "epoch": 1.9560012703686511, "percentage": 39.12, "elapsed_time": "1:43:07", "remaining_time": "2:40:28", "throughput": 8722.2, "total_tokens": 53967328} +{"current_steps": 80070, "total_steps": 204665, "loss": 0.0039, "lr": 1.5263084732007242e-06, "epoch": 1.9561234212004983, "percentage": 39.12, "elapsed_time": "1:43:07", "remaining_time": "2:40:28", "throughput": 8722.22, "total_tokens": 53970400} +{"current_steps": 80075, "total_steps": 204665, "loss": 0.0678, "lr": 1.5262359602807583e-06, "epoch": 1.9562455720323455, "percentage": 39.12, "elapsed_time": "1:43:08", "remaining_time": "2:40:28", "throughput": 8722.24, "total_tokens": 53973536} +{"current_steps": 80080, "total_steps": 204665, "loss": 0.0714, "lr": 1.5261634435338632e-06, "epoch": 1.9563677228641927, "percentage": 39.13, "elapsed_time": "1:43:08", "remaining_time": "2:40:27", "throughput": 8722.28, "total_tokens": 53976736} +{"current_steps": 80085, "total_steps": 204665, "loss": 0.0319, "lr": 1.526090922960566e-06, "epoch": 1.9564898736960399, "percentage": 39.13, "elapsed_time": "1:43:08", "remaining_time": "2:40:27", "throughput": 8722.32, "total_tokens": 53980064} +{"current_steps": 80090, "total_steps": 204665, "loss": 0.0608, "lr": 1.5260183985613945e-06, "epoch": 1.956612024527887, "percentage": 39.13, "elapsed_time": "1:43:09", "remaining_time": "2:40:26", "throughput": 8722.4, "total_tokens": 53983584} +{"current_steps": 80095, "total_steps": 204665, "loss": 0.0483, "lr": 1.5259458703368754e-06, "epoch": 1.9567341753597343, "percentage": 39.13, "elapsed_time": "1:43:09", "remaining_time": "2:40:26", "throughput": 8722.46, "total_tokens": 53986976} +{"current_steps": 80100, "total_steps": 204665, "loss": 0.2075, "lr": 1.5258733382875365e-06, "epoch": 1.9568563261915815, "percentage": 39.14, "elapsed_time": "1:43:09", "remaining_time": "2:40:25", "throughput": 8722.56, "total_tokens": 53990688} +{"current_steps": 80105, "total_steps": 204665, "loss": 0.1296, "lr": 1.5258008024139052e-06, "epoch": 1.9569784770234286, "percentage": 39.14, "elapsed_time": "1:43:10", "remaining_time": "2:40:25", "throughput": 8722.59, "total_tokens": 53993824} +{"current_steps": 80110, "total_steps": 204665, "loss": 0.1, "lr": 1.5257282627165093e-06, "epoch": 1.9571006278552758, "percentage": 39.14, "elapsed_time": "1:43:10", "remaining_time": "2:40:24", "throughput": 8722.79, "total_tokens": 53998304} +{"current_steps": 80115, "total_steps": 204665, "loss": 0.0711, "lr": 1.5256557191958756e-06, "epoch": 1.957222778687123, "percentage": 39.14, "elapsed_time": "1:43:10", "remaining_time": "2:40:24", "throughput": 8722.88, "total_tokens": 54001952} +{"current_steps": 80120, "total_steps": 204665, "loss": 0.0077, "lr": 1.5255831718525324e-06, "epoch": 1.95734492951897, "percentage": 39.15, "elapsed_time": "1:43:11", "remaining_time": "2:40:24", "throughput": 8722.92, "total_tokens": 54005216} +{"current_steps": 80125, "total_steps": 204665, "loss": 0.1188, "lr": 1.5255106206870073e-06, "epoch": 1.9574670803508172, "percentage": 39.15, "elapsed_time": "1:43:11", "remaining_time": "2:40:23", "throughput": 8723.22, "total_tokens": 54010592} +{"current_steps": 80130, "total_steps": 204665, "loss": 0.0665, "lr": 1.525438065699827e-06, "epoch": 1.9575892311826644, "percentage": 39.15, "elapsed_time": "1:43:11", "remaining_time": "2:40:23", "throughput": 8723.25, "total_tokens": 54013728} +{"current_steps": 80135, "total_steps": 204665, "loss": 0.0343, "lr": 1.52536550689152e-06, "epoch": 1.9577113820145116, "percentage": 39.15, "elapsed_time": "1:43:12", "remaining_time": "2:40:22", "throughput": 8723.28, "total_tokens": 54016864} +{"current_steps": 80140, "total_steps": 204665, "loss": 0.0164, "lr": 1.525292944262614e-06, "epoch": 1.9578335328463585, "percentage": 39.16, "elapsed_time": "1:43:12", "remaining_time": "2:40:22", "throughput": 8723.35, "total_tokens": 54020384} +{"current_steps": 80145, "total_steps": 204665, "loss": 0.127, "lr": 1.525220377813636e-06, "epoch": 1.9579556836782057, "percentage": 39.16, "elapsed_time": "1:43:12", "remaining_time": "2:40:21", "throughput": 8723.45, "total_tokens": 54024096} +{"current_steps": 80150, "total_steps": 204665, "loss": 0.1639, "lr": 1.5251478075451145e-06, "epoch": 1.958077834510053, "percentage": 39.16, "elapsed_time": "1:43:13", "remaining_time": "2:40:21", "throughput": 8723.48, "total_tokens": 54027232} +{"current_steps": 80155, "total_steps": 204665, "loss": 0.1081, "lr": 1.525075233457577e-06, "epoch": 1.9581999853419, "percentage": 39.16, "elapsed_time": "1:43:13", "remaining_time": "2:40:21", "throughput": 8723.5, "total_tokens": 54030368} +{"current_steps": 80160, "total_steps": 204665, "loss": 0.0384, "lr": 1.5250026555515508e-06, "epoch": 1.9583221361737473, "percentage": 39.17, "elapsed_time": "1:43:14", "remaining_time": "2:40:20", "throughput": 8723.56, "total_tokens": 54033760} +{"current_steps": 80165, "total_steps": 204665, "loss": 0.1221, "lr": 1.5249300738275642e-06, "epoch": 1.9584442870055945, "percentage": 39.17, "elapsed_time": "1:43:14", "remaining_time": "2:40:20", "throughput": 8723.59, "total_tokens": 54036960} +{"current_steps": 80170, "total_steps": 204665, "loss": 0.1412, "lr": 1.5248574882861448e-06, "epoch": 1.9585664378374417, "percentage": 39.17, "elapsed_time": "1:43:14", "remaining_time": "2:40:19", "throughput": 8723.67, "total_tokens": 54040544} +{"current_steps": 80175, "total_steps": 204665, "loss": 0.0873, "lr": 1.5247848989278209e-06, "epoch": 1.9586885886692889, "percentage": 39.17, "elapsed_time": "1:43:15", "remaining_time": "2:40:19", "throughput": 8723.73, "total_tokens": 54043936} +{"current_steps": 80180, "total_steps": 204665, "loss": 0.1489, "lr": 1.5247123057531197e-06, "epoch": 1.958810739501136, "percentage": 39.18, "elapsed_time": "1:43:15", "remaining_time": "2:40:18", "throughput": 8723.75, "total_tokens": 54047008} +{"current_steps": 80185, "total_steps": 204665, "loss": 0.0386, "lr": 1.5246397087625698e-06, "epoch": 1.9589328903329832, "percentage": 39.18, "elapsed_time": "1:43:15", "remaining_time": "2:40:18", "throughput": 8723.82, "total_tokens": 54050464} +{"current_steps": 80190, "total_steps": 204665, "loss": 0.1232, "lr": 1.5245671079566987e-06, "epoch": 1.9590550411648304, "percentage": 39.18, "elapsed_time": "1:43:16", "remaining_time": "2:40:17", "throughput": 8723.87, "total_tokens": 54053856} +{"current_steps": 80195, "total_steps": 204665, "loss": 0.0367, "lr": 1.5244945033360343e-06, "epoch": 1.9591771919966776, "percentage": 39.18, "elapsed_time": "1:43:16", "remaining_time": "2:40:17", "throughput": 8723.9, "total_tokens": 54056992} +{"current_steps": 80200, "total_steps": 204665, "loss": 0.0035, "lr": 1.524421894901105e-06, "epoch": 1.9592993428285248, "percentage": 39.19, "elapsed_time": "1:43:16", "remaining_time": "2:40:16", "throughput": 8723.94, "total_tokens": 54060256} +{"current_steps": 80205, "total_steps": 204665, "loss": 0.0954, "lr": 1.5243492826524388e-06, "epoch": 1.959421493660372, "percentage": 39.19, "elapsed_time": "1:43:17", "remaining_time": "2:40:16", "throughput": 8723.99, "total_tokens": 54063584} +{"current_steps": 80210, "total_steps": 204665, "loss": 0.0685, "lr": 1.5242766665905635e-06, "epoch": 1.959543644492219, "percentage": 39.19, "elapsed_time": "1:43:17", "remaining_time": "2:40:16", "throughput": 8724.11, "total_tokens": 54067424} +{"current_steps": 80215, "total_steps": 204665, "loss": 0.1259, "lr": 1.5242040467160071e-06, "epoch": 1.9596657953240662, "percentage": 39.19, "elapsed_time": "1:43:17", "remaining_time": "2:40:15", "throughput": 8724.13, "total_tokens": 54070560} +{"current_steps": 80220, "total_steps": 204665, "loss": 0.0437, "lr": 1.524131423029298e-06, "epoch": 1.9597879461559133, "percentage": 39.2, "elapsed_time": "1:43:18", "remaining_time": "2:40:15", "throughput": 8724.2, "total_tokens": 54074016} +{"current_steps": 80225, "total_steps": 204665, "loss": 0.0531, "lr": 1.5240587955309642e-06, "epoch": 1.9599100969877605, "percentage": 39.2, "elapsed_time": "1:43:18", "remaining_time": "2:40:14", "throughput": 8724.25, "total_tokens": 54077344} +{"current_steps": 80230, "total_steps": 204665, "loss": 0.1066, "lr": 1.5239861642215336e-06, "epoch": 1.9600322478196075, "percentage": 39.2, "elapsed_time": "1:43:18", "remaining_time": "2:40:14", "throughput": 8724.32, "total_tokens": 54080864} +{"current_steps": 80235, "total_steps": 204665, "loss": 0.0723, "lr": 1.5239135291015349e-06, "epoch": 1.9601543986514547, "percentage": 39.2, "elapsed_time": "1:43:19", "remaining_time": "2:40:13", "throughput": 8724.43, "total_tokens": 54084704} +{"current_steps": 80240, "total_steps": 204665, "loss": 0.0152, "lr": 1.523840890171496e-06, "epoch": 1.9602765494833019, "percentage": 39.21, "elapsed_time": "1:43:19", "remaining_time": "2:40:13", "throughput": 8724.51, "total_tokens": 54088224} +{"current_steps": 80245, "total_steps": 204665, "loss": 0.0367, "lr": 1.5237682474319455e-06, "epoch": 1.960398700315149, "percentage": 39.21, "elapsed_time": "1:43:19", "remaining_time": "2:40:12", "throughput": 8724.53, "total_tokens": 54091360} +{"current_steps": 80250, "total_steps": 204665, "loss": 0.0373, "lr": 1.5236956008834114e-06, "epoch": 1.9605208511469963, "percentage": 39.21, "elapsed_time": "1:43:20", "remaining_time": "2:40:12", "throughput": 8724.57, "total_tokens": 54094624} +{"current_steps": 80255, "total_steps": 204665, "loss": 0.0512, "lr": 1.523622950526422e-06, "epoch": 1.9606430019788434, "percentage": 39.21, "elapsed_time": "1:43:20", "remaining_time": "2:40:12", "throughput": 8724.59, "total_tokens": 54097696} +{"current_steps": 80260, "total_steps": 204665, "loss": 0.0995, "lr": 1.5235502963615054e-06, "epoch": 1.9607651528106906, "percentage": 39.22, "elapsed_time": "1:43:20", "remaining_time": "2:40:11", "throughput": 8724.67, "total_tokens": 54101280} +{"current_steps": 80265, "total_steps": 204665, "loss": 0.1105, "lr": 1.5234776383891906e-06, "epoch": 1.9608873036425378, "percentage": 39.22, "elapsed_time": "1:43:21", "remaining_time": "2:40:11", "throughput": 8724.7, "total_tokens": 54104416} +{"current_steps": 80270, "total_steps": 204665, "loss": 0.0724, "lr": 1.5234049766100055e-06, "epoch": 1.961009454474385, "percentage": 39.22, "elapsed_time": "1:43:21", "remaining_time": "2:40:10", "throughput": 8724.76, "total_tokens": 54107872} +{"current_steps": 80275, "total_steps": 204665, "loss": 0.1278, "lr": 1.5233323110244785e-06, "epoch": 1.9611316053062322, "percentage": 39.22, "elapsed_time": "1:43:21", "remaining_time": "2:40:10", "throughput": 8724.8, "total_tokens": 54111136} +{"current_steps": 80280, "total_steps": 204665, "loss": 0.0964, "lr": 1.523259641633138e-06, "epoch": 1.9612537561380794, "percentage": 39.23, "elapsed_time": "1:43:22", "remaining_time": "2:40:09", "throughput": 8724.88, "total_tokens": 54114656} +{"current_steps": 80285, "total_steps": 204665, "loss": 0.051, "lr": 1.523186968436513e-06, "epoch": 1.9613759069699266, "percentage": 39.23, "elapsed_time": "1:43:22", "remaining_time": "2:40:09", "throughput": 8724.91, "total_tokens": 54117856} +{"current_steps": 80290, "total_steps": 204665, "loss": 0.0274, "lr": 1.5231142914351316e-06, "epoch": 1.9614980578017738, "percentage": 39.23, "elapsed_time": "1:43:23", "remaining_time": "2:40:08", "throughput": 8724.95, "total_tokens": 54121056} +{"current_steps": 80295, "total_steps": 204665, "loss": 0.0569, "lr": 1.5230416106295221e-06, "epoch": 1.9616202086336207, "percentage": 39.23, "elapsed_time": "1:43:23", "remaining_time": "2:40:08", "throughput": 8725.03, "total_tokens": 54124640} +{"current_steps": 80300, "total_steps": 204665, "loss": 0.0749, "lr": 1.5229689260202134e-06, "epoch": 1.961742359465468, "percentage": 39.23, "elapsed_time": "1:43:23", "remaining_time": "2:40:08", "throughput": 8725.03, "total_tokens": 54127584} +{"current_steps": 80305, "total_steps": 204665, "loss": 0.0353, "lr": 1.5228962376077344e-06, "epoch": 1.9618645102973151, "percentage": 39.24, "elapsed_time": "1:43:24", "remaining_time": "2:40:07", "throughput": 8725.03, "total_tokens": 54130528} +{"current_steps": 80310, "total_steps": 204665, "loss": 0.1514, "lr": 1.5228235453926131e-06, "epoch": 1.9619866611291623, "percentage": 39.24, "elapsed_time": "1:43:24", "remaining_time": "2:40:07", "throughput": 8725.04, "total_tokens": 54133600} +{"current_steps": 80315, "total_steps": 204665, "loss": 0.0796, "lr": 1.5227508493753783e-06, "epoch": 1.9621088119610095, "percentage": 39.24, "elapsed_time": "1:43:24", "remaining_time": "2:40:06", "throughput": 8725.05, "total_tokens": 54136608} +{"current_steps": 80320, "total_steps": 204665, "loss": 0.1009, "lr": 1.5226781495565588e-06, "epoch": 1.9622309627928565, "percentage": 39.24, "elapsed_time": "1:43:25", "remaining_time": "2:40:06", "throughput": 8725.1, "total_tokens": 54139936} +{"current_steps": 80325, "total_steps": 204665, "loss": 0.0519, "lr": 1.5226054459366831e-06, "epoch": 1.9623531136247037, "percentage": 39.25, "elapsed_time": "1:43:25", "remaining_time": "2:40:05", "throughput": 8725.13, "total_tokens": 54143136} +{"current_steps": 80330, "total_steps": 204665, "loss": 0.0286, "lr": 1.5225327385162801e-06, "epoch": 1.9624752644565508, "percentage": 39.25, "elapsed_time": "1:43:25", "remaining_time": "2:40:05", "throughput": 8725.18, "total_tokens": 54146528} +{"current_steps": 80335, "total_steps": 204665, "loss": 0.2561, "lr": 1.5224600272958785e-06, "epoch": 1.962597415288398, "percentage": 39.25, "elapsed_time": "1:43:26", "remaining_time": "2:40:04", "throughput": 8725.21, "total_tokens": 54149664} +{"current_steps": 80340, "total_steps": 204665, "loss": 0.0868, "lr": 1.522387312276007e-06, "epoch": 1.9627195661202452, "percentage": 39.25, "elapsed_time": "1:43:26", "remaining_time": "2:40:04", "throughput": 8725.29, "total_tokens": 54153184} +{"current_steps": 80345, "total_steps": 204665, "loss": 0.1178, "lr": 1.5223145934571944e-06, "epoch": 1.9628417169520924, "percentage": 39.26, "elapsed_time": "1:43:26", "remaining_time": "2:40:03", "throughput": 8725.3, "total_tokens": 54156256} +{"current_steps": 80350, "total_steps": 204665, "loss": 0.1157, "lr": 1.5222418708399696e-06, "epoch": 1.9629638677839396, "percentage": 39.26, "elapsed_time": "1:43:27", "remaining_time": "2:40:03", "throughput": 8725.38, "total_tokens": 54159776} +{"current_steps": 80355, "total_steps": 204665, "loss": 0.1211, "lr": 1.5221691444248615e-06, "epoch": 1.9630860186157868, "percentage": 39.26, "elapsed_time": "1:43:27", "remaining_time": "2:40:03", "throughput": 8725.4, "total_tokens": 54162912} +{"current_steps": 80360, "total_steps": 204665, "loss": 0.1338, "lr": 1.522096414212399e-06, "epoch": 1.963208169447634, "percentage": 39.26, "elapsed_time": "1:43:27", "remaining_time": "2:40:02", "throughput": 8725.43, "total_tokens": 54166112} +{"current_steps": 80365, "total_steps": 204665, "loss": 0.1737, "lr": 1.522023680203111e-06, "epoch": 1.9633303202794812, "percentage": 39.27, "elapsed_time": "1:43:28", "remaining_time": "2:40:02", "throughput": 8725.55, "total_tokens": 54169952} +{"current_steps": 80370, "total_steps": 204665, "loss": 0.0497, "lr": 1.5219509423975262e-06, "epoch": 1.9634524711113284, "percentage": 39.27, "elapsed_time": "1:43:28", "remaining_time": "2:40:01", "throughput": 8725.59, "total_tokens": 54173216} +{"current_steps": 80375, "total_steps": 204665, "loss": 0.0026, "lr": 1.5218782007961738e-06, "epoch": 1.9635746219431756, "percentage": 39.27, "elapsed_time": "1:43:28", "remaining_time": "2:40:01", "throughput": 8725.62, "total_tokens": 54176416} +{"current_steps": 80380, "total_steps": 204665, "loss": 0.0518, "lr": 1.5218054553995829e-06, "epoch": 1.9636967727750227, "percentage": 39.27, "elapsed_time": "1:43:29", "remaining_time": "2:40:00", "throughput": 8725.67, "total_tokens": 54179808} +{"current_steps": 80385, "total_steps": 204665, "loss": 0.1372, "lr": 1.521732706208282e-06, "epoch": 1.9638189236068697, "percentage": 39.28, "elapsed_time": "1:43:29", "remaining_time": "2:40:00", "throughput": 8725.77, "total_tokens": 54183520} +{"current_steps": 80390, "total_steps": 204665, "loss": 0.12, "lr": 1.521659953222801e-06, "epoch": 1.963941074438717, "percentage": 39.28, "elapsed_time": "1:43:29", "remaining_time": "2:39:59", "throughput": 8725.87, "total_tokens": 54187232} +{"current_steps": 80395, "total_steps": 204665, "loss": 0.021, "lr": 1.5215871964436683e-06, "epoch": 1.964063225270564, "percentage": 39.28, "elapsed_time": "1:43:30", "remaining_time": "2:39:59", "throughput": 8725.92, "total_tokens": 54190624} +{"current_steps": 80400, "total_steps": 204665, "loss": 0.0013, "lr": 1.5215144358714134e-06, "epoch": 1.9641853761024113, "percentage": 39.28, "elapsed_time": "1:43:30", "remaining_time": "2:39:59", "throughput": 8726.07, "total_tokens": 54194720} +{"current_steps": 80405, "total_steps": 204665, "loss": 0.0366, "lr": 1.521441671506565e-06, "epoch": 1.9643075269342585, "percentage": 39.29, "elapsed_time": "1:43:31", "remaining_time": "2:39:58", "throughput": 8726.14, "total_tokens": 54198240} +{"current_steps": 80410, "total_steps": 204665, "loss": 0.1229, "lr": 1.5213689033496526e-06, "epoch": 1.9644296777661054, "percentage": 39.29, "elapsed_time": "1:43:31", "remaining_time": "2:39:58", "throughput": 8726.17, "total_tokens": 54201376} +{"current_steps": 80415, "total_steps": 204665, "loss": 0.0042, "lr": 1.5212961314012054e-06, "epoch": 1.9645518285979526, "percentage": 39.29, "elapsed_time": "1:43:31", "remaining_time": "2:39:57", "throughput": 8726.25, "total_tokens": 54205024} +{"current_steps": 80420, "total_steps": 204665, "loss": 0.0394, "lr": 1.5212233556617524e-06, "epoch": 1.9646739794297998, "percentage": 39.29, "elapsed_time": "1:43:32", "remaining_time": "2:39:57", "throughput": 8726.29, "total_tokens": 54208224} +{"current_steps": 80425, "total_steps": 204665, "loss": 0.0308, "lr": 1.5211505761318231e-06, "epoch": 1.964796130261647, "percentage": 39.3, "elapsed_time": "1:43:32", "remaining_time": "2:39:56", "throughput": 8726.32, "total_tokens": 54211424} +{"current_steps": 80430, "total_steps": 204665, "loss": 0.0389, "lr": 1.5210777928119466e-06, "epoch": 1.9649182810934942, "percentage": 39.3, "elapsed_time": "1:43:32", "remaining_time": "2:39:56", "throughput": 8726.34, "total_tokens": 54214496} +{"current_steps": 80435, "total_steps": 204665, "loss": 0.0804, "lr": 1.5210050057026521e-06, "epoch": 1.9650404319253414, "percentage": 39.3, "elapsed_time": "1:43:33", "remaining_time": "2:39:55", "throughput": 8726.45, "total_tokens": 54218336} +{"current_steps": 80440, "total_steps": 204665, "loss": 0.1344, "lr": 1.520932214804469e-06, "epoch": 1.9651625827571886, "percentage": 39.3, "elapsed_time": "1:43:33", "remaining_time": "2:39:55", "throughput": 8726.54, "total_tokens": 54221984} +{"current_steps": 80445, "total_steps": 204665, "loss": 0.1511, "lr": 1.520859420117927e-06, "epoch": 1.9652847335890358, "percentage": 39.31, "elapsed_time": "1:43:33", "remaining_time": "2:39:55", "throughput": 8726.58, "total_tokens": 54225248} +{"current_steps": 80450, "total_steps": 204665, "loss": 0.2087, "lr": 1.520786621643555e-06, "epoch": 1.965406884420883, "percentage": 39.31, "elapsed_time": "1:43:34", "remaining_time": "2:39:54", "throughput": 8726.62, "total_tokens": 54228512} +{"current_steps": 80455, "total_steps": 204665, "loss": 0.1028, "lr": 1.5207138193818824e-06, "epoch": 1.9655290352527302, "percentage": 39.31, "elapsed_time": "1:43:34", "remaining_time": "2:39:54", "throughput": 8726.66, "total_tokens": 54231776} +{"current_steps": 80460, "total_steps": 204665, "loss": 0.0016, "lr": 1.5206410133334393e-06, "epoch": 1.9656511860845773, "percentage": 39.31, "elapsed_time": "1:43:34", "remaining_time": "2:39:53", "throughput": 8726.74, "total_tokens": 54235360} +{"current_steps": 80465, "total_steps": 204665, "loss": 0.0289, "lr": 1.5205682034987547e-06, "epoch": 1.9657733369164245, "percentage": 39.32, "elapsed_time": "1:43:35", "remaining_time": "2:39:53", "throughput": 8726.76, "total_tokens": 54238496} +{"current_steps": 80470, "total_steps": 204665, "loss": 0.0339, "lr": 1.520495389878358e-06, "epoch": 1.9658954877482717, "percentage": 39.32, "elapsed_time": "1:43:35", "remaining_time": "2:39:52", "throughput": 8726.78, "total_tokens": 54241568} +{"current_steps": 80475, "total_steps": 204665, "loss": 0.1891, "lr": 1.5204225724727789e-06, "epoch": 1.9660176385801187, "percentage": 39.32, "elapsed_time": "1:43:35", "remaining_time": "2:39:52", "throughput": 8726.84, "total_tokens": 54244960} +{"current_steps": 80480, "total_steps": 204665, "loss": 0.0773, "lr": 1.5203497512825465e-06, "epoch": 1.9661397894119659, "percentage": 39.32, "elapsed_time": "1:43:36", "remaining_time": "2:39:51", "throughput": 8726.91, "total_tokens": 54248480} +{"current_steps": 80485, "total_steps": 204665, "loss": 0.0412, "lr": 1.5202769263081908e-06, "epoch": 1.966261940243813, "percentage": 39.33, "elapsed_time": "1:43:36", "remaining_time": "2:39:51", "throughput": 8726.95, "total_tokens": 54251680} +{"current_steps": 80490, "total_steps": 204665, "loss": 0.221, "lr": 1.5202040975502417e-06, "epoch": 1.9663840910756603, "percentage": 39.33, "elapsed_time": "1:43:36", "remaining_time": "2:39:51", "throughput": 8726.97, "total_tokens": 54254816} +{"current_steps": 80495, "total_steps": 204665, "loss": 0.1006, "lr": 1.5201312650092283e-06, "epoch": 1.9665062419075072, "percentage": 39.33, "elapsed_time": "1:43:37", "remaining_time": "2:39:50", "throughput": 8726.99, "total_tokens": 54257952} +{"current_steps": 80500, "total_steps": 204665, "loss": 0.1159, "lr": 1.5200584286856808e-06, "epoch": 1.9666283927393544, "percentage": 39.33, "elapsed_time": "1:43:37", "remaining_time": "2:39:50", "throughput": 8727.01, "total_tokens": 54261024} +{"current_steps": 80505, "total_steps": 204665, "loss": 0.0986, "lr": 1.519985588580128e-06, "epoch": 1.9667505435712016, "percentage": 39.34, "elapsed_time": "1:43:37", "remaining_time": "2:39:49", "throughput": 8727.06, "total_tokens": 54264416} +{"current_steps": 80510, "total_steps": 204665, "loss": 0.0933, "lr": 1.5199127446931e-06, "epoch": 1.9668726944030488, "percentage": 39.34, "elapsed_time": "1:43:38", "remaining_time": "2:39:49", "throughput": 8727.1, "total_tokens": 54267680} +{"current_steps": 80515, "total_steps": 204665, "loss": 0.0496, "lr": 1.5198398970251273e-06, "epoch": 1.966994845234896, "percentage": 39.34, "elapsed_time": "1:43:38", "remaining_time": "2:39:48", "throughput": 8727.19, "total_tokens": 54271392} +{"current_steps": 80520, "total_steps": 204665, "loss": 0.0501, "lr": 1.519767045576739e-06, "epoch": 1.9671169960667432, "percentage": 39.34, "elapsed_time": "1:43:39", "remaining_time": "2:39:48", "throughput": 8727.25, "total_tokens": 54274784} +{"current_steps": 80525, "total_steps": 204665, "loss": 0.1235, "lr": 1.5196941903484648e-06, "epoch": 1.9672391468985904, "percentage": 39.34, "elapsed_time": "1:43:39", "remaining_time": "2:39:47", "throughput": 8727.3, "total_tokens": 54278176} +{"current_steps": 80530, "total_steps": 204665, "loss": 0.0017, "lr": 1.5196213313408346e-06, "epoch": 1.9673612977304376, "percentage": 39.35, "elapsed_time": "1:43:39", "remaining_time": "2:39:47", "throughput": 8727.36, "total_tokens": 54281568} +{"current_steps": 80535, "total_steps": 204665, "loss": 0.0486, "lr": 1.5195484685543783e-06, "epoch": 1.9674834485622847, "percentage": 39.35, "elapsed_time": "1:43:40", "remaining_time": "2:39:47", "throughput": 8727.39, "total_tokens": 54284768} +{"current_steps": 80540, "total_steps": 204665, "loss": 0.054, "lr": 1.5194756019896256e-06, "epoch": 1.967605599394132, "percentage": 39.35, "elapsed_time": "1:43:40", "remaining_time": "2:39:46", "throughput": 8727.46, "total_tokens": 54288288} +{"current_steps": 80545, "total_steps": 204665, "loss": 0.0774, "lr": 1.5194027316471068e-06, "epoch": 1.9677277502259791, "percentage": 39.35, "elapsed_time": "1:43:40", "remaining_time": "2:39:46", "throughput": 8727.52, "total_tokens": 54291744} +{"current_steps": 80550, "total_steps": 204665, "loss": 0.0913, "lr": 1.5193298575273517e-06, "epoch": 1.9678499010578263, "percentage": 39.36, "elapsed_time": "1:43:41", "remaining_time": "2:39:45", "throughput": 8727.49, "total_tokens": 54294496} +{"current_steps": 80555, "total_steps": 204665, "loss": 0.0978, "lr": 1.51925697963089e-06, "epoch": 1.9679720518896735, "percentage": 39.36, "elapsed_time": "1:43:41", "remaining_time": "2:39:45", "throughput": 8727.58, "total_tokens": 54298144} +{"current_steps": 80560, "total_steps": 204665, "loss": 0.0596, "lr": 1.5191840979582522e-06, "epoch": 1.9680942027215207, "percentage": 39.36, "elapsed_time": "1:43:41", "remaining_time": "2:39:44", "throughput": 8727.57, "total_tokens": 54301024} +{"current_steps": 80565, "total_steps": 204665, "loss": 0.0024, "lr": 1.5191112125099678e-06, "epoch": 1.9682163535533677, "percentage": 39.36, "elapsed_time": "1:43:42", "remaining_time": "2:39:44", "throughput": 8727.61, "total_tokens": 54304288} +{"current_steps": 80570, "total_steps": 204665, "loss": 0.0716, "lr": 1.519038323286567e-06, "epoch": 1.9683385043852148, "percentage": 39.37, "elapsed_time": "1:43:42", "remaining_time": "2:39:43", "throughput": 8727.64, "total_tokens": 54307488} +{"current_steps": 80575, "total_steps": 204665, "loss": 0.0178, "lr": 1.5189654302885798e-06, "epoch": 1.968460655217062, "percentage": 39.37, "elapsed_time": "1:43:42", "remaining_time": "2:39:43", "throughput": 8727.68, "total_tokens": 54310752} +{"current_steps": 80580, "total_steps": 204665, "loss": 0.0965, "lr": 1.5188925335165369e-06, "epoch": 1.9685828060489092, "percentage": 39.37, "elapsed_time": "1:43:43", "remaining_time": "2:39:43", "throughput": 8727.74, "total_tokens": 54314080} +{"current_steps": 80585, "total_steps": 204665, "loss": 0.0889, "lr": 1.5188196329709675e-06, "epoch": 1.9687049568807562, "percentage": 39.37, "elapsed_time": "1:43:43", "remaining_time": "2:39:42", "throughput": 8727.78, "total_tokens": 54317344} +{"current_steps": 80590, "total_steps": 204665, "loss": 0.0476, "lr": 1.5187467286524022e-06, "epoch": 1.9688271077126034, "percentage": 39.38, "elapsed_time": "1:43:43", "remaining_time": "2:39:42", "throughput": 8727.96, "total_tokens": 54321696} +{"current_steps": 80595, "total_steps": 204665, "loss": 0.0349, "lr": 1.5186738205613714e-06, "epoch": 1.9689492585444506, "percentage": 39.38, "elapsed_time": "1:43:44", "remaining_time": "2:39:41", "throughput": 8727.99, "total_tokens": 54324896} +{"current_steps": 80600, "total_steps": 204665, "loss": 0.04, "lr": 1.5186009086984048e-06, "epoch": 1.9690714093762978, "percentage": 39.38, "elapsed_time": "1:43:44", "remaining_time": "2:39:41", "throughput": 8728.0, "total_tokens": 54327904} +{"current_steps": 80605, "total_steps": 204665, "loss": 0.1077, "lr": 1.5185279930640329e-06, "epoch": 1.969193560208145, "percentage": 39.38, "elapsed_time": "1:43:44", "remaining_time": "2:39:40", "throughput": 8728.1, "total_tokens": 54331616} +{"current_steps": 80610, "total_steps": 204665, "loss": 0.0114, "lr": 1.518455073658786e-06, "epoch": 1.9693157110399921, "percentage": 39.39, "elapsed_time": "1:43:45", "remaining_time": "2:39:40", "throughput": 8728.1, "total_tokens": 54334624} +{"current_steps": 80615, "total_steps": 204665, "loss": 0.0017, "lr": 1.5183821504831946e-06, "epoch": 1.9694378618718393, "percentage": 39.39, "elapsed_time": "1:43:45", "remaining_time": "2:39:39", "throughput": 8728.17, "total_tokens": 54338080} +{"current_steps": 80620, "total_steps": 204665, "loss": 0.1975, "lr": 1.5183092235377887e-06, "epoch": 1.9695600127036865, "percentage": 39.39, "elapsed_time": "1:43:45", "remaining_time": "2:39:39", "throughput": 8728.18, "total_tokens": 54341152} +{"current_steps": 80625, "total_steps": 204665, "loss": 0.034, "lr": 1.5182362928230984e-06, "epoch": 1.9696821635355337, "percentage": 39.39, "elapsed_time": "1:43:46", "remaining_time": "2:39:39", "throughput": 8728.25, "total_tokens": 54344608} +{"current_steps": 80630, "total_steps": 204665, "loss": 0.0687, "lr": 1.518163358339655e-06, "epoch": 1.969804314367381, "percentage": 39.4, "elapsed_time": "1:43:46", "remaining_time": "2:39:38", "throughput": 8728.3, "total_tokens": 54347936} +{"current_steps": 80635, "total_steps": 204665, "loss": 0.0011, "lr": 1.5180904200879876e-06, "epoch": 1.969926465199228, "percentage": 39.4, "elapsed_time": "1:43:46", "remaining_time": "2:39:38", "throughput": 8728.34, "total_tokens": 54351264} +{"current_steps": 80640, "total_steps": 204665, "loss": 0.1065, "lr": 1.5180174780686277e-06, "epoch": 1.9700486160310753, "percentage": 39.4, "elapsed_time": "1:43:47", "remaining_time": "2:39:37", "throughput": 8728.39, "total_tokens": 54354592} +{"current_steps": 80645, "total_steps": 204665, "loss": 0.0454, "lr": 1.5179445322821055e-06, "epoch": 1.9701707668629225, "percentage": 39.4, "elapsed_time": "1:43:47", "remaining_time": "2:39:37", "throughput": 8728.43, "total_tokens": 54357856} +{"current_steps": 80650, "total_steps": 204665, "loss": 0.0426, "lr": 1.5178715827289508e-06, "epoch": 1.9702929176947697, "percentage": 39.41, "elapsed_time": "1:43:48", "remaining_time": "2:39:36", "throughput": 8728.45, "total_tokens": 54360928} +{"current_steps": 80655, "total_steps": 204665, "loss": 0.154, "lr": 1.5177986294096951e-06, "epoch": 1.9704150685266166, "percentage": 39.41, "elapsed_time": "1:43:48", "remaining_time": "2:39:36", "throughput": 8728.48, "total_tokens": 54364128} +{"current_steps": 80660, "total_steps": 204665, "loss": 0.0668, "lr": 1.5177256723248685e-06, "epoch": 1.9705372193584638, "percentage": 39.41, "elapsed_time": "1:43:48", "remaining_time": "2:39:35", "throughput": 8728.5, "total_tokens": 54367200} +{"current_steps": 80665, "total_steps": 204665, "loss": 0.0838, "lr": 1.5176527114750012e-06, "epoch": 1.970659370190311, "percentage": 39.41, "elapsed_time": "1:43:49", "remaining_time": "2:39:35", "throughput": 8728.58, "total_tokens": 54370784} +{"current_steps": 80670, "total_steps": 204665, "loss": 0.1594, "lr": 1.5175797468606243e-06, "epoch": 1.9707815210221582, "percentage": 39.42, "elapsed_time": "1:43:49", "remaining_time": "2:39:34", "throughput": 8728.6, "total_tokens": 54373920} +{"current_steps": 80675, "total_steps": 204665, "loss": 0.1475, "lr": 1.517506778482269e-06, "epoch": 1.9709036718540052, "percentage": 39.42, "elapsed_time": "1:43:49", "remaining_time": "2:39:34", "throughput": 8728.67, "total_tokens": 54377376} +{"current_steps": 80680, "total_steps": 204665, "loss": 0.11, "lr": 1.5174338063404644e-06, "epoch": 1.9710258226858524, "percentage": 39.42, "elapsed_time": "1:43:50", "remaining_time": "2:39:34", "throughput": 8728.73, "total_tokens": 54380768} +{"current_steps": 80685, "total_steps": 204665, "loss": 0.0018, "lr": 1.5173608304357422e-06, "epoch": 1.9711479735176995, "percentage": 39.42, "elapsed_time": "1:43:50", "remaining_time": "2:39:33", "throughput": 8728.78, "total_tokens": 54384160} +{"current_steps": 80690, "total_steps": 204665, "loss": 0.0318, "lr": 1.5172878507686329e-06, "epoch": 1.9712701243495467, "percentage": 39.43, "elapsed_time": "1:43:50", "remaining_time": "2:39:33", "throughput": 8728.87, "total_tokens": 54387744} +{"current_steps": 80695, "total_steps": 204665, "loss": 0.0724, "lr": 1.5172148673396673e-06, "epoch": 1.971392275181394, "percentage": 39.43, "elapsed_time": "1:43:51", "remaining_time": "2:39:32", "throughput": 8728.89, "total_tokens": 54390880} +{"current_steps": 80700, "total_steps": 204665, "loss": 0.1306, "lr": 1.5171418801493757e-06, "epoch": 1.9715144260132411, "percentage": 39.43, "elapsed_time": "1:43:51", "remaining_time": "2:39:32", "throughput": 8728.94, "total_tokens": 54394272} +{"current_steps": 80705, "total_steps": 204665, "loss": 0.1369, "lr": 1.5170688891982895e-06, "epoch": 1.9716365768450883, "percentage": 39.43, "elapsed_time": "1:43:51", "remaining_time": "2:39:31", "throughput": 8728.96, "total_tokens": 54397408} +{"current_steps": 80710, "total_steps": 204665, "loss": 0.0628, "lr": 1.5169958944869393e-06, "epoch": 1.9717587276769355, "percentage": 39.44, "elapsed_time": "1:43:52", "remaining_time": "2:39:31", "throughput": 8729.02, "total_tokens": 54400800} +{"current_steps": 80715, "total_steps": 204665, "loss": 0.0422, "lr": 1.5169228960158557e-06, "epoch": 1.9718808785087827, "percentage": 39.44, "elapsed_time": "1:43:52", "remaining_time": "2:39:30", "throughput": 8729.03, "total_tokens": 54403872} +{"current_steps": 80720, "total_steps": 204665, "loss": 0.0017, "lr": 1.51684989378557e-06, "epoch": 1.9720030293406299, "percentage": 39.44, "elapsed_time": "1:43:52", "remaining_time": "2:39:30", "throughput": 8729.13, "total_tokens": 54407584} +{"current_steps": 80725, "total_steps": 204665, "loss": 0.0369, "lr": 1.5167768877966128e-06, "epoch": 1.972125180172477, "percentage": 39.44, "elapsed_time": "1:43:53", "remaining_time": "2:39:30", "throughput": 8729.17, "total_tokens": 54410848} +{"current_steps": 80730, "total_steps": 204665, "loss": 0.0298, "lr": 1.5167038780495151e-06, "epoch": 1.9722473310043243, "percentage": 39.44, "elapsed_time": "1:43:53", "remaining_time": "2:39:29", "throughput": 8729.2, "total_tokens": 54413984} +{"current_steps": 80735, "total_steps": 204665, "loss": 0.0452, "lr": 1.5166308645448077e-06, "epoch": 1.9723694818361714, "percentage": 39.45, "elapsed_time": "1:43:53", "remaining_time": "2:39:29", "throughput": 8729.27, "total_tokens": 54417504} +{"current_steps": 80740, "total_steps": 204665, "loss": 0.0985, "lr": 1.516557847283022e-06, "epoch": 1.9724916326680186, "percentage": 39.45, "elapsed_time": "1:43:54", "remaining_time": "2:39:28", "throughput": 8729.34, "total_tokens": 54421024} +{"current_steps": 80745, "total_steps": 204665, "loss": 0.0931, "lr": 1.5164848262646883e-06, "epoch": 1.9726137834998656, "percentage": 39.45, "elapsed_time": "1:43:54", "remaining_time": "2:39:28", "throughput": 8729.41, "total_tokens": 54424480} +{"current_steps": 80750, "total_steps": 204665, "loss": 0.0469, "lr": 1.5164118014903382e-06, "epoch": 1.9727359343317128, "percentage": 39.45, "elapsed_time": "1:43:54", "remaining_time": "2:39:27", "throughput": 8729.39, "total_tokens": 54427296} +{"current_steps": 80755, "total_steps": 204665, "loss": 0.0497, "lr": 1.5163387729605028e-06, "epoch": 1.97285808516356, "percentage": 39.46, "elapsed_time": "1:43:55", "remaining_time": "2:39:27", "throughput": 8729.44, "total_tokens": 54430624} +{"current_steps": 80760, "total_steps": 204665, "loss": 0.0958, "lr": 1.5162657406757125e-06, "epoch": 1.9729802359954072, "percentage": 39.46, "elapsed_time": "1:43:55", "remaining_time": "2:39:26", "throughput": 8729.5, "total_tokens": 54434080} +{"current_steps": 80765, "total_steps": 204665, "loss": 0.1314, "lr": 1.516192704636499e-06, "epoch": 1.9731023868272541, "percentage": 39.46, "elapsed_time": "1:43:55", "remaining_time": "2:39:26", "throughput": 8729.5, "total_tokens": 54437024} +{"current_steps": 80770, "total_steps": 204665, "loss": 0.102, "lr": 1.5161196648433936e-06, "epoch": 1.9732245376591013, "percentage": 39.46, "elapsed_time": "1:43:56", "remaining_time": "2:39:26", "throughput": 8729.48, "total_tokens": 54439840} +{"current_steps": 80775, "total_steps": 204665, "loss": 0.1239, "lr": 1.516046621296927e-06, "epoch": 1.9733466884909485, "percentage": 39.47, "elapsed_time": "1:43:56", "remaining_time": "2:39:25", "throughput": 8729.5, "total_tokens": 54442912} +{"current_steps": 80780, "total_steps": 204665, "loss": 0.0033, "lr": 1.5159735739976307e-06, "epoch": 1.9734688393227957, "percentage": 39.47, "elapsed_time": "1:43:57", "remaining_time": "2:39:25", "throughput": 8729.5, "total_tokens": 54445920} +{"current_steps": 80785, "total_steps": 204665, "loss": 0.0027, "lr": 1.515900522946036e-06, "epoch": 1.973590990154643, "percentage": 39.47, "elapsed_time": "1:43:57", "remaining_time": "2:39:24", "throughput": 8729.54, "total_tokens": 54449184} +{"current_steps": 80790, "total_steps": 204665, "loss": 0.078, "lr": 1.5158274681426732e-06, "epoch": 1.97371314098649, "percentage": 39.47, "elapsed_time": "1:43:57", "remaining_time": "2:39:24", "throughput": 8729.61, "total_tokens": 54452704} +{"current_steps": 80795, "total_steps": 204665, "loss": 0.0011, "lr": 1.5157544095880747e-06, "epoch": 1.9738352918183373, "percentage": 39.48, "elapsed_time": "1:43:58", "remaining_time": "2:39:23", "throughput": 8729.65, "total_tokens": 54455904} +{"current_steps": 80800, "total_steps": 204665, "loss": 0.0354, "lr": 1.5156813472827717e-06, "epoch": 1.9739574426501845, "percentage": 39.48, "elapsed_time": "1:43:58", "remaining_time": "2:39:23", "throughput": 8729.71, "total_tokens": 54459360} +{"current_steps": 80805, "total_steps": 204665, "loss": 0.1044, "lr": 1.515608281227295e-06, "epoch": 1.9740795934820317, "percentage": 39.48, "elapsed_time": "1:43:58", "remaining_time": "2:39:22", "throughput": 8729.72, "total_tokens": 54462432} +{"current_steps": 80810, "total_steps": 204665, "loss": 0.0775, "lr": 1.515535211422176e-06, "epoch": 1.9742017443138788, "percentage": 39.48, "elapsed_time": "1:43:59", "remaining_time": "2:39:22", "throughput": 8729.75, "total_tokens": 54465568} +{"current_steps": 80815, "total_steps": 204665, "loss": 0.0375, "lr": 1.5154621378679467e-06, "epoch": 1.974323895145726, "percentage": 39.49, "elapsed_time": "1:43:59", "remaining_time": "2:39:22", "throughput": 8729.84, "total_tokens": 54469216} +{"current_steps": 80820, "total_steps": 204665, "loss": 0.0969, "lr": 1.5153890605651377e-06, "epoch": 1.9744460459775732, "percentage": 39.49, "elapsed_time": "1:43:59", "remaining_time": "2:39:21", "throughput": 8729.87, "total_tokens": 54472416} +{"current_steps": 80825, "total_steps": 204665, "loss": 0.0172, "lr": 1.5153159795142809e-06, "epoch": 1.9745681968094204, "percentage": 39.49, "elapsed_time": "1:44:00", "remaining_time": "2:39:21", "throughput": 8729.97, "total_tokens": 54476128} +{"current_steps": 80830, "total_steps": 204665, "loss": 0.1739, "lr": 1.5152428947159077e-06, "epoch": 1.9746903476412674, "percentage": 39.49, "elapsed_time": "1:44:00", "remaining_time": "2:39:20", "throughput": 8730.0, "total_tokens": 54479328} +{"current_steps": 80835, "total_steps": 204665, "loss": 0.0405, "lr": 1.5151698061705497e-06, "epoch": 1.9748124984731146, "percentage": 39.5, "elapsed_time": "1:44:00", "remaining_time": "2:39:20", "throughput": 8730.08, "total_tokens": 54482912} +{"current_steps": 80840, "total_steps": 204665, "loss": 0.045, "lr": 1.5150967138787384e-06, "epoch": 1.9749346493049618, "percentage": 39.5, "elapsed_time": "1:44:01", "remaining_time": "2:39:19", "throughput": 8730.13, "total_tokens": 54486304} +{"current_steps": 80845, "total_steps": 204665, "loss": 0.0367, "lr": 1.5150236178410052e-06, "epoch": 1.975056800136809, "percentage": 39.5, "elapsed_time": "1:44:01", "remaining_time": "2:39:19", "throughput": 8730.19, "total_tokens": 54489760} +{"current_steps": 80850, "total_steps": 204665, "loss": 0.1388, "lr": 1.5149505180578818e-06, "epoch": 1.9751789509686561, "percentage": 39.5, "elapsed_time": "1:44:01", "remaining_time": "2:39:18", "throughput": 8730.23, "total_tokens": 54493024} +{"current_steps": 80855, "total_steps": 204665, "loss": 0.2005, "lr": 1.514877414529899e-06, "epoch": 1.9753011018005031, "percentage": 39.51, "elapsed_time": "1:44:02", "remaining_time": "2:39:18", "throughput": 8730.24, "total_tokens": 54496096} +{"current_steps": 80860, "total_steps": 204665, "loss": 0.0668, "lr": 1.5148043072575899e-06, "epoch": 1.9754232526323503, "percentage": 39.51, "elapsed_time": "1:44:02", "remaining_time": "2:39:18", "throughput": 8730.31, "total_tokens": 54499616} +{"current_steps": 80865, "total_steps": 204665, "loss": 0.0071, "lr": 1.5147311962414852e-06, "epoch": 1.9755454034641975, "percentage": 39.51, "elapsed_time": "1:44:02", "remaining_time": "2:39:17", "throughput": 8730.4, "total_tokens": 54503264} +{"current_steps": 80870, "total_steps": 204665, "loss": 0.0546, "lr": 1.514658081482117e-06, "epoch": 1.9756675542960447, "percentage": 39.51, "elapsed_time": "1:44:03", "remaining_time": "2:39:17", "throughput": 8730.53, "total_tokens": 54507232} +{"current_steps": 80875, "total_steps": 204665, "loss": 0.0021, "lr": 1.5145849629800166e-06, "epoch": 1.9757897051278919, "percentage": 39.52, "elapsed_time": "1:44:03", "remaining_time": "2:39:16", "throughput": 8730.54, "total_tokens": 54510240} +{"current_steps": 80880, "total_steps": 204665, "loss": 0.0716, "lr": 1.514511840735716e-06, "epoch": 1.975911855959739, "percentage": 39.52, "elapsed_time": "1:44:03", "remaining_time": "2:39:16", "throughput": 8730.61, "total_tokens": 54513760} +{"current_steps": 80885, "total_steps": 204665, "loss": 0.0341, "lr": 1.5144387147497469e-06, "epoch": 1.9760340067915863, "percentage": 39.52, "elapsed_time": "1:44:04", "remaining_time": "2:39:15", "throughput": 8730.68, "total_tokens": 54517280} +{"current_steps": 80890, "total_steps": 204665, "loss": 0.1111, "lr": 1.514365585022641e-06, "epoch": 1.9761561576234334, "percentage": 39.52, "elapsed_time": "1:44:04", "remaining_time": "2:39:15", "throughput": 8730.74, "total_tokens": 54520736} +{"current_steps": 80895, "total_steps": 204665, "loss": 0.1001, "lr": 1.5142924515549306e-06, "epoch": 1.9762783084552806, "percentage": 39.53, "elapsed_time": "1:44:05", "remaining_time": "2:39:14", "throughput": 8730.8, "total_tokens": 54524128} +{"current_steps": 80900, "total_steps": 204665, "loss": 0.1185, "lr": 1.5142193143471467e-06, "epoch": 1.9764004592871278, "percentage": 39.53, "elapsed_time": "1:44:05", "remaining_time": "2:39:14", "throughput": 8730.86, "total_tokens": 54527584} +{"current_steps": 80905, "total_steps": 204665, "loss": 0.1664, "lr": 1.5141461733998217e-06, "epoch": 1.976522610118975, "percentage": 39.53, "elapsed_time": "1:44:05", "remaining_time": "2:39:14", "throughput": 8730.85, "total_tokens": 54530464} +{"current_steps": 80910, "total_steps": 204665, "loss": 0.0421, "lr": 1.5140730287134876e-06, "epoch": 1.9766447609508222, "percentage": 39.53, "elapsed_time": "1:44:06", "remaining_time": "2:39:13", "throughput": 8730.91, "total_tokens": 54533856} +{"current_steps": 80915, "total_steps": 204665, "loss": 0.1165, "lr": 1.513999880288676e-06, "epoch": 1.9767669117826694, "percentage": 39.54, "elapsed_time": "1:44:06", "remaining_time": "2:39:13", "throughput": 8730.98, "total_tokens": 54537376} +{"current_steps": 80920, "total_steps": 204665, "loss": 0.102, "lr": 1.513926728125919e-06, "epoch": 1.9768890626145164, "percentage": 39.54, "elapsed_time": "1:44:06", "remaining_time": "2:39:12", "throughput": 8731.07, "total_tokens": 54541088} +{"current_steps": 80925, "total_steps": 204665, "loss": 0.1535, "lr": 1.5138535722257488e-06, "epoch": 1.9770112134463635, "percentage": 39.54, "elapsed_time": "1:44:07", "remaining_time": "2:39:12", "throughput": 8731.14, "total_tokens": 54544544} +{"current_steps": 80930, "total_steps": 204665, "loss": 0.1311, "lr": 1.5137804125886973e-06, "epoch": 1.9771333642782107, "percentage": 39.54, "elapsed_time": "1:44:07", "remaining_time": "2:39:11", "throughput": 8731.18, "total_tokens": 54547872} +{"current_steps": 80935, "total_steps": 204665, "loss": 0.2092, "lr": 1.5137072492152962e-06, "epoch": 1.977255515110058, "percentage": 39.55, "elapsed_time": "1:44:07", "remaining_time": "2:39:11", "throughput": 8731.23, "total_tokens": 54551200} +{"current_steps": 80940, "total_steps": 204665, "loss": 0.2748, "lr": 1.513634082106078e-06, "epoch": 1.9773776659419051, "percentage": 39.55, "elapsed_time": "1:44:08", "remaining_time": "2:39:10", "throughput": 8731.28, "total_tokens": 54554528} +{"current_steps": 80945, "total_steps": 204665, "loss": 0.0636, "lr": 1.5135609112615746e-06, "epoch": 1.977499816773752, "percentage": 39.55, "elapsed_time": "1:44:08", "remaining_time": "2:39:10", "throughput": 8731.32, "total_tokens": 54557856} +{"current_steps": 80950, "total_steps": 204665, "loss": 0.1343, "lr": 1.5134877366823178e-06, "epoch": 1.9776219676055993, "percentage": 39.55, "elapsed_time": "1:44:08", "remaining_time": "2:39:10", "throughput": 8731.37, "total_tokens": 54561184} +{"current_steps": 80955, "total_steps": 204665, "loss": 0.058, "lr": 1.5134145583688406e-06, "epoch": 1.9777441184374465, "percentage": 39.55, "elapsed_time": "1:44:09", "remaining_time": "2:39:09", "throughput": 8731.39, "total_tokens": 54564320} +{"current_steps": 80960, "total_steps": 204665, "loss": 0.0711, "lr": 1.5133413763216742e-06, "epoch": 1.9778662692692937, "percentage": 39.56, "elapsed_time": "1:44:09", "remaining_time": "2:39:09", "throughput": 8731.44, "total_tokens": 54567648} +{"current_steps": 80965, "total_steps": 204665, "loss": 0.0481, "lr": 1.5132681905413515e-06, "epoch": 1.9779884201011408, "percentage": 39.56, "elapsed_time": "1:44:09", "remaining_time": "2:39:08", "throughput": 8731.49, "total_tokens": 54570912} +{"current_steps": 80970, "total_steps": 204665, "loss": 0.1138, "lr": 1.5131950010284043e-06, "epoch": 1.978110570932988, "percentage": 39.56, "elapsed_time": "1:44:10", "remaining_time": "2:39:08", "throughput": 8731.52, "total_tokens": 54574112} +{"current_steps": 80975, "total_steps": 204665, "loss": 0.0475, "lr": 1.513121807783365e-06, "epoch": 1.9782327217648352, "percentage": 39.56, "elapsed_time": "1:44:10", "remaining_time": "2:39:07", "throughput": 8731.62, "total_tokens": 54577888} +{"current_steps": 80980, "total_steps": 204665, "loss": 0.0688, "lr": 1.513048610806766e-06, "epoch": 1.9783548725966824, "percentage": 39.57, "elapsed_time": "1:44:10", "remaining_time": "2:39:07", "throughput": 8731.72, "total_tokens": 54581600} +{"current_steps": 80985, "total_steps": 204665, "loss": 0.0358, "lr": 1.5129754100991394e-06, "epoch": 1.9784770234285296, "percentage": 39.57, "elapsed_time": "1:44:11", "remaining_time": "2:39:06", "throughput": 8731.73, "total_tokens": 54584608} +{"current_steps": 80990, "total_steps": 204665, "loss": 0.0421, "lr": 1.512902205661018e-06, "epoch": 1.9785991742603768, "percentage": 39.57, "elapsed_time": "1:44:11", "remaining_time": "2:39:06", "throughput": 8731.78, "total_tokens": 54588000} +{"current_steps": 80995, "total_steps": 204665, "loss": 0.1467, "lr": 1.5128289974929334e-06, "epoch": 1.978721325092224, "percentage": 39.57, "elapsed_time": "1:44:11", "remaining_time": "2:39:06", "throughput": 8731.8, "total_tokens": 54591136} +{"current_steps": 81000, "total_steps": 204665, "loss": 0.0667, "lr": 1.5127557855954186e-06, "epoch": 1.9788434759240712, "percentage": 39.58, "elapsed_time": "1:44:12", "remaining_time": "2:39:05", "throughput": 8731.84, "total_tokens": 54594400} +{"current_steps": 81005, "total_steps": 204665, "loss": 0.0786, "lr": 1.5126825699690056e-06, "epoch": 1.9789656267559184, "percentage": 39.58, "elapsed_time": "1:44:12", "remaining_time": "2:39:05", "throughput": 8731.98, "total_tokens": 54598432} +{"current_steps": 81010, "total_steps": 204665, "loss": 0.1151, "lr": 1.512609350614227e-06, "epoch": 1.9790877775877653, "percentage": 39.58, "elapsed_time": "1:44:13", "remaining_time": "2:39:04", "throughput": 8732.01, "total_tokens": 54601696} +{"current_steps": 81015, "total_steps": 204665, "loss": 0.1662, "lr": 1.5125361275316157e-06, "epoch": 1.9792099284196125, "percentage": 39.58, "elapsed_time": "1:44:13", "remaining_time": "2:39:04", "throughput": 8732.05, "total_tokens": 54604960} +{"current_steps": 81020, "total_steps": 204665, "loss": 0.069, "lr": 1.5124629007217036e-06, "epoch": 1.9793320792514597, "percentage": 39.59, "elapsed_time": "1:44:13", "remaining_time": "2:39:03", "throughput": 8732.12, "total_tokens": 54608480} +{"current_steps": 81025, "total_steps": 204665, "loss": 0.0018, "lr": 1.5123896701850237e-06, "epoch": 1.979454230083307, "percentage": 39.59, "elapsed_time": "1:44:14", "remaining_time": "2:39:03", "throughput": 8732.19, "total_tokens": 54611936} +{"current_steps": 81030, "total_steps": 204665, "loss": 0.145, "lr": 1.512316435922108e-06, "epoch": 1.9795763809151539, "percentage": 39.59, "elapsed_time": "1:44:14", "remaining_time": "2:39:02", "throughput": 8732.18, "total_tokens": 54614816} +{"current_steps": 81035, "total_steps": 204665, "loss": 0.0485, "lr": 1.5122431979334894e-06, "epoch": 1.979698531747001, "percentage": 39.59, "elapsed_time": "1:44:14", "remaining_time": "2:39:02", "throughput": 8732.21, "total_tokens": 54618016} +{"current_steps": 81040, "total_steps": 204665, "loss": 0.1454, "lr": 1.5121699562197006e-06, "epoch": 1.9798206825788482, "percentage": 39.6, "elapsed_time": "1:44:15", "remaining_time": "2:39:02", "throughput": 8732.26, "total_tokens": 54621408} +{"current_steps": 81045, "total_steps": 204665, "loss": 0.0675, "lr": 1.5120967107812738e-06, "epoch": 1.9799428334106954, "percentage": 39.6, "elapsed_time": "1:44:15", "remaining_time": "2:39:01", "throughput": 8732.31, "total_tokens": 54624800} +{"current_steps": 81050, "total_steps": 204665, "loss": 0.0325, "lr": 1.5120234616187423e-06, "epoch": 1.9800649842425426, "percentage": 39.6, "elapsed_time": "1:44:15", "remaining_time": "2:39:01", "throughput": 8732.36, "total_tokens": 54628128} +{"current_steps": 81055, "total_steps": 204665, "loss": 0.0427, "lr": 1.5119502087326387e-06, "epoch": 1.9801871350743898, "percentage": 39.6, "elapsed_time": "1:44:16", "remaining_time": "2:39:00", "throughput": 8732.41, "total_tokens": 54631456} +{"current_steps": 81060, "total_steps": 204665, "loss": 0.1117, "lr": 1.511876952123495e-06, "epoch": 1.980309285906237, "percentage": 39.61, "elapsed_time": "1:44:16", "remaining_time": "2:39:00", "throughput": 8732.43, "total_tokens": 54634592} +{"current_steps": 81065, "total_steps": 204665, "loss": 0.0639, "lr": 1.511803691791845e-06, "epoch": 1.9804314367380842, "percentage": 39.61, "elapsed_time": "1:44:16", "remaining_time": "2:38:59", "throughput": 8732.46, "total_tokens": 54637792} +{"current_steps": 81070, "total_steps": 204665, "loss": 0.0815, "lr": 1.5117304277382204e-06, "epoch": 1.9805535875699314, "percentage": 39.61, "elapsed_time": "1:44:17", "remaining_time": "2:38:59", "throughput": 8732.52, "total_tokens": 54641184} +{"current_steps": 81075, "total_steps": 204665, "loss": 0.0257, "lr": 1.5116571599631544e-06, "epoch": 1.9806757384017786, "percentage": 39.61, "elapsed_time": "1:44:17", "remaining_time": "2:38:58", "throughput": 8732.59, "total_tokens": 54644640} +{"current_steps": 81080, "total_steps": 204665, "loss": 0.0029, "lr": 1.51158388846718e-06, "epoch": 1.9807978892336258, "percentage": 39.62, "elapsed_time": "1:44:17", "remaining_time": "2:38:58", "throughput": 8732.59, "total_tokens": 54647648} +{"current_steps": 81085, "total_steps": 204665, "loss": 0.0702, "lr": 1.5115106132508305e-06, "epoch": 1.980920040065473, "percentage": 39.62, "elapsed_time": "1:44:18", "remaining_time": "2:38:58", "throughput": 8732.61, "total_tokens": 54650720} +{"current_steps": 81090, "total_steps": 204665, "loss": 0.0007, "lr": 1.5114373343146375e-06, "epoch": 1.9810421908973201, "percentage": 39.62, "elapsed_time": "1:44:18", "remaining_time": "2:38:57", "throughput": 8732.65, "total_tokens": 54653984} +{"current_steps": 81095, "total_steps": 204665, "loss": 0.1593, "lr": 1.5113640516591354e-06, "epoch": 1.9811643417291673, "percentage": 39.62, "elapsed_time": "1:44:18", "remaining_time": "2:38:57", "throughput": 8732.73, "total_tokens": 54657568} +{"current_steps": 81100, "total_steps": 204665, "loss": 0.1267, "lr": 1.5112907652848556e-06, "epoch": 1.9812864925610143, "percentage": 39.63, "elapsed_time": "1:44:19", "remaining_time": "2:38:56", "throughput": 8732.75, "total_tokens": 54660640} +{"current_steps": 81105, "total_steps": 204665, "loss": 0.1972, "lr": 1.5112174751923324e-06, "epoch": 1.9814086433928615, "percentage": 39.63, "elapsed_time": "1:44:19", "remaining_time": "2:38:56", "throughput": 8732.75, "total_tokens": 54663648} +{"current_steps": 81110, "total_steps": 204665, "loss": 0.1394, "lr": 1.511144181382098e-06, "epoch": 1.9815307942247087, "percentage": 39.63, "elapsed_time": "1:44:19", "remaining_time": "2:38:55", "throughput": 8732.79, "total_tokens": 54666912} +{"current_steps": 81115, "total_steps": 204665, "loss": 0.0018, "lr": 1.5110708838546856e-06, "epoch": 1.9816529450565559, "percentage": 39.63, "elapsed_time": "1:44:20", "remaining_time": "2:38:55", "throughput": 8732.82, "total_tokens": 54670048} +{"current_steps": 81120, "total_steps": 204665, "loss": 0.0385, "lr": 1.5109975826106285e-06, "epoch": 1.9817750958884028, "percentage": 39.64, "elapsed_time": "1:44:20", "remaining_time": "2:38:54", "throughput": 8732.86, "total_tokens": 54673312} +{"current_steps": 81125, "total_steps": 204665, "loss": 0.012, "lr": 1.5109242776504591e-06, "epoch": 1.98189724672025, "percentage": 39.64, "elapsed_time": "1:44:20", "remaining_time": "2:38:54", "throughput": 8732.91, "total_tokens": 54676640} +{"current_steps": 81130, "total_steps": 204665, "loss": 0.0411, "lr": 1.5108509689747115e-06, "epoch": 1.9820193975520972, "percentage": 39.64, "elapsed_time": "1:44:21", "remaining_time": "2:38:54", "throughput": 8732.96, "total_tokens": 54679968} +{"current_steps": 81135, "total_steps": 204665, "loss": 0.001, "lr": 1.5107776565839177e-06, "epoch": 1.9821415483839444, "percentage": 39.64, "elapsed_time": "1:44:21", "remaining_time": "2:38:53", "throughput": 8733.1, "total_tokens": 54684064} +{"current_steps": 81140, "total_steps": 204665, "loss": 0.1699, "lr": 1.510704340478612e-06, "epoch": 1.9822636992157916, "percentage": 39.65, "elapsed_time": "1:44:22", "remaining_time": "2:38:53", "throughput": 8733.16, "total_tokens": 54687392} +{"current_steps": 81145, "total_steps": 204665, "loss": 0.1538, "lr": 1.5106310206593265e-06, "epoch": 1.9823858500476388, "percentage": 39.65, "elapsed_time": "1:44:22", "remaining_time": "2:38:52", "throughput": 8733.17, "total_tokens": 54690464} +{"current_steps": 81150, "total_steps": 204665, "loss": 0.2718, "lr": 1.510557697126595e-06, "epoch": 1.982508000879486, "percentage": 39.65, "elapsed_time": "1:44:22", "remaining_time": "2:38:52", "throughput": 8733.24, "total_tokens": 54693984} +{"current_steps": 81155, "total_steps": 204665, "loss": 0.1438, "lr": 1.5104843698809506e-06, "epoch": 1.9826301517113332, "percentage": 39.65, "elapsed_time": "1:44:23", "remaining_time": "2:38:51", "throughput": 8733.33, "total_tokens": 54697632} +{"current_steps": 81160, "total_steps": 204665, "loss": 0.0019, "lr": 1.5104110389229265e-06, "epoch": 1.9827523025431804, "percentage": 39.66, "elapsed_time": "1:44:23", "remaining_time": "2:38:51", "throughput": 8733.43, "total_tokens": 54701344} +{"current_steps": 81165, "total_steps": 204665, "loss": 0.1028, "lr": 1.5103377042530561e-06, "epoch": 1.9828744533750275, "percentage": 39.66, "elapsed_time": "1:44:23", "remaining_time": "2:38:50", "throughput": 8733.46, "total_tokens": 54704544} +{"current_steps": 81170, "total_steps": 204665, "loss": 0.0757, "lr": 1.5102643658718726e-06, "epoch": 1.9829966042068747, "percentage": 39.66, "elapsed_time": "1:44:24", "remaining_time": "2:38:50", "throughput": 8733.61, "total_tokens": 54708704} +{"current_steps": 81175, "total_steps": 204665, "loss": 0.0797, "lr": 1.5101910237799093e-06, "epoch": 1.983118755038722, "percentage": 39.66, "elapsed_time": "1:44:24", "remaining_time": "2:38:50", "throughput": 8733.62, "total_tokens": 54711776} +{"current_steps": 81180, "total_steps": 204665, "loss": 0.044, "lr": 1.5101176779776999e-06, "epoch": 1.9832409058705691, "percentage": 39.66, "elapsed_time": "1:44:24", "remaining_time": "2:38:49", "throughput": 8733.63, "total_tokens": 54714784} +{"current_steps": 81185, "total_steps": 204665, "loss": 0.0014, "lr": 1.5100443284657773e-06, "epoch": 1.9833630567024163, "percentage": 39.67, "elapsed_time": "1:44:25", "remaining_time": "2:38:49", "throughput": 8733.67, "total_tokens": 54718048} +{"current_steps": 81190, "total_steps": 204665, "loss": 0.1347, "lr": 1.5099709752446754e-06, "epoch": 1.9834852075342633, "percentage": 39.67, "elapsed_time": "1:44:25", "remaining_time": "2:38:48", "throughput": 8733.7, "total_tokens": 54721184} +{"current_steps": 81195, "total_steps": 204665, "loss": 0.1472, "lr": 1.5098976183149272e-06, "epoch": 1.9836073583661105, "percentage": 39.67, "elapsed_time": "1:44:25", "remaining_time": "2:38:48", "throughput": 8733.75, "total_tokens": 54724576} +{"current_steps": 81200, "total_steps": 204665, "loss": 0.1159, "lr": 1.5098242576770666e-06, "epoch": 1.9837295091979577, "percentage": 39.67, "elapsed_time": "1:44:26", "remaining_time": "2:38:47", "throughput": 8733.76, "total_tokens": 54727648} +{"current_steps": 81205, "total_steps": 204665, "loss": 0.0484, "lr": 1.5097508933316267e-06, "epoch": 1.9838516600298048, "percentage": 39.68, "elapsed_time": "1:44:26", "remaining_time": "2:38:47", "throughput": 8733.85, "total_tokens": 54731232} +{"current_steps": 81210, "total_steps": 204665, "loss": 0.1048, "lr": 1.5096775252791414e-06, "epoch": 1.9839738108616518, "percentage": 39.68, "elapsed_time": "1:44:26", "remaining_time": "2:38:46", "throughput": 8733.97, "total_tokens": 54735136} +{"current_steps": 81215, "total_steps": 204665, "loss": 0.0012, "lr": 1.5096041535201435e-06, "epoch": 1.984095961693499, "percentage": 39.68, "elapsed_time": "1:44:27", "remaining_time": "2:38:46", "throughput": 8734.04, "total_tokens": 54738656} +{"current_steps": 81220, "total_steps": 204665, "loss": 0.0957, "lr": 1.5095307780551676e-06, "epoch": 1.9842181125253462, "percentage": 39.68, "elapsed_time": "1:44:27", "remaining_time": "2:38:46", "throughput": 8734.05, "total_tokens": 54741728} +{"current_steps": 81225, "total_steps": 204665, "loss": 0.0014, "lr": 1.5094573988847468e-06, "epoch": 1.9843402633571934, "percentage": 39.69, "elapsed_time": "1:44:27", "remaining_time": "2:38:45", "throughput": 8734.07, "total_tokens": 54744864} +{"current_steps": 81230, "total_steps": 204665, "loss": 0.0008, "lr": 1.5093840160094145e-06, "epoch": 1.9844624141890406, "percentage": 39.69, "elapsed_time": "1:44:28", "remaining_time": "2:38:45", "throughput": 8734.11, "total_tokens": 54748128} +{"current_steps": 81235, "total_steps": 204665, "loss": 0.0874, "lr": 1.509310629429705e-06, "epoch": 1.9845845650208878, "percentage": 39.69, "elapsed_time": "1:44:28", "remaining_time": "2:38:44", "throughput": 8734.18, "total_tokens": 54751712} +{"current_steps": 81240, "total_steps": 204665, "loss": 0.0342, "lr": 1.5092372391461515e-06, "epoch": 1.984706715852735, "percentage": 39.69, "elapsed_time": "1:44:29", "remaining_time": "2:38:44", "throughput": 8734.22, "total_tokens": 54754976} +{"current_steps": 81245, "total_steps": 204665, "loss": 0.0006, "lr": 1.5091638451592878e-06, "epoch": 1.9848288666845821, "percentage": 39.7, "elapsed_time": "1:44:29", "remaining_time": "2:38:43", "throughput": 8734.25, "total_tokens": 54758176} +{"current_steps": 81250, "total_steps": 204665, "loss": 0.0261, "lr": 1.5090904474696478e-06, "epoch": 1.9849510175164293, "percentage": 39.7, "elapsed_time": "1:44:29", "remaining_time": "2:38:43", "throughput": 8734.35, "total_tokens": 54761888} +{"current_steps": 81255, "total_steps": 204665, "loss": 0.0012, "lr": 1.5090170460777647e-06, "epoch": 1.9850731683482765, "percentage": 39.7, "elapsed_time": "1:44:30", "remaining_time": "2:38:42", "throughput": 8734.42, "total_tokens": 54765408} +{"current_steps": 81260, "total_steps": 204665, "loss": 0.0779, "lr": 1.508943640984173e-06, "epoch": 1.9851953191801237, "percentage": 39.7, "elapsed_time": "1:44:30", "remaining_time": "2:38:42", "throughput": 8734.49, "total_tokens": 54768928} +{"current_steps": 81265, "total_steps": 204665, "loss": 0.0576, "lr": 1.5088702321894062e-06, "epoch": 1.985317470011971, "percentage": 39.71, "elapsed_time": "1:44:30", "remaining_time": "2:38:42", "throughput": 8734.56, "total_tokens": 54772448} +{"current_steps": 81270, "total_steps": 204665, "loss": 0.1292, "lr": 1.5087968196939985e-06, "epoch": 1.985439620843818, "percentage": 39.71, "elapsed_time": "1:44:31", "remaining_time": "2:38:41", "throughput": 8734.6, "total_tokens": 54775648} +{"current_steps": 81275, "total_steps": 204665, "loss": 0.0034, "lr": 1.5087234034984833e-06, "epoch": 1.9855617716756653, "percentage": 39.71, "elapsed_time": "1:44:31", "remaining_time": "2:38:41", "throughput": 8734.64, "total_tokens": 54778976} +{"current_steps": 81280, "total_steps": 204665, "loss": 0.1485, "lr": 1.5086499836033945e-06, "epoch": 1.9856839225075122, "percentage": 39.71, "elapsed_time": "1:44:31", "remaining_time": "2:38:40", "throughput": 8734.65, "total_tokens": 54782048} +{"current_steps": 81285, "total_steps": 204665, "loss": 0.0958, "lr": 1.5085765600092663e-06, "epoch": 1.9858060733393594, "percentage": 39.72, "elapsed_time": "1:44:32", "remaining_time": "2:38:40", "throughput": 8734.72, "total_tokens": 54785568} +{"current_steps": 81290, "total_steps": 204665, "loss": 0.0731, "lr": 1.5085031327166324e-06, "epoch": 1.9859282241712066, "percentage": 39.72, "elapsed_time": "1:44:32", "remaining_time": "2:38:39", "throughput": 8734.77, "total_tokens": 54788896} +{"current_steps": 81295, "total_steps": 204665, "loss": 0.092, "lr": 1.5084297017260274e-06, "epoch": 1.9860503750030538, "percentage": 39.72, "elapsed_time": "1:44:32", "remaining_time": "2:38:39", "throughput": 8734.81, "total_tokens": 54792224} +{"current_steps": 81300, "total_steps": 204665, "loss": 0.1245, "lr": 1.5083562670379847e-06, "epoch": 1.9861725258349008, "percentage": 39.72, "elapsed_time": "1:44:33", "remaining_time": "2:38:38", "throughput": 8734.86, "total_tokens": 54795552} +{"current_steps": 81305, "total_steps": 204665, "loss": 0.18, "lr": 1.5082828286530385e-06, "epoch": 1.986294676666748, "percentage": 39.73, "elapsed_time": "1:44:33", "remaining_time": "2:38:38", "throughput": 8734.97, "total_tokens": 54799392} +{"current_steps": 81310, "total_steps": 204665, "loss": 0.0018, "lr": 1.5082093865717226e-06, "epoch": 1.9864168274985952, "percentage": 39.73, "elapsed_time": "1:44:33", "remaining_time": "2:38:38", "throughput": 8735.01, "total_tokens": 54802720} +{"current_steps": 81315, "total_steps": 204665, "loss": 0.0463, "lr": 1.5081359407945717e-06, "epoch": 1.9865389783304424, "percentage": 39.73, "elapsed_time": "1:44:34", "remaining_time": "2:38:37", "throughput": 8735.05, "total_tokens": 54805984} +{"current_steps": 81320, "total_steps": 204665, "loss": 0.1515, "lr": 1.5080624913221192e-06, "epoch": 1.9866611291622895, "percentage": 39.73, "elapsed_time": "1:44:34", "remaining_time": "2:38:37", "throughput": 8735.1, "total_tokens": 54809376} +{"current_steps": 81325, "total_steps": 204665, "loss": 0.0629, "lr": 1.5079890381549e-06, "epoch": 1.9867832799941367, "percentage": 39.74, "elapsed_time": "1:44:34", "remaining_time": "2:38:36", "throughput": 8735.1, "total_tokens": 54812256} +{"current_steps": 81330, "total_steps": 204665, "loss": 0.1602, "lr": 1.5079155812934474e-06, "epoch": 1.986905430825984, "percentage": 39.74, "elapsed_time": "1:44:35", "remaining_time": "2:38:36", "throughput": 8735.16, "total_tokens": 54815776} +{"current_steps": 81335, "total_steps": 204665, "loss": 0.0121, "lr": 1.5078421207382963e-06, "epoch": 1.987027581657831, "percentage": 39.74, "elapsed_time": "1:44:35", "remaining_time": "2:38:35", "throughput": 8735.27, "total_tokens": 54819552} +{"current_steps": 81340, "total_steps": 204665, "loss": 0.003, "lr": 1.5077686564899808e-06, "epoch": 1.9871497324896783, "percentage": 39.74, "elapsed_time": "1:44:35", "remaining_time": "2:38:35", "throughput": 8735.28, "total_tokens": 54822624} +{"current_steps": 81345, "total_steps": 204665, "loss": 0.0501, "lr": 1.507695188549035e-06, "epoch": 1.9872718833215255, "percentage": 39.75, "elapsed_time": "1:44:36", "remaining_time": "2:38:35", "throughput": 8735.32, "total_tokens": 54825888} +{"current_steps": 81350, "total_steps": 204665, "loss": 0.1933, "lr": 1.5076217169159933e-06, "epoch": 1.9873940341533727, "percentage": 39.75, "elapsed_time": "1:44:36", "remaining_time": "2:38:34", "throughput": 8735.38, "total_tokens": 54829280} +{"current_steps": 81355, "total_steps": 204665, "loss": 0.1009, "lr": 1.5075482415913899e-06, "epoch": 1.9875161849852199, "percentage": 39.75, "elapsed_time": "1:44:37", "remaining_time": "2:38:34", "throughput": 8735.4, "total_tokens": 54832416} +{"current_steps": 81360, "total_steps": 204665, "loss": 0.0637, "lr": 1.5074747625757591e-06, "epoch": 1.987638335817067, "percentage": 39.75, "elapsed_time": "1:44:37", "remaining_time": "2:38:33", "throughput": 8735.48, "total_tokens": 54836000} +{"current_steps": 81365, "total_steps": 204665, "loss": 0.1048, "lr": 1.5074012798696356e-06, "epoch": 1.987760486648914, "percentage": 39.76, "elapsed_time": "1:44:37", "remaining_time": "2:38:33", "throughput": 8735.52, "total_tokens": 54839328} +{"current_steps": 81370, "total_steps": 204665, "loss": 0.1378, "lr": 1.5073277934735531e-06, "epoch": 1.9878826374807612, "percentage": 39.76, "elapsed_time": "1:44:38", "remaining_time": "2:38:32", "throughput": 8735.57, "total_tokens": 54842656} +{"current_steps": 81375, "total_steps": 204665, "loss": 0.1105, "lr": 1.5072543033880466e-06, "epoch": 1.9880047883126084, "percentage": 39.76, "elapsed_time": "1:44:38", "remaining_time": "2:38:32", "throughput": 8735.64, "total_tokens": 54846176} +{"current_steps": 81380, "total_steps": 204665, "loss": 0.0333, "lr": 1.5071808096136503e-06, "epoch": 1.9881269391444556, "percentage": 39.76, "elapsed_time": "1:44:38", "remaining_time": "2:38:31", "throughput": 8735.74, "total_tokens": 54849952} +{"current_steps": 81385, "total_steps": 204665, "loss": 0.084, "lr": 1.507107312150899e-06, "epoch": 1.9882490899763028, "percentage": 39.76, "elapsed_time": "1:44:39", "remaining_time": "2:38:31", "throughput": 8735.72, "total_tokens": 54852768} +{"current_steps": 81390, "total_steps": 204665, "loss": 0.0249, "lr": 1.5070338110003266e-06, "epoch": 1.9883712408081498, "percentage": 39.77, "elapsed_time": "1:44:39", "remaining_time": "2:38:31", "throughput": 8735.75, "total_tokens": 54855968} +{"current_steps": 81395, "total_steps": 204665, "loss": 0.0502, "lr": 1.5069603061624683e-06, "epoch": 1.988493391639997, "percentage": 39.77, "elapsed_time": "1:44:39", "remaining_time": "2:38:30", "throughput": 8735.8, "total_tokens": 54859296} +{"current_steps": 81400, "total_steps": 204665, "loss": 0.0339, "lr": 1.5068867976378582e-06, "epoch": 1.9886155424718441, "percentage": 39.77, "elapsed_time": "1:44:40", "remaining_time": "2:38:30", "throughput": 8735.91, "total_tokens": 54863136} +{"current_steps": 81405, "total_steps": 204665, "loss": 0.0677, "lr": 1.506813285427031e-06, "epoch": 1.9887376933036913, "percentage": 39.77, "elapsed_time": "1:44:40", "remaining_time": "2:38:29", "throughput": 8735.95, "total_tokens": 54866400} +{"current_steps": 81410, "total_steps": 204665, "loss": 0.1462, "lr": 1.5067397695305212e-06, "epoch": 1.9888598441355385, "percentage": 39.78, "elapsed_time": "1:44:40", "remaining_time": "2:38:29", "throughput": 8736.01, "total_tokens": 54869920} +{"current_steps": 81415, "total_steps": 204665, "loss": 0.0628, "lr": 1.5066662499488634e-06, "epoch": 1.9889819949673857, "percentage": 39.78, "elapsed_time": "1:44:41", "remaining_time": "2:38:28", "throughput": 8736.06, "total_tokens": 54873248} +{"current_steps": 81420, "total_steps": 204665, "loss": 0.0587, "lr": 1.5065927266825923e-06, "epoch": 1.989104145799233, "percentage": 39.78, "elapsed_time": "1:44:41", "remaining_time": "2:38:28", "throughput": 8736.2, "total_tokens": 54877280} +{"current_steps": 81425, "total_steps": 204665, "loss": 0.0877, "lr": 1.5065191997322426e-06, "epoch": 1.98922629663108, "percentage": 39.78, "elapsed_time": "1:44:41", "remaining_time": "2:38:27", "throughput": 8736.17, "total_tokens": 54879968} +{"current_steps": 81430, "total_steps": 204665, "loss": 0.041, "lr": 1.5064456690983493e-06, "epoch": 1.9893484474629273, "percentage": 39.79, "elapsed_time": "1:44:42", "remaining_time": "2:38:27", "throughput": 8736.19, "total_tokens": 54883104} +{"current_steps": 81435, "total_steps": 204665, "loss": 0.0558, "lr": 1.5063721347814468e-06, "epoch": 1.9894705982947745, "percentage": 39.79, "elapsed_time": "1:44:42", "remaining_time": "2:38:27", "throughput": 8736.26, "total_tokens": 54886624} +{"current_steps": 81440, "total_steps": 204665, "loss": 0.0027, "lr": 1.50629859678207e-06, "epoch": 1.9895927491266217, "percentage": 39.79, "elapsed_time": "1:44:42", "remaining_time": "2:38:26", "throughput": 8736.3, "total_tokens": 54889952} +{"current_steps": 81445, "total_steps": 204665, "loss": 0.0344, "lr": 1.5062250551007533e-06, "epoch": 1.9897148999584688, "percentage": 39.79, "elapsed_time": "1:44:43", "remaining_time": "2:38:26", "throughput": 8736.42, "total_tokens": 54893792} +{"current_steps": 81450, "total_steps": 204665, "loss": 0.0756, "lr": 1.5061515097380323e-06, "epoch": 1.989837050790316, "percentage": 39.8, "elapsed_time": "1:44:43", "remaining_time": "2:38:25", "throughput": 8736.52, "total_tokens": 54897504} +{"current_steps": 81455, "total_steps": 204665, "loss": 0.0261, "lr": 1.5060779606944412e-06, "epoch": 1.989959201622163, "percentage": 39.8, "elapsed_time": "1:44:44", "remaining_time": "2:38:25", "throughput": 8736.55, "total_tokens": 54900704} +{"current_steps": 81460, "total_steps": 204665, "loss": 0.0433, "lr": 1.506004407970515e-06, "epoch": 1.9900813524540102, "percentage": 39.8, "elapsed_time": "1:44:44", "remaining_time": "2:38:24", "throughput": 8736.62, "total_tokens": 54904224} +{"current_steps": 81465, "total_steps": 204665, "loss": 0.1088, "lr": 1.5059308515667888e-06, "epoch": 1.9902035032858574, "percentage": 39.8, "elapsed_time": "1:44:44", "remaining_time": "2:38:24", "throughput": 8736.65, "total_tokens": 54907360} +{"current_steps": 81470, "total_steps": 204665, "loss": 0.0366, "lr": 1.5058572914837973e-06, "epoch": 1.9903256541177046, "percentage": 39.81, "elapsed_time": "1:44:45", "remaining_time": "2:38:24", "throughput": 8736.83, "total_tokens": 54911776} +{"current_steps": 81475, "total_steps": 204665, "loss": 0.0852, "lr": 1.505783727722075e-06, "epoch": 1.9904478049495518, "percentage": 39.81, "elapsed_time": "1:44:45", "remaining_time": "2:38:23", "throughput": 8736.89, "total_tokens": 54915168} +{"current_steps": 81480, "total_steps": 204665, "loss": 0.0808, "lr": 1.505710160282158e-06, "epoch": 1.9905699557813987, "percentage": 39.81, "elapsed_time": "1:44:45", "remaining_time": "2:38:23", "throughput": 8736.9, "total_tokens": 54918240} +{"current_steps": 81485, "total_steps": 204665, "loss": 0.1603, "lr": 1.5056365891645805e-06, "epoch": 1.990692106613246, "percentage": 39.81, "elapsed_time": "1:44:46", "remaining_time": "2:38:22", "throughput": 8736.95, "total_tokens": 54921568} +{"current_steps": 81490, "total_steps": 204665, "loss": 0.0425, "lr": 1.5055630143698778e-06, "epoch": 1.990814257445093, "percentage": 39.82, "elapsed_time": "1:44:46", "remaining_time": "2:38:22", "throughput": 8736.97, "total_tokens": 54924704} +{"current_steps": 81495, "total_steps": 204665, "loss": 0.0634, "lr": 1.505489435898585e-06, "epoch": 1.9909364082769403, "percentage": 39.82, "elapsed_time": "1:44:46", "remaining_time": "2:38:21", "throughput": 8737.03, "total_tokens": 54928096} +{"current_steps": 81500, "total_steps": 204665, "loss": 0.0872, "lr": 1.505415853751237e-06, "epoch": 1.9910585591087875, "percentage": 39.82, "elapsed_time": "1:44:47", "remaining_time": "2:38:21", "throughput": 8737.07, "total_tokens": 54931424} +{"current_steps": 81505, "total_steps": 204665, "loss": 0.1399, "lr": 1.5053422679283688e-06, "epoch": 1.9911807099406347, "percentage": 39.82, "elapsed_time": "1:44:47", "remaining_time": "2:38:20", "throughput": 8737.12, "total_tokens": 54934752} +{"current_steps": 81510, "total_steps": 204665, "loss": 0.0519, "lr": 1.5052686784305158e-06, "epoch": 1.9913028607724819, "percentage": 39.83, "elapsed_time": "1:44:47", "remaining_time": "2:38:20", "throughput": 8737.16, "total_tokens": 54938080} +{"current_steps": 81515, "total_steps": 204665, "loss": 0.039, "lr": 1.505195085258213e-06, "epoch": 1.991425011604329, "percentage": 39.83, "elapsed_time": "1:44:48", "remaining_time": "2:38:20", "throughput": 8737.27, "total_tokens": 54941856} +{"current_steps": 81520, "total_steps": 204665, "loss": 0.0728, "lr": 1.5051214884119956e-06, "epoch": 1.9915471624361762, "percentage": 39.83, "elapsed_time": "1:44:48", "remaining_time": "2:38:19", "throughput": 8737.3, "total_tokens": 54945056} +{"current_steps": 81525, "total_steps": 204665, "loss": 0.1029, "lr": 1.505047887892399e-06, "epoch": 1.9916693132680234, "percentage": 39.83, "elapsed_time": "1:44:48", "remaining_time": "2:38:19", "throughput": 8737.35, "total_tokens": 54948448} +{"current_steps": 81530, "total_steps": 204665, "loss": 0.0036, "lr": 1.5049742836999584e-06, "epoch": 1.9917914640998706, "percentage": 39.84, "elapsed_time": "1:44:49", "remaining_time": "2:38:18", "throughput": 8737.4, "total_tokens": 54951840} +{"current_steps": 81535, "total_steps": 204665, "loss": 0.2146, "lr": 1.5049006758352088e-06, "epoch": 1.9919136149317178, "percentage": 39.84, "elapsed_time": "1:44:49", "remaining_time": "2:38:18", "throughput": 8737.43, "total_tokens": 54955040} +{"current_steps": 81540, "total_steps": 204665, "loss": 0.0869, "lr": 1.5048270642986855e-06, "epoch": 1.992035765763565, "percentage": 39.84, "elapsed_time": "1:44:49", "remaining_time": "2:38:17", "throughput": 8737.49, "total_tokens": 54958496} +{"current_steps": 81545, "total_steps": 204665, "loss": 0.0977, "lr": 1.5047534490909243e-06, "epoch": 1.992157916595412, "percentage": 39.84, "elapsed_time": "1:44:50", "remaining_time": "2:38:17", "throughput": 8737.54, "total_tokens": 54961760} +{"current_steps": 81550, "total_steps": 204665, "loss": 0.1243, "lr": 1.5046798302124603e-06, "epoch": 1.9922800674272592, "percentage": 39.85, "elapsed_time": "1:44:50", "remaining_time": "2:38:16", "throughput": 8737.59, "total_tokens": 54965152} +{"current_steps": 81555, "total_steps": 204665, "loss": 0.0381, "lr": 1.5046062076638288e-06, "epoch": 1.9924022182591064, "percentage": 39.85, "elapsed_time": "1:44:51", "remaining_time": "2:38:16", "throughput": 8737.67, "total_tokens": 54968736} +{"current_steps": 81560, "total_steps": 204665, "loss": 0.1014, "lr": 1.5045325814455657e-06, "epoch": 1.9925243690909535, "percentage": 39.85, "elapsed_time": "1:44:51", "remaining_time": "2:38:16", "throughput": 8737.74, "total_tokens": 54972256} +{"current_steps": 81565, "total_steps": 204665, "loss": 0.0302, "lr": 1.5044589515582051e-06, "epoch": 1.9926465199228007, "percentage": 39.85, "elapsed_time": "1:44:51", "remaining_time": "2:38:15", "throughput": 8737.81, "total_tokens": 54975776} +{"current_steps": 81570, "total_steps": 204665, "loss": 0.0014, "lr": 1.5043853180022837e-06, "epoch": 1.9927686707546477, "percentage": 39.86, "elapsed_time": "1:44:52", "remaining_time": "2:38:15", "throughput": 8737.8, "total_tokens": 54978656} +{"current_steps": 81575, "total_steps": 204665, "loss": 0.2286, "lr": 1.5043116807783364e-06, "epoch": 1.9928908215864949, "percentage": 39.86, "elapsed_time": "1:44:52", "remaining_time": "2:38:14", "throughput": 8737.86, "total_tokens": 54982048} +{"current_steps": 81580, "total_steps": 204665, "loss": 0.0634, "lr": 1.5042380398868991e-06, "epoch": 1.993012972418342, "percentage": 39.86, "elapsed_time": "1:44:52", "remaining_time": "2:38:14", "throughput": 8737.92, "total_tokens": 54985504} +{"current_steps": 81585, "total_steps": 204665, "loss": 0.001, "lr": 1.5041643953285074e-06, "epoch": 1.9931351232501893, "percentage": 39.86, "elapsed_time": "1:44:53", "remaining_time": "2:38:13", "throughput": 8738.01, "total_tokens": 54989152} +{"current_steps": 81590, "total_steps": 204665, "loss": 0.0087, "lr": 1.5040907471036962e-06, "epoch": 1.9932572740820365, "percentage": 39.87, "elapsed_time": "1:44:53", "remaining_time": "2:38:13", "throughput": 8738.08, "total_tokens": 54992672} +{"current_steps": 81595, "total_steps": 204665, "loss": 0.133, "lr": 1.5040170952130019e-06, "epoch": 1.9933794249138836, "percentage": 39.87, "elapsed_time": "1:44:53", "remaining_time": "2:38:12", "throughput": 8738.12, "total_tokens": 54995936} +{"current_steps": 81600, "total_steps": 204665, "loss": 0.2358, "lr": 1.5039434396569592e-06, "epoch": 1.9935015757457308, "percentage": 39.87, "elapsed_time": "1:44:54", "remaining_time": "2:38:12", "throughput": 8738.14, "total_tokens": 54999008} +{"current_steps": 81605, "total_steps": 204665, "loss": 0.1468, "lr": 1.5038697804361046e-06, "epoch": 1.993623726577578, "percentage": 39.87, "elapsed_time": "1:44:54", "remaining_time": "2:38:12", "throughput": 8738.24, "total_tokens": 55002784} +{"current_steps": 81610, "total_steps": 204665, "loss": 0.1263, "lr": 1.5037961175509737e-06, "epoch": 1.9937458774094252, "percentage": 39.87, "elapsed_time": "1:44:54", "remaining_time": "2:38:11", "throughput": 8738.27, "total_tokens": 55005920} +{"current_steps": 81615, "total_steps": 204665, "loss": 0.0316, "lr": 1.5037224510021016e-06, "epoch": 1.9938680282412724, "percentage": 39.88, "elapsed_time": "1:44:55", "remaining_time": "2:38:11", "throughput": 8738.35, "total_tokens": 55009504} +{"current_steps": 81620, "total_steps": 204665, "loss": 0.0791, "lr": 1.5036487807900243e-06, "epoch": 1.9939901790731196, "percentage": 39.88, "elapsed_time": "1:44:55", "remaining_time": "2:38:10", "throughput": 8738.39, "total_tokens": 55012768} +{"current_steps": 81625, "total_steps": 204665, "loss": 0.0762, "lr": 1.5035751069152775e-06, "epoch": 1.9941123299049668, "percentage": 39.88, "elapsed_time": "1:44:55", "remaining_time": "2:38:10", "throughput": 8738.46, "total_tokens": 55016288} +{"current_steps": 81630, "total_steps": 204665, "loss": 0.1994, "lr": 1.5035014293783972e-06, "epoch": 1.994234480736814, "percentage": 39.88, "elapsed_time": "1:44:56", "remaining_time": "2:38:09", "throughput": 8738.47, "total_tokens": 55019296} +{"current_steps": 81635, "total_steps": 204665, "loss": 0.045, "lr": 1.503427748179919e-06, "epoch": 1.994356631568661, "percentage": 39.89, "elapsed_time": "1:44:56", "remaining_time": "2:38:09", "throughput": 8738.46, "total_tokens": 55022176} +{"current_steps": 81640, "total_steps": 204665, "loss": 0.1222, "lr": 1.503354063320379e-06, "epoch": 1.9944787824005081, "percentage": 39.89, "elapsed_time": "1:44:56", "remaining_time": "2:38:08", "throughput": 8738.47, "total_tokens": 55025184} +{"current_steps": 81645, "total_steps": 204665, "loss": 0.1907, "lr": 1.503280374800313e-06, "epoch": 1.9946009332323553, "percentage": 39.89, "elapsed_time": "1:44:57", "remaining_time": "2:38:08", "throughput": 8738.55, "total_tokens": 55028768} +{"current_steps": 81650, "total_steps": 204665, "loss": 0.0371, "lr": 1.5032066826202563e-06, "epoch": 1.9947230840642025, "percentage": 39.89, "elapsed_time": "1:44:57", "remaining_time": "2:38:08", "throughput": 8738.66, "total_tokens": 55032608} +{"current_steps": 81655, "total_steps": 204665, "loss": 0.0012, "lr": 1.5031329867807457e-06, "epoch": 1.9948452348960495, "percentage": 39.9, "elapsed_time": "1:44:57", "remaining_time": "2:38:07", "throughput": 8738.72, "total_tokens": 55036064} +{"current_steps": 81660, "total_steps": 204665, "loss": 0.1023, "lr": 1.5030592872823164e-06, "epoch": 1.9949673857278967, "percentage": 39.9, "elapsed_time": "1:44:58", "remaining_time": "2:38:07", "throughput": 8738.73, "total_tokens": 55039072} +{"current_steps": 81665, "total_steps": 204665, "loss": 0.092, "lr": 1.5029855841255047e-06, "epoch": 1.9950895365597439, "percentage": 39.9, "elapsed_time": "1:44:58", "remaining_time": "2:38:06", "throughput": 8738.76, "total_tokens": 55042272} +{"current_steps": 81670, "total_steps": 204665, "loss": 0.1157, "lr": 1.5029118773108467e-06, "epoch": 1.995211687391591, "percentage": 39.9, "elapsed_time": "1:44:58", "remaining_time": "2:38:06", "throughput": 8738.76, "total_tokens": 55045216} +{"current_steps": 81675, "total_steps": 204665, "loss": 0.0362, "lr": 1.5028381668388783e-06, "epoch": 1.9953338382234382, "percentage": 39.91, "elapsed_time": "1:44:59", "remaining_time": "2:38:05", "throughput": 8738.81, "total_tokens": 55048608} +{"current_steps": 81680, "total_steps": 204665, "loss": 0.1336, "lr": 1.5027644527101353e-06, "epoch": 1.9954559890552854, "percentage": 39.91, "elapsed_time": "1:44:59", "remaining_time": "2:38:05", "throughput": 8738.85, "total_tokens": 55051872} +{"current_steps": 81685, "total_steps": 204665, "loss": 0.1057, "lr": 1.5026907349251538e-06, "epoch": 1.9955781398871326, "percentage": 39.91, "elapsed_time": "1:45:00", "remaining_time": "2:38:04", "throughput": 8738.9, "total_tokens": 55055264} +{"current_steps": 81690, "total_steps": 204665, "loss": 0.1327, "lr": 1.5026170134844705e-06, "epoch": 1.9957002907189798, "percentage": 39.91, "elapsed_time": "1:45:00", "remaining_time": "2:38:04", "throughput": 8738.92, "total_tokens": 55058336} +{"current_steps": 81695, "total_steps": 204665, "loss": 0.0102, "lr": 1.5025432883886208e-06, "epoch": 1.995822441550827, "percentage": 39.92, "elapsed_time": "1:45:00", "remaining_time": "2:38:04", "throughput": 8738.91, "total_tokens": 55061216} +{"current_steps": 81700, "total_steps": 204665, "loss": 0.1804, "lr": 1.502469559638141e-06, "epoch": 1.9959445923826742, "percentage": 39.92, "elapsed_time": "1:45:01", "remaining_time": "2:38:03", "throughput": 8738.99, "total_tokens": 55064800} +{"current_steps": 81705, "total_steps": 204665, "loss": 0.0232, "lr": 1.5023958272335677e-06, "epoch": 1.9960667432145214, "percentage": 39.92, "elapsed_time": "1:45:01", "remaining_time": "2:38:03", "throughput": 8739.03, "total_tokens": 55068064} +{"current_steps": 81710, "total_steps": 204665, "loss": 0.1199, "lr": 1.5023220911754368e-06, "epoch": 1.9961888940463686, "percentage": 39.92, "elapsed_time": "1:45:01", "remaining_time": "2:38:02", "throughput": 8739.13, "total_tokens": 55071840} +{"current_steps": 81715, "total_steps": 204665, "loss": 0.102, "lr": 1.502248351464285e-06, "epoch": 1.9963110448782158, "percentage": 39.93, "elapsed_time": "1:45:02", "remaining_time": "2:38:02", "throughput": 8739.18, "total_tokens": 55075232} +{"current_steps": 81720, "total_steps": 204665, "loss": 0.0461, "lr": 1.5021746081006474e-06, "epoch": 1.996433195710063, "percentage": 39.93, "elapsed_time": "1:45:02", "remaining_time": "2:38:01", "throughput": 8739.21, "total_tokens": 55078368} +{"current_steps": 81725, "total_steps": 204665, "loss": 0.0729, "lr": 1.502100861085061e-06, "epoch": 1.99655534654191, "percentage": 39.93, "elapsed_time": "1:45:02", "remaining_time": "2:38:01", "throughput": 8739.3, "total_tokens": 55082080} +{"current_steps": 81730, "total_steps": 204665, "loss": 0.1241, "lr": 1.5020271104180623e-06, "epoch": 1.996677497373757, "percentage": 39.93, "elapsed_time": "1:45:03", "remaining_time": "2:38:00", "throughput": 8739.3, "total_tokens": 55085024} +{"current_steps": 81735, "total_steps": 204665, "loss": 0.1156, "lr": 1.5019533561001875e-06, "epoch": 1.9967996482056043, "percentage": 39.94, "elapsed_time": "1:45:03", "remaining_time": "2:38:00", "throughput": 8739.37, "total_tokens": 55088544} +{"current_steps": 81740, "total_steps": 204665, "loss": 0.1221, "lr": 1.5018795981319727e-06, "epoch": 1.9969217990374515, "percentage": 39.94, "elapsed_time": "1:45:03", "remaining_time": "2:38:00", "throughput": 8739.42, "total_tokens": 55091936} +{"current_steps": 81745, "total_steps": 204665, "loss": 0.0466, "lr": 1.5018058365139546e-06, "epoch": 1.9970439498692985, "percentage": 39.94, "elapsed_time": "1:45:04", "remaining_time": "2:37:59", "throughput": 8739.48, "total_tokens": 55095328} +{"current_steps": 81750, "total_steps": 204665, "loss": 0.1147, "lr": 1.5017320712466695e-06, "epoch": 1.9971661007011456, "percentage": 39.94, "elapsed_time": "1:45:04", "remaining_time": "2:37:59", "throughput": 8739.52, "total_tokens": 55098656} +{"current_steps": 81755, "total_steps": 204665, "loss": 0.0567, "lr": 1.5016583023306538e-06, "epoch": 1.9972882515329928, "percentage": 39.95, "elapsed_time": "1:45:04", "remaining_time": "2:37:58", "throughput": 8739.58, "total_tokens": 55102048} +{"current_steps": 81760, "total_steps": 204665, "loss": 0.0016, "lr": 1.5015845297664437e-06, "epoch": 1.99741040236484, "percentage": 39.95, "elapsed_time": "1:45:05", "remaining_time": "2:37:58", "throughput": 8739.59, "total_tokens": 55105120} +{"current_steps": 81765, "total_steps": 204665, "loss": 0.0671, "lr": 1.5015107535545765e-06, "epoch": 1.9975325531966872, "percentage": 39.95, "elapsed_time": "1:45:05", "remaining_time": "2:37:57", "throughput": 8739.61, "total_tokens": 55108192} +{"current_steps": 81770, "total_steps": 204665, "loss": 0.2056, "lr": 1.501436973695588e-06, "epoch": 1.9976547040285344, "percentage": 39.95, "elapsed_time": "1:45:05", "remaining_time": "2:37:57", "throughput": 8739.62, "total_tokens": 55111200} +{"current_steps": 81775, "total_steps": 204665, "loss": 0.188, "lr": 1.5013631901900147e-06, "epoch": 1.9977768548603816, "percentage": 39.96, "elapsed_time": "1:45:06", "remaining_time": "2:37:56", "throughput": 8739.64, "total_tokens": 55114400} +{"current_steps": 81780, "total_steps": 204665, "loss": 0.0903, "lr": 1.501289403038394e-06, "epoch": 1.9978990056922288, "percentage": 39.96, "elapsed_time": "1:45:06", "remaining_time": "2:37:56", "throughput": 8739.69, "total_tokens": 55117728} +{"current_steps": 81785, "total_steps": 204665, "loss": 0.0297, "lr": 1.5012156122412615e-06, "epoch": 1.998021156524076, "percentage": 39.96, "elapsed_time": "1:45:06", "remaining_time": "2:37:56", "throughput": 8739.8, "total_tokens": 55121568} +{"current_steps": 81790, "total_steps": 204665, "loss": 0.0873, "lr": 1.501141817799154e-06, "epoch": 1.9981433073559232, "percentage": 39.96, "elapsed_time": "1:45:07", "remaining_time": "2:37:55", "throughput": 8739.85, "total_tokens": 55124896} +{"current_steps": 81795, "total_steps": 204665, "loss": 0.0424, "lr": 1.5010680197126089e-06, "epoch": 1.9982654581877703, "percentage": 39.97, "elapsed_time": "1:45:07", "remaining_time": "2:37:55", "throughput": 8739.85, "total_tokens": 55127904} +{"current_steps": 81800, "total_steps": 204665, "loss": 0.1556, "lr": 1.5009942179821624e-06, "epoch": 1.9983876090196175, "percentage": 39.97, "elapsed_time": "1:45:07", "remaining_time": "2:37:54", "throughput": 8739.84, "total_tokens": 55130784} +{"current_steps": 81805, "total_steps": 204665, "loss": 0.0675, "lr": 1.5009204126083507e-06, "epoch": 1.9985097598514647, "percentage": 39.97, "elapsed_time": "1:45:08", "remaining_time": "2:37:54", "throughput": 8739.88, "total_tokens": 55133984} +{"current_steps": 81810, "total_steps": 204665, "loss": 0.0825, "lr": 1.5008466035917117e-06, "epoch": 1.998631910683312, "percentage": 39.97, "elapsed_time": "1:45:08", "remaining_time": "2:37:53", "throughput": 8739.93, "total_tokens": 55137376} +{"current_steps": 81815, "total_steps": 204665, "loss": 0.0692, "lr": 1.500772790932781e-06, "epoch": 1.9987540615151589, "percentage": 39.98, "elapsed_time": "1:45:09", "remaining_time": "2:37:53", "throughput": 8739.96, "total_tokens": 55140640} +{"current_steps": 81820, "total_steps": 204665, "loss": 0.1433, "lr": 1.5006989746320962e-06, "epoch": 1.998876212347006, "percentage": 39.98, "elapsed_time": "1:45:09", "remaining_time": "2:37:52", "throughput": 8740.02, "total_tokens": 55144032} +{"current_steps": 81825, "total_steps": 204665, "loss": 0.0021, "lr": 1.5006251546901936e-06, "epoch": 1.9989983631788533, "percentage": 39.98, "elapsed_time": "1:45:09", "remaining_time": "2:37:52", "throughput": 8740.1, "total_tokens": 55147680} +{"current_steps": 81830, "total_steps": 204665, "loss": 0.0312, "lr": 1.5005513311076103e-06, "epoch": 1.9991205140107005, "percentage": 39.98, "elapsed_time": "1:45:10", "remaining_time": "2:37:52", "throughput": 8740.16, "total_tokens": 55151136} +{"current_steps": 81835, "total_steps": 204665, "loss": 0.0798, "lr": 1.500477503884883e-06, "epoch": 1.9992426648425474, "percentage": 39.98, "elapsed_time": "1:45:10", "remaining_time": "2:37:51", "throughput": 8740.16, "total_tokens": 55154080} +{"current_steps": 81840, "total_steps": 204665, "loss": 0.0722, "lr": 1.5004036730225486e-06, "epoch": 1.9993648156743946, "percentage": 39.99, "elapsed_time": "1:45:10", "remaining_time": "2:37:51", "throughput": 8740.17, "total_tokens": 55157152} +{"current_steps": 81845, "total_steps": 204665, "loss": 0.0506, "lr": 1.5003298385211443e-06, "epoch": 1.9994869665062418, "percentage": 39.99, "elapsed_time": "1:45:11", "remaining_time": "2:37:50", "throughput": 8740.27, "total_tokens": 55160864} +{"current_steps": 81850, "total_steps": 204665, "loss": 0.0443, "lr": 1.5002560003812064e-06, "epoch": 1.999609117338089, "percentage": 39.99, "elapsed_time": "1:45:11", "remaining_time": "2:37:50", "throughput": 8740.35, "total_tokens": 55164448} +{"current_steps": 81855, "total_steps": 204665, "loss": 0.1503, "lr": 1.5001821586032729e-06, "epoch": 1.9997312681699362, "percentage": 39.99, "elapsed_time": "1:45:11", "remaining_time": "2:37:49", "throughput": 8740.42, "total_tokens": 55167904} +{"current_steps": 81860, "total_steps": 204665, "loss": 0.0473, "lr": 1.50010831318788e-06, "epoch": 1.9998534190017834, "percentage": 40.0, "elapsed_time": "1:45:12", "remaining_time": "2:37:49", "throughput": 8740.43, "total_tokens": 55171040} +{"current_steps": 81865, "total_steps": 204665, "loss": 0.2202, "lr": 1.500034464135565e-06, "epoch": 1.9999755698336306, "percentage": 40.0, "elapsed_time": "1:45:12", "remaining_time": "2:37:48", "throughput": 8740.44, "total_tokens": 55174048} +{"current_steps": 81870, "total_steps": 204665, "loss": 0.0019, "lr": 1.4999606114468647e-06, "epoch": 2.0000977206654778, "percentage": 40.0, "elapsed_time": "1:45:12", "remaining_time": "2:37:48", "throughput": 8740.3, "total_tokens": 55177384} +{"current_steps": 81872, "total_steps": 204665, "eval_loss": 0.12374971061944962, "epoch": 2.0001465809982166, "percentage": 40.0, "elapsed_time": "1:46:00", "remaining_time": "2:38:59", "throughput": 8674.87, "total_tokens": 55178600} +{"current_steps": 81875, "total_steps": 204665, "loss": 0.0235, "lr": 1.4998867551223164e-06, "epoch": 2.000219871497325, "percentage": 40.0, "elapsed_time": "1:46:34", "remaining_time": "2:39:50", "throughput": 8628.78, "total_tokens": 55180584} +{"current_steps": 81880, "total_steps": 204665, "loss": 0.0016, "lr": 1.4998128951624572e-06, "epoch": 2.000342022329172, "percentage": 40.01, "elapsed_time": "1:46:35", "remaining_time": "2:39:50", "throughput": 8628.79, "total_tokens": 55183592} +{"current_steps": 81885, "total_steps": 204665, "loss": 0.0287, "lr": 1.4997390315678242e-06, "epoch": 2.0004641731610193, "percentage": 40.01, "elapsed_time": "1:46:35", "remaining_time": "2:39:49", "throughput": 8628.9, "total_tokens": 55187368} +{"current_steps": 81890, "total_steps": 204665, "loss": 0.0262, "lr": 1.4996651643389545e-06, "epoch": 2.0005863239928665, "percentage": 40.01, "elapsed_time": "1:46:36", "remaining_time": "2:39:49", "throughput": 8628.98, "total_tokens": 55191016} +{"current_steps": 81895, "total_steps": 204665, "loss": 0.0651, "lr": 1.4995912934763854e-06, "epoch": 2.0007084748247137, "percentage": 40.01, "elapsed_time": "1:46:36", "remaining_time": "2:39:48", "throughput": 8629.07, "total_tokens": 55194600} +{"current_steps": 81900, "total_steps": 204665, "loss": 0.0267, "lr": 1.4995174189806542e-06, "epoch": 2.000830625656561, "percentage": 40.02, "elapsed_time": "1:46:36", "remaining_time": "2:39:48", "throughput": 8629.1, "total_tokens": 55197736} +{"current_steps": 81905, "total_steps": 204665, "loss": 0.047, "lr": 1.4994435408522976e-06, "epoch": 2.000952776488408, "percentage": 40.02, "elapsed_time": "1:46:37", "remaining_time": "2:39:47", "throughput": 8629.19, "total_tokens": 55201320} +{"current_steps": 81910, "total_steps": 204665, "loss": 0.0019, "lr": 1.4993696590918533e-06, "epoch": 2.0010749273202553, "percentage": 40.02, "elapsed_time": "1:46:37", "remaining_time": "2:39:47", "throughput": 8629.22, "total_tokens": 55204520} +{"current_steps": 81915, "total_steps": 204665, "loss": 0.0854, "lr": 1.4992957736998589e-06, "epoch": 2.001197078152102, "percentage": 40.02, "elapsed_time": "1:46:37", "remaining_time": "2:39:47", "throughput": 8629.25, "total_tokens": 55207656} +{"current_steps": 81920, "total_steps": 204665, "loss": 0.0359, "lr": 1.4992218846768509e-06, "epoch": 2.001319228983949, "percentage": 40.03, "elapsed_time": "1:46:38", "remaining_time": "2:39:46", "throughput": 8629.29, "total_tokens": 55210856} +{"current_steps": 81925, "total_steps": 204665, "loss": 0.0026, "lr": 1.4991479920233673e-06, "epoch": 2.0014413798157964, "percentage": 40.03, "elapsed_time": "1:46:38", "remaining_time": "2:39:46", "throughput": 8629.36, "total_tokens": 55214312} +{"current_steps": 81930, "total_steps": 204665, "loss": 0.0017, "lr": 1.4990740957399452e-06, "epoch": 2.0015635306476436, "percentage": 40.03, "elapsed_time": "1:46:38", "remaining_time": "2:39:45", "throughput": 8629.36, "total_tokens": 55217256} +{"current_steps": 81935, "total_steps": 204665, "loss": 0.0011, "lr": 1.499000195827122e-06, "epoch": 2.0016856814794908, "percentage": 40.03, "elapsed_time": "1:46:39", "remaining_time": "2:39:45", "throughput": 8629.43, "total_tokens": 55220648} +{"current_steps": 81940, "total_steps": 204665, "loss": 0.0394, "lr": 1.4989262922854353e-06, "epoch": 2.001807832311338, "percentage": 40.04, "elapsed_time": "1:46:39", "remaining_time": "2:39:44", "throughput": 8629.46, "total_tokens": 55223848} +{"current_steps": 81945, "total_steps": 204665, "loss": 0.0271, "lr": 1.4988523851154221e-06, "epoch": 2.001929983143185, "percentage": 40.04, "elapsed_time": "1:46:39", "remaining_time": "2:39:44", "throughput": 8629.5, "total_tokens": 55227048} +{"current_steps": 81950, "total_steps": 204665, "loss": 0.0005, "lr": 1.4987784743176206e-06, "epoch": 2.0020521339750323, "percentage": 40.04, "elapsed_time": "1:46:40", "remaining_time": "2:39:43", "throughput": 8629.58, "total_tokens": 55230568} +{"current_steps": 81955, "total_steps": 204665, "loss": 0.1124, "lr": 1.4987045598925678e-06, "epoch": 2.0021742848068795, "percentage": 40.04, "elapsed_time": "1:46:40", "remaining_time": "2:39:43", "throughput": 8629.58, "total_tokens": 55233448} +{"current_steps": 81960, "total_steps": 204665, "loss": 0.0502, "lr": 1.4986306418408011e-06, "epoch": 2.0022964356387267, "percentage": 40.05, "elapsed_time": "1:46:40", "remaining_time": "2:39:42", "throughput": 8629.58, "total_tokens": 55236392} +{"current_steps": 81965, "total_steps": 204665, "loss": 0.0002, "lr": 1.4985567201628584e-06, "epoch": 2.002418586470574, "percentage": 40.05, "elapsed_time": "1:46:41", "remaining_time": "2:39:42", "throughput": 8629.62, "total_tokens": 55239592} +{"current_steps": 81970, "total_steps": 204665, "loss": 0.0293, "lr": 1.498482794859277e-06, "epoch": 2.002540737302421, "percentage": 40.05, "elapsed_time": "1:46:41", "remaining_time": "2:39:41", "throughput": 8629.66, "total_tokens": 55242920} +{"current_steps": 81975, "total_steps": 204665, "loss": 0.0001, "lr": 1.4984088659305949e-06, "epoch": 2.0026628881342683, "percentage": 40.05, "elapsed_time": "1:46:41", "remaining_time": "2:39:41", "throughput": 8629.72, "total_tokens": 55246312} +{"current_steps": 81980, "total_steps": 204665, "loss": 0.0007, "lr": 1.4983349333773493e-06, "epoch": 2.0027850389661155, "percentage": 40.06, "elapsed_time": "1:46:42", "remaining_time": "2:39:41", "throughput": 8629.74, "total_tokens": 55249320} +{"current_steps": 81985, "total_steps": 204665, "loss": 0.0017, "lr": 1.4982609972000779e-06, "epoch": 2.0029071897979627, "percentage": 40.06, "elapsed_time": "1:46:42", "remaining_time": "2:39:40", "throughput": 8629.78, "total_tokens": 55252584} +{"current_steps": 81990, "total_steps": 204665, "loss": 0.0005, "lr": 1.4981870573993187e-06, "epoch": 2.00302934062981, "percentage": 40.06, "elapsed_time": "1:46:42", "remaining_time": "2:39:40", "throughput": 8629.9, "total_tokens": 55256424} +{"current_steps": 81995, "total_steps": 204665, "loss": 0.0694, "lr": 1.498113113975609e-06, "epoch": 2.003151491461657, "percentage": 40.06, "elapsed_time": "1:46:43", "remaining_time": "2:39:39", "throughput": 8630.0, "total_tokens": 55260136} +{"current_steps": 82000, "total_steps": 204665, "loss": 0.0646, "lr": 1.4980391669294872e-06, "epoch": 2.003273642293504, "percentage": 40.07, "elapsed_time": "1:46:43", "remaining_time": "2:39:39", "throughput": 8630.06, "total_tokens": 55263464} +{"current_steps": 82005, "total_steps": 204665, "loss": 0.0001, "lr": 1.4979652162614902e-06, "epoch": 2.003395793125351, "percentage": 40.07, "elapsed_time": "1:46:43", "remaining_time": "2:39:38", "throughput": 8630.14, "total_tokens": 55267048} +{"current_steps": 82010, "total_steps": 204665, "loss": 0.0811, "lr": 1.4978912619721563e-06, "epoch": 2.003517943957198, "percentage": 40.07, "elapsed_time": "1:46:44", "remaining_time": "2:39:38", "throughput": 8630.16, "total_tokens": 55270056} +{"current_steps": 82015, "total_steps": 204665, "loss": 0.0692, "lr": 1.4978173040620233e-06, "epoch": 2.0036400947890454, "percentage": 40.07, "elapsed_time": "1:46:44", "remaining_time": "2:39:37", "throughput": 8630.21, "total_tokens": 55273384} +{"current_steps": 82020, "total_steps": 204665, "loss": 0.0016, "lr": 1.497743342531629e-06, "epoch": 2.0037622456208926, "percentage": 40.08, "elapsed_time": "1:46:44", "remaining_time": "2:39:37", "throughput": 8630.19, "total_tokens": 55276136} +{"current_steps": 82025, "total_steps": 204665, "loss": 0.0005, "lr": 1.4976693773815113e-06, "epoch": 2.0038843964527397, "percentage": 40.08, "elapsed_time": "1:46:45", "remaining_time": "2:39:36", "throughput": 8630.32, "total_tokens": 55280040} +{"current_steps": 82030, "total_steps": 204665, "loss": 0.0006, "lr": 1.497595408612208e-06, "epoch": 2.004006547284587, "percentage": 40.08, "elapsed_time": "1:46:45", "remaining_time": "2:39:36", "throughput": 8630.35, "total_tokens": 55283240} +{"current_steps": 82035, "total_steps": 204665, "loss": 0.0575, "lr": 1.4975214362242567e-06, "epoch": 2.004128698116434, "percentage": 40.08, "elapsed_time": "1:46:46", "remaining_time": "2:39:36", "throughput": 8630.43, "total_tokens": 55286760} +{"current_steps": 82040, "total_steps": 204665, "loss": 0.0749, "lr": 1.4974474602181962e-06, "epoch": 2.0042508489482813, "percentage": 40.09, "elapsed_time": "1:46:46", "remaining_time": "2:39:35", "throughput": 8630.52, "total_tokens": 55290344} +{"current_steps": 82045, "total_steps": 204665, "loss": 0.1332, "lr": 1.4973734805945635e-06, "epoch": 2.0043729997801285, "percentage": 40.09, "elapsed_time": "1:46:46", "remaining_time": "2:39:35", "throughput": 8630.58, "total_tokens": 55293928} +{"current_steps": 82050, "total_steps": 204665, "loss": 0.0002, "lr": 1.4972994973538976e-06, "epoch": 2.0044951506119757, "percentage": 40.09, "elapsed_time": "1:46:47", "remaining_time": "2:39:34", "throughput": 8630.59, "total_tokens": 55296936} +{"current_steps": 82055, "total_steps": 204665, "loss": 0.0304, "lr": 1.4972255104967355e-06, "epoch": 2.004617301443823, "percentage": 40.09, "elapsed_time": "1:46:47", "remaining_time": "2:39:34", "throughput": 8630.67, "total_tokens": 55300456} +{"current_steps": 82060, "total_steps": 204665, "loss": 0.0007, "lr": 1.497151520023616e-06, "epoch": 2.00473945227567, "percentage": 40.09, "elapsed_time": "1:46:47", "remaining_time": "2:39:33", "throughput": 8630.68, "total_tokens": 55303528} +{"current_steps": 82065, "total_steps": 204665, "loss": 0.0003, "lr": 1.4970775259350767e-06, "epoch": 2.0048616031075173, "percentage": 40.1, "elapsed_time": "1:46:48", "remaining_time": "2:39:33", "throughput": 8630.7, "total_tokens": 55306600} +{"current_steps": 82070, "total_steps": 204665, "loss": 0.0004, "lr": 1.4970035282316562e-06, "epoch": 2.0049837539393645, "percentage": 40.1, "elapsed_time": "1:46:48", "remaining_time": "2:39:32", "throughput": 8630.76, "total_tokens": 55309992} +{"current_steps": 82075, "total_steps": 204665, "loss": 0.0445, "lr": 1.4969295269138924e-06, "epoch": 2.0051059047712116, "percentage": 40.1, "elapsed_time": "1:46:48", "remaining_time": "2:39:32", "throughput": 8630.85, "total_tokens": 55313576} +{"current_steps": 82080, "total_steps": 204665, "loss": 0.0393, "lr": 1.4968555219823233e-06, "epoch": 2.005228055603059, "percentage": 40.1, "elapsed_time": "1:46:49", "remaining_time": "2:39:31", "throughput": 8630.92, "total_tokens": 55317096} +{"current_steps": 82085, "total_steps": 204665, "loss": 0.0003, "lr": 1.4967815134374872e-06, "epoch": 2.005350206434906, "percentage": 40.11, "elapsed_time": "1:46:49", "remaining_time": "2:39:31", "throughput": 8631.02, "total_tokens": 55320808} +{"current_steps": 82090, "total_steps": 204665, "loss": 0.0002, "lr": 1.4967075012799224e-06, "epoch": 2.0054723572667528, "percentage": 40.11, "elapsed_time": "1:46:49", "remaining_time": "2:39:31", "throughput": 8631.05, "total_tokens": 55323944} +{"current_steps": 82095, "total_steps": 204665, "loss": 0.0001, "lr": 1.4966334855101667e-06, "epoch": 2.0055945080986, "percentage": 40.11, "elapsed_time": "1:46:50", "remaining_time": "2:39:30", "throughput": 8631.08, "total_tokens": 55327080} +{"current_steps": 82100, "total_steps": 204665, "loss": 0.0004, "lr": 1.496559466128759e-06, "epoch": 2.005716658930447, "percentage": 40.11, "elapsed_time": "1:46:50", "remaining_time": "2:39:30", "throughput": 8631.16, "total_tokens": 55330664} +{"current_steps": 82105, "total_steps": 204665, "loss": 0.0006, "lr": 1.4964854431362372e-06, "epoch": 2.0058388097622943, "percentage": 40.12, "elapsed_time": "1:46:50", "remaining_time": "2:39:29", "throughput": 8631.19, "total_tokens": 55333864} +{"current_steps": 82110, "total_steps": 204665, "loss": 0.0004, "lr": 1.49641141653314e-06, "epoch": 2.0059609605941415, "percentage": 40.12, "elapsed_time": "1:46:51", "remaining_time": "2:39:29", "throughput": 8631.2, "total_tokens": 55336808} +{"current_steps": 82115, "total_steps": 204665, "loss": 0.0759, "lr": 1.4963373863200053e-06, "epoch": 2.0060831114259887, "percentage": 40.12, "elapsed_time": "1:46:51", "remaining_time": "2:39:28", "throughput": 8631.27, "total_tokens": 55340328} +{"current_steps": 82120, "total_steps": 204665, "loss": 0.0007, "lr": 1.4962633524973716e-06, "epoch": 2.006205262257836, "percentage": 40.12, "elapsed_time": "1:46:51", "remaining_time": "2:39:28", "throughput": 8631.32, "total_tokens": 55343656} +{"current_steps": 82125, "total_steps": 204665, "loss": 0.0006, "lr": 1.4961893150657775e-06, "epoch": 2.006327413089683, "percentage": 40.13, "elapsed_time": "1:46:52", "remaining_time": "2:39:27", "throughput": 8631.37, "total_tokens": 55347048} +{"current_steps": 82130, "total_steps": 204665, "loss": 0.0942, "lr": 1.496115274025761e-06, "epoch": 2.0064495639215303, "percentage": 40.13, "elapsed_time": "1:46:52", "remaining_time": "2:39:27", "throughput": 8631.41, "total_tokens": 55350248} +{"current_steps": 82135, "total_steps": 204665, "loss": 0.0362, "lr": 1.4960412293778609e-06, "epoch": 2.0065717147533775, "percentage": 40.13, "elapsed_time": "1:46:53", "remaining_time": "2:39:27", "throughput": 8631.52, "total_tokens": 55354024} +{"current_steps": 82140, "total_steps": 204665, "loss": 0.0268, "lr": 1.4959671811226152e-06, "epoch": 2.0066938655852247, "percentage": 40.13, "elapsed_time": "1:46:53", "remaining_time": "2:39:26", "throughput": 8631.58, "total_tokens": 55357480} +{"current_steps": 82145, "total_steps": 204665, "loss": 0.0377, "lr": 1.4958931292605631e-06, "epoch": 2.006816016417072, "percentage": 40.14, "elapsed_time": "1:46:53", "remaining_time": "2:39:26", "throughput": 8631.68, "total_tokens": 55361256} +{"current_steps": 82150, "total_steps": 204665, "loss": 0.049, "lr": 1.495819073792243e-06, "epoch": 2.006938167248919, "percentage": 40.14, "elapsed_time": "1:46:54", "remaining_time": "2:39:25", "throughput": 8631.69, "total_tokens": 55364200} +{"current_steps": 82155, "total_steps": 204665, "loss": 0.0635, "lr": 1.4957450147181928e-06, "epoch": 2.0070603180807662, "percentage": 40.14, "elapsed_time": "1:46:54", "remaining_time": "2:39:25", "throughput": 8631.82, "total_tokens": 55368168} +{"current_steps": 82160, "total_steps": 204665, "loss": 0.0004, "lr": 1.4956709520389517e-06, "epoch": 2.0071824689126134, "percentage": 40.14, "elapsed_time": "1:46:54", "remaining_time": "2:39:24", "throughput": 8631.89, "total_tokens": 55371688} +{"current_steps": 82165, "total_steps": 204665, "loss": 0.0643, "lr": 1.495596885755058e-06, "epoch": 2.0073046197444606, "percentage": 40.15, "elapsed_time": "1:46:55", "remaining_time": "2:39:24", "throughput": 8631.58, "total_tokens": 55375016} +{"current_steps": 82170, "total_steps": 204665, "loss": 0.0005, "lr": 1.4955228158670509e-06, "epoch": 2.007426770576308, "percentage": 40.15, "elapsed_time": "1:46:55", "remaining_time": "2:39:24", "throughput": 8631.89, "total_tokens": 55380456} +{"current_steps": 82175, "total_steps": 204665, "loss": 0.0001, "lr": 1.4954487423754682e-06, "epoch": 2.007548921408155, "percentage": 40.15, "elapsed_time": "1:46:56", "remaining_time": "2:39:23", "throughput": 8631.94, "total_tokens": 55383784} +{"current_steps": 82180, "total_steps": 204665, "loss": 0.0392, "lr": 1.4953746652808492e-06, "epoch": 2.0076710722400017, "percentage": 40.15, "elapsed_time": "1:46:56", "remaining_time": "2:39:23", "throughput": 8631.96, "total_tokens": 55386856} +{"current_steps": 82185, "total_steps": 204665, "loss": 0.0635, "lr": 1.4953005845837322e-06, "epoch": 2.007793223071849, "percentage": 40.16, "elapsed_time": "1:46:56", "remaining_time": "2:39:23", "throughput": 8632.06, "total_tokens": 55390568} +{"current_steps": 82190, "total_steps": 204665, "loss": 0.0001, "lr": 1.495226500284656e-06, "epoch": 2.007915373903696, "percentage": 40.16, "elapsed_time": "1:46:57", "remaining_time": "2:39:22", "throughput": 8632.06, "total_tokens": 55393576} +{"current_steps": 82195, "total_steps": 204665, "loss": 0.0582, "lr": 1.4951524123841598e-06, "epoch": 2.0080375247355433, "percentage": 40.16, "elapsed_time": "1:46:57", "remaining_time": "2:39:22", "throughput": 8632.11, "total_tokens": 55396968} +{"current_steps": 82200, "total_steps": 204665, "loss": 0.0018, "lr": 1.495078320882782e-06, "epoch": 2.0081596755673905, "percentage": 40.16, "elapsed_time": "1:46:57", "remaining_time": "2:39:21", "throughput": 8632.12, "total_tokens": 55400040} +{"current_steps": 82205, "total_steps": 204665, "loss": 0.0003, "lr": 1.4950042257810616e-06, "epoch": 2.0082818263992377, "percentage": 40.17, "elapsed_time": "1:46:58", "remaining_time": "2:39:21", "throughput": 8632.17, "total_tokens": 55403432} +{"current_steps": 82210, "total_steps": 204665, "loss": 0.0285, "lr": 1.4949301270795372e-06, "epoch": 2.008403977231085, "percentage": 40.17, "elapsed_time": "1:46:58", "remaining_time": "2:39:20", "throughput": 8632.2, "total_tokens": 55406632} +{"current_steps": 82215, "total_steps": 204665, "loss": 0.0309, "lr": 1.4948560247787477e-06, "epoch": 2.008526128062932, "percentage": 40.17, "elapsed_time": "1:46:58", "remaining_time": "2:39:20", "throughput": 8632.23, "total_tokens": 55409832} +{"current_steps": 82220, "total_steps": 204665, "loss": 0.002, "lr": 1.494781918879232e-06, "epoch": 2.0086482788947793, "percentage": 40.17, "elapsed_time": "1:46:59", "remaining_time": "2:39:19", "throughput": 8632.29, "total_tokens": 55413224} +{"current_steps": 82225, "total_steps": 204665, "loss": 0.0609, "lr": 1.4947078093815294e-06, "epoch": 2.0087704297266264, "percentage": 40.18, "elapsed_time": "1:46:59", "remaining_time": "2:39:19", "throughput": 8632.38, "total_tokens": 55416936} +{"current_steps": 82230, "total_steps": 204665, "loss": 0.0009, "lr": 1.4946336962861782e-06, "epoch": 2.0088925805584736, "percentage": 40.18, "elapsed_time": "1:47:00", "remaining_time": "2:39:18", "throughput": 8632.38, "total_tokens": 55419880} +{"current_steps": 82235, "total_steps": 204665, "loss": 0.0786, "lr": 1.494559579593718e-06, "epoch": 2.009014731390321, "percentage": 40.18, "elapsed_time": "1:47:00", "remaining_time": "2:39:18", "throughput": 8632.41, "total_tokens": 55423080} +{"current_steps": 82240, "total_steps": 204665, "loss": 0.0002, "lr": 1.4944854593046876e-06, "epoch": 2.009136882222168, "percentage": 40.18, "elapsed_time": "1:47:00", "remaining_time": "2:39:18", "throughput": 8632.44, "total_tokens": 55426280} +{"current_steps": 82245, "total_steps": 204665, "loss": 0.0316, "lr": 1.4944113354196258e-06, "epoch": 2.009259033054015, "percentage": 40.19, "elapsed_time": "1:47:01", "remaining_time": "2:39:17", "throughput": 8632.51, "total_tokens": 55429800} +{"current_steps": 82250, "total_steps": 204665, "loss": 0.0985, "lr": 1.4943372079390718e-06, "epoch": 2.0093811838858624, "percentage": 40.19, "elapsed_time": "1:47:01", "remaining_time": "2:39:17", "throughput": 8632.58, "total_tokens": 55433320} +{"current_steps": 82255, "total_steps": 204665, "loss": 0.0003, "lr": 1.4942630768635644e-06, "epoch": 2.0095033347177096, "percentage": 40.19, "elapsed_time": "1:47:01", "remaining_time": "2:39:16", "throughput": 8632.69, "total_tokens": 55437160} +{"current_steps": 82260, "total_steps": 204665, "loss": 0.0002, "lr": 1.4941889421936433e-06, "epoch": 2.009625485549557, "percentage": 40.19, "elapsed_time": "1:47:02", "remaining_time": "2:39:16", "throughput": 8632.8, "total_tokens": 55441000} +{"current_steps": 82265, "total_steps": 204665, "loss": 0.0276, "lr": 1.4941148039298472e-06, "epoch": 2.009747636381404, "percentage": 40.19, "elapsed_time": "1:47:02", "remaining_time": "2:39:15", "throughput": 8632.8, "total_tokens": 55443944} +{"current_steps": 82270, "total_steps": 204665, "loss": 0.0445, "lr": 1.4940406620727154e-06, "epoch": 2.0098697872132507, "percentage": 40.2, "elapsed_time": "1:47:02", "remaining_time": "2:39:15", "throughput": 8632.85, "total_tokens": 55447272} +{"current_steps": 82275, "total_steps": 204665, "loss": 0.1243, "lr": 1.493966516622787e-06, "epoch": 2.009991938045098, "percentage": 40.2, "elapsed_time": "1:47:03", "remaining_time": "2:39:14", "throughput": 8632.93, "total_tokens": 55450920} +{"current_steps": 82280, "total_steps": 204665, "loss": 0.0006, "lr": 1.4938923675806012e-06, "epoch": 2.010114088876945, "percentage": 40.2, "elapsed_time": "1:47:03", "remaining_time": "2:39:14", "throughput": 8633.0, "total_tokens": 55454440} +{"current_steps": 82285, "total_steps": 204665, "loss": 0.0021, "lr": 1.4938182149466974e-06, "epoch": 2.0102362397087923, "percentage": 40.2, "elapsed_time": "1:47:03", "remaining_time": "2:39:14", "throughput": 8633.1, "total_tokens": 55458152} +{"current_steps": 82290, "total_steps": 204665, "loss": 0.0004, "lr": 1.4937440587216144e-06, "epoch": 2.0103583905406395, "percentage": 40.21, "elapsed_time": "1:47:04", "remaining_time": "2:39:13", "throughput": 8633.16, "total_tokens": 55461544} +{"current_steps": 82295, "total_steps": 204665, "loss": 0.0002, "lr": 1.493669898905892e-06, "epoch": 2.0104805413724867, "percentage": 40.21, "elapsed_time": "1:47:04", "remaining_time": "2:39:13", "throughput": 8633.25, "total_tokens": 55465256} +{"current_steps": 82300, "total_steps": 204665, "loss": 0.0696, "lr": 1.4935957355000693e-06, "epoch": 2.010602692204334, "percentage": 40.21, "elapsed_time": "1:47:04", "remaining_time": "2:39:12", "throughput": 8633.36, "total_tokens": 55469032} +{"current_steps": 82305, "total_steps": 204665, "loss": 0.103, "lr": 1.4935215685046858e-06, "epoch": 2.010724843036181, "percentage": 40.21, "elapsed_time": "1:47:05", "remaining_time": "2:39:12", "throughput": 8633.41, "total_tokens": 55472424} +{"current_steps": 82310, "total_steps": 204665, "loss": 0.0419, "lr": 1.4934473979202804e-06, "epoch": 2.0108469938680282, "percentage": 40.22, "elapsed_time": "1:47:05", "remaining_time": "2:39:11", "throughput": 8633.46, "total_tokens": 55475752} +{"current_steps": 82315, "total_steps": 204665, "loss": 0.0002, "lr": 1.4933732237473928e-06, "epoch": 2.0109691446998754, "percentage": 40.22, "elapsed_time": "1:47:06", "remaining_time": "2:39:11", "throughput": 8633.61, "total_tokens": 55479912} +{"current_steps": 82320, "total_steps": 204665, "loss": 0.0835, "lr": 1.4932990459865626e-06, "epoch": 2.0110912955317226, "percentage": 40.22, "elapsed_time": "1:47:06", "remaining_time": "2:39:10", "throughput": 8633.69, "total_tokens": 55483496} +{"current_steps": 82325, "total_steps": 204665, "loss": 0.0489, "lr": 1.493224864638329e-06, "epoch": 2.01121344636357, "percentage": 40.22, "elapsed_time": "1:47:06", "remaining_time": "2:39:10", "throughput": 8633.75, "total_tokens": 55486952} +{"current_steps": 82330, "total_steps": 204665, "loss": 0.0007, "lr": 1.4931506797032316e-06, "epoch": 2.011335597195417, "percentage": 40.23, "elapsed_time": "1:47:07", "remaining_time": "2:39:10", "throughput": 8633.79, "total_tokens": 55490216} +{"current_steps": 82335, "total_steps": 204665, "loss": 0.0442, "lr": 1.49307649118181e-06, "epoch": 2.011457748027264, "percentage": 40.23, "elapsed_time": "1:47:07", "remaining_time": "2:39:09", "throughput": 8633.81, "total_tokens": 55493352} +{"current_steps": 82340, "total_steps": 204665, "loss": 0.1068, "lr": 1.4930022990746034e-06, "epoch": 2.0115798988591114, "percentage": 40.23, "elapsed_time": "1:47:07", "remaining_time": "2:39:09", "throughput": 8633.83, "total_tokens": 55496424} +{"current_steps": 82345, "total_steps": 204665, "loss": 0.0003, "lr": 1.4929281033821513e-06, "epoch": 2.0117020496909586, "percentage": 40.23, "elapsed_time": "1:47:08", "remaining_time": "2:39:08", "throughput": 8633.88, "total_tokens": 55499816} +{"current_steps": 82350, "total_steps": 204665, "loss": 0.0016, "lr": 1.4928539041049935e-06, "epoch": 2.0118242005228058, "percentage": 40.24, "elapsed_time": "1:47:08", "remaining_time": "2:39:08", "throughput": 8633.97, "total_tokens": 55503464} +{"current_steps": 82355, "total_steps": 204665, "loss": 0.0003, "lr": 1.4927797012436694e-06, "epoch": 2.011946351354653, "percentage": 40.24, "elapsed_time": "1:47:08", "remaining_time": "2:39:07", "throughput": 8634.05, "total_tokens": 55507048} +{"current_steps": 82360, "total_steps": 204665, "loss": 0.0003, "lr": 1.492705494798719e-06, "epoch": 2.0120685021864997, "percentage": 40.24, "elapsed_time": "1:47:09", "remaining_time": "2:39:07", "throughput": 8634.1, "total_tokens": 55510440} +{"current_steps": 82365, "total_steps": 204665, "loss": 0.0236, "lr": 1.4926312847706817e-06, "epoch": 2.012190653018347, "percentage": 40.24, "elapsed_time": "1:47:09", "remaining_time": "2:39:06", "throughput": 8634.18, "total_tokens": 55514024} +{"current_steps": 82370, "total_steps": 204665, "loss": 0.0004, "lr": 1.4925570711600972e-06, "epoch": 2.012312803850194, "percentage": 40.25, "elapsed_time": "1:47:09", "remaining_time": "2:39:06", "throughput": 8634.21, "total_tokens": 55517224} +{"current_steps": 82375, "total_steps": 204665, "loss": 0.0002, "lr": 1.492482853967505e-06, "epoch": 2.0124349546820413, "percentage": 40.25, "elapsed_time": "1:47:10", "remaining_time": "2:39:06", "throughput": 8634.29, "total_tokens": 55520808} +{"current_steps": 82380, "total_steps": 204665, "loss": 0.0009, "lr": 1.4924086331934454e-06, "epoch": 2.0125571055138884, "percentage": 40.25, "elapsed_time": "1:47:10", "remaining_time": "2:39:05", "throughput": 8634.3, "total_tokens": 55523816} +{"current_steps": 82385, "total_steps": 204665, "loss": 0.0028, "lr": 1.4923344088384576e-06, "epoch": 2.0126792563457356, "percentage": 40.25, "elapsed_time": "1:47:10", "remaining_time": "2:39:05", "throughput": 8634.35, "total_tokens": 55527208} +{"current_steps": 82390, "total_steps": 204665, "loss": 0.0118, "lr": 1.4922601809030814e-06, "epoch": 2.012801407177583, "percentage": 40.26, "elapsed_time": "1:47:11", "remaining_time": "2:39:04", "throughput": 8634.38, "total_tokens": 55530408} +{"current_steps": 82395, "total_steps": 204665, "loss": 0.0002, "lr": 1.492185949387857e-06, "epoch": 2.01292355800943, "percentage": 40.26, "elapsed_time": "1:47:11", "remaining_time": "2:39:04", "throughput": 8634.41, "total_tokens": 55533608} +{"current_steps": 82400, "total_steps": 204665, "loss": 0.0002, "lr": 1.492111714293324e-06, "epoch": 2.013045708841277, "percentage": 40.26, "elapsed_time": "1:47:12", "remaining_time": "2:39:03", "throughput": 8634.43, "total_tokens": 55536744} +{"current_steps": 82405, "total_steps": 204665, "loss": 0.0285, "lr": 1.492037475620022e-06, "epoch": 2.0131678596731244, "percentage": 40.26, "elapsed_time": "1:47:12", "remaining_time": "2:39:03", "throughput": 8634.51, "total_tokens": 55540392} +{"current_steps": 82410, "total_steps": 204665, "loss": 0.0002, "lr": 1.4919632333684913e-06, "epoch": 2.0132900105049716, "percentage": 40.27, "elapsed_time": "1:47:12", "remaining_time": "2:39:02", "throughput": 8634.52, "total_tokens": 55543464} +{"current_steps": 82415, "total_steps": 204665, "loss": 0.041, "lr": 1.4918889875392716e-06, "epoch": 2.0134121613368188, "percentage": 40.27, "elapsed_time": "1:47:13", "remaining_time": "2:39:02", "throughput": 8634.55, "total_tokens": 55546600} +{"current_steps": 82420, "total_steps": 204665, "loss": 0.092, "lr": 1.4918147381329028e-06, "epoch": 2.013534312168666, "percentage": 40.27, "elapsed_time": "1:47:13", "remaining_time": "2:39:02", "throughput": 8634.55, "total_tokens": 55549608} +{"current_steps": 82425, "total_steps": 204665, "loss": 0.0001, "lr": 1.491740485149925e-06, "epoch": 2.013656463000513, "percentage": 40.27, "elapsed_time": "1:47:13", "remaining_time": "2:39:01", "throughput": 8634.63, "total_tokens": 55553128} +{"current_steps": 82430, "total_steps": 204665, "loss": 0.0594, "lr": 1.491666228590878e-06, "epoch": 2.0137786138323603, "percentage": 40.28, "elapsed_time": "1:47:14", "remaining_time": "2:39:01", "throughput": 8634.65, "total_tokens": 55556328} +{"current_steps": 82435, "total_steps": 204665, "loss": 0.1018, "lr": 1.4915919684563023e-06, "epoch": 2.0139007646642075, "percentage": 40.28, "elapsed_time": "1:47:14", "remaining_time": "2:39:00", "throughput": 8634.66, "total_tokens": 55559336} +{"current_steps": 82440, "total_steps": 204665, "loss": 0.0006, "lr": 1.4915177047467374e-06, "epoch": 2.0140229154960547, "percentage": 40.28, "elapsed_time": "1:47:14", "remaining_time": "2:39:00", "throughput": 8634.72, "total_tokens": 55562792} +{"current_steps": 82445, "total_steps": 204665, "loss": 0.0002, "lr": 1.4914434374627237e-06, "epoch": 2.014145066327902, "percentage": 40.28, "elapsed_time": "1:47:15", "remaining_time": "2:38:59", "throughput": 8634.72, "total_tokens": 55565736} +{"current_steps": 82450, "total_steps": 204665, "loss": 0.0002, "lr": 1.491369166604801e-06, "epoch": 2.0142672171597487, "percentage": 40.29, "elapsed_time": "1:47:15", "remaining_time": "2:38:59", "throughput": 8634.84, "total_tokens": 55569704} +{"current_steps": 82455, "total_steps": 204665, "loss": 0.0369, "lr": 1.4912948921735093e-06, "epoch": 2.014389367991596, "percentage": 40.29, "elapsed_time": "1:47:15", "remaining_time": "2:38:58", "throughput": 8634.85, "total_tokens": 55572712} +{"current_steps": 82460, "total_steps": 204665, "loss": 0.0076, "lr": 1.4912206141693893e-06, "epoch": 2.014511518823443, "percentage": 40.29, "elapsed_time": "1:47:16", "remaining_time": "2:38:58", "throughput": 8634.88, "total_tokens": 55575976} +{"current_steps": 82465, "total_steps": 204665, "loss": 0.0001, "lr": 1.491146332592981e-06, "epoch": 2.0146336696552902, "percentage": 40.29, "elapsed_time": "1:47:16", "remaining_time": "2:38:57", "throughput": 8634.96, "total_tokens": 55579624} +{"current_steps": 82470, "total_steps": 204665, "loss": 0.0561, "lr": 1.491072047444824e-06, "epoch": 2.0147558204871374, "percentage": 40.3, "elapsed_time": "1:47:16", "remaining_time": "2:38:57", "throughput": 8634.99, "total_tokens": 55582824} +{"current_steps": 82475, "total_steps": 204665, "loss": 0.0205, "lr": 1.4909977587254595e-06, "epoch": 2.0148779713189846, "percentage": 40.3, "elapsed_time": "1:47:17", "remaining_time": "2:38:57", "throughput": 8635.08, "total_tokens": 55586472} +{"current_steps": 82480, "total_steps": 204665, "loss": 0.0476, "lr": 1.4909234664354266e-06, "epoch": 2.015000122150832, "percentage": 40.3, "elapsed_time": "1:47:17", "remaining_time": "2:38:56", "throughput": 8635.06, "total_tokens": 55589288} +{"current_steps": 82485, "total_steps": 204665, "loss": 0.0005, "lr": 1.490849170575267e-06, "epoch": 2.015122272982679, "percentage": 40.3, "elapsed_time": "1:47:17", "remaining_time": "2:38:56", "throughput": 8635.1, "total_tokens": 55592488} +{"current_steps": 82490, "total_steps": 204665, "loss": 0.0001, "lr": 1.4907748711455198e-06, "epoch": 2.015244423814526, "percentage": 40.3, "elapsed_time": "1:47:18", "remaining_time": "2:38:55", "throughput": 8635.22, "total_tokens": 55596392} +{"current_steps": 82495, "total_steps": 204665, "loss": 0.0414, "lr": 1.4907005681467257e-06, "epoch": 2.0153665746463734, "percentage": 40.31, "elapsed_time": "1:47:18", "remaining_time": "2:38:55", "throughput": 8635.27, "total_tokens": 55599720} +{"current_steps": 82500, "total_steps": 204665, "loss": 0.0001, "lr": 1.490626261579425e-06, "epoch": 2.0154887254782206, "percentage": 40.31, "elapsed_time": "1:47:19", "remaining_time": "2:38:54", "throughput": 8635.31, "total_tokens": 55602920} +{"current_steps": 82505, "total_steps": 204665, "loss": 0.0006, "lr": 1.4905519514441585e-06, "epoch": 2.0156108763100677, "percentage": 40.31, "elapsed_time": "1:47:19", "remaining_time": "2:38:54", "throughput": 8635.36, "total_tokens": 55606248} +{"current_steps": 82510, "total_steps": 204665, "loss": 0.0062, "lr": 1.490477637741466e-06, "epoch": 2.015733027141915, "percentage": 40.31, "elapsed_time": "1:47:19", "remaining_time": "2:38:53", "throughput": 8635.38, "total_tokens": 55609384} +{"current_steps": 82515, "total_steps": 204665, "loss": 0.087, "lr": 1.4904033204718881e-06, "epoch": 2.015855177973762, "percentage": 40.32, "elapsed_time": "1:47:20", "remaining_time": "2:38:53", "throughput": 8635.42, "total_tokens": 55612584} +{"current_steps": 82520, "total_steps": 204665, "loss": 0.0778, "lr": 1.4903289996359659e-06, "epoch": 2.0159773288056093, "percentage": 40.32, "elapsed_time": "1:47:20", "remaining_time": "2:38:52", "throughput": 8635.49, "total_tokens": 55615976} +{"current_steps": 82525, "total_steps": 204665, "loss": 0.0, "lr": 1.4902546752342389e-06, "epoch": 2.0160994796374565, "percentage": 40.32, "elapsed_time": "1:47:20", "remaining_time": "2:38:52", "throughput": 8635.49, "total_tokens": 55618920} +{"current_steps": 82530, "total_steps": 204665, "loss": 0.0633, "lr": 1.490180347267248e-06, "epoch": 2.0162216304693037, "percentage": 40.32, "elapsed_time": "1:47:21", "remaining_time": "2:38:52", "throughput": 8635.52, "total_tokens": 55621992} +{"current_steps": 82535, "total_steps": 204665, "loss": 0.0003, "lr": 1.4901060157355338e-06, "epoch": 2.016343781301151, "percentage": 40.33, "elapsed_time": "1:47:21", "remaining_time": "2:38:51", "throughput": 8635.55, "total_tokens": 55625192} +{"current_steps": 82540, "total_steps": 204665, "loss": 0.0003, "lr": 1.490031680639637e-06, "epoch": 2.0164659321329976, "percentage": 40.33, "elapsed_time": "1:47:21", "remaining_time": "2:38:51", "throughput": 8635.57, "total_tokens": 55628264} +{"current_steps": 82545, "total_steps": 204665, "loss": 0.0002, "lr": 1.4899573419800979e-06, "epoch": 2.016588082964845, "percentage": 40.33, "elapsed_time": "1:47:22", "remaining_time": "2:38:50", "throughput": 8635.65, "total_tokens": 55631784} +{"current_steps": 82550, "total_steps": 204665, "loss": 0.1244, "lr": 1.489882999757457e-06, "epoch": 2.016710233796692, "percentage": 40.33, "elapsed_time": "1:47:22", "remaining_time": "2:38:50", "throughput": 8635.73, "total_tokens": 55635368} +{"current_steps": 82555, "total_steps": 204665, "loss": 0.0248, "lr": 1.4898086539722556e-06, "epoch": 2.016832384628539, "percentage": 40.34, "elapsed_time": "1:47:22", "remaining_time": "2:38:49", "throughput": 8635.75, "total_tokens": 55638440} +{"current_steps": 82560, "total_steps": 204665, "loss": 0.0003, "lr": 1.4897343046250337e-06, "epoch": 2.0169545354603864, "percentage": 40.34, "elapsed_time": "1:47:23", "remaining_time": "2:38:49", "throughput": 8635.79, "total_tokens": 55641704} +{"current_steps": 82565, "total_steps": 204665, "loss": 0.0002, "lr": 1.489659951716332e-06, "epoch": 2.0170766862922336, "percentage": 40.34, "elapsed_time": "1:47:23", "remaining_time": "2:38:48", "throughput": 8635.8, "total_tokens": 55644648} +{"current_steps": 82570, "total_steps": 204665, "loss": 0.0004, "lr": 1.4895855952466918e-06, "epoch": 2.0171988371240808, "percentage": 40.34, "elapsed_time": "1:47:23", "remaining_time": "2:38:48", "throughput": 8635.85, "total_tokens": 55647976} +{"current_steps": 82575, "total_steps": 204665, "loss": 0.0796, "lr": 1.4895112352166533e-06, "epoch": 2.017320987955928, "percentage": 40.35, "elapsed_time": "1:47:24", "remaining_time": "2:38:47", "throughput": 8635.89, "total_tokens": 55651176} +{"current_steps": 82580, "total_steps": 204665, "loss": 0.0703, "lr": 1.4894368716267573e-06, "epoch": 2.017443138787775, "percentage": 40.35, "elapsed_time": "1:47:24", "remaining_time": "2:38:47", "throughput": 8636.0, "total_tokens": 55654952} +{"current_steps": 82585, "total_steps": 204665, "loss": 0.0001, "lr": 1.4893625044775451e-06, "epoch": 2.0175652896196223, "percentage": 40.35, "elapsed_time": "1:47:24", "remaining_time": "2:38:47", "throughput": 8636.06, "total_tokens": 55658344} +{"current_steps": 82590, "total_steps": 204665, "loss": 0.1, "lr": 1.4892881337695569e-06, "epoch": 2.0176874404514695, "percentage": 40.35, "elapsed_time": "1:47:25", "remaining_time": "2:38:46", "throughput": 8636.13, "total_tokens": 55661864} +{"current_steps": 82595, "total_steps": 204665, "loss": 0.0391, "lr": 1.4892137595033338e-06, "epoch": 2.0178095912833167, "percentage": 40.36, "elapsed_time": "1:47:25", "remaining_time": "2:38:46", "throughput": 8636.23, "total_tokens": 55665576} +{"current_steps": 82600, "total_steps": 204665, "loss": 0.0122, "lr": 1.4891393816794167e-06, "epoch": 2.017931742115164, "percentage": 40.36, "elapsed_time": "1:47:25", "remaining_time": "2:38:45", "throughput": 8636.29, "total_tokens": 55668904} +{"current_steps": 82605, "total_steps": 204665, "loss": 0.0012, "lr": 1.4890650002983466e-06, "epoch": 2.018053892947011, "percentage": 40.36, "elapsed_time": "1:47:26", "remaining_time": "2:38:45", "throughput": 8636.33, "total_tokens": 55672168} +{"current_steps": 82610, "total_steps": 204665, "loss": 0.0278, "lr": 1.4889906153606639e-06, "epoch": 2.0181760437788583, "percentage": 40.36, "elapsed_time": "1:47:26", "remaining_time": "2:38:44", "throughput": 8636.4, "total_tokens": 55675624} +{"current_steps": 82615, "total_steps": 204665, "loss": 0.0216, "lr": 1.4889162268669103e-06, "epoch": 2.0182981946107055, "percentage": 40.37, "elapsed_time": "1:47:26", "remaining_time": "2:38:44", "throughput": 8636.42, "total_tokens": 55678696} +{"current_steps": 82620, "total_steps": 204665, "loss": 0.0693, "lr": 1.4888418348176265e-06, "epoch": 2.0184203454425527, "percentage": 40.37, "elapsed_time": "1:47:27", "remaining_time": "2:38:43", "throughput": 8636.44, "total_tokens": 55681832} +{"current_steps": 82625, "total_steps": 204665, "loss": 0.0889, "lr": 1.4887674392133528e-06, "epoch": 2.0185424962743994, "percentage": 40.37, "elapsed_time": "1:47:27", "remaining_time": "2:38:43", "throughput": 8636.43, "total_tokens": 55684648} +{"current_steps": 82630, "total_steps": 204665, "loss": 0.0317, "lr": 1.488693040054631e-06, "epoch": 2.0186646471062466, "percentage": 40.37, "elapsed_time": "1:47:28", "remaining_time": "2:38:42", "throughput": 8636.56, "total_tokens": 55688552} +{"current_steps": 82635, "total_steps": 204665, "loss": 0.0002, "lr": 1.4886186373420022e-06, "epoch": 2.018786797938094, "percentage": 40.38, "elapsed_time": "1:47:28", "remaining_time": "2:38:42", "throughput": 8636.59, "total_tokens": 55691688} +{"current_steps": 82640, "total_steps": 204665, "loss": 0.0431, "lr": 1.4885442310760073e-06, "epoch": 2.018908948769941, "percentage": 40.38, "elapsed_time": "1:47:28", "remaining_time": "2:38:42", "throughput": 8636.64, "total_tokens": 55695016} +{"current_steps": 82645, "total_steps": 204665, "loss": 0.0524, "lr": 1.4884698212571873e-06, "epoch": 2.019031099601788, "percentage": 40.38, "elapsed_time": "1:47:29", "remaining_time": "2:38:41", "throughput": 8636.77, "total_tokens": 55698984} +{"current_steps": 82650, "total_steps": 204665, "loss": 0.0188, "lr": 1.4883954078860833e-06, "epoch": 2.0191532504336354, "percentage": 40.38, "elapsed_time": "1:47:29", "remaining_time": "2:38:41", "throughput": 8636.79, "total_tokens": 55702056} +{"current_steps": 82655, "total_steps": 204665, "loss": 0.0008, "lr": 1.4883209909632365e-06, "epoch": 2.0192754012654826, "percentage": 40.39, "elapsed_time": "1:47:29", "remaining_time": "2:38:40", "throughput": 8636.83, "total_tokens": 55705320} +{"current_steps": 82660, "total_steps": 204665, "loss": 0.0615, "lr": 1.488246570489188e-06, "epoch": 2.0193975520973297, "percentage": 40.39, "elapsed_time": "1:47:30", "remaining_time": "2:38:40", "throughput": 8636.9, "total_tokens": 55708776} +{"current_steps": 82665, "total_steps": 204665, "loss": 0.0442, "lr": 1.4881721464644792e-06, "epoch": 2.019519702929177, "percentage": 40.39, "elapsed_time": "1:47:30", "remaining_time": "2:38:39", "throughput": 8636.93, "total_tokens": 55711976} +{"current_steps": 82670, "total_steps": 204665, "loss": 0.0361, "lr": 1.4880977188896514e-06, "epoch": 2.019641853761024, "percentage": 40.39, "elapsed_time": "1:47:30", "remaining_time": "2:38:39", "throughput": 8636.98, "total_tokens": 55715240} +{"current_steps": 82675, "total_steps": 204665, "loss": 0.0426, "lr": 1.4880232877652454e-06, "epoch": 2.0197640045928713, "percentage": 40.4, "elapsed_time": "1:47:31", "remaining_time": "2:38:38", "throughput": 8637.04, "total_tokens": 55718696} +{"current_steps": 82680, "total_steps": 204665, "loss": 0.0392, "lr": 1.4879488530918032e-06, "epoch": 2.0198861554247185, "percentage": 40.4, "elapsed_time": "1:47:31", "remaining_time": "2:38:38", "throughput": 8637.15, "total_tokens": 55722408} +{"current_steps": 82685, "total_steps": 204665, "loss": 0.0549, "lr": 1.4878744148698655e-06, "epoch": 2.0200083062565657, "percentage": 40.4, "elapsed_time": "1:47:31", "remaining_time": "2:38:37", "throughput": 8637.15, "total_tokens": 55725352} +{"current_steps": 82690, "total_steps": 204665, "loss": 0.1252, "lr": 1.4877999730999738e-06, "epoch": 2.020130457088413, "percentage": 40.4, "elapsed_time": "1:47:32", "remaining_time": "2:38:37", "throughput": 8637.28, "total_tokens": 55729320} +{"current_steps": 82695, "total_steps": 204665, "loss": 0.0441, "lr": 1.4877255277826694e-06, "epoch": 2.02025260792026, "percentage": 40.41, "elapsed_time": "1:47:32", "remaining_time": "2:38:37", "throughput": 8637.35, "total_tokens": 55732776} +{"current_steps": 82700, "total_steps": 204665, "loss": 0.1352, "lr": 1.4876510789184939e-06, "epoch": 2.0203747587521073, "percentage": 40.41, "elapsed_time": "1:47:32", "remaining_time": "2:38:36", "throughput": 8637.4, "total_tokens": 55736104} +{"current_steps": 82705, "total_steps": 204665, "loss": 0.0021, "lr": 1.4875766265079888e-06, "epoch": 2.0204969095839544, "percentage": 40.41, "elapsed_time": "1:47:33", "remaining_time": "2:38:36", "throughput": 8637.45, "total_tokens": 55739432} +{"current_steps": 82710, "total_steps": 204665, "loss": 0.028, "lr": 1.487502170551695e-06, "epoch": 2.0206190604158016, "percentage": 40.41, "elapsed_time": "1:47:33", "remaining_time": "2:38:35", "throughput": 8637.48, "total_tokens": 55742568} +{"current_steps": 82715, "total_steps": 204665, "loss": 0.06, "lr": 1.4874277110501545e-06, "epoch": 2.0207412112476484, "percentage": 40.41, "elapsed_time": "1:47:33", "remaining_time": "2:38:35", "throughput": 8637.59, "total_tokens": 55746344} +{"current_steps": 82720, "total_steps": 204665, "loss": 0.0002, "lr": 1.4873532480039084e-06, "epoch": 2.0208633620794956, "percentage": 40.42, "elapsed_time": "1:47:34", "remaining_time": "2:38:34", "throughput": 8637.57, "total_tokens": 55749096} +{"current_steps": 82725, "total_steps": 204665, "loss": 0.0545, "lr": 1.4872787814134983e-06, "epoch": 2.0209855129113428, "percentage": 40.42, "elapsed_time": "1:47:34", "remaining_time": "2:38:34", "throughput": 8637.65, "total_tokens": 55752680} +{"current_steps": 82730, "total_steps": 204665, "loss": 0.001, "lr": 1.487204311279466e-06, "epoch": 2.02110766374319, "percentage": 40.42, "elapsed_time": "1:47:34", "remaining_time": "2:38:33", "throughput": 8637.79, "total_tokens": 55756712} +{"current_steps": 82735, "total_steps": 204665, "loss": 0.1147, "lr": 1.4871298376023531e-06, "epoch": 2.021229814575037, "percentage": 40.42, "elapsed_time": "1:47:35", "remaining_time": "2:38:33", "throughput": 8637.83, "total_tokens": 55759976} +{"current_steps": 82740, "total_steps": 204665, "loss": 0.0001, "lr": 1.4870553603827007e-06, "epoch": 2.0213519654068843, "percentage": 40.43, "elapsed_time": "1:47:35", "remaining_time": "2:38:33", "throughput": 8637.89, "total_tokens": 55763432} +{"current_steps": 82745, "total_steps": 204665, "loss": 0.0474, "lr": 1.486980879621051e-06, "epoch": 2.0214741162387315, "percentage": 40.43, "elapsed_time": "1:47:36", "remaining_time": "2:38:32", "throughput": 8637.96, "total_tokens": 55766888} +{"current_steps": 82750, "total_steps": 204665, "loss": 0.0007, "lr": 1.4869063953179452e-06, "epoch": 2.0215962670705787, "percentage": 40.43, "elapsed_time": "1:47:36", "remaining_time": "2:38:32", "throughput": 8638.02, "total_tokens": 55770280} +{"current_steps": 82755, "total_steps": 204665, "loss": 0.0002, "lr": 1.4868319074739252e-06, "epoch": 2.021718417902426, "percentage": 40.43, "elapsed_time": "1:47:36", "remaining_time": "2:38:31", "throughput": 8638.13, "total_tokens": 55774056} +{"current_steps": 82760, "total_steps": 204665, "loss": 0.0274, "lr": 1.4867574160895327e-06, "epoch": 2.021840568734273, "percentage": 40.44, "elapsed_time": "1:47:37", "remaining_time": "2:38:31", "throughput": 8638.17, "total_tokens": 55777320} +{"current_steps": 82765, "total_steps": 204665, "loss": 0.0312, "lr": 1.4866829211653092e-06, "epoch": 2.0219627195661203, "percentage": 40.44, "elapsed_time": "1:47:37", "remaining_time": "2:38:30", "throughput": 8638.25, "total_tokens": 55780904} +{"current_steps": 82770, "total_steps": 204665, "loss": 0.0282, "lr": 1.4866084227017966e-06, "epoch": 2.0220848703979675, "percentage": 40.44, "elapsed_time": "1:47:37", "remaining_time": "2:38:30", "throughput": 8638.25, "total_tokens": 55783784} +{"current_steps": 82775, "total_steps": 204665, "loss": 0.0368, "lr": 1.4865339206995367e-06, "epoch": 2.0222070212298147, "percentage": 40.44, "elapsed_time": "1:47:38", "remaining_time": "2:38:29", "throughput": 8638.35, "total_tokens": 55787560} +{"current_steps": 82780, "total_steps": 204665, "loss": 0.0002, "lr": 1.486459415159071e-06, "epoch": 2.022329172061662, "percentage": 40.45, "elapsed_time": "1:47:38", "remaining_time": "2:38:29", "throughput": 8638.46, "total_tokens": 55791336} +{"current_steps": 82785, "total_steps": 204665, "loss": 0.0192, "lr": 1.486384906080942e-06, "epoch": 2.022451322893509, "percentage": 40.45, "elapsed_time": "1:47:38", "remaining_time": "2:38:29", "throughput": 8638.72, "total_tokens": 55796456} +{"current_steps": 82790, "total_steps": 204665, "loss": 0.0002, "lr": 1.4863103934656908e-06, "epoch": 2.0225734737253562, "percentage": 40.45, "elapsed_time": "1:47:39", "remaining_time": "2:38:28", "throughput": 8638.89, "total_tokens": 55800744} +{"current_steps": 82795, "total_steps": 204665, "loss": 0.0401, "lr": 1.4862358773138599e-06, "epoch": 2.0226956245572034, "percentage": 40.45, "elapsed_time": "1:47:39", "remaining_time": "2:38:28", "throughput": 8638.95, "total_tokens": 55804136} +{"current_steps": 82800, "total_steps": 204665, "loss": 0.0001, "lr": 1.486161357625991e-06, "epoch": 2.0228177753890506, "percentage": 40.46, "elapsed_time": "1:47:39", "remaining_time": "2:38:27", "throughput": 8639.01, "total_tokens": 55807528} +{"current_steps": 82805, "total_steps": 204665, "loss": 0.0002, "lr": 1.4860868344026258e-06, "epoch": 2.0229399262208974, "percentage": 40.46, "elapsed_time": "1:47:40", "remaining_time": "2:38:27", "throughput": 8639.06, "total_tokens": 55810920} +{"current_steps": 82810, "total_steps": 204665, "loss": 0.0823, "lr": 1.486012307644306e-06, "epoch": 2.0230620770527445, "percentage": 40.46, "elapsed_time": "1:47:40", "remaining_time": "2:38:26", "throughput": 8639.1, "total_tokens": 55814120} +{"current_steps": 82815, "total_steps": 204665, "loss": 0.0001, "lr": 1.4859377773515745e-06, "epoch": 2.0231842278845917, "percentage": 40.46, "elapsed_time": "1:47:40", "remaining_time": "2:38:26", "throughput": 8639.14, "total_tokens": 55817448} +{"current_steps": 82820, "total_steps": 204665, "loss": 0.0418, "lr": 1.4858632435249728e-06, "epoch": 2.023306378716439, "percentage": 40.47, "elapsed_time": "1:47:41", "remaining_time": "2:38:25", "throughput": 8639.18, "total_tokens": 55820712} +{"current_steps": 82825, "total_steps": 204665, "loss": 0.0001, "lr": 1.4857887061650426e-06, "epoch": 2.023428529548286, "percentage": 40.47, "elapsed_time": "1:47:41", "remaining_time": "2:38:25", "throughput": 8639.27, "total_tokens": 55824296} +{"current_steps": 82830, "total_steps": 204665, "loss": 0.1306, "lr": 1.4857141652723264e-06, "epoch": 2.0235506803801333, "percentage": 40.47, "elapsed_time": "1:47:42", "remaining_time": "2:38:25", "throughput": 8639.32, "total_tokens": 55827688} +{"current_steps": 82835, "total_steps": 204665, "loss": 0.0003, "lr": 1.4856396208473662e-06, "epoch": 2.0236728312119805, "percentage": 40.47, "elapsed_time": "1:47:42", "remaining_time": "2:38:24", "throughput": 8639.52, "total_tokens": 55832168} +{"current_steps": 82840, "total_steps": 204665, "loss": 0.0759, "lr": 1.4855650728907038e-06, "epoch": 2.0237949820438277, "percentage": 40.48, "elapsed_time": "1:47:42", "remaining_time": "2:38:24", "throughput": 8639.59, "total_tokens": 55835624} +{"current_steps": 82845, "total_steps": 204665, "loss": 0.0002, "lr": 1.4854905214028817e-06, "epoch": 2.023917132875675, "percentage": 40.48, "elapsed_time": "1:47:43", "remaining_time": "2:38:23", "throughput": 8639.65, "total_tokens": 55839080} +{"current_steps": 82850, "total_steps": 204665, "loss": 0.0642, "lr": 1.4854159663844423e-06, "epoch": 2.024039283707522, "percentage": 40.48, "elapsed_time": "1:47:43", "remaining_time": "2:38:23", "throughput": 8639.68, "total_tokens": 55842216} +{"current_steps": 82855, "total_steps": 204665, "loss": 0.0002, "lr": 1.4853414078359272e-06, "epoch": 2.0241614345393693, "percentage": 40.48, "elapsed_time": "1:47:43", "remaining_time": "2:38:22", "throughput": 8639.75, "total_tokens": 55845672} +{"current_steps": 82860, "total_steps": 204665, "loss": 0.0749, "lr": 1.485266845757879e-06, "epoch": 2.0242835853712164, "percentage": 40.49, "elapsed_time": "1:47:44", "remaining_time": "2:38:22", "throughput": 8639.88, "total_tokens": 55849704} +{"current_steps": 82865, "total_steps": 204665, "loss": 0.007, "lr": 1.4851922801508393e-06, "epoch": 2.0244057362030636, "percentage": 40.49, "elapsed_time": "1:47:44", "remaining_time": "2:38:21", "throughput": 8639.96, "total_tokens": 55853288} +{"current_steps": 82870, "total_steps": 204665, "loss": 0.0792, "lr": 1.4851177110153512e-06, "epoch": 2.024527887034911, "percentage": 40.49, "elapsed_time": "1:47:44", "remaining_time": "2:38:21", "throughput": 8640.01, "total_tokens": 55856616} +{"current_steps": 82875, "total_steps": 204665, "loss": 0.0004, "lr": 1.4850431383519563e-06, "epoch": 2.024650037866758, "percentage": 40.49, "elapsed_time": "1:47:45", "remaining_time": "2:38:21", "throughput": 8640.09, "total_tokens": 55860200} +{"current_steps": 82880, "total_steps": 204665, "loss": 0.0001, "lr": 1.4849685621611976e-06, "epoch": 2.024772188698605, "percentage": 40.5, "elapsed_time": "1:47:45", "remaining_time": "2:38:20", "throughput": 8640.17, "total_tokens": 55863784} +{"current_steps": 82885, "total_steps": 204665, "loss": 0.0114, "lr": 1.4848939824436171e-06, "epoch": 2.0248943395304524, "percentage": 40.5, "elapsed_time": "1:47:45", "remaining_time": "2:38:20", "throughput": 8640.22, "total_tokens": 55867048} +{"current_steps": 82890, "total_steps": 204665, "loss": 0.0002, "lr": 1.4848193991997572e-06, "epoch": 2.0250164903622996, "percentage": 40.5, "elapsed_time": "1:47:46", "remaining_time": "2:38:19", "throughput": 8640.27, "total_tokens": 55870376} +{"current_steps": 82895, "total_steps": 204665, "loss": 0.0001, "lr": 1.4847448124301598e-06, "epoch": 2.0251386411941463, "percentage": 40.5, "elapsed_time": "1:47:46", "remaining_time": "2:38:19", "throughput": 8640.36, "total_tokens": 55874088} +{"current_steps": 82900, "total_steps": 204665, "loss": 0.0276, "lr": 1.484670222135368e-06, "epoch": 2.0252607920259935, "percentage": 40.51, "elapsed_time": "1:47:46", "remaining_time": "2:38:18", "throughput": 8640.39, "total_tokens": 55877224} +{"current_steps": 82905, "total_steps": 204665, "loss": 0.0953, "lr": 1.484595628315924e-06, "epoch": 2.0253829428578407, "percentage": 40.51, "elapsed_time": "1:47:47", "remaining_time": "2:38:18", "throughput": 8640.47, "total_tokens": 55880808} +{"current_steps": 82910, "total_steps": 204665, "loss": 0.0002, "lr": 1.48452103097237e-06, "epoch": 2.025505093689688, "percentage": 40.51, "elapsed_time": "1:47:47", "remaining_time": "2:38:17", "throughput": 8640.52, "total_tokens": 55884200} +{"current_steps": 82915, "total_steps": 204665, "loss": 0.0919, "lr": 1.4844464301052494e-06, "epoch": 2.025627244521535, "percentage": 40.51, "elapsed_time": "1:47:48", "remaining_time": "2:38:17", "throughput": 8640.59, "total_tokens": 55887784} +{"current_steps": 82920, "total_steps": 204665, "loss": 0.0002, "lr": 1.4843718257151034e-06, "epoch": 2.0257493953533823, "percentage": 40.51, "elapsed_time": "1:47:48", "remaining_time": "2:38:17", "throughput": 8640.61, "total_tokens": 55890920} +{"current_steps": 82925, "total_steps": 204665, "loss": 0.0001, "lr": 1.4842972178024753e-06, "epoch": 2.0258715461852295, "percentage": 40.52, "elapsed_time": "1:47:48", "remaining_time": "2:38:16", "throughput": 8640.66, "total_tokens": 55894248} +{"current_steps": 82930, "total_steps": 204665, "loss": 0.0377, "lr": 1.4842226063679077e-06, "epoch": 2.0259936970170767, "percentage": 40.52, "elapsed_time": "1:47:49", "remaining_time": "2:38:16", "throughput": 8640.66, "total_tokens": 55897192} +{"current_steps": 82935, "total_steps": 204665, "loss": 0.0003, "lr": 1.484147991411943e-06, "epoch": 2.026115847848924, "percentage": 40.52, "elapsed_time": "1:47:49", "remaining_time": "2:38:15", "throughput": 8640.66, "total_tokens": 55900136} +{"current_steps": 82940, "total_steps": 204665, "loss": 0.0004, "lr": 1.484073372935124e-06, "epoch": 2.026237998680771, "percentage": 40.52, "elapsed_time": "1:47:49", "remaining_time": "2:38:15", "throughput": 8640.77, "total_tokens": 55903912} +{"current_steps": 82945, "total_steps": 204665, "loss": 0.0002, "lr": 1.4839987509379933e-06, "epoch": 2.0263601495126182, "percentage": 40.53, "elapsed_time": "1:47:50", "remaining_time": "2:38:14", "throughput": 8640.78, "total_tokens": 55906920} +{"current_steps": 82950, "total_steps": 204665, "loss": 0.0399, "lr": 1.4839241254210932e-06, "epoch": 2.0264823003444654, "percentage": 40.53, "elapsed_time": "1:47:50", "remaining_time": "2:38:14", "throughput": 8640.85, "total_tokens": 55910440} +{"current_steps": 82955, "total_steps": 204665, "loss": 0.0004, "lr": 1.483849496384967e-06, "epoch": 2.0266044511763126, "percentage": 40.53, "elapsed_time": "1:47:50", "remaining_time": "2:38:13", "throughput": 8640.9, "total_tokens": 55913768} +{"current_steps": 82960, "total_steps": 204665, "loss": 0.0001, "lr": 1.483774863830157e-06, "epoch": 2.02672660200816, "percentage": 40.53, "elapsed_time": "1:47:51", "remaining_time": "2:38:13", "throughput": 8640.91, "total_tokens": 55916712} +{"current_steps": 82965, "total_steps": 204665, "loss": 0.0005, "lr": 1.483700227757206e-06, "epoch": 2.026848752840007, "percentage": 40.54, "elapsed_time": "1:47:51", "remaining_time": "2:38:12", "throughput": 8640.93, "total_tokens": 55919848} +{"current_steps": 82970, "total_steps": 204665, "loss": 0.159, "lr": 1.4836255881666568e-06, "epoch": 2.026970903671854, "percentage": 40.54, "elapsed_time": "1:47:51", "remaining_time": "2:38:12", "throughput": 8641.01, "total_tokens": 55923368} +{"current_steps": 82975, "total_steps": 204665, "loss": 0.0001, "lr": 1.4835509450590525e-06, "epoch": 2.0270930545037014, "percentage": 40.54, "elapsed_time": "1:47:52", "remaining_time": "2:38:12", "throughput": 8641.02, "total_tokens": 55926376} +{"current_steps": 82980, "total_steps": 204665, "loss": 0.0001, "lr": 1.4834762984349354e-06, "epoch": 2.0272152053355486, "percentage": 40.54, "elapsed_time": "1:47:52", "remaining_time": "2:38:11", "throughput": 8641.13, "total_tokens": 55930152} +{"current_steps": 82985, "total_steps": 204665, "loss": 0.0002, "lr": 1.4834016482948489e-06, "epoch": 2.0273373561673953, "percentage": 40.55, "elapsed_time": "1:47:52", "remaining_time": "2:38:11", "throughput": 8641.15, "total_tokens": 55933288} +{"current_steps": 82990, "total_steps": 204665, "loss": 0.0002, "lr": 1.4833269946393353e-06, "epoch": 2.0274595069992425, "percentage": 40.55, "elapsed_time": "1:47:53", "remaining_time": "2:38:10", "throughput": 8641.34, "total_tokens": 55937704} +{"current_steps": 82995, "total_steps": 204665, "loss": 0.0213, "lr": 1.483252337468938e-06, "epoch": 2.0275816578310897, "percentage": 40.55, "elapsed_time": "1:47:53", "remaining_time": "2:38:10", "throughput": 8641.36, "total_tokens": 55940776} +{"current_steps": 83000, "total_steps": 204665, "loss": 0.0004, "lr": 1.4831776767841996e-06, "epoch": 2.027703808662937, "percentage": 40.55, "elapsed_time": "1:47:53", "remaining_time": "2:38:09", "throughput": 8641.39, "total_tokens": 55943912} +{"current_steps": 83005, "total_steps": 204665, "loss": 0.048, "lr": 1.4831030125856633e-06, "epoch": 2.027825959494784, "percentage": 40.56, "elapsed_time": "1:47:54", "remaining_time": "2:38:09", "throughput": 8641.45, "total_tokens": 55947368} +{"current_steps": 83010, "total_steps": 204665, "loss": 0.0642, "lr": 1.4830283448738718e-06, "epoch": 2.0279481103266312, "percentage": 40.56, "elapsed_time": "1:47:54", "remaining_time": "2:38:08", "throughput": 8641.51, "total_tokens": 55950824} +{"current_steps": 83015, "total_steps": 204665, "loss": 0.1304, "lr": 1.4829536736493685e-06, "epoch": 2.0280702611584784, "percentage": 40.56, "elapsed_time": "1:47:55", "remaining_time": "2:38:08", "throughput": 8641.62, "total_tokens": 55954600} +{"current_steps": 83020, "total_steps": 204665, "loss": 0.0001, "lr": 1.482878998912696e-06, "epoch": 2.0281924119903256, "percentage": 40.56, "elapsed_time": "1:47:55", "remaining_time": "2:38:08", "throughput": 8641.73, "total_tokens": 55958376} +{"current_steps": 83025, "total_steps": 204665, "loss": 0.0407, "lr": 1.4828043206643976e-06, "epoch": 2.028314562822173, "percentage": 40.57, "elapsed_time": "1:47:55", "remaining_time": "2:38:07", "throughput": 8641.73, "total_tokens": 55961320} +{"current_steps": 83030, "total_steps": 204665, "loss": 0.0004, "lr": 1.4827296389050161e-06, "epoch": 2.02843671365402, "percentage": 40.57, "elapsed_time": "1:47:56", "remaining_time": "2:38:07", "throughput": 8641.78, "total_tokens": 55964648} +{"current_steps": 83035, "total_steps": 204665, "loss": 0.0479, "lr": 1.482654953635095e-06, "epoch": 2.028558864485867, "percentage": 40.57, "elapsed_time": "1:47:56", "remaining_time": "2:38:06", "throughput": 8641.83, "total_tokens": 55967976} +{"current_steps": 83040, "total_steps": 204665, "loss": 0.0001, "lr": 1.4825802648551774e-06, "epoch": 2.0286810153177144, "percentage": 40.57, "elapsed_time": "1:47:56", "remaining_time": "2:38:06", "throughput": 8641.84, "total_tokens": 55970920} +{"current_steps": 83045, "total_steps": 204665, "loss": 0.0491, "lr": 1.482505572565806e-06, "epoch": 2.0288031661495616, "percentage": 40.58, "elapsed_time": "1:47:57", "remaining_time": "2:38:05", "throughput": 8641.88, "total_tokens": 55974184} +{"current_steps": 83050, "total_steps": 204665, "loss": 0.1344, "lr": 1.4824308767675247e-06, "epoch": 2.0289253169814088, "percentage": 40.58, "elapsed_time": "1:47:57", "remaining_time": "2:38:05", "throughput": 8641.94, "total_tokens": 55977640} +{"current_steps": 83055, "total_steps": 204665, "loss": 0.0002, "lr": 1.4823561774608759e-06, "epoch": 2.029047467813256, "percentage": 40.58, "elapsed_time": "1:47:57", "remaining_time": "2:38:04", "throughput": 8641.99, "total_tokens": 55980904} +{"current_steps": 83060, "total_steps": 204665, "loss": 0.034, "lr": 1.4822814746464034e-06, "epoch": 2.029169618645103, "percentage": 40.58, "elapsed_time": "1:47:58", "remaining_time": "2:38:04", "throughput": 8642.08, "total_tokens": 55984552} +{"current_steps": 83065, "total_steps": 204665, "loss": 0.0361, "lr": 1.4822067683246503e-06, "epoch": 2.0292917694769503, "percentage": 40.59, "elapsed_time": "1:47:58", "remaining_time": "2:38:03", "throughput": 8642.12, "total_tokens": 55987816} +{"current_steps": 83070, "total_steps": 204665, "loss": 0.0002, "lr": 1.4821320584961601e-06, "epoch": 2.029413920308797, "percentage": 40.59, "elapsed_time": "1:47:58", "remaining_time": "2:38:03", "throughput": 8642.23, "total_tokens": 55991592} +{"current_steps": 83075, "total_steps": 204665, "loss": 0.0004, "lr": 1.4820573451614757e-06, "epoch": 2.0295360711406443, "percentage": 40.59, "elapsed_time": "1:47:59", "remaining_time": "2:38:03", "throughput": 8642.28, "total_tokens": 55994920} +{"current_steps": 83080, "total_steps": 204665, "loss": 0.0004, "lr": 1.4819826283211407e-06, "epoch": 2.0296582219724915, "percentage": 40.59, "elapsed_time": "1:47:59", "remaining_time": "2:38:02", "throughput": 8642.32, "total_tokens": 55998184} +{"current_steps": 83085, "total_steps": 204665, "loss": 0.0003, "lr": 1.4819079079756982e-06, "epoch": 2.0297803728043387, "percentage": 40.6, "elapsed_time": "1:47:59", "remaining_time": "2:38:02", "throughput": 8642.4, "total_tokens": 56001704} +{"current_steps": 83090, "total_steps": 204665, "loss": 0.1135, "lr": 1.4818331841256919e-06, "epoch": 2.029902523636186, "percentage": 40.6, "elapsed_time": "1:48:00", "remaining_time": "2:38:01", "throughput": 8642.42, "total_tokens": 56004840} +{"current_steps": 83095, "total_steps": 204665, "loss": 0.0003, "lr": 1.481758456771665e-06, "epoch": 2.030024674468033, "percentage": 40.6, "elapsed_time": "1:48:00", "remaining_time": "2:38:01", "throughput": 8642.42, "total_tokens": 56007784} +{"current_steps": 83100, "total_steps": 204665, "loss": 0.0512, "lr": 1.481683725914161e-06, "epoch": 2.03014682529988, "percentage": 40.6, "elapsed_time": "1:48:00", "remaining_time": "2:38:00", "throughput": 8642.52, "total_tokens": 56011496} +{"current_steps": 83105, "total_steps": 204665, "loss": 0.0002, "lr": 1.4816089915537235e-06, "epoch": 2.0302689761317274, "percentage": 40.61, "elapsed_time": "1:48:01", "remaining_time": "2:38:00", "throughput": 8642.59, "total_tokens": 56015016} +{"current_steps": 83110, "total_steps": 204665, "loss": 0.0489, "lr": 1.4815342536908962e-06, "epoch": 2.0303911269635746, "percentage": 40.61, "elapsed_time": "1:48:01", "remaining_time": "2:37:59", "throughput": 8642.61, "total_tokens": 56018088} +{"current_steps": 83115, "total_steps": 204665, "loss": 0.0001, "lr": 1.4814595123262218e-06, "epoch": 2.030513277795422, "percentage": 40.61, "elapsed_time": "1:48:01", "remaining_time": "2:37:59", "throughput": 8642.68, "total_tokens": 56021544} +{"current_steps": 83120, "total_steps": 204665, "loss": 0.1347, "lr": 1.4813847674602447e-06, "epoch": 2.030635428627269, "percentage": 40.61, "elapsed_time": "1:48:02", "remaining_time": "2:37:58", "throughput": 8642.77, "total_tokens": 56025192} +{"current_steps": 83125, "total_steps": 204665, "loss": 0.124, "lr": 1.4813100190935077e-06, "epoch": 2.030757579459116, "percentage": 40.62, "elapsed_time": "1:48:02", "remaining_time": "2:37:58", "throughput": 8642.81, "total_tokens": 56028392} +{"current_steps": 83130, "total_steps": 204665, "loss": 0.0594, "lr": 1.4812352672265549e-06, "epoch": 2.0308797302909634, "percentage": 40.62, "elapsed_time": "1:48:03", "remaining_time": "2:37:58", "throughput": 8642.88, "total_tokens": 56031912} +{"current_steps": 83135, "total_steps": 204665, "loss": 0.0001, "lr": 1.48116051185993e-06, "epoch": 2.0310018811228105, "percentage": 40.62, "elapsed_time": "1:48:03", "remaining_time": "2:37:57", "throughput": 8642.94, "total_tokens": 56035304} +{"current_steps": 83140, "total_steps": 204665, "loss": 0.0616, "lr": 1.4810857529941762e-06, "epoch": 2.0311240319546577, "percentage": 40.62, "elapsed_time": "1:48:03", "remaining_time": "2:37:57", "throughput": 8643.0, "total_tokens": 56038696} +{"current_steps": 83145, "total_steps": 204665, "loss": 0.076, "lr": 1.4810109906298375e-06, "epoch": 2.031246182786505, "percentage": 40.62, "elapsed_time": "1:48:04", "remaining_time": "2:37:56", "throughput": 8643.03, "total_tokens": 56041896} +{"current_steps": 83150, "total_steps": 204665, "loss": 0.1202, "lr": 1.4809362247674578e-06, "epoch": 2.031368333618352, "percentage": 40.63, "elapsed_time": "1:48:04", "remaining_time": "2:37:56", "throughput": 8643.11, "total_tokens": 56045416} +{"current_steps": 83155, "total_steps": 204665, "loss": 0.0196, "lr": 1.48086145540758e-06, "epoch": 2.0314904844501993, "percentage": 40.63, "elapsed_time": "1:48:04", "remaining_time": "2:37:55", "throughput": 8643.18, "total_tokens": 56048936} +{"current_steps": 83160, "total_steps": 204665, "loss": 0.0005, "lr": 1.4807866825507487e-06, "epoch": 2.031612635282046, "percentage": 40.63, "elapsed_time": "1:48:05", "remaining_time": "2:37:55", "throughput": 8643.24, "total_tokens": 56052264} +{"current_steps": 83165, "total_steps": 204665, "loss": 0.0346, "lr": 1.4807119061975074e-06, "epoch": 2.0317347861138932, "percentage": 40.63, "elapsed_time": "1:48:05", "remaining_time": "2:37:54", "throughput": 8643.27, "total_tokens": 56055464} +{"current_steps": 83170, "total_steps": 204665, "loss": 0.0758, "lr": 1.4806371263483995e-06, "epoch": 2.0318569369457404, "percentage": 40.64, "elapsed_time": "1:48:05", "remaining_time": "2:37:54", "throughput": 8643.33, "total_tokens": 56058792} +{"current_steps": 83175, "total_steps": 204665, "loss": 0.025, "lr": 1.4805623430039693e-06, "epoch": 2.0319790877775876, "percentage": 40.64, "elapsed_time": "1:48:06", "remaining_time": "2:37:54", "throughput": 8643.39, "total_tokens": 56062248} +{"current_steps": 83180, "total_steps": 204665, "loss": 0.0016, "lr": 1.4804875561647604e-06, "epoch": 2.032101238609435, "percentage": 40.64, "elapsed_time": "1:48:06", "remaining_time": "2:37:53", "throughput": 8643.47, "total_tokens": 56065768} +{"current_steps": 83185, "total_steps": 204665, "loss": 0.0915, "lr": 1.4804127658313168e-06, "epoch": 2.032223389441282, "percentage": 40.64, "elapsed_time": "1:48:06", "remaining_time": "2:37:53", "throughput": 8643.51, "total_tokens": 56069032} +{"current_steps": 83190, "total_steps": 204665, "loss": 0.0537, "lr": 1.4803379720041824e-06, "epoch": 2.032345540273129, "percentage": 40.65, "elapsed_time": "1:48:07", "remaining_time": "2:37:52", "throughput": 8643.56, "total_tokens": 56072360} +{"current_steps": 83195, "total_steps": 204665, "loss": 0.0415, "lr": 1.480263174683901e-06, "epoch": 2.0324676911049764, "percentage": 40.65, "elapsed_time": "1:48:07", "remaining_time": "2:37:52", "throughput": 8643.63, "total_tokens": 56075816} +{"current_steps": 83200, "total_steps": 204665, "loss": 0.0252, "lr": 1.4801883738710168e-06, "epoch": 2.0325898419368236, "percentage": 40.65, "elapsed_time": "1:48:07", "remaining_time": "2:37:51", "throughput": 8643.67, "total_tokens": 56079080} +{"current_steps": 83205, "total_steps": 204665, "loss": 0.01, "lr": 1.4801135695660734e-06, "epoch": 2.0327119927686708, "percentage": 40.65, "elapsed_time": "1:48:08", "remaining_time": "2:37:51", "throughput": 8643.72, "total_tokens": 56082344} +{"current_steps": 83210, "total_steps": 204665, "loss": 0.0004, "lr": 1.480038761769615e-06, "epoch": 2.032834143600518, "percentage": 40.66, "elapsed_time": "1:48:08", "remaining_time": "2:37:50", "throughput": 8643.76, "total_tokens": 56085672} +{"current_steps": 83215, "total_steps": 204665, "loss": 0.0973, "lr": 1.4799639504821857e-06, "epoch": 2.032956294432365, "percentage": 40.66, "elapsed_time": "1:48:08", "remaining_time": "2:37:50", "throughput": 8643.8, "total_tokens": 56088872} +{"current_steps": 83220, "total_steps": 204665, "loss": 0.0001, "lr": 1.4798891357043296e-06, "epoch": 2.0330784452642123, "percentage": 40.66, "elapsed_time": "1:48:09", "remaining_time": "2:37:49", "throughput": 8643.85, "total_tokens": 56092200} +{"current_steps": 83225, "total_steps": 204665, "loss": 0.0547, "lr": 1.4798143174365902e-06, "epoch": 2.0332005960960595, "percentage": 40.66, "elapsed_time": "1:48:09", "remaining_time": "2:37:49", "throughput": 8643.93, "total_tokens": 56095720} +{"current_steps": 83230, "total_steps": 204665, "loss": 0.0004, "lr": 1.4797394956795125e-06, "epoch": 2.0333227469279067, "percentage": 40.67, "elapsed_time": "1:48:09", "remaining_time": "2:37:49", "throughput": 8643.96, "total_tokens": 56098856} +{"current_steps": 83235, "total_steps": 204665, "loss": 0.0008, "lr": 1.4796646704336397e-06, "epoch": 2.033444897759754, "percentage": 40.67, "elapsed_time": "1:48:10", "remaining_time": "2:37:48", "throughput": 8644.02, "total_tokens": 56102248} +{"current_steps": 83240, "total_steps": 204665, "loss": 0.1074, "lr": 1.4795898416995167e-06, "epoch": 2.033567048591601, "percentage": 40.67, "elapsed_time": "1:48:10", "remaining_time": "2:37:48", "throughput": 8644.07, "total_tokens": 56105640} +{"current_steps": 83245, "total_steps": 204665, "loss": 0.0002, "lr": 1.479515009477687e-06, "epoch": 2.0336891994234483, "percentage": 40.67, "elapsed_time": "1:48:10", "remaining_time": "2:37:47", "throughput": 8644.11, "total_tokens": 56108840} +{"current_steps": 83250, "total_steps": 204665, "loss": 0.0893, "lr": 1.4794401737686956e-06, "epoch": 2.033811350255295, "percentage": 40.68, "elapsed_time": "1:48:11", "remaining_time": "2:37:47", "throughput": 8644.15, "total_tokens": 56112040} +{"current_steps": 83255, "total_steps": 204665, "loss": 0.0386, "lr": 1.4793653345730864e-06, "epoch": 2.033933501087142, "percentage": 40.68, "elapsed_time": "1:48:11", "remaining_time": "2:37:46", "throughput": 8644.24, "total_tokens": 56115688} +{"current_steps": 83260, "total_steps": 204665, "loss": 0.0768, "lr": 1.4792904918914034e-06, "epoch": 2.0340556519189894, "percentage": 40.68, "elapsed_time": "1:48:12", "remaining_time": "2:37:46", "throughput": 8644.25, "total_tokens": 56118696} +{"current_steps": 83265, "total_steps": 204665, "loss": 0.001, "lr": 1.4792156457241912e-06, "epoch": 2.0341778027508366, "percentage": 40.68, "elapsed_time": "1:48:12", "remaining_time": "2:37:45", "throughput": 8644.36, "total_tokens": 56122472} +{"current_steps": 83270, "total_steps": 204665, "loss": 0.0035, "lr": 1.4791407960719935e-06, "epoch": 2.034299953582684, "percentage": 40.69, "elapsed_time": "1:48:12", "remaining_time": "2:37:45", "throughput": 8644.39, "total_tokens": 56125608} +{"current_steps": 83275, "total_steps": 204665, "loss": 0.0427, "lr": 1.4790659429353553e-06, "epoch": 2.034422104414531, "percentage": 40.69, "elapsed_time": "1:48:13", "remaining_time": "2:37:44", "throughput": 8644.41, "total_tokens": 56128680} +{"current_steps": 83280, "total_steps": 204665, "loss": 0.0005, "lr": 1.4789910863148206e-06, "epoch": 2.034544255246378, "percentage": 40.69, "elapsed_time": "1:48:13", "remaining_time": "2:37:44", "throughput": 8644.58, "total_tokens": 56132968} +{"current_steps": 83285, "total_steps": 204665, "loss": 0.1166, "lr": 1.4789162262109338e-06, "epoch": 2.0346664060782254, "percentage": 40.69, "elapsed_time": "1:48:13", "remaining_time": "2:37:44", "throughput": 8644.61, "total_tokens": 56136168} +{"current_steps": 83290, "total_steps": 204665, "loss": 0.0022, "lr": 1.4788413626242396e-06, "epoch": 2.0347885569100725, "percentage": 40.7, "elapsed_time": "1:48:14", "remaining_time": "2:37:43", "throughput": 8644.64, "total_tokens": 56139304} +{"current_steps": 83295, "total_steps": 204665, "loss": 0.0004, "lr": 1.4787664955552822e-06, "epoch": 2.0349107077419197, "percentage": 40.7, "elapsed_time": "1:48:14", "remaining_time": "2:37:43", "throughput": 8644.66, "total_tokens": 56142376} +{"current_steps": 83300, "total_steps": 204665, "loss": 0.0003, "lr": 1.4786916250046063e-06, "epoch": 2.035032858573767, "percentage": 40.7, "elapsed_time": "1:48:14", "remaining_time": "2:37:42", "throughput": 8644.73, "total_tokens": 56145832} +{"current_steps": 83305, "total_steps": 204665, "loss": 0.002, "lr": 1.4786167509727556e-06, "epoch": 2.035155009405614, "percentage": 40.7, "elapsed_time": "1:48:15", "remaining_time": "2:37:42", "throughput": 8644.83, "total_tokens": 56149544} +{"current_steps": 83310, "total_steps": 204665, "loss": 0.0006, "lr": 1.4785418734602752e-06, "epoch": 2.0352771602374613, "percentage": 40.71, "elapsed_time": "1:48:15", "remaining_time": "2:37:41", "throughput": 8644.96, "total_tokens": 56153512} +{"current_steps": 83315, "total_steps": 204665, "loss": 0.0235, "lr": 1.4784669924677102e-06, "epoch": 2.0353993110693085, "percentage": 40.71, "elapsed_time": "1:48:15", "remaining_time": "2:37:41", "throughput": 8645.01, "total_tokens": 56156904} +{"current_steps": 83320, "total_steps": 204665, "loss": 0.0004, "lr": 1.4783921079956042e-06, "epoch": 2.0355214619011557, "percentage": 40.71, "elapsed_time": "1:48:16", "remaining_time": "2:37:40", "throughput": 8645.08, "total_tokens": 56160360} +{"current_steps": 83325, "total_steps": 204665, "loss": 0.0439, "lr": 1.478317220044502e-06, "epoch": 2.035643612733003, "percentage": 40.71, "elapsed_time": "1:48:16", "remaining_time": "2:37:40", "throughput": 8645.06, "total_tokens": 56163112} +{"current_steps": 83330, "total_steps": 204665, "loss": 0.0, "lr": 1.4782423286149484e-06, "epoch": 2.03576576356485, "percentage": 40.72, "elapsed_time": "1:48:16", "remaining_time": "2:37:39", "throughput": 8645.08, "total_tokens": 56166120} +{"current_steps": 83335, "total_steps": 204665, "loss": 0.0421, "lr": 1.478167433707488e-06, "epoch": 2.0358879143966973, "percentage": 40.72, "elapsed_time": "1:48:17", "remaining_time": "2:37:39", "throughput": 8645.13, "total_tokens": 56169448} +{"current_steps": 83340, "total_steps": 204665, "loss": 0.0176, "lr": 1.4780925353226651e-06, "epoch": 2.036010065228544, "percentage": 40.72, "elapsed_time": "1:48:17", "remaining_time": "2:37:39", "throughput": 8645.16, "total_tokens": 56172648} +{"current_steps": 83345, "total_steps": 204665, "loss": 0.0003, "lr": 1.478017633461025e-06, "epoch": 2.036132216060391, "percentage": 40.72, "elapsed_time": "1:48:17", "remaining_time": "2:37:38", "throughput": 8645.21, "total_tokens": 56175976} +{"current_steps": 83350, "total_steps": 204665, "loss": 0.0514, "lr": 1.477942728123112e-06, "epoch": 2.0362543668922384, "percentage": 40.73, "elapsed_time": "1:48:18", "remaining_time": "2:37:38", "throughput": 8645.28, "total_tokens": 56179496} +{"current_steps": 83355, "total_steps": 204665, "loss": 0.0001, "lr": 1.4778678193094712e-06, "epoch": 2.0363765177240856, "percentage": 40.73, "elapsed_time": "1:48:18", "remaining_time": "2:37:37", "throughput": 8645.38, "total_tokens": 56183144} +{"current_steps": 83360, "total_steps": 204665, "loss": 0.0002, "lr": 1.477792907020647e-06, "epoch": 2.0364986685559328, "percentage": 40.73, "elapsed_time": "1:48:18", "remaining_time": "2:37:37", "throughput": 8645.45, "total_tokens": 56186600} +{"current_steps": 83365, "total_steps": 204665, "loss": 0.0156, "lr": 1.477717991257184e-06, "epoch": 2.03662081938778, "percentage": 40.73, "elapsed_time": "1:48:19", "remaining_time": "2:37:36", "throughput": 8645.5, "total_tokens": 56189928} +{"current_steps": 83370, "total_steps": 204665, "loss": 0.0004, "lr": 1.4776430720196275e-06, "epoch": 2.036742970219627, "percentage": 40.73, "elapsed_time": "1:48:19", "remaining_time": "2:37:36", "throughput": 8645.52, "total_tokens": 56193000} +{"current_steps": 83375, "total_steps": 204665, "loss": 0.001, "lr": 1.4775681493085218e-06, "epoch": 2.0368651210514743, "percentage": 40.74, "elapsed_time": "1:48:20", "remaining_time": "2:37:35", "throughput": 8645.58, "total_tokens": 56196456} +{"current_steps": 83380, "total_steps": 204665, "loss": 0.0004, "lr": 1.4774932231244125e-06, "epoch": 2.0369872718833215, "percentage": 40.74, "elapsed_time": "1:48:20", "remaining_time": "2:37:35", "throughput": 8645.7, "total_tokens": 56200360} +{"current_steps": 83385, "total_steps": 204665, "loss": 0.0095, "lr": 1.4774182934678438e-06, "epoch": 2.0371094227151687, "percentage": 40.74, "elapsed_time": "1:48:20", "remaining_time": "2:37:35", "throughput": 8645.78, "total_tokens": 56203944} +{"current_steps": 83390, "total_steps": 204665, "loss": 0.0367, "lr": 1.477343360339361e-06, "epoch": 2.037231573547016, "percentage": 40.74, "elapsed_time": "1:48:21", "remaining_time": "2:37:34", "throughput": 8645.91, "total_tokens": 56207848} +{"current_steps": 83395, "total_steps": 204665, "loss": 0.0001, "lr": 1.4772684237395088e-06, "epoch": 2.037353724378863, "percentage": 40.75, "elapsed_time": "1:48:21", "remaining_time": "2:37:34", "throughput": 8646.04, "total_tokens": 56211880} +{"current_steps": 83400, "total_steps": 204665, "loss": 0.0001, "lr": 1.4771934836688322e-06, "epoch": 2.0374758752107103, "percentage": 40.75, "elapsed_time": "1:48:21", "remaining_time": "2:37:33", "throughput": 8646.08, "total_tokens": 56215144} +{"current_steps": 83405, "total_steps": 204665, "loss": 0.0001, "lr": 1.477118540127876e-06, "epoch": 2.0375980260425575, "percentage": 40.75, "elapsed_time": "1:48:22", "remaining_time": "2:37:33", "throughput": 8646.16, "total_tokens": 56218664} +{"current_steps": 83410, "total_steps": 204665, "loss": 0.0437, "lr": 1.477043593117186e-06, "epoch": 2.0377201768744047, "percentage": 40.75, "elapsed_time": "1:48:22", "remaining_time": "2:37:32", "throughput": 8646.28, "total_tokens": 56222568} +{"current_steps": 83415, "total_steps": 204665, "loss": 0.0001, "lr": 1.4769686426373065e-06, "epoch": 2.037842327706252, "percentage": 40.76, "elapsed_time": "1:48:22", "remaining_time": "2:37:32", "throughput": 8646.41, "total_tokens": 56226536} +{"current_steps": 83420, "total_steps": 204665, "loss": 0.059, "lr": 1.4768936886887826e-06, "epoch": 2.037964478538099, "percentage": 40.76, "elapsed_time": "1:48:23", "remaining_time": "2:37:31", "throughput": 8646.49, "total_tokens": 56230120} +{"current_steps": 83425, "total_steps": 204665, "loss": 0.0001, "lr": 1.4768187312721598e-06, "epoch": 2.0380866293699462, "percentage": 40.76, "elapsed_time": "1:48:23", "remaining_time": "2:37:31", "throughput": 8646.53, "total_tokens": 56233384} +{"current_steps": 83430, "total_steps": 204665, "loss": 0.0003, "lr": 1.4767437703879825e-06, "epoch": 2.038208780201793, "percentage": 40.76, "elapsed_time": "1:48:23", "remaining_time": "2:37:31", "throughput": 8646.6, "total_tokens": 56236904} +{"current_steps": 83435, "total_steps": 204665, "loss": 0.0001, "lr": 1.4766688060367965e-06, "epoch": 2.03833093103364, "percentage": 40.77, "elapsed_time": "1:48:24", "remaining_time": "2:37:30", "throughput": 8646.61, "total_tokens": 56239848} +{"current_steps": 83440, "total_steps": 204665, "loss": 0.0003, "lr": 1.4765938382191468e-06, "epoch": 2.0384530818654873, "percentage": 40.77, "elapsed_time": "1:48:24", "remaining_time": "2:37:30", "throughput": 8646.66, "total_tokens": 56243176} +{"current_steps": 83445, "total_steps": 204665, "loss": 0.0522, "lr": 1.4765188669355784e-06, "epoch": 2.0385752326973345, "percentage": 40.77, "elapsed_time": "1:48:24", "remaining_time": "2:37:29", "throughput": 8646.75, "total_tokens": 56246760} +{"current_steps": 83450, "total_steps": 204665, "loss": 0.0001, "lr": 1.4764438921866367e-06, "epoch": 2.0386973835291817, "percentage": 40.77, "elapsed_time": "1:48:25", "remaining_time": "2:37:29", "throughput": 8646.74, "total_tokens": 56249640} +{"current_steps": 83455, "total_steps": 204665, "loss": 0.0604, "lr": 1.476368913972867e-06, "epoch": 2.038819534361029, "percentage": 40.78, "elapsed_time": "1:48:25", "remaining_time": "2:37:28", "throughput": 8646.75, "total_tokens": 56252648} +{"current_steps": 83460, "total_steps": 204665, "loss": 0.0002, "lr": 1.4762939322948142e-06, "epoch": 2.038941685192876, "percentage": 40.78, "elapsed_time": "1:48:25", "remaining_time": "2:37:28", "throughput": 8646.77, "total_tokens": 56255720} +{"current_steps": 83465, "total_steps": 204665, "loss": 0.1104, "lr": 1.4762189471530237e-06, "epoch": 2.0390638360247233, "percentage": 40.78, "elapsed_time": "1:48:26", "remaining_time": "2:37:27", "throughput": 8646.76, "total_tokens": 56258536} +{"current_steps": 83470, "total_steps": 204665, "loss": 0.1059, "lr": 1.4761439585480413e-06, "epoch": 2.0391859868565705, "percentage": 40.78, "elapsed_time": "1:48:26", "remaining_time": "2:37:27", "throughput": 8646.86, "total_tokens": 56262248} +{"current_steps": 83475, "total_steps": 204665, "loss": 0.0003, "lr": 1.4760689664804117e-06, "epoch": 2.0393081376884177, "percentage": 40.79, "elapsed_time": "1:48:27", "remaining_time": "2:37:26", "throughput": 8647.02, "total_tokens": 56266408} +{"current_steps": 83480, "total_steps": 204665, "loss": 0.0001, "lr": 1.4759939709506808e-06, "epoch": 2.039430288520265, "percentage": 40.79, "elapsed_time": "1:48:27", "remaining_time": "2:37:26", "throughput": 8647.09, "total_tokens": 56269928} +{"current_steps": 83485, "total_steps": 204665, "loss": 0.0291, "lr": 1.4759189719593936e-06, "epoch": 2.039552439352112, "percentage": 40.79, "elapsed_time": "1:48:27", "remaining_time": "2:37:26", "throughput": 8647.2, "total_tokens": 56273704} +{"current_steps": 83490, "total_steps": 204665, "loss": 0.0007, "lr": 1.4758439695070956e-06, "epoch": 2.0396745901839592, "percentage": 40.79, "elapsed_time": "1:48:28", "remaining_time": "2:37:25", "throughput": 8647.28, "total_tokens": 56277288} +{"current_steps": 83495, "total_steps": 204665, "loss": 0.0345, "lr": 1.475768963594332e-06, "epoch": 2.0397967410158064, "percentage": 40.8, "elapsed_time": "1:48:28", "remaining_time": "2:37:25", "throughput": 8647.35, "total_tokens": 56280744} +{"current_steps": 83500, "total_steps": 204665, "loss": 0.0004, "lr": 1.4756939542216488e-06, "epoch": 2.0399188918476536, "percentage": 40.8, "elapsed_time": "1:48:28", "remaining_time": "2:37:24", "throughput": 8647.4, "total_tokens": 56284008} +{"current_steps": 83505, "total_steps": 204665, "loss": 0.0383, "lr": 1.4756189413895912e-06, "epoch": 2.040041042679501, "percentage": 40.8, "elapsed_time": "1:48:29", "remaining_time": "2:37:24", "throughput": 8647.45, "total_tokens": 56287400} +{"current_steps": 83510, "total_steps": 204665, "loss": 0.0001, "lr": 1.4755439250987046e-06, "epoch": 2.040163193511348, "percentage": 40.8, "elapsed_time": "1:48:29", "remaining_time": "2:37:23", "throughput": 8647.48, "total_tokens": 56290536} +{"current_steps": 83515, "total_steps": 204665, "loss": 0.0001, "lr": 1.475468905349535e-06, "epoch": 2.040285344343195, "percentage": 40.81, "elapsed_time": "1:48:29", "remaining_time": "2:37:23", "throughput": 8647.53, "total_tokens": 56293864} +{"current_steps": 83520, "total_steps": 204665, "loss": 0.0621, "lr": 1.4753938821426274e-06, "epoch": 2.040407495175042, "percentage": 40.81, "elapsed_time": "1:48:30", "remaining_time": "2:37:22", "throughput": 8647.54, "total_tokens": 56296872} +{"current_steps": 83525, "total_steps": 204665, "loss": 0.0216, "lr": 1.4753188554785276e-06, "epoch": 2.040529646006889, "percentage": 40.81, "elapsed_time": "1:48:30", "remaining_time": "2:37:22", "throughput": 8647.57, "total_tokens": 56300008} +{"current_steps": 83530, "total_steps": 204665, "loss": 0.0002, "lr": 1.4752438253577816e-06, "epoch": 2.0406517968387363, "percentage": 40.81, "elapsed_time": "1:48:30", "remaining_time": "2:37:22", "throughput": 8647.62, "total_tokens": 56303272} +{"current_steps": 83535, "total_steps": 204665, "loss": 0.0003, "lr": 1.4751687917809342e-06, "epoch": 2.0407739476705835, "percentage": 40.82, "elapsed_time": "1:48:31", "remaining_time": "2:37:21", "throughput": 8647.68, "total_tokens": 56306664} +{"current_steps": 83540, "total_steps": 204665, "loss": 0.0503, "lr": 1.4750937547485316e-06, "epoch": 2.0408960985024307, "percentage": 40.82, "elapsed_time": "1:48:31", "remaining_time": "2:37:21", "throughput": 8647.68, "total_tokens": 56309608} +{"current_steps": 83545, "total_steps": 204665, "loss": 0.083, "lr": 1.4750187142611195e-06, "epoch": 2.041018249334278, "percentage": 40.82, "elapsed_time": "1:48:31", "remaining_time": "2:37:20", "throughput": 8647.79, "total_tokens": 56313384} +{"current_steps": 83550, "total_steps": 204665, "loss": 0.0003, "lr": 1.4749436703192436e-06, "epoch": 2.041140400166125, "percentage": 40.82, "elapsed_time": "1:48:32", "remaining_time": "2:37:20", "throughput": 8647.8, "total_tokens": 56316392} +{"current_steps": 83555, "total_steps": 204665, "loss": 0.0001, "lr": 1.4748686229234497e-06, "epoch": 2.0412625509979723, "percentage": 40.83, "elapsed_time": "1:48:32", "remaining_time": "2:37:19", "throughput": 8647.86, "total_tokens": 56319784} +{"current_steps": 83560, "total_steps": 204665, "loss": 0.1114, "lr": 1.474793572074283e-06, "epoch": 2.0413847018298195, "percentage": 40.83, "elapsed_time": "1:48:32", "remaining_time": "2:37:19", "throughput": 8647.88, "total_tokens": 56322856} +{"current_steps": 83565, "total_steps": 204665, "loss": 0.0002, "lr": 1.47471851777229e-06, "epoch": 2.0415068526616666, "percentage": 40.83, "elapsed_time": "1:48:33", "remaining_time": "2:37:18", "throughput": 8647.9, "total_tokens": 56325928} +{"current_steps": 83570, "total_steps": 204665, "loss": 0.0001, "lr": 1.4746434600180165e-06, "epoch": 2.041629003493514, "percentage": 40.83, "elapsed_time": "1:48:33", "remaining_time": "2:37:18", "throughput": 8647.92, "total_tokens": 56329000} +{"current_steps": 83575, "total_steps": 204665, "loss": 0.0626, "lr": 1.4745683988120079e-06, "epoch": 2.041751154325361, "percentage": 40.84, "elapsed_time": "1:48:33", "remaining_time": "2:37:17", "throughput": 8647.98, "total_tokens": 56332392} +{"current_steps": 83580, "total_steps": 204665, "loss": 0.0003, "lr": 1.4744933341548105e-06, "epoch": 2.041873305157208, "percentage": 40.84, "elapsed_time": "1:48:34", "remaining_time": "2:37:17", "throughput": 8648.03, "total_tokens": 56335720} +{"current_steps": 83585, "total_steps": 204665, "loss": 0.0433, "lr": 1.4744182660469697e-06, "epoch": 2.0419954559890554, "percentage": 40.84, "elapsed_time": "1:48:34", "remaining_time": "2:37:16", "throughput": 8648.08, "total_tokens": 56338984} +{"current_steps": 83590, "total_steps": 204665, "loss": 0.0005, "lr": 1.4743431944890315e-06, "epoch": 2.0421176068209026, "percentage": 40.84, "elapsed_time": "1:48:34", "remaining_time": "2:37:16", "throughput": 8648.15, "total_tokens": 56342504} +{"current_steps": 83595, "total_steps": 204665, "loss": 0.107, "lr": 1.4742681194815423e-06, "epoch": 2.04223975765275, "percentage": 40.84, "elapsed_time": "1:48:35", "remaining_time": "2:37:16", "throughput": 8648.23, "total_tokens": 56346088} +{"current_steps": 83600, "total_steps": 204665, "loss": 0.0558, "lr": 1.4741930410250477e-06, "epoch": 2.042361908484597, "percentage": 40.85, "elapsed_time": "1:48:35", "remaining_time": "2:37:15", "throughput": 8648.32, "total_tokens": 56349672} +{"current_steps": 83605, "total_steps": 204665, "loss": 0.0592, "lr": 1.4741179591200936e-06, "epoch": 2.042484059316444, "percentage": 40.85, "elapsed_time": "1:48:36", "remaining_time": "2:37:15", "throughput": 8648.37, "total_tokens": 56353064} +{"current_steps": 83610, "total_steps": 204665, "loss": 0.0792, "lr": 1.4740428737672263e-06, "epoch": 2.042606210148291, "percentage": 40.85, "elapsed_time": "1:48:36", "remaining_time": "2:37:14", "throughput": 8648.39, "total_tokens": 56356072} +{"current_steps": 83615, "total_steps": 204665, "loss": 0.0275, "lr": 1.4739677849669919e-06, "epoch": 2.042728360980138, "percentage": 40.85, "elapsed_time": "1:48:36", "remaining_time": "2:37:14", "throughput": 8648.47, "total_tokens": 56359656} +{"current_steps": 83620, "total_steps": 204665, "loss": 0.0653, "lr": 1.4738926927199358e-06, "epoch": 2.0428505118119853, "percentage": 40.86, "elapsed_time": "1:48:37", "remaining_time": "2:37:13", "throughput": 8648.53, "total_tokens": 56363048} +{"current_steps": 83625, "total_steps": 204665, "loss": 0.0515, "lr": 1.473817597026605e-06, "epoch": 2.0429726626438325, "percentage": 40.86, "elapsed_time": "1:48:37", "remaining_time": "2:37:13", "throughput": 8648.61, "total_tokens": 56366632} +{"current_steps": 83630, "total_steps": 204665, "loss": 0.0004, "lr": 1.4737424978875453e-06, "epoch": 2.0430948134756797, "percentage": 40.86, "elapsed_time": "1:48:37", "remaining_time": "2:37:12", "throughput": 8648.68, "total_tokens": 56370152} +{"current_steps": 83635, "total_steps": 204665, "loss": 0.0693, "lr": 1.4736673953033023e-06, "epoch": 2.043216964307527, "percentage": 40.86, "elapsed_time": "1:48:38", "remaining_time": "2:37:12", "throughput": 8648.72, "total_tokens": 56373416} +{"current_steps": 83640, "total_steps": 204665, "loss": 0.0005, "lr": 1.473592289274423e-06, "epoch": 2.043339115139374, "percentage": 40.87, "elapsed_time": "1:48:38", "remaining_time": "2:37:12", "throughput": 8648.75, "total_tokens": 56376488} +{"current_steps": 83645, "total_steps": 204665, "loss": 0.0482, "lr": 1.473517179801453e-06, "epoch": 2.0434612659712212, "percentage": 40.87, "elapsed_time": "1:48:38", "remaining_time": "2:37:11", "throughput": 8648.77, "total_tokens": 56379624} +{"current_steps": 83650, "total_steps": 204665, "loss": 0.0008, "lr": 1.4734420668849384e-06, "epoch": 2.0435834168030684, "percentage": 40.87, "elapsed_time": "1:48:39", "remaining_time": "2:37:11", "throughput": 8648.8, "total_tokens": 56382760} +{"current_steps": 83655, "total_steps": 204665, "loss": 0.0539, "lr": 1.4733669505254263e-06, "epoch": 2.0437055676349156, "percentage": 40.87, "elapsed_time": "1:48:39", "remaining_time": "2:37:10", "throughput": 8648.95, "total_tokens": 56386856} +{"current_steps": 83660, "total_steps": 204665, "loss": 0.0019, "lr": 1.473291830723462e-06, "epoch": 2.043827718466763, "percentage": 40.88, "elapsed_time": "1:48:39", "remaining_time": "2:37:10", "throughput": 8649.03, "total_tokens": 56390504} +{"current_steps": 83665, "total_steps": 204665, "loss": 0.0344, "lr": 1.4732167074795925e-06, "epoch": 2.04394986929861, "percentage": 40.88, "elapsed_time": "1:48:40", "remaining_time": "2:37:09", "throughput": 8649.04, "total_tokens": 56393448} +{"current_steps": 83670, "total_steps": 204665, "loss": 0.0002, "lr": 1.4731415807943638e-06, "epoch": 2.044072020130457, "percentage": 40.88, "elapsed_time": "1:48:40", "remaining_time": "2:37:09", "throughput": 8649.15, "total_tokens": 56397288} +{"current_steps": 83675, "total_steps": 204665, "loss": 0.0003, "lr": 1.4730664506683219e-06, "epoch": 2.0441941709623044, "percentage": 40.88, "elapsed_time": "1:48:40", "remaining_time": "2:37:08", "throughput": 8649.22, "total_tokens": 56400744} +{"current_steps": 83680, "total_steps": 204665, "loss": 0.0002, "lr": 1.4729913171020138e-06, "epoch": 2.0443163217941516, "percentage": 40.89, "elapsed_time": "1:48:41", "remaining_time": "2:37:08", "throughput": 8649.28, "total_tokens": 56404200} +{"current_steps": 83685, "total_steps": 204665, "loss": 0.0814, "lr": 1.4729161800959857e-06, "epoch": 2.0444384726259988, "percentage": 40.89, "elapsed_time": "1:48:41", "remaining_time": "2:37:08", "throughput": 8649.3, "total_tokens": 56407272} +{"current_steps": 83690, "total_steps": 204665, "loss": 0.0003, "lr": 1.4728410396507839e-06, "epoch": 2.044560623457846, "percentage": 40.89, "elapsed_time": "1:48:41", "remaining_time": "2:37:07", "throughput": 8649.32, "total_tokens": 56410344} +{"current_steps": 83695, "total_steps": 204665, "loss": 0.0005, "lr": 1.4727658957669548e-06, "epoch": 2.0446827742896927, "percentage": 40.89, "elapsed_time": "1:48:42", "remaining_time": "2:37:07", "throughput": 8649.35, "total_tokens": 56413480} +{"current_steps": 83700, "total_steps": 204665, "loss": 0.0, "lr": 1.472690748445045e-06, "epoch": 2.04480492512154, "percentage": 40.9, "elapsed_time": "1:48:42", "remaining_time": "2:37:06", "throughput": 8649.39, "total_tokens": 56416680} +{"current_steps": 83705, "total_steps": 204665, "loss": 0.0007, "lr": 1.4726155976856012e-06, "epoch": 2.044927075953387, "percentage": 40.9, "elapsed_time": "1:48:42", "remaining_time": "2:37:06", "throughput": 8649.45, "total_tokens": 56420072} +{"current_steps": 83710, "total_steps": 204665, "loss": 0.0592, "lr": 1.4725404434891693e-06, "epoch": 2.0450492267852343, "percentage": 40.9, "elapsed_time": "1:48:43", "remaining_time": "2:37:05", "throughput": 8649.54, "total_tokens": 56423784} +{"current_steps": 83715, "total_steps": 204665, "loss": 0.0205, "lr": 1.472465285856296e-06, "epoch": 2.0451713776170815, "percentage": 40.9, "elapsed_time": "1:48:43", "remaining_time": "2:37:05", "throughput": 8649.66, "total_tokens": 56427688} +{"current_steps": 83720, "total_steps": 204665, "loss": 0.0001, "lr": 1.4723901247875283e-06, "epoch": 2.0452935284489286, "percentage": 40.91, "elapsed_time": "1:48:44", "remaining_time": "2:37:04", "throughput": 8649.7, "total_tokens": 56430888} +{"current_steps": 83725, "total_steps": 204665, "loss": 0.0001, "lr": 1.4723149602834127e-06, "epoch": 2.045415679280776, "percentage": 40.91, "elapsed_time": "1:48:44", "remaining_time": "2:37:04", "throughput": 8649.74, "total_tokens": 56434152} +{"current_steps": 83730, "total_steps": 204665, "loss": 0.0, "lr": 1.4722397923444955e-06, "epoch": 2.045537830112623, "percentage": 40.91, "elapsed_time": "1:48:44", "remaining_time": "2:37:03", "throughput": 8649.79, "total_tokens": 56437416} +{"current_steps": 83735, "total_steps": 204665, "loss": 0.0, "lr": 1.4721646209713239e-06, "epoch": 2.04565998094447, "percentage": 40.91, "elapsed_time": "1:48:45", "remaining_time": "2:37:03", "throughput": 8649.84, "total_tokens": 56440744} +{"current_steps": 83740, "total_steps": 204665, "loss": 0.0001, "lr": 1.472089446164444e-06, "epoch": 2.0457821317763174, "percentage": 40.92, "elapsed_time": "1:48:45", "remaining_time": "2:37:03", "throughput": 8649.85, "total_tokens": 56443752} +{"current_steps": 83745, "total_steps": 204665, "loss": 0.0003, "lr": 1.4720142679244022e-06, "epoch": 2.0459042826081646, "percentage": 40.92, "elapsed_time": "1:48:45", "remaining_time": "2:37:02", "throughput": 8649.89, "total_tokens": 56447016} +{"current_steps": 83750, "total_steps": 204665, "loss": 0.049, "lr": 1.471939086251746e-06, "epoch": 2.046026433440012, "percentage": 40.92, "elapsed_time": "1:48:46", "remaining_time": "2:37:02", "throughput": 8649.97, "total_tokens": 56450536} +{"current_steps": 83755, "total_steps": 204665, "loss": 0.0, "lr": 1.471863901147022e-06, "epoch": 2.046148584271859, "percentage": 40.92, "elapsed_time": "1:48:46", "remaining_time": "2:37:01", "throughput": 8650.02, "total_tokens": 56453864} +{"current_steps": 83760, "total_steps": 204665, "loss": 0.062, "lr": 1.4717887126107766e-06, "epoch": 2.046270735103706, "percentage": 40.93, "elapsed_time": "1:48:46", "remaining_time": "2:37:01", "throughput": 8650.13, "total_tokens": 56457640} +{"current_steps": 83765, "total_steps": 204665, "loss": 0.0396, "lr": 1.471713520643557e-06, "epoch": 2.0463928859355534, "percentage": 40.93, "elapsed_time": "1:48:47", "remaining_time": "2:37:00", "throughput": 8650.21, "total_tokens": 56461224} +{"current_steps": 83770, "total_steps": 204665, "loss": 0.0001, "lr": 1.4716383252459096e-06, "epoch": 2.0465150367674005, "percentage": 40.93, "elapsed_time": "1:48:47", "remaining_time": "2:37:00", "throughput": 8650.28, "total_tokens": 56464680} +{"current_steps": 83775, "total_steps": 204665, "loss": 0.0774, "lr": 1.4715631264183812e-06, "epoch": 2.0466371875992477, "percentage": 40.93, "elapsed_time": "1:48:47", "remaining_time": "2:36:59", "throughput": 8650.42, "total_tokens": 56468648} +{"current_steps": 83780, "total_steps": 204665, "loss": 0.0593, "lr": 1.4714879241615195e-06, "epoch": 2.046759338431095, "percentage": 40.94, "elapsed_time": "1:48:48", "remaining_time": "2:36:59", "throughput": 8650.48, "total_tokens": 56472040} +{"current_steps": 83785, "total_steps": 204665, "loss": 0.0001, "lr": 1.4714127184758703e-06, "epoch": 2.0468814892629417, "percentage": 40.94, "elapsed_time": "1:48:48", "remaining_time": "2:36:58", "throughput": 8650.51, "total_tokens": 56475240} +{"current_steps": 83790, "total_steps": 204665, "loss": 0.0007, "lr": 1.4713375093619812e-06, "epoch": 2.047003640094789, "percentage": 40.94, "elapsed_time": "1:48:48", "remaining_time": "2:36:58", "throughput": 8650.58, "total_tokens": 56478760} +{"current_steps": 83795, "total_steps": 204665, "loss": 0.0231, "lr": 1.471262296820399e-06, "epoch": 2.047125790926636, "percentage": 40.94, "elapsed_time": "1:48:49", "remaining_time": "2:36:58", "throughput": 8650.68, "total_tokens": 56482408} +{"current_steps": 83800, "total_steps": 204665, "loss": 0.0009, "lr": 1.4711870808516706e-06, "epoch": 2.0472479417584832, "percentage": 40.94, "elapsed_time": "1:48:49", "remaining_time": "2:36:57", "throughput": 8650.76, "total_tokens": 56486056} +{"current_steps": 83805, "total_steps": 204665, "loss": 0.0644, "lr": 1.4711118614563427e-06, "epoch": 2.0473700925903304, "percentage": 40.95, "elapsed_time": "1:48:49", "remaining_time": "2:36:57", "throughput": 8650.86, "total_tokens": 56489768} +{"current_steps": 83810, "total_steps": 204665, "loss": 0.0003, "lr": 1.4710366386349631e-06, "epoch": 2.0474922434221776, "percentage": 40.95, "elapsed_time": "1:48:50", "remaining_time": "2:36:56", "throughput": 8650.89, "total_tokens": 56492904} +{"current_steps": 83815, "total_steps": 204665, "loss": 0.0652, "lr": 1.4709614123880783e-06, "epoch": 2.047614394254025, "percentage": 40.95, "elapsed_time": "1:48:50", "remaining_time": "2:36:56", "throughput": 8650.94, "total_tokens": 56496168} +{"current_steps": 83820, "total_steps": 204665, "loss": 0.043, "lr": 1.470886182716235e-06, "epoch": 2.047736545085872, "percentage": 40.95, "elapsed_time": "1:48:50", "remaining_time": "2:36:55", "throughput": 8651.0, "total_tokens": 56499624} +{"current_steps": 83825, "total_steps": 204665, "loss": 0.044, "lr": 1.4708109496199815e-06, "epoch": 2.047858695917719, "percentage": 40.96, "elapsed_time": "1:48:51", "remaining_time": "2:36:55", "throughput": 8651.04, "total_tokens": 56502888} +{"current_steps": 83830, "total_steps": 204665, "loss": 0.0002, "lr": 1.4707357130998635e-06, "epoch": 2.0479808467495664, "percentage": 40.96, "elapsed_time": "1:48:51", "remaining_time": "2:36:54", "throughput": 8651.09, "total_tokens": 56506152} +{"current_steps": 83835, "total_steps": 204665, "loss": 0.1097, "lr": 1.470660473156429e-06, "epoch": 2.0481029975814136, "percentage": 40.96, "elapsed_time": "1:48:52", "remaining_time": "2:36:54", "throughput": 8651.13, "total_tokens": 56509416} +{"current_steps": 83840, "total_steps": 204665, "loss": 0.1012, "lr": 1.4705852297902248e-06, "epoch": 2.0482251484132608, "percentage": 40.96, "elapsed_time": "1:48:52", "remaining_time": "2:36:54", "throughput": 8651.25, "total_tokens": 56513320} +{"current_steps": 83845, "total_steps": 204665, "loss": 0.1403, "lr": 1.4705099830017983e-06, "epoch": 2.048347299245108, "percentage": 40.97, "elapsed_time": "1:48:52", "remaining_time": "2:36:53", "throughput": 8651.31, "total_tokens": 56516776} +{"current_steps": 83850, "total_steps": 204665, "loss": 0.1199, "lr": 1.470434732791697e-06, "epoch": 2.048469450076955, "percentage": 40.97, "elapsed_time": "1:48:53", "remaining_time": "2:36:53", "throughput": 8651.38, "total_tokens": 56520232} +{"current_steps": 83855, "total_steps": 204665, "loss": 0.0007, "lr": 1.4703594791604674e-06, "epoch": 2.0485916009088023, "percentage": 40.97, "elapsed_time": "1:48:53", "remaining_time": "2:36:52", "throughput": 8651.44, "total_tokens": 56523688} +{"current_steps": 83860, "total_steps": 204665, "loss": 0.0463, "lr": 1.4702842221086573e-06, "epoch": 2.0487137517406495, "percentage": 40.97, "elapsed_time": "1:48:53", "remaining_time": "2:36:52", "throughput": 8651.48, "total_tokens": 56526888} +{"current_steps": 83865, "total_steps": 204665, "loss": 0.0004, "lr": 1.470208961636814e-06, "epoch": 2.0488359025724967, "percentage": 40.98, "elapsed_time": "1:48:54", "remaining_time": "2:36:51", "throughput": 8651.52, "total_tokens": 56530152} +{"current_steps": 83870, "total_steps": 204665, "loss": 0.0006, "lr": 1.4701336977454841e-06, "epoch": 2.048958053404344, "percentage": 40.98, "elapsed_time": "1:48:54", "remaining_time": "2:36:51", "throughput": 8651.61, "total_tokens": 56533800} +{"current_steps": 83875, "total_steps": 204665, "loss": 0.0003, "lr": 1.470058430435216e-06, "epoch": 2.0490802042361906, "percentage": 40.98, "elapsed_time": "1:48:54", "remaining_time": "2:36:50", "throughput": 8651.71, "total_tokens": 56537512} +{"current_steps": 83880, "total_steps": 204665, "loss": 0.0003, "lr": 1.4699831597065565e-06, "epoch": 2.049202355068038, "percentage": 40.98, "elapsed_time": "1:48:55", "remaining_time": "2:36:50", "throughput": 8651.77, "total_tokens": 56540904} +{"current_steps": 83885, "total_steps": 204665, "loss": 0.0006, "lr": 1.469907885560053e-06, "epoch": 2.049324505899885, "percentage": 40.99, "elapsed_time": "1:48:55", "remaining_time": "2:36:50", "throughput": 8651.79, "total_tokens": 56544040} +{"current_steps": 83890, "total_steps": 204665, "loss": 0.0098, "lr": 1.4698326079962532e-06, "epoch": 2.049446656731732, "percentage": 40.99, "elapsed_time": "1:48:55", "remaining_time": "2:36:49", "throughput": 8651.85, "total_tokens": 56547368} +{"current_steps": 83895, "total_steps": 204665, "loss": 0.0327, "lr": 1.4697573270157038e-06, "epoch": 2.0495688075635794, "percentage": 40.99, "elapsed_time": "1:48:56", "remaining_time": "2:36:49", "throughput": 8651.9, "total_tokens": 56550696} +{"current_steps": 83900, "total_steps": 204665, "loss": 0.0614, "lr": 1.469682042618953e-06, "epoch": 2.0496909583954266, "percentage": 40.99, "elapsed_time": "1:48:56", "remaining_time": "2:36:48", "throughput": 8651.94, "total_tokens": 56553960} +{"current_steps": 83905, "total_steps": 204665, "loss": 0.0002, "lr": 1.469606754806548e-06, "epoch": 2.0498131092272738, "percentage": 41.0, "elapsed_time": "1:48:56", "remaining_time": "2:36:48", "throughput": 8651.97, "total_tokens": 56557096} +{"current_steps": 83910, "total_steps": 204665, "loss": 0.0003, "lr": 1.4695314635790366e-06, "epoch": 2.049935260059121, "percentage": 41.0, "elapsed_time": "1:48:57", "remaining_time": "2:36:47", "throughput": 8652.0, "total_tokens": 56560232} +{"current_steps": 83915, "total_steps": 204665, "loss": 0.0001, "lr": 1.4694561689369657e-06, "epoch": 2.050057410890968, "percentage": 41.0, "elapsed_time": "1:48:57", "remaining_time": "2:36:47", "throughput": 8652.01, "total_tokens": 56563240} +{"current_steps": 83920, "total_steps": 204665, "loss": 0.0111, "lr": 1.4693808708808837e-06, "epoch": 2.0501795617228153, "percentage": 41.0, "elapsed_time": "1:48:57", "remaining_time": "2:36:46", "throughput": 8652.12, "total_tokens": 56567016} +{"current_steps": 83925, "total_steps": 204665, "loss": 0.0002, "lr": 1.4693055694113377e-06, "epoch": 2.0503017125546625, "percentage": 41.01, "elapsed_time": "1:48:58", "remaining_time": "2:36:46", "throughput": 8652.17, "total_tokens": 56570408} +{"current_steps": 83930, "total_steps": 204665, "loss": 0.0001, "lr": 1.469230264528875e-06, "epoch": 2.0504238633865097, "percentage": 41.01, "elapsed_time": "1:48:58", "remaining_time": "2:36:45", "throughput": 8652.21, "total_tokens": 56573608} +{"current_steps": 83935, "total_steps": 204665, "loss": 0.0001, "lr": 1.469154956234044e-06, "epoch": 2.050546014218357, "percentage": 41.01, "elapsed_time": "1:48:58", "remaining_time": "2:36:45", "throughput": 8652.25, "total_tokens": 56576872} +{"current_steps": 83940, "total_steps": 204665, "loss": 0.0714, "lr": 1.4690796445273918e-06, "epoch": 2.050668165050204, "percentage": 41.01, "elapsed_time": "1:48:59", "remaining_time": "2:36:45", "throughput": 8652.3, "total_tokens": 56580200} +{"current_steps": 83945, "total_steps": 204665, "loss": 0.0373, "lr": 1.4690043294094665e-06, "epoch": 2.0507903158820513, "percentage": 41.02, "elapsed_time": "1:48:59", "remaining_time": "2:36:44", "throughput": 8652.38, "total_tokens": 56583784} +{"current_steps": 83950, "total_steps": 204665, "loss": 0.0004, "lr": 1.4689290108808152e-06, "epoch": 2.0509124667138985, "percentage": 41.02, "elapsed_time": "1:49:00", "remaining_time": "2:36:44", "throughput": 8652.41, "total_tokens": 56586920} +{"current_steps": 83955, "total_steps": 204665, "loss": 0.1476, "lr": 1.4688536889419861e-06, "epoch": 2.0510346175457457, "percentage": 41.02, "elapsed_time": "1:49:00", "remaining_time": "2:36:43", "throughput": 8652.42, "total_tokens": 56589928} +{"current_steps": 83960, "total_steps": 204665, "loss": 0.0411, "lr": 1.468778363593527e-06, "epoch": 2.051156768377593, "percentage": 41.02, "elapsed_time": "1:49:00", "remaining_time": "2:36:43", "throughput": 8652.49, "total_tokens": 56593384} +{"current_steps": 83965, "total_steps": 204665, "loss": 0.0005, "lr": 1.4687030348359855e-06, "epoch": 2.0512789192094396, "percentage": 41.03, "elapsed_time": "1:49:01", "remaining_time": "2:36:42", "throughput": 8652.53, "total_tokens": 56596648} +{"current_steps": 83970, "total_steps": 204665, "loss": 0.0247, "lr": 1.4686277026699094e-06, "epoch": 2.051401070041287, "percentage": 41.03, "elapsed_time": "1:49:01", "remaining_time": "2:36:42", "throughput": 8652.64, "total_tokens": 56600488} +{"current_steps": 83975, "total_steps": 204665, "loss": 0.0652, "lr": 1.4685523670958466e-06, "epoch": 2.051523220873134, "percentage": 41.03, "elapsed_time": "1:49:01", "remaining_time": "2:36:41", "throughput": 8652.68, "total_tokens": 56603752} +{"current_steps": 83980, "total_steps": 204665, "loss": 0.0007, "lr": 1.468477028114345e-06, "epoch": 2.051645371704981, "percentage": 41.03, "elapsed_time": "1:49:02", "remaining_time": "2:36:41", "throughput": 8652.69, "total_tokens": 56606760} +{"current_steps": 83985, "total_steps": 204665, "loss": 0.0728, "lr": 1.4684016857259524e-06, "epoch": 2.0517675225368284, "percentage": 41.04, "elapsed_time": "1:49:02", "remaining_time": "2:36:41", "throughput": 8652.76, "total_tokens": 56610280} +{"current_steps": 83990, "total_steps": 204665, "loss": 0.0004, "lr": 1.4683263399312171e-06, "epoch": 2.0518896733686756, "percentage": 41.04, "elapsed_time": "1:49:02", "remaining_time": "2:36:40", "throughput": 8652.8, "total_tokens": 56613480} +{"current_steps": 83995, "total_steps": 204665, "loss": 0.0446, "lr": 1.4682509907306863e-06, "epoch": 2.0520118242005227, "percentage": 41.04, "elapsed_time": "1:49:03", "remaining_time": "2:36:40", "throughput": 8652.84, "total_tokens": 56616744} +{"current_steps": 84000, "total_steps": 204665, "loss": 0.0003, "lr": 1.4681756381249085e-06, "epoch": 2.05213397503237, "percentage": 41.04, "elapsed_time": "1:49:03", "remaining_time": "2:36:39", "throughput": 8652.95, "total_tokens": 56620584} +{"current_steps": 84005, "total_steps": 204665, "loss": 0.0361, "lr": 1.4681002821144315e-06, "epoch": 2.052256125864217, "percentage": 41.05, "elapsed_time": "1:49:03", "remaining_time": "2:36:39", "throughput": 8653.03, "total_tokens": 56624104} +{"current_steps": 84010, "total_steps": 204665, "loss": 0.0764, "lr": 1.4680249226998033e-06, "epoch": 2.0523782766960643, "percentage": 41.05, "elapsed_time": "1:49:04", "remaining_time": "2:36:38", "throughput": 8653.09, "total_tokens": 56627560} +{"current_steps": 84015, "total_steps": 204665, "loss": 0.0414, "lr": 1.467949559881572e-06, "epoch": 2.0525004275279115, "percentage": 41.05, "elapsed_time": "1:49:04", "remaining_time": "2:36:38", "throughput": 8653.14, "total_tokens": 56630888} +{"current_steps": 84020, "total_steps": 204665, "loss": 0.0008, "lr": 1.467874193660286e-06, "epoch": 2.0526225783597587, "percentage": 41.05, "elapsed_time": "1:49:04", "remaining_time": "2:36:37", "throughput": 8653.2, "total_tokens": 56634280} +{"current_steps": 84025, "total_steps": 204665, "loss": 0.0001, "lr": 1.4677988240364922e-06, "epoch": 2.052744729191606, "percentage": 41.05, "elapsed_time": "1:49:05", "remaining_time": "2:36:37", "throughput": 8653.21, "total_tokens": 56637288} +{"current_steps": 84030, "total_steps": 204665, "loss": 0.0377, "lr": 1.4677234510107402e-06, "epoch": 2.052866880023453, "percentage": 41.06, "elapsed_time": "1:49:05", "remaining_time": "2:36:36", "throughput": 8653.25, "total_tokens": 56640552} +{"current_steps": 84035, "total_steps": 204665, "loss": 0.0002, "lr": 1.4676480745835774e-06, "epoch": 2.0529890308553003, "percentage": 41.06, "elapsed_time": "1:49:05", "remaining_time": "2:36:36", "throughput": 8653.31, "total_tokens": 56644008} +{"current_steps": 84040, "total_steps": 204665, "loss": 0.0301, "lr": 1.4675726947555519e-06, "epoch": 2.0531111816871475, "percentage": 41.06, "elapsed_time": "1:49:06", "remaining_time": "2:36:36", "throughput": 8653.35, "total_tokens": 56647208} +{"current_steps": 84045, "total_steps": 204665, "loss": 0.0034, "lr": 1.467497311527212e-06, "epoch": 2.0532333325189946, "percentage": 41.06, "elapsed_time": "1:49:06", "remaining_time": "2:36:35", "throughput": 8653.49, "total_tokens": 56651304} +{"current_steps": 84050, "total_steps": 204665, "loss": 0.0001, "lr": 1.467421924899106e-06, "epoch": 2.053355483350842, "percentage": 41.07, "elapsed_time": "1:49:06", "remaining_time": "2:36:35", "throughput": 8653.53, "total_tokens": 56654504} +{"current_steps": 84055, "total_steps": 204665, "loss": 0.0613, "lr": 1.4673465348717817e-06, "epoch": 2.0534776341826886, "percentage": 41.07, "elapsed_time": "1:49:07", "remaining_time": "2:36:34", "throughput": 8653.58, "total_tokens": 56657832} +{"current_steps": 84060, "total_steps": 204665, "loss": 0.025, "lr": 1.4672711414457879e-06, "epoch": 2.0535997850145358, "percentage": 41.07, "elapsed_time": "1:49:07", "remaining_time": "2:36:34", "throughput": 8653.7, "total_tokens": 56661736} +{"current_steps": 84065, "total_steps": 204665, "loss": 0.0004, "lr": 1.4671957446216728e-06, "epoch": 2.053721935846383, "percentage": 41.07, "elapsed_time": "1:49:08", "remaining_time": "2:36:33", "throughput": 8653.72, "total_tokens": 56664744} +{"current_steps": 84070, "total_steps": 204665, "loss": 0.0001, "lr": 1.4671203443999844e-06, "epoch": 2.05384408667823, "percentage": 41.08, "elapsed_time": "1:49:08", "remaining_time": "2:36:33", "throughput": 8653.8, "total_tokens": 56668328} +{"current_steps": 84075, "total_steps": 204665, "loss": 0.0857, "lr": 1.4670449407812715e-06, "epoch": 2.0539662375100773, "percentage": 41.08, "elapsed_time": "1:49:08", "remaining_time": "2:36:32", "throughput": 8653.82, "total_tokens": 56671400} +{"current_steps": 84080, "total_steps": 204665, "loss": 0.0003, "lr": 1.4669695337660818e-06, "epoch": 2.0540883883419245, "percentage": 41.08, "elapsed_time": "1:49:09", "remaining_time": "2:36:32", "throughput": 8653.87, "total_tokens": 56674728} +{"current_steps": 84085, "total_steps": 204665, "loss": 0.151, "lr": 1.4668941233549642e-06, "epoch": 2.0542105391737717, "percentage": 41.08, "elapsed_time": "1:49:09", "remaining_time": "2:36:32", "throughput": 8653.93, "total_tokens": 56678120} +{"current_steps": 84090, "total_steps": 204665, "loss": 0.1097, "lr": 1.4668187095484673e-06, "epoch": 2.054332690005619, "percentage": 41.09, "elapsed_time": "1:49:09", "remaining_time": "2:36:31", "throughput": 8653.98, "total_tokens": 56681448} +{"current_steps": 84095, "total_steps": 204665, "loss": 0.0005, "lr": 1.4667432923471389e-06, "epoch": 2.054454840837466, "percentage": 41.09, "elapsed_time": "1:49:10", "remaining_time": "2:36:31", "throughput": 8654.07, "total_tokens": 56685160} +{"current_steps": 84100, "total_steps": 204665, "loss": 0.0368, "lr": 1.4666678717515275e-06, "epoch": 2.0545769916693133, "percentage": 41.09, "elapsed_time": "1:49:10", "remaining_time": "2:36:30", "throughput": 8654.11, "total_tokens": 56688424} +{"current_steps": 84105, "total_steps": 204665, "loss": 0.0007, "lr": 1.4665924477621824e-06, "epoch": 2.0546991425011605, "percentage": 41.09, "elapsed_time": "1:49:10", "remaining_time": "2:36:30", "throughput": 8654.18, "total_tokens": 56691880} +{"current_steps": 84110, "total_steps": 204665, "loss": 0.0392, "lr": 1.466517020379651e-06, "epoch": 2.0548212933330077, "percentage": 41.1, "elapsed_time": "1:49:11", "remaining_time": "2:36:29", "throughput": 8654.26, "total_tokens": 56695464} +{"current_steps": 84115, "total_steps": 204665, "loss": 0.0001, "lr": 1.4664415896044826e-06, "epoch": 2.054943444164855, "percentage": 41.1, "elapsed_time": "1:49:11", "remaining_time": "2:36:29", "throughput": 8654.35, "total_tokens": 56699176} +{"current_steps": 84120, "total_steps": 204665, "loss": 0.0491, "lr": 1.466366155437225e-06, "epoch": 2.055065594996702, "percentage": 41.1, "elapsed_time": "1:49:11", "remaining_time": "2:36:28", "throughput": 8654.37, "total_tokens": 56702248} +{"current_steps": 84125, "total_steps": 204665, "loss": 0.0004, "lr": 1.4662907178784277e-06, "epoch": 2.0551877458285492, "percentage": 41.1, "elapsed_time": "1:49:12", "remaining_time": "2:36:28", "throughput": 8654.45, "total_tokens": 56705832} +{"current_steps": 84130, "total_steps": 204665, "loss": 0.0002, "lr": 1.466215276928639e-06, "epoch": 2.0553098966603964, "percentage": 41.11, "elapsed_time": "1:49:12", "remaining_time": "2:36:28", "throughput": 8654.5, "total_tokens": 56709160} +{"current_steps": 84135, "total_steps": 204665, "loss": 0.0376, "lr": 1.4661398325884074e-06, "epoch": 2.0554320474922436, "percentage": 41.11, "elapsed_time": "1:49:12", "remaining_time": "2:36:27", "throughput": 8654.61, "total_tokens": 56712936} +{"current_steps": 84140, "total_steps": 204665, "loss": 0.1215, "lr": 1.4660643848582813e-06, "epoch": 2.0555541983240904, "percentage": 41.11, "elapsed_time": "1:49:13", "remaining_time": "2:36:27", "throughput": 8654.68, "total_tokens": 56716456} +{"current_steps": 84145, "total_steps": 204665, "loss": 0.057, "lr": 1.4659889337388099e-06, "epoch": 2.0556763491559376, "percentage": 41.11, "elapsed_time": "1:49:13", "remaining_time": "2:36:26", "throughput": 8654.72, "total_tokens": 56719656} +{"current_steps": 84150, "total_steps": 204665, "loss": 0.0001, "lr": 1.4659134792305415e-06, "epoch": 2.0557984999877847, "percentage": 41.12, "elapsed_time": "1:49:13", "remaining_time": "2:36:26", "throughput": 8654.78, "total_tokens": 56723112} +{"current_steps": 84155, "total_steps": 204665, "loss": 0.0401, "lr": 1.4658380213340249e-06, "epoch": 2.055920650819632, "percentage": 41.12, "elapsed_time": "1:49:14", "remaining_time": "2:36:25", "throughput": 8654.8, "total_tokens": 56726248} +{"current_steps": 84160, "total_steps": 204665, "loss": 0.0643, "lr": 1.465762560049809e-06, "epoch": 2.056042801651479, "percentage": 41.12, "elapsed_time": "1:49:14", "remaining_time": "2:36:25", "throughput": 8654.83, "total_tokens": 56729384} +{"current_steps": 84165, "total_steps": 204665, "loss": 0.0002, "lr": 1.4656870953784426e-06, "epoch": 2.0561649524833263, "percentage": 41.12, "elapsed_time": "1:49:15", "remaining_time": "2:36:24", "throughput": 8654.95, "total_tokens": 56733288} +{"current_steps": 84170, "total_steps": 204665, "loss": 0.0004, "lr": 1.4656116273204742e-06, "epoch": 2.0562871033151735, "percentage": 41.13, "elapsed_time": "1:49:15", "remaining_time": "2:36:24", "throughput": 8654.98, "total_tokens": 56736424} +{"current_steps": 84175, "total_steps": 204665, "loss": 0.0329, "lr": 1.465536155876453e-06, "epoch": 2.0564092541470207, "percentage": 41.13, "elapsed_time": "1:49:15", "remaining_time": "2:36:23", "throughput": 8655.04, "total_tokens": 56739816} +{"current_steps": 84180, "total_steps": 204665, "loss": 0.1285, "lr": 1.4654606810469275e-06, "epoch": 2.056531404978868, "percentage": 41.13, "elapsed_time": "1:49:16", "remaining_time": "2:36:23", "throughput": 8655.09, "total_tokens": 56743208} +{"current_steps": 84185, "total_steps": 204665, "loss": 0.0529, "lr": 1.4653852028324467e-06, "epoch": 2.056653555810715, "percentage": 41.13, "elapsed_time": "1:49:16", "remaining_time": "2:36:23", "throughput": 8655.2, "total_tokens": 56746984} +{"current_steps": 84190, "total_steps": 204665, "loss": 0.0003, "lr": 1.4653097212335594e-06, "epoch": 2.0567757066425623, "percentage": 41.14, "elapsed_time": "1:49:16", "remaining_time": "2:36:22", "throughput": 8655.31, "total_tokens": 56750824} +{"current_steps": 84195, "total_steps": 204665, "loss": 0.0904, "lr": 1.465234236250815e-06, "epoch": 2.0568978574744095, "percentage": 41.14, "elapsed_time": "1:49:17", "remaining_time": "2:36:22", "throughput": 8655.4, "total_tokens": 56754472} +{"current_steps": 84200, "total_steps": 204665, "loss": 0.0169, "lr": 1.4651587478847623e-06, "epoch": 2.0570200083062566, "percentage": 41.14, "elapsed_time": "1:49:17", "remaining_time": "2:36:21", "throughput": 8655.45, "total_tokens": 56757736} +{"current_steps": 84205, "total_steps": 204665, "loss": 0.0745, "lr": 1.4650832561359496e-06, "epoch": 2.057142159138104, "percentage": 41.14, "elapsed_time": "1:49:17", "remaining_time": "2:36:21", "throughput": 8655.5, "total_tokens": 56761128} +{"current_steps": 84210, "total_steps": 204665, "loss": 0.0949, "lr": 1.4650077610049264e-06, "epoch": 2.057264309969951, "percentage": 41.15, "elapsed_time": "1:49:18", "remaining_time": "2:36:20", "throughput": 8655.56, "total_tokens": 56764584} +{"current_steps": 84215, "total_steps": 204665, "loss": 0.0992, "lr": 1.4649322624922418e-06, "epoch": 2.057386460801798, "percentage": 41.15, "elapsed_time": "1:49:18", "remaining_time": "2:36:20", "throughput": 8655.69, "total_tokens": 56768488} +{"current_steps": 84220, "total_steps": 204665, "loss": 0.0001, "lr": 1.4648567605984447e-06, "epoch": 2.0575086116336454, "percentage": 41.15, "elapsed_time": "1:49:18", "remaining_time": "2:36:19", "throughput": 8655.75, "total_tokens": 56771880} +{"current_steps": 84225, "total_steps": 204665, "loss": 0.0006, "lr": 1.4647812553240844e-06, "epoch": 2.0576307624654926, "percentage": 41.15, "elapsed_time": "1:49:19", "remaining_time": "2:36:19", "throughput": 8655.83, "total_tokens": 56775464} +{"current_steps": 84230, "total_steps": 204665, "loss": 0.0006, "lr": 1.4647057466697094e-06, "epoch": 2.0577529132973393, "percentage": 41.16, "elapsed_time": "1:49:19", "remaining_time": "2:36:19", "throughput": 8655.9, "total_tokens": 56778984} +{"current_steps": 84235, "total_steps": 204665, "loss": 0.0472, "lr": 1.4646302346358697e-06, "epoch": 2.0578750641291865, "percentage": 41.16, "elapsed_time": "1:49:19", "remaining_time": "2:36:18", "throughput": 8655.97, "total_tokens": 56782504} +{"current_steps": 84240, "total_steps": 204665, "loss": 0.0006, "lr": 1.4645547192231134e-06, "epoch": 2.0579972149610337, "percentage": 41.16, "elapsed_time": "1:49:20", "remaining_time": "2:36:18", "throughput": 8655.99, "total_tokens": 56785576} +{"current_steps": 84245, "total_steps": 204665, "loss": 0.0006, "lr": 1.4644792004319909e-06, "epoch": 2.058119365792881, "percentage": 41.16, "elapsed_time": "1:49:20", "remaining_time": "2:36:17", "throughput": 8656.04, "total_tokens": 56788968} +{"current_steps": 84250, "total_steps": 204665, "loss": 0.0503, "lr": 1.4644036782630502e-06, "epoch": 2.058241516624728, "percentage": 41.16, "elapsed_time": "1:49:20", "remaining_time": "2:36:17", "throughput": 8656.06, "total_tokens": 56792040} +{"current_steps": 84255, "total_steps": 204665, "loss": 0.11, "lr": 1.4643281527168414e-06, "epoch": 2.0583636674565753, "percentage": 41.17, "elapsed_time": "1:49:21", "remaining_time": "2:36:16", "throughput": 8656.15, "total_tokens": 56795688} +{"current_steps": 84260, "total_steps": 204665, "loss": 0.0372, "lr": 1.464252623793913e-06, "epoch": 2.0584858182884225, "percentage": 41.17, "elapsed_time": "1:49:21", "remaining_time": "2:36:16", "throughput": 8656.2, "total_tokens": 56799016} +{"current_steps": 84265, "total_steps": 204665, "loss": 0.0007, "lr": 1.464177091494815e-06, "epoch": 2.0586079691202697, "percentage": 41.17, "elapsed_time": "1:49:21", "remaining_time": "2:36:15", "throughput": 8656.21, "total_tokens": 56801960} +{"current_steps": 84270, "total_steps": 204665, "loss": 0.0004, "lr": 1.4641015558200962e-06, "epoch": 2.058730119952117, "percentage": 41.17, "elapsed_time": "1:49:22", "remaining_time": "2:36:15", "throughput": 8656.26, "total_tokens": 56805352} +{"current_steps": 84275, "total_steps": 204665, "loss": 0.0008, "lr": 1.4640260167703058e-06, "epoch": 2.058852270783964, "percentage": 41.18, "elapsed_time": "1:49:22", "remaining_time": "2:36:15", "throughput": 8656.42, "total_tokens": 56809576} +{"current_steps": 84280, "total_steps": 204665, "loss": 0.0138, "lr": 1.4639504743459937e-06, "epoch": 2.0589744216158112, "percentage": 41.18, "elapsed_time": "1:49:23", "remaining_time": "2:36:14", "throughput": 8656.47, "total_tokens": 56812904} +{"current_steps": 84285, "total_steps": 204665, "loss": 0.0002, "lr": 1.463874928547709e-06, "epoch": 2.0590965724476584, "percentage": 41.18, "elapsed_time": "1:49:23", "remaining_time": "2:36:14", "throughput": 8656.47, "total_tokens": 56815784} +{"current_steps": 84290, "total_steps": 204665, "loss": 0.0387, "lr": 1.463799379376001e-06, "epoch": 2.0592187232795056, "percentage": 41.18, "elapsed_time": "1:49:23", "remaining_time": "2:36:13", "throughput": 8656.48, "total_tokens": 56818792} +{"current_steps": 84295, "total_steps": 204665, "loss": 0.0357, "lr": 1.463723826831419e-06, "epoch": 2.059340874111353, "percentage": 41.19, "elapsed_time": "1:49:24", "remaining_time": "2:36:13", "throughput": 8656.58, "total_tokens": 56822568} +{"current_steps": 84300, "total_steps": 204665, "loss": 0.0306, "lr": 1.463648270914513e-06, "epoch": 2.0594630249432, "percentage": 41.19, "elapsed_time": "1:49:24", "remaining_time": "2:36:12", "throughput": 8656.61, "total_tokens": 56825704} +{"current_steps": 84305, "total_steps": 204665, "loss": 0.0431, "lr": 1.4635727116258316e-06, "epoch": 2.059585175775047, "percentage": 41.19, "elapsed_time": "1:49:24", "remaining_time": "2:36:12", "throughput": 8656.66, "total_tokens": 56829032} +{"current_steps": 84310, "total_steps": 204665, "loss": 0.0383, "lr": 1.4634971489659251e-06, "epoch": 2.0597073266068944, "percentage": 41.19, "elapsed_time": "1:49:25", "remaining_time": "2:36:11", "throughput": 8656.72, "total_tokens": 56832424} +{"current_steps": 84315, "total_steps": 204665, "loss": 0.0003, "lr": 1.4634215829353425e-06, "epoch": 2.0598294774387416, "percentage": 41.2, "elapsed_time": "1:49:25", "remaining_time": "2:36:11", "throughput": 8656.77, "total_tokens": 56835816} +{"current_steps": 84320, "total_steps": 204665, "loss": 0.082, "lr": 1.4633460135346334e-06, "epoch": 2.0599516282705883, "percentage": 41.2, "elapsed_time": "1:49:25", "remaining_time": "2:36:11", "throughput": 8656.84, "total_tokens": 56839272} +{"current_steps": 84325, "total_steps": 204665, "loss": 0.0002, "lr": 1.4632704407643477e-06, "epoch": 2.0600737791024355, "percentage": 41.2, "elapsed_time": "1:49:26", "remaining_time": "2:36:10", "throughput": 8656.85, "total_tokens": 56842280} +{"current_steps": 84330, "total_steps": 204665, "loss": 0.0491, "lr": 1.4631948646250347e-06, "epoch": 2.0601959299342827, "percentage": 41.2, "elapsed_time": "1:49:26", "remaining_time": "2:36:10", "throughput": 8656.89, "total_tokens": 56845544} +{"current_steps": 84335, "total_steps": 204665, "loss": 0.0378, "lr": 1.4631192851172437e-06, "epoch": 2.06031808076613, "percentage": 41.21, "elapsed_time": "1:49:26", "remaining_time": "2:36:09", "throughput": 8656.95, "total_tokens": 56849000} +{"current_steps": 84340, "total_steps": 204665, "loss": 0.1917, "lr": 1.4630437022415252e-06, "epoch": 2.060440231597977, "percentage": 41.21, "elapsed_time": "1:49:27", "remaining_time": "2:36:09", "throughput": 8657.03, "total_tokens": 56852584} +{"current_steps": 84345, "total_steps": 204665, "loss": 0.0425, "lr": 1.462968115998428e-06, "epoch": 2.0605623824298243, "percentage": 41.21, "elapsed_time": "1:49:27", "remaining_time": "2:36:08", "throughput": 8657.1, "total_tokens": 56856040} +{"current_steps": 84350, "total_steps": 204665, "loss": 0.0002, "lr": 1.4628925263885025e-06, "epoch": 2.0606845332616714, "percentage": 41.21, "elapsed_time": "1:49:27", "remaining_time": "2:36:08", "throughput": 8657.18, "total_tokens": 56859624} +{"current_steps": 84355, "total_steps": 204665, "loss": 0.0103, "lr": 1.4628169334122979e-06, "epoch": 2.0608066840935186, "percentage": 41.22, "elapsed_time": "1:49:28", "remaining_time": "2:36:07", "throughput": 8657.2, "total_tokens": 56862696} +{"current_steps": 84360, "total_steps": 204665, "loss": 0.0621, "lr": 1.462741337070364e-06, "epoch": 2.060928834925366, "percentage": 41.22, "elapsed_time": "1:49:28", "remaining_time": "2:36:07", "throughput": 8657.25, "total_tokens": 56866088} +{"current_steps": 84365, "total_steps": 204665, "loss": 0.0002, "lr": 1.4626657373632504e-06, "epoch": 2.061050985757213, "percentage": 41.22, "elapsed_time": "1:49:28", "remaining_time": "2:36:06", "throughput": 8657.3, "total_tokens": 56869416} +{"current_steps": 84370, "total_steps": 204665, "loss": 0.0006, "lr": 1.4625901342915074e-06, "epoch": 2.06117313658906, "percentage": 41.22, "elapsed_time": "1:49:29", "remaining_time": "2:36:06", "throughput": 8657.48, "total_tokens": 56873768} +{"current_steps": 84375, "total_steps": 204665, "loss": 0.0001, "lr": 1.4625145278556846e-06, "epoch": 2.0612952874209074, "percentage": 41.23, "elapsed_time": "1:49:29", "remaining_time": "2:36:06", "throughput": 8657.51, "total_tokens": 56876904} +{"current_steps": 84380, "total_steps": 204665, "loss": 0.0656, "lr": 1.4624389180563314e-06, "epoch": 2.0614174382527546, "percentage": 41.23, "elapsed_time": "1:49:30", "remaining_time": "2:36:05", "throughput": 8657.65, "total_tokens": 56880936} +{"current_steps": 84385, "total_steps": 204665, "loss": 0.0701, "lr": 1.4623633048939984e-06, "epoch": 2.0615395890846018, "percentage": 41.23, "elapsed_time": "1:49:30", "remaining_time": "2:36:05", "throughput": 8657.71, "total_tokens": 56884392} +{"current_steps": 84390, "total_steps": 204665, "loss": 0.0007, "lr": 1.462287688369235e-06, "epoch": 2.061661739916449, "percentage": 41.23, "elapsed_time": "1:49:30", "remaining_time": "2:36:04", "throughput": 8657.77, "total_tokens": 56887784} +{"current_steps": 84395, "total_steps": 204665, "loss": 0.0458, "lr": 1.4622120684825912e-06, "epoch": 2.061783890748296, "percentage": 41.24, "elapsed_time": "1:49:31", "remaining_time": "2:36:04", "throughput": 8657.85, "total_tokens": 56891368} +{"current_steps": 84400, "total_steps": 204665, "loss": 0.0004, "lr": 1.4621364452346168e-06, "epoch": 2.0619060415801433, "percentage": 41.24, "elapsed_time": "1:49:31", "remaining_time": "2:36:03", "throughput": 8657.92, "total_tokens": 56894888} +{"current_steps": 84405, "total_steps": 204665, "loss": 0.1089, "lr": 1.4620608186258617e-06, "epoch": 2.0620281924119905, "percentage": 41.24, "elapsed_time": "1:49:31", "remaining_time": "2:36:03", "throughput": 8657.97, "total_tokens": 56898216} +{"current_steps": 84410, "total_steps": 204665, "loss": 0.0679, "lr": 1.4619851886568764e-06, "epoch": 2.0621503432438373, "percentage": 41.24, "elapsed_time": "1:49:32", "remaining_time": "2:36:02", "throughput": 8658.04, "total_tokens": 56901672} +{"current_steps": 84415, "total_steps": 204665, "loss": 0.1514, "lr": 1.4619095553282104e-06, "epoch": 2.0622724940756845, "percentage": 41.25, "elapsed_time": "1:49:32", "remaining_time": "2:36:02", "throughput": 8658.11, "total_tokens": 56905192} +{"current_steps": 84420, "total_steps": 204665, "loss": 0.0006, "lr": 1.4618339186404138e-06, "epoch": 2.0623946449075317, "percentage": 41.25, "elapsed_time": "1:49:32", "remaining_time": "2:36:02", "throughput": 8658.17, "total_tokens": 56908584} +{"current_steps": 84425, "total_steps": 204665, "loss": 0.0012, "lr": 1.4617582785940369e-06, "epoch": 2.062516795739379, "percentage": 41.25, "elapsed_time": "1:49:33", "remaining_time": "2:36:01", "throughput": 8658.26, "total_tokens": 56912232} +{"current_steps": 84430, "total_steps": 204665, "loss": 0.034, "lr": 1.4616826351896294e-06, "epoch": 2.062638946571226, "percentage": 41.25, "elapsed_time": "1:49:33", "remaining_time": "2:36:01", "throughput": 8658.33, "total_tokens": 56915688} +{"current_steps": 84435, "total_steps": 204665, "loss": 0.039, "lr": 1.4616069884277417e-06, "epoch": 2.0627610974030732, "percentage": 41.26, "elapsed_time": "1:49:33", "remaining_time": "2:36:00", "throughput": 8658.4, "total_tokens": 56919208} +{"current_steps": 84440, "total_steps": 204665, "loss": 0.0011, "lr": 1.4615313383089238e-06, "epoch": 2.0628832482349204, "percentage": 41.26, "elapsed_time": "1:49:34", "remaining_time": "2:36:00", "throughput": 8658.46, "total_tokens": 56922600} +{"current_steps": 84445, "total_steps": 204665, "loss": 0.0004, "lr": 1.4614556848337261e-06, "epoch": 2.0630053990667676, "percentage": 41.26, "elapsed_time": "1:49:34", "remaining_time": "2:35:59", "throughput": 8658.53, "total_tokens": 56926120} +{"current_steps": 84450, "total_steps": 204665, "loss": 0.0896, "lr": 1.4613800280026983e-06, "epoch": 2.063127549898615, "percentage": 41.26, "elapsed_time": "1:49:34", "remaining_time": "2:35:59", "throughput": 8658.58, "total_tokens": 56929448} +{"current_steps": 84455, "total_steps": 204665, "loss": 0.0335, "lr": 1.4613043678163908e-06, "epoch": 2.063249700730462, "percentage": 41.26, "elapsed_time": "1:49:35", "remaining_time": "2:35:58", "throughput": 8658.66, "total_tokens": 56933032} +{"current_steps": 84460, "total_steps": 204665, "loss": 0.1152, "lr": 1.461228704275354e-06, "epoch": 2.063371851562309, "percentage": 41.27, "elapsed_time": "1:49:35", "remaining_time": "2:35:58", "throughput": 8658.68, "total_tokens": 56936104} +{"current_steps": 84465, "total_steps": 204665, "loss": 0.0003, "lr": 1.4611530373801379e-06, "epoch": 2.0634940023941564, "percentage": 41.27, "elapsed_time": "1:49:35", "remaining_time": "2:35:58", "throughput": 8658.7, "total_tokens": 56939240} +{"current_steps": 84470, "total_steps": 204665, "loss": 0.001, "lr": 1.461077367131293e-06, "epoch": 2.0636161532260036, "percentage": 41.27, "elapsed_time": "1:49:36", "remaining_time": "2:35:57", "throughput": 8658.78, "total_tokens": 56942824} +{"current_steps": 84475, "total_steps": 204665, "loss": 0.0352, "lr": 1.4610016935293695e-06, "epoch": 2.0637383040578507, "percentage": 41.27, "elapsed_time": "1:49:36", "remaining_time": "2:35:57", "throughput": 8658.86, "total_tokens": 56946408} +{"current_steps": 84480, "total_steps": 204665, "loss": 0.038, "lr": 1.4609260165749175e-06, "epoch": 2.063860454889698, "percentage": 41.28, "elapsed_time": "1:49:37", "remaining_time": "2:35:56", "throughput": 8658.91, "total_tokens": 56949736} +{"current_steps": 84485, "total_steps": 204665, "loss": 0.0259, "lr": 1.4608503362684875e-06, "epoch": 2.063982605721545, "percentage": 41.28, "elapsed_time": "1:49:37", "remaining_time": "2:35:56", "throughput": 8658.95, "total_tokens": 56952936} +{"current_steps": 84490, "total_steps": 204665, "loss": 0.0005, "lr": 1.4607746526106299e-06, "epoch": 2.0641047565533923, "percentage": 41.28, "elapsed_time": "1:49:37", "remaining_time": "2:35:55", "throughput": 8659.0, "total_tokens": 56956328} +{"current_steps": 84495, "total_steps": 204665, "loss": 0.0674, "lr": 1.4606989656018953e-06, "epoch": 2.0642269073852395, "percentage": 41.28, "elapsed_time": "1:49:38", "remaining_time": "2:35:55", "throughput": 8659.02, "total_tokens": 56959400} +{"current_steps": 84500, "total_steps": 204665, "loss": 0.0004, "lr": 1.4606232752428338e-06, "epoch": 2.0643490582170863, "percentage": 41.29, "elapsed_time": "1:49:38", "remaining_time": "2:35:54", "throughput": 8659.11, "total_tokens": 56963048} +{"current_steps": 84505, "total_steps": 204665, "loss": 0.035, "lr": 1.460547581533996e-06, "epoch": 2.0644712090489334, "percentage": 41.29, "elapsed_time": "1:49:38", "remaining_time": "2:35:54", "throughput": 8659.13, "total_tokens": 56966120} +{"current_steps": 84510, "total_steps": 204665, "loss": 0.03, "lr": 1.4604718844759325e-06, "epoch": 2.0645933598807806, "percentage": 41.29, "elapsed_time": "1:49:39", "remaining_time": "2:35:54", "throughput": 8659.22, "total_tokens": 56969768} +{"current_steps": 84515, "total_steps": 204665, "loss": 0.0011, "lr": 1.4603961840691934e-06, "epoch": 2.064715510712628, "percentage": 41.29, "elapsed_time": "1:49:39", "remaining_time": "2:35:53", "throughput": 8659.25, "total_tokens": 56972968} +{"current_steps": 84520, "total_steps": 204665, "loss": 0.0861, "lr": 1.4603204803143293e-06, "epoch": 2.064837661544475, "percentage": 41.3, "elapsed_time": "1:49:39", "remaining_time": "2:35:53", "throughput": 8659.26, "total_tokens": 56975912} +{"current_steps": 84525, "total_steps": 204665, "loss": 0.0002, "lr": 1.4602447732118907e-06, "epoch": 2.064959812376322, "percentage": 41.3, "elapsed_time": "1:49:40", "remaining_time": "2:35:52", "throughput": 8659.37, "total_tokens": 56979752} +{"current_steps": 84530, "total_steps": 204665, "loss": 0.0003, "lr": 1.4601690627624288e-06, "epoch": 2.0650819632081694, "percentage": 41.3, "elapsed_time": "1:49:40", "remaining_time": "2:35:52", "throughput": 8659.41, "total_tokens": 56983080} +{"current_steps": 84535, "total_steps": 204665, "loss": 0.0565, "lr": 1.4600933489664934e-06, "epoch": 2.0652041140400166, "percentage": 41.3, "elapsed_time": "1:49:40", "remaining_time": "2:35:51", "throughput": 8659.49, "total_tokens": 56986600} +{"current_steps": 84540, "total_steps": 204665, "loss": 0.0001, "lr": 1.4600176318246356e-06, "epoch": 2.0653262648718638, "percentage": 41.31, "elapsed_time": "1:49:41", "remaining_time": "2:35:51", "throughput": 8659.53, "total_tokens": 56989928} +{"current_steps": 84545, "total_steps": 204665, "loss": 0.1238, "lr": 1.4599419113374057e-06, "epoch": 2.065448415703711, "percentage": 41.31, "elapsed_time": "1:49:41", "remaining_time": "2:35:50", "throughput": 8659.6, "total_tokens": 56993384} +{"current_steps": 84550, "total_steps": 204665, "loss": 0.0001, "lr": 1.4598661875053545e-06, "epoch": 2.065570566535558, "percentage": 41.31, "elapsed_time": "1:49:41", "remaining_time": "2:35:50", "throughput": 8659.66, "total_tokens": 56996840} +{"current_steps": 84555, "total_steps": 204665, "loss": 0.0555, "lr": 1.459790460329033e-06, "epoch": 2.0656927173674053, "percentage": 41.31, "elapsed_time": "1:49:42", "remaining_time": "2:35:50", "throughput": 8659.71, "total_tokens": 57000104} +{"current_steps": 84560, "total_steps": 204665, "loss": 0.0003, "lr": 1.4597147298089914e-06, "epoch": 2.0658148681992525, "percentage": 41.32, "elapsed_time": "1:49:42", "remaining_time": "2:35:49", "throughput": 8659.77, "total_tokens": 57003560} +{"current_steps": 84565, "total_steps": 204665, "loss": 0.0001, "lr": 1.4596389959457803e-06, "epoch": 2.0659370190310997, "percentage": 41.32, "elapsed_time": "1:49:42", "remaining_time": "2:35:49", "throughput": 8659.85, "total_tokens": 57007080} +{"current_steps": 84570, "total_steps": 204665, "loss": 0.0001, "lr": 1.4595632587399513e-06, "epoch": 2.066059169862947, "percentage": 41.32, "elapsed_time": "1:49:43", "remaining_time": "2:35:48", "throughput": 8659.91, "total_tokens": 57010472} +{"current_steps": 84575, "total_steps": 204665, "loss": 0.0467, "lr": 1.4594875181920546e-06, "epoch": 2.066181320694794, "percentage": 41.32, "elapsed_time": "1:49:43", "remaining_time": "2:35:48", "throughput": 8659.96, "total_tokens": 57013864} +{"current_steps": 84580, "total_steps": 204665, "loss": 0.0653, "lr": 1.4594117743026407e-06, "epoch": 2.0663034715266413, "percentage": 41.33, "elapsed_time": "1:49:43", "remaining_time": "2:35:47", "throughput": 8660.03, "total_tokens": 57017320} +{"current_steps": 84585, "total_steps": 204665, "loss": 0.0534, "lr": 1.459336027072261e-06, "epoch": 2.066425622358488, "percentage": 41.33, "elapsed_time": "1:49:44", "remaining_time": "2:35:47", "throughput": 8660.13, "total_tokens": 57021096} +{"current_steps": 84590, "total_steps": 204665, "loss": 0.0085, "lr": 1.459260276501466e-06, "epoch": 2.0665477731903352, "percentage": 41.33, "elapsed_time": "1:49:44", "remaining_time": "2:35:46", "throughput": 8660.22, "total_tokens": 57024744} +{"current_steps": 84595, "total_steps": 204665, "loss": 0.0676, "lr": 1.4591845225908073e-06, "epoch": 2.0666699240221824, "percentage": 41.33, "elapsed_time": "1:49:45", "remaining_time": "2:35:46", "throughput": 8660.26, "total_tokens": 57028008} +{"current_steps": 84600, "total_steps": 204665, "loss": 0.0002, "lr": 1.4591087653408347e-06, "epoch": 2.0667920748540296, "percentage": 41.34, "elapsed_time": "1:49:45", "remaining_time": "2:35:45", "throughput": 8660.26, "total_tokens": 57030952} +{"current_steps": 84605, "total_steps": 204665, "loss": 0.0689, "lr": 1.4590330047521e-06, "epoch": 2.066914225685877, "percentage": 41.34, "elapsed_time": "1:49:45", "remaining_time": "2:35:45", "throughput": 8660.31, "total_tokens": 57034216} +{"current_steps": 84610, "total_steps": 204665, "loss": 0.0004, "lr": 1.458957240825154e-06, "epoch": 2.067036376517724, "percentage": 41.34, "elapsed_time": "1:49:46", "remaining_time": "2:35:45", "throughput": 8660.37, "total_tokens": 57037672} +{"current_steps": 84615, "total_steps": 204665, "loss": 0.0001, "lr": 1.458881473560547e-06, "epoch": 2.067158527349571, "percentage": 41.34, "elapsed_time": "1:49:46", "remaining_time": "2:35:44", "throughput": 8660.41, "total_tokens": 57040936} +{"current_steps": 84620, "total_steps": 204665, "loss": 0.0725, "lr": 1.4588057029588308e-06, "epoch": 2.0672806781814184, "percentage": 41.35, "elapsed_time": "1:49:46", "remaining_time": "2:35:44", "throughput": 8660.46, "total_tokens": 57044264} +{"current_steps": 84625, "total_steps": 204665, "loss": 0.0572, "lr": 1.458729929020556e-06, "epoch": 2.0674028290132656, "percentage": 41.35, "elapsed_time": "1:49:47", "remaining_time": "2:35:43", "throughput": 8660.56, "total_tokens": 57047976} +{"current_steps": 84630, "total_steps": 204665, "loss": 0.0851, "lr": 1.4586541517462739e-06, "epoch": 2.0675249798451127, "percentage": 41.35, "elapsed_time": "1:49:47", "remaining_time": "2:35:43", "throughput": 8660.66, "total_tokens": 57051752} +{"current_steps": 84635, "total_steps": 204665, "loss": 0.0002, "lr": 1.4585783711365355e-06, "epoch": 2.06764713067696, "percentage": 41.35, "elapsed_time": "1:49:47", "remaining_time": "2:35:42", "throughput": 8660.73, "total_tokens": 57055208} +{"current_steps": 84640, "total_steps": 204665, "loss": 0.0001, "lr": 1.4585025871918913e-06, "epoch": 2.067769281508807, "percentage": 41.36, "elapsed_time": "1:49:48", "remaining_time": "2:35:42", "throughput": 8660.81, "total_tokens": 57058792} +{"current_steps": 84645, "total_steps": 204665, "loss": 0.0003, "lr": 1.4584267999128934e-06, "epoch": 2.0678914323406543, "percentage": 41.36, "elapsed_time": "1:49:48", "remaining_time": "2:35:42", "throughput": 8660.94, "total_tokens": 57062760} +{"current_steps": 84650, "total_steps": 204665, "loss": 0.0536, "lr": 1.4583510093000923e-06, "epoch": 2.0680135831725015, "percentage": 41.36, "elapsed_time": "1:49:48", "remaining_time": "2:35:41", "throughput": 8661.0, "total_tokens": 57066216} +{"current_steps": 84655, "total_steps": 204665, "loss": 0.0005, "lr": 1.4582752153540397e-06, "epoch": 2.0681357340043487, "percentage": 41.36, "elapsed_time": "1:49:49", "remaining_time": "2:35:41", "throughput": 8661.04, "total_tokens": 57069416} +{"current_steps": 84660, "total_steps": 204665, "loss": 0.0008, "lr": 1.4581994180752863e-06, "epoch": 2.068257884836196, "percentage": 41.37, "elapsed_time": "1:49:49", "remaining_time": "2:35:40", "throughput": 8661.14, "total_tokens": 57073192} +{"current_steps": 84665, "total_steps": 204665, "loss": 0.0502, "lr": 1.4581236174643836e-06, "epoch": 2.068380035668043, "percentage": 41.37, "elapsed_time": "1:49:49", "remaining_time": "2:35:40", "throughput": 8661.18, "total_tokens": 57076456} +{"current_steps": 84670, "total_steps": 204665, "loss": 0.0004, "lr": 1.4580478135218828e-06, "epoch": 2.0685021864998903, "percentage": 41.37, "elapsed_time": "1:49:50", "remaining_time": "2:35:39", "throughput": 8661.27, "total_tokens": 57080104} +{"current_steps": 84675, "total_steps": 204665, "loss": 0.0383, "lr": 1.4579720062483348e-06, "epoch": 2.0686243373317375, "percentage": 41.37, "elapsed_time": "1:49:50", "remaining_time": "2:35:39", "throughput": 8661.29, "total_tokens": 57083176} +{"current_steps": 84680, "total_steps": 204665, "loss": 0.0003, "lr": 1.4578961956442913e-06, "epoch": 2.068746488163584, "percentage": 41.37, "elapsed_time": "1:49:50", "remaining_time": "2:35:38", "throughput": 8661.34, "total_tokens": 57086440} +{"current_steps": 84685, "total_steps": 204665, "loss": 0.0003, "lr": 1.4578203817103036e-06, "epoch": 2.0688686389954314, "percentage": 41.38, "elapsed_time": "1:49:51", "remaining_time": "2:35:38", "throughput": 8661.36, "total_tokens": 57089512} +{"current_steps": 84690, "total_steps": 204665, "loss": 0.0002, "lr": 1.4577445644469229e-06, "epoch": 2.0689907898272786, "percentage": 41.38, "elapsed_time": "1:49:51", "remaining_time": "2:35:37", "throughput": 8661.38, "total_tokens": 57092584} +{"current_steps": 84695, "total_steps": 204665, "loss": 0.064, "lr": 1.4576687438547003e-06, "epoch": 2.0691129406591258, "percentage": 41.38, "elapsed_time": "1:49:51", "remaining_time": "2:35:37", "throughput": 8661.39, "total_tokens": 57095656} +{"current_steps": 84700, "total_steps": 204665, "loss": 0.0564, "lr": 1.457592919934188e-06, "epoch": 2.069235091490973, "percentage": 41.38, "elapsed_time": "1:49:52", "remaining_time": "2:35:37", "throughput": 8661.52, "total_tokens": 57099560} +{"current_steps": 84705, "total_steps": 204665, "loss": 0.0001, "lr": 1.4575170926859368e-06, "epoch": 2.06935724232282, "percentage": 41.39, "elapsed_time": "1:49:52", "remaining_time": "2:35:36", "throughput": 8661.57, "total_tokens": 57102952} +{"current_steps": 84710, "total_steps": 204665, "loss": 0.0062, "lr": 1.4574412621104982e-06, "epoch": 2.0694793931546673, "percentage": 41.39, "elapsed_time": "1:49:53", "remaining_time": "2:35:36", "throughput": 8661.6, "total_tokens": 57106088} +{"current_steps": 84715, "total_steps": 204665, "loss": 0.0901, "lr": 1.4573654282084236e-06, "epoch": 2.0696015439865145, "percentage": 41.39, "elapsed_time": "1:49:53", "remaining_time": "2:35:35", "throughput": 8661.64, "total_tokens": 57109352} +{"current_steps": 84720, "total_steps": 204665, "loss": 0.0002, "lr": 1.4572895909802644e-06, "epoch": 2.0697236948183617, "percentage": 41.39, "elapsed_time": "1:49:53", "remaining_time": "2:35:35", "throughput": 8661.68, "total_tokens": 57112616} +{"current_steps": 84725, "total_steps": 204665, "loss": 0.0393, "lr": 1.4572137504265727e-06, "epoch": 2.069845845650209, "percentage": 41.4, "elapsed_time": "1:49:54", "remaining_time": "2:35:34", "throughput": 8661.74, "total_tokens": 57116072} +{"current_steps": 84730, "total_steps": 204665, "loss": 0.0001, "lr": 1.4571379065478995e-06, "epoch": 2.069967996482056, "percentage": 41.4, "elapsed_time": "1:49:54", "remaining_time": "2:35:34", "throughput": 8661.77, "total_tokens": 57119208} +{"current_steps": 84735, "total_steps": 204665, "loss": 0.0017, "lr": 1.4570620593447967e-06, "epoch": 2.0700901473139033, "percentage": 41.4, "elapsed_time": "1:49:54", "remaining_time": "2:35:33", "throughput": 8661.79, "total_tokens": 57122344} +{"current_steps": 84740, "total_steps": 204665, "loss": 0.0432, "lr": 1.4569862088178151e-06, "epoch": 2.0702122981457505, "percentage": 41.4, "elapsed_time": "1:49:55", "remaining_time": "2:35:33", "throughput": 8661.82, "total_tokens": 57125480} +{"current_steps": 84745, "total_steps": 204665, "loss": 0.0536, "lr": 1.4569103549675073e-06, "epoch": 2.0703344489775977, "percentage": 41.41, "elapsed_time": "1:49:55", "remaining_time": "2:35:33", "throughput": 8661.88, "total_tokens": 57128936} +{"current_steps": 84750, "total_steps": 204665, "loss": 0.0001, "lr": 1.4568344977944242e-06, "epoch": 2.070456599809445, "percentage": 41.41, "elapsed_time": "1:49:55", "remaining_time": "2:35:32", "throughput": 8661.94, "total_tokens": 57132328} +{"current_steps": 84755, "total_steps": 204665, "loss": 0.0, "lr": 1.456758637299118e-06, "epoch": 2.070578750641292, "percentage": 41.41, "elapsed_time": "1:49:56", "remaining_time": "2:35:32", "throughput": 8661.98, "total_tokens": 57135592} +{"current_steps": 84760, "total_steps": 204665, "loss": 0.0598, "lr": 1.4566827734821403e-06, "epoch": 2.0707009014731392, "percentage": 41.41, "elapsed_time": "1:49:56", "remaining_time": "2:35:31", "throughput": 8662.0, "total_tokens": 57138728} +{"current_steps": 84765, "total_steps": 204665, "loss": 0.0354, "lr": 1.4566069063440424e-06, "epoch": 2.070823052304986, "percentage": 41.42, "elapsed_time": "1:49:56", "remaining_time": "2:35:31", "throughput": 8662.0, "total_tokens": 57141608} +{"current_steps": 84770, "total_steps": 204665, "loss": 0.0017, "lr": 1.4565310358853762e-06, "epoch": 2.070945203136833, "percentage": 41.42, "elapsed_time": "1:49:57", "remaining_time": "2:35:30", "throughput": 8662.02, "total_tokens": 57144744} +{"current_steps": 84775, "total_steps": 204665, "loss": 0.0326, "lr": 1.4564551621066937e-06, "epoch": 2.0710673539686804, "percentage": 41.42, "elapsed_time": "1:49:57", "remaining_time": "2:35:30", "throughput": 8662.08, "total_tokens": 57148136} +{"current_steps": 84780, "total_steps": 204665, "loss": 0.0411, "lr": 1.4563792850085464e-06, "epoch": 2.0711895048005275, "percentage": 41.42, "elapsed_time": "1:49:57", "remaining_time": "2:35:29", "throughput": 8662.12, "total_tokens": 57151400} +{"current_steps": 84785, "total_steps": 204665, "loss": 0.12, "lr": 1.456303404591486e-06, "epoch": 2.0713116556323747, "percentage": 41.43, "elapsed_time": "1:49:58", "remaining_time": "2:35:29", "throughput": 8662.2, "total_tokens": 57154920} +{"current_steps": 84790, "total_steps": 204665, "loss": 0.0385, "lr": 1.456227520856065e-06, "epoch": 2.071433806464222, "percentage": 41.43, "elapsed_time": "1:49:58", "remaining_time": "2:35:28", "throughput": 8662.23, "total_tokens": 57158120} +{"current_steps": 84795, "total_steps": 204665, "loss": 0.0526, "lr": 1.4561516338028343e-06, "epoch": 2.071555957296069, "percentage": 41.43, "elapsed_time": "1:49:58", "remaining_time": "2:35:28", "throughput": 8662.21, "total_tokens": 57160936} +{"current_steps": 84800, "total_steps": 204665, "loss": 0.0002, "lr": 1.4560757434323463e-06, "epoch": 2.0716781081279163, "percentage": 41.43, "elapsed_time": "1:49:59", "remaining_time": "2:35:28", "throughput": 8662.32, "total_tokens": 57164776} +{"current_steps": 84805, "total_steps": 204665, "loss": 0.0001, "lr": 1.455999849745153e-06, "epoch": 2.0718002589597635, "percentage": 41.44, "elapsed_time": "1:49:59", "remaining_time": "2:35:27", "throughput": 8662.46, "total_tokens": 57168872} +{"current_steps": 84810, "total_steps": 204665, "loss": 0.0001, "lr": 1.4559239527418062e-06, "epoch": 2.0719224097916107, "percentage": 41.44, "elapsed_time": "1:49:59", "remaining_time": "2:35:27", "throughput": 8662.51, "total_tokens": 57172136} +{"current_steps": 84815, "total_steps": 204665, "loss": 0.0007, "lr": 1.4558480524228576e-06, "epoch": 2.072044560623458, "percentage": 41.44, "elapsed_time": "1:50:00", "remaining_time": "2:35:26", "throughput": 8662.64, "total_tokens": 57176168} +{"current_steps": 84820, "total_steps": 204665, "loss": 0.0006, "lr": 1.4557721487888594e-06, "epoch": 2.072166711455305, "percentage": 41.44, "elapsed_time": "1:50:00", "remaining_time": "2:35:26", "throughput": 8662.73, "total_tokens": 57179816} +{"current_steps": 84825, "total_steps": 204665, "loss": 0.0003, "lr": 1.4556962418403637e-06, "epoch": 2.0722888622871523, "percentage": 41.45, "elapsed_time": "1:50:01", "remaining_time": "2:35:25", "throughput": 8662.77, "total_tokens": 57183080} +{"current_steps": 84830, "total_steps": 204665, "loss": 0.1638, "lr": 1.4556203315779222e-06, "epoch": 2.0724110131189994, "percentage": 41.45, "elapsed_time": "1:50:01", "remaining_time": "2:35:25", "throughput": 8662.8, "total_tokens": 57186344} +{"current_steps": 84835, "total_steps": 204665, "loss": 0.0708, "lr": 1.4555444180020867e-06, "epoch": 2.0725331639508466, "percentage": 41.45, "elapsed_time": "1:50:01", "remaining_time": "2:35:24", "throughput": 8662.83, "total_tokens": 57189480} +{"current_steps": 84840, "total_steps": 204665, "loss": 0.0537, "lr": 1.4554685011134102e-06, "epoch": 2.072655314782694, "percentage": 41.45, "elapsed_time": "1:50:02", "remaining_time": "2:35:24", "throughput": 8662.82, "total_tokens": 57192360} +{"current_steps": 84845, "total_steps": 204665, "loss": 0.0546, "lr": 1.4553925809124443e-06, "epoch": 2.072777465614541, "percentage": 41.46, "elapsed_time": "1:50:02", "remaining_time": "2:35:24", "throughput": 8662.89, "total_tokens": 57195880} +{"current_steps": 84850, "total_steps": 204665, "loss": 0.0422, "lr": 1.455316657399741e-06, "epoch": 2.072899616446388, "percentage": 41.46, "elapsed_time": "1:50:02", "remaining_time": "2:35:23", "throughput": 8662.95, "total_tokens": 57199272} +{"current_steps": 84855, "total_steps": 204665, "loss": 0.0002, "lr": 1.4552407305758524e-06, "epoch": 2.073021767278235, "percentage": 41.46, "elapsed_time": "1:50:03", "remaining_time": "2:35:23", "throughput": 8663.01, "total_tokens": 57202728} +{"current_steps": 84860, "total_steps": 204665, "loss": 0.0311, "lr": 1.4551648004413307e-06, "epoch": 2.073143918110082, "percentage": 41.46, "elapsed_time": "1:50:03", "remaining_time": "2:35:22", "throughput": 8663.07, "total_tokens": 57206120} +{"current_steps": 84865, "total_steps": 204665, "loss": 0.0477, "lr": 1.4550888669967281e-06, "epoch": 2.0732660689419293, "percentage": 41.47, "elapsed_time": "1:50:03", "remaining_time": "2:35:22", "throughput": 8663.13, "total_tokens": 57209576} +{"current_steps": 84870, "total_steps": 204665, "loss": 0.129, "lr": 1.4550129302425972e-06, "epoch": 2.0733882197737765, "percentage": 41.47, "elapsed_time": "1:50:04", "remaining_time": "2:35:21", "throughput": 8663.16, "total_tokens": 57212776} +{"current_steps": 84875, "total_steps": 204665, "loss": 0.0003, "lr": 1.4549369901794894e-06, "epoch": 2.0735103706056237, "percentage": 41.47, "elapsed_time": "1:50:04", "remaining_time": "2:35:21", "throughput": 8663.21, "total_tokens": 57216104} +{"current_steps": 84880, "total_steps": 204665, "loss": 0.1224, "lr": 1.4548610468079578e-06, "epoch": 2.073632521437471, "percentage": 41.47, "elapsed_time": "1:50:04", "remaining_time": "2:35:20", "throughput": 8663.22, "total_tokens": 57219112} +{"current_steps": 84885, "total_steps": 204665, "loss": 0.0338, "lr": 1.4547851001285542e-06, "epoch": 2.073754672269318, "percentage": 41.48, "elapsed_time": "1:50:05", "remaining_time": "2:35:20", "throughput": 8663.27, "total_tokens": 57222440} +{"current_steps": 84890, "total_steps": 204665, "loss": 0.0008, "lr": 1.4547091501418312e-06, "epoch": 2.0738768231011653, "percentage": 41.48, "elapsed_time": "1:50:05", "remaining_time": "2:35:20", "throughput": 8663.34, "total_tokens": 57225960} +{"current_steps": 84895, "total_steps": 204665, "loss": 0.0004, "lr": 1.4546331968483405e-06, "epoch": 2.0739989739330125, "percentage": 41.48, "elapsed_time": "1:50:05", "remaining_time": "2:35:19", "throughput": 8663.41, "total_tokens": 57229480} +{"current_steps": 84900, "total_steps": 204665, "loss": 0.0561, "lr": 1.4545572402486352e-06, "epoch": 2.0741211247648597, "percentage": 41.48, "elapsed_time": "1:50:06", "remaining_time": "2:35:19", "throughput": 8663.45, "total_tokens": 57232680} +{"current_steps": 84905, "total_steps": 204665, "loss": 0.0009, "lr": 1.4544812803432676e-06, "epoch": 2.074243275596707, "percentage": 41.48, "elapsed_time": "1:50:06", "remaining_time": "2:35:18", "throughput": 8663.52, "total_tokens": 57236200} +{"current_steps": 84910, "total_steps": 204665, "loss": 0.0004, "lr": 1.4544053171327897e-06, "epoch": 2.074365426428554, "percentage": 41.49, "elapsed_time": "1:50:06", "remaining_time": "2:35:18", "throughput": 8663.52, "total_tokens": 57239144} +{"current_steps": 84915, "total_steps": 204665, "loss": 0.0445, "lr": 1.4543293506177538e-06, "epoch": 2.0744875772604012, "percentage": 41.49, "elapsed_time": "1:50:07", "remaining_time": "2:35:17", "throughput": 8663.54, "total_tokens": 57242216} +{"current_steps": 84920, "total_steps": 204665, "loss": 0.0581, "lr": 1.4542533807987132e-06, "epoch": 2.0746097280922484, "percentage": 41.49, "elapsed_time": "1:50:07", "remaining_time": "2:35:17", "throughput": 8663.62, "total_tokens": 57245800} +{"current_steps": 84925, "total_steps": 204665, "loss": 0.0395, "lr": 1.4541774076762197e-06, "epoch": 2.0747318789240956, "percentage": 41.49, "elapsed_time": "1:50:07", "remaining_time": "2:35:16", "throughput": 8663.66, "total_tokens": 57249064} +{"current_steps": 84930, "total_steps": 204665, "loss": 0.0599, "lr": 1.4541014312508257e-06, "epoch": 2.074854029755943, "percentage": 41.5, "elapsed_time": "1:50:08", "remaining_time": "2:35:16", "throughput": 8663.68, "total_tokens": 57252200} +{"current_steps": 84935, "total_steps": 204665, "loss": 0.0001, "lr": 1.454025451523084e-06, "epoch": 2.07497618058779, "percentage": 41.5, "elapsed_time": "1:50:08", "remaining_time": "2:35:15", "throughput": 8663.7, "total_tokens": 57255272} +{"current_steps": 84940, "total_steps": 204665, "loss": 0.0823, "lr": 1.4539494684935473e-06, "epoch": 2.075098331419637, "percentage": 41.5, "elapsed_time": "1:50:08", "remaining_time": "2:35:15", "throughput": 8663.8, "total_tokens": 57259048} +{"current_steps": 84945, "total_steps": 204665, "loss": 0.0443, "lr": 1.4538734821627679e-06, "epoch": 2.075220482251484, "percentage": 41.5, "elapsed_time": "1:50:09", "remaining_time": "2:35:15", "throughput": 8663.87, "total_tokens": 57262568} +{"current_steps": 84950, "total_steps": 204665, "loss": 0.0003, "lr": 1.4537974925312986e-06, "epoch": 2.075342633083331, "percentage": 41.51, "elapsed_time": "1:50:09", "remaining_time": "2:35:14", "throughput": 8663.89, "total_tokens": 57265640} +{"current_steps": 84955, "total_steps": 204665, "loss": 0.1051, "lr": 1.4537214995996914e-06, "epoch": 2.0754647839151783, "percentage": 41.51, "elapsed_time": "1:50:10", "remaining_time": "2:35:14", "throughput": 8663.99, "total_tokens": 57269352} +{"current_steps": 84960, "total_steps": 204665, "loss": 0.0354, "lr": 1.4536455033684995e-06, "epoch": 2.0755869347470255, "percentage": 41.51, "elapsed_time": "1:50:10", "remaining_time": "2:35:13", "throughput": 8664.01, "total_tokens": 57272488} +{"current_steps": 84965, "total_steps": 204665, "loss": 0.0002, "lr": 1.4535695038382759e-06, "epoch": 2.0757090855788727, "percentage": 41.51, "elapsed_time": "1:50:10", "remaining_time": "2:35:13", "throughput": 8664.15, "total_tokens": 57276520} +{"current_steps": 84970, "total_steps": 204665, "loss": 0.0572, "lr": 1.4534935010095727e-06, "epoch": 2.07583123641072, "percentage": 41.52, "elapsed_time": "1:50:11", "remaining_time": "2:35:12", "throughput": 8664.23, "total_tokens": 57280104} +{"current_steps": 84975, "total_steps": 204665, "loss": 0.0803, "lr": 1.453417494882943e-06, "epoch": 2.075953387242567, "percentage": 41.52, "elapsed_time": "1:50:11", "remaining_time": "2:35:12", "throughput": 8664.27, "total_tokens": 57283368} +{"current_steps": 84980, "total_steps": 204665, "loss": 0.0001, "lr": 1.453341485458939e-06, "epoch": 2.0760755380744143, "percentage": 41.52, "elapsed_time": "1:50:11", "remaining_time": "2:35:11", "throughput": 8664.27, "total_tokens": 57286312} +{"current_steps": 84985, "total_steps": 204665, "loss": 0.1466, "lr": 1.4532654727381139e-06, "epoch": 2.0761976889062614, "percentage": 41.52, "elapsed_time": "1:50:12", "remaining_time": "2:35:11", "throughput": 8664.28, "total_tokens": 57289256} +{"current_steps": 84990, "total_steps": 204665, "loss": 0.0008, "lr": 1.45318945672102e-06, "epoch": 2.0763198397381086, "percentage": 41.53, "elapsed_time": "1:50:12", "remaining_time": "2:35:11", "throughput": 8664.33, "total_tokens": 57292584} +{"current_steps": 84995, "total_steps": 204665, "loss": 0.0323, "lr": 1.453113437408211e-06, "epoch": 2.076441990569956, "percentage": 41.53, "elapsed_time": "1:50:12", "remaining_time": "2:35:10", "throughput": 8664.36, "total_tokens": 57295784} +{"current_steps": 85000, "total_steps": 204665, "loss": 0.0315, "lr": 1.4530374148002391e-06, "epoch": 2.076564141401803, "percentage": 41.53, "elapsed_time": "1:50:13", "remaining_time": "2:35:10", "throughput": 8664.41, "total_tokens": 57299176} +{"current_steps": 85005, "total_steps": 204665, "loss": 0.0002, "lr": 1.4529613888976572e-06, "epoch": 2.07668629223365, "percentage": 41.53, "elapsed_time": "1:50:13", "remaining_time": "2:35:09", "throughput": 8664.46, "total_tokens": 57302440} +{"current_steps": 85010, "total_steps": 204665, "loss": 0.0547, "lr": 1.452885359701018e-06, "epoch": 2.0768084430654974, "percentage": 41.54, "elapsed_time": "1:50:13", "remaining_time": "2:35:09", "throughput": 8664.51, "total_tokens": 57305832} +{"current_steps": 85015, "total_steps": 204665, "loss": 0.0001, "lr": 1.452809327210875e-06, "epoch": 2.0769305938973446, "percentage": 41.54, "elapsed_time": "1:50:14", "remaining_time": "2:35:08", "throughput": 8664.6, "total_tokens": 57309544} +{"current_steps": 85020, "total_steps": 204665, "loss": 0.0516, "lr": 1.4527332914277807e-06, "epoch": 2.0770527447291918, "percentage": 41.54, "elapsed_time": "1:50:14", "remaining_time": "2:35:08", "throughput": 8664.67, "total_tokens": 57313000} +{"current_steps": 85025, "total_steps": 204665, "loss": 0.0001, "lr": 1.4526572523522882e-06, "epoch": 2.077174895561039, "percentage": 41.54, "elapsed_time": "1:50:14", "remaining_time": "2:35:07", "throughput": 8664.71, "total_tokens": 57316264} +{"current_steps": 85030, "total_steps": 204665, "loss": 0.0015, "lr": 1.4525812099849502e-06, "epoch": 2.077297046392886, "percentage": 41.55, "elapsed_time": "1:50:15", "remaining_time": "2:35:07", "throughput": 8664.77, "total_tokens": 57319656} +{"current_steps": 85035, "total_steps": 204665, "loss": 0.0015, "lr": 1.45250516432632e-06, "epoch": 2.077419197224733, "percentage": 41.55, "elapsed_time": "1:50:15", "remaining_time": "2:35:07", "throughput": 8664.77, "total_tokens": 57322600} +{"current_steps": 85040, "total_steps": 204665, "loss": 0.0003, "lr": 1.4524291153769505e-06, "epoch": 2.07754134805658, "percentage": 41.55, "elapsed_time": "1:50:15", "remaining_time": "2:35:06", "throughput": 8664.81, "total_tokens": 57325864} +{"current_steps": 85045, "total_steps": 204665, "loss": 0.0458, "lr": 1.452353063137395e-06, "epoch": 2.0776634988884273, "percentage": 41.55, "elapsed_time": "1:50:16", "remaining_time": "2:35:06", "throughput": 8664.81, "total_tokens": 57328744} +{"current_steps": 85050, "total_steps": 204665, "loss": 0.0301, "lr": 1.452277007608206e-06, "epoch": 2.0777856497202745, "percentage": 41.56, "elapsed_time": "1:50:16", "remaining_time": "2:35:05", "throughput": 8664.9, "total_tokens": 57332392} +{"current_steps": 85055, "total_steps": 204665, "loss": 0.0471, "lr": 1.452200948789937e-06, "epoch": 2.0779078005521217, "percentage": 41.56, "elapsed_time": "1:50:16", "remaining_time": "2:35:05", "throughput": 8664.96, "total_tokens": 57335784} +{"current_steps": 85060, "total_steps": 204665, "loss": 0.0004, "lr": 1.4521248866831415e-06, "epoch": 2.078029951383969, "percentage": 41.56, "elapsed_time": "1:50:17", "remaining_time": "2:35:04", "throughput": 8665.0, "total_tokens": 57339048} +{"current_steps": 85065, "total_steps": 204665, "loss": 0.1021, "lr": 1.452048821288372e-06, "epoch": 2.078152102215816, "percentage": 41.56, "elapsed_time": "1:50:17", "remaining_time": "2:35:04", "throughput": 8664.99, "total_tokens": 57341928} +{"current_steps": 85070, "total_steps": 204665, "loss": 0.0493, "lr": 1.4519727526061818e-06, "epoch": 2.0782742530476632, "percentage": 41.57, "elapsed_time": "1:50:17", "remaining_time": "2:35:03", "throughput": 8665.01, "total_tokens": 57345000} +{"current_steps": 85075, "total_steps": 204665, "loss": 0.0419, "lr": 1.451896680637124e-06, "epoch": 2.0783964038795104, "percentage": 41.57, "elapsed_time": "1:50:18", "remaining_time": "2:35:03", "throughput": 8665.09, "total_tokens": 57348584} +{"current_steps": 85080, "total_steps": 204665, "loss": 0.038, "lr": 1.4518206053817524e-06, "epoch": 2.0785185547113576, "percentage": 41.57, "elapsed_time": "1:50:18", "remaining_time": "2:35:02", "throughput": 8665.13, "total_tokens": 57351848} +{"current_steps": 85085, "total_steps": 204665, "loss": 0.0302, "lr": 1.4517445268406196e-06, "epoch": 2.078640705543205, "percentage": 41.57, "elapsed_time": "1:50:19", "remaining_time": "2:35:02", "throughput": 8665.18, "total_tokens": 57355240} +{"current_steps": 85090, "total_steps": 204665, "loss": 0.0365, "lr": 1.451668445014279e-06, "epoch": 2.078762856375052, "percentage": 41.58, "elapsed_time": "1:50:19", "remaining_time": "2:35:02", "throughput": 8665.19, "total_tokens": 57358248} +{"current_steps": 85095, "total_steps": 204665, "loss": 0.0004, "lr": 1.4515923599032841e-06, "epoch": 2.078885007206899, "percentage": 41.58, "elapsed_time": "1:50:19", "remaining_time": "2:35:01", "throughput": 8665.27, "total_tokens": 57361832} +{"current_steps": 85100, "total_steps": 204665, "loss": 0.0381, "lr": 1.451516271508188e-06, "epoch": 2.0790071580387464, "percentage": 41.58, "elapsed_time": "1:50:20", "remaining_time": "2:35:01", "throughput": 8665.39, "total_tokens": 57365736} +{"current_steps": 85105, "total_steps": 204665, "loss": 0.0002, "lr": 1.4514401798295444e-06, "epoch": 2.0791293088705936, "percentage": 41.58, "elapsed_time": "1:50:20", "remaining_time": "2:35:00", "throughput": 8665.52, "total_tokens": 57369768} +{"current_steps": 85110, "total_steps": 204665, "loss": 0.0254, "lr": 1.4513640848679063e-06, "epoch": 2.0792514597024407, "percentage": 41.59, "elapsed_time": "1:50:20", "remaining_time": "2:35:00", "throughput": 8665.59, "total_tokens": 57373288} +{"current_steps": 85115, "total_steps": 204665, "loss": 0.1445, "lr": 1.451287986623827e-06, "epoch": 2.079373610534288, "percentage": 41.59, "elapsed_time": "1:50:21", "remaining_time": "2:34:59", "throughput": 8665.64, "total_tokens": 57376680} +{"current_steps": 85120, "total_steps": 204665, "loss": 0.0622, "lr": 1.45121188509786e-06, "epoch": 2.079495761366135, "percentage": 41.59, "elapsed_time": "1:50:21", "remaining_time": "2:34:59", "throughput": 8665.66, "total_tokens": 57379752} +{"current_steps": 85125, "total_steps": 204665, "loss": 0.0591, "lr": 1.4511357802905591e-06, "epoch": 2.079617912197982, "percentage": 41.59, "elapsed_time": "1:50:21", "remaining_time": "2:34:58", "throughput": 8665.69, "total_tokens": 57382952} +{"current_steps": 85130, "total_steps": 204665, "loss": 0.0716, "lr": 1.4510596722024775e-06, "epoch": 2.079740063029829, "percentage": 41.59, "elapsed_time": "1:50:22", "remaining_time": "2:34:58", "throughput": 8665.74, "total_tokens": 57386280} +{"current_steps": 85135, "total_steps": 204665, "loss": 0.0018, "lr": 1.4509835608341685e-06, "epoch": 2.0798622138616762, "percentage": 41.6, "elapsed_time": "1:50:22", "remaining_time": "2:34:58", "throughput": 8665.73, "total_tokens": 57389160} +{"current_steps": 85140, "total_steps": 204665, "loss": 0.0007, "lr": 1.450907446186186e-06, "epoch": 2.0799843646935234, "percentage": 41.6, "elapsed_time": "1:50:22", "remaining_time": "2:34:57", "throughput": 8665.77, "total_tokens": 57392360} +{"current_steps": 85145, "total_steps": 204665, "loss": 0.0005, "lr": 1.4508313282590827e-06, "epoch": 2.0801065155253706, "percentage": 41.6, "elapsed_time": "1:50:23", "remaining_time": "2:34:57", "throughput": 8665.79, "total_tokens": 57395432} +{"current_steps": 85150, "total_steps": 204665, "loss": 0.0236, "lr": 1.450755207053413e-06, "epoch": 2.080228666357218, "percentage": 41.6, "elapsed_time": "1:50:23", "remaining_time": "2:34:56", "throughput": 8665.86, "total_tokens": 57398952} +{"current_steps": 85155, "total_steps": 204665, "loss": 0.1286, "lr": 1.45067908256973e-06, "epoch": 2.080350817189065, "percentage": 41.61, "elapsed_time": "1:50:23", "remaining_time": "2:34:56", "throughput": 8665.92, "total_tokens": 57402344} +{"current_steps": 85160, "total_steps": 204665, "loss": 0.0012, "lr": 1.450602954808588e-06, "epoch": 2.080472968020912, "percentage": 41.61, "elapsed_time": "1:50:24", "remaining_time": "2:34:55", "throughput": 8665.95, "total_tokens": 57405480} +{"current_steps": 85165, "total_steps": 204665, "loss": 0.1086, "lr": 1.4505268237705396e-06, "epoch": 2.0805951188527594, "percentage": 41.61, "elapsed_time": "1:50:24", "remaining_time": "2:34:55", "throughput": 8666.24, "total_tokens": 57410920} +{"current_steps": 85170, "total_steps": 204665, "loss": 0.0002, "lr": 1.4504506894561394e-06, "epoch": 2.0807172696846066, "percentage": 41.61, "elapsed_time": "1:50:25", "remaining_time": "2:34:55", "throughput": 8666.32, "total_tokens": 57414440} +{"current_steps": 85175, "total_steps": 204665, "loss": 0.0001, "lr": 1.4503745518659404e-06, "epoch": 2.0808394205164538, "percentage": 41.62, "elapsed_time": "1:50:25", "remaining_time": "2:34:54", "throughput": 8666.34, "total_tokens": 57417512} +{"current_steps": 85180, "total_steps": 204665, "loss": 0.0484, "lr": 1.4502984110004967e-06, "epoch": 2.080961571348301, "percentage": 41.62, "elapsed_time": "1:50:25", "remaining_time": "2:34:54", "throughput": 8666.35, "total_tokens": 57420584} +{"current_steps": 85185, "total_steps": 204665, "loss": 0.0516, "lr": 1.4502222668603616e-06, "epoch": 2.081083722180148, "percentage": 41.62, "elapsed_time": "1:50:26", "remaining_time": "2:34:53", "throughput": 8666.44, "total_tokens": 57424168} +{"current_steps": 85190, "total_steps": 204665, "loss": 0.0012, "lr": 1.450146119446089e-06, "epoch": 2.0812058730119953, "percentage": 41.62, "elapsed_time": "1:50:26", "remaining_time": "2:34:53", "throughput": 8666.47, "total_tokens": 57427304} +{"current_steps": 85195, "total_steps": 204665, "loss": 0.0311, "lr": 1.4500699687582332e-06, "epoch": 2.0813280238438425, "percentage": 41.63, "elapsed_time": "1:50:26", "remaining_time": "2:34:52", "throughput": 8666.61, "total_tokens": 57431400} +{"current_steps": 85200, "total_steps": 204665, "loss": 0.0544, "lr": 1.4499938147973472e-06, "epoch": 2.0814501746756897, "percentage": 41.63, "elapsed_time": "1:50:27", "remaining_time": "2:34:52", "throughput": 8666.66, "total_tokens": 57434728} +{"current_steps": 85205, "total_steps": 204665, "loss": 0.0002, "lr": 1.4499176575639851e-06, "epoch": 2.081572325507537, "percentage": 41.63, "elapsed_time": "1:50:27", "remaining_time": "2:34:51", "throughput": 8666.73, "total_tokens": 57438248} +{"current_steps": 85210, "total_steps": 204665, "loss": 0.0341, "lr": 1.4498414970587008e-06, "epoch": 2.0816944763393836, "percentage": 41.63, "elapsed_time": "1:50:27", "remaining_time": "2:34:51", "throughput": 8666.77, "total_tokens": 57441512} +{"current_steps": 85215, "total_steps": 204665, "loss": 0.0003, "lr": 1.4497653332820482e-06, "epoch": 2.081816627171231, "percentage": 41.64, "elapsed_time": "1:50:28", "remaining_time": "2:34:50", "throughput": 8666.8, "total_tokens": 57444712} +{"current_steps": 85220, "total_steps": 204665, "loss": 0.0003, "lr": 1.449689166234581e-06, "epoch": 2.081938778003078, "percentage": 41.64, "elapsed_time": "1:50:28", "remaining_time": "2:34:50", "throughput": 8666.85, "total_tokens": 57447976} +{"current_steps": 85225, "total_steps": 204665, "loss": 0.001, "lr": 1.4496129959168535e-06, "epoch": 2.082060928834925, "percentage": 41.64, "elapsed_time": "1:50:28", "remaining_time": "2:34:50", "throughput": 8666.91, "total_tokens": 57451432} +{"current_steps": 85230, "total_steps": 204665, "loss": 0.0444, "lr": 1.4495368223294194e-06, "epoch": 2.0821830796667724, "percentage": 41.64, "elapsed_time": "1:50:29", "remaining_time": "2:34:49", "throughput": 8667.02, "total_tokens": 57455208} +{"current_steps": 85235, "total_steps": 204665, "loss": 0.0002, "lr": 1.4494606454728323e-06, "epoch": 2.0823052304986196, "percentage": 41.65, "elapsed_time": "1:50:29", "remaining_time": "2:34:49", "throughput": 8667.12, "total_tokens": 57458920} +{"current_steps": 85240, "total_steps": 204665, "loss": 0.0336, "lr": 1.4493844653476468e-06, "epoch": 2.082427381330467, "percentage": 41.65, "elapsed_time": "1:50:29", "remaining_time": "2:34:48", "throughput": 8667.15, "total_tokens": 57462056} +{"current_steps": 85245, "total_steps": 204665, "loss": 0.0319, "lr": 1.4493082819544165e-06, "epoch": 2.082549532162314, "percentage": 41.65, "elapsed_time": "1:50:30", "remaining_time": "2:34:48", "throughput": 8667.23, "total_tokens": 57465704} +{"current_steps": 85250, "total_steps": 204665, "loss": 0.0516, "lr": 1.4492320952936954e-06, "epoch": 2.082671682994161, "percentage": 41.65, "elapsed_time": "1:50:30", "remaining_time": "2:34:47", "throughput": 8667.25, "total_tokens": 57468776} +{"current_steps": 85255, "total_steps": 204665, "loss": 0.0449, "lr": 1.4491559053660377e-06, "epoch": 2.0827938338260084, "percentage": 41.66, "elapsed_time": "1:50:30", "remaining_time": "2:34:47", "throughput": 8667.33, "total_tokens": 57472360} +{"current_steps": 85260, "total_steps": 204665, "loss": 0.0, "lr": 1.4490797121719976e-06, "epoch": 2.0829159846578555, "percentage": 41.66, "elapsed_time": "1:50:31", "remaining_time": "2:34:46", "throughput": 8667.35, "total_tokens": 57475496} +{"current_steps": 85265, "total_steps": 204665, "loss": 0.0001, "lr": 1.4490035157121287e-06, "epoch": 2.0830381354897027, "percentage": 41.66, "elapsed_time": "1:50:31", "remaining_time": "2:34:46", "throughput": 8667.43, "total_tokens": 57479080} +{"current_steps": 85270, "total_steps": 204665, "loss": 0.0001, "lr": 1.4489273159869858e-06, "epoch": 2.08316028632155, "percentage": 41.66, "elapsed_time": "1:50:31", "remaining_time": "2:34:46", "throughput": 8667.49, "total_tokens": 57482536} +{"current_steps": 85275, "total_steps": 204665, "loss": 0.0003, "lr": 1.4488511129971226e-06, "epoch": 2.083282437153397, "percentage": 41.67, "elapsed_time": "1:50:32", "remaining_time": "2:34:45", "throughput": 8667.56, "total_tokens": 57486056} +{"current_steps": 85280, "total_steps": 204665, "loss": 0.062, "lr": 1.4487749067430931e-06, "epoch": 2.0834045879852443, "percentage": 41.67, "elapsed_time": "1:50:32", "remaining_time": "2:34:45", "throughput": 8667.62, "total_tokens": 57489448} +{"current_steps": 85285, "total_steps": 204665, "loss": 0.0004, "lr": 1.4486986972254525e-06, "epoch": 2.0835267388170915, "percentage": 41.67, "elapsed_time": "1:50:33", "remaining_time": "2:34:44", "throughput": 8667.63, "total_tokens": 57492520} +{"current_steps": 85290, "total_steps": 204665, "loss": 0.0004, "lr": 1.448622484444754e-06, "epoch": 2.0836488896489387, "percentage": 41.67, "elapsed_time": "1:50:33", "remaining_time": "2:34:44", "throughput": 8667.66, "total_tokens": 57495656} +{"current_steps": 85295, "total_steps": 204665, "loss": 0.0884, "lr": 1.448546268401552e-06, "epoch": 2.083771040480786, "percentage": 41.68, "elapsed_time": "1:50:33", "remaining_time": "2:34:43", "throughput": 8667.72, "total_tokens": 57499112} +{"current_steps": 85300, "total_steps": 204665, "loss": 0.1168, "lr": 1.4484700490964007e-06, "epoch": 2.083893191312633, "percentage": 41.68, "elapsed_time": "1:50:34", "remaining_time": "2:34:43", "throughput": 8667.78, "total_tokens": 57502568} +{"current_steps": 85305, "total_steps": 204665, "loss": 0.0005, "lr": 1.4483938265298545e-06, "epoch": 2.08401534214448, "percentage": 41.68, "elapsed_time": "1:50:34", "remaining_time": "2:34:42", "throughput": 8667.87, "total_tokens": 57506216} +{"current_steps": 85310, "total_steps": 204665, "loss": 0.0001, "lr": 1.448317600702468e-06, "epoch": 2.084137492976327, "percentage": 41.68, "elapsed_time": "1:50:34", "remaining_time": "2:34:42", "throughput": 8667.97, "total_tokens": 57509992} +{"current_steps": 85315, "total_steps": 204665, "loss": 0.0003, "lr": 1.4482413716147954e-06, "epoch": 2.084259643808174, "percentage": 41.69, "elapsed_time": "1:50:35", "remaining_time": "2:34:42", "throughput": 8668.01, "total_tokens": 57513256} +{"current_steps": 85320, "total_steps": 204665, "loss": 0.1029, "lr": 1.448165139267391e-06, "epoch": 2.0843817946400214, "percentage": 41.69, "elapsed_time": "1:50:35", "remaining_time": "2:34:41", "throughput": 8668.04, "total_tokens": 57516456} +{"current_steps": 85325, "total_steps": 204665, "loss": 0.0475, "lr": 1.448088903660809e-06, "epoch": 2.0845039454718686, "percentage": 41.69, "elapsed_time": "1:50:35", "remaining_time": "2:34:41", "throughput": 8668.16, "total_tokens": 57520424} +{"current_steps": 85330, "total_steps": 204665, "loss": 0.0373, "lr": 1.4480126647956044e-06, "epoch": 2.0846260963037158, "percentage": 41.69, "elapsed_time": "1:50:36", "remaining_time": "2:34:40", "throughput": 8668.22, "total_tokens": 57523816} +{"current_steps": 85335, "total_steps": 204665, "loss": 0.0637, "lr": 1.4479364226723308e-06, "epoch": 2.084748247135563, "percentage": 41.69, "elapsed_time": "1:50:36", "remaining_time": "2:34:40", "throughput": 8668.26, "total_tokens": 57527080} +{"current_steps": 85340, "total_steps": 204665, "loss": 0.0002, "lr": 1.447860177291543e-06, "epoch": 2.08487039796741, "percentage": 41.7, "elapsed_time": "1:50:36", "remaining_time": "2:34:39", "throughput": 8668.25, "total_tokens": 57529960} +{"current_steps": 85345, "total_steps": 204665, "loss": 0.1198, "lr": 1.4477839286537958e-06, "epoch": 2.0849925487992573, "percentage": 41.7, "elapsed_time": "1:50:37", "remaining_time": "2:34:39", "throughput": 8668.26, "total_tokens": 57532904} +{"current_steps": 85350, "total_steps": 204665, "loss": 0.0724, "lr": 1.4477076767596433e-06, "epoch": 2.0851146996311045, "percentage": 41.7, "elapsed_time": "1:50:37", "remaining_time": "2:34:38", "throughput": 8668.28, "total_tokens": 57535976} +{"current_steps": 85355, "total_steps": 204665, "loss": 0.0002, "lr": 1.4476314216096402e-06, "epoch": 2.0852368504629517, "percentage": 41.7, "elapsed_time": "1:50:37", "remaining_time": "2:34:38", "throughput": 8668.31, "total_tokens": 57539176} +{"current_steps": 85360, "total_steps": 204665, "loss": 0.0001, "lr": 1.4475551632043408e-06, "epoch": 2.085359001294799, "percentage": 41.71, "elapsed_time": "1:50:38", "remaining_time": "2:34:38", "throughput": 8668.36, "total_tokens": 57542504} +{"current_steps": 85365, "total_steps": 204665, "loss": 0.0807, "lr": 1.4474789015443001e-06, "epoch": 2.085481152126646, "percentage": 41.71, "elapsed_time": "1:50:38", "remaining_time": "2:34:37", "throughput": 8668.45, "total_tokens": 57546088} +{"current_steps": 85370, "total_steps": 204665, "loss": 0.0412, "lr": 1.4474026366300724e-06, "epoch": 2.0856033029584933, "percentage": 41.71, "elapsed_time": "1:50:38", "remaining_time": "2:34:37", "throughput": 8668.52, "total_tokens": 57549672} +{"current_steps": 85375, "total_steps": 204665, "loss": 0.093, "lr": 1.4473263684622124e-06, "epoch": 2.0857254537903405, "percentage": 41.71, "elapsed_time": "1:50:39", "remaining_time": "2:34:36", "throughput": 8668.61, "total_tokens": 57553320} +{"current_steps": 85380, "total_steps": 204665, "loss": 0.0004, "lr": 1.4472500970412747e-06, "epoch": 2.0858476046221877, "percentage": 41.72, "elapsed_time": "1:50:39", "remaining_time": "2:34:36", "throughput": 8668.64, "total_tokens": 57556520} +{"current_steps": 85385, "total_steps": 204665, "loss": 0.0426, "lr": 1.4471738223678141e-06, "epoch": 2.085969755454035, "percentage": 41.72, "elapsed_time": "1:50:39", "remaining_time": "2:34:35", "throughput": 8668.71, "total_tokens": 57559976} +{"current_steps": 85390, "total_steps": 204665, "loss": 0.0004, "lr": 1.4470975444423853e-06, "epoch": 2.0860919062858816, "percentage": 41.72, "elapsed_time": "1:50:40", "remaining_time": "2:34:35", "throughput": 8668.81, "total_tokens": 57563752} +{"current_steps": 85395, "total_steps": 204665, "loss": 0.0388, "lr": 1.4470212632655425e-06, "epoch": 2.086214057117729, "percentage": 41.72, "elapsed_time": "1:50:40", "remaining_time": "2:34:34", "throughput": 8668.86, "total_tokens": 57567080} +{"current_steps": 85400, "total_steps": 204665, "loss": 0.0465, "lr": 1.4469449788378411e-06, "epoch": 2.086336207949576, "percentage": 41.73, "elapsed_time": "1:50:41", "remaining_time": "2:34:34", "throughput": 8668.93, "total_tokens": 57570600} +{"current_steps": 85405, "total_steps": 204665, "loss": 0.0002, "lr": 1.4468686911598356e-06, "epoch": 2.086458358781423, "percentage": 41.73, "elapsed_time": "1:50:41", "remaining_time": "2:34:34", "throughput": 8668.95, "total_tokens": 57573672} +{"current_steps": 85410, "total_steps": 204665, "loss": 0.0926, "lr": 1.4467924002320807e-06, "epoch": 2.0865805096132704, "percentage": 41.73, "elapsed_time": "1:50:41", "remaining_time": "2:34:33", "throughput": 8668.99, "total_tokens": 57576872} +{"current_steps": 85415, "total_steps": 204665, "loss": 0.0384, "lr": 1.4467161060551313e-06, "epoch": 2.0867026604451175, "percentage": 41.73, "elapsed_time": "1:50:42", "remaining_time": "2:34:33", "throughput": 8669.01, "total_tokens": 57580008} +{"current_steps": 85420, "total_steps": 204665, "loss": 0.0281, "lr": 1.4466398086295422e-06, "epoch": 2.0868248112769647, "percentage": 41.74, "elapsed_time": "1:50:42", "remaining_time": "2:34:32", "throughput": 8669.07, "total_tokens": 57583400} +{"current_steps": 85425, "total_steps": 204665, "loss": 0.0689, "lr": 1.4465635079558683e-06, "epoch": 2.086946962108812, "percentage": 41.74, "elapsed_time": "1:50:42", "remaining_time": "2:34:32", "throughput": 8669.12, "total_tokens": 57586728} +{"current_steps": 85430, "total_steps": 204665, "loss": 0.0012, "lr": 1.4464872040346646e-06, "epoch": 2.087069112940659, "percentage": 41.74, "elapsed_time": "1:50:43", "remaining_time": "2:34:31", "throughput": 8669.17, "total_tokens": 57590120} +{"current_steps": 85435, "total_steps": 204665, "loss": 0.0006, "lr": 1.4464108968664857e-06, "epoch": 2.0871912637725063, "percentage": 41.74, "elapsed_time": "1:50:43", "remaining_time": "2:34:31", "throughput": 8669.28, "total_tokens": 57593896} +{"current_steps": 85440, "total_steps": 204665, "loss": 0.0002, "lr": 1.4463345864518867e-06, "epoch": 2.0873134146043535, "percentage": 41.75, "elapsed_time": "1:50:43", "remaining_time": "2:34:30", "throughput": 8669.32, "total_tokens": 57597160} +{"current_steps": 85445, "total_steps": 204665, "loss": 0.048, "lr": 1.4462582727914228e-06, "epoch": 2.0874355654362007, "percentage": 41.75, "elapsed_time": "1:50:44", "remaining_time": "2:34:30", "throughput": 8669.35, "total_tokens": 57600360} +{"current_steps": 85450, "total_steps": 204665, "loss": 0.038, "lr": 1.4461819558856484e-06, "epoch": 2.087557716268048, "percentage": 41.75, "elapsed_time": "1:50:44", "remaining_time": "2:34:30", "throughput": 8669.38, "total_tokens": 57603560} +{"current_steps": 85455, "total_steps": 204665, "loss": 0.0011, "lr": 1.446105635735119e-06, "epoch": 2.087679867099895, "percentage": 41.75, "elapsed_time": "1:50:44", "remaining_time": "2:34:29", "throughput": 8669.45, "total_tokens": 57607016} +{"current_steps": 85460, "total_steps": 204665, "loss": 0.0248, "lr": 1.4460293123403893e-06, "epoch": 2.0878020179317422, "percentage": 41.76, "elapsed_time": "1:50:45", "remaining_time": "2:34:29", "throughput": 8669.49, "total_tokens": 57610344} +{"current_steps": 85465, "total_steps": 204665, "loss": 0.0407, "lr": 1.4459529857020144e-06, "epoch": 2.0879241687635894, "percentage": 41.76, "elapsed_time": "1:50:45", "remaining_time": "2:34:28", "throughput": 8669.58, "total_tokens": 57613992} +{"current_steps": 85470, "total_steps": 204665, "loss": 0.0002, "lr": 1.4458766558205495e-06, "epoch": 2.0880463195954366, "percentage": 41.76, "elapsed_time": "1:50:45", "remaining_time": "2:34:28", "throughput": 8669.62, "total_tokens": 57617320} +{"current_steps": 85475, "total_steps": 204665, "loss": 0.0002, "lr": 1.4458003226965496e-06, "epoch": 2.088168470427284, "percentage": 41.76, "elapsed_time": "1:50:46", "remaining_time": "2:34:27", "throughput": 8669.69, "total_tokens": 57620776} +{"current_steps": 85480, "total_steps": 204665, "loss": 0.0502, "lr": 1.4457239863305702e-06, "epoch": 2.0882906212591306, "percentage": 41.77, "elapsed_time": "1:50:46", "remaining_time": "2:34:27", "throughput": 8669.76, "total_tokens": 57624296} +{"current_steps": 85485, "total_steps": 204665, "loss": 0.0003, "lr": 1.4456476467231658e-06, "epoch": 2.0884127720909778, "percentage": 41.77, "elapsed_time": "1:50:46", "remaining_time": "2:34:26", "throughput": 8669.86, "total_tokens": 57628072} +{"current_steps": 85490, "total_steps": 204665, "loss": 0.0523, "lr": 1.4455713038748918e-06, "epoch": 2.088534922922825, "percentage": 41.77, "elapsed_time": "1:50:47", "remaining_time": "2:34:26", "throughput": 8669.89, "total_tokens": 57631208} +{"current_steps": 85495, "total_steps": 204665, "loss": 0.0003, "lr": 1.4454949577863036e-06, "epoch": 2.088657073754672, "percentage": 41.77, "elapsed_time": "1:50:47", "remaining_time": "2:34:26", "throughput": 8669.92, "total_tokens": 57634344} +{"current_steps": 85500, "total_steps": 204665, "loss": 0.002, "lr": 1.4454186084579561e-06, "epoch": 2.0887792245865193, "percentage": 41.78, "elapsed_time": "1:50:47", "remaining_time": "2:34:25", "throughput": 8670.01, "total_tokens": 57637992} +{"current_steps": 85505, "total_steps": 204665, "loss": 0.0525, "lr": 1.4453422558904047e-06, "epoch": 2.0889013754183665, "percentage": 41.78, "elapsed_time": "1:50:48", "remaining_time": "2:34:25", "throughput": 8670.03, "total_tokens": 57641128} +{"current_steps": 85510, "total_steps": 204665, "loss": 0.0004, "lr": 1.4452659000842047e-06, "epoch": 2.0890235262502137, "percentage": 41.78, "elapsed_time": "1:50:48", "remaining_time": "2:34:24", "throughput": 8670.09, "total_tokens": 57644520} +{"current_steps": 85515, "total_steps": 204665, "loss": 0.0081, "lr": 1.4451895410399111e-06, "epoch": 2.089145677082061, "percentage": 41.78, "elapsed_time": "1:50:48", "remaining_time": "2:34:24", "throughput": 8670.09, "total_tokens": 57647400} +{"current_steps": 85520, "total_steps": 204665, "loss": 0.0002, "lr": 1.4451131787580795e-06, "epoch": 2.089267827913908, "percentage": 41.79, "elapsed_time": "1:50:49", "remaining_time": "2:34:23", "throughput": 8670.14, "total_tokens": 57650728} +{"current_steps": 85525, "total_steps": 204665, "loss": 0.0126, "lr": 1.4450368132392652e-06, "epoch": 2.0893899787457553, "percentage": 41.79, "elapsed_time": "1:50:49", "remaining_time": "2:34:23", "throughput": 8670.2, "total_tokens": 57654120} +{"current_steps": 85530, "total_steps": 204665, "loss": 0.0686, "lr": 1.4449604444840236e-06, "epoch": 2.0895121295776025, "percentage": 41.79, "elapsed_time": "1:50:50", "remaining_time": "2:34:22", "throughput": 8670.3, "total_tokens": 57657896} +{"current_steps": 85535, "total_steps": 204665, "loss": 0.0994, "lr": 1.4448840724929098e-06, "epoch": 2.0896342804094497, "percentage": 41.79, "elapsed_time": "1:50:50", "remaining_time": "2:34:22", "throughput": 8670.34, "total_tokens": 57661224} +{"current_steps": 85540, "total_steps": 204665, "loss": 0.0443, "lr": 1.4448076972664795e-06, "epoch": 2.089756431241297, "percentage": 41.8, "elapsed_time": "1:50:50", "remaining_time": "2:34:21", "throughput": 8670.36, "total_tokens": 57664296} +{"current_steps": 85545, "total_steps": 204665, "loss": 0.0002, "lr": 1.4447313188052878e-06, "epoch": 2.089878582073144, "percentage": 41.8, "elapsed_time": "1:50:51", "remaining_time": "2:34:21", "throughput": 8670.44, "total_tokens": 57667816} +{"current_steps": 85550, "total_steps": 204665, "loss": 0.0417, "lr": 1.4446549371098907e-06, "epoch": 2.090000732904991, "percentage": 41.8, "elapsed_time": "1:50:51", "remaining_time": "2:34:21", "throughput": 8670.45, "total_tokens": 57670824} +{"current_steps": 85555, "total_steps": 204665, "loss": 0.1099, "lr": 1.4445785521808428e-06, "epoch": 2.0901228837368384, "percentage": 41.8, "elapsed_time": "1:50:51", "remaining_time": "2:34:20", "throughput": 8670.59, "total_tokens": 57674920} +{"current_steps": 85560, "total_steps": 204665, "loss": 0.0588, "lr": 1.4445021640187005e-06, "epoch": 2.0902450345686856, "percentage": 41.8, "elapsed_time": "1:50:52", "remaining_time": "2:34:20", "throughput": 8670.63, "total_tokens": 57678184} +{"current_steps": 85565, "total_steps": 204665, "loss": 0.0002, "lr": 1.4444257726240187e-06, "epoch": 2.090367185400533, "percentage": 41.81, "elapsed_time": "1:50:52", "remaining_time": "2:34:19", "throughput": 8670.65, "total_tokens": 57681256} +{"current_steps": 85570, "total_steps": 204665, "loss": 0.0317, "lr": 1.4443493779973533e-06, "epoch": 2.0904893362323795, "percentage": 41.81, "elapsed_time": "1:50:52", "remaining_time": "2:34:19", "throughput": 8670.69, "total_tokens": 57684456} +{"current_steps": 85575, "total_steps": 204665, "loss": 0.0002, "lr": 1.4442729801392597e-06, "epoch": 2.0906114870642267, "percentage": 41.81, "elapsed_time": "1:50:53", "remaining_time": "2:34:18", "throughput": 8670.76, "total_tokens": 57687912} +{"current_steps": 85580, "total_steps": 204665, "loss": 0.0002, "lr": 1.4441965790502933e-06, "epoch": 2.090733637896074, "percentage": 41.81, "elapsed_time": "1:50:53", "remaining_time": "2:34:18", "throughput": 8670.81, "total_tokens": 57691240} +{"current_steps": 85585, "total_steps": 204665, "loss": 0.0367, "lr": 1.44412017473101e-06, "epoch": 2.090855788727921, "percentage": 41.82, "elapsed_time": "1:50:53", "remaining_time": "2:34:17", "throughput": 8670.86, "total_tokens": 57694632} +{"current_steps": 85590, "total_steps": 204665, "loss": 0.0003, "lr": 1.4440437671819652e-06, "epoch": 2.0909779395597683, "percentage": 41.82, "elapsed_time": "1:50:54", "remaining_time": "2:34:17", "throughput": 8670.91, "total_tokens": 57697960} +{"current_steps": 85595, "total_steps": 204665, "loss": 0.0392, "lr": 1.4439673564037152e-06, "epoch": 2.0911000903916155, "percentage": 41.82, "elapsed_time": "1:50:54", "remaining_time": "2:34:17", "throughput": 8671.05, "total_tokens": 57701992} +{"current_steps": 85600, "total_steps": 204665, "loss": 0.0001, "lr": 1.4438909423968148e-06, "epoch": 2.0912222412234627, "percentage": 41.82, "elapsed_time": "1:50:54", "remaining_time": "2:34:16", "throughput": 8671.09, "total_tokens": 57705320} +{"current_steps": 85605, "total_steps": 204665, "loss": 0.0004, "lr": 1.4438145251618198e-06, "epoch": 2.09134439205531, "percentage": 41.83, "elapsed_time": "1:50:55", "remaining_time": "2:34:16", "throughput": 8671.11, "total_tokens": 57708328} +{"current_steps": 85610, "total_steps": 204665, "loss": 0.0731, "lr": 1.4437381046992865e-06, "epoch": 2.091466542887157, "percentage": 41.83, "elapsed_time": "1:50:55", "remaining_time": "2:34:15", "throughput": 8671.14, "total_tokens": 57711528} +{"current_steps": 85615, "total_steps": 204665, "loss": 0.0679, "lr": 1.4436616810097704e-06, "epoch": 2.0915886937190042, "percentage": 41.83, "elapsed_time": "1:50:55", "remaining_time": "2:34:15", "throughput": 8671.19, "total_tokens": 57714856} +{"current_steps": 85620, "total_steps": 204665, "loss": 0.053, "lr": 1.4435852540938272e-06, "epoch": 2.0917108445508514, "percentage": 41.83, "elapsed_time": "1:50:56", "remaining_time": "2:34:14", "throughput": 8671.22, "total_tokens": 57717992} +{"current_steps": 85625, "total_steps": 204665, "loss": 0.0002, "lr": 1.4435088239520125e-06, "epoch": 2.0918329953826986, "percentage": 41.84, "elapsed_time": "1:50:56", "remaining_time": "2:34:14", "throughput": 8671.34, "total_tokens": 57721896} +{"current_steps": 85630, "total_steps": 204665, "loss": 0.0005, "lr": 1.4434323905848826e-06, "epoch": 2.091955146214546, "percentage": 41.84, "elapsed_time": "1:50:56", "remaining_time": "2:34:13", "throughput": 8671.4, "total_tokens": 57725352} +{"current_steps": 85635, "total_steps": 204665, "loss": 0.0002, "lr": 1.443355953992993e-06, "epoch": 2.092077297046393, "percentage": 41.84, "elapsed_time": "1:50:57", "remaining_time": "2:34:13", "throughput": 8671.46, "total_tokens": 57728744} +{"current_steps": 85640, "total_steps": 204665, "loss": 0.0466, "lr": 1.4432795141768999e-06, "epoch": 2.09219944787824, "percentage": 41.84, "elapsed_time": "1:50:57", "remaining_time": "2:34:13", "throughput": 8671.46, "total_tokens": 57731624} +{"current_steps": 85645, "total_steps": 204665, "loss": 0.0797, "lr": 1.4432030711371586e-06, "epoch": 2.0923215987100874, "percentage": 41.85, "elapsed_time": "1:50:58", "remaining_time": "2:34:12", "throughput": 8671.48, "total_tokens": 57734760} +{"current_steps": 85650, "total_steps": 204665, "loss": 0.0006, "lr": 1.4431266248743254e-06, "epoch": 2.0924437495419346, "percentage": 41.85, "elapsed_time": "1:50:58", "remaining_time": "2:34:12", "throughput": 8671.5, "total_tokens": 57737832} +{"current_steps": 85655, "total_steps": 204665, "loss": 0.0002, "lr": 1.4430501753889563e-06, "epoch": 2.0925659003737813, "percentage": 41.85, "elapsed_time": "1:50:58", "remaining_time": "2:34:11", "throughput": 8671.56, "total_tokens": 57741288} +{"current_steps": 85660, "total_steps": 204665, "loss": 0.0001, "lr": 1.4429737226816072e-06, "epoch": 2.0926880512056285, "percentage": 41.85, "elapsed_time": "1:50:59", "remaining_time": "2:34:11", "throughput": 8671.61, "total_tokens": 57744616} +{"current_steps": 85665, "total_steps": 204665, "loss": 0.0001, "lr": 1.4428972667528338e-06, "epoch": 2.0928102020374757, "percentage": 41.86, "elapsed_time": "1:50:59", "remaining_time": "2:34:10", "throughput": 8671.67, "total_tokens": 57748008} +{"current_steps": 85670, "total_steps": 204665, "loss": 0.0468, "lr": 1.4428208076031925e-06, "epoch": 2.092932352869323, "percentage": 41.86, "elapsed_time": "1:50:59", "remaining_time": "2:34:10", "throughput": 8671.67, "total_tokens": 57750952} +{"current_steps": 85675, "total_steps": 204665, "loss": 0.0002, "lr": 1.4427443452332392e-06, "epoch": 2.09305450370117, "percentage": 41.86, "elapsed_time": "1:51:00", "remaining_time": "2:34:09", "throughput": 8671.69, "total_tokens": 57754088} +{"current_steps": 85680, "total_steps": 204665, "loss": 0.0001, "lr": 1.4426678796435301e-06, "epoch": 2.0931766545330173, "percentage": 41.86, "elapsed_time": "1:51:00", "remaining_time": "2:34:09", "throughput": 8671.75, "total_tokens": 57757480} +{"current_steps": 85685, "total_steps": 204665, "loss": 0.0001, "lr": 1.4425914108346209e-06, "epoch": 2.0932988053648645, "percentage": 41.87, "elapsed_time": "1:51:00", "remaining_time": "2:34:08", "throughput": 8671.81, "total_tokens": 57760872} +{"current_steps": 85690, "total_steps": 204665, "loss": 0.0001, "lr": 1.442514938807068e-06, "epoch": 2.0934209561967116, "percentage": 41.87, "elapsed_time": "1:51:01", "remaining_time": "2:34:08", "throughput": 8671.9, "total_tokens": 57764584} +{"current_steps": 85695, "total_steps": 204665, "loss": 0.0003, "lr": 1.4424384635614274e-06, "epoch": 2.093543107028559, "percentage": 41.87, "elapsed_time": "1:51:01", "remaining_time": "2:34:08", "throughput": 8671.97, "total_tokens": 57768104} +{"current_steps": 85700, "total_steps": 204665, "loss": 0.0001, "lr": 1.4423619850982554e-06, "epoch": 2.093665257860406, "percentage": 41.87, "elapsed_time": "1:51:01", "remaining_time": "2:34:07", "throughput": 8671.98, "total_tokens": 57771048} +{"current_steps": 85705, "total_steps": 204665, "loss": 0.1421, "lr": 1.442285503418108e-06, "epoch": 2.093787408692253, "percentage": 41.88, "elapsed_time": "1:51:02", "remaining_time": "2:34:07", "throughput": 8672.01, "total_tokens": 57774248} +{"current_steps": 85710, "total_steps": 204665, "loss": 0.0003, "lr": 1.4422090185215413e-06, "epoch": 2.0939095595241004, "percentage": 41.88, "elapsed_time": "1:51:02", "remaining_time": "2:34:06", "throughput": 8672.08, "total_tokens": 57777832} +{"current_steps": 85715, "total_steps": 204665, "loss": 0.0822, "lr": 1.4421325304091118e-06, "epoch": 2.0940317103559476, "percentage": 41.88, "elapsed_time": "1:51:02", "remaining_time": "2:34:06", "throughput": 8672.12, "total_tokens": 57781032} +{"current_steps": 85720, "total_steps": 204665, "loss": 0.0003, "lr": 1.4420560390813755e-06, "epoch": 2.094153861187795, "percentage": 41.88, "elapsed_time": "1:51:03", "remaining_time": "2:34:05", "throughput": 8672.21, "total_tokens": 57784744} +{"current_steps": 85725, "total_steps": 204665, "loss": 0.033, "lr": 1.4419795445388892e-06, "epoch": 2.094276012019642, "percentage": 41.89, "elapsed_time": "1:51:03", "remaining_time": "2:34:05", "throughput": 8672.33, "total_tokens": 57788648} +{"current_steps": 85730, "total_steps": 204665, "loss": 0.0001, "lr": 1.4419030467822084e-06, "epoch": 2.094398162851489, "percentage": 41.89, "elapsed_time": "1:51:03", "remaining_time": "2:34:04", "throughput": 8672.41, "total_tokens": 57792232} +{"current_steps": 85735, "total_steps": 204665, "loss": 0.0003, "lr": 1.4418265458118897e-06, "epoch": 2.0945203136833364, "percentage": 41.89, "elapsed_time": "1:51:04", "remaining_time": "2:34:04", "throughput": 8672.47, "total_tokens": 57795624} +{"current_steps": 85740, "total_steps": 204665, "loss": 0.0614, "lr": 1.4417500416284898e-06, "epoch": 2.0946424645151835, "percentage": 41.89, "elapsed_time": "1:51:04", "remaining_time": "2:34:04", "throughput": 8672.52, "total_tokens": 57798888} +{"current_steps": 85745, "total_steps": 204665, "loss": 0.0001, "lr": 1.4416735342325646e-06, "epoch": 2.0947646153470307, "percentage": 41.9, "elapsed_time": "1:51:04", "remaining_time": "2:34:03", "throughput": 8672.57, "total_tokens": 57802280} +{"current_steps": 85750, "total_steps": 204665, "loss": 0.0057, "lr": 1.441597023624671e-06, "epoch": 2.0948867661788775, "percentage": 41.9, "elapsed_time": "1:51:05", "remaining_time": "2:34:03", "throughput": 8672.63, "total_tokens": 57805736} +{"current_steps": 85755, "total_steps": 204665, "loss": 0.0001, "lr": 1.4415205098053647e-06, "epoch": 2.0950089170107247, "percentage": 41.9, "elapsed_time": "1:51:05", "remaining_time": "2:34:02", "throughput": 8672.67, "total_tokens": 57809000} +{"current_steps": 85760, "total_steps": 204665, "loss": 0.0598, "lr": 1.4414439927752026e-06, "epoch": 2.095131067842572, "percentage": 41.9, "elapsed_time": "1:51:06", "remaining_time": "2:34:02", "throughput": 8672.77, "total_tokens": 57812712} +{"current_steps": 85765, "total_steps": 204665, "loss": 0.0097, "lr": 1.4413674725347408e-06, "epoch": 2.095253218674419, "percentage": 41.91, "elapsed_time": "1:51:06", "remaining_time": "2:34:01", "throughput": 8672.79, "total_tokens": 57815848} +{"current_steps": 85770, "total_steps": 204665, "loss": 0.0621, "lr": 1.4412909490845364e-06, "epoch": 2.0953753695062662, "percentage": 41.91, "elapsed_time": "1:51:06", "remaining_time": "2:34:01", "throughput": 8672.89, "total_tokens": 57819560} +{"current_steps": 85775, "total_steps": 204665, "loss": 0.0743, "lr": 1.4412144224251454e-06, "epoch": 2.0954975203381134, "percentage": 41.91, "elapsed_time": "1:51:07", "remaining_time": "2:34:00", "throughput": 8672.93, "total_tokens": 57822824} +{"current_steps": 85780, "total_steps": 204665, "loss": 0.0001, "lr": 1.4411378925571246e-06, "epoch": 2.0956196711699606, "percentage": 41.91, "elapsed_time": "1:51:07", "remaining_time": "2:34:00", "throughput": 8673.05, "total_tokens": 57826792} +{"current_steps": 85785, "total_steps": 204665, "loss": 0.0003, "lr": 1.4410613594810302e-06, "epoch": 2.095741822001808, "percentage": 41.91, "elapsed_time": "1:51:07", "remaining_time": "2:34:00", "throughput": 8673.14, "total_tokens": 57830440} +{"current_steps": 85790, "total_steps": 204665, "loss": 0.0003, "lr": 1.440984823197419e-06, "epoch": 2.095863972833655, "percentage": 41.92, "elapsed_time": "1:51:08", "remaining_time": "2:33:59", "throughput": 8673.16, "total_tokens": 57833576} +{"current_steps": 85795, "total_steps": 204665, "loss": 0.0001, "lr": 1.4409082837068476e-06, "epoch": 2.095986123665502, "percentage": 41.92, "elapsed_time": "1:51:08", "remaining_time": "2:33:59", "throughput": 8673.22, "total_tokens": 57836968} +{"current_steps": 85800, "total_steps": 204665, "loss": 0.0003, "lr": 1.4408317410098725e-06, "epoch": 2.0961082744973494, "percentage": 41.92, "elapsed_time": "1:51:08", "remaining_time": "2:33:58", "throughput": 8673.24, "total_tokens": 57840104} +{"current_steps": 85805, "total_steps": 204665, "loss": 0.0468, "lr": 1.4407551951070504e-06, "epoch": 2.0962304253291966, "percentage": 41.92, "elapsed_time": "1:51:09", "remaining_time": "2:33:58", "throughput": 8673.28, "total_tokens": 57843304} +{"current_steps": 85810, "total_steps": 204665, "loss": 0.1659, "lr": 1.440678645998938e-06, "epoch": 2.0963525761610438, "percentage": 41.93, "elapsed_time": "1:51:09", "remaining_time": "2:33:57", "throughput": 8673.32, "total_tokens": 57846568} +{"current_steps": 85815, "total_steps": 204665, "loss": 0.0002, "lr": 1.4406020936860921e-06, "epoch": 2.096474726992891, "percentage": 41.93, "elapsed_time": "1:51:09", "remaining_time": "2:33:57", "throughput": 8673.37, "total_tokens": 57849896} +{"current_steps": 85820, "total_steps": 204665, "loss": 0.0798, "lr": 1.4405255381690692e-06, "epoch": 2.096596877824738, "percentage": 41.93, "elapsed_time": "1:51:10", "remaining_time": "2:33:56", "throughput": 8673.39, "total_tokens": 57852968} +{"current_steps": 85825, "total_steps": 204665, "loss": 0.0424, "lr": 1.440448979448426e-06, "epoch": 2.0967190286565853, "percentage": 41.93, "elapsed_time": "1:51:10", "remaining_time": "2:33:56", "throughput": 8673.51, "total_tokens": 57856936} +{"current_steps": 85830, "total_steps": 204665, "loss": 0.0004, "lr": 1.4403724175247191e-06, "epoch": 2.0968411794884325, "percentage": 41.94, "elapsed_time": "1:51:10", "remaining_time": "2:33:56", "throughput": 8673.52, "total_tokens": 57859880} +{"current_steps": 85835, "total_steps": 204665, "loss": 0.0001, "lr": 1.4402958523985061e-06, "epoch": 2.0969633303202793, "percentage": 41.94, "elapsed_time": "1:51:11", "remaining_time": "2:33:55", "throughput": 8673.55, "total_tokens": 57863080} +{"current_steps": 85840, "total_steps": 204665, "loss": 0.0918, "lr": 1.440219284070343e-06, "epoch": 2.0970854811521265, "percentage": 41.94, "elapsed_time": "1:51:11", "remaining_time": "2:33:55", "throughput": 8673.58, "total_tokens": 57866280} +{"current_steps": 85845, "total_steps": 204665, "loss": 0.0002, "lr": 1.4401427125407866e-06, "epoch": 2.0972076319839736, "percentage": 41.94, "elapsed_time": "1:51:11", "remaining_time": "2:33:54", "throughput": 8673.64, "total_tokens": 57869672} +{"current_steps": 85850, "total_steps": 204665, "loss": 0.0704, "lr": 1.4400661378103944e-06, "epoch": 2.097329782815821, "percentage": 41.95, "elapsed_time": "1:51:12", "remaining_time": "2:33:54", "throughput": 8673.66, "total_tokens": 57872744} +{"current_steps": 85855, "total_steps": 204665, "loss": 0.0001, "lr": 1.4399895598797226e-06, "epoch": 2.097451933647668, "percentage": 41.95, "elapsed_time": "1:51:12", "remaining_time": "2:33:53", "throughput": 8673.71, "total_tokens": 57876136} +{"current_steps": 85860, "total_steps": 204665, "loss": 0.0378, "lr": 1.4399129787493288e-06, "epoch": 2.097574084479515, "percentage": 41.95, "elapsed_time": "1:51:12", "remaining_time": "2:33:53", "throughput": 8673.79, "total_tokens": 57879656} +{"current_steps": 85865, "total_steps": 204665, "loss": 0.0577, "lr": 1.4398363944197688e-06, "epoch": 2.0976962353113624, "percentage": 41.95, "elapsed_time": "1:51:13", "remaining_time": "2:33:52", "throughput": 8673.83, "total_tokens": 57882984} +{"current_steps": 85870, "total_steps": 204665, "loss": 0.0002, "lr": 1.439759806891601e-06, "epoch": 2.0978183861432096, "percentage": 41.96, "elapsed_time": "1:51:13", "remaining_time": "2:33:52", "throughput": 8673.89, "total_tokens": 57886440} +{"current_steps": 85875, "total_steps": 204665, "loss": 0.0005, "lr": 1.4396832161653811e-06, "epoch": 2.097940536975057, "percentage": 41.96, "elapsed_time": "1:51:13", "remaining_time": "2:33:52", "throughput": 8673.9, "total_tokens": 57889448} +{"current_steps": 85880, "total_steps": 204665, "loss": 0.03, "lr": 1.4396066222416668e-06, "epoch": 2.098062687806904, "percentage": 41.96, "elapsed_time": "1:51:14", "remaining_time": "2:33:51", "throughput": 8673.91, "total_tokens": 57892520} +{"current_steps": 85885, "total_steps": 204665, "loss": 0.0006, "lr": 1.4395300251210147e-06, "epoch": 2.098184838638751, "percentage": 41.96, "elapsed_time": "1:51:14", "remaining_time": "2:33:51", "throughput": 8673.92, "total_tokens": 57895528} +{"current_steps": 85890, "total_steps": 204665, "loss": 0.0001, "lr": 1.439453424803982e-06, "epoch": 2.0983069894705983, "percentage": 41.97, "elapsed_time": "1:51:15", "remaining_time": "2:33:50", "throughput": 8673.95, "total_tokens": 57898728} +{"current_steps": 85895, "total_steps": 204665, "loss": 0.0002, "lr": 1.4393768212911259e-06, "epoch": 2.0984291403024455, "percentage": 41.97, "elapsed_time": "1:51:15", "remaining_time": "2:33:50", "throughput": 8673.99, "total_tokens": 57901992} +{"current_steps": 85900, "total_steps": 204665, "loss": 0.0879, "lr": 1.4393002145830035e-06, "epoch": 2.0985512911342927, "percentage": 41.97, "elapsed_time": "1:51:15", "remaining_time": "2:33:49", "throughput": 8674.09, "total_tokens": 57905704} +{"current_steps": 85905, "total_steps": 204665, "loss": 0.0004, "lr": 1.439223604680172e-06, "epoch": 2.09867344196614, "percentage": 41.97, "elapsed_time": "1:51:16", "remaining_time": "2:33:49", "throughput": 8674.17, "total_tokens": 57909288} +{"current_steps": 85910, "total_steps": 204665, "loss": 0.0002, "lr": 1.439146991583188e-06, "epoch": 2.098795592797987, "percentage": 41.98, "elapsed_time": "1:51:16", "remaining_time": "2:33:48", "throughput": 8674.23, "total_tokens": 57912744} +{"current_steps": 85915, "total_steps": 204665, "loss": 0.0537, "lr": 1.439070375292609e-06, "epoch": 2.0989177436298343, "percentage": 41.98, "elapsed_time": "1:51:16", "remaining_time": "2:33:48", "throughput": 8674.31, "total_tokens": 57916328} +{"current_steps": 85920, "total_steps": 204665, "loss": 0.0002, "lr": 1.4389937558089919e-06, "epoch": 2.0990398944616815, "percentage": 41.98, "elapsed_time": "1:51:17", "remaining_time": "2:33:48", "throughput": 8674.36, "total_tokens": 57919720} +{"current_steps": 85925, "total_steps": 204665, "loss": 0.1204, "lr": 1.4389171331328945e-06, "epoch": 2.0991620452935287, "percentage": 41.98, "elapsed_time": "1:51:17", "remaining_time": "2:33:47", "throughput": 8674.43, "total_tokens": 57923240} +{"current_steps": 85930, "total_steps": 204665, "loss": 0.0001, "lr": 1.4388405072648735e-06, "epoch": 2.0992841961253754, "percentage": 41.99, "elapsed_time": "1:51:17", "remaining_time": "2:33:47", "throughput": 8674.47, "total_tokens": 57926440} +{"current_steps": 85935, "total_steps": 204665, "loss": 0.0001, "lr": 1.4387638782054863e-06, "epoch": 2.0994063469572226, "percentage": 41.99, "elapsed_time": "1:51:18", "remaining_time": "2:33:46", "throughput": 8674.55, "total_tokens": 57930024} +{"current_steps": 85940, "total_steps": 204665, "loss": 0.0444, "lr": 1.4386872459552902e-06, "epoch": 2.09952849778907, "percentage": 41.99, "elapsed_time": "1:51:18", "remaining_time": "2:33:46", "throughput": 8674.58, "total_tokens": 57933224} +{"current_steps": 85945, "total_steps": 204665, "loss": 0.0002, "lr": 1.4386106105148425e-06, "epoch": 2.099650648620917, "percentage": 41.99, "elapsed_time": "1:51:18", "remaining_time": "2:33:45", "throughput": 8674.67, "total_tokens": 57936936} +{"current_steps": 85950, "total_steps": 204665, "loss": 0.0001, "lr": 1.4385339718847002e-06, "epoch": 2.099772799452764, "percentage": 42.0, "elapsed_time": "1:51:19", "remaining_time": "2:33:45", "throughput": 8674.71, "total_tokens": 57940136} +{"current_steps": 85955, "total_steps": 204665, "loss": 0.049, "lr": 1.4384573300654213e-06, "epoch": 2.0998949502846114, "percentage": 42.0, "elapsed_time": "1:51:19", "remaining_time": "2:33:44", "throughput": 8674.83, "total_tokens": 57944040} +{"current_steps": 85960, "total_steps": 204665, "loss": 0.0636, "lr": 1.4383806850575627e-06, "epoch": 2.1000171011164586, "percentage": 42.0, "elapsed_time": "1:51:19", "remaining_time": "2:33:44", "throughput": 8674.89, "total_tokens": 57947496} +{"current_steps": 85965, "total_steps": 204665, "loss": 0.0, "lr": 1.4383040368616816e-06, "epoch": 2.1001392519483058, "percentage": 42.0, "elapsed_time": "1:51:20", "remaining_time": "2:33:44", "throughput": 8675.0, "total_tokens": 57951336} +{"current_steps": 85970, "total_steps": 204665, "loss": 0.0366, "lr": 1.4382273854783358e-06, "epoch": 2.100261402780153, "percentage": 42.01, "elapsed_time": "1:51:20", "remaining_time": "2:33:43", "throughput": 8675.08, "total_tokens": 57954920} +{"current_steps": 85975, "total_steps": 204665, "loss": 0.1009, "lr": 1.4381507309080827e-06, "epoch": 2.100383553612, "percentage": 42.01, "elapsed_time": "1:51:20", "remaining_time": "2:33:43", "throughput": 8675.13, "total_tokens": 57958312} +{"current_steps": 85980, "total_steps": 204665, "loss": 0.0002, "lr": 1.4380740731514793e-06, "epoch": 2.1005057044438473, "percentage": 42.01, "elapsed_time": "1:51:21", "remaining_time": "2:33:42", "throughput": 8675.22, "total_tokens": 57961960} +{"current_steps": 85985, "total_steps": 204665, "loss": 0.0796, "lr": 1.4379974122090835e-06, "epoch": 2.1006278552756945, "percentage": 42.01, "elapsed_time": "1:51:21", "remaining_time": "2:33:42", "throughput": 8675.27, "total_tokens": 57965352} +{"current_steps": 85990, "total_steps": 204665, "loss": 0.0002, "lr": 1.4379207480814527e-06, "epoch": 2.1007500061075417, "percentage": 42.02, "elapsed_time": "1:51:22", "remaining_time": "2:33:41", "throughput": 8675.3, "total_tokens": 57968488} +{"current_steps": 85995, "total_steps": 204665, "loss": 0.0513, "lr": 1.4378440807691447e-06, "epoch": 2.100872156939389, "percentage": 42.02, "elapsed_time": "1:51:22", "remaining_time": "2:33:41", "throughput": 8675.39, "total_tokens": 57972136} +{"current_steps": 86000, "total_steps": 204665, "loss": 0.0003, "lr": 1.4377674102727166e-06, "epoch": 2.100994307771236, "percentage": 42.02, "elapsed_time": "1:51:22", "remaining_time": "2:33:40", "throughput": 8675.43, "total_tokens": 57975464} +{"current_steps": 86005, "total_steps": 204665, "loss": 0.0002, "lr": 1.4376907365927262e-06, "epoch": 2.1011164586030833, "percentage": 42.02, "elapsed_time": "1:51:23", "remaining_time": "2:33:40", "throughput": 8675.51, "total_tokens": 57979048} +{"current_steps": 86010, "total_steps": 204665, "loss": 0.0003, "lr": 1.437614059729731e-06, "epoch": 2.1012386094349305, "percentage": 42.02, "elapsed_time": "1:51:23", "remaining_time": "2:33:40", "throughput": 8675.55, "total_tokens": 57982248} +{"current_steps": 86015, "total_steps": 204665, "loss": 0.0001, "lr": 1.4375373796842887e-06, "epoch": 2.101360760266777, "percentage": 42.03, "elapsed_time": "1:51:23", "remaining_time": "2:33:39", "throughput": 8675.6, "total_tokens": 57985640} +{"current_steps": 86020, "total_steps": 204665, "loss": 0.0001, "lr": 1.4374606964569569e-06, "epoch": 2.1014829110986244, "percentage": 42.03, "elapsed_time": "1:51:24", "remaining_time": "2:33:39", "throughput": 8675.63, "total_tokens": 57988776} +{"current_steps": 86025, "total_steps": 204665, "loss": 0.0001, "lr": 1.4373840100482932e-06, "epoch": 2.1016050619304716, "percentage": 42.03, "elapsed_time": "1:51:24", "remaining_time": "2:33:38", "throughput": 8675.69, "total_tokens": 57992296} +{"current_steps": 86030, "total_steps": 204665, "loss": 0.0388, "lr": 1.4373073204588556e-06, "epoch": 2.1017272127623188, "percentage": 42.03, "elapsed_time": "1:51:24", "remaining_time": "2:33:38", "throughput": 8675.74, "total_tokens": 57995624} +{"current_steps": 86035, "total_steps": 204665, "loss": 0.0002, "lr": 1.437230627689201e-06, "epoch": 2.101849363594166, "percentage": 42.04, "elapsed_time": "1:51:25", "remaining_time": "2:33:37", "throughput": 8675.8, "total_tokens": 57999016} +{"current_steps": 86040, "total_steps": 204665, "loss": 0.0003, "lr": 1.4371539317398882e-06, "epoch": 2.101971514426013, "percentage": 42.04, "elapsed_time": "1:51:25", "remaining_time": "2:33:37", "throughput": 8675.84, "total_tokens": 58002280} +{"current_steps": 86045, "total_steps": 204665, "loss": 0.0003, "lr": 1.437077232611474e-06, "epoch": 2.1020936652578603, "percentage": 42.04, "elapsed_time": "1:51:25", "remaining_time": "2:33:36", "throughput": 8675.86, "total_tokens": 58005416} +{"current_steps": 86050, "total_steps": 204665, "loss": 0.0501, "lr": 1.4370005303045168e-06, "epoch": 2.1022158160897075, "percentage": 42.04, "elapsed_time": "1:51:26", "remaining_time": "2:33:36", "throughput": 8675.9, "total_tokens": 58008680} +{"current_steps": 86055, "total_steps": 204665, "loss": 0.0001, "lr": 1.436923824819574e-06, "epoch": 2.1023379669215547, "percentage": 42.05, "elapsed_time": "1:51:26", "remaining_time": "2:33:36", "throughput": 8675.97, "total_tokens": 58012200} +{"current_steps": 86060, "total_steps": 204665, "loss": 0.0001, "lr": 1.4368471161572042e-06, "epoch": 2.102460117753402, "percentage": 42.05, "elapsed_time": "1:51:26", "remaining_time": "2:33:35", "throughput": 8676.05, "total_tokens": 58015784} +{"current_steps": 86065, "total_steps": 204665, "loss": 0.0371, "lr": 1.4367704043179643e-06, "epoch": 2.102582268585249, "percentage": 42.05, "elapsed_time": "1:51:27", "remaining_time": "2:33:35", "throughput": 8676.1, "total_tokens": 58019048} +{"current_steps": 86070, "total_steps": 204665, "loss": 0.0001, "lr": 1.4366936893024124e-06, "epoch": 2.1027044194170963, "percentage": 42.05, "elapsed_time": "1:51:27", "remaining_time": "2:33:34", "throughput": 8676.11, "total_tokens": 58022056} +{"current_steps": 86075, "total_steps": 204665, "loss": 0.0001, "lr": 1.4366169711111068e-06, "epoch": 2.1028265702489435, "percentage": 42.06, "elapsed_time": "1:51:27", "remaining_time": "2:33:34", "throughput": 8676.27, "total_tokens": 58026344} +{"current_steps": 86080, "total_steps": 204665, "loss": 0.0001, "lr": 1.4365402497446048e-06, "epoch": 2.1029487210807907, "percentage": 42.06, "elapsed_time": "1:51:28", "remaining_time": "2:33:33", "throughput": 8676.29, "total_tokens": 58029416} +{"current_steps": 86085, "total_steps": 204665, "loss": 0.0372, "lr": 1.436463525203465e-06, "epoch": 2.103070871912638, "percentage": 42.06, "elapsed_time": "1:51:28", "remaining_time": "2:33:33", "throughput": 8676.33, "total_tokens": 58032744} +{"current_steps": 86090, "total_steps": 204665, "loss": 0.0073, "lr": 1.4363867974882448e-06, "epoch": 2.103193022744485, "percentage": 42.06, "elapsed_time": "1:51:28", "remaining_time": "2:33:32", "throughput": 8676.41, "total_tokens": 58036328} +{"current_steps": 86095, "total_steps": 204665, "loss": 0.0002, "lr": 1.436310066599503e-06, "epoch": 2.1033151735763322, "percentage": 42.07, "elapsed_time": "1:51:29", "remaining_time": "2:33:32", "throughput": 8676.49, "total_tokens": 58039848} +{"current_steps": 86100, "total_steps": 204665, "loss": 0.0752, "lr": 1.4362333325377964e-06, "epoch": 2.1034373244081794, "percentage": 42.07, "elapsed_time": "1:51:29", "remaining_time": "2:33:32", "throughput": 8676.48, "total_tokens": 58042728} +{"current_steps": 86105, "total_steps": 204665, "loss": 0.0003, "lr": 1.436156595303684e-06, "epoch": 2.103559475240026, "percentage": 42.07, "elapsed_time": "1:51:30", "remaining_time": "2:33:31", "throughput": 8676.52, "total_tokens": 58045992} +{"current_steps": 86110, "total_steps": 204665, "loss": 0.0467, "lr": 1.4360798548977235e-06, "epoch": 2.1036816260718734, "percentage": 42.07, "elapsed_time": "1:51:30", "remaining_time": "2:33:31", "throughput": 8676.59, "total_tokens": 58049448} +{"current_steps": 86115, "total_steps": 204665, "loss": 0.045, "lr": 1.4360031113204729e-06, "epoch": 2.1038037769037206, "percentage": 42.08, "elapsed_time": "1:51:30", "remaining_time": "2:33:30", "throughput": 8676.64, "total_tokens": 58052776} +{"current_steps": 86120, "total_steps": 204665, "loss": 0.0454, "lr": 1.4359263645724905e-06, "epoch": 2.1039259277355677, "percentage": 42.08, "elapsed_time": "1:51:31", "remaining_time": "2:33:30", "throughput": 8676.68, "total_tokens": 58056040} +{"current_steps": 86125, "total_steps": 204665, "loss": 0.0878, "lr": 1.4358496146543343e-06, "epoch": 2.104048078567415, "percentage": 42.08, "elapsed_time": "1:51:31", "remaining_time": "2:33:29", "throughput": 8676.72, "total_tokens": 58059304} +{"current_steps": 86130, "total_steps": 204665, "loss": 0.0716, "lr": 1.4357728615665626e-06, "epoch": 2.104170229399262, "percentage": 42.08, "elapsed_time": "1:51:31", "remaining_time": "2:33:29", "throughput": 8676.76, "total_tokens": 58062568} +{"current_steps": 86135, "total_steps": 204665, "loss": 0.0006, "lr": 1.435696105309733e-06, "epoch": 2.1042923802311093, "percentage": 42.09, "elapsed_time": "1:51:32", "remaining_time": "2:33:28", "throughput": 8676.86, "total_tokens": 58066344} +{"current_steps": 86140, "total_steps": 204665, "loss": 0.0009, "lr": 1.4356193458844045e-06, "epoch": 2.1044145310629565, "percentage": 42.09, "elapsed_time": "1:51:32", "remaining_time": "2:33:28", "throughput": 8676.89, "total_tokens": 58069544} +{"current_steps": 86145, "total_steps": 204665, "loss": 0.0001, "lr": 1.4355425832911348e-06, "epoch": 2.1045366818948037, "percentage": 42.09, "elapsed_time": "1:51:32", "remaining_time": "2:33:28", "throughput": 8676.95, "total_tokens": 58072936} +{"current_steps": 86150, "total_steps": 204665, "loss": 0.0001, "lr": 1.4354658175304824e-06, "epoch": 2.104658832726651, "percentage": 42.09, "elapsed_time": "1:51:33", "remaining_time": "2:33:27", "throughput": 8677.08, "total_tokens": 58077032} +{"current_steps": 86155, "total_steps": 204665, "loss": 0.0608, "lr": 1.4353890486030054e-06, "epoch": 2.104780983558498, "percentage": 42.1, "elapsed_time": "1:51:33", "remaining_time": "2:33:27", "throughput": 8677.15, "total_tokens": 58080552} +{"current_steps": 86160, "total_steps": 204665, "loss": 0.0652, "lr": 1.4353122765092622e-06, "epoch": 2.1049031343903453, "percentage": 42.1, "elapsed_time": "1:51:33", "remaining_time": "2:33:26", "throughput": 8677.24, "total_tokens": 58084136} +{"current_steps": 86165, "total_steps": 204665, "loss": 0.0525, "lr": 1.435235501249811e-06, "epoch": 2.1050252852221925, "percentage": 42.1, "elapsed_time": "1:51:34", "remaining_time": "2:33:26", "throughput": 8677.32, "total_tokens": 58087720} +{"current_steps": 86170, "total_steps": 204665, "loss": 0.0515, "lr": 1.4351587228252102e-06, "epoch": 2.1051474360540396, "percentage": 42.1, "elapsed_time": "1:51:34", "remaining_time": "2:33:25", "throughput": 8677.34, "total_tokens": 58090856} +{"current_steps": 86175, "total_steps": 204665, "loss": 0.0427, "lr": 1.4350819412360182e-06, "epoch": 2.105269586885887, "percentage": 42.11, "elapsed_time": "1:51:34", "remaining_time": "2:33:25", "throughput": 8677.38, "total_tokens": 58094056} +{"current_steps": 86180, "total_steps": 204665, "loss": 0.0004, "lr": 1.4350051564827932e-06, "epoch": 2.105391737717734, "percentage": 42.11, "elapsed_time": "1:51:35", "remaining_time": "2:33:24", "throughput": 8677.48, "total_tokens": 58097832} +{"current_steps": 86185, "total_steps": 204665, "loss": 0.0803, "lr": 1.4349283685660935e-06, "epoch": 2.105513888549581, "percentage": 42.11, "elapsed_time": "1:51:35", "remaining_time": "2:33:24", "throughput": 8677.51, "total_tokens": 58101032} +{"current_steps": 86190, "total_steps": 204665, "loss": 0.0002, "lr": 1.434851577486478e-06, "epoch": 2.1056360393814284, "percentage": 42.11, "elapsed_time": "1:51:35", "remaining_time": "2:33:24", "throughput": 8677.52, "total_tokens": 58103976} +{"current_steps": 86195, "total_steps": 204665, "loss": 0.0004, "lr": 1.4347747832445047e-06, "epoch": 2.105758190213275, "percentage": 42.12, "elapsed_time": "1:51:36", "remaining_time": "2:33:23", "throughput": 8677.59, "total_tokens": 58107496} +{"current_steps": 86200, "total_steps": 204665, "loss": 0.0007, "lr": 1.4346979858407323e-06, "epoch": 2.1058803410451223, "percentage": 42.12, "elapsed_time": "1:51:36", "remaining_time": "2:33:23", "throughput": 8677.63, "total_tokens": 58110760} +{"current_steps": 86205, "total_steps": 204665, "loss": 0.0001, "lr": 1.434621185275719e-06, "epoch": 2.1060024918769695, "percentage": 42.12, "elapsed_time": "1:51:36", "remaining_time": "2:33:22", "throughput": 8677.7, "total_tokens": 58114280} +{"current_steps": 86210, "total_steps": 204665, "loss": 0.0036, "lr": 1.434544381550024e-06, "epoch": 2.1061246427088167, "percentage": 42.12, "elapsed_time": "1:51:37", "remaining_time": "2:33:22", "throughput": 8677.78, "total_tokens": 58117864} +{"current_steps": 86215, "total_steps": 204665, "loss": 0.0799, "lr": 1.4344675746642054e-06, "epoch": 2.106246793540664, "percentage": 42.12, "elapsed_time": "1:51:37", "remaining_time": "2:33:21", "throughput": 8677.8, "total_tokens": 58121000} +{"current_steps": 86220, "total_steps": 204665, "loss": 0.0002, "lr": 1.4343907646188217e-06, "epoch": 2.106368944372511, "percentage": 42.13, "elapsed_time": "1:51:38", "remaining_time": "2:33:21", "throughput": 8677.83, "total_tokens": 58124136} +{"current_steps": 86225, "total_steps": 204665, "loss": 0.0001, "lr": 1.434313951414431e-06, "epoch": 2.1064910952043583, "percentage": 42.13, "elapsed_time": "1:51:38", "remaining_time": "2:33:20", "throughput": 8677.93, "total_tokens": 58127912} +{"current_steps": 86230, "total_steps": 204665, "loss": 0.0545, "lr": 1.4342371350515927e-06, "epoch": 2.1066132460362055, "percentage": 42.13, "elapsed_time": "1:51:38", "remaining_time": "2:33:20", "throughput": 8677.92, "total_tokens": 58130792} +{"current_steps": 86235, "total_steps": 204665, "loss": 0.0466, "lr": 1.4341603155308653e-06, "epoch": 2.1067353968680527, "percentage": 42.13, "elapsed_time": "1:51:39", "remaining_time": "2:33:20", "throughput": 8678.02, "total_tokens": 58134568} +{"current_steps": 86240, "total_steps": 204665, "loss": 0.0418, "lr": 1.4340834928528072e-06, "epoch": 2.1068575476999, "percentage": 42.14, "elapsed_time": "1:51:39", "remaining_time": "2:33:19", "throughput": 8678.04, "total_tokens": 58137640} +{"current_steps": 86245, "total_steps": 204665, "loss": 0.0003, "lr": 1.434006667017977e-06, "epoch": 2.106979698531747, "percentage": 42.14, "elapsed_time": "1:51:39", "remaining_time": "2:33:19", "throughput": 8678.17, "total_tokens": 58141672} +{"current_steps": 86250, "total_steps": 204665, "loss": 0.0, "lr": 1.433929838026934e-06, "epoch": 2.1071018493635942, "percentage": 42.14, "elapsed_time": "1:51:40", "remaining_time": "2:33:18", "throughput": 8678.22, "total_tokens": 58145000} +{"current_steps": 86255, "total_steps": 204665, "loss": 0.0004, "lr": 1.4338530058802363e-06, "epoch": 2.1072240001954414, "percentage": 42.14, "elapsed_time": "1:51:40", "remaining_time": "2:33:18", "throughput": 8678.35, "total_tokens": 58148968} +{"current_steps": 86260, "total_steps": 204665, "loss": 0.0001, "lr": 1.4337761705784427e-06, "epoch": 2.1073461510272886, "percentage": 42.15, "elapsed_time": "1:51:40", "remaining_time": "2:33:17", "throughput": 8678.39, "total_tokens": 58152296} +{"current_steps": 86265, "total_steps": 204665, "loss": 0.0693, "lr": 1.4336993321221123e-06, "epoch": 2.107468301859136, "percentage": 42.15, "elapsed_time": "1:51:41", "remaining_time": "2:33:17", "throughput": 8678.44, "total_tokens": 58155624} +{"current_steps": 86270, "total_steps": 204665, "loss": 0.0002, "lr": 1.4336224905118038e-06, "epoch": 2.107590452690983, "percentage": 42.15, "elapsed_time": "1:51:41", "remaining_time": "2:33:16", "throughput": 8678.48, "total_tokens": 58158888} +{"current_steps": 86275, "total_steps": 204665, "loss": 0.0001, "lr": 1.4335456457480758e-06, "epoch": 2.10771260352283, "percentage": 42.15, "elapsed_time": "1:51:41", "remaining_time": "2:33:16", "throughput": 8678.53, "total_tokens": 58162216} +{"current_steps": 86280, "total_steps": 204665, "loss": 0.0513, "lr": 1.4334687978314873e-06, "epoch": 2.107834754354677, "percentage": 42.16, "elapsed_time": "1:51:42", "remaining_time": "2:33:16", "throughput": 8678.66, "total_tokens": 58166184} +{"current_steps": 86285, "total_steps": 204665, "loss": 0.0002, "lr": 1.433391946762597e-06, "epoch": 2.107956905186524, "percentage": 42.16, "elapsed_time": "1:51:42", "remaining_time": "2:33:15", "throughput": 8678.68, "total_tokens": 58169320} +{"current_steps": 86290, "total_steps": 204665, "loss": 0.0479, "lr": 1.4333150925419639e-06, "epoch": 2.1080790560183713, "percentage": 42.16, "elapsed_time": "1:51:42", "remaining_time": "2:33:15", "throughput": 8678.73, "total_tokens": 58172712} +{"current_steps": 86295, "total_steps": 204665, "loss": 0.1066, "lr": 1.4332382351701467e-06, "epoch": 2.1082012068502185, "percentage": 42.16, "elapsed_time": "1:51:43", "remaining_time": "2:33:14", "throughput": 8678.78, "total_tokens": 58176040} +{"current_steps": 86300, "total_steps": 204665, "loss": 0.0539, "lr": 1.4331613746477049e-06, "epoch": 2.1083233576820657, "percentage": 42.17, "elapsed_time": "1:51:43", "remaining_time": "2:33:14", "throughput": 8678.8, "total_tokens": 58179176} +{"current_steps": 86305, "total_steps": 204665, "loss": 0.0003, "lr": 1.4330845109751967e-06, "epoch": 2.108445508513913, "percentage": 42.17, "elapsed_time": "1:51:43", "remaining_time": "2:33:13", "throughput": 8678.91, "total_tokens": 58183016} +{"current_steps": 86310, "total_steps": 204665, "loss": 0.0009, "lr": 1.433007644153182e-06, "epoch": 2.10856765934576, "percentage": 42.17, "elapsed_time": "1:51:44", "remaining_time": "2:33:13", "throughput": 8678.98, "total_tokens": 58186536} +{"current_steps": 86315, "total_steps": 204665, "loss": 0.0005, "lr": 1.432930774182219e-06, "epoch": 2.1086898101776073, "percentage": 42.17, "elapsed_time": "1:51:44", "remaining_time": "2:33:13", "throughput": 8679.03, "total_tokens": 58189864} +{"current_steps": 86320, "total_steps": 204665, "loss": 0.0823, "lr": 1.4328539010628668e-06, "epoch": 2.1088119610094544, "percentage": 42.18, "elapsed_time": "1:51:44", "remaining_time": "2:33:12", "throughput": 8679.05, "total_tokens": 58193000} +{"current_steps": 86325, "total_steps": 204665, "loss": 0.0003, "lr": 1.4327770247956847e-06, "epoch": 2.1089341118413016, "percentage": 42.18, "elapsed_time": "1:51:45", "remaining_time": "2:33:12", "throughput": 8679.13, "total_tokens": 58196584} +{"current_steps": 86330, "total_steps": 204665, "loss": 0.0003, "lr": 1.4327001453812318e-06, "epoch": 2.109056262673149, "percentage": 42.18, "elapsed_time": "1:51:45", "remaining_time": "2:33:11", "throughput": 8679.18, "total_tokens": 58199976} +{"current_steps": 86335, "total_steps": 204665, "loss": 0.0452, "lr": 1.432623262820067e-06, "epoch": 2.109178413504996, "percentage": 42.18, "elapsed_time": "1:51:46", "remaining_time": "2:33:11", "throughput": 8679.23, "total_tokens": 58203304} +{"current_steps": 86340, "total_steps": 204665, "loss": 0.0019, "lr": 1.4325463771127492e-06, "epoch": 2.109300564336843, "percentage": 42.19, "elapsed_time": "1:51:46", "remaining_time": "2:33:10", "throughput": 8679.29, "total_tokens": 58206696} +{"current_steps": 86345, "total_steps": 204665, "loss": 0.0006, "lr": 1.432469488259838e-06, "epoch": 2.1094227151686904, "percentage": 42.19, "elapsed_time": "1:51:46", "remaining_time": "2:33:10", "throughput": 8679.38, "total_tokens": 58210408} +{"current_steps": 86350, "total_steps": 204665, "loss": 0.0001, "lr": 1.4323925962618925e-06, "epoch": 2.1095448660005376, "percentage": 42.19, "elapsed_time": "1:51:47", "remaining_time": "2:33:09", "throughput": 8679.45, "total_tokens": 58213928} +{"current_steps": 86355, "total_steps": 204665, "loss": 0.0002, "lr": 1.4323157011194716e-06, "epoch": 2.109667016832385, "percentage": 42.19, "elapsed_time": "1:51:47", "remaining_time": "2:33:09", "throughput": 8679.48, "total_tokens": 58217064} +{"current_steps": 86360, "total_steps": 204665, "loss": 0.0753, "lr": 1.4322388028331344e-06, "epoch": 2.109789167664232, "percentage": 42.2, "elapsed_time": "1:51:47", "remaining_time": "2:33:09", "throughput": 8679.55, "total_tokens": 58220648} +{"current_steps": 86365, "total_steps": 204665, "loss": 0.0756, "lr": 1.432161901403441e-06, "epoch": 2.109911318496079, "percentage": 42.2, "elapsed_time": "1:51:48", "remaining_time": "2:33:08", "throughput": 8679.61, "total_tokens": 58224040} +{"current_steps": 86370, "total_steps": 204665, "loss": 0.0342, "lr": 1.4320849968309497e-06, "epoch": 2.1100334693279263, "percentage": 42.2, "elapsed_time": "1:51:48", "remaining_time": "2:33:08", "throughput": 8679.7, "total_tokens": 58227752} +{"current_steps": 86375, "total_steps": 204665, "loss": 0.0595, "lr": 1.4320080891162201e-06, "epoch": 2.110155620159773, "percentage": 42.2, "elapsed_time": "1:51:48", "remaining_time": "2:33:07", "throughput": 8679.71, "total_tokens": 58230696} +{"current_steps": 86380, "total_steps": 204665, "loss": 0.0007, "lr": 1.4319311782598113e-06, "epoch": 2.1102777709916203, "percentage": 42.21, "elapsed_time": "1:51:49", "remaining_time": "2:33:07", "throughput": 8679.74, "total_tokens": 58233896} +{"current_steps": 86385, "total_steps": 204665, "loss": 0.0008, "lr": 1.4318542642622828e-06, "epoch": 2.1103999218234675, "percentage": 42.21, "elapsed_time": "1:51:49", "remaining_time": "2:33:06", "throughput": 8679.77, "total_tokens": 58237096} +{"current_steps": 86390, "total_steps": 204665, "loss": 0.0002, "lr": 1.431777347124194e-06, "epoch": 2.1105220726553147, "percentage": 42.21, "elapsed_time": "1:51:49", "remaining_time": "2:33:06", "throughput": 8679.81, "total_tokens": 58240360} +{"current_steps": 86395, "total_steps": 204665, "loss": 0.0005, "lr": 1.4317004268461044e-06, "epoch": 2.110644223487162, "percentage": 42.21, "elapsed_time": "1:51:50", "remaining_time": "2:33:05", "throughput": 8679.89, "total_tokens": 58244008} +{"current_steps": 86400, "total_steps": 204665, "loss": 0.0002, "lr": 1.431623503428573e-06, "epoch": 2.110766374319009, "percentage": 42.22, "elapsed_time": "1:51:50", "remaining_time": "2:33:05", "throughput": 8679.95, "total_tokens": 58247400} +{"current_steps": 86405, "total_steps": 204665, "loss": 0.0651, "lr": 1.4315465768721593e-06, "epoch": 2.1108885251508562, "percentage": 42.22, "elapsed_time": "1:51:50", "remaining_time": "2:33:05", "throughput": 8680.01, "total_tokens": 58250856} +{"current_steps": 86410, "total_steps": 204665, "loss": 0.0787, "lr": 1.431469647177423e-06, "epoch": 2.1110106759827034, "percentage": 42.22, "elapsed_time": "1:51:51", "remaining_time": "2:33:04", "throughput": 8680.08, "total_tokens": 58254440} +{"current_steps": 86415, "total_steps": 204665, "loss": 0.0002, "lr": 1.4313927143449235e-06, "epoch": 2.1111328268145506, "percentage": 42.22, "elapsed_time": "1:51:51", "remaining_time": "2:33:04", "throughput": 8680.18, "total_tokens": 58258152} +{"current_steps": 86420, "total_steps": 204665, "loss": 0.0628, "lr": 1.43131577837522e-06, "epoch": 2.111254977646398, "percentage": 42.23, "elapsed_time": "1:51:51", "remaining_time": "2:33:03", "throughput": 8680.23, "total_tokens": 58261480} +{"current_steps": 86425, "total_steps": 204665, "loss": 0.0005, "lr": 1.431238839268872e-06, "epoch": 2.111377128478245, "percentage": 42.23, "elapsed_time": "1:51:52", "remaining_time": "2:33:03", "throughput": 8680.3, "total_tokens": 58265064} +{"current_steps": 86430, "total_steps": 204665, "loss": 0.0327, "lr": 1.4311618970264392e-06, "epoch": 2.111499279310092, "percentage": 42.23, "elapsed_time": "1:51:52", "remaining_time": "2:33:02", "throughput": 8680.36, "total_tokens": 58268456} +{"current_steps": 86435, "total_steps": 204665, "loss": 0.0004, "lr": 1.4310849516484813e-06, "epoch": 2.1116214301419394, "percentage": 42.23, "elapsed_time": "1:51:53", "remaining_time": "2:33:02", "throughput": 8680.42, "total_tokens": 58271912} +{"current_steps": 86440, "total_steps": 204665, "loss": 0.0008, "lr": 1.4310080031355575e-06, "epoch": 2.1117435809737866, "percentage": 42.23, "elapsed_time": "1:51:53", "remaining_time": "2:33:01", "throughput": 8680.44, "total_tokens": 58274984} +{"current_steps": 86445, "total_steps": 204665, "loss": 0.0764, "lr": 1.4309310514882277e-06, "epoch": 2.1118657318056338, "percentage": 42.24, "elapsed_time": "1:51:53", "remaining_time": "2:33:01", "throughput": 8680.49, "total_tokens": 58278312} +{"current_steps": 86450, "total_steps": 204665, "loss": 0.0497, "lr": 1.4308540967070513e-06, "epoch": 2.111987882637481, "percentage": 42.24, "elapsed_time": "1:51:54", "remaining_time": "2:33:01", "throughput": 8680.55, "total_tokens": 58281832} +{"current_steps": 86455, "total_steps": 204665, "loss": 0.0046, "lr": 1.430777138792588e-06, "epoch": 2.112110033469328, "percentage": 42.24, "elapsed_time": "1:51:54", "remaining_time": "2:33:00", "throughput": 8680.62, "total_tokens": 58285288} +{"current_steps": 86460, "total_steps": 204665, "loss": 0.0002, "lr": 1.4307001777453977e-06, "epoch": 2.112232184301175, "percentage": 42.24, "elapsed_time": "1:51:54", "remaining_time": "2:33:00", "throughput": 8680.72, "total_tokens": 58289128} +{"current_steps": 86465, "total_steps": 204665, "loss": 0.0495, "lr": 1.4306232135660397e-06, "epoch": 2.112354335133022, "percentage": 42.25, "elapsed_time": "1:51:55", "remaining_time": "2:32:59", "throughput": 8680.81, "total_tokens": 58292776} +{"current_steps": 86470, "total_steps": 204665, "loss": 0.0523, "lr": 1.430546246255074e-06, "epoch": 2.1124764859648693, "percentage": 42.25, "elapsed_time": "1:51:55", "remaining_time": "2:32:59", "throughput": 8680.95, "total_tokens": 58296872} +{"current_steps": 86475, "total_steps": 204665, "loss": 0.0002, "lr": 1.4304692758130599e-06, "epoch": 2.1125986367967164, "percentage": 42.25, "elapsed_time": "1:51:55", "remaining_time": "2:32:58", "throughput": 8681.03, "total_tokens": 58300456} +{"current_steps": 86480, "total_steps": 204665, "loss": 0.0002, "lr": 1.4303923022405577e-06, "epoch": 2.1127207876285636, "percentage": 42.25, "elapsed_time": "1:51:56", "remaining_time": "2:32:58", "throughput": 8681.07, "total_tokens": 58303656} +{"current_steps": 86485, "total_steps": 204665, "loss": 0.0004, "lr": 1.430315325538127e-06, "epoch": 2.112842938460411, "percentage": 42.26, "elapsed_time": "1:51:56", "remaining_time": "2:32:58", "throughput": 8681.11, "total_tokens": 58306920} +{"current_steps": 86490, "total_steps": 204665, "loss": 0.0003, "lr": 1.4302383457063272e-06, "epoch": 2.112965089292258, "percentage": 42.26, "elapsed_time": "1:51:56", "remaining_time": "2:32:57", "throughput": 8681.15, "total_tokens": 58310184} +{"current_steps": 86495, "total_steps": 204665, "loss": 0.0001, "lr": 1.4301613627457186e-06, "epoch": 2.113087240124105, "percentage": 42.26, "elapsed_time": "1:51:57", "remaining_time": "2:32:57", "throughput": 8681.21, "total_tokens": 58313704} +{"current_steps": 86500, "total_steps": 204665, "loss": 0.0513, "lr": 1.4300843766568609e-06, "epoch": 2.1132093909559524, "percentage": 42.26, "elapsed_time": "1:51:57", "remaining_time": "2:32:56", "throughput": 8681.25, "total_tokens": 58316904} +{"current_steps": 86505, "total_steps": 204665, "loss": 0.005, "lr": 1.4300073874403139e-06, "epoch": 2.1133315417877996, "percentage": 42.27, "elapsed_time": "1:51:57", "remaining_time": "2:32:56", "throughput": 8681.32, "total_tokens": 58320424} +{"current_steps": 86510, "total_steps": 204665, "loss": 0.0002, "lr": 1.4299303950966372e-06, "epoch": 2.1134536926196468, "percentage": 42.27, "elapsed_time": "1:51:58", "remaining_time": "2:32:55", "throughput": 8681.38, "total_tokens": 58323880} +{"current_steps": 86515, "total_steps": 204665, "loss": 0.0696, "lr": 1.4298533996263916e-06, "epoch": 2.113575843451494, "percentage": 42.27, "elapsed_time": "1:51:58", "remaining_time": "2:32:55", "throughput": 8681.44, "total_tokens": 58327336} +{"current_steps": 86520, "total_steps": 204665, "loss": 0.0517, "lr": 1.429776401030136e-06, "epoch": 2.113697994283341, "percentage": 42.27, "elapsed_time": "1:51:58", "remaining_time": "2:32:54", "throughput": 8681.47, "total_tokens": 58330536} +{"current_steps": 86525, "total_steps": 204665, "loss": 0.0001, "lr": 1.4296993993084313e-06, "epoch": 2.1138201451151883, "percentage": 42.28, "elapsed_time": "1:51:59", "remaining_time": "2:32:54", "throughput": 8681.52, "total_tokens": 58333928} +{"current_steps": 86530, "total_steps": 204665, "loss": 0.0405, "lr": 1.4296223944618366e-06, "epoch": 2.1139422959470355, "percentage": 42.28, "elapsed_time": "1:51:59", "remaining_time": "2:32:54", "throughput": 8681.58, "total_tokens": 58337384} +{"current_steps": 86535, "total_steps": 204665, "loss": 0.0003, "lr": 1.4295453864909125e-06, "epoch": 2.1140644467788827, "percentage": 42.28, "elapsed_time": "1:52:00", "remaining_time": "2:32:53", "throughput": 8681.66, "total_tokens": 58341032} +{"current_steps": 86540, "total_steps": 204665, "loss": 0.0001, "lr": 1.4294683753962187e-06, "epoch": 2.11418659761073, "percentage": 42.28, "elapsed_time": "1:52:00", "remaining_time": "2:32:53", "throughput": 8681.69, "total_tokens": 58344232} +{"current_steps": 86545, "total_steps": 204665, "loss": 0.1218, "lr": 1.429391361178315e-06, "epoch": 2.114308748442577, "percentage": 42.29, "elapsed_time": "1:52:00", "remaining_time": "2:32:52", "throughput": 8681.77, "total_tokens": 58347816} +{"current_steps": 86550, "total_steps": 204665, "loss": 0.0559, "lr": 1.4293143438377624e-06, "epoch": 2.114430899274424, "percentage": 42.29, "elapsed_time": "1:52:01", "remaining_time": "2:32:52", "throughput": 8681.82, "total_tokens": 58351144} +{"current_steps": 86555, "total_steps": 204665, "loss": 0.0001, "lr": 1.4292373233751202e-06, "epoch": 2.114553050106271, "percentage": 42.29, "elapsed_time": "1:52:01", "remaining_time": "2:32:51", "throughput": 8681.87, "total_tokens": 58354472} +{"current_steps": 86560, "total_steps": 204665, "loss": 0.0856, "lr": 1.4291602997909489e-06, "epoch": 2.1146752009381182, "percentage": 42.29, "elapsed_time": "1:52:01", "remaining_time": "2:32:51", "throughput": 8681.93, "total_tokens": 58357928} +{"current_steps": 86565, "total_steps": 204665, "loss": 0.0006, "lr": 1.4290832730858082e-06, "epoch": 2.1147973517699654, "percentage": 42.3, "elapsed_time": "1:52:02", "remaining_time": "2:32:50", "throughput": 8681.98, "total_tokens": 58361320} +{"current_steps": 86570, "total_steps": 204665, "loss": 0.0375, "lr": 1.4290062432602588e-06, "epoch": 2.1149195026018126, "percentage": 42.3, "elapsed_time": "1:52:02", "remaining_time": "2:32:50", "throughput": 8682.07, "total_tokens": 58364904} +{"current_steps": 86575, "total_steps": 204665, "loss": 0.0238, "lr": 1.4289292103148604e-06, "epoch": 2.11504165343366, "percentage": 42.3, "elapsed_time": "1:52:02", "remaining_time": "2:32:50", "throughput": 8682.1, "total_tokens": 58368168} +{"current_steps": 86580, "total_steps": 204665, "loss": 0.1127, "lr": 1.4288521742501734e-06, "epoch": 2.115163804265507, "percentage": 42.3, "elapsed_time": "1:52:03", "remaining_time": "2:32:49", "throughput": 8682.11, "total_tokens": 58371112} +{"current_steps": 86585, "total_steps": 204665, "loss": 0.0591, "lr": 1.4287751350667584e-06, "epoch": 2.115285955097354, "percentage": 42.31, "elapsed_time": "1:52:03", "remaining_time": "2:32:49", "throughput": 8682.15, "total_tokens": 58374376} +{"current_steps": 86590, "total_steps": 204665, "loss": 0.0003, "lr": 1.4286980927651749e-06, "epoch": 2.1154081059292014, "percentage": 42.31, "elapsed_time": "1:52:03", "remaining_time": "2:32:48", "throughput": 8682.17, "total_tokens": 58377512} +{"current_steps": 86595, "total_steps": 204665, "loss": 0.0732, "lr": 1.4286210473459837e-06, "epoch": 2.1155302567610486, "percentage": 42.31, "elapsed_time": "1:52:04", "remaining_time": "2:32:48", "throughput": 8682.22, "total_tokens": 58380840} +{"current_steps": 86600, "total_steps": 204665, "loss": 0.0357, "lr": 1.428543998809745e-06, "epoch": 2.1156524075928957, "percentage": 42.31, "elapsed_time": "1:52:04", "remaining_time": "2:32:47", "throughput": 8682.27, "total_tokens": 58384232} +{"current_steps": 86605, "total_steps": 204665, "loss": 0.0002, "lr": 1.4284669471570188e-06, "epoch": 2.115774558424743, "percentage": 42.32, "elapsed_time": "1:52:04", "remaining_time": "2:32:47", "throughput": 8682.33, "total_tokens": 58387624} +{"current_steps": 86610, "total_steps": 204665, "loss": 0.0002, "lr": 1.428389892388366e-06, "epoch": 2.11589670925659, "percentage": 42.32, "elapsed_time": "1:52:05", "remaining_time": "2:32:46", "throughput": 8682.4, "total_tokens": 58391144} +{"current_steps": 86615, "total_steps": 204665, "loss": 0.0005, "lr": 1.4283128345043464e-06, "epoch": 2.1160188600884373, "percentage": 42.32, "elapsed_time": "1:52:05", "remaining_time": "2:32:46", "throughput": 8682.48, "total_tokens": 58394728} +{"current_steps": 86620, "total_steps": 204665, "loss": 0.007, "lr": 1.428235773505521e-06, "epoch": 2.1161410109202845, "percentage": 42.32, "elapsed_time": "1:52:05", "remaining_time": "2:32:46", "throughput": 8682.55, "total_tokens": 58398248} +{"current_steps": 86625, "total_steps": 204665, "loss": 0.0003, "lr": 1.4281587093924496e-06, "epoch": 2.1162631617521317, "percentage": 42.33, "elapsed_time": "1:52:06", "remaining_time": "2:32:45", "throughput": 8682.62, "total_tokens": 58401768} +{"current_steps": 86630, "total_steps": 204665, "loss": 0.0226, "lr": 1.4280816421656932e-06, "epoch": 2.116385312583979, "percentage": 42.33, "elapsed_time": "1:52:06", "remaining_time": "2:32:45", "throughput": 8682.7, "total_tokens": 58405352} +{"current_steps": 86635, "total_steps": 204665, "loss": 0.0361, "lr": 1.428004571825812e-06, "epoch": 2.116507463415826, "percentage": 42.33, "elapsed_time": "1:52:06", "remaining_time": "2:32:44", "throughput": 8682.77, "total_tokens": 58408808} +{"current_steps": 86640, "total_steps": 204665, "loss": 0.057, "lr": 1.427927498373366e-06, "epoch": 2.116629614247673, "percentage": 42.33, "elapsed_time": "1:52:07", "remaining_time": "2:32:44", "throughput": 8682.85, "total_tokens": 58412456} +{"current_steps": 86645, "total_steps": 204665, "loss": 0.0002, "lr": 1.4278504218089164e-06, "epoch": 2.11675176507952, "percentage": 42.34, "elapsed_time": "1:52:07", "remaining_time": "2:32:43", "throughput": 8683.0, "total_tokens": 58416616} +{"current_steps": 86650, "total_steps": 204665, "loss": 0.0239, "lr": 1.4277733421330233e-06, "epoch": 2.116873915911367, "percentage": 42.34, "elapsed_time": "1:52:08", "remaining_time": "2:32:43", "throughput": 8683.11, "total_tokens": 58420456} +{"current_steps": 86655, "total_steps": 204665, "loss": 0.0007, "lr": 1.4276962593462476e-06, "epoch": 2.1169960667432144, "percentage": 42.34, "elapsed_time": "1:52:08", "remaining_time": "2:32:42", "throughput": 8683.17, "total_tokens": 58423912} +{"current_steps": 86660, "total_steps": 204665, "loss": 0.047, "lr": 1.4276191734491497e-06, "epoch": 2.1171182175750616, "percentage": 42.34, "elapsed_time": "1:52:08", "remaining_time": "2:32:42", "throughput": 8683.26, "total_tokens": 58427560} +{"current_steps": 86665, "total_steps": 204665, "loss": 0.0261, "lr": 1.4275420844422898e-06, "epoch": 2.1172403684069088, "percentage": 42.34, "elapsed_time": "1:52:09", "remaining_time": "2:32:42", "throughput": 8683.33, "total_tokens": 58431016} +{"current_steps": 86670, "total_steps": 204665, "loss": 0.0992, "lr": 1.4274649923262292e-06, "epoch": 2.117362519238756, "percentage": 42.35, "elapsed_time": "1:52:09", "remaining_time": "2:32:41", "throughput": 8683.35, "total_tokens": 58434152} +{"current_steps": 86675, "total_steps": 204665, "loss": 0.0008, "lr": 1.427387897101528e-06, "epoch": 2.117484670070603, "percentage": 42.35, "elapsed_time": "1:52:09", "remaining_time": "2:32:41", "throughput": 8683.36, "total_tokens": 58437096} +{"current_steps": 86680, "total_steps": 204665, "loss": 0.0003, "lr": 1.4273107987687477e-06, "epoch": 2.1176068209024503, "percentage": 42.35, "elapsed_time": "1:52:10", "remaining_time": "2:32:40", "throughput": 8683.41, "total_tokens": 58440488} +{"current_steps": 86685, "total_steps": 204665, "loss": 0.0415, "lr": 1.4272336973284476e-06, "epoch": 2.1177289717342975, "percentage": 42.35, "elapsed_time": "1:52:10", "remaining_time": "2:32:40", "throughput": 8683.5, "total_tokens": 58444136} +{"current_steps": 86690, "total_steps": 204665, "loss": 0.0001, "lr": 1.4271565927811894e-06, "epoch": 2.1178511225661447, "percentage": 42.36, "elapsed_time": "1:52:10", "remaining_time": "2:32:39", "throughput": 8683.59, "total_tokens": 58447784} +{"current_steps": 86695, "total_steps": 204665, "loss": 0.0001, "lr": 1.4270794851275336e-06, "epoch": 2.117973273397992, "percentage": 42.36, "elapsed_time": "1:52:11", "remaining_time": "2:32:39", "throughput": 8683.63, "total_tokens": 58451112} +{"current_steps": 86700, "total_steps": 204665, "loss": 0.0001, "lr": 1.4270023743680407e-06, "epoch": 2.118095424229839, "percentage": 42.36, "elapsed_time": "1:52:11", "remaining_time": "2:32:38", "throughput": 8683.66, "total_tokens": 58454248} +{"current_steps": 86705, "total_steps": 204665, "loss": 0.0002, "lr": 1.4269252605032718e-06, "epoch": 2.1182175750616863, "percentage": 42.36, "elapsed_time": "1:52:11", "remaining_time": "2:32:38", "throughput": 8683.72, "total_tokens": 58457704} +{"current_steps": 86710, "total_steps": 204665, "loss": 0.0748, "lr": 1.4268481435337875e-06, "epoch": 2.1183397258935335, "percentage": 42.37, "elapsed_time": "1:52:12", "remaining_time": "2:32:38", "throughput": 8683.79, "total_tokens": 58461160} +{"current_steps": 86715, "total_steps": 204665, "loss": 0.1228, "lr": 1.4267710234601488e-06, "epoch": 2.1184618767253807, "percentage": 42.37, "elapsed_time": "1:52:12", "remaining_time": "2:32:37", "throughput": 8683.84, "total_tokens": 58464552} +{"current_steps": 86720, "total_steps": 204665, "loss": 0.0006, "lr": 1.4266939002829163e-06, "epoch": 2.118584027557228, "percentage": 42.37, "elapsed_time": "1:52:12", "remaining_time": "2:32:37", "throughput": 8683.92, "total_tokens": 58468136} +{"current_steps": 86725, "total_steps": 204665, "loss": 0.0001, "lr": 1.4266167740026513e-06, "epoch": 2.1187061783890746, "percentage": 42.37, "elapsed_time": "1:52:13", "remaining_time": "2:32:36", "throughput": 8683.95, "total_tokens": 58471336} +{"current_steps": 86730, "total_steps": 204665, "loss": 0.0001, "lr": 1.4265396446199142e-06, "epoch": 2.118828329220922, "percentage": 42.38, "elapsed_time": "1:52:13", "remaining_time": "2:32:36", "throughput": 8683.99, "total_tokens": 58474600} +{"current_steps": 86735, "total_steps": 204665, "loss": 0.1686, "lr": 1.426462512135266e-06, "epoch": 2.118950480052769, "percentage": 42.38, "elapsed_time": "1:52:13", "remaining_time": "2:32:35", "throughput": 8684.01, "total_tokens": 58477672} +{"current_steps": 86740, "total_steps": 204665, "loss": 0.0006, "lr": 1.426385376549268e-06, "epoch": 2.119072630884616, "percentage": 42.38, "elapsed_time": "1:52:14", "remaining_time": "2:32:35", "throughput": 8684.06, "total_tokens": 58481000} +{"current_steps": 86745, "total_steps": 204665, "loss": 0.0455, "lr": 1.4263082378624804e-06, "epoch": 2.1191947817164634, "percentage": 42.38, "elapsed_time": "1:52:14", "remaining_time": "2:32:34", "throughput": 8684.1, "total_tokens": 58484328} +{"current_steps": 86750, "total_steps": 204665, "loss": 0.0405, "lr": 1.4262310960754649e-06, "epoch": 2.1193169325483106, "percentage": 42.39, "elapsed_time": "1:52:14", "remaining_time": "2:32:34", "throughput": 8684.15, "total_tokens": 58487656} +{"current_steps": 86755, "total_steps": 204665, "loss": 0.0003, "lr": 1.4261539511887822e-06, "epoch": 2.1194390833801577, "percentage": 42.39, "elapsed_time": "1:52:15", "remaining_time": "2:32:34", "throughput": 8684.16, "total_tokens": 58490728} +{"current_steps": 86760, "total_steps": 204665, "loss": 0.0004, "lr": 1.4260768032029932e-06, "epoch": 2.119561234212005, "percentage": 42.39, "elapsed_time": "1:52:15", "remaining_time": "2:32:33", "throughput": 8684.23, "total_tokens": 58494184} +{"current_steps": 86765, "total_steps": 204665, "loss": 0.0002, "lr": 1.4259996521186591e-06, "epoch": 2.119683385043852, "percentage": 42.39, "elapsed_time": "1:52:16", "remaining_time": "2:32:33", "throughput": 8684.28, "total_tokens": 58497576} +{"current_steps": 86770, "total_steps": 204665, "loss": 0.0115, "lr": 1.4259224979363413e-06, "epoch": 2.1198055358756993, "percentage": 42.4, "elapsed_time": "1:52:16", "remaining_time": "2:32:32", "throughput": 8684.3, "total_tokens": 58500648} +{"current_steps": 86775, "total_steps": 204665, "loss": 0.0001, "lr": 1.4258453406566002e-06, "epoch": 2.1199276867075465, "percentage": 42.4, "elapsed_time": "1:52:16", "remaining_time": "2:32:32", "throughput": 8684.37, "total_tokens": 58504232} +{"current_steps": 86780, "total_steps": 204665, "loss": 0.0001, "lr": 1.4257681802799973e-06, "epoch": 2.1200498375393937, "percentage": 42.4, "elapsed_time": "1:52:17", "remaining_time": "2:32:31", "throughput": 8684.42, "total_tokens": 58507560} +{"current_steps": 86785, "total_steps": 204665, "loss": 0.0001, "lr": 1.4256910168070938e-06, "epoch": 2.120171988371241, "percentage": 42.4, "elapsed_time": "1:52:17", "remaining_time": "2:32:31", "throughput": 8684.5, "total_tokens": 58511144} +{"current_steps": 86790, "total_steps": 204665, "loss": 0.0501, "lr": 1.4256138502384508e-06, "epoch": 2.120294139203088, "percentage": 42.41, "elapsed_time": "1:52:17", "remaining_time": "2:32:30", "throughput": 8684.49, "total_tokens": 58514024} +{"current_steps": 86795, "total_steps": 204665, "loss": 0.0004, "lr": 1.425536680574629e-06, "epoch": 2.1204162900349353, "percentage": 42.41, "elapsed_time": "1:52:18", "remaining_time": "2:32:30", "throughput": 8684.55, "total_tokens": 58517544} +{"current_steps": 86800, "total_steps": 204665, "loss": 0.0001, "lr": 1.4254595078161905e-06, "epoch": 2.1205384408667824, "percentage": 42.41, "elapsed_time": "1:52:18", "remaining_time": "2:32:30", "throughput": 8684.64, "total_tokens": 58521256} +{"current_steps": 86805, "total_steps": 204665, "loss": 0.0389, "lr": 1.4253823319636958e-06, "epoch": 2.1206605916986296, "percentage": 42.41, "elapsed_time": "1:52:18", "remaining_time": "2:32:29", "throughput": 8684.72, "total_tokens": 58524840} +{"current_steps": 86810, "total_steps": 204665, "loss": 0.0001, "lr": 1.4253051530177063e-06, "epoch": 2.120782742530477, "percentage": 42.42, "elapsed_time": "1:52:19", "remaining_time": "2:32:29", "throughput": 8684.75, "total_tokens": 58528040} +{"current_steps": 86815, "total_steps": 204665, "loss": 0.0864, "lr": 1.4252279709787834e-06, "epoch": 2.120904893362324, "percentage": 42.42, "elapsed_time": "1:52:19", "remaining_time": "2:32:28", "throughput": 8684.87, "total_tokens": 58531944} +{"current_steps": 86820, "total_steps": 204665, "loss": 0.0879, "lr": 1.4251507858474882e-06, "epoch": 2.1210270441941708, "percentage": 42.42, "elapsed_time": "1:52:19", "remaining_time": "2:32:28", "throughput": 8684.89, "total_tokens": 58535080} +{"current_steps": 86825, "total_steps": 204665, "loss": 0.0402, "lr": 1.4250735976243823e-06, "epoch": 2.121149195026018, "percentage": 42.42, "elapsed_time": "1:52:20", "remaining_time": "2:32:27", "throughput": 8684.91, "total_tokens": 58538216} +{"current_steps": 86830, "total_steps": 204665, "loss": 0.0003, "lr": 1.4249964063100266e-06, "epoch": 2.121271345857865, "percentage": 42.43, "elapsed_time": "1:52:20", "remaining_time": "2:32:27", "throughput": 8684.97, "total_tokens": 58541672} +{"current_steps": 86835, "total_steps": 204665, "loss": 0.0004, "lr": 1.4249192119049832e-06, "epoch": 2.1213934966897123, "percentage": 42.43, "elapsed_time": "1:52:20", "remaining_time": "2:32:27", "throughput": 8685.0, "total_tokens": 58544872} +{"current_steps": 86840, "total_steps": 204665, "loss": 0.054, "lr": 1.4248420144098128e-06, "epoch": 2.1215156475215595, "percentage": 42.43, "elapsed_time": "1:52:21", "remaining_time": "2:32:26", "throughput": 8685.06, "total_tokens": 58548264} +{"current_steps": 86845, "total_steps": 204665, "loss": 0.0455, "lr": 1.4247648138250768e-06, "epoch": 2.1216377983534067, "percentage": 42.43, "elapsed_time": "1:52:21", "remaining_time": "2:32:26", "throughput": 8685.11, "total_tokens": 58551656} +{"current_steps": 86850, "total_steps": 204665, "loss": 0.0001, "lr": 1.4246876101513369e-06, "epoch": 2.121759949185254, "percentage": 42.44, "elapsed_time": "1:52:21", "remaining_time": "2:32:25", "throughput": 8685.19, "total_tokens": 58555240} +{"current_steps": 86855, "total_steps": 204665, "loss": 0.0448, "lr": 1.4246104033891545e-06, "epoch": 2.121882100017101, "percentage": 42.44, "elapsed_time": "1:52:22", "remaining_time": "2:32:25", "throughput": 8685.3, "total_tokens": 58559080} +{"current_steps": 86860, "total_steps": 204665, "loss": 0.0001, "lr": 1.4245331935390913e-06, "epoch": 2.1220042508489483, "percentage": 42.44, "elapsed_time": "1:52:22", "remaining_time": "2:32:24", "throughput": 8685.38, "total_tokens": 58562728} +{"current_steps": 86865, "total_steps": 204665, "loss": 0.0001, "lr": 1.424455980601708e-06, "epoch": 2.1221264016807955, "percentage": 42.44, "elapsed_time": "1:52:23", "remaining_time": "2:32:24", "throughput": 8685.44, "total_tokens": 58566120} +{"current_steps": 86870, "total_steps": 204665, "loss": 0.0003, "lr": 1.424378764577567e-06, "epoch": 2.1222485525126427, "percentage": 42.44, "elapsed_time": "1:52:23", "remaining_time": "2:32:23", "throughput": 8685.53, "total_tokens": 58569768} +{"current_steps": 86875, "total_steps": 204665, "loss": 0.0004, "lr": 1.4243015454672294e-06, "epoch": 2.12237070334449, "percentage": 42.45, "elapsed_time": "1:52:23", "remaining_time": "2:32:23", "throughput": 8685.57, "total_tokens": 58573096} +{"current_steps": 86880, "total_steps": 204665, "loss": 0.0001, "lr": 1.4242243232712569e-06, "epoch": 2.122492854176337, "percentage": 42.45, "elapsed_time": "1:52:24", "remaining_time": "2:32:23", "throughput": 8685.7, "total_tokens": 58577128} +{"current_steps": 86885, "total_steps": 204665, "loss": 0.0515, "lr": 1.424147097990211e-06, "epoch": 2.1226150050081842, "percentage": 42.45, "elapsed_time": "1:52:24", "remaining_time": "2:32:22", "throughput": 8685.74, "total_tokens": 58580328} +{"current_steps": 86890, "total_steps": 204665, "loss": 0.048, "lr": 1.4240698696246535e-06, "epoch": 2.1227371558400314, "percentage": 42.45, "elapsed_time": "1:52:24", "remaining_time": "2:32:22", "throughput": 8685.82, "total_tokens": 58583976} +{"current_steps": 86895, "total_steps": 204665, "loss": 0.0004, "lr": 1.4239926381751455e-06, "epoch": 2.1228593066718786, "percentage": 42.46, "elapsed_time": "1:52:25", "remaining_time": "2:32:21", "throughput": 8685.87, "total_tokens": 58587304} +{"current_steps": 86900, "total_steps": 204665, "loss": 0.0003, "lr": 1.423915403642249e-06, "epoch": 2.122981457503726, "percentage": 42.46, "elapsed_time": "1:52:25", "remaining_time": "2:32:21", "throughput": 8685.9, "total_tokens": 58590504} +{"current_steps": 86905, "total_steps": 204665, "loss": 0.0417, "lr": 1.4238381660265259e-06, "epoch": 2.1231036083355725, "percentage": 42.46, "elapsed_time": "1:52:25", "remaining_time": "2:32:20", "throughput": 8685.94, "total_tokens": 58593768} +{"current_steps": 86910, "total_steps": 204665, "loss": 0.0994, "lr": 1.4237609253285377e-06, "epoch": 2.1232257591674197, "percentage": 42.46, "elapsed_time": "1:52:26", "remaining_time": "2:32:20", "throughput": 8686.02, "total_tokens": 58597352} +{"current_steps": 86915, "total_steps": 204665, "loss": 0.1099, "lr": 1.4236836815488458e-06, "epoch": 2.123347909999267, "percentage": 42.47, "elapsed_time": "1:52:26", "remaining_time": "2:32:19", "throughput": 8686.05, "total_tokens": 58600616} +{"current_steps": 86920, "total_steps": 204665, "loss": 0.0416, "lr": 1.4236064346880123e-06, "epoch": 2.123470060831114, "percentage": 42.47, "elapsed_time": "1:52:26", "remaining_time": "2:32:19", "throughput": 8686.14, "total_tokens": 58604264} +{"current_steps": 86925, "total_steps": 204665, "loss": 0.032, "lr": 1.423529184746599e-06, "epoch": 2.1235922116629613, "percentage": 42.47, "elapsed_time": "1:52:27", "remaining_time": "2:32:19", "throughput": 8686.22, "total_tokens": 58607848} +{"current_steps": 86930, "total_steps": 204665, "loss": 0.0918, "lr": 1.4234519317251674e-06, "epoch": 2.1237143624948085, "percentage": 42.47, "elapsed_time": "1:52:27", "remaining_time": "2:32:18", "throughput": 8686.23, "total_tokens": 58610920} +{"current_steps": 86935, "total_steps": 204665, "loss": 0.0003, "lr": 1.4233746756242795e-06, "epoch": 2.1238365133266557, "percentage": 42.48, "elapsed_time": "1:52:27", "remaining_time": "2:32:18", "throughput": 8686.3, "total_tokens": 58614440} +{"current_steps": 86940, "total_steps": 204665, "loss": 0.0003, "lr": 1.4232974164444972e-06, "epoch": 2.123958664158503, "percentage": 42.48, "elapsed_time": "1:52:28", "remaining_time": "2:32:17", "throughput": 8686.34, "total_tokens": 58617704} +{"current_steps": 86945, "total_steps": 204665, "loss": 0.0412, "lr": 1.4232201541863822e-06, "epoch": 2.12408081499035, "percentage": 42.48, "elapsed_time": "1:52:28", "remaining_time": "2:32:17", "throughput": 8686.39, "total_tokens": 58621032} +{"current_steps": 86950, "total_steps": 204665, "loss": 0.1003, "lr": 1.4231428888504964e-06, "epoch": 2.1242029658221973, "percentage": 42.48, "elapsed_time": "1:52:28", "remaining_time": "2:32:16", "throughput": 8686.53, "total_tokens": 58625128} +{"current_steps": 86955, "total_steps": 204665, "loss": 0.0425, "lr": 1.4230656204374017e-06, "epoch": 2.1243251166540444, "percentage": 42.49, "elapsed_time": "1:52:29", "remaining_time": "2:32:16", "throughput": 8686.59, "total_tokens": 58628584} +{"current_steps": 86960, "total_steps": 204665, "loss": 0.0337, "lr": 1.4229883489476599e-06, "epoch": 2.1244472674858916, "percentage": 42.49, "elapsed_time": "1:52:29", "remaining_time": "2:32:16", "throughput": 8686.65, "total_tokens": 58631976} +{"current_steps": 86965, "total_steps": 204665, "loss": 0.0371, "lr": 1.422911074381833e-06, "epoch": 2.124569418317739, "percentage": 42.49, "elapsed_time": "1:52:30", "remaining_time": "2:32:15", "throughput": 8686.69, "total_tokens": 58635304} +{"current_steps": 86970, "total_steps": 204665, "loss": 0.0003, "lr": 1.4228337967404833e-06, "epoch": 2.124691569149586, "percentage": 42.49, "elapsed_time": "1:52:30", "remaining_time": "2:32:15", "throughput": 8686.71, "total_tokens": 58638376} +{"current_steps": 86975, "total_steps": 204665, "loss": 0.0988, "lr": 1.4227565160241724e-06, "epoch": 2.124813719981433, "percentage": 42.5, "elapsed_time": "1:52:30", "remaining_time": "2:32:14", "throughput": 8686.73, "total_tokens": 58641512} +{"current_steps": 86980, "total_steps": 204665, "loss": 0.0436, "lr": 1.4226792322334622e-06, "epoch": 2.1249358708132804, "percentage": 42.5, "elapsed_time": "1:52:31", "remaining_time": "2:32:14", "throughput": 8686.79, "total_tokens": 58644968} +{"current_steps": 86985, "total_steps": 204665, "loss": 0.1034, "lr": 1.4226019453689151e-06, "epoch": 2.1250580216451276, "percentage": 42.5, "elapsed_time": "1:52:31", "remaining_time": "2:32:13", "throughput": 8686.84, "total_tokens": 58648296} +{"current_steps": 86990, "total_steps": 204665, "loss": 0.0004, "lr": 1.422524655431093e-06, "epoch": 2.1251801724769748, "percentage": 42.5, "elapsed_time": "1:52:31", "remaining_time": "2:32:13", "throughput": 8686.92, "total_tokens": 58651880} +{"current_steps": 86995, "total_steps": 204665, "loss": 0.0016, "lr": 1.422447362420558e-06, "epoch": 2.125302323308822, "percentage": 42.51, "elapsed_time": "1:52:32", "remaining_time": "2:32:12", "throughput": 8687.04, "total_tokens": 58655848} +{"current_steps": 87000, "total_steps": 204665, "loss": 0.0008, "lr": 1.422370066337872e-06, "epoch": 2.1254244741406687, "percentage": 42.51, "elapsed_time": "1:52:32", "remaining_time": "2:32:12", "throughput": 8687.15, "total_tokens": 58659688} +{"current_steps": 87005, "total_steps": 204665, "loss": 0.0005, "lr": 1.4222927671835976e-06, "epoch": 2.125546624972516, "percentage": 42.51, "elapsed_time": "1:52:32", "remaining_time": "2:32:12", "throughput": 8687.21, "total_tokens": 58663144} +{"current_steps": 87010, "total_steps": 204665, "loss": 0.0488, "lr": 1.4222154649582963e-06, "epoch": 2.125668775804363, "percentage": 42.51, "elapsed_time": "1:52:33", "remaining_time": "2:32:11", "throughput": 8687.26, "total_tokens": 58666472} +{"current_steps": 87015, "total_steps": 204665, "loss": 0.005, "lr": 1.4221381596625307e-06, "epoch": 2.1257909266362103, "percentage": 42.52, "elapsed_time": "1:52:33", "remaining_time": "2:32:11", "throughput": 8687.35, "total_tokens": 58670184} +{"current_steps": 87020, "total_steps": 204665, "loss": 0.0002, "lr": 1.4220608512968627e-06, "epoch": 2.1259130774680575, "percentage": 42.52, "elapsed_time": "1:52:33", "remaining_time": "2:32:10", "throughput": 8687.38, "total_tokens": 58673384} +{"current_steps": 87025, "total_steps": 204665, "loss": 0.0002, "lr": 1.4219835398618548e-06, "epoch": 2.1260352282999047, "percentage": 42.52, "elapsed_time": "1:52:34", "remaining_time": "2:32:10", "throughput": 8687.4, "total_tokens": 58676520} +{"current_steps": 87030, "total_steps": 204665, "loss": 0.0001, "lr": 1.4219062253580691e-06, "epoch": 2.126157379131752, "percentage": 42.52, "elapsed_time": "1:52:34", "remaining_time": "2:32:09", "throughput": 8687.48, "total_tokens": 58680104} +{"current_steps": 87035, "total_steps": 204665, "loss": 0.0004, "lr": 1.421828907786068e-06, "epoch": 2.126279529963599, "percentage": 42.53, "elapsed_time": "1:52:34", "remaining_time": "2:32:09", "throughput": 8687.5, "total_tokens": 58683240} +{"current_steps": 87040, "total_steps": 204665, "loss": 0.062, "lr": 1.4217515871464132e-06, "epoch": 2.1264016807954462, "percentage": 42.53, "elapsed_time": "1:52:35", "remaining_time": "2:32:08", "throughput": 8687.59, "total_tokens": 58686888} +{"current_steps": 87045, "total_steps": 204665, "loss": 0.0225, "lr": 1.4216742634396677e-06, "epoch": 2.1265238316272934, "percentage": 42.53, "elapsed_time": "1:52:35", "remaining_time": "2:32:08", "throughput": 8687.62, "total_tokens": 58690088} +{"current_steps": 87050, "total_steps": 204665, "loss": 0.0001, "lr": 1.4215969366663936e-06, "epoch": 2.1266459824591406, "percentage": 42.53, "elapsed_time": "1:52:35", "remaining_time": "2:32:08", "throughput": 8687.65, "total_tokens": 58693288} +{"current_steps": 87055, "total_steps": 204665, "loss": 0.0004, "lr": 1.4215196068271531e-06, "epoch": 2.126768133290988, "percentage": 42.54, "elapsed_time": "1:52:36", "remaining_time": "2:32:07", "throughput": 8687.7, "total_tokens": 58696616} +{"current_steps": 87060, "total_steps": 204665, "loss": 0.0502, "lr": 1.4214422739225087e-06, "epoch": 2.126890284122835, "percentage": 42.54, "elapsed_time": "1:52:36", "remaining_time": "2:32:07", "throughput": 8687.72, "total_tokens": 58699752} +{"current_steps": 87065, "total_steps": 204665, "loss": 0.1728, "lr": 1.4213649379530228e-06, "epoch": 2.127012434954682, "percentage": 42.54, "elapsed_time": "1:52:36", "remaining_time": "2:32:06", "throughput": 8687.79, "total_tokens": 58703272} +{"current_steps": 87070, "total_steps": 204665, "loss": 0.0559, "lr": 1.4212875989192573e-06, "epoch": 2.1271345857865294, "percentage": 42.54, "elapsed_time": "1:52:37", "remaining_time": "2:32:06", "throughput": 8687.84, "total_tokens": 58706536} +{"current_steps": 87075, "total_steps": 204665, "loss": 0.1699, "lr": 1.4212102568217755e-06, "epoch": 2.1272567366183766, "percentage": 42.55, "elapsed_time": "1:52:37", "remaining_time": "2:32:05", "throughput": 8687.91, "total_tokens": 58710120} +{"current_steps": 87080, "total_steps": 204665, "loss": 0.1176, "lr": 1.4211329116611392e-06, "epoch": 2.1273788874502237, "percentage": 42.55, "elapsed_time": "1:52:38", "remaining_time": "2:32:05", "throughput": 8687.92, "total_tokens": 58713128} +{"current_steps": 87085, "total_steps": 204665, "loss": 0.0312, "lr": 1.4210555634379113e-06, "epoch": 2.1275010382820705, "percentage": 42.55, "elapsed_time": "1:52:38", "remaining_time": "2:32:04", "throughput": 8687.89, "total_tokens": 58715752} +{"current_steps": 87090, "total_steps": 204665, "loss": 0.0013, "lr": 1.420978212152654e-06, "epoch": 2.1276231891139177, "percentage": 42.55, "elapsed_time": "1:52:38", "remaining_time": "2:32:04", "throughput": 8687.93, "total_tokens": 58719016} +{"current_steps": 87095, "total_steps": 204665, "loss": 0.0369, "lr": 1.4209008578059299e-06, "epoch": 2.127745339945765, "percentage": 42.55, "elapsed_time": "1:52:39", "remaining_time": "2:32:04", "throughput": 8687.96, "total_tokens": 58722216} +{"current_steps": 87100, "total_steps": 204665, "loss": 0.0001, "lr": 1.4208235003983017e-06, "epoch": 2.127867490777612, "percentage": 42.56, "elapsed_time": "1:52:39", "remaining_time": "2:32:03", "throughput": 8688.04, "total_tokens": 58725800} +{"current_steps": 87105, "total_steps": 204665, "loss": 0.0003, "lr": 1.4207461399303316e-06, "epoch": 2.1279896416094592, "percentage": 42.56, "elapsed_time": "1:52:39", "remaining_time": "2:32:03", "throughput": 8688.04, "total_tokens": 58728744} +{"current_steps": 87110, "total_steps": 204665, "loss": 0.0396, "lr": 1.4206687764025825e-06, "epoch": 2.1281117924413064, "percentage": 42.56, "elapsed_time": "1:52:40", "remaining_time": "2:32:02", "throughput": 8688.12, "total_tokens": 58732392} +{"current_steps": 87115, "total_steps": 204665, "loss": 0.0004, "lr": 1.4205914098156168e-06, "epoch": 2.1282339432731536, "percentage": 42.56, "elapsed_time": "1:52:40", "remaining_time": "2:32:02", "throughput": 8688.15, "total_tokens": 58735592} +{"current_steps": 87120, "total_steps": 204665, "loss": 0.0325, "lr": 1.4205140401699973e-06, "epoch": 2.128356094105001, "percentage": 42.57, "elapsed_time": "1:52:40", "remaining_time": "2:32:01", "throughput": 8688.21, "total_tokens": 58739048} +{"current_steps": 87125, "total_steps": 204665, "loss": 0.0001, "lr": 1.4204366674662867e-06, "epoch": 2.128478244936848, "percentage": 42.57, "elapsed_time": "1:52:41", "remaining_time": "2:32:01", "throughput": 8688.3, "total_tokens": 58742696} +{"current_steps": 87130, "total_steps": 204665, "loss": 0.0418, "lr": 1.4203592917050476e-06, "epoch": 2.128600395768695, "percentage": 42.57, "elapsed_time": "1:52:41", "remaining_time": "2:32:00", "throughput": 8688.37, "total_tokens": 58746216} +{"current_steps": 87135, "total_steps": 204665, "loss": 0.0001, "lr": 1.4202819128868422e-06, "epoch": 2.1287225466005424, "percentage": 42.57, "elapsed_time": "1:52:41", "remaining_time": "2:32:00", "throughput": 8688.43, "total_tokens": 58749608} +{"current_steps": 87140, "total_steps": 204665, "loss": 0.0001, "lr": 1.4202045310122341e-06, "epoch": 2.1288446974323896, "percentage": 42.58, "elapsed_time": "1:52:42", "remaining_time": "2:32:00", "throughput": 8688.44, "total_tokens": 58752680} +{"current_steps": 87145, "total_steps": 204665, "loss": 0.0069, "lr": 1.4201271460817859e-06, "epoch": 2.1289668482642368, "percentage": 42.58, "elapsed_time": "1:52:42", "remaining_time": "2:31:59", "throughput": 8688.51, "total_tokens": 58756200} +{"current_steps": 87150, "total_steps": 204665, "loss": 0.0305, "lr": 1.4200497580960597e-06, "epoch": 2.129088999096084, "percentage": 42.58, "elapsed_time": "1:52:42", "remaining_time": "2:31:59", "throughput": 8688.52, "total_tokens": 58759208} +{"current_steps": 87155, "total_steps": 204665, "loss": 0.1128, "lr": 1.4199723670556187e-06, "epoch": 2.129211149927931, "percentage": 42.58, "elapsed_time": "1:52:43", "remaining_time": "2:31:58", "throughput": 8688.58, "total_tokens": 58762600} +{"current_steps": 87160, "total_steps": 204665, "loss": 0.0003, "lr": 1.419894972961026e-06, "epoch": 2.1293333007597783, "percentage": 42.59, "elapsed_time": "1:52:43", "remaining_time": "2:31:58", "throughput": 8688.63, "total_tokens": 58765928} +{"current_steps": 87165, "total_steps": 204665, "loss": 0.1243, "lr": 1.4198175758128436e-06, "epoch": 2.1294554515916255, "percentage": 42.59, "elapsed_time": "1:52:43", "remaining_time": "2:31:57", "throughput": 8688.66, "total_tokens": 58769192} +{"current_steps": 87170, "total_steps": 204665, "loss": 0.0542, "lr": 1.4197401756116352e-06, "epoch": 2.1295776024234723, "percentage": 42.59, "elapsed_time": "1:52:44", "remaining_time": "2:31:57", "throughput": 8688.66, "total_tokens": 58772072} +{"current_steps": 87175, "total_steps": 204665, "loss": 0.0302, "lr": 1.419662772357963e-06, "epoch": 2.1296997532553195, "percentage": 42.59, "elapsed_time": "1:52:44", "remaining_time": "2:31:56", "throughput": 8688.7, "total_tokens": 58775336} +{"current_steps": 87180, "total_steps": 204665, "loss": 0.0539, "lr": 1.4195853660523907e-06, "epoch": 2.1298219040871667, "percentage": 42.6, "elapsed_time": "1:52:44", "remaining_time": "2:31:56", "throughput": 8688.8, "total_tokens": 58779112} +{"current_steps": 87185, "total_steps": 204665, "loss": 0.0675, "lr": 1.4195079566954805e-06, "epoch": 2.129944054919014, "percentage": 42.6, "elapsed_time": "1:52:45", "remaining_time": "2:31:56", "throughput": 8688.88, "total_tokens": 58782760} +{"current_steps": 87190, "total_steps": 204665, "loss": 0.0288, "lr": 1.419430544287796e-06, "epoch": 2.130066205750861, "percentage": 42.6, "elapsed_time": "1:52:45", "remaining_time": "2:31:55", "throughput": 8688.99, "total_tokens": 58786664} +{"current_steps": 87195, "total_steps": 204665, "loss": 0.0721, "lr": 1.4193531288298993e-06, "epoch": 2.130188356582708, "percentage": 42.6, "elapsed_time": "1:52:45", "remaining_time": "2:31:55", "throughput": 8689.03, "total_tokens": 58789928} +{"current_steps": 87200, "total_steps": 204665, "loss": 0.0002, "lr": 1.419275710322354e-06, "epoch": 2.1303105074145554, "percentage": 42.61, "elapsed_time": "1:52:46", "remaining_time": "2:31:54", "throughput": 8689.11, "total_tokens": 58793512} +{"current_steps": 87205, "total_steps": 204665, "loss": 0.0003, "lr": 1.419198288765723e-06, "epoch": 2.1304326582464026, "percentage": 42.61, "elapsed_time": "1:52:46", "remaining_time": "2:31:54", "throughput": 8689.11, "total_tokens": 58796456} +{"current_steps": 87210, "total_steps": 204665, "loss": 0.0005, "lr": 1.4191208641605693e-06, "epoch": 2.13055480907825, "percentage": 42.61, "elapsed_time": "1:52:47", "remaining_time": "2:31:53", "throughput": 8689.13, "total_tokens": 58799528} +{"current_steps": 87215, "total_steps": 204665, "loss": 0.0044, "lr": 1.4190434365074559e-06, "epoch": 2.130676959910097, "percentage": 42.61, "elapsed_time": "1:52:47", "remaining_time": "2:31:53", "throughput": 8689.2, "total_tokens": 58803048} +{"current_steps": 87220, "total_steps": 204665, "loss": 0.0366, "lr": 1.418966005806946e-06, "epoch": 2.130799110741944, "percentage": 42.62, "elapsed_time": "1:52:47", "remaining_time": "2:31:52", "throughput": 8689.22, "total_tokens": 58806248} +{"current_steps": 87225, "total_steps": 204665, "loss": 0.1019, "lr": 1.4188885720596022e-06, "epoch": 2.1309212615737914, "percentage": 42.62, "elapsed_time": "1:52:48", "remaining_time": "2:31:52", "throughput": 8689.25, "total_tokens": 58809448} +{"current_steps": 87230, "total_steps": 204665, "loss": 0.0567, "lr": 1.4188111352659884e-06, "epoch": 2.1310434124056385, "percentage": 42.62, "elapsed_time": "1:52:48", "remaining_time": "2:31:52", "throughput": 8689.29, "total_tokens": 58812648} +{"current_steps": 87235, "total_steps": 204665, "loss": 0.0002, "lr": 1.4187336954266674e-06, "epoch": 2.1311655632374857, "percentage": 42.62, "elapsed_time": "1:52:48", "remaining_time": "2:31:51", "throughput": 8689.31, "total_tokens": 58815784} +{"current_steps": 87240, "total_steps": 204665, "loss": 0.1964, "lr": 1.4186562525422025e-06, "epoch": 2.131287714069333, "percentage": 42.63, "elapsed_time": "1:52:49", "remaining_time": "2:31:51", "throughput": 8689.37, "total_tokens": 58819240} +{"current_steps": 87245, "total_steps": 204665, "loss": 0.0005, "lr": 1.4185788066131566e-06, "epoch": 2.13140986490118, "percentage": 42.63, "elapsed_time": "1:52:49", "remaining_time": "2:31:50", "throughput": 8689.44, "total_tokens": 58822760} +{"current_steps": 87250, "total_steps": 204665, "loss": 0.0262, "lr": 1.4185013576400928e-06, "epoch": 2.1315320157330273, "percentage": 42.63, "elapsed_time": "1:52:49", "remaining_time": "2:31:50", "throughput": 8689.44, "total_tokens": 58825704} +{"current_steps": 87255, "total_steps": 204665, "loss": 0.0006, "lr": 1.418423905623575e-06, "epoch": 2.1316541665648745, "percentage": 42.63, "elapsed_time": "1:52:50", "remaining_time": "2:31:49", "throughput": 8689.53, "total_tokens": 58829416} +{"current_steps": 87260, "total_steps": 204665, "loss": 0.1206, "lr": 1.4183464505641656e-06, "epoch": 2.1317763173967217, "percentage": 42.64, "elapsed_time": "1:52:50", "remaining_time": "2:31:49", "throughput": 8689.58, "total_tokens": 58832744} +{"current_steps": 87265, "total_steps": 204665, "loss": 0.0003, "lr": 1.4182689924624285e-06, "epoch": 2.1318984682285684, "percentage": 42.64, "elapsed_time": "1:52:50", "remaining_time": "2:31:48", "throughput": 8689.59, "total_tokens": 58835752} +{"current_steps": 87270, "total_steps": 204665, "loss": 0.0479, "lr": 1.4181915313189269e-06, "epoch": 2.1320206190604156, "percentage": 42.64, "elapsed_time": "1:52:51", "remaining_time": "2:31:48", "throughput": 8689.62, "total_tokens": 58838952} +{"current_steps": 87275, "total_steps": 204665, "loss": 0.044, "lr": 1.4181140671342235e-06, "epoch": 2.132142769892263, "percentage": 42.64, "elapsed_time": "1:52:51", "remaining_time": "2:31:48", "throughput": 8689.63, "total_tokens": 58842024} +{"current_steps": 87280, "total_steps": 204665, "loss": 0.1059, "lr": 1.4180365999088826e-06, "epoch": 2.13226492072411, "percentage": 42.65, "elapsed_time": "1:52:51", "remaining_time": "2:31:47", "throughput": 8689.73, "total_tokens": 58845800} +{"current_steps": 87285, "total_steps": 204665, "loss": 0.0003, "lr": 1.4179591296434669e-06, "epoch": 2.132387071555957, "percentage": 42.65, "elapsed_time": "1:52:52", "remaining_time": "2:31:47", "throughput": 8689.87, "total_tokens": 58849896} +{"current_steps": 87290, "total_steps": 204665, "loss": 0.0013, "lr": 1.4178816563385398e-06, "epoch": 2.1325092223878044, "percentage": 42.65, "elapsed_time": "1:52:52", "remaining_time": "2:31:46", "throughput": 8689.89, "total_tokens": 58853032} +{"current_steps": 87295, "total_steps": 204665, "loss": 0.0812, "lr": 1.4178041799946653e-06, "epoch": 2.1326313732196516, "percentage": 42.65, "elapsed_time": "1:52:52", "remaining_time": "2:31:46", "throughput": 8689.93, "total_tokens": 58856296} +{"current_steps": 87300, "total_steps": 204665, "loss": 0.0668, "lr": 1.4177267006124064e-06, "epoch": 2.1327535240514988, "percentage": 42.66, "elapsed_time": "1:52:53", "remaining_time": "2:31:45", "throughput": 8689.99, "total_tokens": 58859688} +{"current_steps": 87305, "total_steps": 204665, "loss": 0.064, "lr": 1.4176492181923267e-06, "epoch": 2.132875674883346, "percentage": 42.66, "elapsed_time": "1:52:53", "remaining_time": "2:31:45", "throughput": 8690.11, "total_tokens": 58863656} +{"current_steps": 87310, "total_steps": 204665, "loss": 0.0002, "lr": 1.4175717327349893e-06, "epoch": 2.132997825715193, "percentage": 42.66, "elapsed_time": "1:52:53", "remaining_time": "2:31:45", "throughput": 8690.2, "total_tokens": 58867304} +{"current_steps": 87315, "total_steps": 204665, "loss": 0.0009, "lr": 1.417494244240958e-06, "epoch": 2.1331199765470403, "percentage": 42.66, "elapsed_time": "1:52:54", "remaining_time": "2:31:44", "throughput": 8690.28, "total_tokens": 58870952} +{"current_steps": 87320, "total_steps": 204665, "loss": 0.042, "lr": 1.4174167527107961e-06, "epoch": 2.1332421273788875, "percentage": 42.66, "elapsed_time": "1:52:54", "remaining_time": "2:31:44", "throughput": 8690.37, "total_tokens": 58874600} +{"current_steps": 87325, "total_steps": 204665, "loss": 0.0888, "lr": 1.4173392581450674e-06, "epoch": 2.1333642782107347, "percentage": 42.67, "elapsed_time": "1:52:55", "remaining_time": "2:31:43", "throughput": 8690.41, "total_tokens": 58877864} +{"current_steps": 87330, "total_steps": 204665, "loss": 0.0483, "lr": 1.4172617605443353e-06, "epoch": 2.133486429042582, "percentage": 42.67, "elapsed_time": "1:52:55", "remaining_time": "2:31:43", "throughput": 8690.43, "total_tokens": 58881000} +{"current_steps": 87335, "total_steps": 204665, "loss": 0.0001, "lr": 1.4171842599091636e-06, "epoch": 2.133608579874429, "percentage": 42.67, "elapsed_time": "1:52:55", "remaining_time": "2:31:42", "throughput": 8690.48, "total_tokens": 58884328} +{"current_steps": 87340, "total_steps": 204665, "loss": 0.0466, "lr": 1.4171067562401157e-06, "epoch": 2.1337307307062763, "percentage": 42.67, "elapsed_time": "1:52:56", "remaining_time": "2:31:42", "throughput": 8690.5, "total_tokens": 58887400} +{"current_steps": 87345, "total_steps": 204665, "loss": 0.0215, "lr": 1.4170292495377554e-06, "epoch": 2.1338528815381235, "percentage": 42.68, "elapsed_time": "1:52:56", "remaining_time": "2:31:41", "throughput": 8690.58, "total_tokens": 58891048} +{"current_steps": 87350, "total_steps": 204665, "loss": 0.0014, "lr": 1.416951739802646e-06, "epoch": 2.13397503236997, "percentage": 42.68, "elapsed_time": "1:52:56", "remaining_time": "2:31:41", "throughput": 8690.64, "total_tokens": 58894504} +{"current_steps": 87355, "total_steps": 204665, "loss": 0.0004, "lr": 1.4168742270353515e-06, "epoch": 2.1340971832018174, "percentage": 42.68, "elapsed_time": "1:52:57", "remaining_time": "2:31:41", "throughput": 8690.68, "total_tokens": 58897704} +{"current_steps": 87360, "total_steps": 204665, "loss": 0.001, "lr": 1.4167967112364357e-06, "epoch": 2.1342193340336646, "percentage": 42.68, "elapsed_time": "1:52:57", "remaining_time": "2:31:40", "throughput": 8690.69, "total_tokens": 58900776} +{"current_steps": 87365, "total_steps": 204665, "loss": 0.0003, "lr": 1.416719192406462e-06, "epoch": 2.134341484865512, "percentage": 42.69, "elapsed_time": "1:52:57", "remaining_time": "2:31:40", "throughput": 8690.75, "total_tokens": 58904168} +{"current_steps": 87370, "total_steps": 204665, "loss": 0.1239, "lr": 1.4166416705459941e-06, "epoch": 2.134463635697359, "percentage": 42.69, "elapsed_time": "1:52:58", "remaining_time": "2:31:39", "throughput": 8690.76, "total_tokens": 58907240} +{"current_steps": 87375, "total_steps": 204665, "loss": 0.0004, "lr": 1.4165641456555959e-06, "epoch": 2.134585786529206, "percentage": 42.69, "elapsed_time": "1:52:58", "remaining_time": "2:31:39", "throughput": 8690.8, "total_tokens": 58910504} +{"current_steps": 87380, "total_steps": 204665, "loss": 0.0003, "lr": 1.4164866177358312e-06, "epoch": 2.1347079373610534, "percentage": 42.69, "elapsed_time": "1:52:58", "remaining_time": "2:31:38", "throughput": 8690.85, "total_tokens": 58913896} +{"current_steps": 87385, "total_steps": 204665, "loss": 0.0005, "lr": 1.4164090867872638e-06, "epoch": 2.1348300881929005, "percentage": 42.7, "elapsed_time": "1:52:59", "remaining_time": "2:31:38", "throughput": 8690.93, "total_tokens": 58917480} +{"current_steps": 87390, "total_steps": 204665, "loss": 0.0001, "lr": 1.4163315528104576e-06, "epoch": 2.1349522390247477, "percentage": 42.7, "elapsed_time": "1:52:59", "remaining_time": "2:31:37", "throughput": 8691.0, "total_tokens": 58921000} +{"current_steps": 87395, "total_steps": 204665, "loss": 0.0933, "lr": 1.4162540158059765e-06, "epoch": 2.135074389856595, "percentage": 42.7, "elapsed_time": "1:52:59", "remaining_time": "2:31:37", "throughput": 8691.01, "total_tokens": 58924008} +{"current_steps": 87400, "total_steps": 204665, "loss": 0.0377, "lr": 1.416176475774384e-06, "epoch": 2.135196540688442, "percentage": 42.7, "elapsed_time": "1:53:00", "remaining_time": "2:31:37", "throughput": 8691.11, "total_tokens": 58927784} +{"current_steps": 87405, "total_steps": 204665, "loss": 0.0003, "lr": 1.4160989327162443e-06, "epoch": 2.1353186915202893, "percentage": 42.71, "elapsed_time": "1:53:00", "remaining_time": "2:31:36", "throughput": 8691.14, "total_tokens": 58930920} +{"current_steps": 87410, "total_steps": 204665, "loss": 0.0001, "lr": 1.4160213866321216e-06, "epoch": 2.1354408423521365, "percentage": 42.71, "elapsed_time": "1:53:00", "remaining_time": "2:31:36", "throughput": 8691.18, "total_tokens": 58934248} +{"current_steps": 87415, "total_steps": 204665, "loss": 0.0001, "lr": 1.4159438375225788e-06, "epoch": 2.1355629931839837, "percentage": 42.71, "elapsed_time": "1:53:01", "remaining_time": "2:31:35", "throughput": 8691.23, "total_tokens": 58937576} +{"current_steps": 87420, "total_steps": 204665, "loss": 0.0002, "lr": 1.4158662853881809e-06, "epoch": 2.135685144015831, "percentage": 42.71, "elapsed_time": "1:53:01", "remaining_time": "2:31:35", "throughput": 8691.27, "total_tokens": 58940840} +{"current_steps": 87425, "total_steps": 204665, "loss": 0.0003, "lr": 1.4157887302294916e-06, "epoch": 2.135807294847678, "percentage": 42.72, "elapsed_time": "1:53:01", "remaining_time": "2:31:34", "throughput": 8691.29, "total_tokens": 58943912} +{"current_steps": 87430, "total_steps": 204665, "loss": 0.0692, "lr": 1.4157111720470746e-06, "epoch": 2.1359294456795253, "percentage": 42.72, "elapsed_time": "1:53:02", "remaining_time": "2:31:34", "throughput": 8691.38, "total_tokens": 58947624} +{"current_steps": 87435, "total_steps": 204665, "loss": 0.0002, "lr": 1.4156336108414944e-06, "epoch": 2.1360515965113724, "percentage": 42.72, "elapsed_time": "1:53:02", "remaining_time": "2:31:33", "throughput": 8691.44, "total_tokens": 58951080} +{"current_steps": 87440, "total_steps": 204665, "loss": 0.0458, "lr": 1.4155560466133146e-06, "epoch": 2.1361737473432196, "percentage": 42.72, "elapsed_time": "1:53:03", "remaining_time": "2:31:33", "throughput": 8691.55, "total_tokens": 58954920} +{"current_steps": 87445, "total_steps": 204665, "loss": 0.0405, "lr": 1.4154784793630993e-06, "epoch": 2.1362958981750664, "percentage": 42.73, "elapsed_time": "1:53:03", "remaining_time": "2:31:33", "throughput": 8691.61, "total_tokens": 58958376} +{"current_steps": 87450, "total_steps": 204665, "loss": 0.068, "lr": 1.415400909091413e-06, "epoch": 2.1364180490069136, "percentage": 42.73, "elapsed_time": "1:53:03", "remaining_time": "2:31:32", "throughput": 8691.64, "total_tokens": 58961512} +{"current_steps": 87455, "total_steps": 204665, "loss": 0.0, "lr": 1.4153233357988197e-06, "epoch": 2.1365401998387608, "percentage": 42.73, "elapsed_time": "1:53:04", "remaining_time": "2:31:32", "throughput": 8691.7, "total_tokens": 58964968} +{"current_steps": 87460, "total_steps": 204665, "loss": 0.0001, "lr": 1.4152457594858834e-06, "epoch": 2.136662350670608, "percentage": 42.73, "elapsed_time": "1:53:04", "remaining_time": "2:31:31", "throughput": 8691.74, "total_tokens": 58968232} +{"current_steps": 87465, "total_steps": 204665, "loss": 0.0001, "lr": 1.415168180153168e-06, "epoch": 2.136784501502455, "percentage": 42.74, "elapsed_time": "1:53:04", "remaining_time": "2:31:31", "throughput": 8691.8, "total_tokens": 58971688} +{"current_steps": 87470, "total_steps": 204665, "loss": 0.0002, "lr": 1.415090597801238e-06, "epoch": 2.1369066523343023, "percentage": 42.74, "elapsed_time": "1:53:05", "remaining_time": "2:31:30", "throughput": 8691.84, "total_tokens": 58975016} +{"current_steps": 87475, "total_steps": 204665, "loss": 0.0005, "lr": 1.4150130124306574e-06, "epoch": 2.1370288031661495, "percentage": 42.74, "elapsed_time": "1:53:05", "remaining_time": "2:31:30", "throughput": 8691.89, "total_tokens": 58978408} +{"current_steps": 87480, "total_steps": 204665, "loss": 0.3102, "lr": 1.4149354240419906e-06, "epoch": 2.1371509539979967, "percentage": 42.74, "elapsed_time": "1:53:05", "remaining_time": "2:31:29", "throughput": 8691.9, "total_tokens": 58981416} +{"current_steps": 87485, "total_steps": 204665, "loss": 0.0778, "lr": 1.414857832635802e-06, "epoch": 2.137273104829844, "percentage": 42.75, "elapsed_time": "1:53:06", "remaining_time": "2:31:29", "throughput": 8691.92, "total_tokens": 58984488} +{"current_steps": 87490, "total_steps": 204665, "loss": 0.0527, "lr": 1.4147802382126553e-06, "epoch": 2.137395255661691, "percentage": 42.75, "elapsed_time": "1:53:06", "remaining_time": "2:31:29", "throughput": 8692.05, "total_tokens": 58988520} +{"current_steps": 87495, "total_steps": 204665, "loss": 0.0001, "lr": 1.4147026407731156e-06, "epoch": 2.1375174064935383, "percentage": 42.75, "elapsed_time": "1:53:06", "remaining_time": "2:31:28", "throughput": 8692.11, "total_tokens": 58992040} +{"current_steps": 87500, "total_steps": 204665, "loss": 0.0316, "lr": 1.4146250403177464e-06, "epoch": 2.1376395573253855, "percentage": 42.75, "elapsed_time": "1:53:07", "remaining_time": "2:31:28", "throughput": 8692.17, "total_tokens": 58995432} +{"current_steps": 87505, "total_steps": 204665, "loss": 0.0014, "lr": 1.4145474368471124e-06, "epoch": 2.1377617081572327, "percentage": 42.76, "elapsed_time": "1:53:07", "remaining_time": "2:31:27", "throughput": 8692.3, "total_tokens": 58999464} +{"current_steps": 87510, "total_steps": 204665, "loss": 0.0005, "lr": 1.4144698303617782e-06, "epoch": 2.13788385898908, "percentage": 42.76, "elapsed_time": "1:53:07", "remaining_time": "2:31:27", "throughput": 8692.31, "total_tokens": 59002472} +{"current_steps": 87515, "total_steps": 204665, "loss": 0.0044, "lr": 1.4143922208623078e-06, "epoch": 2.138006009820927, "percentage": 42.76, "elapsed_time": "1:53:08", "remaining_time": "2:31:26", "throughput": 8692.3, "total_tokens": 59005352} +{"current_steps": 87520, "total_steps": 204665, "loss": 0.0865, "lr": 1.4143146083492656e-06, "epoch": 2.1381281606527742, "percentage": 42.76, "elapsed_time": "1:53:08", "remaining_time": "2:31:26", "throughput": 8692.36, "total_tokens": 59008808} +{"current_steps": 87525, "total_steps": 204665, "loss": 0.0002, "lr": 1.4142369928232164e-06, "epoch": 2.1382503114846214, "percentage": 42.77, "elapsed_time": "1:53:08", "remaining_time": "2:31:26", "throughput": 8692.4, "total_tokens": 59012136} +{"current_steps": 87530, "total_steps": 204665, "loss": 0.0601, "lr": 1.414159374284724e-06, "epoch": 2.138372462316468, "percentage": 42.77, "elapsed_time": "1:53:09", "remaining_time": "2:31:25", "throughput": 8692.47, "total_tokens": 59015656} +{"current_steps": 87535, "total_steps": 204665, "loss": 0.0923, "lr": 1.4140817527343534e-06, "epoch": 2.1384946131483153, "percentage": 42.77, "elapsed_time": "1:53:09", "remaining_time": "2:31:25", "throughput": 8692.55, "total_tokens": 59019304} +{"current_steps": 87540, "total_steps": 204665, "loss": 0.0002, "lr": 1.4140041281726686e-06, "epoch": 2.1386167639801625, "percentage": 42.77, "elapsed_time": "1:53:09", "remaining_time": "2:31:24", "throughput": 8692.6, "total_tokens": 59022696} +{"current_steps": 87545, "total_steps": 204665, "loss": 0.0358, "lr": 1.413926500600235e-06, "epoch": 2.1387389148120097, "percentage": 42.77, "elapsed_time": "1:53:10", "remaining_time": "2:31:24", "throughput": 8692.68, "total_tokens": 59026280} +{"current_steps": 87550, "total_steps": 204665, "loss": 0.0001, "lr": 1.4138488700176163e-06, "epoch": 2.138861065643857, "percentage": 42.78, "elapsed_time": "1:53:10", "remaining_time": "2:31:23", "throughput": 8692.74, "total_tokens": 59029736} +{"current_steps": 87555, "total_steps": 204665, "loss": 0.0029, "lr": 1.4137712364253774e-06, "epoch": 2.138983216475704, "percentage": 42.78, "elapsed_time": "1:53:11", "remaining_time": "2:31:23", "throughput": 8692.8, "total_tokens": 59033192} +{"current_steps": 87560, "total_steps": 204665, "loss": 0.0051, "lr": 1.4136935998240827e-06, "epoch": 2.1391053673075513, "percentage": 42.78, "elapsed_time": "1:53:11", "remaining_time": "2:31:22", "throughput": 8692.79, "total_tokens": 59036072} +{"current_steps": 87565, "total_steps": 204665, "loss": 0.0003, "lr": 1.413615960214297e-06, "epoch": 2.1392275181393985, "percentage": 42.78, "elapsed_time": "1:53:11", "remaining_time": "2:31:22", "throughput": 8692.81, "total_tokens": 59039144} +{"current_steps": 87570, "total_steps": 204665, "loss": 0.0824, "lr": 1.4135383175965844e-06, "epoch": 2.1393496689712457, "percentage": 42.79, "elapsed_time": "1:53:12", "remaining_time": "2:31:22", "throughput": 8692.89, "total_tokens": 59042792} +{"current_steps": 87575, "total_steps": 204665, "loss": 0.0279, "lr": 1.41346067197151e-06, "epoch": 2.139471819803093, "percentage": 42.79, "elapsed_time": "1:53:12", "remaining_time": "2:31:21", "throughput": 8692.89, "total_tokens": 59045800} +{"current_steps": 87580, "total_steps": 204665, "loss": 0.0739, "lr": 1.4133830233396386e-06, "epoch": 2.13959397063494, "percentage": 42.79, "elapsed_time": "1:53:12", "remaining_time": "2:31:21", "throughput": 8692.92, "total_tokens": 59049000} +{"current_steps": 87585, "total_steps": 204665, "loss": 0.0615, "lr": 1.413305371701535e-06, "epoch": 2.1397161214667872, "percentage": 42.79, "elapsed_time": "1:53:13", "remaining_time": "2:31:20", "throughput": 8692.94, "total_tokens": 59052136} +{"current_steps": 87590, "total_steps": 204665, "loss": 0.0559, "lr": 1.413227717057763e-06, "epoch": 2.1398382722986344, "percentage": 42.8, "elapsed_time": "1:53:13", "remaining_time": "2:31:20", "throughput": 8692.98, "total_tokens": 59055400} +{"current_steps": 87595, "total_steps": 204665, "loss": 0.0003, "lr": 1.413150059408888e-06, "epoch": 2.1399604231304816, "percentage": 42.8, "elapsed_time": "1:53:13", "remaining_time": "2:31:19", "throughput": 8693.0, "total_tokens": 59058536} +{"current_steps": 87600, "total_steps": 204665, "loss": 0.0394, "lr": 1.4130723987554747e-06, "epoch": 2.140082573962329, "percentage": 42.8, "elapsed_time": "1:53:14", "remaining_time": "2:31:19", "throughput": 8692.99, "total_tokens": 59061416} +{"current_steps": 87605, "total_steps": 204665, "loss": 0.121, "lr": 1.4129947350980878e-06, "epoch": 2.140204724794176, "percentage": 42.8, "elapsed_time": "1:53:14", "remaining_time": "2:31:18", "throughput": 8693.01, "total_tokens": 59064488} +{"current_steps": 87610, "total_steps": 204665, "loss": 0.076, "lr": 1.4129170684372921e-06, "epoch": 2.140326875626023, "percentage": 42.81, "elapsed_time": "1:53:14", "remaining_time": "2:31:18", "throughput": 8693.06, "total_tokens": 59067880} +{"current_steps": 87615, "total_steps": 204665, "loss": 0.0013, "lr": 1.4128393987736526e-06, "epoch": 2.1404490264578704, "percentage": 42.81, "elapsed_time": "1:53:15", "remaining_time": "2:31:18", "throughput": 8693.06, "total_tokens": 59070888} +{"current_steps": 87620, "total_steps": 204665, "loss": 0.0106, "lr": 1.412761726107734e-06, "epoch": 2.1405711772897176, "percentage": 42.81, "elapsed_time": "1:53:15", "remaining_time": "2:31:17", "throughput": 8693.08, "total_tokens": 59073960} +{"current_steps": 87625, "total_steps": 204665, "loss": 0.0318, "lr": 1.412684050440101e-06, "epoch": 2.1406933281215643, "percentage": 42.81, "elapsed_time": "1:53:15", "remaining_time": "2:31:17", "throughput": 8693.14, "total_tokens": 59077416} +{"current_steps": 87630, "total_steps": 204665, "loss": 0.0445, "lr": 1.4126063717713183e-06, "epoch": 2.1408154789534115, "percentage": 42.82, "elapsed_time": "1:53:16", "remaining_time": "2:31:16", "throughput": 8693.17, "total_tokens": 59080680} +{"current_steps": 87635, "total_steps": 204665, "loss": 0.0002, "lr": 1.4125286901019513e-06, "epoch": 2.1409376297852587, "percentage": 42.82, "elapsed_time": "1:53:16", "remaining_time": "2:31:16", "throughput": 8693.21, "total_tokens": 59084008} +{"current_steps": 87640, "total_steps": 204665, "loss": 0.0002, "lr": 1.4124510054325648e-06, "epoch": 2.141059780617106, "percentage": 42.82, "elapsed_time": "1:53:16", "remaining_time": "2:31:15", "throughput": 8693.25, "total_tokens": 59087272} +{"current_steps": 87645, "total_steps": 204665, "loss": 0.0414, "lr": 1.4123733177637236e-06, "epoch": 2.141181931448953, "percentage": 42.82, "elapsed_time": "1:53:17", "remaining_time": "2:31:15", "throughput": 8693.34, "total_tokens": 59090920} +{"current_steps": 87650, "total_steps": 204665, "loss": 0.0004, "lr": 1.4122956270959927e-06, "epoch": 2.1413040822808003, "percentage": 42.83, "elapsed_time": "1:53:17", "remaining_time": "2:31:14", "throughput": 8693.4, "total_tokens": 59094440} +{"current_steps": 87655, "total_steps": 204665, "loss": 0.0589, "lr": 1.412217933429937e-06, "epoch": 2.1414262331126475, "percentage": 42.83, "elapsed_time": "1:53:17", "remaining_time": "2:31:14", "throughput": 8693.44, "total_tokens": 59097704} +{"current_steps": 87660, "total_steps": 204665, "loss": 0.0003, "lr": 1.4121402367661217e-06, "epoch": 2.1415483839444946, "percentage": 42.83, "elapsed_time": "1:53:18", "remaining_time": "2:31:14", "throughput": 8693.45, "total_tokens": 59100712} +{"current_steps": 87665, "total_steps": 204665, "loss": 0.0344, "lr": 1.4120625371051119e-06, "epoch": 2.141670534776342, "percentage": 42.83, "elapsed_time": "1:53:18", "remaining_time": "2:31:13", "throughput": 8693.5, "total_tokens": 59104040} +{"current_steps": 87670, "total_steps": 204665, "loss": 0.0003, "lr": 1.4119848344474723e-06, "epoch": 2.141792685608189, "percentage": 42.84, "elapsed_time": "1:53:18", "remaining_time": "2:31:13", "throughput": 8693.55, "total_tokens": 59107432} +{"current_steps": 87675, "total_steps": 204665, "loss": 0.0582, "lr": 1.4119071287937683e-06, "epoch": 2.141914836440036, "percentage": 42.84, "elapsed_time": "1:53:19", "remaining_time": "2:31:12", "throughput": 8693.61, "total_tokens": 59110888} +{"current_steps": 87680, "total_steps": 204665, "loss": 0.0766, "lr": 1.4118294201445648e-06, "epoch": 2.1420369872718834, "percentage": 42.84, "elapsed_time": "1:53:19", "remaining_time": "2:31:12", "throughput": 8693.63, "total_tokens": 59114024} +{"current_steps": 87685, "total_steps": 204665, "loss": 0.0003, "lr": 1.411751708500427e-06, "epoch": 2.1421591381037306, "percentage": 42.84, "elapsed_time": "1:53:20", "remaining_time": "2:31:11", "throughput": 8693.66, "total_tokens": 59117160} +{"current_steps": 87690, "total_steps": 204665, "loss": 0.0003, "lr": 1.41167399386192e-06, "epoch": 2.142281288935578, "percentage": 42.85, "elapsed_time": "1:53:20", "remaining_time": "2:31:11", "throughput": 8693.69, "total_tokens": 59120360} +{"current_steps": 87695, "total_steps": 204665, "loss": 0.0321, "lr": 1.4115962762296088e-06, "epoch": 2.142403439767425, "percentage": 42.85, "elapsed_time": "1:53:20", "remaining_time": "2:31:10", "throughput": 8693.71, "total_tokens": 59123496} +{"current_steps": 87700, "total_steps": 204665, "loss": 0.001, "lr": 1.411518555604059e-06, "epoch": 2.142525590599272, "percentage": 42.85, "elapsed_time": "1:53:21", "remaining_time": "2:31:10", "throughput": 8693.76, "total_tokens": 59126952} +{"current_steps": 87705, "total_steps": 204665, "loss": 0.0225, "lr": 1.4114408319858355e-06, "epoch": 2.1426477414311194, "percentage": 42.85, "elapsed_time": "1:53:21", "remaining_time": "2:31:10", "throughput": 8693.89, "total_tokens": 59130920} +{"current_steps": 87710, "total_steps": 204665, "loss": 0.0648, "lr": 1.4113631053755037e-06, "epoch": 2.142769892262966, "percentage": 42.86, "elapsed_time": "1:53:21", "remaining_time": "2:31:09", "throughput": 8693.91, "total_tokens": 59133992} +{"current_steps": 87715, "total_steps": 204665, "loss": 0.0513, "lr": 1.4112853757736288e-06, "epoch": 2.1428920430948133, "percentage": 42.86, "elapsed_time": "1:53:22", "remaining_time": "2:31:09", "throughput": 8693.96, "total_tokens": 59137448} +{"current_steps": 87720, "total_steps": 204665, "loss": 0.0005, "lr": 1.411207643180776e-06, "epoch": 2.1430141939266605, "percentage": 42.86, "elapsed_time": "1:53:22", "remaining_time": "2:31:08", "throughput": 8694.0, "total_tokens": 59140712} +{"current_steps": 87725, "total_steps": 204665, "loss": 0.0001, "lr": 1.4111299075975103e-06, "epoch": 2.1431363447585077, "percentage": 42.86, "elapsed_time": "1:53:22", "remaining_time": "2:31:08", "throughput": 8694.03, "total_tokens": 59143976} +{"current_steps": 87730, "total_steps": 204665, "loss": 0.0377, "lr": 1.4110521690243977e-06, "epoch": 2.143258495590355, "percentage": 42.87, "elapsed_time": "1:53:23", "remaining_time": "2:31:07", "throughput": 8694.15, "total_tokens": 59147944} +{"current_steps": 87735, "total_steps": 204665, "loss": 0.0271, "lr": 1.4109744274620031e-06, "epoch": 2.143380646422202, "percentage": 42.87, "elapsed_time": "1:53:23", "remaining_time": "2:31:07", "throughput": 8694.2, "total_tokens": 59151272} +{"current_steps": 87740, "total_steps": 204665, "loss": 0.0003, "lr": 1.410896682910892e-06, "epoch": 2.1435027972540492, "percentage": 42.87, "elapsed_time": "1:53:23", "remaining_time": "2:31:07", "throughput": 8694.26, "total_tokens": 59154792} +{"current_steps": 87745, "total_steps": 204665, "loss": 0.0006, "lr": 1.4108189353716292e-06, "epoch": 2.1436249480858964, "percentage": 42.87, "elapsed_time": "1:53:24", "remaining_time": "2:31:06", "throughput": 8694.31, "total_tokens": 59158184} +{"current_steps": 87750, "total_steps": 204665, "loss": 0.0001, "lr": 1.4107411848447813e-06, "epoch": 2.1437470989177436, "percentage": 42.87, "elapsed_time": "1:53:24", "remaining_time": "2:31:06", "throughput": 8694.36, "total_tokens": 59161512} +{"current_steps": 87755, "total_steps": 204665, "loss": 0.0964, "lr": 1.4106634313309124e-06, "epoch": 2.143869249749591, "percentage": 42.88, "elapsed_time": "1:53:24", "remaining_time": "2:31:05", "throughput": 8694.36, "total_tokens": 59164456} +{"current_steps": 87760, "total_steps": 204665, "loss": 0.1318, "lr": 1.4105856748305889e-06, "epoch": 2.143991400581438, "percentage": 42.88, "elapsed_time": "1:53:25", "remaining_time": "2:31:05", "throughput": 8694.4, "total_tokens": 59167784} +{"current_steps": 87765, "total_steps": 204665, "loss": 0.0, "lr": 1.410507915344376e-06, "epoch": 2.144113551413285, "percentage": 42.88, "elapsed_time": "1:53:25", "remaining_time": "2:31:04", "throughput": 8694.41, "total_tokens": 59170792} +{"current_steps": 87770, "total_steps": 204665, "loss": 0.0351, "lr": 1.4104301528728393e-06, "epoch": 2.1442357022451324, "percentage": 42.88, "elapsed_time": "1:53:25", "remaining_time": "2:31:04", "throughput": 8694.5, "total_tokens": 59174440} +{"current_steps": 87775, "total_steps": 204665, "loss": 0.049, "lr": 1.410352387416544e-06, "epoch": 2.1443578530769796, "percentage": 42.89, "elapsed_time": "1:53:26", "remaining_time": "2:31:03", "throughput": 8694.6, "total_tokens": 59178216} +{"current_steps": 87780, "total_steps": 204665, "loss": 0.0053, "lr": 1.4102746189760555e-06, "epoch": 2.1444800039088268, "percentage": 42.89, "elapsed_time": "1:53:26", "remaining_time": "2:31:03", "throughput": 8694.67, "total_tokens": 59181736} +{"current_steps": 87785, "total_steps": 204665, "loss": 0.0006, "lr": 1.4101968475519398e-06, "epoch": 2.144602154740674, "percentage": 42.89, "elapsed_time": "1:53:27", "remaining_time": "2:31:03", "throughput": 8694.76, "total_tokens": 59185512} +{"current_steps": 87790, "total_steps": 204665, "loss": 0.0002, "lr": 1.410119073144762e-06, "epoch": 2.144724305572521, "percentage": 42.89, "elapsed_time": "1:53:27", "remaining_time": "2:31:02", "throughput": 8694.84, "total_tokens": 59189096} +{"current_steps": 87795, "total_steps": 204665, "loss": 0.0262, "lr": 1.4100412957550884e-06, "epoch": 2.144846456404368, "percentage": 42.9, "elapsed_time": "1:53:27", "remaining_time": "2:31:02", "throughput": 8694.87, "total_tokens": 59192296} +{"current_steps": 87800, "total_steps": 204665, "loss": 0.0, "lr": 1.4099635153834842e-06, "epoch": 2.144968607236215, "percentage": 42.9, "elapsed_time": "1:53:28", "remaining_time": "2:31:01", "throughput": 8694.91, "total_tokens": 59195560} +{"current_steps": 87805, "total_steps": 204665, "loss": 0.0383, "lr": 1.409885732030515e-06, "epoch": 2.1450907580680623, "percentage": 42.9, "elapsed_time": "1:53:28", "remaining_time": "2:31:01", "throughput": 8694.91, "total_tokens": 59198504} +{"current_steps": 87810, "total_steps": 204665, "loss": 0.0001, "lr": 1.4098079456967462e-06, "epoch": 2.1452129088999095, "percentage": 42.9, "elapsed_time": "1:53:28", "remaining_time": "2:31:00", "throughput": 8694.99, "total_tokens": 59202152} +{"current_steps": 87815, "total_steps": 204665, "loss": 0.0004, "lr": 1.4097301563827443e-06, "epoch": 2.1453350597317566, "percentage": 42.91, "elapsed_time": "1:53:29", "remaining_time": "2:31:00", "throughput": 8695.04, "total_tokens": 59205480} +{"current_steps": 87820, "total_steps": 204665, "loss": 0.0001, "lr": 1.409652364089074e-06, "epoch": 2.145457210563604, "percentage": 42.91, "elapsed_time": "1:53:29", "remaining_time": "2:31:00", "throughput": 8695.05, "total_tokens": 59208488} +{"current_steps": 87825, "total_steps": 204665, "loss": 0.073, "lr": 1.4095745688163016e-06, "epoch": 2.145579361395451, "percentage": 42.91, "elapsed_time": "1:53:29", "remaining_time": "2:30:59", "throughput": 8695.09, "total_tokens": 59211752} +{"current_steps": 87830, "total_steps": 204665, "loss": 0.0001, "lr": 1.4094967705649932e-06, "epoch": 2.145701512227298, "percentage": 42.91, "elapsed_time": "1:53:30", "remaining_time": "2:30:59", "throughput": 8695.11, "total_tokens": 59214888} +{"current_steps": 87835, "total_steps": 204665, "loss": 0.0001, "lr": 1.4094189693357138e-06, "epoch": 2.1458236630591454, "percentage": 42.92, "elapsed_time": "1:53:30", "remaining_time": "2:30:58", "throughput": 8695.15, "total_tokens": 59218152} +{"current_steps": 87840, "total_steps": 204665, "loss": 0.0577, "lr": 1.4093411651290295e-06, "epoch": 2.1459458138909926, "percentage": 42.92, "elapsed_time": "1:53:30", "remaining_time": "2:30:58", "throughput": 8695.22, "total_tokens": 59221736} +{"current_steps": 87845, "total_steps": 204665, "loss": 0.0001, "lr": 1.4092633579455062e-06, "epoch": 2.14606796472284, "percentage": 42.92, "elapsed_time": "1:53:31", "remaining_time": "2:30:57", "throughput": 8695.25, "total_tokens": 59224936} +{"current_steps": 87850, "total_steps": 204665, "loss": 0.1333, "lr": 1.4091855477857099e-06, "epoch": 2.146190115554687, "percentage": 42.92, "elapsed_time": "1:53:31", "remaining_time": "2:30:57", "throughput": 8695.29, "total_tokens": 59228200} +{"current_steps": 87855, "total_steps": 204665, "loss": 0.0534, "lr": 1.4091077346502059e-06, "epoch": 2.146312266386534, "percentage": 42.93, "elapsed_time": "1:53:31", "remaining_time": "2:30:56", "throughput": 8695.35, "total_tokens": 59231592} +{"current_steps": 87860, "total_steps": 204665, "loss": 0.0003, "lr": 1.4090299185395607e-06, "epoch": 2.1464344172183814, "percentage": 42.93, "elapsed_time": "1:53:32", "remaining_time": "2:30:56", "throughput": 8695.41, "total_tokens": 59235112} +{"current_steps": 87865, "total_steps": 204665, "loss": 0.0397, "lr": 1.4089520994543395e-06, "epoch": 2.1465565680502285, "percentage": 42.93, "elapsed_time": "1:53:32", "remaining_time": "2:30:56", "throughput": 8695.53, "total_tokens": 59239016} +{"current_steps": 87870, "total_steps": 204665, "loss": 0.0015, "lr": 1.408874277395109e-06, "epoch": 2.1466787188820757, "percentage": 42.93, "elapsed_time": "1:53:32", "remaining_time": "2:30:55", "throughput": 8695.57, "total_tokens": 59242344} +{"current_steps": 87875, "total_steps": 204665, "loss": 0.0439, "lr": 1.4087964523624352e-06, "epoch": 2.146800869713923, "percentage": 42.94, "elapsed_time": "1:53:33", "remaining_time": "2:30:55", "throughput": 8695.69, "total_tokens": 59246312} +{"current_steps": 87880, "total_steps": 204665, "loss": 0.1187, "lr": 1.408718624356883e-06, "epoch": 2.14692302054577, "percentage": 42.94, "elapsed_time": "1:53:33", "remaining_time": "2:30:54", "throughput": 8695.77, "total_tokens": 59249896} +{"current_steps": 87885, "total_steps": 204665, "loss": 0.0004, "lr": 1.4086407933790189e-06, "epoch": 2.1470451713776173, "percentage": 42.94, "elapsed_time": "1:53:34", "remaining_time": "2:30:54", "throughput": 8696.02, "total_tokens": 59255080} +{"current_steps": 87890, "total_steps": 204665, "loss": 0.0009, "lr": 1.4085629594294094e-06, "epoch": 2.147167322209464, "percentage": 42.94, "elapsed_time": "1:53:34", "remaining_time": "2:30:53", "throughput": 8696.05, "total_tokens": 59258216} +{"current_steps": 87895, "total_steps": 204665, "loss": 0.0389, "lr": 1.4084851225086204e-06, "epoch": 2.1472894730413112, "percentage": 42.95, "elapsed_time": "1:53:34", "remaining_time": "2:30:53", "throughput": 8696.07, "total_tokens": 59261352} +{"current_steps": 87900, "total_steps": 204665, "loss": 0.0003, "lr": 1.4084072826172171e-06, "epoch": 2.1474116238731584, "percentage": 42.95, "elapsed_time": "1:53:35", "remaining_time": "2:30:53", "throughput": 8696.07, "total_tokens": 59264296} +{"current_steps": 87905, "total_steps": 204665, "loss": 0.0001, "lr": 1.4083294397557665e-06, "epoch": 2.1475337747050056, "percentage": 42.95, "elapsed_time": "1:53:35", "remaining_time": "2:30:52", "throughput": 8696.17, "total_tokens": 59268072} +{"current_steps": 87910, "total_steps": 204665, "loss": 0.0001, "lr": 1.4082515939248342e-06, "epoch": 2.147655925536853, "percentage": 42.95, "elapsed_time": "1:53:35", "remaining_time": "2:30:52", "throughput": 8696.19, "total_tokens": 59271208} +{"current_steps": 87915, "total_steps": 204665, "loss": 0.0001, "lr": 1.4081737451249868e-06, "epoch": 2.1477780763687, "percentage": 42.96, "elapsed_time": "1:53:36", "remaining_time": "2:30:51", "throughput": 8696.2, "total_tokens": 59274216} +{"current_steps": 87920, "total_steps": 204665, "loss": 0.0, "lr": 1.4080958933567901e-06, "epoch": 2.147900227200547, "percentage": 42.96, "elapsed_time": "1:53:36", "remaining_time": "2:30:51", "throughput": 8696.28, "total_tokens": 59277864} +{"current_steps": 87925, "total_steps": 204665, "loss": 0.0003, "lr": 1.4080180386208105e-06, "epoch": 2.1480223780323944, "percentage": 42.96, "elapsed_time": "1:53:36", "remaining_time": "2:30:50", "throughput": 8696.35, "total_tokens": 59281384} +{"current_steps": 87930, "total_steps": 204665, "loss": 0.0001, "lr": 1.4079401809176136e-06, "epoch": 2.1481445288642416, "percentage": 42.96, "elapsed_time": "1:53:37", "remaining_time": "2:30:50", "throughput": 8696.44, "total_tokens": 59285096} +{"current_steps": 87935, "total_steps": 204665, "loss": 0.0002, "lr": 1.4078623202477662e-06, "epoch": 2.1482666796960888, "percentage": 42.97, "elapsed_time": "1:53:37", "remaining_time": "2:30:49", "throughput": 8696.5, "total_tokens": 59288552} +{"current_steps": 87940, "total_steps": 204665, "loss": 0.0002, "lr": 1.407784456611834e-06, "epoch": 2.148388830527936, "percentage": 42.97, "elapsed_time": "1:53:37", "remaining_time": "2:30:49", "throughput": 8696.53, "total_tokens": 59291752} +{"current_steps": 87945, "total_steps": 204665, "loss": 0.0, "lr": 1.4077065900103836e-06, "epoch": 2.148510981359783, "percentage": 42.97, "elapsed_time": "1:53:38", "remaining_time": "2:30:49", "throughput": 8696.62, "total_tokens": 59295400} +{"current_steps": 87950, "total_steps": 204665, "loss": 0.0437, "lr": 1.4076287204439817e-06, "epoch": 2.1486331321916303, "percentage": 42.97, "elapsed_time": "1:53:38", "remaining_time": "2:30:48", "throughput": 8696.67, "total_tokens": 59298728} +{"current_steps": 87955, "total_steps": 204665, "loss": 0.0001, "lr": 1.4075508479131936e-06, "epoch": 2.1487552830234775, "percentage": 42.98, "elapsed_time": "1:53:38", "remaining_time": "2:30:48", "throughput": 8696.75, "total_tokens": 59302376} +{"current_steps": 87960, "total_steps": 204665, "loss": 0.0992, "lr": 1.4074729724185864e-06, "epoch": 2.1488774338553247, "percentage": 42.98, "elapsed_time": "1:53:39", "remaining_time": "2:30:47", "throughput": 8696.76, "total_tokens": 59305384} +{"current_steps": 87965, "total_steps": 204665, "loss": 0.0002, "lr": 1.407395093960726e-06, "epoch": 2.148999584687172, "percentage": 42.98, "elapsed_time": "1:53:39", "remaining_time": "2:30:47", "throughput": 8696.84, "total_tokens": 59309032} +{"current_steps": 87970, "total_steps": 204665, "loss": 0.1488, "lr": 1.4073172125401792e-06, "epoch": 2.149121735519019, "percentage": 42.98, "elapsed_time": "1:53:39", "remaining_time": "2:30:46", "throughput": 8696.91, "total_tokens": 59312552} +{"current_steps": 87975, "total_steps": 204665, "loss": 0.0, "lr": 1.4072393281575117e-06, "epoch": 2.149243886350866, "percentage": 42.98, "elapsed_time": "1:53:40", "remaining_time": "2:30:46", "throughput": 8697.01, "total_tokens": 59316264} +{"current_steps": 87980, "total_steps": 204665, "loss": 0.1237, "lr": 1.4071614408132903e-06, "epoch": 2.149366037182713, "percentage": 42.99, "elapsed_time": "1:53:40", "remaining_time": "2:30:46", "throughput": 8697.05, "total_tokens": 59319592} +{"current_steps": 87985, "total_steps": 204665, "loss": 0.0001, "lr": 1.4070835505080816e-06, "epoch": 2.14948818801456, "percentage": 42.99, "elapsed_time": "1:53:41", "remaining_time": "2:30:45", "throughput": 8697.09, "total_tokens": 59322920} +{"current_steps": 87990, "total_steps": 204665, "loss": 0.0455, "lr": 1.4070056572424519e-06, "epoch": 2.1496103388464074, "percentage": 42.99, "elapsed_time": "1:53:41", "remaining_time": "2:30:45", "throughput": 8697.12, "total_tokens": 59326056} +{"current_steps": 87995, "total_steps": 204665, "loss": 0.0002, "lr": 1.4069277610169672e-06, "epoch": 2.1497324896782546, "percentage": 42.99, "elapsed_time": "1:53:41", "remaining_time": "2:30:44", "throughput": 8697.33, "total_tokens": 59330792} +{"current_steps": 88000, "total_steps": 204665, "loss": 0.0004, "lr": 1.4068498618321946e-06, "epoch": 2.1498546405101018, "percentage": 43.0, "elapsed_time": "1:53:42", "remaining_time": "2:30:44", "throughput": 8697.33, "total_tokens": 59333736} +{"current_steps": 88005, "total_steps": 204665, "loss": 0.0963, "lr": 1.4067719596887003e-06, "epoch": 2.149976791341949, "percentage": 43.0, "elapsed_time": "1:53:42", "remaining_time": "2:30:43", "throughput": 8697.39, "total_tokens": 59337128} +{"current_steps": 88010, "total_steps": 204665, "loss": 0.0003, "lr": 1.4066940545870506e-06, "epoch": 2.150098942173796, "percentage": 43.0, "elapsed_time": "1:53:42", "remaining_time": "2:30:43", "throughput": 8697.45, "total_tokens": 59340584} +{"current_steps": 88015, "total_steps": 204665, "loss": 0.0193, "lr": 1.406616146527813e-06, "epoch": 2.1502210930056433, "percentage": 43.0, "elapsed_time": "1:53:43", "remaining_time": "2:30:42", "throughput": 8697.48, "total_tokens": 59343848} +{"current_steps": 88020, "total_steps": 204665, "loss": 0.026, "lr": 1.4065382355115532e-06, "epoch": 2.1503432438374905, "percentage": 43.01, "elapsed_time": "1:53:43", "remaining_time": "2:30:42", "throughput": 8697.54, "total_tokens": 59347240} +{"current_steps": 88025, "total_steps": 204665, "loss": 0.0296, "lr": 1.4064603215388378e-06, "epoch": 2.1504653946693377, "percentage": 43.01, "elapsed_time": "1:53:43", "remaining_time": "2:30:42", "throughput": 8697.64, "total_tokens": 59351016} +{"current_steps": 88030, "total_steps": 204665, "loss": 0.0935, "lr": 1.4063824046102338e-06, "epoch": 2.150587545501185, "percentage": 43.01, "elapsed_time": "1:53:44", "remaining_time": "2:30:41", "throughput": 8697.71, "total_tokens": 59354536} +{"current_steps": 88035, "total_steps": 204665, "loss": 0.0001, "lr": 1.4063044847263074e-06, "epoch": 2.150709696333032, "percentage": 43.01, "elapsed_time": "1:53:44", "remaining_time": "2:30:41", "throughput": 8697.71, "total_tokens": 59357544} +{"current_steps": 88040, "total_steps": 204665, "loss": 0.1847, "lr": 1.4062265618876258e-06, "epoch": 2.1508318471648793, "percentage": 43.02, "elapsed_time": "1:53:44", "remaining_time": "2:30:40", "throughput": 8697.75, "total_tokens": 59360808} +{"current_steps": 88045, "total_steps": 204665, "loss": 0.0882, "lr": 1.4061486360947555e-06, "epoch": 2.1509539979967265, "percentage": 43.02, "elapsed_time": "1:53:45", "remaining_time": "2:30:40", "throughput": 8697.76, "total_tokens": 59363880} +{"current_steps": 88050, "total_steps": 204665, "loss": 0.0865, "lr": 1.4060707073482628e-06, "epoch": 2.1510761488285737, "percentage": 43.02, "elapsed_time": "1:53:45", "remaining_time": "2:30:39", "throughput": 8697.8, "total_tokens": 59367144} +{"current_steps": 88055, "total_steps": 204665, "loss": 0.0001, "lr": 1.4059927756487147e-06, "epoch": 2.151198299660421, "percentage": 43.02, "elapsed_time": "1:53:45", "remaining_time": "2:30:39", "throughput": 8697.87, "total_tokens": 59370664} +{"current_steps": 88060, "total_steps": 204665, "loss": 0.0487, "lr": 1.4059148409966778e-06, "epoch": 2.151320450492268, "percentage": 43.03, "elapsed_time": "1:53:46", "remaining_time": "2:30:38", "throughput": 8697.91, "total_tokens": 59373928} +{"current_steps": 88065, "total_steps": 204665, "loss": 0.0006, "lr": 1.405836903392719e-06, "epoch": 2.1514426013241152, "percentage": 43.03, "elapsed_time": "1:53:46", "remaining_time": "2:30:38", "throughput": 8697.93, "total_tokens": 59377064} +{"current_steps": 88070, "total_steps": 204665, "loss": 0.0003, "lr": 1.4057589628374053e-06, "epoch": 2.151564752155962, "percentage": 43.03, "elapsed_time": "1:53:46", "remaining_time": "2:30:38", "throughput": 8698.02, "total_tokens": 59380776} +{"current_steps": 88075, "total_steps": 204665, "loss": 0.0338, "lr": 1.4056810193313034e-06, "epoch": 2.151686902987809, "percentage": 43.03, "elapsed_time": "1:53:47", "remaining_time": "2:30:37", "throughput": 8698.02, "total_tokens": 59383720} +{"current_steps": 88080, "total_steps": 204665, "loss": 0.0002, "lr": 1.40560307287498e-06, "epoch": 2.1518090538196564, "percentage": 43.04, "elapsed_time": "1:53:47", "remaining_time": "2:30:37", "throughput": 8698.03, "total_tokens": 59386792} +{"current_steps": 88085, "total_steps": 204665, "loss": 0.0005, "lr": 1.405525123469002e-06, "epoch": 2.1519312046515036, "percentage": 43.04, "elapsed_time": "1:53:47", "remaining_time": "2:30:36", "throughput": 8698.04, "total_tokens": 59389800} +{"current_steps": 88090, "total_steps": 204665, "loss": 0.0629, "lr": 1.405447171113936e-06, "epoch": 2.1520533554833507, "percentage": 43.04, "elapsed_time": "1:53:48", "remaining_time": "2:30:36", "throughput": 8698.06, "total_tokens": 59392936} +{"current_steps": 88095, "total_steps": 204665, "loss": 0.0463, "lr": 1.405369215810349e-06, "epoch": 2.152175506315198, "percentage": 43.04, "elapsed_time": "1:53:48", "remaining_time": "2:30:35", "throughput": 8698.08, "total_tokens": 59396008} +{"current_steps": 88100, "total_steps": 204665, "loss": 0.0004, "lr": 1.405291257558808e-06, "epoch": 2.152297657147045, "percentage": 43.05, "elapsed_time": "1:53:48", "remaining_time": "2:30:35", "throughput": 8698.11, "total_tokens": 59399208} +{"current_steps": 88105, "total_steps": 204665, "loss": 0.0001, "lr": 1.4052132963598804e-06, "epoch": 2.1524198079788923, "percentage": 43.05, "elapsed_time": "1:53:49", "remaining_time": "2:30:34", "throughput": 8698.22, "total_tokens": 59403048} +{"current_steps": 88110, "total_steps": 204665, "loss": 0.0848, "lr": 1.4051353322141324e-06, "epoch": 2.1525419588107395, "percentage": 43.05, "elapsed_time": "1:53:49", "remaining_time": "2:30:34", "throughput": 8698.26, "total_tokens": 59406312} +{"current_steps": 88115, "total_steps": 204665, "loss": 0.0654, "lr": 1.4050573651221313e-06, "epoch": 2.1526641096425867, "percentage": 43.05, "elapsed_time": "1:53:50", "remaining_time": "2:30:34", "throughput": 8698.29, "total_tokens": 59409512} +{"current_steps": 88120, "total_steps": 204665, "loss": 0.0005, "lr": 1.404979395084444e-06, "epoch": 2.152786260474434, "percentage": 43.06, "elapsed_time": "1:53:50", "remaining_time": "2:30:33", "throughput": 8698.32, "total_tokens": 59412712} +{"current_steps": 88125, "total_steps": 204665, "loss": 0.0003, "lr": 1.404901422101638e-06, "epoch": 2.152908411306281, "percentage": 43.06, "elapsed_time": "1:53:50", "remaining_time": "2:30:33", "throughput": 8698.36, "total_tokens": 59415976} +{"current_steps": 88130, "total_steps": 204665, "loss": 0.0564, "lr": 1.4048234461742798e-06, "epoch": 2.1530305621381283, "percentage": 43.06, "elapsed_time": "1:53:51", "remaining_time": "2:30:32", "throughput": 8698.36, "total_tokens": 59418984} +{"current_steps": 88135, "total_steps": 204665, "loss": 0.0007, "lr": 1.4047454673029366e-06, "epoch": 2.1531527129699755, "percentage": 43.06, "elapsed_time": "1:53:51", "remaining_time": "2:30:32", "throughput": 8698.42, "total_tokens": 59422440} +{"current_steps": 88140, "total_steps": 204665, "loss": 0.1477, "lr": 1.4046674854881756e-06, "epoch": 2.1532748638018226, "percentage": 43.07, "elapsed_time": "1:53:51", "remaining_time": "2:30:31", "throughput": 8698.46, "total_tokens": 59425704} +{"current_steps": 88145, "total_steps": 204665, "loss": 0.0002, "lr": 1.4045895007305635e-06, "epoch": 2.15339701463367, "percentage": 43.07, "elapsed_time": "1:53:52", "remaining_time": "2:30:31", "throughput": 8698.47, "total_tokens": 59428712} +{"current_steps": 88150, "total_steps": 204665, "loss": 0.0002, "lr": 1.4045115130306679e-06, "epoch": 2.153519165465517, "percentage": 43.07, "elapsed_time": "1:53:52", "remaining_time": "2:30:30", "throughput": 8698.48, "total_tokens": 59431720} +{"current_steps": 88155, "total_steps": 204665, "loss": 0.0198, "lr": 1.4044335223890557e-06, "epoch": 2.1536413162973638, "percentage": 43.07, "elapsed_time": "1:53:52", "remaining_time": "2:30:30", "throughput": 8698.52, "total_tokens": 59435048} +{"current_steps": 88160, "total_steps": 204665, "loss": 0.0001, "lr": 1.4043555288062941e-06, "epoch": 2.153763467129211, "percentage": 43.08, "elapsed_time": "1:53:53", "remaining_time": "2:30:30", "throughput": 8698.62, "total_tokens": 59438760} +{"current_steps": 88165, "total_steps": 204665, "loss": 0.0004, "lr": 1.4042775322829506e-06, "epoch": 2.153885617961058, "percentage": 43.08, "elapsed_time": "1:53:53", "remaining_time": "2:30:29", "throughput": 8698.64, "total_tokens": 59441896} +{"current_steps": 88170, "total_steps": 204665, "loss": 0.0868, "lr": 1.4041995328195919e-06, "epoch": 2.1540077687929053, "percentage": 43.08, "elapsed_time": "1:53:53", "remaining_time": "2:30:29", "throughput": 8698.69, "total_tokens": 59445224} +{"current_steps": 88175, "total_steps": 204665, "loss": 0.0001, "lr": 1.4041215304167855e-06, "epoch": 2.1541299196247525, "percentage": 43.08, "elapsed_time": "1:53:54", "remaining_time": "2:30:28", "throughput": 8698.68, "total_tokens": 59448104} +{"current_steps": 88180, "total_steps": 204665, "loss": 0.0001, "lr": 1.4040435250750988e-06, "epoch": 2.1542520704565997, "percentage": 43.09, "elapsed_time": "1:53:54", "remaining_time": "2:30:28", "throughput": 8698.76, "total_tokens": 59451688} +{"current_steps": 88185, "total_steps": 204665, "loss": 0.0001, "lr": 1.4039655167950987e-06, "epoch": 2.154374221288447, "percentage": 43.09, "elapsed_time": "1:53:54", "remaining_time": "2:30:27", "throughput": 8698.78, "total_tokens": 59454760} +{"current_steps": 88190, "total_steps": 204665, "loss": 0.0007, "lr": 1.403887505577353e-06, "epoch": 2.154496372120294, "percentage": 43.09, "elapsed_time": "1:53:55", "remaining_time": "2:30:27", "throughput": 8698.82, "total_tokens": 59458024} +{"current_steps": 88195, "total_steps": 204665, "loss": 0.0656, "lr": 1.4038094914224285e-06, "epoch": 2.1546185229521413, "percentage": 43.09, "elapsed_time": "1:53:55", "remaining_time": "2:30:26", "throughput": 8698.86, "total_tokens": 59461288} +{"current_steps": 88200, "total_steps": 204665, "loss": 0.0229, "lr": 1.4037314743308928e-06, "epoch": 2.1547406737839885, "percentage": 43.09, "elapsed_time": "1:53:55", "remaining_time": "2:30:26", "throughput": 8698.95, "total_tokens": 59465000} +{"current_steps": 88205, "total_steps": 204665, "loss": 0.0001, "lr": 1.4036534543033133e-06, "epoch": 2.1548628246158357, "percentage": 43.1, "elapsed_time": "1:53:56", "remaining_time": "2:30:26", "throughput": 8698.99, "total_tokens": 59468264} +{"current_steps": 88210, "total_steps": 204665, "loss": 0.0668, "lr": 1.4035754313402573e-06, "epoch": 2.154984975447683, "percentage": 43.1, "elapsed_time": "1:53:56", "remaining_time": "2:30:25", "throughput": 8699.07, "total_tokens": 59471848} +{"current_steps": 88215, "total_steps": 204665, "loss": 0.2547, "lr": 1.403497405442292e-06, "epoch": 2.15510712627953, "percentage": 43.1, "elapsed_time": "1:53:56", "remaining_time": "2:30:25", "throughput": 8699.13, "total_tokens": 59475304} +{"current_steps": 88220, "total_steps": 204665, "loss": 0.0004, "lr": 1.403419376609985e-06, "epoch": 2.1552292771113772, "percentage": 43.1, "elapsed_time": "1:53:57", "remaining_time": "2:30:24", "throughput": 8699.18, "total_tokens": 59478696} +{"current_steps": 88225, "total_steps": 204665, "loss": 0.0002, "lr": 1.4033413448439042e-06, "epoch": 2.1553514279432244, "percentage": 43.11, "elapsed_time": "1:53:57", "remaining_time": "2:30:24", "throughput": 8699.25, "total_tokens": 59482216} +{"current_steps": 88230, "total_steps": 204665, "loss": 0.0004, "lr": 1.4032633101446166e-06, "epoch": 2.1554735787750716, "percentage": 43.11, "elapsed_time": "1:53:57", "remaining_time": "2:30:23", "throughput": 8699.31, "total_tokens": 59485672} +{"current_steps": 88235, "total_steps": 204665, "loss": 0.0825, "lr": 1.4031852725126897e-06, "epoch": 2.155595729606919, "percentage": 43.11, "elapsed_time": "1:53:58", "remaining_time": "2:30:23", "throughput": 8699.33, "total_tokens": 59488808} +{"current_steps": 88240, "total_steps": 204665, "loss": 0.1709, "lr": 1.4031072319486907e-06, "epoch": 2.1557178804387656, "percentage": 43.11, "elapsed_time": "1:53:58", "remaining_time": "2:30:23", "throughput": 8699.37, "total_tokens": 59492072} +{"current_steps": 88245, "total_steps": 204665, "loss": 0.0001, "lr": 1.403029188453188e-06, "epoch": 2.155840031270613, "percentage": 43.12, "elapsed_time": "1:53:59", "remaining_time": "2:30:22", "throughput": 8699.48, "total_tokens": 59495976} +{"current_steps": 88250, "total_steps": 204665, "loss": 0.0004, "lr": 1.4029511420267484e-06, "epoch": 2.15596218210246, "percentage": 43.12, "elapsed_time": "1:53:59", "remaining_time": "2:30:22", "throughput": 8699.52, "total_tokens": 59499240} +{"current_steps": 88255, "total_steps": 204665, "loss": 0.0003, "lr": 1.4028730926699395e-06, "epoch": 2.156084332934307, "percentage": 43.12, "elapsed_time": "1:53:59", "remaining_time": "2:30:21", "throughput": 8699.52, "total_tokens": 59502184} +{"current_steps": 88260, "total_steps": 204665, "loss": 0.0001, "lr": 1.4027950403833294e-06, "epoch": 2.1562064837661543, "percentage": 43.12, "elapsed_time": "1:54:00", "remaining_time": "2:30:21", "throughput": 8699.63, "total_tokens": 59506024} +{"current_steps": 88265, "total_steps": 204665, "loss": 0.0002, "lr": 1.4027169851674851e-06, "epoch": 2.1563286345980015, "percentage": 43.13, "elapsed_time": "1:54:00", "remaining_time": "2:30:20", "throughput": 8699.64, "total_tokens": 59509032} +{"current_steps": 88270, "total_steps": 204665, "loss": 0.0009, "lr": 1.402638927022975e-06, "epoch": 2.1564507854298487, "percentage": 43.13, "elapsed_time": "1:54:00", "remaining_time": "2:30:20", "throughput": 8699.7, "total_tokens": 59512488} +{"current_steps": 88275, "total_steps": 204665, "loss": 0.0002, "lr": 1.402560865950366e-06, "epoch": 2.156572936261696, "percentage": 43.13, "elapsed_time": "1:54:01", "remaining_time": "2:30:19", "throughput": 8699.74, "total_tokens": 59515816} +{"current_steps": 88280, "total_steps": 204665, "loss": 0.0007, "lr": 1.4024828019502262e-06, "epoch": 2.156695087093543, "percentage": 43.13, "elapsed_time": "1:54:01", "remaining_time": "2:30:19", "throughput": 8699.77, "total_tokens": 59519016} +{"current_steps": 88285, "total_steps": 204665, "loss": 0.0403, "lr": 1.4024047350231234e-06, "epoch": 2.1568172379253903, "percentage": 43.14, "elapsed_time": "1:54:01", "remaining_time": "2:30:19", "throughput": 8699.78, "total_tokens": 59522024} +{"current_steps": 88290, "total_steps": 204665, "loss": 0.0002, "lr": 1.4023266651696249e-06, "epoch": 2.1569393887572375, "percentage": 43.14, "elapsed_time": "1:54:02", "remaining_time": "2:30:18", "throughput": 8699.81, "total_tokens": 59525224} +{"current_steps": 88295, "total_steps": 204665, "loss": 0.0436, "lr": 1.4022485923902988e-06, "epoch": 2.1570615395890846, "percentage": 43.14, "elapsed_time": "1:54:02", "remaining_time": "2:30:18", "throughput": 8699.84, "total_tokens": 59528424} +{"current_steps": 88300, "total_steps": 204665, "loss": 0.0001, "lr": 1.4021705166857126e-06, "epoch": 2.157183690420932, "percentage": 43.14, "elapsed_time": "1:54:02", "remaining_time": "2:30:17", "throughput": 8699.92, "total_tokens": 59532072} +{"current_steps": 88305, "total_steps": 204665, "loss": 0.0001, "lr": 1.4020924380564342e-06, "epoch": 2.157305841252779, "percentage": 43.15, "elapsed_time": "1:54:03", "remaining_time": "2:30:17", "throughput": 8699.94, "total_tokens": 59535208} +{"current_steps": 88310, "total_steps": 204665, "loss": 0.0055, "lr": 1.4020143565030318e-06, "epoch": 2.157427992084626, "percentage": 43.15, "elapsed_time": "1:54:03", "remaining_time": "2:30:16", "throughput": 8699.98, "total_tokens": 59538536} +{"current_steps": 88315, "total_steps": 204665, "loss": 0.0424, "lr": 1.4019362720260723e-06, "epoch": 2.1575501429164734, "percentage": 43.15, "elapsed_time": "1:54:03", "remaining_time": "2:30:16", "throughput": 8699.99, "total_tokens": 59541544} +{"current_steps": 88320, "total_steps": 204665, "loss": 0.0404, "lr": 1.4018581846261246e-06, "epoch": 2.1576722937483206, "percentage": 43.15, "elapsed_time": "1:54:04", "remaining_time": "2:30:15", "throughput": 8700.07, "total_tokens": 59545128} +{"current_steps": 88325, "total_steps": 204665, "loss": 0.0001, "lr": 1.4017800943037558e-06, "epoch": 2.157794444580168, "percentage": 43.16, "elapsed_time": "1:54:04", "remaining_time": "2:30:15", "throughput": 8700.14, "total_tokens": 59548712} +{"current_steps": 88330, "total_steps": 204665, "loss": 0.0377, "lr": 1.4017020010595344e-06, "epoch": 2.157916595412015, "percentage": 43.16, "elapsed_time": "1:54:04", "remaining_time": "2:30:15", "throughput": 8700.17, "total_tokens": 59551912} +{"current_steps": 88335, "total_steps": 204665, "loss": 0.0001, "lr": 1.401623904894028e-06, "epoch": 2.1580387462438617, "percentage": 43.16, "elapsed_time": "1:54:05", "remaining_time": "2:30:14", "throughput": 8700.18, "total_tokens": 59554984} +{"current_steps": 88340, "total_steps": 204665, "loss": 0.049, "lr": 1.4015458058078042e-06, "epoch": 2.158160897075709, "percentage": 43.16, "elapsed_time": "1:54:05", "remaining_time": "2:30:14", "throughput": 8700.23, "total_tokens": 59558312} +{"current_steps": 88345, "total_steps": 204665, "loss": 0.0583, "lr": 1.4014677038014315e-06, "epoch": 2.158283047907556, "percentage": 43.17, "elapsed_time": "1:54:05", "remaining_time": "2:30:13", "throughput": 8700.26, "total_tokens": 59561448} +{"current_steps": 88350, "total_steps": 204665, "loss": 0.0027, "lr": 1.4013895988754776e-06, "epoch": 2.1584051987394033, "percentage": 43.17, "elapsed_time": "1:54:06", "remaining_time": "2:30:13", "throughput": 8700.24, "total_tokens": 59564264} +{"current_steps": 88355, "total_steps": 204665, "loss": 0.0367, "lr": 1.4013114910305107e-06, "epoch": 2.1585273495712505, "percentage": 43.17, "elapsed_time": "1:54:06", "remaining_time": "2:30:12", "throughput": 8700.31, "total_tokens": 59567848} +{"current_steps": 88360, "total_steps": 204665, "loss": 0.0436, "lr": 1.4012333802670985e-06, "epoch": 2.1586495004030977, "percentage": 43.17, "elapsed_time": "1:54:06", "remaining_time": "2:30:12", "throughput": 8700.47, "total_tokens": 59572136} +{"current_steps": 88365, "total_steps": 204665, "loss": 0.0001, "lr": 1.4011552665858094e-06, "epoch": 2.158771651234945, "percentage": 43.18, "elapsed_time": "1:54:07", "remaining_time": "2:30:12", "throughput": 8700.56, "total_tokens": 59575784} +{"current_steps": 88370, "total_steps": 204665, "loss": 0.0019, "lr": 1.4010771499872114e-06, "epoch": 2.158893802066792, "percentage": 43.18, "elapsed_time": "1:54:07", "remaining_time": "2:30:11", "throughput": 8700.6, "total_tokens": 59579112} +{"current_steps": 88375, "total_steps": 204665, "loss": 0.056, "lr": 1.4009990304718722e-06, "epoch": 2.1590159528986392, "percentage": 43.18, "elapsed_time": "1:54:08", "remaining_time": "2:30:11", "throughput": 8700.62, "total_tokens": 59582184} +{"current_steps": 88380, "total_steps": 204665, "loss": 0.0347, "lr": 1.4009209080403603e-06, "epoch": 2.1591381037304864, "percentage": 43.18, "elapsed_time": "1:54:08", "remaining_time": "2:30:10", "throughput": 8700.59, "total_tokens": 59584936} +{"current_steps": 88385, "total_steps": 204665, "loss": 0.0294, "lr": 1.400842782693244e-06, "epoch": 2.1592602545623336, "percentage": 43.19, "elapsed_time": "1:54:08", "remaining_time": "2:30:10", "throughput": 8700.63, "total_tokens": 59588200} +{"current_steps": 88390, "total_steps": 204665, "loss": 0.0552, "lr": 1.4007646544310912e-06, "epoch": 2.159382405394181, "percentage": 43.19, "elapsed_time": "1:54:09", "remaining_time": "2:30:09", "throughput": 8700.68, "total_tokens": 59591528} +{"current_steps": 88395, "total_steps": 204665, "loss": 0.0003, "lr": 1.4006865232544696e-06, "epoch": 2.159504556226028, "percentage": 43.19, "elapsed_time": "1:54:09", "remaining_time": "2:30:09", "throughput": 8700.75, "total_tokens": 59595112} +{"current_steps": 88400, "total_steps": 204665, "loss": 0.1146, "lr": 1.4006083891639481e-06, "epoch": 2.159626707057875, "percentage": 43.19, "elapsed_time": "1:54:09", "remaining_time": "2:30:08", "throughput": 8700.82, "total_tokens": 59598632} +{"current_steps": 88405, "total_steps": 204665, "loss": 0.0001, "lr": 1.4005302521600945e-06, "epoch": 2.1597488578897224, "percentage": 43.19, "elapsed_time": "1:54:10", "remaining_time": "2:30:08", "throughput": 8700.87, "total_tokens": 59602024} +{"current_steps": 88410, "total_steps": 204665, "loss": 0.0182, "lr": 1.4004521122434772e-06, "epoch": 2.1598710087215696, "percentage": 43.2, "elapsed_time": "1:54:10", "remaining_time": "2:30:08", "throughput": 8700.92, "total_tokens": 59605352} +{"current_steps": 88415, "total_steps": 204665, "loss": 0.0001, "lr": 1.4003739694146644e-06, "epoch": 2.1599931595534168, "percentage": 43.2, "elapsed_time": "1:54:10", "remaining_time": "2:30:07", "throughput": 8700.92, "total_tokens": 59608296} +{"current_steps": 88420, "total_steps": 204665, "loss": 0.0517, "lr": 1.4002958236742246e-06, "epoch": 2.1601153103852635, "percentage": 43.2, "elapsed_time": "1:54:11", "remaining_time": "2:30:07", "throughput": 8700.93, "total_tokens": 59611368} +{"current_steps": 88425, "total_steps": 204665, "loss": 0.0537, "lr": 1.4002176750227257e-06, "epoch": 2.1602374612171107, "percentage": 43.2, "elapsed_time": "1:54:11", "remaining_time": "2:30:06", "throughput": 8700.95, "total_tokens": 59614440} +{"current_steps": 88430, "total_steps": 204665, "loss": 0.0573, "lr": 1.4001395234607362e-06, "epoch": 2.160359612048958, "percentage": 43.21, "elapsed_time": "1:54:11", "remaining_time": "2:30:06", "throughput": 8701.0, "total_tokens": 59617832} +{"current_steps": 88435, "total_steps": 204665, "loss": 0.0303, "lr": 1.4000613689888248e-06, "epoch": 2.160481762880805, "percentage": 43.21, "elapsed_time": "1:54:12", "remaining_time": "2:30:05", "throughput": 8701.05, "total_tokens": 59621224} +{"current_steps": 88440, "total_steps": 204665, "loss": 0.0767, "lr": 1.399983211607559e-06, "epoch": 2.1606039137126523, "percentage": 43.21, "elapsed_time": "1:54:12", "remaining_time": "2:30:05", "throughput": 8701.07, "total_tokens": 59624360} +{"current_steps": 88445, "total_steps": 204665, "loss": 0.0001, "lr": 1.3999050513175081e-06, "epoch": 2.1607260645444994, "percentage": 43.21, "elapsed_time": "1:54:12", "remaining_time": "2:30:04", "throughput": 8701.1, "total_tokens": 59627496} +{"current_steps": 88450, "total_steps": 204665, "loss": 0.0005, "lr": 1.39982688811924e-06, "epoch": 2.1608482153763466, "percentage": 43.22, "elapsed_time": "1:54:13", "remaining_time": "2:30:04", "throughput": 8701.19, "total_tokens": 59631208} +{"current_steps": 88455, "total_steps": 204665, "loss": 0.0441, "lr": 1.3997487220133232e-06, "epoch": 2.160970366208194, "percentage": 43.22, "elapsed_time": "1:54:13", "remaining_time": "2:30:04", "throughput": 8701.2, "total_tokens": 59634280} +{"current_steps": 88460, "total_steps": 204665, "loss": 0.0314, "lr": 1.3996705530003262e-06, "epoch": 2.161092517040041, "percentage": 43.22, "elapsed_time": "1:54:13", "remaining_time": "2:30:03", "throughput": 8701.26, "total_tokens": 59637672} +{"current_steps": 88465, "total_steps": 204665, "loss": 0.0001, "lr": 1.3995923810808176e-06, "epoch": 2.161214667871888, "percentage": 43.22, "elapsed_time": "1:54:14", "remaining_time": "2:30:03", "throughput": 8701.33, "total_tokens": 59641256} +{"current_steps": 88470, "total_steps": 204665, "loss": 0.0001, "lr": 1.3995142062553654e-06, "epoch": 2.1613368187037354, "percentage": 43.23, "elapsed_time": "1:54:14", "remaining_time": "2:30:02", "throughput": 8701.37, "total_tokens": 59644584} +{"current_steps": 88475, "total_steps": 204665, "loss": 0.0431, "lr": 1.3994360285245386e-06, "epoch": 2.1614589695355826, "percentage": 43.23, "elapsed_time": "1:54:14", "remaining_time": "2:30:02", "throughput": 8701.42, "total_tokens": 59647912} +{"current_steps": 88480, "total_steps": 204665, "loss": 0.0001, "lr": 1.3993578478889054e-06, "epoch": 2.1615811203674298, "percentage": 43.23, "elapsed_time": "1:54:15", "remaining_time": "2:30:01", "throughput": 8701.47, "total_tokens": 59651304} +{"current_steps": 88485, "total_steps": 204665, "loss": 0.0991, "lr": 1.3992796643490348e-06, "epoch": 2.161703271199277, "percentage": 43.23, "elapsed_time": "1:54:15", "remaining_time": "2:30:01", "throughput": 8701.51, "total_tokens": 59654568} +{"current_steps": 88490, "total_steps": 204665, "loss": 0.0002, "lr": 1.399201477905495e-06, "epoch": 2.161825422031124, "percentage": 43.24, "elapsed_time": "1:54:16", "remaining_time": "2:30:01", "throughput": 8701.7, "total_tokens": 59659112} +{"current_steps": 88495, "total_steps": 204665, "loss": 0.0597, "lr": 1.3991232885588546e-06, "epoch": 2.1619475728629713, "percentage": 43.24, "elapsed_time": "1:54:16", "remaining_time": "2:30:00", "throughput": 8701.75, "total_tokens": 59662568} +{"current_steps": 88500, "total_steps": 204665, "loss": 0.0002, "lr": 1.3990450963096824e-06, "epoch": 2.1620697236948185, "percentage": 43.24, "elapsed_time": "1:54:16", "remaining_time": "2:30:00", "throughput": 8701.79, "total_tokens": 59665832} +{"current_steps": 88505, "total_steps": 204665, "loss": 0.0536, "lr": 1.398966901158547e-06, "epoch": 2.1621918745266657, "percentage": 43.24, "elapsed_time": "1:54:17", "remaining_time": "2:29:59", "throughput": 8701.88, "total_tokens": 59669544} +{"current_steps": 88510, "total_steps": 204665, "loss": 0.0708, "lr": 1.3988887031060168e-06, "epoch": 2.162314025358513, "percentage": 43.25, "elapsed_time": "1:54:17", "remaining_time": "2:29:59", "throughput": 8701.93, "total_tokens": 59672872} +{"current_steps": 88515, "total_steps": 204665, "loss": 0.1252, "lr": 1.3988105021526608e-06, "epoch": 2.1624361761903597, "percentage": 43.25, "elapsed_time": "1:54:17", "remaining_time": "2:29:58", "throughput": 8701.98, "total_tokens": 59676200} +{"current_steps": 88520, "total_steps": 204665, "loss": 0.0001, "lr": 1.3987322982990474e-06, "epoch": 2.162558327022207, "percentage": 43.25, "elapsed_time": "1:54:18", "remaining_time": "2:29:58", "throughput": 8701.98, "total_tokens": 59679144} +{"current_steps": 88525, "total_steps": 204665, "loss": 0.0002, "lr": 1.3986540915457457e-06, "epoch": 2.162680477854054, "percentage": 43.25, "elapsed_time": "1:54:18", "remaining_time": "2:29:57", "throughput": 8702.04, "total_tokens": 59682600} +{"current_steps": 88530, "total_steps": 204665, "loss": 0.1044, "lr": 1.398575881893324e-06, "epoch": 2.1628026286859012, "percentage": 43.26, "elapsed_time": "1:54:18", "remaining_time": "2:29:57", "throughput": 8702.09, "total_tokens": 59685928} +{"current_steps": 88535, "total_steps": 204665, "loss": 0.0002, "lr": 1.3984976693423512e-06, "epoch": 2.1629247795177484, "percentage": 43.26, "elapsed_time": "1:54:19", "remaining_time": "2:29:57", "throughput": 8702.14, "total_tokens": 59689320} +{"current_steps": 88540, "total_steps": 204665, "loss": 0.0003, "lr": 1.3984194538933961e-06, "epoch": 2.1630469303495956, "percentage": 43.26, "elapsed_time": "1:54:19", "remaining_time": "2:29:56", "throughput": 8702.21, "total_tokens": 59692904} +{"current_steps": 88545, "total_steps": 204665, "loss": 0.0003, "lr": 1.3983412355470283e-06, "epoch": 2.163169081181443, "percentage": 43.26, "elapsed_time": "1:54:19", "remaining_time": "2:29:56", "throughput": 8702.24, "total_tokens": 59696040} +{"current_steps": 88550, "total_steps": 204665, "loss": 0.1038, "lr": 1.3982630143038154e-06, "epoch": 2.16329123201329, "percentage": 43.27, "elapsed_time": "1:54:20", "remaining_time": "2:29:55", "throughput": 8702.28, "total_tokens": 59699368} +{"current_steps": 88555, "total_steps": 204665, "loss": 0.0558, "lr": 1.3981847901643266e-06, "epoch": 2.163413382845137, "percentage": 43.27, "elapsed_time": "1:54:20", "remaining_time": "2:29:55", "throughput": 8702.27, "total_tokens": 59702248} +{"current_steps": 88560, "total_steps": 204665, "loss": 0.0054, "lr": 1.398106563129131e-06, "epoch": 2.1635355336769844, "percentage": 43.27, "elapsed_time": "1:54:20", "remaining_time": "2:29:54", "throughput": 8702.31, "total_tokens": 59705512} +{"current_steps": 88565, "total_steps": 204665, "loss": 0.0636, "lr": 1.3980283331987973e-06, "epoch": 2.1636576845088316, "percentage": 43.27, "elapsed_time": "1:54:21", "remaining_time": "2:29:54", "throughput": 8702.34, "total_tokens": 59708712} +{"current_steps": 88570, "total_steps": 204665, "loss": 0.0157, "lr": 1.3979501003738948e-06, "epoch": 2.1637798353406787, "percentage": 43.28, "elapsed_time": "1:54:21", "remaining_time": "2:29:53", "throughput": 8702.41, "total_tokens": 59712168} +{"current_steps": 88575, "total_steps": 204665, "loss": 0.0006, "lr": 1.397871864654992e-06, "epoch": 2.163901986172526, "percentage": 43.28, "elapsed_time": "1:54:21", "remaining_time": "2:29:53", "throughput": 8702.45, "total_tokens": 59715496} +{"current_steps": 88580, "total_steps": 204665, "loss": 0.0006, "lr": 1.397793626042658e-06, "epoch": 2.164024137004373, "percentage": 43.28, "elapsed_time": "1:54:22", "remaining_time": "2:29:53", "throughput": 8702.5, "total_tokens": 59718888} +{"current_steps": 88585, "total_steps": 204665, "loss": 0.1219, "lr": 1.3977153845374616e-06, "epoch": 2.1641462878362203, "percentage": 43.28, "elapsed_time": "1:54:22", "remaining_time": "2:29:52", "throughput": 8702.56, "total_tokens": 59722280} +{"current_steps": 88590, "total_steps": 204665, "loss": 0.0005, "lr": 1.397637140139972e-06, "epoch": 2.1642684386680675, "percentage": 43.29, "elapsed_time": "1:54:22", "remaining_time": "2:29:52", "throughput": 8702.6, "total_tokens": 59725608} +{"current_steps": 88595, "total_steps": 204665, "loss": 0.0559, "lr": 1.3975588928507583e-06, "epoch": 2.1643905894999147, "percentage": 43.29, "elapsed_time": "1:54:23", "remaining_time": "2:29:51", "throughput": 8702.65, "total_tokens": 59729000} +{"current_steps": 88600, "total_steps": 204665, "loss": 0.0894, "lr": 1.3974806426703894e-06, "epoch": 2.1645127403317614, "percentage": 43.29, "elapsed_time": "1:54:23", "remaining_time": "2:29:51", "throughput": 8702.72, "total_tokens": 59732520} +{"current_steps": 88605, "total_steps": 204665, "loss": 0.0502, "lr": 1.3974023895994342e-06, "epoch": 2.1646348911636086, "percentage": 43.29, "elapsed_time": "1:54:24", "remaining_time": "2:29:50", "throughput": 8702.81, "total_tokens": 59736232} +{"current_steps": 88610, "total_steps": 204665, "loss": 0.0663, "lr": 1.3973241336384622e-06, "epoch": 2.164757041995456, "percentage": 43.3, "elapsed_time": "1:54:24", "remaining_time": "2:29:50", "throughput": 8702.8, "total_tokens": 59739048} +{"current_steps": 88615, "total_steps": 204665, "loss": 0.0003, "lr": 1.397245874788042e-06, "epoch": 2.164879192827303, "percentage": 43.3, "elapsed_time": "1:54:24", "remaining_time": "2:29:49", "throughput": 8702.89, "total_tokens": 59742760} +{"current_steps": 88620, "total_steps": 204665, "loss": 0.0001, "lr": 1.397167613048743e-06, "epoch": 2.16500134365915, "percentage": 43.3, "elapsed_time": "1:54:25", "remaining_time": "2:29:49", "throughput": 8702.96, "total_tokens": 59746216} +{"current_steps": 88625, "total_steps": 204665, "loss": 0.0612, "lr": 1.397089348421134e-06, "epoch": 2.1651234944909974, "percentage": 43.3, "elapsed_time": "1:54:25", "remaining_time": "2:29:49", "throughput": 8703.0, "total_tokens": 59749544} +{"current_steps": 88630, "total_steps": 204665, "loss": 0.0002, "lr": 1.397011080905785e-06, "epoch": 2.1652456453228446, "percentage": 43.3, "elapsed_time": "1:54:25", "remaining_time": "2:29:48", "throughput": 8703.08, "total_tokens": 59753128} +{"current_steps": 88635, "total_steps": 204665, "loss": 0.0003, "lr": 1.3969328105032643e-06, "epoch": 2.1653677961546918, "percentage": 43.31, "elapsed_time": "1:54:26", "remaining_time": "2:29:48", "throughput": 8703.12, "total_tokens": 59756392} +{"current_steps": 88640, "total_steps": 204665, "loss": 0.0002, "lr": 1.3968545372141416e-06, "epoch": 2.165489946986539, "percentage": 43.31, "elapsed_time": "1:54:26", "remaining_time": "2:29:47", "throughput": 8703.14, "total_tokens": 59759528} +{"current_steps": 88645, "total_steps": 204665, "loss": 0.0256, "lr": 1.3967762610389858e-06, "epoch": 2.165612097818386, "percentage": 43.31, "elapsed_time": "1:54:26", "remaining_time": "2:29:47", "throughput": 8703.23, "total_tokens": 59763240} +{"current_steps": 88650, "total_steps": 204665, "loss": 0.0002, "lr": 1.3966979819783666e-06, "epoch": 2.1657342486502333, "percentage": 43.31, "elapsed_time": "1:54:27", "remaining_time": "2:29:46", "throughput": 8703.27, "total_tokens": 59766504} +{"current_steps": 88655, "total_steps": 204665, "loss": 0.0406, "lr": 1.3966197000328528e-06, "epoch": 2.1658563994820805, "percentage": 43.32, "elapsed_time": "1:54:27", "remaining_time": "2:29:46", "throughput": 8703.31, "total_tokens": 59769768} +{"current_steps": 88660, "total_steps": 204665, "loss": 0.0002, "lr": 1.3965414152030138e-06, "epoch": 2.1659785503139277, "percentage": 43.32, "elapsed_time": "1:54:27", "remaining_time": "2:29:46", "throughput": 8703.34, "total_tokens": 59772968} +{"current_steps": 88665, "total_steps": 204665, "loss": 0.0001, "lr": 1.3964631274894189e-06, "epoch": 2.166100701145775, "percentage": 43.32, "elapsed_time": "1:54:28", "remaining_time": "2:29:45", "throughput": 8703.37, "total_tokens": 59776232} +{"current_steps": 88670, "total_steps": 204665, "loss": 0.0396, "lr": 1.3963848368926376e-06, "epoch": 2.166222851977622, "percentage": 43.32, "elapsed_time": "1:54:28", "remaining_time": "2:29:45", "throughput": 8703.45, "total_tokens": 59779880} +{"current_steps": 88675, "total_steps": 204665, "loss": 0.0005, "lr": 1.3963065434132392e-06, "epoch": 2.1663450028094693, "percentage": 43.33, "elapsed_time": "1:54:28", "remaining_time": "2:29:44", "throughput": 8703.54, "total_tokens": 59783592} +{"current_steps": 88680, "total_steps": 204665, "loss": 0.0002, "lr": 1.3962282470517933e-06, "epoch": 2.1664671536413165, "percentage": 43.33, "elapsed_time": "1:54:29", "remaining_time": "2:29:44", "throughput": 8703.58, "total_tokens": 59786792} +{"current_steps": 88685, "total_steps": 204665, "loss": 0.0003, "lr": 1.3961499478088685e-06, "epoch": 2.1665893044731637, "percentage": 43.33, "elapsed_time": "1:54:29", "remaining_time": "2:29:43", "throughput": 8703.6, "total_tokens": 59789928} +{"current_steps": 88690, "total_steps": 204665, "loss": 0.0839, "lr": 1.3960716456850347e-06, "epoch": 2.166711455305011, "percentage": 43.33, "elapsed_time": "1:54:29", "remaining_time": "2:29:43", "throughput": 8703.67, "total_tokens": 59793512} +{"current_steps": 88695, "total_steps": 204665, "loss": 0.0556, "lr": 1.3959933406808616e-06, "epoch": 2.1668336061368576, "percentage": 43.34, "elapsed_time": "1:54:30", "remaining_time": "2:29:42", "throughput": 8703.69, "total_tokens": 59796584} +{"current_steps": 88700, "total_steps": 204665, "loss": 0.1148, "lr": 1.3959150327969188e-06, "epoch": 2.166955756968705, "percentage": 43.34, "elapsed_time": "1:54:30", "remaining_time": "2:29:42", "throughput": 8703.74, "total_tokens": 59799976} +{"current_steps": 88705, "total_steps": 204665, "loss": 0.0005, "lr": 1.395836722033775e-06, "epoch": 2.167077907800552, "percentage": 43.34, "elapsed_time": "1:54:30", "remaining_time": "2:29:42", "throughput": 8703.8, "total_tokens": 59803368} +{"current_steps": 88710, "total_steps": 204665, "loss": 0.0003, "lr": 1.395758408392e-06, "epoch": 2.167200058632399, "percentage": 43.34, "elapsed_time": "1:54:31", "remaining_time": "2:29:41", "throughput": 8703.85, "total_tokens": 59806760} +{"current_steps": 88715, "total_steps": 204665, "loss": 0.0002, "lr": 1.3956800918721637e-06, "epoch": 2.1673222094642464, "percentage": 43.35, "elapsed_time": "1:54:31", "remaining_time": "2:29:41", "throughput": 8703.87, "total_tokens": 59809896} +{"current_steps": 88720, "total_steps": 204665, "loss": 0.0, "lr": 1.3956017724748347e-06, "epoch": 2.1674443602960936, "percentage": 43.35, "elapsed_time": "1:54:31", "remaining_time": "2:29:40", "throughput": 8703.93, "total_tokens": 59813352} +{"current_steps": 88725, "total_steps": 204665, "loss": 0.0001, "lr": 1.395523450200584e-06, "epoch": 2.1675665111279407, "percentage": 43.35, "elapsed_time": "1:54:32", "remaining_time": "2:29:40", "throughput": 8703.96, "total_tokens": 59816488} +{"current_steps": 88730, "total_steps": 204665, "loss": 0.0002, "lr": 1.39544512504998e-06, "epoch": 2.167688661959788, "percentage": 43.35, "elapsed_time": "1:54:32", "remaining_time": "2:29:39", "throughput": 8703.98, "total_tokens": 59819624} +{"current_steps": 88735, "total_steps": 204665, "loss": 0.0002, "lr": 1.3953667970235928e-06, "epoch": 2.167810812791635, "percentage": 43.36, "elapsed_time": "1:54:33", "remaining_time": "2:29:39", "throughput": 8704.05, "total_tokens": 59823144} +{"current_steps": 88740, "total_steps": 204665, "loss": 0.1275, "lr": 1.3952884661219917e-06, "epoch": 2.1679329636234823, "percentage": 43.36, "elapsed_time": "1:54:33", "remaining_time": "2:29:38", "throughput": 8704.11, "total_tokens": 59826664} +{"current_steps": 88745, "total_steps": 204665, "loss": 0.0001, "lr": 1.395210132345747e-06, "epoch": 2.1680551144553295, "percentage": 43.36, "elapsed_time": "1:54:33", "remaining_time": "2:29:38", "throughput": 8704.18, "total_tokens": 59830184} +{"current_steps": 88750, "total_steps": 204665, "loss": 0.0001, "lr": 1.3951317956954274e-06, "epoch": 2.1681772652871767, "percentage": 43.36, "elapsed_time": "1:54:34", "remaining_time": "2:29:38", "throughput": 8704.2, "total_tokens": 59833256} +{"current_steps": 88755, "total_steps": 204665, "loss": 0.0739, "lr": 1.3950534561716035e-06, "epoch": 2.168299416119024, "percentage": 43.37, "elapsed_time": "1:54:34", "remaining_time": "2:29:37", "throughput": 8704.25, "total_tokens": 59836648} +{"current_steps": 88760, "total_steps": 204665, "loss": 0.0001, "lr": 1.3949751137748442e-06, "epoch": 2.168421566950871, "percentage": 43.37, "elapsed_time": "1:54:34", "remaining_time": "2:29:37", "throughput": 8704.37, "total_tokens": 59840616} +{"current_steps": 88765, "total_steps": 204665, "loss": 0.0628, "lr": 1.39489676850572e-06, "epoch": 2.1685437177827183, "percentage": 43.37, "elapsed_time": "1:54:35", "remaining_time": "2:29:36", "throughput": 8704.38, "total_tokens": 59843688} +{"current_steps": 88770, "total_steps": 204665, "loss": 0.0001, "lr": 1.3948184203648002e-06, "epoch": 2.1686658686145655, "percentage": 43.37, "elapsed_time": "1:54:35", "remaining_time": "2:29:36", "throughput": 8704.44, "total_tokens": 59847144} +{"current_steps": 88775, "total_steps": 204665, "loss": 0.0427, "lr": 1.3947400693526545e-06, "epoch": 2.1687880194464126, "percentage": 43.38, "elapsed_time": "1:54:35", "remaining_time": "2:29:35", "throughput": 8704.46, "total_tokens": 59850216} +{"current_steps": 88780, "total_steps": 204665, "loss": 0.0568, "lr": 1.3946617154698529e-06, "epoch": 2.1689101702782594, "percentage": 43.38, "elapsed_time": "1:54:36", "remaining_time": "2:29:35", "throughput": 8704.5, "total_tokens": 59853480} +{"current_steps": 88785, "total_steps": 204665, "loss": 0.0002, "lr": 1.3945833587169653e-06, "epoch": 2.1690323211101066, "percentage": 43.38, "elapsed_time": "1:54:36", "remaining_time": "2:29:35", "throughput": 8704.61, "total_tokens": 59857320} +{"current_steps": 88790, "total_steps": 204665, "loss": 0.0002, "lr": 1.3945049990945613e-06, "epoch": 2.1691544719419538, "percentage": 43.38, "elapsed_time": "1:54:36", "remaining_time": "2:29:34", "throughput": 8704.63, "total_tokens": 59860392} +{"current_steps": 88795, "total_steps": 204665, "loss": 0.0002, "lr": 1.3944266366032107e-06, "epoch": 2.169276622773801, "percentage": 43.39, "elapsed_time": "1:54:37", "remaining_time": "2:29:34", "throughput": 8704.7, "total_tokens": 59863976} +{"current_steps": 88800, "total_steps": 204665, "loss": 0.0, "lr": 1.3943482712434837e-06, "epoch": 2.169398773605648, "percentage": 43.39, "elapsed_time": "1:54:37", "remaining_time": "2:29:33", "throughput": 8704.7, "total_tokens": 59866920} +{"current_steps": 88805, "total_steps": 204665, "loss": 0.0003, "lr": 1.39426990301595e-06, "epoch": 2.1695209244374953, "percentage": 43.39, "elapsed_time": "1:54:37", "remaining_time": "2:29:33", "throughput": 8704.78, "total_tokens": 59870504} +{"current_steps": 88810, "total_steps": 204665, "loss": 0.0001, "lr": 1.3941915319211797e-06, "epoch": 2.1696430752693425, "percentage": 43.39, "elapsed_time": "1:54:38", "remaining_time": "2:29:32", "throughput": 8704.85, "total_tokens": 59874088} +{"current_steps": 88815, "total_steps": 204665, "loss": 0.0558, "lr": 1.394113157959742e-06, "epoch": 2.1697652261011897, "percentage": 43.4, "elapsed_time": "1:54:38", "remaining_time": "2:29:32", "throughput": 8704.91, "total_tokens": 59877544} +{"current_steps": 88820, "total_steps": 204665, "loss": 0.0003, "lr": 1.3940347811322078e-06, "epoch": 2.169887376933037, "percentage": 43.4, "elapsed_time": "1:54:38", "remaining_time": "2:29:31", "throughput": 8705.02, "total_tokens": 59881448} +{"current_steps": 88825, "total_steps": 204665, "loss": 0.0664, "lr": 1.3939564014391468e-06, "epoch": 2.170009527764884, "percentage": 43.4, "elapsed_time": "1:54:39", "remaining_time": "2:29:31", "throughput": 8705.14, "total_tokens": 59885416} +{"current_steps": 88830, "total_steps": 204665, "loss": 0.0, "lr": 1.3938780188811286e-06, "epoch": 2.1701316785967313, "percentage": 43.4, "elapsed_time": "1:54:39", "remaining_time": "2:29:31", "throughput": 8705.21, "total_tokens": 59889000} +{"current_steps": 88835, "total_steps": 204665, "loss": 0.0475, "lr": 1.3937996334587235e-06, "epoch": 2.1702538294285785, "percentage": 43.41, "elapsed_time": "1:54:40", "remaining_time": "2:29:30", "throughput": 8705.26, "total_tokens": 59892328} +{"current_steps": 88840, "total_steps": 204665, "loss": 0.0813, "lr": 1.3937212451725018e-06, "epoch": 2.1703759802604257, "percentage": 43.41, "elapsed_time": "1:54:40", "remaining_time": "2:29:30", "throughput": 8705.31, "total_tokens": 59895720} +{"current_steps": 88845, "total_steps": 204665, "loss": 0.0001, "lr": 1.3936428540230328e-06, "epoch": 2.170498131092273, "percentage": 43.41, "elapsed_time": "1:54:40", "remaining_time": "2:29:29", "throughput": 8705.41, "total_tokens": 59899496} +{"current_steps": 88850, "total_steps": 204665, "loss": 0.0, "lr": 1.3935644600108875e-06, "epoch": 2.17062028192412, "percentage": 43.41, "elapsed_time": "1:54:41", "remaining_time": "2:29:29", "throughput": 8705.44, "total_tokens": 59902696} +{"current_steps": 88855, "total_steps": 204665, "loss": 0.1509, "lr": 1.3934860631366358e-06, "epoch": 2.1707424327559672, "percentage": 43.41, "elapsed_time": "1:54:41", "remaining_time": "2:29:28", "throughput": 8705.48, "total_tokens": 59906024} +{"current_steps": 88860, "total_steps": 204665, "loss": 0.0001, "lr": 1.3934076634008474e-06, "epoch": 2.1708645835878144, "percentage": 43.42, "elapsed_time": "1:54:41", "remaining_time": "2:29:28", "throughput": 8705.57, "total_tokens": 59909672} +{"current_steps": 88865, "total_steps": 204665, "loss": 0.0836, "lr": 1.3933292608040927e-06, "epoch": 2.170986734419661, "percentage": 43.42, "elapsed_time": "1:54:42", "remaining_time": "2:29:28", "throughput": 8705.6, "total_tokens": 59912872} +{"current_steps": 88870, "total_steps": 204665, "loss": 0.1332, "lr": 1.3932508553469417e-06, "epoch": 2.1711088852515084, "percentage": 43.42, "elapsed_time": "1:54:42", "remaining_time": "2:29:27", "throughput": 8705.64, "total_tokens": 59916200} +{"current_steps": 88875, "total_steps": 204665, "loss": 0.0531, "lr": 1.3931724470299646e-06, "epoch": 2.1712310360833555, "percentage": 43.42, "elapsed_time": "1:54:42", "remaining_time": "2:29:27", "throughput": 8705.72, "total_tokens": 59919784} +{"current_steps": 88880, "total_steps": 204665, "loss": 0.0003, "lr": 1.393094035853732e-06, "epoch": 2.1713531869152027, "percentage": 43.43, "elapsed_time": "1:54:43", "remaining_time": "2:29:26", "throughput": 8705.79, "total_tokens": 59923368} +{"current_steps": 88885, "total_steps": 204665, "loss": 0.0502, "lr": 1.3930156218188137e-06, "epoch": 2.17147533774705, "percentage": 43.43, "elapsed_time": "1:54:43", "remaining_time": "2:29:26", "throughput": 8705.83, "total_tokens": 59926632} +{"current_steps": 88890, "total_steps": 204665, "loss": 0.0377, "lr": 1.3929372049257802e-06, "epoch": 2.171597488578897, "percentage": 43.43, "elapsed_time": "1:54:43", "remaining_time": "2:29:25", "throughput": 8705.86, "total_tokens": 59929832} +{"current_steps": 88895, "total_steps": 204665, "loss": 0.0008, "lr": 1.3928587851752015e-06, "epoch": 2.1717196394107443, "percentage": 43.43, "elapsed_time": "1:54:44", "remaining_time": "2:29:25", "throughput": 8705.94, "total_tokens": 59933480} +{"current_steps": 88900, "total_steps": 204665, "loss": 0.048, "lr": 1.392780362567648e-06, "epoch": 2.1718417902425915, "percentage": 43.44, "elapsed_time": "1:54:44", "remaining_time": "2:29:25", "throughput": 8706.04, "total_tokens": 59937256} +{"current_steps": 88905, "total_steps": 204665, "loss": 0.074, "lr": 1.3927019371036903e-06, "epoch": 2.1719639410744387, "percentage": 43.44, "elapsed_time": "1:54:44", "remaining_time": "2:29:24", "throughput": 8706.06, "total_tokens": 59940328} +{"current_steps": 88910, "total_steps": 204665, "loss": 0.0447, "lr": 1.3926235087838982e-06, "epoch": 2.172086091906286, "percentage": 43.44, "elapsed_time": "1:54:45", "remaining_time": "2:29:24", "throughput": 8706.09, "total_tokens": 59943528} +{"current_steps": 88915, "total_steps": 204665, "loss": 0.0433, "lr": 1.3925450776088426e-06, "epoch": 2.172208242738133, "percentage": 43.44, "elapsed_time": "1:54:45", "remaining_time": "2:29:23", "throughput": 8706.13, "total_tokens": 59946856} +{"current_steps": 88920, "total_steps": 204665, "loss": 0.0003, "lr": 1.3924666435790936e-06, "epoch": 2.1723303935699803, "percentage": 43.45, "elapsed_time": "1:54:45", "remaining_time": "2:29:23", "throughput": 8706.15, "total_tokens": 59949928} +{"current_steps": 88925, "total_steps": 204665, "loss": 0.0431, "lr": 1.3923882066952216e-06, "epoch": 2.1724525444018274, "percentage": 43.45, "elapsed_time": "1:54:46", "remaining_time": "2:29:22", "throughput": 8706.25, "total_tokens": 59953704} +{"current_steps": 88930, "total_steps": 204665, "loss": 0.0004, "lr": 1.3923097669577967e-06, "epoch": 2.1725746952336746, "percentage": 43.45, "elapsed_time": "1:54:46", "remaining_time": "2:29:22", "throughput": 8706.29, "total_tokens": 59957032} +{"current_steps": 88935, "total_steps": 204665, "loss": 0.0538, "lr": 1.3922313243673899e-06, "epoch": 2.172696846065522, "percentage": 43.45, "elapsed_time": "1:54:46", "remaining_time": "2:29:21", "throughput": 8706.31, "total_tokens": 59960104} +{"current_steps": 88940, "total_steps": 204665, "loss": 0.0267, "lr": 1.3921528789245713e-06, "epoch": 2.172818996897369, "percentage": 43.46, "elapsed_time": "1:54:47", "remaining_time": "2:29:21", "throughput": 8706.34, "total_tokens": 59963368} +{"current_steps": 88945, "total_steps": 204665, "loss": 0.0488, "lr": 1.3920744306299117e-06, "epoch": 2.172941147729216, "percentage": 43.46, "elapsed_time": "1:54:47", "remaining_time": "2:29:21", "throughput": 8706.33, "total_tokens": 59966248} +{"current_steps": 88950, "total_steps": 204665, "loss": 0.0003, "lr": 1.391995979483981e-06, "epoch": 2.1730632985610634, "percentage": 43.46, "elapsed_time": "1:54:48", "remaining_time": "2:29:20", "throughput": 8706.47, "total_tokens": 59970344} +{"current_steps": 88955, "total_steps": 204665, "loss": 0.0002, "lr": 1.3919175254873505e-06, "epoch": 2.1731854493929106, "percentage": 43.46, "elapsed_time": "1:54:48", "remaining_time": "2:29:20", "throughput": 8706.47, "total_tokens": 59973352} +{"current_steps": 88960, "total_steps": 204665, "loss": 0.0513, "lr": 1.3918390686405903e-06, "epoch": 2.1733076002247573, "percentage": 43.47, "elapsed_time": "1:54:48", "remaining_time": "2:29:19", "throughput": 8706.49, "total_tokens": 59976488} +{"current_steps": 88965, "total_steps": 204665, "loss": 0.0526, "lr": 1.391760608944271e-06, "epoch": 2.1734297510566045, "percentage": 43.47, "elapsed_time": "1:54:49", "remaining_time": "2:29:19", "throughput": 8706.57, "total_tokens": 59980136} +{"current_steps": 88970, "total_steps": 204665, "loss": 0.0488, "lr": 1.3916821463989629e-06, "epoch": 2.1735519018884517, "percentage": 43.47, "elapsed_time": "1:54:49", "remaining_time": "2:29:18", "throughput": 8706.69, "total_tokens": 59984040} +{"current_steps": 88975, "total_steps": 204665, "loss": 0.1476, "lr": 1.3916036810052373e-06, "epoch": 2.173674052720299, "percentage": 43.47, "elapsed_time": "1:54:49", "remaining_time": "2:29:18", "throughput": 8706.7, "total_tokens": 59987112} +{"current_steps": 88980, "total_steps": 204665, "loss": 0.0713, "lr": 1.391525212763664e-06, "epoch": 2.173796203552146, "percentage": 43.48, "elapsed_time": "1:54:50", "remaining_time": "2:29:17", "throughput": 8706.73, "total_tokens": 59990376} +{"current_steps": 88985, "total_steps": 204665, "loss": 0.0001, "lr": 1.3914467416748144e-06, "epoch": 2.1739183543839933, "percentage": 43.48, "elapsed_time": "1:54:50", "remaining_time": "2:29:17", "throughput": 8706.8, "total_tokens": 59993896} +{"current_steps": 88990, "total_steps": 204665, "loss": 0.0669, "lr": 1.3913682677392587e-06, "epoch": 2.1740405052158405, "percentage": 43.48, "elapsed_time": "1:54:50", "remaining_time": "2:29:17", "throughput": 8706.8, "total_tokens": 59996840} +{"current_steps": 88995, "total_steps": 204665, "loss": 0.0655, "lr": 1.3912897909575675e-06, "epoch": 2.1741626560476877, "percentage": 43.48, "elapsed_time": "1:54:51", "remaining_time": "2:29:16", "throughput": 8706.85, "total_tokens": 60000232} +{"current_steps": 89000, "total_steps": 204665, "loss": 0.0543, "lr": 1.3912113113303117e-06, "epoch": 2.174284806879535, "percentage": 43.49, "elapsed_time": "1:54:51", "remaining_time": "2:29:16", "throughput": 8706.94, "total_tokens": 60003944} +{"current_steps": 89005, "total_steps": 204665, "loss": 0.0481, "lr": 1.3911328288580621e-06, "epoch": 2.174406957711382, "percentage": 43.49, "elapsed_time": "1:54:51", "remaining_time": "2:29:15", "throughput": 8706.98, "total_tokens": 60007272} +{"current_steps": 89010, "total_steps": 204665, "loss": 0.0006, "lr": 1.3910543435413898e-06, "epoch": 2.1745291085432292, "percentage": 43.49, "elapsed_time": "1:54:52", "remaining_time": "2:29:15", "throughput": 8707.02, "total_tokens": 60010536} +{"current_steps": 89015, "total_steps": 204665, "loss": 0.0003, "lr": 1.3909758553808646e-06, "epoch": 2.1746512593750764, "percentage": 43.49, "elapsed_time": "1:54:52", "remaining_time": "2:29:14", "throughput": 8707.08, "total_tokens": 60013992} +{"current_steps": 89020, "total_steps": 204665, "loss": 0.0004, "lr": 1.390897364377058e-06, "epoch": 2.1747734102069236, "percentage": 43.5, "elapsed_time": "1:54:52", "remaining_time": "2:29:14", "throughput": 8707.13, "total_tokens": 60017384} +{"current_steps": 89025, "total_steps": 204665, "loss": 0.002, "lr": 1.3908188705305405e-06, "epoch": 2.174895561038771, "percentage": 43.5, "elapsed_time": "1:54:53", "remaining_time": "2:29:14", "throughput": 8707.16, "total_tokens": 60020584} +{"current_steps": 89030, "total_steps": 204665, "loss": 0.0003, "lr": 1.390740373841883e-06, "epoch": 2.175017711870618, "percentage": 43.5, "elapsed_time": "1:54:53", "remaining_time": "2:29:13", "throughput": 8707.28, "total_tokens": 60024616} +{"current_steps": 89035, "total_steps": 204665, "loss": 0.0017, "lr": 1.3906618743116567e-06, "epoch": 2.175139862702465, "percentage": 43.5, "elapsed_time": "1:54:53", "remaining_time": "2:29:13", "throughput": 8707.36, "total_tokens": 60028264} +{"current_steps": 89040, "total_steps": 204665, "loss": 0.0001, "lr": 1.390583371940432e-06, "epoch": 2.1752620135343124, "percentage": 43.51, "elapsed_time": "1:54:54", "remaining_time": "2:29:12", "throughput": 8707.39, "total_tokens": 60031464} +{"current_steps": 89045, "total_steps": 204665, "loss": 0.0005, "lr": 1.3905048667287799e-06, "epoch": 2.175384164366159, "percentage": 43.51, "elapsed_time": "1:54:54", "remaining_time": "2:29:12", "throughput": 8707.44, "total_tokens": 60034856} +{"current_steps": 89050, "total_steps": 204665, "loss": 0.0001, "lr": 1.3904263586772716e-06, "epoch": 2.1755063151980063, "percentage": 43.51, "elapsed_time": "1:54:55", "remaining_time": "2:29:11", "throughput": 8707.49, "total_tokens": 60038248} +{"current_steps": 89055, "total_steps": 204665, "loss": 0.0004, "lr": 1.3903478477864776e-06, "epoch": 2.1756284660298535, "percentage": 43.51, "elapsed_time": "1:54:55", "remaining_time": "2:29:11", "throughput": 8707.5, "total_tokens": 60041384} +{"current_steps": 89060, "total_steps": 204665, "loss": 0.0701, "lr": 1.390269334056969e-06, "epoch": 2.1757506168617007, "percentage": 43.52, "elapsed_time": "1:54:55", "remaining_time": "2:29:11", "throughput": 8707.56, "total_tokens": 60044840} +{"current_steps": 89065, "total_steps": 204665, "loss": 0.0752, "lr": 1.390190817489317e-06, "epoch": 2.175872767693548, "percentage": 43.52, "elapsed_time": "1:54:56", "remaining_time": "2:29:10", "throughput": 8707.61, "total_tokens": 60048232} +{"current_steps": 89070, "total_steps": 204665, "loss": 0.0856, "lr": 1.3901122980840928e-06, "epoch": 2.175994918525395, "percentage": 43.52, "elapsed_time": "1:54:56", "remaining_time": "2:29:10", "throughput": 8707.7, "total_tokens": 60051944} +{"current_steps": 89075, "total_steps": 204665, "loss": 0.0001, "lr": 1.3900337758418665e-06, "epoch": 2.1761170693572423, "percentage": 43.52, "elapsed_time": "1:54:56", "remaining_time": "2:29:09", "throughput": 8707.74, "total_tokens": 60055208} +{"current_steps": 89080, "total_steps": 204665, "loss": 0.0582, "lr": 1.3899552507632098e-06, "epoch": 2.1762392201890894, "percentage": 43.52, "elapsed_time": "1:54:57", "remaining_time": "2:29:09", "throughput": 8707.83, "total_tokens": 60058920} +{"current_steps": 89085, "total_steps": 204665, "loss": 0.0328, "lr": 1.3898767228486936e-06, "epoch": 2.1763613710209366, "percentage": 43.53, "elapsed_time": "1:54:57", "remaining_time": "2:29:08", "throughput": 8707.82, "total_tokens": 60061864} +{"current_steps": 89090, "total_steps": 204665, "loss": 0.0001, "lr": 1.389798192098889e-06, "epoch": 2.176483521852784, "percentage": 43.53, "elapsed_time": "1:54:57", "remaining_time": "2:29:08", "throughput": 8707.86, "total_tokens": 60065192} +{"current_steps": 89095, "total_steps": 204665, "loss": 0.0613, "lr": 1.389719658514367e-06, "epoch": 2.176605672684631, "percentage": 43.53, "elapsed_time": "1:54:58", "remaining_time": "2:29:07", "throughput": 8707.9, "total_tokens": 60068456} +{"current_steps": 89100, "total_steps": 204665, "loss": 0.07, "lr": 1.3896411220956991e-06, "epoch": 2.176727823516478, "percentage": 43.53, "elapsed_time": "1:54:58", "remaining_time": "2:29:07", "throughput": 8708.0, "total_tokens": 60072296} +{"current_steps": 89105, "total_steps": 204665, "loss": 0.0383, "lr": 1.3895625828434561e-06, "epoch": 2.1768499743483254, "percentage": 43.54, "elapsed_time": "1:54:58", "remaining_time": "2:29:07", "throughput": 8708.03, "total_tokens": 60075560} +{"current_steps": 89110, "total_steps": 204665, "loss": 0.0001, "lr": 1.3894840407582092e-06, "epoch": 2.1769721251801726, "percentage": 43.54, "elapsed_time": "1:54:59", "remaining_time": "2:29:06", "throughput": 8708.06, "total_tokens": 60078760} +{"current_steps": 89115, "total_steps": 204665, "loss": 0.0005, "lr": 1.3894054958405295e-06, "epoch": 2.1770942760120198, "percentage": 43.54, "elapsed_time": "1:54:59", "remaining_time": "2:29:06", "throughput": 8707.79, "total_tokens": 60082152} +{"current_steps": 89120, "total_steps": 204665, "loss": 0.0011, "lr": 1.3893269480909886e-06, "epoch": 2.177216426843867, "percentage": 43.54, "elapsed_time": "1:55:00", "remaining_time": "2:29:06", "throughput": 8707.87, "total_tokens": 60085800} +{"current_steps": 89125, "total_steps": 204665, "loss": 0.0004, "lr": 1.389248397510157e-06, "epoch": 2.177338577675714, "percentage": 43.55, "elapsed_time": "1:55:00", "remaining_time": "2:29:05", "throughput": 8707.92, "total_tokens": 60089128} +{"current_steps": 89130, "total_steps": 204665, "loss": 0.0704, "lr": 1.3891698440986063e-06, "epoch": 2.1774607285075613, "percentage": 43.55, "elapsed_time": "1:55:00", "remaining_time": "2:29:05", "throughput": 8707.94, "total_tokens": 60092264} +{"current_steps": 89135, "total_steps": 204665, "loss": 0.0661, "lr": 1.389091287856908e-06, "epoch": 2.1775828793394085, "percentage": 43.55, "elapsed_time": "1:55:01", "remaining_time": "2:29:04", "throughput": 8707.93, "total_tokens": 60095208} +{"current_steps": 89140, "total_steps": 204665, "loss": 0.0504, "lr": 1.3890127287856334e-06, "epoch": 2.1777050301712553, "percentage": 43.55, "elapsed_time": "1:55:01", "remaining_time": "2:29:04", "throughput": 8708.03, "total_tokens": 60098920} +{"current_steps": 89145, "total_steps": 204665, "loss": 0.0005, "lr": 1.3889341668853536e-06, "epoch": 2.1778271810031025, "percentage": 43.56, "elapsed_time": "1:55:01", "remaining_time": "2:29:03", "throughput": 8708.06, "total_tokens": 60102184} +{"current_steps": 89150, "total_steps": 204665, "loss": 0.0372, "lr": 1.3888556021566397e-06, "epoch": 2.1779493318349497, "percentage": 43.56, "elapsed_time": "1:55:02", "remaining_time": "2:29:03", "throughput": 8708.16, "total_tokens": 60105960} +{"current_steps": 89155, "total_steps": 204665, "loss": 0.1155, "lr": 1.3887770346000632e-06, "epoch": 2.178071482666797, "percentage": 43.56, "elapsed_time": "1:55:02", "remaining_time": "2:29:03", "throughput": 8708.21, "total_tokens": 60109352} +{"current_steps": 89160, "total_steps": 204665, "loss": 0.011, "lr": 1.3886984642161957e-06, "epoch": 2.178193633498644, "percentage": 43.56, "elapsed_time": "1:55:02", "remaining_time": "2:29:02", "throughput": 8708.22, "total_tokens": 60112360} +{"current_steps": 89165, "total_steps": 204665, "loss": 0.0003, "lr": 1.3886198910056086e-06, "epoch": 2.1783157843304912, "percentage": 43.57, "elapsed_time": "1:55:03", "remaining_time": "2:29:02", "throughput": 8708.31, "total_tokens": 60116136} +{"current_steps": 89170, "total_steps": 204665, "loss": 0.0004, "lr": 1.388541314968873e-06, "epoch": 2.1784379351623384, "percentage": 43.57, "elapsed_time": "1:55:03", "remaining_time": "2:29:01", "throughput": 8708.34, "total_tokens": 60119336} +{"current_steps": 89175, "total_steps": 204665, "loss": 0.0002, "lr": 1.3884627361065604e-06, "epoch": 2.1785600859941856, "percentage": 43.57, "elapsed_time": "1:55:03", "remaining_time": "2:29:01", "throughput": 8708.4, "total_tokens": 60122792} +{"current_steps": 89180, "total_steps": 204665, "loss": 0.0071, "lr": 1.3883841544192424e-06, "epoch": 2.178682236826033, "percentage": 43.57, "elapsed_time": "1:55:04", "remaining_time": "2:29:00", "throughput": 8708.45, "total_tokens": 60126184} +{"current_steps": 89185, "total_steps": 204665, "loss": 0.0002, "lr": 1.38830556990749e-06, "epoch": 2.17880438765788, "percentage": 43.58, "elapsed_time": "1:55:04", "remaining_time": "2:29:00", "throughput": 8708.47, "total_tokens": 60129320} +{"current_steps": 89190, "total_steps": 204665, "loss": 0.1062, "lr": 1.3882269825718753e-06, "epoch": 2.178926538489727, "percentage": 43.58, "elapsed_time": "1:55:05", "remaining_time": "2:29:00", "throughput": 8708.57, "total_tokens": 60133096} +{"current_steps": 89195, "total_steps": 204665, "loss": 0.0008, "lr": 1.3881483924129693e-06, "epoch": 2.1790486893215744, "percentage": 43.58, "elapsed_time": "1:55:05", "remaining_time": "2:28:59", "throughput": 8708.58, "total_tokens": 60136104} +{"current_steps": 89200, "total_steps": 204665, "loss": 0.0559, "lr": 1.3880697994313442e-06, "epoch": 2.1791708401534216, "percentage": 43.58, "elapsed_time": "1:55:05", "remaining_time": "2:28:59", "throughput": 8708.61, "total_tokens": 60139368} +{"current_steps": 89205, "total_steps": 204665, "loss": 0.1086, "lr": 1.3879912036275712e-06, "epoch": 2.1792929909852687, "percentage": 43.59, "elapsed_time": "1:55:06", "remaining_time": "2:28:58", "throughput": 8708.64, "total_tokens": 60142568} +{"current_steps": 89210, "total_steps": 204665, "loss": 0.0001, "lr": 1.3879126050022213e-06, "epoch": 2.179415141817116, "percentage": 43.59, "elapsed_time": "1:55:06", "remaining_time": "2:28:58", "throughput": 8708.71, "total_tokens": 60146088} +{"current_steps": 89215, "total_steps": 204665, "loss": 0.001, "lr": 1.3878340035558671e-06, "epoch": 2.179537292648963, "percentage": 43.59, "elapsed_time": "1:55:06", "remaining_time": "2:28:57", "throughput": 8708.74, "total_tokens": 60149352} +{"current_steps": 89220, "total_steps": 204665, "loss": 0.0847, "lr": 1.3877553992890796e-06, "epoch": 2.1796594434808103, "percentage": 43.59, "elapsed_time": "1:55:07", "remaining_time": "2:28:57", "throughput": 8708.79, "total_tokens": 60152680} +{"current_steps": 89225, "total_steps": 204665, "loss": 0.0003, "lr": 1.3876767922024305e-06, "epoch": 2.179781594312657, "percentage": 43.6, "elapsed_time": "1:55:07", "remaining_time": "2:28:56", "throughput": 8708.88, "total_tokens": 60156392} +{"current_steps": 89230, "total_steps": 204665, "loss": 0.0506, "lr": 1.3875981822964912e-06, "epoch": 2.1799037451445042, "percentage": 43.6, "elapsed_time": "1:55:07", "remaining_time": "2:28:56", "throughput": 8708.91, "total_tokens": 60159656} +{"current_steps": 89235, "total_steps": 204665, "loss": 0.1247, "lr": 1.387519569571834e-06, "epoch": 2.1800258959763514, "percentage": 43.6, "elapsed_time": "1:55:08", "remaining_time": "2:28:56", "throughput": 8708.96, "total_tokens": 60163048} +{"current_steps": 89240, "total_steps": 204665, "loss": 0.1293, "lr": 1.38744095402903e-06, "epoch": 2.1801480468081986, "percentage": 43.6, "elapsed_time": "1:55:08", "remaining_time": "2:28:55", "throughput": 8708.97, "total_tokens": 60166056} +{"current_steps": 89245, "total_steps": 204665, "loss": 0.0432, "lr": 1.3873623356686517e-06, "epoch": 2.180270197640046, "percentage": 43.61, "elapsed_time": "1:55:08", "remaining_time": "2:28:55", "throughput": 8709.07, "total_tokens": 60169896} +{"current_steps": 89250, "total_steps": 204665, "loss": 0.0004, "lr": 1.3872837144912696e-06, "epoch": 2.180392348471893, "percentage": 43.61, "elapsed_time": "1:55:09", "remaining_time": "2:28:54", "throughput": 8709.16, "total_tokens": 60173608} +{"current_steps": 89255, "total_steps": 204665, "loss": 0.0363, "lr": 1.3872050904974566e-06, "epoch": 2.18051449930374, "percentage": 43.61, "elapsed_time": "1:55:09", "remaining_time": "2:28:54", "throughput": 8709.2, "total_tokens": 60176936} +{"current_steps": 89260, "total_steps": 204665, "loss": 0.0466, "lr": 1.387126463687784e-06, "epoch": 2.1806366501355874, "percentage": 43.61, "elapsed_time": "1:55:09", "remaining_time": "2:28:53", "throughput": 8709.22, "total_tokens": 60180008} +{"current_steps": 89265, "total_steps": 204665, "loss": 0.0394, "lr": 1.3870478340628235e-06, "epoch": 2.1807588009674346, "percentage": 43.62, "elapsed_time": "1:55:10", "remaining_time": "2:28:53", "throughput": 8709.21, "total_tokens": 60182888} +{"current_steps": 89270, "total_steps": 204665, "loss": 0.0002, "lr": 1.3869692016231473e-06, "epoch": 2.1808809517992818, "percentage": 43.62, "elapsed_time": "1:55:10", "remaining_time": "2:28:52", "throughput": 8709.21, "total_tokens": 60185832} +{"current_steps": 89275, "total_steps": 204665, "loss": 0.0005, "lr": 1.3868905663693272e-06, "epoch": 2.181003102631129, "percentage": 43.62, "elapsed_time": "1:55:10", "remaining_time": "2:28:52", "throughput": 8709.22, "total_tokens": 60188840} +{"current_steps": 89280, "total_steps": 204665, "loss": 0.1913, "lr": 1.386811928301934e-06, "epoch": 2.181125253462976, "percentage": 43.62, "elapsed_time": "1:55:11", "remaining_time": "2:28:52", "throughput": 8709.25, "total_tokens": 60192104} +{"current_steps": 89285, "total_steps": 204665, "loss": 0.068, "lr": 1.386733287421541e-06, "epoch": 2.1812474042948233, "percentage": 43.62, "elapsed_time": "1:55:11", "remaining_time": "2:28:51", "throughput": 8709.28, "total_tokens": 60195368} +{"current_steps": 89290, "total_steps": 204665, "loss": 0.0343, "lr": 1.3866546437287195e-06, "epoch": 2.1813695551266705, "percentage": 43.63, "elapsed_time": "1:55:11", "remaining_time": "2:28:51", "throughput": 8709.34, "total_tokens": 60198824} +{"current_steps": 89295, "total_steps": 204665, "loss": 0.0516, "lr": 1.3865759972240411e-06, "epoch": 2.1814917059585177, "percentage": 43.63, "elapsed_time": "1:55:12", "remaining_time": "2:28:50", "throughput": 8709.37, "total_tokens": 60202024} +{"current_steps": 89300, "total_steps": 204665, "loss": 0.0002, "lr": 1.3864973479080786e-06, "epoch": 2.181613856790365, "percentage": 43.63, "elapsed_time": "1:55:12", "remaining_time": "2:28:50", "throughput": 8709.4, "total_tokens": 60205224} +{"current_steps": 89305, "total_steps": 204665, "loss": 0.0002, "lr": 1.386418695781403e-06, "epoch": 2.181736007622212, "percentage": 43.63, "elapsed_time": "1:55:13", "remaining_time": "2:28:49", "throughput": 8709.47, "total_tokens": 60208808} +{"current_steps": 89310, "total_steps": 204665, "loss": 0.0017, "lr": 1.3863400408445867e-06, "epoch": 2.181858158454059, "percentage": 43.64, "elapsed_time": "1:55:13", "remaining_time": "2:28:49", "throughput": 8709.52, "total_tokens": 60212200} +{"current_steps": 89315, "total_steps": 204665, "loss": 0.0004, "lr": 1.3862613830982018e-06, "epoch": 2.1819803092859065, "percentage": 43.64, "elapsed_time": "1:55:13", "remaining_time": "2:28:49", "throughput": 8709.55, "total_tokens": 60215464} +{"current_steps": 89320, "total_steps": 204665, "loss": 0.0729, "lr": 1.3861827225428204e-06, "epoch": 2.182102460117753, "percentage": 43.64, "elapsed_time": "1:55:14", "remaining_time": "2:28:48", "throughput": 8709.57, "total_tokens": 60218600} +{"current_steps": 89325, "total_steps": 204665, "loss": 0.0524, "lr": 1.3861040591790144e-06, "epoch": 2.1822246109496004, "percentage": 43.64, "elapsed_time": "1:55:14", "remaining_time": "2:28:48", "throughput": 8709.62, "total_tokens": 60221992} +{"current_steps": 89330, "total_steps": 204665, "loss": 0.1776, "lr": 1.3860253930073555e-06, "epoch": 2.1823467617814476, "percentage": 43.65, "elapsed_time": "1:55:14", "remaining_time": "2:28:47", "throughput": 8709.65, "total_tokens": 60225256} +{"current_steps": 89335, "total_steps": 204665, "loss": 0.0133, "lr": 1.3859467240284165e-06, "epoch": 2.182468912613295, "percentage": 43.65, "elapsed_time": "1:55:15", "remaining_time": "2:28:47", "throughput": 8709.73, "total_tokens": 60228840} +{"current_steps": 89340, "total_steps": 204665, "loss": 0.0005, "lr": 1.3858680522427686e-06, "epoch": 2.182591063445142, "percentage": 43.65, "elapsed_time": "1:55:15", "remaining_time": "2:28:46", "throughput": 8709.8, "total_tokens": 60232424} +{"current_steps": 89345, "total_steps": 204665, "loss": 0.0009, "lr": 1.3857893776509849e-06, "epoch": 2.182713214276989, "percentage": 43.65, "elapsed_time": "1:55:15", "remaining_time": "2:28:46", "throughput": 8709.83, "total_tokens": 60235624} +{"current_steps": 89350, "total_steps": 204665, "loss": 0.0492, "lr": 1.385710700253637e-06, "epoch": 2.1828353651088364, "percentage": 43.66, "elapsed_time": "1:55:16", "remaining_time": "2:28:46", "throughput": 8709.86, "total_tokens": 60238888} +{"current_steps": 89355, "total_steps": 204665, "loss": 0.0399, "lr": 1.385632020051297e-06, "epoch": 2.1829575159406835, "percentage": 43.66, "elapsed_time": "1:55:16", "remaining_time": "2:28:45", "throughput": 8709.93, "total_tokens": 60242408} +{"current_steps": 89360, "total_steps": 204665, "loss": 0.0133, "lr": 1.3855533370445374e-06, "epoch": 2.1830796667725307, "percentage": 43.66, "elapsed_time": "1:55:16", "remaining_time": "2:28:45", "throughput": 8710.05, "total_tokens": 60246376} +{"current_steps": 89365, "total_steps": 204665, "loss": 0.0004, "lr": 1.3854746512339301e-06, "epoch": 2.183201817604378, "percentage": 43.66, "elapsed_time": "1:55:17", "remaining_time": "2:28:44", "throughput": 8710.08, "total_tokens": 60249576} +{"current_steps": 89370, "total_steps": 204665, "loss": 0.0538, "lr": 1.3853959626200475e-06, "epoch": 2.183323968436225, "percentage": 43.67, "elapsed_time": "1:55:17", "remaining_time": "2:28:44", "throughput": 8710.11, "total_tokens": 60252776} +{"current_steps": 89375, "total_steps": 204665, "loss": 0.1073, "lr": 1.3853172712034617e-06, "epoch": 2.1834461192680723, "percentage": 43.67, "elapsed_time": "1:55:17", "remaining_time": "2:28:43", "throughput": 8710.12, "total_tokens": 60255784} +{"current_steps": 89380, "total_steps": 204665, "loss": 0.0619, "lr": 1.3852385769847453e-06, "epoch": 2.1835682700999195, "percentage": 43.67, "elapsed_time": "1:55:18", "remaining_time": "2:28:43", "throughput": 8710.18, "total_tokens": 60259240} +{"current_steps": 89385, "total_steps": 204665, "loss": 0.0001, "lr": 1.3851598799644702e-06, "epoch": 2.1836904209317667, "percentage": 43.67, "elapsed_time": "1:55:18", "remaining_time": "2:28:42", "throughput": 8710.24, "total_tokens": 60262760} +{"current_steps": 89390, "total_steps": 204665, "loss": 0.0006, "lr": 1.3850811801432087e-06, "epoch": 2.183812571763614, "percentage": 43.68, "elapsed_time": "1:55:18", "remaining_time": "2:28:42", "throughput": 8710.33, "total_tokens": 60266408} +{"current_steps": 89395, "total_steps": 204665, "loss": 0.0434, "lr": 1.3850024775215337e-06, "epoch": 2.183934722595461, "percentage": 43.68, "elapsed_time": "1:55:19", "remaining_time": "2:28:42", "throughput": 8710.35, "total_tokens": 60269608} +{"current_steps": 89400, "total_steps": 204665, "loss": 0.0347, "lr": 1.384923772100017e-06, "epoch": 2.1840568734273083, "percentage": 43.68, "elapsed_time": "1:55:19", "remaining_time": "2:28:41", "throughput": 8710.36, "total_tokens": 60272616} +{"current_steps": 89405, "total_steps": 204665, "loss": 0.0004, "lr": 1.3848450638792305e-06, "epoch": 2.184179024259155, "percentage": 43.68, "elapsed_time": "1:55:20", "remaining_time": "2:28:41", "throughput": 8710.45, "total_tokens": 60276392} +{"current_steps": 89410, "total_steps": 204665, "loss": 0.0013, "lr": 1.3847663528597477e-06, "epoch": 2.184301175091002, "percentage": 43.69, "elapsed_time": "1:55:20", "remaining_time": "2:28:40", "throughput": 8710.55, "total_tokens": 60280104} +{"current_steps": 89415, "total_steps": 204665, "loss": 0.0448, "lr": 1.3846876390421405e-06, "epoch": 2.1844233259228494, "percentage": 43.69, "elapsed_time": "1:55:20", "remaining_time": "2:28:40", "throughput": 8710.58, "total_tokens": 60283368} +{"current_steps": 89420, "total_steps": 204665, "loss": 0.0003, "lr": 1.3846089224269815e-06, "epoch": 2.1845454767546966, "percentage": 43.69, "elapsed_time": "1:55:21", "remaining_time": "2:28:39", "throughput": 8710.64, "total_tokens": 60286888} +{"current_steps": 89425, "total_steps": 204665, "loss": 0.0002, "lr": 1.3845302030148428e-06, "epoch": 2.1846676275865438, "percentage": 43.69, "elapsed_time": "1:55:21", "remaining_time": "2:28:39", "throughput": 8710.71, "total_tokens": 60290472} +{"current_steps": 89430, "total_steps": 204665, "loss": 0.0003, "lr": 1.384451480806297e-06, "epoch": 2.184789778418391, "percentage": 43.7, "elapsed_time": "1:55:21", "remaining_time": "2:28:39", "throughput": 8710.75, "total_tokens": 60293736} +{"current_steps": 89435, "total_steps": 204665, "loss": 0.0455, "lr": 1.3843727558019166e-06, "epoch": 2.184911929250238, "percentage": 43.7, "elapsed_time": "1:55:22", "remaining_time": "2:28:38", "throughput": 8710.81, "total_tokens": 60297128} +{"current_steps": 89440, "total_steps": 204665, "loss": 0.0537, "lr": 1.3842940280022738e-06, "epoch": 2.1850340800820853, "percentage": 43.7, "elapsed_time": "1:55:22", "remaining_time": "2:28:38", "throughput": 8710.86, "total_tokens": 60300520} +{"current_steps": 89445, "total_steps": 204665, "loss": 0.0003, "lr": 1.384215297407942e-06, "epoch": 2.1851562309139325, "percentage": 43.7, "elapsed_time": "1:55:22", "remaining_time": "2:28:37", "throughput": 8710.93, "total_tokens": 60304040} +{"current_steps": 89450, "total_steps": 204665, "loss": 0.0528, "lr": 1.384136564019493e-06, "epoch": 2.1852783817457797, "percentage": 43.71, "elapsed_time": "1:55:23", "remaining_time": "2:28:37", "throughput": 8710.97, "total_tokens": 60307304} +{"current_steps": 89455, "total_steps": 204665, "loss": 0.0003, "lr": 1.3840578278374996e-06, "epoch": 2.185400532577627, "percentage": 43.71, "elapsed_time": "1:55:23", "remaining_time": "2:28:36", "throughput": 8710.99, "total_tokens": 60310440} +{"current_steps": 89460, "total_steps": 204665, "loss": 0.0004, "lr": 1.3839790888625345e-06, "epoch": 2.185522683409474, "percentage": 43.71, "elapsed_time": "1:55:23", "remaining_time": "2:28:36", "throughput": 8711.01, "total_tokens": 60313576} +{"current_steps": 89465, "total_steps": 204665, "loss": 0.0003, "lr": 1.38390034709517e-06, "epoch": 2.1856448342413213, "percentage": 43.71, "elapsed_time": "1:55:24", "remaining_time": "2:28:35", "throughput": 8711.05, "total_tokens": 60316776} +{"current_steps": 89470, "total_steps": 204665, "loss": 0.0006, "lr": 1.383821602535979e-06, "epoch": 2.1857669850731685, "percentage": 43.72, "elapsed_time": "1:55:24", "remaining_time": "2:28:35", "throughput": 8711.13, "total_tokens": 60320424} +{"current_steps": 89475, "total_steps": 204665, "loss": 0.0001, "lr": 1.3837428551855342e-06, "epoch": 2.1858891359050157, "percentage": 43.72, "elapsed_time": "1:55:24", "remaining_time": "2:28:35", "throughput": 8711.18, "total_tokens": 60323816} +{"current_steps": 89480, "total_steps": 204665, "loss": 0.0001, "lr": 1.383664105044408e-06, "epoch": 2.186011286736863, "percentage": 43.72, "elapsed_time": "1:55:25", "remaining_time": "2:28:34", "throughput": 8711.21, "total_tokens": 60327016} +{"current_steps": 89485, "total_steps": 204665, "loss": 0.0106, "lr": 1.3835853521131733e-06, "epoch": 2.18613343756871, "percentage": 43.72, "elapsed_time": "1:55:25", "remaining_time": "2:28:34", "throughput": 8711.21, "total_tokens": 60329960} +{"current_steps": 89490, "total_steps": 204665, "loss": 0.0446, "lr": 1.3835065963924026e-06, "epoch": 2.186255588400557, "percentage": 43.73, "elapsed_time": "1:55:25", "remaining_time": "2:28:33", "throughput": 8711.25, "total_tokens": 60333288} +{"current_steps": 89495, "total_steps": 204665, "loss": 0.0002, "lr": 1.3834278378826687e-06, "epoch": 2.186377739232404, "percentage": 43.73, "elapsed_time": "1:55:26", "remaining_time": "2:28:33", "throughput": 8711.31, "total_tokens": 60336680} +{"current_steps": 89500, "total_steps": 204665, "loss": 0.0001, "lr": 1.3833490765845445e-06, "epoch": 2.186499890064251, "percentage": 43.73, "elapsed_time": "1:55:26", "remaining_time": "2:28:32", "throughput": 8711.37, "total_tokens": 60340136} +{"current_steps": 89505, "total_steps": 204665, "loss": 0.0001, "lr": 1.383270312498603e-06, "epoch": 2.1866220408960984, "percentage": 43.73, "elapsed_time": "1:55:26", "remaining_time": "2:28:32", "throughput": 8711.43, "total_tokens": 60343592} +{"current_steps": 89510, "total_steps": 204665, "loss": 0.062, "lr": 1.3831915456254164e-06, "epoch": 2.1867441917279455, "percentage": 43.73, "elapsed_time": "1:55:27", "remaining_time": "2:28:31", "throughput": 8711.46, "total_tokens": 60346792} +{"current_steps": 89515, "total_steps": 204665, "loss": 0.0006, "lr": 1.383112775965558e-06, "epoch": 2.1868663425597927, "percentage": 43.74, "elapsed_time": "1:55:27", "remaining_time": "2:28:31", "throughput": 8711.56, "total_tokens": 60350632} +{"current_steps": 89520, "total_steps": 204665, "loss": 0.0999, "lr": 1.3830340035196004e-06, "epoch": 2.18698849339164, "percentage": 43.74, "elapsed_time": "1:55:27", "remaining_time": "2:28:31", "throughput": 8711.59, "total_tokens": 60353768} +{"current_steps": 89525, "total_steps": 204665, "loss": 0.0445, "lr": 1.3829552282881165e-06, "epoch": 2.187110644223487, "percentage": 43.74, "elapsed_time": "1:55:28", "remaining_time": "2:28:30", "throughput": 8711.61, "total_tokens": 60356968} +{"current_steps": 89530, "total_steps": 204665, "loss": 0.0009, "lr": 1.3828764502716793e-06, "epoch": 2.1872327950553343, "percentage": 43.74, "elapsed_time": "1:55:28", "remaining_time": "2:28:30", "throughput": 8711.65, "total_tokens": 60360232} +{"current_steps": 89535, "total_steps": 204665, "loss": 0.0298, "lr": 1.3827976694708614e-06, "epoch": 2.1873549458871815, "percentage": 43.75, "elapsed_time": "1:55:29", "remaining_time": "2:28:29", "throughput": 8711.64, "total_tokens": 60363048} +{"current_steps": 89540, "total_steps": 204665, "loss": 0.0005, "lr": 1.3827188858862359e-06, "epoch": 2.1874770967190287, "percentage": 43.75, "elapsed_time": "1:55:29", "remaining_time": "2:28:29", "throughput": 8711.7, "total_tokens": 60366504} +{"current_steps": 89545, "total_steps": 204665, "loss": 0.0493, "lr": 1.3826400995183755e-06, "epoch": 2.187599247550876, "percentage": 43.75, "elapsed_time": "1:55:29", "remaining_time": "2:28:28", "throughput": 8711.75, "total_tokens": 60369832} +{"current_steps": 89550, "total_steps": 204665, "loss": 0.0621, "lr": 1.3825613103678539e-06, "epoch": 2.187721398382723, "percentage": 43.75, "elapsed_time": "1:55:30", "remaining_time": "2:28:28", "throughput": 8711.77, "total_tokens": 60373032} +{"current_steps": 89555, "total_steps": 204665, "loss": 0.1244, "lr": 1.382482518435243e-06, "epoch": 2.1878435492145702, "percentage": 43.76, "elapsed_time": "1:55:30", "remaining_time": "2:28:28", "throughput": 8711.85, "total_tokens": 60376616} +{"current_steps": 89560, "total_steps": 204665, "loss": 0.0003, "lr": 1.3824037237211166e-06, "epoch": 2.1879657000464174, "percentage": 43.76, "elapsed_time": "1:55:30", "remaining_time": "2:28:27", "throughput": 8711.87, "total_tokens": 60379752} +{"current_steps": 89565, "total_steps": 204665, "loss": 0.0001, "lr": 1.3823249262260476e-06, "epoch": 2.1880878508782646, "percentage": 43.76, "elapsed_time": "1:55:31", "remaining_time": "2:28:27", "throughput": 8711.93, "total_tokens": 60383208} +{"current_steps": 89570, "total_steps": 204665, "loss": 0.0004, "lr": 1.3822461259506088e-06, "epoch": 2.188210001710112, "percentage": 43.76, "elapsed_time": "1:55:31", "remaining_time": "2:28:26", "throughput": 8711.95, "total_tokens": 60386344} +{"current_steps": 89575, "total_steps": 204665, "loss": 0.0005, "lr": 1.3821673228953735e-06, "epoch": 2.188332152541959, "percentage": 43.77, "elapsed_time": "1:55:31", "remaining_time": "2:28:26", "throughput": 8711.99, "total_tokens": 60389608} +{"current_steps": 89580, "total_steps": 204665, "loss": 0.0001, "lr": 1.3820885170609142e-06, "epoch": 2.188454303373806, "percentage": 43.77, "elapsed_time": "1:55:32", "remaining_time": "2:28:25", "throughput": 8712.05, "total_tokens": 60393064} +{"current_steps": 89585, "total_steps": 204665, "loss": 0.0001, "lr": 1.382009708447805e-06, "epoch": 2.188576454205653, "percentage": 43.77, "elapsed_time": "1:55:32", "remaining_time": "2:28:25", "throughput": 8712.13, "total_tokens": 60396712} +{"current_steps": 89590, "total_steps": 204665, "loss": 0.0706, "lr": 1.3819308970566178e-06, "epoch": 2.1886986050375, "percentage": 43.77, "elapsed_time": "1:55:32", "remaining_time": "2:28:24", "throughput": 8712.21, "total_tokens": 60400360} +{"current_steps": 89595, "total_steps": 204665, "loss": 0.0002, "lr": 1.381852082887927e-06, "epoch": 2.1888207558693473, "percentage": 43.78, "elapsed_time": "1:55:33", "remaining_time": "2:28:24", "throughput": 8712.26, "total_tokens": 60403752} +{"current_steps": 89600, "total_steps": 204665, "loss": 0.0001, "lr": 1.3817732659423048e-06, "epoch": 2.1889429067011945, "percentage": 43.78, "elapsed_time": "1:55:33", "remaining_time": "2:28:24", "throughput": 8712.32, "total_tokens": 60407208} +{"current_steps": 89605, "total_steps": 204665, "loss": 0.0672, "lr": 1.3816944462203251e-06, "epoch": 2.1890650575330417, "percentage": 43.78, "elapsed_time": "1:55:33", "remaining_time": "2:28:23", "throughput": 8712.39, "total_tokens": 60410728} +{"current_steps": 89610, "total_steps": 204665, "loss": 0.1354, "lr": 1.3816156237225602e-06, "epoch": 2.189187208364889, "percentage": 43.78, "elapsed_time": "1:55:34", "remaining_time": "2:28:23", "throughput": 8712.44, "total_tokens": 60414184} +{"current_steps": 89615, "total_steps": 204665, "loss": 0.0521, "lr": 1.3815367984495842e-06, "epoch": 2.189309359196736, "percentage": 43.79, "elapsed_time": "1:55:34", "remaining_time": "2:28:22", "throughput": 8712.51, "total_tokens": 60417640} +{"current_steps": 89620, "total_steps": 204665, "loss": 0.0002, "lr": 1.3814579704019697e-06, "epoch": 2.1894315100285833, "percentage": 43.79, "elapsed_time": "1:55:34", "remaining_time": "2:28:22", "throughput": 8712.57, "total_tokens": 60421160} +{"current_steps": 89625, "total_steps": 204665, "loss": 0.0438, "lr": 1.3813791395802905e-06, "epoch": 2.1895536608604305, "percentage": 43.79, "elapsed_time": "1:55:35", "remaining_time": "2:28:21", "throughput": 8712.61, "total_tokens": 60424488} +{"current_steps": 89630, "total_steps": 204665, "loss": 0.0001, "lr": 1.3813003059851198e-06, "epoch": 2.1896758116922777, "percentage": 43.79, "elapsed_time": "1:55:35", "remaining_time": "2:28:21", "throughput": 8712.67, "total_tokens": 60427944} +{"current_steps": 89635, "total_steps": 204665, "loss": 0.0456, "lr": 1.3812214696170303e-06, "epoch": 2.189797962524125, "percentage": 43.8, "elapsed_time": "1:55:35", "remaining_time": "2:28:21", "throughput": 8712.73, "total_tokens": 60431336} +{"current_steps": 89640, "total_steps": 204665, "loss": 0.044, "lr": 1.381142630476596e-06, "epoch": 2.189920113355972, "percentage": 43.8, "elapsed_time": "1:55:36", "remaining_time": "2:28:20", "throughput": 8712.76, "total_tokens": 60434536} +{"current_steps": 89645, "total_steps": 204665, "loss": 0.0, "lr": 1.3810637885643898e-06, "epoch": 2.190042264187819, "percentage": 43.8, "elapsed_time": "1:55:36", "remaining_time": "2:28:20", "throughput": 8712.78, "total_tokens": 60437672} +{"current_steps": 89650, "total_steps": 204665, "loss": 0.1284, "lr": 1.3809849438809853e-06, "epoch": 2.1901644150196664, "percentage": 43.8, "elapsed_time": "1:55:37", "remaining_time": "2:28:19", "throughput": 8712.79, "total_tokens": 60440616} +{"current_steps": 89655, "total_steps": 204665, "loss": 0.1287, "lr": 1.3809060964269557e-06, "epoch": 2.1902865658515136, "percentage": 43.81, "elapsed_time": "1:55:37", "remaining_time": "2:28:19", "throughput": 8712.84, "total_tokens": 60444008} +{"current_steps": 89660, "total_steps": 204665, "loss": 0.0911, "lr": 1.3808272462028747e-06, "epoch": 2.190408716683361, "percentage": 43.81, "elapsed_time": "1:55:37", "remaining_time": "2:28:18", "throughput": 8712.92, "total_tokens": 60447656} +{"current_steps": 89665, "total_steps": 204665, "loss": 0.0001, "lr": 1.3807483932093157e-06, "epoch": 2.190530867515208, "percentage": 43.81, "elapsed_time": "1:55:38", "remaining_time": "2:28:18", "throughput": 8713.01, "total_tokens": 60451304} +{"current_steps": 89670, "total_steps": 204665, "loss": 0.0006, "lr": 1.3806695374468515e-06, "epoch": 2.1906530183470547, "percentage": 43.81, "elapsed_time": "1:55:38", "remaining_time": "2:28:17", "throughput": 8713.01, "total_tokens": 60454248} +{"current_steps": 89675, "total_steps": 204665, "loss": 0.0001, "lr": 1.3805906789160564e-06, "epoch": 2.190775169178902, "percentage": 43.82, "elapsed_time": "1:55:38", "remaining_time": "2:28:17", "throughput": 8713.02, "total_tokens": 60457320} +{"current_steps": 89680, "total_steps": 204665, "loss": 0.001, "lr": 1.3805118176175033e-06, "epoch": 2.190897320010749, "percentage": 43.82, "elapsed_time": "1:55:39", "remaining_time": "2:28:17", "throughput": 8713.04, "total_tokens": 60460392} +{"current_steps": 89685, "total_steps": 204665, "loss": 0.0002, "lr": 1.380432953551766e-06, "epoch": 2.1910194708425963, "percentage": 43.82, "elapsed_time": "1:55:39", "remaining_time": "2:28:16", "throughput": 8713.13, "total_tokens": 60464104} +{"current_steps": 89690, "total_steps": 204665, "loss": 0.0002, "lr": 1.3803540867194182e-06, "epoch": 2.1911416216744435, "percentage": 43.82, "elapsed_time": "1:55:39", "remaining_time": "2:28:16", "throughput": 8713.19, "total_tokens": 60467560} +{"current_steps": 89695, "total_steps": 204665, "loss": 0.0008, "lr": 1.3802752171210329e-06, "epoch": 2.1912637725062907, "percentage": 43.83, "elapsed_time": "1:55:40", "remaining_time": "2:28:15", "throughput": 8713.21, "total_tokens": 60470696} +{"current_steps": 89700, "total_steps": 204665, "loss": 0.0003, "lr": 1.3801963447571837e-06, "epoch": 2.191385923338138, "percentage": 43.83, "elapsed_time": "1:55:40", "remaining_time": "2:28:15", "throughput": 8713.28, "total_tokens": 60474216} +{"current_steps": 89705, "total_steps": 204665, "loss": 0.0018, "lr": 1.380117469628445e-06, "epoch": 2.191508074169985, "percentage": 43.83, "elapsed_time": "1:55:40", "remaining_time": "2:28:14", "throughput": 8713.33, "total_tokens": 60477608} +{"current_steps": 89710, "total_steps": 204665, "loss": 0.0002, "lr": 1.3800385917353894e-06, "epoch": 2.1916302250018322, "percentage": 43.83, "elapsed_time": "1:55:41", "remaining_time": "2:28:14", "throughput": 8713.34, "total_tokens": 60480616} +{"current_steps": 89715, "total_steps": 204665, "loss": 0.0001, "lr": 1.379959711078591e-06, "epoch": 2.1917523758336794, "percentage": 43.84, "elapsed_time": "1:55:41", "remaining_time": "2:28:14", "throughput": 8713.44, "total_tokens": 60484392} +{"current_steps": 89720, "total_steps": 204665, "loss": 0.0001, "lr": 1.3798808276586233e-06, "epoch": 2.1918745266655266, "percentage": 43.84, "elapsed_time": "1:55:41", "remaining_time": "2:28:13", "throughput": 8713.46, "total_tokens": 60487592} +{"current_steps": 89725, "total_steps": 204665, "loss": 0.1269, "lr": 1.3798019414760603e-06, "epoch": 2.191996677497374, "percentage": 43.84, "elapsed_time": "1:55:42", "remaining_time": "2:28:13", "throughput": 8713.48, "total_tokens": 60490600} +{"current_steps": 89730, "total_steps": 204665, "loss": 0.0008, "lr": 1.3797230525314754e-06, "epoch": 2.192118828329221, "percentage": 43.84, "elapsed_time": "1:55:42", "remaining_time": "2:28:12", "throughput": 8713.49, "total_tokens": 60493672} +{"current_steps": 89735, "total_steps": 204665, "loss": 0.1136, "lr": 1.379644160825442e-06, "epoch": 2.192240979161068, "percentage": 43.84, "elapsed_time": "1:55:42", "remaining_time": "2:28:12", "throughput": 8713.53, "total_tokens": 60496936} +{"current_steps": 89740, "total_steps": 204665, "loss": 0.0825, "lr": 1.3795652663585347e-06, "epoch": 2.1923631299929154, "percentage": 43.85, "elapsed_time": "1:55:43", "remaining_time": "2:28:11", "throughput": 8713.55, "total_tokens": 60500072} +{"current_steps": 89745, "total_steps": 204665, "loss": 0.0013, "lr": 1.3794863691313264e-06, "epoch": 2.1924852808247626, "percentage": 43.85, "elapsed_time": "1:55:43", "remaining_time": "2:28:11", "throughput": 8713.59, "total_tokens": 60503400} +{"current_steps": 89750, "total_steps": 204665, "loss": 0.0067, "lr": 1.379407469144391e-06, "epoch": 2.1926074316566098, "percentage": 43.85, "elapsed_time": "1:55:43", "remaining_time": "2:28:10", "throughput": 8713.64, "total_tokens": 60506792} +{"current_steps": 89755, "total_steps": 204665, "loss": 0.0003, "lr": 1.3793285663983026e-06, "epoch": 2.192729582488457, "percentage": 43.85, "elapsed_time": "1:55:44", "remaining_time": "2:28:10", "throughput": 8713.66, "total_tokens": 60509928} +{"current_steps": 89760, "total_steps": 204665, "loss": 0.0691, "lr": 1.3792496608936348e-06, "epoch": 2.192851733320304, "percentage": 43.86, "elapsed_time": "1:55:44", "remaining_time": "2:28:10", "throughput": 8713.75, "total_tokens": 60513576} +{"current_steps": 89765, "total_steps": 204665, "loss": 0.0001, "lr": 1.3791707526309615e-06, "epoch": 2.192973884152151, "percentage": 43.86, "elapsed_time": "1:55:44", "remaining_time": "2:28:09", "throughput": 8713.82, "total_tokens": 60517096} +{"current_steps": 89770, "total_steps": 204665, "loss": 0.065, "lr": 1.3790918416108567e-06, "epoch": 2.193096034983998, "percentage": 43.86, "elapsed_time": "1:55:45", "remaining_time": "2:28:09", "throughput": 8713.91, "total_tokens": 60520872} +{"current_steps": 89775, "total_steps": 204665, "loss": 0.0002, "lr": 1.3790129278338936e-06, "epoch": 2.1932181858158453, "percentage": 43.86, "elapsed_time": "1:55:45", "remaining_time": "2:28:08", "throughput": 8714.03, "total_tokens": 60524776} +{"current_steps": 89780, "total_steps": 204665, "loss": 0.0001, "lr": 1.3789340113006466e-06, "epoch": 2.1933403366476925, "percentage": 43.87, "elapsed_time": "1:55:46", "remaining_time": "2:28:08", "throughput": 8714.12, "total_tokens": 60528552} +{"current_steps": 89785, "total_steps": 204665, "loss": 0.2405, "lr": 1.3788550920116899e-06, "epoch": 2.1934624874795396, "percentage": 43.87, "elapsed_time": "1:55:46", "remaining_time": "2:28:07", "throughput": 8714.13, "total_tokens": 60531560} +{"current_steps": 89790, "total_steps": 204665, "loss": 0.1328, "lr": 1.378776169967597e-06, "epoch": 2.193584638311387, "percentage": 43.87, "elapsed_time": "1:55:46", "remaining_time": "2:28:07", "throughput": 8714.16, "total_tokens": 60534760} +{"current_steps": 89795, "total_steps": 204665, "loss": 0.0013, "lr": 1.3786972451689419e-06, "epoch": 2.193706789143234, "percentage": 43.87, "elapsed_time": "1:55:47", "remaining_time": "2:28:07", "throughput": 8714.23, "total_tokens": 60538344} +{"current_steps": 89800, "total_steps": 204665, "loss": 0.0002, "lr": 1.3786183176162985e-06, "epoch": 2.193828939975081, "percentage": 43.88, "elapsed_time": "1:55:47", "remaining_time": "2:28:06", "throughput": 8714.23, "total_tokens": 60541288} +{"current_steps": 89805, "total_steps": 204665, "loss": 0.0005, "lr": 1.3785393873102407e-06, "epoch": 2.1939510908069284, "percentage": 43.88, "elapsed_time": "1:55:47", "remaining_time": "2:28:06", "throughput": 8714.38, "total_tokens": 60545512} +{"current_steps": 89810, "total_steps": 204665, "loss": 0.0011, "lr": 1.3784604542513428e-06, "epoch": 2.1940732416387756, "percentage": 43.88, "elapsed_time": "1:55:48", "remaining_time": "2:28:05", "throughput": 8714.45, "total_tokens": 60549032} +{"current_steps": 89815, "total_steps": 204665, "loss": 0.089, "lr": 1.3783815184401788e-06, "epoch": 2.194195392470623, "percentage": 43.88, "elapsed_time": "1:55:48", "remaining_time": "2:28:05", "throughput": 8714.5, "total_tokens": 60552424} +{"current_steps": 89820, "total_steps": 204665, "loss": 0.0489, "lr": 1.3783025798773224e-06, "epoch": 2.19431754330247, "percentage": 43.89, "elapsed_time": "1:55:48", "remaining_time": "2:28:04", "throughput": 8714.51, "total_tokens": 60555496} +{"current_steps": 89825, "total_steps": 204665, "loss": 0.0003, "lr": 1.378223638563348e-06, "epoch": 2.194439694134317, "percentage": 43.89, "elapsed_time": "1:55:49", "remaining_time": "2:28:04", "throughput": 8714.64, "total_tokens": 60559528} +{"current_steps": 89830, "total_steps": 204665, "loss": 0.0002, "lr": 1.3781446944988297e-06, "epoch": 2.1945618449661644, "percentage": 43.89, "elapsed_time": "1:55:49", "remaining_time": "2:28:03", "throughput": 8714.71, "total_tokens": 60563176} +{"current_steps": 89835, "total_steps": 204665, "loss": 0.0481, "lr": 1.3780657476843414e-06, "epoch": 2.1946839957980115, "percentage": 43.89, "elapsed_time": "1:55:49", "remaining_time": "2:28:03", "throughput": 8714.74, "total_tokens": 60566376} +{"current_steps": 89840, "total_steps": 204665, "loss": 0.0743, "lr": 1.3779867981204571e-06, "epoch": 2.1948061466298587, "percentage": 43.9, "elapsed_time": "1:55:50", "remaining_time": "2:28:03", "throughput": 8714.8, "total_tokens": 60569832} +{"current_steps": 89845, "total_steps": 204665, "loss": 0.0873, "lr": 1.3779078458077513e-06, "epoch": 2.194928297461706, "percentage": 43.9, "elapsed_time": "1:55:50", "remaining_time": "2:28:02", "throughput": 8714.83, "total_tokens": 60573032} +{"current_steps": 89850, "total_steps": 204665, "loss": 0.0001, "lr": 1.3778288907467982e-06, "epoch": 2.1950504482935527, "percentage": 43.9, "elapsed_time": "1:55:50", "remaining_time": "2:28:02", "throughput": 8714.87, "total_tokens": 60576296} +{"current_steps": 89855, "total_steps": 204665, "loss": 0.0002, "lr": 1.3777499329381714e-06, "epoch": 2.1951725991254, "percentage": 43.9, "elapsed_time": "1:55:51", "remaining_time": "2:28:01", "throughput": 8714.89, "total_tokens": 60579496} +{"current_steps": 89860, "total_steps": 204665, "loss": 0.046, "lr": 1.3776709723824459e-06, "epoch": 2.195294749957247, "percentage": 43.91, "elapsed_time": "1:55:51", "remaining_time": "2:28:01", "throughput": 8714.98, "total_tokens": 60583208} +{"current_steps": 89865, "total_steps": 204665, "loss": 0.1189, "lr": 1.377592009080195e-06, "epoch": 2.1954169007890942, "percentage": 43.91, "elapsed_time": "1:55:51", "remaining_time": "2:28:00", "throughput": 8715.0, "total_tokens": 60586280} +{"current_steps": 89870, "total_steps": 204665, "loss": 0.0671, "lr": 1.3775130430319936e-06, "epoch": 2.1955390516209414, "percentage": 43.91, "elapsed_time": "1:55:52", "remaining_time": "2:28:00", "throughput": 8715.03, "total_tokens": 60589544} +{"current_steps": 89875, "total_steps": 204665, "loss": 0.0458, "lr": 1.377434074238416e-06, "epoch": 2.1956612024527886, "percentage": 43.91, "elapsed_time": "1:55:52", "remaining_time": "2:28:00", "throughput": 8715.1, "total_tokens": 60593064} +{"current_steps": 89880, "total_steps": 204665, "loss": 0.0002, "lr": 1.377355102700036e-06, "epoch": 2.195783353284636, "percentage": 43.92, "elapsed_time": "1:55:52", "remaining_time": "2:27:59", "throughput": 8715.1, "total_tokens": 60596008} +{"current_steps": 89885, "total_steps": 204665, "loss": 0.087, "lr": 1.3772761284174286e-06, "epoch": 2.195905504116483, "percentage": 43.92, "elapsed_time": "1:55:53", "remaining_time": "2:27:59", "throughput": 8715.16, "total_tokens": 60599528} +{"current_steps": 89890, "total_steps": 204665, "loss": 0.0002, "lr": 1.3771971513911675e-06, "epoch": 2.19602765494833, "percentage": 43.92, "elapsed_time": "1:55:53", "remaining_time": "2:27:58", "throughput": 8715.26, "total_tokens": 60603304} +{"current_steps": 89895, "total_steps": 204665, "loss": 0.0338, "lr": 1.3771181716218277e-06, "epoch": 2.1961498057801774, "percentage": 43.92, "elapsed_time": "1:55:54", "remaining_time": "2:27:58", "throughput": 8715.29, "total_tokens": 60606504} +{"current_steps": 89900, "total_steps": 204665, "loss": 0.0557, "lr": 1.3770391891099824e-06, "epoch": 2.1962719566120246, "percentage": 43.93, "elapsed_time": "1:55:54", "remaining_time": "2:27:57", "throughput": 8715.35, "total_tokens": 60609960} +{"current_steps": 89905, "total_steps": 204665, "loss": 0.0003, "lr": 1.376960203856207e-06, "epoch": 2.1963941074438718, "percentage": 43.93, "elapsed_time": "1:55:54", "remaining_time": "2:27:57", "throughput": 8715.41, "total_tokens": 60613416} +{"current_steps": 89910, "total_steps": 204665, "loss": 0.0001, "lr": 1.3768812158610757e-06, "epoch": 2.196516258275719, "percentage": 43.93, "elapsed_time": "1:55:55", "remaining_time": "2:27:56", "throughput": 8715.44, "total_tokens": 60616616} +{"current_steps": 89915, "total_steps": 204665, "loss": 0.0, "lr": 1.3768022251251627e-06, "epoch": 2.196638409107566, "percentage": 43.93, "elapsed_time": "1:55:55", "remaining_time": "2:27:56", "throughput": 8715.5, "total_tokens": 60620072} +{"current_steps": 89920, "total_steps": 204665, "loss": 0.0513, "lr": 1.3767232316490428e-06, "epoch": 2.1967605599394133, "percentage": 43.94, "elapsed_time": "1:55:55", "remaining_time": "2:27:56", "throughput": 8715.54, "total_tokens": 60623400} +{"current_steps": 89925, "total_steps": 204665, "loss": 0.0836, "lr": 1.3766442354332899e-06, "epoch": 2.1968827107712605, "percentage": 43.94, "elapsed_time": "1:55:56", "remaining_time": "2:27:55", "throughput": 8715.57, "total_tokens": 60626600} +{"current_steps": 89930, "total_steps": 204665, "loss": 0.0261, "lr": 1.3765652364784787e-06, "epoch": 2.1970048616031077, "percentage": 43.94, "elapsed_time": "1:55:56", "remaining_time": "2:27:55", "throughput": 8715.61, "total_tokens": 60629864} +{"current_steps": 89935, "total_steps": 204665, "loss": 0.0002, "lr": 1.3764862347851844e-06, "epoch": 2.1971270124349545, "percentage": 43.94, "elapsed_time": "1:55:56", "remaining_time": "2:27:54", "throughput": 8715.7, "total_tokens": 60633640} +{"current_steps": 89940, "total_steps": 204665, "loss": 0.0729, "lr": 1.3764072303539806e-06, "epoch": 2.1972491632668016, "percentage": 43.94, "elapsed_time": "1:55:57", "remaining_time": "2:27:54", "throughput": 8715.74, "total_tokens": 60636904} +{"current_steps": 89945, "total_steps": 204665, "loss": 0.0451, "lr": 1.3763282231854425e-06, "epoch": 2.197371314098649, "percentage": 43.95, "elapsed_time": "1:55:57", "remaining_time": "2:27:53", "throughput": 8715.84, "total_tokens": 60640680} +{"current_steps": 89950, "total_steps": 204665, "loss": 0.0003, "lr": 1.376249213280144e-06, "epoch": 2.197493464930496, "percentage": 43.95, "elapsed_time": "1:55:57", "remaining_time": "2:27:53", "throughput": 8715.87, "total_tokens": 60643880} +{"current_steps": 89955, "total_steps": 204665, "loss": 0.0311, "lr": 1.37617020063866e-06, "epoch": 2.197615615762343, "percentage": 43.95, "elapsed_time": "1:55:58", "remaining_time": "2:27:53", "throughput": 8715.89, "total_tokens": 60647016} +{"current_steps": 89960, "total_steps": 204665, "loss": 0.0001, "lr": 1.3760911852615654e-06, "epoch": 2.1977377665941904, "percentage": 43.95, "elapsed_time": "1:55:58", "remaining_time": "2:27:52", "throughput": 8715.92, "total_tokens": 60650216} +{"current_steps": 89965, "total_steps": 204665, "loss": 0.0002, "lr": 1.376012167149434e-06, "epoch": 2.1978599174260376, "percentage": 43.96, "elapsed_time": "1:55:58", "remaining_time": "2:27:52", "throughput": 8715.93, "total_tokens": 60653224} +{"current_steps": 89970, "total_steps": 204665, "loss": 0.0841, "lr": 1.3759331463028414e-06, "epoch": 2.197982068257885, "percentage": 43.96, "elapsed_time": "1:55:59", "remaining_time": "2:27:51", "throughput": 8715.97, "total_tokens": 60656552} +{"current_steps": 89975, "total_steps": 204665, "loss": 0.097, "lr": 1.3758541227223618e-06, "epoch": 2.198104219089732, "percentage": 43.96, "elapsed_time": "1:55:59", "remaining_time": "2:27:51", "throughput": 8716.12, "total_tokens": 60660776} +{"current_steps": 89980, "total_steps": 204665, "loss": 0.0182, "lr": 1.3757750964085698e-06, "epoch": 2.198226369921579, "percentage": 43.96, "elapsed_time": "1:55:59", "remaining_time": "2:27:50", "throughput": 8716.17, "total_tokens": 60664168} +{"current_steps": 89985, "total_steps": 204665, "loss": 0.0001, "lr": 1.3756960673620403e-06, "epoch": 2.1983485207534263, "percentage": 43.97, "elapsed_time": "1:56:00", "remaining_time": "2:27:50", "throughput": 8716.23, "total_tokens": 60667688} +{"current_steps": 89990, "total_steps": 204665, "loss": 0.0001, "lr": 1.375617035583348e-06, "epoch": 2.1984706715852735, "percentage": 43.97, "elapsed_time": "1:56:00", "remaining_time": "2:27:50", "throughput": 8716.27, "total_tokens": 60670952} +{"current_steps": 89995, "total_steps": 204665, "loss": 0.0002, "lr": 1.3755380010730677e-06, "epoch": 2.1985928224171207, "percentage": 43.97, "elapsed_time": "1:56:01", "remaining_time": "2:27:49", "throughput": 8716.35, "total_tokens": 60674600} +{"current_steps": 90000, "total_steps": 204665, "loss": 0.0679, "lr": 1.375458963831774e-06, "epoch": 2.198714973248968, "percentage": 43.97, "elapsed_time": "1:56:01", "remaining_time": "2:27:49", "throughput": 8716.38, "total_tokens": 60677800} +{"current_steps": 90005, "total_steps": 204665, "loss": 0.0548, "lr": 1.3753799238600416e-06, "epoch": 2.198837124080815, "percentage": 43.98, "elapsed_time": "1:56:01", "remaining_time": "2:27:48", "throughput": 8716.43, "total_tokens": 60681192} +{"current_steps": 90010, "total_steps": 204665, "loss": 0.0006, "lr": 1.3753008811584455e-06, "epoch": 2.1989592749126623, "percentage": 43.98, "elapsed_time": "1:56:02", "remaining_time": "2:27:48", "throughput": 8716.45, "total_tokens": 60684328} +{"current_steps": 90015, "total_steps": 204665, "loss": 0.0643, "lr": 1.3752218357275605e-06, "epoch": 2.1990814257445095, "percentage": 43.98, "elapsed_time": "1:56:02", "remaining_time": "2:27:47", "throughput": 8716.49, "total_tokens": 60687528} +{"current_steps": 90020, "total_steps": 204665, "loss": 0.0002, "lr": 1.3751427875679613e-06, "epoch": 2.1992035765763567, "percentage": 43.98, "elapsed_time": "1:56:02", "remaining_time": "2:27:47", "throughput": 8716.58, "total_tokens": 60691304} +{"current_steps": 90025, "total_steps": 204665, "loss": 0.1813, "lr": 1.3750637366802227e-06, "epoch": 2.199325727408204, "percentage": 43.99, "elapsed_time": "1:56:03", "remaining_time": "2:27:46", "throughput": 8716.59, "total_tokens": 60694376} +{"current_steps": 90030, "total_steps": 204665, "loss": 0.0651, "lr": 1.37498468306492e-06, "epoch": 2.1994478782400506, "percentage": 43.99, "elapsed_time": "1:56:03", "remaining_time": "2:27:46", "throughput": 8716.59, "total_tokens": 60697320} +{"current_steps": 90035, "total_steps": 204665, "loss": 0.0005, "lr": 1.3749056267226276e-06, "epoch": 2.199570029071898, "percentage": 43.99, "elapsed_time": "1:56:03", "remaining_time": "2:27:46", "throughput": 8716.63, "total_tokens": 60700648} +{"current_steps": 90040, "total_steps": 204665, "loss": 0.0003, "lr": 1.3748265676539207e-06, "epoch": 2.199692179903745, "percentage": 43.99, "elapsed_time": "1:56:04", "remaining_time": "2:27:45", "throughput": 8716.64, "total_tokens": 60703656} +{"current_steps": 90045, "total_steps": 204665, "loss": 0.0727, "lr": 1.3747475058593742e-06, "epoch": 2.199814330735592, "percentage": 44.0, "elapsed_time": "1:56:04", "remaining_time": "2:27:45", "throughput": 8716.67, "total_tokens": 60706856} +{"current_steps": 90050, "total_steps": 204665, "loss": 0.0516, "lr": 1.3746684413395634e-06, "epoch": 2.1999364815674394, "percentage": 44.0, "elapsed_time": "1:56:04", "remaining_time": "2:27:44", "throughput": 8716.77, "total_tokens": 60710632} +{"current_steps": 90055, "total_steps": 204665, "loss": 0.0003, "lr": 1.3745893740950622e-06, "epoch": 2.2000586323992866, "percentage": 44.0, "elapsed_time": "1:56:05", "remaining_time": "2:27:44", "throughput": 8716.85, "total_tokens": 60714344} +{"current_steps": 90060, "total_steps": 204665, "loss": 0.0949, "lr": 1.374510304126447e-06, "epoch": 2.2001807832311338, "percentage": 44.0, "elapsed_time": "1:56:05", "remaining_time": "2:27:43", "throughput": 8716.87, "total_tokens": 60717480} +{"current_steps": 90065, "total_steps": 204665, "loss": 0.1318, "lr": 1.3744312314342918e-06, "epoch": 2.200302934062981, "percentage": 44.01, "elapsed_time": "1:56:05", "remaining_time": "2:27:43", "throughput": 8717.01, "total_tokens": 60721576} +{"current_steps": 90070, "total_steps": 204665, "loss": 0.0243, "lr": 1.374352156019172e-06, "epoch": 2.200425084894828, "percentage": 44.01, "elapsed_time": "1:56:06", "remaining_time": "2:27:43", "throughput": 8717.03, "total_tokens": 60724712} +{"current_steps": 90075, "total_steps": 204665, "loss": 0.0779, "lr": 1.3742730778816626e-06, "epoch": 2.2005472357266753, "percentage": 44.01, "elapsed_time": "1:56:06", "remaining_time": "2:27:42", "throughput": 8717.09, "total_tokens": 60728168} +{"current_steps": 90080, "total_steps": 204665, "loss": 0.0005, "lr": 1.3741939970223388e-06, "epoch": 2.2006693865585225, "percentage": 44.01, "elapsed_time": "1:56:06", "remaining_time": "2:27:42", "throughput": 8717.2, "total_tokens": 60732072} +{"current_steps": 90085, "total_steps": 204665, "loss": 0.0424, "lr": 1.3741149134417756e-06, "epoch": 2.2007915373903697, "percentage": 44.02, "elapsed_time": "1:56:07", "remaining_time": "2:27:41", "throughput": 8717.25, "total_tokens": 60735464} +{"current_steps": 90090, "total_steps": 204665, "loss": 0.0006, "lr": 1.3740358271405481e-06, "epoch": 2.200913688222217, "percentage": 44.02, "elapsed_time": "1:56:07", "remaining_time": "2:27:41", "throughput": 8717.28, "total_tokens": 60738664} +{"current_steps": 90095, "total_steps": 204665, "loss": 0.0478, "lr": 1.3739567381192316e-06, "epoch": 2.201035839054064, "percentage": 44.02, "elapsed_time": "1:56:07", "remaining_time": "2:27:40", "throughput": 8717.33, "total_tokens": 60742056} +{"current_steps": 90100, "total_steps": 204665, "loss": 0.0002, "lr": 1.373877646378401e-06, "epoch": 2.2011579898859113, "percentage": 44.02, "elapsed_time": "1:56:08", "remaining_time": "2:27:40", "throughput": 8717.33, "total_tokens": 60745000} +{"current_steps": 90105, "total_steps": 204665, "loss": 0.0397, "lr": 1.3737985519186316e-06, "epoch": 2.2012801407177585, "percentage": 44.03, "elapsed_time": "1:56:08", "remaining_time": "2:27:39", "throughput": 8717.37, "total_tokens": 60748328} +{"current_steps": 90110, "total_steps": 204665, "loss": 0.0622, "lr": 1.3737194547404986e-06, "epoch": 2.2014022915496056, "percentage": 44.03, "elapsed_time": "1:56:08", "remaining_time": "2:27:39", "throughput": 8717.4, "total_tokens": 60751528} +{"current_steps": 90115, "total_steps": 204665, "loss": 0.0006, "lr": 1.373640354844577e-06, "epoch": 2.2015244423814524, "percentage": 44.03, "elapsed_time": "1:56:09", "remaining_time": "2:27:39", "throughput": 8717.4, "total_tokens": 60754472} +{"current_steps": 90120, "total_steps": 204665, "loss": 0.0004, "lr": 1.3735612522314423e-06, "epoch": 2.2016465932132996, "percentage": 44.03, "elapsed_time": "1:56:09", "remaining_time": "2:27:38", "throughput": 8717.43, "total_tokens": 60757672} +{"current_steps": 90125, "total_steps": 204665, "loss": 0.0477, "lr": 1.37348214690167e-06, "epoch": 2.2017687440451468, "percentage": 44.04, "elapsed_time": "1:56:10", "remaining_time": "2:27:38", "throughput": 8717.45, "total_tokens": 60760808} +{"current_steps": 90130, "total_steps": 204665, "loss": 0.0005, "lr": 1.373403038855835e-06, "epoch": 2.201890894876994, "percentage": 44.04, "elapsed_time": "1:56:10", "remaining_time": "2:27:37", "throughput": 8717.54, "total_tokens": 60764520} +{"current_steps": 90135, "total_steps": 204665, "loss": 0.0005, "lr": 1.3733239280945124e-06, "epoch": 2.202013045708841, "percentage": 44.04, "elapsed_time": "1:56:10", "remaining_time": "2:27:37", "throughput": 8717.62, "total_tokens": 60768168} +{"current_steps": 90140, "total_steps": 204665, "loss": 0.0753, "lr": 1.373244814618278e-06, "epoch": 2.2021351965406883, "percentage": 44.04, "elapsed_time": "1:56:11", "remaining_time": "2:27:36", "throughput": 8717.64, "total_tokens": 60771304} +{"current_steps": 90145, "total_steps": 204665, "loss": 0.0331, "lr": 1.3731656984277069e-06, "epoch": 2.2022573473725355, "percentage": 44.05, "elapsed_time": "1:56:11", "remaining_time": "2:27:36", "throughput": 8717.69, "total_tokens": 60774632} +{"current_steps": 90150, "total_steps": 204665, "loss": 0.0361, "lr": 1.3730865795233744e-06, "epoch": 2.2023794982043827, "percentage": 44.05, "elapsed_time": "1:56:11", "remaining_time": "2:27:36", "throughput": 8717.72, "total_tokens": 60777896} +{"current_steps": 90155, "total_steps": 204665, "loss": 0.0006, "lr": 1.373007457905856e-06, "epoch": 2.20250164903623, "percentage": 44.05, "elapsed_time": "1:56:12", "remaining_time": "2:27:35", "throughput": 8717.75, "total_tokens": 60781096} +{"current_steps": 90160, "total_steps": 204665, "loss": 0.0003, "lr": 1.3729283335757272e-06, "epoch": 2.202623799868077, "percentage": 44.05, "elapsed_time": "1:56:12", "remaining_time": "2:27:35", "throughput": 8717.82, "total_tokens": 60784616} +{"current_steps": 90165, "total_steps": 204665, "loss": 0.0003, "lr": 1.372849206533563e-06, "epoch": 2.2027459506999243, "percentage": 44.05, "elapsed_time": "1:56:12", "remaining_time": "2:27:34", "throughput": 8717.85, "total_tokens": 60787816} +{"current_steps": 90170, "total_steps": 204665, "loss": 0.0515, "lr": 1.3727700767799393e-06, "epoch": 2.2028681015317715, "percentage": 44.06, "elapsed_time": "1:56:13", "remaining_time": "2:27:34", "throughput": 8717.85, "total_tokens": 60790760} +{"current_steps": 90175, "total_steps": 204665, "loss": 0.0002, "lr": 1.372690944315431e-06, "epoch": 2.2029902523636187, "percentage": 44.06, "elapsed_time": "1:56:13", "remaining_time": "2:27:33", "throughput": 8717.85, "total_tokens": 60793768} +{"current_steps": 90180, "total_steps": 204665, "loss": 0.0848, "lr": 1.372611809140614e-06, "epoch": 2.203112403195466, "percentage": 44.06, "elapsed_time": "1:56:13", "remaining_time": "2:27:33", "throughput": 8717.89, "total_tokens": 60797032} +{"current_steps": 90185, "total_steps": 204665, "loss": 0.0307, "lr": 1.3725326712560638e-06, "epoch": 2.203234554027313, "percentage": 44.06, "elapsed_time": "1:56:14", "remaining_time": "2:27:32", "throughput": 8717.92, "total_tokens": 60800232} +{"current_steps": 90190, "total_steps": 204665, "loss": 0.0562, "lr": 1.3724535306623558e-06, "epoch": 2.2033567048591602, "percentage": 44.07, "elapsed_time": "1:56:14", "remaining_time": "2:27:32", "throughput": 8718.01, "total_tokens": 60803880} +{"current_steps": 90195, "total_steps": 204665, "loss": 0.0002, "lr": 1.3723743873600658e-06, "epoch": 2.2034788556910074, "percentage": 44.07, "elapsed_time": "1:56:14", "remaining_time": "2:27:32", "throughput": 8718.07, "total_tokens": 60807400} +{"current_steps": 90200, "total_steps": 204665, "loss": 0.0491, "lr": 1.3722952413497689e-06, "epoch": 2.2036010065228546, "percentage": 44.07, "elapsed_time": "1:56:15", "remaining_time": "2:27:31", "throughput": 8718.08, "total_tokens": 60810472} +{"current_steps": 90205, "total_steps": 204665, "loss": 0.0502, "lr": 1.372216092632041e-06, "epoch": 2.203723157354702, "percentage": 44.07, "elapsed_time": "1:56:15", "remaining_time": "2:27:31", "throughput": 8718.17, "total_tokens": 60814120} +{"current_steps": 90210, "total_steps": 204665, "loss": 0.0286, "lr": 1.372136941207457e-06, "epoch": 2.2038453081865486, "percentage": 44.08, "elapsed_time": "1:56:15", "remaining_time": "2:27:30", "throughput": 8718.22, "total_tokens": 60817512} +{"current_steps": 90215, "total_steps": 204665, "loss": 0.0015, "lr": 1.3720577870765934e-06, "epoch": 2.2039674590183957, "percentage": 44.08, "elapsed_time": "1:56:16", "remaining_time": "2:27:30", "throughput": 8718.3, "total_tokens": 60821160} +{"current_steps": 90220, "total_steps": 204665, "loss": 0.0001, "lr": 1.3719786302400258e-06, "epoch": 2.204089609850243, "percentage": 44.08, "elapsed_time": "1:56:16", "remaining_time": "2:27:29", "throughput": 8718.38, "total_tokens": 60824808} +{"current_steps": 90225, "total_steps": 204665, "loss": 0.0117, "lr": 1.3718994706983293e-06, "epoch": 2.20421176068209, "percentage": 44.08, "elapsed_time": "1:56:16", "remaining_time": "2:27:29", "throughput": 8718.43, "total_tokens": 60828136} +{"current_steps": 90230, "total_steps": 204665, "loss": 0.0005, "lr": 1.3718203084520798e-06, "epoch": 2.2043339115139373, "percentage": 44.09, "elapsed_time": "1:56:17", "remaining_time": "2:27:29", "throughput": 8718.44, "total_tokens": 60831208} +{"current_steps": 90235, "total_steps": 204665, "loss": 0.0699, "lr": 1.371741143501853e-06, "epoch": 2.2044560623457845, "percentage": 44.09, "elapsed_time": "1:56:17", "remaining_time": "2:27:28", "throughput": 8718.47, "total_tokens": 60834344} +{"current_steps": 90240, "total_steps": 204665, "loss": 0.0004, "lr": 1.3716619758482249e-06, "epoch": 2.2045782131776317, "percentage": 44.09, "elapsed_time": "1:56:17", "remaining_time": "2:27:28", "throughput": 8718.52, "total_tokens": 60837672} +{"current_steps": 90245, "total_steps": 204665, "loss": 0.0489, "lr": 1.3715828054917705e-06, "epoch": 2.204700364009479, "percentage": 44.09, "elapsed_time": "1:56:18", "remaining_time": "2:27:27", "throughput": 8718.63, "total_tokens": 60841576} +{"current_steps": 90250, "total_steps": 204665, "loss": 0.044, "lr": 1.3715036324330665e-06, "epoch": 2.204822514841326, "percentage": 44.1, "elapsed_time": "1:56:18", "remaining_time": "2:27:27", "throughput": 8718.68, "total_tokens": 60845032} +{"current_steps": 90255, "total_steps": 204665, "loss": 0.0004, "lr": 1.3714244566726878e-06, "epoch": 2.2049446656731733, "percentage": 44.1, "elapsed_time": "1:56:19", "remaining_time": "2:27:26", "throughput": 8718.8, "total_tokens": 60848936} +{"current_steps": 90260, "total_steps": 204665, "loss": 0.0502, "lr": 1.3713452782112107e-06, "epoch": 2.2050668165050205, "percentage": 44.1, "elapsed_time": "1:56:19", "remaining_time": "2:27:26", "throughput": 8718.87, "total_tokens": 60852520} +{"current_steps": 90265, "total_steps": 204665, "loss": 0.0002, "lr": 1.3712660970492108e-06, "epoch": 2.2051889673368676, "percentage": 44.1, "elapsed_time": "1:56:19", "remaining_time": "2:27:25", "throughput": 8718.93, "total_tokens": 60855976} +{"current_steps": 90270, "total_steps": 204665, "loss": 0.0004, "lr": 1.371186913187264e-06, "epoch": 2.205311118168715, "percentage": 44.11, "elapsed_time": "1:56:20", "remaining_time": "2:27:25", "throughput": 8718.97, "total_tokens": 60859240} +{"current_steps": 90275, "total_steps": 204665, "loss": 0.0915, "lr": 1.3711077266259459e-06, "epoch": 2.205433269000562, "percentage": 44.11, "elapsed_time": "1:56:20", "remaining_time": "2:27:25", "throughput": 8718.98, "total_tokens": 60862312} +{"current_steps": 90280, "total_steps": 204665, "loss": 0.1077, "lr": 1.3710285373658328e-06, "epoch": 2.205555419832409, "percentage": 44.11, "elapsed_time": "1:56:20", "remaining_time": "2:27:24", "throughput": 8719.03, "total_tokens": 60865640} +{"current_steps": 90285, "total_steps": 204665, "loss": 0.0458, "lr": 1.3709493454075004e-06, "epoch": 2.2056775706642564, "percentage": 44.11, "elapsed_time": "1:56:21", "remaining_time": "2:27:24", "throughput": 8719.09, "total_tokens": 60869160} +{"current_steps": 90290, "total_steps": 204665, "loss": 0.0004, "lr": 1.3708701507515245e-06, "epoch": 2.2057997214961036, "percentage": 44.12, "elapsed_time": "1:56:21", "remaining_time": "2:27:23", "throughput": 8719.14, "total_tokens": 60872488} +{"current_steps": 90295, "total_steps": 204665, "loss": 0.0002, "lr": 1.3707909533984811e-06, "epoch": 2.2059218723279503, "percentage": 44.12, "elapsed_time": "1:56:21", "remaining_time": "2:27:23", "throughput": 8719.14, "total_tokens": 60875496} +{"current_steps": 90300, "total_steps": 204665, "loss": 0.0952, "lr": 1.3707117533489463e-06, "epoch": 2.2060440231597975, "percentage": 44.12, "elapsed_time": "1:56:22", "remaining_time": "2:27:22", "throughput": 8719.16, "total_tokens": 60878632} +{"current_steps": 90305, "total_steps": 204665, "loss": 0.0002, "lr": 1.370632550603496e-06, "epoch": 2.2061661739916447, "percentage": 44.12, "elapsed_time": "1:56:22", "remaining_time": "2:27:22", "throughput": 8719.18, "total_tokens": 60881704} +{"current_steps": 90310, "total_steps": 204665, "loss": 0.0509, "lr": 1.3705533451627058e-06, "epoch": 2.206288324823492, "percentage": 44.13, "elapsed_time": "1:56:22", "remaining_time": "2:27:22", "throughput": 8719.22, "total_tokens": 60884968} +{"current_steps": 90315, "total_steps": 204665, "loss": 0.0392, "lr": 1.3704741370271522e-06, "epoch": 2.206410475655339, "percentage": 44.13, "elapsed_time": "1:56:23", "remaining_time": "2:27:21", "throughput": 8719.28, "total_tokens": 60888488} +{"current_steps": 90320, "total_steps": 204665, "loss": 0.0003, "lr": 1.370394926197411e-06, "epoch": 2.2065326264871863, "percentage": 44.13, "elapsed_time": "1:56:23", "remaining_time": "2:27:21", "throughput": 8719.32, "total_tokens": 60891816} +{"current_steps": 90325, "total_steps": 204665, "loss": 0.0002, "lr": 1.3703157126740583e-06, "epoch": 2.2066547773190335, "percentage": 44.13, "elapsed_time": "1:56:23", "remaining_time": "2:27:20", "throughput": 8719.38, "total_tokens": 60895272} +{"current_steps": 90330, "total_steps": 204665, "loss": 0.0567, "lr": 1.37023649645767e-06, "epoch": 2.2067769281508807, "percentage": 44.14, "elapsed_time": "1:56:24", "remaining_time": "2:27:20", "throughput": 8719.44, "total_tokens": 60898728} +{"current_steps": 90335, "total_steps": 204665, "loss": 0.0215, "lr": 1.3701572775488225e-06, "epoch": 2.206899078982728, "percentage": 44.14, "elapsed_time": "1:56:24", "remaining_time": "2:27:19", "throughput": 8719.49, "total_tokens": 60902120} +{"current_steps": 90340, "total_steps": 204665, "loss": 0.1538, "lr": 1.3700780559480913e-06, "epoch": 2.207021229814575, "percentage": 44.14, "elapsed_time": "1:56:24", "remaining_time": "2:27:19", "throughput": 8719.56, "total_tokens": 60905640} +{"current_steps": 90345, "total_steps": 204665, "loss": 0.0001, "lr": 1.3699988316560536e-06, "epoch": 2.2071433806464222, "percentage": 44.14, "elapsed_time": "1:56:25", "remaining_time": "2:27:19", "throughput": 8719.64, "total_tokens": 60909288} +{"current_steps": 90350, "total_steps": 204665, "loss": 0.0392, "lr": 1.3699196046732844e-06, "epoch": 2.2072655314782694, "percentage": 44.15, "elapsed_time": "1:56:25", "remaining_time": "2:27:18", "throughput": 8719.69, "total_tokens": 60912680} +{"current_steps": 90355, "total_steps": 204665, "loss": 0.0567, "lr": 1.3698403750003604e-06, "epoch": 2.2073876823101166, "percentage": 44.15, "elapsed_time": "1:56:26", "remaining_time": "2:27:18", "throughput": 8719.76, "total_tokens": 60916264} +{"current_steps": 90360, "total_steps": 204665, "loss": 0.064, "lr": 1.3697611426378582e-06, "epoch": 2.207509833141964, "percentage": 44.15, "elapsed_time": "1:56:26", "remaining_time": "2:27:17", "throughput": 8719.79, "total_tokens": 60919400} +{"current_steps": 90365, "total_steps": 204665, "loss": 0.0507, "lr": 1.3696819075863527e-06, "epoch": 2.207631983973811, "percentage": 44.15, "elapsed_time": "1:56:26", "remaining_time": "2:27:17", "throughput": 8719.79, "total_tokens": 60922408} +{"current_steps": 90370, "total_steps": 204665, "loss": 0.0576, "lr": 1.3696026698464216e-06, "epoch": 2.207754134805658, "percentage": 44.16, "elapsed_time": "1:56:27", "remaining_time": "2:27:16", "throughput": 8719.82, "total_tokens": 60925608} +{"current_steps": 90375, "total_steps": 204665, "loss": 0.0005, "lr": 1.3695234294186403e-06, "epoch": 2.2078762856375054, "percentage": 44.16, "elapsed_time": "1:56:27", "remaining_time": "2:27:16", "throughput": 8719.83, "total_tokens": 60928616} +{"current_steps": 90380, "total_steps": 204665, "loss": 0.039, "lr": 1.369444186303585e-06, "epoch": 2.207998436469352, "percentage": 44.16, "elapsed_time": "1:56:27", "remaining_time": "2:27:15", "throughput": 8719.89, "total_tokens": 60932136} +{"current_steps": 90385, "total_steps": 204665, "loss": 0.0706, "lr": 1.3693649405018323e-06, "epoch": 2.2081205873011998, "percentage": 44.16, "elapsed_time": "1:56:28", "remaining_time": "2:27:15", "throughput": 8719.96, "total_tokens": 60935656} +{"current_steps": 90390, "total_steps": 204665, "loss": 0.0013, "lr": 1.3692856920139586e-06, "epoch": 2.2082427381330465, "percentage": 44.16, "elapsed_time": "1:56:28", "remaining_time": "2:27:15", "throughput": 8719.97, "total_tokens": 60938664} +{"current_steps": 90395, "total_steps": 204665, "loss": 0.1045, "lr": 1.36920644084054e-06, "epoch": 2.2083648889648937, "percentage": 44.17, "elapsed_time": "1:56:28", "remaining_time": "2:27:14", "throughput": 8720.03, "total_tokens": 60942120} +{"current_steps": 90400, "total_steps": 204665, "loss": 0.0011, "lr": 1.3691271869821526e-06, "epoch": 2.208487039796741, "percentage": 44.17, "elapsed_time": "1:56:29", "remaining_time": "2:27:14", "throughput": 8720.09, "total_tokens": 60945640} +{"current_steps": 90405, "total_steps": 204665, "loss": 0.0944, "lr": 1.369047930439373e-06, "epoch": 2.208609190628588, "percentage": 44.17, "elapsed_time": "1:56:29", "remaining_time": "2:27:13", "throughput": 8720.15, "total_tokens": 60949096} +{"current_steps": 90410, "total_steps": 204665, "loss": 0.0002, "lr": 1.368968671212778e-06, "epoch": 2.2087313414604353, "percentage": 44.17, "elapsed_time": "1:56:29", "remaining_time": "2:27:13", "throughput": 8720.24, "total_tokens": 60952808} +{"current_steps": 90415, "total_steps": 204665, "loss": 0.0506, "lr": 1.3688894093029432e-06, "epoch": 2.2088534922922824, "percentage": 44.18, "elapsed_time": "1:56:30", "remaining_time": "2:27:12", "throughput": 8720.25, "total_tokens": 60955880} +{"current_steps": 90420, "total_steps": 204665, "loss": 0.0004, "lr": 1.3688101447104456e-06, "epoch": 2.2089756431241296, "percentage": 44.18, "elapsed_time": "1:56:30", "remaining_time": "2:27:12", "throughput": 8720.29, "total_tokens": 60959208} +{"current_steps": 90425, "total_steps": 204665, "loss": 0.0005, "lr": 1.3687308774358616e-06, "epoch": 2.209097793955977, "percentage": 44.18, "elapsed_time": "1:56:30", "remaining_time": "2:27:12", "throughput": 8720.34, "total_tokens": 60962600} +{"current_steps": 90430, "total_steps": 204665, "loss": 0.0002, "lr": 1.368651607479767e-06, "epoch": 2.209219944787824, "percentage": 44.18, "elapsed_time": "1:56:31", "remaining_time": "2:27:11", "throughput": 8720.43, "total_tokens": 60966248} +{"current_steps": 90435, "total_steps": 204665, "loss": 0.0005, "lr": 1.3685723348427388e-06, "epoch": 2.209342095619671, "percentage": 44.19, "elapsed_time": "1:56:31", "remaining_time": "2:27:11", "throughput": 8720.46, "total_tokens": 60969448} +{"current_steps": 90440, "total_steps": 204665, "loss": 0.0094, "lr": 1.3684930595253538e-06, "epoch": 2.2094642464515184, "percentage": 44.19, "elapsed_time": "1:56:31", "remaining_time": "2:27:10", "throughput": 8720.5, "total_tokens": 60972776} +{"current_steps": 90445, "total_steps": 204665, "loss": 0.0001, "lr": 1.3684137815281882e-06, "epoch": 2.2095863972833656, "percentage": 44.19, "elapsed_time": "1:56:32", "remaining_time": "2:27:10", "throughput": 8720.54, "total_tokens": 60976040} +{"current_steps": 90450, "total_steps": 204665, "loss": 0.0887, "lr": 1.3683345008518181e-06, "epoch": 2.209708548115213, "percentage": 44.19, "elapsed_time": "1:56:32", "remaining_time": "2:27:09", "throughput": 8720.57, "total_tokens": 60979304} +{"current_steps": 90455, "total_steps": 204665, "loss": 0.074, "lr": 1.3682552174968208e-06, "epoch": 2.20983069894706, "percentage": 44.2, "elapsed_time": "1:56:32", "remaining_time": "2:27:09", "throughput": 8720.62, "total_tokens": 60982632} +{"current_steps": 90460, "total_steps": 204665, "loss": 0.0298, "lr": 1.3681759314637723e-06, "epoch": 2.209952849778907, "percentage": 44.2, "elapsed_time": "1:56:33", "remaining_time": "2:27:08", "throughput": 8720.71, "total_tokens": 60986408} +{"current_steps": 90465, "total_steps": 204665, "loss": 0.0002, "lr": 1.3680966427532494e-06, "epoch": 2.2100750006107543, "percentage": 44.2, "elapsed_time": "1:56:33", "remaining_time": "2:27:08", "throughput": 8720.79, "total_tokens": 60989992} +{"current_steps": 90470, "total_steps": 204665, "loss": 0.0217, "lr": 1.3680173513658289e-06, "epoch": 2.2101971514426015, "percentage": 44.2, "elapsed_time": "1:56:33", "remaining_time": "2:27:08", "throughput": 8720.83, "total_tokens": 60993320} +{"current_steps": 90475, "total_steps": 204665, "loss": 0.0378, "lr": 1.367938057302087e-06, "epoch": 2.2103193022744483, "percentage": 44.21, "elapsed_time": "1:56:34", "remaining_time": "2:27:07", "throughput": 8720.88, "total_tokens": 60996712} +{"current_steps": 90480, "total_steps": 204665, "loss": 0.0004, "lr": 1.3678587605626007e-06, "epoch": 2.2104414531062955, "percentage": 44.21, "elapsed_time": "1:56:34", "remaining_time": "2:27:07", "throughput": 8720.94, "total_tokens": 61000168} +{"current_steps": 90485, "total_steps": 204665, "loss": 0.0011, "lr": 1.3677794611479466e-06, "epoch": 2.2105636039381427, "percentage": 44.21, "elapsed_time": "1:56:35", "remaining_time": "2:27:06", "throughput": 8720.96, "total_tokens": 61003304} +{"current_steps": 90490, "total_steps": 204665, "loss": 0.0577, "lr": 1.367700159058701e-06, "epoch": 2.21068575476999, "percentage": 44.21, "elapsed_time": "1:56:35", "remaining_time": "2:27:06", "throughput": 8721.02, "total_tokens": 61006824} +{"current_steps": 90495, "total_steps": 204665, "loss": 0.0446, "lr": 1.3676208542954414e-06, "epoch": 2.210807905601837, "percentage": 44.22, "elapsed_time": "1:56:35", "remaining_time": "2:27:05", "throughput": 8721.06, "total_tokens": 61010152} +{"current_steps": 90500, "total_steps": 204665, "loss": 0.1367, "lr": 1.3675415468587436e-06, "epoch": 2.2109300564336842, "percentage": 44.22, "elapsed_time": "1:56:36", "remaining_time": "2:27:05", "throughput": 8721.12, "total_tokens": 61013608} +{"current_steps": 90505, "total_steps": 204665, "loss": 0.0008, "lr": 1.3674622367491852e-06, "epoch": 2.2110522072655314, "percentage": 44.22, "elapsed_time": "1:56:36", "remaining_time": "2:27:05", "throughput": 8721.15, "total_tokens": 61016872} +{"current_steps": 90510, "total_steps": 204665, "loss": 0.0004, "lr": 1.3673829239673424e-06, "epoch": 2.2111743580973786, "percentage": 44.22, "elapsed_time": "1:56:36", "remaining_time": "2:27:04", "throughput": 8721.2, "total_tokens": 61020200} +{"current_steps": 90515, "total_steps": 204665, "loss": 0.0008, "lr": 1.3673036085137926e-06, "epoch": 2.211296508929226, "percentage": 44.23, "elapsed_time": "1:56:37", "remaining_time": "2:27:04", "throughput": 8721.23, "total_tokens": 61023464} +{"current_steps": 90520, "total_steps": 204665, "loss": 0.0002, "lr": 1.3672242903891117e-06, "epoch": 2.211418659761073, "percentage": 44.23, "elapsed_time": "1:56:37", "remaining_time": "2:27:03", "throughput": 8721.32, "total_tokens": 61027176} +{"current_steps": 90525, "total_steps": 204665, "loss": 0.0002, "lr": 1.3671449695938768e-06, "epoch": 2.21154081059292, "percentage": 44.23, "elapsed_time": "1:56:37", "remaining_time": "2:27:03", "throughput": 8721.36, "total_tokens": 61030440} +{"current_steps": 90530, "total_steps": 204665, "loss": 0.0004, "lr": 1.3670656461286655e-06, "epoch": 2.2116629614247674, "percentage": 44.23, "elapsed_time": "1:56:38", "remaining_time": "2:27:02", "throughput": 8721.42, "total_tokens": 61033896} +{"current_steps": 90535, "total_steps": 204665, "loss": 0.011, "lr": 1.3669863199940538e-06, "epoch": 2.2117851122566146, "percentage": 44.24, "elapsed_time": "1:56:38", "remaining_time": "2:27:02", "throughput": 8721.45, "total_tokens": 61037096} +{"current_steps": 90540, "total_steps": 204665, "loss": 0.0002, "lr": 1.3669069911906189e-06, "epoch": 2.2119072630884618, "percentage": 44.24, "elapsed_time": "1:56:38", "remaining_time": "2:27:02", "throughput": 8721.52, "total_tokens": 61040680} +{"current_steps": 90545, "total_steps": 204665, "loss": 0.1162, "lr": 1.3668276597189375e-06, "epoch": 2.212029413920309, "percentage": 44.24, "elapsed_time": "1:56:39", "remaining_time": "2:27:01", "throughput": 8721.57, "total_tokens": 61044072} +{"current_steps": 90550, "total_steps": 204665, "loss": 0.0005, "lr": 1.3667483255795868e-06, "epoch": 2.212151564752156, "percentage": 44.24, "elapsed_time": "1:56:39", "remaining_time": "2:27:01", "throughput": 8721.6, "total_tokens": 61047208} +{"current_steps": 90555, "total_steps": 204665, "loss": 0.0625, "lr": 1.3666689887731434e-06, "epoch": 2.2122737155840033, "percentage": 44.25, "elapsed_time": "1:56:39", "remaining_time": "2:27:00", "throughput": 8721.64, "total_tokens": 61050536} +{"current_steps": 90560, "total_steps": 204665, "loss": 0.0355, "lr": 1.366589649300185e-06, "epoch": 2.21239586641585, "percentage": 44.25, "elapsed_time": "1:56:40", "remaining_time": "2:27:00", "throughput": 8721.7, "total_tokens": 61053992} +{"current_steps": 90565, "total_steps": 204665, "loss": 0.1054, "lr": 1.366510307161288e-06, "epoch": 2.2125180172476973, "percentage": 44.25, "elapsed_time": "1:56:40", "remaining_time": "2:26:59", "throughput": 8721.76, "total_tokens": 61057448} +{"current_steps": 90570, "total_steps": 204665, "loss": 0.0379, "lr": 1.3664309623570293e-06, "epoch": 2.2126401680795444, "percentage": 44.25, "elapsed_time": "1:56:40", "remaining_time": "2:26:59", "throughput": 8721.76, "total_tokens": 61060456} +{"current_steps": 90575, "total_steps": 204665, "loss": 0.1337, "lr": 1.3663516148879861e-06, "epoch": 2.2127623189113916, "percentage": 44.26, "elapsed_time": "1:56:41", "remaining_time": "2:26:58", "throughput": 8721.85, "total_tokens": 61064168} +{"current_steps": 90580, "total_steps": 204665, "loss": 0.1482, "lr": 1.3662722647547355e-06, "epoch": 2.212884469743239, "percentage": 44.26, "elapsed_time": "1:56:41", "remaining_time": "2:26:58", "throughput": 8721.92, "total_tokens": 61067688} +{"current_steps": 90585, "total_steps": 204665, "loss": 0.0408, "lr": 1.366192911957854e-06, "epoch": 2.213006620575086, "percentage": 44.26, "elapsed_time": "1:56:41", "remaining_time": "2:26:58", "throughput": 8721.97, "total_tokens": 61071016} +{"current_steps": 90590, "total_steps": 204665, "loss": 0.0242, "lr": 1.3661135564979198e-06, "epoch": 2.213128771406933, "percentage": 44.26, "elapsed_time": "1:56:42", "remaining_time": "2:26:57", "throughput": 8721.97, "total_tokens": 61074024} +{"current_steps": 90595, "total_steps": 204665, "loss": 0.0005, "lr": 1.366034198375509e-06, "epoch": 2.2132509222387804, "percentage": 44.27, "elapsed_time": "1:56:42", "remaining_time": "2:26:57", "throughput": 8722.0, "total_tokens": 61077160} +{"current_steps": 90600, "total_steps": 204665, "loss": 0.0005, "lr": 1.3659548375911992e-06, "epoch": 2.2133730730706276, "percentage": 44.27, "elapsed_time": "1:56:42", "remaining_time": "2:26:56", "throughput": 8722.01, "total_tokens": 61080232} +{"current_steps": 90605, "total_steps": 204665, "loss": 0.1485, "lr": 1.3658754741455674e-06, "epoch": 2.2134952239024748, "percentage": 44.27, "elapsed_time": "1:56:43", "remaining_time": "2:26:56", "throughput": 8722.17, "total_tokens": 61084584} +{"current_steps": 90610, "total_steps": 204665, "loss": 0.0731, "lr": 1.3657961080391907e-06, "epoch": 2.213617374734322, "percentage": 44.27, "elapsed_time": "1:56:43", "remaining_time": "2:26:55", "throughput": 8722.23, "total_tokens": 61088040} +{"current_steps": 90615, "total_steps": 204665, "loss": 0.0745, "lr": 1.3657167392726463e-06, "epoch": 2.213739525566169, "percentage": 44.27, "elapsed_time": "1:56:44", "remaining_time": "2:26:55", "throughput": 8722.27, "total_tokens": 61091304} +{"current_steps": 90620, "total_steps": 204665, "loss": 0.0939, "lr": 1.3656373678465114e-06, "epoch": 2.2138616763980163, "percentage": 44.28, "elapsed_time": "1:56:44", "remaining_time": "2:26:55", "throughput": 8722.36, "total_tokens": 61095016} +{"current_steps": 90625, "total_steps": 204665, "loss": 0.0014, "lr": 1.3655579937613633e-06, "epoch": 2.2139838272298635, "percentage": 44.28, "elapsed_time": "1:56:44", "remaining_time": "2:26:54", "throughput": 8722.39, "total_tokens": 61098280} +{"current_steps": 90630, "total_steps": 204665, "loss": 0.0004, "lr": 1.365478617017779e-06, "epoch": 2.2141059780617107, "percentage": 44.28, "elapsed_time": "1:56:45", "remaining_time": "2:26:54", "throughput": 8722.44, "total_tokens": 61101608} +{"current_steps": 90635, "total_steps": 204665, "loss": 0.0006, "lr": 1.3653992376163359e-06, "epoch": 2.214228128893558, "percentage": 44.28, "elapsed_time": "1:56:45", "remaining_time": "2:26:53", "throughput": 8722.47, "total_tokens": 61104872} +{"current_steps": 90640, "total_steps": 204665, "loss": 0.1049, "lr": 1.3653198555576113e-06, "epoch": 2.214350279725405, "percentage": 44.29, "elapsed_time": "1:56:45", "remaining_time": "2:26:53", "throughput": 8722.53, "total_tokens": 61108328} +{"current_steps": 90645, "total_steps": 204665, "loss": 0.0003, "lr": 1.3652404708421823e-06, "epoch": 2.2144724305572523, "percentage": 44.29, "elapsed_time": "1:56:46", "remaining_time": "2:26:52", "throughput": 8722.54, "total_tokens": 61111336} +{"current_steps": 90650, "total_steps": 204665, "loss": 0.0296, "lr": 1.3651610834706266e-06, "epoch": 2.2145945813890995, "percentage": 44.29, "elapsed_time": "1:56:46", "remaining_time": "2:26:52", "throughput": 8722.56, "total_tokens": 61114408} +{"current_steps": 90655, "total_steps": 204665, "loss": 0.0348, "lr": 1.3650816934435211e-06, "epoch": 2.2147167322209462, "percentage": 44.29, "elapsed_time": "1:56:46", "remaining_time": "2:26:51", "throughput": 8722.64, "total_tokens": 61118120} +{"current_steps": 90660, "total_steps": 204665, "loss": 0.0594, "lr": 1.3650023007614436e-06, "epoch": 2.2148388830527934, "percentage": 44.3, "elapsed_time": "1:56:47", "remaining_time": "2:26:51", "throughput": 8722.68, "total_tokens": 61121448} +{"current_steps": 90665, "total_steps": 204665, "loss": 0.0411, "lr": 1.3649229054249709e-06, "epoch": 2.2149610338846406, "percentage": 44.3, "elapsed_time": "1:56:47", "remaining_time": "2:26:51", "throughput": 8722.77, "total_tokens": 61125160} +{"current_steps": 90670, "total_steps": 204665, "loss": 0.0007, "lr": 1.3648435074346812e-06, "epoch": 2.215083184716488, "percentage": 44.3, "elapsed_time": "1:56:47", "remaining_time": "2:26:50", "throughput": 8722.79, "total_tokens": 61128296} +{"current_steps": 90675, "total_steps": 204665, "loss": 0.04, "lr": 1.364764106791151e-06, "epoch": 2.215205335548335, "percentage": 44.3, "elapsed_time": "1:56:48", "remaining_time": "2:26:50", "throughput": 8722.82, "total_tokens": 61131560} +{"current_steps": 90680, "total_steps": 204665, "loss": 0.0004, "lr": 1.3646847034949577e-06, "epoch": 2.215327486380182, "percentage": 44.31, "elapsed_time": "1:56:48", "remaining_time": "2:26:49", "throughput": 8722.84, "total_tokens": 61134632} +{"current_steps": 90685, "total_steps": 204665, "loss": 0.0002, "lr": 1.3646052975466798e-06, "epoch": 2.2154496372120294, "percentage": 44.31, "elapsed_time": "1:56:48", "remaining_time": "2:26:49", "throughput": 8722.9, "total_tokens": 61138152} +{"current_steps": 90690, "total_steps": 204665, "loss": 0.0352, "lr": 1.3645258889468938e-06, "epoch": 2.2155717880438766, "percentage": 44.31, "elapsed_time": "1:56:49", "remaining_time": "2:26:48", "throughput": 8722.95, "total_tokens": 61141480} +{"current_steps": 90695, "total_steps": 204665, "loss": 0.0001, "lr": 1.3644464776961778e-06, "epoch": 2.2156939388757237, "percentage": 44.31, "elapsed_time": "1:56:49", "remaining_time": "2:26:48", "throughput": 8723.04, "total_tokens": 61145320} +{"current_steps": 90700, "total_steps": 204665, "loss": 0.0874, "lr": 1.3643670637951086e-06, "epoch": 2.215816089707571, "percentage": 44.32, "elapsed_time": "1:56:49", "remaining_time": "2:26:48", "throughput": 8723.07, "total_tokens": 61148520} +{"current_steps": 90705, "total_steps": 204665, "loss": 0.1244, "lr": 1.3642876472442642e-06, "epoch": 2.215938240539418, "percentage": 44.32, "elapsed_time": "1:56:50", "remaining_time": "2:26:47", "throughput": 8723.08, "total_tokens": 61151528} +{"current_steps": 90710, "total_steps": 204665, "loss": 0.0002, "lr": 1.3642082280442219e-06, "epoch": 2.2160603913712653, "percentage": 44.32, "elapsed_time": "1:56:50", "remaining_time": "2:26:47", "throughput": 8723.15, "total_tokens": 61155112} +{"current_steps": 90715, "total_steps": 204665, "loss": 0.0001, "lr": 1.3641288061955599e-06, "epoch": 2.2161825422031125, "percentage": 44.32, "elapsed_time": "1:56:51", "remaining_time": "2:26:46", "throughput": 8723.21, "total_tokens": 61158568} +{"current_steps": 90720, "total_steps": 204665, "loss": 0.0565, "lr": 1.364049381698855e-06, "epoch": 2.2163046930349597, "percentage": 44.33, "elapsed_time": "1:56:51", "remaining_time": "2:26:46", "throughput": 8723.27, "total_tokens": 61162024} +{"current_steps": 90725, "total_steps": 204665, "loss": 0.0587, "lr": 1.363969954554685e-06, "epoch": 2.216426843866807, "percentage": 44.33, "elapsed_time": "1:56:51", "remaining_time": "2:26:45", "throughput": 8723.33, "total_tokens": 61165544} +{"current_steps": 90730, "total_steps": 204665, "loss": 0.1062, "lr": 1.3638905247636276e-06, "epoch": 2.216548994698654, "percentage": 44.33, "elapsed_time": "1:56:52", "remaining_time": "2:26:45", "throughput": 8723.38, "total_tokens": 61168936} +{"current_steps": 90735, "total_steps": 204665, "loss": 0.0007, "lr": 1.3638110923262608e-06, "epoch": 2.2166711455305013, "percentage": 44.33, "elapsed_time": "1:56:52", "remaining_time": "2:26:45", "throughput": 8723.4, "total_tokens": 61172072} +{"current_steps": 90740, "total_steps": 204665, "loss": 0.0465, "lr": 1.3637316572431613e-06, "epoch": 2.216793296362348, "percentage": 44.34, "elapsed_time": "1:56:52", "remaining_time": "2:26:44", "throughput": 8723.41, "total_tokens": 61175080} +{"current_steps": 90745, "total_steps": 204665, "loss": 0.0372, "lr": 1.3636522195149077e-06, "epoch": 2.216915447194195, "percentage": 44.34, "elapsed_time": "1:56:53", "remaining_time": "2:26:44", "throughput": 8723.47, "total_tokens": 61178536} +{"current_steps": 90750, "total_steps": 204665, "loss": 0.0348, "lr": 1.363572779142077e-06, "epoch": 2.2170375980260424, "percentage": 44.34, "elapsed_time": "1:56:53", "remaining_time": "2:26:43", "throughput": 8723.5, "total_tokens": 61181800} +{"current_steps": 90755, "total_steps": 204665, "loss": 0.0004, "lr": 1.3634933361252477e-06, "epoch": 2.2171597488578896, "percentage": 44.34, "elapsed_time": "1:56:53", "remaining_time": "2:26:43", "throughput": 8723.56, "total_tokens": 61185192} +{"current_steps": 90760, "total_steps": 204665, "loss": 0.0004, "lr": 1.3634138904649969e-06, "epoch": 2.2172818996897368, "percentage": 44.35, "elapsed_time": "1:56:54", "remaining_time": "2:26:42", "throughput": 8723.73, "total_tokens": 61189672} +{"current_steps": 90765, "total_steps": 204665, "loss": 0.0411, "lr": 1.3633344421619027e-06, "epoch": 2.217404050521584, "percentage": 44.35, "elapsed_time": "1:56:54", "remaining_time": "2:26:42", "throughput": 8723.75, "total_tokens": 61192744} +{"current_steps": 90770, "total_steps": 204665, "loss": 0.0002, "lr": 1.3632549912165425e-06, "epoch": 2.217526201353431, "percentage": 44.35, "elapsed_time": "1:56:54", "remaining_time": "2:26:41", "throughput": 8723.79, "total_tokens": 61196072} +{"current_steps": 90775, "total_steps": 204665, "loss": 0.0159, "lr": 1.3631755376294944e-06, "epoch": 2.2176483521852783, "percentage": 44.35, "elapsed_time": "1:56:55", "remaining_time": "2:26:41", "throughput": 8723.82, "total_tokens": 61199336} +{"current_steps": 90780, "total_steps": 204665, "loss": 0.0001, "lr": 1.363096081401336e-06, "epoch": 2.2177705030171255, "percentage": 44.36, "elapsed_time": "1:56:55", "remaining_time": "2:26:41", "throughput": 8723.87, "total_tokens": 61202664} +{"current_steps": 90785, "total_steps": 204665, "loss": 0.0003, "lr": 1.3630166225326453e-06, "epoch": 2.2178926538489727, "percentage": 44.36, "elapsed_time": "1:56:55", "remaining_time": "2:26:40", "throughput": 8723.92, "total_tokens": 61206056} +{"current_steps": 90790, "total_steps": 204665, "loss": 0.0214, "lr": 1.3629371610240004e-06, "epoch": 2.21801480468082, "percentage": 44.36, "elapsed_time": "1:56:56", "remaining_time": "2:26:40", "throughput": 8723.96, "total_tokens": 61209384} +{"current_steps": 90795, "total_steps": 204665, "loss": 0.0001, "lr": 1.3628576968759784e-06, "epoch": 2.218136955512667, "percentage": 44.36, "elapsed_time": "1:56:56", "remaining_time": "2:26:39", "throughput": 8724.0, "total_tokens": 61212648} +{"current_steps": 90800, "total_steps": 204665, "loss": 0.0001, "lr": 1.3627782300891575e-06, "epoch": 2.2182591063445143, "percentage": 44.37, "elapsed_time": "1:56:56", "remaining_time": "2:26:39", "throughput": 8724.0, "total_tokens": 61215656} +{"current_steps": 90805, "total_steps": 204665, "loss": 0.0282, "lr": 1.362698760664116e-06, "epoch": 2.2183812571763615, "percentage": 44.37, "elapsed_time": "1:56:57", "remaining_time": "2:26:38", "throughput": 8724.05, "total_tokens": 61219048} +{"current_steps": 90810, "total_steps": 204665, "loss": 0.0379, "lr": 1.3626192886014317e-06, "epoch": 2.2185034080082087, "percentage": 44.37, "elapsed_time": "1:56:57", "remaining_time": "2:26:38", "throughput": 8724.08, "total_tokens": 61222312} +{"current_steps": 90815, "total_steps": 204665, "loss": 0.0011, "lr": 1.3625398139016824e-06, "epoch": 2.218625558840056, "percentage": 44.37, "elapsed_time": "1:56:57", "remaining_time": "2:26:38", "throughput": 8724.18, "total_tokens": 61226152} +{"current_steps": 90820, "total_steps": 204665, "loss": 0.0003, "lr": 1.362460336565446e-06, "epoch": 2.218747709671903, "percentage": 44.37, "elapsed_time": "1:56:58", "remaining_time": "2:26:37", "throughput": 8724.24, "total_tokens": 61229608} +{"current_steps": 90825, "total_steps": 204665, "loss": 0.0404, "lr": 1.3623808565933005e-06, "epoch": 2.2188698605037502, "percentage": 44.38, "elapsed_time": "1:56:58", "remaining_time": "2:26:37", "throughput": 8724.29, "total_tokens": 61233000} +{"current_steps": 90830, "total_steps": 204665, "loss": 0.0087, "lr": 1.362301373985824e-06, "epoch": 2.2189920113355974, "percentage": 44.38, "elapsed_time": "1:56:59", "remaining_time": "2:26:36", "throughput": 8724.32, "total_tokens": 61236200} +{"current_steps": 90835, "total_steps": 204665, "loss": 0.0058, "lr": 1.3622218887435942e-06, "epoch": 2.219114162167444, "percentage": 44.38, "elapsed_time": "1:56:59", "remaining_time": "2:26:36", "throughput": 8724.4, "total_tokens": 61239848} +{"current_steps": 90840, "total_steps": 204665, "loss": 0.0002, "lr": 1.3621424008671895e-06, "epoch": 2.2192363129992914, "percentage": 44.38, "elapsed_time": "1:56:59", "remaining_time": "2:26:35", "throughput": 8724.44, "total_tokens": 61243176} +{"current_steps": 90845, "total_steps": 204665, "loss": 0.0003, "lr": 1.362062910357188e-06, "epoch": 2.2193584638311386, "percentage": 44.39, "elapsed_time": "1:57:00", "remaining_time": "2:26:35", "throughput": 8724.52, "total_tokens": 61246824} +{"current_steps": 90850, "total_steps": 204665, "loss": 0.0817, "lr": 1.3619834172141675e-06, "epoch": 2.2194806146629857, "percentage": 44.39, "elapsed_time": "1:57:00", "remaining_time": "2:26:35", "throughput": 8724.56, "total_tokens": 61250152} +{"current_steps": 90855, "total_steps": 204665, "loss": 0.0001, "lr": 1.3619039214387065e-06, "epoch": 2.219602765494833, "percentage": 44.39, "elapsed_time": "1:57:00", "remaining_time": "2:26:34", "throughput": 8724.65, "total_tokens": 61253864} +{"current_steps": 90860, "total_steps": 204665, "loss": 0.0001, "lr": 1.3618244230313826e-06, "epoch": 2.21972491632668, "percentage": 44.39, "elapsed_time": "1:57:01", "remaining_time": "2:26:34", "throughput": 8724.68, "total_tokens": 61257064} +{"current_steps": 90865, "total_steps": 204665, "loss": 0.0004, "lr": 1.361744921992774e-06, "epoch": 2.2198470671585273, "percentage": 44.4, "elapsed_time": "1:57:01", "remaining_time": "2:26:33", "throughput": 8724.69, "total_tokens": 61260200} +{"current_steps": 90870, "total_steps": 204665, "loss": 0.0524, "lr": 1.3616654183234596e-06, "epoch": 2.2199692179903745, "percentage": 44.4, "elapsed_time": "1:57:01", "remaining_time": "2:26:33", "throughput": 8724.73, "total_tokens": 61263464} +{"current_steps": 90875, "total_steps": 204665, "loss": 0.0334, "lr": 1.3615859120240165e-06, "epoch": 2.2200913688222217, "percentage": 44.4, "elapsed_time": "1:57:02", "remaining_time": "2:26:32", "throughput": 8724.77, "total_tokens": 61266792} +{"current_steps": 90880, "total_steps": 204665, "loss": 0.0002, "lr": 1.3615064030950236e-06, "epoch": 2.220213519654069, "percentage": 44.4, "elapsed_time": "1:57:02", "remaining_time": "2:26:32", "throughput": 8724.82, "total_tokens": 61270184} +{"current_steps": 90885, "total_steps": 204665, "loss": 0.049, "lr": 1.361426891537059e-06, "epoch": 2.220335670485916, "percentage": 44.41, "elapsed_time": "1:57:02", "remaining_time": "2:26:32", "throughput": 8724.88, "total_tokens": 61273640} +{"current_steps": 90890, "total_steps": 204665, "loss": 0.0811, "lr": 1.3613473773507007e-06, "epoch": 2.2204578213177633, "percentage": 44.41, "elapsed_time": "1:57:03", "remaining_time": "2:26:31", "throughput": 8724.92, "total_tokens": 61276968} +{"current_steps": 90895, "total_steps": 204665, "loss": 0.0, "lr": 1.3612678605365268e-06, "epoch": 2.2205799721496104, "percentage": 44.41, "elapsed_time": "1:57:03", "remaining_time": "2:26:31", "throughput": 8724.92, "total_tokens": 61279848} +{"current_steps": 90900, "total_steps": 204665, "loss": 0.0002, "lr": 1.3611883410951162e-06, "epoch": 2.2207021229814576, "percentage": 44.41, "elapsed_time": "1:57:03", "remaining_time": "2:26:30", "throughput": 8724.99, "total_tokens": 61283432} +{"current_steps": 90905, "total_steps": 204665, "loss": 0.0475, "lr": 1.3611088190270467e-06, "epoch": 2.220824273813305, "percentage": 44.42, "elapsed_time": "1:57:04", "remaining_time": "2:26:30", "throughput": 8725.07, "total_tokens": 61287080} +{"current_steps": 90910, "total_steps": 204665, "loss": 0.0, "lr": 1.361029294332897e-06, "epoch": 2.220946424645152, "percentage": 44.42, "elapsed_time": "1:57:04", "remaining_time": "2:26:29", "throughput": 8725.09, "total_tokens": 61290216} +{"current_steps": 90915, "total_steps": 204665, "loss": 0.087, "lr": 1.3609497670132448e-06, "epoch": 2.221068575476999, "percentage": 44.42, "elapsed_time": "1:57:04", "remaining_time": "2:26:29", "throughput": 8725.12, "total_tokens": 61293416} +{"current_steps": 90920, "total_steps": 204665, "loss": 0.0002, "lr": 1.3608702370686689e-06, "epoch": 2.221190726308846, "percentage": 44.42, "elapsed_time": "1:57:05", "remaining_time": "2:26:28", "throughput": 8725.12, "total_tokens": 61296424} +{"current_steps": 90925, "total_steps": 204665, "loss": 0.1818, "lr": 1.3607907044997476e-06, "epoch": 2.221312877140693, "percentage": 44.43, "elapsed_time": "1:57:05", "remaining_time": "2:26:28", "throughput": 8725.18, "total_tokens": 61299880} +{"current_steps": 90930, "total_steps": 204665, "loss": 0.0001, "lr": 1.3607111693070595e-06, "epoch": 2.2214350279725403, "percentage": 44.43, "elapsed_time": "1:57:05", "remaining_time": "2:26:28", "throughput": 8725.28, "total_tokens": 61303720} +{"current_steps": 90935, "total_steps": 204665, "loss": 0.0741, "lr": 1.3606316314911826e-06, "epoch": 2.2215571788043875, "percentage": 44.43, "elapsed_time": "1:57:06", "remaining_time": "2:26:27", "throughput": 8725.31, "total_tokens": 61306920} +{"current_steps": 90940, "total_steps": 204665, "loss": 0.0001, "lr": 1.3605520910526953e-06, "epoch": 2.2216793296362347, "percentage": 44.43, "elapsed_time": "1:57:06", "remaining_time": "2:26:27", "throughput": 8725.33, "total_tokens": 61310120} +{"current_steps": 90945, "total_steps": 204665, "loss": 0.0002, "lr": 1.3604725479921765e-06, "epoch": 2.221801480468082, "percentage": 44.44, "elapsed_time": "1:57:07", "remaining_time": "2:26:26", "throughput": 8725.38, "total_tokens": 61313512} +{"current_steps": 90950, "total_steps": 204665, "loss": 0.0292, "lr": 1.3603930023102042e-06, "epoch": 2.221923631299929, "percentage": 44.44, "elapsed_time": "1:57:07", "remaining_time": "2:26:26", "throughput": 8725.4, "total_tokens": 61316584} +{"current_steps": 90955, "total_steps": 204665, "loss": 0.062, "lr": 1.3603134540073571e-06, "epoch": 2.2220457821317763, "percentage": 44.44, "elapsed_time": "1:57:07", "remaining_time": "2:26:25", "throughput": 8725.48, "total_tokens": 61320232} +{"current_steps": 90960, "total_steps": 204665, "loss": 0.0001, "lr": 1.3602339030842135e-06, "epoch": 2.2221679329636235, "percentage": 44.44, "elapsed_time": "1:57:08", "remaining_time": "2:26:25", "throughput": 8725.5, "total_tokens": 61323432} +{"current_steps": 90965, "total_steps": 204665, "loss": 0.0421, "lr": 1.3601543495413521e-06, "epoch": 2.2222900837954707, "percentage": 44.45, "elapsed_time": "1:57:08", "remaining_time": "2:26:25", "throughput": 8725.55, "total_tokens": 61326760} +{"current_steps": 90970, "total_steps": 204665, "loss": 0.0841, "lr": 1.3600747933793516e-06, "epoch": 2.222412234627318, "percentage": 44.45, "elapsed_time": "1:57:08", "remaining_time": "2:26:24", "throughput": 8725.6, "total_tokens": 61330152} +{"current_steps": 90975, "total_steps": 204665, "loss": 0.0005, "lr": 1.3599952345987902e-06, "epoch": 2.222534385459165, "percentage": 44.45, "elapsed_time": "1:57:09", "remaining_time": "2:26:24", "throughput": 8725.64, "total_tokens": 61333480} +{"current_steps": 90980, "total_steps": 204665, "loss": 0.0673, "lr": 1.3599156732002467e-06, "epoch": 2.2226565362910122, "percentage": 44.45, "elapsed_time": "1:57:09", "remaining_time": "2:26:23", "throughput": 8725.71, "total_tokens": 61336936} +{"current_steps": 90985, "total_steps": 204665, "loss": 0.0565, "lr": 1.3598361091842999e-06, "epoch": 2.2227786871228594, "percentage": 44.46, "elapsed_time": "1:57:09", "remaining_time": "2:26:23", "throughput": 8725.76, "total_tokens": 61340392} +{"current_steps": 90990, "total_steps": 204665, "loss": 0.0946, "lr": 1.3597565425515273e-06, "epoch": 2.2229008379547066, "percentage": 44.46, "elapsed_time": "1:57:10", "remaining_time": "2:26:22", "throughput": 8725.77, "total_tokens": 61343400} +{"current_steps": 90995, "total_steps": 204665, "loss": 0.0033, "lr": 1.359676973302509e-06, "epoch": 2.223022988786554, "percentage": 44.46, "elapsed_time": "1:57:10", "remaining_time": "2:26:22", "throughput": 8725.79, "total_tokens": 61346536} +{"current_steps": 91000, "total_steps": 204665, "loss": 0.0479, "lr": 1.359597401437823e-06, "epoch": 2.223145139618401, "percentage": 44.46, "elapsed_time": "1:57:10", "remaining_time": "2:26:21", "throughput": 8725.85, "total_tokens": 61350056} +{"current_steps": 91005, "total_steps": 204665, "loss": 0.0855, "lr": 1.3595178269580478e-06, "epoch": 2.2232672904502477, "percentage": 44.47, "elapsed_time": "1:57:11", "remaining_time": "2:26:21", "throughput": 8725.93, "total_tokens": 61353640} +{"current_steps": 91010, "total_steps": 204665, "loss": 0.0065, "lr": 1.3594382498637625e-06, "epoch": 2.223389441282095, "percentage": 44.47, "elapsed_time": "1:57:11", "remaining_time": "2:26:21", "throughput": 8725.94, "total_tokens": 61356712} +{"current_steps": 91015, "total_steps": 204665, "loss": 0.0441, "lr": 1.3593586701555454e-06, "epoch": 2.223511592113942, "percentage": 44.47, "elapsed_time": "1:57:11", "remaining_time": "2:26:20", "throughput": 8725.96, "total_tokens": 61359912} +{"current_steps": 91020, "total_steps": 204665, "loss": 0.0261, "lr": 1.359279087833975e-06, "epoch": 2.2236337429457893, "percentage": 44.47, "elapsed_time": "1:57:12", "remaining_time": "2:26:20", "throughput": 8726.05, "total_tokens": 61363624} +{"current_steps": 91025, "total_steps": 204665, "loss": 0.0001, "lr": 1.359199502899631e-06, "epoch": 2.2237558937776365, "percentage": 44.48, "elapsed_time": "1:57:12", "remaining_time": "2:26:19", "throughput": 8726.1, "total_tokens": 61366952} +{"current_steps": 91030, "total_steps": 204665, "loss": 0.0011, "lr": 1.3591199153530916e-06, "epoch": 2.2238780446094837, "percentage": 44.48, "elapsed_time": "1:57:12", "remaining_time": "2:26:19", "throughput": 8726.23, "total_tokens": 61371048} +{"current_steps": 91035, "total_steps": 204665, "loss": 0.0367, "lr": 1.3590403251949354e-06, "epoch": 2.224000195441331, "percentage": 44.48, "elapsed_time": "1:57:13", "remaining_time": "2:26:18", "throughput": 8726.23, "total_tokens": 61373992} +{"current_steps": 91040, "total_steps": 204665, "loss": 0.0014, "lr": 1.3589607324257415e-06, "epoch": 2.224122346273178, "percentage": 44.48, "elapsed_time": "1:57:13", "remaining_time": "2:26:18", "throughput": 8726.26, "total_tokens": 61377192} +{"current_steps": 91045, "total_steps": 204665, "loss": 0.1273, "lr": 1.3588811370460884e-06, "epoch": 2.2242444971050253, "percentage": 44.48, "elapsed_time": "1:57:13", "remaining_time": "2:26:18", "throughput": 8726.32, "total_tokens": 61380648} +{"current_steps": 91050, "total_steps": 204665, "loss": 0.0011, "lr": 1.3588015390565551e-06, "epoch": 2.2243666479368724, "percentage": 44.49, "elapsed_time": "1:57:14", "remaining_time": "2:26:17", "throughput": 8726.37, "total_tokens": 61384104} +{"current_steps": 91055, "total_steps": 204665, "loss": 0.035, "lr": 1.3587219384577207e-06, "epoch": 2.2244887987687196, "percentage": 44.49, "elapsed_time": "1:57:14", "remaining_time": "2:26:17", "throughput": 8726.47, "total_tokens": 61387880} +{"current_steps": 91060, "total_steps": 204665, "loss": 0.0001, "lr": 1.3586423352501637e-06, "epoch": 2.224610949600567, "percentage": 44.49, "elapsed_time": "1:57:15", "remaining_time": "2:26:16", "throughput": 8726.48, "total_tokens": 61390952} +{"current_steps": 91065, "total_steps": 204665, "loss": 0.0524, "lr": 1.3585627294344635e-06, "epoch": 2.224733100432414, "percentage": 44.49, "elapsed_time": "1:57:15", "remaining_time": "2:26:16", "throughput": 8726.52, "total_tokens": 61394152} +{"current_steps": 91070, "total_steps": 204665, "loss": 0.0001, "lr": 1.3584831210111985e-06, "epoch": 2.224855251264261, "percentage": 44.5, "elapsed_time": "1:57:15", "remaining_time": "2:26:15", "throughput": 8726.56, "total_tokens": 61397416} +{"current_steps": 91075, "total_steps": 204665, "loss": 0.0377, "lr": 1.3584035099809477e-06, "epoch": 2.2249774020961084, "percentage": 44.5, "elapsed_time": "1:57:16", "remaining_time": "2:26:15", "throughput": 8726.61, "total_tokens": 61400872} +{"current_steps": 91080, "total_steps": 204665, "loss": 0.0003, "lr": 1.3583238963442904e-06, "epoch": 2.2250995529279556, "percentage": 44.5, "elapsed_time": "1:57:16", "remaining_time": "2:26:15", "throughput": 8726.71, "total_tokens": 61404584} +{"current_steps": 91085, "total_steps": 204665, "loss": 0.0001, "lr": 1.3582442801018052e-06, "epoch": 2.2252217037598028, "percentage": 44.5, "elapsed_time": "1:57:16", "remaining_time": "2:26:14", "throughput": 8726.75, "total_tokens": 61407912} +{"current_steps": 91090, "total_steps": 204665, "loss": 0.0009, "lr": 1.3581646612540713e-06, "epoch": 2.22534385459165, "percentage": 44.51, "elapsed_time": "1:57:17", "remaining_time": "2:26:14", "throughput": 8726.9, "total_tokens": 61412136} +{"current_steps": 91095, "total_steps": 204665, "loss": 0.0003, "lr": 1.3580850398016676e-06, "epoch": 2.225466005423497, "percentage": 44.51, "elapsed_time": "1:57:17", "remaining_time": "2:26:13", "throughput": 8726.96, "total_tokens": 61415656} +{"current_steps": 91100, "total_steps": 204665, "loss": 0.0772, "lr": 1.3580054157451732e-06, "epoch": 2.225588156255344, "percentage": 44.51, "elapsed_time": "1:57:17", "remaining_time": "2:26:13", "throughput": 8726.99, "total_tokens": 61418856} +{"current_steps": 91105, "total_steps": 204665, "loss": 0.0622, "lr": 1.3579257890851673e-06, "epoch": 2.225710307087191, "percentage": 44.51, "elapsed_time": "1:57:18", "remaining_time": "2:26:12", "throughput": 8727.05, "total_tokens": 61422248} +{"current_steps": 91110, "total_steps": 204665, "loss": 0.0008, "lr": 1.3578461598222286e-06, "epoch": 2.2258324579190383, "percentage": 44.52, "elapsed_time": "1:57:18", "remaining_time": "2:26:12", "throughput": 8727.06, "total_tokens": 61425320} +{"current_steps": 91115, "total_steps": 204665, "loss": 0.0423, "lr": 1.357766527956936e-06, "epoch": 2.2259546087508855, "percentage": 44.52, "elapsed_time": "1:57:18", "remaining_time": "2:26:12", "throughput": 8727.14, "total_tokens": 61429032} +{"current_steps": 91120, "total_steps": 204665, "loss": 0.0304, "lr": 1.3576868934898696e-06, "epoch": 2.2260767595827327, "percentage": 44.52, "elapsed_time": "1:57:19", "remaining_time": "2:26:11", "throughput": 8727.21, "total_tokens": 61432680} +{"current_steps": 91125, "total_steps": 204665, "loss": 0.0004, "lr": 1.3576072564216077e-06, "epoch": 2.22619891041458, "percentage": 44.52, "elapsed_time": "1:57:19", "remaining_time": "2:26:11", "throughput": 8727.24, "total_tokens": 61435944} +{"current_steps": 91130, "total_steps": 204665, "loss": 0.0435, "lr": 1.3575276167527297e-06, "epoch": 2.226321061246427, "percentage": 44.53, "elapsed_time": "1:57:19", "remaining_time": "2:26:10", "throughput": 8727.28, "total_tokens": 61439272} +{"current_steps": 91135, "total_steps": 204665, "loss": 0.0525, "lr": 1.3574479744838147e-06, "epoch": 2.2264432120782742, "percentage": 44.53, "elapsed_time": "1:57:20", "remaining_time": "2:26:10", "throughput": 8727.34, "total_tokens": 61442792} +{"current_steps": 91140, "total_steps": 204665, "loss": 0.2178, "lr": 1.357368329615442e-06, "epoch": 2.2265653629101214, "percentage": 44.53, "elapsed_time": "1:57:20", "remaining_time": "2:26:09", "throughput": 8727.38, "total_tokens": 61446120} +{"current_steps": 91145, "total_steps": 204665, "loss": 0.0001, "lr": 1.3572886821481905e-06, "epoch": 2.2266875137419686, "percentage": 44.53, "elapsed_time": "1:57:20", "remaining_time": "2:26:09", "throughput": 8727.43, "total_tokens": 61449448} +{"current_steps": 91150, "total_steps": 204665, "loss": 0.0003, "lr": 1.3572090320826395e-06, "epoch": 2.226809664573816, "percentage": 44.54, "elapsed_time": "1:57:21", "remaining_time": "2:26:08", "throughput": 8727.45, "total_tokens": 61452584} +{"current_steps": 91155, "total_steps": 204665, "loss": 0.0008, "lr": 1.3571293794193684e-06, "epoch": 2.226931815405663, "percentage": 44.54, "elapsed_time": "1:57:21", "remaining_time": "2:26:08", "throughput": 8727.46, "total_tokens": 61455720} +{"current_steps": 91160, "total_steps": 204665, "loss": 0.0008, "lr": 1.3570497241589564e-06, "epoch": 2.22705396623751, "percentage": 44.54, "elapsed_time": "1:57:21", "remaining_time": "2:26:08", "throughput": 8727.51, "total_tokens": 61459048} +{"current_steps": 91165, "total_steps": 204665, "loss": 0.0007, "lr": 1.356970066301983e-06, "epoch": 2.2271761170693574, "percentage": 44.54, "elapsed_time": "1:57:22", "remaining_time": "2:26:07", "throughput": 8727.55, "total_tokens": 61462376} +{"current_steps": 91170, "total_steps": 204665, "loss": 0.0588, "lr": 1.3568904058490272e-06, "epoch": 2.2272982679012046, "percentage": 44.55, "elapsed_time": "1:57:22", "remaining_time": "2:26:07", "throughput": 8727.67, "total_tokens": 61466408} +{"current_steps": 91175, "total_steps": 204665, "loss": 0.0003, "lr": 1.356810742800668e-06, "epoch": 2.2274204187330517, "percentage": 44.55, "elapsed_time": "1:57:23", "remaining_time": "2:26:06", "throughput": 8727.75, "total_tokens": 61470056} +{"current_steps": 91180, "total_steps": 204665, "loss": 0.0002, "lr": 1.3567310771574853e-06, "epoch": 2.227542569564899, "percentage": 44.55, "elapsed_time": "1:57:23", "remaining_time": "2:26:06", "throughput": 8727.73, "total_tokens": 61472872} +{"current_steps": 91185, "total_steps": 204665, "loss": 0.182, "lr": 1.3566514089200584e-06, "epoch": 2.2276647203967457, "percentage": 44.55, "elapsed_time": "1:57:23", "remaining_time": "2:26:05", "throughput": 8727.77, "total_tokens": 61476136} +{"current_steps": 91190, "total_steps": 204665, "loss": 0.0003, "lr": 1.3565717380889664e-06, "epoch": 2.227786871228593, "percentage": 44.56, "elapsed_time": "1:57:24", "remaining_time": "2:26:05", "throughput": 8727.85, "total_tokens": 61479848} +{"current_steps": 91195, "total_steps": 204665, "loss": 0.0004, "lr": 1.356492064664789e-06, "epoch": 2.22790902206044, "percentage": 44.56, "elapsed_time": "1:57:24", "remaining_time": "2:26:05", "throughput": 8727.88, "total_tokens": 61483112} +{"current_steps": 91200, "total_steps": 204665, "loss": 0.0375, "lr": 1.3564123886481054e-06, "epoch": 2.2280311728922872, "percentage": 44.56, "elapsed_time": "1:57:24", "remaining_time": "2:26:04", "throughput": 8727.96, "total_tokens": 61486760} +{"current_steps": 91205, "total_steps": 204665, "loss": 0.0001, "lr": 1.3563327100394947e-06, "epoch": 2.2281533237241344, "percentage": 44.56, "elapsed_time": "1:57:25", "remaining_time": "2:26:04", "throughput": 8728.01, "total_tokens": 61490152} +{"current_steps": 91210, "total_steps": 204665, "loss": 0.0001, "lr": 1.356253028839537e-06, "epoch": 2.2282754745559816, "percentage": 44.57, "elapsed_time": "1:57:25", "remaining_time": "2:26:03", "throughput": 8728.03, "total_tokens": 61493352} +{"current_steps": 91215, "total_steps": 204665, "loss": 0.0001, "lr": 1.3561733450488113e-06, "epoch": 2.228397625387829, "percentage": 44.57, "elapsed_time": "1:57:25", "remaining_time": "2:26:03", "throughput": 8728.08, "total_tokens": 61496744} +{"current_steps": 91220, "total_steps": 204665, "loss": 0.131, "lr": 1.3560936586678974e-06, "epoch": 2.228519776219676, "percentage": 44.57, "elapsed_time": "1:57:26", "remaining_time": "2:26:02", "throughput": 8728.09, "total_tokens": 61499752} +{"current_steps": 91225, "total_steps": 204665, "loss": 0.0001, "lr": 1.3560139696973747e-06, "epoch": 2.228641927051523, "percentage": 44.57, "elapsed_time": "1:57:26", "remaining_time": "2:26:02", "throughput": 8728.13, "total_tokens": 61503080} +{"current_steps": 91230, "total_steps": 204665, "loss": 0.0002, "lr": 1.3559342781378225e-06, "epoch": 2.2287640778833704, "percentage": 44.58, "elapsed_time": "1:57:26", "remaining_time": "2:26:02", "throughput": 8728.21, "total_tokens": 61506792} +{"current_steps": 91235, "total_steps": 204665, "loss": 0.0003, "lr": 1.3558545839898206e-06, "epoch": 2.2288862287152176, "percentage": 44.58, "elapsed_time": "1:57:27", "remaining_time": "2:26:01", "throughput": 8728.28, "total_tokens": 61510312} +{"current_steps": 91240, "total_steps": 204665, "loss": 0.0984, "lr": 1.3557748872539484e-06, "epoch": 2.2290083795470648, "percentage": 44.58, "elapsed_time": "1:57:27", "remaining_time": "2:26:01", "throughput": 8728.28, "total_tokens": 61513320} +{"current_steps": 91245, "total_steps": 204665, "loss": 0.0001, "lr": 1.3556951879307855e-06, "epoch": 2.229130530378912, "percentage": 44.58, "elapsed_time": "1:57:27", "remaining_time": "2:26:00", "throughput": 8728.31, "total_tokens": 61516520} +{"current_steps": 91250, "total_steps": 204665, "loss": 0.0013, "lr": 1.3556154860209114e-06, "epoch": 2.229252681210759, "percentage": 44.59, "elapsed_time": "1:57:28", "remaining_time": "2:26:00", "throughput": 8728.34, "total_tokens": 61519784} +{"current_steps": 91255, "total_steps": 204665, "loss": 0.0002, "lr": 1.355535781524906e-06, "epoch": 2.2293748320426063, "percentage": 44.59, "elapsed_time": "1:57:28", "remaining_time": "2:25:59", "throughput": 8728.35, "total_tokens": 61522792} +{"current_steps": 91260, "total_steps": 204665, "loss": 0.1264, "lr": 1.3554560744433488e-06, "epoch": 2.2294969828744535, "percentage": 44.59, "elapsed_time": "1:57:28", "remaining_time": "2:25:59", "throughput": 8728.47, "total_tokens": 61526824} +{"current_steps": 91265, "total_steps": 204665, "loss": 0.0239, "lr": 1.3553763647768192e-06, "epoch": 2.2296191337063007, "percentage": 44.59, "elapsed_time": "1:57:29", "remaining_time": "2:25:59", "throughput": 8728.48, "total_tokens": 61529896} +{"current_steps": 91270, "total_steps": 204665, "loss": 0.0419, "lr": 1.355296652525897e-06, "epoch": 2.229741284538148, "percentage": 44.59, "elapsed_time": "1:57:29", "remaining_time": "2:25:58", "throughput": 8728.52, "total_tokens": 61533288} +{"current_steps": 91275, "total_steps": 204665, "loss": 0.0001, "lr": 1.3552169376911625e-06, "epoch": 2.229863435369995, "percentage": 44.6, "elapsed_time": "1:57:30", "remaining_time": "2:25:58", "throughput": 8728.6, "total_tokens": 61536936} +{"current_steps": 91280, "total_steps": 204665, "loss": 0.0001, "lr": 1.3551372202731945e-06, "epoch": 2.229985586201842, "percentage": 44.6, "elapsed_time": "1:57:30", "remaining_time": "2:25:57", "throughput": 8728.66, "total_tokens": 61540456} +{"current_steps": 91285, "total_steps": 204665, "loss": 0.0275, "lr": 1.3550575002725732e-06, "epoch": 2.230107737033689, "percentage": 44.6, "elapsed_time": "1:57:30", "remaining_time": "2:25:57", "throughput": 8728.68, "total_tokens": 61543592} +{"current_steps": 91290, "total_steps": 204665, "loss": 0.0034, "lr": 1.3549777776898786e-06, "epoch": 2.230229887865536, "percentage": 44.6, "elapsed_time": "1:57:31", "remaining_time": "2:25:56", "throughput": 8728.71, "total_tokens": 61546792} +{"current_steps": 91295, "total_steps": 204665, "loss": 0.0002, "lr": 1.3548980525256897e-06, "epoch": 2.2303520386973834, "percentage": 44.61, "elapsed_time": "1:57:31", "remaining_time": "2:25:56", "throughput": 8728.76, "total_tokens": 61550184} +{"current_steps": 91300, "total_steps": 204665, "loss": 0.0004, "lr": 1.3548183247805867e-06, "epoch": 2.2304741895292306, "percentage": 44.61, "elapsed_time": "1:57:31", "remaining_time": "2:25:56", "throughput": 8728.83, "total_tokens": 61553768} +{"current_steps": 91305, "total_steps": 204665, "loss": 0.0225, "lr": 1.3547385944551495e-06, "epoch": 2.230596340361078, "percentage": 44.61, "elapsed_time": "1:57:32", "remaining_time": "2:25:55", "throughput": 8728.86, "total_tokens": 61556968} +{"current_steps": 91310, "total_steps": 204665, "loss": 0.0303, "lr": 1.3546588615499576e-06, "epoch": 2.230718491192925, "percentage": 44.61, "elapsed_time": "1:57:32", "remaining_time": "2:25:55", "throughput": 8728.92, "total_tokens": 61560424} +{"current_steps": 91315, "total_steps": 204665, "loss": 0.0561, "lr": 1.3545791260655915e-06, "epoch": 2.230840642024772, "percentage": 44.62, "elapsed_time": "1:57:32", "remaining_time": "2:25:54", "throughput": 8728.91, "total_tokens": 61563368} +{"current_steps": 91320, "total_steps": 204665, "loss": 0.0004, "lr": 1.3544993880026305e-06, "epoch": 2.2309627928566194, "percentage": 44.62, "elapsed_time": "1:57:33", "remaining_time": "2:25:54", "throughput": 8728.95, "total_tokens": 61566696} +{"current_steps": 91325, "total_steps": 204665, "loss": 0.0397, "lr": 1.3544196473616544e-06, "epoch": 2.2310849436884665, "percentage": 44.62, "elapsed_time": "1:57:33", "remaining_time": "2:25:53", "throughput": 8728.98, "total_tokens": 61569896} +{"current_steps": 91330, "total_steps": 204665, "loss": 0.0002, "lr": 1.3543399041432432e-06, "epoch": 2.2312070945203137, "percentage": 44.62, "elapsed_time": "1:57:33", "remaining_time": "2:25:53", "throughput": 8728.99, "total_tokens": 61572968} +{"current_steps": 91335, "total_steps": 204665, "loss": 0.0932, "lr": 1.3542601583479774e-06, "epoch": 2.231329245352161, "percentage": 44.63, "elapsed_time": "1:57:34", "remaining_time": "2:25:52", "throughput": 8729.02, "total_tokens": 61576232} +{"current_steps": 91340, "total_steps": 204665, "loss": 0.0369, "lr": 1.3541804099764362e-06, "epoch": 2.231451396184008, "percentage": 44.63, "elapsed_time": "1:57:34", "remaining_time": "2:25:52", "throughput": 8729.03, "total_tokens": 61579240} +{"current_steps": 91345, "total_steps": 204665, "loss": 0.0008, "lr": 1.3541006590291998e-06, "epoch": 2.2315735470158553, "percentage": 44.63, "elapsed_time": "1:57:34", "remaining_time": "2:25:52", "throughput": 8729.08, "total_tokens": 61582696} +{"current_steps": 91350, "total_steps": 204665, "loss": 0.0001, "lr": 1.3540209055068484e-06, "epoch": 2.2316956978477025, "percentage": 44.63, "elapsed_time": "1:57:35", "remaining_time": "2:25:51", "throughput": 8729.13, "total_tokens": 61586088} +{"current_steps": 91355, "total_steps": 204665, "loss": 0.0001, "lr": 1.3539411494099614e-06, "epoch": 2.2318178486795497, "percentage": 44.64, "elapsed_time": "1:57:35", "remaining_time": "2:25:51", "throughput": 8729.15, "total_tokens": 61589224} +{"current_steps": 91360, "total_steps": 204665, "loss": 0.0002, "lr": 1.353861390739119e-06, "epoch": 2.231939999511397, "percentage": 44.64, "elapsed_time": "1:57:35", "remaining_time": "2:25:50", "throughput": 8729.22, "total_tokens": 61592808} +{"current_steps": 91365, "total_steps": 204665, "loss": 0.0002, "lr": 1.3537816294949017e-06, "epoch": 2.2320621503432436, "percentage": 44.64, "elapsed_time": "1:57:36", "remaining_time": "2:25:50", "throughput": 8729.25, "total_tokens": 61596008} +{"current_steps": 91370, "total_steps": 204665, "loss": 0.2142, "lr": 1.353701865677889e-06, "epoch": 2.232184301175091, "percentage": 44.64, "elapsed_time": "1:57:36", "remaining_time": "2:25:49", "throughput": 8729.28, "total_tokens": 61599272} +{"current_steps": 91375, "total_steps": 204665, "loss": 0.0002, "lr": 1.3536220992886615e-06, "epoch": 2.232306452006938, "percentage": 44.65, "elapsed_time": "1:57:36", "remaining_time": "2:25:49", "throughput": 8729.37, "total_tokens": 61602920} +{"current_steps": 91380, "total_steps": 204665, "loss": 0.0007, "lr": 1.3535423303277989e-06, "epoch": 2.232428602838785, "percentage": 44.65, "elapsed_time": "1:57:37", "remaining_time": "2:25:49", "throughput": 8729.38, "total_tokens": 61605992} +{"current_steps": 91385, "total_steps": 204665, "loss": 0.0978, "lr": 1.3534625587958814e-06, "epoch": 2.2325507536706324, "percentage": 44.65, "elapsed_time": "1:57:37", "remaining_time": "2:25:48", "throughput": 8729.43, "total_tokens": 61609320} +{"current_steps": 91390, "total_steps": 204665, "loss": 0.0008, "lr": 1.353382784693489e-06, "epoch": 2.2326729045024796, "percentage": 44.65, "elapsed_time": "1:57:38", "remaining_time": "2:25:48", "throughput": 8729.47, "total_tokens": 61612712} +{"current_steps": 91395, "total_steps": 204665, "loss": 0.0578, "lr": 1.353303008021202e-06, "epoch": 2.2327950553343268, "percentage": 44.66, "elapsed_time": "1:57:38", "remaining_time": "2:25:47", "throughput": 8729.53, "total_tokens": 61616168} +{"current_steps": 91400, "total_steps": 204665, "loss": 0.0456, "lr": 1.3532232287796007e-06, "epoch": 2.232917206166174, "percentage": 44.66, "elapsed_time": "1:57:38", "remaining_time": "2:25:47", "throughput": 8729.58, "total_tokens": 61619560} +{"current_steps": 91405, "total_steps": 204665, "loss": 0.0002, "lr": 1.353143446969265e-06, "epoch": 2.233039356998021, "percentage": 44.66, "elapsed_time": "1:57:39", "remaining_time": "2:25:46", "throughput": 8729.65, "total_tokens": 61623144} +{"current_steps": 91410, "total_steps": 204665, "loss": 0.0538, "lr": 1.3530636625907747e-06, "epoch": 2.2331615078298683, "percentage": 44.66, "elapsed_time": "1:57:39", "remaining_time": "2:25:46", "throughput": 8729.71, "total_tokens": 61626600} +{"current_steps": 91415, "total_steps": 204665, "loss": 0.0005, "lr": 1.352983875644711e-06, "epoch": 2.2332836586617155, "percentage": 44.67, "elapsed_time": "1:57:39", "remaining_time": "2:25:46", "throughput": 8729.7, "total_tokens": 61629480} +{"current_steps": 91420, "total_steps": 204665, "loss": 0.0611, "lr": 1.3529040861316535e-06, "epoch": 2.2334058094935627, "percentage": 44.67, "elapsed_time": "1:57:40", "remaining_time": "2:25:45", "throughput": 8729.75, "total_tokens": 61632872} +{"current_steps": 91425, "total_steps": 204665, "loss": 0.0003, "lr": 1.3528242940521821e-06, "epoch": 2.23352796032541, "percentage": 44.67, "elapsed_time": "1:57:40", "remaining_time": "2:25:45", "throughput": 8729.79, "total_tokens": 61636200} +{"current_steps": 91430, "total_steps": 204665, "loss": 0.0005, "lr": 1.352744499406878e-06, "epoch": 2.233650111157257, "percentage": 44.67, "elapsed_time": "1:57:40", "remaining_time": "2:25:44", "throughput": 8729.81, "total_tokens": 61639272} +{"current_steps": 91435, "total_steps": 204665, "loss": 0.0002, "lr": 1.352664702196321e-06, "epoch": 2.2337722619891043, "percentage": 44.68, "elapsed_time": "1:57:41", "remaining_time": "2:25:44", "throughput": 8729.85, "total_tokens": 61642536} +{"current_steps": 91440, "total_steps": 204665, "loss": 0.0003, "lr": 1.3525849024210913e-06, "epoch": 2.2338944128209515, "percentage": 44.68, "elapsed_time": "1:57:41", "remaining_time": "2:25:43", "throughput": 8729.88, "total_tokens": 61645736} +{"current_steps": 91445, "total_steps": 204665, "loss": 0.1131, "lr": 1.3525051000817699e-06, "epoch": 2.2340165636527987, "percentage": 44.68, "elapsed_time": "1:57:41", "remaining_time": "2:25:43", "throughput": 8729.98, "total_tokens": 61649640} +{"current_steps": 91450, "total_steps": 204665, "loss": 0.0431, "lr": 1.352425295178936e-06, "epoch": 2.2341387144846454, "percentage": 44.68, "elapsed_time": "1:57:42", "remaining_time": "2:25:42", "throughput": 8730.01, "total_tokens": 61652904} +{"current_steps": 91455, "total_steps": 204665, "loss": 0.0002, "lr": 1.3523454877131703e-06, "epoch": 2.234260865316493, "percentage": 44.69, "elapsed_time": "1:57:42", "remaining_time": "2:25:42", "throughput": 8730.06, "total_tokens": 61656296} +{"current_steps": 91460, "total_steps": 204665, "loss": 0.0075, "lr": 1.352265677685054e-06, "epoch": 2.23438301614834, "percentage": 44.69, "elapsed_time": "1:57:42", "remaining_time": "2:25:42", "throughput": 8730.11, "total_tokens": 61659688} +{"current_steps": 91465, "total_steps": 204665, "loss": 0.0005, "lr": 1.352185865095167e-06, "epoch": 2.234505166980187, "percentage": 44.69, "elapsed_time": "1:57:43", "remaining_time": "2:25:41", "throughput": 8730.12, "total_tokens": 61662696} +{"current_steps": 91470, "total_steps": 204665, "loss": 0.0008, "lr": 1.3521060499440893e-06, "epoch": 2.234627317812034, "percentage": 44.69, "elapsed_time": "1:57:43", "remaining_time": "2:25:41", "throughput": 8730.18, "total_tokens": 61666152} +{"current_steps": 91475, "total_steps": 204665, "loss": 0.0003, "lr": 1.352026232232402e-06, "epoch": 2.2347494686438814, "percentage": 44.69, "elapsed_time": "1:57:43", "remaining_time": "2:25:40", "throughput": 8730.2, "total_tokens": 61669288} +{"current_steps": 91480, "total_steps": 204665, "loss": 0.0586, "lr": 1.351946411960685e-06, "epoch": 2.2348716194757285, "percentage": 44.7, "elapsed_time": "1:57:44", "remaining_time": "2:25:40", "throughput": 8730.26, "total_tokens": 61672808} +{"current_steps": 91485, "total_steps": 204665, "loss": 0.0469, "lr": 1.351866589129519e-06, "epoch": 2.2349937703075757, "percentage": 44.7, "elapsed_time": "1:57:44", "remaining_time": "2:25:39", "throughput": 8730.26, "total_tokens": 61675752} +{"current_steps": 91490, "total_steps": 204665, "loss": 0.0579, "lr": 1.3517867637394846e-06, "epoch": 2.235115921139423, "percentage": 44.7, "elapsed_time": "1:57:44", "remaining_time": "2:25:39", "throughput": 8730.31, "total_tokens": 61679144} +{"current_steps": 91495, "total_steps": 204665, "loss": 0.0002, "lr": 1.3517069357911626e-06, "epoch": 2.23523807197127, "percentage": 44.7, "elapsed_time": "1:57:45", "remaining_time": "2:25:39", "throughput": 8730.44, "total_tokens": 61683304} +{"current_steps": 91500, "total_steps": 204665, "loss": 0.1406, "lr": 1.351627105285133e-06, "epoch": 2.2353602228031173, "percentage": 44.71, "elapsed_time": "1:57:45", "remaining_time": "2:25:38", "throughput": 8730.44, "total_tokens": 61686248} +{"current_steps": 91505, "total_steps": 204665, "loss": 0.0408, "lr": 1.3515472722219763e-06, "epoch": 2.2354823736349645, "percentage": 44.71, "elapsed_time": "1:57:46", "remaining_time": "2:25:38", "throughput": 8730.53, "total_tokens": 61689960} +{"current_steps": 91510, "total_steps": 204665, "loss": 0.1092, "lr": 1.3514674366022734e-06, "epoch": 2.2356045244668117, "percentage": 44.71, "elapsed_time": "1:57:46", "remaining_time": "2:25:37", "throughput": 8730.62, "total_tokens": 61693672} +{"current_steps": 91515, "total_steps": 204665, "loss": 0.0002, "lr": 1.3513875984266045e-06, "epoch": 2.235726675298659, "percentage": 44.71, "elapsed_time": "1:57:46", "remaining_time": "2:25:37", "throughput": 8730.69, "total_tokens": 61697256} +{"current_steps": 91520, "total_steps": 204665, "loss": 0.1026, "lr": 1.3513077576955506e-06, "epoch": 2.235848826130506, "percentage": 44.72, "elapsed_time": "1:57:47", "remaining_time": "2:25:36", "throughput": 8730.72, "total_tokens": 61700456} +{"current_steps": 91525, "total_steps": 204665, "loss": 0.0003, "lr": 1.3512279144096924e-06, "epoch": 2.2359709769623533, "percentage": 44.72, "elapsed_time": "1:57:47", "remaining_time": "2:25:36", "throughput": 8730.72, "total_tokens": 61703464} +{"current_steps": 91530, "total_steps": 204665, "loss": 0.0644, "lr": 1.3511480685696101e-06, "epoch": 2.2360931277942004, "percentage": 44.72, "elapsed_time": "1:57:47", "remaining_time": "2:25:36", "throughput": 8730.76, "total_tokens": 61706728} +{"current_steps": 91535, "total_steps": 204665, "loss": 0.0004, "lr": 1.3510682201758847e-06, "epoch": 2.2362152786260476, "percentage": 44.72, "elapsed_time": "1:57:48", "remaining_time": "2:25:35", "throughput": 8730.82, "total_tokens": 61710248} +{"current_steps": 91540, "total_steps": 204665, "loss": 0.0503, "lr": 1.350988369229097e-06, "epoch": 2.236337429457895, "percentage": 44.73, "elapsed_time": "1:57:48", "remaining_time": "2:25:35", "throughput": 8730.82, "total_tokens": 61713192} +{"current_steps": 91545, "total_steps": 204665, "loss": 0.0582, "lr": 1.3509085157298272e-06, "epoch": 2.2364595802897416, "percentage": 44.73, "elapsed_time": "1:57:48", "remaining_time": "2:25:34", "throughput": 8730.83, "total_tokens": 61716136} +{"current_steps": 91550, "total_steps": 204665, "loss": 0.0005, "lr": 1.3508286596786565e-06, "epoch": 2.2365817311215888, "percentage": 44.73, "elapsed_time": "1:57:49", "remaining_time": "2:25:34", "throughput": 8730.86, "total_tokens": 61719400} +{"current_steps": 91555, "total_steps": 204665, "loss": 0.1024, "lr": 1.3507488010761651e-06, "epoch": 2.236703881953436, "percentage": 44.73, "elapsed_time": "1:57:49", "remaining_time": "2:25:33", "throughput": 8730.88, "total_tokens": 61722472} +{"current_steps": 91560, "total_steps": 204665, "loss": 0.0002, "lr": 1.3506689399229342e-06, "epoch": 2.236826032785283, "percentage": 44.74, "elapsed_time": "1:57:49", "remaining_time": "2:25:33", "throughput": 8730.97, "total_tokens": 61726248} +{"current_steps": 91565, "total_steps": 204665, "loss": 0.0502, "lr": 1.3505890762195446e-06, "epoch": 2.2369481836171303, "percentage": 44.74, "elapsed_time": "1:57:50", "remaining_time": "2:25:32", "throughput": 8731.03, "total_tokens": 61729768} +{"current_steps": 91570, "total_steps": 204665, "loss": 0.0005, "lr": 1.3505092099665771e-06, "epoch": 2.2370703344489775, "percentage": 44.74, "elapsed_time": "1:57:50", "remaining_time": "2:25:32", "throughput": 8731.07, "total_tokens": 61733032} +{"current_steps": 91575, "total_steps": 204665, "loss": 0.0007, "lr": 1.3504293411646122e-06, "epoch": 2.2371924852808247, "percentage": 44.74, "elapsed_time": "1:57:50", "remaining_time": "2:25:32", "throughput": 8731.15, "total_tokens": 61736680} +{"current_steps": 91580, "total_steps": 204665, "loss": 0.0538, "lr": 1.3503494698142305e-06, "epoch": 2.237314636112672, "percentage": 44.75, "elapsed_time": "1:57:51", "remaining_time": "2:25:31", "throughput": 8731.26, "total_tokens": 61740584} +{"current_steps": 91585, "total_steps": 204665, "loss": 0.0003, "lr": 1.3502695959160136e-06, "epoch": 2.237436786944519, "percentage": 44.75, "elapsed_time": "1:57:51", "remaining_time": "2:25:31", "throughput": 8731.31, "total_tokens": 61744040} +{"current_steps": 91590, "total_steps": 204665, "loss": 0.029, "lr": 1.350189719470542e-06, "epoch": 2.2375589377763663, "percentage": 44.75, "elapsed_time": "1:57:51", "remaining_time": "2:25:30", "throughput": 8731.38, "total_tokens": 61747624} +{"current_steps": 91595, "total_steps": 204665, "loss": 0.0006, "lr": 1.3501098404783963e-06, "epoch": 2.2376810886082135, "percentage": 44.75, "elapsed_time": "1:57:52", "remaining_time": "2:25:30", "throughput": 8731.44, "total_tokens": 61751016} +{"current_steps": 91600, "total_steps": 204665, "loss": 0.0001, "lr": 1.3500299589401581e-06, "epoch": 2.2378032394400607, "percentage": 44.76, "elapsed_time": "1:57:52", "remaining_time": "2:25:29", "throughput": 8731.48, "total_tokens": 61754344} +{"current_steps": 91605, "total_steps": 204665, "loss": 0.0421, "lr": 1.3499500748564076e-06, "epoch": 2.237925390271908, "percentage": 44.76, "elapsed_time": "1:57:52", "remaining_time": "2:25:29", "throughput": 8731.49, "total_tokens": 61757416} +{"current_steps": 91610, "total_steps": 204665, "loss": 0.0563, "lr": 1.349870188227726e-06, "epoch": 2.238047541103755, "percentage": 44.76, "elapsed_time": "1:57:53", "remaining_time": "2:25:29", "throughput": 8731.53, "total_tokens": 61760744} +{"current_steps": 91615, "total_steps": 204665, "loss": 0.0428, "lr": 1.3497902990546942e-06, "epoch": 2.2381696919356022, "percentage": 44.76, "elapsed_time": "1:57:53", "remaining_time": "2:25:28", "throughput": 8731.57, "total_tokens": 61764008} +{"current_steps": 91620, "total_steps": 204665, "loss": 0.0697, "lr": 1.3497104073378936e-06, "epoch": 2.2382918427674494, "percentage": 44.77, "elapsed_time": "1:57:53", "remaining_time": "2:25:28", "throughput": 8731.63, "total_tokens": 61767528} +{"current_steps": 91625, "total_steps": 204665, "loss": 0.0008, "lr": 1.3496305130779044e-06, "epoch": 2.2384139935992966, "percentage": 44.77, "elapsed_time": "1:57:54", "remaining_time": "2:25:27", "throughput": 8731.69, "total_tokens": 61770984} +{"current_steps": 91630, "total_steps": 204665, "loss": 0.0846, "lr": 1.3495506162753085e-06, "epoch": 2.2385361444311433, "percentage": 44.77, "elapsed_time": "1:57:54", "remaining_time": "2:25:27", "throughput": 8731.76, "total_tokens": 61774568} +{"current_steps": 91635, "total_steps": 204665, "loss": 0.0028, "lr": 1.3494707169306866e-06, "epoch": 2.2386582952629905, "percentage": 44.77, "elapsed_time": "1:57:55", "remaining_time": "2:25:26", "throughput": 8731.77, "total_tokens": 61777576} +{"current_steps": 91640, "total_steps": 204665, "loss": 0.0422, "lr": 1.349390815044619e-06, "epoch": 2.2387804460948377, "percentage": 44.78, "elapsed_time": "1:57:55", "remaining_time": "2:25:26", "throughput": 8731.81, "total_tokens": 61780904} +{"current_steps": 91645, "total_steps": 204665, "loss": 0.1066, "lr": 1.3493109106176879e-06, "epoch": 2.238902596926685, "percentage": 44.78, "elapsed_time": "1:57:55", "remaining_time": "2:25:26", "throughput": 8731.84, "total_tokens": 61784104} +{"current_steps": 91650, "total_steps": 204665, "loss": 0.0829, "lr": 1.349231003650474e-06, "epoch": 2.239024747758532, "percentage": 44.78, "elapsed_time": "1:57:56", "remaining_time": "2:25:25", "throughput": 8731.86, "total_tokens": 61787240} +{"current_steps": 91655, "total_steps": 204665, "loss": 0.0011, "lr": 1.349151094143558e-06, "epoch": 2.2391468985903793, "percentage": 44.78, "elapsed_time": "1:57:56", "remaining_time": "2:25:25", "throughput": 8731.91, "total_tokens": 61790632} +{"current_steps": 91660, "total_steps": 204665, "loss": 0.0003, "lr": 1.3490711820975217e-06, "epoch": 2.2392690494222265, "percentage": 44.79, "elapsed_time": "1:57:56", "remaining_time": "2:25:24", "throughput": 8731.91, "total_tokens": 61793640} +{"current_steps": 91665, "total_steps": 204665, "loss": 0.0489, "lr": 1.3489912675129455e-06, "epoch": 2.2393912002540737, "percentage": 44.79, "elapsed_time": "1:57:57", "remaining_time": "2:25:24", "throughput": 8731.98, "total_tokens": 61797160} +{"current_steps": 91670, "total_steps": 204665, "loss": 0.0002, "lr": 1.348911350390411e-06, "epoch": 2.239513351085921, "percentage": 44.79, "elapsed_time": "1:57:57", "remaining_time": "2:25:23", "throughput": 8732.08, "total_tokens": 61801064} +{"current_steps": 91675, "total_steps": 204665, "loss": 0.0253, "lr": 1.3488314307304994e-06, "epoch": 2.239635501917768, "percentage": 44.79, "elapsed_time": "1:57:57", "remaining_time": "2:25:23", "throughput": 8732.11, "total_tokens": 61804264} +{"current_steps": 91680, "total_steps": 204665, "loss": 0.0518, "lr": 1.3487515085337917e-06, "epoch": 2.2397576527496152, "percentage": 44.8, "elapsed_time": "1:57:58", "remaining_time": "2:25:23", "throughput": 8732.12, "total_tokens": 61807336} +{"current_steps": 91685, "total_steps": 204665, "loss": 0.0004, "lr": 1.3486715838008693e-06, "epoch": 2.2398798035814624, "percentage": 44.8, "elapsed_time": "1:57:58", "remaining_time": "2:25:22", "throughput": 8732.19, "total_tokens": 61810920} +{"current_steps": 91690, "total_steps": 204665, "loss": 0.0458, "lr": 1.3485916565323135e-06, "epoch": 2.2400019544133096, "percentage": 44.8, "elapsed_time": "1:57:58", "remaining_time": "2:25:22", "throughput": 8732.22, "total_tokens": 61814184} +{"current_steps": 91695, "total_steps": 204665, "loss": 0.0025, "lr": 1.3485117267287053e-06, "epoch": 2.240124105245157, "percentage": 44.8, "elapsed_time": "1:57:59", "remaining_time": "2:25:21", "throughput": 8732.27, "total_tokens": 61817576} +{"current_steps": 91700, "total_steps": 204665, "loss": 0.0373, "lr": 1.348431794390626e-06, "epoch": 2.240246256077004, "percentage": 44.8, "elapsed_time": "1:57:59", "remaining_time": "2:25:21", "throughput": 8732.3, "total_tokens": 61820840} +{"current_steps": 91705, "total_steps": 204665, "loss": 0.0001, "lr": 1.3483518595186572e-06, "epoch": 2.240368406908851, "percentage": 44.81, "elapsed_time": "1:57:59", "remaining_time": "2:25:20", "throughput": 8732.37, "total_tokens": 61824424} +{"current_steps": 91710, "total_steps": 204665, "loss": 0.0001, "lr": 1.3482719221133799e-06, "epoch": 2.2404905577406984, "percentage": 44.81, "elapsed_time": "1:58:00", "remaining_time": "2:25:20", "throughput": 8732.4, "total_tokens": 61827624} +{"current_steps": 91715, "total_steps": 204665, "loss": 0.0285, "lr": 1.3481919821753754e-06, "epoch": 2.2406127085725456, "percentage": 44.81, "elapsed_time": "1:58:00", "remaining_time": "2:25:19", "throughput": 8732.41, "total_tokens": 61830760} +{"current_steps": 91720, "total_steps": 204665, "loss": 0.0002, "lr": 1.348112039705225e-06, "epoch": 2.2407348594043928, "percentage": 44.81, "elapsed_time": "1:58:00", "remaining_time": "2:25:19", "throughput": 8732.46, "total_tokens": 61834088} +{"current_steps": 91725, "total_steps": 204665, "loss": 0.0002, "lr": 1.3480320947035106e-06, "epoch": 2.2408570102362395, "percentage": 44.82, "elapsed_time": "1:58:01", "remaining_time": "2:25:19", "throughput": 8732.53, "total_tokens": 61837672} +{"current_steps": 91730, "total_steps": 204665, "loss": 0.0367, "lr": 1.347952147170813e-06, "epoch": 2.2409791610680867, "percentage": 44.82, "elapsed_time": "1:58:01", "remaining_time": "2:25:18", "throughput": 8732.54, "total_tokens": 61840680} +{"current_steps": 91735, "total_steps": 204665, "loss": 0.0004, "lr": 1.3478721971077137e-06, "epoch": 2.241101311899934, "percentage": 44.82, "elapsed_time": "1:58:01", "remaining_time": "2:25:18", "throughput": 8732.57, "total_tokens": 61843880} +{"current_steps": 91740, "total_steps": 204665, "loss": 0.0173, "lr": 1.3477922445147943e-06, "epoch": 2.241223462731781, "percentage": 44.82, "elapsed_time": "1:58:02", "remaining_time": "2:25:17", "throughput": 8732.62, "total_tokens": 61847336} +{"current_steps": 91745, "total_steps": 204665, "loss": 0.0384, "lr": 1.347712289392636e-06, "epoch": 2.2413456135636283, "percentage": 44.83, "elapsed_time": "1:58:02", "remaining_time": "2:25:17", "throughput": 8732.66, "total_tokens": 61850600} +{"current_steps": 91750, "total_steps": 204665, "loss": 0.0002, "lr": 1.3476323317418208e-06, "epoch": 2.2414677643954755, "percentage": 44.83, "elapsed_time": "1:58:03", "remaining_time": "2:25:16", "throughput": 8732.7, "total_tokens": 61853864} +{"current_steps": 91755, "total_steps": 204665, "loss": 0.1162, "lr": 1.3475523715629296e-06, "epoch": 2.2415899152273226, "percentage": 44.83, "elapsed_time": "1:58:03", "remaining_time": "2:25:16", "throughput": 8732.73, "total_tokens": 61857064} +{"current_steps": 91760, "total_steps": 204665, "loss": 0.0001, "lr": 1.3474724088565442e-06, "epoch": 2.24171206605917, "percentage": 44.83, "elapsed_time": "1:58:03", "remaining_time": "2:25:16", "throughput": 8732.76, "total_tokens": 61860328} +{"current_steps": 91765, "total_steps": 204665, "loss": 0.0004, "lr": 1.3473924436232456e-06, "epoch": 2.241834216891017, "percentage": 44.84, "elapsed_time": "1:58:04", "remaining_time": "2:25:15", "throughput": 8732.78, "total_tokens": 61863464} +{"current_steps": 91770, "total_steps": 204665, "loss": 0.0389, "lr": 1.347312475863616e-06, "epoch": 2.241956367722864, "percentage": 44.84, "elapsed_time": "1:58:04", "remaining_time": "2:25:15", "throughput": 8732.84, "total_tokens": 61866920} +{"current_steps": 91775, "total_steps": 204665, "loss": 0.0905, "lr": 1.3472325055782366e-06, "epoch": 2.2420785185547114, "percentage": 44.84, "elapsed_time": "1:58:04", "remaining_time": "2:25:14", "throughput": 8732.86, "total_tokens": 61870056} +{"current_steps": 91780, "total_steps": 204665, "loss": 0.0055, "lr": 1.347152532767689e-06, "epoch": 2.2422006693865586, "percentage": 44.84, "elapsed_time": "1:58:05", "remaining_time": "2:25:14", "throughput": 8732.92, "total_tokens": 61873512} +{"current_steps": 91785, "total_steps": 204665, "loss": 0.0001, "lr": 1.347072557432555e-06, "epoch": 2.242322820218406, "percentage": 44.85, "elapsed_time": "1:58:05", "remaining_time": "2:25:13", "throughput": 8733.07, "total_tokens": 61877736} +{"current_steps": 91790, "total_steps": 204665, "loss": 0.1082, "lr": 1.3469925795734155e-06, "epoch": 2.242444971050253, "percentage": 44.85, "elapsed_time": "1:58:05", "remaining_time": "2:25:13", "throughput": 8733.08, "total_tokens": 61880808} +{"current_steps": 91795, "total_steps": 204665, "loss": 0.0716, "lr": 1.346912599190853e-06, "epoch": 2.2425671218821, "percentage": 44.85, "elapsed_time": "1:58:06", "remaining_time": "2:25:13", "throughput": 8733.18, "total_tokens": 61884648} +{"current_steps": 91800, "total_steps": 204665, "loss": 0.0777, "lr": 1.346832616285449e-06, "epoch": 2.2426892727139474, "percentage": 44.85, "elapsed_time": "1:58:06", "remaining_time": "2:25:12", "throughput": 8733.2, "total_tokens": 61887784} +{"current_steps": 91805, "total_steps": 204665, "loss": 0.0003, "lr": 1.3467526308577846e-06, "epoch": 2.2428114235457945, "percentage": 44.86, "elapsed_time": "1:58:06", "remaining_time": "2:25:12", "throughput": 8733.26, "total_tokens": 61891240} +{"current_steps": 91810, "total_steps": 204665, "loss": 0.0001, "lr": 1.3466726429084418e-06, "epoch": 2.2429335743776413, "percentage": 44.86, "elapsed_time": "1:58:07", "remaining_time": "2:25:11", "throughput": 8733.3, "total_tokens": 61894568} +{"current_steps": 91815, "total_steps": 204665, "loss": 0.1248, "lr": 1.3465926524380024e-06, "epoch": 2.2430557252094885, "percentage": 44.86, "elapsed_time": "1:58:07", "remaining_time": "2:25:11", "throughput": 8733.34, "total_tokens": 61897896} +{"current_steps": 91820, "total_steps": 204665, "loss": 0.0005, "lr": 1.3465126594470481e-06, "epoch": 2.2431778760413357, "percentage": 44.86, "elapsed_time": "1:58:07", "remaining_time": "2:25:10", "throughput": 8733.38, "total_tokens": 61901160} +{"current_steps": 91825, "total_steps": 204665, "loss": 0.0002, "lr": 1.3464326639361604e-06, "epoch": 2.243300026873183, "percentage": 44.87, "elapsed_time": "1:58:08", "remaining_time": "2:25:10", "throughput": 8733.42, "total_tokens": 61904424} +{"current_steps": 91830, "total_steps": 204665, "loss": 0.0004, "lr": 1.346352665905921e-06, "epoch": 2.24342217770503, "percentage": 44.87, "elapsed_time": "1:58:08", "remaining_time": "2:25:09", "throughput": 8733.46, "total_tokens": 61907752} +{"current_steps": 91835, "total_steps": 204665, "loss": 0.0002, "lr": 1.3462726653569121e-06, "epoch": 2.2435443285368772, "percentage": 44.87, "elapsed_time": "1:58:08", "remaining_time": "2:25:09", "throughput": 8733.46, "total_tokens": 61910760} +{"current_steps": 91840, "total_steps": 204665, "loss": 0.0691, "lr": 1.3461926622897153e-06, "epoch": 2.2436664793687244, "percentage": 44.87, "elapsed_time": "1:58:09", "remaining_time": "2:25:09", "throughput": 8733.52, "total_tokens": 61914216} +{"current_steps": 91845, "total_steps": 204665, "loss": 0.1318, "lr": 1.3461126567049123e-06, "epoch": 2.2437886302005716, "percentage": 44.88, "elapsed_time": "1:58:09", "remaining_time": "2:25:08", "throughput": 8733.55, "total_tokens": 61917416} +{"current_steps": 91850, "total_steps": 204665, "loss": 0.1174, "lr": 1.3460326486030849e-06, "epoch": 2.243910781032419, "percentage": 44.88, "elapsed_time": "1:58:09", "remaining_time": "2:25:08", "throughput": 8733.58, "total_tokens": 61920680} +{"current_steps": 91855, "total_steps": 204665, "loss": 0.0346, "lr": 1.345952637984815e-06, "epoch": 2.244032931864266, "percentage": 44.88, "elapsed_time": "1:58:10", "remaining_time": "2:25:07", "throughput": 8733.61, "total_tokens": 61923880} +{"current_steps": 91860, "total_steps": 204665, "loss": 0.0004, "lr": 1.3458726248506844e-06, "epoch": 2.244155082696113, "percentage": 44.88, "elapsed_time": "1:58:10", "remaining_time": "2:25:07", "throughput": 8733.62, "total_tokens": 61926888} +{"current_steps": 91865, "total_steps": 204665, "loss": 0.0443, "lr": 1.3457926092012752e-06, "epoch": 2.2442772335279604, "percentage": 44.89, "elapsed_time": "1:58:10", "remaining_time": "2:25:06", "throughput": 8733.68, "total_tokens": 61930344} +{"current_steps": 91870, "total_steps": 204665, "loss": 0.0434, "lr": 1.3457125910371692e-06, "epoch": 2.2443993843598076, "percentage": 44.89, "elapsed_time": "1:58:11", "remaining_time": "2:25:06", "throughput": 8733.72, "total_tokens": 61933736} +{"current_steps": 91875, "total_steps": 204665, "loss": 0.0007, "lr": 1.345632570358948e-06, "epoch": 2.2445215351916548, "percentage": 44.89, "elapsed_time": "1:58:11", "remaining_time": "2:25:06", "throughput": 8733.76, "total_tokens": 61937064} +{"current_steps": 91880, "total_steps": 204665, "loss": 0.0007, "lr": 1.345552547167194e-06, "epoch": 2.244643686023502, "percentage": 44.89, "elapsed_time": "1:58:12", "remaining_time": "2:25:05", "throughput": 8733.84, "total_tokens": 61940712} +{"current_steps": 91885, "total_steps": 204665, "loss": 0.0627, "lr": 1.345472521462489e-06, "epoch": 2.244765836855349, "percentage": 44.9, "elapsed_time": "1:58:12", "remaining_time": "2:25:05", "throughput": 8733.88, "total_tokens": 61944040} +{"current_steps": 91890, "total_steps": 204665, "loss": 0.0617, "lr": 1.3453924932454145e-06, "epoch": 2.2448879876871963, "percentage": 44.9, "elapsed_time": "1:58:12", "remaining_time": "2:25:04", "throughput": 8733.89, "total_tokens": 61947112} +{"current_steps": 91895, "total_steps": 204665, "loss": 0.0385, "lr": 1.3453124625165533e-06, "epoch": 2.2450101385190435, "percentage": 44.9, "elapsed_time": "1:58:13", "remaining_time": "2:25:04", "throughput": 8733.93, "total_tokens": 61950376} +{"current_steps": 91900, "total_steps": 204665, "loss": 0.0001, "lr": 1.3452324292764866e-06, "epoch": 2.2451322893508907, "percentage": 44.9, "elapsed_time": "1:58:13", "remaining_time": "2:25:03", "throughput": 8733.98, "total_tokens": 61953832} +{"current_steps": 91905, "total_steps": 204665, "loss": 0.053, "lr": 1.345152393525797e-06, "epoch": 2.2452544401827375, "percentage": 44.91, "elapsed_time": "1:58:13", "remaining_time": "2:25:03", "throughput": 8733.99, "total_tokens": 61956840} +{"current_steps": 91910, "total_steps": 204665, "loss": 0.0002, "lr": 1.3450723552650667e-06, "epoch": 2.2453765910145846, "percentage": 44.91, "elapsed_time": "1:58:14", "remaining_time": "2:25:03", "throughput": 8734.07, "total_tokens": 61960488} +{"current_steps": 91915, "total_steps": 204665, "loss": 0.0002, "lr": 1.3449923144948772e-06, "epoch": 2.245498741846432, "percentage": 44.91, "elapsed_time": "1:58:14", "remaining_time": "2:25:02", "throughput": 8734.09, "total_tokens": 61963688} +{"current_steps": 91920, "total_steps": 204665, "loss": 0.0002, "lr": 1.3449122712158106e-06, "epoch": 2.245620892678279, "percentage": 44.91, "elapsed_time": "1:58:14", "remaining_time": "2:25:02", "throughput": 8734.17, "total_tokens": 61967336} +{"current_steps": 91925, "total_steps": 204665, "loss": 0.0567, "lr": 1.3448322254284495e-06, "epoch": 2.245743043510126, "percentage": 44.91, "elapsed_time": "1:58:15", "remaining_time": "2:25:01", "throughput": 8734.23, "total_tokens": 61970792} +{"current_steps": 91930, "total_steps": 204665, "loss": 0.0668, "lr": 1.3447521771333754e-06, "epoch": 2.2458651943419734, "percentage": 44.92, "elapsed_time": "1:58:15", "remaining_time": "2:25:01", "throughput": 8734.26, "total_tokens": 61974056} +{"current_steps": 91935, "total_steps": 204665, "loss": 0.001, "lr": 1.344672126331171e-06, "epoch": 2.2459873451738206, "percentage": 44.92, "elapsed_time": "1:58:15", "remaining_time": "2:25:00", "throughput": 8734.29, "total_tokens": 61977256} +{"current_steps": 91940, "total_steps": 204665, "loss": 0.0492, "lr": 1.3445920730224177e-06, "epoch": 2.246109496005668, "percentage": 44.92, "elapsed_time": "1:58:16", "remaining_time": "2:25:00", "throughput": 8734.31, "total_tokens": 61980456} +{"current_steps": 91945, "total_steps": 204665, "loss": 0.0787, "lr": 1.3445120172076987e-06, "epoch": 2.246231646837515, "percentage": 44.92, "elapsed_time": "1:58:16", "remaining_time": "2:25:00", "throughput": 8734.36, "total_tokens": 61983848} +{"current_steps": 91950, "total_steps": 204665, "loss": 0.0002, "lr": 1.3444319588875955e-06, "epoch": 2.246353797669362, "percentage": 44.93, "elapsed_time": "1:58:16", "remaining_time": "2:24:59", "throughput": 8734.39, "total_tokens": 61987112} +{"current_steps": 91955, "total_steps": 204665, "loss": 0.0004, "lr": 1.3443518980626904e-06, "epoch": 2.2464759485012094, "percentage": 44.93, "elapsed_time": "1:58:17", "remaining_time": "2:24:59", "throughput": 8734.45, "total_tokens": 61990568} +{"current_steps": 91960, "total_steps": 204665, "loss": 0.0004, "lr": 1.3442718347335658e-06, "epoch": 2.2465980993330565, "percentage": 44.93, "elapsed_time": "1:58:17", "remaining_time": "2:24:58", "throughput": 8734.52, "total_tokens": 61994152} +{"current_steps": 91965, "total_steps": 204665, "loss": 0.039, "lr": 1.3441917689008038e-06, "epoch": 2.2467202501649037, "percentage": 44.93, "elapsed_time": "1:58:17", "remaining_time": "2:24:58", "throughput": 8734.59, "total_tokens": 61997736} +{"current_steps": 91970, "total_steps": 204665, "loss": 0.0001, "lr": 1.3441117005649867e-06, "epoch": 2.246842400996751, "percentage": 44.94, "elapsed_time": "1:58:18", "remaining_time": "2:24:57", "throughput": 8734.7, "total_tokens": 62001640} +{"current_steps": 91975, "total_steps": 204665, "loss": 0.0002, "lr": 1.3440316297266967e-06, "epoch": 2.246964551828598, "percentage": 44.94, "elapsed_time": "1:58:18", "remaining_time": "2:24:57", "throughput": 8734.72, "total_tokens": 62004776} +{"current_steps": 91980, "total_steps": 204665, "loss": 0.0322, "lr": 1.343951556386516e-06, "epoch": 2.2470867026604453, "percentage": 44.94, "elapsed_time": "1:58:19", "remaining_time": "2:24:57", "throughput": 8734.77, "total_tokens": 62008232} +{"current_steps": 91985, "total_steps": 204665, "loss": 0.0467, "lr": 1.343871480545027e-06, "epoch": 2.2472088534922925, "percentage": 44.94, "elapsed_time": "1:58:19", "remaining_time": "2:24:56", "throughput": 8734.8, "total_tokens": 62011496} +{"current_steps": 91990, "total_steps": 204665, "loss": 0.0005, "lr": 1.3437914022028122e-06, "epoch": 2.2473310043241392, "percentage": 44.95, "elapsed_time": "1:58:19", "remaining_time": "2:24:56", "throughput": 8734.85, "total_tokens": 62014888} +{"current_steps": 91995, "total_steps": 204665, "loss": 0.0544, "lr": 1.343711321360454e-06, "epoch": 2.2474531551559864, "percentage": 44.95, "elapsed_time": "1:58:20", "remaining_time": "2:24:55", "throughput": 8734.89, "total_tokens": 62018280} +{"current_steps": 92000, "total_steps": 204665, "loss": 0.0003, "lr": 1.3436312380185345e-06, "epoch": 2.2475753059878336, "percentage": 44.95, "elapsed_time": "1:58:20", "remaining_time": "2:24:55", "throughput": 8734.95, "total_tokens": 62021736} +{"current_steps": 92005, "total_steps": 204665, "loss": 0.0888, "lr": 1.3435511521776363e-06, "epoch": 2.247697456819681, "percentage": 44.95, "elapsed_time": "1:58:20", "remaining_time": "2:24:54", "throughput": 8735.01, "total_tokens": 62025192} +{"current_steps": 92010, "total_steps": 204665, "loss": 0.0008, "lr": 1.343471063838342e-06, "epoch": 2.247819607651528, "percentage": 44.96, "elapsed_time": "1:58:21", "remaining_time": "2:24:54", "throughput": 8735.09, "total_tokens": 62028904} +{"current_steps": 92015, "total_steps": 204665, "loss": 0.0931, "lr": 1.3433909730012334e-06, "epoch": 2.247941758483375, "percentage": 44.96, "elapsed_time": "1:58:21", "remaining_time": "2:24:54", "throughput": 8735.17, "total_tokens": 62032552} +{"current_steps": 92020, "total_steps": 204665, "loss": 0.0638, "lr": 1.3433108796668933e-06, "epoch": 2.2480639093152224, "percentage": 44.96, "elapsed_time": "1:58:21", "remaining_time": "2:24:53", "throughput": 8735.21, "total_tokens": 62035944} +{"current_steps": 92025, "total_steps": 204665, "loss": 0.0744, "lr": 1.3432307838359043e-06, "epoch": 2.2481860601470696, "percentage": 44.96, "elapsed_time": "1:58:22", "remaining_time": "2:24:53", "throughput": 8735.26, "total_tokens": 62039336} +{"current_steps": 92030, "total_steps": 204665, "loss": 0.1497, "lr": 1.3431506855088483e-06, "epoch": 2.2483082109789168, "percentage": 44.97, "elapsed_time": "1:58:22", "remaining_time": "2:24:52", "throughput": 8735.27, "total_tokens": 62042408} +{"current_steps": 92035, "total_steps": 204665, "loss": 0.0363, "lr": 1.3430705846863086e-06, "epoch": 2.248430361810764, "percentage": 44.97, "elapsed_time": "1:58:22", "remaining_time": "2:24:52", "throughput": 8735.34, "total_tokens": 62045992} +{"current_steps": 92040, "total_steps": 204665, "loss": 0.0425, "lr": 1.3429904813688674e-06, "epoch": 2.248552512642611, "percentage": 44.97, "elapsed_time": "1:58:23", "remaining_time": "2:24:51", "throughput": 8735.41, "total_tokens": 62049576} +{"current_steps": 92045, "total_steps": 204665, "loss": 0.08, "lr": 1.3429103755571066e-06, "epoch": 2.2486746634744583, "percentage": 44.97, "elapsed_time": "1:58:23", "remaining_time": "2:24:51", "throughput": 8735.44, "total_tokens": 62052840} +{"current_steps": 92050, "total_steps": 204665, "loss": 0.0002, "lr": 1.34283026725161e-06, "epoch": 2.2487968143063055, "percentage": 44.98, "elapsed_time": "1:58:23", "remaining_time": "2:24:51", "throughput": 8735.45, "total_tokens": 62055912} +{"current_steps": 92055, "total_steps": 204665, "loss": 0.0343, "lr": 1.342750156452959e-06, "epoch": 2.2489189651381527, "percentage": 44.98, "elapsed_time": "1:58:24", "remaining_time": "2:24:50", "throughput": 8735.5, "total_tokens": 62059304} +{"current_steps": 92060, "total_steps": 204665, "loss": 0.0355, "lr": 1.342670043161737e-06, "epoch": 2.24904111597, "percentage": 44.98, "elapsed_time": "1:58:24", "remaining_time": "2:24:50", "throughput": 8735.78, "total_tokens": 62064808} +{"current_steps": 92065, "total_steps": 204665, "loss": 0.0001, "lr": 1.3425899273785262e-06, "epoch": 2.249163266801847, "percentage": 44.98, "elapsed_time": "1:58:25", "remaining_time": "2:24:49", "throughput": 8735.82, "total_tokens": 62068136} +{"current_steps": 92070, "total_steps": 204665, "loss": 0.0363, "lr": 1.3425098091039095e-06, "epoch": 2.2492854176336943, "percentage": 44.99, "elapsed_time": "1:58:25", "remaining_time": "2:24:49", "throughput": 8735.79, "total_tokens": 62070888} +{"current_steps": 92075, "total_steps": 204665, "loss": 0.0171, "lr": 1.3424296883384688e-06, "epoch": 2.249407568465541, "percentage": 44.99, "elapsed_time": "1:58:25", "remaining_time": "2:24:48", "throughput": 8735.79, "total_tokens": 62073832} +{"current_steps": 92080, "total_steps": 204665, "loss": 0.0, "lr": 1.3423495650827877e-06, "epoch": 2.249529719297388, "percentage": 44.99, "elapsed_time": "1:58:26", "remaining_time": "2:24:48", "throughput": 8735.84, "total_tokens": 62077288} +{"current_steps": 92085, "total_steps": 204665, "loss": 0.0009, "lr": 1.3422694393374484e-06, "epoch": 2.2496518701292354, "percentage": 44.99, "elapsed_time": "1:58:26", "remaining_time": "2:24:48", "throughput": 8735.85, "total_tokens": 62080360} +{"current_steps": 92090, "total_steps": 204665, "loss": 0.0001, "lr": 1.3421893111030338e-06, "epoch": 2.2497740209610826, "percentage": 45.0, "elapsed_time": "1:58:26", "remaining_time": "2:24:47", "throughput": 8735.87, "total_tokens": 62083496} +{"current_steps": 92095, "total_steps": 204665, "loss": 0.1333, "lr": 1.3421091803801262e-06, "epoch": 2.2498961717929298, "percentage": 45.0, "elapsed_time": "1:58:27", "remaining_time": "2:24:47", "throughput": 8735.9, "total_tokens": 62086760} +{"current_steps": 92100, "total_steps": 204665, "loss": 0.0001, "lr": 1.342029047169309e-06, "epoch": 2.250018322624777, "percentage": 45.0, "elapsed_time": "1:58:27", "remaining_time": "2:24:46", "throughput": 8735.94, "total_tokens": 62090024} +{"current_steps": 92105, "total_steps": 204665, "loss": 0.0001, "lr": 1.341948911471164e-06, "epoch": 2.250140473456624, "percentage": 45.0, "elapsed_time": "1:58:27", "remaining_time": "2:24:46", "throughput": 8735.96, "total_tokens": 62093160} +{"current_steps": 92106, "total_steps": 204665, "eval_loss": 0.2040521502494812, "epoch": 2.2501649036229936, "percentage": 45.0, "elapsed_time": "1:59:15", "remaining_time": "2:25:44", "throughput": 8678.0, "total_tokens": 62093992} +{"current_steps": 92110, "total_steps": 204665, "loss": 0.0001, "lr": 1.341868773286275e-06, "epoch": 2.2502626242884713, "percentage": 45.01, "elapsed_time": "1:59:52", "remaining_time": "2:26:28", "throughput": 8633.88, "total_tokens": 62097000} +{"current_steps": 92115, "total_steps": 204665, "loss": 0.0002, "lr": 1.3417886326152247e-06, "epoch": 2.2503847751203185, "percentage": 45.01, "elapsed_time": "1:59:52", "remaining_time": "2:26:28", "throughput": 8633.89, "total_tokens": 62100072} +{"current_steps": 92120, "total_steps": 204665, "loss": 0.043, "lr": 1.3417084894585948e-06, "epoch": 2.2505069259521657, "percentage": 45.01, "elapsed_time": "1:59:52", "remaining_time": "2:26:27", "throughput": 8633.92, "total_tokens": 62103272} +{"current_steps": 92125, "total_steps": 204665, "loss": 0.0002, "lr": 1.341628343816969e-06, "epoch": 2.250629076784013, "percentage": 45.01, "elapsed_time": "1:59:53", "remaining_time": "2:26:27", "throughput": 8633.94, "total_tokens": 62106344} +{"current_steps": 92130, "total_steps": 204665, "loss": 0.0598, "lr": 1.3415481956909305e-06, "epoch": 2.25075122761586, "percentage": 45.02, "elapsed_time": "1:59:53", "remaining_time": "2:26:26", "throughput": 8633.97, "total_tokens": 62109480} +{"current_steps": 92135, "total_steps": 204665, "loss": 0.1393, "lr": 1.341468045081061e-06, "epoch": 2.2508733784477073, "percentage": 45.02, "elapsed_time": "1:59:53", "remaining_time": "2:26:26", "throughput": 8634.04, "total_tokens": 62113064} +{"current_steps": 92140, "total_steps": 204665, "loss": 0.1327, "lr": 1.3413878919879443e-06, "epoch": 2.2509955292795545, "percentage": 45.02, "elapsed_time": "1:59:54", "remaining_time": "2:26:26", "throughput": 8634.13, "total_tokens": 62116776} +{"current_steps": 92145, "total_steps": 204665, "loss": 0.0002, "lr": 1.341307736412163e-06, "epoch": 2.2511176801114017, "percentage": 45.02, "elapsed_time": "1:59:54", "remaining_time": "2:26:25", "throughput": 8634.18, "total_tokens": 62120168} +{"current_steps": 92150, "total_steps": 204665, "loss": 0.0004, "lr": 1.3412275783543002e-06, "epoch": 2.251239830943249, "percentage": 45.02, "elapsed_time": "1:59:55", "remaining_time": "2:26:25", "throughput": 8634.21, "total_tokens": 62123304} +{"current_steps": 92155, "total_steps": 204665, "loss": 0.0531, "lr": 1.3411474178149384e-06, "epoch": 2.251361981775096, "percentage": 45.03, "elapsed_time": "1:59:55", "remaining_time": "2:26:24", "throughput": 8634.27, "total_tokens": 62126824} +{"current_steps": 92160, "total_steps": 204665, "loss": 0.0002, "lr": 1.341067254794661e-06, "epoch": 2.2514841326069432, "percentage": 45.03, "elapsed_time": "1:59:55", "remaining_time": "2:26:24", "throughput": 8634.38, "total_tokens": 62130664} +{"current_steps": 92165, "total_steps": 204665, "loss": 0.0002, "lr": 1.340987089294051e-06, "epoch": 2.2516062834387904, "percentage": 45.03, "elapsed_time": "1:59:56", "remaining_time": "2:26:23", "throughput": 8634.44, "total_tokens": 62134184} +{"current_steps": 92170, "total_steps": 204665, "loss": 0.0002, "lr": 1.3409069213136908e-06, "epoch": 2.251728434270637, "percentage": 45.03, "elapsed_time": "1:59:56", "remaining_time": "2:26:23", "throughput": 8634.51, "total_tokens": 62137640} +{"current_steps": 92175, "total_steps": 204665, "loss": 0.0003, "lr": 1.3408267508541639e-06, "epoch": 2.2518505851024844, "percentage": 45.04, "elapsed_time": "1:59:56", "remaining_time": "2:26:22", "throughput": 8634.54, "total_tokens": 62140904} +{"current_steps": 92180, "total_steps": 204665, "loss": 0.0098, "lr": 1.3407465779160532e-06, "epoch": 2.2519727359343316, "percentage": 45.04, "elapsed_time": "1:59:57", "remaining_time": "2:26:22", "throughput": 8634.61, "total_tokens": 62144424} +{"current_steps": 92185, "total_steps": 204665, "loss": 0.0006, "lr": 1.3406664024999417e-06, "epoch": 2.2520948867661787, "percentage": 45.04, "elapsed_time": "1:59:57", "remaining_time": "2:26:22", "throughput": 8634.62, "total_tokens": 62147432} +{"current_steps": 92190, "total_steps": 204665, "loss": 0.038, "lr": 1.3405862246064126e-06, "epoch": 2.252217037598026, "percentage": 45.04, "elapsed_time": "1:59:57", "remaining_time": "2:26:21", "throughput": 8634.62, "total_tokens": 62150312} +{"current_steps": 92195, "total_steps": 204665, "loss": 0.1214, "lr": 1.3405060442360488e-06, "epoch": 2.252339188429873, "percentage": 45.05, "elapsed_time": "1:59:58", "remaining_time": "2:26:21", "throughput": 8634.68, "total_tokens": 62153768} +{"current_steps": 92200, "total_steps": 204665, "loss": 0.0414, "lr": 1.3404258613894332e-06, "epoch": 2.2524613392617203, "percentage": 45.05, "elapsed_time": "1:59:58", "remaining_time": "2:26:20", "throughput": 8634.73, "total_tokens": 62157160} +{"current_steps": 92205, "total_steps": 204665, "loss": 0.0925, "lr": 1.3403456760671494e-06, "epoch": 2.2525834900935675, "percentage": 45.05, "elapsed_time": "1:59:58", "remaining_time": "2:26:20", "throughput": 8634.81, "total_tokens": 62160744} +{"current_steps": 92210, "total_steps": 204665, "loss": 0.0628, "lr": 1.3402654882697803e-06, "epoch": 2.2527056409254147, "percentage": 45.05, "elapsed_time": "1:59:59", "remaining_time": "2:26:19", "throughput": 8634.82, "total_tokens": 62163816} +{"current_steps": 92215, "total_steps": 204665, "loss": 0.0385, "lr": 1.3401852979979094e-06, "epoch": 2.252827791757262, "percentage": 45.06, "elapsed_time": "1:59:59", "remaining_time": "2:26:19", "throughput": 8634.87, "total_tokens": 62167144} +{"current_steps": 92220, "total_steps": 204665, "loss": 0.0001, "lr": 1.340105105252119e-06, "epoch": 2.252949942589109, "percentage": 45.06, "elapsed_time": "1:59:59", "remaining_time": "2:26:18", "throughput": 8634.95, "total_tokens": 62170792} +{"current_steps": 92225, "total_steps": 204665, "loss": 0.0983, "lr": 1.3400249100329932e-06, "epoch": 2.2530720934209563, "percentage": 45.06, "elapsed_time": "2:00:00", "remaining_time": "2:26:18", "throughput": 8635.01, "total_tokens": 62174248} +{"current_steps": 92230, "total_steps": 204665, "loss": 0.0312, "lr": 1.3399447123411146e-06, "epoch": 2.2531942442528035, "percentage": 45.06, "elapsed_time": "2:00:00", "remaining_time": "2:26:18", "throughput": 8635.04, "total_tokens": 62177384} +{"current_steps": 92235, "total_steps": 204665, "loss": 0.0005, "lr": 1.3398645121770664e-06, "epoch": 2.2533163950846506, "percentage": 45.07, "elapsed_time": "2:00:00", "remaining_time": "2:26:17", "throughput": 8635.08, "total_tokens": 62180712} +{"current_steps": 92240, "total_steps": 204665, "loss": 0.0003, "lr": 1.3397843095414324e-06, "epoch": 2.253438545916498, "percentage": 45.07, "elapsed_time": "2:00:01", "remaining_time": "2:26:17", "throughput": 8635.13, "total_tokens": 62184104} +{"current_steps": 92245, "total_steps": 204665, "loss": 0.0966, "lr": 1.3397041044347953e-06, "epoch": 2.253560696748345, "percentage": 45.07, "elapsed_time": "2:00:01", "remaining_time": "2:26:16", "throughput": 8635.21, "total_tokens": 62187752} +{"current_steps": 92250, "total_steps": 204665, "loss": 0.0053, "lr": 1.3396238968577387e-06, "epoch": 2.253682847580192, "percentage": 45.07, "elapsed_time": "2:00:02", "remaining_time": "2:26:16", "throughput": 8635.28, "total_tokens": 62191272} +{"current_steps": 92255, "total_steps": 204665, "loss": 0.0002, "lr": 1.339543686810846e-06, "epoch": 2.253804998412039, "percentage": 45.08, "elapsed_time": "2:00:02", "remaining_time": "2:26:15", "throughput": 8635.31, "total_tokens": 62194536} +{"current_steps": 92260, "total_steps": 204665, "loss": 0.0527, "lr": 1.3394634742946998e-06, "epoch": 2.2539271492438866, "percentage": 45.08, "elapsed_time": "2:00:02", "remaining_time": "2:26:15", "throughput": 8635.33, "total_tokens": 62197608} +{"current_steps": 92265, "total_steps": 204665, "loss": 0.0004, "lr": 1.339383259309884e-06, "epoch": 2.2540493000757333, "percentage": 45.08, "elapsed_time": "2:00:03", "remaining_time": "2:26:14", "throughput": 8635.35, "total_tokens": 62200744} +{"current_steps": 92270, "total_steps": 204665, "loss": 0.0025, "lr": 1.3393030418569821e-06, "epoch": 2.2541714509075805, "percentage": 45.08, "elapsed_time": "2:00:03", "remaining_time": "2:26:14", "throughput": 8635.41, "total_tokens": 62204200} +{"current_steps": 92275, "total_steps": 204665, "loss": 0.1194, "lr": 1.3392228219365772e-06, "epoch": 2.2542936017394277, "percentage": 45.09, "elapsed_time": "2:00:03", "remaining_time": "2:26:14", "throughput": 8635.46, "total_tokens": 62207656} +{"current_steps": 92280, "total_steps": 204665, "loss": 0.0002, "lr": 1.3391425995492524e-06, "epoch": 2.254415752571275, "percentage": 45.09, "elapsed_time": "2:00:04", "remaining_time": "2:26:13", "throughput": 8635.56, "total_tokens": 62211432} +{"current_steps": 92285, "total_steps": 204665, "loss": 0.0002, "lr": 1.3390623746955918e-06, "epoch": 2.254537903403122, "percentage": 45.09, "elapsed_time": "2:00:04", "remaining_time": "2:26:13", "throughput": 8635.62, "total_tokens": 62214952} +{"current_steps": 92290, "total_steps": 204665, "loss": 0.0381, "lr": 1.3389821473761783e-06, "epoch": 2.2546600542349693, "percentage": 45.09, "elapsed_time": "2:00:04", "remaining_time": "2:26:12", "throughput": 8635.67, "total_tokens": 62218344} +{"current_steps": 92295, "total_steps": 204665, "loss": 0.0961, "lr": 1.338901917591595e-06, "epoch": 2.2547822050668165, "percentage": 45.1, "elapsed_time": "2:00:05", "remaining_time": "2:26:12", "throughput": 8635.75, "total_tokens": 62221992} +{"current_steps": 92300, "total_steps": 204665, "loss": 0.0986, "lr": 1.3388216853424262e-06, "epoch": 2.2549043558986637, "percentage": 45.1, "elapsed_time": "2:00:05", "remaining_time": "2:26:11", "throughput": 8635.79, "total_tokens": 62225320} +{"current_steps": 92305, "total_steps": 204665, "loss": 0.0002, "lr": 1.3387414506292548e-06, "epoch": 2.255026506730511, "percentage": 45.1, "elapsed_time": "2:00:05", "remaining_time": "2:26:11", "throughput": 8635.79, "total_tokens": 62228264} +{"current_steps": 92310, "total_steps": 204665, "loss": 0.0362, "lr": 1.3386612134526648e-06, "epoch": 2.255148657562358, "percentage": 45.1, "elapsed_time": "2:00:06", "remaining_time": "2:26:11", "throughput": 8635.83, "total_tokens": 62231528} +{"current_steps": 92315, "total_steps": 204665, "loss": 0.0891, "lr": 1.3385809738132392e-06, "epoch": 2.2552708083942052, "percentage": 45.11, "elapsed_time": "2:00:06", "remaining_time": "2:26:10", "throughput": 8635.9, "total_tokens": 62235048} +{"current_steps": 92320, "total_steps": 204665, "loss": 0.0002, "lr": 1.3385007317115614e-06, "epoch": 2.2553929592260524, "percentage": 45.11, "elapsed_time": "2:00:06", "remaining_time": "2:26:10", "throughput": 8635.91, "total_tokens": 62238120} +{"current_steps": 92325, "total_steps": 204665, "loss": 0.0315, "lr": 1.3384204871482156e-06, "epoch": 2.2555151100578996, "percentage": 45.11, "elapsed_time": "2:00:07", "remaining_time": "2:26:09", "throughput": 8635.97, "total_tokens": 62241576} +{"current_steps": 92330, "total_steps": 204665, "loss": 0.0003, "lr": 1.3383402401237848e-06, "epoch": 2.255637260889747, "percentage": 45.11, "elapsed_time": "2:00:07", "remaining_time": "2:26:09", "throughput": 8636.01, "total_tokens": 62244840} +{"current_steps": 92335, "total_steps": 204665, "loss": 0.0001, "lr": 1.3382599906388529e-06, "epoch": 2.255759411721594, "percentage": 45.12, "elapsed_time": "2:00:07", "remaining_time": "2:26:08", "throughput": 8635.99, "total_tokens": 62247592} +{"current_steps": 92340, "total_steps": 204665, "loss": 0.0001, "lr": 1.3381797386940035e-06, "epoch": 2.2558815625534407, "percentage": 45.12, "elapsed_time": "2:00:08", "remaining_time": "2:26:08", "throughput": 8636.05, "total_tokens": 62251112} +{"current_steps": 92345, "total_steps": 204665, "loss": 0.0969, "lr": 1.33809948428982e-06, "epoch": 2.2560037133852884, "percentage": 45.12, "elapsed_time": "2:00:08", "remaining_time": "2:26:07", "throughput": 8636.07, "total_tokens": 62254184} +{"current_steps": 92350, "total_steps": 204665, "loss": 0.0001, "lr": 1.3380192274268859e-06, "epoch": 2.256125864217135, "percentage": 45.12, "elapsed_time": "2:00:08", "remaining_time": "2:26:07", "throughput": 8636.2, "total_tokens": 62258216} +{"current_steps": 92355, "total_steps": 204665, "loss": 0.0489, "lr": 1.337938968105785e-06, "epoch": 2.2562480150489823, "percentage": 45.12, "elapsed_time": "2:00:09", "remaining_time": "2:26:07", "throughput": 8636.26, "total_tokens": 62261736} +{"current_steps": 92360, "total_steps": 204665, "loss": 0.0563, "lr": 1.337858706327101e-06, "epoch": 2.2563701658808295, "percentage": 45.13, "elapsed_time": "2:00:09", "remaining_time": "2:26:06", "throughput": 8636.31, "total_tokens": 62265064} +{"current_steps": 92365, "total_steps": 204665, "loss": 0.0648, "lr": 1.337778442091418e-06, "epoch": 2.2564923167126767, "percentage": 45.13, "elapsed_time": "2:00:10", "remaining_time": "2:26:06", "throughput": 8636.32, "total_tokens": 62268072} +{"current_steps": 92370, "total_steps": 204665, "loss": 0.0, "lr": 1.337698175399319e-06, "epoch": 2.256614467544524, "percentage": 45.13, "elapsed_time": "2:00:10", "remaining_time": "2:26:05", "throughput": 8636.39, "total_tokens": 62271592} +{"current_steps": 92375, "total_steps": 204665, "loss": 0.0006, "lr": 1.3376179062513884e-06, "epoch": 2.256736618376371, "percentage": 45.13, "elapsed_time": "2:00:10", "remaining_time": "2:26:05", "throughput": 8636.4, "total_tokens": 62274600} +{"current_steps": 92380, "total_steps": 204665, "loss": 0.0789, "lr": 1.3375376346482094e-06, "epoch": 2.2568587692082183, "percentage": 45.14, "elapsed_time": "2:00:11", "remaining_time": "2:26:04", "throughput": 8636.45, "total_tokens": 62277992} +{"current_steps": 92385, "total_steps": 204665, "loss": 0.0431, "lr": 1.337457360590366e-06, "epoch": 2.2569809200400655, "percentage": 45.14, "elapsed_time": "2:00:11", "remaining_time": "2:26:04", "throughput": 8636.56, "total_tokens": 62281896} +{"current_steps": 92390, "total_steps": 204665, "loss": 0.0203, "lr": 1.3373770840784416e-06, "epoch": 2.2571030708719126, "percentage": 45.14, "elapsed_time": "2:00:11", "remaining_time": "2:26:03", "throughput": 8636.65, "total_tokens": 62285544} +{"current_steps": 92395, "total_steps": 204665, "loss": 0.1419, "lr": 1.3372968051130205e-06, "epoch": 2.25722522170376, "percentage": 45.14, "elapsed_time": "2:00:12", "remaining_time": "2:26:03", "throughput": 8636.73, "total_tokens": 62289192} +{"current_steps": 92400, "total_steps": 204665, "loss": 0.0342, "lr": 1.3372165236946864e-06, "epoch": 2.257347372535607, "percentage": 45.15, "elapsed_time": "2:00:12", "remaining_time": "2:26:03", "throughput": 8636.76, "total_tokens": 62292392} +{"current_steps": 92405, "total_steps": 204665, "loss": 0.0007, "lr": 1.3371362398240228e-06, "epoch": 2.257469523367454, "percentage": 45.15, "elapsed_time": "2:00:12", "remaining_time": "2:26:02", "throughput": 8636.76, "total_tokens": 62295272} +{"current_steps": 92410, "total_steps": 204665, "loss": 0.0346, "lr": 1.3370559535016138e-06, "epoch": 2.2575916741993014, "percentage": 45.15, "elapsed_time": "2:00:13", "remaining_time": "2:26:02", "throughput": 8636.84, "total_tokens": 62298920} +{"current_steps": 92415, "total_steps": 204665, "loss": 0.1336, "lr": 1.3369756647280436e-06, "epoch": 2.2577138250311486, "percentage": 45.15, "elapsed_time": "2:00:13", "remaining_time": "2:26:01", "throughput": 8636.87, "total_tokens": 62302120} +{"current_steps": 92420, "total_steps": 204665, "loss": 0.0003, "lr": 1.3368953735038951e-06, "epoch": 2.257835975862996, "percentage": 45.16, "elapsed_time": "2:00:13", "remaining_time": "2:26:01", "throughput": 8636.89, "total_tokens": 62305192} +{"current_steps": 92425, "total_steps": 204665, "loss": 0.0837, "lr": 1.3368150798297535e-06, "epoch": 2.257958126694843, "percentage": 45.16, "elapsed_time": "2:00:14", "remaining_time": "2:26:00", "throughput": 8636.96, "total_tokens": 62308776} +{"current_steps": 92430, "total_steps": 204665, "loss": 0.0004, "lr": 1.336734783706202e-06, "epoch": 2.25808027752669, "percentage": 45.16, "elapsed_time": "2:00:14", "remaining_time": "2:26:00", "throughput": 8636.97, "total_tokens": 62311848} +{"current_steps": 92435, "total_steps": 204665, "loss": 0.0527, "lr": 1.3366544851338244e-06, "epoch": 2.258202428358537, "percentage": 45.16, "elapsed_time": "2:00:14", "remaining_time": "2:25:59", "throughput": 8637.1, "total_tokens": 62315944} +{"current_steps": 92440, "total_steps": 204665, "loss": 0.0002, "lr": 1.3365741841132047e-06, "epoch": 2.258324579190384, "percentage": 45.17, "elapsed_time": "2:00:15", "remaining_time": "2:25:59", "throughput": 8637.19, "total_tokens": 62319656} +{"current_steps": 92445, "total_steps": 204665, "loss": 0.1209, "lr": 1.3364938806449271e-06, "epoch": 2.2584467300222313, "percentage": 45.17, "elapsed_time": "2:00:15", "remaining_time": "2:25:59", "throughput": 8637.28, "total_tokens": 62323368} +{"current_steps": 92450, "total_steps": 204665, "loss": 0.0005, "lr": 1.3364135747295752e-06, "epoch": 2.2585688808540785, "percentage": 45.17, "elapsed_time": "2:00:15", "remaining_time": "2:25:58", "throughput": 8637.32, "total_tokens": 62326632} +{"current_steps": 92455, "total_steps": 204665, "loss": 0.0502, "lr": 1.3363332663677338e-06, "epoch": 2.2586910316859257, "percentage": 45.17, "elapsed_time": "2:00:16", "remaining_time": "2:25:58", "throughput": 8637.44, "total_tokens": 62330664} +{"current_steps": 92460, "total_steps": 204665, "loss": 0.0453, "lr": 1.3362529555599861e-06, "epoch": 2.258813182517773, "percentage": 45.18, "elapsed_time": "2:00:16", "remaining_time": "2:25:57", "throughput": 8637.51, "total_tokens": 62334248} +{"current_steps": 92465, "total_steps": 204665, "loss": 0.0003, "lr": 1.3361726423069165e-06, "epoch": 2.25893533334962, "percentage": 45.18, "elapsed_time": "2:00:17", "remaining_time": "2:25:57", "throughput": 8637.58, "total_tokens": 62337768} +{"current_steps": 92470, "total_steps": 204665, "loss": 0.0844, "lr": 1.3360923266091093e-06, "epoch": 2.2590574841814672, "percentage": 45.18, "elapsed_time": "2:00:17", "remaining_time": "2:25:56", "throughput": 8637.62, "total_tokens": 62341096} +{"current_steps": 92475, "total_steps": 204665, "loss": 0.0006, "lr": 1.3360120084671481e-06, "epoch": 2.2591796350133144, "percentage": 45.18, "elapsed_time": "2:00:17", "remaining_time": "2:25:56", "throughput": 8637.69, "total_tokens": 62344616} +{"current_steps": 92480, "total_steps": 204665, "loss": 0.001, "lr": 1.3359316878816174e-06, "epoch": 2.2593017858451616, "percentage": 45.19, "elapsed_time": "2:00:18", "remaining_time": "2:25:56", "throughput": 8637.74, "total_tokens": 62347944} +{"current_steps": 92485, "total_steps": 204665, "loss": 0.0912, "lr": 1.3358513648531008e-06, "epoch": 2.259423936677009, "percentage": 45.19, "elapsed_time": "2:00:18", "remaining_time": "2:25:55", "throughput": 8637.77, "total_tokens": 62351144} +{"current_steps": 92490, "total_steps": 204665, "loss": 0.0002, "lr": 1.3357710393821828e-06, "epoch": 2.259546087508856, "percentage": 45.19, "elapsed_time": "2:00:18", "remaining_time": "2:25:55", "throughput": 8637.78, "total_tokens": 62354216} +{"current_steps": 92495, "total_steps": 204665, "loss": 0.1095, "lr": 1.3356907114694477e-06, "epoch": 2.259668238340703, "percentage": 45.19, "elapsed_time": "2:00:19", "remaining_time": "2:25:54", "throughput": 8637.81, "total_tokens": 62357416} +{"current_steps": 92500, "total_steps": 204665, "loss": 0.1062, "lr": 1.3356103811154792e-06, "epoch": 2.2597903891725504, "percentage": 45.2, "elapsed_time": "2:00:19", "remaining_time": "2:25:54", "throughput": 8637.94, "total_tokens": 62361448} +{"current_steps": 92505, "total_steps": 204665, "loss": 0.0005, "lr": 1.3355300483208621e-06, "epoch": 2.2599125400043976, "percentage": 45.2, "elapsed_time": "2:00:19", "remaining_time": "2:25:53", "throughput": 8638.01, "total_tokens": 62365032} +{"current_steps": 92510, "total_steps": 204665, "loss": 0.0007, "lr": 1.3354497130861796e-06, "epoch": 2.2600346908362448, "percentage": 45.2, "elapsed_time": "2:00:20", "remaining_time": "2:25:53", "throughput": 8638.03, "total_tokens": 62368168} +{"current_steps": 92515, "total_steps": 204665, "loss": 0.0373, "lr": 1.335369375412017e-06, "epoch": 2.260156841668092, "percentage": 45.2, "elapsed_time": "2:00:20", "remaining_time": "2:25:52", "throughput": 8638.07, "total_tokens": 62371368} +{"current_steps": 92520, "total_steps": 204665, "loss": 0.0004, "lr": 1.335289035298958e-06, "epoch": 2.2602789924999387, "percentage": 45.21, "elapsed_time": "2:00:20", "remaining_time": "2:25:52", "throughput": 8638.11, "total_tokens": 62374760} +{"current_steps": 92525, "total_steps": 204665, "loss": 0.1364, "lr": 1.3352086927475872e-06, "epoch": 2.2604011433317863, "percentage": 45.21, "elapsed_time": "2:00:21", "remaining_time": "2:25:52", "throughput": 8638.11, "total_tokens": 62377704} +{"current_steps": 92530, "total_steps": 204665, "loss": 0.0001, "lr": 1.3351283477584883e-06, "epoch": 2.260523294163633, "percentage": 45.21, "elapsed_time": "2:00:21", "remaining_time": "2:25:51", "throughput": 8638.14, "total_tokens": 62380904} +{"current_steps": 92535, "total_steps": 204665, "loss": 0.0002, "lr": 1.3350480003322463e-06, "epoch": 2.2606454449954803, "percentage": 45.21, "elapsed_time": "2:00:21", "remaining_time": "2:25:51", "throughput": 8638.2, "total_tokens": 62384424} +{"current_steps": 92540, "total_steps": 204665, "loss": 0.0008, "lr": 1.334967650469445e-06, "epoch": 2.2607675958273274, "percentage": 45.22, "elapsed_time": "2:00:22", "remaining_time": "2:25:50", "throughput": 8638.24, "total_tokens": 62387688} +{"current_steps": 92545, "total_steps": 204665, "loss": 0.0004, "lr": 1.3348872981706685e-06, "epoch": 2.2608897466591746, "percentage": 45.22, "elapsed_time": "2:00:22", "remaining_time": "2:25:50", "throughput": 8638.31, "total_tokens": 62391272} +{"current_steps": 92550, "total_steps": 204665, "loss": 0.0563, "lr": 1.3348069434365017e-06, "epoch": 2.261011897491022, "percentage": 45.22, "elapsed_time": "2:00:22", "remaining_time": "2:25:49", "throughput": 8638.34, "total_tokens": 62394536} +{"current_steps": 92555, "total_steps": 204665, "loss": 0.0943, "lr": 1.3347265862675288e-06, "epoch": 2.261134048322869, "percentage": 45.22, "elapsed_time": "2:00:23", "remaining_time": "2:25:49", "throughput": 8638.39, "total_tokens": 62397864} +{"current_steps": 92560, "total_steps": 204665, "loss": 0.0003, "lr": 1.3346462266643342e-06, "epoch": 2.261256199154716, "percentage": 45.23, "elapsed_time": "2:00:23", "remaining_time": "2:25:49", "throughput": 8638.43, "total_tokens": 62401256} +{"current_steps": 92565, "total_steps": 204665, "loss": 0.0395, "lr": 1.334565864627502e-06, "epoch": 2.2613783499865634, "percentage": 45.23, "elapsed_time": "2:00:24", "remaining_time": "2:25:48", "throughput": 8638.48, "total_tokens": 62404648} +{"current_steps": 92570, "total_steps": 204665, "loss": 0.0004, "lr": 1.334485500157617e-06, "epoch": 2.2615005008184106, "percentage": 45.23, "elapsed_time": "2:00:24", "remaining_time": "2:25:48", "throughput": 8638.52, "total_tokens": 62407976} +{"current_steps": 92575, "total_steps": 204665, "loss": 0.0003, "lr": 1.3344051332552635e-06, "epoch": 2.2616226516502578, "percentage": 45.23, "elapsed_time": "2:00:24", "remaining_time": "2:25:47", "throughput": 8638.56, "total_tokens": 62411240} +{"current_steps": 92580, "total_steps": 204665, "loss": 0.0004, "lr": 1.334324763921026e-06, "epoch": 2.261744802482105, "percentage": 45.23, "elapsed_time": "2:00:25", "remaining_time": "2:25:47", "throughput": 8638.64, "total_tokens": 62414952} +{"current_steps": 92585, "total_steps": 204665, "loss": 0.0822, "lr": 1.334244392155489e-06, "epoch": 2.261866953313952, "percentage": 45.24, "elapsed_time": "2:00:25", "remaining_time": "2:25:46", "throughput": 8638.65, "total_tokens": 62417960} +{"current_steps": 92590, "total_steps": 204665, "loss": 0.004, "lr": 1.3341640179592363e-06, "epoch": 2.2619891041457993, "percentage": 45.24, "elapsed_time": "2:00:25", "remaining_time": "2:25:46", "throughput": 8638.79, "total_tokens": 62422120} +{"current_steps": 92595, "total_steps": 204665, "loss": 0.1738, "lr": 1.3340836413328536e-06, "epoch": 2.2621112549776465, "percentage": 45.24, "elapsed_time": "2:00:26", "remaining_time": "2:25:45", "throughput": 8638.82, "total_tokens": 62425384} +{"current_steps": 92600, "total_steps": 204665, "loss": 0.0003, "lr": 1.3340032622769245e-06, "epoch": 2.2622334058094937, "percentage": 45.24, "elapsed_time": "2:00:26", "remaining_time": "2:25:45", "throughput": 8638.84, "total_tokens": 62428584} +{"current_steps": 92605, "total_steps": 204665, "loss": 0.0002, "lr": 1.3339228807920337e-06, "epoch": 2.262355556641341, "percentage": 45.25, "elapsed_time": "2:00:26", "remaining_time": "2:25:45", "throughput": 8638.93, "total_tokens": 62432296} +{"current_steps": 92610, "total_steps": 204665, "loss": 0.0002, "lr": 1.333842496878766e-06, "epoch": 2.262477707473188, "percentage": 45.25, "elapsed_time": "2:00:27", "remaining_time": "2:25:44", "throughput": 8638.99, "total_tokens": 62435880} +{"current_steps": 92615, "total_steps": 204665, "loss": 0.0238, "lr": 1.333762110537706e-06, "epoch": 2.262599858305035, "percentage": 45.25, "elapsed_time": "2:00:27", "remaining_time": "2:25:44", "throughput": 8639.04, "total_tokens": 62439272} +{"current_steps": 92620, "total_steps": 204665, "loss": 0.0002, "lr": 1.3336817217694383e-06, "epoch": 2.262722009136882, "percentage": 45.25, "elapsed_time": "2:00:27", "remaining_time": "2:25:43", "throughput": 8639.09, "total_tokens": 62442664} +{"current_steps": 92625, "total_steps": 204665, "loss": 0.0877, "lr": 1.333601330574547e-06, "epoch": 2.2628441599687292, "percentage": 45.26, "elapsed_time": "2:00:28", "remaining_time": "2:25:43", "throughput": 8639.14, "total_tokens": 62446120} +{"current_steps": 92630, "total_steps": 204665, "loss": 0.0961, "lr": 1.3335209369536174e-06, "epoch": 2.2629663108005764, "percentage": 45.26, "elapsed_time": "2:00:28", "remaining_time": "2:25:42", "throughput": 8639.2, "total_tokens": 62449640} +{"current_steps": 92635, "total_steps": 204665, "loss": 0.0885, "lr": 1.3334405409072336e-06, "epoch": 2.2630884616324236, "percentage": 45.26, "elapsed_time": "2:00:28", "remaining_time": "2:25:42", "throughput": 8639.26, "total_tokens": 62453160} +{"current_steps": 92640, "total_steps": 204665, "loss": 0.046, "lr": 1.3333601424359806e-06, "epoch": 2.263210612464271, "percentage": 45.26, "elapsed_time": "2:00:29", "remaining_time": "2:25:42", "throughput": 8639.31, "total_tokens": 62456552} +{"current_steps": 92645, "total_steps": 204665, "loss": 0.0005, "lr": 1.3332797415404431e-06, "epoch": 2.263332763296118, "percentage": 45.27, "elapsed_time": "2:00:29", "remaining_time": "2:25:41", "throughput": 8639.36, "total_tokens": 62460008} +{"current_steps": 92650, "total_steps": 204665, "loss": 0.0002, "lr": 1.3331993382212058e-06, "epoch": 2.263454914127965, "percentage": 45.27, "elapsed_time": "2:00:30", "remaining_time": "2:25:41", "throughput": 8639.41, "total_tokens": 62463400} +{"current_steps": 92655, "total_steps": 204665, "loss": 0.0002, "lr": 1.333118932478853e-06, "epoch": 2.2635770649598124, "percentage": 45.27, "elapsed_time": "2:00:30", "remaining_time": "2:25:40", "throughput": 8639.47, "total_tokens": 62466856} +{"current_steps": 92660, "total_steps": 204665, "loss": 0.0005, "lr": 1.3330385243139697e-06, "epoch": 2.2636992157916596, "percentage": 45.27, "elapsed_time": "2:00:30", "remaining_time": "2:25:40", "throughput": 8639.48, "total_tokens": 62469928} +{"current_steps": 92665, "total_steps": 204665, "loss": 0.0006, "lr": 1.332958113727141e-06, "epoch": 2.2638213666235067, "percentage": 45.28, "elapsed_time": "2:00:31", "remaining_time": "2:25:39", "throughput": 8639.48, "total_tokens": 62472936} +{"current_steps": 92670, "total_steps": 204665, "loss": 0.009, "lr": 1.3328777007189507e-06, "epoch": 2.263943517455354, "percentage": 45.28, "elapsed_time": "2:00:31", "remaining_time": "2:25:39", "throughput": 8639.52, "total_tokens": 62476200} +{"current_steps": 92675, "total_steps": 204665, "loss": 0.0429, "lr": 1.3327972852899847e-06, "epoch": 2.264065668287201, "percentage": 45.28, "elapsed_time": "2:00:31", "remaining_time": "2:25:39", "throughput": 8639.55, "total_tokens": 62479400} +{"current_steps": 92680, "total_steps": 204665, "loss": 0.0715, "lr": 1.3327168674408273e-06, "epoch": 2.2641878191190483, "percentage": 45.28, "elapsed_time": "2:00:32", "remaining_time": "2:25:38", "throughput": 8639.66, "total_tokens": 62483432} +{"current_steps": 92685, "total_steps": 204665, "loss": 0.0844, "lr": 1.3326364471720632e-06, "epoch": 2.2643099699508955, "percentage": 45.29, "elapsed_time": "2:00:32", "remaining_time": "2:25:38", "throughput": 8639.73, "total_tokens": 62487080} +{"current_steps": 92690, "total_steps": 204665, "loss": 0.0573, "lr": 1.332556024484278e-06, "epoch": 2.2644321207827427, "percentage": 45.29, "elapsed_time": "2:00:32", "remaining_time": "2:25:37", "throughput": 8639.71, "total_tokens": 62489896} +{"current_steps": 92695, "total_steps": 204665, "loss": 0.039, "lr": 1.3324755993780554e-06, "epoch": 2.26455427161459, "percentage": 45.29, "elapsed_time": "2:00:33", "remaining_time": "2:25:37", "throughput": 8639.81, "total_tokens": 62493736} +{"current_steps": 92700, "total_steps": 204665, "loss": 0.0007, "lr": 1.3323951718539808e-06, "epoch": 2.2646764224464366, "percentage": 45.29, "elapsed_time": "2:00:33", "remaining_time": "2:25:36", "throughput": 8639.86, "total_tokens": 62497128} +{"current_steps": 92705, "total_steps": 204665, "loss": 0.0034, "lr": 1.332314741912639e-06, "epoch": 2.2647985732782843, "percentage": 45.3, "elapsed_time": "2:00:33", "remaining_time": "2:25:36", "throughput": 8639.9, "total_tokens": 62500520} +{"current_steps": 92710, "total_steps": 204665, "loss": 0.0006, "lr": 1.3322343095546153e-06, "epoch": 2.264920724110131, "percentage": 45.3, "elapsed_time": "2:00:34", "remaining_time": "2:25:35", "throughput": 8639.91, "total_tokens": 62503528} +{"current_steps": 92715, "total_steps": 204665, "loss": 0.083, "lr": 1.3321538747804942e-06, "epoch": 2.265042874941978, "percentage": 45.3, "elapsed_time": "2:00:34", "remaining_time": "2:25:35", "throughput": 8639.89, "total_tokens": 62506408} +{"current_steps": 92720, "total_steps": 204665, "loss": 0.0493, "lr": 1.3320734375908608e-06, "epoch": 2.2651650257738254, "percentage": 45.3, "elapsed_time": "2:00:34", "remaining_time": "2:25:35", "throughput": 8639.92, "total_tokens": 62509608} +{"current_steps": 92725, "total_steps": 204665, "loss": 0.0335, "lr": 1.3319929979863e-06, "epoch": 2.2652871766056726, "percentage": 45.31, "elapsed_time": "2:00:35", "remaining_time": "2:25:34", "throughput": 8639.94, "total_tokens": 62512744} +{"current_steps": 92730, "total_steps": 204665, "loss": 0.0001, "lr": 1.3319125559673968e-06, "epoch": 2.2654093274375198, "percentage": 45.31, "elapsed_time": "2:00:35", "remaining_time": "2:25:34", "throughput": 8639.99, "total_tokens": 62516136} +{"current_steps": 92735, "total_steps": 204665, "loss": 0.0446, "lr": 1.3318321115347364e-06, "epoch": 2.265531478269367, "percentage": 45.31, "elapsed_time": "2:00:36", "remaining_time": "2:25:33", "throughput": 8640.04, "total_tokens": 62519592} +{"current_steps": 92740, "total_steps": 204665, "loss": 0.0003, "lr": 1.3317516646889036e-06, "epoch": 2.265653629101214, "percentage": 45.31, "elapsed_time": "2:00:36", "remaining_time": "2:25:33", "throughput": 8640.07, "total_tokens": 62522728} +{"current_steps": 92745, "total_steps": 204665, "loss": 0.0002, "lr": 1.3316712154304835e-06, "epoch": 2.2657757799330613, "percentage": 45.32, "elapsed_time": "2:00:36", "remaining_time": "2:25:32", "throughput": 8640.14, "total_tokens": 62526312} +{"current_steps": 92750, "total_steps": 204665, "loss": 0.0003, "lr": 1.331590763760061e-06, "epoch": 2.2658979307649085, "percentage": 45.32, "elapsed_time": "2:00:37", "remaining_time": "2:25:32", "throughput": 8640.18, "total_tokens": 62529640} +{"current_steps": 92755, "total_steps": 204665, "loss": 0.0001, "lr": 1.3315103096782215e-06, "epoch": 2.2660200815967557, "percentage": 45.32, "elapsed_time": "2:00:37", "remaining_time": "2:25:32", "throughput": 8640.22, "total_tokens": 62532904} +{"current_steps": 92760, "total_steps": 204665, "loss": 0.0367, "lr": 1.3314298531855492e-06, "epoch": 2.266142232428603, "percentage": 45.32, "elapsed_time": "2:00:37", "remaining_time": "2:25:31", "throughput": 8640.24, "total_tokens": 62536040} +{"current_steps": 92765, "total_steps": 204665, "loss": 0.0282, "lr": 1.3313493942826304e-06, "epoch": 2.26626438326045, "percentage": 45.33, "elapsed_time": "2:00:38", "remaining_time": "2:25:31", "throughput": 8640.28, "total_tokens": 62539304} +{"current_steps": 92770, "total_steps": 204665, "loss": 0.0003, "lr": 1.3312689329700497e-06, "epoch": 2.2663865340922973, "percentage": 45.33, "elapsed_time": "2:00:38", "remaining_time": "2:25:30", "throughput": 8640.33, "total_tokens": 62542696} +{"current_steps": 92775, "total_steps": 204665, "loss": 0.0318, "lr": 1.331188469248392e-06, "epoch": 2.2665086849241445, "percentage": 45.33, "elapsed_time": "2:00:38", "remaining_time": "2:25:30", "throughput": 8640.38, "total_tokens": 62546024} +{"current_steps": 92780, "total_steps": 204665, "loss": 0.0005, "lr": 1.3311080031182428e-06, "epoch": 2.2666308357559917, "percentage": 45.33, "elapsed_time": "2:00:39", "remaining_time": "2:25:29", "throughput": 8640.39, "total_tokens": 62549032} +{"current_steps": 92785, "total_steps": 204665, "loss": 0.0931, "lr": 1.331027534580187e-06, "epoch": 2.2667529865878384, "percentage": 45.34, "elapsed_time": "2:00:39", "remaining_time": "2:25:29", "throughput": 8640.43, "total_tokens": 62552360} +{"current_steps": 92790, "total_steps": 204665, "loss": 0.0513, "lr": 1.3309470636348103e-06, "epoch": 2.266875137419686, "percentage": 45.34, "elapsed_time": "2:00:39", "remaining_time": "2:25:28", "throughput": 8640.48, "total_tokens": 62555752} +{"current_steps": 92795, "total_steps": 204665, "loss": 0.0737, "lr": 1.3308665902826972e-06, "epoch": 2.266997288251533, "percentage": 45.34, "elapsed_time": "2:00:40", "remaining_time": "2:25:28", "throughput": 8640.54, "total_tokens": 62559208} +{"current_steps": 92800, "total_steps": 204665, "loss": 0.0002, "lr": 1.3307861145244335e-06, "epoch": 2.26711943908338, "percentage": 45.34, "elapsed_time": "2:00:40", "remaining_time": "2:25:28", "throughput": 8640.57, "total_tokens": 62562472} +{"current_steps": 92805, "total_steps": 204665, "loss": 0.081, "lr": 1.330705636360604e-06, "epoch": 2.267241589915227, "percentage": 45.34, "elapsed_time": "2:00:40", "remaining_time": "2:25:27", "throughput": 8640.58, "total_tokens": 62565480} +{"current_steps": 92810, "total_steps": 204665, "loss": 0.0004, "lr": 1.3306251557917942e-06, "epoch": 2.2673637407470744, "percentage": 45.35, "elapsed_time": "2:00:41", "remaining_time": "2:25:27", "throughput": 8640.64, "total_tokens": 62568936} +{"current_steps": 92815, "total_steps": 204665, "loss": 0.0497, "lr": 1.3305446728185894e-06, "epoch": 2.2674858915789216, "percentage": 45.35, "elapsed_time": "2:00:41", "remaining_time": "2:25:26", "throughput": 8640.68, "total_tokens": 62572264} +{"current_steps": 92820, "total_steps": 204665, "loss": 0.0001, "lr": 1.330464187441575e-06, "epoch": 2.2676080424107687, "percentage": 45.35, "elapsed_time": "2:00:41", "remaining_time": "2:25:26", "throughput": 8640.81, "total_tokens": 62576360} +{"current_steps": 92825, "total_steps": 204665, "loss": 0.0281, "lr": 1.3303836996613359e-06, "epoch": 2.267730193242616, "percentage": 45.35, "elapsed_time": "2:00:42", "remaining_time": "2:25:25", "throughput": 8640.85, "total_tokens": 62579688} +{"current_steps": 92830, "total_steps": 204665, "loss": 0.048, "lr": 1.3303032094784575e-06, "epoch": 2.267852344074463, "percentage": 45.36, "elapsed_time": "2:00:42", "remaining_time": "2:25:25", "throughput": 8640.86, "total_tokens": 62582632} +{"current_steps": 92835, "total_steps": 204665, "loss": 0.0001, "lr": 1.3302227168935255e-06, "epoch": 2.2679744949063103, "percentage": 45.36, "elapsed_time": "2:00:42", "remaining_time": "2:25:24", "throughput": 8640.9, "total_tokens": 62585896} +{"current_steps": 92840, "total_steps": 204665, "loss": 0.0001, "lr": 1.3301422219071252e-06, "epoch": 2.2680966457381575, "percentage": 45.36, "elapsed_time": "2:00:43", "remaining_time": "2:25:24", "throughput": 8641.01, "total_tokens": 62589800} +{"current_steps": 92845, "total_steps": 204665, "loss": 0.0001, "lr": 1.330061724519842e-06, "epoch": 2.2682187965700047, "percentage": 45.36, "elapsed_time": "2:00:43", "remaining_time": "2:25:24", "throughput": 8641.12, "total_tokens": 62593704} +{"current_steps": 92850, "total_steps": 204665, "loss": 0.0553, "lr": 1.329981224732261e-06, "epoch": 2.268340947401852, "percentage": 45.37, "elapsed_time": "2:00:44", "remaining_time": "2:25:23", "throughput": 8641.11, "total_tokens": 62596520} +{"current_steps": 92855, "total_steps": 204665, "loss": 0.0001, "lr": 1.3299007225449677e-06, "epoch": 2.268463098233699, "percentage": 45.37, "elapsed_time": "2:00:44", "remaining_time": "2:25:23", "throughput": 8641.15, "total_tokens": 62599784} +{"current_steps": 92860, "total_steps": 204665, "loss": 0.0002, "lr": 1.3298202179585475e-06, "epoch": 2.2685852490655463, "percentage": 45.37, "elapsed_time": "2:00:44", "remaining_time": "2:25:22", "throughput": 8641.2, "total_tokens": 62603176} +{"current_steps": 92865, "total_steps": 204665, "loss": 0.002, "lr": 1.3297397109735862e-06, "epoch": 2.2687073998973935, "percentage": 45.37, "elapsed_time": "2:00:45", "remaining_time": "2:25:22", "throughput": 8641.29, "total_tokens": 62606888} +{"current_steps": 92870, "total_steps": 204665, "loss": 0.0004, "lr": 1.329659201590669e-06, "epoch": 2.2688295507292406, "percentage": 45.38, "elapsed_time": "2:00:45", "remaining_time": "2:25:21", "throughput": 8641.3, "total_tokens": 62609960} +{"current_steps": 92875, "total_steps": 204665, "loss": 0.0005, "lr": 1.3295786898103814e-06, "epoch": 2.268951701561088, "percentage": 45.38, "elapsed_time": "2:00:45", "remaining_time": "2:25:21", "throughput": 8641.32, "total_tokens": 62613032} +{"current_steps": 92880, "total_steps": 204665, "loss": 0.0001, "lr": 1.3294981756333087e-06, "epoch": 2.2690738523929346, "percentage": 45.38, "elapsed_time": "2:00:46", "remaining_time": "2:25:21", "throughput": 8641.34, "total_tokens": 62616104} +{"current_steps": 92885, "total_steps": 204665, "loss": 0.0644, "lr": 1.3294176590600368e-06, "epoch": 2.269196003224782, "percentage": 45.38, "elapsed_time": "2:00:46", "remaining_time": "2:25:20", "throughput": 8641.36, "total_tokens": 62619240} +{"current_steps": 92890, "total_steps": 204665, "loss": 0.0001, "lr": 1.3293371400911513e-06, "epoch": 2.269318154056629, "percentage": 45.39, "elapsed_time": "2:00:46", "remaining_time": "2:25:20", "throughput": 8641.41, "total_tokens": 62622568} +{"current_steps": 92895, "total_steps": 204665, "loss": 0.0679, "lr": 1.3292566187272374e-06, "epoch": 2.269440304888476, "percentage": 45.39, "elapsed_time": "2:00:47", "remaining_time": "2:25:19", "throughput": 8641.49, "total_tokens": 62626152} +{"current_steps": 92900, "total_steps": 204665, "loss": 0.0608, "lr": 1.3291760949688806e-06, "epoch": 2.2695624557203233, "percentage": 45.39, "elapsed_time": "2:00:47", "remaining_time": "2:25:19", "throughput": 8641.51, "total_tokens": 62629224} +{"current_steps": 92905, "total_steps": 204665, "loss": 0.0001, "lr": 1.329095568816667e-06, "epoch": 2.2696846065521705, "percentage": 45.39, "elapsed_time": "2:00:47", "remaining_time": "2:25:18", "throughput": 8641.59, "total_tokens": 62632872} +{"current_steps": 92910, "total_steps": 204665, "loss": 0.0001, "lr": 1.3290150402711817e-06, "epoch": 2.2698067573840177, "percentage": 45.4, "elapsed_time": "2:00:48", "remaining_time": "2:25:18", "throughput": 8641.63, "total_tokens": 62636136} +{"current_steps": 92915, "total_steps": 204665, "loss": 0.1128, "lr": 1.3289345093330104e-06, "epoch": 2.269928908215865, "percentage": 45.4, "elapsed_time": "2:00:48", "remaining_time": "2:25:17", "throughput": 8641.65, "total_tokens": 62639272} +{"current_steps": 92920, "total_steps": 204665, "loss": 0.0308, "lr": 1.3288539760027391e-06, "epoch": 2.270051059047712, "percentage": 45.4, "elapsed_time": "2:00:48", "remaining_time": "2:25:17", "throughput": 8641.69, "total_tokens": 62642472} +{"current_steps": 92925, "total_steps": 204665, "loss": 0.0691, "lr": 1.3287734402809533e-06, "epoch": 2.2701732098795593, "percentage": 45.4, "elapsed_time": "2:00:49", "remaining_time": "2:25:17", "throughput": 8641.75, "total_tokens": 62645992} +{"current_steps": 92930, "total_steps": 204665, "loss": 0.0029, "lr": 1.3286929021682385e-06, "epoch": 2.2702953607114065, "percentage": 45.41, "elapsed_time": "2:00:49", "remaining_time": "2:25:16", "throughput": 8641.84, "total_tokens": 62649704} +{"current_steps": 92935, "total_steps": 204665, "loss": 0.0002, "lr": 1.3286123616651806e-06, "epoch": 2.2704175115432537, "percentage": 45.41, "elapsed_time": "2:00:49", "remaining_time": "2:25:16", "throughput": 8641.88, "total_tokens": 62652968} +{"current_steps": 92940, "total_steps": 204665, "loss": 0.0502, "lr": 1.3285318187723652e-06, "epoch": 2.270539662375101, "percentage": 45.41, "elapsed_time": "2:00:50", "remaining_time": "2:25:15", "throughput": 8641.94, "total_tokens": 62656424} +{"current_steps": 92945, "total_steps": 204665, "loss": 0.0548, "lr": 1.3284512734903779e-06, "epoch": 2.270661813206948, "percentage": 45.41, "elapsed_time": "2:00:50", "remaining_time": "2:25:15", "throughput": 8641.94, "total_tokens": 62659304} +{"current_steps": 92950, "total_steps": 204665, "loss": 0.0646, "lr": 1.3283707258198047e-06, "epoch": 2.2707839640387952, "percentage": 45.42, "elapsed_time": "2:00:50", "remaining_time": "2:25:14", "throughput": 8642.01, "total_tokens": 62662888} +{"current_steps": 92955, "total_steps": 204665, "loss": 0.0716, "lr": 1.3282901757612314e-06, "epoch": 2.2709061148706424, "percentage": 45.42, "elapsed_time": "2:00:51", "remaining_time": "2:25:14", "throughput": 8642.06, "total_tokens": 62666280} +{"current_steps": 92960, "total_steps": 204665, "loss": 0.0642, "lr": 1.3282096233152435e-06, "epoch": 2.2710282657024896, "percentage": 45.42, "elapsed_time": "2:00:51", "remaining_time": "2:25:13", "throughput": 8642.15, "total_tokens": 62669992} +{"current_steps": 92965, "total_steps": 204665, "loss": 0.013, "lr": 1.3281290684824268e-06, "epoch": 2.2711504165343364, "percentage": 45.42, "elapsed_time": "2:00:52", "remaining_time": "2:25:13", "throughput": 8642.18, "total_tokens": 62673128} +{"current_steps": 92970, "total_steps": 204665, "loss": 0.1342, "lr": 1.3280485112633675e-06, "epoch": 2.271272567366184, "percentage": 45.43, "elapsed_time": "2:00:52", "remaining_time": "2:25:13", "throughput": 8642.21, "total_tokens": 62676328} +{"current_steps": 92975, "total_steps": 204665, "loss": 0.0004, "lr": 1.327967951658651e-06, "epoch": 2.2713947181980307, "percentage": 45.43, "elapsed_time": "2:00:52", "remaining_time": "2:25:12", "throughput": 8642.32, "total_tokens": 62680232} +{"current_steps": 92980, "total_steps": 204665, "loss": 0.0007, "lr": 1.3278873896688633e-06, "epoch": 2.271516869029878, "percentage": 45.43, "elapsed_time": "2:00:53", "remaining_time": "2:25:12", "throughput": 8642.38, "total_tokens": 62683624} +{"current_steps": 92985, "total_steps": 204665, "loss": 0.0001, "lr": 1.3278068252945908e-06, "epoch": 2.271639019861725, "percentage": 45.43, "elapsed_time": "2:00:53", "remaining_time": "2:25:11", "throughput": 8642.42, "total_tokens": 62686952} +{"current_steps": 92990, "total_steps": 204665, "loss": 0.1115, "lr": 1.327726258536418e-06, "epoch": 2.2717611706935723, "percentage": 45.44, "elapsed_time": "2:00:53", "remaining_time": "2:25:11", "throughput": 8642.51, "total_tokens": 62690600} +{"current_steps": 92995, "total_steps": 204665, "loss": 0.0355, "lr": 1.3276456893949325e-06, "epoch": 2.2718833215254195, "percentage": 45.44, "elapsed_time": "2:00:54", "remaining_time": "2:25:10", "throughput": 8642.55, "total_tokens": 62693928} +{"current_steps": 93000, "total_steps": 204665, "loss": 0.0633, "lr": 1.3275651178707194e-06, "epoch": 2.2720054723572667, "percentage": 45.44, "elapsed_time": "2:00:54", "remaining_time": "2:25:10", "throughput": 8642.61, "total_tokens": 62697320} +{"current_steps": 93005, "total_steps": 204665, "loss": 0.0125, "lr": 1.3274845439643645e-06, "epoch": 2.272127623189114, "percentage": 45.44, "elapsed_time": "2:00:54", "remaining_time": "2:25:09", "throughput": 8642.66, "total_tokens": 62700712} +{"current_steps": 93010, "total_steps": 204665, "loss": 0.0001, "lr": 1.3274039676764535e-06, "epoch": 2.272249774020961, "percentage": 45.44, "elapsed_time": "2:00:55", "remaining_time": "2:25:09", "throughput": 8642.66, "total_tokens": 62703656} +{"current_steps": 93015, "total_steps": 204665, "loss": 0.0003, "lr": 1.3273233890075733e-06, "epoch": 2.2723719248528083, "percentage": 45.45, "elapsed_time": "2:00:55", "remaining_time": "2:25:09", "throughput": 8642.74, "total_tokens": 62707240} +{"current_steps": 93020, "total_steps": 204665, "loss": 0.0003, "lr": 1.327242807958309e-06, "epoch": 2.2724940756846554, "percentage": 45.45, "elapsed_time": "2:00:55", "remaining_time": "2:25:08", "throughput": 8642.77, "total_tokens": 62710376} +{"current_steps": 93025, "total_steps": 204665, "loss": 0.0431, "lr": 1.3271622245292473e-06, "epoch": 2.2726162265165026, "percentage": 45.45, "elapsed_time": "2:00:56", "remaining_time": "2:25:08", "throughput": 8642.79, "total_tokens": 62713512} +{"current_steps": 93030, "total_steps": 204665, "loss": 0.0336, "lr": 1.3270816387209738e-06, "epoch": 2.27273837734835, "percentage": 45.45, "elapsed_time": "2:00:56", "remaining_time": "2:25:07", "throughput": 8642.84, "total_tokens": 62716904} +{"current_steps": 93035, "total_steps": 204665, "loss": 0.0526, "lr": 1.3270010505340748e-06, "epoch": 2.272860528180197, "percentage": 45.46, "elapsed_time": "2:00:56", "remaining_time": "2:25:07", "throughput": 8642.89, "total_tokens": 62720296} +{"current_steps": 93040, "total_steps": 204665, "loss": 0.0002, "lr": 1.3269204599691357e-06, "epoch": 2.272982679012044, "percentage": 45.46, "elapsed_time": "2:00:57", "remaining_time": "2:25:06", "throughput": 8642.92, "total_tokens": 62723432} +{"current_steps": 93045, "total_steps": 204665, "loss": 0.0525, "lr": 1.3268398670267438e-06, "epoch": 2.2731048298438914, "percentage": 45.46, "elapsed_time": "2:00:57", "remaining_time": "2:25:06", "throughput": 8642.97, "total_tokens": 62726824} +{"current_steps": 93050, "total_steps": 204665, "loss": 0.0008, "lr": 1.326759271707484e-06, "epoch": 2.2732269806757386, "percentage": 45.46, "elapsed_time": "2:00:57", "remaining_time": "2:25:05", "throughput": 8643.0, "total_tokens": 62730024} +{"current_steps": 93055, "total_steps": 204665, "loss": 0.0525, "lr": 1.3266786740119428e-06, "epoch": 2.2733491315075858, "percentage": 45.47, "elapsed_time": "2:00:58", "remaining_time": "2:25:05", "throughput": 8643.02, "total_tokens": 62733096} +{"current_steps": 93060, "total_steps": 204665, "loss": 0.0002, "lr": 1.3265980739407068e-06, "epoch": 2.2734712823394325, "percentage": 45.47, "elapsed_time": "2:00:58", "remaining_time": "2:25:05", "throughput": 8643.16, "total_tokens": 62737320} +{"current_steps": 93065, "total_steps": 204665, "loss": 0.0004, "lr": 1.3265174714943618e-06, "epoch": 2.2735934331712797, "percentage": 45.47, "elapsed_time": "2:00:58", "remaining_time": "2:25:04", "throughput": 8643.24, "total_tokens": 62740904} +{"current_steps": 93070, "total_steps": 204665, "loss": 0.0359, "lr": 1.3264368666734933e-06, "epoch": 2.273715584003127, "percentage": 45.47, "elapsed_time": "2:00:59", "remaining_time": "2:25:04", "throughput": 8643.29, "total_tokens": 62744296} +{"current_steps": 93075, "total_steps": 204665, "loss": 0.0389, "lr": 1.3263562594786886e-06, "epoch": 2.273837734834974, "percentage": 45.48, "elapsed_time": "2:00:59", "remaining_time": "2:25:03", "throughput": 8643.31, "total_tokens": 62747304} +{"current_steps": 93080, "total_steps": 204665, "loss": 0.0492, "lr": 1.3262756499105333e-06, "epoch": 2.2739598856668213, "percentage": 45.48, "elapsed_time": "2:00:59", "remaining_time": "2:25:03", "throughput": 8643.35, "total_tokens": 62750632} +{"current_steps": 93085, "total_steps": 204665, "loss": 0.1146, "lr": 1.3261950379696136e-06, "epoch": 2.2740820364986685, "percentage": 45.48, "elapsed_time": "2:01:00", "remaining_time": "2:25:02", "throughput": 8643.38, "total_tokens": 62753832} +{"current_steps": 93090, "total_steps": 204665, "loss": 0.0013, "lr": 1.326114423656516e-06, "epoch": 2.2742041873305157, "percentage": 45.48, "elapsed_time": "2:01:00", "remaining_time": "2:25:02", "throughput": 8643.46, "total_tokens": 62757416} +{"current_steps": 93095, "total_steps": 204665, "loss": 0.037, "lr": 1.3260338069718266e-06, "epoch": 2.274326338162363, "percentage": 45.49, "elapsed_time": "2:01:01", "remaining_time": "2:25:01", "throughput": 8643.47, "total_tokens": 62760424} +{"current_steps": 93100, "total_steps": 204665, "loss": 0.0509, "lr": 1.3259531879161316e-06, "epoch": 2.27444848899421, "percentage": 45.49, "elapsed_time": "2:01:01", "remaining_time": "2:25:01", "throughput": 8643.55, "total_tokens": 62764072} +{"current_steps": 93105, "total_steps": 204665, "loss": 0.0347, "lr": 1.3258725664900173e-06, "epoch": 2.2745706398260572, "percentage": 45.49, "elapsed_time": "2:01:01", "remaining_time": "2:25:01", "throughput": 8643.63, "total_tokens": 62767720} +{"current_steps": 93110, "total_steps": 204665, "loss": 0.0002, "lr": 1.3257919426940703e-06, "epoch": 2.2746927906579044, "percentage": 45.49, "elapsed_time": "2:01:02", "remaining_time": "2:25:00", "throughput": 8643.69, "total_tokens": 62771112} +{"current_steps": 93115, "total_steps": 204665, "loss": 0.0004, "lr": 1.3257113165288764e-06, "epoch": 2.2748149414897516, "percentage": 45.5, "elapsed_time": "2:01:02", "remaining_time": "2:25:00", "throughput": 8643.73, "total_tokens": 62774440} +{"current_steps": 93120, "total_steps": 204665, "loss": 0.0004, "lr": 1.3256306879950224e-06, "epoch": 2.274937092321599, "percentage": 45.5, "elapsed_time": "2:01:02", "remaining_time": "2:24:59", "throughput": 8643.8, "total_tokens": 62777960} +{"current_steps": 93125, "total_steps": 204665, "loss": 0.0501, "lr": 1.3255500570930945e-06, "epoch": 2.275059243153446, "percentage": 45.5, "elapsed_time": "2:01:03", "remaining_time": "2:24:59", "throughput": 8643.83, "total_tokens": 62781160} +{"current_steps": 93130, "total_steps": 204665, "loss": 0.0002, "lr": 1.3254694238236788e-06, "epoch": 2.275181393985293, "percentage": 45.5, "elapsed_time": "2:01:03", "remaining_time": "2:24:58", "throughput": 8643.84, "total_tokens": 62784168} +{"current_steps": 93135, "total_steps": 204665, "loss": 0.0001, "lr": 1.3253887881873618e-06, "epoch": 2.2753035448171404, "percentage": 45.51, "elapsed_time": "2:01:03", "remaining_time": "2:24:58", "throughput": 8643.84, "total_tokens": 62787112} +{"current_steps": 93140, "total_steps": 204665, "loss": 0.0002, "lr": 1.32530815018473e-06, "epoch": 2.2754256956489876, "percentage": 45.51, "elapsed_time": "2:01:04", "remaining_time": "2:24:58", "throughput": 8643.92, "total_tokens": 62790760} +{"current_steps": 93145, "total_steps": 204665, "loss": 0.0524, "lr": 1.3252275098163701e-06, "epoch": 2.2755478464808343, "percentage": 45.51, "elapsed_time": "2:01:04", "remaining_time": "2:24:57", "throughput": 8644.0, "total_tokens": 62794344} +{"current_steps": 93150, "total_steps": 204665, "loss": 0.1383, "lr": 1.3251468670828683e-06, "epoch": 2.275669997312682, "percentage": 45.51, "elapsed_time": "2:01:04", "remaining_time": "2:24:57", "throughput": 8644.05, "total_tokens": 62797672} +{"current_steps": 93155, "total_steps": 204665, "loss": 0.0647, "lr": 1.325066221984811e-06, "epoch": 2.2757921481445287, "percentage": 45.52, "elapsed_time": "2:01:05", "remaining_time": "2:24:56", "throughput": 8644.13, "total_tokens": 62801320} +{"current_steps": 93160, "total_steps": 204665, "loss": 0.1175, "lr": 1.3249855745227847e-06, "epoch": 2.275914298976376, "percentage": 45.52, "elapsed_time": "2:01:05", "remaining_time": "2:24:56", "throughput": 8644.15, "total_tokens": 62804392} +{"current_steps": 93165, "total_steps": 204665, "loss": 0.0003, "lr": 1.3249049246973757e-06, "epoch": 2.276036449808223, "percentage": 45.52, "elapsed_time": "2:01:05", "remaining_time": "2:24:55", "throughput": 8644.17, "total_tokens": 62807528} +{"current_steps": 93170, "total_steps": 204665, "loss": 0.0396, "lr": 1.3248242725091707e-06, "epoch": 2.2761586006400703, "percentage": 45.52, "elapsed_time": "2:01:06", "remaining_time": "2:24:55", "throughput": 8644.21, "total_tokens": 62810792} +{"current_steps": 93175, "total_steps": 204665, "loss": 0.0005, "lr": 1.3247436179587563e-06, "epoch": 2.2762807514719174, "percentage": 45.53, "elapsed_time": "2:01:06", "remaining_time": "2:24:54", "throughput": 8644.25, "total_tokens": 62814120} +{"current_steps": 93180, "total_steps": 204665, "loss": 0.0692, "lr": 1.324662961046719e-06, "epoch": 2.2764029023037646, "percentage": 45.53, "elapsed_time": "2:01:06", "remaining_time": "2:24:54", "throughput": 8644.31, "total_tokens": 62817576} +{"current_steps": 93185, "total_steps": 204665, "loss": 0.0486, "lr": 1.3245823017736454e-06, "epoch": 2.276525053135612, "percentage": 45.53, "elapsed_time": "2:01:07", "remaining_time": "2:24:54", "throughput": 8644.4, "total_tokens": 62821224} +{"current_steps": 93190, "total_steps": 204665, "loss": 0.0341, "lr": 1.324501640140122e-06, "epoch": 2.276647203967459, "percentage": 45.53, "elapsed_time": "2:01:07", "remaining_time": "2:24:53", "throughput": 8644.44, "total_tokens": 62824552} +{"current_steps": 93195, "total_steps": 204665, "loss": 0.0629, "lr": 1.3244209761467352e-06, "epoch": 2.276769354799306, "percentage": 45.54, "elapsed_time": "2:01:07", "remaining_time": "2:24:53", "throughput": 8644.47, "total_tokens": 62827752} +{"current_steps": 93200, "total_steps": 204665, "loss": 0.0069, "lr": 1.324340309794072e-06, "epoch": 2.2768915056311534, "percentage": 45.54, "elapsed_time": "2:01:08", "remaining_time": "2:24:52", "throughput": 8644.48, "total_tokens": 62830696} +{"current_steps": 93205, "total_steps": 204665, "loss": 0.0998, "lr": 1.3242596410827187e-06, "epoch": 2.2770136564630006, "percentage": 45.54, "elapsed_time": "2:01:08", "remaining_time": "2:24:52", "throughput": 8644.53, "total_tokens": 62834088} +{"current_steps": 93210, "total_steps": 204665, "loss": 0.0689, "lr": 1.3241789700132621e-06, "epoch": 2.2771358072948478, "percentage": 45.54, "elapsed_time": "2:01:08", "remaining_time": "2:24:51", "throughput": 8644.56, "total_tokens": 62837288} +{"current_steps": 93215, "total_steps": 204665, "loss": 0.1573, "lr": 1.324098296586289e-06, "epoch": 2.277257958126695, "percentage": 45.55, "elapsed_time": "2:01:09", "remaining_time": "2:24:51", "throughput": 8644.59, "total_tokens": 62840488} +{"current_steps": 93220, "total_steps": 204665, "loss": 0.0571, "lr": 1.324017620802386e-06, "epoch": 2.277380108958542, "percentage": 45.55, "elapsed_time": "2:01:09", "remaining_time": "2:24:50", "throughput": 8644.65, "total_tokens": 62843880} +{"current_steps": 93225, "total_steps": 204665, "loss": 0.1587, "lr": 1.3239369426621391e-06, "epoch": 2.2775022597903893, "percentage": 45.55, "elapsed_time": "2:01:10", "remaining_time": "2:24:50", "throughput": 8644.68, "total_tokens": 62847144} +{"current_steps": 93230, "total_steps": 204665, "loss": 0.0686, "lr": 1.323856262166136e-06, "epoch": 2.2776244106222365, "percentage": 45.55, "elapsed_time": "2:01:10", "remaining_time": "2:24:50", "throughput": 8644.74, "total_tokens": 62850600} +{"current_steps": 93235, "total_steps": 204665, "loss": 0.0006, "lr": 1.323775579314963e-06, "epoch": 2.2777465614540837, "percentage": 45.55, "elapsed_time": "2:01:10", "remaining_time": "2:24:49", "throughput": 8644.82, "total_tokens": 62854248} +{"current_steps": 93240, "total_steps": 204665, "loss": 0.0004, "lr": 1.323694894109207e-06, "epoch": 2.2778687122859305, "percentage": 45.56, "elapsed_time": "2:01:11", "remaining_time": "2:24:49", "throughput": 8644.86, "total_tokens": 62857512} +{"current_steps": 93245, "total_steps": 204665, "loss": 0.0003, "lr": 1.3236142065494546e-06, "epoch": 2.2779908631177777, "percentage": 45.56, "elapsed_time": "2:01:11", "remaining_time": "2:24:48", "throughput": 8644.93, "total_tokens": 62861096} +{"current_steps": 93250, "total_steps": 204665, "loss": 0.1099, "lr": 1.3235335166362926e-06, "epoch": 2.278113013949625, "percentage": 45.56, "elapsed_time": "2:01:11", "remaining_time": "2:24:48", "throughput": 8644.97, "total_tokens": 62864424} +{"current_steps": 93255, "total_steps": 204665, "loss": 0.0002, "lr": 1.323452824370308e-06, "epoch": 2.278235164781472, "percentage": 45.56, "elapsed_time": "2:01:12", "remaining_time": "2:24:47", "throughput": 8645.01, "total_tokens": 62867624} +{"current_steps": 93260, "total_steps": 204665, "loss": 0.0283, "lr": 1.3233721297520875e-06, "epoch": 2.2783573156133192, "percentage": 45.57, "elapsed_time": "2:01:12", "remaining_time": "2:24:47", "throughput": 8645.07, "total_tokens": 62871080} +{"current_steps": 93265, "total_steps": 204665, "loss": 0.0589, "lr": 1.3232914327822177e-06, "epoch": 2.2784794664451664, "percentage": 45.57, "elapsed_time": "2:01:12", "remaining_time": "2:24:46", "throughput": 8645.07, "total_tokens": 62874024} +{"current_steps": 93270, "total_steps": 204665, "loss": 0.0002, "lr": 1.3232107334612858e-06, "epoch": 2.2786016172770136, "percentage": 45.57, "elapsed_time": "2:01:13", "remaining_time": "2:24:46", "throughput": 8645.09, "total_tokens": 62877160} +{"current_steps": 93275, "total_steps": 204665, "loss": 0.0421, "lr": 1.3231300317898786e-06, "epoch": 2.278723768108861, "percentage": 45.57, "elapsed_time": "2:01:13", "remaining_time": "2:24:46", "throughput": 8645.12, "total_tokens": 62880296} +{"current_steps": 93280, "total_steps": 204665, "loss": 0.0779, "lr": 1.3230493277685826e-06, "epoch": 2.278845918940708, "percentage": 45.58, "elapsed_time": "2:01:13", "remaining_time": "2:24:45", "throughput": 8645.16, "total_tokens": 62883560} +{"current_steps": 93285, "total_steps": 204665, "loss": 0.0413, "lr": 1.322968621397985e-06, "epoch": 2.278968069772555, "percentage": 45.58, "elapsed_time": "2:01:14", "remaining_time": "2:24:45", "throughput": 8645.21, "total_tokens": 62886952} +{"current_steps": 93290, "total_steps": 204665, "loss": 0.0428, "lr": 1.322887912678673e-06, "epoch": 2.2790902206044024, "percentage": 45.58, "elapsed_time": "2:01:14", "remaining_time": "2:24:44", "throughput": 8645.26, "total_tokens": 62890344} +{"current_steps": 93295, "total_steps": 204665, "loss": 0.0002, "lr": 1.322807201611233e-06, "epoch": 2.2792123714362496, "percentage": 45.58, "elapsed_time": "2:01:14", "remaining_time": "2:24:44", "throughput": 8645.37, "total_tokens": 62894248} +{"current_steps": 93300, "total_steps": 204665, "loss": 0.0004, "lr": 1.3227264881962522e-06, "epoch": 2.2793345222680967, "percentage": 45.59, "elapsed_time": "2:01:15", "remaining_time": "2:24:43", "throughput": 8645.48, "total_tokens": 62898216} +{"current_steps": 93305, "total_steps": 204665, "loss": 0.0003, "lr": 1.322645772434318e-06, "epoch": 2.279456673099944, "percentage": 45.59, "elapsed_time": "2:01:15", "remaining_time": "2:24:43", "throughput": 8645.61, "total_tokens": 62902184} +{"current_steps": 93310, "total_steps": 204665, "loss": 0.1207, "lr": 1.3225650543260168e-06, "epoch": 2.279578823931791, "percentage": 45.59, "elapsed_time": "2:01:15", "remaining_time": "2:24:43", "throughput": 8645.66, "total_tokens": 62905640} +{"current_steps": 93315, "total_steps": 204665, "loss": 0.0002, "lr": 1.3224843338719356e-06, "epoch": 2.2797009747636383, "percentage": 45.59, "elapsed_time": "2:01:16", "remaining_time": "2:24:42", "throughput": 8645.68, "total_tokens": 62908712} +{"current_steps": 93320, "total_steps": 204665, "loss": 0.0415, "lr": 1.3224036110726614e-06, "epoch": 2.2798231255954855, "percentage": 45.6, "elapsed_time": "2:01:16", "remaining_time": "2:24:42", "throughput": 8645.73, "total_tokens": 62912104} +{"current_steps": 93325, "total_steps": 204665, "loss": 0.0006, "lr": 1.3223228859287815e-06, "epoch": 2.2799452764273322, "percentage": 45.6, "elapsed_time": "2:01:17", "remaining_time": "2:24:41", "throughput": 8645.81, "total_tokens": 62915688} +{"current_steps": 93330, "total_steps": 204665, "loss": 0.0001, "lr": 1.3222421584408832e-06, "epoch": 2.28006742725918, "percentage": 45.6, "elapsed_time": "2:01:17", "remaining_time": "2:24:41", "throughput": 8645.87, "total_tokens": 62919144} +{"current_steps": 93335, "total_steps": 204665, "loss": 0.0564, "lr": 1.3221614286095531e-06, "epoch": 2.2801895780910266, "percentage": 45.6, "elapsed_time": "2:01:17", "remaining_time": "2:24:40", "throughput": 8645.94, "total_tokens": 62922728} +{"current_steps": 93340, "total_steps": 204665, "loss": 0.0039, "lr": 1.3220806964353784e-06, "epoch": 2.280311728922874, "percentage": 45.61, "elapsed_time": "2:01:18", "remaining_time": "2:24:40", "throughput": 8645.99, "total_tokens": 62926120} +{"current_steps": 93345, "total_steps": 204665, "loss": 0.036, "lr": 1.3219999619189462e-06, "epoch": 2.280433879754721, "percentage": 45.61, "elapsed_time": "2:01:18", "remaining_time": "2:24:39", "throughput": 8646.03, "total_tokens": 62929384} +{"current_steps": 93350, "total_steps": 204665, "loss": 0.0003, "lr": 1.3219192250608436e-06, "epoch": 2.280556030586568, "percentage": 45.61, "elapsed_time": "2:01:18", "remaining_time": "2:24:39", "throughput": 8646.06, "total_tokens": 62932584} +{"current_steps": 93355, "total_steps": 204665, "loss": 0.0001, "lr": 1.321838485861658e-06, "epoch": 2.2806781814184154, "percentage": 45.61, "elapsed_time": "2:01:19", "remaining_time": "2:24:39", "throughput": 8646.11, "total_tokens": 62935976} +{"current_steps": 93360, "total_steps": 204665, "loss": 0.0001, "lr": 1.3217577443219763e-06, "epoch": 2.2808003322502626, "percentage": 45.62, "elapsed_time": "2:01:19", "remaining_time": "2:24:38", "throughput": 8646.14, "total_tokens": 62939176} +{"current_steps": 93365, "total_steps": 204665, "loss": 0.0003, "lr": 1.3216770004423858e-06, "epoch": 2.2809224830821098, "percentage": 45.62, "elapsed_time": "2:01:19", "remaining_time": "2:24:38", "throughput": 8646.16, "total_tokens": 62942312} +{"current_steps": 93370, "total_steps": 204665, "loss": 0.0, "lr": 1.3215962542234735e-06, "epoch": 2.281044633913957, "percentage": 45.62, "elapsed_time": "2:01:20", "remaining_time": "2:24:37", "throughput": 8646.18, "total_tokens": 62945448} +{"current_steps": 93375, "total_steps": 204665, "loss": 0.0002, "lr": 1.321515505665827e-06, "epoch": 2.281166784745804, "percentage": 45.62, "elapsed_time": "2:01:20", "remaining_time": "2:24:37", "throughput": 8646.21, "total_tokens": 62948584} +{"current_steps": 93380, "total_steps": 204665, "loss": 0.0001, "lr": 1.321434754770033e-06, "epoch": 2.2812889355776513, "percentage": 45.63, "elapsed_time": "2:01:20", "remaining_time": "2:24:36", "throughput": 8646.21, "total_tokens": 62951528} +{"current_steps": 93385, "total_steps": 204665, "loss": 0.0001, "lr": 1.3213540015366789e-06, "epoch": 2.2814110864094985, "percentage": 45.63, "elapsed_time": "2:01:21", "remaining_time": "2:24:36", "throughput": 8646.24, "total_tokens": 62954664} +{"current_steps": 93390, "total_steps": 204665, "loss": 0.0003, "lr": 1.3212732459663524e-06, "epoch": 2.2815332372413457, "percentage": 45.63, "elapsed_time": "2:01:21", "remaining_time": "2:24:35", "throughput": 8646.35, "total_tokens": 62958504} +{"current_steps": 93395, "total_steps": 204665, "loss": 0.0001, "lr": 1.32119248805964e-06, "epoch": 2.281655388073193, "percentage": 45.63, "elapsed_time": "2:01:21", "remaining_time": "2:24:35", "throughput": 8646.34, "total_tokens": 62961384} +{"current_steps": 93400, "total_steps": 204665, "loss": 0.0355, "lr": 1.3211117278171297e-06, "epoch": 2.28177753890504, "percentage": 45.64, "elapsed_time": "2:01:22", "remaining_time": "2:24:35", "throughput": 8646.45, "total_tokens": 62965224} +{"current_steps": 93405, "total_steps": 204665, "loss": 0.0006, "lr": 1.3210309652394087e-06, "epoch": 2.2818996897368873, "percentage": 45.64, "elapsed_time": "2:01:22", "remaining_time": "2:24:34", "throughput": 8646.49, "total_tokens": 62968488} +{"current_steps": 93410, "total_steps": 204665, "loss": 0.0325, "lr": 1.3209502003270641e-06, "epoch": 2.282021840568734, "percentage": 45.64, "elapsed_time": "2:01:22", "remaining_time": "2:24:34", "throughput": 8646.58, "total_tokens": 62972264} +{"current_steps": 93415, "total_steps": 204665, "loss": 0.1406, "lr": 1.3208694330806834e-06, "epoch": 2.2821439914005817, "percentage": 45.64, "elapsed_time": "2:01:23", "remaining_time": "2:24:33", "throughput": 8646.61, "total_tokens": 62975464} +{"current_steps": 93420, "total_steps": 204665, "loss": 0.0667, "lr": 1.3207886635008535e-06, "epoch": 2.2822661422324284, "percentage": 45.65, "elapsed_time": "2:01:23", "remaining_time": "2:24:33", "throughput": 8646.66, "total_tokens": 62978792} +{"current_steps": 93425, "total_steps": 204665, "loss": 0.0002, "lr": 1.3207078915881624e-06, "epoch": 2.2823882930642756, "percentage": 45.65, "elapsed_time": "2:01:23", "remaining_time": "2:24:32", "throughput": 8646.72, "total_tokens": 62982312} +{"current_steps": 93430, "total_steps": 204665, "loss": 0.0, "lr": 1.3206271173431973e-06, "epoch": 2.282510443896123, "percentage": 45.65, "elapsed_time": "2:01:24", "remaining_time": "2:24:32", "throughput": 8646.77, "total_tokens": 62985640} +{"current_steps": 93435, "total_steps": 204665, "loss": 0.0002, "lr": 1.3205463407665456e-06, "epoch": 2.28263259472797, "percentage": 45.65, "elapsed_time": "2:01:24", "remaining_time": "2:24:32", "throughput": 8646.8, "total_tokens": 62988840} +{"current_steps": 93440, "total_steps": 204665, "loss": 0.1984, "lr": 1.3204655618587946e-06, "epoch": 2.282754745559817, "percentage": 45.66, "elapsed_time": "2:01:24", "remaining_time": "2:24:31", "throughput": 8646.84, "total_tokens": 62992168} +{"current_steps": 93445, "total_steps": 204665, "loss": 0.0423, "lr": 1.3203847806205316e-06, "epoch": 2.2828768963916644, "percentage": 45.66, "elapsed_time": "2:01:25", "remaining_time": "2:24:31", "throughput": 8646.88, "total_tokens": 62995432} +{"current_steps": 93450, "total_steps": 204665, "loss": 0.0516, "lr": 1.3203039970523446e-06, "epoch": 2.2829990472235115, "percentage": 45.66, "elapsed_time": "2:01:25", "remaining_time": "2:24:30", "throughput": 8646.92, "total_tokens": 62998696} +{"current_steps": 93455, "total_steps": 204665, "loss": 0.0456, "lr": 1.3202232111548208e-06, "epoch": 2.2831211980553587, "percentage": 45.66, "elapsed_time": "2:01:26", "remaining_time": "2:24:30", "throughput": 8646.97, "total_tokens": 63002024} +{"current_steps": 93460, "total_steps": 204665, "loss": 0.0001, "lr": 1.3201424229285476e-06, "epoch": 2.283243348887206, "percentage": 45.66, "elapsed_time": "2:01:26", "remaining_time": "2:24:29", "throughput": 8647.03, "total_tokens": 63005480} +{"current_steps": 93465, "total_steps": 204665, "loss": 0.0004, "lr": 1.3200616323741129e-06, "epoch": 2.283365499719053, "percentage": 45.67, "elapsed_time": "2:01:26", "remaining_time": "2:24:29", "throughput": 8647.08, "total_tokens": 63008872} +{"current_steps": 93470, "total_steps": 204665, "loss": 0.0004, "lr": 1.3199808394921034e-06, "epoch": 2.2834876505509003, "percentage": 45.67, "elapsed_time": "2:01:27", "remaining_time": "2:24:28", "throughput": 8647.14, "total_tokens": 63012328} +{"current_steps": 93475, "total_steps": 204665, "loss": 0.062, "lr": 1.3199000442831074e-06, "epoch": 2.2836098013827475, "percentage": 45.67, "elapsed_time": "2:01:27", "remaining_time": "2:24:28", "throughput": 8647.21, "total_tokens": 63015912} +{"current_steps": 93480, "total_steps": 204665, "loss": 0.0001, "lr": 1.3198192467477122e-06, "epoch": 2.2837319522145947, "percentage": 45.67, "elapsed_time": "2:01:27", "remaining_time": "2:24:28", "throughput": 8647.26, "total_tokens": 63019304} +{"current_steps": 93485, "total_steps": 204665, "loss": 0.0002, "lr": 1.3197384468865057e-06, "epoch": 2.283854103046442, "percentage": 45.68, "elapsed_time": "2:01:28", "remaining_time": "2:24:27", "throughput": 8647.31, "total_tokens": 63022632} +{"current_steps": 93490, "total_steps": 204665, "loss": 0.1495, "lr": 1.3196576447000748e-06, "epoch": 2.283976253878289, "percentage": 45.68, "elapsed_time": "2:01:28", "remaining_time": "2:24:27", "throughput": 8647.33, "total_tokens": 63025704} +{"current_steps": 93495, "total_steps": 204665, "loss": 0.128, "lr": 1.3195768401890077e-06, "epoch": 2.2840984047101363, "percentage": 45.68, "elapsed_time": "2:01:28", "remaining_time": "2:24:26", "throughput": 8647.37, "total_tokens": 63028968} +{"current_steps": 93500, "total_steps": 204665, "loss": 0.0016, "lr": 1.3194960333538918e-06, "epoch": 2.2842205555419834, "percentage": 45.68, "elapsed_time": "2:01:29", "remaining_time": "2:24:26", "throughput": 8647.44, "total_tokens": 63032488} +{"current_steps": 93505, "total_steps": 204665, "loss": 0.0014, "lr": 1.3194152241953148e-06, "epoch": 2.28434270637383, "percentage": 45.69, "elapsed_time": "2:01:29", "remaining_time": "2:24:25", "throughput": 8647.48, "total_tokens": 63035816} +{"current_steps": 93510, "total_steps": 204665, "loss": 0.0005, "lr": 1.3193344127138647e-06, "epoch": 2.2844648572056774, "percentage": 45.69, "elapsed_time": "2:01:29", "remaining_time": "2:24:25", "throughput": 8647.55, "total_tokens": 63039336} +{"current_steps": 93515, "total_steps": 204665, "loss": 0.0943, "lr": 1.3192535989101285e-06, "epoch": 2.2845870080375246, "percentage": 45.69, "elapsed_time": "2:01:30", "remaining_time": "2:24:24", "throughput": 8647.66, "total_tokens": 63043304} +{"current_steps": 93520, "total_steps": 204665, "loss": 0.0004, "lr": 1.3191727827846945e-06, "epoch": 2.2847091588693718, "percentage": 45.69, "elapsed_time": "2:01:30", "remaining_time": "2:24:24", "throughput": 8647.74, "total_tokens": 63046888} +{"current_steps": 93525, "total_steps": 204665, "loss": 0.0266, "lr": 1.31909196433815e-06, "epoch": 2.284831309701219, "percentage": 45.7, "elapsed_time": "2:01:30", "remaining_time": "2:24:24", "throughput": 8647.8, "total_tokens": 63050344} +{"current_steps": 93530, "total_steps": 204665, "loss": 0.0393, "lr": 1.3190111435710828e-06, "epoch": 2.284953460533066, "percentage": 45.7, "elapsed_time": "2:01:31", "remaining_time": "2:24:23", "throughput": 8647.81, "total_tokens": 63053416} +{"current_steps": 93535, "total_steps": 204665, "loss": 0.0315, "lr": 1.3189303204840809e-06, "epoch": 2.2850756113649133, "percentage": 45.7, "elapsed_time": "2:01:31", "remaining_time": "2:24:23", "throughput": 8647.85, "total_tokens": 63056680} +{"current_steps": 93540, "total_steps": 204665, "loss": 0.0002, "lr": 1.3188494950777318e-06, "epoch": 2.2851977621967605, "percentage": 45.7, "elapsed_time": "2:01:31", "remaining_time": "2:24:22", "throughput": 8647.92, "total_tokens": 63060200} +{"current_steps": 93545, "total_steps": 204665, "loss": 0.0001, "lr": 1.3187686673526238e-06, "epoch": 2.2853199130286077, "percentage": 45.71, "elapsed_time": "2:01:32", "remaining_time": "2:24:22", "throughput": 8647.97, "total_tokens": 63063592} +{"current_steps": 93550, "total_steps": 204665, "loss": 0.0578, "lr": 1.3186878373093438e-06, "epoch": 2.285442063860455, "percentage": 45.71, "elapsed_time": "2:01:32", "remaining_time": "2:24:21", "throughput": 8648.07, "total_tokens": 63067432} +{"current_steps": 93555, "total_steps": 204665, "loss": 0.0002, "lr": 1.3186070049484806e-06, "epoch": 2.285564214692302, "percentage": 45.71, "elapsed_time": "2:01:33", "remaining_time": "2:24:21", "throughput": 8648.1, "total_tokens": 63070632} +{"current_steps": 93560, "total_steps": 204665, "loss": 0.0592, "lr": 1.3185261702706211e-06, "epoch": 2.2856863655241493, "percentage": 45.71, "elapsed_time": "2:01:33", "remaining_time": "2:24:21", "throughput": 8648.2, "total_tokens": 63074472} +{"current_steps": 93565, "total_steps": 204665, "loss": 0.1175, "lr": 1.3184453332763542e-06, "epoch": 2.2858085163559965, "percentage": 45.72, "elapsed_time": "2:01:33", "remaining_time": "2:24:20", "throughput": 8648.21, "total_tokens": 63077480} +{"current_steps": 93570, "total_steps": 204665, "loss": 0.0708, "lr": 1.3183644939662668e-06, "epoch": 2.2859306671878437, "percentage": 45.72, "elapsed_time": "2:01:34", "remaining_time": "2:24:20", "throughput": 8648.45, "total_tokens": 63082664} +{"current_steps": 93575, "total_steps": 204665, "loss": 0.0003, "lr": 1.318283652340947e-06, "epoch": 2.286052818019691, "percentage": 45.72, "elapsed_time": "2:01:34", "remaining_time": "2:24:19", "throughput": 8648.53, "total_tokens": 63086248} +{"current_steps": 93580, "total_steps": 204665, "loss": 0.0194, "lr": 1.3182028084009832e-06, "epoch": 2.286174968851538, "percentage": 45.72, "elapsed_time": "2:01:34", "remaining_time": "2:24:19", "throughput": 8648.58, "total_tokens": 63089704} +{"current_steps": 93585, "total_steps": 204665, "loss": 0.1173, "lr": 1.318121962146963e-06, "epoch": 2.2862971196833852, "percentage": 45.73, "elapsed_time": "2:01:35", "remaining_time": "2:24:18", "throughput": 8648.61, "total_tokens": 63092840} +{"current_steps": 93590, "total_steps": 204665, "loss": 0.0005, "lr": 1.3180411135794742e-06, "epoch": 2.286419270515232, "percentage": 45.73, "elapsed_time": "2:01:35", "remaining_time": "2:24:18", "throughput": 8648.64, "total_tokens": 63096104} +{"current_steps": 93595, "total_steps": 204665, "loss": 0.0478, "lr": 1.317960262699105e-06, "epoch": 2.2865414213470796, "percentage": 45.73, "elapsed_time": "2:01:35", "remaining_time": "2:24:18", "throughput": 8648.7, "total_tokens": 63099560} +{"current_steps": 93600, "total_steps": 204665, "loss": 0.0823, "lr": 1.317879409506443e-06, "epoch": 2.2866635721789264, "percentage": 45.73, "elapsed_time": "2:01:36", "remaining_time": "2:24:17", "throughput": 8648.75, "total_tokens": 63102952} +{"current_steps": 93605, "total_steps": 204665, "loss": 0.1719, "lr": 1.3177985540020765e-06, "epoch": 2.2867857230107735, "percentage": 45.74, "elapsed_time": "2:01:36", "remaining_time": "2:24:17", "throughput": 8648.85, "total_tokens": 63106728} +{"current_steps": 93610, "total_steps": 204665, "loss": 0.0003, "lr": 1.3177176961865934e-06, "epoch": 2.2869078738426207, "percentage": 45.74, "elapsed_time": "2:01:36", "remaining_time": "2:24:16", "throughput": 8648.94, "total_tokens": 63110440} +{"current_steps": 93615, "total_steps": 204665, "loss": 0.0495, "lr": 1.3176368360605818e-06, "epoch": 2.287030024674468, "percentage": 45.74, "elapsed_time": "2:01:37", "remaining_time": "2:24:16", "throughput": 8649.01, "total_tokens": 63113960} +{"current_steps": 93620, "total_steps": 204665, "loss": 0.0435, "lr": 1.3175559736246302e-06, "epoch": 2.287152175506315, "percentage": 45.74, "elapsed_time": "2:01:37", "remaining_time": "2:24:15", "throughput": 8649.03, "total_tokens": 63117032} +{"current_steps": 93625, "total_steps": 204665, "loss": 0.0382, "lr": 1.3174751088793257e-06, "epoch": 2.2872743263381623, "percentage": 45.75, "elapsed_time": "2:01:37", "remaining_time": "2:24:15", "throughput": 8649.07, "total_tokens": 63120360} +{"current_steps": 93630, "total_steps": 204665, "loss": 0.0003, "lr": 1.3173942418252566e-06, "epoch": 2.2873964771700095, "percentage": 45.75, "elapsed_time": "2:01:38", "remaining_time": "2:24:14", "throughput": 8649.08, "total_tokens": 63123368} +{"current_steps": 93635, "total_steps": 204665, "loss": 0.0003, "lr": 1.3173133724630114e-06, "epoch": 2.2875186280018567, "percentage": 45.75, "elapsed_time": "2:01:38", "remaining_time": "2:24:14", "throughput": 8649.18, "total_tokens": 63127144} +{"current_steps": 93640, "total_steps": 204665, "loss": 0.0272, "lr": 1.3172325007931782e-06, "epoch": 2.287640778833704, "percentage": 45.75, "elapsed_time": "2:01:38", "remaining_time": "2:24:14", "throughput": 8649.24, "total_tokens": 63130664} +{"current_steps": 93645, "total_steps": 204665, "loss": 0.0012, "lr": 1.3171516268163447e-06, "epoch": 2.287762929665551, "percentage": 45.76, "elapsed_time": "2:01:39", "remaining_time": "2:24:13", "throughput": 8649.25, "total_tokens": 63133672} +{"current_steps": 93650, "total_steps": 204665, "loss": 0.0635, "lr": 1.3170707505330993e-06, "epoch": 2.2878850804973982, "percentage": 45.76, "elapsed_time": "2:01:39", "remaining_time": "2:24:13", "throughput": 8649.35, "total_tokens": 63137512} +{"current_steps": 93655, "total_steps": 204665, "loss": 0.1236, "lr": 1.3169898719440301e-06, "epoch": 2.2880072313292454, "percentage": 45.76, "elapsed_time": "2:01:40", "remaining_time": "2:24:12", "throughput": 8649.39, "total_tokens": 63140776} +{"current_steps": 93660, "total_steps": 204665, "loss": 0.0006, "lr": 1.3169089910497254e-06, "epoch": 2.2881293821610926, "percentage": 45.76, "elapsed_time": "2:01:40", "remaining_time": "2:24:12", "throughput": 8649.45, "total_tokens": 63144232} +{"current_steps": 93665, "total_steps": 204665, "loss": 0.0117, "lr": 1.3168281078507735e-06, "epoch": 2.28825153299294, "percentage": 45.77, "elapsed_time": "2:01:40", "remaining_time": "2:24:11", "throughput": 8649.51, "total_tokens": 63147624} +{"current_steps": 93670, "total_steps": 204665, "loss": 0.0002, "lr": 1.316747222347762e-06, "epoch": 2.288373683824787, "percentage": 45.77, "elapsed_time": "2:01:41", "remaining_time": "2:24:11", "throughput": 8649.54, "total_tokens": 63150824} +{"current_steps": 93675, "total_steps": 204665, "loss": 0.0422, "lr": 1.3166663345412796e-06, "epoch": 2.288495834656634, "percentage": 45.77, "elapsed_time": "2:01:41", "remaining_time": "2:24:11", "throughput": 8649.58, "total_tokens": 63154088} +{"current_steps": 93680, "total_steps": 204665, "loss": 0.0003, "lr": 1.3165854444319148e-06, "epoch": 2.2886179854884814, "percentage": 45.77, "elapsed_time": "2:01:41", "remaining_time": "2:24:10", "throughput": 8649.76, "total_tokens": 63158632} +{"current_steps": 93685, "total_steps": 204665, "loss": 0.0002, "lr": 1.316504552020255e-06, "epoch": 2.288740136320328, "percentage": 45.77, "elapsed_time": "2:01:42", "remaining_time": "2:24:10", "throughput": 8649.78, "total_tokens": 63161704} +{"current_steps": 93690, "total_steps": 204665, "loss": 0.0002, "lr": 1.316423657306889e-06, "epoch": 2.2888622871521753, "percentage": 45.78, "elapsed_time": "2:01:42", "remaining_time": "2:24:09", "throughput": 8649.79, "total_tokens": 63164712} +{"current_steps": 93695, "total_steps": 204665, "loss": 0.0003, "lr": 1.3163427602924052e-06, "epoch": 2.2889844379840225, "percentage": 45.78, "elapsed_time": "2:01:42", "remaining_time": "2:24:09", "throughput": 8649.84, "total_tokens": 63168040} +{"current_steps": 93700, "total_steps": 204665, "loss": 0.0003, "lr": 1.3162618609773917e-06, "epoch": 2.2891065888158697, "percentage": 45.78, "elapsed_time": "2:01:43", "remaining_time": "2:24:08", "throughput": 8649.84, "total_tokens": 63170984} +{"current_steps": 93705, "total_steps": 204665, "loss": 0.0006, "lr": 1.316180959362437e-06, "epoch": 2.289228739647717, "percentage": 45.78, "elapsed_time": "2:01:43", "remaining_time": "2:24:08", "throughput": 8649.88, "total_tokens": 63174312} +{"current_steps": 93710, "total_steps": 204665, "loss": 0.0002, "lr": 1.3161000554481292e-06, "epoch": 2.289350890479564, "percentage": 45.79, "elapsed_time": "2:01:43", "remaining_time": "2:24:07", "throughput": 8649.92, "total_tokens": 63177512} +{"current_steps": 93715, "total_steps": 204665, "loss": 0.0007, "lr": 1.3160191492350568e-06, "epoch": 2.2894730413114113, "percentage": 45.79, "elapsed_time": "2:01:44", "remaining_time": "2:24:07", "throughput": 8649.95, "total_tokens": 63180776} +{"current_steps": 93720, "total_steps": 204665, "loss": 0.0002, "lr": 1.3159382407238083e-06, "epoch": 2.2895951921432585, "percentage": 45.79, "elapsed_time": "2:01:44", "remaining_time": "2:24:07", "throughput": 8649.99, "total_tokens": 63183976} +{"current_steps": 93725, "total_steps": 204665, "loss": 0.0764, "lr": 1.3158573299149716e-06, "epoch": 2.2897173429751057, "percentage": 45.79, "elapsed_time": "2:01:44", "remaining_time": "2:24:06", "throughput": 8649.98, "total_tokens": 63186856} +{"current_steps": 93730, "total_steps": 204665, "loss": 0.0001, "lr": 1.3157764168091356e-06, "epoch": 2.289839493806953, "percentage": 45.8, "elapsed_time": "2:01:45", "remaining_time": "2:24:06", "throughput": 8650.05, "total_tokens": 63190376} +{"current_steps": 93735, "total_steps": 204665, "loss": 0.0002, "lr": 1.3156955014068886e-06, "epoch": 2.2899616446388, "percentage": 45.8, "elapsed_time": "2:01:45", "remaining_time": "2:24:05", "throughput": 8650.07, "total_tokens": 63193512} +{"current_steps": 93740, "total_steps": 204665, "loss": 0.0001, "lr": 1.3156145837088192e-06, "epoch": 2.290083795470647, "percentage": 45.8, "elapsed_time": "2:01:45", "remaining_time": "2:24:05", "throughput": 8650.12, "total_tokens": 63196840} +{"current_steps": 93745, "total_steps": 204665, "loss": 0.0659, "lr": 1.3155336637155154e-06, "epoch": 2.2902059463024944, "percentage": 45.8, "elapsed_time": "2:01:46", "remaining_time": "2:24:04", "throughput": 8650.15, "total_tokens": 63200040} +{"current_steps": 93750, "total_steps": 204665, "loss": 0.1073, "lr": 1.315452741427566e-06, "epoch": 2.2903280971343416, "percentage": 45.81, "elapsed_time": "2:01:46", "remaining_time": "2:24:04", "throughput": 8650.23, "total_tokens": 63203752} +{"current_steps": 93755, "total_steps": 204665, "loss": 0.1151, "lr": 1.3153718168455595e-06, "epoch": 2.290450247966189, "percentage": 45.81, "elapsed_time": "2:01:46", "remaining_time": "2:24:03", "throughput": 8650.29, "total_tokens": 63207144} +{"current_steps": 93760, "total_steps": 204665, "loss": 0.0002, "lr": 1.315290889970084e-06, "epoch": 2.290572398798036, "percentage": 45.81, "elapsed_time": "2:01:47", "remaining_time": "2:24:03", "throughput": 8650.34, "total_tokens": 63210536} +{"current_steps": 93765, "total_steps": 204665, "loss": 0.0005, "lr": 1.3152099608017286e-06, "epoch": 2.290694549629883, "percentage": 45.81, "elapsed_time": "2:01:47", "remaining_time": "2:24:03", "throughput": 8650.38, "total_tokens": 63213800} +{"current_steps": 93770, "total_steps": 204665, "loss": 0.0102, "lr": 1.3151290293410818e-06, "epoch": 2.29081670046173, "percentage": 45.82, "elapsed_time": "2:01:47", "remaining_time": "2:24:02", "throughput": 8650.44, "total_tokens": 63217320} +{"current_steps": 93775, "total_steps": 204665, "loss": 0.0002, "lr": 1.315048095588732e-06, "epoch": 2.2909388512935775, "percentage": 45.82, "elapsed_time": "2:01:48", "remaining_time": "2:24:02", "throughput": 8650.49, "total_tokens": 63220712} +{"current_steps": 93780, "total_steps": 204665, "loss": 0.0384, "lr": 1.3149671595452674e-06, "epoch": 2.2910610021254243, "percentage": 45.82, "elapsed_time": "2:01:48", "remaining_time": "2:24:01", "throughput": 8650.55, "total_tokens": 63224104} +{"current_steps": 93785, "total_steps": 204665, "loss": 0.0002, "lr": 1.3148862212112765e-06, "epoch": 2.2911831529572715, "percentage": 45.82, "elapsed_time": "2:01:49", "remaining_time": "2:24:01", "throughput": 8650.58, "total_tokens": 63227304} +{"current_steps": 93790, "total_steps": 204665, "loss": 0.0001, "lr": 1.314805280587349e-06, "epoch": 2.2913053037891187, "percentage": 45.83, "elapsed_time": "2:01:49", "remaining_time": "2:24:00", "throughput": 8650.59, "total_tokens": 63230376} +{"current_steps": 93795, "total_steps": 204665, "loss": 0.002, "lr": 1.3147243376740724e-06, "epoch": 2.291427454620966, "percentage": 45.83, "elapsed_time": "2:01:49", "remaining_time": "2:24:00", "throughput": 8650.6, "total_tokens": 63233320} +{"current_steps": 93800, "total_steps": 204665, "loss": 0.0005, "lr": 1.314643392472036e-06, "epoch": 2.291549605452813, "percentage": 45.83, "elapsed_time": "2:01:50", "remaining_time": "2:23:59", "throughput": 8650.64, "total_tokens": 63236648} +{"current_steps": 93805, "total_steps": 204665, "loss": 0.0002, "lr": 1.3145624449818283e-06, "epoch": 2.2916717562846602, "percentage": 45.83, "elapsed_time": "2:01:50", "remaining_time": "2:23:59", "throughput": 8650.68, "total_tokens": 63239848} +{"current_steps": 93810, "total_steps": 204665, "loss": 0.0002, "lr": 1.3144814952040375e-06, "epoch": 2.2917939071165074, "percentage": 45.84, "elapsed_time": "2:01:50", "remaining_time": "2:23:59", "throughput": 8650.74, "total_tokens": 63243368} +{"current_steps": 93815, "total_steps": 204665, "loss": 0.084, "lr": 1.314400543139253e-06, "epoch": 2.2919160579483546, "percentage": 45.84, "elapsed_time": "2:01:51", "remaining_time": "2:23:58", "throughput": 8650.77, "total_tokens": 63246568} +{"current_steps": 93820, "total_steps": 204665, "loss": 0.0701, "lr": 1.3143195887880631e-06, "epoch": 2.292038208780202, "percentage": 45.84, "elapsed_time": "2:01:51", "remaining_time": "2:23:58", "throughput": 8650.83, "total_tokens": 63250024} +{"current_steps": 93825, "total_steps": 204665, "loss": 0.0002, "lr": 1.3142386321510565e-06, "epoch": 2.292160359612049, "percentage": 45.84, "elapsed_time": "2:01:51", "remaining_time": "2:23:57", "throughput": 8650.9, "total_tokens": 63253608} +{"current_steps": 93830, "total_steps": 204665, "loss": 0.0008, "lr": 1.3141576732288223e-06, "epoch": 2.292282510443896, "percentage": 45.85, "elapsed_time": "2:01:52", "remaining_time": "2:23:57", "throughput": 8650.91, "total_tokens": 63256616} +{"current_steps": 93835, "total_steps": 204665, "loss": 0.099, "lr": 1.314076712021949e-06, "epoch": 2.2924046612757434, "percentage": 45.85, "elapsed_time": "2:01:52", "remaining_time": "2:23:56", "throughput": 8650.94, "total_tokens": 63259752} +{"current_steps": 93840, "total_steps": 204665, "loss": 0.0002, "lr": 1.3139957485310251e-06, "epoch": 2.2925268121075906, "percentage": 45.85, "elapsed_time": "2:01:52", "remaining_time": "2:23:56", "throughput": 8651.03, "total_tokens": 63263528} +{"current_steps": 93845, "total_steps": 204665, "loss": 0.0001, "lr": 1.31391478275664e-06, "epoch": 2.2926489629394378, "percentage": 45.85, "elapsed_time": "2:01:53", "remaining_time": "2:23:56", "throughput": 8651.11, "total_tokens": 63267112} +{"current_steps": 93850, "total_steps": 204665, "loss": 0.054, "lr": 1.3138338146993814e-06, "epoch": 2.292771113771285, "percentage": 45.86, "elapsed_time": "2:01:53", "remaining_time": "2:23:55", "throughput": 8651.16, "total_tokens": 63270568} +{"current_steps": 93855, "total_steps": 204665, "loss": 0.0007, "lr": 1.3137528443598398e-06, "epoch": 2.2928932646031317, "percentage": 45.86, "elapsed_time": "2:01:53", "remaining_time": "2:23:55", "throughput": 8651.23, "total_tokens": 63274088} +{"current_steps": 93860, "total_steps": 204665, "loss": 0.1433, "lr": 1.3136718717386025e-06, "epoch": 2.2930154154349793, "percentage": 45.86, "elapsed_time": "2:01:54", "remaining_time": "2:23:54", "throughput": 8651.26, "total_tokens": 63277288} +{"current_steps": 93865, "total_steps": 204665, "loss": 0.0002, "lr": 1.3135908968362596e-06, "epoch": 2.293137566266826, "percentage": 45.86, "elapsed_time": "2:01:54", "remaining_time": "2:23:54", "throughput": 8651.28, "total_tokens": 63280360} +{"current_steps": 93870, "total_steps": 204665, "loss": 0.0003, "lr": 1.313509919653399e-06, "epoch": 2.2932597170986733, "percentage": 45.87, "elapsed_time": "2:01:54", "remaining_time": "2:23:53", "throughput": 8651.31, "total_tokens": 63283560} +{"current_steps": 93875, "total_steps": 204665, "loss": 0.0597, "lr": 1.3134289401906099e-06, "epoch": 2.2933818679305205, "percentage": 45.87, "elapsed_time": "2:01:55", "remaining_time": "2:23:53", "throughput": 8651.43, "total_tokens": 63287528} +{"current_steps": 93880, "total_steps": 204665, "loss": 0.0002, "lr": 1.3133479584484812e-06, "epoch": 2.2935040187623676, "percentage": 45.87, "elapsed_time": "2:01:55", "remaining_time": "2:23:52", "throughput": 8651.5, "total_tokens": 63291112} +{"current_steps": 93885, "total_steps": 204665, "loss": 0.0593, "lr": 1.3132669744276022e-06, "epoch": 2.293626169594215, "percentage": 45.87, "elapsed_time": "2:01:55", "remaining_time": "2:23:52", "throughput": 8651.56, "total_tokens": 63294568} +{"current_steps": 93890, "total_steps": 204665, "loss": 0.0417, "lr": 1.3131859881285612e-06, "epoch": 2.293748320426062, "percentage": 45.87, "elapsed_time": "2:01:56", "remaining_time": "2:23:52", "throughput": 8651.69, "total_tokens": 63298664} +{"current_steps": 93895, "total_steps": 204665, "loss": 0.0002, "lr": 1.3131049995519474e-06, "epoch": 2.293870471257909, "percentage": 45.88, "elapsed_time": "2:01:56", "remaining_time": "2:23:51", "throughput": 8651.71, "total_tokens": 63301800} +{"current_steps": 93900, "total_steps": 204665, "loss": 0.0001, "lr": 1.3130240086983499e-06, "epoch": 2.2939926220897564, "percentage": 45.88, "elapsed_time": "2:01:57", "remaining_time": "2:23:51", "throughput": 8651.79, "total_tokens": 63305448} +{"current_steps": 93905, "total_steps": 204665, "loss": 0.0466, "lr": 1.3129430155683579e-06, "epoch": 2.2941147729216036, "percentage": 45.88, "elapsed_time": "2:01:57", "remaining_time": "2:23:50", "throughput": 8651.86, "total_tokens": 63308968} +{"current_steps": 93910, "total_steps": 204665, "loss": 0.1974, "lr": 1.3128620201625596e-06, "epoch": 2.294236923753451, "percentage": 45.88, "elapsed_time": "2:01:57", "remaining_time": "2:23:50", "throughput": 8651.96, "total_tokens": 63312808} +{"current_steps": 93915, "total_steps": 204665, "loss": 0.0494, "lr": 1.3127810224815447e-06, "epoch": 2.294359074585298, "percentage": 45.89, "elapsed_time": "2:01:58", "remaining_time": "2:23:49", "throughput": 8651.97, "total_tokens": 63315880} +{"current_steps": 93920, "total_steps": 204665, "loss": 0.0008, "lr": 1.3127000225259025e-06, "epoch": 2.294481225417145, "percentage": 45.89, "elapsed_time": "2:01:58", "remaining_time": "2:23:49", "throughput": 8651.99, "total_tokens": 63318952} +{"current_steps": 93925, "total_steps": 204665, "loss": 0.0001, "lr": 1.3126190202962213e-06, "epoch": 2.2946033762489924, "percentage": 45.89, "elapsed_time": "2:01:58", "remaining_time": "2:23:49", "throughput": 8652.01, "total_tokens": 63322024} +{"current_steps": 93930, "total_steps": 204665, "loss": 0.0004, "lr": 1.3125380157930908e-06, "epoch": 2.2947255270808395, "percentage": 45.89, "elapsed_time": "2:01:59", "remaining_time": "2:23:48", "throughput": 8652.1, "total_tokens": 63325736} +{"current_steps": 93935, "total_steps": 204665, "loss": 0.1105, "lr": 1.3124570090170994e-06, "epoch": 2.2948476779126867, "percentage": 45.9, "elapsed_time": "2:01:59", "remaining_time": "2:23:48", "throughput": 8652.19, "total_tokens": 63329448} +{"current_steps": 93940, "total_steps": 204665, "loss": 0.1233, "lr": 1.3123759999688367e-06, "epoch": 2.294969828744534, "percentage": 45.9, "elapsed_time": "2:01:59", "remaining_time": "2:23:47", "throughput": 8652.33, "total_tokens": 63333672} +{"current_steps": 93945, "total_steps": 204665, "loss": 0.0527, "lr": 1.3122949886488913e-06, "epoch": 2.295091979576381, "percentage": 45.9, "elapsed_time": "2:02:00", "remaining_time": "2:23:47", "throughput": 8652.41, "total_tokens": 63337320} +{"current_steps": 93950, "total_steps": 204665, "loss": 0.0094, "lr": 1.3122139750578533e-06, "epoch": 2.295214130408228, "percentage": 45.9, "elapsed_time": "2:02:00", "remaining_time": "2:23:46", "throughput": 8652.42, "total_tokens": 63340328} +{"current_steps": 93955, "total_steps": 204665, "loss": 0.0312, "lr": 1.3121329591963112e-06, "epoch": 2.2953362812400755, "percentage": 45.91, "elapsed_time": "2:02:00", "remaining_time": "2:23:46", "throughput": 8652.52, "total_tokens": 63344104} +{"current_steps": 93960, "total_steps": 204665, "loss": 0.0003, "lr": 1.3120519410648543e-06, "epoch": 2.2954584320719222, "percentage": 45.91, "elapsed_time": "2:02:01", "remaining_time": "2:23:45", "throughput": 8652.57, "total_tokens": 63347496} +{"current_steps": 93965, "total_steps": 204665, "loss": 0.0006, "lr": 1.3119709206640716e-06, "epoch": 2.2955805829037694, "percentage": 45.91, "elapsed_time": "2:02:01", "remaining_time": "2:23:45", "throughput": 8652.56, "total_tokens": 63350376} +{"current_steps": 93970, "total_steps": 204665, "loss": 0.1245, "lr": 1.3118898979945528e-06, "epoch": 2.2957027337356166, "percentage": 45.91, "elapsed_time": "2:02:01", "remaining_time": "2:23:45", "throughput": 8652.67, "total_tokens": 63354280} +{"current_steps": 93975, "total_steps": 204665, "loss": 0.0408, "lr": 1.3118088730568863e-06, "epoch": 2.295824884567464, "percentage": 45.92, "elapsed_time": "2:02:02", "remaining_time": "2:23:44", "throughput": 8652.74, "total_tokens": 63357864} +{"current_steps": 93980, "total_steps": 204665, "loss": 0.0003, "lr": 1.3117278458516622e-06, "epoch": 2.295947035399311, "percentage": 45.92, "elapsed_time": "2:02:02", "remaining_time": "2:23:44", "throughput": 8652.8, "total_tokens": 63361320} +{"current_steps": 93985, "total_steps": 204665, "loss": 0.0355, "lr": 1.3116468163794691e-06, "epoch": 2.296069186231158, "percentage": 45.92, "elapsed_time": "2:02:02", "remaining_time": "2:23:43", "throughput": 8652.82, "total_tokens": 63364392} +{"current_steps": 93990, "total_steps": 204665, "loss": 0.0518, "lr": 1.3115657846408965e-06, "epoch": 2.2961913370630054, "percentage": 45.92, "elapsed_time": "2:02:03", "remaining_time": "2:23:43", "throughput": 8652.83, "total_tokens": 63367400} +{"current_steps": 93995, "total_steps": 204665, "loss": 0.0567, "lr": 1.3114847506365338e-06, "epoch": 2.2963134878948526, "percentage": 45.93, "elapsed_time": "2:02:03", "remaining_time": "2:23:42", "throughput": 8652.91, "total_tokens": 63371048} +{"current_steps": 94000, "total_steps": 204665, "loss": 0.0216, "lr": 1.3114037143669702e-06, "epoch": 2.2964356387266998, "percentage": 45.93, "elapsed_time": "2:02:04", "remaining_time": "2:23:42", "throughput": 8652.92, "total_tokens": 63374056} +{"current_steps": 94005, "total_steps": 204665, "loss": 0.0643, "lr": 1.3113226758327952e-06, "epoch": 2.296557789558547, "percentage": 45.93, "elapsed_time": "2:02:04", "remaining_time": "2:23:42", "throughput": 8652.98, "total_tokens": 63377512} +{"current_steps": 94010, "total_steps": 204665, "loss": 0.0006, "lr": 1.3112416350345977e-06, "epoch": 2.296679940390394, "percentage": 45.93, "elapsed_time": "2:02:04", "remaining_time": "2:23:41", "throughput": 8653.01, "total_tokens": 63380712} +{"current_steps": 94015, "total_steps": 204665, "loss": 0.038, "lr": 1.3111605919729676e-06, "epoch": 2.2968020912222413, "percentage": 45.94, "elapsed_time": "2:02:05", "remaining_time": "2:23:41", "throughput": 8653.05, "total_tokens": 63384040} +{"current_steps": 94020, "total_steps": 204665, "loss": 0.0514, "lr": 1.3110795466484939e-06, "epoch": 2.2969242420540885, "percentage": 45.94, "elapsed_time": "2:02:05", "remaining_time": "2:23:40", "throughput": 8653.1, "total_tokens": 63387432} +{"current_steps": 94025, "total_steps": 204665, "loss": 0.0426, "lr": 1.3109984990617658e-06, "epoch": 2.2970463928859357, "percentage": 45.94, "elapsed_time": "2:02:05", "remaining_time": "2:23:40", "throughput": 8653.25, "total_tokens": 63391656} +{"current_steps": 94030, "total_steps": 204665, "loss": 0.0015, "lr": 1.3109174492133732e-06, "epoch": 2.297168543717783, "percentage": 45.94, "elapsed_time": "2:02:06", "remaining_time": "2:23:39", "throughput": 8653.28, "total_tokens": 63394792} +{"current_steps": 94035, "total_steps": 204665, "loss": 0.0007, "lr": 1.3108363971039053e-06, "epoch": 2.2972906945496296, "percentage": 45.95, "elapsed_time": "2:02:06", "remaining_time": "2:23:39", "throughput": 8653.31, "total_tokens": 63397992} +{"current_steps": 94040, "total_steps": 204665, "loss": 0.0002, "lr": 1.3107553427339515e-06, "epoch": 2.2974128453814773, "percentage": 45.95, "elapsed_time": "2:02:06", "remaining_time": "2:23:38", "throughput": 8653.37, "total_tokens": 63401512} +{"current_steps": 94045, "total_steps": 204665, "loss": 0.0331, "lr": 1.310674286104101e-06, "epoch": 2.297534996213324, "percentage": 45.95, "elapsed_time": "2:02:07", "remaining_time": "2:23:38", "throughput": 8653.41, "total_tokens": 63404776} +{"current_steps": 94050, "total_steps": 204665, "loss": 0.0002, "lr": 1.310593227214944e-06, "epoch": 2.297657147045171, "percentage": 45.95, "elapsed_time": "2:02:07", "remaining_time": "2:23:38", "throughput": 8653.55, "total_tokens": 63408936} +{"current_steps": 94055, "total_steps": 204665, "loss": 0.0001, "lr": 1.3105121660670692e-06, "epoch": 2.2977792978770184, "percentage": 45.96, "elapsed_time": "2:02:07", "remaining_time": "2:23:37", "throughput": 8653.64, "total_tokens": 63412712} +{"current_steps": 94060, "total_steps": 204665, "loss": 0.0001, "lr": 1.3104311026610666e-06, "epoch": 2.2979014487088656, "percentage": 45.96, "elapsed_time": "2:02:08", "remaining_time": "2:23:37", "throughput": 8653.72, "total_tokens": 63416296} +{"current_steps": 94065, "total_steps": 204665, "loss": 0.0934, "lr": 1.310350036997525e-06, "epoch": 2.298023599540713, "percentage": 45.96, "elapsed_time": "2:02:08", "remaining_time": "2:23:36", "throughput": 8653.71, "total_tokens": 63419112} +{"current_steps": 94070, "total_steps": 204665, "loss": 0.0353, "lr": 1.310268969077035e-06, "epoch": 2.29814575037256, "percentage": 45.96, "elapsed_time": "2:02:08", "remaining_time": "2:23:36", "throughput": 8653.81, "total_tokens": 63422888} +{"current_steps": 94075, "total_steps": 204665, "loss": 0.1447, "lr": 1.3101878989001856e-06, "epoch": 2.298267901204407, "percentage": 45.97, "elapsed_time": "2:02:09", "remaining_time": "2:23:35", "throughput": 8653.87, "total_tokens": 63426344} +{"current_steps": 94080, "total_steps": 204665, "loss": 0.001, "lr": 1.3101068264675662e-06, "epoch": 2.2983900520362543, "percentage": 45.97, "elapsed_time": "2:02:09", "remaining_time": "2:23:35", "throughput": 8653.95, "total_tokens": 63429992} +{"current_steps": 94085, "total_steps": 204665, "loss": 0.0001, "lr": 1.3100257517797668e-06, "epoch": 2.2985122028681015, "percentage": 45.97, "elapsed_time": "2:02:09", "remaining_time": "2:23:35", "throughput": 8654.02, "total_tokens": 63433512} +{"current_steps": 94090, "total_steps": 204665, "loss": 0.1512, "lr": 1.3099446748373764e-06, "epoch": 2.2986343536999487, "percentage": 45.97, "elapsed_time": "2:02:10", "remaining_time": "2:23:34", "throughput": 8654.03, "total_tokens": 63436520} +{"current_steps": 94095, "total_steps": 204665, "loss": 0.0002, "lr": 1.3098635956409851e-06, "epoch": 2.298756504531796, "percentage": 45.98, "elapsed_time": "2:02:10", "remaining_time": "2:23:34", "throughput": 8654.05, "total_tokens": 63439720} +{"current_steps": 94100, "total_steps": 204665, "loss": 0.0005, "lr": 1.3097825141911821e-06, "epoch": 2.298878655363643, "percentage": 45.98, "elapsed_time": "2:02:10", "remaining_time": "2:23:33", "throughput": 8654.11, "total_tokens": 63443176} +{"current_steps": 94105, "total_steps": 204665, "loss": 0.1533, "lr": 1.3097014304885578e-06, "epoch": 2.2990008061954903, "percentage": 45.98, "elapsed_time": "2:02:11", "remaining_time": "2:23:33", "throughput": 8654.14, "total_tokens": 63446376} +{"current_steps": 94110, "total_steps": 204665, "loss": 0.113, "lr": 1.3096203445337013e-06, "epoch": 2.2991229570273375, "percentage": 45.98, "elapsed_time": "2:02:11", "remaining_time": "2:23:32", "throughput": 8654.2, "total_tokens": 63449832} +{"current_steps": 94115, "total_steps": 204665, "loss": 0.1354, "lr": 1.309539256327202e-06, "epoch": 2.2992451078591847, "percentage": 45.98, "elapsed_time": "2:02:12", "remaining_time": "2:23:32", "throughput": 8654.2, "total_tokens": 63452776} +{"current_steps": 94120, "total_steps": 204665, "loss": 0.0712, "lr": 1.3094581658696505e-06, "epoch": 2.299367258691032, "percentage": 45.99, "elapsed_time": "2:02:12", "remaining_time": "2:23:31", "throughput": 8654.24, "total_tokens": 63456104} +{"current_steps": 94125, "total_steps": 204665, "loss": 0.0532, "lr": 1.3093770731616358e-06, "epoch": 2.299489409522879, "percentage": 45.99, "elapsed_time": "2:02:12", "remaining_time": "2:23:31", "throughput": 8654.22, "total_tokens": 63458856} +{"current_steps": 94130, "total_steps": 204665, "loss": 0.0367, "lr": 1.3092959782037478e-06, "epoch": 2.299611560354726, "percentage": 45.99, "elapsed_time": "2:02:13", "remaining_time": "2:23:31", "throughput": 8654.22, "total_tokens": 63461800} +{"current_steps": 94135, "total_steps": 204665, "loss": 0.0827, "lr": 1.3092148809965763e-06, "epoch": 2.299733711186573, "percentage": 45.99, "elapsed_time": "2:02:13", "remaining_time": "2:23:30", "throughput": 8654.33, "total_tokens": 63465640} +{"current_steps": 94140, "total_steps": 204665, "loss": 0.0007, "lr": 1.3091337815407108e-06, "epoch": 2.29985586201842, "percentage": 46.0, "elapsed_time": "2:02:13", "remaining_time": "2:23:30", "throughput": 8654.33, "total_tokens": 63468584} +{"current_steps": 94145, "total_steps": 204665, "loss": 0.033, "lr": 1.3090526798367414e-06, "epoch": 2.2999780128502674, "percentage": 46.0, "elapsed_time": "2:02:14", "remaining_time": "2:23:29", "throughput": 8654.43, "total_tokens": 63472488} +{"current_steps": 94150, "total_steps": 204665, "loss": 0.053, "lr": 1.3089715758852578e-06, "epoch": 2.3001001636821146, "percentage": 46.0, "elapsed_time": "2:02:14", "remaining_time": "2:23:29", "throughput": 8654.47, "total_tokens": 63475752} +{"current_steps": 94155, "total_steps": 204665, "loss": 0.001, "lr": 1.3088904696868498e-06, "epoch": 2.3002223145139618, "percentage": 46.0, "elapsed_time": "2:02:14", "remaining_time": "2:23:28", "throughput": 8654.53, "total_tokens": 63479208} +{"current_steps": 94160, "total_steps": 204665, "loss": 0.0005, "lr": 1.308809361242107e-06, "epoch": 2.300344465345809, "percentage": 46.01, "elapsed_time": "2:02:15", "remaining_time": "2:23:28", "throughput": 8654.61, "total_tokens": 63482856} +{"current_steps": 94165, "total_steps": 204665, "loss": 0.0007, "lr": 1.3087282505516197e-06, "epoch": 2.300466616177656, "percentage": 46.01, "elapsed_time": "2:02:15", "remaining_time": "2:23:28", "throughput": 8654.65, "total_tokens": 63486248} +{"current_steps": 94170, "total_steps": 204665, "loss": 0.0003, "lr": 1.3086471376159777e-06, "epoch": 2.3005887670095033, "percentage": 46.01, "elapsed_time": "2:02:15", "remaining_time": "2:23:27", "throughput": 8654.69, "total_tokens": 63489512} +{"current_steps": 94175, "total_steps": 204665, "loss": 0.0005, "lr": 1.3085660224357703e-06, "epoch": 2.3007109178413505, "percentage": 46.01, "elapsed_time": "2:02:16", "remaining_time": "2:23:27", "throughput": 8654.69, "total_tokens": 63492456} +{"current_steps": 94180, "total_steps": 204665, "loss": 0.0003, "lr": 1.3084849050115883e-06, "epoch": 2.3008330686731977, "percentage": 46.02, "elapsed_time": "2:02:16", "remaining_time": "2:23:26", "throughput": 8654.76, "total_tokens": 63495976} +{"current_steps": 94185, "total_steps": 204665, "loss": 0.0457, "lr": 1.3084037853440206e-06, "epoch": 2.300955219505045, "percentage": 46.02, "elapsed_time": "2:02:16", "remaining_time": "2:23:26", "throughput": 8654.78, "total_tokens": 63499176} +{"current_steps": 94190, "total_steps": 204665, "loss": 0.0295, "lr": 1.308322663433658e-06, "epoch": 2.301077370336892, "percentage": 46.02, "elapsed_time": "2:02:17", "remaining_time": "2:23:25", "throughput": 8654.82, "total_tokens": 63502440} +{"current_steps": 94195, "total_steps": 204665, "loss": 0.0837, "lr": 1.3082415392810896e-06, "epoch": 2.3011995211687393, "percentage": 46.02, "elapsed_time": "2:02:17", "remaining_time": "2:23:25", "throughput": 8654.85, "total_tokens": 63505640} +{"current_steps": 94200, "total_steps": 204665, "loss": 0.0005, "lr": 1.3081604128869064e-06, "epoch": 2.3013216720005865, "percentage": 46.03, "elapsed_time": "2:02:17", "remaining_time": "2:23:24", "throughput": 8654.92, "total_tokens": 63509224} +{"current_steps": 94205, "total_steps": 204665, "loss": 0.038, "lr": 1.3080792842516974e-06, "epoch": 2.3014438228324336, "percentage": 46.03, "elapsed_time": "2:02:18", "remaining_time": "2:23:24", "throughput": 8654.96, "total_tokens": 63512488} +{"current_steps": 94210, "total_steps": 204665, "loss": 0.0001, "lr": 1.3079981533760532e-06, "epoch": 2.301565973664281, "percentage": 46.03, "elapsed_time": "2:02:18", "remaining_time": "2:23:24", "throughput": 8655.02, "total_tokens": 63516008} +{"current_steps": 94215, "total_steps": 204665, "loss": 0.0405, "lr": 1.3079170202605633e-06, "epoch": 2.3016881244961276, "percentage": 46.03, "elapsed_time": "2:02:18", "remaining_time": "2:23:23", "throughput": 8655.04, "total_tokens": 63519208} +{"current_steps": 94220, "total_steps": 204665, "loss": 0.0309, "lr": 1.3078358849058182e-06, "epoch": 2.301810275327975, "percentage": 46.04, "elapsed_time": "2:02:19", "remaining_time": "2:23:23", "throughput": 8655.05, "total_tokens": 63522216} +{"current_steps": 94225, "total_steps": 204665, "loss": 0.0003, "lr": 1.3077547473124076e-06, "epoch": 2.301932426159822, "percentage": 46.04, "elapsed_time": "2:02:19", "remaining_time": "2:23:22", "throughput": 8655.14, "total_tokens": 63525928} +{"current_steps": 94230, "total_steps": 204665, "loss": 0.0373, "lr": 1.3076736074809219e-06, "epoch": 2.302054576991669, "percentage": 46.04, "elapsed_time": "2:02:20", "remaining_time": "2:23:22", "throughput": 8655.16, "total_tokens": 63529064} +{"current_steps": 94235, "total_steps": 204665, "loss": 0.0007, "lr": 1.3075924654119507e-06, "epoch": 2.3021767278235163, "percentage": 46.04, "elapsed_time": "2:02:20", "remaining_time": "2:23:21", "throughput": 8655.2, "total_tokens": 63532328} +{"current_steps": 94240, "total_steps": 204665, "loss": 0.0002, "lr": 1.307511321106085e-06, "epoch": 2.3022988786553635, "percentage": 46.05, "elapsed_time": "2:02:20", "remaining_time": "2:23:21", "throughput": 8655.27, "total_tokens": 63535976} +{"current_steps": 94245, "total_steps": 204665, "loss": 0.0002, "lr": 1.3074301745639138e-06, "epoch": 2.3024210294872107, "percentage": 46.05, "elapsed_time": "2:02:21", "remaining_time": "2:23:20", "throughput": 8655.33, "total_tokens": 63539368} +{"current_steps": 94250, "total_steps": 204665, "loss": 0.001, "lr": 1.3073490257860278e-06, "epoch": 2.302543180319058, "percentage": 46.05, "elapsed_time": "2:02:21", "remaining_time": "2:23:20", "throughput": 8655.36, "total_tokens": 63542632} +{"current_steps": 94255, "total_steps": 204665, "loss": 0.0003, "lr": 1.3072678747730166e-06, "epoch": 2.302665331150905, "percentage": 46.05, "elapsed_time": "2:02:21", "remaining_time": "2:23:20", "throughput": 8655.46, "total_tokens": 63546408} +{"current_steps": 94260, "total_steps": 204665, "loss": 0.0001, "lr": 1.307186721525471e-06, "epoch": 2.3027874819827523, "percentage": 46.06, "elapsed_time": "2:02:22", "remaining_time": "2:23:19", "throughput": 8655.51, "total_tokens": 63549800} +{"current_steps": 94265, "total_steps": 204665, "loss": 0.0697, "lr": 1.3071055660439811e-06, "epoch": 2.3029096328145995, "percentage": 46.06, "elapsed_time": "2:02:22", "remaining_time": "2:23:19", "throughput": 8655.62, "total_tokens": 63553768} +{"current_steps": 94270, "total_steps": 204665, "loss": 0.0515, "lr": 1.3070244083291368e-06, "epoch": 2.3030317836464467, "percentage": 46.06, "elapsed_time": "2:02:22", "remaining_time": "2:23:18", "throughput": 8655.63, "total_tokens": 63556840} +{"current_steps": 94275, "total_steps": 204665, "loss": 0.14, "lr": 1.3069432483815285e-06, "epoch": 2.303153934478294, "percentage": 46.06, "elapsed_time": "2:02:23", "remaining_time": "2:23:18", "throughput": 8655.73, "total_tokens": 63560616} +{"current_steps": 94280, "total_steps": 204665, "loss": 0.0005, "lr": 1.3068620862017466e-06, "epoch": 2.303276085310141, "percentage": 46.07, "elapsed_time": "2:02:23", "remaining_time": "2:23:17", "throughput": 8655.79, "total_tokens": 63564200} +{"current_steps": 94285, "total_steps": 204665, "loss": 0.0003, "lr": 1.3067809217903807e-06, "epoch": 2.3033982361419882, "percentage": 46.07, "elapsed_time": "2:02:23", "remaining_time": "2:23:17", "throughput": 8655.82, "total_tokens": 63567400} +{"current_steps": 94290, "total_steps": 204665, "loss": 0.0739, "lr": 1.3066997551480215e-06, "epoch": 2.3035203869738354, "percentage": 46.07, "elapsed_time": "2:02:24", "remaining_time": "2:23:17", "throughput": 8655.97, "total_tokens": 63571624} +{"current_steps": 94295, "total_steps": 204665, "loss": 0.0889, "lr": 1.3066185862752592e-06, "epoch": 2.3036425378056826, "percentage": 46.07, "elapsed_time": "2:02:24", "remaining_time": "2:23:16", "throughput": 8656.05, "total_tokens": 63575272} +{"current_steps": 94300, "total_steps": 204665, "loss": 0.0001, "lr": 1.3065374151726842e-06, "epoch": 2.30376468863753, "percentage": 46.08, "elapsed_time": "2:02:24", "remaining_time": "2:23:16", "throughput": 8656.1, "total_tokens": 63578664} +{"current_steps": 94305, "total_steps": 204665, "loss": 0.1545, "lr": 1.3064562418408863e-06, "epoch": 2.303886839469377, "percentage": 46.08, "elapsed_time": "2:02:25", "remaining_time": "2:23:15", "throughput": 8656.14, "total_tokens": 63581992} +{"current_steps": 94310, "total_steps": 204665, "loss": 0.0004, "lr": 1.3063750662804567e-06, "epoch": 2.3040089903012237, "percentage": 46.08, "elapsed_time": "2:02:25", "remaining_time": "2:23:15", "throughput": 8656.21, "total_tokens": 63585512} +{"current_steps": 94315, "total_steps": 204665, "loss": 0.0003, "lr": 1.3062938884919844e-06, "epoch": 2.304131141133071, "percentage": 46.08, "elapsed_time": "2:02:25", "remaining_time": "2:23:14", "throughput": 8656.24, "total_tokens": 63588712} +{"current_steps": 94320, "total_steps": 204665, "loss": 0.0758, "lr": 1.3062127084760613e-06, "epoch": 2.304253291964918, "percentage": 46.09, "elapsed_time": "2:02:26", "remaining_time": "2:23:14", "throughput": 8656.3, "total_tokens": 63592232} +{"current_steps": 94325, "total_steps": 204665, "loss": 0.0004, "lr": 1.3061315262332768e-06, "epoch": 2.3043754427967653, "percentage": 46.09, "elapsed_time": "2:02:26", "remaining_time": "2:23:14", "throughput": 8656.39, "total_tokens": 63595944} +{"current_steps": 94330, "total_steps": 204665, "loss": 0.0002, "lr": 1.3060503417642218e-06, "epoch": 2.3044975936286125, "percentage": 46.09, "elapsed_time": "2:02:27", "remaining_time": "2:23:13", "throughput": 8656.44, "total_tokens": 63599400} +{"current_steps": 94335, "total_steps": 204665, "loss": 0.0429, "lr": 1.3059691550694858e-06, "epoch": 2.3046197444604597, "percentage": 46.09, "elapsed_time": "2:02:27", "remaining_time": "2:23:13", "throughput": 8656.53, "total_tokens": 63603112} +{"current_steps": 94340, "total_steps": 204665, "loss": 0.0378, "lr": 1.3058879661496602e-06, "epoch": 2.304741895292307, "percentage": 46.09, "elapsed_time": "2:02:27", "remaining_time": "2:23:12", "throughput": 8656.62, "total_tokens": 63606888} +{"current_steps": 94345, "total_steps": 204665, "loss": 0.0002, "lr": 1.305806775005335e-06, "epoch": 2.304864046124154, "percentage": 46.1, "elapsed_time": "2:02:28", "remaining_time": "2:23:12", "throughput": 8656.62, "total_tokens": 63609832} +{"current_steps": 94350, "total_steps": 204665, "loss": 0.0001, "lr": 1.3057255816371007e-06, "epoch": 2.3049861969560013, "percentage": 46.1, "elapsed_time": "2:02:28", "remaining_time": "2:23:11", "throughput": 8656.66, "total_tokens": 63613096} +{"current_steps": 94355, "total_steps": 204665, "loss": 0.0699, "lr": 1.3056443860455476e-06, "epoch": 2.3051083477878485, "percentage": 46.1, "elapsed_time": "2:02:28", "remaining_time": "2:23:11", "throughput": 8656.73, "total_tokens": 63616680} +{"current_steps": 94360, "total_steps": 204665, "loss": 0.0706, "lr": 1.3055631882312664e-06, "epoch": 2.3052304986196956, "percentage": 46.1, "elapsed_time": "2:02:29", "remaining_time": "2:23:11", "throughput": 8656.74, "total_tokens": 63619752} +{"current_steps": 94365, "total_steps": 204665, "loss": 0.0004, "lr": 1.3054819881948476e-06, "epoch": 2.305352649451543, "percentage": 46.11, "elapsed_time": "2:02:29", "remaining_time": "2:23:10", "throughput": 8656.77, "total_tokens": 63622952} +{"current_steps": 94370, "total_steps": 204665, "loss": 0.0342, "lr": 1.3054007859368813e-06, "epoch": 2.30547480028339, "percentage": 46.11, "elapsed_time": "2:02:29", "remaining_time": "2:23:10", "throughput": 8656.87, "total_tokens": 63626792} +{"current_steps": 94375, "total_steps": 204665, "loss": 0.0002, "lr": 1.3053195814579587e-06, "epoch": 2.305596951115237, "percentage": 46.11, "elapsed_time": "2:02:30", "remaining_time": "2:23:09", "throughput": 8656.9, "total_tokens": 63629992} +{"current_steps": 94380, "total_steps": 204665, "loss": 0.0437, "lr": 1.3052383747586697e-06, "epoch": 2.3057191019470844, "percentage": 46.11, "elapsed_time": "2:02:30", "remaining_time": "2:23:09", "throughput": 8656.92, "total_tokens": 63633128} +{"current_steps": 94385, "total_steps": 204665, "loss": 0.0002, "lr": 1.3051571658396053e-06, "epoch": 2.3058412527789316, "percentage": 46.12, "elapsed_time": "2:02:30", "remaining_time": "2:23:08", "throughput": 8656.96, "total_tokens": 63636392} +{"current_steps": 94390, "total_steps": 204665, "loss": 0.0, "lr": 1.3050759547013558e-06, "epoch": 2.305963403610779, "percentage": 46.12, "elapsed_time": "2:02:31", "remaining_time": "2:23:08", "throughput": 8657.01, "total_tokens": 63639720} +{"current_steps": 94395, "total_steps": 204665, "loss": 0.0002, "lr": 1.3049947413445123e-06, "epoch": 2.3060855544426255, "percentage": 46.12, "elapsed_time": "2:02:31", "remaining_time": "2:23:07", "throughput": 8657.01, "total_tokens": 63642728} +{"current_steps": 94400, "total_steps": 204665, "loss": 0.0001, "lr": 1.3049135257696646e-06, "epoch": 2.306207705274473, "percentage": 46.12, "elapsed_time": "2:02:31", "remaining_time": "2:23:07", "throughput": 8657.03, "total_tokens": 63645800} +{"current_steps": 94405, "total_steps": 204665, "loss": 0.0002, "lr": 1.304832307977404e-06, "epoch": 2.30632985610632, "percentage": 46.13, "elapsed_time": "2:02:32", "remaining_time": "2:23:07", "throughput": 8657.06, "total_tokens": 63649000} +{"current_steps": 94410, "total_steps": 204665, "loss": 0.0448, "lr": 1.3047510879683206e-06, "epoch": 2.306452006938167, "percentage": 46.13, "elapsed_time": "2:02:32", "remaining_time": "2:23:06", "throughput": 8657.09, "total_tokens": 63652200} +{"current_steps": 94415, "total_steps": 204665, "loss": 0.0001, "lr": 1.3046698657430053e-06, "epoch": 2.3065741577700143, "percentage": 46.13, "elapsed_time": "2:02:32", "remaining_time": "2:23:06", "throughput": 8657.18, "total_tokens": 63655976} +{"current_steps": 94420, "total_steps": 204665, "loss": 0.1456, "lr": 1.3045886413020491e-06, "epoch": 2.3066963086018615, "percentage": 46.13, "elapsed_time": "2:02:33", "remaining_time": "2:23:05", "throughput": 8657.2, "total_tokens": 63659048} +{"current_steps": 94425, "total_steps": 204665, "loss": 0.0004, "lr": 1.304507414646042e-06, "epoch": 2.3068184594337087, "percentage": 46.14, "elapsed_time": "2:02:33", "remaining_time": "2:23:05", "throughput": 8657.23, "total_tokens": 63662248} +{"current_steps": 94430, "total_steps": 204665, "loss": 0.0001, "lr": 1.3044261857755753e-06, "epoch": 2.306940610265556, "percentage": 46.14, "elapsed_time": "2:02:33", "remaining_time": "2:23:04", "throughput": 8657.24, "total_tokens": 63665320} +{"current_steps": 94435, "total_steps": 204665, "loss": 0.0, "lr": 1.3043449546912394e-06, "epoch": 2.307062761097403, "percentage": 46.14, "elapsed_time": "2:02:34", "remaining_time": "2:23:04", "throughput": 8657.32, "total_tokens": 63668968} +{"current_steps": 94440, "total_steps": 204665, "loss": 0.1804, "lr": 1.3042637213936255e-06, "epoch": 2.3071849119292502, "percentage": 46.14, "elapsed_time": "2:02:34", "remaining_time": "2:23:03", "throughput": 8657.36, "total_tokens": 63672296} +{"current_steps": 94445, "total_steps": 204665, "loss": 0.0012, "lr": 1.3041824858833235e-06, "epoch": 2.3073070627610974, "percentage": 46.15, "elapsed_time": "2:02:35", "remaining_time": "2:23:03", "throughput": 8657.37, "total_tokens": 63675304} +{"current_steps": 94450, "total_steps": 204665, "loss": 0.0335, "lr": 1.3041012481609248e-06, "epoch": 2.3074292135929446, "percentage": 46.15, "elapsed_time": "2:02:35", "remaining_time": "2:23:03", "throughput": 8657.41, "total_tokens": 63678568} +{"current_steps": 94455, "total_steps": 204665, "loss": 0.1145, "lr": 1.3040200082270202e-06, "epoch": 2.307551364424792, "percentage": 46.15, "elapsed_time": "2:02:35", "remaining_time": "2:23:02", "throughput": 8657.47, "total_tokens": 63682088} +{"current_steps": 94460, "total_steps": 204665, "loss": 0.0367, "lr": 1.3039387660822e-06, "epoch": 2.307673515256639, "percentage": 46.15, "elapsed_time": "2:02:36", "remaining_time": "2:23:02", "throughput": 8657.54, "total_tokens": 63685672} +{"current_steps": 94465, "total_steps": 204665, "loss": 0.0003, "lr": 1.3038575217270555e-06, "epoch": 2.307795666088486, "percentage": 46.16, "elapsed_time": "2:02:36", "remaining_time": "2:23:01", "throughput": 8657.6, "total_tokens": 63689128} +{"current_steps": 94470, "total_steps": 204665, "loss": 0.0002, "lr": 1.3037762751621773e-06, "epoch": 2.3079178169203334, "percentage": 46.16, "elapsed_time": "2:02:36", "remaining_time": "2:23:01", "throughput": 8657.65, "total_tokens": 63692520} +{"current_steps": 94475, "total_steps": 204665, "loss": 0.0533, "lr": 1.3036950263881563e-06, "epoch": 2.3080399677521806, "percentage": 46.16, "elapsed_time": "2:02:37", "remaining_time": "2:23:00", "throughput": 8657.68, "total_tokens": 63695784} +{"current_steps": 94480, "total_steps": 204665, "loss": 0.0001, "lr": 1.3036137754055835e-06, "epoch": 2.3081621185840273, "percentage": 46.16, "elapsed_time": "2:02:37", "remaining_time": "2:23:00", "throughput": 8657.71, "total_tokens": 63698984} +{"current_steps": 94485, "total_steps": 204665, "loss": 0.0002, "lr": 1.30353252221505e-06, "epoch": 2.308284269415875, "percentage": 46.17, "elapsed_time": "2:02:37", "remaining_time": "2:23:00", "throughput": 8657.76, "total_tokens": 63702376} +{"current_steps": 94490, "total_steps": 204665, "loss": 0.0008, "lr": 1.3034512668171457e-06, "epoch": 2.3084064202477217, "percentage": 46.17, "elapsed_time": "2:02:38", "remaining_time": "2:22:59", "throughput": 8657.81, "total_tokens": 63705704} +{"current_steps": 94495, "total_steps": 204665, "loss": 0.0522, "lr": 1.3033700092124626e-06, "epoch": 2.308528571079569, "percentage": 46.17, "elapsed_time": "2:02:38", "remaining_time": "2:22:59", "throughput": 8657.83, "total_tokens": 63708904} +{"current_steps": 94500, "total_steps": 204665, "loss": 0.047, "lr": 1.3032887494015913e-06, "epoch": 2.308650721911416, "percentage": 46.17, "elapsed_time": "2:02:38", "remaining_time": "2:22:58", "throughput": 8657.85, "total_tokens": 63712040} +{"current_steps": 94505, "total_steps": 204665, "loss": 0.0001, "lr": 1.3032074873851224e-06, "epoch": 2.3087728727432633, "percentage": 46.18, "elapsed_time": "2:02:39", "remaining_time": "2:22:58", "throughput": 8657.9, "total_tokens": 63715368} +{"current_steps": 94510, "total_steps": 204665, "loss": 0.0001, "lr": 1.303126223163647e-06, "epoch": 2.3088950235751105, "percentage": 46.18, "elapsed_time": "2:02:39", "remaining_time": "2:22:57", "throughput": 8657.97, "total_tokens": 63719016} +{"current_steps": 94515, "total_steps": 204665, "loss": 0.0573, "lr": 1.3030449567377565e-06, "epoch": 2.3090171744069576, "percentage": 46.18, "elapsed_time": "2:02:39", "remaining_time": "2:22:57", "throughput": 8658.01, "total_tokens": 63722344} +{"current_steps": 94520, "total_steps": 204665, "loss": 0.0007, "lr": 1.3029636881080412e-06, "epoch": 2.309139325238805, "percentage": 46.18, "elapsed_time": "2:02:40", "remaining_time": "2:22:56", "throughput": 8658.04, "total_tokens": 63725480} +{"current_steps": 94525, "total_steps": 204665, "loss": 0.0003, "lr": 1.3028824172750927e-06, "epoch": 2.309261476070652, "percentage": 46.19, "elapsed_time": "2:02:40", "remaining_time": "2:22:56", "throughput": 8658.04, "total_tokens": 63728488} +{"current_steps": 94530, "total_steps": 204665, "loss": 0.0389, "lr": 1.3028011442395017e-06, "epoch": 2.309383626902499, "percentage": 46.19, "elapsed_time": "2:02:40", "remaining_time": "2:22:56", "throughput": 8658.12, "total_tokens": 63732136} +{"current_steps": 94535, "total_steps": 204665, "loss": 0.0534, "lr": 1.3027198690018592e-06, "epoch": 2.3095057777343464, "percentage": 46.19, "elapsed_time": "2:02:41", "remaining_time": "2:22:55", "throughput": 8658.17, "total_tokens": 63735528} +{"current_steps": 94540, "total_steps": 204665, "loss": 0.0, "lr": 1.3026385915627566e-06, "epoch": 2.3096279285661936, "percentage": 46.19, "elapsed_time": "2:02:41", "remaining_time": "2:22:55", "throughput": 8658.23, "total_tokens": 63738984} +{"current_steps": 94545, "total_steps": 204665, "loss": 0.0932, "lr": 1.3025573119227847e-06, "epoch": 2.309750079398041, "percentage": 46.2, "elapsed_time": "2:02:42", "remaining_time": "2:22:54", "throughput": 8658.26, "total_tokens": 63742248} +{"current_steps": 94550, "total_steps": 204665, "loss": 0.0247, "lr": 1.3024760300825348e-06, "epoch": 2.309872230229888, "percentage": 46.2, "elapsed_time": "2:02:42", "remaining_time": "2:22:54", "throughput": 8658.36, "total_tokens": 63746024} +{"current_steps": 94555, "total_steps": 204665, "loss": 0.0825, "lr": 1.302394746042598e-06, "epoch": 2.309994381061735, "percentage": 46.2, "elapsed_time": "2:02:42", "remaining_time": "2:22:53", "throughput": 8658.37, "total_tokens": 63749096} +{"current_steps": 94560, "total_steps": 204665, "loss": 0.0284, "lr": 1.3023134598035647e-06, "epoch": 2.3101165318935823, "percentage": 46.2, "elapsed_time": "2:02:43", "remaining_time": "2:22:53", "throughput": 8658.44, "total_tokens": 63752616} +{"current_steps": 94565, "total_steps": 204665, "loss": 0.0539, "lr": 1.3022321713660268e-06, "epoch": 2.3102386827254295, "percentage": 46.2, "elapsed_time": "2:02:43", "remaining_time": "2:22:53", "throughput": 8658.5, "total_tokens": 63756136} +{"current_steps": 94570, "total_steps": 204665, "loss": 0.0002, "lr": 1.3021508807305754e-06, "epoch": 2.3103608335572767, "percentage": 46.21, "elapsed_time": "2:02:43", "remaining_time": "2:22:52", "throughput": 8658.57, "total_tokens": 63759720} +{"current_steps": 94575, "total_steps": 204665, "loss": 0.0007, "lr": 1.3020695878978015e-06, "epoch": 2.3104829843891235, "percentage": 46.21, "elapsed_time": "2:02:44", "remaining_time": "2:22:52", "throughput": 8658.67, "total_tokens": 63763496} +{"current_steps": 94580, "total_steps": 204665, "loss": 0.0664, "lr": 1.3019882928682963e-06, "epoch": 2.3106051352209707, "percentage": 46.21, "elapsed_time": "2:02:44", "remaining_time": "2:22:51", "throughput": 8658.7, "total_tokens": 63766760} +{"current_steps": 94585, "total_steps": 204665, "loss": 0.0004, "lr": 1.3019069956426511e-06, "epoch": 2.310727286052818, "percentage": 46.21, "elapsed_time": "2:02:44", "remaining_time": "2:22:51", "throughput": 8658.74, "total_tokens": 63770024} +{"current_steps": 94590, "total_steps": 204665, "loss": 0.0002, "lr": 1.301825696221457e-06, "epoch": 2.310849436884665, "percentage": 46.22, "elapsed_time": "2:02:45", "remaining_time": "2:22:50", "throughput": 8658.81, "total_tokens": 63773608} +{"current_steps": 94595, "total_steps": 204665, "loss": 0.0001, "lr": 1.301744394605305e-06, "epoch": 2.3109715877165122, "percentage": 46.22, "elapsed_time": "2:02:45", "remaining_time": "2:22:50", "throughput": 8658.85, "total_tokens": 63776936} +{"current_steps": 94600, "total_steps": 204665, "loss": 0.0002, "lr": 1.3016630907947868e-06, "epoch": 2.3110937385483594, "percentage": 46.22, "elapsed_time": "2:02:45", "remaining_time": "2:22:50", "throughput": 8658.87, "total_tokens": 63780072} +{"current_steps": 94605, "total_steps": 204665, "loss": 0.2443, "lr": 1.3015817847904934e-06, "epoch": 2.3112158893802066, "percentage": 46.22, "elapsed_time": "2:02:46", "remaining_time": "2:22:49", "throughput": 8658.89, "total_tokens": 63783208} +{"current_steps": 94610, "total_steps": 204665, "loss": 0.1325, "lr": 1.3015004765930164e-06, "epoch": 2.311338040212054, "percentage": 46.23, "elapsed_time": "2:02:46", "remaining_time": "2:22:49", "throughput": 8658.94, "total_tokens": 63786536} +{"current_steps": 94615, "total_steps": 204665, "loss": 0.0448, "lr": 1.3014191662029466e-06, "epoch": 2.311460191043901, "percentage": 46.23, "elapsed_time": "2:02:46", "remaining_time": "2:22:48", "throughput": 8659.01, "total_tokens": 63790184} +{"current_steps": 94620, "total_steps": 204665, "loss": 0.0543, "lr": 1.3013378536208757e-06, "epoch": 2.311582341875748, "percentage": 46.23, "elapsed_time": "2:02:47", "remaining_time": "2:22:48", "throughput": 8659.06, "total_tokens": 63793512} +{"current_steps": 94625, "total_steps": 204665, "loss": 0.0007, "lr": 1.3012565388473947e-06, "epoch": 2.3117044927075954, "percentage": 46.23, "elapsed_time": "2:02:47", "remaining_time": "2:22:47", "throughput": 8659.09, "total_tokens": 63796712} +{"current_steps": 94630, "total_steps": 204665, "loss": 0.0001, "lr": 1.301175221883095e-06, "epoch": 2.3118266435394426, "percentage": 46.24, "elapsed_time": "2:02:47", "remaining_time": "2:22:47", "throughput": 8659.08, "total_tokens": 63799592} +{"current_steps": 94635, "total_steps": 204665, "loss": 0.0326, "lr": 1.3010939027285684e-06, "epoch": 2.3119487943712898, "percentage": 46.24, "elapsed_time": "2:02:48", "remaining_time": "2:22:46", "throughput": 8659.17, "total_tokens": 63803304} +{"current_steps": 94640, "total_steps": 204665, "loss": 0.0001, "lr": 1.3010125813844057e-06, "epoch": 2.312070945203137, "percentage": 46.24, "elapsed_time": "2:02:48", "remaining_time": "2:22:46", "throughput": 8659.2, "total_tokens": 63806568} +{"current_steps": 94645, "total_steps": 204665, "loss": 0.0019, "lr": 1.3009312578511988e-06, "epoch": 2.312193096034984, "percentage": 46.24, "elapsed_time": "2:02:48", "remaining_time": "2:22:46", "throughput": 8659.26, "total_tokens": 63810024} +{"current_steps": 94650, "total_steps": 204665, "loss": 0.2024, "lr": 1.3008499321295388e-06, "epoch": 2.3123152468668313, "percentage": 46.25, "elapsed_time": "2:02:49", "remaining_time": "2:22:45", "throughput": 8659.34, "total_tokens": 63813736} +{"current_steps": 94655, "total_steps": 204665, "loss": 0.0003, "lr": 1.300768604220017e-06, "epoch": 2.3124373976986785, "percentage": 46.25, "elapsed_time": "2:02:49", "remaining_time": "2:22:45", "throughput": 8659.41, "total_tokens": 63817320} +{"current_steps": 94660, "total_steps": 204665, "loss": 0.0001, "lr": 1.3006872741232252e-06, "epoch": 2.3125595485305253, "percentage": 46.25, "elapsed_time": "2:02:50", "remaining_time": "2:22:44", "throughput": 8659.5, "total_tokens": 63821096} +{"current_steps": 94665, "total_steps": 204665, "loss": 0.0955, "lr": 1.3006059418397545e-06, "epoch": 2.312681699362373, "percentage": 46.25, "elapsed_time": "2:02:50", "remaining_time": "2:22:44", "throughput": 8659.61, "total_tokens": 63825000} +{"current_steps": 94670, "total_steps": 204665, "loss": 0.0467, "lr": 1.3005246073701965e-06, "epoch": 2.3128038501942196, "percentage": 46.26, "elapsed_time": "2:02:50", "remaining_time": "2:22:43", "throughput": 8659.69, "total_tokens": 63828712} +{"current_steps": 94675, "total_steps": 204665, "loss": 0.0992, "lr": 1.3004432707151428e-06, "epoch": 2.312926001026067, "percentage": 46.26, "elapsed_time": "2:02:51", "remaining_time": "2:22:43", "throughput": 8659.77, "total_tokens": 63832360} +{"current_steps": 94680, "total_steps": 204665, "loss": 0.0536, "lr": 1.300361931875185e-06, "epoch": 2.313048151857914, "percentage": 46.26, "elapsed_time": "2:02:51", "remaining_time": "2:22:43", "throughput": 8659.84, "total_tokens": 63835944} +{"current_steps": 94685, "total_steps": 204665, "loss": 0.0001, "lr": 1.3002805908509144e-06, "epoch": 2.313170302689761, "percentage": 46.26, "elapsed_time": "2:02:51", "remaining_time": "2:22:42", "throughput": 8659.92, "total_tokens": 63839592} +{"current_steps": 94690, "total_steps": 204665, "loss": 0.0001, "lr": 1.3001992476429221e-06, "epoch": 2.3132924535216084, "percentage": 46.27, "elapsed_time": "2:02:52", "remaining_time": "2:22:42", "throughput": 8659.95, "total_tokens": 63842792} +{"current_steps": 94695, "total_steps": 204665, "loss": 0.0005, "lr": 1.3001179022518006e-06, "epoch": 2.3134146043534556, "percentage": 46.27, "elapsed_time": "2:02:52", "remaining_time": "2:22:41", "throughput": 8659.95, "total_tokens": 63845736} +{"current_steps": 94700, "total_steps": 204665, "loss": 0.0839, "lr": 1.300036554678141e-06, "epoch": 2.3135367551853028, "percentage": 46.27, "elapsed_time": "2:02:52", "remaining_time": "2:22:41", "throughput": 8660.0, "total_tokens": 63849128} +{"current_steps": 94705, "total_steps": 204665, "loss": 0.0553, "lr": 1.299955204922535e-06, "epoch": 2.31365890601715, "percentage": 46.27, "elapsed_time": "2:02:53", "remaining_time": "2:22:40", "throughput": 8660.01, "total_tokens": 63852200} +{"current_steps": 94710, "total_steps": 204665, "loss": 0.0305, "lr": 1.299873852985574e-06, "epoch": 2.313781056848997, "percentage": 46.28, "elapsed_time": "2:02:53", "remaining_time": "2:22:40", "throughput": 8660.08, "total_tokens": 63855784} +{"current_steps": 94715, "total_steps": 204665, "loss": 0.0003, "lr": 1.2997924988678496e-06, "epoch": 2.3139032076808443, "percentage": 46.28, "elapsed_time": "2:02:53", "remaining_time": "2:22:40", "throughput": 8660.1, "total_tokens": 63858920} +{"current_steps": 94720, "total_steps": 204665, "loss": 0.0399, "lr": 1.2997111425699532e-06, "epoch": 2.3140253585126915, "percentage": 46.28, "elapsed_time": "2:02:54", "remaining_time": "2:22:39", "throughput": 8660.12, "total_tokens": 63862056} +{"current_steps": 94725, "total_steps": 204665, "loss": 0.0012, "lr": 1.299629784092477e-06, "epoch": 2.3141475093445387, "percentage": 46.28, "elapsed_time": "2:02:54", "remaining_time": "2:22:39", "throughput": 8660.19, "total_tokens": 63865576} +{"current_steps": 94730, "total_steps": 204665, "loss": 0.0367, "lr": 1.2995484234360123e-06, "epoch": 2.314269660176386, "percentage": 46.29, "elapsed_time": "2:02:54", "remaining_time": "2:22:38", "throughput": 8660.24, "total_tokens": 63868968} +{"current_steps": 94735, "total_steps": 204665, "loss": 0.047, "lr": 1.299467060601151e-06, "epoch": 2.314391811008233, "percentage": 46.29, "elapsed_time": "2:02:55", "remaining_time": "2:22:38", "throughput": 8660.32, "total_tokens": 63872616} +{"current_steps": 94740, "total_steps": 204665, "loss": 0.0569, "lr": 1.299385695588485e-06, "epoch": 2.3145139618400803, "percentage": 46.29, "elapsed_time": "2:02:55", "remaining_time": "2:22:37", "throughput": 8660.39, "total_tokens": 63876200} +{"current_steps": 94745, "total_steps": 204665, "loss": 0.0926, "lr": 1.2993043283986055e-06, "epoch": 2.3146361126719275, "percentage": 46.29, "elapsed_time": "2:02:56", "remaining_time": "2:22:37", "throughput": 8660.46, "total_tokens": 63879784} +{"current_steps": 94750, "total_steps": 204665, "loss": 0.0635, "lr": 1.2992229590321042e-06, "epoch": 2.3147582635037747, "percentage": 46.3, "elapsed_time": "2:02:56", "remaining_time": "2:22:36", "throughput": 8660.49, "total_tokens": 63882984} +{"current_steps": 94755, "total_steps": 204665, "loss": 0.0324, "lr": 1.299141587489573e-06, "epoch": 2.3148804143356214, "percentage": 46.3, "elapsed_time": "2:02:56", "remaining_time": "2:22:36", "throughput": 8660.53, "total_tokens": 63886312} +{"current_steps": 94760, "total_steps": 204665, "loss": 0.0005, "lr": 1.299060213771604e-06, "epoch": 2.3150025651674686, "percentage": 46.3, "elapsed_time": "2:02:57", "remaining_time": "2:22:36", "throughput": 8660.53, "total_tokens": 63889256} +{"current_steps": 94765, "total_steps": 204665, "loss": 0.0006, "lr": 1.2989788378787886e-06, "epoch": 2.315124715999316, "percentage": 46.3, "elapsed_time": "2:02:57", "remaining_time": "2:22:35", "throughput": 8660.54, "total_tokens": 63892264} +{"current_steps": 94770, "total_steps": 204665, "loss": 0.0002, "lr": 1.2988974598117188e-06, "epoch": 2.315246866831163, "percentage": 46.3, "elapsed_time": "2:02:57", "remaining_time": "2:22:35", "throughput": 8660.58, "total_tokens": 63895528} +{"current_steps": 94775, "total_steps": 204665, "loss": 0.0003, "lr": 1.2988160795709861e-06, "epoch": 2.31536901766301, "percentage": 46.31, "elapsed_time": "2:02:58", "remaining_time": "2:22:34", "throughput": 8660.63, "total_tokens": 63898984} +{"current_steps": 94780, "total_steps": 204665, "loss": 0.0001, "lr": 1.2987346971571823e-06, "epoch": 2.3154911684948574, "percentage": 46.31, "elapsed_time": "2:02:58", "remaining_time": "2:22:34", "throughput": 8660.68, "total_tokens": 63902376} +{"current_steps": 94785, "total_steps": 204665, "loss": 0.0693, "lr": 1.2986533125708998e-06, "epoch": 2.3156133193267046, "percentage": 46.31, "elapsed_time": "2:02:58", "remaining_time": "2:22:33", "throughput": 8660.73, "total_tokens": 63905704} +{"current_steps": 94790, "total_steps": 204665, "loss": 0.0716, "lr": 1.2985719258127299e-06, "epoch": 2.3157354701585517, "percentage": 46.31, "elapsed_time": "2:02:59", "remaining_time": "2:22:33", "throughput": 8660.73, "total_tokens": 63908648} +{"current_steps": 94795, "total_steps": 204665, "loss": 0.0001, "lr": 1.298490536883265e-06, "epoch": 2.315857620990399, "percentage": 46.32, "elapsed_time": "2:02:59", "remaining_time": "2:22:33", "throughput": 8660.76, "total_tokens": 63911912} +{"current_steps": 94800, "total_steps": 204665, "loss": 0.0525, "lr": 1.2984091457830961e-06, "epoch": 2.315979771822246, "percentage": 46.32, "elapsed_time": "2:02:59", "remaining_time": "2:22:32", "throughput": 8660.89, "total_tokens": 63916008} +{"current_steps": 94805, "total_steps": 204665, "loss": 0.0595, "lr": 1.298327752512816e-06, "epoch": 2.3161019226540933, "percentage": 46.32, "elapsed_time": "2:03:00", "remaining_time": "2:22:32", "throughput": 8660.92, "total_tokens": 63919208} +{"current_steps": 94810, "total_steps": 204665, "loss": 0.0679, "lr": 1.298246357073016e-06, "epoch": 2.3162240734859405, "percentage": 46.32, "elapsed_time": "2:03:00", "remaining_time": "2:22:31", "throughput": 8661.0, "total_tokens": 63922920} +{"current_steps": 94815, "total_steps": 204665, "loss": 0.0776, "lr": 1.2981649594642886e-06, "epoch": 2.3163462243177877, "percentage": 46.33, "elapsed_time": "2:03:00", "remaining_time": "2:22:31", "throughput": 8661.04, "total_tokens": 63926248} +{"current_steps": 94820, "total_steps": 204665, "loss": 0.0362, "lr": 1.298083559687225e-06, "epoch": 2.316468375149635, "percentage": 46.33, "elapsed_time": "2:03:01", "remaining_time": "2:22:30", "throughput": 8661.06, "total_tokens": 63929384} +{"current_steps": 94825, "total_steps": 204665, "loss": 0.0431, "lr": 1.2980021577424178e-06, "epoch": 2.316590525981482, "percentage": 46.33, "elapsed_time": "2:03:01", "remaining_time": "2:22:30", "throughput": 8661.08, "total_tokens": 63932520} +{"current_steps": 94830, "total_steps": 204665, "loss": 0.0002, "lr": 1.2979207536304588e-06, "epoch": 2.3167126768133293, "percentage": 46.33, "elapsed_time": "2:03:01", "remaining_time": "2:22:29", "throughput": 8661.11, "total_tokens": 63935720} +{"current_steps": 94835, "total_steps": 204665, "loss": 0.1531, "lr": 1.29783934735194e-06, "epoch": 2.3168348276451765, "percentage": 46.34, "elapsed_time": "2:03:02", "remaining_time": "2:22:29", "throughput": 8661.19, "total_tokens": 63939368} +{"current_steps": 94840, "total_steps": 204665, "loss": 0.0001, "lr": 1.2977579389074533e-06, "epoch": 2.316956978477023, "percentage": 46.34, "elapsed_time": "2:03:02", "remaining_time": "2:22:29", "throughput": 8661.26, "total_tokens": 63943016} +{"current_steps": 94845, "total_steps": 204665, "loss": 0.1494, "lr": 1.2976765282975905e-06, "epoch": 2.317079129308871, "percentage": 46.34, "elapsed_time": "2:03:02", "remaining_time": "2:22:28", "throughput": 8661.31, "total_tokens": 63946408} +{"current_steps": 94850, "total_steps": 204665, "loss": 0.001, "lr": 1.297595115522944e-06, "epoch": 2.3172012801407176, "percentage": 46.34, "elapsed_time": "2:03:03", "remaining_time": "2:22:28", "throughput": 8661.34, "total_tokens": 63949608} +{"current_steps": 94855, "total_steps": 204665, "loss": 0.0007, "lr": 1.297513700584106e-06, "epoch": 2.3173234309725648, "percentage": 46.35, "elapsed_time": "2:03:03", "remaining_time": "2:22:27", "throughput": 8661.4, "total_tokens": 63953128} +{"current_steps": 94860, "total_steps": 204665, "loss": 0.001, "lr": 1.2974322834816684e-06, "epoch": 2.317445581804412, "percentage": 46.35, "elapsed_time": "2:03:04", "remaining_time": "2:22:27", "throughput": 8661.41, "total_tokens": 63956136} +{"current_steps": 94865, "total_steps": 204665, "loss": 0.0416, "lr": 1.2973508642162233e-06, "epoch": 2.317567732636259, "percentage": 46.35, "elapsed_time": "2:03:04", "remaining_time": "2:22:26", "throughput": 8661.52, "total_tokens": 63960040} +{"current_steps": 94870, "total_steps": 204665, "loss": 0.0003, "lr": 1.2972694427883625e-06, "epoch": 2.3176898834681063, "percentage": 46.35, "elapsed_time": "2:03:04", "remaining_time": "2:22:26", "throughput": 8661.56, "total_tokens": 63963304} +{"current_steps": 94875, "total_steps": 204665, "loss": 0.0003, "lr": 1.2971880191986785e-06, "epoch": 2.3178120342999535, "percentage": 46.36, "elapsed_time": "2:03:05", "remaining_time": "2:22:26", "throughput": 8661.6, "total_tokens": 63966632} +{"current_steps": 94880, "total_steps": 204665, "loss": 0.0303, "lr": 1.297106593447763e-06, "epoch": 2.3179341851318007, "percentage": 46.36, "elapsed_time": "2:03:05", "remaining_time": "2:22:25", "throughput": 8661.65, "total_tokens": 63970088} +{"current_steps": 94885, "total_steps": 204665, "loss": 0.0005, "lr": 1.2970251655362086e-06, "epoch": 2.318056335963648, "percentage": 46.36, "elapsed_time": "2:03:05", "remaining_time": "2:22:25", "throughput": 8661.69, "total_tokens": 63973352} +{"current_steps": 94890, "total_steps": 204665, "loss": 0.0743, "lr": 1.2969437354646073e-06, "epoch": 2.318178486795495, "percentage": 46.36, "elapsed_time": "2:03:06", "remaining_time": "2:22:24", "throughput": 8661.72, "total_tokens": 63976552} +{"current_steps": 94895, "total_steps": 204665, "loss": 0.0535, "lr": 1.2968623032335515e-06, "epoch": 2.3183006376273423, "percentage": 46.37, "elapsed_time": "2:03:06", "remaining_time": "2:22:24", "throughput": 8661.74, "total_tokens": 63979752} +{"current_steps": 94900, "total_steps": 204665, "loss": 0.0007, "lr": 1.296780868843633e-06, "epoch": 2.3184227884591895, "percentage": 46.37, "elapsed_time": "2:03:06", "remaining_time": "2:22:23", "throughput": 8661.86, "total_tokens": 63983720} +{"current_steps": 94905, "total_steps": 204665, "loss": 0.0007, "lr": 1.2966994322954439e-06, "epoch": 2.3185449392910367, "percentage": 46.37, "elapsed_time": "2:03:07", "remaining_time": "2:22:23", "throughput": 8661.93, "total_tokens": 63987368} +{"current_steps": 94910, "total_steps": 204665, "loss": 0.0444, "lr": 1.2966179935895774e-06, "epoch": 2.318667090122884, "percentage": 46.37, "elapsed_time": "2:03:07", "remaining_time": "2:22:23", "throughput": 8661.96, "total_tokens": 63990568} +{"current_steps": 94915, "total_steps": 204665, "loss": 0.1065, "lr": 1.2965365527266245e-06, "epoch": 2.318789240954731, "percentage": 46.38, "elapsed_time": "2:03:07", "remaining_time": "2:22:22", "throughput": 8661.98, "total_tokens": 63993704} +{"current_steps": 94920, "total_steps": 204665, "loss": 0.0003, "lr": 1.2964551097071784e-06, "epoch": 2.3189113917865782, "percentage": 46.38, "elapsed_time": "2:03:08", "remaining_time": "2:22:22", "throughput": 8662.04, "total_tokens": 63997160} +{"current_steps": 94925, "total_steps": 204665, "loss": 0.0519, "lr": 1.2963736645318308e-06, "epoch": 2.3190335426184254, "percentage": 46.38, "elapsed_time": "2:03:08", "remaining_time": "2:22:21", "throughput": 8662.08, "total_tokens": 64000488} +{"current_steps": 94930, "total_steps": 204665, "loss": 0.0711, "lr": 1.2962922172011744e-06, "epoch": 2.3191556934502726, "percentage": 46.38, "elapsed_time": "2:03:08", "remaining_time": "2:22:21", "throughput": 8662.15, "total_tokens": 64004008} +{"current_steps": 94935, "total_steps": 204665, "loss": 0.0287, "lr": 1.296210767715801e-06, "epoch": 2.3192778442821194, "percentage": 46.39, "elapsed_time": "2:03:09", "remaining_time": "2:22:20", "throughput": 8662.21, "total_tokens": 64007528} +{"current_steps": 94940, "total_steps": 204665, "loss": 0.049, "lr": 1.296129316076303e-06, "epoch": 2.3193999951139666, "percentage": 46.39, "elapsed_time": "2:03:09", "remaining_time": "2:22:20", "throughput": 8662.28, "total_tokens": 64011112} +{"current_steps": 94945, "total_steps": 204665, "loss": 0.0318, "lr": 1.2960478622832736e-06, "epoch": 2.3195221459458137, "percentage": 46.39, "elapsed_time": "2:03:09", "remaining_time": "2:22:19", "throughput": 8662.32, "total_tokens": 64014440} +{"current_steps": 94950, "total_steps": 204665, "loss": 0.0549, "lr": 1.295966406337304e-06, "epoch": 2.319644296777661, "percentage": 46.39, "elapsed_time": "2:03:10", "remaining_time": "2:22:19", "throughput": 8662.39, "total_tokens": 64018024} +{"current_steps": 94955, "total_steps": 204665, "loss": 0.0363, "lr": 1.2958849482389876e-06, "epoch": 2.319766447609508, "percentage": 46.4, "elapsed_time": "2:03:10", "remaining_time": "2:22:19", "throughput": 8662.44, "total_tokens": 64021352} +{"current_steps": 94960, "total_steps": 204665, "loss": 0.0001, "lr": 1.2958034879889158e-06, "epoch": 2.3198885984413553, "percentage": 46.4, "elapsed_time": "2:03:11", "remaining_time": "2:22:18", "throughput": 8662.47, "total_tokens": 64024552} +{"current_steps": 94965, "total_steps": 204665, "loss": 0.0935, "lr": 1.295722025587682e-06, "epoch": 2.3200107492732025, "percentage": 46.4, "elapsed_time": "2:03:11", "remaining_time": "2:22:18", "throughput": 8662.51, "total_tokens": 64027944} +{"current_steps": 94970, "total_steps": 204665, "loss": 0.0005, "lr": 1.2956405610358776e-06, "epoch": 2.3201329001050497, "percentage": 46.4, "elapsed_time": "2:03:11", "remaining_time": "2:22:17", "throughput": 8662.59, "total_tokens": 64031528} +{"current_steps": 94975, "total_steps": 204665, "loss": 0.0001, "lr": 1.2955590943340956e-06, "epoch": 2.320255050936897, "percentage": 46.41, "elapsed_time": "2:03:12", "remaining_time": "2:22:17", "throughput": 8662.61, "total_tokens": 64034728} +{"current_steps": 94980, "total_steps": 204665, "loss": 0.0003, "lr": 1.2954776254829288e-06, "epoch": 2.320377201768744, "percentage": 46.41, "elapsed_time": "2:03:12", "remaining_time": "2:22:16", "throughput": 8662.64, "total_tokens": 64037864} +{"current_steps": 94985, "total_steps": 204665, "loss": 0.141, "lr": 1.2953961544829688e-06, "epoch": 2.3204993526005913, "percentage": 46.41, "elapsed_time": "2:03:12", "remaining_time": "2:22:16", "throughput": 8662.72, "total_tokens": 64041576} +{"current_steps": 94990, "total_steps": 204665, "loss": 0.0004, "lr": 1.2953146813348085e-06, "epoch": 2.3206215034324384, "percentage": 46.41, "elapsed_time": "2:03:13", "remaining_time": "2:22:16", "throughput": 8662.73, "total_tokens": 64044584} +{"current_steps": 94995, "total_steps": 204665, "loss": 0.0626, "lr": 1.2952332060390405e-06, "epoch": 2.3207436542642856, "percentage": 46.41, "elapsed_time": "2:03:13", "remaining_time": "2:22:15", "throughput": 8662.77, "total_tokens": 64047848} +{"current_steps": 95000, "total_steps": 204665, "loss": 0.0465, "lr": 1.2951517285962571e-06, "epoch": 2.320865805096133, "percentage": 46.42, "elapsed_time": "2:03:13", "remaining_time": "2:22:15", "throughput": 8662.77, "total_tokens": 64050856} +{"current_steps": 95005, "total_steps": 204665, "loss": 0.0296, "lr": 1.2950702490070514e-06, "epoch": 2.32098795592798, "percentage": 46.42, "elapsed_time": "2:03:14", "remaining_time": "2:22:14", "throughput": 8662.83, "total_tokens": 64054312} +{"current_steps": 95010, "total_steps": 204665, "loss": 0.0689, "lr": 1.294988767272015e-06, "epoch": 2.321110106759827, "percentage": 46.42, "elapsed_time": "2:03:14", "remaining_time": "2:22:14", "throughput": 8662.87, "total_tokens": 64057640} +{"current_steps": 95015, "total_steps": 204665, "loss": 0.0004, "lr": 1.2949072833917414e-06, "epoch": 2.3212322575916744, "percentage": 46.42, "elapsed_time": "2:03:14", "remaining_time": "2:22:13", "throughput": 8662.93, "total_tokens": 64061160} +{"current_steps": 95020, "total_steps": 204665, "loss": 0.0004, "lr": 1.2948257973668224e-06, "epoch": 2.321354408423521, "percentage": 46.43, "elapsed_time": "2:03:15", "remaining_time": "2:22:13", "throughput": 8662.96, "total_tokens": 64064296} +{"current_steps": 95025, "total_steps": 204665, "loss": 0.0002, "lr": 1.294744309197851e-06, "epoch": 2.3214765592553688, "percentage": 46.43, "elapsed_time": "2:03:15", "remaining_time": "2:22:12", "throughput": 8663.0, "total_tokens": 64067688} +{"current_steps": 95030, "total_steps": 204665, "loss": 0.0288, "lr": 1.294662818885419e-06, "epoch": 2.3215987100872155, "percentage": 46.43, "elapsed_time": "2:03:15", "remaining_time": "2:22:12", "throughput": 8663.01, "total_tokens": 64070696} +{"current_steps": 95035, "total_steps": 204665, "loss": 0.0002, "lr": 1.2945813264301207e-06, "epoch": 2.3217208609190627, "percentage": 46.43, "elapsed_time": "2:03:16", "remaining_time": "2:22:12", "throughput": 8663.02, "total_tokens": 64073704} +{"current_steps": 95040, "total_steps": 204665, "loss": 0.0003, "lr": 1.2944998318325474e-06, "epoch": 2.32184301175091, "percentage": 46.44, "elapsed_time": "2:03:16", "remaining_time": "2:22:11", "throughput": 8663.08, "total_tokens": 64077160} +{"current_steps": 95045, "total_steps": 204665, "loss": 0.0002, "lr": 1.2944183350932922e-06, "epoch": 2.321965162582757, "percentage": 46.44, "elapsed_time": "2:03:16", "remaining_time": "2:22:11", "throughput": 8663.13, "total_tokens": 64080552} +{"current_steps": 95050, "total_steps": 204665, "loss": 0.0017, "lr": 1.2943368362129477e-06, "epoch": 2.3220873134146043, "percentage": 46.44, "elapsed_time": "2:03:17", "remaining_time": "2:22:10", "throughput": 8663.25, "total_tokens": 64084648} +{"current_steps": 95055, "total_steps": 204665, "loss": 0.047, "lr": 1.2942553351921063e-06, "epoch": 2.3222094642464515, "percentage": 46.44, "elapsed_time": "2:03:17", "remaining_time": "2:22:10", "throughput": 8663.3, "total_tokens": 64088040} +{"current_steps": 95060, "total_steps": 204665, "loss": 0.0577, "lr": 1.294173832031361e-06, "epoch": 2.3223316150782987, "percentage": 46.45, "elapsed_time": "2:03:18", "remaining_time": "2:22:09", "throughput": 8663.38, "total_tokens": 64091752} +{"current_steps": 95065, "total_steps": 204665, "loss": 0.0907, "lr": 1.2940923267313049e-06, "epoch": 2.322453765910146, "percentage": 46.45, "elapsed_time": "2:03:18", "remaining_time": "2:22:09", "throughput": 8663.4, "total_tokens": 64094824} +{"current_steps": 95070, "total_steps": 204665, "loss": 0.0004, "lr": 1.2940108192925298e-06, "epoch": 2.322575916741993, "percentage": 46.45, "elapsed_time": "2:03:18", "remaining_time": "2:22:09", "throughput": 8663.43, "total_tokens": 64098088} +{"current_steps": 95075, "total_steps": 204665, "loss": 0.0001, "lr": 1.2939293097156295e-06, "epoch": 2.3226980675738402, "percentage": 46.45, "elapsed_time": "2:03:19", "remaining_time": "2:22:08", "throughput": 8663.52, "total_tokens": 64101800} +{"current_steps": 95080, "total_steps": 204665, "loss": 0.0591, "lr": 1.2938477980011958e-06, "epoch": 2.3228202184056874, "percentage": 46.46, "elapsed_time": "2:03:19", "remaining_time": "2:22:08", "throughput": 8663.58, "total_tokens": 64105320} +{"current_steps": 95085, "total_steps": 204665, "loss": 0.1721, "lr": 1.2937662841498218e-06, "epoch": 2.3229423692375346, "percentage": 46.46, "elapsed_time": "2:03:19", "remaining_time": "2:22:07", "throughput": 8663.64, "total_tokens": 64108776} +{"current_steps": 95090, "total_steps": 204665, "loss": 0.0525, "lr": 1.2936847681621003e-06, "epoch": 2.323064520069382, "percentage": 46.46, "elapsed_time": "2:03:20", "remaining_time": "2:22:07", "throughput": 8663.69, "total_tokens": 64112232} +{"current_steps": 95095, "total_steps": 204665, "loss": 0.06, "lr": 1.2936032500386242e-06, "epoch": 2.323186670901229, "percentage": 46.46, "elapsed_time": "2:03:20", "remaining_time": "2:22:06", "throughput": 8663.71, "total_tokens": 64115368} +{"current_steps": 95100, "total_steps": 204665, "loss": 0.0291, "lr": 1.2935217297799866e-06, "epoch": 2.323308821733076, "percentage": 46.47, "elapsed_time": "2:03:20", "remaining_time": "2:22:06", "throughput": 8663.81, "total_tokens": 64119144} +{"current_steps": 95105, "total_steps": 204665, "loss": 0.0002, "lr": 1.2934402073867798e-06, "epoch": 2.323430972564923, "percentage": 46.47, "elapsed_time": "2:03:21", "remaining_time": "2:22:06", "throughput": 8663.91, "total_tokens": 64123048} +{"current_steps": 95110, "total_steps": 204665, "loss": 0.0822, "lr": 1.293358682859597e-06, "epoch": 2.3235531233967706, "percentage": 46.47, "elapsed_time": "2:03:21", "remaining_time": "2:22:05", "throughput": 8663.98, "total_tokens": 64126568} +{"current_steps": 95115, "total_steps": 204665, "loss": 0.0745, "lr": 1.293277156199031e-06, "epoch": 2.3236752742286173, "percentage": 46.47, "elapsed_time": "2:03:21", "remaining_time": "2:22:05", "throughput": 8664.05, "total_tokens": 64130216} +{"current_steps": 95120, "total_steps": 204665, "loss": 0.055, "lr": 1.2931956274056747e-06, "epoch": 2.3237974250604645, "percentage": 46.48, "elapsed_time": "2:03:22", "remaining_time": "2:22:04", "throughput": 8664.16, "total_tokens": 64134120} +{"current_steps": 95125, "total_steps": 204665, "loss": 0.1924, "lr": 1.2931140964801208e-06, "epoch": 2.3239195758923117, "percentage": 46.48, "elapsed_time": "2:03:22", "remaining_time": "2:22:04", "throughput": 8664.17, "total_tokens": 64137128} +{"current_steps": 95130, "total_steps": 204665, "loss": 0.0003, "lr": 1.293032563422962e-06, "epoch": 2.324041726724159, "percentage": 46.48, "elapsed_time": "2:03:22", "remaining_time": "2:22:03", "throughput": 8664.24, "total_tokens": 64140648} +{"current_steps": 95135, "total_steps": 204665, "loss": 0.0003, "lr": 1.2929510282347922e-06, "epoch": 2.324163877556006, "percentage": 46.48, "elapsed_time": "2:03:23", "remaining_time": "2:22:03", "throughput": 8664.29, "total_tokens": 64144104} +{"current_steps": 95140, "total_steps": 204665, "loss": 0.082, "lr": 1.2928694909162036e-06, "epoch": 2.3242860283878533, "percentage": 46.49, "elapsed_time": "2:03:23", "remaining_time": "2:22:03", "throughput": 8664.33, "total_tokens": 64147432} +{"current_steps": 95145, "total_steps": 204665, "loss": 0.089, "lr": 1.2927879514677891e-06, "epoch": 2.3244081792197004, "percentage": 46.49, "elapsed_time": "2:03:23", "remaining_time": "2:22:02", "throughput": 8664.37, "total_tokens": 64150696} +{"current_steps": 95150, "total_steps": 204665, "loss": 0.0979, "lr": 1.2927064098901418e-06, "epoch": 2.3245303300515476, "percentage": 46.49, "elapsed_time": "2:03:24", "remaining_time": "2:22:02", "throughput": 8664.42, "total_tokens": 64154088} +{"current_steps": 95155, "total_steps": 204665, "loss": 0.0014, "lr": 1.2926248661838548e-06, "epoch": 2.324652480883395, "percentage": 46.49, "elapsed_time": "2:03:24", "remaining_time": "2:22:01", "throughput": 8664.46, "total_tokens": 64157416} +{"current_steps": 95160, "total_steps": 204665, "loss": 0.0361, "lr": 1.2925433203495213e-06, "epoch": 2.324774631715242, "percentage": 46.5, "elapsed_time": "2:03:25", "remaining_time": "2:22:01", "throughput": 8664.52, "total_tokens": 64160872} +{"current_steps": 95165, "total_steps": 204665, "loss": 0.0002, "lr": 1.2924617723877337e-06, "epoch": 2.324896782547089, "percentage": 46.5, "elapsed_time": "2:03:25", "remaining_time": "2:22:00", "throughput": 8664.56, "total_tokens": 64164200} +{"current_steps": 95170, "total_steps": 204665, "loss": 0.0714, "lr": 1.292380222299086e-06, "epoch": 2.3250189333789364, "percentage": 46.5, "elapsed_time": "2:03:25", "remaining_time": "2:22:00", "throughput": 8664.63, "total_tokens": 64167784} +{"current_steps": 95175, "total_steps": 204665, "loss": 0.0007, "lr": 1.2922986700841704e-06, "epoch": 2.3251410842107836, "percentage": 46.5, "elapsed_time": "2:03:26", "remaining_time": "2:21:59", "throughput": 8664.7, "total_tokens": 64171368} +{"current_steps": 95180, "total_steps": 204665, "loss": 0.0005, "lr": 1.2922171157435802e-06, "epoch": 2.3252632350426308, "percentage": 46.51, "elapsed_time": "2:03:26", "remaining_time": "2:21:59", "throughput": 8664.75, "total_tokens": 64174696} +{"current_steps": 95185, "total_steps": 204665, "loss": 0.0627, "lr": 1.2921355592779083e-06, "epoch": 2.325385385874478, "percentage": 46.51, "elapsed_time": "2:03:26", "remaining_time": "2:21:59", "throughput": 8664.82, "total_tokens": 64178280} +{"current_steps": 95190, "total_steps": 204665, "loss": 0.0005, "lr": 1.2920540006877483e-06, "epoch": 2.325507536706325, "percentage": 46.51, "elapsed_time": "2:03:27", "remaining_time": "2:21:58", "throughput": 8664.92, "total_tokens": 64182120} +{"current_steps": 95195, "total_steps": 204665, "loss": 0.0009, "lr": 1.2919724399736931e-06, "epoch": 2.3256296875381723, "percentage": 46.51, "elapsed_time": "2:03:27", "remaining_time": "2:21:58", "throughput": 8664.92, "total_tokens": 64185064} +{"current_steps": 95200, "total_steps": 204665, "loss": 0.0502, "lr": 1.2918908771363358e-06, "epoch": 2.325751838370019, "percentage": 46.52, "elapsed_time": "2:03:27", "remaining_time": "2:21:57", "throughput": 8664.97, "total_tokens": 64188456} +{"current_steps": 95205, "total_steps": 204665, "loss": 0.0002, "lr": 1.2918093121762694e-06, "epoch": 2.3258739892018663, "percentage": 46.52, "elapsed_time": "2:03:28", "remaining_time": "2:21:57", "throughput": 8665.01, "total_tokens": 64191720} +{"current_steps": 95210, "total_steps": 204665, "loss": 0.0004, "lr": 1.2917277450940874e-06, "epoch": 2.3259961400337135, "percentage": 46.52, "elapsed_time": "2:03:28", "remaining_time": "2:21:56", "throughput": 8665.05, "total_tokens": 64195048} +{"current_steps": 95215, "total_steps": 204665, "loss": 0.0004, "lr": 1.2916461758903823e-06, "epoch": 2.3261182908655607, "percentage": 46.52, "elapsed_time": "2:03:28", "remaining_time": "2:21:56", "throughput": 8665.1, "total_tokens": 64198440} +{"current_steps": 95220, "total_steps": 204665, "loss": 0.049, "lr": 1.2915646045657483e-06, "epoch": 2.326240441697408, "percentage": 46.52, "elapsed_time": "2:03:29", "remaining_time": "2:21:56", "throughput": 8665.18, "total_tokens": 64202088} +{"current_steps": 95225, "total_steps": 204665, "loss": 0.0608, "lr": 1.2914830311207778e-06, "epoch": 2.326362592529255, "percentage": 46.53, "elapsed_time": "2:03:29", "remaining_time": "2:21:55", "throughput": 8665.19, "total_tokens": 64205096} +{"current_steps": 95230, "total_steps": 204665, "loss": 0.0003, "lr": 1.2914014555560643e-06, "epoch": 2.3264847433611022, "percentage": 46.53, "elapsed_time": "2:03:29", "remaining_time": "2:21:55", "throughput": 8665.21, "total_tokens": 64208232} +{"current_steps": 95235, "total_steps": 204665, "loss": 0.0907, "lr": 1.2913198778722013e-06, "epoch": 2.3266068941929494, "percentage": 46.53, "elapsed_time": "2:03:30", "remaining_time": "2:21:54", "throughput": 8665.26, "total_tokens": 64211624} +{"current_steps": 95240, "total_steps": 204665, "loss": 0.0001, "lr": 1.2912382980697816e-06, "epoch": 2.3267290450247966, "percentage": 46.53, "elapsed_time": "2:03:30", "remaining_time": "2:21:54", "throughput": 8665.29, "total_tokens": 64214760} +{"current_steps": 95245, "total_steps": 204665, "loss": 0.0725, "lr": 1.2911567161493985e-06, "epoch": 2.326851195856644, "percentage": 46.54, "elapsed_time": "2:03:30", "remaining_time": "2:21:53", "throughput": 8665.32, "total_tokens": 64217960} +{"current_steps": 95250, "total_steps": 204665, "loss": 0.0575, "lr": 1.2910751321116455e-06, "epoch": 2.326973346688491, "percentage": 46.54, "elapsed_time": "2:03:31", "remaining_time": "2:21:53", "throughput": 8665.35, "total_tokens": 64221288} +{"current_steps": 95255, "total_steps": 204665, "loss": 0.0007, "lr": 1.2909935459571159e-06, "epoch": 2.327095497520338, "percentage": 46.54, "elapsed_time": "2:03:31", "remaining_time": "2:21:53", "throughput": 8665.44, "total_tokens": 64225000} +{"current_steps": 95260, "total_steps": 204665, "loss": 0.0886, "lr": 1.290911957686403e-06, "epoch": 2.3272176483521854, "percentage": 46.54, "elapsed_time": "2:03:31", "remaining_time": "2:21:52", "throughput": 8665.48, "total_tokens": 64228328} +{"current_steps": 95265, "total_steps": 204665, "loss": 0.1204, "lr": 1.2908303673001e-06, "epoch": 2.3273397991840326, "percentage": 46.55, "elapsed_time": "2:03:32", "remaining_time": "2:21:52", "throughput": 8665.55, "total_tokens": 64231912} +{"current_steps": 95270, "total_steps": 204665, "loss": 0.0006, "lr": 1.2907487747988007e-06, "epoch": 2.3274619500158797, "percentage": 46.55, "elapsed_time": "2:03:32", "remaining_time": "2:21:51", "throughput": 8665.58, "total_tokens": 64235112} +{"current_steps": 95275, "total_steps": 204665, "loss": 0.0002, "lr": 1.2906671801830978e-06, "epoch": 2.327584100847727, "percentage": 46.55, "elapsed_time": "2:03:33", "remaining_time": "2:21:51", "throughput": 8665.66, "total_tokens": 64238824} +{"current_steps": 95280, "total_steps": 204665, "loss": 0.0632, "lr": 1.290585583453585e-06, "epoch": 2.327706251679574, "percentage": 46.55, "elapsed_time": "2:03:33", "remaining_time": "2:21:50", "throughput": 8665.68, "total_tokens": 64241896} +{"current_steps": 95285, "total_steps": 204665, "loss": 0.044, "lr": 1.2905039846108558e-06, "epoch": 2.327828402511421, "percentage": 46.56, "elapsed_time": "2:03:33", "remaining_time": "2:21:50", "throughput": 8665.73, "total_tokens": 64245352} +{"current_steps": 95290, "total_steps": 204665, "loss": 0.044, "lr": 1.2904223836555035e-06, "epoch": 2.3279505533432685, "percentage": 46.56, "elapsed_time": "2:03:34", "remaining_time": "2:21:49", "throughput": 8665.79, "total_tokens": 64248872} +{"current_steps": 95295, "total_steps": 204665, "loss": 0.0008, "lr": 1.2903407805881215e-06, "epoch": 2.3280727041751152, "percentage": 46.56, "elapsed_time": "2:03:34", "remaining_time": "2:21:49", "throughput": 8665.88, "total_tokens": 64252584} +{"current_steps": 95300, "total_steps": 204665, "loss": 0.0401, "lr": 1.290259175409303e-06, "epoch": 2.3281948550069624, "percentage": 46.56, "elapsed_time": "2:03:34", "remaining_time": "2:21:49", "throughput": 8665.9, "total_tokens": 64255656} +{"current_steps": 95305, "total_steps": 204665, "loss": 0.0003, "lr": 1.290177568119642e-06, "epoch": 2.3283170058388096, "percentage": 46.57, "elapsed_time": "2:03:35", "remaining_time": "2:21:48", "throughput": 8665.97, "total_tokens": 64259240} +{"current_steps": 95310, "total_steps": 204665, "loss": 0.0001, "lr": 1.2900959587197314e-06, "epoch": 2.328439156670657, "percentage": 46.57, "elapsed_time": "2:03:35", "remaining_time": "2:21:48", "throughput": 8666.05, "total_tokens": 64262952} +{"current_steps": 95315, "total_steps": 204665, "loss": 0.0008, "lr": 1.2900143472101652e-06, "epoch": 2.328561307502504, "percentage": 46.57, "elapsed_time": "2:03:35", "remaining_time": "2:21:47", "throughput": 8666.07, "total_tokens": 64266024} +{"current_steps": 95320, "total_steps": 204665, "loss": 0.0002, "lr": 1.2899327335915364e-06, "epoch": 2.328683458334351, "percentage": 46.57, "elapsed_time": "2:03:36", "remaining_time": "2:21:47", "throughput": 8666.1, "total_tokens": 64269288} +{"current_steps": 95325, "total_steps": 204665, "loss": 0.0527, "lr": 1.2898511178644394e-06, "epoch": 2.3288056091661984, "percentage": 46.58, "elapsed_time": "2:03:36", "remaining_time": "2:21:46", "throughput": 8666.13, "total_tokens": 64272488} +{"current_steps": 95330, "total_steps": 204665, "loss": 0.0001, "lr": 1.2897695000294666e-06, "epoch": 2.3289277599980456, "percentage": 46.58, "elapsed_time": "2:03:36", "remaining_time": "2:21:46", "throughput": 8666.2, "total_tokens": 64276008} +{"current_steps": 95335, "total_steps": 204665, "loss": 0.0, "lr": 1.2896878800872122e-06, "epoch": 2.3290499108298928, "percentage": 46.58, "elapsed_time": "2:03:37", "remaining_time": "2:21:46", "throughput": 8666.23, "total_tokens": 64279208} +{"current_steps": 95340, "total_steps": 204665, "loss": 0.0003, "lr": 1.2896062580382693e-06, "epoch": 2.32917206166174, "percentage": 46.58, "elapsed_time": "2:03:37", "remaining_time": "2:21:45", "throughput": 8666.22, "total_tokens": 64282088} +{"current_steps": 95345, "total_steps": 204665, "loss": 0.0003, "lr": 1.289524633883232e-06, "epoch": 2.329294212493587, "percentage": 46.59, "elapsed_time": "2:03:37", "remaining_time": "2:21:45", "throughput": 8666.31, "total_tokens": 64285864} +{"current_steps": 95350, "total_steps": 204665, "loss": 0.0603, "lr": 1.2894430076226939e-06, "epoch": 2.3294163633254343, "percentage": 46.59, "elapsed_time": "2:03:38", "remaining_time": "2:21:44", "throughput": 8666.36, "total_tokens": 64289256} +{"current_steps": 95355, "total_steps": 204665, "loss": 0.0, "lr": 1.2893613792572482e-06, "epoch": 2.3295385141572815, "percentage": 46.59, "elapsed_time": "2:03:38", "remaining_time": "2:21:44", "throughput": 8666.43, "total_tokens": 64292776} +{"current_steps": 95360, "total_steps": 204665, "loss": 0.0003, "lr": 1.2892797487874888e-06, "epoch": 2.3296606649891287, "percentage": 46.59, "elapsed_time": "2:03:38", "remaining_time": "2:21:43", "throughput": 8666.42, "total_tokens": 64295656} +{"current_steps": 95365, "total_steps": 204665, "loss": 0.0526, "lr": 1.289198116214009e-06, "epoch": 2.329782815820976, "percentage": 46.6, "elapsed_time": "2:03:39", "remaining_time": "2:21:43", "throughput": 8666.43, "total_tokens": 64298600} +{"current_steps": 95370, "total_steps": 204665, "loss": 0.0571, "lr": 1.289116481537403e-06, "epoch": 2.329904966652823, "percentage": 46.6, "elapsed_time": "2:03:39", "remaining_time": "2:21:42", "throughput": 8666.47, "total_tokens": 64301864} +{"current_steps": 95375, "total_steps": 204665, "loss": 0.1041, "lr": 1.2890348447582642e-06, "epoch": 2.3300271174846703, "percentage": 46.6, "elapsed_time": "2:03:39", "remaining_time": "2:21:42", "throughput": 8666.44, "total_tokens": 64304552} +{"current_steps": 95380, "total_steps": 204665, "loss": 0.0001, "lr": 1.288953205877186e-06, "epoch": 2.330149268316517, "percentage": 46.6, "elapsed_time": "2:03:40", "remaining_time": "2:21:42", "throughput": 8666.46, "total_tokens": 64307624} +{"current_steps": 95385, "total_steps": 204665, "loss": 0.1376, "lr": 1.2888715648947629e-06, "epoch": 2.330271419148364, "percentage": 46.61, "elapsed_time": "2:03:40", "remaining_time": "2:21:41", "throughput": 8666.51, "total_tokens": 64311016} +{"current_steps": 95390, "total_steps": 204665, "loss": 0.0712, "lr": 1.2887899218115876e-06, "epoch": 2.3303935699802114, "percentage": 46.61, "elapsed_time": "2:03:40", "remaining_time": "2:21:41", "throughput": 8666.55, "total_tokens": 64314280} +{"current_steps": 95395, "total_steps": 204665, "loss": 0.0002, "lr": 1.2887082766282545e-06, "epoch": 2.3305157208120586, "percentage": 46.61, "elapsed_time": "2:03:41", "remaining_time": "2:21:40", "throughput": 8666.57, "total_tokens": 64317416} +{"current_steps": 95400, "total_steps": 204665, "loss": 0.0003, "lr": 1.288626629345357e-06, "epoch": 2.330637871643906, "percentage": 46.61, "elapsed_time": "2:03:41", "remaining_time": "2:21:40", "throughput": 8666.63, "total_tokens": 64320936} +{"current_steps": 95405, "total_steps": 204665, "loss": 0.0454, "lr": 1.2885449799634888e-06, "epoch": 2.330760022475753, "percentage": 46.62, "elapsed_time": "2:03:42", "remaining_time": "2:21:39", "throughput": 8666.67, "total_tokens": 64324200} +{"current_steps": 95410, "total_steps": 204665, "loss": 0.1252, "lr": 1.2884633284832446e-06, "epoch": 2.3308821733076, "percentage": 46.62, "elapsed_time": "2:03:42", "remaining_time": "2:21:39", "throughput": 8666.7, "total_tokens": 64327464} +{"current_steps": 95415, "total_steps": 204665, "loss": 0.1476, "lr": 1.288381674905217e-06, "epoch": 2.3310043241394474, "percentage": 46.62, "elapsed_time": "2:03:42", "remaining_time": "2:21:38", "throughput": 8666.74, "total_tokens": 64330728} +{"current_steps": 95420, "total_steps": 204665, "loss": 0.0612, "lr": 1.2883000192300003e-06, "epoch": 2.3311264749712945, "percentage": 46.62, "elapsed_time": "2:03:43", "remaining_time": "2:21:38", "throughput": 8666.79, "total_tokens": 64334120} +{"current_steps": 95425, "total_steps": 204665, "loss": 0.0001, "lr": 1.2882183614581885e-06, "epoch": 2.3312486258031417, "percentage": 46.62, "elapsed_time": "2:03:43", "remaining_time": "2:21:38", "throughput": 8666.81, "total_tokens": 64337192} +{"current_steps": 95430, "total_steps": 204665, "loss": 0.001, "lr": 1.2881367015903752e-06, "epoch": 2.331370776634989, "percentage": 46.63, "elapsed_time": "2:03:43", "remaining_time": "2:21:37", "throughput": 8666.83, "total_tokens": 64340328} +{"current_steps": 95435, "total_steps": 204665, "loss": 0.0007, "lr": 1.2880550396271543e-06, "epoch": 2.331492927466836, "percentage": 46.63, "elapsed_time": "2:03:44", "remaining_time": "2:21:37", "throughput": 8666.84, "total_tokens": 64343400} +{"current_steps": 95440, "total_steps": 204665, "loss": 0.0337, "lr": 1.2879733755691196e-06, "epoch": 2.3316150782986833, "percentage": 46.63, "elapsed_time": "2:03:44", "remaining_time": "2:21:36", "throughput": 8666.86, "total_tokens": 64346472} +{"current_steps": 95445, "total_steps": 204665, "loss": 0.0002, "lr": 1.287891709416865e-06, "epoch": 2.3317372291305305, "percentage": 46.63, "elapsed_time": "2:03:44", "remaining_time": "2:21:36", "throughput": 8666.93, "total_tokens": 64350056} +{"current_steps": 95450, "total_steps": 204665, "loss": 0.0443, "lr": 1.2878100411709847e-06, "epoch": 2.3318593799623777, "percentage": 46.64, "elapsed_time": "2:03:45", "remaining_time": "2:21:35", "throughput": 8666.94, "total_tokens": 64353128} +{"current_steps": 95455, "total_steps": 204665, "loss": 0.0003, "lr": 1.2877283708320724e-06, "epoch": 2.331981530794225, "percentage": 46.64, "elapsed_time": "2:03:45", "remaining_time": "2:21:35", "throughput": 8666.98, "total_tokens": 64356392} +{"current_steps": 95460, "total_steps": 204665, "loss": 0.0379, "lr": 1.2876466984007217e-06, "epoch": 2.332103681626072, "percentage": 46.64, "elapsed_time": "2:03:45", "remaining_time": "2:21:35", "throughput": 8667.0, "total_tokens": 64359592} +{"current_steps": 95465, "total_steps": 204665, "loss": 0.0756, "lr": 1.2875650238775268e-06, "epoch": 2.332225832457919, "percentage": 46.64, "elapsed_time": "2:03:46", "remaining_time": "2:21:34", "throughput": 8667.08, "total_tokens": 64363240} +{"current_steps": 95470, "total_steps": 204665, "loss": 0.0002, "lr": 1.2874833472630819e-06, "epoch": 2.3323479832897664, "percentage": 46.65, "elapsed_time": "2:03:46", "remaining_time": "2:21:34", "throughput": 8667.17, "total_tokens": 64367016} +{"current_steps": 95475, "total_steps": 204665, "loss": 0.0408, "lr": 1.2874016685579807e-06, "epoch": 2.332470134121613, "percentage": 46.65, "elapsed_time": "2:03:46", "remaining_time": "2:21:33", "throughput": 8667.2, "total_tokens": 64370152} +{"current_steps": 95480, "total_steps": 204665, "loss": 0.0529, "lr": 1.2873199877628177e-06, "epoch": 2.3325922849534604, "percentage": 46.65, "elapsed_time": "2:03:47", "remaining_time": "2:21:33", "throughput": 8667.26, "total_tokens": 64373672} +{"current_steps": 95485, "total_steps": 204665, "loss": 0.0004, "lr": 1.2872383048781862e-06, "epoch": 2.3327144357853076, "percentage": 46.65, "elapsed_time": "2:03:47", "remaining_time": "2:21:32", "throughput": 8667.24, "total_tokens": 64376488} +{"current_steps": 95490, "total_steps": 204665, "loss": 0.0001, "lr": 1.2871566199046801e-06, "epoch": 2.3328365866171548, "percentage": 46.66, "elapsed_time": "2:03:47", "remaining_time": "2:21:32", "throughput": 8667.3, "total_tokens": 64379944} +{"current_steps": 95495, "total_steps": 204665, "loss": 0.0003, "lr": 1.287074932842894e-06, "epoch": 2.332958737449002, "percentage": 46.66, "elapsed_time": "2:03:48", "remaining_time": "2:21:31", "throughput": 8667.29, "total_tokens": 64382760} +{"current_steps": 95500, "total_steps": 204665, "loss": 0.0001, "lr": 1.286993243693422e-06, "epoch": 2.333080888280849, "percentage": 46.66, "elapsed_time": "2:03:48", "remaining_time": "2:21:31", "throughput": 8667.32, "total_tokens": 64386024} +{"current_steps": 95505, "total_steps": 204665, "loss": 0.0001, "lr": 1.2869115524568577e-06, "epoch": 2.3332030391126963, "percentage": 46.66, "elapsed_time": "2:03:48", "remaining_time": "2:21:31", "throughput": 8667.35, "total_tokens": 64389224} +{"current_steps": 95510, "total_steps": 204665, "loss": 0.1076, "lr": 1.2868298591337955e-06, "epoch": 2.3333251899445435, "percentage": 46.67, "elapsed_time": "2:03:49", "remaining_time": "2:21:30", "throughput": 8667.37, "total_tokens": 64392296} +{"current_steps": 95515, "total_steps": 204665, "loss": 0.113, "lr": 1.2867481637248294e-06, "epoch": 2.3334473407763907, "percentage": 46.67, "elapsed_time": "2:03:49", "remaining_time": "2:21:30", "throughput": 8667.44, "total_tokens": 64395880} +{"current_steps": 95520, "total_steps": 204665, "loss": 0.0001, "lr": 1.2866664662305537e-06, "epoch": 2.333569491608238, "percentage": 46.67, "elapsed_time": "2:03:49", "remaining_time": "2:21:29", "throughput": 8667.49, "total_tokens": 64399208} +{"current_steps": 95525, "total_steps": 204665, "loss": 0.0492, "lr": 1.2865847666515622e-06, "epoch": 2.333691642440085, "percentage": 46.67, "elapsed_time": "2:03:50", "remaining_time": "2:21:29", "throughput": 8667.48, "total_tokens": 64402024} +{"current_steps": 95530, "total_steps": 204665, "loss": 0.0528, "lr": 1.2865030649884493e-06, "epoch": 2.3338137932719323, "percentage": 46.68, "elapsed_time": "2:03:50", "remaining_time": "2:21:28", "throughput": 8667.54, "total_tokens": 64405608} +{"current_steps": 95535, "total_steps": 204665, "loss": 0.0001, "lr": 1.2864213612418088e-06, "epoch": 2.3339359441037795, "percentage": 46.68, "elapsed_time": "2:03:51", "remaining_time": "2:21:28", "throughput": 8667.61, "total_tokens": 64409128} +{"current_steps": 95540, "total_steps": 204665, "loss": 0.0845, "lr": 1.2863396554122355e-06, "epoch": 2.3340580949356267, "percentage": 46.68, "elapsed_time": "2:03:51", "remaining_time": "2:21:28", "throughput": 8667.69, "total_tokens": 64412776} +{"current_steps": 95545, "total_steps": 204665, "loss": 0.1, "lr": 1.286257947500323e-06, "epoch": 2.334180245767474, "percentage": 46.68, "elapsed_time": "2:03:51", "remaining_time": "2:21:27", "throughput": 8667.73, "total_tokens": 64416104} +{"current_steps": 95550, "total_steps": 204665, "loss": 0.0377, "lr": 1.2861762375066658e-06, "epoch": 2.3343023965993206, "percentage": 46.69, "elapsed_time": "2:03:52", "remaining_time": "2:21:27", "throughput": 8667.76, "total_tokens": 64419240} +{"current_steps": 95555, "total_steps": 204665, "loss": 0.0003, "lr": 1.2860945254318578e-06, "epoch": 2.3344245474311682, "percentage": 46.69, "elapsed_time": "2:03:52", "remaining_time": "2:21:26", "throughput": 8667.82, "total_tokens": 64422696} +{"current_steps": 95560, "total_steps": 204665, "loss": 0.0908, "lr": 1.2860128112764934e-06, "epoch": 2.334546698263015, "percentage": 46.69, "elapsed_time": "2:03:52", "remaining_time": "2:21:26", "throughput": 8667.83, "total_tokens": 64425768} +{"current_steps": 95565, "total_steps": 204665, "loss": 0.0001, "lr": 1.2859310950411672e-06, "epoch": 2.334668849094862, "percentage": 46.69, "elapsed_time": "2:03:53", "remaining_time": "2:21:25", "throughput": 8667.87, "total_tokens": 64429096} +{"current_steps": 95570, "total_steps": 204665, "loss": 0.0002, "lr": 1.285849376726473e-06, "epoch": 2.3347909999267094, "percentage": 46.7, "elapsed_time": "2:03:53", "remaining_time": "2:21:25", "throughput": 8667.99, "total_tokens": 64433064} +{"current_steps": 95575, "total_steps": 204665, "loss": 0.0002, "lr": 1.2857676563330055e-06, "epoch": 2.3349131507585565, "percentage": 46.7, "elapsed_time": "2:03:53", "remaining_time": "2:21:24", "throughput": 8668.01, "total_tokens": 64436200} +{"current_steps": 95580, "total_steps": 204665, "loss": 0.0737, "lr": 1.2856859338613585e-06, "epoch": 2.3350353015904037, "percentage": 46.7, "elapsed_time": "2:03:54", "remaining_time": "2:21:24", "throughput": 8668.07, "total_tokens": 64439656} +{"current_steps": 95585, "total_steps": 204665, "loss": 0.1409, "lr": 1.2856042093121267e-06, "epoch": 2.335157452422251, "percentage": 46.7, "elapsed_time": "2:03:54", "remaining_time": "2:21:24", "throughput": 8668.09, "total_tokens": 64442792} +{"current_steps": 95590, "total_steps": 204665, "loss": 0.0001, "lr": 1.2855224826859045e-06, "epoch": 2.335279603254098, "percentage": 46.71, "elapsed_time": "2:03:54", "remaining_time": "2:21:23", "throughput": 8668.12, "total_tokens": 64445992} +{"current_steps": 95595, "total_steps": 204665, "loss": 0.0564, "lr": 1.2854407539832855e-06, "epoch": 2.3354017540859453, "percentage": 46.71, "elapsed_time": "2:03:55", "remaining_time": "2:21:23", "throughput": 8668.2, "total_tokens": 64449640} +{"current_steps": 95600, "total_steps": 204665, "loss": 0.0468, "lr": 1.2853590232048648e-06, "epoch": 2.3355239049177925, "percentage": 46.71, "elapsed_time": "2:03:55", "remaining_time": "2:21:22", "throughput": 8668.21, "total_tokens": 64452648} +{"current_steps": 95605, "total_steps": 204665, "loss": 0.0495, "lr": 1.2852772903512366e-06, "epoch": 2.3356460557496397, "percentage": 46.71, "elapsed_time": "2:03:55", "remaining_time": "2:21:22", "throughput": 8668.23, "total_tokens": 64455784} +{"current_steps": 95610, "total_steps": 204665, "loss": 0.0705, "lr": 1.285195555422995e-06, "epoch": 2.335768206581487, "percentage": 46.72, "elapsed_time": "2:03:56", "remaining_time": "2:21:21", "throughput": 8668.23, "total_tokens": 64458664} +{"current_steps": 95615, "total_steps": 204665, "loss": 0.0775, "lr": 1.2851138184207345e-06, "epoch": 2.335890357413334, "percentage": 46.72, "elapsed_time": "2:03:56", "remaining_time": "2:21:21", "throughput": 8668.29, "total_tokens": 64462184} +{"current_steps": 95620, "total_steps": 204665, "loss": 0.035, "lr": 1.2850320793450497e-06, "epoch": 2.3360125082451813, "percentage": 46.72, "elapsed_time": "2:03:56", "remaining_time": "2:21:21", "throughput": 8668.36, "total_tokens": 64465768} +{"current_steps": 95625, "total_steps": 204665, "loss": 0.0754, "lr": 1.2849503381965354e-06, "epoch": 2.3361346590770284, "percentage": 46.72, "elapsed_time": "2:03:57", "remaining_time": "2:21:20", "throughput": 8668.4, "total_tokens": 64469096} +{"current_steps": 95630, "total_steps": 204665, "loss": 0.0429, "lr": 1.2848685949757853e-06, "epoch": 2.3362568099088756, "percentage": 46.73, "elapsed_time": "2:03:57", "remaining_time": "2:21:20", "throughput": 8668.43, "total_tokens": 64472296} +{"current_steps": 95635, "total_steps": 204665, "loss": 0.0127, "lr": 1.2847868496833942e-06, "epoch": 2.336378960740723, "percentage": 46.73, "elapsed_time": "2:03:57", "remaining_time": "2:21:19", "throughput": 8668.42, "total_tokens": 64475112} +{"current_steps": 95640, "total_steps": 204665, "loss": 0.0723, "lr": 1.2847051023199566e-06, "epoch": 2.33650111157257, "percentage": 46.73, "elapsed_time": "2:03:58", "remaining_time": "2:21:19", "throughput": 8668.47, "total_tokens": 64478568} +{"current_steps": 95645, "total_steps": 204665, "loss": 0.0002, "lr": 1.2846233528860667e-06, "epoch": 2.3366232624044168, "percentage": 46.73, "elapsed_time": "2:03:58", "remaining_time": "2:21:18", "throughput": 8668.53, "total_tokens": 64482088} +{"current_steps": 95650, "total_steps": 204665, "loss": 0.0469, "lr": 1.2845416013823195e-06, "epoch": 2.336745413236264, "percentage": 46.73, "elapsed_time": "2:03:58", "remaining_time": "2:21:18", "throughput": 8668.56, "total_tokens": 64485288} +{"current_steps": 95655, "total_steps": 204665, "loss": 0.0005, "lr": 1.284459847809309e-06, "epoch": 2.336867564068111, "percentage": 46.74, "elapsed_time": "2:03:59", "remaining_time": "2:21:17", "throughput": 8668.64, "total_tokens": 64488936} +{"current_steps": 95660, "total_steps": 204665, "loss": 0.0004, "lr": 1.28437809216763e-06, "epoch": 2.3369897148999583, "percentage": 46.74, "elapsed_time": "2:03:59", "remaining_time": "2:21:17", "throughput": 8668.75, "total_tokens": 64492840} +{"current_steps": 95665, "total_steps": 204665, "loss": 0.0005, "lr": 1.284296334457877e-06, "epoch": 2.3371118657318055, "percentage": 46.74, "elapsed_time": "2:04:00", "remaining_time": "2:21:17", "throughput": 8668.84, "total_tokens": 64496616} +{"current_steps": 95670, "total_steps": 204665, "loss": 0.0029, "lr": 1.2842145746806448e-06, "epoch": 2.3372340165636527, "percentage": 46.74, "elapsed_time": "2:04:00", "remaining_time": "2:21:16", "throughput": 8668.89, "total_tokens": 64500008} +{"current_steps": 95675, "total_steps": 204665, "loss": 0.0001, "lr": 1.2841328128365275e-06, "epoch": 2.3373561673955, "percentage": 46.75, "elapsed_time": "2:04:00", "remaining_time": "2:21:16", "throughput": 8668.97, "total_tokens": 64503720} +{"current_steps": 95680, "total_steps": 204665, "loss": 0.0002, "lr": 1.2840510489261202e-06, "epoch": 2.337478318227347, "percentage": 46.75, "elapsed_time": "2:04:01", "remaining_time": "2:21:15", "throughput": 8668.99, "total_tokens": 64506792} +{"current_steps": 95685, "total_steps": 204665, "loss": 0.0001, "lr": 1.2839692829500172e-06, "epoch": 2.3376004690591943, "percentage": 46.75, "elapsed_time": "2:04:01", "remaining_time": "2:21:15", "throughput": 8669.0, "total_tokens": 64509864} +{"current_steps": 95690, "total_steps": 204665, "loss": 0.034, "lr": 1.2838875149088133e-06, "epoch": 2.3377226198910415, "percentage": 46.75, "elapsed_time": "2:04:01", "remaining_time": "2:21:14", "throughput": 8669.02, "total_tokens": 64513000} +{"current_steps": 95695, "total_steps": 204665, "loss": 0.0001, "lr": 1.2838057448031028e-06, "epoch": 2.3378447707228887, "percentage": 46.76, "elapsed_time": "2:04:02", "remaining_time": "2:21:14", "throughput": 8669.09, "total_tokens": 64516520} +{"current_steps": 95700, "total_steps": 204665, "loss": 0.0477, "lr": 1.2837239726334807e-06, "epoch": 2.337966921554736, "percentage": 46.76, "elapsed_time": "2:04:02", "remaining_time": "2:21:14", "throughput": 8669.21, "total_tokens": 64520552} +{"current_steps": 95705, "total_steps": 204665, "loss": 0.0413, "lr": 1.2836421984005416e-06, "epoch": 2.338089072386583, "percentage": 46.76, "elapsed_time": "2:04:02", "remaining_time": "2:21:13", "throughput": 8669.22, "total_tokens": 64523624} +{"current_steps": 95710, "total_steps": 204665, "loss": 0.0001, "lr": 1.2835604221048801e-06, "epoch": 2.3382112232184302, "percentage": 46.76, "elapsed_time": "2:04:03", "remaining_time": "2:21:13", "throughput": 8669.22, "total_tokens": 64526568} +{"current_steps": 95715, "total_steps": 204665, "loss": 0.0001, "lr": 1.283478643747091e-06, "epoch": 2.3383333740502774, "percentage": 46.77, "elapsed_time": "2:04:03", "remaining_time": "2:21:12", "throughput": 8669.33, "total_tokens": 64530472} +{"current_steps": 95720, "total_steps": 204665, "loss": 0.0059, "lr": 1.2833968633277685e-06, "epoch": 2.3384555248821246, "percentage": 46.77, "elapsed_time": "2:04:03", "remaining_time": "2:21:12", "throughput": 8669.38, "total_tokens": 64533928} +{"current_steps": 95725, "total_steps": 204665, "loss": 0.0002, "lr": 1.2833150808475085e-06, "epoch": 2.338577675713972, "percentage": 46.77, "elapsed_time": "2:04:04", "remaining_time": "2:21:11", "throughput": 8669.43, "total_tokens": 64537256} +{"current_steps": 95730, "total_steps": 204665, "loss": 0.0001, "lr": 1.2832332963069045e-06, "epoch": 2.3386998265458185, "percentage": 46.77, "elapsed_time": "2:04:04", "remaining_time": "2:21:11", "throughput": 8669.46, "total_tokens": 64540520} +{"current_steps": 95735, "total_steps": 204665, "loss": 0.0003, "lr": 1.2831515097065521e-06, "epoch": 2.338821977377666, "percentage": 46.78, "elapsed_time": "2:04:04", "remaining_time": "2:21:11", "throughput": 8669.54, "total_tokens": 64544168} +{"current_steps": 95740, "total_steps": 204665, "loss": 0.0001, "lr": 1.2830697210470455e-06, "epoch": 2.338944128209513, "percentage": 46.78, "elapsed_time": "2:04:05", "remaining_time": "2:21:10", "throughput": 8669.6, "total_tokens": 64547624} +{"current_steps": 95745, "total_steps": 204665, "loss": 0.056, "lr": 1.28298793032898e-06, "epoch": 2.33906627904136, "percentage": 46.78, "elapsed_time": "2:04:05", "remaining_time": "2:21:10", "throughput": 8669.67, "total_tokens": 64551144} +{"current_steps": 95750, "total_steps": 204665, "loss": 0.0003, "lr": 1.2829061375529503e-06, "epoch": 2.3391884298732073, "percentage": 46.78, "elapsed_time": "2:04:05", "remaining_time": "2:21:09", "throughput": 8669.7, "total_tokens": 64554344} +{"current_steps": 95755, "total_steps": 204665, "loss": 0.0604, "lr": 1.2828243427195507e-06, "epoch": 2.3393105807050545, "percentage": 46.79, "elapsed_time": "2:04:06", "remaining_time": "2:21:09", "throughput": 8669.74, "total_tokens": 64557736} +{"current_steps": 95760, "total_steps": 204665, "loss": 0.0002, "lr": 1.2827425458293766e-06, "epoch": 2.3394327315369017, "percentage": 46.79, "elapsed_time": "2:04:06", "remaining_time": "2:21:08", "throughput": 8669.76, "total_tokens": 64560808} +{"current_steps": 95765, "total_steps": 204665, "loss": 0.0002, "lr": 1.2826607468830227e-06, "epoch": 2.339554882368749, "percentage": 46.79, "elapsed_time": "2:04:07", "remaining_time": "2:21:08", "throughput": 8669.86, "total_tokens": 64564712} +{"current_steps": 95770, "total_steps": 204665, "loss": 0.0019, "lr": 1.2825789458810836e-06, "epoch": 2.339677033200596, "percentage": 46.79, "elapsed_time": "2:04:07", "remaining_time": "2:21:08", "throughput": 8669.87, "total_tokens": 64567656} +{"current_steps": 95775, "total_steps": 204665, "loss": 0.0279, "lr": 1.2824971428241544e-06, "epoch": 2.3397991840324432, "percentage": 46.8, "elapsed_time": "2:04:07", "remaining_time": "2:21:07", "throughput": 8669.89, "total_tokens": 64570792} +{"current_steps": 95780, "total_steps": 204665, "loss": 0.0007, "lr": 1.2824153377128301e-06, "epoch": 2.3399213348642904, "percentage": 46.8, "elapsed_time": "2:04:08", "remaining_time": "2:21:07", "throughput": 8669.97, "total_tokens": 64574440} +{"current_steps": 95785, "total_steps": 204665, "loss": 0.0764, "lr": 1.2823335305477058e-06, "epoch": 2.3400434856961376, "percentage": 46.8, "elapsed_time": "2:04:08", "remaining_time": "2:21:06", "throughput": 8670.03, "total_tokens": 64577896} +{"current_steps": 95790, "total_steps": 204665, "loss": 0.0002, "lr": 1.2822517213293756e-06, "epoch": 2.340165636527985, "percentage": 46.8, "elapsed_time": "2:04:08", "remaining_time": "2:21:06", "throughput": 8670.04, "total_tokens": 64580904} +{"current_steps": 95795, "total_steps": 204665, "loss": 0.1267, "lr": 1.2821699100584354e-06, "epoch": 2.340287787359832, "percentage": 46.81, "elapsed_time": "2:04:09", "remaining_time": "2:21:05", "throughput": 8670.12, "total_tokens": 64584616} +{"current_steps": 95800, "total_steps": 204665, "loss": 0.0413, "lr": 1.2820880967354798e-06, "epoch": 2.340409938191679, "percentage": 46.81, "elapsed_time": "2:04:09", "remaining_time": "2:21:05", "throughput": 8670.14, "total_tokens": 64587752} +{"current_steps": 95805, "total_steps": 204665, "loss": 0.0001, "lr": 1.2820062813611033e-06, "epoch": 2.3405320890235264, "percentage": 46.81, "elapsed_time": "2:04:09", "remaining_time": "2:21:04", "throughput": 8670.2, "total_tokens": 64591208} +{"current_steps": 95810, "total_steps": 204665, "loss": 0.0549, "lr": 1.2819244639359012e-06, "epoch": 2.3406542398553736, "percentage": 46.81, "elapsed_time": "2:04:10", "remaining_time": "2:21:04", "throughput": 8670.28, "total_tokens": 64594856} +{"current_steps": 95815, "total_steps": 204665, "loss": 0.0697, "lr": 1.2818426444604686e-06, "epoch": 2.3407763906872208, "percentage": 46.82, "elapsed_time": "2:04:10", "remaining_time": "2:21:04", "throughput": 8670.32, "total_tokens": 64598120} +{"current_steps": 95820, "total_steps": 204665, "loss": 0.0001, "lr": 1.2817608229354009e-06, "epoch": 2.340898541519068, "percentage": 46.82, "elapsed_time": "2:04:10", "remaining_time": "2:21:03", "throughput": 8670.36, "total_tokens": 64601448} +{"current_steps": 95825, "total_steps": 204665, "loss": 0.1384, "lr": 1.2816789993612925e-06, "epoch": 2.3410206923509147, "percentage": 46.82, "elapsed_time": "2:04:11", "remaining_time": "2:21:03", "throughput": 8670.38, "total_tokens": 64604648} +{"current_steps": 95830, "total_steps": 204665, "loss": 0.0585, "lr": 1.2815971737387385e-06, "epoch": 2.341142843182762, "percentage": 46.82, "elapsed_time": "2:04:11", "remaining_time": "2:21:02", "throughput": 8670.42, "total_tokens": 64607912} +{"current_steps": 95835, "total_steps": 204665, "loss": 0.0027, "lr": 1.2815153460683343e-06, "epoch": 2.341264994014609, "percentage": 46.83, "elapsed_time": "2:04:11", "remaining_time": "2:21:02", "throughput": 8670.49, "total_tokens": 64611432} +{"current_steps": 95840, "total_steps": 204665, "loss": 0.0001, "lr": 1.2814335163506746e-06, "epoch": 2.3413871448464563, "percentage": 46.83, "elapsed_time": "2:04:12", "remaining_time": "2:21:01", "throughput": 8670.52, "total_tokens": 64614632} +{"current_steps": 95845, "total_steps": 204665, "loss": 0.0002, "lr": 1.281351684586355e-06, "epoch": 2.3415092956783035, "percentage": 46.83, "elapsed_time": "2:04:12", "remaining_time": "2:21:01", "throughput": 8670.53, "total_tokens": 64617640} +{"current_steps": 95850, "total_steps": 204665, "loss": 0.0746, "lr": 1.28126985077597e-06, "epoch": 2.3416314465101506, "percentage": 46.83, "elapsed_time": "2:04:12", "remaining_time": "2:21:01", "throughput": 8670.54, "total_tokens": 64620712} +{"current_steps": 95855, "total_steps": 204665, "loss": 0.0003, "lr": 1.281188014920115e-06, "epoch": 2.341753597341998, "percentage": 46.84, "elapsed_time": "2:04:13", "remaining_time": "2:21:00", "throughput": 8670.57, "total_tokens": 64623912} +{"current_steps": 95860, "total_steps": 204665, "loss": 0.0004, "lr": 1.2811061770193852e-06, "epoch": 2.341875748173845, "percentage": 46.84, "elapsed_time": "2:04:13", "remaining_time": "2:21:00", "throughput": 8670.65, "total_tokens": 64627560} +{"current_steps": 95865, "total_steps": 204665, "loss": 0.0355, "lr": 1.2810243370743758e-06, "epoch": 2.341997899005692, "percentage": 46.84, "elapsed_time": "2:04:13", "remaining_time": "2:20:59", "throughput": 8670.73, "total_tokens": 64631272} +{"current_steps": 95870, "total_steps": 204665, "loss": 0.0002, "lr": 1.2809424950856814e-06, "epoch": 2.3421200498375394, "percentage": 46.84, "elapsed_time": "2:04:14", "remaining_time": "2:20:59", "throughput": 8670.79, "total_tokens": 64634792} +{"current_steps": 95875, "total_steps": 204665, "loss": 0.0259, "lr": 1.280860651053898e-06, "epoch": 2.3422422006693866, "percentage": 46.84, "elapsed_time": "2:04:14", "remaining_time": "2:20:58", "throughput": 8670.89, "total_tokens": 64638632} +{"current_steps": 95880, "total_steps": 204665, "loss": 0.0002, "lr": 1.2807788049796201e-06, "epoch": 2.342364351501234, "percentage": 46.85, "elapsed_time": "2:04:15", "remaining_time": "2:20:58", "throughput": 8670.94, "total_tokens": 64642024} +{"current_steps": 95885, "total_steps": 204665, "loss": 0.1175, "lr": 1.2806969568634436e-06, "epoch": 2.342486502333081, "percentage": 46.85, "elapsed_time": "2:04:15", "remaining_time": "2:20:57", "throughput": 8670.99, "total_tokens": 64645416} +{"current_steps": 95890, "total_steps": 204665, "loss": 0.0397, "lr": 1.2806151067059632e-06, "epoch": 2.342608653164928, "percentage": 46.85, "elapsed_time": "2:04:15", "remaining_time": "2:20:57", "throughput": 8671.03, "total_tokens": 64648680} +{"current_steps": 95895, "total_steps": 204665, "loss": 0.0945, "lr": 1.280533254507774e-06, "epoch": 2.3427308039967754, "percentage": 46.85, "elapsed_time": "2:04:16", "remaining_time": "2:20:57", "throughput": 8671.07, "total_tokens": 64651944} +{"current_steps": 95900, "total_steps": 204665, "loss": 0.0003, "lr": 1.280451400269472e-06, "epoch": 2.3428529548286225, "percentage": 46.86, "elapsed_time": "2:04:16", "remaining_time": "2:20:56", "throughput": 8671.13, "total_tokens": 64655400} +{"current_steps": 95905, "total_steps": 204665, "loss": 0.0002, "lr": 1.2803695439916515e-06, "epoch": 2.3429751056604697, "percentage": 46.86, "elapsed_time": "2:04:16", "remaining_time": "2:20:56", "throughput": 8671.21, "total_tokens": 64659112} +{"current_steps": 95910, "total_steps": 204665, "loss": 0.0001, "lr": 1.2802876856749088e-06, "epoch": 2.3430972564923165, "percentage": 46.86, "elapsed_time": "2:04:17", "remaining_time": "2:20:55", "throughput": 8671.24, "total_tokens": 64662312} +{"current_steps": 95915, "total_steps": 204665, "loss": 0.0, "lr": 1.2802058253198383e-06, "epoch": 2.343219407324164, "percentage": 46.86, "elapsed_time": "2:04:17", "remaining_time": "2:20:55", "throughput": 8671.3, "total_tokens": 64665832} +{"current_steps": 95920, "total_steps": 204665, "loss": 0.0914, "lr": 1.2801239629270356e-06, "epoch": 2.343341558156011, "percentage": 46.87, "elapsed_time": "2:04:17", "remaining_time": "2:20:54", "throughput": 8671.34, "total_tokens": 64669160} +{"current_steps": 95925, "total_steps": 204665, "loss": 0.0702, "lr": 1.2800420984970962e-06, "epoch": 2.343463708987858, "percentage": 46.87, "elapsed_time": "2:04:18", "remaining_time": "2:20:54", "throughput": 8671.39, "total_tokens": 64672552} +{"current_steps": 95930, "total_steps": 204665, "loss": 0.067, "lr": 1.2799602320306151e-06, "epoch": 2.3435858598197052, "percentage": 46.87, "elapsed_time": "2:04:18", "remaining_time": "2:20:54", "throughput": 8671.42, "total_tokens": 64675752} +{"current_steps": 95935, "total_steps": 204665, "loss": 0.0002, "lr": 1.2798783635281881e-06, "epoch": 2.3437080106515524, "percentage": 46.87, "elapsed_time": "2:04:18", "remaining_time": "2:20:53", "throughput": 8671.48, "total_tokens": 64679272} +{"current_steps": 95940, "total_steps": 204665, "loss": 0.0002, "lr": 1.2797964929904106e-06, "epoch": 2.3438301614833996, "percentage": 46.88, "elapsed_time": "2:04:19", "remaining_time": "2:20:53", "throughput": 8671.54, "total_tokens": 64682792} +{"current_steps": 95945, "total_steps": 204665, "loss": 0.0002, "lr": 1.2797146204178775e-06, "epoch": 2.343952312315247, "percentage": 46.88, "elapsed_time": "2:04:19", "remaining_time": "2:20:52", "throughput": 8671.6, "total_tokens": 64686184} +{"current_steps": 95950, "total_steps": 204665, "loss": 0.029, "lr": 1.2796327458111847e-06, "epoch": 2.344074463147094, "percentage": 46.88, "elapsed_time": "2:04:19", "remaining_time": "2:20:52", "throughput": 8671.62, "total_tokens": 64689256} +{"current_steps": 95955, "total_steps": 204665, "loss": 0.0989, "lr": 1.2795508691709272e-06, "epoch": 2.344196613978941, "percentage": 46.88, "elapsed_time": "2:04:20", "remaining_time": "2:20:51", "throughput": 8671.66, "total_tokens": 64692648} +{"current_steps": 95960, "total_steps": 204665, "loss": 0.0515, "lr": 1.2794689904977006e-06, "epoch": 2.3443187648107884, "percentage": 46.89, "elapsed_time": "2:04:20", "remaining_time": "2:20:51", "throughput": 8671.7, "total_tokens": 64695912} +{"current_steps": 95965, "total_steps": 204665, "loss": 0.0006, "lr": 1.2793871097921e-06, "epoch": 2.3444409156426356, "percentage": 46.89, "elapsed_time": "2:04:20", "remaining_time": "2:20:51", "throughput": 8671.7, "total_tokens": 64698856} +{"current_steps": 95970, "total_steps": 204665, "loss": 0.0502, "lr": 1.2793052270547215e-06, "epoch": 2.3445630664744828, "percentage": 46.89, "elapsed_time": "2:04:21", "remaining_time": "2:20:50", "throughput": 8671.72, "total_tokens": 64701928} +{"current_steps": 95975, "total_steps": 204665, "loss": 0.1532, "lr": 1.2792233422861603e-06, "epoch": 2.34468521730633, "percentage": 46.89, "elapsed_time": "2:04:21", "remaining_time": "2:20:50", "throughput": 8671.75, "total_tokens": 64705128} +{"current_steps": 95980, "total_steps": 204665, "loss": 0.0481, "lr": 1.2791414554870116e-06, "epoch": 2.344807368138177, "percentage": 46.9, "elapsed_time": "2:04:21", "remaining_time": "2:20:49", "throughput": 8671.75, "total_tokens": 64708072} +{"current_steps": 95985, "total_steps": 204665, "loss": 0.037, "lr": 1.2790595666578717e-06, "epoch": 2.3449295189700243, "percentage": 46.9, "elapsed_time": "2:04:22", "remaining_time": "2:20:49", "throughput": 8671.77, "total_tokens": 64711144} +{"current_steps": 95990, "total_steps": 204665, "loss": 0.0006, "lr": 1.2789776757993352e-06, "epoch": 2.3450516698018715, "percentage": 46.9, "elapsed_time": "2:04:22", "remaining_time": "2:20:48", "throughput": 8671.82, "total_tokens": 64714536} +{"current_steps": 95995, "total_steps": 204665, "loss": 0.0004, "lr": 1.278895782911998e-06, "epoch": 2.3451738206337187, "percentage": 46.9, "elapsed_time": "2:04:22", "remaining_time": "2:20:48", "throughput": 8671.82, "total_tokens": 64717480} +{"current_steps": 96000, "total_steps": 204665, "loss": 0.0002, "lr": 1.2788138879964557e-06, "epoch": 2.345295971465566, "percentage": 46.91, "elapsed_time": "2:04:23", "remaining_time": "2:20:47", "throughput": 8671.88, "total_tokens": 64721000} +{"current_steps": 96005, "total_steps": 204665, "loss": 0.041, "lr": 1.2787319910533036e-06, "epoch": 2.3454181222974126, "percentage": 46.91, "elapsed_time": "2:04:23", "remaining_time": "2:20:47", "throughput": 8671.63, "total_tokens": 64724456} +{"current_steps": 96010, "total_steps": 204665, "loss": 0.049, "lr": 1.2786500920831377e-06, "epoch": 2.34554027312926, "percentage": 46.91, "elapsed_time": "2:04:24", "remaining_time": "2:20:47", "throughput": 8671.71, "total_tokens": 64728104} +{"current_steps": 96015, "total_steps": 204665, "loss": 0.0442, "lr": 1.2785681910865535e-06, "epoch": 2.345662423961107, "percentage": 46.91, "elapsed_time": "2:04:24", "remaining_time": "2:20:46", "throughput": 8671.72, "total_tokens": 64731112} +{"current_steps": 96020, "total_steps": 204665, "loss": 0.0736, "lr": 1.2784862880641462e-06, "epoch": 2.345784574792954, "percentage": 46.92, "elapsed_time": "2:04:24", "remaining_time": "2:20:46", "throughput": 8671.75, "total_tokens": 64734376} +{"current_steps": 96025, "total_steps": 204665, "loss": 0.0002, "lr": 1.2784043830165119e-06, "epoch": 2.3459067256248014, "percentage": 46.92, "elapsed_time": "2:04:25", "remaining_time": "2:20:46", "throughput": 8671.8, "total_tokens": 64737768} +{"current_steps": 96030, "total_steps": 204665, "loss": 0.0594, "lr": 1.2783224759442459e-06, "epoch": 2.3460288764566486, "percentage": 46.92, "elapsed_time": "2:04:25", "remaining_time": "2:20:45", "throughput": 8671.81, "total_tokens": 64740776} +{"current_steps": 96035, "total_steps": 204665, "loss": 0.0458, "lr": 1.2782405668479442e-06, "epoch": 2.346151027288496, "percentage": 46.92, "elapsed_time": "2:04:25", "remaining_time": "2:20:45", "throughput": 8671.84, "total_tokens": 64743976} +{"current_steps": 96040, "total_steps": 204665, "loss": 0.0798, "lr": 1.2781586557282022e-06, "epoch": 2.346273178120343, "percentage": 46.93, "elapsed_time": "2:04:26", "remaining_time": "2:20:44", "throughput": 8671.87, "total_tokens": 64747176} +{"current_steps": 96045, "total_steps": 204665, "loss": 0.0426, "lr": 1.2780767425856155e-06, "epoch": 2.34639532895219, "percentage": 46.93, "elapsed_time": "2:04:26", "remaining_time": "2:20:44", "throughput": 8671.89, "total_tokens": 64750248} +{"current_steps": 96050, "total_steps": 204665, "loss": 0.0006, "lr": 1.2779948274207802e-06, "epoch": 2.3465174797840374, "percentage": 46.93, "elapsed_time": "2:04:27", "remaining_time": "2:20:43", "throughput": 8671.89, "total_tokens": 64753192} +{"current_steps": 96055, "total_steps": 204665, "loss": 0.0522, "lr": 1.2779129102342915e-06, "epoch": 2.3466396306158845, "percentage": 46.93, "elapsed_time": "2:04:27", "remaining_time": "2:20:43", "throughput": 8671.94, "total_tokens": 64756584} +{"current_steps": 96060, "total_steps": 204665, "loss": 0.0005, "lr": 1.2778309910267454e-06, "epoch": 2.3467617814477317, "percentage": 46.94, "elapsed_time": "2:04:27", "remaining_time": "2:20:42", "throughput": 8672.01, "total_tokens": 64760168} +{"current_steps": 96065, "total_steps": 204665, "loss": 0.0003, "lr": 1.2777490697987378e-06, "epoch": 2.346883932279579, "percentage": 46.94, "elapsed_time": "2:04:28", "remaining_time": "2:20:42", "throughput": 8672.04, "total_tokens": 64763432} +{"current_steps": 96070, "total_steps": 204665, "loss": 0.0005, "lr": 1.277667146550864e-06, "epoch": 2.347006083111426, "percentage": 46.94, "elapsed_time": "2:04:28", "remaining_time": "2:20:42", "throughput": 8672.06, "total_tokens": 64766568} +{"current_steps": 96075, "total_steps": 204665, "loss": 0.0002, "lr": 1.2775852212837202e-06, "epoch": 2.3471282339432733, "percentage": 46.94, "elapsed_time": "2:04:28", "remaining_time": "2:20:41", "throughput": 8672.09, "total_tokens": 64769768} +{"current_steps": 96080, "total_steps": 204665, "loss": 0.0669, "lr": 1.277503293997902e-06, "epoch": 2.3472503847751205, "percentage": 46.95, "elapsed_time": "2:04:29", "remaining_time": "2:20:41", "throughput": 8672.09, "total_tokens": 64772648} +{"current_steps": 96085, "total_steps": 204665, "loss": 0.0002, "lr": 1.277421364694005e-06, "epoch": 2.3473725356069677, "percentage": 46.95, "elapsed_time": "2:04:29", "remaining_time": "2:20:40", "throughput": 8672.12, "total_tokens": 64775912} +{"current_steps": 96090, "total_steps": 204665, "loss": 0.0228, "lr": 1.2773394333726253e-06, "epoch": 2.3474946864388144, "percentage": 46.95, "elapsed_time": "2:04:29", "remaining_time": "2:20:40", "throughput": 8672.16, "total_tokens": 64779176} +{"current_steps": 96095, "total_steps": 204665, "loss": 0.0003, "lr": 1.2772575000343589e-06, "epoch": 2.347616837270662, "percentage": 46.95, "elapsed_time": "2:04:30", "remaining_time": "2:20:39", "throughput": 8672.22, "total_tokens": 64782696} +{"current_steps": 96100, "total_steps": 204665, "loss": 0.0837, "lr": 1.277175564679801e-06, "epoch": 2.347738988102509, "percentage": 46.95, "elapsed_time": "2:04:30", "remaining_time": "2:20:39", "throughput": 8672.26, "total_tokens": 64785960} +{"current_steps": 96105, "total_steps": 204665, "loss": 0.0741, "lr": 1.2770936273095483e-06, "epoch": 2.347861138934356, "percentage": 46.96, "elapsed_time": "2:04:30", "remaining_time": "2:20:39", "throughput": 8672.3, "total_tokens": 64789224} +{"current_steps": 96110, "total_steps": 204665, "loss": 0.0006, "lr": 1.2770116879241961e-06, "epoch": 2.347983289766203, "percentage": 46.96, "elapsed_time": "2:04:31", "remaining_time": "2:20:38", "throughput": 8672.32, "total_tokens": 64792360} +{"current_steps": 96115, "total_steps": 204665, "loss": 0.0465, "lr": 1.27692974652434e-06, "epoch": 2.3481054405980504, "percentage": 46.96, "elapsed_time": "2:04:31", "remaining_time": "2:20:38", "throughput": 8672.4, "total_tokens": 64796072} +{"current_steps": 96120, "total_steps": 204665, "loss": 0.0628, "lr": 1.2768478031105764e-06, "epoch": 2.3482275914298976, "percentage": 46.96, "elapsed_time": "2:04:31", "remaining_time": "2:20:37", "throughput": 8672.4, "total_tokens": 64799016} +{"current_steps": 96125, "total_steps": 204665, "loss": 0.0003, "lr": 1.2767658576835015e-06, "epoch": 2.3483497422617448, "percentage": 46.97, "elapsed_time": "2:04:32", "remaining_time": "2:20:37", "throughput": 8672.48, "total_tokens": 64802600} +{"current_steps": 96130, "total_steps": 204665, "loss": 0.0532, "lr": 1.2766839102437105e-06, "epoch": 2.348471893093592, "percentage": 46.97, "elapsed_time": "2:04:32", "remaining_time": "2:20:36", "throughput": 8672.52, "total_tokens": 64805928} +{"current_steps": 96135, "total_steps": 204665, "loss": 0.0017, "lr": 1.2766019607917997e-06, "epoch": 2.348594043925439, "percentage": 46.97, "elapsed_time": "2:04:32", "remaining_time": "2:20:36", "throughput": 8672.57, "total_tokens": 64809320} +{"current_steps": 96140, "total_steps": 204665, "loss": 0.063, "lr": 1.2765200093283653e-06, "epoch": 2.3487161947572863, "percentage": 46.97, "elapsed_time": "2:04:33", "remaining_time": "2:20:35", "throughput": 8672.62, "total_tokens": 64812712} +{"current_steps": 96145, "total_steps": 204665, "loss": 0.0786, "lr": 1.2764380558540028e-06, "epoch": 2.3488383455891335, "percentage": 46.98, "elapsed_time": "2:04:33", "remaining_time": "2:20:35", "throughput": 8672.64, "total_tokens": 64815912} +{"current_steps": 96150, "total_steps": 204665, "loss": 0.0001, "lr": 1.2763561003693087e-06, "epoch": 2.3489604964209807, "percentage": 46.98, "elapsed_time": "2:04:33", "remaining_time": "2:20:35", "throughput": 8672.72, "total_tokens": 64819560} +{"current_steps": 96155, "total_steps": 204665, "loss": 0.0348, "lr": 1.2762741428748785e-06, "epoch": 2.349082647252828, "percentage": 46.98, "elapsed_time": "2:04:34", "remaining_time": "2:20:34", "throughput": 8672.79, "total_tokens": 64823144} +{"current_steps": 96160, "total_steps": 204665, "loss": 0.1014, "lr": 1.2761921833713082e-06, "epoch": 2.349204798084675, "percentage": 46.98, "elapsed_time": "2:04:34", "remaining_time": "2:20:34", "throughput": 8672.88, "total_tokens": 64826920} +{"current_steps": 96165, "total_steps": 204665, "loss": 0.0003, "lr": 1.2761102218591943e-06, "epoch": 2.3493269489165223, "percentage": 46.99, "elapsed_time": "2:04:35", "remaining_time": "2:20:33", "throughput": 8672.93, "total_tokens": 64830248} +{"current_steps": 96170, "total_steps": 204665, "loss": 0.0028, "lr": 1.2760282583391327e-06, "epoch": 2.3494490997483695, "percentage": 46.99, "elapsed_time": "2:04:35", "remaining_time": "2:20:33", "throughput": 8672.94, "total_tokens": 64833320} +{"current_steps": 96175, "total_steps": 204665, "loss": 0.1198, "lr": 1.275946292811719e-06, "epoch": 2.349571250580216, "percentage": 46.99, "elapsed_time": "2:04:35", "remaining_time": "2:20:32", "throughput": 8672.99, "total_tokens": 64836648} +{"current_steps": 96180, "total_steps": 204665, "loss": 0.1153, "lr": 1.2758643252775498e-06, "epoch": 2.349693401412064, "percentage": 46.99, "elapsed_time": "2:04:36", "remaining_time": "2:20:32", "throughput": 8673.01, "total_tokens": 64839784} +{"current_steps": 96185, "total_steps": 204665, "loss": 0.0293, "lr": 1.275782355737221e-06, "epoch": 2.3498155522439106, "percentage": 47.0, "elapsed_time": "2:04:36", "remaining_time": "2:20:32", "throughput": 8673.04, "total_tokens": 64842984} +{"current_steps": 96190, "total_steps": 204665, "loss": 0.0003, "lr": 1.275700384191329e-06, "epoch": 2.3499377030757578, "percentage": 47.0, "elapsed_time": "2:04:36", "remaining_time": "2:20:31", "throughput": 8673.07, "total_tokens": 64846184} +{"current_steps": 96195, "total_steps": 204665, "loss": 0.0002, "lr": 1.2756184106404693e-06, "epoch": 2.350059853907605, "percentage": 47.0, "elapsed_time": "2:04:37", "remaining_time": "2:20:31", "throughput": 8673.08, "total_tokens": 64849192} +{"current_steps": 96200, "total_steps": 204665, "loss": 0.0001, "lr": 1.2755364350852387e-06, "epoch": 2.350182004739452, "percentage": 47.0, "elapsed_time": "2:04:37", "remaining_time": "2:20:30", "throughput": 8673.13, "total_tokens": 64852584} +{"current_steps": 96205, "total_steps": 204665, "loss": 0.0001, "lr": 1.2754544575262327e-06, "epoch": 2.3503041555712993, "percentage": 47.01, "elapsed_time": "2:04:37", "remaining_time": "2:20:30", "throughput": 8673.36, "total_tokens": 64857768} +{"current_steps": 96210, "total_steps": 204665, "loss": 0.0503, "lr": 1.2753724779640483e-06, "epoch": 2.3504263064031465, "percentage": 47.01, "elapsed_time": "2:04:38", "remaining_time": "2:20:29", "throughput": 8673.42, "total_tokens": 64861224} +{"current_steps": 96215, "total_steps": 204665, "loss": 0.0816, "lr": 1.2752904963992807e-06, "epoch": 2.3505484572349937, "percentage": 47.01, "elapsed_time": "2:04:38", "remaining_time": "2:20:29", "throughput": 8673.46, "total_tokens": 64864552} +{"current_steps": 96220, "total_steps": 204665, "loss": 0.0001, "lr": 1.2752085128325267e-06, "epoch": 2.350670608066841, "percentage": 47.01, "elapsed_time": "2:04:38", "remaining_time": "2:20:29", "throughput": 8673.53, "total_tokens": 64868072} +{"current_steps": 96225, "total_steps": 204665, "loss": 0.0004, "lr": 1.2751265272643826e-06, "epoch": 2.350792758898688, "percentage": 47.02, "elapsed_time": "2:04:39", "remaining_time": "2:20:28", "throughput": 8673.54, "total_tokens": 64871144} +{"current_steps": 96230, "total_steps": 204665, "loss": 0.0466, "lr": 1.275044539695444e-06, "epoch": 2.3509149097305353, "percentage": 47.02, "elapsed_time": "2:04:39", "remaining_time": "2:20:28", "throughput": 8673.6, "total_tokens": 64874600} +{"current_steps": 96235, "total_steps": 204665, "loss": 0.0395, "lr": 1.2749625501263076e-06, "epoch": 2.3510370605623825, "percentage": 47.02, "elapsed_time": "2:04:39", "remaining_time": "2:20:27", "throughput": 8673.64, "total_tokens": 64877992} +{"current_steps": 96240, "total_steps": 204665, "loss": 0.0002, "lr": 1.2748805585575699e-06, "epoch": 2.3511592113942297, "percentage": 47.02, "elapsed_time": "2:04:40", "remaining_time": "2:20:27", "throughput": 8673.67, "total_tokens": 64881128} +{"current_steps": 96245, "total_steps": 204665, "loss": 0.0001, "lr": 1.2747985649898265e-06, "epoch": 2.351281362226077, "percentage": 47.03, "elapsed_time": "2:04:40", "remaining_time": "2:20:26", "throughput": 8673.67, "total_tokens": 64884136} +{"current_steps": 96250, "total_steps": 204665, "loss": 0.1235, "lr": 1.2747165694236741e-06, "epoch": 2.351403513057924, "percentage": 47.03, "elapsed_time": "2:04:40", "remaining_time": "2:20:26", "throughput": 8673.67, "total_tokens": 64887016} +{"current_steps": 96255, "total_steps": 204665, "loss": 0.0001, "lr": 1.274634571859709e-06, "epoch": 2.3515256638897712, "percentage": 47.03, "elapsed_time": "2:04:41", "remaining_time": "2:20:25", "throughput": 8673.72, "total_tokens": 64890472} +{"current_steps": 96260, "total_steps": 204665, "loss": 0.0003, "lr": 1.2745525722985276e-06, "epoch": 2.3516478147216184, "percentage": 47.03, "elapsed_time": "2:04:41", "remaining_time": "2:20:25", "throughput": 8673.72, "total_tokens": 64893352} +{"current_steps": 96265, "total_steps": 204665, "loss": 0.0322, "lr": 1.2744705707407259e-06, "epoch": 2.3517699655534656, "percentage": 47.04, "elapsed_time": "2:04:41", "remaining_time": "2:20:25", "throughput": 8673.73, "total_tokens": 64896360} +{"current_steps": 96270, "total_steps": 204665, "loss": 0.1283, "lr": 1.2743885671869003e-06, "epoch": 2.3518921163853124, "percentage": 47.04, "elapsed_time": "2:04:42", "remaining_time": "2:20:24", "throughput": 8673.78, "total_tokens": 64899752} +{"current_steps": 96275, "total_steps": 204665, "loss": 0.0737, "lr": 1.2743065616376472e-06, "epoch": 2.3520142672171596, "percentage": 47.04, "elapsed_time": "2:04:42", "remaining_time": "2:20:24", "throughput": 8673.83, "total_tokens": 64903208} +{"current_steps": 96280, "total_steps": 204665, "loss": 0.0397, "lr": 1.274224554093563e-06, "epoch": 2.3521364180490067, "percentage": 47.04, "elapsed_time": "2:04:42", "remaining_time": "2:20:23", "throughput": 8673.83, "total_tokens": 64906152} +{"current_steps": 96285, "total_steps": 204665, "loss": 0.0003, "lr": 1.2741425445552442e-06, "epoch": 2.352258568880854, "percentage": 47.05, "elapsed_time": "2:04:43", "remaining_time": "2:20:23", "throughput": 8673.87, "total_tokens": 64909480} +{"current_steps": 96290, "total_steps": 204665, "loss": 0.015, "lr": 1.274060533023287e-06, "epoch": 2.352380719712701, "percentage": 47.05, "elapsed_time": "2:04:43", "remaining_time": "2:20:22", "throughput": 8673.87, "total_tokens": 64912360} +{"current_steps": 96295, "total_steps": 204665, "loss": 0.1568, "lr": 1.273978519498288e-06, "epoch": 2.3525028705445483, "percentage": 47.05, "elapsed_time": "2:04:44", "remaining_time": "2:20:22", "throughput": 8673.91, "total_tokens": 64915688} +{"current_steps": 96300, "total_steps": 204665, "loss": 0.0002, "lr": 1.2738965039808433e-06, "epoch": 2.3526250213763955, "percentage": 47.05, "elapsed_time": "2:04:44", "remaining_time": "2:20:22", "throughput": 8673.92, "total_tokens": 64918696} +{"current_steps": 96305, "total_steps": 204665, "loss": 0.2386, "lr": 1.2738144864715498e-06, "epoch": 2.3527471722082427, "percentage": 47.05, "elapsed_time": "2:04:44", "remaining_time": "2:20:21", "throughput": 8673.95, "total_tokens": 64921896} +{"current_steps": 96310, "total_steps": 204665, "loss": 0.0751, "lr": 1.2737324669710036e-06, "epoch": 2.35286932304009, "percentage": 47.06, "elapsed_time": "2:04:45", "remaining_time": "2:20:21", "throughput": 8674.07, "total_tokens": 64925992} +{"current_steps": 96315, "total_steps": 204665, "loss": 0.0601, "lr": 1.2736504454798013e-06, "epoch": 2.352991473871937, "percentage": 47.06, "elapsed_time": "2:04:45", "remaining_time": "2:20:20", "throughput": 8674.09, "total_tokens": 64929064} +{"current_steps": 96320, "total_steps": 204665, "loss": 0.0003, "lr": 1.2735684219985395e-06, "epoch": 2.3531136247037843, "percentage": 47.06, "elapsed_time": "2:04:45", "remaining_time": "2:20:20", "throughput": 8674.16, "total_tokens": 64932648} +{"current_steps": 96325, "total_steps": 204665, "loss": 0.001, "lr": 1.2734863965278143e-06, "epoch": 2.3532357755356315, "percentage": 47.06, "elapsed_time": "2:04:46", "remaining_time": "2:20:19", "throughput": 8674.21, "total_tokens": 64936104} +{"current_steps": 96330, "total_steps": 204665, "loss": 0.0004, "lr": 1.2734043690682228e-06, "epoch": 2.3533579263674786, "percentage": 47.07, "elapsed_time": "2:04:46", "remaining_time": "2:20:19", "throughput": 8674.23, "total_tokens": 64939176} +{"current_steps": 96335, "total_steps": 204665, "loss": 0.0729, "lr": 1.2733223396203606e-06, "epoch": 2.353480077199326, "percentage": 47.07, "elapsed_time": "2:04:46", "remaining_time": "2:20:19", "throughput": 8674.29, "total_tokens": 64942696} +{"current_steps": 96340, "total_steps": 204665, "loss": 0.1607, "lr": 1.2732403081848254e-06, "epoch": 2.353602228031173, "percentage": 47.07, "elapsed_time": "2:04:47", "remaining_time": "2:20:18", "throughput": 8674.34, "total_tokens": 64946088} +{"current_steps": 96345, "total_steps": 204665, "loss": 0.0001, "lr": 1.273158274762213e-06, "epoch": 2.35372437886302, "percentage": 47.07, "elapsed_time": "2:04:47", "remaining_time": "2:20:18", "throughput": 8674.41, "total_tokens": 64949736} +{"current_steps": 96350, "total_steps": 204665, "loss": 0.001, "lr": 1.27307623935312e-06, "epoch": 2.3538465296948674, "percentage": 47.08, "elapsed_time": "2:04:47", "remaining_time": "2:20:17", "throughput": 8674.43, "total_tokens": 64952872} +{"current_steps": 96355, "total_steps": 204665, "loss": 0.0491, "lr": 1.2729942019581433e-06, "epoch": 2.353968680526714, "percentage": 47.08, "elapsed_time": "2:04:48", "remaining_time": "2:20:17", "throughput": 8674.57, "total_tokens": 64957096} +{"current_steps": 96360, "total_steps": 204665, "loss": 0.0009, "lr": 1.2729121625778793e-06, "epoch": 2.354090831358562, "percentage": 47.08, "elapsed_time": "2:04:48", "remaining_time": "2:20:16", "throughput": 8674.65, "total_tokens": 64960744} +{"current_steps": 96365, "total_steps": 204665, "loss": 0.0507, "lr": 1.2728301212129246e-06, "epoch": 2.3542129821904085, "percentage": 47.08, "elapsed_time": "2:04:48", "remaining_time": "2:20:16", "throughput": 8674.75, "total_tokens": 64964584} +{"current_steps": 96370, "total_steps": 204665, "loss": 0.0248, "lr": 1.272748077863876e-06, "epoch": 2.3543351330222557, "percentage": 47.09, "elapsed_time": "2:04:49", "remaining_time": "2:20:16", "throughput": 8674.82, "total_tokens": 64968232} +{"current_steps": 96375, "total_steps": 204665, "loss": 0.0002, "lr": 1.27266603253133e-06, "epoch": 2.354457283854103, "percentage": 47.09, "elapsed_time": "2:04:49", "remaining_time": "2:20:15", "throughput": 8674.85, "total_tokens": 64971496} +{"current_steps": 96380, "total_steps": 204665, "loss": 0.0775, "lr": 1.272583985215883e-06, "epoch": 2.35457943468595, "percentage": 47.09, "elapsed_time": "2:04:49", "remaining_time": "2:20:15", "throughput": 8674.9, "total_tokens": 64974824} +{"current_steps": 96385, "total_steps": 204665, "loss": 0.0183, "lr": 1.2725019359181323e-06, "epoch": 2.3547015855177973, "percentage": 47.09, "elapsed_time": "2:04:50", "remaining_time": "2:20:14", "throughput": 8674.93, "total_tokens": 64978088} +{"current_steps": 96390, "total_steps": 204665, "loss": 0.1264, "lr": 1.2724198846386743e-06, "epoch": 2.3548237363496445, "percentage": 47.1, "elapsed_time": "2:04:50", "remaining_time": "2:20:14", "throughput": 8675.0, "total_tokens": 64981608} +{"current_steps": 96395, "total_steps": 204665, "loss": 0.0001, "lr": 1.2723378313781053e-06, "epoch": 2.3549458871814917, "percentage": 47.1, "elapsed_time": "2:04:51", "remaining_time": "2:20:13", "throughput": 8675.01, "total_tokens": 64984744} +{"current_steps": 96400, "total_steps": 204665, "loss": 0.0253, "lr": 1.2722557761370224e-06, "epoch": 2.355068038013339, "percentage": 47.1, "elapsed_time": "2:04:51", "remaining_time": "2:20:13", "throughput": 8675.09, "total_tokens": 64988456} +{"current_steps": 96405, "total_steps": 204665, "loss": 0.0001, "lr": 1.2721737189160221e-06, "epoch": 2.355190188845186, "percentage": 47.1, "elapsed_time": "2:04:51", "remaining_time": "2:20:12", "throughput": 8675.13, "total_tokens": 64991784} +{"current_steps": 96410, "total_steps": 204665, "loss": 0.0002, "lr": 1.2720916597157017e-06, "epoch": 2.3553123396770332, "percentage": 47.11, "elapsed_time": "2:04:52", "remaining_time": "2:20:12", "throughput": 8675.16, "total_tokens": 64994920} +{"current_steps": 96415, "total_steps": 204665, "loss": 0.0005, "lr": 1.2720095985366578e-06, "epoch": 2.3554344905088804, "percentage": 47.11, "elapsed_time": "2:04:52", "remaining_time": "2:20:12", "throughput": 8675.21, "total_tokens": 64998376} +{"current_steps": 96420, "total_steps": 204665, "loss": 0.0002, "lr": 1.2719275353794863e-06, "epoch": 2.3555566413407276, "percentage": 47.11, "elapsed_time": "2:04:52", "remaining_time": "2:20:11", "throughput": 8675.22, "total_tokens": 65001448} +{"current_steps": 96425, "total_steps": 204665, "loss": 0.0178, "lr": 1.271845470244785e-06, "epoch": 2.355678792172575, "percentage": 47.11, "elapsed_time": "2:04:53", "remaining_time": "2:20:11", "throughput": 8675.48, "total_tokens": 65006824} +{"current_steps": 96430, "total_steps": 204665, "loss": 0.0458, "lr": 1.27176340313315e-06, "epoch": 2.355800943004422, "percentage": 47.12, "elapsed_time": "2:04:53", "remaining_time": "2:20:10", "throughput": 8675.51, "total_tokens": 65010024} +{"current_steps": 96435, "total_steps": 204665, "loss": 0.0004, "lr": 1.2716813340451787e-06, "epoch": 2.355923093836269, "percentage": 47.12, "elapsed_time": "2:04:53", "remaining_time": "2:20:10", "throughput": 8675.54, "total_tokens": 65013288} +{"current_steps": 96440, "total_steps": 204665, "loss": 0.0004, "lr": 1.2715992629814673e-06, "epoch": 2.3560452446681164, "percentage": 47.12, "elapsed_time": "2:04:54", "remaining_time": "2:20:10", "throughput": 8675.59, "total_tokens": 65016680} +{"current_steps": 96445, "total_steps": 204665, "loss": 0.0003, "lr": 1.2715171899426134e-06, "epoch": 2.3561673954999636, "percentage": 47.12, "elapsed_time": "2:04:54", "remaining_time": "2:20:09", "throughput": 8675.59, "total_tokens": 65019560} +{"current_steps": 96450, "total_steps": 204665, "loss": 0.0581, "lr": 1.2714351149292135e-06, "epoch": 2.3562895463318103, "percentage": 47.13, "elapsed_time": "2:04:54", "remaining_time": "2:20:09", "throughput": 8675.62, "total_tokens": 65022888} +{"current_steps": 96455, "total_steps": 204665, "loss": 0.104, "lr": 1.2713530379418642e-06, "epoch": 2.3564116971636575, "percentage": 47.13, "elapsed_time": "2:04:55", "remaining_time": "2:20:08", "throughput": 8675.62, "total_tokens": 65025768} +{"current_steps": 96460, "total_steps": 204665, "loss": 0.0005, "lr": 1.2712709589811628e-06, "epoch": 2.3565338479955047, "percentage": 47.13, "elapsed_time": "2:04:55", "remaining_time": "2:20:08", "throughput": 8675.72, "total_tokens": 65029672} +{"current_steps": 96465, "total_steps": 204665, "loss": 0.0004, "lr": 1.271188878047706e-06, "epoch": 2.356655998827352, "percentage": 47.13, "elapsed_time": "2:04:55", "remaining_time": "2:20:07", "throughput": 8675.76, "total_tokens": 65032936} +{"current_steps": 96470, "total_steps": 204665, "loss": 0.0393, "lr": 1.2711067951420906e-06, "epoch": 2.356778149659199, "percentage": 47.14, "elapsed_time": "2:04:56", "remaining_time": "2:20:07", "throughput": 8675.77, "total_tokens": 65035944} +{"current_steps": 96475, "total_steps": 204665, "loss": 0.0338, "lr": 1.2710247102649138e-06, "epoch": 2.3569003004910463, "percentage": 47.14, "elapsed_time": "2:04:56", "remaining_time": "2:20:06", "throughput": 8675.79, "total_tokens": 65039080} +{"current_steps": 96480, "total_steps": 204665, "loss": 0.0011, "lr": 1.2709426234167723e-06, "epoch": 2.3570224513228935, "percentage": 47.14, "elapsed_time": "2:04:56", "remaining_time": "2:20:06", "throughput": 8675.84, "total_tokens": 65042536} +{"current_steps": 96485, "total_steps": 204665, "loss": 0.129, "lr": 1.2708605345982634e-06, "epoch": 2.3571446021547406, "percentage": 47.14, "elapsed_time": "2:04:57", "remaining_time": "2:20:06", "throughput": 8675.91, "total_tokens": 65046056} +{"current_steps": 96490, "total_steps": 204665, "loss": 0.0008, "lr": 1.2707784438099833e-06, "epoch": 2.357266752986588, "percentage": 47.15, "elapsed_time": "2:04:57", "remaining_time": "2:20:05", "throughput": 8675.96, "total_tokens": 65049512} +{"current_steps": 96495, "total_steps": 204665, "loss": 0.0002, "lr": 1.27069635105253e-06, "epoch": 2.357388903818435, "percentage": 47.15, "elapsed_time": "2:04:58", "remaining_time": "2:20:05", "throughput": 8676.02, "total_tokens": 65052968} +{"current_steps": 96500, "total_steps": 204665, "loss": 0.0453, "lr": 1.2706142563264999e-06, "epoch": 2.357511054650282, "percentage": 47.15, "elapsed_time": "2:04:58", "remaining_time": "2:20:04", "throughput": 8676.02, "total_tokens": 65055976} +{"current_steps": 96505, "total_steps": 204665, "loss": 0.0924, "lr": 1.2705321596324901e-06, "epoch": 2.3576332054821294, "percentage": 47.15, "elapsed_time": "2:04:58", "remaining_time": "2:20:04", "throughput": 8676.08, "total_tokens": 65059432} +{"current_steps": 96510, "total_steps": 204665, "loss": 0.0001, "lr": 1.2704500609710977e-06, "epoch": 2.3577553563139766, "percentage": 47.16, "elapsed_time": "2:04:59", "remaining_time": "2:20:03", "throughput": 8676.12, "total_tokens": 65062760} +{"current_steps": 96515, "total_steps": 204665, "loss": 0.145, "lr": 1.2703679603429198e-06, "epoch": 2.357877507145824, "percentage": 47.16, "elapsed_time": "2:04:59", "remaining_time": "2:20:03", "throughput": 8676.16, "total_tokens": 65066088} +{"current_steps": 96520, "total_steps": 204665, "loss": 0.0889, "lr": 1.2702858577485533e-06, "epoch": 2.357999657977671, "percentage": 47.16, "elapsed_time": "2:04:59", "remaining_time": "2:20:03", "throughput": 8676.19, "total_tokens": 65069352} +{"current_steps": 96525, "total_steps": 204665, "loss": 0.0003, "lr": 1.2702037531885954e-06, "epoch": 2.358121808809518, "percentage": 47.16, "elapsed_time": "2:05:00", "remaining_time": "2:20:02", "throughput": 8676.26, "total_tokens": 65072936} +{"current_steps": 96530, "total_steps": 204665, "loss": 0.0004, "lr": 1.270121646663643e-06, "epoch": 2.3582439596413654, "percentage": 47.16, "elapsed_time": "2:05:00", "remaining_time": "2:20:02", "throughput": 8676.3, "total_tokens": 65076200} +{"current_steps": 96535, "total_steps": 204665, "loss": 0.0002, "lr": 1.2700395381742937e-06, "epoch": 2.358366110473212, "percentage": 47.17, "elapsed_time": "2:05:00", "remaining_time": "2:20:01", "throughput": 8676.32, "total_tokens": 65079400} +{"current_steps": 96540, "total_steps": 204665, "loss": 0.0814, "lr": 1.269957427721144e-06, "epoch": 2.3584882613050597, "percentage": 47.17, "elapsed_time": "2:05:01", "remaining_time": "2:20:01", "throughput": 8676.34, "total_tokens": 65082536} +{"current_steps": 96545, "total_steps": 204665, "loss": 0.1191, "lr": 1.2698753153047913e-06, "epoch": 2.3586104121369065, "percentage": 47.17, "elapsed_time": "2:05:01", "remaining_time": "2:20:00", "throughput": 8676.39, "total_tokens": 65085928} +{"current_steps": 96550, "total_steps": 204665, "loss": 0.0002, "lr": 1.2697932009258324e-06, "epoch": 2.3587325629687537, "percentage": 47.17, "elapsed_time": "2:05:01", "remaining_time": "2:20:00", "throughput": 8676.44, "total_tokens": 65089320} +{"current_steps": 96555, "total_steps": 204665, "loss": 0.0001, "lr": 1.269711084584865e-06, "epoch": 2.358854713800601, "percentage": 47.18, "elapsed_time": "2:05:02", "remaining_time": "2:19:59", "throughput": 8676.43, "total_tokens": 65092136} +{"current_steps": 96560, "total_steps": 204665, "loss": 0.0002, "lr": 1.2696289662824863e-06, "epoch": 2.358976864632448, "percentage": 47.18, "elapsed_time": "2:05:02", "remaining_time": "2:19:59", "throughput": 8676.48, "total_tokens": 65095528} +{"current_steps": 96565, "total_steps": 204665, "loss": 0.0002, "lr": 1.2695468460192928e-06, "epoch": 2.3590990154642952, "percentage": 47.18, "elapsed_time": "2:05:02", "remaining_time": "2:19:59", "throughput": 8676.47, "total_tokens": 65098472} +{"current_steps": 96570, "total_steps": 204665, "loss": 0.0438, "lr": 1.2694647237958827e-06, "epoch": 2.3592211662961424, "percentage": 47.18, "elapsed_time": "2:05:03", "remaining_time": "2:19:58", "throughput": 8676.53, "total_tokens": 65101992} +{"current_steps": 96575, "total_steps": 204665, "loss": 0.0005, "lr": 1.2693825996128524e-06, "epoch": 2.3593433171279896, "percentage": 47.19, "elapsed_time": "2:05:03", "remaining_time": "2:19:58", "throughput": 8676.6, "total_tokens": 65105576} +{"current_steps": 96580, "total_steps": 204665, "loss": 0.0029, "lr": 1.2693004734707993e-06, "epoch": 2.359465467959837, "percentage": 47.19, "elapsed_time": "2:05:03", "remaining_time": "2:19:57", "throughput": 8676.65, "total_tokens": 65108968} +{"current_steps": 96585, "total_steps": 204665, "loss": 0.0005, "lr": 1.2692183453703205e-06, "epoch": 2.359587618791684, "percentage": 47.19, "elapsed_time": "2:05:04", "remaining_time": "2:19:57", "throughput": 8676.65, "total_tokens": 65111976} +{"current_steps": 96590, "total_steps": 204665, "loss": 0.0001, "lr": 1.2691362153120135e-06, "epoch": 2.359709769623531, "percentage": 47.19, "elapsed_time": "2:05:04", "remaining_time": "2:19:56", "throughput": 8676.72, "total_tokens": 65115560} +{"current_steps": 96595, "total_steps": 204665, "loss": 0.0001, "lr": 1.269054083296476e-06, "epoch": 2.3598319204553784, "percentage": 47.2, "elapsed_time": "2:05:04", "remaining_time": "2:19:56", "throughput": 8676.76, "total_tokens": 65118888} +{"current_steps": 96600, "total_steps": 204665, "loss": 0.0454, "lr": 1.2689719493243046e-06, "epoch": 2.3599540712872256, "percentage": 47.2, "elapsed_time": "2:05:05", "remaining_time": "2:19:56", "throughput": 8676.84, "total_tokens": 65122600} +{"current_steps": 96605, "total_steps": 204665, "loss": 0.0003, "lr": 1.2688898133960968e-06, "epoch": 2.3600762221190728, "percentage": 47.2, "elapsed_time": "2:05:05", "remaining_time": "2:19:55", "throughput": 8676.92, "total_tokens": 65126248} +{"current_steps": 96610, "total_steps": 204665, "loss": 0.1508, "lr": 1.2688076755124499e-06, "epoch": 2.36019837295092, "percentage": 47.2, "elapsed_time": "2:05:06", "remaining_time": "2:19:55", "throughput": 8676.97, "total_tokens": 65129640} +{"current_steps": 96615, "total_steps": 204665, "loss": 0.0001, "lr": 1.2687255356739615e-06, "epoch": 2.360320523782767, "percentage": 47.21, "elapsed_time": "2:05:06", "remaining_time": "2:19:54", "throughput": 8676.99, "total_tokens": 65132712} +{"current_steps": 96620, "total_steps": 204665, "loss": 0.0591, "lr": 1.2686433938812287e-06, "epoch": 2.360442674614614, "percentage": 47.21, "elapsed_time": "2:05:06", "remaining_time": "2:19:54", "throughput": 8677.04, "total_tokens": 65136104} +{"current_steps": 96625, "total_steps": 204665, "loss": 0.0002, "lr": 1.2685612501348486e-06, "epoch": 2.3605648254464615, "percentage": 47.21, "elapsed_time": "2:05:07", "remaining_time": "2:19:53", "throughput": 8677.1, "total_tokens": 65139624} +{"current_steps": 96630, "total_steps": 204665, "loss": 0.0002, "lr": 1.268479104435419e-06, "epoch": 2.3606869762783083, "percentage": 47.21, "elapsed_time": "2:05:07", "remaining_time": "2:19:53", "throughput": 8677.17, "total_tokens": 65143208} +{"current_steps": 96635, "total_steps": 204665, "loss": 0.0001, "lr": 1.2683969567835372e-06, "epoch": 2.3608091271101554, "percentage": 47.22, "elapsed_time": "2:05:07", "remaining_time": "2:19:53", "throughput": 8677.21, "total_tokens": 65146536} +{"current_steps": 96640, "total_steps": 204665, "loss": 0.0007, "lr": 1.2683148071798006e-06, "epoch": 2.3609312779420026, "percentage": 47.22, "elapsed_time": "2:05:08", "remaining_time": "2:19:52", "throughput": 8677.25, "total_tokens": 65149928} +{"current_steps": 96645, "total_steps": 204665, "loss": 0.0001, "lr": 1.2682326556248066e-06, "epoch": 2.36105342877385, "percentage": 47.22, "elapsed_time": "2:05:08", "remaining_time": "2:19:52", "throughput": 8677.3, "total_tokens": 65153256} +{"current_steps": 96650, "total_steps": 204665, "loss": 0.0002, "lr": 1.2681505021191523e-06, "epoch": 2.361175579605697, "percentage": 47.22, "elapsed_time": "2:05:08", "remaining_time": "2:19:51", "throughput": 8677.31, "total_tokens": 65156328} +{"current_steps": 96655, "total_steps": 204665, "loss": 0.0003, "lr": 1.2680683466634355e-06, "epoch": 2.361297730437544, "percentage": 47.23, "elapsed_time": "2:05:09", "remaining_time": "2:19:51", "throughput": 8677.34, "total_tokens": 65159528} +{"current_steps": 96660, "total_steps": 204665, "loss": 0.1168, "lr": 1.2679861892582535e-06, "epoch": 2.3614198812693914, "percentage": 47.23, "elapsed_time": "2:05:09", "remaining_time": "2:19:50", "throughput": 8677.41, "total_tokens": 65163176} +{"current_steps": 96665, "total_steps": 204665, "loss": 0.0502, "lr": 1.2679040299042041e-06, "epoch": 2.3615420321012386, "percentage": 47.23, "elapsed_time": "2:05:09", "remaining_time": "2:19:50", "throughput": 8677.45, "total_tokens": 65166504} +{"current_steps": 96670, "total_steps": 204665, "loss": 0.0626, "lr": 1.2678218686018848e-06, "epoch": 2.3616641829330858, "percentage": 47.23, "elapsed_time": "2:05:10", "remaining_time": "2:19:50", "throughput": 8677.48, "total_tokens": 65169704} +{"current_steps": 96675, "total_steps": 204665, "loss": 0.0961, "lr": 1.267739705351892e-06, "epoch": 2.361786333764933, "percentage": 47.24, "elapsed_time": "2:05:10", "remaining_time": "2:19:49", "throughput": 8677.58, "total_tokens": 65173608} +{"current_steps": 96680, "total_steps": 204665, "loss": 0.0542, "lr": 1.2676575401548248e-06, "epoch": 2.36190848459678, "percentage": 47.24, "elapsed_time": "2:05:10", "remaining_time": "2:19:49", "throughput": 8677.6, "total_tokens": 65176680} +{"current_steps": 96685, "total_steps": 204665, "loss": 0.0672, "lr": 1.2675753730112798e-06, "epoch": 2.3620306354286273, "percentage": 47.24, "elapsed_time": "2:05:11", "remaining_time": "2:19:48", "throughput": 8677.6, "total_tokens": 65179688} +{"current_steps": 96690, "total_steps": 204665, "loss": 0.0005, "lr": 1.2674932039218545e-06, "epoch": 2.3621527862604745, "percentage": 47.24, "elapsed_time": "2:05:11", "remaining_time": "2:19:48", "throughput": 8677.67, "total_tokens": 65183208} +{"current_steps": 96695, "total_steps": 204665, "loss": 0.0004, "lr": 1.2674110328871469e-06, "epoch": 2.3622749370923217, "percentage": 47.25, "elapsed_time": "2:05:11", "remaining_time": "2:19:47", "throughput": 8677.71, "total_tokens": 65186536} +{"current_steps": 96700, "total_steps": 204665, "loss": 0.1285, "lr": 1.2673288599077543e-06, "epoch": 2.362397087924169, "percentage": 47.25, "elapsed_time": "2:05:12", "remaining_time": "2:19:47", "throughput": 8677.76, "total_tokens": 65189992} +{"current_steps": 96705, "total_steps": 204665, "loss": 0.1172, "lr": 1.2672466849842742e-06, "epoch": 2.362519238756016, "percentage": 47.25, "elapsed_time": "2:05:12", "remaining_time": "2:19:47", "throughput": 8677.86, "total_tokens": 65193832} +{"current_steps": 96710, "total_steps": 204665, "loss": 0.0933, "lr": 1.2671645081173044e-06, "epoch": 2.3626413895878633, "percentage": 47.25, "elapsed_time": "2:05:13", "remaining_time": "2:19:46", "throughput": 8677.91, "total_tokens": 65197224} +{"current_steps": 96715, "total_steps": 204665, "loss": 0.0002, "lr": 1.2670823293074423e-06, "epoch": 2.36276354041971, "percentage": 47.26, "elapsed_time": "2:05:13", "remaining_time": "2:19:46", "throughput": 8677.98, "total_tokens": 65200872} +{"current_steps": 96720, "total_steps": 204665, "loss": 0.004, "lr": 1.2670001485552858e-06, "epoch": 2.3628856912515572, "percentage": 47.26, "elapsed_time": "2:05:13", "remaining_time": "2:19:45", "throughput": 8678.0, "total_tokens": 65204008} +{"current_steps": 96725, "total_steps": 204665, "loss": 0.0002, "lr": 1.2669179658614327e-06, "epoch": 2.3630078420834044, "percentage": 47.26, "elapsed_time": "2:05:14", "remaining_time": "2:19:45", "throughput": 8678.06, "total_tokens": 65207464} +{"current_steps": 96730, "total_steps": 204665, "loss": 0.0003, "lr": 1.26683578122648e-06, "epoch": 2.3631299929152516, "percentage": 47.26, "elapsed_time": "2:05:14", "remaining_time": "2:19:44", "throughput": 8678.05, "total_tokens": 65210344} +{"current_steps": 96735, "total_steps": 204665, "loss": 0.0606, "lr": 1.2667535946510258e-06, "epoch": 2.363252143747099, "percentage": 47.27, "elapsed_time": "2:05:14", "remaining_time": "2:19:44", "throughput": 8678.13, "total_tokens": 65214056} +{"current_steps": 96740, "total_steps": 204665, "loss": 0.0378, "lr": 1.2666714061356675e-06, "epoch": 2.363374294578946, "percentage": 47.27, "elapsed_time": "2:05:15", "remaining_time": "2:19:43", "throughput": 8678.12, "total_tokens": 65216936} +{"current_steps": 96745, "total_steps": 204665, "loss": 0.0013, "lr": 1.2665892156810035e-06, "epoch": 2.363496445410793, "percentage": 47.27, "elapsed_time": "2:05:15", "remaining_time": "2:19:43", "throughput": 8678.11, "total_tokens": 65219816} +{"current_steps": 96750, "total_steps": 204665, "loss": 0.0414, "lr": 1.2665070232876304e-06, "epoch": 2.3636185962426404, "percentage": 47.27, "elapsed_time": "2:05:15", "remaining_time": "2:19:43", "throughput": 8678.15, "total_tokens": 65223080} +{"current_steps": 96755, "total_steps": 204665, "loss": 0.0002, "lr": 1.266424828956147e-06, "epoch": 2.3637407470744876, "percentage": 47.27, "elapsed_time": "2:05:16", "remaining_time": "2:19:42", "throughput": 8678.2, "total_tokens": 65226472} +{"current_steps": 96760, "total_steps": 204665, "loss": 0.0013, "lr": 1.2663426326871505e-06, "epoch": 2.3638628979063347, "percentage": 47.28, "elapsed_time": "2:05:16", "remaining_time": "2:19:42", "throughput": 8678.21, "total_tokens": 65229544} +{"current_steps": 96765, "total_steps": 204665, "loss": 0.0692, "lr": 1.2662604344812387e-06, "epoch": 2.363985048738182, "percentage": 47.28, "elapsed_time": "2:05:16", "remaining_time": "2:19:41", "throughput": 8678.28, "total_tokens": 65233128} +{"current_steps": 96770, "total_steps": 204665, "loss": 0.0002, "lr": 1.2661782343390096e-06, "epoch": 2.364107199570029, "percentage": 47.28, "elapsed_time": "2:05:17", "remaining_time": "2:19:41", "throughput": 8678.32, "total_tokens": 65236456} +{"current_steps": 96775, "total_steps": 204665, "loss": 0.0439, "lr": 1.2660960322610605e-06, "epoch": 2.3642293504018763, "percentage": 47.28, "elapsed_time": "2:05:17", "remaining_time": "2:19:40", "throughput": 8678.37, "total_tokens": 65239848} +{"current_steps": 96780, "total_steps": 204665, "loss": 0.0145, "lr": 1.2660138282479894e-06, "epoch": 2.3643515012337235, "percentage": 47.29, "elapsed_time": "2:05:17", "remaining_time": "2:19:40", "throughput": 8678.4, "total_tokens": 65243176} +{"current_steps": 96785, "total_steps": 204665, "loss": 0.0001, "lr": 1.2659316223003945e-06, "epoch": 2.3644736520655707, "percentage": 47.29, "elapsed_time": "2:05:18", "remaining_time": "2:19:40", "throughput": 8678.54, "total_tokens": 65247336} +{"current_steps": 96790, "total_steps": 204665, "loss": 0.0001, "lr": 1.2658494144188732e-06, "epoch": 2.364595802897418, "percentage": 47.29, "elapsed_time": "2:05:18", "remaining_time": "2:19:39", "throughput": 8678.55, "total_tokens": 65250408} +{"current_steps": 96795, "total_steps": 204665, "loss": 0.0001, "lr": 1.2657672046040235e-06, "epoch": 2.364717953729265, "percentage": 47.29, "elapsed_time": "2:05:18", "remaining_time": "2:19:39", "throughput": 8678.58, "total_tokens": 65253608} +{"current_steps": 96800, "total_steps": 204665, "loss": 0.1215, "lr": 1.265684992856443e-06, "epoch": 2.364840104561112, "percentage": 47.3, "elapsed_time": "2:05:19", "remaining_time": "2:19:38", "throughput": 8678.62, "total_tokens": 65256936} +{"current_steps": 96805, "total_steps": 204665, "loss": 0.038, "lr": 1.2656027791767299e-06, "epoch": 2.3649622553929595, "percentage": 47.3, "elapsed_time": "2:05:19", "remaining_time": "2:19:38", "throughput": 8678.69, "total_tokens": 65260520} +{"current_steps": 96810, "total_steps": 204665, "loss": 0.0, "lr": 1.2655205635654819e-06, "epoch": 2.365084406224806, "percentage": 47.3, "elapsed_time": "2:05:19", "remaining_time": "2:19:37", "throughput": 8678.74, "total_tokens": 65263976} +{"current_steps": 96815, "total_steps": 204665, "loss": 0.0403, "lr": 1.2654383460232972e-06, "epoch": 2.3652065570566534, "percentage": 47.3, "elapsed_time": "2:05:20", "remaining_time": "2:19:37", "throughput": 8678.77, "total_tokens": 65267176} +{"current_steps": 96820, "total_steps": 204665, "loss": 0.0907, "lr": 1.265356126550773e-06, "epoch": 2.3653287078885006, "percentage": 47.31, "elapsed_time": "2:05:20", "remaining_time": "2:19:37", "throughput": 8678.81, "total_tokens": 65270504} +{"current_steps": 96825, "total_steps": 204665, "loss": 0.0002, "lr": 1.2652739051485083e-06, "epoch": 2.3654508587203478, "percentage": 47.31, "elapsed_time": "2:05:21", "remaining_time": "2:19:36", "throughput": 8678.83, "total_tokens": 65273640} +{"current_steps": 96830, "total_steps": 204665, "loss": 0.0002, "lr": 1.2651916818170998e-06, "epoch": 2.365573009552195, "percentage": 47.31, "elapsed_time": "2:05:21", "remaining_time": "2:19:36", "throughput": 8678.86, "total_tokens": 65276840} +{"current_steps": 96835, "total_steps": 204665, "loss": 0.0513, "lr": 1.2651094565571465e-06, "epoch": 2.365695160384042, "percentage": 47.31, "elapsed_time": "2:05:21", "remaining_time": "2:19:35", "throughput": 8678.89, "total_tokens": 65280040} +{"current_steps": 96840, "total_steps": 204665, "loss": 0.049, "lr": 1.2650272293692457e-06, "epoch": 2.3658173112158893, "percentage": 47.32, "elapsed_time": "2:05:22", "remaining_time": "2:19:35", "throughput": 8678.92, "total_tokens": 65283304} +{"current_steps": 96845, "total_steps": 204665, "loss": 0.044, "lr": 1.2649450002539957e-06, "epoch": 2.3659394620477365, "percentage": 47.32, "elapsed_time": "2:05:22", "remaining_time": "2:19:34", "throughput": 8678.98, "total_tokens": 65286824} +{"current_steps": 96850, "total_steps": 204665, "loss": 0.0003, "lr": 1.2648627692119942e-06, "epoch": 2.3660616128795837, "percentage": 47.32, "elapsed_time": "2:05:22", "remaining_time": "2:19:34", "throughput": 8679.01, "total_tokens": 65290024} +{"current_steps": 96855, "total_steps": 204665, "loss": 0.1146, "lr": 1.2647805362438395e-06, "epoch": 2.366183763711431, "percentage": 47.32, "elapsed_time": "2:05:23", "remaining_time": "2:19:34", "throughput": 8679.02, "total_tokens": 65293096} +{"current_steps": 96860, "total_steps": 204665, "loss": 0.0003, "lr": 1.2646983013501298e-06, "epoch": 2.366305914543278, "percentage": 47.33, "elapsed_time": "2:05:23", "remaining_time": "2:19:33", "throughput": 8679.1, "total_tokens": 65296744} +{"current_steps": 96865, "total_steps": 204665, "loss": 0.0361, "lr": 1.2646160645314623e-06, "epoch": 2.3664280653751253, "percentage": 47.33, "elapsed_time": "2:05:23", "remaining_time": "2:19:33", "throughput": 8679.11, "total_tokens": 65299816} +{"current_steps": 96870, "total_steps": 204665, "loss": 0.0456, "lr": 1.264533825788436e-06, "epoch": 2.3665502162069725, "percentage": 47.33, "elapsed_time": "2:05:24", "remaining_time": "2:19:32", "throughput": 8679.13, "total_tokens": 65302952} +{"current_steps": 96875, "total_steps": 204665, "loss": 0.08, "lr": 1.2644515851216487e-06, "epoch": 2.3666723670388197, "percentage": 47.33, "elapsed_time": "2:05:24", "remaining_time": "2:19:32", "throughput": 8679.21, "total_tokens": 65306664} +{"current_steps": 96880, "total_steps": 204665, "loss": 0.0327, "lr": 1.2643693425316981e-06, "epoch": 2.366794517870667, "percentage": 47.34, "elapsed_time": "2:05:24", "remaining_time": "2:19:31", "throughput": 8679.23, "total_tokens": 65309736} +{"current_steps": 96885, "total_steps": 204665, "loss": 0.0004, "lr": 1.2642870980191827e-06, "epoch": 2.366916668702514, "percentage": 47.34, "elapsed_time": "2:05:25", "remaining_time": "2:19:31", "throughput": 8679.24, "total_tokens": 65312744} +{"current_steps": 96890, "total_steps": 204665, "loss": 0.0504, "lr": 1.2642048515847003e-06, "epoch": 2.3670388195343612, "percentage": 47.34, "elapsed_time": "2:05:25", "remaining_time": "2:19:30", "throughput": 8679.24, "total_tokens": 65315752} +{"current_steps": 96895, "total_steps": 204665, "loss": 0.0005, "lr": 1.264122603228849e-06, "epoch": 2.367160970366208, "percentage": 47.34, "elapsed_time": "2:05:25", "remaining_time": "2:19:30", "throughput": 8679.33, "total_tokens": 65319528} +{"current_steps": 96900, "total_steps": 204665, "loss": 0.0331, "lr": 1.2640403529522272e-06, "epoch": 2.367283121198055, "percentage": 47.35, "elapsed_time": "2:05:26", "remaining_time": "2:19:30", "throughput": 8679.34, "total_tokens": 65322536} +{"current_steps": 96905, "total_steps": 204665, "loss": 0.0002, "lr": 1.263958100755433e-06, "epoch": 2.3674052720299024, "percentage": 47.35, "elapsed_time": "2:05:26", "remaining_time": "2:19:29", "throughput": 8679.32, "total_tokens": 65325352} +{"current_steps": 96910, "total_steps": 204665, "loss": 0.0001, "lr": 1.2638758466390647e-06, "epoch": 2.3675274228617496, "percentage": 47.35, "elapsed_time": "2:05:26", "remaining_time": "2:19:29", "throughput": 8679.38, "total_tokens": 65328808} +{"current_steps": 96915, "total_steps": 204665, "loss": 0.0001, "lr": 1.2637935906037199e-06, "epoch": 2.3676495736935967, "percentage": 47.35, "elapsed_time": "2:05:27", "remaining_time": "2:19:28", "throughput": 8679.43, "total_tokens": 65332264} +{"current_steps": 96920, "total_steps": 204665, "loss": 0.0002, "lr": 1.2637113326499973e-06, "epoch": 2.367771724525444, "percentage": 47.36, "elapsed_time": "2:05:27", "remaining_time": "2:19:28", "throughput": 8679.47, "total_tokens": 65335656} +{"current_steps": 96925, "total_steps": 204665, "loss": 0.0001, "lr": 1.2636290727784951e-06, "epoch": 2.367893875357291, "percentage": 47.36, "elapsed_time": "2:05:27", "remaining_time": "2:19:27", "throughput": 8679.49, "total_tokens": 65338792} +{"current_steps": 96930, "total_steps": 204665, "loss": 0.0439, "lr": 1.2635468109898112e-06, "epoch": 2.3680160261891383, "percentage": 47.36, "elapsed_time": "2:05:28", "remaining_time": "2:19:27", "throughput": 8679.54, "total_tokens": 65342184} +{"current_steps": 96935, "total_steps": 204665, "loss": 0.0003, "lr": 1.263464547284544e-06, "epoch": 2.3681381770209855, "percentage": 47.36, "elapsed_time": "2:05:28", "remaining_time": "2:19:27", "throughput": 8679.54, "total_tokens": 65345064} +{"current_steps": 96940, "total_steps": 204665, "loss": 0.0641, "lr": 1.263382281663292e-06, "epoch": 2.3682603278528327, "percentage": 47.37, "elapsed_time": "2:05:28", "remaining_time": "2:19:26", "throughput": 8679.55, "total_tokens": 65348136} +{"current_steps": 96945, "total_steps": 204665, "loss": 0.0008, "lr": 1.263300014126653e-06, "epoch": 2.36838247868468, "percentage": 47.37, "elapsed_time": "2:05:29", "remaining_time": "2:19:26", "throughput": 8679.59, "total_tokens": 65351464} +{"current_steps": 96950, "total_steps": 204665, "loss": 0.1049, "lr": 1.2632177446752255e-06, "epoch": 2.368504629516527, "percentage": 47.37, "elapsed_time": "2:05:29", "remaining_time": "2:19:25", "throughput": 8679.67, "total_tokens": 65355176} +{"current_steps": 96955, "total_steps": 204665, "loss": 0.0001, "lr": 1.2631354733096075e-06, "epoch": 2.3686267803483743, "percentage": 47.37, "elapsed_time": "2:05:30", "remaining_time": "2:19:25", "throughput": 8679.77, "total_tokens": 65359080} +{"current_steps": 96960, "total_steps": 204665, "loss": 0.0001, "lr": 1.2630532000303978e-06, "epoch": 2.3687489311802215, "percentage": 47.37, "elapsed_time": "2:05:30", "remaining_time": "2:19:24", "throughput": 8679.83, "total_tokens": 65362536} +{"current_steps": 96965, "total_steps": 204665, "loss": 0.1348, "lr": 1.2629709248381946e-06, "epoch": 2.3688710820120686, "percentage": 47.38, "elapsed_time": "2:05:30", "remaining_time": "2:19:24", "throughput": 8679.92, "total_tokens": 65366312} +{"current_steps": 96970, "total_steps": 204665, "loss": 0.0415, "lr": 1.2628886477335958e-06, "epoch": 2.368993232843916, "percentage": 47.38, "elapsed_time": "2:05:31", "remaining_time": "2:19:24", "throughput": 8679.9, "total_tokens": 65369128} +{"current_steps": 96975, "total_steps": 204665, "loss": 0.0003, "lr": 1.2628063687172004e-06, "epoch": 2.369115383675763, "percentage": 47.38, "elapsed_time": "2:05:31", "remaining_time": "2:19:23", "throughput": 8679.96, "total_tokens": 65372584} +{"current_steps": 96980, "total_steps": 204665, "loss": 0.0008, "lr": 1.2627240877896063e-06, "epoch": 2.3692375345076098, "percentage": 47.38, "elapsed_time": "2:05:31", "remaining_time": "2:19:23", "throughput": 8679.93, "total_tokens": 65375272} +{"current_steps": 96985, "total_steps": 204665, "loss": 0.0454, "lr": 1.2626418049514118e-06, "epoch": 2.3693596853394574, "percentage": 47.39, "elapsed_time": "2:05:32", "remaining_time": "2:19:22", "throughput": 8679.97, "total_tokens": 65378536} +{"current_steps": 96990, "total_steps": 204665, "loss": 0.0002, "lr": 1.2625595202032156e-06, "epoch": 2.369481836171304, "percentage": 47.39, "elapsed_time": "2:05:32", "remaining_time": "2:19:22", "throughput": 8680.0, "total_tokens": 65381736} +{"current_steps": 96995, "total_steps": 204665, "loss": 0.0004, "lr": 1.262477233545616e-06, "epoch": 2.3696039870031513, "percentage": 47.39, "elapsed_time": "2:05:32", "remaining_time": "2:19:21", "throughput": 8680.02, "total_tokens": 65384872} +{"current_steps": 97000, "total_steps": 204665, "loss": 0.0601, "lr": 1.2623949449792112e-06, "epoch": 2.3697261378349985, "percentage": 47.39, "elapsed_time": "2:05:33", "remaining_time": "2:19:21", "throughput": 8680.04, "total_tokens": 65388008} +{"current_steps": 97005, "total_steps": 204665, "loss": 0.0001, "lr": 1.2623126545045999e-06, "epoch": 2.3698482886668457, "percentage": 47.4, "elapsed_time": "2:05:33", "remaining_time": "2:19:20", "throughput": 8680.1, "total_tokens": 65391528} +{"current_steps": 97010, "total_steps": 204665, "loss": 0.0024, "lr": 1.2622303621223804e-06, "epoch": 2.369970439498693, "percentage": 47.4, "elapsed_time": "2:05:33", "remaining_time": "2:19:20", "throughput": 8680.12, "total_tokens": 65394664} +{"current_steps": 97015, "total_steps": 204665, "loss": 0.0382, "lr": 1.2621480678331513e-06, "epoch": 2.37009259033054, "percentage": 47.4, "elapsed_time": "2:05:34", "remaining_time": "2:19:20", "throughput": 8680.16, "total_tokens": 65397928} +{"current_steps": 97020, "total_steps": 204665, "loss": 0.0002, "lr": 1.2620657716375104e-06, "epoch": 2.3702147411623873, "percentage": 47.4, "elapsed_time": "2:05:34", "remaining_time": "2:19:19", "throughput": 8680.2, "total_tokens": 65401192} +{"current_steps": 97025, "total_steps": 204665, "loss": 0.1228, "lr": 1.2619834735360573e-06, "epoch": 2.3703368919942345, "percentage": 47.41, "elapsed_time": "2:05:34", "remaining_time": "2:19:19", "throughput": 8680.2, "total_tokens": 65404136} +{"current_steps": 97030, "total_steps": 204665, "loss": 0.068, "lr": 1.2619011735293897e-06, "epoch": 2.3704590428260817, "percentage": 47.41, "elapsed_time": "2:05:35", "remaining_time": "2:19:18", "throughput": 8680.26, "total_tokens": 65407720} +{"current_steps": 97035, "total_steps": 204665, "loss": 0.0719, "lr": 1.2618188716181065e-06, "epoch": 2.370581193657929, "percentage": 47.41, "elapsed_time": "2:05:35", "remaining_time": "2:19:18", "throughput": 8680.3, "total_tokens": 65411048} +{"current_steps": 97040, "total_steps": 204665, "loss": 0.0726, "lr": 1.261736567802806e-06, "epoch": 2.370703344489776, "percentage": 47.41, "elapsed_time": "2:05:35", "remaining_time": "2:19:17", "throughput": 8680.31, "total_tokens": 65414056} +{"current_steps": 97045, "total_steps": 204665, "loss": 0.049, "lr": 1.2616542620840867e-06, "epoch": 2.3708254953216232, "percentage": 47.42, "elapsed_time": "2:05:36", "remaining_time": "2:19:17", "throughput": 8680.36, "total_tokens": 65417512} +{"current_steps": 97050, "total_steps": 204665, "loss": 0.0768, "lr": 1.261571954462547e-06, "epoch": 2.3709476461534704, "percentage": 47.42, "elapsed_time": "2:05:36", "remaining_time": "2:19:17", "throughput": 8680.4, "total_tokens": 65420840} +{"current_steps": 97055, "total_steps": 204665, "loss": 0.0577, "lr": 1.261489644938786e-06, "epoch": 2.3710697969853176, "percentage": 47.42, "elapsed_time": "2:05:36", "remaining_time": "2:19:16", "throughput": 8680.4, "total_tokens": 65423720} +{"current_steps": 97060, "total_steps": 204665, "loss": 0.0003, "lr": 1.2614073335134018e-06, "epoch": 2.371191947817165, "percentage": 47.42, "elapsed_time": "2:05:37", "remaining_time": "2:19:16", "throughput": 8680.41, "total_tokens": 65426792} +{"current_steps": 97065, "total_steps": 204665, "loss": 0.0704, "lr": 1.2613250201869931e-06, "epoch": 2.371314098649012, "percentage": 47.43, "elapsed_time": "2:05:37", "remaining_time": "2:19:15", "throughput": 8680.47, "total_tokens": 65430312} +{"current_steps": 97070, "total_steps": 204665, "loss": 0.0835, "lr": 1.2612427049601589e-06, "epoch": 2.371436249480859, "percentage": 47.43, "elapsed_time": "2:05:37", "remaining_time": "2:19:15", "throughput": 8680.52, "total_tokens": 65433704} +{"current_steps": 97075, "total_steps": 204665, "loss": 0.0434, "lr": 1.261160387833497e-06, "epoch": 2.371558400312706, "percentage": 47.43, "elapsed_time": "2:05:38", "remaining_time": "2:19:14", "throughput": 8680.54, "total_tokens": 65436840} +{"current_steps": 97080, "total_steps": 204665, "loss": 0.0316, "lr": 1.261078068807607e-06, "epoch": 2.371680551144553, "percentage": 47.43, "elapsed_time": "2:05:38", "remaining_time": "2:19:14", "throughput": 8680.52, "total_tokens": 65439656} +{"current_steps": 97085, "total_steps": 204665, "loss": 0.0409, "lr": 1.260995747883087e-06, "epoch": 2.3718027019764003, "percentage": 47.44, "elapsed_time": "2:05:39", "remaining_time": "2:19:13", "throughput": 8680.52, "total_tokens": 65442600} +{"current_steps": 97090, "total_steps": 204665, "loss": 0.0391, "lr": 1.2609134250605355e-06, "epoch": 2.3719248528082475, "percentage": 47.44, "elapsed_time": "2:05:39", "remaining_time": "2:19:13", "throughput": 8680.56, "total_tokens": 65445928} +{"current_steps": 97095, "total_steps": 204665, "loss": 0.0358, "lr": 1.2608311003405513e-06, "epoch": 2.3720470036400947, "percentage": 47.44, "elapsed_time": "2:05:39", "remaining_time": "2:19:13", "throughput": 8680.59, "total_tokens": 65449192} +{"current_steps": 97100, "total_steps": 204665, "loss": 0.0344, "lr": 1.2607487737237334e-06, "epoch": 2.372169154471942, "percentage": 47.44, "elapsed_time": "2:05:40", "remaining_time": "2:19:12", "throughput": 8680.63, "total_tokens": 65452520} +{"current_steps": 97105, "total_steps": 204665, "loss": 0.0459, "lr": 1.2606664452106804e-06, "epoch": 2.372291305303789, "percentage": 47.45, "elapsed_time": "2:05:40", "remaining_time": "2:19:12", "throughput": 8680.67, "total_tokens": 65455848} +{"current_steps": 97110, "total_steps": 204665, "loss": 0.0682, "lr": 1.2605841148019907e-06, "epoch": 2.3724134561356363, "percentage": 47.45, "elapsed_time": "2:05:40", "remaining_time": "2:19:11", "throughput": 8680.72, "total_tokens": 65459240} +{"current_steps": 97115, "total_steps": 204665, "loss": 0.0521, "lr": 1.260501782498263e-06, "epoch": 2.3725356069674834, "percentage": 47.45, "elapsed_time": "2:05:41", "remaining_time": "2:19:11", "throughput": 8680.79, "total_tokens": 65462888} +{"current_steps": 97120, "total_steps": 204665, "loss": 0.038, "lr": 1.2604194483000966e-06, "epoch": 2.3726577577993306, "percentage": 47.45, "elapsed_time": "2:05:41", "remaining_time": "2:19:10", "throughput": 8680.81, "total_tokens": 65465960} +{"current_steps": 97125, "total_steps": 204665, "loss": 0.0486, "lr": 1.2603371122080901e-06, "epoch": 2.372779908631178, "percentage": 47.46, "elapsed_time": "2:05:41", "remaining_time": "2:19:10", "throughput": 8680.84, "total_tokens": 65469224} +{"current_steps": 97130, "total_steps": 204665, "loss": 0.001, "lr": 1.2602547742228417e-06, "epoch": 2.372902059463025, "percentage": 47.46, "elapsed_time": "2:05:42", "remaining_time": "2:19:10", "throughput": 8680.86, "total_tokens": 65472296} +{"current_steps": 97135, "total_steps": 204665, "loss": 0.0912, "lr": 1.260172434344951e-06, "epoch": 2.373024210294872, "percentage": 47.46, "elapsed_time": "2:05:42", "remaining_time": "2:19:09", "throughput": 8680.93, "total_tokens": 65475944} +{"current_steps": 97140, "total_steps": 204665, "loss": 0.0719, "lr": 1.260090092575016e-06, "epoch": 2.3731463611267194, "percentage": 47.46, "elapsed_time": "2:05:42", "remaining_time": "2:19:09", "throughput": 8680.99, "total_tokens": 65479464} +{"current_steps": 97145, "total_steps": 204665, "loss": 0.0739, "lr": 1.260007748913636e-06, "epoch": 2.3732685119585666, "percentage": 47.47, "elapsed_time": "2:05:43", "remaining_time": "2:19:08", "throughput": 8681.02, "total_tokens": 65482664} +{"current_steps": 97150, "total_steps": 204665, "loss": 0.0003, "lr": 1.2599254033614098e-06, "epoch": 2.3733906627904138, "percentage": 47.47, "elapsed_time": "2:05:43", "remaining_time": "2:19:08", "throughput": 8681.06, "total_tokens": 65485992} +{"current_steps": 97155, "total_steps": 204665, "loss": 0.0751, "lr": 1.259843055918936e-06, "epoch": 2.373512813622261, "percentage": 47.47, "elapsed_time": "2:05:43", "remaining_time": "2:19:07", "throughput": 8681.12, "total_tokens": 65489512} +{"current_steps": 97160, "total_steps": 204665, "loss": 0.0396, "lr": 1.2597607065868138e-06, "epoch": 2.3736349644541077, "percentage": 47.47, "elapsed_time": "2:05:44", "remaining_time": "2:19:07", "throughput": 8681.15, "total_tokens": 65492712} +{"current_steps": 97165, "total_steps": 204665, "loss": 0.0246, "lr": 1.2596783553656418e-06, "epoch": 2.3737571152859553, "percentage": 47.48, "elapsed_time": "2:05:44", "remaining_time": "2:19:07", "throughput": 8681.16, "total_tokens": 65495720} +{"current_steps": 97170, "total_steps": 204665, "loss": 0.0728, "lr": 1.259596002256019e-06, "epoch": 2.373879266117802, "percentage": 47.48, "elapsed_time": "2:05:44", "remaining_time": "2:19:06", "throughput": 8681.16, "total_tokens": 65498664} +{"current_steps": 97175, "total_steps": 204665, "loss": 0.0002, "lr": 1.259513647258544e-06, "epoch": 2.3740014169496493, "percentage": 47.48, "elapsed_time": "2:05:45", "remaining_time": "2:19:06", "throughput": 8681.21, "total_tokens": 65502120} +{"current_steps": 97180, "total_steps": 204665, "loss": 0.0014, "lr": 1.2594312903738161e-06, "epoch": 2.3741235677814965, "percentage": 47.48, "elapsed_time": "2:05:45", "remaining_time": "2:19:05", "throughput": 8681.26, "total_tokens": 65505512} +{"current_steps": 97185, "total_steps": 204665, "loss": 0.0003, "lr": 1.259348931602434e-06, "epoch": 2.3742457186133437, "percentage": 47.48, "elapsed_time": "2:05:45", "remaining_time": "2:19:05", "throughput": 8681.3, "total_tokens": 65508840} +{"current_steps": 97190, "total_steps": 204665, "loss": 0.0836, "lr": 1.2592665709449972e-06, "epoch": 2.374367869445191, "percentage": 47.49, "elapsed_time": "2:05:46", "remaining_time": "2:19:04", "throughput": 8681.33, "total_tokens": 65512040} +{"current_steps": 97195, "total_steps": 204665, "loss": 0.0004, "lr": 1.2591842084021037e-06, "epoch": 2.374490020277038, "percentage": 47.49, "elapsed_time": "2:05:46", "remaining_time": "2:19:04", "throughput": 8681.43, "total_tokens": 65515944} +{"current_steps": 97200, "total_steps": 204665, "loss": 0.0224, "lr": 1.259101843974353e-06, "epoch": 2.3746121711088852, "percentage": 47.49, "elapsed_time": "2:05:47", "remaining_time": "2:19:04", "throughput": 8681.48, "total_tokens": 65519336} +{"current_steps": 97205, "total_steps": 204665, "loss": 0.1061, "lr": 1.259019477662344e-06, "epoch": 2.3747343219407324, "percentage": 47.49, "elapsed_time": "2:05:47", "remaining_time": "2:19:03", "throughput": 8681.49, "total_tokens": 65522408} +{"current_steps": 97210, "total_steps": 204665, "loss": 0.0096, "lr": 1.2589371094666757e-06, "epoch": 2.3748564727725796, "percentage": 47.5, "elapsed_time": "2:05:47", "remaining_time": "2:19:03", "throughput": 8681.59, "total_tokens": 65526248} +{"current_steps": 97215, "total_steps": 204665, "loss": 0.0008, "lr": 1.2588547393879472e-06, "epoch": 2.374978623604427, "percentage": 47.5, "elapsed_time": "2:05:48", "remaining_time": "2:19:02", "throughput": 8681.66, "total_tokens": 65529896} +{"current_steps": 97220, "total_steps": 204665, "loss": 0.0001, "lr": 1.2587723674267572e-06, "epoch": 2.375100774436274, "percentage": 47.5, "elapsed_time": "2:05:48", "remaining_time": "2:19:02", "throughput": 8681.76, "total_tokens": 65533736} +{"current_steps": 97225, "total_steps": 204665, "loss": 0.0004, "lr": 1.258689993583705e-06, "epoch": 2.375222925268121, "percentage": 47.5, "elapsed_time": "2:05:48", "remaining_time": "2:19:01", "throughput": 8681.81, "total_tokens": 65537128} +{"current_steps": 97230, "total_steps": 204665, "loss": 0.0525, "lr": 1.2586076178593896e-06, "epoch": 2.3753450760999684, "percentage": 47.51, "elapsed_time": "2:05:49", "remaining_time": "2:19:01", "throughput": 8681.82, "total_tokens": 65540200} +{"current_steps": 97235, "total_steps": 204665, "loss": 0.1004, "lr": 1.2585252402544101e-06, "epoch": 2.3754672269318156, "percentage": 47.51, "elapsed_time": "2:05:49", "remaining_time": "2:19:01", "throughput": 8681.81, "total_tokens": 65543080} +{"current_steps": 97240, "total_steps": 204665, "loss": 0.001, "lr": 1.2584428607693655e-06, "epoch": 2.3755893777636627, "percentage": 47.51, "elapsed_time": "2:05:49", "remaining_time": "2:19:00", "throughput": 8681.89, "total_tokens": 65546728} +{"current_steps": 97245, "total_steps": 204665, "loss": 0.0003, "lr": 1.258360479404855e-06, "epoch": 2.3757115285955095, "percentage": 47.51, "elapsed_time": "2:05:50", "remaining_time": "2:19:00", "throughput": 8681.91, "total_tokens": 65549864} +{"current_steps": 97250, "total_steps": 204665, "loss": 0.0002, "lr": 1.2582780961614776e-06, "epoch": 2.375833679427357, "percentage": 47.52, "elapsed_time": "2:05:50", "remaining_time": "2:18:59", "throughput": 8681.95, "total_tokens": 65553192} +{"current_steps": 97255, "total_steps": 204665, "loss": 0.069, "lr": 1.2581957110398322e-06, "epoch": 2.375955830259204, "percentage": 47.52, "elapsed_time": "2:05:50", "remaining_time": "2:18:59", "throughput": 8682.0, "total_tokens": 65556584} +{"current_steps": 97260, "total_steps": 204665, "loss": 0.0304, "lr": 1.2581133240405184e-06, "epoch": 2.376077981091051, "percentage": 47.52, "elapsed_time": "2:05:51", "remaining_time": "2:18:58", "throughput": 8682.07, "total_tokens": 65560168} +{"current_steps": 97265, "total_steps": 204665, "loss": 0.0003, "lr": 1.258030935164135e-06, "epoch": 2.3762001319228983, "percentage": 47.52, "elapsed_time": "2:05:51", "remaining_time": "2:18:58", "throughput": 8682.11, "total_tokens": 65563432} +{"current_steps": 97270, "total_steps": 204665, "loss": 0.0467, "lr": 1.257948544411281e-06, "epoch": 2.3763222827547454, "percentage": 47.53, "elapsed_time": "2:05:51", "remaining_time": "2:18:58", "throughput": 8682.18, "total_tokens": 65567080} +{"current_steps": 97275, "total_steps": 204665, "loss": 0.1122, "lr": 1.257866151782556e-06, "epoch": 2.3764444335865926, "percentage": 47.53, "elapsed_time": "2:05:52", "remaining_time": "2:18:57", "throughput": 8682.25, "total_tokens": 65570728} +{"current_steps": 97280, "total_steps": 204665, "loss": 0.0001, "lr": 1.257783757278559e-06, "epoch": 2.37656658441844, "percentage": 47.53, "elapsed_time": "2:05:52", "remaining_time": "2:18:57", "throughput": 8682.3, "total_tokens": 65574056} +{"current_steps": 97285, "total_steps": 204665, "loss": 0.0002, "lr": 1.2577013608998892e-06, "epoch": 2.376688735250287, "percentage": 47.53, "elapsed_time": "2:05:52", "remaining_time": "2:18:56", "throughput": 8682.35, "total_tokens": 65577512} +{"current_steps": 97290, "total_steps": 204665, "loss": 0.0001, "lr": 1.2576189626471459e-06, "epoch": 2.376810886082134, "percentage": 47.54, "elapsed_time": "2:05:53", "remaining_time": "2:18:56", "throughput": 8682.36, "total_tokens": 65580520} +{"current_steps": 97295, "total_steps": 204665, "loss": 0.0573, "lr": 1.257536562520928e-06, "epoch": 2.3769330369139814, "percentage": 47.54, "elapsed_time": "2:05:53", "remaining_time": "2:18:55", "throughput": 8682.39, "total_tokens": 65583720} +{"current_steps": 97300, "total_steps": 204665, "loss": 0.0002, "lr": 1.257454160521835e-06, "epoch": 2.3770551877458286, "percentage": 47.54, "elapsed_time": "2:05:53", "remaining_time": "2:18:55", "throughput": 8682.39, "total_tokens": 65586664} +{"current_steps": 97305, "total_steps": 204665, "loss": 0.0003, "lr": 1.257371756650466e-06, "epoch": 2.3771773385776758, "percentage": 47.54, "elapsed_time": "2:05:54", "remaining_time": "2:18:54", "throughput": 8682.4, "total_tokens": 65589672} +{"current_steps": 97310, "total_steps": 204665, "loss": 0.0648, "lr": 1.2572893509074206e-06, "epoch": 2.377299489409523, "percentage": 47.55, "elapsed_time": "2:05:54", "remaining_time": "2:18:54", "throughput": 8682.43, "total_tokens": 65592936} +{"current_steps": 97315, "total_steps": 204665, "loss": 0.0001, "lr": 1.2572069432932978e-06, "epoch": 2.37742164024137, "percentage": 47.55, "elapsed_time": "2:05:55", "remaining_time": "2:18:54", "throughput": 8682.49, "total_tokens": 65596392} +{"current_steps": 97320, "total_steps": 204665, "loss": 0.1605, "lr": 1.2571245338086966e-06, "epoch": 2.3775437910732173, "percentage": 47.55, "elapsed_time": "2:05:55", "remaining_time": "2:18:53", "throughput": 8682.53, "total_tokens": 65599784} +{"current_steps": 97325, "total_steps": 204665, "loss": 0.0001, "lr": 1.2570421224542169e-06, "epoch": 2.3776659419050645, "percentage": 47.55, "elapsed_time": "2:05:55", "remaining_time": "2:18:53", "throughput": 8682.55, "total_tokens": 65602856} +{"current_steps": 97330, "total_steps": 204665, "loss": 0.0008, "lr": 1.2569597092304576e-06, "epoch": 2.3777880927369117, "percentage": 47.56, "elapsed_time": "2:05:56", "remaining_time": "2:18:52", "throughput": 8682.62, "total_tokens": 65606504} +{"current_steps": 97335, "total_steps": 204665, "loss": 0.0003, "lr": 1.2568772941380183e-06, "epoch": 2.377910243568759, "percentage": 47.56, "elapsed_time": "2:05:56", "remaining_time": "2:18:52", "throughput": 8682.66, "total_tokens": 65609768} +{"current_steps": 97340, "total_steps": 204665, "loss": 0.0022, "lr": 1.2567948771774984e-06, "epoch": 2.3780323944006057, "percentage": 47.56, "elapsed_time": "2:05:56", "remaining_time": "2:18:51", "throughput": 8682.75, "total_tokens": 65613480} +{"current_steps": 97345, "total_steps": 204665, "loss": 0.2043, "lr": 1.256712458349497e-06, "epoch": 2.378154545232453, "percentage": 47.56, "elapsed_time": "2:05:57", "remaining_time": "2:18:51", "throughput": 8682.82, "total_tokens": 65617128} +{"current_steps": 97350, "total_steps": 204665, "loss": 0.0008, "lr": 1.2566300376546135e-06, "epoch": 2.3782766960643, "percentage": 47.57, "elapsed_time": "2:05:57", "remaining_time": "2:18:51", "throughput": 8682.83, "total_tokens": 65620136} +{"current_steps": 97355, "total_steps": 204665, "loss": 0.068, "lr": 1.2565476150934472e-06, "epoch": 2.3783988468961472, "percentage": 47.57, "elapsed_time": "2:05:57", "remaining_time": "2:18:50", "throughput": 8682.87, "total_tokens": 65623464} +{"current_steps": 97360, "total_steps": 204665, "loss": 0.0003, "lr": 1.2564651906665979e-06, "epoch": 2.3785209977279944, "percentage": 47.57, "elapsed_time": "2:05:58", "remaining_time": "2:18:50", "throughput": 8682.96, "total_tokens": 65627176} +{"current_steps": 97365, "total_steps": 204665, "loss": 0.0387, "lr": 1.2563827643746644e-06, "epoch": 2.3786431485598416, "percentage": 47.57, "elapsed_time": "2:05:58", "remaining_time": "2:18:49", "throughput": 8683.21, "total_tokens": 65632552} +{"current_steps": 97370, "total_steps": 204665, "loss": 0.1144, "lr": 1.2563003362182466e-06, "epoch": 2.378765299391689, "percentage": 47.58, "elapsed_time": "2:05:58", "remaining_time": "2:18:49", "throughput": 8683.25, "total_tokens": 65635880} +{"current_steps": 97375, "total_steps": 204665, "loss": 0.0004, "lr": 1.256217906197944e-06, "epoch": 2.378887450223536, "percentage": 47.58, "elapsed_time": "2:05:59", "remaining_time": "2:18:48", "throughput": 8683.29, "total_tokens": 65639208} +{"current_steps": 97380, "total_steps": 204665, "loss": 0.1245, "lr": 1.2561354743143558e-06, "epoch": 2.379009601055383, "percentage": 47.58, "elapsed_time": "2:05:59", "remaining_time": "2:18:48", "throughput": 8683.31, "total_tokens": 65642344} +{"current_steps": 97385, "total_steps": 204665, "loss": 0.0055, "lr": 1.2560530405680813e-06, "epoch": 2.3791317518872304, "percentage": 47.58, "elapsed_time": "2:05:59", "remaining_time": "2:18:48", "throughput": 8683.35, "total_tokens": 65645608} +{"current_steps": 97390, "total_steps": 204665, "loss": 0.0488, "lr": 1.2559706049597205e-06, "epoch": 2.3792539027190776, "percentage": 47.59, "elapsed_time": "2:06:00", "remaining_time": "2:18:47", "throughput": 8683.4, "total_tokens": 65649064} +{"current_steps": 97395, "total_steps": 204665, "loss": 0.0002, "lr": 1.2558881674898727e-06, "epoch": 2.3793760535509247, "percentage": 47.59, "elapsed_time": "2:06:00", "remaining_time": "2:18:47", "throughput": 8683.4, "total_tokens": 65651944} +{"current_steps": 97400, "total_steps": 204665, "loss": 0.0002, "lr": 1.2558057281591373e-06, "epoch": 2.379498204382772, "percentage": 47.59, "elapsed_time": "2:06:00", "remaining_time": "2:18:46", "throughput": 8683.45, "total_tokens": 65655336} +{"current_steps": 97405, "total_steps": 204665, "loss": 0.0006, "lr": 1.2557232869681136e-06, "epoch": 2.379620355214619, "percentage": 47.59, "elapsed_time": "2:06:01", "remaining_time": "2:18:46", "throughput": 8683.47, "total_tokens": 65658536} +{"current_steps": 97410, "total_steps": 204665, "loss": 0.0114, "lr": 1.2556408439174016e-06, "epoch": 2.3797425060464663, "percentage": 47.59, "elapsed_time": "2:06:01", "remaining_time": "2:18:45", "throughput": 8683.5, "total_tokens": 65661736} +{"current_steps": 97415, "total_steps": 204665, "loss": 0.0002, "lr": 1.2555583990076005e-06, "epoch": 2.3798646568783135, "percentage": 47.6, "elapsed_time": "2:06:02", "remaining_time": "2:18:45", "throughput": 8683.55, "total_tokens": 65665064} +{"current_steps": 97420, "total_steps": 204665, "loss": 0.0342, "lr": 1.25547595223931e-06, "epoch": 2.3799868077101607, "percentage": 47.6, "elapsed_time": "2:06:02", "remaining_time": "2:18:45", "throughput": 8683.61, "total_tokens": 65668648} +{"current_steps": 97425, "total_steps": 204665, "loss": 0.0004, "lr": 1.2553935036131294e-06, "epoch": 2.3801089585420074, "percentage": 47.6, "elapsed_time": "2:06:02", "remaining_time": "2:18:44", "throughput": 8683.62, "total_tokens": 65671656} +{"current_steps": 97430, "total_steps": 204665, "loss": 0.0378, "lr": 1.2553110531296588e-06, "epoch": 2.380231109373855, "percentage": 47.6, "elapsed_time": "2:06:03", "remaining_time": "2:18:44", "throughput": 8683.66, "total_tokens": 65674920} +{"current_steps": 97435, "total_steps": 204665, "loss": 0.0491, "lr": 1.2552286007894974e-06, "epoch": 2.380353260205702, "percentage": 47.61, "elapsed_time": "2:06:03", "remaining_time": "2:18:43", "throughput": 8683.74, "total_tokens": 65678632} +{"current_steps": 97440, "total_steps": 204665, "loss": 0.2491, "lr": 1.2551461465932453e-06, "epoch": 2.380475411037549, "percentage": 47.61, "elapsed_time": "2:06:03", "remaining_time": "2:18:43", "throughput": 8683.76, "total_tokens": 65681768} +{"current_steps": 97445, "total_steps": 204665, "loss": 0.0065, "lr": 1.2550636905415014e-06, "epoch": 2.380597561869396, "percentage": 47.61, "elapsed_time": "2:06:04", "remaining_time": "2:18:42", "throughput": 8683.83, "total_tokens": 65685352} +{"current_steps": 97450, "total_steps": 204665, "loss": 0.0148, "lr": 1.2549812326348662e-06, "epoch": 2.3807197127012434, "percentage": 47.61, "elapsed_time": "2:06:04", "remaining_time": "2:18:42", "throughput": 8683.85, "total_tokens": 65688488} +{"current_steps": 97455, "total_steps": 204665, "loss": 0.0004, "lr": 1.2548987728739386e-06, "epoch": 2.3808418635330906, "percentage": 47.62, "elapsed_time": "2:06:04", "remaining_time": "2:18:42", "throughput": 8683.91, "total_tokens": 65692008} +{"current_steps": 97460, "total_steps": 204665, "loss": 0.095, "lr": 1.2548163112593187e-06, "epoch": 2.3809640143649378, "percentage": 47.62, "elapsed_time": "2:06:05", "remaining_time": "2:18:41", "throughput": 8683.97, "total_tokens": 65695464} +{"current_steps": 97465, "total_steps": 204665, "loss": 0.0882, "lr": 1.2547338477916058e-06, "epoch": 2.381086165196785, "percentage": 47.62, "elapsed_time": "2:06:05", "remaining_time": "2:18:41", "throughput": 8684.0, "total_tokens": 65698728} +{"current_steps": 97470, "total_steps": 204665, "loss": 0.0001, "lr": 1.2546513824714e-06, "epoch": 2.381208316028632, "percentage": 47.62, "elapsed_time": "2:06:05", "remaining_time": "2:18:40", "throughput": 8684.03, "total_tokens": 65701928} +{"current_steps": 97475, "total_steps": 204665, "loss": 0.0002, "lr": 1.2545689152993008e-06, "epoch": 2.3813304668604793, "percentage": 47.63, "elapsed_time": "2:06:06", "remaining_time": "2:18:40", "throughput": 8684.08, "total_tokens": 65705320} +{"current_steps": 97480, "total_steps": 204665, "loss": 0.0001, "lr": 1.2544864462759083e-06, "epoch": 2.3814526176923265, "percentage": 47.63, "elapsed_time": "2:06:06", "remaining_time": "2:18:39", "throughput": 8684.15, "total_tokens": 65708904} +{"current_steps": 97485, "total_steps": 204665, "loss": 0.0393, "lr": 1.2544039754018213e-06, "epoch": 2.3815747685241737, "percentage": 47.63, "elapsed_time": "2:06:06", "remaining_time": "2:18:39", "throughput": 8684.2, "total_tokens": 65712296} +{"current_steps": 97490, "total_steps": 204665, "loss": 0.0427, "lr": 1.2543215026776406e-06, "epoch": 2.381696919356021, "percentage": 47.63, "elapsed_time": "2:06:07", "remaining_time": "2:18:38", "throughput": 8684.27, "total_tokens": 65715880} +{"current_steps": 97495, "total_steps": 204665, "loss": 0.0008, "lr": 1.2542390281039654e-06, "epoch": 2.381819070187868, "percentage": 47.64, "elapsed_time": "2:06:07", "remaining_time": "2:18:38", "throughput": 8684.35, "total_tokens": 65719528} +{"current_steps": 97500, "total_steps": 204665, "loss": 0.04, "lr": 1.254156551681396e-06, "epoch": 2.3819412210197153, "percentage": 47.64, "elapsed_time": "2:06:07", "remaining_time": "2:18:38", "throughput": 8684.4, "total_tokens": 65722920} +{"current_steps": 97505, "total_steps": 204665, "loss": 0.0423, "lr": 1.2540740734105313e-06, "epoch": 2.3820633718515625, "percentage": 47.64, "elapsed_time": "2:06:08", "remaining_time": "2:18:37", "throughput": 8684.45, "total_tokens": 65726376} +{"current_steps": 97510, "total_steps": 204665, "loss": 0.0005, "lr": 1.2539915932919717e-06, "epoch": 2.3821855226834097, "percentage": 47.64, "elapsed_time": "2:06:08", "remaining_time": "2:18:37", "throughput": 8684.49, "total_tokens": 65729704} +{"current_steps": 97515, "total_steps": 204665, "loss": 0.0778, "lr": 1.2539091113263172e-06, "epoch": 2.382307673515257, "percentage": 47.65, "elapsed_time": "2:06:08", "remaining_time": "2:18:36", "throughput": 8684.55, "total_tokens": 65733160} +{"current_steps": 97520, "total_steps": 204665, "loss": 0.0717, "lr": 1.2538266275141667e-06, "epoch": 2.3824298243471036, "percentage": 47.65, "elapsed_time": "2:06:09", "remaining_time": "2:18:36", "throughput": 8684.6, "total_tokens": 65736616} +{"current_steps": 97525, "total_steps": 204665, "loss": 0.0003, "lr": 1.2537441418561213e-06, "epoch": 2.382551975178951, "percentage": 47.65, "elapsed_time": "2:06:09", "remaining_time": "2:18:35", "throughput": 8684.62, "total_tokens": 65739752} +{"current_steps": 97530, "total_steps": 204665, "loss": 0.0002, "lr": 1.25366165435278e-06, "epoch": 2.382674126010798, "percentage": 47.65, "elapsed_time": "2:06:10", "remaining_time": "2:18:35", "throughput": 8684.67, "total_tokens": 65743144} +{"current_steps": 97535, "total_steps": 204665, "loss": 0.0535, "lr": 1.2535791650047428e-06, "epoch": 2.382796276842645, "percentage": 47.66, "elapsed_time": "2:06:10", "remaining_time": "2:18:35", "throughput": 8684.7, "total_tokens": 65746344} +{"current_steps": 97540, "total_steps": 204665, "loss": 0.0053, "lr": 1.25349667381261e-06, "epoch": 2.3829184276744924, "percentage": 47.66, "elapsed_time": "2:06:10", "remaining_time": "2:18:34", "throughput": 8684.84, "total_tokens": 65750568} +{"current_steps": 97545, "total_steps": 204665, "loss": 0.0004, "lr": 1.2534141807769811e-06, "epoch": 2.3830405785063395, "percentage": 47.66, "elapsed_time": "2:06:11", "remaining_time": "2:18:34", "throughput": 8684.89, "total_tokens": 65753960} +{"current_steps": 97550, "total_steps": 204665, "loss": 0.0001, "lr": 1.253331685898456e-06, "epoch": 2.3831627293381867, "percentage": 47.66, "elapsed_time": "2:06:11", "remaining_time": "2:18:33", "throughput": 8684.91, "total_tokens": 65757160} +{"current_steps": 97555, "total_steps": 204665, "loss": 0.0002, "lr": 1.253249189177635e-06, "epoch": 2.383284880170034, "percentage": 47.67, "elapsed_time": "2:06:11", "remaining_time": "2:18:33", "throughput": 8684.95, "total_tokens": 65760424} +{"current_steps": 97560, "total_steps": 204665, "loss": 0.0003, "lr": 1.2531666906151177e-06, "epoch": 2.383407031001881, "percentage": 47.67, "elapsed_time": "2:06:12", "remaining_time": "2:18:32", "throughput": 8684.96, "total_tokens": 65763432} +{"current_steps": 97565, "total_steps": 204665, "loss": 0.0001, "lr": 1.253084190211504e-06, "epoch": 2.3835291818337283, "percentage": 47.67, "elapsed_time": "2:06:12", "remaining_time": "2:18:32", "throughput": 8685.01, "total_tokens": 65766824} +{"current_steps": 97570, "total_steps": 204665, "loss": 0.0003, "lr": 1.2530016879673942e-06, "epoch": 2.3836513326655755, "percentage": 47.67, "elapsed_time": "2:06:12", "remaining_time": "2:18:32", "throughput": 8685.03, "total_tokens": 65770024} +{"current_steps": 97575, "total_steps": 204665, "loss": 0.0373, "lr": 1.252919183883388e-06, "epoch": 2.3837734834974227, "percentage": 47.68, "elapsed_time": "2:06:13", "remaining_time": "2:18:31", "throughput": 8685.1, "total_tokens": 65773544} +{"current_steps": 97580, "total_steps": 204665, "loss": 0.0311, "lr": 1.252836677960085e-06, "epoch": 2.38389563432927, "percentage": 47.68, "elapsed_time": "2:06:13", "remaining_time": "2:18:31", "throughput": 8685.18, "total_tokens": 65777256} +{"current_steps": 97585, "total_steps": 204665, "loss": 0.0398, "lr": 1.2527541701980861e-06, "epoch": 2.384017785161117, "percentage": 47.68, "elapsed_time": "2:06:13", "remaining_time": "2:18:30", "throughput": 8685.2, "total_tokens": 65780392} +{"current_steps": 97590, "total_steps": 204665, "loss": 0.0601, "lr": 1.2526716605979909e-06, "epoch": 2.3841399359929643, "percentage": 47.68, "elapsed_time": "2:06:14", "remaining_time": "2:18:30", "throughput": 8685.27, "total_tokens": 65783976} +{"current_steps": 97595, "total_steps": 204665, "loss": 0.0002, "lr": 1.2525891491603995e-06, "epoch": 2.3842620868248114, "percentage": 47.69, "elapsed_time": "2:06:14", "remaining_time": "2:18:29", "throughput": 8685.31, "total_tokens": 65787304} +{"current_steps": 97600, "total_steps": 204665, "loss": 0.1335, "lr": 1.2525066358859119e-06, "epoch": 2.3843842376566586, "percentage": 47.69, "elapsed_time": "2:06:14", "remaining_time": "2:18:29", "throughput": 8685.41, "total_tokens": 65791144} +{"current_steps": 97605, "total_steps": 204665, "loss": 0.0001, "lr": 1.2524241207751278e-06, "epoch": 2.3845063884885054, "percentage": 47.69, "elapsed_time": "2:06:15", "remaining_time": "2:18:29", "throughput": 8685.49, "total_tokens": 65794792} +{"current_steps": 97610, "total_steps": 204665, "loss": 0.0413, "lr": 1.2523416038286478e-06, "epoch": 2.384628539320353, "percentage": 47.69, "elapsed_time": "2:06:15", "remaining_time": "2:18:28", "throughput": 8685.53, "total_tokens": 65798184} +{"current_steps": 97615, "total_steps": 204665, "loss": 0.0003, "lr": 1.2522590850470717e-06, "epoch": 2.3847506901521998, "percentage": 47.7, "elapsed_time": "2:06:15", "remaining_time": "2:18:28", "throughput": 8685.57, "total_tokens": 65801512} +{"current_steps": 97620, "total_steps": 204665, "loss": 0.0397, "lr": 1.2521765644309998e-06, "epoch": 2.384872840984047, "percentage": 47.7, "elapsed_time": "2:06:16", "remaining_time": "2:18:27", "throughput": 8685.58, "total_tokens": 65804520} +{"current_steps": 97625, "total_steps": 204665, "loss": 0.0002, "lr": 1.252094041981032e-06, "epoch": 2.384994991815894, "percentage": 47.7, "elapsed_time": "2:06:16", "remaining_time": "2:18:27", "throughput": 8685.62, "total_tokens": 65807848} +{"current_steps": 97630, "total_steps": 204665, "loss": 0.0603, "lr": 1.2520115176977686e-06, "epoch": 2.3851171426477413, "percentage": 47.7, "elapsed_time": "2:06:16", "remaining_time": "2:18:26", "throughput": 8685.68, "total_tokens": 65811304} +{"current_steps": 97635, "total_steps": 204665, "loss": 0.0478, "lr": 1.2519289915818096e-06, "epoch": 2.3852392934795885, "percentage": 47.7, "elapsed_time": "2:06:17", "remaining_time": "2:18:26", "throughput": 8685.72, "total_tokens": 65814632} +{"current_steps": 97640, "total_steps": 204665, "loss": 0.0407, "lr": 1.2518464636337552e-06, "epoch": 2.3853614443114357, "percentage": 47.71, "elapsed_time": "2:06:17", "remaining_time": "2:18:26", "throughput": 8685.76, "total_tokens": 65817896} +{"current_steps": 97645, "total_steps": 204665, "loss": 0.0489, "lr": 1.2517639338542056e-06, "epoch": 2.385483595143283, "percentage": 47.71, "elapsed_time": "2:06:18", "remaining_time": "2:18:25", "throughput": 8685.81, "total_tokens": 65821288} +{"current_steps": 97650, "total_steps": 204665, "loss": 0.0002, "lr": 1.251681402243761e-06, "epoch": 2.38560574597513, "percentage": 47.71, "elapsed_time": "2:06:18", "remaining_time": "2:18:25", "throughput": 8685.88, "total_tokens": 65824936} +{"current_steps": 97655, "total_steps": 204665, "loss": 0.1391, "lr": 1.2515988688030217e-06, "epoch": 2.3857278968069773, "percentage": 47.71, "elapsed_time": "2:06:18", "remaining_time": "2:18:24", "throughput": 8685.91, "total_tokens": 65828072} +{"current_steps": 97660, "total_steps": 204665, "loss": 0.0664, "lr": 1.2515163335325875e-06, "epoch": 2.3858500476388245, "percentage": 47.72, "elapsed_time": "2:06:19", "remaining_time": "2:18:24", "throughput": 8685.96, "total_tokens": 65831528} +{"current_steps": 97665, "total_steps": 204665, "loss": 0.0578, "lr": 1.251433796433059e-06, "epoch": 2.3859721984706717, "percentage": 47.72, "elapsed_time": "2:06:19", "remaining_time": "2:18:23", "throughput": 8686.01, "total_tokens": 65834984} +{"current_steps": 97670, "total_steps": 204665, "loss": 0.0484, "lr": 1.2513512575050365e-06, "epoch": 2.386094349302519, "percentage": 47.72, "elapsed_time": "2:06:19", "remaining_time": "2:18:23", "throughput": 8686.03, "total_tokens": 65838056} +{"current_steps": 97675, "total_steps": 204665, "loss": 0.0519, "lr": 1.2512687167491193e-06, "epoch": 2.386216500134366, "percentage": 47.72, "elapsed_time": "2:06:20", "remaining_time": "2:18:23", "throughput": 8686.07, "total_tokens": 65841320} +{"current_steps": 97680, "total_steps": 204665, "loss": 0.0002, "lr": 1.2511861741659092e-06, "epoch": 2.3863386509662132, "percentage": 47.73, "elapsed_time": "2:06:20", "remaining_time": "2:18:22", "throughput": 8686.18, "total_tokens": 65845288} +{"current_steps": 97685, "total_steps": 204665, "loss": 0.0007, "lr": 1.2511036297560054e-06, "epoch": 2.3864608017980604, "percentage": 47.73, "elapsed_time": "2:06:20", "remaining_time": "2:18:22", "throughput": 8686.22, "total_tokens": 65848680} +{"current_steps": 97690, "total_steps": 204665, "loss": 0.0004, "lr": 1.2510210835200082e-06, "epoch": 2.386582952629907, "percentage": 47.73, "elapsed_time": "2:06:21", "remaining_time": "2:18:21", "throughput": 8686.25, "total_tokens": 65851944} +{"current_steps": 97695, "total_steps": 204665, "loss": 0.0586, "lr": 1.2509385354585187e-06, "epoch": 2.386705103461755, "percentage": 47.73, "elapsed_time": "2:06:21", "remaining_time": "2:18:21", "throughput": 8686.26, "total_tokens": 65854952} +{"current_steps": 97700, "total_steps": 204665, "loss": 0.0007, "lr": 1.2508559855721363e-06, "epoch": 2.3868272542936015, "percentage": 47.74, "elapsed_time": "2:06:21", "remaining_time": "2:18:20", "throughput": 8686.3, "total_tokens": 65858216} +{"current_steps": 97705, "total_steps": 204665, "loss": 0.0336, "lr": 1.250773433861462e-06, "epoch": 2.3869494051254487, "percentage": 47.74, "elapsed_time": "2:06:22", "remaining_time": "2:18:20", "throughput": 8686.32, "total_tokens": 65861352} +{"current_steps": 97710, "total_steps": 204665, "loss": 0.0408, "lr": 1.2506908803270954e-06, "epoch": 2.387071555957296, "percentage": 47.74, "elapsed_time": "2:06:22", "remaining_time": "2:18:19", "throughput": 8686.36, "total_tokens": 65864616} +{"current_steps": 97715, "total_steps": 204665, "loss": 0.0982, "lr": 1.2506083249696374e-06, "epoch": 2.387193706789143, "percentage": 47.74, "elapsed_time": "2:06:22", "remaining_time": "2:18:19", "throughput": 8686.38, "total_tokens": 65867816} +{"current_steps": 97720, "total_steps": 204665, "loss": 0.0005, "lr": 1.2505257677896887e-06, "epoch": 2.3873158576209903, "percentage": 47.75, "elapsed_time": "2:06:23", "remaining_time": "2:18:19", "throughput": 8686.44, "total_tokens": 65871272} +{"current_steps": 97725, "total_steps": 204665, "loss": 0.0002, "lr": 1.250443208787849e-06, "epoch": 2.3874380084528375, "percentage": 47.75, "elapsed_time": "2:06:23", "remaining_time": "2:18:18", "throughput": 8686.49, "total_tokens": 65874728} +{"current_steps": 97730, "total_steps": 204665, "loss": 0.0001, "lr": 1.2503606479647189e-06, "epoch": 2.3875601592846847, "percentage": 47.75, "elapsed_time": "2:06:23", "remaining_time": "2:18:18", "throughput": 8686.52, "total_tokens": 65877928} +{"current_steps": 97735, "total_steps": 204665, "loss": 0.1042, "lr": 1.2502780853208986e-06, "epoch": 2.387682310116532, "percentage": 47.75, "elapsed_time": "2:06:24", "remaining_time": "2:18:17", "throughput": 8686.62, "total_tokens": 65881768} +{"current_steps": 97740, "total_steps": 204665, "loss": 0.0326, "lr": 1.2501955208569887e-06, "epoch": 2.387804460948379, "percentage": 47.76, "elapsed_time": "2:06:24", "remaining_time": "2:18:17", "throughput": 8686.65, "total_tokens": 65885032} +{"current_steps": 97745, "total_steps": 204665, "loss": 0.0472, "lr": 1.25011295457359e-06, "epoch": 2.3879266117802262, "percentage": 47.76, "elapsed_time": "2:06:24", "remaining_time": "2:18:16", "throughput": 8686.71, "total_tokens": 65888552} +{"current_steps": 97750, "total_steps": 204665, "loss": 0.0001, "lr": 1.2500303864713027e-06, "epoch": 2.3880487626120734, "percentage": 47.76, "elapsed_time": "2:06:25", "remaining_time": "2:18:16", "throughput": 8686.72, "total_tokens": 65891560} +{"current_steps": 97755, "total_steps": 204665, "loss": 0.0461, "lr": 1.249947816550727e-06, "epoch": 2.3881709134439206, "percentage": 47.76, "elapsed_time": "2:06:25", "remaining_time": "2:18:16", "throughput": 8686.74, "total_tokens": 65894760} +{"current_steps": 97760, "total_steps": 204665, "loss": 0.0464, "lr": 1.2498652448124634e-06, "epoch": 2.388293064275768, "percentage": 47.77, "elapsed_time": "2:06:26", "remaining_time": "2:18:15", "throughput": 8686.76, "total_tokens": 65897832} +{"current_steps": 97765, "total_steps": 204665, "loss": 0.0001, "lr": 1.2497826712571126e-06, "epoch": 2.388415215107615, "percentage": 47.77, "elapsed_time": "2:06:26", "remaining_time": "2:18:15", "throughput": 8686.77, "total_tokens": 65900840} +{"current_steps": 97770, "total_steps": 204665, "loss": 0.0001, "lr": 1.2497000958852753e-06, "epoch": 2.388537365939462, "percentage": 47.77, "elapsed_time": "2:06:26", "remaining_time": "2:18:14", "throughput": 8686.8, "total_tokens": 65904104} +{"current_steps": 97775, "total_steps": 204665, "loss": 0.0938, "lr": 1.2496175186975514e-06, "epoch": 2.3886595167713094, "percentage": 47.77, "elapsed_time": "2:06:27", "remaining_time": "2:18:14", "throughput": 8686.86, "total_tokens": 65907560} +{"current_steps": 97780, "total_steps": 204665, "loss": 0.0753, "lr": 1.249534939694542e-06, "epoch": 2.3887816676031566, "percentage": 47.78, "elapsed_time": "2:06:27", "remaining_time": "2:18:13", "throughput": 8686.89, "total_tokens": 65910760} +{"current_steps": 97785, "total_steps": 204665, "loss": 0.0403, "lr": 1.2494523588768473e-06, "epoch": 2.3889038184350033, "percentage": 47.78, "elapsed_time": "2:06:27", "remaining_time": "2:18:13", "throughput": 8686.94, "total_tokens": 65914152} +{"current_steps": 97790, "total_steps": 204665, "loss": 0.0392, "lr": 1.2493697762450681e-06, "epoch": 2.3890259692668505, "percentage": 47.78, "elapsed_time": "2:06:28", "remaining_time": "2:18:13", "throughput": 8687.0, "total_tokens": 65917672} +{"current_steps": 97795, "total_steps": 204665, "loss": 0.0004, "lr": 1.2492871917998048e-06, "epoch": 2.3891481200986977, "percentage": 47.78, "elapsed_time": "2:06:28", "remaining_time": "2:18:12", "throughput": 8687.02, "total_tokens": 65920808} +{"current_steps": 97800, "total_steps": 204665, "loss": 0.0705, "lr": 1.2492046055416576e-06, "epoch": 2.389270270930545, "percentage": 47.79, "elapsed_time": "2:06:28", "remaining_time": "2:18:12", "throughput": 8687.05, "total_tokens": 65923944} +{"current_steps": 97805, "total_steps": 204665, "loss": 0.0001, "lr": 1.249122017471228e-06, "epoch": 2.389392421762392, "percentage": 47.79, "elapsed_time": "2:06:29", "remaining_time": "2:18:11", "throughput": 8687.09, "total_tokens": 65927272} +{"current_steps": 97810, "total_steps": 204665, "loss": 0.0001, "lr": 1.2490394275891159e-06, "epoch": 2.3895145725942393, "percentage": 47.79, "elapsed_time": "2:06:29", "remaining_time": "2:18:11", "throughput": 8687.1, "total_tokens": 65930280} +{"current_steps": 97815, "total_steps": 204665, "loss": 0.0004, "lr": 1.248956835895922e-06, "epoch": 2.3896367234260865, "percentage": 47.79, "elapsed_time": "2:06:29", "remaining_time": "2:18:10", "throughput": 8687.15, "total_tokens": 65933736} +{"current_steps": 97820, "total_steps": 204665, "loss": 0.1203, "lr": 1.2488742423922472e-06, "epoch": 2.3897588742579337, "percentage": 47.8, "elapsed_time": "2:06:30", "remaining_time": "2:18:10", "throughput": 8687.23, "total_tokens": 65937384} +{"current_steps": 97825, "total_steps": 204665, "loss": 0.0294, "lr": 1.2487916470786916e-06, "epoch": 2.389881025089781, "percentage": 47.8, "elapsed_time": "2:06:30", "remaining_time": "2:18:10", "throughput": 8687.3, "total_tokens": 65941032} +{"current_steps": 97830, "total_steps": 204665, "loss": 0.0692, "lr": 1.2487090499558563e-06, "epoch": 2.390003175921628, "percentage": 47.8, "elapsed_time": "2:06:30", "remaining_time": "2:18:09", "throughput": 8687.43, "total_tokens": 65945192} +{"current_steps": 97835, "total_steps": 204665, "loss": 0.0003, "lr": 1.248626451024342e-06, "epoch": 2.390125326753475, "percentage": 47.8, "elapsed_time": "2:06:31", "remaining_time": "2:18:09", "throughput": 8687.42, "total_tokens": 65948008} +{"current_steps": 97840, "total_steps": 204665, "loss": 0.0314, "lr": 1.2485438502847494e-06, "epoch": 2.3902474775853224, "percentage": 47.8, "elapsed_time": "2:06:31", "remaining_time": "2:18:08", "throughput": 8687.43, "total_tokens": 65951016} +{"current_steps": 97845, "total_steps": 204665, "loss": 0.0001, "lr": 1.248461247737679e-06, "epoch": 2.3903696284171696, "percentage": 47.81, "elapsed_time": "2:06:31", "remaining_time": "2:18:08", "throughput": 8687.49, "total_tokens": 65954600} +{"current_steps": 97850, "total_steps": 204665, "loss": 0.0002, "lr": 1.2483786433837319e-06, "epoch": 2.390491779249017, "percentage": 47.81, "elapsed_time": "2:06:32", "remaining_time": "2:18:07", "throughput": 8687.53, "total_tokens": 65957864} +{"current_steps": 97855, "total_steps": 204665, "loss": 0.1456, "lr": 1.2482960372235082e-06, "epoch": 2.390613930080864, "percentage": 47.81, "elapsed_time": "2:06:32", "remaining_time": "2:18:07", "throughput": 8687.58, "total_tokens": 65961320} +{"current_steps": 97860, "total_steps": 204665, "loss": 0.0886, "lr": 1.2482134292576088e-06, "epoch": 2.390736080912711, "percentage": 47.81, "elapsed_time": "2:06:32", "remaining_time": "2:18:06", "throughput": 8687.62, "total_tokens": 65964584} +{"current_steps": 97865, "total_steps": 204665, "loss": 0.0002, "lr": 1.2481308194866347e-06, "epoch": 2.3908582317445584, "percentage": 47.82, "elapsed_time": "2:06:33", "remaining_time": "2:18:06", "throughput": 8687.63, "total_tokens": 65967656} +{"current_steps": 97870, "total_steps": 204665, "loss": 0.0497, "lr": 1.2480482079111864e-06, "epoch": 2.390980382576405, "percentage": 47.82, "elapsed_time": "2:06:33", "remaining_time": "2:18:06", "throughput": 8687.65, "total_tokens": 65970728} +{"current_steps": 97875, "total_steps": 204665, "loss": 0.0006, "lr": 1.2479655945318652e-06, "epoch": 2.3911025334082527, "percentage": 47.82, "elapsed_time": "2:06:33", "remaining_time": "2:18:05", "throughput": 8687.72, "total_tokens": 65974312} +{"current_steps": 97880, "total_steps": 204665, "loss": 0.0443, "lr": 1.2478829793492712e-06, "epoch": 2.3912246842400995, "percentage": 47.82, "elapsed_time": "2:06:34", "remaining_time": "2:18:05", "throughput": 8687.78, "total_tokens": 65977768} +{"current_steps": 97885, "total_steps": 204665, "loss": 0.0368, "lr": 1.2478003623640056e-06, "epoch": 2.3913468350719467, "percentage": 47.83, "elapsed_time": "2:06:34", "remaining_time": "2:18:04", "throughput": 8687.82, "total_tokens": 65981096} +{"current_steps": 97890, "total_steps": 204665, "loss": 0.0628, "lr": 1.2477177435766687e-06, "epoch": 2.391468985903794, "percentage": 47.83, "elapsed_time": "2:06:35", "remaining_time": "2:18:04", "throughput": 8687.87, "total_tokens": 65984488} +{"current_steps": 97895, "total_steps": 204665, "loss": 0.0001, "lr": 1.2476351229878624e-06, "epoch": 2.391591136735641, "percentage": 47.83, "elapsed_time": "2:06:35", "remaining_time": "2:18:03", "throughput": 8687.93, "total_tokens": 65988008} +{"current_steps": 97900, "total_steps": 204665, "loss": 0.0003, "lr": 1.2475525005981867e-06, "epoch": 2.3917132875674882, "percentage": 47.83, "elapsed_time": "2:06:35", "remaining_time": "2:18:03", "throughput": 8687.94, "total_tokens": 65991016} +{"current_steps": 97905, "total_steps": 204665, "loss": 0.001, "lr": 1.2474698764082423e-06, "epoch": 2.3918354383993354, "percentage": 47.84, "elapsed_time": "2:06:36", "remaining_time": "2:18:03", "throughput": 8687.99, "total_tokens": 65994408} +{"current_steps": 97910, "total_steps": 204665, "loss": 0.0003, "lr": 1.2473872504186306e-06, "epoch": 2.3919575892311826, "percentage": 47.84, "elapsed_time": "2:06:36", "remaining_time": "2:18:02", "throughput": 8688.04, "total_tokens": 65997800} +{"current_steps": 97915, "total_steps": 204665, "loss": 0.0003, "lr": 1.2473046226299523e-06, "epoch": 2.39207974006303, "percentage": 47.84, "elapsed_time": "2:06:36", "remaining_time": "2:18:02", "throughput": 8688.12, "total_tokens": 66001512} +{"current_steps": 97920, "total_steps": 204665, "loss": 0.0561, "lr": 1.2472219930428086e-06, "epoch": 2.392201890894877, "percentage": 47.84, "elapsed_time": "2:06:37", "remaining_time": "2:18:01", "throughput": 8688.14, "total_tokens": 66004584} +{"current_steps": 97925, "total_steps": 204665, "loss": 0.0565, "lr": 1.2471393616577995e-06, "epoch": 2.392324041726724, "percentage": 47.85, "elapsed_time": "2:06:37", "remaining_time": "2:18:01", "throughput": 8688.2, "total_tokens": 66008104} +{"current_steps": 97930, "total_steps": 204665, "loss": 0.0005, "lr": 1.2470567284755267e-06, "epoch": 2.3924461925585714, "percentage": 47.85, "elapsed_time": "2:06:37", "remaining_time": "2:18:00", "throughput": 8688.25, "total_tokens": 66011496} +{"current_steps": 97935, "total_steps": 204665, "loss": 0.0514, "lr": 1.246974093496591e-06, "epoch": 2.3925683433904186, "percentage": 47.85, "elapsed_time": "2:06:38", "remaining_time": "2:18:00", "throughput": 8688.29, "total_tokens": 66014824} +{"current_steps": 97940, "total_steps": 204665, "loss": 0.0001, "lr": 1.2468914567215933e-06, "epoch": 2.3926904942222658, "percentage": 47.85, "elapsed_time": "2:06:38", "remaining_time": "2:18:00", "throughput": 8688.35, "total_tokens": 66018344} +{"current_steps": 97945, "total_steps": 204665, "loss": 0.0001, "lr": 1.2468088181511345e-06, "epoch": 2.392812645054113, "percentage": 47.86, "elapsed_time": "2:06:38", "remaining_time": "2:17:59", "throughput": 8688.34, "total_tokens": 66021160} +{"current_steps": 97950, "total_steps": 204665, "loss": 0.0004, "lr": 1.2467261777858156e-06, "epoch": 2.39293479588596, "percentage": 47.86, "elapsed_time": "2:06:39", "remaining_time": "2:17:59", "throughput": 8688.39, "total_tokens": 66024616} +{"current_steps": 97955, "total_steps": 204665, "loss": 0.0239, "lr": 1.2466435356262372e-06, "epoch": 2.3930569467178073, "percentage": 47.86, "elapsed_time": "2:06:39", "remaining_time": "2:17:58", "throughput": 8688.43, "total_tokens": 66027944} +{"current_steps": 97960, "total_steps": 204665, "loss": 0.1521, "lr": 1.246560891673001e-06, "epoch": 2.3931790975496545, "percentage": 47.86, "elapsed_time": "2:06:39", "remaining_time": "2:17:58", "throughput": 8688.45, "total_tokens": 66031080} +{"current_steps": 97965, "total_steps": 204665, "loss": 0.088, "lr": 1.2464782459267078e-06, "epoch": 2.3933012483815013, "percentage": 47.87, "elapsed_time": "2:06:40", "remaining_time": "2:17:57", "throughput": 8688.6, "total_tokens": 66035432} +{"current_steps": 97970, "total_steps": 204665, "loss": 0.0542, "lr": 1.2463955983879584e-06, "epoch": 2.3934233992133485, "percentage": 47.87, "elapsed_time": "2:06:40", "remaining_time": "2:17:57", "throughput": 8688.61, "total_tokens": 66038440} +{"current_steps": 97975, "total_steps": 204665, "loss": 0.0005, "lr": 1.2463129490573538e-06, "epoch": 2.3935455500451956, "percentage": 47.87, "elapsed_time": "2:06:40", "remaining_time": "2:17:57", "throughput": 8688.62, "total_tokens": 66041512} +{"current_steps": 97980, "total_steps": 204665, "loss": 0.0001, "lr": 1.2462302979354955e-06, "epoch": 2.393667700877043, "percentage": 47.87, "elapsed_time": "2:06:41", "remaining_time": "2:17:56", "throughput": 8688.66, "total_tokens": 66044840} +{"current_steps": 97985, "total_steps": 204665, "loss": 0.057, "lr": 1.2461476450229838e-06, "epoch": 2.39378985170889, "percentage": 47.88, "elapsed_time": "2:06:41", "remaining_time": "2:17:56", "throughput": 8688.71, "total_tokens": 66048296} +{"current_steps": 97990, "total_steps": 204665, "loss": 0.0002, "lr": 1.2460649903204204e-06, "epoch": 2.393912002540737, "percentage": 47.88, "elapsed_time": "2:06:41", "remaining_time": "2:17:55", "throughput": 8688.74, "total_tokens": 66051496} +{"current_steps": 97995, "total_steps": 204665, "loss": 0.0002, "lr": 1.245982333828406e-06, "epoch": 2.3940341533725844, "percentage": 47.88, "elapsed_time": "2:06:42", "remaining_time": "2:17:55", "throughput": 8688.84, "total_tokens": 66055400} +{"current_steps": 98000, "total_steps": 204665, "loss": 0.1055, "lr": 1.2458996755475424e-06, "epoch": 2.3941563042044316, "percentage": 47.88, "elapsed_time": "2:06:42", "remaining_time": "2:17:54", "throughput": 8688.88, "total_tokens": 66058728} +{"current_steps": 98005, "total_steps": 204665, "loss": 0.0003, "lr": 1.24581701547843e-06, "epoch": 2.394278455036279, "percentage": 47.89, "elapsed_time": "2:06:43", "remaining_time": "2:17:54", "throughput": 8688.92, "total_tokens": 66061992} +{"current_steps": 98010, "total_steps": 204665, "loss": 0.0459, "lr": 1.24573435362167e-06, "epoch": 2.394400605868126, "percentage": 47.89, "elapsed_time": "2:06:43", "remaining_time": "2:17:54", "throughput": 8689.0, "total_tokens": 66065704} +{"current_steps": 98015, "total_steps": 204665, "loss": 0.0003, "lr": 1.245651689977864e-06, "epoch": 2.394522756699973, "percentage": 47.89, "elapsed_time": "2:06:43", "remaining_time": "2:17:53", "throughput": 8689.04, "total_tokens": 66069032} +{"current_steps": 98020, "total_steps": 204665, "loss": 0.0468, "lr": 1.2455690245476126e-06, "epoch": 2.3946449075318204, "percentage": 47.89, "elapsed_time": "2:06:44", "remaining_time": "2:17:53", "throughput": 8689.04, "total_tokens": 66071912} +{"current_steps": 98025, "total_steps": 204665, "loss": 0.0004, "lr": 1.2454863573315174e-06, "epoch": 2.3947670583636675, "percentage": 47.9, "elapsed_time": "2:06:44", "remaining_time": "2:17:52", "throughput": 8689.1, "total_tokens": 66075432} +{"current_steps": 98030, "total_steps": 204665, "loss": 0.0001, "lr": 1.245403688330179e-06, "epoch": 2.3948892091955147, "percentage": 47.9, "elapsed_time": "2:06:44", "remaining_time": "2:17:52", "throughput": 8689.13, "total_tokens": 66078696} +{"current_steps": 98035, "total_steps": 204665, "loss": 0.1373, "lr": 1.2453210175441993e-06, "epoch": 2.395011360027362, "percentage": 47.9, "elapsed_time": "2:06:45", "remaining_time": "2:17:51", "throughput": 8689.21, "total_tokens": 66082408} +{"current_steps": 98040, "total_steps": 204665, "loss": 0.0438, "lr": 1.245238344974179e-06, "epoch": 2.395133510859209, "percentage": 47.9, "elapsed_time": "2:06:45", "remaining_time": "2:17:51", "throughput": 8689.22, "total_tokens": 66085416} +{"current_steps": 98045, "total_steps": 204665, "loss": 0.0366, "lr": 1.2451556706207194e-06, "epoch": 2.3952556616910563, "percentage": 47.91, "elapsed_time": "2:06:45", "remaining_time": "2:17:51", "throughput": 8689.28, "total_tokens": 66088936} +{"current_steps": 98050, "total_steps": 204665, "loss": 0.0444, "lr": 1.245072994484422e-06, "epoch": 2.395377812522903, "percentage": 47.91, "elapsed_time": "2:06:46", "remaining_time": "2:17:50", "throughput": 8689.37, "total_tokens": 66092648} +{"current_steps": 98055, "total_steps": 204665, "loss": 0.0002, "lr": 1.2449903165658879e-06, "epoch": 2.3954999633547507, "percentage": 47.91, "elapsed_time": "2:06:46", "remaining_time": "2:17:50", "throughput": 8689.45, "total_tokens": 66096296} +{"current_steps": 98060, "total_steps": 204665, "loss": 0.0005, "lr": 1.2449076368657184e-06, "epoch": 2.3956221141865974, "percentage": 47.91, "elapsed_time": "2:06:46", "remaining_time": "2:17:49", "throughput": 8689.56, "total_tokens": 66100264} +{"current_steps": 98065, "total_steps": 204665, "loss": 0.1342, "lr": 1.2448249553845146e-06, "epoch": 2.3957442650184446, "percentage": 47.91, "elapsed_time": "2:06:47", "remaining_time": "2:17:49", "throughput": 8689.6, "total_tokens": 66103592} +{"current_steps": 98070, "total_steps": 204665, "loss": 0.0447, "lr": 1.2447422721228777e-06, "epoch": 2.395866415850292, "percentage": 47.92, "elapsed_time": "2:06:47", "remaining_time": "2:17:48", "throughput": 8689.65, "total_tokens": 66107048} +{"current_steps": 98075, "total_steps": 204665, "loss": 0.043, "lr": 1.2446595870814096e-06, "epoch": 2.395988566682139, "percentage": 47.92, "elapsed_time": "2:06:47", "remaining_time": "2:17:48", "throughput": 8689.7, "total_tokens": 66110440} +{"current_steps": 98080, "total_steps": 204665, "loss": 0.0005, "lr": 1.2445769002607108e-06, "epoch": 2.396110717513986, "percentage": 47.92, "elapsed_time": "2:06:48", "remaining_time": "2:17:47", "throughput": 8689.7, "total_tokens": 66113448} +{"current_steps": 98085, "total_steps": 204665, "loss": 0.0386, "lr": 1.244494211661383e-06, "epoch": 2.3962328683458334, "percentage": 47.92, "elapsed_time": "2:06:48", "remaining_time": "2:17:47", "throughput": 8689.75, "total_tokens": 66116776} +{"current_steps": 98090, "total_steps": 204665, "loss": 0.0002, "lr": 1.2444115212840276e-06, "epoch": 2.3963550191776806, "percentage": 47.93, "elapsed_time": "2:06:48", "remaining_time": "2:17:47", "throughput": 8689.78, "total_tokens": 66119976} +{"current_steps": 98095, "total_steps": 204665, "loss": 0.1071, "lr": 1.244328829129246e-06, "epoch": 2.3964771700095278, "percentage": 47.93, "elapsed_time": "2:06:49", "remaining_time": "2:17:46", "throughput": 8689.83, "total_tokens": 66123496} +{"current_steps": 98100, "total_steps": 204665, "loss": 0.0257, "lr": 1.2442461351976395e-06, "epoch": 2.396599320841375, "percentage": 47.93, "elapsed_time": "2:06:49", "remaining_time": "2:17:46", "throughput": 8689.96, "total_tokens": 66127592} +{"current_steps": 98105, "total_steps": 204665, "loss": 0.0559, "lr": 1.244163439489809e-06, "epoch": 2.396721471673222, "percentage": 47.93, "elapsed_time": "2:06:50", "remaining_time": "2:17:45", "throughput": 8690.01, "total_tokens": 66131048} +{"current_steps": 98110, "total_steps": 204665, "loss": 0.0539, "lr": 1.2440807420063565e-06, "epoch": 2.3968436225050693, "percentage": 47.94, "elapsed_time": "2:06:50", "remaining_time": "2:17:45", "throughput": 8690.06, "total_tokens": 66134376} +{"current_steps": 98115, "total_steps": 204665, "loss": 0.0912, "lr": 1.2439980427478833e-06, "epoch": 2.3969657733369165, "percentage": 47.94, "elapsed_time": "2:06:50", "remaining_time": "2:17:44", "throughput": 8690.07, "total_tokens": 66137448} +{"current_steps": 98120, "total_steps": 204665, "loss": 0.0001, "lr": 1.2439153417149908e-06, "epoch": 2.3970879241687637, "percentage": 47.94, "elapsed_time": "2:06:51", "remaining_time": "2:17:44", "throughput": 8690.16, "total_tokens": 66141160} +{"current_steps": 98125, "total_steps": 204665, "loss": 0.0504, "lr": 1.2438326389082803e-06, "epoch": 2.397210075000611, "percentage": 47.94, "elapsed_time": "2:06:51", "remaining_time": "2:17:44", "throughput": 8690.19, "total_tokens": 66144424} +{"current_steps": 98130, "total_steps": 204665, "loss": 0.0387, "lr": 1.243749934328353e-06, "epoch": 2.397332225832458, "percentage": 47.95, "elapsed_time": "2:06:51", "remaining_time": "2:17:43", "throughput": 8690.28, "total_tokens": 66148136} +{"current_steps": 98135, "total_steps": 204665, "loss": 0.0435, "lr": 1.2436672279758108e-06, "epoch": 2.3974543766643053, "percentage": 47.95, "elapsed_time": "2:06:52", "remaining_time": "2:17:43", "throughput": 8690.29, "total_tokens": 66151208} +{"current_steps": 98140, "total_steps": 204665, "loss": 0.063, "lr": 1.2435845198512547e-06, "epoch": 2.3975765274961525, "percentage": 47.95, "elapsed_time": "2:06:52", "remaining_time": "2:17:42", "throughput": 8690.35, "total_tokens": 66154728} +{"current_steps": 98145, "total_steps": 204665, "loss": 0.0013, "lr": 1.2435018099552867e-06, "epoch": 2.397698678327999, "percentage": 47.95, "elapsed_time": "2:06:52", "remaining_time": "2:17:42", "throughput": 8690.39, "total_tokens": 66157992} +{"current_steps": 98150, "total_steps": 204665, "loss": 0.0432, "lr": 1.2434190982885082e-06, "epoch": 2.3978208291598464, "percentage": 47.96, "elapsed_time": "2:06:53", "remaining_time": "2:17:41", "throughput": 8690.46, "total_tokens": 66161640} +{"current_steps": 98155, "total_steps": 204665, "loss": 0.0003, "lr": 1.2433363848515204e-06, "epoch": 2.3979429799916936, "percentage": 47.96, "elapsed_time": "2:06:53", "remaining_time": "2:17:41", "throughput": 8690.49, "total_tokens": 66164840} +{"current_steps": 98160, "total_steps": 204665, "loss": 0.0707, "lr": 1.243253669644925e-06, "epoch": 2.398065130823541, "percentage": 47.96, "elapsed_time": "2:06:53", "remaining_time": "2:17:41", "throughput": 8690.54, "total_tokens": 66168296} +{"current_steps": 98165, "total_steps": 204665, "loss": 0.0504, "lr": 1.2431709526693234e-06, "epoch": 2.398187281655388, "percentage": 47.96, "elapsed_time": "2:06:54", "remaining_time": "2:17:40", "throughput": 8690.58, "total_tokens": 66171624} +{"current_steps": 98170, "total_steps": 204665, "loss": 0.0005, "lr": 1.2430882339253172e-06, "epoch": 2.398309432487235, "percentage": 47.97, "elapsed_time": "2:06:54", "remaining_time": "2:17:40", "throughput": 8690.64, "total_tokens": 66175080} +{"current_steps": 98175, "total_steps": 204665, "loss": 0.0003, "lr": 1.243005513413508e-06, "epoch": 2.3984315833190823, "percentage": 47.97, "elapsed_time": "2:06:54", "remaining_time": "2:17:39", "throughput": 8690.69, "total_tokens": 66178536} +{"current_steps": 98180, "total_steps": 204665, "loss": 0.0017, "lr": 1.2429227911344976e-06, "epoch": 2.3985537341509295, "percentage": 47.97, "elapsed_time": "2:06:55", "remaining_time": "2:17:39", "throughput": 8690.74, "total_tokens": 66181928} +{"current_steps": 98185, "total_steps": 204665, "loss": 0.0002, "lr": 1.242840067088887e-06, "epoch": 2.3986758849827767, "percentage": 47.97, "elapsed_time": "2:06:55", "remaining_time": "2:17:38", "throughput": 8690.77, "total_tokens": 66185192} +{"current_steps": 98190, "total_steps": 204665, "loss": 0.0319, "lr": 1.2427573412772783e-06, "epoch": 2.398798035814624, "percentage": 47.98, "elapsed_time": "2:06:55", "remaining_time": "2:17:38", "throughput": 8690.81, "total_tokens": 66188456} +{"current_steps": 98195, "total_steps": 204665, "loss": 0.0581, "lr": 1.2426746137002727e-06, "epoch": 2.398920186646471, "percentage": 47.98, "elapsed_time": "2:06:56", "remaining_time": "2:17:38", "throughput": 8690.85, "total_tokens": 66191784} +{"current_steps": 98200, "total_steps": 204665, "loss": 0.0005, "lr": 1.2425918843584721e-06, "epoch": 2.3990423374783183, "percentage": 47.98, "elapsed_time": "2:06:56", "remaining_time": "2:17:37", "throughput": 8690.88, "total_tokens": 66194984} +{"current_steps": 98205, "total_steps": 204665, "loss": 0.0467, "lr": 1.2425091532524783e-06, "epoch": 2.3991644883101655, "percentage": 47.98, "elapsed_time": "2:06:56", "remaining_time": "2:17:37", "throughput": 8690.9, "total_tokens": 66198120} +{"current_steps": 98210, "total_steps": 204665, "loss": 0.0259, "lr": 1.2424264203828924e-06, "epoch": 2.3992866391420127, "percentage": 47.99, "elapsed_time": "2:06:57", "remaining_time": "2:17:36", "throughput": 8690.98, "total_tokens": 66201832} +{"current_steps": 98215, "total_steps": 204665, "loss": 0.0009, "lr": 1.2423436857503167e-06, "epoch": 2.39940878997386, "percentage": 47.99, "elapsed_time": "2:06:57", "remaining_time": "2:17:36", "throughput": 8691.05, "total_tokens": 66205480} +{"current_steps": 98220, "total_steps": 204665, "loss": 0.0456, "lr": 1.2422609493553522e-06, "epoch": 2.399530940805707, "percentage": 47.99, "elapsed_time": "2:06:58", "remaining_time": "2:17:35", "throughput": 8691.07, "total_tokens": 66208616} +{"current_steps": 98225, "total_steps": 204665, "loss": 0.0009, "lr": 1.2421782111986013e-06, "epoch": 2.3996530916375542, "percentage": 47.99, "elapsed_time": "2:06:58", "remaining_time": "2:17:35", "throughput": 8691.14, "total_tokens": 66212200} +{"current_steps": 98230, "total_steps": 204665, "loss": 0.0394, "lr": 1.2420954712806653e-06, "epoch": 2.399775242469401, "percentage": 48.0, "elapsed_time": "2:06:58", "remaining_time": "2:17:35", "throughput": 8691.17, "total_tokens": 66215400} +{"current_steps": 98235, "total_steps": 204665, "loss": 0.0604, "lr": 1.2420127296021454e-06, "epoch": 2.3998973933012486, "percentage": 48.0, "elapsed_time": "2:06:59", "remaining_time": "2:17:34", "throughput": 8691.2, "total_tokens": 66218664} +{"current_steps": 98240, "total_steps": 204665, "loss": 0.0012, "lr": 1.241929986163644e-06, "epoch": 2.4000195441330954, "percentage": 48.0, "elapsed_time": "2:06:59", "remaining_time": "2:17:34", "throughput": 8691.21, "total_tokens": 66221672} +{"current_steps": 98245, "total_steps": 204665, "loss": 0.0004, "lr": 1.241847240965763e-06, "epoch": 2.4001416949649426, "percentage": 48.0, "elapsed_time": "2:06:59", "remaining_time": "2:17:33", "throughput": 8691.25, "total_tokens": 66225064} +{"current_steps": 98250, "total_steps": 204665, "loss": 0.0325, "lr": 1.2417644940091036e-06, "epoch": 2.4002638457967898, "percentage": 48.01, "elapsed_time": "2:07:00", "remaining_time": "2:17:33", "throughput": 8691.27, "total_tokens": 66228200} +{"current_steps": 98255, "total_steps": 204665, "loss": 0.0371, "lr": 1.2416817452942678e-06, "epoch": 2.400385996628637, "percentage": 48.01, "elapsed_time": "2:07:00", "remaining_time": "2:17:32", "throughput": 8691.31, "total_tokens": 66231464} +{"current_steps": 98260, "total_steps": 204665, "loss": 0.0214, "lr": 1.2415989948218575e-06, "epoch": 2.400508147460484, "percentage": 48.01, "elapsed_time": "2:07:00", "remaining_time": "2:17:32", "throughput": 8691.37, "total_tokens": 66234984} +{"current_steps": 98265, "total_steps": 204665, "loss": 0.0574, "lr": 1.2415162425924739e-06, "epoch": 2.4006302982923313, "percentage": 48.01, "elapsed_time": "2:07:01", "remaining_time": "2:17:32", "throughput": 8691.43, "total_tokens": 66238504} +{"current_steps": 98270, "total_steps": 204665, "loss": 0.0002, "lr": 1.2414334886067196e-06, "epoch": 2.4007524491241785, "percentage": 48.02, "elapsed_time": "2:07:01", "remaining_time": "2:17:31", "throughput": 8691.45, "total_tokens": 66241640} +{"current_steps": 98275, "total_steps": 204665, "loss": 0.0002, "lr": 1.241350732865196e-06, "epoch": 2.4008745999560257, "percentage": 48.02, "elapsed_time": "2:07:01", "remaining_time": "2:17:31", "throughput": 8691.5, "total_tokens": 66245096} +{"current_steps": 98280, "total_steps": 204665, "loss": 0.0002, "lr": 1.241267975368505e-06, "epoch": 2.400996750787873, "percentage": 48.02, "elapsed_time": "2:07:02", "remaining_time": "2:17:30", "throughput": 8691.53, "total_tokens": 66248296} +{"current_steps": 98285, "total_steps": 204665, "loss": 0.0999, "lr": 1.2411852161172482e-06, "epoch": 2.40111890161972, "percentage": 48.02, "elapsed_time": "2:07:02", "remaining_time": "2:17:30", "throughput": 8691.59, "total_tokens": 66251752} +{"current_steps": 98290, "total_steps": 204665, "loss": 0.0002, "lr": 1.2411024551120277e-06, "epoch": 2.4012410524515673, "percentage": 48.02, "elapsed_time": "2:07:02", "remaining_time": "2:17:29", "throughput": 8691.63, "total_tokens": 66255144} +{"current_steps": 98295, "total_steps": 204665, "loss": 0.0001, "lr": 1.2410196923534454e-06, "epoch": 2.4013632032834145, "percentage": 48.03, "elapsed_time": "2:07:03", "remaining_time": "2:17:29", "throughput": 8691.7, "total_tokens": 66258664} +{"current_steps": 98300, "total_steps": 204665, "loss": 0.0864, "lr": 1.2409369278421026e-06, "epoch": 2.4014853541152617, "percentage": 48.03, "elapsed_time": "2:07:03", "remaining_time": "2:17:29", "throughput": 8691.77, "total_tokens": 66262312} +{"current_steps": 98305, "total_steps": 204665, "loss": 0.0399, "lr": 1.2408541615786022e-06, "epoch": 2.401607504947109, "percentage": 48.03, "elapsed_time": "2:07:03", "remaining_time": "2:17:28", "throughput": 8691.83, "total_tokens": 66265832} +{"current_steps": 98310, "total_steps": 204665, "loss": 0.0001, "lr": 1.2407713935635453e-06, "epoch": 2.401729655778956, "percentage": 48.03, "elapsed_time": "2:07:04", "remaining_time": "2:17:28", "throughput": 8691.85, "total_tokens": 66268968} +{"current_steps": 98315, "total_steps": 204665, "loss": 0.0456, "lr": 1.2406886237975342e-06, "epoch": 2.4018518066108028, "percentage": 48.04, "elapsed_time": "2:07:04", "remaining_time": "2:17:27", "throughput": 8691.86, "total_tokens": 66272104} +{"current_steps": 98320, "total_steps": 204665, "loss": 0.0015, "lr": 1.240605852281171e-06, "epoch": 2.4019739574426504, "percentage": 48.04, "elapsed_time": "2:07:04", "remaining_time": "2:17:27", "throughput": 8691.94, "total_tokens": 66275752} +{"current_steps": 98325, "total_steps": 204665, "loss": 0.0334, "lr": 1.2405230790150566e-06, "epoch": 2.402096108274497, "percentage": 48.04, "elapsed_time": "2:07:05", "remaining_time": "2:17:26", "throughput": 8691.99, "total_tokens": 66279144} +{"current_steps": 98330, "total_steps": 204665, "loss": 0.1057, "lr": 1.240440303999794e-06, "epoch": 2.4022182591063443, "percentage": 48.04, "elapsed_time": "2:07:05", "remaining_time": "2:17:26", "throughput": 8692.02, "total_tokens": 66282408} +{"current_steps": 98335, "total_steps": 204665, "loss": 0.0004, "lr": 1.2403575272359853e-06, "epoch": 2.4023404099381915, "percentage": 48.05, "elapsed_time": "2:07:06", "remaining_time": "2:17:26", "throughput": 8692.08, "total_tokens": 66285864} +{"current_steps": 98340, "total_steps": 204665, "loss": 0.0415, "lr": 1.2402747487242313e-06, "epoch": 2.4024625607700387, "percentage": 48.05, "elapsed_time": "2:07:06", "remaining_time": "2:17:25", "throughput": 8692.08, "total_tokens": 66288872} +{"current_steps": 98345, "total_steps": 204665, "loss": 0.0706, "lr": 1.240191968465135e-06, "epoch": 2.402584711601886, "percentage": 48.05, "elapsed_time": "2:07:06", "remaining_time": "2:17:25", "throughput": 8692.13, "total_tokens": 66292264} +{"current_steps": 98350, "total_steps": 204665, "loss": 0.0002, "lr": 1.2401091864592984e-06, "epoch": 2.402706862433733, "percentage": 48.05, "elapsed_time": "2:07:07", "remaining_time": "2:17:24", "throughput": 8692.17, "total_tokens": 66295592} +{"current_steps": 98355, "total_steps": 204665, "loss": 0.0002, "lr": 1.2400264027073227e-06, "epoch": 2.4028290132655803, "percentage": 48.06, "elapsed_time": "2:07:07", "remaining_time": "2:17:24", "throughput": 8692.2, "total_tokens": 66298792} +{"current_steps": 98360, "total_steps": 204665, "loss": 0.0626, "lr": 1.2399436172098106e-06, "epoch": 2.4029511640974275, "percentage": 48.06, "elapsed_time": "2:07:07", "remaining_time": "2:17:23", "throughput": 8692.26, "total_tokens": 66302312} +{"current_steps": 98365, "total_steps": 204665, "loss": 0.0002, "lr": 1.239860829967364e-06, "epoch": 2.4030733149292747, "percentage": 48.06, "elapsed_time": "2:07:08", "remaining_time": "2:17:23", "throughput": 8692.31, "total_tokens": 66305704} +{"current_steps": 98370, "total_steps": 204665, "loss": 0.0761, "lr": 1.239778040980585e-06, "epoch": 2.403195465761122, "percentage": 48.06, "elapsed_time": "2:07:08", "remaining_time": "2:17:23", "throughput": 8692.43, "total_tokens": 66309800} +{"current_steps": 98375, "total_steps": 204665, "loss": 0.0349, "lr": 1.2396952502500756e-06, "epoch": 2.403317616592969, "percentage": 48.07, "elapsed_time": "2:07:08", "remaining_time": "2:17:22", "throughput": 8692.46, "total_tokens": 66313064} +{"current_steps": 98380, "total_steps": 204665, "loss": 0.0001, "lr": 1.2396124577764378e-06, "epoch": 2.4034397674248162, "percentage": 48.07, "elapsed_time": "2:07:09", "remaining_time": "2:17:22", "throughput": 8692.49, "total_tokens": 66316264} +{"current_steps": 98385, "total_steps": 204665, "loss": 0.0003, "lr": 1.239529663560274e-06, "epoch": 2.4035619182566634, "percentage": 48.07, "elapsed_time": "2:07:09", "remaining_time": "2:17:21", "throughput": 8692.54, "total_tokens": 66319656} +{"current_steps": 98390, "total_steps": 204665, "loss": 0.0759, "lr": 1.2394468676021856e-06, "epoch": 2.4036840690885106, "percentage": 48.07, "elapsed_time": "2:07:09", "remaining_time": "2:17:21", "throughput": 8692.59, "total_tokens": 66323112} +{"current_steps": 98395, "total_steps": 204665, "loss": 0.0001, "lr": 1.2393640699027757e-06, "epoch": 2.403806219920358, "percentage": 48.08, "elapsed_time": "2:07:10", "remaining_time": "2:17:20", "throughput": 8692.63, "total_tokens": 66326376} +{"current_steps": 98400, "total_steps": 204665, "loss": 0.0869, "lr": 1.2392812704626453e-06, "epoch": 2.403928370752205, "percentage": 48.08, "elapsed_time": "2:07:10", "remaining_time": "2:17:20", "throughput": 8692.68, "total_tokens": 66329832} +{"current_steps": 98405, "total_steps": 204665, "loss": 0.0004, "lr": 1.2391984692823976e-06, "epoch": 2.404050521584052, "percentage": 48.08, "elapsed_time": "2:07:10", "remaining_time": "2:17:20", "throughput": 8692.72, "total_tokens": 66333160} +{"current_steps": 98410, "total_steps": 204665, "loss": 0.0592, "lr": 1.2391156663626343e-06, "epoch": 2.404172672415899, "percentage": 48.08, "elapsed_time": "2:07:11", "remaining_time": "2:17:19", "throughput": 8692.72, "total_tokens": 66336104} +{"current_steps": 98415, "total_steps": 204665, "loss": 0.0003, "lr": 1.2390328617039574e-06, "epoch": 2.404294823247746, "percentage": 48.09, "elapsed_time": "2:07:11", "remaining_time": "2:17:19", "throughput": 8692.78, "total_tokens": 66339560} +{"current_steps": 98420, "total_steps": 204665, "loss": 0.0004, "lr": 1.238950055306969e-06, "epoch": 2.4044169740795933, "percentage": 48.09, "elapsed_time": "2:07:11", "remaining_time": "2:17:18", "throughput": 8692.82, "total_tokens": 66342888} +{"current_steps": 98425, "total_steps": 204665, "loss": 0.0005, "lr": 1.2388672471722719e-06, "epoch": 2.4045391249114405, "percentage": 48.09, "elapsed_time": "2:07:12", "remaining_time": "2:17:18", "throughput": 8692.87, "total_tokens": 66346344} +{"current_steps": 98430, "total_steps": 204665, "loss": 0.0001, "lr": 1.238784437300468e-06, "epoch": 2.4046612757432877, "percentage": 48.09, "elapsed_time": "2:07:12", "remaining_time": "2:17:17", "throughput": 8692.89, "total_tokens": 66349480} +{"current_steps": 98435, "total_steps": 204665, "loss": 0.0003, "lr": 1.2387016256921593e-06, "epoch": 2.404783426575135, "percentage": 48.1, "elapsed_time": "2:07:12", "remaining_time": "2:17:17", "throughput": 8692.98, "total_tokens": 66353256} +{"current_steps": 98440, "total_steps": 204665, "loss": 0.0448, "lr": 1.2386188123479482e-06, "epoch": 2.404905577406982, "percentage": 48.1, "elapsed_time": "2:07:13", "remaining_time": "2:17:16", "throughput": 8693.01, "total_tokens": 66356520} +{"current_steps": 98445, "total_steps": 204665, "loss": 0.03, "lr": 1.238535997268437e-06, "epoch": 2.4050277282388293, "percentage": 48.1, "elapsed_time": "2:07:13", "remaining_time": "2:17:16", "throughput": 8693.06, "total_tokens": 66359912} +{"current_steps": 98450, "total_steps": 204665, "loss": 0.0022, "lr": 1.2384531804542272e-06, "epoch": 2.4051498790706765, "percentage": 48.1, "elapsed_time": "2:07:14", "remaining_time": "2:17:16", "throughput": 8693.1, "total_tokens": 66363176} +{"current_steps": 98455, "total_steps": 204665, "loss": 0.0555, "lr": 1.2383703619059225e-06, "epoch": 2.4052720299025236, "percentage": 48.11, "elapsed_time": "2:07:14", "remaining_time": "2:17:15", "throughput": 8693.14, "total_tokens": 66366504} +{"current_steps": 98460, "total_steps": 204665, "loss": 0.0763, "lr": 1.238287541624124e-06, "epoch": 2.405394180734371, "percentage": 48.11, "elapsed_time": "2:07:14", "remaining_time": "2:17:15", "throughput": 8693.2, "total_tokens": 66370024} +{"current_steps": 98465, "total_steps": 204665, "loss": 0.0281, "lr": 1.2382047196094348e-06, "epoch": 2.405516331566218, "percentage": 48.11, "elapsed_time": "2:07:15", "remaining_time": "2:17:14", "throughput": 8693.21, "total_tokens": 66373096} +{"current_steps": 98470, "total_steps": 204665, "loss": 0.0863, "lr": 1.2381218958624565e-06, "epoch": 2.405638482398065, "percentage": 48.11, "elapsed_time": "2:07:15", "remaining_time": "2:17:14", "throughput": 8693.23, "total_tokens": 66376168} +{"current_steps": 98475, "total_steps": 204665, "loss": 0.0351, "lr": 1.238039070383792e-06, "epoch": 2.4057606332299124, "percentage": 48.12, "elapsed_time": "2:07:15", "remaining_time": "2:17:13", "throughput": 8693.29, "total_tokens": 66379688} +{"current_steps": 98480, "total_steps": 204665, "loss": 0.0002, "lr": 1.237956243174043e-06, "epoch": 2.4058827840617596, "percentage": 48.12, "elapsed_time": "2:07:16", "remaining_time": "2:17:13", "throughput": 8693.33, "total_tokens": 66383016} +{"current_steps": 98485, "total_steps": 204665, "loss": 0.0001, "lr": 1.2378734142338126e-06, "epoch": 2.406004934893607, "percentage": 48.12, "elapsed_time": "2:07:16", "remaining_time": "2:17:13", "throughput": 8693.35, "total_tokens": 66386216} +{"current_steps": 98490, "total_steps": 204665, "loss": 0.0553, "lr": 1.2377905835637024e-06, "epoch": 2.406127085725454, "percentage": 48.12, "elapsed_time": "2:07:16", "remaining_time": "2:17:12", "throughput": 8693.4, "total_tokens": 66389608} +{"current_steps": 98495, "total_steps": 204665, "loss": 0.0818, "lr": 1.2377077511643152e-06, "epoch": 2.4062492365573007, "percentage": 48.12, "elapsed_time": "2:07:17", "remaining_time": "2:17:12", "throughput": 8693.4, "total_tokens": 66392552} +{"current_steps": 98500, "total_steps": 204665, "loss": 0.0648, "lr": 1.2376249170362533e-06, "epoch": 2.4063713873891484, "percentage": 48.13, "elapsed_time": "2:07:17", "remaining_time": "2:17:11", "throughput": 8693.42, "total_tokens": 66395624} +{"current_steps": 98505, "total_steps": 204665, "loss": 0.0115, "lr": 1.237542081180119e-06, "epoch": 2.406493538220995, "percentage": 48.13, "elapsed_time": "2:07:17", "remaining_time": "2:17:11", "throughput": 8693.48, "total_tokens": 66399144} +{"current_steps": 98510, "total_steps": 204665, "loss": 0.0002, "lr": 1.2374592435965152e-06, "epoch": 2.4066156890528423, "percentage": 48.13, "elapsed_time": "2:07:18", "remaining_time": "2:17:10", "throughput": 8693.52, "total_tokens": 66402472} +{"current_steps": 98515, "total_steps": 204665, "loss": 0.0815, "lr": 1.2373764042860434e-06, "epoch": 2.4067378398846895, "percentage": 48.13, "elapsed_time": "2:07:18", "remaining_time": "2:17:10", "throughput": 8693.58, "total_tokens": 66406056} +{"current_steps": 98520, "total_steps": 204665, "loss": 0.0011, "lr": 1.2372935632493068e-06, "epoch": 2.4068599907165367, "percentage": 48.14, "elapsed_time": "2:07:18", "remaining_time": "2:17:10", "throughput": 8693.65, "total_tokens": 66409640} +{"current_steps": 98525, "total_steps": 204665, "loss": 0.0797, "lr": 1.2372107204869075e-06, "epoch": 2.406982141548384, "percentage": 48.14, "elapsed_time": "2:07:19", "remaining_time": "2:17:09", "throughput": 8693.67, "total_tokens": 66412776} +{"current_steps": 98530, "total_steps": 204665, "loss": 0.1063, "lr": 1.237127875999448e-06, "epoch": 2.407104292380231, "percentage": 48.14, "elapsed_time": "2:07:19", "remaining_time": "2:17:09", "throughput": 8693.75, "total_tokens": 66416424} +{"current_steps": 98535, "total_steps": 204665, "loss": 0.0633, "lr": 1.2370450297875312e-06, "epoch": 2.4072264432120782, "percentage": 48.14, "elapsed_time": "2:07:19", "remaining_time": "2:17:08", "throughput": 8693.8, "total_tokens": 66419880} +{"current_steps": 98540, "total_steps": 204665, "loss": 0.0002, "lr": 1.236962181851759e-06, "epoch": 2.4073485940439254, "percentage": 48.15, "elapsed_time": "2:07:20", "remaining_time": "2:17:08", "throughput": 8693.83, "total_tokens": 66423080} +{"current_steps": 98545, "total_steps": 204665, "loss": 0.0003, "lr": 1.2368793321927338e-06, "epoch": 2.4074707448757726, "percentage": 48.15, "elapsed_time": "2:07:20", "remaining_time": "2:17:07", "throughput": 8693.83, "total_tokens": 66426088} +{"current_steps": 98550, "total_steps": 204665, "loss": 0.0004, "lr": 1.2367964808110585e-06, "epoch": 2.40759289570762, "percentage": 48.15, "elapsed_time": "2:07:20", "remaining_time": "2:17:07", "throughput": 8693.91, "total_tokens": 66429736} +{"current_steps": 98555, "total_steps": 204665, "loss": 0.0002, "lr": 1.2367136277073358e-06, "epoch": 2.407715046539467, "percentage": 48.15, "elapsed_time": "2:07:21", "remaining_time": "2:17:07", "throughput": 8694.0, "total_tokens": 66433576} +{"current_steps": 98560, "total_steps": 204665, "loss": 0.0003, "lr": 1.2366307728821676e-06, "epoch": 2.407837197371314, "percentage": 48.16, "elapsed_time": "2:07:21", "remaining_time": "2:17:06", "throughput": 8694.04, "total_tokens": 66436904} +{"current_steps": 98565, "total_steps": 204665, "loss": 0.1034, "lr": 1.236547916336157e-06, "epoch": 2.4079593482031614, "percentage": 48.16, "elapsed_time": "2:07:22", "remaining_time": "2:17:06", "throughput": 8694.13, "total_tokens": 66440616} +{"current_steps": 98570, "total_steps": 204665, "loss": 0.0001, "lr": 1.236465058069906e-06, "epoch": 2.4080814990350086, "percentage": 48.16, "elapsed_time": "2:07:22", "remaining_time": "2:17:05", "throughput": 8694.16, "total_tokens": 66443816} +{"current_steps": 98575, "total_steps": 204665, "loss": 0.0004, "lr": 1.2363821980840173e-06, "epoch": 2.4082036498668558, "percentage": 48.16, "elapsed_time": "2:07:22", "remaining_time": "2:17:05", "throughput": 8694.22, "total_tokens": 66447400} +{"current_steps": 98580, "total_steps": 204665, "loss": 0.049, "lr": 1.2362993363790943e-06, "epoch": 2.408325800698703, "percentage": 48.17, "elapsed_time": "2:07:23", "remaining_time": "2:17:04", "throughput": 8694.29, "total_tokens": 66450984} +{"current_steps": 98585, "total_steps": 204665, "loss": 0.0301, "lr": 1.236216472955739e-06, "epoch": 2.40844795153055, "percentage": 48.17, "elapsed_time": "2:07:23", "remaining_time": "2:17:04", "throughput": 8694.29, "total_tokens": 66453928} +{"current_steps": 98590, "total_steps": 204665, "loss": 0.0167, "lr": 1.2361336078145536e-06, "epoch": 2.408570102362397, "percentage": 48.17, "elapsed_time": "2:07:23", "remaining_time": "2:17:04", "throughput": 8694.31, "total_tokens": 66457000} +{"current_steps": 98595, "total_steps": 204665, "loss": 0.0003, "lr": 1.2360507409561413e-06, "epoch": 2.408692253194244, "percentage": 48.17, "elapsed_time": "2:07:24", "remaining_time": "2:17:03", "throughput": 8694.32, "total_tokens": 66460008} +{"current_steps": 98600, "total_steps": 204665, "loss": 0.0003, "lr": 1.2359678723811045e-06, "epoch": 2.4088144040260913, "percentage": 48.18, "elapsed_time": "2:07:24", "remaining_time": "2:17:03", "throughput": 8694.34, "total_tokens": 66463144} +{"current_steps": 98605, "total_steps": 204665, "loss": 0.0982, "lr": 1.2358850020900454e-06, "epoch": 2.4089365548579385, "percentage": 48.18, "elapsed_time": "2:07:24", "remaining_time": "2:17:02", "throughput": 8694.34, "total_tokens": 66466024} +{"current_steps": 98610, "total_steps": 204665, "loss": 0.0003, "lr": 1.2358021300835676e-06, "epoch": 2.4090587056897856, "percentage": 48.18, "elapsed_time": "2:07:25", "remaining_time": "2:17:02", "throughput": 8694.37, "total_tokens": 66469224} +{"current_steps": 98615, "total_steps": 204665, "loss": 0.0229, "lr": 1.235719256362273e-06, "epoch": 2.409180856521633, "percentage": 48.18, "elapsed_time": "2:07:25", "remaining_time": "2:17:01", "throughput": 8694.42, "total_tokens": 66472744} +{"current_steps": 98620, "total_steps": 204665, "loss": 0.1211, "lr": 1.235636380926765e-06, "epoch": 2.40930300735348, "percentage": 48.19, "elapsed_time": "2:07:25", "remaining_time": "2:17:01", "throughput": 8694.44, "total_tokens": 66475816} +{"current_steps": 98625, "total_steps": 204665, "loss": 0.0007, "lr": 1.2355535037776456e-06, "epoch": 2.409425158185327, "percentage": 48.19, "elapsed_time": "2:07:26", "remaining_time": "2:17:00", "throughput": 8694.47, "total_tokens": 66479016} +{"current_steps": 98630, "total_steps": 204665, "loss": 0.0003, "lr": 1.2354706249155177e-06, "epoch": 2.4095473090171744, "percentage": 48.19, "elapsed_time": "2:07:26", "remaining_time": "2:17:00", "throughput": 8694.5, "total_tokens": 66482280} +{"current_steps": 98635, "total_steps": 204665, "loss": 0.0004, "lr": 1.2353877443409844e-06, "epoch": 2.4096694598490216, "percentage": 48.19, "elapsed_time": "2:07:26", "remaining_time": "2:17:00", "throughput": 8694.54, "total_tokens": 66485608} +{"current_steps": 98640, "total_steps": 204665, "loss": 0.0002, "lr": 1.2353048620546477e-06, "epoch": 2.409791610680869, "percentage": 48.2, "elapsed_time": "2:07:27", "remaining_time": "2:16:59", "throughput": 8694.61, "total_tokens": 66489192} +{"current_steps": 98645, "total_steps": 204665, "loss": 0.0002, "lr": 1.235221978057111e-06, "epoch": 2.409913761512716, "percentage": 48.2, "elapsed_time": "2:07:27", "remaining_time": "2:16:59", "throughput": 8694.66, "total_tokens": 66492648} +{"current_steps": 98650, "total_steps": 204665, "loss": 0.0544, "lr": 1.235139092348977e-06, "epoch": 2.410035912344563, "percentage": 48.2, "elapsed_time": "2:07:27", "remaining_time": "2:16:58", "throughput": 8694.69, "total_tokens": 66495848} +{"current_steps": 98655, "total_steps": 204665, "loss": 0.0525, "lr": 1.2350562049308477e-06, "epoch": 2.4101580631764103, "percentage": 48.2, "elapsed_time": "2:07:28", "remaining_time": "2:16:58", "throughput": 8694.75, "total_tokens": 66499432} +{"current_steps": 98660, "total_steps": 204665, "loss": 0.0456, "lr": 1.2349733158033268e-06, "epoch": 2.4102802140082575, "percentage": 48.21, "elapsed_time": "2:07:28", "remaining_time": "2:16:57", "throughput": 8694.79, "total_tokens": 66502760} +{"current_steps": 98665, "total_steps": 204665, "loss": 0.0002, "lr": 1.2348904249670169e-06, "epoch": 2.4104023648401047, "percentage": 48.21, "elapsed_time": "2:07:28", "remaining_time": "2:16:57", "throughput": 8694.83, "total_tokens": 66506088} +{"current_steps": 98670, "total_steps": 204665, "loss": 0.0002, "lr": 1.2348075324225202e-06, "epoch": 2.410524515671952, "percentage": 48.21, "elapsed_time": "2:07:29", "remaining_time": "2:16:57", "throughput": 8694.84, "total_tokens": 66509160} +{"current_steps": 98675, "total_steps": 204665, "loss": 0.0, "lr": 1.2347246381704402e-06, "epoch": 2.4106466665037987, "percentage": 48.21, "elapsed_time": "2:07:29", "remaining_time": "2:16:56", "throughput": 8694.88, "total_tokens": 66512488} +{"current_steps": 98680, "total_steps": 204665, "loss": 0.0355, "lr": 1.2346417422113794e-06, "epoch": 2.4107688173356463, "percentage": 48.22, "elapsed_time": "2:07:29", "remaining_time": "2:16:56", "throughput": 8694.97, "total_tokens": 66516264} +{"current_steps": 98685, "total_steps": 204665, "loss": 0.0002, "lr": 1.234558844545941e-06, "epoch": 2.410890968167493, "percentage": 48.22, "elapsed_time": "2:07:30", "remaining_time": "2:16:55", "throughput": 8695.0, "total_tokens": 66519528} +{"current_steps": 98690, "total_steps": 204665, "loss": 0.0001, "lr": 1.2344759451747275e-06, "epoch": 2.4110131189993402, "percentage": 48.22, "elapsed_time": "2:07:30", "remaining_time": "2:16:55", "throughput": 8695.12, "total_tokens": 66523624} +{"current_steps": 98695, "total_steps": 204665, "loss": 0.0004, "lr": 1.2343930440983422e-06, "epoch": 2.4111352698311874, "percentage": 48.22, "elapsed_time": "2:07:31", "remaining_time": "2:16:54", "throughput": 8695.14, "total_tokens": 66526760} +{"current_steps": 98700, "total_steps": 204665, "loss": 0.0466, "lr": 1.2343101413173869e-06, "epoch": 2.4112574206630346, "percentage": 48.23, "elapsed_time": "2:07:31", "remaining_time": "2:16:54", "throughput": 8695.21, "total_tokens": 66530344} +{"current_steps": 98705, "total_steps": 204665, "loss": 0.0003, "lr": 1.2342272368324658e-06, "epoch": 2.411379571494882, "percentage": 48.23, "elapsed_time": "2:07:31", "remaining_time": "2:16:54", "throughput": 8695.24, "total_tokens": 66533608} +{"current_steps": 98710, "total_steps": 204665, "loss": 0.108, "lr": 1.234144330644181e-06, "epoch": 2.411501722326729, "percentage": 48.23, "elapsed_time": "2:07:32", "remaining_time": "2:16:53", "throughput": 8695.29, "total_tokens": 66536936} +{"current_steps": 98715, "total_steps": 204665, "loss": 0.0361, "lr": 1.2340614227531355e-06, "epoch": 2.411623873158576, "percentage": 48.23, "elapsed_time": "2:07:32", "remaining_time": "2:16:53", "throughput": 8695.37, "total_tokens": 66540648} +{"current_steps": 98720, "total_steps": 204665, "loss": 0.0001, "lr": 1.2339785131599328e-06, "epoch": 2.4117460239904234, "percentage": 48.23, "elapsed_time": "2:07:32", "remaining_time": "2:16:52", "throughput": 8695.42, "total_tokens": 66544104} +{"current_steps": 98725, "total_steps": 204665, "loss": 0.0428, "lr": 1.2338956018651749e-06, "epoch": 2.4118681748222706, "percentage": 48.24, "elapsed_time": "2:07:33", "remaining_time": "2:16:52", "throughput": 8695.45, "total_tokens": 66547368} +{"current_steps": 98730, "total_steps": 204665, "loss": 0.0545, "lr": 1.2338126888694656e-06, "epoch": 2.4119903256541178, "percentage": 48.24, "elapsed_time": "2:07:33", "remaining_time": "2:16:51", "throughput": 8695.53, "total_tokens": 66551016} +{"current_steps": 98735, "total_steps": 204665, "loss": 0.0401, "lr": 1.2337297741734075e-06, "epoch": 2.412112476485965, "percentage": 48.24, "elapsed_time": "2:07:33", "remaining_time": "2:16:51", "throughput": 8695.6, "total_tokens": 66554600} +{"current_steps": 98740, "total_steps": 204665, "loss": 0.0004, "lr": 1.2336468577776037e-06, "epoch": 2.412234627317812, "percentage": 48.24, "elapsed_time": "2:07:34", "remaining_time": "2:16:51", "throughput": 8695.71, "total_tokens": 66558568} +{"current_steps": 98745, "total_steps": 204665, "loss": 0.1005, "lr": 1.2335639396826572e-06, "epoch": 2.4123567781496593, "percentage": 48.25, "elapsed_time": "2:07:34", "remaining_time": "2:16:50", "throughput": 8695.83, "total_tokens": 66562664} +{"current_steps": 98750, "total_steps": 204665, "loss": 0.0755, "lr": 1.2334810198891705e-06, "epoch": 2.4124789289815065, "percentage": 48.25, "elapsed_time": "2:07:34", "remaining_time": "2:16:50", "throughput": 8695.88, "total_tokens": 66566056} +{"current_steps": 98755, "total_steps": 204665, "loss": 0.0719, "lr": 1.2333980983977474e-06, "epoch": 2.4126010798133537, "percentage": 48.25, "elapsed_time": "2:07:35", "remaining_time": "2:16:49", "throughput": 8695.94, "total_tokens": 66569512} +{"current_steps": 98760, "total_steps": 204665, "loss": 0.0695, "lr": 1.2333151752089901e-06, "epoch": 2.4127232306452004, "percentage": 48.25, "elapsed_time": "2:07:35", "remaining_time": "2:16:49", "throughput": 8695.97, "total_tokens": 66572776} +{"current_steps": 98765, "total_steps": 204665, "loss": 0.0005, "lr": 1.2332322503235024e-06, "epoch": 2.412845381477048, "percentage": 48.26, "elapsed_time": "2:07:35", "remaining_time": "2:16:49", "throughput": 8696.1, "total_tokens": 66576936} +{"current_steps": 98770, "total_steps": 204665, "loss": 0.0237, "lr": 1.2331493237418871e-06, "epoch": 2.412967532308895, "percentage": 48.26, "elapsed_time": "2:07:36", "remaining_time": "2:16:48", "throughput": 8696.14, "total_tokens": 66580264} +{"current_steps": 98775, "total_steps": 204665, "loss": 0.0456, "lr": 1.2330663954647471e-06, "epoch": 2.413089683140742, "percentage": 48.26, "elapsed_time": "2:07:36", "remaining_time": "2:16:48", "throughput": 8696.25, "total_tokens": 66584232} +{"current_steps": 98780, "total_steps": 204665, "loss": 0.049, "lr": 1.2329834654926855e-06, "epoch": 2.413211833972589, "percentage": 48.26, "elapsed_time": "2:07:37", "remaining_time": "2:16:47", "throughput": 8696.29, "total_tokens": 66587496} +{"current_steps": 98785, "total_steps": 204665, "loss": 0.0841, "lr": 1.2329005338263058e-06, "epoch": 2.4133339848044364, "percentage": 48.27, "elapsed_time": "2:07:37", "remaining_time": "2:16:47", "throughput": 8696.33, "total_tokens": 66590888} +{"current_steps": 98790, "total_steps": 204665, "loss": 0.0065, "lr": 1.2328176004662105e-06, "epoch": 2.4134561356362836, "percentage": 48.27, "elapsed_time": "2:07:37", "remaining_time": "2:16:46", "throughput": 8696.46, "total_tokens": 66595048} +{"current_steps": 98795, "total_steps": 204665, "loss": 0.0562, "lr": 1.232734665413003e-06, "epoch": 2.4135782864681308, "percentage": 48.27, "elapsed_time": "2:07:38", "remaining_time": "2:16:46", "throughput": 8696.51, "total_tokens": 66598440} +{"current_steps": 98800, "total_steps": 204665, "loss": 0.0518, "lr": 1.2326517286672867e-06, "epoch": 2.413700437299978, "percentage": 48.27, "elapsed_time": "2:07:38", "remaining_time": "2:16:46", "throughput": 8696.56, "total_tokens": 66601832} +{"current_steps": 98805, "total_steps": 204665, "loss": 0.1387, "lr": 1.2325687902296642e-06, "epoch": 2.413822588131825, "percentage": 48.28, "elapsed_time": "2:07:38", "remaining_time": "2:16:45", "throughput": 8696.56, "total_tokens": 66604776} +{"current_steps": 98810, "total_steps": 204665, "loss": 0.0728, "lr": 1.2324858501007389e-06, "epoch": 2.4139447389636723, "percentage": 48.28, "elapsed_time": "2:07:39", "remaining_time": "2:16:45", "throughput": 8696.57, "total_tokens": 66607784} +{"current_steps": 98815, "total_steps": 204665, "loss": 0.0007, "lr": 1.232402908281114e-06, "epoch": 2.4140668897955195, "percentage": 48.28, "elapsed_time": "2:07:39", "remaining_time": "2:16:44", "throughput": 8696.64, "total_tokens": 66611432} +{"current_steps": 98820, "total_steps": 204665, "loss": 0.0464, "lr": 1.2323199647713927e-06, "epoch": 2.4141890406273667, "percentage": 48.28, "elapsed_time": "2:07:39", "remaining_time": "2:16:44", "throughput": 8696.67, "total_tokens": 66614568} +{"current_steps": 98825, "total_steps": 204665, "loss": 0.0378, "lr": 1.232237019572178e-06, "epoch": 2.414311191459214, "percentage": 48.29, "elapsed_time": "2:07:40", "remaining_time": "2:16:43", "throughput": 8696.77, "total_tokens": 66618536} +{"current_steps": 98830, "total_steps": 204665, "loss": 0.0525, "lr": 1.2321540726840734e-06, "epoch": 2.414433342291061, "percentage": 48.29, "elapsed_time": "2:07:40", "remaining_time": "2:16:43", "throughput": 8696.8, "total_tokens": 66621736} +{"current_steps": 98835, "total_steps": 204665, "loss": 0.0022, "lr": 1.2320711241076817e-06, "epoch": 2.4145554931229083, "percentage": 48.29, "elapsed_time": "2:07:40", "remaining_time": "2:16:43", "throughput": 8696.84, "total_tokens": 66625128} +{"current_steps": 98840, "total_steps": 204665, "loss": 0.0003, "lr": 1.2319881738436065e-06, "epoch": 2.4146776439547555, "percentage": 48.29, "elapsed_time": "2:07:41", "remaining_time": "2:16:42", "throughput": 8696.88, "total_tokens": 66628392} +{"current_steps": 98845, "total_steps": 204665, "loss": 0.0008, "lr": 1.2319052218924509e-06, "epoch": 2.4147997947866027, "percentage": 48.3, "elapsed_time": "2:07:41", "remaining_time": "2:16:42", "throughput": 8696.93, "total_tokens": 66631848} +{"current_steps": 98850, "total_steps": 204665, "loss": 0.0005, "lr": 1.2318222682548185e-06, "epoch": 2.41492194561845, "percentage": 48.3, "elapsed_time": "2:07:41", "remaining_time": "2:16:41", "throughput": 8696.94, "total_tokens": 66634920} +{"current_steps": 98855, "total_steps": 204665, "loss": 0.0004, "lr": 1.2317393129313115e-06, "epoch": 2.4150440964502966, "percentage": 48.3, "elapsed_time": "2:07:42", "remaining_time": "2:16:41", "throughput": 8696.99, "total_tokens": 66638312} +{"current_steps": 98860, "total_steps": 204665, "loss": 0.0002, "lr": 1.2316563559225345e-06, "epoch": 2.4151662472821442, "percentage": 48.3, "elapsed_time": "2:07:42", "remaining_time": "2:16:40", "throughput": 8697.02, "total_tokens": 66641576} +{"current_steps": 98865, "total_steps": 204665, "loss": 0.0401, "lr": 1.2315733972290897e-06, "epoch": 2.415288398113991, "percentage": 48.31, "elapsed_time": "2:07:42", "remaining_time": "2:16:40", "throughput": 8697.09, "total_tokens": 66645096} +{"current_steps": 98870, "total_steps": 204665, "loss": 0.0005, "lr": 1.2314904368515813e-06, "epoch": 2.415410548945838, "percentage": 48.31, "elapsed_time": "2:07:43", "remaining_time": "2:16:40", "throughput": 8697.09, "total_tokens": 66648040} +{"current_steps": 98875, "total_steps": 204665, "loss": 0.0006, "lr": 1.231407474790612e-06, "epoch": 2.4155326997776854, "percentage": 48.31, "elapsed_time": "2:07:43", "remaining_time": "2:16:39", "throughput": 8697.11, "total_tokens": 66651176} +{"current_steps": 98880, "total_steps": 204665, "loss": 0.0577, "lr": 1.2313245110467853e-06, "epoch": 2.4156548506095326, "percentage": 48.31, "elapsed_time": "2:07:43", "remaining_time": "2:16:39", "throughput": 8697.16, "total_tokens": 66654568} +{"current_steps": 98885, "total_steps": 204665, "loss": 0.0002, "lr": 1.2312415456207045e-06, "epoch": 2.4157770014413797, "percentage": 48.32, "elapsed_time": "2:07:44", "remaining_time": "2:16:38", "throughput": 8697.21, "total_tokens": 66657960} +{"current_steps": 98890, "total_steps": 204665, "loss": 0.0001, "lr": 1.2311585785129727e-06, "epoch": 2.415899152273227, "percentage": 48.32, "elapsed_time": "2:07:44", "remaining_time": "2:16:38", "throughput": 8697.22, "total_tokens": 66661032} +{"current_steps": 98895, "total_steps": 204665, "loss": 0.0951, "lr": 1.2310756097241942e-06, "epoch": 2.416021303105074, "percentage": 48.32, "elapsed_time": "2:07:44", "remaining_time": "2:16:37", "throughput": 8697.27, "total_tokens": 66664424} +{"current_steps": 98900, "total_steps": 204665, "loss": 0.0481, "lr": 1.2309926392549713e-06, "epoch": 2.4161434539369213, "percentage": 48.32, "elapsed_time": "2:07:45", "remaining_time": "2:16:37", "throughput": 8697.32, "total_tokens": 66667816} +{"current_steps": 98905, "total_steps": 204665, "loss": 0.0678, "lr": 1.230909667105908e-06, "epoch": 2.4162656047687685, "percentage": 48.33, "elapsed_time": "2:07:45", "remaining_time": "2:16:36", "throughput": 8697.4, "total_tokens": 66671464} +{"current_steps": 98910, "total_steps": 204665, "loss": 0.0602, "lr": 1.2308266932776073e-06, "epoch": 2.4163877556006157, "percentage": 48.33, "elapsed_time": "2:07:46", "remaining_time": "2:16:36", "throughput": 8697.42, "total_tokens": 66674664} +{"current_steps": 98915, "total_steps": 204665, "loss": 0.0001, "lr": 1.2307437177706727e-06, "epoch": 2.416509906432463, "percentage": 48.33, "elapsed_time": "2:07:46", "remaining_time": "2:16:36", "throughput": 8697.5, "total_tokens": 66678376} +{"current_steps": 98920, "total_steps": 204665, "loss": 0.0001, "lr": 1.2306607405857078e-06, "epoch": 2.41663205726431, "percentage": 48.33, "elapsed_time": "2:07:46", "remaining_time": "2:16:35", "throughput": 8697.58, "total_tokens": 66681960} +{"current_steps": 98925, "total_steps": 204665, "loss": 0.036, "lr": 1.2305777617233162e-06, "epoch": 2.4167542080961573, "percentage": 48.34, "elapsed_time": "2:07:47", "remaining_time": "2:16:35", "throughput": 8697.58, "total_tokens": 66684904} +{"current_steps": 98930, "total_steps": 204665, "loss": 0.0003, "lr": 1.2304947811841008e-06, "epoch": 2.4168763589280045, "percentage": 48.34, "elapsed_time": "2:07:47", "remaining_time": "2:16:34", "throughput": 8697.62, "total_tokens": 66688232} +{"current_steps": 98935, "total_steps": 204665, "loss": 0.0452, "lr": 1.2304117989686655e-06, "epoch": 2.4169985097598516, "percentage": 48.34, "elapsed_time": "2:07:47", "remaining_time": "2:16:34", "throughput": 8697.64, "total_tokens": 66691432} +{"current_steps": 98940, "total_steps": 204665, "loss": 0.0003, "lr": 1.2303288150776138e-06, "epoch": 2.4171206605916984, "percentage": 48.34, "elapsed_time": "2:07:48", "remaining_time": "2:16:33", "throughput": 8697.73, "total_tokens": 66695208} +{"current_steps": 98945, "total_steps": 204665, "loss": 0.0, "lr": 1.2302458295115488e-06, "epoch": 2.417242811423546, "percentage": 48.34, "elapsed_time": "2:07:48", "remaining_time": "2:16:33", "throughput": 8697.77, "total_tokens": 66698472} +{"current_steps": 98950, "total_steps": 204665, "loss": 0.095, "lr": 1.2301628422710742e-06, "epoch": 2.4173649622553928, "percentage": 48.35, "elapsed_time": "2:07:48", "remaining_time": "2:16:33", "throughput": 8697.79, "total_tokens": 66701608} +{"current_steps": 98955, "total_steps": 204665, "loss": 0.0, "lr": 1.2300798533567935e-06, "epoch": 2.41748711308724, "percentage": 48.35, "elapsed_time": "2:07:49", "remaining_time": "2:16:32", "throughput": 8697.82, "total_tokens": 66704872} +{"current_steps": 98960, "total_steps": 204665, "loss": 0.0558, "lr": 1.2299968627693102e-06, "epoch": 2.417609263919087, "percentage": 48.35, "elapsed_time": "2:07:49", "remaining_time": "2:16:32", "throughput": 8697.82, "total_tokens": 66707816} +{"current_steps": 98965, "total_steps": 204665, "loss": 0.1187, "lr": 1.229913870509228e-06, "epoch": 2.4177314147509343, "percentage": 48.35, "elapsed_time": "2:07:49", "remaining_time": "2:16:31", "throughput": 8697.86, "total_tokens": 66711144} +{"current_steps": 98970, "total_steps": 204665, "loss": 0.1665, "lr": 1.22983087657715e-06, "epoch": 2.4178535655827815, "percentage": 48.36, "elapsed_time": "2:07:50", "remaining_time": "2:16:31", "throughput": 8697.9, "total_tokens": 66714408} +{"current_steps": 98975, "total_steps": 204665, "loss": 0.0467, "lr": 1.2297478809736804e-06, "epoch": 2.4179757164146287, "percentage": 48.36, "elapsed_time": "2:07:50", "remaining_time": "2:16:30", "throughput": 8697.95, "total_tokens": 66717864} +{"current_steps": 98980, "total_steps": 204665, "loss": 0.0003, "lr": 1.229664883699422e-06, "epoch": 2.418097867246476, "percentage": 48.36, "elapsed_time": "2:07:50", "remaining_time": "2:16:30", "throughput": 8698.0, "total_tokens": 66721256} +{"current_steps": 98985, "total_steps": 204665, "loss": 0.0032, "lr": 1.229581884754979e-06, "epoch": 2.418220018078323, "percentage": 48.36, "elapsed_time": "2:07:51", "remaining_time": "2:16:30", "throughput": 8698.03, "total_tokens": 66724456} +{"current_steps": 98990, "total_steps": 204665, "loss": 0.0001, "lr": 1.229498884140955e-06, "epoch": 2.4183421689101703, "percentage": 48.37, "elapsed_time": "2:07:51", "remaining_time": "2:16:29", "throughput": 8698.12, "total_tokens": 66728232} +{"current_steps": 98995, "total_steps": 204665, "loss": 0.0465, "lr": 1.2294158818579533e-06, "epoch": 2.4184643197420175, "percentage": 48.37, "elapsed_time": "2:07:51", "remaining_time": "2:16:29", "throughput": 8698.16, "total_tokens": 66731624} +{"current_steps": 99000, "total_steps": 204665, "loss": 0.0002, "lr": 1.2293328779065774e-06, "epoch": 2.4185864705738647, "percentage": 48.37, "elapsed_time": "2:07:52", "remaining_time": "2:16:28", "throughput": 8698.26, "total_tokens": 66735464} +{"current_steps": 99005, "total_steps": 204665, "loss": 0.0344, "lr": 1.2292498722874316e-06, "epoch": 2.418708621405712, "percentage": 48.37, "elapsed_time": "2:07:52", "remaining_time": "2:16:28", "throughput": 8698.35, "total_tokens": 66739176} +{"current_steps": 99010, "total_steps": 204665, "loss": 0.0004, "lr": 1.2291668650011185e-06, "epoch": 2.418830772237559, "percentage": 48.38, "elapsed_time": "2:07:52", "remaining_time": "2:16:27", "throughput": 8698.36, "total_tokens": 66742248} +{"current_steps": 99015, "total_steps": 204665, "loss": 0.0399, "lr": 1.2290838560482427e-06, "epoch": 2.4189529230694062, "percentage": 48.38, "elapsed_time": "2:07:53", "remaining_time": "2:16:27", "throughput": 8698.39, "total_tokens": 66745512} +{"current_steps": 99020, "total_steps": 204665, "loss": 0.0005, "lr": 1.2290008454294072e-06, "epoch": 2.4190750739012534, "percentage": 48.38, "elapsed_time": "2:07:53", "remaining_time": "2:16:27", "throughput": 8698.44, "total_tokens": 66748968} +{"current_steps": 99025, "total_steps": 204665, "loss": 0.128, "lr": 1.228917833145216e-06, "epoch": 2.4191972247331006, "percentage": 48.38, "elapsed_time": "2:07:54", "remaining_time": "2:16:26", "throughput": 8698.51, "total_tokens": 66752552} +{"current_steps": 99030, "total_steps": 204665, "loss": 0.0296, "lr": 1.228834819196273e-06, "epoch": 2.419319375564948, "percentage": 48.39, "elapsed_time": "2:07:54", "remaining_time": "2:16:26", "throughput": 8698.53, "total_tokens": 66755688} +{"current_steps": 99035, "total_steps": 204665, "loss": 0.1457, "lr": 1.2287518035831815e-06, "epoch": 2.4194415263967946, "percentage": 48.39, "elapsed_time": "2:07:54", "remaining_time": "2:16:25", "throughput": 8698.56, "total_tokens": 66758888} +{"current_steps": 99040, "total_steps": 204665, "loss": 0.0006, "lr": 1.228668786306545e-06, "epoch": 2.4195636772286417, "percentage": 48.39, "elapsed_time": "2:07:55", "remaining_time": "2:16:25", "throughput": 8698.62, "total_tokens": 66762408} +{"current_steps": 99045, "total_steps": 204665, "loss": 0.0956, "lr": 1.228585767366968e-06, "epoch": 2.419685828060489, "percentage": 48.39, "elapsed_time": "2:07:55", "remaining_time": "2:16:24", "throughput": 8698.61, "total_tokens": 66765352} +{"current_steps": 99050, "total_steps": 204665, "loss": 0.0024, "lr": 1.228502746765054e-06, "epoch": 2.419807978892336, "percentage": 48.4, "elapsed_time": "2:07:55", "remaining_time": "2:16:24", "throughput": 8698.6, "total_tokens": 66768168} +{"current_steps": 99055, "total_steps": 204665, "loss": 0.0001, "lr": 1.2284197245014062e-06, "epoch": 2.4199301297241833, "percentage": 48.4, "elapsed_time": "2:07:56", "remaining_time": "2:16:24", "throughput": 8698.61, "total_tokens": 66771240} +{"current_steps": 99060, "total_steps": 204665, "loss": 0.034, "lr": 1.2283367005766288e-06, "epoch": 2.4200522805560305, "percentage": 48.4, "elapsed_time": "2:07:56", "remaining_time": "2:16:23", "throughput": 8698.62, "total_tokens": 66774248} +{"current_steps": 99065, "total_steps": 204665, "loss": 0.0001, "lr": 1.2282536749913255e-06, "epoch": 2.4201744313878777, "percentage": 48.4, "elapsed_time": "2:07:56", "remaining_time": "2:16:23", "throughput": 8698.67, "total_tokens": 66777576} +{"current_steps": 99070, "total_steps": 204665, "loss": 0.0316, "lr": 1.2281706477461002e-06, "epoch": 2.420296582219725, "percentage": 48.41, "elapsed_time": "2:07:57", "remaining_time": "2:16:22", "throughput": 8698.69, "total_tokens": 66780712} +{"current_steps": 99075, "total_steps": 204665, "loss": 0.1086, "lr": 1.2280876188415562e-06, "epoch": 2.420418733051572, "percentage": 48.41, "elapsed_time": "2:07:57", "remaining_time": "2:16:22", "throughput": 8698.74, "total_tokens": 66784168} +{"current_steps": 99080, "total_steps": 204665, "loss": 0.0006, "lr": 1.2280045882782978e-06, "epoch": 2.4205408838834193, "percentage": 48.41, "elapsed_time": "2:07:57", "remaining_time": "2:16:21", "throughput": 8698.77, "total_tokens": 66787432} +{"current_steps": 99085, "total_steps": 204665, "loss": 0.0001, "lr": 1.227921556056929e-06, "epoch": 2.4206630347152664, "percentage": 48.41, "elapsed_time": "2:07:58", "remaining_time": "2:16:21", "throughput": 8698.79, "total_tokens": 66790504} +{"current_steps": 99090, "total_steps": 204665, "loss": 0.0944, "lr": 1.2278385221780534e-06, "epoch": 2.4207851855471136, "percentage": 48.42, "elapsed_time": "2:07:58", "remaining_time": "2:16:21", "throughput": 8698.83, "total_tokens": 66793896} +{"current_steps": 99095, "total_steps": 204665, "loss": 0.1299, "lr": 1.2277554866422746e-06, "epoch": 2.420907336378961, "percentage": 48.42, "elapsed_time": "2:07:58", "remaining_time": "2:16:20", "throughput": 8698.88, "total_tokens": 66797352} +{"current_steps": 99100, "total_steps": 204665, "loss": 0.0002, "lr": 1.2276724494501966e-06, "epoch": 2.421029487210808, "percentage": 48.42, "elapsed_time": "2:07:59", "remaining_time": "2:16:20", "throughput": 8698.91, "total_tokens": 66800552} +{"current_steps": 99105, "total_steps": 204665, "loss": 0.0049, "lr": 1.2275894106024234e-06, "epoch": 2.421151638042655, "percentage": 48.42, "elapsed_time": "2:07:59", "remaining_time": "2:16:19", "throughput": 8698.95, "total_tokens": 66803880} +{"current_steps": 99110, "total_steps": 204665, "loss": 0.0443, "lr": 1.2275063700995587e-06, "epoch": 2.4212737888745024, "percentage": 48.43, "elapsed_time": "2:07:59", "remaining_time": "2:16:19", "throughput": 8699.0, "total_tokens": 66807336} +{"current_steps": 99115, "total_steps": 204665, "loss": 0.043, "lr": 1.2274233279422065e-06, "epoch": 2.4213959397063496, "percentage": 48.43, "elapsed_time": "2:08:00", "remaining_time": "2:16:18", "throughput": 8699.04, "total_tokens": 66810600} +{"current_steps": 99120, "total_steps": 204665, "loss": 0.1082, "lr": 1.2273402841309709e-06, "epoch": 2.4215180905381963, "percentage": 48.43, "elapsed_time": "2:08:00", "remaining_time": "2:16:18", "throughput": 8699.12, "total_tokens": 66814376} +{"current_steps": 99125, "total_steps": 204665, "loss": 0.0003, "lr": 1.2272572386664552e-06, "epoch": 2.421640241370044, "percentage": 48.43, "elapsed_time": "2:08:00", "remaining_time": "2:16:18", "throughput": 8699.18, "total_tokens": 66817832} +{"current_steps": 99130, "total_steps": 204665, "loss": 0.0002, "lr": 1.2271741915492642e-06, "epoch": 2.4217623922018907, "percentage": 48.44, "elapsed_time": "2:08:01", "remaining_time": "2:16:17", "throughput": 8699.22, "total_tokens": 66821160} +{"current_steps": 99135, "total_steps": 204665, "loss": 0.0349, "lr": 1.2270911427800008e-06, "epoch": 2.421884543033738, "percentage": 48.44, "elapsed_time": "2:08:01", "remaining_time": "2:16:17", "throughput": 8699.24, "total_tokens": 66824296} +{"current_steps": 99140, "total_steps": 204665, "loss": 0.0006, "lr": 1.2270080923592699e-06, "epoch": 2.422006693865585, "percentage": 48.44, "elapsed_time": "2:08:01", "remaining_time": "2:16:16", "throughput": 8699.35, "total_tokens": 66828264} +{"current_steps": 99145, "total_steps": 204665, "loss": 0.116, "lr": 1.2269250402876749e-06, "epoch": 2.4221288446974323, "percentage": 48.44, "elapsed_time": "2:08:02", "remaining_time": "2:16:16", "throughput": 8699.37, "total_tokens": 66831464} +{"current_steps": 99150, "total_steps": 204665, "loss": 0.0007, "lr": 1.2268419865658204e-06, "epoch": 2.4222509955292795, "percentage": 48.45, "elapsed_time": "2:08:02", "remaining_time": "2:16:15", "throughput": 8699.41, "total_tokens": 66834728} +{"current_steps": 99155, "total_steps": 204665, "loss": 0.0002, "lr": 1.2267589311943096e-06, "epoch": 2.4223731463611267, "percentage": 48.45, "elapsed_time": "2:08:03", "remaining_time": "2:16:15", "throughput": 8699.44, "total_tokens": 66837992} +{"current_steps": 99160, "total_steps": 204665, "loss": 0.1335, "lr": 1.2266758741737472e-06, "epoch": 2.422495297192974, "percentage": 48.45, "elapsed_time": "2:08:03", "remaining_time": "2:16:15", "throughput": 8699.49, "total_tokens": 66841448} +{"current_steps": 99165, "total_steps": 204665, "loss": 0.1043, "lr": 1.2265928155047365e-06, "epoch": 2.422617448024821, "percentage": 48.45, "elapsed_time": "2:08:03", "remaining_time": "2:16:14", "throughput": 8699.52, "total_tokens": 66844648} +{"current_steps": 99170, "total_steps": 204665, "loss": 0.0985, "lr": 1.226509755187882e-06, "epoch": 2.4227395988566682, "percentage": 48.45, "elapsed_time": "2:08:04", "remaining_time": "2:16:14", "throughput": 8699.56, "total_tokens": 66847976} +{"current_steps": 99175, "total_steps": 204665, "loss": 0.0708, "lr": 1.2264266932237878e-06, "epoch": 2.4228617496885154, "percentage": 48.46, "elapsed_time": "2:08:04", "remaining_time": "2:16:13", "throughput": 8699.58, "total_tokens": 66851112} +{"current_steps": 99180, "total_steps": 204665, "loss": 0.0514, "lr": 1.2263436296130577e-06, "epoch": 2.4229839005203626, "percentage": 48.46, "elapsed_time": "2:08:04", "remaining_time": "2:16:13", "throughput": 8699.59, "total_tokens": 66854184} +{"current_steps": 99185, "total_steps": 204665, "loss": 0.0003, "lr": 1.2262605643562956e-06, "epoch": 2.42310605135221, "percentage": 48.46, "elapsed_time": "2:08:05", "remaining_time": "2:16:12", "throughput": 8699.67, "total_tokens": 66857896} +{"current_steps": 99190, "total_steps": 204665, "loss": 0.0948, "lr": 1.2261774974541062e-06, "epoch": 2.423228202184057, "percentage": 48.46, "elapsed_time": "2:08:05", "remaining_time": "2:16:12", "throughput": 8699.73, "total_tokens": 66861352} +{"current_steps": 99195, "total_steps": 204665, "loss": 0.0002, "lr": 1.2260944289070928e-06, "epoch": 2.423350353015904, "percentage": 48.47, "elapsed_time": "2:08:05", "remaining_time": "2:16:11", "throughput": 8699.76, "total_tokens": 66864616} +{"current_steps": 99200, "total_steps": 204665, "loss": 0.0003, "lr": 1.22601135871586e-06, "epoch": 2.4234725038477514, "percentage": 48.47, "elapsed_time": "2:08:06", "remaining_time": "2:16:11", "throughput": 8699.77, "total_tokens": 66867624} +{"current_steps": 99205, "total_steps": 204665, "loss": 0.0001, "lr": 1.2259282868810122e-06, "epoch": 2.4235946546795986, "percentage": 48.47, "elapsed_time": "2:08:06", "remaining_time": "2:16:11", "throughput": 8699.85, "total_tokens": 66871336} +{"current_steps": 99210, "total_steps": 204665, "loss": 0.0507, "lr": 1.225845213403153e-06, "epoch": 2.4237168055114457, "percentage": 48.47, "elapsed_time": "2:08:06", "remaining_time": "2:16:10", "throughput": 8699.87, "total_tokens": 66874536} +{"current_steps": 99215, "total_steps": 204665, "loss": 0.0006, "lr": 1.2257621382828864e-06, "epoch": 2.4238389563432925, "percentage": 48.48, "elapsed_time": "2:08:07", "remaining_time": "2:16:10", "throughput": 8699.95, "total_tokens": 66878248} +{"current_steps": 99220, "total_steps": 204665, "loss": 0.0824, "lr": 1.225679061520817e-06, "epoch": 2.4239611071751397, "percentage": 48.48, "elapsed_time": "2:08:07", "remaining_time": "2:16:09", "throughput": 8700.02, "total_tokens": 66881768} +{"current_steps": 99225, "total_steps": 204665, "loss": 0.0933, "lr": 1.2255959831175486e-06, "epoch": 2.424083258006987, "percentage": 48.48, "elapsed_time": "2:08:07", "remaining_time": "2:16:09", "throughput": 8700.05, "total_tokens": 66885096} +{"current_steps": 99230, "total_steps": 204665, "loss": 0.0997, "lr": 1.2255129030736856e-06, "epoch": 2.424205408838834, "percentage": 48.48, "elapsed_time": "2:08:08", "remaining_time": "2:16:08", "throughput": 8700.09, "total_tokens": 66888360} +{"current_steps": 99235, "total_steps": 204665, "loss": 0.0002, "lr": 1.225429821389832e-06, "epoch": 2.4243275596706813, "percentage": 48.49, "elapsed_time": "2:08:08", "remaining_time": "2:16:08", "throughput": 8700.16, "total_tokens": 66892008} +{"current_steps": 99240, "total_steps": 204665, "loss": 0.0515, "lr": 1.2253467380665923e-06, "epoch": 2.4244497105025284, "percentage": 48.49, "elapsed_time": "2:08:08", "remaining_time": "2:16:08", "throughput": 8700.24, "total_tokens": 66895656} +{"current_steps": 99245, "total_steps": 204665, "loss": 0.0004, "lr": 1.2252636531045704e-06, "epoch": 2.4245718613343756, "percentage": 48.49, "elapsed_time": "2:08:09", "remaining_time": "2:16:07", "throughput": 8700.29, "total_tokens": 66899112} +{"current_steps": 99250, "total_steps": 204665, "loss": 0.0008, "lr": 1.2251805665043708e-06, "epoch": 2.424694012166223, "percentage": 48.49, "elapsed_time": "2:08:09", "remaining_time": "2:16:07", "throughput": 8700.29, "total_tokens": 66902056} +{"current_steps": 99255, "total_steps": 204665, "loss": 0.0014, "lr": 1.2250974782665976e-06, "epoch": 2.42481616299807, "percentage": 48.5, "elapsed_time": "2:08:09", "remaining_time": "2:16:06", "throughput": 8700.32, "total_tokens": 66905320} +{"current_steps": 99260, "total_steps": 204665, "loss": 0.0665, "lr": 1.2250143883918546e-06, "epoch": 2.424938313829917, "percentage": 48.5, "elapsed_time": "2:08:10", "remaining_time": "2:16:06", "throughput": 8700.35, "total_tokens": 66908584} +{"current_steps": 99265, "total_steps": 204665, "loss": 0.0006, "lr": 1.224931296880747e-06, "epoch": 2.4250604646617644, "percentage": 48.5, "elapsed_time": "2:08:10", "remaining_time": "2:16:06", "throughput": 8700.44, "total_tokens": 66912360} +{"current_steps": 99270, "total_steps": 204665, "loss": 0.0001, "lr": 1.2248482037338778e-06, "epoch": 2.4251826154936116, "percentage": 48.5, "elapsed_time": "2:08:11", "remaining_time": "2:16:05", "throughput": 8700.57, "total_tokens": 66916520} +{"current_steps": 99275, "total_steps": 204665, "loss": 0.0003, "lr": 1.2247651089518524e-06, "epoch": 2.4253047663254588, "percentage": 48.51, "elapsed_time": "2:08:11", "remaining_time": "2:16:05", "throughput": 8700.63, "total_tokens": 66920104} +{"current_steps": 99280, "total_steps": 204665, "loss": 0.0004, "lr": 1.2246820125352747e-06, "epoch": 2.425426917157306, "percentage": 48.51, "elapsed_time": "2:08:11", "remaining_time": "2:16:04", "throughput": 8700.69, "total_tokens": 66923560} +{"current_steps": 99285, "total_steps": 204665, "loss": 0.1132, "lr": 1.224598914484749e-06, "epoch": 2.425549067989153, "percentage": 48.51, "elapsed_time": "2:08:12", "remaining_time": "2:16:04", "throughput": 8700.72, "total_tokens": 66926760} +{"current_steps": 99290, "total_steps": 204665, "loss": 0.0003, "lr": 1.2245158148008795e-06, "epoch": 2.4256712188210003, "percentage": 48.51, "elapsed_time": "2:08:12", "remaining_time": "2:16:03", "throughput": 8700.75, "total_tokens": 66930024} +{"current_steps": 99295, "total_steps": 204665, "loss": 0.0972, "lr": 1.2244327134842704e-06, "epoch": 2.4257933696528475, "percentage": 48.52, "elapsed_time": "2:08:12", "remaining_time": "2:16:03", "throughput": 8700.76, "total_tokens": 66933032} +{"current_steps": 99300, "total_steps": 204665, "loss": 0.0512, "lr": 1.2243496105355265e-06, "epoch": 2.4259155204846943, "percentage": 48.52, "elapsed_time": "2:08:13", "remaining_time": "2:16:03", "throughput": 8700.77, "total_tokens": 66936104} +{"current_steps": 99305, "total_steps": 204665, "loss": 0.0399, "lr": 1.224266505955252e-06, "epoch": 2.426037671316542, "percentage": 48.52, "elapsed_time": "2:08:13", "remaining_time": "2:16:02", "throughput": 8700.79, "total_tokens": 66939240} +{"current_steps": 99310, "total_steps": 204665, "loss": 0.0002, "lr": 1.224183399744051e-06, "epoch": 2.4261598221483887, "percentage": 48.52, "elapsed_time": "2:08:13", "remaining_time": "2:16:02", "throughput": 8700.82, "total_tokens": 66942440} +{"current_steps": 99315, "total_steps": 204665, "loss": 0.0368, "lr": 1.224100291902528e-06, "epoch": 2.426281972980236, "percentage": 48.53, "elapsed_time": "2:08:14", "remaining_time": "2:16:01", "throughput": 8700.91, "total_tokens": 66946280} +{"current_steps": 99320, "total_steps": 204665, "loss": 0.0006, "lr": 1.2240171824312873e-06, "epoch": 2.426404123812083, "percentage": 48.53, "elapsed_time": "2:08:14", "remaining_time": "2:16:01", "throughput": 8700.99, "total_tokens": 66949928} +{"current_steps": 99325, "total_steps": 204665, "loss": 0.0002, "lr": 1.2239340713309335e-06, "epoch": 2.4265262746439302, "percentage": 48.53, "elapsed_time": "2:08:14", "remaining_time": "2:16:00", "throughput": 8701.07, "total_tokens": 66953704} +{"current_steps": 99330, "total_steps": 204665, "loss": 0.0491, "lr": 1.2238509586020708e-06, "epoch": 2.4266484254757774, "percentage": 48.53, "elapsed_time": "2:08:15", "remaining_time": "2:16:00", "throughput": 8701.17, "total_tokens": 66957544} +{"current_steps": 99335, "total_steps": 204665, "loss": 0.0001, "lr": 1.2237678442453042e-06, "epoch": 2.4267705763076246, "percentage": 48.54, "elapsed_time": "2:08:15", "remaining_time": "2:16:00", "throughput": 8701.21, "total_tokens": 66960936} +{"current_steps": 99340, "total_steps": 204665, "loss": 0.0567, "lr": 1.223684728261237e-06, "epoch": 2.426892727139472, "percentage": 48.54, "elapsed_time": "2:08:15", "remaining_time": "2:15:59", "throughput": 8701.25, "total_tokens": 66964200} +{"current_steps": 99345, "total_steps": 204665, "loss": 0.072, "lr": 1.2236016106504747e-06, "epoch": 2.427014877971319, "percentage": 48.54, "elapsed_time": "2:08:16", "remaining_time": "2:15:59", "throughput": 8701.27, "total_tokens": 66967400} +{"current_steps": 99350, "total_steps": 204665, "loss": 0.0574, "lr": 1.223518491413621e-06, "epoch": 2.427137028803166, "percentage": 48.54, "elapsed_time": "2:08:16", "remaining_time": "2:15:58", "throughput": 8701.27, "total_tokens": 66970280} +{"current_steps": 99355, "total_steps": 204665, "loss": 0.052, "lr": 1.223435370551281e-06, "epoch": 2.4272591796350134, "percentage": 48.55, "elapsed_time": "2:08:16", "remaining_time": "2:15:58", "throughput": 8701.29, "total_tokens": 66973416} +{"current_steps": 99360, "total_steps": 204665, "loss": 0.0002, "lr": 1.223352248064059e-06, "epoch": 2.4273813304668606, "percentage": 48.55, "elapsed_time": "2:08:17", "remaining_time": "2:15:57", "throughput": 8701.38, "total_tokens": 66977192} +{"current_steps": 99365, "total_steps": 204665, "loss": 0.0706, "lr": 1.2232691239525592e-06, "epoch": 2.4275034812987077, "percentage": 48.55, "elapsed_time": "2:08:17", "remaining_time": "2:15:57", "throughput": 8701.41, "total_tokens": 66980456} +{"current_steps": 99370, "total_steps": 204665, "loss": 0.0003, "lr": 1.2231859982173862e-06, "epoch": 2.427625632130555, "percentage": 48.55, "elapsed_time": "2:08:18", "remaining_time": "2:15:57", "throughput": 8701.5, "total_tokens": 66984296} +{"current_steps": 99375, "total_steps": 204665, "loss": 0.0855, "lr": 1.2231028708591447e-06, "epoch": 2.427747782962402, "percentage": 48.55, "elapsed_time": "2:08:18", "remaining_time": "2:15:56", "throughput": 8701.57, "total_tokens": 66987944} +{"current_steps": 99380, "total_steps": 204665, "loss": 0.0503, "lr": 1.2230197418784391e-06, "epoch": 2.4278699337942493, "percentage": 48.56, "elapsed_time": "2:08:18", "remaining_time": "2:15:56", "throughput": 8701.72, "total_tokens": 66992296} +{"current_steps": 99385, "total_steps": 204665, "loss": 0.0108, "lr": 1.2229366112758739e-06, "epoch": 2.427992084626096, "percentage": 48.56, "elapsed_time": "2:08:19", "remaining_time": "2:15:55", "throughput": 8701.79, "total_tokens": 66995944} +{"current_steps": 99390, "total_steps": 204665, "loss": 0.049, "lr": 1.2228534790520537e-06, "epoch": 2.4281142354579437, "percentage": 48.56, "elapsed_time": "2:08:19", "remaining_time": "2:15:55", "throughput": 8701.81, "total_tokens": 66999080} +{"current_steps": 99395, "total_steps": 204665, "loss": 0.0002, "lr": 1.222770345207583e-06, "epoch": 2.4282363862897904, "percentage": 48.56, "elapsed_time": "2:08:19", "remaining_time": "2:15:54", "throughput": 8701.86, "total_tokens": 67002408} +{"current_steps": 99400, "total_steps": 204665, "loss": 0.1616, "lr": 1.2226872097430665e-06, "epoch": 2.4283585371216376, "percentage": 48.57, "elapsed_time": "2:08:20", "remaining_time": "2:15:54", "throughput": 8701.88, "total_tokens": 67005608} +{"current_steps": 99405, "total_steps": 204665, "loss": 0.0432, "lr": 1.2226040726591088e-06, "epoch": 2.428480687953485, "percentage": 48.57, "elapsed_time": "2:08:20", "remaining_time": "2:15:54", "throughput": 8701.92, "total_tokens": 67008872} +{"current_steps": 99410, "total_steps": 204665, "loss": 0.0721, "lr": 1.2225209339563143e-06, "epoch": 2.428602838785332, "percentage": 48.57, "elapsed_time": "2:08:20", "remaining_time": "2:15:53", "throughput": 8702.02, "total_tokens": 67012840} +{"current_steps": 99415, "total_steps": 204665, "loss": 0.0391, "lr": 1.222437793635288e-06, "epoch": 2.428724989617179, "percentage": 48.57, "elapsed_time": "2:08:21", "remaining_time": "2:15:53", "throughput": 8702.05, "total_tokens": 67016040} +{"current_steps": 99420, "total_steps": 204665, "loss": 0.0002, "lr": 1.2223546516966339e-06, "epoch": 2.4288471404490264, "percentage": 48.58, "elapsed_time": "2:08:21", "remaining_time": "2:15:52", "throughput": 8702.11, "total_tokens": 67019560} +{"current_steps": 99425, "total_steps": 204665, "loss": 0.0425, "lr": 1.2222715081409572e-06, "epoch": 2.4289692912808736, "percentage": 48.58, "elapsed_time": "2:08:21", "remaining_time": "2:15:52", "throughput": 8702.14, "total_tokens": 67022760} +{"current_steps": 99430, "total_steps": 204665, "loss": 0.0563, "lr": 1.2221883629688622e-06, "epoch": 2.4290914421127208, "percentage": 48.58, "elapsed_time": "2:08:22", "remaining_time": "2:15:51", "throughput": 8702.2, "total_tokens": 67026344} +{"current_steps": 99435, "total_steps": 204665, "loss": 0.0584, "lr": 1.2221052161809535e-06, "epoch": 2.429213592944568, "percentage": 48.58, "elapsed_time": "2:08:22", "remaining_time": "2:15:51", "throughput": 8702.3, "total_tokens": 67030184} +{"current_steps": 99440, "total_steps": 204665, "loss": 0.0001, "lr": 1.222022067777836e-06, "epoch": 2.429335743776415, "percentage": 48.59, "elapsed_time": "2:08:22", "remaining_time": "2:15:51", "throughput": 8702.4, "total_tokens": 67034088} +{"current_steps": 99445, "total_steps": 204665, "loss": 0.031, "lr": 1.2219389177601142e-06, "epoch": 2.4294578946082623, "percentage": 48.59, "elapsed_time": "2:08:23", "remaining_time": "2:15:50", "throughput": 8702.42, "total_tokens": 67037288} +{"current_steps": 99450, "total_steps": 204665, "loss": 0.0184, "lr": 1.2218557661283932e-06, "epoch": 2.4295800454401095, "percentage": 48.59, "elapsed_time": "2:08:23", "remaining_time": "2:15:50", "throughput": 8702.48, "total_tokens": 67040808} +{"current_steps": 99455, "total_steps": 204665, "loss": 0.0002, "lr": 1.2217726128832773e-06, "epoch": 2.4297021962719567, "percentage": 48.59, "elapsed_time": "2:08:23", "remaining_time": "2:15:49", "throughput": 8702.51, "total_tokens": 67043944} +{"current_steps": 99460, "total_steps": 204665, "loss": 0.0002, "lr": 1.2216894580253711e-06, "epoch": 2.429824347103804, "percentage": 48.6, "elapsed_time": "2:08:24", "remaining_time": "2:15:49", "throughput": 8702.55, "total_tokens": 67047336} +{"current_steps": 99465, "total_steps": 204665, "loss": 0.041, "lr": 1.2216063015552798e-06, "epoch": 2.429946497935651, "percentage": 48.6, "elapsed_time": "2:08:24", "remaining_time": "2:15:48", "throughput": 8702.66, "total_tokens": 67051368} +{"current_steps": 99470, "total_steps": 204665, "loss": 0.0402, "lr": 1.221523143473608e-06, "epoch": 2.4300686487674983, "percentage": 48.6, "elapsed_time": "2:08:25", "remaining_time": "2:15:48", "throughput": 8702.7, "total_tokens": 67054632} +{"current_steps": 99475, "total_steps": 204665, "loss": 0.1122, "lr": 1.2214399837809599e-06, "epoch": 2.4301907995993455, "percentage": 48.6, "elapsed_time": "2:08:25", "remaining_time": "2:15:48", "throughput": 8702.71, "total_tokens": 67057640} +{"current_steps": 99480, "total_steps": 204665, "loss": 0.0461, "lr": 1.2213568224779408e-06, "epoch": 2.430312950431192, "percentage": 48.61, "elapsed_time": "2:08:25", "remaining_time": "2:15:47", "throughput": 8702.79, "total_tokens": 67061352} +{"current_steps": 99485, "total_steps": 204665, "loss": 0.0007, "lr": 1.2212736595651555e-06, "epoch": 2.4304351012630394, "percentage": 48.61, "elapsed_time": "2:08:26", "remaining_time": "2:15:47", "throughput": 8702.8, "total_tokens": 67064424} +{"current_steps": 99490, "total_steps": 204665, "loss": 0.0001, "lr": 1.2211904950432086e-06, "epoch": 2.4305572520948866, "percentage": 48.61, "elapsed_time": "2:08:26", "remaining_time": "2:15:46", "throughput": 8702.84, "total_tokens": 67067752} +{"current_steps": 99495, "total_steps": 204665, "loss": 0.0542, "lr": 1.221107328912705e-06, "epoch": 2.430679402926734, "percentage": 48.61, "elapsed_time": "2:08:26", "remaining_time": "2:15:46", "throughput": 8702.84, "total_tokens": 67070696} +{"current_steps": 99500, "total_steps": 204665, "loss": 0.0716, "lr": 1.2210241611742494e-06, "epoch": 2.430801553758581, "percentage": 48.62, "elapsed_time": "2:08:27", "remaining_time": "2:15:45", "throughput": 8702.92, "total_tokens": 67074408} +{"current_steps": 99505, "total_steps": 204665, "loss": 0.0489, "lr": 1.2209409918284465e-06, "epoch": 2.430923704590428, "percentage": 48.62, "elapsed_time": "2:08:27", "remaining_time": "2:15:45", "throughput": 8702.99, "total_tokens": 67077992} +{"current_steps": 99510, "total_steps": 204665, "loss": 0.0348, "lr": 1.220857820875901e-06, "epoch": 2.4310458554222754, "percentage": 48.62, "elapsed_time": "2:08:27", "remaining_time": "2:15:45", "throughput": 8703.03, "total_tokens": 67081384} +{"current_steps": 99515, "total_steps": 204665, "loss": 0.1613, "lr": 1.2207746483172185e-06, "epoch": 2.4311680062541225, "percentage": 48.62, "elapsed_time": "2:08:28", "remaining_time": "2:15:44", "throughput": 8703.04, "total_tokens": 67084456} +{"current_steps": 99520, "total_steps": 204665, "loss": 0.0002, "lr": 1.2206914741530034e-06, "epoch": 2.4312901570859697, "percentage": 48.63, "elapsed_time": "2:08:28", "remaining_time": "2:15:44", "throughput": 8703.09, "total_tokens": 67087912} +{"current_steps": 99525, "total_steps": 204665, "loss": 0.0345, "lr": 1.2206082983838606e-06, "epoch": 2.431412307917817, "percentage": 48.63, "elapsed_time": "2:08:28", "remaining_time": "2:15:43", "throughput": 8703.13, "total_tokens": 67091176} +{"current_steps": 99530, "total_steps": 204665, "loss": 0.0293, "lr": 1.2205251210103945e-06, "epoch": 2.431534458749664, "percentage": 48.63, "elapsed_time": "2:08:29", "remaining_time": "2:15:43", "throughput": 8703.21, "total_tokens": 67094952} +{"current_steps": 99535, "total_steps": 204665, "loss": 0.0385, "lr": 1.2204419420332108e-06, "epoch": 2.4316566095815113, "percentage": 48.63, "elapsed_time": "2:08:29", "remaining_time": "2:15:42", "throughput": 8703.23, "total_tokens": 67098088} +{"current_steps": 99540, "total_steps": 204665, "loss": 0.0706, "lr": 1.2203587614529136e-06, "epoch": 2.4317787604133585, "percentage": 48.64, "elapsed_time": "2:08:29", "remaining_time": "2:15:42", "throughput": 8703.24, "total_tokens": 67101160} +{"current_steps": 99545, "total_steps": 204665, "loss": 0.0004, "lr": 1.2202755792701085e-06, "epoch": 2.4319009112452057, "percentage": 48.64, "elapsed_time": "2:08:30", "remaining_time": "2:15:42", "throughput": 8703.29, "total_tokens": 67104552} +{"current_steps": 99550, "total_steps": 204665, "loss": 0.1449, "lr": 1.2201923954854e-06, "epoch": 2.432023062077053, "percentage": 48.64, "elapsed_time": "2:08:30", "remaining_time": "2:15:41", "throughput": 8703.32, "total_tokens": 67107816} +{"current_steps": 99555, "total_steps": 204665, "loss": 0.0387, "lr": 1.2201092100993933e-06, "epoch": 2.4321452129089, "percentage": 48.64, "elapsed_time": "2:08:30", "remaining_time": "2:15:41", "throughput": 8703.36, "total_tokens": 67111144} +{"current_steps": 99560, "total_steps": 204665, "loss": 0.0329, "lr": 1.2200260231126933e-06, "epoch": 2.4322673637407473, "percentage": 48.65, "elapsed_time": "2:08:31", "remaining_time": "2:15:40", "throughput": 8703.37, "total_tokens": 67114152} +{"current_steps": 99565, "total_steps": 204665, "loss": 0.0382, "lr": 1.2199428345259047e-06, "epoch": 2.432389514572594, "percentage": 48.65, "elapsed_time": "2:08:31", "remaining_time": "2:15:40", "throughput": 8703.36, "total_tokens": 67117032} +{"current_steps": 99570, "total_steps": 204665, "loss": 0.027, "lr": 1.2198596443396328e-06, "epoch": 2.4325116654044416, "percentage": 48.65, "elapsed_time": "2:08:31", "remaining_time": "2:15:39", "throughput": 8703.37, "total_tokens": 67120040} +{"current_steps": 99575, "total_steps": 204665, "loss": 0.0008, "lr": 1.2197764525544822e-06, "epoch": 2.4326338162362884, "percentage": 48.65, "elapsed_time": "2:08:32", "remaining_time": "2:15:39", "throughput": 8703.45, "total_tokens": 67123688} +{"current_steps": 99580, "total_steps": 204665, "loss": 0.0007, "lr": 1.2196932591710583e-06, "epoch": 2.4327559670681356, "percentage": 48.66, "elapsed_time": "2:08:32", "remaining_time": "2:15:39", "throughput": 8703.48, "total_tokens": 67127016} +{"current_steps": 99585, "total_steps": 204665, "loss": 0.0012, "lr": 1.219610064189966e-06, "epoch": 2.4328781178999828, "percentage": 48.66, "elapsed_time": "2:08:33", "remaining_time": "2:15:38", "throughput": 8703.52, "total_tokens": 67130344} +{"current_steps": 99590, "total_steps": 204665, "loss": 0.0003, "lr": 1.21952686761181e-06, "epoch": 2.43300026873183, "percentage": 48.66, "elapsed_time": "2:08:33", "remaining_time": "2:15:38", "throughput": 8703.62, "total_tokens": 67134184} +{"current_steps": 99595, "total_steps": 204665, "loss": 0.0229, "lr": 1.2194436694371959e-06, "epoch": 2.433122419563677, "percentage": 48.66, "elapsed_time": "2:08:33", "remaining_time": "2:15:37", "throughput": 8703.7, "total_tokens": 67137896} +{"current_steps": 99600, "total_steps": 204665, "loss": 0.0008, "lr": 1.219360469666728e-06, "epoch": 2.4332445703955243, "percentage": 48.66, "elapsed_time": "2:08:34", "remaining_time": "2:15:37", "throughput": 8703.72, "total_tokens": 67141032} +{"current_steps": 99605, "total_steps": 204665, "loss": 0.0664, "lr": 1.219277268301012e-06, "epoch": 2.4333667212273715, "percentage": 48.67, "elapsed_time": "2:08:34", "remaining_time": "2:15:36", "throughput": 8703.72, "total_tokens": 67143976} +{"current_steps": 99610, "total_steps": 204665, "loss": 0.0192, "lr": 1.2191940653406528e-06, "epoch": 2.4334888720592187, "percentage": 48.67, "elapsed_time": "2:08:34", "remaining_time": "2:15:36", "throughput": 8703.73, "total_tokens": 67147048} +{"current_steps": 99615, "total_steps": 204665, "loss": 0.0024, "lr": 1.2191108607862553e-06, "epoch": 2.433611022891066, "percentage": 48.67, "elapsed_time": "2:08:35", "remaining_time": "2:15:36", "throughput": 8703.75, "total_tokens": 67150120} +{"current_steps": 99620, "total_steps": 204665, "loss": 0.0001, "lr": 1.2190276546384247e-06, "epoch": 2.433733173722913, "percentage": 48.67, "elapsed_time": "2:08:35", "remaining_time": "2:15:35", "throughput": 8703.77, "total_tokens": 67153320} +{"current_steps": 99625, "total_steps": 204665, "loss": 0.065, "lr": 1.2189444468977664e-06, "epoch": 2.4338553245547603, "percentage": 48.68, "elapsed_time": "2:08:35", "remaining_time": "2:15:35", "throughput": 8703.78, "total_tokens": 67156328} +{"current_steps": 99630, "total_steps": 204665, "loss": 0.0002, "lr": 1.2188612375648846e-06, "epoch": 2.4339774753866075, "percentage": 48.68, "elapsed_time": "2:08:36", "remaining_time": "2:15:34", "throughput": 8703.86, "total_tokens": 67159976} +{"current_steps": 99635, "total_steps": 204665, "loss": 0.0561, "lr": 1.2187780266403853e-06, "epoch": 2.4340996262184547, "percentage": 48.68, "elapsed_time": "2:08:36", "remaining_time": "2:15:34", "throughput": 8703.9, "total_tokens": 67163368} +{"current_steps": 99640, "total_steps": 204665, "loss": 0.0002, "lr": 1.218694814124873e-06, "epoch": 2.434221777050302, "percentage": 48.68, "elapsed_time": "2:08:36", "remaining_time": "2:15:33", "throughput": 8703.92, "total_tokens": 67166440} +{"current_steps": 99645, "total_steps": 204665, "loss": 0.0002, "lr": 1.2186116000189536e-06, "epoch": 2.434343927882149, "percentage": 48.69, "elapsed_time": "2:08:37", "remaining_time": "2:15:33", "throughput": 8703.93, "total_tokens": 67169576} +{"current_steps": 99650, "total_steps": 204665, "loss": 0.0346, "lr": 1.218528384323232e-06, "epoch": 2.4344660787139962, "percentage": 48.69, "elapsed_time": "2:08:37", "remaining_time": "2:15:32", "throughput": 8703.97, "total_tokens": 67172840} +{"current_steps": 99655, "total_steps": 204665, "loss": 0.0457, "lr": 1.2184451670383129e-06, "epoch": 2.4345882295458434, "percentage": 48.69, "elapsed_time": "2:08:37", "remaining_time": "2:15:32", "throughput": 8703.99, "total_tokens": 67176040} +{"current_steps": 99660, "total_steps": 204665, "loss": 0.042, "lr": 1.218361948164802e-06, "epoch": 2.43471038037769, "percentage": 48.69, "elapsed_time": "2:08:38", "remaining_time": "2:15:32", "throughput": 8704.02, "total_tokens": 67179240} +{"current_steps": 99665, "total_steps": 204665, "loss": 0.0396, "lr": 1.218278727703304e-06, "epoch": 2.4348325312095374, "percentage": 48.7, "elapsed_time": "2:08:38", "remaining_time": "2:15:31", "throughput": 8704.03, "total_tokens": 67182312} +{"current_steps": 99670, "total_steps": 204665, "loss": 0.0075, "lr": 1.2181955056544244e-06, "epoch": 2.4349546820413845, "percentage": 48.7, "elapsed_time": "2:08:38", "remaining_time": "2:15:31", "throughput": 8704.08, "total_tokens": 67185704} +{"current_steps": 99675, "total_steps": 204665, "loss": 0.0602, "lr": 1.2181122820187689e-06, "epoch": 2.4350768328732317, "percentage": 48.7, "elapsed_time": "2:08:39", "remaining_time": "2:15:30", "throughput": 8704.06, "total_tokens": 67188456} +{"current_steps": 99680, "total_steps": 204665, "loss": 0.0023, "lr": 1.2180290567969417e-06, "epoch": 2.435198983705079, "percentage": 48.7, "elapsed_time": "2:08:39", "remaining_time": "2:15:30", "throughput": 8704.1, "total_tokens": 67191720} +{"current_steps": 99685, "total_steps": 204665, "loss": 0.0002, "lr": 1.217945829989549e-06, "epoch": 2.435321134536926, "percentage": 48.71, "elapsed_time": "2:08:39", "remaining_time": "2:15:29", "throughput": 8704.17, "total_tokens": 67195368} +{"current_steps": 99690, "total_steps": 204665, "loss": 0.0401, "lr": 1.217862601597195e-06, "epoch": 2.4354432853687733, "percentage": 48.71, "elapsed_time": "2:08:40", "remaining_time": "2:15:29", "throughput": 8704.23, "total_tokens": 67198888} +{"current_steps": 99695, "total_steps": 204665, "loss": 0.0534, "lr": 1.2177793716204858e-06, "epoch": 2.4355654362006205, "percentage": 48.71, "elapsed_time": "2:08:40", "remaining_time": "2:15:29", "throughput": 8704.28, "total_tokens": 67202344} +{"current_steps": 99700, "total_steps": 204665, "loss": 0.0002, "lr": 1.2176961400600265e-06, "epoch": 2.4356875870324677, "percentage": 48.71, "elapsed_time": "2:08:40", "remaining_time": "2:15:28", "throughput": 8704.35, "total_tokens": 67205928} +{"current_steps": 99705, "total_steps": 204665, "loss": 0.0001, "lr": 1.2176129069164225e-06, "epoch": 2.435809737864315, "percentage": 48.72, "elapsed_time": "2:08:41", "remaining_time": "2:15:28", "throughput": 8704.36, "total_tokens": 67209000} +{"current_steps": 99710, "total_steps": 204665, "loss": 0.0008, "lr": 1.2175296721902786e-06, "epoch": 2.435931888696162, "percentage": 48.72, "elapsed_time": "2:08:41", "remaining_time": "2:15:27", "throughput": 8704.4, "total_tokens": 67212328} +{"current_steps": 99715, "total_steps": 204665, "loss": 0.001, "lr": 1.2174464358822005e-06, "epoch": 2.4360540395280093, "percentage": 48.72, "elapsed_time": "2:08:41", "remaining_time": "2:15:27", "throughput": 8704.43, "total_tokens": 67215592} +{"current_steps": 99720, "total_steps": 204665, "loss": 0.0495, "lr": 1.2173631979927935e-06, "epoch": 2.4361761903598564, "percentage": 48.72, "elapsed_time": "2:08:42", "remaining_time": "2:15:26", "throughput": 8704.5, "total_tokens": 67219176} +{"current_steps": 99725, "total_steps": 204665, "loss": 0.0502, "lr": 1.217279958522663e-06, "epoch": 2.4362983411917036, "percentage": 48.73, "elapsed_time": "2:08:42", "remaining_time": "2:15:26", "throughput": 8704.53, "total_tokens": 67222440} +{"current_steps": 99730, "total_steps": 204665, "loss": 0.1152, "lr": 1.217196717472414e-06, "epoch": 2.436420492023551, "percentage": 48.73, "elapsed_time": "2:08:43", "remaining_time": "2:15:26", "throughput": 8704.59, "total_tokens": 67225960} +{"current_steps": 99735, "total_steps": 204665, "loss": 0.0001, "lr": 1.2171134748426522e-06, "epoch": 2.436542642855398, "percentage": 48.73, "elapsed_time": "2:08:43", "remaining_time": "2:15:25", "throughput": 8704.65, "total_tokens": 67229480} +{"current_steps": 99740, "total_steps": 204665, "loss": 0.0763, "lr": 1.2170302306339825e-06, "epoch": 2.436664793687245, "percentage": 48.73, "elapsed_time": "2:08:43", "remaining_time": "2:15:25", "throughput": 8704.69, "total_tokens": 67232744} +{"current_steps": 99745, "total_steps": 204665, "loss": 0.0002, "lr": 1.216946984847011e-06, "epoch": 2.436786944519092, "percentage": 48.74, "elapsed_time": "2:08:44", "remaining_time": "2:15:24", "throughput": 8704.7, "total_tokens": 67235816} +{"current_steps": 99750, "total_steps": 204665, "loss": 0.0852, "lr": 1.2168637374823425e-06, "epoch": 2.4369090953509396, "percentage": 48.74, "elapsed_time": "2:08:44", "remaining_time": "2:15:24", "throughput": 8704.74, "total_tokens": 67239144} +{"current_steps": 99755, "total_steps": 204665, "loss": 0.0001, "lr": 1.2167804885405825e-06, "epoch": 2.4370312461827863, "percentage": 48.74, "elapsed_time": "2:08:44", "remaining_time": "2:15:23", "throughput": 8704.76, "total_tokens": 67242280} +{"current_steps": 99760, "total_steps": 204665, "loss": 0.0001, "lr": 1.2166972380223363e-06, "epoch": 2.4371533970146335, "percentage": 48.74, "elapsed_time": "2:08:45", "remaining_time": "2:15:23", "throughput": 8704.79, "total_tokens": 67245608} +{"current_steps": 99765, "total_steps": 204665, "loss": 0.0002, "lr": 1.2166139859282098e-06, "epoch": 2.4372755478464807, "percentage": 48.75, "elapsed_time": "2:08:45", "remaining_time": "2:15:23", "throughput": 8704.85, "total_tokens": 67249064} +{"current_steps": 99770, "total_steps": 204665, "loss": 0.0003, "lr": 1.2165307322588082e-06, "epoch": 2.437397698678328, "percentage": 48.75, "elapsed_time": "2:08:45", "remaining_time": "2:15:22", "throughput": 8704.87, "total_tokens": 67252264} +{"current_steps": 99775, "total_steps": 204665, "loss": 0.0372, "lr": 1.2164474770147365e-06, "epoch": 2.437519849510175, "percentage": 48.75, "elapsed_time": "2:08:46", "remaining_time": "2:15:22", "throughput": 8704.94, "total_tokens": 67255848} +{"current_steps": 99780, "total_steps": 204665, "loss": 0.0001, "lr": 1.2163642201966011e-06, "epoch": 2.4376420003420223, "percentage": 48.75, "elapsed_time": "2:08:46", "remaining_time": "2:15:21", "throughput": 8704.97, "total_tokens": 67259176} +{"current_steps": 99785, "total_steps": 204665, "loss": 0.0832, "lr": 1.2162809618050065e-06, "epoch": 2.4377641511738695, "percentage": 48.76, "elapsed_time": "2:08:46", "remaining_time": "2:15:21", "throughput": 8704.98, "total_tokens": 67262184} +{"current_steps": 99790, "total_steps": 204665, "loss": 0.0491, "lr": 1.2161977018405586e-06, "epoch": 2.4378863020057167, "percentage": 48.76, "elapsed_time": "2:08:47", "remaining_time": "2:15:20", "throughput": 8705.0, "total_tokens": 67265320} +{"current_steps": 99795, "total_steps": 204665, "loss": 0.1263, "lr": 1.2161144403038629e-06, "epoch": 2.438008452837564, "percentage": 48.76, "elapsed_time": "2:08:47", "remaining_time": "2:15:20", "throughput": 8705.05, "total_tokens": 67268776} +{"current_steps": 99800, "total_steps": 204665, "loss": 0.0476, "lr": 1.2160311771955246e-06, "epoch": 2.438130603669411, "percentage": 48.76, "elapsed_time": "2:08:47", "remaining_time": "2:15:20", "throughput": 8705.09, "total_tokens": 67272104} +{"current_steps": 99805, "total_steps": 204665, "loss": 0.0002, "lr": 1.21594791251615e-06, "epoch": 2.4382527545012582, "percentage": 48.77, "elapsed_time": "2:08:48", "remaining_time": "2:15:19", "throughput": 8705.12, "total_tokens": 67275304} +{"current_steps": 99810, "total_steps": 204665, "loss": 0.0281, "lr": 1.2158646462663437e-06, "epoch": 2.4383749053331054, "percentage": 48.77, "elapsed_time": "2:08:48", "remaining_time": "2:15:19", "throughput": 8705.11, "total_tokens": 67278184} +{"current_steps": 99815, "total_steps": 204665, "loss": 0.1022, "lr": 1.215781378446712e-06, "epoch": 2.4384970561649526, "percentage": 48.77, "elapsed_time": "2:08:48", "remaining_time": "2:15:18", "throughput": 8705.15, "total_tokens": 67281512} +{"current_steps": 99820, "total_steps": 204665, "loss": 0.0003, "lr": 1.2156981090578594e-06, "epoch": 2.4386192069968, "percentage": 48.77, "elapsed_time": "2:08:49", "remaining_time": "2:15:18", "throughput": 8705.25, "total_tokens": 67285352} +{"current_steps": 99825, "total_steps": 204665, "loss": 0.0044, "lr": 1.2156148381003926e-06, "epoch": 2.438741357828647, "percentage": 48.77, "elapsed_time": "2:08:49", "remaining_time": "2:15:17", "throughput": 8705.25, "total_tokens": 67288360} +{"current_steps": 99830, "total_steps": 204665, "loss": 0.0003, "lr": 1.215531565574917e-06, "epoch": 2.4388635086604937, "percentage": 48.78, "elapsed_time": "2:08:49", "remaining_time": "2:15:17", "throughput": 8705.29, "total_tokens": 67291624} +{"current_steps": 99835, "total_steps": 204665, "loss": 0.0004, "lr": 1.2154482914820375e-06, "epoch": 2.4389856594923414, "percentage": 48.78, "elapsed_time": "2:08:50", "remaining_time": "2:15:17", "throughput": 8705.32, "total_tokens": 67294824} +{"current_steps": 99840, "total_steps": 204665, "loss": 0.0002, "lr": 1.2153650158223602e-06, "epoch": 2.439107810324188, "percentage": 48.78, "elapsed_time": "2:08:50", "remaining_time": "2:15:16", "throughput": 8705.35, "total_tokens": 67298088} +{"current_steps": 99845, "total_steps": 204665, "loss": 0.0006, "lr": 1.2152817385964906e-06, "epoch": 2.4392299611560353, "percentage": 48.78, "elapsed_time": "2:08:51", "remaining_time": "2:15:16", "throughput": 8705.38, "total_tokens": 67301352} +{"current_steps": 99850, "total_steps": 204665, "loss": 0.0, "lr": 1.215198459805034e-06, "epoch": 2.4393521119878825, "percentage": 48.79, "elapsed_time": "2:08:51", "remaining_time": "2:15:15", "throughput": 8705.45, "total_tokens": 67305000} +{"current_steps": 99855, "total_steps": 204665, "loss": 0.0001, "lr": 1.2151151794485966e-06, "epoch": 2.4394742628197297, "percentage": 48.79, "elapsed_time": "2:08:51", "remaining_time": "2:15:15", "throughput": 8705.51, "total_tokens": 67308520} +{"current_steps": 99860, "total_steps": 204665, "loss": 0.0883, "lr": 1.2150318975277835e-06, "epoch": 2.439596413651577, "percentage": 48.79, "elapsed_time": "2:08:52", "remaining_time": "2:15:14", "throughput": 8705.54, "total_tokens": 67311784} +{"current_steps": 99865, "total_steps": 204665, "loss": 0.0014, "lr": 1.2149486140432008e-06, "epoch": 2.439718564483424, "percentage": 48.79, "elapsed_time": "2:08:52", "remaining_time": "2:15:14", "throughput": 8705.58, "total_tokens": 67315112} +{"current_steps": 99870, "total_steps": 204665, "loss": 0.1583, "lr": 1.214865328995454e-06, "epoch": 2.4398407153152712, "percentage": 48.8, "elapsed_time": "2:08:52", "remaining_time": "2:15:14", "throughput": 8705.58, "total_tokens": 67318056} +{"current_steps": 99875, "total_steps": 204665, "loss": 0.0569, "lr": 1.2147820423851487e-06, "epoch": 2.4399628661471184, "percentage": 48.8, "elapsed_time": "2:08:53", "remaining_time": "2:15:13", "throughput": 8705.67, "total_tokens": 67321832} +{"current_steps": 99880, "total_steps": 204665, "loss": 0.0001, "lr": 1.2146987542128906e-06, "epoch": 2.4400850169789656, "percentage": 48.8, "elapsed_time": "2:08:53", "remaining_time": "2:15:13", "throughput": 8705.69, "total_tokens": 67324968} +{"current_steps": 99885, "total_steps": 204665, "loss": 0.0002, "lr": 1.2146154644792855e-06, "epoch": 2.440207167810813, "percentage": 48.8, "elapsed_time": "2:08:53", "remaining_time": "2:15:12", "throughput": 8705.72, "total_tokens": 67328232} +{"current_steps": 99890, "total_steps": 204665, "loss": 0.0005, "lr": 1.214532173184939e-06, "epoch": 2.44032931864266, "percentage": 48.81, "elapsed_time": "2:08:54", "remaining_time": "2:15:12", "throughput": 8705.74, "total_tokens": 67331368} +{"current_steps": 99895, "total_steps": 204665, "loss": 0.0002, "lr": 1.2144488803304566e-06, "epoch": 2.440451469474507, "percentage": 48.81, "elapsed_time": "2:08:54", "remaining_time": "2:15:11", "throughput": 8705.77, "total_tokens": 67334568} +{"current_steps": 99900, "total_steps": 204665, "loss": 0.0142, "lr": 1.2143655859164445e-06, "epoch": 2.4405736203063544, "percentage": 48.81, "elapsed_time": "2:08:54", "remaining_time": "2:15:11", "throughput": 8705.85, "total_tokens": 67338280} +{"current_steps": 99905, "total_steps": 204665, "loss": 0.0004, "lr": 1.2142822899435083e-06, "epoch": 2.4406957711382016, "percentage": 48.81, "elapsed_time": "2:08:55", "remaining_time": "2:15:11", "throughput": 8705.93, "total_tokens": 67342056} +{"current_steps": 99910, "total_steps": 204665, "loss": 0.0276, "lr": 1.2141989924122534e-06, "epoch": 2.4408179219700488, "percentage": 48.82, "elapsed_time": "2:08:55", "remaining_time": "2:15:10", "throughput": 8706.03, "total_tokens": 67345896} +{"current_steps": 99915, "total_steps": 204665, "loss": 0.0574, "lr": 1.2141156933232856e-06, "epoch": 2.440940072801896, "percentage": 48.82, "elapsed_time": "2:08:55", "remaining_time": "2:15:10", "throughput": 8706.03, "total_tokens": 67348904} +{"current_steps": 99920, "total_steps": 204665, "loss": 0.034, "lr": 1.2140323926772114e-06, "epoch": 2.441062223633743, "percentage": 48.82, "elapsed_time": "2:08:56", "remaining_time": "2:15:09", "throughput": 8706.12, "total_tokens": 67352744} +{"current_steps": 99925, "total_steps": 204665, "loss": 0.0001, "lr": 1.2139490904746359e-06, "epoch": 2.44118437446559, "percentage": 48.82, "elapsed_time": "2:08:56", "remaining_time": "2:15:09", "throughput": 8706.19, "total_tokens": 67356328} +{"current_steps": 99930, "total_steps": 204665, "loss": 0.0002, "lr": 1.213865786716165e-06, "epoch": 2.4413065252974375, "percentage": 48.83, "elapsed_time": "2:08:56", "remaining_time": "2:15:08", "throughput": 8706.26, "total_tokens": 67359976} +{"current_steps": 99935, "total_steps": 204665, "loss": 0.0003, "lr": 1.2137824814024048e-06, "epoch": 2.4414286761292843, "percentage": 48.83, "elapsed_time": "2:08:57", "remaining_time": "2:15:08", "throughput": 8706.32, "total_tokens": 67363496} +{"current_steps": 99940, "total_steps": 204665, "loss": 0.0665, "lr": 1.2136991745339606e-06, "epoch": 2.4415508269611315, "percentage": 48.83, "elapsed_time": "2:08:57", "remaining_time": "2:15:08", "throughput": 8706.33, "total_tokens": 67366504} +{"current_steps": 99945, "total_steps": 204665, "loss": 0.0004, "lr": 1.2136158661114387e-06, "epoch": 2.4416729777929786, "percentage": 48.83, "elapsed_time": "2:08:58", "remaining_time": "2:15:07", "throughput": 8706.39, "total_tokens": 67370088} +{"current_steps": 99950, "total_steps": 204665, "loss": 0.101, "lr": 1.2135325561354446e-06, "epoch": 2.441795128624826, "percentage": 48.84, "elapsed_time": "2:08:58", "remaining_time": "2:15:07", "throughput": 8706.43, "total_tokens": 67373480} +{"current_steps": 99955, "total_steps": 204665, "loss": 0.0351, "lr": 1.2134492446065844e-06, "epoch": 2.441917279456673, "percentage": 48.84, "elapsed_time": "2:08:58", "remaining_time": "2:15:06", "throughput": 8706.46, "total_tokens": 67376680} +{"current_steps": 99960, "total_steps": 204665, "loss": 0.0004, "lr": 1.2133659315254637e-06, "epoch": 2.44203943028852, "percentage": 48.84, "elapsed_time": "2:08:59", "remaining_time": "2:15:06", "throughput": 8706.51, "total_tokens": 67380072} +{"current_steps": 99965, "total_steps": 204665, "loss": 0.0005, "lr": 1.2132826168926888e-06, "epoch": 2.4421615811203674, "percentage": 48.84, "elapsed_time": "2:08:59", "remaining_time": "2:15:05", "throughput": 8706.52, "total_tokens": 67383144} +{"current_steps": 99970, "total_steps": 204665, "loss": 0.0002, "lr": 1.2131993007088654e-06, "epoch": 2.4422837319522146, "percentage": 48.85, "elapsed_time": "2:08:59", "remaining_time": "2:15:05", "throughput": 8706.56, "total_tokens": 67386472} +{"current_steps": 99975, "total_steps": 204665, "loss": 0.0466, "lr": 1.2131159829745991e-06, "epoch": 2.442405882784062, "percentage": 48.85, "elapsed_time": "2:09:00", "remaining_time": "2:15:05", "throughput": 8706.6, "total_tokens": 67389800} +{"current_steps": 99980, "total_steps": 204665, "loss": 0.0412, "lr": 1.2130326636904963e-06, "epoch": 2.442528033615909, "percentage": 48.85, "elapsed_time": "2:09:00", "remaining_time": "2:15:04", "throughput": 8706.68, "total_tokens": 67393512} +{"current_steps": 99985, "total_steps": 204665, "loss": 0.0408, "lr": 1.2129493428571627e-06, "epoch": 2.442650184447756, "percentage": 48.85, "elapsed_time": "2:09:00", "remaining_time": "2:15:04", "throughput": 8706.69, "total_tokens": 67396648} +{"current_steps": 99990, "total_steps": 204665, "loss": 0.0002, "lr": 1.2128660204752042e-06, "epoch": 2.4427723352796034, "percentage": 48.86, "elapsed_time": "2:09:01", "remaining_time": "2:15:03", "throughput": 8706.8, "total_tokens": 67400552} +{"current_steps": 99995, "total_steps": 204665, "loss": 0.0639, "lr": 1.2127826965452266e-06, "epoch": 2.4428944861114505, "percentage": 48.86, "elapsed_time": "2:09:01", "remaining_time": "2:15:03", "throughput": 8706.85, "total_tokens": 67404008} +{"current_steps": 100000, "total_steps": 204665, "loss": 0.1636, "lr": 1.212699371067836e-06, "epoch": 2.4430166369432977, "percentage": 48.86, "elapsed_time": "2:09:01", "remaining_time": "2:15:02", "throughput": 8706.89, "total_tokens": 67407336} +{"current_steps": 100005, "total_steps": 204665, "loss": 0.0291, "lr": 1.212616044043638e-06, "epoch": 2.443138787775145, "percentage": 48.86, "elapsed_time": "2:09:02", "remaining_time": "2:15:02", "throughput": 8706.92, "total_tokens": 67410664} +{"current_steps": 100010, "total_steps": 204665, "loss": 0.0003, "lr": 1.2125327154732394e-06, "epoch": 2.4432609386069917, "percentage": 48.87, "elapsed_time": "2:09:02", "remaining_time": "2:15:02", "throughput": 8706.94, "total_tokens": 67413800} +{"current_steps": 100015, "total_steps": 204665, "loss": 0.0801, "lr": 1.2124493853572458e-06, "epoch": 2.4433830894388393, "percentage": 48.87, "elapsed_time": "2:09:02", "remaining_time": "2:15:01", "throughput": 8706.93, "total_tokens": 67416616} +{"current_steps": 100020, "total_steps": 204665, "loss": 0.0002, "lr": 1.2123660536962628e-06, "epoch": 2.443505240270686, "percentage": 48.87, "elapsed_time": "2:09:03", "remaining_time": "2:15:01", "throughput": 8706.93, "total_tokens": 67419560} +{"current_steps": 100025, "total_steps": 204665, "loss": 0.043, "lr": 1.212282720490897e-06, "epoch": 2.4436273911025332, "percentage": 48.87, "elapsed_time": "2:09:03", "remaining_time": "2:15:00", "throughput": 8707.01, "total_tokens": 67423272} +{"current_steps": 100030, "total_steps": 204665, "loss": 0.0004, "lr": 1.2121993857417542e-06, "epoch": 2.4437495419343804, "percentage": 48.87, "elapsed_time": "2:09:03", "remaining_time": "2:15:00", "throughput": 8707.08, "total_tokens": 67426920} +{"current_steps": 100035, "total_steps": 204665, "loss": 0.0002, "lr": 1.21211604944944e-06, "epoch": 2.4438716927662276, "percentage": 48.88, "elapsed_time": "2:09:04", "remaining_time": "2:14:59", "throughput": 8707.12, "total_tokens": 67430248} +{"current_steps": 100040, "total_steps": 204665, "loss": 0.082, "lr": 1.2120327116145611e-06, "epoch": 2.443993843598075, "percentage": 48.88, "elapsed_time": "2:09:04", "remaining_time": "2:14:59", "throughput": 8707.17, "total_tokens": 67433576} +{"current_steps": 100045, "total_steps": 204665, "loss": 0.0875, "lr": 1.2119493722377233e-06, "epoch": 2.444115994429922, "percentage": 48.88, "elapsed_time": "2:09:04", "remaining_time": "2:14:59", "throughput": 8707.22, "total_tokens": 67437032} +{"current_steps": 100050, "total_steps": 204665, "loss": 0.0665, "lr": 1.2118660313195327e-06, "epoch": 2.444238145261769, "percentage": 48.88, "elapsed_time": "2:09:05", "remaining_time": "2:14:58", "throughput": 8707.28, "total_tokens": 67440552} +{"current_steps": 100055, "total_steps": 204665, "loss": 0.0003, "lr": 1.2117826888605953e-06, "epoch": 2.4443602960936164, "percentage": 48.89, "elapsed_time": "2:09:05", "remaining_time": "2:14:58", "throughput": 8707.29, "total_tokens": 67443624} +{"current_steps": 100060, "total_steps": 204665, "loss": 0.0004, "lr": 1.2116993448615173e-06, "epoch": 2.4444824469254636, "percentage": 48.89, "elapsed_time": "2:09:06", "remaining_time": "2:14:57", "throughput": 8707.34, "total_tokens": 67447080} +{"current_steps": 100065, "total_steps": 204665, "loss": 0.12, "lr": 1.2116159993229045e-06, "epoch": 2.4446045977573108, "percentage": 48.89, "elapsed_time": "2:09:06", "remaining_time": "2:14:57", "throughput": 8707.41, "total_tokens": 67450728} +{"current_steps": 100070, "total_steps": 204665, "loss": 0.0004, "lr": 1.2115326522453632e-06, "epoch": 2.444726748589158, "percentage": 48.89, "elapsed_time": "2:09:06", "remaining_time": "2:14:57", "throughput": 8707.48, "total_tokens": 67454376} +{"current_steps": 100075, "total_steps": 204665, "loss": 0.0526, "lr": 1.2114493036294996e-06, "epoch": 2.444848899421005, "percentage": 48.9, "elapsed_time": "2:09:07", "remaining_time": "2:14:56", "throughput": 8707.53, "total_tokens": 67457768} +{"current_steps": 100080, "total_steps": 204665, "loss": 0.0651, "lr": 1.21136595347592e-06, "epoch": 2.4449710502528523, "percentage": 48.9, "elapsed_time": "2:09:07", "remaining_time": "2:14:56", "throughput": 8707.57, "total_tokens": 67461160} +{"current_steps": 100085, "total_steps": 204665, "loss": 0.0733, "lr": 1.2112826017852303e-06, "epoch": 2.4450932010846995, "percentage": 48.9, "elapsed_time": "2:09:07", "remaining_time": "2:14:55", "throughput": 8707.56, "total_tokens": 67464040} +{"current_steps": 100090, "total_steps": 204665, "loss": 0.0002, "lr": 1.211199248558037e-06, "epoch": 2.4452153519165467, "percentage": 48.9, "elapsed_time": "2:09:08", "remaining_time": "2:14:55", "throughput": 8707.61, "total_tokens": 67467496} +{"current_steps": 100095, "total_steps": 204665, "loss": 0.04, "lr": 1.2111158937949456e-06, "epoch": 2.445337502748394, "percentage": 48.91, "elapsed_time": "2:09:08", "remaining_time": "2:14:54", "throughput": 8707.65, "total_tokens": 67470824} +{"current_steps": 100100, "total_steps": 204665, "loss": 0.0986, "lr": 1.2110325374965624e-06, "epoch": 2.445459653580241, "percentage": 48.91, "elapsed_time": "2:09:08", "remaining_time": "2:14:54", "throughput": 8707.68, "total_tokens": 67474088} +{"current_steps": 100105, "total_steps": 204665, "loss": 0.0009, "lr": 1.2109491796634942e-06, "epoch": 2.445581804412088, "percentage": 48.91, "elapsed_time": "2:09:09", "remaining_time": "2:14:54", "throughput": 8707.7, "total_tokens": 67477160} +{"current_steps": 100110, "total_steps": 204665, "loss": 0.0002, "lr": 1.2108658202963464e-06, "epoch": 2.445703955243935, "percentage": 48.91, "elapsed_time": "2:09:09", "remaining_time": "2:14:53", "throughput": 8707.74, "total_tokens": 67480552} +{"current_steps": 100115, "total_steps": 204665, "loss": 0.0446, "lr": 1.210782459395726e-06, "epoch": 2.445826106075782, "percentage": 48.92, "elapsed_time": "2:09:09", "remaining_time": "2:14:53", "throughput": 8707.77, "total_tokens": 67483816} +{"current_steps": 100120, "total_steps": 204665, "loss": 0.0379, "lr": 1.2106990969622388e-06, "epoch": 2.4459482569076294, "percentage": 48.92, "elapsed_time": "2:09:10", "remaining_time": "2:14:52", "throughput": 8707.85, "total_tokens": 67487464} +{"current_steps": 100125, "total_steps": 204665, "loss": 0.0002, "lr": 1.2106157329964913e-06, "epoch": 2.4460704077394766, "percentage": 48.92, "elapsed_time": "2:09:10", "remaining_time": "2:14:52", "throughput": 8707.89, "total_tokens": 67490792} +{"current_steps": 100130, "total_steps": 204665, "loss": 0.0465, "lr": 1.210532367499089e-06, "epoch": 2.446192558571324, "percentage": 48.92, "elapsed_time": "2:09:10", "remaining_time": "2:14:51", "throughput": 8707.92, "total_tokens": 67494056} +{"current_steps": 100135, "total_steps": 204665, "loss": 0.0004, "lr": 1.210449000470639e-06, "epoch": 2.446314709403171, "percentage": 48.93, "elapsed_time": "2:09:11", "remaining_time": "2:14:51", "throughput": 8707.97, "total_tokens": 67497512} +{"current_steps": 100140, "total_steps": 204665, "loss": 0.0529, "lr": 1.2103656319117474e-06, "epoch": 2.446436860235018, "percentage": 48.93, "elapsed_time": "2:09:11", "remaining_time": "2:14:51", "throughput": 8708.01, "total_tokens": 67500840} +{"current_steps": 100145, "total_steps": 204665, "loss": 0.0396, "lr": 1.2102822618230204e-06, "epoch": 2.4465590110668654, "percentage": 48.93, "elapsed_time": "2:09:11", "remaining_time": "2:14:50", "throughput": 8708.05, "total_tokens": 67504168} +{"current_steps": 100150, "total_steps": 204665, "loss": 0.0005, "lr": 1.210198890205064e-06, "epoch": 2.4466811618987125, "percentage": 48.93, "elapsed_time": "2:09:12", "remaining_time": "2:14:50", "throughput": 8708.09, "total_tokens": 67507496} +{"current_steps": 100155, "total_steps": 204665, "loss": 0.0004, "lr": 1.210115517058485e-06, "epoch": 2.4468033127305597, "percentage": 48.94, "elapsed_time": "2:09:12", "remaining_time": "2:14:49", "throughput": 8708.13, "total_tokens": 67510824} +{"current_steps": 100160, "total_steps": 204665, "loss": 0.0614, "lr": 1.2100321423838889e-06, "epoch": 2.446925463562407, "percentage": 48.94, "elapsed_time": "2:09:12", "remaining_time": "2:14:49", "throughput": 8708.17, "total_tokens": 67514216} +{"current_steps": 100165, "total_steps": 204665, "loss": 0.0004, "lr": 1.2099487661818829e-06, "epoch": 2.447047614394254, "percentage": 48.94, "elapsed_time": "2:09:13", "remaining_time": "2:14:48", "throughput": 8708.22, "total_tokens": 67517608} +{"current_steps": 100170, "total_steps": 204665, "loss": 0.0595, "lr": 1.209865388453073e-06, "epoch": 2.4471697652261013, "percentage": 48.94, "elapsed_time": "2:09:13", "remaining_time": "2:14:48", "throughput": 8708.25, "total_tokens": 67520872} +{"current_steps": 100175, "total_steps": 204665, "loss": 0.0971, "lr": 1.2097820091980654e-06, "epoch": 2.4472919160579485, "percentage": 48.95, "elapsed_time": "2:09:14", "remaining_time": "2:14:48", "throughput": 8708.25, "total_tokens": 67523880} +{"current_steps": 100180, "total_steps": 204665, "loss": 0.166, "lr": 1.209698628417467e-06, "epoch": 2.4474140668897957, "percentage": 48.95, "elapsed_time": "2:09:14", "remaining_time": "2:14:47", "throughput": 8708.26, "total_tokens": 67526888} +{"current_steps": 100185, "total_steps": 204665, "loss": 0.0003, "lr": 1.2096152461118836e-06, "epoch": 2.447536217721643, "percentage": 48.95, "elapsed_time": "2:09:14", "remaining_time": "2:14:47", "throughput": 8708.29, "total_tokens": 67530088} +{"current_steps": 100190, "total_steps": 204665, "loss": 0.0469, "lr": 1.2095318622819216e-06, "epoch": 2.4476583685534896, "percentage": 48.95, "elapsed_time": "2:09:15", "remaining_time": "2:14:46", "throughput": 8708.29, "total_tokens": 67533032} +{"current_steps": 100195, "total_steps": 204665, "loss": 0.0003, "lr": 1.2094484769281877e-06, "epoch": 2.4477805193853373, "percentage": 48.96, "elapsed_time": "2:09:15", "remaining_time": "2:14:46", "throughput": 8708.29, "total_tokens": 67536040} +{"current_steps": 100200, "total_steps": 204665, "loss": 0.0562, "lr": 1.2093650900512879e-06, "epoch": 2.447902670217184, "percentage": 48.96, "elapsed_time": "2:09:15", "remaining_time": "2:14:45", "throughput": 8708.35, "total_tokens": 67539496} +{"current_steps": 100205, "total_steps": 204665, "loss": 0.0657, "lr": 1.2092817016518291e-06, "epoch": 2.448024821049031, "percentage": 48.96, "elapsed_time": "2:09:16", "remaining_time": "2:14:45", "throughput": 8708.39, "total_tokens": 67542824} +{"current_steps": 100210, "total_steps": 204665, "loss": 0.0311, "lr": 1.2091983117304174e-06, "epoch": 2.4481469718808784, "percentage": 48.96, "elapsed_time": "2:09:16", "remaining_time": "2:14:44", "throughput": 8708.4, "total_tokens": 67545832} +{"current_steps": 100215, "total_steps": 204665, "loss": 0.0398, "lr": 1.2091149202876593e-06, "epoch": 2.4482691227127256, "percentage": 48.97, "elapsed_time": "2:09:16", "remaining_time": "2:14:44", "throughput": 8708.45, "total_tokens": 67549288} +{"current_steps": 100220, "total_steps": 204665, "loss": 0.0006, "lr": 1.2090315273241613e-06, "epoch": 2.4483912735445728, "percentage": 48.97, "elapsed_time": "2:09:17", "remaining_time": "2:14:44", "throughput": 8708.46, "total_tokens": 67552296} +{"current_steps": 100225, "total_steps": 204665, "loss": 0.0542, "lr": 1.2089481328405295e-06, "epoch": 2.44851342437642, "percentage": 48.97, "elapsed_time": "2:09:17", "remaining_time": "2:14:43", "throughput": 8708.49, "total_tokens": 67555624} +{"current_steps": 100230, "total_steps": 204665, "loss": 0.0003, "lr": 1.208864736837371e-06, "epoch": 2.448635575208267, "percentage": 48.97, "elapsed_time": "2:09:17", "remaining_time": "2:14:43", "throughput": 8708.55, "total_tokens": 67559144} +{"current_steps": 100235, "total_steps": 204665, "loss": 0.0007, "lr": 1.2087813393152919e-06, "epoch": 2.4487577260401143, "percentage": 48.98, "elapsed_time": "2:09:18", "remaining_time": "2:14:42", "throughput": 8708.6, "total_tokens": 67562600} +{"current_steps": 100240, "total_steps": 204665, "loss": 0.0804, "lr": 1.208697940274899e-06, "epoch": 2.4488798768719615, "percentage": 48.98, "elapsed_time": "2:09:18", "remaining_time": "2:14:42", "throughput": 8708.62, "total_tokens": 67565736} +{"current_steps": 100245, "total_steps": 204665, "loss": 0.0418, "lr": 1.2086145397167981e-06, "epoch": 2.4490020277038087, "percentage": 48.98, "elapsed_time": "2:09:18", "remaining_time": "2:14:41", "throughput": 8708.66, "total_tokens": 67569000} +{"current_steps": 100250, "total_steps": 204665, "loss": 0.0003, "lr": 1.2085311376415965e-06, "epoch": 2.449124178535656, "percentage": 48.98, "elapsed_time": "2:09:19", "remaining_time": "2:14:41", "throughput": 8708.68, "total_tokens": 67572136} +{"current_steps": 100255, "total_steps": 204665, "loss": 0.0004, "lr": 1.2084477340499003e-06, "epoch": 2.449246329367503, "percentage": 48.98, "elapsed_time": "2:09:19", "remaining_time": "2:14:41", "throughput": 8708.7, "total_tokens": 67575272} +{"current_steps": 100260, "total_steps": 204665, "loss": 0.003, "lr": 1.208364328942316e-06, "epoch": 2.4493684801993503, "percentage": 48.99, "elapsed_time": "2:09:19", "remaining_time": "2:14:40", "throughput": 8708.73, "total_tokens": 67578536} +{"current_steps": 100265, "total_steps": 204665, "loss": 0.0006, "lr": 1.2082809223194502e-06, "epoch": 2.4494906310311975, "percentage": 48.99, "elapsed_time": "2:09:20", "remaining_time": "2:14:40", "throughput": 8708.77, "total_tokens": 67581864} +{"current_steps": 100270, "total_steps": 204665, "loss": 0.0401, "lr": 1.2081975141819097e-06, "epoch": 2.4496127818630447, "percentage": 48.99, "elapsed_time": "2:09:20", "remaining_time": "2:14:39", "throughput": 8708.79, "total_tokens": 67585000} +{"current_steps": 100275, "total_steps": 204665, "loss": 0.0002, "lr": 1.2081141045303007e-06, "epoch": 2.449734932694892, "percentage": 48.99, "elapsed_time": "2:09:20", "remaining_time": "2:14:39", "throughput": 8708.83, "total_tokens": 67588264} +{"current_steps": 100280, "total_steps": 204665, "loss": 0.0003, "lr": 1.20803069336523e-06, "epoch": 2.449857083526739, "percentage": 49.0, "elapsed_time": "2:09:21", "remaining_time": "2:14:38", "throughput": 8708.92, "total_tokens": 67592104} +{"current_steps": 100285, "total_steps": 204665, "loss": 0.0428, "lr": 1.207947280687304e-06, "epoch": 2.4499792343585858, "percentage": 49.0, "elapsed_time": "2:09:21", "remaining_time": "2:14:38", "throughput": 8708.97, "total_tokens": 67595560} +{"current_steps": 100290, "total_steps": 204665, "loss": 0.0002, "lr": 1.2078638664971297e-06, "epoch": 2.450101385190433, "percentage": 49.0, "elapsed_time": "2:09:21", "remaining_time": "2:14:38", "throughput": 8709.0, "total_tokens": 67598760} +{"current_steps": 100295, "total_steps": 204665, "loss": 0.009, "lr": 1.2077804507953135e-06, "epoch": 2.45022353602228, "percentage": 49.0, "elapsed_time": "2:09:22", "remaining_time": "2:14:37", "throughput": 8709.05, "total_tokens": 67602152} +{"current_steps": 100300, "total_steps": 204665, "loss": 0.1083, "lr": 1.2076970335824618e-06, "epoch": 2.4503456868541273, "percentage": 49.01, "elapsed_time": "2:09:22", "remaining_time": "2:14:37", "throughput": 8709.1, "total_tokens": 67605608} +{"current_steps": 100305, "total_steps": 204665, "loss": 0.0788, "lr": 1.2076136148591817e-06, "epoch": 2.4504678376859745, "percentage": 49.01, "elapsed_time": "2:09:22", "remaining_time": "2:14:36", "throughput": 8709.09, "total_tokens": 67608488} +{"current_steps": 100310, "total_steps": 204665, "loss": 0.0001, "lr": 1.2075301946260795e-06, "epoch": 2.4505899885178217, "percentage": 49.01, "elapsed_time": "2:09:23", "remaining_time": "2:14:36", "throughput": 8709.15, "total_tokens": 67611944} +{"current_steps": 100315, "total_steps": 204665, "loss": 0.0004, "lr": 1.2074467728837615e-06, "epoch": 2.450712139349669, "percentage": 49.01, "elapsed_time": "2:09:23", "remaining_time": "2:14:35", "throughput": 8709.18, "total_tokens": 67615272} +{"current_steps": 100320, "total_steps": 204665, "loss": 0.0182, "lr": 1.2073633496328348e-06, "epoch": 2.450834290181516, "percentage": 49.02, "elapsed_time": "2:09:24", "remaining_time": "2:14:35", "throughput": 8709.23, "total_tokens": 67618600} +{"current_steps": 100325, "total_steps": 204665, "loss": 0.097, "lr": 1.2072799248739062e-06, "epoch": 2.4509564410133633, "percentage": 49.02, "elapsed_time": "2:09:24", "remaining_time": "2:14:35", "throughput": 8709.24, "total_tokens": 67621736} +{"current_steps": 100330, "total_steps": 204665, "loss": 0.0042, "lr": 1.2071964986075825e-06, "epoch": 2.4510785918452105, "percentage": 49.02, "elapsed_time": "2:09:24", "remaining_time": "2:14:34", "throughput": 8709.32, "total_tokens": 67625448} +{"current_steps": 100335, "total_steps": 204665, "loss": 0.0003, "lr": 1.2071130708344698e-06, "epoch": 2.4512007426770577, "percentage": 49.02, "elapsed_time": "2:09:25", "remaining_time": "2:14:34", "throughput": 8709.36, "total_tokens": 67628776} +{"current_steps": 100340, "total_steps": 204665, "loss": 0.0886, "lr": 1.2070296415551754e-06, "epoch": 2.451322893508905, "percentage": 49.03, "elapsed_time": "2:09:25", "remaining_time": "2:14:33", "throughput": 8709.41, "total_tokens": 67632168} +{"current_steps": 100345, "total_steps": 204665, "loss": 0.0001, "lr": 1.2069462107703055e-06, "epoch": 2.451445044340752, "percentage": 49.03, "elapsed_time": "2:09:25", "remaining_time": "2:14:33", "throughput": 8709.49, "total_tokens": 67635880} +{"current_steps": 100350, "total_steps": 204665, "loss": 0.0002, "lr": 1.2068627784804673e-06, "epoch": 2.4515671951725992, "percentage": 49.03, "elapsed_time": "2:09:26", "remaining_time": "2:14:32", "throughput": 8709.53, "total_tokens": 67639272} +{"current_steps": 100355, "total_steps": 204665, "loss": 0.0013, "lr": 1.206779344686267e-06, "epoch": 2.4516893460044464, "percentage": 49.03, "elapsed_time": "2:09:26", "remaining_time": "2:14:32", "throughput": 8709.61, "total_tokens": 67643048} +{"current_steps": 100360, "total_steps": 204665, "loss": 0.0665, "lr": 1.2066959093883122e-06, "epoch": 2.4518114968362936, "percentage": 49.04, "elapsed_time": "2:09:26", "remaining_time": "2:14:32", "throughput": 8709.67, "total_tokens": 67646504} +{"current_steps": 100365, "total_steps": 204665, "loss": 0.0002, "lr": 1.206612472587209e-06, "epoch": 2.451933647668141, "percentage": 49.04, "elapsed_time": "2:09:27", "remaining_time": "2:14:31", "throughput": 8709.75, "total_tokens": 67650216} +{"current_steps": 100370, "total_steps": 204665, "loss": 0.0328, "lr": 1.206529034283564e-06, "epoch": 2.4520557984999876, "percentage": 49.04, "elapsed_time": "2:09:27", "remaining_time": "2:14:31", "throughput": 8709.8, "total_tokens": 67653672} +{"current_steps": 100375, "total_steps": 204665, "loss": 0.0379, "lr": 1.2064455944779845e-06, "epoch": 2.452177949331835, "percentage": 49.04, "elapsed_time": "2:09:27", "remaining_time": "2:14:30", "throughput": 8709.86, "total_tokens": 67657256} +{"current_steps": 100380, "total_steps": 204665, "loss": 0.0913, "lr": 1.206362153171077e-06, "epoch": 2.452300100163682, "percentage": 49.05, "elapsed_time": "2:09:28", "remaining_time": "2:14:30", "throughput": 8709.89, "total_tokens": 67660456} +{"current_steps": 100385, "total_steps": 204665, "loss": 0.0008, "lr": 1.2062787103634486e-06, "epoch": 2.452422250995529, "percentage": 49.05, "elapsed_time": "2:09:28", "remaining_time": "2:14:30", "throughput": 8709.95, "total_tokens": 67663976} +{"current_steps": 100390, "total_steps": 204665, "loss": 0.0002, "lr": 1.206195266055706e-06, "epoch": 2.4525444018273763, "percentage": 49.05, "elapsed_time": "2:09:28", "remaining_time": "2:14:29", "throughput": 8710.02, "total_tokens": 67667560} +{"current_steps": 100395, "total_steps": 204665, "loss": 0.0678, "lr": 1.2061118202484556e-06, "epoch": 2.4526665526592235, "percentage": 49.05, "elapsed_time": "2:09:29", "remaining_time": "2:14:29", "throughput": 8710.05, "total_tokens": 67670824} +{"current_steps": 100400, "total_steps": 204665, "loss": 0.0514, "lr": 1.206028372942305e-06, "epoch": 2.4527887034910707, "percentage": 49.06, "elapsed_time": "2:09:29", "remaining_time": "2:14:28", "throughput": 8710.1, "total_tokens": 67674344} +{"current_steps": 100405, "total_steps": 204665, "loss": 0.0522, "lr": 1.2059449241378608e-06, "epoch": 2.452910854322918, "percentage": 49.06, "elapsed_time": "2:09:29", "remaining_time": "2:14:28", "throughput": 8710.22, "total_tokens": 67678376} +{"current_steps": 100410, "total_steps": 204665, "loss": 0.0158, "lr": 1.2058614738357294e-06, "epoch": 2.453033005154765, "percentage": 49.06, "elapsed_time": "2:09:30", "remaining_time": "2:14:27", "throughput": 8710.23, "total_tokens": 67681448} +{"current_steps": 100415, "total_steps": 204665, "loss": 0.0392, "lr": 1.205778022036518e-06, "epoch": 2.4531551559866123, "percentage": 49.06, "elapsed_time": "2:09:30", "remaining_time": "2:14:27", "throughput": 8710.28, "total_tokens": 67684904} +{"current_steps": 100420, "total_steps": 204665, "loss": 0.0002, "lr": 1.2056945687408334e-06, "epoch": 2.4532773068184595, "percentage": 49.07, "elapsed_time": "2:09:31", "remaining_time": "2:14:27", "throughput": 8710.29, "total_tokens": 67687912} +{"current_steps": 100425, "total_steps": 204665, "loss": 0.0553, "lr": 1.2056111139492827e-06, "epoch": 2.4533994576503066, "percentage": 49.07, "elapsed_time": "2:09:31", "remaining_time": "2:14:26", "throughput": 8710.32, "total_tokens": 67691176} +{"current_steps": 100430, "total_steps": 204665, "loss": 0.0639, "lr": 1.2055276576624727e-06, "epoch": 2.453521608482154, "percentage": 49.07, "elapsed_time": "2:09:31", "remaining_time": "2:14:26", "throughput": 8710.37, "total_tokens": 67694568} +{"current_steps": 100435, "total_steps": 204665, "loss": 0.1097, "lr": 1.2054441998810103e-06, "epoch": 2.453643759314001, "percentage": 49.07, "elapsed_time": "2:09:32", "remaining_time": "2:14:25", "throughput": 8710.42, "total_tokens": 67698088} +{"current_steps": 100440, "total_steps": 204665, "loss": 0.0523, "lr": 1.205360740605502e-06, "epoch": 2.453765910145848, "percentage": 49.08, "elapsed_time": "2:09:32", "remaining_time": "2:14:25", "throughput": 8710.47, "total_tokens": 67701544} +{"current_steps": 100445, "total_steps": 204665, "loss": 0.0754, "lr": 1.2052772798365556e-06, "epoch": 2.4538880609776954, "percentage": 49.08, "elapsed_time": "2:09:32", "remaining_time": "2:14:24", "throughput": 8710.49, "total_tokens": 67704616} +{"current_steps": 100450, "total_steps": 204665, "loss": 0.0731, "lr": 1.2051938175747777e-06, "epoch": 2.4540102118095426, "percentage": 49.08, "elapsed_time": "2:09:33", "remaining_time": "2:14:24", "throughput": 8710.54, "total_tokens": 67708136} +{"current_steps": 100455, "total_steps": 204665, "loss": 0.0003, "lr": 1.2051103538207752e-06, "epoch": 2.4541323626413893, "percentage": 49.08, "elapsed_time": "2:09:33", "remaining_time": "2:14:24", "throughput": 8710.61, "total_tokens": 67711784} +{"current_steps": 100460, "total_steps": 204665, "loss": 0.2318, "lr": 1.2050268885751547e-06, "epoch": 2.454254513473237, "percentage": 49.09, "elapsed_time": "2:09:33", "remaining_time": "2:14:23", "throughput": 8710.66, "total_tokens": 67715176} +{"current_steps": 100465, "total_steps": 204665, "loss": 0.0004, "lr": 1.2049434218385236e-06, "epoch": 2.4543766643050837, "percentage": 49.09, "elapsed_time": "2:09:34", "remaining_time": "2:14:23", "throughput": 8710.71, "total_tokens": 67718632} +{"current_steps": 100470, "total_steps": 204665, "loss": 0.0355, "lr": 1.2048599536114887e-06, "epoch": 2.454498815136931, "percentage": 49.09, "elapsed_time": "2:09:34", "remaining_time": "2:14:22", "throughput": 8710.81, "total_tokens": 67722472} +{"current_steps": 100475, "total_steps": 204665, "loss": 0.0015, "lr": 1.2047764838946574e-06, "epoch": 2.454620965968778, "percentage": 49.09, "elapsed_time": "2:09:34", "remaining_time": "2:14:22", "throughput": 8710.85, "total_tokens": 67725864} +{"current_steps": 100480, "total_steps": 204665, "loss": 0.0008, "lr": 1.2046930126886362e-06, "epoch": 2.4547431168006253, "percentage": 49.09, "elapsed_time": "2:09:35", "remaining_time": "2:14:21", "throughput": 8710.88, "total_tokens": 67729128} +{"current_steps": 100485, "total_steps": 204665, "loss": 0.0001, "lr": 1.2046095399940326e-06, "epoch": 2.4548652676324725, "percentage": 49.1, "elapsed_time": "2:09:35", "remaining_time": "2:14:21", "throughput": 8710.91, "total_tokens": 67732328} +{"current_steps": 100490, "total_steps": 204665, "loss": 0.095, "lr": 1.2045260658114534e-06, "epoch": 2.4549874184643197, "percentage": 49.1, "elapsed_time": "2:09:35", "remaining_time": "2:14:21", "throughput": 8710.95, "total_tokens": 67735656} +{"current_steps": 100495, "total_steps": 204665, "loss": 0.0004, "lr": 1.2044425901415053e-06, "epoch": 2.455109569296167, "percentage": 49.1, "elapsed_time": "2:09:36", "remaining_time": "2:14:20", "throughput": 8710.98, "total_tokens": 67738920} +{"current_steps": 100500, "total_steps": 204665, "loss": 0.0003, "lr": 1.204359112984796e-06, "epoch": 2.455231720128014, "percentage": 49.1, "elapsed_time": "2:09:36", "remaining_time": "2:14:20", "throughput": 8711.05, "total_tokens": 67742568} +{"current_steps": 100505, "total_steps": 204665, "loss": 0.0003, "lr": 1.2042756343419324e-06, "epoch": 2.4553538709598612, "percentage": 49.11, "elapsed_time": "2:09:36", "remaining_time": "2:14:19", "throughput": 8711.14, "total_tokens": 67746344} +{"current_steps": 100510, "total_steps": 204665, "loss": 0.0011, "lr": 1.2041921542135209e-06, "epoch": 2.4554760217917084, "percentage": 49.11, "elapsed_time": "2:09:37", "remaining_time": "2:14:19", "throughput": 8711.16, "total_tokens": 67749544} +{"current_steps": 100515, "total_steps": 204665, "loss": 0.0002, "lr": 1.2041086726001696e-06, "epoch": 2.4555981726235556, "percentage": 49.11, "elapsed_time": "2:09:37", "remaining_time": "2:14:18", "throughput": 8711.18, "total_tokens": 67752744} +{"current_steps": 100520, "total_steps": 204665, "loss": 0.0983, "lr": 1.204025189502485e-06, "epoch": 2.455720323455403, "percentage": 49.11, "elapsed_time": "2:09:38", "remaining_time": "2:14:18", "throughput": 8711.26, "total_tokens": 67756456} +{"current_steps": 100525, "total_steps": 204665, "loss": 0.0002, "lr": 1.2039417049210743e-06, "epoch": 2.45584247428725, "percentage": 49.12, "elapsed_time": "2:09:38", "remaining_time": "2:14:18", "throughput": 8711.3, "total_tokens": 67759848} +{"current_steps": 100530, "total_steps": 204665, "loss": 0.0562, "lr": 1.2038582188565448e-06, "epoch": 2.455964625119097, "percentage": 49.12, "elapsed_time": "2:09:38", "remaining_time": "2:14:17", "throughput": 8711.34, "total_tokens": 67763176} +{"current_steps": 100535, "total_steps": 204665, "loss": 0.0707, "lr": 1.2037747313095032e-06, "epoch": 2.4560867759509444, "percentage": 49.12, "elapsed_time": "2:09:39", "remaining_time": "2:14:17", "throughput": 8711.37, "total_tokens": 67766376} +{"current_steps": 100540, "total_steps": 204665, "loss": 0.0001, "lr": 1.2036912422805572e-06, "epoch": 2.4562089267827916, "percentage": 49.12, "elapsed_time": "2:09:39", "remaining_time": "2:14:16", "throughput": 8711.4, "total_tokens": 67769704} +{"current_steps": 100545, "total_steps": 204665, "loss": 0.0005, "lr": 1.2036077517703136e-06, "epoch": 2.4563310776146388, "percentage": 49.13, "elapsed_time": "2:09:39", "remaining_time": "2:14:16", "throughput": 8711.4, "total_tokens": 67772648} +{"current_steps": 100550, "total_steps": 204665, "loss": 0.0008, "lr": 1.2035242597793796e-06, "epoch": 2.4564532284464855, "percentage": 49.13, "elapsed_time": "2:09:40", "remaining_time": "2:14:15", "throughput": 8711.45, "total_tokens": 67776040} +{"current_steps": 100555, "total_steps": 204665, "loss": 0.0917, "lr": 1.2034407663083626e-06, "epoch": 2.4565753792783327, "percentage": 49.13, "elapsed_time": "2:09:40", "remaining_time": "2:14:15", "throughput": 8711.49, "total_tokens": 67779368} +{"current_steps": 100560, "total_steps": 204665, "loss": 0.0638, "lr": 1.2033572713578698e-06, "epoch": 2.45669753011018, "percentage": 49.13, "elapsed_time": "2:09:40", "remaining_time": "2:14:15", "throughput": 8711.54, "total_tokens": 67782824} +{"current_steps": 100565, "total_steps": 204665, "loss": 0.0003, "lr": 1.2032737749285077e-06, "epoch": 2.456819680942027, "percentage": 49.14, "elapsed_time": "2:09:41", "remaining_time": "2:14:14", "throughput": 8711.6, "total_tokens": 67786344} +{"current_steps": 100570, "total_steps": 204665, "loss": 0.0584, "lr": 1.2031902770208846e-06, "epoch": 2.4569418317738743, "percentage": 49.14, "elapsed_time": "2:09:41", "remaining_time": "2:14:14", "throughput": 8711.66, "total_tokens": 67789864} +{"current_steps": 100575, "total_steps": 204665, "loss": 0.0627, "lr": 1.2031067776356068e-06, "epoch": 2.4570639826057215, "percentage": 49.14, "elapsed_time": "2:09:41", "remaining_time": "2:14:13", "throughput": 8711.7, "total_tokens": 67793192} +{"current_steps": 100580, "total_steps": 204665, "loss": 0.1053, "lr": 1.203023276773282e-06, "epoch": 2.4571861334375686, "percentage": 49.14, "elapsed_time": "2:09:42", "remaining_time": "2:14:13", "throughput": 8711.73, "total_tokens": 67796520} +{"current_steps": 100585, "total_steps": 204665, "loss": 0.1509, "lr": 1.2029397744345173e-06, "epoch": 2.457308284269416, "percentage": 49.15, "elapsed_time": "2:09:42", "remaining_time": "2:14:12", "throughput": 8711.77, "total_tokens": 67799848} +{"current_steps": 100590, "total_steps": 204665, "loss": 0.0352, "lr": 1.2028562706199198e-06, "epoch": 2.457430435101263, "percentage": 49.15, "elapsed_time": "2:09:42", "remaining_time": "2:14:12", "throughput": 8711.8, "total_tokens": 67803048} +{"current_steps": 100595, "total_steps": 204665, "loss": 0.1097, "lr": 1.202772765330097e-06, "epoch": 2.45755258593311, "percentage": 49.15, "elapsed_time": "2:09:43", "remaining_time": "2:14:12", "throughput": 8711.83, "total_tokens": 67806312} +{"current_steps": 100600, "total_steps": 204665, "loss": 0.0007, "lr": 1.2026892585656564e-06, "epoch": 2.4576747367649574, "percentage": 49.15, "elapsed_time": "2:09:43", "remaining_time": "2:14:11", "throughput": 8711.91, "total_tokens": 67810024} +{"current_steps": 100605, "total_steps": 204665, "loss": 0.0495, "lr": 1.2026057503272048e-06, "epoch": 2.4577968875968046, "percentage": 49.16, "elapsed_time": "2:09:43", "remaining_time": "2:14:11", "throughput": 8711.94, "total_tokens": 67813224} +{"current_steps": 100610, "total_steps": 204665, "loss": 0.0053, "lr": 1.2025222406153499e-06, "epoch": 2.457919038428652, "percentage": 49.16, "elapsed_time": "2:09:44", "remaining_time": "2:14:10", "throughput": 8711.97, "total_tokens": 67816488} +{"current_steps": 100615, "total_steps": 204665, "loss": 0.056, "lr": 1.2024387294306986e-06, "epoch": 2.458041189260499, "percentage": 49.16, "elapsed_time": "2:09:44", "remaining_time": "2:14:10", "throughput": 8712.0, "total_tokens": 67819752} +{"current_steps": 100620, "total_steps": 204665, "loss": 0.0334, "lr": 1.2023552167738585e-06, "epoch": 2.458163340092346, "percentage": 49.16, "elapsed_time": "2:09:44", "remaining_time": "2:14:09", "throughput": 8712.04, "total_tokens": 67823144} +{"current_steps": 100625, "total_steps": 204665, "loss": 0.0162, "lr": 1.2022717026454365e-06, "epoch": 2.4582854909241934, "percentage": 49.17, "elapsed_time": "2:09:45", "remaining_time": "2:14:09", "throughput": 8712.09, "total_tokens": 67826600} +{"current_steps": 100630, "total_steps": 204665, "loss": 0.0006, "lr": 1.2021881870460404e-06, "epoch": 2.4584076417560405, "percentage": 49.17, "elapsed_time": "2:09:45", "remaining_time": "2:14:09", "throughput": 8712.12, "total_tokens": 67829800} +{"current_steps": 100635, "total_steps": 204665, "loss": 0.0005, "lr": 1.2021046699762777e-06, "epoch": 2.4585297925878873, "percentage": 49.17, "elapsed_time": "2:09:46", "remaining_time": "2:14:08", "throughput": 8712.16, "total_tokens": 67833192} +{"current_steps": 100640, "total_steps": 204665, "loss": 0.0717, "lr": 1.2020211514367552e-06, "epoch": 2.458651943419735, "percentage": 49.17, "elapsed_time": "2:09:46", "remaining_time": "2:14:08", "throughput": 8712.21, "total_tokens": 67836584} +{"current_steps": 100645, "total_steps": 204665, "loss": 0.0005, "lr": 1.2019376314280808e-06, "epoch": 2.4587740942515817, "percentage": 49.18, "elapsed_time": "2:09:46", "remaining_time": "2:14:07", "throughput": 8712.23, "total_tokens": 67839784} +{"current_steps": 100650, "total_steps": 204665, "loss": 0.0005, "lr": 1.2018541099508614e-06, "epoch": 2.458896245083429, "percentage": 49.18, "elapsed_time": "2:09:47", "remaining_time": "2:14:07", "throughput": 8712.27, "total_tokens": 67843112} +{"current_steps": 100655, "total_steps": 204665, "loss": 0.0463, "lr": 1.201770587005705e-06, "epoch": 2.459018395915276, "percentage": 49.18, "elapsed_time": "2:09:47", "remaining_time": "2:14:07", "throughput": 8712.37, "total_tokens": 67847080} +{"current_steps": 100660, "total_steps": 204665, "loss": 0.0002, "lr": 1.2016870625932182e-06, "epoch": 2.4591405467471232, "percentage": 49.18, "elapsed_time": "2:09:47", "remaining_time": "2:14:06", "throughput": 8712.42, "total_tokens": 67850536} +{"current_steps": 100665, "total_steps": 204665, "loss": 0.0466, "lr": 1.201603536714009e-06, "epoch": 2.4592626975789704, "percentage": 49.19, "elapsed_time": "2:09:48", "remaining_time": "2:14:06", "throughput": 8712.5, "total_tokens": 67854248} +{"current_steps": 100670, "total_steps": 204665, "loss": 0.0002, "lr": 1.2015200093686845e-06, "epoch": 2.4593848484108176, "percentage": 49.19, "elapsed_time": "2:09:48", "remaining_time": "2:14:05", "throughput": 8712.53, "total_tokens": 67857512} +{"current_steps": 100675, "total_steps": 204665, "loss": 0.0477, "lr": 1.2014364805578525e-06, "epoch": 2.459506999242665, "percentage": 49.19, "elapsed_time": "2:09:48", "remaining_time": "2:14:05", "throughput": 8712.57, "total_tokens": 67860776} +{"current_steps": 100680, "total_steps": 204665, "loss": 0.0445, "lr": 1.2013529502821203e-06, "epoch": 2.459629150074512, "percentage": 49.19, "elapsed_time": "2:09:49", "remaining_time": "2:14:04", "throughput": 8712.59, "total_tokens": 67863976} +{"current_steps": 100685, "total_steps": 204665, "loss": 0.0361, "lr": 1.201269418542095e-06, "epoch": 2.459751300906359, "percentage": 49.2, "elapsed_time": "2:09:49", "remaining_time": "2:14:04", "throughput": 8712.61, "total_tokens": 67867112} +{"current_steps": 100690, "total_steps": 204665, "loss": 0.0516, "lr": 1.2011858853383846e-06, "epoch": 2.4598734517382064, "percentage": 49.2, "elapsed_time": "2:09:49", "remaining_time": "2:14:04", "throughput": 8712.67, "total_tokens": 67870632} +{"current_steps": 100695, "total_steps": 204665, "loss": 0.0327, "lr": 1.201102350671596e-06, "epoch": 2.4599956025700536, "percentage": 49.2, "elapsed_time": "2:09:50", "remaining_time": "2:14:03", "throughput": 8712.7, "total_tokens": 67873896} +{"current_steps": 100700, "total_steps": 204665, "loss": 0.0366, "lr": 1.2010188145423373e-06, "epoch": 2.4601177534019008, "percentage": 49.2, "elapsed_time": "2:09:50", "remaining_time": "2:14:03", "throughput": 8712.74, "total_tokens": 67877224} +{"current_steps": 100705, "total_steps": 204665, "loss": 0.0995, "lr": 1.2009352769512157e-06, "epoch": 2.460239904233748, "percentage": 49.2, "elapsed_time": "2:09:50", "remaining_time": "2:14:02", "throughput": 8712.83, "total_tokens": 67881064} +{"current_steps": 100710, "total_steps": 204665, "loss": 0.0004, "lr": 1.2008517378988387e-06, "epoch": 2.460362055065595, "percentage": 49.21, "elapsed_time": "2:09:51", "remaining_time": "2:14:02", "throughput": 8712.88, "total_tokens": 67884520} +{"current_steps": 100715, "total_steps": 204665, "loss": 0.0346, "lr": 1.200768197385814e-06, "epoch": 2.4604842058974423, "percentage": 49.21, "elapsed_time": "2:09:51", "remaining_time": "2:14:01", "throughput": 8712.95, "total_tokens": 67888104} +{"current_steps": 100720, "total_steps": 204665, "loss": 0.0423, "lr": 1.2006846554127485e-06, "epoch": 2.4606063567292895, "percentage": 49.21, "elapsed_time": "2:09:51", "remaining_time": "2:14:01", "throughput": 8712.97, "total_tokens": 67891304} +{"current_steps": 100725, "total_steps": 204665, "loss": 0.0481, "lr": 1.2006011119802506e-06, "epoch": 2.4607285075611367, "percentage": 49.21, "elapsed_time": "2:09:52", "remaining_time": "2:14:01", "throughput": 8713.0, "total_tokens": 67894568} +{"current_steps": 100730, "total_steps": 204665, "loss": 0.0716, "lr": 1.2005175670889273e-06, "epoch": 2.4608506583929834, "percentage": 49.22, "elapsed_time": "2:09:52", "remaining_time": "2:14:00", "throughput": 8713.04, "total_tokens": 67897960} +{"current_steps": 100735, "total_steps": 204665, "loss": 0.0449, "lr": 1.2004340207393866e-06, "epoch": 2.4609728092248306, "percentage": 49.22, "elapsed_time": "2:09:53", "remaining_time": "2:14:00", "throughput": 8713.09, "total_tokens": 67901352} +{"current_steps": 100740, "total_steps": 204665, "loss": 0.0001, "lr": 1.2003504729322355e-06, "epoch": 2.461094960056678, "percentage": 49.22, "elapsed_time": "2:09:53", "remaining_time": "2:13:59", "throughput": 8713.12, "total_tokens": 67904616} +{"current_steps": 100745, "total_steps": 204665, "loss": 0.0502, "lr": 1.200266923668082e-06, "epoch": 2.461217110888525, "percentage": 49.22, "elapsed_time": "2:09:53", "remaining_time": "2:13:59", "throughput": 8713.16, "total_tokens": 67908008} +{"current_steps": 100750, "total_steps": 204665, "loss": 0.0611, "lr": 1.2001833729475332e-06, "epoch": 2.461339261720372, "percentage": 49.23, "elapsed_time": "2:09:54", "remaining_time": "2:13:58", "throughput": 8713.19, "total_tokens": 67911208} +{"current_steps": 100755, "total_steps": 204665, "loss": 0.0005, "lr": 1.2000998207711974e-06, "epoch": 2.4614614125522194, "percentage": 49.23, "elapsed_time": "2:09:54", "remaining_time": "2:13:58", "throughput": 8713.28, "total_tokens": 67915048} +{"current_steps": 100760, "total_steps": 204665, "loss": 0.033, "lr": 1.200016267139682e-06, "epoch": 2.4615835633840666, "percentage": 49.23, "elapsed_time": "2:09:54", "remaining_time": "2:13:58", "throughput": 8713.31, "total_tokens": 67918312} +{"current_steps": 100765, "total_steps": 204665, "loss": 0.0005, "lr": 1.1999327120535945e-06, "epoch": 2.4617057142159138, "percentage": 49.23, "elapsed_time": "2:09:55", "remaining_time": "2:13:57", "throughput": 8713.34, "total_tokens": 67921576} +{"current_steps": 100770, "total_steps": 204665, "loss": 0.0006, "lr": 1.1998491555135424e-06, "epoch": 2.461827865047761, "percentage": 49.24, "elapsed_time": "2:09:55", "remaining_time": "2:13:57", "throughput": 8713.35, "total_tokens": 67924520} +{"current_steps": 100775, "total_steps": 204665, "loss": 0.0009, "lr": 1.1997655975201335e-06, "epoch": 2.461950015879608, "percentage": 49.24, "elapsed_time": "2:09:55", "remaining_time": "2:13:56", "throughput": 8713.37, "total_tokens": 67927656} +{"current_steps": 100780, "total_steps": 204665, "loss": 0.0867, "lr": 1.1996820380739754e-06, "epoch": 2.4620721667114553, "percentage": 49.24, "elapsed_time": "2:09:56", "remaining_time": "2:13:56", "throughput": 8713.44, "total_tokens": 67931304} +{"current_steps": 100785, "total_steps": 204665, "loss": 0.0005, "lr": 1.1995984771756757e-06, "epoch": 2.4621943175433025, "percentage": 49.24, "elapsed_time": "2:09:56", "remaining_time": "2:13:55", "throughput": 8713.49, "total_tokens": 67934760} +{"current_steps": 100790, "total_steps": 204665, "loss": 0.0, "lr": 1.1995149148258423e-06, "epoch": 2.4623164683751497, "percentage": 49.25, "elapsed_time": "2:09:56", "remaining_time": "2:13:55", "throughput": 8713.52, "total_tokens": 67937960} +{"current_steps": 100795, "total_steps": 204665, "loss": 0.098, "lr": 1.1994313510250828e-06, "epoch": 2.462438619206997, "percentage": 49.25, "elapsed_time": "2:09:57", "remaining_time": "2:13:55", "throughput": 8713.54, "total_tokens": 67941160} +{"current_steps": 100800, "total_steps": 204665, "loss": 0.0002, "lr": 1.1993477857740049e-06, "epoch": 2.462560770038844, "percentage": 49.25, "elapsed_time": "2:09:57", "remaining_time": "2:13:54", "throughput": 8713.58, "total_tokens": 67944488} +{"current_steps": 100805, "total_steps": 204665, "loss": 0.1641, "lr": 1.1992642190732163e-06, "epoch": 2.4626829208706913, "percentage": 49.25, "elapsed_time": "2:09:57", "remaining_time": "2:13:54", "throughput": 8713.62, "total_tokens": 67947880} +{"current_steps": 100810, "total_steps": 204665, "loss": 0.0002, "lr": 1.1991806509233246e-06, "epoch": 2.4628050717025385, "percentage": 49.26, "elapsed_time": "2:09:58", "remaining_time": "2:13:53", "throughput": 8713.66, "total_tokens": 67951144} +{"current_steps": 100815, "total_steps": 204665, "loss": 0.142, "lr": 1.199097081324938e-06, "epoch": 2.4629272225343852, "percentage": 49.26, "elapsed_time": "2:09:58", "remaining_time": "2:13:53", "throughput": 8713.75, "total_tokens": 67954984} +{"current_steps": 100820, "total_steps": 204665, "loss": 0.0753, "lr": 1.1990135102786634e-06, "epoch": 2.463049373366233, "percentage": 49.26, "elapsed_time": "2:09:58", "remaining_time": "2:13:52", "throughput": 8713.8, "total_tokens": 67958440} +{"current_steps": 100825, "total_steps": 204665, "loss": 0.097, "lr": 1.1989299377851093e-06, "epoch": 2.4631715241980796, "percentage": 49.26, "elapsed_time": "2:09:59", "remaining_time": "2:13:52", "throughput": 8713.88, "total_tokens": 67962152} +{"current_steps": 100830, "total_steps": 204665, "loss": 0.0011, "lr": 1.1988463638448832e-06, "epoch": 2.463293675029927, "percentage": 49.27, "elapsed_time": "2:09:59", "remaining_time": "2:13:52", "throughput": 8713.93, "total_tokens": 67965608} +{"current_steps": 100835, "total_steps": 204665, "loss": 0.0433, "lr": 1.1987627884585927e-06, "epoch": 2.463415825861774, "percentage": 49.27, "elapsed_time": "2:09:59", "remaining_time": "2:13:51", "throughput": 8713.94, "total_tokens": 67968744} +{"current_steps": 100840, "total_steps": 204665, "loss": 0.0758, "lr": 1.1986792116268458e-06, "epoch": 2.463537976693621, "percentage": 49.27, "elapsed_time": "2:10:00", "remaining_time": "2:13:51", "throughput": 8713.95, "total_tokens": 67971752} +{"current_steps": 100845, "total_steps": 204665, "loss": 0.0019, "lr": 1.19859563335025e-06, "epoch": 2.4636601275254684, "percentage": 49.27, "elapsed_time": "2:10:00", "remaining_time": "2:13:50", "throughput": 8714.0, "total_tokens": 67975208} +{"current_steps": 100850, "total_steps": 204665, "loss": 0.0311, "lr": 1.1985120536294135e-06, "epoch": 2.4637822783573156, "percentage": 49.28, "elapsed_time": "2:10:01", "remaining_time": "2:13:50", "throughput": 8714.0, "total_tokens": 67978152} +{"current_steps": 100855, "total_steps": 204665, "loss": 0.002, "lr": 1.198428472464944e-06, "epoch": 2.4639044291891627, "percentage": 49.28, "elapsed_time": "2:10:01", "remaining_time": "2:13:49", "throughput": 8714.04, "total_tokens": 67981480} +{"current_steps": 100860, "total_steps": 204665, "loss": 0.0004, "lr": 1.1983448898574493e-06, "epoch": 2.46402658002101, "percentage": 49.28, "elapsed_time": "2:10:01", "remaining_time": "2:13:49", "throughput": 8714.07, "total_tokens": 67984680} +{"current_steps": 100865, "total_steps": 204665, "loss": 0.132, "lr": 1.1982613058075372e-06, "epoch": 2.464148730852857, "percentage": 49.28, "elapsed_time": "2:10:02", "remaining_time": "2:13:49", "throughput": 8714.1, "total_tokens": 67987944} +{"current_steps": 100870, "total_steps": 204665, "loss": 0.0002, "lr": 1.198177720315816e-06, "epoch": 2.4642708816847043, "percentage": 49.29, "elapsed_time": "2:10:02", "remaining_time": "2:13:48", "throughput": 8714.23, "total_tokens": 67992104} +{"current_steps": 100875, "total_steps": 204665, "loss": 0.1005, "lr": 1.1980941333828924e-06, "epoch": 2.4643930325165515, "percentage": 49.29, "elapsed_time": "2:10:02", "remaining_time": "2:13:48", "throughput": 8714.26, "total_tokens": 67995368} +{"current_steps": 100880, "total_steps": 204665, "loss": 0.0001, "lr": 1.1980105450093754e-06, "epoch": 2.4645151833483987, "percentage": 49.29, "elapsed_time": "2:10:03", "remaining_time": "2:13:47", "throughput": 8714.32, "total_tokens": 67998952} +{"current_steps": 100885, "total_steps": 204665, "loss": 0.0002, "lr": 1.1979269551958722e-06, "epoch": 2.464637334180246, "percentage": 49.29, "elapsed_time": "2:10:03", "remaining_time": "2:13:47", "throughput": 8714.37, "total_tokens": 68002408} +{"current_steps": 100890, "total_steps": 204665, "loss": 0.0338, "lr": 1.197843363942991e-06, "epoch": 2.464759485012093, "percentage": 49.3, "elapsed_time": "2:10:03", "remaining_time": "2:13:46", "throughput": 8714.41, "total_tokens": 68005736} +{"current_steps": 100895, "total_steps": 204665, "loss": 0.0468, "lr": 1.19775977125134e-06, "epoch": 2.4648816358439403, "percentage": 49.3, "elapsed_time": "2:10:04", "remaining_time": "2:13:46", "throughput": 8714.5, "total_tokens": 68009512} +{"current_steps": 100900, "total_steps": 204665, "loss": 0.0015, "lr": 1.1976761771215262e-06, "epoch": 2.4650037866757875, "percentage": 49.3, "elapsed_time": "2:10:04", "remaining_time": "2:13:46", "throughput": 8714.51, "total_tokens": 68012584} +{"current_steps": 100905, "total_steps": 204665, "loss": 0.149, "lr": 1.1975925815541582e-06, "epoch": 2.4651259375076346, "percentage": 49.3, "elapsed_time": "2:10:04", "remaining_time": "2:13:45", "throughput": 8714.57, "total_tokens": 68016168} +{"current_steps": 100910, "total_steps": 204665, "loss": 0.0001, "lr": 1.197508984549844e-06, "epoch": 2.4652480883394814, "percentage": 49.3, "elapsed_time": "2:10:05", "remaining_time": "2:13:45", "throughput": 8714.63, "total_tokens": 68019688} +{"current_steps": 100915, "total_steps": 204665, "loss": 0.039, "lr": 1.1974253861091914e-06, "epoch": 2.4653702391713286, "percentage": 49.31, "elapsed_time": "2:10:05", "remaining_time": "2:13:44", "throughput": 8714.65, "total_tokens": 68022824} +{"current_steps": 100920, "total_steps": 204665, "loss": 0.0191, "lr": 1.1973417862328084e-06, "epoch": 2.4654923900031758, "percentage": 49.31, "elapsed_time": "2:10:05", "remaining_time": "2:13:44", "throughput": 8714.71, "total_tokens": 68026344} +{"current_steps": 100925, "total_steps": 204665, "loss": 0.0004, "lr": 1.1972581849213024e-06, "epoch": 2.465614540835023, "percentage": 49.31, "elapsed_time": "2:10:06", "remaining_time": "2:13:43", "throughput": 8714.73, "total_tokens": 68029480} +{"current_steps": 100930, "total_steps": 204665, "loss": 0.0001, "lr": 1.197174582175282e-06, "epoch": 2.46573669166687, "percentage": 49.31, "elapsed_time": "2:10:06", "remaining_time": "2:13:43", "throughput": 8714.76, "total_tokens": 68032744} +{"current_steps": 100935, "total_steps": 204665, "loss": 0.1133, "lr": 1.1970909779953553e-06, "epoch": 2.4658588424987173, "percentage": 49.32, "elapsed_time": "2:10:06", "remaining_time": "2:13:43", "throughput": 8714.78, "total_tokens": 68035880} +{"current_steps": 100940, "total_steps": 204665, "loss": 0.0452, "lr": 1.1970073723821294e-06, "epoch": 2.4659809933305645, "percentage": 49.32, "elapsed_time": "2:10:07", "remaining_time": "2:13:42", "throughput": 8714.82, "total_tokens": 68039208} +{"current_steps": 100945, "total_steps": 204665, "loss": 0.0796, "lr": 1.1969237653362135e-06, "epoch": 2.4661031441624117, "percentage": 49.32, "elapsed_time": "2:10:07", "remaining_time": "2:13:42", "throughput": 8714.87, "total_tokens": 68042664} +{"current_steps": 100950, "total_steps": 204665, "loss": 0.0028, "lr": 1.1968401568582145e-06, "epoch": 2.466225294994259, "percentage": 49.32, "elapsed_time": "2:10:08", "remaining_time": "2:13:41", "throughput": 8714.93, "total_tokens": 68046248} +{"current_steps": 100955, "total_steps": 204665, "loss": 0.0006, "lr": 1.1967565469487413e-06, "epoch": 2.466347445826106, "percentage": 49.33, "elapsed_time": "2:10:08", "remaining_time": "2:13:41", "throughput": 8714.96, "total_tokens": 68049512} +{"current_steps": 100960, "total_steps": 204665, "loss": 0.0002, "lr": 1.1966729356084016e-06, "epoch": 2.4664695966579533, "percentage": 49.33, "elapsed_time": "2:10:08", "remaining_time": "2:13:41", "throughput": 8715.0, "total_tokens": 68052840} +{"current_steps": 100965, "total_steps": 204665, "loss": 0.0533, "lr": 1.1965893228378032e-06, "epoch": 2.4665917474898005, "percentage": 49.33, "elapsed_time": "2:10:09", "remaining_time": "2:13:40", "throughput": 8715.02, "total_tokens": 68056040} +{"current_steps": 100970, "total_steps": 204665, "loss": 0.0005, "lr": 1.1965057086375546e-06, "epoch": 2.4667138983216477, "percentage": 49.33, "elapsed_time": "2:10:09", "remaining_time": "2:13:40", "throughput": 8715.11, "total_tokens": 68059880} +{"current_steps": 100975, "total_steps": 204665, "loss": 0.0501, "lr": 1.1964220930082633e-06, "epoch": 2.466836049153495, "percentage": 49.34, "elapsed_time": "2:10:09", "remaining_time": "2:13:39", "throughput": 8715.16, "total_tokens": 68063272} +{"current_steps": 100980, "total_steps": 204665, "loss": 0.0002, "lr": 1.1963384759505378e-06, "epoch": 2.466958199985342, "percentage": 49.34, "elapsed_time": "2:10:10", "remaining_time": "2:13:39", "throughput": 8715.17, "total_tokens": 68066280} +{"current_steps": 100985, "total_steps": 204665, "loss": 0.0009, "lr": 1.1962548574649863e-06, "epoch": 2.4670803508171892, "percentage": 49.34, "elapsed_time": "2:10:10", "remaining_time": "2:13:38", "throughput": 8715.18, "total_tokens": 68069416} +{"current_steps": 100990, "total_steps": 204665, "loss": 0.0002, "lr": 1.1961712375522166e-06, "epoch": 2.4672025016490364, "percentage": 49.34, "elapsed_time": "2:10:10", "remaining_time": "2:13:38", "throughput": 8715.22, "total_tokens": 68072744} +{"current_steps": 100995, "total_steps": 204665, "loss": 0.0306, "lr": 1.1960876162128368e-06, "epoch": 2.467324652480883, "percentage": 49.35, "elapsed_time": "2:10:11", "remaining_time": "2:13:38", "throughput": 8715.3, "total_tokens": 68076392} +{"current_steps": 101000, "total_steps": 204665, "loss": 0.0401, "lr": 1.1960039934474552e-06, "epoch": 2.467446803312731, "percentage": 49.35, "elapsed_time": "2:10:11", "remaining_time": "2:13:37", "throughput": 8715.33, "total_tokens": 68079720} +{"current_steps": 101005, "total_steps": 204665, "loss": 0.0006, "lr": 1.1959203692566797e-06, "epoch": 2.4675689541445776, "percentage": 49.35, "elapsed_time": "2:10:11", "remaining_time": "2:13:37", "throughput": 8715.37, "total_tokens": 68083048} +{"current_steps": 101010, "total_steps": 204665, "loss": 0.0004, "lr": 1.1958367436411189e-06, "epoch": 2.4676911049764247, "percentage": 49.35, "elapsed_time": "2:10:12", "remaining_time": "2:13:36", "throughput": 8715.41, "total_tokens": 68086376} +{"current_steps": 101015, "total_steps": 204665, "loss": 0.0602, "lr": 1.1957531166013803e-06, "epoch": 2.467813255808272, "percentage": 49.36, "elapsed_time": "2:10:12", "remaining_time": "2:13:36", "throughput": 8715.45, "total_tokens": 68089704} +{"current_steps": 101020, "total_steps": 204665, "loss": 0.059, "lr": 1.1956694881380724e-06, "epoch": 2.467935406640119, "percentage": 49.36, "elapsed_time": "2:10:12", "remaining_time": "2:13:35", "throughput": 8715.46, "total_tokens": 68092840} +{"current_steps": 101025, "total_steps": 204665, "loss": 0.0007, "lr": 1.1955858582518036e-06, "epoch": 2.4680575574719663, "percentage": 49.36, "elapsed_time": "2:10:13", "remaining_time": "2:13:35", "throughput": 8715.56, "total_tokens": 68096680} +{"current_steps": 101030, "total_steps": 204665, "loss": 0.0001, "lr": 1.1955022269431816e-06, "epoch": 2.4681797083038135, "percentage": 49.36, "elapsed_time": "2:10:13", "remaining_time": "2:13:35", "throughput": 8715.59, "total_tokens": 68099944} +{"current_steps": 101035, "total_steps": 204665, "loss": 0.0002, "lr": 1.195418594212815e-06, "epoch": 2.4683018591356607, "percentage": 49.37, "elapsed_time": "2:10:13", "remaining_time": "2:13:34", "throughput": 8715.6, "total_tokens": 68102952} +{"current_steps": 101040, "total_steps": 204665, "loss": 0.0489, "lr": 1.1953349600613116e-06, "epoch": 2.468424009967508, "percentage": 49.37, "elapsed_time": "2:10:14", "remaining_time": "2:13:34", "throughput": 8715.61, "total_tokens": 68106024} +{"current_steps": 101045, "total_steps": 204665, "loss": 0.0, "lr": 1.1952513244892802e-06, "epoch": 2.468546160799355, "percentage": 49.37, "elapsed_time": "2:10:14", "remaining_time": "2:13:33", "throughput": 8715.65, "total_tokens": 68109352} +{"current_steps": 101050, "total_steps": 204665, "loss": 0.0354, "lr": 1.1951676874973284e-06, "epoch": 2.4686683116312023, "percentage": 49.37, "elapsed_time": "2:10:14", "remaining_time": "2:13:33", "throughput": 8715.7, "total_tokens": 68112872} +{"current_steps": 101055, "total_steps": 204665, "loss": 0.0776, "lr": 1.1950840490860647e-06, "epoch": 2.4687904624630495, "percentage": 49.38, "elapsed_time": "2:10:15", "remaining_time": "2:13:32", "throughput": 8715.7, "total_tokens": 68115816} +{"current_steps": 101060, "total_steps": 204665, "loss": 0.1308, "lr": 1.1950004092560973e-06, "epoch": 2.4689126132948966, "percentage": 49.38, "elapsed_time": "2:10:15", "remaining_time": "2:13:32", "throughput": 8715.74, "total_tokens": 68119080} +{"current_steps": 101065, "total_steps": 204665, "loss": 0.0002, "lr": 1.1949167680080344e-06, "epoch": 2.469034764126744, "percentage": 49.38, "elapsed_time": "2:10:16", "remaining_time": "2:13:32", "throughput": 8715.83, "total_tokens": 68122984} +{"current_steps": 101070, "total_steps": 204665, "loss": 0.0423, "lr": 1.1948331253424846e-06, "epoch": 2.469156914958591, "percentage": 49.38, "elapsed_time": "2:10:16", "remaining_time": "2:13:31", "throughput": 8715.86, "total_tokens": 68126184} +{"current_steps": 101075, "total_steps": 204665, "loss": 0.0003, "lr": 1.1947494812600558e-06, "epoch": 2.469279065790438, "percentage": 49.39, "elapsed_time": "2:10:16", "remaining_time": "2:13:31", "throughput": 8715.9, "total_tokens": 68129576} +{"current_steps": 101080, "total_steps": 204665, "loss": 0.0579, "lr": 1.1946658357613564e-06, "epoch": 2.469401216622285, "percentage": 49.39, "elapsed_time": "2:10:17", "remaining_time": "2:13:30", "throughput": 8715.93, "total_tokens": 68132776} +{"current_steps": 101085, "total_steps": 204665, "loss": 0.0374, "lr": 1.1945821888469946e-06, "epoch": 2.4695233674541326, "percentage": 49.39, "elapsed_time": "2:10:17", "remaining_time": "2:13:30", "throughput": 8715.94, "total_tokens": 68135784} +{"current_steps": 101090, "total_steps": 204665, "loss": 0.0795, "lr": 1.1944985405175788e-06, "epoch": 2.4696455182859793, "percentage": 49.39, "elapsed_time": "2:10:17", "remaining_time": "2:13:29", "throughput": 8715.95, "total_tokens": 68138856} +{"current_steps": 101095, "total_steps": 204665, "loss": 0.0528, "lr": 1.1944148907737171e-06, "epoch": 2.4697676691178265, "percentage": 49.4, "elapsed_time": "2:10:18", "remaining_time": "2:13:29", "throughput": 8715.98, "total_tokens": 68142056} +{"current_steps": 101100, "total_steps": 204665, "loss": 0.077, "lr": 1.1943312396160181e-06, "epoch": 2.4698898199496737, "percentage": 49.4, "elapsed_time": "2:10:18", "remaining_time": "2:13:29", "throughput": 8716.03, "total_tokens": 68145512} +{"current_steps": 101105, "total_steps": 204665, "loss": 0.0002, "lr": 1.1942475870450904e-06, "epoch": 2.470011970781521, "percentage": 49.4, "elapsed_time": "2:10:18", "remaining_time": "2:13:28", "throughput": 8716.09, "total_tokens": 68149032} +{"current_steps": 101110, "total_steps": 204665, "loss": 0.0004, "lr": 1.1941639330615419e-06, "epoch": 2.470134121613368, "percentage": 49.4, "elapsed_time": "2:10:19", "remaining_time": "2:13:28", "throughput": 8716.14, "total_tokens": 68152552} +{"current_steps": 101115, "total_steps": 204665, "loss": 0.0003, "lr": 1.1940802776659808e-06, "epoch": 2.4702562724452153, "percentage": 49.41, "elapsed_time": "2:10:19", "remaining_time": "2:13:27", "throughput": 8716.19, "total_tokens": 68155944} +{"current_steps": 101120, "total_steps": 204665, "loss": 0.0378, "lr": 1.193996620859016e-06, "epoch": 2.4703784232770625, "percentage": 49.41, "elapsed_time": "2:10:19", "remaining_time": "2:13:27", "throughput": 8716.25, "total_tokens": 68159528} +{"current_steps": 101125, "total_steps": 204665, "loss": 0.0356, "lr": 1.1939129626412553e-06, "epoch": 2.4705005741089097, "percentage": 49.41, "elapsed_time": "2:10:20", "remaining_time": "2:13:26", "throughput": 8716.28, "total_tokens": 68162728} +{"current_steps": 101130, "total_steps": 204665, "loss": 0.0308, "lr": 1.1938293030133075e-06, "epoch": 2.470622724940757, "percentage": 49.41, "elapsed_time": "2:10:20", "remaining_time": "2:13:26", "throughput": 8716.4, "total_tokens": 68166952} +{"current_steps": 101135, "total_steps": 204665, "loss": 0.0009, "lr": 1.193745641975781e-06, "epoch": 2.470744875772604, "percentage": 49.41, "elapsed_time": "2:10:20", "remaining_time": "2:13:26", "throughput": 8716.44, "total_tokens": 68170216} +{"current_steps": 101140, "total_steps": 204665, "loss": 0.0002, "lr": 1.193661979529284e-06, "epoch": 2.4708670266044512, "percentage": 49.42, "elapsed_time": "2:10:21", "remaining_time": "2:13:25", "throughput": 8716.48, "total_tokens": 68173544} +{"current_steps": 101145, "total_steps": 204665, "loss": 0.0003, "lr": 1.193578315674425e-06, "epoch": 2.4709891774362984, "percentage": 49.42, "elapsed_time": "2:10:21", "remaining_time": "2:13:25", "throughput": 8716.56, "total_tokens": 68177320} +{"current_steps": 101150, "total_steps": 204665, "loss": 0.0582, "lr": 1.1934946504118123e-06, "epoch": 2.4711113282681456, "percentage": 49.42, "elapsed_time": "2:10:21", "remaining_time": "2:13:24", "throughput": 8716.64, "total_tokens": 68181032} +{"current_steps": 101155, "total_steps": 204665, "loss": 0.0014, "lr": 1.1934109837420544e-06, "epoch": 2.471233479099993, "percentage": 49.42, "elapsed_time": "2:10:22", "remaining_time": "2:13:24", "throughput": 8716.67, "total_tokens": 68184232} +{"current_steps": 101160, "total_steps": 204665, "loss": 0.091, "lr": 1.1933273156657602e-06, "epoch": 2.47135562993184, "percentage": 49.43, "elapsed_time": "2:10:22", "remaining_time": "2:13:23", "throughput": 8716.72, "total_tokens": 68187688} +{"current_steps": 101165, "total_steps": 204665, "loss": 0.0287, "lr": 1.1932436461835376e-06, "epoch": 2.471477780763687, "percentage": 49.43, "elapsed_time": "2:10:22", "remaining_time": "2:13:23", "throughput": 8716.8, "total_tokens": 68191464} +{"current_steps": 101170, "total_steps": 204665, "loss": 0.0449, "lr": 1.193159975295995e-06, "epoch": 2.4715999315955344, "percentage": 49.43, "elapsed_time": "2:10:23", "remaining_time": "2:13:23", "throughput": 8716.91, "total_tokens": 68195432} +{"current_steps": 101175, "total_steps": 204665, "loss": 0.0272, "lr": 1.1930763030037413e-06, "epoch": 2.471722082427381, "percentage": 49.43, "elapsed_time": "2:10:23", "remaining_time": "2:13:22", "throughput": 8716.95, "total_tokens": 68198824} +{"current_steps": 101180, "total_steps": 204665, "loss": 0.0009, "lr": 1.1929926293073852e-06, "epoch": 2.4718442332592283, "percentage": 49.44, "elapsed_time": "2:10:24", "remaining_time": "2:13:22", "throughput": 8717.05, "total_tokens": 68202728} +{"current_steps": 101185, "total_steps": 204665, "loss": 0.005, "lr": 1.192908954207534e-06, "epoch": 2.4719663840910755, "percentage": 49.44, "elapsed_time": "2:10:24", "remaining_time": "2:13:21", "throughput": 8717.12, "total_tokens": 68206312} +{"current_steps": 101190, "total_steps": 204665, "loss": 0.0002, "lr": 1.1928252777047974e-06, "epoch": 2.4720885349229227, "percentage": 49.44, "elapsed_time": "2:10:24", "remaining_time": "2:13:21", "throughput": 8717.2, "total_tokens": 68210088} +{"current_steps": 101195, "total_steps": 204665, "loss": 0.0007, "lr": 1.1927415997997834e-06, "epoch": 2.47221068575477, "percentage": 49.44, "elapsed_time": "2:10:25", "remaining_time": "2:13:21", "throughput": 8717.25, "total_tokens": 68213480} +{"current_steps": 101200, "total_steps": 204665, "loss": 0.0008, "lr": 1.192657920493101e-06, "epoch": 2.472332836586617, "percentage": 49.45, "elapsed_time": "2:10:25", "remaining_time": "2:13:20", "throughput": 8717.28, "total_tokens": 68216744} +{"current_steps": 101205, "total_steps": 204665, "loss": 0.0942, "lr": 1.192574239785358e-06, "epoch": 2.4724549874184643, "percentage": 49.45, "elapsed_time": "2:10:25", "remaining_time": "2:13:20", "throughput": 8717.31, "total_tokens": 68220008} +{"current_steps": 101210, "total_steps": 204665, "loss": 0.0419, "lr": 1.1924905576771634e-06, "epoch": 2.4725771382503114, "percentage": 49.45, "elapsed_time": "2:10:26", "remaining_time": "2:13:19", "throughput": 8717.37, "total_tokens": 68223592} +{"current_steps": 101215, "total_steps": 204665, "loss": 0.0008, "lr": 1.1924068741691258e-06, "epoch": 2.4726992890821586, "percentage": 49.45, "elapsed_time": "2:10:26", "remaining_time": "2:13:19", "throughput": 8717.42, "total_tokens": 68227048} +{"current_steps": 101220, "total_steps": 204665, "loss": 0.0426, "lr": 1.1923231892618532e-06, "epoch": 2.472821439914006, "percentage": 49.46, "elapsed_time": "2:10:26", "remaining_time": "2:13:18", "throughput": 8717.47, "total_tokens": 68230440} +{"current_steps": 101225, "total_steps": 204665, "loss": 0.0001, "lr": 1.1922395029559554e-06, "epoch": 2.472943590745853, "percentage": 49.46, "elapsed_time": "2:10:27", "remaining_time": "2:13:18", "throughput": 8717.48, "total_tokens": 68233512} +{"current_steps": 101230, "total_steps": 204665, "loss": 0.0836, "lr": 1.1921558152520399e-06, "epoch": 2.4730657415777, "percentage": 49.46, "elapsed_time": "2:10:27", "remaining_time": "2:13:18", "throughput": 8717.58, "total_tokens": 68237416} +{"current_steps": 101235, "total_steps": 204665, "loss": 0.0423, "lr": 1.1920721261507156e-06, "epoch": 2.4731878924095474, "percentage": 49.46, "elapsed_time": "2:10:27", "remaining_time": "2:13:17", "throughput": 8717.56, "total_tokens": 68240168} +{"current_steps": 101240, "total_steps": 204665, "loss": 0.0702, "lr": 1.191988435652591e-06, "epoch": 2.4733100432413946, "percentage": 49.47, "elapsed_time": "2:10:28", "remaining_time": "2:13:17", "throughput": 8717.58, "total_tokens": 68243304} +{"current_steps": 101245, "total_steps": 204665, "loss": 0.0002, "lr": 1.191904743758275e-06, "epoch": 2.4734321940732418, "percentage": 49.47, "elapsed_time": "2:10:28", "remaining_time": "2:13:16", "throughput": 8717.66, "total_tokens": 68247016} +{"current_steps": 101250, "total_steps": 204665, "loss": 0.0774, "lr": 1.1918210504683759e-06, "epoch": 2.473554344905089, "percentage": 49.47, "elapsed_time": "2:10:28", "remaining_time": "2:13:16", "throughput": 8717.71, "total_tokens": 68250472} +{"current_steps": 101255, "total_steps": 204665, "loss": 0.0002, "lr": 1.1917373557835026e-06, "epoch": 2.473676495736936, "percentage": 49.47, "elapsed_time": "2:10:29", "remaining_time": "2:13:15", "throughput": 8717.78, "total_tokens": 68254120} +{"current_steps": 101260, "total_steps": 204665, "loss": 0.089, "lr": 1.191653659704264e-06, "epoch": 2.473798646568783, "percentage": 49.48, "elapsed_time": "2:10:29", "remaining_time": "2:13:15", "throughput": 8717.82, "total_tokens": 68257448} +{"current_steps": 101265, "total_steps": 204665, "loss": 0.0752, "lr": 1.191569962231268e-06, "epoch": 2.4739207974006305, "percentage": 49.48, "elapsed_time": "2:10:29", "remaining_time": "2:13:15", "throughput": 8717.84, "total_tokens": 68260584} +{"current_steps": 101270, "total_steps": 204665, "loss": 0.0001, "lr": 1.191486263365124e-06, "epoch": 2.4740429482324773, "percentage": 49.48, "elapsed_time": "2:10:30", "remaining_time": "2:13:14", "throughput": 8717.87, "total_tokens": 68263784} +{"current_steps": 101275, "total_steps": 204665, "loss": 0.0138, "lr": 1.1914025631064403e-06, "epoch": 2.4741650990643245, "percentage": 49.48, "elapsed_time": "2:10:30", "remaining_time": "2:13:14", "throughput": 8717.88, "total_tokens": 68266856} +{"current_steps": 101280, "total_steps": 204665, "loss": 0.0002, "lr": 1.1913188614558255e-06, "epoch": 2.4742872498961717, "percentage": 49.49, "elapsed_time": "2:10:31", "remaining_time": "2:13:13", "throughput": 8717.91, "total_tokens": 68270056} +{"current_steps": 101285, "total_steps": 204665, "loss": 0.0739, "lr": 1.1912351584138889e-06, "epoch": 2.474409400728019, "percentage": 49.49, "elapsed_time": "2:10:31", "remaining_time": "2:13:13", "throughput": 8717.91, "total_tokens": 68273064} +{"current_steps": 101290, "total_steps": 204665, "loss": 0.0001, "lr": 1.1911514539812386e-06, "epoch": 2.474531551559866, "percentage": 49.49, "elapsed_time": "2:10:31", "remaining_time": "2:13:12", "throughput": 8717.97, "total_tokens": 68276584} +{"current_steps": 101295, "total_steps": 204665, "loss": 0.0782, "lr": 1.1910677481584835e-06, "epoch": 2.4746537023917132, "percentage": 49.49, "elapsed_time": "2:10:32", "remaining_time": "2:13:12", "throughput": 8718.0, "total_tokens": 68279848} +{"current_steps": 101300, "total_steps": 204665, "loss": 0.0576, "lr": 1.1909840409462322e-06, "epoch": 2.4747758532235604, "percentage": 49.5, "elapsed_time": "2:10:32", "remaining_time": "2:13:12", "throughput": 8718.08, "total_tokens": 68283496} +{"current_steps": 101305, "total_steps": 204665, "loss": 0.1167, "lr": 1.1909003323450938e-06, "epoch": 2.4748980040554076, "percentage": 49.5, "elapsed_time": "2:10:32", "remaining_time": "2:13:11", "throughput": 8718.14, "total_tokens": 68287080} +{"current_steps": 101310, "total_steps": 204665, "loss": 0.0003, "lr": 1.1908166223556766e-06, "epoch": 2.475020154887255, "percentage": 49.5, "elapsed_time": "2:10:33", "remaining_time": "2:13:11", "throughput": 8718.18, "total_tokens": 68290408} +{"current_steps": 101315, "total_steps": 204665, "loss": 0.0942, "lr": 1.1907329109785895e-06, "epoch": 2.475142305719102, "percentage": 49.5, "elapsed_time": "2:10:33", "remaining_time": "2:13:10", "throughput": 8718.2, "total_tokens": 68293544} +{"current_steps": 101320, "total_steps": 204665, "loss": 0.1748, "lr": 1.1906491982144417e-06, "epoch": 2.475264456550949, "percentage": 49.51, "elapsed_time": "2:10:33", "remaining_time": "2:13:10", "throughput": 8718.3, "total_tokens": 68297384} +{"current_steps": 101325, "total_steps": 204665, "loss": 0.0603, "lr": 1.1905654840638417e-06, "epoch": 2.4753866073827964, "percentage": 49.51, "elapsed_time": "2:10:34", "remaining_time": "2:13:09", "throughput": 8718.32, "total_tokens": 68300520} +{"current_steps": 101330, "total_steps": 204665, "loss": 0.0008, "lr": 1.190481768527398e-06, "epoch": 2.4755087582146436, "percentage": 49.51, "elapsed_time": "2:10:34", "remaining_time": "2:13:09", "throughput": 8718.37, "total_tokens": 68304040} +{"current_steps": 101335, "total_steps": 204665, "loss": 0.0688, "lr": 1.19039805160572e-06, "epoch": 2.4756309090464907, "percentage": 49.51, "elapsed_time": "2:10:34", "remaining_time": "2:13:09", "throughput": 8718.41, "total_tokens": 68307304} +{"current_steps": 101340, "total_steps": 204665, "loss": 0.0007, "lr": 1.1903143332994156e-06, "epoch": 2.475753059878338, "percentage": 49.52, "elapsed_time": "2:10:35", "remaining_time": "2:13:08", "throughput": 8718.44, "total_tokens": 68310568} +{"current_steps": 101345, "total_steps": 204665, "loss": 0.06, "lr": 1.1902306136090947e-06, "epoch": 2.475875210710185, "percentage": 49.52, "elapsed_time": "2:10:35", "remaining_time": "2:13:08", "throughput": 8718.57, "total_tokens": 68314792} +{"current_steps": 101350, "total_steps": 204665, "loss": 0.0807, "lr": 1.1901468925353652e-06, "epoch": 2.4759973615420323, "percentage": 49.52, "elapsed_time": "2:10:35", "remaining_time": "2:13:07", "throughput": 8718.6, "total_tokens": 68317992} +{"current_steps": 101355, "total_steps": 204665, "loss": 0.0612, "lr": 1.1900631700788366e-06, "epoch": 2.476119512373879, "percentage": 49.52, "elapsed_time": "2:10:36", "remaining_time": "2:13:07", "throughput": 8718.66, "total_tokens": 68321512} +{"current_steps": 101360, "total_steps": 204665, "loss": 0.1757, "lr": 1.1899794462401176e-06, "epoch": 2.4762416632057263, "percentage": 49.52, "elapsed_time": "2:10:36", "remaining_time": "2:13:06", "throughput": 8718.7, "total_tokens": 68324840} +{"current_steps": 101365, "total_steps": 204665, "loss": 0.0006, "lr": 1.1898957210198168e-06, "epoch": 2.4763638140375734, "percentage": 49.53, "elapsed_time": "2:10:36", "remaining_time": "2:13:06", "throughput": 8718.73, "total_tokens": 68328104} +{"current_steps": 101370, "total_steps": 204665, "loss": 0.0344, "lr": 1.1898119944185432e-06, "epoch": 2.4764859648694206, "percentage": 49.53, "elapsed_time": "2:10:37", "remaining_time": "2:13:06", "throughput": 8718.81, "total_tokens": 68331880} +{"current_steps": 101375, "total_steps": 204665, "loss": 0.0518, "lr": 1.1897282664369058e-06, "epoch": 2.476608115701268, "percentage": 49.53, "elapsed_time": "2:10:37", "remaining_time": "2:13:05", "throughput": 8718.84, "total_tokens": 68335080} +{"current_steps": 101380, "total_steps": 204665, "loss": 0.0003, "lr": 1.1896445370755135e-06, "epoch": 2.476730266533115, "percentage": 49.53, "elapsed_time": "2:10:37", "remaining_time": "2:13:05", "throughput": 8718.86, "total_tokens": 68338216} +{"current_steps": 101385, "total_steps": 204665, "loss": 0.0006, "lr": 1.189560806334975e-06, "epoch": 2.476852417364962, "percentage": 49.54, "elapsed_time": "2:10:38", "remaining_time": "2:13:04", "throughput": 8718.9, "total_tokens": 68341544} +{"current_steps": 101390, "total_steps": 204665, "loss": 0.1015, "lr": 1.1894770742158992e-06, "epoch": 2.4769745681968094, "percentage": 49.54, "elapsed_time": "2:10:38", "remaining_time": "2:13:04", "throughput": 8718.94, "total_tokens": 68344872} +{"current_steps": 101395, "total_steps": 204665, "loss": 0.0425, "lr": 1.1893933407188957e-06, "epoch": 2.4770967190286566, "percentage": 49.54, "elapsed_time": "2:10:39", "remaining_time": "2:13:03", "throughput": 8719.0, "total_tokens": 68348392} +{"current_steps": 101400, "total_steps": 204665, "loss": 0.0001, "lr": 1.1893096058445723e-06, "epoch": 2.4772188698605038, "percentage": 49.54, "elapsed_time": "2:10:39", "remaining_time": "2:13:03", "throughput": 8719.05, "total_tokens": 68351848} +{"current_steps": 101405, "total_steps": 204665, "loss": 0.0002, "lr": 1.1892258695935383e-06, "epoch": 2.477341020692351, "percentage": 49.55, "elapsed_time": "2:10:39", "remaining_time": "2:13:03", "throughput": 8719.11, "total_tokens": 68355368} +{"current_steps": 101410, "total_steps": 204665, "loss": 0.0958, "lr": 1.1891421319664034e-06, "epoch": 2.477463171524198, "percentage": 49.55, "elapsed_time": "2:10:40", "remaining_time": "2:13:02", "throughput": 8719.13, "total_tokens": 68358504} +{"current_steps": 101415, "total_steps": 204665, "loss": 0.0002, "lr": 1.1890583929637761e-06, "epoch": 2.4775853223560453, "percentage": 49.55, "elapsed_time": "2:10:40", "remaining_time": "2:13:02", "throughput": 8719.16, "total_tokens": 68361768} +{"current_steps": 101420, "total_steps": 204665, "loss": 0.0479, "lr": 1.188974652586265e-06, "epoch": 2.4777074731878925, "percentage": 49.55, "elapsed_time": "2:10:40", "remaining_time": "2:13:01", "throughput": 8719.19, "total_tokens": 68364968} +{"current_steps": 101425, "total_steps": 204665, "loss": 0.0002, "lr": 1.1888909108344797e-06, "epoch": 2.4778296240197397, "percentage": 49.56, "elapsed_time": "2:10:41", "remaining_time": "2:13:01", "throughput": 8719.23, "total_tokens": 68368296} +{"current_steps": 101430, "total_steps": 204665, "loss": 0.0514, "lr": 1.1888071677090288e-06, "epoch": 2.477951774851587, "percentage": 49.56, "elapsed_time": "2:10:41", "remaining_time": "2:13:00", "throughput": 8719.34, "total_tokens": 68372328} +{"current_steps": 101435, "total_steps": 204665, "loss": 0.0002, "lr": 1.1887234232105215e-06, "epoch": 2.478073925683434, "percentage": 49.56, "elapsed_time": "2:10:41", "remaining_time": "2:13:00", "throughput": 8719.38, "total_tokens": 68375656} +{"current_steps": 101440, "total_steps": 204665, "loss": 0.0457, "lr": 1.1886396773395664e-06, "epoch": 2.478196076515281, "percentage": 49.56, "elapsed_time": "2:10:42", "remaining_time": "2:13:00", "throughput": 8719.41, "total_tokens": 68378920} +{"current_steps": 101445, "total_steps": 204665, "loss": 0.0011, "lr": 1.1885559300967728e-06, "epoch": 2.4783182273471285, "percentage": 49.57, "elapsed_time": "2:10:42", "remaining_time": "2:12:59", "throughput": 8719.44, "total_tokens": 68382184} +{"current_steps": 101450, "total_steps": 204665, "loss": 0.0422, "lr": 1.18847218148275e-06, "epoch": 2.4784403781789752, "percentage": 49.57, "elapsed_time": "2:10:42", "remaining_time": "2:12:59", "throughput": 8719.48, "total_tokens": 68385448} +{"current_steps": 101455, "total_steps": 204665, "loss": 0.0003, "lr": 1.188388431498107e-06, "epoch": 2.4785625290108224, "percentage": 49.57, "elapsed_time": "2:10:43", "remaining_time": "2:12:58", "throughput": 8719.52, "total_tokens": 68388840} +{"current_steps": 101460, "total_steps": 204665, "loss": 0.0001, "lr": 1.1883046801434524e-06, "epoch": 2.4786846798426696, "percentage": 49.57, "elapsed_time": "2:10:43", "remaining_time": "2:12:58", "throughput": 8719.6, "total_tokens": 68392552} +{"current_steps": 101465, "total_steps": 204665, "loss": 0.0005, "lr": 1.1882209274193954e-06, "epoch": 2.478806830674517, "percentage": 49.58, "elapsed_time": "2:10:43", "remaining_time": "2:12:58", "throughput": 8719.64, "total_tokens": 68395880} +{"current_steps": 101470, "total_steps": 204665, "loss": 0.0385, "lr": 1.1881371733265451e-06, "epoch": 2.478928981506364, "percentage": 49.58, "elapsed_time": "2:10:44", "remaining_time": "2:12:57", "throughput": 8719.68, "total_tokens": 68399272} +{"current_steps": 101475, "total_steps": 204665, "loss": 0.031, "lr": 1.188053417865511e-06, "epoch": 2.479051132338211, "percentage": 49.58, "elapsed_time": "2:10:44", "remaining_time": "2:12:57", "throughput": 8719.7, "total_tokens": 68402408} +{"current_steps": 101480, "total_steps": 204665, "loss": 0.0004, "lr": 1.1879696610369017e-06, "epoch": 2.4791732831700584, "percentage": 49.58, "elapsed_time": "2:10:44", "remaining_time": "2:12:56", "throughput": 8719.79, "total_tokens": 68406248} +{"current_steps": 101485, "total_steps": 204665, "loss": 0.0001, "lr": 1.1878859028413267e-06, "epoch": 2.4792954340019056, "percentage": 49.59, "elapsed_time": "2:10:45", "remaining_time": "2:12:56", "throughput": 8719.81, "total_tokens": 68409384} +{"current_steps": 101490, "total_steps": 204665, "loss": 0.115, "lr": 1.1878021432793948e-06, "epoch": 2.4794175848337527, "percentage": 49.59, "elapsed_time": "2:10:45", "remaining_time": "2:12:55", "throughput": 8719.85, "total_tokens": 68412712} +{"current_steps": 101495, "total_steps": 204665, "loss": 0.0002, "lr": 1.187718382351715e-06, "epoch": 2.4795397356656, "percentage": 49.59, "elapsed_time": "2:10:45", "remaining_time": "2:12:55", "throughput": 8719.88, "total_tokens": 68416040} +{"current_steps": 101500, "total_steps": 204665, "loss": 0.0624, "lr": 1.1876346200588966e-06, "epoch": 2.479661886497447, "percentage": 49.59, "elapsed_time": "2:10:46", "remaining_time": "2:12:55", "throughput": 8719.9, "total_tokens": 68419112} +{"current_steps": 101505, "total_steps": 204665, "loss": 0.0002, "lr": 1.1875508564015488e-06, "epoch": 2.4797840373292943, "percentage": 49.6, "elapsed_time": "2:10:46", "remaining_time": "2:12:54", "throughput": 8719.92, "total_tokens": 68422248} +{"current_steps": 101510, "total_steps": 204665, "loss": 0.0699, "lr": 1.187467091380281e-06, "epoch": 2.4799061881611415, "percentage": 49.6, "elapsed_time": "2:10:47", "remaining_time": "2:12:54", "throughput": 8719.94, "total_tokens": 68425384} +{"current_steps": 101515, "total_steps": 204665, "loss": 0.0005, "lr": 1.187383324995702e-06, "epoch": 2.4800283389929887, "percentage": 49.6, "elapsed_time": "2:10:47", "remaining_time": "2:12:53", "throughput": 8719.97, "total_tokens": 68428584} +{"current_steps": 101520, "total_steps": 204665, "loss": 0.0838, "lr": 1.1872995572484208e-06, "epoch": 2.480150489824836, "percentage": 49.6, "elapsed_time": "2:10:47", "remaining_time": "2:12:53", "throughput": 8720.01, "total_tokens": 68431912} +{"current_steps": 101525, "total_steps": 204665, "loss": 0.0483, "lr": 1.187215788139047e-06, "epoch": 2.4802726406566826, "percentage": 49.61, "elapsed_time": "2:10:48", "remaining_time": "2:12:52", "throughput": 8720.02, "total_tokens": 68434984} +{"current_steps": 101530, "total_steps": 204665, "loss": 0.0374, "lr": 1.1871320176681896e-06, "epoch": 2.4803947914885303, "percentage": 49.61, "elapsed_time": "2:10:48", "remaining_time": "2:12:52", "throughput": 8720.02, "total_tokens": 68437928} +{"current_steps": 101535, "total_steps": 204665, "loss": 0.0006, "lr": 1.1870482458364582e-06, "epoch": 2.480516942320377, "percentage": 49.61, "elapsed_time": "2:10:48", "remaining_time": "2:12:52", "throughput": 8720.1, "total_tokens": 68441640} +{"current_steps": 101540, "total_steps": 204665, "loss": 0.0002, "lr": 1.186964472644461e-06, "epoch": 2.480639093152224, "percentage": 49.61, "elapsed_time": "2:10:49", "remaining_time": "2:12:51", "throughput": 8720.13, "total_tokens": 68444968} +{"current_steps": 101545, "total_steps": 204665, "loss": 0.0745, "lr": 1.1868806980928084e-06, "epoch": 2.4807612439840714, "percentage": 49.62, "elapsed_time": "2:10:49", "remaining_time": "2:12:51", "throughput": 8720.22, "total_tokens": 68448808} +{"current_steps": 101550, "total_steps": 204665, "loss": 0.0394, "lr": 1.186796922182109e-06, "epoch": 2.4808833948159186, "percentage": 49.62, "elapsed_time": "2:10:49", "remaining_time": "2:12:50", "throughput": 8720.29, "total_tokens": 68452392} +{"current_steps": 101555, "total_steps": 204665, "loss": 0.0417, "lr": 1.186713144912972e-06, "epoch": 2.4810055456477658, "percentage": 49.62, "elapsed_time": "2:10:50", "remaining_time": "2:12:50", "throughput": 8720.3, "total_tokens": 68455464} +{"current_steps": 101560, "total_steps": 204665, "loss": 0.0427, "lr": 1.1866293662860066e-06, "epoch": 2.481127696479613, "percentage": 49.62, "elapsed_time": "2:10:50", "remaining_time": "2:12:49", "throughput": 8720.3, "total_tokens": 68458408} +{"current_steps": 101565, "total_steps": 204665, "loss": 0.0351, "lr": 1.1865455863018226e-06, "epoch": 2.48124984731146, "percentage": 49.62, "elapsed_time": "2:10:50", "remaining_time": "2:12:49", "throughput": 8720.31, "total_tokens": 68461480} +{"current_steps": 101570, "total_steps": 204665, "loss": 0.1186, "lr": 1.1864618049610287e-06, "epoch": 2.4813719981433073, "percentage": 49.63, "elapsed_time": "2:10:51", "remaining_time": "2:12:49", "throughput": 8720.46, "total_tokens": 68465896} +{"current_steps": 101575, "total_steps": 204665, "loss": 0.0002, "lr": 1.1863780222642346e-06, "epoch": 2.4814941489751545, "percentage": 49.63, "elapsed_time": "2:10:51", "remaining_time": "2:12:48", "throughput": 8720.55, "total_tokens": 68469672} +{"current_steps": 101580, "total_steps": 204665, "loss": 0.0668, "lr": 1.1862942382120492e-06, "epoch": 2.4816162998070017, "percentage": 49.63, "elapsed_time": "2:10:51", "remaining_time": "2:12:48", "throughput": 8720.58, "total_tokens": 68472936} +{"current_steps": 101585, "total_steps": 204665, "loss": 0.0545, "lr": 1.1862104528050823e-06, "epoch": 2.481738450638849, "percentage": 49.63, "elapsed_time": "2:10:52", "remaining_time": "2:12:47", "throughput": 8720.69, "total_tokens": 68476968} +{"current_steps": 101590, "total_steps": 204665, "loss": 0.0706, "lr": 1.1861266660439427e-06, "epoch": 2.481860601470696, "percentage": 49.64, "elapsed_time": "2:10:52", "remaining_time": "2:12:47", "throughput": 8720.7, "total_tokens": 68479976} +{"current_steps": 101595, "total_steps": 204665, "loss": 0.0895, "lr": 1.1860428779292398e-06, "epoch": 2.4819827523025433, "percentage": 49.64, "elapsed_time": "2:10:52", "remaining_time": "2:12:46", "throughput": 8720.73, "total_tokens": 68483240} +{"current_steps": 101600, "total_steps": 204665, "loss": 0.0595, "lr": 1.1859590884615832e-06, "epoch": 2.4821049031343905, "percentage": 49.64, "elapsed_time": "2:10:53", "remaining_time": "2:12:46", "throughput": 8720.78, "total_tokens": 68486696} +{"current_steps": 101605, "total_steps": 204665, "loss": 0.0004, "lr": 1.185875297641582e-06, "epoch": 2.4822270539662377, "percentage": 49.64, "elapsed_time": "2:10:53", "remaining_time": "2:12:46", "throughput": 8720.87, "total_tokens": 68490536} +{"current_steps": 101610, "total_steps": 204665, "loss": 0.1793, "lr": 1.1857915054698457e-06, "epoch": 2.482349204798085, "percentage": 49.65, "elapsed_time": "2:10:53", "remaining_time": "2:12:45", "throughput": 8720.92, "total_tokens": 68493928} +{"current_steps": 101615, "total_steps": 204665, "loss": 0.0518, "lr": 1.1857077119469835e-06, "epoch": 2.482471355629932, "percentage": 49.65, "elapsed_time": "2:10:54", "remaining_time": "2:12:45", "throughput": 8720.92, "total_tokens": 68496936} +{"current_steps": 101620, "total_steps": 204665, "loss": 0.108, "lr": 1.1856239170736048e-06, "epoch": 2.482593506461779, "percentage": 49.65, "elapsed_time": "2:10:54", "remaining_time": "2:12:44", "throughput": 8720.94, "total_tokens": 68500008} +{"current_steps": 101625, "total_steps": 204665, "loss": 0.0003, "lr": 1.1855401208503192e-06, "epoch": 2.482715657293626, "percentage": 49.65, "elapsed_time": "2:10:55", "remaining_time": "2:12:44", "throughput": 8721.0, "total_tokens": 68503592} +{"current_steps": 101630, "total_steps": 204665, "loss": 0.0004, "lr": 1.1854563232777362e-06, "epoch": 2.482837808125473, "percentage": 49.66, "elapsed_time": "2:10:55", "remaining_time": "2:12:43", "throughput": 8721.02, "total_tokens": 68506664} +{"current_steps": 101635, "total_steps": 204665, "loss": 0.0424, "lr": 1.1853725243564645e-06, "epoch": 2.4829599589573204, "percentage": 49.66, "elapsed_time": "2:10:55", "remaining_time": "2:12:43", "throughput": 8721.05, "total_tokens": 68509928} +{"current_steps": 101640, "total_steps": 204665, "loss": 0.0455, "lr": 1.1852887240871142e-06, "epoch": 2.4830821097891675, "percentage": 49.66, "elapsed_time": "2:10:56", "remaining_time": "2:12:43", "throughput": 8721.14, "total_tokens": 68513768} +{"current_steps": 101645, "total_steps": 204665, "loss": 0.0006, "lr": 1.1852049224702947e-06, "epoch": 2.4832042606210147, "percentage": 49.66, "elapsed_time": "2:10:56", "remaining_time": "2:12:42", "throughput": 8721.15, "total_tokens": 68516840} +{"current_steps": 101650, "total_steps": 204665, "loss": 0.0005, "lr": 1.1851211195066148e-06, "epoch": 2.483326411452862, "percentage": 49.67, "elapsed_time": "2:10:56", "remaining_time": "2:12:42", "throughput": 8721.16, "total_tokens": 68519976} +{"current_steps": 101655, "total_steps": 204665, "loss": 0.001, "lr": 1.1850373151966845e-06, "epoch": 2.483448562284709, "percentage": 49.67, "elapsed_time": "2:10:57", "remaining_time": "2:12:41", "throughput": 8721.2, "total_tokens": 68523304} +{"current_steps": 101660, "total_steps": 204665, "loss": 0.0003, "lr": 1.1849535095411127e-06, "epoch": 2.4835707131165563, "percentage": 49.67, "elapsed_time": "2:10:57", "remaining_time": "2:12:41", "throughput": 8721.24, "total_tokens": 68526632} +{"current_steps": 101665, "total_steps": 204665, "loss": 0.0443, "lr": 1.1848697025405096e-06, "epoch": 2.4836928639484035, "percentage": 49.67, "elapsed_time": "2:10:57", "remaining_time": "2:12:40", "throughput": 8721.26, "total_tokens": 68529768} +{"current_steps": 101670, "total_steps": 204665, "loss": 0.0367, "lr": 1.1847858941954843e-06, "epoch": 2.4838150147802507, "percentage": 49.68, "elapsed_time": "2:10:58", "remaining_time": "2:12:40", "throughput": 8721.29, "total_tokens": 68533032} +{"current_steps": 101675, "total_steps": 204665, "loss": 0.0002, "lr": 1.1847020845066462e-06, "epoch": 2.483937165612098, "percentage": 49.68, "elapsed_time": "2:10:58", "remaining_time": "2:12:40", "throughput": 8721.34, "total_tokens": 68536488} +{"current_steps": 101680, "total_steps": 204665, "loss": 0.0306, "lr": 1.1846182734746048e-06, "epoch": 2.484059316443945, "percentage": 49.68, "elapsed_time": "2:10:58", "remaining_time": "2:12:39", "throughput": 8721.38, "total_tokens": 68539752} +{"current_steps": 101685, "total_steps": 204665, "loss": 0.1347, "lr": 1.1845344610999694e-06, "epoch": 2.4841814672757923, "percentage": 49.68, "elapsed_time": "2:10:59", "remaining_time": "2:12:39", "throughput": 8721.46, "total_tokens": 68543528} +{"current_steps": 101690, "total_steps": 204665, "loss": 0.0296, "lr": 1.1844506473833504e-06, "epoch": 2.4843036181076394, "percentage": 49.69, "elapsed_time": "2:10:59", "remaining_time": "2:12:38", "throughput": 8721.53, "total_tokens": 68547112} +{"current_steps": 101695, "total_steps": 204665, "loss": 0.0396, "lr": 1.1843668323253564e-06, "epoch": 2.4844257689394866, "percentage": 49.69, "elapsed_time": "2:10:59", "remaining_time": "2:12:38", "throughput": 8721.56, "total_tokens": 68550376} +{"current_steps": 101700, "total_steps": 204665, "loss": 0.0443, "lr": 1.184283015926597e-06, "epoch": 2.484547919771334, "percentage": 49.69, "elapsed_time": "2:11:00", "remaining_time": "2:12:38", "throughput": 8721.62, "total_tokens": 68553960} +{"current_steps": 101705, "total_steps": 204665, "loss": 0.0003, "lr": 1.1841991981876823e-06, "epoch": 2.4846700706031806, "percentage": 49.69, "elapsed_time": "2:11:00", "remaining_time": "2:12:37", "throughput": 8721.67, "total_tokens": 68557352} +{"current_steps": 101710, "total_steps": 204665, "loss": 0.1567, "lr": 1.1841153791092213e-06, "epoch": 2.484792221435028, "percentage": 49.7, "elapsed_time": "2:11:00", "remaining_time": "2:12:37", "throughput": 8721.73, "total_tokens": 68560936} +{"current_steps": 101715, "total_steps": 204665, "loss": 0.1355, "lr": 1.1840315586918234e-06, "epoch": 2.484914372266875, "percentage": 49.7, "elapsed_time": "2:11:01", "remaining_time": "2:12:36", "throughput": 8721.74, "total_tokens": 68563944} +{"current_steps": 101720, "total_steps": 204665, "loss": 0.0716, "lr": 1.183947736936099e-06, "epoch": 2.485036523098722, "percentage": 49.7, "elapsed_time": "2:11:01", "remaining_time": "2:12:36", "throughput": 8721.73, "total_tokens": 68566760} +{"current_steps": 101725, "total_steps": 204665, "loss": 0.0006, "lr": 1.1838639138426572e-06, "epoch": 2.4851586739305693, "percentage": 49.7, "elapsed_time": "2:11:01", "remaining_time": "2:12:35", "throughput": 8721.79, "total_tokens": 68570280} +{"current_steps": 101730, "total_steps": 204665, "loss": 0.0537, "lr": 1.1837800894121072e-06, "epoch": 2.4852808247624165, "percentage": 49.71, "elapsed_time": "2:11:02", "remaining_time": "2:12:35", "throughput": 8721.8, "total_tokens": 68573352} +{"current_steps": 101735, "total_steps": 204665, "loss": 0.0771, "lr": 1.183696263645059e-06, "epoch": 2.4854029755942637, "percentage": 49.71, "elapsed_time": "2:11:02", "remaining_time": "2:12:34", "throughput": 8721.84, "total_tokens": 68576744} +{"current_steps": 101740, "total_steps": 204665, "loss": 0.0349, "lr": 1.1836124365421225e-06, "epoch": 2.485525126426111, "percentage": 49.71, "elapsed_time": "2:11:02", "remaining_time": "2:12:34", "throughput": 8721.93, "total_tokens": 68580456} +{"current_steps": 101745, "total_steps": 204665, "loss": 0.0135, "lr": 1.1835286081039068e-06, "epoch": 2.485647277257958, "percentage": 49.71, "elapsed_time": "2:11:03", "remaining_time": "2:12:34", "throughput": 8722.02, "total_tokens": 68584296} +{"current_steps": 101750, "total_steps": 204665, "loss": 0.0005, "lr": 1.1834447783310217e-06, "epoch": 2.4857694280898053, "percentage": 49.72, "elapsed_time": "2:11:03", "remaining_time": "2:12:33", "throughput": 8722.01, "total_tokens": 68587176} +{"current_steps": 101755, "total_steps": 204665, "loss": 0.0003, "lr": 1.183360947224077e-06, "epoch": 2.4858915789216525, "percentage": 49.72, "elapsed_time": "2:11:04", "remaining_time": "2:12:33", "throughput": 8722.04, "total_tokens": 68590376} +{"current_steps": 101760, "total_steps": 204665, "loss": 0.0009, "lr": 1.183277114783682e-06, "epoch": 2.4860137297534997, "percentage": 49.72, "elapsed_time": "2:11:04", "remaining_time": "2:12:32", "throughput": 8722.09, "total_tokens": 68593832} +{"current_steps": 101765, "total_steps": 204665, "loss": 0.0956, "lr": 1.1831932810104463e-06, "epoch": 2.486135880585347, "percentage": 49.72, "elapsed_time": "2:11:04", "remaining_time": "2:12:32", "throughput": 8722.19, "total_tokens": 68597736} +{"current_steps": 101770, "total_steps": 204665, "loss": 0.0408, "lr": 1.1831094459049802e-06, "epoch": 2.486258031417194, "percentage": 49.73, "elapsed_time": "2:11:05", "remaining_time": "2:12:32", "throughput": 8722.26, "total_tokens": 68601320} +{"current_steps": 101775, "total_steps": 204665, "loss": 0.0697, "lr": 1.1830256094678927e-06, "epoch": 2.4863801822490412, "percentage": 49.73, "elapsed_time": "2:11:05", "remaining_time": "2:12:31", "throughput": 8722.3, "total_tokens": 68604712} +{"current_steps": 101780, "total_steps": 204665, "loss": 0.0003, "lr": 1.1829417716997936e-06, "epoch": 2.4865023330808884, "percentage": 49.73, "elapsed_time": "2:11:05", "remaining_time": "2:12:31", "throughput": 8722.31, "total_tokens": 68607720} +{"current_steps": 101785, "total_steps": 204665, "loss": 0.0001, "lr": 1.1828579326012928e-06, "epoch": 2.4866244839127356, "percentage": 49.73, "elapsed_time": "2:11:06", "remaining_time": "2:12:30", "throughput": 8722.36, "total_tokens": 68611240} +{"current_steps": 101790, "total_steps": 204665, "loss": 0.0001, "lr": 1.1827740921730003e-06, "epoch": 2.486746634744583, "percentage": 49.73, "elapsed_time": "2:11:06", "remaining_time": "2:12:30", "throughput": 8722.42, "total_tokens": 68614824} +{"current_steps": 101795, "total_steps": 204665, "loss": 0.0302, "lr": 1.1826902504155253e-06, "epoch": 2.48686878557643, "percentage": 49.74, "elapsed_time": "2:11:06", "remaining_time": "2:12:29", "throughput": 8722.54, "total_tokens": 68618920} +{"current_steps": 101800, "total_steps": 204665, "loss": 0.0936, "lr": 1.1826064073294778e-06, "epoch": 2.4869909364082767, "percentage": 49.74, "elapsed_time": "2:11:07", "remaining_time": "2:12:29", "throughput": 8722.56, "total_tokens": 68621992} +{"current_steps": 101805, "total_steps": 204665, "loss": 0.0003, "lr": 1.182522562915467e-06, "epoch": 2.487113087240124, "percentage": 49.74, "elapsed_time": "2:11:07", "remaining_time": "2:12:29", "throughput": 8722.57, "total_tokens": 68625128} +{"current_steps": 101810, "total_steps": 204665, "loss": 0.0002, "lr": 1.182438717174103e-06, "epoch": 2.487235238071971, "percentage": 49.74, "elapsed_time": "2:11:07", "remaining_time": "2:12:28", "throughput": 8722.67, "total_tokens": 68629032} +{"current_steps": 101815, "total_steps": 204665, "loss": 0.0124, "lr": 1.1823548701059957e-06, "epoch": 2.4873573889038183, "percentage": 49.75, "elapsed_time": "2:11:08", "remaining_time": "2:12:28", "throughput": 8722.73, "total_tokens": 68632616} +{"current_steps": 101820, "total_steps": 204665, "loss": 0.0003, "lr": 1.182271021711755e-06, "epoch": 2.4874795397356655, "percentage": 49.75, "elapsed_time": "2:11:08", "remaining_time": "2:12:27", "throughput": 8722.74, "total_tokens": 68635624} +{"current_steps": 101825, "total_steps": 204665, "loss": 0.0373, "lr": 1.1821871719919902e-06, "epoch": 2.4876016905675127, "percentage": 49.75, "elapsed_time": "2:11:08", "remaining_time": "2:12:27", "throughput": 8722.77, "total_tokens": 68638888} +{"current_steps": 101830, "total_steps": 204665, "loss": 0.0001, "lr": 1.1821033209473113e-06, "epoch": 2.48772384139936, "percentage": 49.75, "elapsed_time": "2:11:09", "remaining_time": "2:12:26", "throughput": 8722.85, "total_tokens": 68642600} +{"current_steps": 101835, "total_steps": 204665, "loss": 0.0001, "lr": 1.182019468578328e-06, "epoch": 2.487845992231207, "percentage": 49.76, "elapsed_time": "2:11:09", "remaining_time": "2:12:26", "throughput": 8722.87, "total_tokens": 68645800} +{"current_steps": 101840, "total_steps": 204665, "loss": 0.0612, "lr": 1.1819356148856501e-06, "epoch": 2.4879681430630542, "percentage": 49.76, "elapsed_time": "2:11:09", "remaining_time": "2:12:26", "throughput": 8722.95, "total_tokens": 68649448} +{"current_steps": 101845, "total_steps": 204665, "loss": 0.0002, "lr": 1.181851759869888e-06, "epoch": 2.4880902938949014, "percentage": 49.76, "elapsed_time": "2:11:10", "remaining_time": "2:12:25", "throughput": 8722.98, "total_tokens": 68652712} +{"current_steps": 101850, "total_steps": 204665, "loss": 0.0752, "lr": 1.1817679035316504e-06, "epoch": 2.4882124447267486, "percentage": 49.76, "elapsed_time": "2:11:10", "remaining_time": "2:12:25", "throughput": 8723.0, "total_tokens": 68655848} +{"current_steps": 101855, "total_steps": 204665, "loss": 0.0002, "lr": 1.181684045871548e-06, "epoch": 2.488334595558596, "percentage": 49.77, "elapsed_time": "2:11:11", "remaining_time": "2:12:24", "throughput": 8723.05, "total_tokens": 68659240} +{"current_steps": 101860, "total_steps": 204665, "loss": 0.0001, "lr": 1.1816001868901902e-06, "epoch": 2.488456746390443, "percentage": 49.77, "elapsed_time": "2:11:11", "remaining_time": "2:12:24", "throughput": 8723.13, "total_tokens": 68662952} +{"current_steps": 101865, "total_steps": 204665, "loss": 0.0012, "lr": 1.181516326588187e-06, "epoch": 2.48857889722229, "percentage": 49.77, "elapsed_time": "2:11:11", "remaining_time": "2:12:23", "throughput": 8723.16, "total_tokens": 68666280} +{"current_steps": 101870, "total_steps": 204665, "loss": 0.1029, "lr": 1.1814324649661483e-06, "epoch": 2.4887010480541374, "percentage": 49.77, "elapsed_time": "2:11:12", "remaining_time": "2:12:23", "throughput": 8723.21, "total_tokens": 68669672} +{"current_steps": 101875, "total_steps": 204665, "loss": 0.0001, "lr": 1.181348602024684e-06, "epoch": 2.4888231988859846, "percentage": 49.78, "elapsed_time": "2:11:12", "remaining_time": "2:12:23", "throughput": 8723.3, "total_tokens": 68673512} +{"current_steps": 101880, "total_steps": 204665, "loss": 0.0225, "lr": 1.1812647377644035e-06, "epoch": 2.4889453497178318, "percentage": 49.78, "elapsed_time": "2:11:12", "remaining_time": "2:12:22", "throughput": 8723.42, "total_tokens": 68677608} +{"current_steps": 101885, "total_steps": 204665, "loss": 0.0001, "lr": 1.1811808721859175e-06, "epoch": 2.4890675005496785, "percentage": 49.78, "elapsed_time": "2:11:13", "remaining_time": "2:12:22", "throughput": 8723.51, "total_tokens": 68681512} +{"current_steps": 101890, "total_steps": 204665, "loss": 0.1399, "lr": 1.1810970052898355e-06, "epoch": 2.489189651381526, "percentage": 49.78, "elapsed_time": "2:11:13", "remaining_time": "2:12:21", "throughput": 8723.58, "total_tokens": 68685160} +{"current_steps": 101895, "total_steps": 204665, "loss": 0.0001, "lr": 1.1810131370767671e-06, "epoch": 2.489311802213373, "percentage": 49.79, "elapsed_time": "2:11:13", "remaining_time": "2:12:21", "throughput": 8723.62, "total_tokens": 68688488} +{"current_steps": 101900, "total_steps": 204665, "loss": 0.0006, "lr": 1.1809292675473226e-06, "epoch": 2.48943395304522, "percentage": 49.79, "elapsed_time": "2:11:14", "remaining_time": "2:12:21", "throughput": 8723.63, "total_tokens": 68691560} +{"current_steps": 101905, "total_steps": 204665, "loss": 0.0558, "lr": 1.1808453967021119e-06, "epoch": 2.4895561038770673, "percentage": 49.79, "elapsed_time": "2:11:14", "remaining_time": "2:12:20", "throughput": 8723.67, "total_tokens": 68694952} +{"current_steps": 101910, "total_steps": 204665, "loss": 0.0548, "lr": 1.1807615245417447e-06, "epoch": 2.4896782547089145, "percentage": 49.79, "elapsed_time": "2:11:14", "remaining_time": "2:12:20", "throughput": 8723.76, "total_tokens": 68698792} +{"current_steps": 101915, "total_steps": 204665, "loss": 0.0802, "lr": 1.1806776510668312e-06, "epoch": 2.4898004055407617, "percentage": 49.8, "elapsed_time": "2:11:15", "remaining_time": "2:12:19", "throughput": 8723.77, "total_tokens": 68701864} +{"current_steps": 101920, "total_steps": 204665, "loss": 0.001, "lr": 1.1805937762779812e-06, "epoch": 2.489922556372609, "percentage": 49.8, "elapsed_time": "2:11:15", "remaining_time": "2:12:19", "throughput": 8723.79, "total_tokens": 68705000} +{"current_steps": 101925, "total_steps": 204665, "loss": 0.055, "lr": 1.1805099001758045e-06, "epoch": 2.490044707204456, "percentage": 49.8, "elapsed_time": "2:11:15", "remaining_time": "2:12:18", "throughput": 8723.84, "total_tokens": 68708392} +{"current_steps": 101930, "total_steps": 204665, "loss": 0.0283, "lr": 1.1804260227609116e-06, "epoch": 2.490166858036303, "percentage": 49.8, "elapsed_time": "2:11:16", "remaining_time": "2:12:18", "throughput": 8723.87, "total_tokens": 68711720} +{"current_steps": 101935, "total_steps": 204665, "loss": 0.0226, "lr": 1.1803421440339113e-06, "epoch": 2.4902890088681504, "percentage": 49.81, "elapsed_time": "2:11:16", "remaining_time": "2:12:18", "throughput": 8723.9, "total_tokens": 68714920} +{"current_steps": 101940, "total_steps": 204665, "loss": 0.0003, "lr": 1.1802582639954152e-06, "epoch": 2.4904111596999976, "percentage": 49.81, "elapsed_time": "2:11:16", "remaining_time": "2:12:17", "throughput": 8723.95, "total_tokens": 68718440} +{"current_steps": 101945, "total_steps": 204665, "loss": 0.0002, "lr": 1.1801743826460324e-06, "epoch": 2.490533310531845, "percentage": 49.81, "elapsed_time": "2:11:17", "remaining_time": "2:12:17", "throughput": 8724.01, "total_tokens": 68721960} +{"current_steps": 101950, "total_steps": 204665, "loss": 0.1339, "lr": 1.180090499986373e-06, "epoch": 2.490655461363692, "percentage": 49.81, "elapsed_time": "2:11:17", "remaining_time": "2:12:16", "throughput": 8724.01, "total_tokens": 68724904} +{"current_steps": 101955, "total_steps": 204665, "loss": 0.0326, "lr": 1.1800066160170472e-06, "epoch": 2.490777612195539, "percentage": 49.82, "elapsed_time": "2:11:18", "remaining_time": "2:12:16", "throughput": 8724.03, "total_tokens": 68728040} +{"current_steps": 101960, "total_steps": 204665, "loss": 0.0002, "lr": 1.1799227307386648e-06, "epoch": 2.4908997630273864, "percentage": 49.82, "elapsed_time": "2:11:18", "remaining_time": "2:12:15", "throughput": 8724.07, "total_tokens": 68731368} +{"current_steps": 101965, "total_steps": 204665, "loss": 0.0001, "lr": 1.1798388441518357e-06, "epoch": 2.4910219138592335, "percentage": 49.82, "elapsed_time": "2:11:18", "remaining_time": "2:12:15", "throughput": 8724.07, "total_tokens": 68734376} +{"current_steps": 101970, "total_steps": 204665, "loss": 0.0001, "lr": 1.1797549562571702e-06, "epoch": 2.4911440646910807, "percentage": 49.82, "elapsed_time": "2:11:19", "remaining_time": "2:12:15", "throughput": 8724.12, "total_tokens": 68737768} +{"current_steps": 101975, "total_steps": 204665, "loss": 0.0001, "lr": 1.1796710670552783e-06, "epoch": 2.491266215522928, "percentage": 49.83, "elapsed_time": "2:11:19", "remaining_time": "2:12:14", "throughput": 8724.19, "total_tokens": 68741480} +{"current_steps": 101980, "total_steps": 204665, "loss": 0.0002, "lr": 1.1795871765467701e-06, "epoch": 2.4913883663547747, "percentage": 49.83, "elapsed_time": "2:11:19", "remaining_time": "2:12:14", "throughput": 8724.23, "total_tokens": 68744808} +{"current_steps": 101985, "total_steps": 204665, "loss": 0.0002, "lr": 1.179503284732256e-06, "epoch": 2.491510517186622, "percentage": 49.83, "elapsed_time": "2:11:20", "remaining_time": "2:12:13", "throughput": 8724.3, "total_tokens": 68748456} +{"current_steps": 101990, "total_steps": 204665, "loss": 0.1791, "lr": 1.1794193916123453e-06, "epoch": 2.491632668018469, "percentage": 49.83, "elapsed_time": "2:11:20", "remaining_time": "2:12:13", "throughput": 8724.37, "total_tokens": 68752104} +{"current_steps": 101995, "total_steps": 204665, "loss": 0.0003, "lr": 1.1793354971876483e-06, "epoch": 2.4917548188503162, "percentage": 49.84, "elapsed_time": "2:11:20", "remaining_time": "2:12:12", "throughput": 8724.48, "total_tokens": 68756136} +{"current_steps": 102000, "total_steps": 204665, "loss": 0.0004, "lr": 1.179251601458776e-06, "epoch": 2.4918769696821634, "percentage": 49.84, "elapsed_time": "2:11:21", "remaining_time": "2:12:12", "throughput": 8724.53, "total_tokens": 68759528} +{"current_steps": 102005, "total_steps": 204665, "loss": 0.0004, "lr": 1.1791677044263375e-06, "epoch": 2.4919991205140106, "percentage": 49.84, "elapsed_time": "2:11:21", "remaining_time": "2:12:12", "throughput": 8724.56, "total_tokens": 68762728} +{"current_steps": 102010, "total_steps": 204665, "loss": 0.0001, "lr": 1.1790838060909434e-06, "epoch": 2.492121271345858, "percentage": 49.84, "elapsed_time": "2:11:21", "remaining_time": "2:12:11", "throughput": 8724.61, "total_tokens": 68766248} +{"current_steps": 102015, "total_steps": 204665, "loss": 0.0407, "lr": 1.1789999064532034e-06, "epoch": 2.492243422177705, "percentage": 49.84, "elapsed_time": "2:11:22", "remaining_time": "2:12:11", "throughput": 8724.61, "total_tokens": 68769192} +{"current_steps": 102020, "total_steps": 204665, "loss": 0.1691, "lr": 1.1789160055137282e-06, "epoch": 2.492365573009552, "percentage": 49.85, "elapsed_time": "2:11:22", "remaining_time": "2:12:10", "throughput": 8724.63, "total_tokens": 68772328} +{"current_steps": 102025, "total_steps": 204665, "loss": 0.1607, "lr": 1.1788321032731274e-06, "epoch": 2.4924877238413994, "percentage": 49.85, "elapsed_time": "2:11:22", "remaining_time": "2:12:10", "throughput": 8724.73, "total_tokens": 68776232} +{"current_steps": 102030, "total_steps": 204665, "loss": 0.0006, "lr": 1.1787481997320117e-06, "epoch": 2.4926098746732466, "percentage": 49.85, "elapsed_time": "2:11:23", "remaining_time": "2:12:09", "throughput": 8724.75, "total_tokens": 68779432} +{"current_steps": 102035, "total_steps": 204665, "loss": 0.0666, "lr": 1.178664294890991e-06, "epoch": 2.4927320255050938, "percentage": 49.85, "elapsed_time": "2:11:23", "remaining_time": "2:12:09", "throughput": 8724.77, "total_tokens": 68782632} +{"current_steps": 102040, "total_steps": 204665, "loss": 0.0004, "lr": 1.1785803887506756e-06, "epoch": 2.492854176336941, "percentage": 49.86, "elapsed_time": "2:11:23", "remaining_time": "2:12:09", "throughput": 8724.81, "total_tokens": 68785896} +{"current_steps": 102045, "total_steps": 204665, "loss": 0.001, "lr": 1.1784964813116754e-06, "epoch": 2.492976327168788, "percentage": 49.86, "elapsed_time": "2:11:24", "remaining_time": "2:12:08", "throughput": 8724.82, "total_tokens": 68788968} +{"current_steps": 102050, "total_steps": 204665, "loss": 0.0002, "lr": 1.1784125725746008e-06, "epoch": 2.4930984780006353, "percentage": 49.86, "elapsed_time": "2:11:24", "remaining_time": "2:12:08", "throughput": 8724.89, "total_tokens": 68792552} +{"current_steps": 102055, "total_steps": 204665, "loss": 0.0465, "lr": 1.1783286625400619e-06, "epoch": 2.4932206288324825, "percentage": 49.86, "elapsed_time": "2:11:24", "remaining_time": "2:12:07", "throughput": 8724.92, "total_tokens": 68795816} +{"current_steps": 102060, "total_steps": 204665, "loss": 0.0494, "lr": 1.1782447512086693e-06, "epoch": 2.4933427796643297, "percentage": 49.87, "elapsed_time": "2:11:25", "remaining_time": "2:12:07", "throughput": 8724.98, "total_tokens": 68799400} +{"current_steps": 102065, "total_steps": 204665, "loss": 0.0899, "lr": 1.1781608385810327e-06, "epoch": 2.4934649304961765, "percentage": 49.87, "elapsed_time": "2:11:25", "remaining_time": "2:12:07", "throughput": 8725.04, "total_tokens": 68802920} +{"current_steps": 102070, "total_steps": 204665, "loss": 0.0366, "lr": 1.1780769246577625e-06, "epoch": 2.493587081328024, "percentage": 49.87, "elapsed_time": "2:11:26", "remaining_time": "2:12:06", "throughput": 8725.04, "total_tokens": 68805864} +{"current_steps": 102075, "total_steps": 204665, "loss": 0.0005, "lr": 1.1779930094394692e-06, "epoch": 2.493709232159871, "percentage": 49.87, "elapsed_time": "2:11:26", "remaining_time": "2:12:06", "throughput": 8725.04, "total_tokens": 68808808} +{"current_steps": 102080, "total_steps": 204665, "loss": 0.0685, "lr": 1.1779090929267628e-06, "epoch": 2.493831382991718, "percentage": 49.88, "elapsed_time": "2:11:26", "remaining_time": "2:12:05", "throughput": 8725.08, "total_tokens": 68812200} +{"current_steps": 102085, "total_steps": 204665, "loss": 0.0001, "lr": 1.1778251751202534e-06, "epoch": 2.493953533823565, "percentage": 49.88, "elapsed_time": "2:11:27", "remaining_time": "2:12:05", "throughput": 8725.16, "total_tokens": 68815976} +{"current_steps": 102090, "total_steps": 204665, "loss": 0.0001, "lr": 1.1777412560205515e-06, "epoch": 2.4940756846554124, "percentage": 49.88, "elapsed_time": "2:11:27", "remaining_time": "2:12:04", "throughput": 8725.25, "total_tokens": 68819752} +{"current_steps": 102095, "total_steps": 204665, "loss": 0.0002, "lr": 1.1776573356282677e-06, "epoch": 2.4941978354872596, "percentage": 49.88, "elapsed_time": "2:11:27", "remaining_time": "2:12:04", "throughput": 8725.31, "total_tokens": 68823272} +{"current_steps": 102100, "total_steps": 204665, "loss": 0.0002, "lr": 1.1775734139440116e-06, "epoch": 2.494319986319107, "percentage": 49.89, "elapsed_time": "2:11:28", "remaining_time": "2:12:04", "throughput": 8725.34, "total_tokens": 68826536} +{"current_steps": 102105, "total_steps": 204665, "loss": 0.0002, "lr": 1.1774894909683943e-06, "epoch": 2.494442137150954, "percentage": 49.89, "elapsed_time": "2:11:28", "remaining_time": "2:12:03", "throughput": 8725.39, "total_tokens": 68829992} +{"current_steps": 102110, "total_steps": 204665, "loss": 0.0443, "lr": 1.1774055667020258e-06, "epoch": 2.494564287982801, "percentage": 49.89, "elapsed_time": "2:11:28", "remaining_time": "2:12:03", "throughput": 8725.43, "total_tokens": 68833384} +{"current_steps": 102115, "total_steps": 204665, "loss": 0.0002, "lr": 1.1773216411455158e-06, "epoch": 2.4946864388146484, "percentage": 49.89, "elapsed_time": "2:11:29", "remaining_time": "2:12:02", "throughput": 8725.45, "total_tokens": 68836456} +{"current_steps": 102120, "total_steps": 204665, "loss": 0.07, "lr": 1.1772377142994752e-06, "epoch": 2.4948085896464955, "percentage": 49.9, "elapsed_time": "2:11:29", "remaining_time": "2:12:02", "throughput": 8725.48, "total_tokens": 68839784} +{"current_steps": 102125, "total_steps": 204665, "loss": 0.0823, "lr": 1.1771537861645143e-06, "epoch": 2.4949307404783427, "percentage": 49.9, "elapsed_time": "2:11:29", "remaining_time": "2:12:01", "throughput": 8725.5, "total_tokens": 68842920} +{"current_steps": 102130, "total_steps": 204665, "loss": 0.0001, "lr": 1.1770698567412437e-06, "epoch": 2.49505289131019, "percentage": 49.9, "elapsed_time": "2:11:30", "remaining_time": "2:12:01", "throughput": 8725.54, "total_tokens": 68846248} +{"current_steps": 102135, "total_steps": 204665, "loss": 0.0799, "lr": 1.1769859260302733e-06, "epoch": 2.495175042142037, "percentage": 49.9, "elapsed_time": "2:11:30", "remaining_time": "2:12:01", "throughput": 8725.57, "total_tokens": 68849448} +{"current_steps": 102140, "total_steps": 204665, "loss": 0.0002, "lr": 1.1769019940322137e-06, "epoch": 2.4952971929738843, "percentage": 49.91, "elapsed_time": "2:11:30", "remaining_time": "2:12:00", "throughput": 8725.65, "total_tokens": 68853224} +{"current_steps": 102145, "total_steps": 204665, "loss": 0.0007, "lr": 1.176818060747675e-06, "epoch": 2.4954193438057315, "percentage": 49.91, "elapsed_time": "2:11:31", "remaining_time": "2:12:00", "throughput": 8725.76, "total_tokens": 68857256} +{"current_steps": 102150, "total_steps": 204665, "loss": 0.0003, "lr": 1.176734126177268e-06, "epoch": 2.4955414946375782, "percentage": 49.91, "elapsed_time": "2:11:31", "remaining_time": "2:11:59", "throughput": 8725.8, "total_tokens": 68860584} +{"current_steps": 102155, "total_steps": 204665, "loss": 0.0017, "lr": 1.1766501903216028e-06, "epoch": 2.495663645469426, "percentage": 49.91, "elapsed_time": "2:11:31", "remaining_time": "2:11:59", "throughput": 8725.87, "total_tokens": 68864296} +{"current_steps": 102160, "total_steps": 204665, "loss": 0.1199, "lr": 1.17656625318129e-06, "epoch": 2.4957857963012726, "percentage": 49.92, "elapsed_time": "2:11:32", "remaining_time": "2:11:58", "throughput": 8725.92, "total_tokens": 68867688} +{"current_steps": 102165, "total_steps": 204665, "loss": 0.0003, "lr": 1.1764823147569399e-06, "epoch": 2.49590794713312, "percentage": 49.92, "elapsed_time": "2:11:32", "remaining_time": "2:11:58", "throughput": 8725.95, "total_tokens": 68870952} +{"current_steps": 102170, "total_steps": 204665, "loss": 0.0366, "lr": 1.1763983750491629e-06, "epoch": 2.496030097964967, "percentage": 49.92, "elapsed_time": "2:11:33", "remaining_time": "2:11:58", "throughput": 8725.97, "total_tokens": 68874088} +{"current_steps": 102175, "total_steps": 204665, "loss": 0.0152, "lr": 1.1763144340585695e-06, "epoch": 2.496152248796814, "percentage": 49.92, "elapsed_time": "2:11:33", "remaining_time": "2:11:57", "throughput": 8726.0, "total_tokens": 68877288} +{"current_steps": 102180, "total_steps": 204665, "loss": 0.1067, "lr": 1.17623049178577e-06, "epoch": 2.4962743996286614, "percentage": 49.93, "elapsed_time": "2:11:33", "remaining_time": "2:11:57", "throughput": 8726.03, "total_tokens": 68880552} +{"current_steps": 102185, "total_steps": 204665, "loss": 0.0001, "lr": 1.176146548231375e-06, "epoch": 2.4963965504605086, "percentage": 49.93, "elapsed_time": "2:11:34", "remaining_time": "2:11:56", "throughput": 8726.11, "total_tokens": 68884264} +{"current_steps": 102190, "total_steps": 204665, "loss": 0.0002, "lr": 1.176062603395995e-06, "epoch": 2.4965187012923558, "percentage": 49.93, "elapsed_time": "2:11:34", "remaining_time": "2:11:56", "throughput": 8726.18, "total_tokens": 68887976} +{"current_steps": 102195, "total_steps": 204665, "loss": 0.1329, "lr": 1.1759786572802405e-06, "epoch": 2.496640852124203, "percentage": 49.93, "elapsed_time": "2:11:34", "remaining_time": "2:11:55", "throughput": 8726.21, "total_tokens": 68891176} +{"current_steps": 102200, "total_steps": 204665, "loss": 0.1105, "lr": 1.1758947098847217e-06, "epoch": 2.49676300295605, "percentage": 49.94, "elapsed_time": "2:11:35", "remaining_time": "2:11:55", "throughput": 8726.25, "total_tokens": 68894568} +{"current_steps": 102205, "total_steps": 204665, "loss": 0.0012, "lr": 1.1758107612100491e-06, "epoch": 2.4968851537878973, "percentage": 49.94, "elapsed_time": "2:11:35", "remaining_time": "2:11:55", "throughput": 8726.28, "total_tokens": 68897832} +{"current_steps": 102210, "total_steps": 204665, "loss": 0.0991, "lr": 1.1757268112568337e-06, "epoch": 2.4970073046197445, "percentage": 49.94, "elapsed_time": "2:11:35", "remaining_time": "2:11:54", "throughput": 8726.34, "total_tokens": 68901352} +{"current_steps": 102215, "total_steps": 204665, "loss": 0.0007, "lr": 1.1756428600256855e-06, "epoch": 2.4971294554515917, "percentage": 49.94, "elapsed_time": "2:11:36", "remaining_time": "2:11:54", "throughput": 8726.36, "total_tokens": 68904488} +{"current_steps": 102220, "total_steps": 204665, "loss": 0.1971, "lr": 1.1755589075172152e-06, "epoch": 2.497251606283439, "percentage": 49.95, "elapsed_time": "2:11:36", "remaining_time": "2:11:53", "throughput": 8726.37, "total_tokens": 68907560} +{"current_steps": 102225, "total_steps": 204665, "loss": 0.0004, "lr": 1.1754749537320333e-06, "epoch": 2.497373757115286, "percentage": 49.95, "elapsed_time": "2:11:36", "remaining_time": "2:11:53", "throughput": 8726.4, "total_tokens": 68910760} +{"current_steps": 102230, "total_steps": 204665, "loss": 0.0293, "lr": 1.1753909986707505e-06, "epoch": 2.4974959079471333, "percentage": 49.95, "elapsed_time": "2:11:37", "remaining_time": "2:11:53", "throughput": 8726.47, "total_tokens": 68914472} +{"current_steps": 102235, "total_steps": 204665, "loss": 0.1082, "lr": 1.1753070423339768e-06, "epoch": 2.4976180587789805, "percentage": 49.95, "elapsed_time": "2:11:37", "remaining_time": "2:11:52", "throughput": 8726.49, "total_tokens": 68917608} +{"current_steps": 102240, "total_steps": 204665, "loss": 0.0013, "lr": 1.1752230847223235e-06, "epoch": 2.4977402096108277, "percentage": 49.95, "elapsed_time": "2:11:37", "remaining_time": "2:11:52", "throughput": 8726.52, "total_tokens": 68920808} +{"current_steps": 102245, "total_steps": 204665, "loss": 0.0066, "lr": 1.1751391258364005e-06, "epoch": 2.4978623604426744, "percentage": 49.96, "elapsed_time": "2:11:38", "remaining_time": "2:11:51", "throughput": 8726.56, "total_tokens": 68924264} +{"current_steps": 102250, "total_steps": 204665, "loss": 0.0508, "lr": 1.1750551656768188e-06, "epoch": 2.4979845112745216, "percentage": 49.96, "elapsed_time": "2:11:38", "remaining_time": "2:11:51", "throughput": 8726.6, "total_tokens": 68927528} +{"current_steps": 102255, "total_steps": 204665, "loss": 0.0005, "lr": 1.174971204244189e-06, "epoch": 2.498106662106369, "percentage": 49.96, "elapsed_time": "2:11:38", "remaining_time": "2:11:50", "throughput": 8726.58, "total_tokens": 68930344} +{"current_steps": 102260, "total_steps": 204665, "loss": 0.0003, "lr": 1.1748872415391214e-06, "epoch": 2.498228812938216, "percentage": 49.96, "elapsed_time": "2:11:39", "remaining_time": "2:11:50", "throughput": 8726.61, "total_tokens": 68933544} +{"current_steps": 102265, "total_steps": 204665, "loss": 0.0003, "lr": 1.1748032775622269e-06, "epoch": 2.498350963770063, "percentage": 49.97, "elapsed_time": "2:11:39", "remaining_time": "2:11:50", "throughput": 8726.7, "total_tokens": 68937384} +{"current_steps": 102270, "total_steps": 204665, "loss": 0.0345, "lr": 1.174719312314116e-06, "epoch": 2.4984731146019103, "percentage": 49.97, "elapsed_time": "2:11:39", "remaining_time": "2:11:49", "throughput": 8726.73, "total_tokens": 68940648} +{"current_steps": 102275, "total_steps": 204665, "loss": 0.0002, "lr": 1.1746353457953988e-06, "epoch": 2.4985952654337575, "percentage": 49.97, "elapsed_time": "2:11:40", "remaining_time": "2:11:49", "throughput": 8726.8, "total_tokens": 68944360} +{"current_steps": 102280, "total_steps": 204665, "loss": 0.035, "lr": 1.1745513780066867e-06, "epoch": 2.4987174162656047, "percentage": 49.97, "elapsed_time": "2:11:40", "remaining_time": "2:11:48", "throughput": 8726.88, "total_tokens": 68948072} +{"current_steps": 102285, "total_steps": 204665, "loss": 0.1122, "lr": 1.17446740894859e-06, "epoch": 2.498839567097452, "percentage": 49.98, "elapsed_time": "2:11:40", "remaining_time": "2:11:48", "throughput": 8726.91, "total_tokens": 68951336} +{"current_steps": 102290, "total_steps": 204665, "loss": 0.098, "lr": 1.1743834386217192e-06, "epoch": 2.498961717929299, "percentage": 49.98, "elapsed_time": "2:11:41", "remaining_time": "2:11:47", "throughput": 8726.94, "total_tokens": 68954600} +{"current_steps": 102295, "total_steps": 204665, "loss": 0.0631, "lr": 1.1742994670266856e-06, "epoch": 2.4990838687611463, "percentage": 49.98, "elapsed_time": "2:11:41", "remaining_time": "2:11:47", "throughput": 8726.97, "total_tokens": 68957800} +{"current_steps": 102300, "total_steps": 204665, "loss": 0.0015, "lr": 1.1742154941640989e-06, "epoch": 2.4992060195929935, "percentage": 49.98, "elapsed_time": "2:11:42", "remaining_time": "2:11:47", "throughput": 8727.03, "total_tokens": 68961384} +{"current_steps": 102305, "total_steps": 204665, "loss": 0.0263, "lr": 1.1741315200345703e-06, "epoch": 2.4993281704248407, "percentage": 49.99, "elapsed_time": "2:11:42", "remaining_time": "2:11:46", "throughput": 8727.07, "total_tokens": 68964648} +{"current_steps": 102310, "total_steps": 204665, "loss": 0.0331, "lr": 1.174047544638711e-06, "epoch": 2.499450321256688, "percentage": 49.99, "elapsed_time": "2:11:42", "remaining_time": "2:11:46", "throughput": 8727.12, "total_tokens": 68968168} +{"current_steps": 102315, "total_steps": 204665, "loss": 0.0007, "lr": 1.1739635679771306e-06, "epoch": 2.499572472088535, "percentage": 49.99, "elapsed_time": "2:11:43", "remaining_time": "2:11:45", "throughput": 8727.15, "total_tokens": 68971368} +{"current_steps": 102320, "total_steps": 204665, "loss": 0.0006, "lr": 1.1738795900504406e-06, "epoch": 2.4996946229203822, "percentage": 49.99, "elapsed_time": "2:11:43", "remaining_time": "2:11:45", "throughput": 8727.19, "total_tokens": 68974760} +{"current_steps": 102325, "total_steps": 204665, "loss": 0.0299, "lr": 1.1737956108592512e-06, "epoch": 2.4998167737522294, "percentage": 50.0, "elapsed_time": "2:11:43", "remaining_time": "2:11:44", "throughput": 8727.24, "total_tokens": 68978216} +{"current_steps": 102330, "total_steps": 204665, "loss": 0.0428, "lr": 1.1737116304041736e-06, "epoch": 2.499938924584076, "percentage": 50.0, "elapsed_time": "2:11:44", "remaining_time": "2:11:44", "throughput": 8727.28, "total_tokens": 68981480} +{"current_steps": 102335, "total_steps": 204665, "loss": 0.0745, "lr": 1.173627648685818e-06, "epoch": 2.500061075415924, "percentage": 50.0, "elapsed_time": "2:11:44", "remaining_time": "2:11:44", "throughput": 8727.31, "total_tokens": 68984744} +{"current_steps": 102340, "total_steps": 204665, "loss": 0.0236, "lr": 1.173543665704796e-06, "epoch": 2.5001832262477706, "percentage": 50.0, "elapsed_time": "2:11:44", "remaining_time": "2:11:43", "throughput": 8727.39, "total_tokens": 68988456} +{"current_steps": 102340, "total_steps": 204665, "eval_loss": 0.18347086012363434, "epoch": 2.5001832262477706, "percentage": 50.0, "elapsed_time": "2:12:32", "remaining_time": "2:12:31", "throughput": 8674.95, "total_tokens": 68988456} +{"current_steps": 102345, "total_steps": 204665, "loss": 0.0005, "lr": 1.1734596814617173e-06, "epoch": 2.5003053770796178, "percentage": 50.01, "elapsed_time": "2:13:09", "remaining_time": "2:13:07", "throughput": 8635.14, "total_tokens": 68991656} +{"current_steps": 102350, "total_steps": 204665, "loss": 0.063, "lr": 1.1733756959571933e-06, "epoch": 2.500427527911465, "percentage": 50.01, "elapsed_time": "2:13:09", "remaining_time": "2:13:07", "throughput": 8635.15, "total_tokens": 68994728} +{"current_steps": 102355, "total_steps": 204665, "loss": 0.0003, "lr": 1.1732917091918347e-06, "epoch": 2.500549678743312, "percentage": 50.01, "elapsed_time": "2:13:10", "remaining_time": "2:13:06", "throughput": 8635.19, "total_tokens": 68998056} +{"current_steps": 102360, "total_steps": 204665, "loss": 0.0003, "lr": 1.173207721166252e-06, "epoch": 2.5006718295751593, "percentage": 50.01, "elapsed_time": "2:13:10", "remaining_time": "2:13:06", "throughput": 8635.21, "total_tokens": 69001192} +{"current_steps": 102365, "total_steps": 204665, "loss": 0.0551, "lr": 1.1731237318810562e-06, "epoch": 2.5007939804070065, "percentage": 50.02, "elapsed_time": "2:13:11", "remaining_time": "2:13:05", "throughput": 8635.26, "total_tokens": 69004584} +{"current_steps": 102370, "total_steps": 204665, "loss": 0.0401, "lr": 1.1730397413368583e-06, "epoch": 2.5009161312388537, "percentage": 50.02, "elapsed_time": "2:13:11", "remaining_time": "2:13:05", "throughput": 8635.26, "total_tokens": 69007464} +{"current_steps": 102375, "total_steps": 204665, "loss": 0.0504, "lr": 1.1729557495342685e-06, "epoch": 2.501038282070701, "percentage": 50.02, "elapsed_time": "2:13:11", "remaining_time": "2:13:05", "throughput": 8635.28, "total_tokens": 69010536} +{"current_steps": 102380, "total_steps": 204665, "loss": 0.0621, "lr": 1.1728717564738983e-06, "epoch": 2.501160432902548, "percentage": 50.02, "elapsed_time": "2:13:12", "remaining_time": "2:13:04", "throughput": 8635.35, "total_tokens": 69014184} +{"current_steps": 102385, "total_steps": 204665, "loss": 0.0002, "lr": 1.172787762156358e-06, "epoch": 2.5012825837343953, "percentage": 50.03, "elapsed_time": "2:13:12", "remaining_time": "2:13:04", "throughput": 8635.46, "total_tokens": 69018152} +{"current_steps": 102390, "total_steps": 204665, "loss": 0.054, "lr": 1.1727037665822588e-06, "epoch": 2.5014047345662425, "percentage": 50.03, "elapsed_time": "2:13:12", "remaining_time": "2:13:03", "throughput": 8635.51, "total_tokens": 69021608} +{"current_steps": 102395, "total_steps": 204665, "loss": 0.0003, "lr": 1.172619769752211e-06, "epoch": 2.5015268853980897, "percentage": 50.03, "elapsed_time": "2:13:13", "remaining_time": "2:13:03", "throughput": 8635.53, "total_tokens": 69024680} +{"current_steps": 102400, "total_steps": 204665, "loss": 0.1057, "lr": 1.172535771666826e-06, "epoch": 2.501649036229937, "percentage": 50.03, "elapsed_time": "2:13:13", "remaining_time": "2:13:02", "throughput": 8635.54, "total_tokens": 69027752} +{"current_steps": 102405, "total_steps": 204665, "loss": 0.0002, "lr": 1.1724517723267143e-06, "epoch": 2.501771187061784, "percentage": 50.04, "elapsed_time": "2:13:13", "remaining_time": "2:13:02", "throughput": 8635.58, "total_tokens": 69031080} +{"current_steps": 102410, "total_steps": 204665, "loss": 0.0002, "lr": 1.172367771732487e-06, "epoch": 2.501893337893631, "percentage": 50.04, "elapsed_time": "2:13:14", "remaining_time": "2:13:02", "throughput": 8635.62, "total_tokens": 69034408} +{"current_steps": 102415, "total_steps": 204665, "loss": 0.0434, "lr": 1.1722837698847552e-06, "epoch": 2.502015488725478, "percentage": 50.04, "elapsed_time": "2:13:14", "remaining_time": "2:13:01", "throughput": 8635.74, "total_tokens": 69038568} +{"current_steps": 102420, "total_steps": 204665, "loss": 0.124, "lr": 1.1721997667841295e-06, "epoch": 2.5021376395573256, "percentage": 50.04, "elapsed_time": "2:13:14", "remaining_time": "2:13:01", "throughput": 8635.79, "total_tokens": 69041960} +{"current_steps": 102425, "total_steps": 204665, "loss": 0.0012, "lr": 1.1721157624312206e-06, "epoch": 2.5022597903891723, "percentage": 50.05, "elapsed_time": "2:13:15", "remaining_time": "2:13:00", "throughput": 8635.85, "total_tokens": 69045544} +{"current_steps": 102430, "total_steps": 204665, "loss": 0.0541, "lr": 1.1720317568266393e-06, "epoch": 2.50238194122102, "percentage": 50.05, "elapsed_time": "2:13:15", "remaining_time": "2:13:00", "throughput": 8635.92, "total_tokens": 69049064} +{"current_steps": 102435, "total_steps": 204665, "loss": 0.0013, "lr": 1.1719477499709971e-06, "epoch": 2.5025040920528667, "percentage": 50.05, "elapsed_time": "2:13:15", "remaining_time": "2:12:59", "throughput": 8635.96, "total_tokens": 69052456} +{"current_steps": 102440, "total_steps": 204665, "loss": 0.0623, "lr": 1.1718637418649047e-06, "epoch": 2.502626242884714, "percentage": 50.05, "elapsed_time": "2:13:16", "remaining_time": "2:12:59", "throughput": 8636.0, "total_tokens": 69055720} +{"current_steps": 102445, "total_steps": 204665, "loss": 0.0002, "lr": 1.1717797325089727e-06, "epoch": 2.502748393716561, "percentage": 50.05, "elapsed_time": "2:13:16", "remaining_time": "2:12:59", "throughput": 8636.02, "total_tokens": 69058856} +{"current_steps": 102450, "total_steps": 204665, "loss": 0.0003, "lr": 1.1716957219038123e-06, "epoch": 2.5028705445484083, "percentage": 50.06, "elapsed_time": "2:13:16", "remaining_time": "2:12:58", "throughput": 8636.08, "total_tokens": 69062440} +{"current_steps": 102455, "total_steps": 204665, "loss": 0.0627, "lr": 1.1716117100500347e-06, "epoch": 2.5029926953802555, "percentage": 50.06, "elapsed_time": "2:13:17", "remaining_time": "2:12:58", "throughput": 8636.12, "total_tokens": 69065704} +{"current_steps": 102460, "total_steps": 204665, "loss": 0.0004, "lr": 1.1715276969482502e-06, "epoch": 2.5031148462121027, "percentage": 50.06, "elapsed_time": "2:13:17", "remaining_time": "2:12:57", "throughput": 8636.16, "total_tokens": 69069096} +{"current_steps": 102465, "total_steps": 204665, "loss": 0.0859, "lr": 1.1714436825990706e-06, "epoch": 2.50323699704395, "percentage": 50.06, "elapsed_time": "2:13:18", "remaining_time": "2:12:57", "throughput": 8636.23, "total_tokens": 69072680} +{"current_steps": 102470, "total_steps": 204665, "loss": 0.0266, "lr": 1.1713596670031061e-06, "epoch": 2.503359147875797, "percentage": 50.07, "elapsed_time": "2:13:18", "remaining_time": "2:12:56", "throughput": 8636.27, "total_tokens": 69076008} +{"current_steps": 102475, "total_steps": 204665, "loss": 0.0594, "lr": 1.1712756501609681e-06, "epoch": 2.5034812987076442, "percentage": 50.07, "elapsed_time": "2:13:18", "remaining_time": "2:12:56", "throughput": 8636.34, "total_tokens": 69079656} +{"current_steps": 102480, "total_steps": 204665, "loss": 0.0634, "lr": 1.1711916320732675e-06, "epoch": 2.5036034495394914, "percentage": 50.07, "elapsed_time": "2:13:19", "remaining_time": "2:12:56", "throughput": 8636.35, "total_tokens": 69082664} +{"current_steps": 102485, "total_steps": 204665, "loss": 0.047, "lr": 1.1711076127406155e-06, "epoch": 2.5037256003713386, "percentage": 50.07, "elapsed_time": "2:13:19", "remaining_time": "2:12:55", "throughput": 8636.38, "total_tokens": 69085864} +{"current_steps": 102490, "total_steps": 204665, "loss": 0.0005, "lr": 1.1710235921636228e-06, "epoch": 2.503847751203186, "percentage": 50.08, "elapsed_time": "2:13:19", "remaining_time": "2:12:55", "throughput": 8636.45, "total_tokens": 69089512} +{"current_steps": 102495, "total_steps": 204665, "loss": 0.0004, "lr": 1.1709395703429002e-06, "epoch": 2.503969902035033, "percentage": 50.08, "elapsed_time": "2:13:20", "remaining_time": "2:12:54", "throughput": 8636.51, "total_tokens": 69093032} +{"current_steps": 102500, "total_steps": 204665, "loss": 0.0003, "lr": 1.1708555472790593e-06, "epoch": 2.5040920528668797, "percentage": 50.08, "elapsed_time": "2:13:20", "remaining_time": "2:12:54", "throughput": 8636.54, "total_tokens": 69096232} +{"current_steps": 102505, "total_steps": 204665, "loss": 0.0004, "lr": 1.170771522972711e-06, "epoch": 2.5042142036987274, "percentage": 50.08, "elapsed_time": "2:13:20", "remaining_time": "2:12:53", "throughput": 8636.56, "total_tokens": 69099432} +{"current_steps": 102510, "total_steps": 204665, "loss": 0.0921, "lr": 1.1706874974244661e-06, "epoch": 2.504336354530574, "percentage": 50.09, "elapsed_time": "2:13:21", "remaining_time": "2:12:53", "throughput": 8636.57, "total_tokens": 69102504} +{"current_steps": 102515, "total_steps": 204665, "loss": 0.0003, "lr": 1.1706034706349358e-06, "epoch": 2.5044585053624218, "percentage": 50.09, "elapsed_time": "2:13:21", "remaining_time": "2:12:53", "throughput": 8636.65, "total_tokens": 69106152} +{"current_steps": 102520, "total_steps": 204665, "loss": 0.0002, "lr": 1.1705194426047314e-06, "epoch": 2.5045806561942685, "percentage": 50.09, "elapsed_time": "2:13:21", "remaining_time": "2:12:52", "throughput": 8636.72, "total_tokens": 69109800} +{"current_steps": 102525, "total_steps": 204665, "loss": 0.0436, "lr": 1.1704354133344635e-06, "epoch": 2.5047028070261157, "percentage": 50.09, "elapsed_time": "2:13:22", "remaining_time": "2:12:52", "throughput": 8636.75, "total_tokens": 69113064} +{"current_steps": 102530, "total_steps": 204665, "loss": 0.0002, "lr": 1.1703513828247436e-06, "epoch": 2.504824957857963, "percentage": 50.1, "elapsed_time": "2:13:22", "remaining_time": "2:12:51", "throughput": 8636.78, "total_tokens": 69116328} +{"current_steps": 102535, "total_steps": 204665, "loss": 0.0006, "lr": 1.1702673510761827e-06, "epoch": 2.50494710868981, "percentage": 50.1, "elapsed_time": "2:13:22", "remaining_time": "2:12:51", "throughput": 8636.82, "total_tokens": 69119592} +{"current_steps": 102540, "total_steps": 204665, "loss": 0.0006, "lr": 1.1701833180893917e-06, "epoch": 2.5050692595216573, "percentage": 50.1, "elapsed_time": "2:13:23", "remaining_time": "2:12:50", "throughput": 8636.87, "total_tokens": 69123048} +{"current_steps": 102545, "total_steps": 204665, "loss": 0.0006, "lr": 1.1700992838649819e-06, "epoch": 2.5051914103535045, "percentage": 50.1, "elapsed_time": "2:13:23", "remaining_time": "2:12:50", "throughput": 8636.96, "total_tokens": 69126888} +{"current_steps": 102550, "total_steps": 204665, "loss": 0.1027, "lr": 1.170015248403564e-06, "epoch": 2.5053135611853516, "percentage": 50.11, "elapsed_time": "2:13:23", "remaining_time": "2:12:50", "throughput": 8636.99, "total_tokens": 69130088} +{"current_steps": 102555, "total_steps": 204665, "loss": 0.0005, "lr": 1.1699312117057498e-06, "epoch": 2.505435712017199, "percentage": 50.11, "elapsed_time": "2:13:24", "remaining_time": "2:12:49", "throughput": 8637.0, "total_tokens": 69133096} +{"current_steps": 102560, "total_steps": 204665, "loss": 0.0943, "lr": 1.16984717377215e-06, "epoch": 2.505557862849046, "percentage": 50.11, "elapsed_time": "2:13:24", "remaining_time": "2:12:49", "throughput": 8637.05, "total_tokens": 69136616} +{"current_steps": 102565, "total_steps": 204665, "loss": 0.0563, "lr": 1.169763134603376e-06, "epoch": 2.505680013680893, "percentage": 50.11, "elapsed_time": "2:13:24", "remaining_time": "2:12:48", "throughput": 8637.09, "total_tokens": 69139880} +{"current_steps": 102570, "total_steps": 204665, "loss": 0.0428, "lr": 1.1696790942000389e-06, "epoch": 2.5058021645127404, "percentage": 50.12, "elapsed_time": "2:13:25", "remaining_time": "2:12:48", "throughput": 8637.2, "total_tokens": 69143848} +{"current_steps": 102575, "total_steps": 204665, "loss": 0.1425, "lr": 1.1695950525627499e-06, "epoch": 2.5059243153445876, "percentage": 50.12, "elapsed_time": "2:13:25", "remaining_time": "2:12:47", "throughput": 8637.22, "total_tokens": 69146984} +{"current_steps": 102580, "total_steps": 204665, "loss": 0.0607, "lr": 1.16951100969212e-06, "epoch": 2.506046466176435, "percentage": 50.12, "elapsed_time": "2:13:26", "remaining_time": "2:12:47", "throughput": 8637.27, "total_tokens": 69150440} +{"current_steps": 102585, "total_steps": 204665, "loss": 0.0006, "lr": 1.1694269655887602e-06, "epoch": 2.506168617008282, "percentage": 50.12, "elapsed_time": "2:13:26", "remaining_time": "2:12:46", "throughput": 8637.32, "total_tokens": 69153896} +{"current_steps": 102590, "total_steps": 204665, "loss": 0.0629, "lr": 1.169342920253282e-06, "epoch": 2.506290767840129, "percentage": 50.13, "elapsed_time": "2:13:26", "remaining_time": "2:12:46", "throughput": 8637.34, "total_tokens": 69157032} +{"current_steps": 102595, "total_steps": 204665, "loss": 0.042, "lr": 1.1692588736862966e-06, "epoch": 2.506412918671976, "percentage": 50.13, "elapsed_time": "2:13:27", "remaining_time": "2:12:46", "throughput": 8637.36, "total_tokens": 69160168} +{"current_steps": 102600, "total_steps": 204665, "loss": 0.0005, "lr": 1.169174825888415e-06, "epoch": 2.5065350695038235, "percentage": 50.13, "elapsed_time": "2:13:27", "remaining_time": "2:12:45", "throughput": 8637.4, "total_tokens": 69163560} +{"current_steps": 102605, "total_steps": 204665, "loss": 0.032, "lr": 1.1690907768602487e-06, "epoch": 2.5066572203356703, "percentage": 50.13, "elapsed_time": "2:13:27", "remaining_time": "2:12:45", "throughput": 8637.45, "total_tokens": 69167080} +{"current_steps": 102610, "total_steps": 204665, "loss": 0.0282, "lr": 1.1690067266024086e-06, "epoch": 2.506779371167518, "percentage": 50.14, "elapsed_time": "2:13:28", "remaining_time": "2:12:44", "throughput": 8637.52, "total_tokens": 69170792} +{"current_steps": 102615, "total_steps": 204665, "loss": 0.097, "lr": 1.1689226751155062e-06, "epoch": 2.5069015219993647, "percentage": 50.14, "elapsed_time": "2:13:28", "remaining_time": "2:12:44", "throughput": 8637.55, "total_tokens": 69174056} +{"current_steps": 102620, "total_steps": 204665, "loss": 0.0013, "lr": 1.168838622400153e-06, "epoch": 2.507023672831212, "percentage": 50.14, "elapsed_time": "2:13:28", "remaining_time": "2:12:43", "throughput": 8637.56, "total_tokens": 69177064} +{"current_steps": 102625, "total_steps": 204665, "loss": 0.0284, "lr": 1.1687545684569598e-06, "epoch": 2.507145823663059, "percentage": 50.14, "elapsed_time": "2:13:29", "remaining_time": "2:12:43", "throughput": 8637.58, "total_tokens": 69180264} +{"current_steps": 102630, "total_steps": 204665, "loss": 0.0003, "lr": 1.1686705132865377e-06, "epoch": 2.5072679744949062, "percentage": 50.15, "elapsed_time": "2:13:29", "remaining_time": "2:12:43", "throughput": 8637.63, "total_tokens": 69183784} +{"current_steps": 102635, "total_steps": 204665, "loss": 0.0509, "lr": 1.1685864568894984e-06, "epoch": 2.5073901253267534, "percentage": 50.15, "elapsed_time": "2:13:29", "remaining_time": "2:12:42", "throughput": 8637.71, "total_tokens": 69187496} +{"current_steps": 102640, "total_steps": 204665, "loss": 0.0002, "lr": 1.1685023992664533e-06, "epoch": 2.5075122761586006, "percentage": 50.15, "elapsed_time": "2:13:30", "remaining_time": "2:12:42", "throughput": 8637.75, "total_tokens": 69190888} +{"current_steps": 102645, "total_steps": 204665, "loss": 0.0588, "lr": 1.1684183404180132e-06, "epoch": 2.507634426990448, "percentage": 50.15, "elapsed_time": "2:13:30", "remaining_time": "2:12:41", "throughput": 8637.86, "total_tokens": 69194856} +{"current_steps": 102650, "total_steps": 204665, "loss": 0.0007, "lr": 1.1683342803447894e-06, "epoch": 2.507756577822295, "percentage": 50.16, "elapsed_time": "2:13:31", "remaining_time": "2:12:41", "throughput": 8637.89, "total_tokens": 69198120} +{"current_steps": 102655, "total_steps": 204665, "loss": 0.0005, "lr": 1.1682502190473937e-06, "epoch": 2.507878728654142, "percentage": 50.16, "elapsed_time": "2:13:31", "remaining_time": "2:12:41", "throughput": 8637.92, "total_tokens": 69201384} +{"current_steps": 102660, "total_steps": 204665, "loss": 0.0002, "lr": 1.168166156526437e-06, "epoch": 2.5080008794859894, "percentage": 50.16, "elapsed_time": "2:13:31", "remaining_time": "2:12:40", "throughput": 8637.95, "total_tokens": 69204648} +{"current_steps": 102665, "total_steps": 204665, "loss": 0.0459, "lr": 1.168082092782531e-06, "epoch": 2.5081230303178366, "percentage": 50.16, "elapsed_time": "2:13:32", "remaining_time": "2:12:40", "throughput": 8637.96, "total_tokens": 69207720} +{"current_steps": 102670, "total_steps": 204665, "loss": 0.0002, "lr": 1.167998027816287e-06, "epoch": 2.5082451811496838, "percentage": 50.16, "elapsed_time": "2:13:32", "remaining_time": "2:12:39", "throughput": 8637.99, "total_tokens": 69211048} +{"current_steps": 102675, "total_steps": 204665, "loss": 0.0464, "lr": 1.1679139616283155e-06, "epoch": 2.508367331981531, "percentage": 50.17, "elapsed_time": "2:13:32", "remaining_time": "2:12:39", "throughput": 8638.0, "total_tokens": 69214120} +{"current_steps": 102680, "total_steps": 204665, "loss": 0.0003, "lr": 1.1678298942192292e-06, "epoch": 2.5084894828133777, "percentage": 50.17, "elapsed_time": "2:13:33", "remaining_time": "2:12:38", "throughput": 8638.08, "total_tokens": 69217896} +{"current_steps": 102685, "total_steps": 204665, "loss": 0.0001, "lr": 1.1677458255896384e-06, "epoch": 2.5086116336452253, "percentage": 50.17, "elapsed_time": "2:13:33", "remaining_time": "2:12:38", "throughput": 8638.07, "total_tokens": 69220776} +{"current_steps": 102690, "total_steps": 204665, "loss": 0.0002, "lr": 1.1676617557401547e-06, "epoch": 2.508733784477072, "percentage": 50.17, "elapsed_time": "2:13:33", "remaining_time": "2:12:38", "throughput": 8638.11, "total_tokens": 69224104} +{"current_steps": 102695, "total_steps": 204665, "loss": 0.0048, "lr": 1.1675776846713899e-06, "epoch": 2.5088559353089197, "percentage": 50.18, "elapsed_time": "2:13:34", "remaining_time": "2:12:37", "throughput": 8638.12, "total_tokens": 69227112} +{"current_steps": 102700, "total_steps": 204665, "loss": 0.0385, "lr": 1.167493612383955e-06, "epoch": 2.5089780861407665, "percentage": 50.18, "elapsed_time": "2:13:34", "remaining_time": "2:12:37", "throughput": 8638.18, "total_tokens": 69230696} +{"current_steps": 102705, "total_steps": 204665, "loss": 0.0003, "lr": 1.1674095388784616e-06, "epoch": 2.5091002369726136, "percentage": 50.18, "elapsed_time": "2:13:34", "remaining_time": "2:12:36", "throughput": 8638.23, "total_tokens": 69234152} +{"current_steps": 102710, "total_steps": 204665, "loss": 0.0002, "lr": 1.1673254641555206e-06, "epoch": 2.509222387804461, "percentage": 50.18, "elapsed_time": "2:13:35", "remaining_time": "2:12:36", "throughput": 8638.28, "total_tokens": 69237544} +{"current_steps": 102715, "total_steps": 204665, "loss": 0.0215, "lr": 1.1672413882157442e-06, "epoch": 2.509344538636308, "percentage": 50.19, "elapsed_time": "2:13:35", "remaining_time": "2:12:35", "throughput": 8638.29, "total_tokens": 69240616} +{"current_steps": 102720, "total_steps": 204665, "loss": 0.0633, "lr": 1.1671573110597434e-06, "epoch": 2.509466689468155, "percentage": 50.19, "elapsed_time": "2:13:35", "remaining_time": "2:12:35", "throughput": 8638.29, "total_tokens": 69243624} +{"current_steps": 102725, "total_steps": 204665, "loss": 0.1228, "lr": 1.1670732326881297e-06, "epoch": 2.5095888403000024, "percentage": 50.19, "elapsed_time": "2:13:36", "remaining_time": "2:12:34", "throughput": 8638.33, "total_tokens": 69246888} +{"current_steps": 102730, "total_steps": 204665, "loss": 0.0002, "lr": 1.1669891531015145e-06, "epoch": 2.5097109911318496, "percentage": 50.19, "elapsed_time": "2:13:36", "remaining_time": "2:12:34", "throughput": 8638.41, "total_tokens": 69250664} +{"current_steps": 102735, "total_steps": 204665, "loss": 0.0002, "lr": 1.1669050723005095e-06, "epoch": 2.509833141963697, "percentage": 50.2, "elapsed_time": "2:13:36", "remaining_time": "2:12:34", "throughput": 8638.49, "total_tokens": 69254440} +{"current_steps": 102740, "total_steps": 204665, "loss": 0.0003, "lr": 1.1668209902857253e-06, "epoch": 2.509955292795544, "percentage": 50.2, "elapsed_time": "2:13:37", "remaining_time": "2:12:33", "throughput": 8638.51, "total_tokens": 69257640} +{"current_steps": 102745, "total_steps": 204665, "loss": 0.0002, "lr": 1.1667369070577744e-06, "epoch": 2.510077443627391, "percentage": 50.2, "elapsed_time": "2:13:37", "remaining_time": "2:12:33", "throughput": 8638.58, "total_tokens": 69261288} +{"current_steps": 102750, "total_steps": 204665, "loss": 0.0002, "lr": 1.1666528226172678e-06, "epoch": 2.5101995944592383, "percentage": 50.2, "elapsed_time": "2:13:38", "remaining_time": "2:12:32", "throughput": 8638.59, "total_tokens": 69264360} +{"current_steps": 102755, "total_steps": 204665, "loss": 0.0681, "lr": 1.1665687369648172e-06, "epoch": 2.5103217452910855, "percentage": 50.21, "elapsed_time": "2:13:38", "remaining_time": "2:12:32", "throughput": 8638.63, "total_tokens": 69267624} +{"current_steps": 102760, "total_steps": 204665, "loss": 0.0329, "lr": 1.1664846501010336e-06, "epoch": 2.5104438961229327, "percentage": 50.21, "elapsed_time": "2:13:38", "remaining_time": "2:12:31", "throughput": 8638.65, "total_tokens": 69270888} +{"current_steps": 102765, "total_steps": 204665, "loss": 0.092, "lr": 1.1664005620265292e-06, "epoch": 2.51056604695478, "percentage": 50.21, "elapsed_time": "2:13:39", "remaining_time": "2:12:31", "throughput": 8638.72, "total_tokens": 69274536} +{"current_steps": 102770, "total_steps": 204665, "loss": 0.0003, "lr": 1.166316472741915e-06, "epoch": 2.510688197786627, "percentage": 50.21, "elapsed_time": "2:13:39", "remaining_time": "2:12:31", "throughput": 8638.78, "total_tokens": 69278120} +{"current_steps": 102775, "total_steps": 204665, "loss": 0.0467, "lr": 1.1662323822478026e-06, "epoch": 2.510810348618474, "percentage": 50.22, "elapsed_time": "2:13:39", "remaining_time": "2:12:30", "throughput": 8638.88, "total_tokens": 69282024} +{"current_steps": 102780, "total_steps": 204665, "loss": 0.0007, "lr": 1.166148290544804e-06, "epoch": 2.5109324994503215, "percentage": 50.22, "elapsed_time": "2:13:40", "remaining_time": "2:12:30", "throughput": 8638.91, "total_tokens": 69285288} +{"current_steps": 102785, "total_steps": 204665, "loss": 0.0006, "lr": 1.16606419763353e-06, "epoch": 2.5110546502821682, "percentage": 50.22, "elapsed_time": "2:13:40", "remaining_time": "2:12:29", "throughput": 8638.97, "total_tokens": 69288808} +{"current_steps": 102790, "total_steps": 204665, "loss": 0.0018, "lr": 1.1659801035145925e-06, "epoch": 2.5111768011140154, "percentage": 50.22, "elapsed_time": "2:13:40", "remaining_time": "2:12:29", "throughput": 8639.0, "total_tokens": 69292072} +{"current_steps": 102795, "total_steps": 204665, "loss": 0.0431, "lr": 1.165896008188603e-06, "epoch": 2.5112989519458626, "percentage": 50.23, "elapsed_time": "2:13:41", "remaining_time": "2:12:29", "throughput": 8639.04, "total_tokens": 69295400} +{"current_steps": 102800, "total_steps": 204665, "loss": 0.1111, "lr": 1.1658119116561732e-06, "epoch": 2.51142110277771, "percentage": 50.23, "elapsed_time": "2:13:41", "remaining_time": "2:12:28", "throughput": 8639.04, "total_tokens": 69298408} +{"current_steps": 102805, "total_steps": 204665, "loss": 0.0003, "lr": 1.1657278139179143e-06, "epoch": 2.511543253609557, "percentage": 50.23, "elapsed_time": "2:13:41", "remaining_time": "2:12:28", "throughput": 8639.1, "total_tokens": 69301928} +{"current_steps": 102810, "total_steps": 204665, "loss": 0.0001, "lr": 1.1656437149744384e-06, "epoch": 2.511665404441404, "percentage": 50.23, "elapsed_time": "2:13:42", "remaining_time": "2:12:27", "throughput": 8639.13, "total_tokens": 69305320} +{"current_steps": 102815, "total_steps": 204665, "loss": 0.0, "lr": 1.1655596148263568e-06, "epoch": 2.5117875552732514, "percentage": 50.24, "elapsed_time": "2:13:42", "remaining_time": "2:12:27", "throughput": 8639.16, "total_tokens": 69308520} +{"current_steps": 102820, "total_steps": 204665, "loss": 0.0502, "lr": 1.1654755134742814e-06, "epoch": 2.5119097061050986, "percentage": 50.24, "elapsed_time": "2:13:42", "remaining_time": "2:12:26", "throughput": 8639.19, "total_tokens": 69311720} +{"current_steps": 102825, "total_steps": 204665, "loss": 0.0321, "lr": 1.1653914109188233e-06, "epoch": 2.5120318569369458, "percentage": 50.24, "elapsed_time": "2:13:43", "remaining_time": "2:12:26", "throughput": 8639.25, "total_tokens": 69315240} +{"current_steps": 102830, "total_steps": 204665, "loss": 0.0001, "lr": 1.1653073071605945e-06, "epoch": 2.512154007768793, "percentage": 50.24, "elapsed_time": "2:13:43", "remaining_time": "2:12:26", "throughput": 8639.32, "total_tokens": 69318888} +{"current_steps": 102835, "total_steps": 204665, "loss": 0.0001, "lr": 1.1652232022002064e-06, "epoch": 2.51227615860064, "percentage": 50.25, "elapsed_time": "2:13:44", "remaining_time": "2:12:25", "throughput": 8639.33, "total_tokens": 69322024} +{"current_steps": 102840, "total_steps": 204665, "loss": 0.0501, "lr": 1.1651390960382707e-06, "epoch": 2.5123983094324873, "percentage": 50.25, "elapsed_time": "2:13:44", "remaining_time": "2:12:25", "throughput": 8639.39, "total_tokens": 69325480} +{"current_steps": 102845, "total_steps": 204665, "loss": 0.0132, "lr": 1.1650549886753994e-06, "epoch": 2.5125204602643345, "percentage": 50.25, "elapsed_time": "2:13:44", "remaining_time": "2:12:24", "throughput": 8639.48, "total_tokens": 69329320} +{"current_steps": 102850, "total_steps": 204665, "loss": 0.0525, "lr": 1.1649708801122036e-06, "epoch": 2.5126426110961817, "percentage": 50.25, "elapsed_time": "2:13:45", "remaining_time": "2:12:24", "throughput": 8639.52, "total_tokens": 69332712} +{"current_steps": 102855, "total_steps": 204665, "loss": 0.0337, "lr": 1.1648867703492951e-06, "epoch": 2.512764761928029, "percentage": 50.26, "elapsed_time": "2:13:45", "remaining_time": "2:12:23", "throughput": 8639.57, "total_tokens": 69336168} +{"current_steps": 102860, "total_steps": 204665, "loss": 0.0858, "lr": 1.1648026593872858e-06, "epoch": 2.5128869127598756, "percentage": 50.26, "elapsed_time": "2:13:45", "remaining_time": "2:12:23", "throughput": 8639.61, "total_tokens": 69339432} +{"current_steps": 102865, "total_steps": 204665, "loss": 0.0842, "lr": 1.1647185472267868e-06, "epoch": 2.5130090635917233, "percentage": 50.26, "elapsed_time": "2:13:46", "remaining_time": "2:12:23", "throughput": 8639.66, "total_tokens": 69342888} +{"current_steps": 102870, "total_steps": 204665, "loss": 0.1118, "lr": 1.1646344338684107e-06, "epoch": 2.51313121442357, "percentage": 50.26, "elapsed_time": "2:13:46", "remaining_time": "2:12:22", "throughput": 8639.71, "total_tokens": 69346344} +{"current_steps": 102875, "total_steps": 204665, "loss": 0.0001, "lr": 1.1645503193127685e-06, "epoch": 2.5132533652554176, "percentage": 50.27, "elapsed_time": "2:13:46", "remaining_time": "2:12:22", "throughput": 8639.72, "total_tokens": 69349416} +{"current_steps": 102880, "total_steps": 204665, "loss": 0.0941, "lr": 1.1644662035604725e-06, "epoch": 2.5133755160872644, "percentage": 50.27, "elapsed_time": "2:13:47", "remaining_time": "2:12:21", "throughput": 8639.74, "total_tokens": 69352552} +{"current_steps": 102885, "total_steps": 204665, "loss": 0.0003, "lr": 1.1643820866121338e-06, "epoch": 2.5134976669191116, "percentage": 50.27, "elapsed_time": "2:13:47", "remaining_time": "2:12:21", "throughput": 8639.78, "total_tokens": 69355944} +{"current_steps": 102890, "total_steps": 204665, "loss": 0.0003, "lr": 1.1642979684683642e-06, "epoch": 2.5136198177509588, "percentage": 50.27, "elapsed_time": "2:13:48", "remaining_time": "2:12:21", "throughput": 8639.49, "total_tokens": 69358952} +{"current_steps": 102895, "total_steps": 204665, "loss": 0.0004, "lr": 1.1642138491297756e-06, "epoch": 2.513741968582806, "percentage": 50.27, "elapsed_time": "2:13:48", "remaining_time": "2:12:20", "throughput": 8639.57, "total_tokens": 69362728} +{"current_steps": 102900, "total_steps": 204665, "loss": 0.0002, "lr": 1.1641297285969798e-06, "epoch": 2.513864119414653, "percentage": 50.28, "elapsed_time": "2:13:48", "remaining_time": "2:12:20", "throughput": 8639.62, "total_tokens": 69366120} +{"current_steps": 102905, "total_steps": 204665, "loss": 0.0772, "lr": 1.1640456068705886e-06, "epoch": 2.5139862702465003, "percentage": 50.28, "elapsed_time": "2:13:49", "remaining_time": "2:12:19", "throughput": 8639.67, "total_tokens": 69369576} +{"current_steps": 102910, "total_steps": 204665, "loss": 0.0373, "lr": 1.1639614839512133e-06, "epoch": 2.5141084210783475, "percentage": 50.28, "elapsed_time": "2:13:49", "remaining_time": "2:12:19", "throughput": 8639.69, "total_tokens": 69372776} +{"current_steps": 102915, "total_steps": 204665, "loss": 0.0064, "lr": 1.1638773598394663e-06, "epoch": 2.5142305719101947, "percentage": 50.28, "elapsed_time": "2:13:49", "remaining_time": "2:12:18", "throughput": 8639.72, "total_tokens": 69375976} +{"current_steps": 102920, "total_steps": 204665, "loss": 0.0004, "lr": 1.1637932345359588e-06, "epoch": 2.514352722742042, "percentage": 50.29, "elapsed_time": "2:13:50", "remaining_time": "2:12:18", "throughput": 8639.73, "total_tokens": 69379048} +{"current_steps": 102925, "total_steps": 204665, "loss": 0.0003, "lr": 1.1637091080413032e-06, "epoch": 2.514474873573889, "percentage": 50.29, "elapsed_time": "2:13:50", "remaining_time": "2:12:18", "throughput": 8639.75, "total_tokens": 69382184} +{"current_steps": 102930, "total_steps": 204665, "loss": 0.155, "lr": 1.1636249803561106e-06, "epoch": 2.5145970244057363, "percentage": 50.29, "elapsed_time": "2:13:50", "remaining_time": "2:12:17", "throughput": 8639.8, "total_tokens": 69385576} +{"current_steps": 102935, "total_steps": 204665, "loss": 0.0001, "lr": 1.1635408514809934e-06, "epoch": 2.5147191752375835, "percentage": 50.29, "elapsed_time": "2:13:51", "remaining_time": "2:12:17", "throughput": 8639.85, "total_tokens": 69389032} +{"current_steps": 102940, "total_steps": 204665, "loss": 0.0005, "lr": 1.163456721416563e-06, "epoch": 2.5148413260694307, "percentage": 50.3, "elapsed_time": "2:13:51", "remaining_time": "2:12:16", "throughput": 8639.89, "total_tokens": 69392360} +{"current_steps": 102945, "total_steps": 204665, "loss": 0.0002, "lr": 1.1633725901634312e-06, "epoch": 2.514963476901278, "percentage": 50.3, "elapsed_time": "2:13:51", "remaining_time": "2:12:16", "throughput": 8639.93, "total_tokens": 69395688} +{"current_steps": 102950, "total_steps": 204665, "loss": 0.0509, "lr": 1.1632884577222105e-06, "epoch": 2.515085627733125, "percentage": 50.3, "elapsed_time": "2:13:52", "remaining_time": "2:12:15", "throughput": 8639.99, "total_tokens": 69399208} +{"current_steps": 102955, "total_steps": 204665, "loss": 0.0452, "lr": 1.1632043240935118e-06, "epoch": 2.515207778564972, "percentage": 50.3, "elapsed_time": "2:13:52", "remaining_time": "2:12:15", "throughput": 8640.06, "total_tokens": 69402856} +{"current_steps": 102960, "total_steps": 204665, "loss": 0.0503, "lr": 1.1631201892779473e-06, "epoch": 2.5153299293968194, "percentage": 50.31, "elapsed_time": "2:13:53", "remaining_time": "2:12:15", "throughput": 8640.08, "total_tokens": 69405992} +{"current_steps": 102965, "total_steps": 204665, "loss": 0.0001, "lr": 1.1630360532761287e-06, "epoch": 2.515452080228666, "percentage": 50.31, "elapsed_time": "2:13:53", "remaining_time": "2:12:14", "throughput": 8640.12, "total_tokens": 69409320} +{"current_steps": 102970, "total_steps": 204665, "loss": 0.0001, "lr": 1.1629519160886685e-06, "epoch": 2.5155742310605134, "percentage": 50.31, "elapsed_time": "2:13:53", "remaining_time": "2:12:14", "throughput": 8640.22, "total_tokens": 69413224} +{"current_steps": 102975, "total_steps": 204665, "loss": 0.0417, "lr": 1.1628677777161782e-06, "epoch": 2.5156963818923606, "percentage": 50.31, "elapsed_time": "2:13:54", "remaining_time": "2:12:13", "throughput": 8640.25, "total_tokens": 69416488} +{"current_steps": 102980, "total_steps": 204665, "loss": 0.0454, "lr": 1.1627836381592694e-06, "epoch": 2.5158185327242077, "percentage": 50.32, "elapsed_time": "2:13:54", "remaining_time": "2:12:13", "throughput": 8640.29, "total_tokens": 69419816} +{"current_steps": 102985, "total_steps": 204665, "loss": 0.0751, "lr": 1.1626994974185542e-06, "epoch": 2.515940683556055, "percentage": 50.32, "elapsed_time": "2:13:54", "remaining_time": "2:12:12", "throughput": 8640.36, "total_tokens": 69423464} +{"current_steps": 102990, "total_steps": 204665, "loss": 0.1411, "lr": 1.1626153554946446e-06, "epoch": 2.516062834387902, "percentage": 50.32, "elapsed_time": "2:13:55", "remaining_time": "2:12:12", "throughput": 8640.38, "total_tokens": 69426600} +{"current_steps": 102995, "total_steps": 204665, "loss": 0.0443, "lr": 1.1625312123881522e-06, "epoch": 2.5161849852197493, "percentage": 50.32, "elapsed_time": "2:13:55", "remaining_time": "2:12:12", "throughput": 8640.41, "total_tokens": 69429928} +{"current_steps": 103000, "total_steps": 204665, "loss": 0.0634, "lr": 1.1624470680996894e-06, "epoch": 2.5163071360515965, "percentage": 50.33, "elapsed_time": "2:13:55", "remaining_time": "2:12:11", "throughput": 8640.44, "total_tokens": 69433128} +{"current_steps": 103005, "total_steps": 204665, "loss": 0.0001, "lr": 1.1623629226298677e-06, "epoch": 2.5164292868834437, "percentage": 50.33, "elapsed_time": "2:13:56", "remaining_time": "2:12:11", "throughput": 8640.55, "total_tokens": 69437224} +{"current_steps": 103010, "total_steps": 204665, "loss": 0.1557, "lr": 1.1622787759792991e-06, "epoch": 2.516551437715291, "percentage": 50.33, "elapsed_time": "2:13:56", "remaining_time": "2:12:10", "throughput": 8640.59, "total_tokens": 69440488} +{"current_steps": 103015, "total_steps": 204665, "loss": 0.2014, "lr": 1.1621946281485957e-06, "epoch": 2.516673588547138, "percentage": 50.33, "elapsed_time": "2:13:56", "remaining_time": "2:12:10", "throughput": 8640.61, "total_tokens": 69443624} +{"current_steps": 103020, "total_steps": 204665, "loss": 0.0351, "lr": 1.1621104791383688e-06, "epoch": 2.5167957393789853, "percentage": 50.34, "elapsed_time": "2:13:57", "remaining_time": "2:12:09", "throughput": 8640.73, "total_tokens": 69447720} +{"current_steps": 103025, "total_steps": 204665, "loss": 0.0007, "lr": 1.1620263289492316e-06, "epoch": 2.5169178902108325, "percentage": 50.34, "elapsed_time": "2:13:57", "remaining_time": "2:12:09", "throughput": 8640.75, "total_tokens": 69450920} +{"current_steps": 103030, "total_steps": 204665, "loss": 0.0003, "lr": 1.161942177581795e-06, "epoch": 2.5170400410426796, "percentage": 50.34, "elapsed_time": "2:13:57", "remaining_time": "2:12:09", "throughput": 8640.79, "total_tokens": 69454248} +{"current_steps": 103035, "total_steps": 204665, "loss": 0.0843, "lr": 1.1618580250366714e-06, "epoch": 2.517162191874527, "percentage": 50.34, "elapsed_time": "2:13:58", "remaining_time": "2:12:08", "throughput": 8640.83, "total_tokens": 69457512} +{"current_steps": 103040, "total_steps": 204665, "loss": 0.0816, "lr": 1.161773871314473e-06, "epoch": 2.5172843427063736, "percentage": 50.35, "elapsed_time": "2:13:58", "remaining_time": "2:12:08", "throughput": 8640.85, "total_tokens": 69460648} +{"current_steps": 103045, "total_steps": 204665, "loss": 0.0288, "lr": 1.1616897164158112e-06, "epoch": 2.517406493538221, "percentage": 50.35, "elapsed_time": "2:13:58", "remaining_time": "2:12:07", "throughput": 8640.89, "total_tokens": 69463912} +{"current_steps": 103050, "total_steps": 204665, "loss": 0.0004, "lr": 1.1616055603412982e-06, "epoch": 2.517528644370068, "percentage": 50.35, "elapsed_time": "2:13:59", "remaining_time": "2:12:07", "throughput": 8640.96, "total_tokens": 69467560} +{"current_steps": 103055, "total_steps": 204665, "loss": 0.0506, "lr": 1.1615214030915463e-06, "epoch": 2.5176507952019156, "percentage": 50.35, "elapsed_time": "2:13:59", "remaining_time": "2:12:06", "throughput": 8641.01, "total_tokens": 69470952} +{"current_steps": 103060, "total_steps": 204665, "loss": 0.0483, "lr": 1.1614372446671672e-06, "epoch": 2.5177729460337623, "percentage": 50.36, "elapsed_time": "2:14:00", "remaining_time": "2:12:06", "throughput": 8641.04, "total_tokens": 69474216} +{"current_steps": 103065, "total_steps": 204665, "loss": 0.0006, "lr": 1.1613530850687731e-06, "epoch": 2.5178950968656095, "percentage": 50.36, "elapsed_time": "2:14:00", "remaining_time": "2:12:06", "throughput": 8641.12, "total_tokens": 69477864} +{"current_steps": 103070, "total_steps": 204665, "loss": 0.0548, "lr": 1.161268924296976e-06, "epoch": 2.5180172476974567, "percentage": 50.36, "elapsed_time": "2:14:00", "remaining_time": "2:12:05", "throughput": 8641.12, "total_tokens": 69480808} +{"current_steps": 103075, "total_steps": 204665, "loss": 0.0295, "lr": 1.161184762352388e-06, "epoch": 2.518139398529304, "percentage": 50.36, "elapsed_time": "2:14:01", "remaining_time": "2:12:05", "throughput": 8641.22, "total_tokens": 69484712} +{"current_steps": 103080, "total_steps": 204665, "loss": 0.1276, "lr": 1.1611005992356208e-06, "epoch": 2.518261549361151, "percentage": 50.37, "elapsed_time": "2:14:01", "remaining_time": "2:12:04", "throughput": 8641.28, "total_tokens": 69488296} +{"current_steps": 103085, "total_steps": 204665, "loss": 0.0009, "lr": 1.1610164349472868e-06, "epoch": 2.5183837001929983, "percentage": 50.37, "elapsed_time": "2:14:01", "remaining_time": "2:12:04", "throughput": 8641.28, "total_tokens": 69491176} +{"current_steps": 103090, "total_steps": 204665, "loss": 0.0004, "lr": 1.160932269487998e-06, "epoch": 2.5185058510248455, "percentage": 50.37, "elapsed_time": "2:14:02", "remaining_time": "2:12:03", "throughput": 8641.31, "total_tokens": 69494440} +{"current_steps": 103095, "total_steps": 204665, "loss": 0.0757, "lr": 1.1608481028583666e-06, "epoch": 2.5186280018566927, "percentage": 50.37, "elapsed_time": "2:14:02", "remaining_time": "2:12:03", "throughput": 8641.38, "total_tokens": 69497960} +{"current_steps": 103100, "total_steps": 204665, "loss": 0.0007, "lr": 1.1607639350590042e-06, "epoch": 2.51875015268854, "percentage": 50.38, "elapsed_time": "2:14:02", "remaining_time": "2:12:03", "throughput": 8641.45, "total_tokens": 69501608} +{"current_steps": 103105, "total_steps": 204665, "loss": 0.1427, "lr": 1.1606797660905235e-06, "epoch": 2.518872303520387, "percentage": 50.38, "elapsed_time": "2:14:03", "remaining_time": "2:12:02", "throughput": 8641.51, "total_tokens": 69505192} +{"current_steps": 103110, "total_steps": 204665, "loss": 0.0422, "lr": 1.1605955959535363e-06, "epoch": 2.5189944543522342, "percentage": 50.38, "elapsed_time": "2:14:03", "remaining_time": "2:12:02", "throughput": 8641.54, "total_tokens": 69508456} +{"current_steps": 103115, "total_steps": 204665, "loss": 0.0303, "lr": 1.1605114246486545e-06, "epoch": 2.5191166051840814, "percentage": 50.38, "elapsed_time": "2:14:03", "remaining_time": "2:12:01", "throughput": 8641.57, "total_tokens": 69511656} +{"current_steps": 103120, "total_steps": 204665, "loss": 0.1495, "lr": 1.1604272521764904e-06, "epoch": 2.5192387560159286, "percentage": 50.38, "elapsed_time": "2:14:04", "remaining_time": "2:12:01", "throughput": 8641.63, "total_tokens": 69515112} +{"current_steps": 103125, "total_steps": 204665, "loss": 0.0458, "lr": 1.1603430785376564e-06, "epoch": 2.5193609068477754, "percentage": 50.39, "elapsed_time": "2:14:04", "remaining_time": "2:12:00", "throughput": 8641.66, "total_tokens": 69518376} +{"current_steps": 103130, "total_steps": 204665, "loss": 0.0005, "lr": 1.1602589037327644e-06, "epoch": 2.519483057679623, "percentage": 50.39, "elapsed_time": "2:14:04", "remaining_time": "2:12:00", "throughput": 8641.75, "total_tokens": 69522216} +{"current_steps": 103135, "total_steps": 204665, "loss": 0.0009, "lr": 1.1601747277624265e-06, "epoch": 2.5196052085114697, "percentage": 50.39, "elapsed_time": "2:14:05", "remaining_time": "2:12:00", "throughput": 8641.83, "total_tokens": 69525864} +{"current_steps": 103140, "total_steps": 204665, "loss": 0.0003, "lr": 1.1600905506272552e-06, "epoch": 2.5197273593433174, "percentage": 50.39, "elapsed_time": "2:14:05", "remaining_time": "2:11:59", "throughput": 8641.84, "total_tokens": 69528936} +{"current_steps": 103145, "total_steps": 204665, "loss": 0.0003, "lr": 1.1600063723278618e-06, "epoch": 2.519849510175164, "percentage": 50.4, "elapsed_time": "2:14:05", "remaining_time": "2:11:59", "throughput": 8641.87, "total_tokens": 69532136} +{"current_steps": 103150, "total_steps": 204665, "loss": 0.0004, "lr": 1.1599221928648595e-06, "epoch": 2.5199716610070113, "percentage": 50.4, "elapsed_time": "2:14:06", "remaining_time": "2:11:58", "throughput": 8641.89, "total_tokens": 69535272} +{"current_steps": 103155, "total_steps": 204665, "loss": 0.1149, "lr": 1.1598380122388598e-06, "epoch": 2.5200938118388585, "percentage": 50.4, "elapsed_time": "2:14:06", "remaining_time": "2:11:58", "throughput": 8641.93, "total_tokens": 69538664} +{"current_steps": 103160, "total_steps": 204665, "loss": 0.0008, "lr": 1.1597538304504751e-06, "epoch": 2.5202159626707057, "percentage": 50.4, "elapsed_time": "2:14:06", "remaining_time": "2:11:57", "throughput": 8641.96, "total_tokens": 69541800} +{"current_steps": 103165, "total_steps": 204665, "loss": 0.0886, "lr": 1.1596696475003176e-06, "epoch": 2.520338113502553, "percentage": 50.41, "elapsed_time": "2:14:07", "remaining_time": "2:11:57", "throughput": 8642.02, "total_tokens": 69545320} +{"current_steps": 103170, "total_steps": 204665, "loss": 0.0001, "lr": 1.1595854633889994e-06, "epoch": 2.5204602643344, "percentage": 50.41, "elapsed_time": "2:14:07", "remaining_time": "2:11:57", "throughput": 8642.04, "total_tokens": 69548520} +{"current_steps": 103175, "total_steps": 204665, "loss": 0.0377, "lr": 1.1595012781171326e-06, "epoch": 2.5205824151662473, "percentage": 50.41, "elapsed_time": "2:14:08", "remaining_time": "2:11:56", "throughput": 8642.1, "total_tokens": 69552040} +{"current_steps": 103180, "total_steps": 204665, "loss": 0.0368, "lr": 1.1594170916853298e-06, "epoch": 2.5207045659980944, "percentage": 50.41, "elapsed_time": "2:14:08", "remaining_time": "2:11:56", "throughput": 8642.23, "total_tokens": 69556200} +{"current_steps": 103185, "total_steps": 204665, "loss": 0.0416, "lr": 1.1593329040942032e-06, "epoch": 2.5208267168299416, "percentage": 50.42, "elapsed_time": "2:14:08", "remaining_time": "2:11:55", "throughput": 8642.23, "total_tokens": 69559144} +{"current_steps": 103190, "total_steps": 204665, "loss": 0.0662, "lr": 1.159248715344365e-06, "epoch": 2.520948867661789, "percentage": 50.42, "elapsed_time": "2:14:09", "remaining_time": "2:11:55", "throughput": 8642.33, "total_tokens": 69562984} +{"current_steps": 103195, "total_steps": 204665, "loss": 0.1218, "lr": 1.159164525436427e-06, "epoch": 2.521071018493636, "percentage": 50.42, "elapsed_time": "2:14:09", "remaining_time": "2:11:54", "throughput": 8642.33, "total_tokens": 69566056} +{"current_steps": 103200, "total_steps": 204665, "loss": 0.0007, "lr": 1.1590803343710018e-06, "epoch": 2.521193169325483, "percentage": 50.42, "elapsed_time": "2:14:09", "remaining_time": "2:11:54", "throughput": 8642.36, "total_tokens": 69569256} +{"current_steps": 103205, "total_steps": 204665, "loss": 0.0293, "lr": 1.1589961421487017e-06, "epoch": 2.5213153201573304, "percentage": 50.43, "elapsed_time": "2:14:10", "remaining_time": "2:11:54", "throughput": 8642.39, "total_tokens": 69572456} +{"current_steps": 103210, "total_steps": 204665, "loss": 0.0006, "lr": 1.1589119487701386e-06, "epoch": 2.5214374709891776, "percentage": 50.43, "elapsed_time": "2:14:10", "remaining_time": "2:11:53", "throughput": 8642.44, "total_tokens": 69575912} +{"current_steps": 103215, "total_steps": 204665, "loss": 0.0392, "lr": 1.1588277542359253e-06, "epoch": 2.5215596218210248, "percentage": 50.43, "elapsed_time": "2:14:10", "remaining_time": "2:11:53", "throughput": 8642.5, "total_tokens": 69579432} +{"current_steps": 103220, "total_steps": 204665, "loss": 0.0003, "lr": 1.1587435585466738e-06, "epoch": 2.5216817726528715, "percentage": 50.43, "elapsed_time": "2:14:11", "remaining_time": "2:11:52", "throughput": 8642.57, "total_tokens": 69583016} +{"current_steps": 103225, "total_steps": 204665, "loss": 0.0011, "lr": 1.1586593617029966e-06, "epoch": 2.521803923484719, "percentage": 50.44, "elapsed_time": "2:14:11", "remaining_time": "2:11:52", "throughput": 8642.59, "total_tokens": 69586216} +{"current_steps": 103230, "total_steps": 204665, "loss": 0.0006, "lr": 1.1585751637055056e-06, "epoch": 2.521926074316566, "percentage": 50.44, "elapsed_time": "2:14:11", "remaining_time": "2:11:51", "throughput": 8642.62, "total_tokens": 69589352} +{"current_steps": 103235, "total_steps": 204665, "loss": 0.0373, "lr": 1.1584909645548136e-06, "epoch": 2.5220482251484135, "percentage": 50.44, "elapsed_time": "2:14:12", "remaining_time": "2:11:51", "throughput": 8642.68, "total_tokens": 69592936} +{"current_steps": 103240, "total_steps": 204665, "loss": 0.0005, "lr": 1.1584067642515325e-06, "epoch": 2.5221703759802603, "percentage": 50.44, "elapsed_time": "2:14:12", "remaining_time": "2:11:51", "throughput": 8642.68, "total_tokens": 69595816} +{"current_steps": 103245, "total_steps": 204665, "loss": 0.001, "lr": 1.158322562796275e-06, "epoch": 2.5222925268121075, "percentage": 50.45, "elapsed_time": "2:14:12", "remaining_time": "2:11:50", "throughput": 8642.7, "total_tokens": 69598888} +{"current_steps": 103250, "total_steps": 204665, "loss": 0.0006, "lr": 1.158238360189653e-06, "epoch": 2.5224146776439547, "percentage": 50.45, "elapsed_time": "2:14:13", "remaining_time": "2:11:50", "throughput": 8642.79, "total_tokens": 69602728} +{"current_steps": 103255, "total_steps": 204665, "loss": 0.0002, "lr": 1.1581541564322792e-06, "epoch": 2.522536828475802, "percentage": 50.45, "elapsed_time": "2:14:13", "remaining_time": "2:11:49", "throughput": 8642.82, "total_tokens": 69605928} +{"current_steps": 103260, "total_steps": 204665, "loss": 0.0002, "lr": 1.1580699515247658e-06, "epoch": 2.522658979307649, "percentage": 50.45, "elapsed_time": "2:14:13", "remaining_time": "2:11:49", "throughput": 8642.87, "total_tokens": 69609384} +{"current_steps": 103265, "total_steps": 204665, "loss": 0.0614, "lr": 1.1579857454677253e-06, "epoch": 2.5227811301394962, "percentage": 50.46, "elapsed_time": "2:14:14", "remaining_time": "2:11:48", "throughput": 8642.89, "total_tokens": 69612520} +{"current_steps": 103270, "total_steps": 204665, "loss": 0.0446, "lr": 1.1579015382617696e-06, "epoch": 2.5229032809713434, "percentage": 50.46, "elapsed_time": "2:14:14", "remaining_time": "2:11:48", "throughput": 8642.99, "total_tokens": 69616424} +{"current_steps": 103275, "total_steps": 204665, "loss": 0.0894, "lr": 1.1578173299075118e-06, "epoch": 2.5230254318031906, "percentage": 50.46, "elapsed_time": "2:14:15", "remaining_time": "2:11:47", "throughput": 8643.03, "total_tokens": 69619688} +{"current_steps": 103280, "total_steps": 204665, "loss": 0.0003, "lr": 1.1577331204055638e-06, "epoch": 2.523147582635038, "percentage": 50.46, "elapsed_time": "2:14:15", "remaining_time": "2:11:47", "throughput": 8643.07, "total_tokens": 69623080} +{"current_steps": 103285, "total_steps": 204665, "loss": 0.0001, "lr": 1.1576489097565383e-06, "epoch": 2.523269733466885, "percentage": 50.47, "elapsed_time": "2:14:15", "remaining_time": "2:11:47", "throughput": 8643.12, "total_tokens": 69626408} +{"current_steps": 103290, "total_steps": 204665, "loss": 0.0268, "lr": 1.1575646979610475e-06, "epoch": 2.523391884298732, "percentage": 50.47, "elapsed_time": "2:14:16", "remaining_time": "2:11:46", "throughput": 8643.2, "total_tokens": 69630120} +{"current_steps": 103295, "total_steps": 204665, "loss": 0.0504, "lr": 1.1574804850197037e-06, "epoch": 2.5235140351305794, "percentage": 50.47, "elapsed_time": "2:14:16", "remaining_time": "2:11:46", "throughput": 8643.26, "total_tokens": 69633704} +{"current_steps": 103300, "total_steps": 204665, "loss": 0.0002, "lr": 1.1573962709331196e-06, "epoch": 2.5236361859624266, "percentage": 50.47, "elapsed_time": "2:14:16", "remaining_time": "2:11:45", "throughput": 8643.3, "total_tokens": 69636968} +{"current_steps": 103305, "total_steps": 204665, "loss": 0.0004, "lr": 1.1573120557019071e-06, "epoch": 2.5237583367942733, "percentage": 50.48, "elapsed_time": "2:14:17", "remaining_time": "2:11:45", "throughput": 8643.33, "total_tokens": 69640232} +{"current_steps": 103310, "total_steps": 204665, "loss": 0.0786, "lr": 1.1572278393266794e-06, "epoch": 2.523880487626121, "percentage": 50.48, "elapsed_time": "2:14:17", "remaining_time": "2:11:44", "throughput": 8643.37, "total_tokens": 69643496} +{"current_steps": 103315, "total_steps": 204665, "loss": 0.0001, "lr": 1.1571436218080485e-06, "epoch": 2.5240026384579677, "percentage": 50.48, "elapsed_time": "2:14:17", "remaining_time": "2:11:44", "throughput": 8643.45, "total_tokens": 69647208} +{"current_steps": 103320, "total_steps": 204665, "loss": 0.0002, "lr": 1.157059403146627e-06, "epoch": 2.5241247892898153, "percentage": 50.48, "elapsed_time": "2:14:18", "remaining_time": "2:11:44", "throughput": 8643.49, "total_tokens": 69650600} +{"current_steps": 103325, "total_steps": 204665, "loss": 0.0005, "lr": 1.156975183343027e-06, "epoch": 2.524246940121662, "percentage": 50.48, "elapsed_time": "2:14:18", "remaining_time": "2:11:43", "throughput": 8643.56, "total_tokens": 69654184} +{"current_steps": 103330, "total_steps": 204665, "loss": 0.1288, "lr": 1.1568909623978612e-06, "epoch": 2.5243690909535093, "percentage": 50.49, "elapsed_time": "2:14:18", "remaining_time": "2:11:43", "throughput": 8643.59, "total_tokens": 69657448} +{"current_steps": 103335, "total_steps": 204665, "loss": 0.0426, "lr": 1.1568067403117426e-06, "epoch": 2.5244912417853564, "percentage": 50.49, "elapsed_time": "2:14:19", "remaining_time": "2:11:42", "throughput": 8643.63, "total_tokens": 69660712} +{"current_steps": 103340, "total_steps": 204665, "loss": 0.0002, "lr": 1.1567225170852828e-06, "epoch": 2.5246133926172036, "percentage": 50.49, "elapsed_time": "2:14:19", "remaining_time": "2:11:42", "throughput": 8643.68, "total_tokens": 69664104} +{"current_steps": 103345, "total_steps": 204665, "loss": 0.0005, "lr": 1.156638292719095e-06, "epoch": 2.524735543449051, "percentage": 50.49, "elapsed_time": "2:14:19", "remaining_time": "2:11:41", "throughput": 8643.73, "total_tokens": 69667496} +{"current_steps": 103350, "total_steps": 204665, "loss": 0.0001, "lr": 1.1565540672137913e-06, "epoch": 2.524857694280898, "percentage": 50.5, "elapsed_time": "2:14:20", "remaining_time": "2:11:41", "throughput": 8643.73, "total_tokens": 69670504} +{"current_steps": 103355, "total_steps": 204665, "loss": 0.0945, "lr": 1.1564698405699843e-06, "epoch": 2.524979845112745, "percentage": 50.5, "elapsed_time": "2:14:20", "remaining_time": "2:11:41", "throughput": 8643.74, "total_tokens": 69673512} +{"current_steps": 103360, "total_steps": 204665, "loss": 0.0491, "lr": 1.1563856127882865e-06, "epoch": 2.5251019959445924, "percentage": 50.5, "elapsed_time": "2:14:20", "remaining_time": "2:11:40", "throughput": 8643.8, "total_tokens": 69677032} +{"current_steps": 103365, "total_steps": 204665, "loss": 0.0574, "lr": 1.1563013838693102e-06, "epoch": 2.5252241467764396, "percentage": 50.5, "elapsed_time": "2:14:21", "remaining_time": "2:11:40", "throughput": 8643.88, "total_tokens": 69680680} +{"current_steps": 103370, "total_steps": 204665, "loss": 0.0349, "lr": 1.1562171538136684e-06, "epoch": 2.5253462976082868, "percentage": 50.51, "elapsed_time": "2:14:21", "remaining_time": "2:11:39", "throughput": 8643.94, "total_tokens": 69684200} +{"current_steps": 103375, "total_steps": 204665, "loss": 0.0358, "lr": 1.1561329226219736e-06, "epoch": 2.525468448440134, "percentage": 50.51, "elapsed_time": "2:14:21", "remaining_time": "2:11:39", "throughput": 8643.99, "total_tokens": 69687656} +{"current_steps": 103380, "total_steps": 204665, "loss": 0.0501, "lr": 1.156048690294838e-06, "epoch": 2.525590599271981, "percentage": 50.51, "elapsed_time": "2:14:22", "remaining_time": "2:11:38", "throughput": 8644.05, "total_tokens": 69691112} +{"current_steps": 103385, "total_steps": 204665, "loss": 0.0477, "lr": 1.1559644568328746e-06, "epoch": 2.5257127501038283, "percentage": 50.51, "elapsed_time": "2:14:22", "remaining_time": "2:11:38", "throughput": 8644.05, "total_tokens": 69694056} +{"current_steps": 103390, "total_steps": 204665, "loss": 0.0014, "lr": 1.1558802222366954e-06, "epoch": 2.5258349009356755, "percentage": 50.52, "elapsed_time": "2:14:23", "remaining_time": "2:11:38", "throughput": 8644.07, "total_tokens": 69697192} +{"current_steps": 103395, "total_steps": 204665, "loss": 0.01, "lr": 1.1557959865069133e-06, "epoch": 2.5259570517675227, "percentage": 50.52, "elapsed_time": "2:14:23", "remaining_time": "2:11:37", "throughput": 8644.08, "total_tokens": 69700264} +{"current_steps": 103400, "total_steps": 204665, "loss": 0.0002, "lr": 1.1557117496441414e-06, "epoch": 2.5260792025993695, "percentage": 50.52, "elapsed_time": "2:14:23", "remaining_time": "2:11:37", "throughput": 8644.15, "total_tokens": 69703784} +{"current_steps": 103405, "total_steps": 204665, "loss": 0.0667, "lr": 1.1556275116489913e-06, "epoch": 2.526201353431217, "percentage": 50.52, "elapsed_time": "2:14:24", "remaining_time": "2:11:36", "throughput": 8644.2, "total_tokens": 69707240} +{"current_steps": 103410, "total_steps": 204665, "loss": 0.0407, "lr": 1.1555432725220762e-06, "epoch": 2.526323504263064, "percentage": 50.53, "elapsed_time": "2:14:24", "remaining_time": "2:11:36", "throughput": 8644.2, "total_tokens": 69710120} +{"current_steps": 103415, "total_steps": 204665, "loss": 0.0411, "lr": 1.1554590322640088e-06, "epoch": 2.526445655094911, "percentage": 50.53, "elapsed_time": "2:14:24", "remaining_time": "2:11:35", "throughput": 8644.24, "total_tokens": 69713448} +{"current_steps": 103420, "total_steps": 204665, "loss": 0.0514, "lr": 1.1553747908754012e-06, "epoch": 2.5265678059267582, "percentage": 50.53, "elapsed_time": "2:14:25", "remaining_time": "2:11:35", "throughput": 8644.28, "total_tokens": 69716776} +{"current_steps": 103425, "total_steps": 204665, "loss": 0.1284, "lr": 1.1552905483568662e-06, "epoch": 2.5266899567586054, "percentage": 50.53, "elapsed_time": "2:14:25", "remaining_time": "2:11:35", "throughput": 8644.33, "total_tokens": 69720168} +{"current_steps": 103430, "total_steps": 204665, "loss": 0.0299, "lr": 1.1552063047090167e-06, "epoch": 2.5268121075904526, "percentage": 50.54, "elapsed_time": "2:14:25", "remaining_time": "2:11:34", "throughput": 8644.37, "total_tokens": 69723496} +{"current_steps": 103435, "total_steps": 204665, "loss": 0.0372, "lr": 1.1551220599324654e-06, "epoch": 2.5269342584223, "percentage": 50.54, "elapsed_time": "2:14:26", "remaining_time": "2:11:34", "throughput": 8644.45, "total_tokens": 69727144} +{"current_steps": 103440, "total_steps": 204665, "loss": 0.0396, "lr": 1.1550378140278245e-06, "epoch": 2.527056409254147, "percentage": 50.54, "elapsed_time": "2:14:26", "remaining_time": "2:11:33", "throughput": 8644.49, "total_tokens": 69730536} +{"current_steps": 103445, "total_steps": 204665, "loss": 0.0008, "lr": 1.1549535669957072e-06, "epoch": 2.527178560085994, "percentage": 50.54, "elapsed_time": "2:14:26", "remaining_time": "2:11:33", "throughput": 8644.54, "total_tokens": 69733928} +{"current_steps": 103450, "total_steps": 204665, "loss": 0.0341, "lr": 1.1548693188367256e-06, "epoch": 2.5273007109178414, "percentage": 50.55, "elapsed_time": "2:14:27", "remaining_time": "2:11:32", "throughput": 8644.57, "total_tokens": 69737192} +{"current_steps": 103455, "total_steps": 204665, "loss": 0.0003, "lr": 1.1547850695514929e-06, "epoch": 2.5274228617496886, "percentage": 50.55, "elapsed_time": "2:14:27", "remaining_time": "2:11:32", "throughput": 8644.59, "total_tokens": 69740200} +{"current_steps": 103460, "total_steps": 204665, "loss": 0.0002, "lr": 1.1547008191406213e-06, "epoch": 2.5275450125815357, "percentage": 50.55, "elapsed_time": "2:14:27", "remaining_time": "2:11:32", "throughput": 8644.65, "total_tokens": 69743720} +{"current_steps": 103465, "total_steps": 204665, "loss": 0.0002, "lr": 1.154616567604724e-06, "epoch": 2.527667163413383, "percentage": 50.55, "elapsed_time": "2:14:28", "remaining_time": "2:11:31", "throughput": 8644.67, "total_tokens": 69746856} +{"current_steps": 103470, "total_steps": 204665, "loss": 0.0006, "lr": 1.1545323149444132e-06, "epoch": 2.52778931424523, "percentage": 50.56, "elapsed_time": "2:14:28", "remaining_time": "2:11:31", "throughput": 8644.72, "total_tokens": 69750248} +{"current_steps": 103475, "total_steps": 204665, "loss": 0.0527, "lr": 1.1544480611603021e-06, "epoch": 2.5279114650770773, "percentage": 50.56, "elapsed_time": "2:14:28", "remaining_time": "2:11:30", "throughput": 8644.76, "total_tokens": 69753576} +{"current_steps": 103480, "total_steps": 204665, "loss": 0.0353, "lr": 1.154363806253003e-06, "epoch": 2.5280336159089245, "percentage": 50.56, "elapsed_time": "2:14:29", "remaining_time": "2:11:30", "throughput": 8644.78, "total_tokens": 69756712} +{"current_steps": 103485, "total_steps": 204665, "loss": 0.0446, "lr": 1.1542795502231289e-06, "epoch": 2.5281557667407712, "percentage": 50.56, "elapsed_time": "2:14:29", "remaining_time": "2:11:29", "throughput": 8644.79, "total_tokens": 69759720} +{"current_steps": 103490, "total_steps": 204665, "loss": 0.0001, "lr": 1.1541952930712919e-06, "epoch": 2.528277917572619, "percentage": 50.57, "elapsed_time": "2:14:29", "remaining_time": "2:11:29", "throughput": 8644.82, "total_tokens": 69762920} +{"current_steps": 103495, "total_steps": 204665, "loss": 0.0397, "lr": 1.1541110347981059e-06, "epoch": 2.5284000684044656, "percentage": 50.57, "elapsed_time": "2:14:30", "remaining_time": "2:11:28", "throughput": 8644.88, "total_tokens": 69766376} +{"current_steps": 103500, "total_steps": 204665, "loss": 0.0452, "lr": 1.1540267754041826e-06, "epoch": 2.5285222192363133, "percentage": 50.57, "elapsed_time": "2:14:30", "remaining_time": "2:11:28", "throughput": 8644.96, "total_tokens": 69770088} +{"current_steps": 103505, "total_steps": 204665, "loss": 0.0005, "lr": 1.1539425148901356e-06, "epoch": 2.52864437006816, "percentage": 50.57, "elapsed_time": "2:14:30", "remaining_time": "2:11:28", "throughput": 8645.04, "total_tokens": 69773864} +{"current_steps": 103510, "total_steps": 204665, "loss": 0.0002, "lr": 1.1538582532565768e-06, "epoch": 2.528766520900007, "percentage": 50.58, "elapsed_time": "2:14:31", "remaining_time": "2:11:27", "throughput": 8645.12, "total_tokens": 69777576} +{"current_steps": 103515, "total_steps": 204665, "loss": 0.0003, "lr": 1.1537739905041197e-06, "epoch": 2.5288886717318544, "percentage": 50.58, "elapsed_time": "2:14:31", "remaining_time": "2:11:27", "throughput": 8645.2, "total_tokens": 69781288} +{"current_steps": 103520, "total_steps": 204665, "loss": 0.0001, "lr": 1.1536897266333766e-06, "epoch": 2.5290108225637016, "percentage": 50.58, "elapsed_time": "2:14:32", "remaining_time": "2:11:26", "throughput": 8645.23, "total_tokens": 69784488} +{"current_steps": 103525, "total_steps": 204665, "loss": 0.0615, "lr": 1.1536054616449602e-06, "epoch": 2.5291329733955488, "percentage": 50.58, "elapsed_time": "2:14:32", "remaining_time": "2:11:26", "throughput": 8645.27, "total_tokens": 69787752} +{"current_steps": 103530, "total_steps": 204665, "loss": 0.0001, "lr": 1.153521195539484e-06, "epoch": 2.529255124227396, "percentage": 50.59, "elapsed_time": "2:14:32", "remaining_time": "2:11:25", "throughput": 8645.29, "total_tokens": 69790888} +{"current_steps": 103535, "total_steps": 204665, "loss": 0.059, "lr": 1.1534369283175602e-06, "epoch": 2.529377275059243, "percentage": 50.59, "elapsed_time": "2:14:33", "remaining_time": "2:11:25", "throughput": 8645.35, "total_tokens": 69794408} +{"current_steps": 103540, "total_steps": 204665, "loss": 0.154, "lr": 1.1533526599798017e-06, "epoch": 2.5294994258910903, "percentage": 50.59, "elapsed_time": "2:14:33", "remaining_time": "2:11:25", "throughput": 8645.39, "total_tokens": 69797672} +{"current_steps": 103545, "total_steps": 204665, "loss": 0.0, "lr": 1.1532683905268216e-06, "epoch": 2.5296215767229375, "percentage": 50.59, "elapsed_time": "2:14:33", "remaining_time": "2:11:24", "throughput": 8645.4, "total_tokens": 69800744} +{"current_steps": 103550, "total_steps": 204665, "loss": 0.0537, "lr": 1.1531841199592323e-06, "epoch": 2.5297437275547847, "percentage": 50.59, "elapsed_time": "2:14:34", "remaining_time": "2:11:24", "throughput": 8645.48, "total_tokens": 69804456} +{"current_steps": 103555, "total_steps": 204665, "loss": 0.0003, "lr": 1.1530998482776473e-06, "epoch": 2.529865878386632, "percentage": 50.6, "elapsed_time": "2:14:34", "remaining_time": "2:11:23", "throughput": 8645.52, "total_tokens": 69807784} +{"current_steps": 103560, "total_steps": 204665, "loss": 0.0007, "lr": 1.1530155754826788e-06, "epoch": 2.529988029218479, "percentage": 50.6, "elapsed_time": "2:14:34", "remaining_time": "2:11:23", "throughput": 8645.6, "total_tokens": 69811496} +{"current_steps": 103565, "total_steps": 204665, "loss": 0.0411, "lr": 1.1529313015749399e-06, "epoch": 2.5301101800503263, "percentage": 50.6, "elapsed_time": "2:14:35", "remaining_time": "2:11:22", "throughput": 8645.65, "total_tokens": 69814888} +{"current_steps": 103570, "total_steps": 204665, "loss": 0.0004, "lr": 1.1528470265550434e-06, "epoch": 2.530232330882173, "percentage": 50.6, "elapsed_time": "2:14:35", "remaining_time": "2:11:22", "throughput": 8645.66, "total_tokens": 69817896} +{"current_steps": 103575, "total_steps": 204665, "loss": 0.0003, "lr": 1.1527627504236022e-06, "epoch": 2.5303544817140207, "percentage": 50.61, "elapsed_time": "2:14:35", "remaining_time": "2:11:22", "throughput": 8645.72, "total_tokens": 69821416} +{"current_steps": 103580, "total_steps": 204665, "loss": 0.0481, "lr": 1.1526784731812292e-06, "epoch": 2.5304766325458674, "percentage": 50.61, "elapsed_time": "2:14:36", "remaining_time": "2:11:21", "throughput": 8645.77, "total_tokens": 69824808} +{"current_steps": 103585, "total_steps": 204665, "loss": 0.0322, "lr": 1.1525941948285372e-06, "epoch": 2.530598783377715, "percentage": 50.61, "elapsed_time": "2:14:36", "remaining_time": "2:11:21", "throughput": 8645.78, "total_tokens": 69827880} +{"current_steps": 103590, "total_steps": 204665, "loss": 0.0001, "lr": 1.1525099153661391e-06, "epoch": 2.530720934209562, "percentage": 50.61, "elapsed_time": "2:14:36", "remaining_time": "2:11:20", "throughput": 8645.81, "total_tokens": 69831144} +{"current_steps": 103595, "total_steps": 204665, "loss": 0.0001, "lr": 1.1524256347946482e-06, "epoch": 2.530843085041409, "percentage": 50.62, "elapsed_time": "2:14:37", "remaining_time": "2:11:20", "throughput": 8645.83, "total_tokens": 69834216} +{"current_steps": 103600, "total_steps": 204665, "loss": 0.0003, "lr": 1.1523413531146768e-06, "epoch": 2.530965235873256, "percentage": 50.62, "elapsed_time": "2:14:37", "remaining_time": "2:11:19", "throughput": 8645.89, "total_tokens": 69837736} +{"current_steps": 103605, "total_steps": 204665, "loss": 0.2075, "lr": 1.1522570703268381e-06, "epoch": 2.5310873867051034, "percentage": 50.62, "elapsed_time": "2:14:37", "remaining_time": "2:11:19", "throughput": 8645.94, "total_tokens": 69841192} +{"current_steps": 103610, "total_steps": 204665, "loss": 0.0003, "lr": 1.152172786431745e-06, "epoch": 2.5312095375369505, "percentage": 50.62, "elapsed_time": "2:14:38", "remaining_time": "2:11:19", "throughput": 8645.99, "total_tokens": 69844584} +{"current_steps": 103615, "total_steps": 204665, "loss": 0.0927, "lr": 1.152088501430011e-06, "epoch": 2.5313316883687977, "percentage": 50.63, "elapsed_time": "2:14:38", "remaining_time": "2:11:18", "throughput": 8646.01, "total_tokens": 69847656} +{"current_steps": 103620, "total_steps": 204665, "loss": 0.1143, "lr": 1.152004215322248e-06, "epoch": 2.531453839200645, "percentage": 50.63, "elapsed_time": "2:14:38", "remaining_time": "2:11:18", "throughput": 8646.04, "total_tokens": 69850920} +{"current_steps": 103625, "total_steps": 204665, "loss": 0.0559, "lr": 1.1519199281090697e-06, "epoch": 2.531575990032492, "percentage": 50.63, "elapsed_time": "2:14:39", "remaining_time": "2:11:17", "throughput": 8646.08, "total_tokens": 69854248} +{"current_steps": 103630, "total_steps": 204665, "loss": 0.0527, "lr": 1.1518356397910887e-06, "epoch": 2.5316981408643393, "percentage": 50.63, "elapsed_time": "2:14:39", "remaining_time": "2:11:17", "throughput": 8646.13, "total_tokens": 69857640} +{"current_steps": 103635, "total_steps": 204665, "loss": 0.0001, "lr": 1.151751350368918e-06, "epoch": 2.5318202916961865, "percentage": 50.64, "elapsed_time": "2:14:39", "remaining_time": "2:11:16", "throughput": 8646.2, "total_tokens": 69861288} +{"current_steps": 103640, "total_steps": 204665, "loss": 0.076, "lr": 1.1516670598431709e-06, "epoch": 2.5319424425280337, "percentage": 50.64, "elapsed_time": "2:14:40", "remaining_time": "2:11:16", "throughput": 8646.26, "total_tokens": 69864744} +{"current_steps": 103645, "total_steps": 204665, "loss": 0.0737, "lr": 1.15158276821446e-06, "epoch": 2.532064593359881, "percentage": 50.64, "elapsed_time": "2:14:40", "remaining_time": "2:11:16", "throughput": 8646.28, "total_tokens": 69867880} +{"current_steps": 103650, "total_steps": 204665, "loss": 0.0002, "lr": 1.1514984754833983e-06, "epoch": 2.532186744191728, "percentage": 50.64, "elapsed_time": "2:14:41", "remaining_time": "2:11:15", "throughput": 8646.35, "total_tokens": 69871464} +{"current_steps": 103655, "total_steps": 204665, "loss": 0.0004, "lr": 1.1514141816505992e-06, "epoch": 2.5323088950235753, "percentage": 50.65, "elapsed_time": "2:14:41", "remaining_time": "2:11:15", "throughput": 8646.37, "total_tokens": 69874600} +{"current_steps": 103660, "total_steps": 204665, "loss": 0.0003, "lr": 1.1513298867166755e-06, "epoch": 2.5324310458554224, "percentage": 50.65, "elapsed_time": "2:14:41", "remaining_time": "2:11:14", "throughput": 8646.39, "total_tokens": 69877736} +{"current_steps": 103665, "total_steps": 204665, "loss": 0.0001, "lr": 1.1512455906822398e-06, "epoch": 2.532553196687269, "percentage": 50.65, "elapsed_time": "2:14:42", "remaining_time": "2:11:14", "throughput": 8646.43, "total_tokens": 69881000} +{"current_steps": 103670, "total_steps": 204665, "loss": 0.0609, "lr": 1.1511612935479058e-06, "epoch": 2.532675347519117, "percentage": 50.65, "elapsed_time": "2:14:42", "remaining_time": "2:11:13", "throughput": 8646.48, "total_tokens": 69884456} +{"current_steps": 103675, "total_steps": 204665, "loss": 0.0002, "lr": 1.1510769953142858e-06, "epoch": 2.5327974983509636, "percentage": 50.66, "elapsed_time": "2:14:42", "remaining_time": "2:11:13", "throughput": 8646.54, "total_tokens": 69887976} +{"current_steps": 103680, "total_steps": 204665, "loss": 0.0003, "lr": 1.1509926959819936e-06, "epoch": 2.532919649182811, "percentage": 50.66, "elapsed_time": "2:14:43", "remaining_time": "2:11:13", "throughput": 8646.58, "total_tokens": 69891304} +{"current_steps": 103685, "total_steps": 204665, "loss": 0.0623, "lr": 1.1509083955516418e-06, "epoch": 2.533041800014658, "percentage": 50.66, "elapsed_time": "2:14:43", "remaining_time": "2:11:12", "throughput": 8646.63, "total_tokens": 69894696} +{"current_steps": 103690, "total_steps": 204665, "loss": 0.0379, "lr": 1.1508240940238438e-06, "epoch": 2.533163950846505, "percentage": 50.66, "elapsed_time": "2:14:43", "remaining_time": "2:11:12", "throughput": 8646.66, "total_tokens": 69897960} +{"current_steps": 103695, "total_steps": 204665, "loss": 0.0005, "lr": 1.150739791399212e-06, "epoch": 2.5332861016783523, "percentage": 50.67, "elapsed_time": "2:14:44", "remaining_time": "2:11:11", "throughput": 8646.71, "total_tokens": 69901352} +{"current_steps": 103700, "total_steps": 204665, "loss": 0.0708, "lr": 1.1506554876783604e-06, "epoch": 2.5334082525101995, "percentage": 50.67, "elapsed_time": "2:14:44", "remaining_time": "2:11:11", "throughput": 8646.76, "total_tokens": 69904744} +{"current_steps": 103705, "total_steps": 204665, "loss": 0.0433, "lr": 1.1505711828619008e-06, "epoch": 2.5335304033420467, "percentage": 50.67, "elapsed_time": "2:14:44", "remaining_time": "2:11:10", "throughput": 8646.83, "total_tokens": 69908328} +{"current_steps": 103710, "total_steps": 204665, "loss": 0.0002, "lr": 1.150486876950448e-06, "epoch": 2.533652554173894, "percentage": 50.67, "elapsed_time": "2:14:45", "remaining_time": "2:11:10", "throughput": 8646.89, "total_tokens": 69911848} +{"current_steps": 103715, "total_steps": 204665, "loss": 0.0379, "lr": 1.1504025699446136e-06, "epoch": 2.533774705005741, "percentage": 50.68, "elapsed_time": "2:14:45", "remaining_time": "2:11:09", "throughput": 8646.91, "total_tokens": 69914984} +{"current_steps": 103720, "total_steps": 204665, "loss": 0.04, "lr": 1.1503182618450114e-06, "epoch": 2.5338968558375883, "percentage": 50.68, "elapsed_time": "2:14:45", "remaining_time": "2:11:09", "throughput": 8646.96, "total_tokens": 69918440} +{"current_steps": 103725, "total_steps": 204665, "loss": 0.103, "lr": 1.1502339526522545e-06, "epoch": 2.5340190066694355, "percentage": 50.68, "elapsed_time": "2:14:46", "remaining_time": "2:11:09", "throughput": 8646.97, "total_tokens": 69921448} +{"current_steps": 103730, "total_steps": 204665, "loss": 0.1077, "lr": 1.1501496423669557e-06, "epoch": 2.5341411575012827, "percentage": 50.68, "elapsed_time": "2:14:46", "remaining_time": "2:11:08", "throughput": 8647.01, "total_tokens": 69924776} +{"current_steps": 103735, "total_steps": 204665, "loss": 0.0513, "lr": 1.1500653309897282e-06, "epoch": 2.53426330833313, "percentage": 50.69, "elapsed_time": "2:14:46", "remaining_time": "2:11:08", "throughput": 8647.03, "total_tokens": 69927912} +{"current_steps": 103740, "total_steps": 204665, "loss": 0.1221, "lr": 1.1499810185211853e-06, "epoch": 2.534385459164977, "percentage": 50.69, "elapsed_time": "2:14:47", "remaining_time": "2:11:07", "throughput": 8647.06, "total_tokens": 69931112} +{"current_steps": 103745, "total_steps": 204665, "loss": 0.0003, "lr": 1.14989670496194e-06, "epoch": 2.5345076099968242, "percentage": 50.69, "elapsed_time": "2:14:47", "remaining_time": "2:11:07", "throughput": 8647.14, "total_tokens": 69934760} +{"current_steps": 103750, "total_steps": 204665, "loss": 0.0005, "lr": 1.149812390312606e-06, "epoch": 2.534629760828671, "percentage": 50.69, "elapsed_time": "2:14:47", "remaining_time": "2:11:06", "throughput": 8647.22, "total_tokens": 69938472} +{"current_steps": 103755, "total_steps": 204665, "loss": 0.0005, "lr": 1.1497280745737955e-06, "epoch": 2.5347519116605186, "percentage": 50.7, "elapsed_time": "2:14:48", "remaining_time": "2:11:06", "throughput": 8647.23, "total_tokens": 69941544} +{"current_steps": 103760, "total_steps": 204665, "loss": 0.0577, "lr": 1.1496437577461227e-06, "epoch": 2.5348740624923654, "percentage": 50.7, "elapsed_time": "2:14:48", "remaining_time": "2:11:06", "throughput": 8647.26, "total_tokens": 69944744} +{"current_steps": 103765, "total_steps": 204665, "loss": 0.0896, "lr": 1.1495594398301998e-06, "epoch": 2.534996213324213, "percentage": 50.7, "elapsed_time": "2:14:49", "remaining_time": "2:11:05", "throughput": 8647.29, "total_tokens": 69948008} +{"current_steps": 103770, "total_steps": 204665, "loss": 0.0024, "lr": 1.1494751208266408e-06, "epoch": 2.5351183641560597, "percentage": 50.7, "elapsed_time": "2:14:49", "remaining_time": "2:11:05", "throughput": 8647.32, "total_tokens": 69951144} +{"current_steps": 103775, "total_steps": 204665, "loss": 0.0001, "lr": 1.1493908007360581e-06, "epoch": 2.535240514987907, "percentage": 50.7, "elapsed_time": "2:14:49", "remaining_time": "2:11:04", "throughput": 8647.44, "total_tokens": 69955304} +{"current_steps": 103780, "total_steps": 204665, "loss": 0.0611, "lr": 1.1493064795590655e-06, "epoch": 2.535362665819754, "percentage": 50.71, "elapsed_time": "2:14:50", "remaining_time": "2:11:04", "throughput": 8647.49, "total_tokens": 69958696} +{"current_steps": 103785, "total_steps": 204665, "loss": 0.0002, "lr": 1.1492221572962762e-06, "epoch": 2.5354848166516013, "percentage": 50.71, "elapsed_time": "2:14:50", "remaining_time": "2:11:03", "throughput": 8647.53, "total_tokens": 69962024} +{"current_steps": 103790, "total_steps": 204665, "loss": 0.1421, "lr": 1.1491378339483028e-06, "epoch": 2.5356069674834485, "percentage": 50.71, "elapsed_time": "2:14:50", "remaining_time": "2:11:03", "throughput": 8647.58, "total_tokens": 69965480} +{"current_steps": 103795, "total_steps": 204665, "loss": 0.0547, "lr": 1.1490535095157594e-06, "epoch": 2.5357291183152957, "percentage": 50.71, "elapsed_time": "2:14:51", "remaining_time": "2:11:03", "throughput": 8647.62, "total_tokens": 69968808} +{"current_steps": 103800, "total_steps": 204665, "loss": 0.1093, "lr": 1.1489691839992584e-06, "epoch": 2.535851269147143, "percentage": 50.72, "elapsed_time": "2:14:51", "remaining_time": "2:11:02", "throughput": 8647.66, "total_tokens": 69972072} +{"current_steps": 103805, "total_steps": 204665, "loss": 0.0117, "lr": 1.1488848573994137e-06, "epoch": 2.53597341997899, "percentage": 50.72, "elapsed_time": "2:14:51", "remaining_time": "2:11:02", "throughput": 8647.72, "total_tokens": 69975592} +{"current_steps": 103810, "total_steps": 204665, "loss": 0.0939, "lr": 1.148800529716838e-06, "epoch": 2.5360955708108373, "percentage": 50.72, "elapsed_time": "2:14:52", "remaining_time": "2:11:01", "throughput": 8647.75, "total_tokens": 69978792} +{"current_steps": 103815, "total_steps": 204665, "loss": 0.0549, "lr": 1.1487162009521453e-06, "epoch": 2.5362177216426844, "percentage": 50.72, "elapsed_time": "2:14:52", "remaining_time": "2:11:01", "throughput": 8647.88, "total_tokens": 69983080} +{"current_steps": 103820, "total_steps": 204665, "loss": 0.0003, "lr": 1.1486318711059481e-06, "epoch": 2.5363398724745316, "percentage": 50.73, "elapsed_time": "2:14:52", "remaining_time": "2:11:00", "throughput": 8647.92, "total_tokens": 69986408} +{"current_steps": 103825, "total_steps": 204665, "loss": 0.0032, "lr": 1.14854754017886e-06, "epoch": 2.536462023306379, "percentage": 50.73, "elapsed_time": "2:14:53", "remaining_time": "2:11:00", "throughput": 8647.96, "total_tokens": 69989672} +{"current_steps": 103830, "total_steps": 204665, "loss": 0.0553, "lr": 1.1484632081714941e-06, "epoch": 2.536584174138226, "percentage": 50.73, "elapsed_time": "2:14:53", "remaining_time": "2:11:00", "throughput": 8647.99, "total_tokens": 69992872} +{"current_steps": 103835, "total_steps": 204665, "loss": 0.0003, "lr": 1.148378875084464e-06, "epoch": 2.536706324970073, "percentage": 50.73, "elapsed_time": "2:14:53", "remaining_time": "2:10:59", "throughput": 8648.04, "total_tokens": 69996328} +{"current_steps": 103840, "total_steps": 204665, "loss": 0.0003, "lr": 1.1482945409183825e-06, "epoch": 2.5368284758019204, "percentage": 50.74, "elapsed_time": "2:14:54", "remaining_time": "2:10:59", "throughput": 8648.05, "total_tokens": 69999400} +{"current_steps": 103845, "total_steps": 204665, "loss": 0.0552, "lr": 1.1482102056738636e-06, "epoch": 2.536950626633767, "percentage": 50.74, "elapsed_time": "2:14:54", "remaining_time": "2:10:58", "throughput": 8648.09, "total_tokens": 70002728} +{"current_steps": 103850, "total_steps": 204665, "loss": 0.0368, "lr": 1.1481258693515202e-06, "epoch": 2.5370727774656148, "percentage": 50.74, "elapsed_time": "2:14:54", "remaining_time": "2:10:58", "throughput": 8648.21, "total_tokens": 70006824} +{"current_steps": 103855, "total_steps": 204665, "loss": 0.0001, "lr": 1.1480415319519653e-06, "epoch": 2.5371949282974615, "percentage": 50.74, "elapsed_time": "2:14:55", "remaining_time": "2:10:57", "throughput": 8648.31, "total_tokens": 70010792} +{"current_steps": 103860, "total_steps": 204665, "loss": 0.0492, "lr": 1.1479571934758128e-06, "epoch": 2.5373170791293087, "percentage": 50.75, "elapsed_time": "2:14:55", "remaining_time": "2:10:57", "throughput": 8648.35, "total_tokens": 70014120} +{"current_steps": 103865, "total_steps": 204665, "loss": 0.001, "lr": 1.147872853923676e-06, "epoch": 2.537439229961156, "percentage": 50.75, "elapsed_time": "2:14:56", "remaining_time": "2:10:57", "throughput": 8648.37, "total_tokens": 70017192} +{"current_steps": 103870, "total_steps": 204665, "loss": 0.1266, "lr": 1.1477885132961678e-06, "epoch": 2.537561380793003, "percentage": 50.75, "elapsed_time": "2:14:56", "remaining_time": "2:10:56", "throughput": 8648.48, "total_tokens": 70021288} +{"current_steps": 103875, "total_steps": 204665, "loss": 0.0002, "lr": 1.1477041715939018e-06, "epoch": 2.5376835316248503, "percentage": 50.75, "elapsed_time": "2:14:56", "remaining_time": "2:10:56", "throughput": 8648.52, "total_tokens": 70024616} +{"current_steps": 103880, "total_steps": 204665, "loss": 0.0003, "lr": 1.1476198288174912e-06, "epoch": 2.5378056824566975, "percentage": 50.76, "elapsed_time": "2:14:57", "remaining_time": "2:10:55", "throughput": 8648.54, "total_tokens": 70027752} +{"current_steps": 103885, "total_steps": 204665, "loss": 0.0002, "lr": 1.1475354849675496e-06, "epoch": 2.5379278332885447, "percentage": 50.76, "elapsed_time": "2:14:57", "remaining_time": "2:10:55", "throughput": 8648.58, "total_tokens": 70031080} +{"current_steps": 103890, "total_steps": 204665, "loss": 0.1082, "lr": 1.1474511400446903e-06, "epoch": 2.538049984120392, "percentage": 50.76, "elapsed_time": "2:14:57", "remaining_time": "2:10:54", "throughput": 8648.63, "total_tokens": 70034472} +{"current_steps": 103895, "total_steps": 204665, "loss": 0.0636, "lr": 1.1473667940495265e-06, "epoch": 2.538172134952239, "percentage": 50.76, "elapsed_time": "2:14:58", "remaining_time": "2:10:54", "throughput": 8648.67, "total_tokens": 70037800} +{"current_steps": 103900, "total_steps": 204665, "loss": 0.0506, "lr": 1.1472824469826718e-06, "epoch": 2.5382942857840862, "percentage": 50.77, "elapsed_time": "2:14:58", "remaining_time": "2:10:54", "throughput": 8648.75, "total_tokens": 70041448} +{"current_steps": 103905, "total_steps": 204665, "loss": 0.0272, "lr": 1.1471980988447397e-06, "epoch": 2.5384164366159334, "percentage": 50.77, "elapsed_time": "2:14:58", "remaining_time": "2:10:53", "throughput": 8648.78, "total_tokens": 70044712} +{"current_steps": 103910, "total_steps": 204665, "loss": 0.0003, "lr": 1.1471137496363435e-06, "epoch": 2.5385385874477806, "percentage": 50.77, "elapsed_time": "2:14:59", "remaining_time": "2:10:53", "throughput": 8648.81, "total_tokens": 70047848} +{"current_steps": 103915, "total_steps": 204665, "loss": 0.0003, "lr": 1.1470293993580961e-06, "epoch": 2.538660738279628, "percentage": 50.77, "elapsed_time": "2:14:59", "remaining_time": "2:10:52", "throughput": 8648.81, "total_tokens": 70050856} +{"current_steps": 103920, "total_steps": 204665, "loss": 0.1717, "lr": 1.1469450480106118e-06, "epoch": 2.538782889111475, "percentage": 50.78, "elapsed_time": "2:14:59", "remaining_time": "2:10:52", "throughput": 8648.84, "total_tokens": 70054056} +{"current_steps": 103925, "total_steps": 204665, "loss": 0.0005, "lr": 1.1468606955945034e-06, "epoch": 2.538905039943322, "percentage": 50.78, "elapsed_time": "2:15:00", "remaining_time": "2:10:51", "throughput": 8648.88, "total_tokens": 70057384} +{"current_steps": 103930, "total_steps": 204665, "loss": 0.0002, "lr": 1.1467763421103846e-06, "epoch": 2.539027190775169, "percentage": 50.78, "elapsed_time": "2:15:00", "remaining_time": "2:10:51", "throughput": 8648.89, "total_tokens": 70060328} +{"current_steps": 103935, "total_steps": 204665, "loss": 0.0002, "lr": 1.1466919875588688e-06, "epoch": 2.5391493416070166, "percentage": 50.78, "elapsed_time": "2:15:00", "remaining_time": "2:10:51", "throughput": 8648.91, "total_tokens": 70063528} +{"current_steps": 103940, "total_steps": 204665, "loss": 0.0429, "lr": 1.1466076319405693e-06, "epoch": 2.5392714924388633, "percentage": 50.79, "elapsed_time": "2:15:01", "remaining_time": "2:10:50", "throughput": 8648.95, "total_tokens": 70066856} +{"current_steps": 103945, "total_steps": 204665, "loss": 0.0001, "lr": 1.1465232752560996e-06, "epoch": 2.539393643270711, "percentage": 50.79, "elapsed_time": "2:15:01", "remaining_time": "2:10:50", "throughput": 8649.01, "total_tokens": 70070440} +{"current_steps": 103950, "total_steps": 204665, "loss": 0.0763, "lr": 1.1464389175060734e-06, "epoch": 2.5395157941025577, "percentage": 50.79, "elapsed_time": "2:15:01", "remaining_time": "2:10:49", "throughput": 8649.04, "total_tokens": 70073640} +{"current_steps": 103955, "total_steps": 204665, "loss": 0.061, "lr": 1.1463545586911036e-06, "epoch": 2.539637944934405, "percentage": 50.79, "elapsed_time": "2:15:02", "remaining_time": "2:10:49", "throughput": 8649.11, "total_tokens": 70077224} +{"current_steps": 103960, "total_steps": 204665, "loss": 0.0204, "lr": 1.1462701988118047e-06, "epoch": 2.539760095766252, "percentage": 50.8, "elapsed_time": "2:15:02", "remaining_time": "2:10:48", "throughput": 8649.19, "total_tokens": 70080872} +{"current_steps": 103965, "total_steps": 204665, "loss": 0.001, "lr": 1.146185837868789e-06, "epoch": 2.5398822465980992, "percentage": 50.8, "elapsed_time": "2:15:02", "remaining_time": "2:10:48", "throughput": 8649.19, "total_tokens": 70083816} +{"current_steps": 103970, "total_steps": 204665, "loss": 0.0478, "lr": 1.1461014758626712e-06, "epoch": 2.5400043974299464, "percentage": 50.8, "elapsed_time": "2:15:03", "remaining_time": "2:10:48", "throughput": 8649.22, "total_tokens": 70087080} +{"current_steps": 103975, "total_steps": 204665, "loss": 0.0004, "lr": 1.146017112794064e-06, "epoch": 2.5401265482617936, "percentage": 50.8, "elapsed_time": "2:15:03", "remaining_time": "2:10:47", "throughput": 8649.26, "total_tokens": 70090344} +{"current_steps": 103980, "total_steps": 204665, "loss": 0.0001, "lr": 1.1459327486635808e-06, "epoch": 2.540248699093641, "percentage": 50.8, "elapsed_time": "2:15:03", "remaining_time": "2:10:47", "throughput": 8649.28, "total_tokens": 70093480} +{"current_steps": 103985, "total_steps": 204665, "loss": 0.0005, "lr": 1.1458483834718352e-06, "epoch": 2.540370849925488, "percentage": 50.81, "elapsed_time": "2:15:04", "remaining_time": "2:10:46", "throughput": 8649.29, "total_tokens": 70096552} +{"current_steps": 103990, "total_steps": 204665, "loss": 0.0779, "lr": 1.1457640172194414e-06, "epoch": 2.540493000757335, "percentage": 50.81, "elapsed_time": "2:15:04", "remaining_time": "2:10:46", "throughput": 8649.36, "total_tokens": 70100072} +{"current_steps": 103995, "total_steps": 204665, "loss": 0.0007, "lr": 1.1456796499070123e-06, "epoch": 2.5406151515891824, "percentage": 50.81, "elapsed_time": "2:15:04", "remaining_time": "2:10:45", "throughput": 8649.38, "total_tokens": 70103208} +{"current_steps": 104000, "total_steps": 204665, "loss": 0.0004, "lr": 1.1455952815351616e-06, "epoch": 2.5407373024210296, "percentage": 50.81, "elapsed_time": "2:15:05", "remaining_time": "2:10:45", "throughput": 8649.46, "total_tokens": 70106984} +{"current_steps": 104005, "total_steps": 204665, "loss": 0.0016, "lr": 1.1455109121045028e-06, "epoch": 2.5408594532528768, "percentage": 50.82, "elapsed_time": "2:15:05", "remaining_time": "2:10:45", "throughput": 8649.48, "total_tokens": 70110056} +{"current_steps": 104010, "total_steps": 204665, "loss": 0.0786, "lr": 1.1454265416156497e-06, "epoch": 2.540981604084724, "percentage": 50.82, "elapsed_time": "2:15:06", "remaining_time": "2:10:44", "throughput": 8649.51, "total_tokens": 70113256} +{"current_steps": 104015, "total_steps": 204665, "loss": 0.1102, "lr": 1.1453421700692152e-06, "epoch": 2.541103754916571, "percentage": 50.82, "elapsed_time": "2:15:06", "remaining_time": "2:10:44", "throughput": 8649.55, "total_tokens": 70116584} +{"current_steps": 104020, "total_steps": 204665, "loss": 0.0002, "lr": 1.1452577974658139e-06, "epoch": 2.5412259057484183, "percentage": 50.82, "elapsed_time": "2:15:06", "remaining_time": "2:10:43", "throughput": 8649.63, "total_tokens": 70120296} +{"current_steps": 104025, "total_steps": 204665, "loss": 0.0001, "lr": 1.1451734238060587e-06, "epoch": 2.541348056580265, "percentage": 50.83, "elapsed_time": "2:15:07", "remaining_time": "2:10:43", "throughput": 8649.71, "total_tokens": 70124008} +{"current_steps": 104030, "total_steps": 204665, "loss": 0.0318, "lr": 1.145089049090563e-06, "epoch": 2.5414702074121127, "percentage": 50.83, "elapsed_time": "2:15:07", "remaining_time": "2:10:42", "throughput": 8649.75, "total_tokens": 70127336} +{"current_steps": 104035, "total_steps": 204665, "loss": 0.0001, "lr": 1.145004673319941e-06, "epoch": 2.5415923582439595, "percentage": 50.83, "elapsed_time": "2:15:07", "remaining_time": "2:10:42", "throughput": 8649.74, "total_tokens": 70130216} +{"current_steps": 104040, "total_steps": 204665, "loss": 0.08, "lr": 1.144920296494806e-06, "epoch": 2.5417145090758066, "percentage": 50.83, "elapsed_time": "2:15:08", "remaining_time": "2:10:41", "throughput": 8649.74, "total_tokens": 70133032} +{"current_steps": 104045, "total_steps": 204665, "loss": 0.1615, "lr": 1.1448359186157714e-06, "epoch": 2.541836659907654, "percentage": 50.84, "elapsed_time": "2:15:08", "remaining_time": "2:10:41", "throughput": 8649.76, "total_tokens": 70136168} +{"current_steps": 104050, "total_steps": 204665, "loss": 0.0461, "lr": 1.1447515396834513e-06, "epoch": 2.541958810739501, "percentage": 50.84, "elapsed_time": "2:15:08", "remaining_time": "2:10:41", "throughput": 8649.76, "total_tokens": 70139112} +{"current_steps": 104055, "total_steps": 204665, "loss": 0.0632, "lr": 1.144667159698459e-06, "epoch": 2.542080961571348, "percentage": 50.84, "elapsed_time": "2:15:09", "remaining_time": "2:10:40", "throughput": 8649.76, "total_tokens": 70142056} +{"current_steps": 104060, "total_steps": 204665, "loss": 0.0895, "lr": 1.1445827786614082e-06, "epoch": 2.5422031124031954, "percentage": 50.84, "elapsed_time": "2:15:09", "remaining_time": "2:10:40", "throughput": 8649.79, "total_tokens": 70145256} +{"current_steps": 104065, "total_steps": 204665, "loss": 0.0009, "lr": 1.1444983965729125e-06, "epoch": 2.5423252632350426, "percentage": 50.85, "elapsed_time": "2:15:09", "remaining_time": "2:10:39", "throughput": 8649.83, "total_tokens": 70148584} +{"current_steps": 104070, "total_steps": 204665, "loss": 0.0608, "lr": 1.1444140134335855e-06, "epoch": 2.54244741406689, "percentage": 50.85, "elapsed_time": "2:15:10", "remaining_time": "2:10:39", "throughput": 8649.86, "total_tokens": 70151720} +{"current_steps": 104075, "total_steps": 204665, "loss": 0.0352, "lr": 1.1443296292440412e-06, "epoch": 2.542569564898737, "percentage": 50.85, "elapsed_time": "2:15:10", "remaining_time": "2:10:38", "throughput": 8649.93, "total_tokens": 70155432} +{"current_steps": 104080, "total_steps": 204665, "loss": 0.0566, "lr": 1.1442452440048929e-06, "epoch": 2.542691715730584, "percentage": 50.85, "elapsed_time": "2:15:10", "remaining_time": "2:10:38", "throughput": 8649.94, "total_tokens": 70158440} +{"current_steps": 104085, "total_steps": 204665, "loss": 0.0275, "lr": 1.1441608577167544e-06, "epoch": 2.5428138665624314, "percentage": 50.86, "elapsed_time": "2:15:11", "remaining_time": "2:10:38", "throughput": 8650.0, "total_tokens": 70161960} +{"current_steps": 104090, "total_steps": 204665, "loss": 0.0454, "lr": 1.1440764703802394e-06, "epoch": 2.5429360173942785, "percentage": 50.86, "elapsed_time": "2:15:11", "remaining_time": "2:10:37", "throughput": 8650.03, "total_tokens": 70165160} +{"current_steps": 104095, "total_steps": 204665, "loss": 0.0004, "lr": 1.1439920819959614e-06, "epoch": 2.5430581682261257, "percentage": 50.86, "elapsed_time": "2:15:11", "remaining_time": "2:10:37", "throughput": 8650.06, "total_tokens": 70168360} +{"current_steps": 104100, "total_steps": 204665, "loss": 0.0001, "lr": 1.1439076925645347e-06, "epoch": 2.543180319057973, "percentage": 50.86, "elapsed_time": "2:15:12", "remaining_time": "2:10:36", "throughput": 8650.07, "total_tokens": 70171368} +{"current_steps": 104105, "total_steps": 204665, "loss": 0.0002, "lr": 1.143823302086572e-06, "epoch": 2.54330246988982, "percentage": 50.87, "elapsed_time": "2:15:12", "remaining_time": "2:10:36", "throughput": 8650.11, "total_tokens": 70174760} +{"current_steps": 104110, "total_steps": 204665, "loss": 0.1443, "lr": 1.1437389105626877e-06, "epoch": 2.543424620721667, "percentage": 50.87, "elapsed_time": "2:15:12", "remaining_time": "2:10:35", "throughput": 8650.15, "total_tokens": 70178088} +{"current_steps": 104115, "total_steps": 204665, "loss": 0.0003, "lr": 1.1436545179934953e-06, "epoch": 2.5435467715535145, "percentage": 50.87, "elapsed_time": "2:15:13", "remaining_time": "2:10:35", "throughput": 8650.21, "total_tokens": 70181544} +{"current_steps": 104120, "total_steps": 204665, "loss": 0.0225, "lr": 1.1435701243796088e-06, "epoch": 2.5436689223853612, "percentage": 50.87, "elapsed_time": "2:15:13", "remaining_time": "2:10:35", "throughput": 8650.3, "total_tokens": 70185320} +{"current_steps": 104125, "total_steps": 204665, "loss": 0.0005, "lr": 1.1434857297216417e-06, "epoch": 2.543791073217209, "percentage": 50.88, "elapsed_time": "2:15:13", "remaining_time": "2:10:34", "throughput": 8650.34, "total_tokens": 70188712} +{"current_steps": 104130, "total_steps": 204665, "loss": 0.0001, "lr": 1.143401334020208e-06, "epoch": 2.5439132240490556, "percentage": 50.88, "elapsed_time": "2:15:14", "remaining_time": "2:10:34", "throughput": 8650.36, "total_tokens": 70191784} +{"current_steps": 104135, "total_steps": 204665, "loss": 0.0745, "lr": 1.143316937275921e-06, "epoch": 2.544035374880903, "percentage": 50.88, "elapsed_time": "2:15:14", "remaining_time": "2:10:33", "throughput": 8650.4, "total_tokens": 70195176} +{"current_steps": 104140, "total_steps": 204665, "loss": 0.0892, "lr": 1.1432325394893946e-06, "epoch": 2.54415752571275, "percentage": 50.88, "elapsed_time": "2:15:15", "remaining_time": "2:10:33", "throughput": 8650.43, "total_tokens": 70198376} +{"current_steps": 104145, "total_steps": 204665, "loss": 0.0002, "lr": 1.1431481406612427e-06, "epoch": 2.544279676544597, "percentage": 50.89, "elapsed_time": "2:15:15", "remaining_time": "2:10:32", "throughput": 8650.46, "total_tokens": 70201640} +{"current_steps": 104150, "total_steps": 204665, "loss": 0.0331, "lr": 1.143063740792079e-06, "epoch": 2.5444018273764444, "percentage": 50.89, "elapsed_time": "2:15:15", "remaining_time": "2:10:32", "throughput": 8650.48, "total_tokens": 70204776} +{"current_steps": 104155, "total_steps": 204665, "loss": 0.0663, "lr": 1.1429793398825173e-06, "epoch": 2.5445239782082916, "percentage": 50.89, "elapsed_time": "2:15:16", "remaining_time": "2:10:32", "throughput": 8650.52, "total_tokens": 70208040} +{"current_steps": 104160, "total_steps": 204665, "loss": 0.1334, "lr": 1.1428949379331716e-06, "epoch": 2.5446461290401388, "percentage": 50.89, "elapsed_time": "2:15:16", "remaining_time": "2:10:31", "throughput": 8650.54, "total_tokens": 70211240} +{"current_steps": 104165, "total_steps": 204665, "loss": 0.0003, "lr": 1.1428105349446554e-06, "epoch": 2.544768279871986, "percentage": 50.9, "elapsed_time": "2:15:16", "remaining_time": "2:10:31", "throughput": 8650.61, "total_tokens": 70214824} +{"current_steps": 104170, "total_steps": 204665, "loss": 0.0404, "lr": 1.1427261309175821e-06, "epoch": 2.544890430703833, "percentage": 50.9, "elapsed_time": "2:15:17", "remaining_time": "2:10:30", "throughput": 8650.61, "total_tokens": 70217832} +{"current_steps": 104175, "total_steps": 204665, "loss": 0.001, "lr": 1.1426417258525667e-06, "epoch": 2.5450125815356803, "percentage": 50.9, "elapsed_time": "2:15:17", "remaining_time": "2:10:30", "throughput": 8650.71, "total_tokens": 70221672} +{"current_steps": 104180, "total_steps": 204665, "loss": 0.0196, "lr": 1.1425573197502221e-06, "epoch": 2.5451347323675275, "percentage": 50.9, "elapsed_time": "2:15:17", "remaining_time": "2:10:29", "throughput": 8650.71, "total_tokens": 70224616} +{"current_steps": 104185, "total_steps": 204665, "loss": 0.0895, "lr": 1.1424729126111623e-06, "epoch": 2.5452568831993747, "percentage": 50.91, "elapsed_time": "2:15:18", "remaining_time": "2:10:29", "throughput": 8650.75, "total_tokens": 70227944} +{"current_steps": 104190, "total_steps": 204665, "loss": 0.0384, "lr": 1.142388504436001e-06, "epoch": 2.545379034031222, "percentage": 50.91, "elapsed_time": "2:15:18", "remaining_time": "2:10:29", "throughput": 8650.81, "total_tokens": 70231400} +{"current_steps": 104195, "total_steps": 204665, "loss": 0.0854, "lr": 1.1423040952253523e-06, "epoch": 2.5455011848630686, "percentage": 50.91, "elapsed_time": "2:15:18", "remaining_time": "2:10:28", "throughput": 8650.88, "total_tokens": 70235048} +{"current_steps": 104200, "total_steps": 204665, "loss": 0.0004, "lr": 1.14221968497983e-06, "epoch": 2.5456233356949163, "percentage": 50.91, "elapsed_time": "2:15:19", "remaining_time": "2:10:28", "throughput": 8650.89, "total_tokens": 70238056} +{"current_steps": 104205, "total_steps": 204665, "loss": 0.0048, "lr": 1.1421352737000475e-06, "epoch": 2.545745486526763, "percentage": 50.91, "elapsed_time": "2:15:19", "remaining_time": "2:10:27", "throughput": 8650.91, "total_tokens": 70241256} +{"current_steps": 104210, "total_steps": 204665, "loss": 0.0939, "lr": 1.1420508613866193e-06, "epoch": 2.5458676373586107, "percentage": 50.92, "elapsed_time": "2:15:19", "remaining_time": "2:10:27", "throughput": 8650.98, "total_tokens": 70244840} +{"current_steps": 104215, "total_steps": 204665, "loss": 0.0282, "lr": 1.1419664480401592e-06, "epoch": 2.5459897881904574, "percentage": 50.92, "elapsed_time": "2:15:20", "remaining_time": "2:10:26", "throughput": 8651.01, "total_tokens": 70248040} +{"current_steps": 104220, "total_steps": 204665, "loss": 0.0002, "lr": 1.141882033661281e-06, "epoch": 2.5461119390223046, "percentage": 50.92, "elapsed_time": "2:15:20", "remaining_time": "2:10:26", "throughput": 8651.07, "total_tokens": 70251560} +{"current_steps": 104225, "total_steps": 204665, "loss": 0.0384, "lr": 1.1417976182505981e-06, "epoch": 2.546234089854152, "percentage": 50.92, "elapsed_time": "2:15:20", "remaining_time": "2:10:25", "throughput": 8651.09, "total_tokens": 70254696} +{"current_steps": 104230, "total_steps": 204665, "loss": 0.0489, "lr": 1.141713201808725e-06, "epoch": 2.546356240685999, "percentage": 50.93, "elapsed_time": "2:15:21", "remaining_time": "2:10:25", "throughput": 8651.17, "total_tokens": 70258408} +{"current_steps": 104235, "total_steps": 204665, "loss": 0.0386, "lr": 1.1416287843362753e-06, "epoch": 2.546478391517846, "percentage": 50.93, "elapsed_time": "2:15:21", "remaining_time": "2:10:25", "throughput": 8651.2, "total_tokens": 70261544} +{"current_steps": 104240, "total_steps": 204665, "loss": 0.049, "lr": 1.1415443658338632e-06, "epoch": 2.5466005423496934, "percentage": 50.93, "elapsed_time": "2:15:21", "remaining_time": "2:10:24", "throughput": 8651.3, "total_tokens": 70265512} +{"current_steps": 104245, "total_steps": 204665, "loss": 0.0002, "lr": 1.141459946302102e-06, "epoch": 2.5467226931815405, "percentage": 50.93, "elapsed_time": "2:15:22", "remaining_time": "2:10:24", "throughput": 8651.36, "total_tokens": 70269032} +{"current_steps": 104250, "total_steps": 204665, "loss": 0.0005, "lr": 1.1413755257416064e-06, "epoch": 2.5468448440133877, "percentage": 50.94, "elapsed_time": "2:15:22", "remaining_time": "2:10:23", "throughput": 8651.42, "total_tokens": 70272552} +{"current_steps": 104255, "total_steps": 204665, "loss": 0.0149, "lr": 1.14129110415299e-06, "epoch": 2.546966994845235, "percentage": 50.94, "elapsed_time": "2:15:23", "remaining_time": "2:10:23", "throughput": 8651.5, "total_tokens": 70276264} +{"current_steps": 104260, "total_steps": 204665, "loss": 0.0001, "lr": 1.1412066815368664e-06, "epoch": 2.547089145677082, "percentage": 50.94, "elapsed_time": "2:15:23", "remaining_time": "2:10:23", "throughput": 8651.6, "total_tokens": 70280232} +{"current_steps": 104265, "total_steps": 204665, "loss": 0.0857, "lr": 1.1411222578938496e-06, "epoch": 2.5472112965089293, "percentage": 50.94, "elapsed_time": "2:15:23", "remaining_time": "2:10:22", "throughput": 8651.67, "total_tokens": 70283944} +{"current_steps": 104270, "total_steps": 204665, "loss": 0.0043, "lr": 1.1410378332245542e-06, "epoch": 2.5473334473407765, "percentage": 50.95, "elapsed_time": "2:15:24", "remaining_time": "2:10:22", "throughput": 8651.68, "total_tokens": 70286888} +{"current_steps": 104275, "total_steps": 204665, "loss": 0.0002, "lr": 1.1409534075295938e-06, "epoch": 2.5474555981726237, "percentage": 50.95, "elapsed_time": "2:15:24", "remaining_time": "2:10:21", "throughput": 8651.69, "total_tokens": 70289960} +{"current_steps": 104280, "total_steps": 204665, "loss": 0.0002, "lr": 1.140868980809582e-06, "epoch": 2.547577749004471, "percentage": 50.95, "elapsed_time": "2:15:24", "remaining_time": "2:10:21", "throughput": 8651.75, "total_tokens": 70293480} +{"current_steps": 104285, "total_steps": 204665, "loss": 0.0424, "lr": 1.1407845530651336e-06, "epoch": 2.547699899836318, "percentage": 50.95, "elapsed_time": "2:15:25", "remaining_time": "2:10:20", "throughput": 8651.81, "total_tokens": 70297064} +{"current_steps": 104290, "total_steps": 204665, "loss": 0.0003, "lr": 1.140700124296862e-06, "epoch": 2.547822050668165, "percentage": 50.96, "elapsed_time": "2:15:25", "remaining_time": "2:10:20", "throughput": 8651.87, "total_tokens": 70300584} +{"current_steps": 104295, "total_steps": 204665, "loss": 0.0005, "lr": 1.1406156945053806e-06, "epoch": 2.5479442015000124, "percentage": 50.96, "elapsed_time": "2:15:25", "remaining_time": "2:10:20", "throughput": 8651.91, "total_tokens": 70303848} +{"current_steps": 104300, "total_steps": 204665, "loss": 0.0167, "lr": 1.1405312636913044e-06, "epoch": 2.548066352331859, "percentage": 50.96, "elapsed_time": "2:15:26", "remaining_time": "2:10:19", "throughput": 8651.96, "total_tokens": 70307304} +{"current_steps": 104305, "total_steps": 204665, "loss": 0.0823, "lr": 1.1404468318552471e-06, "epoch": 2.548188503163707, "percentage": 50.96, "elapsed_time": "2:15:26", "remaining_time": "2:10:19", "throughput": 8652.03, "total_tokens": 70310888} +{"current_steps": 104310, "total_steps": 204665, "loss": 0.0244, "lr": 1.140362398997823e-06, "epoch": 2.5483106539955536, "percentage": 50.97, "elapsed_time": "2:15:26", "remaining_time": "2:10:18", "throughput": 8652.11, "total_tokens": 70314664} +{"current_steps": 104315, "total_steps": 204665, "loss": 0.0003, "lr": 1.1402779651196452e-06, "epoch": 2.5484328048274008, "percentage": 50.97, "elapsed_time": "2:15:27", "remaining_time": "2:10:18", "throughput": 8652.16, "total_tokens": 70318120} +{"current_steps": 104320, "total_steps": 204665, "loss": 0.0411, "lr": 1.1401935302213286e-06, "epoch": 2.548554955659248, "percentage": 50.97, "elapsed_time": "2:15:27", "remaining_time": "2:10:17", "throughput": 8652.25, "total_tokens": 70321960} +{"current_steps": 104325, "total_steps": 204665, "loss": 0.0766, "lr": 1.1401090943034865e-06, "epoch": 2.548677106491095, "percentage": 50.97, "elapsed_time": "2:15:27", "remaining_time": "2:10:17", "throughput": 8652.29, "total_tokens": 70325288} +{"current_steps": 104330, "total_steps": 204665, "loss": 0.0956, "lr": 1.140024657366734e-06, "epoch": 2.5487992573229423, "percentage": 50.98, "elapsed_time": "2:15:28", "remaining_time": "2:10:17", "throughput": 8652.37, "total_tokens": 70329000} +{"current_steps": 104335, "total_steps": 204665, "loss": 0.0524, "lr": 1.1399402194116842e-06, "epoch": 2.5489214081547895, "percentage": 50.98, "elapsed_time": "2:15:28", "remaining_time": "2:10:16", "throughput": 8652.47, "total_tokens": 70332904} +{"current_steps": 104340, "total_steps": 204665, "loss": 0.0008, "lr": 1.1398557804389517e-06, "epoch": 2.5490435589866367, "percentage": 50.98, "elapsed_time": "2:15:28", "remaining_time": "2:10:16", "throughput": 8652.48, "total_tokens": 70335912} +{"current_steps": 104345, "total_steps": 204665, "loss": 0.0751, "lr": 1.1397713404491503e-06, "epoch": 2.549165709818484, "percentage": 50.98, "elapsed_time": "2:15:29", "remaining_time": "2:10:15", "throughput": 8652.56, "total_tokens": 70339624} +{"current_steps": 104350, "total_steps": 204665, "loss": 0.0365, "lr": 1.139686899442894e-06, "epoch": 2.549287860650331, "percentage": 50.99, "elapsed_time": "2:15:29", "remaining_time": "2:10:15", "throughput": 8652.56, "total_tokens": 70342568} +{"current_steps": 104355, "total_steps": 204665, "loss": 0.0006, "lr": 1.139602457420797e-06, "epoch": 2.5494100114821783, "percentage": 50.99, "elapsed_time": "2:15:30", "remaining_time": "2:10:14", "throughput": 8652.6, "total_tokens": 70345896} +{"current_steps": 104360, "total_steps": 204665, "loss": 0.0002, "lr": 1.1395180143834734e-06, "epoch": 2.5495321623140255, "percentage": 50.99, "elapsed_time": "2:15:30", "remaining_time": "2:10:14", "throughput": 8652.62, "total_tokens": 70349096} +{"current_steps": 104365, "total_steps": 204665, "loss": 0.0394, "lr": 1.139433570331537e-06, "epoch": 2.5496543131458727, "percentage": 50.99, "elapsed_time": "2:15:30", "remaining_time": "2:10:14", "throughput": 8652.68, "total_tokens": 70352616} +{"current_steps": 104370, "total_steps": 204665, "loss": 0.0001, "lr": 1.1393491252656025e-06, "epoch": 2.54977646397772, "percentage": 51.0, "elapsed_time": "2:15:31", "remaining_time": "2:10:13", "throughput": 8652.72, "total_tokens": 70355944} +{"current_steps": 104375, "total_steps": 204665, "loss": 0.0468, "lr": 1.1392646791862836e-06, "epoch": 2.5498986148095666, "percentage": 51.0, "elapsed_time": "2:15:31", "remaining_time": "2:10:13", "throughput": 8652.82, "total_tokens": 70359848} +{"current_steps": 104380, "total_steps": 204665, "loss": 0.0005, "lr": 1.1391802320941946e-06, "epoch": 2.5500207656414142, "percentage": 51.0, "elapsed_time": "2:15:31", "remaining_time": "2:10:12", "throughput": 8652.84, "total_tokens": 70362984} +{"current_steps": 104385, "total_steps": 204665, "loss": 0.1049, "lr": 1.1390957839899495e-06, "epoch": 2.550142916473261, "percentage": 51.0, "elapsed_time": "2:15:32", "remaining_time": "2:10:12", "throughput": 8652.86, "total_tokens": 70366184} +{"current_steps": 104390, "total_steps": 204665, "loss": 0.0002, "lr": 1.1390113348741624e-06, "epoch": 2.5502650673051086, "percentage": 51.01, "elapsed_time": "2:15:32", "remaining_time": "2:10:11", "throughput": 8652.87, "total_tokens": 70369192} +{"current_steps": 104395, "total_steps": 204665, "loss": 0.0037, "lr": 1.1389268847474476e-06, "epoch": 2.5503872181369553, "percentage": 51.01, "elapsed_time": "2:15:32", "remaining_time": "2:10:11", "throughput": 8652.89, "total_tokens": 70372328} +{"current_steps": 104400, "total_steps": 204665, "loss": 0.0006, "lr": 1.138842433610419e-06, "epoch": 2.5505093689688025, "percentage": 51.01, "elapsed_time": "2:15:33", "remaining_time": "2:10:11", "throughput": 8652.91, "total_tokens": 70375400} +{"current_steps": 104405, "total_steps": 204665, "loss": 0.1083, "lr": 1.1387579814636908e-06, "epoch": 2.5506315198006497, "percentage": 51.01, "elapsed_time": "2:15:33", "remaining_time": "2:10:10", "throughput": 8652.93, "total_tokens": 70378600} +{"current_steps": 104410, "total_steps": 204665, "loss": 0.0582, "lr": 1.1386735283078775e-06, "epoch": 2.550753670632497, "percentage": 51.02, "elapsed_time": "2:15:33", "remaining_time": "2:10:10", "throughput": 8652.94, "total_tokens": 70381608} +{"current_steps": 104415, "total_steps": 204665, "loss": 0.0683, "lr": 1.1385890741435926e-06, "epoch": 2.550875821464344, "percentage": 51.02, "elapsed_time": "2:15:34", "remaining_time": "2:10:09", "throughput": 8652.94, "total_tokens": 70384552} +{"current_steps": 104420, "total_steps": 204665, "loss": 0.0298, "lr": 1.1385046189714509e-06, "epoch": 2.5509979722961913, "percentage": 51.02, "elapsed_time": "2:15:34", "remaining_time": "2:10:09", "throughput": 8653.0, "total_tokens": 70388072} +{"current_steps": 104425, "total_steps": 204665, "loss": 0.0002, "lr": 1.1384201627920663e-06, "epoch": 2.5511201231280385, "percentage": 51.02, "elapsed_time": "2:15:34", "remaining_time": "2:10:08", "throughput": 8653.08, "total_tokens": 70391784} +{"current_steps": 104430, "total_steps": 204665, "loss": 0.0701, "lr": 1.1383357056060531e-06, "epoch": 2.5512422739598857, "percentage": 51.02, "elapsed_time": "2:15:35", "remaining_time": "2:10:08", "throughput": 8653.19, "total_tokens": 70395880} +{"current_steps": 104435, "total_steps": 204665, "loss": 0.1085, "lr": 1.1382512474140255e-06, "epoch": 2.551364424791733, "percentage": 51.03, "elapsed_time": "2:15:35", "remaining_time": "2:10:08", "throughput": 8653.24, "total_tokens": 70399272} +{"current_steps": 104440, "total_steps": 204665, "loss": 0.0453, "lr": 1.1381667882165977e-06, "epoch": 2.55148657562358, "percentage": 51.03, "elapsed_time": "2:15:35", "remaining_time": "2:10:07", "throughput": 8653.27, "total_tokens": 70402472} +{"current_steps": 104445, "total_steps": 204665, "loss": 0.0013, "lr": 1.138082328014384e-06, "epoch": 2.5516087264554272, "percentage": 51.03, "elapsed_time": "2:15:36", "remaining_time": "2:10:07", "throughput": 8653.26, "total_tokens": 70405288} +{"current_steps": 104450, "total_steps": 204665, "loss": 0.0005, "lr": 1.137997866807998e-06, "epoch": 2.5517308772872744, "percentage": 51.03, "elapsed_time": "2:15:36", "remaining_time": "2:10:06", "throughput": 8653.29, "total_tokens": 70408488} +{"current_steps": 104455, "total_steps": 204665, "loss": 0.0669, "lr": 1.1379134045980545e-06, "epoch": 2.5518530281191216, "percentage": 51.04, "elapsed_time": "2:15:36", "remaining_time": "2:10:06", "throughput": 8653.35, "total_tokens": 70412072} +{"current_steps": 104460, "total_steps": 204665, "loss": 0.0398, "lr": 1.1378289413851677e-06, "epoch": 2.551975178950969, "percentage": 51.04, "elapsed_time": "2:15:37", "remaining_time": "2:10:05", "throughput": 8653.37, "total_tokens": 70415272} +{"current_steps": 104465, "total_steps": 204665, "loss": 0.0236, "lr": 1.1377444771699519e-06, "epoch": 2.552097329782816, "percentage": 51.04, "elapsed_time": "2:15:37", "remaining_time": "2:10:05", "throughput": 8653.44, "total_tokens": 70418856} +{"current_steps": 104470, "total_steps": 204665, "loss": 0.058, "lr": 1.1376600119530211e-06, "epoch": 2.5522194806146627, "percentage": 51.04, "elapsed_time": "2:15:38", "remaining_time": "2:10:04", "throughput": 8653.46, "total_tokens": 70421992} +{"current_steps": 104475, "total_steps": 204665, "loss": 0.0004, "lr": 1.1375755457349896e-06, "epoch": 2.5523416314465104, "percentage": 51.05, "elapsed_time": "2:15:38", "remaining_time": "2:10:04", "throughput": 8653.5, "total_tokens": 70425320} +{"current_steps": 104480, "total_steps": 204665, "loss": 0.0008, "lr": 1.1374910785164717e-06, "epoch": 2.552463782278357, "percentage": 51.05, "elapsed_time": "2:15:38", "remaining_time": "2:10:04", "throughput": 8653.53, "total_tokens": 70428584} +{"current_steps": 104485, "total_steps": 204665, "loss": 0.0002, "lr": 1.1374066102980819e-06, "epoch": 2.5525859331102043, "percentage": 51.05, "elapsed_time": "2:15:39", "remaining_time": "2:10:03", "throughput": 8653.54, "total_tokens": 70431592} +{"current_steps": 104490, "total_steps": 204665, "loss": 0.0543, "lr": 1.1373221410804343e-06, "epoch": 2.5527080839420515, "percentage": 51.05, "elapsed_time": "2:15:39", "remaining_time": "2:10:03", "throughput": 8653.59, "total_tokens": 70435048} +{"current_steps": 104495, "total_steps": 204665, "loss": 0.0001, "lr": 1.1372376708641432e-06, "epoch": 2.5528302347738987, "percentage": 51.06, "elapsed_time": "2:15:39", "remaining_time": "2:10:02", "throughput": 8653.6, "total_tokens": 70438056} +{"current_steps": 104500, "total_steps": 204665, "loss": 0.0525, "lr": 1.1371531996498226e-06, "epoch": 2.552952385605746, "percentage": 51.06, "elapsed_time": "2:15:40", "remaining_time": "2:10:02", "throughput": 8653.65, "total_tokens": 70441448} +{"current_steps": 104505, "total_steps": 204665, "loss": 0.0002, "lr": 1.137068727438087e-06, "epoch": 2.553074536437593, "percentage": 51.06, "elapsed_time": "2:15:40", "remaining_time": "2:10:01", "throughput": 8653.73, "total_tokens": 70445160} +{"current_steps": 104510, "total_steps": 204665, "loss": 0.1141, "lr": 1.136984254229551e-06, "epoch": 2.5531966872694403, "percentage": 51.06, "elapsed_time": "2:15:40", "remaining_time": "2:10:01", "throughput": 8653.77, "total_tokens": 70448488} +{"current_steps": 104515, "total_steps": 204665, "loss": 0.0478, "lr": 1.1368997800248284e-06, "epoch": 2.5533188381012875, "percentage": 51.07, "elapsed_time": "2:15:41", "remaining_time": "2:10:01", "throughput": 8653.78, "total_tokens": 70451496} +{"current_steps": 104520, "total_steps": 204665, "loss": 0.0003, "lr": 1.1368153048245337e-06, "epoch": 2.5534409889331346, "percentage": 51.07, "elapsed_time": "2:15:41", "remaining_time": "2:10:00", "throughput": 8653.84, "total_tokens": 70455080} +{"current_steps": 104525, "total_steps": 204665, "loss": 0.0002, "lr": 1.1367308286292816e-06, "epoch": 2.553563139764982, "percentage": 51.07, "elapsed_time": "2:15:41", "remaining_time": "2:10:00", "throughput": 8653.86, "total_tokens": 70458152} +{"current_steps": 104530, "total_steps": 204665, "loss": 0.0514, "lr": 1.136646351439686e-06, "epoch": 2.553685290596829, "percentage": 51.07, "elapsed_time": "2:15:42", "remaining_time": "2:09:59", "throughput": 8653.88, "total_tokens": 70461352} +{"current_steps": 104535, "total_steps": 204665, "loss": 0.0001, "lr": 1.1365618732563616e-06, "epoch": 2.553807441428676, "percentage": 51.08, "elapsed_time": "2:15:42", "remaining_time": "2:09:59", "throughput": 8653.93, "total_tokens": 70464744} +{"current_steps": 104540, "total_steps": 204665, "loss": 0.0001, "lr": 1.1364773940799222e-06, "epoch": 2.5539295922605234, "percentage": 51.08, "elapsed_time": "2:15:42", "remaining_time": "2:09:58", "throughput": 8654.03, "total_tokens": 70468712} +{"current_steps": 104545, "total_steps": 204665, "loss": 0.0677, "lr": 1.1363929139109827e-06, "epoch": 2.5540517430923706, "percentage": 51.08, "elapsed_time": "2:15:43", "remaining_time": "2:09:58", "throughput": 8654.07, "total_tokens": 70472040} +{"current_steps": 104550, "total_steps": 204665, "loss": 0.0709, "lr": 1.1363084327501573e-06, "epoch": 2.554173893924218, "percentage": 51.08, "elapsed_time": "2:15:43", "remaining_time": "2:09:58", "throughput": 8654.13, "total_tokens": 70475560} +{"current_steps": 104555, "total_steps": 204665, "loss": 0.0426, "lr": 1.1362239505980602e-06, "epoch": 2.5542960447560645, "percentage": 51.09, "elapsed_time": "2:15:43", "remaining_time": "2:09:57", "throughput": 8654.15, "total_tokens": 70478696} +{"current_steps": 104560, "total_steps": 204665, "loss": 0.061, "lr": 1.1361394674553058e-06, "epoch": 2.554418195587912, "percentage": 51.09, "elapsed_time": "2:15:44", "remaining_time": "2:09:57", "throughput": 8654.18, "total_tokens": 70481960} +{"current_steps": 104565, "total_steps": 204665, "loss": 0.0904, "lr": 1.1360549833225087e-06, "epoch": 2.554540346419759, "percentage": 51.09, "elapsed_time": "2:15:44", "remaining_time": "2:09:56", "throughput": 8654.24, "total_tokens": 70485480} +{"current_steps": 104570, "total_steps": 204665, "loss": 0.0503, "lr": 1.1359704982002832e-06, "epoch": 2.5546624972516065, "percentage": 51.09, "elapsed_time": "2:15:44", "remaining_time": "2:09:56", "throughput": 8654.28, "total_tokens": 70488872} +{"current_steps": 104575, "total_steps": 204665, "loss": 0.0002, "lr": 1.1358860120892433e-06, "epoch": 2.5547846480834533, "percentage": 51.1, "elapsed_time": "2:15:45", "remaining_time": "2:09:55", "throughput": 8654.33, "total_tokens": 70492264} +{"current_steps": 104580, "total_steps": 204665, "loss": 0.0004, "lr": 1.135801524990004e-06, "epoch": 2.5549067989153005, "percentage": 51.1, "elapsed_time": "2:15:45", "remaining_time": "2:09:55", "throughput": 8654.4, "total_tokens": 70495912} +{"current_steps": 104585, "total_steps": 204665, "loss": 0.039, "lr": 1.1357170369031797e-06, "epoch": 2.5550289497471477, "percentage": 51.1, "elapsed_time": "2:15:46", "remaining_time": "2:09:55", "throughput": 8654.44, "total_tokens": 70499240} +{"current_steps": 104590, "total_steps": 204665, "loss": 0.0529, "lr": 1.1356325478293844e-06, "epoch": 2.555151100578995, "percentage": 51.1, "elapsed_time": "2:15:46", "remaining_time": "2:09:54", "throughput": 8654.46, "total_tokens": 70502376} +{"current_steps": 104595, "total_steps": 204665, "loss": 0.0002, "lr": 1.135548057769233e-06, "epoch": 2.555273251410842, "percentage": 51.11, "elapsed_time": "2:15:46", "remaining_time": "2:09:54", "throughput": 8654.52, "total_tokens": 70505960} +{"current_steps": 104600, "total_steps": 204665, "loss": 0.088, "lr": 1.1354635667233394e-06, "epoch": 2.5553954022426892, "percentage": 51.11, "elapsed_time": "2:15:47", "remaining_time": "2:09:53", "throughput": 8654.56, "total_tokens": 70509224} +{"current_steps": 104605, "total_steps": 204665, "loss": 0.0003, "lr": 1.1353790746923182e-06, "epoch": 2.5555175530745364, "percentage": 51.11, "elapsed_time": "2:15:47", "remaining_time": "2:09:53", "throughput": 8654.62, "total_tokens": 70512808} +{"current_steps": 104610, "total_steps": 204665, "loss": 0.0002, "lr": 1.1352945816767843e-06, "epoch": 2.5556397039063836, "percentage": 51.11, "elapsed_time": "2:15:47", "remaining_time": "2:09:52", "throughput": 8654.65, "total_tokens": 70516072} +{"current_steps": 104615, "total_steps": 204665, "loss": 0.0295, "lr": 1.1352100876773515e-06, "epoch": 2.555761854738231, "percentage": 51.12, "elapsed_time": "2:15:48", "remaining_time": "2:09:52", "throughput": 8654.73, "total_tokens": 70519784} +{"current_steps": 104620, "total_steps": 204665, "loss": 0.0026, "lr": 1.1351255926946348e-06, "epoch": 2.555884005570078, "percentage": 51.12, "elapsed_time": "2:15:48", "remaining_time": "2:09:52", "throughput": 8654.77, "total_tokens": 70523112} +{"current_steps": 104625, "total_steps": 204665, "loss": 0.0491, "lr": 1.1350410967292483e-06, "epoch": 2.556006156401925, "percentage": 51.12, "elapsed_time": "2:15:48", "remaining_time": "2:09:51", "throughput": 8654.82, "total_tokens": 70526568} +{"current_steps": 104630, "total_steps": 204665, "loss": 0.0004, "lr": 1.1349565997818067e-06, "epoch": 2.5561283072337724, "percentage": 51.12, "elapsed_time": "2:15:49", "remaining_time": "2:09:51", "throughput": 8654.86, "total_tokens": 70529832} +{"current_steps": 104635, "total_steps": 204665, "loss": 0.0526, "lr": 1.1348721018529243e-06, "epoch": 2.5562504580656196, "percentage": 51.13, "elapsed_time": "2:15:49", "remaining_time": "2:09:50", "throughput": 8654.87, "total_tokens": 70532904} +{"current_steps": 104640, "total_steps": 204665, "loss": 0.0003, "lr": 1.134787602943216e-06, "epoch": 2.5563726088974663, "percentage": 51.13, "elapsed_time": "2:15:49", "remaining_time": "2:09:50", "throughput": 8654.86, "total_tokens": 70535656} +{"current_steps": 104645, "total_steps": 204665, "loss": 0.0007, "lr": 1.1347031030532956e-06, "epoch": 2.556494759729314, "percentage": 51.13, "elapsed_time": "2:15:50", "remaining_time": "2:09:49", "throughput": 8654.87, "total_tokens": 70538728} +{"current_steps": 104650, "total_steps": 204665, "loss": 0.0001, "lr": 1.1346186021837782e-06, "epoch": 2.5566169105611607, "percentage": 51.13, "elapsed_time": "2:15:50", "remaining_time": "2:09:49", "throughput": 8654.95, "total_tokens": 70542440} +{"current_steps": 104655, "total_steps": 204665, "loss": 0.1421, "lr": 1.1345341003352782e-06, "epoch": 2.5567390613930083, "percentage": 51.13, "elapsed_time": "2:15:50", "remaining_time": "2:09:49", "throughput": 8655.02, "total_tokens": 70546152} +{"current_steps": 104660, "total_steps": 204665, "loss": 0.0421, "lr": 1.1344495975084098e-06, "epoch": 2.556861212224855, "percentage": 51.14, "elapsed_time": "2:15:51", "remaining_time": "2:09:48", "throughput": 8655.08, "total_tokens": 70549672} +{"current_steps": 104665, "total_steps": 204665, "loss": 0.0003, "lr": 1.134365093703788e-06, "epoch": 2.5569833630567023, "percentage": 51.14, "elapsed_time": "2:15:51", "remaining_time": "2:09:48", "throughput": 8655.19, "total_tokens": 70553768} +{"current_steps": 104670, "total_steps": 204665, "loss": 0.0027, "lr": 1.1342805889220269e-06, "epoch": 2.5571055138885495, "percentage": 51.14, "elapsed_time": "2:15:51", "remaining_time": "2:09:47", "throughput": 8655.24, "total_tokens": 70557160} +{"current_steps": 104675, "total_steps": 204665, "loss": 0.0029, "lr": 1.1341960831637414e-06, "epoch": 2.5572276647203966, "percentage": 51.14, "elapsed_time": "2:15:52", "remaining_time": "2:09:47", "throughput": 8655.27, "total_tokens": 70560360} +{"current_steps": 104680, "total_steps": 204665, "loss": 0.0108, "lr": 1.1341115764295458e-06, "epoch": 2.557349815552244, "percentage": 51.15, "elapsed_time": "2:15:52", "remaining_time": "2:09:46", "throughput": 8655.33, "total_tokens": 70563880} +{"current_steps": 104685, "total_steps": 204665, "loss": 0.0001, "lr": 1.1340270687200547e-06, "epoch": 2.557471966384091, "percentage": 51.15, "elapsed_time": "2:15:53", "remaining_time": "2:09:46", "throughput": 8655.38, "total_tokens": 70567400} +{"current_steps": 104690, "total_steps": 204665, "loss": 0.0127, "lr": 1.1339425600358827e-06, "epoch": 2.557594117215938, "percentage": 51.15, "elapsed_time": "2:15:53", "remaining_time": "2:09:46", "throughput": 8655.42, "total_tokens": 70570600} +{"current_steps": 104695, "total_steps": 204665, "loss": 0.0003, "lr": 1.1338580503776445e-06, "epoch": 2.5577162680477854, "percentage": 51.15, "elapsed_time": "2:15:53", "remaining_time": "2:09:45", "throughput": 8655.41, "total_tokens": 70573480} +{"current_steps": 104700, "total_steps": 204665, "loss": 0.0947, "lr": 1.1337735397459547e-06, "epoch": 2.5578384188796326, "percentage": 51.16, "elapsed_time": "2:15:54", "remaining_time": "2:09:45", "throughput": 8655.43, "total_tokens": 70576552} +{"current_steps": 104705, "total_steps": 204665, "loss": 0.0, "lr": 1.1336890281414275e-06, "epoch": 2.55796056971148, "percentage": 51.16, "elapsed_time": "2:15:54", "remaining_time": "2:09:44", "throughput": 8655.44, "total_tokens": 70579624} +{"current_steps": 104710, "total_steps": 204665, "loss": 0.0491, "lr": 1.133604515564678e-06, "epoch": 2.558082720543327, "percentage": 51.16, "elapsed_time": "2:15:54", "remaining_time": "2:09:44", "throughput": 8655.46, "total_tokens": 70582760} +{"current_steps": 104715, "total_steps": 204665, "loss": 0.0449, "lr": 1.13352000201632e-06, "epoch": 2.558204871375174, "percentage": 51.16, "elapsed_time": "2:15:55", "remaining_time": "2:09:43", "throughput": 8655.47, "total_tokens": 70585832} +{"current_steps": 104720, "total_steps": 204665, "loss": 0.0007, "lr": 1.133435487496969e-06, "epoch": 2.5583270222070214, "percentage": 51.17, "elapsed_time": "2:15:55", "remaining_time": "2:09:43", "throughput": 8655.51, "total_tokens": 70589096} +{"current_steps": 104725, "total_steps": 204665, "loss": 0.0004, "lr": 1.1333509720072392e-06, "epoch": 2.5584491730388685, "percentage": 51.17, "elapsed_time": "2:15:55", "remaining_time": "2:09:43", "throughput": 8655.59, "total_tokens": 70592872} +{"current_steps": 104730, "total_steps": 204665, "loss": 0.0002, "lr": 1.133266455547745e-06, "epoch": 2.5585713238707157, "percentage": 51.17, "elapsed_time": "2:15:56", "remaining_time": "2:09:42", "throughput": 8655.67, "total_tokens": 70596584} +{"current_steps": 104735, "total_steps": 204665, "loss": 0.0001, "lr": 1.1331819381191016e-06, "epoch": 2.5586934747025625, "percentage": 51.17, "elapsed_time": "2:15:56", "remaining_time": "2:09:42", "throughput": 8655.7, "total_tokens": 70599848} +{"current_steps": 104740, "total_steps": 204665, "loss": 0.1445, "lr": 1.1330974197219233e-06, "epoch": 2.55881562553441, "percentage": 51.18, "elapsed_time": "2:15:56", "remaining_time": "2:09:41", "throughput": 8655.71, "total_tokens": 70602856} +{"current_steps": 104745, "total_steps": 204665, "loss": 0.0707, "lr": 1.1330129003568247e-06, "epoch": 2.558937776366257, "percentage": 51.18, "elapsed_time": "2:15:57", "remaining_time": "2:09:41", "throughput": 8655.76, "total_tokens": 70606312} +{"current_steps": 104750, "total_steps": 204665, "loss": 0.0002, "lr": 1.1329283800244206e-06, "epoch": 2.5590599271981045, "percentage": 51.18, "elapsed_time": "2:15:57", "remaining_time": "2:09:40", "throughput": 8655.8, "total_tokens": 70609704} +{"current_steps": 104755, "total_steps": 204665, "loss": 0.0465, "lr": 1.1328438587253258e-06, "epoch": 2.5591820780299512, "percentage": 51.18, "elapsed_time": "2:15:57", "remaining_time": "2:09:40", "throughput": 8655.84, "total_tokens": 70613032} +{"current_steps": 104760, "total_steps": 204665, "loss": 0.0001, "lr": 1.132759336460154e-06, "epoch": 2.5593042288617984, "percentage": 51.19, "elapsed_time": "2:15:58", "remaining_time": "2:09:40", "throughput": 8655.87, "total_tokens": 70616232} +{"current_steps": 104765, "total_steps": 204665, "loss": 0.0001, "lr": 1.1326748132295211e-06, "epoch": 2.5594263796936456, "percentage": 51.19, "elapsed_time": "2:15:58", "remaining_time": "2:09:39", "throughput": 8655.9, "total_tokens": 70619432} +{"current_steps": 104770, "total_steps": 204665, "loss": 0.0002, "lr": 1.1325902890340414e-06, "epoch": 2.559548530525493, "percentage": 51.19, "elapsed_time": "2:15:58", "remaining_time": "2:09:39", "throughput": 8655.95, "total_tokens": 70622824} +{"current_steps": 104775, "total_steps": 204665, "loss": 0.0308, "lr": 1.1325057638743292e-06, "epoch": 2.55967068135734, "percentage": 51.19, "elapsed_time": "2:15:59", "remaining_time": "2:09:38", "throughput": 8655.97, "total_tokens": 70626024} +{"current_steps": 104780, "total_steps": 204665, "loss": 0.0441, "lr": 1.1324212377509993e-06, "epoch": 2.559792832189187, "percentage": 51.2, "elapsed_time": "2:15:59", "remaining_time": "2:09:38", "throughput": 8656.09, "total_tokens": 70630120} +{"current_steps": 104785, "total_steps": 204665, "loss": 0.0148, "lr": 1.132336710664667e-06, "epoch": 2.5599149830210344, "percentage": 51.2, "elapsed_time": "2:15:59", "remaining_time": "2:09:37", "throughput": 8656.15, "total_tokens": 70633640} +{"current_steps": 104790, "total_steps": 204665, "loss": 0.0002, "lr": 1.1322521826159464e-06, "epoch": 2.5600371338528816, "percentage": 51.2, "elapsed_time": "2:16:00", "remaining_time": "2:09:37", "throughput": 8656.21, "total_tokens": 70637160} +{"current_steps": 104795, "total_steps": 204665, "loss": 0.2187, "lr": 1.1321676536054522e-06, "epoch": 2.5601592846847288, "percentage": 51.2, "elapsed_time": "2:16:00", "remaining_time": "2:09:37", "throughput": 8656.22, "total_tokens": 70640168} +{"current_steps": 104800, "total_steps": 204665, "loss": 0.0387, "lr": 1.1320831236337994e-06, "epoch": 2.560281435516576, "percentage": 51.21, "elapsed_time": "2:16:00", "remaining_time": "2:09:36", "throughput": 8656.26, "total_tokens": 70643560} +{"current_steps": 104805, "total_steps": 204665, "loss": 0.0059, "lr": 1.1319985927016026e-06, "epoch": 2.560403586348423, "percentage": 51.21, "elapsed_time": "2:16:01", "remaining_time": "2:09:36", "throughput": 8656.36, "total_tokens": 70647464} +{"current_steps": 104810, "total_steps": 204665, "loss": 0.0004, "lr": 1.1319140608094762e-06, "epoch": 2.5605257371802703, "percentage": 51.21, "elapsed_time": "2:16:01", "remaining_time": "2:09:35", "throughput": 8656.4, "total_tokens": 70650728} +{"current_steps": 104815, "total_steps": 204665, "loss": 0.0838, "lr": 1.1318295279580357e-06, "epoch": 2.5606478880121175, "percentage": 51.21, "elapsed_time": "2:16:02", "remaining_time": "2:09:35", "throughput": 8656.39, "total_tokens": 70653544} +{"current_steps": 104820, "total_steps": 204665, "loss": 0.0363, "lr": 1.1317449941478952e-06, "epoch": 2.5607700388439643, "percentage": 51.22, "elapsed_time": "2:16:02", "remaining_time": "2:09:34", "throughput": 8656.4, "total_tokens": 70656552} +{"current_steps": 104825, "total_steps": 204665, "loss": 0.0003, "lr": 1.1316604593796695e-06, "epoch": 2.560892189675812, "percentage": 51.22, "elapsed_time": "2:16:02", "remaining_time": "2:09:34", "throughput": 8656.41, "total_tokens": 70659560} +{"current_steps": 104830, "total_steps": 204665, "loss": 0.0803, "lr": 1.1315759236539737e-06, "epoch": 2.5610143405076586, "percentage": 51.22, "elapsed_time": "2:16:03", "remaining_time": "2:09:34", "throughput": 8656.44, "total_tokens": 70662824} +{"current_steps": 104835, "total_steps": 204665, "loss": 0.0002, "lr": 1.1314913869714225e-06, "epoch": 2.5611364913395063, "percentage": 51.22, "elapsed_time": "2:16:03", "remaining_time": "2:09:33", "throughput": 8656.5, "total_tokens": 70666344} +{"current_steps": 104840, "total_steps": 204665, "loss": 0.0007, "lr": 1.1314068493326305e-06, "epoch": 2.561258642171353, "percentage": 51.23, "elapsed_time": "2:16:03", "remaining_time": "2:09:33", "throughput": 8656.54, "total_tokens": 70669672} +{"current_steps": 104845, "total_steps": 204665, "loss": 0.0153, "lr": 1.1313223107382124e-06, "epoch": 2.5613807930032, "percentage": 51.23, "elapsed_time": "2:16:04", "remaining_time": "2:09:32", "throughput": 8656.59, "total_tokens": 70673128} +{"current_steps": 104850, "total_steps": 204665, "loss": 0.013, "lr": 1.1312377711887835e-06, "epoch": 2.5615029438350474, "percentage": 51.23, "elapsed_time": "2:16:04", "remaining_time": "2:09:32", "throughput": 8656.66, "total_tokens": 70676776} +{"current_steps": 104855, "total_steps": 204665, "loss": 0.0668, "lr": 1.1311532306849579e-06, "epoch": 2.5616250946668946, "percentage": 51.23, "elapsed_time": "2:16:04", "remaining_time": "2:09:31", "throughput": 8656.69, "total_tokens": 70679976} +{"current_steps": 104860, "total_steps": 204665, "loss": 0.0443, "lr": 1.131068689227351e-06, "epoch": 2.5617472454987418, "percentage": 51.23, "elapsed_time": "2:16:05", "remaining_time": "2:09:31", "throughput": 8656.73, "total_tokens": 70683240} +{"current_steps": 104865, "total_steps": 204665, "loss": 0.0001, "lr": 1.130984146816577e-06, "epoch": 2.561869396330589, "percentage": 51.24, "elapsed_time": "2:16:05", "remaining_time": "2:09:31", "throughput": 8656.82, "total_tokens": 70687208} +{"current_steps": 104870, "total_steps": 204665, "loss": 0.0, "lr": 1.1308996034532511e-06, "epoch": 2.561991547162436, "percentage": 51.24, "elapsed_time": "2:16:05", "remaining_time": "2:09:30", "throughput": 8656.86, "total_tokens": 70690472} +{"current_steps": 104875, "total_steps": 204665, "loss": 0.0573, "lr": 1.1308150591379884e-06, "epoch": 2.5621136979942833, "percentage": 51.24, "elapsed_time": "2:16:06", "remaining_time": "2:09:30", "throughput": 8656.9, "total_tokens": 70693864} +{"current_steps": 104880, "total_steps": 204665, "loss": 0.0002, "lr": 1.1307305138714032e-06, "epoch": 2.5622358488261305, "percentage": 51.24, "elapsed_time": "2:16:06", "remaining_time": "2:09:29", "throughput": 8656.97, "total_tokens": 70697448} +{"current_steps": 104885, "total_steps": 204665, "loss": 0.0601, "lr": 1.1306459676541104e-06, "epoch": 2.5623579996579777, "percentage": 51.25, "elapsed_time": "2:16:06", "remaining_time": "2:09:29", "throughput": 8657.03, "total_tokens": 70700968} +{"current_steps": 104890, "total_steps": 204665, "loss": 0.0004, "lr": 1.130561420486725e-06, "epoch": 2.562480150489825, "percentage": 51.25, "elapsed_time": "2:16:07", "remaining_time": "2:09:28", "throughput": 8657.09, "total_tokens": 70704488} +{"current_steps": 104895, "total_steps": 204665, "loss": 0.0356, "lr": 1.1304768723698622e-06, "epoch": 2.562602301321672, "percentage": 51.25, "elapsed_time": "2:16:07", "remaining_time": "2:09:28", "throughput": 8657.15, "total_tokens": 70708008} +{"current_steps": 104900, "total_steps": 204665, "loss": 0.0384, "lr": 1.130392323304136e-06, "epoch": 2.5627244521535193, "percentage": 51.25, "elapsed_time": "2:16:07", "remaining_time": "2:09:28", "throughput": 8657.17, "total_tokens": 70711208} +{"current_steps": 104905, "total_steps": 204665, "loss": 0.0003, "lr": 1.1303077732901625e-06, "epoch": 2.5628466029853665, "percentage": 51.26, "elapsed_time": "2:16:08", "remaining_time": "2:09:27", "throughput": 8657.21, "total_tokens": 70714536} +{"current_steps": 104910, "total_steps": 204665, "loss": 0.0339, "lr": 1.1302232223285554e-06, "epoch": 2.5629687538172137, "percentage": 51.26, "elapsed_time": "2:16:08", "remaining_time": "2:09:27", "throughput": 8657.25, "total_tokens": 70717800} +{"current_steps": 104915, "total_steps": 204665, "loss": 0.0424, "lr": 1.1301386704199298e-06, "epoch": 2.5630909046490604, "percentage": 51.26, "elapsed_time": "2:16:08", "remaining_time": "2:09:26", "throughput": 8657.3, "total_tokens": 70721320} +{"current_steps": 104920, "total_steps": 204665, "loss": 0.1076, "lr": 1.1300541175649008e-06, "epoch": 2.563213055480908, "percentage": 51.26, "elapsed_time": "2:16:09", "remaining_time": "2:09:26", "throughput": 8657.32, "total_tokens": 70724328} +{"current_steps": 104925, "total_steps": 204665, "loss": 0.1206, "lr": 1.1299695637640834e-06, "epoch": 2.563335206312755, "percentage": 51.27, "elapsed_time": "2:16:09", "remaining_time": "2:09:25", "throughput": 8657.35, "total_tokens": 70727592} +{"current_steps": 104930, "total_steps": 204665, "loss": 0.0001, "lr": 1.1298850090180923e-06, "epoch": 2.563457357144602, "percentage": 51.27, "elapsed_time": "2:16:10", "remaining_time": "2:09:25", "throughput": 8657.4, "total_tokens": 70730984} +{"current_steps": 104935, "total_steps": 204665, "loss": 0.0515, "lr": 1.1298004533275426e-06, "epoch": 2.563579507976449, "percentage": 51.27, "elapsed_time": "2:16:10", "remaining_time": "2:09:25", "throughput": 8657.42, "total_tokens": 70734120} +{"current_steps": 104940, "total_steps": 204665, "loss": 0.061, "lr": 1.1297158966930492e-06, "epoch": 2.5637016588082964, "percentage": 51.27, "elapsed_time": "2:16:10", "remaining_time": "2:09:24", "throughput": 8657.49, "total_tokens": 70737768} +{"current_steps": 104945, "total_steps": 204665, "loss": 0.0008, "lr": 1.1296313391152266e-06, "epoch": 2.5638238096401436, "percentage": 51.28, "elapsed_time": "2:16:11", "remaining_time": "2:09:24", "throughput": 8657.55, "total_tokens": 70741288} +{"current_steps": 104950, "total_steps": 204665, "loss": 0.0874, "lr": 1.1295467805946902e-06, "epoch": 2.5639459604719907, "percentage": 51.28, "elapsed_time": "2:16:11", "remaining_time": "2:09:23", "throughput": 8657.58, "total_tokens": 70744552} +{"current_steps": 104955, "total_steps": 204665, "loss": 0.0529, "lr": 1.1294622211320548e-06, "epoch": 2.564068111303838, "percentage": 51.28, "elapsed_time": "2:16:11", "remaining_time": "2:09:23", "throughput": 8657.64, "total_tokens": 70748136} +{"current_steps": 104960, "total_steps": 204665, "loss": 0.0002, "lr": 1.1293776607279352e-06, "epoch": 2.564190262135685, "percentage": 51.28, "elapsed_time": "2:16:12", "remaining_time": "2:09:22", "throughput": 8657.68, "total_tokens": 70751464} +{"current_steps": 104965, "total_steps": 204665, "loss": 0.1121, "lr": 1.1292930993829465e-06, "epoch": 2.5643124129675323, "percentage": 51.29, "elapsed_time": "2:16:12", "remaining_time": "2:09:22", "throughput": 8657.72, "total_tokens": 70754792} +{"current_steps": 104970, "total_steps": 204665, "loss": 0.0725, "lr": 1.1292085370977036e-06, "epoch": 2.5644345637993795, "percentage": 51.29, "elapsed_time": "2:16:12", "remaining_time": "2:09:22", "throughput": 8657.74, "total_tokens": 70757928} +{"current_steps": 104975, "total_steps": 204665, "loss": 0.0597, "lr": 1.1291239738728214e-06, "epoch": 2.5645567146312267, "percentage": 51.29, "elapsed_time": "2:16:13", "remaining_time": "2:09:21", "throughput": 8657.74, "total_tokens": 70760808} +{"current_steps": 104980, "total_steps": 204665, "loss": 0.063, "lr": 1.129039409708915e-06, "epoch": 2.564678865463074, "percentage": 51.29, "elapsed_time": "2:16:13", "remaining_time": "2:09:21", "throughput": 8657.79, "total_tokens": 70764264} +{"current_steps": 104985, "total_steps": 204665, "loss": 0.125, "lr": 1.1289548446065993e-06, "epoch": 2.564801016294921, "percentage": 51.3, "elapsed_time": "2:16:13", "remaining_time": "2:09:20", "throughput": 8657.84, "total_tokens": 70767656} +{"current_steps": 104990, "total_steps": 204665, "loss": 0.0002, "lr": 1.1288702785664894e-06, "epoch": 2.5649231671267683, "percentage": 51.3, "elapsed_time": "2:16:14", "remaining_time": "2:09:20", "throughput": 8657.87, "total_tokens": 70770920} +{"current_steps": 104995, "total_steps": 204665, "loss": 0.0001, "lr": 1.1287857115892002e-06, "epoch": 2.5650453179586155, "percentage": 51.3, "elapsed_time": "2:16:14", "remaining_time": "2:09:19", "throughput": 8657.92, "total_tokens": 70774312} +{"current_steps": 105000, "total_steps": 204665, "loss": 0.0115, "lr": 1.1287011436753465e-06, "epoch": 2.565167468790462, "percentage": 51.3, "elapsed_time": "2:16:14", "remaining_time": "2:09:19", "throughput": 8657.93, "total_tokens": 70777320} +{"current_steps": 105005, "total_steps": 204665, "loss": 0.0479, "lr": 1.1286165748255434e-06, "epoch": 2.56528961962231, "percentage": 51.31, "elapsed_time": "2:16:15", "remaining_time": "2:09:19", "throughput": 8657.92, "total_tokens": 70780200} +{"current_steps": 105010, "total_steps": 204665, "loss": 0.0056, "lr": 1.128532005040406e-06, "epoch": 2.5654117704541566, "percentage": 51.31, "elapsed_time": "2:16:15", "remaining_time": "2:09:18", "throughput": 8657.97, "total_tokens": 70783656} +{"current_steps": 105015, "total_steps": 204665, "loss": 0.0001, "lr": 1.1284474343205494e-06, "epoch": 2.565533921286004, "percentage": 51.31, "elapsed_time": "2:16:15", "remaining_time": "2:09:18", "throughput": 8658.05, "total_tokens": 70787368} +{"current_steps": 105020, "total_steps": 204665, "loss": 0.0002, "lr": 1.1283628626665887e-06, "epoch": 2.565656072117851, "percentage": 51.31, "elapsed_time": "2:16:16", "remaining_time": "2:09:17", "throughput": 8658.08, "total_tokens": 70790696} +{"current_steps": 105025, "total_steps": 204665, "loss": 0.0017, "lr": 1.1282782900791384e-06, "epoch": 2.565778222949698, "percentage": 51.32, "elapsed_time": "2:16:16", "remaining_time": "2:09:17", "throughput": 8658.12, "total_tokens": 70794024} +{"current_steps": 105030, "total_steps": 204665, "loss": 0.0585, "lr": 1.128193716558814e-06, "epoch": 2.5659003737815453, "percentage": 51.32, "elapsed_time": "2:16:16", "remaining_time": "2:09:16", "throughput": 8658.14, "total_tokens": 70797160} +{"current_steps": 105035, "total_steps": 204665, "loss": 0.0408, "lr": 1.1281091421062306e-06, "epoch": 2.5660225246133925, "percentage": 51.32, "elapsed_time": "2:16:17", "remaining_time": "2:09:16", "throughput": 8658.19, "total_tokens": 70800616} +{"current_steps": 105040, "total_steps": 204665, "loss": 0.1486, "lr": 1.1280245667220024e-06, "epoch": 2.5661446754452397, "percentage": 51.32, "elapsed_time": "2:16:17", "remaining_time": "2:09:16", "throughput": 8658.23, "total_tokens": 70803880} +{"current_steps": 105045, "total_steps": 204665, "loss": 0.0004, "lr": 1.1279399904067457e-06, "epoch": 2.566266826277087, "percentage": 51.33, "elapsed_time": "2:16:17", "remaining_time": "2:09:15", "throughput": 8658.27, "total_tokens": 70807272} +{"current_steps": 105050, "total_steps": 204665, "loss": 0.0849, "lr": 1.127855413161075e-06, "epoch": 2.566388977108934, "percentage": 51.33, "elapsed_time": "2:16:18", "remaining_time": "2:09:15", "throughput": 8658.31, "total_tokens": 70810600} +{"current_steps": 105055, "total_steps": 204665, "loss": 0.0003, "lr": 1.1277708349856053e-06, "epoch": 2.5665111279407813, "percentage": 51.33, "elapsed_time": "2:16:18", "remaining_time": "2:09:14", "throughput": 8658.4, "total_tokens": 70814312} +{"current_steps": 105060, "total_steps": 204665, "loss": 0.0004, "lr": 1.1276862558809518e-06, "epoch": 2.5666332787726285, "percentage": 51.33, "elapsed_time": "2:16:19", "remaining_time": "2:09:14", "throughput": 8658.4, "total_tokens": 70817256} +{"current_steps": 105065, "total_steps": 204665, "loss": 0.1226, "lr": 1.127601675847729e-06, "epoch": 2.5667554296044757, "percentage": 51.34, "elapsed_time": "2:16:19", "remaining_time": "2:09:13", "throughput": 8658.42, "total_tokens": 70820328} +{"current_steps": 105070, "total_steps": 204665, "loss": 0.0547, "lr": 1.1275170948865531e-06, "epoch": 2.566877580436323, "percentage": 51.34, "elapsed_time": "2:16:19", "remaining_time": "2:09:13", "throughput": 8658.44, "total_tokens": 70823528} +{"current_steps": 105075, "total_steps": 204665, "loss": 0.0004, "lr": 1.127432512998038e-06, "epoch": 2.56699973126817, "percentage": 51.34, "elapsed_time": "2:16:20", "remaining_time": "2:09:13", "throughput": 8658.49, "total_tokens": 70826856} +{"current_steps": 105080, "total_steps": 204665, "loss": 0.0396, "lr": 1.1273479301827999e-06, "epoch": 2.5671218821000172, "percentage": 51.34, "elapsed_time": "2:16:20", "remaining_time": "2:09:12", "throughput": 8658.6, "total_tokens": 70830952} +{"current_steps": 105085, "total_steps": 204665, "loss": 0.0001, "lr": 1.1272633464414533e-06, "epoch": 2.5672440329318644, "percentage": 51.34, "elapsed_time": "2:16:20", "remaining_time": "2:09:12", "throughput": 8658.64, "total_tokens": 70834216} +{"current_steps": 105090, "total_steps": 204665, "loss": 0.0003, "lr": 1.127178761774613e-06, "epoch": 2.5673661837637116, "percentage": 51.35, "elapsed_time": "2:16:21", "remaining_time": "2:09:11", "throughput": 8658.65, "total_tokens": 70837224} +{"current_steps": 105095, "total_steps": 204665, "loss": 0.1041, "lr": 1.127094176182895e-06, "epoch": 2.5674883345955584, "percentage": 51.35, "elapsed_time": "2:16:21", "remaining_time": "2:09:11", "throughput": 8658.67, "total_tokens": 70840360} +{"current_steps": 105100, "total_steps": 204665, "loss": 0.0004, "lr": 1.1270095896669138e-06, "epoch": 2.567610485427406, "percentage": 51.35, "elapsed_time": "2:16:21", "remaining_time": "2:09:10", "throughput": 8658.74, "total_tokens": 70844008} +{"current_steps": 105105, "total_steps": 204665, "loss": 0.0003, "lr": 1.126925002227285e-06, "epoch": 2.5677326362592527, "percentage": 51.35, "elapsed_time": "2:16:22", "remaining_time": "2:09:10", "throughput": 8658.8, "total_tokens": 70847528} +{"current_steps": 105110, "total_steps": 204665, "loss": 0.0001, "lr": 1.126840413864623e-06, "epoch": 2.5678547870911, "percentage": 51.36, "elapsed_time": "2:16:22", "remaining_time": "2:09:10", "throughput": 8658.85, "total_tokens": 70850984} +{"current_steps": 105115, "total_steps": 204665, "loss": 0.0006, "lr": 1.1267558245795438e-06, "epoch": 2.567976937922947, "percentage": 51.36, "elapsed_time": "2:16:22", "remaining_time": "2:09:09", "throughput": 8658.94, "total_tokens": 70854824} +{"current_steps": 105120, "total_steps": 204665, "loss": 0.0005, "lr": 1.1266712343726617e-06, "epoch": 2.5680990887547943, "percentage": 51.36, "elapsed_time": "2:16:23", "remaining_time": "2:09:09", "throughput": 8658.99, "total_tokens": 70858216} +{"current_steps": 105125, "total_steps": 204665, "loss": 0.0001, "lr": 1.1265866432445925e-06, "epoch": 2.5682212395866415, "percentage": 51.36, "elapsed_time": "2:16:23", "remaining_time": "2:09:08", "throughput": 8659.02, "total_tokens": 70861544} +{"current_steps": 105130, "total_steps": 204665, "loss": 0.0753, "lr": 1.1265020511959514e-06, "epoch": 2.5683433904184887, "percentage": 51.37, "elapsed_time": "2:16:23", "remaining_time": "2:09:08", "throughput": 8659.04, "total_tokens": 70864616} +{"current_steps": 105135, "total_steps": 204665, "loss": 0.0005, "lr": 1.126417458227353e-06, "epoch": 2.568465541250336, "percentage": 51.37, "elapsed_time": "2:16:24", "remaining_time": "2:09:07", "throughput": 8659.04, "total_tokens": 70867496} +{"current_steps": 105140, "total_steps": 204665, "loss": 0.034, "lr": 1.126332864339413e-06, "epoch": 2.568587692082183, "percentage": 51.37, "elapsed_time": "2:16:24", "remaining_time": "2:09:07", "throughput": 8659.12, "total_tokens": 70871272} +{"current_steps": 105145, "total_steps": 204665, "loss": 0.0221, "lr": 1.1262482695327464e-06, "epoch": 2.5687098429140303, "percentage": 51.37, "elapsed_time": "2:16:24", "remaining_time": "2:09:07", "throughput": 8659.16, "total_tokens": 70874600} +{"current_steps": 105150, "total_steps": 204665, "loss": 0.0683, "lr": 1.1261636738079686e-06, "epoch": 2.5688319937458775, "percentage": 51.38, "elapsed_time": "2:16:25", "remaining_time": "2:09:06", "throughput": 8659.22, "total_tokens": 70878184} +{"current_steps": 105155, "total_steps": 204665, "loss": 0.1187, "lr": 1.1260790771656944e-06, "epoch": 2.5689541445777246, "percentage": 51.38, "elapsed_time": "2:16:25", "remaining_time": "2:09:06", "throughput": 8659.27, "total_tokens": 70881640} +{"current_steps": 105160, "total_steps": 204665, "loss": 0.0005, "lr": 1.1259944796065392e-06, "epoch": 2.569076295409572, "percentage": 51.38, "elapsed_time": "2:16:25", "remaining_time": "2:09:05", "throughput": 8659.33, "total_tokens": 70885160} +{"current_steps": 105165, "total_steps": 204665, "loss": 0.0457, "lr": 1.1259098811311184e-06, "epoch": 2.569198446241419, "percentage": 51.38, "elapsed_time": "2:16:26", "remaining_time": "2:09:05", "throughput": 8659.36, "total_tokens": 70888360} +{"current_steps": 105170, "total_steps": 204665, "loss": 0.0001, "lr": 1.1258252817400472e-06, "epoch": 2.569320597073266, "percentage": 51.39, "elapsed_time": "2:16:26", "remaining_time": "2:09:04", "throughput": 8659.35, "total_tokens": 70891176} +{"current_steps": 105175, "total_steps": 204665, "loss": 0.0005, "lr": 1.1257406814339404e-06, "epoch": 2.5694427479051134, "percentage": 51.39, "elapsed_time": "2:16:27", "remaining_time": "2:09:04", "throughput": 8659.45, "total_tokens": 70895144} +{"current_steps": 105180, "total_steps": 204665, "loss": 0.0957, "lr": 1.1256560802134138e-06, "epoch": 2.56956489873696, "percentage": 51.39, "elapsed_time": "2:16:27", "remaining_time": "2:09:04", "throughput": 8659.51, "total_tokens": 70898728} +{"current_steps": 105185, "total_steps": 204665, "loss": 0.0003, "lr": 1.1255714780790823e-06, "epoch": 2.569687049568808, "percentage": 51.39, "elapsed_time": "2:16:27", "remaining_time": "2:09:03", "throughput": 8659.54, "total_tokens": 70901928} +{"current_steps": 105190, "total_steps": 204665, "loss": 0.0618, "lr": 1.1254868750315613e-06, "epoch": 2.5698092004006545, "percentage": 51.4, "elapsed_time": "2:16:28", "remaining_time": "2:09:03", "throughput": 8659.54, "total_tokens": 70904872} +{"current_steps": 105195, "total_steps": 204665, "loss": 0.0002, "lr": 1.1254022710714662e-06, "epoch": 2.569931351232502, "percentage": 51.4, "elapsed_time": "2:16:28", "remaining_time": "2:09:02", "throughput": 8659.58, "total_tokens": 70908200} +{"current_steps": 105200, "total_steps": 204665, "loss": 0.1128, "lr": 1.1253176661994114e-06, "epoch": 2.570053502064349, "percentage": 51.4, "elapsed_time": "2:16:28", "remaining_time": "2:09:02", "throughput": 8659.56, "total_tokens": 70910888} +{"current_steps": 105205, "total_steps": 204665, "loss": 0.0522, "lr": 1.1252330604160134e-06, "epoch": 2.570175652896196, "percentage": 51.4, "elapsed_time": "2:16:29", "remaining_time": "2:09:01", "throughput": 8659.58, "total_tokens": 70913960} +{"current_steps": 105210, "total_steps": 204665, "loss": 0.0927, "lr": 1.1251484537218867e-06, "epoch": 2.5702978037280433, "percentage": 51.41, "elapsed_time": "2:16:29", "remaining_time": "2:09:01", "throughput": 8659.59, "total_tokens": 70917032} +{"current_steps": 105215, "total_steps": 204665, "loss": 0.0654, "lr": 1.125063846117647e-06, "epoch": 2.5704199545598905, "percentage": 51.41, "elapsed_time": "2:16:29", "remaining_time": "2:09:01", "throughput": 8659.66, "total_tokens": 70920616} +{"current_steps": 105220, "total_steps": 204665, "loss": 0.0005, "lr": 1.124979237603909e-06, "epoch": 2.5705421053917377, "percentage": 51.41, "elapsed_time": "2:16:30", "remaining_time": "2:09:00", "throughput": 8659.67, "total_tokens": 70923688} +{"current_steps": 105225, "total_steps": 204665, "loss": 0.0007, "lr": 1.1248946281812888e-06, "epoch": 2.570664256223585, "percentage": 51.41, "elapsed_time": "2:16:30", "remaining_time": "2:09:00", "throughput": 8659.7, "total_tokens": 70926888} +{"current_steps": 105230, "total_steps": 204665, "loss": 0.0345, "lr": 1.1248100178504008e-06, "epoch": 2.570786407055432, "percentage": 51.42, "elapsed_time": "2:16:30", "remaining_time": "2:08:59", "throughput": 8659.74, "total_tokens": 70930216} +{"current_steps": 105235, "total_steps": 204665, "loss": 0.0003, "lr": 1.124725406611861e-06, "epoch": 2.5709085578872792, "percentage": 51.42, "elapsed_time": "2:16:31", "remaining_time": "2:08:59", "throughput": 8659.82, "total_tokens": 70933928} +{"current_steps": 105240, "total_steps": 204665, "loss": 0.0002, "lr": 1.1246407944662846e-06, "epoch": 2.5710307087191264, "percentage": 51.42, "elapsed_time": "2:16:31", "remaining_time": "2:08:58", "throughput": 8659.91, "total_tokens": 70937768} +{"current_steps": 105245, "total_steps": 204665, "loss": 0.0001, "lr": 1.1245561814142865e-06, "epoch": 2.5711528595509736, "percentage": 51.42, "elapsed_time": "2:16:31", "remaining_time": "2:08:58", "throughput": 8659.9, "total_tokens": 70940584} +{"current_steps": 105250, "total_steps": 204665, "loss": 0.0001, "lr": 1.1244715674564827e-06, "epoch": 2.571275010382821, "percentage": 51.43, "elapsed_time": "2:16:32", "remaining_time": "2:08:58", "throughput": 8659.94, "total_tokens": 70943912} +{"current_steps": 105255, "total_steps": 204665, "loss": 0.0001, "lr": 1.1243869525934881e-06, "epoch": 2.571397161214668, "percentage": 51.43, "elapsed_time": "2:16:32", "remaining_time": "2:08:57", "throughput": 8659.98, "total_tokens": 70947304} +{"current_steps": 105260, "total_steps": 204665, "loss": 0.0585, "lr": 1.124302336825918e-06, "epoch": 2.571519312046515, "percentage": 51.43, "elapsed_time": "2:16:32", "remaining_time": "2:08:57", "throughput": 8660.0, "total_tokens": 70950440} +{"current_steps": 105265, "total_steps": 204665, "loss": 0.0586, "lr": 1.1242177201543881e-06, "epoch": 2.571641462878362, "percentage": 51.43, "elapsed_time": "2:16:33", "remaining_time": "2:08:56", "throughput": 8660.07, "total_tokens": 70954024} +{"current_steps": 105270, "total_steps": 204665, "loss": 0.0002, "lr": 1.1241331025795132e-06, "epoch": 2.5717636137102096, "percentage": 51.44, "elapsed_time": "2:16:33", "remaining_time": "2:08:56", "throughput": 8660.09, "total_tokens": 70957160} +{"current_steps": 105275, "total_steps": 204665, "loss": 0.0006, "lr": 1.1240484841019094e-06, "epoch": 2.5718857645420563, "percentage": 51.44, "elapsed_time": "2:16:33", "remaining_time": "2:08:55", "throughput": 8660.15, "total_tokens": 70960680} +{"current_steps": 105280, "total_steps": 204665, "loss": 0.0338, "lr": 1.1239638647221915e-06, "epoch": 2.572007915373904, "percentage": 51.44, "elapsed_time": "2:16:34", "remaining_time": "2:08:55", "throughput": 8660.18, "total_tokens": 70963944} +{"current_steps": 105285, "total_steps": 204665, "loss": 0.0267, "lr": 1.123879244440975e-06, "epoch": 2.5721300662057507, "percentage": 51.44, "elapsed_time": "2:16:34", "remaining_time": "2:08:55", "throughput": 8660.21, "total_tokens": 70967144} +{"current_steps": 105290, "total_steps": 204665, "loss": 0.0523, "lr": 1.1237946232588749e-06, "epoch": 2.572252217037598, "percentage": 51.45, "elapsed_time": "2:16:34", "remaining_time": "2:08:54", "throughput": 8660.27, "total_tokens": 70970728} +{"current_steps": 105295, "total_steps": 204665, "loss": 0.097, "lr": 1.1237100011765074e-06, "epoch": 2.572374367869445, "percentage": 51.45, "elapsed_time": "2:16:35", "remaining_time": "2:08:54", "throughput": 8660.32, "total_tokens": 70974120} +{"current_steps": 105300, "total_steps": 204665, "loss": 0.0057, "lr": 1.1236253781944873e-06, "epoch": 2.5724965187012923, "percentage": 51.45, "elapsed_time": "2:16:35", "remaining_time": "2:08:53", "throughput": 8660.37, "total_tokens": 70977576} +{"current_steps": 105305, "total_steps": 204665, "loss": 0.0995, "lr": 1.1235407543134305e-06, "epoch": 2.5726186695331394, "percentage": 51.45, "elapsed_time": "2:16:36", "remaining_time": "2:08:53", "throughput": 8660.39, "total_tokens": 70980648} +{"current_steps": 105310, "total_steps": 204665, "loss": 0.0181, "lr": 1.123456129533952e-06, "epoch": 2.5727408203649866, "percentage": 51.45, "elapsed_time": "2:16:36", "remaining_time": "2:08:52", "throughput": 8660.43, "total_tokens": 70984040} +{"current_steps": 105315, "total_steps": 204665, "loss": 0.0529, "lr": 1.1233715038566673e-06, "epoch": 2.572862971196834, "percentage": 51.46, "elapsed_time": "2:16:36", "remaining_time": "2:08:52", "throughput": 8660.46, "total_tokens": 70987240} +{"current_steps": 105320, "total_steps": 204665, "loss": 0.0002, "lr": 1.123286877282192e-06, "epoch": 2.572985122028681, "percentage": 51.46, "elapsed_time": "2:16:37", "remaining_time": "2:08:52", "throughput": 8660.52, "total_tokens": 70990760} +{"current_steps": 105325, "total_steps": 204665, "loss": 0.0819, "lr": 1.123202249811141e-06, "epoch": 2.573107272860528, "percentage": 51.46, "elapsed_time": "2:16:37", "remaining_time": "2:08:51", "throughput": 8660.55, "total_tokens": 70994024} +{"current_steps": 105330, "total_steps": 204665, "loss": 0.0822, "lr": 1.1231176214441305e-06, "epoch": 2.5732294236923754, "percentage": 51.46, "elapsed_time": "2:16:37", "remaining_time": "2:08:51", "throughput": 8660.62, "total_tokens": 70997672} +{"current_steps": 105335, "total_steps": 204665, "loss": 0.0558, "lr": 1.123032992181775e-06, "epoch": 2.5733515745242226, "percentage": 51.47, "elapsed_time": "2:16:38", "remaining_time": "2:08:50", "throughput": 8660.67, "total_tokens": 71001128} +{"current_steps": 105340, "total_steps": 204665, "loss": 0.035, "lr": 1.1229483620246912e-06, "epoch": 2.5734737253560698, "percentage": 51.47, "elapsed_time": "2:16:38", "remaining_time": "2:08:50", "throughput": 8660.71, "total_tokens": 71004456} +{"current_steps": 105345, "total_steps": 204665, "loss": 0.0001, "lr": 1.1228637309734935e-06, "epoch": 2.573595876187917, "percentage": 51.47, "elapsed_time": "2:16:38", "remaining_time": "2:08:49", "throughput": 8660.77, "total_tokens": 71008040} +{"current_steps": 105350, "total_steps": 204665, "loss": 0.0002, "lr": 1.1227790990287976e-06, "epoch": 2.573718027019764, "percentage": 51.47, "elapsed_time": "2:16:39", "remaining_time": "2:08:49", "throughput": 8660.82, "total_tokens": 71011432} +{"current_steps": 105355, "total_steps": 204665, "loss": 0.0002, "lr": 1.122694466191219e-06, "epoch": 2.5738401778516113, "percentage": 51.48, "elapsed_time": "2:16:39", "remaining_time": "2:08:49", "throughput": 8660.83, "total_tokens": 71014504} +{"current_steps": 105360, "total_steps": 204665, "loss": 0.0604, "lr": 1.1226098324613733e-06, "epoch": 2.573962328683458, "percentage": 51.48, "elapsed_time": "2:16:39", "remaining_time": "2:08:48", "throughput": 8660.83, "total_tokens": 71017384} +{"current_steps": 105365, "total_steps": 204665, "loss": 0.0862, "lr": 1.122525197839876e-06, "epoch": 2.5740844795153057, "percentage": 51.48, "elapsed_time": "2:16:40", "remaining_time": "2:08:48", "throughput": 8660.92, "total_tokens": 71021160} +{"current_steps": 105370, "total_steps": 204665, "loss": 0.0003, "lr": 1.1224405623273427e-06, "epoch": 2.5742066303471525, "percentage": 51.48, "elapsed_time": "2:16:40", "remaining_time": "2:08:47", "throughput": 8660.94, "total_tokens": 71024296} +{"current_steps": 105375, "total_steps": 204665, "loss": 0.0003, "lr": 1.1223559259243886e-06, "epoch": 2.574328781179, "percentage": 51.49, "elapsed_time": "2:16:40", "remaining_time": "2:08:47", "throughput": 8660.96, "total_tokens": 71027496} +{"current_steps": 105380, "total_steps": 204665, "loss": 0.0031, "lr": 1.1222712886316292e-06, "epoch": 2.574450932010847, "percentage": 51.49, "elapsed_time": "2:16:41", "remaining_time": "2:08:46", "throughput": 8661.0, "total_tokens": 71030760} +{"current_steps": 105385, "total_steps": 204665, "loss": 0.0002, "lr": 1.12218665044968e-06, "epoch": 2.574573082842694, "percentage": 51.49, "elapsed_time": "2:16:41", "remaining_time": "2:08:46", "throughput": 8661.07, "total_tokens": 71034472} +{"current_steps": 105390, "total_steps": 204665, "loss": 0.0005, "lr": 1.1221020113791567e-06, "epoch": 2.5746952336745412, "percentage": 51.49, "elapsed_time": "2:16:41", "remaining_time": "2:08:46", "throughput": 8661.09, "total_tokens": 71037544} +{"current_steps": 105395, "total_steps": 204665, "loss": 0.1522, "lr": 1.122017371420675e-06, "epoch": 2.5748173845063884, "percentage": 51.5, "elapsed_time": "2:16:42", "remaining_time": "2:08:45", "throughput": 8661.17, "total_tokens": 71041320} +{"current_steps": 105400, "total_steps": 204665, "loss": 0.0685, "lr": 1.1219327305748499e-06, "epoch": 2.5749395353382356, "percentage": 51.5, "elapsed_time": "2:16:42", "remaining_time": "2:08:45", "throughput": 8661.18, "total_tokens": 71044328} +{"current_steps": 105405, "total_steps": 204665, "loss": 0.0003, "lr": 1.121848088842297e-06, "epoch": 2.575061686170083, "percentage": 51.5, "elapsed_time": "2:16:42", "remaining_time": "2:08:44", "throughput": 8661.19, "total_tokens": 71047400} +{"current_steps": 105410, "total_steps": 204665, "loss": 0.0553, "lr": 1.1217634462236323e-06, "epoch": 2.57518383700193, "percentage": 51.5, "elapsed_time": "2:16:43", "remaining_time": "2:08:44", "throughput": 8661.24, "total_tokens": 71050792} +{"current_steps": 105415, "total_steps": 204665, "loss": 0.0514, "lr": 1.121678802719471e-06, "epoch": 2.575305987833777, "percentage": 51.51, "elapsed_time": "2:16:43", "remaining_time": "2:08:43", "throughput": 8661.26, "total_tokens": 71053928} +{"current_steps": 105420, "total_steps": 204665, "loss": 0.0257, "lr": 1.1215941583304288e-06, "epoch": 2.5754281386656244, "percentage": 51.51, "elapsed_time": "2:16:44", "remaining_time": "2:08:43", "throughput": 8661.31, "total_tokens": 71057384} +{"current_steps": 105425, "total_steps": 204665, "loss": 0.0033, "lr": 1.121509513057121e-06, "epoch": 2.5755502894974716, "percentage": 51.51, "elapsed_time": "2:16:44", "remaining_time": "2:08:43", "throughput": 8661.33, "total_tokens": 71060520} +{"current_steps": 105430, "total_steps": 204665, "loss": 0.0305, "lr": 1.1214248669001635e-06, "epoch": 2.5756724403293187, "percentage": 51.51, "elapsed_time": "2:16:44", "remaining_time": "2:08:42", "throughput": 8661.4, "total_tokens": 71064104} +{"current_steps": 105435, "total_steps": 204665, "loss": 0.0323, "lr": 1.1213402198601715e-06, "epoch": 2.575794591161166, "percentage": 51.52, "elapsed_time": "2:16:45", "remaining_time": "2:08:42", "throughput": 8661.4, "total_tokens": 71067112} +{"current_steps": 105440, "total_steps": 204665, "loss": 0.0435, "lr": 1.1212555719377612e-06, "epoch": 2.575916741993013, "percentage": 51.52, "elapsed_time": "2:16:45", "remaining_time": "2:08:41", "throughput": 8661.44, "total_tokens": 71070376} +{"current_steps": 105445, "total_steps": 204665, "loss": 0.0008, "lr": 1.121170923133547e-06, "epoch": 2.57603889282486, "percentage": 51.52, "elapsed_time": "2:16:45", "remaining_time": "2:08:41", "throughput": 8661.47, "total_tokens": 71073704} +{"current_steps": 105450, "total_steps": 204665, "loss": 0.0006, "lr": 1.1210862734481459e-06, "epoch": 2.5761610436567075, "percentage": 51.52, "elapsed_time": "2:16:46", "remaining_time": "2:08:40", "throughput": 8661.49, "total_tokens": 71076840} +{"current_steps": 105455, "total_steps": 204665, "loss": 0.0011, "lr": 1.1210016228821729e-06, "epoch": 2.5762831944885543, "percentage": 51.53, "elapsed_time": "2:16:46", "remaining_time": "2:08:40", "throughput": 8661.54, "total_tokens": 71080168} +{"current_steps": 105460, "total_steps": 204665, "loss": 0.0003, "lr": 1.1209169714362431e-06, "epoch": 2.576405345320402, "percentage": 51.53, "elapsed_time": "2:16:46", "remaining_time": "2:08:40", "throughput": 8661.61, "total_tokens": 71083816} +{"current_steps": 105465, "total_steps": 204665, "loss": 0.0002, "lr": 1.120832319110973e-06, "epoch": 2.5765274961522486, "percentage": 51.53, "elapsed_time": "2:16:47", "remaining_time": "2:08:39", "throughput": 8661.64, "total_tokens": 71087080} +{"current_steps": 105470, "total_steps": 204665, "loss": 0.0426, "lr": 1.1207476659069776e-06, "epoch": 2.576649646984096, "percentage": 51.53, "elapsed_time": "2:16:47", "remaining_time": "2:08:39", "throughput": 8661.7, "total_tokens": 71090664} +{"current_steps": 105475, "total_steps": 204665, "loss": 0.0008, "lr": 1.1206630118248725e-06, "epoch": 2.576771797815943, "percentage": 51.54, "elapsed_time": "2:16:47", "remaining_time": "2:08:38", "throughput": 8661.79, "total_tokens": 71094440} +{"current_steps": 105480, "total_steps": 204665, "loss": 0.0001, "lr": 1.1205783568652738e-06, "epoch": 2.57689394864779, "percentage": 51.54, "elapsed_time": "2:16:48", "remaining_time": "2:08:38", "throughput": 8661.81, "total_tokens": 71097576} +{"current_steps": 105485, "total_steps": 204665, "loss": 0.0618, "lr": 1.1204937010287968e-06, "epoch": 2.5770160994796374, "percentage": 51.54, "elapsed_time": "2:16:48", "remaining_time": "2:08:37", "throughput": 8661.84, "total_tokens": 71100840} +{"current_steps": 105490, "total_steps": 204665, "loss": 0.0001, "lr": 1.1204090443160573e-06, "epoch": 2.5771382503114846, "percentage": 51.54, "elapsed_time": "2:16:48", "remaining_time": "2:08:37", "throughput": 8661.87, "total_tokens": 71104040} +{"current_steps": 105495, "total_steps": 204665, "loss": 0.0431, "lr": 1.1203243867276706e-06, "epoch": 2.5772604011433318, "percentage": 51.55, "elapsed_time": "2:16:49", "remaining_time": "2:08:37", "throughput": 8661.93, "total_tokens": 71107624} +{"current_steps": 105500, "total_steps": 204665, "loss": 0.0615, "lr": 1.1202397282642529e-06, "epoch": 2.577382551975179, "percentage": 51.55, "elapsed_time": "2:16:49", "remaining_time": "2:08:36", "throughput": 8661.98, "total_tokens": 71111016} +{"current_steps": 105505, "total_steps": 204665, "loss": 0.0249, "lr": 1.1201550689264191e-06, "epoch": 2.577504702807026, "percentage": 51.55, "elapsed_time": "2:16:49", "remaining_time": "2:08:36", "throughput": 8662.05, "total_tokens": 71114664} +{"current_steps": 105510, "total_steps": 204665, "loss": 0.0515, "lr": 1.1200704087147854e-06, "epoch": 2.5776268536388733, "percentage": 51.55, "elapsed_time": "2:16:50", "remaining_time": "2:08:35", "throughput": 8662.11, "total_tokens": 71118248} +{"current_steps": 105515, "total_steps": 204665, "loss": 0.0328, "lr": 1.1199857476299678e-06, "epoch": 2.5777490044707205, "percentage": 51.55, "elapsed_time": "2:16:50", "remaining_time": "2:08:35", "throughput": 8662.14, "total_tokens": 71121448} +{"current_steps": 105520, "total_steps": 204665, "loss": 0.038, "lr": 1.1199010856725813e-06, "epoch": 2.5778711553025677, "percentage": 51.56, "elapsed_time": "2:16:50", "remaining_time": "2:08:34", "throughput": 8662.27, "total_tokens": 71125736} +{"current_steps": 105525, "total_steps": 204665, "loss": 0.0903, "lr": 1.119816422843242e-06, "epoch": 2.577993306134415, "percentage": 51.56, "elapsed_time": "2:16:51", "remaining_time": "2:08:34", "throughput": 8662.32, "total_tokens": 71129064} +{"current_steps": 105530, "total_steps": 204665, "loss": 0.0002, "lr": 1.1197317591425656e-06, "epoch": 2.578115456966262, "percentage": 51.56, "elapsed_time": "2:16:51", "remaining_time": "2:08:34", "throughput": 8662.35, "total_tokens": 71132328} +{"current_steps": 105535, "total_steps": 204665, "loss": 0.0002, "lr": 1.1196470945711674e-06, "epoch": 2.5782376077981093, "percentage": 51.56, "elapsed_time": "2:16:52", "remaining_time": "2:08:33", "throughput": 8662.39, "total_tokens": 71135720} +{"current_steps": 105540, "total_steps": 204665, "loss": 0.0004, "lr": 1.119562429129663e-06, "epoch": 2.578359758629956, "percentage": 51.57, "elapsed_time": "2:16:52", "remaining_time": "2:08:33", "throughput": 8662.42, "total_tokens": 71138920} +{"current_steps": 105545, "total_steps": 204665, "loss": 0.0558, "lr": 1.1194777628186686e-06, "epoch": 2.5784819094618037, "percentage": 51.57, "elapsed_time": "2:16:52", "remaining_time": "2:08:32", "throughput": 8662.45, "total_tokens": 71142184} +{"current_steps": 105550, "total_steps": 204665, "loss": 0.1206, "lr": 1.1193930956388004e-06, "epoch": 2.5786040602936504, "percentage": 51.57, "elapsed_time": "2:16:53", "remaining_time": "2:08:32", "throughput": 8662.52, "total_tokens": 71145768} +{"current_steps": 105555, "total_steps": 204665, "loss": 0.072, "lr": 1.1193084275906727e-06, "epoch": 2.5787262111254976, "percentage": 51.57, "elapsed_time": "2:16:53", "remaining_time": "2:08:31", "throughput": 8662.56, "total_tokens": 71149032} +{"current_steps": 105560, "total_steps": 204665, "loss": 0.0004, "lr": 1.1192237586749025e-06, "epoch": 2.578848361957345, "percentage": 51.58, "elapsed_time": "2:16:53", "remaining_time": "2:08:31", "throughput": 8662.58, "total_tokens": 71152232} +{"current_steps": 105565, "total_steps": 204665, "loss": 0.0338, "lr": 1.119139088892105e-06, "epoch": 2.578970512789192, "percentage": 51.58, "elapsed_time": "2:16:54", "remaining_time": "2:08:31", "throughput": 8662.66, "total_tokens": 71155944} +{"current_steps": 105570, "total_steps": 204665, "loss": 0.0276, "lr": 1.119054418242896e-06, "epoch": 2.579092663621039, "percentage": 51.58, "elapsed_time": "2:16:54", "remaining_time": "2:08:30", "throughput": 8662.7, "total_tokens": 71159272} +{"current_steps": 105575, "total_steps": 204665, "loss": 0.0489, "lr": 1.1189697467278911e-06, "epoch": 2.5792148144528864, "percentage": 51.58, "elapsed_time": "2:16:54", "remaining_time": "2:08:30", "throughput": 8662.74, "total_tokens": 71162664} +{"current_steps": 105580, "total_steps": 204665, "loss": 0.0843, "lr": 1.1188850743477062e-06, "epoch": 2.5793369652847336, "percentage": 51.59, "elapsed_time": "2:16:55", "remaining_time": "2:08:29", "throughput": 8662.79, "total_tokens": 71166056} +{"current_steps": 105585, "total_steps": 204665, "loss": 0.0413, "lr": 1.1188004011029573e-06, "epoch": 2.5794591161165807, "percentage": 51.59, "elapsed_time": "2:16:55", "remaining_time": "2:08:29", "throughput": 8662.89, "total_tokens": 71170088} +{"current_steps": 105590, "total_steps": 204665, "loss": 0.0483, "lr": 1.1187157269942598e-06, "epoch": 2.579581266948428, "percentage": 51.59, "elapsed_time": "2:16:55", "remaining_time": "2:08:28", "throughput": 8662.94, "total_tokens": 71173480} +{"current_steps": 105595, "total_steps": 204665, "loss": 0.1053, "lr": 1.1186310520222297e-06, "epoch": 2.579703417780275, "percentage": 51.59, "elapsed_time": "2:16:56", "remaining_time": "2:08:28", "throughput": 8662.99, "total_tokens": 71176936} +{"current_steps": 105600, "total_steps": 204665, "loss": 0.0391, "lr": 1.1185463761874823e-06, "epoch": 2.5798255686121223, "percentage": 51.6, "elapsed_time": "2:16:56", "remaining_time": "2:08:28", "throughput": 8663.02, "total_tokens": 71180200} +{"current_steps": 105605, "total_steps": 204665, "loss": 0.0013, "lr": 1.1184616994906341e-06, "epoch": 2.5799477194439695, "percentage": 51.6, "elapsed_time": "2:16:56", "remaining_time": "2:08:27", "throughput": 8663.05, "total_tokens": 71183400} +{"current_steps": 105610, "total_steps": 204665, "loss": 0.0414, "lr": 1.1183770219323005e-06, "epoch": 2.5800698702758167, "percentage": 51.6, "elapsed_time": "2:16:57", "remaining_time": "2:08:27", "throughput": 8663.17, "total_tokens": 71187624} +{"current_steps": 105615, "total_steps": 204665, "loss": 0.0002, "lr": 1.1182923435130972e-06, "epoch": 2.580192021107664, "percentage": 51.6, "elapsed_time": "2:16:57", "remaining_time": "2:08:26", "throughput": 8663.24, "total_tokens": 71191208} +{"current_steps": 105620, "total_steps": 204665, "loss": 0.0011, "lr": 1.1182076642336405e-06, "epoch": 2.580314171939511, "percentage": 51.61, "elapsed_time": "2:16:57", "remaining_time": "2:08:26", "throughput": 8663.31, "total_tokens": 71194920} +{"current_steps": 105625, "total_steps": 204665, "loss": 0.0513, "lr": 1.1181229840945457e-06, "epoch": 2.580436322771358, "percentage": 51.61, "elapsed_time": "2:16:58", "remaining_time": "2:08:25", "throughput": 8663.33, "total_tokens": 71197992} +{"current_steps": 105630, "total_steps": 204665, "loss": 0.06, "lr": 1.1180383030964288e-06, "epoch": 2.5805584736032054, "percentage": 51.61, "elapsed_time": "2:16:58", "remaining_time": "2:08:25", "throughput": 8663.33, "total_tokens": 71200936} +{"current_steps": 105635, "total_steps": 204665, "loss": 0.0589, "lr": 1.1179536212399057e-06, "epoch": 2.580680624435052, "percentage": 51.61, "elapsed_time": "2:16:59", "remaining_time": "2:08:25", "throughput": 8663.41, "total_tokens": 71204648} +{"current_steps": 105640, "total_steps": 204665, "loss": 0.0012, "lr": 1.1178689385255919e-06, "epoch": 2.5808027752669, "percentage": 51.62, "elapsed_time": "2:16:59", "remaining_time": "2:08:24", "throughput": 8663.46, "total_tokens": 71208104} +{"current_steps": 105645, "total_steps": 204665, "loss": 0.0709, "lr": 1.1177842549541036e-06, "epoch": 2.5809249260987466, "percentage": 51.62, "elapsed_time": "2:16:59", "remaining_time": "2:08:24", "throughput": 8663.49, "total_tokens": 71211368} +{"current_steps": 105650, "total_steps": 204665, "loss": 0.0003, "lr": 1.1176995705260566e-06, "epoch": 2.5810470769305938, "percentage": 51.62, "elapsed_time": "2:17:00", "remaining_time": "2:08:23", "throughput": 8663.51, "total_tokens": 71214568} +{"current_steps": 105655, "total_steps": 204665, "loss": 0.0699, "lr": 1.1176148852420666e-06, "epoch": 2.581169227762441, "percentage": 51.62, "elapsed_time": "2:17:00", "remaining_time": "2:08:23", "throughput": 8663.51, "total_tokens": 71217512} +{"current_steps": 105660, "total_steps": 204665, "loss": 0.0004, "lr": 1.1175301991027494e-06, "epoch": 2.581291378594288, "percentage": 51.63, "elapsed_time": "2:17:00", "remaining_time": "2:08:22", "throughput": 8663.58, "total_tokens": 71221160} +{"current_steps": 105665, "total_steps": 204665, "loss": 0.1022, "lr": 1.117445512108721e-06, "epoch": 2.5814135294261353, "percentage": 51.63, "elapsed_time": "2:17:01", "remaining_time": "2:08:22", "throughput": 8663.62, "total_tokens": 71224488} +{"current_steps": 105670, "total_steps": 204665, "loss": 0.0003, "lr": 1.1173608242605974e-06, "epoch": 2.5815356802579825, "percentage": 51.63, "elapsed_time": "2:17:01", "remaining_time": "2:08:22", "throughput": 8663.66, "total_tokens": 71227752} +{"current_steps": 105675, "total_steps": 204665, "loss": 0.0007, "lr": 1.1172761355589941e-06, "epoch": 2.5816578310898297, "percentage": 51.63, "elapsed_time": "2:17:01", "remaining_time": "2:08:21", "throughput": 8663.69, "total_tokens": 71230952} +{"current_steps": 105680, "total_steps": 204665, "loss": 0.0002, "lr": 1.1171914460045278e-06, "epoch": 2.581779981921677, "percentage": 51.64, "elapsed_time": "2:17:02", "remaining_time": "2:08:21", "throughput": 8663.73, "total_tokens": 71234280} +{"current_steps": 105685, "total_steps": 204665, "loss": 0.0385, "lr": 1.1171067555978133e-06, "epoch": 2.581902132753524, "percentage": 51.64, "elapsed_time": "2:17:02", "remaining_time": "2:08:20", "throughput": 8663.77, "total_tokens": 71237608} +{"current_steps": 105690, "total_steps": 204665, "loss": 0.0005, "lr": 1.117022064339467e-06, "epoch": 2.5820242835853713, "percentage": 51.64, "elapsed_time": "2:17:02", "remaining_time": "2:08:20", "throughput": 8663.83, "total_tokens": 71241192} +{"current_steps": 105695, "total_steps": 204665, "loss": 0.0006, "lr": 1.1169373722301044e-06, "epoch": 2.5821464344172185, "percentage": 51.64, "elapsed_time": "2:17:03", "remaining_time": "2:08:19", "throughput": 8663.97, "total_tokens": 71245544} +{"current_steps": 105700, "total_steps": 204665, "loss": 0.0001, "lr": 1.1168526792703422e-06, "epoch": 2.5822685852490657, "percentage": 51.65, "elapsed_time": "2:17:03", "remaining_time": "2:08:19", "throughput": 8663.99, "total_tokens": 71248680} +{"current_steps": 105705, "total_steps": 204665, "loss": 0.1098, "lr": 1.1167679854607962e-06, "epoch": 2.582390736080913, "percentage": 51.65, "elapsed_time": "2:17:03", "remaining_time": "2:08:19", "throughput": 8663.99, "total_tokens": 71251624} +{"current_steps": 105710, "total_steps": 204665, "loss": 0.0002, "lr": 1.1166832908020815e-06, "epoch": 2.58251288691276, "percentage": 51.65, "elapsed_time": "2:17:04", "remaining_time": "2:08:18", "throughput": 8664.05, "total_tokens": 71255144} +{"current_steps": 105715, "total_steps": 204665, "loss": 0.0002, "lr": 1.1165985952948146e-06, "epoch": 2.5826350377446072, "percentage": 51.65, "elapsed_time": "2:17:04", "remaining_time": "2:08:18", "throughput": 8664.09, "total_tokens": 71258600} +{"current_steps": 105720, "total_steps": 204665, "loss": 0.1239, "lr": 1.1165138989396114e-06, "epoch": 2.582757188576454, "percentage": 51.66, "elapsed_time": "2:17:04", "remaining_time": "2:08:17", "throughput": 8664.14, "total_tokens": 71261992} +{"current_steps": 105725, "total_steps": 204665, "loss": 0.0002, "lr": 1.116429201737088e-06, "epoch": 2.5828793394083016, "percentage": 51.66, "elapsed_time": "2:17:05", "remaining_time": "2:08:17", "throughput": 8664.17, "total_tokens": 71265256} +{"current_steps": 105730, "total_steps": 204665, "loss": 0.0001, "lr": 1.11634450368786e-06, "epoch": 2.5830014902401484, "percentage": 51.66, "elapsed_time": "2:17:05", "remaining_time": "2:08:16", "throughput": 8664.25, "total_tokens": 71268968} +{"current_steps": 105735, "total_steps": 204665, "loss": 0.0003, "lr": 1.1162598047925434e-06, "epoch": 2.5831236410719955, "percentage": 51.66, "elapsed_time": "2:17:05", "remaining_time": "2:08:16", "throughput": 8664.3, "total_tokens": 71272360} +{"current_steps": 105740, "total_steps": 204665, "loss": 0.0006, "lr": 1.1161751050517544e-06, "epoch": 2.5832457919038427, "percentage": 51.66, "elapsed_time": "2:17:06", "remaining_time": "2:08:16", "throughput": 8664.31, "total_tokens": 71275432} +{"current_steps": 105745, "total_steps": 204665, "loss": 0.0468, "lr": 1.1160904044661086e-06, "epoch": 2.58336794273569, "percentage": 51.67, "elapsed_time": "2:17:06", "remaining_time": "2:08:15", "throughput": 8664.34, "total_tokens": 71278632} +{"current_steps": 105750, "total_steps": 204665, "loss": 0.0655, "lr": 1.1160057030362221e-06, "epoch": 2.583490093567537, "percentage": 51.67, "elapsed_time": "2:17:07", "remaining_time": "2:08:15", "throughput": 8664.38, "total_tokens": 71281960} +{"current_steps": 105755, "total_steps": 204665, "loss": 0.0002, "lr": 1.115921000762711e-06, "epoch": 2.5836122443993843, "percentage": 51.67, "elapsed_time": "2:17:07", "remaining_time": "2:08:14", "throughput": 8664.41, "total_tokens": 71285288} +{"current_steps": 105760, "total_steps": 204665, "loss": 0.0257, "lr": 1.115836297646191e-06, "epoch": 2.5837343952312315, "percentage": 51.67, "elapsed_time": "2:17:07", "remaining_time": "2:08:14", "throughput": 8664.45, "total_tokens": 71288616} +{"current_steps": 105765, "total_steps": 204665, "loss": 0.062, "lr": 1.1157515936872785e-06, "epoch": 2.5838565460630787, "percentage": 51.68, "elapsed_time": "2:17:08", "remaining_time": "2:08:14", "throughput": 8664.52, "total_tokens": 71292264} +{"current_steps": 105770, "total_steps": 204665, "loss": 0.0001, "lr": 1.1156668888865895e-06, "epoch": 2.583978696894926, "percentage": 51.68, "elapsed_time": "2:17:08", "remaining_time": "2:08:13", "throughput": 8664.57, "total_tokens": 71295656} +{"current_steps": 105775, "total_steps": 204665, "loss": 0.1369, "lr": 1.1155821832447394e-06, "epoch": 2.584100847726773, "percentage": 51.68, "elapsed_time": "2:17:08", "remaining_time": "2:08:13", "throughput": 8664.63, "total_tokens": 71299240} +{"current_steps": 105780, "total_steps": 204665, "loss": 0.0002, "lr": 1.1154974767623448e-06, "epoch": 2.5842229985586203, "percentage": 51.68, "elapsed_time": "2:17:09", "remaining_time": "2:08:12", "throughput": 8664.65, "total_tokens": 71302376} +{"current_steps": 105785, "total_steps": 204665, "loss": 0.0633, "lr": 1.1154127694400215e-06, "epoch": 2.5843451493904674, "percentage": 51.69, "elapsed_time": "2:17:09", "remaining_time": "2:08:12", "throughput": 8664.69, "total_tokens": 71305704} +{"current_steps": 105790, "total_steps": 204665, "loss": 0.1287, "lr": 1.1153280612783856e-06, "epoch": 2.5844673002223146, "percentage": 51.69, "elapsed_time": "2:17:09", "remaining_time": "2:08:11", "throughput": 8664.78, "total_tokens": 71309544} +{"current_steps": 105795, "total_steps": 204665, "loss": 0.0481, "lr": 1.1152433522780526e-06, "epoch": 2.584589451054162, "percentage": 51.69, "elapsed_time": "2:17:10", "remaining_time": "2:08:11", "throughput": 8664.85, "total_tokens": 71313256} +{"current_steps": 105800, "total_steps": 204665, "loss": 0.1074, "lr": 1.1151586424396394e-06, "epoch": 2.584711601886009, "percentage": 51.69, "elapsed_time": "2:17:10", "remaining_time": "2:08:11", "throughput": 8664.89, "total_tokens": 71316520} +{"current_steps": 105805, "total_steps": 204665, "loss": 0.0003, "lr": 1.1150739317637613e-06, "epoch": 2.5848337527178558, "percentage": 51.7, "elapsed_time": "2:17:10", "remaining_time": "2:08:10", "throughput": 8664.93, "total_tokens": 71319912} +{"current_steps": 105810, "total_steps": 204665, "loss": 0.0219, "lr": 1.1149892202510347e-06, "epoch": 2.5849559035497034, "percentage": 51.7, "elapsed_time": "2:17:11", "remaining_time": "2:08:10", "throughput": 8664.95, "total_tokens": 71323048} +{"current_steps": 105815, "total_steps": 204665, "loss": 0.0004, "lr": 1.1149045079020755e-06, "epoch": 2.58507805438155, "percentage": 51.7, "elapsed_time": "2:17:11", "remaining_time": "2:08:09", "throughput": 8664.98, "total_tokens": 71326248} +{"current_steps": 105820, "total_steps": 204665, "loss": 0.049, "lr": 1.1148197947174997e-06, "epoch": 2.5852002052133978, "percentage": 51.7, "elapsed_time": "2:17:11", "remaining_time": "2:08:09", "throughput": 8665.0, "total_tokens": 71329448} +{"current_steps": 105825, "total_steps": 204665, "loss": 0.0583, "lr": 1.1147350806979237e-06, "epoch": 2.5853223560452445, "percentage": 51.71, "elapsed_time": "2:17:12", "remaining_time": "2:08:08", "throughput": 8665.06, "total_tokens": 71332968} +{"current_steps": 105830, "total_steps": 204665, "loss": 0.0449, "lr": 1.1146503658439632e-06, "epoch": 2.5854445068770917, "percentage": 51.71, "elapsed_time": "2:17:12", "remaining_time": "2:08:08", "throughput": 8665.11, "total_tokens": 71336360} +{"current_steps": 105835, "total_steps": 204665, "loss": 0.0003, "lr": 1.1145656501562346e-06, "epoch": 2.585566657708939, "percentage": 51.71, "elapsed_time": "2:17:12", "remaining_time": "2:08:08", "throughput": 8665.21, "total_tokens": 71340392} +{"current_steps": 105840, "total_steps": 204665, "loss": 0.0697, "lr": 1.1144809336353538e-06, "epoch": 2.585688808540786, "percentage": 51.71, "elapsed_time": "2:17:13", "remaining_time": "2:08:07", "throughput": 8665.24, "total_tokens": 71343592} +{"current_steps": 105845, "total_steps": 204665, "loss": 0.0399, "lr": 1.1143962162819367e-06, "epoch": 2.5858109593726333, "percentage": 51.72, "elapsed_time": "2:17:13", "remaining_time": "2:08:07", "throughput": 8665.28, "total_tokens": 71346856} +{"current_steps": 105850, "total_steps": 204665, "loss": 0.0008, "lr": 1.1143114980965993e-06, "epoch": 2.5859331102044805, "percentage": 51.72, "elapsed_time": "2:17:13", "remaining_time": "2:08:06", "throughput": 8665.27, "total_tokens": 71349672} +{"current_steps": 105855, "total_steps": 204665, "loss": 0.0481, "lr": 1.1142267790799581e-06, "epoch": 2.5860552610363277, "percentage": 51.72, "elapsed_time": "2:17:14", "remaining_time": "2:08:06", "throughput": 8665.32, "total_tokens": 71353192} +{"current_steps": 105860, "total_steps": 204665, "loss": 0.0546, "lr": 1.1141420592326292e-06, "epoch": 2.586177411868175, "percentage": 51.72, "elapsed_time": "2:17:14", "remaining_time": "2:08:05", "throughput": 8665.34, "total_tokens": 71356264} +{"current_steps": 105865, "total_steps": 204665, "loss": 0.0001, "lr": 1.1140573385552285e-06, "epoch": 2.586299562700022, "percentage": 51.73, "elapsed_time": "2:17:15", "remaining_time": "2:08:05", "throughput": 8665.42, "total_tokens": 71360040} +{"current_steps": 105870, "total_steps": 204665, "loss": 0.0003, "lr": 1.113972617048372e-06, "epoch": 2.5864217135318692, "percentage": 51.73, "elapsed_time": "2:17:15", "remaining_time": "2:08:05", "throughput": 8665.45, "total_tokens": 71363304} +{"current_steps": 105875, "total_steps": 204665, "loss": 0.038, "lr": 1.1138878947126761e-06, "epoch": 2.5865438643637164, "percentage": 51.73, "elapsed_time": "2:17:15", "remaining_time": "2:08:04", "throughput": 8665.51, "total_tokens": 71366824} +{"current_steps": 105880, "total_steps": 204665, "loss": 0.0006, "lr": 1.113803171548757e-06, "epoch": 2.5866660151955636, "percentage": 51.73, "elapsed_time": "2:17:16", "remaining_time": "2:08:04", "throughput": 8665.58, "total_tokens": 71370536} +{"current_steps": 105885, "total_steps": 204665, "loss": 0.0009, "lr": 1.1137184475572305e-06, "epoch": 2.586788166027411, "percentage": 51.74, "elapsed_time": "2:17:16", "remaining_time": "2:08:03", "throughput": 8665.58, "total_tokens": 71373480} +{"current_steps": 105890, "total_steps": 204665, "loss": 0.0489, "lr": 1.1136337227387126e-06, "epoch": 2.5869103168592575, "percentage": 51.74, "elapsed_time": "2:17:16", "remaining_time": "2:08:03", "throughput": 8665.6, "total_tokens": 71376616} +{"current_steps": 105895, "total_steps": 204665, "loss": 0.0004, "lr": 1.11354899709382e-06, "epoch": 2.587032467691105, "percentage": 51.74, "elapsed_time": "2:17:17", "remaining_time": "2:08:02", "throughput": 8665.66, "total_tokens": 71380200} +{"current_steps": 105900, "total_steps": 204665, "loss": 0.0481, "lr": 1.1134642706231685e-06, "epoch": 2.587154618522952, "percentage": 51.74, "elapsed_time": "2:17:17", "remaining_time": "2:08:02", "throughput": 8665.69, "total_tokens": 71383464} +{"current_steps": 105905, "total_steps": 204665, "loss": 0.0592, "lr": 1.1133795433273742e-06, "epoch": 2.5872767693547996, "percentage": 51.75, "elapsed_time": "2:17:17", "remaining_time": "2:08:02", "throughput": 8665.73, "total_tokens": 71386792} +{"current_steps": 105910, "total_steps": 204665, "loss": 0.0425, "lr": 1.1132948152070535e-06, "epoch": 2.5873989201866463, "percentage": 51.75, "elapsed_time": "2:17:18", "remaining_time": "2:08:01", "throughput": 8665.79, "total_tokens": 71390312} +{"current_steps": 105915, "total_steps": 204665, "loss": 0.0389, "lr": 1.1132100862628222e-06, "epoch": 2.5875210710184935, "percentage": 51.75, "elapsed_time": "2:17:18", "remaining_time": "2:08:01", "throughput": 8665.87, "total_tokens": 71394088} +{"current_steps": 105920, "total_steps": 204665, "loss": 0.0007, "lr": 1.1131253564952969e-06, "epoch": 2.5876432218503407, "percentage": 51.75, "elapsed_time": "2:17:18", "remaining_time": "2:08:00", "throughput": 8665.98, "total_tokens": 71398184} +{"current_steps": 105925, "total_steps": 204665, "loss": 0.0373, "lr": 1.1130406259050935e-06, "epoch": 2.587765372682188, "percentage": 51.76, "elapsed_time": "2:17:19", "remaining_time": "2:08:00", "throughput": 8666.02, "total_tokens": 71401448} +{"current_steps": 105930, "total_steps": 204665, "loss": 0.0506, "lr": 1.1129558944928284e-06, "epoch": 2.587887523514035, "percentage": 51.76, "elapsed_time": "2:17:19", "remaining_time": "2:07:59", "throughput": 8666.05, "total_tokens": 71404776} +{"current_steps": 105935, "total_steps": 204665, "loss": 0.1251, "lr": 1.1128711622591173e-06, "epoch": 2.5880096743458822, "percentage": 51.76, "elapsed_time": "2:17:19", "remaining_time": "2:07:59", "throughput": 8666.1, "total_tokens": 71408168} +{"current_steps": 105940, "total_steps": 204665, "loss": 0.0879, "lr": 1.1127864292045773e-06, "epoch": 2.5881318251777294, "percentage": 51.76, "elapsed_time": "2:17:20", "remaining_time": "2:07:59", "throughput": 8666.13, "total_tokens": 71411432} +{"current_steps": 105945, "total_steps": 204665, "loss": 0.0024, "lr": 1.1127016953298237e-06, "epoch": 2.5882539760095766, "percentage": 51.77, "elapsed_time": "2:17:20", "remaining_time": "2:07:58", "throughput": 8666.15, "total_tokens": 71414632} +{"current_steps": 105950, "total_steps": 204665, "loss": 0.0003, "lr": 1.112616960635473e-06, "epoch": 2.588376126841424, "percentage": 51.77, "elapsed_time": "2:17:20", "remaining_time": "2:07:58", "throughput": 8666.21, "total_tokens": 71418152} +{"current_steps": 105955, "total_steps": 204665, "loss": 0.0001, "lr": 1.1125322251221416e-06, "epoch": 2.588498277673271, "percentage": 51.77, "elapsed_time": "2:17:21", "remaining_time": "2:07:57", "throughput": 8666.22, "total_tokens": 71421224} +{"current_steps": 105960, "total_steps": 204665, "loss": 0.1095, "lr": 1.1124474887904457e-06, "epoch": 2.588620428505118, "percentage": 51.77, "elapsed_time": "2:17:21", "remaining_time": "2:07:57", "throughput": 8666.26, "total_tokens": 71424552} +{"current_steps": 105965, "total_steps": 204665, "loss": 0.0455, "lr": 1.1123627516410013e-06, "epoch": 2.5887425793369654, "percentage": 51.77, "elapsed_time": "2:17:22", "remaining_time": "2:07:56", "throughput": 8666.28, "total_tokens": 71427688} +{"current_steps": 105970, "total_steps": 204665, "loss": 0.0001, "lr": 1.1122780136744247e-06, "epoch": 2.5888647301688126, "percentage": 51.78, "elapsed_time": "2:17:22", "remaining_time": "2:07:56", "throughput": 8666.32, "total_tokens": 71431080} +{"current_steps": 105975, "total_steps": 204665, "loss": 0.0223, "lr": 1.1121932748913318e-06, "epoch": 2.5889868810006598, "percentage": 51.78, "elapsed_time": "2:17:22", "remaining_time": "2:07:56", "throughput": 8666.37, "total_tokens": 71434536} +{"current_steps": 105980, "total_steps": 204665, "loss": 0.0356, "lr": 1.11210853529234e-06, "epoch": 2.589109031832507, "percentage": 51.78, "elapsed_time": "2:17:23", "remaining_time": "2:07:55", "throughput": 8666.43, "total_tokens": 71438120} +{"current_steps": 105985, "total_steps": 204665, "loss": 0.0387, "lr": 1.1120237948780642e-06, "epoch": 2.5892311826643537, "percentage": 51.78, "elapsed_time": "2:17:23", "remaining_time": "2:07:55", "throughput": 8666.46, "total_tokens": 71441384} +{"current_steps": 105990, "total_steps": 204665, "loss": 0.0381, "lr": 1.1119390536491218e-06, "epoch": 2.5893533334962013, "percentage": 51.79, "elapsed_time": "2:17:23", "remaining_time": "2:07:54", "throughput": 8666.47, "total_tokens": 71444392} +{"current_steps": 105995, "total_steps": 204665, "loss": 0.0539, "lr": 1.1118543116061282e-06, "epoch": 2.589475484328048, "percentage": 51.79, "elapsed_time": "2:17:24", "remaining_time": "2:07:54", "throughput": 8666.49, "total_tokens": 71447464} +{"current_steps": 106000, "total_steps": 204665, "loss": 0.0538, "lr": 1.1117695687497e-06, "epoch": 2.5895976351598953, "percentage": 51.79, "elapsed_time": "2:17:24", "remaining_time": "2:07:53", "throughput": 8666.57, "total_tokens": 71451240} +{"current_steps": 106005, "total_steps": 204665, "loss": 0.0004, "lr": 1.1116848250804529e-06, "epoch": 2.5897197859917425, "percentage": 51.79, "elapsed_time": "2:17:24", "remaining_time": "2:07:53", "throughput": 8666.58, "total_tokens": 71454376} +{"current_steps": 106010, "total_steps": 204665, "loss": 0.0004, "lr": 1.1116000805990043e-06, "epoch": 2.5898419368235897, "percentage": 51.8, "elapsed_time": "2:17:25", "remaining_time": "2:07:53", "throughput": 8666.61, "total_tokens": 71457576} +{"current_steps": 106015, "total_steps": 204665, "loss": 0.0003, "lr": 1.1115153353059698e-06, "epoch": 2.589964087655437, "percentage": 51.8, "elapsed_time": "2:17:25", "remaining_time": "2:07:52", "throughput": 8666.66, "total_tokens": 71461032} +{"current_steps": 106020, "total_steps": 204665, "loss": 0.0524, "lr": 1.1114305892019656e-06, "epoch": 2.590086238487284, "percentage": 51.8, "elapsed_time": "2:17:25", "remaining_time": "2:07:52", "throughput": 8666.74, "total_tokens": 71464744} +{"current_steps": 106025, "total_steps": 204665, "loss": 0.0001, "lr": 1.1113458422876085e-06, "epoch": 2.590208389319131, "percentage": 51.8, "elapsed_time": "2:17:26", "remaining_time": "2:07:51", "throughput": 8666.85, "total_tokens": 71468840} +{"current_steps": 106030, "total_steps": 204665, "loss": 0.001, "lr": 1.1112610945635145e-06, "epoch": 2.5903305401509784, "percentage": 51.81, "elapsed_time": "2:17:26", "remaining_time": "2:07:51", "throughput": 8666.93, "total_tokens": 71472552} +{"current_steps": 106035, "total_steps": 204665, "loss": 0.0379, "lr": 1.1111763460302994e-06, "epoch": 2.5904526909828256, "percentage": 51.81, "elapsed_time": "2:17:26", "remaining_time": "2:07:50", "throughput": 8666.93, "total_tokens": 71475496} +{"current_steps": 106040, "total_steps": 204665, "loss": 0.0496, "lr": 1.1110915966885805e-06, "epoch": 2.590574841814673, "percentage": 51.81, "elapsed_time": "2:17:27", "remaining_time": "2:07:50", "throughput": 8666.97, "total_tokens": 71478888} +{"current_steps": 106045, "total_steps": 204665, "loss": 0.1119, "lr": 1.1110068465389735e-06, "epoch": 2.59069699264652, "percentage": 51.81, "elapsed_time": "2:17:27", "remaining_time": "2:07:50", "throughput": 8667.05, "total_tokens": 71482664} +{"current_steps": 106050, "total_steps": 204665, "loss": 0.0002, "lr": 1.1109220955820948e-06, "epoch": 2.590819143478367, "percentage": 51.82, "elapsed_time": "2:17:27", "remaining_time": "2:07:49", "throughput": 8667.09, "total_tokens": 71485992} +{"current_steps": 106055, "total_steps": 204665, "loss": 0.0002, "lr": 1.1108373438185608e-06, "epoch": 2.5909412943102144, "percentage": 51.82, "elapsed_time": "2:17:28", "remaining_time": "2:07:49", "throughput": 8667.13, "total_tokens": 71489320} +{"current_steps": 106060, "total_steps": 204665, "loss": 0.001, "lr": 1.1107525912489878e-06, "epoch": 2.5910634451420615, "percentage": 51.82, "elapsed_time": "2:17:28", "remaining_time": "2:07:48", "throughput": 8667.16, "total_tokens": 71492648} +{"current_steps": 106065, "total_steps": 204665, "loss": 0.0004, "lr": 1.1106678378739922e-06, "epoch": 2.5911855959739087, "percentage": 51.82, "elapsed_time": "2:17:29", "remaining_time": "2:07:48", "throughput": 8667.16, "total_tokens": 71495592} +{"current_steps": 106070, "total_steps": 204665, "loss": 0.0003, "lr": 1.11058308369419e-06, "epoch": 2.5913077468057555, "percentage": 51.83, "elapsed_time": "2:17:29", "remaining_time": "2:07:48", "throughput": 8667.19, "total_tokens": 71498856} +{"current_steps": 106075, "total_steps": 204665, "loss": 0.0003, "lr": 1.1104983287101982e-06, "epoch": 2.591429897637603, "percentage": 51.83, "elapsed_time": "2:17:29", "remaining_time": "2:07:47", "throughput": 8667.24, "total_tokens": 71502248} +{"current_steps": 106080, "total_steps": 204665, "loss": 0.0001, "lr": 1.1104135729226329e-06, "epoch": 2.59155204846945, "percentage": 51.83, "elapsed_time": "2:17:30", "remaining_time": "2:07:47", "throughput": 8667.27, "total_tokens": 71505512} +{"current_steps": 106085, "total_steps": 204665, "loss": 0.0002, "lr": 1.1103288163321103e-06, "epoch": 2.5916741993012975, "percentage": 51.83, "elapsed_time": "2:17:30", "remaining_time": "2:07:46", "throughput": 8667.28, "total_tokens": 71508584} +{"current_steps": 106090, "total_steps": 204665, "loss": 0.0564, "lr": 1.1102440589392468e-06, "epoch": 2.5917963501331442, "percentage": 51.84, "elapsed_time": "2:17:30", "remaining_time": "2:07:46", "throughput": 8667.3, "total_tokens": 71511720} +{"current_steps": 106095, "total_steps": 204665, "loss": 0.0, "lr": 1.110159300744659e-06, "epoch": 2.5919185009649914, "percentage": 51.84, "elapsed_time": "2:17:31", "remaining_time": "2:07:45", "throughput": 8667.34, "total_tokens": 71515112} +{"current_steps": 106100, "total_steps": 204665, "loss": 0.0367, "lr": 1.1100745417489629e-06, "epoch": 2.5920406517968386, "percentage": 51.84, "elapsed_time": "2:17:31", "remaining_time": "2:07:45", "throughput": 8667.39, "total_tokens": 71518568} +{"current_steps": 106105, "total_steps": 204665, "loss": 0.0002, "lr": 1.1099897819527755e-06, "epoch": 2.592162802628686, "percentage": 51.84, "elapsed_time": "2:17:31", "remaining_time": "2:07:45", "throughput": 8667.44, "total_tokens": 71521960} +{"current_steps": 106110, "total_steps": 204665, "loss": 0.1, "lr": 1.1099050213567127e-06, "epoch": 2.592284953460533, "percentage": 51.85, "elapsed_time": "2:17:32", "remaining_time": "2:07:44", "throughput": 8667.45, "total_tokens": 71525032} +{"current_steps": 106115, "total_steps": 204665, "loss": 0.0616, "lr": 1.109820259961391e-06, "epoch": 2.59240710429238, "percentage": 51.85, "elapsed_time": "2:17:32", "remaining_time": "2:07:44", "throughput": 8667.46, "total_tokens": 71528040} +{"current_steps": 106120, "total_steps": 204665, "loss": 0.034, "lr": 1.1097354977674267e-06, "epoch": 2.5925292551242274, "percentage": 51.85, "elapsed_time": "2:17:32", "remaining_time": "2:07:43", "throughput": 8667.53, "total_tokens": 71531688} +{"current_steps": 106125, "total_steps": 204665, "loss": 0.0806, "lr": 1.1096507347754364e-06, "epoch": 2.5926514059560746, "percentage": 51.85, "elapsed_time": "2:17:33", "remaining_time": "2:07:43", "throughput": 8667.58, "total_tokens": 71535144} +{"current_steps": 106130, "total_steps": 204665, "loss": 0.0007, "lr": 1.1095659709860363e-06, "epoch": 2.5927735567879218, "percentage": 51.86, "elapsed_time": "2:17:33", "remaining_time": "2:07:42", "throughput": 8667.59, "total_tokens": 71538152} +{"current_steps": 106135, "total_steps": 204665, "loss": 0.0003, "lr": 1.1094812063998431e-06, "epoch": 2.592895707619769, "percentage": 51.86, "elapsed_time": "2:17:33", "remaining_time": "2:07:42", "throughput": 8667.66, "total_tokens": 71541800} +{"current_steps": 106140, "total_steps": 204665, "loss": 0.0434, "lr": 1.1093964410174733e-06, "epoch": 2.593017858451616, "percentage": 51.86, "elapsed_time": "2:17:34", "remaining_time": "2:07:42", "throughput": 8667.7, "total_tokens": 71545064} +{"current_steps": 106145, "total_steps": 204665, "loss": 0.0001, "lr": 1.1093116748395432e-06, "epoch": 2.5931400092834633, "percentage": 51.86, "elapsed_time": "2:17:34", "remaining_time": "2:07:41", "throughput": 8667.74, "total_tokens": 71548392} +{"current_steps": 106150, "total_steps": 204665, "loss": 0.0001, "lr": 1.1092269078666689e-06, "epoch": 2.5932621601153105, "percentage": 51.87, "elapsed_time": "2:17:34", "remaining_time": "2:07:41", "throughput": 8667.82, "total_tokens": 71552168} +{"current_steps": 106155, "total_steps": 204665, "loss": 0.0002, "lr": 1.1091421400994674e-06, "epoch": 2.5933843109471577, "percentage": 51.87, "elapsed_time": "2:17:35", "remaining_time": "2:07:40", "throughput": 8667.89, "total_tokens": 71555880} +{"current_steps": 106160, "total_steps": 204665, "loss": 0.0002, "lr": 1.1090573715385547e-06, "epoch": 2.593506461779005, "percentage": 51.87, "elapsed_time": "2:17:35", "remaining_time": "2:07:40", "throughput": 8667.92, "total_tokens": 71559080} +{"current_steps": 106165, "total_steps": 204665, "loss": 0.0005, "lr": 1.1089726021845475e-06, "epoch": 2.5936286126108516, "percentage": 51.87, "elapsed_time": "2:17:35", "remaining_time": "2:07:39", "throughput": 8667.98, "total_tokens": 71562664} +{"current_steps": 106170, "total_steps": 204665, "loss": 0.1754, "lr": 1.1088878320380623e-06, "epoch": 2.5937507634426993, "percentage": 51.88, "elapsed_time": "2:17:36", "remaining_time": "2:07:39", "throughput": 8668.0, "total_tokens": 71565736} +{"current_steps": 106175, "total_steps": 204665, "loss": 0.0001, "lr": 1.1088030610997155e-06, "epoch": 2.593872914274546, "percentage": 51.88, "elapsed_time": "2:17:36", "remaining_time": "2:07:39", "throughput": 8668.02, "total_tokens": 71568872} +{"current_steps": 106180, "total_steps": 204665, "loss": 0.0001, "lr": 1.1087182893701234e-06, "epoch": 2.593995065106393, "percentage": 51.88, "elapsed_time": "2:17:37", "remaining_time": "2:07:38", "throughput": 8668.03, "total_tokens": 71571944} +{"current_steps": 106185, "total_steps": 204665, "loss": 0.0002, "lr": 1.108633516849903e-06, "epoch": 2.5941172159382404, "percentage": 51.88, "elapsed_time": "2:17:37", "remaining_time": "2:07:38", "throughput": 8668.04, "total_tokens": 71575016} +{"current_steps": 106190, "total_steps": 204665, "loss": 0.0885, "lr": 1.1085487435396703e-06, "epoch": 2.5942393667700876, "percentage": 51.88, "elapsed_time": "2:17:37", "remaining_time": "2:07:37", "throughput": 8668.11, "total_tokens": 71578600} +{"current_steps": 106195, "total_steps": 204665, "loss": 0.1151, "lr": 1.108463969440042e-06, "epoch": 2.594361517601935, "percentage": 51.89, "elapsed_time": "2:17:38", "remaining_time": "2:07:37", "throughput": 8668.16, "total_tokens": 71582120} +{"current_steps": 106200, "total_steps": 204665, "loss": 0.0001, "lr": 1.1083791945516344e-06, "epoch": 2.594483668433782, "percentage": 51.89, "elapsed_time": "2:17:38", "remaining_time": "2:07:36", "throughput": 8668.22, "total_tokens": 71585640} +{"current_steps": 106205, "total_steps": 204665, "loss": 0.0824, "lr": 1.108294418875064e-06, "epoch": 2.594605819265629, "percentage": 51.89, "elapsed_time": "2:17:38", "remaining_time": "2:07:36", "throughput": 8668.23, "total_tokens": 71588648} +{"current_steps": 106210, "total_steps": 204665, "loss": 0.0405, "lr": 1.1082096424109476e-06, "epoch": 2.5947279700974764, "percentage": 51.89, "elapsed_time": "2:17:39", "remaining_time": "2:07:36", "throughput": 8668.29, "total_tokens": 71592232} +{"current_steps": 106215, "total_steps": 204665, "loss": 0.0002, "lr": 1.1081248651599017e-06, "epoch": 2.5948501209293235, "percentage": 51.9, "elapsed_time": "2:17:39", "remaining_time": "2:07:35", "throughput": 8668.34, "total_tokens": 71595624} +{"current_steps": 106220, "total_steps": 204665, "loss": 0.0711, "lr": 1.1080400871225429e-06, "epoch": 2.5949722717611707, "percentage": 51.9, "elapsed_time": "2:17:39", "remaining_time": "2:07:35", "throughput": 8668.34, "total_tokens": 71598632} +{"current_steps": 106225, "total_steps": 204665, "loss": 0.0373, "lr": 1.1079553082994868e-06, "epoch": 2.595094422593018, "percentage": 51.9, "elapsed_time": "2:17:40", "remaining_time": "2:07:34", "throughput": 8668.39, "total_tokens": 71602024} +{"current_steps": 106230, "total_steps": 204665, "loss": 0.0002, "lr": 1.1078705286913513e-06, "epoch": 2.595216573424865, "percentage": 51.9, "elapsed_time": "2:17:40", "remaining_time": "2:07:34", "throughput": 8668.42, "total_tokens": 71605224} +{"current_steps": 106235, "total_steps": 204665, "loss": 0.001, "lr": 1.107785748298752e-06, "epoch": 2.5953387242567123, "percentage": 51.91, "elapsed_time": "2:17:40", "remaining_time": "2:07:33", "throughput": 8668.41, "total_tokens": 71608104} +{"current_steps": 106240, "total_steps": 204665, "loss": 0.0735, "lr": 1.1077009671223059e-06, "epoch": 2.5954608750885595, "percentage": 51.91, "elapsed_time": "2:17:41", "remaining_time": "2:07:33", "throughput": 8668.44, "total_tokens": 71611368} +{"current_steps": 106245, "total_steps": 204665, "loss": 0.0959, "lr": 1.1076161851626294e-06, "epoch": 2.5955830259204067, "percentage": 51.91, "elapsed_time": "2:17:41", "remaining_time": "2:07:33", "throughput": 8668.52, "total_tokens": 71615080} +{"current_steps": 106250, "total_steps": 204665, "loss": 0.0983, "lr": 1.107531402420339e-06, "epoch": 2.5957051767522534, "percentage": 51.91, "elapsed_time": "2:17:41", "remaining_time": "2:07:32", "throughput": 8668.63, "total_tokens": 71619176} +{"current_steps": 106255, "total_steps": 204665, "loss": 0.0505, "lr": 1.1074466188960515e-06, "epoch": 2.595827327584101, "percentage": 51.92, "elapsed_time": "2:17:42", "remaining_time": "2:07:32", "throughput": 8668.67, "total_tokens": 71622568} +{"current_steps": 106260, "total_steps": 204665, "loss": 0.0746, "lr": 1.107361834590383e-06, "epoch": 2.595949478415948, "percentage": 51.92, "elapsed_time": "2:17:42", "remaining_time": "2:07:31", "throughput": 8668.7, "total_tokens": 71625832} +{"current_steps": 106265, "total_steps": 204665, "loss": 0.0001, "lr": 1.1072770495039506e-06, "epoch": 2.5960716292477954, "percentage": 51.92, "elapsed_time": "2:17:42", "remaining_time": "2:07:31", "throughput": 8668.76, "total_tokens": 71629352} +{"current_steps": 106270, "total_steps": 204665, "loss": 0.1042, "lr": 1.1071922636373702e-06, "epoch": 2.596193780079642, "percentage": 51.92, "elapsed_time": "2:17:43", "remaining_time": "2:07:30", "throughput": 8668.77, "total_tokens": 71632424} +{"current_steps": 106275, "total_steps": 204665, "loss": 0.0007, "lr": 1.1071074769912593e-06, "epoch": 2.5963159309114894, "percentage": 51.93, "elapsed_time": "2:17:43", "remaining_time": "2:07:30", "throughput": 8668.81, "total_tokens": 71635688} +{"current_steps": 106280, "total_steps": 204665, "loss": 0.0956, "lr": 1.107022689566234e-06, "epoch": 2.5964380817433366, "percentage": 51.93, "elapsed_time": "2:17:43", "remaining_time": "2:07:30", "throughput": 8668.84, "total_tokens": 71639016} +{"current_steps": 106285, "total_steps": 204665, "loss": 0.0249, "lr": 1.1069379013629105e-06, "epoch": 2.5965602325751838, "percentage": 51.93, "elapsed_time": "2:17:44", "remaining_time": "2:07:29", "throughput": 8668.93, "total_tokens": 71642856} +{"current_steps": 106290, "total_steps": 204665, "loss": 0.0006, "lr": 1.106853112381906e-06, "epoch": 2.596682383407031, "percentage": 51.93, "elapsed_time": "2:17:44", "remaining_time": "2:07:29", "throughput": 8668.93, "total_tokens": 71645800} +{"current_steps": 106295, "total_steps": 204665, "loss": 0.082, "lr": 1.1067683226238372e-06, "epoch": 2.596804534238878, "percentage": 51.94, "elapsed_time": "2:17:45", "remaining_time": "2:07:28", "throughput": 8668.95, "total_tokens": 71649000} +{"current_steps": 106300, "total_steps": 204665, "loss": 0.1012, "lr": 1.1066835320893204e-06, "epoch": 2.5969266850707253, "percentage": 51.94, "elapsed_time": "2:17:45", "remaining_time": "2:07:28", "throughput": 8668.97, "total_tokens": 71652136} +{"current_steps": 106305, "total_steps": 204665, "loss": 0.0006, "lr": 1.1065987407789724e-06, "epoch": 2.5970488359025725, "percentage": 51.94, "elapsed_time": "2:17:45", "remaining_time": "2:07:27", "throughput": 8668.99, "total_tokens": 71655272} +{"current_steps": 106310, "total_steps": 204665, "loss": 0.0003, "lr": 1.1065139486934092e-06, "epoch": 2.5971709867344197, "percentage": 51.94, "elapsed_time": "2:17:46", "remaining_time": "2:07:27", "throughput": 8669.04, "total_tokens": 71658728} +{"current_steps": 106315, "total_steps": 204665, "loss": 0.0397, "lr": 1.106429155833248e-06, "epoch": 2.597293137566267, "percentage": 51.95, "elapsed_time": "2:17:46", "remaining_time": "2:07:27", "throughput": 8669.07, "total_tokens": 71661928} +{"current_steps": 106320, "total_steps": 204665, "loss": 0.1028, "lr": 1.1063443621991056e-06, "epoch": 2.597415288398114, "percentage": 51.95, "elapsed_time": "2:17:46", "remaining_time": "2:07:26", "throughput": 8669.19, "total_tokens": 71666088} +{"current_steps": 106325, "total_steps": 204665, "loss": 0.0001, "lr": 1.1062595677915983e-06, "epoch": 2.5975374392299613, "percentage": 51.95, "elapsed_time": "2:17:47", "remaining_time": "2:07:26", "throughput": 8669.23, "total_tokens": 71669416} +{"current_steps": 106330, "total_steps": 204665, "loss": 0.0008, "lr": 1.1061747726113427e-06, "epoch": 2.5976595900618085, "percentage": 51.95, "elapsed_time": "2:17:47", "remaining_time": "2:07:25", "throughput": 8669.25, "total_tokens": 71672616} +{"current_steps": 106335, "total_steps": 204665, "loss": 0.0001, "lr": 1.1060899766589558e-06, "epoch": 2.597781740893655, "percentage": 51.96, "elapsed_time": "2:17:47", "remaining_time": "2:07:25", "throughput": 8669.26, "total_tokens": 71675560} +{"current_steps": 106340, "total_steps": 204665, "loss": 0.0002, "lr": 1.1060051799350538e-06, "epoch": 2.597903891725503, "percentage": 51.96, "elapsed_time": "2:17:48", "remaining_time": "2:07:24", "throughput": 8669.3, "total_tokens": 71678952} +{"current_steps": 106345, "total_steps": 204665, "loss": 0.0003, "lr": 1.105920382440254e-06, "epoch": 2.5980260425573496, "percentage": 51.96, "elapsed_time": "2:17:48", "remaining_time": "2:07:24", "throughput": 8669.37, "total_tokens": 71682664} +{"current_steps": 106350, "total_steps": 204665, "loss": 0.0007, "lr": 1.1058355841751723e-06, "epoch": 2.5981481933891972, "percentage": 51.96, "elapsed_time": "2:17:48", "remaining_time": "2:07:24", "throughput": 8669.36, "total_tokens": 71685480} +{"current_steps": 106355, "total_steps": 204665, "loss": 0.0004, "lr": 1.105750785140426e-06, "epoch": 2.598270344221044, "percentage": 51.97, "elapsed_time": "2:17:49", "remaining_time": "2:07:23", "throughput": 8669.45, "total_tokens": 71689320} +{"current_steps": 106360, "total_steps": 204665, "loss": 0.0632, "lr": 1.1056659853366315e-06, "epoch": 2.598392495052891, "percentage": 51.97, "elapsed_time": "2:17:49", "remaining_time": "2:07:23", "throughput": 8669.48, "total_tokens": 71692584} +{"current_steps": 106365, "total_steps": 204665, "loss": 0.0002, "lr": 1.1055811847644057e-06, "epoch": 2.5985146458847383, "percentage": 51.97, "elapsed_time": "2:17:49", "remaining_time": "2:07:22", "throughput": 8669.55, "total_tokens": 71696168} +{"current_steps": 106370, "total_steps": 204665, "loss": 0.0345, "lr": 1.105496383424365e-06, "epoch": 2.5986367967165855, "percentage": 51.97, "elapsed_time": "2:17:50", "remaining_time": "2:07:22", "throughput": 8669.59, "total_tokens": 71699560} +{"current_steps": 106375, "total_steps": 204665, "loss": 0.1055, "lr": 1.1054115813171262e-06, "epoch": 2.5987589475484327, "percentage": 51.98, "elapsed_time": "2:17:50", "remaining_time": "2:07:21", "throughput": 8669.65, "total_tokens": 71703080} +{"current_steps": 106380, "total_steps": 204665, "loss": 0.0006, "lr": 1.1053267784433057e-06, "epoch": 2.59888109838028, "percentage": 51.98, "elapsed_time": "2:17:50", "remaining_time": "2:07:21", "throughput": 8669.68, "total_tokens": 71706344} +{"current_steps": 106385, "total_steps": 204665, "loss": 0.0001, "lr": 1.105241974803521e-06, "epoch": 2.599003249212127, "percentage": 51.98, "elapsed_time": "2:17:51", "remaining_time": "2:07:21", "throughput": 8669.7, "total_tokens": 71709480} +{"current_steps": 106390, "total_steps": 204665, "loss": 0.1092, "lr": 1.105157170398388e-06, "epoch": 2.5991254000439743, "percentage": 51.98, "elapsed_time": "2:17:51", "remaining_time": "2:07:20", "throughput": 8669.76, "total_tokens": 71713064} +{"current_steps": 106395, "total_steps": 204665, "loss": 0.0829, "lr": 1.105072365228524e-06, "epoch": 2.5992475508758215, "percentage": 51.98, "elapsed_time": "2:17:51", "remaining_time": "2:07:20", "throughput": 8669.79, "total_tokens": 71716264} +{"current_steps": 106400, "total_steps": 204665, "loss": 0.0351, "lr": 1.1049875592945454e-06, "epoch": 2.5993697017076687, "percentage": 51.99, "elapsed_time": "2:17:52", "remaining_time": "2:07:19", "throughput": 8669.8, "total_tokens": 71719336} +{"current_steps": 106405, "total_steps": 204665, "loss": 0.0424, "lr": 1.1049027525970691e-06, "epoch": 2.599491852539516, "percentage": 51.99, "elapsed_time": "2:17:52", "remaining_time": "2:07:19", "throughput": 8669.85, "total_tokens": 71722792} +{"current_steps": 106410, "total_steps": 204665, "loss": 0.1048, "lr": 1.104817945136712e-06, "epoch": 2.599614003371363, "percentage": 51.99, "elapsed_time": "2:17:53", "remaining_time": "2:07:18", "throughput": 8669.88, "total_tokens": 71725992} +{"current_steps": 106415, "total_steps": 204665, "loss": 0.0006, "lr": 1.1047331369140901e-06, "epoch": 2.5997361542032102, "percentage": 51.99, "elapsed_time": "2:17:53", "remaining_time": "2:07:18", "throughput": 8669.96, "total_tokens": 71729832} +{"current_steps": 106420, "total_steps": 204665, "loss": 0.0009, "lr": 1.1046483279298212e-06, "epoch": 2.5998583050350574, "percentage": 52.0, "elapsed_time": "2:17:53", "remaining_time": "2:07:18", "throughput": 8670.03, "total_tokens": 71733416} +{"current_steps": 106425, "total_steps": 204665, "loss": 0.0681, "lr": 1.1045635181845212e-06, "epoch": 2.5999804558669046, "percentage": 52.0, "elapsed_time": "2:17:54", "remaining_time": "2:07:17", "throughput": 8670.08, "total_tokens": 71736872} +{"current_steps": 106430, "total_steps": 204665, "loss": 0.053, "lr": 1.104478707678807e-06, "epoch": 2.6001026066987514, "percentage": 52.0, "elapsed_time": "2:17:54", "remaining_time": "2:07:17", "throughput": 8670.2, "total_tokens": 71741096} +{"current_steps": 106435, "total_steps": 204665, "loss": 0.0423, "lr": 1.104393896413296e-06, "epoch": 2.600224757530599, "percentage": 52.0, "elapsed_time": "2:17:54", "remaining_time": "2:07:16", "throughput": 8670.26, "total_tokens": 71744616} +{"current_steps": 106440, "total_steps": 204665, "loss": 0.0314, "lr": 1.104309084388604e-06, "epoch": 2.6003469083624458, "percentage": 52.01, "elapsed_time": "2:17:55", "remaining_time": "2:07:16", "throughput": 8670.32, "total_tokens": 71748072} +{"current_steps": 106445, "total_steps": 204665, "loss": 0.0395, "lr": 1.1042242716053486e-06, "epoch": 2.6004690591942934, "percentage": 52.01, "elapsed_time": "2:17:55", "remaining_time": "2:07:16", "throughput": 8670.38, "total_tokens": 71751656} +{"current_steps": 106450, "total_steps": 204665, "loss": 0.0008, "lr": 1.1041394580641464e-06, "epoch": 2.60059121002614, "percentage": 52.01, "elapsed_time": "2:17:55", "remaining_time": "2:07:15", "throughput": 8670.42, "total_tokens": 71754984} +{"current_steps": 106455, "total_steps": 204665, "loss": 0.0907, "lr": 1.104054643765614e-06, "epoch": 2.6007133608579873, "percentage": 52.01, "elapsed_time": "2:17:56", "remaining_time": "2:07:15", "throughput": 8670.54, "total_tokens": 71759144} +{"current_steps": 106460, "total_steps": 204665, "loss": 0.139, "lr": 1.103969828710368e-06, "epoch": 2.6008355116898345, "percentage": 52.02, "elapsed_time": "2:17:56", "remaining_time": "2:07:14", "throughput": 8670.55, "total_tokens": 71762216} +{"current_steps": 106465, "total_steps": 204665, "loss": 0.0731, "lr": 1.1038850128990255e-06, "epoch": 2.6009576625216817, "percentage": 52.02, "elapsed_time": "2:17:56", "remaining_time": "2:07:14", "throughput": 8670.57, "total_tokens": 71765288} +{"current_steps": 106470, "total_steps": 204665, "loss": 0.0346, "lr": 1.1038001963322031e-06, "epoch": 2.601079813353529, "percentage": 52.02, "elapsed_time": "2:17:57", "remaining_time": "2:07:13", "throughput": 8670.6, "total_tokens": 71768616} +{"current_steps": 106475, "total_steps": 204665, "loss": 0.1645, "lr": 1.103715379010518e-06, "epoch": 2.601201964185376, "percentage": 52.02, "elapsed_time": "2:17:57", "remaining_time": "2:07:13", "throughput": 8670.64, "total_tokens": 71771944} +{"current_steps": 106480, "total_steps": 204665, "loss": 0.0008, "lr": 1.103630560934587e-06, "epoch": 2.6013241150172233, "percentage": 52.03, "elapsed_time": "2:17:57", "remaining_time": "2:07:13", "throughput": 8670.64, "total_tokens": 71774824} +{"current_steps": 106485, "total_steps": 204665, "loss": 0.0003, "lr": 1.1035457421050262e-06, "epoch": 2.6014462658490705, "percentage": 52.03, "elapsed_time": "2:17:58", "remaining_time": "2:07:12", "throughput": 8670.64, "total_tokens": 71777704} +{"current_steps": 106490, "total_steps": 204665, "loss": 0.0582, "lr": 1.1034609225224531e-06, "epoch": 2.6015684166809177, "percentage": 52.03, "elapsed_time": "2:17:58", "remaining_time": "2:07:12", "throughput": 8670.66, "total_tokens": 71780904} +{"current_steps": 106495, "total_steps": 204665, "loss": 0.057, "lr": 1.1033761021874844e-06, "epoch": 2.601690567512765, "percentage": 52.03, "elapsed_time": "2:17:58", "remaining_time": "2:07:11", "throughput": 8670.72, "total_tokens": 71784488} +{"current_steps": 106500, "total_steps": 204665, "loss": 0.0008, "lr": 1.103291281100737e-06, "epoch": 2.601812718344612, "percentage": 52.04, "elapsed_time": "2:17:59", "remaining_time": "2:07:11", "throughput": 8670.76, "total_tokens": 71787816} +{"current_steps": 106505, "total_steps": 204665, "loss": 0.0037, "lr": 1.1032064592628275e-06, "epoch": 2.601934869176459, "percentage": 52.04, "elapsed_time": "2:17:59", "remaining_time": "2:07:10", "throughput": 8670.8, "total_tokens": 71791144} +{"current_steps": 106510, "total_steps": 204665, "loss": 0.0006, "lr": 1.1031216366743727e-06, "epoch": 2.6020570200083064, "percentage": 52.04, "elapsed_time": "2:17:59", "remaining_time": "2:07:10", "throughput": 8670.84, "total_tokens": 71794536} +{"current_steps": 106515, "total_steps": 204665, "loss": 0.0006, "lr": 1.1030368133359897e-06, "epoch": 2.602179170840153, "percentage": 52.04, "elapsed_time": "2:18:00", "remaining_time": "2:07:10", "throughput": 8670.89, "total_tokens": 71797992} +{"current_steps": 106520, "total_steps": 204665, "loss": 0.0132, "lr": 1.1029519892482953e-06, "epoch": 2.602301321672001, "percentage": 52.05, "elapsed_time": "2:18:00", "remaining_time": "2:07:09", "throughput": 8670.92, "total_tokens": 71801320} +{"current_steps": 106525, "total_steps": 204665, "loss": 0.0659, "lr": 1.1028671644119066e-06, "epoch": 2.6024234725038475, "percentage": 52.05, "elapsed_time": "2:18:01", "remaining_time": "2:07:09", "throughput": 8670.97, "total_tokens": 71804712} +{"current_steps": 106530, "total_steps": 204665, "loss": 0.0356, "lr": 1.1027823388274397e-06, "epoch": 2.602545623335695, "percentage": 52.05, "elapsed_time": "2:18:01", "remaining_time": "2:07:08", "throughput": 8671.0, "total_tokens": 71807976} +{"current_steps": 106535, "total_steps": 204665, "loss": 0.0395, "lr": 1.1026975124955123e-06, "epoch": 2.602667774167542, "percentage": 52.05, "elapsed_time": "2:18:01", "remaining_time": "2:07:08", "throughput": 8671.04, "total_tokens": 71811368} +{"current_steps": 106540, "total_steps": 204665, "loss": 0.0551, "lr": 1.1026126854167408e-06, "epoch": 2.602789924999389, "percentage": 52.06, "elapsed_time": "2:18:02", "remaining_time": "2:07:07", "throughput": 8671.09, "total_tokens": 71814824} +{"current_steps": 106545, "total_steps": 204665, "loss": 0.105, "lr": 1.1025278575917425e-06, "epoch": 2.6029120758312363, "percentage": 52.06, "elapsed_time": "2:18:02", "remaining_time": "2:07:07", "throughput": 8671.17, "total_tokens": 71818536} +{"current_steps": 106550, "total_steps": 204665, "loss": 0.1427, "lr": 1.102443029021134e-06, "epoch": 2.6030342266630835, "percentage": 52.06, "elapsed_time": "2:18:02", "remaining_time": "2:07:07", "throughput": 8671.18, "total_tokens": 71821608} +{"current_steps": 106555, "total_steps": 204665, "loss": 0.0354, "lr": 1.102358199705532e-06, "epoch": 2.6031563774949307, "percentage": 52.06, "elapsed_time": "2:18:03", "remaining_time": "2:07:06", "throughput": 8671.23, "total_tokens": 71825000} +{"current_steps": 106560, "total_steps": 204665, "loss": 0.1585, "lr": 1.102273369645554e-06, "epoch": 2.603278528326778, "percentage": 52.07, "elapsed_time": "2:18:03", "remaining_time": "2:07:06", "throughput": 8671.24, "total_tokens": 71828072} +{"current_steps": 106565, "total_steps": 204665, "loss": 0.0227, "lr": 1.1021885388418164e-06, "epoch": 2.603400679158625, "percentage": 52.07, "elapsed_time": "2:18:03", "remaining_time": "2:07:05", "throughput": 8671.28, "total_tokens": 71831400} +{"current_steps": 106570, "total_steps": 204665, "loss": 0.0262, "lr": 1.1021037072949362e-06, "epoch": 2.6035228299904722, "percentage": 52.07, "elapsed_time": "2:18:04", "remaining_time": "2:07:05", "throughput": 8671.28, "total_tokens": 71834344} +{"current_steps": 106575, "total_steps": 204665, "loss": 0.0009, "lr": 1.1020188750055304e-06, "epoch": 2.6036449808223194, "percentage": 52.07, "elapsed_time": "2:18:04", "remaining_time": "2:07:04", "throughput": 8671.33, "total_tokens": 71837736} +{"current_steps": 106580, "total_steps": 204665, "loss": 0.046, "lr": 1.1019340419742157e-06, "epoch": 2.6037671316541666, "percentage": 52.08, "elapsed_time": "2:18:04", "remaining_time": "2:07:04", "throughput": 8671.39, "total_tokens": 71841256} +{"current_steps": 106585, "total_steps": 204665, "loss": 0.0006, "lr": 1.1018492082016095e-06, "epoch": 2.603889282486014, "percentage": 52.08, "elapsed_time": "2:18:05", "remaining_time": "2:07:04", "throughput": 8671.39, "total_tokens": 71844264} +{"current_steps": 106590, "total_steps": 204665, "loss": 0.0029, "lr": 1.1017643736883284e-06, "epoch": 2.604011433317861, "percentage": 52.08, "elapsed_time": "2:18:05", "remaining_time": "2:07:03", "throughput": 8671.46, "total_tokens": 71847912} +{"current_steps": 106595, "total_steps": 204665, "loss": 0.0539, "lr": 1.1016795384349892e-06, "epoch": 2.604133584149708, "percentage": 52.08, "elapsed_time": "2:18:05", "remaining_time": "2:07:03", "throughput": 8671.49, "total_tokens": 71851176} +{"current_steps": 106600, "total_steps": 204665, "loss": 0.0338, "lr": 1.1015947024422094e-06, "epoch": 2.6042557349815554, "percentage": 52.09, "elapsed_time": "2:18:06", "remaining_time": "2:07:02", "throughput": 8671.55, "total_tokens": 71854696} +{"current_steps": 106605, "total_steps": 204665, "loss": 0.0005, "lr": 1.1015098657106054e-06, "epoch": 2.6043778858134026, "percentage": 52.09, "elapsed_time": "2:18:06", "remaining_time": "2:07:02", "throughput": 8671.57, "total_tokens": 71857896} +{"current_steps": 106610, "total_steps": 204665, "loss": 0.0001, "lr": 1.1014250282407946e-06, "epoch": 2.6045000366452493, "percentage": 52.09, "elapsed_time": "2:18:06", "remaining_time": "2:07:01", "throughput": 8671.59, "total_tokens": 71861032} +{"current_steps": 106615, "total_steps": 204665, "loss": 0.0005, "lr": 1.1013401900333937e-06, "epoch": 2.604622187477097, "percentage": 52.09, "elapsed_time": "2:18:07", "remaining_time": "2:07:01", "throughput": 8671.6, "total_tokens": 71864104} +{"current_steps": 106620, "total_steps": 204665, "loss": 0.1128, "lr": 1.1012553510890192e-06, "epoch": 2.6047443383089437, "percentage": 52.09, "elapsed_time": "2:18:07", "remaining_time": "2:07:01", "throughput": 8671.62, "total_tokens": 71867176} +{"current_steps": 106625, "total_steps": 204665, "loss": 0.0007, "lr": 1.101170511408289e-06, "epoch": 2.604866489140791, "percentage": 52.1, "elapsed_time": "2:18:07", "remaining_time": "2:07:00", "throughput": 8671.61, "total_tokens": 71870120} +{"current_steps": 106630, "total_steps": 204665, "loss": 0.2105, "lr": 1.1010856709918193e-06, "epoch": 2.604988639972638, "percentage": 52.1, "elapsed_time": "2:18:08", "remaining_time": "2:07:00", "throughput": 8671.68, "total_tokens": 71873704} +{"current_steps": 106635, "total_steps": 204665, "loss": 0.0681, "lr": 1.1010008298402275e-06, "epoch": 2.6051107908044853, "percentage": 52.1, "elapsed_time": "2:18:08", "remaining_time": "2:06:59", "throughput": 8671.71, "total_tokens": 71876968} +{"current_steps": 106640, "total_steps": 204665, "loss": 0.0504, "lr": 1.1009159879541307e-06, "epoch": 2.6052329416363325, "percentage": 52.1, "elapsed_time": "2:18:09", "remaining_time": "2:06:59", "throughput": 8671.73, "total_tokens": 71880104} +{"current_steps": 106645, "total_steps": 204665, "loss": 0.0761, "lr": 1.1008311453341455e-06, "epoch": 2.6053550924681796, "percentage": 52.11, "elapsed_time": "2:18:09", "remaining_time": "2:06:58", "throughput": 8671.81, "total_tokens": 71883816} +{"current_steps": 106650, "total_steps": 204665, "loss": 0.0003, "lr": 1.1007463019808892e-06, "epoch": 2.605477243300027, "percentage": 52.11, "elapsed_time": "2:18:09", "remaining_time": "2:06:58", "throughput": 8671.85, "total_tokens": 71887208} +{"current_steps": 106655, "total_steps": 204665, "loss": 0.0006, "lr": 1.1006614578949786e-06, "epoch": 2.605599394131874, "percentage": 52.11, "elapsed_time": "2:18:10", "remaining_time": "2:06:58", "throughput": 8671.87, "total_tokens": 71890280} +{"current_steps": 106660, "total_steps": 204665, "loss": 0.0002, "lr": 1.1005766130770312e-06, "epoch": 2.605721544963721, "percentage": 52.11, "elapsed_time": "2:18:10", "remaining_time": "2:06:57", "throughput": 8671.94, "total_tokens": 71893992} +{"current_steps": 106665, "total_steps": 204665, "loss": 0.0005, "lr": 1.100491767527663e-06, "epoch": 2.6058436957955684, "percentage": 52.12, "elapsed_time": "2:18:10", "remaining_time": "2:06:57", "throughput": 8671.94, "total_tokens": 71896936} +{"current_steps": 106670, "total_steps": 204665, "loss": 0.0567, "lr": 1.1004069212474921e-06, "epoch": 2.6059658466274156, "percentage": 52.12, "elapsed_time": "2:18:11", "remaining_time": "2:06:56", "throughput": 8671.98, "total_tokens": 71900264} +{"current_steps": 106675, "total_steps": 204665, "loss": 0.0003, "lr": 1.1003220742371348e-06, "epoch": 2.606087997459263, "percentage": 52.12, "elapsed_time": "2:18:11", "remaining_time": "2:06:56", "throughput": 8672.05, "total_tokens": 71903848} +{"current_steps": 106680, "total_steps": 204665, "loss": 0.0285, "lr": 1.1002372264972083e-06, "epoch": 2.60621014829111, "percentage": 52.12, "elapsed_time": "2:18:11", "remaining_time": "2:06:55", "throughput": 8672.1, "total_tokens": 71907368} +{"current_steps": 106685, "total_steps": 204665, "loss": 0.0645, "lr": 1.1001523780283302e-06, "epoch": 2.606332299122957, "percentage": 52.13, "elapsed_time": "2:18:12", "remaining_time": "2:06:55", "throughput": 8672.13, "total_tokens": 71910632} +{"current_steps": 106690, "total_steps": 204665, "loss": 0.0915, "lr": 1.1000675288311166e-06, "epoch": 2.6064544499548044, "percentage": 52.13, "elapsed_time": "2:18:12", "remaining_time": "2:06:55", "throughput": 8672.19, "total_tokens": 71914152} +{"current_steps": 106695, "total_steps": 204665, "loss": 0.0004, "lr": 1.099982678906185e-06, "epoch": 2.606576600786651, "percentage": 52.13, "elapsed_time": "2:18:12", "remaining_time": "2:06:54", "throughput": 8672.23, "total_tokens": 71917544} +{"current_steps": 106700, "total_steps": 204665, "loss": 0.0003, "lr": 1.0998978282541527e-06, "epoch": 2.6066987516184987, "percentage": 52.13, "elapsed_time": "2:18:13", "remaining_time": "2:06:54", "throughput": 8672.25, "total_tokens": 71920616} +{"current_steps": 106705, "total_steps": 204665, "loss": 0.0006, "lr": 1.0998129768756365e-06, "epoch": 2.6068209024503455, "percentage": 52.14, "elapsed_time": "2:18:13", "remaining_time": "2:06:53", "throughput": 8672.24, "total_tokens": 71923496} +{"current_steps": 106710, "total_steps": 204665, "loss": 0.0002, "lr": 1.0997281247712536e-06, "epoch": 2.606943053282193, "percentage": 52.14, "elapsed_time": "2:18:13", "remaining_time": "2:06:53", "throughput": 8672.28, "total_tokens": 71926760} +{"current_steps": 106715, "total_steps": 204665, "loss": 0.0627, "lr": 1.0996432719416209e-06, "epoch": 2.60706520411404, "percentage": 52.14, "elapsed_time": "2:18:14", "remaining_time": "2:06:52", "throughput": 8672.31, "total_tokens": 71930024} +{"current_steps": 106720, "total_steps": 204665, "loss": 0.055, "lr": 1.0995584183873553e-06, "epoch": 2.607187354945887, "percentage": 52.14, "elapsed_time": "2:18:14", "remaining_time": "2:06:52", "throughput": 8672.33, "total_tokens": 71933096} +{"current_steps": 106725, "total_steps": 204665, "loss": 0.1001, "lr": 1.0994735641090742e-06, "epoch": 2.6073095057777342, "percentage": 52.15, "elapsed_time": "2:18:14", "remaining_time": "2:06:52", "throughput": 8672.37, "total_tokens": 71936552} +{"current_steps": 106730, "total_steps": 204665, "loss": 0.0001, "lr": 1.0993887091073947e-06, "epoch": 2.6074316566095814, "percentage": 52.15, "elapsed_time": "2:18:15", "remaining_time": "2:06:51", "throughput": 8672.4, "total_tokens": 71939752} +{"current_steps": 106735, "total_steps": 204665, "loss": 0.0597, "lr": 1.0993038533829338e-06, "epoch": 2.6075538074414286, "percentage": 52.15, "elapsed_time": "2:18:15", "remaining_time": "2:06:51", "throughput": 8672.45, "total_tokens": 71943208} +{"current_steps": 106740, "total_steps": 204665, "loss": 0.1481, "lr": 1.0992189969363084e-06, "epoch": 2.607675958273276, "percentage": 52.15, "elapsed_time": "2:18:15", "remaining_time": "2:06:50", "throughput": 8672.47, "total_tokens": 71946408} +{"current_steps": 106745, "total_steps": 204665, "loss": 0.0688, "lr": 1.099134139768136e-06, "epoch": 2.607798109105123, "percentage": 52.16, "elapsed_time": "2:18:16", "remaining_time": "2:06:50", "throughput": 8672.52, "total_tokens": 71949864} +{"current_steps": 106750, "total_steps": 204665, "loss": 0.0416, "lr": 1.0990492818790331e-06, "epoch": 2.60792025993697, "percentage": 52.16, "elapsed_time": "2:18:16", "remaining_time": "2:06:49", "throughput": 8672.53, "total_tokens": 71952872} +{"current_steps": 106755, "total_steps": 204665, "loss": 0.0018, "lr": 1.0989644232696174e-06, "epoch": 2.6080424107688174, "percentage": 52.16, "elapsed_time": "2:18:16", "remaining_time": "2:06:49", "throughput": 8672.54, "total_tokens": 71955944} +{"current_steps": 106760, "total_steps": 204665, "loss": 0.0482, "lr": 1.0988795639405056e-06, "epoch": 2.6081645616006646, "percentage": 52.16, "elapsed_time": "2:18:17", "remaining_time": "2:06:49", "throughput": 8672.6, "total_tokens": 71959528} +{"current_steps": 106765, "total_steps": 204665, "loss": 0.0607, "lr": 1.0987947038923155e-06, "epoch": 2.6082867124325118, "percentage": 52.17, "elapsed_time": "2:18:17", "remaining_time": "2:06:48", "throughput": 8672.64, "total_tokens": 71962856} +{"current_steps": 106770, "total_steps": 204665, "loss": 0.0006, "lr": 1.0987098431256637e-06, "epoch": 2.608408863264359, "percentage": 52.17, "elapsed_time": "2:18:18", "remaining_time": "2:06:48", "throughput": 8672.69, "total_tokens": 71966312} +{"current_steps": 106775, "total_steps": 204665, "loss": 0.0003, "lr": 1.098624981641167e-06, "epoch": 2.608531014096206, "percentage": 52.17, "elapsed_time": "2:18:18", "remaining_time": "2:06:47", "throughput": 8672.71, "total_tokens": 71969384} +{"current_steps": 106780, "total_steps": 204665, "loss": 0.0861, "lr": 1.0985401194394431e-06, "epoch": 2.6086531649280533, "percentage": 52.17, "elapsed_time": "2:18:18", "remaining_time": "2:06:47", "throughput": 8672.82, "total_tokens": 71973480} +{"current_steps": 106785, "total_steps": 204665, "loss": 0.0238, "lr": 1.0984552565211087e-06, "epoch": 2.6087753157599005, "percentage": 52.18, "elapsed_time": "2:18:19", "remaining_time": "2:06:47", "throughput": 8672.85, "total_tokens": 71976744} +{"current_steps": 106790, "total_steps": 204665, "loss": 0.0192, "lr": 1.0983703928867813e-06, "epoch": 2.6088974665917473, "percentage": 52.18, "elapsed_time": "2:18:19", "remaining_time": "2:06:46", "throughput": 8672.87, "total_tokens": 71979880} +{"current_steps": 106795, "total_steps": 204665, "loss": 0.0003, "lr": 1.098285528537078e-06, "epoch": 2.609019617423595, "percentage": 52.18, "elapsed_time": "2:18:19", "remaining_time": "2:06:46", "throughput": 8672.9, "total_tokens": 71983144} +{"current_steps": 106800, "total_steps": 204665, "loss": 0.0002, "lr": 1.098200663472616e-06, "epoch": 2.6091417682554416, "percentage": 52.18, "elapsed_time": "2:18:20", "remaining_time": "2:06:45", "throughput": 8672.95, "total_tokens": 71986536} +{"current_steps": 106805, "total_steps": 204665, "loss": 0.0006, "lr": 1.0981157976940124e-06, "epoch": 2.609263919087289, "percentage": 52.19, "elapsed_time": "2:18:20", "remaining_time": "2:06:45", "throughput": 8673.02, "total_tokens": 71990184} +{"current_steps": 106810, "total_steps": 204665, "loss": 0.0002, "lr": 1.0980309312018841e-06, "epoch": 2.609386069919136, "percentage": 52.19, "elapsed_time": "2:18:20", "remaining_time": "2:06:44", "throughput": 8673.04, "total_tokens": 71993320} +{"current_steps": 106815, "total_steps": 204665, "loss": 0.0633, "lr": 1.0979460639968485e-06, "epoch": 2.609508220750983, "percentage": 52.19, "elapsed_time": "2:18:21", "remaining_time": "2:06:44", "throughput": 8673.06, "total_tokens": 71996456} +{"current_steps": 106820, "total_steps": 204665, "loss": 0.0005, "lr": 1.097861196079523e-06, "epoch": 2.6096303715828304, "percentage": 52.19, "elapsed_time": "2:18:21", "remaining_time": "2:06:44", "throughput": 8673.14, "total_tokens": 72000232} +{"current_steps": 106825, "total_steps": 204665, "loss": 0.0001, "lr": 1.0977763274505244e-06, "epoch": 2.6097525224146776, "percentage": 52.2, "elapsed_time": "2:18:21", "remaining_time": "2:06:43", "throughput": 8673.16, "total_tokens": 72003368} +{"current_steps": 106830, "total_steps": 204665, "loss": 0.0448, "lr": 1.09769145811047e-06, "epoch": 2.609874673246525, "percentage": 52.2, "elapsed_time": "2:18:22", "remaining_time": "2:06:43", "throughput": 8673.19, "total_tokens": 72006568} +{"current_steps": 106835, "total_steps": 204665, "loss": 0.0073, "lr": 1.0976065880599772e-06, "epoch": 2.609996824078372, "percentage": 52.2, "elapsed_time": "2:18:22", "remaining_time": "2:06:42", "throughput": 8673.23, "total_tokens": 72009896} +{"current_steps": 106840, "total_steps": 204665, "loss": 0.0017, "lr": 1.097521717299663e-06, "epoch": 2.610118974910219, "percentage": 52.2, "elapsed_time": "2:18:22", "remaining_time": "2:06:42", "throughput": 8673.27, "total_tokens": 72013352} +{"current_steps": 106845, "total_steps": 204665, "loss": 0.0055, "lr": 1.0974368458301444e-06, "epoch": 2.6102411257420663, "percentage": 52.2, "elapsed_time": "2:18:23", "remaining_time": "2:06:41", "throughput": 8673.38, "total_tokens": 72017320} +{"current_steps": 106850, "total_steps": 204665, "loss": 0.0002, "lr": 1.0973519736520392e-06, "epoch": 2.6103632765739135, "percentage": 52.21, "elapsed_time": "2:18:23", "remaining_time": "2:06:41", "throughput": 8673.4, "total_tokens": 72020520} +{"current_steps": 106855, "total_steps": 204665, "loss": 0.0536, "lr": 1.0972671007659642e-06, "epoch": 2.6104854274057607, "percentage": 52.21, "elapsed_time": "2:18:23", "remaining_time": "2:06:41", "throughput": 8673.44, "total_tokens": 72023848} +{"current_steps": 106860, "total_steps": 204665, "loss": 0.0004, "lr": 1.0971822271725367e-06, "epoch": 2.610607578237608, "percentage": 52.21, "elapsed_time": "2:18:24", "remaining_time": "2:06:40", "throughput": 8673.47, "total_tokens": 72027112} +{"current_steps": 106865, "total_steps": 204665, "loss": 0.0671, "lr": 1.0970973528723736e-06, "epoch": 2.610729729069455, "percentage": 52.21, "elapsed_time": "2:18:24", "remaining_time": "2:06:40", "throughput": 8673.52, "total_tokens": 72030632} +{"current_steps": 106870, "total_steps": 204665, "loss": 0.0002, "lr": 1.0970124778660928e-06, "epoch": 2.6108518799013023, "percentage": 52.22, "elapsed_time": "2:18:25", "remaining_time": "2:06:39", "throughput": 8673.6, "total_tokens": 72034280} +{"current_steps": 106875, "total_steps": 204665, "loss": 0.0008, "lr": 1.096927602154311e-06, "epoch": 2.610974030733149, "percentage": 52.22, "elapsed_time": "2:18:25", "remaining_time": "2:06:39", "throughput": 8673.66, "total_tokens": 72037928} +{"current_steps": 106880, "total_steps": 204665, "loss": 0.0001, "lr": 1.0968427257376455e-06, "epoch": 2.6110961815649967, "percentage": 52.22, "elapsed_time": "2:18:25", "remaining_time": "2:06:38", "throughput": 8673.69, "total_tokens": 72041128} +{"current_steps": 106885, "total_steps": 204665, "loss": 0.1136, "lr": 1.096757848616714e-06, "epoch": 2.6112183323968434, "percentage": 52.22, "elapsed_time": "2:18:26", "remaining_time": "2:06:38", "throughput": 8673.81, "total_tokens": 72045288} +{"current_steps": 106890, "total_steps": 204665, "loss": 0.0686, "lr": 1.096672970792133e-06, "epoch": 2.611340483228691, "percentage": 52.23, "elapsed_time": "2:18:26", "remaining_time": "2:06:38", "throughput": 8673.85, "total_tokens": 72048680} +{"current_steps": 106895, "total_steps": 204665, "loss": 0.0818, "lr": 1.0965880922645204e-06, "epoch": 2.611462634060538, "percentage": 52.23, "elapsed_time": "2:18:26", "remaining_time": "2:06:37", "throughput": 8673.92, "total_tokens": 72052328} +{"current_steps": 106900, "total_steps": 204665, "loss": 0.0543, "lr": 1.0965032130344932e-06, "epoch": 2.611584784892385, "percentage": 52.23, "elapsed_time": "2:18:27", "remaining_time": "2:06:37", "throughput": 8673.96, "total_tokens": 72055592} +{"current_steps": 106905, "total_steps": 204665, "loss": 0.039, "lr": 1.0964183331026686e-06, "epoch": 2.611706935724232, "percentage": 52.23, "elapsed_time": "2:18:27", "remaining_time": "2:06:36", "throughput": 8673.99, "total_tokens": 72058920} +{"current_steps": 106910, "total_steps": 204665, "loss": 0.038, "lr": 1.0963334524696637e-06, "epoch": 2.6118290865560794, "percentage": 52.24, "elapsed_time": "2:18:27", "remaining_time": "2:06:36", "throughput": 8674.06, "total_tokens": 72062568} +{"current_steps": 106915, "total_steps": 204665, "loss": 0.0005, "lr": 1.0962485711360963e-06, "epoch": 2.6119512373879266, "percentage": 52.24, "elapsed_time": "2:18:28", "remaining_time": "2:06:35", "throughput": 8674.11, "total_tokens": 72065960} +{"current_steps": 106920, "total_steps": 204665, "loss": 0.0006, "lr": 1.0961636891025836e-06, "epoch": 2.6120733882197738, "percentage": 52.24, "elapsed_time": "2:18:28", "remaining_time": "2:06:35", "throughput": 8674.15, "total_tokens": 72069352} +{"current_steps": 106925, "total_steps": 204665, "loss": 0.0421, "lr": 1.0960788063697425e-06, "epoch": 2.612195539051621, "percentage": 52.24, "elapsed_time": "2:18:28", "remaining_time": "2:06:35", "throughput": 8674.17, "total_tokens": 72072424} +{"current_steps": 106930, "total_steps": 204665, "loss": 0.0334, "lr": 1.0959939229381906e-06, "epoch": 2.612317689883468, "percentage": 52.25, "elapsed_time": "2:18:29", "remaining_time": "2:06:34", "throughput": 8674.22, "total_tokens": 72075944} +{"current_steps": 106935, "total_steps": 204665, "loss": 0.0002, "lr": 1.0959090388085448e-06, "epoch": 2.6124398407153153, "percentage": 52.25, "elapsed_time": "2:18:29", "remaining_time": "2:06:34", "throughput": 8674.28, "total_tokens": 72079464} +{"current_steps": 106940, "total_steps": 204665, "loss": 0.0002, "lr": 1.0958241539814226e-06, "epoch": 2.6125619915471625, "percentage": 52.25, "elapsed_time": "2:18:29", "remaining_time": "2:06:33", "throughput": 8674.28, "total_tokens": 72082408} +{"current_steps": 106945, "total_steps": 204665, "loss": 0.0002, "lr": 1.0957392684574415e-06, "epoch": 2.6126841423790097, "percentage": 52.25, "elapsed_time": "2:18:30", "remaining_time": "2:06:33", "throughput": 8674.34, "total_tokens": 72085928} +{"current_steps": 106950, "total_steps": 204665, "loss": 0.0555, "lr": 1.095654382237219e-06, "epoch": 2.612806293210857, "percentage": 52.26, "elapsed_time": "2:18:30", "remaining_time": "2:06:32", "throughput": 8674.35, "total_tokens": 72089000} +{"current_steps": 106955, "total_steps": 204665, "loss": 0.0447, "lr": 1.0955694953213718e-06, "epoch": 2.612928444042704, "percentage": 52.26, "elapsed_time": "2:18:30", "remaining_time": "2:06:32", "throughput": 8674.4, "total_tokens": 72092456} +{"current_steps": 106960, "total_steps": 204665, "loss": 0.0293, "lr": 1.0954846077105178e-06, "epoch": 2.613050594874551, "percentage": 52.26, "elapsed_time": "2:18:31", "remaining_time": "2:06:32", "throughput": 8674.43, "total_tokens": 72095720} +{"current_steps": 106965, "total_steps": 204665, "loss": 0.0001, "lr": 1.0953997194052738e-06, "epoch": 2.6131727457063985, "percentage": 52.26, "elapsed_time": "2:18:31", "remaining_time": "2:06:31", "throughput": 8674.58, "total_tokens": 72100136} +{"current_steps": 106970, "total_steps": 204665, "loss": 0.0002, "lr": 1.0953148304062575e-06, "epoch": 2.613294896538245, "percentage": 52.27, "elapsed_time": "2:18:32", "remaining_time": "2:06:31", "throughput": 8674.59, "total_tokens": 72103208} +{"current_steps": 106975, "total_steps": 204665, "loss": 0.0588, "lr": 1.095229940714086e-06, "epoch": 2.613417047370093, "percentage": 52.27, "elapsed_time": "2:18:32", "remaining_time": "2:06:30", "throughput": 8674.66, "total_tokens": 72106856} +{"current_steps": 106980, "total_steps": 204665, "loss": 0.1241, "lr": 1.0951450503293769e-06, "epoch": 2.6135391982019396, "percentage": 52.27, "elapsed_time": "2:18:32", "remaining_time": "2:06:30", "throughput": 8674.73, "total_tokens": 72110568} +{"current_steps": 106985, "total_steps": 204665, "loss": 0.0004, "lr": 1.0950601592527473e-06, "epoch": 2.6136613490337868, "percentage": 52.27, "elapsed_time": "2:18:33", "remaining_time": "2:06:30", "throughput": 8674.74, "total_tokens": 72113512} +{"current_steps": 106990, "total_steps": 204665, "loss": 0.0002, "lr": 1.0949752674848144e-06, "epoch": 2.613783499865634, "percentage": 52.28, "elapsed_time": "2:18:33", "remaining_time": "2:06:29", "throughput": 8674.79, "total_tokens": 72117032} +{"current_steps": 106995, "total_steps": 204665, "loss": 0.0003, "lr": 1.0948903750261962e-06, "epoch": 2.613905650697481, "percentage": 52.28, "elapsed_time": "2:18:33", "remaining_time": "2:06:29", "throughput": 8674.83, "total_tokens": 72120360} +{"current_steps": 107000, "total_steps": 204665, "loss": 0.0755, "lr": 1.0948054818775094e-06, "epoch": 2.6140278015293283, "percentage": 52.28, "elapsed_time": "2:18:34", "remaining_time": "2:06:28", "throughput": 8674.87, "total_tokens": 72123688} +{"current_steps": 107005, "total_steps": 204665, "loss": 0.0001, "lr": 1.094720588039372e-06, "epoch": 2.6141499523611755, "percentage": 52.28, "elapsed_time": "2:18:34", "remaining_time": "2:06:28", "throughput": 8674.9, "total_tokens": 72126952} +{"current_steps": 107010, "total_steps": 204665, "loss": 0.0002, "lr": 1.0946356935124006e-06, "epoch": 2.6142721031930227, "percentage": 52.29, "elapsed_time": "2:18:34", "remaining_time": "2:06:27", "throughput": 8674.95, "total_tokens": 72130472} +{"current_steps": 107015, "total_steps": 204665, "loss": 0.1374, "lr": 1.0945507982972134e-06, "epoch": 2.61439425402487, "percentage": 52.29, "elapsed_time": "2:18:35", "remaining_time": "2:06:27", "throughput": 8675.04, "total_tokens": 72134248} +{"current_steps": 107020, "total_steps": 204665, "loss": 0.0003, "lr": 1.0944659023944269e-06, "epoch": 2.614516404856717, "percentage": 52.29, "elapsed_time": "2:18:35", "remaining_time": "2:06:27", "throughput": 8675.1, "total_tokens": 72137832} +{"current_steps": 107025, "total_steps": 204665, "loss": 0.0468, "lr": 1.0943810058046591e-06, "epoch": 2.6146385556885643, "percentage": 52.29, "elapsed_time": "2:18:35", "remaining_time": "2:06:26", "throughput": 8675.13, "total_tokens": 72141096} +{"current_steps": 107030, "total_steps": 204665, "loss": 0.0003, "lr": 1.0942961085285275e-06, "epoch": 2.6147607065204115, "percentage": 52.3, "elapsed_time": "2:18:36", "remaining_time": "2:06:26", "throughput": 8675.17, "total_tokens": 72144488} +{"current_steps": 107035, "total_steps": 204665, "loss": 0.0328, "lr": 1.094211210566649e-06, "epoch": 2.6148828573522587, "percentage": 52.3, "elapsed_time": "2:18:36", "remaining_time": "2:06:25", "throughput": 8675.23, "total_tokens": 72147944} +{"current_steps": 107040, "total_steps": 204665, "loss": 0.0216, "lr": 1.0941263119196413e-06, "epoch": 2.615005008184106, "percentage": 52.3, "elapsed_time": "2:18:36", "remaining_time": "2:06:25", "throughput": 8675.29, "total_tokens": 72151528} +{"current_steps": 107045, "total_steps": 204665, "loss": 0.0005, "lr": 1.094041412588122e-06, "epoch": 2.615127159015953, "percentage": 52.3, "elapsed_time": "2:18:37", "remaining_time": "2:06:24", "throughput": 8675.33, "total_tokens": 72154984} +{"current_steps": 107050, "total_steps": 204665, "loss": 0.0319, "lr": 1.093956512572708e-06, "epoch": 2.6152493098478002, "percentage": 52.3, "elapsed_time": "2:18:37", "remaining_time": "2:06:24", "throughput": 8675.33, "total_tokens": 72157928} +{"current_steps": 107055, "total_steps": 204665, "loss": 0.102, "lr": 1.0938716118740167e-06, "epoch": 2.615371460679647, "percentage": 52.31, "elapsed_time": "2:18:37", "remaining_time": "2:06:24", "throughput": 8675.38, "total_tokens": 72161320} +{"current_steps": 107060, "total_steps": 204665, "loss": 0.0676, "lr": 1.0937867104926662e-06, "epoch": 2.6154936115114946, "percentage": 52.31, "elapsed_time": "2:18:38", "remaining_time": "2:06:23", "throughput": 8675.41, "total_tokens": 72164648} +{"current_steps": 107065, "total_steps": 204665, "loss": 0.1215, "lr": 1.0937018084292731e-06, "epoch": 2.6156157623433414, "percentage": 52.31, "elapsed_time": "2:18:38", "remaining_time": "2:06:23", "throughput": 8675.45, "total_tokens": 72168040} +{"current_steps": 107070, "total_steps": 204665, "loss": 0.0002, "lr": 1.0936169056844556e-06, "epoch": 2.615737913175189, "percentage": 52.31, "elapsed_time": "2:18:38", "remaining_time": "2:06:22", "throughput": 8675.5, "total_tokens": 72171432} +{"current_steps": 107075, "total_steps": 204665, "loss": 0.0514, "lr": 1.093532002258831e-06, "epoch": 2.6158600640070357, "percentage": 52.32, "elapsed_time": "2:18:39", "remaining_time": "2:06:22", "throughput": 8675.54, "total_tokens": 72174824} +{"current_steps": 107080, "total_steps": 204665, "loss": 0.0002, "lr": 1.0934470981530162e-06, "epoch": 2.615982214838883, "percentage": 52.32, "elapsed_time": "2:18:39", "remaining_time": "2:06:21", "throughput": 8675.58, "total_tokens": 72178280} +{"current_steps": 107085, "total_steps": 204665, "loss": 0.0679, "lr": 1.093362193367629e-06, "epoch": 2.61610436567073, "percentage": 52.32, "elapsed_time": "2:18:40", "remaining_time": "2:06:21", "throughput": 8675.62, "total_tokens": 72181608} +{"current_steps": 107090, "total_steps": 204665, "loss": 0.0029, "lr": 1.0932772879032868e-06, "epoch": 2.6162265165025773, "percentage": 52.32, "elapsed_time": "2:18:40", "remaining_time": "2:06:21", "throughput": 8675.69, "total_tokens": 72185320} +{"current_steps": 107095, "total_steps": 204665, "loss": 0.0907, "lr": 1.0931923817606068e-06, "epoch": 2.6163486673344245, "percentage": 52.33, "elapsed_time": "2:18:40", "remaining_time": "2:06:20", "throughput": 8675.72, "total_tokens": 72188520} +{"current_steps": 107100, "total_steps": 204665, "loss": 0.0006, "lr": 1.0931074749402072e-06, "epoch": 2.6164708181662717, "percentage": 52.33, "elapsed_time": "2:18:41", "remaining_time": "2:06:20", "throughput": 8675.77, "total_tokens": 72191976} +{"current_steps": 107105, "total_steps": 204665, "loss": 0.0831, "lr": 1.0930225674427047e-06, "epoch": 2.616592968998119, "percentage": 52.33, "elapsed_time": "2:18:41", "remaining_time": "2:06:19", "throughput": 8675.79, "total_tokens": 72195176} +{"current_steps": 107110, "total_steps": 204665, "loss": 0.0003, "lr": 1.0929376592687173e-06, "epoch": 2.616715119829966, "percentage": 52.33, "elapsed_time": "2:18:41", "remaining_time": "2:06:19", "throughput": 8675.81, "total_tokens": 72198312} +{"current_steps": 107115, "total_steps": 204665, "loss": 0.0003, "lr": 1.092852750418862e-06, "epoch": 2.6168372706618133, "percentage": 52.34, "elapsed_time": "2:18:42", "remaining_time": "2:06:19", "throughput": 8675.85, "total_tokens": 72201640} +{"current_steps": 107120, "total_steps": 204665, "loss": 0.0884, "lr": 1.092767840893757e-06, "epoch": 2.6169594214936605, "percentage": 52.34, "elapsed_time": "2:18:42", "remaining_time": "2:06:18", "throughput": 8675.89, "total_tokens": 72205032} +{"current_steps": 107125, "total_steps": 204665, "loss": 0.0003, "lr": 1.0926829306940186e-06, "epoch": 2.6170815723255076, "percentage": 52.34, "elapsed_time": "2:18:42", "remaining_time": "2:06:18", "throughput": 8675.92, "total_tokens": 72208296} +{"current_steps": 107130, "total_steps": 204665, "loss": 0.0576, "lr": 1.0925980198202655e-06, "epoch": 2.617203723157355, "percentage": 52.34, "elapsed_time": "2:18:43", "remaining_time": "2:06:17", "throughput": 8675.96, "total_tokens": 72211624} +{"current_steps": 107135, "total_steps": 204665, "loss": 0.0413, "lr": 1.0925131082731146e-06, "epoch": 2.617325873989202, "percentage": 52.35, "elapsed_time": "2:18:43", "remaining_time": "2:06:17", "throughput": 8675.98, "total_tokens": 72214760} +{"current_steps": 107140, "total_steps": 204665, "loss": 0.0006, "lr": 1.0924281960531834e-06, "epoch": 2.6174480248210488, "percentage": 52.35, "elapsed_time": "2:18:43", "remaining_time": "2:06:16", "throughput": 8676.01, "total_tokens": 72218088} +{"current_steps": 107145, "total_steps": 204665, "loss": 0.125, "lr": 1.0923432831610897e-06, "epoch": 2.6175701756528964, "percentage": 52.35, "elapsed_time": "2:18:44", "remaining_time": "2:06:16", "throughput": 8676.1, "total_tokens": 72221864} +{"current_steps": 107150, "total_steps": 204665, "loss": 0.0002, "lr": 1.0922583695974506e-06, "epoch": 2.617692326484743, "percentage": 52.35, "elapsed_time": "2:18:44", "remaining_time": "2:06:16", "throughput": 8676.15, "total_tokens": 72225384} +{"current_steps": 107155, "total_steps": 204665, "loss": 0.001, "lr": 1.0921734553628836e-06, "epoch": 2.617814477316591, "percentage": 52.36, "elapsed_time": "2:18:44", "remaining_time": "2:06:15", "throughput": 8676.19, "total_tokens": 72228712} +{"current_steps": 107160, "total_steps": 204665, "loss": 0.0004, "lr": 1.0920885404580066e-06, "epoch": 2.6179366281484375, "percentage": 52.36, "elapsed_time": "2:18:45", "remaining_time": "2:06:15", "throughput": 8676.24, "total_tokens": 72232168} +{"current_steps": 107165, "total_steps": 204665, "loss": 0.0667, "lr": 1.0920036248834373e-06, "epoch": 2.6180587789802847, "percentage": 52.36, "elapsed_time": "2:18:45", "remaining_time": "2:06:14", "throughput": 8676.24, "total_tokens": 72235112} +{"current_steps": 107170, "total_steps": 204665, "loss": 0.1277, "lr": 1.0919187086397928e-06, "epoch": 2.618180929812132, "percentage": 52.36, "elapsed_time": "2:18:45", "remaining_time": "2:06:14", "throughput": 8676.32, "total_tokens": 72238824} +{"current_steps": 107175, "total_steps": 204665, "loss": 0.0355, "lr": 1.0918337917276906e-06, "epoch": 2.618303080643979, "percentage": 52.37, "elapsed_time": "2:18:46", "remaining_time": "2:06:13", "throughput": 8676.34, "total_tokens": 72242024} +{"current_steps": 107180, "total_steps": 204665, "loss": 0.0037, "lr": 1.0917488741477483e-06, "epoch": 2.6184252314758263, "percentage": 52.37, "elapsed_time": "2:18:46", "remaining_time": "2:06:13", "throughput": 8676.35, "total_tokens": 72245032} +{"current_steps": 107185, "total_steps": 204665, "loss": 0.0374, "lr": 1.0916639559005837e-06, "epoch": 2.6185473823076735, "percentage": 52.37, "elapsed_time": "2:18:47", "remaining_time": "2:06:13", "throughput": 8676.38, "total_tokens": 72248360} +{"current_steps": 107190, "total_steps": 204665, "loss": 0.0883, "lr": 1.091579036986814e-06, "epoch": 2.6186695331395207, "percentage": 52.37, "elapsed_time": "2:18:47", "remaining_time": "2:06:12", "throughput": 8676.39, "total_tokens": 72251368} +{"current_steps": 107195, "total_steps": 204665, "loss": 0.165, "lr": 1.091494117407057e-06, "epoch": 2.618791683971368, "percentage": 52.38, "elapsed_time": "2:18:47", "remaining_time": "2:06:12", "throughput": 8676.4, "total_tokens": 72254440} +{"current_steps": 107200, "total_steps": 204665, "loss": 0.0005, "lr": 1.0914091971619299e-06, "epoch": 2.618913834803215, "percentage": 52.38, "elapsed_time": "2:18:48", "remaining_time": "2:06:11", "throughput": 8676.39, "total_tokens": 72257256} +{"current_steps": 107205, "total_steps": 204665, "loss": 0.065, "lr": 1.091324276252051e-06, "epoch": 2.6190359856350622, "percentage": 52.38, "elapsed_time": "2:18:48", "remaining_time": "2:06:11", "throughput": 8676.41, "total_tokens": 72260328} +{"current_steps": 107210, "total_steps": 204665, "loss": 0.0003, "lr": 1.091239354678037e-06, "epoch": 2.6191581364669094, "percentage": 52.38, "elapsed_time": "2:18:48", "remaining_time": "2:06:10", "throughput": 8676.47, "total_tokens": 72263912} +{"current_steps": 107215, "total_steps": 204665, "loss": 0.0407, "lr": 1.091154432440506e-06, "epoch": 2.6192802872987566, "percentage": 52.39, "elapsed_time": "2:18:49", "remaining_time": "2:06:10", "throughput": 8676.53, "total_tokens": 72267496} +{"current_steps": 107220, "total_steps": 204665, "loss": 0.1219, "lr": 1.0910695095400753e-06, "epoch": 2.619402438130604, "percentage": 52.39, "elapsed_time": "2:18:49", "remaining_time": "2:06:10", "throughput": 8676.55, "total_tokens": 72270696} +{"current_steps": 107225, "total_steps": 204665, "loss": 0.042, "lr": 1.0909845859773628e-06, "epoch": 2.619524588962451, "percentage": 52.39, "elapsed_time": "2:18:49", "remaining_time": "2:06:09", "throughput": 8676.56, "total_tokens": 72273768} +{"current_steps": 107230, "total_steps": 204665, "loss": 0.059, "lr": 1.0908996617529862e-06, "epoch": 2.619646739794298, "percentage": 52.39, "elapsed_time": "2:18:50", "remaining_time": "2:06:09", "throughput": 8676.6, "total_tokens": 72277096} +{"current_steps": 107235, "total_steps": 204665, "loss": 0.0013, "lr": 1.0908147368675626e-06, "epoch": 2.619768890626145, "percentage": 52.4, "elapsed_time": "2:18:50", "remaining_time": "2:06:08", "throughput": 8676.64, "total_tokens": 72280488} +{"current_steps": 107240, "total_steps": 204665, "loss": 0.0383, "lr": 1.09072981132171e-06, "epoch": 2.6198910414579926, "percentage": 52.4, "elapsed_time": "2:18:50", "remaining_time": "2:06:08", "throughput": 8676.71, "total_tokens": 72284136} +{"current_steps": 107245, "total_steps": 204665, "loss": 0.0349, "lr": 1.0906448851160453e-06, "epoch": 2.6200131922898393, "percentage": 52.4, "elapsed_time": "2:18:51", "remaining_time": "2:06:07", "throughput": 8676.77, "total_tokens": 72287720} +{"current_steps": 107250, "total_steps": 204665, "loss": 0.0361, "lr": 1.0905599582511866e-06, "epoch": 2.6201353431216865, "percentage": 52.4, "elapsed_time": "2:18:51", "remaining_time": "2:06:07", "throughput": 8676.82, "total_tokens": 72291176} +{"current_steps": 107255, "total_steps": 204665, "loss": 0.0229, "lr": 1.0904750307277519e-06, "epoch": 2.6202574939535337, "percentage": 52.41, "elapsed_time": "2:18:51", "remaining_time": "2:06:07", "throughput": 8676.9, "total_tokens": 72294888} +{"current_steps": 107260, "total_steps": 204665, "loss": 0.0008, "lr": 1.0903901025463581e-06, "epoch": 2.620379644785381, "percentage": 52.41, "elapsed_time": "2:18:52", "remaining_time": "2:06:06", "throughput": 8676.95, "total_tokens": 72298408} +{"current_steps": 107265, "total_steps": 204665, "loss": 0.0814, "lr": 1.0903051737076236e-06, "epoch": 2.620501795617228, "percentage": 52.41, "elapsed_time": "2:18:52", "remaining_time": "2:06:06", "throughput": 8677.09, "total_tokens": 72302824} +{"current_steps": 107270, "total_steps": 204665, "loss": 0.03, "lr": 1.0902202442121654e-06, "epoch": 2.6206239464490753, "percentage": 52.41, "elapsed_time": "2:18:52", "remaining_time": "2:06:05", "throughput": 8677.15, "total_tokens": 72306344} +{"current_steps": 107275, "total_steps": 204665, "loss": 0.0003, "lr": 1.0901353140606013e-06, "epoch": 2.6207460972809224, "percentage": 52.41, "elapsed_time": "2:18:53", "remaining_time": "2:06:05", "throughput": 8677.14, "total_tokens": 72309288} +{"current_steps": 107280, "total_steps": 204665, "loss": 0.0002, "lr": 1.090050383253549e-06, "epoch": 2.6208682481127696, "percentage": 52.42, "elapsed_time": "2:18:53", "remaining_time": "2:06:04", "throughput": 8677.19, "total_tokens": 72312744} +{"current_steps": 107285, "total_steps": 204665, "loss": 0.1156, "lr": 1.089965451791626e-06, "epoch": 2.620990398944617, "percentage": 52.42, "elapsed_time": "2:18:54", "remaining_time": "2:06:04", "throughput": 8677.22, "total_tokens": 72315944} +{"current_steps": 107290, "total_steps": 204665, "loss": 0.0001, "lr": 1.0898805196754502e-06, "epoch": 2.621112549776464, "percentage": 52.42, "elapsed_time": "2:18:54", "remaining_time": "2:06:04", "throughput": 8677.26, "total_tokens": 72319336} +{"current_steps": 107295, "total_steps": 204665, "loss": 0.0004, "lr": 1.089795586905639e-06, "epoch": 2.621234700608311, "percentage": 52.42, "elapsed_time": "2:18:54", "remaining_time": "2:06:03", "throughput": 8677.35, "total_tokens": 72323240} +{"current_steps": 107300, "total_steps": 204665, "loss": 0.0003, "lr": 1.08971065348281e-06, "epoch": 2.6213568514401584, "percentage": 52.43, "elapsed_time": "2:18:55", "remaining_time": "2:06:03", "throughput": 8677.36, "total_tokens": 72326312} +{"current_steps": 107305, "total_steps": 204665, "loss": 0.0003, "lr": 1.0896257194075812e-06, "epoch": 2.6214790022720056, "percentage": 52.43, "elapsed_time": "2:18:55", "remaining_time": "2:06:02", "throughput": 8677.39, "total_tokens": 72329512} +{"current_steps": 107310, "total_steps": 204665, "loss": 0.0728, "lr": 1.0895407846805698e-06, "epoch": 2.6216011531038528, "percentage": 52.43, "elapsed_time": "2:18:55", "remaining_time": "2:06:02", "throughput": 8677.42, "total_tokens": 72332776} +{"current_steps": 107315, "total_steps": 204665, "loss": 0.0272, "lr": 1.0894558493023937e-06, "epoch": 2.6217233039357, "percentage": 52.43, "elapsed_time": "2:18:56", "remaining_time": "2:06:02", "throughput": 8677.43, "total_tokens": 72335848} +{"current_steps": 107320, "total_steps": 204665, "loss": 0.1086, "lr": 1.089370913273671e-06, "epoch": 2.6218454547675467, "percentage": 52.44, "elapsed_time": "2:18:56", "remaining_time": "2:06:01", "throughput": 8677.49, "total_tokens": 72339432} +{"current_steps": 107325, "total_steps": 204665, "loss": 0.0533, "lr": 1.0892859765950187e-06, "epoch": 2.6219676055993943, "percentage": 52.44, "elapsed_time": "2:18:56", "remaining_time": "2:06:01", "throughput": 8677.5, "total_tokens": 72342440} +{"current_steps": 107330, "total_steps": 204665, "loss": 0.001, "lr": 1.089201039267055e-06, "epoch": 2.622089756431241, "percentage": 52.44, "elapsed_time": "2:18:57", "remaining_time": "2:06:00", "throughput": 8677.5, "total_tokens": 72345384} +{"current_steps": 107335, "total_steps": 204665, "loss": 0.0707, "lr": 1.0891161012903971e-06, "epoch": 2.6222119072630887, "percentage": 52.44, "elapsed_time": "2:18:57", "remaining_time": "2:06:00", "throughput": 8677.5, "total_tokens": 72348264} +{"current_steps": 107340, "total_steps": 204665, "loss": 0.0004, "lr": 1.0890311626656631e-06, "epoch": 2.6223340580949355, "percentage": 52.45, "elapsed_time": "2:18:57", "remaining_time": "2:05:59", "throughput": 8677.51, "total_tokens": 72351400} +{"current_steps": 107345, "total_steps": 204665, "loss": 0.0444, "lr": 1.0889462233934704e-06, "epoch": 2.6224562089267827, "percentage": 52.45, "elapsed_time": "2:18:58", "remaining_time": "2:05:59", "throughput": 8677.56, "total_tokens": 72354792} +{"current_steps": 107350, "total_steps": 204665, "loss": 0.0006, "lr": 1.088861283474437e-06, "epoch": 2.62257835975863, "percentage": 52.45, "elapsed_time": "2:18:58", "remaining_time": "2:05:59", "throughput": 8677.68, "total_tokens": 72359016} +{"current_steps": 107355, "total_steps": 204665, "loss": 0.0004, "lr": 1.0887763429091804e-06, "epoch": 2.622700510590477, "percentage": 52.45, "elapsed_time": "2:18:58", "remaining_time": "2:05:58", "throughput": 8677.68, "total_tokens": 72361960} +{"current_steps": 107360, "total_steps": 204665, "loss": 0.0087, "lr": 1.0886914016983182e-06, "epoch": 2.6228226614223242, "percentage": 52.46, "elapsed_time": "2:18:59", "remaining_time": "2:05:58", "throughput": 8677.77, "total_tokens": 72365864} +{"current_steps": 107365, "total_steps": 204665, "loss": 0.0458, "lr": 1.0886064598424684e-06, "epoch": 2.6229448122541714, "percentage": 52.46, "elapsed_time": "2:18:59", "remaining_time": "2:05:57", "throughput": 8677.81, "total_tokens": 72369192} +{"current_steps": 107370, "total_steps": 204665, "loss": 0.038, "lr": 1.0885215173422486e-06, "epoch": 2.6230669630860186, "percentage": 52.46, "elapsed_time": "2:18:59", "remaining_time": "2:05:57", "throughput": 8677.82, "total_tokens": 72372264} +{"current_steps": 107375, "total_steps": 204665, "loss": 0.0536, "lr": 1.0884365741982764e-06, "epoch": 2.623189113917866, "percentage": 52.46, "elapsed_time": "2:19:00", "remaining_time": "2:05:56", "throughput": 8677.85, "total_tokens": 72375528} +{"current_steps": 107380, "total_steps": 204665, "loss": 0.0377, "lr": 1.0883516304111698e-06, "epoch": 2.623311264749713, "percentage": 52.47, "elapsed_time": "2:19:00", "remaining_time": "2:05:56", "throughput": 8677.88, "total_tokens": 72378792} +{"current_steps": 107385, "total_steps": 204665, "loss": 0.0281, "lr": 1.0882666859815466e-06, "epoch": 2.62343341558156, "percentage": 52.47, "elapsed_time": "2:19:00", "remaining_time": "2:05:56", "throughput": 8677.94, "total_tokens": 72382312} +{"current_steps": 107390, "total_steps": 204665, "loss": 0.0007, "lr": 1.088181740910024e-06, "epoch": 2.6235555664134074, "percentage": 52.47, "elapsed_time": "2:19:01", "remaining_time": "2:05:55", "throughput": 8677.97, "total_tokens": 72385576} +{"current_steps": 107395, "total_steps": 204665, "loss": 0.0006, "lr": 1.0880967951972201e-06, "epoch": 2.6236777172452546, "percentage": 52.47, "elapsed_time": "2:19:01", "remaining_time": "2:05:55", "throughput": 8678.01, "total_tokens": 72388968} +{"current_steps": 107400, "total_steps": 204665, "loss": 0.0001, "lr": 1.0880118488437526e-06, "epoch": 2.6237998680771017, "percentage": 52.48, "elapsed_time": "2:19:02", "remaining_time": "2:05:54", "throughput": 8678.04, "total_tokens": 72392232} +{"current_steps": 107405, "total_steps": 204665, "loss": 0.0431, "lr": 1.087926901850239e-06, "epoch": 2.6239220189089485, "percentage": 52.48, "elapsed_time": "2:19:02", "remaining_time": "2:05:54", "throughput": 8678.05, "total_tokens": 72395304} +{"current_steps": 107410, "total_steps": 204665, "loss": 0.0003, "lr": 1.0878419542172975e-06, "epoch": 2.624044169740796, "percentage": 52.48, "elapsed_time": "2:19:02", "remaining_time": "2:05:53", "throughput": 8678.11, "total_tokens": 72398824} +{"current_steps": 107415, "total_steps": 204665, "loss": 0.0326, "lr": 1.087757005945546e-06, "epoch": 2.624166320572643, "percentage": 52.48, "elapsed_time": "2:19:03", "remaining_time": "2:05:53", "throughput": 8678.11, "total_tokens": 72401768} +{"current_steps": 107420, "total_steps": 204665, "loss": 0.0902, "lr": 1.0876720570356018e-06, "epoch": 2.6242884714044905, "percentage": 52.49, "elapsed_time": "2:19:03", "remaining_time": "2:05:53", "throughput": 8678.15, "total_tokens": 72405096} +{"current_steps": 107425, "total_steps": 204665, "loss": 0.0002, "lr": 1.0875871074880827e-06, "epoch": 2.6244106222363373, "percentage": 52.49, "elapsed_time": "2:19:03", "remaining_time": "2:05:52", "throughput": 8678.16, "total_tokens": 72408168} +{"current_steps": 107430, "total_steps": 204665, "loss": 0.0002, "lr": 1.0875021573036067e-06, "epoch": 2.6245327730681844, "percentage": 52.49, "elapsed_time": "2:19:04", "remaining_time": "2:05:52", "throughput": 8678.19, "total_tokens": 72411368} +{"current_steps": 107435, "total_steps": 204665, "loss": 0.0367, "lr": 1.0874172064827913e-06, "epoch": 2.6246549239000316, "percentage": 52.49, "elapsed_time": "2:19:04", "remaining_time": "2:05:51", "throughput": 8678.26, "total_tokens": 72415080} +{"current_steps": 107440, "total_steps": 204665, "loss": 0.104, "lr": 1.0873322550262548e-06, "epoch": 2.624777074731879, "percentage": 52.5, "elapsed_time": "2:19:04", "remaining_time": "2:05:51", "throughput": 8678.3, "total_tokens": 72418408} +{"current_steps": 107445, "total_steps": 204665, "loss": 0.0003, "lr": 1.0872473029346143e-06, "epoch": 2.624899225563726, "percentage": 52.5, "elapsed_time": "2:19:05", "remaining_time": "2:05:50", "throughput": 8678.37, "total_tokens": 72422184} +{"current_steps": 107450, "total_steps": 204665, "loss": 0.0004, "lr": 1.0871623502084882e-06, "epoch": 2.625021376395573, "percentage": 52.5, "elapsed_time": "2:19:05", "remaining_time": "2:05:50", "throughput": 8678.44, "total_tokens": 72425768} +{"current_steps": 107455, "total_steps": 204665, "loss": 0.0001, "lr": 1.087077396848494e-06, "epoch": 2.6251435272274204, "percentage": 52.5, "elapsed_time": "2:19:05", "remaining_time": "2:05:50", "throughput": 8678.48, "total_tokens": 72429160} +{"current_steps": 107460, "total_steps": 204665, "loss": 0.1302, "lr": 1.0869924428552492e-06, "epoch": 2.6252656780592676, "percentage": 52.51, "elapsed_time": "2:19:06", "remaining_time": "2:05:49", "throughput": 8678.51, "total_tokens": 72432424} +{"current_steps": 107465, "total_steps": 204665, "loss": 0.0571, "lr": 1.0869074882293723e-06, "epoch": 2.6253878288911148, "percentage": 52.51, "elapsed_time": "2:19:06", "remaining_time": "2:05:49", "throughput": 8678.51, "total_tokens": 72435432} +{"current_steps": 107470, "total_steps": 204665, "loss": 0.0333, "lr": 1.0868225329714806e-06, "epoch": 2.625509979722962, "percentage": 52.51, "elapsed_time": "2:19:06", "remaining_time": "2:05:48", "throughput": 8678.55, "total_tokens": 72438824} +{"current_steps": 107475, "total_steps": 204665, "loss": 0.0397, "lr": 1.0867375770821922e-06, "epoch": 2.625632130554809, "percentage": 52.51, "elapsed_time": "2:19:07", "remaining_time": "2:05:48", "throughput": 8678.62, "total_tokens": 72442472} +{"current_steps": 107480, "total_steps": 204665, "loss": 0.0001, "lr": 1.086652620562125e-06, "epoch": 2.6257542813866563, "percentage": 52.52, "elapsed_time": "2:19:07", "remaining_time": "2:05:48", "throughput": 8678.66, "total_tokens": 72445864} +{"current_steps": 107485, "total_steps": 204665, "loss": 0.0005, "lr": 1.0865676634118963e-06, "epoch": 2.6258764322185035, "percentage": 52.52, "elapsed_time": "2:19:07", "remaining_time": "2:05:47", "throughput": 8678.71, "total_tokens": 72449256} +{"current_steps": 107490, "total_steps": 204665, "loss": 0.068, "lr": 1.0864827056321243e-06, "epoch": 2.6259985830503507, "percentage": 52.52, "elapsed_time": "2:19:08", "remaining_time": "2:05:47", "throughput": 8678.72, "total_tokens": 72452328} +{"current_steps": 107495, "total_steps": 204665, "loss": 0.0141, "lr": 1.0863977472234275e-06, "epoch": 2.626120733882198, "percentage": 52.52, "elapsed_time": "2:19:08", "remaining_time": "2:05:46", "throughput": 8678.76, "total_tokens": 72455720} +{"current_steps": 107500, "total_steps": 204665, "loss": 0.0773, "lr": 1.086312788186422e-06, "epoch": 2.6262428847140447, "percentage": 52.52, "elapsed_time": "2:19:08", "remaining_time": "2:05:46", "throughput": 8678.78, "total_tokens": 72458856} +{"current_steps": 107505, "total_steps": 204665, "loss": 0.0408, "lr": 1.0862278285217272e-06, "epoch": 2.6263650355458923, "percentage": 52.53, "elapsed_time": "2:19:09", "remaining_time": "2:05:45", "throughput": 8678.82, "total_tokens": 72462184} +{"current_steps": 107510, "total_steps": 204665, "loss": 0.0002, "lr": 1.0861428682299605e-06, "epoch": 2.626487186377739, "percentage": 52.53, "elapsed_time": "2:19:09", "remaining_time": "2:05:45", "throughput": 8678.86, "total_tokens": 72465576} +{"current_steps": 107515, "total_steps": 204665, "loss": 0.0005, "lr": 1.0860579073117394e-06, "epoch": 2.6266093372095867, "percentage": 52.53, "elapsed_time": "2:19:10", "remaining_time": "2:05:45", "throughput": 8678.92, "total_tokens": 72469096} +{"current_steps": 107520, "total_steps": 204665, "loss": 0.0541, "lr": 1.0859729457676823e-06, "epoch": 2.6267314880414334, "percentage": 52.53, "elapsed_time": "2:19:10", "remaining_time": "2:05:44", "throughput": 8679.01, "total_tokens": 72473064} +{"current_steps": 107525, "total_steps": 204665, "loss": 0.0007, "lr": 1.0858879835984067e-06, "epoch": 2.6268536388732806, "percentage": 52.54, "elapsed_time": "2:19:10", "remaining_time": "2:05:44", "throughput": 8679.05, "total_tokens": 72476392} +{"current_steps": 107530, "total_steps": 204665, "loss": 0.0004, "lr": 1.0858030208045305e-06, "epoch": 2.626975789705128, "percentage": 52.54, "elapsed_time": "2:19:11", "remaining_time": "2:05:43", "throughput": 8679.09, "total_tokens": 72479784} +{"current_steps": 107535, "total_steps": 204665, "loss": 0.0005, "lr": 1.0857180573866718e-06, "epoch": 2.627097940536975, "percentage": 52.54, "elapsed_time": "2:19:11", "remaining_time": "2:05:43", "throughput": 8679.11, "total_tokens": 72482984} +{"current_steps": 107540, "total_steps": 204665, "loss": 0.0002, "lr": 1.0856330933454485e-06, "epoch": 2.627220091368822, "percentage": 52.54, "elapsed_time": "2:19:11", "remaining_time": "2:05:42", "throughput": 8679.12, "total_tokens": 72486056} +{"current_steps": 107545, "total_steps": 204665, "loss": 0.0002, "lr": 1.0855481286814781e-06, "epoch": 2.6273422422006694, "percentage": 52.55, "elapsed_time": "2:19:12", "remaining_time": "2:05:42", "throughput": 8679.16, "total_tokens": 72489320} +{"current_steps": 107550, "total_steps": 204665, "loss": 0.0526, "lr": 1.0854631633953788e-06, "epoch": 2.6274643930325166, "percentage": 52.55, "elapsed_time": "2:19:12", "remaining_time": "2:05:42", "throughput": 8679.17, "total_tokens": 72492392} +{"current_steps": 107555, "total_steps": 204665, "loss": 0.0002, "lr": 1.0853781974877682e-06, "epoch": 2.6275865438643637, "percentage": 52.55, "elapsed_time": "2:19:12", "remaining_time": "2:05:41", "throughput": 8679.2, "total_tokens": 72495720} +{"current_steps": 107560, "total_steps": 204665, "loss": 0.0, "lr": 1.0852932309592644e-06, "epoch": 2.627708694696211, "percentage": 52.55, "elapsed_time": "2:19:13", "remaining_time": "2:05:41", "throughput": 8679.22, "total_tokens": 72498792} +{"current_steps": 107565, "total_steps": 204665, "loss": 0.0453, "lr": 1.0852082638104854e-06, "epoch": 2.627830845528058, "percentage": 52.56, "elapsed_time": "2:19:13", "remaining_time": "2:05:40", "throughput": 8679.24, "total_tokens": 72501928} +{"current_steps": 107570, "total_steps": 204665, "loss": 0.0501, "lr": 1.0851232960420488e-06, "epoch": 2.6279529963599053, "percentage": 52.56, "elapsed_time": "2:19:13", "remaining_time": "2:05:40", "throughput": 8679.29, "total_tokens": 72505384} +{"current_steps": 107575, "total_steps": 204665, "loss": 0.0009, "lr": 1.085038327654573e-06, "epoch": 2.6280751471917525, "percentage": 52.56, "elapsed_time": "2:19:14", "remaining_time": "2:05:39", "throughput": 8679.29, "total_tokens": 72508328} +{"current_steps": 107580, "total_steps": 204665, "loss": 0.0004, "lr": 1.0849533586486754e-06, "epoch": 2.6281972980235997, "percentage": 52.56, "elapsed_time": "2:19:14", "remaining_time": "2:05:39", "throughput": 8679.31, "total_tokens": 72511528} +{"current_steps": 107585, "total_steps": 204665, "loss": 0.0138, "lr": 1.0848683890249743e-06, "epoch": 2.6283194488554464, "percentage": 52.57, "elapsed_time": "2:19:14", "remaining_time": "2:05:39", "throughput": 8679.33, "total_tokens": 72514664} +{"current_steps": 107590, "total_steps": 204665, "loss": 0.0008, "lr": 1.0847834187840873e-06, "epoch": 2.628441599687294, "percentage": 52.57, "elapsed_time": "2:19:15", "remaining_time": "2:05:38", "throughput": 8679.39, "total_tokens": 72518312} +{"current_steps": 107595, "total_steps": 204665, "loss": 0.0001, "lr": 1.0846984479266326e-06, "epoch": 2.628563750519141, "percentage": 52.57, "elapsed_time": "2:19:15", "remaining_time": "2:05:38", "throughput": 8679.43, "total_tokens": 72521640} +{"current_steps": 107600, "total_steps": 204665, "loss": 0.0467, "lr": 1.084613476453228e-06, "epoch": 2.6286859013509885, "percentage": 52.57, "elapsed_time": "2:19:15", "remaining_time": "2:05:37", "throughput": 8679.48, "total_tokens": 72525096} +{"current_steps": 107605, "total_steps": 204665, "loss": 0.0555, "lr": 1.0845285043644914e-06, "epoch": 2.628808052182835, "percentage": 52.58, "elapsed_time": "2:19:16", "remaining_time": "2:05:37", "throughput": 8679.62, "total_tokens": 72529512} +{"current_steps": 107610, "total_steps": 204665, "loss": 0.0999, "lr": 1.0844435316610408e-06, "epoch": 2.6289302030146824, "percentage": 52.58, "elapsed_time": "2:19:16", "remaining_time": "2:05:36", "throughput": 8679.69, "total_tokens": 72533224} +{"current_steps": 107615, "total_steps": 204665, "loss": 0.0004, "lr": 1.084358558343494e-06, "epoch": 2.6290523538465296, "percentage": 52.58, "elapsed_time": "2:19:17", "remaining_time": "2:05:36", "throughput": 8679.72, "total_tokens": 72536552} +{"current_steps": 107620, "total_steps": 204665, "loss": 0.0028, "lr": 1.084273584412469e-06, "epoch": 2.6291745046783768, "percentage": 52.58, "elapsed_time": "2:19:17", "remaining_time": "2:05:36", "throughput": 8679.8, "total_tokens": 72540264} +{"current_steps": 107625, "total_steps": 204665, "loss": 0.0367, "lr": 1.084188609868584e-06, "epoch": 2.629296655510224, "percentage": 52.59, "elapsed_time": "2:19:17", "remaining_time": "2:05:35", "throughput": 8679.82, "total_tokens": 72543464} +{"current_steps": 107630, "total_steps": 204665, "loss": 0.0459, "lr": 1.0841036347124565e-06, "epoch": 2.629418806342071, "percentage": 52.59, "elapsed_time": "2:19:18", "remaining_time": "2:05:35", "throughput": 8679.82, "total_tokens": 72546408} +{"current_steps": 107635, "total_steps": 204665, "loss": 0.0002, "lr": 1.0840186589447052e-06, "epoch": 2.6295409571739183, "percentage": 52.59, "elapsed_time": "2:19:18", "remaining_time": "2:05:34", "throughput": 8679.93, "total_tokens": 72550568} +{"current_steps": 107640, "total_steps": 204665, "loss": 0.0001, "lr": 1.0839336825659473e-06, "epoch": 2.6296631080057655, "percentage": 52.59, "elapsed_time": "2:19:18", "remaining_time": "2:05:34", "throughput": 8679.97, "total_tokens": 72553896} +{"current_steps": 107645, "total_steps": 204665, "loss": 0.0003, "lr": 1.0838487055768013e-06, "epoch": 2.6297852588376127, "percentage": 52.6, "elapsed_time": "2:19:19", "remaining_time": "2:05:34", "throughput": 8680.03, "total_tokens": 72557480} +{"current_steps": 107650, "total_steps": 204665, "loss": 0.1545, "lr": 1.083763727977885e-06, "epoch": 2.62990740966946, "percentage": 52.6, "elapsed_time": "2:19:19", "remaining_time": "2:05:33", "throughput": 8680.07, "total_tokens": 72560808} +{"current_steps": 107655, "total_steps": 204665, "loss": 0.1782, "lr": 1.0836787497698161e-06, "epoch": 2.630029560501307, "percentage": 52.6, "elapsed_time": "2:19:19", "remaining_time": "2:05:33", "throughput": 8680.12, "total_tokens": 72564328} +{"current_steps": 107660, "total_steps": 204665, "loss": 0.0003, "lr": 1.0835937709532131e-06, "epoch": 2.6301517113331543, "percentage": 52.6, "elapsed_time": "2:19:20", "remaining_time": "2:05:32", "throughput": 8680.12, "total_tokens": 72567272} +{"current_steps": 107665, "total_steps": 204665, "loss": 0.0573, "lr": 1.0835087915286933e-06, "epoch": 2.6302738621650015, "percentage": 52.61, "elapsed_time": "2:19:20", "remaining_time": "2:05:32", "throughput": 8680.15, "total_tokens": 72570472} +{"current_steps": 107670, "total_steps": 204665, "loss": 0.0001, "lr": 1.0834238114968754e-06, "epoch": 2.6303960129968487, "percentage": 52.61, "elapsed_time": "2:19:20", "remaining_time": "2:05:31", "throughput": 8680.17, "total_tokens": 72573672} +{"current_steps": 107675, "total_steps": 204665, "loss": 0.1061, "lr": 1.0833388308583772e-06, "epoch": 2.630518163828696, "percentage": 52.61, "elapsed_time": "2:19:21", "remaining_time": "2:05:31", "throughput": 8680.17, "total_tokens": 72576616} +{"current_steps": 107680, "total_steps": 204665, "loss": 0.0239, "lr": 1.0832538496138165e-06, "epoch": 2.6306403146605426, "percentage": 52.61, "elapsed_time": "2:19:21", "remaining_time": "2:05:31", "throughput": 8680.25, "total_tokens": 72580392} +{"current_steps": 107685, "total_steps": 204665, "loss": 0.0002, "lr": 1.0831688677638112e-06, "epoch": 2.6307624654923902, "percentage": 52.62, "elapsed_time": "2:19:21", "remaining_time": "2:05:30", "throughput": 8680.26, "total_tokens": 72583464} +{"current_steps": 107690, "total_steps": 204665, "loss": 0.0003, "lr": 1.0830838853089796e-06, "epoch": 2.630884616324237, "percentage": 52.62, "elapsed_time": "2:19:22", "remaining_time": "2:05:30", "throughput": 8680.32, "total_tokens": 72586984} +{"current_steps": 107695, "total_steps": 204665, "loss": 0.0636, "lr": 1.08299890224994e-06, "epoch": 2.631006767156084, "percentage": 52.62, "elapsed_time": "2:19:22", "remaining_time": "2:05:29", "throughput": 8680.29, "total_tokens": 72589672} +{"current_steps": 107700, "total_steps": 204665, "loss": 0.0014, "lr": 1.0829139185873097e-06, "epoch": 2.6311289179879314, "percentage": 52.62, "elapsed_time": "2:19:22", "remaining_time": "2:05:29", "throughput": 8680.36, "total_tokens": 72593320} +{"current_steps": 107705, "total_steps": 204665, "loss": 0.0544, "lr": 1.082828934321707e-06, "epoch": 2.6312510688197785, "percentage": 52.63, "elapsed_time": "2:19:23", "remaining_time": "2:05:28", "throughput": 8680.42, "total_tokens": 72596904} +{"current_steps": 107710, "total_steps": 204665, "loss": 0.107, "lr": 1.0827439494537506e-06, "epoch": 2.6313732196516257, "percentage": 52.63, "elapsed_time": "2:19:23", "remaining_time": "2:05:28", "throughput": 8680.46, "total_tokens": 72600360} +{"current_steps": 107715, "total_steps": 204665, "loss": 0.0938, "lr": 1.0826589639840572e-06, "epoch": 2.631495370483473, "percentage": 52.63, "elapsed_time": "2:19:24", "remaining_time": "2:05:28", "throughput": 8680.51, "total_tokens": 72603816} +{"current_steps": 107720, "total_steps": 204665, "loss": 0.0401, "lr": 1.082573977913246e-06, "epoch": 2.63161752131532, "percentage": 52.63, "elapsed_time": "2:19:24", "remaining_time": "2:05:27", "throughput": 8680.58, "total_tokens": 72607400} +{"current_steps": 107725, "total_steps": 204665, "loss": 0.0001, "lr": 1.0824889912419344e-06, "epoch": 2.6317396721471673, "percentage": 52.63, "elapsed_time": "2:19:24", "remaining_time": "2:05:27", "throughput": 8680.6, "total_tokens": 72610600} +{"current_steps": 107730, "total_steps": 204665, "loss": 0.007, "lr": 1.0824040039707409e-06, "epoch": 2.6318618229790145, "percentage": 52.64, "elapsed_time": "2:19:25", "remaining_time": "2:05:26", "throughput": 8680.65, "total_tokens": 72614120} +{"current_steps": 107735, "total_steps": 204665, "loss": 0.1447, "lr": 1.0823190161002834e-06, "epoch": 2.6319839738108617, "percentage": 52.64, "elapsed_time": "2:19:25", "remaining_time": "2:05:26", "throughput": 8680.67, "total_tokens": 72617192} +{"current_steps": 107740, "total_steps": 204665, "loss": 0.0502, "lr": 1.0822340276311794e-06, "epoch": 2.632106124642709, "percentage": 52.64, "elapsed_time": "2:19:25", "remaining_time": "2:05:25", "throughput": 8680.74, "total_tokens": 72620968} +{"current_steps": 107745, "total_steps": 204665, "loss": 0.0001, "lr": 1.082149038564048e-06, "epoch": 2.632228275474556, "percentage": 52.64, "elapsed_time": "2:19:26", "remaining_time": "2:05:25", "throughput": 8680.8, "total_tokens": 72624488} +{"current_steps": 107750, "total_steps": 204665, "loss": 0.0002, "lr": 1.0820640488995062e-06, "epoch": 2.6323504263064033, "percentage": 52.65, "elapsed_time": "2:19:26", "remaining_time": "2:05:25", "throughput": 8680.83, "total_tokens": 72627816} +{"current_steps": 107755, "total_steps": 204665, "loss": 0.0009, "lr": 1.0819790586381729e-06, "epoch": 2.6324725771382504, "percentage": 52.65, "elapsed_time": "2:19:26", "remaining_time": "2:05:24", "throughput": 8680.86, "total_tokens": 72631080} +{"current_steps": 107760, "total_steps": 204665, "loss": 0.0577, "lr": 1.0818940677806657e-06, "epoch": 2.6325947279700976, "percentage": 52.65, "elapsed_time": "2:19:27", "remaining_time": "2:05:24", "throughput": 8680.92, "total_tokens": 72634664} +{"current_steps": 107765, "total_steps": 204665, "loss": 0.0003, "lr": 1.081809076327603e-06, "epoch": 2.6327168788019444, "percentage": 52.65, "elapsed_time": "2:19:27", "remaining_time": "2:05:23", "throughput": 8680.99, "total_tokens": 72638376} +{"current_steps": 107770, "total_steps": 204665, "loss": 0.0004, "lr": 1.0817240842796025e-06, "epoch": 2.632839029633792, "percentage": 52.66, "elapsed_time": "2:19:27", "remaining_time": "2:05:23", "throughput": 8681.04, "total_tokens": 72641832} +{"current_steps": 107775, "total_steps": 204665, "loss": 0.0004, "lr": 1.0816390916372824e-06, "epoch": 2.6329611804656388, "percentage": 52.66, "elapsed_time": "2:19:28", "remaining_time": "2:05:23", "throughput": 8681.07, "total_tokens": 72645096} +{"current_steps": 107780, "total_steps": 204665, "loss": 0.0003, "lr": 1.0815540984012608e-06, "epoch": 2.6330833312974864, "percentage": 52.66, "elapsed_time": "2:19:28", "remaining_time": "2:05:22", "throughput": 8681.28, "total_tokens": 72650280} +{"current_steps": 107785, "total_steps": 204665, "loss": 0.0002, "lr": 1.081469104572156e-06, "epoch": 2.633205482129333, "percentage": 52.66, "elapsed_time": "2:19:28", "remaining_time": "2:05:22", "throughput": 8681.29, "total_tokens": 72653352} +{"current_steps": 107790, "total_steps": 204665, "loss": 0.0716, "lr": 1.081384110150586e-06, "epoch": 2.6333276329611803, "percentage": 52.67, "elapsed_time": "2:19:29", "remaining_time": "2:05:21", "throughput": 8681.31, "total_tokens": 72656488} +{"current_steps": 107795, "total_steps": 204665, "loss": 0.0003, "lr": 1.0812991151371691e-06, "epoch": 2.6334497837930275, "percentage": 52.67, "elapsed_time": "2:19:29", "remaining_time": "2:05:21", "throughput": 8681.34, "total_tokens": 72659752} +{"current_steps": 107800, "total_steps": 204665, "loss": 0.1924, "lr": 1.0812141195325228e-06, "epoch": 2.6335719346248747, "percentage": 52.67, "elapsed_time": "2:19:30", "remaining_time": "2:05:20", "throughput": 8681.39, "total_tokens": 72663208} +{"current_steps": 107805, "total_steps": 204665, "loss": 0.0001, "lr": 1.0811291233372659e-06, "epoch": 2.633694085456722, "percentage": 52.67, "elapsed_time": "2:19:30", "remaining_time": "2:05:20", "throughput": 8681.42, "total_tokens": 72666472} +{"current_steps": 107810, "total_steps": 204665, "loss": 0.0924, "lr": 1.081044126552016e-06, "epoch": 2.633816236288569, "percentage": 52.68, "elapsed_time": "2:19:30", "remaining_time": "2:05:20", "throughput": 8681.45, "total_tokens": 72669800} +{"current_steps": 107815, "total_steps": 204665, "loss": 0.0457, "lr": 1.0809591291773913e-06, "epoch": 2.6339383871204163, "percentage": 52.68, "elapsed_time": "2:19:31", "remaining_time": "2:05:19", "throughput": 8681.48, "total_tokens": 72673000} +{"current_steps": 107820, "total_steps": 204665, "loss": 0.0397, "lr": 1.0808741312140103e-06, "epoch": 2.6340605379522635, "percentage": 52.68, "elapsed_time": "2:19:31", "remaining_time": "2:05:19", "throughput": 8681.54, "total_tokens": 72676584} +{"current_steps": 107825, "total_steps": 204665, "loss": 0.0004, "lr": 1.0807891326624906e-06, "epoch": 2.6341826887841107, "percentage": 52.68, "elapsed_time": "2:19:31", "remaining_time": "2:05:18", "throughput": 8681.67, "total_tokens": 72680936} +{"current_steps": 107830, "total_steps": 204665, "loss": 0.0002, "lr": 1.0807041335234508e-06, "epoch": 2.634304839615958, "percentage": 52.69, "elapsed_time": "2:19:32", "remaining_time": "2:05:18", "throughput": 8681.71, "total_tokens": 72684264} +{"current_steps": 107835, "total_steps": 204665, "loss": 0.0007, "lr": 1.0806191337975085e-06, "epoch": 2.634426990447805, "percentage": 52.69, "elapsed_time": "2:19:32", "remaining_time": "2:05:18", "throughput": 8681.76, "total_tokens": 72687720} +{"current_steps": 107840, "total_steps": 204665, "loss": 0.0407, "lr": 1.0805341334852824e-06, "epoch": 2.6345491412796522, "percentage": 52.69, "elapsed_time": "2:19:32", "remaining_time": "2:05:17", "throughput": 8681.81, "total_tokens": 72691240} +{"current_steps": 107845, "total_steps": 204665, "loss": 0.1027, "lr": 1.0804491325873904e-06, "epoch": 2.6346712921114994, "percentage": 52.69, "elapsed_time": "2:19:33", "remaining_time": "2:05:17", "throughput": 8681.83, "total_tokens": 72694440} +{"current_steps": 107850, "total_steps": 204665, "loss": 0.0001, "lr": 1.0803641311044507e-06, "epoch": 2.6347934429433466, "percentage": 52.7, "elapsed_time": "2:19:33", "remaining_time": "2:05:16", "throughput": 8681.9, "total_tokens": 72698088} +{"current_steps": 107855, "total_steps": 204665, "loss": 0.1029, "lr": 1.0802791290370819e-06, "epoch": 2.634915593775194, "percentage": 52.7, "elapsed_time": "2:19:33", "remaining_time": "2:05:16", "throughput": 8681.98, "total_tokens": 72701864} +{"current_steps": 107860, "total_steps": 204665, "loss": 0.0001, "lr": 1.0801941263859011e-06, "epoch": 2.6350377446070405, "percentage": 52.7, "elapsed_time": "2:19:34", "remaining_time": "2:05:15", "throughput": 8682.02, "total_tokens": 72705256} +{"current_steps": 107865, "total_steps": 204665, "loss": 0.0008, "lr": 1.0801091231515272e-06, "epoch": 2.635159895438888, "percentage": 52.7, "elapsed_time": "2:19:34", "remaining_time": "2:05:15", "throughput": 8682.03, "total_tokens": 72708328} +{"current_steps": 107870, "total_steps": 204665, "loss": 0.0444, "lr": 1.0800241193345778e-06, "epoch": 2.635282046270735, "percentage": 52.71, "elapsed_time": "2:19:34", "remaining_time": "2:05:15", "throughput": 8682.1, "total_tokens": 72712040} +{"current_steps": 107875, "total_steps": 204665, "loss": 0.0004, "lr": 1.079939114935672e-06, "epoch": 2.635404197102582, "percentage": 52.71, "elapsed_time": "2:19:35", "remaining_time": "2:05:14", "throughput": 8682.19, "total_tokens": 72715944} +{"current_steps": 107880, "total_steps": 204665, "loss": 0.0002, "lr": 1.0798541099554272e-06, "epoch": 2.6355263479344293, "percentage": 52.71, "elapsed_time": "2:19:35", "remaining_time": "2:05:14", "throughput": 8682.21, "total_tokens": 72719080} +{"current_steps": 107885, "total_steps": 204665, "loss": 0.0332, "lr": 1.0797691043944622e-06, "epoch": 2.6356484987662765, "percentage": 52.71, "elapsed_time": "2:19:35", "remaining_time": "2:05:13", "throughput": 8682.22, "total_tokens": 72722088} +{"current_steps": 107890, "total_steps": 204665, "loss": 0.0018, "lr": 1.0796840982533943e-06, "epoch": 2.6357706495981237, "percentage": 52.72, "elapsed_time": "2:19:36", "remaining_time": "2:05:13", "throughput": 8682.25, "total_tokens": 72725352} +{"current_steps": 107895, "total_steps": 204665, "loss": 0.0002, "lr": 1.0795990915328426e-06, "epoch": 2.635892800429971, "percentage": 52.72, "elapsed_time": "2:19:36", "remaining_time": "2:05:12", "throughput": 8682.31, "total_tokens": 72728936} +{"current_steps": 107900, "total_steps": 204665, "loss": 0.0652, "lr": 1.0795140842334248e-06, "epoch": 2.636014951261818, "percentage": 52.72, "elapsed_time": "2:19:37", "remaining_time": "2:05:12", "throughput": 8682.36, "total_tokens": 72732392} +{"current_steps": 107905, "total_steps": 204665, "loss": 0.0001, "lr": 1.0794290763557591e-06, "epoch": 2.6361371020936653, "percentage": 52.72, "elapsed_time": "2:19:37", "remaining_time": "2:05:12", "throughput": 8682.39, "total_tokens": 72735656} +{"current_steps": 107910, "total_steps": 204665, "loss": 0.0377, "lr": 1.0793440679004638e-06, "epoch": 2.6362592529255124, "percentage": 52.73, "elapsed_time": "2:19:37", "remaining_time": "2:05:11", "throughput": 8682.41, "total_tokens": 72738856} +{"current_steps": 107915, "total_steps": 204665, "loss": 0.0366, "lr": 1.0792590588681572e-06, "epoch": 2.6363814037573596, "percentage": 52.73, "elapsed_time": "2:19:38", "remaining_time": "2:05:11", "throughput": 8682.43, "total_tokens": 72741992} +{"current_steps": 107920, "total_steps": 204665, "loss": 0.1173, "lr": 1.0791740492594574e-06, "epoch": 2.636503554589207, "percentage": 52.73, "elapsed_time": "2:19:38", "remaining_time": "2:05:10", "throughput": 8682.47, "total_tokens": 72745320} +{"current_steps": 107925, "total_steps": 204665, "loss": 0.0003, "lr": 1.0790890390749824e-06, "epoch": 2.636625705421054, "percentage": 52.73, "elapsed_time": "2:19:38", "remaining_time": "2:05:10", "throughput": 8682.52, "total_tokens": 72748840} +{"current_steps": 107930, "total_steps": 204665, "loss": 0.0001, "lr": 1.079004028315351e-06, "epoch": 2.636747856252901, "percentage": 52.73, "elapsed_time": "2:19:39", "remaining_time": "2:05:09", "throughput": 8682.55, "total_tokens": 72752040} +{"current_steps": 107935, "total_steps": 204665, "loss": 0.0002, "lr": 1.0789190169811806e-06, "epoch": 2.6368700070847484, "percentage": 52.74, "elapsed_time": "2:19:39", "remaining_time": "2:05:09", "throughput": 8682.6, "total_tokens": 72755624} +{"current_steps": 107940, "total_steps": 204665, "loss": 0.0001, "lr": 1.0788340050730902e-06, "epoch": 2.6369921579165956, "percentage": 52.74, "elapsed_time": "2:19:39", "remaining_time": "2:05:09", "throughput": 8682.64, "total_tokens": 72758952} +{"current_steps": 107945, "total_steps": 204665, "loss": 0.0457, "lr": 1.0787489925916976e-06, "epoch": 2.6371143087484423, "percentage": 52.74, "elapsed_time": "2:19:40", "remaining_time": "2:05:08", "throughput": 8682.68, "total_tokens": 72762280} +{"current_steps": 107950, "total_steps": 204665, "loss": 0.0002, "lr": 1.0786639795376214e-06, "epoch": 2.63723645958029, "percentage": 52.74, "elapsed_time": "2:19:40", "remaining_time": "2:05:08", "throughput": 8682.71, "total_tokens": 72765544} +{"current_steps": 107955, "total_steps": 204665, "loss": 0.0002, "lr": 1.0785789659114792e-06, "epoch": 2.6373586104121367, "percentage": 52.75, "elapsed_time": "2:19:40", "remaining_time": "2:05:07", "throughput": 8682.74, "total_tokens": 72768808} +{"current_steps": 107960, "total_steps": 204665, "loss": 0.0001, "lr": 1.07849395171389e-06, "epoch": 2.6374807612439843, "percentage": 52.75, "elapsed_time": "2:19:41", "remaining_time": "2:05:07", "throughput": 8682.8, "total_tokens": 72772328} +{"current_steps": 107965, "total_steps": 204665, "loss": 0.0002, "lr": 1.0784089369454714e-06, "epoch": 2.637602912075831, "percentage": 52.75, "elapsed_time": "2:19:41", "remaining_time": "2:05:07", "throughput": 8682.79, "total_tokens": 72775208} +{"current_steps": 107970, "total_steps": 204665, "loss": 0.0001, "lr": 1.0783239216068421e-06, "epoch": 2.6377250629076783, "percentage": 52.75, "elapsed_time": "2:19:41", "remaining_time": "2:05:06", "throughput": 8682.81, "total_tokens": 72778408} +{"current_steps": 107975, "total_steps": 204665, "loss": 0.021, "lr": 1.07823890569862e-06, "epoch": 2.6378472137395255, "percentage": 52.76, "elapsed_time": "2:19:42", "remaining_time": "2:05:06", "throughput": 8682.86, "total_tokens": 72781864} +{"current_steps": 107980, "total_steps": 204665, "loss": 0.0156, "lr": 1.0781538892214235e-06, "epoch": 2.6379693645713727, "percentage": 52.76, "elapsed_time": "2:19:42", "remaining_time": "2:05:05", "throughput": 8682.92, "total_tokens": 72785448} +{"current_steps": 107985, "total_steps": 204665, "loss": 0.1625, "lr": 1.078068872175871e-06, "epoch": 2.63809151540322, "percentage": 52.76, "elapsed_time": "2:19:42", "remaining_time": "2:05:05", "throughput": 8682.94, "total_tokens": 72788584} +{"current_steps": 107990, "total_steps": 204665, "loss": 0.0002, "lr": 1.0779838545625808e-06, "epoch": 2.638213666235067, "percentage": 52.76, "elapsed_time": "2:19:43", "remaining_time": "2:05:04", "throughput": 8682.94, "total_tokens": 72791528} +{"current_steps": 107995, "total_steps": 204665, "loss": 0.0865, "lr": 1.0778988363821706e-06, "epoch": 2.6383358170669142, "percentage": 52.77, "elapsed_time": "2:19:43", "remaining_time": "2:05:04", "throughput": 8683.05, "total_tokens": 72795688} +{"current_steps": 108000, "total_steps": 204665, "loss": 0.0782, "lr": 1.0778138176352596e-06, "epoch": 2.6384579678987614, "percentage": 52.77, "elapsed_time": "2:19:43", "remaining_time": "2:05:04", "throughput": 8683.07, "total_tokens": 72798824} +{"current_steps": 108005, "total_steps": 204665, "loss": 0.0001, "lr": 1.0777287983224652e-06, "epoch": 2.6385801187306086, "percentage": 52.77, "elapsed_time": "2:19:44", "remaining_time": "2:05:03", "throughput": 8683.09, "total_tokens": 72801960} +{"current_steps": 108010, "total_steps": 204665, "loss": 0.0001, "lr": 1.0776437784444065e-06, "epoch": 2.638702269562456, "percentage": 52.77, "elapsed_time": "2:19:44", "remaining_time": "2:05:03", "throughput": 8683.09, "total_tokens": 72804968} +{"current_steps": 108015, "total_steps": 204665, "loss": 0.1758, "lr": 1.0775587580017012e-06, "epoch": 2.638824420394303, "percentage": 52.78, "elapsed_time": "2:19:45", "remaining_time": "2:05:02", "throughput": 8683.16, "total_tokens": 72808552} +{"current_steps": 108020, "total_steps": 204665, "loss": 0.0349, "lr": 1.0774737369949678e-06, "epoch": 2.63894657122615, "percentage": 52.78, "elapsed_time": "2:19:45", "remaining_time": "2:05:02", "throughput": 8683.17, "total_tokens": 72811624} +{"current_steps": 108025, "total_steps": 204665, "loss": 0.0004, "lr": 1.0773887154248242e-06, "epoch": 2.6390687220579974, "percentage": 52.78, "elapsed_time": "2:19:45", "remaining_time": "2:05:01", "throughput": 8683.27, "total_tokens": 72815720} +{"current_steps": 108030, "total_steps": 204665, "loss": 0.03, "lr": 1.0773036932918892e-06, "epoch": 2.639190872889844, "percentage": 52.78, "elapsed_time": "2:19:46", "remaining_time": "2:05:01", "throughput": 8683.31, "total_tokens": 72819112} +{"current_steps": 108035, "total_steps": 204665, "loss": 0.0002, "lr": 1.077218670596781e-06, "epoch": 2.6393130237216917, "percentage": 52.79, "elapsed_time": "2:19:46", "remaining_time": "2:05:01", "throughput": 8683.41, "total_tokens": 72823016} +{"current_steps": 108040, "total_steps": 204665, "loss": 0.0001, "lr": 1.077133647340118e-06, "epoch": 2.6394351745535385, "percentage": 52.79, "elapsed_time": "2:19:46", "remaining_time": "2:05:00", "throughput": 8683.46, "total_tokens": 72826472} +{"current_steps": 108045, "total_steps": 204665, "loss": 0.1396, "lr": 1.0770486235225182e-06, "epoch": 2.639557325385386, "percentage": 52.79, "elapsed_time": "2:19:47", "remaining_time": "2:05:00", "throughput": 8683.5, "total_tokens": 72829864} +{"current_steps": 108050, "total_steps": 204665, "loss": 0.0001, "lr": 1.0769635991446002e-06, "epoch": 2.639679476217233, "percentage": 52.79, "elapsed_time": "2:19:47", "remaining_time": "2:04:59", "throughput": 8683.53, "total_tokens": 72833192} +{"current_steps": 108055, "total_steps": 204665, "loss": 0.0002, "lr": 1.076878574206982e-06, "epoch": 2.63980162704908, "percentage": 52.8, "elapsed_time": "2:19:47", "remaining_time": "2:04:59", "throughput": 8683.56, "total_tokens": 72836392} +{"current_steps": 108060, "total_steps": 204665, "loss": 0.0351, "lr": 1.0767935487102823e-06, "epoch": 2.6399237778809272, "percentage": 52.8, "elapsed_time": "2:19:48", "remaining_time": "2:04:59", "throughput": 8683.64, "total_tokens": 72840168} +{"current_steps": 108065, "total_steps": 204665, "loss": 0.0398, "lr": 1.0767085226551194e-06, "epoch": 2.6400459287127744, "percentage": 52.8, "elapsed_time": "2:19:48", "remaining_time": "2:04:58", "throughput": 8683.68, "total_tokens": 72843496} +{"current_steps": 108070, "total_steps": 204665, "loss": 0.0001, "lr": 1.0766234960421112e-06, "epoch": 2.6401680795446216, "percentage": 52.8, "elapsed_time": "2:19:48", "remaining_time": "2:04:58", "throughput": 8683.74, "total_tokens": 72847144} +{"current_steps": 108075, "total_steps": 204665, "loss": 0.0001, "lr": 1.0765384688718766e-06, "epoch": 2.640290230376469, "percentage": 52.81, "elapsed_time": "2:19:49", "remaining_time": "2:04:57", "throughput": 8683.76, "total_tokens": 72850280} +{"current_steps": 108080, "total_steps": 204665, "loss": 0.0539, "lr": 1.0764534411450334e-06, "epoch": 2.640412381208316, "percentage": 52.81, "elapsed_time": "2:19:49", "remaining_time": "2:04:57", "throughput": 8683.81, "total_tokens": 72853736} +{"current_steps": 108085, "total_steps": 204665, "loss": 0.0002, "lr": 1.0763684128622003e-06, "epoch": 2.640534532040163, "percentage": 52.81, "elapsed_time": "2:19:49", "remaining_time": "2:04:56", "throughput": 8683.85, "total_tokens": 72857192} +{"current_steps": 108090, "total_steps": 204665, "loss": 0.0377, "lr": 1.0762833840239956e-06, "epoch": 2.6406566828720104, "percentage": 52.81, "elapsed_time": "2:19:50", "remaining_time": "2:04:56", "throughput": 8683.96, "total_tokens": 72861288} +{"current_steps": 108095, "total_steps": 204665, "loss": 0.0013, "lr": 1.0761983546310376e-06, "epoch": 2.6407788337038576, "percentage": 52.82, "elapsed_time": "2:19:50", "remaining_time": "2:04:56", "throughput": 8684.04, "total_tokens": 72865000} +{"current_steps": 108100, "total_steps": 204665, "loss": 0.0492, "lr": 1.0761133246839446e-06, "epoch": 2.6409009845357048, "percentage": 52.82, "elapsed_time": "2:19:51", "remaining_time": "2:04:55", "throughput": 8684.04, "total_tokens": 72868008} +{"current_steps": 108105, "total_steps": 204665, "loss": 0.0002, "lr": 1.076028294183335e-06, "epoch": 2.641023135367552, "percentage": 52.82, "elapsed_time": "2:19:51", "remaining_time": "2:04:55", "throughput": 8684.05, "total_tokens": 72871016} +{"current_steps": 108110, "total_steps": 204665, "loss": 0.1439, "lr": 1.0759432631298276e-06, "epoch": 2.641145286199399, "percentage": 52.82, "elapsed_time": "2:19:51", "remaining_time": "2:04:54", "throughput": 8684.08, "total_tokens": 72874344} +{"current_steps": 108115, "total_steps": 204665, "loss": 0.0002, "lr": 1.0758582315240402e-06, "epoch": 2.6412674370312463, "percentage": 52.83, "elapsed_time": "2:19:52", "remaining_time": "2:04:54", "throughput": 8684.12, "total_tokens": 72877672} +{"current_steps": 108120, "total_steps": 204665, "loss": 0.0001, "lr": 1.075773199366591e-06, "epoch": 2.6413895878630935, "percentage": 52.83, "elapsed_time": "2:19:52", "remaining_time": "2:04:53", "throughput": 8684.13, "total_tokens": 72880744} +{"current_steps": 108125, "total_steps": 204665, "loss": 0.0002, "lr": 1.075688166658099e-06, "epoch": 2.6415117386949403, "percentage": 52.83, "elapsed_time": "2:19:52", "remaining_time": "2:04:53", "throughput": 8684.16, "total_tokens": 72884072} +{"current_steps": 108130, "total_steps": 204665, "loss": 0.0001, "lr": 1.0756031333991822e-06, "epoch": 2.641633889526788, "percentage": 52.83, "elapsed_time": "2:19:53", "remaining_time": "2:04:53", "throughput": 8684.22, "total_tokens": 72887656} +{"current_steps": 108135, "total_steps": 204665, "loss": 0.001, "lr": 1.075518099590459e-06, "epoch": 2.6417560403586346, "percentage": 52.84, "elapsed_time": "2:19:53", "remaining_time": "2:04:52", "throughput": 8684.27, "total_tokens": 72891176} +{"current_steps": 108140, "total_steps": 204665, "loss": 0.0001, "lr": 1.0754330652325481e-06, "epoch": 2.6418781911904823, "percentage": 52.84, "elapsed_time": "2:19:53", "remaining_time": "2:04:52", "throughput": 8684.33, "total_tokens": 72894696} +{"current_steps": 108145, "total_steps": 204665, "loss": 0.0003, "lr": 1.0753480303260675e-06, "epoch": 2.642000342022329, "percentage": 52.84, "elapsed_time": "2:19:54", "remaining_time": "2:04:51", "throughput": 8684.34, "total_tokens": 72897832} +{"current_steps": 108150, "total_steps": 204665, "loss": 0.1065, "lr": 1.0752629948716356e-06, "epoch": 2.642122492854176, "percentage": 52.84, "elapsed_time": "2:19:54", "remaining_time": "2:04:51", "throughput": 8684.39, "total_tokens": 72901224} +{"current_steps": 108155, "total_steps": 204665, "loss": 0.0002, "lr": 1.075177958869871e-06, "epoch": 2.6422446436860234, "percentage": 52.84, "elapsed_time": "2:19:54", "remaining_time": "2:04:50", "throughput": 8684.45, "total_tokens": 72904872} +{"current_steps": 108160, "total_steps": 204665, "loss": 0.0002, "lr": 1.0750929223213923e-06, "epoch": 2.6423667945178706, "percentage": 52.85, "elapsed_time": "2:19:55", "remaining_time": "2:04:50", "throughput": 8684.5, "total_tokens": 72908328} +{"current_steps": 108165, "total_steps": 204665, "loss": 0.0373, "lr": 1.0750078852268178e-06, "epoch": 2.642488945349718, "percentage": 52.85, "elapsed_time": "2:19:55", "remaining_time": "2:04:50", "throughput": 8684.53, "total_tokens": 72911592} +{"current_steps": 108170, "total_steps": 204665, "loss": 0.0021, "lr": 1.0749228475867656e-06, "epoch": 2.642611096181565, "percentage": 52.85, "elapsed_time": "2:19:55", "remaining_time": "2:04:49", "throughput": 8684.57, "total_tokens": 72914984} +{"current_steps": 108175, "total_steps": 204665, "loss": 0.0402, "lr": 1.0748378094018541e-06, "epoch": 2.642733247013412, "percentage": 52.85, "elapsed_time": "2:19:56", "remaining_time": "2:04:49", "throughput": 8684.59, "total_tokens": 72918120} +{"current_steps": 108180, "total_steps": 204665, "loss": 0.0542, "lr": 1.0747527706727022e-06, "epoch": 2.6428553978452594, "percentage": 52.86, "elapsed_time": "2:19:56", "remaining_time": "2:04:48", "throughput": 8684.67, "total_tokens": 72921896} +{"current_steps": 108185, "total_steps": 204665, "loss": 0.0001, "lr": 1.0746677313999277e-06, "epoch": 2.6429775486771065, "percentage": 52.86, "elapsed_time": "2:19:56", "remaining_time": "2:04:48", "throughput": 8684.8, "total_tokens": 72926248} +{"current_steps": 108190, "total_steps": 204665, "loss": 0.0366, "lr": 1.0745826915841495e-06, "epoch": 2.6430996995089537, "percentage": 52.86, "elapsed_time": "2:19:57", "remaining_time": "2:04:48", "throughput": 8684.83, "total_tokens": 72929512} +{"current_steps": 108195, "total_steps": 204665, "loss": 0.0567, "lr": 1.0744976512259862e-06, "epoch": 2.643221850340801, "percentage": 52.86, "elapsed_time": "2:19:57", "remaining_time": "2:04:47", "throughput": 8684.88, "total_tokens": 72932968} +{"current_steps": 108200, "total_steps": 204665, "loss": 0.0003, "lr": 1.0744126103260558e-06, "epoch": 2.643344001172648, "percentage": 52.87, "elapsed_time": "2:19:58", "remaining_time": "2:04:47", "throughput": 8684.99, "total_tokens": 72937000} +{"current_steps": 108205, "total_steps": 204665, "loss": 0.0941, "lr": 1.0743275688849767e-06, "epoch": 2.6434661520044953, "percentage": 52.87, "elapsed_time": "2:19:58", "remaining_time": "2:04:46", "throughput": 8685.0, "total_tokens": 72940136} +{"current_steps": 108210, "total_steps": 204665, "loss": 0.0424, "lr": 1.0742425269033678e-06, "epoch": 2.643588302836342, "percentage": 52.87, "elapsed_time": "2:19:58", "remaining_time": "2:04:46", "throughput": 8685.1, "total_tokens": 72944040} +{"current_steps": 108215, "total_steps": 204665, "loss": 0.1378, "lr": 1.074157484381847e-06, "epoch": 2.6437104536681897, "percentage": 52.87, "elapsed_time": "2:19:59", "remaining_time": "2:04:45", "throughput": 8685.12, "total_tokens": 72947240} +{"current_steps": 108220, "total_steps": 204665, "loss": 0.0001, "lr": 1.0740724413210332e-06, "epoch": 2.6438326045000364, "percentage": 52.88, "elapsed_time": "2:19:59", "remaining_time": "2:04:45", "throughput": 8685.17, "total_tokens": 72950760} +{"current_steps": 108225, "total_steps": 204665, "loss": 0.0452, "lr": 1.0739873977215447e-06, "epoch": 2.643954755331884, "percentage": 52.88, "elapsed_time": "2:19:59", "remaining_time": "2:04:45", "throughput": 8685.2, "total_tokens": 72954024} +{"current_steps": 108230, "total_steps": 204665, "loss": 0.0432, "lr": 1.073902353584e-06, "epoch": 2.644076906163731, "percentage": 52.88, "elapsed_time": "2:20:00", "remaining_time": "2:04:44", "throughput": 8685.28, "total_tokens": 72957736} +{"current_steps": 108235, "total_steps": 204665, "loss": 0.1475, "lr": 1.0738173089090172e-06, "epoch": 2.644199056995578, "percentage": 52.88, "elapsed_time": "2:20:00", "remaining_time": "2:04:44", "throughput": 8685.33, "total_tokens": 72961256} +{"current_steps": 108240, "total_steps": 204665, "loss": 0.021, "lr": 1.0737322636972155e-06, "epoch": 2.644321207827425, "percentage": 52.89, "elapsed_time": "2:20:00", "remaining_time": "2:04:43", "throughput": 8685.36, "total_tokens": 72964456} +{"current_steps": 108245, "total_steps": 204665, "loss": 0.0001, "lr": 1.0736472179492125e-06, "epoch": 2.6444433586592724, "percentage": 52.89, "elapsed_time": "2:20:01", "remaining_time": "2:04:43", "throughput": 8685.42, "total_tokens": 72968040} +{"current_steps": 108250, "total_steps": 204665, "loss": 0.0004, "lr": 1.0735621716656274e-06, "epoch": 2.6445655094911196, "percentage": 52.89, "elapsed_time": "2:20:01", "remaining_time": "2:04:43", "throughput": 8685.46, "total_tokens": 72971432} +{"current_steps": 108255, "total_steps": 204665, "loss": 0.0004, "lr": 1.0734771248470783e-06, "epoch": 2.6446876603229668, "percentage": 52.89, "elapsed_time": "2:20:01", "remaining_time": "2:04:42", "throughput": 8685.48, "total_tokens": 72974632} +{"current_steps": 108260, "total_steps": 204665, "loss": 0.1142, "lr": 1.0733920774941837e-06, "epoch": 2.644809811154814, "percentage": 52.9, "elapsed_time": "2:20:02", "remaining_time": "2:04:42", "throughput": 8685.5, "total_tokens": 72977704} +{"current_steps": 108265, "total_steps": 204665, "loss": 0.0015, "lr": 1.0733070296075623e-06, "epoch": 2.644931961986661, "percentage": 52.9, "elapsed_time": "2:20:02", "remaining_time": "2:04:41", "throughput": 8685.57, "total_tokens": 72981288} +{"current_steps": 108270, "total_steps": 204665, "loss": 0.0004, "lr": 1.0732219811878327e-06, "epoch": 2.6450541128185083, "percentage": 52.9, "elapsed_time": "2:20:02", "remaining_time": "2:04:41", "throughput": 8685.62, "total_tokens": 72984744} +{"current_steps": 108275, "total_steps": 204665, "loss": 0.0593, "lr": 1.0731369322356127e-06, "epoch": 2.6451762636503555, "percentage": 52.9, "elapsed_time": "2:20:03", "remaining_time": "2:04:40", "throughput": 8685.69, "total_tokens": 72988456} +{"current_steps": 108280, "total_steps": 204665, "loss": 0.0002, "lr": 1.0730518827515216e-06, "epoch": 2.6452984144822027, "percentage": 52.91, "elapsed_time": "2:20:03", "remaining_time": "2:04:40", "throughput": 8685.71, "total_tokens": 72991656} +{"current_steps": 108285, "total_steps": 204665, "loss": 0.082, "lr": 1.0729668327361774e-06, "epoch": 2.64542056531405, "percentage": 52.91, "elapsed_time": "2:20:03", "remaining_time": "2:04:40", "throughput": 8685.78, "total_tokens": 72995304} +{"current_steps": 108290, "total_steps": 204665, "loss": 0.0756, "lr": 1.0728817821901988e-06, "epoch": 2.645542716145897, "percentage": 52.91, "elapsed_time": "2:20:04", "remaining_time": "2:04:39", "throughput": 8685.79, "total_tokens": 72998312} +{"current_steps": 108295, "total_steps": 204665, "loss": 0.0001, "lr": 1.0727967311142044e-06, "epoch": 2.6456648669777443, "percentage": 52.91, "elapsed_time": "2:20:04", "remaining_time": "2:04:39", "throughput": 8685.81, "total_tokens": 73001384} +{"current_steps": 108300, "total_steps": 204665, "loss": 0.0173, "lr": 1.0727116795088125e-06, "epoch": 2.6457870178095915, "percentage": 52.92, "elapsed_time": "2:20:05", "remaining_time": "2:04:38", "throughput": 8685.81, "total_tokens": 73004392} +{"current_steps": 108305, "total_steps": 204665, "loss": 0.0006, "lr": 1.0726266273746414e-06, "epoch": 2.645909168641438, "percentage": 52.92, "elapsed_time": "2:20:05", "remaining_time": "2:04:38", "throughput": 8685.84, "total_tokens": 73007656} +{"current_steps": 108310, "total_steps": 204665, "loss": 0.0002, "lr": 1.0725415747123102e-06, "epoch": 2.646031319473286, "percentage": 52.92, "elapsed_time": "2:20:05", "remaining_time": "2:04:37", "throughput": 8685.93, "total_tokens": 73011496} +{"current_steps": 108315, "total_steps": 204665, "loss": 0.0925, "lr": 1.0724565215224373e-06, "epoch": 2.6461534703051326, "percentage": 52.92, "elapsed_time": "2:20:06", "remaining_time": "2:04:37", "throughput": 8685.97, "total_tokens": 73014824} +{"current_steps": 108320, "total_steps": 204665, "loss": 0.0002, "lr": 1.072371467805641e-06, "epoch": 2.64627562113698, "percentage": 52.93, "elapsed_time": "2:20:06", "remaining_time": "2:04:37", "throughput": 8685.99, "total_tokens": 73017960} +{"current_steps": 108325, "total_steps": 204665, "loss": 0.0005, "lr": 1.07228641356254e-06, "epoch": 2.646397771968827, "percentage": 52.93, "elapsed_time": "2:20:06", "remaining_time": "2:04:36", "throughput": 8686.03, "total_tokens": 73021288} +{"current_steps": 108330, "total_steps": 204665, "loss": 0.0516, "lr": 1.0722013587937526e-06, "epoch": 2.646519922800674, "percentage": 52.93, "elapsed_time": "2:20:07", "remaining_time": "2:04:36", "throughput": 8686.05, "total_tokens": 73024424} +{"current_steps": 108335, "total_steps": 204665, "loss": 0.0482, "lr": 1.0721163034998975e-06, "epoch": 2.6466420736325214, "percentage": 52.93, "elapsed_time": "2:20:07", "remaining_time": "2:04:35", "throughput": 8686.06, "total_tokens": 73027496} +{"current_steps": 108340, "total_steps": 204665, "loss": 0.0002, "lr": 1.0720312476815932e-06, "epoch": 2.6467642244643685, "percentage": 52.94, "elapsed_time": "2:20:07", "remaining_time": "2:04:35", "throughput": 8686.14, "total_tokens": 73031272} +{"current_steps": 108345, "total_steps": 204665, "loss": 0.0004, "lr": 1.0719461913394582e-06, "epoch": 2.6468863752962157, "percentage": 52.94, "elapsed_time": "2:20:08", "remaining_time": "2:04:34", "throughput": 8686.15, "total_tokens": 73034280} +{"current_steps": 108350, "total_steps": 204665, "loss": 0.0822, "lr": 1.0718611344741116e-06, "epoch": 2.647008526128063, "percentage": 52.94, "elapsed_time": "2:20:08", "remaining_time": "2:04:34", "throughput": 8686.15, "total_tokens": 73037224} +{"current_steps": 108355, "total_steps": 204665, "loss": 0.0934, "lr": 1.071776077086171e-06, "epoch": 2.64713067695991, "percentage": 52.94, "elapsed_time": "2:20:08", "remaining_time": "2:04:34", "throughput": 8686.21, "total_tokens": 73040808} +{"current_steps": 108360, "total_steps": 204665, "loss": 0.0623, "lr": 1.0716910191762559e-06, "epoch": 2.6472528277917573, "percentage": 52.95, "elapsed_time": "2:20:09", "remaining_time": "2:04:33", "throughput": 8686.24, "total_tokens": 73044072} +{"current_steps": 108365, "total_steps": 204665, "loss": 0.0005, "lr": 1.0716059607449842e-06, "epoch": 2.6473749786236045, "percentage": 52.95, "elapsed_time": "2:20:09", "remaining_time": "2:04:33", "throughput": 8686.25, "total_tokens": 73047080} +{"current_steps": 108370, "total_steps": 204665, "loss": 0.0398, "lr": 1.0715209017929744e-06, "epoch": 2.6474971294554517, "percentage": 52.95, "elapsed_time": "2:20:09", "remaining_time": "2:04:32", "throughput": 8686.26, "total_tokens": 73050152} +{"current_steps": 108375, "total_steps": 204665, "loss": 0.0184, "lr": 1.0714358423208457e-06, "epoch": 2.647619280287299, "percentage": 52.95, "elapsed_time": "2:20:10", "remaining_time": "2:04:32", "throughput": 8686.32, "total_tokens": 73053672} +{"current_steps": 108380, "total_steps": 204665, "loss": 0.0849, "lr": 1.0713507823292165e-06, "epoch": 2.647741431119146, "percentage": 52.95, "elapsed_time": "2:20:10", "remaining_time": "2:04:31", "throughput": 8686.35, "total_tokens": 73056936} +{"current_steps": 108385, "total_steps": 204665, "loss": 0.0005, "lr": 1.071265721818705e-06, "epoch": 2.6478635819509933, "percentage": 52.96, "elapsed_time": "2:20:10", "remaining_time": "2:04:31", "throughput": 8686.38, "total_tokens": 73060136} +{"current_steps": 108390, "total_steps": 204665, "loss": 0.1114, "lr": 1.0711806607899302e-06, "epoch": 2.64798573278284, "percentage": 52.96, "elapsed_time": "2:20:11", "remaining_time": "2:04:31", "throughput": 8686.44, "total_tokens": 73063784} +{"current_steps": 108395, "total_steps": 204665, "loss": 0.0003, "lr": 1.0710955992435101e-06, "epoch": 2.6481078836146876, "percentage": 52.96, "elapsed_time": "2:20:11", "remaining_time": "2:04:30", "throughput": 8686.5, "total_tokens": 73067304} +{"current_steps": 108400, "total_steps": 204665, "loss": 0.0002, "lr": 1.0710105371800637e-06, "epoch": 2.6482300344465344, "percentage": 52.96, "elapsed_time": "2:20:11", "remaining_time": "2:04:30", "throughput": 8686.56, "total_tokens": 73070888} +{"current_steps": 108405, "total_steps": 204665, "loss": 0.0008, "lr": 1.07092547460021e-06, "epoch": 2.648352185278382, "percentage": 52.97, "elapsed_time": "2:20:12", "remaining_time": "2:04:29", "throughput": 8686.59, "total_tokens": 73074088} +{"current_steps": 108410, "total_steps": 204665, "loss": 0.1232, "lr": 1.0708404115045669e-06, "epoch": 2.6484743361102288, "percentage": 52.97, "elapsed_time": "2:20:12", "remaining_time": "2:04:29", "throughput": 8686.65, "total_tokens": 73077608} +{"current_steps": 108415, "total_steps": 204665, "loss": 0.026, "lr": 1.0707553478937533e-06, "epoch": 2.648596486942076, "percentage": 52.97, "elapsed_time": "2:20:12", "remaining_time": "2:04:28", "throughput": 8686.69, "total_tokens": 73081000} +{"current_steps": 108420, "total_steps": 204665, "loss": 0.0003, "lr": 1.070670283768388e-06, "epoch": 2.648718637773923, "percentage": 52.97, "elapsed_time": "2:20:13", "remaining_time": "2:04:28", "throughput": 8686.71, "total_tokens": 73084136} +{"current_steps": 108425, "total_steps": 204665, "loss": 0.0421, "lr": 1.0705852191290891e-06, "epoch": 2.6488407886057703, "percentage": 52.98, "elapsed_time": "2:20:13", "remaining_time": "2:04:28", "throughput": 8686.75, "total_tokens": 73087528} +{"current_steps": 108430, "total_steps": 204665, "loss": 0.0002, "lr": 1.0705001539764754e-06, "epoch": 2.6489629394376175, "percentage": 52.98, "elapsed_time": "2:20:14", "remaining_time": "2:04:27", "throughput": 8686.81, "total_tokens": 73091048} +{"current_steps": 108435, "total_steps": 204665, "loss": 0.0005, "lr": 1.0704150883111659e-06, "epoch": 2.6490850902694647, "percentage": 52.98, "elapsed_time": "2:20:14", "remaining_time": "2:04:27", "throughput": 8686.85, "total_tokens": 73094440} +{"current_steps": 108440, "total_steps": 204665, "loss": 0.0002, "lr": 1.0703300221337787e-06, "epoch": 2.649207241101312, "percentage": 52.98, "elapsed_time": "2:20:14", "remaining_time": "2:04:26", "throughput": 8686.96, "total_tokens": 73098472} +{"current_steps": 108445, "total_steps": 204665, "loss": 0.0863, "lr": 1.070244955444933e-06, "epoch": 2.649329391933159, "percentage": 52.99, "elapsed_time": "2:20:15", "remaining_time": "2:04:26", "throughput": 8687.01, "total_tokens": 73101928} +{"current_steps": 108450, "total_steps": 204665, "loss": 0.0002, "lr": 1.0701598882452469e-06, "epoch": 2.6494515427650063, "percentage": 52.99, "elapsed_time": "2:20:15", "remaining_time": "2:04:26", "throughput": 8687.09, "total_tokens": 73105768} +{"current_steps": 108455, "total_steps": 204665, "loss": 0.1254, "lr": 1.070074820535339e-06, "epoch": 2.6495736935968535, "percentage": 52.99, "elapsed_time": "2:20:15", "remaining_time": "2:04:25", "throughput": 8687.17, "total_tokens": 73109544} +{"current_steps": 108460, "total_steps": 204665, "loss": 0.0293, "lr": 1.0699897523158283e-06, "epoch": 2.6496958444287007, "percentage": 52.99, "elapsed_time": "2:20:16", "remaining_time": "2:04:25", "throughput": 8687.23, "total_tokens": 73113064} +{"current_steps": 108465, "total_steps": 204665, "loss": 0.0029, "lr": 1.0699046835873336e-06, "epoch": 2.649817995260548, "percentage": 53.0, "elapsed_time": "2:20:16", "remaining_time": "2:04:24", "throughput": 8687.25, "total_tokens": 73116200} +{"current_steps": 108470, "total_steps": 204665, "loss": 0.0515, "lr": 1.069819614350473e-06, "epoch": 2.649940146092395, "percentage": 53.0, "elapsed_time": "2:20:16", "remaining_time": "2:04:24", "throughput": 8687.3, "total_tokens": 73119592} +{"current_steps": 108475, "total_steps": 204665, "loss": 0.0455, "lr": 1.0697345446058654e-06, "epoch": 2.650062296924242, "percentage": 53.0, "elapsed_time": "2:20:17", "remaining_time": "2:04:23", "throughput": 8687.34, "total_tokens": 73122984} +{"current_steps": 108480, "total_steps": 204665, "loss": 0.0456, "lr": 1.0696494743541296e-06, "epoch": 2.6501844477560894, "percentage": 53.0, "elapsed_time": "2:20:17", "remaining_time": "2:04:23", "throughput": 8687.37, "total_tokens": 73126248} +{"current_steps": 108485, "total_steps": 204665, "loss": 0.0004, "lr": 1.069564403595884e-06, "epoch": 2.650306598587936, "percentage": 53.01, "elapsed_time": "2:20:17", "remaining_time": "2:04:23", "throughput": 8687.4, "total_tokens": 73129448} +{"current_steps": 108490, "total_steps": 204665, "loss": 0.0008, "lr": 1.0694793323317473e-06, "epoch": 2.650428749419784, "percentage": 53.01, "elapsed_time": "2:20:18", "remaining_time": "2:04:22", "throughput": 8687.44, "total_tokens": 73132840} +{"current_steps": 108495, "total_steps": 204665, "loss": 0.0907, "lr": 1.0693942605623378e-06, "epoch": 2.6505509002516305, "percentage": 53.01, "elapsed_time": "2:20:18", "remaining_time": "2:04:22", "throughput": 8687.47, "total_tokens": 73136040} +{"current_steps": 108500, "total_steps": 204665, "loss": 0.1461, "lr": 1.069309188288275e-06, "epoch": 2.6506730510834777, "percentage": 53.01, "elapsed_time": "2:20:18", "remaining_time": "2:04:21", "throughput": 8687.52, "total_tokens": 73139496} +{"current_steps": 108505, "total_steps": 204665, "loss": 0.0003, "lr": 1.069224115510177e-06, "epoch": 2.650795201915325, "percentage": 53.02, "elapsed_time": "2:20:19", "remaining_time": "2:04:21", "throughput": 8687.58, "total_tokens": 73143080} +{"current_steps": 108510, "total_steps": 204665, "loss": 0.0458, "lr": 1.0691390422286627e-06, "epoch": 2.650917352747172, "percentage": 53.02, "elapsed_time": "2:20:19", "remaining_time": "2:04:20", "throughput": 8687.68, "total_tokens": 73147112} +{"current_steps": 108515, "total_steps": 204665, "loss": 0.0001, "lr": 1.0690539684443507e-06, "epoch": 2.6510395035790193, "percentage": 53.02, "elapsed_time": "2:20:19", "remaining_time": "2:04:20", "throughput": 8687.73, "total_tokens": 73150568} +{"current_steps": 108520, "total_steps": 204665, "loss": 0.0005, "lr": 1.06896889415786e-06, "epoch": 2.6511616544108665, "percentage": 53.02, "elapsed_time": "2:20:20", "remaining_time": "2:04:20", "throughput": 8687.75, "total_tokens": 73153640} +{"current_steps": 108525, "total_steps": 204665, "loss": 0.0003, "lr": 1.0688838193698083e-06, "epoch": 2.6512838052427137, "percentage": 53.03, "elapsed_time": "2:20:20", "remaining_time": "2:04:19", "throughput": 8687.8, "total_tokens": 73157096} +{"current_steps": 108530, "total_steps": 204665, "loss": 0.0606, "lr": 1.0687987440808153e-06, "epoch": 2.651405956074561, "percentage": 53.03, "elapsed_time": "2:20:21", "remaining_time": "2:04:19", "throughput": 8687.9, "total_tokens": 73161128} +{"current_steps": 108535, "total_steps": 204665, "loss": 0.0434, "lr": 1.0687136682914993e-06, "epoch": 2.651528106906408, "percentage": 53.03, "elapsed_time": "2:20:21", "remaining_time": "2:04:18", "throughput": 8687.93, "total_tokens": 73164328} +{"current_steps": 108540, "total_steps": 204665, "loss": 0.0347, "lr": 1.068628592002479e-06, "epoch": 2.6516502577382552, "percentage": 53.03, "elapsed_time": "2:20:21", "remaining_time": "2:04:18", "throughput": 8687.93, "total_tokens": 73167272} +{"current_steps": 108545, "total_steps": 204665, "loss": 0.0002, "lr": 1.068543515214373e-06, "epoch": 2.6517724085701024, "percentage": 53.04, "elapsed_time": "2:20:22", "remaining_time": "2:04:17", "throughput": 8687.95, "total_tokens": 73170344} +{"current_steps": 108550, "total_steps": 204665, "loss": 0.0003, "lr": 1.0684584379278004e-06, "epoch": 2.6518945594019496, "percentage": 53.04, "elapsed_time": "2:20:22", "remaining_time": "2:04:17", "throughput": 8687.94, "total_tokens": 73173224} +{"current_steps": 108555, "total_steps": 204665, "loss": 0.0238, "lr": 1.0683733601433793e-06, "epoch": 2.652016710233797, "percentage": 53.04, "elapsed_time": "2:20:22", "remaining_time": "2:04:17", "throughput": 8687.96, "total_tokens": 73176296} +{"current_steps": 108560, "total_steps": 204665, "loss": 0.0469, "lr": 1.068288281861729e-06, "epoch": 2.652138861065644, "percentage": 53.04, "elapsed_time": "2:20:23", "remaining_time": "2:04:16", "throughput": 8687.96, "total_tokens": 73179176} +{"current_steps": 108565, "total_steps": 204665, "loss": 0.0003, "lr": 1.0682032030834678e-06, "epoch": 2.652261011897491, "percentage": 53.05, "elapsed_time": "2:20:23", "remaining_time": "2:04:16", "throughput": 8687.98, "total_tokens": 73182376} +{"current_steps": 108570, "total_steps": 204665, "loss": 0.0007, "lr": 1.0681181238092146e-06, "epoch": 2.652383162729338, "percentage": 53.05, "elapsed_time": "2:20:23", "remaining_time": "2:04:15", "throughput": 8688.01, "total_tokens": 73185640} +{"current_steps": 108575, "total_steps": 204665, "loss": 0.0396, "lr": 1.0680330440395882e-06, "epoch": 2.6525053135611856, "percentage": 53.05, "elapsed_time": "2:20:24", "remaining_time": "2:04:15", "throughput": 8688.01, "total_tokens": 73188584} +{"current_steps": 108580, "total_steps": 204665, "loss": 0.0931, "lr": 1.0679479637752069e-06, "epoch": 2.6526274643930323, "percentage": 53.05, "elapsed_time": "2:20:24", "remaining_time": "2:04:14", "throughput": 8688.05, "total_tokens": 73191912} +{"current_steps": 108585, "total_steps": 204665, "loss": 0.0326, "lr": 1.06786288301669e-06, "epoch": 2.65274961522488, "percentage": 53.05, "elapsed_time": "2:20:24", "remaining_time": "2:04:14", "throughput": 8688.1, "total_tokens": 73195304} +{"current_steps": 108590, "total_steps": 204665, "loss": 0.0002, "lr": 1.0677778017646558e-06, "epoch": 2.6528717660567267, "percentage": 53.06, "elapsed_time": "2:20:25", "remaining_time": "2:04:14", "throughput": 8688.13, "total_tokens": 73198568} +{"current_steps": 108595, "total_steps": 204665, "loss": 0.0001, "lr": 1.0676927200197234e-06, "epoch": 2.652993916888574, "percentage": 53.06, "elapsed_time": "2:20:25", "remaining_time": "2:04:13", "throughput": 8688.14, "total_tokens": 73201640} +{"current_steps": 108600, "total_steps": 204665, "loss": 0.0535, "lr": 1.067607637782511e-06, "epoch": 2.653116067720421, "percentage": 53.06, "elapsed_time": "2:20:25", "remaining_time": "2:04:13", "throughput": 8688.16, "total_tokens": 73204712} +{"current_steps": 108605, "total_steps": 204665, "loss": 0.0002, "lr": 1.0675225550536383e-06, "epoch": 2.6532382185522683, "percentage": 53.06, "elapsed_time": "2:20:26", "remaining_time": "2:04:12", "throughput": 8688.25, "total_tokens": 73208616} +{"current_steps": 108610, "total_steps": 204665, "loss": 0.147, "lr": 1.067437471833723e-06, "epoch": 2.6533603693841155, "percentage": 53.07, "elapsed_time": "2:20:26", "remaining_time": "2:04:12", "throughput": 8688.28, "total_tokens": 73211944} +{"current_steps": 108615, "total_steps": 204665, "loss": 0.0615, "lr": 1.0673523881233841e-06, "epoch": 2.6534825202159626, "percentage": 53.07, "elapsed_time": "2:20:26", "remaining_time": "2:04:12", "throughput": 8688.36, "total_tokens": 73215592} +{"current_steps": 108620, "total_steps": 204665, "loss": 0.1221, "lr": 1.0672673039232405e-06, "epoch": 2.65360467104781, "percentage": 53.07, "elapsed_time": "2:20:27", "remaining_time": "2:04:11", "throughput": 8688.38, "total_tokens": 73218792} +{"current_steps": 108625, "total_steps": 204665, "loss": 0.0805, "lr": 1.0671822192339112e-06, "epoch": 2.653726821879657, "percentage": 53.07, "elapsed_time": "2:20:27", "remaining_time": "2:04:11", "throughput": 8688.46, "total_tokens": 73222568} +{"current_steps": 108630, "total_steps": 204665, "loss": 0.0002, "lr": 1.0670971340560148e-06, "epoch": 2.653848972711504, "percentage": 53.08, "elapsed_time": "2:20:27", "remaining_time": "2:04:10", "throughput": 8688.47, "total_tokens": 73225640} +{"current_steps": 108635, "total_steps": 204665, "loss": 0.0003, "lr": 1.06701204839017e-06, "epoch": 2.6539711235433514, "percentage": 53.08, "elapsed_time": "2:20:28", "remaining_time": "2:04:10", "throughput": 8688.52, "total_tokens": 73229032} +{"current_steps": 108640, "total_steps": 204665, "loss": 0.0764, "lr": 1.0669269622369957e-06, "epoch": 2.6540932743751986, "percentage": 53.08, "elapsed_time": "2:20:28", "remaining_time": "2:04:09", "throughput": 8688.56, "total_tokens": 73232360} +{"current_steps": 108645, "total_steps": 204665, "loss": 0.0002, "lr": 1.06684187559711e-06, "epoch": 2.654215425207046, "percentage": 53.08, "elapsed_time": "2:20:28", "remaining_time": "2:04:09", "throughput": 8688.59, "total_tokens": 73235688} +{"current_steps": 108650, "total_steps": 204665, "loss": 0.0005, "lr": 1.0667567884711323e-06, "epoch": 2.654337576038893, "percentage": 53.09, "elapsed_time": "2:20:29", "remaining_time": "2:04:09", "throughput": 8688.62, "total_tokens": 73238888} +{"current_steps": 108655, "total_steps": 204665, "loss": 0.0003, "lr": 1.0666717008596814e-06, "epoch": 2.6544597268707397, "percentage": 53.09, "elapsed_time": "2:20:29", "remaining_time": "2:04:08", "throughput": 8688.65, "total_tokens": 73242216} +{"current_steps": 108660, "total_steps": 204665, "loss": 0.0155, "lr": 1.0665866127633762e-06, "epoch": 2.6545818777025874, "percentage": 53.09, "elapsed_time": "2:20:29", "remaining_time": "2:04:08", "throughput": 8688.69, "total_tokens": 73245608} +{"current_steps": 108665, "total_steps": 204665, "loss": 0.1046, "lr": 1.066501524182835e-06, "epoch": 2.654704028534434, "percentage": 53.09, "elapsed_time": "2:20:30", "remaining_time": "2:04:07", "throughput": 8688.71, "total_tokens": 73248680} +{"current_steps": 108670, "total_steps": 204665, "loss": 0.0481, "lr": 1.066416435118677e-06, "epoch": 2.6548261793662817, "percentage": 53.1, "elapsed_time": "2:20:30", "remaining_time": "2:04:07", "throughput": 8688.72, "total_tokens": 73251688} +{"current_steps": 108675, "total_steps": 204665, "loss": 0.0006, "lr": 1.0663313455715208e-06, "epoch": 2.6549483301981285, "percentage": 53.1, "elapsed_time": "2:20:31", "remaining_time": "2:04:06", "throughput": 8688.73, "total_tokens": 73254824} +{"current_steps": 108680, "total_steps": 204665, "loss": 0.0009, "lr": 1.066246255541985e-06, "epoch": 2.6550704810299757, "percentage": 53.1, "elapsed_time": "2:20:31", "remaining_time": "2:04:06", "throughput": 8688.77, "total_tokens": 73258088} +{"current_steps": 108685, "total_steps": 204665, "loss": 0.0344, "lr": 1.0661611650306887e-06, "epoch": 2.655192631861823, "percentage": 53.1, "elapsed_time": "2:20:31", "remaining_time": "2:04:06", "throughput": 8688.84, "total_tokens": 73261800} +{"current_steps": 108690, "total_steps": 204665, "loss": 0.0007, "lr": 1.0660760740382508e-06, "epoch": 2.65531478269367, "percentage": 53.11, "elapsed_time": "2:20:32", "remaining_time": "2:04:05", "throughput": 8688.89, "total_tokens": 73265256} +{"current_steps": 108695, "total_steps": 204665, "loss": 0.0007, "lr": 1.0659909825652898e-06, "epoch": 2.6554369335255172, "percentage": 53.11, "elapsed_time": "2:20:32", "remaining_time": "2:04:05", "throughput": 8688.92, "total_tokens": 73268520} +{"current_steps": 108700, "total_steps": 204665, "loss": 0.1883, "lr": 1.0659058906124245e-06, "epoch": 2.6555590843573644, "percentage": 53.11, "elapsed_time": "2:20:32", "remaining_time": "2:04:04", "throughput": 8688.97, "total_tokens": 73272040} +{"current_steps": 108705, "total_steps": 204665, "loss": 0.0034, "lr": 1.0658207981802741e-06, "epoch": 2.6556812351892116, "percentage": 53.11, "elapsed_time": "2:20:33", "remaining_time": "2:04:04", "throughput": 8688.99, "total_tokens": 73275176} +{"current_steps": 108710, "total_steps": 204665, "loss": 0.0002, "lr": 1.0657357052694567e-06, "epoch": 2.655803386021059, "percentage": 53.12, "elapsed_time": "2:20:33", "remaining_time": "2:04:03", "throughput": 8689.03, "total_tokens": 73278504} +{"current_steps": 108715, "total_steps": 204665, "loss": 0.1303, "lr": 1.0656506118805919e-06, "epoch": 2.655925536852906, "percentage": 53.12, "elapsed_time": "2:20:33", "remaining_time": "2:04:03", "throughput": 8689.11, "total_tokens": 73282280} +{"current_steps": 108720, "total_steps": 204665, "loss": 0.0003, "lr": 1.0655655180142982e-06, "epoch": 2.656047687684753, "percentage": 53.12, "elapsed_time": "2:20:34", "remaining_time": "2:04:03", "throughput": 8689.13, "total_tokens": 73285480} +{"current_steps": 108725, "total_steps": 204665, "loss": 0.0318, "lr": 1.0654804236711942e-06, "epoch": 2.6561698385166004, "percentage": 53.12, "elapsed_time": "2:20:34", "remaining_time": "2:04:02", "throughput": 8689.17, "total_tokens": 73288872} +{"current_steps": 108730, "total_steps": 204665, "loss": 0.0001, "lr": 1.0653953288518994e-06, "epoch": 2.6562919893484476, "percentage": 53.13, "elapsed_time": "2:20:34", "remaining_time": "2:04:02", "throughput": 8689.19, "total_tokens": 73292008} +{"current_steps": 108735, "total_steps": 204665, "loss": 0.0007, "lr": 1.0653102335570317e-06, "epoch": 2.6564141401802948, "percentage": 53.13, "elapsed_time": "2:20:35", "remaining_time": "2:04:01", "throughput": 8689.24, "total_tokens": 73295464} +{"current_steps": 108740, "total_steps": 204665, "loss": 0.0001, "lr": 1.0652251377872108e-06, "epoch": 2.656536291012142, "percentage": 53.13, "elapsed_time": "2:20:35", "remaining_time": "2:04:01", "throughput": 8689.27, "total_tokens": 73298728} +{"current_steps": 108745, "total_steps": 204665, "loss": 0.0319, "lr": 1.065140041543055e-06, "epoch": 2.656658441843989, "percentage": 53.13, "elapsed_time": "2:20:35", "remaining_time": "2:04:00", "throughput": 8689.33, "total_tokens": 73302248} +{"current_steps": 108750, "total_steps": 204665, "loss": 0.0538, "lr": 1.0650549448251831e-06, "epoch": 2.656780592675836, "percentage": 53.14, "elapsed_time": "2:20:36", "remaining_time": "2:04:00", "throughput": 8689.38, "total_tokens": 73305768} +{"current_steps": 108755, "total_steps": 204665, "loss": 0.0003, "lr": 1.0649698476342144e-06, "epoch": 2.6569027435076835, "percentage": 53.14, "elapsed_time": "2:20:36", "remaining_time": "2:04:00", "throughput": 8689.45, "total_tokens": 73309480} +{"current_steps": 108760, "total_steps": 204665, "loss": 0.0003, "lr": 1.0648847499707673e-06, "epoch": 2.6570248943395303, "percentage": 53.14, "elapsed_time": "2:20:36", "remaining_time": "2:03:59", "throughput": 8689.48, "total_tokens": 73312680} +{"current_steps": 108765, "total_steps": 204665, "loss": 0.0005, "lr": 1.0647996518354608e-06, "epoch": 2.6571470451713775, "percentage": 53.14, "elapsed_time": "2:20:37", "remaining_time": "2:03:59", "throughput": 8689.53, "total_tokens": 73316136} +{"current_steps": 108770, "total_steps": 204665, "loss": 0.0002, "lr": 1.0647145532289142e-06, "epoch": 2.6572691960032246, "percentage": 53.15, "elapsed_time": "2:20:37", "remaining_time": "2:03:58", "throughput": 8689.52, "total_tokens": 73318952} +{"current_steps": 108775, "total_steps": 204665, "loss": 0.0639, "lr": 1.0646294541517456e-06, "epoch": 2.657391346835072, "percentage": 53.15, "elapsed_time": "2:20:37", "remaining_time": "2:03:58", "throughput": 8689.52, "total_tokens": 73321960} +{"current_steps": 108780, "total_steps": 204665, "loss": 0.0747, "lr": 1.0645443546045743e-06, "epoch": 2.657513497666919, "percentage": 53.15, "elapsed_time": "2:20:38", "remaining_time": "2:03:58", "throughput": 8689.53, "total_tokens": 73325032} +{"current_steps": 108785, "total_steps": 204665, "loss": 0.0002, "lr": 1.0644592545880193e-06, "epoch": 2.657635648498766, "percentage": 53.15, "elapsed_time": "2:20:38", "remaining_time": "2:03:57", "throughput": 8689.6, "total_tokens": 73328616} +{"current_steps": 108790, "total_steps": 204665, "loss": 0.0795, "lr": 1.0643741541026991e-06, "epoch": 2.6577577993306134, "percentage": 53.16, "elapsed_time": "2:20:39", "remaining_time": "2:03:57", "throughput": 8689.62, "total_tokens": 73331752} +{"current_steps": 108795, "total_steps": 204665, "loss": 0.0526, "lr": 1.0642890531492327e-06, "epoch": 2.6578799501624606, "percentage": 53.16, "elapsed_time": "2:20:39", "remaining_time": "2:03:56", "throughput": 8689.67, "total_tokens": 73335272} +{"current_steps": 108800, "total_steps": 204665, "loss": 0.0645, "lr": 1.064203951728239e-06, "epoch": 2.658002100994308, "percentage": 53.16, "elapsed_time": "2:20:39", "remaining_time": "2:03:56", "throughput": 8689.7, "total_tokens": 73338536} +{"current_steps": 108805, "total_steps": 204665, "loss": 0.1111, "lr": 1.064118849840337e-06, "epoch": 2.658124251826155, "percentage": 53.16, "elapsed_time": "2:20:40", "remaining_time": "2:03:55", "throughput": 8689.75, "total_tokens": 73341992} +{"current_steps": 108810, "total_steps": 204665, "loss": 0.0614, "lr": 1.0640337474861453e-06, "epoch": 2.658246402658002, "percentage": 53.16, "elapsed_time": "2:20:40", "remaining_time": "2:03:55", "throughput": 8689.79, "total_tokens": 73345256} +{"current_steps": 108815, "total_steps": 204665, "loss": 0.049, "lr": 1.063948644666283e-06, "epoch": 2.6583685534898494, "percentage": 53.17, "elapsed_time": "2:20:40", "remaining_time": "2:03:55", "throughput": 8689.82, "total_tokens": 73348520} +{"current_steps": 108820, "total_steps": 204665, "loss": 0.0002, "lr": 1.063863541381369e-06, "epoch": 2.6584907043216965, "percentage": 53.17, "elapsed_time": "2:20:41", "remaining_time": "2:03:54", "throughput": 8689.81, "total_tokens": 73351400} +{"current_steps": 108825, "total_steps": 204665, "loss": 0.0517, "lr": 1.0637784376320222e-06, "epoch": 2.6586128551535437, "percentage": 53.17, "elapsed_time": "2:20:41", "remaining_time": "2:03:54", "throughput": 8689.89, "total_tokens": 73355176} +{"current_steps": 108830, "total_steps": 204665, "loss": 0.0001, "lr": 1.0636933334188615e-06, "epoch": 2.658735005985391, "percentage": 53.17, "elapsed_time": "2:20:41", "remaining_time": "2:03:53", "throughput": 8689.94, "total_tokens": 73358568} +{"current_steps": 108835, "total_steps": 204665, "loss": 0.0933, "lr": 1.0636082287425058e-06, "epoch": 2.6588571568172377, "percentage": 53.18, "elapsed_time": "2:20:42", "remaining_time": "2:03:53", "throughput": 8689.99, "total_tokens": 73362088} +{"current_steps": 108840, "total_steps": 204665, "loss": 0.0006, "lr": 1.0635231236035739e-06, "epoch": 2.6589793076490853, "percentage": 53.18, "elapsed_time": "2:20:42", "remaining_time": "2:03:52", "throughput": 8690.01, "total_tokens": 73365224} +{"current_steps": 108845, "total_steps": 204665, "loss": 0.0004, "lr": 1.0634380180026846e-06, "epoch": 2.659101458480932, "percentage": 53.18, "elapsed_time": "2:20:42", "remaining_time": "2:03:52", "throughput": 8690.02, "total_tokens": 73368232} +{"current_steps": 108850, "total_steps": 204665, "loss": 0.0001, "lr": 1.063352911940457e-06, "epoch": 2.6592236093127797, "percentage": 53.18, "elapsed_time": "2:20:43", "remaining_time": "2:03:52", "throughput": 8690.04, "total_tokens": 73371432} +{"current_steps": 108855, "total_steps": 204665, "loss": 0.0944, "lr": 1.0632678054175102e-06, "epoch": 2.6593457601446264, "percentage": 53.19, "elapsed_time": "2:20:43", "remaining_time": "2:03:51", "throughput": 8690.1, "total_tokens": 73374952} +{"current_steps": 108860, "total_steps": 204665, "loss": 0.0001, "lr": 1.0631826984344624e-06, "epoch": 2.6594679109764736, "percentage": 53.19, "elapsed_time": "2:20:43", "remaining_time": "2:03:51", "throughput": 8690.16, "total_tokens": 73378600} +{"current_steps": 108865, "total_steps": 204665, "loss": 0.1409, "lr": 1.0630975909919334e-06, "epoch": 2.659590061808321, "percentage": 53.19, "elapsed_time": "2:20:44", "remaining_time": "2:03:50", "throughput": 8690.22, "total_tokens": 73382184} +{"current_steps": 108870, "total_steps": 204665, "loss": 0.0337, "lr": 1.0630124830905418e-06, "epoch": 2.659712212640168, "percentage": 53.19, "elapsed_time": "2:20:44", "remaining_time": "2:03:50", "throughput": 8690.25, "total_tokens": 73385384} +{"current_steps": 108875, "total_steps": 204665, "loss": 0.0398, "lr": 1.0629273747309064e-06, "epoch": 2.659834363472015, "percentage": 53.2, "elapsed_time": "2:20:44", "remaining_time": "2:03:49", "throughput": 8690.32, "total_tokens": 73389096} +{"current_steps": 108880, "total_steps": 204665, "loss": 0.053, "lr": 1.0628422659136463e-06, "epoch": 2.6599565143038624, "percentage": 53.2, "elapsed_time": "2:20:45", "remaining_time": "2:03:49", "throughput": 8690.33, "total_tokens": 73392104} +{"current_steps": 108885, "total_steps": 204665, "loss": 0.0375, "lr": 1.06275715663938e-06, "epoch": 2.6600786651357096, "percentage": 53.2, "elapsed_time": "2:20:45", "remaining_time": "2:03:49", "throughput": 8690.34, "total_tokens": 73395176} +{"current_steps": 108890, "total_steps": 204665, "loss": 0.0002, "lr": 1.062672046908727e-06, "epoch": 2.6602008159675568, "percentage": 53.2, "elapsed_time": "2:20:45", "remaining_time": "2:03:48", "throughput": 8690.4, "total_tokens": 73398696} +{"current_steps": 108895, "total_steps": 204665, "loss": 0.0645, "lr": 1.0625869367223063e-06, "epoch": 2.660322966799404, "percentage": 53.21, "elapsed_time": "2:20:46", "remaining_time": "2:03:48", "throughput": 8690.43, "total_tokens": 73401960} +{"current_steps": 108900, "total_steps": 204665, "loss": 0.0004, "lr": 1.062501826080736e-06, "epoch": 2.660445117631251, "percentage": 53.21, "elapsed_time": "2:20:46", "remaining_time": "2:03:47", "throughput": 8690.44, "total_tokens": 73404968} +{"current_steps": 108905, "total_steps": 204665, "loss": 0.035, "lr": 1.062416714984636e-06, "epoch": 2.6605672684630983, "percentage": 53.21, "elapsed_time": "2:20:46", "remaining_time": "2:03:47", "throughput": 8690.46, "total_tokens": 73408168} +{"current_steps": 108910, "total_steps": 204665, "loss": 0.0526, "lr": 1.0623316034346248e-06, "epoch": 2.6606894192949455, "percentage": 53.21, "elapsed_time": "2:20:47", "remaining_time": "2:03:46", "throughput": 8690.51, "total_tokens": 73411560} +{"current_steps": 108915, "total_steps": 204665, "loss": 0.1244, "lr": 1.0622464914313214e-06, "epoch": 2.6608115701267927, "percentage": 53.22, "elapsed_time": "2:20:47", "remaining_time": "2:03:46", "throughput": 8690.55, "total_tokens": 73414888} +{"current_steps": 108920, "total_steps": 204665, "loss": 0.0005, "lr": 1.0621613789753447e-06, "epoch": 2.66093372095864, "percentage": 53.22, "elapsed_time": "2:20:48", "remaining_time": "2:03:46", "throughput": 8690.59, "total_tokens": 73418280} +{"current_steps": 108925, "total_steps": 204665, "loss": 0.065, "lr": 1.062076266067314e-06, "epoch": 2.661055871790487, "percentage": 53.22, "elapsed_time": "2:20:48", "remaining_time": "2:03:45", "throughput": 8690.67, "total_tokens": 73422056} +{"current_steps": 108930, "total_steps": 204665, "loss": 0.0006, "lr": 1.0619911527078475e-06, "epoch": 2.661178022622334, "percentage": 53.22, "elapsed_time": "2:20:48", "remaining_time": "2:03:45", "throughput": 8690.77, "total_tokens": 73426088} +{"current_steps": 108935, "total_steps": 204665, "loss": 0.0002, "lr": 1.0619060388975649e-06, "epoch": 2.6613001734541815, "percentage": 53.23, "elapsed_time": "2:20:49", "remaining_time": "2:03:44", "throughput": 8690.85, "total_tokens": 73429864} +{"current_steps": 108940, "total_steps": 204665, "loss": 0.0009, "lr": 1.0618209246370853e-06, "epoch": 2.661422324286028, "percentage": 53.23, "elapsed_time": "2:20:49", "remaining_time": "2:03:44", "throughput": 8690.9, "total_tokens": 73433256} +{"current_steps": 108945, "total_steps": 204665, "loss": 0.0418, "lr": 1.061735809927027e-06, "epoch": 2.6615444751178754, "percentage": 53.23, "elapsed_time": "2:20:49", "remaining_time": "2:03:44", "throughput": 8690.92, "total_tokens": 73436392} +{"current_steps": 108950, "total_steps": 204665, "loss": 0.0004, "lr": 1.0616506947680092e-06, "epoch": 2.6616666259497226, "percentage": 53.23, "elapsed_time": "2:20:50", "remaining_time": "2:03:43", "throughput": 8690.95, "total_tokens": 73439656} +{"current_steps": 108955, "total_steps": 204665, "loss": 0.0398, "lr": 1.0615655791606511e-06, "epoch": 2.6617887767815698, "percentage": 53.24, "elapsed_time": "2:20:50", "remaining_time": "2:03:43", "throughput": 8690.97, "total_tokens": 73442792} +{"current_steps": 108960, "total_steps": 204665, "loss": 0.028, "lr": 1.0614804631055713e-06, "epoch": 2.661910927613417, "percentage": 53.24, "elapsed_time": "2:20:50", "remaining_time": "2:03:42", "throughput": 8691.01, "total_tokens": 73446184} +{"current_steps": 108965, "total_steps": 204665, "loss": 0.0002, "lr": 1.0613953466033892e-06, "epoch": 2.662033078445264, "percentage": 53.24, "elapsed_time": "2:20:51", "remaining_time": "2:03:42", "throughput": 8691.07, "total_tokens": 73449704} +{"current_steps": 108970, "total_steps": 204665, "loss": 0.0732, "lr": 1.0613102296547237e-06, "epoch": 2.6621552292771113, "percentage": 53.24, "elapsed_time": "2:20:51", "remaining_time": "2:03:41", "throughput": 8691.11, "total_tokens": 73452968} +{"current_steps": 108975, "total_steps": 204665, "loss": 0.0004, "lr": 1.0612251122601936e-06, "epoch": 2.6622773801089585, "percentage": 53.25, "elapsed_time": "2:20:51", "remaining_time": "2:03:41", "throughput": 8691.16, "total_tokens": 73456424} +{"current_steps": 108980, "total_steps": 204665, "loss": 0.0002, "lr": 1.0611399944204181e-06, "epoch": 2.6623995309408057, "percentage": 53.25, "elapsed_time": "2:20:52", "remaining_time": "2:03:41", "throughput": 8691.2, "total_tokens": 73459816} +{"current_steps": 108985, "total_steps": 204665, "loss": 0.0805, "lr": 1.061054876136016e-06, "epoch": 2.662521681772653, "percentage": 53.25, "elapsed_time": "2:20:52", "remaining_time": "2:03:40", "throughput": 8691.24, "total_tokens": 73463208} +{"current_steps": 108990, "total_steps": 204665, "loss": 0.0002, "lr": 1.0609697574076066e-06, "epoch": 2.6626438326045, "percentage": 53.25, "elapsed_time": "2:20:52", "remaining_time": "2:03:40", "throughput": 8691.29, "total_tokens": 73466600} +{"current_steps": 108995, "total_steps": 204665, "loss": 0.0896, "lr": 1.0608846382358087e-06, "epoch": 2.6627659834363473, "percentage": 53.26, "elapsed_time": "2:20:53", "remaining_time": "2:03:39", "throughput": 8691.28, "total_tokens": 73469480} +{"current_steps": 109000, "total_steps": 204665, "loss": 0.0629, "lr": 1.0607995186212413e-06, "epoch": 2.6628881342681945, "percentage": 53.26, "elapsed_time": "2:20:53", "remaining_time": "2:03:39", "throughput": 8691.32, "total_tokens": 73472872} +{"current_steps": 109005, "total_steps": 204665, "loss": 0.0002, "lr": 1.0607143985645235e-06, "epoch": 2.6630102851000417, "percentage": 53.26, "elapsed_time": "2:20:53", "remaining_time": "2:03:38", "throughput": 8691.35, "total_tokens": 73476072} +{"current_steps": 109010, "total_steps": 204665, "loss": 0.0007, "lr": 1.0606292780662742e-06, "epoch": 2.663132435931889, "percentage": 53.26, "elapsed_time": "2:20:54", "remaining_time": "2:03:38", "throughput": 8691.37, "total_tokens": 73479208} +{"current_steps": 109015, "total_steps": 204665, "loss": 0.0978, "lr": 1.0605441571271126e-06, "epoch": 2.6632545867637356, "percentage": 53.27, "elapsed_time": "2:20:54", "remaining_time": "2:03:38", "throughput": 8691.41, "total_tokens": 73482600} +{"current_steps": 109020, "total_steps": 204665, "loss": 0.1106, "lr": 1.0604590357476571e-06, "epoch": 2.6633767375955832, "percentage": 53.27, "elapsed_time": "2:20:54", "remaining_time": "2:03:37", "throughput": 8691.48, "total_tokens": 73486248} +{"current_steps": 109025, "total_steps": 204665, "loss": 0.0001, "lr": 1.0603739139285276e-06, "epoch": 2.66349888842743, "percentage": 53.27, "elapsed_time": "2:20:55", "remaining_time": "2:03:37", "throughput": 8691.54, "total_tokens": 73489832} +{"current_steps": 109030, "total_steps": 204665, "loss": 0.0776, "lr": 1.0602887916703429e-06, "epoch": 2.6636210392592776, "percentage": 53.27, "elapsed_time": "2:20:55", "remaining_time": "2:03:36", "throughput": 8691.57, "total_tokens": 73493096} +{"current_steps": 109035, "total_steps": 204665, "loss": 0.0434, "lr": 1.0602036689737218e-06, "epoch": 2.6637431900911244, "percentage": 53.27, "elapsed_time": "2:20:56", "remaining_time": "2:03:36", "throughput": 8691.57, "total_tokens": 73495976} +{"current_steps": 109040, "total_steps": 204665, "loss": 0.0723, "lr": 1.0601185458392833e-06, "epoch": 2.6638653409229716, "percentage": 53.28, "elapsed_time": "2:20:56", "remaining_time": "2:03:35", "throughput": 8691.59, "total_tokens": 73499112} +{"current_steps": 109045, "total_steps": 204665, "loss": 0.0641, "lr": 1.0600334222676469e-06, "epoch": 2.6639874917548187, "percentage": 53.28, "elapsed_time": "2:20:56", "remaining_time": "2:03:35", "throughput": 8691.6, "total_tokens": 73502248} +{"current_steps": 109050, "total_steps": 204665, "loss": 0.0585, "lr": 1.059948298259431e-06, "epoch": 2.664109642586666, "percentage": 53.28, "elapsed_time": "2:20:57", "remaining_time": "2:03:35", "throughput": 8691.66, "total_tokens": 73505768} +{"current_steps": 109055, "total_steps": 204665, "loss": 0.0025, "lr": 1.0598631738152547e-06, "epoch": 2.664231793418513, "percentage": 53.28, "elapsed_time": "2:20:57", "remaining_time": "2:03:34", "throughput": 8691.71, "total_tokens": 73509224} +{"current_steps": 109060, "total_steps": 204665, "loss": 0.0002, "lr": 1.0597780489357378e-06, "epoch": 2.6643539442503603, "percentage": 53.29, "elapsed_time": "2:20:57", "remaining_time": "2:03:34", "throughput": 8691.71, "total_tokens": 73512232} +{"current_steps": 109065, "total_steps": 204665, "loss": 0.0629, "lr": 1.0596929236214986e-06, "epoch": 2.6644760950822075, "percentage": 53.29, "elapsed_time": "2:20:58", "remaining_time": "2:03:33", "throughput": 8691.74, "total_tokens": 73515432} +{"current_steps": 109070, "total_steps": 204665, "loss": 0.0002, "lr": 1.0596077978731565e-06, "epoch": 2.6645982459140547, "percentage": 53.29, "elapsed_time": "2:20:58", "remaining_time": "2:03:33", "throughput": 8691.76, "total_tokens": 73518632} +{"current_steps": 109075, "total_steps": 204665, "loss": 0.0379, "lr": 1.0595226716913307e-06, "epoch": 2.664720396745902, "percentage": 53.29, "elapsed_time": "2:20:58", "remaining_time": "2:03:33", "throughput": 8691.78, "total_tokens": 73521768} +{"current_steps": 109080, "total_steps": 204665, "loss": 0.0423, "lr": 1.0594375450766394e-06, "epoch": 2.664842547577749, "percentage": 53.3, "elapsed_time": "2:20:59", "remaining_time": "2:03:32", "throughput": 8691.84, "total_tokens": 73525288} +{"current_steps": 109085, "total_steps": 204665, "loss": 0.0643, "lr": 1.0593524180297026e-06, "epoch": 2.6649646984095963, "percentage": 53.3, "elapsed_time": "2:20:59", "remaining_time": "2:03:32", "throughput": 8691.89, "total_tokens": 73528744} +{"current_steps": 109090, "total_steps": 204665, "loss": 0.0247, "lr": 1.059267290551139e-06, "epoch": 2.6650868492414435, "percentage": 53.3, "elapsed_time": "2:20:59", "remaining_time": "2:03:31", "throughput": 8691.95, "total_tokens": 73532264} +{"current_steps": 109095, "total_steps": 204665, "loss": 0.1019, "lr": 1.059182162641568e-06, "epoch": 2.6652090000732906, "percentage": 53.3, "elapsed_time": "2:21:00", "remaining_time": "2:03:31", "throughput": 8691.98, "total_tokens": 73535528} +{"current_steps": 109100, "total_steps": 204665, "loss": 0.0003, "lr": 1.0590970343016083e-06, "epoch": 2.6653311509051374, "percentage": 53.31, "elapsed_time": "2:21:00", "remaining_time": "2:03:30", "throughput": 8692.03, "total_tokens": 73539048} +{"current_steps": 109105, "total_steps": 204665, "loss": 0.0003, "lr": 1.059011905531879e-06, "epoch": 2.665453301736985, "percentage": 53.31, "elapsed_time": "2:21:00", "remaining_time": "2:03:30", "throughput": 8692.1, "total_tokens": 73542632} +{"current_steps": 109110, "total_steps": 204665, "loss": 0.0005, "lr": 1.0589267763329992e-06, "epoch": 2.6655754525688318, "percentage": 53.31, "elapsed_time": "2:21:01", "remaining_time": "2:03:30", "throughput": 8692.17, "total_tokens": 73546280} +{"current_steps": 109115, "total_steps": 204665, "loss": 0.0432, "lr": 1.0588416467055878e-06, "epoch": 2.6656976034006794, "percentage": 53.31, "elapsed_time": "2:21:01", "remaining_time": "2:03:29", "throughput": 8692.2, "total_tokens": 73549608} +{"current_steps": 109120, "total_steps": 204665, "loss": 0.0004, "lr": 1.0587565166502646e-06, "epoch": 2.665819754232526, "percentage": 53.32, "elapsed_time": "2:21:01", "remaining_time": "2:03:29", "throughput": 8692.24, "total_tokens": 73552872} +{"current_steps": 109125, "total_steps": 204665, "loss": 0.0005, "lr": 1.058671386167648e-06, "epoch": 2.6659419050643733, "percentage": 53.32, "elapsed_time": "2:21:02", "remaining_time": "2:03:28", "throughput": 8692.31, "total_tokens": 73556584} +{"current_steps": 109130, "total_steps": 204665, "loss": 0.0417, "lr": 1.058586255258357e-06, "epoch": 2.6660640558962205, "percentage": 53.32, "elapsed_time": "2:21:02", "remaining_time": "2:03:28", "throughput": 8692.38, "total_tokens": 73560296} +{"current_steps": 109135, "total_steps": 204665, "loss": 0.0329, "lr": 1.0585011239230114e-06, "epoch": 2.6661862067280677, "percentage": 53.32, "elapsed_time": "2:21:02", "remaining_time": "2:03:27", "throughput": 8692.43, "total_tokens": 73563688} +{"current_steps": 109140, "total_steps": 204665, "loss": 0.001, "lr": 1.0584159921622298e-06, "epoch": 2.666308357559915, "percentage": 53.33, "elapsed_time": "2:21:03", "remaining_time": "2:03:27", "throughput": 8692.46, "total_tokens": 73566888} +{"current_steps": 109145, "total_steps": 204665, "loss": 0.0001, "lr": 1.0583308599766314e-06, "epoch": 2.666430508391762, "percentage": 53.33, "elapsed_time": "2:21:03", "remaining_time": "2:03:27", "throughput": 8692.46, "total_tokens": 73569896} +{"current_steps": 109150, "total_steps": 204665, "loss": 0.0778, "lr": 1.0582457273668352e-06, "epoch": 2.6665526592236093, "percentage": 53.33, "elapsed_time": "2:21:04", "remaining_time": "2:03:26", "throughput": 8692.55, "total_tokens": 73573864} +{"current_steps": 109155, "total_steps": 204665, "loss": 0.0621, "lr": 1.0581605943334606e-06, "epoch": 2.6666748100554565, "percentage": 53.33, "elapsed_time": "2:21:04", "remaining_time": "2:03:26", "throughput": 8692.64, "total_tokens": 73577640} +{"current_steps": 109160, "total_steps": 204665, "loss": 0.0427, "lr": 1.0580754608771263e-06, "epoch": 2.6667969608873037, "percentage": 53.34, "elapsed_time": "2:21:04", "remaining_time": "2:03:25", "throughput": 8692.69, "total_tokens": 73581224} +{"current_steps": 109165, "total_steps": 204665, "loss": 0.0369, "lr": 1.0579903269984519e-06, "epoch": 2.666919111719151, "percentage": 53.34, "elapsed_time": "2:21:05", "remaining_time": "2:03:25", "throughput": 8692.72, "total_tokens": 73584488} +{"current_steps": 109170, "total_steps": 204665, "loss": 0.0395, "lr": 1.0579051926980558e-06, "epoch": 2.667041262550998, "percentage": 53.34, "elapsed_time": "2:21:05", "remaining_time": "2:03:25", "throughput": 8692.81, "total_tokens": 73588392} +{"current_steps": 109175, "total_steps": 204665, "loss": 0.0004, "lr": 1.0578200579765577e-06, "epoch": 2.6671634133828452, "percentage": 53.34, "elapsed_time": "2:21:05", "remaining_time": "2:03:24", "throughput": 8692.85, "total_tokens": 73591720} +{"current_steps": 109180, "total_steps": 204665, "loss": 0.0005, "lr": 1.0577349228345766e-06, "epoch": 2.6672855642146924, "percentage": 53.35, "elapsed_time": "2:21:06", "remaining_time": "2:03:24", "throughput": 8692.86, "total_tokens": 73594792} +{"current_steps": 109185, "total_steps": 204665, "loss": 0.0002, "lr": 1.0576497872727318e-06, "epoch": 2.6674077150465396, "percentage": 53.35, "elapsed_time": "2:21:06", "remaining_time": "2:03:23", "throughput": 8692.9, "total_tokens": 73598120} +{"current_steps": 109190, "total_steps": 204665, "loss": 0.0004, "lr": 1.0575646512916422e-06, "epoch": 2.667529865878387, "percentage": 53.35, "elapsed_time": "2:21:06", "remaining_time": "2:03:23", "throughput": 8692.95, "total_tokens": 73601576} +{"current_steps": 109195, "total_steps": 204665, "loss": 0.0617, "lr": 1.0574795148919268e-06, "epoch": 2.6676520167102336, "percentage": 53.35, "elapsed_time": "2:21:07", "remaining_time": "2:03:22", "throughput": 8693.02, "total_tokens": 73605288} +{"current_steps": 109200, "total_steps": 204665, "loss": 0.0706, "lr": 1.0573943780742051e-06, "epoch": 2.667774167542081, "percentage": 53.36, "elapsed_time": "2:21:07", "remaining_time": "2:03:22", "throughput": 8693.04, "total_tokens": 73608424} +{"current_steps": 109205, "total_steps": 204665, "loss": 0.0431, "lr": 1.0573092408390963e-06, "epoch": 2.667896318373928, "percentage": 53.36, "elapsed_time": "2:21:07", "remaining_time": "2:03:22", "throughput": 8693.08, "total_tokens": 73611752} +{"current_steps": 109210, "total_steps": 204665, "loss": 0.0003, "lr": 1.0572241031872187e-06, "epoch": 2.6680184692057756, "percentage": 53.36, "elapsed_time": "2:21:08", "remaining_time": "2:03:21", "throughput": 8693.08, "total_tokens": 73614696} +{"current_steps": 109215, "total_steps": 204665, "loss": 0.0003, "lr": 1.0571389651191924e-06, "epoch": 2.6681406200376223, "percentage": 53.36, "elapsed_time": "2:21:08", "remaining_time": "2:03:21", "throughput": 8693.14, "total_tokens": 73618216} +{"current_steps": 109220, "total_steps": 204665, "loss": 0.0004, "lr": 1.057053826635636e-06, "epoch": 2.6682627708694695, "percentage": 53.37, "elapsed_time": "2:21:08", "remaining_time": "2:03:20", "throughput": 8693.15, "total_tokens": 73621288} +{"current_steps": 109225, "total_steps": 204665, "loss": 0.0963, "lr": 1.0569686877371688e-06, "epoch": 2.6683849217013167, "percentage": 53.37, "elapsed_time": "2:21:09", "remaining_time": "2:03:20", "throughput": 8693.25, "total_tokens": 73625320} +{"current_steps": 109230, "total_steps": 204665, "loss": 0.0002, "lr": 1.0568835484244103e-06, "epoch": 2.668507072533164, "percentage": 53.37, "elapsed_time": "2:21:09", "remaining_time": "2:03:19", "throughput": 8693.32, "total_tokens": 73628904} +{"current_steps": 109235, "total_steps": 204665, "loss": 0.0003, "lr": 1.056798408697979e-06, "epoch": 2.668629223365011, "percentage": 53.37, "elapsed_time": "2:21:09", "remaining_time": "2:03:19", "throughput": 8693.34, "total_tokens": 73632040} +{"current_steps": 109240, "total_steps": 204665, "loss": 0.0456, "lr": 1.0567132685584944e-06, "epoch": 2.6687513741968583, "percentage": 53.38, "elapsed_time": "2:21:10", "remaining_time": "2:03:19", "throughput": 8693.38, "total_tokens": 73635432} +{"current_steps": 109245, "total_steps": 204665, "loss": 0.0002, "lr": 1.0566281280065757e-06, "epoch": 2.6688735250287055, "percentage": 53.38, "elapsed_time": "2:21:10", "remaining_time": "2:03:18", "throughput": 8693.42, "total_tokens": 73638760} +{"current_steps": 109250, "total_steps": 204665, "loss": 0.0653, "lr": 1.0565429870428424e-06, "epoch": 2.6689956758605526, "percentage": 53.38, "elapsed_time": "2:21:10", "remaining_time": "2:03:18", "throughput": 8693.43, "total_tokens": 73641832} +{"current_steps": 109255, "total_steps": 204665, "loss": 0.104, "lr": 1.056457845667913e-06, "epoch": 2.6691178266924, "percentage": 53.38, "elapsed_time": "2:21:11", "remaining_time": "2:03:17", "throughput": 8693.45, "total_tokens": 73644968} +{"current_steps": 109260, "total_steps": 204665, "loss": 0.0002, "lr": 1.0563727038824069e-06, "epoch": 2.669239977524247, "percentage": 53.38, "elapsed_time": "2:21:11", "remaining_time": "2:03:17", "throughput": 8693.48, "total_tokens": 73648296} +{"current_steps": 109265, "total_steps": 204665, "loss": 0.0002, "lr": 1.0562875616869433e-06, "epoch": 2.669362128356094, "percentage": 53.39, "elapsed_time": "2:21:12", "remaining_time": "2:03:16", "throughput": 8693.51, "total_tokens": 73651496} +{"current_steps": 109270, "total_steps": 204665, "loss": 0.0999, "lr": 1.0562024190821412e-06, "epoch": 2.6694842791879414, "percentage": 53.39, "elapsed_time": "2:21:12", "remaining_time": "2:03:16", "throughput": 8693.52, "total_tokens": 73654568} +{"current_steps": 109275, "total_steps": 204665, "loss": 0.029, "lr": 1.0561172760686204e-06, "epoch": 2.6696064300197886, "percentage": 53.39, "elapsed_time": "2:21:12", "remaining_time": "2:03:16", "throughput": 8693.59, "total_tokens": 73658280} +{"current_steps": 109280, "total_steps": 204665, "loss": 0.0345, "lr": 1.0560321326469996e-06, "epoch": 2.6697285808516353, "percentage": 53.39, "elapsed_time": "2:21:13", "remaining_time": "2:03:15", "throughput": 8693.6, "total_tokens": 73661288} +{"current_steps": 109285, "total_steps": 204665, "loss": 0.0469, "lr": 1.055946988817898e-06, "epoch": 2.669850731683483, "percentage": 53.4, "elapsed_time": "2:21:13", "remaining_time": "2:03:15", "throughput": 8693.63, "total_tokens": 73664488} +{"current_steps": 109290, "total_steps": 204665, "loss": 0.0789, "lr": 1.055861844581935e-06, "epoch": 2.6699728825153297, "percentage": 53.4, "elapsed_time": "2:21:13", "remaining_time": "2:03:14", "throughput": 8693.67, "total_tokens": 73667880} +{"current_steps": 109295, "total_steps": 204665, "loss": 0.0226, "lr": 1.0557766999397295e-06, "epoch": 2.6700950333471773, "percentage": 53.4, "elapsed_time": "2:21:14", "remaining_time": "2:03:14", "throughput": 8693.71, "total_tokens": 73671144} +{"current_steps": 109300, "total_steps": 204665, "loss": 0.0373, "lr": 1.0556915548919007e-06, "epoch": 2.670217184179024, "percentage": 53.4, "elapsed_time": "2:21:14", "remaining_time": "2:03:13", "throughput": 8693.78, "total_tokens": 73674792} +{"current_steps": 109305, "total_steps": 204665, "loss": 0.076, "lr": 1.0556064094390682e-06, "epoch": 2.6703393350108713, "percentage": 53.41, "elapsed_time": "2:21:14", "remaining_time": "2:03:13", "throughput": 8693.8, "total_tokens": 73677928} +{"current_steps": 109310, "total_steps": 204665, "loss": 0.0333, "lr": 1.0555212635818507e-06, "epoch": 2.6704614858427185, "percentage": 53.41, "elapsed_time": "2:21:15", "remaining_time": "2:03:13", "throughput": 8693.83, "total_tokens": 73681256} +{"current_steps": 109315, "total_steps": 204665, "loss": 0.0005, "lr": 1.0554361173208679e-06, "epoch": 2.6705836366745657, "percentage": 53.41, "elapsed_time": "2:21:15", "remaining_time": "2:03:12", "throughput": 8693.89, "total_tokens": 73684776} +{"current_steps": 109320, "total_steps": 204665, "loss": 0.0482, "lr": 1.0553509706567384e-06, "epoch": 2.670705787506413, "percentage": 53.41, "elapsed_time": "2:21:15", "remaining_time": "2:03:12", "throughput": 8693.88, "total_tokens": 73687656} +{"current_steps": 109325, "total_steps": 204665, "loss": 0.0003, "lr": 1.055265823590082e-06, "epoch": 2.67082793833826, "percentage": 53.42, "elapsed_time": "2:21:16", "remaining_time": "2:03:11", "throughput": 8693.9, "total_tokens": 73690792} +{"current_steps": 109330, "total_steps": 204665, "loss": 0.0578, "lr": 1.0551806761215175e-06, "epoch": 2.6709500891701072, "percentage": 53.42, "elapsed_time": "2:21:16", "remaining_time": "2:03:11", "throughput": 8693.99, "total_tokens": 73694632} +{"current_steps": 109335, "total_steps": 204665, "loss": 0.0572, "lr": 1.0550955282516644e-06, "epoch": 2.6710722400019544, "percentage": 53.42, "elapsed_time": "2:21:16", "remaining_time": "2:03:11", "throughput": 8694.01, "total_tokens": 73697832} +{"current_steps": 109340, "total_steps": 204665, "loss": 0.0728, "lr": 1.0550103799811419e-06, "epoch": 2.6711943908338016, "percentage": 53.42, "elapsed_time": "2:21:17", "remaining_time": "2:03:10", "throughput": 8694.02, "total_tokens": 73700904} +{"current_steps": 109345, "total_steps": 204665, "loss": 0.0323, "lr": 1.054925231310569e-06, "epoch": 2.671316541665649, "percentage": 53.43, "elapsed_time": "2:21:17", "remaining_time": "2:03:10", "throughput": 8694.05, "total_tokens": 73704104} +{"current_steps": 109350, "total_steps": 204665, "loss": 0.0353, "lr": 1.054840082240565e-06, "epoch": 2.671438692497496, "percentage": 53.43, "elapsed_time": "2:21:17", "remaining_time": "2:03:09", "throughput": 8694.08, "total_tokens": 73707432} +{"current_steps": 109355, "total_steps": 204665, "loss": 0.0406, "lr": 1.0547549327717491e-06, "epoch": 2.671560843329343, "percentage": 53.43, "elapsed_time": "2:21:18", "remaining_time": "2:03:09", "throughput": 8694.11, "total_tokens": 73710632} +{"current_steps": 109360, "total_steps": 204665, "loss": 0.0004, "lr": 1.054669782904741e-06, "epoch": 2.6716829941611904, "percentage": 53.43, "elapsed_time": "2:21:18", "remaining_time": "2:03:08", "throughput": 8694.17, "total_tokens": 73714216} +{"current_steps": 109365, "total_steps": 204665, "loss": 0.0004, "lr": 1.054584632640159e-06, "epoch": 2.6718051449930376, "percentage": 53.44, "elapsed_time": "2:21:18", "remaining_time": "2:03:08", "throughput": 8694.18, "total_tokens": 73717288} +{"current_steps": 109370, "total_steps": 204665, "loss": 0.0627, "lr": 1.0544994819786233e-06, "epoch": 2.6719272958248848, "percentage": 53.44, "elapsed_time": "2:21:19", "remaining_time": "2:03:08", "throughput": 8694.23, "total_tokens": 73720680} +{"current_steps": 109375, "total_steps": 204665, "loss": 0.0008, "lr": 1.0544143309207525e-06, "epoch": 2.6720494466567315, "percentage": 53.44, "elapsed_time": "2:21:19", "remaining_time": "2:03:07", "throughput": 8694.25, "total_tokens": 73723880} +{"current_steps": 109380, "total_steps": 204665, "loss": 0.0004, "lr": 1.054329179467166e-06, "epoch": 2.672171597488579, "percentage": 53.44, "elapsed_time": "2:21:19", "remaining_time": "2:03:07", "throughput": 8694.27, "total_tokens": 73727016} +{"current_steps": 109385, "total_steps": 204665, "loss": 0.1426, "lr": 1.0542440276184835e-06, "epoch": 2.672293748320426, "percentage": 53.45, "elapsed_time": "2:21:20", "remaining_time": "2:03:06", "throughput": 8694.29, "total_tokens": 73730152} +{"current_steps": 109390, "total_steps": 204665, "loss": 0.0894, "lr": 1.0541588753753235e-06, "epoch": 2.672415899152273, "percentage": 53.45, "elapsed_time": "2:21:20", "remaining_time": "2:03:06", "throughput": 8694.33, "total_tokens": 73733544} +{"current_steps": 109395, "total_steps": 204665, "loss": 0.0003, "lr": 1.0540737227383052e-06, "epoch": 2.6725380499841203, "percentage": 53.45, "elapsed_time": "2:21:20", "remaining_time": "2:03:05", "throughput": 8694.41, "total_tokens": 73737256} +{"current_steps": 109400, "total_steps": 204665, "loss": 0.0003, "lr": 1.0539885697080487e-06, "epoch": 2.6726602008159674, "percentage": 53.45, "elapsed_time": "2:21:21", "remaining_time": "2:03:05", "throughput": 8694.44, "total_tokens": 73740584} +{"current_steps": 109405, "total_steps": 204665, "loss": 0.0398, "lr": 1.053903416285173e-06, "epoch": 2.6727823516478146, "percentage": 53.46, "elapsed_time": "2:21:21", "remaining_time": "2:03:05", "throughput": 8694.49, "total_tokens": 73744040} +{"current_steps": 109410, "total_steps": 204665, "loss": 0.0559, "lr": 1.053818262470297e-06, "epoch": 2.672904502479662, "percentage": 53.46, "elapsed_time": "2:21:22", "remaining_time": "2:03:04", "throughput": 8694.51, "total_tokens": 73747176} +{"current_steps": 109415, "total_steps": 204665, "loss": 0.043, "lr": 1.0537331082640399e-06, "epoch": 2.673026653311509, "percentage": 53.46, "elapsed_time": "2:21:22", "remaining_time": "2:03:04", "throughput": 8694.55, "total_tokens": 73750568} +{"current_steps": 109420, "total_steps": 204665, "loss": 0.0016, "lr": 1.0536479536670215e-06, "epoch": 2.673148804143356, "percentage": 53.46, "elapsed_time": "2:21:22", "remaining_time": "2:03:03", "throughput": 8694.58, "total_tokens": 73753768} +{"current_steps": 109425, "total_steps": 204665, "loss": 0.0005, "lr": 1.0535627986798603e-06, "epoch": 2.6732709549752034, "percentage": 53.47, "elapsed_time": "2:21:23", "remaining_time": "2:03:03", "throughput": 8694.59, "total_tokens": 73756776} +{"current_steps": 109430, "total_steps": 204665, "loss": 0.0491, "lr": 1.0534776433031764e-06, "epoch": 2.6733931058070506, "percentage": 53.47, "elapsed_time": "2:21:23", "remaining_time": "2:03:02", "throughput": 8694.64, "total_tokens": 73760232} +{"current_steps": 109435, "total_steps": 204665, "loss": 0.0004, "lr": 1.0533924875375886e-06, "epoch": 2.6735152566388978, "percentage": 53.47, "elapsed_time": "2:21:23", "remaining_time": "2:03:02", "throughput": 8694.69, "total_tokens": 73763752} +{"current_steps": 109440, "total_steps": 204665, "loss": 0.0002, "lr": 1.0533073313837163e-06, "epoch": 2.673637407470745, "percentage": 53.47, "elapsed_time": "2:21:24", "remaining_time": "2:03:02", "throughput": 8694.72, "total_tokens": 73766952} +{"current_steps": 109445, "total_steps": 204665, "loss": 0.0642, "lr": 1.0532221748421785e-06, "epoch": 2.673759558302592, "percentage": 53.48, "elapsed_time": "2:21:24", "remaining_time": "2:03:01", "throughput": 8694.75, "total_tokens": 73770216} +{"current_steps": 109450, "total_steps": 204665, "loss": 0.0589, "lr": 1.053137017913595e-06, "epoch": 2.6738817091344393, "percentage": 53.48, "elapsed_time": "2:21:24", "remaining_time": "2:03:01", "throughput": 8694.79, "total_tokens": 73773608} +{"current_steps": 109455, "total_steps": 204665, "loss": 0.0693, "lr": 1.0530518605985848e-06, "epoch": 2.6740038599662865, "percentage": 53.48, "elapsed_time": "2:21:25", "remaining_time": "2:03:00", "throughput": 8694.83, "total_tokens": 73777000} +{"current_steps": 109460, "total_steps": 204665, "loss": 0.0567, "lr": 1.052966702897767e-06, "epoch": 2.6741260107981333, "percentage": 53.48, "elapsed_time": "2:21:25", "remaining_time": "2:03:00", "throughput": 8694.87, "total_tokens": 73780328} +{"current_steps": 109465, "total_steps": 204665, "loss": 0.0032, "lr": 1.0528815448117613e-06, "epoch": 2.674248161629981, "percentage": 53.48, "elapsed_time": "2:21:25", "remaining_time": "2:03:00", "throughput": 8694.87, "total_tokens": 73783336} +{"current_steps": 109470, "total_steps": 204665, "loss": 0.0556, "lr": 1.052796386341187e-06, "epoch": 2.6743703124618277, "percentage": 53.49, "elapsed_time": "2:21:26", "remaining_time": "2:02:59", "throughput": 8694.89, "total_tokens": 73786408} +{"current_steps": 109475, "total_steps": 204665, "loss": 0.0095, "lr": 1.0527112274866628e-06, "epoch": 2.6744924632936753, "percentage": 53.49, "elapsed_time": "2:21:26", "remaining_time": "2:02:59", "throughput": 8694.9, "total_tokens": 73789416} +{"current_steps": 109480, "total_steps": 204665, "loss": 0.0003, "lr": 1.0526260682488085e-06, "epoch": 2.674614614125522, "percentage": 53.49, "elapsed_time": "2:21:26", "remaining_time": "2:02:58", "throughput": 8694.89, "total_tokens": 73792296} +{"current_steps": 109485, "total_steps": 204665, "loss": 0.0002, "lr": 1.0525409086282433e-06, "epoch": 2.6747367649573692, "percentage": 53.49, "elapsed_time": "2:21:27", "remaining_time": "2:02:58", "throughput": 8694.95, "total_tokens": 73795880} +{"current_steps": 109490, "total_steps": 204665, "loss": 0.0256, "lr": 1.0524557486255862e-06, "epoch": 2.6748589157892164, "percentage": 53.5, "elapsed_time": "2:21:27", "remaining_time": "2:02:57", "throughput": 8695.0, "total_tokens": 73799336} +{"current_steps": 109495, "total_steps": 204665, "loss": 0.0001, "lr": 1.052370588241457e-06, "epoch": 2.6749810666210636, "percentage": 53.5, "elapsed_time": "2:21:27", "remaining_time": "2:02:57", "throughput": 8695.05, "total_tokens": 73802856} +{"current_steps": 109500, "total_steps": 204665, "loss": 0.0006, "lr": 1.052285427476475e-06, "epoch": 2.675103217452911, "percentage": 53.5, "elapsed_time": "2:21:28", "remaining_time": "2:02:57", "throughput": 8695.1, "total_tokens": 73806312} +{"current_steps": 109505, "total_steps": 204665, "loss": 0.0001, "lr": 1.052200266331259e-06, "epoch": 2.675225368284758, "percentage": 53.5, "elapsed_time": "2:21:28", "remaining_time": "2:02:56", "throughput": 8695.13, "total_tokens": 73809576} +{"current_steps": 109510, "total_steps": 204665, "loss": 0.0672, "lr": 1.0521151048064287e-06, "epoch": 2.675347519116605, "percentage": 53.51, "elapsed_time": "2:21:28", "remaining_time": "2:02:56", "throughput": 8695.2, "total_tokens": 73813224} +{"current_steps": 109515, "total_steps": 204665, "loss": 0.0002, "lr": 1.0520299429026038e-06, "epoch": 2.6754696699484524, "percentage": 53.51, "elapsed_time": "2:21:29", "remaining_time": "2:02:55", "throughput": 8695.27, "total_tokens": 73816872} +{"current_steps": 109520, "total_steps": 204665, "loss": 0.0004, "lr": 1.0519447806204026e-06, "epoch": 2.6755918207802996, "percentage": 53.51, "elapsed_time": "2:21:29", "remaining_time": "2:02:55", "throughput": 8695.3, "total_tokens": 73820136} +{"current_steps": 109525, "total_steps": 204665, "loss": 0.0003, "lr": 1.0518596179604451e-06, "epoch": 2.6757139716121467, "percentage": 53.51, "elapsed_time": "2:21:30", "remaining_time": "2:02:54", "throughput": 8695.34, "total_tokens": 73823528} +{"current_steps": 109530, "total_steps": 204665, "loss": 0.075, "lr": 1.0517744549233505e-06, "epoch": 2.675836122443994, "percentage": 53.52, "elapsed_time": "2:21:30", "remaining_time": "2:02:54", "throughput": 8695.39, "total_tokens": 73826984} +{"current_steps": 109535, "total_steps": 204665, "loss": 0.0002, "lr": 1.0516892915097381e-06, "epoch": 2.675958273275841, "percentage": 53.52, "elapsed_time": "2:21:30", "remaining_time": "2:02:54", "throughput": 8695.43, "total_tokens": 73830248} +{"current_steps": 109540, "total_steps": 204665, "loss": 0.1249, "lr": 1.0516041277202275e-06, "epoch": 2.6760804241076883, "percentage": 53.52, "elapsed_time": "2:21:31", "remaining_time": "2:02:53", "throughput": 8695.54, "total_tokens": 73834408} +{"current_steps": 109545, "total_steps": 204665, "loss": 0.0002, "lr": 1.0515189635554375e-06, "epoch": 2.676202574939535, "percentage": 53.52, "elapsed_time": "2:21:31", "remaining_time": "2:02:53", "throughput": 8695.58, "total_tokens": 73837736} +{"current_steps": 109550, "total_steps": 204665, "loss": 0.1554, "lr": 1.0514337990159879e-06, "epoch": 2.6763247257713827, "percentage": 53.53, "elapsed_time": "2:21:31", "remaining_time": "2:02:52", "throughput": 8695.66, "total_tokens": 73841512} +{"current_steps": 109555, "total_steps": 204665, "loss": 0.0001, "lr": 1.0513486341024978e-06, "epoch": 2.6764468766032294, "percentage": 53.53, "elapsed_time": "2:21:32", "remaining_time": "2:02:52", "throughput": 8695.67, "total_tokens": 73844584} +{"current_steps": 109560, "total_steps": 204665, "loss": 0.0004, "lr": 1.051263468815587e-06, "epoch": 2.676569027435077, "percentage": 53.53, "elapsed_time": "2:21:32", "remaining_time": "2:02:51", "throughput": 8695.7, "total_tokens": 73847784} +{"current_steps": 109565, "total_steps": 204665, "loss": 0.0002, "lr": 1.0511783031558741e-06, "epoch": 2.676691178266924, "percentage": 53.53, "elapsed_time": "2:21:32", "remaining_time": "2:02:51", "throughput": 8695.74, "total_tokens": 73851112} +{"current_steps": 109570, "total_steps": 204665, "loss": 0.0271, "lr": 1.051093137123979e-06, "epoch": 2.676813329098771, "percentage": 53.54, "elapsed_time": "2:21:33", "remaining_time": "2:02:51", "throughput": 8695.76, "total_tokens": 73854248} +{"current_steps": 109575, "total_steps": 204665, "loss": 0.049, "lr": 1.0510079707205206e-06, "epoch": 2.676935479930618, "percentage": 53.54, "elapsed_time": "2:21:33", "remaining_time": "2:02:50", "throughput": 8695.77, "total_tokens": 73857320} +{"current_steps": 109580, "total_steps": 204665, "loss": 0.0453, "lr": 1.0509228039461184e-06, "epoch": 2.6770576307624654, "percentage": 53.54, "elapsed_time": "2:21:33", "remaining_time": "2:02:50", "throughput": 8695.77, "total_tokens": 73860328} +{"current_steps": 109585, "total_steps": 204665, "loss": 0.0002, "lr": 1.0508376368013922e-06, "epoch": 2.6771797815943126, "percentage": 53.54, "elapsed_time": "2:21:34", "remaining_time": "2:02:49", "throughput": 8695.8, "total_tokens": 73863592} +{"current_steps": 109590, "total_steps": 204665, "loss": 0.0542, "lr": 1.050752469286961e-06, "epoch": 2.6773019324261598, "percentage": 53.55, "elapsed_time": "2:21:34", "remaining_time": "2:02:49", "throughput": 8695.9, "total_tokens": 73867496} +{"current_steps": 109595, "total_steps": 204665, "loss": 0.0002, "lr": 1.0506673014034441e-06, "epoch": 2.677424083258007, "percentage": 53.55, "elapsed_time": "2:21:34", "remaining_time": "2:02:49", "throughput": 8695.96, "total_tokens": 73871016} +{"current_steps": 109600, "total_steps": 204665, "loss": 0.1749, "lr": 1.050582133151461e-06, "epoch": 2.677546234089854, "percentage": 53.55, "elapsed_time": "2:21:35", "remaining_time": "2:02:48", "throughput": 8696.04, "total_tokens": 73874792} +{"current_steps": 109605, "total_steps": 204665, "loss": 0.0002, "lr": 1.0504969645316313e-06, "epoch": 2.6776683849217013, "percentage": 53.55, "elapsed_time": "2:21:35", "remaining_time": "2:02:48", "throughput": 8696.12, "total_tokens": 73878568} +{"current_steps": 109610, "total_steps": 204665, "loss": 0.0592, "lr": 1.0504117955445736e-06, "epoch": 2.6777905357535485, "percentage": 53.56, "elapsed_time": "2:21:35", "remaining_time": "2:02:47", "throughput": 8696.13, "total_tokens": 73881576} +{"current_steps": 109615, "total_steps": 204665, "loss": 0.0768, "lr": 1.050326626190908e-06, "epoch": 2.6779126865853957, "percentage": 53.56, "elapsed_time": "2:21:36", "remaining_time": "2:02:47", "throughput": 8696.18, "total_tokens": 73885032} +{"current_steps": 109620, "total_steps": 204665, "loss": 0.0703, "lr": 1.050241456471254e-06, "epoch": 2.678034837417243, "percentage": 53.56, "elapsed_time": "2:21:36", "remaining_time": "2:02:46", "throughput": 8696.2, "total_tokens": 73888232} +{"current_steps": 109625, "total_steps": 204665, "loss": 0.0003, "lr": 1.05015628638623e-06, "epoch": 2.67815698824909, "percentage": 53.56, "elapsed_time": "2:21:36", "remaining_time": "2:02:46", "throughput": 8696.21, "total_tokens": 73891304} +{"current_steps": 109630, "total_steps": 204665, "loss": 0.0004, "lr": 1.0500711159364564e-06, "epoch": 2.6782791390809373, "percentage": 53.57, "elapsed_time": "2:21:37", "remaining_time": "2:02:46", "throughput": 8696.23, "total_tokens": 73894376} +{"current_steps": 109635, "total_steps": 204665, "loss": 0.0007, "lr": 1.049985945122552e-06, "epoch": 2.6784012899127845, "percentage": 53.57, "elapsed_time": "2:21:37", "remaining_time": "2:02:45", "throughput": 8696.28, "total_tokens": 73897832} +{"current_steps": 109640, "total_steps": 204665, "loss": 0.0138, "lr": 1.0499007739451364e-06, "epoch": 2.6785234407446312, "percentage": 53.57, "elapsed_time": "2:21:37", "remaining_time": "2:02:45", "throughput": 8696.33, "total_tokens": 73901352} +{"current_steps": 109645, "total_steps": 204665, "loss": 0.0374, "lr": 1.0498156024048285e-06, "epoch": 2.678645591576479, "percentage": 53.57, "elapsed_time": "2:21:38", "remaining_time": "2:02:44", "throughput": 8696.37, "total_tokens": 73904680} +{"current_steps": 109650, "total_steps": 204665, "loss": 0.0002, "lr": 1.0497304305022488e-06, "epoch": 2.6787677424083256, "percentage": 53.58, "elapsed_time": "2:21:38", "remaining_time": "2:02:44", "throughput": 8696.43, "total_tokens": 73908264} +{"current_steps": 109655, "total_steps": 204665, "loss": 0.0327, "lr": 1.0496452582380158e-06, "epoch": 2.6788898932401732, "percentage": 53.58, "elapsed_time": "2:21:39", "remaining_time": "2:02:43", "throughput": 8696.47, "total_tokens": 73911592} +{"current_steps": 109660, "total_steps": 204665, "loss": 0.0616, "lr": 1.0495600856127492e-06, "epoch": 2.67901204407202, "percentage": 53.58, "elapsed_time": "2:21:39", "remaining_time": "2:02:43", "throughput": 8696.51, "total_tokens": 73914984} +{"current_steps": 109665, "total_steps": 204665, "loss": 0.1155, "lr": 1.0494749126270681e-06, "epoch": 2.679134194903867, "percentage": 53.58, "elapsed_time": "2:21:39", "remaining_time": "2:02:43", "throughput": 8696.52, "total_tokens": 73917992} +{"current_steps": 109670, "total_steps": 204665, "loss": 0.0319, "lr": 1.0493897392815927e-06, "epoch": 2.6792563457357144, "percentage": 53.59, "elapsed_time": "2:21:40", "remaining_time": "2:02:42", "throughput": 8696.54, "total_tokens": 73921128} +{"current_steps": 109675, "total_steps": 204665, "loss": 0.0491, "lr": 1.049304565576941e-06, "epoch": 2.6793784965675616, "percentage": 53.59, "elapsed_time": "2:21:40", "remaining_time": "2:02:42", "throughput": 8696.63, "total_tokens": 73924968} +{"current_steps": 109680, "total_steps": 204665, "loss": 0.0897, "lr": 1.0492193915137337e-06, "epoch": 2.6795006473994087, "percentage": 53.59, "elapsed_time": "2:21:40", "remaining_time": "2:02:41", "throughput": 8696.65, "total_tokens": 73928168} +{"current_steps": 109685, "total_steps": 204665, "loss": 0.0729, "lr": 1.0491342170925898e-06, "epoch": 2.679622798231256, "percentage": 53.59, "elapsed_time": "2:21:41", "remaining_time": "2:02:41", "throughput": 8696.7, "total_tokens": 73931560} +{"current_steps": 109690, "total_steps": 204665, "loss": 0.0372, "lr": 1.0490490423141286e-06, "epoch": 2.679744949063103, "percentage": 53.59, "elapsed_time": "2:21:41", "remaining_time": "2:02:40", "throughput": 8696.72, "total_tokens": 73934696} +{"current_steps": 109695, "total_steps": 204665, "loss": 0.0507, "lr": 1.0489638671789695e-06, "epoch": 2.6798670998949503, "percentage": 53.6, "elapsed_time": "2:21:41", "remaining_time": "2:02:40", "throughput": 8696.79, "total_tokens": 73938408} +{"current_steps": 109700, "total_steps": 204665, "loss": 0.0004, "lr": 1.0488786916877322e-06, "epoch": 2.6799892507267975, "percentage": 53.6, "elapsed_time": "2:21:42", "remaining_time": "2:02:40", "throughput": 8696.81, "total_tokens": 73941544} +{"current_steps": 109705, "total_steps": 204665, "loss": 0.045, "lr": 1.0487935158410354e-06, "epoch": 2.6801114015586447, "percentage": 53.6, "elapsed_time": "2:21:42", "remaining_time": "2:02:39", "throughput": 8696.9, "total_tokens": 73945448} +{"current_steps": 109710, "total_steps": 204665, "loss": 0.0579, "lr": 1.0487083396394994e-06, "epoch": 2.680233552390492, "percentage": 53.6, "elapsed_time": "2:21:42", "remaining_time": "2:02:39", "throughput": 8696.94, "total_tokens": 73948712} +{"current_steps": 109715, "total_steps": 204665, "loss": 0.027, "lr": 1.0486231630837435e-06, "epoch": 2.680355703222339, "percentage": 53.61, "elapsed_time": "2:21:43", "remaining_time": "2:02:38", "throughput": 8697.0, "total_tokens": 73952360} +{"current_steps": 109720, "total_steps": 204665, "loss": 0.0385, "lr": 1.0485379861743867e-06, "epoch": 2.6804778540541863, "percentage": 53.61, "elapsed_time": "2:21:43", "remaining_time": "2:02:38", "throughput": 8697.05, "total_tokens": 73955752} +{"current_steps": 109725, "total_steps": 204665, "loss": 0.0506, "lr": 1.0484528089120484e-06, "epoch": 2.680600004886033, "percentage": 53.61, "elapsed_time": "2:21:43", "remaining_time": "2:02:38", "throughput": 8697.09, "total_tokens": 73959144} +{"current_steps": 109730, "total_steps": 204665, "loss": 0.0005, "lr": 1.0483676312973484e-06, "epoch": 2.6807221557178806, "percentage": 53.61, "elapsed_time": "2:21:44", "remaining_time": "2:02:37", "throughput": 8697.11, "total_tokens": 73962344} +{"current_steps": 109735, "total_steps": 204665, "loss": 0.0003, "lr": 1.0482824533309057e-06, "epoch": 2.6808443065497274, "percentage": 53.62, "elapsed_time": "2:21:44", "remaining_time": "2:02:37", "throughput": 8697.14, "total_tokens": 73965544} +{"current_steps": 109740, "total_steps": 204665, "loss": 0.0005, "lr": 1.04819727501334e-06, "epoch": 2.680966457381575, "percentage": 53.62, "elapsed_time": "2:21:44", "remaining_time": "2:02:36", "throughput": 8697.18, "total_tokens": 73968936} +{"current_steps": 109745, "total_steps": 204665, "loss": 0.0503, "lr": 1.0481120963452712e-06, "epoch": 2.6810886082134218, "percentage": 53.62, "elapsed_time": "2:21:45", "remaining_time": "2:02:36", "throughput": 8697.21, "total_tokens": 73972200} +{"current_steps": 109750, "total_steps": 204665, "loss": 0.0004, "lr": 1.048026917327318e-06, "epoch": 2.681210759045269, "percentage": 53.62, "elapsed_time": "2:21:45", "remaining_time": "2:02:35", "throughput": 8697.2, "total_tokens": 73975080} +{"current_steps": 109755, "total_steps": 204665, "loss": 0.0591, "lr": 1.0479417379601003e-06, "epoch": 2.681332909877116, "percentage": 53.63, "elapsed_time": "2:21:45", "remaining_time": "2:02:35", "throughput": 8697.21, "total_tokens": 73978152} +{"current_steps": 109760, "total_steps": 204665, "loss": 0.0969, "lr": 1.0478565582442374e-06, "epoch": 2.6814550607089633, "percentage": 53.63, "elapsed_time": "2:21:46", "remaining_time": "2:02:35", "throughput": 8697.24, "total_tokens": 73981416} +{"current_steps": 109765, "total_steps": 204665, "loss": 0.0928, "lr": 1.0477713781803487e-06, "epoch": 2.6815772115408105, "percentage": 53.63, "elapsed_time": "2:21:46", "remaining_time": "2:02:34", "throughput": 8697.31, "total_tokens": 73985128} +{"current_steps": 109770, "total_steps": 204665, "loss": 0.0412, "lr": 1.0476861977690533e-06, "epoch": 2.6816993623726577, "percentage": 53.63, "elapsed_time": "2:21:47", "remaining_time": "2:02:34", "throughput": 8697.34, "total_tokens": 73988328} +{"current_steps": 109775, "total_steps": 204665, "loss": 0.0452, "lr": 1.0476010170109715e-06, "epoch": 2.681821513204505, "percentage": 53.64, "elapsed_time": "2:21:47", "remaining_time": "2:02:33", "throughput": 8697.37, "total_tokens": 73991592} +{"current_steps": 109780, "total_steps": 204665, "loss": 0.0005, "lr": 1.0475158359067222e-06, "epoch": 2.681943664036352, "percentage": 53.64, "elapsed_time": "2:21:47", "remaining_time": "2:02:33", "throughput": 8697.4, "total_tokens": 73994920} +{"current_steps": 109785, "total_steps": 204665, "loss": 0.1469, "lr": 1.047430654456925e-06, "epoch": 2.6820658148681993, "percentage": 53.64, "elapsed_time": "2:21:48", "remaining_time": "2:02:32", "throughput": 8697.41, "total_tokens": 73997992} +{"current_steps": 109790, "total_steps": 204665, "loss": 0.1009, "lr": 1.0473454726621992e-06, "epoch": 2.6821879657000465, "percentage": 53.64, "elapsed_time": "2:21:48", "remaining_time": "2:02:32", "throughput": 8697.44, "total_tokens": 74001192} +{"current_steps": 109795, "total_steps": 204665, "loss": 0.001, "lr": 1.0472602905231647e-06, "epoch": 2.6823101165318937, "percentage": 53.65, "elapsed_time": "2:21:48", "remaining_time": "2:02:32", "throughput": 8697.48, "total_tokens": 74004584} +{"current_steps": 109800, "total_steps": 204665, "loss": 0.0008, "lr": 1.0471751080404401e-06, "epoch": 2.682432267363741, "percentage": 53.65, "elapsed_time": "2:21:49", "remaining_time": "2:02:31", "throughput": 8697.53, "total_tokens": 74008040} +{"current_steps": 109805, "total_steps": 204665, "loss": 0.0005, "lr": 1.0470899252146456e-06, "epoch": 2.682554418195588, "percentage": 53.65, "elapsed_time": "2:21:49", "remaining_time": "2:02:31", "throughput": 8697.53, "total_tokens": 74010984} +{"current_steps": 109810, "total_steps": 204665, "loss": 0.0008, "lr": 1.0470047420464008e-06, "epoch": 2.6826765690274352, "percentage": 53.65, "elapsed_time": "2:21:49", "remaining_time": "2:02:30", "throughput": 8697.56, "total_tokens": 74014184} +{"current_steps": 109815, "total_steps": 204665, "loss": 0.0006, "lr": 1.0469195585363246e-06, "epoch": 2.6827987198592824, "percentage": 53.66, "elapsed_time": "2:21:50", "remaining_time": "2:02:30", "throughput": 8697.59, "total_tokens": 74017448} +{"current_steps": 109820, "total_steps": 204665, "loss": 0.1279, "lr": 1.0468343746850369e-06, "epoch": 2.682920870691129, "percentage": 53.66, "elapsed_time": "2:21:50", "remaining_time": "2:02:29", "throughput": 8697.64, "total_tokens": 74020904} +{"current_steps": 109825, "total_steps": 204665, "loss": 0.0209, "lr": 1.0467491904931574e-06, "epoch": 2.683043021522977, "percentage": 53.66, "elapsed_time": "2:21:50", "remaining_time": "2:02:29", "throughput": 8697.69, "total_tokens": 74024424} +{"current_steps": 109830, "total_steps": 204665, "loss": 0.0351, "lr": 1.0466640059613045e-06, "epoch": 2.6831651723548235, "percentage": 53.66, "elapsed_time": "2:21:51", "remaining_time": "2:02:29", "throughput": 8697.72, "total_tokens": 74027752} +{"current_steps": 109835, "total_steps": 204665, "loss": 0.0001, "lr": 1.0465788210900987e-06, "epoch": 2.6832873231866707, "percentage": 53.67, "elapsed_time": "2:21:51", "remaining_time": "2:02:28", "throughput": 8697.76, "total_tokens": 74031016} +{"current_steps": 109840, "total_steps": 204665, "loss": 0.059, "lr": 1.046493635880159e-06, "epoch": 2.683409474018518, "percentage": 53.67, "elapsed_time": "2:21:52", "remaining_time": "2:02:28", "throughput": 8697.56, "total_tokens": 74034600} +{"current_steps": 109845, "total_steps": 204665, "loss": 0.0526, "lr": 1.0464084503321053e-06, "epoch": 2.683531624850365, "percentage": 53.67, "elapsed_time": "2:21:52", "remaining_time": "2:02:28", "throughput": 8697.58, "total_tokens": 74037672} +{"current_steps": 109850, "total_steps": 204665, "loss": 0.0802, "lr": 1.046323264446557e-06, "epoch": 2.6836537756822123, "percentage": 53.67, "elapsed_time": "2:21:52", "remaining_time": "2:02:27", "throughput": 8697.59, "total_tokens": 74040808} +{"current_steps": 109855, "total_steps": 204665, "loss": 0.0002, "lr": 1.046238078224133e-06, "epoch": 2.6837759265140595, "percentage": 53.68, "elapsed_time": "2:21:53", "remaining_time": "2:02:27", "throughput": 8697.61, "total_tokens": 74043944} +{"current_steps": 109860, "total_steps": 204665, "loss": 0.0271, "lr": 1.0461528916654536e-06, "epoch": 2.6838980773459067, "percentage": 53.68, "elapsed_time": "2:21:53", "remaining_time": "2:02:26", "throughput": 8697.66, "total_tokens": 74047464} +{"current_steps": 109865, "total_steps": 204665, "loss": 0.0003, "lr": 1.046067704771138e-06, "epoch": 2.684020228177754, "percentage": 53.68, "elapsed_time": "2:21:53", "remaining_time": "2:02:26", "throughput": 8697.71, "total_tokens": 74050920} +{"current_steps": 109870, "total_steps": 204665, "loss": 0.0001, "lr": 1.0459825175418057e-06, "epoch": 2.684142379009601, "percentage": 53.68, "elapsed_time": "2:21:54", "remaining_time": "2:02:25", "throughput": 8697.75, "total_tokens": 74054312} +{"current_steps": 109875, "total_steps": 204665, "loss": 0.0002, "lr": 1.045897329978076e-06, "epoch": 2.6842645298414483, "percentage": 53.69, "elapsed_time": "2:21:54", "remaining_time": "2:02:25", "throughput": 8697.79, "total_tokens": 74057640} +{"current_steps": 109880, "total_steps": 204665, "loss": 0.0004, "lr": 1.0458121420805685e-06, "epoch": 2.6843866806732954, "percentage": 53.69, "elapsed_time": "2:21:54", "remaining_time": "2:02:25", "throughput": 8697.8, "total_tokens": 74060648} +{"current_steps": 109885, "total_steps": 204665, "loss": 0.0581, "lr": 1.045726953849903e-06, "epoch": 2.6845088315051426, "percentage": 53.69, "elapsed_time": "2:21:55", "remaining_time": "2:02:24", "throughput": 8697.86, "total_tokens": 74064232} +{"current_steps": 109890, "total_steps": 204665, "loss": 0.0007, "lr": 1.0456417652866986e-06, "epoch": 2.68463098233699, "percentage": 53.69, "elapsed_time": "2:21:55", "remaining_time": "2:02:24", "throughput": 8697.88, "total_tokens": 74067368} +{"current_steps": 109895, "total_steps": 204665, "loss": 0.0438, "lr": 1.045556576391575e-06, "epoch": 2.684753133168837, "percentage": 53.7, "elapsed_time": "2:21:55", "remaining_time": "2:02:23", "throughput": 8697.95, "total_tokens": 74071016} +{"current_steps": 109900, "total_steps": 204665, "loss": 0.0004, "lr": 1.0454713871651518e-06, "epoch": 2.684875284000684, "percentage": 53.7, "elapsed_time": "2:21:56", "remaining_time": "2:02:23", "throughput": 8697.99, "total_tokens": 74074408} +{"current_steps": 109905, "total_steps": 204665, "loss": 0.0523, "lr": 1.0453861976080485e-06, "epoch": 2.684997434832531, "percentage": 53.7, "elapsed_time": "2:21:56", "remaining_time": "2:02:23", "throughput": 8698.07, "total_tokens": 74078184} +{"current_steps": 109910, "total_steps": 204665, "loss": 0.0002, "lr": 1.0453010077208845e-06, "epoch": 2.6851195856643786, "percentage": 53.7, "elapsed_time": "2:21:56", "remaining_time": "2:02:22", "throughput": 8698.11, "total_tokens": 74081576} +{"current_steps": 109915, "total_steps": 204665, "loss": 0.0134, "lr": 1.0452158175042794e-06, "epoch": 2.6852417364962253, "percentage": 53.7, "elapsed_time": "2:21:57", "remaining_time": "2:02:22", "throughput": 8698.15, "total_tokens": 74084904} +{"current_steps": 109920, "total_steps": 204665, "loss": 0.0866, "lr": 1.0451306269588526e-06, "epoch": 2.685363887328073, "percentage": 53.71, "elapsed_time": "2:21:57", "remaining_time": "2:02:21", "throughput": 8698.2, "total_tokens": 74088360} +{"current_steps": 109925, "total_steps": 204665, "loss": 0.0005, "lr": 1.0450454360852238e-06, "epoch": 2.6854860381599197, "percentage": 53.71, "elapsed_time": "2:21:58", "remaining_time": "2:02:21", "throughput": 8698.21, "total_tokens": 74091432} +{"current_steps": 109930, "total_steps": 204665, "loss": 0.0003, "lr": 1.0449602448840127e-06, "epoch": 2.685608188991767, "percentage": 53.71, "elapsed_time": "2:21:58", "remaining_time": "2:02:20", "throughput": 8698.23, "total_tokens": 74094568} +{"current_steps": 109935, "total_steps": 204665, "loss": 0.0733, "lr": 1.0448750533558383e-06, "epoch": 2.685730339823614, "percentage": 53.71, "elapsed_time": "2:21:58", "remaining_time": "2:02:20", "throughput": 8698.27, "total_tokens": 74097896} +{"current_steps": 109940, "total_steps": 204665, "loss": 0.0455, "lr": 1.0447898615013206e-06, "epoch": 2.6858524906554613, "percentage": 53.72, "elapsed_time": "2:21:59", "remaining_time": "2:02:20", "throughput": 8698.28, "total_tokens": 74101032} +{"current_steps": 109945, "total_steps": 204665, "loss": 0.0701, "lr": 1.044704669321079e-06, "epoch": 2.6859746414873085, "percentage": 53.72, "elapsed_time": "2:21:59", "remaining_time": "2:02:19", "throughput": 8698.34, "total_tokens": 74104552} +{"current_steps": 109950, "total_steps": 204665, "loss": 0.0004, "lr": 1.044619476815733e-06, "epoch": 2.6860967923191557, "percentage": 53.72, "elapsed_time": "2:21:59", "remaining_time": "2:02:19", "throughput": 8698.42, "total_tokens": 74108328} +{"current_steps": 109955, "total_steps": 204665, "loss": 0.074, "lr": 1.044534283985902e-06, "epoch": 2.686218943151003, "percentage": 53.72, "elapsed_time": "2:22:00", "remaining_time": "2:02:18", "throughput": 8698.5, "total_tokens": 74112168} +{"current_steps": 109960, "total_steps": 204665, "loss": 0.0443, "lr": 1.044449090832206e-06, "epoch": 2.68634109398285, "percentage": 53.73, "elapsed_time": "2:22:00", "remaining_time": "2:02:18", "throughput": 8698.56, "total_tokens": 74115624} +{"current_steps": 109965, "total_steps": 204665, "loss": 0.0447, "lr": 1.044363897355264e-06, "epoch": 2.6864632448146972, "percentage": 53.73, "elapsed_time": "2:22:00", "remaining_time": "2:02:17", "throughput": 8698.57, "total_tokens": 74118696} +{"current_steps": 109970, "total_steps": 204665, "loss": 0.1297, "lr": 1.044278703555696e-06, "epoch": 2.6865853956465444, "percentage": 53.73, "elapsed_time": "2:22:01", "remaining_time": "2:02:17", "throughput": 8698.61, "total_tokens": 74122088} +{"current_steps": 109975, "total_steps": 204665, "loss": 0.0467, "lr": 1.044193509434121e-06, "epoch": 2.6867075464783916, "percentage": 53.73, "elapsed_time": "2:22:01", "remaining_time": "2:02:17", "throughput": 8698.61, "total_tokens": 74125032} +{"current_steps": 109980, "total_steps": 204665, "loss": 0.0384, "lr": 1.0441083149911596e-06, "epoch": 2.686829697310239, "percentage": 53.74, "elapsed_time": "2:22:01", "remaining_time": "2:02:16", "throughput": 8698.65, "total_tokens": 74128360} +{"current_steps": 109985, "total_steps": 204665, "loss": 0.0019, "lr": 1.04402312022743e-06, "epoch": 2.686951848142086, "percentage": 53.74, "elapsed_time": "2:22:02", "remaining_time": "2:02:16", "throughput": 8698.72, "total_tokens": 74132008} +{"current_steps": 109990, "total_steps": 204665, "loss": 0.0002, "lr": 1.0439379251435527e-06, "epoch": 2.687073998973933, "percentage": 53.74, "elapsed_time": "2:22:02", "remaining_time": "2:02:15", "throughput": 8698.75, "total_tokens": 74135208} +{"current_steps": 109995, "total_steps": 204665, "loss": 0.0008, "lr": 1.043852729740147e-06, "epoch": 2.6871961498057804, "percentage": 53.74, "elapsed_time": "2:22:02", "remaining_time": "2:02:15", "throughput": 8698.79, "total_tokens": 74138600} +{"current_steps": 110000, "total_steps": 204665, "loss": 0.0655, "lr": 1.0437675340178322e-06, "epoch": 2.687318300637627, "percentage": 53.75, "elapsed_time": "2:22:03", "remaining_time": "2:02:14", "throughput": 8698.83, "total_tokens": 74141992} +{"current_steps": 110005, "total_steps": 204665, "loss": 0.0427, "lr": 1.0436823379772283e-06, "epoch": 2.6874404514694747, "percentage": 53.75, "elapsed_time": "2:22:03", "remaining_time": "2:02:14", "throughput": 8698.85, "total_tokens": 74145128} +{"current_steps": 110010, "total_steps": 204665, "loss": 0.0003, "lr": 1.0435971416189549e-06, "epoch": 2.6875626023013215, "percentage": 53.75, "elapsed_time": "2:22:03", "remaining_time": "2:02:14", "throughput": 8698.92, "total_tokens": 74148776} +{"current_steps": 110015, "total_steps": 204665, "loss": 0.0481, "lr": 1.0435119449436309e-06, "epoch": 2.6876847531331687, "percentage": 53.75, "elapsed_time": "2:22:04", "remaining_time": "2:02:13", "throughput": 8698.94, "total_tokens": 74151912} +{"current_steps": 110020, "total_steps": 204665, "loss": 0.0373, "lr": 1.0434267479518768e-06, "epoch": 2.687806903965016, "percentage": 53.76, "elapsed_time": "2:22:04", "remaining_time": "2:02:13", "throughput": 8698.95, "total_tokens": 74154984} +{"current_steps": 110025, "total_steps": 204665, "loss": 0.193, "lr": 1.0433415506443117e-06, "epoch": 2.687929054796863, "percentage": 53.76, "elapsed_time": "2:22:04", "remaining_time": "2:02:12", "throughput": 8698.96, "total_tokens": 74157928} +{"current_steps": 110030, "total_steps": 204665, "loss": 0.0006, "lr": 1.043256353021555e-06, "epoch": 2.6880512056287102, "percentage": 53.76, "elapsed_time": "2:22:05", "remaining_time": "2:02:12", "throughput": 8699.02, "total_tokens": 74161512} +{"current_steps": 110035, "total_steps": 204665, "loss": 0.0002, "lr": 1.0431711550842265e-06, "epoch": 2.6881733564605574, "percentage": 53.76, "elapsed_time": "2:22:05", "remaining_time": "2:02:12", "throughput": 8699.07, "total_tokens": 74164968} +{"current_steps": 110040, "total_steps": 204665, "loss": 0.0004, "lr": 1.0430859568329458e-06, "epoch": 2.6882955072924046, "percentage": 53.77, "elapsed_time": "2:22:05", "remaining_time": "2:02:11", "throughput": 8699.1, "total_tokens": 74168232} +{"current_steps": 110045, "total_steps": 204665, "loss": 0.0494, "lr": 1.0430007582683322e-06, "epoch": 2.688417658124252, "percentage": 53.77, "elapsed_time": "2:22:06", "remaining_time": "2:02:11", "throughput": 8699.14, "total_tokens": 74171624} +{"current_steps": 110050, "total_steps": 204665, "loss": 0.0137, "lr": 1.0429155593910055e-06, "epoch": 2.688539808956099, "percentage": 53.77, "elapsed_time": "2:22:06", "remaining_time": "2:02:10", "throughput": 8699.21, "total_tokens": 74175272} +{"current_steps": 110055, "total_steps": 204665, "loss": 0.0411, "lr": 1.0428303602015856e-06, "epoch": 2.688661959787946, "percentage": 53.77, "elapsed_time": "2:22:07", "remaining_time": "2:02:10", "throughput": 8699.26, "total_tokens": 74178792} +{"current_steps": 110060, "total_steps": 204665, "loss": 0.1165, "lr": 1.0427451607006916e-06, "epoch": 2.6887841106197934, "percentage": 53.78, "elapsed_time": "2:22:07", "remaining_time": "2:02:09", "throughput": 8699.31, "total_tokens": 74182248} +{"current_steps": 110065, "total_steps": 204665, "loss": 0.0005, "lr": 1.0426599608889435e-06, "epoch": 2.6889062614516406, "percentage": 53.78, "elapsed_time": "2:22:07", "remaining_time": "2:02:09", "throughput": 8699.38, "total_tokens": 74185896} +{"current_steps": 110070, "total_steps": 204665, "loss": 0.0047, "lr": 1.0425747607669607e-06, "epoch": 2.6890284122834878, "percentage": 53.78, "elapsed_time": "2:22:08", "remaining_time": "2:02:09", "throughput": 8699.4, "total_tokens": 74189096} +{"current_steps": 110075, "total_steps": 204665, "loss": 0.0301, "lr": 1.0424895603353626e-06, "epoch": 2.689150563115335, "percentage": 53.78, "elapsed_time": "2:22:08", "remaining_time": "2:02:08", "throughput": 8699.42, "total_tokens": 74192168} +{"current_steps": 110080, "total_steps": 204665, "loss": 0.0455, "lr": 1.0424043595947692e-06, "epoch": 2.689272713947182, "percentage": 53.79, "elapsed_time": "2:22:08", "remaining_time": "2:02:08", "throughput": 8699.44, "total_tokens": 74195368} +{"current_steps": 110085, "total_steps": 204665, "loss": 0.0002, "lr": 1.0423191585457997e-06, "epoch": 2.689394864779029, "percentage": 53.79, "elapsed_time": "2:22:09", "remaining_time": "2:02:07", "throughput": 8699.47, "total_tokens": 74198568} +{"current_steps": 110090, "total_steps": 204665, "loss": 0.0326, "lr": 1.0422339571890738e-06, "epoch": 2.6895170156108765, "percentage": 53.79, "elapsed_time": "2:22:09", "remaining_time": "2:02:07", "throughput": 8699.51, "total_tokens": 74202024} +{"current_steps": 110095, "total_steps": 204665, "loss": 0.0468, "lr": 1.0421487555252115e-06, "epoch": 2.6896391664427233, "percentage": 53.79, "elapsed_time": "2:22:09", "remaining_time": "2:02:06", "throughput": 8699.59, "total_tokens": 74205736} +{"current_steps": 110100, "total_steps": 204665, "loss": 0.1252, "lr": 1.042063553554832e-06, "epoch": 2.689761317274571, "percentage": 53.8, "elapsed_time": "2:22:10", "remaining_time": "2:02:06", "throughput": 8699.61, "total_tokens": 74208936} +{"current_steps": 110105, "total_steps": 204665, "loss": 0.0002, "lr": 1.041978351278555e-06, "epoch": 2.6898834681064177, "percentage": 53.8, "elapsed_time": "2:22:10", "remaining_time": "2:02:06", "throughput": 8699.65, "total_tokens": 74212200} +{"current_steps": 110110, "total_steps": 204665, "loss": 0.0007, "lr": 1.0418931486969998e-06, "epoch": 2.690005618938265, "percentage": 53.8, "elapsed_time": "2:22:10", "remaining_time": "2:02:05", "throughput": 8699.72, "total_tokens": 74215976} +{"current_steps": 110115, "total_steps": 204665, "loss": 0.0001, "lr": 1.0418079458107868e-06, "epoch": 2.690127769770112, "percentage": 53.8, "elapsed_time": "2:22:11", "remaining_time": "2:02:05", "throughput": 8699.75, "total_tokens": 74219176} +{"current_steps": 110120, "total_steps": 204665, "loss": 0.0011, "lr": 1.041722742620535e-06, "epoch": 2.690249920601959, "percentage": 53.8, "elapsed_time": "2:22:11", "remaining_time": "2:02:04", "throughput": 8699.81, "total_tokens": 74222696} +{"current_steps": 110125, "total_steps": 204665, "loss": 0.17, "lr": 1.0416375391268642e-06, "epoch": 2.6903720714338064, "percentage": 53.81, "elapsed_time": "2:22:11", "remaining_time": "2:02:04", "throughput": 8699.84, "total_tokens": 74225960} +{"current_steps": 110130, "total_steps": 204665, "loss": 0.0067, "lr": 1.0415523353303942e-06, "epoch": 2.6904942222656536, "percentage": 53.81, "elapsed_time": "2:22:12", "remaining_time": "2:02:04", "throughput": 8699.94, "total_tokens": 74229992} +{"current_steps": 110135, "total_steps": 204665, "loss": 0.0004, "lr": 1.0414671312317444e-06, "epoch": 2.690616373097501, "percentage": 53.81, "elapsed_time": "2:22:12", "remaining_time": "2:02:03", "throughput": 8700.0, "total_tokens": 74233576} +{"current_steps": 110140, "total_steps": 204665, "loss": 0.1042, "lr": 1.0413819268315343e-06, "epoch": 2.690738523929348, "percentage": 53.81, "elapsed_time": "2:22:12", "remaining_time": "2:02:03", "throughput": 8700.06, "total_tokens": 74237224} +{"current_steps": 110145, "total_steps": 204665, "loss": 0.0367, "lr": 1.0412967221303836e-06, "epoch": 2.690860674761195, "percentage": 53.82, "elapsed_time": "2:22:13", "remaining_time": "2:02:02", "throughput": 8700.13, "total_tokens": 74240808} +{"current_steps": 110150, "total_steps": 204665, "loss": 0.0001, "lr": 1.041211517128912e-06, "epoch": 2.6909828255930424, "percentage": 53.82, "elapsed_time": "2:22:13", "remaining_time": "2:02:02", "throughput": 8700.19, "total_tokens": 74244392} +{"current_steps": 110155, "total_steps": 204665, "loss": 0.1193, "lr": 1.0411263118277396e-06, "epoch": 2.6911049764248895, "percentage": 53.82, "elapsed_time": "2:22:13", "remaining_time": "2:02:01", "throughput": 8700.2, "total_tokens": 74247464} +{"current_steps": 110160, "total_steps": 204665, "loss": 0.0004, "lr": 1.0410411062274856e-06, "epoch": 2.6912271272567367, "percentage": 53.82, "elapsed_time": "2:22:14", "remaining_time": "2:02:01", "throughput": 8700.22, "total_tokens": 74250664} +{"current_steps": 110165, "total_steps": 204665, "loss": 0.066, "lr": 1.0409559003287692e-06, "epoch": 2.691349278088584, "percentage": 53.83, "elapsed_time": "2:22:14", "remaining_time": "2:02:01", "throughput": 8700.3, "total_tokens": 74254376} +{"current_steps": 110170, "total_steps": 204665, "loss": 0.0647, "lr": 1.0408706941322105e-06, "epoch": 2.6914714289204307, "percentage": 53.83, "elapsed_time": "2:22:15", "remaining_time": "2:02:00", "throughput": 8700.35, "total_tokens": 74257832} +{"current_steps": 110175, "total_steps": 204665, "loss": 0.0744, "lr": 1.0407854876384293e-06, "epoch": 2.6915935797522783, "percentage": 53.83, "elapsed_time": "2:22:15", "remaining_time": "2:02:00", "throughput": 8700.38, "total_tokens": 74261096} +{"current_steps": 110180, "total_steps": 204665, "loss": 0.0001, "lr": 1.0407002808480454e-06, "epoch": 2.691715730584125, "percentage": 53.83, "elapsed_time": "2:22:15", "remaining_time": "2:01:59", "throughput": 8700.44, "total_tokens": 74264680} +{"current_steps": 110185, "total_steps": 204665, "loss": 0.0811, "lr": 1.0406150737616776e-06, "epoch": 2.6918378814159727, "percentage": 53.84, "elapsed_time": "2:22:16", "remaining_time": "2:01:59", "throughput": 8700.51, "total_tokens": 74268264} +{"current_steps": 110190, "total_steps": 204665, "loss": 0.0358, "lr": 1.0405298663799463e-06, "epoch": 2.6919600322478194, "percentage": 53.84, "elapsed_time": "2:22:16", "remaining_time": "2:01:58", "throughput": 8700.53, "total_tokens": 74271464} +{"current_steps": 110195, "total_steps": 204665, "loss": 0.0011, "lr": 1.0404446587034708e-06, "epoch": 2.6920821830796666, "percentage": 53.84, "elapsed_time": "2:22:16", "remaining_time": "2:01:58", "throughput": 8700.54, "total_tokens": 74274536} +{"current_steps": 110200, "total_steps": 204665, "loss": 0.0002, "lr": 1.040359450732871e-06, "epoch": 2.692204333911514, "percentage": 53.84, "elapsed_time": "2:22:17", "remaining_time": "2:01:58", "throughput": 8700.58, "total_tokens": 74277928} +{"current_steps": 110205, "total_steps": 204665, "loss": 0.0001, "lr": 1.040274242468766e-06, "epoch": 2.692326484743361, "percentage": 53.85, "elapsed_time": "2:22:17", "remaining_time": "2:01:57", "throughput": 8700.62, "total_tokens": 74281256} +{"current_steps": 110210, "total_steps": 204665, "loss": 0.0004, "lr": 1.0401890339117763e-06, "epoch": 2.692448635575208, "percentage": 53.85, "elapsed_time": "2:22:17", "remaining_time": "2:01:57", "throughput": 8700.69, "total_tokens": 74284968} +{"current_steps": 110215, "total_steps": 204665, "loss": 0.0004, "lr": 1.0401038250625212e-06, "epoch": 2.6925707864070554, "percentage": 53.85, "elapsed_time": "2:22:18", "remaining_time": "2:01:56", "throughput": 8700.74, "total_tokens": 74288360} +{"current_steps": 110220, "total_steps": 204665, "loss": 0.0001, "lr": 1.04001861592162e-06, "epoch": 2.6926929372389026, "percentage": 53.85, "elapsed_time": "2:22:18", "remaining_time": "2:01:56", "throughput": 8700.74, "total_tokens": 74291304} +{"current_steps": 110225, "total_steps": 204665, "loss": 0.0396, "lr": 1.0399334064896927e-06, "epoch": 2.6928150880707498, "percentage": 53.86, "elapsed_time": "2:22:18", "remaining_time": "2:01:56", "throughput": 8700.86, "total_tokens": 74295528} +{"current_steps": 110230, "total_steps": 204665, "loss": 0.0001, "lr": 1.0398481967673592e-06, "epoch": 2.692937238902597, "percentage": 53.86, "elapsed_time": "2:22:19", "remaining_time": "2:01:55", "throughput": 8700.92, "total_tokens": 74299112} +{"current_steps": 110235, "total_steps": 204665, "loss": 0.0483, "lr": 1.0397629867552387e-06, "epoch": 2.693059389734444, "percentage": 53.86, "elapsed_time": "2:22:19", "remaining_time": "2:01:55", "throughput": 8700.97, "total_tokens": 74302632} +{"current_steps": 110240, "total_steps": 204665, "loss": 0.0002, "lr": 1.039677776453951e-06, "epoch": 2.6931815405662913, "percentage": 53.86, "elapsed_time": "2:22:19", "remaining_time": "2:01:54", "throughput": 8701.0, "total_tokens": 74305832} +{"current_steps": 110245, "total_steps": 204665, "loss": 0.0002, "lr": 1.0395925658641161e-06, "epoch": 2.6933036913981385, "percentage": 53.87, "elapsed_time": "2:22:20", "remaining_time": "2:01:54", "throughput": 8701.04, "total_tokens": 74309224} +{"current_steps": 110250, "total_steps": 204665, "loss": 0.0001, "lr": 1.039507354986353e-06, "epoch": 2.6934258422299857, "percentage": 53.87, "elapsed_time": "2:22:20", "remaining_time": "2:01:53", "throughput": 8701.08, "total_tokens": 74312552} +{"current_steps": 110255, "total_steps": 204665, "loss": 0.0001, "lr": 1.0394221438212822e-06, "epoch": 2.693547993061833, "percentage": 53.87, "elapsed_time": "2:22:20", "remaining_time": "2:01:53", "throughput": 8701.12, "total_tokens": 74315880} +{"current_steps": 110260, "total_steps": 204665, "loss": 0.0373, "lr": 1.0393369323695227e-06, "epoch": 2.69367014389368, "percentage": 53.87, "elapsed_time": "2:22:21", "remaining_time": "2:01:53", "throughput": 8701.14, "total_tokens": 74319144} +{"current_steps": 110265, "total_steps": 204665, "loss": 0.0397, "lr": 1.0392517206316944e-06, "epoch": 2.693792294725527, "percentage": 53.88, "elapsed_time": "2:22:21", "remaining_time": "2:01:52", "throughput": 8701.17, "total_tokens": 74322408} +{"current_steps": 110270, "total_steps": 204665, "loss": 0.0002, "lr": 1.0391665086084172e-06, "epoch": 2.6939144455573745, "percentage": 53.88, "elapsed_time": "2:22:21", "remaining_time": "2:01:52", "throughput": 8701.19, "total_tokens": 74325480} +{"current_steps": 110275, "total_steps": 204665, "loss": 0.0656, "lr": 1.0390812963003105e-06, "epoch": 2.694036596389221, "percentage": 53.88, "elapsed_time": "2:22:22", "remaining_time": "2:01:51", "throughput": 8701.2, "total_tokens": 74328488} +{"current_steps": 110280, "total_steps": 204665, "loss": 0.0378, "lr": 1.0389960837079944e-06, "epoch": 2.694158747221069, "percentage": 53.88, "elapsed_time": "2:22:22", "remaining_time": "2:01:51", "throughput": 8701.24, "total_tokens": 74331880} +{"current_steps": 110285, "total_steps": 204665, "loss": 0.0332, "lr": 1.0389108708320879e-06, "epoch": 2.6942808980529156, "percentage": 53.89, "elapsed_time": "2:22:23", "remaining_time": "2:01:50", "throughput": 8701.27, "total_tokens": 74335144} +{"current_steps": 110290, "total_steps": 204665, "loss": 0.0656, "lr": 1.0388256576732115e-06, "epoch": 2.694403048884763, "percentage": 53.89, "elapsed_time": "2:22:23", "remaining_time": "2:01:50", "throughput": 8701.32, "total_tokens": 74338664} +{"current_steps": 110295, "total_steps": 204665, "loss": 0.0001, "lr": 1.038740444231984e-06, "epoch": 2.69452519971661, "percentage": 53.89, "elapsed_time": "2:22:23", "remaining_time": "2:01:50", "throughput": 8701.36, "total_tokens": 74341992} +{"current_steps": 110300, "total_steps": 204665, "loss": 0.0523, "lr": 1.0386552305090256e-06, "epoch": 2.694647350548457, "percentage": 53.89, "elapsed_time": "2:22:24", "remaining_time": "2:01:49", "throughput": 8701.4, "total_tokens": 74345384} +{"current_steps": 110305, "total_steps": 204665, "loss": 0.035, "lr": 1.0385700165049565e-06, "epoch": 2.6947695013803044, "percentage": 53.9, "elapsed_time": "2:22:24", "remaining_time": "2:01:49", "throughput": 8701.43, "total_tokens": 74348584} +{"current_steps": 110310, "total_steps": 204665, "loss": 0.0611, "lr": 1.0384848022203955e-06, "epoch": 2.6948916522121515, "percentage": 53.9, "elapsed_time": "2:22:24", "remaining_time": "2:01:48", "throughput": 8701.51, "total_tokens": 74352360} +{"current_steps": 110315, "total_steps": 204665, "loss": 0.001, "lr": 1.0383995876559626e-06, "epoch": 2.6950138030439987, "percentage": 53.9, "elapsed_time": "2:22:25", "remaining_time": "2:01:48", "throughput": 8701.57, "total_tokens": 74356008} +{"current_steps": 110320, "total_steps": 204665, "loss": 0.0002, "lr": 1.0383143728122776e-06, "epoch": 2.695135953875846, "percentage": 53.9, "elapsed_time": "2:22:25", "remaining_time": "2:01:48", "throughput": 8701.6, "total_tokens": 74359272} +{"current_steps": 110325, "total_steps": 204665, "loss": 0.0304, "lr": 1.0382291576899605e-06, "epoch": 2.695258104707693, "percentage": 53.91, "elapsed_time": "2:22:25", "remaining_time": "2:01:47", "throughput": 8701.63, "total_tokens": 74362536} +{"current_steps": 110330, "total_steps": 204665, "loss": 0.0001, "lr": 1.03814394228963e-06, "epoch": 2.6953802555395403, "percentage": 53.91, "elapsed_time": "2:22:26", "remaining_time": "2:01:47", "throughput": 8701.64, "total_tokens": 74365544} +{"current_steps": 110335, "total_steps": 204665, "loss": 0.0003, "lr": 1.0380587266119072e-06, "epoch": 2.6955024063713875, "percentage": 53.91, "elapsed_time": "2:22:26", "remaining_time": "2:01:46", "throughput": 8701.68, "total_tokens": 74368872} +{"current_steps": 110340, "total_steps": 204665, "loss": 0.0465, "lr": 1.0379735106574107e-06, "epoch": 2.6956245572032347, "percentage": 53.91, "elapsed_time": "2:22:26", "remaining_time": "2:01:46", "throughput": 8701.71, "total_tokens": 74372200} +{"current_steps": 110345, "total_steps": 204665, "loss": 0.0579, "lr": 1.0378882944267608e-06, "epoch": 2.695746708035082, "percentage": 53.91, "elapsed_time": "2:22:27", "remaining_time": "2:01:45", "throughput": 8701.77, "total_tokens": 74375784} +{"current_steps": 110350, "total_steps": 204665, "loss": 0.1055, "lr": 1.037803077920577e-06, "epoch": 2.6958688588669286, "percentage": 53.92, "elapsed_time": "2:22:27", "remaining_time": "2:01:45", "throughput": 8701.81, "total_tokens": 74379112} +{"current_steps": 110355, "total_steps": 204665, "loss": 0.0908, "lr": 1.037717861139479e-06, "epoch": 2.6959910096987763, "percentage": 53.92, "elapsed_time": "2:22:27", "remaining_time": "2:01:45", "throughput": 8701.83, "total_tokens": 74382248} +{"current_steps": 110360, "total_steps": 204665, "loss": 0.083, "lr": 1.0376326440840863e-06, "epoch": 2.696113160530623, "percentage": 53.92, "elapsed_time": "2:22:28", "remaining_time": "2:01:44", "throughput": 8701.86, "total_tokens": 74385512} +{"current_steps": 110365, "total_steps": 204665, "loss": 0.0502, "lr": 1.0375474267550192e-06, "epoch": 2.6962353113624706, "percentage": 53.92, "elapsed_time": "2:22:28", "remaining_time": "2:01:44", "throughput": 8701.9, "total_tokens": 74388904} +{"current_steps": 110370, "total_steps": 204665, "loss": 0.0367, "lr": 1.0374622091528973e-06, "epoch": 2.6963574621943174, "percentage": 53.93, "elapsed_time": "2:22:28", "remaining_time": "2:01:43", "throughput": 8701.95, "total_tokens": 74392424} +{"current_steps": 110375, "total_steps": 204665, "loss": 0.0426, "lr": 1.03737699127834e-06, "epoch": 2.6964796130261646, "percentage": 53.93, "elapsed_time": "2:22:29", "remaining_time": "2:01:43", "throughput": 8702.05, "total_tokens": 74396392} +{"current_steps": 110380, "total_steps": 204665, "loss": 0.0002, "lr": 1.037291773131967e-06, "epoch": 2.6966017638580118, "percentage": 53.93, "elapsed_time": "2:22:29", "remaining_time": "2:01:42", "throughput": 8702.08, "total_tokens": 74399656} +{"current_steps": 110385, "total_steps": 204665, "loss": 0.061, "lr": 1.0372065547143982e-06, "epoch": 2.696723914689859, "percentage": 53.93, "elapsed_time": "2:22:29", "remaining_time": "2:01:42", "throughput": 8702.14, "total_tokens": 74403240} +{"current_steps": 110390, "total_steps": 204665, "loss": 0.0491, "lr": 1.0371213360262537e-06, "epoch": 2.696846065521706, "percentage": 53.94, "elapsed_time": "2:22:30", "remaining_time": "2:01:42", "throughput": 8702.21, "total_tokens": 74406888} +{"current_steps": 110395, "total_steps": 204665, "loss": 0.0968, "lr": 1.0370361170681525e-06, "epoch": 2.6969682163535533, "percentage": 53.94, "elapsed_time": "2:22:30", "remaining_time": "2:01:41", "throughput": 8702.23, "total_tokens": 74410024} +{"current_steps": 110400, "total_steps": 204665, "loss": 0.009, "lr": 1.0369508978407146e-06, "epoch": 2.6970903671854005, "percentage": 53.94, "elapsed_time": "2:22:31", "remaining_time": "2:01:41", "throughput": 8702.28, "total_tokens": 74413480} +{"current_steps": 110405, "total_steps": 204665, "loss": 0.0004, "lr": 1.0368656783445603e-06, "epoch": 2.6972125180172477, "percentage": 53.94, "elapsed_time": "2:22:31", "remaining_time": "2:01:40", "throughput": 8702.32, "total_tokens": 74416808} +{"current_steps": 110410, "total_steps": 204665, "loss": 0.0412, "lr": 1.0367804585803084e-06, "epoch": 2.697334668849095, "percentage": 53.95, "elapsed_time": "2:22:31", "remaining_time": "2:01:40", "throughput": 8702.46, "total_tokens": 74421224} +{"current_steps": 110415, "total_steps": 204665, "loss": 0.0004, "lr": 1.0366952385485792e-06, "epoch": 2.697456819680942, "percentage": 53.95, "elapsed_time": "2:22:32", "remaining_time": "2:01:40", "throughput": 8702.53, "total_tokens": 74425000} +{"current_steps": 110420, "total_steps": 204665, "loss": 0.0001, "lr": 1.0366100182499923e-06, "epoch": 2.6975789705127893, "percentage": 53.95, "elapsed_time": "2:22:32", "remaining_time": "2:01:39", "throughput": 8702.57, "total_tokens": 74428328} +{"current_steps": 110425, "total_steps": 204665, "loss": 0.015, "lr": 1.0365247976851677e-06, "epoch": 2.6977011213446365, "percentage": 53.95, "elapsed_time": "2:22:32", "remaining_time": "2:01:39", "throughput": 8702.68, "total_tokens": 74432424} +{"current_steps": 110430, "total_steps": 204665, "loss": 0.0002, "lr": 1.036439576854725e-06, "epoch": 2.6978232721764837, "percentage": 53.96, "elapsed_time": "2:22:33", "remaining_time": "2:01:38", "throughput": 8702.72, "total_tokens": 74435752} +{"current_steps": 110435, "total_steps": 204665, "loss": 0.0002, "lr": 1.0363543557592838e-06, "epoch": 2.697945423008331, "percentage": 53.96, "elapsed_time": "2:22:33", "remaining_time": "2:01:38", "throughput": 8702.75, "total_tokens": 74439016} +{"current_steps": 110440, "total_steps": 204665, "loss": 0.0001, "lr": 1.036269134399464e-06, "epoch": 2.698067573840178, "percentage": 53.96, "elapsed_time": "2:22:33", "remaining_time": "2:01:37", "throughput": 8702.81, "total_tokens": 74442600} +{"current_steps": 110445, "total_steps": 204665, "loss": 0.0664, "lr": 1.0361839127758854e-06, "epoch": 2.698189724672025, "percentage": 53.96, "elapsed_time": "2:22:34", "remaining_time": "2:01:37", "throughput": 8702.84, "total_tokens": 74445928} +{"current_steps": 110450, "total_steps": 204665, "loss": 0.0002, "lr": 1.0360986908891672e-06, "epoch": 2.6983118755038724, "percentage": 53.97, "elapsed_time": "2:22:34", "remaining_time": "2:01:37", "throughput": 8702.91, "total_tokens": 74449640} +{"current_steps": 110455, "total_steps": 204665, "loss": 0.0002, "lr": 1.03601346873993e-06, "epoch": 2.698434026335719, "percentage": 53.97, "elapsed_time": "2:22:34", "remaining_time": "2:01:36", "throughput": 8702.96, "total_tokens": 74453032} +{"current_steps": 110460, "total_steps": 204665, "loss": 0.0714, "lr": 1.035928246328793e-06, "epoch": 2.6985561771675664, "percentage": 53.97, "elapsed_time": "2:22:35", "remaining_time": "2:01:36", "throughput": 8702.99, "total_tokens": 74456360} +{"current_steps": 110465, "total_steps": 204665, "loss": 0.0001, "lr": 1.0358430236563762e-06, "epoch": 2.6986783279994135, "percentage": 53.97, "elapsed_time": "2:22:35", "remaining_time": "2:01:35", "throughput": 8703.06, "total_tokens": 74460008} +{"current_steps": 110470, "total_steps": 204665, "loss": 0.001, "lr": 1.0357578007232991e-06, "epoch": 2.6988004788312607, "percentage": 53.98, "elapsed_time": "2:22:35", "remaining_time": "2:01:35", "throughput": 8703.08, "total_tokens": 74463144} +{"current_steps": 110475, "total_steps": 204665, "loss": 0.0366, "lr": 1.035672577530182e-06, "epoch": 2.698922629663108, "percentage": 53.98, "elapsed_time": "2:22:36", "remaining_time": "2:01:35", "throughput": 8703.15, "total_tokens": 74466856} +{"current_steps": 110480, "total_steps": 204665, "loss": 0.0362, "lr": 1.0355873540776443e-06, "epoch": 2.699044780494955, "percentage": 53.98, "elapsed_time": "2:22:36", "remaining_time": "2:01:34", "throughput": 8703.19, "total_tokens": 74470184} +{"current_steps": 110485, "total_steps": 204665, "loss": 0.0514, "lr": 1.0355021303663053e-06, "epoch": 2.6991669313268023, "percentage": 53.98, "elapsed_time": "2:22:37", "remaining_time": "2:01:34", "throughput": 8703.26, "total_tokens": 74473896} +{"current_steps": 110490, "total_steps": 204665, "loss": 0.0279, "lr": 1.035416906396786e-06, "epoch": 2.6992890821586495, "percentage": 53.99, "elapsed_time": "2:22:37", "remaining_time": "2:01:33", "throughput": 8703.34, "total_tokens": 74477736} +{"current_steps": 110495, "total_steps": 204665, "loss": 0.0001, "lr": 1.035331682169705e-06, "epoch": 2.6994112329904967, "percentage": 53.99, "elapsed_time": "2:22:37", "remaining_time": "2:01:33", "throughput": 8703.36, "total_tokens": 74480872} +{"current_steps": 110500, "total_steps": 204665, "loss": 0.0, "lr": 1.0352464576856826e-06, "epoch": 2.699533383822344, "percentage": 53.99, "elapsed_time": "2:22:38", "remaining_time": "2:01:32", "throughput": 8703.38, "total_tokens": 74484072} +{"current_steps": 110505, "total_steps": 204665, "loss": 0.0002, "lr": 1.0351612329453384e-06, "epoch": 2.699655534654191, "percentage": 53.99, "elapsed_time": "2:22:38", "remaining_time": "2:01:32", "throughput": 8703.39, "total_tokens": 74487208} +{"current_steps": 110510, "total_steps": 204665, "loss": 0.0001, "lr": 1.0350760079492922e-06, "epoch": 2.6997776854860382, "percentage": 54.0, "elapsed_time": "2:22:38", "remaining_time": "2:01:32", "throughput": 8703.43, "total_tokens": 74490472} +{"current_steps": 110515, "total_steps": 204665, "loss": 0.0002, "lr": 1.0349907826981638e-06, "epoch": 2.6998998363178854, "percentage": 54.0, "elapsed_time": "2:22:39", "remaining_time": "2:01:31", "throughput": 8703.47, "total_tokens": 74493928} +{"current_steps": 110520, "total_steps": 204665, "loss": 0.0002, "lr": 1.0349055571925731e-06, "epoch": 2.7000219871497326, "percentage": 54.0, "elapsed_time": "2:22:39", "remaining_time": "2:01:31", "throughput": 8703.49, "total_tokens": 74497064} +{"current_steps": 110525, "total_steps": 204665, "loss": 0.0001, "lr": 1.0348203314331398e-06, "epoch": 2.70014413798158, "percentage": 54.0, "elapsed_time": "2:22:39", "remaining_time": "2:01:30", "throughput": 8703.52, "total_tokens": 74500456} +{"current_steps": 110530, "total_steps": 204665, "loss": 0.063, "lr": 1.0347351054204839e-06, "epoch": 2.7002662888134266, "percentage": 54.01, "elapsed_time": "2:22:40", "remaining_time": "2:01:30", "throughput": 8703.55, "total_tokens": 74503656} +{"current_steps": 110535, "total_steps": 204665, "loss": 0.0384, "lr": 1.0346498791552247e-06, "epoch": 2.700388439645274, "percentage": 54.01, "elapsed_time": "2:22:40", "remaining_time": "2:01:29", "throughput": 8703.58, "total_tokens": 74507048} +{"current_steps": 110540, "total_steps": 204665, "loss": 0.0447, "lr": 1.0345646526379824e-06, "epoch": 2.700510590477121, "percentage": 54.01, "elapsed_time": "2:22:40", "remaining_time": "2:01:29", "throughput": 8703.63, "total_tokens": 74510504} +{"current_steps": 110545, "total_steps": 204665, "loss": 0.0005, "lr": 1.0344794258693766e-06, "epoch": 2.7006327413089686, "percentage": 54.01, "elapsed_time": "2:22:41", "remaining_time": "2:01:29", "throughput": 8703.67, "total_tokens": 74513768} +{"current_steps": 110550, "total_steps": 204665, "loss": 0.0004, "lr": 1.0343941988500271e-06, "epoch": 2.7007548921408153, "percentage": 54.02, "elapsed_time": "2:22:41", "remaining_time": "2:01:28", "throughput": 8703.69, "total_tokens": 74517032} +{"current_steps": 110555, "total_steps": 204665, "loss": 0.0001, "lr": 1.0343089715805537e-06, "epoch": 2.7008770429726625, "percentage": 54.02, "elapsed_time": "2:22:41", "remaining_time": "2:01:28", "throughput": 8703.73, "total_tokens": 74520360} +{"current_steps": 110560, "total_steps": 204665, "loss": 0.0001, "lr": 1.0342237440615765e-06, "epoch": 2.7009991938045097, "percentage": 54.02, "elapsed_time": "2:22:42", "remaining_time": "2:01:27", "throughput": 8703.82, "total_tokens": 74524264} +{"current_steps": 110565, "total_steps": 204665, "loss": 0.0879, "lr": 1.0341385162937147e-06, "epoch": 2.701121344636357, "percentage": 54.02, "elapsed_time": "2:22:42", "remaining_time": "2:01:27", "throughput": 8703.87, "total_tokens": 74527848} +{"current_steps": 110570, "total_steps": 204665, "loss": 0.0001, "lr": 1.0340532882775887e-06, "epoch": 2.701243495468204, "percentage": 54.02, "elapsed_time": "2:22:42", "remaining_time": "2:01:27", "throughput": 8703.91, "total_tokens": 74531176} +{"current_steps": 110575, "total_steps": 204665, "loss": 0.0001, "lr": 1.0339680600138176e-06, "epoch": 2.7013656463000513, "percentage": 54.03, "elapsed_time": "2:22:43", "remaining_time": "2:01:26", "throughput": 8703.96, "total_tokens": 74534632} +{"current_steps": 110580, "total_steps": 204665, "loss": 0.115, "lr": 1.033882831503022e-06, "epoch": 2.7014877971318985, "percentage": 54.03, "elapsed_time": "2:22:43", "remaining_time": "2:01:26", "throughput": 8704.0, "total_tokens": 74538024} +{"current_steps": 110585, "total_steps": 204665, "loss": 0.0003, "lr": 1.0337976027458213e-06, "epoch": 2.7016099479637457, "percentage": 54.03, "elapsed_time": "2:22:43", "remaining_time": "2:01:25", "throughput": 8704.03, "total_tokens": 74541288} +{"current_steps": 110590, "total_steps": 204665, "loss": 0.0002, "lr": 1.0337123737428352e-06, "epoch": 2.701732098795593, "percentage": 54.03, "elapsed_time": "2:22:44", "remaining_time": "2:01:25", "throughput": 8704.04, "total_tokens": 74544360} +{"current_steps": 110595, "total_steps": 204665, "loss": 0.0741, "lr": 1.033627144494684e-06, "epoch": 2.70185424962744, "percentage": 54.04, "elapsed_time": "2:22:44", "remaining_time": "2:01:24", "throughput": 8704.08, "total_tokens": 74547688} +{"current_steps": 110600, "total_steps": 204665, "loss": 0.0002, "lr": 1.033541915001987e-06, "epoch": 2.701976400459287, "percentage": 54.04, "elapsed_time": "2:22:45", "remaining_time": "2:01:24", "throughput": 8704.11, "total_tokens": 74550888} +{"current_steps": 110605, "total_steps": 204665, "loss": 0.0412, "lr": 1.033456685265364e-06, "epoch": 2.7020985512911344, "percentage": 54.04, "elapsed_time": "2:22:45", "remaining_time": "2:01:24", "throughput": 8704.15, "total_tokens": 74554280} +{"current_steps": 110610, "total_steps": 204665, "loss": 0.0001, "lr": 1.0333714552854349e-06, "epoch": 2.7022207021229816, "percentage": 54.04, "elapsed_time": "2:22:45", "remaining_time": "2:01:23", "throughput": 8704.18, "total_tokens": 74557544} +{"current_steps": 110615, "total_steps": 204665, "loss": 0.0001, "lr": 1.0332862250628198e-06, "epoch": 2.7023428529548283, "percentage": 54.05, "elapsed_time": "2:22:46", "remaining_time": "2:01:23", "throughput": 8704.19, "total_tokens": 74560552} +{"current_steps": 110620, "total_steps": 204665, "loss": 0.0002, "lr": 1.0332009945981384e-06, "epoch": 2.702465003786676, "percentage": 54.05, "elapsed_time": "2:22:46", "remaining_time": "2:01:22", "throughput": 8704.26, "total_tokens": 74564264} +{"current_steps": 110625, "total_steps": 204665, "loss": 0.0114, "lr": 1.0331157638920102e-06, "epoch": 2.7025871546185227, "percentage": 54.05, "elapsed_time": "2:22:46", "remaining_time": "2:01:22", "throughput": 8704.29, "total_tokens": 74567464} +{"current_steps": 110630, "total_steps": 204665, "loss": 0.0342, "lr": 1.0330305329450555e-06, "epoch": 2.7027093054503704, "percentage": 54.05, "elapsed_time": "2:22:47", "remaining_time": "2:01:21", "throughput": 8704.3, "total_tokens": 74570536} +{"current_steps": 110635, "total_steps": 204665, "loss": 0.0003, "lr": 1.0329453017578937e-06, "epoch": 2.702831456282217, "percentage": 54.06, "elapsed_time": "2:22:47", "remaining_time": "2:01:21", "throughput": 8704.38, "total_tokens": 74574312} +{"current_steps": 110640, "total_steps": 204665, "loss": 0.0523, "lr": 1.0328600703311447e-06, "epoch": 2.7029536071140643, "percentage": 54.06, "elapsed_time": "2:22:47", "remaining_time": "2:01:21", "throughput": 8704.41, "total_tokens": 74577640} +{"current_steps": 110645, "total_steps": 204665, "loss": 0.0002, "lr": 1.0327748386654287e-06, "epoch": 2.7030757579459115, "percentage": 54.06, "elapsed_time": "2:22:48", "remaining_time": "2:01:20", "throughput": 8704.47, "total_tokens": 74581160} +{"current_steps": 110650, "total_steps": 204665, "loss": 0.0001, "lr": 1.0326896067613654e-06, "epoch": 2.7031979087777587, "percentage": 54.06, "elapsed_time": "2:22:48", "remaining_time": "2:01:20", "throughput": 8704.54, "total_tokens": 74584808} +{"current_steps": 110655, "total_steps": 204665, "loss": 0.0419, "lr": 1.032604374619574e-06, "epoch": 2.703320059609606, "percentage": 54.07, "elapsed_time": "2:22:48", "remaining_time": "2:01:19", "throughput": 8704.61, "total_tokens": 74588520} +{"current_steps": 110660, "total_steps": 204665, "loss": 0.049, "lr": 1.0325191422406751e-06, "epoch": 2.703442210441453, "percentage": 54.07, "elapsed_time": "2:22:49", "remaining_time": "2:01:19", "throughput": 8704.67, "total_tokens": 74592104} +{"current_steps": 110665, "total_steps": 204665, "loss": 0.0638, "lr": 1.0324339096252883e-06, "epoch": 2.7035643612733002, "percentage": 54.07, "elapsed_time": "2:22:49", "remaining_time": "2:01:19", "throughput": 8704.75, "total_tokens": 74595944} +{"current_steps": 110670, "total_steps": 204665, "loss": 0.0001, "lr": 1.0323486767740331e-06, "epoch": 2.7036865121051474, "percentage": 54.07, "elapsed_time": "2:22:49", "remaining_time": "2:01:18", "throughput": 8704.81, "total_tokens": 74599528} +{"current_steps": 110675, "total_steps": 204665, "loss": 0.0002, "lr": 1.03226344368753e-06, "epoch": 2.7038086629369946, "percentage": 54.08, "elapsed_time": "2:22:50", "remaining_time": "2:01:18", "throughput": 8704.85, "total_tokens": 74602856} +{"current_steps": 110680, "total_steps": 204665, "loss": 0.0468, "lr": 1.032178210366398e-06, "epoch": 2.703930813768842, "percentage": 54.08, "elapsed_time": "2:22:50", "remaining_time": "2:01:17", "throughput": 8704.92, "total_tokens": 74606632} +{"current_steps": 110685, "total_steps": 204665, "loss": 0.0465, "lr": 1.0320929768112578e-06, "epoch": 2.704052964600689, "percentage": 54.08, "elapsed_time": "2:22:50", "remaining_time": "2:01:17", "throughput": 8704.94, "total_tokens": 74609704} +{"current_steps": 110690, "total_steps": 204665, "loss": 0.0526, "lr": 1.0320077430227287e-06, "epoch": 2.704175115432536, "percentage": 54.08, "elapsed_time": "2:22:51", "remaining_time": "2:01:16", "throughput": 8705.02, "total_tokens": 74613544} +{"current_steps": 110695, "total_steps": 204665, "loss": 0.0001, "lr": 1.0319225090014307e-06, "epoch": 2.7042972662643834, "percentage": 54.09, "elapsed_time": "2:22:51", "remaining_time": "2:01:16", "throughput": 8705.09, "total_tokens": 74617192} +{"current_steps": 110700, "total_steps": 204665, "loss": 0.0001, "lr": 1.0318372747479838e-06, "epoch": 2.7044194170962306, "percentage": 54.09, "elapsed_time": "2:22:52", "remaining_time": "2:01:16", "throughput": 8705.13, "total_tokens": 74620584} +{"current_steps": 110705, "total_steps": 204665, "loss": 0.0967, "lr": 1.0317520402630076e-06, "epoch": 2.7045415679280778, "percentage": 54.09, "elapsed_time": "2:22:52", "remaining_time": "2:01:15", "throughput": 8705.17, "total_tokens": 74623912} +{"current_steps": 110710, "total_steps": 204665, "loss": 0.055, "lr": 1.0316668055471219e-06, "epoch": 2.7046637187599245, "percentage": 54.09, "elapsed_time": "2:22:52", "remaining_time": "2:01:15", "throughput": 8705.21, "total_tokens": 74627304} +{"current_steps": 110715, "total_steps": 204665, "loss": 0.002, "lr": 1.0315815706009464e-06, "epoch": 2.704785869591772, "percentage": 54.1, "elapsed_time": "2:22:53", "remaining_time": "2:01:14", "throughput": 8705.23, "total_tokens": 74630440} +{"current_steps": 110720, "total_steps": 204665, "loss": 0.0004, "lr": 1.0314963354251018e-06, "epoch": 2.704908020423619, "percentage": 54.1, "elapsed_time": "2:22:53", "remaining_time": "2:01:14", "throughput": 8705.27, "total_tokens": 74633832} +{"current_steps": 110725, "total_steps": 204665, "loss": 0.0001, "lr": 1.031411100020207e-06, "epoch": 2.7050301712554665, "percentage": 54.1, "elapsed_time": "2:22:53", "remaining_time": "2:01:14", "throughput": 8705.29, "total_tokens": 74636968} +{"current_steps": 110730, "total_steps": 204665, "loss": 0.0001, "lr": 1.0313258643868823e-06, "epoch": 2.7051523220873133, "percentage": 54.1, "elapsed_time": "2:22:54", "remaining_time": "2:01:13", "throughput": 8705.34, "total_tokens": 74640424} +{"current_steps": 110735, "total_steps": 204665, "loss": 0.0516, "lr": 1.0312406285257474e-06, "epoch": 2.7052744729191605, "percentage": 54.11, "elapsed_time": "2:22:54", "remaining_time": "2:01:13", "throughput": 8705.34, "total_tokens": 74643368} +{"current_steps": 110740, "total_steps": 204665, "loss": 0.0442, "lr": 1.0311553924374224e-06, "epoch": 2.7053966237510076, "percentage": 54.11, "elapsed_time": "2:22:54", "remaining_time": "2:01:12", "throughput": 8705.35, "total_tokens": 74646440} +{"current_steps": 110745, "total_steps": 204665, "loss": 0.0001, "lr": 1.031070156122527e-06, "epoch": 2.705518774582855, "percentage": 54.11, "elapsed_time": "2:22:55", "remaining_time": "2:01:12", "throughput": 8705.4, "total_tokens": 74649960} +{"current_steps": 110750, "total_steps": 204665, "loss": 0.0001, "lr": 1.030984919581681e-06, "epoch": 2.705640925414702, "percentage": 54.11, "elapsed_time": "2:22:55", "remaining_time": "2:01:11", "throughput": 8705.45, "total_tokens": 74653480} +{"current_steps": 110755, "total_steps": 204665, "loss": 0.1049, "lr": 1.0308996828155048e-06, "epoch": 2.705763076246549, "percentage": 54.12, "elapsed_time": "2:22:55", "remaining_time": "2:01:11", "throughput": 8705.48, "total_tokens": 74656744} +{"current_steps": 110760, "total_steps": 204665, "loss": 0.0002, "lr": 1.0308144458246172e-06, "epoch": 2.7058852270783964, "percentage": 54.12, "elapsed_time": "2:22:56", "remaining_time": "2:01:11", "throughput": 8705.52, "total_tokens": 74660136} +{"current_steps": 110765, "total_steps": 204665, "loss": 0.0001, "lr": 1.0307292086096386e-06, "epoch": 2.7060073779102436, "percentage": 54.12, "elapsed_time": "2:22:56", "remaining_time": "2:01:10", "throughput": 8705.54, "total_tokens": 74663272} +{"current_steps": 110770, "total_steps": 204665, "loss": 0.1075, "lr": 1.0306439711711893e-06, "epoch": 2.706129528742091, "percentage": 54.12, "elapsed_time": "2:22:56", "remaining_time": "2:01:10", "throughput": 8705.59, "total_tokens": 74666792} +{"current_steps": 110775, "total_steps": 204665, "loss": 0.0689, "lr": 1.0305587335098887e-06, "epoch": 2.706251679573938, "percentage": 54.13, "elapsed_time": "2:22:57", "remaining_time": "2:01:09", "throughput": 8705.61, "total_tokens": 74670056} +{"current_steps": 110780, "total_steps": 204665, "loss": 0.0003, "lr": 1.0304734956263567e-06, "epoch": 2.706373830405785, "percentage": 54.13, "elapsed_time": "2:22:57", "remaining_time": "2:01:09", "throughput": 8705.64, "total_tokens": 74673256} +{"current_steps": 110785, "total_steps": 204665, "loss": 0.0788, "lr": 1.0303882575212132e-06, "epoch": 2.7064959812376324, "percentage": 54.13, "elapsed_time": "2:22:57", "remaining_time": "2:01:08", "throughput": 8705.67, "total_tokens": 74676584} +{"current_steps": 110790, "total_steps": 204665, "loss": 0.0001, "lr": 1.0303030191950784e-06, "epoch": 2.7066181320694795, "percentage": 54.13, "elapsed_time": "2:22:58", "remaining_time": "2:01:08", "throughput": 8705.73, "total_tokens": 74680168} +{"current_steps": 110795, "total_steps": 204665, "loss": 0.0001, "lr": 1.0302177806485715e-06, "epoch": 2.7067402829013263, "percentage": 54.13, "elapsed_time": "2:22:58", "remaining_time": "2:01:08", "throughput": 8705.76, "total_tokens": 74683496} +{"current_steps": 110800, "total_steps": 204665, "loss": 0.0943, "lr": 1.030132541882313e-06, "epoch": 2.706862433733174, "percentage": 54.14, "elapsed_time": "2:22:58", "remaining_time": "2:01:07", "throughput": 8705.8, "total_tokens": 74686760} +{"current_steps": 110805, "total_steps": 204665, "loss": 0.0065, "lr": 1.0300473028969225e-06, "epoch": 2.7069845845650207, "percentage": 54.14, "elapsed_time": "2:22:59", "remaining_time": "2:01:07", "throughput": 8705.82, "total_tokens": 74689960} +{"current_steps": 110810, "total_steps": 204665, "loss": 0.0007, "lr": 1.02996206369302e-06, "epoch": 2.7071067353968683, "percentage": 54.14, "elapsed_time": "2:22:59", "remaining_time": "2:01:06", "throughput": 8705.84, "total_tokens": 74693160} +{"current_steps": 110815, "total_steps": 204665, "loss": 0.0002, "lr": 1.0298768242712253e-06, "epoch": 2.707228886228715, "percentage": 54.14, "elapsed_time": "2:23:00", "remaining_time": "2:01:06", "throughput": 8705.89, "total_tokens": 74696552} +{"current_steps": 110820, "total_steps": 204665, "loss": 0.0875, "lr": 1.0297915846321583e-06, "epoch": 2.7073510370605622, "percentage": 54.15, "elapsed_time": "2:23:00", "remaining_time": "2:01:06", "throughput": 8705.94, "total_tokens": 74700008} +{"current_steps": 110825, "total_steps": 204665, "loss": 0.1667, "lr": 1.0297063447764387e-06, "epoch": 2.7074731878924094, "percentage": 54.15, "elapsed_time": "2:23:00", "remaining_time": "2:01:05", "throughput": 8705.94, "total_tokens": 74703016} +{"current_steps": 110830, "total_steps": 204665, "loss": 0.0567, "lr": 1.0296211047046865e-06, "epoch": 2.7075953387242566, "percentage": 54.15, "elapsed_time": "2:23:01", "remaining_time": "2:01:05", "throughput": 8705.96, "total_tokens": 74706216} +{"current_steps": 110835, "total_steps": 204665, "loss": 0.0626, "lr": 1.0295358644175222e-06, "epoch": 2.707717489556104, "percentage": 54.15, "elapsed_time": "2:23:01", "remaining_time": "2:01:04", "throughput": 8706.05, "total_tokens": 74710056} +{"current_steps": 110840, "total_steps": 204665, "loss": 0.001, "lr": 1.0294506239155647e-06, "epoch": 2.707839640387951, "percentage": 54.16, "elapsed_time": "2:23:01", "remaining_time": "2:01:04", "throughput": 8706.08, "total_tokens": 74713448} +{"current_steps": 110845, "total_steps": 204665, "loss": 0.0469, "lr": 1.0293653831994345e-06, "epoch": 2.707961791219798, "percentage": 54.16, "elapsed_time": "2:23:02", "remaining_time": "2:01:03", "throughput": 8706.14, "total_tokens": 74717032} +{"current_steps": 110850, "total_steps": 204665, "loss": 0.0831, "lr": 1.0292801422697512e-06, "epoch": 2.7080839420516454, "percentage": 54.16, "elapsed_time": "2:23:02", "remaining_time": "2:01:03", "throughput": 8706.19, "total_tokens": 74720424} +{"current_steps": 110855, "total_steps": 204665, "loss": 0.0002, "lr": 1.029194901127135e-06, "epoch": 2.7082060928834926, "percentage": 54.16, "elapsed_time": "2:23:02", "remaining_time": "2:01:03", "throughput": 8706.2, "total_tokens": 74723496} +{"current_steps": 110860, "total_steps": 204665, "loss": 0.0571, "lr": 1.0291096597722054e-06, "epoch": 2.7083282437153398, "percentage": 54.17, "elapsed_time": "2:23:03", "remaining_time": "2:01:02", "throughput": 8706.24, "total_tokens": 74726888} +{"current_steps": 110865, "total_steps": 204665, "loss": 0.0003, "lr": 1.0290244182055828e-06, "epoch": 2.708450394547187, "percentage": 54.17, "elapsed_time": "2:23:03", "remaining_time": "2:01:02", "throughput": 8706.27, "total_tokens": 74730216} +{"current_steps": 110870, "total_steps": 204665, "loss": 0.0573, "lr": 1.0289391764278868e-06, "epoch": 2.708572545379034, "percentage": 54.17, "elapsed_time": "2:23:03", "remaining_time": "2:01:01", "throughput": 8706.28, "total_tokens": 74733224} +{"current_steps": 110875, "total_steps": 204665, "loss": 0.0729, "lr": 1.0288539344397371e-06, "epoch": 2.7086946962108813, "percentage": 54.17, "elapsed_time": "2:23:04", "remaining_time": "2:01:01", "throughput": 8706.3, "total_tokens": 74736360} +{"current_steps": 110880, "total_steps": 204665, "loss": 0.0665, "lr": 1.028768692241754e-06, "epoch": 2.7088168470427285, "percentage": 54.18, "elapsed_time": "2:23:04", "remaining_time": "2:01:00", "throughput": 8706.34, "total_tokens": 74739752} +{"current_steps": 110885, "total_steps": 204665, "loss": 0.0434, "lr": 1.028683449834557e-06, "epoch": 2.7089389978745757, "percentage": 54.18, "elapsed_time": "2:23:04", "remaining_time": "2:01:00", "throughput": 8706.36, "total_tokens": 74742888} +{"current_steps": 110890, "total_steps": 204665, "loss": 0.0005, "lr": 1.0285982072187665e-06, "epoch": 2.7090611487064225, "percentage": 54.18, "elapsed_time": "2:23:05", "remaining_time": "2:01:00", "throughput": 8706.37, "total_tokens": 74746024} +{"current_steps": 110895, "total_steps": 204665, "loss": 0.0003, "lr": 1.028512964395002e-06, "epoch": 2.70918329953827, "percentage": 54.18, "elapsed_time": "2:23:05", "remaining_time": "2:00:59", "throughput": 8706.46, "total_tokens": 74749928} +{"current_steps": 110900, "total_steps": 204665, "loss": 0.049, "lr": 1.0284277213638837e-06, "epoch": 2.709305450370117, "percentage": 54.19, "elapsed_time": "2:23:05", "remaining_time": "2:00:59", "throughput": 8706.49, "total_tokens": 74753192} +{"current_steps": 110905, "total_steps": 204665, "loss": 0.035, "lr": 1.0283424781260312e-06, "epoch": 2.709427601201964, "percentage": 54.19, "elapsed_time": "2:23:06", "remaining_time": "2:00:58", "throughput": 8706.5, "total_tokens": 74756264} +{"current_steps": 110910, "total_steps": 204665, "loss": 0.0002, "lr": 1.028257234682065e-06, "epoch": 2.709549752033811, "percentage": 54.19, "elapsed_time": "2:23:06", "remaining_time": "2:00:58", "throughput": 8706.51, "total_tokens": 74759272} +{"current_steps": 110915, "total_steps": 204665, "loss": 0.072, "lr": 1.028171991032604e-06, "epoch": 2.7096719028656584, "percentage": 54.19, "elapsed_time": "2:23:06", "remaining_time": "2:00:58", "throughput": 8706.55, "total_tokens": 74762728} +{"current_steps": 110920, "total_steps": 204665, "loss": 0.0009, "lr": 1.0280867471782691e-06, "epoch": 2.7097940536975056, "percentage": 54.2, "elapsed_time": "2:23:07", "remaining_time": "2:00:57", "throughput": 8706.57, "total_tokens": 74765864} +{"current_steps": 110925, "total_steps": 204665, "loss": 0.0694, "lr": 1.0280015031196796e-06, "epoch": 2.709916204529353, "percentage": 54.2, "elapsed_time": "2:23:07", "remaining_time": "2:00:57", "throughput": 8706.6, "total_tokens": 74769064} +{"current_steps": 110930, "total_steps": 204665, "loss": 0.0984, "lr": 1.0279162588574557e-06, "epoch": 2.7100383553612, "percentage": 54.2, "elapsed_time": "2:23:07", "remaining_time": "2:00:56", "throughput": 8706.61, "total_tokens": 74772136} +{"current_steps": 110935, "total_steps": 204665, "loss": 0.1046, "lr": 1.0278310143922173e-06, "epoch": 2.710160506193047, "percentage": 54.2, "elapsed_time": "2:23:08", "remaining_time": "2:00:56", "throughput": 8706.67, "total_tokens": 74775784} +{"current_steps": 110940, "total_steps": 204665, "loss": 0.0369, "lr": 1.0277457697245842e-06, "epoch": 2.7102826570248943, "percentage": 54.21, "elapsed_time": "2:23:08", "remaining_time": "2:00:55", "throughput": 8706.73, "total_tokens": 74779304} +{"current_steps": 110945, "total_steps": 204665, "loss": 0.0423, "lr": 1.0276605248551764e-06, "epoch": 2.7104048078567415, "percentage": 54.21, "elapsed_time": "2:23:09", "remaining_time": "2:00:55", "throughput": 8706.74, "total_tokens": 74782440} +{"current_steps": 110950, "total_steps": 204665, "loss": 0.053, "lr": 1.0275752797846137e-06, "epoch": 2.7105269586885887, "percentage": 54.21, "elapsed_time": "2:23:09", "remaining_time": "2:00:55", "throughput": 8706.75, "total_tokens": 74785448} +{"current_steps": 110955, "total_steps": 204665, "loss": 0.0615, "lr": 1.0274900345135167e-06, "epoch": 2.710649109520436, "percentage": 54.21, "elapsed_time": "2:23:09", "remaining_time": "2:00:54", "throughput": 8706.76, "total_tokens": 74788584} +{"current_steps": 110960, "total_steps": 204665, "loss": 0.0954, "lr": 1.0274047890425043e-06, "epoch": 2.710771260352283, "percentage": 54.22, "elapsed_time": "2:23:10", "remaining_time": "2:00:54", "throughput": 8706.77, "total_tokens": 74791656} +{"current_steps": 110965, "total_steps": 204665, "loss": 0.0367, "lr": 1.027319543372197e-06, "epoch": 2.7108934111841303, "percentage": 54.22, "elapsed_time": "2:23:10", "remaining_time": "2:00:53", "throughput": 8706.79, "total_tokens": 74794792} +{"current_steps": 110970, "total_steps": 204665, "loss": 0.0134, "lr": 1.0272342975032147e-06, "epoch": 2.7110155620159775, "percentage": 54.22, "elapsed_time": "2:23:10", "remaining_time": "2:00:53", "throughput": 8706.8, "total_tokens": 74797928} +{"current_steps": 110975, "total_steps": 204665, "loss": 0.0001, "lr": 1.0271490514361771e-06, "epoch": 2.7111377128478242, "percentage": 54.22, "elapsed_time": "2:23:11", "remaining_time": "2:00:52", "throughput": 8706.86, "total_tokens": 74801448} +{"current_steps": 110980, "total_steps": 204665, "loss": 0.0006, "lr": 1.0270638051717041e-06, "epoch": 2.711259863679672, "percentage": 54.23, "elapsed_time": "2:23:11", "remaining_time": "2:00:52", "throughput": 8706.9, "total_tokens": 74804904} +{"current_steps": 110985, "total_steps": 204665, "loss": 0.0006, "lr": 1.0269785587104163e-06, "epoch": 2.7113820145115186, "percentage": 54.23, "elapsed_time": "2:23:11", "remaining_time": "2:00:52", "throughput": 8706.97, "total_tokens": 74808616} +{"current_steps": 110990, "total_steps": 204665, "loss": 0.0005, "lr": 1.0268933120529332e-06, "epoch": 2.7115041653433662, "percentage": 54.23, "elapsed_time": "2:23:12", "remaining_time": "2:00:51", "throughput": 8706.99, "total_tokens": 74811752} +{"current_steps": 110995, "total_steps": 204665, "loss": 0.0923, "lr": 1.0268080651998744e-06, "epoch": 2.711626316175213, "percentage": 54.23, "elapsed_time": "2:23:12", "remaining_time": "2:00:51", "throughput": 8707.02, "total_tokens": 74814952} +{"current_steps": 111000, "total_steps": 204665, "loss": 0.1456, "lr": 1.0267228181518601e-06, "epoch": 2.71174846700706, "percentage": 54.23, "elapsed_time": "2:23:12", "remaining_time": "2:00:50", "throughput": 8707.05, "total_tokens": 74818280} +{"current_steps": 111005, "total_steps": 204665, "loss": 0.0662, "lr": 1.0266375709095103e-06, "epoch": 2.7118706178389074, "percentage": 54.24, "elapsed_time": "2:23:13", "remaining_time": "2:00:50", "throughput": 8707.07, "total_tokens": 74821352} +{"current_steps": 111010, "total_steps": 204665, "loss": 0.0913, "lr": 1.0265523234734453e-06, "epoch": 2.7119927686707546, "percentage": 54.24, "elapsed_time": "2:23:13", "remaining_time": "2:00:50", "throughput": 8707.13, "total_tokens": 74824936} +{"current_steps": 111015, "total_steps": 204665, "loss": 0.0007, "lr": 1.0264670758442843e-06, "epoch": 2.7121149195026018, "percentage": 54.24, "elapsed_time": "2:23:13", "remaining_time": "2:00:49", "throughput": 8707.23, "total_tokens": 74828968} +{"current_steps": 111020, "total_steps": 204665, "loss": 0.0701, "lr": 1.0263818280226477e-06, "epoch": 2.712237070334449, "percentage": 54.24, "elapsed_time": "2:23:14", "remaining_time": "2:00:49", "throughput": 8707.27, "total_tokens": 74832360} +{"current_steps": 111025, "total_steps": 204665, "loss": 0.0004, "lr": 1.0262965800091553e-06, "epoch": 2.712359221166296, "percentage": 54.25, "elapsed_time": "2:23:14", "remaining_time": "2:00:48", "throughput": 8707.34, "total_tokens": 74836008} +{"current_steps": 111030, "total_steps": 204665, "loss": 0.0006, "lr": 1.0262113318044271e-06, "epoch": 2.7124813719981433, "percentage": 54.25, "elapsed_time": "2:23:14", "remaining_time": "2:00:48", "throughput": 8707.36, "total_tokens": 74839272} +{"current_steps": 111035, "total_steps": 204665, "loss": 0.001, "lr": 1.0261260834090833e-06, "epoch": 2.7126035228299905, "percentage": 54.25, "elapsed_time": "2:23:15", "remaining_time": "2:00:47", "throughput": 8707.37, "total_tokens": 74842280} +{"current_steps": 111040, "total_steps": 204665, "loss": 0.0838, "lr": 1.0260408348237432e-06, "epoch": 2.7127256736618377, "percentage": 54.25, "elapsed_time": "2:23:15", "remaining_time": "2:00:47", "throughput": 8707.41, "total_tokens": 74845608} +{"current_steps": 111045, "total_steps": 204665, "loss": 0.1082, "lr": 1.0259555860490272e-06, "epoch": 2.712847824493685, "percentage": 54.26, "elapsed_time": "2:23:15", "remaining_time": "2:00:47", "throughput": 8707.44, "total_tokens": 74848936} +{"current_steps": 111050, "total_steps": 204665, "loss": 0.0002, "lr": 1.0258703370855553e-06, "epoch": 2.712969975325532, "percentage": 54.26, "elapsed_time": "2:23:16", "remaining_time": "2:00:46", "throughput": 8707.45, "total_tokens": 74852008} +{"current_steps": 111055, "total_steps": 204665, "loss": 0.0004, "lr": 1.0257850879339474e-06, "epoch": 2.7130921261573793, "percentage": 54.26, "elapsed_time": "2:23:16", "remaining_time": "2:00:46", "throughput": 8707.49, "total_tokens": 74855336} +{"current_steps": 111060, "total_steps": 204665, "loss": 0.0002, "lr": 1.0256998385948234e-06, "epoch": 2.7132142769892265, "percentage": 54.26, "elapsed_time": "2:23:17", "remaining_time": "2:00:45", "throughput": 8707.5, "total_tokens": 74858408} +{"current_steps": 111065, "total_steps": 204665, "loss": 0.0816, "lr": 1.0256145890688035e-06, "epoch": 2.7133364278210736, "percentage": 54.27, "elapsed_time": "2:23:17", "remaining_time": "2:00:45", "throughput": 8707.57, "total_tokens": 74862056} +{"current_steps": 111070, "total_steps": 204665, "loss": 0.0331, "lr": 1.0255293393565073e-06, "epoch": 2.7134585786529204, "percentage": 54.27, "elapsed_time": "2:23:17", "remaining_time": "2:00:45", "throughput": 8707.61, "total_tokens": 74865512} +{"current_steps": 111075, "total_steps": 204665, "loss": 0.0002, "lr": 1.0254440894585543e-06, "epoch": 2.713580729484768, "percentage": 54.27, "elapsed_time": "2:23:18", "remaining_time": "2:00:44", "throughput": 8707.67, "total_tokens": 74869096} +{"current_steps": 111080, "total_steps": 204665, "loss": 0.085, "lr": 1.0253588393755653e-06, "epoch": 2.7137028803166148, "percentage": 54.27, "elapsed_time": "2:23:18", "remaining_time": "2:00:44", "throughput": 8707.7, "total_tokens": 74872296} +{"current_steps": 111085, "total_steps": 204665, "loss": 0.0866, "lr": 1.0252735891081604e-06, "epoch": 2.713825031148462, "percentage": 54.28, "elapsed_time": "2:23:18", "remaining_time": "2:00:43", "throughput": 8707.72, "total_tokens": 74875496} +{"current_steps": 111090, "total_steps": 204665, "loss": 0.0001, "lr": 1.025188338656959e-06, "epoch": 2.713947181980309, "percentage": 54.28, "elapsed_time": "2:23:19", "remaining_time": "2:00:43", "throughput": 8707.74, "total_tokens": 74878696} +{"current_steps": 111095, "total_steps": 204665, "loss": 0.0315, "lr": 1.025103088022581e-06, "epoch": 2.7140693328121563, "percentage": 54.28, "elapsed_time": "2:23:19", "remaining_time": "2:00:42", "throughput": 8707.76, "total_tokens": 74881768} +{"current_steps": 111100, "total_steps": 204665, "loss": 0.0004, "lr": 1.0250178372056468e-06, "epoch": 2.7141914836440035, "percentage": 54.28, "elapsed_time": "2:23:19", "remaining_time": "2:00:42", "throughput": 8707.77, "total_tokens": 74884904} +{"current_steps": 111105, "total_steps": 204665, "loss": 0.0004, "lr": 1.024932586206776e-06, "epoch": 2.7143136344758507, "percentage": 54.29, "elapsed_time": "2:23:20", "remaining_time": "2:00:42", "throughput": 8707.83, "total_tokens": 74888424} +{"current_steps": 111110, "total_steps": 204665, "loss": 0.0539, "lr": 1.0248473350265892e-06, "epoch": 2.714435785307698, "percentage": 54.29, "elapsed_time": "2:23:20", "remaining_time": "2:00:41", "throughput": 8707.83, "total_tokens": 74891432} +{"current_steps": 111115, "total_steps": 204665, "loss": 0.0001, "lr": 1.0247620836657053e-06, "epoch": 2.714557936139545, "percentage": 54.29, "elapsed_time": "2:23:20", "remaining_time": "2:00:41", "throughput": 8707.88, "total_tokens": 74894888} +{"current_steps": 111120, "total_steps": 204665, "loss": 0.0004, "lr": 1.0246768321247452e-06, "epoch": 2.7146800869713923, "percentage": 54.29, "elapsed_time": "2:23:21", "remaining_time": "2:00:40", "throughput": 8707.9, "total_tokens": 74898088} +{"current_steps": 111125, "total_steps": 204665, "loss": 0.0001, "lr": 1.0245915804043283e-06, "epoch": 2.7148022378032395, "percentage": 54.3, "elapsed_time": "2:23:21", "remaining_time": "2:00:40", "throughput": 8707.91, "total_tokens": 74901160} +{"current_steps": 111130, "total_steps": 204665, "loss": 0.1044, "lr": 1.0245063285050751e-06, "epoch": 2.7149243886350867, "percentage": 54.3, "elapsed_time": "2:23:21", "remaining_time": "2:00:39", "throughput": 8707.99, "total_tokens": 74904936} +{"current_steps": 111135, "total_steps": 204665, "loss": 0.0001, "lr": 1.024421076427605e-06, "epoch": 2.715046539466934, "percentage": 54.3, "elapsed_time": "2:23:22", "remaining_time": "2:00:39", "throughput": 8708.04, "total_tokens": 74908392} +{"current_steps": 111140, "total_steps": 204665, "loss": 0.0003, "lr": 1.0243358241725383e-06, "epoch": 2.715168690298781, "percentage": 54.3, "elapsed_time": "2:23:22", "remaining_time": "2:00:39", "throughput": 8708.04, "total_tokens": 74911400} +{"current_steps": 111145, "total_steps": 204665, "loss": 0.0717, "lr": 1.0242505717404953e-06, "epoch": 2.7152908411306282, "percentage": 54.31, "elapsed_time": "2:23:22", "remaining_time": "2:00:38", "throughput": 8708.09, "total_tokens": 74914792} +{"current_steps": 111150, "total_steps": 204665, "loss": 0.0001, "lr": 1.0241653191320952e-06, "epoch": 2.7154129919624754, "percentage": 54.31, "elapsed_time": "2:23:23", "remaining_time": "2:00:38", "throughput": 8708.11, "total_tokens": 74918056} +{"current_steps": 111155, "total_steps": 204665, "loss": 0.0002, "lr": 1.0240800663479586e-06, "epoch": 2.715535142794322, "percentage": 54.31, "elapsed_time": "2:23:23", "remaining_time": "2:00:37", "throughput": 8708.12, "total_tokens": 74921064} +{"current_steps": 111160, "total_steps": 204665, "loss": 0.0001, "lr": 1.0239948133887053e-06, "epoch": 2.71565729362617, "percentage": 54.31, "elapsed_time": "2:23:23", "remaining_time": "2:00:37", "throughput": 8708.13, "total_tokens": 74924200} +{"current_steps": 111165, "total_steps": 204665, "loss": 0.0001, "lr": 1.0239095602549552e-06, "epoch": 2.7157794444580166, "percentage": 54.32, "elapsed_time": "2:23:24", "remaining_time": "2:00:36", "throughput": 8708.17, "total_tokens": 74927528} +{"current_steps": 111170, "total_steps": 204665, "loss": 0.0002, "lr": 1.0238243069473283e-06, "epoch": 2.715901595289864, "percentage": 54.32, "elapsed_time": "2:23:24", "remaining_time": "2:00:36", "throughput": 8708.22, "total_tokens": 74930984} +{"current_steps": 111175, "total_steps": 204665, "loss": 0.1227, "lr": 1.0237390534664447e-06, "epoch": 2.716023746121711, "percentage": 54.32, "elapsed_time": "2:23:24", "remaining_time": "2:00:36", "throughput": 8708.25, "total_tokens": 74934184} +{"current_steps": 111180, "total_steps": 204665, "loss": 0.0002, "lr": 1.0236537998129245e-06, "epoch": 2.716145896953558, "percentage": 54.32, "elapsed_time": "2:23:25", "remaining_time": "2:00:35", "throughput": 8708.31, "total_tokens": 74937768} +{"current_steps": 111185, "total_steps": 204665, "loss": 0.0001, "lr": 1.0235685459873873e-06, "epoch": 2.7162680477854053, "percentage": 54.33, "elapsed_time": "2:23:25", "remaining_time": "2:00:35", "throughput": 8708.33, "total_tokens": 74940968} +{"current_steps": 111190, "total_steps": 204665, "loss": 0.1094, "lr": 1.0234832919904533e-06, "epoch": 2.7163901986172525, "percentage": 54.33, "elapsed_time": "2:23:26", "remaining_time": "2:00:34", "throughput": 8708.35, "total_tokens": 74944104} +{"current_steps": 111195, "total_steps": 204665, "loss": 0.0005, "lr": 1.0233980378227426e-06, "epoch": 2.7165123494490997, "percentage": 54.33, "elapsed_time": "2:23:26", "remaining_time": "2:00:34", "throughput": 8708.35, "total_tokens": 74947112} +{"current_steps": 111200, "total_steps": 204665, "loss": 0.0515, "lr": 1.0233127834848744e-06, "epoch": 2.716634500280947, "percentage": 54.33, "elapsed_time": "2:23:26", "remaining_time": "2:00:34", "throughput": 8708.4, "total_tokens": 74950568} +{"current_steps": 111205, "total_steps": 204665, "loss": 0.0549, "lr": 1.0232275289774702e-06, "epoch": 2.716756651112794, "percentage": 54.34, "elapsed_time": "2:23:27", "remaining_time": "2:00:33", "throughput": 8708.42, "total_tokens": 74953704} +{"current_steps": 111210, "total_steps": 204665, "loss": 0.0341, "lr": 1.0231422743011488e-06, "epoch": 2.7168788019446413, "percentage": 54.34, "elapsed_time": "2:23:27", "remaining_time": "2:00:33", "throughput": 8708.45, "total_tokens": 74957032} +{"current_steps": 111215, "total_steps": 204665, "loss": 0.0003, "lr": 1.0230570194565307e-06, "epoch": 2.7170009527764885, "percentage": 54.34, "elapsed_time": "2:23:27", "remaining_time": "2:00:32", "throughput": 8708.49, "total_tokens": 74960360} +{"current_steps": 111220, "total_steps": 204665, "loss": 0.0002, "lr": 1.022971764444236e-06, "epoch": 2.7171231036083356, "percentage": 54.34, "elapsed_time": "2:23:28", "remaining_time": "2:00:32", "throughput": 8708.5, "total_tokens": 74963496} +{"current_steps": 111225, "total_steps": 204665, "loss": 0.0649, "lr": 1.0228865092648842e-06, "epoch": 2.717245254440183, "percentage": 54.34, "elapsed_time": "2:23:28", "remaining_time": "2:00:31", "throughput": 8708.51, "total_tokens": 74966568} +{"current_steps": 111230, "total_steps": 204665, "loss": 0.0351, "lr": 1.022801253919095e-06, "epoch": 2.71736740527203, "percentage": 54.35, "elapsed_time": "2:23:28", "remaining_time": "2:00:31", "throughput": 8708.54, "total_tokens": 74969768} +{"current_steps": 111235, "total_steps": 204665, "loss": 0.0572, "lr": 1.0227159984074895e-06, "epoch": 2.717489556103877, "percentage": 54.35, "elapsed_time": "2:23:29", "remaining_time": "2:00:31", "throughput": 8708.56, "total_tokens": 74972968} +{"current_steps": 111240, "total_steps": 204665, "loss": 0.001, "lr": 1.0226307427306873e-06, "epoch": 2.717611706935724, "percentage": 54.35, "elapsed_time": "2:23:29", "remaining_time": "2:00:30", "throughput": 8708.61, "total_tokens": 74976424} +{"current_steps": 111245, "total_steps": 204665, "loss": 0.0011, "lr": 1.022545486889308e-06, "epoch": 2.7177338577675716, "percentage": 54.35, "elapsed_time": "2:23:29", "remaining_time": "2:00:30", "throughput": 8708.64, "total_tokens": 74979752} +{"current_steps": 111250, "total_steps": 204665, "loss": 0.0011, "lr": 1.022460230883972e-06, "epoch": 2.7178560085994183, "percentage": 54.36, "elapsed_time": "2:23:30", "remaining_time": "2:00:29", "throughput": 8708.7, "total_tokens": 74983336} +{"current_steps": 111255, "total_steps": 204665, "loss": 0.0007, "lr": 1.022374974715299e-06, "epoch": 2.717978159431266, "percentage": 54.36, "elapsed_time": "2:23:30", "remaining_time": "2:00:29", "throughput": 8708.76, "total_tokens": 74986920} +{"current_steps": 111260, "total_steps": 204665, "loss": 0.0009, "lr": 1.022289718383909e-06, "epoch": 2.7181003102631127, "percentage": 54.36, "elapsed_time": "2:23:30", "remaining_time": "2:00:28", "throughput": 8708.78, "total_tokens": 74990120} +{"current_steps": 111265, "total_steps": 204665, "loss": 0.0554, "lr": 1.0222044618904225e-06, "epoch": 2.71822246109496, "percentage": 54.36, "elapsed_time": "2:23:31", "remaining_time": "2:00:28", "throughput": 8708.8, "total_tokens": 74993192} +{"current_steps": 111270, "total_steps": 204665, "loss": 0.0469, "lr": 1.0221192052354593e-06, "epoch": 2.718344611926807, "percentage": 54.37, "elapsed_time": "2:23:31", "remaining_time": "2:00:28", "throughput": 8708.84, "total_tokens": 74996584} +{"current_steps": 111275, "total_steps": 204665, "loss": 0.0437, "lr": 1.0220339484196392e-06, "epoch": 2.7184667627586543, "percentage": 54.37, "elapsed_time": "2:23:31", "remaining_time": "2:00:27", "throughput": 8708.88, "total_tokens": 74999976} +{"current_steps": 111280, "total_steps": 204665, "loss": 0.0002, "lr": 1.0219486914435823e-06, "epoch": 2.7185889135905015, "percentage": 54.37, "elapsed_time": "2:23:32", "remaining_time": "2:00:27", "throughput": 8708.91, "total_tokens": 75003240} +{"current_steps": 111285, "total_steps": 204665, "loss": 0.0002, "lr": 1.0218634343079082e-06, "epoch": 2.7187110644223487, "percentage": 54.37, "elapsed_time": "2:23:32", "remaining_time": "2:00:26", "throughput": 8708.96, "total_tokens": 75006760} +{"current_steps": 111290, "total_steps": 204665, "loss": 0.0001, "lr": 1.0217781770132375e-06, "epoch": 2.718833215254196, "percentage": 54.38, "elapsed_time": "2:23:32", "remaining_time": "2:00:26", "throughput": 8709.01, "total_tokens": 75010280} +{"current_steps": 111295, "total_steps": 204665, "loss": 0.0352, "lr": 1.0216929195601903e-06, "epoch": 2.718955366086043, "percentage": 54.38, "elapsed_time": "2:23:33", "remaining_time": "2:00:26", "throughput": 8709.07, "total_tokens": 75013864} +{"current_steps": 111300, "total_steps": 204665, "loss": 0.0565, "lr": 1.0216076619493861e-06, "epoch": 2.7190775169178902, "percentage": 54.38, "elapsed_time": "2:23:33", "remaining_time": "2:00:25", "throughput": 8709.07, "total_tokens": 75016872} +{"current_steps": 111305, "total_steps": 204665, "loss": 0.0642, "lr": 1.0215224041814455e-06, "epoch": 2.7191996677497374, "percentage": 54.38, "elapsed_time": "2:23:33", "remaining_time": "2:00:25", "throughput": 8709.09, "total_tokens": 75019944} +{"current_steps": 111310, "total_steps": 204665, "loss": 0.0002, "lr": 1.0214371462569878e-06, "epoch": 2.7193218185815846, "percentage": 54.39, "elapsed_time": "2:23:34", "remaining_time": "2:00:24", "throughput": 8709.11, "total_tokens": 75023144} +{"current_steps": 111315, "total_steps": 204665, "loss": 0.0376, "lr": 1.0213518881766337e-06, "epoch": 2.719443969413432, "percentage": 54.39, "elapsed_time": "2:23:34", "remaining_time": "2:00:24", "throughput": 8709.14, "total_tokens": 75026408} +{"current_steps": 111320, "total_steps": 204665, "loss": 0.095, "lr": 1.0212666299410026e-06, "epoch": 2.719566120245279, "percentage": 54.39, "elapsed_time": "2:23:35", "remaining_time": "2:00:23", "throughput": 8709.19, "total_tokens": 75029928} +{"current_steps": 111325, "total_steps": 204665, "loss": 0.0001, "lr": 1.0211813715507151e-06, "epoch": 2.719688271077126, "percentage": 54.39, "elapsed_time": "2:23:35", "remaining_time": "2:00:23", "throughput": 8709.22, "total_tokens": 75033192} +{"current_steps": 111330, "total_steps": 204665, "loss": 0.0002, "lr": 1.0210961130063911e-06, "epoch": 2.7198104219089734, "percentage": 54.4, "elapsed_time": "2:23:35", "remaining_time": "2:00:23", "throughput": 8709.23, "total_tokens": 75036328} +{"current_steps": 111335, "total_steps": 204665, "loss": 0.0006, "lr": 1.0210108543086502e-06, "epoch": 2.71993257274082, "percentage": 54.4, "elapsed_time": "2:23:36", "remaining_time": "2:00:22", "throughput": 8709.24, "total_tokens": 75039400} +{"current_steps": 111340, "total_steps": 204665, "loss": 0.0005, "lr": 1.020925595458113e-06, "epoch": 2.7200547235726678, "percentage": 54.4, "elapsed_time": "2:23:36", "remaining_time": "2:00:22", "throughput": 8709.27, "total_tokens": 75042600} +{"current_steps": 111345, "total_steps": 204665, "loss": 0.0708, "lr": 1.020840336455399e-06, "epoch": 2.7201768744045145, "percentage": 54.4, "elapsed_time": "2:23:36", "remaining_time": "2:00:21", "throughput": 8709.31, "total_tokens": 75045992} +{"current_steps": 111350, "total_steps": 204665, "loss": 0.0451, "lr": 1.0207550773011285e-06, "epoch": 2.720299025236362, "percentage": 54.41, "elapsed_time": "2:23:37", "remaining_time": "2:00:21", "throughput": 8709.34, "total_tokens": 75049256} +{"current_steps": 111355, "total_steps": 204665, "loss": 0.0575, "lr": 1.0206698179959213e-06, "epoch": 2.720421176068209, "percentage": 54.41, "elapsed_time": "2:23:37", "remaining_time": "2:00:21", "throughput": 8709.4, "total_tokens": 75052840} +{"current_steps": 111360, "total_steps": 204665, "loss": 0.0001, "lr": 1.0205845585403978e-06, "epoch": 2.720543326900056, "percentage": 54.41, "elapsed_time": "2:23:37", "remaining_time": "2:00:20", "throughput": 8709.44, "total_tokens": 75056296} +{"current_steps": 111365, "total_steps": 204665, "loss": 0.0007, "lr": 1.020499298935178e-06, "epoch": 2.7206654777319033, "percentage": 54.41, "elapsed_time": "2:23:38", "remaining_time": "2:00:20", "throughput": 8709.45, "total_tokens": 75059240} +{"current_steps": 111370, "total_steps": 204665, "loss": 0.0009, "lr": 1.0204140391808818e-06, "epoch": 2.7207876285637504, "percentage": 54.42, "elapsed_time": "2:23:38", "remaining_time": "2:00:19", "throughput": 8709.5, "total_tokens": 75062760} +{"current_steps": 111375, "total_steps": 204665, "loss": 0.0915, "lr": 1.0203287792781293e-06, "epoch": 2.7209097793955976, "percentage": 54.42, "elapsed_time": "2:23:38", "remaining_time": "2:00:19", "throughput": 8709.51, "total_tokens": 75065832} +{"current_steps": 111380, "total_steps": 204665, "loss": 0.049, "lr": 1.0202435192275404e-06, "epoch": 2.721031930227445, "percentage": 54.42, "elapsed_time": "2:23:39", "remaining_time": "2:00:18", "throughput": 8709.52, "total_tokens": 75068840} +{"current_steps": 111385, "total_steps": 204665, "loss": 0.0844, "lr": 1.0201582590297345e-06, "epoch": 2.721154081059292, "percentage": 54.42, "elapsed_time": "2:23:39", "remaining_time": "2:00:18", "throughput": 8709.53, "total_tokens": 75071912} +{"current_steps": 111390, "total_steps": 204665, "loss": 0.0358, "lr": 1.0200729986853332e-06, "epoch": 2.721276231891139, "percentage": 54.43, "elapsed_time": "2:23:39", "remaining_time": "2:00:18", "throughput": 8709.56, "total_tokens": 75075112} +{"current_steps": 111395, "total_steps": 204665, "loss": 0.1127, "lr": 1.0199877381949552e-06, "epoch": 2.7213983827229864, "percentage": 54.43, "elapsed_time": "2:23:40", "remaining_time": "2:00:17", "throughput": 8709.63, "total_tokens": 75078824} +{"current_steps": 111400, "total_steps": 204665, "loss": 0.0515, "lr": 1.019902477559221e-06, "epoch": 2.7215205335548336, "percentage": 54.43, "elapsed_time": "2:23:40", "remaining_time": "2:00:17", "throughput": 8709.66, "total_tokens": 75082088} +{"current_steps": 111405, "total_steps": 204665, "loss": 0.001, "lr": 1.0198172167787508e-06, "epoch": 2.7216426843866808, "percentage": 54.43, "elapsed_time": "2:23:40", "remaining_time": "2:00:16", "throughput": 8709.69, "total_tokens": 75085416} +{"current_steps": 111410, "total_steps": 204665, "loss": 0.0569, "lr": 1.0197319558541645e-06, "epoch": 2.721764835218528, "percentage": 54.44, "elapsed_time": "2:23:41", "remaining_time": "2:00:16", "throughput": 8709.7, "total_tokens": 75088424} +{"current_steps": 111415, "total_steps": 204665, "loss": 0.0385, "lr": 1.0196466947860819e-06, "epoch": 2.721886986050375, "percentage": 54.44, "elapsed_time": "2:23:41", "remaining_time": "2:00:15", "throughput": 8709.72, "total_tokens": 75091624} +{"current_steps": 111420, "total_steps": 204665, "loss": 0.0003, "lr": 1.0195614335751234e-06, "epoch": 2.722009136882222, "percentage": 54.44, "elapsed_time": "2:23:41", "remaining_time": "2:00:15", "throughput": 8709.81, "total_tokens": 75095528} +{"current_steps": 111425, "total_steps": 204665, "loss": 0.0823, "lr": 1.019476172221909e-06, "epoch": 2.7221312877140695, "percentage": 54.44, "elapsed_time": "2:23:42", "remaining_time": "2:00:15", "throughput": 8709.86, "total_tokens": 75098984} +{"current_steps": 111430, "total_steps": 204665, "loss": 0.069, "lr": 1.0193909107270583e-06, "epoch": 2.7222534385459163, "percentage": 54.45, "elapsed_time": "2:23:42", "remaining_time": "2:00:14", "throughput": 8709.91, "total_tokens": 75102440} +{"current_steps": 111435, "total_steps": 204665, "loss": 0.0002, "lr": 1.019305649091192e-06, "epoch": 2.722375589377764, "percentage": 54.45, "elapsed_time": "2:23:42", "remaining_time": "2:00:14", "throughput": 8709.91, "total_tokens": 75105384} +{"current_steps": 111440, "total_steps": 204665, "loss": 0.0002, "lr": 1.0192203873149299e-06, "epoch": 2.7224977402096107, "percentage": 54.45, "elapsed_time": "2:23:43", "remaining_time": "2:00:13", "throughput": 8709.93, "total_tokens": 75108584} +{"current_steps": 111445, "total_steps": 204665, "loss": 0.0537, "lr": 1.0191351253988915e-06, "epoch": 2.722619891041458, "percentage": 54.45, "elapsed_time": "2:23:43", "remaining_time": "2:00:13", "throughput": 8709.96, "total_tokens": 75111848} +{"current_steps": 111450, "total_steps": 204665, "loss": 0.0003, "lr": 1.0190498633436976e-06, "epoch": 2.722742041873305, "percentage": 54.45, "elapsed_time": "2:23:44", "remaining_time": "2:00:12", "throughput": 8709.97, "total_tokens": 75114984} +{"current_steps": 111455, "total_steps": 204665, "loss": 0.036, "lr": 1.018964601149968e-06, "epoch": 2.7228641927051522, "percentage": 54.46, "elapsed_time": "2:23:44", "remaining_time": "2:00:12", "throughput": 8710.02, "total_tokens": 75118376} +{"current_steps": 111460, "total_steps": 204665, "loss": 0.0004, "lr": 1.0188793388183229e-06, "epoch": 2.7229863435369994, "percentage": 54.46, "elapsed_time": "2:23:44", "remaining_time": "2:00:12", "throughput": 8710.06, "total_tokens": 75121768} +{"current_steps": 111465, "total_steps": 204665, "loss": 0.0001, "lr": 1.0187940763493818e-06, "epoch": 2.7231084943688466, "percentage": 54.46, "elapsed_time": "2:23:45", "remaining_time": "2:00:11", "throughput": 8710.11, "total_tokens": 75125288} +{"current_steps": 111470, "total_steps": 204665, "loss": 0.0005, "lr": 1.0187088137437652e-06, "epoch": 2.723230645200694, "percentage": 54.46, "elapsed_time": "2:23:45", "remaining_time": "2:00:11", "throughput": 8710.16, "total_tokens": 75128808} +{"current_steps": 111475, "total_steps": 204665, "loss": 0.0002, "lr": 1.0186235510020933e-06, "epoch": 2.723352796032541, "percentage": 54.47, "elapsed_time": "2:23:45", "remaining_time": "2:00:10", "throughput": 8710.2, "total_tokens": 75132136} +{"current_steps": 111480, "total_steps": 204665, "loss": 0.0686, "lr": 1.0185382881249857e-06, "epoch": 2.723474946864388, "percentage": 54.47, "elapsed_time": "2:23:46", "remaining_time": "2:00:10", "throughput": 8710.25, "total_tokens": 75135656} +{"current_steps": 111485, "total_steps": 204665, "loss": 0.0898, "lr": 1.0184530251130628e-06, "epoch": 2.7235970976962354, "percentage": 54.47, "elapsed_time": "2:23:46", "remaining_time": "2:00:10", "throughput": 8710.31, "total_tokens": 75139240} +{"current_steps": 111490, "total_steps": 204665, "loss": 0.0002, "lr": 1.0183677619669446e-06, "epoch": 2.7237192485280826, "percentage": 54.47, "elapsed_time": "2:23:46", "remaining_time": "2:00:09", "throughput": 8710.35, "total_tokens": 75142568} +{"current_steps": 111495, "total_steps": 204665, "loss": 0.0829, "lr": 1.0182824986872509e-06, "epoch": 2.7238413993599297, "percentage": 54.48, "elapsed_time": "2:23:47", "remaining_time": "2:00:09", "throughput": 8710.39, "total_tokens": 75145960} +{"current_steps": 111500, "total_steps": 204665, "loss": 0.1665, "lr": 1.0181972352746022e-06, "epoch": 2.723963550191777, "percentage": 54.48, "elapsed_time": "2:23:47", "remaining_time": "2:00:08", "throughput": 8710.42, "total_tokens": 75149160} +{"current_steps": 111505, "total_steps": 204665, "loss": 0.0468, "lr": 1.018111971729618e-06, "epoch": 2.724085701023624, "percentage": 54.48, "elapsed_time": "2:23:47", "remaining_time": "2:00:08", "throughput": 8710.43, "total_tokens": 75152232} +{"current_steps": 111510, "total_steps": 204665, "loss": 0.0844, "lr": 1.0180267080529187e-06, "epoch": 2.7242078518554713, "percentage": 54.48, "elapsed_time": "2:23:48", "remaining_time": "2:00:07", "throughput": 8710.43, "total_tokens": 75155240} +{"current_steps": 111515, "total_steps": 204665, "loss": 0.0002, "lr": 1.0179414442451244e-06, "epoch": 2.724330002687318, "percentage": 54.49, "elapsed_time": "2:23:48", "remaining_time": "2:00:07", "throughput": 8710.45, "total_tokens": 75158376} +{"current_steps": 111520, "total_steps": 204665, "loss": 0.0005, "lr": 1.0178561803068554e-06, "epoch": 2.7244521535191657, "percentage": 54.49, "elapsed_time": "2:23:48", "remaining_time": "2:00:07", "throughput": 8710.47, "total_tokens": 75161512} +{"current_steps": 111525, "total_steps": 204665, "loss": 0.1018, "lr": 1.0177709162387311e-06, "epoch": 2.7245743043510124, "percentage": 54.49, "elapsed_time": "2:23:49", "remaining_time": "2:00:06", "throughput": 8710.53, "total_tokens": 75165096} +{"current_steps": 111530, "total_steps": 204665, "loss": 0.1232, "lr": 1.0176856520413723e-06, "epoch": 2.7246964551828596, "percentage": 54.49, "elapsed_time": "2:23:49", "remaining_time": "2:00:06", "throughput": 8710.59, "total_tokens": 75168680} +{"current_steps": 111535, "total_steps": 204665, "loss": 0.0565, "lr": 1.0176003877153986e-06, "epoch": 2.724818606014707, "percentage": 54.5, "elapsed_time": "2:23:49", "remaining_time": "2:00:05", "throughput": 8710.63, "total_tokens": 75172072} +{"current_steps": 111540, "total_steps": 204665, "loss": 0.0412, "lr": 1.0175151232614296e-06, "epoch": 2.724940756846554, "percentage": 54.5, "elapsed_time": "2:23:50", "remaining_time": "2:00:05", "throughput": 8710.69, "total_tokens": 75175656} +{"current_steps": 111545, "total_steps": 204665, "loss": 0.0868, "lr": 1.0174298586800862e-06, "epoch": 2.725062907678401, "percentage": 54.5, "elapsed_time": "2:23:50", "remaining_time": "2:00:05", "throughput": 8710.78, "total_tokens": 75179560} +{"current_steps": 111550, "total_steps": 204665, "loss": 0.0638, "lr": 1.0173445939719882e-06, "epoch": 2.7251850585102484, "percentage": 54.5, "elapsed_time": "2:23:50", "remaining_time": "2:00:04", "throughput": 8710.83, "total_tokens": 75183016} +{"current_steps": 111555, "total_steps": 204665, "loss": 0.0006, "lr": 1.0172593291377559e-06, "epoch": 2.7253072093420956, "percentage": 54.51, "elapsed_time": "2:23:51", "remaining_time": "2:00:04", "throughput": 8710.89, "total_tokens": 75186664} +{"current_steps": 111560, "total_steps": 204665, "loss": 0.042, "lr": 1.017174064178009e-06, "epoch": 2.7254293601739428, "percentage": 54.51, "elapsed_time": "2:23:51", "remaining_time": "2:00:03", "throughput": 8710.91, "total_tokens": 75189736} +{"current_steps": 111565, "total_steps": 204665, "loss": 0.0351, "lr": 1.0170887990933675e-06, "epoch": 2.72555151100579, "percentage": 54.51, "elapsed_time": "2:23:52", "remaining_time": "2:00:03", "throughput": 8710.93, "total_tokens": 75193000} +{"current_steps": 111570, "total_steps": 204665, "loss": 0.0005, "lr": 1.0170035338844514e-06, "epoch": 2.725673661837637, "percentage": 54.51, "elapsed_time": "2:23:52", "remaining_time": "2:00:02", "throughput": 8710.99, "total_tokens": 75196520} +{"current_steps": 111575, "total_steps": 204665, "loss": 0.0004, "lr": 1.0169182685518817e-06, "epoch": 2.7257958126694843, "percentage": 54.52, "elapsed_time": "2:23:52", "remaining_time": "2:00:02", "throughput": 8711.04, "total_tokens": 75199976} +{"current_steps": 111580, "total_steps": 204665, "loss": 0.029, "lr": 1.0168330030962775e-06, "epoch": 2.7259179635013315, "percentage": 54.52, "elapsed_time": "2:23:53", "remaining_time": "2:00:02", "throughput": 8711.06, "total_tokens": 75203176} +{"current_steps": 111585, "total_steps": 204665, "loss": 0.0004, "lr": 1.0167477375182592e-06, "epoch": 2.7260401143331787, "percentage": 54.52, "elapsed_time": "2:23:53", "remaining_time": "2:00:01", "throughput": 8711.11, "total_tokens": 75206632} +{"current_steps": 111590, "total_steps": 204665, "loss": 0.0851, "lr": 1.0166624718184467e-06, "epoch": 2.726162265165026, "percentage": 54.52, "elapsed_time": "2:23:53", "remaining_time": "2:00:01", "throughput": 8711.13, "total_tokens": 75209832} +{"current_steps": 111595, "total_steps": 204665, "loss": 0.0901, "lr": 1.0165772059974604e-06, "epoch": 2.726284415996873, "percentage": 54.53, "elapsed_time": "2:23:54", "remaining_time": "2:00:00", "throughput": 8711.15, "total_tokens": 75213032} +{"current_steps": 111600, "total_steps": 204665, "loss": 0.0003, "lr": 1.0164919400559202e-06, "epoch": 2.72640656682872, "percentage": 54.53, "elapsed_time": "2:23:54", "remaining_time": "2:00:00", "throughput": 8711.17, "total_tokens": 75216232} +{"current_steps": 111605, "total_steps": 204665, "loss": 0.0001, "lr": 1.016406673994446e-06, "epoch": 2.7265287176605675, "percentage": 54.53, "elapsed_time": "2:23:54", "remaining_time": "1:59:59", "throughput": 8711.2, "total_tokens": 75219560} +{"current_steps": 111610, "total_steps": 204665, "loss": 0.0337, "lr": 1.016321407813658e-06, "epoch": 2.7266508684924142, "percentage": 54.53, "elapsed_time": "2:23:55", "remaining_time": "1:59:59", "throughput": 8711.31, "total_tokens": 75223720} +{"current_steps": 111615, "total_steps": 204665, "loss": 0.0276, "lr": 1.0162361415141766e-06, "epoch": 2.726773019324262, "percentage": 54.54, "elapsed_time": "2:23:55", "remaining_time": "1:59:59", "throughput": 8711.39, "total_tokens": 75227560} +{"current_steps": 111620, "total_steps": 204665, "loss": 0.044, "lr": 1.0161508750966214e-06, "epoch": 2.7268951701561086, "percentage": 54.54, "elapsed_time": "2:23:55", "remaining_time": "1:59:58", "throughput": 8711.42, "total_tokens": 75230888} +{"current_steps": 111625, "total_steps": 204665, "loss": 0.0003, "lr": 1.0160656085616128e-06, "epoch": 2.727017320987956, "percentage": 54.54, "elapsed_time": "2:23:56", "remaining_time": "1:59:58", "throughput": 8711.46, "total_tokens": 75234216} +{"current_steps": 111630, "total_steps": 204665, "loss": 0.0228, "lr": 1.0159803419097708e-06, "epoch": 2.727139471819803, "percentage": 54.54, "elapsed_time": "2:23:56", "remaining_time": "1:59:57", "throughput": 8711.5, "total_tokens": 75237608} +{"current_steps": 111635, "total_steps": 204665, "loss": 0.0006, "lr": 1.0158950751417155e-06, "epoch": 2.72726162265165, "percentage": 54.55, "elapsed_time": "2:23:56", "remaining_time": "1:59:57", "throughput": 8711.49, "total_tokens": 75240488} +{"current_steps": 111640, "total_steps": 204665, "loss": 0.0002, "lr": 1.0158098082580669e-06, "epoch": 2.7273837734834974, "percentage": 54.55, "elapsed_time": "2:23:57", "remaining_time": "1:59:57", "throughput": 8711.52, "total_tokens": 75243816} +{"current_steps": 111645, "total_steps": 204665, "loss": 0.0294, "lr": 1.015724541259445e-06, "epoch": 2.7275059243153446, "percentage": 54.55, "elapsed_time": "2:23:57", "remaining_time": "1:59:56", "throughput": 8711.54, "total_tokens": 75246952} +{"current_steps": 111650, "total_steps": 204665, "loss": 0.0006, "lr": 1.01563927414647e-06, "epoch": 2.7276280751471917, "percentage": 54.55, "elapsed_time": "2:23:57", "remaining_time": "1:59:56", "throughput": 8711.57, "total_tokens": 75250216} +{"current_steps": 111655, "total_steps": 204665, "loss": 0.0515, "lr": 1.0155540069197623e-06, "epoch": 2.727750225979039, "percentage": 54.56, "elapsed_time": "2:23:58", "remaining_time": "1:59:55", "throughput": 8711.6, "total_tokens": 75253480} +{"current_steps": 111660, "total_steps": 204665, "loss": 0.0007, "lr": 1.0154687395799415e-06, "epoch": 2.727872376810886, "percentage": 54.56, "elapsed_time": "2:23:58", "remaining_time": "1:59:55", "throughput": 8711.64, "total_tokens": 75256936} +{"current_steps": 111665, "total_steps": 204665, "loss": 0.0002, "lr": 1.0153834721276276e-06, "epoch": 2.7279945276427333, "percentage": 54.56, "elapsed_time": "2:23:59", "remaining_time": "1:59:54", "throughput": 8711.69, "total_tokens": 75260392} +{"current_steps": 111670, "total_steps": 204665, "loss": 0.054, "lr": 1.0152982045634411e-06, "epoch": 2.7281166784745805, "percentage": 54.56, "elapsed_time": "2:23:59", "remaining_time": "1:59:54", "throughput": 8711.73, "total_tokens": 75263784} +{"current_steps": 111675, "total_steps": 204665, "loss": 0.0001, "lr": 1.015212936888002e-06, "epoch": 2.7282388293064277, "percentage": 54.56, "elapsed_time": "2:23:59", "remaining_time": "1:59:54", "throughput": 8711.73, "total_tokens": 75266664} +{"current_steps": 111680, "total_steps": 204665, "loss": 0.0001, "lr": 1.0151276691019304e-06, "epoch": 2.728360980138275, "percentage": 54.57, "elapsed_time": "2:24:00", "remaining_time": "1:59:53", "throughput": 8711.81, "total_tokens": 75270504} +{"current_steps": 111685, "total_steps": 204665, "loss": 0.1038, "lr": 1.0150424012058466e-06, "epoch": 2.728483130970122, "percentage": 54.57, "elapsed_time": "2:24:00", "remaining_time": "1:59:53", "throughput": 8711.88, "total_tokens": 75274280} +{"current_steps": 111690, "total_steps": 204665, "loss": 0.001, "lr": 1.0149571332003702e-06, "epoch": 2.7286052818019693, "percentage": 54.57, "elapsed_time": "2:24:00", "remaining_time": "1:59:52", "throughput": 8711.92, "total_tokens": 75277672} +{"current_steps": 111695, "total_steps": 204665, "loss": 0.068, "lr": 1.014871865086121e-06, "epoch": 2.728727432633816, "percentage": 54.57, "elapsed_time": "2:24:01", "remaining_time": "1:59:52", "throughput": 8711.96, "total_tokens": 75281000} +{"current_steps": 111700, "total_steps": 204665, "loss": 0.0001, "lr": 1.01478659686372e-06, "epoch": 2.7288495834656636, "percentage": 54.58, "elapsed_time": "2:24:01", "remaining_time": "1:59:52", "throughput": 8712.03, "total_tokens": 75284712} +{"current_steps": 111705, "total_steps": 204665, "loss": 0.0002, "lr": 1.0147013285337868e-06, "epoch": 2.7289717342975104, "percentage": 54.58, "elapsed_time": "2:24:01", "remaining_time": "1:59:51", "throughput": 8712.08, "total_tokens": 75288168} +{"current_steps": 111710, "total_steps": 204665, "loss": 0.0541, "lr": 1.0146160600969419e-06, "epoch": 2.7290938851293576, "percentage": 54.58, "elapsed_time": "2:24:02", "remaining_time": "1:59:51", "throughput": 8712.11, "total_tokens": 75291432} +{"current_steps": 111715, "total_steps": 204665, "loss": 0.0503, "lr": 1.0145307915538047e-06, "epoch": 2.7292160359612048, "percentage": 54.58, "elapsed_time": "2:24:02", "remaining_time": "1:59:50", "throughput": 8712.16, "total_tokens": 75294952} +{"current_steps": 111720, "total_steps": 204665, "loss": 0.002, "lr": 1.014445522904996e-06, "epoch": 2.729338186793052, "percentage": 54.59, "elapsed_time": "2:24:02", "remaining_time": "1:59:50", "throughput": 8712.17, "total_tokens": 75298024} +{"current_steps": 111725, "total_steps": 204665, "loss": 0.0644, "lr": 1.014360254151135e-06, "epoch": 2.729460337624899, "percentage": 54.59, "elapsed_time": "2:24:03", "remaining_time": "1:59:49", "throughput": 8712.23, "total_tokens": 75301544} +{"current_steps": 111730, "total_steps": 204665, "loss": 0.0001, "lr": 1.014274985292843e-06, "epoch": 2.7295824884567463, "percentage": 54.59, "elapsed_time": "2:24:03", "remaining_time": "1:59:49", "throughput": 8712.28, "total_tokens": 75305128} +{"current_steps": 111735, "total_steps": 204665, "loss": 0.0358, "lr": 1.0141897163307394e-06, "epoch": 2.7297046392885935, "percentage": 54.59, "elapsed_time": "2:24:03", "remaining_time": "1:59:49", "throughput": 8712.32, "total_tokens": 75308520} +{"current_steps": 111740, "total_steps": 204665, "loss": 0.0006, "lr": 1.0141044472654441e-06, "epoch": 2.7298267901204407, "percentage": 54.6, "elapsed_time": "2:24:04", "remaining_time": "1:59:48", "throughput": 8712.34, "total_tokens": 75311656} +{"current_steps": 111745, "total_steps": 204665, "loss": 0.0003, "lr": 1.0140191780975776e-06, "epoch": 2.729948940952288, "percentage": 54.6, "elapsed_time": "2:24:04", "remaining_time": "1:59:48", "throughput": 8712.37, "total_tokens": 75314920} +{"current_steps": 111750, "total_steps": 204665, "loss": 0.0012, "lr": 1.0139339088277599e-06, "epoch": 2.730071091784135, "percentage": 54.6, "elapsed_time": "2:24:04", "remaining_time": "1:59:47", "throughput": 8712.43, "total_tokens": 75318504} +{"current_steps": 111755, "total_steps": 204665, "loss": 0.0693, "lr": 1.013848639456611e-06, "epoch": 2.7301932426159823, "percentage": 54.6, "elapsed_time": "2:24:05", "remaining_time": "1:59:47", "throughput": 8712.44, "total_tokens": 75321640} +{"current_steps": 111760, "total_steps": 204665, "loss": 0.0515, "lr": 1.0137633699847507e-06, "epoch": 2.7303153934478295, "percentage": 54.61, "elapsed_time": "2:24:05", "remaining_time": "1:59:47", "throughput": 8712.51, "total_tokens": 75325288} +{"current_steps": 111765, "total_steps": 204665, "loss": 0.0002, "lr": 1.0136781004128e-06, "epoch": 2.7304375442796767, "percentage": 54.61, "elapsed_time": "2:24:05", "remaining_time": "1:59:46", "throughput": 8712.56, "total_tokens": 75328808} +{"current_steps": 111770, "total_steps": 204665, "loss": 0.0646, "lr": 1.0135928307413785e-06, "epoch": 2.730559695111524, "percentage": 54.61, "elapsed_time": "2:24:06", "remaining_time": "1:59:46", "throughput": 8712.62, "total_tokens": 75332392} +{"current_steps": 111775, "total_steps": 204665, "loss": 0.0001, "lr": 1.013507560971106e-06, "epoch": 2.730681845943371, "percentage": 54.61, "elapsed_time": "2:24:06", "remaining_time": "1:59:45", "throughput": 8712.69, "total_tokens": 75336104} +{"current_steps": 111780, "total_steps": 204665, "loss": 0.0465, "lr": 1.013422291102603e-06, "epoch": 2.730803996775218, "percentage": 54.62, "elapsed_time": "2:24:07", "remaining_time": "1:59:45", "throughput": 8712.72, "total_tokens": 75339368} +{"current_steps": 111785, "total_steps": 204665, "loss": 0.044, "lr": 1.0133370211364892e-06, "epoch": 2.7309261476070654, "percentage": 54.62, "elapsed_time": "2:24:07", "remaining_time": "1:59:44", "throughput": 8712.75, "total_tokens": 75342568} +{"current_steps": 111790, "total_steps": 204665, "loss": 0.0007, "lr": 1.0132517510733853e-06, "epoch": 2.731048298438912, "percentage": 54.62, "elapsed_time": "2:24:07", "remaining_time": "1:59:44", "throughput": 8712.76, "total_tokens": 75345704} +{"current_steps": 111795, "total_steps": 204665, "loss": 0.1245, "lr": 1.0131664809139111e-06, "epoch": 2.73117044927076, "percentage": 54.62, "elapsed_time": "2:24:08", "remaining_time": "1:59:44", "throughput": 8712.83, "total_tokens": 75349288} +{"current_steps": 111800, "total_steps": 204665, "loss": 0.0002, "lr": 1.0130812106586868e-06, "epoch": 2.7312926001026065, "percentage": 54.63, "elapsed_time": "2:24:08", "remaining_time": "1:59:43", "throughput": 8712.86, "total_tokens": 75352552} +{"current_steps": 111805, "total_steps": 204665, "loss": 0.0465, "lr": 1.012995940308332e-06, "epoch": 2.7314147509344537, "percentage": 54.63, "elapsed_time": "2:24:08", "remaining_time": "1:59:43", "throughput": 8712.89, "total_tokens": 75355816} +{"current_steps": 111810, "total_steps": 204665, "loss": 0.0003, "lr": 1.0129106698634676e-06, "epoch": 2.731536901766301, "percentage": 54.63, "elapsed_time": "2:24:09", "remaining_time": "1:59:42", "throughput": 8712.91, "total_tokens": 75359016} +{"current_steps": 111815, "total_steps": 204665, "loss": 0.0834, "lr": 1.0128253993247132e-06, "epoch": 2.731659052598148, "percentage": 54.63, "elapsed_time": "2:24:09", "remaining_time": "1:59:42", "throughput": 8712.93, "total_tokens": 75362216} +{"current_steps": 111820, "total_steps": 204665, "loss": 0.0003, "lr": 1.012740128692689e-06, "epoch": 2.7317812034299953, "percentage": 54.64, "elapsed_time": "2:24:09", "remaining_time": "1:59:42", "throughput": 8712.98, "total_tokens": 75365672} +{"current_steps": 111825, "total_steps": 204665, "loss": 0.1318, "lr": 1.0126548579680154e-06, "epoch": 2.7319033542618425, "percentage": 54.64, "elapsed_time": "2:24:10", "remaining_time": "1:59:41", "throughput": 8713.0, "total_tokens": 75368936} +{"current_steps": 111830, "total_steps": 204665, "loss": 0.0329, "lr": 1.012569587151312e-06, "epoch": 2.7320255050936897, "percentage": 54.64, "elapsed_time": "2:24:10", "remaining_time": "1:59:41", "throughput": 8713.04, "total_tokens": 75372328} +{"current_steps": 111835, "total_steps": 204665, "loss": 0.0027, "lr": 1.0124843162431994e-06, "epoch": 2.732147655925537, "percentage": 54.64, "elapsed_time": "2:24:10", "remaining_time": "1:59:40", "throughput": 8713.07, "total_tokens": 75375528} +{"current_steps": 111840, "total_steps": 204665, "loss": 0.0477, "lr": 1.0123990452442977e-06, "epoch": 2.732269806757384, "percentage": 54.65, "elapsed_time": "2:24:11", "remaining_time": "1:59:40", "throughput": 8713.07, "total_tokens": 75378472} +{"current_steps": 111845, "total_steps": 204665, "loss": 0.0479, "lr": 1.0123137741552264e-06, "epoch": 2.7323919575892313, "percentage": 54.65, "elapsed_time": "2:24:11", "remaining_time": "1:59:39", "throughput": 8713.1, "total_tokens": 75381736} +{"current_steps": 111850, "total_steps": 204665, "loss": 0.0001, "lr": 1.012228502976606e-06, "epoch": 2.7325141084210784, "percentage": 54.65, "elapsed_time": "2:24:11", "remaining_time": "1:59:39", "throughput": 8713.14, "total_tokens": 75385128} +{"current_steps": 111855, "total_steps": 204665, "loss": 0.0001, "lr": 1.0121432317090568e-06, "epoch": 2.7326362592529256, "percentage": 54.65, "elapsed_time": "2:24:12", "remaining_time": "1:59:39", "throughput": 8713.16, "total_tokens": 75388264} +{"current_steps": 111860, "total_steps": 204665, "loss": 0.0334, "lr": 1.0120579603531987e-06, "epoch": 2.732758410084773, "percentage": 54.66, "elapsed_time": "2:24:12", "remaining_time": "1:59:38", "throughput": 8713.23, "total_tokens": 75391912} +{"current_steps": 111865, "total_steps": 204665, "loss": 0.0002, "lr": 1.0119726889096518e-06, "epoch": 2.7328805609166196, "percentage": 54.66, "elapsed_time": "2:24:12", "remaining_time": "1:59:38", "throughput": 8713.27, "total_tokens": 75395304} +{"current_steps": 111870, "total_steps": 204665, "loss": 0.0001, "lr": 1.0118874173790364e-06, "epoch": 2.733002711748467, "percentage": 54.66, "elapsed_time": "2:24:13", "remaining_time": "1:59:37", "throughput": 8713.29, "total_tokens": 75398440} +{"current_steps": 111875, "total_steps": 204665, "loss": 0.0004, "lr": 1.0118021457619725e-06, "epoch": 2.733124862580314, "percentage": 54.66, "elapsed_time": "2:24:13", "remaining_time": "1:59:37", "throughput": 8713.34, "total_tokens": 75401896} +{"current_steps": 111880, "total_steps": 204665, "loss": 0.1077, "lr": 1.01171687405908e-06, "epoch": 2.7332470134121616, "percentage": 54.66, "elapsed_time": "2:24:13", "remaining_time": "1:59:36", "throughput": 8713.36, "total_tokens": 75405096} +{"current_steps": 111885, "total_steps": 204665, "loss": 0.0356, "lr": 1.0116316022709794e-06, "epoch": 2.7333691642440083, "percentage": 54.67, "elapsed_time": "2:24:14", "remaining_time": "1:59:36", "throughput": 8713.44, "total_tokens": 75408936} +{"current_steps": 111890, "total_steps": 204665, "loss": 0.0008, "lr": 1.0115463303982909e-06, "epoch": 2.7334913150758555, "percentage": 54.67, "elapsed_time": "2:24:14", "remaining_time": "1:59:36", "throughput": 8713.48, "total_tokens": 75412328} +{"current_steps": 111895, "total_steps": 204665, "loss": 0.0011, "lr": 1.0114610584416342e-06, "epoch": 2.7336134659077027, "percentage": 54.67, "elapsed_time": "2:24:15", "remaining_time": "1:59:35", "throughput": 8713.53, "total_tokens": 75415784} +{"current_steps": 111900, "total_steps": 204665, "loss": 0.0411, "lr": 1.0113757864016298e-06, "epoch": 2.73373561673955, "percentage": 54.67, "elapsed_time": "2:24:15", "remaining_time": "1:59:35", "throughput": 8713.58, "total_tokens": 75419304} +{"current_steps": 111905, "total_steps": 204665, "loss": 0.0423, "lr": 1.0112905142788973e-06, "epoch": 2.733857767571397, "percentage": 54.68, "elapsed_time": "2:24:15", "remaining_time": "1:59:34", "throughput": 8713.58, "total_tokens": 75422248} +{"current_steps": 111910, "total_steps": 204665, "loss": 0.0229, "lr": 1.0112052420740573e-06, "epoch": 2.7339799184032443, "percentage": 54.68, "elapsed_time": "2:24:16", "remaining_time": "1:59:34", "throughput": 8713.6, "total_tokens": 75425384} +{"current_steps": 111915, "total_steps": 204665, "loss": 0.1241, "lr": 1.0111199697877295e-06, "epoch": 2.7341020692350915, "percentage": 54.68, "elapsed_time": "2:24:16", "remaining_time": "1:59:34", "throughput": 8713.62, "total_tokens": 75428456} +{"current_steps": 111920, "total_steps": 204665, "loss": 0.0039, "lr": 1.0110346974205344e-06, "epoch": 2.7342242200669387, "percentage": 54.68, "elapsed_time": "2:24:16", "remaining_time": "1:59:33", "throughput": 8713.71, "total_tokens": 75432424} +{"current_steps": 111925, "total_steps": 204665, "loss": 0.0002, "lr": 1.010949424973092e-06, "epoch": 2.734346370898786, "percentage": 54.69, "elapsed_time": "2:24:17", "remaining_time": "1:59:33", "throughput": 8713.76, "total_tokens": 75435880} +{"current_steps": 111930, "total_steps": 204665, "loss": 0.0335, "lr": 1.0108641524460227e-06, "epoch": 2.734468521730633, "percentage": 54.69, "elapsed_time": "2:24:17", "remaining_time": "1:59:32", "throughput": 8713.82, "total_tokens": 75439528} +{"current_steps": 111935, "total_steps": 204665, "loss": 0.0004, "lr": 1.010778879839946e-06, "epoch": 2.7345906725624802, "percentage": 54.69, "elapsed_time": "2:24:17", "remaining_time": "1:59:32", "throughput": 8713.85, "total_tokens": 75442792} +{"current_steps": 111940, "total_steps": 204665, "loss": 0.0001, "lr": 1.0106936071554828e-06, "epoch": 2.7347128233943274, "percentage": 54.69, "elapsed_time": "2:24:18", "remaining_time": "1:59:31", "throughput": 8713.95, "total_tokens": 75446760} +{"current_steps": 111945, "total_steps": 204665, "loss": 0.0312, "lr": 1.0106083343932527e-06, "epoch": 2.7348349742261746, "percentage": 54.7, "elapsed_time": "2:24:18", "remaining_time": "1:59:31", "throughput": 8714.02, "total_tokens": 75450536} +{"current_steps": 111950, "total_steps": 204665, "loss": 0.0496, "lr": 1.0105230615538757e-06, "epoch": 2.734957125058022, "percentage": 54.7, "elapsed_time": "2:24:18", "remaining_time": "1:59:31", "throughput": 8714.11, "total_tokens": 75454376} +{"current_steps": 111955, "total_steps": 204665, "loss": 0.0975, "lr": 1.0104377886379725e-06, "epoch": 2.735079275889869, "percentage": 54.7, "elapsed_time": "2:24:19", "remaining_time": "1:59:30", "throughput": 8714.15, "total_tokens": 75457832} +{"current_steps": 111960, "total_steps": 204665, "loss": 0.0478, "lr": 1.0103525156461628e-06, "epoch": 2.7352014267217157, "percentage": 54.7, "elapsed_time": "2:24:19", "remaining_time": "1:59:30", "throughput": 8714.25, "total_tokens": 75461800} +{"current_steps": 111965, "total_steps": 204665, "loss": 0.0003, "lr": 1.0102672425790665e-06, "epoch": 2.7353235775535634, "percentage": 54.71, "elapsed_time": "2:24:19", "remaining_time": "1:59:29", "throughput": 8714.27, "total_tokens": 75464936} +{"current_steps": 111970, "total_steps": 204665, "loss": 0.0001, "lr": 1.0101819694373045e-06, "epoch": 2.73544572838541, "percentage": 54.71, "elapsed_time": "2:24:20", "remaining_time": "1:59:29", "throughput": 8714.32, "total_tokens": 75468456} +{"current_steps": 111975, "total_steps": 204665, "loss": 0.0001, "lr": 1.0100966962214959e-06, "epoch": 2.7355678792172573, "percentage": 54.71, "elapsed_time": "2:24:20", "remaining_time": "1:59:29", "throughput": 8714.38, "total_tokens": 75472040} +{"current_steps": 111980, "total_steps": 204665, "loss": 0.176, "lr": 1.0100114229322618e-06, "epoch": 2.7356900300491045, "percentage": 54.71, "elapsed_time": "2:24:20", "remaining_time": "1:59:28", "throughput": 8714.43, "total_tokens": 75475560} +{"current_steps": 111985, "total_steps": 204665, "loss": 0.0004, "lr": 1.009926149570222e-06, "epoch": 2.7358121808809517, "percentage": 54.72, "elapsed_time": "2:24:21", "remaining_time": "1:59:28", "throughput": 8714.47, "total_tokens": 75478952} +{"current_steps": 111990, "total_steps": 204665, "loss": 0.0003, "lr": 1.0098408761359965e-06, "epoch": 2.735934331712799, "percentage": 54.72, "elapsed_time": "2:24:21", "remaining_time": "1:59:27", "throughput": 8714.49, "total_tokens": 75482088} +{"current_steps": 111995, "total_steps": 204665, "loss": 0.0492, "lr": 1.0097556026302056e-06, "epoch": 2.736056482544646, "percentage": 54.72, "elapsed_time": "2:24:22", "remaining_time": "1:59:27", "throughput": 8714.51, "total_tokens": 75485160} +{"current_steps": 112000, "total_steps": 204665, "loss": 0.0853, "lr": 1.0096703290534693e-06, "epoch": 2.7361786333764933, "percentage": 54.72, "elapsed_time": "2:24:22", "remaining_time": "1:59:26", "throughput": 8714.52, "total_tokens": 75488232} +{"current_steps": 112005, "total_steps": 204665, "loss": 0.0004, "lr": 1.0095850554064074e-06, "epoch": 2.7363007842083404, "percentage": 54.73, "elapsed_time": "2:24:22", "remaining_time": "1:59:26", "throughput": 8714.51, "total_tokens": 75491112} +{"current_steps": 112010, "total_steps": 204665, "loss": 0.0004, "lr": 1.0094997816896407e-06, "epoch": 2.7364229350401876, "percentage": 54.73, "elapsed_time": "2:24:23", "remaining_time": "1:59:26", "throughput": 8714.52, "total_tokens": 75494120} +{"current_steps": 112015, "total_steps": 204665, "loss": 0.0351, "lr": 1.009414507903789e-06, "epoch": 2.736545085872035, "percentage": 54.73, "elapsed_time": "2:24:23", "remaining_time": "1:59:25", "throughput": 8714.53, "total_tokens": 75497192} +{"current_steps": 112020, "total_steps": 204665, "loss": 0.0548, "lr": 1.0093292340494726e-06, "epoch": 2.736667236703882, "percentage": 54.73, "elapsed_time": "2:24:23", "remaining_time": "1:59:25", "throughput": 8714.59, "total_tokens": 75500840} +{"current_steps": 112025, "total_steps": 204665, "loss": 0.0271, "lr": 1.0092439601273112e-06, "epoch": 2.736789387535729, "percentage": 54.74, "elapsed_time": "2:24:24", "remaining_time": "1:59:24", "throughput": 8714.63, "total_tokens": 75504168} +{"current_steps": 112030, "total_steps": 204665, "loss": 0.0604, "lr": 1.0091586861379256e-06, "epoch": 2.7369115383675764, "percentage": 54.74, "elapsed_time": "2:24:24", "remaining_time": "1:59:24", "throughput": 8714.66, "total_tokens": 75507432} +{"current_steps": 112035, "total_steps": 204665, "loss": 0.0002, "lr": 1.0090734120819353e-06, "epoch": 2.7370336891994236, "percentage": 54.74, "elapsed_time": "2:24:24", "remaining_time": "1:59:23", "throughput": 8714.67, "total_tokens": 75510504} +{"current_steps": 112040, "total_steps": 204665, "loss": 0.115, "lr": 1.0089881379599605e-06, "epoch": 2.7371558400312708, "percentage": 54.74, "elapsed_time": "2:24:25", "remaining_time": "1:59:23", "throughput": 8714.68, "total_tokens": 75513576} +{"current_steps": 112045, "total_steps": 204665, "loss": 0.0002, "lr": 1.0089028637726223e-06, "epoch": 2.7372779908631175, "percentage": 54.75, "elapsed_time": "2:24:25", "remaining_time": "1:59:23", "throughput": 8714.74, "total_tokens": 75517160} +{"current_steps": 112050, "total_steps": 204665, "loss": 0.1897, "lr": 1.0088175895205396e-06, "epoch": 2.737400141694965, "percentage": 54.75, "elapsed_time": "2:24:25", "remaining_time": "1:59:22", "throughput": 8714.8, "total_tokens": 75520744} +{"current_steps": 112055, "total_steps": 204665, "loss": 0.0008, "lr": 1.008732315204333e-06, "epoch": 2.737522292526812, "percentage": 54.75, "elapsed_time": "2:24:26", "remaining_time": "1:59:22", "throughput": 8714.82, "total_tokens": 75523944} +{"current_steps": 112060, "total_steps": 204665, "loss": 0.0002, "lr": 1.0086470408246225e-06, "epoch": 2.7376444433586595, "percentage": 54.75, "elapsed_time": "2:24:26", "remaining_time": "1:59:21", "throughput": 8714.91, "total_tokens": 75527848} +{"current_steps": 112065, "total_steps": 204665, "loss": 0.0832, "lr": 1.0085617663820288e-06, "epoch": 2.7377665941905063, "percentage": 54.76, "elapsed_time": "2:24:26", "remaining_time": "1:59:21", "throughput": 8714.91, "total_tokens": 75530856} +{"current_steps": 112070, "total_steps": 204665, "loss": 0.0334, "lr": 1.0084764918771711e-06, "epoch": 2.7378887450223535, "percentage": 54.76, "elapsed_time": "2:24:27", "remaining_time": "1:59:21", "throughput": 8714.98, "total_tokens": 75534504} +{"current_steps": 112075, "total_steps": 204665, "loss": 0.0002, "lr": 1.0083912173106703e-06, "epoch": 2.7380108958542007, "percentage": 54.76, "elapsed_time": "2:24:27", "remaining_time": "1:59:20", "throughput": 8715.0, "total_tokens": 75537704} +{"current_steps": 112080, "total_steps": 204665, "loss": 0.0529, "lr": 1.0083059426831466e-06, "epoch": 2.738133046686048, "percentage": 54.76, "elapsed_time": "2:24:27", "remaining_time": "1:59:20", "throughput": 8715.0, "total_tokens": 75540712} +{"current_steps": 112085, "total_steps": 204665, "loss": 0.0003, "lr": 1.0082206679952197e-06, "epoch": 2.738255197517895, "percentage": 54.77, "elapsed_time": "2:24:28", "remaining_time": "1:59:19", "throughput": 8715.03, "total_tokens": 75543912} +{"current_steps": 112090, "total_steps": 204665, "loss": 0.077, "lr": 1.00813539324751e-06, "epoch": 2.7383773483497422, "percentage": 54.77, "elapsed_time": "2:24:28", "remaining_time": "1:59:19", "throughput": 8715.1, "total_tokens": 75547560} +{"current_steps": 112095, "total_steps": 204665, "loss": 0.0003, "lr": 1.0080501184406372e-06, "epoch": 2.7384994991815894, "percentage": 54.77, "elapsed_time": "2:24:28", "remaining_time": "1:59:18", "throughput": 8715.11, "total_tokens": 75550632} +{"current_steps": 112100, "total_steps": 204665, "loss": 0.0004, "lr": 1.007964843575222e-06, "epoch": 2.7386216500134366, "percentage": 54.77, "elapsed_time": "2:24:29", "remaining_time": "1:59:18", "throughput": 8715.16, "total_tokens": 75554088} +{"current_steps": 112105, "total_steps": 204665, "loss": 0.0002, "lr": 1.007879568651884e-06, "epoch": 2.738743800845284, "percentage": 54.77, "elapsed_time": "2:24:29", "remaining_time": "1:59:18", "throughput": 8715.2, "total_tokens": 75557480} +{"current_steps": 112110, "total_steps": 204665, "loss": 0.1363, "lr": 1.007794293671244e-06, "epoch": 2.738865951677131, "percentage": 54.78, "elapsed_time": "2:24:29", "remaining_time": "1:59:17", "throughput": 8715.28, "total_tokens": 75561320} +{"current_steps": 112115, "total_steps": 204665, "loss": 0.0579, "lr": 1.0077090186339218e-06, "epoch": 2.738988102508978, "percentage": 54.78, "elapsed_time": "2:24:30", "remaining_time": "1:59:17", "throughput": 8715.31, "total_tokens": 75564648} +{"current_steps": 112120, "total_steps": 204665, "loss": 0.0003, "lr": 1.0076237435405374e-06, "epoch": 2.7391102533408254, "percentage": 54.78, "elapsed_time": "2:24:30", "remaining_time": "1:59:16", "throughput": 8715.34, "total_tokens": 75567912} +{"current_steps": 112125, "total_steps": 204665, "loss": 0.0162, "lr": 1.0075384683917111e-06, "epoch": 2.7392324041726726, "percentage": 54.78, "elapsed_time": "2:24:31", "remaining_time": "1:59:16", "throughput": 8715.38, "total_tokens": 75571240} +{"current_steps": 112130, "total_steps": 204665, "loss": 0.0249, "lr": 1.007453193188063e-06, "epoch": 2.7393545550045197, "percentage": 54.79, "elapsed_time": "2:24:31", "remaining_time": "1:59:16", "throughput": 8715.43, "total_tokens": 75574696} +{"current_steps": 112135, "total_steps": 204665, "loss": 0.1026, "lr": 1.0073679179302133e-06, "epoch": 2.739476705836367, "percentage": 54.79, "elapsed_time": "2:24:31", "remaining_time": "1:59:15", "throughput": 8715.44, "total_tokens": 75577832} +{"current_steps": 112140, "total_steps": 204665, "loss": 0.0002, "lr": 1.0072826426187821e-06, "epoch": 2.7395988566682137, "percentage": 54.79, "elapsed_time": "2:24:32", "remaining_time": "1:59:15", "throughput": 8715.48, "total_tokens": 75581224} +{"current_steps": 112145, "total_steps": 204665, "loss": 0.0561, "lr": 1.0071973672543898e-06, "epoch": 2.7397210075000613, "percentage": 54.79, "elapsed_time": "2:24:32", "remaining_time": "1:59:14", "throughput": 8715.54, "total_tokens": 75584744} +{"current_steps": 112150, "total_steps": 204665, "loss": 0.0002, "lr": 1.0071120918376563e-06, "epoch": 2.739843158331908, "percentage": 54.8, "elapsed_time": "2:24:32", "remaining_time": "1:59:14", "throughput": 8715.58, "total_tokens": 75588200} +{"current_steps": 112155, "total_steps": 204665, "loss": 0.0543, "lr": 1.0070268163692017e-06, "epoch": 2.7399653091637552, "percentage": 54.8, "elapsed_time": "2:24:33", "remaining_time": "1:59:13", "throughput": 8715.59, "total_tokens": 75591208} +{"current_steps": 112160, "total_steps": 204665, "loss": 0.0554, "lr": 1.0069415408496458e-06, "epoch": 2.7400874599956024, "percentage": 54.8, "elapsed_time": "2:24:33", "remaining_time": "1:59:13", "throughput": 8715.61, "total_tokens": 75594408} +{"current_steps": 112165, "total_steps": 204665, "loss": 0.0006, "lr": 1.0068562652796095e-06, "epoch": 2.7402096108274496, "percentage": 54.8, "elapsed_time": "2:24:33", "remaining_time": "1:59:13", "throughput": 8715.68, "total_tokens": 75598120} +{"current_steps": 112170, "total_steps": 204665, "loss": 0.0003, "lr": 1.0067709896597126e-06, "epoch": 2.740331761659297, "percentage": 54.81, "elapsed_time": "2:24:34", "remaining_time": "1:59:12", "throughput": 8715.69, "total_tokens": 75601192} +{"current_steps": 112175, "total_steps": 204665, "loss": 0.0003, "lr": 1.0066857139905752e-06, "epoch": 2.740453912491144, "percentage": 54.81, "elapsed_time": "2:24:34", "remaining_time": "1:59:12", "throughput": 8715.71, "total_tokens": 75604328} +{"current_steps": 112180, "total_steps": 204665, "loss": 0.048, "lr": 1.0066004382728176e-06, "epoch": 2.740576063322991, "percentage": 54.81, "elapsed_time": "2:24:34", "remaining_time": "1:59:11", "throughput": 8715.76, "total_tokens": 75607784} +{"current_steps": 112185, "total_steps": 204665, "loss": 0.0001, "lr": 1.0065151625070595e-06, "epoch": 2.7406982141548384, "percentage": 54.81, "elapsed_time": "2:24:35", "remaining_time": "1:59:11", "throughput": 8715.78, "total_tokens": 75610984} +{"current_steps": 112190, "total_steps": 204665, "loss": 0.0001, "lr": 1.0064298866939216e-06, "epoch": 2.7408203649866856, "percentage": 54.82, "elapsed_time": "2:24:35", "remaining_time": "1:59:10", "throughput": 8715.85, "total_tokens": 75614632} +{"current_steps": 112195, "total_steps": 204665, "loss": 0.0069, "lr": 1.0063446108340236e-06, "epoch": 2.7409425158185328, "percentage": 54.82, "elapsed_time": "2:24:35", "remaining_time": "1:59:10", "throughput": 8715.88, "total_tokens": 75617960} +{"current_steps": 112200, "total_steps": 204665, "loss": 0.0001, "lr": 1.0062593349279865e-06, "epoch": 2.74106466665038, "percentage": 54.82, "elapsed_time": "2:24:36", "remaining_time": "1:59:10", "throughput": 8715.95, "total_tokens": 75621672} +{"current_steps": 112205, "total_steps": 204665, "loss": 0.0379, "lr": 1.0061740589764294e-06, "epoch": 2.741186817482227, "percentage": 54.82, "elapsed_time": "2:24:36", "remaining_time": "1:59:09", "throughput": 8715.99, "total_tokens": 75625000} +{"current_steps": 112210, "total_steps": 204665, "loss": 0.0185, "lr": 1.0060887829799728e-06, "epoch": 2.7413089683140743, "percentage": 54.83, "elapsed_time": "2:24:36", "remaining_time": "1:59:09", "throughput": 8716.03, "total_tokens": 75628392} +{"current_steps": 112215, "total_steps": 204665, "loss": 0.0003, "lr": 1.0060035069392371e-06, "epoch": 2.7414311191459215, "percentage": 54.83, "elapsed_time": "2:24:37", "remaining_time": "1:59:08", "throughput": 8716.06, "total_tokens": 75631720} +{"current_steps": 112220, "total_steps": 204665, "loss": 0.0847, "lr": 1.0059182308548424e-06, "epoch": 2.7415532699777687, "percentage": 54.83, "elapsed_time": "2:24:37", "remaining_time": "1:59:08", "throughput": 8716.13, "total_tokens": 75635432} +{"current_steps": 112225, "total_steps": 204665, "loss": 0.0001, "lr": 1.0058329547274083e-06, "epoch": 2.7416754208096155, "percentage": 54.83, "elapsed_time": "2:24:37", "remaining_time": "1:59:08", "throughput": 8716.17, "total_tokens": 75638824} +{"current_steps": 112230, "total_steps": 204665, "loss": 0.0539, "lr": 1.0057476785575555e-06, "epoch": 2.741797571641463, "percentage": 54.84, "elapsed_time": "2:24:38", "remaining_time": "1:59:07", "throughput": 8716.23, "total_tokens": 75642472} +{"current_steps": 112235, "total_steps": 204665, "loss": 0.0001, "lr": 1.0056624023459045e-06, "epoch": 2.74191972247331, "percentage": 54.84, "elapsed_time": "2:24:38", "remaining_time": "1:59:07", "throughput": 8716.34, "total_tokens": 75646504} +{"current_steps": 112240, "total_steps": 204665, "loss": 0.0002, "lr": 1.0055771260930745e-06, "epoch": 2.7420418733051575, "percentage": 54.84, "elapsed_time": "2:24:39", "remaining_time": "1:59:06", "throughput": 8716.4, "total_tokens": 75650152} +{"current_steps": 112245, "total_steps": 204665, "loss": 0.1006, "lr": 1.0054918497996865e-06, "epoch": 2.742164024137004, "percentage": 54.84, "elapsed_time": "2:24:39", "remaining_time": "1:59:06", "throughput": 8716.42, "total_tokens": 75653288} +{"current_steps": 112250, "total_steps": 204665, "loss": 0.0779, "lr": 1.00540657346636e-06, "epoch": 2.7422861749688514, "percentage": 54.85, "elapsed_time": "2:24:39", "remaining_time": "1:59:06", "throughput": 8716.45, "total_tokens": 75656616} +{"current_steps": 112255, "total_steps": 204665, "loss": 0.0001, "lr": 1.0053212970937157e-06, "epoch": 2.7424083258006986, "percentage": 54.85, "elapsed_time": "2:24:40", "remaining_time": "1:59:05", "throughput": 8716.48, "total_tokens": 75659944} +{"current_steps": 112260, "total_steps": 204665, "loss": 0.0383, "lr": 1.0052360206823733e-06, "epoch": 2.742530476632546, "percentage": 54.85, "elapsed_time": "2:24:40", "remaining_time": "1:59:05", "throughput": 8716.49, "total_tokens": 75663016} +{"current_steps": 112265, "total_steps": 204665, "loss": 0.0728, "lr": 1.0051507442329533e-06, "epoch": 2.742652627464393, "percentage": 54.85, "elapsed_time": "2:24:40", "remaining_time": "1:59:04", "throughput": 8716.54, "total_tokens": 75666472} +{"current_steps": 112270, "total_steps": 204665, "loss": 0.1776, "lr": 1.0050654677460754e-06, "epoch": 2.74277477829624, "percentage": 54.86, "elapsed_time": "2:24:41", "remaining_time": "1:59:04", "throughput": 8716.56, "total_tokens": 75669672} +{"current_steps": 112275, "total_steps": 204665, "loss": 0.0558, "lr": 1.0049801912223603e-06, "epoch": 2.7428969291280874, "percentage": 54.86, "elapsed_time": "2:24:41", "remaining_time": "1:59:03", "throughput": 8716.57, "total_tokens": 75672744} +{"current_steps": 112280, "total_steps": 204665, "loss": 0.1883, "lr": 1.004894914662428e-06, "epoch": 2.7430190799599345, "percentage": 54.86, "elapsed_time": "2:24:41", "remaining_time": "1:59:03", "throughput": 8716.61, "total_tokens": 75676072} +{"current_steps": 112285, "total_steps": 204665, "loss": 0.1013, "lr": 1.0048096380668982e-06, "epoch": 2.7431412307917817, "percentage": 54.86, "elapsed_time": "2:24:42", "remaining_time": "1:59:03", "throughput": 8716.62, "total_tokens": 75679208} +{"current_steps": 112290, "total_steps": 204665, "loss": 0.0001, "lr": 1.0047243614363916e-06, "epoch": 2.743263381623629, "percentage": 54.87, "elapsed_time": "2:24:42", "remaining_time": "1:59:02", "throughput": 8716.7, "total_tokens": 75682984} +{"current_steps": 112295, "total_steps": 204665, "loss": 0.0477, "lr": 1.0046390847715282e-06, "epoch": 2.743385532455476, "percentage": 54.87, "elapsed_time": "2:24:42", "remaining_time": "1:59:02", "throughput": 8716.76, "total_tokens": 75686568} +{"current_steps": 112300, "total_steps": 204665, "loss": 0.0003, "lr": 1.0045538080729283e-06, "epoch": 2.7435076832873233, "percentage": 54.87, "elapsed_time": "2:24:43", "remaining_time": "1:59:01", "throughput": 8716.79, "total_tokens": 75689896} +{"current_steps": 112305, "total_steps": 204665, "loss": 0.0502, "lr": 1.004468531341212e-06, "epoch": 2.7436298341191705, "percentage": 54.87, "elapsed_time": "2:24:43", "remaining_time": "1:59:01", "throughput": 8716.84, "total_tokens": 75693416} +{"current_steps": 112310, "total_steps": 204665, "loss": 0.0489, "lr": 1.004383254576999e-06, "epoch": 2.7437519849510172, "percentage": 54.88, "elapsed_time": "2:24:43", "remaining_time": "1:59:00", "throughput": 8716.85, "total_tokens": 75696488} +{"current_steps": 112315, "total_steps": 204665, "loss": 0.1186, "lr": 1.00429797778091e-06, "epoch": 2.743874135782865, "percentage": 54.88, "elapsed_time": "2:24:44", "remaining_time": "1:59:00", "throughput": 8716.88, "total_tokens": 75699752} +{"current_steps": 112320, "total_steps": 204665, "loss": 0.056, "lr": 1.0042127009535647e-06, "epoch": 2.7439962866147116, "percentage": 54.88, "elapsed_time": "2:24:44", "remaining_time": "1:59:00", "throughput": 8716.86, "total_tokens": 75702568} +{"current_steps": 112325, "total_steps": 204665, "loss": 0.1368, "lr": 1.0041274240955834e-06, "epoch": 2.7441184374465593, "percentage": 54.88, "elapsed_time": "2:24:44", "remaining_time": "1:58:59", "throughput": 8716.89, "total_tokens": 75705832} +{"current_steps": 112330, "total_steps": 204665, "loss": 0.0002, "lr": 1.0040421472075865e-06, "epoch": 2.744240588278406, "percentage": 54.88, "elapsed_time": "2:24:45", "remaining_time": "1:58:59", "throughput": 8716.93, "total_tokens": 75709224} +{"current_steps": 112335, "total_steps": 204665, "loss": 0.1433, "lr": 1.0039568702901942e-06, "epoch": 2.744362739110253, "percentage": 54.89, "elapsed_time": "2:24:45", "remaining_time": "1:58:58", "throughput": 8716.95, "total_tokens": 75712488} +{"current_steps": 112340, "total_steps": 204665, "loss": 0.0005, "lr": 1.0038715933440265e-06, "epoch": 2.7444848899421004, "percentage": 54.89, "elapsed_time": "2:24:46", "remaining_time": "1:58:58", "throughput": 8717.0, "total_tokens": 75715944} +{"current_steps": 112345, "total_steps": 204665, "loss": 0.0003, "lr": 1.0037863163697034e-06, "epoch": 2.7446070407739476, "percentage": 54.89, "elapsed_time": "2:24:46", "remaining_time": "1:58:58", "throughput": 8717.0, "total_tokens": 75718952} +{"current_steps": 112350, "total_steps": 204665, "loss": 0.0397, "lr": 1.0037010393678449e-06, "epoch": 2.7447291916057948, "percentage": 54.89, "elapsed_time": "2:24:46", "remaining_time": "1:58:57", "throughput": 8717.02, "total_tokens": 75722152} +{"current_steps": 112355, "total_steps": 204665, "loss": 0.0005, "lr": 1.003615762339072e-06, "epoch": 2.744851342437642, "percentage": 54.9, "elapsed_time": "2:24:47", "remaining_time": "1:58:57", "throughput": 8717.02, "total_tokens": 75725032} +{"current_steps": 112360, "total_steps": 204665, "loss": 0.0392, "lr": 1.0035304852840042e-06, "epoch": 2.744973493269489, "percentage": 54.9, "elapsed_time": "2:24:47", "remaining_time": "1:58:56", "throughput": 8717.03, "total_tokens": 75728168} +{"current_steps": 112365, "total_steps": 204665, "loss": 0.0927, "lr": 1.0034452082032615e-06, "epoch": 2.7450956441013363, "percentage": 54.9, "elapsed_time": "2:24:47", "remaining_time": "1:58:56", "throughput": 8717.03, "total_tokens": 75731112} +{"current_steps": 112370, "total_steps": 204665, "loss": 0.0002, "lr": 1.0033599310974645e-06, "epoch": 2.7452177949331835, "percentage": 54.9, "elapsed_time": "2:24:48", "remaining_time": "1:58:55", "throughput": 8717.05, "total_tokens": 75734312} +{"current_steps": 112375, "total_steps": 204665, "loss": 0.0007, "lr": 1.003274653967233e-06, "epoch": 2.7453399457650307, "percentage": 54.91, "elapsed_time": "2:24:48", "remaining_time": "1:58:55", "throughput": 8717.09, "total_tokens": 75737704} +{"current_steps": 112380, "total_steps": 204665, "loss": 0.0004, "lr": 1.0031893768131874e-06, "epoch": 2.745462096596878, "percentage": 54.91, "elapsed_time": "2:24:48", "remaining_time": "1:58:55", "throughput": 8717.13, "total_tokens": 75741096} +{"current_steps": 112385, "total_steps": 204665, "loss": 0.0006, "lr": 1.0031040996359478e-06, "epoch": 2.745584247428725, "percentage": 54.91, "elapsed_time": "2:24:49", "remaining_time": "1:58:54", "throughput": 8717.16, "total_tokens": 75744424} +{"current_steps": 112390, "total_steps": 204665, "loss": 0.0002, "lr": 1.0030188224361344e-06, "epoch": 2.7457063982605723, "percentage": 54.91, "elapsed_time": "2:24:49", "remaining_time": "1:58:54", "throughput": 8717.24, "total_tokens": 75748136} +{"current_steps": 112395, "total_steps": 204665, "loss": 0.0344, "lr": 1.0029335452143673e-06, "epoch": 2.7458285490924195, "percentage": 54.92, "elapsed_time": "2:24:49", "remaining_time": "1:58:53", "throughput": 8717.44, "total_tokens": 75753384} +{"current_steps": 112400, "total_steps": 204665, "loss": 0.0005, "lr": 1.0028482679712667e-06, "epoch": 2.7459506999242667, "percentage": 54.92, "elapsed_time": "2:24:50", "remaining_time": "1:58:53", "throughput": 8717.47, "total_tokens": 75756648} +{"current_steps": 112405, "total_steps": 204665, "loss": 0.0003, "lr": 1.0027629907074527e-06, "epoch": 2.7460728507561134, "percentage": 54.92, "elapsed_time": "2:24:50", "remaining_time": "1:58:53", "throughput": 8717.51, "total_tokens": 75760104} +{"current_steps": 112410, "total_steps": 204665, "loss": 0.0717, "lr": 1.0026777134235456e-06, "epoch": 2.746195001587961, "percentage": 54.92, "elapsed_time": "2:24:50", "remaining_time": "1:58:52", "throughput": 8717.56, "total_tokens": 75763624} +{"current_steps": 112415, "total_steps": 204665, "loss": 0.0823, "lr": 1.0025924361201652e-06, "epoch": 2.746317152419808, "percentage": 54.93, "elapsed_time": "2:24:51", "remaining_time": "1:58:52", "throughput": 8717.57, "total_tokens": 75766632} +{"current_steps": 112420, "total_steps": 204665, "loss": 0.0001, "lr": 1.0025071587979322e-06, "epoch": 2.7464393032516554, "percentage": 54.93, "elapsed_time": "2:24:51", "remaining_time": "1:58:51", "throughput": 8717.58, "total_tokens": 75769768} +{"current_steps": 112425, "total_steps": 204665, "loss": 0.0004, "lr": 1.0024218814574664e-06, "epoch": 2.746561454083502, "percentage": 54.93, "elapsed_time": "2:24:51", "remaining_time": "1:58:51", "throughput": 8717.62, "total_tokens": 75773096} +{"current_steps": 112430, "total_steps": 204665, "loss": 0.0716, "lr": 1.0023366040993876e-06, "epoch": 2.7466836049153494, "percentage": 54.93, "elapsed_time": "2:24:52", "remaining_time": "1:58:50", "throughput": 8717.65, "total_tokens": 75776424} +{"current_steps": 112435, "total_steps": 204665, "loss": 0.0001, "lr": 1.0022513267243169e-06, "epoch": 2.7468057557471965, "percentage": 54.94, "elapsed_time": "2:24:52", "remaining_time": "1:58:50", "throughput": 8717.67, "total_tokens": 75779560} +{"current_steps": 112440, "total_steps": 204665, "loss": 0.0013, "lr": 1.0021660493328737e-06, "epoch": 2.7469279065790437, "percentage": 54.94, "elapsed_time": "2:24:52", "remaining_time": "1:58:50", "throughput": 8717.72, "total_tokens": 75783080} +{"current_steps": 112445, "total_steps": 204665, "loss": 0.046, "lr": 1.0020807719256784e-06, "epoch": 2.747050057410891, "percentage": 54.94, "elapsed_time": "2:24:53", "remaining_time": "1:58:49", "throughput": 8717.75, "total_tokens": 75786408} +{"current_steps": 112450, "total_steps": 204665, "loss": 0.0004, "lr": 1.0019954945033513e-06, "epoch": 2.747172208242738, "percentage": 54.94, "elapsed_time": "2:24:53", "remaining_time": "1:58:49", "throughput": 8717.76, "total_tokens": 75789480} +{"current_steps": 112455, "total_steps": 204665, "loss": 0.0313, "lr": 1.0019102170665124e-06, "epoch": 2.7472943590745853, "percentage": 54.95, "elapsed_time": "2:24:54", "remaining_time": "1:58:48", "throughput": 8717.81, "total_tokens": 75792936} +{"current_steps": 112460, "total_steps": 204665, "loss": 0.1355, "lr": 1.0018249396157818e-06, "epoch": 2.7474165099064325, "percentage": 54.95, "elapsed_time": "2:24:54", "remaining_time": "1:58:48", "throughput": 8717.83, "total_tokens": 75796136} +{"current_steps": 112465, "total_steps": 204665, "loss": 0.0003, "lr": 1.00173966215178e-06, "epoch": 2.7475386607382797, "percentage": 54.95, "elapsed_time": "2:24:54", "remaining_time": "1:58:48", "throughput": 8717.84, "total_tokens": 75799208} +{"current_steps": 112470, "total_steps": 204665, "loss": 0.0389, "lr": 1.0016543846751265e-06, "epoch": 2.747660811570127, "percentage": 54.95, "elapsed_time": "2:24:55", "remaining_time": "1:58:47", "throughput": 8717.88, "total_tokens": 75802536} +{"current_steps": 112475, "total_steps": 204665, "loss": 0.0, "lr": 1.001569107186442e-06, "epoch": 2.747782962401974, "percentage": 54.96, "elapsed_time": "2:24:55", "remaining_time": "1:58:47", "throughput": 8717.95, "total_tokens": 75806312} +{"current_steps": 112480, "total_steps": 204665, "loss": 0.0008, "lr": 1.0014838296863467e-06, "epoch": 2.7479051132338213, "percentage": 54.96, "elapsed_time": "2:24:55", "remaining_time": "1:58:46", "throughput": 8718.04, "total_tokens": 75810216} +{"current_steps": 112485, "total_steps": 204665, "loss": 0.0002, "lr": 1.0013985521754606e-06, "epoch": 2.7480272640656684, "percentage": 54.96, "elapsed_time": "2:24:56", "remaining_time": "1:58:46", "throughput": 8718.08, "total_tokens": 75813672} +{"current_steps": 112490, "total_steps": 204665, "loss": 0.0526, "lr": 1.0013132746544038e-06, "epoch": 2.748149414897515, "percentage": 54.96, "elapsed_time": "2:24:56", "remaining_time": "1:58:45", "throughput": 8718.13, "total_tokens": 75817192} +{"current_steps": 112495, "total_steps": 204665, "loss": 0.0005, "lr": 1.0012279971237965e-06, "epoch": 2.748271565729363, "percentage": 54.97, "elapsed_time": "2:24:56", "remaining_time": "1:58:45", "throughput": 8718.14, "total_tokens": 75820264} +{"current_steps": 112500, "total_steps": 204665, "loss": 0.0785, "lr": 1.0011427195842589e-06, "epoch": 2.7483937165612096, "percentage": 54.97, "elapsed_time": "2:24:57", "remaining_time": "1:58:45", "throughput": 8718.15, "total_tokens": 75823272} +{"current_steps": 112505, "total_steps": 204665, "loss": 0.0461, "lr": 1.0010574420364108e-06, "epoch": 2.748515867393057, "percentage": 54.97, "elapsed_time": "2:24:57", "remaining_time": "1:58:44", "throughput": 8718.16, "total_tokens": 75826344} +{"current_steps": 112510, "total_steps": 204665, "loss": 0.0008, "lr": 1.0009721644808734e-06, "epoch": 2.748638018224904, "percentage": 54.97, "elapsed_time": "2:24:57", "remaining_time": "1:58:44", "throughput": 8718.19, "total_tokens": 75829672} +{"current_steps": 112515, "total_steps": 204665, "loss": 0.0408, "lr": 1.0008868869182656e-06, "epoch": 2.748760169056751, "percentage": 54.98, "elapsed_time": "2:24:58", "remaining_time": "1:58:43", "throughput": 8718.24, "total_tokens": 75833192} +{"current_steps": 112520, "total_steps": 204665, "loss": 0.0003, "lr": 1.0008016093492082e-06, "epoch": 2.7488823198885983, "percentage": 54.98, "elapsed_time": "2:24:58", "remaining_time": "1:58:43", "throughput": 8718.32, "total_tokens": 75836968} +{"current_steps": 112525, "total_steps": 204665, "loss": 0.0, "lr": 1.0007163317743214e-06, "epoch": 2.7490044707204455, "percentage": 54.98, "elapsed_time": "2:24:58", "remaining_time": "1:58:43", "throughput": 8718.33, "total_tokens": 75840040} +{"current_steps": 112530, "total_steps": 204665, "loss": 0.025, "lr": 1.000631054194225e-06, "epoch": 2.7491266215522927, "percentage": 54.98, "elapsed_time": "2:24:59", "remaining_time": "1:58:42", "throughput": 8718.37, "total_tokens": 75843432} +{"current_steps": 112535, "total_steps": 204665, "loss": 0.0011, "lr": 1.0005457766095395e-06, "epoch": 2.74924877238414, "percentage": 54.98, "elapsed_time": "2:24:59", "remaining_time": "1:58:42", "throughput": 8718.41, "total_tokens": 75846824} +{"current_steps": 112540, "total_steps": 204665, "loss": 0.0434, "lr": 1.000460499020885e-06, "epoch": 2.749370923215987, "percentage": 54.99, "elapsed_time": "2:24:59", "remaining_time": "1:58:41", "throughput": 8718.46, "total_tokens": 75850344} +{"current_steps": 112545, "total_steps": 204665, "loss": 0.0944, "lr": 1.0003752214288818e-06, "epoch": 2.7494930740478343, "percentage": 54.99, "elapsed_time": "2:25:00", "remaining_time": "1:58:41", "throughput": 8718.54, "total_tokens": 75854120} +{"current_steps": 112550, "total_steps": 204665, "loss": 0.0541, "lr": 1.0002899438341498e-06, "epoch": 2.7496152248796815, "percentage": 54.99, "elapsed_time": "2:25:00", "remaining_time": "1:58:40", "throughput": 8718.57, "total_tokens": 75857384} +{"current_steps": 112555, "total_steps": 204665, "loss": 0.0001, "lr": 1.0002046662373092e-06, "epoch": 2.7497373757115287, "percentage": 54.99, "elapsed_time": "2:25:01", "remaining_time": "1:58:40", "throughput": 8718.62, "total_tokens": 75860968} +{"current_steps": 112560, "total_steps": 204665, "loss": 0.0002, "lr": 1.0001193886389803e-06, "epoch": 2.749859526543376, "percentage": 55.0, "elapsed_time": "2:25:01", "remaining_time": "1:58:40", "throughput": 8718.68, "total_tokens": 75864552} +{"current_steps": 112565, "total_steps": 204665, "loss": 0.0001, "lr": 1.000034111039783e-06, "epoch": 2.749981677375223, "percentage": 55.0, "elapsed_time": "2:25:01", "remaining_time": "1:58:39", "throughput": 8718.7, "total_tokens": 75867752} +{"current_steps": 112570, "total_steps": 204665, "loss": 0.0008, "lr": 9.99948833440338e-07, "epoch": 2.7501038282070702, "percentage": 55.0, "elapsed_time": "2:25:02", "remaining_time": "1:58:39", "throughput": 8718.77, "total_tokens": 75871400} +{"current_steps": 112574, "total_steps": 204665, "eval_loss": 0.20390097796916962, "epoch": 2.750201548872548, "percentage": 55.0, "elapsed_time": "2:25:49", "remaining_time": "1:59:17", "throughput": 8671.38, "total_tokens": 75874280} +{"current_steps": 112575, "total_steps": 204665, "loss": 0.0001, "lr": 9.998635558412646e-07, "epoch": 2.7502259790389174, "percentage": 55.0, "elapsed_time": "2:26:22", "remaining_time": "1:59:44", "throughput": 8639.07, "total_tokens": 75874856} +{"current_steps": 112580, "total_steps": 204665, "loss": 0.0653, "lr": 9.997782782431837e-07, "epoch": 2.7503481298707646, "percentage": 55.01, "elapsed_time": "2:26:23", "remaining_time": "1:59:44", "throughput": 8639.11, "total_tokens": 75878184} +{"current_steps": 112585, "total_steps": 204665, "loss": 0.0, "lr": 9.996930006467153e-07, "epoch": 2.7504702807026113, "percentage": 55.01, "elapsed_time": "2:26:23", "remaining_time": "1:59:43", "throughput": 8639.14, "total_tokens": 75881448} +{"current_steps": 112590, "total_steps": 204665, "loss": 0.0685, "lr": 9.996077230524793e-07, "epoch": 2.750592431534459, "percentage": 55.01, "elapsed_time": "2:26:23", "remaining_time": "1:59:43", "throughput": 8639.14, "total_tokens": 75884392} +{"current_steps": 112595, "total_steps": 204665, "loss": 0.0523, "lr": 9.995224454610963e-07, "epoch": 2.7507145823663057, "percentage": 55.01, "elapsed_time": "2:26:24", "remaining_time": "1:59:42", "throughput": 8639.16, "total_tokens": 75887464} +{"current_steps": 112600, "total_steps": 204665, "loss": 0.0001, "lr": 9.994371678731857e-07, "epoch": 2.750836733198153, "percentage": 55.02, "elapsed_time": "2:26:24", "remaining_time": "1:59:42", "throughput": 8639.17, "total_tokens": 75890472} +{"current_steps": 112605, "total_steps": 204665, "loss": 0.0001, "lr": 9.993518902893688e-07, "epoch": 2.75095888403, "percentage": 55.02, "elapsed_time": "2:26:24", "remaining_time": "1:59:42", "throughput": 8639.21, "total_tokens": 75893800} +{"current_steps": 112610, "total_steps": 204665, "loss": 0.1098, "lr": 9.992666127102648e-07, "epoch": 2.7510810348618473, "percentage": 55.02, "elapsed_time": "2:26:25", "remaining_time": "1:59:41", "throughput": 8639.25, "total_tokens": 75897192} +{"current_steps": 112615, "total_steps": 204665, "loss": 0.0004, "lr": 9.991813351364941e-07, "epoch": 2.7512031856936945, "percentage": 55.02, "elapsed_time": "2:26:25", "remaining_time": "1:59:41", "throughput": 8639.28, "total_tokens": 75900392} +{"current_steps": 112620, "total_steps": 204665, "loss": 0.0393, "lr": 9.990960575686773e-07, "epoch": 2.7513253365255417, "percentage": 55.03, "elapsed_time": "2:26:25", "remaining_time": "1:59:40", "throughput": 8639.35, "total_tokens": 75904040} +{"current_steps": 112625, "total_steps": 204665, "loss": 0.0002, "lr": 9.990107800074338e-07, "epoch": 2.751447487357389, "percentage": 55.03, "elapsed_time": "2:26:26", "remaining_time": "1:59:40", "throughput": 8639.39, "total_tokens": 75907432} +{"current_steps": 112630, "total_steps": 204665, "loss": 0.0374, "lr": 9.989255024533846e-07, "epoch": 2.751569638189236, "percentage": 55.03, "elapsed_time": "2:26:26", "remaining_time": "1:59:39", "throughput": 8639.43, "total_tokens": 75910824} +{"current_steps": 112635, "total_steps": 204665, "loss": 0.1025, "lr": 9.98840224907149e-07, "epoch": 2.7516917890210832, "percentage": 55.03, "elapsed_time": "2:26:26", "remaining_time": "1:59:39", "throughput": 8639.47, "total_tokens": 75914088} +{"current_steps": 112640, "total_steps": 204665, "loss": 0.0001, "lr": 9.98754947369348e-07, "epoch": 2.7518139398529304, "percentage": 55.04, "elapsed_time": "2:26:27", "remaining_time": "1:59:39", "throughput": 8639.49, "total_tokens": 75917224} +{"current_steps": 112645, "total_steps": 204665, "loss": 0.0677, "lr": 9.98669669840601e-07, "epoch": 2.7519360906847776, "percentage": 55.04, "elapsed_time": "2:26:27", "remaining_time": "1:59:38", "throughput": 8639.55, "total_tokens": 75920808} +{"current_steps": 112650, "total_steps": 204665, "loss": 0.0001, "lr": 9.985843923215284e-07, "epoch": 2.752058241516625, "percentage": 55.04, "elapsed_time": "2:26:27", "remaining_time": "1:59:38", "throughput": 8639.62, "total_tokens": 75924520} +{"current_steps": 112655, "total_steps": 204665, "loss": 0.0261, "lr": 9.98499114812751e-07, "epoch": 2.752180392348472, "percentage": 55.04, "elapsed_time": "2:26:28", "remaining_time": "1:59:37", "throughput": 8639.63, "total_tokens": 75927528} +{"current_steps": 112660, "total_steps": 204665, "loss": 0.0002, "lr": 9.98413837314888e-07, "epoch": 2.752302543180319, "percentage": 55.05, "elapsed_time": "2:26:28", "remaining_time": "1:59:37", "throughput": 8639.64, "total_tokens": 75930536} +{"current_steps": 112665, "total_steps": 204665, "loss": 0.0001, "lr": 9.983285598285606e-07, "epoch": 2.7524246940121664, "percentage": 55.05, "elapsed_time": "2:26:28", "remaining_time": "1:59:36", "throughput": 8639.71, "total_tokens": 75934248} +{"current_steps": 112670, "total_steps": 204665, "loss": 0.0701, "lr": 9.98243282354388e-07, "epoch": 2.752546844844013, "percentage": 55.05, "elapsed_time": "2:26:29", "remaining_time": "1:59:36", "throughput": 8639.73, "total_tokens": 75937320} +{"current_steps": 112675, "total_steps": 204665, "loss": 0.095, "lr": 9.981580048929904e-07, "epoch": 2.7526689956758608, "percentage": 55.05, "elapsed_time": "2:26:29", "remaining_time": "1:59:36", "throughput": 8639.71, "total_tokens": 75940072} +{"current_steps": 112680, "total_steps": 204665, "loss": 0.0005, "lr": 9.980727274449886e-07, "epoch": 2.7527911465077075, "percentage": 55.06, "elapsed_time": "2:26:30", "remaining_time": "1:59:35", "throughput": 8639.79, "total_tokens": 75943848} +{"current_steps": 112685, "total_steps": 204665, "loss": 0.0933, "lr": 9.979874500110023e-07, "epoch": 2.752913297339555, "percentage": 55.06, "elapsed_time": "2:26:30", "remaining_time": "1:59:35", "throughput": 8639.81, "total_tokens": 75946984} +{"current_steps": 112690, "total_steps": 204665, "loss": 0.0004, "lr": 9.979021725916521e-07, "epoch": 2.753035448171402, "percentage": 55.06, "elapsed_time": "2:26:30", "remaining_time": "1:59:34", "throughput": 8639.83, "total_tokens": 75950120} +{"current_steps": 112695, "total_steps": 204665, "loss": 0.0001, "lr": 9.978168951875576e-07, "epoch": 2.753157599003249, "percentage": 55.06, "elapsed_time": "2:26:31", "remaining_time": "1:59:34", "throughput": 8639.87, "total_tokens": 75953512} +{"current_steps": 112700, "total_steps": 204665, "loss": 0.0721, "lr": 9.977316177993395e-07, "epoch": 2.7532797498350963, "percentage": 55.07, "elapsed_time": "2:26:31", "remaining_time": "1:59:33", "throughput": 8639.89, "total_tokens": 75956584} +{"current_steps": 112705, "total_steps": 204665, "loss": 0.0661, "lr": 9.976463404276173e-07, "epoch": 2.7534019006669435, "percentage": 55.07, "elapsed_time": "2:26:31", "remaining_time": "1:59:33", "throughput": 8639.95, "total_tokens": 75960168} +{"current_steps": 112710, "total_steps": 204665, "loss": 0.0001, "lr": 9.975610630730118e-07, "epoch": 2.7535240514987906, "percentage": 55.07, "elapsed_time": "2:26:32", "remaining_time": "1:59:33", "throughput": 8640.02, "total_tokens": 75963816} +{"current_steps": 112715, "total_steps": 204665, "loss": 0.0405, "lr": 9.97475785736143e-07, "epoch": 2.753646202330638, "percentage": 55.07, "elapsed_time": "2:26:32", "remaining_time": "1:59:32", "throughput": 8640.05, "total_tokens": 75967016} +{"current_steps": 112720, "total_steps": 204665, "loss": 0.0001, "lr": 9.973905084176307e-07, "epoch": 2.753768353162485, "percentage": 55.08, "elapsed_time": "2:26:32", "remaining_time": "1:59:32", "throughput": 8640.11, "total_tokens": 75970600} +{"current_steps": 112725, "total_steps": 204665, "loss": 0.1143, "lr": 9.973052311180956e-07, "epoch": 2.753890503994332, "percentage": 55.08, "elapsed_time": "2:26:33", "remaining_time": "1:59:31", "throughput": 8640.13, "total_tokens": 75973736} +{"current_steps": 112730, "total_steps": 204665, "loss": 0.0002, "lr": 9.972199538381573e-07, "epoch": 2.7540126548261794, "percentage": 55.08, "elapsed_time": "2:26:33", "remaining_time": "1:59:31", "throughput": 8640.18, "total_tokens": 75977192} +{"current_steps": 112735, "total_steps": 204665, "loss": 0.0349, "lr": 9.97134676578436e-07, "epoch": 2.7541348056580266, "percentage": 55.08, "elapsed_time": "2:26:33", "remaining_time": "1:59:30", "throughput": 8640.21, "total_tokens": 75980520} +{"current_steps": 112740, "total_steps": 204665, "loss": 0.2066, "lr": 9.970493993395527e-07, "epoch": 2.754256956489874, "percentage": 55.09, "elapsed_time": "2:26:34", "remaining_time": "1:59:30", "throughput": 8640.27, "total_tokens": 75984104} +{"current_steps": 112745, "total_steps": 204665, "loss": 0.0005, "lr": 9.969641221221267e-07, "epoch": 2.754379107321721, "percentage": 55.09, "elapsed_time": "2:26:34", "remaining_time": "1:59:30", "throughput": 8640.3, "total_tokens": 75987368} +{"current_steps": 112750, "total_steps": 204665, "loss": 0.0365, "lr": 9.968788449267786e-07, "epoch": 2.754501258153568, "percentage": 55.09, "elapsed_time": "2:26:34", "remaining_time": "1:59:29", "throughput": 8640.33, "total_tokens": 75990632} +{"current_steps": 112755, "total_steps": 204665, "loss": 0.0005, "lr": 9.96793567754128e-07, "epoch": 2.7546234089854154, "percentage": 55.09, "elapsed_time": "2:26:35", "remaining_time": "1:59:29", "throughput": 8640.4, "total_tokens": 75994216} +{"current_steps": 112760, "total_steps": 204665, "loss": 0.0001, "lr": 9.967082906047958e-07, "epoch": 2.7547455598172625, "percentage": 55.09, "elapsed_time": "2:26:35", "remaining_time": "1:59:28", "throughput": 8640.43, "total_tokens": 75997480} +{"current_steps": 112765, "total_steps": 204665, "loss": 0.0611, "lr": 9.966230134794017e-07, "epoch": 2.7548677106491093, "percentage": 55.1, "elapsed_time": "2:26:35", "remaining_time": "1:59:28", "throughput": 8640.46, "total_tokens": 76000744} +{"current_steps": 112770, "total_steps": 204665, "loss": 0.0002, "lr": 9.965377363785657e-07, "epoch": 2.754989861480957, "percentage": 55.1, "elapsed_time": "2:26:36", "remaining_time": "1:59:27", "throughput": 8640.54, "total_tokens": 76004456} +{"current_steps": 112775, "total_steps": 204665, "loss": 0.146, "lr": 9.964524593029089e-07, "epoch": 2.7551120123128037, "percentage": 55.1, "elapsed_time": "2:26:36", "remaining_time": "1:59:27", "throughput": 8640.56, "total_tokens": 76007656} +{"current_steps": 112780, "total_steps": 204665, "loss": 0.0002, "lr": 9.963671822530499e-07, "epoch": 2.755234163144651, "percentage": 55.1, "elapsed_time": "2:26:36", "remaining_time": "1:59:27", "throughput": 8640.6, "total_tokens": 76010920} +{"current_steps": 112785, "total_steps": 204665, "loss": 0.0003, "lr": 9.962819052296105e-07, "epoch": 2.755356313976498, "percentage": 55.11, "elapsed_time": "2:26:37", "remaining_time": "1:59:26", "throughput": 8640.65, "total_tokens": 76014440} +{"current_steps": 112790, "total_steps": 204665, "loss": 0.0998, "lr": 9.961966282332093e-07, "epoch": 2.7554784648083452, "percentage": 55.11, "elapsed_time": "2:26:37", "remaining_time": "1:59:26", "throughput": 8640.68, "total_tokens": 76017704} +{"current_steps": 112795, "total_steps": 204665, "loss": 0.0668, "lr": 9.96111351264468e-07, "epoch": 2.7556006156401924, "percentage": 55.11, "elapsed_time": "2:26:37", "remaining_time": "1:59:25", "throughput": 8640.69, "total_tokens": 76020648} +{"current_steps": 112800, "total_steps": 204665, "loss": 0.0536, "lr": 9.960260743240054e-07, "epoch": 2.7557227664720396, "percentage": 55.11, "elapsed_time": "2:26:38", "remaining_time": "1:59:25", "throughput": 8640.69, "total_tokens": 76023592} +{"current_steps": 112805, "total_steps": 204665, "loss": 0.0483, "lr": 9.959407974124423e-07, "epoch": 2.755844917303887, "percentage": 55.12, "elapsed_time": "2:26:38", "remaining_time": "1:59:24", "throughput": 8640.69, "total_tokens": 76026536} +{"current_steps": 112810, "total_steps": 204665, "loss": 0.0415, "lr": 9.958555205303992e-07, "epoch": 2.755967068135734, "percentage": 55.12, "elapsed_time": "2:26:39", "remaining_time": "1:59:24", "throughput": 8640.72, "total_tokens": 76029800} +{"current_steps": 112815, "total_steps": 204665, "loss": 0.0215, "lr": 9.957702436784956e-07, "epoch": 2.756089218967581, "percentage": 55.12, "elapsed_time": "2:26:39", "remaining_time": "1:59:24", "throughput": 8640.72, "total_tokens": 76032616} +{"current_steps": 112820, "total_steps": 204665, "loss": 0.1356, "lr": 9.95684966857352e-07, "epoch": 2.7562113697994284, "percentage": 55.12, "elapsed_time": "2:26:39", "remaining_time": "1:59:23", "throughput": 8640.73, "total_tokens": 76035624} +{"current_steps": 112825, "total_steps": 204665, "loss": 0.001, "lr": 9.955996900675888e-07, "epoch": 2.7563335206312756, "percentage": 55.13, "elapsed_time": "2:26:40", "remaining_time": "1:59:23", "throughput": 8640.75, "total_tokens": 76038760} +{"current_steps": 112830, "total_steps": 204665, "loss": 0.0445, "lr": 9.955144133098253e-07, "epoch": 2.7564556714631228, "percentage": 55.13, "elapsed_time": "2:26:40", "remaining_time": "1:59:22", "throughput": 8640.81, "total_tokens": 76042344} +{"current_steps": 112835, "total_steps": 204665, "loss": 0.0004, "lr": 9.954291365846825e-07, "epoch": 2.75657782229497, "percentage": 55.13, "elapsed_time": "2:26:40", "remaining_time": "1:59:22", "throughput": 8640.84, "total_tokens": 76045608} +{"current_steps": 112840, "total_steps": 204665, "loss": 0.0006, "lr": 9.953438598927801e-07, "epoch": 2.756699973126817, "percentage": 55.13, "elapsed_time": "2:26:41", "remaining_time": "1:59:21", "throughput": 8640.86, "total_tokens": 76048744} +{"current_steps": 112845, "total_steps": 204665, "loss": 0.0003, "lr": 9.952585832347387e-07, "epoch": 2.7568221239586643, "percentage": 55.14, "elapsed_time": "2:26:41", "remaining_time": "1:59:21", "throughput": 8640.87, "total_tokens": 76051816} +{"current_steps": 112850, "total_steps": 204665, "loss": 0.0392, "lr": 9.951733066111776e-07, "epoch": 2.756944274790511, "percentage": 55.14, "elapsed_time": "2:26:41", "remaining_time": "1:59:21", "throughput": 8640.89, "total_tokens": 76054952} +{"current_steps": 112855, "total_steps": 204665, "loss": 0.1102, "lr": 9.950880300227183e-07, "epoch": 2.7570664256223587, "percentage": 55.14, "elapsed_time": "2:26:42", "remaining_time": "1:59:20", "throughput": 8640.93, "total_tokens": 76058280} +{"current_steps": 112860, "total_steps": 204665, "loss": 0.0001, "lr": 9.950027534699793e-07, "epoch": 2.7571885764542055, "percentage": 55.14, "elapsed_time": "2:26:42", "remaining_time": "1:59:20", "throughput": 8641.01, "total_tokens": 76062120} +{"current_steps": 112865, "total_steps": 204665, "loss": 0.0002, "lr": 9.949174769535821e-07, "epoch": 2.757310727286053, "percentage": 55.15, "elapsed_time": "2:26:42", "remaining_time": "1:59:19", "throughput": 8641.05, "total_tokens": 76065448} +{"current_steps": 112870, "total_steps": 204665, "loss": 0.1247, "lr": 9.948322004741465e-07, "epoch": 2.7574328781179, "percentage": 55.15, "elapsed_time": "2:26:43", "remaining_time": "1:59:19", "throughput": 8641.11, "total_tokens": 76069032} +{"current_steps": 112875, "total_steps": 204665, "loss": 0.0006, "lr": 9.947469240322922e-07, "epoch": 2.757555028949747, "percentage": 55.15, "elapsed_time": "2:26:43", "remaining_time": "1:59:19", "throughput": 8641.16, "total_tokens": 76072616} +{"current_steps": 112880, "total_steps": 204665, "loss": 0.0514, "lr": 9.946616476286402e-07, "epoch": 2.757677179781594, "percentage": 55.15, "elapsed_time": "2:26:43", "remaining_time": "1:59:18", "throughput": 8641.19, "total_tokens": 76075816} +{"current_steps": 112885, "total_steps": 204665, "loss": 0.0008, "lr": 9.945763712638094e-07, "epoch": 2.7577993306134414, "percentage": 55.16, "elapsed_time": "2:26:44", "remaining_time": "1:59:18", "throughput": 8641.22, "total_tokens": 76079144} +{"current_steps": 112890, "total_steps": 204665, "loss": 0.0408, "lr": 9.944910949384213e-07, "epoch": 2.7579214814452886, "percentage": 55.16, "elapsed_time": "2:26:44", "remaining_time": "1:59:17", "throughput": 8641.27, "total_tokens": 76082664} +{"current_steps": 112895, "total_steps": 204665, "loss": 0.0002, "lr": 9.944058186530951e-07, "epoch": 2.758043632277136, "percentage": 55.16, "elapsed_time": "2:26:44", "remaining_time": "1:59:17", "throughput": 8641.33, "total_tokens": 76086248} +{"current_steps": 112900, "total_steps": 204665, "loss": 0.0001, "lr": 9.94320542408451e-07, "epoch": 2.758165783108983, "percentage": 55.16, "elapsed_time": "2:26:45", "remaining_time": "1:59:16", "throughput": 8641.36, "total_tokens": 76089512} +{"current_steps": 112905, "total_steps": 204665, "loss": 0.1132, "lr": 9.9423526620511e-07, "epoch": 2.75828793394083, "percentage": 55.17, "elapsed_time": "2:26:45", "remaining_time": "1:59:16", "throughput": 8641.37, "total_tokens": 76092584} +{"current_steps": 112910, "total_steps": 204665, "loss": 0.0003, "lr": 9.941499900436915e-07, "epoch": 2.7584100847726774, "percentage": 55.17, "elapsed_time": "2:26:45", "remaining_time": "1:59:16", "throughput": 8641.42, "total_tokens": 76096104} +{"current_steps": 112915, "total_steps": 204665, "loss": 0.0628, "lr": 9.94064713924816e-07, "epoch": 2.7585322356045245, "percentage": 55.17, "elapsed_time": "2:26:46", "remaining_time": "1:59:15", "throughput": 8641.46, "total_tokens": 76099432} +{"current_steps": 112920, "total_steps": 204665, "loss": 0.0576, "lr": 9.93979437849103e-07, "epoch": 2.7586543864363717, "percentage": 55.17, "elapsed_time": "2:26:46", "remaining_time": "1:59:15", "throughput": 8641.56, "total_tokens": 76103464} +{"current_steps": 112925, "total_steps": 204665, "loss": 0.049, "lr": 9.938941618171736e-07, "epoch": 2.758776537268219, "percentage": 55.18, "elapsed_time": "2:26:47", "remaining_time": "1:59:14", "throughput": 8641.58, "total_tokens": 76106664} +{"current_steps": 112930, "total_steps": 204665, "loss": 0.0003, "lr": 9.938088858296477e-07, "epoch": 2.758898688100066, "percentage": 55.18, "elapsed_time": "2:26:47", "remaining_time": "1:59:14", "throughput": 8641.65, "total_tokens": 76110376} +{"current_steps": 112935, "total_steps": 204665, "loss": 0.0705, "lr": 9.937236098871447e-07, "epoch": 2.759020838931913, "percentage": 55.18, "elapsed_time": "2:26:47", "remaining_time": "1:59:13", "throughput": 8641.73, "total_tokens": 76114152} +{"current_steps": 112940, "total_steps": 204665, "loss": 0.0008, "lr": 9.936383339902858e-07, "epoch": 2.7591429897637605, "percentage": 55.18, "elapsed_time": "2:26:48", "remaining_time": "1:59:13", "throughput": 8641.77, "total_tokens": 76117480} +{"current_steps": 112945, "total_steps": 204665, "loss": 0.0002, "lr": 9.935530581396902e-07, "epoch": 2.7592651405956072, "percentage": 55.19, "elapsed_time": "2:26:48", "remaining_time": "1:59:13", "throughput": 8641.8, "total_tokens": 76120744} +{"current_steps": 112950, "total_steps": 204665, "loss": 0.0257, "lr": 9.93467782335979e-07, "epoch": 2.759387291427455, "percentage": 55.19, "elapsed_time": "2:26:48", "remaining_time": "1:59:12", "throughput": 8641.85, "total_tokens": 76124264} +{"current_steps": 112955, "total_steps": 204665, "loss": 0.0001, "lr": 9.933825065797711e-07, "epoch": 2.7595094422593016, "percentage": 55.19, "elapsed_time": "2:26:49", "remaining_time": "1:59:12", "throughput": 8641.95, "total_tokens": 76128296} +{"current_steps": 112960, "total_steps": 204665, "loss": 0.0007, "lr": 9.932972308716877e-07, "epoch": 2.759631593091149, "percentage": 55.19, "elapsed_time": "2:26:49", "remaining_time": "1:59:11", "throughput": 8642.02, "total_tokens": 76132008} +{"current_steps": 112965, "total_steps": 204665, "loss": 0.0002, "lr": 9.93211955212349e-07, "epoch": 2.759753743922996, "percentage": 55.2, "elapsed_time": "2:26:49", "remaining_time": "1:59:11", "throughput": 8642.12, "total_tokens": 76136104} +{"current_steps": 112970, "total_steps": 204665, "loss": 0.0009, "lr": 9.931266796023744e-07, "epoch": 2.759875894754843, "percentage": 55.2, "elapsed_time": "2:26:50", "remaining_time": "1:59:11", "throughput": 8642.18, "total_tokens": 76139688} +{"current_steps": 112975, "total_steps": 204665, "loss": 0.0008, "lr": 9.930414040423848e-07, "epoch": 2.7599980455866904, "percentage": 55.2, "elapsed_time": "2:26:50", "remaining_time": "1:59:10", "throughput": 8642.25, "total_tokens": 76143400} +{"current_steps": 112980, "total_steps": 204665, "loss": 0.0592, "lr": 9.929561285329997e-07, "epoch": 2.7601201964185376, "percentage": 55.2, "elapsed_time": "2:26:50", "remaining_time": "1:59:10", "throughput": 8642.36, "total_tokens": 76147560} +{"current_steps": 112985, "total_steps": 204665, "loss": 0.0453, "lr": 9.928708530748395e-07, "epoch": 2.7602423472503848, "percentage": 55.2, "elapsed_time": "2:26:51", "remaining_time": "1:59:09", "throughput": 8642.37, "total_tokens": 76150760} +{"current_steps": 112990, "total_steps": 204665, "loss": 0.0001, "lr": 9.927855776685247e-07, "epoch": 2.760364498082232, "percentage": 55.21, "elapsed_time": "2:26:51", "remaining_time": "1:59:09", "throughput": 8642.42, "total_tokens": 76154216} +{"current_steps": 112995, "total_steps": 204665, "loss": 0.0004, "lr": 9.927003023146745e-07, "epoch": 2.760486648914079, "percentage": 55.21, "elapsed_time": "2:26:52", "remaining_time": "1:59:08", "throughput": 8642.43, "total_tokens": 76157352} +{"current_steps": 113000, "total_steps": 204665, "loss": 0.0006, "lr": 9.926150270139104e-07, "epoch": 2.7606087997459263, "percentage": 55.21, "elapsed_time": "2:26:52", "remaining_time": "1:59:08", "throughput": 8642.46, "total_tokens": 76160680} +{"current_steps": 113005, "total_steps": 204665, "loss": 0.0001, "lr": 9.925297517668512e-07, "epoch": 2.7607309505777735, "percentage": 55.21, "elapsed_time": "2:26:52", "remaining_time": "1:59:08", "throughput": 8642.49, "total_tokens": 76163944} +{"current_steps": 113010, "total_steps": 204665, "loss": 0.0002, "lr": 9.924444765741183e-07, "epoch": 2.7608531014096207, "percentage": 55.22, "elapsed_time": "2:26:53", "remaining_time": "1:59:07", "throughput": 8642.49, "total_tokens": 76167016} +{"current_steps": 113015, "total_steps": 204665, "loss": 0.0008, "lr": 9.923592014363305e-07, "epoch": 2.760975252241468, "percentage": 55.22, "elapsed_time": "2:26:53", "remaining_time": "1:59:07", "throughput": 8642.55, "total_tokens": 76170600} +{"current_steps": 113020, "total_steps": 204665, "loss": 0.0372, "lr": 9.92273926354109e-07, "epoch": 2.761097403073315, "percentage": 55.22, "elapsed_time": "2:26:53", "remaining_time": "1:59:06", "throughput": 8642.57, "total_tokens": 76173800} +{"current_steps": 113025, "total_steps": 204665, "loss": 0.0001, "lr": 9.921886513280735e-07, "epoch": 2.7612195539051623, "percentage": 55.22, "elapsed_time": "2:26:54", "remaining_time": "1:59:06", "throughput": 8642.58, "total_tokens": 76176936} +{"current_steps": 113030, "total_steps": 204665, "loss": 0.0004, "lr": 9.921033763588444e-07, "epoch": 2.761341704737009, "percentage": 55.23, "elapsed_time": "2:26:54", "remaining_time": "1:59:06", "throughput": 8642.58, "total_tokens": 76179880} +{"current_steps": 113035, "total_steps": 204665, "loss": 0.0002, "lr": 9.920181014470417e-07, "epoch": 2.7614638555688567, "percentage": 55.23, "elapsed_time": "2:26:54", "remaining_time": "1:59:05", "throughput": 8642.64, "total_tokens": 76183528} +{"current_steps": 113040, "total_steps": 204665, "loss": 0.0705, "lr": 9.919328265932852e-07, "epoch": 2.7615860064007034, "percentage": 55.23, "elapsed_time": "2:26:55", "remaining_time": "1:59:05", "throughput": 8642.67, "total_tokens": 76186856} +{"current_steps": 113045, "total_steps": 204665, "loss": 0.0, "lr": 9.918475517981958e-07, "epoch": 2.761708157232551, "percentage": 55.23, "elapsed_time": "2:26:55", "remaining_time": "1:59:04", "throughput": 8642.72, "total_tokens": 76190376} +{"current_steps": 113050, "total_steps": 204665, "loss": 0.049, "lr": 9.917622770623925e-07, "epoch": 2.7618303080643978, "percentage": 55.24, "elapsed_time": "2:26:55", "remaining_time": "1:59:04", "throughput": 8642.79, "total_tokens": 76194216} +{"current_steps": 113055, "total_steps": 204665, "loss": 0.0647, "lr": 9.916770023864964e-07, "epoch": 2.761952458896245, "percentage": 55.24, "elapsed_time": "2:26:56", "remaining_time": "1:59:03", "throughput": 8642.8, "total_tokens": 76197224} +{"current_steps": 113060, "total_steps": 204665, "loss": 0.0004, "lr": 9.915917277711277e-07, "epoch": 2.762074609728092, "percentage": 55.24, "elapsed_time": "2:26:56", "remaining_time": "1:59:03", "throughput": 8642.85, "total_tokens": 76200808} +{"current_steps": 113065, "total_steps": 204665, "loss": 0.0004, "lr": 9.915064532169058e-07, "epoch": 2.7621967605599393, "percentage": 55.24, "elapsed_time": "2:26:56", "remaining_time": "1:59:03", "throughput": 8642.85, "total_tokens": 76203816} +{"current_steps": 113070, "total_steps": 204665, "loss": 0.0001, "lr": 9.91421178724452e-07, "epoch": 2.7623189113917865, "percentage": 55.25, "elapsed_time": "2:26:57", "remaining_time": "1:59:02", "throughput": 8642.87, "total_tokens": 76206952} +{"current_steps": 113075, "total_steps": 204665, "loss": 0.0002, "lr": 9.913359042943848e-07, "epoch": 2.7624410622236337, "percentage": 55.25, "elapsed_time": "2:26:57", "remaining_time": "1:59:02", "throughput": 8642.88, "total_tokens": 76210024} +{"current_steps": 113080, "total_steps": 204665, "loss": 0.0261, "lr": 9.912506299273256e-07, "epoch": 2.762563213055481, "percentage": 55.25, "elapsed_time": "2:26:58", "remaining_time": "1:59:01", "throughput": 8642.92, "total_tokens": 76213480} +{"current_steps": 113085, "total_steps": 204665, "loss": 0.1107, "lr": 9.911653556238945e-07, "epoch": 2.762685363887328, "percentage": 55.25, "elapsed_time": "2:26:58", "remaining_time": "1:59:01", "throughput": 8642.92, "total_tokens": 76216488} +{"current_steps": 113090, "total_steps": 204665, "loss": 0.0676, "lr": 9.910800813847107e-07, "epoch": 2.7628075147191753, "percentage": 55.26, "elapsed_time": "2:26:58", "remaining_time": "1:59:00", "throughput": 8642.95, "total_tokens": 76219816} +{"current_steps": 113095, "total_steps": 204665, "loss": 0.0007, "lr": 9.909948072103956e-07, "epoch": 2.7629296655510225, "percentage": 55.26, "elapsed_time": "2:26:59", "remaining_time": "1:59:00", "throughput": 8642.98, "total_tokens": 76223144} +{"current_steps": 113100, "total_steps": 204665, "loss": 0.045, "lr": 9.90909533101568e-07, "epoch": 2.7630518163828697, "percentage": 55.26, "elapsed_time": "2:26:59", "remaining_time": "1:59:00", "throughput": 8643.02, "total_tokens": 76226536} +{"current_steps": 113105, "total_steps": 204665, "loss": 0.0005, "lr": 9.908242590588494e-07, "epoch": 2.763173967214717, "percentage": 55.26, "elapsed_time": "2:26:59", "remaining_time": "1:58:59", "throughput": 8643.1, "total_tokens": 76230248} +{"current_steps": 113110, "total_steps": 204665, "loss": 0.0003, "lr": 9.907389850828586e-07, "epoch": 2.763296118046564, "percentage": 55.27, "elapsed_time": "2:27:00", "remaining_time": "1:58:59", "throughput": 8643.13, "total_tokens": 76233576} +{"current_steps": 113115, "total_steps": 204665, "loss": 0.0006, "lr": 9.906537111742167e-07, "epoch": 2.763418268878411, "percentage": 55.27, "elapsed_time": "2:27:00", "remaining_time": "1:58:58", "throughput": 8643.18, "total_tokens": 76237096} +{"current_steps": 113120, "total_steps": 204665, "loss": 0.0524, "lr": 9.905684373335436e-07, "epoch": 2.7635404197102584, "percentage": 55.27, "elapsed_time": "2:27:00", "remaining_time": "1:58:58", "throughput": 8643.23, "total_tokens": 76240616} +{"current_steps": 113125, "total_steps": 204665, "loss": 0.0543, "lr": 9.90483163561459e-07, "epoch": 2.763662570542105, "percentage": 55.27, "elapsed_time": "2:27:01", "remaining_time": "1:58:58", "throughput": 8643.23, "total_tokens": 76243496} +{"current_steps": 113130, "total_steps": 204665, "loss": 0.0777, "lr": 9.90397889858584e-07, "epoch": 2.763784721373953, "percentage": 55.28, "elapsed_time": "2:27:01", "remaining_time": "1:58:57", "throughput": 8643.29, "total_tokens": 76247144} +{"current_steps": 113135, "total_steps": 204665, "loss": 0.0001, "lr": 9.903126162255379e-07, "epoch": 2.7639068722057996, "percentage": 55.28, "elapsed_time": "2:27:01", "remaining_time": "1:58:57", "throughput": 8643.31, "total_tokens": 76250280} +{"current_steps": 113140, "total_steps": 204665, "loss": 0.1067, "lr": 9.902273426629406e-07, "epoch": 2.7640290230376467, "percentage": 55.28, "elapsed_time": "2:27:02", "remaining_time": "1:58:56", "throughput": 8643.34, "total_tokens": 76253608} +{"current_steps": 113145, "total_steps": 204665, "loss": 0.0005, "lr": 9.901420691714135e-07, "epoch": 2.764151173869494, "percentage": 55.28, "elapsed_time": "2:27:02", "remaining_time": "1:58:56", "throughput": 8643.44, "total_tokens": 76257640} +{"current_steps": 113150, "total_steps": 204665, "loss": 0.0003, "lr": 9.900567957515752e-07, "epoch": 2.764273324701341, "percentage": 55.29, "elapsed_time": "2:27:02", "remaining_time": "1:58:55", "throughput": 8643.49, "total_tokens": 76261096} +{"current_steps": 113155, "total_steps": 204665, "loss": 0.0451, "lr": 9.89971522404047e-07, "epoch": 2.7643954755331883, "percentage": 55.29, "elapsed_time": "2:27:03", "remaining_time": "1:58:55", "throughput": 8643.53, "total_tokens": 76264552} +{"current_steps": 113160, "total_steps": 204665, "loss": 0.0003, "lr": 9.898862491294483e-07, "epoch": 2.7645176263650355, "percentage": 55.29, "elapsed_time": "2:27:03", "remaining_time": "1:58:55", "throughput": 8643.57, "total_tokens": 76267880} +{"current_steps": 113165, "total_steps": 204665, "loss": 0.0569, "lr": 9.898009759283999e-07, "epoch": 2.7646397771968827, "percentage": 55.29, "elapsed_time": "2:27:04", "remaining_time": "1:58:54", "throughput": 8643.6, "total_tokens": 76271144} +{"current_steps": 113170, "total_steps": 204665, "loss": 0.0003, "lr": 9.89715702801521e-07, "epoch": 2.76476192802873, "percentage": 55.3, "elapsed_time": "2:27:04", "remaining_time": "1:58:54", "throughput": 8643.65, "total_tokens": 76274600} +{"current_steps": 113175, "total_steps": 204665, "loss": 0.0003, "lr": 9.896304297494327e-07, "epoch": 2.764884078860577, "percentage": 55.3, "elapsed_time": "2:27:04", "remaining_time": "1:58:53", "throughput": 8643.69, "total_tokens": 76277992} +{"current_steps": 113180, "total_steps": 204665, "loss": 0.0003, "lr": 9.895451567727544e-07, "epoch": 2.7650062296924243, "percentage": 55.3, "elapsed_time": "2:27:05", "remaining_time": "1:58:53", "throughput": 8643.7, "total_tokens": 76281128} +{"current_steps": 113185, "total_steps": 204665, "loss": 0.0009, "lr": 9.894598838721069e-07, "epoch": 2.7651283805242715, "percentage": 55.3, "elapsed_time": "2:27:05", "remaining_time": "1:58:52", "throughput": 8643.74, "total_tokens": 76284456} +{"current_steps": 113190, "total_steps": 204665, "loss": 0.07, "lr": 9.893746110481097e-07, "epoch": 2.7652505313561186, "percentage": 55.31, "elapsed_time": "2:27:05", "remaining_time": "1:58:52", "throughput": 8643.77, "total_tokens": 76287656} +{"current_steps": 113195, "total_steps": 204665, "loss": 0.0002, "lr": 9.892893383013833e-07, "epoch": 2.765372682187966, "percentage": 55.31, "elapsed_time": "2:27:06", "remaining_time": "1:58:52", "throughput": 8643.79, "total_tokens": 76290856} +{"current_steps": 113200, "total_steps": 204665, "loss": 0.1193, "lr": 9.89204065632548e-07, "epoch": 2.765494833019813, "percentage": 55.31, "elapsed_time": "2:27:06", "remaining_time": "1:58:51", "throughput": 8643.81, "total_tokens": 76293992} +{"current_steps": 113205, "total_steps": 204665, "loss": 0.0896, "lr": 9.89118793042223e-07, "epoch": 2.76561698385166, "percentage": 55.31, "elapsed_time": "2:27:06", "remaining_time": "1:58:51", "throughput": 8643.83, "total_tokens": 76297128} +{"current_steps": 113210, "total_steps": 204665, "loss": 0.0004, "lr": 9.890335205310291e-07, "epoch": 2.765739134683507, "percentage": 55.31, "elapsed_time": "2:27:07", "remaining_time": "1:58:50", "throughput": 8643.89, "total_tokens": 76300648} +{"current_steps": 113215, "total_steps": 204665, "loss": 0.0428, "lr": 9.88948248099587e-07, "epoch": 2.7658612855153546, "percentage": 55.32, "elapsed_time": "2:27:07", "remaining_time": "1:58:50", "throughput": 8643.89, "total_tokens": 76303656} +{"current_steps": 113220, "total_steps": 204665, "loss": 0.0539, "lr": 9.888629757485156e-07, "epoch": 2.7659834363472013, "percentage": 55.32, "elapsed_time": "2:27:07", "remaining_time": "1:58:50", "throughput": 8643.99, "total_tokens": 76307560} +{"current_steps": 113225, "total_steps": 204665, "loss": 0.0398, "lr": 9.88777703478436e-07, "epoch": 2.7661055871790485, "percentage": 55.32, "elapsed_time": "2:27:08", "remaining_time": "1:58:49", "throughput": 8644.07, "total_tokens": 76311336} +{"current_steps": 113230, "total_steps": 204665, "loss": 0.0003, "lr": 9.886924312899679e-07, "epoch": 2.7662277380108957, "percentage": 55.32, "elapsed_time": "2:27:08", "remaining_time": "1:58:49", "throughput": 8644.12, "total_tokens": 76314856} +{"current_steps": 113235, "total_steps": 204665, "loss": 0.0458, "lr": 9.886071591837314e-07, "epoch": 2.766349888842743, "percentage": 55.33, "elapsed_time": "2:27:08", "remaining_time": "1:58:48", "throughput": 8644.16, "total_tokens": 76318184} +{"current_steps": 113240, "total_steps": 204665, "loss": 0.0004, "lr": 9.88521887160347e-07, "epoch": 2.76647203967459, "percentage": 55.33, "elapsed_time": "2:27:09", "remaining_time": "1:58:48", "throughput": 8644.18, "total_tokens": 76321320} +{"current_steps": 113245, "total_steps": 204665, "loss": 0.0004, "lr": 9.88436615220434e-07, "epoch": 2.7665941905064373, "percentage": 55.33, "elapsed_time": "2:27:09", "remaining_time": "1:58:47", "throughput": 8644.21, "total_tokens": 76324648} +{"current_steps": 113250, "total_steps": 204665, "loss": 0.0001, "lr": 9.883513433646135e-07, "epoch": 2.7667163413382845, "percentage": 55.33, "elapsed_time": "2:27:09", "remaining_time": "1:58:47", "throughput": 8644.24, "total_tokens": 76327848} +{"current_steps": 113255, "total_steps": 204665, "loss": 0.0001, "lr": 9.882660715935047e-07, "epoch": 2.7668384921701317, "percentage": 55.34, "elapsed_time": "2:27:10", "remaining_time": "1:58:47", "throughput": 8644.3, "total_tokens": 76331496} +{"current_steps": 113260, "total_steps": 204665, "loss": 0.0431, "lr": 9.881807999077288e-07, "epoch": 2.766960643001979, "percentage": 55.34, "elapsed_time": "2:27:10", "remaining_time": "1:58:46", "throughput": 8644.3, "total_tokens": 76334568} +{"current_steps": 113265, "total_steps": 204665, "loss": 0.0002, "lr": 9.880955283079047e-07, "epoch": 2.767082793833826, "percentage": 55.34, "elapsed_time": "2:27:10", "remaining_time": "1:58:46", "throughput": 8644.32, "total_tokens": 76337704} +{"current_steps": 113270, "total_steps": 204665, "loss": 0.0003, "lr": 9.880102567946533e-07, "epoch": 2.7672049446656732, "percentage": 55.34, "elapsed_time": "2:27:11", "remaining_time": "1:58:45", "throughput": 8644.35, "total_tokens": 76341032} +{"current_steps": 113275, "total_steps": 204665, "loss": 0.1252, "lr": 9.879249853685949e-07, "epoch": 2.7673270954975204, "percentage": 55.35, "elapsed_time": "2:27:11", "remaining_time": "1:58:45", "throughput": 8644.38, "total_tokens": 76344232} +{"current_steps": 113280, "total_steps": 204665, "loss": 0.0001, "lr": 9.878397140303487e-07, "epoch": 2.7674492463293676, "percentage": 55.35, "elapsed_time": "2:27:12", "remaining_time": "1:58:44", "throughput": 8644.46, "total_tokens": 76348008} +{"current_steps": 113285, "total_steps": 204665, "loss": 0.0323, "lr": 9.877544427805358e-07, "epoch": 2.767571397161215, "percentage": 55.35, "elapsed_time": "2:27:12", "remaining_time": "1:58:44", "throughput": 8644.49, "total_tokens": 76351336} +{"current_steps": 113290, "total_steps": 204665, "loss": 0.0003, "lr": 9.876691716197759e-07, "epoch": 2.767693547993062, "percentage": 55.35, "elapsed_time": "2:27:12", "remaining_time": "1:58:44", "throughput": 8644.49, "total_tokens": 76354216} +{"current_steps": 113295, "total_steps": 204665, "loss": 0.0585, "lr": 9.875839005486886e-07, "epoch": 2.7678156988249087, "percentage": 55.36, "elapsed_time": "2:27:13", "remaining_time": "1:58:43", "throughput": 8644.52, "total_tokens": 76357544} +{"current_steps": 113300, "total_steps": 204665, "loss": 0.0257, "lr": 9.87498629567895e-07, "epoch": 2.7679378496567564, "percentage": 55.36, "elapsed_time": "2:27:13", "remaining_time": "1:58:43", "throughput": 8644.59, "total_tokens": 76361256} +{"current_steps": 113305, "total_steps": 204665, "loss": 0.0002, "lr": 9.874133586780145e-07, "epoch": 2.768060000488603, "percentage": 55.36, "elapsed_time": "2:27:13", "remaining_time": "1:58:42", "throughput": 8644.63, "total_tokens": 76364520} +{"current_steps": 113310, "total_steps": 204665, "loss": 0.0002, "lr": 9.873280878796676e-07, "epoch": 2.7681821513204508, "percentage": 55.36, "elapsed_time": "2:27:14", "remaining_time": "1:58:42", "throughput": 8644.63, "total_tokens": 76367528} +{"current_steps": 113315, "total_steps": 204665, "loss": 0.0002, "lr": 9.87242817173474e-07, "epoch": 2.7683043021522975, "percentage": 55.37, "elapsed_time": "2:27:14", "remaining_time": "1:58:41", "throughput": 8644.66, "total_tokens": 76370792} +{"current_steps": 113320, "total_steps": 204665, "loss": 0.0002, "lr": 9.871575465600546e-07, "epoch": 2.7684264529841447, "percentage": 55.37, "elapsed_time": "2:27:14", "remaining_time": "1:58:41", "throughput": 8644.66, "total_tokens": 76373736} +{"current_steps": 113325, "total_steps": 204665, "loss": 0.0006, "lr": 9.870722760400285e-07, "epoch": 2.768548603815992, "percentage": 55.37, "elapsed_time": "2:27:15", "remaining_time": "1:58:41", "throughput": 8644.7, "total_tokens": 76377128} +{"current_steps": 113330, "total_steps": 204665, "loss": 0.0002, "lr": 9.869870056140163e-07, "epoch": 2.768670754647839, "percentage": 55.37, "elapsed_time": "2:27:15", "remaining_time": "1:58:40", "throughput": 8644.79, "total_tokens": 76380968} +{"current_steps": 113335, "total_steps": 204665, "loss": 0.0426, "lr": 9.869017352826382e-07, "epoch": 2.7687929054796863, "percentage": 55.38, "elapsed_time": "2:27:15", "remaining_time": "1:58:40", "throughput": 8644.81, "total_tokens": 76384168} +{"current_steps": 113340, "total_steps": 204665, "loss": 0.0001, "lr": 9.86816465046514e-07, "epoch": 2.7689150563115335, "percentage": 55.38, "elapsed_time": "2:27:16", "remaining_time": "1:58:39", "throughput": 8644.83, "total_tokens": 76387240} +{"current_steps": 113345, "total_steps": 204665, "loss": 0.0003, "lr": 9.867311949062644e-07, "epoch": 2.7690372071433806, "percentage": 55.38, "elapsed_time": "2:27:16", "remaining_time": "1:58:39", "throughput": 8644.85, "total_tokens": 76390504} +{"current_steps": 113350, "total_steps": 204665, "loss": 0.0004, "lr": 9.86645924862509e-07, "epoch": 2.769159357975228, "percentage": 55.38, "elapsed_time": "2:27:16", "remaining_time": "1:58:39", "throughput": 8644.87, "total_tokens": 76393576} +{"current_steps": 113355, "total_steps": 204665, "loss": 0.0725, "lr": 9.865606549158681e-07, "epoch": 2.769281508807075, "percentage": 55.39, "elapsed_time": "2:27:17", "remaining_time": "1:58:38", "throughput": 8644.91, "total_tokens": 76397032} +{"current_steps": 113360, "total_steps": 204665, "loss": 0.0001, "lr": 9.864753850669613e-07, "epoch": 2.769403659638922, "percentage": 55.39, "elapsed_time": "2:27:17", "remaining_time": "1:58:38", "throughput": 8644.98, "total_tokens": 76400744} +{"current_steps": 113365, "total_steps": 204665, "loss": 0.0001, "lr": 9.863901153164094e-07, "epoch": 2.7695258104707694, "percentage": 55.39, "elapsed_time": "2:27:17", "remaining_time": "1:58:37", "throughput": 8645.03, "total_tokens": 76404136} +{"current_steps": 113370, "total_steps": 204665, "loss": 0.1539, "lr": 9.863048456648324e-07, "epoch": 2.7696479613026166, "percentage": 55.39, "elapsed_time": "2:27:18", "remaining_time": "1:58:37", "throughput": 8645.07, "total_tokens": 76407528} +{"current_steps": 113375, "total_steps": 204665, "loss": 0.0002, "lr": 9.862195761128498e-07, "epoch": 2.769770112134464, "percentage": 55.4, "elapsed_time": "2:27:18", "remaining_time": "1:58:36", "throughput": 8645.13, "total_tokens": 76411176} +{"current_steps": 113380, "total_steps": 204665, "loss": 0.0967, "lr": 9.861343066610829e-07, "epoch": 2.7698922629663105, "percentage": 55.4, "elapsed_time": "2:27:18", "remaining_time": "1:58:36", "throughput": 8645.14, "total_tokens": 76414184} +{"current_steps": 113385, "total_steps": 204665, "loss": 0.0002, "lr": 9.860490373101503e-07, "epoch": 2.770014413798158, "percentage": 55.4, "elapsed_time": "2:27:19", "remaining_time": "1:58:36", "throughput": 8645.14, "total_tokens": 76417128} +{"current_steps": 113390, "total_steps": 204665, "loss": 0.0002, "lr": 9.859637680606732e-07, "epoch": 2.770136564630005, "percentage": 55.4, "elapsed_time": "2:27:19", "remaining_time": "1:58:35", "throughput": 8645.18, "total_tokens": 76420456} +{"current_steps": 113395, "total_steps": 204665, "loss": 0.0728, "lr": 9.858784989132717e-07, "epoch": 2.7702587154618525, "percentage": 55.41, "elapsed_time": "2:27:20", "remaining_time": "1:58:35", "throughput": 8645.22, "total_tokens": 76423784} +{"current_steps": 113400, "total_steps": 204665, "loss": 0.0002, "lr": 9.857932298685648e-07, "epoch": 2.7703808662936993, "percentage": 55.41, "elapsed_time": "2:27:20", "remaining_time": "1:58:34", "throughput": 8645.3, "total_tokens": 76427624} +{"current_steps": 113405, "total_steps": 204665, "loss": 0.0504, "lr": 9.85707960927174e-07, "epoch": 2.7705030171255465, "percentage": 55.41, "elapsed_time": "2:27:20", "remaining_time": "1:58:34", "throughput": 8645.37, "total_tokens": 76431272} +{"current_steps": 113410, "total_steps": 204665, "loss": 0.0524, "lr": 9.856226920897182e-07, "epoch": 2.7706251679573937, "percentage": 55.41, "elapsed_time": "2:27:21", "remaining_time": "1:58:33", "throughput": 8645.43, "total_tokens": 76434856} +{"current_steps": 113415, "total_steps": 204665, "loss": 0.0002, "lr": 9.855374233568186e-07, "epoch": 2.770747318789241, "percentage": 55.41, "elapsed_time": "2:27:21", "remaining_time": "1:58:33", "throughput": 8645.45, "total_tokens": 76437992} +{"current_steps": 113420, "total_steps": 204665, "loss": 0.0984, "lr": 9.854521547290942e-07, "epoch": 2.770869469621088, "percentage": 55.42, "elapsed_time": "2:27:21", "remaining_time": "1:58:33", "throughput": 8645.5, "total_tokens": 76441448} +{"current_steps": 113425, "total_steps": 204665, "loss": 0.0019, "lr": 9.853668862071657e-07, "epoch": 2.7709916204529352, "percentage": 55.42, "elapsed_time": "2:27:22", "remaining_time": "1:58:32", "throughput": 8645.55, "total_tokens": 76444904} +{"current_steps": 113430, "total_steps": 204665, "loss": 0.0006, "lr": 9.852816177916535e-07, "epoch": 2.7711137712847824, "percentage": 55.42, "elapsed_time": "2:27:22", "remaining_time": "1:58:32", "throughput": 8645.56, "total_tokens": 76448040} +{"current_steps": 113435, "total_steps": 204665, "loss": 0.0007, "lr": 9.851963494831771e-07, "epoch": 2.7712359221166296, "percentage": 55.42, "elapsed_time": "2:27:22", "remaining_time": "1:58:31", "throughput": 8645.6, "total_tokens": 76451304} +{"current_steps": 113440, "total_steps": 204665, "loss": 0.0018, "lr": 9.851110812823571e-07, "epoch": 2.771358072948477, "percentage": 55.43, "elapsed_time": "2:27:23", "remaining_time": "1:58:31", "throughput": 8645.63, "total_tokens": 76454632} +{"current_steps": 113445, "total_steps": 204665, "loss": 0.0006, "lr": 9.850258131898133e-07, "epoch": 2.771480223780324, "percentage": 55.43, "elapsed_time": "2:27:23", "remaining_time": "1:58:30", "throughput": 8645.67, "total_tokens": 76457896} +{"current_steps": 113450, "total_steps": 204665, "loss": 0.0169, "lr": 9.849405452061654e-07, "epoch": 2.771602374612171, "percentage": 55.43, "elapsed_time": "2:27:23", "remaining_time": "1:58:30", "throughput": 8645.68, "total_tokens": 76461032} +{"current_steps": 113455, "total_steps": 204665, "loss": 0.0567, "lr": 9.848552773320345e-07, "epoch": 2.7717245254440184, "percentage": 55.43, "elapsed_time": "2:27:24", "remaining_time": "1:58:30", "throughput": 8645.72, "total_tokens": 76464360} +{"current_steps": 113460, "total_steps": 204665, "loss": 0.0002, "lr": 9.847700095680394e-07, "epoch": 2.7718466762758656, "percentage": 55.44, "elapsed_time": "2:27:24", "remaining_time": "1:58:29", "throughput": 8645.75, "total_tokens": 76467624} +{"current_steps": 113465, "total_steps": 204665, "loss": 0.0483, "lr": 9.846847419148016e-07, "epoch": 2.7719688271077128, "percentage": 55.44, "elapsed_time": "2:27:24", "remaining_time": "1:58:29", "throughput": 8645.79, "total_tokens": 76470952} +{"current_steps": 113470, "total_steps": 204665, "loss": 0.0459, "lr": 9.8459947437294e-07, "epoch": 2.77209097793956, "percentage": 55.44, "elapsed_time": "2:27:25", "remaining_time": "1:58:28", "throughput": 8645.82, "total_tokens": 76474280} +{"current_steps": 113475, "total_steps": 204665, "loss": 0.1802, "lr": 9.845142069430754e-07, "epoch": 2.7722131287714067, "percentage": 55.44, "elapsed_time": "2:27:25", "remaining_time": "1:58:28", "throughput": 8645.86, "total_tokens": 76477608} +{"current_steps": 113480, "total_steps": 204665, "loss": 0.0344, "lr": 9.844289396258272e-07, "epoch": 2.7723352796032543, "percentage": 55.45, "elapsed_time": "2:27:25", "remaining_time": "1:58:27", "throughput": 8645.89, "total_tokens": 76480872} +{"current_steps": 113485, "total_steps": 204665, "loss": 0.0002, "lr": 9.843436724218163e-07, "epoch": 2.772457430435101, "percentage": 55.45, "elapsed_time": "2:27:26", "remaining_time": "1:58:27", "throughput": 8645.93, "total_tokens": 76484200} +{"current_steps": 113490, "total_steps": 204665, "loss": 0.0005, "lr": 9.842584053316626e-07, "epoch": 2.7725795812669487, "percentage": 55.45, "elapsed_time": "2:27:26", "remaining_time": "1:58:27", "throughput": 8645.97, "total_tokens": 76487592} +{"current_steps": 113495, "total_steps": 204665, "loss": 0.0005, "lr": 9.841731383559857e-07, "epoch": 2.7727017320987954, "percentage": 55.45, "elapsed_time": "2:27:26", "remaining_time": "1:58:26", "throughput": 8646.01, "total_tokens": 76490984} +{"current_steps": 113500, "total_steps": 204665, "loss": 0.0002, "lr": 9.840878714954063e-07, "epoch": 2.7728238829306426, "percentage": 55.46, "elapsed_time": "2:27:27", "remaining_time": "1:58:26", "throughput": 8646.03, "total_tokens": 76494120} +{"current_steps": 113505, "total_steps": 204665, "loss": 0.0468, "lr": 9.840026047505438e-07, "epoch": 2.77294603376249, "percentage": 55.46, "elapsed_time": "2:27:27", "remaining_time": "1:58:25", "throughput": 8646.06, "total_tokens": 76497384} +{"current_steps": 113510, "total_steps": 204665, "loss": 0.0001, "lr": 9.839173381220191e-07, "epoch": 2.773068184594337, "percentage": 55.46, "elapsed_time": "2:27:28", "remaining_time": "1:58:25", "throughput": 8646.11, "total_tokens": 76500840} +{"current_steps": 113515, "total_steps": 204665, "loss": 0.151, "lr": 9.838320716104515e-07, "epoch": 2.773190335426184, "percentage": 55.46, "elapsed_time": "2:27:28", "remaining_time": "1:58:25", "throughput": 8646.2, "total_tokens": 76504744} +{"current_steps": 113520, "total_steps": 204665, "loss": 0.0003, "lr": 9.837468052164612e-07, "epoch": 2.7733124862580314, "percentage": 55.47, "elapsed_time": "2:27:28", "remaining_time": "1:58:24", "throughput": 8646.2, "total_tokens": 76507624} +{"current_steps": 113525, "total_steps": 204665, "loss": 0.0006, "lr": 9.83661538940669e-07, "epoch": 2.7734346370898786, "percentage": 55.47, "elapsed_time": "2:27:29", "remaining_time": "1:58:24", "throughput": 8646.23, "total_tokens": 76510952} +{"current_steps": 113530, "total_steps": 204665, "loss": 0.0001, "lr": 9.83576272783694e-07, "epoch": 2.7735567879217258, "percentage": 55.47, "elapsed_time": "2:27:29", "remaining_time": "1:58:23", "throughput": 8646.26, "total_tokens": 76514216} +{"current_steps": 113535, "total_steps": 204665, "loss": 0.0008, "lr": 9.834910067461574e-07, "epoch": 2.773678938753573, "percentage": 55.47, "elapsed_time": "2:27:29", "remaining_time": "1:58:23", "throughput": 8646.31, "total_tokens": 76517672} +{"current_steps": 113540, "total_steps": 204665, "loss": 0.053, "lr": 9.834057408286782e-07, "epoch": 2.77380108958542, "percentage": 55.48, "elapsed_time": "2:27:30", "remaining_time": "1:58:22", "throughput": 8646.41, "total_tokens": 76521640} +{"current_steps": 113545, "total_steps": 204665, "loss": 0.0001, "lr": 9.83320475031877e-07, "epoch": 2.7739232404172673, "percentage": 55.48, "elapsed_time": "2:27:30", "remaining_time": "1:58:22", "throughput": 8646.49, "total_tokens": 76525480} +{"current_steps": 113550, "total_steps": 204665, "loss": 0.0002, "lr": 9.83235209356374e-07, "epoch": 2.7740453912491145, "percentage": 55.48, "elapsed_time": "2:27:30", "remaining_time": "1:58:22", "throughput": 8646.54, "total_tokens": 76529000} +{"current_steps": 113555, "total_steps": 204665, "loss": 0.141, "lr": 9.831499438027888e-07, "epoch": 2.7741675420809617, "percentage": 55.48, "elapsed_time": "2:27:31", "remaining_time": "1:58:21", "throughput": 8646.6, "total_tokens": 76532584} +{"current_steps": 113560, "total_steps": 204665, "loss": 0.0664, "lr": 9.83064678371742e-07, "epoch": 2.7742896929128085, "percentage": 55.49, "elapsed_time": "2:27:31", "remaining_time": "1:58:21", "throughput": 8646.64, "total_tokens": 76535912} +{"current_steps": 113565, "total_steps": 204665, "loss": 0.0002, "lr": 9.82979413063853e-07, "epoch": 2.774411843744656, "percentage": 55.49, "elapsed_time": "2:27:31", "remaining_time": "1:58:20", "throughput": 8646.7, "total_tokens": 76539560} +{"current_steps": 113570, "total_steps": 204665, "loss": 0.0001, "lr": 9.828941478797428e-07, "epoch": 2.774533994576503, "percentage": 55.49, "elapsed_time": "2:27:32", "remaining_time": "1:58:20", "throughput": 8646.76, "total_tokens": 76543144} +{"current_steps": 113575, "total_steps": 204665, "loss": 0.0469, "lr": 9.828088828200303e-07, "epoch": 2.7746561454083505, "percentage": 55.49, "elapsed_time": "2:27:32", "remaining_time": "1:58:20", "throughput": 8646.88, "total_tokens": 76547368} +{"current_steps": 113580, "total_steps": 204665, "loss": 0.0001, "lr": 9.827236178853366e-07, "epoch": 2.7747782962401972, "percentage": 55.5, "elapsed_time": "2:27:32", "remaining_time": "1:58:19", "throughput": 8646.91, "total_tokens": 76550632} +{"current_steps": 113585, "total_steps": 204665, "loss": 0.115, "lr": 9.826383530762817e-07, "epoch": 2.7749004470720444, "percentage": 55.5, "elapsed_time": "2:27:33", "remaining_time": "1:58:19", "throughput": 8646.96, "total_tokens": 76554152} +{"current_steps": 113590, "total_steps": 204665, "loss": 0.0001, "lr": 9.825530883934847e-07, "epoch": 2.7750225979038916, "percentage": 55.5, "elapsed_time": "2:27:33", "remaining_time": "1:58:18", "throughput": 8646.99, "total_tokens": 76557352} +{"current_steps": 113595, "total_steps": 204665, "loss": 0.0003, "lr": 9.82467823837567e-07, "epoch": 2.775144748735739, "percentage": 55.5, "elapsed_time": "2:27:33", "remaining_time": "1:58:18", "throughput": 8647.04, "total_tokens": 76560808} +{"current_steps": 113600, "total_steps": 204665, "loss": 0.0767, "lr": 9.823825594091477e-07, "epoch": 2.775266899567586, "percentage": 55.51, "elapsed_time": "2:27:34", "remaining_time": "1:58:17", "throughput": 8647.07, "total_tokens": 76564072} +{"current_steps": 113605, "total_steps": 204665, "loss": 0.0003, "lr": 9.822972951088473e-07, "epoch": 2.775389050399433, "percentage": 55.51, "elapsed_time": "2:27:34", "remaining_time": "1:58:17", "throughput": 8647.09, "total_tokens": 76567144} +{"current_steps": 113610, "total_steps": 204665, "loss": 0.0002, "lr": 9.822120309372855e-07, "epoch": 2.7755112012312804, "percentage": 55.51, "elapsed_time": "2:27:35", "remaining_time": "1:58:17", "throughput": 8647.12, "total_tokens": 76570472} +{"current_steps": 113615, "total_steps": 204665, "loss": 0.0002, "lr": 9.821267668950824e-07, "epoch": 2.7756333520631276, "percentage": 55.51, "elapsed_time": "2:27:35", "remaining_time": "1:58:16", "throughput": 8647.15, "total_tokens": 76573672} +{"current_steps": 113620, "total_steps": 204665, "loss": 0.0003, "lr": 9.820415029828588e-07, "epoch": 2.7757555028949747, "percentage": 55.52, "elapsed_time": "2:27:35", "remaining_time": "1:58:16", "throughput": 8647.18, "total_tokens": 76577000} +{"current_steps": 113625, "total_steps": 204665, "loss": 0.078, "lr": 9.81956239201234e-07, "epoch": 2.775877653726822, "percentage": 55.52, "elapsed_time": "2:27:36", "remaining_time": "1:58:15", "throughput": 8647.24, "total_tokens": 76580584} +{"current_steps": 113630, "total_steps": 204665, "loss": 0.0664, "lr": 9.818709755508284e-07, "epoch": 2.775999804558669, "percentage": 55.52, "elapsed_time": "2:27:36", "remaining_time": "1:58:15", "throughput": 8647.28, "total_tokens": 76583848} +{"current_steps": 113635, "total_steps": 204665, "loss": 0.0001, "lr": 9.817857120322615e-07, "epoch": 2.7761219553905163, "percentage": 55.52, "elapsed_time": "2:27:36", "remaining_time": "1:58:14", "throughput": 8647.32, "total_tokens": 76587240} +{"current_steps": 113640, "total_steps": 204665, "loss": 0.0002, "lr": 9.81700448646154e-07, "epoch": 2.7762441062223635, "percentage": 55.52, "elapsed_time": "2:27:37", "remaining_time": "1:58:14", "throughput": 8647.36, "total_tokens": 76590632} +{"current_steps": 113645, "total_steps": 204665, "loss": 0.058, "lr": 9.81615185393126e-07, "epoch": 2.7763662570542107, "percentage": 55.53, "elapsed_time": "2:27:37", "remaining_time": "1:58:14", "throughput": 8647.45, "total_tokens": 76594536} +{"current_steps": 113650, "total_steps": 204665, "loss": 0.0001, "lr": 9.815299222737972e-07, "epoch": 2.776488407886058, "percentage": 55.53, "elapsed_time": "2:27:37", "remaining_time": "1:58:13", "throughput": 8647.52, "total_tokens": 76598184} +{"current_steps": 113655, "total_steps": 204665, "loss": 0.0623, "lr": 9.814446592887878e-07, "epoch": 2.7766105587179046, "percentage": 55.53, "elapsed_time": "2:27:38", "remaining_time": "1:58:13", "throughput": 8647.52, "total_tokens": 76601128} +{"current_steps": 113660, "total_steps": 204665, "loss": 0.0008, "lr": 9.813593964387177e-07, "epoch": 2.7767327095497523, "percentage": 55.53, "elapsed_time": "2:27:38", "remaining_time": "1:58:12", "throughput": 8647.61, "total_tokens": 76604968} +{"current_steps": 113665, "total_steps": 204665, "loss": 0.0001, "lr": 9.812741337242074e-07, "epoch": 2.776854860381599, "percentage": 55.54, "elapsed_time": "2:27:38", "remaining_time": "1:58:12", "throughput": 8647.68, "total_tokens": 76608680} +{"current_steps": 113670, "total_steps": 204665, "loss": 0.0472, "lr": 9.811888711458762e-07, "epoch": 2.776977011213446, "percentage": 55.54, "elapsed_time": "2:27:39", "remaining_time": "1:58:11", "throughput": 8647.72, "total_tokens": 76612072} +{"current_steps": 113675, "total_steps": 204665, "loss": 0.0003, "lr": 9.811036087043445e-07, "epoch": 2.7770991620452934, "percentage": 55.54, "elapsed_time": "2:27:39", "remaining_time": "1:58:11", "throughput": 8647.77, "total_tokens": 76615592} +{"current_steps": 113680, "total_steps": 204665, "loss": 0.0837, "lr": 9.81018346400233e-07, "epoch": 2.7772213128771406, "percentage": 55.54, "elapsed_time": "2:27:39", "remaining_time": "1:58:11", "throughput": 8647.84, "total_tokens": 76619176} +{"current_steps": 113685, "total_steps": 204665, "loss": 0.0002, "lr": 9.809330842341607e-07, "epoch": 2.7773434637089878, "percentage": 55.55, "elapsed_time": "2:27:40", "remaining_time": "1:58:10", "throughput": 8647.93, "total_tokens": 76623080} +{"current_steps": 113690, "total_steps": 204665, "loss": 0.0598, "lr": 9.808478222067487e-07, "epoch": 2.777465614540835, "percentage": 55.55, "elapsed_time": "2:27:40", "remaining_time": "1:58:10", "throughput": 8647.96, "total_tokens": 76626344} +{"current_steps": 113695, "total_steps": 204665, "loss": 0.0004, "lr": 9.807625603186158e-07, "epoch": 2.777587765372682, "percentage": 55.55, "elapsed_time": "2:27:40", "remaining_time": "1:58:09", "throughput": 8648.0, "total_tokens": 76629736} +{"current_steps": 113700, "total_steps": 204665, "loss": 0.0003, "lr": 9.80677298570383e-07, "epoch": 2.7777099162045293, "percentage": 55.55, "elapsed_time": "2:27:41", "remaining_time": "1:58:09", "throughput": 8648.03, "total_tokens": 76632936} +{"current_steps": 113705, "total_steps": 204665, "loss": 0.0001, "lr": 9.805920369626706e-07, "epoch": 2.7778320670363765, "percentage": 55.56, "elapsed_time": "2:27:41", "remaining_time": "1:58:09", "throughput": 8648.07, "total_tokens": 76636264} +{"current_steps": 113710, "total_steps": 204665, "loss": 0.0168, "lr": 9.805067754960973e-07, "epoch": 2.7779542178682237, "percentage": 55.56, "elapsed_time": "2:27:42", "remaining_time": "1:58:08", "throughput": 8648.08, "total_tokens": 76639336} +{"current_steps": 113715, "total_steps": 204665, "loss": 0.0004, "lr": 9.804215141712848e-07, "epoch": 2.778076368700071, "percentage": 55.56, "elapsed_time": "2:27:42", "remaining_time": "1:58:08", "throughput": 8648.12, "total_tokens": 76642664} +{"current_steps": 113720, "total_steps": 204665, "loss": 0.0001, "lr": 9.803362529888516e-07, "epoch": 2.778198519531918, "percentage": 55.56, "elapsed_time": "2:27:42", "remaining_time": "1:58:07", "throughput": 8648.15, "total_tokens": 76645864} +{"current_steps": 113725, "total_steps": 204665, "loss": 0.0001, "lr": 9.80250991949419e-07, "epoch": 2.7783206703637653, "percentage": 55.57, "elapsed_time": "2:27:43", "remaining_time": "1:58:07", "throughput": 8648.18, "total_tokens": 76649256} +{"current_steps": 113730, "total_steps": 204665, "loss": 0.1172, "lr": 9.80165731053606e-07, "epoch": 2.7784428211956125, "percentage": 55.57, "elapsed_time": "2:27:43", "remaining_time": "1:58:06", "throughput": 8648.21, "total_tokens": 76652456} +{"current_steps": 113735, "total_steps": 204665, "loss": 0.0001, "lr": 9.800804703020331e-07, "epoch": 2.7785649720274597, "percentage": 55.57, "elapsed_time": "2:27:43", "remaining_time": "1:58:06", "throughput": 8648.28, "total_tokens": 76656168} +{"current_steps": 113740, "total_steps": 204665, "loss": 0.0002, "lr": 9.79995209695321e-07, "epoch": 2.7786871228593064, "percentage": 55.57, "elapsed_time": "2:27:44", "remaining_time": "1:58:06", "throughput": 8648.42, "total_tokens": 76660584} +{"current_steps": 113745, "total_steps": 204665, "loss": 0.1553, "lr": 9.799099492340885e-07, "epoch": 2.778809273691154, "percentage": 55.58, "elapsed_time": "2:27:44", "remaining_time": "1:58:05", "throughput": 8648.45, "total_tokens": 76663912} +{"current_steps": 113750, "total_steps": 204665, "loss": 0.0001, "lr": 9.798246889189567e-07, "epoch": 2.778931424523001, "percentage": 55.58, "elapsed_time": "2:27:44", "remaining_time": "1:58:05", "throughput": 8648.48, "total_tokens": 76667048} +{"current_steps": 113755, "total_steps": 204665, "loss": 0.0978, "lr": 9.79739428750545e-07, "epoch": 2.7790535753548484, "percentage": 55.58, "elapsed_time": "2:27:45", "remaining_time": "1:58:04", "throughput": 8648.53, "total_tokens": 76670568} +{"current_steps": 113760, "total_steps": 204665, "loss": 0.0001, "lr": 9.796541687294738e-07, "epoch": 2.779175726186695, "percentage": 55.58, "elapsed_time": "2:27:45", "remaining_time": "1:58:04", "throughput": 8648.58, "total_tokens": 76674024} +{"current_steps": 113765, "total_steps": 204665, "loss": 0.0366, "lr": 9.795689088563626e-07, "epoch": 2.7792978770185424, "percentage": 55.59, "elapsed_time": "2:27:45", "remaining_time": "1:58:03", "throughput": 8648.61, "total_tokens": 76677288} +{"current_steps": 113770, "total_steps": 204665, "loss": 0.0404, "lr": 9.79483649131832e-07, "epoch": 2.7794200278503896, "percentage": 55.59, "elapsed_time": "2:27:46", "remaining_time": "1:58:03", "throughput": 8648.68, "total_tokens": 76681000} +{"current_steps": 113775, "total_steps": 204665, "loss": 0.0566, "lr": 9.79398389556502e-07, "epoch": 2.7795421786822367, "percentage": 55.59, "elapsed_time": "2:27:46", "remaining_time": "1:58:03", "throughput": 8648.71, "total_tokens": 76684200} +{"current_steps": 113780, "total_steps": 204665, "loss": 0.0003, "lr": 9.79313130130992e-07, "epoch": 2.779664329514084, "percentage": 55.59, "elapsed_time": "2:27:46", "remaining_time": "1:58:02", "throughput": 8648.72, "total_tokens": 76687272} +{"current_steps": 113785, "total_steps": 204665, "loss": 0.0536, "lr": 9.79227870855923e-07, "epoch": 2.779786480345931, "percentage": 55.6, "elapsed_time": "2:27:47", "remaining_time": "1:58:02", "throughput": 8648.73, "total_tokens": 76690280} +{"current_steps": 113790, "total_steps": 204665, "loss": 0.0336, "lr": 9.79142611731914e-07, "epoch": 2.7799086311777783, "percentage": 55.6, "elapsed_time": "2:27:47", "remaining_time": "1:58:01", "throughput": 8648.77, "total_tokens": 76693736} +{"current_steps": 113795, "total_steps": 204665, "loss": 0.0004, "lr": 9.790573527595856e-07, "epoch": 2.7800307820096255, "percentage": 55.6, "elapsed_time": "2:27:47", "remaining_time": "1:58:01", "throughput": 8648.81, "total_tokens": 76697064} +{"current_steps": 113800, "total_steps": 204665, "loss": 0.0004, "lr": 9.789720939395581e-07, "epoch": 2.7801529328414727, "percentage": 55.6, "elapsed_time": "2:27:48", "remaining_time": "1:58:00", "throughput": 8648.84, "total_tokens": 76700264} +{"current_steps": 113805, "total_steps": 204665, "loss": 0.0105, "lr": 9.78886835272451e-07, "epoch": 2.78027508367332, "percentage": 55.61, "elapsed_time": "2:27:48", "remaining_time": "1:58:00", "throughput": 8648.91, "total_tokens": 76704040} +{"current_steps": 113810, "total_steps": 204665, "loss": 0.0003, "lr": 9.788015767588846e-07, "epoch": 2.780397234505167, "percentage": 55.61, "elapsed_time": "2:27:48", "remaining_time": "1:58:00", "throughput": 8648.98, "total_tokens": 76707752} +{"current_steps": 113815, "total_steps": 204665, "loss": 0.0331, "lr": 9.787163183994787e-07, "epoch": 2.7805193853370143, "percentage": 55.61, "elapsed_time": "2:27:49", "remaining_time": "1:57:59", "throughput": 8649.05, "total_tokens": 76711400} +{"current_steps": 113820, "total_steps": 204665, "loss": 0.0001, "lr": 9.786310601948538e-07, "epoch": 2.7806415361688614, "percentage": 55.61, "elapsed_time": "2:27:49", "remaining_time": "1:57:59", "throughput": 8649.17, "total_tokens": 76715688} +{"current_steps": 113825, "total_steps": 204665, "loss": 0.0429, "lr": 9.78545802145629e-07, "epoch": 2.7807636870007086, "percentage": 55.62, "elapsed_time": "2:27:50", "remaining_time": "1:57:58", "throughput": 8649.23, "total_tokens": 76719272} +{"current_steps": 113830, "total_steps": 204665, "loss": 0.0002, "lr": 9.784605442524252e-07, "epoch": 2.780885837832556, "percentage": 55.62, "elapsed_time": "2:27:50", "remaining_time": "1:57:58", "throughput": 8649.28, "total_tokens": 76722728} +{"current_steps": 113835, "total_steps": 204665, "loss": 0.0003, "lr": 9.783752865158623e-07, "epoch": 2.7810079886644026, "percentage": 55.62, "elapsed_time": "2:27:50", "remaining_time": "1:57:58", "throughput": 8649.26, "total_tokens": 76725480} +{"current_steps": 113840, "total_steps": 204665, "loss": 0.1236, "lr": 9.782900289365597e-07, "epoch": 2.78113013949625, "percentage": 55.62, "elapsed_time": "2:27:51", "remaining_time": "1:57:57", "throughput": 8649.33, "total_tokens": 76729192} +{"current_steps": 113845, "total_steps": 204665, "loss": 0.0393, "lr": 9.782047715151384e-07, "epoch": 2.781252290328097, "percentage": 55.63, "elapsed_time": "2:27:51", "remaining_time": "1:57:57", "throughput": 8649.38, "total_tokens": 76732648} +{"current_steps": 113850, "total_steps": 204665, "loss": 0.0977, "lr": 9.781195142522175e-07, "epoch": 2.781374441159944, "percentage": 55.63, "elapsed_time": "2:27:51", "remaining_time": "1:57:56", "throughput": 8649.42, "total_tokens": 76736104} +{"current_steps": 113855, "total_steps": 204665, "loss": 0.0477, "lr": 9.780342571484174e-07, "epoch": 2.7814965919917913, "percentage": 55.63, "elapsed_time": "2:27:52", "remaining_time": "1:57:56", "throughput": 8649.49, "total_tokens": 76739816} +{"current_steps": 113860, "total_steps": 204665, "loss": 0.0001, "lr": 9.779490002043584e-07, "epoch": 2.7816187428236385, "percentage": 55.63, "elapsed_time": "2:27:52", "remaining_time": "1:57:55", "throughput": 8649.58, "total_tokens": 76743784} +{"current_steps": 113865, "total_steps": 204665, "loss": 0.0001, "lr": 9.778637434206595e-07, "epoch": 2.7817408936554857, "percentage": 55.63, "elapsed_time": "2:27:52", "remaining_time": "1:57:55", "throughput": 8649.66, "total_tokens": 76747496} +{"current_steps": 113870, "total_steps": 204665, "loss": 0.0002, "lr": 9.777784867979422e-07, "epoch": 2.781863044487333, "percentage": 55.64, "elapsed_time": "2:27:53", "remaining_time": "1:57:55", "throughput": 8649.72, "total_tokens": 76751080} +{"current_steps": 113875, "total_steps": 204665, "loss": 0.0002, "lr": 9.776932303368252e-07, "epoch": 2.78198519531918, "percentage": 55.64, "elapsed_time": "2:27:53", "remaining_time": "1:57:54", "throughput": 8649.76, "total_tokens": 76754408} +{"current_steps": 113880, "total_steps": 204665, "loss": 0.0373, "lr": 9.776079740379294e-07, "epoch": 2.7821073461510273, "percentage": 55.64, "elapsed_time": "2:27:53", "remaining_time": "1:57:54", "throughput": 8649.79, "total_tokens": 76757672} +{"current_steps": 113885, "total_steps": 204665, "loss": 0.0002, "lr": 9.77522717901874e-07, "epoch": 2.7822294969828745, "percentage": 55.64, "elapsed_time": "2:27:54", "remaining_time": "1:57:53", "throughput": 8649.87, "total_tokens": 76761448} +{"current_steps": 113890, "total_steps": 204665, "loss": 0.0002, "lr": 9.774374619292796e-07, "epoch": 2.7823516478147217, "percentage": 55.65, "elapsed_time": "2:27:54", "remaining_time": "1:57:53", "throughput": 8649.92, "total_tokens": 76764904} +{"current_steps": 113895, "total_steps": 204665, "loss": 0.0001, "lr": 9.773522061207664e-07, "epoch": 2.782473798646569, "percentage": 55.65, "elapsed_time": "2:27:54", "remaining_time": "1:57:53", "throughput": 8649.94, "total_tokens": 76768104} +{"current_steps": 113900, "total_steps": 204665, "loss": 0.0002, "lr": 9.772669504769534e-07, "epoch": 2.782595949478416, "percentage": 55.65, "elapsed_time": "2:27:55", "remaining_time": "1:57:52", "throughput": 8650.0, "total_tokens": 76771752} +{"current_steps": 113905, "total_steps": 204665, "loss": 0.062, "lr": 9.77181694998462e-07, "epoch": 2.7827181003102632, "percentage": 55.65, "elapsed_time": "2:27:55", "remaining_time": "1:57:52", "throughput": 8650.04, "total_tokens": 76775080} +{"current_steps": 113910, "total_steps": 204665, "loss": 0.0001, "lr": 9.77096439685911e-07, "epoch": 2.7828402511421104, "percentage": 55.66, "elapsed_time": "2:27:56", "remaining_time": "1:57:51", "throughput": 8650.08, "total_tokens": 76778408} +{"current_steps": 113915, "total_steps": 204665, "loss": 0.0615, "lr": 9.770111845399209e-07, "epoch": 2.7829624019739576, "percentage": 55.66, "elapsed_time": "2:27:56", "remaining_time": "1:57:51", "throughput": 8650.1, "total_tokens": 76781544} +{"current_steps": 113920, "total_steps": 204665, "loss": 0.0005, "lr": 9.769259295611117e-07, "epoch": 2.7830845528058044, "percentage": 55.66, "elapsed_time": "2:27:56", "remaining_time": "1:57:50", "throughput": 8650.13, "total_tokens": 76784872} +{"current_steps": 113925, "total_steps": 204665, "loss": 0.0311, "lr": 9.768406747501032e-07, "epoch": 2.783206703637652, "percentage": 55.66, "elapsed_time": "2:27:57", "remaining_time": "1:57:50", "throughput": 8650.19, "total_tokens": 76788392} +{"current_steps": 113930, "total_steps": 204665, "loss": 0.0004, "lr": 9.76755420107516e-07, "epoch": 2.7833288544694987, "percentage": 55.67, "elapsed_time": "2:27:57", "remaining_time": "1:57:50", "throughput": 8650.2, "total_tokens": 76791464} +{"current_steps": 113935, "total_steps": 204665, "loss": 0.0001, "lr": 9.76670165633969e-07, "epoch": 2.7834510053013464, "percentage": 55.67, "elapsed_time": "2:27:57", "remaining_time": "1:57:49", "throughput": 8650.22, "total_tokens": 76794664} +{"current_steps": 113940, "total_steps": 204665, "loss": 0.0498, "lr": 9.765849113300833e-07, "epoch": 2.783573156133193, "percentage": 55.67, "elapsed_time": "2:27:58", "remaining_time": "1:57:49", "throughput": 8650.21, "total_tokens": 76797416} +{"current_steps": 113945, "total_steps": 204665, "loss": 0.0002, "lr": 9.76499657196478e-07, "epoch": 2.7836953069650403, "percentage": 55.67, "elapsed_time": "2:27:58", "remaining_time": "1:57:48", "throughput": 8650.24, "total_tokens": 76800680} +{"current_steps": 113950, "total_steps": 204665, "loss": 0.0001, "lr": 9.764144032337738e-07, "epoch": 2.7838174577968875, "percentage": 55.68, "elapsed_time": "2:27:58", "remaining_time": "1:57:48", "throughput": 8650.28, "total_tokens": 76804008} +{"current_steps": 113955, "total_steps": 204665, "loss": 0.0, "lr": 9.763291494425904e-07, "epoch": 2.7839396086287347, "percentage": 55.68, "elapsed_time": "2:27:59", "remaining_time": "1:57:47", "throughput": 8650.31, "total_tokens": 76807336} +{"current_steps": 113960, "total_steps": 204665, "loss": 0.0464, "lr": 9.762438958235479e-07, "epoch": 2.784061759460582, "percentage": 55.68, "elapsed_time": "2:27:59", "remaining_time": "1:57:47", "throughput": 8650.35, "total_tokens": 76810664} +{"current_steps": 113965, "total_steps": 204665, "loss": 0.0434, "lr": 9.76158642377266e-07, "epoch": 2.784183910292429, "percentage": 55.68, "elapsed_time": "2:27:59", "remaining_time": "1:57:47", "throughput": 8650.38, "total_tokens": 76813928} +{"current_steps": 113970, "total_steps": 204665, "loss": 0.0327, "lr": 9.760733891043648e-07, "epoch": 2.7843060611242763, "percentage": 55.69, "elapsed_time": "2:28:00", "remaining_time": "1:57:46", "throughput": 8650.38, "total_tokens": 76816872} +{"current_steps": 113975, "total_steps": 204665, "loss": 0.0003, "lr": 9.759881360054646e-07, "epoch": 2.7844282119561234, "percentage": 55.69, "elapsed_time": "2:28:00", "remaining_time": "1:57:46", "throughput": 8650.39, "total_tokens": 76819880} +{"current_steps": 113980, "total_steps": 204665, "loss": 0.0751, "lr": 9.75902883081185e-07, "epoch": 2.7845503627879706, "percentage": 55.69, "elapsed_time": "2:28:00", "remaining_time": "1:57:45", "throughput": 8650.4, "total_tokens": 76822888} +{"current_steps": 113985, "total_steps": 204665, "loss": 0.0019, "lr": 9.758176303321458e-07, "epoch": 2.784672513619818, "percentage": 55.69, "elapsed_time": "2:28:01", "remaining_time": "1:57:45", "throughput": 8650.44, "total_tokens": 76826216} +{"current_steps": 113990, "total_steps": 204665, "loss": 0.1272, "lr": 9.757323777589678e-07, "epoch": 2.784794664451665, "percentage": 55.7, "elapsed_time": "2:28:01", "remaining_time": "1:57:44", "throughput": 8650.48, "total_tokens": 76829608} +{"current_steps": 113995, "total_steps": 204665, "loss": 0.0002, "lr": 9.7564712536227e-07, "epoch": 2.784916815283512, "percentage": 55.7, "elapsed_time": "2:28:01", "remaining_time": "1:57:44", "throughput": 8650.54, "total_tokens": 76833256} +{"current_steps": 114000, "total_steps": 204665, "loss": 0.0843, "lr": 9.755618731426735e-07, "epoch": 2.7850389661153594, "percentage": 55.7, "elapsed_time": "2:28:02", "remaining_time": "1:57:44", "throughput": 8650.61, "total_tokens": 76836840} +{"current_steps": 114005, "total_steps": 204665, "loss": 0.0657, "lr": 9.754766211007972e-07, "epoch": 2.785161116947206, "percentage": 55.7, "elapsed_time": "2:28:02", "remaining_time": "1:57:43", "throughput": 8650.62, "total_tokens": 76839976} +{"current_steps": 114010, "total_steps": 204665, "loss": 0.049, "lr": 9.753913692372615e-07, "epoch": 2.7852832677790538, "percentage": 55.71, "elapsed_time": "2:28:02", "remaining_time": "1:57:43", "throughput": 8650.64, "total_tokens": 76843112} +{"current_steps": 114015, "total_steps": 204665, "loss": 0.0002, "lr": 9.753061175526867e-07, "epoch": 2.7854054186109005, "percentage": 55.71, "elapsed_time": "2:28:03", "remaining_time": "1:57:42", "throughput": 8650.72, "total_tokens": 76846824} +{"current_steps": 114020, "total_steps": 204665, "loss": 0.0001, "lr": 9.752208660476919e-07, "epoch": 2.785527569442748, "percentage": 55.71, "elapsed_time": "2:28:03", "remaining_time": "1:57:42", "throughput": 8650.78, "total_tokens": 76850472} +{"current_steps": 114025, "total_steps": 204665, "loss": 0.0228, "lr": 9.751356147228982e-07, "epoch": 2.785649720274595, "percentage": 55.71, "elapsed_time": "2:28:04", "remaining_time": "1:57:42", "throughput": 8650.89, "total_tokens": 76854568} +{"current_steps": 114030, "total_steps": 204665, "loss": 0.0465, "lr": 9.750503635789246e-07, "epoch": 2.785771871106442, "percentage": 55.72, "elapsed_time": "2:28:04", "remaining_time": "1:57:41", "throughput": 8650.92, "total_tokens": 76857832} +{"current_steps": 114035, "total_steps": 204665, "loss": 0.0738, "lr": 9.749651126163919e-07, "epoch": 2.7858940219382893, "percentage": 55.72, "elapsed_time": "2:28:04", "remaining_time": "1:57:41", "throughput": 8650.96, "total_tokens": 76861288} +{"current_steps": 114040, "total_steps": 204665, "loss": 0.0677, "lr": 9.74879861835919e-07, "epoch": 2.7860161727701365, "percentage": 55.72, "elapsed_time": "2:28:05", "remaining_time": "1:57:40", "throughput": 8650.98, "total_tokens": 76864360} +{"current_steps": 114045, "total_steps": 204665, "loss": 0.0312, "lr": 9.747946112381266e-07, "epoch": 2.7861383236019837, "percentage": 55.72, "elapsed_time": "2:28:05", "remaining_time": "1:57:40", "throughput": 8651.02, "total_tokens": 76867688} +{"current_steps": 114050, "total_steps": 204665, "loss": 0.0002, "lr": 9.747093608236352e-07, "epoch": 2.786260474433831, "percentage": 55.73, "elapsed_time": "2:28:05", "remaining_time": "1:57:39", "throughput": 8651.08, "total_tokens": 76871272} +{"current_steps": 114055, "total_steps": 204665, "loss": 0.0739, "lr": 9.746241105930634e-07, "epoch": 2.786382625265678, "percentage": 55.73, "elapsed_time": "2:28:06", "remaining_time": "1:57:39", "throughput": 8651.13, "total_tokens": 76874728} +{"current_steps": 114060, "total_steps": 204665, "loss": 0.0991, "lr": 9.745388605470324e-07, "epoch": 2.7865047760975252, "percentage": 55.73, "elapsed_time": "2:28:06", "remaining_time": "1:57:39", "throughput": 8651.13, "total_tokens": 76877736} +{"current_steps": 114065, "total_steps": 204665, "loss": 0.0004, "lr": 9.744536106861615e-07, "epoch": 2.7866269269293724, "percentage": 55.73, "elapsed_time": "2:28:06", "remaining_time": "1:57:38", "throughput": 8651.16, "total_tokens": 76880936} +{"current_steps": 114070, "total_steps": 204665, "loss": 0.0363, "lr": 9.74368361011071e-07, "epoch": 2.7867490777612196, "percentage": 55.73, "elapsed_time": "2:28:07", "remaining_time": "1:57:38", "throughput": 8651.25, "total_tokens": 76884840} +{"current_steps": 114075, "total_steps": 204665, "loss": 0.0005, "lr": 9.742831115223802e-07, "epoch": 2.786871228593067, "percentage": 55.74, "elapsed_time": "2:28:07", "remaining_time": "1:57:37", "throughput": 8651.28, "total_tokens": 76888104} +{"current_steps": 114080, "total_steps": 204665, "loss": 0.0001, "lr": 9.741978622207097e-07, "epoch": 2.786993379424914, "percentage": 55.74, "elapsed_time": "2:28:07", "remaining_time": "1:57:37", "throughput": 8651.32, "total_tokens": 76891560} +{"current_steps": 114085, "total_steps": 204665, "loss": 0.0989, "lr": 9.741126131066796e-07, "epoch": 2.787115530256761, "percentage": 55.74, "elapsed_time": "2:28:08", "remaining_time": "1:57:36", "throughput": 8651.36, "total_tokens": 76894888} +{"current_steps": 114090, "total_steps": 204665, "loss": 0.0002, "lr": 9.740273641809092e-07, "epoch": 2.7872376810886084, "percentage": 55.74, "elapsed_time": "2:28:08", "remaining_time": "1:57:36", "throughput": 8651.4, "total_tokens": 76898280} +{"current_steps": 114095, "total_steps": 204665, "loss": 0.0003, "lr": 9.739421154440192e-07, "epoch": 2.7873598319204556, "percentage": 55.75, "elapsed_time": "2:28:08", "remaining_time": "1:57:36", "throughput": 8651.44, "total_tokens": 76901608} +{"current_steps": 114100, "total_steps": 204665, "loss": 0.0602, "lr": 9.738568668966286e-07, "epoch": 2.7874819827523023, "percentage": 55.75, "elapsed_time": "2:28:09", "remaining_time": "1:57:35", "throughput": 8651.49, "total_tokens": 76905064} +{"current_steps": 114105, "total_steps": 204665, "loss": 0.0537, "lr": 9.737716185393582e-07, "epoch": 2.78760413358415, "percentage": 55.75, "elapsed_time": "2:28:09", "remaining_time": "1:57:35", "throughput": 8651.52, "total_tokens": 76908392} +{"current_steps": 114110, "total_steps": 204665, "loss": 0.0227, "lr": 9.736863703728275e-07, "epoch": 2.7877262844159967, "percentage": 55.75, "elapsed_time": "2:28:09", "remaining_time": "1:57:34", "throughput": 8651.59, "total_tokens": 76911976} +{"current_steps": 114115, "total_steps": 204665, "loss": 0.0002, "lr": 9.736011223976567e-07, "epoch": 2.7878484352478443, "percentage": 55.76, "elapsed_time": "2:28:10", "remaining_time": "1:57:34", "throughput": 8651.62, "total_tokens": 76915304} +{"current_steps": 114120, "total_steps": 204665, "loss": 0.0002, "lr": 9.735158746144657e-07, "epoch": 2.787970586079691, "percentage": 55.76, "elapsed_time": "2:28:10", "remaining_time": "1:57:33", "throughput": 8651.69, "total_tokens": 76919016} +{"current_steps": 114125, "total_steps": 204665, "loss": 0.0003, "lr": 9.734306270238744e-07, "epoch": 2.7880927369115382, "percentage": 55.76, "elapsed_time": "2:28:10", "remaining_time": "1:57:33", "throughput": 8651.69, "total_tokens": 76921896} +{"current_steps": 114130, "total_steps": 204665, "loss": 0.0425, "lr": 9.733453796265029e-07, "epoch": 2.7882148877433854, "percentage": 55.76, "elapsed_time": "2:28:11", "remaining_time": "1:57:33", "throughput": 8651.73, "total_tokens": 76925224} +{"current_steps": 114135, "total_steps": 204665, "loss": 0.0003, "lr": 9.732601324229704e-07, "epoch": 2.7883370385752326, "percentage": 55.77, "elapsed_time": "2:28:11", "remaining_time": "1:57:32", "throughput": 8651.79, "total_tokens": 76928808} +{"current_steps": 114140, "total_steps": 204665, "loss": 0.0005, "lr": 9.731748854138977e-07, "epoch": 2.78845918940708, "percentage": 55.77, "elapsed_time": "2:28:12", "remaining_time": "1:57:32", "throughput": 8651.83, "total_tokens": 76932200} +{"current_steps": 114145, "total_steps": 204665, "loss": 0.0001, "lr": 9.730896385999045e-07, "epoch": 2.788581340238927, "percentage": 55.77, "elapsed_time": "2:28:12", "remaining_time": "1:57:31", "throughput": 8651.87, "total_tokens": 76935528} +{"current_steps": 114150, "total_steps": 204665, "loss": 0.0325, "lr": 9.730043919816104e-07, "epoch": 2.788703491070774, "percentage": 55.77, "elapsed_time": "2:28:12", "remaining_time": "1:57:31", "throughput": 8651.87, "total_tokens": 76938536} +{"current_steps": 114155, "total_steps": 204665, "loss": 0.0001, "lr": 9.72919145559636e-07, "epoch": 2.7888256419026214, "percentage": 55.78, "elapsed_time": "2:28:13", "remaining_time": "1:57:31", "throughput": 8651.94, "total_tokens": 76942248} +{"current_steps": 114160, "total_steps": 204665, "loss": 0.0003, "lr": 9.728338993346007e-07, "epoch": 2.7889477927344686, "percentage": 55.78, "elapsed_time": "2:28:13", "remaining_time": "1:57:30", "throughput": 8651.96, "total_tokens": 76945448} +{"current_steps": 114165, "total_steps": 204665, "loss": 0.0501, "lr": 9.727486533071248e-07, "epoch": 2.7890699435663158, "percentage": 55.78, "elapsed_time": "2:28:13", "remaining_time": "1:57:30", "throughput": 8652.04, "total_tokens": 76949160} +{"current_steps": 114170, "total_steps": 204665, "loss": 0.128, "lr": 9.72663407477828e-07, "epoch": 2.789192094398163, "percentage": 55.78, "elapsed_time": "2:28:14", "remaining_time": "1:57:29", "throughput": 8652.05, "total_tokens": 76952232} +{"current_steps": 114175, "total_steps": 204665, "loss": 0.0003, "lr": 9.7257816184733e-07, "epoch": 2.78931424523001, "percentage": 55.79, "elapsed_time": "2:28:14", "remaining_time": "1:57:29", "throughput": 8652.07, "total_tokens": 76955368} +{"current_steps": 114180, "total_steps": 204665, "loss": 0.0002, "lr": 9.724929164162512e-07, "epoch": 2.7894363960618573, "percentage": 55.79, "elapsed_time": "2:28:14", "remaining_time": "1:57:28", "throughput": 8652.09, "total_tokens": 76958568} +{"current_steps": 114185, "total_steps": 204665, "loss": 0.0391, "lr": 9.72407671185211e-07, "epoch": 2.789558546893704, "percentage": 55.79, "elapsed_time": "2:28:15", "remaining_time": "1:57:28", "throughput": 8652.17, "total_tokens": 76962344} +{"current_steps": 114190, "total_steps": 204665, "loss": 0.0002, "lr": 9.7232242615483e-07, "epoch": 2.7896806977255517, "percentage": 55.79, "elapsed_time": "2:28:15", "remaining_time": "1:57:28", "throughput": 8652.22, "total_tokens": 76965864} +{"current_steps": 114195, "total_steps": 204665, "loss": 0.1431, "lr": 9.722371813257274e-07, "epoch": 2.7898028485573985, "percentage": 55.8, "elapsed_time": "2:28:15", "remaining_time": "1:57:27", "throughput": 8652.25, "total_tokens": 76969064} +{"current_steps": 114200, "total_steps": 204665, "loss": 0.0559, "lr": 9.721519366985234e-07, "epoch": 2.789924999389246, "percentage": 55.8, "elapsed_time": "2:28:16", "remaining_time": "1:57:27", "throughput": 8652.32, "total_tokens": 76972840} +{"current_steps": 114205, "total_steps": 204665, "loss": 0.0503, "lr": 9.720666922738386e-07, "epoch": 2.790047150221093, "percentage": 55.8, "elapsed_time": "2:28:16", "remaining_time": "1:57:26", "throughput": 8652.34, "total_tokens": 76975976} +{"current_steps": 114210, "total_steps": 204665, "loss": 0.0392, "lr": 9.719814480522918e-07, "epoch": 2.79016930105294, "percentage": 55.8, "elapsed_time": "2:28:16", "remaining_time": "1:57:26", "throughput": 8652.39, "total_tokens": 76979496} +{"current_steps": 114215, "total_steps": 204665, "loss": 0.0003, "lr": 9.718962040345038e-07, "epoch": 2.790291451884787, "percentage": 55.81, "elapsed_time": "2:28:17", "remaining_time": "1:57:25", "throughput": 8652.44, "total_tokens": 76982952} +{"current_steps": 114220, "total_steps": 204665, "loss": 0.0005, "lr": 9.718109602210941e-07, "epoch": 2.7904136027166344, "percentage": 55.81, "elapsed_time": "2:28:17", "remaining_time": "1:57:25", "throughput": 8652.49, "total_tokens": 76986408} +{"current_steps": 114225, "total_steps": 204665, "loss": 0.0251, "lr": 9.717257166126827e-07, "epoch": 2.7905357535484816, "percentage": 55.81, "elapsed_time": "2:28:17", "remaining_time": "1:57:25", "throughput": 8652.5, "total_tokens": 76989480} +{"current_steps": 114230, "total_steps": 204665, "loss": 0.1115, "lr": 9.716404732098894e-07, "epoch": 2.790657904380329, "percentage": 55.81, "elapsed_time": "2:28:18", "remaining_time": "1:57:24", "throughput": 8652.56, "total_tokens": 76993064} +{"current_steps": 114235, "total_steps": 204665, "loss": 0.0002, "lr": 9.71555230013334e-07, "epoch": 2.790780055212176, "percentage": 55.82, "elapsed_time": "2:28:18", "remaining_time": "1:57:24", "throughput": 8652.59, "total_tokens": 76996264} +{"current_steps": 114240, "total_steps": 204665, "loss": 0.0004, "lr": 9.71469987023637e-07, "epoch": 2.790902206044023, "percentage": 55.82, "elapsed_time": "2:28:18", "remaining_time": "1:57:23", "throughput": 8652.67, "total_tokens": 77000040} +{"current_steps": 114245, "total_steps": 204665, "loss": 0.0002, "lr": 9.713847442414174e-07, "epoch": 2.7910243568758704, "percentage": 55.82, "elapsed_time": "2:28:19", "remaining_time": "1:57:23", "throughput": 8652.71, "total_tokens": 77003496} +{"current_steps": 114250, "total_steps": 204665, "loss": 0.106, "lr": 9.712995016672963e-07, "epoch": 2.7911465077077176, "percentage": 55.82, "elapsed_time": "2:28:19", "remaining_time": "1:57:23", "throughput": 8652.76, "total_tokens": 77006952} +{"current_steps": 114255, "total_steps": 204665, "loss": 0.0004, "lr": 9.712142593018926e-07, "epoch": 2.7912686585395647, "percentage": 55.83, "elapsed_time": "2:28:20", "remaining_time": "1:57:22", "throughput": 8652.82, "total_tokens": 77010536} +{"current_steps": 114260, "total_steps": 204665, "loss": 0.1619, "lr": 9.711290171458265e-07, "epoch": 2.791390809371412, "percentage": 55.83, "elapsed_time": "2:28:20", "remaining_time": "1:57:22", "throughput": 8652.89, "total_tokens": 77014248} +{"current_steps": 114265, "total_steps": 204665, "loss": 0.0399, "lr": 9.71043775199718e-07, "epoch": 2.791512960203259, "percentage": 55.83, "elapsed_time": "2:28:20", "remaining_time": "1:57:21", "throughput": 8652.95, "total_tokens": 77017896} +{"current_steps": 114270, "total_steps": 204665, "loss": 0.0002, "lr": 9.70958533464187e-07, "epoch": 2.7916351110351063, "percentage": 55.83, "elapsed_time": "2:28:21", "remaining_time": "1:57:21", "throughput": 8653.0, "total_tokens": 77021416} +{"current_steps": 114275, "total_steps": 204665, "loss": 0.0003, "lr": 9.708732919398534e-07, "epoch": 2.7917572618669535, "percentage": 55.84, "elapsed_time": "2:28:21", "remaining_time": "1:57:20", "throughput": 8653.07, "total_tokens": 77025064} +{"current_steps": 114280, "total_steps": 204665, "loss": 0.0005, "lr": 9.707880506273369e-07, "epoch": 2.7918794126988002, "percentage": 55.84, "elapsed_time": "2:28:21", "remaining_time": "1:57:20", "throughput": 8653.12, "total_tokens": 77028520} +{"current_steps": 114285, "total_steps": 204665, "loss": 0.0312, "lr": 9.70702809527258e-07, "epoch": 2.792001563530648, "percentage": 55.84, "elapsed_time": "2:28:22", "remaining_time": "1:57:20", "throughput": 8653.15, "total_tokens": 77031784} +{"current_steps": 114290, "total_steps": 204665, "loss": 0.0003, "lr": 9.706175686402354e-07, "epoch": 2.7921237143624946, "percentage": 55.84, "elapsed_time": "2:28:22", "remaining_time": "1:57:19", "throughput": 8653.15, "total_tokens": 77034792} +{"current_steps": 114295, "total_steps": 204665, "loss": 0.0019, "lr": 9.7053232796689e-07, "epoch": 2.792245865194342, "percentage": 55.84, "elapsed_time": "2:28:22", "remaining_time": "1:57:19", "throughput": 8653.17, "total_tokens": 77037992} +{"current_steps": 114300, "total_steps": 204665, "loss": 0.0002, "lr": 9.704470875078419e-07, "epoch": 2.792368016026189, "percentage": 55.85, "elapsed_time": "2:28:23", "remaining_time": "1:57:18", "throughput": 8653.21, "total_tokens": 77041320} +{"current_steps": 114305, "total_steps": 204665, "loss": 0.0492, "lr": 9.7036184726371e-07, "epoch": 2.792490166858036, "percentage": 55.85, "elapsed_time": "2:28:23", "remaining_time": "1:57:18", "throughput": 8653.25, "total_tokens": 77044712} +{"current_steps": 114310, "total_steps": 204665, "loss": 0.001, "lr": 9.70276607235115e-07, "epoch": 2.7926123176898834, "percentage": 55.85, "elapsed_time": "2:28:23", "remaining_time": "1:57:17", "throughput": 8653.26, "total_tokens": 77047720} +{"current_steps": 114315, "total_steps": 204665, "loss": 0.0556, "lr": 9.701913674226764e-07, "epoch": 2.7927344685217306, "percentage": 55.85, "elapsed_time": "2:28:24", "remaining_time": "1:57:17", "throughput": 8653.3, "total_tokens": 77051048} +{"current_steps": 114320, "total_steps": 204665, "loss": 0.0003, "lr": 9.701061278270143e-07, "epoch": 2.7928566193535778, "percentage": 55.86, "elapsed_time": "2:28:24", "remaining_time": "1:57:17", "throughput": 8653.35, "total_tokens": 77054504} +{"current_steps": 114325, "total_steps": 204665, "loss": 0.0002, "lr": 9.700208884487485e-07, "epoch": 2.792978770185425, "percentage": 55.86, "elapsed_time": "2:28:24", "remaining_time": "1:57:16", "throughput": 8653.37, "total_tokens": 77057704} +{"current_steps": 114330, "total_steps": 204665, "loss": 0.005, "lr": 9.699356492884986e-07, "epoch": 2.793100921017272, "percentage": 55.86, "elapsed_time": "2:28:25", "remaining_time": "1:57:16", "throughput": 8653.41, "total_tokens": 77061096} +{"current_steps": 114335, "total_steps": 204665, "loss": 0.0504, "lr": 9.698504103468851e-07, "epoch": 2.7932230718491193, "percentage": 55.86, "elapsed_time": "2:28:25", "remaining_time": "1:57:15", "throughput": 8653.44, "total_tokens": 77064424} +{"current_steps": 114340, "total_steps": 204665, "loss": 0.0347, "lr": 9.697651716245271e-07, "epoch": 2.7933452226809665, "percentage": 55.87, "elapsed_time": "2:28:25", "remaining_time": "1:57:15", "throughput": 8653.47, "total_tokens": 77067688} +{"current_steps": 114345, "total_steps": 204665, "loss": 0.0002, "lr": 9.696799331220453e-07, "epoch": 2.7934673735128137, "percentage": 55.87, "elapsed_time": "2:28:26", "remaining_time": "1:57:15", "throughput": 8653.51, "total_tokens": 77071016} +{"current_steps": 114350, "total_steps": 204665, "loss": 0.0, "lr": 9.69594694840059e-07, "epoch": 2.793589524344661, "percentage": 55.87, "elapsed_time": "2:28:26", "remaining_time": "1:57:14", "throughput": 8653.55, "total_tokens": 77074408} +{"current_steps": 114355, "total_steps": 204665, "loss": 0.0513, "lr": 9.69509456779188e-07, "epoch": 2.793711675176508, "percentage": 55.87, "elapsed_time": "2:28:27", "remaining_time": "1:57:14", "throughput": 8653.6, "total_tokens": 77077800} +{"current_steps": 114360, "total_steps": 204665, "loss": 0.0007, "lr": 9.694242189400528e-07, "epoch": 2.7938338260083553, "percentage": 55.88, "elapsed_time": "2:28:27", "remaining_time": "1:57:13", "throughput": 8653.63, "total_tokens": 77081128} +{"current_steps": 114365, "total_steps": 204665, "loss": 0.0006, "lr": 9.693389813232727e-07, "epoch": 2.793955976840202, "percentage": 55.88, "elapsed_time": "2:28:27", "remaining_time": "1:57:13", "throughput": 8653.66, "total_tokens": 77084328} +{"current_steps": 114370, "total_steps": 204665, "loss": 0.0002, "lr": 9.69253743929468e-07, "epoch": 2.7940781276720497, "percentage": 55.88, "elapsed_time": "2:28:28", "remaining_time": "1:57:12", "throughput": 8653.64, "total_tokens": 77087080} +{"current_steps": 114375, "total_steps": 204665, "loss": 0.0002, "lr": 9.691685067592584e-07, "epoch": 2.7942002785038964, "percentage": 55.88, "elapsed_time": "2:28:28", "remaining_time": "1:57:12", "throughput": 8653.67, "total_tokens": 77090344} +{"current_steps": 114380, "total_steps": 204665, "loss": 0.0001, "lr": 9.690832698132636e-07, "epoch": 2.794322429335744, "percentage": 55.89, "elapsed_time": "2:28:28", "remaining_time": "1:57:12", "throughput": 8653.72, "total_tokens": 77093864} +{"current_steps": 114385, "total_steps": 204665, "loss": 0.0002, "lr": 9.689980330921035e-07, "epoch": 2.794444580167591, "percentage": 55.89, "elapsed_time": "2:28:29", "remaining_time": "1:57:11", "throughput": 8653.77, "total_tokens": 77097384} +{"current_steps": 114390, "total_steps": 204665, "loss": 0.0001, "lr": 9.689127965963978e-07, "epoch": 2.794566730999438, "percentage": 55.89, "elapsed_time": "2:28:29", "remaining_time": "1:57:11", "throughput": 8653.83, "total_tokens": 77100904} +{"current_steps": 114395, "total_steps": 204665, "loss": 0.1179, "lr": 9.68827560326767e-07, "epoch": 2.794688881831285, "percentage": 55.89, "elapsed_time": "2:28:29", "remaining_time": "1:57:10", "throughput": 8653.86, "total_tokens": 77104168} +{"current_steps": 114400, "total_steps": 204665, "loss": 0.0004, "lr": 9.687423242838303e-07, "epoch": 2.7948110326631324, "percentage": 55.9, "elapsed_time": "2:28:30", "remaining_time": "1:57:10", "throughput": 8653.87, "total_tokens": 77107304} +{"current_steps": 114405, "total_steps": 204665, "loss": 0.0501, "lr": 9.686570884682082e-07, "epoch": 2.7949331834949795, "percentage": 55.9, "elapsed_time": "2:28:30", "remaining_time": "1:57:09", "throughput": 8653.88, "total_tokens": 77110312} +{"current_steps": 114410, "total_steps": 204665, "loss": 0.0001, "lr": 9.685718528805199e-07, "epoch": 2.7950553343268267, "percentage": 55.9, "elapsed_time": "2:28:30", "remaining_time": "1:57:09", "throughput": 8653.94, "total_tokens": 77113896} +{"current_steps": 114415, "total_steps": 204665, "loss": 0.0002, "lr": 9.684866175213856e-07, "epoch": 2.795177485158674, "percentage": 55.9, "elapsed_time": "2:28:31", "remaining_time": "1:57:09", "throughput": 8653.99, "total_tokens": 77117352} +{"current_steps": 114420, "total_steps": 204665, "loss": 0.0001, "lr": 9.68401382391425e-07, "epoch": 2.795299635990521, "percentage": 55.91, "elapsed_time": "2:28:31", "remaining_time": "1:57:08", "throughput": 8654.03, "total_tokens": 77120744} +{"current_steps": 114425, "total_steps": 204665, "loss": 0.0006, "lr": 9.68316147491258e-07, "epoch": 2.7954217868223683, "percentage": 55.91, "elapsed_time": "2:28:31", "remaining_time": "1:57:08", "throughput": 8654.07, "total_tokens": 77124200} +{"current_steps": 114430, "total_steps": 204665, "loss": 0.1171, "lr": 9.682309128215047e-07, "epoch": 2.7955439376542155, "percentage": 55.91, "elapsed_time": "2:28:32", "remaining_time": "1:57:07", "throughput": 8654.13, "total_tokens": 77127720} +{"current_steps": 114435, "total_steps": 204665, "loss": 0.0003, "lr": 9.681456783827848e-07, "epoch": 2.7956660884860627, "percentage": 55.91, "elapsed_time": "2:28:32", "remaining_time": "1:57:07", "throughput": 8654.16, "total_tokens": 77131048} +{"current_steps": 114440, "total_steps": 204665, "loss": 0.0001, "lr": 9.68060444175718e-07, "epoch": 2.79578823931791, "percentage": 55.92, "elapsed_time": "2:28:32", "remaining_time": "1:57:07", "throughput": 8654.24, "total_tokens": 77134760} +{"current_steps": 114445, "total_steps": 204665, "loss": 0.0002, "lr": 9.67975210200924e-07, "epoch": 2.795910390149757, "percentage": 55.92, "elapsed_time": "2:28:33", "remaining_time": "1:57:06", "throughput": 8654.27, "total_tokens": 77138088} +{"current_steps": 114450, "total_steps": 204665, "loss": 0.066, "lr": 9.67889976459023e-07, "epoch": 2.796032540981604, "percentage": 55.92, "elapsed_time": "2:28:33", "remaining_time": "1:57:06", "throughput": 8654.3, "total_tokens": 77141352} +{"current_steps": 114455, "total_steps": 204665, "loss": 0.0845, "lr": 9.678047429506352e-07, "epoch": 2.7961546918134514, "percentage": 55.92, "elapsed_time": "2:28:33", "remaining_time": "1:57:05", "throughput": 8654.37, "total_tokens": 77145064} +{"current_steps": 114460, "total_steps": 204665, "loss": 0.0015, "lr": 9.677195096763791e-07, "epoch": 2.796276842645298, "percentage": 55.93, "elapsed_time": "2:28:34", "remaining_time": "1:57:05", "throughput": 8654.38, "total_tokens": 77148072} +{"current_steps": 114465, "total_steps": 204665, "loss": 0.0003, "lr": 9.676342766368763e-07, "epoch": 2.796398993477146, "percentage": 55.93, "elapsed_time": "2:28:34", "remaining_time": "1:57:04", "throughput": 8654.4, "total_tokens": 77151272} +{"current_steps": 114470, "total_steps": 204665, "loss": 0.0525, "lr": 9.67549043832745e-07, "epoch": 2.7965211443089926, "percentage": 55.93, "elapsed_time": "2:28:35", "remaining_time": "1:57:04", "throughput": 8654.44, "total_tokens": 77154664} +{"current_steps": 114475, "total_steps": 204665, "loss": 0.1032, "lr": 9.674638112646065e-07, "epoch": 2.7966432951408398, "percentage": 55.93, "elapsed_time": "2:28:35", "remaining_time": "1:57:04", "throughput": 8654.48, "total_tokens": 77157992} +{"current_steps": 114480, "total_steps": 204665, "loss": 0.0004, "lr": 9.673785789330795e-07, "epoch": 2.796765445972687, "percentage": 55.94, "elapsed_time": "2:28:35", "remaining_time": "1:57:03", "throughput": 8654.54, "total_tokens": 77161640} +{"current_steps": 114485, "total_steps": 204665, "loss": 0.0006, "lr": 9.67293346838784e-07, "epoch": 2.796887596804534, "percentage": 55.94, "elapsed_time": "2:28:36", "remaining_time": "1:57:03", "throughput": 8654.58, "total_tokens": 77164968} +{"current_steps": 114490, "total_steps": 204665, "loss": 0.0677, "lr": 9.672081149823406e-07, "epoch": 2.7970097476363813, "percentage": 55.94, "elapsed_time": "2:28:36", "remaining_time": "1:57:02", "throughput": 8654.63, "total_tokens": 77168488} +{"current_steps": 114495, "total_steps": 204665, "loss": 0.0006, "lr": 9.671228833643683e-07, "epoch": 2.7971318984682285, "percentage": 55.94, "elapsed_time": "2:28:36", "remaining_time": "1:57:02", "throughput": 8654.66, "total_tokens": 77171816} +{"current_steps": 114500, "total_steps": 204665, "loss": 0.0961, "lr": 9.670376519854874e-07, "epoch": 2.7972540493000757, "percentage": 55.95, "elapsed_time": "2:28:37", "remaining_time": "1:57:01", "throughput": 8654.78, "total_tokens": 77176040} +{"current_steps": 114505, "total_steps": 204665, "loss": 0.0001, "lr": 9.669524208463172e-07, "epoch": 2.797376200131923, "percentage": 55.95, "elapsed_time": "2:28:37", "remaining_time": "1:57:01", "throughput": 8654.84, "total_tokens": 77179624} +{"current_steps": 114510, "total_steps": 204665, "loss": 0.0786, "lr": 9.66867189947478e-07, "epoch": 2.79749835096377, "percentage": 55.95, "elapsed_time": "2:28:37", "remaining_time": "1:57:01", "throughput": 8654.9, "total_tokens": 77183208} +{"current_steps": 114515, "total_steps": 204665, "loss": 0.1302, "lr": 9.667819592895899e-07, "epoch": 2.7976205017956173, "percentage": 55.95, "elapsed_time": "2:28:38", "remaining_time": "1:57:00", "throughput": 8654.92, "total_tokens": 77186408} +{"current_steps": 114520, "total_steps": 204665, "loss": 0.0002, "lr": 9.666967288732719e-07, "epoch": 2.7977426526274645, "percentage": 55.95, "elapsed_time": "2:28:38", "remaining_time": "1:57:00", "throughput": 8654.98, "total_tokens": 77189992} +{"current_steps": 114525, "total_steps": 204665, "loss": 0.0535, "lr": 9.666114986991446e-07, "epoch": 2.7978648034593117, "percentage": 55.96, "elapsed_time": "2:28:38", "remaining_time": "1:56:59", "throughput": 8654.99, "total_tokens": 77193064} +{"current_steps": 114530, "total_steps": 204665, "loss": 0.0011, "lr": 9.665262687678273e-07, "epoch": 2.797986954291159, "percentage": 55.96, "elapsed_time": "2:28:39", "remaining_time": "1:56:59", "throughput": 8655.01, "total_tokens": 77196200} +{"current_steps": 114535, "total_steps": 204665, "loss": 0.0468, "lr": 9.6644103907994e-07, "epoch": 2.798109105123006, "percentage": 55.96, "elapsed_time": "2:28:39", "remaining_time": "1:56:59", "throughput": 8655.04, "total_tokens": 77199464} +{"current_steps": 114540, "total_steps": 204665, "loss": 0.0001, "lr": 9.663558096361023e-07, "epoch": 2.7982312559548532, "percentage": 55.96, "elapsed_time": "2:28:39", "remaining_time": "1:56:58", "throughput": 8655.08, "total_tokens": 77202792} +{"current_steps": 114545, "total_steps": 204665, "loss": 0.1592, "lr": 9.662705804369343e-07, "epoch": 2.7983534067867, "percentage": 55.97, "elapsed_time": "2:28:40", "remaining_time": "1:56:58", "throughput": 8655.13, "total_tokens": 77206248} +{"current_steps": 114550, "total_steps": 204665, "loss": 0.0375, "lr": 9.66185351483056e-07, "epoch": 2.7984755576185476, "percentage": 55.97, "elapsed_time": "2:28:40", "remaining_time": "1:56:57", "throughput": 8655.16, "total_tokens": 77209512} +{"current_steps": 114555, "total_steps": 204665, "loss": 0.0478, "lr": 9.661001227750864e-07, "epoch": 2.7985977084503944, "percentage": 55.97, "elapsed_time": "2:28:40", "remaining_time": "1:56:57", "throughput": 8655.18, "total_tokens": 77212648} +{"current_steps": 114560, "total_steps": 204665, "loss": 0.1093, "lr": 9.660148943136465e-07, "epoch": 2.798719859282242, "percentage": 55.97, "elapsed_time": "2:28:41", "remaining_time": "1:56:56", "throughput": 8655.19, "total_tokens": 77215784} +{"current_steps": 114565, "total_steps": 204665, "loss": 0.0492, "lr": 9.659296660993548e-07, "epoch": 2.7988420101140887, "percentage": 55.98, "elapsed_time": "2:28:41", "remaining_time": "1:56:56", "throughput": 8655.29, "total_tokens": 77219752} +{"current_steps": 114570, "total_steps": 204665, "loss": 0.0809, "lr": 9.65844438132832e-07, "epoch": 2.798964160945936, "percentage": 55.98, "elapsed_time": "2:28:42", "remaining_time": "1:56:56", "throughput": 8655.34, "total_tokens": 77223208} +{"current_steps": 114575, "total_steps": 204665, "loss": 0.045, "lr": 9.657592104146976e-07, "epoch": 2.799086311777783, "percentage": 55.98, "elapsed_time": "2:28:42", "remaining_time": "1:56:55", "throughput": 8655.35, "total_tokens": 77226216} +{"current_steps": 114580, "total_steps": 204665, "loss": 0.0002, "lr": 9.656739829455712e-07, "epoch": 2.7992084626096303, "percentage": 55.98, "elapsed_time": "2:28:42", "remaining_time": "1:56:55", "throughput": 8655.43, "total_tokens": 77229928} +{"current_steps": 114585, "total_steps": 204665, "loss": 0.0017, "lr": 9.655887557260731e-07, "epoch": 2.7993306134414775, "percentage": 55.99, "elapsed_time": "2:28:43", "remaining_time": "1:56:54", "throughput": 8655.47, "total_tokens": 77233384} +{"current_steps": 114590, "total_steps": 204665, "loss": 0.0002, "lr": 9.655035287568229e-07, "epoch": 2.7994527642733247, "percentage": 55.99, "elapsed_time": "2:28:43", "remaining_time": "1:56:54", "throughput": 8655.56, "total_tokens": 77237288} +{"current_steps": 114595, "total_steps": 204665, "loss": 0.0004, "lr": 9.654183020384405e-07, "epoch": 2.799574915105172, "percentage": 55.99, "elapsed_time": "2:28:43", "remaining_time": "1:56:53", "throughput": 8655.59, "total_tokens": 77240552} +{"current_steps": 114600, "total_steps": 204665, "loss": 0.0413, "lr": 9.65333075571545e-07, "epoch": 2.799697065937019, "percentage": 55.99, "elapsed_time": "2:28:44", "remaining_time": "1:56:53", "throughput": 8655.61, "total_tokens": 77243688} +{"current_steps": 114605, "total_steps": 204665, "loss": 0.1785, "lr": 9.652478493567566e-07, "epoch": 2.7998192167688662, "percentage": 56.0, "elapsed_time": "2:28:44", "remaining_time": "1:56:53", "throughput": 8655.63, "total_tokens": 77246824} +{"current_steps": 114610, "total_steps": 204665, "loss": 0.0537, "lr": 9.651626233946959e-07, "epoch": 2.7999413676007134, "percentage": 56.0, "elapsed_time": "2:28:44", "remaining_time": "1:56:52", "throughput": 8655.64, "total_tokens": 77249896} +{"current_steps": 114615, "total_steps": 204665, "loss": 0.0716, "lr": 9.650773976859812e-07, "epoch": 2.8000635184325606, "percentage": 56.0, "elapsed_time": "2:28:45", "remaining_time": "1:56:52", "throughput": 8655.65, "total_tokens": 77252968} +{"current_steps": 114620, "total_steps": 204665, "loss": 0.0002, "lr": 9.649921722312337e-07, "epoch": 2.800185669264408, "percentage": 56.0, "elapsed_time": "2:28:45", "remaining_time": "1:56:51", "throughput": 8655.71, "total_tokens": 77256552} +{"current_steps": 114625, "total_steps": 204665, "loss": 0.0003, "lr": 9.64906947031072e-07, "epoch": 2.800307820096255, "percentage": 56.01, "elapsed_time": "2:28:45", "remaining_time": "1:56:51", "throughput": 8655.76, "total_tokens": 77259944} +{"current_steps": 114630, "total_steps": 204665, "loss": 0.0004, "lr": 9.64821722086117e-07, "epoch": 2.8004299709281018, "percentage": 56.01, "elapsed_time": "2:28:46", "remaining_time": "1:56:50", "throughput": 8655.77, "total_tokens": 77263080} +{"current_steps": 114635, "total_steps": 204665, "loss": 0.0006, "lr": 9.647364973969876e-07, "epoch": 2.8005521217599494, "percentage": 56.01, "elapsed_time": "2:28:46", "remaining_time": "1:56:50", "throughput": 8655.8, "total_tokens": 77266344} +{"current_steps": 114640, "total_steps": 204665, "loss": 0.0404, "lr": 9.646512729643037e-07, "epoch": 2.800674272591796, "percentage": 56.01, "elapsed_time": "2:28:46", "remaining_time": "1:56:50", "throughput": 8655.85, "total_tokens": 77269800} +{"current_steps": 114645, "total_steps": 204665, "loss": 0.0002, "lr": 9.645660487886856e-07, "epoch": 2.8007964234236438, "percentage": 56.02, "elapsed_time": "2:28:47", "remaining_time": "1:56:49", "throughput": 8655.89, "total_tokens": 77273128} +{"current_steps": 114650, "total_steps": 204665, "loss": 0.0002, "lr": 9.644808248707523e-07, "epoch": 2.8009185742554905, "percentage": 56.02, "elapsed_time": "2:28:47", "remaining_time": "1:56:49", "throughput": 8655.92, "total_tokens": 77276328} +{"current_steps": 114655, "total_steps": 204665, "loss": 0.0009, "lr": 9.643956012111247e-07, "epoch": 2.8010407250873377, "percentage": 56.02, "elapsed_time": "2:28:47", "remaining_time": "1:56:48", "throughput": 8655.98, "total_tokens": 77279976} +{"current_steps": 114660, "total_steps": 204665, "loss": 0.0442, "lr": 9.64310377810421e-07, "epoch": 2.801162875919185, "percentage": 56.02, "elapsed_time": "2:28:48", "remaining_time": "1:56:48", "throughput": 8656.01, "total_tokens": 77283176} +{"current_steps": 114665, "total_steps": 204665, "loss": 0.0006, "lr": 9.642251546692621e-07, "epoch": 2.801285026751032, "percentage": 56.03, "elapsed_time": "2:28:48", "remaining_time": "1:56:48", "throughput": 8656.02, "total_tokens": 77286312} +{"current_steps": 114670, "total_steps": 204665, "loss": 0.0478, "lr": 9.641399317882678e-07, "epoch": 2.8014071775828793, "percentage": 56.03, "elapsed_time": "2:28:48", "remaining_time": "1:56:47", "throughput": 8656.07, "total_tokens": 77289704} +{"current_steps": 114675, "total_steps": 204665, "loss": 0.0001, "lr": 9.640547091680572e-07, "epoch": 2.8015293284147265, "percentage": 56.03, "elapsed_time": "2:28:49", "remaining_time": "1:56:47", "throughput": 8656.14, "total_tokens": 77293416} +{"current_steps": 114680, "total_steps": 204665, "loss": 0.0008, "lr": 9.639694868092509e-07, "epoch": 2.8016514792465737, "percentage": 56.03, "elapsed_time": "2:28:49", "remaining_time": "1:56:46", "throughput": 8656.21, "total_tokens": 77297128} +{"current_steps": 114685, "total_steps": 204665, "loss": 0.137, "lr": 9.638842647124679e-07, "epoch": 2.801773630078421, "percentage": 56.04, "elapsed_time": "2:28:50", "remaining_time": "1:56:46", "throughput": 8656.28, "total_tokens": 77300904} +{"current_steps": 114690, "total_steps": 204665, "loss": 0.0002, "lr": 9.637990428783282e-07, "epoch": 2.801895780910268, "percentage": 56.04, "elapsed_time": "2:28:50", "remaining_time": "1:56:45", "throughput": 8656.36, "total_tokens": 77304680} +{"current_steps": 114695, "total_steps": 204665, "loss": 0.0002, "lr": 9.637138213074516e-07, "epoch": 2.802017931742115, "percentage": 56.04, "elapsed_time": "2:28:50", "remaining_time": "1:56:45", "throughput": 8656.38, "total_tokens": 77307816} +{"current_steps": 114700, "total_steps": 204665, "loss": 0.0464, "lr": 9.636286000004578e-07, "epoch": 2.8021400825739624, "percentage": 56.04, "elapsed_time": "2:28:51", "remaining_time": "1:56:45", "throughput": 8656.4, "total_tokens": 77311016} +{"current_steps": 114705, "total_steps": 204665, "loss": 0.0951, "lr": 9.63543378957967e-07, "epoch": 2.8022622334058096, "percentage": 56.05, "elapsed_time": "2:28:51", "remaining_time": "1:56:44", "throughput": 8656.42, "total_tokens": 77314152} +{"current_steps": 114710, "total_steps": 204665, "loss": 0.0961, "lr": 9.63458158180598e-07, "epoch": 2.802384384237657, "percentage": 56.05, "elapsed_time": "2:28:51", "remaining_time": "1:56:44", "throughput": 8656.52, "total_tokens": 77318184} +{"current_steps": 114715, "total_steps": 204665, "loss": 0.0001, "lr": 9.633729376689715e-07, "epoch": 2.802506535069504, "percentage": 56.05, "elapsed_time": "2:28:52", "remaining_time": "1:56:43", "throughput": 8656.55, "total_tokens": 77321448} +{"current_steps": 114720, "total_steps": 204665, "loss": 0.0527, "lr": 9.632877174237066e-07, "epoch": 2.802628685901351, "percentage": 56.05, "elapsed_time": "2:28:52", "remaining_time": "1:56:43", "throughput": 8656.57, "total_tokens": 77324584} +{"current_steps": 114725, "total_steps": 204665, "loss": 0.0005, "lr": 9.632024974454233e-07, "epoch": 2.802750836733198, "percentage": 56.06, "elapsed_time": "2:28:52", "remaining_time": "1:56:43", "throughput": 8656.67, "total_tokens": 77328680} +{"current_steps": 114730, "total_steps": 204665, "loss": 0.0593, "lr": 9.631172777347414e-07, "epoch": 2.8028729875650455, "percentage": 56.06, "elapsed_time": "2:28:53", "remaining_time": "1:56:42", "throughput": 8656.69, "total_tokens": 77331816} +{"current_steps": 114735, "total_steps": 204665, "loss": 0.139, "lr": 9.630320582922805e-07, "epoch": 2.8029951383968923, "percentage": 56.06, "elapsed_time": "2:28:53", "remaining_time": "1:56:42", "throughput": 8656.7, "total_tokens": 77334888} +{"current_steps": 114740, "total_steps": 204665, "loss": 0.0428, "lr": 9.629468391186605e-07, "epoch": 2.8031172892287395, "percentage": 56.06, "elapsed_time": "2:28:53", "remaining_time": "1:56:41", "throughput": 8656.92, "total_tokens": 77340328} +{"current_steps": 114745, "total_steps": 204665, "loss": 0.0409, "lr": 9.628616202145012e-07, "epoch": 2.8032394400605867, "percentage": 56.06, "elapsed_time": "2:28:54", "remaining_time": "1:56:41", "throughput": 8656.94, "total_tokens": 77343464} +{"current_steps": 114750, "total_steps": 204665, "loss": 0.0007, "lr": 9.627764015804223e-07, "epoch": 2.803361590892434, "percentage": 56.07, "elapsed_time": "2:28:54", "remaining_time": "1:56:40", "throughput": 8656.98, "total_tokens": 77346856} +{"current_steps": 114755, "total_steps": 204665, "loss": 0.0529, "lr": 9.62691183217043e-07, "epoch": 2.803483741724281, "percentage": 56.07, "elapsed_time": "2:28:54", "remaining_time": "1:56:40", "throughput": 8657.03, "total_tokens": 77350376} +{"current_steps": 114760, "total_steps": 204665, "loss": 0.0002, "lr": 9.626059651249834e-07, "epoch": 2.8036058925561282, "percentage": 56.07, "elapsed_time": "2:28:55", "remaining_time": "1:56:40", "throughput": 8657.07, "total_tokens": 77353768} +{"current_steps": 114765, "total_steps": 204665, "loss": 0.0332, "lr": 9.625207473048638e-07, "epoch": 2.8037280433879754, "percentage": 56.07, "elapsed_time": "2:28:55", "remaining_time": "1:56:39", "throughput": 8657.12, "total_tokens": 77357224} +{"current_steps": 114770, "total_steps": 204665, "loss": 0.0003, "lr": 9.624355297573028e-07, "epoch": 2.8038501942198226, "percentage": 56.08, "elapsed_time": "2:28:56", "remaining_time": "1:56:39", "throughput": 8657.16, "total_tokens": 77360616} +{"current_steps": 114775, "total_steps": 204665, "loss": 0.0404, "lr": 9.623503124829213e-07, "epoch": 2.80397234505167, "percentage": 56.08, "elapsed_time": "2:28:56", "remaining_time": "1:56:38", "throughput": 8657.19, "total_tokens": 77363816} +{"current_steps": 114780, "total_steps": 204665, "loss": 0.0002, "lr": 9.622650954823378e-07, "epoch": 2.804094495883517, "percentage": 56.08, "elapsed_time": "2:28:56", "remaining_time": "1:56:38", "throughput": 8657.22, "total_tokens": 77367080} +{"current_steps": 114785, "total_steps": 204665, "loss": 0.0003, "lr": 9.621798787561736e-07, "epoch": 2.804216646715364, "percentage": 56.08, "elapsed_time": "2:28:57", "remaining_time": "1:56:37", "throughput": 8657.24, "total_tokens": 77370280} +{"current_steps": 114790, "total_steps": 204665, "loss": 0.0004, "lr": 9.620946623050468e-07, "epoch": 2.8043387975472114, "percentage": 56.09, "elapsed_time": "2:28:57", "remaining_time": "1:56:37", "throughput": 8657.37, "total_tokens": 77374632} +{"current_steps": 114795, "total_steps": 204665, "loss": 0.0841, "lr": 9.620094461295779e-07, "epoch": 2.8044609483790586, "percentage": 56.09, "elapsed_time": "2:28:57", "remaining_time": "1:56:37", "throughput": 8657.43, "total_tokens": 77378216} +{"current_steps": 114800, "total_steps": 204665, "loss": 0.0001, "lr": 9.619242302303867e-07, "epoch": 2.8045830992109058, "percentage": 56.09, "elapsed_time": "2:28:58", "remaining_time": "1:56:36", "throughput": 8657.53, "total_tokens": 77382184} +{"current_steps": 114805, "total_steps": 204665, "loss": 0.0003, "lr": 9.618390146080925e-07, "epoch": 2.804705250042753, "percentage": 56.09, "elapsed_time": "2:28:58", "remaining_time": "1:56:36", "throughput": 8657.54, "total_tokens": 77385256} +{"current_steps": 114810, "total_steps": 204665, "loss": 0.0314, "lr": 9.617537992633155e-07, "epoch": 2.8048274008745997, "percentage": 56.1, "elapsed_time": "2:28:58", "remaining_time": "1:56:35", "throughput": 8657.56, "total_tokens": 77388456} +{"current_steps": 114815, "total_steps": 204665, "loss": 0.1347, "lr": 9.61668584196675e-07, "epoch": 2.8049495517064473, "percentage": 56.1, "elapsed_time": "2:28:59", "remaining_time": "1:56:35", "throughput": 8657.65, "total_tokens": 77392360} +{"current_steps": 114820, "total_steps": 204665, "loss": 0.0002, "lr": 9.615833694087908e-07, "epoch": 2.805071702538294, "percentage": 56.1, "elapsed_time": "2:28:59", "remaining_time": "1:56:35", "throughput": 8657.69, "total_tokens": 77395688} +{"current_steps": 114825, "total_steps": 204665, "loss": 0.0527, "lr": 9.614981549002828e-07, "epoch": 2.8051938533701417, "percentage": 56.1, "elapsed_time": "2:28:59", "remaining_time": "1:56:34", "throughput": 8657.69, "total_tokens": 77398632} +{"current_steps": 114830, "total_steps": 204665, "loss": 0.0573, "lr": 9.614129406717703e-07, "epoch": 2.8053160042019885, "percentage": 56.11, "elapsed_time": "2:29:00", "remaining_time": "1:56:34", "throughput": 8657.73, "total_tokens": 77401960} +{"current_steps": 114835, "total_steps": 204665, "loss": 0.053, "lr": 9.61327726723874e-07, "epoch": 2.8054381550338356, "percentage": 56.11, "elapsed_time": "2:29:00", "remaining_time": "1:56:33", "throughput": 8657.74, "total_tokens": 77404968} +{"current_steps": 114840, "total_steps": 204665, "loss": 0.1165, "lr": 9.612425130572124e-07, "epoch": 2.805560305865683, "percentage": 56.11, "elapsed_time": "2:29:00", "remaining_time": "1:56:33", "throughput": 8657.79, "total_tokens": 77408488} +{"current_steps": 114845, "total_steps": 204665, "loss": 0.0006, "lr": 9.611572996724055e-07, "epoch": 2.80568245669753, "percentage": 56.11, "elapsed_time": "2:29:01", "remaining_time": "1:56:32", "throughput": 8657.81, "total_tokens": 77411688} +{"current_steps": 114850, "total_steps": 204665, "loss": 0.0002, "lr": 9.610720865700735e-07, "epoch": 2.805804607529377, "percentage": 56.12, "elapsed_time": "2:29:01", "remaining_time": "1:56:32", "throughput": 8657.82, "total_tokens": 77414760} +{"current_steps": 114855, "total_steps": 204665, "loss": 0.0004, "lr": 9.609868737508353e-07, "epoch": 2.8059267583612244, "percentage": 56.12, "elapsed_time": "2:29:01", "remaining_time": "1:56:32", "throughput": 8657.86, "total_tokens": 77418152} +{"current_steps": 114860, "total_steps": 204665, "loss": 0.0003, "lr": 9.609016612153115e-07, "epoch": 2.8060489091930716, "percentage": 56.12, "elapsed_time": "2:29:02", "remaining_time": "1:56:31", "throughput": 8657.91, "total_tokens": 77421544} +{"current_steps": 114865, "total_steps": 204665, "loss": 0.0805, "lr": 9.60816448964121e-07, "epoch": 2.806171060024919, "percentage": 56.12, "elapsed_time": "2:29:02", "remaining_time": "1:56:31", "throughput": 8657.94, "total_tokens": 77424808} +{"current_steps": 114870, "total_steps": 204665, "loss": 0.0, "lr": 9.607312369978842e-07, "epoch": 2.806293210856766, "percentage": 56.13, "elapsed_time": "2:29:02", "remaining_time": "1:56:30", "throughput": 8657.98, "total_tokens": 77428200} +{"current_steps": 114875, "total_steps": 204665, "loss": 0.0002, "lr": 9.606460253172201e-07, "epoch": 2.806415361688613, "percentage": 56.13, "elapsed_time": "2:29:03", "remaining_time": "1:56:30", "throughput": 8658.03, "total_tokens": 77431656} +{"current_steps": 114880, "total_steps": 204665, "loss": 0.0337, "lr": 9.60560813922749e-07, "epoch": 2.8065375125204604, "percentage": 56.13, "elapsed_time": "2:29:03", "remaining_time": "1:56:29", "throughput": 8658.07, "total_tokens": 77435112} +{"current_steps": 114885, "total_steps": 204665, "loss": 0.0006, "lr": 9.604756028150898e-07, "epoch": 2.8066596633523075, "percentage": 56.13, "elapsed_time": "2:29:04", "remaining_time": "1:56:29", "throughput": 8658.1, "total_tokens": 77438312} +{"current_steps": 114890, "total_steps": 204665, "loss": 0.0456, "lr": 9.60390391994863e-07, "epoch": 2.8067818141841547, "percentage": 56.14, "elapsed_time": "2:29:04", "remaining_time": "1:56:29", "throughput": 8658.11, "total_tokens": 77441320} +{"current_steps": 114895, "total_steps": 204665, "loss": 0.0, "lr": 9.603051814626877e-07, "epoch": 2.806903965016002, "percentage": 56.14, "elapsed_time": "2:29:04", "remaining_time": "1:56:28", "throughput": 8658.17, "total_tokens": 77444968} +{"current_steps": 114900, "total_steps": 204665, "loss": 0.0489, "lr": 9.60219971219184e-07, "epoch": 2.807026115847849, "percentage": 56.14, "elapsed_time": "2:29:05", "remaining_time": "1:56:28", "throughput": 8658.2, "total_tokens": 77448232} +{"current_steps": 114905, "total_steps": 204665, "loss": 0.1049, "lr": 9.601347612649715e-07, "epoch": 2.807148266679696, "percentage": 56.14, "elapsed_time": "2:29:05", "remaining_time": "1:56:27", "throughput": 8658.28, "total_tokens": 77452072} +{"current_steps": 114910, "total_steps": 204665, "loss": 0.0002, "lr": 9.600495516006694e-07, "epoch": 2.8072704175115435, "percentage": 56.15, "elapsed_time": "2:29:05", "remaining_time": "1:56:27", "throughput": 8658.36, "total_tokens": 77455912} +{"current_steps": 114915, "total_steps": 204665, "loss": 0.0718, "lr": 9.599643422268976e-07, "epoch": 2.8073925683433902, "percentage": 56.15, "elapsed_time": "2:29:06", "remaining_time": "1:56:27", "throughput": 8658.41, "total_tokens": 77459304} +{"current_steps": 114920, "total_steps": 204665, "loss": 0.0013, "lr": 9.598791331442765e-07, "epoch": 2.8075147191752374, "percentage": 56.15, "elapsed_time": "2:29:06", "remaining_time": "1:56:26", "throughput": 8658.45, "total_tokens": 77462696} +{"current_steps": 114925, "total_steps": 204665, "loss": 0.0004, "lr": 9.597939243534244e-07, "epoch": 2.8076368700070846, "percentage": 56.15, "elapsed_time": "2:29:06", "remaining_time": "1:56:26", "throughput": 8658.52, "total_tokens": 77466472} +{"current_steps": 114930, "total_steps": 204665, "loss": 0.0032, "lr": 9.597087158549623e-07, "epoch": 2.807759020838932, "percentage": 56.16, "elapsed_time": "2:29:07", "remaining_time": "1:56:25", "throughput": 8658.54, "total_tokens": 77469608} +{"current_steps": 114935, "total_steps": 204665, "loss": 0.1067, "lr": 9.596235076495088e-07, "epoch": 2.807881171670779, "percentage": 56.16, "elapsed_time": "2:29:07", "remaining_time": "1:56:25", "throughput": 8658.59, "total_tokens": 77473064} +{"current_steps": 114940, "total_steps": 204665, "loss": 0.0006, "lr": 9.595382997376846e-07, "epoch": 2.808003322502626, "percentage": 56.16, "elapsed_time": "2:29:07", "remaining_time": "1:56:24", "throughput": 8658.63, "total_tokens": 77476456} +{"current_steps": 114945, "total_steps": 204665, "loss": 0.0002, "lr": 9.594530921201082e-07, "epoch": 2.8081254733344734, "percentage": 56.16, "elapsed_time": "2:29:08", "remaining_time": "1:56:24", "throughput": 8658.68, "total_tokens": 77479976} +{"current_steps": 114950, "total_steps": 204665, "loss": 0.0001, "lr": 9.593678847974e-07, "epoch": 2.8082476241663206, "percentage": 56.16, "elapsed_time": "2:29:08", "remaining_time": "1:56:24", "throughput": 8658.71, "total_tokens": 77483240} +{"current_steps": 114955, "total_steps": 204665, "loss": 0.0007, "lr": 9.592826777701796e-07, "epoch": 2.8083697749981678, "percentage": 56.17, "elapsed_time": "2:29:08", "remaining_time": "1:56:23", "throughput": 8658.75, "total_tokens": 77486568} +{"current_steps": 114960, "total_steps": 204665, "loss": 0.0416, "lr": 9.591974710390663e-07, "epoch": 2.808491925830015, "percentage": 56.17, "elapsed_time": "2:29:09", "remaining_time": "1:56:23", "throughput": 8658.76, "total_tokens": 77489640} +{"current_steps": 114965, "total_steps": 204665, "loss": 0.0001, "lr": 9.591122646046802e-07, "epoch": 2.808614076661862, "percentage": 56.17, "elapsed_time": "2:29:09", "remaining_time": "1:56:22", "throughput": 8658.82, "total_tokens": 77493224} +{"current_steps": 114970, "total_steps": 204665, "loss": 0.0001, "lr": 9.590270584676403e-07, "epoch": 2.8087362274937093, "percentage": 56.17, "elapsed_time": "2:29:09", "remaining_time": "1:56:22", "throughput": 8658.9, "total_tokens": 77497128} +{"current_steps": 114975, "total_steps": 204665, "loss": 0.1205, "lr": 9.589418526285667e-07, "epoch": 2.8088583783255565, "percentage": 56.18, "elapsed_time": "2:29:10", "remaining_time": "1:56:22", "throughput": 8658.94, "total_tokens": 77500392} +{"current_steps": 114980, "total_steps": 204665, "loss": 0.116, "lr": 9.588566470880794e-07, "epoch": 2.8089805291574037, "percentage": 56.18, "elapsed_time": "2:29:10", "remaining_time": "1:56:21", "throughput": 8658.96, "total_tokens": 77503528} +{"current_steps": 114985, "total_steps": 204665, "loss": 0.1401, "lr": 9.587714418467974e-07, "epoch": 2.809102679989251, "percentage": 56.18, "elapsed_time": "2:29:11", "remaining_time": "1:56:21", "throughput": 8658.98, "total_tokens": 77506728} +{"current_steps": 114990, "total_steps": 204665, "loss": 0.0006, "lr": 9.586862369053409e-07, "epoch": 2.8092248308210976, "percentage": 56.18, "elapsed_time": "2:29:11", "remaining_time": "1:56:20", "throughput": 8659.03, "total_tokens": 77510248} +{"current_steps": 114995, "total_steps": 204665, "loss": 0.0479, "lr": 9.586010322643287e-07, "epoch": 2.8093469816529453, "percentage": 56.19, "elapsed_time": "2:29:11", "remaining_time": "1:56:20", "throughput": 8659.09, "total_tokens": 77513704} +{"current_steps": 115000, "total_steps": 204665, "loss": 0.0371, "lr": 9.585158279243812e-07, "epoch": 2.809469132484792, "percentage": 56.19, "elapsed_time": "2:29:12", "remaining_time": "1:56:19", "throughput": 8659.13, "total_tokens": 77517096} +{"current_steps": 115005, "total_steps": 204665, "loss": 0.1121, "lr": 9.584306238861178e-07, "epoch": 2.8095912833166397, "percentage": 56.19, "elapsed_time": "2:29:12", "remaining_time": "1:56:19", "throughput": 8659.14, "total_tokens": 77520168} +{"current_steps": 115010, "total_steps": 204665, "loss": 0.0471, "lr": 9.583454201501576e-07, "epoch": 2.8097134341484864, "percentage": 56.19, "elapsed_time": "2:29:12", "remaining_time": "1:56:19", "throughput": 8659.22, "total_tokens": 77523944} +{"current_steps": 115015, "total_steps": 204665, "loss": 0.0003, "lr": 9.582602167171215e-07, "epoch": 2.8098355849803336, "percentage": 56.2, "elapsed_time": "2:29:13", "remaining_time": "1:56:18", "throughput": 8659.25, "total_tokens": 77527208} +{"current_steps": 115020, "total_steps": 204665, "loss": 0.0006, "lr": 9.581750135876275e-07, "epoch": 2.809957735812181, "percentage": 56.2, "elapsed_time": "2:29:13", "remaining_time": "1:56:18", "throughput": 8659.24, "total_tokens": 77530024} +{"current_steps": 115025, "total_steps": 204665, "loss": 0.0001, "lr": 9.580898107622967e-07, "epoch": 2.810079886644028, "percentage": 56.2, "elapsed_time": "2:29:13", "remaining_time": "1:56:17", "throughput": 8659.25, "total_tokens": 77533096} +{"current_steps": 115030, "total_steps": 204665, "loss": 0.0003, "lr": 9.580046082417476e-07, "epoch": 2.810202037475875, "percentage": 56.2, "elapsed_time": "2:29:14", "remaining_time": "1:56:17", "throughput": 8659.28, "total_tokens": 77536360} +{"current_steps": 115035, "total_steps": 204665, "loss": 0.0416, "lr": 9.57919406026601e-07, "epoch": 2.8103241883077223, "percentage": 56.21, "elapsed_time": "2:29:14", "remaining_time": "1:56:16", "throughput": 8659.31, "total_tokens": 77539560} +{"current_steps": 115040, "total_steps": 204665, "loss": 0.0003, "lr": 9.57834204117475e-07, "epoch": 2.8104463391395695, "percentage": 56.21, "elapsed_time": "2:29:14", "remaining_time": "1:56:16", "throughput": 8659.32, "total_tokens": 77542632} +{"current_steps": 115045, "total_steps": 204665, "loss": 0.0707, "lr": 9.577490025149901e-07, "epoch": 2.8105684899714167, "percentage": 56.21, "elapsed_time": "2:29:15", "remaining_time": "1:56:16", "throughput": 8659.37, "total_tokens": 77546152} +{"current_steps": 115050, "total_steps": 204665, "loss": 0.0003, "lr": 9.576638012197661e-07, "epoch": 2.810690640803264, "percentage": 56.21, "elapsed_time": "2:29:15", "remaining_time": "1:56:15", "throughput": 8659.42, "total_tokens": 77549544} +{"current_steps": 115055, "total_steps": 204665, "loss": 0.0002, "lr": 9.575786002324225e-07, "epoch": 2.810812791635111, "percentage": 56.22, "elapsed_time": "2:29:15", "remaining_time": "1:56:15", "throughput": 8659.44, "total_tokens": 77552680} +{"current_steps": 115060, "total_steps": 204665, "loss": 0.0019, "lr": 9.574933995535786e-07, "epoch": 2.8109349424669583, "percentage": 56.22, "elapsed_time": "2:29:16", "remaining_time": "1:56:14", "throughput": 8659.46, "total_tokens": 77555880} +{"current_steps": 115065, "total_steps": 204665, "loss": 0.0742, "lr": 9.57408199183854e-07, "epoch": 2.8110570932988055, "percentage": 56.22, "elapsed_time": "2:29:16", "remaining_time": "1:56:14", "throughput": 8659.47, "total_tokens": 77558952} +{"current_steps": 115070, "total_steps": 204665, "loss": 0.0004, "lr": 9.57322999123868e-07, "epoch": 2.8111792441306527, "percentage": 56.22, "elapsed_time": "2:29:16", "remaining_time": "1:56:13", "throughput": 8659.54, "total_tokens": 77562600} +{"current_steps": 115075, "total_steps": 204665, "loss": 0.0002, "lr": 9.572377993742413e-07, "epoch": 2.8113013949624994, "percentage": 56.23, "elapsed_time": "2:29:17", "remaining_time": "1:56:13", "throughput": 8659.61, "total_tokens": 77566312} +{"current_steps": 115080, "total_steps": 204665, "loss": 0.0333, "lr": 9.571525999355926e-07, "epoch": 2.811423545794347, "percentage": 56.23, "elapsed_time": "2:29:17", "remaining_time": "1:56:13", "throughput": 8659.63, "total_tokens": 77569384} +{"current_steps": 115085, "total_steps": 204665, "loss": 0.0003, "lr": 9.570674008085419e-07, "epoch": 2.811545696626194, "percentage": 56.23, "elapsed_time": "2:29:17", "remaining_time": "1:56:12", "throughput": 8659.67, "total_tokens": 77572776} +{"current_steps": 115090, "total_steps": 204665, "loss": 0.0554, "lr": 9.569822019937082e-07, "epoch": 2.8116678474580414, "percentage": 56.23, "elapsed_time": "2:29:18", "remaining_time": "1:56:12", "throughput": 8659.71, "total_tokens": 77576232} +{"current_steps": 115095, "total_steps": 204665, "loss": 0.0236, "lr": 9.568970034917119e-07, "epoch": 2.811789998289888, "percentage": 56.24, "elapsed_time": "2:29:18", "remaining_time": "1:56:11", "throughput": 8659.76, "total_tokens": 77579624} +{"current_steps": 115100, "total_steps": 204665, "loss": 0.0628, "lr": 9.56811805303172e-07, "epoch": 2.8119121491217354, "percentage": 56.24, "elapsed_time": "2:29:18", "remaining_time": "1:56:11", "throughput": 8659.84, "total_tokens": 77583464} +{"current_steps": 115105, "total_steps": 204665, "loss": 0.0399, "lr": 9.56726607428708e-07, "epoch": 2.8120342999535826, "percentage": 56.24, "elapsed_time": "2:29:19", "remaining_time": "1:56:11", "throughput": 8659.88, "total_tokens": 77586856} +{"current_steps": 115110, "total_steps": 204665, "loss": 0.0001, "lr": 9.566414098689404e-07, "epoch": 2.8121564507854298, "percentage": 56.24, "elapsed_time": "2:29:19", "remaining_time": "1:56:10", "throughput": 8659.89, "total_tokens": 77589864} +{"current_steps": 115115, "total_steps": 204665, "loss": 0.0761, "lr": 9.565562126244876e-07, "epoch": 2.812278601617277, "percentage": 56.25, "elapsed_time": "2:29:20", "remaining_time": "1:56:10", "throughput": 8659.9, "total_tokens": 77592808} +{"current_steps": 115120, "total_steps": 204665, "loss": 0.0002, "lr": 9.5647101569597e-07, "epoch": 2.812400752449124, "percentage": 56.25, "elapsed_time": "2:29:20", "remaining_time": "1:56:09", "throughput": 8659.94, "total_tokens": 77596200} +{"current_steps": 115125, "total_steps": 204665, "loss": 0.0723, "lr": 9.563858190840066e-07, "epoch": 2.8125229032809713, "percentage": 56.25, "elapsed_time": "2:29:20", "remaining_time": "1:56:09", "throughput": 8659.97, "total_tokens": 77599528} +{"current_steps": 115130, "total_steps": 204665, "loss": 0.0001, "lr": 9.563006227892172e-07, "epoch": 2.8126450541128185, "percentage": 56.25, "elapsed_time": "2:29:21", "remaining_time": "1:56:08", "throughput": 8660.01, "total_tokens": 77602920} +{"current_steps": 115135, "total_steps": 204665, "loss": 0.0001, "lr": 9.562154268122217e-07, "epoch": 2.8127672049446657, "percentage": 56.26, "elapsed_time": "2:29:21", "remaining_time": "1:56:08", "throughput": 8660.08, "total_tokens": 77606632} +{"current_steps": 115140, "total_steps": 204665, "loss": 0.0291, "lr": 9.561302311536392e-07, "epoch": 2.812889355776513, "percentage": 56.26, "elapsed_time": "2:29:21", "remaining_time": "1:56:08", "throughput": 8660.14, "total_tokens": 77610216} +{"current_steps": 115145, "total_steps": 204665, "loss": 0.0193, "lr": 9.5604503581409e-07, "epoch": 2.81301150660836, "percentage": 56.26, "elapsed_time": "2:29:22", "remaining_time": "1:56:07", "throughput": 8660.2, "total_tokens": 77613800} +{"current_steps": 115150, "total_steps": 204665, "loss": 0.0568, "lr": 9.559598407941925e-07, "epoch": 2.8131336574402073, "percentage": 56.26, "elapsed_time": "2:29:22", "remaining_time": "1:56:07", "throughput": 8660.22, "total_tokens": 77617000} +{"current_steps": 115155, "total_steps": 204665, "loss": 0.0002, "lr": 9.558746460945672e-07, "epoch": 2.8132558082720545, "percentage": 56.27, "elapsed_time": "2:29:22", "remaining_time": "1:56:06", "throughput": 8660.24, "total_tokens": 77620072} +{"current_steps": 115160, "total_steps": 204665, "loss": 0.0001, "lr": 9.557894517158332e-07, "epoch": 2.8133779591039016, "percentage": 56.27, "elapsed_time": "2:29:23", "remaining_time": "1:56:06", "throughput": 8660.25, "total_tokens": 77623208} +{"current_steps": 115165, "total_steps": 204665, "loss": 0.0002, "lr": 9.557042576586101e-07, "epoch": 2.813500109935749, "percentage": 56.27, "elapsed_time": "2:29:23", "remaining_time": "1:56:05", "throughput": 8660.25, "total_tokens": 77626088} +{"current_steps": 115170, "total_steps": 204665, "loss": 0.0003, "lr": 9.55619063923518e-07, "epoch": 2.8136222607675956, "percentage": 56.27, "elapsed_time": "2:29:23", "remaining_time": "1:56:05", "throughput": 8660.26, "total_tokens": 77629160} +{"current_steps": 115175, "total_steps": 204665, "loss": 0.0001, "lr": 9.555338705111753e-07, "epoch": 2.813744411599443, "percentage": 56.27, "elapsed_time": "2:29:24", "remaining_time": "1:56:05", "throughput": 8660.33, "total_tokens": 77632808} +{"current_steps": 115180, "total_steps": 204665, "loss": 0.0002, "lr": 9.55448677422203e-07, "epoch": 2.81386656243129, "percentage": 56.28, "elapsed_time": "2:29:24", "remaining_time": "1:56:04", "throughput": 8660.41, "total_tokens": 77636712} +{"current_steps": 115185, "total_steps": 204665, "loss": 0.0386, "lr": 9.55363484657219e-07, "epoch": 2.8139887132631376, "percentage": 56.28, "elapsed_time": "2:29:24", "remaining_time": "1:56:04", "throughput": 8660.46, "total_tokens": 77640104} +{"current_steps": 115190, "total_steps": 204665, "loss": 0.0113, "lr": 9.552782922168447e-07, "epoch": 2.8141108640949843, "percentage": 56.28, "elapsed_time": "2:29:25", "remaining_time": "1:56:03", "throughput": 8660.48, "total_tokens": 77643304} +{"current_steps": 115195, "total_steps": 204665, "loss": 0.0599, "lr": 9.55193100101698e-07, "epoch": 2.8142330149268315, "percentage": 56.28, "elapsed_time": "2:29:25", "remaining_time": "1:56:03", "throughput": 8660.52, "total_tokens": 77646632} +{"current_steps": 115200, "total_steps": 204665, "loss": 0.0457, "lr": 9.551079083123996e-07, "epoch": 2.8143551657586787, "percentage": 56.29, "elapsed_time": "2:29:25", "remaining_time": "1:56:02", "throughput": 8660.54, "total_tokens": 77649768} +{"current_steps": 115205, "total_steps": 204665, "loss": 0.0008, "lr": 9.550227168495683e-07, "epoch": 2.814477316590526, "percentage": 56.29, "elapsed_time": "2:29:26", "remaining_time": "1:56:02", "throughput": 8660.57, "total_tokens": 77653032} +{"current_steps": 115210, "total_steps": 204665, "loss": 0.0429, "lr": 9.54937525713824e-07, "epoch": 2.814599467422373, "percentage": 56.29, "elapsed_time": "2:29:26", "remaining_time": "1:56:02", "throughput": 8660.6, "total_tokens": 77656296} +{"current_steps": 115215, "total_steps": 204665, "loss": 0.0993, "lr": 9.548523349057864e-07, "epoch": 2.8147216182542203, "percentage": 56.29, "elapsed_time": "2:29:26", "remaining_time": "1:56:01", "throughput": 8660.62, "total_tokens": 77659496} +{"current_steps": 115220, "total_steps": 204665, "loss": 0.0003, "lr": 9.54767144426074e-07, "epoch": 2.8148437690860675, "percentage": 56.3, "elapsed_time": "2:29:27", "remaining_time": "1:56:01", "throughput": 8660.68, "total_tokens": 77663016} +{"current_steps": 115225, "total_steps": 204665, "loss": 0.0411, "lr": 9.546819542753074e-07, "epoch": 2.8149659199179147, "percentage": 56.3, "elapsed_time": "2:29:27", "remaining_time": "1:56:00", "throughput": 8660.76, "total_tokens": 77666920} +{"current_steps": 115230, "total_steps": 204665, "loss": 0.007, "lr": 9.545967644541063e-07, "epoch": 2.815088070749762, "percentage": 56.3, "elapsed_time": "2:29:28", "remaining_time": "1:56:00", "throughput": 8660.81, "total_tokens": 77670312} +{"current_steps": 115235, "total_steps": 204665, "loss": 0.0466, "lr": 9.545115749630891e-07, "epoch": 2.815210221581609, "percentage": 56.3, "elapsed_time": "2:29:28", "remaining_time": "1:56:00", "throughput": 8660.86, "total_tokens": 77673832} +{"current_steps": 115240, "total_steps": 204665, "loss": 0.0004, "lr": 9.544263858028765e-07, "epoch": 2.8153323724134562, "percentage": 56.31, "elapsed_time": "2:29:28", "remaining_time": "1:55:59", "throughput": 8660.89, "total_tokens": 77677032} +{"current_steps": 115245, "total_steps": 204665, "loss": 0.0046, "lr": 9.54341196974087e-07, "epoch": 2.8154545232453034, "percentage": 56.31, "elapsed_time": "2:29:29", "remaining_time": "1:55:59", "throughput": 8660.92, "total_tokens": 77680296} +{"current_steps": 115250, "total_steps": 204665, "loss": 0.0458, "lr": 9.542560084773412e-07, "epoch": 2.8155766740771506, "percentage": 56.31, "elapsed_time": "2:29:29", "remaining_time": "1:55:58", "throughput": 8660.98, "total_tokens": 77683880} +{"current_steps": 115255, "total_steps": 204665, "loss": 0.0001, "lr": 9.541708203132577e-07, "epoch": 2.8156988249089974, "percentage": 56.31, "elapsed_time": "2:29:29", "remaining_time": "1:55:58", "throughput": 8661.0, "total_tokens": 77687080} +{"current_steps": 115260, "total_steps": 204665, "loss": 0.0001, "lr": 9.54085632482456e-07, "epoch": 2.815820975740845, "percentage": 56.32, "elapsed_time": "2:29:30", "remaining_time": "1:55:57", "throughput": 8661.04, "total_tokens": 77690472} +{"current_steps": 115265, "total_steps": 204665, "loss": 0.0751, "lr": 9.540004449855565e-07, "epoch": 2.8159431265726917, "percentage": 56.32, "elapsed_time": "2:29:30", "remaining_time": "1:55:57", "throughput": 8661.09, "total_tokens": 77693864} +{"current_steps": 115270, "total_steps": 204665, "loss": 0.0454, "lr": 9.539152578231776e-07, "epoch": 2.8160652774045394, "percentage": 56.32, "elapsed_time": "2:29:30", "remaining_time": "1:55:57", "throughput": 8661.12, "total_tokens": 77697192} +{"current_steps": 115275, "total_steps": 204665, "loss": 0.069, "lr": 9.538300709959398e-07, "epoch": 2.816187428236386, "percentage": 56.32, "elapsed_time": "2:29:31", "remaining_time": "1:55:56", "throughput": 8661.16, "total_tokens": 77700520} +{"current_steps": 115280, "total_steps": 204665, "loss": 0.0, "lr": 9.537448845044617e-07, "epoch": 2.8163095790682333, "percentage": 56.33, "elapsed_time": "2:29:31", "remaining_time": "1:55:56", "throughput": 8661.21, "total_tokens": 77704040} +{"current_steps": 115285, "total_steps": 204665, "loss": 0.0002, "lr": 9.536596983493633e-07, "epoch": 2.8164317299000805, "percentage": 56.33, "elapsed_time": "2:29:31", "remaining_time": "1:55:55", "throughput": 8661.29, "total_tokens": 77707880} +{"current_steps": 115290, "total_steps": 204665, "loss": 0.035, "lr": 9.535745125312644e-07, "epoch": 2.8165538807319277, "percentage": 56.33, "elapsed_time": "2:29:32", "remaining_time": "1:55:55", "throughput": 8661.33, "total_tokens": 77711208} +{"current_steps": 115295, "total_steps": 204665, "loss": 0.0001, "lr": 9.534893270507837e-07, "epoch": 2.816676031563775, "percentage": 56.33, "elapsed_time": "2:29:32", "remaining_time": "1:55:55", "throughput": 8661.35, "total_tokens": 77714472} +{"current_steps": 115300, "total_steps": 204665, "loss": 0.0003, "lr": 9.534041419085417e-07, "epoch": 2.816798182395622, "percentage": 56.34, "elapsed_time": "2:29:32", "remaining_time": "1:55:54", "throughput": 8661.4, "total_tokens": 77717864} +{"current_steps": 115305, "total_steps": 204665, "loss": 0.0001, "lr": 9.53318957105157e-07, "epoch": 2.8169203332274693, "percentage": 56.34, "elapsed_time": "2:29:33", "remaining_time": "1:55:54", "throughput": 8661.44, "total_tokens": 77721256} +{"current_steps": 115310, "total_steps": 204665, "loss": 0.0654, "lr": 9.532337726412494e-07, "epoch": 2.8170424840593165, "percentage": 56.34, "elapsed_time": "2:29:33", "remaining_time": "1:55:53", "throughput": 8661.46, "total_tokens": 77724456} +{"current_steps": 115315, "total_steps": 204665, "loss": 0.0492, "lr": 9.531485885174384e-07, "epoch": 2.8171646348911636, "percentage": 56.34, "elapsed_time": "2:29:33", "remaining_time": "1:55:53", "throughput": 8661.51, "total_tokens": 77727912} +{"current_steps": 115320, "total_steps": 204665, "loss": 0.0479, "lr": 9.530634047343432e-07, "epoch": 2.817286785723011, "percentage": 56.35, "elapsed_time": "2:29:34", "remaining_time": "1:55:52", "throughput": 8661.57, "total_tokens": 77731432} +{"current_steps": 115325, "total_steps": 204665, "loss": 0.0404, "lr": 9.52978221292584e-07, "epoch": 2.817408936554858, "percentage": 56.35, "elapsed_time": "2:29:34", "remaining_time": "1:55:52", "throughput": 8661.6, "total_tokens": 77734760} +{"current_steps": 115330, "total_steps": 204665, "loss": 0.0004, "lr": 9.528930381927794e-07, "epoch": 2.817531087386705, "percentage": 56.35, "elapsed_time": "2:29:34", "remaining_time": "1:55:52", "throughput": 8661.65, "total_tokens": 77738216} +{"current_steps": 115335, "total_steps": 204665, "loss": 0.1156, "lr": 9.528078554355497e-07, "epoch": 2.8176532382185524, "percentage": 56.35, "elapsed_time": "2:29:35", "remaining_time": "1:55:51", "throughput": 8661.76, "total_tokens": 77742504} +{"current_steps": 115340, "total_steps": 204665, "loss": 0.0003, "lr": 9.527226730215136e-07, "epoch": 2.8177753890503996, "percentage": 56.36, "elapsed_time": "2:29:35", "remaining_time": "1:55:51", "throughput": 8661.81, "total_tokens": 77746024} +{"current_steps": 115345, "total_steps": 204665, "loss": 0.0, "lr": 9.526374909512913e-07, "epoch": 2.817897539882247, "percentage": 56.36, "elapsed_time": "2:29:36", "remaining_time": "1:55:50", "throughput": 8661.89, "total_tokens": 77749800} +{"current_steps": 115350, "total_steps": 204665, "loss": 0.0001, "lr": 9.525523092255015e-07, "epoch": 2.8180196907140935, "percentage": 56.36, "elapsed_time": "2:29:36", "remaining_time": "1:55:50", "throughput": 8661.93, "total_tokens": 77753192} +{"current_steps": 115355, "total_steps": 204665, "loss": 0.0001, "lr": 9.524671278447642e-07, "epoch": 2.818141841545941, "percentage": 56.36, "elapsed_time": "2:29:36", "remaining_time": "1:55:49", "throughput": 8662.03, "total_tokens": 77757160} +{"current_steps": 115360, "total_steps": 204665, "loss": 0.0004, "lr": 9.523819468096988e-07, "epoch": 2.818263992377788, "percentage": 56.37, "elapsed_time": "2:29:37", "remaining_time": "1:55:49", "throughput": 8662.08, "total_tokens": 77760744} +{"current_steps": 115365, "total_steps": 204665, "loss": 0.0002, "lr": 9.522967661209249e-07, "epoch": 2.818386143209635, "percentage": 56.37, "elapsed_time": "2:29:37", "remaining_time": "1:55:49", "throughput": 8662.13, "total_tokens": 77764200} +{"current_steps": 115370, "total_steps": 204665, "loss": 0.036, "lr": 9.522115857790616e-07, "epoch": 2.8185082940414823, "percentage": 56.37, "elapsed_time": "2:29:37", "remaining_time": "1:55:48", "throughput": 8662.18, "total_tokens": 77767656} +{"current_steps": 115375, "total_steps": 204665, "loss": 0.0542, "lr": 9.521264057847283e-07, "epoch": 2.8186304448733295, "percentage": 56.37, "elapsed_time": "2:29:38", "remaining_time": "1:55:48", "throughput": 8662.18, "total_tokens": 77770600} +{"current_steps": 115380, "total_steps": 204665, "loss": 0.0633, "lr": 9.520412261385445e-07, "epoch": 2.8187525957051767, "percentage": 56.38, "elapsed_time": "2:29:38", "remaining_time": "1:55:47", "throughput": 8662.2, "total_tokens": 77773736} +{"current_steps": 115385, "total_steps": 204665, "loss": 0.0526, "lr": 9.519560468411304e-07, "epoch": 2.818874746537024, "percentage": 56.38, "elapsed_time": "2:29:38", "remaining_time": "1:55:47", "throughput": 8662.22, "total_tokens": 77776872} +{"current_steps": 115390, "total_steps": 204665, "loss": 0.0002, "lr": 9.518708678931044e-07, "epoch": 2.818996897368871, "percentage": 56.38, "elapsed_time": "2:29:39", "remaining_time": "1:55:47", "throughput": 8662.23, "total_tokens": 77779944} +{"current_steps": 115395, "total_steps": 204665, "loss": 0.0981, "lr": 9.517856892950866e-07, "epoch": 2.8191190482007182, "percentage": 56.38, "elapsed_time": "2:29:39", "remaining_time": "1:55:46", "throughput": 8662.26, "total_tokens": 77783208} +{"current_steps": 115400, "total_steps": 204665, "loss": 0.0001, "lr": 9.51700511047696e-07, "epoch": 2.8192411990325654, "percentage": 56.38, "elapsed_time": "2:29:39", "remaining_time": "1:55:46", "throughput": 8662.32, "total_tokens": 77786728} +{"current_steps": 115405, "total_steps": 204665, "loss": 0.0721, "lr": 9.516153331515528e-07, "epoch": 2.8193633498644126, "percentage": 56.39, "elapsed_time": "2:29:40", "remaining_time": "1:55:45", "throughput": 8662.36, "total_tokens": 77790120} +{"current_steps": 115410, "total_steps": 204665, "loss": 0.1149, "lr": 9.515301556072754e-07, "epoch": 2.81948550069626, "percentage": 56.39, "elapsed_time": "2:29:40", "remaining_time": "1:55:45", "throughput": 8662.4, "total_tokens": 77793512} +{"current_steps": 115415, "total_steps": 204665, "loss": 0.0486, "lr": 9.514449784154837e-07, "epoch": 2.819607651528107, "percentage": 56.39, "elapsed_time": "2:29:40", "remaining_time": "1:55:44", "throughput": 8662.51, "total_tokens": 77797608} +{"current_steps": 115420, "total_steps": 204665, "loss": 0.0898, "lr": 9.513598015767978e-07, "epoch": 2.819729802359954, "percentage": 56.39, "elapsed_time": "2:29:41", "remaining_time": "1:55:44", "throughput": 8662.58, "total_tokens": 77801384} +{"current_steps": 115425, "total_steps": 204665, "loss": 0.0009, "lr": 9.512746250918358e-07, "epoch": 2.8198519531918014, "percentage": 56.4, "elapsed_time": "2:29:41", "remaining_time": "1:55:44", "throughput": 8662.65, "total_tokens": 77805096} +{"current_steps": 115430, "total_steps": 204665, "loss": 0.001, "lr": 9.511894489612183e-07, "epoch": 2.8199741040236486, "percentage": 56.4, "elapsed_time": "2:29:42", "remaining_time": "1:55:43", "throughput": 8662.66, "total_tokens": 77808168} +{"current_steps": 115435, "total_steps": 204665, "loss": 0.0834, "lr": 9.51104273185564e-07, "epoch": 2.8200962548554953, "percentage": 56.4, "elapsed_time": "2:29:42", "remaining_time": "1:55:43", "throughput": 8662.73, "total_tokens": 77811816} +{"current_steps": 115440, "total_steps": 204665, "loss": 0.0453, "lr": 9.510190977654924e-07, "epoch": 2.820218405687343, "percentage": 56.4, "elapsed_time": "2:29:42", "remaining_time": "1:55:42", "throughput": 8662.75, "total_tokens": 77814952} +{"current_steps": 115445, "total_steps": 204665, "loss": 0.0005, "lr": 9.509339227016235e-07, "epoch": 2.8203405565191897, "percentage": 56.41, "elapsed_time": "2:29:43", "remaining_time": "1:55:42", "throughput": 8662.8, "total_tokens": 77818472} +{"current_steps": 115450, "total_steps": 204665, "loss": 0.0503, "lr": 9.50848747994576e-07, "epoch": 2.8204627073510373, "percentage": 56.41, "elapsed_time": "2:29:43", "remaining_time": "1:55:42", "throughput": 8662.86, "total_tokens": 77821992} +{"current_steps": 115455, "total_steps": 204665, "loss": 0.1241, "lr": 9.5076357364497e-07, "epoch": 2.820584858182884, "percentage": 56.41, "elapsed_time": "2:29:43", "remaining_time": "1:55:41", "throughput": 8662.86, "total_tokens": 77824936} +{"current_steps": 115460, "total_steps": 204665, "loss": 0.0472, "lr": 9.506783996534244e-07, "epoch": 2.8207070090147313, "percentage": 56.41, "elapsed_time": "2:29:44", "remaining_time": "1:55:41", "throughput": 8662.89, "total_tokens": 77828264} +{"current_steps": 115465, "total_steps": 204665, "loss": 0.0284, "lr": 9.505932260205586e-07, "epoch": 2.8208291598465784, "percentage": 56.42, "elapsed_time": "2:29:44", "remaining_time": "1:55:40", "throughput": 8662.91, "total_tokens": 77831400} +{"current_steps": 115470, "total_steps": 204665, "loss": 0.0399, "lr": 9.505080527469924e-07, "epoch": 2.8209513106784256, "percentage": 56.42, "elapsed_time": "2:29:44", "remaining_time": "1:55:40", "throughput": 8662.93, "total_tokens": 77834536} +{"current_steps": 115475, "total_steps": 204665, "loss": 0.0344, "lr": 9.504228798333445e-07, "epoch": 2.821073461510273, "percentage": 56.42, "elapsed_time": "2:29:45", "remaining_time": "1:55:39", "throughput": 8662.97, "total_tokens": 77837928} +{"current_steps": 115480, "total_steps": 204665, "loss": 0.0446, "lr": 9.503377072802353e-07, "epoch": 2.82119561234212, "percentage": 56.42, "elapsed_time": "2:29:45", "remaining_time": "1:55:39", "throughput": 8663.03, "total_tokens": 77841576} +{"current_steps": 115485, "total_steps": 204665, "loss": 0.0524, "lr": 9.502525350882831e-07, "epoch": 2.821317763173967, "percentage": 56.43, "elapsed_time": "2:29:45", "remaining_time": "1:55:39", "throughput": 8663.03, "total_tokens": 77844520} +{"current_steps": 115490, "total_steps": 204665, "loss": 0.0459, "lr": 9.501673632581083e-07, "epoch": 2.8214399140058144, "percentage": 56.43, "elapsed_time": "2:29:46", "remaining_time": "1:55:38", "throughput": 8663.12, "total_tokens": 77848424} +{"current_steps": 115495, "total_steps": 204665, "loss": 0.0005, "lr": 9.500821917903295e-07, "epoch": 2.8215620648376616, "percentage": 56.43, "elapsed_time": "2:29:46", "remaining_time": "1:55:38", "throughput": 8663.17, "total_tokens": 77851944} +{"current_steps": 115500, "total_steps": 204665, "loss": 0.0003, "lr": 9.499970206855668e-07, "epoch": 2.8216842156695088, "percentage": 56.43, "elapsed_time": "2:29:46", "remaining_time": "1:55:37", "throughput": 8663.22, "total_tokens": 77855400} +{"current_steps": 115505, "total_steps": 204665, "loss": 0.0003, "lr": 9.499118499444388e-07, "epoch": 2.821806366501356, "percentage": 56.44, "elapsed_time": "2:29:47", "remaining_time": "1:55:37", "throughput": 8663.28, "total_tokens": 77858920} +{"current_steps": 115510, "total_steps": 204665, "loss": 0.0011, "lr": 9.498266795675654e-07, "epoch": 2.821928517333203, "percentage": 56.44, "elapsed_time": "2:29:47", "remaining_time": "1:55:36", "throughput": 8663.33, "total_tokens": 77862376} +{"current_steps": 115515, "total_steps": 204665, "loss": 0.0787, "lr": 9.497415095555659e-07, "epoch": 2.8220506681650503, "percentage": 56.44, "elapsed_time": "2:29:47", "remaining_time": "1:55:36", "throughput": 8663.38, "total_tokens": 77865896} +{"current_steps": 115520, "total_steps": 204665, "loss": 0.0487, "lr": 9.496563399090598e-07, "epoch": 2.822172818996897, "percentage": 56.44, "elapsed_time": "2:29:48", "remaining_time": "1:55:36", "throughput": 8663.47, "total_tokens": 77869864} +{"current_steps": 115525, "total_steps": 204665, "loss": 0.0255, "lr": 9.495711706286666e-07, "epoch": 2.8222949698287447, "percentage": 56.45, "elapsed_time": "2:29:48", "remaining_time": "1:55:35", "throughput": 8663.5, "total_tokens": 77873064} +{"current_steps": 115530, "total_steps": 204665, "loss": 0.0015, "lr": 9.494860017150048e-07, "epoch": 2.8224171206605915, "percentage": 56.45, "elapsed_time": "2:29:48", "remaining_time": "1:55:35", "throughput": 8663.54, "total_tokens": 77876520} +{"current_steps": 115535, "total_steps": 204665, "loss": 0.0005, "lr": 9.494008331686945e-07, "epoch": 2.822539271492439, "percentage": 56.45, "elapsed_time": "2:29:49", "remaining_time": "1:55:34", "throughput": 8663.59, "total_tokens": 77879976} +{"current_steps": 115540, "total_steps": 204665, "loss": 0.0006, "lr": 9.493156649903553e-07, "epoch": 2.822661422324286, "percentage": 56.45, "elapsed_time": "2:29:49", "remaining_time": "1:55:34", "throughput": 8663.66, "total_tokens": 77883624} +{"current_steps": 115545, "total_steps": 204665, "loss": 0.0001, "lr": 9.492304971806059e-07, "epoch": 2.822783573156133, "percentage": 56.46, "elapsed_time": "2:29:50", "remaining_time": "1:55:34", "throughput": 8663.73, "total_tokens": 77887400} +{"current_steps": 115550, "total_steps": 204665, "loss": 0.0258, "lr": 9.491453297400663e-07, "epoch": 2.8229057239879802, "percentage": 56.46, "elapsed_time": "2:29:50", "remaining_time": "1:55:33", "throughput": 8663.8, "total_tokens": 77891048} +{"current_steps": 115555, "total_steps": 204665, "loss": 0.0001, "lr": 9.490601626693551e-07, "epoch": 2.8230278748198274, "percentage": 56.46, "elapsed_time": "2:29:50", "remaining_time": "1:55:33", "throughput": 8663.82, "total_tokens": 77894184} +{"current_steps": 115560, "total_steps": 204665, "loss": 0.0004, "lr": 9.489749959690926e-07, "epoch": 2.8231500256516746, "percentage": 56.46, "elapsed_time": "2:29:51", "remaining_time": "1:55:32", "throughput": 8663.84, "total_tokens": 77897384} +{"current_steps": 115565, "total_steps": 204665, "loss": 0.0003, "lr": 9.488898296398975e-07, "epoch": 2.823272176483522, "percentage": 56.47, "elapsed_time": "2:29:51", "remaining_time": "1:55:32", "throughput": 8663.89, "total_tokens": 77900840} +{"current_steps": 115570, "total_steps": 204665, "loss": 0.0249, "lr": 9.48804663682389e-07, "epoch": 2.823394327315369, "percentage": 56.47, "elapsed_time": "2:29:51", "remaining_time": "1:55:31", "throughput": 8663.91, "total_tokens": 77904040} +{"current_steps": 115575, "total_steps": 204665, "loss": 0.0003, "lr": 9.487194980971871e-07, "epoch": 2.823516478147216, "percentage": 56.47, "elapsed_time": "2:29:52", "remaining_time": "1:55:31", "throughput": 8663.97, "total_tokens": 77907624} +{"current_steps": 115580, "total_steps": 204665, "loss": 0.0279, "lr": 9.486343328849105e-07, "epoch": 2.8236386289790634, "percentage": 56.47, "elapsed_time": "2:29:52", "remaining_time": "1:55:31", "throughput": 8664.01, "total_tokens": 77911080} +{"current_steps": 115585, "total_steps": 204665, "loss": 0.0, "lr": 9.485491680461792e-07, "epoch": 2.8237607798109106, "percentage": 56.48, "elapsed_time": "2:29:52", "remaining_time": "1:55:30", "throughput": 8664.06, "total_tokens": 77914536} +{"current_steps": 115590, "total_steps": 204665, "loss": 0.0459, "lr": 9.484640035816119e-07, "epoch": 2.8238829306427577, "percentage": 56.48, "elapsed_time": "2:29:53", "remaining_time": "1:55:30", "throughput": 8664.14, "total_tokens": 77918312} +{"current_steps": 115595, "total_steps": 204665, "loss": 0.073, "lr": 9.483788394918285e-07, "epoch": 2.824005081474605, "percentage": 56.48, "elapsed_time": "2:29:53", "remaining_time": "1:55:29", "throughput": 8664.22, "total_tokens": 77922216} +{"current_steps": 115600, "total_steps": 204665, "loss": 0.0002, "lr": 9.482936757774477e-07, "epoch": 2.824127232306452, "percentage": 56.48, "elapsed_time": "2:29:53", "remaining_time": "1:55:29", "throughput": 8664.28, "total_tokens": 77925736} +{"current_steps": 115605, "total_steps": 204665, "loss": 0.0001, "lr": 9.482085124390892e-07, "epoch": 2.8242493831382993, "percentage": 56.48, "elapsed_time": "2:29:54", "remaining_time": "1:55:29", "throughput": 8664.31, "total_tokens": 77929064} +{"current_steps": 115610, "total_steps": 204665, "loss": 0.0004, "lr": 9.481233494773727e-07, "epoch": 2.8243715339701465, "percentage": 56.49, "elapsed_time": "2:29:54", "remaining_time": "1:55:28", "throughput": 8664.32, "total_tokens": 77932136} +{"current_steps": 115615, "total_steps": 204665, "loss": 0.063, "lr": 9.48038186892917e-07, "epoch": 2.8244936848019933, "percentage": 56.49, "elapsed_time": "2:29:54", "remaining_time": "1:55:28", "throughput": 8664.31, "total_tokens": 77934888} +{"current_steps": 115620, "total_steps": 204665, "loss": 0.0003, "lr": 9.479530246863416e-07, "epoch": 2.824615835633841, "percentage": 56.49, "elapsed_time": "2:29:55", "remaining_time": "1:55:27", "throughput": 8664.36, "total_tokens": 77938344} +{"current_steps": 115625, "total_steps": 204665, "loss": 0.0007, "lr": 9.478678628582657e-07, "epoch": 2.8247379864656876, "percentage": 56.49, "elapsed_time": "2:29:55", "remaining_time": "1:55:27", "throughput": 8664.39, "total_tokens": 77941672} +{"current_steps": 115630, "total_steps": 204665, "loss": 0.0834, "lr": 9.477827014093086e-07, "epoch": 2.8248601372975353, "percentage": 56.5, "elapsed_time": "2:29:55", "remaining_time": "1:55:26", "throughput": 8664.43, "total_tokens": 77945000} +{"current_steps": 115635, "total_steps": 204665, "loss": 0.0504, "lr": 9.476975403400902e-07, "epoch": 2.824982288129382, "percentage": 56.5, "elapsed_time": "2:29:56", "remaining_time": "1:55:26", "throughput": 8664.44, "total_tokens": 77948072} +{"current_steps": 115640, "total_steps": 204665, "loss": 0.0314, "lr": 9.476123796512288e-07, "epoch": 2.825104438961229, "percentage": 56.5, "elapsed_time": "2:29:56", "remaining_time": "1:55:26", "throughput": 8664.46, "total_tokens": 77951208} +{"current_steps": 115645, "total_steps": 204665, "loss": 0.0604, "lr": 9.475272193433448e-07, "epoch": 2.8252265897930764, "percentage": 56.5, "elapsed_time": "2:29:57", "remaining_time": "1:55:25", "throughput": 8664.52, "total_tokens": 77954792} +{"current_steps": 115650, "total_steps": 204665, "loss": 0.0883, "lr": 9.474420594170566e-07, "epoch": 2.8253487406249236, "percentage": 56.51, "elapsed_time": "2:29:57", "remaining_time": "1:55:25", "throughput": 8664.6, "total_tokens": 77958632} +{"current_steps": 115655, "total_steps": 204665, "loss": 0.034, "lr": 9.473568998729842e-07, "epoch": 2.8254708914567708, "percentage": 56.51, "elapsed_time": "2:29:57", "remaining_time": "1:55:24", "throughput": 8664.68, "total_tokens": 77962408} +{"current_steps": 115660, "total_steps": 204665, "loss": 0.0001, "lr": 9.472717407117461e-07, "epoch": 2.825593042288618, "percentage": 56.51, "elapsed_time": "2:29:58", "remaining_time": "1:55:24", "throughput": 8664.72, "total_tokens": 77965736} +{"current_steps": 115665, "total_steps": 204665, "loss": 0.0514, "lr": 9.471865819339624e-07, "epoch": 2.825715193120465, "percentage": 56.51, "elapsed_time": "2:29:58", "remaining_time": "1:55:23", "throughput": 8664.79, "total_tokens": 77969512} +{"current_steps": 115670, "total_steps": 204665, "loss": 0.1223, "lr": 9.471014235402521e-07, "epoch": 2.8258373439523123, "percentage": 56.52, "elapsed_time": "2:29:58", "remaining_time": "1:55:23", "throughput": 8664.84, "total_tokens": 77972968} +{"current_steps": 115675, "total_steps": 204665, "loss": 0.0635, "lr": 9.470162655312344e-07, "epoch": 2.8259594947841595, "percentage": 56.52, "elapsed_time": "2:29:59", "remaining_time": "1:55:23", "throughput": 8664.84, "total_tokens": 77975912} +{"current_steps": 115680, "total_steps": 204665, "loss": 0.0108, "lr": 9.46931107907529e-07, "epoch": 2.8260816456160067, "percentage": 56.52, "elapsed_time": "2:29:59", "remaining_time": "1:55:22", "throughput": 8664.87, "total_tokens": 77979176} +{"current_steps": 115685, "total_steps": 204665, "loss": 0.0002, "lr": 9.468459506697543e-07, "epoch": 2.826203796447854, "percentage": 56.52, "elapsed_time": "2:29:59", "remaining_time": "1:55:22", "throughput": 8664.93, "total_tokens": 77982760} +{"current_steps": 115690, "total_steps": 204665, "loss": 0.0003, "lr": 9.467607938185301e-07, "epoch": 2.826325947279701, "percentage": 56.53, "elapsed_time": "2:30:00", "remaining_time": "1:55:21", "throughput": 8664.98, "total_tokens": 77986216} +{"current_steps": 115695, "total_steps": 204665, "loss": 0.0501, "lr": 9.466756373544763e-07, "epoch": 2.8264480981115483, "percentage": 56.53, "elapsed_time": "2:30:00", "remaining_time": "1:55:21", "throughput": 8665.02, "total_tokens": 77989672} +{"current_steps": 115700, "total_steps": 204665, "loss": 0.1267, "lr": 9.465904812782112e-07, "epoch": 2.826570248943395, "percentage": 56.53, "elapsed_time": "2:30:00", "remaining_time": "1:55:21", "throughput": 8665.05, "total_tokens": 77992872} +{"current_steps": 115705, "total_steps": 204665, "loss": 0.0841, "lr": 9.465053255903548e-07, "epoch": 2.8266923997752427, "percentage": 56.53, "elapsed_time": "2:30:01", "remaining_time": "1:55:20", "throughput": 8665.05, "total_tokens": 77995880} +{"current_steps": 115710, "total_steps": 204665, "loss": 0.0007, "lr": 9.464201702915256e-07, "epoch": 2.8268145506070894, "percentage": 56.54, "elapsed_time": "2:30:01", "remaining_time": "1:55:20", "throughput": 8665.11, "total_tokens": 77999400} +{"current_steps": 115715, "total_steps": 204665, "loss": 0.1111, "lr": 9.463350153823438e-07, "epoch": 2.826936701438937, "percentage": 56.54, "elapsed_time": "2:30:01", "remaining_time": "1:55:19", "throughput": 8665.17, "total_tokens": 78003112} +{"current_steps": 115720, "total_steps": 204665, "loss": 0.0557, "lr": 9.462498608634281e-07, "epoch": 2.827058852270784, "percentage": 56.54, "elapsed_time": "2:30:02", "remaining_time": "1:55:19", "throughput": 8665.22, "total_tokens": 78006568} +{"current_steps": 115725, "total_steps": 204665, "loss": 0.0315, "lr": 9.461647067353975e-07, "epoch": 2.827181003102631, "percentage": 56.54, "elapsed_time": "2:30:02", "remaining_time": "1:55:18", "throughput": 8665.22, "total_tokens": 78009512} +{"current_steps": 115730, "total_steps": 204665, "loss": 0.0488, "lr": 9.460795529988723e-07, "epoch": 2.827303153934478, "percentage": 56.55, "elapsed_time": "2:30:02", "remaining_time": "1:55:18", "throughput": 8665.29, "total_tokens": 78013160} +{"current_steps": 115735, "total_steps": 204665, "loss": 0.041, "lr": 9.459943996544703e-07, "epoch": 2.8274253047663254, "percentage": 56.55, "elapsed_time": "2:30:03", "remaining_time": "1:55:18", "throughput": 8665.3, "total_tokens": 78016232} +{"current_steps": 115740, "total_steps": 204665, "loss": 0.0001, "lr": 9.459092467028122e-07, "epoch": 2.8275474555981726, "percentage": 56.55, "elapsed_time": "2:30:03", "remaining_time": "1:55:17", "throughput": 8665.35, "total_tokens": 78019688} +{"current_steps": 115745, "total_steps": 204665, "loss": 0.0002, "lr": 9.458240941445163e-07, "epoch": 2.8276696064300197, "percentage": 56.55, "elapsed_time": "2:30:03", "remaining_time": "1:55:17", "throughput": 8665.39, "total_tokens": 78023016} +{"current_steps": 115750, "total_steps": 204665, "loss": 0.0003, "lr": 9.457389419802024e-07, "epoch": 2.827791757261867, "percentage": 56.56, "elapsed_time": "2:30:04", "remaining_time": "1:55:16", "throughput": 8665.42, "total_tokens": 78026344} +{"current_steps": 115755, "total_steps": 204665, "loss": 0.0001, "lr": 9.45653790210489e-07, "epoch": 2.827913908093714, "percentage": 56.56, "elapsed_time": "2:30:04", "remaining_time": "1:55:16", "throughput": 8665.48, "total_tokens": 78029928} +{"current_steps": 115760, "total_steps": 204665, "loss": 0.0024, "lr": 9.455686388359961e-07, "epoch": 2.8280360589255613, "percentage": 56.56, "elapsed_time": "2:30:05", "remaining_time": "1:55:15", "throughput": 8665.52, "total_tokens": 78033320} +{"current_steps": 115765, "total_steps": 204665, "loss": 0.0001, "lr": 9.45483487857343e-07, "epoch": 2.8281582097574085, "percentage": 56.56, "elapsed_time": "2:30:05", "remaining_time": "1:55:15", "throughput": 8665.52, "total_tokens": 78036264} +{"current_steps": 115770, "total_steps": 204665, "loss": 0.0918, "lr": 9.453983372751484e-07, "epoch": 2.8282803605892557, "percentage": 56.57, "elapsed_time": "2:30:05", "remaining_time": "1:55:15", "throughput": 8665.56, "total_tokens": 78039656} +{"current_steps": 115775, "total_steps": 204665, "loss": 0.1312, "lr": 9.453131870900318e-07, "epoch": 2.828402511421103, "percentage": 56.57, "elapsed_time": "2:30:06", "remaining_time": "1:55:14", "throughput": 8665.56, "total_tokens": 78042600} +{"current_steps": 115780, "total_steps": 204665, "loss": 0.032, "lr": 9.452280373026125e-07, "epoch": 2.82852466225295, "percentage": 56.57, "elapsed_time": "2:30:06", "remaining_time": "1:55:14", "throughput": 8665.56, "total_tokens": 78045544} +{"current_steps": 115785, "total_steps": 204665, "loss": 0.0007, "lr": 9.451428879135093e-07, "epoch": 2.8286468130847973, "percentage": 56.57, "elapsed_time": "2:30:06", "remaining_time": "1:55:13", "throughput": 8665.56, "total_tokens": 78048424} +{"current_steps": 115790, "total_steps": 204665, "loss": 0.0537, "lr": 9.450577389233423e-07, "epoch": 2.8287689639166445, "percentage": 56.58, "elapsed_time": "2:30:07", "remaining_time": "1:55:13", "throughput": 8665.59, "total_tokens": 78051688} +{"current_steps": 115795, "total_steps": 204665, "loss": 0.0318, "lr": 9.449725903327297e-07, "epoch": 2.828891114748491, "percentage": 56.58, "elapsed_time": "2:30:07", "remaining_time": "1:55:13", "throughput": 8665.65, "total_tokens": 78055272} +{"current_steps": 115800, "total_steps": 204665, "loss": 0.0493, "lr": 9.448874421422916e-07, "epoch": 2.829013265580339, "percentage": 56.58, "elapsed_time": "2:30:07", "remaining_time": "1:55:12", "throughput": 8665.74, "total_tokens": 78059240} +{"current_steps": 115805, "total_steps": 204665, "loss": 0.0586, "lr": 9.448022943526466e-07, "epoch": 2.8291354164121856, "percentage": 56.58, "elapsed_time": "2:30:08", "remaining_time": "1:55:12", "throughput": 8665.78, "total_tokens": 78062568} +{"current_steps": 115810, "total_steps": 204665, "loss": 0.0004, "lr": 9.447171469644144e-07, "epoch": 2.8292575672440328, "percentage": 56.59, "elapsed_time": "2:30:08", "remaining_time": "1:55:11", "throughput": 8665.83, "total_tokens": 78066152} +{"current_steps": 115815, "total_steps": 204665, "loss": 0.0, "lr": 9.446319999782136e-07, "epoch": 2.82937971807588, "percentage": 56.59, "elapsed_time": "2:30:08", "remaining_time": "1:55:11", "throughput": 8665.85, "total_tokens": 78069288} +{"current_steps": 115820, "total_steps": 204665, "loss": 0.0536, "lr": 9.445468533946641e-07, "epoch": 2.829501868907727, "percentage": 56.59, "elapsed_time": "2:30:09", "remaining_time": "1:55:10", "throughput": 8665.91, "total_tokens": 78072872} +{"current_steps": 115825, "total_steps": 204665, "loss": 0.0002, "lr": 9.444617072143848e-07, "epoch": 2.8296240197395743, "percentage": 56.59, "elapsed_time": "2:30:09", "remaining_time": "1:55:10", "throughput": 8665.95, "total_tokens": 78076200} +{"current_steps": 115830, "total_steps": 204665, "loss": 0.0005, "lr": 9.443765614379948e-07, "epoch": 2.8297461705714215, "percentage": 56.59, "elapsed_time": "2:30:09", "remaining_time": "1:55:10", "throughput": 8665.96, "total_tokens": 78079336} +{"current_steps": 115835, "total_steps": 204665, "loss": 0.0002, "lr": 9.442914160661137e-07, "epoch": 2.8298683214032687, "percentage": 56.6, "elapsed_time": "2:30:10", "remaining_time": "1:55:09", "throughput": 8666.01, "total_tokens": 78082792} +{"current_steps": 115840, "total_steps": 204665, "loss": 0.0349, "lr": 9.442062710993599e-07, "epoch": 2.829990472235116, "percentage": 56.6, "elapsed_time": "2:30:10", "remaining_time": "1:55:09", "throughput": 8666.11, "total_tokens": 78086824} +{"current_steps": 115845, "total_steps": 204665, "loss": 0.1367, "lr": 9.44121126538353e-07, "epoch": 2.830112623066963, "percentage": 56.6, "elapsed_time": "2:30:10", "remaining_time": "1:55:08", "throughput": 8666.19, "total_tokens": 78090664} +{"current_steps": 115850, "total_steps": 204665, "loss": 0.0001, "lr": 9.44035982383713e-07, "epoch": 2.8302347738988103, "percentage": 56.6, "elapsed_time": "2:30:11", "remaining_time": "1:55:08", "throughput": 8666.21, "total_tokens": 78093800} +{"current_steps": 115855, "total_steps": 204665, "loss": 0.2, "lr": 9.439508386360577e-07, "epoch": 2.8303569247306575, "percentage": 56.61, "elapsed_time": "2:30:11", "remaining_time": "1:55:07", "throughput": 8666.25, "total_tokens": 78097192} +{"current_steps": 115860, "total_steps": 204665, "loss": 0.0004, "lr": 9.438656952960076e-07, "epoch": 2.8304790755625047, "percentage": 56.61, "elapsed_time": "2:30:11", "remaining_time": "1:55:07", "throughput": 8666.29, "total_tokens": 78100584} +{"current_steps": 115865, "total_steps": 204665, "loss": 0.0002, "lr": 9.437805523641808e-07, "epoch": 2.830601226394352, "percentage": 56.61, "elapsed_time": "2:30:12", "remaining_time": "1:55:07", "throughput": 8666.3, "total_tokens": 78103656} +{"current_steps": 115870, "total_steps": 204665, "loss": 0.0003, "lr": 9.436954098411973e-07, "epoch": 2.830723377226199, "percentage": 56.61, "elapsed_time": "2:30:12", "remaining_time": "1:55:06", "throughput": 8666.32, "total_tokens": 78106792} +{"current_steps": 115875, "total_steps": 204665, "loss": 0.0003, "lr": 9.436102677276757e-07, "epoch": 2.8308455280580462, "percentage": 56.62, "elapsed_time": "2:30:13", "remaining_time": "1:55:06", "throughput": 8666.33, "total_tokens": 78109864} +{"current_steps": 115880, "total_steps": 204665, "loss": 0.1042, "lr": 9.435251260242352e-07, "epoch": 2.830967678889893, "percentage": 56.62, "elapsed_time": "2:30:13", "remaining_time": "1:55:05", "throughput": 8666.35, "total_tokens": 78113000} +{"current_steps": 115885, "total_steps": 204665, "loss": 0.0387, "lr": 9.434399847314958e-07, "epoch": 2.8310898297217406, "percentage": 56.62, "elapsed_time": "2:30:13", "remaining_time": "1:55:05", "throughput": 8666.36, "total_tokens": 78116072} +{"current_steps": 115890, "total_steps": 204665, "loss": 0.0591, "lr": 9.433548438500753e-07, "epoch": 2.8312119805535874, "percentage": 56.62, "elapsed_time": "2:30:14", "remaining_time": "1:55:05", "throughput": 8666.4, "total_tokens": 78119400} +{"current_steps": 115895, "total_steps": 204665, "loss": 0.0005, "lr": 9.432697033805943e-07, "epoch": 2.831334131385435, "percentage": 56.63, "elapsed_time": "2:30:14", "remaining_time": "1:55:04", "throughput": 8666.45, "total_tokens": 78122856} +{"current_steps": 115900, "total_steps": 204665, "loss": 0.0006, "lr": 9.431845633236707e-07, "epoch": 2.8314562822172817, "percentage": 56.63, "elapsed_time": "2:30:14", "remaining_time": "1:55:04", "throughput": 8666.51, "total_tokens": 78126440} +{"current_steps": 115905, "total_steps": 204665, "loss": 0.0405, "lr": 9.430994236799247e-07, "epoch": 2.831578433049129, "percentage": 56.63, "elapsed_time": "2:30:15", "remaining_time": "1:55:03", "throughput": 8666.56, "total_tokens": 78130024} +{"current_steps": 115910, "total_steps": 204665, "loss": 0.0005, "lr": 9.430142844499746e-07, "epoch": 2.831700583880976, "percentage": 56.63, "elapsed_time": "2:30:15", "remaining_time": "1:55:03", "throughput": 8666.59, "total_tokens": 78133288} +{"current_steps": 115915, "total_steps": 204665, "loss": 0.0001, "lr": 9.429291456344398e-07, "epoch": 2.8318227347128233, "percentage": 56.64, "elapsed_time": "2:30:15", "remaining_time": "1:55:02", "throughput": 8666.6, "total_tokens": 78136296} +{"current_steps": 115920, "total_steps": 204665, "loss": 0.0636, "lr": 9.428440072339402e-07, "epoch": 2.8319448855446705, "percentage": 56.64, "elapsed_time": "2:30:16", "remaining_time": "1:55:02", "throughput": 8666.64, "total_tokens": 78139624} +{"current_steps": 115925, "total_steps": 204665, "loss": 0.0001, "lr": 9.42758869249094e-07, "epoch": 2.8320670363765177, "percentage": 56.64, "elapsed_time": "2:30:16", "remaining_time": "1:55:02", "throughput": 8666.68, "total_tokens": 78143080} +{"current_steps": 115930, "total_steps": 204665, "loss": 0.0874, "lr": 9.426737316805209e-07, "epoch": 2.832189187208365, "percentage": 56.64, "elapsed_time": "2:30:16", "remaining_time": "1:55:01", "throughput": 8666.69, "total_tokens": 78146088} +{"current_steps": 115935, "total_steps": 204665, "loss": 0.0012, "lr": 9.425885945288397e-07, "epoch": 2.832311338040212, "percentage": 56.65, "elapsed_time": "2:30:17", "remaining_time": "1:55:01", "throughput": 8666.76, "total_tokens": 78149800} +{"current_steps": 115940, "total_steps": 204665, "loss": 0.0344, "lr": 9.425034577946696e-07, "epoch": 2.8324334888720593, "percentage": 56.65, "elapsed_time": "2:30:17", "remaining_time": "1:55:00", "throughput": 8666.78, "total_tokens": 78152936} +{"current_steps": 115945, "total_steps": 204665, "loss": 0.0427, "lr": 9.424183214786301e-07, "epoch": 2.8325556397039064, "percentage": 56.65, "elapsed_time": "2:30:17", "remaining_time": "1:55:00", "throughput": 8666.79, "total_tokens": 78156008} +{"current_steps": 115950, "total_steps": 204665, "loss": 0.0006, "lr": 9.423331855813396e-07, "epoch": 2.8326777905357536, "percentage": 56.65, "elapsed_time": "2:30:18", "remaining_time": "1:54:59", "throughput": 8666.83, "total_tokens": 78159336} +{"current_steps": 115955, "total_steps": 204665, "loss": 0.0478, "lr": 9.422480501034183e-07, "epoch": 2.832799941367601, "percentage": 56.66, "elapsed_time": "2:30:18", "remaining_time": "1:54:59", "throughput": 8666.84, "total_tokens": 78162344} +{"current_steps": 115960, "total_steps": 204665, "loss": 0.0656, "lr": 9.421629150454841e-07, "epoch": 2.832922092199448, "percentage": 56.66, "elapsed_time": "2:30:18", "remaining_time": "1:54:59", "throughput": 8666.86, "total_tokens": 78165480} +{"current_steps": 115965, "total_steps": 204665, "loss": 0.1134, "lr": 9.420777804081572e-07, "epoch": 2.833044243031295, "percentage": 56.66, "elapsed_time": "2:30:19", "remaining_time": "1:54:58", "throughput": 8666.88, "total_tokens": 78168616} +{"current_steps": 115970, "total_steps": 204665, "loss": 0.0411, "lr": 9.419926461920559e-07, "epoch": 2.8331663938631424, "percentage": 56.66, "elapsed_time": "2:30:19", "remaining_time": "1:54:58", "throughput": 8666.95, "total_tokens": 78172392} +{"current_steps": 115975, "total_steps": 204665, "loss": 0.03, "lr": 9.419075123977999e-07, "epoch": 2.833288544694989, "percentage": 56.67, "elapsed_time": "2:30:19", "remaining_time": "1:54:57", "throughput": 8666.97, "total_tokens": 78175528} +{"current_steps": 115980, "total_steps": 204665, "loss": 0.0001, "lr": 9.41822379026008e-07, "epoch": 2.8334106955268368, "percentage": 56.67, "elapsed_time": "2:30:20", "remaining_time": "1:54:57", "throughput": 8667.03, "total_tokens": 78179176} +{"current_steps": 115985, "total_steps": 204665, "loss": 0.0004, "lr": 9.417372460772994e-07, "epoch": 2.8335328463586835, "percentage": 56.67, "elapsed_time": "2:30:20", "remaining_time": "1:54:57", "throughput": 8667.07, "total_tokens": 78182504} +{"current_steps": 115990, "total_steps": 204665, "loss": 0.0001, "lr": 9.416521135522936e-07, "epoch": 2.8336549971905307, "percentage": 56.67, "elapsed_time": "2:30:20", "remaining_time": "1:54:56", "throughput": 8667.12, "total_tokens": 78185960} +{"current_steps": 115995, "total_steps": 204665, "loss": 0.0012, "lr": 9.415669814516087e-07, "epoch": 2.833777148022378, "percentage": 56.68, "elapsed_time": "2:30:21", "remaining_time": "1:54:56", "throughput": 8667.16, "total_tokens": 78189416} +{"current_steps": 116000, "total_steps": 204665, "loss": 0.0218, "lr": 9.414818497758645e-07, "epoch": 2.833899298854225, "percentage": 56.68, "elapsed_time": "2:30:21", "remaining_time": "1:54:55", "throughput": 8667.22, "total_tokens": 78192936} +{"current_steps": 116005, "total_steps": 204665, "loss": 0.0002, "lr": 9.413967185256806e-07, "epoch": 2.8340214496860723, "percentage": 56.68, "elapsed_time": "2:30:22", "remaining_time": "1:54:55", "throughput": 8667.25, "total_tokens": 78196200} +{"current_steps": 116010, "total_steps": 204665, "loss": 0.0003, "lr": 9.413115877016749e-07, "epoch": 2.8341436005179195, "percentage": 56.68, "elapsed_time": "2:30:22", "remaining_time": "1:54:54", "throughput": 8667.34, "total_tokens": 78200104} +{"current_steps": 116015, "total_steps": 204665, "loss": 0.092, "lr": 9.412264573044676e-07, "epoch": 2.8342657513497667, "percentage": 56.69, "elapsed_time": "2:30:22", "remaining_time": "1:54:54", "throughput": 8667.37, "total_tokens": 78203432} +{"current_steps": 116020, "total_steps": 204665, "loss": 0.0005, "lr": 9.411413273346769e-07, "epoch": 2.834387902181614, "percentage": 56.69, "elapsed_time": "2:30:23", "remaining_time": "1:54:54", "throughput": 8667.41, "total_tokens": 78206760} +{"current_steps": 116025, "total_steps": 204665, "loss": 0.0717, "lr": 9.410561977929228e-07, "epoch": 2.834510053013461, "percentage": 56.69, "elapsed_time": "2:30:23", "remaining_time": "1:54:53", "throughput": 8667.48, "total_tokens": 78210536} +{"current_steps": 116030, "total_steps": 204665, "loss": 0.1138, "lr": 9.409710686798236e-07, "epoch": 2.8346322038453082, "percentage": 56.69, "elapsed_time": "2:30:23", "remaining_time": "1:54:53", "throughput": 8667.52, "total_tokens": 78213864} +{"current_steps": 116035, "total_steps": 204665, "loss": 0.0712, "lr": 9.408859399959984e-07, "epoch": 2.8347543546771554, "percentage": 56.7, "elapsed_time": "2:30:24", "remaining_time": "1:54:52", "throughput": 8667.53, "total_tokens": 78216936} +{"current_steps": 116040, "total_steps": 204665, "loss": 0.0003, "lr": 9.408008117420671e-07, "epoch": 2.8348765055090026, "percentage": 56.7, "elapsed_time": "2:30:24", "remaining_time": "1:54:52", "throughput": 8667.55, "total_tokens": 78220200} +{"current_steps": 116045, "total_steps": 204665, "loss": 0.0403, "lr": 9.407156839186477e-07, "epoch": 2.83499865634085, "percentage": 56.7, "elapsed_time": "2:30:24", "remaining_time": "1:54:51", "throughput": 8667.58, "total_tokens": 78223464} +{"current_steps": 116050, "total_steps": 204665, "loss": 0.0548, "lr": 9.406305565263602e-07, "epoch": 2.835120807172697, "percentage": 56.7, "elapsed_time": "2:30:25", "remaining_time": "1:54:51", "throughput": 8667.65, "total_tokens": 78227176} +{"current_steps": 116055, "total_steps": 204665, "loss": 0.0007, "lr": 9.405454295658229e-07, "epoch": 2.835242958004544, "percentage": 56.7, "elapsed_time": "2:30:25", "remaining_time": "1:54:51", "throughput": 8667.7, "total_tokens": 78230696} +{"current_steps": 116060, "total_steps": 204665, "loss": 0.0455, "lr": 9.404603030376555e-07, "epoch": 2.835365108836391, "percentage": 56.71, "elapsed_time": "2:30:25", "remaining_time": "1:54:50", "throughput": 8667.7, "total_tokens": 78233640} +{"current_steps": 116065, "total_steps": 204665, "loss": 0.0569, "lr": 9.403751769424765e-07, "epoch": 2.8354872596682386, "percentage": 56.71, "elapsed_time": "2:30:26", "remaining_time": "1:54:50", "throughput": 8667.79, "total_tokens": 78237480} +{"current_steps": 116070, "total_steps": 204665, "loss": 0.0715, "lr": 9.402900512809052e-07, "epoch": 2.8356094105000853, "percentage": 56.71, "elapsed_time": "2:30:26", "remaining_time": "1:54:49", "throughput": 8667.81, "total_tokens": 78240680} +{"current_steps": 116075, "total_steps": 204665, "loss": 0.0405, "lr": 9.402049260535613e-07, "epoch": 2.835731561331933, "percentage": 56.71, "elapsed_time": "2:30:26", "remaining_time": "1:54:49", "throughput": 8667.86, "total_tokens": 78244136} +{"current_steps": 116080, "total_steps": 204665, "loss": 0.0513, "lr": 9.401198012610628e-07, "epoch": 2.8358537121637797, "percentage": 56.72, "elapsed_time": "2:30:27", "remaining_time": "1:54:49", "throughput": 8667.93, "total_tokens": 78247912} +{"current_steps": 116085, "total_steps": 204665, "loss": 0.0001, "lr": 9.400346769040294e-07, "epoch": 2.835975862995627, "percentage": 56.72, "elapsed_time": "2:30:27", "remaining_time": "1:54:48", "throughput": 8667.96, "total_tokens": 78251176} +{"current_steps": 116090, "total_steps": 204665, "loss": 0.0773, "lr": 9.399495529830798e-07, "epoch": 2.836098013827474, "percentage": 56.72, "elapsed_time": "2:30:27", "remaining_time": "1:54:48", "throughput": 8667.96, "total_tokens": 78254120} +{"current_steps": 116095, "total_steps": 204665, "loss": 0.1131, "lr": 9.398644294988332e-07, "epoch": 2.8362201646593213, "percentage": 56.72, "elapsed_time": "2:30:28", "remaining_time": "1:54:47", "throughput": 8667.98, "total_tokens": 78257256} +{"current_steps": 116100, "total_steps": 204665, "loss": 0.0006, "lr": 9.397793064519088e-07, "epoch": 2.8363423154911684, "percentage": 56.73, "elapsed_time": "2:30:28", "remaining_time": "1:54:47", "throughput": 8668.07, "total_tokens": 78261160} +{"current_steps": 116105, "total_steps": 204665, "loss": 0.0003, "lr": 9.396941838429253e-07, "epoch": 2.8364644663230156, "percentage": 56.73, "elapsed_time": "2:30:29", "remaining_time": "1:54:46", "throughput": 8668.11, "total_tokens": 78264488} +{"current_steps": 116110, "total_steps": 204665, "loss": 0.0073, "lr": 9.396090616725022e-07, "epoch": 2.836586617154863, "percentage": 56.73, "elapsed_time": "2:30:29", "remaining_time": "1:54:46", "throughput": 8668.13, "total_tokens": 78267624} +{"current_steps": 116115, "total_steps": 204665, "loss": 0.0315, "lr": 9.395239399412579e-07, "epoch": 2.83670876798671, "percentage": 56.73, "elapsed_time": "2:30:29", "remaining_time": "1:54:46", "throughput": 8668.17, "total_tokens": 78271016} +{"current_steps": 116120, "total_steps": 204665, "loss": 0.0016, "lr": 9.394388186498121e-07, "epoch": 2.836830918818557, "percentage": 56.74, "elapsed_time": "2:30:30", "remaining_time": "1:54:45", "throughput": 8668.22, "total_tokens": 78274536} +{"current_steps": 116125, "total_steps": 204665, "loss": 0.0001, "lr": 9.393536977987831e-07, "epoch": 2.8369530696504044, "percentage": 56.74, "elapsed_time": "2:30:30", "remaining_time": "1:54:45", "throughput": 8668.32, "total_tokens": 78278504} +{"current_steps": 116130, "total_steps": 204665, "loss": 0.0003, "lr": 9.392685773887907e-07, "epoch": 2.8370752204822516, "percentage": 56.74, "elapsed_time": "2:30:30", "remaining_time": "1:54:44", "throughput": 8668.3, "total_tokens": 78281256} +{"current_steps": 116135, "total_steps": 204665, "loss": 0.103, "lr": 9.391834574204534e-07, "epoch": 2.8371973713140988, "percentage": 56.74, "elapsed_time": "2:30:31", "remaining_time": "1:54:44", "throughput": 8668.32, "total_tokens": 78284392} +{"current_steps": 116140, "total_steps": 204665, "loss": 0.0001, "lr": 9.390983378943903e-07, "epoch": 2.837319522145946, "percentage": 56.75, "elapsed_time": "2:30:31", "remaining_time": "1:54:44", "throughput": 8668.39, "total_tokens": 78288104} +{"current_steps": 116145, "total_steps": 204665, "loss": 0.0001, "lr": 9.390132188112207e-07, "epoch": 2.8374416729777927, "percentage": 56.75, "elapsed_time": "2:30:31", "remaining_time": "1:54:43", "throughput": 8668.46, "total_tokens": 78291944} +{"current_steps": 116150, "total_steps": 204665, "loss": 0.0004, "lr": 9.389281001715631e-07, "epoch": 2.8375638238096403, "percentage": 56.75, "elapsed_time": "2:30:32", "remaining_time": "1:54:43", "throughput": 8668.52, "total_tokens": 78295464} +{"current_steps": 116155, "total_steps": 204665, "loss": 0.0004, "lr": 9.388429819760367e-07, "epoch": 2.837685974641487, "percentage": 56.75, "elapsed_time": "2:30:32", "remaining_time": "1:54:42", "throughput": 8668.53, "total_tokens": 78298536} +{"current_steps": 116160, "total_steps": 204665, "loss": 0.0002, "lr": 9.38757864225261e-07, "epoch": 2.8378081254733347, "percentage": 56.76, "elapsed_time": "2:30:32", "remaining_time": "1:54:42", "throughput": 8668.6, "total_tokens": 78302184} +{"current_steps": 116165, "total_steps": 204665, "loss": 0.0452, "lr": 9.386727469198541e-07, "epoch": 2.8379302763051815, "percentage": 56.76, "elapsed_time": "2:30:33", "remaining_time": "1:54:41", "throughput": 8668.65, "total_tokens": 78305704} +{"current_steps": 116170, "total_steps": 204665, "loss": 0.036, "lr": 9.385876300604359e-07, "epoch": 2.8380524271370287, "percentage": 56.76, "elapsed_time": "2:30:33", "remaining_time": "1:54:41", "throughput": 8668.7, "total_tokens": 78309160} +{"current_steps": 116175, "total_steps": 204665, "loss": 0.0001, "lr": 9.385025136476246e-07, "epoch": 2.838174577968876, "percentage": 56.76, "elapsed_time": "2:30:33", "remaining_time": "1:54:41", "throughput": 8668.76, "total_tokens": 78312744} +{"current_steps": 116180, "total_steps": 204665, "loss": 0.0361, "lr": 9.3841739768204e-07, "epoch": 2.838296728800723, "percentage": 56.77, "elapsed_time": "2:30:34", "remaining_time": "1:54:40", "throughput": 8668.8, "total_tokens": 78316136} +{"current_steps": 116185, "total_steps": 204665, "loss": 0.0304, "lr": 9.383322821643003e-07, "epoch": 2.8384188796325702, "percentage": 56.77, "elapsed_time": "2:30:34", "remaining_time": "1:54:40", "throughput": 8668.87, "total_tokens": 78319784} +{"current_steps": 116190, "total_steps": 204665, "loss": 0.0021, "lr": 9.382471670950248e-07, "epoch": 2.8385410304644174, "percentage": 56.77, "elapsed_time": "2:30:34", "remaining_time": "1:54:39", "throughput": 8668.87, "total_tokens": 78322728} +{"current_steps": 116195, "total_steps": 204665, "loss": 0.0001, "lr": 9.38162052474833e-07, "epoch": 2.8386631812962646, "percentage": 56.77, "elapsed_time": "2:30:35", "remaining_time": "1:54:39", "throughput": 8668.93, "total_tokens": 78326312} +{"current_steps": 116200, "total_steps": 204665, "loss": 0.0001, "lr": 9.380769383043428e-07, "epoch": 2.838785332128112, "percentage": 56.78, "elapsed_time": "2:30:35", "remaining_time": "1:54:38", "throughput": 8668.98, "total_tokens": 78329832} +{"current_steps": 116205, "total_steps": 204665, "loss": 0.066, "lr": 9.379918245841741e-07, "epoch": 2.838907482959959, "percentage": 56.78, "elapsed_time": "2:30:35", "remaining_time": "1:54:38", "throughput": 8669.01, "total_tokens": 78333096} +{"current_steps": 116210, "total_steps": 204665, "loss": 0.0003, "lr": 9.379067113149452e-07, "epoch": 2.839029633791806, "percentage": 56.78, "elapsed_time": "2:30:36", "remaining_time": "1:54:38", "throughput": 8669.06, "total_tokens": 78336552} +{"current_steps": 116215, "total_steps": 204665, "loss": 0.1872, "lr": 9.378215984972759e-07, "epoch": 2.8391517846236534, "percentage": 56.78, "elapsed_time": "2:30:36", "remaining_time": "1:54:37", "throughput": 8669.15, "total_tokens": 78340584} +{"current_steps": 116220, "total_steps": 204665, "loss": 0.0005, "lr": 9.377364861317843e-07, "epoch": 2.8392739354555006, "percentage": 56.79, "elapsed_time": "2:30:37", "remaining_time": "1:54:37", "throughput": 8669.27, "total_tokens": 78344744} +{"current_steps": 116225, "total_steps": 204665, "loss": 0.0476, "lr": 9.376513742190896e-07, "epoch": 2.8393960862873477, "percentage": 56.79, "elapsed_time": "2:30:37", "remaining_time": "1:54:36", "throughput": 8669.31, "total_tokens": 78348136} +{"current_steps": 116230, "total_steps": 204665, "loss": 0.0004, "lr": 9.375662627598113e-07, "epoch": 2.839518237119195, "percentage": 56.79, "elapsed_time": "2:30:37", "remaining_time": "1:54:36", "throughput": 8669.34, "total_tokens": 78351400} +{"current_steps": 116235, "total_steps": 204665, "loss": 0.0415, "lr": 9.374811517545677e-07, "epoch": 2.839640387951042, "percentage": 56.79, "elapsed_time": "2:30:38", "remaining_time": "1:54:36", "throughput": 8669.36, "total_tokens": 78354536} +{"current_steps": 116240, "total_steps": 204665, "loss": 0.1079, "lr": 9.37396041203978e-07, "epoch": 2.839762538782889, "percentage": 56.8, "elapsed_time": "2:30:38", "remaining_time": "1:54:35", "throughput": 8669.39, "total_tokens": 78357864} +{"current_steps": 116245, "total_steps": 204665, "loss": 0.0373, "lr": 9.373109311086612e-07, "epoch": 2.8398846896147365, "percentage": 56.8, "elapsed_time": "2:30:38", "remaining_time": "1:54:35", "throughput": 8669.46, "total_tokens": 78361576} +{"current_steps": 116250, "total_steps": 204665, "loss": 0.1102, "lr": 9.372258214692358e-07, "epoch": 2.8400068404465832, "percentage": 56.8, "elapsed_time": "2:30:39", "remaining_time": "1:54:34", "throughput": 8669.51, "total_tokens": 78365032} +{"current_steps": 116255, "total_steps": 204665, "loss": 0.0763, "lr": 9.371407122863217e-07, "epoch": 2.840128991278431, "percentage": 56.8, "elapsed_time": "2:30:39", "remaining_time": "1:54:34", "throughput": 8669.53, "total_tokens": 78368232} +{"current_steps": 116260, "total_steps": 204665, "loss": 0.1492, "lr": 9.370556035605366e-07, "epoch": 2.8402511421102776, "percentage": 56.81, "elapsed_time": "2:30:39", "remaining_time": "1:54:33", "throughput": 8669.56, "total_tokens": 78371432} +{"current_steps": 116265, "total_steps": 204665, "loss": 0.048, "lr": 9.369704952925007e-07, "epoch": 2.840373292942125, "percentage": 56.81, "elapsed_time": "2:30:40", "remaining_time": "1:54:33", "throughput": 8669.6, "total_tokens": 78374824} +{"current_steps": 116270, "total_steps": 204665, "loss": 0.0225, "lr": 9.368853874828318e-07, "epoch": 2.840495443773972, "percentage": 56.81, "elapsed_time": "2:30:40", "remaining_time": "1:54:33", "throughput": 8669.63, "total_tokens": 78378088} +{"current_steps": 116275, "total_steps": 204665, "loss": 0.0001, "lr": 9.368002801321499e-07, "epoch": 2.840617594605819, "percentage": 56.81, "elapsed_time": "2:30:40", "remaining_time": "1:54:32", "throughput": 8669.68, "total_tokens": 78381608} +{"current_steps": 116280, "total_steps": 204665, "loss": 0.0282, "lr": 9.367151732410727e-07, "epoch": 2.8407397454376664, "percentage": 56.81, "elapsed_time": "2:30:41", "remaining_time": "1:54:32", "throughput": 8669.72, "total_tokens": 78384936} +{"current_steps": 116285, "total_steps": 204665, "loss": 0.0012, "lr": 9.366300668102201e-07, "epoch": 2.8408618962695136, "percentage": 56.82, "elapsed_time": "2:30:41", "remaining_time": "1:54:31", "throughput": 8669.74, "total_tokens": 78388136} +{"current_steps": 116290, "total_steps": 204665, "loss": 0.0479, "lr": 9.365449608402107e-07, "epoch": 2.8409840471013608, "percentage": 56.82, "elapsed_time": "2:30:41", "remaining_time": "1:54:31", "throughput": 8669.8, "total_tokens": 78391784} +{"current_steps": 116295, "total_steps": 204665, "loss": 0.001, "lr": 9.364598553316635e-07, "epoch": 2.841106197933208, "percentage": 56.82, "elapsed_time": "2:30:42", "remaining_time": "1:54:31", "throughput": 8669.81, "total_tokens": 78394792} +{"current_steps": 116300, "total_steps": 204665, "loss": 0.0002, "lr": 9.363747502851975e-07, "epoch": 2.841228348765055, "percentage": 56.82, "elapsed_time": "2:30:42", "remaining_time": "1:54:30", "throughput": 8669.88, "total_tokens": 78398440} +{"current_steps": 116305, "total_steps": 204665, "loss": 0.0001, "lr": 9.36289645701431e-07, "epoch": 2.8413504995969023, "percentage": 56.83, "elapsed_time": "2:30:42", "remaining_time": "1:54:30", "throughput": 8669.94, "total_tokens": 78402088} +{"current_steps": 116310, "total_steps": 204665, "loss": 0.0002, "lr": 9.362045415809837e-07, "epoch": 2.8414726504287495, "percentage": 56.83, "elapsed_time": "2:30:43", "remaining_time": "1:54:29", "throughput": 8669.98, "total_tokens": 78405480} +{"current_steps": 116315, "total_steps": 204665, "loss": 0.026, "lr": 9.361194379244738e-07, "epoch": 2.8415948012605967, "percentage": 56.83, "elapsed_time": "2:30:43", "remaining_time": "1:54:29", "throughput": 8670.02, "total_tokens": 78408808} +{"current_steps": 116320, "total_steps": 204665, "loss": 0.0271, "lr": 9.360343347325204e-07, "epoch": 2.841716952092444, "percentage": 56.83, "elapsed_time": "2:30:44", "remaining_time": "1:54:28", "throughput": 8670.06, "total_tokens": 78412200} +{"current_steps": 116325, "total_steps": 204665, "loss": 0.0005, "lr": 9.359492320057431e-07, "epoch": 2.8418391029242906, "percentage": 56.84, "elapsed_time": "2:30:44", "remaining_time": "1:54:28", "throughput": 8670.11, "total_tokens": 78415656} +{"current_steps": 116330, "total_steps": 204665, "loss": 0.0516, "lr": 9.358641297447596e-07, "epoch": 2.8419612537561383, "percentage": 56.84, "elapsed_time": "2:30:44", "remaining_time": "1:54:28", "throughput": 8670.21, "total_tokens": 78419752} +{"current_steps": 116335, "total_steps": 204665, "loss": 0.0686, "lr": 9.357790279501901e-07, "epoch": 2.842083404587985, "percentage": 56.84, "elapsed_time": "2:30:45", "remaining_time": "1:54:27", "throughput": 8670.22, "total_tokens": 78422760} +{"current_steps": 116340, "total_steps": 204665, "loss": 0.0587, "lr": 9.35693926622652e-07, "epoch": 2.8422055554198327, "percentage": 56.84, "elapsed_time": "2:30:45", "remaining_time": "1:54:27", "throughput": 8670.24, "total_tokens": 78425960} +{"current_steps": 116345, "total_steps": 204665, "loss": 0.0005, "lr": 9.356088257627655e-07, "epoch": 2.8423277062516794, "percentage": 56.85, "elapsed_time": "2:30:45", "remaining_time": "1:54:26", "throughput": 8670.27, "total_tokens": 78429160} +{"current_steps": 116350, "total_steps": 204665, "loss": 0.0003, "lr": 9.355237253711489e-07, "epoch": 2.8424498570835266, "percentage": 56.85, "elapsed_time": "2:30:46", "remaining_time": "1:54:26", "throughput": 8670.32, "total_tokens": 78432680} +{"current_steps": 116355, "total_steps": 204665, "loss": 0.08, "lr": 9.354386254484207e-07, "epoch": 2.842572007915374, "percentage": 56.85, "elapsed_time": "2:30:46", "remaining_time": "1:54:25", "throughput": 8670.33, "total_tokens": 78435688} +{"current_steps": 116360, "total_steps": 204665, "loss": 0.0001, "lr": 9.353535259952009e-07, "epoch": 2.842694158747221, "percentage": 56.85, "elapsed_time": "2:30:46", "remaining_time": "1:54:25", "throughput": 8670.37, "total_tokens": 78439144} +{"current_steps": 116365, "total_steps": 204665, "loss": 0.0002, "lr": 9.35268427012107e-07, "epoch": 2.842816309579068, "percentage": 56.86, "elapsed_time": "2:30:47", "remaining_time": "1:54:25", "throughput": 8670.4, "total_tokens": 78442408} +{"current_steps": 116370, "total_steps": 204665, "loss": 0.0001, "lr": 9.351833284997589e-07, "epoch": 2.8429384604109154, "percentage": 56.86, "elapsed_time": "2:30:47", "remaining_time": "1:54:24", "throughput": 8670.43, "total_tokens": 78445608} +{"current_steps": 116375, "total_steps": 204665, "loss": 0.0001, "lr": 9.350982304587746e-07, "epoch": 2.8430606112427625, "percentage": 56.86, "elapsed_time": "2:30:47", "remaining_time": "1:54:24", "throughput": 8670.47, "total_tokens": 78449064} +{"current_steps": 116380, "total_steps": 204665, "loss": 0.0079, "lr": 9.350131328897736e-07, "epoch": 2.8431827620746097, "percentage": 56.86, "elapsed_time": "2:30:48", "remaining_time": "1:54:23", "throughput": 8670.5, "total_tokens": 78452328} +{"current_steps": 116385, "total_steps": 204665, "loss": 0.0005, "lr": 9.34928035793375e-07, "epoch": 2.843304912906457, "percentage": 56.87, "elapsed_time": "2:30:48", "remaining_time": "1:54:23", "throughput": 8670.55, "total_tokens": 78455784} +{"current_steps": 116390, "total_steps": 204665, "loss": 0.0002, "lr": 9.348429391701969e-07, "epoch": 2.843427063738304, "percentage": 56.87, "elapsed_time": "2:30:48", "remaining_time": "1:54:23", "throughput": 8670.57, "total_tokens": 78458920} +{"current_steps": 116395, "total_steps": 204665, "loss": 0.0007, "lr": 9.347578430208586e-07, "epoch": 2.8435492145701513, "percentage": 56.87, "elapsed_time": "2:30:49", "remaining_time": "1:54:22", "throughput": 8670.6, "total_tokens": 78462248} +{"current_steps": 116400, "total_steps": 204665, "loss": 0.0003, "lr": 9.346727473459787e-07, "epoch": 2.8436713654019985, "percentage": 56.87, "elapsed_time": "2:30:49", "remaining_time": "1:54:22", "throughput": 8670.65, "total_tokens": 78465768} +{"current_steps": 116405, "total_steps": 204665, "loss": 0.0006, "lr": 9.34587652146176e-07, "epoch": 2.8437935162338457, "percentage": 56.88, "elapsed_time": "2:30:49", "remaining_time": "1:54:21", "throughput": 8670.74, "total_tokens": 78469672} +{"current_steps": 116410, "total_steps": 204665, "loss": 0.0397, "lr": 9.345025574220698e-07, "epoch": 2.843915667065693, "percentage": 56.88, "elapsed_time": "2:30:50", "remaining_time": "1:54:21", "throughput": 8670.75, "total_tokens": 78472744} +{"current_steps": 116415, "total_steps": 204665, "loss": 0.0002, "lr": 9.344174631742782e-07, "epoch": 2.84403781789754, "percentage": 56.88, "elapsed_time": "2:30:50", "remaining_time": "1:54:20", "throughput": 8670.79, "total_tokens": 78476136} +{"current_steps": 116420, "total_steps": 204665, "loss": 0.0002, "lr": 9.34332369403421e-07, "epoch": 2.844159968729387, "percentage": 56.88, "elapsed_time": "2:30:50", "remaining_time": "1:54:20", "throughput": 8670.84, "total_tokens": 78479656} +{"current_steps": 116425, "total_steps": 204665, "loss": 0.0707, "lr": 9.342472761101161e-07, "epoch": 2.8442821195612344, "percentage": 56.89, "elapsed_time": "2:30:51", "remaining_time": "1:54:20", "throughput": 8670.87, "total_tokens": 78482856} +{"current_steps": 116430, "total_steps": 204665, "loss": 0.0003, "lr": 9.34162183294983e-07, "epoch": 2.844404270393081, "percentage": 56.89, "elapsed_time": "2:30:51", "remaining_time": "1:54:19", "throughput": 8670.88, "total_tokens": 78485928} +{"current_steps": 116435, "total_steps": 204665, "loss": 0.0332, "lr": 9.340770909586397e-07, "epoch": 2.8445264212249284, "percentage": 56.89, "elapsed_time": "2:30:52", "remaining_time": "1:54:19", "throughput": 8670.94, "total_tokens": 78489512} +{"current_steps": 116440, "total_steps": 204665, "loss": 0.0, "lr": 9.339919991017059e-07, "epoch": 2.8446485720567756, "percentage": 56.89, "elapsed_time": "2:30:52", "remaining_time": "1:54:18", "throughput": 8670.95, "total_tokens": 78492648} +{"current_steps": 116445, "total_steps": 204665, "loss": 0.112, "lr": 9.339069077248e-07, "epoch": 2.8447707228886228, "percentage": 56.9, "elapsed_time": "2:30:52", "remaining_time": "1:54:18", "throughput": 8670.99, "total_tokens": 78495976} +{"current_steps": 116450, "total_steps": 204665, "loss": 0.0001, "lr": 9.338218168285407e-07, "epoch": 2.84489287372047, "percentage": 56.9, "elapsed_time": "2:30:53", "remaining_time": "1:54:18", "throughput": 8671.03, "total_tokens": 78499432} +{"current_steps": 116455, "total_steps": 204665, "loss": 0.0442, "lr": 9.337367264135474e-07, "epoch": 2.845015024552317, "percentage": 56.9, "elapsed_time": "2:30:53", "remaining_time": "1:54:17", "throughput": 8671.08, "total_tokens": 78502952} +{"current_steps": 116460, "total_steps": 204665, "loss": 0.0002, "lr": 9.336516364804379e-07, "epoch": 2.8451371753841643, "percentage": 56.9, "elapsed_time": "2:30:53", "remaining_time": "1:54:17", "throughput": 8671.1, "total_tokens": 78506088} +{"current_steps": 116465, "total_steps": 204665, "loss": 0.0, "lr": 9.335665470298319e-07, "epoch": 2.8452593262160115, "percentage": 56.91, "elapsed_time": "2:30:54", "remaining_time": "1:54:16", "throughput": 8671.19, "total_tokens": 78509992} +{"current_steps": 116470, "total_steps": 204665, "loss": 0.0432, "lr": 9.334814580623476e-07, "epoch": 2.8453814770478587, "percentage": 56.91, "elapsed_time": "2:30:54", "remaining_time": "1:54:16", "throughput": 8671.27, "total_tokens": 78513832} +{"current_steps": 116475, "total_steps": 204665, "loss": 0.0003, "lr": 9.333963695786038e-07, "epoch": 2.845503627879706, "percentage": 56.91, "elapsed_time": "2:30:54", "remaining_time": "1:54:15", "throughput": 8671.29, "total_tokens": 78517032} +{"current_steps": 116480, "total_steps": 204665, "loss": 0.0007, "lr": 9.333112815792202e-07, "epoch": 2.845625778711553, "percentage": 56.91, "elapsed_time": "2:30:55", "remaining_time": "1:54:15", "throughput": 8671.32, "total_tokens": 78520232} +{"current_steps": 116485, "total_steps": 204665, "loss": 0.0563, "lr": 9.332261940648143e-07, "epoch": 2.8457479295434003, "percentage": 56.91, "elapsed_time": "2:30:55", "remaining_time": "1:54:15", "throughput": 8671.36, "total_tokens": 78523560} +{"current_steps": 116490, "total_steps": 204665, "loss": 0.0002, "lr": 9.331411070360059e-07, "epoch": 2.8458700803752475, "percentage": 56.92, "elapsed_time": "2:30:55", "remaining_time": "1:54:14", "throughput": 8671.42, "total_tokens": 78527208} +{"current_steps": 116495, "total_steps": 204665, "loss": 0.0016, "lr": 9.330560204934129e-07, "epoch": 2.8459922312070947, "percentage": 56.92, "elapsed_time": "2:30:56", "remaining_time": "1:54:14", "throughput": 8671.44, "total_tokens": 78530408} +{"current_steps": 116500, "total_steps": 204665, "loss": 0.0004, "lr": 9.329709344376549e-07, "epoch": 2.846114382038942, "percentage": 56.92, "elapsed_time": "2:30:56", "remaining_time": "1:54:13", "throughput": 8671.46, "total_tokens": 78533544} +{"current_steps": 116505, "total_steps": 204665, "loss": 0.0606, "lr": 9.328858488693503e-07, "epoch": 2.8462365328707886, "percentage": 56.92, "elapsed_time": "2:30:56", "remaining_time": "1:54:13", "throughput": 8671.48, "total_tokens": 78536680} +{"current_steps": 116510, "total_steps": 204665, "loss": 0.0001, "lr": 9.328007637891175e-07, "epoch": 2.8463586837026362, "percentage": 56.93, "elapsed_time": "2:30:57", "remaining_time": "1:54:12", "throughput": 8671.49, "total_tokens": 78539752} +{"current_steps": 116515, "total_steps": 204665, "loss": 0.0001, "lr": 9.327156791975762e-07, "epoch": 2.846480834534483, "percentage": 56.93, "elapsed_time": "2:30:57", "remaining_time": "1:54:12", "throughput": 8671.56, "total_tokens": 78543400} +{"current_steps": 116520, "total_steps": 204665, "loss": 0.0796, "lr": 9.326305950953439e-07, "epoch": 2.8466029853663306, "percentage": 56.93, "elapsed_time": "2:30:57", "remaining_time": "1:54:12", "throughput": 8671.6, "total_tokens": 78546792} +{"current_steps": 116525, "total_steps": 204665, "loss": 0.0829, "lr": 9.325455114830406e-07, "epoch": 2.8467251361981774, "percentage": 56.93, "elapsed_time": "2:30:58", "remaining_time": "1:54:11", "throughput": 8671.63, "total_tokens": 78550120} +{"current_steps": 116530, "total_steps": 204665, "loss": 0.1552, "lr": 9.32460428361284e-07, "epoch": 2.8468472870300245, "percentage": 56.94, "elapsed_time": "2:30:58", "remaining_time": "1:54:11", "throughput": 8671.64, "total_tokens": 78553128} +{"current_steps": 116535, "total_steps": 204665, "loss": 0.0618, "lr": 9.323753457306934e-07, "epoch": 2.8469694378618717, "percentage": 56.94, "elapsed_time": "2:30:58", "remaining_time": "1:54:10", "throughput": 8671.69, "total_tokens": 78556584} +{"current_steps": 116540, "total_steps": 204665, "loss": 0.0, "lr": 9.322902635918879e-07, "epoch": 2.847091588693719, "percentage": 56.94, "elapsed_time": "2:30:59", "remaining_time": "1:54:10", "throughput": 8671.74, "total_tokens": 78560040} +{"current_steps": 116545, "total_steps": 204665, "loss": 0.0001, "lr": 9.322051819454856e-07, "epoch": 2.847213739525566, "percentage": 56.94, "elapsed_time": "2:30:59", "remaining_time": "1:54:10", "throughput": 8671.78, "total_tokens": 78563432} +{"current_steps": 116550, "total_steps": 204665, "loss": 0.0003, "lr": 9.321201007921054e-07, "epoch": 2.8473358903574133, "percentage": 56.95, "elapsed_time": "2:31:00", "remaining_time": "1:54:09", "throughput": 8671.78, "total_tokens": 78566376} +{"current_steps": 116555, "total_steps": 204665, "loss": 0.0002, "lr": 9.320350201323662e-07, "epoch": 2.8474580411892605, "percentage": 56.95, "elapsed_time": "2:31:00", "remaining_time": "1:54:09", "throughput": 8671.8, "total_tokens": 78569576} +{"current_steps": 116560, "total_steps": 204665, "loss": 0.0002, "lr": 9.319499399668863e-07, "epoch": 2.8475801920211077, "percentage": 56.95, "elapsed_time": "2:31:00", "remaining_time": "1:54:08", "throughput": 8671.82, "total_tokens": 78572712} +{"current_steps": 116565, "total_steps": 204665, "loss": 0.0002, "lr": 9.318648602962852e-07, "epoch": 2.847702342852955, "percentage": 56.95, "elapsed_time": "2:31:01", "remaining_time": "1:54:08", "throughput": 8671.83, "total_tokens": 78575784} +{"current_steps": 116570, "total_steps": 204665, "loss": 0.0001, "lr": 9.317797811211807e-07, "epoch": 2.847824493684802, "percentage": 56.96, "elapsed_time": "2:31:01", "remaining_time": "1:54:07", "throughput": 8671.88, "total_tokens": 78579304} +{"current_steps": 116575, "total_steps": 204665, "loss": 0.122, "lr": 9.316947024421923e-07, "epoch": 2.8479466445166493, "percentage": 56.96, "elapsed_time": "2:31:01", "remaining_time": "1:54:07", "throughput": 8671.9, "total_tokens": 78582440} +{"current_steps": 116580, "total_steps": 204665, "loss": 0.0641, "lr": 9.316096242599382e-07, "epoch": 2.8480687953484964, "percentage": 56.96, "elapsed_time": "2:31:02", "remaining_time": "1:54:07", "throughput": 8671.95, "total_tokens": 78585960} +{"current_steps": 116585, "total_steps": 204665, "loss": 0.0, "lr": 9.315245465750376e-07, "epoch": 2.8481909461803436, "percentage": 56.96, "elapsed_time": "2:31:02", "remaining_time": "1:54:06", "throughput": 8672.02, "total_tokens": 78589672} +{"current_steps": 116590, "total_steps": 204665, "loss": 0.0003, "lr": 9.314394693881086e-07, "epoch": 2.8483130970121904, "percentage": 56.97, "elapsed_time": "2:31:02", "remaining_time": "1:54:06", "throughput": 8672.08, "total_tokens": 78593256} +{"current_steps": 116595, "total_steps": 204665, "loss": 0.0001, "lr": 9.313543926997703e-07, "epoch": 2.848435247844038, "percentage": 56.97, "elapsed_time": "2:31:03", "remaining_time": "1:54:05", "throughput": 8672.15, "total_tokens": 78596968} +{"current_steps": 116600, "total_steps": 204665, "loss": 0.0002, "lr": 9.312693165106413e-07, "epoch": 2.8485573986758848, "percentage": 56.97, "elapsed_time": "2:31:03", "remaining_time": "1:54:05", "throughput": 8672.2, "total_tokens": 78600424} +{"current_steps": 116605, "total_steps": 204665, "loss": 0.0411, "lr": 9.311842408213404e-07, "epoch": 2.8486795495077324, "percentage": 56.97, "elapsed_time": "2:31:03", "remaining_time": "1:54:05", "throughput": 8672.23, "total_tokens": 78603688} +{"current_steps": 116610, "total_steps": 204665, "loss": 0.0001, "lr": 9.310991656324865e-07, "epoch": 2.848801700339579, "percentage": 56.98, "elapsed_time": "2:31:04", "remaining_time": "1:54:04", "throughput": 8672.22, "total_tokens": 78606568} +{"current_steps": 116615, "total_steps": 204665, "loss": 0.0405, "lr": 9.310140909446974e-07, "epoch": 2.8489238511714263, "percentage": 56.98, "elapsed_time": "2:31:04", "remaining_time": "1:54:04", "throughput": 8672.26, "total_tokens": 78609896} +{"current_steps": 116620, "total_steps": 204665, "loss": 0.0679, "lr": 9.309290167585929e-07, "epoch": 2.8490460020032735, "percentage": 56.98, "elapsed_time": "2:31:04", "remaining_time": "1:54:03", "throughput": 8672.31, "total_tokens": 78613416} +{"current_steps": 116625, "total_steps": 204665, "loss": 0.0002, "lr": 9.308439430747908e-07, "epoch": 2.8491681528351207, "percentage": 56.98, "elapsed_time": "2:31:05", "remaining_time": "1:54:03", "throughput": 8672.4, "total_tokens": 78617320} +{"current_steps": 116630, "total_steps": 204665, "loss": 0.0717, "lr": 9.307588698939101e-07, "epoch": 2.849290303666968, "percentage": 56.99, "elapsed_time": "2:31:05", "remaining_time": "1:54:02", "throughput": 8672.42, "total_tokens": 78620584} +{"current_steps": 116635, "total_steps": 204665, "loss": 0.0423, "lr": 9.306737972165699e-07, "epoch": 2.849412454498815, "percentage": 56.99, "elapsed_time": "2:31:05", "remaining_time": "1:54:02", "throughput": 8672.47, "total_tokens": 78623976} +{"current_steps": 116640, "total_steps": 204665, "loss": 0.0002, "lr": 9.30588725043388e-07, "epoch": 2.8495346053306623, "percentage": 56.99, "elapsed_time": "2:31:06", "remaining_time": "1:54:02", "throughput": 8672.5, "total_tokens": 78627304} +{"current_steps": 116645, "total_steps": 204665, "loss": 0.0814, "lr": 9.305036533749842e-07, "epoch": 2.8496567561625095, "percentage": 56.99, "elapsed_time": "2:31:06", "remaining_time": "1:54:01", "throughput": 8672.5, "total_tokens": 78630184} +{"current_steps": 116650, "total_steps": 204665, "loss": 0.0217, "lr": 9.304185822119759e-07, "epoch": 2.8497789069943567, "percentage": 57.0, "elapsed_time": "2:31:06", "remaining_time": "1:54:01", "throughput": 8672.56, "total_tokens": 78633832} +{"current_steps": 116655, "total_steps": 204665, "loss": 0.0672, "lr": 9.303335115549828e-07, "epoch": 2.849901057826204, "percentage": 57.0, "elapsed_time": "2:31:07", "remaining_time": "1:54:00", "throughput": 8672.61, "total_tokens": 78637288} +{"current_steps": 116660, "total_steps": 204665, "loss": 0.0001, "lr": 9.302484414046233e-07, "epoch": 2.850023208658051, "percentage": 57.0, "elapsed_time": "2:31:07", "remaining_time": "1:54:00", "throughput": 8672.7, "total_tokens": 78641256} +{"current_steps": 116665, "total_steps": 204665, "loss": 0.1293, "lr": 9.301633717615152e-07, "epoch": 2.8501453594898982, "percentage": 57.0, "elapsed_time": "2:31:08", "remaining_time": "1:53:59", "throughput": 8672.76, "total_tokens": 78644840} +{"current_steps": 116670, "total_steps": 204665, "loss": 0.0001, "lr": 9.300783026262785e-07, "epoch": 2.8502675103217454, "percentage": 57.01, "elapsed_time": "2:31:08", "remaining_time": "1:53:59", "throughput": 8672.79, "total_tokens": 78648104} +{"current_steps": 116675, "total_steps": 204665, "loss": 0.0458, "lr": 9.299932339995308e-07, "epoch": 2.8503896611535926, "percentage": 57.01, "elapsed_time": "2:31:08", "remaining_time": "1:53:59", "throughput": 8672.86, "total_tokens": 78651880} +{"current_steps": 116680, "total_steps": 204665, "loss": 0.029, "lr": 9.299081658818915e-07, "epoch": 2.85051181198544, "percentage": 57.01, "elapsed_time": "2:31:09", "remaining_time": "1:53:58", "throughput": 8672.9, "total_tokens": 78655208} +{"current_steps": 116685, "total_steps": 204665, "loss": 0.1522, "lr": 9.298230982739784e-07, "epoch": 2.8506339628172865, "percentage": 57.01, "elapsed_time": "2:31:09", "remaining_time": "1:53:58", "throughput": 8672.9, "total_tokens": 78658216} +{"current_steps": 116690, "total_steps": 204665, "loss": 0.0002, "lr": 9.297380311764107e-07, "epoch": 2.850756113649134, "percentage": 57.02, "elapsed_time": "2:31:09", "remaining_time": "1:53:57", "throughput": 8672.9, "total_tokens": 78661160} +{"current_steps": 116695, "total_steps": 204665, "loss": 0.0005, "lr": 9.296529645898073e-07, "epoch": 2.850878264480981, "percentage": 57.02, "elapsed_time": "2:31:10", "remaining_time": "1:53:57", "throughput": 8672.91, "total_tokens": 78664232} +{"current_steps": 116700, "total_steps": 204665, "loss": 0.0003, "lr": 9.295678985147863e-07, "epoch": 2.8510004153128286, "percentage": 57.02, "elapsed_time": "2:31:10", "remaining_time": "1:53:57", "throughput": 8672.94, "total_tokens": 78667496} +{"current_steps": 116705, "total_steps": 204665, "loss": 0.0002, "lr": 9.294828329519664e-07, "epoch": 2.8511225661446753, "percentage": 57.02, "elapsed_time": "2:31:10", "remaining_time": "1:53:56", "throughput": 8672.96, "total_tokens": 78670632} +{"current_steps": 116710, "total_steps": 204665, "loss": 0.0565, "lr": 9.293977679019663e-07, "epoch": 2.8512447169765225, "percentage": 57.02, "elapsed_time": "2:31:11", "remaining_time": "1:53:56", "throughput": 8672.97, "total_tokens": 78673768} +{"current_steps": 116715, "total_steps": 204665, "loss": 0.0215, "lr": 9.293127033654045e-07, "epoch": 2.8513668678083697, "percentage": 57.03, "elapsed_time": "2:31:11", "remaining_time": "1:53:55", "throughput": 8673.03, "total_tokens": 78677352} +{"current_steps": 116720, "total_steps": 204665, "loss": 0.0767, "lr": 9.292276393429001e-07, "epoch": 2.851489018640217, "percentage": 57.03, "elapsed_time": "2:31:11", "remaining_time": "1:53:55", "throughput": 8673.07, "total_tokens": 78680744} +{"current_steps": 116725, "total_steps": 204665, "loss": 0.0001, "lr": 9.291425758350709e-07, "epoch": 2.851611169472064, "percentage": 57.03, "elapsed_time": "2:31:12", "remaining_time": "1:53:54", "throughput": 8673.1, "total_tokens": 78684008} +{"current_steps": 116730, "total_steps": 204665, "loss": 0.0004, "lr": 9.290575128425364e-07, "epoch": 2.8517333203039112, "percentage": 57.03, "elapsed_time": "2:31:12", "remaining_time": "1:53:54", "throughput": 8673.13, "total_tokens": 78687336} +{"current_steps": 116735, "total_steps": 204665, "loss": 0.0002, "lr": 9.289724503659145e-07, "epoch": 2.8518554711357584, "percentage": 57.04, "elapsed_time": "2:31:12", "remaining_time": "1:53:54", "throughput": 8673.18, "total_tokens": 78690856} +{"current_steps": 116740, "total_steps": 204665, "loss": 0.0536, "lr": 9.288873884058242e-07, "epoch": 2.8519776219676056, "percentage": 57.04, "elapsed_time": "2:31:13", "remaining_time": "1:53:53", "throughput": 8673.2, "total_tokens": 78694056} +{"current_steps": 116745, "total_steps": 204665, "loss": 0.1149, "lr": 9.288023269628836e-07, "epoch": 2.852099772799453, "percentage": 57.04, "elapsed_time": "2:31:13", "remaining_time": "1:53:53", "throughput": 8673.25, "total_tokens": 78697512} +{"current_steps": 116750, "total_steps": 204665, "loss": 0.0704, "lr": 9.287172660377119e-07, "epoch": 2.8522219236313, "percentage": 57.04, "elapsed_time": "2:31:13", "remaining_time": "1:53:52", "throughput": 8673.27, "total_tokens": 78700712} +{"current_steps": 116755, "total_steps": 204665, "loss": 0.0002, "lr": 9.286322056309272e-07, "epoch": 2.852344074463147, "percentage": 57.05, "elapsed_time": "2:31:14", "remaining_time": "1:53:52", "throughput": 8673.31, "total_tokens": 78704040} +{"current_steps": 116760, "total_steps": 204665, "loss": 0.0325, "lr": 9.285471457431486e-07, "epoch": 2.8524662252949944, "percentage": 57.05, "elapsed_time": "2:31:14", "remaining_time": "1:53:52", "throughput": 8673.37, "total_tokens": 78707688} +{"current_steps": 116765, "total_steps": 204665, "loss": 0.0999, "lr": 9.284620863749945e-07, "epoch": 2.8525883761268416, "percentage": 57.05, "elapsed_time": "2:31:15", "remaining_time": "1:53:51", "throughput": 8673.18, "total_tokens": 78711208} +{"current_steps": 116770, "total_steps": 204665, "loss": 0.1455, "lr": 9.283770275270828e-07, "epoch": 2.8527105269586883, "percentage": 57.05, "elapsed_time": "2:31:15", "remaining_time": "1:53:51", "throughput": 8673.21, "total_tokens": 78714472} +{"current_steps": 116775, "total_steps": 204665, "loss": 0.0001, "lr": 9.282919692000331e-07, "epoch": 2.852832677790536, "percentage": 57.06, "elapsed_time": "2:31:15", "remaining_time": "1:53:50", "throughput": 8673.25, "total_tokens": 78717928} +{"current_steps": 116780, "total_steps": 204665, "loss": 0.0002, "lr": 9.282069113944631e-07, "epoch": 2.8529548286223827, "percentage": 57.06, "elapsed_time": "2:31:16", "remaining_time": "1:53:50", "throughput": 8673.29, "total_tokens": 78721256} +{"current_steps": 116785, "total_steps": 204665, "loss": 0.0378, "lr": 9.281218541109917e-07, "epoch": 2.8530769794542303, "percentage": 57.06, "elapsed_time": "2:31:16", "remaining_time": "1:53:50", "throughput": 8673.35, "total_tokens": 78724904} +{"current_steps": 116790, "total_steps": 204665, "loss": 0.0001, "lr": 9.28036797350238e-07, "epoch": 2.853199130286077, "percentage": 57.06, "elapsed_time": "2:31:16", "remaining_time": "1:53:49", "throughput": 8673.42, "total_tokens": 78728616} +{"current_steps": 116795, "total_steps": 204665, "loss": 0.0005, "lr": 9.279517411128196e-07, "epoch": 2.8533212811179243, "percentage": 57.07, "elapsed_time": "2:31:17", "remaining_time": "1:53:49", "throughput": 8673.47, "total_tokens": 78732072} +{"current_steps": 116800, "total_steps": 204665, "loss": 0.054, "lr": 9.27866685399356e-07, "epoch": 2.8534434319497715, "percentage": 57.07, "elapsed_time": "2:31:17", "remaining_time": "1:53:48", "throughput": 8673.49, "total_tokens": 78735208} +{"current_steps": 116805, "total_steps": 204665, "loss": 0.0594, "lr": 9.277816302104647e-07, "epoch": 2.8535655827816186, "percentage": 57.07, "elapsed_time": "2:31:18", "remaining_time": "1:53:48", "throughput": 8673.47, "total_tokens": 78737960} +{"current_steps": 116810, "total_steps": 204665, "loss": 0.0001, "lr": 9.276965755467652e-07, "epoch": 2.853687733613466, "percentage": 57.07, "elapsed_time": "2:31:18", "remaining_time": "1:53:48", "throughput": 8673.5, "total_tokens": 78741224} +{"current_steps": 116815, "total_steps": 204665, "loss": 0.0185, "lr": 9.276115214088758e-07, "epoch": 2.853809884445313, "percentage": 57.08, "elapsed_time": "2:31:18", "remaining_time": "1:53:47", "throughput": 8673.52, "total_tokens": 78744360} +{"current_steps": 116820, "total_steps": 204665, "loss": 0.0538, "lr": 9.275264677974144e-07, "epoch": 2.85393203527716, "percentage": 57.08, "elapsed_time": "2:31:19", "remaining_time": "1:53:47", "throughput": 8673.59, "total_tokens": 78748072} +{"current_steps": 116825, "total_steps": 204665, "loss": 0.0005, "lr": 9.274414147130006e-07, "epoch": 2.8540541861090074, "percentage": 57.08, "elapsed_time": "2:31:19", "remaining_time": "1:53:46", "throughput": 8673.63, "total_tokens": 78751464} +{"current_steps": 116830, "total_steps": 204665, "loss": 0.0495, "lr": 9.273563621562516e-07, "epoch": 2.8541763369408546, "percentage": 57.08, "elapsed_time": "2:31:19", "remaining_time": "1:53:46", "throughput": 8673.69, "total_tokens": 78755112} +{"current_steps": 116835, "total_steps": 204665, "loss": 0.0566, "lr": 9.272713101277873e-07, "epoch": 2.854298487772702, "percentage": 57.09, "elapsed_time": "2:31:20", "remaining_time": "1:53:45", "throughput": 8673.75, "total_tokens": 78758632} +{"current_steps": 116840, "total_steps": 204665, "loss": 0.0003, "lr": 9.271862586282252e-07, "epoch": 2.854420638604549, "percentage": 57.09, "elapsed_time": "2:31:20", "remaining_time": "1:53:45", "throughput": 8673.78, "total_tokens": 78761896} +{"current_steps": 116845, "total_steps": 204665, "loss": 0.0005, "lr": 9.271012076581842e-07, "epoch": 2.854542789436396, "percentage": 57.09, "elapsed_time": "2:31:20", "remaining_time": "1:53:45", "throughput": 8673.79, "total_tokens": 78765032} +{"current_steps": 116850, "total_steps": 204665, "loss": 0.0002, "lr": 9.270161572182833e-07, "epoch": 2.8546649402682434, "percentage": 57.09, "elapsed_time": "2:31:21", "remaining_time": "1:53:44", "throughput": 8673.81, "total_tokens": 78768168} +{"current_steps": 116855, "total_steps": 204665, "loss": 0.0751, "lr": 9.269311073091403e-07, "epoch": 2.8547870911000905, "percentage": 57.1, "elapsed_time": "2:31:21", "remaining_time": "1:53:44", "throughput": 8673.84, "total_tokens": 78771432} +{"current_steps": 116860, "total_steps": 204665, "loss": 0.0001, "lr": 9.268460579313738e-07, "epoch": 2.8549092419319377, "percentage": 57.1, "elapsed_time": "2:31:21", "remaining_time": "1:53:43", "throughput": 8673.89, "total_tokens": 78774888} +{"current_steps": 116865, "total_steps": 204665, "loss": 0.0604, "lr": 9.267610090856025e-07, "epoch": 2.8550313927637845, "percentage": 57.1, "elapsed_time": "2:31:22", "remaining_time": "1:53:43", "throughput": 8673.91, "total_tokens": 78778024} +{"current_steps": 116870, "total_steps": 204665, "loss": 0.1097, "lr": 9.266759607724451e-07, "epoch": 2.855153543595632, "percentage": 57.1, "elapsed_time": "2:31:22", "remaining_time": "1:53:42", "throughput": 8673.92, "total_tokens": 78781160} +{"current_steps": 116875, "total_steps": 204665, "loss": 0.0001, "lr": 9.265909129925194e-07, "epoch": 2.855275694427479, "percentage": 57.11, "elapsed_time": "2:31:22", "remaining_time": "1:53:42", "throughput": 8673.96, "total_tokens": 78784552} +{"current_steps": 116880, "total_steps": 204665, "loss": 0.0936, "lr": 9.265058657464444e-07, "epoch": 2.855397845259326, "percentage": 57.11, "elapsed_time": "2:31:23", "remaining_time": "1:53:42", "throughput": 8673.97, "total_tokens": 78787560} +{"current_steps": 116885, "total_steps": 204665, "loss": 0.1411, "lr": 9.264208190348388e-07, "epoch": 2.8555199960911732, "percentage": 57.11, "elapsed_time": "2:31:23", "remaining_time": "1:53:41", "throughput": 8674.02, "total_tokens": 78791080} +{"current_steps": 116890, "total_steps": 204665, "loss": 0.0001, "lr": 9.263357728583204e-07, "epoch": 2.8556421469230204, "percentage": 57.11, "elapsed_time": "2:31:23", "remaining_time": "1:53:41", "throughput": 8674.08, "total_tokens": 78794728} +{"current_steps": 116895, "total_steps": 204665, "loss": 0.0003, "lr": 9.262507272175087e-07, "epoch": 2.8557642977548676, "percentage": 57.12, "elapsed_time": "2:31:24", "remaining_time": "1:53:40", "throughput": 8674.15, "total_tokens": 78798376} +{"current_steps": 116900, "total_steps": 204665, "loss": 0.0517, "lr": 9.261656821130208e-07, "epoch": 2.855886448586715, "percentage": 57.12, "elapsed_time": "2:31:24", "remaining_time": "1:53:40", "throughput": 8674.18, "total_tokens": 78801576} +{"current_steps": 116905, "total_steps": 204665, "loss": 0.0004, "lr": 9.260806375454764e-07, "epoch": 2.856008599418562, "percentage": 57.12, "elapsed_time": "2:31:24", "remaining_time": "1:53:40", "throughput": 8674.23, "total_tokens": 78805160} +{"current_steps": 116910, "total_steps": 204665, "loss": 0.0003, "lr": 9.259955935154932e-07, "epoch": 2.856130750250409, "percentage": 57.12, "elapsed_time": "2:31:25", "remaining_time": "1:53:39", "throughput": 8674.28, "total_tokens": 78808552} +{"current_steps": 116915, "total_steps": 204665, "loss": 0.0674, "lr": 9.259105500236902e-07, "epoch": 2.8562529010822564, "percentage": 57.13, "elapsed_time": "2:31:25", "remaining_time": "1:53:39", "throughput": 8674.31, "total_tokens": 78811816} +{"current_steps": 116920, "total_steps": 204665, "loss": 0.0481, "lr": 9.258255070706857e-07, "epoch": 2.8563750519141036, "percentage": 57.13, "elapsed_time": "2:31:26", "remaining_time": "1:53:38", "throughput": 8674.32, "total_tokens": 78814952} +{"current_steps": 116925, "total_steps": 204665, "loss": 0.0651, "lr": 9.257404646570978e-07, "epoch": 2.8564972027459508, "percentage": 57.13, "elapsed_time": "2:31:26", "remaining_time": "1:53:38", "throughput": 8674.35, "total_tokens": 78818152} +{"current_steps": 116930, "total_steps": 204665, "loss": 0.0628, "lr": 9.256554227835455e-07, "epoch": 2.856619353577798, "percentage": 57.13, "elapsed_time": "2:31:26", "remaining_time": "1:53:37", "throughput": 8674.36, "total_tokens": 78821288} +{"current_steps": 116935, "total_steps": 204665, "loss": 0.0003, "lr": 9.255703814506466e-07, "epoch": 2.856741504409645, "percentage": 57.13, "elapsed_time": "2:31:27", "remaining_time": "1:53:37", "throughput": 8674.39, "total_tokens": 78824552} +{"current_steps": 116940, "total_steps": 204665, "loss": 0.0002, "lr": 9.254853406590197e-07, "epoch": 2.8568636552414923, "percentage": 57.14, "elapsed_time": "2:31:27", "remaining_time": "1:53:37", "throughput": 8674.42, "total_tokens": 78827816} +{"current_steps": 116945, "total_steps": 204665, "loss": 0.0845, "lr": 9.254003004092841e-07, "epoch": 2.8569858060733395, "percentage": 57.14, "elapsed_time": "2:31:27", "remaining_time": "1:53:36", "throughput": 8674.44, "total_tokens": 78830952} +{"current_steps": 116950, "total_steps": 204665, "loss": 0.0004, "lr": 9.253152607020572e-07, "epoch": 2.8571079569051863, "percentage": 57.14, "elapsed_time": "2:31:28", "remaining_time": "1:53:36", "throughput": 8674.44, "total_tokens": 78833832} +{"current_steps": 116955, "total_steps": 204665, "loss": 0.0001, "lr": 9.25230221537958e-07, "epoch": 2.857230107737034, "percentage": 57.14, "elapsed_time": "2:31:28", "remaining_time": "1:53:35", "throughput": 8674.46, "total_tokens": 78836968} +{"current_steps": 116960, "total_steps": 204665, "loss": 0.0715, "lr": 9.251451829176045e-07, "epoch": 2.8573522585688806, "percentage": 57.15, "elapsed_time": "2:31:28", "remaining_time": "1:53:35", "throughput": 8674.48, "total_tokens": 78840168} +{"current_steps": 116965, "total_steps": 204665, "loss": 0.0757, "lr": 9.250601448416155e-07, "epoch": 2.8574744094007283, "percentage": 57.15, "elapsed_time": "2:31:29", "remaining_time": "1:53:34", "throughput": 8674.52, "total_tokens": 78843560} +{"current_steps": 116970, "total_steps": 204665, "loss": 0.0006, "lr": 9.249751073106095e-07, "epoch": 2.857596560232575, "percentage": 57.15, "elapsed_time": "2:31:29", "remaining_time": "1:53:34", "throughput": 8674.58, "total_tokens": 78847080} +{"current_steps": 116975, "total_steps": 204665, "loss": 0.0008, "lr": 9.248900703252042e-07, "epoch": 2.857718711064422, "percentage": 57.15, "elapsed_time": "2:31:29", "remaining_time": "1:53:34", "throughput": 8674.62, "total_tokens": 78850472} +{"current_steps": 116980, "total_steps": 204665, "loss": 0.0529, "lr": 9.248050338860192e-07, "epoch": 2.8578408618962694, "percentage": 57.16, "elapsed_time": "2:31:30", "remaining_time": "1:53:33", "throughput": 8674.67, "total_tokens": 78853992} +{"current_steps": 116985, "total_steps": 204665, "loss": 0.1234, "lr": 9.247199979936715e-07, "epoch": 2.8579630127281166, "percentage": 57.16, "elapsed_time": "2:31:30", "remaining_time": "1:53:33", "throughput": 8674.71, "total_tokens": 78857384} +{"current_steps": 116990, "total_steps": 204665, "loss": 0.0639, "lr": 9.246349626487809e-07, "epoch": 2.858085163559964, "percentage": 57.16, "elapsed_time": "2:31:30", "remaining_time": "1:53:32", "throughput": 8674.72, "total_tokens": 78860456} +{"current_steps": 116995, "total_steps": 204665, "loss": 0.0381, "lr": 9.245499278519644e-07, "epoch": 2.858207314391811, "percentage": 57.16, "elapsed_time": "2:31:31", "remaining_time": "1:53:32", "throughput": 8674.75, "total_tokens": 78863720} +{"current_steps": 117000, "total_steps": 204665, "loss": 0.0433, "lr": 9.244648936038412e-07, "epoch": 2.858329465223658, "percentage": 57.17, "elapsed_time": "2:31:31", "remaining_time": "1:53:32", "throughput": 8674.76, "total_tokens": 78866792} +{"current_steps": 117005, "total_steps": 204665, "loss": 0.0666, "lr": 9.243798599050302e-07, "epoch": 2.8584516160555054, "percentage": 57.17, "elapsed_time": "2:31:31", "remaining_time": "1:53:31", "throughput": 8674.8, "total_tokens": 78870120} +{"current_steps": 117010, "total_steps": 204665, "loss": 0.0207, "lr": 9.242948267561489e-07, "epoch": 2.8585737668873525, "percentage": 57.17, "elapsed_time": "2:31:32", "remaining_time": "1:53:31", "throughput": 8674.82, "total_tokens": 78873320} +{"current_steps": 117015, "total_steps": 204665, "loss": 0.031, "lr": 9.242097941578159e-07, "epoch": 2.8586959177191997, "percentage": 57.17, "elapsed_time": "2:31:32", "remaining_time": "1:53:30", "throughput": 8674.84, "total_tokens": 78876520} +{"current_steps": 117020, "total_steps": 204665, "loss": 0.0434, "lr": 9.241247621106498e-07, "epoch": 2.858818068551047, "percentage": 57.18, "elapsed_time": "2:31:32", "remaining_time": "1:53:30", "throughput": 8674.87, "total_tokens": 78879720} +{"current_steps": 117025, "total_steps": 204665, "loss": 0.067, "lr": 9.24039730615269e-07, "epoch": 2.858940219382894, "percentage": 57.18, "elapsed_time": "2:31:33", "remaining_time": "1:53:29", "throughput": 8674.9, "total_tokens": 78882920} +{"current_steps": 117030, "total_steps": 204665, "loss": 0.0288, "lr": 9.239546996722914e-07, "epoch": 2.8590623702147413, "percentage": 57.18, "elapsed_time": "2:31:33", "remaining_time": "1:53:29", "throughput": 8674.93, "total_tokens": 78886248} +{"current_steps": 117035, "total_steps": 204665, "loss": 0.064, "lr": 9.238696692823355e-07, "epoch": 2.8591845210465885, "percentage": 57.18, "elapsed_time": "2:31:33", "remaining_time": "1:53:29", "throughput": 8674.96, "total_tokens": 78889448} +{"current_steps": 117040, "total_steps": 204665, "loss": 0.0519, "lr": 9.237846394460203e-07, "epoch": 2.8593066718784357, "percentage": 57.19, "elapsed_time": "2:31:34", "remaining_time": "1:53:28", "throughput": 8674.97, "total_tokens": 78892456} +{"current_steps": 117045, "total_steps": 204665, "loss": 0.0001, "lr": 9.236996101639632e-07, "epoch": 2.8594288227102824, "percentage": 57.19, "elapsed_time": "2:31:34", "remaining_time": "1:53:28", "throughput": 8675.02, "total_tokens": 78895976} +{"current_steps": 117050, "total_steps": 204665, "loss": 0.0754, "lr": 9.236145814367836e-07, "epoch": 2.85955097354213, "percentage": 57.19, "elapsed_time": "2:31:34", "remaining_time": "1:53:27", "throughput": 8675.06, "total_tokens": 78899368} +{"current_steps": 117055, "total_steps": 204665, "loss": 0.0563, "lr": 9.23529553265099e-07, "epoch": 2.859673124373977, "percentage": 57.19, "elapsed_time": "2:31:35", "remaining_time": "1:53:27", "throughput": 8675.07, "total_tokens": 78902440} +{"current_steps": 117060, "total_steps": 204665, "loss": 0.0582, "lr": 9.23444525649528e-07, "epoch": 2.859795275205824, "percentage": 57.2, "elapsed_time": "2:31:35", "remaining_time": "1:53:26", "throughput": 8675.1, "total_tokens": 78905640} +{"current_steps": 117065, "total_steps": 204665, "loss": 0.0004, "lr": 9.233594985906892e-07, "epoch": 2.859917426037671, "percentage": 57.2, "elapsed_time": "2:31:36", "remaining_time": "1:53:26", "throughput": 8675.15, "total_tokens": 78909224} +{"current_steps": 117070, "total_steps": 204665, "loss": 0.0002, "lr": 9.232744720892006e-07, "epoch": 2.8600395768695184, "percentage": 57.2, "elapsed_time": "2:31:36", "remaining_time": "1:53:26", "throughput": 8675.17, "total_tokens": 78912360} +{"current_steps": 117075, "total_steps": 204665, "loss": 0.0495, "lr": 9.23189446145681e-07, "epoch": 2.8601617277013656, "percentage": 57.2, "elapsed_time": "2:31:36", "remaining_time": "1:53:25", "throughput": 8675.21, "total_tokens": 78915688} +{"current_steps": 117080, "total_steps": 204665, "loss": 0.0003, "lr": 9.231044207607479e-07, "epoch": 2.8602838785332128, "percentage": 57.21, "elapsed_time": "2:31:37", "remaining_time": "1:53:25", "throughput": 8675.23, "total_tokens": 78918888} +{"current_steps": 117085, "total_steps": 204665, "loss": 0.0357, "lr": 9.230193959350207e-07, "epoch": 2.86040602936506, "percentage": 57.21, "elapsed_time": "2:31:37", "remaining_time": "1:53:24", "throughput": 8675.28, "total_tokens": 78922408} +{"current_steps": 117090, "total_steps": 204665, "loss": 0.1141, "lr": 9.229343716691166e-07, "epoch": 2.860528180196907, "percentage": 57.21, "elapsed_time": "2:31:37", "remaining_time": "1:53:24", "throughput": 8675.35, "total_tokens": 78926056} +{"current_steps": 117095, "total_steps": 204665, "loss": 0.0002, "lr": 9.228493479636545e-07, "epoch": 2.8606503310287543, "percentage": 57.21, "elapsed_time": "2:31:38", "remaining_time": "1:53:24", "throughput": 8675.37, "total_tokens": 78929192} +{"current_steps": 117100, "total_steps": 204665, "loss": 0.0, "lr": 9.227643248192532e-07, "epoch": 2.8607724818606015, "percentage": 57.22, "elapsed_time": "2:31:38", "remaining_time": "1:53:23", "throughput": 8675.41, "total_tokens": 78932648} +{"current_steps": 117105, "total_steps": 204665, "loss": 0.0474, "lr": 9.226793022365299e-07, "epoch": 2.8608946326924487, "percentage": 57.22, "elapsed_time": "2:31:38", "remaining_time": "1:53:23", "throughput": 8675.46, "total_tokens": 78936104} +{"current_steps": 117110, "total_steps": 204665, "loss": 0.0352, "lr": 9.225942802161041e-07, "epoch": 2.861016783524296, "percentage": 57.22, "elapsed_time": "2:31:39", "remaining_time": "1:53:22", "throughput": 8675.47, "total_tokens": 78939176} +{"current_steps": 117115, "total_steps": 204665, "loss": 0.0001, "lr": 9.225092587585929e-07, "epoch": 2.861138934356143, "percentage": 57.22, "elapsed_time": "2:31:39", "remaining_time": "1:53:22", "throughput": 8675.48, "total_tokens": 78942248} +{"current_steps": 117120, "total_steps": 204665, "loss": 0.0004, "lr": 9.224242378646156e-07, "epoch": 2.8612610851879903, "percentage": 57.23, "elapsed_time": "2:31:39", "remaining_time": "1:53:21", "throughput": 8675.5, "total_tokens": 78945384} +{"current_steps": 117125, "total_steps": 204665, "loss": 0.0003, "lr": 9.223392175347903e-07, "epoch": 2.8613832360198375, "percentage": 57.23, "elapsed_time": "2:31:40", "remaining_time": "1:53:21", "throughput": 8675.58, "total_tokens": 78949224} +{"current_steps": 117130, "total_steps": 204665, "loss": 0.0003, "lr": 9.222541977697346e-07, "epoch": 2.861505386851684, "percentage": 57.23, "elapsed_time": "2:31:40", "remaining_time": "1:53:21", "throughput": 8675.6, "total_tokens": 78952424} +{"current_steps": 117135, "total_steps": 204665, "loss": 0.062, "lr": 9.221691785700679e-07, "epoch": 2.861627537683532, "percentage": 57.23, "elapsed_time": "2:31:40", "remaining_time": "1:53:20", "throughput": 8675.64, "total_tokens": 78955816} +{"current_steps": 117140, "total_steps": 204665, "loss": 0.0006, "lr": 9.220841599364073e-07, "epoch": 2.8617496885153786, "percentage": 57.23, "elapsed_time": "2:31:41", "remaining_time": "1:53:20", "throughput": 8675.68, "total_tokens": 78959144} +{"current_steps": 117145, "total_steps": 204665, "loss": 0.0002, "lr": 9.219991418693721e-07, "epoch": 2.861871839347226, "percentage": 57.24, "elapsed_time": "2:31:41", "remaining_time": "1:53:19", "throughput": 8675.7, "total_tokens": 78962344} +{"current_steps": 117150, "total_steps": 204665, "loss": 0.0002, "lr": 9.219141243695796e-07, "epoch": 2.861993990179073, "percentage": 57.24, "elapsed_time": "2:31:41", "remaining_time": "1:53:19", "throughput": 8675.73, "total_tokens": 78965608} +{"current_steps": 117155, "total_steps": 204665, "loss": 0.0878, "lr": 9.218291074376487e-07, "epoch": 2.86211614101092, "percentage": 57.24, "elapsed_time": "2:31:42", "remaining_time": "1:53:19", "throughput": 8675.74, "total_tokens": 78968680} +{"current_steps": 117160, "total_steps": 204665, "loss": 0.0415, "lr": 9.217440910741979e-07, "epoch": 2.8622382918427673, "percentage": 57.24, "elapsed_time": "2:31:42", "remaining_time": "1:53:18", "throughput": 8675.79, "total_tokens": 78972136} +{"current_steps": 117165, "total_steps": 204665, "loss": 0.076, "lr": 9.21659075279845e-07, "epoch": 2.8623604426746145, "percentage": 57.25, "elapsed_time": "2:31:42", "remaining_time": "1:53:18", "throughput": 8675.79, "total_tokens": 78975144} +{"current_steps": 117170, "total_steps": 204665, "loss": 0.0004, "lr": 9.215740600552084e-07, "epoch": 2.8624825935064617, "percentage": 57.25, "elapsed_time": "2:31:43", "remaining_time": "1:53:17", "throughput": 8675.83, "total_tokens": 78978472} +{"current_steps": 117175, "total_steps": 204665, "loss": 0.0381, "lr": 9.214890454009062e-07, "epoch": 2.862604744338309, "percentage": 57.25, "elapsed_time": "2:31:43", "remaining_time": "1:53:17", "throughput": 8675.87, "total_tokens": 78981928} +{"current_steps": 117180, "total_steps": 204665, "loss": 0.0068, "lr": 9.214040313175571e-07, "epoch": 2.862726895170156, "percentage": 57.25, "elapsed_time": "2:31:43", "remaining_time": "1:53:16", "throughput": 8675.91, "total_tokens": 78985256} +{"current_steps": 117185, "total_steps": 204665, "loss": 0.0431, "lr": 9.213190178057784e-07, "epoch": 2.8628490460020033, "percentage": 57.26, "elapsed_time": "2:31:44", "remaining_time": "1:53:16", "throughput": 8675.94, "total_tokens": 78988520} +{"current_steps": 117190, "total_steps": 204665, "loss": 0.0001, "lr": 9.212340048661892e-07, "epoch": 2.8629711968338505, "percentage": 57.26, "elapsed_time": "2:31:44", "remaining_time": "1:53:16", "throughput": 8675.99, "total_tokens": 78991976} +{"current_steps": 117195, "total_steps": 204665, "loss": 0.0006, "lr": 9.211489924994078e-07, "epoch": 2.8630933476656977, "percentage": 57.26, "elapsed_time": "2:31:45", "remaining_time": "1:53:15", "throughput": 8675.99, "total_tokens": 78994984} +{"current_steps": 117200, "total_steps": 204665, "loss": 0.0001, "lr": 9.210639807060518e-07, "epoch": 2.863215498497545, "percentage": 57.26, "elapsed_time": "2:31:45", "remaining_time": "1:53:15", "throughput": 8676.05, "total_tokens": 78998568} +{"current_steps": 117205, "total_steps": 204665, "loss": 0.0717, "lr": 9.209789694867401e-07, "epoch": 2.863337649329392, "percentage": 57.27, "elapsed_time": "2:31:45", "remaining_time": "1:53:14", "throughput": 8676.12, "total_tokens": 79002344} +{"current_steps": 117210, "total_steps": 204665, "loss": 0.0002, "lr": 9.208939588420902e-07, "epoch": 2.8634598001612392, "percentage": 57.27, "elapsed_time": "2:31:46", "remaining_time": "1:53:14", "throughput": 8676.17, "total_tokens": 79005864} +{"current_steps": 117215, "total_steps": 204665, "loss": 0.1023, "lr": 9.208089487727208e-07, "epoch": 2.863581950993086, "percentage": 57.27, "elapsed_time": "2:31:46", "remaining_time": "1:53:13", "throughput": 8676.2, "total_tokens": 79009064} +{"current_steps": 117220, "total_steps": 204665, "loss": 0.0004, "lr": 9.207239392792503e-07, "epoch": 2.8637041018249336, "percentage": 57.27, "elapsed_time": "2:31:46", "remaining_time": "1:53:13", "throughput": 8676.22, "total_tokens": 79012200} +{"current_steps": 117225, "total_steps": 204665, "loss": 0.0346, "lr": 9.206389303622964e-07, "epoch": 2.8638262526567804, "percentage": 57.28, "elapsed_time": "2:31:47", "remaining_time": "1:53:13", "throughput": 8676.25, "total_tokens": 79015464} +{"current_steps": 117230, "total_steps": 204665, "loss": 0.0002, "lr": 9.205539220224779e-07, "epoch": 2.863948403488628, "percentage": 57.28, "elapsed_time": "2:31:47", "remaining_time": "1:53:12", "throughput": 8676.3, "total_tokens": 79018920} +{"current_steps": 117235, "total_steps": 204665, "loss": 0.0442, "lr": 9.20468914260412e-07, "epoch": 2.8640705543204747, "percentage": 57.28, "elapsed_time": "2:31:47", "remaining_time": "1:53:12", "throughput": 8676.35, "total_tokens": 79022504} +{"current_steps": 117240, "total_steps": 204665, "loss": 0.0002, "lr": 9.203839070767182e-07, "epoch": 2.864192705152322, "percentage": 57.28, "elapsed_time": "2:31:48", "remaining_time": "1:53:11", "throughput": 8676.38, "total_tokens": 79025704} +{"current_steps": 117245, "total_steps": 204665, "loss": 0.0865, "lr": 9.202989004720136e-07, "epoch": 2.864314855984169, "percentage": 57.29, "elapsed_time": "2:31:48", "remaining_time": "1:53:11", "throughput": 8676.38, "total_tokens": 79028712} +{"current_steps": 117250, "total_steps": 204665, "loss": 0.0, "lr": 9.202138944469168e-07, "epoch": 2.8644370068160163, "percentage": 57.29, "elapsed_time": "2:31:48", "remaining_time": "1:53:11", "throughput": 8676.38, "total_tokens": 79031656} +{"current_steps": 117255, "total_steps": 204665, "loss": 0.0002, "lr": 9.201288890020464e-07, "epoch": 2.8645591576478635, "percentage": 57.29, "elapsed_time": "2:31:49", "remaining_time": "1:53:10", "throughput": 8676.5, "total_tokens": 79035944} +{"current_steps": 117260, "total_steps": 204665, "loss": 0.0001, "lr": 9.200438841380198e-07, "epoch": 2.8646813084797107, "percentage": 57.29, "elapsed_time": "2:31:49", "remaining_time": "1:53:10", "throughput": 8676.52, "total_tokens": 79039080} +{"current_steps": 117265, "total_steps": 204665, "loss": 0.0225, "lr": 9.199588798554559e-07, "epoch": 2.864803459311558, "percentage": 57.3, "elapsed_time": "2:31:49", "remaining_time": "1:53:09", "throughput": 8676.62, "total_tokens": 79043112} +{"current_steps": 117270, "total_steps": 204665, "loss": 0.0026, "lr": 9.198738761549724e-07, "epoch": 2.864925610143405, "percentage": 57.3, "elapsed_time": "2:31:50", "remaining_time": "1:53:09", "throughput": 8676.66, "total_tokens": 79046568} +{"current_steps": 117275, "total_steps": 204665, "loss": 0.0501, "lr": 9.197888730371875e-07, "epoch": 2.8650477609752523, "percentage": 57.3, "elapsed_time": "2:31:50", "remaining_time": "1:53:08", "throughput": 8676.72, "total_tokens": 79050152} +{"current_steps": 117280, "total_steps": 204665, "loss": 0.0002, "lr": 9.1970387050272e-07, "epoch": 2.8651699118070995, "percentage": 57.3, "elapsed_time": "2:31:50", "remaining_time": "1:53:08", "throughput": 8676.79, "total_tokens": 79053800} +{"current_steps": 117285, "total_steps": 204665, "loss": 0.0236, "lr": 9.196188685521869e-07, "epoch": 2.8652920626389466, "percentage": 57.31, "elapsed_time": "2:31:51", "remaining_time": "1:53:08", "throughput": 8676.82, "total_tokens": 79057128} +{"current_steps": 117290, "total_steps": 204665, "loss": 0.1096, "lr": 9.195338671862077e-07, "epoch": 2.865414213470794, "percentage": 57.31, "elapsed_time": "2:31:51", "remaining_time": "1:53:07", "throughput": 8676.89, "total_tokens": 79060776} +{"current_steps": 117295, "total_steps": 204665, "loss": 0.0395, "lr": 9.194488664053992e-07, "epoch": 2.865536364302641, "percentage": 57.31, "elapsed_time": "2:31:51", "remaining_time": "1:53:07", "throughput": 8676.9, "total_tokens": 79063848} +{"current_steps": 117300, "total_steps": 204665, "loss": 0.0001, "lr": 9.193638662103807e-07, "epoch": 2.865658515134488, "percentage": 57.31, "elapsed_time": "2:31:52", "remaining_time": "1:53:06", "throughput": 8676.92, "total_tokens": 79066984} +{"current_steps": 117305, "total_steps": 204665, "loss": 0.0002, "lr": 9.192788666017695e-07, "epoch": 2.8657806659663354, "percentage": 57.32, "elapsed_time": "2:31:52", "remaining_time": "1:53:06", "throughput": 8676.94, "total_tokens": 79070184} +{"current_steps": 117310, "total_steps": 204665, "loss": 0.0002, "lr": 9.191938675801839e-07, "epoch": 2.865902816798182, "percentage": 57.32, "elapsed_time": "2:31:53", "remaining_time": "1:53:06", "throughput": 8676.98, "total_tokens": 79073512} +{"current_steps": 117315, "total_steps": 204665, "loss": 0.0003, "lr": 9.191088691462428e-07, "epoch": 2.86602496763003, "percentage": 57.32, "elapsed_time": "2:31:53", "remaining_time": "1:53:05", "throughput": 8677.04, "total_tokens": 79077160} +{"current_steps": 117320, "total_steps": 204665, "loss": 0.0005, "lr": 9.190238713005636e-07, "epoch": 2.8661471184618765, "percentage": 57.32, "elapsed_time": "2:31:53", "remaining_time": "1:53:05", "throughput": 8677.08, "total_tokens": 79080552} +{"current_steps": 117325, "total_steps": 204665, "loss": 0.0422, "lr": 9.189388740437645e-07, "epoch": 2.866269269293724, "percentage": 57.33, "elapsed_time": "2:31:54", "remaining_time": "1:53:04", "throughput": 8677.16, "total_tokens": 79084328} +{"current_steps": 117330, "total_steps": 204665, "loss": 0.1515, "lr": 9.188538773764637e-07, "epoch": 2.866391420125571, "percentage": 57.33, "elapsed_time": "2:31:54", "remaining_time": "1:53:04", "throughput": 8677.23, "total_tokens": 79088104} +{"current_steps": 117335, "total_steps": 204665, "loss": 0.0001, "lr": 9.187688812992796e-07, "epoch": 2.866513570957418, "percentage": 57.33, "elapsed_time": "2:31:54", "remaining_time": "1:53:03", "throughput": 8677.26, "total_tokens": 79091432} +{"current_steps": 117340, "total_steps": 204665, "loss": 0.0315, "lr": 9.186838858128295e-07, "epoch": 2.8666357217892653, "percentage": 57.33, "elapsed_time": "2:31:55", "remaining_time": "1:53:03", "throughput": 8677.34, "total_tokens": 79095272} +{"current_steps": 117345, "total_steps": 204665, "loss": 0.0003, "lr": 9.185988909177321e-07, "epoch": 2.8667578726211125, "percentage": 57.34, "elapsed_time": "2:31:55", "remaining_time": "1:53:03", "throughput": 8677.36, "total_tokens": 79098408} +{"current_steps": 117350, "total_steps": 204665, "loss": 0.1138, "lr": 9.18513896614606e-07, "epoch": 2.8668800234529597, "percentage": 57.34, "elapsed_time": "2:31:55", "remaining_time": "1:53:02", "throughput": 8677.36, "total_tokens": 79101352} +{"current_steps": 117355, "total_steps": 204665, "loss": 0.0741, "lr": 9.184289029040683e-07, "epoch": 2.867002174284807, "percentage": 57.34, "elapsed_time": "2:31:56", "remaining_time": "1:53:02", "throughput": 8677.38, "total_tokens": 79104424} +{"current_steps": 117360, "total_steps": 204665, "loss": 0.0324, "lr": 9.18343909786738e-07, "epoch": 2.867124325116654, "percentage": 57.34, "elapsed_time": "2:31:56", "remaining_time": "1:53:01", "throughput": 8677.43, "total_tokens": 79107944} +{"current_steps": 117365, "total_steps": 204665, "loss": 0.0407, "lr": 9.182589172632321e-07, "epoch": 2.8672464759485012, "percentage": 57.34, "elapsed_time": "2:31:56", "remaining_time": "1:53:01", "throughput": 8677.49, "total_tokens": 79111528} +{"current_steps": 117370, "total_steps": 204665, "loss": 0.0002, "lr": 9.181739253341699e-07, "epoch": 2.8673686267803484, "percentage": 57.35, "elapsed_time": "2:31:57", "remaining_time": "1:53:01", "throughput": 8677.53, "total_tokens": 79114984} +{"current_steps": 117375, "total_steps": 204665, "loss": 0.0382, "lr": 9.180889340001686e-07, "epoch": 2.8674907776121956, "percentage": 57.35, "elapsed_time": "2:31:57", "remaining_time": "1:53:00", "throughput": 8677.59, "total_tokens": 79118632} +{"current_steps": 117380, "total_steps": 204665, "loss": 0.0347, "lr": 9.180039432618467e-07, "epoch": 2.867612928444043, "percentage": 57.35, "elapsed_time": "2:31:57", "remaining_time": "1:53:00", "throughput": 8677.57, "total_tokens": 79121320} +{"current_steps": 117385, "total_steps": 204665, "loss": 0.0292, "lr": 9.179189531198225e-07, "epoch": 2.86773507927589, "percentage": 57.35, "elapsed_time": "2:31:58", "remaining_time": "1:52:59", "throughput": 8677.59, "total_tokens": 79124456} +{"current_steps": 117390, "total_steps": 204665, "loss": 0.1183, "lr": 9.178339635747132e-07, "epoch": 2.867857230107737, "percentage": 57.36, "elapsed_time": "2:31:58", "remaining_time": "1:52:59", "throughput": 8677.62, "total_tokens": 79127720} +{"current_steps": 117395, "total_steps": 204665, "loss": 0.0002, "lr": 9.17748974627138e-07, "epoch": 2.867979380939584, "percentage": 57.36, "elapsed_time": "2:31:58", "remaining_time": "1:52:58", "throughput": 8677.65, "total_tokens": 79131048} +{"current_steps": 117400, "total_steps": 204665, "loss": 0.0332, "lr": 9.176639862777138e-07, "epoch": 2.8681015317714316, "percentage": 57.36, "elapsed_time": "2:31:59", "remaining_time": "1:52:58", "throughput": 8677.69, "total_tokens": 79134376} +{"current_steps": 117405, "total_steps": 204665, "loss": 0.0001, "lr": 9.175789985270593e-07, "epoch": 2.8682236826032783, "percentage": 57.36, "elapsed_time": "2:31:59", "remaining_time": "1:52:58", "throughput": 8677.75, "total_tokens": 79138024} +{"current_steps": 117410, "total_steps": 204665, "loss": 0.0003, "lr": 9.174940113757929e-07, "epoch": 2.868345833435126, "percentage": 57.37, "elapsed_time": "2:32:00", "remaining_time": "1:52:57", "throughput": 8677.82, "total_tokens": 79141736} +{"current_steps": 117415, "total_steps": 204665, "loss": 0.0004, "lr": 9.174090248245318e-07, "epoch": 2.8684679842669727, "percentage": 57.37, "elapsed_time": "2:32:00", "remaining_time": "1:52:57", "throughput": 8677.87, "total_tokens": 79145192} +{"current_steps": 117420, "total_steps": 204665, "loss": 0.0003, "lr": 9.17324038873895e-07, "epoch": 2.86859013509882, "percentage": 57.37, "elapsed_time": "2:32:00", "remaining_time": "1:52:56", "throughput": 8677.92, "total_tokens": 79148712} +{"current_steps": 117425, "total_steps": 204665, "loss": 0.0569, "lr": 9.172390535244996e-07, "epoch": 2.868712285930667, "percentage": 57.37, "elapsed_time": "2:32:01", "remaining_time": "1:52:56", "throughput": 8677.99, "total_tokens": 79152424} +{"current_steps": 117430, "total_steps": 204665, "loss": 0.0572, "lr": 9.171540687769641e-07, "epoch": 2.8688344367625143, "percentage": 57.38, "elapsed_time": "2:32:01", "remaining_time": "1:52:55", "throughput": 8678.02, "total_tokens": 79155688} +{"current_steps": 117435, "total_steps": 204665, "loss": 0.0388, "lr": 9.170690846319069e-07, "epoch": 2.8689565875943615, "percentage": 57.38, "elapsed_time": "2:32:01", "remaining_time": "1:52:55", "throughput": 8678.06, "total_tokens": 79159080} +{"current_steps": 117440, "total_steps": 204665, "loss": 0.0622, "lr": 9.16984101089945e-07, "epoch": 2.8690787384262086, "percentage": 57.38, "elapsed_time": "2:32:02", "remaining_time": "1:52:55", "throughput": 8678.08, "total_tokens": 79162280} +{"current_steps": 117445, "total_steps": 204665, "loss": 0.0002, "lr": 9.168991181516977e-07, "epoch": 2.869200889258056, "percentage": 57.38, "elapsed_time": "2:32:02", "remaining_time": "1:52:54", "throughput": 8678.1, "total_tokens": 79165416} +{"current_steps": 117450, "total_steps": 204665, "loss": 0.0002, "lr": 9.168141358177819e-07, "epoch": 2.869323040089903, "percentage": 57.39, "elapsed_time": "2:32:02", "remaining_time": "1:52:54", "throughput": 8678.16, "total_tokens": 79169000} +{"current_steps": 117455, "total_steps": 204665, "loss": 0.0004, "lr": 9.167291540888163e-07, "epoch": 2.86944519092175, "percentage": 57.39, "elapsed_time": "2:32:03", "remaining_time": "1:52:53", "throughput": 8678.2, "total_tokens": 79172392} +{"current_steps": 117460, "total_steps": 204665, "loss": 0.0382, "lr": 9.166441729654184e-07, "epoch": 2.8695673417535974, "percentage": 57.39, "elapsed_time": "2:32:03", "remaining_time": "1:52:53", "throughput": 8678.23, "total_tokens": 79175592} +{"current_steps": 117465, "total_steps": 204665, "loss": 0.0005, "lr": 9.165591924482066e-07, "epoch": 2.8696894925854446, "percentage": 57.39, "elapsed_time": "2:32:03", "remaining_time": "1:52:53", "throughput": 8678.26, "total_tokens": 79178856} +{"current_steps": 117470, "total_steps": 204665, "loss": 0.0001, "lr": 9.164742125377991e-07, "epoch": 2.869811643417292, "percentage": 57.4, "elapsed_time": "2:32:04", "remaining_time": "1:52:52", "throughput": 8678.29, "total_tokens": 79182184} +{"current_steps": 117475, "total_steps": 204665, "loss": 0.0441, "lr": 9.163892332348133e-07, "epoch": 2.869933794249139, "percentage": 57.4, "elapsed_time": "2:32:04", "remaining_time": "1:52:52", "throughput": 8678.3, "total_tokens": 79185256} +{"current_steps": 117480, "total_steps": 204665, "loss": 0.0002, "lr": 9.163042545398676e-07, "epoch": 2.870055945080986, "percentage": 57.4, "elapsed_time": "2:32:04", "remaining_time": "1:52:51", "throughput": 8678.33, "total_tokens": 79188456} +{"current_steps": 117485, "total_steps": 204665, "loss": 0.0422, "lr": 9.162192764535798e-07, "epoch": 2.8701780959128333, "percentage": 57.4, "elapsed_time": "2:32:05", "remaining_time": "1:52:51", "throughput": 8678.35, "total_tokens": 79191592} +{"current_steps": 117490, "total_steps": 204665, "loss": 0.0001, "lr": 9.161342989765683e-07, "epoch": 2.87030024674468, "percentage": 57.41, "elapsed_time": "2:32:05", "remaining_time": "1:52:50", "throughput": 8678.41, "total_tokens": 79195240} +{"current_steps": 117495, "total_steps": 204665, "loss": 0.0, "lr": 9.160493221094502e-07, "epoch": 2.8704223975765277, "percentage": 57.41, "elapsed_time": "2:32:05", "remaining_time": "1:52:50", "throughput": 8678.44, "total_tokens": 79198504} +{"current_steps": 117500, "total_steps": 204665, "loss": 0.0476, "lr": 9.159643458528441e-07, "epoch": 2.8705445484083745, "percentage": 57.41, "elapsed_time": "2:32:06", "remaining_time": "1:52:50", "throughput": 8678.5, "total_tokens": 79202152} +{"current_steps": 117505, "total_steps": 204665, "loss": 0.1373, "lr": 9.158793702073682e-07, "epoch": 2.8706666992402217, "percentage": 57.41, "elapsed_time": "2:32:06", "remaining_time": "1:52:49", "throughput": 8678.59, "total_tokens": 79206056} +{"current_steps": 117510, "total_steps": 204665, "loss": 0.0454, "lr": 9.157943951736397e-07, "epoch": 2.870788850072069, "percentage": 57.42, "elapsed_time": "2:32:06", "remaining_time": "1:52:49", "throughput": 8678.64, "total_tokens": 79209512} +{"current_steps": 117515, "total_steps": 204665, "loss": 0.0001, "lr": 9.157094207522775e-07, "epoch": 2.870911000903916, "percentage": 57.42, "elapsed_time": "2:32:07", "remaining_time": "1:52:48", "throughput": 8678.68, "total_tokens": 79212968} +{"current_steps": 117520, "total_steps": 204665, "loss": 0.0, "lr": 9.156244469438987e-07, "epoch": 2.8710331517357632, "percentage": 57.42, "elapsed_time": "2:32:07", "remaining_time": "1:52:48", "throughput": 8678.73, "total_tokens": 79216360} +{"current_steps": 117525, "total_steps": 204665, "loss": 0.1394, "lr": 9.155394737491218e-07, "epoch": 2.8711553025676104, "percentage": 57.42, "elapsed_time": "2:32:08", "remaining_time": "1:52:48", "throughput": 8678.79, "total_tokens": 79220008} +{"current_steps": 117530, "total_steps": 204665, "loss": 0.0001, "lr": 9.154545011685645e-07, "epoch": 2.8712774533994576, "percentage": 57.43, "elapsed_time": "2:32:08", "remaining_time": "1:52:47", "throughput": 8678.8, "total_tokens": 79223080} +{"current_steps": 117535, "total_steps": 204665, "loss": 0.0538, "lr": 9.153695292028449e-07, "epoch": 2.871399604231305, "percentage": 57.43, "elapsed_time": "2:32:08", "remaining_time": "1:52:47", "throughput": 8678.84, "total_tokens": 79226408} +{"current_steps": 117540, "total_steps": 204665, "loss": 0.0013, "lr": 9.15284557852581e-07, "epoch": 2.871521755063152, "percentage": 57.43, "elapsed_time": "2:32:09", "remaining_time": "1:52:46", "throughput": 8678.85, "total_tokens": 79229480} +{"current_steps": 117545, "total_steps": 204665, "loss": 0.0001, "lr": 9.1519958711839e-07, "epoch": 2.871643905894999, "percentage": 57.43, "elapsed_time": "2:32:09", "remaining_time": "1:52:46", "throughput": 8678.91, "total_tokens": 79233064} +{"current_steps": 117550, "total_steps": 204665, "loss": 0.0001, "lr": 9.151146170008911e-07, "epoch": 2.8717660567268464, "percentage": 57.44, "elapsed_time": "2:32:09", "remaining_time": "1:52:45", "throughput": 8678.94, "total_tokens": 79236328} +{"current_steps": 117555, "total_steps": 204665, "loss": 0.0425, "lr": 9.150296475007009e-07, "epoch": 2.8718882075586936, "percentage": 57.44, "elapsed_time": "2:32:10", "remaining_time": "1:52:45", "throughput": 8679.01, "total_tokens": 79240040} +{"current_steps": 117560, "total_steps": 204665, "loss": 0.0569, "lr": 9.14944678618438e-07, "epoch": 2.8720103583905408, "percentage": 57.44, "elapsed_time": "2:32:10", "remaining_time": "1:52:45", "throughput": 8679.01, "total_tokens": 79242920} +{"current_steps": 117565, "total_steps": 204665, "loss": 0.0001, "lr": 9.148597103547209e-07, "epoch": 2.872132509222388, "percentage": 57.44, "elapsed_time": "2:32:10", "remaining_time": "1:52:44", "throughput": 8679.03, "total_tokens": 79246056} +{"current_steps": 117570, "total_steps": 204665, "loss": 0.0001, "lr": 9.147747427101663e-07, "epoch": 2.872254660054235, "percentage": 57.45, "elapsed_time": "2:32:11", "remaining_time": "1:52:44", "throughput": 8679.05, "total_tokens": 79249256} +{"current_steps": 117575, "total_steps": 204665, "loss": 0.0872, "lr": 9.146897756853931e-07, "epoch": 2.872376810886082, "percentage": 57.45, "elapsed_time": "2:32:11", "remaining_time": "1:52:43", "throughput": 8679.08, "total_tokens": 79252456} +{"current_steps": 117580, "total_steps": 204665, "loss": 0.0549, "lr": 9.146048092810184e-07, "epoch": 2.8724989617179295, "percentage": 57.45, "elapsed_time": "2:32:11", "remaining_time": "1:52:43", "throughput": 8679.11, "total_tokens": 79255720} +{"current_steps": 117585, "total_steps": 204665, "loss": 0.0717, "lr": 9.145198434976609e-07, "epoch": 2.8726211125497763, "percentage": 57.45, "elapsed_time": "2:32:12", "remaining_time": "1:52:42", "throughput": 8679.13, "total_tokens": 79258856} +{"current_steps": 117590, "total_steps": 204665, "loss": 0.1152, "lr": 9.144348783359379e-07, "epoch": 2.872743263381624, "percentage": 57.45, "elapsed_time": "2:32:12", "remaining_time": "1:52:42", "throughput": 8679.16, "total_tokens": 79262184} +{"current_steps": 117595, "total_steps": 204665, "loss": 0.0368, "lr": 9.143499137964673e-07, "epoch": 2.8728654142134706, "percentage": 57.46, "elapsed_time": "2:32:12", "remaining_time": "1:52:42", "throughput": 8679.17, "total_tokens": 79265320} +{"current_steps": 117600, "total_steps": 204665, "loss": 0.0002, "lr": 9.142649498798675e-07, "epoch": 2.872987565045318, "percentage": 57.46, "elapsed_time": "2:32:13", "remaining_time": "1:52:41", "throughput": 8679.19, "total_tokens": 79268456} +{"current_steps": 117605, "total_steps": 204665, "loss": 0.0458, "lr": 9.141799865867558e-07, "epoch": 2.873109715877165, "percentage": 57.46, "elapsed_time": "2:32:13", "remaining_time": "1:52:41", "throughput": 8679.21, "total_tokens": 79271656} +{"current_steps": 117610, "total_steps": 204665, "loss": 0.0651, "lr": 9.140950239177505e-07, "epoch": 2.873231866709012, "percentage": 57.46, "elapsed_time": "2:32:13", "remaining_time": "1:52:40", "throughput": 8679.28, "total_tokens": 79275304} +{"current_steps": 117615, "total_steps": 204665, "loss": 0.0012, "lr": 9.140100618734691e-07, "epoch": 2.8733540175408594, "percentage": 57.47, "elapsed_time": "2:32:14", "remaining_time": "1:52:40", "throughput": 8679.31, "total_tokens": 79278632} +{"current_steps": 117620, "total_steps": 204665, "loss": 0.0002, "lr": 9.139251004545296e-07, "epoch": 2.8734761683727066, "percentage": 57.47, "elapsed_time": "2:32:14", "remaining_time": "1:52:40", "throughput": 8679.42, "total_tokens": 79282728} +{"current_steps": 117625, "total_steps": 204665, "loss": 0.0611, "lr": 9.138401396615502e-07, "epoch": 2.8735983192045538, "percentage": 57.47, "elapsed_time": "2:32:14", "remaining_time": "1:52:39", "throughput": 8679.44, "total_tokens": 79285928} +{"current_steps": 117630, "total_steps": 204665, "loss": 0.0664, "lr": 9.137551794951483e-07, "epoch": 2.873720470036401, "percentage": 57.47, "elapsed_time": "2:32:15", "remaining_time": "1:52:39", "throughput": 8679.46, "total_tokens": 79289128} +{"current_steps": 117635, "total_steps": 204665, "loss": 0.0003, "lr": 9.136702199559421e-07, "epoch": 2.873842620868248, "percentage": 57.48, "elapsed_time": "2:32:15", "remaining_time": "1:52:38", "throughput": 8679.5, "total_tokens": 79292520} +{"current_steps": 117640, "total_steps": 204665, "loss": 0.0246, "lr": 9.135852610445491e-07, "epoch": 2.8739647717000953, "percentage": 57.48, "elapsed_time": "2:32:15", "remaining_time": "1:52:38", "throughput": 8679.52, "total_tokens": 79295656} +{"current_steps": 117645, "total_steps": 204665, "loss": 0.0599, "lr": 9.135003027615876e-07, "epoch": 2.8740869225319425, "percentage": 57.48, "elapsed_time": "2:32:16", "remaining_time": "1:52:37", "throughput": 8679.56, "total_tokens": 79299112} +{"current_steps": 117650, "total_steps": 204665, "loss": 0.0503, "lr": 9.134153451076748e-07, "epoch": 2.8742090733637897, "percentage": 57.48, "elapsed_time": "2:32:16", "remaining_time": "1:52:37", "throughput": 8679.6, "total_tokens": 79302504} +{"current_steps": 117655, "total_steps": 204665, "loss": 0.0781, "lr": 9.133303880834287e-07, "epoch": 2.874331224195637, "percentage": 57.49, "elapsed_time": "2:32:17", "remaining_time": "1:52:37", "throughput": 8679.64, "total_tokens": 79305896} +{"current_steps": 117660, "total_steps": 204665, "loss": 0.1096, "lr": 9.13245431689468e-07, "epoch": 2.874453375027484, "percentage": 57.49, "elapsed_time": "2:32:17", "remaining_time": "1:52:36", "throughput": 8679.68, "total_tokens": 79309288} +{"current_steps": 117665, "total_steps": 204665, "loss": 0.0001, "lr": 9.131604759264093e-07, "epoch": 2.8745755258593313, "percentage": 57.49, "elapsed_time": "2:32:17", "remaining_time": "1:52:36", "throughput": 8679.71, "total_tokens": 79312616} +{"current_steps": 117670, "total_steps": 204665, "loss": 0.027, "lr": 9.130755207948715e-07, "epoch": 2.874697676691178, "percentage": 57.49, "elapsed_time": "2:32:18", "remaining_time": "1:52:35", "throughput": 8679.74, "total_tokens": 79315816} +{"current_steps": 117675, "total_steps": 204665, "loss": 0.0957, "lr": 9.129905662954713e-07, "epoch": 2.8748198275230257, "percentage": 57.5, "elapsed_time": "2:32:18", "remaining_time": "1:52:35", "throughput": 8679.77, "total_tokens": 79319144} +{"current_steps": 117680, "total_steps": 204665, "loss": 0.062, "lr": 9.129056124288275e-07, "epoch": 2.8749419783548724, "percentage": 57.5, "elapsed_time": "2:32:18", "remaining_time": "1:52:35", "throughput": 8679.81, "total_tokens": 79322600} +{"current_steps": 117685, "total_steps": 204665, "loss": 0.0008, "lr": 9.128206591955574e-07, "epoch": 2.8750641291867196, "percentage": 57.5, "elapsed_time": "2:32:19", "remaining_time": "1:52:34", "throughput": 8679.88, "total_tokens": 79326248} +{"current_steps": 117690, "total_steps": 204665, "loss": 0.0772, "lr": 9.12735706596279e-07, "epoch": 2.875186280018567, "percentage": 57.5, "elapsed_time": "2:32:19", "remaining_time": "1:52:34", "throughput": 8679.91, "total_tokens": 79329576} +{"current_steps": 117695, "total_steps": 204665, "loss": 0.0556, "lr": 9.126507546316102e-07, "epoch": 2.875308430850414, "percentage": 57.51, "elapsed_time": "2:32:19", "remaining_time": "1:52:33", "throughput": 8679.94, "total_tokens": 79332840} +{"current_steps": 117700, "total_steps": 204665, "loss": 0.0282, "lr": 9.125658033021682e-07, "epoch": 2.875430581682261, "percentage": 57.51, "elapsed_time": "2:32:20", "remaining_time": "1:52:33", "throughput": 8679.96, "total_tokens": 79335976} +{"current_steps": 117705, "total_steps": 204665, "loss": 0.146, "lr": 9.124808526085714e-07, "epoch": 2.8755527325141084, "percentage": 57.51, "elapsed_time": "2:32:20", "remaining_time": "1:52:32", "throughput": 8680.16, "total_tokens": 79341288} +{"current_steps": 117710, "total_steps": 204665, "loss": 0.0616, "lr": 9.123959025514372e-07, "epoch": 2.8756748833459556, "percentage": 57.51, "elapsed_time": "2:32:20", "remaining_time": "1:52:32", "throughput": 8680.18, "total_tokens": 79344424} +{"current_steps": 117715, "total_steps": 204665, "loss": 0.0348, "lr": 9.123109531313836e-07, "epoch": 2.8757970341778027, "percentage": 57.52, "elapsed_time": "2:32:21", "remaining_time": "1:52:32", "throughput": 8680.21, "total_tokens": 79347816} +{"current_steps": 117720, "total_steps": 204665, "loss": 0.026, "lr": 9.122260043490285e-07, "epoch": 2.87591918500965, "percentage": 57.52, "elapsed_time": "2:32:21", "remaining_time": "1:52:31", "throughput": 8680.28, "total_tokens": 79351528} +{"current_steps": 117725, "total_steps": 204665, "loss": 0.0003, "lr": 9.121410562049893e-07, "epoch": 2.876041335841497, "percentage": 57.52, "elapsed_time": "2:32:21", "remaining_time": "1:52:31", "throughput": 8680.32, "total_tokens": 79354984} +{"current_steps": 117730, "total_steps": 204665, "loss": 0.0001, "lr": 9.120561086998842e-07, "epoch": 2.8761634866733443, "percentage": 57.52, "elapsed_time": "2:32:22", "remaining_time": "1:52:30", "throughput": 8680.36, "total_tokens": 79358312} +{"current_steps": 117735, "total_steps": 204665, "loss": 0.0341, "lr": 9.119711618343305e-07, "epoch": 2.8762856375051915, "percentage": 57.53, "elapsed_time": "2:32:22", "remaining_time": "1:52:30", "throughput": 8680.37, "total_tokens": 79361448} +{"current_steps": 117740, "total_steps": 204665, "loss": 0.0311, "lr": 9.118862156089465e-07, "epoch": 2.8764077883370387, "percentage": 57.53, "elapsed_time": "2:32:22", "remaining_time": "1:52:30", "throughput": 8680.4, "total_tokens": 79364712} +{"current_steps": 117745, "total_steps": 204665, "loss": 0.0303, "lr": 9.118012700243495e-07, "epoch": 2.876529939168886, "percentage": 57.53, "elapsed_time": "2:32:23", "remaining_time": "1:52:29", "throughput": 8680.47, "total_tokens": 79368424} +{"current_steps": 117750, "total_steps": 204665, "loss": 0.0004, "lr": 9.117163250811571e-07, "epoch": 2.876652090000733, "percentage": 57.53, "elapsed_time": "2:32:23", "remaining_time": "1:52:29", "throughput": 8680.51, "total_tokens": 79371880} +{"current_steps": 117755, "total_steps": 204665, "loss": 0.0165, "lr": 9.116313807799878e-07, "epoch": 2.87677424083258, "percentage": 57.54, "elapsed_time": "2:32:24", "remaining_time": "1:52:28", "throughput": 8680.54, "total_tokens": 79375144} +{"current_steps": 117760, "total_steps": 204665, "loss": 0.0751, "lr": 9.115464371214585e-07, "epoch": 2.8768963916644275, "percentage": 57.54, "elapsed_time": "2:32:24", "remaining_time": "1:52:28", "throughput": 8680.57, "total_tokens": 79378472} +{"current_steps": 117765, "total_steps": 204665, "loss": 0.1053, "lr": 9.114614941061877e-07, "epoch": 2.877018542496274, "percentage": 57.54, "elapsed_time": "2:32:24", "remaining_time": "1:52:27", "throughput": 8680.64, "total_tokens": 79382184} +{"current_steps": 117770, "total_steps": 204665, "loss": 0.0002, "lr": 9.113765517347922e-07, "epoch": 2.877140693328122, "percentage": 57.54, "elapsed_time": "2:32:25", "remaining_time": "1:52:27", "throughput": 8680.71, "total_tokens": 79385896} +{"current_steps": 117775, "total_steps": 204665, "loss": 0.0003, "lr": 9.112916100078903e-07, "epoch": 2.8772628441599686, "percentage": 57.55, "elapsed_time": "2:32:25", "remaining_time": "1:52:27", "throughput": 8680.77, "total_tokens": 79389480} +{"current_steps": 117780, "total_steps": 204665, "loss": 0.141, "lr": 9.112066689261001e-07, "epoch": 2.8773849949918158, "percentage": 57.55, "elapsed_time": "2:32:25", "remaining_time": "1:52:26", "throughput": 8680.8, "total_tokens": 79392808} +{"current_steps": 117785, "total_steps": 204665, "loss": 0.051, "lr": 9.111217284900387e-07, "epoch": 2.877507145823663, "percentage": 57.55, "elapsed_time": "2:32:26", "remaining_time": "1:52:26", "throughput": 8680.85, "total_tokens": 79396328} +{"current_steps": 117790, "total_steps": 204665, "loss": 0.0002, "lr": 9.110367887003241e-07, "epoch": 2.87762929665551, "percentage": 57.55, "elapsed_time": "2:32:26", "remaining_time": "1:52:25", "throughput": 8680.9, "total_tokens": 79399848} +{"current_steps": 117795, "total_steps": 204665, "loss": 0.0005, "lr": 9.109518495575736e-07, "epoch": 2.8777514474873573, "percentage": 57.56, "elapsed_time": "2:32:26", "remaining_time": "1:52:25", "throughput": 8680.94, "total_tokens": 79403304} +{"current_steps": 117800, "total_steps": 204665, "loss": 0.0006, "lr": 9.108669110624055e-07, "epoch": 2.8778735983192045, "percentage": 57.56, "elapsed_time": "2:32:27", "remaining_time": "1:52:25", "throughput": 8680.95, "total_tokens": 79406312} +{"current_steps": 117805, "total_steps": 204665, "loss": 0.0482, "lr": 9.107819732154371e-07, "epoch": 2.8779957491510517, "percentage": 57.56, "elapsed_time": "2:32:27", "remaining_time": "1:52:24", "throughput": 8680.98, "total_tokens": 79409576} +{"current_steps": 117810, "total_steps": 204665, "loss": 0.0002, "lr": 9.10697036017286e-07, "epoch": 2.878117899982899, "percentage": 57.56, "elapsed_time": "2:32:27", "remaining_time": "1:52:24", "throughput": 8681.02, "total_tokens": 79413032} +{"current_steps": 117815, "total_steps": 204665, "loss": 0.0005, "lr": 9.106120994685704e-07, "epoch": 2.878240050814746, "percentage": 57.56, "elapsed_time": "2:32:28", "remaining_time": "1:52:23", "throughput": 8681.06, "total_tokens": 79416360} +{"current_steps": 117820, "total_steps": 204665, "loss": 0.0006, "lr": 9.105271635699072e-07, "epoch": 2.8783622016465933, "percentage": 57.57, "elapsed_time": "2:32:28", "remaining_time": "1:52:23", "throughput": 8681.06, "total_tokens": 79419304} +{"current_steps": 117825, "total_steps": 204665, "loss": 0.0025, "lr": 9.104422283219151e-07, "epoch": 2.8784843524784405, "percentage": 57.57, "elapsed_time": "2:32:28", "remaining_time": "1:52:22", "throughput": 8681.11, "total_tokens": 79422824} +{"current_steps": 117830, "total_steps": 204665, "loss": 0.0386, "lr": 9.103572937252107e-07, "epoch": 2.8786065033102877, "percentage": 57.57, "elapsed_time": "2:32:29", "remaining_time": "1:52:22", "throughput": 8681.14, "total_tokens": 79426152} +{"current_steps": 117835, "total_steps": 204665, "loss": 0.0001, "lr": 9.102723597804125e-07, "epoch": 2.878728654142135, "percentage": 57.57, "elapsed_time": "2:32:29", "remaining_time": "1:52:22", "throughput": 8681.18, "total_tokens": 79429544} +{"current_steps": 117840, "total_steps": 204665, "loss": 0.0002, "lr": 9.101874264881377e-07, "epoch": 2.8788508049739816, "percentage": 57.58, "elapsed_time": "2:32:29", "remaining_time": "1:52:21", "throughput": 8681.21, "total_tokens": 79432872} +{"current_steps": 117845, "total_steps": 204665, "loss": 0.04, "lr": 9.101024938490041e-07, "epoch": 2.8789729558058292, "percentage": 57.58, "elapsed_time": "2:32:30", "remaining_time": "1:52:21", "throughput": 8681.25, "total_tokens": 79436264} +{"current_steps": 117850, "total_steps": 204665, "loss": 0.0291, "lr": 9.100175618636296e-07, "epoch": 2.879095106637676, "percentage": 57.58, "elapsed_time": "2:32:30", "remaining_time": "1:52:20", "throughput": 8681.36, "total_tokens": 79440360} +{"current_steps": 117855, "total_steps": 204665, "loss": 0.0001, "lr": 9.099326305326311e-07, "epoch": 2.8792172574695236, "percentage": 57.58, "elapsed_time": "2:32:31", "remaining_time": "1:52:20", "throughput": 8681.4, "total_tokens": 79443880} +{"current_steps": 117860, "total_steps": 204665, "loss": 0.0001, "lr": 9.098476998566274e-07, "epoch": 2.8793394083013704, "percentage": 57.59, "elapsed_time": "2:32:31", "remaining_time": "1:52:20", "throughput": 8681.43, "total_tokens": 79447080} +{"current_steps": 117865, "total_steps": 204665, "loss": 0.0258, "lr": 9.097627698362348e-07, "epoch": 2.8794615591332176, "percentage": 57.59, "elapsed_time": "2:32:31", "remaining_time": "1:52:19", "throughput": 8681.48, "total_tokens": 79450600} +{"current_steps": 117870, "total_steps": 204665, "loss": 0.0003, "lr": 9.096778404720716e-07, "epoch": 2.8795837099650647, "percentage": 57.59, "elapsed_time": "2:32:32", "remaining_time": "1:52:19", "throughput": 8681.51, "total_tokens": 79453864} +{"current_steps": 117875, "total_steps": 204665, "loss": 0.0002, "lr": 9.095929117647559e-07, "epoch": 2.879705860796912, "percentage": 57.59, "elapsed_time": "2:32:32", "remaining_time": "1:52:18", "throughput": 8681.59, "total_tokens": 79457768} +{"current_steps": 117880, "total_steps": 204665, "loss": 0.1143, "lr": 9.095079837149046e-07, "epoch": 2.879828011628759, "percentage": 57.6, "elapsed_time": "2:32:32", "remaining_time": "1:52:18", "throughput": 8681.64, "total_tokens": 79461352} +{"current_steps": 117885, "total_steps": 204665, "loss": 0.0, "lr": 9.094230563231359e-07, "epoch": 2.8799501624606063, "percentage": 57.6, "elapsed_time": "2:32:33", "remaining_time": "1:52:18", "throughput": 8681.68, "total_tokens": 79464744} +{"current_steps": 117890, "total_steps": 204665, "loss": 0.0752, "lr": 9.093381295900666e-07, "epoch": 2.8800723132924535, "percentage": 57.6, "elapsed_time": "2:32:33", "remaining_time": "1:52:17", "throughput": 8681.71, "total_tokens": 79468072} +{"current_steps": 117895, "total_steps": 204665, "loss": 0.0001, "lr": 9.092532035163153e-07, "epoch": 2.8801944641243007, "percentage": 57.6, "elapsed_time": "2:32:33", "remaining_time": "1:52:17", "throughput": 8681.8, "total_tokens": 79471976} +{"current_steps": 117900, "total_steps": 204665, "loss": 0.061, "lr": 9.091682781024989e-07, "epoch": 2.880316614956148, "percentage": 57.61, "elapsed_time": "2:32:34", "remaining_time": "1:52:16", "throughput": 8681.82, "total_tokens": 79475176} +{"current_steps": 117905, "total_steps": 204665, "loss": 0.0468, "lr": 9.090833533492349e-07, "epoch": 2.880438765787995, "percentage": 57.61, "elapsed_time": "2:32:34", "remaining_time": "1:52:16", "throughput": 8681.89, "total_tokens": 79478888} +{"current_steps": 117910, "total_steps": 204665, "loss": 0.0002, "lr": 9.089984292571418e-07, "epoch": 2.8805609166198423, "percentage": 57.61, "elapsed_time": "2:32:34", "remaining_time": "1:52:15", "throughput": 8681.97, "total_tokens": 79482792} +{"current_steps": 117915, "total_steps": 204665, "loss": 0.1394, "lr": 9.08913505826836e-07, "epoch": 2.8806830674516894, "percentage": 57.61, "elapsed_time": "2:32:35", "remaining_time": "1:52:15", "throughput": 8682.01, "total_tokens": 79486120} +{"current_steps": 117920, "total_steps": 204665, "loss": 0.031, "lr": 9.088285830589362e-07, "epoch": 2.8808052182835366, "percentage": 57.62, "elapsed_time": "2:32:35", "remaining_time": "1:52:15", "throughput": 8682.01, "total_tokens": 79489064} +{"current_steps": 117925, "total_steps": 204665, "loss": 0.1359, "lr": 9.087436609540591e-07, "epoch": 2.880927369115384, "percentage": 57.62, "elapsed_time": "2:32:35", "remaining_time": "1:52:14", "throughput": 8682.05, "total_tokens": 79492456} +{"current_steps": 117930, "total_steps": 204665, "loss": 0.0421, "lr": 9.086587395128226e-07, "epoch": 2.881049519947231, "percentage": 57.62, "elapsed_time": "2:32:36", "remaining_time": "1:52:14", "throughput": 8682.09, "total_tokens": 79495848} +{"current_steps": 117935, "total_steps": 204665, "loss": 0.0386, "lr": 9.085738187358447e-07, "epoch": 2.8811716707790778, "percentage": 57.62, "elapsed_time": "2:32:36", "remaining_time": "1:52:13", "throughput": 8682.1, "total_tokens": 79498856} +{"current_steps": 117940, "total_steps": 204665, "loss": 0.0005, "lr": 9.084888986237425e-07, "epoch": 2.8812938216109254, "percentage": 57.63, "elapsed_time": "2:32:36", "remaining_time": "1:52:13", "throughput": 8682.13, "total_tokens": 79502184} +{"current_steps": 117945, "total_steps": 204665, "loss": 0.163, "lr": 9.084039791771334e-07, "epoch": 2.881415972442772, "percentage": 57.63, "elapsed_time": "2:32:37", "remaining_time": "1:52:12", "throughput": 8682.13, "total_tokens": 79505128} +{"current_steps": 117950, "total_steps": 204665, "loss": 0.0004, "lr": 9.083190603966354e-07, "epoch": 2.8815381232746193, "percentage": 57.63, "elapsed_time": "2:32:37", "remaining_time": "1:52:12", "throughput": 8682.17, "total_tokens": 79508520} +{"current_steps": 117955, "total_steps": 204665, "loss": 0.0002, "lr": 9.082341422828657e-07, "epoch": 2.8816602741064665, "percentage": 57.63, "elapsed_time": "2:32:38", "remaining_time": "1:52:12", "throughput": 8682.19, "total_tokens": 79511656} +{"current_steps": 117960, "total_steps": 204665, "loss": 0.0002, "lr": 9.081492248364422e-07, "epoch": 2.8817824249383137, "percentage": 57.64, "elapsed_time": "2:32:38", "remaining_time": "1:52:11", "throughput": 8682.3, "total_tokens": 79515816} +{"current_steps": 117965, "total_steps": 204665, "loss": 0.0003, "lr": 9.080643080579818e-07, "epoch": 2.881904575770161, "percentage": 57.64, "elapsed_time": "2:32:38", "remaining_time": "1:52:11", "throughput": 8682.32, "total_tokens": 79519016} +{"current_steps": 117970, "total_steps": 204665, "loss": 0.0845, "lr": 9.079793919481032e-07, "epoch": 2.882026726602008, "percentage": 57.64, "elapsed_time": "2:32:39", "remaining_time": "1:52:10", "throughput": 8682.35, "total_tokens": 79522280} +{"current_steps": 117975, "total_steps": 204665, "loss": 0.0587, "lr": 9.078944765074225e-07, "epoch": 2.8821488774338553, "percentage": 57.64, "elapsed_time": "2:32:39", "remaining_time": "1:52:10", "throughput": 8682.42, "total_tokens": 79525992} +{"current_steps": 117980, "total_steps": 204665, "loss": 0.0467, "lr": 9.078095617365584e-07, "epoch": 2.8822710282657025, "percentage": 57.65, "elapsed_time": "2:32:39", "remaining_time": "1:52:10", "throughput": 8682.43, "total_tokens": 79529128} +{"current_steps": 117985, "total_steps": 204665, "loss": 0.0002, "lr": 9.077246476361276e-07, "epoch": 2.8823931790975497, "percentage": 57.65, "elapsed_time": "2:32:40", "remaining_time": "1:52:09", "throughput": 8682.48, "total_tokens": 79532648} +{"current_steps": 117990, "total_steps": 204665, "loss": 0.1119, "lr": 9.076397342067483e-07, "epoch": 2.882515329929397, "percentage": 57.65, "elapsed_time": "2:32:40", "remaining_time": "1:52:09", "throughput": 8682.5, "total_tokens": 79535720} +{"current_steps": 117995, "total_steps": 204665, "loss": 0.0794, "lr": 9.075548214490376e-07, "epoch": 2.882637480761244, "percentage": 57.65, "elapsed_time": "2:32:40", "remaining_time": "1:52:08", "throughput": 8682.57, "total_tokens": 79539496} +{"current_steps": 118000, "total_steps": 204665, "loss": 0.0648, "lr": 9.074699093636131e-07, "epoch": 2.8827596315930912, "percentage": 57.66, "elapsed_time": "2:32:41", "remaining_time": "1:52:08", "throughput": 8682.58, "total_tokens": 79542568} +{"current_steps": 118005, "total_steps": 204665, "loss": 0.1219, "lr": 9.073849979510926e-07, "epoch": 2.8828817824249384, "percentage": 57.66, "elapsed_time": "2:32:41", "remaining_time": "1:52:07", "throughput": 8682.61, "total_tokens": 79545832} +{"current_steps": 118010, "total_steps": 204665, "loss": 0.0005, "lr": 9.073000872120927e-07, "epoch": 2.8830039332567856, "percentage": 57.66, "elapsed_time": "2:32:41", "remaining_time": "1:52:07", "throughput": 8682.63, "total_tokens": 79548968} +{"current_steps": 118015, "total_steps": 204665, "loss": 0.0312, "lr": 9.072151771472321e-07, "epoch": 2.883126084088633, "percentage": 57.66, "elapsed_time": "2:32:42", "remaining_time": "1:52:07", "throughput": 8682.69, "total_tokens": 79552552} +{"current_steps": 118020, "total_steps": 204665, "loss": 0.0009, "lr": 9.071302677571272e-07, "epoch": 2.8832482349204795, "percentage": 57.66, "elapsed_time": "2:32:42", "remaining_time": "1:52:06", "throughput": 8682.71, "total_tokens": 79555752} +{"current_steps": 118025, "total_steps": 204665, "loss": 0.022, "lr": 9.070453590423959e-07, "epoch": 2.883370385752327, "percentage": 57.67, "elapsed_time": "2:32:42", "remaining_time": "1:52:06", "throughput": 8682.73, "total_tokens": 79558888} +{"current_steps": 118030, "total_steps": 204665, "loss": 0.0734, "lr": 9.069604510036563e-07, "epoch": 2.883492536584174, "percentage": 57.67, "elapsed_time": "2:32:43", "remaining_time": "1:52:05", "throughput": 8682.79, "total_tokens": 79562472} +{"current_steps": 118035, "total_steps": 204665, "loss": 0.0395, "lr": 9.068755436415247e-07, "epoch": 2.8836146874160216, "percentage": 57.67, "elapsed_time": "2:32:43", "remaining_time": "1:52:05", "throughput": 8682.79, "total_tokens": 79565480} +{"current_steps": 118040, "total_steps": 204665, "loss": 0.0004, "lr": 9.067906369566198e-07, "epoch": 2.8837368382478683, "percentage": 57.67, "elapsed_time": "2:32:43", "remaining_time": "1:52:05", "throughput": 8682.81, "total_tokens": 79568616} +{"current_steps": 118045, "total_steps": 204665, "loss": 0.0279, "lr": 9.06705730949558e-07, "epoch": 2.8838589890797155, "percentage": 57.68, "elapsed_time": "2:32:44", "remaining_time": "1:52:04", "throughput": 8682.82, "total_tokens": 79571688} +{"current_steps": 118050, "total_steps": 204665, "loss": 0.0527, "lr": 9.066208256209576e-07, "epoch": 2.8839811399115627, "percentage": 57.68, "elapsed_time": "2:32:44", "remaining_time": "1:52:04", "throughput": 8682.85, "total_tokens": 79574952} +{"current_steps": 118055, "total_steps": 204665, "loss": 0.1484, "lr": 9.065359209714356e-07, "epoch": 2.88410329074341, "percentage": 57.68, "elapsed_time": "2:32:44", "remaining_time": "1:52:03", "throughput": 8682.87, "total_tokens": 79578088} +{"current_steps": 118060, "total_steps": 204665, "loss": 0.0526, "lr": 9.064510170016092e-07, "epoch": 2.884225441575257, "percentage": 57.68, "elapsed_time": "2:32:45", "remaining_time": "1:52:03", "throughput": 8682.91, "total_tokens": 79581544} +{"current_steps": 118065, "total_steps": 204665, "loss": 0.0003, "lr": 9.063661137120966e-07, "epoch": 2.8843475924071043, "percentage": 57.69, "elapsed_time": "2:32:45", "remaining_time": "1:52:02", "throughput": 8682.92, "total_tokens": 79584616} +{"current_steps": 118070, "total_steps": 204665, "loss": 0.0003, "lr": 9.062812111035143e-07, "epoch": 2.8844697432389514, "percentage": 57.69, "elapsed_time": "2:32:46", "remaining_time": "1:52:02", "throughput": 8683.0, "total_tokens": 79588456} +{"current_steps": 118075, "total_steps": 204665, "loss": 0.048, "lr": 9.061963091764809e-07, "epoch": 2.8845918940707986, "percentage": 57.69, "elapsed_time": "2:32:46", "remaining_time": "1:52:02", "throughput": 8683.01, "total_tokens": 79591400} +{"current_steps": 118080, "total_steps": 204665, "loss": 0.0317, "lr": 9.061114079316124e-07, "epoch": 2.884714044902646, "percentage": 57.69, "elapsed_time": "2:32:46", "remaining_time": "1:52:01", "throughput": 8683.07, "total_tokens": 79595112} +{"current_steps": 118085, "total_steps": 204665, "loss": 0.0711, "lr": 9.060265073695272e-07, "epoch": 2.884836195734493, "percentage": 57.7, "elapsed_time": "2:32:47", "remaining_time": "1:52:01", "throughput": 8683.08, "total_tokens": 79598120} +{"current_steps": 118090, "total_steps": 204665, "loss": 0.0002, "lr": 9.059416074908429e-07, "epoch": 2.88495834656634, "percentage": 57.7, "elapsed_time": "2:32:47", "remaining_time": "1:52:00", "throughput": 8683.1, "total_tokens": 79601256} +{"current_steps": 118095, "total_steps": 204665, "loss": 0.0311, "lr": 9.058567082961764e-07, "epoch": 2.8850804973981874, "percentage": 57.7, "elapsed_time": "2:32:47", "remaining_time": "1:52:00", "throughput": 8683.11, "total_tokens": 79604328} +{"current_steps": 118100, "total_steps": 204665, "loss": 0.0378, "lr": 9.057718097861452e-07, "epoch": 2.8852026482300346, "percentage": 57.7, "elapsed_time": "2:32:48", "remaining_time": "1:52:00", "throughput": 8683.15, "total_tokens": 79607720} +{"current_steps": 118105, "total_steps": 204665, "loss": 0.0387, "lr": 9.056869119613667e-07, "epoch": 2.8853247990618818, "percentage": 57.71, "elapsed_time": "2:32:48", "remaining_time": "1:51:59", "throughput": 8683.18, "total_tokens": 79611048} +{"current_steps": 118110, "total_steps": 204665, "loss": 0.0, "lr": 9.056020148224584e-07, "epoch": 2.885446949893729, "percentage": 57.71, "elapsed_time": "2:32:48", "remaining_time": "1:51:59", "throughput": 8683.24, "total_tokens": 79614632} +{"current_steps": 118115, "total_steps": 204665, "loss": 0.0003, "lr": 9.055171183700376e-07, "epoch": 2.8855691007255757, "percentage": 57.71, "elapsed_time": "2:32:49", "remaining_time": "1:51:58", "throughput": 8683.28, "total_tokens": 79617960} +{"current_steps": 118120, "total_steps": 204665, "loss": 0.0904, "lr": 9.054322226047214e-07, "epoch": 2.8856912515574233, "percentage": 57.71, "elapsed_time": "2:32:49", "remaining_time": "1:51:58", "throughput": 8683.33, "total_tokens": 79621480} +{"current_steps": 118125, "total_steps": 204665, "loss": 0.0001, "lr": 9.053473275271281e-07, "epoch": 2.88581340238927, "percentage": 57.72, "elapsed_time": "2:32:49", "remaining_time": "1:51:57", "throughput": 8683.37, "total_tokens": 79624872} +{"current_steps": 118130, "total_steps": 204665, "loss": 0.0001, "lr": 9.05262433137874e-07, "epoch": 2.8859355532211173, "percentage": 57.72, "elapsed_time": "2:32:50", "remaining_time": "1:51:57", "throughput": 8683.4, "total_tokens": 79628200} +{"current_steps": 118135, "total_steps": 204665, "loss": 0.0235, "lr": 9.051775394375775e-07, "epoch": 2.8860577040529645, "percentage": 57.72, "elapsed_time": "2:32:50", "remaining_time": "1:51:57", "throughput": 8683.44, "total_tokens": 79631656} +{"current_steps": 118140, "total_steps": 204665, "loss": 0.0429, "lr": 9.050926464268549e-07, "epoch": 2.8861798548848117, "percentage": 57.72, "elapsed_time": "2:32:50", "remaining_time": "1:51:56", "throughput": 8683.49, "total_tokens": 79635112} +{"current_steps": 118145, "total_steps": 204665, "loss": 0.0283, "lr": 9.050077541063243e-07, "epoch": 2.886302005716659, "percentage": 57.73, "elapsed_time": "2:32:51", "remaining_time": "1:51:56", "throughput": 8683.51, "total_tokens": 79638248} +{"current_steps": 118150, "total_steps": 204665, "loss": 0.0365, "lr": 9.049228624766029e-07, "epoch": 2.886424156548506, "percentage": 57.73, "elapsed_time": "2:32:51", "remaining_time": "1:51:55", "throughput": 8683.51, "total_tokens": 79641256} +{"current_steps": 118155, "total_steps": 204665, "loss": 0.0593, "lr": 9.04837971538308e-07, "epoch": 2.8865463073803532, "percentage": 57.73, "elapsed_time": "2:32:51", "remaining_time": "1:51:55", "throughput": 8683.57, "total_tokens": 79644776} +{"current_steps": 118160, "total_steps": 204665, "loss": 0.0001, "lr": 9.047530812920572e-07, "epoch": 2.8866684582122004, "percentage": 57.73, "elapsed_time": "2:32:52", "remaining_time": "1:51:55", "throughput": 8683.65, "total_tokens": 79648680} +{"current_steps": 118165, "total_steps": 204665, "loss": 0.0596, "lr": 9.046681917384672e-07, "epoch": 2.8867906090440476, "percentage": 57.74, "elapsed_time": "2:32:52", "remaining_time": "1:51:54", "throughput": 8683.69, "total_tokens": 79652008} +{"current_steps": 118170, "total_steps": 204665, "loss": 0.0422, "lr": 9.045833028781562e-07, "epoch": 2.886912759875895, "percentage": 57.74, "elapsed_time": "2:32:52", "remaining_time": "1:51:54", "throughput": 8683.72, "total_tokens": 79655336} +{"current_steps": 118175, "total_steps": 204665, "loss": 0.0441, "lr": 9.044984147117406e-07, "epoch": 2.887034910707742, "percentage": 57.74, "elapsed_time": "2:32:53", "remaining_time": "1:51:53", "throughput": 8683.74, "total_tokens": 79658408} +{"current_steps": 118180, "total_steps": 204665, "loss": 0.0003, "lr": 9.044135272398382e-07, "epoch": 2.887157061539589, "percentage": 57.74, "elapsed_time": "2:32:53", "remaining_time": "1:51:53", "throughput": 8683.75, "total_tokens": 79661480} +{"current_steps": 118185, "total_steps": 204665, "loss": 0.0311, "lr": 9.043286404630668e-07, "epoch": 2.8872792123714364, "percentage": 57.75, "elapsed_time": "2:32:53", "remaining_time": "1:51:52", "throughput": 8683.78, "total_tokens": 79664808} +{"current_steps": 118190, "total_steps": 204665, "loss": 0.1177, "lr": 9.042437543820428e-07, "epoch": 2.8874013632032836, "percentage": 57.75, "elapsed_time": "2:32:54", "remaining_time": "1:51:52", "throughput": 8683.81, "total_tokens": 79668072} +{"current_steps": 118195, "total_steps": 204665, "loss": 0.0004, "lr": 9.041588689973845e-07, "epoch": 2.8875235140351307, "percentage": 57.75, "elapsed_time": "2:32:54", "remaining_time": "1:51:52", "throughput": 8683.84, "total_tokens": 79671336} +{"current_steps": 118200, "total_steps": 204665, "loss": 0.0513, "lr": 9.040739843097082e-07, "epoch": 2.8876456648669775, "percentage": 57.75, "elapsed_time": "2:32:55", "remaining_time": "1:51:51", "throughput": 8683.89, "total_tokens": 79674856} +{"current_steps": 118205, "total_steps": 204665, "loss": 0.0004, "lr": 9.03989100319632e-07, "epoch": 2.887767815698825, "percentage": 57.76, "elapsed_time": "2:32:55", "remaining_time": "1:51:51", "throughput": 8683.92, "total_tokens": 79678120} +{"current_steps": 118210, "total_steps": 204665, "loss": 0.0283, "lr": 9.039042170277728e-07, "epoch": 2.887889966530672, "percentage": 57.76, "elapsed_time": "2:32:55", "remaining_time": "1:51:50", "throughput": 8683.95, "total_tokens": 79681384} +{"current_steps": 118215, "total_steps": 204665, "loss": 0.002, "lr": 9.038193344347478e-07, "epoch": 2.8880121173625195, "percentage": 57.76, "elapsed_time": "2:32:56", "remaining_time": "1:51:50", "throughput": 8683.98, "total_tokens": 79684712} +{"current_steps": 118220, "total_steps": 204665, "loss": 0.0011, "lr": 9.037344525411747e-07, "epoch": 2.8881342681943662, "percentage": 57.76, "elapsed_time": "2:32:56", "remaining_time": "1:51:49", "throughput": 8684.03, "total_tokens": 79688168} +{"current_steps": 118225, "total_steps": 204665, "loss": 0.1608, "lr": 9.036495713476704e-07, "epoch": 2.8882564190262134, "percentage": 57.77, "elapsed_time": "2:32:56", "remaining_time": "1:51:49", "throughput": 8684.07, "total_tokens": 79691624} +{"current_steps": 118230, "total_steps": 204665, "loss": 0.0006, "lr": 9.035646908548527e-07, "epoch": 2.8883785698580606, "percentage": 57.77, "elapsed_time": "2:32:57", "remaining_time": "1:51:49", "throughput": 8684.09, "total_tokens": 79694696} +{"current_steps": 118235, "total_steps": 204665, "loss": 0.1012, "lr": 9.034798110633379e-07, "epoch": 2.888500720689908, "percentage": 57.77, "elapsed_time": "2:32:57", "remaining_time": "1:51:48", "throughput": 8684.13, "total_tokens": 79698088} +{"current_steps": 118240, "total_steps": 204665, "loss": 0.0404, "lr": 9.033949319737439e-07, "epoch": 2.888622871521755, "percentage": 57.77, "elapsed_time": "2:32:57", "remaining_time": "1:51:48", "throughput": 8684.19, "total_tokens": 79701736} +{"current_steps": 118245, "total_steps": 204665, "loss": 0.0011, "lr": 9.033100535866885e-07, "epoch": 2.888745022353602, "percentage": 57.77, "elapsed_time": "2:32:58", "remaining_time": "1:51:47", "throughput": 8684.2, "total_tokens": 79704808} +{"current_steps": 118250, "total_steps": 204665, "loss": 0.0005, "lr": 9.032251759027881e-07, "epoch": 2.8888671731854494, "percentage": 57.78, "elapsed_time": "2:32:58", "remaining_time": "1:51:47", "throughput": 8684.24, "total_tokens": 79708136} +{"current_steps": 118255, "total_steps": 204665, "loss": 0.0314, "lr": 9.031402989226603e-07, "epoch": 2.8889893240172966, "percentage": 57.78, "elapsed_time": "2:32:58", "remaining_time": "1:51:47", "throughput": 8684.27, "total_tokens": 79711464} +{"current_steps": 118260, "total_steps": 204665, "loss": 0.0678, "lr": 9.030554226469222e-07, "epoch": 2.8891114748491438, "percentage": 57.78, "elapsed_time": "2:32:59", "remaining_time": "1:51:46", "throughput": 8684.33, "total_tokens": 79714984} +{"current_steps": 118265, "total_steps": 204665, "loss": 0.0005, "lr": 9.029705470761913e-07, "epoch": 2.889233625680991, "percentage": 57.78, "elapsed_time": "2:32:59", "remaining_time": "1:51:46", "throughput": 8684.37, "total_tokens": 79718376} +{"current_steps": 118270, "total_steps": 204665, "loss": 0.0003, "lr": 9.028856722110846e-07, "epoch": 2.889355776512838, "percentage": 57.79, "elapsed_time": "2:32:59", "remaining_time": "1:51:45", "throughput": 8684.39, "total_tokens": 79721576} +{"current_steps": 118275, "total_steps": 204665, "loss": 0.0543, "lr": 9.028007980522192e-07, "epoch": 2.8894779273446853, "percentage": 57.79, "elapsed_time": "2:33:00", "remaining_time": "1:51:45", "throughput": 8684.41, "total_tokens": 79724712} +{"current_steps": 118280, "total_steps": 204665, "loss": 0.0002, "lr": 9.02715924600213e-07, "epoch": 2.8896000781765325, "percentage": 57.79, "elapsed_time": "2:33:00", "remaining_time": "1:51:44", "throughput": 8684.43, "total_tokens": 79727848} +{"current_steps": 118285, "total_steps": 204665, "loss": 0.0002, "lr": 9.026310518556822e-07, "epoch": 2.8897222290083793, "percentage": 57.79, "elapsed_time": "2:33:00", "remaining_time": "1:51:44", "throughput": 8684.44, "total_tokens": 79730920} +{"current_steps": 118290, "total_steps": 204665, "loss": 0.1201, "lr": 9.025461798192452e-07, "epoch": 2.889844379840227, "percentage": 57.8, "elapsed_time": "2:33:01", "remaining_time": "1:51:44", "throughput": 8684.49, "total_tokens": 79734376} +{"current_steps": 118295, "total_steps": 204665, "loss": 0.0417, "lr": 9.024613084915181e-07, "epoch": 2.8899665306720737, "percentage": 57.8, "elapsed_time": "2:33:01", "remaining_time": "1:51:43", "throughput": 8684.5, "total_tokens": 79737448} +{"current_steps": 118300, "total_steps": 204665, "loss": 0.0603, "lr": 9.023764378731189e-07, "epoch": 2.8900886815039213, "percentage": 57.8, "elapsed_time": "2:33:01", "remaining_time": "1:51:43", "throughput": 8684.5, "total_tokens": 79740392} +{"current_steps": 118305, "total_steps": 204665, "loss": 0.0005, "lr": 9.022915679646643e-07, "epoch": 2.890210832335768, "percentage": 57.8, "elapsed_time": "2:33:02", "remaining_time": "1:51:42", "throughput": 8684.54, "total_tokens": 79743848} +{"current_steps": 118310, "total_steps": 204665, "loss": 0.0001, "lr": 9.022066987667717e-07, "epoch": 2.890332983167615, "percentage": 57.81, "elapsed_time": "2:33:02", "remaining_time": "1:51:42", "throughput": 8684.56, "total_tokens": 79746920} +{"current_steps": 118315, "total_steps": 204665, "loss": 0.0396, "lr": 9.021218302800586e-07, "epoch": 2.8904551339994624, "percentage": 57.81, "elapsed_time": "2:33:02", "remaining_time": "1:51:42", "throughput": 8684.59, "total_tokens": 79750184} +{"current_steps": 118320, "total_steps": 204665, "loss": 0.0003, "lr": 9.020369625051414e-07, "epoch": 2.8905772848313096, "percentage": 57.81, "elapsed_time": "2:33:03", "remaining_time": "1:51:41", "throughput": 8684.63, "total_tokens": 79753640} +{"current_steps": 118325, "total_steps": 204665, "loss": 0.0379, "lr": 9.019520954426383e-07, "epoch": 2.890699435663157, "percentage": 57.81, "elapsed_time": "2:33:03", "remaining_time": "1:51:41", "throughput": 8684.66, "total_tokens": 79756904} +{"current_steps": 118330, "total_steps": 204665, "loss": 0.0003, "lr": 9.018672290931654e-07, "epoch": 2.890821586495004, "percentage": 57.82, "elapsed_time": "2:33:03", "remaining_time": "1:51:40", "throughput": 8684.65, "total_tokens": 79759720} +{"current_steps": 118335, "total_steps": 204665, "loss": 0.0473, "lr": 9.017823634573404e-07, "epoch": 2.890943737326851, "percentage": 57.82, "elapsed_time": "2:33:04", "remaining_time": "1:51:40", "throughput": 8684.67, "total_tokens": 79762856} +{"current_steps": 118340, "total_steps": 204665, "loss": 0.0804, "lr": 9.01697498535781e-07, "epoch": 2.8910658881586984, "percentage": 57.82, "elapsed_time": "2:33:04", "remaining_time": "1:51:39", "throughput": 8684.68, "total_tokens": 79765864} +{"current_steps": 118345, "total_steps": 204665, "loss": 0.0002, "lr": 9.016126343291033e-07, "epoch": 2.8911880389905456, "percentage": 57.82, "elapsed_time": "2:33:05", "remaining_time": "1:51:39", "throughput": 8684.73, "total_tokens": 79769384} +{"current_steps": 118350, "total_steps": 204665, "loss": 0.0477, "lr": 9.015277708379254e-07, "epoch": 2.8913101898223927, "percentage": 57.83, "elapsed_time": "2:33:05", "remaining_time": "1:51:39", "throughput": 8684.78, "total_tokens": 79772840} +{"current_steps": 118355, "total_steps": 204665, "loss": 0.0381, "lr": 9.014429080628636e-07, "epoch": 2.89143234065424, "percentage": 57.83, "elapsed_time": "2:33:05", "remaining_time": "1:51:38", "throughput": 8684.79, "total_tokens": 79775912} +{"current_steps": 118360, "total_steps": 204665, "loss": 0.0549, "lr": 9.01358046004536e-07, "epoch": 2.891554491486087, "percentage": 57.83, "elapsed_time": "2:33:06", "remaining_time": "1:51:38", "throughput": 8684.81, "total_tokens": 79779112} +{"current_steps": 118365, "total_steps": 204665, "loss": 0.0525, "lr": 9.012731846635589e-07, "epoch": 2.8916766423179343, "percentage": 57.83, "elapsed_time": "2:33:06", "remaining_time": "1:51:37", "throughput": 8684.83, "total_tokens": 79782248} +{"current_steps": 118370, "total_steps": 204665, "loss": 0.0164, "lr": 9.011883240405496e-07, "epoch": 2.8917987931497815, "percentage": 57.84, "elapsed_time": "2:33:06", "remaining_time": "1:51:37", "throughput": 8684.86, "total_tokens": 79785512} +{"current_steps": 118375, "total_steps": 204665, "loss": 0.0004, "lr": 9.011034641361259e-07, "epoch": 2.8919209439816287, "percentage": 57.84, "elapsed_time": "2:33:07", "remaining_time": "1:51:36", "throughput": 8684.89, "total_tokens": 79788712} +{"current_steps": 118380, "total_steps": 204665, "loss": 0.0002, "lr": 9.010186049509038e-07, "epoch": 2.8920430948134754, "percentage": 57.84, "elapsed_time": "2:33:07", "remaining_time": "1:51:36", "throughput": 8684.96, "total_tokens": 79792424} +{"current_steps": 118385, "total_steps": 204665, "loss": 0.0003, "lr": 9.009337464855016e-07, "epoch": 2.892165245645323, "percentage": 57.84, "elapsed_time": "2:33:07", "remaining_time": "1:51:36", "throughput": 8684.98, "total_tokens": 79795560} +{"current_steps": 118390, "total_steps": 204665, "loss": 0.0077, "lr": 9.008488887405354e-07, "epoch": 2.89228739647717, "percentage": 57.85, "elapsed_time": "2:33:08", "remaining_time": "1:51:35", "throughput": 8685.0, "total_tokens": 79798824} +{"current_steps": 118395, "total_steps": 204665, "loss": 0.0389, "lr": 9.007640317166228e-07, "epoch": 2.8924095473090174, "percentage": 57.85, "elapsed_time": "2:33:08", "remaining_time": "1:51:35", "throughput": 8685.08, "total_tokens": 79802664} +{"current_steps": 118400, "total_steps": 204665, "loss": 0.0001, "lr": 9.006791754143812e-07, "epoch": 2.892531698140864, "percentage": 57.85, "elapsed_time": "2:33:08", "remaining_time": "1:51:34", "throughput": 8685.11, "total_tokens": 79805928} +{"current_steps": 118405, "total_steps": 204665, "loss": 0.0004, "lr": 9.005943198344271e-07, "epoch": 2.8926538489727114, "percentage": 57.85, "elapsed_time": "2:33:09", "remaining_time": "1:51:34", "throughput": 8685.12, "total_tokens": 79808936} +{"current_steps": 118410, "total_steps": 204665, "loss": 0.0001, "lr": 9.005094649773779e-07, "epoch": 2.8927759998045586, "percentage": 57.86, "elapsed_time": "2:33:09", "remaining_time": "1:51:34", "throughput": 8685.18, "total_tokens": 79812520} +{"current_steps": 118415, "total_steps": 204665, "loss": 0.066, "lr": 9.004246108438505e-07, "epoch": 2.8928981506364058, "percentage": 57.86, "elapsed_time": "2:33:09", "remaining_time": "1:51:33", "throughput": 8685.2, "total_tokens": 79815720} +{"current_steps": 118420, "total_steps": 204665, "loss": 0.1074, "lr": 9.003397574344624e-07, "epoch": 2.893020301468253, "percentage": 57.86, "elapsed_time": "2:33:10", "remaining_time": "1:51:33", "throughput": 8685.26, "total_tokens": 79819368} +{"current_steps": 118425, "total_steps": 204665, "loss": 0.0001, "lr": 9.002549047498301e-07, "epoch": 2.8931424523001, "percentage": 57.86, "elapsed_time": "2:33:10", "remaining_time": "1:51:32", "throughput": 8685.29, "total_tokens": 79822632} +{"current_steps": 118430, "total_steps": 204665, "loss": 0.0616, "lr": 9.001700527905709e-07, "epoch": 2.8932646031319473, "percentage": 57.87, "elapsed_time": "2:33:10", "remaining_time": "1:51:32", "throughput": 8685.34, "total_tokens": 79826152} +{"current_steps": 118435, "total_steps": 204665, "loss": 0.0312, "lr": 9.000852015573024e-07, "epoch": 2.8933867539637945, "percentage": 57.87, "elapsed_time": "2:33:11", "remaining_time": "1:51:31", "throughput": 8685.36, "total_tokens": 79829288} +{"current_steps": 118440, "total_steps": 204665, "loss": 0.0863, "lr": 9.000003510506407e-07, "epoch": 2.8935089047956417, "percentage": 57.87, "elapsed_time": "2:33:11", "remaining_time": "1:51:31", "throughput": 8685.4, "total_tokens": 79832680} +{"current_steps": 118445, "total_steps": 204665, "loss": 0.0971, "lr": 8.999155012712036e-07, "epoch": 2.893631055627489, "percentage": 57.87, "elapsed_time": "2:33:11", "remaining_time": "1:51:31", "throughput": 8685.45, "total_tokens": 79836200} +{"current_steps": 118450, "total_steps": 204665, "loss": 0.0002, "lr": 8.998306522196077e-07, "epoch": 2.893753206459336, "percentage": 57.88, "elapsed_time": "2:33:12", "remaining_time": "1:51:30", "throughput": 8685.48, "total_tokens": 79839592} +{"current_steps": 118455, "total_steps": 204665, "loss": 0.0369, "lr": 8.997458038964706e-07, "epoch": 2.8938753572911833, "percentage": 57.88, "elapsed_time": "2:33:12", "remaining_time": "1:51:30", "throughput": 8685.52, "total_tokens": 79842984} +{"current_steps": 118460, "total_steps": 204665, "loss": 0.0433, "lr": 8.996609563024084e-07, "epoch": 2.8939975081230305, "percentage": 57.88, "elapsed_time": "2:33:13", "remaining_time": "1:51:29", "throughput": 8685.54, "total_tokens": 79846184} +{"current_steps": 118465, "total_steps": 204665, "loss": 0.0678, "lr": 8.995761094380392e-07, "epoch": 2.894119658954877, "percentage": 57.88, "elapsed_time": "2:33:13", "remaining_time": "1:51:29", "throughput": 8685.62, "total_tokens": 79850024} +{"current_steps": 118470, "total_steps": 204665, "loss": 0.0003, "lr": 8.994912633039796e-07, "epoch": 2.894241809786725, "percentage": 57.88, "elapsed_time": "2:33:13", "remaining_time": "1:51:29", "throughput": 8685.66, "total_tokens": 79853416} +{"current_steps": 118475, "total_steps": 204665, "loss": 0.1456, "lr": 8.994064179008461e-07, "epoch": 2.8943639606185716, "percentage": 57.89, "elapsed_time": "2:33:14", "remaining_time": "1:51:28", "throughput": 8685.69, "total_tokens": 79856680} +{"current_steps": 118480, "total_steps": 204665, "loss": 0.0005, "lr": 8.993215732292567e-07, "epoch": 2.8944861114504192, "percentage": 57.89, "elapsed_time": "2:33:14", "remaining_time": "1:51:28", "throughput": 8685.72, "total_tokens": 79860072} +{"current_steps": 118485, "total_steps": 204665, "loss": 0.069, "lr": 8.992367292898274e-07, "epoch": 2.894608262282266, "percentage": 57.89, "elapsed_time": "2:33:14", "remaining_time": "1:51:27", "throughput": 8685.77, "total_tokens": 79863528} +{"current_steps": 118490, "total_steps": 204665, "loss": 0.0002, "lr": 8.991518860831758e-07, "epoch": 2.894730413114113, "percentage": 57.89, "elapsed_time": "2:33:15", "remaining_time": "1:51:27", "throughput": 8685.78, "total_tokens": 79866600} +{"current_steps": 118495, "total_steps": 204665, "loss": 0.002, "lr": 8.990670436099192e-07, "epoch": 2.8948525639459604, "percentage": 57.9, "elapsed_time": "2:33:15", "remaining_time": "1:51:26", "throughput": 8685.8, "total_tokens": 79869800} +{"current_steps": 118500, "total_steps": 204665, "loss": 0.0002, "lr": 8.989822018706738e-07, "epoch": 2.8949747147778075, "percentage": 57.9, "elapsed_time": "2:33:15", "remaining_time": "1:51:26", "throughput": 8685.83, "total_tokens": 79873064} +{"current_steps": 118505, "total_steps": 204665, "loss": 0.0997, "lr": 8.988973608660572e-07, "epoch": 2.8950968656096547, "percentage": 57.9, "elapsed_time": "2:33:16", "remaining_time": "1:51:26", "throughput": 8685.89, "total_tokens": 79876776} +{"current_steps": 118510, "total_steps": 204665, "loss": 0.102, "lr": 8.988125205966861e-07, "epoch": 2.895219016441502, "percentage": 57.9, "elapsed_time": "2:33:16", "remaining_time": "1:51:25", "throughput": 8685.93, "total_tokens": 79880232} +{"current_steps": 118515, "total_steps": 204665, "loss": 0.0003, "lr": 8.987276810631779e-07, "epoch": 2.895341167273349, "percentage": 57.91, "elapsed_time": "2:33:16", "remaining_time": "1:51:25", "throughput": 8685.97, "total_tokens": 79883624} +{"current_steps": 118520, "total_steps": 204665, "loss": 0.113, "lr": 8.986428422661489e-07, "epoch": 2.8954633181051963, "percentage": 57.91, "elapsed_time": "2:33:17", "remaining_time": "1:51:24", "throughput": 8685.99, "total_tokens": 79886760} +{"current_steps": 118525, "total_steps": 204665, "loss": 0.0002, "lr": 8.985580042062163e-07, "epoch": 2.8955854689370435, "percentage": 57.91, "elapsed_time": "2:33:17", "remaining_time": "1:51:24", "throughput": 8686.06, "total_tokens": 79890408} +{"current_steps": 118530, "total_steps": 204665, "loss": 0.117, "lr": 8.984731668839976e-07, "epoch": 2.8957076197688907, "percentage": 57.91, "elapsed_time": "2:33:17", "remaining_time": "1:51:24", "throughput": 8686.09, "total_tokens": 79893800} +{"current_steps": 118535, "total_steps": 204665, "loss": 0.0008, "lr": 8.983883303001088e-07, "epoch": 2.895829770600738, "percentage": 57.92, "elapsed_time": "2:33:18", "remaining_time": "1:51:23", "throughput": 8686.11, "total_tokens": 79897000} +{"current_steps": 118540, "total_steps": 204665, "loss": 0.0004, "lr": 8.98303494455168e-07, "epoch": 2.895951921432585, "percentage": 57.92, "elapsed_time": "2:33:18", "remaining_time": "1:51:23", "throughput": 8686.15, "total_tokens": 79900328} +{"current_steps": 118545, "total_steps": 204665, "loss": 0.0257, "lr": 8.982186593497909e-07, "epoch": 2.8960740722644323, "percentage": 57.92, "elapsed_time": "2:33:18", "remaining_time": "1:51:22", "throughput": 8686.15, "total_tokens": 79903272} +{"current_steps": 118550, "total_steps": 204665, "loss": 0.0007, "lr": 8.981338249845952e-07, "epoch": 2.8961962230962794, "percentage": 57.92, "elapsed_time": "2:33:19", "remaining_time": "1:51:22", "throughput": 8686.24, "total_tokens": 79907240} +{"current_steps": 118555, "total_steps": 204665, "loss": 0.046, "lr": 8.980489913601982e-07, "epoch": 2.8963183739281266, "percentage": 57.93, "elapsed_time": "2:33:19", "remaining_time": "1:51:21", "throughput": 8686.24, "total_tokens": 79910248} +{"current_steps": 118560, "total_steps": 204665, "loss": 0.0285, "lr": 8.979641584772161e-07, "epoch": 2.8964405247599734, "percentage": 57.93, "elapsed_time": "2:33:19", "remaining_time": "1:51:21", "throughput": 8686.3, "total_tokens": 79913832} +{"current_steps": 118565, "total_steps": 204665, "loss": 0.03, "lr": 8.97879326336266e-07, "epoch": 2.896562675591821, "percentage": 57.93, "elapsed_time": "2:33:20", "remaining_time": "1:51:21", "throughput": 8686.37, "total_tokens": 79917608} +{"current_steps": 118570, "total_steps": 204665, "loss": 0.038, "lr": 8.977944949379652e-07, "epoch": 2.8966848264236678, "percentage": 57.93, "elapsed_time": "2:33:20", "remaining_time": "1:51:20", "throughput": 8686.43, "total_tokens": 79921192} +{"current_steps": 118575, "total_steps": 204665, "loss": 0.0002, "lr": 8.977096642829301e-07, "epoch": 2.896806977255515, "percentage": 57.94, "elapsed_time": "2:33:21", "remaining_time": "1:51:20", "throughput": 8686.44, "total_tokens": 79924264} +{"current_steps": 118580, "total_steps": 204665, "loss": 0.101, "lr": 8.976248343717778e-07, "epoch": 2.896929128087362, "percentage": 57.94, "elapsed_time": "2:33:21", "remaining_time": "1:51:19", "throughput": 8686.48, "total_tokens": 79927656} +{"current_steps": 118585, "total_steps": 204665, "loss": 0.0175, "lr": 8.97540005205125e-07, "epoch": 2.8970512789192093, "percentage": 57.94, "elapsed_time": "2:33:21", "remaining_time": "1:51:19", "throughput": 8686.52, "total_tokens": 79931112} +{"current_steps": 118590, "total_steps": 204665, "loss": 0.052, "lr": 8.974551767835893e-07, "epoch": 2.8971734297510565, "percentage": 57.94, "elapsed_time": "2:33:22", "remaining_time": "1:51:19", "throughput": 8686.53, "total_tokens": 79934184} +{"current_steps": 118595, "total_steps": 204665, "loss": 0.076, "lr": 8.973703491077867e-07, "epoch": 2.8972955805829037, "percentage": 57.95, "elapsed_time": "2:33:22", "remaining_time": "1:51:18", "throughput": 8686.62, "total_tokens": 79938152} +{"current_steps": 118600, "total_steps": 204665, "loss": 0.001, "lr": 8.972855221783351e-07, "epoch": 2.897417731414751, "percentage": 57.95, "elapsed_time": "2:33:22", "remaining_time": "1:51:18", "throughput": 8686.7, "total_tokens": 79941992} +{"current_steps": 118605, "total_steps": 204665, "loss": 0.108, "lr": 8.972006959958502e-07, "epoch": 2.897539882246598, "percentage": 57.95, "elapsed_time": "2:33:23", "remaining_time": "1:51:17", "throughput": 8686.75, "total_tokens": 79945512} +{"current_steps": 118610, "total_steps": 204665, "loss": 0.1229, "lr": 8.9711587056095e-07, "epoch": 2.8976620330784453, "percentage": 57.95, "elapsed_time": "2:33:23", "remaining_time": "1:51:17", "throughput": 8686.76, "total_tokens": 79948520} +{"current_steps": 118615, "total_steps": 204665, "loss": 0.0696, "lr": 8.970310458742505e-07, "epoch": 2.8977841839102925, "percentage": 57.96, "elapsed_time": "2:33:23", "remaining_time": "1:51:16", "throughput": 8686.8, "total_tokens": 79951976} +{"current_steps": 118620, "total_steps": 204665, "loss": 0.0124, "lr": 8.969462219363691e-07, "epoch": 2.8979063347421397, "percentage": 57.96, "elapsed_time": "2:33:24", "remaining_time": "1:51:16", "throughput": 8686.83, "total_tokens": 79955304} +{"current_steps": 118625, "total_steps": 204665, "loss": 0.0007, "lr": 8.968613987479227e-07, "epoch": 2.898028485573987, "percentage": 57.96, "elapsed_time": "2:33:24", "remaining_time": "1:51:16", "throughput": 8686.86, "total_tokens": 79958632} +{"current_steps": 118630, "total_steps": 204665, "loss": 0.1746, "lr": 8.967765763095274e-07, "epoch": 2.898150636405834, "percentage": 57.96, "elapsed_time": "2:33:24", "remaining_time": "1:51:15", "throughput": 8686.9, "total_tokens": 79961960} +{"current_steps": 118635, "total_steps": 204665, "loss": 0.0957, "lr": 8.966917546218012e-07, "epoch": 2.8982727872376812, "percentage": 57.97, "elapsed_time": "2:33:25", "remaining_time": "1:51:15", "throughput": 8686.97, "total_tokens": 79965672} +{"current_steps": 118640, "total_steps": 204665, "loss": 0.0004, "lr": 8.966069336853598e-07, "epoch": 2.8983949380695284, "percentage": 57.97, "elapsed_time": "2:33:25", "remaining_time": "1:51:14", "throughput": 8687.01, "total_tokens": 79969128} +{"current_steps": 118645, "total_steps": 204665, "loss": 0.0006, "lr": 8.965221135008207e-07, "epoch": 2.898517088901375, "percentage": 57.97, "elapsed_time": "2:33:25", "remaining_time": "1:51:14", "throughput": 8687.1, "total_tokens": 79973096} +{"current_steps": 118650, "total_steps": 204665, "loss": 0.0502, "lr": 8.96437294068801e-07, "epoch": 2.898639239733223, "percentage": 57.97, "elapsed_time": "2:33:26", "remaining_time": "1:51:14", "throughput": 8687.17, "total_tokens": 79976936} +{"current_steps": 118655, "total_steps": 204665, "loss": 0.0165, "lr": 8.963524753899167e-07, "epoch": 2.8987613905650695, "percentage": 57.98, "elapsed_time": "2:33:26", "remaining_time": "1:51:13", "throughput": 8687.21, "total_tokens": 79980264} +{"current_steps": 118660, "total_steps": 204665, "loss": 0.0442, "lr": 8.962676574647855e-07, "epoch": 2.898883541396917, "percentage": 57.98, "elapsed_time": "2:33:27", "remaining_time": "1:51:13", "throughput": 8687.28, "total_tokens": 79984040} +{"current_steps": 118665, "total_steps": 204665, "loss": 0.0008, "lr": 8.961828402940233e-07, "epoch": 2.899005692228764, "percentage": 57.98, "elapsed_time": "2:33:27", "remaining_time": "1:51:12", "throughput": 8687.3, "total_tokens": 79987240} +{"current_steps": 118670, "total_steps": 204665, "loss": 0.0009, "lr": 8.96098023878248e-07, "epoch": 2.899127843060611, "percentage": 57.98, "elapsed_time": "2:33:27", "remaining_time": "1:51:12", "throughput": 8687.33, "total_tokens": 79990568} +{"current_steps": 118675, "total_steps": 204665, "loss": 0.0004, "lr": 8.960132082180755e-07, "epoch": 2.8992499938924583, "percentage": 57.98, "elapsed_time": "2:33:28", "remaining_time": "1:51:12", "throughput": 8687.44, "total_tokens": 79994664} +{"current_steps": 118680, "total_steps": 204665, "loss": 0.0007, "lr": 8.959283933141227e-07, "epoch": 2.8993721447243055, "percentage": 57.99, "elapsed_time": "2:33:28", "remaining_time": "1:51:11", "throughput": 8687.49, "total_tokens": 79998312} +{"current_steps": 118685, "total_steps": 204665, "loss": 0.0703, "lr": 8.958435791670071e-07, "epoch": 2.8994942955561527, "percentage": 57.99, "elapsed_time": "2:33:28", "remaining_time": "1:51:11", "throughput": 8687.55, "total_tokens": 80001960} +{"current_steps": 118690, "total_steps": 204665, "loss": 0.0645, "lr": 8.957587657773447e-07, "epoch": 2.899616446388, "percentage": 57.99, "elapsed_time": "2:33:29", "remaining_time": "1:51:10", "throughput": 8687.61, "total_tokens": 80005544} +{"current_steps": 118695, "total_steps": 204665, "loss": 0.0504, "lr": 8.956739531457528e-07, "epoch": 2.899738597219847, "percentage": 57.99, "elapsed_time": "2:33:29", "remaining_time": "1:51:10", "throughput": 8687.66, "total_tokens": 80009128} +{"current_steps": 118700, "total_steps": 204665, "loss": 0.0348, "lr": 8.955891412728476e-07, "epoch": 2.8998607480516942, "percentage": 58.0, "elapsed_time": "2:33:29", "remaining_time": "1:51:09", "throughput": 8687.67, "total_tokens": 80012136} +{"current_steps": 118705, "total_steps": 204665, "loss": 0.0459, "lr": 8.955043301592463e-07, "epoch": 2.8999828988835414, "percentage": 58.0, "elapsed_time": "2:33:30", "remaining_time": "1:51:09", "throughput": 8687.77, "total_tokens": 80016296} +{"current_steps": 118710, "total_steps": 204665, "loss": 0.1135, "lr": 8.954195198055659e-07, "epoch": 2.9001050497153886, "percentage": 58.0, "elapsed_time": "2:33:30", "remaining_time": "1:51:09", "throughput": 8687.78, "total_tokens": 80019368} +{"current_steps": 118715, "total_steps": 204665, "loss": 0.0673, "lr": 8.953347102124229e-07, "epoch": 2.900227200547236, "percentage": 58.0, "elapsed_time": "2:33:30", "remaining_time": "1:51:08", "throughput": 8687.8, "total_tokens": 80022440} +{"current_steps": 118720, "total_steps": 204665, "loss": 0.0003, "lr": 8.952499013804339e-07, "epoch": 2.900349351379083, "percentage": 58.01, "elapsed_time": "2:33:31", "remaining_time": "1:51:08", "throughput": 8687.85, "total_tokens": 80025960} +{"current_steps": 118725, "total_steps": 204665, "loss": 0.0003, "lr": 8.951650933102158e-07, "epoch": 2.90047150221093, "percentage": 58.01, "elapsed_time": "2:33:31", "remaining_time": "1:51:07", "throughput": 8687.87, "total_tokens": 80029224} +{"current_steps": 118730, "total_steps": 204665, "loss": 0.0007, "lr": 8.950802860023854e-07, "epoch": 2.9005936530427774, "percentage": 58.01, "elapsed_time": "2:33:31", "remaining_time": "1:51:07", "throughput": 8687.88, "total_tokens": 80032296} +{"current_steps": 118735, "total_steps": 204665, "loss": 0.0003, "lr": 8.949954794575593e-07, "epoch": 2.9007158038746246, "percentage": 58.01, "elapsed_time": "2:33:32", "remaining_time": "1:51:07", "throughput": 8687.91, "total_tokens": 80035560} +{"current_steps": 118740, "total_steps": 204665, "loss": 0.033, "lr": 8.949106736763541e-07, "epoch": 2.9008379547064713, "percentage": 58.02, "elapsed_time": "2:33:32", "remaining_time": "1:51:06", "throughput": 8687.93, "total_tokens": 80038760} +{"current_steps": 118745, "total_steps": 204665, "loss": 0.0003, "lr": 8.948258686593872e-07, "epoch": 2.900960105538319, "percentage": 58.02, "elapsed_time": "2:33:32", "remaining_time": "1:51:06", "throughput": 8687.95, "total_tokens": 80042024} +{"current_steps": 118750, "total_steps": 204665, "loss": 0.0002, "lr": 8.947410644072745e-07, "epoch": 2.9010822563701657, "percentage": 58.02, "elapsed_time": "2:33:33", "remaining_time": "1:51:05", "throughput": 8687.98, "total_tokens": 80045288} +{"current_steps": 118755, "total_steps": 204665, "loss": 0.0002, "lr": 8.946562609206334e-07, "epoch": 2.901204407202013, "percentage": 58.02, "elapsed_time": "2:33:33", "remaining_time": "1:51:05", "throughput": 8688.0, "total_tokens": 80048488} +{"current_steps": 118760, "total_steps": 204665, "loss": 0.0005, "lr": 8.9457145820008e-07, "epoch": 2.90132655803386, "percentage": 58.03, "elapsed_time": "2:33:34", "remaining_time": "1:51:04", "throughput": 8688.03, "total_tokens": 80051752} +{"current_steps": 118765, "total_steps": 204665, "loss": 0.0, "lr": 8.944866562462317e-07, "epoch": 2.9014487088657073, "percentage": 58.03, "elapsed_time": "2:33:34", "remaining_time": "1:51:04", "throughput": 8688.07, "total_tokens": 80055144} +{"current_steps": 118770, "total_steps": 204665, "loss": 0.0548, "lr": 8.944018550597043e-07, "epoch": 2.9015708596975545, "percentage": 58.03, "elapsed_time": "2:33:34", "remaining_time": "1:51:04", "throughput": 8688.12, "total_tokens": 80058664} +{"current_steps": 118775, "total_steps": 204665, "loss": 0.0006, "lr": 8.943170546411153e-07, "epoch": 2.9016930105294017, "percentage": 58.03, "elapsed_time": "2:33:35", "remaining_time": "1:51:03", "throughput": 8688.13, "total_tokens": 80061672} +{"current_steps": 118780, "total_steps": 204665, "loss": 0.0002, "lr": 8.942322549910813e-07, "epoch": 2.901815161361249, "percentage": 58.04, "elapsed_time": "2:33:35", "remaining_time": "1:51:03", "throughput": 8688.14, "total_tokens": 80064744} +{"current_steps": 118785, "total_steps": 204665, "loss": 0.0701, "lr": 8.941474561102185e-07, "epoch": 2.901937312193096, "percentage": 58.04, "elapsed_time": "2:33:35", "remaining_time": "1:51:02", "throughput": 8688.18, "total_tokens": 80068200} +{"current_steps": 118790, "total_steps": 204665, "loss": 0.0, "lr": 8.940626579991442e-07, "epoch": 2.902059463024943, "percentage": 58.04, "elapsed_time": "2:33:36", "remaining_time": "1:51:02", "throughput": 8688.21, "total_tokens": 80071464} +{"current_steps": 118795, "total_steps": 204665, "loss": 0.0936, "lr": 8.939778606584743e-07, "epoch": 2.9021816138567904, "percentage": 58.04, "elapsed_time": "2:33:36", "remaining_time": "1:51:02", "throughput": 8688.24, "total_tokens": 80074728} +{"current_steps": 118800, "total_steps": 204665, "loss": 0.035, "lr": 8.938930640888258e-07, "epoch": 2.9023037646886376, "percentage": 58.05, "elapsed_time": "2:33:36", "remaining_time": "1:51:01", "throughput": 8688.26, "total_tokens": 80077928} +{"current_steps": 118805, "total_steps": 204665, "loss": 0.1283, "lr": 8.93808268290816e-07, "epoch": 2.902425915520485, "percentage": 58.05, "elapsed_time": "2:33:37", "remaining_time": "1:51:01", "throughput": 8688.27, "total_tokens": 80080936} +{"current_steps": 118810, "total_steps": 204665, "loss": 0.0009, "lr": 8.937234732650606e-07, "epoch": 2.902548066352332, "percentage": 58.05, "elapsed_time": "2:33:37", "remaining_time": "1:51:00", "throughput": 8688.33, "total_tokens": 80084648} +{"current_steps": 118815, "total_steps": 204665, "loss": 0.0339, "lr": 8.936386790121772e-07, "epoch": 2.902670217184179, "percentage": 58.05, "elapsed_time": "2:33:37", "remaining_time": "1:51:00", "throughput": 8688.38, "total_tokens": 80088168} +{"current_steps": 118820, "total_steps": 204665, "loss": 0.1282, "lr": 8.935538855327814e-07, "epoch": 2.9027923680160264, "percentage": 58.06, "elapsed_time": "2:33:38", "remaining_time": "1:50:59", "throughput": 8688.41, "total_tokens": 80091432} +{"current_steps": 118825, "total_steps": 204665, "loss": 0.0003, "lr": 8.934690928274908e-07, "epoch": 2.902914518847873, "percentage": 58.06, "elapsed_time": "2:33:38", "remaining_time": "1:50:59", "throughput": 8688.45, "total_tokens": 80094824} +{"current_steps": 118830, "total_steps": 204665, "loss": 0.0645, "lr": 8.933843008969215e-07, "epoch": 2.9030366696797207, "percentage": 58.06, "elapsed_time": "2:33:38", "remaining_time": "1:50:59", "throughput": 8688.51, "total_tokens": 80098344} +{"current_steps": 118835, "total_steps": 204665, "loss": 0.0006, "lr": 8.9329950974169e-07, "epoch": 2.9031588205115675, "percentage": 58.06, "elapsed_time": "2:33:39", "remaining_time": "1:50:58", "throughput": 8688.6, "total_tokens": 80102312} +{"current_steps": 118840, "total_steps": 204665, "loss": 0.0003, "lr": 8.932147193624135e-07, "epoch": 2.903280971343415, "percentage": 58.07, "elapsed_time": "2:33:39", "remaining_time": "1:50:58", "throughput": 8688.63, "total_tokens": 80105640} +{"current_steps": 118845, "total_steps": 204665, "loss": 0.0005, "lr": 8.931299297597079e-07, "epoch": 2.903403122175262, "percentage": 58.07, "elapsed_time": "2:33:39", "remaining_time": "1:50:57", "throughput": 8688.66, "total_tokens": 80108968} +{"current_steps": 118850, "total_steps": 204665, "loss": 0.0633, "lr": 8.930451409341908e-07, "epoch": 2.903525273007109, "percentage": 58.07, "elapsed_time": "2:33:40", "remaining_time": "1:50:57", "throughput": 8688.69, "total_tokens": 80112232} +{"current_steps": 118855, "total_steps": 204665, "loss": 0.0004, "lr": 8.929603528864775e-07, "epoch": 2.9036474238389562, "percentage": 58.07, "elapsed_time": "2:33:40", "remaining_time": "1:50:57", "throughput": 8688.7, "total_tokens": 80115176} +{"current_steps": 118860, "total_steps": 204665, "loss": 0.0004, "lr": 8.928755656171853e-07, "epoch": 2.9037695746708034, "percentage": 58.08, "elapsed_time": "2:33:40", "remaining_time": "1:50:56", "throughput": 8688.74, "total_tokens": 80118632} +{"current_steps": 118865, "total_steps": 204665, "loss": 0.0001, "lr": 8.927907791269314e-07, "epoch": 2.9038917255026506, "percentage": 58.08, "elapsed_time": "2:33:41", "remaining_time": "1:50:56", "throughput": 8688.79, "total_tokens": 80122152} +{"current_steps": 118870, "total_steps": 204665, "loss": 0.0005, "lr": 8.927059934163316e-07, "epoch": 2.904013876334498, "percentage": 58.08, "elapsed_time": "2:33:41", "remaining_time": "1:50:55", "throughput": 8688.84, "total_tokens": 80125672} +{"current_steps": 118875, "total_steps": 204665, "loss": 0.0004, "lr": 8.926212084860025e-07, "epoch": 2.904136027166345, "percentage": 58.08, "elapsed_time": "2:33:42", "remaining_time": "1:50:55", "throughput": 8688.88, "total_tokens": 80129064} +{"current_steps": 118880, "total_steps": 204665, "loss": 0.0452, "lr": 8.925364243365609e-07, "epoch": 2.904258177998192, "percentage": 58.09, "elapsed_time": "2:33:42", "remaining_time": "1:50:54", "throughput": 8688.93, "total_tokens": 80132520} +{"current_steps": 118885, "total_steps": 204665, "loss": 0.0003, "lr": 8.924516409686235e-07, "epoch": 2.9043803288300394, "percentage": 58.09, "elapsed_time": "2:33:42", "remaining_time": "1:50:54", "throughput": 8688.98, "total_tokens": 80136104} +{"current_steps": 118890, "total_steps": 204665, "loss": 0.0, "lr": 8.923668583828066e-07, "epoch": 2.9045024796618866, "percentage": 58.09, "elapsed_time": "2:33:43", "remaining_time": "1:50:54", "throughput": 8689.01, "total_tokens": 80139304} +{"current_steps": 118895, "total_steps": 204665, "loss": 0.0005, "lr": 8.922820765797265e-07, "epoch": 2.9046246304937338, "percentage": 58.09, "elapsed_time": "2:33:43", "remaining_time": "1:50:53", "throughput": 8689.05, "total_tokens": 80142760} +{"current_steps": 118900, "total_steps": 204665, "loss": 0.0583, "lr": 8.921972955600006e-07, "epoch": 2.904746781325581, "percentage": 58.09, "elapsed_time": "2:33:43", "remaining_time": "1:50:53", "throughput": 8689.13, "total_tokens": 80146664} +{"current_steps": 118905, "total_steps": 204665, "loss": 0.0017, "lr": 8.921125153242447e-07, "epoch": 2.904868932157428, "percentage": 58.1, "elapsed_time": "2:33:44", "remaining_time": "1:50:52", "throughput": 8689.18, "total_tokens": 80150184} +{"current_steps": 118910, "total_steps": 204665, "loss": 0.0004, "lr": 8.920277358730759e-07, "epoch": 2.904991082989275, "percentage": 58.1, "elapsed_time": "2:33:44", "remaining_time": "1:50:52", "throughput": 8689.21, "total_tokens": 80153448} +{"current_steps": 118915, "total_steps": 204665, "loss": 0.0466, "lr": 8.9194295720711e-07, "epoch": 2.9051132338211225, "percentage": 58.1, "elapsed_time": "2:33:44", "remaining_time": "1:50:52", "throughput": 8689.26, "total_tokens": 80156904} +{"current_steps": 118920, "total_steps": 204665, "loss": 0.0635, "lr": 8.918581793269645e-07, "epoch": 2.9052353846529693, "percentage": 58.1, "elapsed_time": "2:33:45", "remaining_time": "1:50:51", "throughput": 8689.26, "total_tokens": 80159848} +{"current_steps": 118925, "total_steps": 204665, "loss": 0.0002, "lr": 8.917734022332549e-07, "epoch": 2.905357535484817, "percentage": 58.11, "elapsed_time": "2:33:45", "remaining_time": "1:50:51", "throughput": 8689.31, "total_tokens": 80163304} +{"current_steps": 118930, "total_steps": 204665, "loss": 0.0615, "lr": 8.916886259265985e-07, "epoch": 2.9054796863166636, "percentage": 58.11, "elapsed_time": "2:33:45", "remaining_time": "1:50:50", "throughput": 8689.34, "total_tokens": 80166632} +{"current_steps": 118935, "total_steps": 204665, "loss": 0.0403, "lr": 8.916038504076117e-07, "epoch": 2.905601837148511, "percentage": 58.11, "elapsed_time": "2:33:46", "remaining_time": "1:50:50", "throughput": 8689.36, "total_tokens": 80169832} +{"current_steps": 118940, "total_steps": 204665, "loss": 0.0435, "lr": 8.915190756769104e-07, "epoch": 2.905723987980358, "percentage": 58.11, "elapsed_time": "2:33:46", "remaining_time": "1:50:49", "throughput": 8689.4, "total_tokens": 80173224} +{"current_steps": 118945, "total_steps": 204665, "loss": 0.0, "lr": 8.91434301735112e-07, "epoch": 2.905846138812205, "percentage": 58.12, "elapsed_time": "2:33:46", "remaining_time": "1:50:49", "throughput": 8689.45, "total_tokens": 80176680} +{"current_steps": 118950, "total_steps": 204665, "loss": 0.0005, "lr": 8.913495285828323e-07, "epoch": 2.9059682896440524, "percentage": 58.12, "elapsed_time": "2:33:47", "remaining_time": "1:50:49", "throughput": 8689.46, "total_tokens": 80179688} +{"current_steps": 118955, "total_steps": 204665, "loss": 0.0277, "lr": 8.912647562206879e-07, "epoch": 2.9060904404758996, "percentage": 58.12, "elapsed_time": "2:33:47", "remaining_time": "1:50:48", "throughput": 8689.51, "total_tokens": 80183208} +{"current_steps": 118960, "total_steps": 204665, "loss": 0.0351, "lr": 8.911799846492959e-07, "epoch": 2.906212591307747, "percentage": 58.12, "elapsed_time": "2:33:47", "remaining_time": "1:50:48", "throughput": 8689.52, "total_tokens": 80186280} +{"current_steps": 118965, "total_steps": 204665, "loss": 0.0683, "lr": 8.910952138692718e-07, "epoch": 2.906334742139594, "percentage": 58.13, "elapsed_time": "2:33:48", "remaining_time": "1:50:47", "throughput": 8689.52, "total_tokens": 80189224} +{"current_steps": 118970, "total_steps": 204665, "loss": 0.0866, "lr": 8.910104438812332e-07, "epoch": 2.906456892971441, "percentage": 58.13, "elapsed_time": "2:33:48", "remaining_time": "1:50:47", "throughput": 8689.55, "total_tokens": 80192488} +{"current_steps": 118975, "total_steps": 204665, "loss": 0.1192, "lr": 8.909256746857953e-07, "epoch": 2.9065790438032884, "percentage": 58.13, "elapsed_time": "2:33:48", "remaining_time": "1:50:47", "throughput": 8689.6, "total_tokens": 80196008} +{"current_steps": 118980, "total_steps": 204665, "loss": 0.0297, "lr": 8.908409062835759e-07, "epoch": 2.9067011946351355, "percentage": 58.13, "elapsed_time": "2:33:49", "remaining_time": "1:50:46", "throughput": 8689.69, "total_tokens": 80199976} +{"current_steps": 118985, "total_steps": 204665, "loss": 0.001, "lr": 8.907561386751905e-07, "epoch": 2.9068233454669827, "percentage": 58.14, "elapsed_time": "2:33:49", "remaining_time": "1:50:46", "throughput": 8689.7, "total_tokens": 80203048} +{"current_steps": 118990, "total_steps": 204665, "loss": 0.0578, "lr": 8.906713718612555e-07, "epoch": 2.90694549629883, "percentage": 58.14, "elapsed_time": "2:33:50", "remaining_time": "1:50:45", "throughput": 8689.73, "total_tokens": 80206248} +{"current_steps": 118995, "total_steps": 204665, "loss": 0.0005, "lr": 8.905866058423884e-07, "epoch": 2.907067647130677, "percentage": 58.14, "elapsed_time": "2:33:50", "remaining_time": "1:50:45", "throughput": 8689.74, "total_tokens": 80209384} +{"current_steps": 119000, "total_steps": 204665, "loss": 0.0004, "lr": 8.905018406192042e-07, "epoch": 2.9071897979625243, "percentage": 58.14, "elapsed_time": "2:33:50", "remaining_time": "1:50:44", "throughput": 8689.78, "total_tokens": 80212776} +{"current_steps": 119005, "total_steps": 204665, "loss": 0.0001, "lr": 8.904170761923206e-07, "epoch": 2.907311948794371, "percentage": 58.15, "elapsed_time": "2:33:51", "remaining_time": "1:50:44", "throughput": 8689.81, "total_tokens": 80216040} +{"current_steps": 119010, "total_steps": 204665, "loss": 0.0406, "lr": 8.903323125623531e-07, "epoch": 2.9074340996262187, "percentage": 58.15, "elapsed_time": "2:33:51", "remaining_time": "1:50:44", "throughput": 8689.87, "total_tokens": 80219624} +{"current_steps": 119015, "total_steps": 204665, "loss": 0.0843, "lr": 8.90247549729919e-07, "epoch": 2.9075562504580654, "percentage": 58.15, "elapsed_time": "2:33:51", "remaining_time": "1:50:43", "throughput": 8689.92, "total_tokens": 80223144} +{"current_steps": 119020, "total_steps": 204665, "loss": 0.1005, "lr": 8.901627876956337e-07, "epoch": 2.907678401289913, "percentage": 58.15, "elapsed_time": "2:33:52", "remaining_time": "1:50:43", "throughput": 8689.98, "total_tokens": 80226856} +{"current_steps": 119025, "total_steps": 204665, "loss": 0.0006, "lr": 8.900780264601144e-07, "epoch": 2.90780055212176, "percentage": 58.16, "elapsed_time": "2:33:52", "remaining_time": "1:50:42", "throughput": 8690.01, "total_tokens": 80230120} +{"current_steps": 119030, "total_steps": 204665, "loss": 0.0362, "lr": 8.899932660239773e-07, "epoch": 2.907922702953607, "percentage": 58.16, "elapsed_time": "2:33:52", "remaining_time": "1:50:42", "throughput": 8690.05, "total_tokens": 80233512} +{"current_steps": 119035, "total_steps": 204665, "loss": 0.0922, "lr": 8.899085063878387e-07, "epoch": 2.908044853785454, "percentage": 58.16, "elapsed_time": "2:33:53", "remaining_time": "1:50:42", "throughput": 8690.07, "total_tokens": 80236648} +{"current_steps": 119040, "total_steps": 204665, "loss": 0.0004, "lr": 8.89823747552315e-07, "epoch": 2.9081670046173014, "percentage": 58.16, "elapsed_time": "2:33:53", "remaining_time": "1:50:41", "throughput": 8690.07, "total_tokens": 80239656} +{"current_steps": 119045, "total_steps": 204665, "loss": 0.0003, "lr": 8.897389895180228e-07, "epoch": 2.9082891554491486, "percentage": 58.17, "elapsed_time": "2:33:53", "remaining_time": "1:50:41", "throughput": 8690.13, "total_tokens": 80243176} +{"current_steps": 119050, "total_steps": 204665, "loss": 0.0005, "lr": 8.89654232285578e-07, "epoch": 2.9084113062809958, "percentage": 58.17, "elapsed_time": "2:33:54", "remaining_time": "1:50:40", "throughput": 8690.15, "total_tokens": 80246440} +{"current_steps": 119055, "total_steps": 204665, "loss": 0.1446, "lr": 8.895694758555979e-07, "epoch": 2.908533457112843, "percentage": 58.17, "elapsed_time": "2:33:54", "remaining_time": "1:50:40", "throughput": 8690.19, "total_tokens": 80249832} +{"current_steps": 119060, "total_steps": 204665, "loss": 0.0979, "lr": 8.894847202286976e-07, "epoch": 2.90865560794469, "percentage": 58.17, "elapsed_time": "2:33:54", "remaining_time": "1:50:39", "throughput": 8690.29, "total_tokens": 80253864} +{"current_steps": 119065, "total_steps": 204665, "loss": 0.0005, "lr": 8.893999654054947e-07, "epoch": 2.9087777587765373, "percentage": 58.18, "elapsed_time": "2:33:55", "remaining_time": "1:50:39", "throughput": 8690.31, "total_tokens": 80257064} +{"current_steps": 119070, "total_steps": 204665, "loss": 0.0008, "lr": 8.893152113866045e-07, "epoch": 2.9088999096083845, "percentage": 58.18, "elapsed_time": "2:33:55", "remaining_time": "1:50:39", "throughput": 8690.35, "total_tokens": 80260456} +{"current_steps": 119075, "total_steps": 204665, "loss": 0.0608, "lr": 8.892304581726444e-07, "epoch": 2.9090220604402317, "percentage": 58.18, "elapsed_time": "2:33:55", "remaining_time": "1:50:38", "throughput": 8690.4, "total_tokens": 80263912} +{"current_steps": 119080, "total_steps": 204665, "loss": 0.0639, "lr": 8.891457057642296e-07, "epoch": 2.909144211272079, "percentage": 58.18, "elapsed_time": "2:33:56", "remaining_time": "1:50:38", "throughput": 8690.43, "total_tokens": 80267240} +{"current_steps": 119085, "total_steps": 204665, "loss": 0.0015, "lr": 8.890609541619775e-07, "epoch": 2.909266362103926, "percentage": 58.19, "elapsed_time": "2:33:56", "remaining_time": "1:50:37", "throughput": 8690.46, "total_tokens": 80270504} +{"current_steps": 119090, "total_steps": 204665, "loss": 0.0003, "lr": 8.88976203366504e-07, "epoch": 2.909388512935773, "percentage": 58.19, "elapsed_time": "2:33:56", "remaining_time": "1:50:37", "throughput": 8690.5, "total_tokens": 80273960} +{"current_steps": 119095, "total_steps": 204665, "loss": 0.0896, "lr": 8.88891453378425e-07, "epoch": 2.9095106637676205, "percentage": 58.19, "elapsed_time": "2:33:57", "remaining_time": "1:50:37", "throughput": 8690.53, "total_tokens": 80277160} +{"current_steps": 119100, "total_steps": 204665, "loss": 0.082, "lr": 8.888067041983577e-07, "epoch": 2.909632814599467, "percentage": 58.19, "elapsed_time": "2:33:57", "remaining_time": "1:50:36", "throughput": 8690.53, "total_tokens": 80280168} +{"current_steps": 119105, "total_steps": 204665, "loss": 0.0943, "lr": 8.887219558269176e-07, "epoch": 2.909754965431315, "percentage": 58.2, "elapsed_time": "2:33:58", "remaining_time": "1:50:36", "throughput": 8690.57, "total_tokens": 80283496} +{"current_steps": 119110, "total_steps": 204665, "loss": 0.0014, "lr": 8.886372082647212e-07, "epoch": 2.9098771162631616, "percentage": 58.2, "elapsed_time": "2:33:58", "remaining_time": "1:50:35", "throughput": 8690.61, "total_tokens": 80286952} +{"current_steps": 119115, "total_steps": 204665, "loss": 0.0001, "lr": 8.885524615123855e-07, "epoch": 2.909999267095009, "percentage": 58.2, "elapsed_time": "2:33:58", "remaining_time": "1:50:35", "throughput": 8690.64, "total_tokens": 80290216} +{"current_steps": 119120, "total_steps": 204665, "loss": 0.0558, "lr": 8.88467715570526e-07, "epoch": 2.910121417926856, "percentage": 58.2, "elapsed_time": "2:33:59", "remaining_time": "1:50:34", "throughput": 8690.68, "total_tokens": 80293608} +{"current_steps": 119125, "total_steps": 204665, "loss": 0.0561, "lr": 8.883829704397594e-07, "epoch": 2.910243568758703, "percentage": 58.2, "elapsed_time": "2:33:59", "remaining_time": "1:50:34", "throughput": 8690.73, "total_tokens": 80297064} +{"current_steps": 119130, "total_steps": 204665, "loss": 0.135, "lr": 8.882982261207016e-07, "epoch": 2.9103657195905503, "percentage": 58.21, "elapsed_time": "2:33:59", "remaining_time": "1:50:34", "throughput": 8690.74, "total_tokens": 80300136} +{"current_steps": 119135, "total_steps": 204665, "loss": 0.0004, "lr": 8.882134826139695e-07, "epoch": 2.9104878704223975, "percentage": 58.21, "elapsed_time": "2:34:00", "remaining_time": "1:50:33", "throughput": 8690.8, "total_tokens": 80303720} +{"current_steps": 119140, "total_steps": 204665, "loss": 0.029, "lr": 8.881287399201789e-07, "epoch": 2.9106100212542447, "percentage": 58.21, "elapsed_time": "2:34:00", "remaining_time": "1:50:33", "throughput": 8690.8, "total_tokens": 80306728} +{"current_steps": 119145, "total_steps": 204665, "loss": 0.0001, "lr": 8.880439980399459e-07, "epoch": 2.910732172086092, "percentage": 58.21, "elapsed_time": "2:34:00", "remaining_time": "1:50:32", "throughput": 8690.82, "total_tokens": 80309864} +{"current_steps": 119150, "total_steps": 204665, "loss": 0.0003, "lr": 8.879592569738875e-07, "epoch": 2.910854322917939, "percentage": 58.22, "elapsed_time": "2:34:01", "remaining_time": "1:50:32", "throughput": 8690.87, "total_tokens": 80313320} +{"current_steps": 119155, "total_steps": 204665, "loss": 0.0001, "lr": 8.878745167226192e-07, "epoch": 2.9109764737497863, "percentage": 58.22, "elapsed_time": "2:34:01", "remaining_time": "1:50:32", "throughput": 8690.88, "total_tokens": 80316456} +{"current_steps": 119160, "total_steps": 204665, "loss": 0.0462, "lr": 8.877897772867579e-07, "epoch": 2.9110986245816335, "percentage": 58.22, "elapsed_time": "2:34:01", "remaining_time": "1:50:31", "throughput": 8690.91, "total_tokens": 80319720} +{"current_steps": 119165, "total_steps": 204665, "loss": 0.069, "lr": 8.877050386669191e-07, "epoch": 2.9112207754134807, "percentage": 58.22, "elapsed_time": "2:34:02", "remaining_time": "1:50:31", "throughput": 8690.93, "total_tokens": 80322920} +{"current_steps": 119170, "total_steps": 204665, "loss": 0.0012, "lr": 8.876203008637198e-07, "epoch": 2.911342926245328, "percentage": 58.23, "elapsed_time": "2:34:02", "remaining_time": "1:50:30", "throughput": 8690.93, "total_tokens": 80325864} +{"current_steps": 119175, "total_steps": 204665, "loss": 0.0837, "lr": 8.875355638777756e-07, "epoch": 2.911465077077175, "percentage": 58.23, "elapsed_time": "2:34:02", "remaining_time": "1:50:30", "throughput": 8690.93, "total_tokens": 80328808} +{"current_steps": 119180, "total_steps": 204665, "loss": 0.0001, "lr": 8.874508277097033e-07, "epoch": 2.9115872279090222, "percentage": 58.23, "elapsed_time": "2:34:03", "remaining_time": "1:50:29", "throughput": 8690.98, "total_tokens": 80332200} +{"current_steps": 119185, "total_steps": 204665, "loss": 0.0373, "lr": 8.873660923601187e-07, "epoch": 2.911709378740869, "percentage": 58.23, "elapsed_time": "2:34:03", "remaining_time": "1:50:29", "throughput": 8691.01, "total_tokens": 80335528} +{"current_steps": 119190, "total_steps": 204665, "loss": 0.0005, "lr": 8.872813578296382e-07, "epoch": 2.9118315295727166, "percentage": 58.24, "elapsed_time": "2:34:03", "remaining_time": "1:50:29", "throughput": 8691.07, "total_tokens": 80339176} +{"current_steps": 119195, "total_steps": 204665, "loss": 0.0008, "lr": 8.871966241188781e-07, "epoch": 2.9119536804045634, "percentage": 58.24, "elapsed_time": "2:34:04", "remaining_time": "1:50:28", "throughput": 8691.1, "total_tokens": 80342440} +{"current_steps": 119200, "total_steps": 204665, "loss": 0.0004, "lr": 8.871118912284543e-07, "epoch": 2.9120758312364106, "percentage": 58.24, "elapsed_time": "2:34:04", "remaining_time": "1:50:28", "throughput": 8691.15, "total_tokens": 80345896} +{"current_steps": 119205, "total_steps": 204665, "loss": 0.0496, "lr": 8.870271591589831e-07, "epoch": 2.9121979820682578, "percentage": 58.24, "elapsed_time": "2:34:04", "remaining_time": "1:50:27", "throughput": 8691.17, "total_tokens": 80349032} +{"current_steps": 119210, "total_steps": 204665, "loss": 0.0837, "lr": 8.869424279110812e-07, "epoch": 2.912320132900105, "percentage": 58.25, "elapsed_time": "2:34:05", "remaining_time": "1:50:27", "throughput": 8691.21, "total_tokens": 80352424} +{"current_steps": 119215, "total_steps": 204665, "loss": 0.0317, "lr": 8.86857697485364e-07, "epoch": 2.912442283731952, "percentage": 58.25, "elapsed_time": "2:34:05", "remaining_time": "1:50:26", "throughput": 8691.22, "total_tokens": 80355560} +{"current_steps": 119220, "total_steps": 204665, "loss": 0.0856, "lr": 8.867729678824484e-07, "epoch": 2.9125644345637993, "percentage": 58.25, "elapsed_time": "2:34:05", "remaining_time": "1:50:26", "throughput": 8691.29, "total_tokens": 80359272} +{"current_steps": 119225, "total_steps": 204665, "loss": 0.1324, "lr": 8.866882391029498e-07, "epoch": 2.9126865853956465, "percentage": 58.25, "elapsed_time": "2:34:06", "remaining_time": "1:50:26", "throughput": 8691.31, "total_tokens": 80362472} +{"current_steps": 119230, "total_steps": 204665, "loss": 0.0467, "lr": 8.866035111474853e-07, "epoch": 2.9128087362274937, "percentage": 58.26, "elapsed_time": "2:34:06", "remaining_time": "1:50:25", "throughput": 8691.33, "total_tokens": 80365672} +{"current_steps": 119235, "total_steps": 204665, "loss": 0.0007, "lr": 8.865187840166701e-07, "epoch": 2.912930887059341, "percentage": 58.26, "elapsed_time": "2:34:06", "remaining_time": "1:50:25", "throughput": 8691.35, "total_tokens": 80368872} +{"current_steps": 119240, "total_steps": 204665, "loss": 0.0001, "lr": 8.86434057711121e-07, "epoch": 2.913053037891188, "percentage": 58.26, "elapsed_time": "2:34:07", "remaining_time": "1:50:24", "throughput": 8691.35, "total_tokens": 80371816} +{"current_steps": 119245, "total_steps": 204665, "loss": 0.1107, "lr": 8.863493322314543e-07, "epoch": 2.9131751887230353, "percentage": 58.26, "elapsed_time": "2:34:07", "remaining_time": "1:50:24", "throughput": 8691.39, "total_tokens": 80375144} +{"current_steps": 119250, "total_steps": 204665, "loss": 0.0006, "lr": 8.862646075782852e-07, "epoch": 2.9132973395548825, "percentage": 58.27, "elapsed_time": "2:34:08", "remaining_time": "1:50:24", "throughput": 8691.4, "total_tokens": 80378216} +{"current_steps": 119255, "total_steps": 204665, "loss": 0.0002, "lr": 8.861798837522311e-07, "epoch": 2.9134194903867296, "percentage": 58.27, "elapsed_time": "2:34:08", "remaining_time": "1:50:23", "throughput": 8691.41, "total_tokens": 80381288} +{"current_steps": 119260, "total_steps": 204665, "loss": 0.0004, "lr": 8.86095160753907e-07, "epoch": 2.913541641218577, "percentage": 58.27, "elapsed_time": "2:34:08", "remaining_time": "1:50:23", "throughput": 8691.43, "total_tokens": 80384488} +{"current_steps": 119265, "total_steps": 204665, "loss": 0.0001, "lr": 8.860104385839295e-07, "epoch": 2.913663792050424, "percentage": 58.27, "elapsed_time": "2:34:09", "remaining_time": "1:50:22", "throughput": 8691.49, "total_tokens": 80388072} +{"current_steps": 119270, "total_steps": 204665, "loss": 0.0408, "lr": 8.859257172429153e-07, "epoch": 2.9137859428822708, "percentage": 58.28, "elapsed_time": "2:34:09", "remaining_time": "1:50:22", "throughput": 8691.52, "total_tokens": 80391400} +{"current_steps": 119275, "total_steps": 204665, "loss": 0.001, "lr": 8.858409967314792e-07, "epoch": 2.9139080937141184, "percentage": 58.28, "elapsed_time": "2:34:09", "remaining_time": "1:50:21", "throughput": 8691.54, "total_tokens": 80394472} +{"current_steps": 119280, "total_steps": 204665, "loss": 0.0334, "lr": 8.857562770502389e-07, "epoch": 2.914030244545965, "percentage": 58.28, "elapsed_time": "2:34:10", "remaining_time": "1:50:21", "throughput": 8691.63, "total_tokens": 80398568} +{"current_steps": 119285, "total_steps": 204665, "loss": 0.0006, "lr": 8.856715581998091e-07, "epoch": 2.914152395377813, "percentage": 58.28, "elapsed_time": "2:34:10", "remaining_time": "1:50:21", "throughput": 8691.65, "total_tokens": 80401640} +{"current_steps": 119290, "total_steps": 204665, "loss": 0.0001, "lr": 8.855868401808069e-07, "epoch": 2.9142745462096595, "percentage": 58.29, "elapsed_time": "2:34:10", "remaining_time": "1:50:20", "throughput": 8691.7, "total_tokens": 80405224} +{"current_steps": 119295, "total_steps": 204665, "loss": 0.0001, "lr": 8.855021229938478e-07, "epoch": 2.9143966970415067, "percentage": 58.29, "elapsed_time": "2:34:11", "remaining_time": "1:50:20", "throughput": 8691.75, "total_tokens": 80408808} +{"current_steps": 119300, "total_steps": 204665, "loss": 0.0503, "lr": 8.854174066395476e-07, "epoch": 2.914518847873354, "percentage": 58.29, "elapsed_time": "2:34:11", "remaining_time": "1:50:19", "throughput": 8691.84, "total_tokens": 80412776} +{"current_steps": 119305, "total_steps": 204665, "loss": 0.043, "lr": 8.853326911185236e-07, "epoch": 2.914640998705201, "percentage": 58.29, "elapsed_time": "2:34:11", "remaining_time": "1:50:19", "throughput": 8691.91, "total_tokens": 80416488} +{"current_steps": 119310, "total_steps": 204665, "loss": 0.1288, "lr": 8.852479764313905e-07, "epoch": 2.9147631495370483, "percentage": 58.3, "elapsed_time": "2:34:12", "remaining_time": "1:50:19", "throughput": 8691.94, "total_tokens": 80419752} +{"current_steps": 119315, "total_steps": 204665, "loss": 0.0579, "lr": 8.851632625787655e-07, "epoch": 2.9148853003688955, "percentage": 58.3, "elapsed_time": "2:34:12", "remaining_time": "1:50:18", "throughput": 8691.96, "total_tokens": 80422952} +{"current_steps": 119320, "total_steps": 204665, "loss": 0.031, "lr": 8.850785495612636e-07, "epoch": 2.9150074512007427, "percentage": 58.3, "elapsed_time": "2:34:12", "remaining_time": "1:50:18", "throughput": 8691.99, "total_tokens": 80426216} +{"current_steps": 119325, "total_steps": 204665, "loss": 0.0002, "lr": 8.84993837379502e-07, "epoch": 2.91512960203259, "percentage": 58.3, "elapsed_time": "2:34:13", "remaining_time": "1:50:17", "throughput": 8692.02, "total_tokens": 80429544} +{"current_steps": 119330, "total_steps": 204665, "loss": 0.1621, "lr": 8.849091260340955e-07, "epoch": 2.915251752864437, "percentage": 58.31, "elapsed_time": "2:34:13", "remaining_time": "1:50:17", "throughput": 8692.06, "total_tokens": 80432936} +{"current_steps": 119335, "total_steps": 204665, "loss": 0.0385, "lr": 8.848244155256613e-07, "epoch": 2.9153739036962842, "percentage": 58.31, "elapsed_time": "2:34:13", "remaining_time": "1:50:17", "throughput": 8692.11, "total_tokens": 80436456} +{"current_steps": 119340, "total_steps": 204665, "loss": 0.0395, "lr": 8.847397058548146e-07, "epoch": 2.9154960545281314, "percentage": 58.31, "elapsed_time": "2:34:14", "remaining_time": "1:50:16", "throughput": 8692.19, "total_tokens": 80440296} +{"current_steps": 119345, "total_steps": 204665, "loss": 0.0423, "lr": 8.846549970221719e-07, "epoch": 2.9156182053599786, "percentage": 58.31, "elapsed_time": "2:34:14", "remaining_time": "1:50:16", "throughput": 8692.24, "total_tokens": 80443880} +{"current_steps": 119350, "total_steps": 204665, "loss": 0.0439, "lr": 8.845702890283492e-07, "epoch": 2.915740356191826, "percentage": 58.31, "elapsed_time": "2:34:15", "remaining_time": "1:50:15", "throughput": 8692.3, "total_tokens": 80447464} +{"current_steps": 119355, "total_steps": 204665, "loss": 0.0005, "lr": 8.844855818739623e-07, "epoch": 2.9158625070236726, "percentage": 58.32, "elapsed_time": "2:34:15", "remaining_time": "1:50:15", "throughput": 8692.39, "total_tokens": 80451496} +{"current_steps": 119360, "total_steps": 204665, "loss": 0.0001, "lr": 8.844008755596271e-07, "epoch": 2.91598465785552, "percentage": 58.32, "elapsed_time": "2:34:15", "remaining_time": "1:50:14", "throughput": 8692.43, "total_tokens": 80454888} +{"current_steps": 119365, "total_steps": 204665, "loss": 0.0004, "lr": 8.843161700859602e-07, "epoch": 2.916106808687367, "percentage": 58.32, "elapsed_time": "2:34:16", "remaining_time": "1:50:14", "throughput": 8692.47, "total_tokens": 80458280} +{"current_steps": 119370, "total_steps": 204665, "loss": 0.0002, "lr": 8.842314654535769e-07, "epoch": 2.9162289595192146, "percentage": 58.32, "elapsed_time": "2:34:16", "remaining_time": "1:50:14", "throughput": 8692.48, "total_tokens": 80461352} +{"current_steps": 119375, "total_steps": 204665, "loss": 0.0729, "lr": 8.841467616630939e-07, "epoch": 2.9163511103510613, "percentage": 58.33, "elapsed_time": "2:34:16", "remaining_time": "1:50:13", "throughput": 8692.54, "total_tokens": 80464936} +{"current_steps": 119380, "total_steps": 204665, "loss": 0.0541, "lr": 8.840620587151264e-07, "epoch": 2.9164732611829085, "percentage": 58.33, "elapsed_time": "2:34:17", "remaining_time": "1:50:13", "throughput": 8692.57, "total_tokens": 80468200} +{"current_steps": 119385, "total_steps": 204665, "loss": 0.0778, "lr": 8.839773566102912e-07, "epoch": 2.9165954120147557, "percentage": 58.33, "elapsed_time": "2:34:17", "remaining_time": "1:50:12", "throughput": 8692.62, "total_tokens": 80471784} +{"current_steps": 119390, "total_steps": 204665, "loss": 0.0001, "lr": 8.838926553492035e-07, "epoch": 2.916717562846603, "percentage": 58.33, "elapsed_time": "2:34:17", "remaining_time": "1:50:12", "throughput": 8692.69, "total_tokens": 80475560} +{"current_steps": 119395, "total_steps": 204665, "loss": 0.0434, "lr": 8.838079549324797e-07, "epoch": 2.91683971367845, "percentage": 58.34, "elapsed_time": "2:34:18", "remaining_time": "1:50:12", "throughput": 8692.69, "total_tokens": 80478440} +{"current_steps": 119400, "total_steps": 204665, "loss": 0.1949, "lr": 8.837232553607361e-07, "epoch": 2.9169618645102973, "percentage": 58.34, "elapsed_time": "2:34:18", "remaining_time": "1:50:11", "throughput": 8692.76, "total_tokens": 80482152} +{"current_steps": 119405, "total_steps": 204665, "loss": 0.0005, "lr": 8.836385566345878e-07, "epoch": 2.9170840153421445, "percentage": 58.34, "elapsed_time": "2:34:18", "remaining_time": "1:50:11", "throughput": 8692.77, "total_tokens": 80485288} +{"current_steps": 119410, "total_steps": 204665, "loss": 0.0007, "lr": 8.835538587546515e-07, "epoch": 2.9172061661739916, "percentage": 58.34, "elapsed_time": "2:34:19", "remaining_time": "1:50:10", "throughput": 8692.79, "total_tokens": 80488424} +{"current_steps": 119415, "total_steps": 204665, "loss": 0.0004, "lr": 8.834691617215425e-07, "epoch": 2.917328317005839, "percentage": 58.35, "elapsed_time": "2:34:19", "remaining_time": "1:50:10", "throughput": 8692.83, "total_tokens": 80491880} +{"current_steps": 119420, "total_steps": 204665, "loss": 0.0002, "lr": 8.833844655358772e-07, "epoch": 2.917450467837686, "percentage": 58.35, "elapsed_time": "2:34:19", "remaining_time": "1:50:09", "throughput": 8692.89, "total_tokens": 80495464} +{"current_steps": 119425, "total_steps": 204665, "loss": 0.0002, "lr": 8.832997701982718e-07, "epoch": 2.917572618669533, "percentage": 58.35, "elapsed_time": "2:34:20", "remaining_time": "1:50:09", "throughput": 8692.96, "total_tokens": 80499176} +{"current_steps": 119430, "total_steps": 204665, "loss": 0.038, "lr": 8.832150757093414e-07, "epoch": 2.9176947695013804, "percentage": 58.35, "elapsed_time": "2:34:20", "remaining_time": "1:50:09", "throughput": 8692.98, "total_tokens": 80502440} +{"current_steps": 119435, "total_steps": 204665, "loss": 0.0007, "lr": 8.831303820697028e-07, "epoch": 2.9178169203332276, "percentage": 58.36, "elapsed_time": "2:34:20", "remaining_time": "1:50:08", "throughput": 8693.03, "total_tokens": 80505896} +{"current_steps": 119440, "total_steps": 204665, "loss": 0.0727, "lr": 8.830456892799712e-07, "epoch": 2.917939071165075, "percentage": 58.36, "elapsed_time": "2:34:21", "remaining_time": "1:50:08", "throughput": 8693.1, "total_tokens": 80509608} +{"current_steps": 119445, "total_steps": 204665, "loss": 0.0004, "lr": 8.82960997340763e-07, "epoch": 2.918061221996922, "percentage": 58.36, "elapsed_time": "2:34:21", "remaining_time": "1:50:07", "throughput": 8693.14, "total_tokens": 80513064} +{"current_steps": 119450, "total_steps": 204665, "loss": 0.0613, "lr": 8.828763062526938e-07, "epoch": 2.9181833728287687, "percentage": 58.36, "elapsed_time": "2:34:22", "remaining_time": "1:50:07", "throughput": 8693.16, "total_tokens": 80516200} +{"current_steps": 119455, "total_steps": 204665, "loss": 0.0504, "lr": 8.827916160163794e-07, "epoch": 2.9183055236606164, "percentage": 58.37, "elapsed_time": "2:34:22", "remaining_time": "1:50:07", "throughput": 8693.19, "total_tokens": 80519464} +{"current_steps": 119460, "total_steps": 204665, "loss": 0.0493, "lr": 8.827069266324364e-07, "epoch": 2.918427674492463, "percentage": 58.37, "elapsed_time": "2:34:22", "remaining_time": "1:50:06", "throughput": 8693.23, "total_tokens": 80522856} +{"current_steps": 119465, "total_steps": 204665, "loss": 0.0707, "lr": 8.826222381014796e-07, "epoch": 2.9185498253243107, "percentage": 58.37, "elapsed_time": "2:34:23", "remaining_time": "1:50:06", "throughput": 8693.26, "total_tokens": 80526184} +{"current_steps": 119470, "total_steps": 204665, "loss": 0.0001, "lr": 8.82537550424126e-07, "epoch": 2.9186719761561575, "percentage": 58.37, "elapsed_time": "2:34:23", "remaining_time": "1:50:05", "throughput": 8693.28, "total_tokens": 80529384} +{"current_steps": 119475, "total_steps": 204665, "loss": 0.0003, "lr": 8.824528636009904e-07, "epoch": 2.9187941269880047, "percentage": 58.38, "elapsed_time": "2:34:23", "remaining_time": "1:50:05", "throughput": 8693.33, "total_tokens": 80532904} +{"current_steps": 119480, "total_steps": 204665, "loss": 0.0424, "lr": 8.823681776326898e-07, "epoch": 2.918916277819852, "percentage": 58.38, "elapsed_time": "2:34:24", "remaining_time": "1:50:04", "throughput": 8693.37, "total_tokens": 80536232} +{"current_steps": 119485, "total_steps": 204665, "loss": 0.0442, "lr": 8.822834925198389e-07, "epoch": 2.919038428651699, "percentage": 58.38, "elapsed_time": "2:34:24", "remaining_time": "1:50:04", "throughput": 8693.39, "total_tokens": 80539432} +{"current_steps": 119490, "total_steps": 204665, "loss": 0.0366, "lr": 8.82198808263054e-07, "epoch": 2.9191605794835462, "percentage": 58.38, "elapsed_time": "2:34:24", "remaining_time": "1:50:04", "throughput": 8693.44, "total_tokens": 80542952} +{"current_steps": 119495, "total_steps": 204665, "loss": 0.1097, "lr": 8.821141248629516e-07, "epoch": 2.9192827303153934, "percentage": 58.39, "elapsed_time": "2:34:25", "remaining_time": "1:50:03", "throughput": 8693.47, "total_tokens": 80546280} +{"current_steps": 119500, "total_steps": 204665, "loss": 0.0007, "lr": 8.820294423201469e-07, "epoch": 2.9194048811472406, "percentage": 58.39, "elapsed_time": "2:34:25", "remaining_time": "1:50:03", "throughput": 8693.5, "total_tokens": 80549608} +{"current_steps": 119505, "total_steps": 204665, "loss": 0.0007, "lr": 8.819447606352557e-07, "epoch": 2.919527031979088, "percentage": 58.39, "elapsed_time": "2:34:25", "remaining_time": "1:50:02", "throughput": 8693.5, "total_tokens": 80552488} +{"current_steps": 119510, "total_steps": 204665, "loss": 0.0443, "lr": 8.818600798088939e-07, "epoch": 2.919649182810935, "percentage": 58.39, "elapsed_time": "2:34:26", "remaining_time": "1:50:02", "throughput": 8693.54, "total_tokens": 80555880} +{"current_steps": 119515, "total_steps": 204665, "loss": 0.0484, "lr": 8.817753998416772e-07, "epoch": 2.919771333642782, "percentage": 58.4, "elapsed_time": "2:34:26", "remaining_time": "1:50:02", "throughput": 8693.57, "total_tokens": 80559208} +{"current_steps": 119520, "total_steps": 204665, "loss": 0.0005, "lr": 8.81690720734222e-07, "epoch": 2.9198934844746294, "percentage": 58.4, "elapsed_time": "2:34:26", "remaining_time": "1:50:01", "throughput": 8693.6, "total_tokens": 80562472} +{"current_steps": 119525, "total_steps": 204665, "loss": 0.0001, "lr": 8.816060424871433e-07, "epoch": 2.9200156353064766, "percentage": 58.4, "elapsed_time": "2:34:27", "remaining_time": "1:50:01", "throughput": 8693.64, "total_tokens": 80565864} +{"current_steps": 119530, "total_steps": 204665, "loss": 0.0344, "lr": 8.815213651010578e-07, "epoch": 2.9201377861383238, "percentage": 58.4, "elapsed_time": "2:34:27", "remaining_time": "1:50:00", "throughput": 8693.73, "total_tokens": 80569896} +{"current_steps": 119535, "total_steps": 204665, "loss": 0.0379, "lr": 8.814366885765802e-07, "epoch": 2.9202599369701705, "percentage": 58.41, "elapsed_time": "2:34:27", "remaining_time": "1:50:00", "throughput": 8693.76, "total_tokens": 80573160} +{"current_steps": 119540, "total_steps": 204665, "loss": 0.0001, "lr": 8.813520129143275e-07, "epoch": 2.920382087802018, "percentage": 58.41, "elapsed_time": "2:34:28", "remaining_time": "1:49:59", "throughput": 8693.78, "total_tokens": 80576360} +{"current_steps": 119545, "total_steps": 204665, "loss": 0.1346, "lr": 8.812673381149143e-07, "epoch": 2.920504238633865, "percentage": 58.41, "elapsed_time": "2:34:28", "remaining_time": "1:49:59", "throughput": 8693.81, "total_tokens": 80579560} +{"current_steps": 119550, "total_steps": 204665, "loss": 0.0011, "lr": 8.811826641789572e-07, "epoch": 2.9206263894657125, "percentage": 58.41, "elapsed_time": "2:34:28", "remaining_time": "1:49:59", "throughput": 8693.83, "total_tokens": 80582824} +{"current_steps": 119555, "total_steps": 204665, "loss": 0.0475, "lr": 8.810979911070721e-07, "epoch": 2.9207485402975593, "percentage": 58.41, "elapsed_time": "2:34:29", "remaining_time": "1:49:58", "throughput": 8693.86, "total_tokens": 80586088} +{"current_steps": 119560, "total_steps": 204665, "loss": 0.0415, "lr": 8.810133188998739e-07, "epoch": 2.9208706911294064, "percentage": 58.42, "elapsed_time": "2:34:29", "remaining_time": "1:49:58", "throughput": 8693.89, "total_tokens": 80589352} +{"current_steps": 119565, "total_steps": 204665, "loss": 0.0768, "lr": 8.809286475579791e-07, "epoch": 2.9209928419612536, "percentage": 58.42, "elapsed_time": "2:34:29", "remaining_time": "1:49:57", "throughput": 8693.9, "total_tokens": 80592424} +{"current_steps": 119570, "total_steps": 204665, "loss": 0.0411, "lr": 8.808439770820028e-07, "epoch": 2.921114992793101, "percentage": 58.42, "elapsed_time": "2:34:30", "remaining_time": "1:49:57", "throughput": 8693.92, "total_tokens": 80595560} +{"current_steps": 119575, "total_steps": 204665, "loss": 0.038, "lr": 8.807593074725618e-07, "epoch": 2.921237143624948, "percentage": 58.42, "elapsed_time": "2:34:30", "remaining_time": "1:49:57", "throughput": 8693.94, "total_tokens": 80598760} +{"current_steps": 119580, "total_steps": 204665, "loss": 0.0347, "lr": 8.806746387302706e-07, "epoch": 2.921359294456795, "percentage": 58.43, "elapsed_time": "2:34:31", "remaining_time": "1:49:56", "throughput": 8693.98, "total_tokens": 80602088} +{"current_steps": 119585, "total_steps": 204665, "loss": 0.0412, "lr": 8.805899708557454e-07, "epoch": 2.9214814452886424, "percentage": 58.43, "elapsed_time": "2:34:31", "remaining_time": "1:49:56", "throughput": 8694.03, "total_tokens": 80605608} +{"current_steps": 119590, "total_steps": 204665, "loss": 0.017, "lr": 8.805053038496028e-07, "epoch": 2.9216035961204896, "percentage": 58.43, "elapsed_time": "2:34:31", "remaining_time": "1:49:55", "throughput": 8694.04, "total_tokens": 80608680} +{"current_steps": 119595, "total_steps": 204665, "loss": 0.0006, "lr": 8.804206377124571e-07, "epoch": 2.9217257469523368, "percentage": 58.43, "elapsed_time": "2:34:32", "remaining_time": "1:49:55", "throughput": 8694.09, "total_tokens": 80612264} +{"current_steps": 119600, "total_steps": 204665, "loss": 0.0524, "lr": 8.803359724449252e-07, "epoch": 2.921847897784184, "percentage": 58.44, "elapsed_time": "2:34:32", "remaining_time": "1:49:54", "throughput": 8694.14, "total_tokens": 80615784} +{"current_steps": 119605, "total_steps": 204665, "loss": 0.0002, "lr": 8.80251308047622e-07, "epoch": 2.921970048616031, "percentage": 58.44, "elapsed_time": "2:34:32", "remaining_time": "1:49:54", "throughput": 8694.13, "total_tokens": 80618664} +{"current_steps": 119610, "total_steps": 204665, "loss": 0.0706, "lr": 8.801666445211634e-07, "epoch": 2.9220921994478783, "percentage": 58.44, "elapsed_time": "2:34:33", "remaining_time": "1:49:54", "throughput": 8694.12, "total_tokens": 80621480} +{"current_steps": 119615, "total_steps": 204665, "loss": 0.0004, "lr": 8.800819818661655e-07, "epoch": 2.9222143502797255, "percentage": 58.44, "elapsed_time": "2:34:33", "remaining_time": "1:49:53", "throughput": 8694.17, "total_tokens": 80625000} +{"current_steps": 119620, "total_steps": 204665, "loss": 0.0479, "lr": 8.799973200832434e-07, "epoch": 2.9223365011115727, "percentage": 58.45, "elapsed_time": "2:34:33", "remaining_time": "1:49:53", "throughput": 8694.17, "total_tokens": 80627944} +{"current_steps": 119625, "total_steps": 204665, "loss": 0.0536, "lr": 8.799126591730134e-07, "epoch": 2.92245865194342, "percentage": 58.45, "elapsed_time": "2:34:34", "remaining_time": "1:49:52", "throughput": 8694.23, "total_tokens": 80631528} +{"current_steps": 119630, "total_steps": 204665, "loss": 0.0004, "lr": 8.798279991360904e-07, "epoch": 2.9225808027752667, "percentage": 58.45, "elapsed_time": "2:34:34", "remaining_time": "1:49:52", "throughput": 8694.24, "total_tokens": 80634600} +{"current_steps": 119635, "total_steps": 204665, "loss": 0.0009, "lr": 8.797433399730909e-07, "epoch": 2.9227029536071143, "percentage": 58.45, "elapsed_time": "2:34:34", "remaining_time": "1:49:52", "throughput": 8694.26, "total_tokens": 80637736} +{"current_steps": 119640, "total_steps": 204665, "loss": 0.0002, "lr": 8.796586816846299e-07, "epoch": 2.922825104438961, "percentage": 58.46, "elapsed_time": "2:34:35", "remaining_time": "1:49:51", "throughput": 8694.28, "total_tokens": 80640936} +{"current_steps": 119645, "total_steps": 204665, "loss": 0.0851, "lr": 8.795740242713232e-07, "epoch": 2.9229472552708082, "percentage": 58.46, "elapsed_time": "2:34:35", "remaining_time": "1:49:51", "throughput": 8694.33, "total_tokens": 80644520} +{"current_steps": 119650, "total_steps": 204665, "loss": 0.0007, "lr": 8.794893677337872e-07, "epoch": 2.9230694061026554, "percentage": 58.46, "elapsed_time": "2:34:35", "remaining_time": "1:49:50", "throughput": 8694.37, "total_tokens": 80647848} +{"current_steps": 119655, "total_steps": 204665, "loss": 0.085, "lr": 8.794047120726364e-07, "epoch": 2.9231915569345026, "percentage": 58.46, "elapsed_time": "2:34:36", "remaining_time": "1:49:50", "throughput": 8694.39, "total_tokens": 80651048} +{"current_steps": 119660, "total_steps": 204665, "loss": 0.0325, "lr": 8.793200572884873e-07, "epoch": 2.92331370776635, "percentage": 58.47, "elapsed_time": "2:34:36", "remaining_time": "1:49:49", "throughput": 8694.45, "total_tokens": 80654696} +{"current_steps": 119665, "total_steps": 204665, "loss": 0.0006, "lr": 8.792354033819549e-07, "epoch": 2.923435858598197, "percentage": 58.47, "elapsed_time": "2:34:36", "remaining_time": "1:49:49", "throughput": 8694.47, "total_tokens": 80657832} +{"current_steps": 119670, "total_steps": 204665, "loss": 0.0542, "lr": 8.79150750353655e-07, "epoch": 2.923558009430044, "percentage": 58.47, "elapsed_time": "2:34:37", "remaining_time": "1:49:49", "throughput": 8694.49, "total_tokens": 80661032} +{"current_steps": 119675, "total_steps": 204665, "loss": 0.0402, "lr": 8.79066098204204e-07, "epoch": 2.9236801602618914, "percentage": 58.47, "elapsed_time": "2:34:37", "remaining_time": "1:49:48", "throughput": 8694.57, "total_tokens": 80664872} +{"current_steps": 119680, "total_steps": 204665, "loss": 0.0004, "lr": 8.789814469342161e-07, "epoch": 2.9238023110937386, "percentage": 58.48, "elapsed_time": "2:34:37", "remaining_time": "1:49:48", "throughput": 8694.6, "total_tokens": 80668136} +{"current_steps": 119685, "total_steps": 204665, "loss": 0.0003, "lr": 8.788967965443083e-07, "epoch": 2.9239244619255857, "percentage": 58.48, "elapsed_time": "2:34:38", "remaining_time": "1:49:47", "throughput": 8694.61, "total_tokens": 80671272} +{"current_steps": 119690, "total_steps": 204665, "loss": 0.0599, "lr": 8.788121470350952e-07, "epoch": 2.924046612757433, "percentage": 58.48, "elapsed_time": "2:34:38", "remaining_time": "1:49:47", "throughput": 8694.64, "total_tokens": 80674472} +{"current_steps": 119695, "total_steps": 204665, "loss": 0.0005, "lr": 8.78727498407193e-07, "epoch": 2.92416876358928, "percentage": 58.48, "elapsed_time": "2:34:38", "remaining_time": "1:49:47", "throughput": 8694.67, "total_tokens": 80677864} +{"current_steps": 119700, "total_steps": 204665, "loss": 0.0001, "lr": 8.786428506612168e-07, "epoch": 2.9242909144211273, "percentage": 58.49, "elapsed_time": "2:34:39", "remaining_time": "1:49:46", "throughput": 8694.72, "total_tokens": 80681320} +{"current_steps": 119705, "total_steps": 204665, "loss": 0.0456, "lr": 8.785582037977826e-07, "epoch": 2.9244130652529745, "percentage": 58.49, "elapsed_time": "2:34:39", "remaining_time": "1:49:46", "throughput": 8694.71, "total_tokens": 80684136} +{"current_steps": 119710, "total_steps": 204665, "loss": 0.079, "lr": 8.78473557817506e-07, "epoch": 2.9245352160848217, "percentage": 58.49, "elapsed_time": "2:34:40", "remaining_time": "1:49:45", "throughput": 8694.78, "total_tokens": 80687848} +{"current_steps": 119715, "total_steps": 204665, "loss": 0.0381, "lr": 8.783889127210019e-07, "epoch": 2.9246573669166684, "percentage": 58.49, "elapsed_time": "2:34:40", "remaining_time": "1:49:45", "throughput": 8694.89, "total_tokens": 80692008} +{"current_steps": 119720, "total_steps": 204665, "loss": 0.0368, "lr": 8.783042685088869e-07, "epoch": 2.924779517748516, "percentage": 58.5, "elapsed_time": "2:34:40", "remaining_time": "1:49:44", "throughput": 8694.93, "total_tokens": 80695464} +{"current_steps": 119725, "total_steps": 204665, "loss": 0.0003, "lr": 8.782196251817754e-07, "epoch": 2.924901668580363, "percentage": 58.5, "elapsed_time": "2:34:41", "remaining_time": "1:49:44", "throughput": 8694.97, "total_tokens": 80698856} +{"current_steps": 119730, "total_steps": 204665, "loss": 0.0534, "lr": 8.78134982740284e-07, "epoch": 2.9250238194122105, "percentage": 58.5, "elapsed_time": "2:34:41", "remaining_time": "1:49:44", "throughput": 8695.02, "total_tokens": 80702440} +{"current_steps": 119735, "total_steps": 204665, "loss": 0.0969, "lr": 8.780503411850273e-07, "epoch": 2.925145970244057, "percentage": 58.5, "elapsed_time": "2:34:41", "remaining_time": "1:49:43", "throughput": 8695.06, "total_tokens": 80705832} +{"current_steps": 119740, "total_steps": 204665, "loss": 0.0409, "lr": 8.779657005166215e-07, "epoch": 2.9252681210759044, "percentage": 58.51, "elapsed_time": "2:34:42", "remaining_time": "1:49:43", "throughput": 8695.09, "total_tokens": 80709096} +{"current_steps": 119745, "total_steps": 204665, "loss": 0.0003, "lr": 8.778810607356822e-07, "epoch": 2.9253902719077516, "percentage": 58.51, "elapsed_time": "2:34:42", "remaining_time": "1:49:42", "throughput": 8695.14, "total_tokens": 80712552} +{"current_steps": 119750, "total_steps": 204665, "loss": 0.0006, "lr": 8.777964218428243e-07, "epoch": 2.9255124227395988, "percentage": 58.51, "elapsed_time": "2:34:42", "remaining_time": "1:49:42", "throughput": 8695.2, "total_tokens": 80716200} +{"current_steps": 119755, "total_steps": 204665, "loss": 0.0347, "lr": 8.777117838386642e-07, "epoch": 2.925634573571446, "percentage": 58.51, "elapsed_time": "2:34:43", "remaining_time": "1:49:42", "throughput": 8695.21, "total_tokens": 80719208} +{"current_steps": 119760, "total_steps": 204665, "loss": 0.0806, "lr": 8.776271467238166e-07, "epoch": 2.925756724403293, "percentage": 58.52, "elapsed_time": "2:34:43", "remaining_time": "1:49:41", "throughput": 8695.22, "total_tokens": 80722344} +{"current_steps": 119765, "total_steps": 204665, "loss": 0.0008, "lr": 8.775425104988971e-07, "epoch": 2.9258788752351403, "percentage": 58.52, "elapsed_time": "2:34:43", "remaining_time": "1:49:41", "throughput": 8695.3, "total_tokens": 80726184} +{"current_steps": 119770, "total_steps": 204665, "loss": 0.0004, "lr": 8.774578751645219e-07, "epoch": 2.9260010260669875, "percentage": 58.52, "elapsed_time": "2:34:44", "remaining_time": "1:49:40", "throughput": 8695.32, "total_tokens": 80729384} +{"current_steps": 119775, "total_steps": 204665, "loss": 0.0429, "lr": 8.773732407213055e-07, "epoch": 2.9261231768988347, "percentage": 58.52, "elapsed_time": "2:34:44", "remaining_time": "1:49:40", "throughput": 8695.36, "total_tokens": 80732712} +{"current_steps": 119780, "total_steps": 204665, "loss": 0.1408, "lr": 8.772886071698643e-07, "epoch": 2.926245327730682, "percentage": 58.52, "elapsed_time": "2:34:44", "remaining_time": "1:49:39", "throughput": 8695.38, "total_tokens": 80735848} +{"current_steps": 119785, "total_steps": 204665, "loss": 0.1689, "lr": 8.772039745108129e-07, "epoch": 2.926367478562529, "percentage": 58.53, "elapsed_time": "2:34:45", "remaining_time": "1:49:39", "throughput": 8695.41, "total_tokens": 80739112} +{"current_steps": 119790, "total_steps": 204665, "loss": 0.0001, "lr": 8.771193427447677e-07, "epoch": 2.9264896293943763, "percentage": 58.53, "elapsed_time": "2:34:45", "remaining_time": "1:49:39", "throughput": 8695.42, "total_tokens": 80742184} +{"current_steps": 119795, "total_steps": 204665, "loss": 0.0205, "lr": 8.770347118723433e-07, "epoch": 2.9266117802262235, "percentage": 58.53, "elapsed_time": "2:34:45", "remaining_time": "1:49:38", "throughput": 8695.47, "total_tokens": 80745704} +{"current_steps": 119800, "total_steps": 204665, "loss": 0.0481, "lr": 8.769500818941555e-07, "epoch": 2.9267339310580707, "percentage": 58.53, "elapsed_time": "2:34:46", "remaining_time": "1:49:38", "throughput": 8695.51, "total_tokens": 80749096} +{"current_steps": 119805, "total_steps": 204665, "loss": 0.0003, "lr": 8.768654528108202e-07, "epoch": 2.926856081889918, "percentage": 58.54, "elapsed_time": "2:34:46", "remaining_time": "1:49:37", "throughput": 8695.6, "total_tokens": 80753064} +{"current_steps": 119810, "total_steps": 204665, "loss": 0.0007, "lr": 8.767808246229523e-07, "epoch": 2.9269782327217646, "percentage": 58.54, "elapsed_time": "2:34:47", "remaining_time": "1:49:37", "throughput": 8695.62, "total_tokens": 80756328} +{"current_steps": 119815, "total_steps": 204665, "loss": 0.0002, "lr": 8.766961973311674e-07, "epoch": 2.9271003835536122, "percentage": 58.54, "elapsed_time": "2:34:47", "remaining_time": "1:49:37", "throughput": 8695.65, "total_tokens": 80759592} +{"current_steps": 119820, "total_steps": 204665, "loss": 0.0002, "lr": 8.766115709360808e-07, "epoch": 2.927222534385459, "percentage": 58.54, "elapsed_time": "2:34:47", "remaining_time": "1:49:36", "throughput": 8695.66, "total_tokens": 80762664} +{"current_steps": 119825, "total_steps": 204665, "loss": 0.0503, "lr": 8.76526945438308e-07, "epoch": 2.927344685217306, "percentage": 58.55, "elapsed_time": "2:34:48", "remaining_time": "1:49:36", "throughput": 8695.7, "total_tokens": 80766056} +{"current_steps": 119830, "total_steps": 204665, "loss": 0.0013, "lr": 8.764423208384647e-07, "epoch": 2.9274668360491534, "percentage": 58.55, "elapsed_time": "2:34:48", "remaining_time": "1:49:35", "throughput": 8695.73, "total_tokens": 80769256} +{"current_steps": 119835, "total_steps": 204665, "loss": 0.0003, "lr": 8.763576971371658e-07, "epoch": 2.9275889868810006, "percentage": 58.55, "elapsed_time": "2:34:48", "remaining_time": "1:49:35", "throughput": 8695.77, "total_tokens": 80772712} +{"current_steps": 119840, "total_steps": 204665, "loss": 0.0003, "lr": 8.762730743350273e-07, "epoch": 2.9277111377128477, "percentage": 58.55, "elapsed_time": "2:34:49", "remaining_time": "1:49:34", "throughput": 8695.82, "total_tokens": 80776232} +{"current_steps": 119845, "total_steps": 204665, "loss": 0.0004, "lr": 8.761884524326639e-07, "epoch": 2.927833288544695, "percentage": 58.56, "elapsed_time": "2:34:49", "remaining_time": "1:49:34", "throughput": 8695.84, "total_tokens": 80779432} +{"current_steps": 119850, "total_steps": 204665, "loss": 0.0, "lr": 8.761038314306918e-07, "epoch": 2.927955439376542, "percentage": 58.56, "elapsed_time": "2:34:49", "remaining_time": "1:49:34", "throughput": 8695.87, "total_tokens": 80782696} +{"current_steps": 119855, "total_steps": 204665, "loss": 0.0748, "lr": 8.760192113297255e-07, "epoch": 2.9280775902083893, "percentage": 58.56, "elapsed_time": "2:34:50", "remaining_time": "1:49:33", "throughput": 8695.94, "total_tokens": 80786408} +{"current_steps": 119860, "total_steps": 204665, "loss": 0.0003, "lr": 8.759345921303811e-07, "epoch": 2.9281997410402365, "percentage": 58.56, "elapsed_time": "2:34:50", "remaining_time": "1:49:33", "throughput": 8695.94, "total_tokens": 80789416} +{"current_steps": 119865, "total_steps": 204665, "loss": 0.0001, "lr": 8.75849973833274e-07, "epoch": 2.9283218918720837, "percentage": 58.57, "elapsed_time": "2:34:50", "remaining_time": "1:49:32", "throughput": 8695.95, "total_tokens": 80792424} +{"current_steps": 119870, "total_steps": 204665, "loss": 0.0918, "lr": 8.757653564390187e-07, "epoch": 2.928444042703931, "percentage": 58.57, "elapsed_time": "2:34:51", "remaining_time": "1:49:32", "throughput": 8695.96, "total_tokens": 80795496} +{"current_steps": 119875, "total_steps": 204665, "loss": 0.0604, "lr": 8.756807399482316e-07, "epoch": 2.928566193535778, "percentage": 58.57, "elapsed_time": "2:34:51", "remaining_time": "1:49:32", "throughput": 8696.01, "total_tokens": 80798952} +{"current_steps": 119880, "total_steps": 204665, "loss": 0.0002, "lr": 8.755961243615273e-07, "epoch": 2.9286883443676253, "percentage": 58.57, "elapsed_time": "2:34:51", "remaining_time": "1:49:31", "throughput": 8696.05, "total_tokens": 80802408} +{"current_steps": 119885, "total_steps": 204665, "loss": 0.0001, "lr": 8.755115096795218e-07, "epoch": 2.9288104951994725, "percentage": 58.58, "elapsed_time": "2:34:52", "remaining_time": "1:49:31", "throughput": 8696.08, "total_tokens": 80805736} +{"current_steps": 119890, "total_steps": 204665, "loss": 0.0742, "lr": 8.754268959028297e-07, "epoch": 2.9289326460313196, "percentage": 58.58, "elapsed_time": "2:34:52", "remaining_time": "1:49:30", "throughput": 8696.12, "total_tokens": 80809064} +{"current_steps": 119895, "total_steps": 204665, "loss": 0.0888, "lr": 8.753422830320666e-07, "epoch": 2.9290547968631664, "percentage": 58.58, "elapsed_time": "2:34:52", "remaining_time": "1:49:30", "throughput": 8696.13, "total_tokens": 80812200} +{"current_steps": 119900, "total_steps": 204665, "loss": 0.0339, "lr": 8.752576710678484e-07, "epoch": 2.929176947695014, "percentage": 58.58, "elapsed_time": "2:34:53", "remaining_time": "1:49:29", "throughput": 8696.17, "total_tokens": 80815592} +{"current_steps": 119905, "total_steps": 204665, "loss": 0.0002, "lr": 8.751730600107896e-07, "epoch": 2.9292990985268608, "percentage": 58.59, "elapsed_time": "2:34:53", "remaining_time": "1:49:29", "throughput": 8696.19, "total_tokens": 80818792} +{"current_steps": 119910, "total_steps": 204665, "loss": 0.1176, "lr": 8.750884498615063e-07, "epoch": 2.9294212493587084, "percentage": 58.59, "elapsed_time": "2:34:53", "remaining_time": "1:49:29", "throughput": 8696.23, "total_tokens": 80822248} +{"current_steps": 119915, "total_steps": 204665, "loss": 0.0195, "lr": 8.750038406206131e-07, "epoch": 2.929543400190555, "percentage": 58.59, "elapsed_time": "2:34:54", "remaining_time": "1:49:28", "throughput": 8696.26, "total_tokens": 80825512} +{"current_steps": 119920, "total_steps": 204665, "loss": 0.0833, "lr": 8.749192322887255e-07, "epoch": 2.9296655510224023, "percentage": 58.59, "elapsed_time": "2:34:54", "remaining_time": "1:49:28", "throughput": 8696.26, "total_tokens": 80828392} +{"current_steps": 119925, "total_steps": 204665, "loss": 0.064, "lr": 8.748346248664593e-07, "epoch": 2.9297877018542495, "percentage": 58.6, "elapsed_time": "2:34:54", "remaining_time": "1:49:27", "throughput": 8696.29, "total_tokens": 80831656} +{"current_steps": 119930, "total_steps": 204665, "loss": 0.0004, "lr": 8.74750018354429e-07, "epoch": 2.9299098526860967, "percentage": 58.6, "elapsed_time": "2:34:55", "remaining_time": "1:49:27", "throughput": 8696.32, "total_tokens": 80835048} +{"current_steps": 119935, "total_steps": 204665, "loss": 0.0185, "lr": 8.746654127532505e-07, "epoch": 2.930032003517944, "percentage": 58.6, "elapsed_time": "2:34:55", "remaining_time": "1:49:27", "throughput": 8696.34, "total_tokens": 80838184} +{"current_steps": 119940, "total_steps": 204665, "loss": 0.0455, "lr": 8.745808080635385e-07, "epoch": 2.930154154349791, "percentage": 58.6, "elapsed_time": "2:34:55", "remaining_time": "1:49:26", "throughput": 8696.34, "total_tokens": 80841128} +{"current_steps": 119945, "total_steps": 204665, "loss": 0.0456, "lr": 8.744962042859089e-07, "epoch": 2.9302763051816383, "percentage": 58.61, "elapsed_time": "2:34:56", "remaining_time": "1:49:26", "throughput": 8696.37, "total_tokens": 80844456} +{"current_steps": 119950, "total_steps": 204665, "loss": 0.0452, "lr": 8.744116014209763e-07, "epoch": 2.9303984560134855, "percentage": 58.61, "elapsed_time": "2:34:56", "remaining_time": "1:49:25", "throughput": 8696.41, "total_tokens": 80847848} +{"current_steps": 119955, "total_steps": 204665, "loss": 0.0002, "lr": 8.743269994693565e-07, "epoch": 2.9305206068453327, "percentage": 58.61, "elapsed_time": "2:34:57", "remaining_time": "1:49:25", "throughput": 8696.43, "total_tokens": 80851048} +{"current_steps": 119960, "total_steps": 204665, "loss": 0.0404, "lr": 8.742423984316648e-07, "epoch": 2.93064275767718, "percentage": 58.61, "elapsed_time": "2:34:57", "remaining_time": "1:49:24", "throughput": 8696.43, "total_tokens": 80853992} +{"current_steps": 119965, "total_steps": 204665, "loss": 0.0002, "lr": 8.741577983085161e-07, "epoch": 2.930764908509027, "percentage": 58.62, "elapsed_time": "2:34:57", "remaining_time": "1:49:24", "throughput": 8696.54, "total_tokens": 80858088} +{"current_steps": 119970, "total_steps": 204665, "loss": 0.0668, "lr": 8.740731991005257e-07, "epoch": 2.9308870593408742, "percentage": 58.62, "elapsed_time": "2:34:58", "remaining_time": "1:49:24", "throughput": 8696.58, "total_tokens": 80861480} +{"current_steps": 119975, "total_steps": 204665, "loss": 0.041, "lr": 8.739886008083088e-07, "epoch": 2.9310092101727214, "percentage": 58.62, "elapsed_time": "2:34:58", "remaining_time": "1:49:23", "throughput": 8696.61, "total_tokens": 80864808} +{"current_steps": 119980, "total_steps": 204665, "loss": 0.0002, "lr": 8.739040034324805e-07, "epoch": 2.931131361004568, "percentage": 58.62, "elapsed_time": "2:34:58", "remaining_time": "1:49:23", "throughput": 8696.65, "total_tokens": 80868328} +{"current_steps": 119985, "total_steps": 204665, "loss": 0.0014, "lr": 8.738194069736566e-07, "epoch": 2.931253511836416, "percentage": 58.63, "elapsed_time": "2:34:59", "remaining_time": "1:49:22", "throughput": 8696.68, "total_tokens": 80871656} +{"current_steps": 119990, "total_steps": 204665, "loss": 0.0004, "lr": 8.737348114324516e-07, "epoch": 2.9313756626682625, "percentage": 58.63, "elapsed_time": "2:34:59", "remaining_time": "1:49:22", "throughput": 8696.72, "total_tokens": 80874984} +{"current_steps": 119995, "total_steps": 204665, "loss": 0.0002, "lr": 8.736502168094814e-07, "epoch": 2.93149781350011, "percentage": 58.63, "elapsed_time": "2:34:59", "remaining_time": "1:49:22", "throughput": 8696.71, "total_tokens": 80877864} +{"current_steps": 120000, "total_steps": 204665, "loss": 0.0002, "lr": 8.735656231053603e-07, "epoch": 2.931619964331957, "percentage": 58.63, "elapsed_time": "2:35:00", "remaining_time": "1:49:21", "throughput": 8696.78, "total_tokens": 80881576} +{"current_steps": 120005, "total_steps": 204665, "loss": 0.0001, "lr": 8.734810303207046e-07, "epoch": 2.931742115163804, "percentage": 58.63, "elapsed_time": "2:35:00", "remaining_time": "1:49:21", "throughput": 8696.84, "total_tokens": 80885160} +{"current_steps": 120010, "total_steps": 204665, "loss": 0.0001, "lr": 8.733964384561282e-07, "epoch": 2.9318642659956513, "percentage": 58.64, "elapsed_time": "2:35:00", "remaining_time": "1:49:20", "throughput": 8696.88, "total_tokens": 80888616} +{"current_steps": 120015, "total_steps": 204665, "loss": 0.0528, "lr": 8.733118475122473e-07, "epoch": 2.9319864168274985, "percentage": 58.64, "elapsed_time": "2:35:01", "remaining_time": "1:49:20", "throughput": 8696.94, "total_tokens": 80892200} +{"current_steps": 120020, "total_steps": 204665, "loss": 0.0002, "lr": 8.732272574896769e-07, "epoch": 2.9321085676593457, "percentage": 58.64, "elapsed_time": "2:35:01", "remaining_time": "1:49:20", "throughput": 8696.96, "total_tokens": 80895400} +{"current_steps": 120025, "total_steps": 204665, "loss": 0.0595, "lr": 8.731426683890315e-07, "epoch": 2.932230718491193, "percentage": 58.64, "elapsed_time": "2:35:01", "remaining_time": "1:49:19", "throughput": 8696.96, "total_tokens": 80898344} +{"current_steps": 120030, "total_steps": 204665, "loss": 0.0001, "lr": 8.730580802109273e-07, "epoch": 2.93235286932304, "percentage": 58.65, "elapsed_time": "2:35:02", "remaining_time": "1:49:19", "throughput": 8697.02, "total_tokens": 80901992} +{"current_steps": 120035, "total_steps": 204665, "loss": 0.081, "lr": 8.729734929559785e-07, "epoch": 2.9324750201548873, "percentage": 58.65, "elapsed_time": "2:35:02", "remaining_time": "1:49:18", "throughput": 8697.08, "total_tokens": 80905576} +{"current_steps": 120040, "total_steps": 204665, "loss": 0.0757, "lr": 8.728889066248009e-07, "epoch": 2.9325971709867344, "percentage": 58.65, "elapsed_time": "2:35:02", "remaining_time": "1:49:18", "throughput": 8697.11, "total_tokens": 80908904} +{"current_steps": 120045, "total_steps": 204665, "loss": 0.0523, "lr": 8.72804321218009e-07, "epoch": 2.9327193218185816, "percentage": 58.65, "elapsed_time": "2:35:03", "remaining_time": "1:49:17", "throughput": 8697.13, "total_tokens": 80912040} +{"current_steps": 120050, "total_steps": 204665, "loss": 0.0001, "lr": 8.727197367362182e-07, "epoch": 2.932841472650429, "percentage": 58.66, "elapsed_time": "2:35:03", "remaining_time": "1:49:17", "throughput": 8697.2, "total_tokens": 80915816} +{"current_steps": 120055, "total_steps": 204665, "loss": 0.0552, "lr": 8.726351531800442e-07, "epoch": 2.932963623482276, "percentage": 58.66, "elapsed_time": "2:35:04", "remaining_time": "1:49:17", "throughput": 8697.27, "total_tokens": 80919592} +{"current_steps": 120060, "total_steps": 204665, "loss": 0.0407, "lr": 8.725505705501012e-07, "epoch": 2.933085774314123, "percentage": 58.66, "elapsed_time": "2:35:04", "remaining_time": "1:49:16", "throughput": 8697.33, "total_tokens": 80923112} +{"current_steps": 120065, "total_steps": 204665, "loss": 0.1252, "lr": 8.724659888470052e-07, "epoch": 2.9332079251459704, "percentage": 58.66, "elapsed_time": "2:35:04", "remaining_time": "1:49:16", "throughput": 8697.38, "total_tokens": 80926696} +{"current_steps": 120070, "total_steps": 204665, "loss": 0.1547, "lr": 8.723814080713705e-07, "epoch": 2.9333300759778176, "percentage": 58.67, "elapsed_time": "2:35:05", "remaining_time": "1:49:15", "throughput": 8697.4, "total_tokens": 80929896} +{"current_steps": 120075, "total_steps": 204665, "loss": 0.0726, "lr": 8.722968282238124e-07, "epoch": 2.9334522268096643, "percentage": 58.67, "elapsed_time": "2:35:05", "remaining_time": "1:49:15", "throughput": 8697.47, "total_tokens": 80933608} +{"current_steps": 120080, "total_steps": 204665, "loss": 0.0006, "lr": 8.722122493049465e-07, "epoch": 2.933574377641512, "percentage": 58.67, "elapsed_time": "2:35:05", "remaining_time": "1:49:15", "throughput": 8697.54, "total_tokens": 80937320} +{"current_steps": 120085, "total_steps": 204665, "loss": 0.0139, "lr": 8.721276713153871e-07, "epoch": 2.9336965284733587, "percentage": 58.67, "elapsed_time": "2:35:06", "remaining_time": "1:49:14", "throughput": 8697.54, "total_tokens": 80940264} +{"current_steps": 120090, "total_steps": 204665, "loss": 0.0001, "lr": 8.720430942557502e-07, "epoch": 2.9338186793052063, "percentage": 58.68, "elapsed_time": "2:35:06", "remaining_time": "1:49:14", "throughput": 8697.59, "total_tokens": 80943784} +{"current_steps": 120095, "total_steps": 204665, "loss": 0.0395, "lr": 8.719585181266498e-07, "epoch": 2.933940830137053, "percentage": 58.68, "elapsed_time": "2:35:06", "remaining_time": "1:49:13", "throughput": 8697.59, "total_tokens": 80946728} +{"current_steps": 120100, "total_steps": 204665, "loss": 0.0009, "lr": 8.718739429287018e-07, "epoch": 2.9340629809689003, "percentage": 58.68, "elapsed_time": "2:35:07", "remaining_time": "1:49:13", "throughput": 8697.64, "total_tokens": 80950312} +{"current_steps": 120105, "total_steps": 204665, "loss": 0.0001, "lr": 8.717893686625206e-07, "epoch": 2.9341851318007475, "percentage": 58.68, "elapsed_time": "2:35:07", "remaining_time": "1:49:12", "throughput": 8697.64, "total_tokens": 80953256} +{"current_steps": 120110, "total_steps": 204665, "loss": 0.0003, "lr": 8.717047953287217e-07, "epoch": 2.9343072826325947, "percentage": 58.69, "elapsed_time": "2:35:07", "remaining_time": "1:49:12", "throughput": 8697.65, "total_tokens": 80956392} +{"current_steps": 120115, "total_steps": 204665, "loss": 0.0006, "lr": 8.716202229279204e-07, "epoch": 2.934429433464442, "percentage": 58.69, "elapsed_time": "2:35:08", "remaining_time": "1:49:12", "throughput": 8697.66, "total_tokens": 80959464} +{"current_steps": 120120, "total_steps": 204665, "loss": 0.1412, "lr": 8.715356514607312e-07, "epoch": 2.934551584296289, "percentage": 58.69, "elapsed_time": "2:35:08", "remaining_time": "1:49:11", "throughput": 8697.71, "total_tokens": 80962984} +{"current_steps": 120125, "total_steps": 204665, "loss": 0.0863, "lr": 8.714510809277692e-07, "epoch": 2.9346737351281362, "percentage": 58.69, "elapsed_time": "2:35:08", "remaining_time": "1:49:11", "throughput": 8697.76, "total_tokens": 80966568} +{"current_steps": 120130, "total_steps": 204665, "loss": 0.0521, "lr": 8.713665113296495e-07, "epoch": 2.9347958859599834, "percentage": 58.7, "elapsed_time": "2:35:09", "remaining_time": "1:49:10", "throughput": 8697.79, "total_tokens": 80969960} +{"current_steps": 120135, "total_steps": 204665, "loss": 0.0003, "lr": 8.71281942666987e-07, "epoch": 2.9349180367918306, "percentage": 58.7, "elapsed_time": "2:35:09", "remaining_time": "1:49:10", "throughput": 8697.85, "total_tokens": 80973544} +{"current_steps": 120140, "total_steps": 204665, "loss": 0.0001, "lr": 8.711973749403974e-07, "epoch": 2.935040187623678, "percentage": 58.7, "elapsed_time": "2:35:09", "remaining_time": "1:49:10", "throughput": 8697.89, "total_tokens": 80976936} +{"current_steps": 120145, "total_steps": 204665, "loss": 0.0008, "lr": 8.711128081504945e-07, "epoch": 2.935162338455525, "percentage": 58.7, "elapsed_time": "2:35:10", "remaining_time": "1:49:09", "throughput": 8697.93, "total_tokens": 80980392} +{"current_steps": 120150, "total_steps": 204665, "loss": 0.0004, "lr": 8.710282422978942e-07, "epoch": 2.935284489287372, "percentage": 58.71, "elapsed_time": "2:35:10", "remaining_time": "1:49:09", "throughput": 8697.94, "total_tokens": 80983464} +{"current_steps": 120155, "total_steps": 204665, "loss": 0.0001, "lr": 8.709436773832111e-07, "epoch": 2.9354066401192194, "percentage": 58.71, "elapsed_time": "2:35:11", "remaining_time": "1:49:08", "throughput": 8698.0, "total_tokens": 80987112} +{"current_steps": 120160, "total_steps": 204665, "loss": 0.0004, "lr": 8.708591134070607e-07, "epoch": 2.935528790951066, "percentage": 58.71, "elapsed_time": "2:35:11", "remaining_time": "1:49:08", "throughput": 8698.03, "total_tokens": 80990440} +{"current_steps": 120165, "total_steps": 204665, "loss": 0.0001, "lr": 8.707745503700569e-07, "epoch": 2.9356509417829137, "percentage": 58.71, "elapsed_time": "2:35:11", "remaining_time": "1:49:07", "throughput": 8698.07, "total_tokens": 80993768} +{"current_steps": 120170, "total_steps": 204665, "loss": 0.0545, "lr": 8.706899882728157e-07, "epoch": 2.9357730926147605, "percentage": 58.72, "elapsed_time": "2:35:12", "remaining_time": "1:49:07", "throughput": 8698.09, "total_tokens": 80997032} +{"current_steps": 120175, "total_steps": 204665, "loss": 0.0672, "lr": 8.70605427115952e-07, "epoch": 2.935895243446608, "percentage": 58.72, "elapsed_time": "2:35:12", "remaining_time": "1:49:07", "throughput": 8698.15, "total_tokens": 81000744} +{"current_steps": 120180, "total_steps": 204665, "loss": 0.0002, "lr": 8.705208669000798e-07, "epoch": 2.936017394278455, "percentage": 58.72, "elapsed_time": "2:35:12", "remaining_time": "1:49:06", "throughput": 8698.15, "total_tokens": 81003624} +{"current_steps": 120185, "total_steps": 204665, "loss": 0.0415, "lr": 8.704363076258152e-07, "epoch": 2.936139545110302, "percentage": 58.72, "elapsed_time": "2:35:13", "remaining_time": "1:49:06", "throughput": 8698.2, "total_tokens": 81007144} +{"current_steps": 120190, "total_steps": 204665, "loss": 0.0488, "lr": 8.703517492937721e-07, "epoch": 2.9362616959421493, "percentage": 58.73, "elapsed_time": "2:35:13", "remaining_time": "1:49:05", "throughput": 8698.24, "total_tokens": 81010536} +{"current_steps": 120195, "total_steps": 204665, "loss": 0.0002, "lr": 8.702671919045665e-07, "epoch": 2.9363838467739964, "percentage": 58.73, "elapsed_time": "2:35:13", "remaining_time": "1:49:05", "throughput": 8698.29, "total_tokens": 81014120} +{"current_steps": 120200, "total_steps": 204665, "loss": 0.0001, "lr": 8.701826354588123e-07, "epoch": 2.9365059976058436, "percentage": 58.73, "elapsed_time": "2:35:14", "remaining_time": "1:49:05", "throughput": 8698.3, "total_tokens": 81017128} +{"current_steps": 120205, "total_steps": 204665, "loss": 0.0001, "lr": 8.70098079957125e-07, "epoch": 2.936628148437691, "percentage": 58.73, "elapsed_time": "2:35:14", "remaining_time": "1:49:04", "throughput": 8698.33, "total_tokens": 81020392} +{"current_steps": 120210, "total_steps": 204665, "loss": 0.0936, "lr": 8.700135254001197e-07, "epoch": 2.936750299269538, "percentage": 58.74, "elapsed_time": "2:35:14", "remaining_time": "1:49:04", "throughput": 8698.34, "total_tokens": 81023400} +{"current_steps": 120215, "total_steps": 204665, "loss": 0.0829, "lr": 8.699289717884106e-07, "epoch": 2.936872450101385, "percentage": 58.74, "elapsed_time": "2:35:15", "remaining_time": "1:49:03", "throughput": 8698.38, "total_tokens": 81026920} +{"current_steps": 120220, "total_steps": 204665, "loss": 0.0003, "lr": 8.698444191226134e-07, "epoch": 2.9369946009332324, "percentage": 58.74, "elapsed_time": "2:35:15", "remaining_time": "1:49:03", "throughput": 8698.44, "total_tokens": 81030504} +{"current_steps": 120225, "total_steps": 204665, "loss": 0.0006, "lr": 8.697598674033424e-07, "epoch": 2.9371167517650796, "percentage": 58.74, "elapsed_time": "2:35:15", "remaining_time": "1:49:02", "throughput": 8698.47, "total_tokens": 81033832} +{"current_steps": 120230, "total_steps": 204665, "loss": 0.0007, "lr": 8.696753166312125e-07, "epoch": 2.9372389025969268, "percentage": 58.74, "elapsed_time": "2:35:16", "remaining_time": "1:49:02", "throughput": 8698.51, "total_tokens": 81037160} +{"current_steps": 120235, "total_steps": 204665, "loss": 0.0003, "lr": 8.695907668068392e-07, "epoch": 2.937361053428774, "percentage": 58.75, "elapsed_time": "2:35:16", "remaining_time": "1:49:02", "throughput": 8698.56, "total_tokens": 81040680} +{"current_steps": 120240, "total_steps": 204665, "loss": 0.0676, "lr": 8.695062179308365e-07, "epoch": 2.937483204260621, "percentage": 58.75, "elapsed_time": "2:35:16", "remaining_time": "1:49:01", "throughput": 8698.61, "total_tokens": 81044264} +{"current_steps": 120245, "total_steps": 204665, "loss": 0.0002, "lr": 8.694216700038199e-07, "epoch": 2.9376053550924683, "percentage": 58.75, "elapsed_time": "2:35:17", "remaining_time": "1:49:01", "throughput": 8698.61, "total_tokens": 81047144} +{"current_steps": 120250, "total_steps": 204665, "loss": 0.0415, "lr": 8.693371230264038e-07, "epoch": 2.9377275059243155, "percentage": 58.75, "elapsed_time": "2:35:17", "remaining_time": "1:49:00", "throughput": 8698.67, "total_tokens": 81050792} +{"current_steps": 120255, "total_steps": 204665, "loss": 0.0001, "lr": 8.692525769992037e-07, "epoch": 2.9378496567561623, "percentage": 58.76, "elapsed_time": "2:35:17", "remaining_time": "1:49:00", "throughput": 8698.7, "total_tokens": 81054056} +{"current_steps": 120260, "total_steps": 204665, "loss": 0.0015, "lr": 8.691680319228337e-07, "epoch": 2.93797180758801, "percentage": 58.76, "elapsed_time": "2:35:18", "remaining_time": "1:49:00", "throughput": 8698.76, "total_tokens": 81057704} +{"current_steps": 120265, "total_steps": 204665, "loss": 0.0378, "lr": 8.690834877979087e-07, "epoch": 2.9380939584198567, "percentage": 58.76, "elapsed_time": "2:35:18", "remaining_time": "1:48:59", "throughput": 8698.8, "total_tokens": 81061096} +{"current_steps": 120270, "total_steps": 204665, "loss": 0.0002, "lr": 8.689989446250444e-07, "epoch": 2.938216109251704, "percentage": 58.76, "elapsed_time": "2:35:19", "remaining_time": "1:48:59", "throughput": 8698.82, "total_tokens": 81064360} +{"current_steps": 120275, "total_steps": 204665, "loss": 0.0424, "lr": 8.689144024048549e-07, "epoch": 2.938338260083551, "percentage": 58.77, "elapsed_time": "2:35:19", "remaining_time": "1:48:58", "throughput": 8698.88, "total_tokens": 81067880} +{"current_steps": 120280, "total_steps": 204665, "loss": 0.0002, "lr": 8.688298611379548e-07, "epoch": 2.9384604109153982, "percentage": 58.77, "elapsed_time": "2:35:19", "remaining_time": "1:48:58", "throughput": 8698.93, "total_tokens": 81071400} +{"current_steps": 120285, "total_steps": 204665, "loss": 0.0571, "lr": 8.687453208249594e-07, "epoch": 2.9385825617472454, "percentage": 58.77, "elapsed_time": "2:35:20", "remaining_time": "1:48:58", "throughput": 8699.0, "total_tokens": 81075240} +{"current_steps": 120290, "total_steps": 204665, "loss": 0.0005, "lr": 8.686607814664836e-07, "epoch": 2.9387047125790926, "percentage": 58.77, "elapsed_time": "2:35:20", "remaining_time": "1:48:57", "throughput": 8699.07, "total_tokens": 81079016} +{"current_steps": 120295, "total_steps": 204665, "loss": 0.0893, "lr": 8.685762430631415e-07, "epoch": 2.93882686341094, "percentage": 58.78, "elapsed_time": "2:35:20", "remaining_time": "1:48:57", "throughput": 8699.13, "total_tokens": 81082600} +{"current_steps": 120300, "total_steps": 204665, "loss": 0.0513, "lr": 8.684917056155482e-07, "epoch": 2.938949014242787, "percentage": 58.78, "elapsed_time": "2:35:21", "remaining_time": "1:48:56", "throughput": 8699.22, "total_tokens": 81086568} +{"current_steps": 120305, "total_steps": 204665, "loss": 0.0001, "lr": 8.684071691243191e-07, "epoch": 2.939071165074634, "percentage": 58.78, "elapsed_time": "2:35:21", "remaining_time": "1:48:56", "throughput": 8699.31, "total_tokens": 81090600} +{"current_steps": 120310, "total_steps": 204665, "loss": 0.0467, "lr": 8.68322633590068e-07, "epoch": 2.9391933159064814, "percentage": 58.78, "elapsed_time": "2:35:21", "remaining_time": "1:48:55", "throughput": 8699.35, "total_tokens": 81093928} +{"current_steps": 120315, "total_steps": 204665, "loss": 0.0514, "lr": 8.682380990134106e-07, "epoch": 2.9393154667383286, "percentage": 58.79, "elapsed_time": "2:35:22", "remaining_time": "1:48:55", "throughput": 8699.37, "total_tokens": 81097192} +{"current_steps": 120320, "total_steps": 204665, "loss": 0.0329, "lr": 8.681535653949607e-07, "epoch": 2.9394376175701757, "percentage": 58.79, "elapsed_time": "2:35:22", "remaining_time": "1:48:55", "throughput": 8699.44, "total_tokens": 81100840} +{"current_steps": 120325, "total_steps": 204665, "loss": 0.043, "lr": 8.680690327353338e-07, "epoch": 2.939559768402023, "percentage": 58.79, "elapsed_time": "2:35:22", "remaining_time": "1:48:54", "throughput": 8699.46, "total_tokens": 81104104} +{"current_steps": 120330, "total_steps": 204665, "loss": 0.0461, "lr": 8.679845010351446e-07, "epoch": 2.93968191923387, "percentage": 58.79, "elapsed_time": "2:35:23", "remaining_time": "1:48:54", "throughput": 8699.49, "total_tokens": 81107368} +{"current_steps": 120335, "total_steps": 204665, "loss": 0.0743, "lr": 8.678999702950069e-07, "epoch": 2.9398040700657173, "percentage": 58.8, "elapsed_time": "2:35:23", "remaining_time": "1:48:53", "throughput": 8699.51, "total_tokens": 81110504} +{"current_steps": 120340, "total_steps": 204665, "loss": 0.0001, "lr": 8.678154405155369e-07, "epoch": 2.939926220897564, "percentage": 58.8, "elapsed_time": "2:35:23", "remaining_time": "1:48:53", "throughput": 8699.56, "total_tokens": 81114024} +{"current_steps": 120345, "total_steps": 204665, "loss": 0.0901, "lr": 8.677309116973481e-07, "epoch": 2.9400483717294117, "percentage": 58.8, "elapsed_time": "2:35:24", "remaining_time": "1:48:53", "throughput": 8699.59, "total_tokens": 81117352} +{"current_steps": 120350, "total_steps": 204665, "loss": 0.0569, "lr": 8.67646383841056e-07, "epoch": 2.9401705225612584, "percentage": 58.8, "elapsed_time": "2:35:24", "remaining_time": "1:48:52", "throughput": 8699.62, "total_tokens": 81120616} +{"current_steps": 120355, "total_steps": 204665, "loss": 0.0745, "lr": 8.675618569472747e-07, "epoch": 2.940292673393106, "percentage": 58.81, "elapsed_time": "2:35:24", "remaining_time": "1:48:52", "throughput": 8699.62, "total_tokens": 81123560} +{"current_steps": 120360, "total_steps": 204665, "loss": 0.049, "lr": 8.674773310166191e-07, "epoch": 2.940414824224953, "percentage": 58.81, "elapsed_time": "2:35:25", "remaining_time": "1:48:51", "throughput": 8699.67, "total_tokens": 81127080} +{"current_steps": 120365, "total_steps": 204665, "loss": 0.0007, "lr": 8.673928060497045e-07, "epoch": 2.9405369750568, "percentage": 58.81, "elapsed_time": "2:35:25", "remaining_time": "1:48:51", "throughput": 8699.75, "total_tokens": 81130920} +{"current_steps": 120370, "total_steps": 204665, "loss": 0.0001, "lr": 8.673082820471447e-07, "epoch": 2.940659125888647, "percentage": 58.81, "elapsed_time": "2:35:26", "remaining_time": "1:48:50", "throughput": 8699.76, "total_tokens": 81133992} +{"current_steps": 120375, "total_steps": 204665, "loss": 0.0162, "lr": 8.67223759009555e-07, "epoch": 2.9407812767204944, "percentage": 58.82, "elapsed_time": "2:35:26", "remaining_time": "1:48:50", "throughput": 8699.8, "total_tokens": 81137448} +{"current_steps": 120380, "total_steps": 204665, "loss": 0.0002, "lr": 8.671392369375498e-07, "epoch": 2.9409034275523416, "percentage": 58.82, "elapsed_time": "2:35:26", "remaining_time": "1:48:50", "throughput": 8699.84, "total_tokens": 81140904} +{"current_steps": 120385, "total_steps": 204665, "loss": 0.0001, "lr": 8.670547158317434e-07, "epoch": 2.9410255783841888, "percentage": 58.82, "elapsed_time": "2:35:27", "remaining_time": "1:48:49", "throughput": 8699.9, "total_tokens": 81144552} +{"current_steps": 120390, "total_steps": 204665, "loss": 0.0562, "lr": 8.669701956927515e-07, "epoch": 2.941147729216036, "percentage": 58.82, "elapsed_time": "2:35:27", "remaining_time": "1:48:49", "throughput": 8699.95, "total_tokens": 81148072} +{"current_steps": 120395, "total_steps": 204665, "loss": 0.1314, "lr": 8.668856765211876e-07, "epoch": 2.941269880047883, "percentage": 58.83, "elapsed_time": "2:35:27", "remaining_time": "1:48:48", "throughput": 8699.97, "total_tokens": 81151208} +{"current_steps": 120400, "total_steps": 204665, "loss": 0.0001, "lr": 8.668011583176673e-07, "epoch": 2.9413920308797303, "percentage": 58.83, "elapsed_time": "2:35:28", "remaining_time": "1:48:48", "throughput": 8700.02, "total_tokens": 81154792} +{"current_steps": 120405, "total_steps": 204665, "loss": 0.0786, "lr": 8.667166410828044e-07, "epoch": 2.9415141817115775, "percentage": 58.83, "elapsed_time": "2:35:28", "remaining_time": "1:48:48", "throughput": 8700.05, "total_tokens": 81158056} +{"current_steps": 120410, "total_steps": 204665, "loss": 0.0003, "lr": 8.666321248172143e-07, "epoch": 2.9416363325434247, "percentage": 58.83, "elapsed_time": "2:35:28", "remaining_time": "1:48:47", "throughput": 8700.12, "total_tokens": 81161768} +{"current_steps": 120415, "total_steps": 204665, "loss": 0.0005, "lr": 8.665476095215109e-07, "epoch": 2.941758483375272, "percentage": 58.84, "elapsed_time": "2:35:29", "remaining_time": "1:48:47", "throughput": 8700.17, "total_tokens": 81165288} +{"current_steps": 120420, "total_steps": 204665, "loss": 0.0666, "lr": 8.664630951963091e-07, "epoch": 2.941880634207119, "percentage": 58.84, "elapsed_time": "2:35:29", "remaining_time": "1:48:46", "throughput": 8700.21, "total_tokens": 81168680} +{"current_steps": 120425, "total_steps": 204665, "loss": 0.0008, "lr": 8.66378581842224e-07, "epoch": 2.942002785038966, "percentage": 58.84, "elapsed_time": "2:35:29", "remaining_time": "1:48:46", "throughput": 8700.29, "total_tokens": 81172584} +{"current_steps": 120430, "total_steps": 204665, "loss": 0.0002, "lr": 8.662940694598697e-07, "epoch": 2.9421249358708135, "percentage": 58.84, "elapsed_time": "2:35:30", "remaining_time": "1:48:46", "throughput": 8700.32, "total_tokens": 81175784} +{"current_steps": 120435, "total_steps": 204665, "loss": 0.0002, "lr": 8.662095580498607e-07, "epoch": 2.94224708670266, "percentage": 58.84, "elapsed_time": "2:35:30", "remaining_time": "1:48:45", "throughput": 8700.38, "total_tokens": 81179496} +{"current_steps": 120440, "total_steps": 204665, "loss": 0.0003, "lr": 8.66125047612812e-07, "epoch": 2.942369237534508, "percentage": 58.85, "elapsed_time": "2:35:30", "remaining_time": "1:48:45", "throughput": 8700.42, "total_tokens": 81182888} +{"current_steps": 120445, "total_steps": 204665, "loss": 0.0444, "lr": 8.660405381493381e-07, "epoch": 2.9424913883663546, "percentage": 58.85, "elapsed_time": "2:35:31", "remaining_time": "1:48:44", "throughput": 8700.48, "total_tokens": 81186472} +{"current_steps": 120450, "total_steps": 204665, "loss": 0.0001, "lr": 8.65956029660053e-07, "epoch": 2.942613539198202, "percentage": 58.85, "elapsed_time": "2:35:31", "remaining_time": "1:48:44", "throughput": 8700.52, "total_tokens": 81189928} +{"current_steps": 120455, "total_steps": 204665, "loss": 0.0373, "lr": 8.658715221455717e-07, "epoch": 2.942735690030049, "percentage": 58.85, "elapsed_time": "2:35:31", "remaining_time": "1:48:43", "throughput": 8700.53, "total_tokens": 81193000} +{"current_steps": 120460, "total_steps": 204665, "loss": 0.0634, "lr": 8.657870156065091e-07, "epoch": 2.942857840861896, "percentage": 58.86, "elapsed_time": "2:35:32", "remaining_time": "1:48:43", "throughput": 8700.55, "total_tokens": 81196136} +{"current_steps": 120465, "total_steps": 204665, "loss": 0.0003, "lr": 8.657025100434792e-07, "epoch": 2.9429799916937434, "percentage": 58.86, "elapsed_time": "2:35:32", "remaining_time": "1:48:43", "throughput": 8700.56, "total_tokens": 81199144} +{"current_steps": 120470, "total_steps": 204665, "loss": 0.0535, "lr": 8.65618005457097e-07, "epoch": 2.9431021425255905, "percentage": 58.86, "elapsed_time": "2:35:32", "remaining_time": "1:48:42", "throughput": 8700.61, "total_tokens": 81202728} +{"current_steps": 120475, "total_steps": 204665, "loss": 0.0379, "lr": 8.655335018479764e-07, "epoch": 2.9432242933574377, "percentage": 58.86, "elapsed_time": "2:35:33", "remaining_time": "1:48:42", "throughput": 8700.63, "total_tokens": 81205864} +{"current_steps": 120480, "total_steps": 204665, "loss": 0.0515, "lr": 8.654489992167326e-07, "epoch": 2.943346444189285, "percentage": 58.87, "elapsed_time": "2:35:33", "remaining_time": "1:48:41", "throughput": 8700.66, "total_tokens": 81209192} +{"current_steps": 120485, "total_steps": 204665, "loss": 0.0004, "lr": 8.653644975639802e-07, "epoch": 2.943468595021132, "percentage": 58.87, "elapsed_time": "2:35:34", "remaining_time": "1:48:41", "throughput": 8700.73, "total_tokens": 81212904} +{"current_steps": 120490, "total_steps": 204665, "loss": 0.0001, "lr": 8.652799968903328e-07, "epoch": 2.9435907458529793, "percentage": 58.87, "elapsed_time": "2:35:34", "remaining_time": "1:48:41", "throughput": 8700.77, "total_tokens": 81216360} +{"current_steps": 120495, "total_steps": 204665, "loss": 0.0606, "lr": 8.65195497196406e-07, "epoch": 2.9437128966848265, "percentage": 58.87, "elapsed_time": "2:35:34", "remaining_time": "1:48:40", "throughput": 8700.74, "total_tokens": 81218984} +{"current_steps": 120500, "total_steps": 204665, "loss": 0.0446, "lr": 8.651109984828133e-07, "epoch": 2.9438350475166737, "percentage": 58.88, "elapsed_time": "2:35:35", "remaining_time": "1:48:40", "throughput": 8700.77, "total_tokens": 81222184} +{"current_steps": 120505, "total_steps": 204665, "loss": 0.0004, "lr": 8.650265007501702e-07, "epoch": 2.943957198348521, "percentage": 58.88, "elapsed_time": "2:35:35", "remaining_time": "1:48:39", "throughput": 8700.77, "total_tokens": 81225192} +{"current_steps": 120510, "total_steps": 204665, "loss": 0.0001, "lr": 8.649420039990904e-07, "epoch": 2.944079349180368, "percentage": 58.88, "elapsed_time": "2:35:35", "remaining_time": "1:48:39", "throughput": 8700.86, "total_tokens": 81229224} +{"current_steps": 120515, "total_steps": 204665, "loss": 0.0379, "lr": 8.648575082301884e-07, "epoch": 2.9442015000122153, "percentage": 58.88, "elapsed_time": "2:35:36", "remaining_time": "1:48:38", "throughput": 8700.9, "total_tokens": 81232552} +{"current_steps": 120520, "total_steps": 204665, "loss": 0.0001, "lr": 8.647730134440796e-07, "epoch": 2.944323650844062, "percentage": 58.89, "elapsed_time": "2:35:36", "remaining_time": "1:48:38", "throughput": 8700.96, "total_tokens": 81236264} +{"current_steps": 120525, "total_steps": 204665, "loss": 0.0005, "lr": 8.646885196413772e-07, "epoch": 2.9444458016759096, "percentage": 58.89, "elapsed_time": "2:35:36", "remaining_time": "1:48:38", "throughput": 8701.01, "total_tokens": 81239720} +{"current_steps": 120530, "total_steps": 204665, "loss": 0.0001, "lr": 8.646040268226969e-07, "epoch": 2.9445679525077564, "percentage": 58.89, "elapsed_time": "2:35:37", "remaining_time": "1:48:37", "throughput": 8701.14, "total_tokens": 81244200} +{"current_steps": 120535, "total_steps": 204665, "loss": 0.0627, "lr": 8.645195349886522e-07, "epoch": 2.944690103339604, "percentage": 58.89, "elapsed_time": "2:35:37", "remaining_time": "1:48:37", "throughput": 8701.16, "total_tokens": 81247336} +{"current_steps": 120540, "total_steps": 204665, "loss": 0.0279, "lr": 8.644350441398575e-07, "epoch": 2.9448122541714508, "percentage": 58.9, "elapsed_time": "2:35:37", "remaining_time": "1:48:36", "throughput": 8701.2, "total_tokens": 81250792} +{"current_steps": 120545, "total_steps": 204665, "loss": 0.0466, "lr": 8.643505542769283e-07, "epoch": 2.944934405003298, "percentage": 58.9, "elapsed_time": "2:35:38", "remaining_time": "1:48:36", "throughput": 8701.23, "total_tokens": 81253992} +{"current_steps": 120550, "total_steps": 204665, "loss": 0.0002, "lr": 8.642660654004779e-07, "epoch": 2.945056555835145, "percentage": 58.9, "elapsed_time": "2:35:38", "remaining_time": "1:48:36", "throughput": 8701.27, "total_tokens": 81257448} +{"current_steps": 120555, "total_steps": 204665, "loss": 0.0001, "lr": 8.641815775111216e-07, "epoch": 2.9451787066669923, "percentage": 58.9, "elapsed_time": "2:35:38", "remaining_time": "1:48:35", "throughput": 8701.29, "total_tokens": 81260584} +{"current_steps": 120560, "total_steps": 204665, "loss": 0.0765, "lr": 8.640970906094729e-07, "epoch": 2.9453008574988395, "percentage": 58.91, "elapsed_time": "2:35:39", "remaining_time": "1:48:35", "throughput": 8701.31, "total_tokens": 81263848} +{"current_steps": 120565, "total_steps": 204665, "loss": 0.0003, "lr": 8.640126046961473e-07, "epoch": 2.9454230083306867, "percentage": 58.91, "elapsed_time": "2:35:39", "remaining_time": "1:48:34", "throughput": 8701.39, "total_tokens": 81267688} +{"current_steps": 120570, "total_steps": 204665, "loss": 0.0001, "lr": 8.639281197717579e-07, "epoch": 2.945545159162534, "percentage": 58.91, "elapsed_time": "2:35:39", "remaining_time": "1:48:34", "throughput": 8701.42, "total_tokens": 81271016} +{"current_steps": 120575, "total_steps": 204665, "loss": 0.0001, "lr": 8.6384363583692e-07, "epoch": 2.945667309994381, "percentage": 58.91, "elapsed_time": "2:35:40", "remaining_time": "1:48:34", "throughput": 8701.46, "total_tokens": 81274408} +{"current_steps": 120580, "total_steps": 204665, "loss": 0.0, "lr": 8.637591528922482e-07, "epoch": 2.9457894608262283, "percentage": 58.92, "elapsed_time": "2:35:40", "remaining_time": "1:48:33", "throughput": 8701.48, "total_tokens": 81277544} +{"current_steps": 120585, "total_steps": 204665, "loss": 0.0515, "lr": 8.636746709383563e-07, "epoch": 2.9459116116580755, "percentage": 58.92, "elapsed_time": "2:35:41", "remaining_time": "1:48:33", "throughput": 8701.52, "total_tokens": 81281000} +{"current_steps": 120590, "total_steps": 204665, "loss": 0.0343, "lr": 8.635901899758589e-07, "epoch": 2.9460337624899227, "percentage": 58.92, "elapsed_time": "2:35:41", "remaining_time": "1:48:32", "throughput": 8701.55, "total_tokens": 81284264} +{"current_steps": 120595, "total_steps": 204665, "loss": 0.0548, "lr": 8.635057100053702e-07, "epoch": 2.94615591332177, "percentage": 58.92, "elapsed_time": "2:35:41", "remaining_time": "1:48:32", "throughput": 8701.58, "total_tokens": 81287528} +{"current_steps": 120600, "total_steps": 204665, "loss": 0.0001, "lr": 8.63421231027505e-07, "epoch": 2.946278064153617, "percentage": 58.93, "elapsed_time": "2:35:42", "remaining_time": "1:48:31", "throughput": 8701.6, "total_tokens": 81290792} +{"current_steps": 120605, "total_steps": 204665, "loss": 0.0002, "lr": 8.633367530428769e-07, "epoch": 2.946400214985464, "percentage": 58.93, "elapsed_time": "2:35:42", "remaining_time": "1:48:31", "throughput": 8701.63, "total_tokens": 81294056} +{"current_steps": 120610, "total_steps": 204665, "loss": 0.0001, "lr": 8.632522760521006e-07, "epoch": 2.9465223658173114, "percentage": 58.93, "elapsed_time": "2:35:42", "remaining_time": "1:48:31", "throughput": 8701.69, "total_tokens": 81297704} +{"current_steps": 120615, "total_steps": 204665, "loss": 0.0008, "lr": 8.63167800055791e-07, "epoch": 2.946644516649158, "percentage": 58.93, "elapsed_time": "2:35:43", "remaining_time": "1:48:30", "throughput": 8701.71, "total_tokens": 81300904} +{"current_steps": 120620, "total_steps": 204665, "loss": 0.059, "lr": 8.630833250545616e-07, "epoch": 2.946766667481006, "percentage": 58.94, "elapsed_time": "2:35:43", "remaining_time": "1:48:30", "throughput": 8701.76, "total_tokens": 81304360} +{"current_steps": 120625, "total_steps": 204665, "loss": 0.0009, "lr": 8.629988510490274e-07, "epoch": 2.9468888183128525, "percentage": 58.94, "elapsed_time": "2:35:43", "remaining_time": "1:48:29", "throughput": 8701.81, "total_tokens": 81307880} +{"current_steps": 120630, "total_steps": 204665, "loss": 0.0728, "lr": 8.629143780398022e-07, "epoch": 2.9470109691446997, "percentage": 58.94, "elapsed_time": "2:35:44", "remaining_time": "1:48:29", "throughput": 8701.83, "total_tokens": 81311144} +{"current_steps": 120635, "total_steps": 204665, "loss": 0.0006, "lr": 8.628299060275006e-07, "epoch": 2.947133119976547, "percentage": 58.94, "elapsed_time": "2:35:44", "remaining_time": "1:48:29", "throughput": 8701.86, "total_tokens": 81314408} +{"current_steps": 120640, "total_steps": 204665, "loss": 0.0001, "lr": 8.62745435012737e-07, "epoch": 2.947255270808394, "percentage": 58.95, "elapsed_time": "2:35:44", "remaining_time": "1:48:28", "throughput": 8701.89, "total_tokens": 81317736} +{"current_steps": 120645, "total_steps": 204665, "loss": 0.0788, "lr": 8.62660964996125e-07, "epoch": 2.9473774216402413, "percentage": 58.95, "elapsed_time": "2:35:45", "remaining_time": "1:48:28", "throughput": 8701.93, "total_tokens": 81321064} +{"current_steps": 120650, "total_steps": 204665, "loss": 0.0423, "lr": 8.625764959782799e-07, "epoch": 2.9474995724720885, "percentage": 58.95, "elapsed_time": "2:35:45", "remaining_time": "1:48:27", "throughput": 8701.94, "total_tokens": 81324200} +{"current_steps": 120655, "total_steps": 204665, "loss": 0.1261, "lr": 8.624920279598152e-07, "epoch": 2.9476217233039357, "percentage": 58.95, "elapsed_time": "2:35:45", "remaining_time": "1:48:27", "throughput": 8701.99, "total_tokens": 81327784} +{"current_steps": 120660, "total_steps": 204665, "loss": 0.0719, "lr": 8.624075609413457e-07, "epoch": 2.947743874135783, "percentage": 58.95, "elapsed_time": "2:35:46", "remaining_time": "1:48:26", "throughput": 8702.01, "total_tokens": 81330984} +{"current_steps": 120665, "total_steps": 204665, "loss": 0.0002, "lr": 8.623230949234851e-07, "epoch": 2.94786602496763, "percentage": 58.96, "elapsed_time": "2:35:46", "remaining_time": "1:48:26", "throughput": 8702.04, "total_tokens": 81334184} +{"current_steps": 120670, "total_steps": 204665, "loss": 0.1365, "lr": 8.62238629906848e-07, "epoch": 2.9479881757994773, "percentage": 58.96, "elapsed_time": "2:35:46", "remaining_time": "1:48:26", "throughput": 8702.05, "total_tokens": 81337256} +{"current_steps": 120675, "total_steps": 204665, "loss": 0.0002, "lr": 8.62154165892049e-07, "epoch": 2.9481103266313244, "percentage": 58.96, "elapsed_time": "2:35:47", "remaining_time": "1:48:25", "throughput": 8702.07, "total_tokens": 81340456} +{"current_steps": 120680, "total_steps": 204665, "loss": 0.0003, "lr": 8.620697028797016e-07, "epoch": 2.9482324774631716, "percentage": 58.96, "elapsed_time": "2:35:47", "remaining_time": "1:48:25", "throughput": 8702.1, "total_tokens": 81343720} +{"current_steps": 120685, "total_steps": 204665, "loss": 0.0586, "lr": 8.619852408704208e-07, "epoch": 2.948354628295019, "percentage": 58.97, "elapsed_time": "2:35:47", "remaining_time": "1:48:24", "throughput": 8702.21, "total_tokens": 81348008} +{"current_steps": 120690, "total_steps": 204665, "loss": 0.0004, "lr": 8.619007798648202e-07, "epoch": 2.948476779126866, "percentage": 58.97, "elapsed_time": "2:35:48", "remaining_time": "1:48:24", "throughput": 8702.26, "total_tokens": 81351592} +{"current_steps": 120695, "total_steps": 204665, "loss": 0.0006, "lr": 8.618163198635142e-07, "epoch": 2.948598929958713, "percentage": 58.97, "elapsed_time": "2:35:48", "remaining_time": "1:48:24", "throughput": 8702.31, "total_tokens": 81355048} +{"current_steps": 120700, "total_steps": 204665, "loss": 0.0334, "lr": 8.617318608671174e-07, "epoch": 2.94872108079056, "percentage": 58.97, "elapsed_time": "2:35:49", "remaining_time": "1:48:23", "throughput": 8702.34, "total_tokens": 81358440} +{"current_steps": 120705, "total_steps": 204665, "loss": 0.0654, "lr": 8.616474028762432e-07, "epoch": 2.9488432316224076, "percentage": 58.98, "elapsed_time": "2:35:49", "remaining_time": "1:48:23", "throughput": 8702.37, "total_tokens": 81361704} +{"current_steps": 120710, "total_steps": 204665, "loss": 0.0003, "lr": 8.615629458915069e-07, "epoch": 2.9489653824542543, "percentage": 58.98, "elapsed_time": "2:35:49", "remaining_time": "1:48:22", "throughput": 8702.42, "total_tokens": 81365224} +{"current_steps": 120715, "total_steps": 204665, "loss": 0.0775, "lr": 8.614784899135216e-07, "epoch": 2.9490875332861015, "percentage": 58.98, "elapsed_time": "2:35:50", "remaining_time": "1:48:22", "throughput": 8702.46, "total_tokens": 81368680} +{"current_steps": 120720, "total_steps": 204665, "loss": 0.0002, "lr": 8.613940349429024e-07, "epoch": 2.9492096841179487, "percentage": 58.98, "elapsed_time": "2:35:50", "remaining_time": "1:48:21", "throughput": 8702.46, "total_tokens": 81371560} +{"current_steps": 120725, "total_steps": 204665, "loss": 0.0003, "lr": 8.613095809802626e-07, "epoch": 2.949331834949796, "percentage": 58.99, "elapsed_time": "2:35:50", "remaining_time": "1:48:21", "throughput": 8702.5, "total_tokens": 81375016} +{"current_steps": 120730, "total_steps": 204665, "loss": 0.0294, "lr": 8.612251280262168e-07, "epoch": 2.949453985781643, "percentage": 58.99, "elapsed_time": "2:35:51", "remaining_time": "1:48:21", "throughput": 8702.56, "total_tokens": 81378664} +{"current_steps": 120735, "total_steps": 204665, "loss": 0.1169, "lr": 8.611406760813797e-07, "epoch": 2.9495761366134903, "percentage": 58.99, "elapsed_time": "2:35:51", "remaining_time": "1:48:20", "throughput": 8702.55, "total_tokens": 81381544} +{"current_steps": 120740, "total_steps": 204665, "loss": 0.0003, "lr": 8.610562251463648e-07, "epoch": 2.9496982874453375, "percentage": 58.99, "elapsed_time": "2:35:51", "remaining_time": "1:48:20", "throughput": 8702.59, "total_tokens": 81384936} +{"current_steps": 120745, "total_steps": 204665, "loss": 0.1131, "lr": 8.609717752217864e-07, "epoch": 2.9498204382771847, "percentage": 59.0, "elapsed_time": "2:35:52", "remaining_time": "1:48:19", "throughput": 8702.64, "total_tokens": 81388392} +{"current_steps": 120750, "total_steps": 204665, "loss": 0.058, "lr": 8.608873263082584e-07, "epoch": 2.949942589109032, "percentage": 59.0, "elapsed_time": "2:35:52", "remaining_time": "1:48:19", "throughput": 8702.66, "total_tokens": 81391656} +{"current_steps": 120755, "total_steps": 204665, "loss": 0.0001, "lr": 8.608028784063957e-07, "epoch": 2.950064739940879, "percentage": 59.0, "elapsed_time": "2:35:52", "remaining_time": "1:48:19", "throughput": 8702.68, "total_tokens": 81394792} +{"current_steps": 120760, "total_steps": 204665, "loss": 0.1291, "lr": 8.607184315168112e-07, "epoch": 2.9501868907727262, "percentage": 59.0, "elapsed_time": "2:35:53", "remaining_time": "1:48:18", "throughput": 8702.69, "total_tokens": 81397928} +{"current_steps": 120765, "total_steps": 204665, "loss": 0.0884, "lr": 8.6063398564012e-07, "epoch": 2.9503090416045734, "percentage": 59.01, "elapsed_time": "2:35:53", "remaining_time": "1:48:18", "throughput": 8702.74, "total_tokens": 81401512} +{"current_steps": 120770, "total_steps": 204665, "loss": 0.0323, "lr": 8.605495407769362e-07, "epoch": 2.9504311924364206, "percentage": 59.01, "elapsed_time": "2:35:53", "remaining_time": "1:48:17", "throughput": 8702.75, "total_tokens": 81404520} +{"current_steps": 120775, "total_steps": 204665, "loss": 0.0005, "lr": 8.604650969278733e-07, "epoch": 2.950553343268268, "percentage": 59.01, "elapsed_time": "2:35:54", "remaining_time": "1:48:17", "throughput": 8702.8, "total_tokens": 81408104} +{"current_steps": 120780, "total_steps": 204665, "loss": 0.0481, "lr": 8.603806540935461e-07, "epoch": 2.950675494100115, "percentage": 59.01, "elapsed_time": "2:35:54", "remaining_time": "1:48:17", "throughput": 8702.87, "total_tokens": 81411880} +{"current_steps": 120785, "total_steps": 204665, "loss": 0.0005, "lr": 8.602962122745679e-07, "epoch": 2.9507976449319617, "percentage": 59.02, "elapsed_time": "2:35:54", "remaining_time": "1:48:16", "throughput": 8702.93, "total_tokens": 81415528} +{"current_steps": 120790, "total_steps": 204665, "loss": 0.0011, "lr": 8.602117714715536e-07, "epoch": 2.9509197957638094, "percentage": 59.02, "elapsed_time": "2:35:55", "remaining_time": "1:48:16", "throughput": 8702.97, "total_tokens": 81418984} +{"current_steps": 120795, "total_steps": 204665, "loss": 0.0006, "lr": 8.601273316851168e-07, "epoch": 2.951041946595656, "percentage": 59.02, "elapsed_time": "2:35:55", "remaining_time": "1:48:15", "throughput": 8703.0, "total_tokens": 81422248} +{"current_steps": 120800, "total_steps": 204665, "loss": 0.0004, "lr": 8.600428929158715e-07, "epoch": 2.9511640974275037, "percentage": 59.02, "elapsed_time": "2:35:55", "remaining_time": "1:48:15", "throughput": 8703.01, "total_tokens": 81425320} +{"current_steps": 120805, "total_steps": 204665, "loss": 0.1369, "lr": 8.599584551644324e-07, "epoch": 2.9512862482593505, "percentage": 59.03, "elapsed_time": "2:35:56", "remaining_time": "1:48:14", "throughput": 8703.05, "total_tokens": 81428712} +{"current_steps": 120810, "total_steps": 204665, "loss": 0.0005, "lr": 8.598740184314124e-07, "epoch": 2.9514083990911977, "percentage": 59.03, "elapsed_time": "2:35:56", "remaining_time": "1:48:14", "throughput": 8703.06, "total_tokens": 81431784} +{"current_steps": 120815, "total_steps": 204665, "loss": 0.0376, "lr": 8.597895827174269e-07, "epoch": 2.951530549923045, "percentage": 59.03, "elapsed_time": "2:35:57", "remaining_time": "1:48:14", "throughput": 8703.09, "total_tokens": 81435176} +{"current_steps": 120820, "total_steps": 204665, "loss": 0.0005, "lr": 8.597051480230886e-07, "epoch": 2.951652700754892, "percentage": 59.03, "elapsed_time": "2:35:57", "remaining_time": "1:48:13", "throughput": 8703.15, "total_tokens": 81438760} +{"current_steps": 120825, "total_steps": 204665, "loss": 0.0537, "lr": 8.596207143490123e-07, "epoch": 2.9517748515867392, "percentage": 59.04, "elapsed_time": "2:35:57", "remaining_time": "1:48:13", "throughput": 8703.17, "total_tokens": 81441960} +{"current_steps": 120830, "total_steps": 204665, "loss": 0.001, "lr": 8.595362816958124e-07, "epoch": 2.9518970024185864, "percentage": 59.04, "elapsed_time": "2:35:58", "remaining_time": "1:48:12", "throughput": 8703.2, "total_tokens": 81445288} +{"current_steps": 120835, "total_steps": 204665, "loss": 0.0002, "lr": 8.594518500641019e-07, "epoch": 2.9520191532504336, "percentage": 59.04, "elapsed_time": "2:35:58", "remaining_time": "1:48:12", "throughput": 8703.22, "total_tokens": 81448424} +{"current_steps": 120840, "total_steps": 204665, "loss": 0.0427, "lr": 8.59367419454496e-07, "epoch": 2.952141304082281, "percentage": 59.04, "elapsed_time": "2:35:58", "remaining_time": "1:48:12", "throughput": 8703.22, "total_tokens": 81451368} +{"current_steps": 120845, "total_steps": 204665, "loss": 0.0004, "lr": 8.592829898676076e-07, "epoch": 2.952263454914128, "percentage": 59.05, "elapsed_time": "2:35:59", "remaining_time": "1:48:11", "throughput": 8703.25, "total_tokens": 81454760} +{"current_steps": 120850, "total_steps": 204665, "loss": 0.1223, "lr": 8.591985613040511e-07, "epoch": 2.952385605745975, "percentage": 59.05, "elapsed_time": "2:35:59", "remaining_time": "1:48:11", "throughput": 8703.27, "total_tokens": 81457960} +{"current_steps": 120855, "total_steps": 204665, "loss": 0.0372, "lr": 8.591141337644409e-07, "epoch": 2.9525077565778224, "percentage": 59.05, "elapsed_time": "2:35:59", "remaining_time": "1:48:10", "throughput": 8703.31, "total_tokens": 81461352} +{"current_steps": 120860, "total_steps": 204665, "loss": 0.0711, "lr": 8.590297072493901e-07, "epoch": 2.9526299074096696, "percentage": 59.05, "elapsed_time": "2:36:00", "remaining_time": "1:48:10", "throughput": 8703.33, "total_tokens": 81464552} +{"current_steps": 120865, "total_steps": 204665, "loss": 0.0007, "lr": 8.589452817595138e-07, "epoch": 2.9527520582415168, "percentage": 59.06, "elapsed_time": "2:36:00", "remaining_time": "1:48:09", "throughput": 8703.35, "total_tokens": 81467752} +{"current_steps": 120870, "total_steps": 204665, "loss": 0.0001, "lr": 8.588608572954248e-07, "epoch": 2.952874209073364, "percentage": 59.06, "elapsed_time": "2:36:00", "remaining_time": "1:48:09", "throughput": 8703.39, "total_tokens": 81471208} +{"current_steps": 120875, "total_steps": 204665, "loss": 0.0384, "lr": 8.587764338577381e-07, "epoch": 2.952996359905211, "percentage": 59.06, "elapsed_time": "2:36:01", "remaining_time": "1:48:09", "throughput": 8703.43, "total_tokens": 81474664} +{"current_steps": 120880, "total_steps": 204665, "loss": 0.0005, "lr": 8.586920114470666e-07, "epoch": 2.953118510737058, "percentage": 59.06, "elapsed_time": "2:36:01", "remaining_time": "1:48:08", "throughput": 8703.53, "total_tokens": 81478696} +{"current_steps": 120885, "total_steps": 204665, "loss": 0.117, "lr": 8.586075900640248e-07, "epoch": 2.9532406615689055, "percentage": 59.06, "elapsed_time": "2:36:01", "remaining_time": "1:48:08", "throughput": 8703.52, "total_tokens": 81481640} +{"current_steps": 120890, "total_steps": 204665, "loss": 0.0002, "lr": 8.585231697092272e-07, "epoch": 2.9533628124007523, "percentage": 59.07, "elapsed_time": "2:36:02", "remaining_time": "1:48:07", "throughput": 8703.59, "total_tokens": 81485416} +{"current_steps": 120895, "total_steps": 204665, "loss": 0.0558, "lr": 8.584387503832868e-07, "epoch": 2.9534849632325995, "percentage": 59.07, "elapsed_time": "2:36:02", "remaining_time": "1:48:07", "throughput": 8703.61, "total_tokens": 81488616} +{"current_steps": 120900, "total_steps": 204665, "loss": 0.0993, "lr": 8.583543320868181e-07, "epoch": 2.9536071140644466, "percentage": 59.07, "elapsed_time": "2:36:02", "remaining_time": "1:48:07", "throughput": 8703.65, "total_tokens": 81492008} +{"current_steps": 120905, "total_steps": 204665, "loss": 0.0326, "lr": 8.582699148204347e-07, "epoch": 2.953729264896294, "percentage": 59.07, "elapsed_time": "2:36:03", "remaining_time": "1:48:06", "throughput": 8703.68, "total_tokens": 81495336} +{"current_steps": 120910, "total_steps": 204665, "loss": 0.0003, "lr": 8.581854985847508e-07, "epoch": 2.953851415728141, "percentage": 59.08, "elapsed_time": "2:36:03", "remaining_time": "1:48:06", "throughput": 8703.7, "total_tokens": 81498536} +{"current_steps": 120915, "total_steps": 204665, "loss": 0.0576, "lr": 8.581010833803795e-07, "epoch": 2.953973566559988, "percentage": 59.08, "elapsed_time": "2:36:04", "remaining_time": "1:48:05", "throughput": 8703.76, "total_tokens": 81502184} +{"current_steps": 120920, "total_steps": 204665, "loss": 0.0441, "lr": 8.580166692079355e-07, "epoch": 2.9540957173918354, "percentage": 59.08, "elapsed_time": "2:36:04", "remaining_time": "1:48:05", "throughput": 8703.78, "total_tokens": 81505384} +{"current_steps": 120925, "total_steps": 204665, "loss": 0.0665, "lr": 8.579322560680329e-07, "epoch": 2.9542178682236826, "percentage": 59.08, "elapsed_time": "2:36:04", "remaining_time": "1:48:05", "throughput": 8703.79, "total_tokens": 81508456} +{"current_steps": 120930, "total_steps": 204665, "loss": 0.0293, "lr": 8.578478439612846e-07, "epoch": 2.95434001905553, "percentage": 59.09, "elapsed_time": "2:36:05", "remaining_time": "1:48:04", "throughput": 8703.8, "total_tokens": 81511464} +{"current_steps": 120935, "total_steps": 204665, "loss": 0.0624, "lr": 8.577634328883055e-07, "epoch": 2.954462169887377, "percentage": 59.09, "elapsed_time": "2:36:05", "remaining_time": "1:48:04", "throughput": 8703.83, "total_tokens": 81514792} +{"current_steps": 120940, "total_steps": 204665, "loss": 0.0006, "lr": 8.576790228497085e-07, "epoch": 2.954584320719224, "percentage": 59.09, "elapsed_time": "2:36:05", "remaining_time": "1:48:03", "throughput": 8703.89, "total_tokens": 81518440} +{"current_steps": 120945, "total_steps": 204665, "loss": 0.0003, "lr": 8.575946138461082e-07, "epoch": 2.9547064715510714, "percentage": 59.09, "elapsed_time": "2:36:06", "remaining_time": "1:48:03", "throughput": 8703.98, "total_tokens": 81522408} +{"current_steps": 120950, "total_steps": 204665, "loss": 0.0007, "lr": 8.575102058781181e-07, "epoch": 2.9548286223829185, "percentage": 59.1, "elapsed_time": "2:36:06", "remaining_time": "1:48:02", "throughput": 8704.01, "total_tokens": 81525800} +{"current_steps": 120955, "total_steps": 204665, "loss": 0.0014, "lr": 8.574257989463522e-07, "epoch": 2.9549507732147657, "percentage": 59.1, "elapsed_time": "2:36:06", "remaining_time": "1:48:02", "throughput": 8704.04, "total_tokens": 81529064} +{"current_steps": 120960, "total_steps": 204665, "loss": 0.0004, "lr": 8.573413930514244e-07, "epoch": 2.955072924046613, "percentage": 59.1, "elapsed_time": "2:36:07", "remaining_time": "1:48:02", "throughput": 8704.09, "total_tokens": 81532584} +{"current_steps": 120965, "total_steps": 204665, "loss": 0.0609, "lr": 8.57256988193948e-07, "epoch": 2.9551950748784597, "percentage": 59.1, "elapsed_time": "2:36:07", "remaining_time": "1:48:01", "throughput": 8704.12, "total_tokens": 81535912} +{"current_steps": 120970, "total_steps": 204665, "loss": 0.1942, "lr": 8.571725843745374e-07, "epoch": 2.9553172257103073, "percentage": 59.11, "elapsed_time": "2:36:07", "remaining_time": "1:48:01", "throughput": 8704.16, "total_tokens": 81539304} +{"current_steps": 120975, "total_steps": 204665, "loss": 0.0004, "lr": 8.57088181593806e-07, "epoch": 2.955439376542154, "percentage": 59.11, "elapsed_time": "2:36:08", "remaining_time": "1:48:00", "throughput": 8704.19, "total_tokens": 81542632} +{"current_steps": 120980, "total_steps": 204665, "loss": 0.0085, "lr": 8.570037798523677e-07, "epoch": 2.9555615273740017, "percentage": 59.11, "elapsed_time": "2:36:08", "remaining_time": "1:48:00", "throughput": 8704.21, "total_tokens": 81545832} +{"current_steps": 120985, "total_steps": 204665, "loss": 0.0031, "lr": 8.569193791508368e-07, "epoch": 2.9556836782058484, "percentage": 59.11, "elapsed_time": "2:36:08", "remaining_time": "1:48:00", "throughput": 8704.21, "total_tokens": 81548840} +{"current_steps": 120990, "total_steps": 204665, "loss": 0.0003, "lr": 8.568349794898262e-07, "epoch": 2.9558058290376956, "percentage": 59.12, "elapsed_time": "2:36:09", "remaining_time": "1:47:59", "throughput": 8704.23, "total_tokens": 81552040} +{"current_steps": 120995, "total_steps": 204665, "loss": 0.0001, "lr": 8.567505808699506e-07, "epoch": 2.955927979869543, "percentage": 59.12, "elapsed_time": "2:36:09", "remaining_time": "1:47:59", "throughput": 8704.26, "total_tokens": 81555240} +{"current_steps": 121000, "total_steps": 204665, "loss": 0.0002, "lr": 8.566661832918231e-07, "epoch": 2.95605013070139, "percentage": 59.12, "elapsed_time": "2:36:09", "remaining_time": "1:47:58", "throughput": 8704.3, "total_tokens": 81558760} +{"current_steps": 121005, "total_steps": 204665, "loss": 0.0002, "lr": 8.565817867560576e-07, "epoch": 2.956172281533237, "percentage": 59.12, "elapsed_time": "2:36:10", "remaining_time": "1:47:58", "throughput": 8704.3, "total_tokens": 81561640} +{"current_steps": 121010, "total_steps": 204665, "loss": 0.0845, "lr": 8.564973912632679e-07, "epoch": 2.9562944323650844, "percentage": 59.13, "elapsed_time": "2:36:10", "remaining_time": "1:47:57", "throughput": 8704.31, "total_tokens": 81564776} +{"current_steps": 121015, "total_steps": 204665, "loss": 0.0002, "lr": 8.564129968140677e-07, "epoch": 2.9564165831969316, "percentage": 59.13, "elapsed_time": "2:36:10", "remaining_time": "1:47:57", "throughput": 8704.32, "total_tokens": 81567848} +{"current_steps": 121020, "total_steps": 204665, "loss": 0.0001, "lr": 8.563286034090711e-07, "epoch": 2.9565387340287788, "percentage": 59.13, "elapsed_time": "2:36:11", "remaining_time": "1:47:57", "throughput": 8704.37, "total_tokens": 81571368} +{"current_steps": 121025, "total_steps": 204665, "loss": 0.049, "lr": 8.562442110488911e-07, "epoch": 2.956660884860626, "percentage": 59.13, "elapsed_time": "2:36:11", "remaining_time": "1:47:56", "throughput": 8704.41, "total_tokens": 81574824} +{"current_steps": 121030, "total_steps": 204665, "loss": 0.0005, "lr": 8.561598197341424e-07, "epoch": 2.956783035692473, "percentage": 59.14, "elapsed_time": "2:36:12", "remaining_time": "1:47:56", "throughput": 8704.43, "total_tokens": 81578024} +{"current_steps": 121035, "total_steps": 204665, "loss": 0.048, "lr": 8.560754294654377e-07, "epoch": 2.9569051865243203, "percentage": 59.14, "elapsed_time": "2:36:12", "remaining_time": "1:47:55", "throughput": 8704.53, "total_tokens": 81582120} +{"current_steps": 121040, "total_steps": 204665, "loss": 0.0003, "lr": 8.559910402433912e-07, "epoch": 2.9570273373561675, "percentage": 59.14, "elapsed_time": "2:36:12", "remaining_time": "1:47:55", "throughput": 8704.57, "total_tokens": 81585640} +{"current_steps": 121045, "total_steps": 204665, "loss": 0.0613, "lr": 8.55906652068617e-07, "epoch": 2.9571494881880147, "percentage": 59.14, "elapsed_time": "2:36:13", "remaining_time": "1:47:55", "throughput": 8704.57, "total_tokens": 81588520} +{"current_steps": 121050, "total_steps": 204665, "loss": 0.0588, "lr": 8.558222649417282e-07, "epoch": 2.9572716390198615, "percentage": 59.15, "elapsed_time": "2:36:13", "remaining_time": "1:47:54", "throughput": 8704.64, "total_tokens": 81592296} +{"current_steps": 121055, "total_steps": 204665, "loss": 0.0004, "lr": 8.557378788633386e-07, "epoch": 2.957393789851709, "percentage": 59.15, "elapsed_time": "2:36:13", "remaining_time": "1:47:54", "throughput": 8704.68, "total_tokens": 81595688} +{"current_steps": 121060, "total_steps": 204665, "loss": 0.0867, "lr": 8.55653493834062e-07, "epoch": 2.957515940683556, "percentage": 59.15, "elapsed_time": "2:36:14", "remaining_time": "1:47:53", "throughput": 8704.73, "total_tokens": 81599208} +{"current_steps": 121065, "total_steps": 204665, "loss": 0.0317, "lr": 8.555691098545122e-07, "epoch": 2.9576380915154035, "percentage": 59.15, "elapsed_time": "2:36:14", "remaining_time": "1:47:53", "throughput": 8704.8, "total_tokens": 81602984} +{"current_steps": 121070, "total_steps": 204665, "loss": 0.0933, "lr": 8.554847269253023e-07, "epoch": 2.95776024234725, "percentage": 59.16, "elapsed_time": "2:36:14", "remaining_time": "1:47:53", "throughput": 8704.83, "total_tokens": 81606312} +{"current_steps": 121075, "total_steps": 204665, "loss": 0.0954, "lr": 8.554003450470463e-07, "epoch": 2.9578823931790974, "percentage": 59.16, "elapsed_time": "2:36:15", "remaining_time": "1:47:52", "throughput": 8704.88, "total_tokens": 81609832} +{"current_steps": 121080, "total_steps": 204665, "loss": 0.0003, "lr": 8.553159642203584e-07, "epoch": 2.9580045440109446, "percentage": 59.16, "elapsed_time": "2:36:15", "remaining_time": "1:47:52", "throughput": 8704.9, "total_tokens": 81613096} +{"current_steps": 121085, "total_steps": 204665, "loss": 0.049, "lr": 8.552315844458511e-07, "epoch": 2.958126694842792, "percentage": 59.16, "elapsed_time": "2:36:15", "remaining_time": "1:47:51", "throughput": 8704.94, "total_tokens": 81616552} +{"current_steps": 121090, "total_steps": 204665, "loss": 0.0008, "lr": 8.551472057241393e-07, "epoch": 2.958248845674639, "percentage": 59.16, "elapsed_time": "2:36:16", "remaining_time": "1:47:51", "throughput": 8704.95, "total_tokens": 81619560} +{"current_steps": 121095, "total_steps": 204665, "loss": 0.0002, "lr": 8.550628280558354e-07, "epoch": 2.958370996506486, "percentage": 59.17, "elapsed_time": "2:36:16", "remaining_time": "1:47:50", "throughput": 8704.97, "total_tokens": 81622824} +{"current_steps": 121100, "total_steps": 204665, "loss": 0.0003, "lr": 8.549784514415539e-07, "epoch": 2.9584931473383334, "percentage": 59.17, "elapsed_time": "2:36:16", "remaining_time": "1:47:50", "throughput": 8704.99, "total_tokens": 81625896} +{"current_steps": 121105, "total_steps": 204665, "loss": 0.0002, "lr": 8.548940758819081e-07, "epoch": 2.9586152981701805, "percentage": 59.17, "elapsed_time": "2:36:17", "remaining_time": "1:47:50", "throughput": 8705.02, "total_tokens": 81629224} +{"current_steps": 121110, "total_steps": 204665, "loss": 0.0641, "lr": 8.548097013775116e-07, "epoch": 2.9587374490020277, "percentage": 59.17, "elapsed_time": "2:36:17", "remaining_time": "1:47:49", "throughput": 8705.06, "total_tokens": 81632680} +{"current_steps": 121115, "total_steps": 204665, "loss": 0.0467, "lr": 8.547253279289781e-07, "epoch": 2.958859599833875, "percentage": 59.18, "elapsed_time": "2:36:17", "remaining_time": "1:47:49", "throughput": 8705.08, "total_tokens": 81635880} +{"current_steps": 121120, "total_steps": 204665, "loss": 0.0001, "lr": 8.546409555369207e-07, "epoch": 2.958981750665722, "percentage": 59.18, "elapsed_time": "2:36:18", "remaining_time": "1:47:48", "throughput": 8705.09, "total_tokens": 81638888} +{"current_steps": 121125, "total_steps": 204665, "loss": 0.0001, "lr": 8.545565842019539e-07, "epoch": 2.9591039014975693, "percentage": 59.18, "elapsed_time": "2:36:18", "remaining_time": "1:47:48", "throughput": 8705.26, "total_tokens": 81643944} +{"current_steps": 121130, "total_steps": 204665, "loss": 0.1017, "lr": 8.544722139246902e-07, "epoch": 2.9592260523294165, "percentage": 59.18, "elapsed_time": "2:36:19", "remaining_time": "1:47:48", "throughput": 8705.27, "total_tokens": 81647016} +{"current_steps": 121135, "total_steps": 204665, "loss": 0.0002, "lr": 8.543878447057439e-07, "epoch": 2.9593482031612637, "percentage": 59.19, "elapsed_time": "2:36:19", "remaining_time": "1:47:47", "throughput": 8705.34, "total_tokens": 81650792} +{"current_steps": 121140, "total_steps": 204665, "loss": 0.0007, "lr": 8.543034765457286e-07, "epoch": 2.959470353993111, "percentage": 59.19, "elapsed_time": "2:36:19", "remaining_time": "1:47:47", "throughput": 8705.38, "total_tokens": 81654184} +{"current_steps": 121145, "total_steps": 204665, "loss": 0.0001, "lr": 8.542191094452574e-07, "epoch": 2.9595925048249576, "percentage": 59.19, "elapsed_time": "2:36:20", "remaining_time": "1:47:46", "throughput": 8705.4, "total_tokens": 81657384} +{"current_steps": 121150, "total_steps": 204665, "loss": 0.0003, "lr": 8.541347434049442e-07, "epoch": 2.9597146556568052, "percentage": 59.19, "elapsed_time": "2:36:20", "remaining_time": "1:47:46", "throughput": 8705.46, "total_tokens": 81661032} +{"current_steps": 121155, "total_steps": 204665, "loss": 0.0001, "lr": 8.540503784254023e-07, "epoch": 2.959836806488652, "percentage": 59.2, "elapsed_time": "2:36:20", "remaining_time": "1:47:46", "throughput": 8705.5, "total_tokens": 81664488} +{"current_steps": 121160, "total_steps": 204665, "loss": 0.0002, "lr": 8.539660145072452e-07, "epoch": 2.9599589573204996, "percentage": 59.2, "elapsed_time": "2:36:21", "remaining_time": "1:47:45", "throughput": 8705.53, "total_tokens": 81667688} +{"current_steps": 121165, "total_steps": 204665, "loss": 0.0002, "lr": 8.538816516510866e-07, "epoch": 2.9600811081523464, "percentage": 59.2, "elapsed_time": "2:36:21", "remaining_time": "1:47:45", "throughput": 8705.56, "total_tokens": 81671016} +{"current_steps": 121170, "total_steps": 204665, "loss": 0.0225, "lr": 8.537972898575398e-07, "epoch": 2.9602032589841936, "percentage": 59.2, "elapsed_time": "2:36:21", "remaining_time": "1:47:44", "throughput": 8705.62, "total_tokens": 81674728} +{"current_steps": 121175, "total_steps": 204665, "loss": 0.073, "lr": 8.537129291272187e-07, "epoch": 2.9603254098160408, "percentage": 59.21, "elapsed_time": "2:36:22", "remaining_time": "1:47:44", "throughput": 8705.68, "total_tokens": 81678312} +{"current_steps": 121180, "total_steps": 204665, "loss": 0.0501, "lr": 8.536285694607361e-07, "epoch": 2.960447560647888, "percentage": 59.21, "elapsed_time": "2:36:22", "remaining_time": "1:47:43", "throughput": 8705.7, "total_tokens": 81681448} +{"current_steps": 121185, "total_steps": 204665, "loss": 0.1142, "lr": 8.535442108587066e-07, "epoch": 2.960569711479735, "percentage": 59.21, "elapsed_time": "2:36:22", "remaining_time": "1:47:43", "throughput": 8705.78, "total_tokens": 81685416} +{"current_steps": 121190, "total_steps": 204665, "loss": 0.0001, "lr": 8.534598533217423e-07, "epoch": 2.9606918623115823, "percentage": 59.21, "elapsed_time": "2:36:23", "remaining_time": "1:47:43", "throughput": 8705.83, "total_tokens": 81688872} +{"current_steps": 121195, "total_steps": 204665, "loss": 0.0535, "lr": 8.533754968504574e-07, "epoch": 2.9608140131434295, "percentage": 59.22, "elapsed_time": "2:36:23", "remaining_time": "1:47:42", "throughput": 8705.85, "total_tokens": 81692008} +{"current_steps": 121200, "total_steps": 204665, "loss": 0.0858, "lr": 8.532911414454657e-07, "epoch": 2.9609361639752767, "percentage": 59.22, "elapsed_time": "2:36:23", "remaining_time": "1:47:42", "throughput": 8705.87, "total_tokens": 81695272} +{"current_steps": 121205, "total_steps": 204665, "loss": 0.0493, "lr": 8.532067871073803e-07, "epoch": 2.961058314807124, "percentage": 59.22, "elapsed_time": "2:36:24", "remaining_time": "1:47:41", "throughput": 8705.9, "total_tokens": 81698536} +{"current_steps": 121210, "total_steps": 204665, "loss": 0.1358, "lr": 8.531224338368144e-07, "epoch": 2.961180465638971, "percentage": 59.22, "elapsed_time": "2:36:24", "remaining_time": "1:47:41", "throughput": 8705.93, "total_tokens": 81701736} +{"current_steps": 121215, "total_steps": 204665, "loss": 0.0583, "lr": 8.530380816343818e-07, "epoch": 2.9613026164708183, "percentage": 59.23, "elapsed_time": "2:36:24", "remaining_time": "1:47:41", "throughput": 8705.96, "total_tokens": 81705128} +{"current_steps": 121220, "total_steps": 204665, "loss": 0.1345, "lr": 8.52953730500696e-07, "epoch": 2.9614247673026655, "percentage": 59.23, "elapsed_time": "2:36:25", "remaining_time": "1:47:40", "throughput": 8705.97, "total_tokens": 81708200} +{"current_steps": 121225, "total_steps": 204665, "loss": 0.0674, "lr": 8.528693804363697e-07, "epoch": 2.9615469181345127, "percentage": 59.23, "elapsed_time": "2:36:25", "remaining_time": "1:47:40", "throughput": 8706.03, "total_tokens": 81711784} +{"current_steps": 121230, "total_steps": 204665, "loss": 0.0849, "lr": 8.527850314420169e-07, "epoch": 2.9616690689663594, "percentage": 59.23, "elapsed_time": "2:36:25", "remaining_time": "1:47:39", "throughput": 8706.02, "total_tokens": 81714600} +{"current_steps": 121235, "total_steps": 204665, "loss": 0.0005, "lr": 8.527006835182514e-07, "epoch": 2.961791219798207, "percentage": 59.24, "elapsed_time": "2:36:26", "remaining_time": "1:47:39", "throughput": 8706.04, "total_tokens": 81717736} +{"current_steps": 121240, "total_steps": 204665, "loss": 0.0429, "lr": 8.526163366656857e-07, "epoch": 2.9619133706300538, "percentage": 59.24, "elapsed_time": "2:36:26", "remaining_time": "1:47:38", "throughput": 8706.07, "total_tokens": 81721128} +{"current_steps": 121245, "total_steps": 204665, "loss": 0.0115, "lr": 8.52531990884934e-07, "epoch": 2.9620355214619014, "percentage": 59.24, "elapsed_time": "2:36:27", "remaining_time": "1:47:38", "throughput": 8706.08, "total_tokens": 81724200} +{"current_steps": 121250, "total_steps": 204665, "loss": 0.0013, "lr": 8.52447646176609e-07, "epoch": 2.962157672293748, "percentage": 59.24, "elapsed_time": "2:36:27", "remaining_time": "1:47:38", "throughput": 8706.11, "total_tokens": 81727464} +{"current_steps": 121255, "total_steps": 204665, "loss": 0.0742, "lr": 8.523633025413246e-07, "epoch": 2.9622798231255953, "percentage": 59.25, "elapsed_time": "2:36:27", "remaining_time": "1:47:37", "throughput": 8706.12, "total_tokens": 81730536} +{"current_steps": 121260, "total_steps": 204665, "loss": 0.0003, "lr": 8.522789599796939e-07, "epoch": 2.9624019739574425, "percentage": 59.25, "elapsed_time": "2:36:28", "remaining_time": "1:47:37", "throughput": 8706.13, "total_tokens": 81733544} +{"current_steps": 121265, "total_steps": 204665, "loss": 0.1397, "lr": 8.521946184923304e-07, "epoch": 2.9625241247892897, "percentage": 59.25, "elapsed_time": "2:36:28", "remaining_time": "1:47:36", "throughput": 8706.15, "total_tokens": 81736744} +{"current_steps": 121270, "total_steps": 204665, "loss": 0.0798, "lr": 8.521102780798475e-07, "epoch": 2.962646275621137, "percentage": 59.25, "elapsed_time": "2:36:28", "remaining_time": "1:47:36", "throughput": 8706.19, "total_tokens": 81740136} +{"current_steps": 121275, "total_steps": 204665, "loss": 0.0639, "lr": 8.520259387428582e-07, "epoch": 2.962768426452984, "percentage": 59.26, "elapsed_time": "2:36:29", "remaining_time": "1:47:36", "throughput": 8706.21, "total_tokens": 81743336} +{"current_steps": 121280, "total_steps": 204665, "loss": 0.0005, "lr": 8.519416004819764e-07, "epoch": 2.9628905772848313, "percentage": 59.26, "elapsed_time": "2:36:29", "remaining_time": "1:47:35", "throughput": 8706.21, "total_tokens": 81746344} +{"current_steps": 121285, "total_steps": 204665, "loss": 0.0007, "lr": 8.518572632978147e-07, "epoch": 2.9630127281166785, "percentage": 59.26, "elapsed_time": "2:36:29", "remaining_time": "1:47:35", "throughput": 8706.24, "total_tokens": 81749608} +{"current_steps": 121290, "total_steps": 204665, "loss": 0.1305, "lr": 8.517729271909869e-07, "epoch": 2.9631348789485257, "percentage": 59.26, "elapsed_time": "2:36:30", "remaining_time": "1:47:34", "throughput": 8706.26, "total_tokens": 81752872} +{"current_steps": 121295, "total_steps": 204665, "loss": 0.0004, "lr": 8.516885921621064e-07, "epoch": 2.963257029780373, "percentage": 59.27, "elapsed_time": "2:36:30", "remaining_time": "1:47:34", "throughput": 8706.28, "total_tokens": 81755944} +{"current_steps": 121300, "total_steps": 204665, "loss": 0.0011, "lr": 8.516042582117862e-07, "epoch": 2.96337918061222, "percentage": 59.27, "elapsed_time": "2:36:30", "remaining_time": "1:47:33", "throughput": 8706.35, "total_tokens": 81759720} +{"current_steps": 121305, "total_steps": 204665, "loss": 0.0005, "lr": 8.5151992534064e-07, "epoch": 2.9635013314440672, "percentage": 59.27, "elapsed_time": "2:36:31", "remaining_time": "1:47:33", "throughput": 8706.41, "total_tokens": 81763368} +{"current_steps": 121310, "total_steps": 204665, "loss": 0.1187, "lr": 8.514355935492806e-07, "epoch": 2.9636234822759144, "percentage": 59.27, "elapsed_time": "2:36:31", "remaining_time": "1:47:33", "throughput": 8706.45, "total_tokens": 81766824} +{"current_steps": 121315, "total_steps": 204665, "loss": 0.0003, "lr": 8.513512628383217e-07, "epoch": 2.9637456331077616, "percentage": 59.27, "elapsed_time": "2:36:31", "remaining_time": "1:47:32", "throughput": 8706.46, "total_tokens": 81769896} +{"current_steps": 121320, "total_steps": 204665, "loss": 0.0732, "lr": 8.512669332083763e-07, "epoch": 2.963867783939609, "percentage": 59.28, "elapsed_time": "2:36:32", "remaining_time": "1:47:32", "throughput": 8706.53, "total_tokens": 81773608} +{"current_steps": 121325, "total_steps": 204665, "loss": 0.0336, "lr": 8.511826046600575e-07, "epoch": 2.9639899347714556, "percentage": 59.28, "elapsed_time": "2:36:32", "remaining_time": "1:47:31", "throughput": 8706.56, "total_tokens": 81777000} +{"current_steps": 121330, "total_steps": 204665, "loss": 0.035, "lr": 8.510982771939794e-07, "epoch": 2.964112085603303, "percentage": 59.28, "elapsed_time": "2:36:32", "remaining_time": "1:47:31", "throughput": 8706.63, "total_tokens": 81780712} +{"current_steps": 121335, "total_steps": 204665, "loss": 0.0515, "lr": 8.510139508107541e-07, "epoch": 2.96423423643515, "percentage": 59.28, "elapsed_time": "2:36:33", "remaining_time": "1:47:31", "throughput": 8706.63, "total_tokens": 81783720} +{"current_steps": 121340, "total_steps": 204665, "loss": 0.0001, "lr": 8.509296255109959e-07, "epoch": 2.964356387266997, "percentage": 59.29, "elapsed_time": "2:36:33", "remaining_time": "1:47:30", "throughput": 8706.67, "total_tokens": 81787112} +{"current_steps": 121345, "total_steps": 204665, "loss": 0.0445, "lr": 8.508453012953172e-07, "epoch": 2.9644785380988443, "percentage": 59.29, "elapsed_time": "2:36:33", "remaining_time": "1:47:30", "throughput": 8706.71, "total_tokens": 81790568} +{"current_steps": 121350, "total_steps": 204665, "loss": 0.0408, "lr": 8.507609781643316e-07, "epoch": 2.9646006889306915, "percentage": 59.29, "elapsed_time": "2:36:34", "remaining_time": "1:47:29", "throughput": 8706.71, "total_tokens": 81793576} +{"current_steps": 121355, "total_steps": 204665, "loss": 0.0401, "lr": 8.506766561186526e-07, "epoch": 2.9647228397625387, "percentage": 59.29, "elapsed_time": "2:36:34", "remaining_time": "1:47:29", "throughput": 8706.76, "total_tokens": 81797096} +{"current_steps": 121360, "total_steps": 204665, "loss": 0.0665, "lr": 8.505923351588931e-07, "epoch": 2.964844990594386, "percentage": 59.3, "elapsed_time": "2:36:35", "remaining_time": "1:47:29", "throughput": 8706.78, "total_tokens": 81800296} +{"current_steps": 121365, "total_steps": 204665, "loss": 0.0237, "lr": 8.505080152856661e-07, "epoch": 2.964967141426233, "percentage": 59.3, "elapsed_time": "2:36:35", "remaining_time": "1:47:28", "throughput": 8706.8, "total_tokens": 81803432} +{"current_steps": 121370, "total_steps": 204665, "loss": 0.0005, "lr": 8.504236964995851e-07, "epoch": 2.9650892922580803, "percentage": 59.3, "elapsed_time": "2:36:35", "remaining_time": "1:47:28", "throughput": 8706.81, "total_tokens": 81806568} +{"current_steps": 121375, "total_steps": 204665, "loss": 0.0384, "lr": 8.503393788012635e-07, "epoch": 2.9652114430899275, "percentage": 59.3, "elapsed_time": "2:36:36", "remaining_time": "1:47:27", "throughput": 8706.87, "total_tokens": 81810152} +{"current_steps": 121380, "total_steps": 204665, "loss": 0.0011, "lr": 8.502550621913137e-07, "epoch": 2.9653335939217746, "percentage": 59.31, "elapsed_time": "2:36:36", "remaining_time": "1:47:27", "throughput": 8706.93, "total_tokens": 81813800} +{"current_steps": 121385, "total_steps": 204665, "loss": 0.128, "lr": 8.501707466703494e-07, "epoch": 2.965455744753622, "percentage": 59.31, "elapsed_time": "2:36:36", "remaining_time": "1:47:26", "throughput": 8706.92, "total_tokens": 81816616} +{"current_steps": 121390, "total_steps": 204665, "loss": 0.0599, "lr": 8.50086432238984e-07, "epoch": 2.965577895585469, "percentage": 59.31, "elapsed_time": "2:36:37", "remaining_time": "1:47:26", "throughput": 8706.92, "total_tokens": 81819560} +{"current_steps": 121395, "total_steps": 204665, "loss": 0.0009, "lr": 8.500021188978301e-07, "epoch": 2.965700046417316, "percentage": 59.31, "elapsed_time": "2:36:37", "remaining_time": "1:47:26", "throughput": 8706.94, "total_tokens": 81822760} +{"current_steps": 121400, "total_steps": 204665, "loss": 0.049, "lr": 8.499178066475016e-07, "epoch": 2.9658221972491634, "percentage": 59.32, "elapsed_time": "2:36:37", "remaining_time": "1:47:25", "throughput": 8706.97, "total_tokens": 81826024} +{"current_steps": 121405, "total_steps": 204665, "loss": 0.0004, "lr": 8.498334954886107e-07, "epoch": 2.9659443480810106, "percentage": 59.32, "elapsed_time": "2:36:38", "remaining_time": "1:47:25", "throughput": 8706.99, "total_tokens": 81829224} +{"current_steps": 121410, "total_steps": 204665, "loss": 0.0006, "lr": 8.497491854217713e-07, "epoch": 2.9660664989128573, "percentage": 59.32, "elapsed_time": "2:36:38", "remaining_time": "1:47:24", "throughput": 8707.03, "total_tokens": 81832680} +{"current_steps": 121415, "total_steps": 204665, "loss": 0.0006, "lr": 8.496648764475961e-07, "epoch": 2.966188649744705, "percentage": 59.32, "elapsed_time": "2:36:38", "remaining_time": "1:47:24", "throughput": 8707.07, "total_tokens": 81836072} +{"current_steps": 121420, "total_steps": 204665, "loss": 0.0003, "lr": 8.495805685666985e-07, "epoch": 2.9663108005765517, "percentage": 59.33, "elapsed_time": "2:36:39", "remaining_time": "1:47:24", "throughput": 8707.12, "total_tokens": 81839592} +{"current_steps": 121425, "total_steps": 204665, "loss": 0.0445, "lr": 8.494962617796915e-07, "epoch": 2.9664329514083994, "percentage": 59.33, "elapsed_time": "2:36:39", "remaining_time": "1:47:23", "throughput": 8707.16, "total_tokens": 81843048} +{"current_steps": 121430, "total_steps": 204665, "loss": 0.0362, "lr": 8.494119560871879e-07, "epoch": 2.966555102240246, "percentage": 59.33, "elapsed_time": "2:36:39", "remaining_time": "1:47:23", "throughput": 8707.15, "total_tokens": 81845928} +{"current_steps": 121435, "total_steps": 204665, "loss": 0.0004, "lr": 8.493276514898014e-07, "epoch": 2.9666772530720933, "percentage": 59.33, "elapsed_time": "2:36:40", "remaining_time": "1:47:22", "throughput": 8707.23, "total_tokens": 81849704} +{"current_steps": 121440, "total_steps": 204665, "loss": 0.001, "lr": 8.492433479881444e-07, "epoch": 2.9667994039039405, "percentage": 59.34, "elapsed_time": "2:36:40", "remaining_time": "1:47:22", "throughput": 8707.26, "total_tokens": 81853032} +{"current_steps": 121445, "total_steps": 204665, "loss": 0.0002, "lr": 8.491590455828302e-07, "epoch": 2.9669215547357877, "percentage": 59.34, "elapsed_time": "2:36:40", "remaining_time": "1:47:21", "throughput": 8707.29, "total_tokens": 81856360} +{"current_steps": 121450, "total_steps": 204665, "loss": 0.0014, "lr": 8.490747442744725e-07, "epoch": 2.967043705567635, "percentage": 59.34, "elapsed_time": "2:36:41", "remaining_time": "1:47:21", "throughput": 8707.31, "total_tokens": 81859624} +{"current_steps": 121455, "total_steps": 204665, "loss": 0.0002, "lr": 8.489904440636833e-07, "epoch": 2.967165856399482, "percentage": 59.34, "elapsed_time": "2:36:41", "remaining_time": "1:47:21", "throughput": 8707.36, "total_tokens": 81863144} +{"current_steps": 121460, "total_steps": 204665, "loss": 0.0291, "lr": 8.489061449510768e-07, "epoch": 2.9672880072313292, "percentage": 59.35, "elapsed_time": "2:36:41", "remaining_time": "1:47:20", "throughput": 8707.38, "total_tokens": 81866408} +{"current_steps": 121465, "total_steps": 204665, "loss": 0.0001, "lr": 8.488218469372652e-07, "epoch": 2.9674101580631764, "percentage": 59.35, "elapsed_time": "2:36:42", "remaining_time": "1:47:20", "throughput": 8707.39, "total_tokens": 81869480} +{"current_steps": 121470, "total_steps": 204665, "loss": 0.0002, "lr": 8.487375500228617e-07, "epoch": 2.9675323088950236, "percentage": 59.35, "elapsed_time": "2:36:42", "remaining_time": "1:47:19", "throughput": 8707.46, "total_tokens": 81873192} +{"current_steps": 121475, "total_steps": 204665, "loss": 0.0002, "lr": 8.486532542084795e-07, "epoch": 2.967654459726871, "percentage": 59.35, "elapsed_time": "2:36:43", "remaining_time": "1:47:19", "throughput": 8707.49, "total_tokens": 81876584} +{"current_steps": 121480, "total_steps": 204665, "loss": 0.0008, "lr": 8.485689594947314e-07, "epoch": 2.967776610558718, "percentage": 59.36, "elapsed_time": "2:36:43", "remaining_time": "1:47:19", "throughput": 8707.54, "total_tokens": 81880040} +{"current_steps": 121485, "total_steps": 204665, "loss": 0.1177, "lr": 8.484846658822308e-07, "epoch": 2.967898761390565, "percentage": 59.36, "elapsed_time": "2:36:43", "remaining_time": "1:47:18", "throughput": 8707.58, "total_tokens": 81883560} +{"current_steps": 121490, "total_steps": 204665, "loss": 0.027, "lr": 8.484003733715902e-07, "epoch": 2.9680209122224124, "percentage": 59.36, "elapsed_time": "2:36:44", "remaining_time": "1:47:18", "throughput": 8707.63, "total_tokens": 81887016} +{"current_steps": 121495, "total_steps": 204665, "loss": 0.0002, "lr": 8.483160819634232e-07, "epoch": 2.968143063054259, "percentage": 59.36, "elapsed_time": "2:36:44", "remaining_time": "1:47:17", "throughput": 8707.69, "total_tokens": 81890792} +{"current_steps": 121500, "total_steps": 204665, "loss": 0.0389, "lr": 8.482317916583422e-07, "epoch": 2.9682652138861068, "percentage": 59.37, "elapsed_time": "2:36:44", "remaining_time": "1:47:17", "throughput": 8707.72, "total_tokens": 81894056} +{"current_steps": 121505, "total_steps": 204665, "loss": 0.0513, "lr": 8.481475024569602e-07, "epoch": 2.9683873647179535, "percentage": 59.37, "elapsed_time": "2:36:45", "remaining_time": "1:47:17", "throughput": 8707.75, "total_tokens": 81897320} +{"current_steps": 121510, "total_steps": 204665, "loss": 0.1138, "lr": 8.480632143598909e-07, "epoch": 2.968509515549801, "percentage": 59.37, "elapsed_time": "2:36:45", "remaining_time": "1:47:16", "throughput": 8707.75, "total_tokens": 81900392} +{"current_steps": 121515, "total_steps": 204665, "loss": 0.0574, "lr": 8.479789273677465e-07, "epoch": 2.968631666381648, "percentage": 59.37, "elapsed_time": "2:36:45", "remaining_time": "1:47:16", "throughput": 8707.78, "total_tokens": 81903656} +{"current_steps": 121520, "total_steps": 204665, "loss": 0.0004, "lr": 8.478946414811403e-07, "epoch": 2.968753817213495, "percentage": 59.38, "elapsed_time": "2:36:46", "remaining_time": "1:47:15", "throughput": 8707.8, "total_tokens": 81906856} +{"current_steps": 121525, "total_steps": 204665, "loss": 0.0002, "lr": 8.478103567006853e-07, "epoch": 2.9688759680453423, "percentage": 59.38, "elapsed_time": "2:36:46", "remaining_time": "1:47:15", "throughput": 8707.84, "total_tokens": 81910312} +{"current_steps": 121530, "total_steps": 204665, "loss": 0.0725, "lr": 8.477260730269944e-07, "epoch": 2.9689981188771895, "percentage": 59.38, "elapsed_time": "2:36:46", "remaining_time": "1:47:14", "throughput": 8707.87, "total_tokens": 81913576} +{"current_steps": 121535, "total_steps": 204665, "loss": 0.0002, "lr": 8.4764179046068e-07, "epoch": 2.9691202697090366, "percentage": 59.38, "elapsed_time": "2:36:47", "remaining_time": "1:47:14", "throughput": 8707.93, "total_tokens": 81917224} +{"current_steps": 121540, "total_steps": 204665, "loss": 0.0007, "lr": 8.475575090023555e-07, "epoch": 2.969242420540884, "percentage": 59.38, "elapsed_time": "2:36:47", "remaining_time": "1:47:14", "throughput": 8707.98, "total_tokens": 81920744} +{"current_steps": 121545, "total_steps": 204665, "loss": 0.0429, "lr": 8.474732286526342e-07, "epoch": 2.969364571372731, "percentage": 59.39, "elapsed_time": "2:36:47", "remaining_time": "1:47:13", "throughput": 8708.01, "total_tokens": 81924072} +{"current_steps": 121550, "total_steps": 204665, "loss": 0.0468, "lr": 8.473889494121282e-07, "epoch": 2.969486722204578, "percentage": 59.39, "elapsed_time": "2:36:48", "remaining_time": "1:47:13", "throughput": 8708.04, "total_tokens": 81927336} +{"current_steps": 121555, "total_steps": 204665, "loss": 0.0852, "lr": 8.473046712814513e-07, "epoch": 2.9696088730364254, "percentage": 59.39, "elapsed_time": "2:36:48", "remaining_time": "1:47:12", "throughput": 8708.07, "total_tokens": 81930664} +{"current_steps": 121560, "total_steps": 204665, "loss": 0.0011, "lr": 8.472203942612154e-07, "epoch": 2.9697310238682726, "percentage": 59.39, "elapsed_time": "2:36:48", "remaining_time": "1:47:12", "throughput": 8708.08, "total_tokens": 81933736} +{"current_steps": 121565, "total_steps": 204665, "loss": 0.042, "lr": 8.471361183520341e-07, "epoch": 2.96985317470012, "percentage": 59.4, "elapsed_time": "2:36:49", "remaining_time": "1:47:12", "throughput": 8708.14, "total_tokens": 81937384} +{"current_steps": 121570, "total_steps": 204665, "loss": 0.0002, "lr": 8.470518435545202e-07, "epoch": 2.969975325531967, "percentage": 59.4, "elapsed_time": "2:36:49", "remaining_time": "1:47:11", "throughput": 8708.18, "total_tokens": 81940776} +{"current_steps": 121575, "total_steps": 204665, "loss": 0.0001, "lr": 8.469675698692862e-07, "epoch": 2.970097476363814, "percentage": 59.4, "elapsed_time": "2:36:49", "remaining_time": "1:47:11", "throughput": 8708.21, "total_tokens": 81944168} +{"current_steps": 121580, "total_steps": 204665, "loss": 0.0002, "lr": 8.468832972969457e-07, "epoch": 2.9702196271956613, "percentage": 59.4, "elapsed_time": "2:36:50", "remaining_time": "1:47:10", "throughput": 8708.27, "total_tokens": 81947752} +{"current_steps": 121585, "total_steps": 204665, "loss": 0.0919, "lr": 8.467990258381104e-07, "epoch": 2.9703417780275085, "percentage": 59.41, "elapsed_time": "2:36:50", "remaining_time": "1:47:10", "throughput": 8708.29, "total_tokens": 81950952} +{"current_steps": 121590, "total_steps": 204665, "loss": 0.0725, "lr": 8.467147554933942e-07, "epoch": 2.9704639288593553, "percentage": 59.41, "elapsed_time": "2:36:51", "remaining_time": "1:47:09", "throughput": 8708.31, "total_tokens": 81954216} +{"current_steps": 121595, "total_steps": 204665, "loss": 0.0004, "lr": 8.466304862634092e-07, "epoch": 2.970586079691203, "percentage": 59.41, "elapsed_time": "2:36:51", "remaining_time": "1:47:09", "throughput": 8708.34, "total_tokens": 81957480} +{"current_steps": 121600, "total_steps": 204665, "loss": 0.0345, "lr": 8.465462181487684e-07, "epoch": 2.9707082305230497, "percentage": 59.41, "elapsed_time": "2:36:51", "remaining_time": "1:47:09", "throughput": 8708.37, "total_tokens": 81960808} +{"current_steps": 121605, "total_steps": 204665, "loss": 0.0002, "lr": 8.464619511500855e-07, "epoch": 2.9708303813548973, "percentage": 59.42, "elapsed_time": "2:36:52", "remaining_time": "1:47:08", "throughput": 8708.42, "total_tokens": 81964328} +{"current_steps": 121610, "total_steps": 204665, "loss": 0.0376, "lr": 8.463776852679718e-07, "epoch": 2.970952532186744, "percentage": 59.42, "elapsed_time": "2:36:52", "remaining_time": "1:47:08", "throughput": 8708.51, "total_tokens": 81968296} +{"current_steps": 121615, "total_steps": 204665, "loss": 0.0004, "lr": 8.462934205030417e-07, "epoch": 2.9710746830185912, "percentage": 59.42, "elapsed_time": "2:36:52", "remaining_time": "1:47:07", "throughput": 8708.54, "total_tokens": 81971624} +{"current_steps": 121620, "total_steps": 204665, "loss": 0.0007, "lr": 8.462091568559067e-07, "epoch": 2.9711968338504384, "percentage": 59.42, "elapsed_time": "2:36:53", "remaining_time": "1:47:07", "throughput": 8708.58, "total_tokens": 81974952} +{"current_steps": 121625, "total_steps": 204665, "loss": 0.0005, "lr": 8.461248943271802e-07, "epoch": 2.9713189846822856, "percentage": 59.43, "elapsed_time": "2:36:53", "remaining_time": "1:47:07", "throughput": 8708.63, "total_tokens": 81978600} +{"current_steps": 121630, "total_steps": 204665, "loss": 0.0003, "lr": 8.460406329174748e-07, "epoch": 2.971441135514133, "percentage": 59.43, "elapsed_time": "2:36:53", "remaining_time": "1:47:06", "throughput": 8708.64, "total_tokens": 81981608} +{"current_steps": 121635, "total_steps": 204665, "loss": 0.0003, "lr": 8.459563726274031e-07, "epoch": 2.97156328634598, "percentage": 59.43, "elapsed_time": "2:36:54", "remaining_time": "1:47:06", "throughput": 8708.66, "total_tokens": 81984872} +{"current_steps": 121640, "total_steps": 204665, "loss": 0.0568, "lr": 8.458721134575785e-07, "epoch": 2.971685437177827, "percentage": 59.43, "elapsed_time": "2:36:54", "remaining_time": "1:47:05", "throughput": 8708.68, "total_tokens": 81988008} +{"current_steps": 121645, "total_steps": 204665, "loss": 0.0342, "lr": 8.457878554086129e-07, "epoch": 2.9718075880096744, "percentage": 59.44, "elapsed_time": "2:36:54", "remaining_time": "1:47:05", "throughput": 8708.68, "total_tokens": 81990952} +{"current_steps": 121650, "total_steps": 204665, "loss": 0.0345, "lr": 8.4570359848112e-07, "epoch": 2.9719297388415216, "percentage": 59.44, "elapsed_time": "2:36:55", "remaining_time": "1:47:05", "throughput": 8708.71, "total_tokens": 81994344} +{"current_steps": 121655, "total_steps": 204665, "loss": 0.0003, "lr": 8.456193426757117e-07, "epoch": 2.9720518896733688, "percentage": 59.44, "elapsed_time": "2:36:55", "remaining_time": "1:47:04", "throughput": 8708.75, "total_tokens": 81997672} +{"current_steps": 121660, "total_steps": 204665, "loss": 0.0001, "lr": 8.455350879930009e-07, "epoch": 2.972174040505216, "percentage": 59.44, "elapsed_time": "2:36:55", "remaining_time": "1:47:04", "throughput": 8708.75, "total_tokens": 82000680} +{"current_steps": 121665, "total_steps": 204665, "loss": 0.0002, "lr": 8.454508344336009e-07, "epoch": 2.972296191337063, "percentage": 59.45, "elapsed_time": "2:36:56", "remaining_time": "1:47:03", "throughput": 8708.74, "total_tokens": 82003560} +{"current_steps": 121670, "total_steps": 204665, "loss": 0.0001, "lr": 8.453665819981239e-07, "epoch": 2.9724183421689103, "percentage": 59.45, "elapsed_time": "2:36:56", "remaining_time": "1:47:03", "throughput": 8708.77, "total_tokens": 82006824} +{"current_steps": 121675, "total_steps": 204665, "loss": 0.0457, "lr": 8.452823306871826e-07, "epoch": 2.972540493000757, "percentage": 59.45, "elapsed_time": "2:36:56", "remaining_time": "1:47:02", "throughput": 8708.79, "total_tokens": 82010024} +{"current_steps": 121680, "total_steps": 204665, "loss": 0.1149, "lr": 8.451980805013898e-07, "epoch": 2.9726626438326047, "percentage": 59.45, "elapsed_time": "2:36:57", "remaining_time": "1:47:02", "throughput": 8708.81, "total_tokens": 82013224} +{"current_steps": 121685, "total_steps": 204665, "loss": 0.0007, "lr": 8.451138314413586e-07, "epoch": 2.9727847946644514, "percentage": 59.46, "elapsed_time": "2:36:57", "remaining_time": "1:47:02", "throughput": 8708.85, "total_tokens": 82016680} +{"current_steps": 121690, "total_steps": 204665, "loss": 0.071, "lr": 8.450295835077007e-07, "epoch": 2.972906945496299, "percentage": 59.46, "elapsed_time": "2:36:57", "remaining_time": "1:47:01", "throughput": 8708.89, "total_tokens": 82020072} +{"current_steps": 121695, "total_steps": 204665, "loss": 0.0457, "lr": 8.449453367010293e-07, "epoch": 2.973029096328146, "percentage": 59.46, "elapsed_time": "2:36:58", "remaining_time": "1:47:01", "throughput": 8708.92, "total_tokens": 82023400} +{"current_steps": 121700, "total_steps": 204665, "loss": 0.0517, "lr": 8.448610910219577e-07, "epoch": 2.973151247159993, "percentage": 59.46, "elapsed_time": "2:36:58", "remaining_time": "1:47:00", "throughput": 8708.95, "total_tokens": 82026728} +{"current_steps": 121705, "total_steps": 204665, "loss": 0.0089, "lr": 8.447768464710974e-07, "epoch": 2.97327339799184, "percentage": 59.47, "elapsed_time": "2:36:59", "remaining_time": "1:47:00", "throughput": 8708.99, "total_tokens": 82030120} +{"current_steps": 121710, "total_steps": 204665, "loss": 0.0001, "lr": 8.446926030490622e-07, "epoch": 2.9733955488236874, "percentage": 59.47, "elapsed_time": "2:36:59", "remaining_time": "1:47:00", "throughput": 8709.04, "total_tokens": 82033768} +{"current_steps": 121715, "total_steps": 204665, "loss": 0.0642, "lr": 8.446083607564636e-07, "epoch": 2.9735176996555346, "percentage": 59.47, "elapsed_time": "2:36:59", "remaining_time": "1:46:59", "throughput": 8709.09, "total_tokens": 82037224} +{"current_steps": 121720, "total_steps": 204665, "loss": 0.1574, "lr": 8.445241195939152e-07, "epoch": 2.9736398504873818, "percentage": 59.47, "elapsed_time": "2:37:00", "remaining_time": "1:46:59", "throughput": 8709.12, "total_tokens": 82040616} +{"current_steps": 121725, "total_steps": 204665, "loss": 0.0398, "lr": 8.444398795620289e-07, "epoch": 2.973762001319229, "percentage": 59.48, "elapsed_time": "2:37:00", "remaining_time": "1:46:58", "throughput": 8709.13, "total_tokens": 82043688} +{"current_steps": 121730, "total_steps": 204665, "loss": 0.0905, "lr": 8.443556406614179e-07, "epoch": 2.973884152151076, "percentage": 59.48, "elapsed_time": "2:37:00", "remaining_time": "1:46:58", "throughput": 8709.21, "total_tokens": 82047528} +{"current_steps": 121735, "total_steps": 204665, "loss": 0.0304, "lr": 8.442714028926946e-07, "epoch": 2.9740063029829233, "percentage": 59.48, "elapsed_time": "2:37:01", "remaining_time": "1:46:57", "throughput": 8709.24, "total_tokens": 82050920} +{"current_steps": 121740, "total_steps": 204665, "loss": 0.0853, "lr": 8.441871662564712e-07, "epoch": 2.9741284538147705, "percentage": 59.48, "elapsed_time": "2:37:01", "remaining_time": "1:46:57", "throughput": 8709.27, "total_tokens": 82054120} +{"current_steps": 121745, "total_steps": 204665, "loss": 0.0006, "lr": 8.44102930753361e-07, "epoch": 2.9742506046466177, "percentage": 59.49, "elapsed_time": "2:37:01", "remaining_time": "1:46:57", "throughput": 8709.29, "total_tokens": 82057384} +{"current_steps": 121750, "total_steps": 204665, "loss": 0.0003, "lr": 8.44018696383976e-07, "epoch": 2.974372755478465, "percentage": 59.49, "elapsed_time": "2:37:02", "remaining_time": "1:46:56", "throughput": 8709.32, "total_tokens": 82060648} +{"current_steps": 121755, "total_steps": 204665, "loss": 0.0006, "lr": 8.439344631489287e-07, "epoch": 2.974494906310312, "percentage": 59.49, "elapsed_time": "2:37:02", "remaining_time": "1:46:56", "throughput": 8709.36, "total_tokens": 82064104} +{"current_steps": 121760, "total_steps": 204665, "loss": 0.0518, "lr": 8.438502310488326e-07, "epoch": 2.9746170571421593, "percentage": 59.49, "elapsed_time": "2:37:02", "remaining_time": "1:46:55", "throughput": 8709.37, "total_tokens": 82067112} +{"current_steps": 121765, "total_steps": 204665, "loss": 0.0008, "lr": 8.437660000842991e-07, "epoch": 2.9747392079740065, "percentage": 59.49, "elapsed_time": "2:37:03", "remaining_time": "1:46:55", "throughput": 8709.39, "total_tokens": 82070312} +{"current_steps": 121770, "total_steps": 204665, "loss": 0.1004, "lr": 8.436817702559417e-07, "epoch": 2.9748613588058532, "percentage": 59.5, "elapsed_time": "2:37:03", "remaining_time": "1:46:55", "throughput": 8709.4, "total_tokens": 82073384} +{"current_steps": 121775, "total_steps": 204665, "loss": 0.1113, "lr": 8.435975415643724e-07, "epoch": 2.974983509637701, "percentage": 59.5, "elapsed_time": "2:37:03", "remaining_time": "1:46:54", "throughput": 8709.46, "total_tokens": 82077032} +{"current_steps": 121780, "total_steps": 204665, "loss": 0.0006, "lr": 8.435133140102036e-07, "epoch": 2.9751056604695476, "percentage": 59.5, "elapsed_time": "2:37:04", "remaining_time": "1:46:54", "throughput": 8709.46, "total_tokens": 82080040} +{"current_steps": 121785, "total_steps": 204665, "loss": 0.0452, "lr": 8.434290875940483e-07, "epoch": 2.975227811301395, "percentage": 59.5, "elapsed_time": "2:37:04", "remaining_time": "1:46:53", "throughput": 8709.47, "total_tokens": 82083112} +{"current_steps": 121790, "total_steps": 204665, "loss": 0.0002, "lr": 8.433448623165185e-07, "epoch": 2.975349962133242, "percentage": 59.51, "elapsed_time": "2:37:04", "remaining_time": "1:46:53", "throughput": 8709.5, "total_tokens": 82086440} +{"current_steps": 121795, "total_steps": 204665, "loss": 0.0002, "lr": 8.432606381782275e-07, "epoch": 2.975472112965089, "percentage": 59.51, "elapsed_time": "2:37:05", "remaining_time": "1:46:53", "throughput": 8709.55, "total_tokens": 82089896} +{"current_steps": 121800, "total_steps": 204665, "loss": 0.0003, "lr": 8.431764151797867e-07, "epoch": 2.9755942637969364, "percentage": 59.51, "elapsed_time": "2:37:05", "remaining_time": "1:46:52", "throughput": 8709.54, "total_tokens": 82092712} +{"current_steps": 121805, "total_steps": 204665, "loss": 0.0001, "lr": 8.430921933218097e-07, "epoch": 2.9757164146287836, "percentage": 59.51, "elapsed_time": "2:37:05", "remaining_time": "1:46:52", "throughput": 8709.53, "total_tokens": 82095592} +{"current_steps": 121810, "total_steps": 204665, "loss": 0.0001, "lr": 8.430079726049081e-07, "epoch": 2.9758385654606307, "percentage": 59.52, "elapsed_time": "2:37:06", "remaining_time": "1:46:51", "throughput": 8709.57, "total_tokens": 82098984} +{"current_steps": 121815, "total_steps": 204665, "loss": 0.0666, "lr": 8.429237530296946e-07, "epoch": 2.975960716292478, "percentage": 59.52, "elapsed_time": "2:37:06", "remaining_time": "1:46:51", "throughput": 8709.59, "total_tokens": 82102184} +{"current_steps": 121820, "total_steps": 204665, "loss": 0.0001, "lr": 8.428395345967825e-07, "epoch": 2.976082867124325, "percentage": 59.52, "elapsed_time": "2:37:06", "remaining_time": "1:46:50", "throughput": 8709.59, "total_tokens": 82105192} +{"current_steps": 121825, "total_steps": 204665, "loss": 0.0002, "lr": 8.427553173067832e-07, "epoch": 2.9762050179561723, "percentage": 59.52, "elapsed_time": "2:37:07", "remaining_time": "1:46:50", "throughput": 8709.61, "total_tokens": 82108392} +{"current_steps": 121830, "total_steps": 204665, "loss": 0.0345, "lr": 8.426711011603094e-07, "epoch": 2.9763271687880195, "percentage": 59.53, "elapsed_time": "2:37:07", "remaining_time": "1:46:50", "throughput": 8709.67, "total_tokens": 82112040} +{"current_steps": 121835, "total_steps": 204665, "loss": 0.0001, "lr": 8.425868861579739e-07, "epoch": 2.9764493196198667, "percentage": 59.53, "elapsed_time": "2:37:08", "remaining_time": "1:46:49", "throughput": 8709.73, "total_tokens": 82115624} +{"current_steps": 121840, "total_steps": 204665, "loss": 0.0657, "lr": 8.425026723003889e-07, "epoch": 2.976571470451714, "percentage": 59.53, "elapsed_time": "2:37:08", "remaining_time": "1:46:49", "throughput": 8709.75, "total_tokens": 82118824} +{"current_steps": 121845, "total_steps": 204665, "loss": 0.0003, "lr": 8.424184595881666e-07, "epoch": 2.976693621283561, "percentage": 59.53, "elapsed_time": "2:37:08", "remaining_time": "1:46:48", "throughput": 8709.81, "total_tokens": 82122472} +{"current_steps": 121850, "total_steps": 204665, "loss": 0.0441, "lr": 8.423342480219195e-07, "epoch": 2.9768157721154083, "percentage": 59.54, "elapsed_time": "2:37:09", "remaining_time": "1:46:48", "throughput": 8709.88, "total_tokens": 82126248} +{"current_steps": 121855, "total_steps": 204665, "loss": 0.0734, "lr": 8.422500376022607e-07, "epoch": 2.976937922947255, "percentage": 59.54, "elapsed_time": "2:37:09", "remaining_time": "1:46:48", "throughput": 8709.92, "total_tokens": 82129704} +{"current_steps": 121860, "total_steps": 204665, "loss": 0.0002, "lr": 8.421658283298017e-07, "epoch": 2.9770600737791026, "percentage": 59.54, "elapsed_time": "2:37:09", "remaining_time": "1:46:47", "throughput": 8709.97, "total_tokens": 82133224} +{"current_steps": 121865, "total_steps": 204665, "loss": 0.0003, "lr": 8.420816202051555e-07, "epoch": 2.9771822246109494, "percentage": 59.54, "elapsed_time": "2:37:10", "remaining_time": "1:46:47", "throughput": 8710.01, "total_tokens": 82136616} +{"current_steps": 121870, "total_steps": 204665, "loss": 0.0505, "lr": 8.419974132289338e-07, "epoch": 2.977304375442797, "percentage": 59.55, "elapsed_time": "2:37:10", "remaining_time": "1:46:46", "throughput": 8710.05, "total_tokens": 82140136} +{"current_steps": 121875, "total_steps": 204665, "loss": 0.0423, "lr": 8.419132074017499e-07, "epoch": 2.9774265262746438, "percentage": 59.55, "elapsed_time": "2:37:10", "remaining_time": "1:46:46", "throughput": 8710.06, "total_tokens": 82143144} +{"current_steps": 121880, "total_steps": 204665, "loss": 0.0182, "lr": 8.418290027242153e-07, "epoch": 2.977548677106491, "percentage": 59.55, "elapsed_time": "2:37:11", "remaining_time": "1:46:45", "throughput": 8710.08, "total_tokens": 82146408} +{"current_steps": 121885, "total_steps": 204665, "loss": 0.0004, "lr": 8.417447991969429e-07, "epoch": 2.977670827938338, "percentage": 59.55, "elapsed_time": "2:37:11", "remaining_time": "1:46:45", "throughput": 8710.13, "total_tokens": 82149864} +{"current_steps": 121890, "total_steps": 204665, "loss": 0.0002, "lr": 8.41660596820545e-07, "epoch": 2.9777929787701853, "percentage": 59.56, "elapsed_time": "2:37:11", "remaining_time": "1:46:45", "throughput": 8710.19, "total_tokens": 82153512} +{"current_steps": 121895, "total_steps": 204665, "loss": 0.0526, "lr": 8.415763955956336e-07, "epoch": 2.9779151296020325, "percentage": 59.56, "elapsed_time": "2:37:12", "remaining_time": "1:46:44", "throughput": 8710.21, "total_tokens": 82156648} +{"current_steps": 121900, "total_steps": 204665, "loss": 0.0731, "lr": 8.414921955228216e-07, "epoch": 2.9780372804338797, "percentage": 59.56, "elapsed_time": "2:37:12", "remaining_time": "1:46:44", "throughput": 8710.2, "total_tokens": 82159528} +{"current_steps": 121905, "total_steps": 204665, "loss": 0.0002, "lr": 8.414079966027206e-07, "epoch": 2.978159431265727, "percentage": 59.56, "elapsed_time": "2:37:12", "remaining_time": "1:46:43", "throughput": 8710.24, "total_tokens": 82162984} +{"current_steps": 121910, "total_steps": 204665, "loss": 0.0691, "lr": 8.413237988359432e-07, "epoch": 2.978281582097574, "percentage": 59.57, "elapsed_time": "2:37:13", "remaining_time": "1:46:43", "throughput": 8710.28, "total_tokens": 82166440} +{"current_steps": 121915, "total_steps": 204665, "loss": 0.049, "lr": 8.412396022231023e-07, "epoch": 2.9784037329294213, "percentage": 59.57, "elapsed_time": "2:37:13", "remaining_time": "1:46:43", "throughput": 8710.38, "total_tokens": 82170472} +{"current_steps": 121920, "total_steps": 204665, "loss": 0.1157, "lr": 8.411554067648092e-07, "epoch": 2.9785258837612685, "percentage": 59.57, "elapsed_time": "2:37:13", "remaining_time": "1:46:42", "throughput": 8710.39, "total_tokens": 82173544} +{"current_steps": 121925, "total_steps": 204665, "loss": 0.0765, "lr": 8.410712124616773e-07, "epoch": 2.9786480345931157, "percentage": 59.57, "elapsed_time": "2:37:14", "remaining_time": "1:46:42", "throughput": 8710.46, "total_tokens": 82177384} +{"current_steps": 121930, "total_steps": 204665, "loss": 0.0004, "lr": 8.409870193143179e-07, "epoch": 2.978770185424963, "percentage": 59.58, "elapsed_time": "2:37:14", "remaining_time": "1:46:41", "throughput": 8710.5, "total_tokens": 82180776} +{"current_steps": 121935, "total_steps": 204665, "loss": 0.0006, "lr": 8.409028273233439e-07, "epoch": 2.97889233625681, "percentage": 59.58, "elapsed_time": "2:37:15", "remaining_time": "1:46:41", "throughput": 8710.53, "total_tokens": 82184168} +{"current_steps": 121940, "total_steps": 204665, "loss": 0.0001, "lr": 8.40818636489367e-07, "epoch": 2.9790144870886572, "percentage": 59.58, "elapsed_time": "2:37:15", "remaining_time": "1:46:41", "throughput": 8710.6, "total_tokens": 82187944} +{"current_steps": 121945, "total_steps": 204665, "loss": 0.0001, "lr": 8.407344468129998e-07, "epoch": 2.9791366379205044, "percentage": 59.58, "elapsed_time": "2:37:15", "remaining_time": "1:46:40", "throughput": 8710.65, "total_tokens": 82191528} +{"current_steps": 121950, "total_steps": 204665, "loss": 0.0849, "lr": 8.40650258294855e-07, "epoch": 2.979258788752351, "percentage": 59.59, "elapsed_time": "2:37:16", "remaining_time": "1:46:40", "throughput": 8710.7, "total_tokens": 82195048} +{"current_steps": 121955, "total_steps": 204665, "loss": 0.0, "lr": 8.405660709355439e-07, "epoch": 2.979380939584199, "percentage": 59.59, "elapsed_time": "2:37:16", "remaining_time": "1:46:39", "throughput": 8710.72, "total_tokens": 82198248} +{"current_steps": 121960, "total_steps": 204665, "loss": 0.0002, "lr": 8.404818847356796e-07, "epoch": 2.9795030904160456, "percentage": 59.59, "elapsed_time": "2:37:16", "remaining_time": "1:46:39", "throughput": 8710.76, "total_tokens": 82201704} +{"current_steps": 121965, "total_steps": 204665, "loss": 0.0003, "lr": 8.403976996958735e-07, "epoch": 2.9796252412478927, "percentage": 59.59, "elapsed_time": "2:37:17", "remaining_time": "1:46:38", "throughput": 8710.79, "total_tokens": 82204968} +{"current_steps": 121970, "total_steps": 204665, "loss": 0.0454, "lr": 8.403135158167382e-07, "epoch": 2.97974739207974, "percentage": 59.59, "elapsed_time": "2:37:17", "remaining_time": "1:46:38", "throughput": 8710.8, "total_tokens": 82208104} +{"current_steps": 121975, "total_steps": 204665, "loss": 0.0002, "lr": 8.402293330988866e-07, "epoch": 2.979869542911587, "percentage": 59.6, "elapsed_time": "2:37:17", "remaining_time": "1:46:38", "throughput": 8710.83, "total_tokens": 82211368} +{"current_steps": 121980, "total_steps": 204665, "loss": 0.1058, "lr": 8.401451515429299e-07, "epoch": 2.9799916937434343, "percentage": 59.6, "elapsed_time": "2:37:18", "remaining_time": "1:46:37", "throughput": 8710.84, "total_tokens": 82214440} +{"current_steps": 121985, "total_steps": 204665, "loss": 0.0618, "lr": 8.400609711494807e-07, "epoch": 2.9801138445752815, "percentage": 59.6, "elapsed_time": "2:37:18", "remaining_time": "1:46:37", "throughput": 8710.87, "total_tokens": 82217832} +{"current_steps": 121990, "total_steps": 204665, "loss": 0.0467, "lr": 8.399767919191511e-07, "epoch": 2.9802359954071287, "percentage": 59.6, "elapsed_time": "2:37:18", "remaining_time": "1:46:36", "throughput": 8710.93, "total_tokens": 82221416} +{"current_steps": 121995, "total_steps": 204665, "loss": 0.0016, "lr": 8.398926138525536e-07, "epoch": 2.980358146238976, "percentage": 59.61, "elapsed_time": "2:37:19", "remaining_time": "1:46:36", "throughput": 8710.93, "total_tokens": 82224360} +{"current_steps": 122000, "total_steps": 204665, "loss": 0.0424, "lr": 8.398084369502996e-07, "epoch": 2.980480297070823, "percentage": 59.61, "elapsed_time": "2:37:19", "remaining_time": "1:46:36", "throughput": 8710.95, "total_tokens": 82227560} +{"current_steps": 122005, "total_steps": 204665, "loss": 0.0332, "lr": 8.397242612130017e-07, "epoch": 2.9806024479026703, "percentage": 59.61, "elapsed_time": "2:37:19", "remaining_time": "1:46:35", "throughput": 8710.96, "total_tokens": 82230632} +{"current_steps": 122010, "total_steps": 204665, "loss": 0.0683, "lr": 8.396400866412725e-07, "epoch": 2.9807245987345174, "percentage": 59.61, "elapsed_time": "2:37:20", "remaining_time": "1:46:35", "throughput": 8711.0, "total_tokens": 82234088} +{"current_steps": 122015, "total_steps": 204665, "loss": 0.0003, "lr": 8.395559132357234e-07, "epoch": 2.9808467495663646, "percentage": 59.62, "elapsed_time": "2:37:20", "remaining_time": "1:46:34", "throughput": 8711.04, "total_tokens": 82237416} +{"current_steps": 122020, "total_steps": 204665, "loss": 0.0005, "lr": 8.394717409969671e-07, "epoch": 2.980968900398212, "percentage": 59.62, "elapsed_time": "2:37:20", "remaining_time": "1:46:34", "throughput": 8711.08, "total_tokens": 82240872} +{"current_steps": 122025, "total_steps": 204665, "loss": 0.1045, "lr": 8.393875699256152e-07, "epoch": 2.981091051230059, "percentage": 59.62, "elapsed_time": "2:37:21", "remaining_time": "1:46:34", "throughput": 8711.16, "total_tokens": 82244776} +{"current_steps": 122030, "total_steps": 204665, "loss": 0.056, "lr": 8.393034000222805e-07, "epoch": 2.981213202061906, "percentage": 59.62, "elapsed_time": "2:37:21", "remaining_time": "1:46:33", "throughput": 8711.18, "total_tokens": 82247912} +{"current_steps": 122035, "total_steps": 204665, "loss": 0.0002, "lr": 8.392192312875742e-07, "epoch": 2.981335352893753, "percentage": 59.63, "elapsed_time": "2:37:22", "remaining_time": "1:46:33", "throughput": 8711.24, "total_tokens": 82251624} +{"current_steps": 122040, "total_steps": 204665, "loss": 0.0001, "lr": 8.391350637221092e-07, "epoch": 2.9814575037256006, "percentage": 59.63, "elapsed_time": "2:37:22", "remaining_time": "1:46:32", "throughput": 8711.25, "total_tokens": 82254696} +{"current_steps": 122045, "total_steps": 204665, "loss": 0.0001, "lr": 8.390508973264974e-07, "epoch": 2.9815796545574473, "percentage": 59.63, "elapsed_time": "2:37:22", "remaining_time": "1:46:32", "throughput": 8711.3, "total_tokens": 82258216} +{"current_steps": 122050, "total_steps": 204665, "loss": 0.0971, "lr": 8.389667321013505e-07, "epoch": 2.981701805389295, "percentage": 59.63, "elapsed_time": "2:37:23", "remaining_time": "1:46:31", "throughput": 8711.33, "total_tokens": 82261544} +{"current_steps": 122055, "total_steps": 204665, "loss": 0.0002, "lr": 8.388825680472811e-07, "epoch": 2.9818239562211417, "percentage": 59.64, "elapsed_time": "2:37:23", "remaining_time": "1:46:31", "throughput": 8711.36, "total_tokens": 82264808} +{"current_steps": 122060, "total_steps": 204665, "loss": 0.0009, "lr": 8.387984051649006e-07, "epoch": 2.981946107052989, "percentage": 59.64, "elapsed_time": "2:37:23", "remaining_time": "1:46:31", "throughput": 8711.38, "total_tokens": 82268072} +{"current_steps": 122065, "total_steps": 204665, "loss": 0.0004, "lr": 8.387142434548216e-07, "epoch": 2.982068257884836, "percentage": 59.64, "elapsed_time": "2:37:24", "remaining_time": "1:46:30", "throughput": 8711.43, "total_tokens": 82271592} +{"current_steps": 122070, "total_steps": 204665, "loss": 0.0491, "lr": 8.386300829176563e-07, "epoch": 2.9821904087166833, "percentage": 59.64, "elapsed_time": "2:37:24", "remaining_time": "1:46:30", "throughput": 8711.42, "total_tokens": 82274408} +{"current_steps": 122075, "total_steps": 204665, "loss": 0.0003, "lr": 8.38545923554016e-07, "epoch": 2.9823125595485305, "percentage": 59.65, "elapsed_time": "2:37:24", "remaining_time": "1:46:29", "throughput": 8711.45, "total_tokens": 82277672} +{"current_steps": 122080, "total_steps": 204665, "loss": 0.0442, "lr": 8.384617653645136e-07, "epoch": 2.9824347103803777, "percentage": 59.65, "elapsed_time": "2:37:25", "remaining_time": "1:46:29", "throughput": 8711.49, "total_tokens": 82281064} +{"current_steps": 122085, "total_steps": 204665, "loss": 0.0342, "lr": 8.383776083497604e-07, "epoch": 2.982556861212225, "percentage": 59.65, "elapsed_time": "2:37:25", "remaining_time": "1:46:29", "throughput": 8711.58, "total_tokens": 82285096} +{"current_steps": 122090, "total_steps": 204665, "loss": 0.1432, "lr": 8.382934525103688e-07, "epoch": 2.982679012044072, "percentage": 59.65, "elapsed_time": "2:37:25", "remaining_time": "1:46:28", "throughput": 8711.6, "total_tokens": 82288296} +{"current_steps": 122095, "total_steps": 204665, "loss": 0.0003, "lr": 8.382092978469508e-07, "epoch": 2.9828011628759192, "percentage": 59.66, "elapsed_time": "2:37:26", "remaining_time": "1:46:28", "throughput": 8711.66, "total_tokens": 82291880} +{"current_steps": 122100, "total_steps": 204665, "loss": 0.0005, "lr": 8.381251443601181e-07, "epoch": 2.9829233137077664, "percentage": 59.66, "elapsed_time": "2:37:26", "remaining_time": "1:46:27", "throughput": 8711.69, "total_tokens": 82295208} +{"current_steps": 122105, "total_steps": 204665, "loss": 0.062, "lr": 8.380409920504832e-07, "epoch": 2.9830454645396136, "percentage": 59.66, "elapsed_time": "2:37:26", "remaining_time": "1:46:27", "throughput": 8711.73, "total_tokens": 82298664} +{"current_steps": 122110, "total_steps": 204665, "loss": 0.0502, "lr": 8.379568409186573e-07, "epoch": 2.983167615371461, "percentage": 59.66, "elapsed_time": "2:37:27", "remaining_time": "1:46:26", "throughput": 8711.76, "total_tokens": 82301992} +{"current_steps": 122115, "total_steps": 204665, "loss": 0.0002, "lr": 8.378726909652533e-07, "epoch": 2.983289766203308, "percentage": 59.67, "elapsed_time": "2:37:27", "remaining_time": "1:46:26", "throughput": 8711.81, "total_tokens": 82305448} +{"current_steps": 122120, "total_steps": 204665, "loss": 0.0539, "lr": 8.377885421908824e-07, "epoch": 2.9834119170351547, "percentage": 59.67, "elapsed_time": "2:37:27", "remaining_time": "1:46:26", "throughput": 8711.81, "total_tokens": 82308392} +{"current_steps": 122125, "total_steps": 204665, "loss": 0.034, "lr": 8.377043945961566e-07, "epoch": 2.9835340678670024, "percentage": 59.67, "elapsed_time": "2:37:28", "remaining_time": "1:46:25", "throughput": 8711.85, "total_tokens": 82311848} +{"current_steps": 122130, "total_steps": 204665, "loss": 0.0556, "lr": 8.376202481816888e-07, "epoch": 2.983656218698849, "percentage": 59.67, "elapsed_time": "2:37:28", "remaining_time": "1:46:25", "throughput": 8711.87, "total_tokens": 82315048} +{"current_steps": 122135, "total_steps": 204665, "loss": 0.0474, "lr": 8.375361029480898e-07, "epoch": 2.9837783695306968, "percentage": 59.68, "elapsed_time": "2:37:28", "remaining_time": "1:46:24", "throughput": 8711.9, "total_tokens": 82318312} +{"current_steps": 122140, "total_steps": 204665, "loss": 0.0002, "lr": 8.374519588959721e-07, "epoch": 2.9839005203625435, "percentage": 59.68, "elapsed_time": "2:37:29", "remaining_time": "1:46:24", "throughput": 8711.96, "total_tokens": 82322024} +{"current_steps": 122145, "total_steps": 204665, "loss": 0.0384, "lr": 8.373678160259474e-07, "epoch": 2.9840226711943907, "percentage": 59.68, "elapsed_time": "2:37:29", "remaining_time": "1:46:24", "throughput": 8712.02, "total_tokens": 82325672} +{"current_steps": 122150, "total_steps": 204665, "loss": 0.0317, "lr": 8.372836743386279e-07, "epoch": 2.984144822026238, "percentage": 59.68, "elapsed_time": "2:37:30", "remaining_time": "1:46:23", "throughput": 8712.06, "total_tokens": 82329128} +{"current_steps": 122155, "total_steps": 204665, "loss": 0.0001, "lr": 8.371995338346249e-07, "epoch": 2.984266972858085, "percentage": 59.69, "elapsed_time": "2:37:30", "remaining_time": "1:46:23", "throughput": 8712.08, "total_tokens": 82332264} +{"current_steps": 122160, "total_steps": 204665, "loss": 0.0006, "lr": 8.371153945145506e-07, "epoch": 2.9843891236899323, "percentage": 59.69, "elapsed_time": "2:37:30", "remaining_time": "1:46:22", "throughput": 8712.13, "total_tokens": 82335848} +{"current_steps": 122165, "total_steps": 204665, "loss": 0.1012, "lr": 8.370312563790174e-07, "epoch": 2.9845112745217794, "percentage": 59.69, "elapsed_time": "2:37:31", "remaining_time": "1:46:22", "throughput": 8712.16, "total_tokens": 82339176} +{"current_steps": 122170, "total_steps": 204665, "loss": 0.0005, "lr": 8.369471194286364e-07, "epoch": 2.9846334253536266, "percentage": 59.69, "elapsed_time": "2:37:31", "remaining_time": "1:46:22", "throughput": 8712.22, "total_tokens": 82342824} +{"current_steps": 122175, "total_steps": 204665, "loss": 0.0002, "lr": 8.368629836640202e-07, "epoch": 2.984755576185474, "percentage": 59.7, "elapsed_time": "2:37:31", "remaining_time": "1:46:21", "throughput": 8712.24, "total_tokens": 82346024} +{"current_steps": 122180, "total_steps": 204665, "loss": 0.0257, "lr": 8.367788490857798e-07, "epoch": 2.984877727017321, "percentage": 59.7, "elapsed_time": "2:37:32", "remaining_time": "1:46:21", "throughput": 8712.3, "total_tokens": 82349672} +{"current_steps": 122185, "total_steps": 204665, "loss": 0.0003, "lr": 8.366947156945279e-07, "epoch": 2.984999877849168, "percentage": 59.7, "elapsed_time": "2:37:32", "remaining_time": "1:46:20", "throughput": 8712.32, "total_tokens": 82352872} +{"current_steps": 122190, "total_steps": 204665, "loss": 0.0001, "lr": 8.366105834908756e-07, "epoch": 2.9851220286810154, "percentage": 59.7, "elapsed_time": "2:37:32", "remaining_time": "1:46:20", "throughput": 8712.35, "total_tokens": 82356200} +{"current_steps": 122195, "total_steps": 204665, "loss": 0.0564, "lr": 8.365264524754353e-07, "epoch": 2.9852441795128626, "percentage": 59.7, "elapsed_time": "2:37:33", "remaining_time": "1:46:19", "throughput": 8712.42, "total_tokens": 82359848} +{"current_steps": 122200, "total_steps": 204665, "loss": 0.0002, "lr": 8.364423226488187e-07, "epoch": 2.9853663303447098, "percentage": 59.71, "elapsed_time": "2:37:33", "remaining_time": "1:46:19", "throughput": 8712.45, "total_tokens": 82363176} +{"current_steps": 122205, "total_steps": 204665, "loss": 0.0343, "lr": 8.363581940116373e-07, "epoch": 2.985488481176557, "percentage": 59.71, "elapsed_time": "2:37:33", "remaining_time": "1:46:19", "throughput": 8712.49, "total_tokens": 82366632} +{"current_steps": 122210, "total_steps": 204665, "loss": 0.0003, "lr": 8.362740665645034e-07, "epoch": 2.985610632008404, "percentage": 59.71, "elapsed_time": "2:37:34", "remaining_time": "1:46:18", "throughput": 8712.54, "total_tokens": 82370216} +{"current_steps": 122215, "total_steps": 204665, "loss": 0.062, "lr": 8.361899403080282e-07, "epoch": 2.985732782840251, "percentage": 59.71, "elapsed_time": "2:37:34", "remaining_time": "1:46:18", "throughput": 8712.6, "total_tokens": 82373864} +{"current_steps": 122220, "total_steps": 204665, "loss": 0.0334, "lr": 8.361058152428238e-07, "epoch": 2.9858549336720985, "percentage": 59.72, "elapsed_time": "2:37:34", "remaining_time": "1:46:17", "throughput": 8712.65, "total_tokens": 82377384} +{"current_steps": 122225, "total_steps": 204665, "loss": 0.0373, "lr": 8.360216913695023e-07, "epoch": 2.9859770845039453, "percentage": 59.72, "elapsed_time": "2:37:35", "remaining_time": "1:46:17", "throughput": 8712.65, "total_tokens": 82380328} +{"current_steps": 122230, "total_steps": 204665, "loss": 0.1049, "lr": 8.359375686886748e-07, "epoch": 2.986099235335793, "percentage": 59.72, "elapsed_time": "2:37:35", "remaining_time": "1:46:17", "throughput": 8712.69, "total_tokens": 82383784} +{"current_steps": 122235, "total_steps": 204665, "loss": 0.0002, "lr": 8.358534472009538e-07, "epoch": 2.9862213861676397, "percentage": 59.72, "elapsed_time": "2:37:35", "remaining_time": "1:46:16", "throughput": 8712.76, "total_tokens": 82387624} +{"current_steps": 122240, "total_steps": 204665, "loss": 0.0445, "lr": 8.357693269069506e-07, "epoch": 2.986343536999487, "percentage": 59.73, "elapsed_time": "2:37:36", "remaining_time": "1:46:16", "throughput": 8712.81, "total_tokens": 82391144} +{"current_steps": 122245, "total_steps": 204665, "loss": 0.0002, "lr": 8.356852078072769e-07, "epoch": 2.986465687831334, "percentage": 59.73, "elapsed_time": "2:37:36", "remaining_time": "1:46:15", "throughput": 8712.83, "total_tokens": 82394280} +{"current_steps": 122250, "total_steps": 204665, "loss": 0.0002, "lr": 8.356010899025448e-07, "epoch": 2.9865878386631812, "percentage": 59.73, "elapsed_time": "2:37:37", "remaining_time": "1:46:15", "throughput": 8712.86, "total_tokens": 82397608} +{"current_steps": 122255, "total_steps": 204665, "loss": 0.0866, "lr": 8.355169731933654e-07, "epoch": 2.9867099894950284, "percentage": 59.73, "elapsed_time": "2:37:37", "remaining_time": "1:46:15", "throughput": 8712.89, "total_tokens": 82400936} +{"current_steps": 122260, "total_steps": 204665, "loss": 0.1082, "lr": 8.354328576803511e-07, "epoch": 2.9868321403268756, "percentage": 59.74, "elapsed_time": "2:37:37", "remaining_time": "1:46:14", "throughput": 8712.91, "total_tokens": 82404072} +{"current_steps": 122265, "total_steps": 204665, "loss": 0.001, "lr": 8.353487433641131e-07, "epoch": 2.986954291158723, "percentage": 59.74, "elapsed_time": "2:37:38", "remaining_time": "1:46:14", "throughput": 8712.98, "total_tokens": 82407912} +{"current_steps": 122270, "total_steps": 204665, "loss": 0.0002, "lr": 8.352646302452637e-07, "epoch": 2.98707644199057, "percentage": 59.74, "elapsed_time": "2:37:38", "remaining_time": "1:46:13", "throughput": 8713.0, "total_tokens": 82411112} +{"current_steps": 122275, "total_steps": 204665, "loss": 0.0992, "lr": 8.351805183244137e-07, "epoch": 2.987198592822417, "percentage": 59.74, "elapsed_time": "2:37:38", "remaining_time": "1:46:13", "throughput": 8713.05, "total_tokens": 82414696} +{"current_steps": 122280, "total_steps": 204665, "loss": 0.0001, "lr": 8.350964076021754e-07, "epoch": 2.9873207436542644, "percentage": 59.75, "elapsed_time": "2:37:39", "remaining_time": "1:46:12", "throughput": 8713.1, "total_tokens": 82418216} +{"current_steps": 122285, "total_steps": 204665, "loss": 0.0007, "lr": 8.350122980791608e-07, "epoch": 2.9874428944861116, "percentage": 59.75, "elapsed_time": "2:37:39", "remaining_time": "1:46:12", "throughput": 8713.12, "total_tokens": 82421352} +{"current_steps": 122290, "total_steps": 204665, "loss": 0.0323, "lr": 8.349281897559808e-07, "epoch": 2.9875650453179587, "percentage": 59.75, "elapsed_time": "2:37:39", "remaining_time": "1:46:12", "throughput": 8713.15, "total_tokens": 82424680} +{"current_steps": 122295, "total_steps": 204665, "loss": 0.0002, "lr": 8.348440826332477e-07, "epoch": 2.987687196149806, "percentage": 59.75, "elapsed_time": "2:37:40", "remaining_time": "1:46:11", "throughput": 8713.16, "total_tokens": 82427816} +{"current_steps": 122300, "total_steps": 204665, "loss": 0.0663, "lr": 8.347599767115726e-07, "epoch": 2.9878093469816527, "percentage": 59.76, "elapsed_time": "2:37:40", "remaining_time": "1:46:11", "throughput": 8713.22, "total_tokens": 82431464} +{"current_steps": 122305, "total_steps": 204665, "loss": 0.0001, "lr": 8.346758719915677e-07, "epoch": 2.9879314978135003, "percentage": 59.76, "elapsed_time": "2:37:40", "remaining_time": "1:46:10", "throughput": 8713.24, "total_tokens": 82434664} +{"current_steps": 122310, "total_steps": 204665, "loss": 0.0007, "lr": 8.345917684738439e-07, "epoch": 2.988053648645347, "percentage": 59.76, "elapsed_time": "2:37:41", "remaining_time": "1:46:10", "throughput": 8713.28, "total_tokens": 82438056} +{"current_steps": 122315, "total_steps": 204665, "loss": 0.0277, "lr": 8.345076661590133e-07, "epoch": 2.9881757994771947, "percentage": 59.76, "elapsed_time": "2:37:41", "remaining_time": "1:46:10", "throughput": 8713.31, "total_tokens": 82441448} +{"current_steps": 122320, "total_steps": 204665, "loss": 0.0001, "lr": 8.344235650476878e-07, "epoch": 2.9882979503090414, "percentage": 59.77, "elapsed_time": "2:37:41", "remaining_time": "1:46:09", "throughput": 8713.55, "total_tokens": 82447208} +{"current_steps": 122325, "total_steps": 204665, "loss": 0.0547, "lr": 8.343394651404783e-07, "epoch": 2.9884201011408886, "percentage": 59.77, "elapsed_time": "2:37:42", "remaining_time": "1:46:09", "throughput": 8713.57, "total_tokens": 82450472} +{"current_steps": 122330, "total_steps": 204665, "loss": 0.0001, "lr": 8.342553664379971e-07, "epoch": 2.988542251972736, "percentage": 59.77, "elapsed_time": "2:37:42", "remaining_time": "1:46:08", "throughput": 8713.64, "total_tokens": 82454184} +{"current_steps": 122335, "total_steps": 204665, "loss": 0.1056, "lr": 8.341712689408551e-07, "epoch": 2.988664402804583, "percentage": 59.77, "elapsed_time": "2:37:43", "remaining_time": "1:46:08", "throughput": 8713.68, "total_tokens": 82457640} +{"current_steps": 122340, "total_steps": 204665, "loss": 0.1443, "lr": 8.340871726496646e-07, "epoch": 2.98878655363643, "percentage": 59.78, "elapsed_time": "2:37:43", "remaining_time": "1:46:08", "throughput": 8713.71, "total_tokens": 82460904} +{"current_steps": 122345, "total_steps": 204665, "loss": 0.0491, "lr": 8.340030775650364e-07, "epoch": 2.9889087044682774, "percentage": 59.78, "elapsed_time": "2:37:43", "remaining_time": "1:46:07", "throughput": 8713.79, "total_tokens": 82464808} +{"current_steps": 122350, "total_steps": 204665, "loss": 0.0002, "lr": 8.339189836875827e-07, "epoch": 2.9890308553001246, "percentage": 59.78, "elapsed_time": "2:37:44", "remaining_time": "1:46:07", "throughput": 8713.82, "total_tokens": 82468136} +{"current_steps": 122355, "total_steps": 204665, "loss": 0.0624, "lr": 8.338348910179151e-07, "epoch": 2.9891530061319718, "percentage": 59.78, "elapsed_time": "2:37:44", "remaining_time": "1:46:06", "throughput": 8713.85, "total_tokens": 82471400} +{"current_steps": 122360, "total_steps": 204665, "loss": 0.0363, "lr": 8.337507995566444e-07, "epoch": 2.989275156963819, "percentage": 59.79, "elapsed_time": "2:37:44", "remaining_time": "1:46:06", "throughput": 8713.89, "total_tokens": 82474792} +{"current_steps": 122365, "total_steps": 204665, "loss": 0.0002, "lr": 8.33666709304383e-07, "epoch": 2.989397307795666, "percentage": 59.79, "elapsed_time": "2:37:45", "remaining_time": "1:46:06", "throughput": 8713.93, "total_tokens": 82478248} +{"current_steps": 122370, "total_steps": 204665, "loss": 0.0351, "lr": 8.335826202617416e-07, "epoch": 2.9895194586275133, "percentage": 59.79, "elapsed_time": "2:37:45", "remaining_time": "1:46:05", "throughput": 8713.99, "total_tokens": 82481960} +{"current_steps": 122375, "total_steps": 204665, "loss": 0.0016, "lr": 8.334985324293321e-07, "epoch": 2.9896416094593605, "percentage": 59.79, "elapsed_time": "2:37:45", "remaining_time": "1:46:05", "throughput": 8713.99, "total_tokens": 82484904} +{"current_steps": 122380, "total_steps": 204665, "loss": 0.0666, "lr": 8.334144458077665e-07, "epoch": 2.9897637602912077, "percentage": 59.8, "elapsed_time": "2:37:46", "remaining_time": "1:46:04", "throughput": 8714.04, "total_tokens": 82488488} +{"current_steps": 122385, "total_steps": 204665, "loss": 0.0276, "lr": 8.333303603976555e-07, "epoch": 2.989885911123055, "percentage": 59.8, "elapsed_time": "2:37:46", "remaining_time": "1:46:04", "throughput": 8714.04, "total_tokens": 82491432} +{"current_steps": 122390, "total_steps": 204665, "loss": 0.0001, "lr": 8.332462761996114e-07, "epoch": 2.990008061954902, "percentage": 59.8, "elapsed_time": "2:37:46", "remaining_time": "1:46:03", "throughput": 8714.06, "total_tokens": 82494568} +{"current_steps": 122395, "total_steps": 204665, "loss": 0.0772, "lr": 8.33162193214245e-07, "epoch": 2.990130212786749, "percentage": 59.8, "elapsed_time": "2:37:47", "remaining_time": "1:46:03", "throughput": 8714.07, "total_tokens": 82497640} +{"current_steps": 122400, "total_steps": 204665, "loss": 0.0973, "lr": 8.330781114421678e-07, "epoch": 2.9902523636185965, "percentage": 59.81, "elapsed_time": "2:37:47", "remaining_time": "1:46:03", "throughput": 8714.13, "total_tokens": 82501352} +{"current_steps": 122405, "total_steps": 204665, "loss": 0.0007, "lr": 8.329940308839918e-07, "epoch": 2.990374514450443, "percentage": 59.81, "elapsed_time": "2:37:47", "remaining_time": "1:46:02", "throughput": 8714.13, "total_tokens": 82504424} +{"current_steps": 122410, "total_steps": 204665, "loss": 0.0004, "lr": 8.329099515403277e-07, "epoch": 2.9904966652822904, "percentage": 59.81, "elapsed_time": "2:37:48", "remaining_time": "1:46:02", "throughput": 8714.17, "total_tokens": 82507752} +{"current_steps": 122415, "total_steps": 204665, "loss": 0.0001, "lr": 8.328258734117879e-07, "epoch": 2.9906188161141376, "percentage": 59.81, "elapsed_time": "2:37:48", "remaining_time": "1:46:01", "throughput": 8714.19, "total_tokens": 82510952} +{"current_steps": 122420, "total_steps": 204665, "loss": 0.0692, "lr": 8.327417964989827e-07, "epoch": 2.990740966945985, "percentage": 59.81, "elapsed_time": "2:37:48", "remaining_time": "1:46:01", "throughput": 8714.18, "total_tokens": 82513896} +{"current_steps": 122425, "total_steps": 204665, "loss": 0.0001, "lr": 8.326577208025247e-07, "epoch": 2.990863117777832, "percentage": 59.82, "elapsed_time": "2:37:49", "remaining_time": "1:46:01", "throughput": 8714.21, "total_tokens": 82517160} +{"current_steps": 122430, "total_steps": 204665, "loss": 0.0538, "lr": 8.325736463230244e-07, "epoch": 2.990985268609679, "percentage": 59.82, "elapsed_time": "2:37:49", "remaining_time": "1:46:00", "throughput": 8714.29, "total_tokens": 82521064} +{"current_steps": 122435, "total_steps": 204665, "loss": 0.0005, "lr": 8.324895730610939e-07, "epoch": 2.9911074194415264, "percentage": 59.82, "elapsed_time": "2:37:49", "remaining_time": "1:46:00", "throughput": 8714.31, "total_tokens": 82524264} +{"current_steps": 122440, "total_steps": 204665, "loss": 0.0017, "lr": 8.32405501017344e-07, "epoch": 2.9912295702733736, "percentage": 59.82, "elapsed_time": "2:37:50", "remaining_time": "1:45:59", "throughput": 8714.31, "total_tokens": 82527208} +{"current_steps": 122445, "total_steps": 204665, "loss": 0.0107, "lr": 8.323214301923865e-07, "epoch": 2.9913517211052207, "percentage": 59.83, "elapsed_time": "2:37:50", "remaining_time": "1:45:59", "throughput": 8714.34, "total_tokens": 82530472} +{"current_steps": 122450, "total_steps": 204665, "loss": 0.0723, "lr": 8.322373605868326e-07, "epoch": 2.991473871937068, "percentage": 59.83, "elapsed_time": "2:37:51", "remaining_time": "1:45:58", "throughput": 8714.39, "total_tokens": 82534056} +{"current_steps": 122455, "total_steps": 204665, "loss": 0.0001, "lr": 8.321532922012937e-07, "epoch": 2.991596022768915, "percentage": 59.83, "elapsed_time": "2:37:51", "remaining_time": "1:45:58", "throughput": 8714.46, "total_tokens": 82537832} +{"current_steps": 122460, "total_steps": 204665, "loss": 0.0827, "lr": 8.320692250363816e-07, "epoch": 2.9917181736007623, "percentage": 59.83, "elapsed_time": "2:37:51", "remaining_time": "1:45:58", "throughput": 8714.51, "total_tokens": 82541352} +{"current_steps": 122465, "total_steps": 204665, "loss": 0.0002, "lr": 8.319851590927067e-07, "epoch": 2.9918403244326095, "percentage": 59.84, "elapsed_time": "2:37:52", "remaining_time": "1:45:57", "throughput": 8714.54, "total_tokens": 82544744} +{"current_steps": 122470, "total_steps": 204665, "loss": 0.0006, "lr": 8.31901094370881e-07, "epoch": 2.9919624752644567, "percentage": 59.84, "elapsed_time": "2:37:52", "remaining_time": "1:45:57", "throughput": 8714.62, "total_tokens": 82548584} +{"current_steps": 122475, "total_steps": 204665, "loss": 0.0001, "lr": 8.318170308715161e-07, "epoch": 2.992084626096304, "percentage": 59.84, "elapsed_time": "2:37:52", "remaining_time": "1:45:56", "throughput": 8714.66, "total_tokens": 82551976} +{"current_steps": 122480, "total_steps": 204665, "loss": 0.1283, "lr": 8.317329685952226e-07, "epoch": 2.9922067769281506, "percentage": 59.84, "elapsed_time": "2:37:53", "remaining_time": "1:45:56", "throughput": 8714.69, "total_tokens": 82555304} +{"current_steps": 122485, "total_steps": 204665, "loss": 0.0862, "lr": 8.316489075426127e-07, "epoch": 2.9923289277599983, "percentage": 59.85, "elapsed_time": "2:37:53", "remaining_time": "1:45:56", "throughput": 8714.7, "total_tokens": 82558312} +{"current_steps": 122490, "total_steps": 204665, "loss": 0.0003, "lr": 8.315648477142967e-07, "epoch": 2.992451078591845, "percentage": 59.85, "elapsed_time": "2:37:53", "remaining_time": "1:45:55", "throughput": 8714.7, "total_tokens": 82561320} +{"current_steps": 122495, "total_steps": 204665, "loss": 0.0004, "lr": 8.314807891108869e-07, "epoch": 2.9925732294236926, "percentage": 59.85, "elapsed_time": "2:37:54", "remaining_time": "1:45:55", "throughput": 8714.74, "total_tokens": 82564776} +{"current_steps": 122500, "total_steps": 204665, "loss": 0.0001, "lr": 8.313967317329936e-07, "epoch": 2.9926953802555394, "percentage": 59.85, "elapsed_time": "2:37:54", "remaining_time": "1:45:54", "throughput": 8714.79, "total_tokens": 82568296} +{"current_steps": 122505, "total_steps": 204665, "loss": 0.0401, "lr": 8.313126755812289e-07, "epoch": 2.9928175310873866, "percentage": 59.86, "elapsed_time": "2:37:54", "remaining_time": "1:45:54", "throughput": 8714.8, "total_tokens": 82571304} +{"current_steps": 122510, "total_steps": 204665, "loss": 0.0003, "lr": 8.31228620656204e-07, "epoch": 2.9929396819192338, "percentage": 59.86, "elapsed_time": "2:37:55", "remaining_time": "1:45:54", "throughput": 8714.8, "total_tokens": 82574312} +{"current_steps": 122515, "total_steps": 204665, "loss": 0.0303, "lr": 8.311445669585297e-07, "epoch": 2.993061832751081, "percentage": 59.86, "elapsed_time": "2:37:55", "remaining_time": "1:45:53", "throughput": 8714.9, "total_tokens": 82578536} +{"current_steps": 122520, "total_steps": 204665, "loss": 0.0379, "lr": 8.310605144888177e-07, "epoch": 2.993183983582928, "percentage": 59.86, "elapsed_time": "2:37:55", "remaining_time": "1:45:53", "throughput": 8714.93, "total_tokens": 82581736} +{"current_steps": 122525, "total_steps": 204665, "loss": 0.0499, "lr": 8.309764632476788e-07, "epoch": 2.9933061344147753, "percentage": 59.87, "elapsed_time": "2:37:56", "remaining_time": "1:45:52", "throughput": 8714.94, "total_tokens": 82584872} +{"current_steps": 122530, "total_steps": 204665, "loss": 0.0547, "lr": 8.308924132357245e-07, "epoch": 2.9934282852466225, "percentage": 59.87, "elapsed_time": "2:37:56", "remaining_time": "1:45:52", "throughput": 8714.94, "total_tokens": 82587752} +{"current_steps": 122535, "total_steps": 204665, "loss": 0.0685, "lr": 8.308083644535665e-07, "epoch": 2.9935504360784697, "percentage": 59.87, "elapsed_time": "2:37:56", "remaining_time": "1:45:51", "throughput": 8714.96, "total_tokens": 82590952} +{"current_steps": 122540, "total_steps": 204665, "loss": 0.1417, "lr": 8.307243169018151e-07, "epoch": 2.993672586910317, "percentage": 59.87, "elapsed_time": "2:37:57", "remaining_time": "1:45:51", "throughput": 8714.99, "total_tokens": 82594344} +{"current_steps": 122545, "total_steps": 204665, "loss": 0.1196, "lr": 8.306402705810824e-07, "epoch": 2.993794737742164, "percentage": 59.88, "elapsed_time": "2:37:57", "remaining_time": "1:45:51", "throughput": 8715.04, "total_tokens": 82597864} +{"current_steps": 122550, "total_steps": 204665, "loss": 0.035, "lr": 8.305562254919791e-07, "epoch": 2.9939168885740113, "percentage": 59.88, "elapsed_time": "2:37:57", "remaining_time": "1:45:50", "throughput": 8715.06, "total_tokens": 82601064} +{"current_steps": 122555, "total_steps": 204665, "loss": 0.0011, "lr": 8.304721816351164e-07, "epoch": 2.9940390394058585, "percentage": 59.88, "elapsed_time": "2:37:58", "remaining_time": "1:45:50", "throughput": 8715.1, "total_tokens": 82604520} +{"current_steps": 122560, "total_steps": 204665, "loss": 0.0345, "lr": 8.303881390111056e-07, "epoch": 2.9941611902377057, "percentage": 59.88, "elapsed_time": "2:37:58", "remaining_time": "1:45:49", "throughput": 8715.12, "total_tokens": 82607720} +{"current_steps": 122565, "total_steps": 204665, "loss": 0.0002, "lr": 8.303040976205578e-07, "epoch": 2.9942833410695524, "percentage": 59.89, "elapsed_time": "2:37:59", "remaining_time": "1:45:49", "throughput": 8715.14, "total_tokens": 82610920} +{"current_steps": 122570, "total_steps": 204665, "loss": 0.0001, "lr": 8.302200574640845e-07, "epoch": 2.9944054919014, "percentage": 59.89, "elapsed_time": "2:37:59", "remaining_time": "1:45:49", "throughput": 8715.2, "total_tokens": 82614632} +{"current_steps": 122575, "total_steps": 204665, "loss": 0.0005, "lr": 8.301360185422963e-07, "epoch": 2.994527642733247, "percentage": 59.89, "elapsed_time": "2:37:59", "remaining_time": "1:45:48", "throughput": 8715.23, "total_tokens": 82617896} +{"current_steps": 122580, "total_steps": 204665, "loss": 0.1424, "lr": 8.30051980855805e-07, "epoch": 2.9946497935650944, "percentage": 59.89, "elapsed_time": "2:38:00", "remaining_time": "1:45:48", "throughput": 8715.29, "total_tokens": 82621608} +{"current_steps": 122585, "total_steps": 204665, "loss": 0.0424, "lr": 8.29967944405221e-07, "epoch": 2.994771944396941, "percentage": 59.9, "elapsed_time": "2:38:00", "remaining_time": "1:45:47", "throughput": 8715.3, "total_tokens": 82624680} +{"current_steps": 122590, "total_steps": 204665, "loss": 0.0853, "lr": 8.298839091911562e-07, "epoch": 2.9948940952287884, "percentage": 59.9, "elapsed_time": "2:38:00", "remaining_time": "1:45:47", "throughput": 8715.35, "total_tokens": 82628200} +{"current_steps": 122595, "total_steps": 204665, "loss": 0.0002, "lr": 8.297998752142211e-07, "epoch": 2.9950162460606355, "percentage": 59.9, "elapsed_time": "2:38:01", "remaining_time": "1:45:47", "throughput": 8715.4, "total_tokens": 82631720} +{"current_steps": 122600, "total_steps": 204665, "loss": 0.0006, "lr": 8.297158424750272e-07, "epoch": 2.9951383968924827, "percentage": 59.9, "elapsed_time": "2:38:01", "remaining_time": "1:45:46", "throughput": 8715.44, "total_tokens": 82635112} +{"current_steps": 122605, "total_steps": 204665, "loss": 0.1043, "lr": 8.296318109741856e-07, "epoch": 2.99526054772433, "percentage": 59.91, "elapsed_time": "2:38:01", "remaining_time": "1:45:46", "throughput": 8715.49, "total_tokens": 82638632} +{"current_steps": 122610, "total_steps": 204665, "loss": 0.0001, "lr": 8.295477807123071e-07, "epoch": 2.995382698556177, "percentage": 59.91, "elapsed_time": "2:38:02", "remaining_time": "1:45:45", "throughput": 8715.53, "total_tokens": 82642152} +{"current_steps": 122615, "total_steps": 204665, "loss": 0.1018, "lr": 8.294637516900034e-07, "epoch": 2.9955048493880243, "percentage": 59.91, "elapsed_time": "2:38:02", "remaining_time": "1:45:45", "throughput": 8715.57, "total_tokens": 82645544} +{"current_steps": 122620, "total_steps": 204665, "loss": 0.0574, "lr": 8.293797239078846e-07, "epoch": 2.9956270002198715, "percentage": 59.91, "elapsed_time": "2:38:02", "remaining_time": "1:45:44", "throughput": 8715.6, "total_tokens": 82648936} +{"current_steps": 122625, "total_steps": 204665, "loss": 0.0003, "lr": 8.292956973665624e-07, "epoch": 2.9957491510517187, "percentage": 59.91, "elapsed_time": "2:38:03", "remaining_time": "1:45:44", "throughput": 8715.62, "total_tokens": 82652008} +{"current_steps": 122630, "total_steps": 204665, "loss": 0.0001, "lr": 8.292116720666482e-07, "epoch": 2.995871301883566, "percentage": 59.92, "elapsed_time": "2:38:03", "remaining_time": "1:45:44", "throughput": 8715.64, "total_tokens": 82655336} +{"current_steps": 122635, "total_steps": 204665, "loss": 0.0002, "lr": 8.291276480087522e-07, "epoch": 2.995993452715413, "percentage": 59.92, "elapsed_time": "2:38:03", "remaining_time": "1:45:43", "throughput": 8715.64, "total_tokens": 82658280} +{"current_steps": 122640, "total_steps": 204665, "loss": 0.0589, "lr": 8.290436251934865e-07, "epoch": 2.9961156035472603, "percentage": 59.92, "elapsed_time": "2:38:04", "remaining_time": "1:45:43", "throughput": 8715.7, "total_tokens": 82661864} +{"current_steps": 122645, "total_steps": 204665, "loss": 0.0005, "lr": 8.289596036214609e-07, "epoch": 2.9962377543791074, "percentage": 59.92, "elapsed_time": "2:38:04", "remaining_time": "1:45:42", "throughput": 8715.71, "total_tokens": 82664936} +{"current_steps": 122650, "total_steps": 204665, "loss": 0.0001, "lr": 8.288755832932877e-07, "epoch": 2.9963599052109546, "percentage": 59.93, "elapsed_time": "2:38:04", "remaining_time": "1:45:42", "throughput": 8715.77, "total_tokens": 82668584} +{"current_steps": 122655, "total_steps": 204665, "loss": 0.0285, "lr": 8.287915642095766e-07, "epoch": 2.996482056042802, "percentage": 59.93, "elapsed_time": "2:38:05", "remaining_time": "1:45:42", "throughput": 8715.77, "total_tokens": 82671528} +{"current_steps": 122660, "total_steps": 204665, "loss": 0.0394, "lr": 8.287075463709396e-07, "epoch": 2.9966042068746486, "percentage": 59.93, "elapsed_time": "2:38:05", "remaining_time": "1:45:41", "throughput": 8715.78, "total_tokens": 82674600} +{"current_steps": 122665, "total_steps": 204665, "loss": 0.0007, "lr": 8.286235297779878e-07, "epoch": 2.996726357706496, "percentage": 59.93, "elapsed_time": "2:38:05", "remaining_time": "1:45:41", "throughput": 8715.8, "total_tokens": 82677800} +{"current_steps": 122670, "total_steps": 204665, "loss": 0.002, "lr": 8.285395144313312e-07, "epoch": 2.996848508538343, "percentage": 59.94, "elapsed_time": "2:38:06", "remaining_time": "1:45:40", "throughput": 8715.85, "total_tokens": 82681320} +{"current_steps": 122675, "total_steps": 204665, "loss": 0.0726, "lr": 8.284555003315819e-07, "epoch": 2.9969706593701906, "percentage": 59.94, "elapsed_time": "2:38:06", "remaining_time": "1:45:40", "throughput": 8715.83, "total_tokens": 82684072} +{"current_steps": 122680, "total_steps": 204665, "loss": 0.0866, "lr": 8.283714874793497e-07, "epoch": 2.9970928102020373, "percentage": 59.94, "elapsed_time": "2:38:07", "remaining_time": "1:45:40", "throughput": 8715.87, "total_tokens": 82687528} +{"current_steps": 122685, "total_steps": 204665, "loss": 0.0152, "lr": 8.282874758752464e-07, "epoch": 2.9972149610338845, "percentage": 59.94, "elapsed_time": "2:38:07", "remaining_time": "1:45:39", "throughput": 8715.92, "total_tokens": 82691048} +{"current_steps": 122690, "total_steps": 204665, "loss": 0.0684, "lr": 8.28203465519883e-07, "epoch": 2.9973371118657317, "percentage": 59.95, "elapsed_time": "2:38:07", "remaining_time": "1:45:39", "throughput": 8715.93, "total_tokens": 82694120} +{"current_steps": 122695, "total_steps": 204665, "loss": 0.0517, "lr": 8.2811945641387e-07, "epoch": 2.997459262697579, "percentage": 59.95, "elapsed_time": "2:38:08", "remaining_time": "1:45:38", "throughput": 8715.97, "total_tokens": 82697512} +{"current_steps": 122700, "total_steps": 204665, "loss": 0.0149, "lr": 8.280354485578188e-07, "epoch": 2.997581413529426, "percentage": 59.95, "elapsed_time": "2:38:08", "remaining_time": "1:45:38", "throughput": 8715.98, "total_tokens": 82700648} +{"current_steps": 122705, "total_steps": 204665, "loss": 0.0664, "lr": 8.2795144195234e-07, "epoch": 2.9977035643612733, "percentage": 59.95, "elapsed_time": "2:38:08", "remaining_time": "1:45:37", "throughput": 8716.04, "total_tokens": 82704232} +{"current_steps": 122710, "total_steps": 204665, "loss": 0.0004, "lr": 8.278674365980445e-07, "epoch": 2.9978257151931205, "percentage": 59.96, "elapsed_time": "2:38:09", "remaining_time": "1:45:37", "throughput": 8716.08, "total_tokens": 82707688} +{"current_steps": 122715, "total_steps": 204665, "loss": 0.0002, "lr": 8.277834324955433e-07, "epoch": 2.9979478660249677, "percentage": 59.96, "elapsed_time": "2:38:09", "remaining_time": "1:45:37", "throughput": 8716.09, "total_tokens": 82710760} +{"current_steps": 122720, "total_steps": 204665, "loss": 0.0002, "lr": 8.276994296454471e-07, "epoch": 2.998070016856815, "percentage": 59.96, "elapsed_time": "2:38:09", "remaining_time": "1:45:36", "throughput": 8716.11, "total_tokens": 82714024} +{"current_steps": 122725, "total_steps": 204665, "loss": 0.0016, "lr": 8.276154280483674e-07, "epoch": 2.998192167688662, "percentage": 59.96, "elapsed_time": "2:38:10", "remaining_time": "1:45:36", "throughput": 8716.14, "total_tokens": 82717352} +{"current_steps": 122730, "total_steps": 204665, "loss": 0.038, "lr": 8.275314277049144e-07, "epoch": 2.9983143185205092, "percentage": 59.97, "elapsed_time": "2:38:10", "remaining_time": "1:45:35", "throughput": 8716.17, "total_tokens": 82720616} +{"current_steps": 122735, "total_steps": 204665, "loss": 0.0933, "lr": 8.274474286156994e-07, "epoch": 2.9984364693523564, "percentage": 59.97, "elapsed_time": "2:38:10", "remaining_time": "1:45:35", "throughput": 8716.18, "total_tokens": 82723688} +{"current_steps": 122740, "total_steps": 204665, "loss": 0.0555, "lr": 8.273634307813329e-07, "epoch": 2.9985586201842036, "percentage": 59.97, "elapsed_time": "2:38:11", "remaining_time": "1:45:35", "throughput": 8716.2, "total_tokens": 82726888} +{"current_steps": 122745, "total_steps": 204665, "loss": 0.0492, "lr": 8.272794342024263e-07, "epoch": 2.9986807710160504, "percentage": 59.97, "elapsed_time": "2:38:11", "remaining_time": "1:45:34", "throughput": 8716.23, "total_tokens": 82730216} +{"current_steps": 122750, "total_steps": 204665, "loss": 0.0003, "lr": 8.271954388795897e-07, "epoch": 2.998802921847898, "percentage": 59.98, "elapsed_time": "2:38:11", "remaining_time": "1:45:34", "throughput": 8716.25, "total_tokens": 82733480} +{"current_steps": 122755, "total_steps": 204665, "loss": 0.0561, "lr": 8.271114448134345e-07, "epoch": 2.9989250726797447, "percentage": 59.98, "elapsed_time": "2:38:12", "remaining_time": "1:45:33", "throughput": 8716.27, "total_tokens": 82736680} +{"current_steps": 122760, "total_steps": 204665, "loss": 0.0001, "lr": 8.270274520045715e-07, "epoch": 2.9990472235115924, "percentage": 59.98, "elapsed_time": "2:38:12", "remaining_time": "1:45:33", "throughput": 8716.31, "total_tokens": 82740072} +{"current_steps": 122765, "total_steps": 204665, "loss": 0.0002, "lr": 8.269434604536113e-07, "epoch": 2.999169374343439, "percentage": 59.98, "elapsed_time": "2:38:12", "remaining_time": "1:45:32", "throughput": 8716.33, "total_tokens": 82743272} +{"current_steps": 122770, "total_steps": 204665, "loss": 0.0002, "lr": 8.268594701611651e-07, "epoch": 2.9992915251752863, "percentage": 59.99, "elapsed_time": "2:38:13", "remaining_time": "1:45:32", "throughput": 8716.36, "total_tokens": 82746600} +{"current_steps": 122775, "total_steps": 204665, "loss": 0.0518, "lr": 8.267754811278429e-07, "epoch": 2.9994136760071335, "percentage": 59.99, "elapsed_time": "2:38:13", "remaining_time": "1:45:32", "throughput": 8716.41, "total_tokens": 82750248} +{"current_steps": 122780, "total_steps": 204665, "loss": 0.0044, "lr": 8.266914933542559e-07, "epoch": 2.9995358268389807, "percentage": 59.99, "elapsed_time": "2:38:13", "remaining_time": "1:45:31", "throughput": 8716.46, "total_tokens": 82753768} +{"current_steps": 122785, "total_steps": 204665, "loss": 0.1181, "lr": 8.266075068410156e-07, "epoch": 2.999657977670828, "percentage": 59.99, "elapsed_time": "2:38:14", "remaining_time": "1:45:31", "throughput": 8716.52, "total_tokens": 82757416} +{"current_steps": 122790, "total_steps": 204665, "loss": 0.0627, "lr": 8.265235215887317e-07, "epoch": 2.999780128502675, "percentage": 60.0, "elapsed_time": "2:38:14", "remaining_time": "1:45:30", "throughput": 8716.55, "total_tokens": 82760936} +{"current_steps": 122795, "total_steps": 204665, "loss": 0.0001, "lr": 8.264395375980156e-07, "epoch": 2.9999022793345222, "percentage": 60.0, "elapsed_time": "2:38:15", "remaining_time": "1:45:30", "throughput": 8716.56, "total_tokens": 82763880} +{"current_steps": 122800, "total_steps": 204665, "loss": 0.0002, "lr": 8.263555548694777e-07, "epoch": 3.0000244301663694, "percentage": 60.0, "elapsed_time": "2:38:15", "remaining_time": "1:45:30", "throughput": 8716.45, "total_tokens": 82767184} +{"current_steps": 122805, "total_steps": 204665, "loss": 0.0003, "lr": 8.262715734037292e-07, "epoch": 3.0001465809982166, "percentage": 60.0, "elapsed_time": "2:38:15", "remaining_time": "1:45:29", "throughput": 8716.44, "total_tokens": 82770064} +{"current_steps": 122808, "total_steps": 204665, "eval_loss": 0.19360828399658203, "epoch": 3.000219871497325, "percentage": 60.0, "elapsed_time": "2:39:03", "remaining_time": "1:46:01", "throughput": 8672.92, "total_tokens": 82772304} +{"current_steps": 122810, "total_steps": 204665, "loss": 0.0303, "lr": 8.261875932013802e-07, "epoch": 3.000268731830064, "percentage": 60.01, "elapsed_time": "2:39:39", "remaining_time": "1:46:24", "throughput": 8641.19, "total_tokens": 82774032} +{"current_steps": 122815, "total_steps": 204665, "loss": 0.0002, "lr": 8.26103614263042e-07, "epoch": 3.000390882661911, "percentage": 60.01, "elapsed_time": "2:39:39", "remaining_time": "1:46:24", "throughput": 8641.26, "total_tokens": 82777744} +{"current_steps": 122820, "total_steps": 204665, "loss": 0.0634, "lr": 8.260196365893252e-07, "epoch": 3.000513033493758, "percentage": 60.01, "elapsed_time": "2:39:39", "remaining_time": "1:46:23", "throughput": 8641.29, "total_tokens": 82781072} +{"current_steps": 122825, "total_steps": 204665, "loss": 0.0001, "lr": 8.2593566018084e-07, "epoch": 3.0006351843256054, "percentage": 60.01, "elapsed_time": "2:39:40", "remaining_time": "1:46:23", "throughput": 8641.33, "total_tokens": 82784400} +{"current_steps": 122830, "total_steps": 204665, "loss": 0.0001, "lr": 8.25851685038198e-07, "epoch": 3.0007573351574526, "percentage": 60.02, "elapsed_time": "2:39:40", "remaining_time": "1:46:22", "throughput": 8641.34, "total_tokens": 82787472} +{"current_steps": 122835, "total_steps": 204665, "loss": 0.0002, "lr": 8.257677111620089e-07, "epoch": 3.0008794859892998, "percentage": 60.02, "elapsed_time": "2:39:40", "remaining_time": "1:46:22", "throughput": 8641.38, "total_tokens": 82790864} +{"current_steps": 122840, "total_steps": 204665, "loss": 0.0135, "lr": 8.256837385528839e-07, "epoch": 3.001001636821147, "percentage": 60.02, "elapsed_time": "2:39:41", "remaining_time": "1:46:22", "throughput": 8641.43, "total_tokens": 82794384} +{"current_steps": 122845, "total_steps": 204665, "loss": 0.0002, "lr": 8.25599767211434e-07, "epoch": 3.0011237876529937, "percentage": 60.02, "elapsed_time": "2:39:41", "remaining_time": "1:46:21", "throughput": 8641.5, "total_tokens": 82798288} +{"current_steps": 122850, "total_steps": 204665, "loss": 0.0, "lr": 8.255157971382691e-07, "epoch": 3.001245938484841, "percentage": 60.02, "elapsed_time": "2:39:41", "remaining_time": "1:46:21", "throughput": 8641.5, "total_tokens": 82801232} +{"current_steps": 122855, "total_steps": 204665, "loss": 0.0683, "lr": 8.254318283340007e-07, "epoch": 3.001368089316688, "percentage": 60.03, "elapsed_time": "2:39:42", "remaining_time": "1:46:20", "throughput": 8641.51, "total_tokens": 82804304} +{"current_steps": 122860, "total_steps": 204665, "loss": 0.0001, "lr": 8.253478607992388e-07, "epoch": 3.0014902401485353, "percentage": 60.03, "elapsed_time": "2:39:42", "remaining_time": "1:46:20", "throughput": 8641.54, "total_tokens": 82807568} +{"current_steps": 122865, "total_steps": 204665, "loss": 0.0001, "lr": 8.252638945345942e-07, "epoch": 3.0016123909803825, "percentage": 60.03, "elapsed_time": "2:39:42", "remaining_time": "1:46:19", "throughput": 8641.57, "total_tokens": 82810768} +{"current_steps": 122870, "total_steps": 204665, "loss": 0.0001, "lr": 8.251799295406776e-07, "epoch": 3.0017345418122297, "percentage": 60.03, "elapsed_time": "2:39:43", "remaining_time": "1:46:19", "throughput": 8641.6, "total_tokens": 82814096} +{"current_steps": 122875, "total_steps": 204665, "loss": 0.0, "lr": 8.250959658180993e-07, "epoch": 3.001856692644077, "percentage": 60.04, "elapsed_time": "2:39:43", "remaining_time": "1:46:19", "throughput": 8641.67, "total_tokens": 82817872} +{"current_steps": 122880, "total_steps": 204665, "loss": 0.0002, "lr": 8.250120033674706e-07, "epoch": 3.001978843475924, "percentage": 60.04, "elapsed_time": "2:39:43", "remaining_time": "1:46:18", "throughput": 8641.73, "total_tokens": 82821456} +{"current_steps": 122885, "total_steps": 204665, "loss": 0.0001, "lr": 8.249280421894012e-07, "epoch": 3.002100994307771, "percentage": 60.04, "elapsed_time": "2:39:44", "remaining_time": "1:46:18", "throughput": 8641.75, "total_tokens": 82824720} +{"current_steps": 122890, "total_steps": 204665, "loss": 0.0, "lr": 8.248440822845028e-07, "epoch": 3.0022231451396184, "percentage": 60.04, "elapsed_time": "2:39:44", "remaining_time": "1:46:17", "throughput": 8641.82, "total_tokens": 82828432} +{"current_steps": 122895, "total_steps": 204665, "loss": 0.001, "lr": 8.247601236533848e-07, "epoch": 3.0023452959714656, "percentage": 60.05, "elapsed_time": "2:39:44", "remaining_time": "1:46:17", "throughput": 8641.83, "total_tokens": 82831440} +{"current_steps": 122900, "total_steps": 204665, "loss": 0.0001, "lr": 8.246761662966587e-07, "epoch": 3.002467446803313, "percentage": 60.05, "elapsed_time": "2:39:45", "remaining_time": "1:46:17", "throughput": 8641.89, "total_tokens": 82835152} +{"current_steps": 122905, "total_steps": 204665, "loss": 0.0001, "lr": 8.245922102149343e-07, "epoch": 3.00258959763516, "percentage": 60.05, "elapsed_time": "2:39:45", "remaining_time": "1:46:16", "throughput": 8641.95, "total_tokens": 82838800} +{"current_steps": 122910, "total_steps": 204665, "loss": 0.0001, "lr": 8.245082554088228e-07, "epoch": 3.002711748467007, "percentage": 60.05, "elapsed_time": "2:39:46", "remaining_time": "1:46:16", "throughput": 8641.97, "total_tokens": 82841936} +{"current_steps": 122915, "total_steps": 204665, "loss": 0.0001, "lr": 8.244243018789343e-07, "epoch": 3.0028338992988544, "percentage": 60.06, "elapsed_time": "2:39:46", "remaining_time": "1:46:15", "throughput": 8642.01, "total_tokens": 82845328} +{"current_steps": 122920, "total_steps": 204665, "loss": 0.0391, "lr": 8.243403496258797e-07, "epoch": 3.0029560501307015, "percentage": 60.06, "elapsed_time": "2:39:46", "remaining_time": "1:46:15", "throughput": 8642.03, "total_tokens": 82848592} +{"current_steps": 122925, "total_steps": 204665, "loss": 0.0016, "lr": 8.242563986502693e-07, "epoch": 3.0030782009625487, "percentage": 60.06, "elapsed_time": "2:39:47", "remaining_time": "1:46:14", "throughput": 8642.11, "total_tokens": 82852368} +{"current_steps": 122930, "total_steps": 204665, "loss": 0.0001, "lr": 8.241724489527134e-07, "epoch": 3.003200351794396, "percentage": 60.06, "elapsed_time": "2:39:47", "remaining_time": "1:46:14", "throughput": 8642.17, "total_tokens": 82856016} +{"current_steps": 122935, "total_steps": 204665, "loss": 0.0094, "lr": 8.240885005338227e-07, "epoch": 3.0033225026262427, "percentage": 60.07, "elapsed_time": "2:39:47", "remaining_time": "1:46:14", "throughput": 8642.2, "total_tokens": 82859344} +{"current_steps": 122940, "total_steps": 204665, "loss": 0.0081, "lr": 8.240045533942081e-07, "epoch": 3.00344465345809, "percentage": 60.07, "elapsed_time": "2:39:48", "remaining_time": "1:46:13", "throughput": 8642.2, "total_tokens": 82862288} +{"current_steps": 122945, "total_steps": 204665, "loss": 0.0, "lr": 8.239206075344793e-07, "epoch": 3.003566804289937, "percentage": 60.07, "elapsed_time": "2:39:48", "remaining_time": "1:46:13", "throughput": 8642.21, "total_tokens": 82865296} +{"current_steps": 122950, "total_steps": 204665, "loss": 0.0001, "lr": 8.238366629552478e-07, "epoch": 3.0036889551217842, "percentage": 60.07, "elapsed_time": "2:39:48", "remaining_time": "1:46:12", "throughput": 8642.27, "total_tokens": 82868880} +{"current_steps": 122955, "total_steps": 204665, "loss": 0.0431, "lr": 8.237527196571229e-07, "epoch": 3.0038111059536314, "percentage": 60.08, "elapsed_time": "2:39:49", "remaining_time": "1:46:12", "throughput": 8642.29, "total_tokens": 82872144} +{"current_steps": 122960, "total_steps": 204665, "loss": 0.0001, "lr": 8.23668777640716e-07, "epoch": 3.0039332567854786, "percentage": 60.08, "elapsed_time": "2:39:49", "remaining_time": "1:46:12", "throughput": 8642.31, "total_tokens": 82875216} +{"current_steps": 122965, "total_steps": 204665, "loss": 0.0294, "lr": 8.23584836906637e-07, "epoch": 3.004055407617326, "percentage": 60.08, "elapsed_time": "2:39:49", "remaining_time": "1:46:11", "throughput": 8642.36, "total_tokens": 82878736} +{"current_steps": 122970, "total_steps": 204665, "loss": 0.0, "lr": 8.235008974554964e-07, "epoch": 3.004177558449173, "percentage": 60.08, "elapsed_time": "2:39:50", "remaining_time": "1:46:11", "throughput": 8642.41, "total_tokens": 82882384} +{"current_steps": 122975, "total_steps": 204665, "loss": 0.0, "lr": 8.234169592879053e-07, "epoch": 3.00429970928102, "percentage": 60.09, "elapsed_time": "2:39:50", "remaining_time": "1:46:10", "throughput": 8642.44, "total_tokens": 82885648} +{"current_steps": 122980, "total_steps": 204665, "loss": 0.0002, "lr": 8.233330224044728e-07, "epoch": 3.0044218601128674, "percentage": 60.09, "elapsed_time": "2:39:50", "remaining_time": "1:46:10", "throughput": 8642.48, "total_tokens": 82889104} +{"current_steps": 122985, "total_steps": 204665, "loss": 0.0, "lr": 8.232490868058106e-07, "epoch": 3.0045440109447146, "percentage": 60.09, "elapsed_time": "2:39:51", "remaining_time": "1:46:09", "throughput": 8642.53, "total_tokens": 82892688} +{"current_steps": 122990, "total_steps": 204665, "loss": 0.0001, "lr": 8.231651524925283e-07, "epoch": 3.0046661617765618, "percentage": 60.09, "elapsed_time": "2:39:51", "remaining_time": "1:46:09", "throughput": 8642.55, "total_tokens": 82895888} +{"current_steps": 122995, "total_steps": 204665, "loss": 0.0002, "lr": 8.230812194652369e-07, "epoch": 3.004788312608409, "percentage": 60.1, "elapsed_time": "2:39:51", "remaining_time": "1:46:09", "throughput": 8642.56, "total_tokens": 82899024} +{"current_steps": 123000, "total_steps": 204665, "loss": 0.0, "lr": 8.229972877245461e-07, "epoch": 3.004910463440256, "percentage": 60.1, "elapsed_time": "2:39:52", "remaining_time": "1:46:08", "throughput": 8642.59, "total_tokens": 82902352} +{"current_steps": 123005, "total_steps": 204665, "loss": 0.0, "lr": 8.229133572710665e-07, "epoch": 3.0050326142721033, "percentage": 60.1, "elapsed_time": "2:39:52", "remaining_time": "1:46:08", "throughput": 8642.61, "total_tokens": 82905424} +{"current_steps": 123010, "total_steps": 204665, "loss": 0.0001, "lr": 8.228294281054091e-07, "epoch": 3.0051547651039505, "percentage": 60.1, "elapsed_time": "2:39:52", "remaining_time": "1:46:07", "throughput": 8642.61, "total_tokens": 82908432} +{"current_steps": 123015, "total_steps": 204665, "loss": 0.0002, "lr": 8.227455002281835e-07, "epoch": 3.0052769159357977, "percentage": 60.11, "elapsed_time": "2:39:53", "remaining_time": "1:46:07", "throughput": 8642.66, "total_tokens": 82911888} +{"current_steps": 123020, "total_steps": 204665, "loss": 0.0, "lr": 8.226615736400004e-07, "epoch": 3.005399066767645, "percentage": 60.11, "elapsed_time": "2:39:53", "remaining_time": "1:46:07", "throughput": 8642.68, "total_tokens": 82915152} +{"current_steps": 123025, "total_steps": 204665, "loss": 0.0001, "lr": 8.225776483414699e-07, "epoch": 3.0055212175994916, "percentage": 60.11, "elapsed_time": "2:39:54", "remaining_time": "1:46:06", "throughput": 8642.71, "total_tokens": 82918416} +{"current_steps": 123030, "total_steps": 204665, "loss": 0.0, "lr": 8.224937243332024e-07, "epoch": 3.005643368431339, "percentage": 60.11, "elapsed_time": "2:39:54", "remaining_time": "1:46:06", "throughput": 8642.78, "total_tokens": 82922128} +{"current_steps": 123035, "total_steps": 204665, "loss": 0.0001, "lr": 8.224098016158087e-07, "epoch": 3.005765519263186, "percentage": 60.12, "elapsed_time": "2:39:54", "remaining_time": "1:46:05", "throughput": 8642.81, "total_tokens": 82925456} +{"current_steps": 123040, "total_steps": 204665, "loss": 0.0476, "lr": 8.223258801898981e-07, "epoch": 3.005887670095033, "percentage": 60.12, "elapsed_time": "2:39:55", "remaining_time": "1:46:05", "throughput": 8642.83, "total_tokens": 82928528} +{"current_steps": 123045, "total_steps": 204665, "loss": 0.0001, "lr": 8.22241960056082e-07, "epoch": 3.0060098209268804, "percentage": 60.12, "elapsed_time": "2:39:55", "remaining_time": "1:46:04", "throughput": 8642.88, "total_tokens": 82932112} +{"current_steps": 123050, "total_steps": 204665, "loss": 0.0307, "lr": 8.221580412149697e-07, "epoch": 3.0061319717587276, "percentage": 60.12, "elapsed_time": "2:39:55", "remaining_time": "1:46:04", "throughput": 8642.89, "total_tokens": 82935184} +{"current_steps": 123055, "total_steps": 204665, "loss": 0.0001, "lr": 8.220741236671726e-07, "epoch": 3.006254122590575, "percentage": 60.13, "elapsed_time": "2:39:56", "remaining_time": "1:46:04", "throughput": 8642.94, "total_tokens": 82938704} +{"current_steps": 123060, "total_steps": 204665, "loss": 0.0, "lr": 8.219902074132996e-07, "epoch": 3.006376273422422, "percentage": 60.13, "elapsed_time": "2:39:56", "remaining_time": "1:46:03", "throughput": 8642.96, "total_tokens": 82941840} +{"current_steps": 123065, "total_steps": 204665, "loss": 0.0003, "lr": 8.219062924539621e-07, "epoch": 3.006498424254269, "percentage": 60.13, "elapsed_time": "2:39:56", "remaining_time": "1:46:03", "throughput": 8642.97, "total_tokens": 82944912} +{"current_steps": 123070, "total_steps": 204665, "loss": 0.0001, "lr": 8.218223787897699e-07, "epoch": 3.0066205750861164, "percentage": 60.13, "elapsed_time": "2:39:57", "remaining_time": "1:46:02", "throughput": 8642.95, "total_tokens": 82947600} +{"current_steps": 123075, "total_steps": 204665, "loss": 0.0, "lr": 8.217384664213332e-07, "epoch": 3.0067427259179635, "percentage": 60.13, "elapsed_time": "2:39:57", "remaining_time": "1:46:02", "throughput": 8642.98, "total_tokens": 82950928} +{"current_steps": 123080, "total_steps": 204665, "loss": 0.0001, "lr": 8.216545553492626e-07, "epoch": 3.0068648767498107, "percentage": 60.14, "elapsed_time": "2:39:57", "remaining_time": "1:46:02", "throughput": 8643.01, "total_tokens": 82954256} +{"current_steps": 123085, "total_steps": 204665, "loss": 0.0, "lr": 8.215706455741677e-07, "epoch": 3.006987027581658, "percentage": 60.14, "elapsed_time": "2:39:58", "remaining_time": "1:46:01", "throughput": 8643.07, "total_tokens": 82957904} +{"current_steps": 123090, "total_steps": 204665, "loss": 0.0, "lr": 8.214867370966589e-07, "epoch": 3.007109178413505, "percentage": 60.14, "elapsed_time": "2:39:58", "remaining_time": "1:46:01", "throughput": 8643.13, "total_tokens": 82961616} +{"current_steps": 123095, "total_steps": 204665, "loss": 0.0, "lr": 8.214028299173471e-07, "epoch": 3.0072313292453523, "percentage": 60.14, "elapsed_time": "2:39:58", "remaining_time": "1:46:00", "throughput": 8643.25, "total_tokens": 82966032} +{"current_steps": 123100, "total_steps": 204665, "loss": 0.0001, "lr": 8.213189240368416e-07, "epoch": 3.0073534800771995, "percentage": 60.15, "elapsed_time": "2:39:59", "remaining_time": "1:46:00", "throughput": 8643.29, "total_tokens": 82969424} +{"current_steps": 123105, "total_steps": 204665, "loss": 0.0028, "lr": 8.212350194557532e-07, "epoch": 3.0074756309090467, "percentage": 60.15, "elapsed_time": "2:39:59", "remaining_time": "1:46:00", "throughput": 8643.36, "total_tokens": 82973264} +{"current_steps": 123110, "total_steps": 204665, "loss": 0.0, "lr": 8.211511161746914e-07, "epoch": 3.007597781740894, "percentage": 60.15, "elapsed_time": "2:40:00", "remaining_time": "1:45:59", "throughput": 8643.39, "total_tokens": 82976592} +{"current_steps": 123115, "total_steps": 204665, "loss": 0.0028, "lr": 8.210672141942674e-07, "epoch": 3.0077199325727406, "percentage": 60.15, "elapsed_time": "2:40:00", "remaining_time": "1:45:59", "throughput": 8643.41, "total_tokens": 82979856} +{"current_steps": 123120, "total_steps": 204665, "loss": 0.1153, "lr": 8.209833135150901e-07, "epoch": 3.007842083404588, "percentage": 60.16, "elapsed_time": "2:40:00", "remaining_time": "1:45:58", "throughput": 8643.44, "total_tokens": 82983184} +{"current_steps": 123125, "total_steps": 204665, "loss": 0.0, "lr": 8.208994141377706e-07, "epoch": 3.007964234236435, "percentage": 60.16, "elapsed_time": "2:40:01", "remaining_time": "1:45:58", "throughput": 8643.45, "total_tokens": 82986256} +{"current_steps": 123130, "total_steps": 204665, "loss": 0.0001, "lr": 8.20815516062919e-07, "epoch": 3.008086385068282, "percentage": 60.16, "elapsed_time": "2:40:01", "remaining_time": "1:45:57", "throughput": 8643.51, "total_tokens": 82990032} +{"current_steps": 123135, "total_steps": 204665, "loss": 0.0001, "lr": 8.207316192911447e-07, "epoch": 3.0082085359001294, "percentage": 60.16, "elapsed_time": "2:40:01", "remaining_time": "1:45:57", "throughput": 8643.59, "total_tokens": 82993936} +{"current_steps": 123140, "total_steps": 204665, "loss": 0.0001, "lr": 8.206477238230587e-07, "epoch": 3.0083306867319766, "percentage": 60.17, "elapsed_time": "2:40:02", "remaining_time": "1:45:57", "throughput": 8643.6, "total_tokens": 82997072} +{"current_steps": 123145, "total_steps": 204665, "loss": 0.0001, "lr": 8.205638296592703e-07, "epoch": 3.0084528375638238, "percentage": 60.17, "elapsed_time": "2:40:02", "remaining_time": "1:45:56", "throughput": 8643.64, "total_tokens": 83000528} +{"current_steps": 123150, "total_steps": 204665, "loss": 0.0002, "lr": 8.204799368003903e-07, "epoch": 3.008574988395671, "percentage": 60.17, "elapsed_time": "2:40:02", "remaining_time": "1:45:56", "throughput": 8643.66, "total_tokens": 83003728} +{"current_steps": 123155, "total_steps": 204665, "loss": 0.0001, "lr": 8.203960452470282e-07, "epoch": 3.008697139227518, "percentage": 60.17, "elapsed_time": "2:40:03", "remaining_time": "1:45:55", "throughput": 8643.68, "total_tokens": 83006928} +{"current_steps": 123160, "total_steps": 204665, "loss": 0.0764, "lr": 8.203121549997942e-07, "epoch": 3.0088192900593653, "percentage": 60.18, "elapsed_time": "2:40:03", "remaining_time": "1:45:55", "throughput": 8643.68, "total_tokens": 83010064} +{"current_steps": 123165, "total_steps": 204665, "loss": 0.0001, "lr": 8.202282660592992e-07, "epoch": 3.0089414408912125, "percentage": 60.18, "elapsed_time": "2:40:03", "remaining_time": "1:45:55", "throughput": 8643.73, "total_tokens": 83013520} +{"current_steps": 123170, "total_steps": 204665, "loss": 0.0589, "lr": 8.201443784261522e-07, "epoch": 3.0090635917230597, "percentage": 60.18, "elapsed_time": "2:40:04", "remaining_time": "1:45:54", "throughput": 8643.77, "total_tokens": 83016912} +{"current_steps": 123175, "total_steps": 204665, "loss": 0.0002, "lr": 8.200604921009637e-07, "epoch": 3.009185742554907, "percentage": 60.18, "elapsed_time": "2:40:04", "remaining_time": "1:45:54", "throughput": 8643.82, "total_tokens": 83020368} +{"current_steps": 123180, "total_steps": 204665, "loss": 0.0, "lr": 8.199766070843437e-07, "epoch": 3.009307893386754, "percentage": 60.19, "elapsed_time": "2:40:04", "remaining_time": "1:45:53", "throughput": 8643.85, "total_tokens": 83023760} +{"current_steps": 123185, "total_steps": 204665, "loss": 0.0, "lr": 8.198927233769021e-07, "epoch": 3.0094300442186013, "percentage": 60.19, "elapsed_time": "2:40:05", "remaining_time": "1:45:53", "throughput": 8643.89, "total_tokens": 83027088} +{"current_steps": 123190, "total_steps": 204665, "loss": 0.0001, "lr": 8.198088409792495e-07, "epoch": 3.0095521950504485, "percentage": 60.19, "elapsed_time": "2:40:05", "remaining_time": "1:45:52", "throughput": 8643.97, "total_tokens": 83031056} +{"current_steps": 123195, "total_steps": 204665, "loss": 0.0, "lr": 8.197249598919949e-07, "epoch": 3.0096743458822957, "percentage": 60.19, "elapsed_time": "2:40:06", "remaining_time": "1:45:52", "throughput": 8643.99, "total_tokens": 83034192} +{"current_steps": 123200, "total_steps": 204665, "loss": 0.0001, "lr": 8.196410801157494e-07, "epoch": 3.009796496714143, "percentage": 60.2, "elapsed_time": "2:40:06", "remaining_time": "1:45:52", "throughput": 8644.05, "total_tokens": 83037904} +{"current_steps": 123205, "total_steps": 204665, "loss": 0.0001, "lr": 8.19557201651122e-07, "epoch": 3.0099186475459896, "percentage": 60.2, "elapsed_time": "2:40:06", "remaining_time": "1:45:51", "throughput": 8644.14, "total_tokens": 83042000} +{"current_steps": 123210, "total_steps": 204665, "loss": 0.0, "lr": 8.194733244987235e-07, "epoch": 3.010040798377837, "percentage": 60.2, "elapsed_time": "2:40:07", "remaining_time": "1:45:51", "throughput": 8644.18, "total_tokens": 83045456} +{"current_steps": 123215, "total_steps": 204665, "loss": 0.0, "lr": 8.193894486591633e-07, "epoch": 3.010162949209684, "percentage": 60.2, "elapsed_time": "2:40:07", "remaining_time": "1:45:50", "throughput": 8644.19, "total_tokens": 83048528} +{"current_steps": 123220, "total_steps": 204665, "loss": 0.0, "lr": 8.193055741330517e-07, "epoch": 3.010285100041531, "percentage": 60.21, "elapsed_time": "2:40:07", "remaining_time": "1:45:50", "throughput": 8644.19, "total_tokens": 83051472} +{"current_steps": 123225, "total_steps": 204665, "loss": 0.0637, "lr": 8.192217009209986e-07, "epoch": 3.0104072508733783, "percentage": 60.21, "elapsed_time": "2:40:08", "remaining_time": "1:45:50", "throughput": 8644.21, "total_tokens": 83054672} +{"current_steps": 123230, "total_steps": 204665, "loss": 0.0001, "lr": 8.191378290236139e-07, "epoch": 3.0105294017052255, "percentage": 60.21, "elapsed_time": "2:40:08", "remaining_time": "1:45:49", "throughput": 8644.25, "total_tokens": 83058192} +{"current_steps": 123235, "total_steps": 204665, "loss": 0.0001, "lr": 8.19053958441508e-07, "epoch": 3.0106515525370727, "percentage": 60.21, "elapsed_time": "2:40:08", "remaining_time": "1:45:49", "throughput": 8644.27, "total_tokens": 83061392} +{"current_steps": 123240, "total_steps": 204665, "loss": 0.0532, "lr": 8.189700891752897e-07, "epoch": 3.01077370336892, "percentage": 60.22, "elapsed_time": "2:40:09", "remaining_time": "1:45:48", "throughput": 8644.29, "total_tokens": 83064592} +{"current_steps": 123245, "total_steps": 204665, "loss": 0.0002, "lr": 8.188862212255696e-07, "epoch": 3.010895854200767, "percentage": 60.22, "elapsed_time": "2:40:09", "remaining_time": "1:45:48", "throughput": 8644.3, "total_tokens": 83067728} +{"current_steps": 123250, "total_steps": 204665, "loss": 0.0, "lr": 8.188023545929581e-07, "epoch": 3.0110180050326143, "percentage": 60.22, "elapsed_time": "2:40:09", "remaining_time": "1:45:47", "throughput": 8644.31, "total_tokens": 83070736} +{"current_steps": 123255, "total_steps": 204665, "loss": 0.0001, "lr": 8.187184892780641e-07, "epoch": 3.0111401558644615, "percentage": 60.22, "elapsed_time": "2:40:10", "remaining_time": "1:45:47", "throughput": 8644.37, "total_tokens": 83074384} +{"current_steps": 123260, "total_steps": 204665, "loss": 0.0, "lr": 8.186346252814986e-07, "epoch": 3.0112623066963087, "percentage": 60.23, "elapsed_time": "2:40:10", "remaining_time": "1:45:47", "throughput": 8644.37, "total_tokens": 83077456} +{"current_steps": 123265, "total_steps": 204665, "loss": 0.0, "lr": 8.185507626038703e-07, "epoch": 3.011384457528156, "percentage": 60.23, "elapsed_time": "2:40:10", "remaining_time": "1:45:46", "throughput": 8644.42, "total_tokens": 83080976} +{"current_steps": 123270, "total_steps": 204665, "loss": 0.0, "lr": 8.184669012457902e-07, "epoch": 3.011506608360003, "percentage": 60.23, "elapsed_time": "2:40:11", "remaining_time": "1:45:46", "throughput": 8644.51, "total_tokens": 83084944} +{"current_steps": 123275, "total_steps": 204665, "loss": 0.0002, "lr": 8.183830412078671e-07, "epoch": 3.0116287591918502, "percentage": 60.23, "elapsed_time": "2:40:11", "remaining_time": "1:45:45", "throughput": 8644.52, "total_tokens": 83088144} +{"current_steps": 123280, "total_steps": 204665, "loss": 0.0, "lr": 8.182991824907118e-07, "epoch": 3.0117509100236974, "percentage": 60.24, "elapsed_time": "2:40:11", "remaining_time": "1:45:45", "throughput": 8644.56, "total_tokens": 83091472} +{"current_steps": 123285, "total_steps": 204665, "loss": 0.0, "lr": 8.182153250949336e-07, "epoch": 3.0118730608555446, "percentage": 60.24, "elapsed_time": "2:40:12", "remaining_time": "1:45:45", "throughput": 8644.61, "total_tokens": 83095056} +{"current_steps": 123290, "total_steps": 204665, "loss": 0.0, "lr": 8.181314690211422e-07, "epoch": 3.0119952116873914, "percentage": 60.24, "elapsed_time": "2:40:12", "remaining_time": "1:45:44", "throughput": 8644.64, "total_tokens": 83098384} +{"current_steps": 123295, "total_steps": 204665, "loss": 0.0001, "lr": 8.180476142699482e-07, "epoch": 3.0121173625192386, "percentage": 60.24, "elapsed_time": "2:40:13", "remaining_time": "1:45:44", "throughput": 8644.66, "total_tokens": 83101648} +{"current_steps": 123300, "total_steps": 204665, "loss": 0.0002, "lr": 8.179637608419603e-07, "epoch": 3.0122395133510858, "percentage": 60.24, "elapsed_time": "2:40:13", "remaining_time": "1:45:43", "throughput": 8644.72, "total_tokens": 83105360} +{"current_steps": 123305, "total_steps": 204665, "loss": 0.0017, "lr": 8.178799087377893e-07, "epoch": 3.012361664182933, "percentage": 60.25, "elapsed_time": "2:40:13", "remaining_time": "1:45:43", "throughput": 8644.73, "total_tokens": 83108368} +{"current_steps": 123310, "total_steps": 204665, "loss": 0.0001, "lr": 8.177960579580443e-07, "epoch": 3.01248381501478, "percentage": 60.25, "elapsed_time": "2:40:14", "remaining_time": "1:45:43", "throughput": 8644.73, "total_tokens": 83111440} +{"current_steps": 123315, "total_steps": 204665, "loss": 0.0, "lr": 8.177122085033352e-07, "epoch": 3.0126059658466273, "percentage": 60.25, "elapsed_time": "2:40:14", "remaining_time": "1:45:42", "throughput": 8644.77, "total_tokens": 83114896} +{"current_steps": 123320, "total_steps": 204665, "loss": 0.0179, "lr": 8.176283603742726e-07, "epoch": 3.0127281166784745, "percentage": 60.25, "elapsed_time": "2:40:14", "remaining_time": "1:45:42", "throughput": 8644.78, "total_tokens": 83117968} +{"current_steps": 123325, "total_steps": 204665, "loss": 0.0, "lr": 8.175445135714653e-07, "epoch": 3.0128502675103217, "percentage": 60.26, "elapsed_time": "2:40:15", "remaining_time": "1:45:41", "throughput": 8644.8, "total_tokens": 83121104} +{"current_steps": 123330, "total_steps": 204665, "loss": 0.0, "lr": 8.174606680955232e-07, "epoch": 3.012972418342169, "percentage": 60.26, "elapsed_time": "2:40:15", "remaining_time": "1:45:41", "throughput": 8644.83, "total_tokens": 83124432} +{"current_steps": 123335, "total_steps": 204665, "loss": 0.0534, "lr": 8.173768239470564e-07, "epoch": 3.013094569174016, "percentage": 60.26, "elapsed_time": "2:40:15", "remaining_time": "1:45:40", "throughput": 8644.84, "total_tokens": 83127504} +{"current_steps": 123340, "total_steps": 204665, "loss": 0.0, "lr": 8.172929811266744e-07, "epoch": 3.0132167200058633, "percentage": 60.26, "elapsed_time": "2:40:16", "remaining_time": "1:45:40", "throughput": 8644.91, "total_tokens": 83131280} +{"current_steps": 123345, "total_steps": 204665, "loss": 0.0, "lr": 8.172091396349871e-07, "epoch": 3.0133388708377105, "percentage": 60.27, "elapsed_time": "2:40:16", "remaining_time": "1:45:40", "throughput": 8644.94, "total_tokens": 83134544} +{"current_steps": 123350, "total_steps": 204665, "loss": 0.0, "lr": 8.171252994726039e-07, "epoch": 3.0134610216695576, "percentage": 60.27, "elapsed_time": "2:40:16", "remaining_time": "1:45:39", "throughput": 8644.95, "total_tokens": 83137616} +{"current_steps": 123355, "total_steps": 204665, "loss": 0.0001, "lr": 8.17041460640135e-07, "epoch": 3.013583172501405, "percentage": 60.27, "elapsed_time": "2:40:17", "remaining_time": "1:45:39", "throughput": 8644.98, "total_tokens": 83141008} +{"current_steps": 123360, "total_steps": 204665, "loss": 0.0, "lr": 8.169576231381894e-07, "epoch": 3.013705323333252, "percentage": 60.27, "elapsed_time": "2:40:17", "remaining_time": "1:45:38", "throughput": 8645.06, "total_tokens": 83144912} +{"current_steps": 123365, "total_steps": 204665, "loss": 0.0, "lr": 8.168737869673776e-07, "epoch": 3.013827474165099, "percentage": 60.28, "elapsed_time": "2:40:17", "remaining_time": "1:45:38", "throughput": 8645.12, "total_tokens": 83148560} +{"current_steps": 123370, "total_steps": 204665, "loss": 0.0001, "lr": 8.167899521283086e-07, "epoch": 3.0139496249969464, "percentage": 60.28, "elapsed_time": "2:40:18", "remaining_time": "1:45:38", "throughput": 8645.14, "total_tokens": 83151824} +{"current_steps": 123375, "total_steps": 204665, "loss": 0.0002, "lr": 8.167061186215925e-07, "epoch": 3.0140717758287936, "percentage": 60.28, "elapsed_time": "2:40:18", "remaining_time": "1:45:37", "throughput": 8645.19, "total_tokens": 83155472} +{"current_steps": 123380, "total_steps": 204665, "loss": 0.0, "lr": 8.166222864478387e-07, "epoch": 3.0141939266606403, "percentage": 60.28, "elapsed_time": "2:40:19", "remaining_time": "1:45:37", "throughput": 8645.23, "total_tokens": 83158864} +{"current_steps": 123385, "total_steps": 204665, "loss": 0.0, "lr": 8.16538455607657e-07, "epoch": 3.0143160774924875, "percentage": 60.29, "elapsed_time": "2:40:19", "remaining_time": "1:45:36", "throughput": 8645.25, "total_tokens": 83161936} +{"current_steps": 123390, "total_steps": 204665, "loss": 0.0565, "lr": 8.164546261016572e-07, "epoch": 3.0144382283243347, "percentage": 60.29, "elapsed_time": "2:40:19", "remaining_time": "1:45:36", "throughput": 8645.26, "total_tokens": 83165008} +{"current_steps": 123395, "total_steps": 204665, "loss": 0.0688, "lr": 8.163707979304483e-07, "epoch": 3.014560379156182, "percentage": 60.29, "elapsed_time": "2:40:20", "remaining_time": "1:45:35", "throughput": 8645.35, "total_tokens": 83168976} +{"current_steps": 123400, "total_steps": 204665, "loss": 0.0, "lr": 8.162869710946404e-07, "epoch": 3.014682529988029, "percentage": 60.29, "elapsed_time": "2:40:20", "remaining_time": "1:45:35", "throughput": 8645.38, "total_tokens": 83172368} +{"current_steps": 123405, "total_steps": 204665, "loss": 0.0, "lr": 8.162031455948435e-07, "epoch": 3.0148046808198763, "percentage": 60.3, "elapsed_time": "2:40:20", "remaining_time": "1:45:35", "throughput": 8645.43, "total_tokens": 83175824} +{"current_steps": 123410, "total_steps": 204665, "loss": 0.0, "lr": 8.161193214316662e-07, "epoch": 3.0149268316517235, "percentage": 60.3, "elapsed_time": "2:40:21", "remaining_time": "1:45:34", "throughput": 8645.44, "total_tokens": 83178896} +{"current_steps": 123415, "total_steps": 204665, "loss": 0.0001, "lr": 8.160354986057192e-07, "epoch": 3.0150489824835707, "percentage": 60.3, "elapsed_time": "2:40:21", "remaining_time": "1:45:34", "throughput": 8645.45, "total_tokens": 83181968} +{"current_steps": 123420, "total_steps": 204665, "loss": 0.0, "lr": 8.15951677117611e-07, "epoch": 3.015171133315418, "percentage": 60.3, "elapsed_time": "2:40:21", "remaining_time": "1:45:33", "throughput": 8645.52, "total_tokens": 83185680} +{"current_steps": 123425, "total_steps": 204665, "loss": 0.0, "lr": 8.158678569679523e-07, "epoch": 3.015293284147265, "percentage": 60.31, "elapsed_time": "2:40:22", "remaining_time": "1:45:33", "throughput": 8645.54, "total_tokens": 83188944} +{"current_steps": 123430, "total_steps": 204665, "loss": 0.0, "lr": 8.157840381573515e-07, "epoch": 3.0154154349791122, "percentage": 60.31, "elapsed_time": "2:40:22", "remaining_time": "1:45:33", "throughput": 8645.56, "total_tokens": 83192080} +{"current_steps": 123435, "total_steps": 204665, "loss": 0.0, "lr": 8.15700220686419e-07, "epoch": 3.0155375858109594, "percentage": 60.31, "elapsed_time": "2:40:22", "remaining_time": "1:45:32", "throughput": 8645.57, "total_tokens": 83195152} +{"current_steps": 123440, "total_steps": 204665, "loss": 0.0, "lr": 8.156164045557643e-07, "epoch": 3.0156597366428066, "percentage": 60.31, "elapsed_time": "2:40:23", "remaining_time": "1:45:32", "throughput": 8645.61, "total_tokens": 83198544} +{"current_steps": 123445, "total_steps": 204665, "loss": 0.0, "lr": 8.155325897659963e-07, "epoch": 3.015781887474654, "percentage": 60.32, "elapsed_time": "2:40:23", "remaining_time": "1:45:31", "throughput": 8645.67, "total_tokens": 83202192} +{"current_steps": 123450, "total_steps": 204665, "loss": 0.0003, "lr": 8.154487763177252e-07, "epoch": 3.015904038306501, "percentage": 60.32, "elapsed_time": "2:40:23", "remaining_time": "1:45:31", "throughput": 8645.72, "total_tokens": 83205648} +{"current_steps": 123455, "total_steps": 204665, "loss": 0.0001, "lr": 8.1536496421156e-07, "epoch": 3.016026189138348, "percentage": 60.32, "elapsed_time": "2:40:24", "remaining_time": "1:45:30", "throughput": 8645.78, "total_tokens": 83209296} +{"current_steps": 123460, "total_steps": 204665, "loss": 0.0551, "lr": 8.152811534481108e-07, "epoch": 3.0161483399701954, "percentage": 60.32, "elapsed_time": "2:40:24", "remaining_time": "1:45:30", "throughput": 8645.81, "total_tokens": 83212560} +{"current_steps": 123465, "total_steps": 204665, "loss": 0.0, "lr": 8.151973440279862e-07, "epoch": 3.0162704908020426, "percentage": 60.33, "elapsed_time": "2:40:24", "remaining_time": "1:45:30", "throughput": 8645.85, "total_tokens": 83216016} +{"current_steps": 123470, "total_steps": 204665, "loss": 0.0001, "lr": 8.151135359517963e-07, "epoch": 3.0163926416338893, "percentage": 60.33, "elapsed_time": "2:40:25", "remaining_time": "1:45:29", "throughput": 8645.87, "total_tokens": 83219152} +{"current_steps": 123475, "total_steps": 204665, "loss": 0.0183, "lr": 8.150297292201509e-07, "epoch": 3.0165147924657365, "percentage": 60.33, "elapsed_time": "2:40:25", "remaining_time": "1:45:29", "throughput": 8645.91, "total_tokens": 83222608} +{"current_steps": 123480, "total_steps": 204665, "loss": 0.0, "lr": 8.149459238336589e-07, "epoch": 3.0166369432975837, "percentage": 60.33, "elapsed_time": "2:40:26", "remaining_time": "1:45:28", "throughput": 8645.93, "total_tokens": 83226000} +{"current_steps": 123485, "total_steps": 204665, "loss": 0.0002, "lr": 8.148621197929298e-07, "epoch": 3.016759094129431, "percentage": 60.34, "elapsed_time": "2:40:26", "remaining_time": "1:45:28", "throughput": 8645.93, "total_tokens": 83228880} +{"current_steps": 123490, "total_steps": 204665, "loss": 0.0, "lr": 8.147783170985734e-07, "epoch": 3.016881244961278, "percentage": 60.34, "elapsed_time": "2:40:26", "remaining_time": "1:45:28", "throughput": 8645.97, "total_tokens": 83232272} +{"current_steps": 123495, "total_steps": 204665, "loss": 0.0, "lr": 8.146945157511984e-07, "epoch": 3.0170033957931253, "percentage": 60.34, "elapsed_time": "2:40:27", "remaining_time": "1:45:27", "throughput": 8645.99, "total_tokens": 83235536} +{"current_steps": 123500, "total_steps": 204665, "loss": 0.0, "lr": 8.146107157514152e-07, "epoch": 3.0171255466249725, "percentage": 60.34, "elapsed_time": "2:40:27", "remaining_time": "1:45:27", "throughput": 8646.04, "total_tokens": 83239056} +{"current_steps": 123505, "total_steps": 204665, "loss": 0.0001, "lr": 8.145269170998326e-07, "epoch": 3.0172476974568196, "percentage": 60.34, "elapsed_time": "2:40:27", "remaining_time": "1:45:26", "throughput": 8646.07, "total_tokens": 83242320} +{"current_steps": 123510, "total_steps": 204665, "loss": 0.0, "lr": 8.144431197970602e-07, "epoch": 3.017369848288667, "percentage": 60.35, "elapsed_time": "2:40:28", "remaining_time": "1:45:26", "throughput": 8646.1, "total_tokens": 83245648} +{"current_steps": 123515, "total_steps": 204665, "loss": 0.0, "lr": 8.143593238437072e-07, "epoch": 3.017491999120514, "percentage": 60.35, "elapsed_time": "2:40:28", "remaining_time": "1:45:25", "throughput": 8646.2, "total_tokens": 83249744} +{"current_steps": 123520, "total_steps": 204665, "loss": 0.0, "lr": 8.142755292403833e-07, "epoch": 3.017614149952361, "percentage": 60.35, "elapsed_time": "2:40:28", "remaining_time": "1:45:25", "throughput": 8646.24, "total_tokens": 83253072} +{"current_steps": 123525, "total_steps": 204665, "loss": 0.0, "lr": 8.141917359876975e-07, "epoch": 3.0177363007842084, "percentage": 60.35, "elapsed_time": "2:40:29", "remaining_time": "1:45:25", "throughput": 8646.31, "total_tokens": 83256848} +{"current_steps": 123530, "total_steps": 204665, "loss": 0.0, "lr": 8.141079440862595e-07, "epoch": 3.0178584516160556, "percentage": 60.36, "elapsed_time": "2:40:29", "remaining_time": "1:45:24", "throughput": 8646.37, "total_tokens": 83260496} +{"current_steps": 123535, "total_steps": 204665, "loss": 0.0, "lr": 8.140241535366785e-07, "epoch": 3.017980602447903, "percentage": 60.36, "elapsed_time": "2:40:29", "remaining_time": "1:45:24", "throughput": 8646.4, "total_tokens": 83263824} +{"current_steps": 123540, "total_steps": 204665, "loss": 0.0004, "lr": 8.139403643395639e-07, "epoch": 3.01810275327975, "percentage": 60.36, "elapsed_time": "2:40:30", "remaining_time": "1:45:23", "throughput": 8646.43, "total_tokens": 83267152} +{"current_steps": 123545, "total_steps": 204665, "loss": 0.0, "lr": 8.138565764955252e-07, "epoch": 3.018224904111597, "percentage": 60.36, "elapsed_time": "2:40:30", "remaining_time": "1:45:23", "throughput": 8646.47, "total_tokens": 83270544} +{"current_steps": 123550, "total_steps": 204665, "loss": 0.0, "lr": 8.137727900051712e-07, "epoch": 3.0183470549434444, "percentage": 60.37, "elapsed_time": "2:40:30", "remaining_time": "1:45:23", "throughput": 8646.47, "total_tokens": 83273424} +{"current_steps": 123555, "total_steps": 204665, "loss": 0.0, "lr": 8.136890048691116e-07, "epoch": 3.0184692057752915, "percentage": 60.37, "elapsed_time": "2:40:31", "remaining_time": "1:45:22", "throughput": 8646.52, "total_tokens": 83276944} +{"current_steps": 123560, "total_steps": 204665, "loss": 0.0001, "lr": 8.136052210879559e-07, "epoch": 3.0185913566071383, "percentage": 60.37, "elapsed_time": "2:40:31", "remaining_time": "1:45:22", "throughput": 8646.54, "total_tokens": 83280144} +{"current_steps": 123565, "total_steps": 204665, "loss": 0.0, "lr": 8.135214386623128e-07, "epoch": 3.0187135074389855, "percentage": 60.37, "elapsed_time": "2:40:31", "remaining_time": "1:45:21", "throughput": 8646.6, "total_tokens": 83283728} +{"current_steps": 123570, "total_steps": 204665, "loss": 0.0667, "lr": 8.134376575927924e-07, "epoch": 3.0188356582708327, "percentage": 60.38, "elapsed_time": "2:40:32", "remaining_time": "1:45:21", "throughput": 8646.62, "total_tokens": 83286928} +{"current_steps": 123575, "total_steps": 204665, "loss": 0.0, "lr": 8.133538778800032e-07, "epoch": 3.01895780910268, "percentage": 60.38, "elapsed_time": "2:40:32", "remaining_time": "1:45:20", "throughput": 8646.68, "total_tokens": 83290576} +{"current_steps": 123580, "total_steps": 204665, "loss": 0.0, "lr": 8.132700995245552e-07, "epoch": 3.019079959934527, "percentage": 60.38, "elapsed_time": "2:40:33", "remaining_time": "1:45:20", "throughput": 8646.7, "total_tokens": 83293776} +{"current_steps": 123585, "total_steps": 204665, "loss": 0.0, "lr": 8.131863225270568e-07, "epoch": 3.0192021107663742, "percentage": 60.38, "elapsed_time": "2:40:33", "remaining_time": "1:45:20", "throughput": 8646.73, "total_tokens": 83297040} +{"current_steps": 123590, "total_steps": 204665, "loss": 0.0001, "lr": 8.131025468881179e-07, "epoch": 3.0193242615982214, "percentage": 60.39, "elapsed_time": "2:40:33", "remaining_time": "1:45:19", "throughput": 8646.8, "total_tokens": 83300816} +{"current_steps": 123595, "total_steps": 204665, "loss": 0.0533, "lr": 8.130187726083477e-07, "epoch": 3.0194464124300686, "percentage": 60.39, "elapsed_time": "2:40:34", "remaining_time": "1:45:19", "throughput": 8646.85, "total_tokens": 83304336} +{"current_steps": 123600, "total_steps": 204665, "loss": 0.0, "lr": 8.12934999688355e-07, "epoch": 3.019568563261916, "percentage": 60.39, "elapsed_time": "2:40:34", "remaining_time": "1:45:18", "throughput": 8646.88, "total_tokens": 83307600} +{"current_steps": 123605, "total_steps": 204665, "loss": 0.0414, "lr": 8.128512281287496e-07, "epoch": 3.019690714093763, "percentage": 60.39, "elapsed_time": "2:40:34", "remaining_time": "1:45:18", "throughput": 8646.94, "total_tokens": 83311184} +{"current_steps": 123610, "total_steps": 204665, "loss": 0.0006, "lr": 8.1276745793014e-07, "epoch": 3.01981286492561, "percentage": 60.4, "elapsed_time": "2:40:35", "remaining_time": "1:45:18", "throughput": 8646.96, "total_tokens": 83314384} +{"current_steps": 123615, "total_steps": 204665, "loss": 0.0001, "lr": 8.126836890931363e-07, "epoch": 3.0199350157574574, "percentage": 60.4, "elapsed_time": "2:40:35", "remaining_time": "1:45:17", "throughput": 8646.96, "total_tokens": 83317328} +{"current_steps": 123620, "total_steps": 204665, "loss": 0.0, "lr": 8.125999216183466e-07, "epoch": 3.0200571665893046, "percentage": 60.4, "elapsed_time": "2:40:35", "remaining_time": "1:45:17", "throughput": 8647.03, "total_tokens": 83321040} +{"current_steps": 123625, "total_steps": 204665, "loss": 0.0, "lr": 8.125161555063809e-07, "epoch": 3.0201793174211518, "percentage": 60.4, "elapsed_time": "2:40:36", "remaining_time": "1:45:16", "throughput": 8647.06, "total_tokens": 83324368} +{"current_steps": 123630, "total_steps": 204665, "loss": 0.0203, "lr": 8.124323907578485e-07, "epoch": 3.020301468252999, "percentage": 60.41, "elapsed_time": "2:40:36", "remaining_time": "1:45:16", "throughput": 8647.12, "total_tokens": 83328016} +{"current_steps": 123635, "total_steps": 204665, "loss": 0.0, "lr": 8.12348627373358e-07, "epoch": 3.020423619084846, "percentage": 60.41, "elapsed_time": "2:40:36", "remaining_time": "1:45:15", "throughput": 8647.19, "total_tokens": 83331792} +{"current_steps": 123640, "total_steps": 204665, "loss": 0.0, "lr": 8.122648653535187e-07, "epoch": 3.0205457699166933, "percentage": 60.41, "elapsed_time": "2:40:37", "remaining_time": "1:45:15", "throughput": 8647.24, "total_tokens": 83335248} +{"current_steps": 123645, "total_steps": 204665, "loss": 0.0739, "lr": 8.121811046989397e-07, "epoch": 3.0206679207485405, "percentage": 60.41, "elapsed_time": "2:40:37", "remaining_time": "1:45:15", "throughput": 8647.26, "total_tokens": 83338448} +{"current_steps": 123650, "total_steps": 204665, "loss": 0.0001, "lr": 8.120973454102303e-07, "epoch": 3.0207900715803873, "percentage": 60.42, "elapsed_time": "2:40:37", "remaining_time": "1:45:14", "throughput": 8647.29, "total_tokens": 83341712} +{"current_steps": 123655, "total_steps": 204665, "loss": 0.0501, "lr": 8.120135874879998e-07, "epoch": 3.0209122224122344, "percentage": 60.42, "elapsed_time": "2:40:38", "remaining_time": "1:45:14", "throughput": 8647.34, "total_tokens": 83345296} +{"current_steps": 123660, "total_steps": 204665, "loss": 0.0, "lr": 8.119298309328565e-07, "epoch": 3.0210343732440816, "percentage": 60.42, "elapsed_time": "2:40:38", "remaining_time": "1:45:13", "throughput": 8647.37, "total_tokens": 83348496} +{"current_steps": 123665, "total_steps": 204665, "loss": 0.0, "lr": 8.118460757454107e-07, "epoch": 3.021156524075929, "percentage": 60.42, "elapsed_time": "2:40:38", "remaining_time": "1:45:13", "throughput": 8647.4, "total_tokens": 83351824} +{"current_steps": 123670, "total_steps": 204665, "loss": 0.0001, "lr": 8.117623219262702e-07, "epoch": 3.021278674907776, "percentage": 60.43, "elapsed_time": "2:40:39", "remaining_time": "1:45:13", "throughput": 8647.44, "total_tokens": 83355216} +{"current_steps": 123675, "total_steps": 204665, "loss": 0.0, "lr": 8.116785694760453e-07, "epoch": 3.021400825739623, "percentage": 60.43, "elapsed_time": "2:40:39", "remaining_time": "1:45:12", "throughput": 8647.5, "total_tokens": 83358928} +{"current_steps": 123680, "total_steps": 204665, "loss": 0.0, "lr": 8.115948183953441e-07, "epoch": 3.0215229765714704, "percentage": 60.43, "elapsed_time": "2:40:40", "remaining_time": "1:45:12", "throughput": 8647.3, "total_tokens": 83362128} +{"current_steps": 123685, "total_steps": 204665, "loss": 0.0907, "lr": 8.115110686847762e-07, "epoch": 3.0216451274033176, "percentage": 60.43, "elapsed_time": "2:40:40", "remaining_time": "1:45:11", "throughput": 8647.32, "total_tokens": 83365328} +{"current_steps": 123690, "total_steps": 204665, "loss": 0.0, "lr": 8.114273203449504e-07, "epoch": 3.0217672782351648, "percentage": 60.44, "elapsed_time": "2:40:40", "remaining_time": "1:45:11", "throughput": 8647.35, "total_tokens": 83368592} +{"current_steps": 123695, "total_steps": 204665, "loss": 0.0377, "lr": 8.11343573376476e-07, "epoch": 3.021889429067012, "percentage": 60.44, "elapsed_time": "2:40:41", "remaining_time": "1:45:11", "throughput": 8647.38, "total_tokens": 83371920} +{"current_steps": 123700, "total_steps": 204665, "loss": 0.0, "lr": 8.112598277799621e-07, "epoch": 3.022011579898859, "percentage": 60.44, "elapsed_time": "2:40:41", "remaining_time": "1:45:10", "throughput": 8647.42, "total_tokens": 83375312} +{"current_steps": 123705, "total_steps": 204665, "loss": 0.0, "lr": 8.111760835560171e-07, "epoch": 3.0221337307307063, "percentage": 60.44, "elapsed_time": "2:40:42", "remaining_time": "1:45:10", "throughput": 8647.47, "total_tokens": 83378896} +{"current_steps": 123710, "total_steps": 204665, "loss": 0.0001, "lr": 8.110923407052507e-07, "epoch": 3.0222558815625535, "percentage": 60.45, "elapsed_time": "2:40:42", "remaining_time": "1:45:09", "throughput": 8647.48, "total_tokens": 83381904} +{"current_steps": 123715, "total_steps": 204665, "loss": 0.0001, "lr": 8.110085992282713e-07, "epoch": 3.0223780323944007, "percentage": 60.45, "elapsed_time": "2:40:42", "remaining_time": "1:45:09", "throughput": 8647.52, "total_tokens": 83385360} +{"current_steps": 123720, "total_steps": 204665, "loss": 0.0002, "lr": 8.10924859125688e-07, "epoch": 3.022500183226248, "percentage": 60.45, "elapsed_time": "2:40:43", "remaining_time": "1:45:09", "throughput": 8647.6, "total_tokens": 83389264} +{"current_steps": 123725, "total_steps": 204665, "loss": 0.0444, "lr": 8.108411203981106e-07, "epoch": 3.022622334058095, "percentage": 60.45, "elapsed_time": "2:40:43", "remaining_time": "1:45:08", "throughput": 8647.65, "total_tokens": 83392784} +{"current_steps": 123730, "total_steps": 204665, "loss": 0.0, "lr": 8.107573830461469e-07, "epoch": 3.0227444848899423, "percentage": 60.45, "elapsed_time": "2:40:43", "remaining_time": "1:45:08", "throughput": 8647.7, "total_tokens": 83396240} +{"current_steps": 123735, "total_steps": 204665, "loss": 0.0, "lr": 8.10673647070407e-07, "epoch": 3.0228666357217895, "percentage": 60.46, "elapsed_time": "2:40:44", "remaining_time": "1:45:07", "throughput": 8647.74, "total_tokens": 83399696} +{"current_steps": 123740, "total_steps": 204665, "loss": 0.0022, "lr": 8.105899124714987e-07, "epoch": 3.0229887865536362, "percentage": 60.46, "elapsed_time": "2:40:44", "remaining_time": "1:45:07", "throughput": 8647.81, "total_tokens": 83403408} +{"current_steps": 123745, "total_steps": 204665, "loss": 0.0, "lr": 8.105061792500317e-07, "epoch": 3.0231109373854834, "percentage": 60.46, "elapsed_time": "2:40:44", "remaining_time": "1:45:06", "throughput": 8647.83, "total_tokens": 83406608} +{"current_steps": 123750, "total_steps": 204665, "loss": 0.0001, "lr": 8.10422447406615e-07, "epoch": 3.0232330882173306, "percentage": 60.46, "elapsed_time": "2:40:45", "remaining_time": "1:45:06", "throughput": 8647.88, "total_tokens": 83410128} +{"current_steps": 123755, "total_steps": 204665, "loss": 0.0, "lr": 8.10338716941857e-07, "epoch": 3.023355239049178, "percentage": 60.47, "elapsed_time": "2:40:45", "remaining_time": "1:45:06", "throughput": 8647.95, "total_tokens": 83413840} +{"current_steps": 123760, "total_steps": 204665, "loss": 0.0, "lr": 8.10254987856367e-07, "epoch": 3.023477389881025, "percentage": 60.47, "elapsed_time": "2:40:45", "remaining_time": "1:45:05", "throughput": 8647.95, "total_tokens": 83416848} +{"current_steps": 123765, "total_steps": 204665, "loss": 0.0688, "lr": 8.101712601507535e-07, "epoch": 3.023599540712872, "percentage": 60.47, "elapsed_time": "2:40:46", "remaining_time": "1:45:05", "throughput": 8648.0, "total_tokens": 83420368} +{"current_steps": 123770, "total_steps": 204665, "loss": 0.0312, "lr": 8.10087533825626e-07, "epoch": 3.0237216915447194, "percentage": 60.47, "elapsed_time": "2:40:46", "remaining_time": "1:45:04", "throughput": 8648.03, "total_tokens": 83423632} +{"current_steps": 123775, "total_steps": 204665, "loss": 0.0001, "lr": 8.100038088815925e-07, "epoch": 3.0238438423765666, "percentage": 60.48, "elapsed_time": "2:40:46", "remaining_time": "1:45:04", "throughput": 8648.06, "total_tokens": 83426896} +{"current_steps": 123780, "total_steps": 204665, "loss": 0.0001, "lr": 8.099200853192627e-07, "epoch": 3.0239659932084137, "percentage": 60.48, "elapsed_time": "2:40:47", "remaining_time": "1:45:04", "throughput": 8648.1, "total_tokens": 83430352} +{"current_steps": 123785, "total_steps": 204665, "loss": 0.0, "lr": 8.098363631392454e-07, "epoch": 3.024088144040261, "percentage": 60.48, "elapsed_time": "2:40:47", "remaining_time": "1:45:03", "throughput": 8648.14, "total_tokens": 83433744} +{"current_steps": 123790, "total_steps": 204665, "loss": 0.0, "lr": 8.09752642342149e-07, "epoch": 3.024210294872108, "percentage": 60.48, "elapsed_time": "2:40:47", "remaining_time": "1:45:03", "throughput": 8648.2, "total_tokens": 83437328} +{"current_steps": 123795, "total_steps": 204665, "loss": 0.0001, "lr": 8.096689229285827e-07, "epoch": 3.0243324457039553, "percentage": 60.49, "elapsed_time": "2:40:48", "remaining_time": "1:45:02", "throughput": 8648.24, "total_tokens": 83440656} +{"current_steps": 123800, "total_steps": 204665, "loss": 0.0002, "lr": 8.095852048991551e-07, "epoch": 3.0244545965358025, "percentage": 60.49, "elapsed_time": "2:40:48", "remaining_time": "1:45:02", "throughput": 8648.26, "total_tokens": 83443856} +{"current_steps": 123805, "total_steps": 204665, "loss": 0.0, "lr": 8.095014882544749e-07, "epoch": 3.0245767473676497, "percentage": 60.49, "elapsed_time": "2:40:48", "remaining_time": "1:45:01", "throughput": 8648.27, "total_tokens": 83446864} +{"current_steps": 123810, "total_steps": 204665, "loss": 0.0, "lr": 8.094177729951515e-07, "epoch": 3.024698898199497, "percentage": 60.49, "elapsed_time": "2:40:49", "remaining_time": "1:45:01", "throughput": 8648.28, "total_tokens": 83450000} +{"current_steps": 123815, "total_steps": 204665, "loss": 0.0, "lr": 8.093340591217928e-07, "epoch": 3.024821049031344, "percentage": 60.5, "elapsed_time": "2:40:49", "remaining_time": "1:45:01", "throughput": 8648.35, "total_tokens": 83453712} +{"current_steps": 123820, "total_steps": 204665, "loss": 0.0001, "lr": 8.092503466350086e-07, "epoch": 3.0249431998631913, "percentage": 60.5, "elapsed_time": "2:40:50", "remaining_time": "1:45:00", "throughput": 8648.38, "total_tokens": 83457040} +{"current_steps": 123825, "total_steps": 204665, "loss": 0.0, "lr": 8.091666355354069e-07, "epoch": 3.025065350695038, "percentage": 60.5, "elapsed_time": "2:40:50", "remaining_time": "1:45:00", "throughput": 8648.42, "total_tokens": 83460496} +{"current_steps": 123830, "total_steps": 204665, "loss": 0.0, "lr": 8.09082925823597e-07, "epoch": 3.025187501526885, "percentage": 60.5, "elapsed_time": "2:40:50", "remaining_time": "1:44:59", "throughput": 8648.42, "total_tokens": 83463376} +{"current_steps": 123835, "total_steps": 204665, "loss": 0.0659, "lr": 8.089992175001871e-07, "epoch": 3.0253096523587324, "percentage": 60.51, "elapsed_time": "2:40:51", "remaining_time": "1:44:59", "throughput": 8648.44, "total_tokens": 83466448} +{"current_steps": 123840, "total_steps": 204665, "loss": 0.0, "lr": 8.089155105657864e-07, "epoch": 3.0254318031905796, "percentage": 60.51, "elapsed_time": "2:40:51", "remaining_time": "1:44:59", "throughput": 8648.46, "total_tokens": 83469712} +{"current_steps": 123845, "total_steps": 204665, "loss": 0.0427, "lr": 8.088318050210036e-07, "epoch": 3.0255539540224268, "percentage": 60.51, "elapsed_time": "2:40:51", "remaining_time": "1:44:58", "throughput": 8648.48, "total_tokens": 83472784} +{"current_steps": 123850, "total_steps": 204665, "loss": 0.0, "lr": 8.087481008664471e-07, "epoch": 3.025676104854274, "percentage": 60.51, "elapsed_time": "2:40:52", "remaining_time": "1:44:58", "throughput": 8648.53, "total_tokens": 83476304} +{"current_steps": 123855, "total_steps": 204665, "loss": 0.0, "lr": 8.086643981027264e-07, "epoch": 3.025798255686121, "percentage": 60.52, "elapsed_time": "2:40:52", "remaining_time": "1:44:57", "throughput": 8648.58, "total_tokens": 83479824} +{"current_steps": 123860, "total_steps": 204665, "loss": 0.0, "lr": 8.085806967304491e-07, "epoch": 3.0259204065179683, "percentage": 60.52, "elapsed_time": "2:40:52", "remaining_time": "1:44:57", "throughput": 8648.67, "total_tokens": 83483792} +{"current_steps": 123865, "total_steps": 204665, "loss": 0.0, "lr": 8.084969967502248e-07, "epoch": 3.0260425573498155, "percentage": 60.52, "elapsed_time": "2:40:53", "remaining_time": "1:44:56", "throughput": 8648.71, "total_tokens": 83487184} +{"current_steps": 123870, "total_steps": 204665, "loss": 0.0, "lr": 8.084132981626615e-07, "epoch": 3.0261647081816627, "percentage": 60.52, "elapsed_time": "2:40:53", "remaining_time": "1:44:56", "throughput": 8648.73, "total_tokens": 83490448} +{"current_steps": 123875, "total_steps": 204665, "loss": 0.0, "lr": 8.083296009683683e-07, "epoch": 3.02628685901351, "percentage": 60.53, "elapsed_time": "2:40:53", "remaining_time": "1:44:56", "throughput": 8648.77, "total_tokens": 83493840} +{"current_steps": 123880, "total_steps": 204665, "loss": 0.0, "lr": 8.08245905167954e-07, "epoch": 3.026409009845357, "percentage": 60.53, "elapsed_time": "2:40:54", "remaining_time": "1:44:55", "throughput": 8648.83, "total_tokens": 83497424} +{"current_steps": 123885, "total_steps": 204665, "loss": 0.0001, "lr": 8.081622107620267e-07, "epoch": 3.0265311606772043, "percentage": 60.53, "elapsed_time": "2:40:54", "remaining_time": "1:44:55", "throughput": 8648.86, "total_tokens": 83500688} +{"current_steps": 123890, "total_steps": 204665, "loss": 0.0, "lr": 8.08078517751196e-07, "epoch": 3.0266533115090515, "percentage": 60.53, "elapsed_time": "2:40:54", "remaining_time": "1:44:54", "throughput": 8648.86, "total_tokens": 83503696} +{"current_steps": 123895, "total_steps": 204665, "loss": 0.0002, "lr": 8.079948261360693e-07, "epoch": 3.0267754623408987, "percentage": 60.54, "elapsed_time": "2:40:55", "remaining_time": "1:44:54", "throughput": 8648.9, "total_tokens": 83507088} +{"current_steps": 123900, "total_steps": 204665, "loss": 0.0, "lr": 8.079111359172561e-07, "epoch": 3.026897613172746, "percentage": 60.54, "elapsed_time": "2:40:55", "remaining_time": "1:44:54", "throughput": 8648.92, "total_tokens": 83510224} +{"current_steps": 123905, "total_steps": 204665, "loss": 0.0, "lr": 8.078274470953652e-07, "epoch": 3.027019764004593, "percentage": 60.54, "elapsed_time": "2:40:55", "remaining_time": "1:44:53", "throughput": 8648.93, "total_tokens": 83513232} +{"current_steps": 123910, "total_steps": 204665, "loss": 0.0, "lr": 8.077437596710042e-07, "epoch": 3.0271419148364402, "percentage": 60.54, "elapsed_time": "2:40:56", "remaining_time": "1:44:53", "throughput": 8648.94, "total_tokens": 83516240} +{"current_steps": 123915, "total_steps": 204665, "loss": 0.068, "lr": 8.076600736447827e-07, "epoch": 3.027264065668287, "percentage": 60.55, "elapsed_time": "2:40:56", "remaining_time": "1:44:52", "throughput": 8648.99, "total_tokens": 83519824} +{"current_steps": 123920, "total_steps": 204665, "loss": 0.0, "lr": 8.075763890173086e-07, "epoch": 3.027386216500134, "percentage": 60.55, "elapsed_time": "2:40:56", "remaining_time": "1:44:52", "throughput": 8649.05, "total_tokens": 83523408} +{"current_steps": 123925, "total_steps": 204665, "loss": 0.0, "lr": 8.074927057891911e-07, "epoch": 3.0275083673319814, "percentage": 60.55, "elapsed_time": "2:40:57", "remaining_time": "1:44:51", "throughput": 8649.05, "total_tokens": 83526416} +{"current_steps": 123930, "total_steps": 204665, "loss": 0.074, "lr": 8.07409023961038e-07, "epoch": 3.0276305181638286, "percentage": 60.55, "elapsed_time": "2:40:57", "remaining_time": "1:44:51", "throughput": 8649.07, "total_tokens": 83529552} +{"current_steps": 123935, "total_steps": 204665, "loss": 0.0, "lr": 8.073253435334582e-07, "epoch": 3.0277526689956757, "percentage": 60.56, "elapsed_time": "2:40:57", "remaining_time": "1:44:51", "throughput": 8649.1, "total_tokens": 83532880} +{"current_steps": 123940, "total_steps": 204665, "loss": 0.0, "lr": 8.072416645070607e-07, "epoch": 3.027874819827523, "percentage": 60.56, "elapsed_time": "2:40:58", "remaining_time": "1:44:50", "throughput": 8649.15, "total_tokens": 83536400} +{"current_steps": 123945, "total_steps": 204665, "loss": 0.0, "lr": 8.071579868824536e-07, "epoch": 3.02799697065937, "percentage": 60.56, "elapsed_time": "2:40:58", "remaining_time": "1:44:50", "throughput": 8649.19, "total_tokens": 83539728} +{"current_steps": 123950, "total_steps": 204665, "loss": 0.0, "lr": 8.070743106602455e-07, "epoch": 3.0281191214912173, "percentage": 60.56, "elapsed_time": "2:40:59", "remaining_time": "1:44:49", "throughput": 8649.21, "total_tokens": 83542992} +{"current_steps": 123955, "total_steps": 204665, "loss": 0.0, "lr": 8.069906358410448e-07, "epoch": 3.0282412723230645, "percentage": 60.56, "elapsed_time": "2:40:59", "remaining_time": "1:44:49", "throughput": 8649.25, "total_tokens": 83546384} +{"current_steps": 123960, "total_steps": 204665, "loss": 0.0, "lr": 8.0690696242546e-07, "epoch": 3.0283634231549117, "percentage": 60.57, "elapsed_time": "2:40:59", "remaining_time": "1:44:49", "throughput": 8649.27, "total_tokens": 83549520} +{"current_steps": 123965, "total_steps": 204665, "loss": 0.0, "lr": 8.068232904141002e-07, "epoch": 3.028485573986759, "percentage": 60.57, "elapsed_time": "2:41:00", "remaining_time": "1:44:48", "throughput": 8649.36, "total_tokens": 83553552} +{"current_steps": 123970, "total_steps": 204665, "loss": 0.0001, "lr": 8.067396198075727e-07, "epoch": 3.028607724818606, "percentage": 60.57, "elapsed_time": "2:41:00", "remaining_time": "1:44:48", "throughput": 8649.39, "total_tokens": 83556816} +{"current_steps": 123975, "total_steps": 204665, "loss": 0.0001, "lr": 8.066559506064873e-07, "epoch": 3.0287298756504533, "percentage": 60.57, "elapsed_time": "2:41:00", "remaining_time": "1:44:47", "throughput": 8649.4, "total_tokens": 83559824} +{"current_steps": 123980, "total_steps": 204665, "loss": 0.0, "lr": 8.065722828114513e-07, "epoch": 3.0288520264823005, "percentage": 60.58, "elapsed_time": "2:41:01", "remaining_time": "1:44:47", "throughput": 8649.43, "total_tokens": 83563088} +{"current_steps": 123985, "total_steps": 204665, "loss": 0.0002, "lr": 8.064886164230742e-07, "epoch": 3.0289741773141476, "percentage": 60.58, "elapsed_time": "2:41:01", "remaining_time": "1:44:46", "throughput": 8649.49, "total_tokens": 83566736} +{"current_steps": 123990, "total_steps": 204665, "loss": 0.0, "lr": 8.064049514419635e-07, "epoch": 3.029096328145995, "percentage": 60.58, "elapsed_time": "2:41:01", "remaining_time": "1:44:46", "throughput": 8649.57, "total_tokens": 83570704} +{"current_steps": 123995, "total_steps": 204665, "loss": 0.0, "lr": 8.063212878687282e-07, "epoch": 3.029218478977842, "percentage": 60.58, "elapsed_time": "2:41:02", "remaining_time": "1:44:46", "throughput": 8649.61, "total_tokens": 83574096} +{"current_steps": 124000, "total_steps": 204665, "loss": 0.0, "lr": 8.062376257039766e-07, "epoch": 3.029340629809689, "percentage": 60.59, "elapsed_time": "2:41:02", "remaining_time": "1:44:45", "throughput": 8649.63, "total_tokens": 83577296} +{"current_steps": 124005, "total_steps": 204665, "loss": 0.0003, "lr": 8.061539649483171e-07, "epoch": 3.029462780641536, "percentage": 60.59, "elapsed_time": "2:41:02", "remaining_time": "1:44:45", "throughput": 8649.66, "total_tokens": 83580560} +{"current_steps": 124010, "total_steps": 204665, "loss": 0.0, "lr": 8.060703056023583e-07, "epoch": 3.029584931473383, "percentage": 60.59, "elapsed_time": "2:41:03", "remaining_time": "1:44:44", "throughput": 8649.68, "total_tokens": 83583696} +{"current_steps": 124015, "total_steps": 204665, "loss": 0.0359, "lr": 8.059866476667081e-07, "epoch": 3.0297070823052303, "percentage": 60.59, "elapsed_time": "2:41:03", "remaining_time": "1:44:44", "throughput": 8649.69, "total_tokens": 83586704} +{"current_steps": 124020, "total_steps": 204665, "loss": 0.0002, "lr": 8.059029911419755e-07, "epoch": 3.0298292331370775, "percentage": 60.6, "elapsed_time": "2:41:03", "remaining_time": "1:44:44", "throughput": 8649.73, "total_tokens": 83590160} +{"current_steps": 124025, "total_steps": 204665, "loss": 0.0, "lr": 8.058193360287681e-07, "epoch": 3.0299513839689247, "percentage": 60.6, "elapsed_time": "2:41:04", "remaining_time": "1:44:43", "throughput": 8649.75, "total_tokens": 83593296} +{"current_steps": 124030, "total_steps": 204665, "loss": 0.0, "lr": 8.057356823276947e-07, "epoch": 3.030073534800772, "percentage": 60.6, "elapsed_time": "2:41:04", "remaining_time": "1:44:43", "throughput": 8649.81, "total_tokens": 83596944} +{"current_steps": 124035, "total_steps": 204665, "loss": 0.0513, "lr": 8.056520300393642e-07, "epoch": 3.030195685632619, "percentage": 60.6, "elapsed_time": "2:41:04", "remaining_time": "1:44:42", "throughput": 8649.83, "total_tokens": 83600144} +{"current_steps": 124040, "total_steps": 204665, "loss": 0.0348, "lr": 8.055683791643839e-07, "epoch": 3.0303178364644663, "percentage": 60.61, "elapsed_time": "2:41:05", "remaining_time": "1:44:42", "throughput": 8649.84, "total_tokens": 83603152} +{"current_steps": 124045, "total_steps": 204665, "loss": 0.0882, "lr": 8.054847297033633e-07, "epoch": 3.0304399872963135, "percentage": 60.61, "elapsed_time": "2:41:05", "remaining_time": "1:44:41", "throughput": 8649.85, "total_tokens": 83606288} +{"current_steps": 124050, "total_steps": 204665, "loss": 0.0, "lr": 8.054010816569094e-07, "epoch": 3.0305621381281607, "percentage": 60.61, "elapsed_time": "2:41:05", "remaining_time": "1:44:41", "throughput": 8649.91, "total_tokens": 83609872} +{"current_steps": 124055, "total_steps": 204665, "loss": 0.0001, "lr": 8.053174350256313e-07, "epoch": 3.030684288960008, "percentage": 60.61, "elapsed_time": "2:41:06", "remaining_time": "1:44:41", "throughput": 8649.96, "total_tokens": 83613520} +{"current_steps": 124060, "total_steps": 204665, "loss": 0.0, "lr": 8.052337898101376e-07, "epoch": 3.030806439791855, "percentage": 60.62, "elapsed_time": "2:41:06", "remaining_time": "1:44:40", "throughput": 8649.98, "total_tokens": 83616720} +{"current_steps": 124065, "total_steps": 204665, "loss": 0.0001, "lr": 8.051501460110357e-07, "epoch": 3.0309285906237022, "percentage": 60.62, "elapsed_time": "2:41:07", "remaining_time": "1:44:40", "throughput": 8650.01, "total_tokens": 83619984} +{"current_steps": 124070, "total_steps": 204665, "loss": 0.0, "lr": 8.050665036289347e-07, "epoch": 3.0310507414555494, "percentage": 60.62, "elapsed_time": "2:41:07", "remaining_time": "1:44:39", "throughput": 8650.02, "total_tokens": 83622992} +{"current_steps": 124075, "total_steps": 204665, "loss": 0.0, "lr": 8.049828626644422e-07, "epoch": 3.0311728922873966, "percentage": 60.62, "elapsed_time": "2:41:07", "remaining_time": "1:44:39", "throughput": 8650.03, "total_tokens": 83626128} +{"current_steps": 124080, "total_steps": 204665, "loss": 0.0098, "lr": 8.048992231181671e-07, "epoch": 3.031295043119244, "percentage": 60.63, "elapsed_time": "2:41:08", "remaining_time": "1:44:39", "throughput": 8650.14, "total_tokens": 83630352} +{"current_steps": 124085, "total_steps": 204665, "loss": 0.0, "lr": 8.048155849907168e-07, "epoch": 3.031417193951091, "percentage": 60.63, "elapsed_time": "2:41:08", "remaining_time": "1:44:38", "throughput": 8650.17, "total_tokens": 83633616} +{"current_steps": 124090, "total_steps": 204665, "loss": 0.0571, "lr": 8.047319482827003e-07, "epoch": 3.031539344782938, "percentage": 60.63, "elapsed_time": "2:41:08", "remaining_time": "1:44:38", "throughput": 8650.18, "total_tokens": 83636688} +{"current_steps": 124095, "total_steps": 204665, "loss": 0.0, "lr": 8.046483129947259e-07, "epoch": 3.031661495614785, "percentage": 60.63, "elapsed_time": "2:41:09", "remaining_time": "1:44:37", "throughput": 8650.24, "total_tokens": 83640336} +{"current_steps": 124100, "total_steps": 204665, "loss": 0.0, "lr": 8.045646791274011e-07, "epoch": 3.031783646446632, "percentage": 60.64, "elapsed_time": "2:41:09", "remaining_time": "1:44:37", "throughput": 8650.27, "total_tokens": 83643600} +{"current_steps": 124105, "total_steps": 204665, "loss": 0.0, "lr": 8.04481046681335e-07, "epoch": 3.0319057972784793, "percentage": 60.64, "elapsed_time": "2:41:09", "remaining_time": "1:44:36", "throughput": 8650.3, "total_tokens": 83646992} +{"current_steps": 124110, "total_steps": 204665, "loss": 0.0001, "lr": 8.043974156571351e-07, "epoch": 3.0320279481103265, "percentage": 60.64, "elapsed_time": "2:41:10", "remaining_time": "1:44:36", "throughput": 8650.33, "total_tokens": 83650256} +{"current_steps": 124115, "total_steps": 204665, "loss": 0.0325, "lr": 8.043137860554094e-07, "epoch": 3.0321500989421737, "percentage": 60.64, "elapsed_time": "2:41:10", "remaining_time": "1:44:36", "throughput": 8650.36, "total_tokens": 83653584} +{"current_steps": 124120, "total_steps": 204665, "loss": 0.0514, "lr": 8.042301578767671e-07, "epoch": 3.032272249774021, "percentage": 60.65, "elapsed_time": "2:41:10", "remaining_time": "1:44:35", "throughput": 8650.37, "total_tokens": 83656592} +{"current_steps": 124125, "total_steps": 204665, "loss": 0.0, "lr": 8.041465311218153e-07, "epoch": 3.032394400605868, "percentage": 60.65, "elapsed_time": "2:41:11", "remaining_time": "1:44:35", "throughput": 8650.4, "total_tokens": 83659920} +{"current_steps": 124130, "total_steps": 204665, "loss": 0.0001, "lr": 8.040629057911629e-07, "epoch": 3.0325165514377153, "percentage": 60.65, "elapsed_time": "2:41:11", "remaining_time": "1:44:34", "throughput": 8650.45, "total_tokens": 83663376} +{"current_steps": 124135, "total_steps": 204665, "loss": 0.0002, "lr": 8.039792818854175e-07, "epoch": 3.0326387022695624, "percentage": 60.65, "elapsed_time": "2:41:11", "remaining_time": "1:44:34", "throughput": 8650.47, "total_tokens": 83666576} +{"current_steps": 124140, "total_steps": 204665, "loss": 0.0, "lr": 8.038956594051878e-07, "epoch": 3.0327608531014096, "percentage": 60.66, "elapsed_time": "2:41:12", "remaining_time": "1:44:34", "throughput": 8650.54, "total_tokens": 83670352} +{"current_steps": 124145, "total_steps": 204665, "loss": 0.0501, "lr": 8.038120383510813e-07, "epoch": 3.032883003933257, "percentage": 60.66, "elapsed_time": "2:41:12", "remaining_time": "1:44:33", "throughput": 8650.58, "total_tokens": 83673744} +{"current_steps": 124150, "total_steps": 204665, "loss": 0.0001, "lr": 8.037284187237065e-07, "epoch": 3.033005154765104, "percentage": 60.66, "elapsed_time": "2:41:12", "remaining_time": "1:44:33", "throughput": 8650.59, "total_tokens": 83676816} +{"current_steps": 124155, "total_steps": 204665, "loss": 0.0, "lr": 8.036448005236715e-07, "epoch": 3.033127305596951, "percentage": 60.66, "elapsed_time": "2:41:13", "remaining_time": "1:44:32", "throughput": 8650.65, "total_tokens": 83680464} +{"current_steps": 124160, "total_steps": 204665, "loss": 0.0, "lr": 8.035611837515843e-07, "epoch": 3.0332494564287984, "percentage": 60.66, "elapsed_time": "2:41:13", "remaining_time": "1:44:32", "throughput": 8650.69, "total_tokens": 83683856} +{"current_steps": 124165, "total_steps": 204665, "loss": 0.085, "lr": 8.034775684080532e-07, "epoch": 3.0333716072606456, "percentage": 60.67, "elapsed_time": "2:41:14", "remaining_time": "1:44:31", "throughput": 8650.69, "total_tokens": 83686864} +{"current_steps": 124170, "total_steps": 204665, "loss": 0.0001, "lr": 8.033939544936857e-07, "epoch": 3.0334937580924928, "percentage": 60.67, "elapsed_time": "2:41:14", "remaining_time": "1:44:31", "throughput": 8650.74, "total_tokens": 83690384} +{"current_steps": 124175, "total_steps": 204665, "loss": 0.0002, "lr": 8.033103420090906e-07, "epoch": 3.03361590892434, "percentage": 60.67, "elapsed_time": "2:41:14", "remaining_time": "1:44:31", "throughput": 8650.8, "total_tokens": 83693968} +{"current_steps": 124180, "total_steps": 204665, "loss": 0.0, "lr": 8.032267309548752e-07, "epoch": 3.033738059756187, "percentage": 60.67, "elapsed_time": "2:41:15", "remaining_time": "1:44:30", "throughput": 8650.81, "total_tokens": 83697104} +{"current_steps": 124185, "total_steps": 204665, "loss": 0.0, "lr": 8.03143121331648e-07, "epoch": 3.033860210588034, "percentage": 60.68, "elapsed_time": "2:41:15", "remaining_time": "1:44:30", "throughput": 8650.87, "total_tokens": 83700688} +{"current_steps": 124190, "total_steps": 204665, "loss": 0.0001, "lr": 8.030595131400174e-07, "epoch": 3.033982361419881, "percentage": 60.68, "elapsed_time": "2:41:15", "remaining_time": "1:44:29", "throughput": 8650.9, "total_tokens": 83704016} +{"current_steps": 124195, "total_steps": 204665, "loss": 0.0, "lr": 8.029759063805906e-07, "epoch": 3.0341045122517283, "percentage": 60.68, "elapsed_time": "2:41:16", "remaining_time": "1:44:29", "throughput": 8651.01, "total_tokens": 83708240} +{"current_steps": 124200, "total_steps": 204665, "loss": 0.0, "lr": 8.028923010539763e-07, "epoch": 3.0342266630835755, "percentage": 60.68, "elapsed_time": "2:41:16", "remaining_time": "1:44:29", "throughput": 8651.04, "total_tokens": 83711568} +{"current_steps": 124205, "total_steps": 204665, "loss": 0.0439, "lr": 8.028086971607818e-07, "epoch": 3.0343488139154227, "percentage": 60.69, "elapsed_time": "2:41:16", "remaining_time": "1:44:28", "throughput": 8651.08, "total_tokens": 83714896} +{"current_steps": 124210, "total_steps": 204665, "loss": 0.0, "lr": 8.027250947016157e-07, "epoch": 3.03447096474727, "percentage": 60.69, "elapsed_time": "2:41:17", "remaining_time": "1:44:28", "throughput": 8651.1, "total_tokens": 83718160} +{"current_steps": 124215, "total_steps": 204665, "loss": 0.0002, "lr": 8.026414936770861e-07, "epoch": 3.034593115579117, "percentage": 60.69, "elapsed_time": "2:41:17", "remaining_time": "1:44:27", "throughput": 8651.14, "total_tokens": 83721488} +{"current_steps": 124220, "total_steps": 204665, "loss": 0.0751, "lr": 8.025578940878001e-07, "epoch": 3.0347152664109642, "percentage": 60.69, "elapsed_time": "2:41:17", "remaining_time": "1:44:27", "throughput": 8651.14, "total_tokens": 83724432} +{"current_steps": 124225, "total_steps": 204665, "loss": 0.0001, "lr": 8.024742959343667e-07, "epoch": 3.0348374172428114, "percentage": 60.7, "elapsed_time": "2:41:18", "remaining_time": "1:44:26", "throughput": 8651.2, "total_tokens": 83728144} +{"current_steps": 124230, "total_steps": 204665, "loss": 0.041, "lr": 8.023906992173929e-07, "epoch": 3.0349595680746586, "percentage": 60.7, "elapsed_time": "2:41:18", "remaining_time": "1:44:26", "throughput": 8651.22, "total_tokens": 83731280} +{"current_steps": 124235, "total_steps": 204665, "loss": 0.0002, "lr": 8.023071039374875e-07, "epoch": 3.035081718906506, "percentage": 60.7, "elapsed_time": "2:41:18", "remaining_time": "1:44:26", "throughput": 8651.25, "total_tokens": 83734608} +{"current_steps": 124240, "total_steps": 204665, "loss": 0.0, "lr": 8.022235100952576e-07, "epoch": 3.035203869738353, "percentage": 60.7, "elapsed_time": "2:41:19", "remaining_time": "1:44:25", "throughput": 8651.29, "total_tokens": 83737936} +{"current_steps": 124245, "total_steps": 204665, "loss": 0.0002, "lr": 8.021399176913115e-07, "epoch": 3.0353260205702, "percentage": 60.71, "elapsed_time": "2:41:19", "remaining_time": "1:44:25", "throughput": 8651.34, "total_tokens": 83741456} +{"current_steps": 124250, "total_steps": 204665, "loss": 0.0, "lr": 8.020563267262576e-07, "epoch": 3.0354481714020474, "percentage": 60.71, "elapsed_time": "2:41:19", "remaining_time": "1:44:24", "throughput": 8651.36, "total_tokens": 83744720} +{"current_steps": 124255, "total_steps": 204665, "loss": 0.0001, "lr": 8.019727372007028e-07, "epoch": 3.0355703222338946, "percentage": 60.71, "elapsed_time": "2:41:20", "remaining_time": "1:44:24", "throughput": 8651.42, "total_tokens": 83748368} +{"current_steps": 124260, "total_steps": 204665, "loss": 0.006, "lr": 8.01889149115256e-07, "epoch": 3.0356924730657417, "percentage": 60.71, "elapsed_time": "2:41:20", "remaining_time": "1:44:24", "throughput": 8651.43, "total_tokens": 83751376} +{"current_steps": 124265, "total_steps": 204665, "loss": 0.0, "lr": 8.018055624705244e-07, "epoch": 3.035814623897589, "percentage": 60.72, "elapsed_time": "2:41:20", "remaining_time": "1:44:23", "throughput": 8651.5, "total_tokens": 83755152} +{"current_steps": 124270, "total_steps": 204665, "loss": 0.0001, "lr": 8.017219772671158e-07, "epoch": 3.0359367747294357, "percentage": 60.72, "elapsed_time": "2:41:21", "remaining_time": "1:44:23", "throughput": 8651.53, "total_tokens": 83758352} +{"current_steps": 124275, "total_steps": 204665, "loss": 0.0, "lr": 8.016383935056389e-07, "epoch": 3.036058925561283, "percentage": 60.72, "elapsed_time": "2:41:21", "remaining_time": "1:44:22", "throughput": 8651.57, "total_tokens": 83761744} +{"current_steps": 124280, "total_steps": 204665, "loss": 0.0, "lr": 8.015548111867003e-07, "epoch": 3.03618107639313, "percentage": 60.72, "elapsed_time": "2:41:22", "remaining_time": "1:44:22", "throughput": 8651.58, "total_tokens": 83764816} +{"current_steps": 124285, "total_steps": 204665, "loss": 0.0001, "lr": 8.014712303109092e-07, "epoch": 3.0363032272249773, "percentage": 60.73, "elapsed_time": "2:41:22", "remaining_time": "1:44:21", "throughput": 8651.65, "total_tokens": 83768528} +{"current_steps": 124290, "total_steps": 204665, "loss": 0.0751, "lr": 8.01387650878872e-07, "epoch": 3.0364253780568244, "percentage": 60.73, "elapsed_time": "2:41:22", "remaining_time": "1:44:21", "throughput": 8651.67, "total_tokens": 83771728} +{"current_steps": 124295, "total_steps": 204665, "loss": 0.0673, "lr": 8.013040728911977e-07, "epoch": 3.0365475288886716, "percentage": 60.73, "elapsed_time": "2:41:23", "remaining_time": "1:44:21", "throughput": 8651.69, "total_tokens": 83774928} +{"current_steps": 124300, "total_steps": 204665, "loss": 0.0, "lr": 8.012204963484934e-07, "epoch": 3.036669679720519, "percentage": 60.73, "elapsed_time": "2:41:23", "remaining_time": "1:44:20", "throughput": 8651.75, "total_tokens": 83778512} +{"current_steps": 124305, "total_steps": 204665, "loss": 0.0, "lr": 8.011369212513671e-07, "epoch": 3.036791830552366, "percentage": 60.74, "elapsed_time": "2:41:23", "remaining_time": "1:44:20", "throughput": 8651.75, "total_tokens": 83781520} +{"current_steps": 124310, "total_steps": 204665, "loss": 0.0, "lr": 8.010533476004267e-07, "epoch": 3.036913981384213, "percentage": 60.74, "elapsed_time": "2:41:24", "remaining_time": "1:44:19", "throughput": 8651.78, "total_tokens": 83784784} +{"current_steps": 124315, "total_steps": 204665, "loss": 0.0, "lr": 8.009697753962799e-07, "epoch": 3.0370361322160604, "percentage": 60.74, "elapsed_time": "2:41:24", "remaining_time": "1:44:19", "throughput": 8651.82, "total_tokens": 83788112} +{"current_steps": 124320, "total_steps": 204665, "loss": 0.0, "lr": 8.008862046395346e-07, "epoch": 3.0371582830479076, "percentage": 60.74, "elapsed_time": "2:41:24", "remaining_time": "1:44:19", "throughput": 8651.84, "total_tokens": 83791376} +{"current_steps": 124325, "total_steps": 204665, "loss": 0.0001, "lr": 8.00802635330798e-07, "epoch": 3.0372804338797548, "percentage": 60.75, "elapsed_time": "2:41:25", "remaining_time": "1:44:18", "throughput": 8651.95, "total_tokens": 83795536} +{"current_steps": 124330, "total_steps": 204665, "loss": 0.0, "lr": 8.007190674706786e-07, "epoch": 3.037402584711602, "percentage": 60.75, "elapsed_time": "2:41:25", "remaining_time": "1:44:18", "throughput": 8651.95, "total_tokens": 83798480} +{"current_steps": 124335, "total_steps": 204665, "loss": 0.0, "lr": 8.006355010597832e-07, "epoch": 3.037524735543449, "percentage": 60.75, "elapsed_time": "2:41:25", "remaining_time": "1:44:17", "throughput": 8651.97, "total_tokens": 83801680} +{"current_steps": 124340, "total_steps": 204665, "loss": 0.0001, "lr": 8.005519360987201e-07, "epoch": 3.0376468863752963, "percentage": 60.75, "elapsed_time": "2:41:26", "remaining_time": "1:44:17", "throughput": 8652.0, "total_tokens": 83805008} +{"current_steps": 124345, "total_steps": 204665, "loss": 0.0, "lr": 8.004683725880976e-07, "epoch": 3.0377690372071435, "percentage": 60.76, "elapsed_time": "2:41:26", "remaining_time": "1:44:16", "throughput": 8652.05, "total_tokens": 83808464} +{"current_steps": 124350, "total_steps": 204665, "loss": 0.0, "lr": 8.00384810528522e-07, "epoch": 3.0378911880389907, "percentage": 60.76, "elapsed_time": "2:41:26", "remaining_time": "1:44:16", "throughput": 8652.08, "total_tokens": 83811728} +{"current_steps": 124355, "total_steps": 204665, "loss": 0.0, "lr": 8.003012499206025e-07, "epoch": 3.038013338870838, "percentage": 60.76, "elapsed_time": "2:41:27", "remaining_time": "1:44:16", "throughput": 8652.1, "total_tokens": 83814992} +{"current_steps": 124360, "total_steps": 204665, "loss": 0.0, "lr": 8.002176907649454e-07, "epoch": 3.038135489702685, "percentage": 60.76, "elapsed_time": "2:41:27", "remaining_time": "1:44:15", "throughput": 8652.13, "total_tokens": 83818256} +{"current_steps": 124365, "total_steps": 204665, "loss": 0.0, "lr": 8.001341330621593e-07, "epoch": 3.038257640534532, "percentage": 60.77, "elapsed_time": "2:41:27", "remaining_time": "1:44:15", "throughput": 8652.16, "total_tokens": 83821520} +{"current_steps": 124370, "total_steps": 204665, "loss": 0.0, "lr": 8.000505768128517e-07, "epoch": 3.038379791366379, "percentage": 60.77, "elapsed_time": "2:41:28", "remaining_time": "1:44:14", "throughput": 8652.2, "total_tokens": 83824912} +{"current_steps": 124375, "total_steps": 204665, "loss": 0.0, "lr": 7.999670220176297e-07, "epoch": 3.0385019421982262, "percentage": 60.77, "elapsed_time": "2:41:28", "remaining_time": "1:44:14", "throughput": 8652.24, "total_tokens": 83828304} +{"current_steps": 124380, "total_steps": 204665, "loss": 0.0, "lr": 7.998834686771016e-07, "epoch": 3.0386240930300734, "percentage": 60.77, "elapsed_time": "2:41:28", "remaining_time": "1:44:14", "throughput": 8652.35, "total_tokens": 83832528} +{"current_steps": 124385, "total_steps": 204665, "loss": 0.0572, "lr": 7.997999167918745e-07, "epoch": 3.0387462438619206, "percentage": 60.77, "elapsed_time": "2:41:29", "remaining_time": "1:44:13", "throughput": 8652.39, "total_tokens": 83835856} +{"current_steps": 124390, "total_steps": 204665, "loss": 0.0, "lr": 7.997163663625566e-07, "epoch": 3.038868394693768, "percentage": 60.78, "elapsed_time": "2:41:29", "remaining_time": "1:44:13", "throughput": 8652.42, "total_tokens": 83839184} +{"current_steps": 124395, "total_steps": 204665, "loss": 0.0, "lr": 7.996328173897548e-07, "epoch": 3.038990545525615, "percentage": 60.78, "elapsed_time": "2:41:30", "remaining_time": "1:44:12", "throughput": 8652.45, "total_tokens": 83842448} +{"current_steps": 124400, "total_steps": 204665, "loss": 0.0, "lr": 7.995492698740769e-07, "epoch": 3.039112696357462, "percentage": 60.78, "elapsed_time": "2:41:30", "remaining_time": "1:44:12", "throughput": 8652.53, "total_tokens": 83846288} +{"current_steps": 124405, "total_steps": 204665, "loss": 0.0, "lr": 7.994657238161311e-07, "epoch": 3.0392348471893094, "percentage": 60.78, "elapsed_time": "2:41:30", "remaining_time": "1:44:11", "throughput": 8652.58, "total_tokens": 83849872} +{"current_steps": 124410, "total_steps": 204665, "loss": 0.0, "lr": 7.99382179216524e-07, "epoch": 3.0393569980211566, "percentage": 60.79, "elapsed_time": "2:41:31", "remaining_time": "1:44:11", "throughput": 8652.61, "total_tokens": 83853136} +{"current_steps": 124415, "total_steps": 204665, "loss": 0.0302, "lr": 7.992986360758643e-07, "epoch": 3.0394791488530037, "percentage": 60.79, "elapsed_time": "2:41:31", "remaining_time": "1:44:11", "throughput": 8652.65, "total_tokens": 83856464} +{"current_steps": 124420, "total_steps": 204665, "loss": 0.0, "lr": 7.992150943947586e-07, "epoch": 3.039601299684851, "percentage": 60.79, "elapsed_time": "2:41:31", "remaining_time": "1:44:10", "throughput": 8652.67, "total_tokens": 83859664} +{"current_steps": 124425, "total_steps": 204665, "loss": 0.0001, "lr": 7.991315541738147e-07, "epoch": 3.039723450516698, "percentage": 60.79, "elapsed_time": "2:41:32", "remaining_time": "1:44:10", "throughput": 8652.7, "total_tokens": 83862928} +{"current_steps": 124430, "total_steps": 204665, "loss": 0.0, "lr": 7.990480154136401e-07, "epoch": 3.0398456013485453, "percentage": 60.8, "elapsed_time": "2:41:32", "remaining_time": "1:44:09", "throughput": 8652.7, "total_tokens": 83865808} +{"current_steps": 124435, "total_steps": 204665, "loss": 0.0001, "lr": 7.989644781148422e-07, "epoch": 3.0399677521803925, "percentage": 60.8, "elapsed_time": "2:41:32", "remaining_time": "1:44:09", "throughput": 8652.72, "total_tokens": 83869072} +{"current_steps": 124440, "total_steps": 204665, "loss": 0.0001, "lr": 7.988809422780292e-07, "epoch": 3.0400899030122397, "percentage": 60.8, "elapsed_time": "2:41:33", "remaining_time": "1:44:09", "throughput": 8652.77, "total_tokens": 83872592} +{"current_steps": 124445, "total_steps": 204665, "loss": 0.0, "lr": 7.987974079038076e-07, "epoch": 3.040212053844087, "percentage": 60.8, "elapsed_time": "2:41:33", "remaining_time": "1:44:08", "throughput": 8652.79, "total_tokens": 83875728} +{"current_steps": 124450, "total_steps": 204665, "loss": 0.1003, "lr": 7.987138749927858e-07, "epoch": 3.0403342046759336, "percentage": 60.81, "elapsed_time": "2:41:33", "remaining_time": "1:44:08", "throughput": 8652.82, "total_tokens": 83878992} +{"current_steps": 124455, "total_steps": 204665, "loss": 0.0, "lr": 7.986303435455705e-07, "epoch": 3.040456355507781, "percentage": 60.81, "elapsed_time": "2:41:34", "remaining_time": "1:44:07", "throughput": 8652.82, "total_tokens": 83881936} +{"current_steps": 124460, "total_steps": 204665, "loss": 0.0005, "lr": 7.985468135627696e-07, "epoch": 3.040578506339628, "percentage": 60.81, "elapsed_time": "2:41:34", "remaining_time": "1:44:07", "throughput": 8652.89, "total_tokens": 83885648} +{"current_steps": 124465, "total_steps": 204665, "loss": 0.0, "lr": 7.984632850449903e-07, "epoch": 3.040700657171475, "percentage": 60.81, "elapsed_time": "2:41:34", "remaining_time": "1:44:06", "throughput": 8652.95, "total_tokens": 83889232} +{"current_steps": 124470, "total_steps": 204665, "loss": 0.0, "lr": 7.983797579928406e-07, "epoch": 3.0408228080033224, "percentage": 60.82, "elapsed_time": "2:41:35", "remaining_time": "1:44:06", "throughput": 8653.01, "total_tokens": 83892944} +{"current_steps": 124475, "total_steps": 204665, "loss": 0.0, "lr": 7.982962324069275e-07, "epoch": 3.0409449588351696, "percentage": 60.82, "elapsed_time": "2:41:35", "remaining_time": "1:44:06", "throughput": 8653.01, "total_tokens": 83895888} +{"current_steps": 124480, "total_steps": 204665, "loss": 0.0, "lr": 7.98212708287858e-07, "epoch": 3.0410671096670168, "percentage": 60.82, "elapsed_time": "2:41:35", "remaining_time": "1:44:05", "throughput": 8653.09, "total_tokens": 83899856} +{"current_steps": 124485, "total_steps": 204665, "loss": 0.0, "lr": 7.981291856362405e-07, "epoch": 3.041189260498864, "percentage": 60.82, "elapsed_time": "2:41:36", "remaining_time": "1:44:05", "throughput": 8653.12, "total_tokens": 83903120} +{"current_steps": 124490, "total_steps": 204665, "loss": 0.0, "lr": 7.980456644526813e-07, "epoch": 3.041311411330711, "percentage": 60.83, "elapsed_time": "2:41:36", "remaining_time": "1:44:04", "throughput": 8653.15, "total_tokens": 83906384} +{"current_steps": 124495, "total_steps": 204665, "loss": 0.0001, "lr": 7.979621447377885e-07, "epoch": 3.0414335621625583, "percentage": 60.83, "elapsed_time": "2:41:36", "remaining_time": "1:44:04", "throughput": 8653.18, "total_tokens": 83909648} +{"current_steps": 124500, "total_steps": 204665, "loss": 0.0372, "lr": 7.978786264921695e-07, "epoch": 3.0415557129944055, "percentage": 60.83, "elapsed_time": "2:41:37", "remaining_time": "1:44:04", "throughput": 8653.22, "total_tokens": 83913040} +{"current_steps": 124505, "total_steps": 204665, "loss": 0.0001, "lr": 7.977951097164312e-07, "epoch": 3.0416778638262527, "percentage": 60.83, "elapsed_time": "2:41:37", "remaining_time": "1:44:03", "throughput": 8653.3, "total_tokens": 83916944} +{"current_steps": 124510, "total_steps": 204665, "loss": 0.0002, "lr": 7.977115944111819e-07, "epoch": 3.0418000146581, "percentage": 60.84, "elapsed_time": "2:41:38", "remaining_time": "1:44:03", "throughput": 8653.31, "total_tokens": 83920016} +{"current_steps": 124515, "total_steps": 204665, "loss": 0.0, "lr": 7.976280805770275e-07, "epoch": 3.041922165489947, "percentage": 60.84, "elapsed_time": "2:41:38", "remaining_time": "1:44:02", "throughput": 8653.37, "total_tokens": 83923600} +{"current_steps": 124520, "total_steps": 204665, "loss": 0.0, "lr": 7.975445682145766e-07, "epoch": 3.0420443163217943, "percentage": 60.84, "elapsed_time": "2:41:38", "remaining_time": "1:44:02", "throughput": 8653.41, "total_tokens": 83926992} +{"current_steps": 124525, "total_steps": 204665, "loss": 0.0, "lr": 7.974610573244362e-07, "epoch": 3.0421664671536415, "percentage": 60.84, "elapsed_time": "2:41:39", "remaining_time": "1:44:01", "throughput": 8653.46, "total_tokens": 83930576} +{"current_steps": 124530, "total_steps": 204665, "loss": 0.0, "lr": 7.97377547907213e-07, "epoch": 3.0422886179854887, "percentage": 60.85, "elapsed_time": "2:41:39", "remaining_time": "1:44:01", "throughput": 8653.48, "total_tokens": 83933648} +{"current_steps": 124535, "total_steps": 204665, "loss": 0.0, "lr": 7.972940399635153e-07, "epoch": 3.042410768817336, "percentage": 60.85, "elapsed_time": "2:41:39", "remaining_time": "1:44:01", "throughput": 8653.52, "total_tokens": 83937040} +{"current_steps": 124540, "total_steps": 204665, "loss": 0.0465, "lr": 7.972105334939493e-07, "epoch": 3.0425329196491826, "percentage": 60.85, "elapsed_time": "2:41:40", "remaining_time": "1:44:00", "throughput": 8653.54, "total_tokens": 83940240} +{"current_steps": 124545, "total_steps": 204665, "loss": 0.0, "lr": 7.971270284991234e-07, "epoch": 3.04265507048103, "percentage": 60.85, "elapsed_time": "2:41:40", "remaining_time": "1:44:00", "throughput": 8653.56, "total_tokens": 83943376} +{"current_steps": 124550, "total_steps": 204665, "loss": 0.0, "lr": 7.970435249796438e-07, "epoch": 3.042777221312877, "percentage": 60.86, "elapsed_time": "2:41:40", "remaining_time": "1:43:59", "throughput": 8653.58, "total_tokens": 83946512} +{"current_steps": 124555, "total_steps": 204665, "loss": 0.0, "lr": 7.969600229361181e-07, "epoch": 3.042899372144724, "percentage": 60.86, "elapsed_time": "2:41:41", "remaining_time": "1:43:59", "throughput": 8653.64, "total_tokens": 83950160} +{"current_steps": 124560, "total_steps": 204665, "loss": 0.0, "lr": 7.968765223691544e-07, "epoch": 3.0430215229765714, "percentage": 60.86, "elapsed_time": "2:41:41", "remaining_time": "1:43:59", "throughput": 8653.64, "total_tokens": 83953168} +{"current_steps": 124565, "total_steps": 204665, "loss": 0.0001, "lr": 7.967930232793589e-07, "epoch": 3.0431436738084185, "percentage": 60.86, "elapsed_time": "2:41:41", "remaining_time": "1:43:58", "throughput": 8653.65, "total_tokens": 83956176} +{"current_steps": 124570, "total_steps": 204665, "loss": 0.0001, "lr": 7.967095256673395e-07, "epoch": 3.0432658246402657, "percentage": 60.87, "elapsed_time": "2:41:42", "remaining_time": "1:43:58", "throughput": 8653.69, "total_tokens": 83959632} +{"current_steps": 124575, "total_steps": 204665, "loss": 0.0, "lr": 7.966260295337029e-07, "epoch": 3.043387975472113, "percentage": 60.87, "elapsed_time": "2:41:42", "remaining_time": "1:43:57", "throughput": 8653.75, "total_tokens": 83963280} +{"current_steps": 124580, "total_steps": 204665, "loss": 0.0, "lr": 7.965425348790564e-07, "epoch": 3.04351012630396, "percentage": 60.87, "elapsed_time": "2:41:42", "remaining_time": "1:43:57", "throughput": 8653.76, "total_tokens": 83966352} +{"current_steps": 124585, "total_steps": 204665, "loss": 0.0751, "lr": 7.964590417040075e-07, "epoch": 3.0436322771358073, "percentage": 60.87, "elapsed_time": "2:41:43", "remaining_time": "1:43:56", "throughput": 8653.8, "total_tokens": 83969744} +{"current_steps": 124590, "total_steps": 204665, "loss": 0.0, "lr": 7.963755500091629e-07, "epoch": 3.0437544279676545, "percentage": 60.88, "elapsed_time": "2:41:43", "remaining_time": "1:43:56", "throughput": 8653.82, "total_tokens": 83972944} +{"current_steps": 124595, "total_steps": 204665, "loss": 0.0001, "lr": 7.962920597951305e-07, "epoch": 3.0438765787995017, "percentage": 60.88, "elapsed_time": "2:41:43", "remaining_time": "1:43:56", "throughput": 8653.9, "total_tokens": 83976720} +{"current_steps": 124600, "total_steps": 204665, "loss": 0.0, "lr": 7.962085710625166e-07, "epoch": 3.043998729631349, "percentage": 60.88, "elapsed_time": "2:41:44", "remaining_time": "1:43:55", "throughput": 8653.92, "total_tokens": 83979984} +{"current_steps": 124605, "total_steps": 204665, "loss": 0.0, "lr": 7.961250838119292e-07, "epoch": 3.044120880463196, "percentage": 60.88, "elapsed_time": "2:41:44", "remaining_time": "1:43:55", "throughput": 8653.95, "total_tokens": 83983184} +{"current_steps": 124610, "total_steps": 204665, "loss": 0.0727, "lr": 7.960415980439747e-07, "epoch": 3.0442430312950433, "percentage": 60.88, "elapsed_time": "2:41:44", "remaining_time": "1:43:54", "throughput": 8653.98, "total_tokens": 83986576} +{"current_steps": 124615, "total_steps": 204665, "loss": 0.0001, "lr": 7.959581137592606e-07, "epoch": 3.0443651821268904, "percentage": 60.89, "elapsed_time": "2:41:45", "remaining_time": "1:43:54", "throughput": 8654.01, "total_tokens": 83989776} +{"current_steps": 124620, "total_steps": 204665, "loss": 0.0001, "lr": 7.95874630958394e-07, "epoch": 3.0444873329587376, "percentage": 60.89, "elapsed_time": "2:41:45", "remaining_time": "1:43:54", "throughput": 8654.06, "total_tokens": 83993360} +{"current_steps": 124625, "total_steps": 204665, "loss": 0.0, "lr": 7.957911496419821e-07, "epoch": 3.044609483790585, "percentage": 60.89, "elapsed_time": "2:41:46", "remaining_time": "1:43:53", "throughput": 8654.11, "total_tokens": 83996752} +{"current_steps": 124630, "total_steps": 204665, "loss": 0.0, "lr": 7.95707669810632e-07, "epoch": 3.0447316346224316, "percentage": 60.89, "elapsed_time": "2:41:46", "remaining_time": "1:43:53", "throughput": 8654.16, "total_tokens": 84000272} +{"current_steps": 124635, "total_steps": 204665, "loss": 0.0, "lr": 7.956241914649503e-07, "epoch": 3.0448537854542788, "percentage": 60.9, "elapsed_time": "2:41:46", "remaining_time": "1:43:52", "throughput": 8654.19, "total_tokens": 84003536} +{"current_steps": 124640, "total_steps": 204665, "loss": 0.0, "lr": 7.955407146055448e-07, "epoch": 3.044975936286126, "percentage": 60.9, "elapsed_time": "2:41:47", "remaining_time": "1:43:52", "throughput": 8654.23, "total_tokens": 84007056} +{"current_steps": 124645, "total_steps": 204665, "loss": 0.0, "lr": 7.954572392330219e-07, "epoch": 3.045098087117973, "percentage": 60.9, "elapsed_time": "2:41:47", "remaining_time": "1:43:51", "throughput": 8654.24, "total_tokens": 84010064} +{"current_steps": 124650, "total_steps": 204665, "loss": 0.0814, "lr": 7.953737653479889e-07, "epoch": 3.0452202379498203, "percentage": 60.9, "elapsed_time": "2:41:47", "remaining_time": "1:43:51", "throughput": 8654.29, "total_tokens": 84013584} +{"current_steps": 124655, "total_steps": 204665, "loss": 0.0625, "lr": 7.952902929510534e-07, "epoch": 3.0453423887816675, "percentage": 60.91, "elapsed_time": "2:41:48", "remaining_time": "1:43:51", "throughput": 8654.34, "total_tokens": 84017168} +{"current_steps": 124660, "total_steps": 204665, "loss": 0.0, "lr": 7.952068220428215e-07, "epoch": 3.0454645396135147, "percentage": 60.91, "elapsed_time": "2:41:48", "remaining_time": "1:43:50", "throughput": 8654.41, "total_tokens": 84020944} +{"current_steps": 124665, "total_steps": 204665, "loss": 0.0225, "lr": 7.951233526239012e-07, "epoch": 3.045586690445362, "percentage": 60.91, "elapsed_time": "2:41:48", "remaining_time": "1:43:50", "throughput": 8654.45, "total_tokens": 84024336} +{"current_steps": 124670, "total_steps": 204665, "loss": 0.0, "lr": 7.950398846948984e-07, "epoch": 3.045708841277209, "percentage": 60.91, "elapsed_time": "2:41:49", "remaining_time": "1:43:49", "throughput": 8654.49, "total_tokens": 84027728} +{"current_steps": 124675, "total_steps": 204665, "loss": 0.0, "lr": 7.949564182564209e-07, "epoch": 3.0458309921090563, "percentage": 60.92, "elapsed_time": "2:41:49", "remaining_time": "1:43:49", "throughput": 8654.51, "total_tokens": 84030928} +{"current_steps": 124680, "total_steps": 204665, "loss": 0.0, "lr": 7.948729533090758e-07, "epoch": 3.0459531429409035, "percentage": 60.92, "elapsed_time": "2:41:49", "remaining_time": "1:43:49", "throughput": 8654.56, "total_tokens": 84034448} +{"current_steps": 124685, "total_steps": 204665, "loss": 0.0378, "lr": 7.947894898534693e-07, "epoch": 3.0460752937727507, "percentage": 60.92, "elapsed_time": "2:41:50", "remaining_time": "1:43:48", "throughput": 8654.57, "total_tokens": 84037520} +{"current_steps": 124690, "total_steps": 204665, "loss": 0.0, "lr": 7.947060278902091e-07, "epoch": 3.046197444604598, "percentage": 60.92, "elapsed_time": "2:41:50", "remaining_time": "1:43:48", "throughput": 8654.66, "total_tokens": 84041488} +{"current_steps": 124695, "total_steps": 204665, "loss": 0.0411, "lr": 7.946225674199017e-07, "epoch": 3.046319595436445, "percentage": 60.93, "elapsed_time": "2:41:50", "remaining_time": "1:43:47", "throughput": 8654.67, "total_tokens": 84044624} +{"current_steps": 124700, "total_steps": 204665, "loss": 0.0, "lr": 7.945391084431546e-07, "epoch": 3.0464417462682922, "percentage": 60.93, "elapsed_time": "2:41:51", "remaining_time": "1:43:47", "throughput": 8654.7, "total_tokens": 84047888} +{"current_steps": 124705, "total_steps": 204665, "loss": 0.0, "lr": 7.944556509605737e-07, "epoch": 3.0465638971001394, "percentage": 60.93, "elapsed_time": "2:41:51", "remaining_time": "1:43:47", "throughput": 8654.74, "total_tokens": 84051344} +{"current_steps": 124710, "total_steps": 204665, "loss": 0.0002, "lr": 7.943721949727668e-07, "epoch": 3.0466860479319866, "percentage": 60.93, "elapsed_time": "2:41:51", "remaining_time": "1:43:46", "throughput": 8654.75, "total_tokens": 84054416} +{"current_steps": 124715, "total_steps": 204665, "loss": 0.0, "lr": 7.94288740480341e-07, "epoch": 3.046808198763834, "percentage": 60.94, "elapsed_time": "2:41:52", "remaining_time": "1:43:46", "throughput": 8654.8, "total_tokens": 84057872} +{"current_steps": 124720, "total_steps": 204665, "loss": 0.0, "lr": 7.942052874839024e-07, "epoch": 3.0469303495956805, "percentage": 60.94, "elapsed_time": "2:41:52", "remaining_time": "1:43:45", "throughput": 8654.81, "total_tokens": 84060880} +{"current_steps": 124725, "total_steps": 204665, "loss": 0.0, "lr": 7.941218359840587e-07, "epoch": 3.0470525004275277, "percentage": 60.94, "elapsed_time": "2:41:52", "remaining_time": "1:43:45", "throughput": 8654.82, "total_tokens": 84063952} +{"current_steps": 124730, "total_steps": 204665, "loss": 0.0725, "lr": 7.940383859814162e-07, "epoch": 3.047174651259375, "percentage": 60.94, "elapsed_time": "2:41:53", "remaining_time": "1:43:44", "throughput": 8654.86, "total_tokens": 84067472} +{"current_steps": 124735, "total_steps": 204665, "loss": 0.0, "lr": 7.93954937476582e-07, "epoch": 3.047296802091222, "percentage": 60.95, "elapsed_time": "2:41:53", "remaining_time": "1:43:44", "throughput": 8654.94, "total_tokens": 84071312} +{"current_steps": 124740, "total_steps": 204665, "loss": 0.0001, "lr": 7.938714904701627e-07, "epoch": 3.0474189529230693, "percentage": 60.95, "elapsed_time": "2:41:54", "remaining_time": "1:43:44", "throughput": 8655.0, "total_tokens": 84074960} +{"current_steps": 124745, "total_steps": 204665, "loss": 0.0365, "lr": 7.937880449627655e-07, "epoch": 3.0475411037549165, "percentage": 60.95, "elapsed_time": "2:41:54", "remaining_time": "1:43:43", "throughput": 8655.03, "total_tokens": 84078224} +{"current_steps": 124750, "total_steps": 204665, "loss": 0.012, "lr": 7.937046009549971e-07, "epoch": 3.0476632545867637, "percentage": 60.95, "elapsed_time": "2:41:54", "remaining_time": "1:43:43", "throughput": 8655.07, "total_tokens": 84081680} +{"current_steps": 124755, "total_steps": 204665, "loss": 0.0002, "lr": 7.936211584474641e-07, "epoch": 3.047785405418611, "percentage": 60.96, "elapsed_time": "2:41:55", "remaining_time": "1:43:42", "throughput": 8655.11, "total_tokens": 84085008} +{"current_steps": 124760, "total_steps": 204665, "loss": 0.1145, "lr": 7.935377174407742e-07, "epoch": 3.047907556250458, "percentage": 60.96, "elapsed_time": "2:41:55", "remaining_time": "1:43:42", "throughput": 8655.12, "total_tokens": 84088080} +{"current_steps": 124765, "total_steps": 204665, "loss": 0.0, "lr": 7.934542779355329e-07, "epoch": 3.0480297070823053, "percentage": 60.96, "elapsed_time": "2:41:55", "remaining_time": "1:43:42", "throughput": 8655.16, "total_tokens": 84091472} +{"current_steps": 124770, "total_steps": 204665, "loss": 0.035, "lr": 7.933708399323478e-07, "epoch": 3.0481518579141524, "percentage": 60.96, "elapsed_time": "2:41:56", "remaining_time": "1:43:41", "throughput": 8655.18, "total_tokens": 84094736} +{"current_steps": 124775, "total_steps": 204665, "loss": 0.0, "lr": 7.932874034318256e-07, "epoch": 3.0482740087459996, "percentage": 60.97, "elapsed_time": "2:41:56", "remaining_time": "1:43:41", "throughput": 8655.21, "total_tokens": 84098000} +{"current_steps": 124780, "total_steps": 204665, "loss": 0.0738, "lr": 7.932039684345731e-07, "epoch": 3.048396159577847, "percentage": 60.97, "elapsed_time": "2:41:56", "remaining_time": "1:43:40", "throughput": 8655.22, "total_tokens": 84101008} +{"current_steps": 124785, "total_steps": 204665, "loss": 0.0001, "lr": 7.93120534941197e-07, "epoch": 3.048518310409694, "percentage": 60.97, "elapsed_time": "2:41:57", "remaining_time": "1:43:40", "throughput": 8655.23, "total_tokens": 84104080} +{"current_steps": 124790, "total_steps": 204665, "loss": 0.0001, "lr": 7.930371029523037e-07, "epoch": 3.048640461241541, "percentage": 60.97, "elapsed_time": "2:41:57", "remaining_time": "1:43:39", "throughput": 8655.3, "total_tokens": 84107792} +{"current_steps": 124795, "total_steps": 204665, "loss": 0.0004, "lr": 7.929536724685006e-07, "epoch": 3.0487626120733884, "percentage": 60.98, "elapsed_time": "2:41:57", "remaining_time": "1:43:39", "throughput": 8655.36, "total_tokens": 84111504} +{"current_steps": 124800, "total_steps": 204665, "loss": 0.0001, "lr": 7.928702434903938e-07, "epoch": 3.0488847629052356, "percentage": 60.98, "elapsed_time": "2:41:58", "remaining_time": "1:43:39", "throughput": 8655.4, "total_tokens": 84114832} +{"current_steps": 124805, "total_steps": 204665, "loss": 0.0002, "lr": 7.927868160185901e-07, "epoch": 3.0490069137370828, "percentage": 60.98, "elapsed_time": "2:41:58", "remaining_time": "1:43:38", "throughput": 8655.43, "total_tokens": 84118224} +{"current_steps": 124810, "total_steps": 204665, "loss": 0.0, "lr": 7.92703390053697e-07, "epoch": 3.0491290645689295, "percentage": 60.98, "elapsed_time": "2:41:58", "remaining_time": "1:43:38", "throughput": 8655.45, "total_tokens": 84121360} +{"current_steps": 124815, "total_steps": 204665, "loss": 0.0004, "lr": 7.926199655963201e-07, "epoch": 3.0492512154007767, "percentage": 60.99, "elapsed_time": "2:41:59", "remaining_time": "1:43:37", "throughput": 8655.47, "total_tokens": 84124432} +{"current_steps": 124820, "total_steps": 204665, "loss": 0.0, "lr": 7.92536542647067e-07, "epoch": 3.049373366232624, "percentage": 60.99, "elapsed_time": "2:41:59", "remaining_time": "1:43:37", "throughput": 8655.47, "total_tokens": 84127376} +{"current_steps": 124825, "total_steps": 204665, "loss": 0.0001, "lr": 7.924531212065436e-07, "epoch": 3.049495517064471, "percentage": 60.99, "elapsed_time": "2:41:59", "remaining_time": "1:43:37", "throughput": 8655.47, "total_tokens": 84130384} +{"current_steps": 124830, "total_steps": 204665, "loss": 0.0, "lr": 7.923697012753572e-07, "epoch": 3.0496176678963183, "percentage": 60.99, "elapsed_time": "2:42:00", "remaining_time": "1:43:36", "throughput": 8655.48, "total_tokens": 84133328} +{"current_steps": 124835, "total_steps": 204665, "loss": 0.0, "lr": 7.922862828541143e-07, "epoch": 3.0497398187281655, "percentage": 60.99, "elapsed_time": "2:42:00", "remaining_time": "1:43:36", "throughput": 8655.5, "total_tokens": 84136592} +{"current_steps": 124840, "total_steps": 204665, "loss": 0.0, "lr": 7.922028659434209e-07, "epoch": 3.0498619695600127, "percentage": 61.0, "elapsed_time": "2:42:00", "remaining_time": "1:43:35", "throughput": 8655.55, "total_tokens": 84140112} +{"current_steps": 124845, "total_steps": 204665, "loss": 0.0001, "lr": 7.921194505438848e-07, "epoch": 3.04998412039186, "percentage": 61.0, "elapsed_time": "2:42:01", "remaining_time": "1:43:35", "throughput": 8655.61, "total_tokens": 84143760} +{"current_steps": 124850, "total_steps": 204665, "loss": 0.0, "lr": 7.920360366561113e-07, "epoch": 3.050106271223707, "percentage": 61.0, "elapsed_time": "2:42:01", "remaining_time": "1:43:34", "throughput": 8655.65, "total_tokens": 84147152} +{"current_steps": 124855, "total_steps": 204665, "loss": 0.0, "lr": 7.919526242807082e-07, "epoch": 3.0502284220555542, "percentage": 61.0, "elapsed_time": "2:42:01", "remaining_time": "1:43:34", "throughput": 8655.67, "total_tokens": 84150288} +{"current_steps": 124860, "total_steps": 204665, "loss": 0.0, "lr": 7.918692134182812e-07, "epoch": 3.0503505728874014, "percentage": 61.01, "elapsed_time": "2:42:02", "remaining_time": "1:43:34", "throughput": 8655.69, "total_tokens": 84153424} +{"current_steps": 124865, "total_steps": 204665, "loss": 0.0, "lr": 7.917858040694372e-07, "epoch": 3.0504727237192486, "percentage": 61.01, "elapsed_time": "2:42:02", "remaining_time": "1:43:33", "throughput": 8655.73, "total_tokens": 84156880} +{"current_steps": 124870, "total_steps": 204665, "loss": 0.0, "lr": 7.917023962347833e-07, "epoch": 3.050594874551096, "percentage": 61.01, "elapsed_time": "2:42:03", "remaining_time": "1:43:33", "throughput": 8655.78, "total_tokens": 84160336} +{"current_steps": 124875, "total_steps": 204665, "loss": 0.0, "lr": 7.916189899149251e-07, "epoch": 3.050717025382943, "percentage": 61.01, "elapsed_time": "2:42:03", "remaining_time": "1:43:32", "throughput": 8655.84, "total_tokens": 84164048} +{"current_steps": 124880, "total_steps": 204665, "loss": 0.0, "lr": 7.915355851104701e-07, "epoch": 3.05083917621479, "percentage": 61.02, "elapsed_time": "2:42:03", "remaining_time": "1:43:32", "throughput": 8655.88, "total_tokens": 84167376} +{"current_steps": 124885, "total_steps": 204665, "loss": 0.0001, "lr": 7.914521818220243e-07, "epoch": 3.0509613270466374, "percentage": 61.02, "elapsed_time": "2:42:04", "remaining_time": "1:43:32", "throughput": 8655.9, "total_tokens": 84170640} +{"current_steps": 124890, "total_steps": 204665, "loss": 0.0679, "lr": 7.913687800501942e-07, "epoch": 3.0510834778784846, "percentage": 61.02, "elapsed_time": "2:42:04", "remaining_time": "1:43:31", "throughput": 8655.97, "total_tokens": 84174352} +{"current_steps": 124895, "total_steps": 204665, "loss": 0.0001, "lr": 7.912853797955866e-07, "epoch": 3.0512056287103313, "percentage": 61.02, "elapsed_time": "2:42:04", "remaining_time": "1:43:31", "throughput": 8655.99, "total_tokens": 84177616} +{"current_steps": 124900, "total_steps": 204665, "loss": 0.0, "lr": 7.912019810588075e-07, "epoch": 3.0513277795421785, "percentage": 61.03, "elapsed_time": "2:42:05", "remaining_time": "1:43:30", "throughput": 8656.01, "total_tokens": 84180752} +{"current_steps": 124905, "total_steps": 204665, "loss": 0.0, "lr": 7.911185838404642e-07, "epoch": 3.0514499303740257, "percentage": 61.03, "elapsed_time": "2:42:05", "remaining_time": "1:43:30", "throughput": 8656.02, "total_tokens": 84183824} +{"current_steps": 124910, "total_steps": 204665, "loss": 0.0, "lr": 7.910351881411624e-07, "epoch": 3.051572081205873, "percentage": 61.03, "elapsed_time": "2:42:05", "remaining_time": "1:43:29", "throughput": 8656.07, "total_tokens": 84187280} +{"current_steps": 124915, "total_steps": 204665, "loss": 0.0002, "lr": 7.909517939615092e-07, "epoch": 3.05169423203772, "percentage": 61.03, "elapsed_time": "2:42:06", "remaining_time": "1:43:29", "throughput": 8656.09, "total_tokens": 84190480} +{"current_steps": 124920, "total_steps": 204665, "loss": 0.0, "lr": 7.908684013021106e-07, "epoch": 3.0518163828695672, "percentage": 61.04, "elapsed_time": "2:42:06", "remaining_time": "1:43:29", "throughput": 8656.18, "total_tokens": 84194448} +{"current_steps": 124925, "total_steps": 204665, "loss": 0.0, "lr": 7.907850101635731e-07, "epoch": 3.0519385337014144, "percentage": 61.04, "elapsed_time": "2:42:06", "remaining_time": "1:43:28", "throughput": 8656.25, "total_tokens": 84198224} +{"current_steps": 124930, "total_steps": 204665, "loss": 0.0, "lr": 7.907016205465035e-07, "epoch": 3.0520606845332616, "percentage": 61.04, "elapsed_time": "2:42:07", "remaining_time": "1:43:28", "throughput": 8656.27, "total_tokens": 84201488} +{"current_steps": 124935, "total_steps": 204665, "loss": 0.0648, "lr": 7.906182324515079e-07, "epoch": 3.052182835365109, "percentage": 61.04, "elapsed_time": "2:42:07", "remaining_time": "1:43:27", "throughput": 8656.32, "total_tokens": 84205072} +{"current_steps": 124940, "total_steps": 204665, "loss": 0.0, "lr": 7.905348458791932e-07, "epoch": 3.052304986196956, "percentage": 61.05, "elapsed_time": "2:42:07", "remaining_time": "1:43:27", "throughput": 8656.41, "total_tokens": 84209040} +{"current_steps": 124945, "total_steps": 204665, "loss": 0.0001, "lr": 7.904514608301649e-07, "epoch": 3.052427137028803, "percentage": 61.05, "elapsed_time": "2:42:08", "remaining_time": "1:43:27", "throughput": 8656.44, "total_tokens": 84212368} +{"current_steps": 124950, "total_steps": 204665, "loss": 0.0, "lr": 7.903680773050304e-07, "epoch": 3.0525492878606504, "percentage": 61.05, "elapsed_time": "2:42:08", "remaining_time": "1:43:26", "throughput": 8656.46, "total_tokens": 84215568} +{"current_steps": 124955, "total_steps": 204665, "loss": 0.0, "lr": 7.902846953043951e-07, "epoch": 3.0526714386924976, "percentage": 61.05, "elapsed_time": "2:42:08", "remaining_time": "1:43:26", "throughput": 8656.51, "total_tokens": 84219024} +{"current_steps": 124960, "total_steps": 204665, "loss": 0.0245, "lr": 7.902013148288661e-07, "epoch": 3.0527935895243448, "percentage": 61.06, "elapsed_time": "2:42:09", "remaining_time": "1:43:25", "throughput": 8656.56, "total_tokens": 84222544} +{"current_steps": 124965, "total_steps": 204665, "loss": 0.0001, "lr": 7.901179358790499e-07, "epoch": 3.052915740356192, "percentage": 61.06, "elapsed_time": "2:42:09", "remaining_time": "1:43:25", "throughput": 8656.58, "total_tokens": 84225680} +{"current_steps": 124970, "total_steps": 204665, "loss": 0.0001, "lr": 7.90034558455552e-07, "epoch": 3.053037891188039, "percentage": 61.06, "elapsed_time": "2:42:10", "remaining_time": "1:43:24", "throughput": 8656.64, "total_tokens": 84229392} +{"current_steps": 124975, "total_steps": 204665, "loss": 0.0, "lr": 7.899511825589798e-07, "epoch": 3.0531600420198863, "percentage": 61.06, "elapsed_time": "2:42:10", "remaining_time": "1:43:24", "throughput": 8656.66, "total_tokens": 84232528} +{"current_steps": 124980, "total_steps": 204665, "loss": 0.0, "lr": 7.898678081899386e-07, "epoch": 3.0532821928517335, "percentage": 61.07, "elapsed_time": "2:42:10", "remaining_time": "1:43:24", "throughput": 8656.67, "total_tokens": 84235600} +{"current_steps": 124985, "total_steps": 204665, "loss": 0.0, "lr": 7.897844353490355e-07, "epoch": 3.0534043436835803, "percentage": 61.07, "elapsed_time": "2:42:11", "remaining_time": "1:43:23", "throughput": 8656.75, "total_tokens": 84239440} +{"current_steps": 124990, "total_steps": 204665, "loss": 0.0, "lr": 7.897010640368767e-07, "epoch": 3.0535264945154275, "percentage": 61.07, "elapsed_time": "2:42:11", "remaining_time": "1:43:23", "throughput": 8656.79, "total_tokens": 84242896} +{"current_steps": 124995, "total_steps": 204665, "loss": 0.004, "lr": 7.89617694254068e-07, "epoch": 3.0536486453472746, "percentage": 61.07, "elapsed_time": "2:42:11", "remaining_time": "1:43:22", "throughput": 8656.83, "total_tokens": 84246352} +{"current_steps": 125000, "total_steps": 204665, "loss": 0.0328, "lr": 7.895343260012163e-07, "epoch": 3.053770796179122, "percentage": 61.08, "elapsed_time": "2:42:12", "remaining_time": "1:43:22", "throughput": 8656.85, "total_tokens": 84249488} +{"current_steps": 125005, "total_steps": 204665, "loss": 0.0572, "lr": 7.894509592789272e-07, "epoch": 3.053892947010969, "percentage": 61.08, "elapsed_time": "2:42:12", "remaining_time": "1:43:22", "throughput": 8656.87, "total_tokens": 84252688} +{"current_steps": 125010, "total_steps": 204665, "loss": 0.0455, "lr": 7.893675940878079e-07, "epoch": 3.054015097842816, "percentage": 61.08, "elapsed_time": "2:42:12", "remaining_time": "1:43:21", "throughput": 8656.93, "total_tokens": 84256336} +{"current_steps": 125015, "total_steps": 204665, "loss": 0.0, "lr": 7.892842304284634e-07, "epoch": 3.0541372486746634, "percentage": 61.08, "elapsed_time": "2:42:13", "remaining_time": "1:43:21", "throughput": 8656.99, "total_tokens": 84259920} +{"current_steps": 125020, "total_steps": 204665, "loss": 0.0291, "lr": 7.892008683015009e-07, "epoch": 3.0542593995065106, "percentage": 61.09, "elapsed_time": "2:42:13", "remaining_time": "1:43:20", "throughput": 8657.0, "total_tokens": 84262992} +{"current_steps": 125025, "total_steps": 204665, "loss": 0.0408, "lr": 7.891175077075267e-07, "epoch": 3.054381550338358, "percentage": 61.09, "elapsed_time": "2:42:13", "remaining_time": "1:43:20", "throughput": 8657.06, "total_tokens": 84266640} +{"current_steps": 125030, "total_steps": 204665, "loss": 0.0, "lr": 7.890341486471464e-07, "epoch": 3.054503701170205, "percentage": 61.09, "elapsed_time": "2:42:14", "remaining_time": "1:43:19", "throughput": 8657.14, "total_tokens": 84270544} +{"current_steps": 125035, "total_steps": 204665, "loss": 0.0, "lr": 7.889507911209669e-07, "epoch": 3.054625852002052, "percentage": 61.09, "elapsed_time": "2:42:14", "remaining_time": "1:43:19", "throughput": 8657.18, "total_tokens": 84274000} +{"current_steps": 125040, "total_steps": 204665, "loss": 0.0, "lr": 7.888674351295937e-07, "epoch": 3.0547480028338994, "percentage": 61.09, "elapsed_time": "2:42:14", "remaining_time": "1:43:19", "throughput": 8657.19, "total_tokens": 84277008} +{"current_steps": 125045, "total_steps": 204665, "loss": 0.0001, "lr": 7.887840806736335e-07, "epoch": 3.0548701536657465, "percentage": 61.1, "elapsed_time": "2:42:15", "remaining_time": "1:43:18", "throughput": 8657.27, "total_tokens": 84280912} +{"current_steps": 125050, "total_steps": 204665, "loss": 0.0002, "lr": 7.88700727753692e-07, "epoch": 3.0549923044975937, "percentage": 61.1, "elapsed_time": "2:42:15", "remaining_time": "1:43:18", "throughput": 8657.28, "total_tokens": 84283984} +{"current_steps": 125055, "total_steps": 204665, "loss": 0.0002, "lr": 7.886173763703756e-07, "epoch": 3.055114455329441, "percentage": 61.1, "elapsed_time": "2:42:15", "remaining_time": "1:43:17", "throughput": 8657.29, "total_tokens": 84287056} +{"current_steps": 125060, "total_steps": 204665, "loss": 0.0, "lr": 7.885340265242909e-07, "epoch": 3.055236606161288, "percentage": 61.1, "elapsed_time": "2:42:16", "remaining_time": "1:43:17", "throughput": 8657.38, "total_tokens": 84291088} +{"current_steps": 125065, "total_steps": 204665, "loss": 0.0, "lr": 7.884506782160431e-07, "epoch": 3.0553587569931353, "percentage": 61.11, "elapsed_time": "2:42:16", "remaining_time": "1:43:17", "throughput": 8657.41, "total_tokens": 84294352} +{"current_steps": 125070, "total_steps": 204665, "loss": 0.0572, "lr": 7.883673314462394e-07, "epoch": 3.0554809078249825, "percentage": 61.11, "elapsed_time": "2:42:17", "remaining_time": "1:43:16", "throughput": 8657.45, "total_tokens": 84297680} +{"current_steps": 125075, "total_steps": 204665, "loss": 0.0003, "lr": 7.882839862154849e-07, "epoch": 3.0556030586568292, "percentage": 61.11, "elapsed_time": "2:42:17", "remaining_time": "1:43:16", "throughput": 8657.47, "total_tokens": 84300880} +{"current_steps": 125080, "total_steps": 204665, "loss": 0.0002, "lr": 7.882006425243866e-07, "epoch": 3.0557252094886764, "percentage": 61.11, "elapsed_time": "2:42:17", "remaining_time": "1:43:15", "throughput": 8657.5, "total_tokens": 84304272} +{"current_steps": 125085, "total_steps": 204665, "loss": 0.0001, "lr": 7.8811730037355e-07, "epoch": 3.0558473603205236, "percentage": 61.12, "elapsed_time": "2:42:18", "remaining_time": "1:43:15", "throughput": 8657.53, "total_tokens": 84307472} +{"current_steps": 125090, "total_steps": 204665, "loss": 0.0, "lr": 7.880339597635814e-07, "epoch": 3.055969511152371, "percentage": 61.12, "elapsed_time": "2:42:18", "remaining_time": "1:43:15", "throughput": 8657.55, "total_tokens": 84310736} +{"current_steps": 125095, "total_steps": 204665, "loss": 0.0, "lr": 7.879506206950872e-07, "epoch": 3.056091661984218, "percentage": 61.12, "elapsed_time": "2:42:18", "remaining_time": "1:43:14", "throughput": 8657.58, "total_tokens": 84314000} +{"current_steps": 125100, "total_steps": 204665, "loss": 0.0, "lr": 7.878672831686725e-07, "epoch": 3.056213812816065, "percentage": 61.12, "elapsed_time": "2:42:19", "remaining_time": "1:43:14", "throughput": 8657.6, "total_tokens": 84317200} +{"current_steps": 125105, "total_steps": 204665, "loss": 0.0, "lr": 7.877839471849446e-07, "epoch": 3.0563359636479124, "percentage": 61.13, "elapsed_time": "2:42:19", "remaining_time": "1:43:13", "throughput": 8657.7, "total_tokens": 84321232} +{"current_steps": 125110, "total_steps": 204665, "loss": 0.0, "lr": 7.877006127445084e-07, "epoch": 3.0564581144797596, "percentage": 61.13, "elapsed_time": "2:42:19", "remaining_time": "1:43:13", "throughput": 8657.79, "total_tokens": 84325264} +{"current_steps": 125115, "total_steps": 204665, "loss": 0.0034, "lr": 7.876172798479703e-07, "epoch": 3.0565802653116068, "percentage": 61.13, "elapsed_time": "2:42:20", "remaining_time": "1:43:12", "throughput": 8657.82, "total_tokens": 84328592} +{"current_steps": 125120, "total_steps": 204665, "loss": 0.0, "lr": 7.875339484959371e-07, "epoch": 3.056702416143454, "percentage": 61.13, "elapsed_time": "2:42:20", "remaining_time": "1:43:12", "throughput": 8657.84, "total_tokens": 84331728} +{"current_steps": 125125, "total_steps": 204665, "loss": 0.0001, "lr": 7.874506186890138e-07, "epoch": 3.056824566975301, "percentage": 61.14, "elapsed_time": "2:42:20", "remaining_time": "1:43:12", "throughput": 8657.89, "total_tokens": 84335248} +{"current_steps": 125130, "total_steps": 204665, "loss": 0.0, "lr": 7.873672904278069e-07, "epoch": 3.0569467178071483, "percentage": 61.14, "elapsed_time": "2:42:21", "remaining_time": "1:43:11", "throughput": 8657.91, "total_tokens": 84338384} +{"current_steps": 125135, "total_steps": 204665, "loss": 0.0001, "lr": 7.87283963712922e-07, "epoch": 3.0570688686389955, "percentage": 61.14, "elapsed_time": "2:42:21", "remaining_time": "1:43:11", "throughput": 8657.96, "total_tokens": 84342032} +{"current_steps": 125140, "total_steps": 204665, "loss": 0.035, "lr": 7.872006385449658e-07, "epoch": 3.0571910194708427, "percentage": 61.14, "elapsed_time": "2:42:21", "remaining_time": "1:43:10", "throughput": 8657.99, "total_tokens": 84345232} +{"current_steps": 125145, "total_steps": 204665, "loss": 0.0, "lr": 7.871173149245436e-07, "epoch": 3.05731317030269, "percentage": 61.15, "elapsed_time": "2:42:22", "remaining_time": "1:43:10", "throughput": 8658.05, "total_tokens": 84348880} +{"current_steps": 125150, "total_steps": 204665, "loss": 0.0, "lr": 7.870339928522613e-07, "epoch": 3.057435321134537, "percentage": 61.15, "elapsed_time": "2:42:22", "remaining_time": "1:43:10", "throughput": 8658.09, "total_tokens": 84352400} +{"current_steps": 125155, "total_steps": 204665, "loss": 0.0, "lr": 7.869506723287254e-07, "epoch": 3.0575574719663843, "percentage": 61.15, "elapsed_time": "2:42:22", "remaining_time": "1:43:09", "throughput": 8658.13, "total_tokens": 84355728} +{"current_steps": 125160, "total_steps": 204665, "loss": 0.0, "lr": 7.868673533545411e-07, "epoch": 3.0576796227982315, "percentage": 61.15, "elapsed_time": "2:42:23", "remaining_time": "1:43:09", "throughput": 8658.2, "total_tokens": 84359568} +{"current_steps": 125165, "total_steps": 204665, "loss": 0.0, "lr": 7.867840359303153e-07, "epoch": 3.057801773630078, "percentage": 61.16, "elapsed_time": "2:42:23", "remaining_time": "1:43:08", "throughput": 8658.22, "total_tokens": 84362768} +{"current_steps": 125170, "total_steps": 204665, "loss": 0.0465, "lr": 7.867007200566527e-07, "epoch": 3.0579239244619254, "percentage": 61.16, "elapsed_time": "2:42:24", "remaining_time": "1:43:08", "throughput": 8658.29, "total_tokens": 84366480} +{"current_steps": 125175, "total_steps": 204665, "loss": 0.0788, "lr": 7.866174057341601e-07, "epoch": 3.0580460752937726, "percentage": 61.16, "elapsed_time": "2:42:24", "remaining_time": "1:43:07", "throughput": 8658.32, "total_tokens": 84369744} +{"current_steps": 125180, "total_steps": 204665, "loss": 0.0002, "lr": 7.865340929634434e-07, "epoch": 3.05816822612562, "percentage": 61.16, "elapsed_time": "2:42:24", "remaining_time": "1:43:07", "throughput": 8658.33, "total_tokens": 84372816} +{"current_steps": 125185, "total_steps": 204665, "loss": 0.031, "lr": 7.864507817451079e-07, "epoch": 3.058290376957467, "percentage": 61.17, "elapsed_time": "2:42:25", "remaining_time": "1:43:07", "throughput": 8658.37, "total_tokens": 84376208} +{"current_steps": 125190, "total_steps": 204665, "loss": 0.0, "lr": 7.8636747207976e-07, "epoch": 3.058412527789314, "percentage": 61.17, "elapsed_time": "2:42:25", "remaining_time": "1:43:06", "throughput": 8658.43, "total_tokens": 84379856} +{"current_steps": 125195, "total_steps": 204665, "loss": 0.0, "lr": 7.862841639680052e-07, "epoch": 3.0585346786211614, "percentage": 61.17, "elapsed_time": "2:42:25", "remaining_time": "1:43:06", "throughput": 8658.46, "total_tokens": 84383248} +{"current_steps": 125200, "total_steps": 204665, "loss": 0.0, "lr": 7.862008574104493e-07, "epoch": 3.0586568294530085, "percentage": 61.17, "elapsed_time": "2:42:26", "remaining_time": "1:43:05", "throughput": 8658.49, "total_tokens": 84386448} +{"current_steps": 125205, "total_steps": 204665, "loss": 0.0, "lr": 7.861175524076984e-07, "epoch": 3.0587789802848557, "percentage": 61.18, "elapsed_time": "2:42:26", "remaining_time": "1:43:05", "throughput": 8658.52, "total_tokens": 84389840} +{"current_steps": 125210, "total_steps": 204665, "loss": 0.0, "lr": 7.860342489603578e-07, "epoch": 3.058901131116703, "percentage": 61.18, "elapsed_time": "2:42:26", "remaining_time": "1:43:05", "throughput": 8658.54, "total_tokens": 84392976} +{"current_steps": 125215, "total_steps": 204665, "loss": 0.0383, "lr": 7.859509470690343e-07, "epoch": 3.05902328194855, "percentage": 61.18, "elapsed_time": "2:42:27", "remaining_time": "1:43:04", "throughput": 8658.58, "total_tokens": 84396368} +{"current_steps": 125220, "total_steps": 204665, "loss": 0.0, "lr": 7.858676467343326e-07, "epoch": 3.0591454327803973, "percentage": 61.18, "elapsed_time": "2:42:27", "remaining_time": "1:43:04", "throughput": 8658.59, "total_tokens": 84399376} +{"current_steps": 125225, "total_steps": 204665, "loss": 0.0008, "lr": 7.857843479568595e-07, "epoch": 3.0592675836122445, "percentage": 61.19, "elapsed_time": "2:42:27", "remaining_time": "1:43:03", "throughput": 8658.61, "total_tokens": 84402576} +{"current_steps": 125230, "total_steps": 204665, "loss": 0.0, "lr": 7.857010507372197e-07, "epoch": 3.0593897344440917, "percentage": 61.19, "elapsed_time": "2:42:28", "remaining_time": "1:43:03", "throughput": 8658.65, "total_tokens": 84405904} +{"current_steps": 125235, "total_steps": 204665, "loss": 0.0201, "lr": 7.856177550760197e-07, "epoch": 3.059511885275939, "percentage": 61.19, "elapsed_time": "2:42:28", "remaining_time": "1:43:02", "throughput": 8658.67, "total_tokens": 84409104} +{"current_steps": 125240, "total_steps": 204665, "loss": 0.0, "lr": 7.85534460973865e-07, "epoch": 3.059634036107786, "percentage": 61.19, "elapsed_time": "2:42:28", "remaining_time": "1:43:02", "throughput": 8658.73, "total_tokens": 84412752} +{"current_steps": 125245, "total_steps": 204665, "loss": 0.0, "lr": 7.854511684313615e-07, "epoch": 3.0597561869396332, "percentage": 61.2, "elapsed_time": "2:42:29", "remaining_time": "1:43:02", "throughput": 8658.75, "total_tokens": 84415952} +{"current_steps": 125250, "total_steps": 204665, "loss": 0.0523, "lr": 7.85367877449115e-07, "epoch": 3.0598783377714804, "percentage": 61.2, "elapsed_time": "2:42:29", "remaining_time": "1:43:01", "throughput": 8658.78, "total_tokens": 84419216} +{"current_steps": 125255, "total_steps": 204665, "loss": 0.0, "lr": 7.852845880277306e-07, "epoch": 3.060000488603327, "percentage": 61.2, "elapsed_time": "2:42:29", "remaining_time": "1:43:01", "throughput": 8658.84, "total_tokens": 84422928} +{"current_steps": 125260, "total_steps": 204665, "loss": 0.0871, "lr": 7.852013001678149e-07, "epoch": 3.0601226394351744, "percentage": 61.2, "elapsed_time": "2:42:30", "remaining_time": "1:43:00", "throughput": 8658.89, "total_tokens": 84426384} +{"current_steps": 125265, "total_steps": 204665, "loss": 0.0001, "lr": 7.851180138699725e-07, "epoch": 3.0602447902670216, "percentage": 61.2, "elapsed_time": "2:42:30", "remaining_time": "1:43:00", "throughput": 8658.92, "total_tokens": 84429776} +{"current_steps": 125270, "total_steps": 204665, "loss": 0.0001, "lr": 7.850347291348098e-07, "epoch": 3.0603669410988688, "percentage": 61.21, "elapsed_time": "2:42:30", "remaining_time": "1:43:00", "throughput": 8658.98, "total_tokens": 84433424} +{"current_steps": 125275, "total_steps": 204665, "loss": 0.0001, "lr": 7.849514459629329e-07, "epoch": 3.060489091930716, "percentage": 61.21, "elapsed_time": "2:42:31", "remaining_time": "1:42:59", "throughput": 8658.99, "total_tokens": 84436432} +{"current_steps": 125280, "total_steps": 204665, "loss": 0.0, "lr": 7.848681643549464e-07, "epoch": 3.060611242762563, "percentage": 61.21, "elapsed_time": "2:42:31", "remaining_time": "1:42:59", "throughput": 8659.04, "total_tokens": 84440016} +{"current_steps": 125285, "total_steps": 204665, "loss": 0.0, "lr": 7.84784884311457e-07, "epoch": 3.0607333935944103, "percentage": 61.21, "elapsed_time": "2:42:31", "remaining_time": "1:42:58", "throughput": 8659.05, "total_tokens": 84443024} +{"current_steps": 125290, "total_steps": 204665, "loss": 0.0001, "lr": 7.847016058330692e-07, "epoch": 3.0608555444262575, "percentage": 61.22, "elapsed_time": "2:42:32", "remaining_time": "1:42:58", "throughput": 8659.08, "total_tokens": 84446416} +{"current_steps": 125295, "total_steps": 204665, "loss": 0.0325, "lr": 7.846183289203898e-07, "epoch": 3.0609776952581047, "percentage": 61.22, "elapsed_time": "2:42:32", "remaining_time": "1:42:57", "throughput": 8659.1, "total_tokens": 84449488} +{"current_steps": 125300, "total_steps": 204665, "loss": 0.0, "lr": 7.845350535740236e-07, "epoch": 3.061099846089952, "percentage": 61.22, "elapsed_time": "2:42:33", "remaining_time": "1:42:57", "throughput": 8659.17, "total_tokens": 84453328} +{"current_steps": 125305, "total_steps": 204665, "loss": 0.0002, "lr": 7.844517797945763e-07, "epoch": 3.061221996921799, "percentage": 61.22, "elapsed_time": "2:42:33", "remaining_time": "1:42:57", "throughput": 8659.2, "total_tokens": 84456656} +{"current_steps": 125310, "total_steps": 204665, "loss": 0.0001, "lr": 7.843685075826538e-07, "epoch": 3.0613441477536463, "percentage": 61.23, "elapsed_time": "2:42:33", "remaining_time": "1:42:56", "throughput": 8659.21, "total_tokens": 84459728} +{"current_steps": 125315, "total_steps": 204665, "loss": 0.0001, "lr": 7.842852369388612e-07, "epoch": 3.0614662985854935, "percentage": 61.23, "elapsed_time": "2:42:34", "remaining_time": "1:42:56", "throughput": 8659.23, "total_tokens": 84462800} +{"current_steps": 125320, "total_steps": 204665, "loss": 0.0, "lr": 7.84201967863805e-07, "epoch": 3.0615884494173407, "percentage": 61.23, "elapsed_time": "2:42:34", "remaining_time": "1:42:55", "throughput": 8659.24, "total_tokens": 84465872} +{"current_steps": 125325, "total_steps": 204665, "loss": 0.0, "lr": 7.841187003580895e-07, "epoch": 3.061710600249188, "percentage": 61.23, "elapsed_time": "2:42:34", "remaining_time": "1:42:55", "throughput": 8659.34, "total_tokens": 84470032} +{"current_steps": 125330, "total_steps": 204665, "loss": 0.0, "lr": 7.84035434422321e-07, "epoch": 3.061832751081035, "percentage": 61.24, "elapsed_time": "2:42:35", "remaining_time": "1:42:55", "throughput": 8659.37, "total_tokens": 84473296} +{"current_steps": 125335, "total_steps": 204665, "loss": 0.0453, "lr": 7.839521700571053e-07, "epoch": 3.061954901912882, "percentage": 61.24, "elapsed_time": "2:42:35", "remaining_time": "1:42:54", "throughput": 8659.41, "total_tokens": 84476816} +{"current_steps": 125340, "total_steps": 204665, "loss": 0.0, "lr": 7.838689072630471e-07, "epoch": 3.062077052744729, "percentage": 61.24, "elapsed_time": "2:42:35", "remaining_time": "1:42:54", "throughput": 8659.48, "total_tokens": 84480528} +{"current_steps": 125345, "total_steps": 204665, "loss": 0.0, "lr": 7.837856460407527e-07, "epoch": 3.062199203576576, "percentage": 61.24, "elapsed_time": "2:42:36", "remaining_time": "1:42:53", "throughput": 8659.53, "total_tokens": 84484112} +{"current_steps": 125350, "total_steps": 204665, "loss": 0.0, "lr": 7.837023863908271e-07, "epoch": 3.0623213544084233, "percentage": 61.25, "elapsed_time": "2:42:36", "remaining_time": "1:42:53", "throughput": 8659.59, "total_tokens": 84487696} +{"current_steps": 125355, "total_steps": 204665, "loss": 0.0, "lr": 7.836191283138759e-07, "epoch": 3.0624435052402705, "percentage": 61.25, "elapsed_time": "2:42:36", "remaining_time": "1:42:53", "throughput": 8659.64, "total_tokens": 84491280} +{"current_steps": 125360, "total_steps": 204665, "loss": 0.0, "lr": 7.835358718105046e-07, "epoch": 3.0625656560721177, "percentage": 61.25, "elapsed_time": "2:42:37", "remaining_time": "1:42:52", "throughput": 8659.66, "total_tokens": 84494416} +{"current_steps": 125365, "total_steps": 204665, "loss": 0.0001, "lr": 7.834526168813185e-07, "epoch": 3.062687806903965, "percentage": 61.25, "elapsed_time": "2:42:37", "remaining_time": "1:42:52", "throughput": 8659.66, "total_tokens": 84497360} +{"current_steps": 125370, "total_steps": 204665, "loss": 0.0, "lr": 7.833693635269236e-07, "epoch": 3.062809957735812, "percentage": 61.26, "elapsed_time": "2:42:37", "remaining_time": "1:42:51", "throughput": 8659.69, "total_tokens": 84500624} +{"current_steps": 125375, "total_steps": 204665, "loss": 0.0, "lr": 7.832861117479245e-07, "epoch": 3.0629321085676593, "percentage": 61.26, "elapsed_time": "2:42:38", "remaining_time": "1:42:51", "throughput": 8659.72, "total_tokens": 84503952} +{"current_steps": 125380, "total_steps": 204665, "loss": 0.0001, "lr": 7.832028615449275e-07, "epoch": 3.0630542593995065, "percentage": 61.26, "elapsed_time": "2:42:38", "remaining_time": "1:42:50", "throughput": 8659.74, "total_tokens": 84507152} +{"current_steps": 125385, "total_steps": 204665, "loss": 0.0, "lr": 7.831196129185371e-07, "epoch": 3.0631764102313537, "percentage": 61.26, "elapsed_time": "2:42:38", "remaining_time": "1:42:50", "throughput": 8659.77, "total_tokens": 84510416} +{"current_steps": 125390, "total_steps": 204665, "loss": 0.0, "lr": 7.830363658693596e-07, "epoch": 3.063298561063201, "percentage": 61.27, "elapsed_time": "2:42:39", "remaining_time": "1:42:50", "throughput": 8659.82, "total_tokens": 84513936} +{"current_steps": 125395, "total_steps": 204665, "loss": 0.0001, "lr": 7.82953120398e-07, "epoch": 3.063420711895048, "percentage": 61.27, "elapsed_time": "2:42:39", "remaining_time": "1:42:49", "throughput": 8659.86, "total_tokens": 84517392} +{"current_steps": 125400, "total_steps": 204665, "loss": 0.0001, "lr": 7.828698765050636e-07, "epoch": 3.0635428627268952, "percentage": 61.27, "elapsed_time": "2:42:40", "remaining_time": "1:42:49", "throughput": 8659.9, "total_tokens": 84520784} +{"current_steps": 125405, "total_steps": 204665, "loss": 0.0, "lr": 7.827866341911558e-07, "epoch": 3.0636650135587424, "percentage": 61.27, "elapsed_time": "2:42:40", "remaining_time": "1:42:48", "throughput": 8659.93, "total_tokens": 84524048} +{"current_steps": 125410, "total_steps": 204665, "loss": 0.0, "lr": 7.827033934568821e-07, "epoch": 3.0637871643905896, "percentage": 61.28, "elapsed_time": "2:42:40", "remaining_time": "1:42:48", "throughput": 8659.95, "total_tokens": 84527248} +{"current_steps": 125415, "total_steps": 204665, "loss": 0.0, "lr": 7.82620154302848e-07, "epoch": 3.063909315222437, "percentage": 61.28, "elapsed_time": "2:42:41", "remaining_time": "1:42:48", "throughput": 8659.98, "total_tokens": 84530576} +{"current_steps": 125420, "total_steps": 204665, "loss": 0.0, "lr": 7.825369167296581e-07, "epoch": 3.064031466054284, "percentage": 61.28, "elapsed_time": "2:42:41", "remaining_time": "1:42:47", "throughput": 8660.01, "total_tokens": 84533776} +{"current_steps": 125425, "total_steps": 204665, "loss": 0.0001, "lr": 7.824536807379183e-07, "epoch": 3.064153616886131, "percentage": 61.28, "elapsed_time": "2:42:41", "remaining_time": "1:42:47", "throughput": 8660.07, "total_tokens": 84537424} +{"current_steps": 125430, "total_steps": 204665, "loss": 0.0, "lr": 7.823704463282342e-07, "epoch": 3.0642757677179784, "percentage": 61.29, "elapsed_time": "2:42:42", "remaining_time": "1:42:46", "throughput": 8660.09, "total_tokens": 84540688} +{"current_steps": 125435, "total_steps": 204665, "loss": 0.0, "lr": 7.822872135012104e-07, "epoch": 3.064397918549825, "percentage": 61.29, "elapsed_time": "2:42:42", "remaining_time": "1:42:46", "throughput": 8660.14, "total_tokens": 84544144} +{"current_steps": 125440, "total_steps": 204665, "loss": 0.0003, "lr": 7.82203982257453e-07, "epoch": 3.0645200693816723, "percentage": 61.29, "elapsed_time": "2:42:42", "remaining_time": "1:42:45", "throughput": 8660.18, "total_tokens": 84547536} +{"current_steps": 125445, "total_steps": 204665, "loss": 0.0, "lr": 7.821207525975664e-07, "epoch": 3.0646422202135195, "percentage": 61.29, "elapsed_time": "2:42:43", "remaining_time": "1:42:45", "throughput": 8660.23, "total_tokens": 84551056} +{"current_steps": 125450, "total_steps": 204665, "loss": 0.0, "lr": 7.820375245221567e-07, "epoch": 3.0647643710453667, "percentage": 61.3, "elapsed_time": "2:42:43", "remaining_time": "1:42:45", "throughput": 8660.27, "total_tokens": 84554448} +{"current_steps": 125455, "total_steps": 204665, "loss": 0.0, "lr": 7.819542980318283e-07, "epoch": 3.064886521877214, "percentage": 61.3, "elapsed_time": "2:42:43", "remaining_time": "1:42:44", "throughput": 8660.31, "total_tokens": 84557840} +{"current_steps": 125460, "total_steps": 204665, "loss": 0.0, "lr": 7.81871073127187e-07, "epoch": 3.065008672709061, "percentage": 61.3, "elapsed_time": "2:42:44", "remaining_time": "1:42:44", "throughput": 8660.36, "total_tokens": 84561424} +{"current_steps": 125465, "total_steps": 204665, "loss": 0.0, "lr": 7.817878498088382e-07, "epoch": 3.0651308235409083, "percentage": 61.3, "elapsed_time": "2:42:44", "remaining_time": "1:42:43", "throughput": 8660.43, "total_tokens": 84565136} +{"current_steps": 125470, "total_steps": 204665, "loss": 0.0, "lr": 7.817046280773864e-07, "epoch": 3.0652529743727555, "percentage": 61.31, "elapsed_time": "2:42:44", "remaining_time": "1:42:43", "throughput": 8660.47, "total_tokens": 84568592} +{"current_steps": 125475, "total_steps": 204665, "loss": 0.0, "lr": 7.816214079334378e-07, "epoch": 3.0653751252046026, "percentage": 61.31, "elapsed_time": "2:42:45", "remaining_time": "1:42:43", "throughput": 8660.5, "total_tokens": 84571920} +{"current_steps": 125480, "total_steps": 204665, "loss": 0.0001, "lr": 7.815381893775965e-07, "epoch": 3.06549727603645, "percentage": 61.31, "elapsed_time": "2:42:45", "remaining_time": "1:42:42", "throughput": 8660.5, "total_tokens": 84574736} +{"current_steps": 125485, "total_steps": 204665, "loss": 0.0, "lr": 7.814549724104683e-07, "epoch": 3.065619426868297, "percentage": 61.31, "elapsed_time": "2:42:45", "remaining_time": "1:42:42", "throughput": 8660.53, "total_tokens": 84578064} +{"current_steps": 125490, "total_steps": 204665, "loss": 0.0, "lr": 7.813717570326588e-07, "epoch": 3.065741577700144, "percentage": 61.31, "elapsed_time": "2:42:46", "remaining_time": "1:42:41", "throughput": 8660.55, "total_tokens": 84581200} +{"current_steps": 125495, "total_steps": 204665, "loss": 0.0, "lr": 7.812885432447722e-07, "epoch": 3.0658637285319914, "percentage": 61.32, "elapsed_time": "2:42:46", "remaining_time": "1:42:41", "throughput": 8660.57, "total_tokens": 84584464} +{"current_steps": 125500, "total_steps": 204665, "loss": 0.0016, "lr": 7.812053310474146e-07, "epoch": 3.0659858793638386, "percentage": 61.32, "elapsed_time": "2:42:46", "remaining_time": "1:42:40", "throughput": 8660.62, "total_tokens": 84587984} +{"current_steps": 125505, "total_steps": 204665, "loss": 0.0716, "lr": 7.811221204411905e-07, "epoch": 3.066108030195686, "percentage": 61.32, "elapsed_time": "2:42:47", "remaining_time": "1:42:40", "throughput": 8660.66, "total_tokens": 84591376} +{"current_steps": 125510, "total_steps": 204665, "loss": 0.0, "lr": 7.810389114267051e-07, "epoch": 3.066230181027533, "percentage": 61.32, "elapsed_time": "2:42:47", "remaining_time": "1:42:40", "throughput": 8660.68, "total_tokens": 84594512} +{"current_steps": 125515, "total_steps": 204665, "loss": 0.0, "lr": 7.809557040045637e-07, "epoch": 3.06635233185938, "percentage": 61.33, "elapsed_time": "2:42:48", "remaining_time": "1:42:39", "throughput": 8660.7, "total_tokens": 84597712} +{"current_steps": 125520, "total_steps": 204665, "loss": 0.0, "lr": 7.808724981753712e-07, "epoch": 3.066474482691227, "percentage": 61.33, "elapsed_time": "2:42:48", "remaining_time": "1:42:39", "throughput": 8660.73, "total_tokens": 84601040} +{"current_steps": 125525, "total_steps": 204665, "loss": 0.0001, "lr": 7.807892939397331e-07, "epoch": 3.066596633523074, "percentage": 61.33, "elapsed_time": "2:42:48", "remaining_time": "1:42:38", "throughput": 8660.75, "total_tokens": 84604176} +{"current_steps": 125530, "total_steps": 204665, "loss": 0.0001, "lr": 7.807060912982538e-07, "epoch": 3.0667187843549213, "percentage": 61.33, "elapsed_time": "2:42:49", "remaining_time": "1:42:38", "throughput": 8660.79, "total_tokens": 84607632} +{"current_steps": 125535, "total_steps": 204665, "loss": 0.0, "lr": 7.806228902515393e-07, "epoch": 3.0668409351867685, "percentage": 61.34, "elapsed_time": "2:42:49", "remaining_time": "1:42:38", "throughput": 8660.86, "total_tokens": 84611344} +{"current_steps": 125540, "total_steps": 204665, "loss": 0.0001, "lr": 7.805396908001938e-07, "epoch": 3.0669630860186157, "percentage": 61.34, "elapsed_time": "2:42:49", "remaining_time": "1:42:37", "throughput": 8660.96, "total_tokens": 84615568} +{"current_steps": 125545, "total_steps": 204665, "loss": 0.054, "lr": 7.804564929448227e-07, "epoch": 3.067085236850463, "percentage": 61.34, "elapsed_time": "2:42:50", "remaining_time": "1:42:37", "throughput": 8661.0, "total_tokens": 84618896} +{"current_steps": 125550, "total_steps": 204665, "loss": 0.0, "lr": 7.803732966860311e-07, "epoch": 3.06720738768231, "percentage": 61.34, "elapsed_time": "2:42:50", "remaining_time": "1:42:36", "throughput": 8661.02, "total_tokens": 84622096} +{"current_steps": 125555, "total_steps": 204665, "loss": 0.0001, "lr": 7.80290102024424e-07, "epoch": 3.0673295385141572, "percentage": 61.35, "elapsed_time": "2:42:50", "remaining_time": "1:42:36", "throughput": 8661.07, "total_tokens": 84625680} +{"current_steps": 125560, "total_steps": 204665, "loss": 0.0, "lr": 7.802069089606064e-07, "epoch": 3.0674516893460044, "percentage": 61.35, "elapsed_time": "2:42:51", "remaining_time": "1:42:35", "throughput": 8661.1, "total_tokens": 84628944} +{"current_steps": 125565, "total_steps": 204665, "loss": 0.0, "lr": 7.801237174951833e-07, "epoch": 3.0675738401778516, "percentage": 61.35, "elapsed_time": "2:42:51", "remaining_time": "1:42:35", "throughput": 8661.15, "total_tokens": 84632528} +{"current_steps": 125570, "total_steps": 204665, "loss": 0.0, "lr": 7.800405276287599e-07, "epoch": 3.067695991009699, "percentage": 61.35, "elapsed_time": "2:42:51", "remaining_time": "1:42:35", "throughput": 8661.17, "total_tokens": 84635664} +{"current_steps": 125575, "total_steps": 204665, "loss": 0.0, "lr": 7.799573393619403e-07, "epoch": 3.067818141841546, "percentage": 61.36, "elapsed_time": "2:42:52", "remaining_time": "1:42:34", "throughput": 8661.2, "total_tokens": 84638928} +{"current_steps": 125580, "total_steps": 204665, "loss": 0.0, "lr": 7.798741526953303e-07, "epoch": 3.067940292673393, "percentage": 61.36, "elapsed_time": "2:42:52", "remaining_time": "1:42:34", "throughput": 8661.25, "total_tokens": 84642448} +{"current_steps": 125585, "total_steps": 204665, "loss": 0.0, "lr": 7.797909676295351e-07, "epoch": 3.0680624435052404, "percentage": 61.36, "elapsed_time": "2:42:52", "remaining_time": "1:42:33", "throughput": 8661.28, "total_tokens": 84645840} +{"current_steps": 125590, "total_steps": 204665, "loss": 0.0308, "lr": 7.797077841651587e-07, "epoch": 3.0681845943370876, "percentage": 61.36, "elapsed_time": "2:42:53", "remaining_time": "1:42:33", "throughput": 8661.27, "total_tokens": 84648592} +{"current_steps": 125595, "total_steps": 204665, "loss": 0.0, "lr": 7.79624602302807e-07, "epoch": 3.0683067451689348, "percentage": 61.37, "elapsed_time": "2:42:53", "remaining_time": "1:42:33", "throughput": 8661.32, "total_tokens": 84652112} +{"current_steps": 125600, "total_steps": 204665, "loss": 0.0001, "lr": 7.79541422043084e-07, "epoch": 3.068428896000782, "percentage": 61.37, "elapsed_time": "2:42:53", "remaining_time": "1:42:32", "throughput": 8661.35, "total_tokens": 84655376} +{"current_steps": 125605, "total_steps": 204665, "loss": 0.0, "lr": 7.794582433865956e-07, "epoch": 3.068551046832629, "percentage": 61.37, "elapsed_time": "2:42:54", "remaining_time": "1:42:32", "throughput": 8661.38, "total_tokens": 84658640} +{"current_steps": 125610, "total_steps": 204665, "loss": 0.0, "lr": 7.793750663339459e-07, "epoch": 3.068673197664476, "percentage": 61.37, "elapsed_time": "2:42:54", "remaining_time": "1:42:31", "throughput": 8661.37, "total_tokens": 84661456} +{"current_steps": 125615, "total_steps": 204665, "loss": 0.0, "lr": 7.792918908857399e-07, "epoch": 3.068795348496323, "percentage": 61.38, "elapsed_time": "2:42:54", "remaining_time": "1:42:31", "throughput": 8661.4, "total_tokens": 84664784} +{"current_steps": 125620, "total_steps": 204665, "loss": 0.0, "lr": 7.792087170425829e-07, "epoch": 3.0689174993281703, "percentage": 61.38, "elapsed_time": "2:42:55", "remaining_time": "1:42:30", "throughput": 8661.44, "total_tokens": 84668176} +{"current_steps": 125625, "total_steps": 204665, "loss": 0.0003, "lr": 7.791255448050793e-07, "epoch": 3.0690396501600175, "percentage": 61.38, "elapsed_time": "2:42:55", "remaining_time": "1:42:30", "throughput": 8661.45, "total_tokens": 84671248} +{"current_steps": 125630, "total_steps": 204665, "loss": 0.0, "lr": 7.790423741738344e-07, "epoch": 3.0691618009918646, "percentage": 61.38, "elapsed_time": "2:42:55", "remaining_time": "1:42:30", "throughput": 8661.48, "total_tokens": 84674576} +{"current_steps": 125635, "total_steps": 204665, "loss": 0.0, "lr": 7.789592051494524e-07, "epoch": 3.069283951823712, "percentage": 61.39, "elapsed_time": "2:42:56", "remaining_time": "1:42:29", "throughput": 8661.52, "total_tokens": 84677904} +{"current_steps": 125640, "total_steps": 204665, "loss": 0.0, "lr": 7.788760377325385e-07, "epoch": 3.069406102655559, "percentage": 61.39, "elapsed_time": "2:42:56", "remaining_time": "1:42:29", "throughput": 8661.56, "total_tokens": 84681360} +{"current_steps": 125645, "total_steps": 204665, "loss": 0.0, "lr": 7.78792871923698e-07, "epoch": 3.069528253487406, "percentage": 61.39, "elapsed_time": "2:42:57", "remaining_time": "1:42:28", "throughput": 8661.6, "total_tokens": 84684752} +{"current_steps": 125650, "total_steps": 204665, "loss": 0.0, "lr": 7.787097077235348e-07, "epoch": 3.0696504043192534, "percentage": 61.39, "elapsed_time": "2:42:57", "remaining_time": "1:42:28", "throughput": 8661.66, "total_tokens": 84688464} +{"current_steps": 125655, "total_steps": 204665, "loss": 0.0, "lr": 7.786265451326546e-07, "epoch": 3.0697725551511006, "percentage": 61.4, "elapsed_time": "2:42:57", "remaining_time": "1:42:28", "throughput": 8661.74, "total_tokens": 84692368} +{"current_steps": 125660, "total_steps": 204665, "loss": 0.0, "lr": 7.785433841516614e-07, "epoch": 3.069894705982948, "percentage": 61.4, "elapsed_time": "2:42:58", "remaining_time": "1:42:27", "throughput": 8661.79, "total_tokens": 84695824} +{"current_steps": 125665, "total_steps": 204665, "loss": 0.0, "lr": 7.784602247811603e-07, "epoch": 3.070016856814795, "percentage": 61.4, "elapsed_time": "2:42:58", "remaining_time": "1:42:27", "throughput": 8661.84, "total_tokens": 84699472} +{"current_steps": 125670, "total_steps": 204665, "loss": 0.041, "lr": 7.783770670217561e-07, "epoch": 3.070139007646642, "percentage": 61.4, "elapsed_time": "2:42:58", "remaining_time": "1:42:26", "throughput": 8661.91, "total_tokens": 84703184} +{"current_steps": 125675, "total_steps": 204665, "loss": 0.0, "lr": 7.782939108740532e-07, "epoch": 3.0702611584784893, "percentage": 61.41, "elapsed_time": "2:42:59", "remaining_time": "1:42:26", "throughput": 8661.96, "total_tokens": 84706768} +{"current_steps": 125680, "total_steps": 204665, "loss": 0.0, "lr": 7.782107563386571e-07, "epoch": 3.0703833093103365, "percentage": 61.41, "elapsed_time": "2:42:59", "remaining_time": "1:42:26", "throughput": 8661.97, "total_tokens": 84709776} +{"current_steps": 125685, "total_steps": 204665, "loss": 0.0284, "lr": 7.781276034161714e-07, "epoch": 3.0705054601421837, "percentage": 61.41, "elapsed_time": "2:42:59", "remaining_time": "1:42:25", "throughput": 8662.04, "total_tokens": 84713552} +{"current_steps": 125690, "total_steps": 204665, "loss": 0.0, "lr": 7.780444521072021e-07, "epoch": 3.070627610974031, "percentage": 61.41, "elapsed_time": "2:43:00", "remaining_time": "1:42:25", "throughput": 8662.06, "total_tokens": 84716816} +{"current_steps": 125695, "total_steps": 204665, "loss": 0.0, "lr": 7.779613024123528e-07, "epoch": 3.070749761805878, "percentage": 61.41, "elapsed_time": "2:43:00", "remaining_time": "1:42:24", "throughput": 8662.13, "total_tokens": 84720592} +{"current_steps": 125700, "total_steps": 204665, "loss": 0.0, "lr": 7.778781543322289e-07, "epoch": 3.070871912637725, "percentage": 61.42, "elapsed_time": "2:43:00", "remaining_time": "1:42:24", "throughput": 8662.17, "total_tokens": 84723984} +{"current_steps": 125705, "total_steps": 204665, "loss": 0.0, "lr": 7.777950078674345e-07, "epoch": 3.070994063469572, "percentage": 61.42, "elapsed_time": "2:43:01", "remaining_time": "1:42:23", "throughput": 8662.21, "total_tokens": 84727312} +{"current_steps": 125710, "total_steps": 204665, "loss": 0.0, "lr": 7.777118630185748e-07, "epoch": 3.0711162143014192, "percentage": 61.42, "elapsed_time": "2:43:01", "remaining_time": "1:42:23", "throughput": 8662.25, "total_tokens": 84730768} +{"current_steps": 125715, "total_steps": 204665, "loss": 0.0, "lr": 7.776287197862541e-07, "epoch": 3.0712383651332664, "percentage": 61.42, "elapsed_time": "2:43:01", "remaining_time": "1:42:23", "throughput": 8662.28, "total_tokens": 84734032} +{"current_steps": 125720, "total_steps": 204665, "loss": 0.0, "lr": 7.775455781710774e-07, "epoch": 3.0713605159651136, "percentage": 61.43, "elapsed_time": "2:43:02", "remaining_time": "1:42:22", "throughput": 8662.31, "total_tokens": 84737296} +{"current_steps": 125725, "total_steps": 204665, "loss": 0.0001, "lr": 7.774624381736489e-07, "epoch": 3.071482666796961, "percentage": 61.43, "elapsed_time": "2:43:02", "remaining_time": "1:42:22", "throughput": 8662.35, "total_tokens": 84740752} +{"current_steps": 125730, "total_steps": 204665, "loss": 0.0003, "lr": 7.773792997945734e-07, "epoch": 3.071604817628808, "percentage": 61.43, "elapsed_time": "2:43:03", "remaining_time": "1:42:21", "throughput": 8662.42, "total_tokens": 84744528} +{"current_steps": 125735, "total_steps": 204665, "loss": 0.0, "lr": 7.772961630344552e-07, "epoch": 3.071726968460655, "percentage": 61.43, "elapsed_time": "2:43:03", "remaining_time": "1:42:21", "throughput": 8662.46, "total_tokens": 84747984} +{"current_steps": 125740, "total_steps": 204665, "loss": 0.0001, "lr": 7.772130278938999e-07, "epoch": 3.0718491192925024, "percentage": 61.44, "elapsed_time": "2:43:03", "remaining_time": "1:42:21", "throughput": 8662.48, "total_tokens": 84751184} +{"current_steps": 125745, "total_steps": 204665, "loss": 0.0, "lr": 7.771298943735108e-07, "epoch": 3.0719712701243496, "percentage": 61.44, "elapsed_time": "2:43:04", "remaining_time": "1:42:20", "throughput": 8662.56, "total_tokens": 84755088} +{"current_steps": 125750, "total_steps": 204665, "loss": 0.0, "lr": 7.770467624738935e-07, "epoch": 3.0720934209561968, "percentage": 61.44, "elapsed_time": "2:43:04", "remaining_time": "1:42:20", "throughput": 8662.6, "total_tokens": 84758544} +{"current_steps": 125755, "total_steps": 204665, "loss": 0.0, "lr": 7.769636321956516e-07, "epoch": 3.072215571788044, "percentage": 61.44, "elapsed_time": "2:43:04", "remaining_time": "1:42:19", "throughput": 8662.66, "total_tokens": 84762192} +{"current_steps": 125760, "total_steps": 204665, "loss": 0.0, "lr": 7.768805035393907e-07, "epoch": 3.072337722619891, "percentage": 61.45, "elapsed_time": "2:43:05", "remaining_time": "1:42:19", "throughput": 8662.68, "total_tokens": 84765328} +{"current_steps": 125765, "total_steps": 204665, "loss": 0.1304, "lr": 7.767973765057145e-07, "epoch": 3.0724598734517383, "percentage": 61.45, "elapsed_time": "2:43:05", "remaining_time": "1:42:19", "throughput": 8662.73, "total_tokens": 84768912} +{"current_steps": 125770, "total_steps": 204665, "loss": 0.0, "lr": 7.767142510952277e-07, "epoch": 3.0725820242835855, "percentage": 61.45, "elapsed_time": "2:43:05", "remaining_time": "1:42:18", "throughput": 8662.76, "total_tokens": 84772112} +{"current_steps": 125775, "total_steps": 204665, "loss": 0.0072, "lr": 7.766311273085353e-07, "epoch": 3.0727041751154327, "percentage": 61.45, "elapsed_time": "2:43:06", "remaining_time": "1:42:18", "throughput": 8662.75, "total_tokens": 84774992} +{"current_steps": 125780, "total_steps": 204665, "loss": 0.0, "lr": 7.765480051462409e-07, "epoch": 3.07282632594728, "percentage": 61.46, "elapsed_time": "2:43:06", "remaining_time": "1:42:17", "throughput": 8662.76, "total_tokens": 84777936} +{"current_steps": 125785, "total_steps": 204665, "loss": 0.0318, "lr": 7.764648846089501e-07, "epoch": 3.072948476779127, "percentage": 61.46, "elapsed_time": "2:43:06", "remaining_time": "1:42:17", "throughput": 8662.81, "total_tokens": 84781456} +{"current_steps": 125790, "total_steps": 204665, "loss": 0.0418, "lr": 7.763817656972662e-07, "epoch": 3.073070627610974, "percentage": 61.46, "elapsed_time": "2:43:07", "remaining_time": "1:42:16", "throughput": 8662.83, "total_tokens": 84784656} +{"current_steps": 125795, "total_steps": 204665, "loss": 0.0, "lr": 7.762986484117943e-07, "epoch": 3.073192778442821, "percentage": 61.46, "elapsed_time": "2:43:07", "remaining_time": "1:42:16", "throughput": 8662.83, "total_tokens": 84787600} +{"current_steps": 125800, "total_steps": 204665, "loss": 0.0307, "lr": 7.762155327531392e-07, "epoch": 3.073314929274668, "percentage": 61.47, "elapsed_time": "2:43:07", "remaining_time": "1:42:16", "throughput": 8662.85, "total_tokens": 84790736} +{"current_steps": 125805, "total_steps": 204665, "loss": 0.0, "lr": 7.761324187219046e-07, "epoch": 3.0734370801065154, "percentage": 61.47, "elapsed_time": "2:43:08", "remaining_time": "1:42:15", "throughput": 8662.89, "total_tokens": 84794192} +{"current_steps": 125810, "total_steps": 204665, "loss": 0.0, "lr": 7.760493063186957e-07, "epoch": 3.0735592309383626, "percentage": 61.47, "elapsed_time": "2:43:08", "remaining_time": "1:42:15", "throughput": 8662.92, "total_tokens": 84797456} +{"current_steps": 125815, "total_steps": 204665, "loss": 0.0001, "lr": 7.759661955441161e-07, "epoch": 3.0736813817702098, "percentage": 61.47, "elapsed_time": "2:43:08", "remaining_time": "1:42:14", "throughput": 8662.98, "total_tokens": 84801104} +{"current_steps": 125820, "total_steps": 204665, "loss": 0.0, "lr": 7.758830863987707e-07, "epoch": 3.073803532602057, "percentage": 61.48, "elapsed_time": "2:43:09", "remaining_time": "1:42:14", "throughput": 8663.02, "total_tokens": 84804496} +{"current_steps": 125825, "total_steps": 204665, "loss": 0.0001, "lr": 7.757999788832639e-07, "epoch": 3.073925683433904, "percentage": 61.48, "elapsed_time": "2:43:09", "remaining_time": "1:42:14", "throughput": 8663.06, "total_tokens": 84807952} +{"current_steps": 125830, "total_steps": 204665, "loss": 0.0, "lr": 7.757168729981996e-07, "epoch": 3.0740478342657513, "percentage": 61.48, "elapsed_time": "2:43:09", "remaining_time": "1:42:13", "throughput": 8663.08, "total_tokens": 84811088} +{"current_steps": 125835, "total_steps": 204665, "loss": 0.0489, "lr": 7.75633768744183e-07, "epoch": 3.0741699850975985, "percentage": 61.48, "elapsed_time": "2:43:10", "remaining_time": "1:42:13", "throughput": 8663.12, "total_tokens": 84814608} +{"current_steps": 125840, "total_steps": 204665, "loss": 0.0, "lr": 7.755506661218174e-07, "epoch": 3.0742921359294457, "percentage": 61.49, "elapsed_time": "2:43:10", "remaining_time": "1:42:12", "throughput": 8663.14, "total_tokens": 84817680} +{"current_steps": 125845, "total_steps": 204665, "loss": 0.0005, "lr": 7.754675651317083e-07, "epoch": 3.074414286761293, "percentage": 61.49, "elapsed_time": "2:43:10", "remaining_time": "1:42:12", "throughput": 8663.2, "total_tokens": 84821328} +{"current_steps": 125850, "total_steps": 204665, "loss": 0.0001, "lr": 7.753844657744591e-07, "epoch": 3.07453643759314, "percentage": 61.49, "elapsed_time": "2:43:11", "remaining_time": "1:42:11", "throughput": 8663.25, "total_tokens": 84824848} +{"current_steps": 125855, "total_steps": 204665, "loss": 0.0, "lr": 7.753013680506747e-07, "epoch": 3.0746585884249873, "percentage": 61.49, "elapsed_time": "2:43:11", "remaining_time": "1:42:11", "throughput": 8663.25, "total_tokens": 84827792} +{"current_steps": 125860, "total_steps": 204665, "loss": 0.0, "lr": 7.752182719609589e-07, "epoch": 3.0747807392568345, "percentage": 61.5, "elapsed_time": "2:43:12", "remaining_time": "1:42:11", "throughput": 8663.29, "total_tokens": 84831248} +{"current_steps": 125865, "total_steps": 204665, "loss": 0.0, "lr": 7.751351775059165e-07, "epoch": 3.0749028900886817, "percentage": 61.5, "elapsed_time": "2:43:12", "remaining_time": "1:42:10", "throughput": 8663.32, "total_tokens": 84834448} +{"current_steps": 125870, "total_steps": 204665, "loss": 0.0, "lr": 7.750520846861515e-07, "epoch": 3.075025040920529, "percentage": 61.5, "elapsed_time": "2:43:12", "remaining_time": "1:42:10", "throughput": 8663.37, "total_tokens": 84837968} +{"current_steps": 125875, "total_steps": 204665, "loss": 0.0, "lr": 7.749689935022683e-07, "epoch": 3.075147191752376, "percentage": 61.5, "elapsed_time": "2:43:13", "remaining_time": "1:42:09", "throughput": 8663.38, "total_tokens": 84840976} +{"current_steps": 125880, "total_steps": 204665, "loss": 0.0, "lr": 7.748859039548713e-07, "epoch": 3.075269342584223, "percentage": 61.51, "elapsed_time": "2:43:13", "remaining_time": "1:42:09", "throughput": 8663.4, "total_tokens": 84844240} +{"current_steps": 125885, "total_steps": 204665, "loss": 0.05, "lr": 7.748028160445641e-07, "epoch": 3.07539149341607, "percentage": 61.51, "elapsed_time": "2:43:13", "remaining_time": "1:42:09", "throughput": 8663.53, "total_tokens": 84848656} +{"current_steps": 125890, "total_steps": 204665, "loss": 0.0, "lr": 7.747197297719515e-07, "epoch": 3.075513644247917, "percentage": 61.51, "elapsed_time": "2:43:14", "remaining_time": "1:42:08", "throughput": 8663.55, "total_tokens": 84851856} +{"current_steps": 125895, "total_steps": 204665, "loss": 0.0002, "lr": 7.746366451376379e-07, "epoch": 3.0756357950797644, "percentage": 61.51, "elapsed_time": "2:43:14", "remaining_time": "1:42:08", "throughput": 8663.57, "total_tokens": 84855056} +{"current_steps": 125900, "total_steps": 204665, "loss": 0.0001, "lr": 7.745535621422269e-07, "epoch": 3.0757579459116116, "percentage": 61.52, "elapsed_time": "2:43:14", "remaining_time": "1:42:07", "throughput": 8663.63, "total_tokens": 84858704} +{"current_steps": 125905, "total_steps": 204665, "loss": 0.0761, "lr": 7.744704807863234e-07, "epoch": 3.0758800967434587, "percentage": 61.52, "elapsed_time": "2:43:15", "remaining_time": "1:42:07", "throughput": 8663.7, "total_tokens": 84862480} +{"current_steps": 125910, "total_steps": 204665, "loss": 0.0001, "lr": 7.743874010705308e-07, "epoch": 3.076002247575306, "percentage": 61.52, "elapsed_time": "2:43:15", "remaining_time": "1:42:06", "throughput": 8663.77, "total_tokens": 84866256} +{"current_steps": 125915, "total_steps": 204665, "loss": 0.0002, "lr": 7.743043229954542e-07, "epoch": 3.076124398407153, "percentage": 61.52, "elapsed_time": "2:43:15", "remaining_time": "1:42:06", "throughput": 8663.82, "total_tokens": 84869776} +{"current_steps": 125920, "total_steps": 204665, "loss": 0.0444, "lr": 7.74221246561697e-07, "epoch": 3.0762465492390003, "percentage": 61.52, "elapsed_time": "2:43:16", "remaining_time": "1:42:06", "throughput": 8663.88, "total_tokens": 84873488} +{"current_steps": 125925, "total_steps": 204665, "loss": 0.0, "lr": 7.741381717698634e-07, "epoch": 3.0763687000708475, "percentage": 61.53, "elapsed_time": "2:43:16", "remaining_time": "1:42:05", "throughput": 8663.92, "total_tokens": 84876880} +{"current_steps": 125930, "total_steps": 204665, "loss": 0.0, "lr": 7.740550986205582e-07, "epoch": 3.0764908509026947, "percentage": 61.53, "elapsed_time": "2:43:16", "remaining_time": "1:42:05", "throughput": 8663.95, "total_tokens": 84880208} +{"current_steps": 125935, "total_steps": 204665, "loss": 0.0001, "lr": 7.739720271143847e-07, "epoch": 3.076613001734542, "percentage": 61.53, "elapsed_time": "2:43:17", "remaining_time": "1:42:04", "throughput": 8663.94, "total_tokens": 84883088} +{"current_steps": 125940, "total_steps": 204665, "loss": 0.0, "lr": 7.738889572519478e-07, "epoch": 3.076735152566389, "percentage": 61.53, "elapsed_time": "2:43:17", "remaining_time": "1:42:04", "throughput": 8664.01, "total_tokens": 84886800} +{"current_steps": 125945, "total_steps": 204665, "loss": 0.0013, "lr": 7.738058890338508e-07, "epoch": 3.0768573033982363, "percentage": 61.54, "elapsed_time": "2:43:17", "remaining_time": "1:42:04", "throughput": 8664.05, "total_tokens": 84890256} +{"current_steps": 125950, "total_steps": 204665, "loss": 0.0, "lr": 7.737228224606982e-07, "epoch": 3.0769794542300835, "percentage": 61.54, "elapsed_time": "2:43:18", "remaining_time": "1:42:03", "throughput": 8664.07, "total_tokens": 84893520} +{"current_steps": 125955, "total_steps": 204665, "loss": 0.0001, "lr": 7.736397575330946e-07, "epoch": 3.0771016050619306, "percentage": 61.54, "elapsed_time": "2:43:18", "remaining_time": "1:42:03", "throughput": 8664.14, "total_tokens": 84897232} +{"current_steps": 125960, "total_steps": 204665, "loss": 0.0, "lr": 7.735566942516431e-07, "epoch": 3.077223755893778, "percentage": 61.54, "elapsed_time": "2:43:19", "remaining_time": "1:42:02", "throughput": 8664.18, "total_tokens": 84900688} +{"current_steps": 125965, "total_steps": 204665, "loss": 0.0, "lr": 7.734736326169487e-07, "epoch": 3.0773459067256246, "percentage": 61.55, "elapsed_time": "2:43:19", "remaining_time": "1:42:02", "throughput": 8664.23, "total_tokens": 84904208} +{"current_steps": 125970, "total_steps": 204665, "loss": 0.0, "lr": 7.733905726296146e-07, "epoch": 3.0774680575574718, "percentage": 61.55, "elapsed_time": "2:43:19", "remaining_time": "1:42:02", "throughput": 8664.24, "total_tokens": 84907280} +{"current_steps": 125975, "total_steps": 204665, "loss": 0.0, "lr": 7.733075142902453e-07, "epoch": 3.077590208389319, "percentage": 61.55, "elapsed_time": "2:43:20", "remaining_time": "1:42:01", "throughput": 8664.26, "total_tokens": 84910416} +{"current_steps": 125980, "total_steps": 204665, "loss": 0.0, "lr": 7.732244575994447e-07, "epoch": 3.077712359221166, "percentage": 61.55, "elapsed_time": "2:43:20", "remaining_time": "1:42:01", "throughput": 8664.3, "total_tokens": 84913808} +{"current_steps": 125985, "total_steps": 204665, "loss": 0.0553, "lr": 7.731414025578168e-07, "epoch": 3.0778345100530133, "percentage": 61.56, "elapsed_time": "2:43:20", "remaining_time": "1:42:00", "throughput": 8664.33, "total_tokens": 84917136} +{"current_steps": 125990, "total_steps": 204665, "loss": 0.0, "lr": 7.730583491659659e-07, "epoch": 3.0779566608848605, "percentage": 61.56, "elapsed_time": "2:43:21", "remaining_time": "1:42:00", "throughput": 8664.39, "total_tokens": 84920784} +{"current_steps": 125995, "total_steps": 204665, "loss": 0.0, "lr": 7.729752974244953e-07, "epoch": 3.0780788117167077, "percentage": 61.56, "elapsed_time": "2:43:21", "remaining_time": "1:41:59", "throughput": 8664.44, "total_tokens": 84924368} +{"current_steps": 126000, "total_steps": 204665, "loss": 0.0663, "lr": 7.728922473340098e-07, "epoch": 3.078200962548555, "percentage": 61.56, "elapsed_time": "2:43:21", "remaining_time": "1:41:59", "throughput": 8664.51, "total_tokens": 84928080} +{"current_steps": 126005, "total_steps": 204665, "loss": 0.0002, "lr": 7.728091988951126e-07, "epoch": 3.078323113380402, "percentage": 61.57, "elapsed_time": "2:43:22", "remaining_time": "1:41:59", "throughput": 8664.53, "total_tokens": 84931280} +{"current_steps": 126010, "total_steps": 204665, "loss": 0.0, "lr": 7.727261521084085e-07, "epoch": 3.0784452642122493, "percentage": 61.57, "elapsed_time": "2:43:22", "remaining_time": "1:41:58", "throughput": 8664.56, "total_tokens": 84934672} +{"current_steps": 126015, "total_steps": 204665, "loss": 0.0, "lr": 7.726431069745004e-07, "epoch": 3.0785674150440965, "percentage": 61.57, "elapsed_time": "2:43:22", "remaining_time": "1:41:58", "throughput": 8664.62, "total_tokens": 84938256} +{"current_steps": 126020, "total_steps": 204665, "loss": 0.0, "lr": 7.725600634939931e-07, "epoch": 3.0786895658759437, "percentage": 61.57, "elapsed_time": "2:43:23", "remaining_time": "1:41:57", "throughput": 8664.63, "total_tokens": 84941328} +{"current_steps": 126025, "total_steps": 204665, "loss": 0.0, "lr": 7.724770216674901e-07, "epoch": 3.078811716707791, "percentage": 61.58, "elapsed_time": "2:43:23", "remaining_time": "1:41:57", "throughput": 8664.67, "total_tokens": 84944720} +{"current_steps": 126030, "total_steps": 204665, "loss": 0.0, "lr": 7.723939814955955e-07, "epoch": 3.078933867539638, "percentage": 61.58, "elapsed_time": "2:43:23", "remaining_time": "1:41:57", "throughput": 8664.68, "total_tokens": 84947792} +{"current_steps": 126035, "total_steps": 204665, "loss": 0.0, "lr": 7.723109429789132e-07, "epoch": 3.0790560183714852, "percentage": 61.58, "elapsed_time": "2:43:24", "remaining_time": "1:41:56", "throughput": 8664.72, "total_tokens": 84951248} +{"current_steps": 126040, "total_steps": 204665, "loss": 0.0, "lr": 7.722279061180465e-07, "epoch": 3.0791781692033324, "percentage": 61.58, "elapsed_time": "2:43:24", "remaining_time": "1:41:56", "throughput": 8664.4, "total_tokens": 84954128} +{"current_steps": 126045, "total_steps": 204665, "loss": 0.0, "lr": 7.721448709135998e-07, "epoch": 3.0793003200351796, "percentage": 61.59, "elapsed_time": "2:43:25", "remaining_time": "1:41:56", "throughput": 8664.45, "total_tokens": 84957648} +{"current_steps": 126050, "total_steps": 204665, "loss": 0.0001, "lr": 7.720618373661774e-07, "epoch": 3.079422470867027, "percentage": 61.59, "elapsed_time": "2:43:25", "remaining_time": "1:41:55", "throughput": 8664.47, "total_tokens": 84960912} +{"current_steps": 126055, "total_steps": 204665, "loss": 0.0, "lr": 7.719788054763821e-07, "epoch": 3.079544621698874, "percentage": 61.59, "elapsed_time": "2:43:26", "remaining_time": "1:41:55", "throughput": 8664.49, "total_tokens": 84964048} +{"current_steps": 126060, "total_steps": 204665, "loss": 0.0, "lr": 7.718957752448187e-07, "epoch": 3.0796667725307207, "percentage": 61.59, "elapsed_time": "2:43:26", "remaining_time": "1:41:54", "throughput": 8664.55, "total_tokens": 84967696} +{"current_steps": 126065, "total_steps": 204665, "loss": 0.0857, "lr": 7.718127466720901e-07, "epoch": 3.079788923362568, "percentage": 61.6, "elapsed_time": "2:43:26", "remaining_time": "1:41:54", "throughput": 8664.59, "total_tokens": 84971088} +{"current_steps": 126070, "total_steps": 204665, "loss": 0.0003, "lr": 7.71729719758801e-07, "epoch": 3.079911074194415, "percentage": 61.6, "elapsed_time": "2:43:27", "remaining_time": "1:41:53", "throughput": 8664.63, "total_tokens": 84974608} +{"current_steps": 126075, "total_steps": 204665, "loss": 0.0838, "lr": 7.716466945055546e-07, "epoch": 3.0800332250262623, "percentage": 61.6, "elapsed_time": "2:43:27", "remaining_time": "1:41:53", "throughput": 8664.66, "total_tokens": 84977936} +{"current_steps": 126080, "total_steps": 204665, "loss": 0.0003, "lr": 7.715636709129547e-07, "epoch": 3.0801553758581095, "percentage": 61.6, "elapsed_time": "2:43:27", "remaining_time": "1:41:53", "throughput": 8664.67, "total_tokens": 84980880} +{"current_steps": 126085, "total_steps": 204665, "loss": 0.0002, "lr": 7.714806489816056e-07, "epoch": 3.0802775266899567, "percentage": 61.61, "elapsed_time": "2:43:28", "remaining_time": "1:41:52", "throughput": 8664.66, "total_tokens": 84983696} +{"current_steps": 126090, "total_steps": 204665, "loss": 0.0, "lr": 7.713976287121102e-07, "epoch": 3.080399677521804, "percentage": 61.61, "elapsed_time": "2:43:28", "remaining_time": "1:41:52", "throughput": 8664.72, "total_tokens": 84987344} +{"current_steps": 126095, "total_steps": 204665, "loss": 0.0, "lr": 7.713146101050733e-07, "epoch": 3.080521828353651, "percentage": 61.61, "elapsed_time": "2:43:28", "remaining_time": "1:41:51", "throughput": 8664.75, "total_tokens": 84990736} +{"current_steps": 126100, "total_steps": 204665, "loss": 0.0001, "lr": 7.712315931610976e-07, "epoch": 3.0806439791854983, "percentage": 61.61, "elapsed_time": "2:43:29", "remaining_time": "1:41:51", "throughput": 8664.79, "total_tokens": 84994064} +{"current_steps": 126105, "total_steps": 204665, "loss": 0.0533, "lr": 7.711485778807872e-07, "epoch": 3.0807661300173454, "percentage": 61.62, "elapsed_time": "2:43:29", "remaining_time": "1:41:51", "throughput": 8664.81, "total_tokens": 84997264} +{"current_steps": 126110, "total_steps": 204665, "loss": 0.0, "lr": 7.710655642647463e-07, "epoch": 3.0808882808491926, "percentage": 61.62, "elapsed_time": "2:43:29", "remaining_time": "1:41:50", "throughput": 8664.84, "total_tokens": 85000656} +{"current_steps": 126115, "total_steps": 204665, "loss": 0.0003, "lr": 7.709825523135778e-07, "epoch": 3.08101043168104, "percentage": 61.62, "elapsed_time": "2:43:30", "remaining_time": "1:41:50", "throughput": 8664.88, "total_tokens": 85004048} +{"current_steps": 126120, "total_steps": 204665, "loss": 0.0, "lr": 7.708995420278864e-07, "epoch": 3.081132582512887, "percentage": 61.62, "elapsed_time": "2:43:30", "remaining_time": "1:41:49", "throughput": 8664.93, "total_tokens": 85007568} +{"current_steps": 126125, "total_steps": 204665, "loss": 0.0, "lr": 7.708165334082746e-07, "epoch": 3.081254733344734, "percentage": 61.63, "elapsed_time": "2:43:30", "remaining_time": "1:41:49", "throughput": 8665.0, "total_tokens": 85011344} +{"current_steps": 126130, "total_steps": 204665, "loss": 0.0, "lr": 7.707335264553469e-07, "epoch": 3.0813768841765814, "percentage": 61.63, "elapsed_time": "2:43:31", "remaining_time": "1:41:48", "throughput": 8665.02, "total_tokens": 85014544} +{"current_steps": 126135, "total_steps": 204665, "loss": 0.0, "lr": 7.706505211697065e-07, "epoch": 3.0814990350084286, "percentage": 61.63, "elapsed_time": "2:43:31", "remaining_time": "1:41:48", "throughput": 8665.07, "total_tokens": 85018064} +{"current_steps": 126140, "total_steps": 204665, "loss": 0.0, "lr": 7.70567517551957e-07, "epoch": 3.081621185840276, "percentage": 61.63, "elapsed_time": "2:43:31", "remaining_time": "1:41:48", "throughput": 8665.1, "total_tokens": 85021392} +{"current_steps": 126145, "total_steps": 204665, "loss": 0.0002, "lr": 7.704845156027027e-07, "epoch": 3.0817433366721225, "percentage": 61.63, "elapsed_time": "2:43:32", "remaining_time": "1:41:47", "throughput": 8665.12, "total_tokens": 85024528} +{"current_steps": 126150, "total_steps": 204665, "loss": 0.0418, "lr": 7.704015153225463e-07, "epoch": 3.0818654875039697, "percentage": 61.64, "elapsed_time": "2:43:32", "remaining_time": "1:41:47", "throughput": 8665.14, "total_tokens": 85027728} +{"current_steps": 126155, "total_steps": 204665, "loss": 0.0, "lr": 7.703185167120922e-07, "epoch": 3.081987638335817, "percentage": 61.64, "elapsed_time": "2:43:32", "remaining_time": "1:41:46", "throughput": 8665.16, "total_tokens": 85030864} +{"current_steps": 126160, "total_steps": 204665, "loss": 0.0, "lr": 7.702355197719432e-07, "epoch": 3.082109789167664, "percentage": 61.64, "elapsed_time": "2:43:33", "remaining_time": "1:41:46", "throughput": 8665.22, "total_tokens": 85034512} +{"current_steps": 126165, "total_steps": 204665, "loss": 0.067, "lr": 7.701525245027039e-07, "epoch": 3.0822319399995113, "percentage": 61.64, "elapsed_time": "2:43:33", "remaining_time": "1:41:46", "throughput": 8665.28, "total_tokens": 85038160} +{"current_steps": 126170, "total_steps": 204665, "loss": 0.0, "lr": 7.700695309049766e-07, "epoch": 3.0823540908313585, "percentage": 61.65, "elapsed_time": "2:43:34", "remaining_time": "1:41:45", "throughput": 8665.32, "total_tokens": 85041616} +{"current_steps": 126175, "total_steps": 204665, "loss": 0.0, "lr": 7.699865389793659e-07, "epoch": 3.0824762416632057, "percentage": 61.65, "elapsed_time": "2:43:34", "remaining_time": "1:41:45", "throughput": 8665.35, "total_tokens": 85044880} +{"current_steps": 126180, "total_steps": 204665, "loss": 0.0, "lr": 7.699035487264749e-07, "epoch": 3.082598392495053, "percentage": 61.65, "elapsed_time": "2:43:34", "remaining_time": "1:41:44", "throughput": 8665.35, "total_tokens": 85047760} +{"current_steps": 126185, "total_steps": 204665, "loss": 0.0, "lr": 7.698205601469072e-07, "epoch": 3.0827205433269, "percentage": 61.65, "elapsed_time": "2:43:35", "remaining_time": "1:41:44", "throughput": 8665.39, "total_tokens": 85051216} +{"current_steps": 126190, "total_steps": 204665, "loss": 0.0, "lr": 7.697375732412665e-07, "epoch": 3.0828426941587472, "percentage": 61.66, "elapsed_time": "2:43:35", "remaining_time": "1:41:43", "throughput": 8665.44, "total_tokens": 85054736} +{"current_steps": 126195, "total_steps": 204665, "loss": 0.0, "lr": 7.696545880101556e-07, "epoch": 3.0829648449905944, "percentage": 61.66, "elapsed_time": "2:43:35", "remaining_time": "1:41:43", "throughput": 8665.48, "total_tokens": 85058192} +{"current_steps": 126200, "total_steps": 204665, "loss": 0.0002, "lr": 7.695716044541786e-07, "epoch": 3.0830869958224416, "percentage": 61.66, "elapsed_time": "2:43:36", "remaining_time": "1:41:43", "throughput": 8665.51, "total_tokens": 85061456} +{"current_steps": 126205, "total_steps": 204665, "loss": 0.0, "lr": 7.694886225739392e-07, "epoch": 3.083209146654289, "percentage": 61.66, "elapsed_time": "2:43:36", "remaining_time": "1:41:42", "throughput": 8665.53, "total_tokens": 85064656} +{"current_steps": 126210, "total_steps": 204665, "loss": 0.0, "lr": 7.694056423700401e-07, "epoch": 3.083331297486136, "percentage": 61.67, "elapsed_time": "2:43:36", "remaining_time": "1:41:42", "throughput": 8665.55, "total_tokens": 85067856} +{"current_steps": 126215, "total_steps": 204665, "loss": 0.0433, "lr": 7.693226638430857e-07, "epoch": 3.083453448317983, "percentage": 61.67, "elapsed_time": "2:43:37", "remaining_time": "1:41:41", "throughput": 8665.58, "total_tokens": 85071120} +{"current_steps": 126220, "total_steps": 204665, "loss": 0.0, "lr": 7.692396869936784e-07, "epoch": 3.0835755991498304, "percentage": 61.67, "elapsed_time": "2:43:37", "remaining_time": "1:41:41", "throughput": 8665.61, "total_tokens": 85074384} +{"current_steps": 126225, "total_steps": 204665, "loss": 0.0, "lr": 7.691567118224228e-07, "epoch": 3.0836977499816776, "percentage": 61.67, "elapsed_time": "2:43:37", "remaining_time": "1:41:41", "throughput": 8665.68, "total_tokens": 85078224} +{"current_steps": 126230, "total_steps": 204665, "loss": 0.0, "lr": 7.690737383299212e-07, "epoch": 3.0838199008135248, "percentage": 61.68, "elapsed_time": "2:43:38", "remaining_time": "1:41:40", "throughput": 8665.7, "total_tokens": 85081296} +{"current_steps": 126235, "total_steps": 204665, "loss": 0.0, "lr": 7.689907665167775e-07, "epoch": 3.0839420516453715, "percentage": 61.68, "elapsed_time": "2:43:38", "remaining_time": "1:41:40", "throughput": 8665.74, "total_tokens": 85084816} +{"current_steps": 126240, "total_steps": 204665, "loss": 0.0, "lr": 7.689077963835955e-07, "epoch": 3.0840642024772187, "percentage": 61.68, "elapsed_time": "2:43:38", "remaining_time": "1:41:39", "throughput": 8665.78, "total_tokens": 85088272} +{"current_steps": 126245, "total_steps": 204665, "loss": 0.0, "lr": 7.688248279309776e-07, "epoch": 3.084186353309066, "percentage": 61.68, "elapsed_time": "2:43:39", "remaining_time": "1:41:39", "throughput": 8665.83, "total_tokens": 85091792} +{"current_steps": 126250, "total_steps": 204665, "loss": 0.0541, "lr": 7.687418611595282e-07, "epoch": 3.084308504140913, "percentage": 61.69, "elapsed_time": "2:43:39", "remaining_time": "1:41:39", "throughput": 8665.85, "total_tokens": 85094992} +{"current_steps": 126255, "total_steps": 204665, "loss": 0.0002, "lr": 7.686588960698497e-07, "epoch": 3.0844306549727603, "percentage": 61.69, "elapsed_time": "2:43:39", "remaining_time": "1:41:38", "throughput": 8665.9, "total_tokens": 85098512} +{"current_steps": 126260, "total_steps": 204665, "loss": 0.0, "lr": 7.685759326625461e-07, "epoch": 3.0845528058046074, "percentage": 61.69, "elapsed_time": "2:43:40", "remaining_time": "1:41:38", "throughput": 8665.92, "total_tokens": 85101712} +{"current_steps": 126265, "total_steps": 204665, "loss": 0.0, "lr": 7.684929709382209e-07, "epoch": 3.0846749566364546, "percentage": 61.69, "elapsed_time": "2:43:40", "remaining_time": "1:41:37", "throughput": 8665.96, "total_tokens": 85105040} +{"current_steps": 126270, "total_steps": 204665, "loss": 0.0001, "lr": 7.684100108974766e-07, "epoch": 3.084797107468302, "percentage": 61.7, "elapsed_time": "2:43:40", "remaining_time": "1:41:37", "throughput": 8665.98, "total_tokens": 85108304} +{"current_steps": 126275, "total_steps": 204665, "loss": 0.0, "lr": 7.683270525409175e-07, "epoch": 3.084919258300149, "percentage": 61.7, "elapsed_time": "2:43:41", "remaining_time": "1:41:36", "throughput": 8666.01, "total_tokens": 85111632} +{"current_steps": 126280, "total_steps": 204665, "loss": 0.0003, "lr": 7.682440958691461e-07, "epoch": 3.085041409131996, "percentage": 61.7, "elapsed_time": "2:43:41", "remaining_time": "1:41:36", "throughput": 8666.08, "total_tokens": 85115408} +{"current_steps": 126285, "total_steps": 204665, "loss": 0.0001, "lr": 7.681611408827661e-07, "epoch": 3.0851635599638434, "percentage": 61.7, "elapsed_time": "2:43:42", "remaining_time": "1:41:36", "throughput": 8666.14, "total_tokens": 85119056} +{"current_steps": 126290, "total_steps": 204665, "loss": 0.0, "lr": 7.680781875823805e-07, "epoch": 3.0852857107956906, "percentage": 61.71, "elapsed_time": "2:43:42", "remaining_time": "1:41:35", "throughput": 8666.17, "total_tokens": 85122384} +{"current_steps": 126295, "total_steps": 204665, "loss": 0.0, "lr": 7.679952359685925e-07, "epoch": 3.0854078616275378, "percentage": 61.71, "elapsed_time": "2:43:42", "remaining_time": "1:41:35", "throughput": 8666.2, "total_tokens": 85125584} +{"current_steps": 126300, "total_steps": 204665, "loss": 0.0002, "lr": 7.679122860420059e-07, "epoch": 3.085530012459385, "percentage": 61.71, "elapsed_time": "2:43:43", "remaining_time": "1:41:34", "throughput": 8666.3, "total_tokens": 85129808} +{"current_steps": 126305, "total_steps": 204665, "loss": 0.0, "lr": 7.678293378032234e-07, "epoch": 3.085652163291232, "percentage": 61.71, "elapsed_time": "2:43:43", "remaining_time": "1:41:34", "throughput": 8666.3, "total_tokens": 85132752} +{"current_steps": 126310, "total_steps": 204665, "loss": 0.0, "lr": 7.677463912528487e-07, "epoch": 3.0857743141230793, "percentage": 61.72, "elapsed_time": "2:43:43", "remaining_time": "1:41:34", "throughput": 8666.32, "total_tokens": 85135888} +{"current_steps": 126315, "total_steps": 204665, "loss": 0.0, "lr": 7.676634463914843e-07, "epoch": 3.0858964649549265, "percentage": 61.72, "elapsed_time": "2:43:44", "remaining_time": "1:41:33", "throughput": 8666.4, "total_tokens": 85139728} +{"current_steps": 126320, "total_steps": 204665, "loss": 0.0, "lr": 7.675805032197342e-07, "epoch": 3.0860186157867737, "percentage": 61.72, "elapsed_time": "2:43:44", "remaining_time": "1:41:33", "throughput": 8666.44, "total_tokens": 85143248} +{"current_steps": 126325, "total_steps": 204665, "loss": 0.0003, "lr": 7.674975617382007e-07, "epoch": 3.0861407666186205, "percentage": 61.72, "elapsed_time": "2:43:44", "remaining_time": "1:41:32", "throughput": 8666.48, "total_tokens": 85146576} +{"current_steps": 126330, "total_steps": 204665, "loss": 0.0, "lr": 7.674146219474877e-07, "epoch": 3.0862629174504677, "percentage": 61.73, "elapsed_time": "2:43:45", "remaining_time": "1:41:32", "throughput": 8666.51, "total_tokens": 85149904} +{"current_steps": 126335, "total_steps": 204665, "loss": 0.0, "lr": 7.673316838481982e-07, "epoch": 3.086385068282315, "percentage": 61.73, "elapsed_time": "2:43:45", "remaining_time": "1:41:31", "throughput": 8666.54, "total_tokens": 85153232} +{"current_steps": 126340, "total_steps": 204665, "loss": 0.0, "lr": 7.672487474409353e-07, "epoch": 3.086507219114162, "percentage": 61.73, "elapsed_time": "2:43:45", "remaining_time": "1:41:31", "throughput": 8666.55, "total_tokens": 85156304} +{"current_steps": 126345, "total_steps": 204665, "loss": 0.0, "lr": 7.671658127263023e-07, "epoch": 3.0866293699460092, "percentage": 61.73, "elapsed_time": "2:43:46", "remaining_time": "1:41:31", "throughput": 8666.6, "total_tokens": 85159824} +{"current_steps": 126350, "total_steps": 204665, "loss": 0.0535, "lr": 7.670828797049017e-07, "epoch": 3.0867515207778564, "percentage": 61.74, "elapsed_time": "2:43:46", "remaining_time": "1:41:30", "throughput": 8666.63, "total_tokens": 85163088} +{"current_steps": 126355, "total_steps": 204665, "loss": 0.0002, "lr": 7.66999948377337e-07, "epoch": 3.0868736716097036, "percentage": 61.74, "elapsed_time": "2:43:46", "remaining_time": "1:41:30", "throughput": 8666.7, "total_tokens": 85166928} +{"current_steps": 126360, "total_steps": 204665, "loss": 0.0, "lr": 7.669170187442119e-07, "epoch": 3.086995822441551, "percentage": 61.74, "elapsed_time": "2:43:47", "remaining_time": "1:41:29", "throughput": 8666.72, "total_tokens": 85170064} +{"current_steps": 126365, "total_steps": 204665, "loss": 0.0, "lr": 7.668340908061284e-07, "epoch": 3.087117973273398, "percentage": 61.74, "elapsed_time": "2:43:47", "remaining_time": "1:41:29", "throughput": 8666.78, "total_tokens": 85173712} +{"current_steps": 126370, "total_steps": 204665, "loss": 0.0, "lr": 7.667511645636905e-07, "epoch": 3.087240124105245, "percentage": 61.74, "elapsed_time": "2:43:47", "remaining_time": "1:41:29", "throughput": 8666.8, "total_tokens": 85176848} +{"current_steps": 126375, "total_steps": 204665, "loss": 0.0, "lr": 7.666682400175005e-07, "epoch": 3.0873622749370924, "percentage": 61.75, "elapsed_time": "2:43:48", "remaining_time": "1:41:28", "throughput": 8666.81, "total_tokens": 85179920} +{"current_steps": 126380, "total_steps": 204665, "loss": 0.0001, "lr": 7.665853171681622e-07, "epoch": 3.0874844257689396, "percentage": 61.75, "elapsed_time": "2:43:48", "remaining_time": "1:41:28", "throughput": 8666.82, "total_tokens": 85182928} +{"current_steps": 126385, "total_steps": 204665, "loss": 0.0, "lr": 7.665023960162781e-07, "epoch": 3.0876065766007867, "percentage": 61.75, "elapsed_time": "2:43:48", "remaining_time": "1:41:27", "throughput": 8666.84, "total_tokens": 85186064} +{"current_steps": 126390, "total_steps": 204665, "loss": 0.0, "lr": 7.664194765624512e-07, "epoch": 3.087728727432634, "percentage": 61.75, "elapsed_time": "2:43:49", "remaining_time": "1:41:27", "throughput": 8666.85, "total_tokens": 85189072} +{"current_steps": 126395, "total_steps": 204665, "loss": 0.0546, "lr": 7.66336558807285e-07, "epoch": 3.087850878264481, "percentage": 61.76, "elapsed_time": "2:43:49", "remaining_time": "1:41:27", "throughput": 8666.89, "total_tokens": 85192528} +{"current_steps": 126400, "total_steps": 204665, "loss": 0.0, "lr": 7.662536427513818e-07, "epoch": 3.0879730290963283, "percentage": 61.76, "elapsed_time": "2:43:50", "remaining_time": "1:41:26", "throughput": 8666.95, "total_tokens": 85196240} +{"current_steps": 126405, "total_steps": 204665, "loss": 0.0, "lr": 7.661707283953455e-07, "epoch": 3.0880951799281755, "percentage": 61.76, "elapsed_time": "2:43:50", "remaining_time": "1:41:26", "throughput": 8666.99, "total_tokens": 85199632} +{"current_steps": 126410, "total_steps": 204665, "loss": 0.0, "lr": 7.660878157397779e-07, "epoch": 3.0882173307600223, "percentage": 61.76, "elapsed_time": "2:43:50", "remaining_time": "1:41:25", "throughput": 8667.04, "total_tokens": 85203280} +{"current_steps": 126415, "total_steps": 204665, "loss": 0.0, "lr": 7.660049047852834e-07, "epoch": 3.0883394815918694, "percentage": 61.77, "elapsed_time": "2:43:51", "remaining_time": "1:41:25", "throughput": 8667.09, "total_tokens": 85206800} +{"current_steps": 126420, "total_steps": 204665, "loss": 0.0, "lr": 7.659219955324635e-07, "epoch": 3.0884616324237166, "percentage": 61.77, "elapsed_time": "2:43:51", "remaining_time": "1:41:24", "throughput": 8667.1, "total_tokens": 85209808} +{"current_steps": 126425, "total_steps": 204665, "loss": 0.0868, "lr": 7.658390879819218e-07, "epoch": 3.088583783255564, "percentage": 61.77, "elapsed_time": "2:43:51", "remaining_time": "1:41:24", "throughput": 8667.15, "total_tokens": 85213328} +{"current_steps": 126430, "total_steps": 204665, "loss": 0.0, "lr": 7.657561821342617e-07, "epoch": 3.088705934087411, "percentage": 61.77, "elapsed_time": "2:43:52", "remaining_time": "1:41:24", "throughput": 8667.15, "total_tokens": 85216400} +{"current_steps": 126435, "total_steps": 204665, "loss": 0.0, "lr": 7.656732779900856e-07, "epoch": 3.088828084919258, "percentage": 61.78, "elapsed_time": "2:43:52", "remaining_time": "1:41:23", "throughput": 8667.18, "total_tokens": 85219600} +{"current_steps": 126440, "total_steps": 204665, "loss": 0.0, "lr": 7.655903755499961e-07, "epoch": 3.0889502357511054, "percentage": 61.78, "elapsed_time": "2:43:52", "remaining_time": "1:41:23", "throughput": 8667.19, "total_tokens": 85222736} +{"current_steps": 126445, "total_steps": 204665, "loss": 0.0, "lr": 7.655074748145968e-07, "epoch": 3.0890723865829526, "percentage": 61.78, "elapsed_time": "2:43:53", "remaining_time": "1:41:22", "throughput": 8667.21, "total_tokens": 85225872} +{"current_steps": 126450, "total_steps": 204665, "loss": 0.0, "lr": 7.654245757844897e-07, "epoch": 3.0891945374147998, "percentage": 61.78, "elapsed_time": "2:43:53", "remaining_time": "1:41:22", "throughput": 8667.28, "total_tokens": 85229584} +{"current_steps": 126455, "total_steps": 204665, "loss": 0.0, "lr": 7.653416784602789e-07, "epoch": 3.089316688246647, "percentage": 61.79, "elapsed_time": "2:43:53", "remaining_time": "1:41:22", "throughput": 8667.34, "total_tokens": 85233232} +{"current_steps": 126460, "total_steps": 204665, "loss": 0.0526, "lr": 7.652587828425659e-07, "epoch": 3.089438839078494, "percentage": 61.79, "elapsed_time": "2:43:54", "remaining_time": "1:41:21", "throughput": 8667.38, "total_tokens": 85236688} +{"current_steps": 126465, "total_steps": 204665, "loss": 0.0, "lr": 7.651758889319547e-07, "epoch": 3.0895609899103413, "percentage": 61.79, "elapsed_time": "2:43:54", "remaining_time": "1:41:21", "throughput": 8667.41, "total_tokens": 85239888} +{"current_steps": 126470, "total_steps": 204665, "loss": 0.0001, "lr": 7.650929967290471e-07, "epoch": 3.0896831407421885, "percentage": 61.79, "elapsed_time": "2:43:54", "remaining_time": "1:41:20", "throughput": 8667.43, "total_tokens": 85243024} +{"current_steps": 126475, "total_steps": 204665, "loss": 0.0, "lr": 7.650101062344468e-07, "epoch": 3.0898052915740357, "percentage": 61.8, "elapsed_time": "2:43:55", "remaining_time": "1:41:20", "throughput": 8667.45, "total_tokens": 85246288} +{"current_steps": 126480, "total_steps": 204665, "loss": 0.0, "lr": 7.649272174487558e-07, "epoch": 3.089927442405883, "percentage": 61.8, "elapsed_time": "2:43:55", "remaining_time": "1:41:19", "throughput": 8667.45, "total_tokens": 85249104} +{"current_steps": 126485, "total_steps": 204665, "loss": 0.0, "lr": 7.648443303725775e-07, "epoch": 3.09004959323773, "percentage": 61.8, "elapsed_time": "2:43:55", "remaining_time": "1:41:19", "throughput": 8667.52, "total_tokens": 85252880} +{"current_steps": 126490, "total_steps": 204665, "loss": 0.0, "lr": 7.647614450065145e-07, "epoch": 3.0901717440695773, "percentage": 61.8, "elapsed_time": "2:43:56", "remaining_time": "1:41:19", "throughput": 8667.56, "total_tokens": 85256400} +{"current_steps": 126495, "total_steps": 204665, "loss": 0.0004, "lr": 7.646785613511696e-07, "epoch": 3.0902938949014245, "percentage": 61.81, "elapsed_time": "2:43:56", "remaining_time": "1:41:18", "throughput": 8667.59, "total_tokens": 85259600} +{"current_steps": 126500, "total_steps": 204665, "loss": 0.0, "lr": 7.645956794071457e-07, "epoch": 3.0904160457332717, "percentage": 61.81, "elapsed_time": "2:43:56", "remaining_time": "1:41:18", "throughput": 8667.62, "total_tokens": 85262928} +{"current_steps": 126505, "total_steps": 204665, "loss": 0.0, "lr": 7.645127991750449e-07, "epoch": 3.0905381965651184, "percentage": 61.81, "elapsed_time": "2:43:57", "remaining_time": "1:41:17", "throughput": 8667.66, "total_tokens": 85266384} +{"current_steps": 126510, "total_steps": 204665, "loss": 0.0001, "lr": 7.644299206554702e-07, "epoch": 3.0906603473969656, "percentage": 61.81, "elapsed_time": "2:43:57", "remaining_time": "1:41:17", "throughput": 8667.67, "total_tokens": 85269456} +{"current_steps": 126515, "total_steps": 204665, "loss": 0.0, "lr": 7.643470438490252e-07, "epoch": 3.090782498228813, "percentage": 61.82, "elapsed_time": "2:43:57", "remaining_time": "1:41:17", "throughput": 8667.68, "total_tokens": 85272528} +{"current_steps": 126520, "total_steps": 204665, "loss": 0.0002, "lr": 7.642641687563112e-07, "epoch": 3.09090464906066, "percentage": 61.82, "elapsed_time": "2:43:58", "remaining_time": "1:41:16", "throughput": 8667.72, "total_tokens": 85275920} +{"current_steps": 126525, "total_steps": 204665, "loss": 0.0, "lr": 7.641812953779322e-07, "epoch": 3.091026799892507, "percentage": 61.82, "elapsed_time": "2:43:58", "remaining_time": "1:41:16", "throughput": 8667.79, "total_tokens": 85279632} +{"current_steps": 126530, "total_steps": 204665, "loss": 0.0, "lr": 7.640984237144898e-07, "epoch": 3.0911489507243544, "percentage": 61.82, "elapsed_time": "2:43:59", "remaining_time": "1:41:15", "throughput": 8667.8, "total_tokens": 85282768} +{"current_steps": 126535, "total_steps": 204665, "loss": 0.0, "lr": 7.640155537665875e-07, "epoch": 3.0912711015562016, "percentage": 61.83, "elapsed_time": "2:43:59", "remaining_time": "1:41:15", "throughput": 8667.82, "total_tokens": 85285840} +{"current_steps": 126540, "total_steps": 204665, "loss": 0.0, "lr": 7.639326855348273e-07, "epoch": 3.0913932523880487, "percentage": 61.83, "elapsed_time": "2:43:59", "remaining_time": "1:41:14", "throughput": 8667.86, "total_tokens": 85289360} +{"current_steps": 126545, "total_steps": 204665, "loss": 0.0, "lr": 7.638498190198119e-07, "epoch": 3.091515403219896, "percentage": 61.83, "elapsed_time": "2:44:00", "remaining_time": "1:41:14", "throughput": 8667.87, "total_tokens": 85292368} +{"current_steps": 126550, "total_steps": 204665, "loss": 0.0, "lr": 7.637669542221445e-07, "epoch": 3.091637554051743, "percentage": 61.83, "elapsed_time": "2:44:00", "remaining_time": "1:41:14", "throughput": 8667.94, "total_tokens": 85296144} +{"current_steps": 126555, "total_steps": 204665, "loss": 0.0242, "lr": 7.63684091142427e-07, "epoch": 3.0917597048835903, "percentage": 61.84, "elapsed_time": "2:44:00", "remaining_time": "1:41:13", "throughput": 8667.97, "total_tokens": 85299408} +{"current_steps": 126560, "total_steps": 204665, "loss": 0.0812, "lr": 7.636012297812627e-07, "epoch": 3.0918818557154375, "percentage": 61.84, "elapsed_time": "2:44:01", "remaining_time": "1:41:13", "throughput": 8668.03, "total_tokens": 85302992} +{"current_steps": 126565, "total_steps": 204665, "loss": 0.0, "lr": 7.635183701392536e-07, "epoch": 3.0920040065472847, "percentage": 61.84, "elapsed_time": "2:44:01", "remaining_time": "1:41:12", "throughput": 8668.06, "total_tokens": 85306320} +{"current_steps": 126570, "total_steps": 204665, "loss": 0.0001, "lr": 7.634355122170028e-07, "epoch": 3.092126157379132, "percentage": 61.84, "elapsed_time": "2:44:01", "remaining_time": "1:41:12", "throughput": 8668.06, "total_tokens": 85309264} +{"current_steps": 126575, "total_steps": 204665, "loss": 0.0, "lr": 7.633526560151121e-07, "epoch": 3.092248308210979, "percentage": 61.84, "elapsed_time": "2:44:02", "remaining_time": "1:41:12", "throughput": 8668.1, "total_tokens": 85312656} +{"current_steps": 126580, "total_steps": 204665, "loss": 0.0279, "lr": 7.632698015341846e-07, "epoch": 3.0923704590428263, "percentage": 61.85, "elapsed_time": "2:44:02", "remaining_time": "1:41:11", "throughput": 8668.11, "total_tokens": 85315792} +{"current_steps": 126585, "total_steps": 204665, "loss": 0.0, "lr": 7.631869487748232e-07, "epoch": 3.0924926098746734, "percentage": 61.85, "elapsed_time": "2:44:02", "remaining_time": "1:41:11", "throughput": 8668.13, "total_tokens": 85318928} +{"current_steps": 126590, "total_steps": 204665, "loss": 0.0, "lr": 7.631040977376299e-07, "epoch": 3.09261476070652, "percentage": 61.85, "elapsed_time": "2:44:03", "remaining_time": "1:41:10", "throughput": 8668.14, "total_tokens": 85321936} +{"current_steps": 126595, "total_steps": 204665, "loss": 0.0, "lr": 7.630212484232072e-07, "epoch": 3.0927369115383674, "percentage": 61.85, "elapsed_time": "2:44:03", "remaining_time": "1:41:10", "throughput": 8668.19, "total_tokens": 85325520} +{"current_steps": 126600, "total_steps": 204665, "loss": 0.0346, "lr": 7.629384008321578e-07, "epoch": 3.0928590623702146, "percentage": 61.86, "elapsed_time": "2:44:03", "remaining_time": "1:41:09", "throughput": 8668.19, "total_tokens": 85328464} +{"current_steps": 126605, "total_steps": 204665, "loss": 0.0, "lr": 7.628555549650838e-07, "epoch": 3.0929812132020618, "percentage": 61.86, "elapsed_time": "2:44:04", "remaining_time": "1:41:09", "throughput": 8668.21, "total_tokens": 85331536} +{"current_steps": 126610, "total_steps": 204665, "loss": 0.0, "lr": 7.627727108225886e-07, "epoch": 3.093103364033909, "percentage": 61.86, "elapsed_time": "2:44:04", "remaining_time": "1:41:09", "throughput": 8668.23, "total_tokens": 85334800} +{"current_steps": 126615, "total_steps": 204665, "loss": 0.0, "lr": 7.626898684052734e-07, "epoch": 3.093225514865756, "percentage": 61.86, "elapsed_time": "2:44:04", "remaining_time": "1:41:08", "throughput": 8668.26, "total_tokens": 85338064} +{"current_steps": 126620, "total_steps": 204665, "loss": 0.0, "lr": 7.626070277137417e-07, "epoch": 3.0933476656976033, "percentage": 61.87, "elapsed_time": "2:44:05", "remaining_time": "1:41:08", "throughput": 8668.3, "total_tokens": 85341520} +{"current_steps": 126625, "total_steps": 204665, "loss": 0.0453, "lr": 7.625241887485954e-07, "epoch": 3.0934698165294505, "percentage": 61.87, "elapsed_time": "2:44:05", "remaining_time": "1:41:07", "throughput": 8668.29, "total_tokens": 85344400} +{"current_steps": 126630, "total_steps": 204665, "loss": 0.0, "lr": 7.624413515104373e-07, "epoch": 3.0935919673612977, "percentage": 61.87, "elapsed_time": "2:44:05", "remaining_time": "1:41:07", "throughput": 8668.32, "total_tokens": 85347664} +{"current_steps": 126635, "total_steps": 204665, "loss": 0.0, "lr": 7.623585159998692e-07, "epoch": 3.093714118193145, "percentage": 61.87, "elapsed_time": "2:44:06", "remaining_time": "1:41:07", "throughput": 8668.35, "total_tokens": 85350992} +{"current_steps": 126640, "total_steps": 204665, "loss": 0.0, "lr": 7.622756822174941e-07, "epoch": 3.093836269024992, "percentage": 61.88, "elapsed_time": "2:44:06", "remaining_time": "1:41:06", "throughput": 8668.39, "total_tokens": 85354384} +{"current_steps": 126645, "total_steps": 204665, "loss": 0.0, "lr": 7.621928501639142e-07, "epoch": 3.0939584198568393, "percentage": 61.88, "elapsed_time": "2:44:06", "remaining_time": "1:41:06", "throughput": 8668.39, "total_tokens": 85357328} +{"current_steps": 126650, "total_steps": 204665, "loss": 0.0, "lr": 7.621100198397318e-07, "epoch": 3.0940805706886865, "percentage": 61.88, "elapsed_time": "2:44:07", "remaining_time": "1:41:05", "throughput": 8668.45, "total_tokens": 85360912} +{"current_steps": 126655, "total_steps": 204665, "loss": 0.0, "lr": 7.620271912455496e-07, "epoch": 3.0942027215205337, "percentage": 61.88, "elapsed_time": "2:44:07", "remaining_time": "1:41:05", "throughput": 8668.47, "total_tokens": 85364112} +{"current_steps": 126660, "total_steps": 204665, "loss": 0.0, "lr": 7.619443643819691e-07, "epoch": 3.094324872352381, "percentage": 61.89, "elapsed_time": "2:44:08", "remaining_time": "1:41:05", "throughput": 8668.5, "total_tokens": 85367440} +{"current_steps": 126665, "total_steps": 204665, "loss": 0.0001, "lr": 7.618615392495935e-07, "epoch": 3.094447023184228, "percentage": 61.89, "elapsed_time": "2:44:08", "remaining_time": "1:41:04", "throughput": 8668.56, "total_tokens": 85371024} +{"current_steps": 126670, "total_steps": 204665, "loss": 0.0, "lr": 7.61778715849025e-07, "epoch": 3.0945691740160752, "percentage": 61.89, "elapsed_time": "2:44:08", "remaining_time": "1:41:04", "throughput": 8668.64, "total_tokens": 85374992} +{"current_steps": 126675, "total_steps": 204665, "loss": 0.0, "lr": 7.616958941808654e-07, "epoch": 3.0946913248479224, "percentage": 61.89, "elapsed_time": "2:44:09", "remaining_time": "1:41:03", "throughput": 8668.65, "total_tokens": 85378000} +{"current_steps": 126680, "total_steps": 204665, "loss": 0.0, "lr": 7.616130742457178e-07, "epoch": 3.094813475679769, "percentage": 61.9, "elapsed_time": "2:44:09", "remaining_time": "1:41:03", "throughput": 8668.65, "total_tokens": 85380944} +{"current_steps": 126685, "total_steps": 204665, "loss": 0.0, "lr": 7.615302560441837e-07, "epoch": 3.0949356265116164, "percentage": 61.9, "elapsed_time": "2:44:09", "remaining_time": "1:41:02", "throughput": 8668.68, "total_tokens": 85384272} +{"current_steps": 126690, "total_steps": 204665, "loss": 0.0, "lr": 7.614474395768661e-07, "epoch": 3.0950577773434635, "percentage": 61.9, "elapsed_time": "2:44:10", "remaining_time": "1:41:02", "throughput": 8668.72, "total_tokens": 85387664} +{"current_steps": 126695, "total_steps": 204665, "loss": 0.0317, "lr": 7.613646248443666e-07, "epoch": 3.0951799281753107, "percentage": 61.9, "elapsed_time": "2:44:10", "remaining_time": "1:41:02", "throughput": 8668.76, "total_tokens": 85390992} +{"current_steps": 126700, "total_steps": 204665, "loss": 0.0, "lr": 7.612818118472875e-07, "epoch": 3.095302079007158, "percentage": 61.91, "elapsed_time": "2:44:10", "remaining_time": "1:41:01", "throughput": 8668.76, "total_tokens": 85394000} +{"current_steps": 126705, "total_steps": 204665, "loss": 0.0, "lr": 7.611990005862318e-07, "epoch": 3.095424229839005, "percentage": 61.91, "elapsed_time": "2:44:11", "remaining_time": "1:41:01", "throughput": 8668.82, "total_tokens": 85397584} +{"current_steps": 126710, "total_steps": 204665, "loss": 0.0716, "lr": 7.611161910618007e-07, "epoch": 3.0955463806708523, "percentage": 61.91, "elapsed_time": "2:44:11", "remaining_time": "1:41:00", "throughput": 8668.91, "total_tokens": 85401616} +{"current_steps": 126715, "total_steps": 204665, "loss": 0.0, "lr": 7.610333832745974e-07, "epoch": 3.0956685315026995, "percentage": 61.91, "elapsed_time": "2:44:11", "remaining_time": "1:41:00", "throughput": 8668.98, "total_tokens": 85405392} +{"current_steps": 126720, "total_steps": 204665, "loss": 0.0, "lr": 7.609505772252232e-07, "epoch": 3.0957906823345467, "percentage": 61.92, "elapsed_time": "2:44:12", "remaining_time": "1:41:00", "throughput": 8669.01, "total_tokens": 85408656} +{"current_steps": 126725, "total_steps": 204665, "loss": 0.0, "lr": 7.60867772914281e-07, "epoch": 3.095912833166394, "percentage": 61.92, "elapsed_time": "2:44:12", "remaining_time": "1:40:59", "throughput": 8669.03, "total_tokens": 85411920} +{"current_steps": 126730, "total_steps": 204665, "loss": 0.0, "lr": 7.607849703423723e-07, "epoch": 3.096034983998241, "percentage": 61.92, "elapsed_time": "2:44:12", "remaining_time": "1:40:59", "throughput": 8669.08, "total_tokens": 85415376} +{"current_steps": 126735, "total_steps": 204665, "loss": 0.0, "lr": 7.607021695100997e-07, "epoch": 3.0961571348300883, "percentage": 61.92, "elapsed_time": "2:44:13", "remaining_time": "1:40:58", "throughput": 8669.09, "total_tokens": 85418448} +{"current_steps": 126740, "total_steps": 204665, "loss": 0.0533, "lr": 7.606193704180655e-07, "epoch": 3.0962792856619354, "percentage": 61.93, "elapsed_time": "2:44:13", "remaining_time": "1:40:58", "throughput": 8669.11, "total_tokens": 85421648} +{"current_steps": 126745, "total_steps": 204665, "loss": 0.0, "lr": 7.605365730668717e-07, "epoch": 3.0964014364937826, "percentage": 61.93, "elapsed_time": "2:44:13", "remaining_time": "1:40:57", "throughput": 8669.16, "total_tokens": 85425168} +{"current_steps": 126750, "total_steps": 204665, "loss": 0.0002, "lr": 7.604537774571203e-07, "epoch": 3.09652358732563, "percentage": 61.93, "elapsed_time": "2:44:14", "remaining_time": "1:40:57", "throughput": 8669.19, "total_tokens": 85428496} +{"current_steps": 126755, "total_steps": 204665, "loss": 0.0, "lr": 7.603709835894133e-07, "epoch": 3.096645738157477, "percentage": 61.93, "elapsed_time": "2:44:14", "remaining_time": "1:40:57", "throughput": 8669.26, "total_tokens": 85432272} +{"current_steps": 126760, "total_steps": 204665, "loss": 0.0, "lr": 7.602881914643529e-07, "epoch": 3.096767888989324, "percentage": 61.94, "elapsed_time": "2:44:14", "remaining_time": "1:40:56", "throughput": 8669.3, "total_tokens": 85435664} +{"current_steps": 126765, "total_steps": 204665, "loss": 0.0, "lr": 7.602054010825415e-07, "epoch": 3.0968900398211714, "percentage": 61.94, "elapsed_time": "2:44:15", "remaining_time": "1:40:56", "throughput": 8669.36, "total_tokens": 85439248} +{"current_steps": 126770, "total_steps": 204665, "loss": 0.0204, "lr": 7.601226124445806e-07, "epoch": 3.097012190653018, "percentage": 61.94, "elapsed_time": "2:44:15", "remaining_time": "1:40:55", "throughput": 8669.39, "total_tokens": 85442640} +{"current_steps": 126775, "total_steps": 204665, "loss": 0.0, "lr": 7.60039825551073e-07, "epoch": 3.0971343414848653, "percentage": 61.94, "elapsed_time": "2:44:16", "remaining_time": "1:40:55", "throughput": 8669.43, "total_tokens": 85446032} +{"current_steps": 126780, "total_steps": 204665, "loss": 0.0, "lr": 7.599570404026199e-07, "epoch": 3.0972564923167125, "percentage": 61.95, "elapsed_time": "2:44:16", "remaining_time": "1:40:55", "throughput": 8669.44, "total_tokens": 85449168} +{"current_steps": 126785, "total_steps": 204665, "loss": 0.0, "lr": 7.598742569998243e-07, "epoch": 3.0973786431485597, "percentage": 61.95, "elapsed_time": "2:44:16", "remaining_time": "1:40:54", "throughput": 8669.48, "total_tokens": 85452496} +{"current_steps": 126790, "total_steps": 204665, "loss": 0.0359, "lr": 7.597914753432871e-07, "epoch": 3.097500793980407, "percentage": 61.95, "elapsed_time": "2:44:17", "remaining_time": "1:40:54", "throughput": 8669.51, "total_tokens": 85455824} +{"current_steps": 126795, "total_steps": 204665, "loss": 0.0, "lr": 7.597086954336112e-07, "epoch": 3.097622944812254, "percentage": 61.95, "elapsed_time": "2:44:17", "remaining_time": "1:40:53", "throughput": 8669.6, "total_tokens": 85459920} +{"current_steps": 126800, "total_steps": 204665, "loss": 0.0383, "lr": 7.596259172713982e-07, "epoch": 3.0977450956441013, "percentage": 61.95, "elapsed_time": "2:44:17", "remaining_time": "1:40:53", "throughput": 8669.64, "total_tokens": 85463312} +{"current_steps": 126805, "total_steps": 204665, "loss": 0.0, "lr": 7.595431408572504e-07, "epoch": 3.0978672464759485, "percentage": 61.96, "elapsed_time": "2:44:18", "remaining_time": "1:40:53", "throughput": 8669.64, "total_tokens": 85466256} +{"current_steps": 126810, "total_steps": 204665, "loss": 0.0, "lr": 7.594603661917695e-07, "epoch": 3.0979893973077957, "percentage": 61.96, "elapsed_time": "2:44:18", "remaining_time": "1:40:52", "throughput": 8669.71, "total_tokens": 85470096} +{"current_steps": 126815, "total_steps": 204665, "loss": 0.0726, "lr": 7.593775932755573e-07, "epoch": 3.098111548139643, "percentage": 61.96, "elapsed_time": "2:44:18", "remaining_time": "1:40:52", "throughput": 8669.73, "total_tokens": 85473232} +{"current_steps": 126820, "total_steps": 204665, "loss": 0.0366, "lr": 7.592948221092158e-07, "epoch": 3.09823369897149, "percentage": 61.96, "elapsed_time": "2:44:19", "remaining_time": "1:40:51", "throughput": 8669.76, "total_tokens": 85476560} +{"current_steps": 126825, "total_steps": 204665, "loss": 0.0, "lr": 7.592120526933477e-07, "epoch": 3.0983558498033372, "percentage": 61.97, "elapsed_time": "2:44:19", "remaining_time": "1:40:51", "throughput": 8669.77, "total_tokens": 85479632} +{"current_steps": 126830, "total_steps": 204665, "loss": 0.0, "lr": 7.591292850285538e-07, "epoch": 3.0984780006351844, "percentage": 61.97, "elapsed_time": "2:44:19", "remaining_time": "1:40:50", "throughput": 8669.78, "total_tokens": 85482704} +{"current_steps": 126835, "total_steps": 204665, "loss": 0.0, "lr": 7.59046519115437e-07, "epoch": 3.0986001514670316, "percentage": 61.97, "elapsed_time": "2:44:20", "remaining_time": "1:40:50", "throughput": 8669.83, "total_tokens": 85486288} +{"current_steps": 126840, "total_steps": 204665, "loss": 0.0001, "lr": 7.589637549545981e-07, "epoch": 3.098722302298879, "percentage": 61.97, "elapsed_time": "2:44:20", "remaining_time": "1:40:50", "throughput": 8669.87, "total_tokens": 85489680} +{"current_steps": 126845, "total_steps": 204665, "loss": 0.0, "lr": 7.588809925466402e-07, "epoch": 3.098844453130726, "percentage": 61.98, "elapsed_time": "2:44:20", "remaining_time": "1:40:49", "throughput": 8669.9, "total_tokens": 85493008} +{"current_steps": 126850, "total_steps": 204665, "loss": 0.0, "lr": 7.587982318921643e-07, "epoch": 3.098966603962573, "percentage": 61.98, "elapsed_time": "2:44:21", "remaining_time": "1:40:49", "throughput": 8669.92, "total_tokens": 85496208} +{"current_steps": 126855, "total_steps": 204665, "loss": 0.0001, "lr": 7.587154729917724e-07, "epoch": 3.0990887547944204, "percentage": 61.98, "elapsed_time": "2:44:21", "remaining_time": "1:40:48", "throughput": 8669.93, "total_tokens": 85499280} +{"current_steps": 126860, "total_steps": 204665, "loss": 0.0966, "lr": 7.586327158460668e-07, "epoch": 3.099210905626267, "percentage": 61.98, "elapsed_time": "2:44:21", "remaining_time": "1:40:48", "throughput": 8669.96, "total_tokens": 85502608} +{"current_steps": 126865, "total_steps": 204665, "loss": 0.0, "lr": 7.585499604556487e-07, "epoch": 3.0993330564581143, "percentage": 61.99, "elapsed_time": "2:44:22", "remaining_time": "1:40:48", "throughput": 8669.95, "total_tokens": 85505424} +{"current_steps": 126870, "total_steps": 204665, "loss": 0.0, "lr": 7.584672068211205e-07, "epoch": 3.0994552072899615, "percentage": 61.99, "elapsed_time": "2:44:22", "remaining_time": "1:40:47", "throughput": 8669.99, "total_tokens": 85508816} +{"current_steps": 126875, "total_steps": 204665, "loss": 0.0, "lr": 7.583844549430835e-07, "epoch": 3.0995773581218087, "percentage": 61.99, "elapsed_time": "2:44:22", "remaining_time": "1:40:47", "throughput": 8670.03, "total_tokens": 85512272} +{"current_steps": 126880, "total_steps": 204665, "loss": 0.0, "lr": 7.583017048221401e-07, "epoch": 3.099699508953656, "percentage": 61.99, "elapsed_time": "2:44:23", "remaining_time": "1:40:46", "throughput": 8670.05, "total_tokens": 85515472} +{"current_steps": 126885, "total_steps": 204665, "loss": 0.0, "lr": 7.582189564588912e-07, "epoch": 3.099821659785503, "percentage": 62.0, "elapsed_time": "2:44:23", "remaining_time": "1:40:46", "throughput": 8670.06, "total_tokens": 85518544} +{"current_steps": 126890, "total_steps": 204665, "loss": 0.0, "lr": 7.581362098539391e-07, "epoch": 3.0999438106173502, "percentage": 62.0, "elapsed_time": "2:44:24", "remaining_time": "1:40:45", "throughput": 8670.07, "total_tokens": 85521616} +{"current_steps": 126895, "total_steps": 204665, "loss": 0.0, "lr": 7.58053465007886e-07, "epoch": 3.1000659614491974, "percentage": 62.0, "elapsed_time": "2:44:24", "remaining_time": "1:40:45", "throughput": 8670.06, "total_tokens": 85524368} +{"current_steps": 126900, "total_steps": 204665, "loss": 0.0, "lr": 7.57970721921333e-07, "epoch": 3.1001881122810446, "percentage": 62.0, "elapsed_time": "2:44:24", "remaining_time": "1:40:45", "throughput": 8670.07, "total_tokens": 85527376} +{"current_steps": 126905, "total_steps": 204665, "loss": 0.0001, "lr": 7.578879805948819e-07, "epoch": 3.100310263112892, "percentage": 62.01, "elapsed_time": "2:44:25", "remaining_time": "1:40:44", "throughput": 8670.16, "total_tokens": 85531472} +{"current_steps": 126910, "total_steps": 204665, "loss": 0.0, "lr": 7.578052410291346e-07, "epoch": 3.100432413944739, "percentage": 62.01, "elapsed_time": "2:44:25", "remaining_time": "1:40:44", "throughput": 8670.19, "total_tokens": 85534672} +{"current_steps": 126915, "total_steps": 204665, "loss": 0.0, "lr": 7.577225032246925e-07, "epoch": 3.100554564776586, "percentage": 62.01, "elapsed_time": "2:44:25", "remaining_time": "1:40:43", "throughput": 8670.21, "total_tokens": 85537872} +{"current_steps": 126920, "total_steps": 204665, "loss": 0.0, "lr": 7.576397671821579e-07, "epoch": 3.1006767156084334, "percentage": 62.01, "elapsed_time": "2:44:26", "remaining_time": "1:40:43", "throughput": 8670.27, "total_tokens": 85541584} +{"current_steps": 126925, "total_steps": 204665, "loss": 0.0, "lr": 7.575570329021316e-07, "epoch": 3.1007988664402806, "percentage": 62.02, "elapsed_time": "2:44:26", "remaining_time": "1:40:43", "throughput": 8670.29, "total_tokens": 85544720} +{"current_steps": 126930, "total_steps": 204665, "loss": 0.0, "lr": 7.574743003852164e-07, "epoch": 3.1009210172721278, "percentage": 62.02, "elapsed_time": "2:44:26", "remaining_time": "1:40:42", "throughput": 8670.31, "total_tokens": 85547984} +{"current_steps": 126935, "total_steps": 204665, "loss": 0.0, "lr": 7.573915696320128e-07, "epoch": 3.101043168103975, "percentage": 62.02, "elapsed_time": "2:44:27", "remaining_time": "1:40:42", "throughput": 8670.32, "total_tokens": 85550928} +{"current_steps": 126940, "total_steps": 204665, "loss": 0.0005, "lr": 7.573088406431236e-07, "epoch": 3.101165318935822, "percentage": 62.02, "elapsed_time": "2:44:27", "remaining_time": "1:40:41", "throughput": 8670.37, "total_tokens": 85554512} +{"current_steps": 126945, "total_steps": 204665, "loss": 0.0001, "lr": 7.57226113419149e-07, "epoch": 3.1012874697676693, "percentage": 62.03, "elapsed_time": "2:44:27", "remaining_time": "1:40:41", "throughput": 8670.39, "total_tokens": 85557648} +{"current_steps": 126950, "total_steps": 204665, "loss": 0.0, "lr": 7.571433879606918e-07, "epoch": 3.101409620599516, "percentage": 62.03, "elapsed_time": "2:44:28", "remaining_time": "1:40:40", "throughput": 8670.5, "total_tokens": 85561936} +{"current_steps": 126955, "total_steps": 204665, "loss": 0.0, "lr": 7.570606642683531e-07, "epoch": 3.1015317714313633, "percentage": 62.03, "elapsed_time": "2:44:28", "remaining_time": "1:40:40", "throughput": 8670.53, "total_tokens": 85565264} +{"current_steps": 126960, "total_steps": 204665, "loss": 0.075, "lr": 7.569779423427347e-07, "epoch": 3.1016539222632105, "percentage": 62.03, "elapsed_time": "2:44:28", "remaining_time": "1:40:40", "throughput": 8670.55, "total_tokens": 85568400} +{"current_steps": 126965, "total_steps": 204665, "loss": 0.0001, "lr": 7.568952221844383e-07, "epoch": 3.1017760730950577, "percentage": 62.04, "elapsed_time": "2:44:29", "remaining_time": "1:40:39", "throughput": 8670.6, "total_tokens": 85571984} +{"current_steps": 126970, "total_steps": 204665, "loss": 0.0, "lr": 7.568125037940647e-07, "epoch": 3.101898223926905, "percentage": 62.04, "elapsed_time": "2:44:29", "remaining_time": "1:40:39", "throughput": 8670.63, "total_tokens": 85575312} +{"current_steps": 126975, "total_steps": 204665, "loss": 0.0, "lr": 7.56729787172216e-07, "epoch": 3.102020374758752, "percentage": 62.04, "elapsed_time": "2:44:29", "remaining_time": "1:40:38", "throughput": 8670.71, "total_tokens": 85579152} +{"current_steps": 126980, "total_steps": 204665, "loss": 0.0001, "lr": 7.566470723194942e-07, "epoch": 3.102142525590599, "percentage": 62.04, "elapsed_time": "2:44:30", "remaining_time": "1:40:38", "throughput": 8670.76, "total_tokens": 85582672} +{"current_steps": 126985, "total_steps": 204665, "loss": 0.0003, "lr": 7.565643592364999e-07, "epoch": 3.1022646764224464, "percentage": 62.05, "elapsed_time": "2:44:30", "remaining_time": "1:40:38", "throughput": 8670.78, "total_tokens": 85585872} +{"current_steps": 126990, "total_steps": 204665, "loss": 0.0425, "lr": 7.564816479238355e-07, "epoch": 3.1023868272542936, "percentage": 62.05, "elapsed_time": "2:44:30", "remaining_time": "1:40:37", "throughput": 8670.83, "total_tokens": 85589456} +{"current_steps": 126995, "total_steps": 204665, "loss": 0.0, "lr": 7.563989383821017e-07, "epoch": 3.102508978086141, "percentage": 62.05, "elapsed_time": "2:44:31", "remaining_time": "1:40:37", "throughput": 8670.87, "total_tokens": 85592784} +{"current_steps": 127000, "total_steps": 204665, "loss": 0.0, "lr": 7.563162306119006e-07, "epoch": 3.102631128917988, "percentage": 62.05, "elapsed_time": "2:44:31", "remaining_time": "1:40:36", "throughput": 8670.88, "total_tokens": 85595920} +{"current_steps": 127005, "total_steps": 204665, "loss": 0.029, "lr": 7.562335246138333e-07, "epoch": 3.102753279749835, "percentage": 62.06, "elapsed_time": "2:44:31", "remaining_time": "1:40:36", "throughput": 8670.92, "total_tokens": 85599184} +{"current_steps": 127010, "total_steps": 204665, "loss": 0.0005, "lr": 7.561508203885012e-07, "epoch": 3.1028754305816824, "percentage": 62.06, "elapsed_time": "2:44:32", "remaining_time": "1:40:36", "throughput": 8670.93, "total_tokens": 85602384} +{"current_steps": 127015, "total_steps": 204665, "loss": 0.0, "lr": 7.560681179365062e-07, "epoch": 3.1029975814135295, "percentage": 62.06, "elapsed_time": "2:44:32", "remaining_time": "1:40:35", "throughput": 8670.96, "total_tokens": 85605648} +{"current_steps": 127020, "total_steps": 204665, "loss": 0.0, "lr": 7.559854172584491e-07, "epoch": 3.1031197322453767, "percentage": 62.06, "elapsed_time": "2:44:33", "remaining_time": "1:40:35", "throughput": 8671.03, "total_tokens": 85609424} +{"current_steps": 127025, "total_steps": 204665, "loss": 0.0, "lr": 7.559027183549322e-07, "epoch": 3.103241883077224, "percentage": 62.06, "elapsed_time": "2:44:33", "remaining_time": "1:40:34", "throughput": 8671.06, "total_tokens": 85612752} +{"current_steps": 127030, "total_steps": 204665, "loss": 0.0001, "lr": 7.558200212265558e-07, "epoch": 3.103364033909071, "percentage": 62.07, "elapsed_time": "2:44:33", "remaining_time": "1:40:34", "throughput": 8671.11, "total_tokens": 85616336} +{"current_steps": 127035, "total_steps": 204665, "loss": 0.1186, "lr": 7.557373258739224e-07, "epoch": 3.103486184740918, "percentage": 62.07, "elapsed_time": "2:44:34", "remaining_time": "1:40:33", "throughput": 8671.15, "total_tokens": 85619792} +{"current_steps": 127040, "total_steps": 204665, "loss": 0.0, "lr": 7.556546322976324e-07, "epoch": 3.103608335572765, "percentage": 62.07, "elapsed_time": "2:44:34", "remaining_time": "1:40:33", "throughput": 8671.2, "total_tokens": 85623248} +{"current_steps": 127045, "total_steps": 204665, "loss": 0.0, "lr": 7.555719404982878e-07, "epoch": 3.1037304864046122, "percentage": 62.07, "elapsed_time": "2:44:34", "remaining_time": "1:40:33", "throughput": 8671.26, "total_tokens": 85626896} +{"current_steps": 127050, "total_steps": 204665, "loss": 0.0001, "lr": 7.5548925047649e-07, "epoch": 3.1038526372364594, "percentage": 62.08, "elapsed_time": "2:44:35", "remaining_time": "1:40:32", "throughput": 8671.28, "total_tokens": 85630160} +{"current_steps": 127055, "total_steps": 204665, "loss": 0.0, "lr": 7.5540656223284e-07, "epoch": 3.1039747880683066, "percentage": 62.08, "elapsed_time": "2:44:35", "remaining_time": "1:40:32", "throughput": 8671.28, "total_tokens": 85633040} +{"current_steps": 127060, "total_steps": 204665, "loss": 0.0001, "lr": 7.553238757679393e-07, "epoch": 3.104096938900154, "percentage": 62.08, "elapsed_time": "2:44:35", "remaining_time": "1:40:31", "throughput": 8671.29, "total_tokens": 85636176} +{"current_steps": 127065, "total_steps": 204665, "loss": 0.076, "lr": 7.552411910823891e-07, "epoch": 3.104219089732001, "percentage": 62.08, "elapsed_time": "2:44:36", "remaining_time": "1:40:31", "throughput": 8671.33, "total_tokens": 85639568} +{"current_steps": 127070, "total_steps": 204665, "loss": 0.0, "lr": 7.551585081767906e-07, "epoch": 3.104341240563848, "percentage": 62.09, "elapsed_time": "2:44:36", "remaining_time": "1:40:31", "throughput": 8671.36, "total_tokens": 85642896} +{"current_steps": 127075, "total_steps": 204665, "loss": 0.0, "lr": 7.550758270517458e-07, "epoch": 3.1044633913956954, "percentage": 62.09, "elapsed_time": "2:44:36", "remaining_time": "1:40:30", "throughput": 8671.37, "total_tokens": 85645968} +{"current_steps": 127080, "total_steps": 204665, "loss": 0.0, "lr": 7.54993147707855e-07, "epoch": 3.1045855422275426, "percentage": 62.09, "elapsed_time": "2:44:37", "remaining_time": "1:40:30", "throughput": 8671.41, "total_tokens": 85649424} +{"current_steps": 127085, "total_steps": 204665, "loss": 0.0, "lr": 7.549104701457203e-07, "epoch": 3.1047076930593898, "percentage": 62.09, "elapsed_time": "2:44:37", "remaining_time": "1:40:29", "throughput": 8671.44, "total_tokens": 85652688} +{"current_steps": 127090, "total_steps": 204665, "loss": 0.0002, "lr": 7.548277943659422e-07, "epoch": 3.104829843891237, "percentage": 62.1, "elapsed_time": "2:44:37", "remaining_time": "1:40:29", "throughput": 8671.46, "total_tokens": 85655952} +{"current_steps": 127095, "total_steps": 204665, "loss": 0.0, "lr": 7.547451203691227e-07, "epoch": 3.104951994723084, "percentage": 62.1, "elapsed_time": "2:44:38", "remaining_time": "1:40:29", "throughput": 8671.52, "total_tokens": 85659600} +{"current_steps": 127100, "total_steps": 204665, "loss": 0.0001, "lr": 7.546624481558623e-07, "epoch": 3.1050741455549313, "percentage": 62.1, "elapsed_time": "2:44:38", "remaining_time": "1:40:28", "throughput": 8671.55, "total_tokens": 85662864} +{"current_steps": 127105, "total_steps": 204665, "loss": 0.075, "lr": 7.545797777267627e-07, "epoch": 3.1051962963867785, "percentage": 62.1, "elapsed_time": "2:44:38", "remaining_time": "1:40:28", "throughput": 8671.6, "total_tokens": 85666448} +{"current_steps": 127110, "total_steps": 204665, "loss": 0.0, "lr": 7.54497109082425e-07, "epoch": 3.1053184472186257, "percentage": 62.11, "elapsed_time": "2:44:39", "remaining_time": "1:40:27", "throughput": 8671.64, "total_tokens": 85669840} +{"current_steps": 127115, "total_steps": 204665, "loss": 0.0, "lr": 7.544144422234504e-07, "epoch": 3.105440598050473, "percentage": 62.11, "elapsed_time": "2:44:39", "remaining_time": "1:40:27", "throughput": 8671.68, "total_tokens": 85673296} +{"current_steps": 127120, "total_steps": 204665, "loss": 0.0, "lr": 7.543317771504402e-07, "epoch": 3.10556274888232, "percentage": 62.11, "elapsed_time": "2:44:40", "remaining_time": "1:40:26", "throughput": 8671.72, "total_tokens": 85676624} +{"current_steps": 127125, "total_steps": 204665, "loss": 0.0, "lr": 7.542491138639951e-07, "epoch": 3.1056848997141673, "percentage": 62.11, "elapsed_time": "2:44:40", "remaining_time": "1:40:26", "throughput": 8671.74, "total_tokens": 85679888} +{"current_steps": 127130, "total_steps": 204665, "loss": 0.0001, "lr": 7.541664523647168e-07, "epoch": 3.105807050546014, "percentage": 62.12, "elapsed_time": "2:44:40", "remaining_time": "1:40:26", "throughput": 8671.81, "total_tokens": 85683664} +{"current_steps": 127135, "total_steps": 204665, "loss": 0.0, "lr": 7.540837926532057e-07, "epoch": 3.105929201377861, "percentage": 62.12, "elapsed_time": "2:44:41", "remaining_time": "1:40:25", "throughput": 8671.87, "total_tokens": 85687312} +{"current_steps": 127140, "total_steps": 204665, "loss": 0.0, "lr": 7.540011347300635e-07, "epoch": 3.1060513522097084, "percentage": 62.12, "elapsed_time": "2:44:41", "remaining_time": "1:40:25", "throughput": 8671.89, "total_tokens": 85690512} +{"current_steps": 127145, "total_steps": 204665, "loss": 0.0, "lr": 7.539184785958916e-07, "epoch": 3.1061735030415556, "percentage": 62.12, "elapsed_time": "2:44:41", "remaining_time": "1:40:24", "throughput": 8671.9, "total_tokens": 85693648} +{"current_steps": 127150, "total_steps": 204665, "loss": 0.0003, "lr": 7.538358242512905e-07, "epoch": 3.106295653873403, "percentage": 62.13, "elapsed_time": "2:44:42", "remaining_time": "1:40:24", "throughput": 8671.92, "total_tokens": 85696848} +{"current_steps": 127155, "total_steps": 204665, "loss": 0.0, "lr": 7.537531716968617e-07, "epoch": 3.10641780470525, "percentage": 62.13, "elapsed_time": "2:44:42", "remaining_time": "1:40:24", "throughput": 8671.97, "total_tokens": 85700368} +{"current_steps": 127160, "total_steps": 204665, "loss": 0.0, "lr": 7.536705209332059e-07, "epoch": 3.106539955537097, "percentage": 62.13, "elapsed_time": "2:44:42", "remaining_time": "1:40:23", "throughput": 8671.99, "total_tokens": 85703568} +{"current_steps": 127165, "total_steps": 204665, "loss": 0.0, "lr": 7.535878719609241e-07, "epoch": 3.1066621063689444, "percentage": 62.13, "elapsed_time": "2:44:43", "remaining_time": "1:40:23", "throughput": 8672.05, "total_tokens": 85707216} +{"current_steps": 127170, "total_steps": 204665, "loss": 0.0, "lr": 7.535052247806179e-07, "epoch": 3.1067842572007915, "percentage": 62.14, "elapsed_time": "2:44:43", "remaining_time": "1:40:22", "throughput": 8672.09, "total_tokens": 85710736} +{"current_steps": 127175, "total_steps": 204665, "loss": 0.0, "lr": 7.534225793928878e-07, "epoch": 3.1069064080326387, "percentage": 62.14, "elapsed_time": "2:44:43", "remaining_time": "1:40:22", "throughput": 8672.13, "total_tokens": 85714064} +{"current_steps": 127180, "total_steps": 204665, "loss": 0.0, "lr": 7.533399357983353e-07, "epoch": 3.107028558864486, "percentage": 62.14, "elapsed_time": "2:44:44", "remaining_time": "1:40:21", "throughput": 8672.15, "total_tokens": 85717328} +{"current_steps": 127185, "total_steps": 204665, "loss": 0.0, "lr": 7.532572939975608e-07, "epoch": 3.107150709696333, "percentage": 62.14, "elapsed_time": "2:44:44", "remaining_time": "1:40:21", "throughput": 8672.22, "total_tokens": 85721040} +{"current_steps": 127190, "total_steps": 204665, "loss": 0.0, "lr": 7.53174653991166e-07, "epoch": 3.1072728605281803, "percentage": 62.15, "elapsed_time": "2:44:44", "remaining_time": "1:40:21", "throughput": 8672.23, "total_tokens": 85724112} +{"current_steps": 127195, "total_steps": 204665, "loss": 0.0, "lr": 7.530920157797511e-07, "epoch": 3.1073950113600275, "percentage": 62.15, "elapsed_time": "2:44:45", "remaining_time": "1:40:20", "throughput": 8672.33, "total_tokens": 85728336} +{"current_steps": 127200, "total_steps": 204665, "loss": 0.0001, "lr": 7.530093793639174e-07, "epoch": 3.1075171621918747, "percentage": 62.15, "elapsed_time": "2:44:45", "remaining_time": "1:40:20", "throughput": 8672.39, "total_tokens": 85731920} +{"current_steps": 127205, "total_steps": 204665, "loss": 0.0, "lr": 7.529267447442664e-07, "epoch": 3.107639313023722, "percentage": 62.15, "elapsed_time": "2:44:45", "remaining_time": "1:40:19", "throughput": 8672.46, "total_tokens": 85735696} +{"current_steps": 127210, "total_steps": 204665, "loss": 0.0365, "lr": 7.528441119213984e-07, "epoch": 3.107761463855569, "percentage": 62.16, "elapsed_time": "2:44:46", "remaining_time": "1:40:19", "throughput": 8672.5, "total_tokens": 85739152} +{"current_steps": 127215, "total_steps": 204665, "loss": 0.0, "lr": 7.527614808959144e-07, "epoch": 3.107883614687416, "percentage": 62.16, "elapsed_time": "2:44:46", "remaining_time": "1:40:19", "throughput": 8672.53, "total_tokens": 85742416} +{"current_steps": 127220, "total_steps": 204665, "loss": 0.0, "lr": 7.526788516684155e-07, "epoch": 3.108005765519263, "percentage": 62.16, "elapsed_time": "2:44:47", "remaining_time": "1:40:18", "throughput": 8672.54, "total_tokens": 85745488} +{"current_steps": 127225, "total_steps": 204665, "loss": 0.0583, "lr": 7.525962242395022e-07, "epoch": 3.10812791635111, "percentage": 62.16, "elapsed_time": "2:44:47", "remaining_time": "1:40:18", "throughput": 8672.55, "total_tokens": 85748560} +{"current_steps": 127230, "total_steps": 204665, "loss": 0.0, "lr": 7.525135986097762e-07, "epoch": 3.1082500671829574, "percentage": 62.17, "elapsed_time": "2:44:47", "remaining_time": "1:40:17", "throughput": 8672.61, "total_tokens": 85752272} +{"current_steps": 127235, "total_steps": 204665, "loss": 0.0774, "lr": 7.524309747798374e-07, "epoch": 3.1083722180148046, "percentage": 62.17, "elapsed_time": "2:44:48", "remaining_time": "1:40:17", "throughput": 8672.63, "total_tokens": 85755472} +{"current_steps": 127240, "total_steps": 204665, "loss": 0.0, "lr": 7.523483527502876e-07, "epoch": 3.1084943688466518, "percentage": 62.17, "elapsed_time": "2:44:48", "remaining_time": "1:40:17", "throughput": 8672.64, "total_tokens": 85758480} +{"current_steps": 127245, "total_steps": 204665, "loss": 0.0, "lr": 7.522657325217267e-07, "epoch": 3.108616519678499, "percentage": 62.17, "elapsed_time": "2:44:48", "remaining_time": "1:40:16", "throughput": 8672.64, "total_tokens": 85761424} +{"current_steps": 127250, "total_steps": 204665, "loss": 0.0001, "lr": 7.521831140947566e-07, "epoch": 3.108738670510346, "percentage": 62.17, "elapsed_time": "2:44:49", "remaining_time": "1:40:16", "throughput": 8672.67, "total_tokens": 85764688} +{"current_steps": 127255, "total_steps": 204665, "loss": 0.0, "lr": 7.52100497469977e-07, "epoch": 3.1088608213421933, "percentage": 62.18, "elapsed_time": "2:44:49", "remaining_time": "1:40:15", "throughput": 8672.68, "total_tokens": 85767760} +{"current_steps": 127260, "total_steps": 204665, "loss": 0.0, "lr": 7.520178826479895e-07, "epoch": 3.1089829721740405, "percentage": 62.18, "elapsed_time": "2:44:49", "remaining_time": "1:40:15", "throughput": 8672.69, "total_tokens": 85770832} +{"current_steps": 127265, "total_steps": 204665, "loss": 0.0001, "lr": 7.519352696293948e-07, "epoch": 3.1091051230058877, "percentage": 62.18, "elapsed_time": "2:44:50", "remaining_time": "1:40:14", "throughput": 8672.75, "total_tokens": 85774544} +{"current_steps": 127270, "total_steps": 204665, "loss": 0.0325, "lr": 7.518526584147934e-07, "epoch": 3.109227273837735, "percentage": 62.18, "elapsed_time": "2:44:50", "remaining_time": "1:40:14", "throughput": 8672.8, "total_tokens": 85778064} +{"current_steps": 127275, "total_steps": 204665, "loss": 0.0, "lr": 7.517700490047864e-07, "epoch": 3.109349424669582, "percentage": 62.19, "elapsed_time": "2:44:50", "remaining_time": "1:40:14", "throughput": 8672.82, "total_tokens": 85781328} +{"current_steps": 127280, "total_steps": 204665, "loss": 0.0002, "lr": 7.516874413999739e-07, "epoch": 3.1094715755014293, "percentage": 62.19, "elapsed_time": "2:44:51", "remaining_time": "1:40:13", "throughput": 8672.83, "total_tokens": 85784400} +{"current_steps": 127285, "total_steps": 204665, "loss": 0.0, "lr": 7.516048356009577e-07, "epoch": 3.1095937263332765, "percentage": 62.19, "elapsed_time": "2:44:51", "remaining_time": "1:40:13", "throughput": 8672.86, "total_tokens": 85787600} +{"current_steps": 127290, "total_steps": 204665, "loss": 0.0, "lr": 7.515222316083374e-07, "epoch": 3.1097158771651237, "percentage": 62.19, "elapsed_time": "2:44:51", "remaining_time": "1:40:12", "throughput": 8672.88, "total_tokens": 85790864} +{"current_steps": 127295, "total_steps": 204665, "loss": 0.0, "lr": 7.514396294227143e-07, "epoch": 3.109838027996971, "percentage": 62.2, "elapsed_time": "2:44:52", "remaining_time": "1:40:12", "throughput": 8672.92, "total_tokens": 85794320} +{"current_steps": 127300, "total_steps": 204665, "loss": 0.0, "lr": 7.513570290446896e-07, "epoch": 3.109960178828818, "percentage": 62.2, "elapsed_time": "2:44:52", "remaining_time": "1:40:12", "throughput": 8672.95, "total_tokens": 85797584} +{"current_steps": 127305, "total_steps": 204665, "loss": 0.0, "lr": 7.512744304748629e-07, "epoch": 3.110082329660665, "percentage": 62.2, "elapsed_time": "2:44:52", "remaining_time": "1:40:11", "throughput": 8673.0, "total_tokens": 85801104} +{"current_steps": 127310, "total_steps": 204665, "loss": 0.0, "lr": 7.511918337138359e-07, "epoch": 3.110204480492512, "percentage": 62.2, "elapsed_time": "2:44:53", "remaining_time": "1:40:11", "throughput": 8673.02, "total_tokens": 85804368} +{"current_steps": 127315, "total_steps": 204665, "loss": 0.0001, "lr": 7.511092387622086e-07, "epoch": 3.110326631324359, "percentage": 62.21, "elapsed_time": "2:44:53", "remaining_time": "1:40:10", "throughput": 8673.05, "total_tokens": 85807696} +{"current_steps": 127320, "total_steps": 204665, "loss": 0.0001, "lr": 7.510266456205816e-07, "epoch": 3.1104487821562063, "percentage": 62.21, "elapsed_time": "2:44:53", "remaining_time": "1:40:10", "throughput": 8673.04, "total_tokens": 85810512} +{"current_steps": 127325, "total_steps": 204665, "loss": 0.0, "lr": 7.509440542895562e-07, "epoch": 3.1105709329880535, "percentage": 62.21, "elapsed_time": "2:44:54", "remaining_time": "1:40:09", "throughput": 8673.05, "total_tokens": 85813520} +{"current_steps": 127330, "total_steps": 204665, "loss": 0.0002, "lr": 7.508614647697324e-07, "epoch": 3.1106930838199007, "percentage": 62.21, "elapsed_time": "2:44:54", "remaining_time": "1:40:09", "throughput": 8673.08, "total_tokens": 85816784} +{"current_steps": 127335, "total_steps": 204665, "loss": 0.0, "lr": 7.507788770617111e-07, "epoch": 3.110815234651748, "percentage": 62.22, "elapsed_time": "2:44:54", "remaining_time": "1:40:09", "throughput": 8673.11, "total_tokens": 85820112} +{"current_steps": 127340, "total_steps": 204665, "loss": 0.0, "lr": 7.506962911660927e-07, "epoch": 3.110937385483595, "percentage": 62.22, "elapsed_time": "2:44:55", "remaining_time": "1:40:08", "throughput": 8673.11, "total_tokens": 85823056} +{"current_steps": 127345, "total_steps": 204665, "loss": 0.0, "lr": 7.506137070834784e-07, "epoch": 3.1110595363154423, "percentage": 62.22, "elapsed_time": "2:44:55", "remaining_time": "1:40:08", "throughput": 8673.15, "total_tokens": 85826448} +{"current_steps": 127350, "total_steps": 204665, "loss": 0.0, "lr": 7.505311248144677e-07, "epoch": 3.1111816871472895, "percentage": 62.22, "elapsed_time": "2:44:55", "remaining_time": "1:40:07", "throughput": 8673.16, "total_tokens": 85829456} +{"current_steps": 127355, "total_steps": 204665, "loss": 0.0, "lr": 7.504485443596619e-07, "epoch": 3.1113038379791367, "percentage": 62.23, "elapsed_time": "2:44:56", "remaining_time": "1:40:07", "throughput": 8673.19, "total_tokens": 85832720} +{"current_steps": 127360, "total_steps": 204665, "loss": 0.0, "lr": 7.503659657196617e-07, "epoch": 3.111425988810984, "percentage": 62.23, "elapsed_time": "2:44:56", "remaining_time": "1:40:07", "throughput": 8673.22, "total_tokens": 85836048} +{"current_steps": 127365, "total_steps": 204665, "loss": 0.0, "lr": 7.502833888950672e-07, "epoch": 3.111548139642831, "percentage": 62.23, "elapsed_time": "2:44:57", "remaining_time": "1:40:06", "throughput": 8673.25, "total_tokens": 85839376} +{"current_steps": 127370, "total_steps": 204665, "loss": 0.0, "lr": 7.502008138864791e-07, "epoch": 3.1116702904746782, "percentage": 62.23, "elapsed_time": "2:44:57", "remaining_time": "1:40:06", "throughput": 8673.26, "total_tokens": 85842448} +{"current_steps": 127375, "total_steps": 204665, "loss": 0.0442, "lr": 7.501182406944977e-07, "epoch": 3.1117924413065254, "percentage": 62.24, "elapsed_time": "2:44:57", "remaining_time": "1:40:05", "throughput": 8673.28, "total_tokens": 85845648} +{"current_steps": 127380, "total_steps": 204665, "loss": 0.0, "lr": 7.500356693197236e-07, "epoch": 3.1119145921383726, "percentage": 62.24, "elapsed_time": "2:44:58", "remaining_time": "1:40:05", "throughput": 8673.3, "total_tokens": 85848784} +{"current_steps": 127385, "total_steps": 204665, "loss": 0.0551, "lr": 7.499530997627576e-07, "epoch": 3.11203674297022, "percentage": 62.24, "elapsed_time": "2:44:58", "remaining_time": "1:40:05", "throughput": 8673.31, "total_tokens": 85851920} +{"current_steps": 127390, "total_steps": 204665, "loss": 0.0619, "lr": 7.498705320241998e-07, "epoch": 3.112158893802067, "percentage": 62.24, "elapsed_time": "2:44:58", "remaining_time": "1:40:04", "throughput": 8673.33, "total_tokens": 85855056} +{"current_steps": 127395, "total_steps": 204665, "loss": 0.0, "lr": 7.49787966104651e-07, "epoch": 3.1122810446339138, "percentage": 62.25, "elapsed_time": "2:44:59", "remaining_time": "1:40:04", "throughput": 8673.35, "total_tokens": 85858256} +{"current_steps": 127400, "total_steps": 204665, "loss": 0.0002, "lr": 7.49705402004711e-07, "epoch": 3.112403195465761, "percentage": 62.25, "elapsed_time": "2:44:59", "remaining_time": "1:40:03", "throughput": 8673.4, "total_tokens": 85861712} +{"current_steps": 127405, "total_steps": 204665, "loss": 0.0, "lr": 7.49622839724981e-07, "epoch": 3.112525346297608, "percentage": 62.25, "elapsed_time": "2:44:59", "remaining_time": "1:40:03", "throughput": 8673.43, "total_tokens": 85865104} +{"current_steps": 127410, "total_steps": 204665, "loss": 0.0002, "lr": 7.495402792660608e-07, "epoch": 3.1126474971294553, "percentage": 62.25, "elapsed_time": "2:45:00", "remaining_time": "1:40:02", "throughput": 8673.46, "total_tokens": 85868368} +{"current_steps": 127415, "total_steps": 204665, "loss": 0.0, "lr": 7.494577206285511e-07, "epoch": 3.1127696479613025, "percentage": 62.26, "elapsed_time": "2:45:00", "remaining_time": "1:40:02", "throughput": 8673.47, "total_tokens": 85871440} +{"current_steps": 127420, "total_steps": 204665, "loss": 0.0001, "lr": 7.493751638130523e-07, "epoch": 3.1128917987931497, "percentage": 62.26, "elapsed_time": "2:45:00", "remaining_time": "1:40:02", "throughput": 8673.56, "total_tokens": 85875472} +{"current_steps": 127425, "total_steps": 204665, "loss": 0.0001, "lr": 7.492926088201648e-07, "epoch": 3.113013949624997, "percentage": 62.26, "elapsed_time": "2:45:01", "remaining_time": "1:40:01", "throughput": 8673.58, "total_tokens": 85878672} +{"current_steps": 127430, "total_steps": 204665, "loss": 0.0, "lr": 7.49210055650489e-07, "epoch": 3.113136100456844, "percentage": 62.26, "elapsed_time": "2:45:01", "remaining_time": "1:40:01", "throughput": 8673.61, "total_tokens": 85881936} +{"current_steps": 127435, "total_steps": 204665, "loss": 0.0, "lr": 7.491275043046246e-07, "epoch": 3.1132582512886913, "percentage": 62.27, "elapsed_time": "2:45:01", "remaining_time": "1:40:00", "throughput": 8673.64, "total_tokens": 85885264} +{"current_steps": 127440, "total_steps": 204665, "loss": 0.0663, "lr": 7.49044954783173e-07, "epoch": 3.1133804021205385, "percentage": 62.27, "elapsed_time": "2:45:02", "remaining_time": "1:40:00", "throughput": 8673.65, "total_tokens": 85888400} +{"current_steps": 127445, "total_steps": 204665, "loss": 0.0, "lr": 7.489624070867337e-07, "epoch": 3.1135025529523856, "percentage": 62.27, "elapsed_time": "2:45:02", "remaining_time": "1:40:00", "throughput": 8673.71, "total_tokens": 85892048} +{"current_steps": 127450, "total_steps": 204665, "loss": 0.0, "lr": 7.48879861215907e-07, "epoch": 3.113624703784233, "percentage": 62.27, "elapsed_time": "2:45:02", "remaining_time": "1:39:59", "throughput": 8673.72, "total_tokens": 85895056} +{"current_steps": 127455, "total_steps": 204665, "loss": 0.0006, "lr": 7.487973171712942e-07, "epoch": 3.11374685461608, "percentage": 62.27, "elapsed_time": "2:45:03", "remaining_time": "1:39:59", "throughput": 8673.72, "total_tokens": 85898000} +{"current_steps": 127460, "total_steps": 204665, "loss": 0.0, "lr": 7.487147749534943e-07, "epoch": 3.113869005447927, "percentage": 62.28, "elapsed_time": "2:45:03", "remaining_time": "1:39:58", "throughput": 8673.77, "total_tokens": 85901584} +{"current_steps": 127465, "total_steps": 204665, "loss": 0.0, "lr": 7.486322345631086e-07, "epoch": 3.1139911562797744, "percentage": 62.28, "elapsed_time": "2:45:03", "remaining_time": "1:39:58", "throughput": 8673.79, "total_tokens": 85904784} +{"current_steps": 127470, "total_steps": 204665, "loss": 0.0, "lr": 7.485496960007367e-07, "epoch": 3.1141133071116216, "percentage": 62.28, "elapsed_time": "2:45:04", "remaining_time": "1:39:57", "throughput": 8673.81, "total_tokens": 85908048} +{"current_steps": 127475, "total_steps": 204665, "loss": 0.0, "lr": 7.484671592669789e-07, "epoch": 3.114235457943469, "percentage": 62.28, "elapsed_time": "2:45:04", "remaining_time": "1:39:57", "throughput": 8673.83, "total_tokens": 85911248} +{"current_steps": 127480, "total_steps": 204665, "loss": 0.0001, "lr": 7.483846243624359e-07, "epoch": 3.1143576087753155, "percentage": 62.29, "elapsed_time": "2:45:04", "remaining_time": "1:39:57", "throughput": 8673.88, "total_tokens": 85914768} +{"current_steps": 127485, "total_steps": 204665, "loss": 0.0927, "lr": 7.483020912877072e-07, "epoch": 3.1144797596071627, "percentage": 62.29, "elapsed_time": "2:45:05", "remaining_time": "1:39:56", "throughput": 8673.91, "total_tokens": 85918032} +{"current_steps": 127490, "total_steps": 204665, "loss": 0.0, "lr": 7.482195600433938e-07, "epoch": 3.11460191043901, "percentage": 62.29, "elapsed_time": "2:45:05", "remaining_time": "1:39:56", "throughput": 8673.96, "total_tokens": 85921552} +{"current_steps": 127495, "total_steps": 204665, "loss": 0.0, "lr": 7.481370306300949e-07, "epoch": 3.114724061270857, "percentage": 62.29, "elapsed_time": "2:45:06", "remaining_time": "1:39:55", "throughput": 8674.02, "total_tokens": 85925200} +{"current_steps": 127500, "total_steps": 204665, "loss": 0.0001, "lr": 7.48054503048412e-07, "epoch": 3.1148462121027043, "percentage": 62.3, "elapsed_time": "2:45:06", "remaining_time": "1:39:55", "throughput": 8674.05, "total_tokens": 85928528} +{"current_steps": 127505, "total_steps": 204665, "loss": 0.0, "lr": 7.479719772989439e-07, "epoch": 3.1149683629345515, "percentage": 62.3, "elapsed_time": "2:45:06", "remaining_time": "1:39:55", "throughput": 8674.1, "total_tokens": 85932112} +{"current_steps": 127510, "total_steps": 204665, "loss": 0.0004, "lr": 7.478894533822914e-07, "epoch": 3.1150905137663987, "percentage": 62.3, "elapsed_time": "2:45:07", "remaining_time": "1:39:54", "throughput": 8674.11, "total_tokens": 85935120} +{"current_steps": 127515, "total_steps": 204665, "loss": 0.0, "lr": 7.478069312990549e-07, "epoch": 3.115212664598246, "percentage": 62.3, "elapsed_time": "2:45:07", "remaining_time": "1:39:54", "throughput": 8674.13, "total_tokens": 85938320} +{"current_steps": 127520, "total_steps": 204665, "loss": 0.0313, "lr": 7.477244110498342e-07, "epoch": 3.115334815430093, "percentage": 62.31, "elapsed_time": "2:45:07", "remaining_time": "1:39:53", "throughput": 8674.15, "total_tokens": 85941456} +{"current_steps": 127525, "total_steps": 204665, "loss": 0.0676, "lr": 7.476418926352295e-07, "epoch": 3.1154569662619402, "percentage": 62.31, "elapsed_time": "2:45:08", "remaining_time": "1:39:53", "throughput": 8674.17, "total_tokens": 85944656} +{"current_steps": 127530, "total_steps": 204665, "loss": 0.0, "lr": 7.475593760558406e-07, "epoch": 3.1155791170937874, "percentage": 62.31, "elapsed_time": "2:45:08", "remaining_time": "1:39:53", "throughput": 8674.18, "total_tokens": 85947728} +{"current_steps": 127535, "total_steps": 204665, "loss": 0.0, "lr": 7.474768613122678e-07, "epoch": 3.1157012679256346, "percentage": 62.31, "elapsed_time": "2:45:08", "remaining_time": "1:39:52", "throughput": 8674.18, "total_tokens": 85950672} +{"current_steps": 127540, "total_steps": 204665, "loss": 0.0224, "lr": 7.473943484051115e-07, "epoch": 3.115823418757482, "percentage": 62.32, "elapsed_time": "2:45:09", "remaining_time": "1:39:52", "throughput": 8674.25, "total_tokens": 85954384} +{"current_steps": 127545, "total_steps": 204665, "loss": 0.0348, "lr": 7.473118373349709e-07, "epoch": 3.115945569589329, "percentage": 62.32, "elapsed_time": "2:45:09", "remaining_time": "1:39:51", "throughput": 8674.27, "total_tokens": 85957520} +{"current_steps": 127550, "total_steps": 204665, "loss": 0.0, "lr": 7.472293281024474e-07, "epoch": 3.116067720421176, "percentage": 62.32, "elapsed_time": "2:45:09", "remaining_time": "1:39:51", "throughput": 8674.29, "total_tokens": 85960720} +{"current_steps": 127555, "total_steps": 204665, "loss": 0.0, "lr": 7.471468207081394e-07, "epoch": 3.1161898712530234, "percentage": 62.32, "elapsed_time": "2:45:10", "remaining_time": "1:39:50", "throughput": 8674.32, "total_tokens": 85963984} +{"current_steps": 127560, "total_steps": 204665, "loss": 0.041, "lr": 7.470643151526483e-07, "epoch": 3.1163120220848706, "percentage": 62.33, "elapsed_time": "2:45:10", "remaining_time": "1:39:50", "throughput": 8674.33, "total_tokens": 85966992} +{"current_steps": 127565, "total_steps": 204665, "loss": 0.0, "lr": 7.469818114365732e-07, "epoch": 3.1164341729167178, "percentage": 62.33, "elapsed_time": "2:45:10", "remaining_time": "1:39:50", "throughput": 8674.34, "total_tokens": 85970128} +{"current_steps": 127570, "total_steps": 204665, "loss": 0.0, "lr": 7.468993095605143e-07, "epoch": 3.116556323748565, "percentage": 62.33, "elapsed_time": "2:45:11", "remaining_time": "1:39:49", "throughput": 8674.37, "total_tokens": 85973392} +{"current_steps": 127575, "total_steps": 204665, "loss": 0.0, "lr": 7.46816809525072e-07, "epoch": 3.1166784745804117, "percentage": 62.33, "elapsed_time": "2:45:11", "remaining_time": "1:39:49", "throughput": 8674.41, "total_tokens": 85976784} +{"current_steps": 127580, "total_steps": 204665, "loss": 0.0, "lr": 7.467343113308459e-07, "epoch": 3.116800625412259, "percentage": 62.34, "elapsed_time": "2:45:11", "remaining_time": "1:39:48", "throughput": 8674.46, "total_tokens": 85980432} +{"current_steps": 127585, "total_steps": 204665, "loss": 0.0788, "lr": 7.466518149784362e-07, "epoch": 3.116922776244106, "percentage": 62.34, "elapsed_time": "2:45:12", "remaining_time": "1:39:48", "throughput": 8674.53, "total_tokens": 85984144} +{"current_steps": 127590, "total_steps": 204665, "loss": 0.0, "lr": 7.465693204684422e-07, "epoch": 3.1170449270759533, "percentage": 62.34, "elapsed_time": "2:45:12", "remaining_time": "1:39:48", "throughput": 8674.54, "total_tokens": 85987280} +{"current_steps": 127595, "total_steps": 204665, "loss": 0.0, "lr": 7.464868278014647e-07, "epoch": 3.1171670779078005, "percentage": 62.34, "elapsed_time": "2:45:12", "remaining_time": "1:39:47", "throughput": 8674.57, "total_tokens": 85990544} +{"current_steps": 127600, "total_steps": 204665, "loss": 0.0, "lr": 7.464043369781027e-07, "epoch": 3.1172892287396476, "percentage": 62.35, "elapsed_time": "2:45:13", "remaining_time": "1:39:47", "throughput": 8674.62, "total_tokens": 85994064} +{"current_steps": 127605, "total_steps": 204665, "loss": 0.0, "lr": 7.463218479989568e-07, "epoch": 3.117411379571495, "percentage": 62.35, "elapsed_time": "2:45:13", "remaining_time": "1:39:46", "throughput": 8674.68, "total_tokens": 85997712} +{"current_steps": 127610, "total_steps": 204665, "loss": 0.0, "lr": 7.462393608646269e-07, "epoch": 3.117533530403342, "percentage": 62.35, "elapsed_time": "2:45:14", "remaining_time": "1:39:46", "throughput": 8674.74, "total_tokens": 86001488} +{"current_steps": 127615, "total_steps": 204665, "loss": 0.0, "lr": 7.461568755757122e-07, "epoch": 3.117655681235189, "percentage": 62.35, "elapsed_time": "2:45:14", "remaining_time": "1:39:45", "throughput": 8674.78, "total_tokens": 86004880} +{"current_steps": 127620, "total_steps": 204665, "loss": 0.0, "lr": 7.460743921328134e-07, "epoch": 3.1177778320670364, "percentage": 62.36, "elapsed_time": "2:45:14", "remaining_time": "1:39:45", "throughput": 8674.82, "total_tokens": 86008272} +{"current_steps": 127625, "total_steps": 204665, "loss": 0.0864, "lr": 7.459919105365297e-07, "epoch": 3.1178999828988836, "percentage": 62.36, "elapsed_time": "2:45:15", "remaining_time": "1:39:45", "throughput": 8674.83, "total_tokens": 86011408} +{"current_steps": 127630, "total_steps": 204665, "loss": 0.0001, "lr": 7.459094307874609e-07, "epoch": 3.118022133730731, "percentage": 62.36, "elapsed_time": "2:45:15", "remaining_time": "1:39:44", "throughput": 8674.9, "total_tokens": 86015184} +{"current_steps": 127635, "total_steps": 204665, "loss": 0.0003, "lr": 7.458269528862075e-07, "epoch": 3.118144284562578, "percentage": 62.36, "elapsed_time": "2:45:15", "remaining_time": "1:39:44", "throughput": 8674.91, "total_tokens": 86018256} +{"current_steps": 127640, "total_steps": 204665, "loss": 0.058, "lr": 7.457444768333686e-07, "epoch": 3.118266435394425, "percentage": 62.37, "elapsed_time": "2:45:16", "remaining_time": "1:39:43", "throughput": 8674.93, "total_tokens": 86021392} +{"current_steps": 127645, "total_steps": 204665, "loss": 0.0, "lr": 7.456620026295446e-07, "epoch": 3.1183885862262724, "percentage": 62.37, "elapsed_time": "2:45:16", "remaining_time": "1:39:43", "throughput": 8675.01, "total_tokens": 86025296} +{"current_steps": 127650, "total_steps": 204665, "loss": 0.0, "lr": 7.455795302753345e-07, "epoch": 3.1185107370581195, "percentage": 62.37, "elapsed_time": "2:45:16", "remaining_time": "1:39:43", "throughput": 8675.06, "total_tokens": 86028880} +{"current_steps": 127655, "total_steps": 204665, "loss": 0.0, "lr": 7.454970597713388e-07, "epoch": 3.1186328878899667, "percentage": 62.37, "elapsed_time": "2:45:17", "remaining_time": "1:39:42", "throughput": 8675.1, "total_tokens": 86032272} +{"current_steps": 127660, "total_steps": 204665, "loss": 0.0, "lr": 7.454145911181566e-07, "epoch": 3.1187550387218135, "percentage": 62.38, "elapsed_time": "2:45:17", "remaining_time": "1:39:42", "throughput": 8675.1, "total_tokens": 86035280} +{"current_steps": 127665, "total_steps": 204665, "loss": 0.0, "lr": 7.453321243163879e-07, "epoch": 3.1188771895536607, "percentage": 62.38, "elapsed_time": "2:45:17", "remaining_time": "1:39:41", "throughput": 8675.12, "total_tokens": 86038416} +{"current_steps": 127670, "total_steps": 204665, "loss": 0.0002, "lr": 7.452496593666329e-07, "epoch": 3.118999340385508, "percentage": 62.38, "elapsed_time": "2:45:18", "remaining_time": "1:39:41", "throughput": 8675.13, "total_tokens": 86041488} +{"current_steps": 127675, "total_steps": 204665, "loss": 0.0003, "lr": 7.451671962694907e-07, "epoch": 3.119121491217355, "percentage": 62.38, "elapsed_time": "2:45:18", "remaining_time": "1:39:41", "throughput": 8675.17, "total_tokens": 86044880} +{"current_steps": 127680, "total_steps": 204665, "loss": 0.0, "lr": 7.450847350255613e-07, "epoch": 3.1192436420492022, "percentage": 62.38, "elapsed_time": "2:45:18", "remaining_time": "1:39:40", "throughput": 8675.22, "total_tokens": 86048400} +{"current_steps": 127685, "total_steps": 204665, "loss": 0.0, "lr": 7.450022756354443e-07, "epoch": 3.1193657928810494, "percentage": 62.39, "elapsed_time": "2:45:19", "remaining_time": "1:39:40", "throughput": 8675.25, "total_tokens": 86051728} +{"current_steps": 127690, "total_steps": 204665, "loss": 0.0, "lr": 7.449198180997389e-07, "epoch": 3.1194879437128966, "percentage": 62.39, "elapsed_time": "2:45:19", "remaining_time": "1:39:39", "throughput": 8675.28, "total_tokens": 86054992} +{"current_steps": 127695, "total_steps": 204665, "loss": 0.0, "lr": 7.448373624190458e-07, "epoch": 3.119610094544744, "percentage": 62.39, "elapsed_time": "2:45:19", "remaining_time": "1:39:39", "throughput": 8675.28, "total_tokens": 86057936} +{"current_steps": 127700, "total_steps": 204665, "loss": 0.0609, "lr": 7.447549085939636e-07, "epoch": 3.119732245376591, "percentage": 62.39, "elapsed_time": "2:45:20", "remaining_time": "1:39:38", "throughput": 8675.3, "total_tokens": 86061200} +{"current_steps": 127705, "total_steps": 204665, "loss": 0.0001, "lr": 7.446724566250927e-07, "epoch": 3.119854396208438, "percentage": 62.4, "elapsed_time": "2:45:20", "remaining_time": "1:39:38", "throughput": 8675.34, "total_tokens": 86064528} +{"current_steps": 127710, "total_steps": 204665, "loss": 0.0001, "lr": 7.44590006513032e-07, "epoch": 3.1199765470402854, "percentage": 62.4, "elapsed_time": "2:45:20", "remaining_time": "1:39:38", "throughput": 8675.36, "total_tokens": 86067728} +{"current_steps": 127715, "total_steps": 204665, "loss": 0.0001, "lr": 7.445075582583819e-07, "epoch": 3.1200986978721326, "percentage": 62.4, "elapsed_time": "2:45:21", "remaining_time": "1:39:37", "throughput": 8675.38, "total_tokens": 86070928} +{"current_steps": 127720, "total_steps": 204665, "loss": 0.0, "lr": 7.444251118617411e-07, "epoch": 3.1202208487039798, "percentage": 62.4, "elapsed_time": "2:45:21", "remaining_time": "1:39:37", "throughput": 8675.41, "total_tokens": 86074256} +{"current_steps": 127725, "total_steps": 204665, "loss": 0.0, "lr": 7.443426673237098e-07, "epoch": 3.120342999535827, "percentage": 62.41, "elapsed_time": "2:45:21", "remaining_time": "1:39:36", "throughput": 8675.44, "total_tokens": 86077520} +{"current_steps": 127730, "total_steps": 204665, "loss": 0.0, "lr": 7.442602246448875e-07, "epoch": 3.120465150367674, "percentage": 62.41, "elapsed_time": "2:45:22", "remaining_time": "1:39:36", "throughput": 8675.46, "total_tokens": 86080720} +{"current_steps": 127735, "total_steps": 204665, "loss": 0.0002, "lr": 7.441777838258736e-07, "epoch": 3.1205873011995213, "percentage": 62.41, "elapsed_time": "2:45:22", "remaining_time": "1:39:36", "throughput": 8675.52, "total_tokens": 86084304} +{"current_steps": 127740, "total_steps": 204665, "loss": 0.0, "lr": 7.440953448672678e-07, "epoch": 3.1207094520313685, "percentage": 62.41, "elapsed_time": "2:45:23", "remaining_time": "1:39:35", "throughput": 8675.54, "total_tokens": 86087504} +{"current_steps": 127745, "total_steps": 204665, "loss": 0.0, "lr": 7.440129077696691e-07, "epoch": 3.1208316028632157, "percentage": 62.42, "elapsed_time": "2:45:23", "remaining_time": "1:39:35", "throughput": 8675.57, "total_tokens": 86090832} +{"current_steps": 127750, "total_steps": 204665, "loss": 0.0, "lr": 7.439304725336778e-07, "epoch": 3.1209537536950624, "percentage": 62.42, "elapsed_time": "2:45:23", "remaining_time": "1:39:34", "throughput": 8675.62, "total_tokens": 86094352} +{"current_steps": 127755, "total_steps": 204665, "loss": 0.0836, "lr": 7.438480391598925e-07, "epoch": 3.1210759045269096, "percentage": 62.42, "elapsed_time": "2:45:24", "remaining_time": "1:39:34", "throughput": 8675.64, "total_tokens": 86097488} +{"current_steps": 127760, "total_steps": 204665, "loss": 0.0, "lr": 7.437656076489133e-07, "epoch": 3.121198055358757, "percentage": 62.42, "elapsed_time": "2:45:24", "remaining_time": "1:39:33", "throughput": 8675.66, "total_tokens": 86100624} +{"current_steps": 127765, "total_steps": 204665, "loss": 0.0052, "lr": 7.436831780013398e-07, "epoch": 3.121320206190604, "percentage": 62.43, "elapsed_time": "2:45:24", "remaining_time": "1:39:33", "throughput": 8675.72, "total_tokens": 86104336} +{"current_steps": 127770, "total_steps": 204665, "loss": 0.0003, "lr": 7.436007502177708e-07, "epoch": 3.121442357022451, "percentage": 62.43, "elapsed_time": "2:45:25", "remaining_time": "1:39:33", "throughput": 8675.79, "total_tokens": 86108240} +{"current_steps": 127775, "total_steps": 204665, "loss": 0.0, "lr": 7.435183242988066e-07, "epoch": 3.1215645078542984, "percentage": 62.43, "elapsed_time": "2:45:25", "remaining_time": "1:39:32", "throughput": 8675.81, "total_tokens": 86111440} +{"current_steps": 127780, "total_steps": 204665, "loss": 0.0, "lr": 7.434359002450458e-07, "epoch": 3.1216866586861456, "percentage": 62.43, "elapsed_time": "2:45:25", "remaining_time": "1:39:32", "throughput": 8675.88, "total_tokens": 86115216} +{"current_steps": 127785, "total_steps": 204665, "loss": 0.0002, "lr": 7.433534780570881e-07, "epoch": 3.1218088095179928, "percentage": 62.44, "elapsed_time": "2:45:26", "remaining_time": "1:39:31", "throughput": 8675.93, "total_tokens": 86118672} +{"current_steps": 127790, "total_steps": 204665, "loss": 0.0, "lr": 7.432710577355332e-07, "epoch": 3.12193096034984, "percentage": 62.44, "elapsed_time": "2:45:26", "remaining_time": "1:39:31", "throughput": 8675.92, "total_tokens": 86121552} +{"current_steps": 127795, "total_steps": 204665, "loss": 0.131, "lr": 7.431886392809799e-07, "epoch": 3.122053111181687, "percentage": 62.44, "elapsed_time": "2:45:26", "remaining_time": "1:39:31", "throughput": 8675.97, "total_tokens": 86125136} +{"current_steps": 127800, "total_steps": 204665, "loss": 0.0, "lr": 7.431062226940281e-07, "epoch": 3.1221752620135343, "percentage": 62.44, "elapsed_time": "2:45:27", "remaining_time": "1:39:30", "throughput": 8676.0, "total_tokens": 86128336} +{"current_steps": 127805, "total_steps": 204665, "loss": 0.0, "lr": 7.430238079752768e-07, "epoch": 3.1222974128453815, "percentage": 62.45, "elapsed_time": "2:45:27", "remaining_time": "1:39:30", "throughput": 8676.02, "total_tokens": 86131536} +{"current_steps": 127810, "total_steps": 204665, "loss": 0.0, "lr": 7.429413951253259e-07, "epoch": 3.1224195636772287, "percentage": 62.45, "elapsed_time": "2:45:27", "remaining_time": "1:39:29", "throughput": 8676.03, "total_tokens": 86134672} +{"current_steps": 127815, "total_steps": 204665, "loss": 0.0, "lr": 7.428589841447737e-07, "epoch": 3.122541714509076, "percentage": 62.45, "elapsed_time": "2:45:28", "remaining_time": "1:39:29", "throughput": 8676.11, "total_tokens": 86138512} +{"current_steps": 127820, "total_steps": 204665, "loss": 0.0, "lr": 7.427765750342201e-07, "epoch": 3.122663865340923, "percentage": 62.45, "elapsed_time": "2:45:28", "remaining_time": "1:39:29", "throughput": 8676.12, "total_tokens": 86141648} +{"current_steps": 127825, "total_steps": 204665, "loss": 0.0, "lr": 7.42694167794265e-07, "epoch": 3.1227860161727703, "percentage": 62.46, "elapsed_time": "2:45:28", "remaining_time": "1:39:28", "throughput": 8676.17, "total_tokens": 86145168} +{"current_steps": 127830, "total_steps": 204665, "loss": 0.0001, "lr": 7.426117624255068e-07, "epoch": 3.1229081670046175, "percentage": 62.46, "elapsed_time": "2:45:29", "remaining_time": "1:39:28", "throughput": 8676.19, "total_tokens": 86148304} +{"current_steps": 127835, "total_steps": 204665, "loss": 0.0, "lr": 7.425293589285453e-07, "epoch": 3.1230303178364647, "percentage": 62.46, "elapsed_time": "2:45:29", "remaining_time": "1:39:27", "throughput": 8676.21, "total_tokens": 86151440} +{"current_steps": 127840, "total_steps": 204665, "loss": 0.0, "lr": 7.424469573039793e-07, "epoch": 3.1231524686683114, "percentage": 62.46, "elapsed_time": "2:45:29", "remaining_time": "1:39:27", "throughput": 8676.24, "total_tokens": 86154832} +{"current_steps": 127845, "total_steps": 204665, "loss": 0.1155, "lr": 7.423645575524087e-07, "epoch": 3.1232746195001586, "percentage": 62.47, "elapsed_time": "2:45:30", "remaining_time": "1:39:26", "throughput": 8676.3, "total_tokens": 86158480} +{"current_steps": 127850, "total_steps": 204665, "loss": 0.0, "lr": 7.422821596744318e-07, "epoch": 3.123396770332006, "percentage": 62.47, "elapsed_time": "2:45:30", "remaining_time": "1:39:26", "throughput": 8676.36, "total_tokens": 86162128} +{"current_steps": 127855, "total_steps": 204665, "loss": 0.0, "lr": 7.421997636706486e-07, "epoch": 3.123518921163853, "percentage": 62.47, "elapsed_time": "2:45:31", "remaining_time": "1:39:26", "throughput": 8676.39, "total_tokens": 86165456} +{"current_steps": 127860, "total_steps": 204665, "loss": 0.0978, "lr": 7.421173695416582e-07, "epoch": 3.1236410719957, "percentage": 62.47, "elapsed_time": "2:45:31", "remaining_time": "1:39:25", "throughput": 8676.43, "total_tokens": 86168912} +{"current_steps": 127865, "total_steps": 204665, "loss": 0.0001, "lr": 7.420349772880592e-07, "epoch": 3.1237632228275474, "percentage": 62.48, "elapsed_time": "2:45:31", "remaining_time": "1:39:25", "throughput": 8676.48, "total_tokens": 86172432} +{"current_steps": 127870, "total_steps": 204665, "loss": 0.0, "lr": 7.419525869104518e-07, "epoch": 3.1238853736593946, "percentage": 62.48, "elapsed_time": "2:45:32", "remaining_time": "1:39:24", "throughput": 8676.5, "total_tokens": 86175568} +{"current_steps": 127875, "total_steps": 204665, "loss": 0.0, "lr": 7.418701984094341e-07, "epoch": 3.1240075244912417, "percentage": 62.48, "elapsed_time": "2:45:32", "remaining_time": "1:39:24", "throughput": 8676.52, "total_tokens": 86178832} +{"current_steps": 127880, "total_steps": 204665, "loss": 0.0, "lr": 7.41787811785606e-07, "epoch": 3.124129675323089, "percentage": 62.48, "elapsed_time": "2:45:32", "remaining_time": "1:39:24", "throughput": 8676.54, "total_tokens": 86181968} +{"current_steps": 127885, "total_steps": 204665, "loss": 0.0, "lr": 7.417054270395664e-07, "epoch": 3.124251826154936, "percentage": 62.49, "elapsed_time": "2:45:33", "remaining_time": "1:39:23", "throughput": 8676.57, "total_tokens": 86185360} +{"current_steps": 127890, "total_steps": 204665, "loss": 0.0, "lr": 7.416230441719143e-07, "epoch": 3.1243739769867833, "percentage": 62.49, "elapsed_time": "2:45:33", "remaining_time": "1:39:23", "throughput": 8676.62, "total_tokens": 86188944} +{"current_steps": 127895, "total_steps": 204665, "loss": 0.0, "lr": 7.415406631832493e-07, "epoch": 3.1244961278186305, "percentage": 62.49, "elapsed_time": "2:45:33", "remaining_time": "1:39:22", "throughput": 8676.66, "total_tokens": 86192336} +{"current_steps": 127900, "total_steps": 204665, "loss": 0.0001, "lr": 7.414582840741696e-07, "epoch": 3.1246182786504777, "percentage": 62.49, "elapsed_time": "2:45:34", "remaining_time": "1:39:22", "throughput": 8676.75, "total_tokens": 86196368} +{"current_steps": 127905, "total_steps": 204665, "loss": 0.0, "lr": 7.413759068452752e-07, "epoch": 3.124740429482325, "percentage": 62.49, "elapsed_time": "2:45:34", "remaining_time": "1:39:22", "throughput": 8676.78, "total_tokens": 86199760} +{"current_steps": 127910, "total_steps": 204665, "loss": 0.0339, "lr": 7.412935314971643e-07, "epoch": 3.124862580314172, "percentage": 62.5, "elapsed_time": "2:45:34", "remaining_time": "1:39:21", "throughput": 8676.83, "total_tokens": 86203216} +{"current_steps": 127915, "total_steps": 204665, "loss": 0.0, "lr": 7.412111580304366e-07, "epoch": 3.1249847311460193, "percentage": 62.5, "elapsed_time": "2:45:35", "remaining_time": "1:39:21", "throughput": 8676.83, "total_tokens": 86206224} +{"current_steps": 127920, "total_steps": 204665, "loss": 0.0001, "lr": 7.411287864456912e-07, "epoch": 3.1251068819778665, "percentage": 62.5, "elapsed_time": "2:45:35", "remaining_time": "1:39:20", "throughput": 8676.87, "total_tokens": 86209616} +{"current_steps": 127925, "total_steps": 204665, "loss": 0.0001, "lr": 7.410464167435265e-07, "epoch": 3.125229032809713, "percentage": 62.5, "elapsed_time": "2:45:35", "remaining_time": "1:39:20", "throughput": 8676.89, "total_tokens": 86212880} +{"current_steps": 127930, "total_steps": 204665, "loss": 0.0, "lr": 7.409640489245423e-07, "epoch": 3.1253511836415604, "percentage": 62.51, "elapsed_time": "2:45:36", "remaining_time": "1:39:19", "throughput": 8676.94, "total_tokens": 86216400} +{"current_steps": 127935, "total_steps": 204665, "loss": 0.0, "lr": 7.408816829893371e-07, "epoch": 3.1254733344734076, "percentage": 62.51, "elapsed_time": "2:45:36", "remaining_time": "1:39:19", "throughput": 8676.95, "total_tokens": 86219472} +{"current_steps": 127940, "total_steps": 204665, "loss": 0.0001, "lr": 7.407993189385098e-07, "epoch": 3.1255954853052548, "percentage": 62.51, "elapsed_time": "2:45:36", "remaining_time": "1:39:19", "throughput": 8677.01, "total_tokens": 86223184} +{"current_steps": 127945, "total_steps": 204665, "loss": 0.0588, "lr": 7.407169567726599e-07, "epoch": 3.125717636137102, "percentage": 62.51, "elapsed_time": "2:45:37", "remaining_time": "1:39:18", "throughput": 8677.03, "total_tokens": 86226320} +{"current_steps": 127950, "total_steps": 204665, "loss": 0.0, "lr": 7.406345964923857e-07, "epoch": 3.125839786968949, "percentage": 62.52, "elapsed_time": "2:45:37", "remaining_time": "1:39:18", "throughput": 8677.07, "total_tokens": 86229840} +{"current_steps": 127955, "total_steps": 204665, "loss": 0.0, "lr": 7.40552238098287e-07, "epoch": 3.1259619378007963, "percentage": 62.52, "elapsed_time": "2:45:38", "remaining_time": "1:39:17", "throughput": 8677.15, "total_tokens": 86233744} +{"current_steps": 127960, "total_steps": 204665, "loss": 0.0002, "lr": 7.404698815909616e-07, "epoch": 3.1260840886326435, "percentage": 62.52, "elapsed_time": "2:45:38", "remaining_time": "1:39:17", "throughput": 8677.2, "total_tokens": 86237264} +{"current_steps": 127965, "total_steps": 204665, "loss": 0.0024, "lr": 7.403875269710093e-07, "epoch": 3.1262062394644907, "percentage": 62.52, "elapsed_time": "2:45:38", "remaining_time": "1:39:17", "throughput": 8677.25, "total_tokens": 86240720} +{"current_steps": 127970, "total_steps": 204665, "loss": 0.0, "lr": 7.403051742390285e-07, "epoch": 3.126328390296338, "percentage": 62.53, "elapsed_time": "2:45:39", "remaining_time": "1:39:16", "throughput": 8677.26, "total_tokens": 86243856} +{"current_steps": 127975, "total_steps": 204665, "loss": 0.0345, "lr": 7.402228233956184e-07, "epoch": 3.126450541128185, "percentage": 62.53, "elapsed_time": "2:45:39", "remaining_time": "1:39:16", "throughput": 8677.3, "total_tokens": 86247248} +{"current_steps": 127980, "total_steps": 204665, "loss": 0.0, "lr": 7.401404744413782e-07, "epoch": 3.1265726919600323, "percentage": 62.53, "elapsed_time": "2:45:39", "remaining_time": "1:39:15", "throughput": 8677.35, "total_tokens": 86250832} +{"current_steps": 127985, "total_steps": 204665, "loss": 0.0, "lr": 7.40058127376906e-07, "epoch": 3.1266948427918795, "percentage": 62.53, "elapsed_time": "2:45:40", "remaining_time": "1:39:15", "throughput": 8677.44, "total_tokens": 86254800} +{"current_steps": 127990, "total_steps": 204665, "loss": 0.0, "lr": 7.399757822028011e-07, "epoch": 3.1268169936237267, "percentage": 62.54, "elapsed_time": "2:45:40", "remaining_time": "1:39:15", "throughput": 8677.48, "total_tokens": 86258256} +{"current_steps": 127995, "total_steps": 204665, "loss": 0.0, "lr": 7.398934389196622e-07, "epoch": 3.126939144455574, "percentage": 62.54, "elapsed_time": "2:45:40", "remaining_time": "1:39:14", "throughput": 8677.5, "total_tokens": 86261456} +{"current_steps": 128000, "total_steps": 204665, "loss": 0.0, "lr": 7.398110975280884e-07, "epoch": 3.127061295287421, "percentage": 62.54, "elapsed_time": "2:45:41", "remaining_time": "1:39:14", "throughput": 8677.58, "total_tokens": 86265424} +{"current_steps": 128005, "total_steps": 204665, "loss": 0.0, "lr": 7.39728758028678e-07, "epoch": 3.1271834461192682, "percentage": 62.54, "elapsed_time": "2:45:41", "remaining_time": "1:39:13", "throughput": 8677.63, "total_tokens": 86269008} +{"current_steps": 128010, "total_steps": 204665, "loss": 0.0002, "lr": 7.3964642042203e-07, "epoch": 3.1273055969511154, "percentage": 62.55, "elapsed_time": "2:45:41", "remaining_time": "1:39:13", "throughput": 8677.67, "total_tokens": 86272464} +{"current_steps": 128015, "total_steps": 204665, "loss": 0.0, "lr": 7.395640847087436e-07, "epoch": 3.1274277477829626, "percentage": 62.55, "elapsed_time": "2:45:42", "remaining_time": "1:39:12", "throughput": 8677.71, "total_tokens": 86275856} +{"current_steps": 128020, "total_steps": 204665, "loss": 0.0447, "lr": 7.394817508894169e-07, "epoch": 3.1275498986148094, "percentage": 62.55, "elapsed_time": "2:45:42", "remaining_time": "1:39:12", "throughput": 8677.74, "total_tokens": 86279184} +{"current_steps": 128025, "total_steps": 204665, "loss": 0.0, "lr": 7.393994189646493e-07, "epoch": 3.1276720494466566, "percentage": 62.55, "elapsed_time": "2:45:42", "remaining_time": "1:39:12", "throughput": 8677.76, "total_tokens": 86282320} +{"current_steps": 128030, "total_steps": 204665, "loss": 0.0, "lr": 7.393170889350388e-07, "epoch": 3.1277942002785037, "percentage": 62.56, "elapsed_time": "2:45:43", "remaining_time": "1:39:11", "throughput": 8677.8, "total_tokens": 86285712} +{"current_steps": 128035, "total_steps": 204665, "loss": 0.0, "lr": 7.392347608011849e-07, "epoch": 3.127916351110351, "percentage": 62.56, "elapsed_time": "2:45:43", "remaining_time": "1:39:11", "throughput": 8677.88, "total_tokens": 86289616} +{"current_steps": 128040, "total_steps": 204665, "loss": 0.0, "lr": 7.391524345636859e-07, "epoch": 3.128038501942198, "percentage": 62.56, "elapsed_time": "2:45:43", "remaining_time": "1:39:10", "throughput": 8677.92, "total_tokens": 86293072} +{"current_steps": 128045, "total_steps": 204665, "loss": 0.0, "lr": 7.390701102231405e-07, "epoch": 3.1281606527740453, "percentage": 62.56, "elapsed_time": "2:45:44", "remaining_time": "1:39:10", "throughput": 8677.97, "total_tokens": 86296592} +{"current_steps": 128050, "total_steps": 204665, "loss": 0.0, "lr": 7.389877877801476e-07, "epoch": 3.1282828036058925, "percentage": 62.57, "elapsed_time": "2:45:44", "remaining_time": "1:39:10", "throughput": 8677.99, "total_tokens": 86299792} +{"current_steps": 128055, "total_steps": 204665, "loss": 0.0, "lr": 7.389054672353054e-07, "epoch": 3.1284049544377397, "percentage": 62.57, "elapsed_time": "2:45:45", "remaining_time": "1:39:09", "throughput": 8678.05, "total_tokens": 86303568} +{"current_steps": 128060, "total_steps": 204665, "loss": 0.0, "lr": 7.388231485892132e-07, "epoch": 3.128527105269587, "percentage": 62.57, "elapsed_time": "2:45:45", "remaining_time": "1:39:09", "throughput": 8678.09, "total_tokens": 86306960} +{"current_steps": 128065, "total_steps": 204665, "loss": 0.0, "lr": 7.38740831842469e-07, "epoch": 3.128649256101434, "percentage": 62.57, "elapsed_time": "2:45:45", "remaining_time": "1:39:08", "throughput": 8678.14, "total_tokens": 86310544} +{"current_steps": 128070, "total_steps": 204665, "loss": 0.0553, "lr": 7.386585169956717e-07, "epoch": 3.1287714069332813, "percentage": 62.58, "elapsed_time": "2:45:46", "remaining_time": "1:39:08", "throughput": 8678.17, "total_tokens": 86313808} +{"current_steps": 128075, "total_steps": 204665, "loss": 0.0, "lr": 7.385762040494203e-07, "epoch": 3.1288935577651285, "percentage": 62.58, "elapsed_time": "2:45:46", "remaining_time": "1:39:08", "throughput": 8678.2, "total_tokens": 86317200} +{"current_steps": 128080, "total_steps": 204665, "loss": 0.0, "lr": 7.384938930043128e-07, "epoch": 3.1290157085969756, "percentage": 62.58, "elapsed_time": "2:45:46", "remaining_time": "1:39:07", "throughput": 8678.25, "total_tokens": 86320784} +{"current_steps": 128085, "total_steps": 204665, "loss": 0.0, "lr": 7.384115838609483e-07, "epoch": 3.129137859428823, "percentage": 62.58, "elapsed_time": "2:45:47", "remaining_time": "1:39:07", "throughput": 8678.3, "total_tokens": 86324240} +{"current_steps": 128090, "total_steps": 204665, "loss": 0.0, "lr": 7.38329276619925e-07, "epoch": 3.12926001026067, "percentage": 62.59, "elapsed_time": "2:45:47", "remaining_time": "1:39:06", "throughput": 8678.33, "total_tokens": 86327632} +{"current_steps": 128095, "total_steps": 204665, "loss": 0.0439, "lr": 7.382469712818413e-07, "epoch": 3.129382161092517, "percentage": 62.59, "elapsed_time": "2:45:47", "remaining_time": "1:39:06", "throughput": 8678.36, "total_tokens": 86330960} +{"current_steps": 128100, "total_steps": 204665, "loss": 0.0949, "lr": 7.381646678472965e-07, "epoch": 3.1295043119243644, "percentage": 62.59, "elapsed_time": "2:45:48", "remaining_time": "1:39:06", "throughput": 8678.4, "total_tokens": 86334352} +{"current_steps": 128105, "total_steps": 204665, "loss": 0.0, "lr": 7.380823663168882e-07, "epoch": 3.129626462756211, "percentage": 62.59, "elapsed_time": "2:45:48", "remaining_time": "1:39:05", "throughput": 8678.45, "total_tokens": 86338000} +{"current_steps": 128110, "total_steps": 204665, "loss": 0.0439, "lr": 7.380000666912158e-07, "epoch": 3.1297486135880583, "percentage": 62.59, "elapsed_time": "2:45:48", "remaining_time": "1:39:05", "throughput": 8678.53, "total_tokens": 86341840} +{"current_steps": 128115, "total_steps": 204665, "loss": 0.0, "lr": 7.379177689708771e-07, "epoch": 3.1298707644199055, "percentage": 62.6, "elapsed_time": "2:45:49", "remaining_time": "1:39:04", "throughput": 8678.55, "total_tokens": 86345040} +{"current_steps": 128120, "total_steps": 204665, "loss": 0.0408, "lr": 7.378354731564711e-07, "epoch": 3.1299929152517527, "percentage": 62.6, "elapsed_time": "2:45:49", "remaining_time": "1:39:04", "throughput": 8678.61, "total_tokens": 86348688} +{"current_steps": 128125, "total_steps": 204665, "loss": 0.0332, "lr": 7.377531792485958e-07, "epoch": 3.1301150660836, "percentage": 62.6, "elapsed_time": "2:45:49", "remaining_time": "1:39:03", "throughput": 8678.62, "total_tokens": 86351760} +{"current_steps": 128130, "total_steps": 204665, "loss": 0.0, "lr": 7.376708872478499e-07, "epoch": 3.130237216915447, "percentage": 62.6, "elapsed_time": "2:45:50", "remaining_time": "1:39:03", "throughput": 8678.63, "total_tokens": 86354832} +{"current_steps": 128135, "total_steps": 204665, "loss": 0.0003, "lr": 7.375885971548321e-07, "epoch": 3.1303593677472943, "percentage": 62.61, "elapsed_time": "2:45:50", "remaining_time": "1:39:03", "throughput": 8678.68, "total_tokens": 86358416} +{"current_steps": 128140, "total_steps": 204665, "loss": 0.0, "lr": 7.375063089701405e-07, "epoch": 3.1304815185791415, "percentage": 62.61, "elapsed_time": "2:45:50", "remaining_time": "1:39:02", "throughput": 8678.71, "total_tokens": 86361680} +{"current_steps": 128145, "total_steps": 204665, "loss": 0.0001, "lr": 7.374240226943737e-07, "epoch": 3.1306036694109887, "percentage": 62.61, "elapsed_time": "2:45:51", "remaining_time": "1:39:02", "throughput": 8678.73, "total_tokens": 86364880} +{"current_steps": 128150, "total_steps": 204665, "loss": 0.0008, "lr": 7.3734173832813e-07, "epoch": 3.130725820242836, "percentage": 62.61, "elapsed_time": "2:45:51", "remaining_time": "1:39:01", "throughput": 8678.74, "total_tokens": 86368016} +{"current_steps": 128155, "total_steps": 204665, "loss": 0.0, "lr": 7.37259455872008e-07, "epoch": 3.130847971074683, "percentage": 62.62, "elapsed_time": "2:45:52", "remaining_time": "1:39:01", "throughput": 8678.75, "total_tokens": 86371088} +{"current_steps": 128160, "total_steps": 204665, "loss": 0.0, "lr": 7.371771753266055e-07, "epoch": 3.1309701219065302, "percentage": 62.62, "elapsed_time": "2:45:52", "remaining_time": "1:39:01", "throughput": 8678.8, "total_tokens": 86374608} +{"current_steps": 128165, "total_steps": 204665, "loss": 0.0, "lr": 7.370948966925212e-07, "epoch": 3.1310922727383774, "percentage": 62.62, "elapsed_time": "2:45:52", "remaining_time": "1:39:00", "throughput": 8678.87, "total_tokens": 86378384} +{"current_steps": 128170, "total_steps": 204665, "loss": 0.0621, "lr": 7.370126199703541e-07, "epoch": 3.1312144235702246, "percentage": 62.62, "elapsed_time": "2:45:53", "remaining_time": "1:39:00", "throughput": 8678.86, "total_tokens": 86381264} +{"current_steps": 128175, "total_steps": 204665, "loss": 0.0168, "lr": 7.369303451607014e-07, "epoch": 3.131336574402072, "percentage": 62.63, "elapsed_time": "2:45:53", "remaining_time": "1:38:59", "throughput": 8678.87, "total_tokens": 86384336} +{"current_steps": 128180, "total_steps": 204665, "loss": 0.0, "lr": 7.368480722641626e-07, "epoch": 3.131458725233919, "percentage": 62.63, "elapsed_time": "2:45:53", "remaining_time": "1:38:59", "throughput": 8678.94, "total_tokens": 86388112} +{"current_steps": 128185, "total_steps": 204665, "loss": 0.0, "lr": 7.367658012813347e-07, "epoch": 3.131580876065766, "percentage": 62.63, "elapsed_time": "2:45:54", "remaining_time": "1:38:59", "throughput": 8679.02, "total_tokens": 86392016} +{"current_steps": 128190, "total_steps": 204665, "loss": 0.0, "lr": 7.366835322128171e-07, "epoch": 3.1317030268976134, "percentage": 62.63, "elapsed_time": "2:45:54", "remaining_time": "1:38:58", "throughput": 8679.05, "total_tokens": 86395344} +{"current_steps": 128195, "total_steps": 204665, "loss": 0.0001, "lr": 7.366012650592076e-07, "epoch": 3.1318251777294606, "percentage": 62.64, "elapsed_time": "2:45:54", "remaining_time": "1:38:58", "throughput": 8679.09, "total_tokens": 86398736} +{"current_steps": 128200, "total_steps": 204665, "loss": 0.0001, "lr": 7.365189998211046e-07, "epoch": 3.1319473285613073, "percentage": 62.64, "elapsed_time": "2:45:55", "remaining_time": "1:38:57", "throughput": 8679.14, "total_tokens": 86402384} +{"current_steps": 128205, "total_steps": 204665, "loss": 0.0, "lr": 7.364367364991064e-07, "epoch": 3.1320694793931545, "percentage": 62.64, "elapsed_time": "2:45:55", "remaining_time": "1:38:57", "throughput": 8679.2, "total_tokens": 86406032} +{"current_steps": 128210, "total_steps": 204665, "loss": 0.0, "lr": 7.363544750938109e-07, "epoch": 3.1321916302250017, "percentage": 62.64, "elapsed_time": "2:45:55", "remaining_time": "1:38:56", "throughput": 8679.3, "total_tokens": 86410128} +{"current_steps": 128215, "total_steps": 204665, "loss": 0.0002, "lr": 7.362722156058169e-07, "epoch": 3.132313781056849, "percentage": 62.65, "elapsed_time": "2:45:56", "remaining_time": "1:38:56", "throughput": 8679.33, "total_tokens": 86413520} +{"current_steps": 128220, "total_steps": 204665, "loss": 0.0, "lr": 7.361899580357219e-07, "epoch": 3.132435931888696, "percentage": 62.65, "elapsed_time": "2:45:56", "remaining_time": "1:38:56", "throughput": 8679.34, "total_tokens": 86416592} +{"current_steps": 128225, "total_steps": 204665, "loss": 0.0, "lr": 7.361077023841244e-07, "epoch": 3.1325580827205433, "percentage": 62.65, "elapsed_time": "2:45:56", "remaining_time": "1:38:55", "throughput": 8679.38, "total_tokens": 86419984} +{"current_steps": 128230, "total_steps": 204665, "loss": 0.0, "lr": 7.360254486516231e-07, "epoch": 3.1326802335523904, "percentage": 62.65, "elapsed_time": "2:45:57", "remaining_time": "1:38:55", "throughput": 8679.43, "total_tokens": 86423568} +{"current_steps": 128235, "total_steps": 204665, "loss": 0.0001, "lr": 7.359431968388153e-07, "epoch": 3.1328023843842376, "percentage": 62.66, "elapsed_time": "2:45:57", "remaining_time": "1:38:54", "throughput": 8679.48, "total_tokens": 86427088} +{"current_steps": 128240, "total_steps": 204665, "loss": 0.0, "lr": 7.358609469463e-07, "epoch": 3.132924535216085, "percentage": 62.66, "elapsed_time": "2:45:57", "remaining_time": "1:38:54", "throughput": 8679.49, "total_tokens": 86430160} +{"current_steps": 128245, "total_steps": 204665, "loss": 0.0001, "lr": 7.357786989746748e-07, "epoch": 3.133046686047932, "percentage": 62.66, "elapsed_time": "2:45:58", "remaining_time": "1:38:54", "throughput": 8679.5, "total_tokens": 86433232} +{"current_steps": 128250, "total_steps": 204665, "loss": 0.0002, "lr": 7.356964529245378e-07, "epoch": 3.133168836879779, "percentage": 62.66, "elapsed_time": "2:45:58", "remaining_time": "1:38:53", "throughput": 8679.52, "total_tokens": 86436432} +{"current_steps": 128255, "total_steps": 204665, "loss": 0.0002, "lr": 7.356142087964876e-07, "epoch": 3.1332909877116264, "percentage": 62.67, "elapsed_time": "2:45:59", "remaining_time": "1:38:53", "throughput": 8679.57, "total_tokens": 86439888} +{"current_steps": 128260, "total_steps": 204665, "loss": 0.0, "lr": 7.355319665911217e-07, "epoch": 3.1334131385434736, "percentage": 62.67, "elapsed_time": "2:45:59", "remaining_time": "1:38:52", "throughput": 8679.61, "total_tokens": 86443408} +{"current_steps": 128265, "total_steps": 204665, "loss": 0.0, "lr": 7.354497263090386e-07, "epoch": 3.1335352893753208, "percentage": 62.67, "elapsed_time": "2:45:59", "remaining_time": "1:38:52", "throughput": 8679.63, "total_tokens": 86446608} +{"current_steps": 128270, "total_steps": 204665, "loss": 0.0, "lr": 7.353674879508363e-07, "epoch": 3.133657440207168, "percentage": 62.67, "elapsed_time": "2:46:00", "remaining_time": "1:38:52", "throughput": 8679.71, "total_tokens": 86450448} +{"current_steps": 128275, "total_steps": 204665, "loss": 0.0001, "lr": 7.352852515171128e-07, "epoch": 3.133779591039015, "percentage": 62.68, "elapsed_time": "2:46:00", "remaining_time": "1:38:51", "throughput": 8679.73, "total_tokens": 86453648} +{"current_steps": 128280, "total_steps": 204665, "loss": 0.0, "lr": 7.35203017008466e-07, "epoch": 3.1339017418708623, "percentage": 62.68, "elapsed_time": "2:46:00", "remaining_time": "1:38:51", "throughput": 8679.77, "total_tokens": 86457104} +{"current_steps": 128285, "total_steps": 204665, "loss": 0.0, "lr": 7.351207844254938e-07, "epoch": 3.134023892702709, "percentage": 62.68, "elapsed_time": "2:46:01", "remaining_time": "1:38:50", "throughput": 8679.81, "total_tokens": 86460560} +{"current_steps": 128290, "total_steps": 204665, "loss": 0.0001, "lr": 7.350385537687951e-07, "epoch": 3.1341460435345563, "percentage": 62.68, "elapsed_time": "2:46:01", "remaining_time": "1:38:50", "throughput": 8679.84, "total_tokens": 86463824} +{"current_steps": 128295, "total_steps": 204665, "loss": 0.0, "lr": 7.349563250389672e-07, "epoch": 3.1342681943664035, "percentage": 62.69, "elapsed_time": "2:46:01", "remaining_time": "1:38:49", "throughput": 8679.87, "total_tokens": 86467088} +{"current_steps": 128300, "total_steps": 204665, "loss": 0.0002, "lr": 7.34874098236608e-07, "epoch": 3.1343903451982507, "percentage": 62.69, "elapsed_time": "2:46:02", "remaining_time": "1:38:49", "throughput": 8679.92, "total_tokens": 86470736} +{"current_steps": 128305, "total_steps": 204665, "loss": 0.0, "lr": 7.347918733623157e-07, "epoch": 3.134512496030098, "percentage": 62.69, "elapsed_time": "2:46:02", "remaining_time": "1:38:49", "throughput": 8680.0, "total_tokens": 86474576} +{"current_steps": 128310, "total_steps": 204665, "loss": 0.0, "lr": 7.347096504166885e-07, "epoch": 3.134634646861945, "percentage": 62.69, "elapsed_time": "2:46:02", "remaining_time": "1:38:48", "throughput": 8680.05, "total_tokens": 86478160} +{"current_steps": 128315, "total_steps": 204665, "loss": 0.0, "lr": 7.346274294003237e-07, "epoch": 3.1347567976937922, "percentage": 62.7, "elapsed_time": "2:46:03", "remaining_time": "1:38:48", "throughput": 8680.08, "total_tokens": 86481552} +{"current_steps": 128320, "total_steps": 204665, "loss": 0.0, "lr": 7.345452103138195e-07, "epoch": 3.1348789485256394, "percentage": 62.7, "elapsed_time": "2:46:03", "remaining_time": "1:38:47", "throughput": 8680.14, "total_tokens": 86485200} +{"current_steps": 128325, "total_steps": 204665, "loss": 0.0, "lr": 7.344629931577744e-07, "epoch": 3.1350010993574866, "percentage": 62.7, "elapsed_time": "2:46:03", "remaining_time": "1:38:47", "throughput": 8680.18, "total_tokens": 86488592} +{"current_steps": 128330, "total_steps": 204665, "loss": 0.0, "lr": 7.343807779327855e-07, "epoch": 3.135123250189334, "percentage": 62.7, "elapsed_time": "2:46:04", "remaining_time": "1:38:47", "throughput": 8680.2, "total_tokens": 86491792} +{"current_steps": 128335, "total_steps": 204665, "loss": 0.0001, "lr": 7.342985646394513e-07, "epoch": 3.135245401021181, "percentage": 62.7, "elapsed_time": "2:46:04", "remaining_time": "1:38:46", "throughput": 8680.29, "total_tokens": 86495888} +{"current_steps": 128340, "total_steps": 204665, "loss": 0.0, "lr": 7.342163532783689e-07, "epoch": 3.135367551853028, "percentage": 62.71, "elapsed_time": "2:46:04", "remaining_time": "1:38:46", "throughput": 8680.31, "total_tokens": 86499024} +{"current_steps": 128345, "total_steps": 204665, "loss": 0.0, "lr": 7.341341438501372e-07, "epoch": 3.1354897026848754, "percentage": 62.71, "elapsed_time": "2:46:05", "remaining_time": "1:38:45", "throughput": 8680.36, "total_tokens": 86502608} +{"current_steps": 128350, "total_steps": 204665, "loss": 0.0, "lr": 7.340519363553532e-07, "epoch": 3.1356118535167226, "percentage": 62.71, "elapsed_time": "2:46:05", "remaining_time": "1:38:45", "throughput": 8680.41, "total_tokens": 86506128} +{"current_steps": 128355, "total_steps": 204665, "loss": 0.0, "lr": 7.339697307946152e-07, "epoch": 3.1357340043485697, "percentage": 62.71, "elapsed_time": "2:46:06", "remaining_time": "1:38:45", "throughput": 8680.47, "total_tokens": 86509776} +{"current_steps": 128360, "total_steps": 204665, "loss": 0.0, "lr": 7.338875271685211e-07, "epoch": 3.135856155180417, "percentage": 62.72, "elapsed_time": "2:46:06", "remaining_time": "1:38:44", "throughput": 8680.5, "total_tokens": 86513168} +{"current_steps": 128365, "total_steps": 204665, "loss": 0.0001, "lr": 7.33805325477668e-07, "epoch": 3.135978306012264, "percentage": 62.72, "elapsed_time": "2:46:06", "remaining_time": "1:38:44", "throughput": 8680.54, "total_tokens": 86516560} +{"current_steps": 128370, "total_steps": 204665, "loss": 0.0005, "lr": 7.337231257226546e-07, "epoch": 3.136100456844111, "percentage": 62.72, "elapsed_time": "2:46:07", "remaining_time": "1:38:43", "throughput": 8680.58, "total_tokens": 86520080} +{"current_steps": 128375, "total_steps": 204665, "loss": 0.0, "lr": 7.336409279040778e-07, "epoch": 3.1362226076759585, "percentage": 62.72, "elapsed_time": "2:46:07", "remaining_time": "1:38:43", "throughput": 8680.63, "total_tokens": 86523536} +{"current_steps": 128380, "total_steps": 204665, "loss": 0.0, "lr": 7.335587320225359e-07, "epoch": 3.1363447585078053, "percentage": 62.73, "elapsed_time": "2:46:07", "remaining_time": "1:38:42", "throughput": 8680.66, "total_tokens": 86526864} +{"current_steps": 128385, "total_steps": 204665, "loss": 0.0001, "lr": 7.33476538078627e-07, "epoch": 3.1364669093396524, "percentage": 62.73, "elapsed_time": "2:46:08", "remaining_time": "1:38:42", "throughput": 8680.71, "total_tokens": 86530384} +{"current_steps": 128390, "total_steps": 204665, "loss": 0.0, "lr": 7.333943460729479e-07, "epoch": 3.1365890601714996, "percentage": 62.73, "elapsed_time": "2:46:08", "remaining_time": "1:38:42", "throughput": 8680.74, "total_tokens": 86533776} +{"current_steps": 128395, "total_steps": 204665, "loss": 0.0, "lr": 7.333121560060973e-07, "epoch": 3.136711211003347, "percentage": 62.73, "elapsed_time": "2:46:08", "remaining_time": "1:38:41", "throughput": 8680.8, "total_tokens": 86537424} +{"current_steps": 128400, "total_steps": 204665, "loss": 0.0, "lr": 7.332299678786722e-07, "epoch": 3.136833361835194, "percentage": 62.74, "elapsed_time": "2:46:09", "remaining_time": "1:38:41", "throughput": 8680.89, "total_tokens": 86541520} +{"current_steps": 128405, "total_steps": 204665, "loss": 0.0329, "lr": 7.331477816912703e-07, "epoch": 3.136955512667041, "percentage": 62.74, "elapsed_time": "2:46:09", "remaining_time": "1:38:40", "throughput": 8680.9, "total_tokens": 86544464} +{"current_steps": 128410, "total_steps": 204665, "loss": 0.0, "lr": 7.330655974444899e-07, "epoch": 3.1370776634988884, "percentage": 62.74, "elapsed_time": "2:46:09", "remaining_time": "1:38:40", "throughput": 8680.92, "total_tokens": 86547664} +{"current_steps": 128415, "total_steps": 204665, "loss": 0.0, "lr": 7.329834151389278e-07, "epoch": 3.1371998143307356, "percentage": 62.74, "elapsed_time": "2:46:10", "remaining_time": "1:38:40", "throughput": 8680.91, "total_tokens": 86550544} +{"current_steps": 128420, "total_steps": 204665, "loss": 0.0406, "lr": 7.329012347751827e-07, "epoch": 3.1373219651625828, "percentage": 62.75, "elapsed_time": "2:46:10", "remaining_time": "1:38:39", "throughput": 8680.94, "total_tokens": 86553872} +{"current_steps": 128425, "total_steps": 204665, "loss": 0.0, "lr": 7.328190563538512e-07, "epoch": 3.13744411599443, "percentage": 62.75, "elapsed_time": "2:46:10", "remaining_time": "1:38:39", "throughput": 8680.96, "total_tokens": 86557072} +{"current_steps": 128430, "total_steps": 204665, "loss": 0.0, "lr": 7.327368798755318e-07, "epoch": 3.137566266826277, "percentage": 62.75, "elapsed_time": "2:46:11", "remaining_time": "1:38:38", "throughput": 8681.01, "total_tokens": 86560656} +{"current_steps": 128435, "total_steps": 204665, "loss": 0.0, "lr": 7.326547053408212e-07, "epoch": 3.1376884176581243, "percentage": 62.75, "elapsed_time": "2:46:11", "remaining_time": "1:38:38", "throughput": 8681.08, "total_tokens": 86564368} +{"current_steps": 128440, "total_steps": 204665, "loss": 0.0001, "lr": 7.325725327503175e-07, "epoch": 3.1378105684899715, "percentage": 62.76, "elapsed_time": "2:46:11", "remaining_time": "1:38:38", "throughput": 8681.08, "total_tokens": 86567312} +{"current_steps": 128445, "total_steps": 204665, "loss": 0.0, "lr": 7.324903621046188e-07, "epoch": 3.1379327193218187, "percentage": 62.76, "elapsed_time": "2:46:12", "remaining_time": "1:38:37", "throughput": 8681.1, "total_tokens": 86570512} +{"current_steps": 128450, "total_steps": 204665, "loss": 0.0, "lr": 7.324081934043218e-07, "epoch": 3.138054870153666, "percentage": 62.76, "elapsed_time": "2:46:12", "remaining_time": "1:38:37", "throughput": 8681.23, "total_tokens": 86574992} +{"current_steps": 128455, "total_steps": 204665, "loss": 0.0479, "lr": 7.323260266500244e-07, "epoch": 3.138177020985513, "percentage": 62.76, "elapsed_time": "2:46:13", "remaining_time": "1:38:36", "throughput": 8681.26, "total_tokens": 86578384} +{"current_steps": 128460, "total_steps": 204665, "loss": 0.0, "lr": 7.322438618423241e-07, "epoch": 3.1382991718173603, "percentage": 62.77, "elapsed_time": "2:46:13", "remaining_time": "1:38:36", "throughput": 8681.46, "total_tokens": 86583760} +{"current_steps": 128465, "total_steps": 204665, "loss": 0.0339, "lr": 7.321616989818189e-07, "epoch": 3.138421322649207, "percentage": 62.77, "elapsed_time": "2:46:13", "remaining_time": "1:38:36", "throughput": 8681.53, "total_tokens": 86587600} +{"current_steps": 128470, "total_steps": 204665, "loss": 0.0587, "lr": 7.320795380691051e-07, "epoch": 3.1385434734810542, "percentage": 62.77, "elapsed_time": "2:46:14", "remaining_time": "1:38:35", "throughput": 8681.56, "total_tokens": 86590992} +{"current_steps": 128475, "total_steps": 204665, "loss": 0.0, "lr": 7.319973791047813e-07, "epoch": 3.1386656243129014, "percentage": 62.77, "elapsed_time": "2:46:14", "remaining_time": "1:38:35", "throughput": 8681.59, "total_tokens": 86594320} +{"current_steps": 128480, "total_steps": 204665, "loss": 0.0, "lr": 7.319152220894449e-07, "epoch": 3.1387877751447486, "percentage": 62.78, "elapsed_time": "2:46:14", "remaining_time": "1:38:34", "throughput": 8681.62, "total_tokens": 86597584} +{"current_steps": 128485, "total_steps": 204665, "loss": 0.0255, "lr": 7.318330670236927e-07, "epoch": 3.138909925976596, "percentage": 62.78, "elapsed_time": "2:46:15", "remaining_time": "1:38:34", "throughput": 8681.7, "total_tokens": 86601488} +{"current_steps": 128490, "total_steps": 204665, "loss": 0.0696, "lr": 7.31750913908123e-07, "epoch": 3.139032076808443, "percentage": 62.78, "elapsed_time": "2:46:15", "remaining_time": "1:38:33", "throughput": 8681.73, "total_tokens": 86604880} +{"current_steps": 128495, "total_steps": 204665, "loss": 0.0703, "lr": 7.316687627433323e-07, "epoch": 3.13915422764029, "percentage": 62.78, "elapsed_time": "2:46:15", "remaining_time": "1:38:33", "throughput": 8681.79, "total_tokens": 86608592} +{"current_steps": 128500, "total_steps": 204665, "loss": 0.0002, "lr": 7.315866135299189e-07, "epoch": 3.1392763784721374, "percentage": 62.79, "elapsed_time": "2:46:16", "remaining_time": "1:38:33", "throughput": 8681.84, "total_tokens": 86612112} +{"current_steps": 128505, "total_steps": 204665, "loss": 0.0, "lr": 7.315044662684797e-07, "epoch": 3.1393985293039846, "percentage": 62.79, "elapsed_time": "2:46:16", "remaining_time": "1:38:32", "throughput": 8681.85, "total_tokens": 86615184} +{"current_steps": 128510, "total_steps": 204665, "loss": 0.0003, "lr": 7.314223209596122e-07, "epoch": 3.1395206801358317, "percentage": 62.79, "elapsed_time": "2:46:16", "remaining_time": "1:38:32", "throughput": 8681.87, "total_tokens": 86618384} +{"current_steps": 128515, "total_steps": 204665, "loss": 0.0001, "lr": 7.313401776039142e-07, "epoch": 3.139642830967679, "percentage": 62.79, "elapsed_time": "2:46:17", "remaining_time": "1:38:31", "throughput": 8681.9, "total_tokens": 86621648} +{"current_steps": 128520, "total_steps": 204665, "loss": 0.0003, "lr": 7.312580362019822e-07, "epoch": 3.139764981799526, "percentage": 62.8, "elapsed_time": "2:46:17", "remaining_time": "1:38:31", "throughput": 8681.93, "total_tokens": 86624912} +{"current_steps": 128525, "total_steps": 204665, "loss": 0.0, "lr": 7.311758967544143e-07, "epoch": 3.1398871326313733, "percentage": 62.8, "elapsed_time": "2:46:17", "remaining_time": "1:38:31", "throughput": 8681.95, "total_tokens": 86628112} +{"current_steps": 128530, "total_steps": 204665, "loss": 0.0716, "lr": 7.310937592618074e-07, "epoch": 3.1400092834632205, "percentage": 62.8, "elapsed_time": "2:46:18", "remaining_time": "1:38:30", "throughput": 8681.98, "total_tokens": 86631440} +{"current_steps": 128535, "total_steps": 204665, "loss": 0.0, "lr": 7.31011623724759e-07, "epoch": 3.1401314342950677, "percentage": 62.8, "elapsed_time": "2:46:18", "remaining_time": "1:38:30", "throughput": 8682.03, "total_tokens": 86635088} +{"current_steps": 128540, "total_steps": 204665, "loss": 0.0001, "lr": 7.309294901438667e-07, "epoch": 3.140253585126915, "percentage": 62.81, "elapsed_time": "2:46:19", "remaining_time": "1:38:29", "throughput": 8682.07, "total_tokens": 86638480} +{"current_steps": 128545, "total_steps": 204665, "loss": 0.0, "lr": 7.308473585197272e-07, "epoch": 3.140375735958762, "percentage": 62.81, "elapsed_time": "2:46:19", "remaining_time": "1:38:29", "throughput": 8682.11, "total_tokens": 86641936} +{"current_steps": 128550, "total_steps": 204665, "loss": 0.0, "lr": 7.307652288529385e-07, "epoch": 3.140497886790609, "percentage": 62.81, "elapsed_time": "2:46:19", "remaining_time": "1:38:29", "throughput": 8682.11, "total_tokens": 86644880} +{"current_steps": 128555, "total_steps": 204665, "loss": 0.0, "lr": 7.306831011440971e-07, "epoch": 3.140620037622456, "percentage": 62.81, "elapsed_time": "2:46:20", "remaining_time": "1:38:28", "throughput": 8682.22, "total_tokens": 86649168} +{"current_steps": 128560, "total_steps": 204665, "loss": 0.0, "lr": 7.30600975393801e-07, "epoch": 3.140742188454303, "percentage": 62.81, "elapsed_time": "2:46:20", "remaining_time": "1:38:28", "throughput": 8682.24, "total_tokens": 86652432} +{"current_steps": 128565, "total_steps": 204665, "loss": 0.0822, "lr": 7.305188516026468e-07, "epoch": 3.1408643392861504, "percentage": 62.82, "elapsed_time": "2:46:20", "remaining_time": "1:38:27", "throughput": 8682.27, "total_tokens": 86655696} +{"current_steps": 128570, "total_steps": 204665, "loss": 0.0048, "lr": 7.304367297712318e-07, "epoch": 3.1409864901179976, "percentage": 62.82, "elapsed_time": "2:46:21", "remaining_time": "1:38:27", "throughput": 8682.31, "total_tokens": 86659088} +{"current_steps": 128575, "total_steps": 204665, "loss": 0.0, "lr": 7.303546099001539e-07, "epoch": 3.1411086409498448, "percentage": 62.82, "elapsed_time": "2:46:21", "remaining_time": "1:38:26", "throughput": 8682.39, "total_tokens": 86663056} +{"current_steps": 128580, "total_steps": 204665, "loss": 0.0, "lr": 7.302724919900093e-07, "epoch": 3.141230791781692, "percentage": 62.82, "elapsed_time": "2:46:21", "remaining_time": "1:38:26", "throughput": 8682.4, "total_tokens": 86666128} +{"current_steps": 128585, "total_steps": 204665, "loss": 0.036, "lr": 7.301903760413961e-07, "epoch": 3.141352942613539, "percentage": 62.83, "elapsed_time": "2:46:22", "remaining_time": "1:38:26", "throughput": 8682.46, "total_tokens": 86669776} +{"current_steps": 128590, "total_steps": 204665, "loss": 0.0002, "lr": 7.301082620549107e-07, "epoch": 3.1414750934453863, "percentage": 62.83, "elapsed_time": "2:46:22", "remaining_time": "1:38:25", "throughput": 8682.45, "total_tokens": 86672656} +{"current_steps": 128595, "total_steps": 204665, "loss": 0.0541, "lr": 7.300261500311507e-07, "epoch": 3.1415972442772335, "percentage": 62.83, "elapsed_time": "2:46:22", "remaining_time": "1:38:25", "throughput": 8682.48, "total_tokens": 86675984} +{"current_steps": 128600, "total_steps": 204665, "loss": 0.0001, "lr": 7.299440399707133e-07, "epoch": 3.1417193951090807, "percentage": 62.83, "elapsed_time": "2:46:23", "remaining_time": "1:38:24", "throughput": 8682.52, "total_tokens": 86679376} +{"current_steps": 128605, "total_steps": 204665, "loss": 0.0, "lr": 7.298619318741955e-07, "epoch": 3.141841545940928, "percentage": 62.84, "elapsed_time": "2:46:23", "remaining_time": "1:38:24", "throughput": 8682.54, "total_tokens": 86682576} +{"current_steps": 128610, "total_steps": 204665, "loss": 0.0, "lr": 7.297798257421944e-07, "epoch": 3.141963696772775, "percentage": 62.84, "elapsed_time": "2:46:23", "remaining_time": "1:38:24", "throughput": 8682.56, "total_tokens": 86685712} +{"current_steps": 128615, "total_steps": 204665, "loss": 0.0, "lr": 7.296977215753069e-07, "epoch": 3.1420858476046223, "percentage": 62.84, "elapsed_time": "2:46:24", "remaining_time": "1:38:23", "throughput": 8682.55, "total_tokens": 86688528} +{"current_steps": 128620, "total_steps": 204665, "loss": 0.0, "lr": 7.296156193741305e-07, "epoch": 3.1422079984364695, "percentage": 62.84, "elapsed_time": "2:46:24", "remaining_time": "1:38:23", "throughput": 8682.58, "total_tokens": 86691856} +{"current_steps": 128625, "total_steps": 204665, "loss": 0.0, "lr": 7.295335191392617e-07, "epoch": 3.1423301492683167, "percentage": 62.85, "elapsed_time": "2:46:24", "remaining_time": "1:38:22", "throughput": 8682.59, "total_tokens": 86694928} +{"current_steps": 128630, "total_steps": 204665, "loss": 0.0004, "lr": 7.294514208712979e-07, "epoch": 3.142452300100164, "percentage": 62.85, "elapsed_time": "2:46:25", "remaining_time": "1:38:22", "throughput": 8682.63, "total_tokens": 86698320} +{"current_steps": 128635, "total_steps": 204665, "loss": 0.0001, "lr": 7.293693245708365e-07, "epoch": 3.142574450932011, "percentage": 62.85, "elapsed_time": "2:46:25", "remaining_time": "1:38:22", "throughput": 8682.68, "total_tokens": 86701904} +{"current_steps": 128640, "total_steps": 204665, "loss": 0.0546, "lr": 7.29287230238474e-07, "epoch": 3.1426966017638582, "percentage": 62.85, "elapsed_time": "2:46:25", "remaining_time": "1:38:21", "throughput": 8682.75, "total_tokens": 86705680} +{"current_steps": 128645, "total_steps": 204665, "loss": 0.0, "lr": 7.292051378748076e-07, "epoch": 3.142818752595705, "percentage": 62.86, "elapsed_time": "2:46:26", "remaining_time": "1:38:21", "throughput": 8682.78, "total_tokens": 86709072} +{"current_steps": 128650, "total_steps": 204665, "loss": 0.0, "lr": 7.291230474804342e-07, "epoch": 3.142940903427552, "percentage": 62.86, "elapsed_time": "2:46:26", "remaining_time": "1:38:20", "throughput": 8682.81, "total_tokens": 86712336} +{"current_steps": 128655, "total_steps": 204665, "loss": 0.0458, "lr": 7.290409590559508e-07, "epoch": 3.1430630542593994, "percentage": 62.86, "elapsed_time": "2:46:27", "remaining_time": "1:38:20", "throughput": 8682.8, "total_tokens": 86715216} +{"current_steps": 128660, "total_steps": 204665, "loss": 0.0, "lr": 7.289588726019547e-07, "epoch": 3.1431852050912465, "percentage": 62.86, "elapsed_time": "2:46:27", "remaining_time": "1:38:19", "throughput": 8682.84, "total_tokens": 86718672} +{"current_steps": 128665, "total_steps": 204665, "loss": 0.0, "lr": 7.288767881190423e-07, "epoch": 3.1433073559230937, "percentage": 62.87, "elapsed_time": "2:46:27", "remaining_time": "1:38:19", "throughput": 8682.86, "total_tokens": 86721872} +{"current_steps": 128670, "total_steps": 204665, "loss": 0.1059, "lr": 7.287947056078112e-07, "epoch": 3.143429506754941, "percentage": 62.87, "elapsed_time": "2:46:28", "remaining_time": "1:38:19", "throughput": 8682.91, "total_tokens": 86725392} +{"current_steps": 128675, "total_steps": 204665, "loss": 0.0001, "lr": 7.287126250688575e-07, "epoch": 3.143551657586788, "percentage": 62.87, "elapsed_time": "2:46:28", "remaining_time": "1:38:18", "throughput": 8682.92, "total_tokens": 86728464} +{"current_steps": 128680, "total_steps": 204665, "loss": 0.0, "lr": 7.286305465027789e-07, "epoch": 3.1436738084186353, "percentage": 62.87, "elapsed_time": "2:46:28", "remaining_time": "1:38:18", "throughput": 8682.93, "total_tokens": 86731536} +{"current_steps": 128685, "total_steps": 204665, "loss": 0.0, "lr": 7.285484699101716e-07, "epoch": 3.1437959592504825, "percentage": 62.88, "elapsed_time": "2:46:29", "remaining_time": "1:38:17", "throughput": 8682.95, "total_tokens": 86734736} +{"current_steps": 128690, "total_steps": 204665, "loss": 0.0305, "lr": 7.284663952916328e-07, "epoch": 3.1439181100823297, "percentage": 62.88, "elapsed_time": "2:46:29", "remaining_time": "1:38:17", "throughput": 8682.98, "total_tokens": 86738128} +{"current_steps": 128695, "total_steps": 204665, "loss": 0.0001, "lr": 7.283843226477598e-07, "epoch": 3.144040260914177, "percentage": 62.88, "elapsed_time": "2:46:29", "remaining_time": "1:38:17", "throughput": 8683.01, "total_tokens": 86741328} +{"current_steps": 128700, "total_steps": 204665, "loss": 0.0, "lr": 7.283022519791487e-07, "epoch": 3.144162411746024, "percentage": 62.88, "elapsed_time": "2:46:30", "remaining_time": "1:38:16", "throughput": 8683.04, "total_tokens": 86744720} +{"current_steps": 128705, "total_steps": 204665, "loss": 0.0, "lr": 7.282201832863972e-07, "epoch": 3.1442845625778713, "percentage": 62.89, "elapsed_time": "2:46:30", "remaining_time": "1:38:16", "throughput": 8683.12, "total_tokens": 86748688} +{"current_steps": 128710, "total_steps": 204665, "loss": 0.0001, "lr": 7.281381165701011e-07, "epoch": 3.1444067134097184, "percentage": 62.89, "elapsed_time": "2:46:30", "remaining_time": "1:38:15", "throughput": 8683.17, "total_tokens": 86752144} +{"current_steps": 128715, "total_steps": 204665, "loss": 0.0, "lr": 7.280560518308582e-07, "epoch": 3.1445288642415656, "percentage": 62.89, "elapsed_time": "2:46:31", "remaining_time": "1:38:15", "throughput": 8683.21, "total_tokens": 86755600} +{"current_steps": 128720, "total_steps": 204665, "loss": 0.0002, "lr": 7.279739890692646e-07, "epoch": 3.144651015073413, "percentage": 62.89, "elapsed_time": "2:46:31", "remaining_time": "1:38:15", "throughput": 8683.23, "total_tokens": 86758864} +{"current_steps": 128725, "total_steps": 204665, "loss": 0.0, "lr": 7.27891928285917e-07, "epoch": 3.14477316590526, "percentage": 62.9, "elapsed_time": "2:46:31", "remaining_time": "1:38:14", "throughput": 8683.28, "total_tokens": 86762448} +{"current_steps": 128730, "total_steps": 204665, "loss": 0.0343, "lr": 7.278098694814131e-07, "epoch": 3.1448953167371068, "percentage": 62.9, "elapsed_time": "2:46:32", "remaining_time": "1:38:14", "throughput": 8683.32, "total_tokens": 86765840} +{"current_steps": 128735, "total_steps": 204665, "loss": 0.0003, "lr": 7.277278126563485e-07, "epoch": 3.145017467568954, "percentage": 62.9, "elapsed_time": "2:46:32", "remaining_time": "1:38:13", "throughput": 8683.36, "total_tokens": 86769296} +{"current_steps": 128740, "total_steps": 204665, "loss": 0.0, "lr": 7.27645757811321e-07, "epoch": 3.145139618400801, "percentage": 62.9, "elapsed_time": "2:46:32", "remaining_time": "1:38:13", "throughput": 8683.39, "total_tokens": 86772560} +{"current_steps": 128745, "total_steps": 204665, "loss": 0.0, "lr": 7.275637049469263e-07, "epoch": 3.1452617692326483, "percentage": 62.91, "elapsed_time": "2:46:33", "remaining_time": "1:38:12", "throughput": 8683.43, "total_tokens": 86776080} +{"current_steps": 128750, "total_steps": 204665, "loss": 0.0, "lr": 7.274816540637616e-07, "epoch": 3.1453839200644955, "percentage": 62.91, "elapsed_time": "2:46:33", "remaining_time": "1:38:12", "throughput": 8683.44, "total_tokens": 86779088} +{"current_steps": 128755, "total_steps": 204665, "loss": 0.036, "lr": 7.27399605162424e-07, "epoch": 3.1455060708963427, "percentage": 62.91, "elapsed_time": "2:46:33", "remaining_time": "1:38:12", "throughput": 8683.48, "total_tokens": 86782544} +{"current_steps": 128760, "total_steps": 204665, "loss": 0.0, "lr": 7.273175582435098e-07, "epoch": 3.14562822172819, "percentage": 62.91, "elapsed_time": "2:46:34", "remaining_time": "1:38:11", "throughput": 8683.49, "total_tokens": 86785680} +{"current_steps": 128765, "total_steps": 204665, "loss": 0.0, "lr": 7.272355133076154e-07, "epoch": 3.145750372560037, "percentage": 62.92, "elapsed_time": "2:46:34", "remaining_time": "1:38:11", "throughput": 8683.54, "total_tokens": 86789136} +{"current_steps": 128770, "total_steps": 204665, "loss": 0.0, "lr": 7.271534703553379e-07, "epoch": 3.1458725233918843, "percentage": 62.92, "elapsed_time": "2:46:35", "remaining_time": "1:38:10", "throughput": 8683.58, "total_tokens": 86792656} +{"current_steps": 128775, "total_steps": 204665, "loss": 0.0003, "lr": 7.270714293872738e-07, "epoch": 3.1459946742237315, "percentage": 62.92, "elapsed_time": "2:46:35", "remaining_time": "1:38:10", "throughput": 8683.6, "total_tokens": 86795792} +{"current_steps": 128780, "total_steps": 204665, "loss": 0.0, "lr": 7.269893904040194e-07, "epoch": 3.1461168250555787, "percentage": 62.92, "elapsed_time": "2:46:35", "remaining_time": "1:38:10", "throughput": 8683.68, "total_tokens": 86799696} +{"current_steps": 128785, "total_steps": 204665, "loss": 0.0001, "lr": 7.269073534061715e-07, "epoch": 3.146238975887426, "percentage": 62.92, "elapsed_time": "2:46:36", "remaining_time": "1:38:09", "throughput": 8683.72, "total_tokens": 86803216} +{"current_steps": 128790, "total_steps": 204665, "loss": 0.0, "lr": 7.268253183943271e-07, "epoch": 3.146361126719273, "percentage": 62.93, "elapsed_time": "2:46:36", "remaining_time": "1:38:09", "throughput": 8683.73, "total_tokens": 86806224} +{"current_steps": 128795, "total_steps": 204665, "loss": 0.0477, "lr": 7.267432853690823e-07, "epoch": 3.1464832775511202, "percentage": 62.93, "elapsed_time": "2:46:36", "remaining_time": "1:38:08", "throughput": 8683.73, "total_tokens": 86809168} +{"current_steps": 128800, "total_steps": 204665, "loss": 0.0, "lr": 7.266612543310339e-07, "epoch": 3.1466054283829674, "percentage": 62.93, "elapsed_time": "2:46:37", "remaining_time": "1:38:08", "throughput": 8683.77, "total_tokens": 86812688} +{"current_steps": 128805, "total_steps": 204665, "loss": 0.0772, "lr": 7.265792252807783e-07, "epoch": 3.1467275792148146, "percentage": 62.93, "elapsed_time": "2:46:37", "remaining_time": "1:38:08", "throughput": 8683.78, "total_tokens": 86815696} +{"current_steps": 128810, "total_steps": 204665, "loss": 0.0288, "lr": 7.264971982189122e-07, "epoch": 3.146849730046662, "percentage": 62.94, "elapsed_time": "2:46:37", "remaining_time": "1:38:07", "throughput": 8683.83, "total_tokens": 86819344} +{"current_steps": 128815, "total_steps": 204665, "loss": 0.0001, "lr": 7.26415173146032e-07, "epoch": 3.146971880878509, "percentage": 62.94, "elapsed_time": "2:46:38", "remaining_time": "1:38:07", "throughput": 8683.89, "total_tokens": 86822928} +{"current_steps": 128820, "total_steps": 204665, "loss": 0.0, "lr": 7.263331500627343e-07, "epoch": 3.147094031710356, "percentage": 62.94, "elapsed_time": "2:46:38", "remaining_time": "1:38:06", "throughput": 8683.91, "total_tokens": 86826192} +{"current_steps": 128825, "total_steps": 204665, "loss": 0.0, "lr": 7.262511289696158e-07, "epoch": 3.147216182542203, "percentage": 62.94, "elapsed_time": "2:46:38", "remaining_time": "1:38:06", "throughput": 8683.92, "total_tokens": 86829264} +{"current_steps": 128830, "total_steps": 204665, "loss": 0.0, "lr": 7.261691098672722e-07, "epoch": 3.14733833337405, "percentage": 62.95, "elapsed_time": "2:46:39", "remaining_time": "1:38:05", "throughput": 8684.02, "total_tokens": 86833424} +{"current_steps": 128835, "total_steps": 204665, "loss": 0.0, "lr": 7.260870927563009e-07, "epoch": 3.1474604842058973, "percentage": 62.95, "elapsed_time": "2:46:39", "remaining_time": "1:38:05", "throughput": 8684.03, "total_tokens": 86836432} +{"current_steps": 128840, "total_steps": 204665, "loss": 0.0, "lr": 7.260050776372974e-07, "epoch": 3.1475826350377445, "percentage": 62.95, "elapsed_time": "2:46:39", "remaining_time": "1:38:05", "throughput": 8684.07, "total_tokens": 86839952} +{"current_steps": 128845, "total_steps": 204665, "loss": 0.0003, "lr": 7.259230645108589e-07, "epoch": 3.1477047858695917, "percentage": 62.95, "elapsed_time": "2:46:40", "remaining_time": "1:38:04", "throughput": 8684.11, "total_tokens": 86843344} +{"current_steps": 128850, "total_steps": 204665, "loss": 0.0, "lr": 7.258410533775819e-07, "epoch": 3.147826936701439, "percentage": 62.96, "elapsed_time": "2:46:40", "remaining_time": "1:38:04", "throughput": 8684.13, "total_tokens": 86846608} +{"current_steps": 128855, "total_steps": 204665, "loss": 0.0, "lr": 7.257590442380621e-07, "epoch": 3.147949087533286, "percentage": 62.96, "elapsed_time": "2:46:40", "remaining_time": "1:38:03", "throughput": 8684.16, "total_tokens": 86849808} +{"current_steps": 128860, "total_steps": 204665, "loss": 0.0, "lr": 7.256770370928968e-07, "epoch": 3.1480712383651333, "percentage": 62.96, "elapsed_time": "2:46:41", "remaining_time": "1:38:03", "throughput": 8684.2, "total_tokens": 86853264} +{"current_steps": 128865, "total_steps": 204665, "loss": 0.0, "lr": 7.255950319426814e-07, "epoch": 3.1481933891969804, "percentage": 62.96, "elapsed_time": "2:46:41", "remaining_time": "1:38:03", "throughput": 8684.27, "total_tokens": 86857040} +{"current_steps": 128870, "total_steps": 204665, "loss": 0.0, "lr": 7.25513028788013e-07, "epoch": 3.1483155400288276, "percentage": 62.97, "elapsed_time": "2:46:42", "remaining_time": "1:38:02", "throughput": 8684.33, "total_tokens": 86860752} +{"current_steps": 128875, "total_steps": 204665, "loss": 0.0538, "lr": 7.254310276294876e-07, "epoch": 3.148437690860675, "percentage": 62.97, "elapsed_time": "2:46:42", "remaining_time": "1:38:02", "throughput": 8684.34, "total_tokens": 86863824} +{"current_steps": 128880, "total_steps": 204665, "loss": 0.0, "lr": 7.253490284677015e-07, "epoch": 3.148559841692522, "percentage": 62.97, "elapsed_time": "2:46:42", "remaining_time": "1:38:01", "throughput": 8684.35, "total_tokens": 86866896} +{"current_steps": 128885, "total_steps": 204665, "loss": 0.0003, "lr": 7.252670313032514e-07, "epoch": 3.148681992524369, "percentage": 62.97, "elapsed_time": "2:46:43", "remaining_time": "1:38:01", "throughput": 8684.38, "total_tokens": 86870224} +{"current_steps": 128890, "total_steps": 204665, "loss": 0.0, "lr": 7.251850361367329e-07, "epoch": 3.1488041433562164, "percentage": 62.98, "elapsed_time": "2:46:43", "remaining_time": "1:38:01", "throughput": 8684.41, "total_tokens": 86873552} +{"current_steps": 128895, "total_steps": 204665, "loss": 0.0, "lr": 7.251030429687433e-07, "epoch": 3.1489262941880636, "percentage": 62.98, "elapsed_time": "2:46:43", "remaining_time": "1:38:00", "throughput": 8684.44, "total_tokens": 86876816} +{"current_steps": 128900, "total_steps": 204665, "loss": 0.0, "lr": 7.250210517998778e-07, "epoch": 3.1490484450199108, "percentage": 62.98, "elapsed_time": "2:46:44", "remaining_time": "1:38:00", "throughput": 8684.47, "total_tokens": 86880144} +{"current_steps": 128905, "total_steps": 204665, "loss": 0.0001, "lr": 7.249390626307332e-07, "epoch": 3.149170595851758, "percentage": 62.98, "elapsed_time": "2:46:44", "remaining_time": "1:37:59", "throughput": 8684.51, "total_tokens": 86883664} +{"current_steps": 128910, "total_steps": 204665, "loss": 0.0013, "lr": 7.248570754619061e-07, "epoch": 3.1492927466836047, "percentage": 62.99, "elapsed_time": "2:46:44", "remaining_time": "1:37:59", "throughput": 8684.53, "total_tokens": 86886736} +{"current_steps": 128915, "total_steps": 204665, "loss": 0.0, "lr": 7.247750902939922e-07, "epoch": 3.149414897515452, "percentage": 62.99, "elapsed_time": "2:46:45", "remaining_time": "1:37:58", "throughput": 8684.54, "total_tokens": 86889808} +{"current_steps": 128920, "total_steps": 204665, "loss": 0.0, "lr": 7.246931071275879e-07, "epoch": 3.149537048347299, "percentage": 62.99, "elapsed_time": "2:46:45", "remaining_time": "1:37:58", "throughput": 8684.57, "total_tokens": 86893072} +{"current_steps": 128925, "total_steps": 204665, "loss": 0.0001, "lr": 7.246111259632892e-07, "epoch": 3.1496591991791463, "percentage": 62.99, "elapsed_time": "2:46:45", "remaining_time": "1:37:58", "throughput": 8684.58, "total_tokens": 86896208} +{"current_steps": 128930, "total_steps": 204665, "loss": 0.0, "lr": 7.245291468016928e-07, "epoch": 3.1497813500109935, "percentage": 63.0, "elapsed_time": "2:46:46", "remaining_time": "1:37:57", "throughput": 8684.61, "total_tokens": 86899536} +{"current_steps": 128935, "total_steps": 204665, "loss": 0.0, "lr": 7.244471696433943e-07, "epoch": 3.1499035008428407, "percentage": 63.0, "elapsed_time": "2:46:46", "remaining_time": "1:37:57", "throughput": 8684.62, "total_tokens": 86902672} +{"current_steps": 128940, "total_steps": 204665, "loss": 0.0, "lr": 7.243651944889897e-07, "epoch": 3.150025651674688, "percentage": 63.0, "elapsed_time": "2:46:46", "remaining_time": "1:37:56", "throughput": 8684.64, "total_tokens": 86905808} +{"current_steps": 128945, "total_steps": 204665, "loss": 0.0, "lr": 7.242832213390763e-07, "epoch": 3.150147802506535, "percentage": 63.0, "elapsed_time": "2:46:47", "remaining_time": "1:37:56", "throughput": 8684.67, "total_tokens": 86909072} +{"current_steps": 128950, "total_steps": 204665, "loss": 0.0001, "lr": 7.24201250194249e-07, "epoch": 3.1502699533383822, "percentage": 63.01, "elapsed_time": "2:46:47", "remaining_time": "1:37:56", "throughput": 8684.68, "total_tokens": 86912208} +{"current_steps": 128955, "total_steps": 204665, "loss": 0.0574, "lr": 7.241192810551047e-07, "epoch": 3.1503921041702294, "percentage": 63.01, "elapsed_time": "2:46:47", "remaining_time": "1:37:55", "throughput": 8684.69, "total_tokens": 86915280} +{"current_steps": 128960, "total_steps": 204665, "loss": 0.0, "lr": 7.240373139222387e-07, "epoch": 3.1505142550020766, "percentage": 63.01, "elapsed_time": "2:46:48", "remaining_time": "1:37:55", "throughput": 8684.75, "total_tokens": 86918864} +{"current_steps": 128965, "total_steps": 204665, "loss": 0.0001, "lr": 7.239553487962479e-07, "epoch": 3.150636405833924, "percentage": 63.01, "elapsed_time": "2:46:48", "remaining_time": "1:37:54", "throughput": 8684.77, "total_tokens": 86922000} +{"current_steps": 128970, "total_steps": 204665, "loss": 0.0005, "lr": 7.238733856777281e-07, "epoch": 3.150758556665771, "percentage": 63.02, "elapsed_time": "2:46:48", "remaining_time": "1:37:54", "throughput": 8684.79, "total_tokens": 86925264} +{"current_steps": 128975, "total_steps": 204665, "loss": 0.0, "lr": 7.237914245672752e-07, "epoch": 3.150880707497618, "percentage": 63.02, "elapsed_time": "2:46:49", "remaining_time": "1:37:54", "throughput": 8684.84, "total_tokens": 86928784} +{"current_steps": 128980, "total_steps": 204665, "loss": 0.0, "lr": 7.237094654654857e-07, "epoch": 3.1510028583294654, "percentage": 63.02, "elapsed_time": "2:46:49", "remaining_time": "1:37:53", "throughput": 8684.88, "total_tokens": 86932240} +{"current_steps": 128985, "total_steps": 204665, "loss": 0.0, "lr": 7.236275083729546e-07, "epoch": 3.1511250091613126, "percentage": 63.02, "elapsed_time": "2:46:49", "remaining_time": "1:37:53", "throughput": 8684.9, "total_tokens": 86935440} +{"current_steps": 128990, "total_steps": 204665, "loss": 0.0, "lr": 7.235455532902793e-07, "epoch": 3.1512471599931597, "percentage": 63.02, "elapsed_time": "2:46:50", "remaining_time": "1:37:52", "throughput": 8684.92, "total_tokens": 86938704} +{"current_steps": 128995, "total_steps": 204665, "loss": 0.0, "lr": 7.234636002180545e-07, "epoch": 3.1513693108250065, "percentage": 63.03, "elapsed_time": "2:46:50", "remaining_time": "1:37:52", "throughput": 8684.96, "total_tokens": 86942160} +{"current_steps": 129000, "total_steps": 204665, "loss": 0.0001, "lr": 7.233816491568768e-07, "epoch": 3.1514914616568537, "percentage": 63.03, "elapsed_time": "2:46:51", "remaining_time": "1:37:51", "throughput": 8685.03, "total_tokens": 86945872} +{"current_steps": 129005, "total_steps": 204665, "loss": 0.0, "lr": 7.232997001073427e-07, "epoch": 3.151613612488701, "percentage": 63.03, "elapsed_time": "2:46:51", "remaining_time": "1:37:51", "throughput": 8685.04, "total_tokens": 86948944} +{"current_steps": 129010, "total_steps": 204665, "loss": 0.0, "lr": 7.23217753070047e-07, "epoch": 3.151735763320548, "percentage": 63.03, "elapsed_time": "2:46:51", "remaining_time": "1:37:51", "throughput": 8685.05, "total_tokens": 86952080} +{"current_steps": 129015, "total_steps": 204665, "loss": 0.0002, "lr": 7.231358080455868e-07, "epoch": 3.1518579141523952, "percentage": 63.04, "elapsed_time": "2:46:52", "remaining_time": "1:37:50", "throughput": 8685.06, "total_tokens": 86955088} +{"current_steps": 129020, "total_steps": 204665, "loss": 0.0348, "lr": 7.23053865034557e-07, "epoch": 3.1519800649842424, "percentage": 63.04, "elapsed_time": "2:46:52", "remaining_time": "1:37:50", "throughput": 8685.08, "total_tokens": 86958288} +{"current_steps": 129025, "total_steps": 204665, "loss": 0.0, "lr": 7.229719240375545e-07, "epoch": 3.1521022158160896, "percentage": 63.04, "elapsed_time": "2:46:52", "remaining_time": "1:37:49", "throughput": 8685.12, "total_tokens": 86961744} +{"current_steps": 129030, "total_steps": 204665, "loss": 0.0, "lr": 7.228899850551743e-07, "epoch": 3.152224366647937, "percentage": 63.04, "elapsed_time": "2:46:53", "remaining_time": "1:37:49", "throughput": 8685.17, "total_tokens": 86965264} +{"current_steps": 129035, "total_steps": 204665, "loss": 0.0, "lr": 7.228080480880125e-07, "epoch": 3.152346517479784, "percentage": 63.05, "elapsed_time": "2:46:53", "remaining_time": "1:37:49", "throughput": 8685.18, "total_tokens": 86968336} +{"current_steps": 129040, "total_steps": 204665, "loss": 0.0, "lr": 7.227261131366655e-07, "epoch": 3.152468668311631, "percentage": 63.05, "elapsed_time": "2:46:53", "remaining_time": "1:37:48", "throughput": 8685.24, "total_tokens": 86971984} +{"current_steps": 129045, "total_steps": 204665, "loss": 0.0, "lr": 7.226441802017286e-07, "epoch": 3.1525908191434784, "percentage": 63.05, "elapsed_time": "2:46:54", "remaining_time": "1:37:48", "throughput": 8685.25, "total_tokens": 86975120} +{"current_steps": 129050, "total_steps": 204665, "loss": 0.0, "lr": 7.22562249283798e-07, "epoch": 3.1527129699753256, "percentage": 63.05, "elapsed_time": "2:46:54", "remaining_time": "1:37:47", "throughput": 8685.29, "total_tokens": 86978576} +{"current_steps": 129055, "total_steps": 204665, "loss": 0.0001, "lr": 7.224803203834691e-07, "epoch": 3.1528351208071728, "percentage": 63.06, "elapsed_time": "2:46:54", "remaining_time": "1:37:47", "throughput": 8685.3, "total_tokens": 86981648} +{"current_steps": 129060, "total_steps": 204665, "loss": 0.0416, "lr": 7.223983935013378e-07, "epoch": 3.15295727163902, "percentage": 63.06, "elapsed_time": "2:46:55", "remaining_time": "1:37:47", "throughput": 8685.33, "total_tokens": 86984912} +{"current_steps": 129065, "total_steps": 204665, "loss": 0.0001, "lr": 7.223164686380005e-07, "epoch": 3.153079422470867, "percentage": 63.06, "elapsed_time": "2:46:55", "remaining_time": "1:37:46", "throughput": 8685.36, "total_tokens": 86988240} +{"current_steps": 129070, "total_steps": 204665, "loss": 0.1514, "lr": 7.222345457940524e-07, "epoch": 3.1532015733027143, "percentage": 63.06, "elapsed_time": "2:46:55", "remaining_time": "1:37:46", "throughput": 8685.37, "total_tokens": 86991248} +{"current_steps": 129075, "total_steps": 204665, "loss": 0.0, "lr": 7.221526249700893e-07, "epoch": 3.1533237241345615, "percentage": 63.07, "elapsed_time": "2:46:56", "remaining_time": "1:37:45", "throughput": 8685.43, "total_tokens": 86994896} +{"current_steps": 129080, "total_steps": 204665, "loss": 0.0464, "lr": 7.220707061667072e-07, "epoch": 3.1534458749664087, "percentage": 63.07, "elapsed_time": "2:46:56", "remaining_time": "1:37:45", "throughput": 8685.46, "total_tokens": 86998160} +{"current_steps": 129085, "total_steps": 204665, "loss": 0.0489, "lr": 7.219887893845018e-07, "epoch": 3.153568025798256, "percentage": 63.07, "elapsed_time": "2:46:56", "remaining_time": "1:37:44", "throughput": 8685.51, "total_tokens": 87001680} +{"current_steps": 129090, "total_steps": 204665, "loss": 0.0688, "lr": 7.219068746240682e-07, "epoch": 3.1536901766301026, "percentage": 63.07, "elapsed_time": "2:46:57", "remaining_time": "1:37:44", "throughput": 8685.55, "total_tokens": 87005200} +{"current_steps": 129095, "total_steps": 204665, "loss": 0.0005, "lr": 7.218249618860026e-07, "epoch": 3.15381232746195, "percentage": 63.08, "elapsed_time": "2:46:57", "remaining_time": "1:37:44", "throughput": 8685.6, "total_tokens": 87008720} +{"current_steps": 129100, "total_steps": 204665, "loss": 0.0, "lr": 7.217430511709013e-07, "epoch": 3.153934478293797, "percentage": 63.08, "elapsed_time": "2:46:57", "remaining_time": "1:37:43", "throughput": 8685.67, "total_tokens": 87012560} +{"current_steps": 129105, "total_steps": 204665, "loss": 0.0, "lr": 7.216611424793588e-07, "epoch": 3.154056629125644, "percentage": 63.08, "elapsed_time": "2:46:58", "remaining_time": "1:37:43", "throughput": 8685.67, "total_tokens": 87015504} +{"current_steps": 129110, "total_steps": 204665, "loss": 0.0, "lr": 7.215792358119718e-07, "epoch": 3.1541787799574914, "percentage": 63.08, "elapsed_time": "2:46:58", "remaining_time": "1:37:42", "throughput": 8685.7, "total_tokens": 87018704} +{"current_steps": 129115, "total_steps": 204665, "loss": 0.0, "lr": 7.21497331169335e-07, "epoch": 3.1543009307893386, "percentage": 63.09, "elapsed_time": "2:46:58", "remaining_time": "1:37:42", "throughput": 8685.75, "total_tokens": 87022288} +{"current_steps": 129120, "total_steps": 204665, "loss": 0.0, "lr": 7.214154285520451e-07, "epoch": 3.154423081621186, "percentage": 63.09, "elapsed_time": "2:46:59", "remaining_time": "1:37:42", "throughput": 8685.8, "total_tokens": 87025872} +{"current_steps": 129125, "total_steps": 204665, "loss": 0.0131, "lr": 7.213335279606965e-07, "epoch": 3.154545232453033, "percentage": 63.09, "elapsed_time": "2:46:59", "remaining_time": "1:37:41", "throughput": 8685.85, "total_tokens": 87029456} +{"current_steps": 129130, "total_steps": 204665, "loss": 0.0001, "lr": 7.212516293958857e-07, "epoch": 3.15466738328488, "percentage": 63.09, "elapsed_time": "2:47:00", "remaining_time": "1:37:41", "throughput": 8685.91, "total_tokens": 87033104} +{"current_steps": 129135, "total_steps": 204665, "loss": 0.0002, "lr": 7.211697328582082e-07, "epoch": 3.1547895341167274, "percentage": 63.1, "elapsed_time": "2:47:00", "remaining_time": "1:37:40", "throughput": 8685.94, "total_tokens": 87036368} +{"current_steps": 129140, "total_steps": 204665, "loss": 0.0001, "lr": 7.210878383482593e-07, "epoch": 3.1549116849485745, "percentage": 63.1, "elapsed_time": "2:47:00", "remaining_time": "1:37:40", "throughput": 8685.96, "total_tokens": 87039568} +{"current_steps": 129145, "total_steps": 204665, "loss": 0.0, "lr": 7.210059458666348e-07, "epoch": 3.1550338357804217, "percentage": 63.1, "elapsed_time": "2:47:01", "remaining_time": "1:37:40", "throughput": 8685.97, "total_tokens": 87042704} +{"current_steps": 129150, "total_steps": 204665, "loss": 0.1131, "lr": 7.209240554139296e-07, "epoch": 3.155155986612269, "percentage": 63.1, "elapsed_time": "2:47:01", "remaining_time": "1:37:39", "throughput": 8686.04, "total_tokens": 87046416} +{"current_steps": 129155, "total_steps": 204665, "loss": 0.0002, "lr": 7.208421669907398e-07, "epoch": 3.155278137444116, "percentage": 63.11, "elapsed_time": "2:47:01", "remaining_time": "1:37:39", "throughput": 8686.08, "total_tokens": 87049872} +{"current_steps": 129160, "total_steps": 204665, "loss": 0.0001, "lr": 7.207602805976613e-07, "epoch": 3.1554002882759633, "percentage": 63.11, "elapsed_time": "2:47:02", "remaining_time": "1:37:38", "throughput": 8686.11, "total_tokens": 87053200} +{"current_steps": 129165, "total_steps": 204665, "loss": 0.0, "lr": 7.206783962352889e-07, "epoch": 3.1555224391078105, "percentage": 63.11, "elapsed_time": "2:47:02", "remaining_time": "1:37:38", "throughput": 8686.26, "total_tokens": 87057936} +{"current_steps": 129170, "total_steps": 204665, "loss": 0.0, "lr": 7.205965139042186e-07, "epoch": 3.1556445899396577, "percentage": 63.11, "elapsed_time": "2:47:02", "remaining_time": "1:37:37", "throughput": 8686.31, "total_tokens": 87061520} +{"current_steps": 129175, "total_steps": 204665, "loss": 0.0, "lr": 7.205146336050451e-07, "epoch": 3.1557667407715044, "percentage": 63.12, "elapsed_time": "2:47:03", "remaining_time": "1:37:37", "throughput": 8686.34, "total_tokens": 87064848} +{"current_steps": 129180, "total_steps": 204665, "loss": 0.0501, "lr": 7.204327553383649e-07, "epoch": 3.1558888916033516, "percentage": 63.12, "elapsed_time": "2:47:03", "remaining_time": "1:37:37", "throughput": 8686.36, "total_tokens": 87067984} +{"current_steps": 129185, "total_steps": 204665, "loss": 0.0055, "lr": 7.203508791047727e-07, "epoch": 3.156011042435199, "percentage": 63.12, "elapsed_time": "2:47:03", "remaining_time": "1:37:36", "throughput": 8686.36, "total_tokens": 87070928} +{"current_steps": 129190, "total_steps": 204665, "loss": 0.0003, "lr": 7.202690049048638e-07, "epoch": 3.156133193267046, "percentage": 63.12, "elapsed_time": "2:47:04", "remaining_time": "1:37:36", "throughput": 8686.38, "total_tokens": 87074128} +{"current_steps": 129195, "total_steps": 204665, "loss": 0.0, "lr": 7.201871327392344e-07, "epoch": 3.156255344098893, "percentage": 63.13, "elapsed_time": "2:47:04", "remaining_time": "1:37:35", "throughput": 8686.41, "total_tokens": 87077392} +{"current_steps": 129200, "total_steps": 204665, "loss": 0.0004, "lr": 7.201052626084792e-07, "epoch": 3.1563774949307404, "percentage": 63.13, "elapsed_time": "2:47:04", "remaining_time": "1:37:35", "throughput": 8686.43, "total_tokens": 87080592} +{"current_steps": 129205, "total_steps": 204665, "loss": 0.0, "lr": 7.200233945131939e-07, "epoch": 3.1564996457625876, "percentage": 63.13, "elapsed_time": "2:47:05", "remaining_time": "1:37:35", "throughput": 8686.45, "total_tokens": 87083856} +{"current_steps": 129210, "total_steps": 204665, "loss": 0.0, "lr": 7.199415284539736e-07, "epoch": 3.1566217965944348, "percentage": 63.13, "elapsed_time": "2:47:05", "remaining_time": "1:37:34", "throughput": 8686.48, "total_tokens": 87087056} +{"current_steps": 129215, "total_steps": 204665, "loss": 0.0001, "lr": 7.198596644314137e-07, "epoch": 3.156743947426282, "percentage": 63.13, "elapsed_time": "2:47:05", "remaining_time": "1:37:34", "throughput": 8686.5, "total_tokens": 87090256} +{"current_steps": 129220, "total_steps": 204665, "loss": 0.043, "lr": 7.1977780244611e-07, "epoch": 3.156866098258129, "percentage": 63.14, "elapsed_time": "2:47:06", "remaining_time": "1:37:33", "throughput": 8686.52, "total_tokens": 87093520} +{"current_steps": 129225, "total_steps": 204665, "loss": 0.0001, "lr": 7.196959424986575e-07, "epoch": 3.1569882490899763, "percentage": 63.14, "elapsed_time": "2:47:06", "remaining_time": "1:37:33", "throughput": 8686.54, "total_tokens": 87096656} +{"current_steps": 129230, "total_steps": 204665, "loss": 0.0, "lr": 7.196140845896514e-07, "epoch": 3.1571103999218235, "percentage": 63.14, "elapsed_time": "2:47:06", "remaining_time": "1:37:33", "throughput": 8686.57, "total_tokens": 87100048} +{"current_steps": 129235, "total_steps": 204665, "loss": 0.0, "lr": 7.195322287196872e-07, "epoch": 3.1572325507536707, "percentage": 63.14, "elapsed_time": "2:47:07", "remaining_time": "1:37:32", "throughput": 8686.64, "total_tokens": 87103824} +{"current_steps": 129240, "total_steps": 204665, "loss": 0.0003, "lr": 7.194503748893601e-07, "epoch": 3.157354701585518, "percentage": 63.15, "elapsed_time": "2:47:07", "remaining_time": "1:37:32", "throughput": 8686.69, "total_tokens": 87107344} +{"current_steps": 129245, "total_steps": 204665, "loss": 0.0, "lr": 7.193685230992651e-07, "epoch": 3.157476852417365, "percentage": 63.15, "elapsed_time": "2:47:08", "remaining_time": "1:37:31", "throughput": 8686.72, "total_tokens": 87110736} +{"current_steps": 129250, "total_steps": 204665, "loss": 0.0002, "lr": 7.192866733499976e-07, "epoch": 3.1575990032492123, "percentage": 63.15, "elapsed_time": "2:47:08", "remaining_time": "1:37:31", "throughput": 8686.74, "total_tokens": 87113936} +{"current_steps": 129255, "total_steps": 204665, "loss": 0.0, "lr": 7.192048256421532e-07, "epoch": 3.1577211540810595, "percentage": 63.15, "elapsed_time": "2:47:08", "remaining_time": "1:37:30", "throughput": 8686.78, "total_tokens": 87117328} +{"current_steps": 129260, "total_steps": 204665, "loss": 0.0, "lr": 7.191229799763265e-07, "epoch": 3.1578433049129067, "percentage": 63.16, "elapsed_time": "2:47:09", "remaining_time": "1:37:30", "throughput": 8686.81, "total_tokens": 87120592} +{"current_steps": 129265, "total_steps": 204665, "loss": 0.0, "lr": 7.190411363531136e-07, "epoch": 3.157965455744754, "percentage": 63.16, "elapsed_time": "2:47:09", "remaining_time": "1:37:30", "throughput": 8686.83, "total_tokens": 87123856} +{"current_steps": 129270, "total_steps": 204665, "loss": 0.0489, "lr": 7.189592947731085e-07, "epoch": 3.1580876065766006, "percentage": 63.16, "elapsed_time": "2:47:09", "remaining_time": "1:37:29", "throughput": 8686.86, "total_tokens": 87127184} +{"current_steps": 129275, "total_steps": 204665, "loss": 0.0, "lr": 7.188774552369077e-07, "epoch": 3.158209757408448, "percentage": 63.16, "elapsed_time": "2:47:10", "remaining_time": "1:37:29", "throughput": 8686.89, "total_tokens": 87130512} +{"current_steps": 129280, "total_steps": 204665, "loss": 0.0, "lr": 7.187956177451049e-07, "epoch": 3.158331908240295, "percentage": 63.17, "elapsed_time": "2:47:10", "remaining_time": "1:37:28", "throughput": 8686.93, "total_tokens": 87133904} +{"current_steps": 129285, "total_steps": 204665, "loss": 0.0001, "lr": 7.187137822982965e-07, "epoch": 3.158454059072142, "percentage": 63.17, "elapsed_time": "2:47:10", "remaining_time": "1:37:28", "throughput": 8686.96, "total_tokens": 87137168} +{"current_steps": 129290, "total_steps": 204665, "loss": 0.0, "lr": 7.186319488970771e-07, "epoch": 3.1585762099039894, "percentage": 63.17, "elapsed_time": "2:47:11", "remaining_time": "1:37:28", "throughput": 8686.99, "total_tokens": 87140560} +{"current_steps": 129295, "total_steps": 204665, "loss": 0.0001, "lr": 7.185501175420416e-07, "epoch": 3.1586983607358365, "percentage": 63.17, "elapsed_time": "2:47:11", "remaining_time": "1:37:27", "throughput": 8687.05, "total_tokens": 87144208} +{"current_steps": 129300, "total_steps": 204665, "loss": 0.0, "lr": 7.184682882337856e-07, "epoch": 3.1588205115676837, "percentage": 63.18, "elapsed_time": "2:47:11", "remaining_time": "1:37:27", "throughput": 8687.08, "total_tokens": 87147472} +{"current_steps": 129305, "total_steps": 204665, "loss": 0.0, "lr": 7.183864609729037e-07, "epoch": 3.158942662399531, "percentage": 63.18, "elapsed_time": "2:47:12", "remaining_time": "1:37:26", "throughput": 8687.1, "total_tokens": 87150672} +{"current_steps": 129310, "total_steps": 204665, "loss": 0.0, "lr": 7.183046357599912e-07, "epoch": 3.159064813231378, "percentage": 63.18, "elapsed_time": "2:47:12", "remaining_time": "1:37:26", "throughput": 8687.11, "total_tokens": 87153744} +{"current_steps": 129315, "total_steps": 204665, "loss": 0.0291, "lr": 7.182228125956433e-07, "epoch": 3.1591869640632253, "percentage": 63.18, "elapsed_time": "2:47:12", "remaining_time": "1:37:26", "throughput": 8687.13, "total_tokens": 87156944} +{"current_steps": 129320, "total_steps": 204665, "loss": 0.0001, "lr": 7.181409914804547e-07, "epoch": 3.1593091148950725, "percentage": 63.19, "elapsed_time": "2:47:13", "remaining_time": "1:37:25", "throughput": 8687.18, "total_tokens": 87160528} +{"current_steps": 129325, "total_steps": 204665, "loss": 0.0591, "lr": 7.18059172415021e-07, "epoch": 3.1594312657269197, "percentage": 63.19, "elapsed_time": "2:47:13", "remaining_time": "1:37:25", "throughput": 8687.24, "total_tokens": 87164176} +{"current_steps": 129330, "total_steps": 204665, "loss": 0.0, "lr": 7.179773553999364e-07, "epoch": 3.159553416558767, "percentage": 63.19, "elapsed_time": "2:47:13", "remaining_time": "1:37:24", "throughput": 8687.28, "total_tokens": 87167696} +{"current_steps": 129335, "total_steps": 204665, "loss": 0.0, "lr": 7.178955404357967e-07, "epoch": 3.159675567390614, "percentage": 63.19, "elapsed_time": "2:47:14", "remaining_time": "1:37:24", "throughput": 8687.31, "total_tokens": 87170896} +{"current_steps": 129340, "total_steps": 204665, "loss": 0.0001, "lr": 7.178137275231963e-07, "epoch": 3.1597977182224612, "percentage": 63.2, "elapsed_time": "2:47:14", "remaining_time": "1:37:23", "throughput": 8687.32, "total_tokens": 87174032} +{"current_steps": 129345, "total_steps": 204665, "loss": 0.0001, "lr": 7.177319166627304e-07, "epoch": 3.1599198690543084, "percentage": 63.2, "elapsed_time": "2:47:14", "remaining_time": "1:37:23", "throughput": 8687.35, "total_tokens": 87177296} +{"current_steps": 129350, "total_steps": 204665, "loss": 0.0, "lr": 7.176501078549941e-07, "epoch": 3.1600420198861556, "percentage": 63.2, "elapsed_time": "2:47:15", "remaining_time": "1:37:23", "throughput": 8687.43, "total_tokens": 87181200} +{"current_steps": 129355, "total_steps": 204665, "loss": 0.0001, "lr": 7.175683011005818e-07, "epoch": 3.1601641707180024, "percentage": 63.2, "elapsed_time": "2:47:15", "remaining_time": "1:37:22", "throughput": 8687.47, "total_tokens": 87184656} +{"current_steps": 129360, "total_steps": 204665, "loss": 0.0, "lr": 7.174864964000893e-07, "epoch": 3.1602863215498496, "percentage": 63.21, "elapsed_time": "2:47:16", "remaining_time": "1:37:22", "throughput": 8687.48, "total_tokens": 87187728} +{"current_steps": 129365, "total_steps": 204665, "loss": 0.0443, "lr": 7.174046937541103e-07, "epoch": 3.1604084723816968, "percentage": 63.21, "elapsed_time": "2:47:16", "remaining_time": "1:37:21", "throughput": 8687.52, "total_tokens": 87191184} +{"current_steps": 129370, "total_steps": 204665, "loss": 0.0, "lr": 7.173228931632406e-07, "epoch": 3.160530623213544, "percentage": 63.21, "elapsed_time": "2:47:16", "remaining_time": "1:37:21", "throughput": 8687.52, "total_tokens": 87194128} +{"current_steps": 129375, "total_steps": 204665, "loss": 0.0, "lr": 7.172410946280752e-07, "epoch": 3.160652774045391, "percentage": 63.21, "elapsed_time": "2:47:17", "remaining_time": "1:37:21", "throughput": 8687.57, "total_tokens": 87197712} +{"current_steps": 129380, "total_steps": 204665, "loss": 0.0001, "lr": 7.171592981492085e-07, "epoch": 3.1607749248772383, "percentage": 63.22, "elapsed_time": "2:47:17", "remaining_time": "1:37:20", "throughput": 8687.6, "total_tokens": 87201040} +{"current_steps": 129385, "total_steps": 204665, "loss": 0.0, "lr": 7.170775037272353e-07, "epoch": 3.1608970757090855, "percentage": 63.22, "elapsed_time": "2:47:17", "remaining_time": "1:37:20", "throughput": 8687.64, "total_tokens": 87204432} +{"current_steps": 129390, "total_steps": 204665, "loss": 0.0626, "lr": 7.169957113627507e-07, "epoch": 3.1610192265409327, "percentage": 63.22, "elapsed_time": "2:47:18", "remaining_time": "1:37:19", "throughput": 8687.65, "total_tokens": 87207504} +{"current_steps": 129395, "total_steps": 204665, "loss": 0.0, "lr": 7.169139210563495e-07, "epoch": 3.16114137737278, "percentage": 63.22, "elapsed_time": "2:47:18", "remaining_time": "1:37:19", "throughput": 8687.7, "total_tokens": 87211024} +{"current_steps": 129400, "total_steps": 204665, "loss": 0.0269, "lr": 7.168321328086262e-07, "epoch": 3.161263528204627, "percentage": 63.23, "elapsed_time": "2:47:18", "remaining_time": "1:37:19", "throughput": 8687.72, "total_tokens": 87214288} +{"current_steps": 129405, "total_steps": 204665, "loss": 0.0003, "lr": 7.167503466201757e-07, "epoch": 3.1613856790364743, "percentage": 63.23, "elapsed_time": "2:47:19", "remaining_time": "1:37:18", "throughput": 8687.78, "total_tokens": 87218000} +{"current_steps": 129410, "total_steps": 204665, "loss": 0.0688, "lr": 7.166685624915931e-07, "epoch": 3.1615078298683215, "percentage": 63.23, "elapsed_time": "2:47:19", "remaining_time": "1:37:18", "throughput": 8687.79, "total_tokens": 87221072} +{"current_steps": 129415, "total_steps": 204665, "loss": 0.0, "lr": 7.165867804234727e-07, "epoch": 3.1616299807001687, "percentage": 63.23, "elapsed_time": "2:47:19", "remaining_time": "1:37:17", "throughput": 8687.85, "total_tokens": 87224720} +{"current_steps": 129420, "total_steps": 204665, "loss": 0.0, "lr": 7.165050004164098e-07, "epoch": 3.161752131532016, "percentage": 63.24, "elapsed_time": "2:47:20", "remaining_time": "1:37:17", "throughput": 8687.91, "total_tokens": 87228368} +{"current_steps": 129425, "total_steps": 204665, "loss": 0.0, "lr": 7.164232224709984e-07, "epoch": 3.161874282363863, "percentage": 63.24, "elapsed_time": "2:47:20", "remaining_time": "1:37:16", "throughput": 8687.94, "total_tokens": 87231696} +{"current_steps": 129430, "total_steps": 204665, "loss": 0.0489, "lr": 7.16341446587834e-07, "epoch": 3.16199643319571, "percentage": 63.24, "elapsed_time": "2:47:20", "remaining_time": "1:37:16", "throughput": 8687.97, "total_tokens": 87235024} +{"current_steps": 129435, "total_steps": 204665, "loss": 0.0, "lr": 7.162596727675105e-07, "epoch": 3.1621185840275574, "percentage": 63.24, "elapsed_time": "2:47:21", "remaining_time": "1:37:16", "throughput": 8688.03, "total_tokens": 87238800} +{"current_steps": 129440, "total_steps": 204665, "loss": 0.0642, "lr": 7.161779010106233e-07, "epoch": 3.1622407348594046, "percentage": 63.24, "elapsed_time": "2:47:21", "remaining_time": "1:37:15", "throughput": 8688.06, "total_tokens": 87242064} +{"current_steps": 129445, "total_steps": 204665, "loss": 0.0, "lr": 7.160961313177667e-07, "epoch": 3.162362885691252, "percentage": 63.25, "elapsed_time": "2:47:21", "remaining_time": "1:37:15", "throughput": 8688.05, "total_tokens": 87244944} +{"current_steps": 129450, "total_steps": 204665, "loss": 0.0, "lr": 7.16014363689535e-07, "epoch": 3.1624850365230985, "percentage": 63.25, "elapsed_time": "2:47:22", "remaining_time": "1:37:14", "throughput": 8688.09, "total_tokens": 87248400} +{"current_steps": 129455, "total_steps": 204665, "loss": 0.0, "lr": 7.159325981265238e-07, "epoch": 3.1626071873549457, "percentage": 63.25, "elapsed_time": "2:47:22", "remaining_time": "1:37:14", "throughput": 8688.14, "total_tokens": 87251920} +{"current_steps": 129460, "total_steps": 204665, "loss": 0.0001, "lr": 7.158508346293268e-07, "epoch": 3.162729338186793, "percentage": 63.25, "elapsed_time": "2:47:23", "remaining_time": "1:37:14", "throughput": 8688.22, "total_tokens": 87255824} +{"current_steps": 129465, "total_steps": 204665, "loss": 0.0477, "lr": 7.157690731985388e-07, "epoch": 3.16285148901864, "percentage": 63.26, "elapsed_time": "2:47:23", "remaining_time": "1:37:13", "throughput": 8688.27, "total_tokens": 87259408} +{"current_steps": 129470, "total_steps": 204665, "loss": 0.0637, "lr": 7.15687313834755e-07, "epoch": 3.1629736398504873, "percentage": 63.26, "elapsed_time": "2:47:23", "remaining_time": "1:37:13", "throughput": 8688.3, "total_tokens": 87262672} +{"current_steps": 129475, "total_steps": 204665, "loss": 0.0, "lr": 7.156055565385692e-07, "epoch": 3.1630957906823345, "percentage": 63.26, "elapsed_time": "2:47:24", "remaining_time": "1:37:12", "throughput": 8688.31, "total_tokens": 87265744} +{"current_steps": 129480, "total_steps": 204665, "loss": 0.0, "lr": 7.155238013105765e-07, "epoch": 3.1632179415141817, "percentage": 63.26, "elapsed_time": "2:47:24", "remaining_time": "1:37:12", "throughput": 8688.34, "total_tokens": 87269072} +{"current_steps": 129485, "total_steps": 204665, "loss": 0.0001, "lr": 7.15442048151371e-07, "epoch": 3.163340092346029, "percentage": 63.27, "elapsed_time": "2:47:24", "remaining_time": "1:37:12", "throughput": 8688.35, "total_tokens": 87272144} +{"current_steps": 129490, "total_steps": 204665, "loss": 0.0, "lr": 7.153602970615478e-07, "epoch": 3.163462243177876, "percentage": 63.27, "elapsed_time": "2:47:25", "remaining_time": "1:37:11", "throughput": 8688.43, "total_tokens": 87275984} +{"current_steps": 129495, "total_steps": 204665, "loss": 0.0, "lr": 7.152785480417009e-07, "epoch": 3.1635843940097232, "percentage": 63.27, "elapsed_time": "2:47:25", "remaining_time": "1:37:11", "throughput": 8688.46, "total_tokens": 87279312} +{"current_steps": 129500, "total_steps": 204665, "loss": 0.0718, "lr": 7.151968010924247e-07, "epoch": 3.1637065448415704, "percentage": 63.27, "elapsed_time": "2:47:25", "remaining_time": "1:37:10", "throughput": 8688.48, "total_tokens": 87282512} +{"current_steps": 129505, "total_steps": 204665, "loss": 0.0, "lr": 7.151150562143145e-07, "epoch": 3.1638286956734176, "percentage": 63.28, "elapsed_time": "2:47:26", "remaining_time": "1:37:10", "throughput": 8688.52, "total_tokens": 87285968} +{"current_steps": 129510, "total_steps": 204665, "loss": 0.0007, "lr": 7.150333134079636e-07, "epoch": 3.163950846505265, "percentage": 63.28, "elapsed_time": "2:47:26", "remaining_time": "1:37:09", "throughput": 8688.55, "total_tokens": 87289296} +{"current_steps": 129515, "total_steps": 204665, "loss": 0.0, "lr": 7.149515726739677e-07, "epoch": 3.164072997337112, "percentage": 63.28, "elapsed_time": "2:47:26", "remaining_time": "1:37:09", "throughput": 8688.59, "total_tokens": 87292752} +{"current_steps": 129520, "total_steps": 204665, "loss": 0.0, "lr": 7.148698340129202e-07, "epoch": 3.164195148168959, "percentage": 63.28, "elapsed_time": "2:47:27", "remaining_time": "1:37:09", "throughput": 8688.65, "total_tokens": 87296464} +{"current_steps": 129525, "total_steps": 204665, "loss": 0.0, "lr": 7.14788097425416e-07, "epoch": 3.1643172990008064, "percentage": 63.29, "elapsed_time": "2:47:27", "remaining_time": "1:37:08", "throughput": 8688.66, "total_tokens": 87299536} +{"current_steps": 129530, "total_steps": 204665, "loss": 0.0, "lr": 7.147063629120497e-07, "epoch": 3.1644394498326536, "percentage": 63.29, "elapsed_time": "2:47:27", "remaining_time": "1:37:08", "throughput": 8688.68, "total_tokens": 87302736} +{"current_steps": 129535, "total_steps": 204665, "loss": 0.0001, "lr": 7.146246304734155e-07, "epoch": 3.1645616006645003, "percentage": 63.29, "elapsed_time": "2:47:28", "remaining_time": "1:37:07", "throughput": 8688.69, "total_tokens": 87305744} +{"current_steps": 129540, "total_steps": 204665, "loss": 0.0, "lr": 7.145429001101077e-07, "epoch": 3.1646837514963475, "percentage": 63.29, "elapsed_time": "2:47:28", "remaining_time": "1:37:07", "throughput": 8688.7, "total_tokens": 87308752} +{"current_steps": 129545, "total_steps": 204665, "loss": 0.0, "lr": 7.144611718227206e-07, "epoch": 3.1648059023281947, "percentage": 63.3, "elapsed_time": "2:47:28", "remaining_time": "1:37:07", "throughput": 8688.73, "total_tokens": 87312144} +{"current_steps": 129550, "total_steps": 204665, "loss": 0.0, "lr": 7.143794456118488e-07, "epoch": 3.164928053160042, "percentage": 63.3, "elapsed_time": "2:47:29", "remaining_time": "1:37:06", "throughput": 8688.75, "total_tokens": 87315344} +{"current_steps": 129555, "total_steps": 204665, "loss": 0.0, "lr": 7.142977214780863e-07, "epoch": 3.165050203991889, "percentage": 63.3, "elapsed_time": "2:47:29", "remaining_time": "1:37:06", "throughput": 8688.79, "total_tokens": 87318736} +{"current_steps": 129560, "total_steps": 204665, "loss": 0.0, "lr": 7.142159994220274e-07, "epoch": 3.1651723548237363, "percentage": 63.3, "elapsed_time": "2:47:29", "remaining_time": "1:37:05", "throughput": 8688.85, "total_tokens": 87322384} +{"current_steps": 129565, "total_steps": 204665, "loss": 0.0001, "lr": 7.141342794442671e-07, "epoch": 3.1652945056555835, "percentage": 63.31, "elapsed_time": "2:47:30", "remaining_time": "1:37:05", "throughput": 8688.87, "total_tokens": 87325648} +{"current_steps": 129570, "total_steps": 204665, "loss": 0.0001, "lr": 7.140525615453989e-07, "epoch": 3.1654166564874306, "percentage": 63.31, "elapsed_time": "2:47:30", "remaining_time": "1:37:05", "throughput": 8688.95, "total_tokens": 87329488} +{"current_steps": 129575, "total_steps": 204665, "loss": 0.0, "lr": 7.139708457260176e-07, "epoch": 3.165538807319278, "percentage": 63.31, "elapsed_time": "2:47:30", "remaining_time": "1:37:04", "throughput": 8688.96, "total_tokens": 87332560} +{"current_steps": 129580, "total_steps": 204665, "loss": 0.0685, "lr": 7.138891319867171e-07, "epoch": 3.165660958151125, "percentage": 63.31, "elapsed_time": "2:47:31", "remaining_time": "1:37:04", "throughput": 8688.95, "total_tokens": 87335440} +{"current_steps": 129585, "total_steps": 204665, "loss": 0.0402, "lr": 7.138074203280919e-07, "epoch": 3.165783108982972, "percentage": 63.32, "elapsed_time": "2:47:31", "remaining_time": "1:37:03", "throughput": 8688.96, "total_tokens": 87338448} +{"current_steps": 129590, "total_steps": 204665, "loss": 0.0001, "lr": 7.137257107507359e-07, "epoch": 3.1659052598148194, "percentage": 63.32, "elapsed_time": "2:47:32", "remaining_time": "1:37:03", "throughput": 8689.07, "total_tokens": 87342736} +{"current_steps": 129595, "total_steps": 204665, "loss": 0.0, "lr": 7.136440032552437e-07, "epoch": 3.1660274106466666, "percentage": 63.32, "elapsed_time": "2:47:32", "remaining_time": "1:37:02", "throughput": 8689.08, "total_tokens": 87345872} +{"current_steps": 129600, "total_steps": 204665, "loss": 0.0, "lr": 7.135622978422096e-07, "epoch": 3.166149561478514, "percentage": 63.32, "elapsed_time": "2:47:32", "remaining_time": "1:37:02", "throughput": 8689.13, "total_tokens": 87349392} +{"current_steps": 129605, "total_steps": 204665, "loss": 0.0, "lr": 7.13480594512227e-07, "epoch": 3.166271712310361, "percentage": 63.33, "elapsed_time": "2:47:33", "remaining_time": "1:37:02", "throughput": 8689.16, "total_tokens": 87352784} +{"current_steps": 129610, "total_steps": 204665, "loss": 0.0, "lr": 7.13398893265891e-07, "epoch": 3.166393863142208, "percentage": 63.33, "elapsed_time": "2:47:33", "remaining_time": "1:37:01", "throughput": 8689.21, "total_tokens": 87356304} +{"current_steps": 129615, "total_steps": 204665, "loss": 0.0, "lr": 7.13317194103795e-07, "epoch": 3.1665160139740554, "percentage": 63.33, "elapsed_time": "2:47:33", "remaining_time": "1:37:01", "throughput": 8689.24, "total_tokens": 87359568} +{"current_steps": 129620, "total_steps": 204665, "loss": 0.0, "lr": 7.132354970265333e-07, "epoch": 3.166638164805902, "percentage": 63.33, "elapsed_time": "2:47:34", "remaining_time": "1:37:00", "throughput": 8689.31, "total_tokens": 87363472} +{"current_steps": 129625, "total_steps": 204665, "loss": 0.0, "lr": 7.131538020347007e-07, "epoch": 3.1667603156377493, "percentage": 63.34, "elapsed_time": "2:47:34", "remaining_time": "1:37:00", "throughput": 8689.33, "total_tokens": 87366608} +{"current_steps": 129630, "total_steps": 204665, "loss": 0.0543, "lr": 7.130721091288905e-07, "epoch": 3.1668824664695965, "percentage": 63.34, "elapsed_time": "2:47:34", "remaining_time": "1:37:00", "throughput": 8689.35, "total_tokens": 87369808} +{"current_steps": 129635, "total_steps": 204665, "loss": 0.0, "lr": 7.129904183096973e-07, "epoch": 3.1670046173014437, "percentage": 63.34, "elapsed_time": "2:47:35", "remaining_time": "1:36:59", "throughput": 8689.42, "total_tokens": 87373584} +{"current_steps": 129640, "total_steps": 204665, "loss": 0.0441, "lr": 7.129087295777148e-07, "epoch": 3.167126768133291, "percentage": 63.34, "elapsed_time": "2:47:35", "remaining_time": "1:36:59", "throughput": 8689.48, "total_tokens": 87377296} +{"current_steps": 129645, "total_steps": 204665, "loss": 0.1569, "lr": 7.128270429335375e-07, "epoch": 3.167248918965138, "percentage": 63.34, "elapsed_time": "2:47:35", "remaining_time": "1:36:58", "throughput": 8689.51, "total_tokens": 87380560} +{"current_steps": 129650, "total_steps": 204665, "loss": 0.0, "lr": 7.12745358377759e-07, "epoch": 3.1673710697969852, "percentage": 63.35, "elapsed_time": "2:47:36", "remaining_time": "1:36:58", "throughput": 8689.53, "total_tokens": 87383888} +{"current_steps": 129655, "total_steps": 204665, "loss": 0.0, "lr": 7.126636759109732e-07, "epoch": 3.1674932206288324, "percentage": 63.35, "elapsed_time": "2:47:36", "remaining_time": "1:36:58", "throughput": 8689.56, "total_tokens": 87387152} +{"current_steps": 129660, "total_steps": 204665, "loss": 0.0, "lr": 7.12581995533775e-07, "epoch": 3.1676153714606796, "percentage": 63.35, "elapsed_time": "2:47:36", "remaining_time": "1:36:57", "throughput": 8689.63, "total_tokens": 87390928} +{"current_steps": 129665, "total_steps": 204665, "loss": 0.0, "lr": 7.125003172467574e-07, "epoch": 3.167737522292527, "percentage": 63.35, "elapsed_time": "2:47:37", "remaining_time": "1:36:57", "throughput": 8689.67, "total_tokens": 87394384} +{"current_steps": 129670, "total_steps": 204665, "loss": 0.0001, "lr": 7.124186410505153e-07, "epoch": 3.167859673124374, "percentage": 63.36, "elapsed_time": "2:47:37", "remaining_time": "1:36:56", "throughput": 8689.69, "total_tokens": 87397584} +{"current_steps": 129675, "total_steps": 204665, "loss": 0.0455, "lr": 7.123369669456417e-07, "epoch": 3.167981823956221, "percentage": 63.36, "elapsed_time": "2:47:37", "remaining_time": "1:36:56", "throughput": 8689.72, "total_tokens": 87400976} +{"current_steps": 129680, "total_steps": 204665, "loss": 0.0002, "lr": 7.12255294932731e-07, "epoch": 3.1681039747880684, "percentage": 63.36, "elapsed_time": "2:47:38", "remaining_time": "1:36:56", "throughput": 8689.75, "total_tokens": 87404240} +{"current_steps": 129685, "total_steps": 204665, "loss": 0.0, "lr": 7.121736250123777e-07, "epoch": 3.1682261256199156, "percentage": 63.36, "elapsed_time": "2:47:38", "remaining_time": "1:36:55", "throughput": 8689.76, "total_tokens": 87407376} +{"current_steps": 129690, "total_steps": 204665, "loss": 0.0001, "lr": 7.120919571851749e-07, "epoch": 3.1683482764517628, "percentage": 63.37, "elapsed_time": "2:47:39", "remaining_time": "1:36:55", "throughput": 8689.8, "total_tokens": 87410832} +{"current_steps": 129695, "total_steps": 204665, "loss": 0.0, "lr": 7.120102914517168e-07, "epoch": 3.16847042728361, "percentage": 63.37, "elapsed_time": "2:47:39", "remaining_time": "1:36:54", "throughput": 8689.86, "total_tokens": 87414416} +{"current_steps": 129700, "total_steps": 204665, "loss": 0.0, "lr": 7.119286278125976e-07, "epoch": 3.168592578115457, "percentage": 63.37, "elapsed_time": "2:47:39", "remaining_time": "1:36:54", "throughput": 8689.87, "total_tokens": 87417488} +{"current_steps": 129705, "total_steps": 204665, "loss": 0.0001, "lr": 7.118469662684108e-07, "epoch": 3.1687147289473043, "percentage": 63.37, "elapsed_time": "2:47:40", "remaining_time": "1:36:53", "throughput": 8689.9, "total_tokens": 87420752} +{"current_steps": 129710, "total_steps": 204665, "loss": 0.0875, "lr": 7.1176530681975e-07, "epoch": 3.1688368797791515, "percentage": 63.38, "elapsed_time": "2:47:40", "remaining_time": "1:36:53", "throughput": 8689.93, "total_tokens": 87424144} +{"current_steps": 129715, "total_steps": 204665, "loss": 0.0002, "lr": 7.116836494672096e-07, "epoch": 3.1689590306109983, "percentage": 63.38, "elapsed_time": "2:47:40", "remaining_time": "1:36:53", "throughput": 8689.99, "total_tokens": 87427792} +{"current_steps": 129720, "total_steps": 204665, "loss": 0.0, "lr": 7.116019942113835e-07, "epoch": 3.1690811814428455, "percentage": 63.38, "elapsed_time": "2:47:41", "remaining_time": "1:36:52", "throughput": 8690.11, "total_tokens": 87432336} +{"current_steps": 129725, "total_steps": 204665, "loss": 0.0255, "lr": 7.115203410528649e-07, "epoch": 3.1692033322746926, "percentage": 63.38, "elapsed_time": "2:47:41", "remaining_time": "1:36:52", "throughput": 8690.13, "total_tokens": 87435536} +{"current_steps": 129730, "total_steps": 204665, "loss": 0.0, "lr": 7.114386899922483e-07, "epoch": 3.16932548310654, "percentage": 63.39, "elapsed_time": "2:47:41", "remaining_time": "1:36:51", "throughput": 8690.16, "total_tokens": 87438800} +{"current_steps": 129735, "total_steps": 204665, "loss": 0.0001, "lr": 7.113570410301268e-07, "epoch": 3.169447633938387, "percentage": 63.39, "elapsed_time": "2:47:42", "remaining_time": "1:36:51", "throughput": 8690.23, "total_tokens": 87442576} +{"current_steps": 129740, "total_steps": 204665, "loss": 0.0, "lr": 7.11275394167095e-07, "epoch": 3.169569784770234, "percentage": 63.39, "elapsed_time": "2:47:42", "remaining_time": "1:36:51", "throughput": 8690.29, "total_tokens": 87446288} +{"current_steps": 129745, "total_steps": 204665, "loss": 0.0001, "lr": 7.111937494037457e-07, "epoch": 3.1696919356020814, "percentage": 63.39, "elapsed_time": "2:47:42", "remaining_time": "1:36:50", "throughput": 8690.32, "total_tokens": 87449552} +{"current_steps": 129750, "total_steps": 204665, "loss": 0.0, "lr": 7.111121067406735e-07, "epoch": 3.1698140864339286, "percentage": 63.4, "elapsed_time": "2:47:43", "remaining_time": "1:36:50", "throughput": 8690.32, "total_tokens": 87452496} +{"current_steps": 129755, "total_steps": 204665, "loss": 0.0, "lr": 7.110304661784719e-07, "epoch": 3.169936237265776, "percentage": 63.4, "elapsed_time": "2:47:43", "remaining_time": "1:36:49", "throughput": 8690.38, "total_tokens": 87456272} +{"current_steps": 129760, "total_steps": 204665, "loss": 0.0001, "lr": 7.10948827717734e-07, "epoch": 3.170058388097623, "percentage": 63.4, "elapsed_time": "2:47:43", "remaining_time": "1:36:49", "throughput": 8690.44, "total_tokens": 87459984} +{"current_steps": 129765, "total_steps": 204665, "loss": 0.0001, "lr": 7.108671913590543e-07, "epoch": 3.17018053892947, "percentage": 63.4, "elapsed_time": "2:47:44", "remaining_time": "1:36:49", "throughput": 8690.5, "total_tokens": 87463632} +{"current_steps": 129770, "total_steps": 204665, "loss": 0.0, "lr": 7.107855571030259e-07, "epoch": 3.1703026897613173, "percentage": 63.41, "elapsed_time": "2:47:44", "remaining_time": "1:36:48", "throughput": 8690.54, "total_tokens": 87467088} +{"current_steps": 129775, "total_steps": 204665, "loss": 0.0001, "lr": 7.107039249502427e-07, "epoch": 3.1704248405931645, "percentage": 63.41, "elapsed_time": "2:47:44", "remaining_time": "1:36:48", "throughput": 8690.56, "total_tokens": 87470288} +{"current_steps": 129780, "total_steps": 204665, "loss": 0.0, "lr": 7.106222949012988e-07, "epoch": 3.1705469914250117, "percentage": 63.41, "elapsed_time": "2:47:45", "remaining_time": "1:36:47", "throughput": 8690.57, "total_tokens": 87473424} +{"current_steps": 129785, "total_steps": 204665, "loss": 0.0087, "lr": 7.105406669567869e-07, "epoch": 3.170669142256859, "percentage": 63.41, "elapsed_time": "2:47:45", "remaining_time": "1:36:47", "throughput": 8690.6, "total_tokens": 87476688} +{"current_steps": 129790, "total_steps": 204665, "loss": 0.0001, "lr": 7.104590411173014e-07, "epoch": 3.170791293088706, "percentage": 63.42, "elapsed_time": "2:47:46", "remaining_time": "1:36:47", "throughput": 8690.62, "total_tokens": 87479952} +{"current_steps": 129795, "total_steps": 204665, "loss": 0.0, "lr": 7.103774173834354e-07, "epoch": 3.1709134439205533, "percentage": 63.42, "elapsed_time": "2:47:46", "remaining_time": "1:36:46", "throughput": 8690.67, "total_tokens": 87483536} +{"current_steps": 129800, "total_steps": 204665, "loss": 0.0, "lr": 7.102957957557831e-07, "epoch": 3.1710355947524, "percentage": 63.42, "elapsed_time": "2:47:46", "remaining_time": "1:36:46", "throughput": 8690.73, "total_tokens": 87487184} +{"current_steps": 129805, "total_steps": 204665, "loss": 0.0001, "lr": 7.102141762349376e-07, "epoch": 3.1711577455842472, "percentage": 63.42, "elapsed_time": "2:47:47", "remaining_time": "1:36:45", "throughput": 8690.78, "total_tokens": 87490768} +{"current_steps": 129810, "total_steps": 204665, "loss": 0.0, "lr": 7.101325588214923e-07, "epoch": 3.1712798964160944, "percentage": 63.43, "elapsed_time": "2:47:47", "remaining_time": "1:36:45", "throughput": 8690.81, "total_tokens": 87494096} +{"current_steps": 129815, "total_steps": 204665, "loss": 0.0, "lr": 7.100509435160413e-07, "epoch": 3.1714020472479416, "percentage": 63.43, "elapsed_time": "2:47:47", "remaining_time": "1:36:44", "throughput": 8690.82, "total_tokens": 87497168} +{"current_steps": 129820, "total_steps": 204665, "loss": 0.0, "lr": 7.099693303191775e-07, "epoch": 3.171524198079789, "percentage": 63.43, "elapsed_time": "2:47:48", "remaining_time": "1:36:44", "throughput": 8690.86, "total_tokens": 87500624} +{"current_steps": 129825, "total_steps": 204665, "loss": 0.0, "lr": 7.098877192314952e-07, "epoch": 3.171646348911636, "percentage": 63.43, "elapsed_time": "2:47:48", "remaining_time": "1:36:44", "throughput": 8690.89, "total_tokens": 87503952} +{"current_steps": 129830, "total_steps": 204665, "loss": 0.0, "lr": 7.098061102535868e-07, "epoch": 3.171768499743483, "percentage": 63.44, "elapsed_time": "2:47:48", "remaining_time": "1:36:43", "throughput": 8690.93, "total_tokens": 87507408} +{"current_steps": 129835, "total_steps": 204665, "loss": 0.0501, "lr": 7.09724503386047e-07, "epoch": 3.1718906505753304, "percentage": 63.44, "elapsed_time": "2:47:49", "remaining_time": "1:36:43", "throughput": 8690.94, "total_tokens": 87510480} +{"current_steps": 129840, "total_steps": 204665, "loss": 0.0001, "lr": 7.096428986294682e-07, "epoch": 3.1720128014071776, "percentage": 63.44, "elapsed_time": "2:47:49", "remaining_time": "1:36:42", "throughput": 8690.98, "total_tokens": 87513936} +{"current_steps": 129845, "total_steps": 204665, "loss": 0.0001, "lr": 7.095612959844447e-07, "epoch": 3.1721349522390248, "percentage": 63.44, "elapsed_time": "2:47:49", "remaining_time": "1:36:42", "throughput": 8691.07, "total_tokens": 87517904} +{"current_steps": 129850, "total_steps": 204665, "loss": 0.0003, "lr": 7.094796954515695e-07, "epoch": 3.172257103070872, "percentage": 63.45, "elapsed_time": "2:47:50", "remaining_time": "1:36:42", "throughput": 8691.08, "total_tokens": 87520976} +{"current_steps": 129855, "total_steps": 204665, "loss": 0.0003, "lr": 7.093980970314361e-07, "epoch": 3.172379253902719, "percentage": 63.45, "elapsed_time": "2:47:50", "remaining_time": "1:36:41", "throughput": 8691.08, "total_tokens": 87523984} +{"current_steps": 129860, "total_steps": 204665, "loss": 0.0251, "lr": 7.09316500724638e-07, "epoch": 3.1725014047345663, "percentage": 63.45, "elapsed_time": "2:47:50", "remaining_time": "1:36:41", "throughput": 8691.1, "total_tokens": 87527120} +{"current_steps": 129865, "total_steps": 204665, "loss": 0.0688, "lr": 7.092349065317683e-07, "epoch": 3.1726235555664135, "percentage": 63.45, "elapsed_time": "2:47:51", "remaining_time": "1:36:40", "throughput": 8691.14, "total_tokens": 87530576} +{"current_steps": 129870, "total_steps": 204665, "loss": 0.0, "lr": 7.091533144534203e-07, "epoch": 3.1727457063982607, "percentage": 63.45, "elapsed_time": "2:47:51", "remaining_time": "1:36:40", "throughput": 8691.18, "total_tokens": 87533968} +{"current_steps": 129875, "total_steps": 204665, "loss": 0.0, "lr": 7.090717244901883e-07, "epoch": 3.172867857230108, "percentage": 63.46, "elapsed_time": "2:47:51", "remaining_time": "1:36:40", "throughput": 8691.24, "total_tokens": 87537744} +{"current_steps": 129880, "total_steps": 204665, "loss": 0.0, "lr": 7.089901366426642e-07, "epoch": 3.172990008061955, "percentage": 63.46, "elapsed_time": "2:47:52", "remaining_time": "1:36:39", "throughput": 8691.28, "total_tokens": 87541200} +{"current_steps": 129885, "total_steps": 204665, "loss": 0.0001, "lr": 7.089085509114428e-07, "epoch": 3.1731121588938023, "percentage": 63.46, "elapsed_time": "2:47:52", "remaining_time": "1:36:39", "throughput": 8691.29, "total_tokens": 87544272} +{"current_steps": 129890, "total_steps": 204665, "loss": 0.0, "lr": 7.088269672971164e-07, "epoch": 3.1732343097256495, "percentage": 63.46, "elapsed_time": "2:47:52", "remaining_time": "1:36:38", "throughput": 8691.29, "total_tokens": 87547216} +{"current_steps": 129895, "total_steps": 204665, "loss": 0.0, "lr": 7.087453858002787e-07, "epoch": 3.173356460557496, "percentage": 63.47, "elapsed_time": "2:47:53", "remaining_time": "1:36:38", "throughput": 8691.32, "total_tokens": 87550608} +{"current_steps": 129900, "total_steps": 204665, "loss": 0.0, "lr": 7.086638064215226e-07, "epoch": 3.1734786113893434, "percentage": 63.47, "elapsed_time": "2:47:53", "remaining_time": "1:36:37", "throughput": 8691.34, "total_tokens": 87553680} +{"current_steps": 129905, "total_steps": 204665, "loss": 0.0, "lr": 7.085822291614419e-07, "epoch": 3.1736007622211906, "percentage": 63.47, "elapsed_time": "2:47:54", "remaining_time": "1:36:37", "throughput": 8691.35, "total_tokens": 87556816} +{"current_steps": 129910, "total_steps": 204665, "loss": 0.0, "lr": 7.085006540206298e-07, "epoch": 3.1737229130530378, "percentage": 63.47, "elapsed_time": "2:47:54", "remaining_time": "1:36:37", "throughput": 8691.41, "total_tokens": 87560592} +{"current_steps": 129915, "total_steps": 204665, "loss": 0.0, "lr": 7.08419080999679e-07, "epoch": 3.173845063884885, "percentage": 63.48, "elapsed_time": "2:47:54", "remaining_time": "1:36:36", "throughput": 8691.44, "total_tokens": 87563856} +{"current_steps": 129920, "total_steps": 204665, "loss": 0.0, "lr": 7.083375100991835e-07, "epoch": 3.173967214716732, "percentage": 63.48, "elapsed_time": "2:47:55", "remaining_time": "1:36:36", "throughput": 8691.47, "total_tokens": 87567248} +{"current_steps": 129925, "total_steps": 204665, "loss": 0.0, "lr": 7.082559413197356e-07, "epoch": 3.1740893655485793, "percentage": 63.48, "elapsed_time": "2:47:55", "remaining_time": "1:36:35", "throughput": 8691.5, "total_tokens": 87570576} +{"current_steps": 129930, "total_steps": 204665, "loss": 0.0427, "lr": 7.081743746619289e-07, "epoch": 3.1742115163804265, "percentage": 63.48, "elapsed_time": "2:47:55", "remaining_time": "1:36:35", "throughput": 8691.52, "total_tokens": 87573776} +{"current_steps": 129935, "total_steps": 204665, "loss": 0.0, "lr": 7.080928101263571e-07, "epoch": 3.1743336672122737, "percentage": 63.49, "elapsed_time": "2:47:56", "remaining_time": "1:36:35", "throughput": 8691.54, "total_tokens": 87576912} +{"current_steps": 129940, "total_steps": 204665, "loss": 0.0001, "lr": 7.080112477136124e-07, "epoch": 3.174455818044121, "percentage": 63.49, "elapsed_time": "2:47:56", "remaining_time": "1:36:34", "throughput": 8691.57, "total_tokens": 87580240} +{"current_steps": 129945, "total_steps": 204665, "loss": 0.0, "lr": 7.07929687424289e-07, "epoch": 3.174577968875968, "percentage": 63.49, "elapsed_time": "2:47:56", "remaining_time": "1:36:34", "throughput": 8691.62, "total_tokens": 87583824} +{"current_steps": 129950, "total_steps": 204665, "loss": 0.0679, "lr": 7.078481292589791e-07, "epoch": 3.1747001197078153, "percentage": 63.49, "elapsed_time": "2:47:57", "remaining_time": "1:36:33", "throughput": 8691.66, "total_tokens": 87587280} +{"current_steps": 129955, "total_steps": 204665, "loss": 0.0637, "lr": 7.077665732182765e-07, "epoch": 3.1748222705396625, "percentage": 63.5, "elapsed_time": "2:47:57", "remaining_time": "1:36:33", "throughput": 8691.68, "total_tokens": 87590480} +{"current_steps": 129960, "total_steps": 204665, "loss": 0.0388, "lr": 7.076850193027737e-07, "epoch": 3.1749444213715097, "percentage": 63.5, "elapsed_time": "2:47:57", "remaining_time": "1:36:33", "throughput": 8691.72, "total_tokens": 87593936} +{"current_steps": 129965, "total_steps": 204665, "loss": 0.0001, "lr": 7.07603467513064e-07, "epoch": 3.175066572203357, "percentage": 63.5, "elapsed_time": "2:47:58", "remaining_time": "1:36:32", "throughput": 8691.76, "total_tokens": 87597392} +{"current_steps": 129970, "total_steps": 204665, "loss": 0.0, "lr": 7.075219178497409e-07, "epoch": 3.175188723035204, "percentage": 63.5, "elapsed_time": "2:47:58", "remaining_time": "1:36:32", "throughput": 8691.88, "total_tokens": 87601872} +{"current_steps": 129975, "total_steps": 204665, "loss": 0.0001, "lr": 7.074403703133967e-07, "epoch": 3.1753108738670512, "percentage": 63.51, "elapsed_time": "2:47:58", "remaining_time": "1:36:31", "throughput": 8691.91, "total_tokens": 87605264} +{"current_steps": 129980, "total_steps": 204665, "loss": 0.0001, "lr": 7.073588249046252e-07, "epoch": 3.175433024698898, "percentage": 63.51, "elapsed_time": "2:47:59", "remaining_time": "1:36:31", "throughput": 8691.99, "total_tokens": 87609168} +{"current_steps": 129985, "total_steps": 204665, "loss": 0.0001, "lr": 7.072772816240184e-07, "epoch": 3.175555175530745, "percentage": 63.51, "elapsed_time": "2:47:59", "remaining_time": "1:36:31", "throughput": 8692.01, "total_tokens": 87612304} +{"current_steps": 129990, "total_steps": 204665, "loss": 0.0001, "lr": 7.071957404721707e-07, "epoch": 3.1756773263625924, "percentage": 63.51, "elapsed_time": "2:47:59", "remaining_time": "1:36:30", "throughput": 8692.01, "total_tokens": 87615312} +{"current_steps": 129995, "total_steps": 204665, "loss": 0.0001, "lr": 7.071142014496737e-07, "epoch": 3.1757994771944396, "percentage": 63.52, "elapsed_time": "2:48:00", "remaining_time": "1:36:30", "throughput": 8692.07, "total_tokens": 87618960} +{"current_steps": 130000, "total_steps": 204665, "loss": 0.0003, "lr": 7.070326645571213e-07, "epoch": 3.1759216280262867, "percentage": 63.52, "elapsed_time": "2:48:00", "remaining_time": "1:36:29", "throughput": 8692.09, "total_tokens": 87622096} +{"current_steps": 130005, "total_steps": 204665, "loss": 0.062, "lr": 7.06951129795106e-07, "epoch": 3.176043778858134, "percentage": 63.52, "elapsed_time": "2:48:01", "remaining_time": "1:36:29", "throughput": 8692.09, "total_tokens": 87625040} +{"current_steps": 130010, "total_steps": 204665, "loss": 0.0, "lr": 7.068695971642212e-07, "epoch": 3.176165929689981, "percentage": 63.52, "elapsed_time": "2:48:01", "remaining_time": "1:36:28", "throughput": 8692.14, "total_tokens": 87628624} +{"current_steps": 130015, "total_steps": 204665, "loss": 0.0835, "lr": 7.067880666650594e-07, "epoch": 3.1762880805218283, "percentage": 63.53, "elapsed_time": "2:48:01", "remaining_time": "1:36:28", "throughput": 8692.17, "total_tokens": 87631888} +{"current_steps": 130020, "total_steps": 204665, "loss": 0.0, "lr": 7.067065382982136e-07, "epoch": 3.1764102313536755, "percentage": 63.53, "elapsed_time": "2:48:02", "remaining_time": "1:36:28", "throughput": 8692.22, "total_tokens": 87635472} +{"current_steps": 130025, "total_steps": 204665, "loss": 0.0, "lr": 7.066250120642765e-07, "epoch": 3.1765323821855227, "percentage": 63.53, "elapsed_time": "2:48:02", "remaining_time": "1:36:27", "throughput": 8692.26, "total_tokens": 87638992} +{"current_steps": 130030, "total_steps": 204665, "loss": 0.0001, "lr": 7.065434879638417e-07, "epoch": 3.17665453301737, "percentage": 63.53, "elapsed_time": "2:48:02", "remaining_time": "1:36:27", "throughput": 8692.29, "total_tokens": 87642192} +{"current_steps": 130035, "total_steps": 204665, "loss": 0.0, "lr": 7.064619659975012e-07, "epoch": 3.176776683849217, "percentage": 63.54, "elapsed_time": "2:48:03", "remaining_time": "1:36:26", "throughput": 8692.31, "total_tokens": 87645392} +{"current_steps": 130040, "total_steps": 204665, "loss": 0.0, "lr": 7.063804461658486e-07, "epoch": 3.1768988346810643, "percentage": 63.54, "elapsed_time": "2:48:03", "remaining_time": "1:36:26", "throughput": 8692.35, "total_tokens": 87648912} +{"current_steps": 130045, "total_steps": 204665, "loss": 0.0001, "lr": 7.06298928469476e-07, "epoch": 3.1770209855129115, "percentage": 63.54, "elapsed_time": "2:48:03", "remaining_time": "1:36:26", "throughput": 8692.41, "total_tokens": 87652560} +{"current_steps": 130050, "total_steps": 204665, "loss": 0.0001, "lr": 7.06217412908977e-07, "epoch": 3.1771431363447586, "percentage": 63.54, "elapsed_time": "2:48:04", "remaining_time": "1:36:25", "throughput": 8692.46, "total_tokens": 87656144} +{"current_steps": 130055, "total_steps": 204665, "loss": 0.0, "lr": 7.061358994849434e-07, "epoch": 3.177265287176606, "percentage": 63.55, "elapsed_time": "2:48:04", "remaining_time": "1:36:25", "throughput": 8692.48, "total_tokens": 87659344} +{"current_steps": 130060, "total_steps": 204665, "loss": 0.0, "lr": 7.06054388197969e-07, "epoch": 3.177387438008453, "percentage": 63.55, "elapsed_time": "2:48:04", "remaining_time": "1:36:24", "throughput": 8692.52, "total_tokens": 87662800} +{"current_steps": 130065, "total_steps": 204665, "loss": 0.0162, "lr": 7.059728790486463e-07, "epoch": 3.1775095888402998, "percentage": 63.55, "elapsed_time": "2:48:05", "remaining_time": "1:36:24", "throughput": 8692.55, "total_tokens": 87666128} +{"current_steps": 130070, "total_steps": 204665, "loss": 0.0002, "lr": 7.058913720375674e-07, "epoch": 3.177631739672147, "percentage": 63.55, "elapsed_time": "2:48:05", "remaining_time": "1:36:24", "throughput": 8692.55, "total_tokens": 87669072} +{"current_steps": 130075, "total_steps": 204665, "loss": 0.0, "lr": 7.058098671653261e-07, "epoch": 3.177753890503994, "percentage": 63.56, "elapsed_time": "2:48:05", "remaining_time": "1:36:23", "throughput": 8692.56, "total_tokens": 87672208} +{"current_steps": 130080, "total_steps": 204665, "loss": 0.0001, "lr": 7.057283644325141e-07, "epoch": 3.1778760413358413, "percentage": 63.56, "elapsed_time": "2:48:06", "remaining_time": "1:36:23", "throughput": 8692.64, "total_tokens": 87676112} +{"current_steps": 130085, "total_steps": 204665, "loss": 0.0, "lr": 7.056468638397246e-07, "epoch": 3.1779981921676885, "percentage": 63.56, "elapsed_time": "2:48:06", "remaining_time": "1:36:22", "throughput": 8692.67, "total_tokens": 87679376} +{"current_steps": 130090, "total_steps": 204665, "loss": 0.0655, "lr": 7.055653653875507e-07, "epoch": 3.1781203429995357, "percentage": 63.56, "elapsed_time": "2:48:06", "remaining_time": "1:36:22", "throughput": 8692.69, "total_tokens": 87682576} +{"current_steps": 130095, "total_steps": 204665, "loss": 0.0, "lr": 7.054838690765843e-07, "epoch": 3.178242493831383, "percentage": 63.56, "elapsed_time": "2:48:07", "remaining_time": "1:36:21", "throughput": 8692.72, "total_tokens": 87685968} +{"current_steps": 130100, "total_steps": 204665, "loss": 0.0, "lr": 7.054023749074188e-07, "epoch": 3.17836464466323, "percentage": 63.57, "elapsed_time": "2:48:07", "remaining_time": "1:36:21", "throughput": 8692.74, "total_tokens": 87689168} +{"current_steps": 130105, "total_steps": 204665, "loss": 0.0, "lr": 7.053208828806459e-07, "epoch": 3.1784867954950773, "percentage": 63.57, "elapsed_time": "2:48:07", "remaining_time": "1:36:21", "throughput": 8692.79, "total_tokens": 87692752} +{"current_steps": 130110, "total_steps": 204665, "loss": 0.0007, "lr": 7.052393929968593e-07, "epoch": 3.1786089463269245, "percentage": 63.57, "elapsed_time": "2:48:08", "remaining_time": "1:36:20", "throughput": 8692.79, "total_tokens": 87695632} +{"current_steps": 130115, "total_steps": 204665, "loss": 0.0002, "lr": 7.05157905256651e-07, "epoch": 3.1787310971587717, "percentage": 63.57, "elapsed_time": "2:48:08", "remaining_time": "1:36:20", "throughput": 8692.8, "total_tokens": 87698768} +{"current_steps": 130120, "total_steps": 204665, "loss": 0.0, "lr": 7.050764196606134e-07, "epoch": 3.178853247990619, "percentage": 63.58, "elapsed_time": "2:48:09", "remaining_time": "1:36:19", "throughput": 8692.85, "total_tokens": 87702288} +{"current_steps": 130125, "total_steps": 204665, "loss": 0.0, "lr": 7.049949362093399e-07, "epoch": 3.178975398822466, "percentage": 63.58, "elapsed_time": "2:48:09", "remaining_time": "1:36:19", "throughput": 8692.85, "total_tokens": 87705232} +{"current_steps": 130130, "total_steps": 204665, "loss": 0.0511, "lr": 7.049134549034222e-07, "epoch": 3.1790975496543132, "percentage": 63.58, "elapsed_time": "2:48:09", "remaining_time": "1:36:19", "throughput": 8692.93, "total_tokens": 87709136} +{"current_steps": 130135, "total_steps": 204665, "loss": 0.0001, "lr": 7.048319757434535e-07, "epoch": 3.1792197004861604, "percentage": 63.58, "elapsed_time": "2:48:10", "remaining_time": "1:36:18", "throughput": 8692.95, "total_tokens": 87712400} +{"current_steps": 130140, "total_steps": 204665, "loss": 0.0, "lr": 7.047504987300256e-07, "epoch": 3.1793418513180076, "percentage": 63.59, "elapsed_time": "2:48:10", "remaining_time": "1:36:18", "throughput": 8692.97, "total_tokens": 87715600} +{"current_steps": 130145, "total_steps": 204665, "loss": 0.0, "lr": 7.046690238637321e-07, "epoch": 3.179464002149855, "percentage": 63.59, "elapsed_time": "2:48:10", "remaining_time": "1:36:17", "throughput": 8693.02, "total_tokens": 87719184} +{"current_steps": 130150, "total_steps": 204665, "loss": 0.0, "lr": 7.045875511451642e-07, "epoch": 3.179586152981702, "percentage": 63.59, "elapsed_time": "2:48:11", "remaining_time": "1:36:17", "throughput": 8693.05, "total_tokens": 87722512} +{"current_steps": 130155, "total_steps": 204665, "loss": 0.0, "lr": 7.045060805749156e-07, "epoch": 3.179708303813549, "percentage": 63.59, "elapsed_time": "2:48:11", "remaining_time": "1:36:17", "throughput": 8693.07, "total_tokens": 87725648} +{"current_steps": 130160, "total_steps": 204665, "loss": 0.0613, "lr": 7.044246121535781e-07, "epoch": 3.179830454645396, "percentage": 63.6, "elapsed_time": "2:48:11", "remaining_time": "1:36:16", "throughput": 8693.09, "total_tokens": 87728848} +{"current_steps": 130165, "total_steps": 204665, "loss": 0.0001, "lr": 7.043431458817444e-07, "epoch": 3.179952605477243, "percentage": 63.6, "elapsed_time": "2:48:12", "remaining_time": "1:36:16", "throughput": 8693.09, "total_tokens": 87731920} +{"current_steps": 130170, "total_steps": 204665, "loss": 0.0, "lr": 7.042616817600067e-07, "epoch": 3.1800747563090903, "percentage": 63.6, "elapsed_time": "2:48:12", "remaining_time": "1:36:15", "throughput": 8693.12, "total_tokens": 87735184} +{"current_steps": 130175, "total_steps": 204665, "loss": 0.0366, "lr": 7.041802197889577e-07, "epoch": 3.1801969071409375, "percentage": 63.6, "elapsed_time": "2:48:12", "remaining_time": "1:36:15", "throughput": 8693.17, "total_tokens": 87738704} +{"current_steps": 130180, "total_steps": 204665, "loss": 0.0402, "lr": 7.040987599691895e-07, "epoch": 3.1803190579727847, "percentage": 63.61, "elapsed_time": "2:48:13", "remaining_time": "1:36:15", "throughput": 8693.18, "total_tokens": 87741840} +{"current_steps": 130185, "total_steps": 204665, "loss": 0.0, "lr": 7.040173023012952e-07, "epoch": 3.180441208804632, "percentage": 63.61, "elapsed_time": "2:48:13", "remaining_time": "1:36:14", "throughput": 8693.2, "total_tokens": 87745104} +{"current_steps": 130190, "total_steps": 204665, "loss": 0.0001, "lr": 7.039358467858662e-07, "epoch": 3.180563359636479, "percentage": 63.61, "elapsed_time": "2:48:13", "remaining_time": "1:36:14", "throughput": 8693.28, "total_tokens": 87749008} +{"current_steps": 130195, "total_steps": 204665, "loss": 0.0, "lr": 7.038543934234957e-07, "epoch": 3.1806855104683263, "percentage": 63.61, "elapsed_time": "2:48:14", "remaining_time": "1:36:13", "throughput": 8693.35, "total_tokens": 87752848} +{"current_steps": 130200, "total_steps": 204665, "loss": 0.0, "lr": 7.037729422147754e-07, "epoch": 3.1808076613001735, "percentage": 63.62, "elapsed_time": "2:48:14", "remaining_time": "1:36:13", "throughput": 8693.37, "total_tokens": 87756048} +{"current_steps": 130205, "total_steps": 204665, "loss": 0.0, "lr": 7.036914931602984e-07, "epoch": 3.1809298121320206, "percentage": 63.62, "elapsed_time": "2:48:14", "remaining_time": "1:36:12", "throughput": 8693.41, "total_tokens": 87759504} +{"current_steps": 130210, "total_steps": 204665, "loss": 0.0003, "lr": 7.03610046260656e-07, "epoch": 3.181051962963868, "percentage": 63.62, "elapsed_time": "2:48:15", "remaining_time": "1:36:12", "throughput": 8693.42, "total_tokens": 87762576} +{"current_steps": 130215, "total_steps": 204665, "loss": 0.0, "lr": 7.035286015164413e-07, "epoch": 3.181174113795715, "percentage": 63.62, "elapsed_time": "2:48:15", "remaining_time": "1:36:12", "throughput": 8693.43, "total_tokens": 87765712} +{"current_steps": 130220, "total_steps": 204665, "loss": 0.0003, "lr": 7.034471589282467e-07, "epoch": 3.181296264627562, "percentage": 63.63, "elapsed_time": "2:48:15", "remaining_time": "1:36:11", "throughput": 8693.47, "total_tokens": 87769168} +{"current_steps": 130225, "total_steps": 204665, "loss": 0.0399, "lr": 7.033657184966634e-07, "epoch": 3.1814184154594094, "percentage": 63.63, "elapsed_time": "2:48:16", "remaining_time": "1:36:11", "throughput": 8693.51, "total_tokens": 87772624} +{"current_steps": 130230, "total_steps": 204665, "loss": 0.0, "lr": 7.032842802222851e-07, "epoch": 3.1815405662912566, "percentage": 63.63, "elapsed_time": "2:48:16", "remaining_time": "1:36:10", "throughput": 8693.55, "total_tokens": 87776080} +{"current_steps": 130235, "total_steps": 204665, "loss": 0.0672, "lr": 7.032028441057028e-07, "epoch": 3.181662717123104, "percentage": 63.63, "elapsed_time": "2:48:17", "remaining_time": "1:36:10", "throughput": 8693.59, "total_tokens": 87779472} +{"current_steps": 130240, "total_steps": 204665, "loss": 0.0, "lr": 7.031214101475092e-07, "epoch": 3.181784867954951, "percentage": 63.64, "elapsed_time": "2:48:17", "remaining_time": "1:36:10", "throughput": 8693.62, "total_tokens": 87782800} +{"current_steps": 130245, "total_steps": 204665, "loss": 0.0, "lr": 7.030399783482971e-07, "epoch": 3.1819070187867977, "percentage": 63.64, "elapsed_time": "2:48:17", "remaining_time": "1:36:09", "throughput": 8693.64, "total_tokens": 87786064} +{"current_steps": 130250, "total_steps": 204665, "loss": 0.0, "lr": 7.029585487086576e-07, "epoch": 3.182029169618645, "percentage": 63.64, "elapsed_time": "2:48:18", "remaining_time": "1:36:09", "throughput": 8693.65, "total_tokens": 87789072} +{"current_steps": 130255, "total_steps": 204665, "loss": 0.0, "lr": 7.028771212291839e-07, "epoch": 3.182151320450492, "percentage": 63.64, "elapsed_time": "2:48:18", "remaining_time": "1:36:08", "throughput": 8693.67, "total_tokens": 87792272} +{"current_steps": 130260, "total_steps": 204665, "loss": 0.0002, "lr": 7.027956959104673e-07, "epoch": 3.1822734712823393, "percentage": 63.65, "elapsed_time": "2:48:18", "remaining_time": "1:36:08", "throughput": 8693.69, "total_tokens": 87795472} +{"current_steps": 130265, "total_steps": 204665, "loss": 0.0002, "lr": 7.027142727531008e-07, "epoch": 3.1823956221141865, "percentage": 63.65, "elapsed_time": "2:48:19", "remaining_time": "1:36:08", "throughput": 8693.72, "total_tokens": 87798800} +{"current_steps": 130270, "total_steps": 204665, "loss": 0.0, "lr": 7.026328517576757e-07, "epoch": 3.1825177729460337, "percentage": 63.65, "elapsed_time": "2:48:19", "remaining_time": "1:36:07", "throughput": 8693.72, "total_tokens": 87801872} +{"current_steps": 130275, "total_steps": 204665, "loss": 0.0004, "lr": 7.025514329247844e-07, "epoch": 3.182639923777881, "percentage": 63.65, "elapsed_time": "2:48:19", "remaining_time": "1:36:07", "throughput": 8693.73, "total_tokens": 87804880} +{"current_steps": 130280, "total_steps": 204665, "loss": 0.0002, "lr": 7.024700162550194e-07, "epoch": 3.182762074609728, "percentage": 63.66, "elapsed_time": "2:48:20", "remaining_time": "1:36:06", "throughput": 8693.76, "total_tokens": 87808272} +{"current_steps": 130285, "total_steps": 204665, "loss": 0.0, "lr": 7.023886017489721e-07, "epoch": 3.1828842254415752, "percentage": 63.66, "elapsed_time": "2:48:20", "remaining_time": "1:36:06", "throughput": 8693.82, "total_tokens": 87811856} +{"current_steps": 130290, "total_steps": 204665, "loss": 0.0317, "lr": 7.023071894072354e-07, "epoch": 3.1830063762734224, "percentage": 63.66, "elapsed_time": "2:48:20", "remaining_time": "1:36:05", "throughput": 8693.84, "total_tokens": 87815120} +{"current_steps": 130295, "total_steps": 204665, "loss": 0.0, "lr": 7.022257792304005e-07, "epoch": 3.1831285271052696, "percentage": 63.66, "elapsed_time": "2:48:21", "remaining_time": "1:36:05", "throughput": 8693.9, "total_tokens": 87818832} +{"current_steps": 130300, "total_steps": 204665, "loss": 0.0005, "lr": 7.021443712190601e-07, "epoch": 3.183250677937117, "percentage": 63.67, "elapsed_time": "2:48:21", "remaining_time": "1:36:05", "throughput": 8693.96, "total_tokens": 87822544} +{"current_steps": 130305, "total_steps": 204665, "loss": 0.0, "lr": 7.020629653738056e-07, "epoch": 3.183372828768964, "percentage": 63.67, "elapsed_time": "2:48:21", "remaining_time": "1:36:04", "throughput": 8694.01, "total_tokens": 87826064} +{"current_steps": 130310, "total_steps": 204665, "loss": 0.0, "lr": 7.019815616952295e-07, "epoch": 3.183494979600811, "percentage": 63.67, "elapsed_time": "2:48:22", "remaining_time": "1:36:04", "throughput": 8694.05, "total_tokens": 87829520} +{"current_steps": 130315, "total_steps": 204665, "loss": 0.0, "lr": 7.019001601839238e-07, "epoch": 3.1836171304326584, "percentage": 63.67, "elapsed_time": "2:48:22", "remaining_time": "1:36:03", "throughput": 8694.09, "total_tokens": 87832976} +{"current_steps": 130320, "total_steps": 204665, "loss": 0.0674, "lr": 7.018187608404802e-07, "epoch": 3.1837392812645056, "percentage": 63.67, "elapsed_time": "2:48:22", "remaining_time": "1:36:03", "throughput": 8694.11, "total_tokens": 87836112} +{"current_steps": 130325, "total_steps": 204665, "loss": 0.0703, "lr": 7.017373636654908e-07, "epoch": 3.1838614320963528, "percentage": 63.68, "elapsed_time": "2:48:23", "remaining_time": "1:36:03", "throughput": 8694.11, "total_tokens": 87839056} +{"current_steps": 130330, "total_steps": 204665, "loss": 0.0, "lr": 7.016559686595475e-07, "epoch": 3.1839835829282, "percentage": 63.68, "elapsed_time": "2:48:23", "remaining_time": "1:36:02", "throughput": 8694.14, "total_tokens": 87842320} +{"current_steps": 130335, "total_steps": 204665, "loss": 0.0, "lr": 7.015745758232421e-07, "epoch": 3.184105733760047, "percentage": 63.68, "elapsed_time": "2:48:23", "remaining_time": "1:36:02", "throughput": 8694.18, "total_tokens": 87845776} +{"current_steps": 130340, "total_steps": 204665, "loss": 0.0001, "lr": 7.01493185157167e-07, "epoch": 3.184227884591894, "percentage": 63.68, "elapsed_time": "2:48:24", "remaining_time": "1:36:01", "throughput": 8694.19, "total_tokens": 87848848} +{"current_steps": 130345, "total_steps": 204665, "loss": 0.0, "lr": 7.014117966619133e-07, "epoch": 3.184350035423741, "percentage": 63.69, "elapsed_time": "2:48:24", "remaining_time": "1:36:01", "throughput": 8694.27, "total_tokens": 87852880} +{"current_steps": 130350, "total_steps": 204665, "loss": 0.0, "lr": 7.013304103380738e-07, "epoch": 3.1844721862555883, "percentage": 63.69, "elapsed_time": "2:48:25", "remaining_time": "1:36:01", "throughput": 8694.28, "total_tokens": 87855952} +{"current_steps": 130355, "total_steps": 204665, "loss": 0.0001, "lr": 7.012490261862394e-07, "epoch": 3.1845943370874354, "percentage": 63.69, "elapsed_time": "2:48:25", "remaining_time": "1:36:00", "throughput": 8694.32, "total_tokens": 87859472} +{"current_steps": 130360, "total_steps": 204665, "loss": 0.031, "lr": 7.011676442070029e-07, "epoch": 3.1847164879192826, "percentage": 63.69, "elapsed_time": "2:48:25", "remaining_time": "1:36:00", "throughput": 8694.34, "total_tokens": 87862608} +{"current_steps": 130365, "total_steps": 204665, "loss": 0.0, "lr": 7.010862644009553e-07, "epoch": 3.18483863875113, "percentage": 63.7, "elapsed_time": "2:48:26", "remaining_time": "1:35:59", "throughput": 8694.4, "total_tokens": 87866320} +{"current_steps": 130370, "total_steps": 204665, "loss": 0.0001, "lr": 7.010048867686889e-07, "epoch": 3.184960789582977, "percentage": 63.7, "elapsed_time": "2:48:26", "remaining_time": "1:35:59", "throughput": 8694.44, "total_tokens": 87869712} +{"current_steps": 130375, "total_steps": 204665, "loss": 0.0, "lr": 7.009235113107956e-07, "epoch": 3.185082940414824, "percentage": 63.7, "elapsed_time": "2:48:26", "remaining_time": "1:35:59", "throughput": 8694.5, "total_tokens": 87873424} +{"current_steps": 130380, "total_steps": 204665, "loss": 0.0311, "lr": 7.008421380278666e-07, "epoch": 3.1852050912466714, "percentage": 63.7, "elapsed_time": "2:48:27", "remaining_time": "1:35:58", "throughput": 8694.51, "total_tokens": 87876496} +{"current_steps": 130385, "total_steps": 204665, "loss": 0.0317, "lr": 7.007607669204944e-07, "epoch": 3.1853272420785186, "percentage": 63.71, "elapsed_time": "2:48:27", "remaining_time": "1:35:58", "throughput": 8694.54, "total_tokens": 87879824} +{"current_steps": 130390, "total_steps": 204665, "loss": 0.0418, "lr": 7.006793979892702e-07, "epoch": 3.1854493929103658, "percentage": 63.71, "elapsed_time": "2:48:27", "remaining_time": "1:35:57", "throughput": 8694.57, "total_tokens": 87883152} +{"current_steps": 130395, "total_steps": 204665, "loss": 0.0, "lr": 7.005980312347856e-07, "epoch": 3.185571543742213, "percentage": 63.71, "elapsed_time": "2:48:28", "remaining_time": "1:35:57", "throughput": 8694.62, "total_tokens": 87886736} +{"current_steps": 130400, "total_steps": 204665, "loss": 0.033, "lr": 7.005166666576333e-07, "epoch": 3.18569369457406, "percentage": 63.71, "elapsed_time": "2:48:28", "remaining_time": "1:35:56", "throughput": 8694.67, "total_tokens": 87890320} +{"current_steps": 130405, "total_steps": 204665, "loss": 0.0002, "lr": 7.004353042584038e-07, "epoch": 3.1858158454059073, "percentage": 63.72, "elapsed_time": "2:48:28", "remaining_time": "1:35:56", "throughput": 8694.76, "total_tokens": 87894288} +{"current_steps": 130410, "total_steps": 204665, "loss": 0.0, "lr": 7.003539440376898e-07, "epoch": 3.1859379962377545, "percentage": 63.72, "elapsed_time": "2:48:29", "remaining_time": "1:35:56", "throughput": 8694.81, "total_tokens": 87897872} +{"current_steps": 130415, "total_steps": 204665, "loss": 0.0, "lr": 7.002725859960821e-07, "epoch": 3.1860601470696017, "percentage": 63.72, "elapsed_time": "2:48:29", "remaining_time": "1:35:55", "throughput": 8694.92, "total_tokens": 87902224} +{"current_steps": 130420, "total_steps": 204665, "loss": 0.0, "lr": 7.001912301341732e-07, "epoch": 3.186182297901449, "percentage": 63.72, "elapsed_time": "2:48:29", "remaining_time": "1:35:55", "throughput": 8694.97, "total_tokens": 87905872} +{"current_steps": 130425, "total_steps": 204665, "loss": 0.0001, "lr": 7.001098764525542e-07, "epoch": 3.1863044487332957, "percentage": 63.73, "elapsed_time": "2:48:30", "remaining_time": "1:35:54", "throughput": 8695.0, "total_tokens": 87909072} +{"current_steps": 130430, "total_steps": 204665, "loss": 0.0, "lr": 7.000285249518164e-07, "epoch": 3.186426599565143, "percentage": 63.73, "elapsed_time": "2:48:30", "remaining_time": "1:35:54", "throughput": 8695.02, "total_tokens": 87912336} +{"current_steps": 130435, "total_steps": 204665, "loss": 0.0, "lr": 6.999471756325523e-07, "epoch": 3.18654875039699, "percentage": 63.73, "elapsed_time": "2:48:31", "remaining_time": "1:35:54", "throughput": 8695.06, "total_tokens": 87915792} +{"current_steps": 130440, "total_steps": 204665, "loss": 0.0001, "lr": 6.998658284953528e-07, "epoch": 3.1866709012288372, "percentage": 63.73, "elapsed_time": "2:48:31", "remaining_time": "1:35:53", "throughput": 8695.09, "total_tokens": 87919056} +{"current_steps": 130445, "total_steps": 204665, "loss": 0.0572, "lr": 6.9978448354081e-07, "epoch": 3.1867930520606844, "percentage": 63.74, "elapsed_time": "2:48:31", "remaining_time": "1:35:53", "throughput": 8695.12, "total_tokens": 87922448} +{"current_steps": 130450, "total_steps": 204665, "loss": 0.0, "lr": 6.997031407695148e-07, "epoch": 3.1869152028925316, "percentage": 63.74, "elapsed_time": "2:48:32", "remaining_time": "1:35:52", "throughput": 8695.17, "total_tokens": 87926032} +{"current_steps": 130455, "total_steps": 204665, "loss": 0.0555, "lr": 6.996218001820596e-07, "epoch": 3.187037353724379, "percentage": 63.74, "elapsed_time": "2:48:32", "remaining_time": "1:35:52", "throughput": 8695.2, "total_tokens": 87929296} +{"current_steps": 130460, "total_steps": 204665, "loss": 0.0, "lr": 6.99540461779035e-07, "epoch": 3.187159504556226, "percentage": 63.74, "elapsed_time": "2:48:32", "remaining_time": "1:35:52", "throughput": 8695.25, "total_tokens": 87932944} +{"current_steps": 130465, "total_steps": 204665, "loss": 0.0, "lr": 6.994591255610331e-07, "epoch": 3.187281655388073, "percentage": 63.75, "elapsed_time": "2:48:33", "remaining_time": "1:35:51", "throughput": 8695.26, "total_tokens": 87936016} +{"current_steps": 130470, "total_steps": 204665, "loss": 0.0, "lr": 6.993777915286455e-07, "epoch": 3.1874038062199204, "percentage": 63.75, "elapsed_time": "2:48:33", "remaining_time": "1:35:51", "throughput": 8695.31, "total_tokens": 87939600} +{"current_steps": 130475, "total_steps": 204665, "loss": 0.0, "lr": 6.992964596824633e-07, "epoch": 3.1875259570517676, "percentage": 63.75, "elapsed_time": "2:48:33", "remaining_time": "1:35:50", "throughput": 8695.32, "total_tokens": 87942608} +{"current_steps": 130480, "total_steps": 204665, "loss": 0.0002, "lr": 6.992151300230782e-07, "epoch": 3.1876481078836147, "percentage": 63.75, "elapsed_time": "2:48:34", "remaining_time": "1:35:50", "throughput": 8695.35, "total_tokens": 87946000} +{"current_steps": 130485, "total_steps": 204665, "loss": 0.0, "lr": 6.991338025510816e-07, "epoch": 3.187770258715462, "percentage": 63.76, "elapsed_time": "2:48:34", "remaining_time": "1:35:50", "throughput": 8695.4, "total_tokens": 87949520} +{"current_steps": 130490, "total_steps": 204665, "loss": 0.0, "lr": 6.990524772670645e-07, "epoch": 3.187892409547309, "percentage": 63.76, "elapsed_time": "2:48:34", "remaining_time": "1:35:49", "throughput": 8695.45, "total_tokens": 87953104} +{"current_steps": 130495, "total_steps": 204665, "loss": 0.0, "lr": 6.989711541716192e-07, "epoch": 3.1880145603791563, "percentage": 63.76, "elapsed_time": "2:48:35", "remaining_time": "1:35:49", "throughput": 8695.49, "total_tokens": 87956624} +{"current_steps": 130500, "total_steps": 204665, "loss": 0.0, "lr": 6.988898332653363e-07, "epoch": 3.1881367112110035, "percentage": 63.76, "elapsed_time": "2:48:35", "remaining_time": "1:35:48", "throughput": 8695.52, "total_tokens": 87960016} +{"current_steps": 130505, "total_steps": 204665, "loss": 0.0, "lr": 6.988085145488081e-07, "epoch": 3.1882588620428507, "percentage": 63.77, "elapsed_time": "2:48:35", "remaining_time": "1:35:48", "throughput": 8695.54, "total_tokens": 87963152} +{"current_steps": 130510, "total_steps": 204665, "loss": 0.0, "lr": 6.987271980226247e-07, "epoch": 3.188381012874698, "percentage": 63.77, "elapsed_time": "2:48:36", "remaining_time": "1:35:47", "throughput": 8695.58, "total_tokens": 87966544} +{"current_steps": 130515, "total_steps": 204665, "loss": 0.0, "lr": 6.986458836873787e-07, "epoch": 3.188503163706545, "percentage": 63.77, "elapsed_time": "2:48:36", "remaining_time": "1:35:47", "throughput": 8695.59, "total_tokens": 87969616} +{"current_steps": 130520, "total_steps": 204665, "loss": 0.0, "lr": 6.985645715436605e-07, "epoch": 3.188625314538392, "percentage": 63.77, "elapsed_time": "2:48:36", "remaining_time": "1:35:47", "throughput": 8695.64, "total_tokens": 87973264} +{"current_steps": 130525, "total_steps": 204665, "loss": 0.0365, "lr": 6.98483261592062e-07, "epoch": 3.188747465370239, "percentage": 63.77, "elapsed_time": "2:48:37", "remaining_time": "1:35:46", "throughput": 8695.66, "total_tokens": 87976464} +{"current_steps": 130530, "total_steps": 204665, "loss": 0.0001, "lr": 6.984019538331745e-07, "epoch": 3.188869616202086, "percentage": 63.78, "elapsed_time": "2:48:37", "remaining_time": "1:35:46", "throughput": 8695.68, "total_tokens": 87979600} +{"current_steps": 130535, "total_steps": 204665, "loss": 0.0, "lr": 6.983206482675885e-07, "epoch": 3.1889917670339334, "percentage": 63.78, "elapsed_time": "2:48:37", "remaining_time": "1:35:45", "throughput": 8695.71, "total_tokens": 87982864} +{"current_steps": 130540, "total_steps": 204665, "loss": 0.0002, "lr": 6.982393448958965e-07, "epoch": 3.1891139178657806, "percentage": 63.78, "elapsed_time": "2:48:38", "remaining_time": "1:35:45", "throughput": 8695.76, "total_tokens": 87986448} +{"current_steps": 130545, "total_steps": 204665, "loss": 0.0, "lr": 6.981580437186887e-07, "epoch": 3.1892360686976278, "percentage": 63.78, "elapsed_time": "2:48:38", "remaining_time": "1:35:45", "throughput": 8695.8, "total_tokens": 87989904} +{"current_steps": 130550, "total_steps": 204665, "loss": 0.0, "lr": 6.980767447365574e-07, "epoch": 3.189358219529475, "percentage": 63.79, "elapsed_time": "2:48:39", "remaining_time": "1:35:44", "throughput": 8695.82, "total_tokens": 87993168} +{"current_steps": 130555, "total_steps": 204665, "loss": 0.0, "lr": 6.979954479500924e-07, "epoch": 3.189480370361322, "percentage": 63.79, "elapsed_time": "2:48:39", "remaining_time": "1:35:44", "throughput": 8695.84, "total_tokens": 87996240} +{"current_steps": 130560, "total_steps": 204665, "loss": 0.0001, "lr": 6.979141533598861e-07, "epoch": 3.1896025211931693, "percentage": 63.79, "elapsed_time": "2:48:39", "remaining_time": "1:35:43", "throughput": 8695.91, "total_tokens": 88000080} +{"current_steps": 130565, "total_steps": 204665, "loss": 0.0, "lr": 6.978328609665296e-07, "epoch": 3.1897246720250165, "percentage": 63.79, "elapsed_time": "2:48:40", "remaining_time": "1:35:43", "throughput": 8695.94, "total_tokens": 88003408} +{"current_steps": 130570, "total_steps": 204665, "loss": 0.0498, "lr": 6.977515707706134e-07, "epoch": 3.1898468228568637, "percentage": 63.8, "elapsed_time": "2:48:40", "remaining_time": "1:35:43", "throughput": 8695.95, "total_tokens": 88006480} +{"current_steps": 130575, "total_steps": 204665, "loss": 0.0001, "lr": 6.976702827727294e-07, "epoch": 3.189968973688711, "percentage": 63.8, "elapsed_time": "2:48:40", "remaining_time": "1:35:42", "throughput": 8695.99, "total_tokens": 88009872} +{"current_steps": 130580, "total_steps": 204665, "loss": 0.0002, "lr": 6.975889969734684e-07, "epoch": 3.190091124520558, "percentage": 63.8, "elapsed_time": "2:48:41", "remaining_time": "1:35:42", "throughput": 8696.03, "total_tokens": 88013328} +{"current_steps": 130585, "total_steps": 204665, "loss": 0.0, "lr": 6.975077133734213e-07, "epoch": 3.1902132753524053, "percentage": 63.8, "elapsed_time": "2:48:41", "remaining_time": "1:35:41", "throughput": 8696.1, "total_tokens": 88017232} +{"current_steps": 130590, "total_steps": 204665, "loss": 0.0, "lr": 6.974264319731797e-07, "epoch": 3.1903354261842525, "percentage": 63.81, "elapsed_time": "2:48:41", "remaining_time": "1:35:41", "throughput": 8696.14, "total_tokens": 88020688} +{"current_steps": 130595, "total_steps": 204665, "loss": 0.0, "lr": 6.973451527733343e-07, "epoch": 3.1904575770160997, "percentage": 63.81, "elapsed_time": "2:48:42", "remaining_time": "1:35:41", "throughput": 8696.14, "total_tokens": 88023568} +{"current_steps": 130600, "total_steps": 204665, "loss": 0.0, "lr": 6.972638757744766e-07, "epoch": 3.190579727847947, "percentage": 63.81, "elapsed_time": "2:48:42", "remaining_time": "1:35:40", "throughput": 8696.17, "total_tokens": 88027024} +{"current_steps": 130605, "total_steps": 204665, "loss": 0.0, "lr": 6.971826009771971e-07, "epoch": 3.1907018786797936, "percentage": 63.81, "elapsed_time": "2:48:42", "remaining_time": "1:35:40", "throughput": 8696.22, "total_tokens": 88030544} +{"current_steps": 130610, "total_steps": 204665, "loss": 0.0001, "lr": 6.971013283820876e-07, "epoch": 3.190824029511641, "percentage": 63.82, "elapsed_time": "2:48:43", "remaining_time": "1:35:39", "throughput": 8696.26, "total_tokens": 88034064} +{"current_steps": 130615, "total_steps": 204665, "loss": 0.0, "lr": 6.970200579897382e-07, "epoch": 3.190946180343488, "percentage": 63.82, "elapsed_time": "2:48:43", "remaining_time": "1:35:39", "throughput": 8696.07, "total_tokens": 88037328} +{"current_steps": 130620, "total_steps": 204665, "loss": 0.0, "lr": 6.969387898007406e-07, "epoch": 3.191068331175335, "percentage": 63.82, "elapsed_time": "2:48:44", "remaining_time": "1:35:39", "throughput": 8696.13, "total_tokens": 88041040} +{"current_steps": 130625, "total_steps": 204665, "loss": 0.0, "lr": 6.968575238156858e-07, "epoch": 3.1911904820071824, "percentage": 63.82, "elapsed_time": "2:48:44", "remaining_time": "1:35:38", "throughput": 8696.18, "total_tokens": 88044624} +{"current_steps": 130630, "total_steps": 204665, "loss": 0.0, "lr": 6.967762600351646e-07, "epoch": 3.1913126328390296, "percentage": 63.83, "elapsed_time": "2:48:44", "remaining_time": "1:35:38", "throughput": 8696.21, "total_tokens": 88047952} +{"current_steps": 130635, "total_steps": 204665, "loss": 0.0, "lr": 6.966949984597679e-07, "epoch": 3.1914347836708767, "percentage": 63.83, "elapsed_time": "2:48:45", "remaining_time": "1:35:37", "throughput": 8696.24, "total_tokens": 88051216} +{"current_steps": 130640, "total_steps": 204665, "loss": 0.0001, "lr": 6.966137390900868e-07, "epoch": 3.191556934502724, "percentage": 63.83, "elapsed_time": "2:48:45", "remaining_time": "1:35:37", "throughput": 8696.26, "total_tokens": 88054352} +{"current_steps": 130645, "total_steps": 204665, "loss": 0.0467, "lr": 6.96532481926712e-07, "epoch": 3.191679085334571, "percentage": 63.83, "elapsed_time": "2:48:45", "remaining_time": "1:35:37", "throughput": 8696.3, "total_tokens": 88057872} +{"current_steps": 130650, "total_steps": 204665, "loss": 0.0, "lr": 6.96451226970235e-07, "epoch": 3.1918012361664183, "percentage": 63.84, "elapsed_time": "2:48:46", "remaining_time": "1:35:36", "throughput": 8696.3, "total_tokens": 88060880} +{"current_steps": 130655, "total_steps": 204665, "loss": 0.0, "lr": 6.963699742212459e-07, "epoch": 3.1919233869982655, "percentage": 63.84, "elapsed_time": "2:48:46", "remaining_time": "1:35:36", "throughput": 8696.34, "total_tokens": 88064272} +{"current_steps": 130660, "total_steps": 204665, "loss": 0.0418, "lr": 6.962887236803363e-07, "epoch": 3.1920455378301127, "percentage": 63.84, "elapsed_time": "2:48:46", "remaining_time": "1:35:35", "throughput": 8696.4, "total_tokens": 88067920} +{"current_steps": 130665, "total_steps": 204665, "loss": 0.0, "lr": 6.962074753480966e-07, "epoch": 3.19216768866196, "percentage": 63.84, "elapsed_time": "2:48:47", "remaining_time": "1:35:35", "throughput": 8696.45, "total_tokens": 88071504} +{"current_steps": 130670, "total_steps": 204665, "loss": 0.0, "lr": 6.961262292251182e-07, "epoch": 3.192289839493807, "percentage": 63.85, "elapsed_time": "2:48:47", "remaining_time": "1:35:35", "throughput": 8696.45, "total_tokens": 88074576} +{"current_steps": 130675, "total_steps": 204665, "loss": 0.0, "lr": 6.96044985311991e-07, "epoch": 3.1924119903256543, "percentage": 63.85, "elapsed_time": "2:48:47", "remaining_time": "1:35:34", "throughput": 8696.49, "total_tokens": 88078032} +{"current_steps": 130680, "total_steps": 204665, "loss": 0.0, "lr": 6.959637436093069e-07, "epoch": 3.1925341411575014, "percentage": 63.85, "elapsed_time": "2:48:48", "remaining_time": "1:35:34", "throughput": 8696.53, "total_tokens": 88081488} +{"current_steps": 130685, "total_steps": 204665, "loss": 0.0, "lr": 6.958825041176564e-07, "epoch": 3.1926562919893486, "percentage": 63.85, "elapsed_time": "2:48:48", "remaining_time": "1:35:33", "throughput": 8696.56, "total_tokens": 88084752} +{"current_steps": 130690, "total_steps": 204665, "loss": 0.0, "lr": 6.958012668376295e-07, "epoch": 3.1927784428211954, "percentage": 63.86, "elapsed_time": "2:48:49", "remaining_time": "1:35:33", "throughput": 8696.58, "total_tokens": 88087952} +{"current_steps": 130695, "total_steps": 204665, "loss": 0.0, "lr": 6.957200317698182e-07, "epoch": 3.1929005936530426, "percentage": 63.86, "elapsed_time": "2:48:49", "remaining_time": "1:35:32", "throughput": 8696.61, "total_tokens": 88091280} +{"current_steps": 130700, "total_steps": 204665, "loss": 0.0001, "lr": 6.956387989148123e-07, "epoch": 3.1930227444848898, "percentage": 63.86, "elapsed_time": "2:48:49", "remaining_time": "1:35:32", "throughput": 8696.65, "total_tokens": 88094800} +{"current_steps": 130705, "total_steps": 204665, "loss": 0.0, "lr": 6.955575682732032e-07, "epoch": 3.193144895316737, "percentage": 63.86, "elapsed_time": "2:48:50", "remaining_time": "1:35:32", "throughput": 8696.66, "total_tokens": 88097872} +{"current_steps": 130710, "total_steps": 204665, "loss": 0.0, "lr": 6.95476339845581e-07, "epoch": 3.193267046148584, "percentage": 63.87, "elapsed_time": "2:48:50", "remaining_time": "1:35:31", "throughput": 8696.73, "total_tokens": 88101648} +{"current_steps": 130715, "total_steps": 204665, "loss": 0.0002, "lr": 6.953951136325367e-07, "epoch": 3.1933891969804313, "percentage": 63.87, "elapsed_time": "2:48:50", "remaining_time": "1:35:31", "throughput": 8696.78, "total_tokens": 88105296} +{"current_steps": 130720, "total_steps": 204665, "loss": 0.0, "lr": 6.953138896346615e-07, "epoch": 3.1935113478122785, "percentage": 63.87, "elapsed_time": "2:48:51", "remaining_time": "1:35:30", "throughput": 8696.82, "total_tokens": 88108688} +{"current_steps": 130725, "total_steps": 204665, "loss": 0.0903, "lr": 6.952326678525452e-07, "epoch": 3.1936334986441257, "percentage": 63.87, "elapsed_time": "2:48:51", "remaining_time": "1:35:30", "throughput": 8696.88, "total_tokens": 88112400} +{"current_steps": 130730, "total_steps": 204665, "loss": 0.0, "lr": 6.951514482867794e-07, "epoch": 3.193755649475973, "percentage": 63.88, "elapsed_time": "2:48:51", "remaining_time": "1:35:30", "throughput": 8696.91, "total_tokens": 88115728} +{"current_steps": 130735, "total_steps": 204665, "loss": 0.0005, "lr": 6.95070230937954e-07, "epoch": 3.19387780030782, "percentage": 63.88, "elapsed_time": "2:48:52", "remaining_time": "1:35:29", "throughput": 8696.91, "total_tokens": 88118672} +{"current_steps": 130740, "total_steps": 204665, "loss": 0.0001, "lr": 6.949890158066598e-07, "epoch": 3.1939999511396673, "percentage": 63.88, "elapsed_time": "2:48:52", "remaining_time": "1:35:29", "throughput": 8696.94, "total_tokens": 88122064} +{"current_steps": 130745, "total_steps": 204665, "loss": 0.0, "lr": 6.949078028934879e-07, "epoch": 3.1941221019715145, "percentage": 63.88, "elapsed_time": "2:48:52", "remaining_time": "1:35:28", "throughput": 8696.95, "total_tokens": 88125136} +{"current_steps": 130750, "total_steps": 204665, "loss": 0.0002, "lr": 6.94826592199028e-07, "epoch": 3.1942442528033617, "percentage": 63.88, "elapsed_time": "2:48:53", "remaining_time": "1:35:28", "throughput": 8696.96, "total_tokens": 88128208} +{"current_steps": 130755, "total_steps": 204665, "loss": 0.0, "lr": 6.947453837238716e-07, "epoch": 3.194366403635209, "percentage": 63.89, "elapsed_time": "2:48:53", "remaining_time": "1:35:28", "throughput": 8697.03, "total_tokens": 88131984} +{"current_steps": 130760, "total_steps": 204665, "loss": 0.1728, "lr": 6.946641774686085e-07, "epoch": 3.194488554467056, "percentage": 63.89, "elapsed_time": "2:48:53", "remaining_time": "1:35:27", "throughput": 8697.08, "total_tokens": 88135696} +{"current_steps": 130765, "total_steps": 204665, "loss": 0.0, "lr": 6.945829734338301e-07, "epoch": 3.1946107052989032, "percentage": 63.89, "elapsed_time": "2:48:54", "remaining_time": "1:35:27", "throughput": 8697.12, "total_tokens": 88139088} +{"current_steps": 130770, "total_steps": 204665, "loss": 0.0379, "lr": 6.94501771620126e-07, "epoch": 3.1947328561307504, "percentage": 63.89, "elapsed_time": "2:48:54", "remaining_time": "1:35:26", "throughput": 8697.14, "total_tokens": 88142224} +{"current_steps": 130775, "total_steps": 204665, "loss": 0.0019, "lr": 6.944205720280875e-07, "epoch": 3.1948550069625976, "percentage": 63.9, "elapsed_time": "2:48:54", "remaining_time": "1:35:26", "throughput": 8697.16, "total_tokens": 88145488} +{"current_steps": 130780, "total_steps": 204665, "loss": 0.0001, "lr": 6.943393746583047e-07, "epoch": 3.194977157794445, "percentage": 63.9, "elapsed_time": "2:48:55", "remaining_time": "1:35:26", "throughput": 8697.18, "total_tokens": 88148624} +{"current_steps": 130785, "total_steps": 204665, "loss": 0.0, "lr": 6.942581795113681e-07, "epoch": 3.1950993086262915, "percentage": 63.9, "elapsed_time": "2:48:55", "remaining_time": "1:35:25", "throughput": 8697.23, "total_tokens": 88152336} +{"current_steps": 130790, "total_steps": 204665, "loss": 0.0, "lr": 6.941769865878684e-07, "epoch": 3.1952214594581387, "percentage": 63.9, "elapsed_time": "2:48:56", "remaining_time": "1:35:25", "throughput": 8697.26, "total_tokens": 88155600} +{"current_steps": 130795, "total_steps": 204665, "loss": 0.0003, "lr": 6.940957958883957e-07, "epoch": 3.195343610289986, "percentage": 63.91, "elapsed_time": "2:48:56", "remaining_time": "1:35:24", "throughput": 8697.31, "total_tokens": 88159248} +{"current_steps": 130800, "total_steps": 204665, "loss": 0.0683, "lr": 6.940146074135406e-07, "epoch": 3.195465761121833, "percentage": 63.91, "elapsed_time": "2:48:56", "remaining_time": "1:35:24", "throughput": 8697.36, "total_tokens": 88162832} +{"current_steps": 130805, "total_steps": 204665, "loss": 0.0465, "lr": 6.93933421163894e-07, "epoch": 3.1955879119536803, "percentage": 63.91, "elapsed_time": "2:48:57", "remaining_time": "1:35:23", "throughput": 8697.39, "total_tokens": 88166096} +{"current_steps": 130810, "total_steps": 204665, "loss": 0.0, "lr": 6.938522371400455e-07, "epoch": 3.1957100627855275, "percentage": 63.91, "elapsed_time": "2:48:57", "remaining_time": "1:35:23", "throughput": 8697.43, "total_tokens": 88169552} +{"current_steps": 130815, "total_steps": 204665, "loss": 0.0, "lr": 6.937710553425862e-07, "epoch": 3.1958322136173747, "percentage": 63.92, "elapsed_time": "2:48:57", "remaining_time": "1:35:23", "throughput": 8697.45, "total_tokens": 88172688} +{"current_steps": 130820, "total_steps": 204665, "loss": 0.0, "lr": 6.936898757721059e-07, "epoch": 3.195954364449222, "percentage": 63.92, "elapsed_time": "2:48:58", "remaining_time": "1:35:22", "throughput": 8697.47, "total_tokens": 88175952} +{"current_steps": 130825, "total_steps": 204665, "loss": 0.0, "lr": 6.936086984291954e-07, "epoch": 3.196076515281069, "percentage": 63.92, "elapsed_time": "2:48:58", "remaining_time": "1:35:22", "throughput": 8697.52, "total_tokens": 88179472} +{"current_steps": 130830, "total_steps": 204665, "loss": 0.0407, "lr": 6.935275233144447e-07, "epoch": 3.1961986661129163, "percentage": 63.92, "elapsed_time": "2:48:58", "remaining_time": "1:35:21", "throughput": 8697.56, "total_tokens": 88182928} +{"current_steps": 130835, "total_steps": 204665, "loss": 0.0, "lr": 6.934463504284442e-07, "epoch": 3.1963208169447634, "percentage": 63.93, "elapsed_time": "2:48:59", "remaining_time": "1:35:21", "throughput": 8697.58, "total_tokens": 88186064} +{"current_steps": 130840, "total_steps": 204665, "loss": 0.0002, "lr": 6.933651797717847e-07, "epoch": 3.1964429677766106, "percentage": 63.93, "elapsed_time": "2:48:59", "remaining_time": "1:35:21", "throughput": 8697.58, "total_tokens": 88189072} +{"current_steps": 130845, "total_steps": 204665, "loss": 0.0, "lr": 6.932840113450555e-07, "epoch": 3.196565118608458, "percentage": 63.93, "elapsed_time": "2:48:59", "remaining_time": "1:35:20", "throughput": 8697.61, "total_tokens": 88192336} +{"current_steps": 130850, "total_steps": 204665, "loss": 0.0, "lr": 6.932028451488481e-07, "epoch": 3.196687269440305, "percentage": 63.93, "elapsed_time": "2:49:00", "remaining_time": "1:35:20", "throughput": 8697.64, "total_tokens": 88195728} +{"current_steps": 130855, "total_steps": 204665, "loss": 0.0, "lr": 6.931216811837515e-07, "epoch": 3.196809420272152, "percentage": 63.94, "elapsed_time": "2:49:00", "remaining_time": "1:35:19", "throughput": 8697.7, "total_tokens": 88199312} +{"current_steps": 130860, "total_steps": 204665, "loss": 0.0, "lr": 6.930405194503571e-07, "epoch": 3.1969315711039994, "percentage": 63.94, "elapsed_time": "2:49:00", "remaining_time": "1:35:19", "throughput": 8697.72, "total_tokens": 88202576} +{"current_steps": 130865, "total_steps": 204665, "loss": 0.0, "lr": 6.929593599492543e-07, "epoch": 3.1970537219358466, "percentage": 63.94, "elapsed_time": "2:49:01", "remaining_time": "1:35:19", "throughput": 8697.73, "total_tokens": 88205648} +{"current_steps": 130870, "total_steps": 204665, "loss": 0.0558, "lr": 6.928782026810333e-07, "epoch": 3.1971758727676933, "percentage": 63.94, "elapsed_time": "2:49:01", "remaining_time": "1:35:18", "throughput": 8697.75, "total_tokens": 88208784} +{"current_steps": 130875, "total_steps": 204665, "loss": 0.0, "lr": 6.927970476462853e-07, "epoch": 3.1972980235995405, "percentage": 63.95, "elapsed_time": "2:49:01", "remaining_time": "1:35:18", "throughput": 8697.77, "total_tokens": 88211920} +{"current_steps": 130880, "total_steps": 204665, "loss": 0.0, "lr": 6.927158948455994e-07, "epoch": 3.1974201744313877, "percentage": 63.95, "elapsed_time": "2:49:02", "remaining_time": "1:35:17", "throughput": 8697.81, "total_tokens": 88215376} +{"current_steps": 130885, "total_steps": 204665, "loss": 0.0006, "lr": 6.926347442795664e-07, "epoch": 3.197542325263235, "percentage": 63.95, "elapsed_time": "2:49:02", "remaining_time": "1:35:17", "throughput": 8697.87, "total_tokens": 88219152} +{"current_steps": 130890, "total_steps": 204665, "loss": 0.0001, "lr": 6.925535959487761e-07, "epoch": 3.197664476095082, "percentage": 63.95, "elapsed_time": "2:49:02", "remaining_time": "1:35:16", "throughput": 8697.95, "total_tokens": 88222992} +{"current_steps": 130895, "total_steps": 204665, "loss": 0.0, "lr": 6.924724498538186e-07, "epoch": 3.1977866269269293, "percentage": 63.96, "elapsed_time": "2:49:03", "remaining_time": "1:35:16", "throughput": 8697.98, "total_tokens": 88226320} +{"current_steps": 130900, "total_steps": 204665, "loss": 0.0, "lr": 6.923913059952844e-07, "epoch": 3.1979087777587765, "percentage": 63.96, "elapsed_time": "2:49:03", "remaining_time": "1:35:16", "throughput": 8698.04, "total_tokens": 88230032} +{"current_steps": 130905, "total_steps": 204665, "loss": 0.0, "lr": 6.92310164373763e-07, "epoch": 3.1980309285906237, "percentage": 63.96, "elapsed_time": "2:49:04", "remaining_time": "1:35:15", "throughput": 8698.04, "total_tokens": 88232976} +{"current_steps": 130910, "total_steps": 204665, "loss": 0.0, "lr": 6.922290249898454e-07, "epoch": 3.198153079422471, "percentage": 63.96, "elapsed_time": "2:49:04", "remaining_time": "1:35:15", "throughput": 8698.08, "total_tokens": 88236432} +{"current_steps": 130915, "total_steps": 204665, "loss": 0.0, "lr": 6.921478878441206e-07, "epoch": 3.198275230254318, "percentage": 63.97, "elapsed_time": "2:49:04", "remaining_time": "1:35:14", "throughput": 8698.16, "total_tokens": 88240336} +{"current_steps": 130920, "total_steps": 204665, "loss": 0.0, "lr": 6.920667529371795e-07, "epoch": 3.1983973810861652, "percentage": 63.97, "elapsed_time": "2:49:05", "remaining_time": "1:35:14", "throughput": 8698.18, "total_tokens": 88243536} +{"current_steps": 130925, "total_steps": 204665, "loss": 0.0, "lr": 6.919856202696115e-07, "epoch": 3.1985195319180124, "percentage": 63.97, "elapsed_time": "2:49:05", "remaining_time": "1:35:14", "throughput": 8698.26, "total_tokens": 88247440} +{"current_steps": 130930, "total_steps": 204665, "loss": 0.0, "lr": 6.919044898420072e-07, "epoch": 3.1986416827498596, "percentage": 63.97, "elapsed_time": "2:49:05", "remaining_time": "1:35:13", "throughput": 8698.35, "total_tokens": 88251472} +{"current_steps": 130935, "total_steps": 204665, "loss": 0.0, "lr": 6.918233616549561e-07, "epoch": 3.198763833581707, "percentage": 63.98, "elapsed_time": "2:49:06", "remaining_time": "1:35:13", "throughput": 8698.38, "total_tokens": 88254864} +{"current_steps": 130940, "total_steps": 204665, "loss": 0.0001, "lr": 6.917422357090485e-07, "epoch": 3.198885984413554, "percentage": 63.98, "elapsed_time": "2:49:06", "remaining_time": "1:35:12", "throughput": 8698.41, "total_tokens": 88258128} +{"current_steps": 130945, "total_steps": 204665, "loss": 0.0, "lr": 6.916611120048743e-07, "epoch": 3.199008135245401, "percentage": 63.98, "elapsed_time": "2:49:06", "remaining_time": "1:35:12", "throughput": 8698.45, "total_tokens": 88261584} +{"current_steps": 130950, "total_steps": 204665, "loss": 0.0, "lr": 6.915799905430233e-07, "epoch": 3.1991302860772484, "percentage": 63.98, "elapsed_time": "2:49:07", "remaining_time": "1:35:12", "throughput": 8698.49, "total_tokens": 88264976} +{"current_steps": 130955, "total_steps": 204665, "loss": 0.0, "lr": 6.914988713240855e-07, "epoch": 3.1992524369090956, "percentage": 63.99, "elapsed_time": "2:49:07", "remaining_time": "1:35:11", "throughput": 8698.5, "total_tokens": 88268048} +{"current_steps": 130960, "total_steps": 204665, "loss": 0.0, "lr": 6.914177543486512e-07, "epoch": 3.1993745877409427, "percentage": 63.99, "elapsed_time": "2:49:07", "remaining_time": "1:35:11", "throughput": 8698.54, "total_tokens": 88271568} +{"current_steps": 130965, "total_steps": 204665, "loss": 0.0555, "lr": 6.913366396173097e-07, "epoch": 3.1994967385727895, "percentage": 63.99, "elapsed_time": "2:49:08", "remaining_time": "1:35:10", "throughput": 8698.59, "total_tokens": 88275088} +{"current_steps": 130970, "total_steps": 204665, "loss": 0.0, "lr": 6.912555271306515e-07, "epoch": 3.1996188894046367, "percentage": 63.99, "elapsed_time": "2:49:08", "remaining_time": "1:35:10", "throughput": 8698.62, "total_tokens": 88278352} +{"current_steps": 130975, "total_steps": 204665, "loss": 0.0, "lr": 6.911744168892657e-07, "epoch": 3.199741040236484, "percentage": 63.99, "elapsed_time": "2:49:08", "remaining_time": "1:35:10", "throughput": 8698.65, "total_tokens": 88281680} +{"current_steps": 130980, "total_steps": 204665, "loss": 0.0619, "lr": 6.910933088937432e-07, "epoch": 3.199863191068331, "percentage": 64.0, "elapsed_time": "2:49:09", "remaining_time": "1:35:09", "throughput": 8698.66, "total_tokens": 88284752} +{"current_steps": 130985, "total_steps": 204665, "loss": 0.0, "lr": 6.910122031446726e-07, "epoch": 3.1999853419001782, "percentage": 64.0, "elapsed_time": "2:49:09", "remaining_time": "1:35:09", "throughput": 8698.66, "total_tokens": 88287760} +{"current_steps": 130990, "total_steps": 204665, "loss": 0.0, "lr": 6.909310996426447e-07, "epoch": 3.2001074927320254, "percentage": 64.0, "elapsed_time": "2:49:09", "remaining_time": "1:35:08", "throughput": 8698.69, "total_tokens": 88291024} +{"current_steps": 130995, "total_steps": 204665, "loss": 0.0, "lr": 6.908499983882492e-07, "epoch": 3.2002296435638726, "percentage": 64.0, "elapsed_time": "2:49:10", "remaining_time": "1:35:08", "throughput": 8698.74, "total_tokens": 88294672} +{"current_steps": 131000, "total_steps": 204665, "loss": 0.0, "lr": 6.907688993820752e-07, "epoch": 3.20035179439572, "percentage": 64.01, "elapsed_time": "2:49:10", "remaining_time": "1:35:07", "throughput": 8698.77, "total_tokens": 88297936} +{"current_steps": 131005, "total_steps": 204665, "loss": 0.0001, "lr": 6.906878026247134e-07, "epoch": 3.200473945227567, "percentage": 64.01, "elapsed_time": "2:49:10", "remaining_time": "1:35:07", "throughput": 8698.79, "total_tokens": 88301072} +{"current_steps": 131010, "total_steps": 204665, "loss": 0.0, "lr": 6.906067081167526e-07, "epoch": 3.200596096059414, "percentage": 64.01, "elapsed_time": "2:49:11", "remaining_time": "1:35:07", "throughput": 8698.81, "total_tokens": 88304336} +{"current_steps": 131015, "total_steps": 204665, "loss": 0.0001, "lr": 6.905256158587836e-07, "epoch": 3.2007182468912614, "percentage": 64.01, "elapsed_time": "2:49:11", "remaining_time": "1:35:06", "throughput": 8698.84, "total_tokens": 88307664} +{"current_steps": 131020, "total_steps": 204665, "loss": 0.0003, "lr": 6.904445258513949e-07, "epoch": 3.2008403977231086, "percentage": 64.02, "elapsed_time": "2:49:12", "remaining_time": "1:35:06", "throughput": 8698.88, "total_tokens": 88311056} +{"current_steps": 131025, "total_steps": 204665, "loss": 0.0625, "lr": 6.903634380951771e-07, "epoch": 3.2009625485549558, "percentage": 64.02, "elapsed_time": "2:49:12", "remaining_time": "1:35:05", "throughput": 8698.91, "total_tokens": 88314320} +{"current_steps": 131030, "total_steps": 204665, "loss": 0.0, "lr": 6.9028235259072e-07, "epoch": 3.201084699386803, "percentage": 64.02, "elapsed_time": "2:49:12", "remaining_time": "1:35:05", "throughput": 8698.94, "total_tokens": 88317712} +{"current_steps": 131035, "total_steps": 204665, "loss": 0.0, "lr": 6.902012693386123e-07, "epoch": 3.20120685021865, "percentage": 64.02, "elapsed_time": "2:49:13", "remaining_time": "1:35:05", "throughput": 8698.98, "total_tokens": 88321104} +{"current_steps": 131040, "total_steps": 204665, "loss": 0.0, "lr": 6.90120188339445e-07, "epoch": 3.2013290010504973, "percentage": 64.03, "elapsed_time": "2:49:13", "remaining_time": "1:35:04", "throughput": 8699.01, "total_tokens": 88324368} +{"current_steps": 131045, "total_steps": 204665, "loss": 0.0, "lr": 6.900391095938068e-07, "epoch": 3.2014511518823445, "percentage": 64.03, "elapsed_time": "2:49:13", "remaining_time": "1:35:04", "throughput": 8699.08, "total_tokens": 88328208} +{"current_steps": 131050, "total_steps": 204665, "loss": 0.0, "lr": 6.899580331022873e-07, "epoch": 3.2015733027141913, "percentage": 64.03, "elapsed_time": "2:49:14", "remaining_time": "1:35:03", "throughput": 8699.08, "total_tokens": 88331152} +{"current_steps": 131055, "total_steps": 204665, "loss": 0.0, "lr": 6.898769588654767e-07, "epoch": 3.2016954535460385, "percentage": 64.03, "elapsed_time": "2:49:14", "remaining_time": "1:35:03", "throughput": 8699.07, "total_tokens": 88333904} +{"current_steps": 131060, "total_steps": 204665, "loss": 0.0, "lr": 6.89795886883964e-07, "epoch": 3.2018176043778857, "percentage": 64.04, "elapsed_time": "2:49:14", "remaining_time": "1:35:03", "throughput": 8699.06, "total_tokens": 88336784} +{"current_steps": 131065, "total_steps": 204665, "loss": 0.034, "lr": 6.897148171583394e-07, "epoch": 3.201939755209733, "percentage": 64.04, "elapsed_time": "2:49:15", "remaining_time": "1:35:02", "throughput": 8699.09, "total_tokens": 88340112} +{"current_steps": 131070, "total_steps": 204665, "loss": 0.0, "lr": 6.896337496891917e-07, "epoch": 3.20206190604158, "percentage": 64.04, "elapsed_time": "2:49:15", "remaining_time": "1:35:02", "throughput": 8699.11, "total_tokens": 88343312} +{"current_steps": 131075, "total_steps": 204665, "loss": 0.0, "lr": 6.895526844771112e-07, "epoch": 3.202184056873427, "percentage": 64.04, "elapsed_time": "2:49:15", "remaining_time": "1:35:01", "throughput": 8699.16, "total_tokens": 88346896} +{"current_steps": 131080, "total_steps": 204665, "loss": 0.0, "lr": 6.894716215226868e-07, "epoch": 3.2023062077052744, "percentage": 64.05, "elapsed_time": "2:49:16", "remaining_time": "1:35:01", "throughput": 8699.21, "total_tokens": 88350480} +{"current_steps": 131085, "total_steps": 204665, "loss": 0.0, "lr": 6.893905608265084e-07, "epoch": 3.2024283585371216, "percentage": 64.05, "elapsed_time": "2:49:16", "remaining_time": "1:35:00", "throughput": 8699.28, "total_tokens": 88354256} +{"current_steps": 131090, "total_steps": 204665, "loss": 0.0, "lr": 6.893095023891653e-07, "epoch": 3.202550509368969, "percentage": 64.05, "elapsed_time": "2:49:16", "remaining_time": "1:35:00", "throughput": 8699.31, "total_tokens": 88357584} +{"current_steps": 131095, "total_steps": 204665, "loss": 0.0, "lr": 6.892284462112472e-07, "epoch": 3.202672660200816, "percentage": 64.05, "elapsed_time": "2:49:17", "remaining_time": "1:35:00", "throughput": 8699.35, "total_tokens": 88360976} +{"current_steps": 131100, "total_steps": 204665, "loss": 0.0, "lr": 6.891473922933435e-07, "epoch": 3.202794811032663, "percentage": 64.06, "elapsed_time": "2:49:17", "remaining_time": "1:34:59", "throughput": 8699.39, "total_tokens": 88364368} +{"current_steps": 131105, "total_steps": 204665, "loss": 0.0, "lr": 6.890663406360434e-07, "epoch": 3.2029169618645104, "percentage": 64.06, "elapsed_time": "2:49:17", "remaining_time": "1:34:59", "throughput": 8699.41, "total_tokens": 88367696} +{"current_steps": 131110, "total_steps": 204665, "loss": 0.0001, "lr": 6.889852912399363e-07, "epoch": 3.2030391126963575, "percentage": 64.06, "elapsed_time": "2:49:18", "remaining_time": "1:34:58", "throughput": 8699.44, "total_tokens": 88370896} +{"current_steps": 131115, "total_steps": 204665, "loss": 0.0, "lr": 6.889042441056123e-07, "epoch": 3.2031612635282047, "percentage": 64.06, "elapsed_time": "2:49:18", "remaining_time": "1:34:58", "throughput": 8699.49, "total_tokens": 88374544} +{"current_steps": 131120, "total_steps": 204665, "loss": 0.0, "lr": 6.888231992336599e-07, "epoch": 3.203283414360052, "percentage": 64.07, "elapsed_time": "2:49:18", "remaining_time": "1:34:58", "throughput": 8699.54, "total_tokens": 88378128} +{"current_steps": 131125, "total_steps": 204665, "loss": 0.0305, "lr": 6.887421566246694e-07, "epoch": 3.203405565191899, "percentage": 64.07, "elapsed_time": "2:49:19", "remaining_time": "1:34:57", "throughput": 8699.57, "total_tokens": 88381392} +{"current_steps": 131130, "total_steps": 204665, "loss": 0.0, "lr": 6.886611162792291e-07, "epoch": 3.2035277160237463, "percentage": 64.07, "elapsed_time": "2:49:19", "remaining_time": "1:34:57", "throughput": 8699.59, "total_tokens": 88384592} +{"current_steps": 131135, "total_steps": 204665, "loss": 0.0, "lr": 6.885800781979294e-07, "epoch": 3.203649866855593, "percentage": 64.07, "elapsed_time": "2:49:19", "remaining_time": "1:34:56", "throughput": 8699.62, "total_tokens": 88387984} +{"current_steps": 131140, "total_steps": 204665, "loss": 0.0, "lr": 6.884990423813586e-07, "epoch": 3.2037720176874402, "percentage": 64.08, "elapsed_time": "2:49:20", "remaining_time": "1:34:56", "throughput": 8699.68, "total_tokens": 88391632} +{"current_steps": 131145, "total_steps": 204665, "loss": 0.0, "lr": 6.884180088301068e-07, "epoch": 3.2038941685192874, "percentage": 64.08, "elapsed_time": "2:49:20", "remaining_time": "1:34:56", "throughput": 8699.68, "total_tokens": 88394576} +{"current_steps": 131150, "total_steps": 204665, "loss": 0.0616, "lr": 6.883369775447633e-07, "epoch": 3.2040163193511346, "percentage": 64.08, "elapsed_time": "2:49:21", "remaining_time": "1:34:55", "throughput": 8699.71, "total_tokens": 88397840} +{"current_steps": 131155, "total_steps": 204665, "loss": 0.0, "lr": 6.882559485259167e-07, "epoch": 3.204138470182982, "percentage": 64.08, "elapsed_time": "2:49:21", "remaining_time": "1:34:55", "throughput": 8699.75, "total_tokens": 88401296} +{"current_steps": 131160, "total_steps": 204665, "loss": 0.0, "lr": 6.88174921774157e-07, "epoch": 3.204260621014829, "percentage": 64.09, "elapsed_time": "2:49:21", "remaining_time": "1:34:54", "throughput": 8699.8, "total_tokens": 88404880} +{"current_steps": 131165, "total_steps": 204665, "loss": 0.0332, "lr": 6.880938972900729e-07, "epoch": 3.204382771846676, "percentage": 64.09, "elapsed_time": "2:49:22", "remaining_time": "1:34:54", "throughput": 8699.86, "total_tokens": 88408656} +{"current_steps": 131170, "total_steps": 204665, "loss": 0.0, "lr": 6.880128750742542e-07, "epoch": 3.2045049226785234, "percentage": 64.09, "elapsed_time": "2:49:22", "remaining_time": "1:34:54", "throughput": 8699.93, "total_tokens": 88412432} +{"current_steps": 131175, "total_steps": 204665, "loss": 0.0, "lr": 6.879318551272894e-07, "epoch": 3.2046270735103706, "percentage": 64.09, "elapsed_time": "2:49:22", "remaining_time": "1:34:53", "throughput": 8699.93, "total_tokens": 88415440} +{"current_steps": 131180, "total_steps": 204665, "loss": 0.0, "lr": 6.87850837449768e-07, "epoch": 3.2047492243422178, "percentage": 64.09, "elapsed_time": "2:49:23", "remaining_time": "1:34:53", "throughput": 8699.98, "total_tokens": 88418960} +{"current_steps": 131185, "total_steps": 204665, "loss": 0.0546, "lr": 6.877698220422799e-07, "epoch": 3.204871375174065, "percentage": 64.1, "elapsed_time": "2:49:23", "remaining_time": "1:34:52", "throughput": 8700.01, "total_tokens": 88422352} +{"current_steps": 131190, "total_steps": 204665, "loss": 0.0, "lr": 6.87688808905413e-07, "epoch": 3.204993526005912, "percentage": 64.1, "elapsed_time": "2:49:23", "remaining_time": "1:34:52", "throughput": 8700.02, "total_tokens": 88425424} +{"current_steps": 131195, "total_steps": 204665, "loss": 0.0, "lr": 6.876077980397575e-07, "epoch": 3.2051156768377593, "percentage": 64.1, "elapsed_time": "2:49:24", "remaining_time": "1:34:51", "throughput": 8700.06, "total_tokens": 88428816} +{"current_steps": 131200, "total_steps": 204665, "loss": 0.0002, "lr": 6.87526789445902e-07, "epoch": 3.2052378276696065, "percentage": 64.1, "elapsed_time": "2:49:24", "remaining_time": "1:34:51", "throughput": 8700.09, "total_tokens": 88432208} +{"current_steps": 131205, "total_steps": 204665, "loss": 0.0001, "lr": 6.874457831244355e-07, "epoch": 3.2053599785014537, "percentage": 64.11, "elapsed_time": "2:49:24", "remaining_time": "1:34:51", "throughput": 8700.16, "total_tokens": 88435920} +{"current_steps": 131210, "total_steps": 204665, "loss": 0.0, "lr": 6.873647790759478e-07, "epoch": 3.205482129333301, "percentage": 64.11, "elapsed_time": "2:49:25", "remaining_time": "1:34:50", "throughput": 8700.25, "total_tokens": 88440080} +{"current_steps": 131215, "total_steps": 204665, "loss": 0.0, "lr": 6.87283777301027e-07, "epoch": 3.205604280165148, "percentage": 64.11, "elapsed_time": "2:49:25", "remaining_time": "1:34:50", "throughput": 8700.29, "total_tokens": 88443536} +{"current_steps": 131220, "total_steps": 204665, "loss": 0.0001, "lr": 6.872027778002632e-07, "epoch": 3.2057264309969953, "percentage": 64.11, "elapsed_time": "2:49:25", "remaining_time": "1:34:49", "throughput": 8700.34, "total_tokens": 88447056} +{"current_steps": 131225, "total_steps": 204665, "loss": 0.0002, "lr": 6.871217805742444e-07, "epoch": 3.2058485818288425, "percentage": 64.12, "elapsed_time": "2:49:26", "remaining_time": "1:34:49", "throughput": 8700.39, "total_tokens": 88450640} +{"current_steps": 131230, "total_steps": 204665, "loss": 0.0001, "lr": 6.870407856235608e-07, "epoch": 3.205970732660689, "percentage": 64.12, "elapsed_time": "2:49:26", "remaining_time": "1:34:49", "throughput": 8700.45, "total_tokens": 88454352} +{"current_steps": 131235, "total_steps": 204665, "loss": 0.0, "lr": 6.869597929488e-07, "epoch": 3.2060928834925364, "percentage": 64.12, "elapsed_time": "2:49:26", "remaining_time": "1:34:48", "throughput": 8700.52, "total_tokens": 88458192} +{"current_steps": 131240, "total_steps": 204665, "loss": 0.0, "lr": 6.868788025505523e-07, "epoch": 3.2062150343243836, "percentage": 64.12, "elapsed_time": "2:49:27", "remaining_time": "1:34:48", "throughput": 8700.56, "total_tokens": 88461648} +{"current_steps": 131245, "total_steps": 204665, "loss": 0.0, "lr": 6.86797814429406e-07, "epoch": 3.206337185156231, "percentage": 64.13, "elapsed_time": "2:49:27", "remaining_time": "1:34:47", "throughput": 8700.65, "total_tokens": 88465616} +{"current_steps": 131250, "total_steps": 204665, "loss": 0.0, "lr": 6.867168285859504e-07, "epoch": 3.206459335988078, "percentage": 64.13, "elapsed_time": "2:49:28", "remaining_time": "1:34:47", "throughput": 8700.69, "total_tokens": 88469136} +{"current_steps": 131255, "total_steps": 204665, "loss": 0.0001, "lr": 6.866358450207741e-07, "epoch": 3.206581486819925, "percentage": 64.13, "elapsed_time": "2:49:28", "remaining_time": "1:34:47", "throughput": 8700.77, "total_tokens": 88473040} +{"current_steps": 131260, "total_steps": 204665, "loss": 0.0, "lr": 6.865548637344664e-07, "epoch": 3.2067036376517724, "percentage": 64.13, "elapsed_time": "2:49:28", "remaining_time": "1:34:46", "throughput": 8700.77, "total_tokens": 88475984} +{"current_steps": 131265, "total_steps": 204665, "loss": 0.0, "lr": 6.864738847276162e-07, "epoch": 3.2068257884836195, "percentage": 64.14, "elapsed_time": "2:49:29", "remaining_time": "1:34:46", "throughput": 8700.82, "total_tokens": 88479568} +{"current_steps": 131270, "total_steps": 204665, "loss": 0.0501, "lr": 6.863929080008118e-07, "epoch": 3.2069479393154667, "percentage": 64.14, "elapsed_time": "2:49:29", "remaining_time": "1:34:45", "throughput": 8700.83, "total_tokens": 88482640} +{"current_steps": 131275, "total_steps": 204665, "loss": 0.0, "lr": 6.863119335546423e-07, "epoch": 3.207070090147314, "percentage": 64.14, "elapsed_time": "2:49:29", "remaining_time": "1:34:45", "throughput": 8700.86, "total_tokens": 88486032} +{"current_steps": 131280, "total_steps": 204665, "loss": 0.0, "lr": 6.862309613896975e-07, "epoch": 3.207192240979161, "percentage": 64.14, "elapsed_time": "2:49:30", "remaining_time": "1:34:45", "throughput": 8700.94, "total_tokens": 88490000} +{"current_steps": 131285, "total_steps": 204665, "loss": 0.0, "lr": 6.86149991506565e-07, "epoch": 3.2073143918110083, "percentage": 64.15, "elapsed_time": "2:49:30", "remaining_time": "1:34:44", "throughput": 8700.97, "total_tokens": 88493264} +{"current_steps": 131290, "total_steps": 204665, "loss": 0.0, "lr": 6.860690239058347e-07, "epoch": 3.2074365426428555, "percentage": 64.15, "elapsed_time": "2:49:30", "remaining_time": "1:34:44", "throughput": 8701.03, "total_tokens": 88496976} +{"current_steps": 131295, "total_steps": 204665, "loss": 0.0553, "lr": 6.859880585880944e-07, "epoch": 3.2075586934747027, "percentage": 64.15, "elapsed_time": "2:49:31", "remaining_time": "1:34:43", "throughput": 8701.07, "total_tokens": 88500432} +{"current_steps": 131300, "total_steps": 204665, "loss": 0.0, "lr": 6.859070955539337e-07, "epoch": 3.20768084430655, "percentage": 64.15, "elapsed_time": "2:49:31", "remaining_time": "1:34:43", "throughput": 8701.11, "total_tokens": 88503952} +{"current_steps": 131305, "total_steps": 204665, "loss": 0.0377, "lr": 6.858261348039411e-07, "epoch": 3.207802995138397, "percentage": 64.16, "elapsed_time": "2:49:31", "remaining_time": "1:34:43", "throughput": 8701.15, "total_tokens": 88507280} +{"current_steps": 131310, "total_steps": 204665, "loss": 0.0, "lr": 6.85745176338705e-07, "epoch": 3.2079251459702443, "percentage": 64.16, "elapsed_time": "2:49:32", "remaining_time": "1:34:42", "throughput": 8701.18, "total_tokens": 88510672} +{"current_steps": 131315, "total_steps": 204665, "loss": 0.0, "lr": 6.856642201588149e-07, "epoch": 3.208047296802091, "percentage": 64.16, "elapsed_time": "2:49:32", "remaining_time": "1:34:42", "throughput": 8701.26, "total_tokens": 88514576} +{"current_steps": 131320, "total_steps": 204665, "loss": 0.0, "lr": 6.855832662648589e-07, "epoch": 3.208169447633938, "percentage": 64.16, "elapsed_time": "2:49:32", "remaining_time": "1:34:41", "throughput": 8701.31, "total_tokens": 88518224} +{"current_steps": 131325, "total_steps": 204665, "loss": 0.0, "lr": 6.855023146574262e-07, "epoch": 3.2082915984657854, "percentage": 64.17, "elapsed_time": "2:49:33", "remaining_time": "1:34:41", "throughput": 8701.37, "total_tokens": 88521872} +{"current_steps": 131330, "total_steps": 204665, "loss": 0.0526, "lr": 6.854213653371049e-07, "epoch": 3.2084137492976326, "percentage": 64.17, "elapsed_time": "2:49:33", "remaining_time": "1:34:41", "throughput": 8701.41, "total_tokens": 88525264} +{"current_steps": 131335, "total_steps": 204665, "loss": 0.0001, "lr": 6.85340418304484e-07, "epoch": 3.2085359001294798, "percentage": 64.17, "elapsed_time": "2:49:34", "remaining_time": "1:34:40", "throughput": 8701.41, "total_tokens": 88528272} +{"current_steps": 131340, "total_steps": 204665, "loss": 0.0, "lr": 6.852594735601527e-07, "epoch": 3.208658050961327, "percentage": 64.17, "elapsed_time": "2:49:34", "remaining_time": "1:34:40", "throughput": 8701.46, "total_tokens": 88531728} +{"current_steps": 131345, "total_steps": 204665, "loss": 0.0154, "lr": 6.851785311046987e-07, "epoch": 3.208780201793174, "percentage": 64.18, "elapsed_time": "2:49:34", "remaining_time": "1:34:39", "throughput": 8701.48, "total_tokens": 88534992} +{"current_steps": 131350, "total_steps": 204665, "loss": 0.0543, "lr": 6.850975909387115e-07, "epoch": 3.2089023526250213, "percentage": 64.18, "elapsed_time": "2:49:35", "remaining_time": "1:34:39", "throughput": 8701.52, "total_tokens": 88538384} +{"current_steps": 131355, "total_steps": 204665, "loss": 0.0552, "lr": 6.850166530627791e-07, "epoch": 3.2090245034568685, "percentage": 64.18, "elapsed_time": "2:49:35", "remaining_time": "1:34:38", "throughput": 8701.52, "total_tokens": 88541392} +{"current_steps": 131360, "total_steps": 204665, "loss": 0.0504, "lr": 6.849357174774901e-07, "epoch": 3.2091466542887157, "percentage": 64.18, "elapsed_time": "2:49:35", "remaining_time": "1:34:38", "throughput": 8701.55, "total_tokens": 88544656} +{"current_steps": 131365, "total_steps": 204665, "loss": 0.0004, "lr": 6.848547841834338e-07, "epoch": 3.209268805120563, "percentage": 64.19, "elapsed_time": "2:49:36", "remaining_time": "1:34:38", "throughput": 8701.59, "total_tokens": 88548176} +{"current_steps": 131370, "total_steps": 204665, "loss": 0.0, "lr": 6.847738531811978e-07, "epoch": 3.20939095595241, "percentage": 64.19, "elapsed_time": "2:49:36", "remaining_time": "1:34:37", "throughput": 8701.61, "total_tokens": 88551312} +{"current_steps": 131375, "total_steps": 204665, "loss": 0.0, "lr": 6.846929244713717e-07, "epoch": 3.2095131067842573, "percentage": 64.19, "elapsed_time": "2:49:36", "remaining_time": "1:34:37", "throughput": 8701.65, "total_tokens": 88554832} +{"current_steps": 131380, "total_steps": 204665, "loss": 0.0, "lr": 6.846119980545429e-07, "epoch": 3.2096352576161045, "percentage": 64.19, "elapsed_time": "2:49:37", "remaining_time": "1:34:36", "throughput": 8701.65, "total_tokens": 88557712} +{"current_steps": 131385, "total_steps": 204665, "loss": 0.0, "lr": 6.845310739313008e-07, "epoch": 3.2097574084479517, "percentage": 64.2, "elapsed_time": "2:49:37", "remaining_time": "1:34:36", "throughput": 8701.67, "total_tokens": 88560912} +{"current_steps": 131390, "total_steps": 204665, "loss": 0.0003, "lr": 6.844501521022333e-07, "epoch": 3.209879559279799, "percentage": 64.2, "elapsed_time": "2:49:37", "remaining_time": "1:34:36", "throughput": 8701.69, "total_tokens": 88564176} +{"current_steps": 131395, "total_steps": 204665, "loss": 0.0052, "lr": 6.843692325679293e-07, "epoch": 3.210001710111646, "percentage": 64.2, "elapsed_time": "2:49:38", "remaining_time": "1:34:35", "throughput": 8701.72, "total_tokens": 88567440} +{"current_steps": 131400, "total_steps": 204665, "loss": 0.0001, "lr": 6.842883153289771e-07, "epoch": 3.2101238609434932, "percentage": 64.2, "elapsed_time": "2:49:38", "remaining_time": "1:34:35", "throughput": 8701.78, "total_tokens": 88571152} +{"current_steps": 131405, "total_steps": 204665, "loss": 0.0, "lr": 6.842074003859654e-07, "epoch": 3.2102460117753404, "percentage": 64.2, "elapsed_time": "2:49:38", "remaining_time": "1:34:34", "throughput": 8701.84, "total_tokens": 88574800} +{"current_steps": 131410, "total_steps": 204665, "loss": 0.0, "lr": 6.841264877394822e-07, "epoch": 3.210368162607187, "percentage": 64.21, "elapsed_time": "2:49:39", "remaining_time": "1:34:34", "throughput": 8701.91, "total_tokens": 88578640} +{"current_steps": 131415, "total_steps": 204665, "loss": 0.0, "lr": 6.840455773901163e-07, "epoch": 3.2104903134390343, "percentage": 64.21, "elapsed_time": "2:49:39", "remaining_time": "1:34:34", "throughput": 8701.95, "total_tokens": 88582032} +{"current_steps": 131420, "total_steps": 204665, "loss": 0.0, "lr": 6.839646693384559e-07, "epoch": 3.2106124642708815, "percentage": 64.21, "elapsed_time": "2:49:39", "remaining_time": "1:34:33", "throughput": 8701.99, "total_tokens": 88585488} +{"current_steps": 131425, "total_steps": 204665, "loss": 0.0, "lr": 6.838837635850894e-07, "epoch": 3.2107346151027287, "percentage": 64.21, "elapsed_time": "2:49:40", "remaining_time": "1:34:33", "throughput": 8702.01, "total_tokens": 88588688} +{"current_steps": 131430, "total_steps": 204665, "loss": 0.0, "lr": 6.83802860130605e-07, "epoch": 3.210856765934576, "percentage": 64.22, "elapsed_time": "2:49:40", "remaining_time": "1:34:32", "throughput": 8702.07, "total_tokens": 88592400} +{"current_steps": 131435, "total_steps": 204665, "loss": 0.1358, "lr": 6.837219589755916e-07, "epoch": 3.210978916766423, "percentage": 64.22, "elapsed_time": "2:49:40", "remaining_time": "1:34:32", "throughput": 8702.08, "total_tokens": 88595536} +{"current_steps": 131440, "total_steps": 204665, "loss": 0.0, "lr": 6.836410601206368e-07, "epoch": 3.2111010675982703, "percentage": 64.22, "elapsed_time": "2:49:41", "remaining_time": "1:34:31", "throughput": 8702.11, "total_tokens": 88598800} +{"current_steps": 131445, "total_steps": 204665, "loss": 0.0003, "lr": 6.835601635663297e-07, "epoch": 3.2112232184301175, "percentage": 64.22, "elapsed_time": "2:49:41", "remaining_time": "1:34:31", "throughput": 8702.14, "total_tokens": 88602128} +{"current_steps": 131450, "total_steps": 204665, "loss": 0.0001, "lr": 6.834792693132578e-07, "epoch": 3.2113453692619647, "percentage": 64.23, "elapsed_time": "2:49:41", "remaining_time": "1:34:31", "throughput": 8702.17, "total_tokens": 88605456} +{"current_steps": 131455, "total_steps": 204665, "loss": 0.0, "lr": 6.8339837736201e-07, "epoch": 3.211467520093812, "percentage": 64.23, "elapsed_time": "2:49:42", "remaining_time": "1:34:30", "throughput": 8702.19, "total_tokens": 88608656} +{"current_steps": 131460, "total_steps": 204665, "loss": 0.0002, "lr": 6.833174877131746e-07, "epoch": 3.211589670925659, "percentage": 64.23, "elapsed_time": "2:49:42", "remaining_time": "1:34:30", "throughput": 8702.2, "total_tokens": 88611728} +{"current_steps": 131465, "total_steps": 204665, "loss": 0.1079, "lr": 6.832366003673392e-07, "epoch": 3.2117118217575062, "percentage": 64.23, "elapsed_time": "2:49:43", "remaining_time": "1:34:29", "throughput": 8702.23, "total_tokens": 88615056} +{"current_steps": 131470, "total_steps": 204665, "loss": 0.0, "lr": 6.831557153250929e-07, "epoch": 3.2118339725893534, "percentage": 64.24, "elapsed_time": "2:49:43", "remaining_time": "1:34:29", "throughput": 8702.24, "total_tokens": 88618128} +{"current_steps": 131475, "total_steps": 204665, "loss": 0.0006, "lr": 6.830748325870229e-07, "epoch": 3.2119561234212006, "percentage": 64.24, "elapsed_time": "2:49:43", "remaining_time": "1:34:29", "throughput": 8702.24, "total_tokens": 88621136} +{"current_steps": 131480, "total_steps": 204665, "loss": 0.0466, "lr": 6.829939521537184e-07, "epoch": 3.212078274253048, "percentage": 64.24, "elapsed_time": "2:49:44", "remaining_time": "1:34:28", "throughput": 8702.3, "total_tokens": 88624784} +{"current_steps": 131485, "total_steps": 204665, "loss": 0.0501, "lr": 6.829130740257668e-07, "epoch": 3.212200425084895, "percentage": 64.24, "elapsed_time": "2:49:44", "remaining_time": "1:34:28", "throughput": 8702.31, "total_tokens": 88627792} +{"current_steps": 131490, "total_steps": 204665, "loss": 0.0589, "lr": 6.828321982037566e-07, "epoch": 3.212322575916742, "percentage": 64.25, "elapsed_time": "2:49:44", "remaining_time": "1:34:27", "throughput": 8702.36, "total_tokens": 88631376} +{"current_steps": 131495, "total_steps": 204665, "loss": 0.0, "lr": 6.827513246882763e-07, "epoch": 3.212444726748589, "percentage": 64.25, "elapsed_time": "2:49:45", "remaining_time": "1:34:27", "throughput": 8702.38, "total_tokens": 88634576} +{"current_steps": 131500, "total_steps": 204665, "loss": 0.0917, "lr": 6.826704534799135e-07, "epoch": 3.212566877580436, "percentage": 64.25, "elapsed_time": "2:49:45", "remaining_time": "1:34:27", "throughput": 8702.38, "total_tokens": 88637584} +{"current_steps": 131505, "total_steps": 204665, "loss": 0.0, "lr": 6.825895845792567e-07, "epoch": 3.2126890284122833, "percentage": 64.25, "elapsed_time": "2:49:45", "remaining_time": "1:34:26", "throughput": 8702.45, "total_tokens": 88641296} +{"current_steps": 131510, "total_steps": 204665, "loss": 0.0, "lr": 6.825087179868935e-07, "epoch": 3.2128111792441305, "percentage": 64.26, "elapsed_time": "2:49:46", "remaining_time": "1:34:26", "throughput": 8702.48, "total_tokens": 88644624} +{"current_steps": 131515, "total_steps": 204665, "loss": 0.0, "lr": 6.824278537034125e-07, "epoch": 3.2129333300759777, "percentage": 64.26, "elapsed_time": "2:49:46", "remaining_time": "1:34:25", "throughput": 8702.49, "total_tokens": 88647696} +{"current_steps": 131520, "total_steps": 204665, "loss": 0.0001, "lr": 6.823469917294017e-07, "epoch": 3.213055480907825, "percentage": 64.26, "elapsed_time": "2:49:46", "remaining_time": "1:34:25", "throughput": 8702.52, "total_tokens": 88651088} +{"current_steps": 131525, "total_steps": 204665, "loss": 0.0, "lr": 6.822661320654486e-07, "epoch": 3.213177631739672, "percentage": 64.26, "elapsed_time": "2:49:47", "remaining_time": "1:34:25", "throughput": 8702.53, "total_tokens": 88654160} +{"current_steps": 131530, "total_steps": 204665, "loss": 0.0001, "lr": 6.821852747121422e-07, "epoch": 3.2132997825715193, "percentage": 64.27, "elapsed_time": "2:49:47", "remaining_time": "1:34:24", "throughput": 8702.61, "total_tokens": 88658000} +{"current_steps": 131535, "total_steps": 204665, "loss": 0.0002, "lr": 6.821044196700694e-07, "epoch": 3.2134219334033665, "percentage": 64.27, "elapsed_time": "2:49:47", "remaining_time": "1:34:24", "throughput": 8702.64, "total_tokens": 88661392} +{"current_steps": 131540, "total_steps": 204665, "loss": 0.0001, "lr": 6.820235669398192e-07, "epoch": 3.2135440842352136, "percentage": 64.27, "elapsed_time": "2:49:48", "remaining_time": "1:34:23", "throughput": 8702.74, "total_tokens": 88665488} +{"current_steps": 131545, "total_steps": 204665, "loss": 0.0001, "lr": 6.819427165219789e-07, "epoch": 3.213666235067061, "percentage": 64.27, "elapsed_time": "2:49:48", "remaining_time": "1:34:23", "throughput": 8702.77, "total_tokens": 88668880} +{"current_steps": 131550, "total_steps": 204665, "loss": 0.0, "lr": 6.818618684171367e-07, "epoch": 3.213788385898908, "percentage": 64.28, "elapsed_time": "2:49:48", "remaining_time": "1:34:22", "throughput": 8702.83, "total_tokens": 88672528} +{"current_steps": 131555, "total_steps": 204665, "loss": 0.0348, "lr": 6.817810226258806e-07, "epoch": 3.213910536730755, "percentage": 64.28, "elapsed_time": "2:49:49", "remaining_time": "1:34:22", "throughput": 8702.83, "total_tokens": 88675536} +{"current_steps": 131560, "total_steps": 204665, "loss": 0.0, "lr": 6.817001791487986e-07, "epoch": 3.2140326875626024, "percentage": 64.28, "elapsed_time": "2:49:49", "remaining_time": "1:34:22", "throughput": 8702.85, "total_tokens": 88678800} +{"current_steps": 131565, "total_steps": 204665, "loss": 0.0525, "lr": 6.816193379864785e-07, "epoch": 3.2141548383944496, "percentage": 64.28, "elapsed_time": "2:49:49", "remaining_time": "1:34:21", "throughput": 8702.9, "total_tokens": 88682320} +{"current_steps": 131570, "total_steps": 204665, "loss": 0.0, "lr": 6.815384991395081e-07, "epoch": 3.214276989226297, "percentage": 64.29, "elapsed_time": "2:49:50", "remaining_time": "1:34:21", "throughput": 8702.97, "total_tokens": 88686096} +{"current_steps": 131575, "total_steps": 204665, "loss": 0.0001, "lr": 6.814576626084757e-07, "epoch": 3.214399140058144, "percentage": 64.29, "elapsed_time": "2:49:50", "remaining_time": "1:34:20", "throughput": 8702.99, "total_tokens": 88689296} +{"current_steps": 131580, "total_steps": 204665, "loss": 0.0, "lr": 6.813768283939683e-07, "epoch": 3.214521290889991, "percentage": 64.29, "elapsed_time": "2:49:51", "remaining_time": "1:34:20", "throughput": 8703.0, "total_tokens": 88692368} +{"current_steps": 131585, "total_steps": 204665, "loss": 0.0, "lr": 6.812959964965746e-07, "epoch": 3.2146434417218384, "percentage": 64.29, "elapsed_time": "2:49:51", "remaining_time": "1:34:20", "throughput": 8703.03, "total_tokens": 88695632} +{"current_steps": 131590, "total_steps": 204665, "loss": 0.0, "lr": 6.812151669168821e-07, "epoch": 3.214765592553685, "percentage": 64.3, "elapsed_time": "2:49:51", "remaining_time": "1:34:19", "throughput": 8703.07, "total_tokens": 88699088} +{"current_steps": 131595, "total_steps": 204665, "loss": 0.0838, "lr": 6.811343396554786e-07, "epoch": 3.2148877433855323, "percentage": 64.3, "elapsed_time": "2:49:52", "remaining_time": "1:34:19", "throughput": 8703.07, "total_tokens": 88702096} +{"current_steps": 131600, "total_steps": 204665, "loss": 0.0002, "lr": 6.810535147129524e-07, "epoch": 3.2150098942173795, "percentage": 64.3, "elapsed_time": "2:49:52", "remaining_time": "1:34:18", "throughput": 8703.06, "total_tokens": 88704848} +{"current_steps": 131605, "total_steps": 204665, "loss": 0.0513, "lr": 6.809726920898902e-07, "epoch": 3.2151320450492267, "percentage": 64.3, "elapsed_time": "2:49:52", "remaining_time": "1:34:18", "throughput": 8703.12, "total_tokens": 88708432} +{"current_steps": 131610, "total_steps": 204665, "loss": 0.0002, "lr": 6.808918717868805e-07, "epoch": 3.215254195881074, "percentage": 64.31, "elapsed_time": "2:49:53", "remaining_time": "1:34:18", "throughput": 8703.14, "total_tokens": 88711696} +{"current_steps": 131615, "total_steps": 204665, "loss": 0.0001, "lr": 6.808110538045114e-07, "epoch": 3.215376346712921, "percentage": 64.31, "elapsed_time": "2:49:53", "remaining_time": "1:34:17", "throughput": 8703.17, "total_tokens": 88714960} +{"current_steps": 131620, "total_steps": 204665, "loss": 0.0, "lr": 6.807302381433697e-07, "epoch": 3.2154984975447682, "percentage": 64.31, "elapsed_time": "2:49:53", "remaining_time": "1:34:17", "throughput": 8703.23, "total_tokens": 88718672} +{"current_steps": 131625, "total_steps": 204665, "loss": 0.0688, "lr": 6.806494248040439e-07, "epoch": 3.2156206483766154, "percentage": 64.31, "elapsed_time": "2:49:54", "remaining_time": "1:34:16", "throughput": 8703.27, "total_tokens": 88722128} +{"current_steps": 131630, "total_steps": 204665, "loss": 0.0371, "lr": 6.805686137871211e-07, "epoch": 3.2157427992084626, "percentage": 64.31, "elapsed_time": "2:49:54", "remaining_time": "1:34:16", "throughput": 8703.27, "total_tokens": 88725200} +{"current_steps": 131635, "total_steps": 204665, "loss": 0.0002, "lr": 6.804878050931895e-07, "epoch": 3.21586495004031, "percentage": 64.32, "elapsed_time": "2:49:54", "remaining_time": "1:34:15", "throughput": 8703.27, "total_tokens": 88728144} +{"current_steps": 131640, "total_steps": 204665, "loss": 0.0002, "lr": 6.804069987228363e-07, "epoch": 3.215987100872157, "percentage": 64.32, "elapsed_time": "2:49:55", "remaining_time": "1:34:15", "throughput": 8703.31, "total_tokens": 88731536} +{"current_steps": 131645, "total_steps": 204665, "loss": 0.0001, "lr": 6.803261946766492e-07, "epoch": 3.216109251704004, "percentage": 64.32, "elapsed_time": "2:49:55", "remaining_time": "1:34:15", "throughput": 8703.34, "total_tokens": 88734864} +{"current_steps": 131650, "total_steps": 204665, "loss": 0.0551, "lr": 6.802453929552165e-07, "epoch": 3.2162314025358514, "percentage": 64.32, "elapsed_time": "2:49:55", "remaining_time": "1:34:14", "throughput": 8703.37, "total_tokens": 88738256} +{"current_steps": 131655, "total_steps": 204665, "loss": 0.0355, "lr": 6.801645935591249e-07, "epoch": 3.2163535533676986, "percentage": 64.33, "elapsed_time": "2:49:56", "remaining_time": "1:34:14", "throughput": 8703.39, "total_tokens": 88741392} +{"current_steps": 131660, "total_steps": 204665, "loss": 0.0002, "lr": 6.800837964889627e-07, "epoch": 3.2164757041995458, "percentage": 64.33, "elapsed_time": "2:49:56", "remaining_time": "1:34:13", "throughput": 8703.44, "total_tokens": 88744976} +{"current_steps": 131665, "total_steps": 204665, "loss": 0.0001, "lr": 6.800030017453171e-07, "epoch": 3.216597855031393, "percentage": 64.33, "elapsed_time": "2:49:56", "remaining_time": "1:34:13", "throughput": 8703.46, "total_tokens": 88748304} +{"current_steps": 131670, "total_steps": 204665, "loss": 0.0, "lr": 6.799222093287756e-07, "epoch": 3.21672000586324, "percentage": 64.33, "elapsed_time": "2:49:57", "remaining_time": "1:34:13", "throughput": 8703.48, "total_tokens": 88751504} +{"current_steps": 131675, "total_steps": 204665, "loss": 0.0001, "lr": 6.79841419239926e-07, "epoch": 3.216842156695087, "percentage": 64.34, "elapsed_time": "2:49:57", "remaining_time": "1:34:12", "throughput": 8703.47, "total_tokens": 88754256} +{"current_steps": 131680, "total_steps": 204665, "loss": 0.0, "lr": 6.797606314793556e-07, "epoch": 3.216964307526934, "percentage": 64.34, "elapsed_time": "2:49:57", "remaining_time": "1:34:12", "throughput": 8703.47, "total_tokens": 88757264} +{"current_steps": 131685, "total_steps": 204665, "loss": 0.0001, "lr": 6.796798460476523e-07, "epoch": 3.2170864583587813, "percentage": 64.34, "elapsed_time": "2:49:58", "remaining_time": "1:34:11", "throughput": 8703.49, "total_tokens": 88760400} +{"current_steps": 131690, "total_steps": 204665, "loss": 0.0004, "lr": 6.795990629454029e-07, "epoch": 3.2172086091906285, "percentage": 64.34, "elapsed_time": "2:49:58", "remaining_time": "1:34:11", "throughput": 8703.53, "total_tokens": 88763920} +{"current_steps": 131695, "total_steps": 204665, "loss": 0.0001, "lr": 6.795182821731957e-07, "epoch": 3.2173307600224756, "percentage": 64.35, "elapsed_time": "2:49:58", "remaining_time": "1:34:11", "throughput": 8703.55, "total_tokens": 88767056} +{"current_steps": 131700, "total_steps": 204665, "loss": 0.0, "lr": 6.794375037316173e-07, "epoch": 3.217452910854323, "percentage": 64.35, "elapsed_time": "2:49:59", "remaining_time": "1:34:10", "throughput": 8703.56, "total_tokens": 88770064} +{"current_steps": 131705, "total_steps": 204665, "loss": 0.0, "lr": 6.793567276212557e-07, "epoch": 3.21757506168617, "percentage": 64.35, "elapsed_time": "2:49:59", "remaining_time": "1:34:10", "throughput": 8703.57, "total_tokens": 88773136} +{"current_steps": 131710, "total_steps": 204665, "loss": 0.0, "lr": 6.792759538426987e-07, "epoch": 3.217697212518017, "percentage": 64.35, "elapsed_time": "2:49:59", "remaining_time": "1:34:09", "throughput": 8703.58, "total_tokens": 88776208} +{"current_steps": 131715, "total_steps": 204665, "loss": 0.0001, "lr": 6.791951823965327e-07, "epoch": 3.2178193633498644, "percentage": 64.36, "elapsed_time": "2:50:00", "remaining_time": "1:34:09", "throughput": 8703.61, "total_tokens": 88779664} +{"current_steps": 131720, "total_steps": 204665, "loss": 0.0, "lr": 6.791144132833459e-07, "epoch": 3.2179415141817116, "percentage": 64.36, "elapsed_time": "2:50:00", "remaining_time": "1:34:09", "throughput": 8703.65, "total_tokens": 88783056} +{"current_steps": 131725, "total_steps": 204665, "loss": 0.0, "lr": 6.790336465037254e-07, "epoch": 3.218063665013559, "percentage": 64.36, "elapsed_time": "2:50:01", "remaining_time": "1:34:08", "throughput": 8703.66, "total_tokens": 88786128} +{"current_steps": 131730, "total_steps": 204665, "loss": 0.0001, "lr": 6.789528820582587e-07, "epoch": 3.218185815845406, "percentage": 64.36, "elapsed_time": "2:50:01", "remaining_time": "1:34:08", "throughput": 8703.69, "total_tokens": 88789456} +{"current_steps": 131735, "total_steps": 204665, "loss": 0.0, "lr": 6.788721199475325e-07, "epoch": 3.218307966677253, "percentage": 64.37, "elapsed_time": "2:50:01", "remaining_time": "1:34:07", "throughput": 8703.74, "total_tokens": 88793168} +{"current_steps": 131740, "total_steps": 204665, "loss": 0.0001, "lr": 6.787913601721346e-07, "epoch": 3.2184301175091004, "percentage": 64.37, "elapsed_time": "2:50:02", "remaining_time": "1:34:07", "throughput": 8703.76, "total_tokens": 88796368} +{"current_steps": 131745, "total_steps": 204665, "loss": 0.0, "lr": 6.787106027326529e-07, "epoch": 3.2185522683409475, "percentage": 64.37, "elapsed_time": "2:50:02", "remaining_time": "1:34:06", "throughput": 8703.8, "total_tokens": 88799824} +{"current_steps": 131750, "total_steps": 204665, "loss": 0.0, "lr": 6.786298476296737e-07, "epoch": 3.2186744191727947, "percentage": 64.37, "elapsed_time": "2:50:02", "remaining_time": "1:34:06", "throughput": 8703.84, "total_tokens": 88803280} +{"current_steps": 131755, "total_steps": 204665, "loss": 0.0595, "lr": 6.785490948637849e-07, "epoch": 3.218796570004642, "percentage": 64.38, "elapsed_time": "2:50:03", "remaining_time": "1:34:06", "throughput": 8703.9, "total_tokens": 88806928} +{"current_steps": 131760, "total_steps": 204665, "loss": 0.105, "lr": 6.784683444355732e-07, "epoch": 3.2189187208364887, "percentage": 64.38, "elapsed_time": "2:50:03", "remaining_time": "1:34:05", "throughput": 8703.92, "total_tokens": 88810192} +{"current_steps": 131765, "total_steps": 204665, "loss": 0.0001, "lr": 6.783875963456264e-07, "epoch": 3.219040871668336, "percentage": 64.38, "elapsed_time": "2:50:03", "remaining_time": "1:34:05", "throughput": 8703.95, "total_tokens": 88813456} +{"current_steps": 131770, "total_steps": 204665, "loss": 0.0, "lr": 6.783068505945319e-07, "epoch": 3.219163022500183, "percentage": 64.38, "elapsed_time": "2:50:04", "remaining_time": "1:34:04", "throughput": 8704.02, "total_tokens": 88817360} +{"current_steps": 131775, "total_steps": 204665, "loss": 0.0382, "lr": 6.782261071828759e-07, "epoch": 3.2192851733320302, "percentage": 64.39, "elapsed_time": "2:50:04", "remaining_time": "1:34:04", "throughput": 8704.04, "total_tokens": 88820560} +{"current_steps": 131780, "total_steps": 204665, "loss": 0.0001, "lr": 6.781453661112466e-07, "epoch": 3.2194073241638774, "percentage": 64.39, "elapsed_time": "2:50:04", "remaining_time": "1:34:04", "throughput": 8704.06, "total_tokens": 88823824} +{"current_steps": 131785, "total_steps": 204665, "loss": 0.0, "lr": 6.780646273802304e-07, "epoch": 3.2195294749957246, "percentage": 64.39, "elapsed_time": "2:50:05", "remaining_time": "1:34:03", "throughput": 8704.11, "total_tokens": 88827408} +{"current_steps": 131790, "total_steps": 204665, "loss": 0.0002, "lr": 6.779838909904152e-07, "epoch": 3.219651625827572, "percentage": 64.39, "elapsed_time": "2:50:05", "remaining_time": "1:34:03", "throughput": 8704.12, "total_tokens": 88830480} +{"current_steps": 131795, "total_steps": 204665, "loss": 0.0, "lr": 6.779031569423874e-07, "epoch": 3.219773776659419, "percentage": 64.4, "elapsed_time": "2:50:05", "remaining_time": "1:34:02", "throughput": 8704.17, "total_tokens": 88834000} +{"current_steps": 131800, "total_steps": 204665, "loss": 0.0002, "lr": 6.778224252367344e-07, "epoch": 3.219895927491266, "percentage": 64.4, "elapsed_time": "2:50:06", "remaining_time": "1:34:02", "throughput": 8704.17, "total_tokens": 88837072} +{"current_steps": 131805, "total_steps": 204665, "loss": 0.0, "lr": 6.777416958740438e-07, "epoch": 3.2200180783231134, "percentage": 64.4, "elapsed_time": "2:50:06", "remaining_time": "1:34:02", "throughput": 8704.19, "total_tokens": 88840272} +{"current_steps": 131810, "total_steps": 204665, "loss": 0.0, "lr": 6.77660968854902e-07, "epoch": 3.2201402291549606, "percentage": 64.4, "elapsed_time": "2:50:06", "remaining_time": "1:34:01", "throughput": 8704.23, "total_tokens": 88843664} +{"current_steps": 131815, "total_steps": 204665, "loss": 0.0, "lr": 6.775802441798966e-07, "epoch": 3.2202623799868078, "percentage": 64.41, "elapsed_time": "2:50:07", "remaining_time": "1:34:01", "throughput": 8704.26, "total_tokens": 88847056} +{"current_steps": 131820, "total_steps": 204665, "loss": 0.0, "lr": 6.774995218496142e-07, "epoch": 3.220384530818655, "percentage": 64.41, "elapsed_time": "2:50:07", "remaining_time": "1:34:00", "throughput": 8704.29, "total_tokens": 88850384} +{"current_steps": 131825, "total_steps": 204665, "loss": 0.0001, "lr": 6.774188018646421e-07, "epoch": 3.220506681650502, "percentage": 64.41, "elapsed_time": "2:50:07", "remaining_time": "1:34:00", "throughput": 8704.3, "total_tokens": 88853392} +{"current_steps": 131830, "total_steps": 204665, "loss": 0.0, "lr": 6.773380842255671e-07, "epoch": 3.2206288324823493, "percentage": 64.41, "elapsed_time": "2:50:08", "remaining_time": "1:34:00", "throughput": 8704.35, "total_tokens": 88856976} +{"current_steps": 131835, "total_steps": 204665, "loss": 0.0, "lr": 6.772573689329763e-07, "epoch": 3.2207509833141965, "percentage": 64.42, "elapsed_time": "2:50:08", "remaining_time": "1:33:59", "throughput": 8704.39, "total_tokens": 88860496} +{"current_steps": 131840, "total_steps": 204665, "loss": 0.0, "lr": 6.77176655987457e-07, "epoch": 3.2208731341460437, "percentage": 64.42, "elapsed_time": "2:50:09", "remaining_time": "1:33:59", "throughput": 8704.46, "total_tokens": 88864336} +{"current_steps": 131845, "total_steps": 204665, "loss": 0.0, "lr": 6.770959453895955e-07, "epoch": 3.220995284977891, "percentage": 64.42, "elapsed_time": "2:50:09", "remaining_time": "1:33:58", "throughput": 8704.51, "total_tokens": 88867856} +{"current_steps": 131850, "total_steps": 204665, "loss": 0.0, "lr": 6.770152371399795e-07, "epoch": 3.221117435809738, "percentage": 64.42, "elapsed_time": "2:50:09", "remaining_time": "1:33:58", "throughput": 8704.57, "total_tokens": 88871632} +{"current_steps": 131855, "total_steps": 204665, "loss": 0.0, "lr": 6.769345312391952e-07, "epoch": 3.221239586641585, "percentage": 64.42, "elapsed_time": "2:50:10", "remaining_time": "1:33:57", "throughput": 8704.6, "total_tokens": 88874960} +{"current_steps": 131860, "total_steps": 204665, "loss": 0.0, "lr": 6.7685382768783e-07, "epoch": 3.221361737473432, "percentage": 64.43, "elapsed_time": "2:50:10", "remaining_time": "1:33:57", "throughput": 8704.64, "total_tokens": 88878416} +{"current_steps": 131865, "total_steps": 204665, "loss": 0.0563, "lr": 6.767731264864709e-07, "epoch": 3.221483888305279, "percentage": 64.43, "elapsed_time": "2:50:10", "remaining_time": "1:33:57", "throughput": 8704.67, "total_tokens": 88881744} +{"current_steps": 131870, "total_steps": 204665, "loss": 0.0, "lr": 6.766924276357044e-07, "epoch": 3.2216060391371264, "percentage": 64.43, "elapsed_time": "2:50:11", "remaining_time": "1:33:56", "throughput": 8704.69, "total_tokens": 88884944} +{"current_steps": 131875, "total_steps": 204665, "loss": 0.0, "lr": 6.766117311361177e-07, "epoch": 3.2217281899689736, "percentage": 64.43, "elapsed_time": "2:50:11", "remaining_time": "1:33:56", "throughput": 8704.7, "total_tokens": 88888016} +{"current_steps": 131880, "total_steps": 204665, "loss": 0.0, "lr": 6.765310369882973e-07, "epoch": 3.2218503408008208, "percentage": 64.44, "elapsed_time": "2:50:11", "remaining_time": "1:33:55", "throughput": 8704.76, "total_tokens": 88891728} +{"current_steps": 131885, "total_steps": 204665, "loss": 0.0, "lr": 6.764503451928305e-07, "epoch": 3.221972491632668, "percentage": 64.44, "elapsed_time": "2:50:12", "remaining_time": "1:33:55", "throughput": 8704.77, "total_tokens": 88894736} +{"current_steps": 131890, "total_steps": 204665, "loss": 0.0181, "lr": 6.763696557503034e-07, "epoch": 3.222094642464515, "percentage": 64.44, "elapsed_time": "2:50:12", "remaining_time": "1:33:55", "throughput": 8704.8, "total_tokens": 88898064} +{"current_steps": 131895, "total_steps": 204665, "loss": 0.0378, "lr": 6.762889686613032e-07, "epoch": 3.2222167932963623, "percentage": 64.44, "elapsed_time": "2:50:12", "remaining_time": "1:33:54", "throughput": 8704.83, "total_tokens": 88901456} +{"current_steps": 131900, "total_steps": 204665, "loss": 0.0, "lr": 6.762082839264169e-07, "epoch": 3.2223389441282095, "percentage": 64.45, "elapsed_time": "2:50:13", "remaining_time": "1:33:54", "throughput": 8704.87, "total_tokens": 88904912} +{"current_steps": 131905, "total_steps": 204665, "loss": 0.0001, "lr": 6.761276015462309e-07, "epoch": 3.2224610949600567, "percentage": 64.45, "elapsed_time": "2:50:13", "remaining_time": "1:33:53", "throughput": 8704.88, "total_tokens": 88907984} +{"current_steps": 131910, "total_steps": 204665, "loss": 0.0, "lr": 6.760469215213324e-07, "epoch": 3.222583245791904, "percentage": 64.45, "elapsed_time": "2:50:13", "remaining_time": "1:33:53", "throughput": 8704.94, "total_tokens": 88911632} +{"current_steps": 131915, "total_steps": 204665, "loss": 0.0317, "lr": 6.759662438523074e-07, "epoch": 3.222705396623751, "percentage": 64.45, "elapsed_time": "2:50:14", "remaining_time": "1:33:53", "throughput": 8704.99, "total_tokens": 88915152} +{"current_steps": 131920, "total_steps": 204665, "loss": 0.0, "lr": 6.758855685397432e-07, "epoch": 3.2228275474555983, "percentage": 64.46, "elapsed_time": "2:50:14", "remaining_time": "1:33:52", "throughput": 8705.01, "total_tokens": 88918416} +{"current_steps": 131925, "total_steps": 204665, "loss": 0.0, "lr": 6.758048955842266e-07, "epoch": 3.2229496982874455, "percentage": 64.46, "elapsed_time": "2:50:14", "remaining_time": "1:33:52", "throughput": 8705.02, "total_tokens": 88921488} +{"current_steps": 131930, "total_steps": 204665, "loss": 0.0663, "lr": 6.757242249863435e-07, "epoch": 3.2230718491192927, "percentage": 64.46, "elapsed_time": "2:50:15", "remaining_time": "1:33:51", "throughput": 8705.06, "total_tokens": 88924944} +{"current_steps": 131935, "total_steps": 204665, "loss": 0.0397, "lr": 6.756435567466816e-07, "epoch": 3.22319399995114, "percentage": 64.46, "elapsed_time": "2:50:15", "remaining_time": "1:33:51", "throughput": 8705.08, "total_tokens": 88928144} +{"current_steps": 131940, "total_steps": 204665, "loss": 0.0, "lr": 6.755628908658265e-07, "epoch": 3.2233161507829866, "percentage": 64.47, "elapsed_time": "2:50:16", "remaining_time": "1:33:51", "throughput": 8705.12, "total_tokens": 88931472} +{"current_steps": 131945, "total_steps": 204665, "loss": 0.0, "lr": 6.754822273443657e-07, "epoch": 3.223438301614834, "percentage": 64.47, "elapsed_time": "2:50:16", "remaining_time": "1:33:50", "throughput": 8705.18, "total_tokens": 88935248} +{"current_steps": 131950, "total_steps": 204665, "loss": 0.0, "lr": 6.754015661828851e-07, "epoch": 3.223560452446681, "percentage": 64.47, "elapsed_time": "2:50:16", "remaining_time": "1:33:50", "throughput": 8705.21, "total_tokens": 88938576} +{"current_steps": 131955, "total_steps": 204665, "loss": 0.0, "lr": 6.753209073819717e-07, "epoch": 3.223682603278528, "percentage": 64.47, "elapsed_time": "2:50:17", "remaining_time": "1:33:49", "throughput": 8705.25, "total_tokens": 88942032} +{"current_steps": 131960, "total_steps": 204665, "loss": 0.0, "lr": 6.752402509422121e-07, "epoch": 3.2238047541103754, "percentage": 64.48, "elapsed_time": "2:50:17", "remaining_time": "1:33:49", "throughput": 8705.26, "total_tokens": 88945104} +{"current_steps": 131965, "total_steps": 204665, "loss": 0.0, "lr": 6.751595968641925e-07, "epoch": 3.2239269049422226, "percentage": 64.48, "elapsed_time": "2:50:17", "remaining_time": "1:33:49", "throughput": 8705.34, "total_tokens": 88949072} +{"current_steps": 131970, "total_steps": 204665, "loss": 0.0728, "lr": 6.750789451485002e-07, "epoch": 3.2240490557740697, "percentage": 64.48, "elapsed_time": "2:50:18", "remaining_time": "1:33:48", "throughput": 8705.41, "total_tokens": 88952976} +{"current_steps": 131975, "total_steps": 204665, "loss": 0.0, "lr": 6.74998295795721e-07, "epoch": 3.224171206605917, "percentage": 64.48, "elapsed_time": "2:50:18", "remaining_time": "1:33:48", "throughput": 8705.44, "total_tokens": 88956304} +{"current_steps": 131980, "total_steps": 204665, "loss": 0.0, "lr": 6.749176488064414e-07, "epoch": 3.224293357437764, "percentage": 64.49, "elapsed_time": "2:50:18", "remaining_time": "1:33:47", "throughput": 8705.48, "total_tokens": 88959696} +{"current_steps": 131985, "total_steps": 204665, "loss": 0.0, "lr": 6.748370041812482e-07, "epoch": 3.2244155082696113, "percentage": 64.49, "elapsed_time": "2:50:19", "remaining_time": "1:33:47", "throughput": 8705.51, "total_tokens": 88963088} +{"current_steps": 131990, "total_steps": 204665, "loss": 0.0006, "lr": 6.747563619207276e-07, "epoch": 3.2245376591014585, "percentage": 64.49, "elapsed_time": "2:50:19", "remaining_time": "1:33:46", "throughput": 8705.53, "total_tokens": 88966288} +{"current_steps": 131995, "total_steps": 204665, "loss": 0.0, "lr": 6.746757220254667e-07, "epoch": 3.2246598099333057, "percentage": 64.49, "elapsed_time": "2:50:19", "remaining_time": "1:33:46", "throughput": 8705.61, "total_tokens": 88970256} +{"current_steps": 132000, "total_steps": 204665, "loss": 0.0001, "lr": 6.745950844960509e-07, "epoch": 3.224781960765153, "percentage": 64.5, "elapsed_time": "2:50:20", "remaining_time": "1:33:46", "throughput": 8705.64, "total_tokens": 88973520} +{"current_steps": 132005, "total_steps": 204665, "loss": 0.0, "lr": 6.745144493330676e-07, "epoch": 3.224904111597, "percentage": 64.5, "elapsed_time": "2:50:20", "remaining_time": "1:33:45", "throughput": 8705.69, "total_tokens": 88977104} +{"current_steps": 132010, "total_steps": 204665, "loss": 0.0002, "lr": 6.744338165371023e-07, "epoch": 3.2250262624288473, "percentage": 64.5, "elapsed_time": "2:50:20", "remaining_time": "1:33:45", "throughput": 8705.72, "total_tokens": 88980560} +{"current_steps": 132015, "total_steps": 204665, "loss": 0.0, "lr": 6.74353186108742e-07, "epoch": 3.2251484132606945, "percentage": 64.5, "elapsed_time": "2:50:21", "remaining_time": "1:33:44", "throughput": 8705.74, "total_tokens": 88983696} +{"current_steps": 132020, "total_steps": 204665, "loss": 0.0001, "lr": 6.742725580485732e-07, "epoch": 3.2252705640925416, "percentage": 64.51, "elapsed_time": "2:50:21", "remaining_time": "1:33:44", "throughput": 8705.75, "total_tokens": 88986704} +{"current_steps": 132025, "total_steps": 204665, "loss": 0.0001, "lr": 6.741919323571818e-07, "epoch": 3.225392714924389, "percentage": 64.51, "elapsed_time": "2:50:21", "remaining_time": "1:33:44", "throughput": 8705.74, "total_tokens": 88989584} +{"current_steps": 132030, "total_steps": 204665, "loss": 0.0636, "lr": 6.741113090351544e-07, "epoch": 3.225514865756236, "percentage": 64.51, "elapsed_time": "2:50:22", "remaining_time": "1:33:43", "throughput": 8705.78, "total_tokens": 88992976} +{"current_steps": 132035, "total_steps": 204665, "loss": 0.0001, "lr": 6.740306880830771e-07, "epoch": 3.2256370165880828, "percentage": 64.51, "elapsed_time": "2:50:22", "remaining_time": "1:33:43", "throughput": 8705.81, "total_tokens": 88996368} +{"current_steps": 132040, "total_steps": 204665, "loss": 0.0001, "lr": 6.739500695015365e-07, "epoch": 3.22575916741993, "percentage": 64.52, "elapsed_time": "2:50:22", "remaining_time": "1:33:42", "throughput": 8705.83, "total_tokens": 88999568} +{"current_steps": 132045, "total_steps": 204665, "loss": 0.0004, "lr": 6.738694532911184e-07, "epoch": 3.225881318251777, "percentage": 64.52, "elapsed_time": "2:50:23", "remaining_time": "1:33:42", "throughput": 8705.86, "total_tokens": 89002896} +{"current_steps": 132050, "total_steps": 204665, "loss": 0.0001, "lr": 6.737888394524092e-07, "epoch": 3.2260034690836243, "percentage": 64.52, "elapsed_time": "2:50:23", "remaining_time": "1:33:42", "throughput": 8705.91, "total_tokens": 89006480} +{"current_steps": 132055, "total_steps": 204665, "loss": 0.0002, "lr": 6.737082279859958e-07, "epoch": 3.2261256199154715, "percentage": 64.52, "elapsed_time": "2:50:24", "remaining_time": "1:33:41", "throughput": 8705.95, "total_tokens": 89009936} +{"current_steps": 132060, "total_steps": 204665, "loss": 0.0, "lr": 6.736276188924636e-07, "epoch": 3.2262477707473187, "percentage": 64.52, "elapsed_time": "2:50:24", "remaining_time": "1:33:41", "throughput": 8705.97, "total_tokens": 89013136} +{"current_steps": 132065, "total_steps": 204665, "loss": 0.0, "lr": 6.735470121723994e-07, "epoch": 3.226369921579166, "percentage": 64.53, "elapsed_time": "2:50:24", "remaining_time": "1:33:40", "throughput": 8706.01, "total_tokens": 89016528} +{"current_steps": 132070, "total_steps": 204665, "loss": 0.0, "lr": 6.734664078263887e-07, "epoch": 3.226492072411013, "percentage": 64.53, "elapsed_time": "2:50:25", "remaining_time": "1:33:40", "throughput": 8706.08, "total_tokens": 89020368} +{"current_steps": 132075, "total_steps": 204665, "loss": 0.0, "lr": 6.733858058550185e-07, "epoch": 3.2266142232428603, "percentage": 64.53, "elapsed_time": "2:50:25", "remaining_time": "1:33:40", "throughput": 8706.1, "total_tokens": 89023568} +{"current_steps": 132080, "total_steps": 204665, "loss": 0.0001, "lr": 6.733052062588747e-07, "epoch": 3.2267363740747075, "percentage": 64.53, "elapsed_time": "2:50:25", "remaining_time": "1:33:39", "throughput": 8706.13, "total_tokens": 89026960} +{"current_steps": 132085, "total_steps": 204665, "loss": 0.0001, "lr": 6.732246090385428e-07, "epoch": 3.2268585249065547, "percentage": 64.54, "elapsed_time": "2:50:26", "remaining_time": "1:33:39", "throughput": 8706.18, "total_tokens": 89030480} +{"current_steps": 132090, "total_steps": 204665, "loss": 0.0, "lr": 6.7314401419461e-07, "epoch": 3.226980675738402, "percentage": 64.54, "elapsed_time": "2:50:26", "remaining_time": "1:33:38", "throughput": 8706.23, "total_tokens": 89034064} +{"current_steps": 132095, "total_steps": 204665, "loss": 0.0, "lr": 6.730634217276614e-07, "epoch": 3.227102826570249, "percentage": 64.54, "elapsed_time": "2:50:26", "remaining_time": "1:33:38", "throughput": 8706.27, "total_tokens": 89037584} +{"current_steps": 132100, "total_steps": 204665, "loss": 0.0, "lr": 6.729828316382837e-07, "epoch": 3.2272249774020962, "percentage": 64.54, "elapsed_time": "2:50:27", "remaining_time": "1:33:37", "throughput": 8706.31, "total_tokens": 89041040} +{"current_steps": 132105, "total_steps": 204665, "loss": 0.0, "lr": 6.729022439270627e-07, "epoch": 3.2273471282339434, "percentage": 64.55, "elapsed_time": "2:50:27", "remaining_time": "1:33:37", "throughput": 8706.32, "total_tokens": 89044048} +{"current_steps": 132110, "total_steps": 204665, "loss": 0.0, "lr": 6.728216585945845e-07, "epoch": 3.2274692790657906, "percentage": 64.55, "elapsed_time": "2:50:27", "remaining_time": "1:33:37", "throughput": 8706.35, "total_tokens": 89047376} +{"current_steps": 132115, "total_steps": 204665, "loss": 0.0, "lr": 6.727410756414356e-07, "epoch": 3.227591429897638, "percentage": 64.55, "elapsed_time": "2:50:28", "remaining_time": "1:33:36", "throughput": 8706.38, "total_tokens": 89050768} +{"current_steps": 132120, "total_steps": 204665, "loss": 0.0, "lr": 6.726604950682011e-07, "epoch": 3.2277135807294846, "percentage": 64.55, "elapsed_time": "2:50:28", "remaining_time": "1:33:36", "throughput": 8706.4, "total_tokens": 89053968} +{"current_steps": 132125, "total_steps": 204665, "loss": 0.0001, "lr": 6.72579916875468e-07, "epoch": 3.2278357315613317, "percentage": 64.56, "elapsed_time": "2:50:28", "remaining_time": "1:33:35", "throughput": 8706.45, "total_tokens": 89057488} +{"current_steps": 132130, "total_steps": 204665, "loss": 0.0, "lr": 6.724993410638216e-07, "epoch": 3.227957882393179, "percentage": 64.56, "elapsed_time": "2:50:29", "remaining_time": "1:33:35", "throughput": 8706.48, "total_tokens": 89060880} +{"current_steps": 132135, "total_steps": 204665, "loss": 0.0, "lr": 6.724187676338481e-07, "epoch": 3.228080033225026, "percentage": 64.56, "elapsed_time": "2:50:29", "remaining_time": "1:33:35", "throughput": 8706.5, "total_tokens": 89064016} +{"current_steps": 132140, "total_steps": 204665, "loss": 0.0, "lr": 6.723381965861334e-07, "epoch": 3.2282021840568733, "percentage": 64.56, "elapsed_time": "2:50:29", "remaining_time": "1:33:34", "throughput": 8706.52, "total_tokens": 89067216} +{"current_steps": 132145, "total_steps": 204665, "loss": 0.0, "lr": 6.722576279212632e-07, "epoch": 3.2283243348887205, "percentage": 64.57, "elapsed_time": "2:50:30", "remaining_time": "1:33:34", "throughput": 8706.54, "total_tokens": 89070544} +{"current_steps": 132150, "total_steps": 204665, "loss": 0.0, "lr": 6.721770616398242e-07, "epoch": 3.2284464857205677, "percentage": 64.57, "elapsed_time": "2:50:30", "remaining_time": "1:33:33", "throughput": 8706.61, "total_tokens": 89074320} +{"current_steps": 132155, "total_steps": 204665, "loss": 0.0, "lr": 6.720964977424013e-07, "epoch": 3.228568636552415, "percentage": 64.57, "elapsed_time": "2:50:31", "remaining_time": "1:33:33", "throughput": 8706.65, "total_tokens": 89077776} +{"current_steps": 132160, "total_steps": 204665, "loss": 0.0, "lr": 6.720159362295814e-07, "epoch": 3.228690787384262, "percentage": 64.57, "elapsed_time": "2:50:31", "remaining_time": "1:33:33", "throughput": 8706.66, "total_tokens": 89080912} +{"current_steps": 132165, "total_steps": 204665, "loss": 0.0, "lr": 6.719353771019494e-07, "epoch": 3.2288129382161093, "percentage": 64.58, "elapsed_time": "2:50:31", "remaining_time": "1:33:32", "throughput": 8706.72, "total_tokens": 89084560} +{"current_steps": 132170, "total_steps": 204665, "loss": 0.0023, "lr": 6.718548203600915e-07, "epoch": 3.2289350890479565, "percentage": 64.58, "elapsed_time": "2:50:32", "remaining_time": "1:33:32", "throughput": 8706.75, "total_tokens": 89087888} +{"current_steps": 132175, "total_steps": 204665, "loss": 0.0488, "lr": 6.71774266004594e-07, "epoch": 3.2290572398798036, "percentage": 64.58, "elapsed_time": "2:50:32", "remaining_time": "1:33:31", "throughput": 8706.76, "total_tokens": 89091024} +{"current_steps": 132180, "total_steps": 204665, "loss": 0.0, "lr": 6.716937140360421e-07, "epoch": 3.229179390711651, "percentage": 64.58, "elapsed_time": "2:50:32", "remaining_time": "1:33:31", "throughput": 8706.77, "total_tokens": 89094160} +{"current_steps": 132185, "total_steps": 204665, "loss": 0.0875, "lr": 6.716131644550218e-07, "epoch": 3.229301541543498, "percentage": 64.59, "elapsed_time": "2:50:33", "remaining_time": "1:33:31", "throughput": 8706.81, "total_tokens": 89097552} +{"current_steps": 132190, "total_steps": 204665, "loss": 0.0, "lr": 6.71532617262119e-07, "epoch": 3.229423692375345, "percentage": 64.59, "elapsed_time": "2:50:33", "remaining_time": "1:33:30", "throughput": 8706.83, "total_tokens": 89100816} +{"current_steps": 132195, "total_steps": 204665, "loss": 0.0002, "lr": 6.714520724579196e-07, "epoch": 3.2295458432071924, "percentage": 64.59, "elapsed_time": "2:50:33", "remaining_time": "1:33:30", "throughput": 8706.84, "total_tokens": 89103888} +{"current_steps": 132200, "total_steps": 204665, "loss": 0.0, "lr": 6.713715300430085e-07, "epoch": 3.2296679940390396, "percentage": 64.59, "elapsed_time": "2:50:34", "remaining_time": "1:33:29", "throughput": 8706.86, "total_tokens": 89107024} +{"current_steps": 132205, "total_steps": 204665, "loss": 0.0, "lr": 6.712909900179722e-07, "epoch": 3.2297901448708863, "percentage": 64.6, "elapsed_time": "2:50:34", "remaining_time": "1:33:29", "throughput": 8706.88, "total_tokens": 89110288} +{"current_steps": 132210, "total_steps": 204665, "loss": 0.0, "lr": 6.712104523833965e-07, "epoch": 3.229912295702734, "percentage": 64.6, "elapsed_time": "2:50:34", "remaining_time": "1:33:28", "throughput": 8706.91, "total_tokens": 89113552} +{"current_steps": 132215, "total_steps": 204665, "loss": 0.0579, "lr": 6.711299171398664e-07, "epoch": 3.2300344465345807, "percentage": 64.6, "elapsed_time": "2:50:35", "remaining_time": "1:33:28", "throughput": 8706.96, "total_tokens": 89117136} +{"current_steps": 132220, "total_steps": 204665, "loss": 0.0, "lr": 6.710493842879685e-07, "epoch": 3.230156597366428, "percentage": 64.6, "elapsed_time": "2:50:35", "remaining_time": "1:33:28", "throughput": 8706.95, "total_tokens": 89120016} +{"current_steps": 132225, "total_steps": 204665, "loss": 0.0001, "lr": 6.709688538282876e-07, "epoch": 3.230278748198275, "percentage": 64.61, "elapsed_time": "2:50:35", "remaining_time": "1:33:27", "throughput": 8706.95, "total_tokens": 89122896} +{"current_steps": 132230, "total_steps": 204665, "loss": 0.0, "lr": 6.708883257614098e-07, "epoch": 3.2304008990301223, "percentage": 64.61, "elapsed_time": "2:50:36", "remaining_time": "1:33:27", "throughput": 8707.0, "total_tokens": 89126544} +{"current_steps": 132235, "total_steps": 204665, "loss": 0.0, "lr": 6.708078000879209e-07, "epoch": 3.2305230498619695, "percentage": 64.61, "elapsed_time": "2:50:36", "remaining_time": "1:33:26", "throughput": 8707.03, "total_tokens": 89129744} +{"current_steps": 132240, "total_steps": 204665, "loss": 0.0001, "lr": 6.707272768084057e-07, "epoch": 3.2306452006938167, "percentage": 64.61, "elapsed_time": "2:50:36", "remaining_time": "1:33:26", "throughput": 8707.04, "total_tokens": 89132816} +{"current_steps": 132245, "total_steps": 204665, "loss": 0.0, "lr": 6.706467559234507e-07, "epoch": 3.230767351525664, "percentage": 64.62, "elapsed_time": "2:50:37", "remaining_time": "1:33:26", "throughput": 8707.04, "total_tokens": 89135760} +{"current_steps": 132250, "total_steps": 204665, "loss": 0.0, "lr": 6.705662374336409e-07, "epoch": 3.230889502357511, "percentage": 64.62, "elapsed_time": "2:50:37", "remaining_time": "1:33:25", "throughput": 8707.06, "total_tokens": 89139088} +{"current_steps": 132255, "total_steps": 204665, "loss": 0.0, "lr": 6.704857213395622e-07, "epoch": 3.2310116531893582, "percentage": 64.62, "elapsed_time": "2:50:37", "remaining_time": "1:33:25", "throughput": 8707.09, "total_tokens": 89142288} +{"current_steps": 132260, "total_steps": 204665, "loss": 0.0, "lr": 6.704052076417996e-07, "epoch": 3.2311338040212054, "percentage": 64.62, "elapsed_time": "2:50:38", "remaining_time": "1:33:24", "throughput": 8707.11, "total_tokens": 89145552} +{"current_steps": 132265, "total_steps": 204665, "loss": 0.0, "lr": 6.70324696340939e-07, "epoch": 3.2312559548530526, "percentage": 64.63, "elapsed_time": "2:50:38", "remaining_time": "1:33:24", "throughput": 8707.14, "total_tokens": 89148880} +{"current_steps": 132270, "total_steps": 204665, "loss": 0.0, "lr": 6.702441874375664e-07, "epoch": 3.2313781056849, "percentage": 64.63, "elapsed_time": "2:50:38", "remaining_time": "1:33:24", "throughput": 8707.19, "total_tokens": 89152400} +{"current_steps": 132275, "total_steps": 204665, "loss": 0.0001, "lr": 6.701636809322662e-07, "epoch": 3.231500256516747, "percentage": 64.63, "elapsed_time": "2:50:39", "remaining_time": "1:33:23", "throughput": 8707.19, "total_tokens": 89155408} +{"current_steps": 132280, "total_steps": 204665, "loss": 0.0, "lr": 6.700831768256249e-07, "epoch": 3.231622407348594, "percentage": 64.63, "elapsed_time": "2:50:39", "remaining_time": "1:33:23", "throughput": 8707.23, "total_tokens": 89158800} +{"current_steps": 132285, "total_steps": 204665, "loss": 0.0, "lr": 6.700026751182273e-07, "epoch": 3.2317445581804414, "percentage": 64.63, "elapsed_time": "2:50:39", "remaining_time": "1:33:22", "throughput": 8707.26, "total_tokens": 89162192} +{"current_steps": 132290, "total_steps": 204665, "loss": 0.0001, "lr": 6.69922175810659e-07, "epoch": 3.2318667090122886, "percentage": 64.64, "elapsed_time": "2:50:40", "remaining_time": "1:33:22", "throughput": 8707.26, "total_tokens": 89165136} +{"current_steps": 132295, "total_steps": 204665, "loss": 0.0, "lr": 6.698416789035053e-07, "epoch": 3.2319888598441358, "percentage": 64.64, "elapsed_time": "2:50:40", "remaining_time": "1:33:22", "throughput": 8707.28, "total_tokens": 89168336} +{"current_steps": 132300, "total_steps": 204665, "loss": 0.0266, "lr": 6.697611843973517e-07, "epoch": 3.2321110106759825, "percentage": 64.64, "elapsed_time": "2:50:41", "remaining_time": "1:33:21", "throughput": 8707.3, "total_tokens": 89171472} +{"current_steps": 132305, "total_steps": 204665, "loss": 0.0, "lr": 6.696806922927838e-07, "epoch": 3.2322331615078297, "percentage": 64.64, "elapsed_time": "2:50:41", "remaining_time": "1:33:21", "throughput": 8707.33, "total_tokens": 89174864} +{"current_steps": 132310, "total_steps": 204665, "loss": 0.0, "lr": 6.696002025903864e-07, "epoch": 3.232355312339677, "percentage": 64.65, "elapsed_time": "2:50:41", "remaining_time": "1:33:20", "throughput": 8707.36, "total_tokens": 89178064} +{"current_steps": 132315, "total_steps": 204665, "loss": 0.0, "lr": 6.695197152907457e-07, "epoch": 3.232477463171524, "percentage": 64.65, "elapsed_time": "2:50:42", "remaining_time": "1:33:20", "throughput": 8707.36, "total_tokens": 89181008} +{"current_steps": 132320, "total_steps": 204665, "loss": 0.0, "lr": 6.694392303944461e-07, "epoch": 3.2325996140033713, "percentage": 64.65, "elapsed_time": "2:50:42", "remaining_time": "1:33:19", "throughput": 8707.39, "total_tokens": 89184336} +{"current_steps": 132325, "total_steps": 204665, "loss": 0.0, "lr": 6.693587479020732e-07, "epoch": 3.2327217648352184, "percentage": 64.65, "elapsed_time": "2:50:42", "remaining_time": "1:33:19", "throughput": 8707.43, "total_tokens": 89187792} +{"current_steps": 132330, "total_steps": 204665, "loss": 0.0, "lr": 6.692782678142129e-07, "epoch": 3.2328439156670656, "percentage": 64.66, "elapsed_time": "2:50:43", "remaining_time": "1:33:19", "throughput": 8707.46, "total_tokens": 89191184} +{"current_steps": 132335, "total_steps": 204665, "loss": 0.0462, "lr": 6.691977901314498e-07, "epoch": 3.232966066498913, "percentage": 64.66, "elapsed_time": "2:50:43", "remaining_time": "1:33:18", "throughput": 8707.49, "total_tokens": 89194384} +{"current_steps": 132340, "total_steps": 204665, "loss": 0.0, "lr": 6.691173148543694e-07, "epoch": 3.23308821733076, "percentage": 64.66, "elapsed_time": "2:50:43", "remaining_time": "1:33:18", "throughput": 8707.55, "total_tokens": 89198224} +{"current_steps": 132345, "total_steps": 204665, "loss": 0.0, "lr": 6.690368419835569e-07, "epoch": 3.233210368162607, "percentage": 64.66, "elapsed_time": "2:50:44", "remaining_time": "1:33:17", "throughput": 8707.57, "total_tokens": 89201360} +{"current_steps": 132350, "total_steps": 204665, "loss": 0.0, "lr": 6.689563715195975e-07, "epoch": 3.2333325189944544, "percentage": 64.67, "elapsed_time": "2:50:44", "remaining_time": "1:33:17", "throughput": 8707.59, "total_tokens": 89204560} +{"current_steps": 132355, "total_steps": 204665, "loss": 0.0, "lr": 6.688759034630761e-07, "epoch": 3.2334546698263016, "percentage": 64.67, "elapsed_time": "2:50:44", "remaining_time": "1:33:17", "throughput": 8707.62, "total_tokens": 89207952} +{"current_steps": 132360, "total_steps": 204665, "loss": 0.0, "lr": 6.687954378145782e-07, "epoch": 3.2335768206581488, "percentage": 64.67, "elapsed_time": "2:50:45", "remaining_time": "1:33:16", "throughput": 8707.63, "total_tokens": 89211024} +{"current_steps": 132365, "total_steps": 204665, "loss": 0.1191, "lr": 6.687149745746894e-07, "epoch": 3.233698971489996, "percentage": 64.67, "elapsed_time": "2:50:45", "remaining_time": "1:33:16", "throughput": 8707.69, "total_tokens": 89214608} +{"current_steps": 132370, "total_steps": 204665, "loss": 0.1076, "lr": 6.686345137439939e-07, "epoch": 3.233821122321843, "percentage": 64.68, "elapsed_time": "2:50:45", "remaining_time": "1:33:15", "throughput": 8707.75, "total_tokens": 89218384} +{"current_steps": 132375, "total_steps": 204665, "loss": 0.0, "lr": 6.68554055323078e-07, "epoch": 3.2339432731536903, "percentage": 64.68, "elapsed_time": "2:50:46", "remaining_time": "1:33:15", "throughput": 8707.76, "total_tokens": 89221456} +{"current_steps": 132380, "total_steps": 204665, "loss": 0.0043, "lr": 6.684735993125255e-07, "epoch": 3.2340654239855375, "percentage": 64.68, "elapsed_time": "2:50:46", "remaining_time": "1:33:15", "throughput": 8707.81, "total_tokens": 89225040} +{"current_steps": 132385, "total_steps": 204665, "loss": 0.0, "lr": 6.683931457129224e-07, "epoch": 3.2341875748173843, "percentage": 64.68, "elapsed_time": "2:50:46", "remaining_time": "1:33:14", "throughput": 8707.83, "total_tokens": 89228240} +{"current_steps": 132390, "total_steps": 204665, "loss": 0.0, "lr": 6.683126945248538e-07, "epoch": 3.2343097256492315, "percentage": 64.69, "elapsed_time": "2:50:47", "remaining_time": "1:33:14", "throughput": 8707.85, "total_tokens": 89231376} +{"current_steps": 132395, "total_steps": 204665, "loss": 0.0, "lr": 6.682322457489042e-07, "epoch": 3.2344318764810787, "percentage": 64.69, "elapsed_time": "2:50:47", "remaining_time": "1:33:13", "throughput": 8707.87, "total_tokens": 89234576} +{"current_steps": 132400, "total_steps": 204665, "loss": 0.0, "lr": 6.681517993856592e-07, "epoch": 3.234554027312926, "percentage": 64.69, "elapsed_time": "2:50:47", "remaining_time": "1:33:13", "throughput": 8707.89, "total_tokens": 89237840} +{"current_steps": 132405, "total_steps": 204665, "loss": 0.0354, "lr": 6.680713554357031e-07, "epoch": 3.234676178144773, "percentage": 64.69, "elapsed_time": "2:50:48", "remaining_time": "1:33:12", "throughput": 8707.94, "total_tokens": 89241360} +{"current_steps": 132410, "total_steps": 204665, "loss": 0.0, "lr": 6.679909138996219e-07, "epoch": 3.2347983289766202, "percentage": 64.7, "elapsed_time": "2:50:48", "remaining_time": "1:33:12", "throughput": 8707.98, "total_tokens": 89244880} +{"current_steps": 132415, "total_steps": 204665, "loss": 0.0, "lr": 6.679104747779996e-07, "epoch": 3.2349204798084674, "percentage": 64.7, "elapsed_time": "2:50:48", "remaining_time": "1:33:12", "throughput": 8707.99, "total_tokens": 89247952} +{"current_steps": 132420, "total_steps": 204665, "loss": 0.0001, "lr": 6.678300380714217e-07, "epoch": 3.2350426306403146, "percentage": 64.7, "elapsed_time": "2:50:49", "remaining_time": "1:33:11", "throughput": 8708.04, "total_tokens": 89251472} +{"current_steps": 132425, "total_steps": 204665, "loss": 0.0239, "lr": 6.677496037804735e-07, "epoch": 3.235164781472162, "percentage": 64.7, "elapsed_time": "2:50:49", "remaining_time": "1:33:11", "throughput": 8708.09, "total_tokens": 89255120} +{"current_steps": 132430, "total_steps": 204665, "loss": 0.0, "lr": 6.676691719057393e-07, "epoch": 3.235286932304009, "percentage": 64.71, "elapsed_time": "2:50:50", "remaining_time": "1:33:10", "throughput": 8708.14, "total_tokens": 89258640} +{"current_steps": 132435, "total_steps": 204665, "loss": 0.0, "lr": 6.675887424478044e-07, "epoch": 3.235409083135856, "percentage": 64.71, "elapsed_time": "2:50:50", "remaining_time": "1:33:10", "throughput": 8708.19, "total_tokens": 89262224} +{"current_steps": 132440, "total_steps": 204665, "loss": 0.0, "lr": 6.675083154072535e-07, "epoch": 3.2355312339677034, "percentage": 64.71, "elapsed_time": "2:50:50", "remaining_time": "1:33:10", "throughput": 8708.24, "total_tokens": 89265808} +{"current_steps": 132445, "total_steps": 204665, "loss": 0.0001, "lr": 6.674278907846715e-07, "epoch": 3.2356533847995506, "percentage": 64.71, "elapsed_time": "2:50:51", "remaining_time": "1:33:09", "throughput": 8708.26, "total_tokens": 89269072} +{"current_steps": 132450, "total_steps": 204665, "loss": 0.0001, "lr": 6.673474685806435e-07, "epoch": 3.2357755356313977, "percentage": 64.72, "elapsed_time": "2:50:51", "remaining_time": "1:33:09", "throughput": 8708.29, "total_tokens": 89272336} +{"current_steps": 132455, "total_steps": 204665, "loss": 0.0, "lr": 6.672670487957538e-07, "epoch": 3.235897686463245, "percentage": 64.72, "elapsed_time": "2:50:51", "remaining_time": "1:33:08", "throughput": 8708.31, "total_tokens": 89275600} +{"current_steps": 132460, "total_steps": 204665, "loss": 0.0, "lr": 6.671866314305881e-07, "epoch": 3.236019837295092, "percentage": 64.72, "elapsed_time": "2:50:52", "remaining_time": "1:33:08", "throughput": 8708.33, "total_tokens": 89278736} +{"current_steps": 132465, "total_steps": 204665, "loss": 0.0, "lr": 6.671062164857303e-07, "epoch": 3.2361419881269393, "percentage": 64.72, "elapsed_time": "2:50:52", "remaining_time": "1:33:08", "throughput": 8708.37, "total_tokens": 89282192} +{"current_steps": 132470, "total_steps": 204665, "loss": 0.0002, "lr": 6.67025803961766e-07, "epoch": 3.2362641389587865, "percentage": 64.73, "elapsed_time": "2:50:52", "remaining_time": "1:33:07", "throughput": 8708.36, "total_tokens": 89285072} +{"current_steps": 132475, "total_steps": 204665, "loss": 0.0, "lr": 6.669453938592793e-07, "epoch": 3.2363862897906337, "percentage": 64.73, "elapsed_time": "2:50:53", "remaining_time": "1:33:07", "throughput": 8708.39, "total_tokens": 89288400} +{"current_steps": 132480, "total_steps": 204665, "loss": 0.0325, "lr": 6.668649861788551e-07, "epoch": 3.2365084406224804, "percentage": 64.73, "elapsed_time": "2:50:53", "remaining_time": "1:33:06", "throughput": 8708.4, "total_tokens": 89291344} +{"current_steps": 132485, "total_steps": 204665, "loss": 0.0663, "lr": 6.66784580921079e-07, "epoch": 3.2366305914543276, "percentage": 64.73, "elapsed_time": "2:50:53", "remaining_time": "1:33:06", "throughput": 8708.46, "total_tokens": 89295184} +{"current_steps": 132490, "total_steps": 204665, "loss": 0.0439, "lr": 6.667041780865347e-07, "epoch": 3.236752742286175, "percentage": 64.74, "elapsed_time": "2:50:54", "remaining_time": "1:33:06", "throughput": 8708.47, "total_tokens": 89298192} +{"current_steps": 132495, "total_steps": 204665, "loss": 0.0, "lr": 6.666237776758072e-07, "epoch": 3.236874893118022, "percentage": 64.74, "elapsed_time": "2:50:54", "remaining_time": "1:33:05", "throughput": 8708.49, "total_tokens": 89301392} +{"current_steps": 132500, "total_steps": 204665, "loss": 0.0, "lr": 6.665433796894812e-07, "epoch": 3.236997043949869, "percentage": 64.74, "elapsed_time": "2:50:54", "remaining_time": "1:33:05", "throughput": 8708.52, "total_tokens": 89304784} +{"current_steps": 132505, "total_steps": 204665, "loss": 0.0, "lr": 6.66462984128142e-07, "epoch": 3.2371191947817164, "percentage": 64.74, "elapsed_time": "2:50:55", "remaining_time": "1:33:04", "throughput": 8708.55, "total_tokens": 89308048} +{"current_steps": 132510, "total_steps": 204665, "loss": 0.0001, "lr": 6.663825909923729e-07, "epoch": 3.2372413456135636, "percentage": 64.74, "elapsed_time": "2:50:55", "remaining_time": "1:33:04", "throughput": 8708.58, "total_tokens": 89311376} +{"current_steps": 132515, "total_steps": 204665, "loss": 0.0, "lr": 6.663022002827595e-07, "epoch": 3.2373634964454108, "percentage": 64.75, "elapsed_time": "2:50:55", "remaining_time": "1:33:03", "throughput": 8708.59, "total_tokens": 89314448} +{"current_steps": 132520, "total_steps": 204665, "loss": 0.001, "lr": 6.662218119998866e-07, "epoch": 3.237485647277258, "percentage": 64.75, "elapsed_time": "2:50:56", "remaining_time": "1:33:03", "throughput": 8708.6, "total_tokens": 89317520} +{"current_steps": 132525, "total_steps": 204665, "loss": 0.0, "lr": 6.661414261443381e-07, "epoch": 3.237607798109105, "percentage": 64.75, "elapsed_time": "2:50:56", "remaining_time": "1:33:03", "throughput": 8708.62, "total_tokens": 89320720} +{"current_steps": 132530, "total_steps": 204665, "loss": 0.0, "lr": 6.660610427166993e-07, "epoch": 3.2377299489409523, "percentage": 64.75, "elapsed_time": "2:50:56", "remaining_time": "1:33:02", "throughput": 8708.63, "total_tokens": 89323856} +{"current_steps": 132535, "total_steps": 204665, "loss": 0.0, "lr": 6.659806617175541e-07, "epoch": 3.2378520997727995, "percentage": 64.76, "elapsed_time": "2:50:57", "remaining_time": "1:33:02", "throughput": 8708.73, "total_tokens": 89328016} +{"current_steps": 132540, "total_steps": 204665, "loss": 0.0, "lr": 6.659002831474878e-07, "epoch": 3.2379742506046467, "percentage": 64.76, "elapsed_time": "2:50:57", "remaining_time": "1:33:01", "throughput": 8708.75, "total_tokens": 89331344} +{"current_steps": 132545, "total_steps": 204665, "loss": 0.0, "lr": 6.658199070070842e-07, "epoch": 3.238096401436494, "percentage": 64.76, "elapsed_time": "2:50:58", "remaining_time": "1:33:01", "throughput": 8708.81, "total_tokens": 89334992} +{"current_steps": 132550, "total_steps": 204665, "loss": 0.0, "lr": 6.657395332969279e-07, "epoch": 3.238218552268341, "percentage": 64.76, "elapsed_time": "2:50:58", "remaining_time": "1:33:01", "throughput": 8708.83, "total_tokens": 89338128} +{"current_steps": 132555, "total_steps": 204665, "loss": 0.0, "lr": 6.656591620176041e-07, "epoch": 3.2383407031001883, "percentage": 64.77, "elapsed_time": "2:50:58", "remaining_time": "1:33:00", "throughput": 8708.87, "total_tokens": 89341648} +{"current_steps": 132560, "total_steps": 204665, "loss": 0.0, "lr": 6.655787931696964e-07, "epoch": 3.2384628539320355, "percentage": 64.77, "elapsed_time": "2:50:59", "remaining_time": "1:33:00", "throughput": 8708.91, "total_tokens": 89345104} +{"current_steps": 132565, "total_steps": 204665, "loss": 0.0001, "lr": 6.6549842675379e-07, "epoch": 3.2385850047638822, "percentage": 64.77, "elapsed_time": "2:50:59", "remaining_time": "1:32:59", "throughput": 8708.9, "total_tokens": 89347856} +{"current_steps": 132570, "total_steps": 204665, "loss": 0.0001, "lr": 6.654180627704687e-07, "epoch": 3.2387071555957294, "percentage": 64.77, "elapsed_time": "2:50:59", "remaining_time": "1:32:59", "throughput": 8708.91, "total_tokens": 89350992} +{"current_steps": 132575, "total_steps": 204665, "loss": 0.0, "lr": 6.653377012203171e-07, "epoch": 3.2388293064275766, "percentage": 64.78, "elapsed_time": "2:51:00", "remaining_time": "1:32:59", "throughput": 8708.92, "total_tokens": 89354000} +{"current_steps": 132580, "total_steps": 204665, "loss": 0.1238, "lr": 6.652573421039203e-07, "epoch": 3.238951457259424, "percentage": 64.78, "elapsed_time": "2:51:00", "remaining_time": "1:32:58", "throughput": 8708.99, "total_tokens": 89357904} +{"current_steps": 132585, "total_steps": 204665, "loss": 0.0, "lr": 6.651769854218615e-07, "epoch": 3.239073608091271, "percentage": 64.78, "elapsed_time": "2:51:00", "remaining_time": "1:32:58", "throughput": 8709.05, "total_tokens": 89361616} +{"current_steps": 132590, "total_steps": 204665, "loss": 0.0, "lr": 6.650966311747263e-07, "epoch": 3.239195758923118, "percentage": 64.78, "elapsed_time": "2:51:01", "remaining_time": "1:32:57", "throughput": 8709.1, "total_tokens": 89365200} +{"current_steps": 132595, "total_steps": 204665, "loss": 0.0, "lr": 6.650162793630982e-07, "epoch": 3.2393179097549654, "percentage": 64.79, "elapsed_time": "2:51:01", "remaining_time": "1:32:57", "throughput": 8709.18, "total_tokens": 89369168} +{"current_steps": 132600, "total_steps": 204665, "loss": 0.0, "lr": 6.649359299875619e-07, "epoch": 3.2394400605868126, "percentage": 64.79, "elapsed_time": "2:51:01", "remaining_time": "1:32:57", "throughput": 8709.22, "total_tokens": 89372624} +{"current_steps": 132605, "total_steps": 204665, "loss": 0.0001, "lr": 6.648555830487018e-07, "epoch": 3.2395622114186597, "percentage": 64.79, "elapsed_time": "2:51:02", "remaining_time": "1:32:56", "throughput": 8709.25, "total_tokens": 89375888} +{"current_steps": 132610, "total_steps": 204665, "loss": 0.0, "lr": 6.647752385471015e-07, "epoch": 3.239684362250507, "percentage": 64.79, "elapsed_time": "2:51:02", "remaining_time": "1:32:56", "throughput": 8709.28, "total_tokens": 89379280} +{"current_steps": 132615, "total_steps": 204665, "loss": 0.0003, "lr": 6.646948964833465e-07, "epoch": 3.239806513082354, "percentage": 64.8, "elapsed_time": "2:51:02", "remaining_time": "1:32:55", "throughput": 8709.3, "total_tokens": 89382416} +{"current_steps": 132620, "total_steps": 204665, "loss": 0.0, "lr": 6.646145568580198e-07, "epoch": 3.2399286639142013, "percentage": 64.8, "elapsed_time": "2:51:03", "remaining_time": "1:32:55", "throughput": 8709.34, "total_tokens": 89385872} +{"current_steps": 132625, "total_steps": 204665, "loss": 0.0, "lr": 6.645342196717067e-07, "epoch": 3.2400508147460485, "percentage": 64.8, "elapsed_time": "2:51:03", "remaining_time": "1:32:55", "throughput": 8709.36, "total_tokens": 89389136} +{"current_steps": 132630, "total_steps": 204665, "loss": 0.0, "lr": 6.644538849249907e-07, "epoch": 3.2401729655778957, "percentage": 64.8, "elapsed_time": "2:51:03", "remaining_time": "1:32:54", "throughput": 8709.57, "total_tokens": 89394768} +{"current_steps": 132635, "total_steps": 204665, "loss": 0.0, "lr": 6.643735526184562e-07, "epoch": 3.240295116409743, "percentage": 64.81, "elapsed_time": "2:51:04", "remaining_time": "1:32:54", "throughput": 8709.64, "total_tokens": 89398608} +{"current_steps": 132640, "total_steps": 204665, "loss": 0.0285, "lr": 6.64293222752688e-07, "epoch": 3.24041726724159, "percentage": 64.81, "elapsed_time": "2:51:04", "remaining_time": "1:32:53", "throughput": 8709.68, "total_tokens": 89402128} +{"current_steps": 132645, "total_steps": 204665, "loss": 0.0087, "lr": 6.642128953282695e-07, "epoch": 3.2405394180734373, "percentage": 64.81, "elapsed_time": "2:51:05", "remaining_time": "1:32:53", "throughput": 8709.71, "total_tokens": 89405456} +{"current_steps": 132650, "total_steps": 204665, "loss": 0.0684, "lr": 6.641325703457852e-07, "epoch": 3.2406615689052845, "percentage": 64.81, "elapsed_time": "2:51:05", "remaining_time": "1:32:53", "throughput": 8709.81, "total_tokens": 89409680} +{"current_steps": 132655, "total_steps": 204665, "loss": 0.0, "lr": 6.64052247805819e-07, "epoch": 3.2407837197371316, "percentage": 64.82, "elapsed_time": "2:51:05", "remaining_time": "1:32:52", "throughput": 8709.86, "total_tokens": 89413264} +{"current_steps": 132660, "total_steps": 204665, "loss": 0.0, "lr": 6.639719277089556e-07, "epoch": 3.2409058705689784, "percentage": 64.82, "elapsed_time": "2:51:06", "remaining_time": "1:32:52", "throughput": 8709.89, "total_tokens": 89416592} +{"current_steps": 132665, "total_steps": 204665, "loss": 0.0001, "lr": 6.638916100557782e-07, "epoch": 3.2410280214008256, "percentage": 64.82, "elapsed_time": "2:51:06", "remaining_time": "1:32:51", "throughput": 8709.93, "total_tokens": 89420048} +{"current_steps": 132670, "total_steps": 204665, "loss": 0.0, "lr": 6.638112948468715e-07, "epoch": 3.2411501722326728, "percentage": 64.82, "elapsed_time": "2:51:06", "remaining_time": "1:32:51", "throughput": 8709.93, "total_tokens": 89423056} +{"current_steps": 132675, "total_steps": 204665, "loss": 0.0, "lr": 6.637309820828199e-07, "epoch": 3.24127232306452, "percentage": 64.83, "elapsed_time": "2:51:07", "remaining_time": "1:32:50", "throughput": 8709.99, "total_tokens": 89426768} +{"current_steps": 132680, "total_steps": 204665, "loss": 0.0, "lr": 6.636506717642066e-07, "epoch": 3.241394473896367, "percentage": 64.83, "elapsed_time": "2:51:07", "remaining_time": "1:32:50", "throughput": 8710.02, "total_tokens": 89430032} +{"current_steps": 132685, "total_steps": 204665, "loss": 0.0203, "lr": 6.635703638916165e-07, "epoch": 3.2415166247282143, "percentage": 64.83, "elapsed_time": "2:51:07", "remaining_time": "1:32:50", "throughput": 8710.06, "total_tokens": 89433424} +{"current_steps": 132690, "total_steps": 204665, "loss": 0.0, "lr": 6.634900584656328e-07, "epoch": 3.2416387755600615, "percentage": 64.83, "elapsed_time": "2:51:08", "remaining_time": "1:32:49", "throughput": 8710.13, "total_tokens": 89437264} +{"current_steps": 132695, "total_steps": 204665, "loss": 0.0, "lr": 6.634097554868403e-07, "epoch": 3.2417609263919087, "percentage": 64.84, "elapsed_time": "2:51:08", "remaining_time": "1:32:49", "throughput": 8710.15, "total_tokens": 89440464} +{"current_steps": 132700, "total_steps": 204665, "loss": 0.0, "lr": 6.633294549558223e-07, "epoch": 3.241883077223756, "percentage": 64.84, "elapsed_time": "2:51:08", "remaining_time": "1:32:48", "throughput": 8710.18, "total_tokens": 89443792} +{"current_steps": 132705, "total_steps": 204665, "loss": 0.0158, "lr": 6.63249156873163e-07, "epoch": 3.242005228055603, "percentage": 64.84, "elapsed_time": "2:51:09", "remaining_time": "1:32:48", "throughput": 8710.21, "total_tokens": 89447184} +{"current_steps": 132710, "total_steps": 204665, "loss": 0.0, "lr": 6.631688612394469e-07, "epoch": 3.2421273788874503, "percentage": 64.84, "elapsed_time": "2:51:09", "remaining_time": "1:32:48", "throughput": 8710.28, "total_tokens": 89450960} +{"current_steps": 132715, "total_steps": 204665, "loss": 0.0, "lr": 6.630885680552567e-07, "epoch": 3.2422495297192975, "percentage": 64.84, "elapsed_time": "2:51:09", "remaining_time": "1:32:47", "throughput": 8710.3, "total_tokens": 89454224} +{"current_steps": 132720, "total_steps": 204665, "loss": 0.0, "lr": 6.630082773211777e-07, "epoch": 3.2423716805511447, "percentage": 64.85, "elapsed_time": "2:51:10", "remaining_time": "1:32:47", "throughput": 8710.35, "total_tokens": 89457808} +{"current_steps": 132725, "total_steps": 204665, "loss": 0.0519, "lr": 6.629279890377926e-07, "epoch": 3.242493831382992, "percentage": 64.85, "elapsed_time": "2:51:10", "remaining_time": "1:32:46", "throughput": 8710.41, "total_tokens": 89461520} +{"current_steps": 132730, "total_steps": 204665, "loss": 0.0, "lr": 6.62847703205686e-07, "epoch": 3.242615982214839, "percentage": 64.85, "elapsed_time": "2:51:10", "remaining_time": "1:32:46", "throughput": 8710.45, "total_tokens": 89464912} +{"current_steps": 132735, "total_steps": 204665, "loss": 0.0961, "lr": 6.627674198254419e-07, "epoch": 3.2427381330466862, "percentage": 64.85, "elapsed_time": "2:51:11", "remaining_time": "1:32:46", "throughput": 8710.46, "total_tokens": 89467984} +{"current_steps": 132740, "total_steps": 204665, "loss": 0.0001, "lr": 6.626871388976433e-07, "epoch": 3.2428602838785334, "percentage": 64.86, "elapsed_time": "2:51:11", "remaining_time": "1:32:45", "throughput": 8710.46, "total_tokens": 89470992} +{"current_steps": 132745, "total_steps": 204665, "loss": 0.0, "lr": 6.626068604228752e-07, "epoch": 3.24298243471038, "percentage": 64.86, "elapsed_time": "2:51:12", "remaining_time": "1:32:45", "throughput": 8710.49, "total_tokens": 89474256} +{"current_steps": 132750, "total_steps": 204665, "loss": 0.0, "lr": 6.625265844017205e-07, "epoch": 3.2431045855422274, "percentage": 64.86, "elapsed_time": "2:51:12", "remaining_time": "1:32:44", "throughput": 8710.5, "total_tokens": 89477328} +{"current_steps": 132755, "total_steps": 204665, "loss": 0.0, "lr": 6.624463108347631e-07, "epoch": 3.2432267363740745, "percentage": 64.86, "elapsed_time": "2:51:12", "remaining_time": "1:32:44", "throughput": 8710.56, "total_tokens": 89481040} +{"current_steps": 132760, "total_steps": 204665, "loss": 0.0, "lr": 6.62366039722587e-07, "epoch": 3.2433488872059217, "percentage": 64.87, "elapsed_time": "2:51:13", "remaining_time": "1:32:44", "throughput": 8710.6, "total_tokens": 89484560} +{"current_steps": 132765, "total_steps": 204665, "loss": 0.0, "lr": 6.622857710657757e-07, "epoch": 3.243471038037769, "percentage": 64.87, "elapsed_time": "2:51:13", "remaining_time": "1:32:43", "throughput": 8710.64, "total_tokens": 89488016} +{"current_steps": 132770, "total_steps": 204665, "loss": 0.0, "lr": 6.622055048649135e-07, "epoch": 3.243593188869616, "percentage": 64.87, "elapsed_time": "2:51:13", "remaining_time": "1:32:43", "throughput": 8710.67, "total_tokens": 89491344} +{"current_steps": 132775, "total_steps": 204665, "loss": 0.0, "lr": 6.621252411205834e-07, "epoch": 3.2437153397014633, "percentage": 64.87, "elapsed_time": "2:51:14", "remaining_time": "1:32:42", "throughput": 8710.71, "total_tokens": 89494864} +{"current_steps": 132780, "total_steps": 204665, "loss": 0.0001, "lr": 6.620449798333696e-07, "epoch": 3.2438374905333105, "percentage": 64.88, "elapsed_time": "2:51:14", "remaining_time": "1:32:42", "throughput": 8710.76, "total_tokens": 89498384} +{"current_steps": 132785, "total_steps": 204665, "loss": 0.0, "lr": 6.619647210038554e-07, "epoch": 3.2439596413651577, "percentage": 64.88, "elapsed_time": "2:51:14", "remaining_time": "1:32:42", "throughput": 8710.78, "total_tokens": 89501584} +{"current_steps": 132790, "total_steps": 204665, "loss": 0.0001, "lr": 6.618844646326245e-07, "epoch": 3.244081792197005, "percentage": 64.88, "elapsed_time": "2:51:15", "remaining_time": "1:32:41", "throughput": 8710.79, "total_tokens": 89504656} +{"current_steps": 132795, "total_steps": 204665, "loss": 0.0066, "lr": 6.618042107202613e-07, "epoch": 3.244203943028852, "percentage": 64.88, "elapsed_time": "2:51:15", "remaining_time": "1:32:41", "throughput": 8710.82, "total_tokens": 89507984} +{"current_steps": 132800, "total_steps": 204665, "loss": 0.0, "lr": 6.617239592673485e-07, "epoch": 3.2443260938606993, "percentage": 64.89, "elapsed_time": "2:51:15", "remaining_time": "1:32:40", "throughput": 8710.86, "total_tokens": 89511440} +{"current_steps": 132805, "total_steps": 204665, "loss": 0.0, "lr": 6.616437102744701e-07, "epoch": 3.2444482446925464, "percentage": 64.89, "elapsed_time": "2:51:16", "remaining_time": "1:32:40", "throughput": 8710.89, "total_tokens": 89514704} +{"current_steps": 132810, "total_steps": 204665, "loss": 0.0, "lr": 6.615634637422097e-07, "epoch": 3.2445703955243936, "percentage": 64.89, "elapsed_time": "2:51:16", "remaining_time": "1:32:39", "throughput": 8710.89, "total_tokens": 89517712} +{"current_steps": 132815, "total_steps": 204665, "loss": 0.0, "lr": 6.61483219671151e-07, "epoch": 3.244692546356241, "percentage": 64.89, "elapsed_time": "2:51:16", "remaining_time": "1:32:39", "throughput": 8710.94, "total_tokens": 89521232} +{"current_steps": 132820, "total_steps": 204665, "loss": 0.0468, "lr": 6.61402978061877e-07, "epoch": 3.244814697188088, "percentage": 64.9, "elapsed_time": "2:51:17", "remaining_time": "1:32:39", "throughput": 8711.0, "total_tokens": 89525008} +{"current_steps": 132825, "total_steps": 204665, "loss": 0.0001, "lr": 6.613227389149716e-07, "epoch": 3.244936848019935, "percentage": 64.9, "elapsed_time": "2:51:17", "remaining_time": "1:32:38", "throughput": 8710.99, "total_tokens": 89527824} +{"current_steps": 132830, "total_steps": 204665, "loss": 0.0, "lr": 6.612425022310186e-07, "epoch": 3.245058998851782, "percentage": 64.9, "elapsed_time": "2:51:17", "remaining_time": "1:32:38", "throughput": 8711.03, "total_tokens": 89531280} +{"current_steps": 132835, "total_steps": 204665, "loss": 0.0, "lr": 6.611622680106011e-07, "epoch": 3.245181149683629, "percentage": 64.9, "elapsed_time": "2:51:18", "remaining_time": "1:32:37", "throughput": 8711.09, "total_tokens": 89534992} +{"current_steps": 132840, "total_steps": 204665, "loss": 0.0543, "lr": 6.610820362543028e-07, "epoch": 3.2453033005154763, "percentage": 64.91, "elapsed_time": "2:51:18", "remaining_time": "1:32:37", "throughput": 8711.15, "total_tokens": 89538640} +{"current_steps": 132845, "total_steps": 204665, "loss": 0.0003, "lr": 6.61001806962707e-07, "epoch": 3.2454254513473235, "percentage": 64.91, "elapsed_time": "2:51:18", "remaining_time": "1:32:37", "throughput": 8711.17, "total_tokens": 89541904} +{"current_steps": 132850, "total_steps": 204665, "loss": 0.0, "lr": 6.609215801363974e-07, "epoch": 3.2455476021791707, "percentage": 64.91, "elapsed_time": "2:51:19", "remaining_time": "1:32:36", "throughput": 8711.19, "total_tokens": 89545040} +{"current_steps": 132855, "total_steps": 204665, "loss": 0.0003, "lr": 6.608413557759572e-07, "epoch": 3.245669753011018, "percentage": 64.91, "elapsed_time": "2:51:19", "remaining_time": "1:32:36", "throughput": 8711.22, "total_tokens": 89548432} +{"current_steps": 132860, "total_steps": 204665, "loss": 0.0, "lr": 6.607611338819696e-07, "epoch": 3.245791903842865, "percentage": 64.92, "elapsed_time": "2:51:20", "remaining_time": "1:32:35", "throughput": 8711.25, "total_tokens": 89551696} +{"current_steps": 132865, "total_steps": 204665, "loss": 0.0001, "lr": 6.606809144550188e-07, "epoch": 3.2459140546747123, "percentage": 64.92, "elapsed_time": "2:51:20", "remaining_time": "1:32:35", "throughput": 8711.27, "total_tokens": 89554960} +{"current_steps": 132870, "total_steps": 204665, "loss": 0.0831, "lr": 6.606006974956872e-07, "epoch": 3.2460362055065595, "percentage": 64.92, "elapsed_time": "2:51:20", "remaining_time": "1:32:35", "throughput": 8711.32, "total_tokens": 89558544} +{"current_steps": 132875, "total_steps": 204665, "loss": 0.0, "lr": 6.60520483004559e-07, "epoch": 3.2461583563384067, "percentage": 64.92, "elapsed_time": "2:51:21", "remaining_time": "1:32:34", "throughput": 8711.35, "total_tokens": 89561872} +{"current_steps": 132880, "total_steps": 204665, "loss": 0.0001, "lr": 6.604402709822168e-07, "epoch": 3.246280507170254, "percentage": 64.93, "elapsed_time": "2:51:21", "remaining_time": "1:32:34", "throughput": 8711.42, "total_tokens": 89565648} +{"current_steps": 132885, "total_steps": 204665, "loss": 0.0, "lr": 6.603600614292441e-07, "epoch": 3.246402658002101, "percentage": 64.93, "elapsed_time": "2:51:21", "remaining_time": "1:32:33", "throughput": 8711.48, "total_tokens": 89569424} +{"current_steps": 132890, "total_steps": 204665, "loss": 0.0, "lr": 6.602798543462252e-07, "epoch": 3.2465248088339482, "percentage": 64.93, "elapsed_time": "2:51:22", "remaining_time": "1:32:33", "throughput": 8711.51, "total_tokens": 89572752} +{"current_steps": 132895, "total_steps": 204665, "loss": 0.0, "lr": 6.601996497337418e-07, "epoch": 3.2466469596657954, "percentage": 64.93, "elapsed_time": "2:51:22", "remaining_time": "1:32:33", "throughput": 8711.55, "total_tokens": 89576272} +{"current_steps": 132900, "total_steps": 204665, "loss": 0.0, "lr": 6.601194475923784e-07, "epoch": 3.2467691104976426, "percentage": 64.94, "elapsed_time": "2:51:22", "remaining_time": "1:32:32", "throughput": 8711.53, "total_tokens": 89578960} +{"current_steps": 132905, "total_steps": 204665, "loss": 0.0, "lr": 6.600392479227177e-07, "epoch": 3.24689126132949, "percentage": 64.94, "elapsed_time": "2:51:23", "remaining_time": "1:32:32", "throughput": 8711.57, "total_tokens": 89582288} +{"current_steps": 132910, "total_steps": 204665, "loss": 0.0625, "lr": 6.599590507253429e-07, "epoch": 3.247013412161337, "percentage": 64.94, "elapsed_time": "2:51:23", "remaining_time": "1:32:31", "throughput": 8711.65, "total_tokens": 89586320} +{"current_steps": 132915, "total_steps": 204665, "loss": 0.0, "lr": 6.598788560008375e-07, "epoch": 3.247135562993184, "percentage": 64.94, "elapsed_time": "2:51:23", "remaining_time": "1:32:31", "throughput": 8711.71, "total_tokens": 89590096} +{"current_steps": 132920, "total_steps": 204665, "loss": 0.0, "lr": 6.597986637497841e-07, "epoch": 3.2472577138250314, "percentage": 64.95, "elapsed_time": "2:51:24", "remaining_time": "1:32:31", "throughput": 8711.74, "total_tokens": 89593360} +{"current_steps": 132925, "total_steps": 204665, "loss": 0.0001, "lr": 6.597184739727669e-07, "epoch": 3.247379864656878, "percentage": 64.95, "elapsed_time": "2:51:24", "remaining_time": "1:32:30", "throughput": 8711.79, "total_tokens": 89596944} +{"current_steps": 132930, "total_steps": 204665, "loss": 0.0, "lr": 6.59638286670368e-07, "epoch": 3.2475020154887253, "percentage": 64.95, "elapsed_time": "2:51:24", "remaining_time": "1:32:30", "throughput": 8711.82, "total_tokens": 89600272} +{"current_steps": 132935, "total_steps": 204665, "loss": 0.0206, "lr": 6.595581018431715e-07, "epoch": 3.2476241663205725, "percentage": 64.95, "elapsed_time": "2:51:25", "remaining_time": "1:32:29", "throughput": 8711.84, "total_tokens": 89603472} +{"current_steps": 132940, "total_steps": 204665, "loss": 0.0, "lr": 6.594779194917596e-07, "epoch": 3.2477463171524197, "percentage": 64.95, "elapsed_time": "2:51:25", "remaining_time": "1:32:29", "throughput": 8711.89, "total_tokens": 89607056} +{"current_steps": 132945, "total_steps": 204665, "loss": 0.0, "lr": 6.593977396167159e-07, "epoch": 3.247868467984267, "percentage": 64.96, "elapsed_time": "2:51:25", "remaining_time": "1:32:28", "throughput": 8711.94, "total_tokens": 89610704} +{"current_steps": 132950, "total_steps": 204665, "loss": 0.0433, "lr": 6.59317562218624e-07, "epoch": 3.247990618816114, "percentage": 64.96, "elapsed_time": "2:51:26", "remaining_time": "1:32:28", "throughput": 8711.98, "total_tokens": 89614096} +{"current_steps": 132955, "total_steps": 204665, "loss": 0.0, "lr": 6.59237387298066e-07, "epoch": 3.2481127696479613, "percentage": 64.96, "elapsed_time": "2:51:26", "remaining_time": "1:32:28", "throughput": 8712.01, "total_tokens": 89617424} +{"current_steps": 132960, "total_steps": 204665, "loss": 0.0, "lr": 6.591572148556254e-07, "epoch": 3.2482349204798084, "percentage": 64.96, "elapsed_time": "2:51:26", "remaining_time": "1:32:27", "throughput": 8712.03, "total_tokens": 89620560} +{"current_steps": 132965, "total_steps": 204665, "loss": 0.0, "lr": 6.590770448918852e-07, "epoch": 3.2483570713116556, "percentage": 64.97, "elapsed_time": "2:51:27", "remaining_time": "1:32:27", "throughput": 8712.08, "total_tokens": 89624144} +{"current_steps": 132970, "total_steps": 204665, "loss": 0.0, "lr": 6.589968774074287e-07, "epoch": 3.248479222143503, "percentage": 64.97, "elapsed_time": "2:51:27", "remaining_time": "1:32:26", "throughput": 8712.13, "total_tokens": 89627856} +{"current_steps": 132975, "total_steps": 204665, "loss": 0.0002, "lr": 6.589167124028382e-07, "epoch": 3.24860137297535, "percentage": 64.97, "elapsed_time": "2:51:28", "remaining_time": "1:32:26", "throughput": 8712.17, "total_tokens": 89631312} +{"current_steps": 132980, "total_steps": 204665, "loss": 0.0, "lr": 6.588365498786972e-07, "epoch": 3.248723523807197, "percentage": 64.97, "elapsed_time": "2:51:28", "remaining_time": "1:32:26", "throughput": 8712.18, "total_tokens": 89634384} +{"current_steps": 132985, "total_steps": 204665, "loss": 0.0, "lr": 6.587563898355888e-07, "epoch": 3.2488456746390444, "percentage": 64.98, "elapsed_time": "2:51:28", "remaining_time": "1:32:25", "throughput": 8712.24, "total_tokens": 89638032} +{"current_steps": 132990, "total_steps": 204665, "loss": 0.0, "lr": 6.586762322740953e-07, "epoch": 3.2489678254708916, "percentage": 64.98, "elapsed_time": "2:51:29", "remaining_time": "1:32:25", "throughput": 8712.31, "total_tokens": 89642000} +{"current_steps": 132995, "total_steps": 204665, "loss": 0.0, "lr": 6.585960771948006e-07, "epoch": 3.2490899763027388, "percentage": 64.98, "elapsed_time": "2:51:29", "remaining_time": "1:32:24", "throughput": 8712.35, "total_tokens": 89645392} +{"current_steps": 133000, "total_steps": 204665, "loss": 0.0, "lr": 6.585159245982866e-07, "epoch": 3.249212127134586, "percentage": 64.98, "elapsed_time": "2:51:29", "remaining_time": "1:32:24", "throughput": 8712.38, "total_tokens": 89648720} +{"current_steps": 133005, "total_steps": 204665, "loss": 0.0005, "lr": 6.584357744851369e-07, "epoch": 3.249334277966433, "percentage": 64.99, "elapsed_time": "2:51:30", "remaining_time": "1:32:24", "throughput": 8712.4, "total_tokens": 89651920} +{"current_steps": 133010, "total_steps": 204665, "loss": 0.0, "lr": 6.583556268559343e-07, "epoch": 3.24945642879828, "percentage": 64.99, "elapsed_time": "2:51:30", "remaining_time": "1:32:23", "throughput": 8712.4, "total_tokens": 89654864} +{"current_steps": 133015, "total_steps": 204665, "loss": 0.0, "lr": 6.582754817112609e-07, "epoch": 3.249578579630127, "percentage": 64.99, "elapsed_time": "2:51:30", "remaining_time": "1:32:23", "throughput": 8712.44, "total_tokens": 89658384} +{"current_steps": 133020, "total_steps": 204665, "loss": 0.0572, "lr": 6.581953390517007e-07, "epoch": 3.2497007304619743, "percentage": 64.99, "elapsed_time": "2:51:31", "remaining_time": "1:32:22", "throughput": 8712.45, "total_tokens": 89661392} +{"current_steps": 133025, "total_steps": 204665, "loss": 0.0, "lr": 6.581151988778354e-07, "epoch": 3.2498228812938215, "percentage": 65.0, "elapsed_time": "2:51:31", "remaining_time": "1:32:22", "throughput": 8712.47, "total_tokens": 89664592} +{"current_steps": 133030, "total_steps": 204665, "loss": 0.0, "lr": 6.580350611902488e-07, "epoch": 3.2499450321256687, "percentage": 65.0, "elapsed_time": "2:51:31", "remaining_time": "1:32:22", "throughput": 8712.52, "total_tokens": 89668176} +{"current_steps": 133035, "total_steps": 204665, "loss": 0.0, "lr": 6.579549259895227e-07, "epoch": 3.250067182957516, "percentage": 65.0, "elapsed_time": "2:51:32", "remaining_time": "1:32:21", "throughput": 8712.54, "total_tokens": 89671440} +{"current_steps": 133040, "total_steps": 204665, "loss": 0.0, "lr": 6.578747932762405e-07, "epoch": 3.250189333789363, "percentage": 65.0, "elapsed_time": "2:51:32", "remaining_time": "1:32:21", "throughput": 8712.56, "total_tokens": 89674576} +{"current_steps": 133042, "total_steps": 204665, "eval_loss": 0.26102444529533386, "epoch": 3.250238194122102, "percentage": 65.0, "elapsed_time": "2:52:20", "remaining_time": "1:32:46", "throughput": 8672.3, "total_tokens": 89675984} +{"current_steps": 133045, "total_steps": 204665, "loss": 0.0, "lr": 6.577946630509852e-07, "epoch": 3.2503114846212102, "percentage": 65.01, "elapsed_time": "2:52:54", "remaining_time": "1:33:04", "throughput": 8644.22, "total_tokens": 89678096} +{"current_steps": 133050, "total_steps": 204665, "loss": 0.1774, "lr": 6.577145353143388e-07, "epoch": 3.2504336354530574, "percentage": 65.01, "elapsed_time": "2:52:54", "remaining_time": "1:33:04", "throughput": 8644.26, "total_tokens": 89681424} +{"current_steps": 133055, "total_steps": 204665, "loss": 0.0, "lr": 6.576344100668847e-07, "epoch": 3.2505557862849046, "percentage": 65.01, "elapsed_time": "2:52:55", "remaining_time": "1:33:03", "throughput": 8644.25, "total_tokens": 89684240} +{"current_steps": 133060, "total_steps": 204665, "loss": 0.0, "lr": 6.575542873092051e-07, "epoch": 3.250677937116752, "percentage": 65.01, "elapsed_time": "2:52:55", "remaining_time": "1:33:03", "throughput": 8644.33, "total_tokens": 89688208} +{"current_steps": 133065, "total_steps": 204665, "loss": 0.0943, "lr": 6.574741670418829e-07, "epoch": 3.250800087948599, "percentage": 65.02, "elapsed_time": "2:52:55", "remaining_time": "1:33:03", "throughput": 8644.36, "total_tokens": 89691536} +{"current_steps": 133070, "total_steps": 204665, "loss": 0.0, "lr": 6.573940492655005e-07, "epoch": 3.250922238780446, "percentage": 65.02, "elapsed_time": "2:52:56", "remaining_time": "1:33:02", "throughput": 8644.39, "total_tokens": 89694864} +{"current_steps": 133075, "total_steps": 204665, "loss": 0.0, "lr": 6.573139339806406e-07, "epoch": 3.2510443896122934, "percentage": 65.02, "elapsed_time": "2:52:56", "remaining_time": "1:33:02", "throughput": 8644.41, "total_tokens": 89698064} +{"current_steps": 133080, "total_steps": 204665, "loss": 0.0, "lr": 6.572338211878864e-07, "epoch": 3.2511665404441406, "percentage": 65.02, "elapsed_time": "2:52:56", "remaining_time": "1:33:01", "throughput": 8644.43, "total_tokens": 89701328} +{"current_steps": 133085, "total_steps": 204665, "loss": 0.0313, "lr": 6.571537108878195e-07, "epoch": 3.2512886912759877, "percentage": 65.03, "elapsed_time": "2:52:57", "remaining_time": "1:33:01", "throughput": 8644.49, "total_tokens": 89705040} +{"current_steps": 133090, "total_steps": 204665, "loss": 0.0, "lr": 6.570736030810236e-07, "epoch": 3.251410842107835, "percentage": 65.03, "elapsed_time": "2:52:57", "remaining_time": "1:33:00", "throughput": 8644.56, "total_tokens": 89708880} +{"current_steps": 133095, "total_steps": 204665, "loss": 0.0, "lr": 6.569934977680802e-07, "epoch": 3.2515329929396817, "percentage": 65.03, "elapsed_time": "2:52:57", "remaining_time": "1:33:00", "throughput": 8644.61, "total_tokens": 89712464} +{"current_steps": 133100, "total_steps": 204665, "loss": 0.0001, "lr": 6.569133949495724e-07, "epoch": 3.2516551437715293, "percentage": 65.03, "elapsed_time": "2:52:58", "remaining_time": "1:33:00", "throughput": 8644.68, "total_tokens": 89716176} +{"current_steps": 133105, "total_steps": 204665, "loss": 0.0, "lr": 6.568332946260831e-07, "epoch": 3.251777294603376, "percentage": 65.04, "elapsed_time": "2:52:58", "remaining_time": "1:32:59", "throughput": 8644.67, "total_tokens": 89719056} +{"current_steps": 133110, "total_steps": 204665, "loss": 0.0, "lr": 6.56753196798194e-07, "epoch": 3.2518994454352232, "percentage": 65.04, "elapsed_time": "2:52:58", "remaining_time": "1:32:59", "throughput": 8644.71, "total_tokens": 89722448} +{"current_steps": 133115, "total_steps": 204665, "loss": 0.0001, "lr": 6.566731014664881e-07, "epoch": 3.2520215962670704, "percentage": 65.04, "elapsed_time": "2:52:59", "remaining_time": "1:32:58", "throughput": 8644.73, "total_tokens": 89725648} +{"current_steps": 133120, "total_steps": 204665, "loss": 0.0001, "lr": 6.565930086315479e-07, "epoch": 3.2521437470989176, "percentage": 65.04, "elapsed_time": "2:52:59", "remaining_time": "1:32:58", "throughput": 8644.73, "total_tokens": 89728592} +{"current_steps": 133125, "total_steps": 204665, "loss": 0.0, "lr": 6.565129182939557e-07, "epoch": 3.252265897930765, "percentage": 65.05, "elapsed_time": "2:52:59", "remaining_time": "1:32:58", "throughput": 8644.75, "total_tokens": 89731792} +{"current_steps": 133130, "total_steps": 204665, "loss": 0.0, "lr": 6.564328304542936e-07, "epoch": 3.252388048762612, "percentage": 65.05, "elapsed_time": "2:53:00", "remaining_time": "1:32:57", "throughput": 8644.81, "total_tokens": 89735504} +{"current_steps": 133135, "total_steps": 204665, "loss": 0.0002, "lr": 6.563527451131443e-07, "epoch": 3.252510199594459, "percentage": 65.05, "elapsed_time": "2:53:00", "remaining_time": "1:32:57", "throughput": 8644.87, "total_tokens": 89739280} +{"current_steps": 133140, "total_steps": 204665, "loss": 0.0, "lr": 6.562726622710908e-07, "epoch": 3.2526323504263064, "percentage": 65.05, "elapsed_time": "2:53:01", "remaining_time": "1:32:56", "throughput": 8644.95, "total_tokens": 89743248} +{"current_steps": 133145, "total_steps": 204665, "loss": 0.0, "lr": 6.561925819287144e-07, "epoch": 3.2527545012581536, "percentage": 65.06, "elapsed_time": "2:53:01", "remaining_time": "1:32:56", "throughput": 8644.96, "total_tokens": 89746384} +{"current_steps": 133150, "total_steps": 204665, "loss": 0.0838, "lr": 6.561125040865984e-07, "epoch": 3.2528766520900008, "percentage": 65.06, "elapsed_time": "2:53:01", "remaining_time": "1:32:56", "throughput": 8645.02, "total_tokens": 89749968} +{"current_steps": 133155, "total_steps": 204665, "loss": 0.0, "lr": 6.560324287453246e-07, "epoch": 3.252998802921848, "percentage": 65.06, "elapsed_time": "2:53:02", "remaining_time": "1:32:55", "throughput": 8645.03, "total_tokens": 89753040} +{"current_steps": 133160, "total_steps": 204665, "loss": 0.0778, "lr": 6.559523559054758e-07, "epoch": 3.253120953753695, "percentage": 65.06, "elapsed_time": "2:53:02", "remaining_time": "1:32:55", "throughput": 8645.07, "total_tokens": 89756496} +{"current_steps": 133165, "total_steps": 204665, "loss": 0.0002, "lr": 6.558722855676335e-07, "epoch": 3.2532431045855423, "percentage": 65.06, "elapsed_time": "2:53:02", "remaining_time": "1:32:54", "throughput": 8645.07, "total_tokens": 89759440} +{"current_steps": 133170, "total_steps": 204665, "loss": 0.0611, "lr": 6.557922177323807e-07, "epoch": 3.2533652554173895, "percentage": 65.07, "elapsed_time": "2:53:03", "remaining_time": "1:32:54", "throughput": 8645.11, "total_tokens": 89762960} +{"current_steps": 133175, "total_steps": 204665, "loss": 0.0609, "lr": 6.557121524002998e-07, "epoch": 3.2534874062492367, "percentage": 65.07, "elapsed_time": "2:53:03", "remaining_time": "1:32:53", "throughput": 8645.16, "total_tokens": 89766544} +{"current_steps": 133180, "total_steps": 204665, "loss": 0.0, "lr": 6.556320895719723e-07, "epoch": 3.253609557081084, "percentage": 65.07, "elapsed_time": "2:53:03", "remaining_time": "1:32:53", "throughput": 8645.18, "total_tokens": 89769744} +{"current_steps": 133185, "total_steps": 204665, "loss": 0.0, "lr": 6.555520292479812e-07, "epoch": 3.253731707912931, "percentage": 65.07, "elapsed_time": "2:53:04", "remaining_time": "1:32:53", "throughput": 8645.25, "total_tokens": 89773584} +{"current_steps": 133190, "total_steps": 204665, "loss": 0.0002, "lr": 6.554719714289081e-07, "epoch": 3.253853858744778, "percentage": 65.08, "elapsed_time": "2:53:04", "remaining_time": "1:32:52", "throughput": 8645.27, "total_tokens": 89776656} +{"current_steps": 133195, "total_steps": 204665, "loss": 0.0557, "lr": 6.553919161153354e-07, "epoch": 3.253976009576625, "percentage": 65.08, "elapsed_time": "2:53:04", "remaining_time": "1:32:52", "throughput": 8645.3, "total_tokens": 89779984} +{"current_steps": 133200, "total_steps": 204665, "loss": 0.0, "lr": 6.553118633078457e-07, "epoch": 3.254098160408472, "percentage": 65.08, "elapsed_time": "2:53:05", "remaining_time": "1:32:51", "throughput": 8645.35, "total_tokens": 89783568} +{"current_steps": 133205, "total_steps": 204665, "loss": 0.0001, "lr": 6.552318130070206e-07, "epoch": 3.2542203112403194, "percentage": 65.08, "elapsed_time": "2:53:05", "remaining_time": "1:32:51", "throughput": 8645.38, "total_tokens": 89786896} +{"current_steps": 133210, "total_steps": 204665, "loss": 0.0513, "lr": 6.551517652134428e-07, "epoch": 3.2543424620721666, "percentage": 65.09, "elapsed_time": "2:53:05", "remaining_time": "1:32:51", "throughput": 8645.39, "total_tokens": 89789904} +{"current_steps": 133215, "total_steps": 204665, "loss": 0.0001, "lr": 6.550717199276939e-07, "epoch": 3.254464612904014, "percentage": 65.09, "elapsed_time": "2:53:06", "remaining_time": "1:32:50", "throughput": 8645.41, "total_tokens": 89793104} +{"current_steps": 133220, "total_steps": 204665, "loss": 0.0344, "lr": 6.549916771503564e-07, "epoch": 3.254586763735861, "percentage": 65.09, "elapsed_time": "2:53:06", "remaining_time": "1:32:50", "throughput": 8645.42, "total_tokens": 89796304} +{"current_steps": 133225, "total_steps": 204665, "loss": 0.0058, "lr": 6.549116368820121e-07, "epoch": 3.254708914567708, "percentage": 65.09, "elapsed_time": "2:53:06", "remaining_time": "1:32:49", "throughput": 8645.41, "total_tokens": 89799184} +{"current_steps": 133230, "total_steps": 204665, "loss": 0.0377, "lr": 6.548315991232428e-07, "epoch": 3.2548310653995554, "percentage": 65.1, "elapsed_time": "2:53:07", "remaining_time": "1:32:49", "throughput": 8645.43, "total_tokens": 89802384} +{"current_steps": 133235, "total_steps": 204665, "loss": 0.0001, "lr": 6.547515638746315e-07, "epoch": 3.2549532162314025, "percentage": 65.1, "elapsed_time": "2:53:07", "remaining_time": "1:32:49", "throughput": 8645.49, "total_tokens": 89806032} +{"current_steps": 133240, "total_steps": 204665, "loss": 0.0137, "lr": 6.546715311367593e-07, "epoch": 3.2550753670632497, "percentage": 65.1, "elapsed_time": "2:53:07", "remaining_time": "1:32:48", "throughput": 8645.49, "total_tokens": 89808976} +{"current_steps": 133245, "total_steps": 204665, "loss": 0.0284, "lr": 6.545915009102091e-07, "epoch": 3.255197517895097, "percentage": 65.1, "elapsed_time": "2:53:08", "remaining_time": "1:32:48", "throughput": 8645.59, "total_tokens": 89813200} +{"current_steps": 133250, "total_steps": 204665, "loss": 0.0005, "lr": 6.545114731955619e-07, "epoch": 3.255319668726944, "percentage": 65.11, "elapsed_time": "2:53:08", "remaining_time": "1:32:47", "throughput": 8645.61, "total_tokens": 89816400} +{"current_steps": 133255, "total_steps": 204665, "loss": 0.0, "lr": 6.544314479934005e-07, "epoch": 3.2554418195587913, "percentage": 65.11, "elapsed_time": "2:53:09", "remaining_time": "1:32:47", "throughput": 8645.64, "total_tokens": 89819728} +{"current_steps": 133260, "total_steps": 204665, "loss": 0.0001, "lr": 6.543514253043063e-07, "epoch": 3.2555639703906385, "percentage": 65.11, "elapsed_time": "2:53:09", "remaining_time": "1:32:46", "throughput": 8645.69, "total_tokens": 89823312} +{"current_steps": 133265, "total_steps": 204665, "loss": 0.0, "lr": 6.542714051288618e-07, "epoch": 3.2556861212224857, "percentage": 65.11, "elapsed_time": "2:53:09", "remaining_time": "1:32:46", "throughput": 8645.73, "total_tokens": 89826832} +{"current_steps": 133270, "total_steps": 204665, "loss": 0.0, "lr": 6.541913874676486e-07, "epoch": 3.255808272054333, "percentage": 65.12, "elapsed_time": "2:53:10", "remaining_time": "1:32:46", "throughput": 8645.75, "total_tokens": 89829968} +{"current_steps": 133275, "total_steps": 204665, "loss": 0.0465, "lr": 6.541113723212484e-07, "epoch": 3.2559304228861796, "percentage": 65.12, "elapsed_time": "2:53:10", "remaining_time": "1:32:45", "throughput": 8645.77, "total_tokens": 89833168} +{"current_steps": 133280, "total_steps": 204665, "loss": 0.0002, "lr": 6.540313596902438e-07, "epoch": 3.2560525737180273, "percentage": 65.12, "elapsed_time": "2:53:10", "remaining_time": "1:32:45", "throughput": 8645.82, "total_tokens": 89836752} +{"current_steps": 133285, "total_steps": 204665, "loss": 0.0, "lr": 6.539513495752155e-07, "epoch": 3.256174724549874, "percentage": 65.12, "elapsed_time": "2:53:11", "remaining_time": "1:32:44", "throughput": 8645.86, "total_tokens": 89840208} +{"current_steps": 133290, "total_steps": 204665, "loss": 0.0002, "lr": 6.538713419767463e-07, "epoch": 3.256296875381721, "percentage": 65.13, "elapsed_time": "2:53:11", "remaining_time": "1:32:44", "throughput": 8645.85, "total_tokens": 89843088} +{"current_steps": 133295, "total_steps": 204665, "loss": 0.0, "lr": 6.537913368954182e-07, "epoch": 3.2564190262135684, "percentage": 65.13, "elapsed_time": "2:53:11", "remaining_time": "1:32:44", "throughput": 8645.88, "total_tokens": 89846416} +{"current_steps": 133300, "total_steps": 204665, "loss": 0.0001, "lr": 6.537113343318122e-07, "epoch": 3.2565411770454156, "percentage": 65.13, "elapsed_time": "2:53:12", "remaining_time": "1:32:43", "throughput": 8645.93, "total_tokens": 89850000} +{"current_steps": 133305, "total_steps": 204665, "loss": 0.0001, "lr": 6.536313342865109e-07, "epoch": 3.2566633278772628, "percentage": 65.13, "elapsed_time": "2:53:12", "remaining_time": "1:32:43", "throughput": 8645.93, "total_tokens": 89853008} +{"current_steps": 133310, "total_steps": 204665, "loss": 0.0, "lr": 6.535513367600953e-07, "epoch": 3.25678547870911, "percentage": 65.14, "elapsed_time": "2:53:12", "remaining_time": "1:32:42", "throughput": 8645.97, "total_tokens": 89856464} +{"current_steps": 133315, "total_steps": 204665, "loss": 0.0343, "lr": 6.534713417531479e-07, "epoch": 3.256907629540957, "percentage": 65.14, "elapsed_time": "2:53:13", "remaining_time": "1:32:42", "throughput": 8645.96, "total_tokens": 89859216} +{"current_steps": 133320, "total_steps": 204665, "loss": 0.0001, "lr": 6.533913492662497e-07, "epoch": 3.2570297803728043, "percentage": 65.14, "elapsed_time": "2:53:13", "remaining_time": "1:32:42", "throughput": 8646.02, "total_tokens": 89862992} +{"current_steps": 133325, "total_steps": 204665, "loss": 0.0, "lr": 6.533113592999833e-07, "epoch": 3.2571519312046515, "percentage": 65.14, "elapsed_time": "2:53:13", "remaining_time": "1:32:41", "throughput": 8646.03, "total_tokens": 89866000} +{"current_steps": 133330, "total_steps": 204665, "loss": 0.0247, "lr": 6.532313718549299e-07, "epoch": 3.2572740820364987, "percentage": 65.15, "elapsed_time": "2:53:14", "remaining_time": "1:32:41", "throughput": 8646.05, "total_tokens": 89869200} +{"current_steps": 133335, "total_steps": 204665, "loss": 0.0, "lr": 6.531513869316707e-07, "epoch": 3.257396232868346, "percentage": 65.15, "elapsed_time": "2:53:14", "remaining_time": "1:32:40", "throughput": 8646.08, "total_tokens": 89872528} +{"current_steps": 133340, "total_steps": 204665, "loss": 0.0, "lr": 6.530714045307886e-07, "epoch": 3.257518383700193, "percentage": 65.15, "elapsed_time": "2:53:14", "remaining_time": "1:32:40", "throughput": 8646.12, "total_tokens": 89875920} +{"current_steps": 133345, "total_steps": 204665, "loss": 0.0, "lr": 6.52991424652864e-07, "epoch": 3.2576405345320403, "percentage": 65.15, "elapsed_time": "2:53:15", "remaining_time": "1:32:39", "throughput": 8646.18, "total_tokens": 89879568} +{"current_steps": 133350, "total_steps": 204665, "loss": 0.0418, "lr": 6.529114472984791e-07, "epoch": 3.2577626853638875, "percentage": 65.16, "elapsed_time": "2:53:15", "remaining_time": "1:32:39", "throughput": 8646.19, "total_tokens": 89882640} +{"current_steps": 133355, "total_steps": 204665, "loss": 0.0001, "lr": 6.52831472468216e-07, "epoch": 3.2578848361957347, "percentage": 65.16, "elapsed_time": "2:53:15", "remaining_time": "1:32:39", "throughput": 8646.23, "total_tokens": 89886160} +{"current_steps": 133360, "total_steps": 204665, "loss": 0.0004, "lr": 6.527515001626554e-07, "epoch": 3.258006987027582, "percentage": 65.16, "elapsed_time": "2:53:16", "remaining_time": "1:32:38", "throughput": 8646.28, "total_tokens": 89889680} +{"current_steps": 133365, "total_steps": 204665, "loss": 0.0, "lr": 6.526715303823795e-07, "epoch": 3.258129137859429, "percentage": 65.16, "elapsed_time": "2:53:16", "remaining_time": "1:32:38", "throughput": 8646.33, "total_tokens": 89893264} +{"current_steps": 133370, "total_steps": 204665, "loss": 0.0, "lr": 6.525915631279697e-07, "epoch": 3.258251288691276, "percentage": 65.17, "elapsed_time": "2:53:17", "remaining_time": "1:32:37", "throughput": 8646.36, "total_tokens": 89896592} +{"current_steps": 133375, "total_steps": 204665, "loss": 0.0, "lr": 6.525115984000073e-07, "epoch": 3.258373439523123, "percentage": 65.17, "elapsed_time": "2:53:17", "remaining_time": "1:32:37", "throughput": 8646.4, "total_tokens": 89900048} +{"current_steps": 133380, "total_steps": 204665, "loss": 0.036, "lr": 6.524316361990741e-07, "epoch": 3.25849559035497, "percentage": 65.17, "elapsed_time": "2:53:17", "remaining_time": "1:32:37", "throughput": 8646.46, "total_tokens": 89903760} +{"current_steps": 133385, "total_steps": 204665, "loss": 0.0, "lr": 6.523516765257513e-07, "epoch": 3.2586177411868174, "percentage": 65.17, "elapsed_time": "2:53:18", "remaining_time": "1:32:36", "throughput": 8646.51, "total_tokens": 89907344} +{"current_steps": 133390, "total_steps": 204665, "loss": 0.0, "lr": 6.522717193806211e-07, "epoch": 3.2587398920186645, "percentage": 65.17, "elapsed_time": "2:53:18", "remaining_time": "1:32:36", "throughput": 8646.52, "total_tokens": 89910416} +{"current_steps": 133395, "total_steps": 204665, "loss": 0.0, "lr": 6.52191764764264e-07, "epoch": 3.2588620428505117, "percentage": 65.18, "elapsed_time": "2:53:18", "remaining_time": "1:32:35", "throughput": 8646.58, "total_tokens": 89914064} +{"current_steps": 133400, "total_steps": 204665, "loss": 0.0, "lr": 6.521118126772625e-07, "epoch": 3.258984193682359, "percentage": 65.18, "elapsed_time": "2:53:19", "remaining_time": "1:32:35", "throughput": 8646.62, "total_tokens": 89917520} +{"current_steps": 133405, "total_steps": 204665, "loss": 0.0001, "lr": 6.520318631201969e-07, "epoch": 3.259106344514206, "percentage": 65.18, "elapsed_time": "2:53:19", "remaining_time": "1:32:35", "throughput": 8646.62, "total_tokens": 89920464} +{"current_steps": 133410, "total_steps": 204665, "loss": 0.0, "lr": 6.519519160936495e-07, "epoch": 3.2592284953460533, "percentage": 65.18, "elapsed_time": "2:53:19", "remaining_time": "1:32:34", "throughput": 8646.63, "total_tokens": 89923536} +{"current_steps": 133415, "total_steps": 204665, "loss": 0.0, "lr": 6.518719715982011e-07, "epoch": 3.2593506461779005, "percentage": 65.19, "elapsed_time": "2:53:20", "remaining_time": "1:32:34", "throughput": 8646.68, "total_tokens": 89927184} +{"current_steps": 133420, "total_steps": 204665, "loss": 0.0001, "lr": 6.517920296344335e-07, "epoch": 3.2594727970097477, "percentage": 65.19, "elapsed_time": "2:53:20", "remaining_time": "1:32:33", "throughput": 8646.71, "total_tokens": 89930512} +{"current_steps": 133425, "total_steps": 204665, "loss": 0.0003, "lr": 6.517120902029281e-07, "epoch": 3.259594947841595, "percentage": 65.19, "elapsed_time": "2:53:20", "remaining_time": "1:32:33", "throughput": 8646.74, "total_tokens": 89933776} +{"current_steps": 133430, "total_steps": 204665, "loss": 0.0, "lr": 6.516321533042659e-07, "epoch": 3.259717098673442, "percentage": 65.19, "elapsed_time": "2:53:21", "remaining_time": "1:32:32", "throughput": 8646.76, "total_tokens": 89936976} +{"current_steps": 133435, "total_steps": 204665, "loss": 0.0309, "lr": 6.515522189390286e-07, "epoch": 3.2598392495052892, "percentage": 65.2, "elapsed_time": "2:53:21", "remaining_time": "1:32:32", "throughput": 8646.79, "total_tokens": 89940368} +{"current_steps": 133440, "total_steps": 204665, "loss": 0.0, "lr": 6.514722871077969e-07, "epoch": 3.2599614003371364, "percentage": 65.2, "elapsed_time": "2:53:21", "remaining_time": "1:32:32", "throughput": 8646.8, "total_tokens": 89943376} +{"current_steps": 133445, "total_steps": 204665, "loss": 0.0, "lr": 6.513923578111525e-07, "epoch": 3.2600835511689836, "percentage": 65.2, "elapsed_time": "2:53:22", "remaining_time": "1:32:31", "throughput": 8646.8, "total_tokens": 89946320} +{"current_steps": 133450, "total_steps": 204665, "loss": 0.0, "lr": 6.513124310496769e-07, "epoch": 3.260205702000831, "percentage": 65.2, "elapsed_time": "2:53:22", "remaining_time": "1:32:31", "throughput": 8646.83, "total_tokens": 89949648} +{"current_steps": 133455, "total_steps": 204665, "loss": 0.0, "lr": 6.512325068239508e-07, "epoch": 3.2603278528326776, "percentage": 65.21, "elapsed_time": "2:53:22", "remaining_time": "1:32:30", "throughput": 8646.85, "total_tokens": 89952912} +{"current_steps": 133460, "total_steps": 204665, "loss": 0.0, "lr": 6.511525851345562e-07, "epoch": 3.260450003664525, "percentage": 65.21, "elapsed_time": "2:53:23", "remaining_time": "1:32:30", "throughput": 8646.9, "total_tokens": 89956560} +{"current_steps": 133465, "total_steps": 204665, "loss": 0.0265, "lr": 6.510726659820733e-07, "epoch": 3.260572154496372, "percentage": 65.21, "elapsed_time": "2:53:23", "remaining_time": "1:32:30", "throughput": 8646.91, "total_tokens": 89959632} +{"current_steps": 133470, "total_steps": 204665, "loss": 0.0002, "lr": 6.509927493670842e-07, "epoch": 3.260694305328219, "percentage": 65.21, "elapsed_time": "2:53:24", "remaining_time": "1:32:29", "throughput": 8646.92, "total_tokens": 89962704} +{"current_steps": 133475, "total_steps": 204665, "loss": 0.0, "lr": 6.509128352901694e-07, "epoch": 3.2608164561600663, "percentage": 65.22, "elapsed_time": "2:53:24", "remaining_time": "1:32:29", "throughput": 8646.92, "total_tokens": 89965712} +{"current_steps": 133480, "total_steps": 204665, "loss": 0.0, "lr": 6.508329237519106e-07, "epoch": 3.2609386069919135, "percentage": 65.22, "elapsed_time": "2:53:24", "remaining_time": "1:32:28", "throughput": 8646.94, "total_tokens": 89968976} +{"current_steps": 133485, "total_steps": 204665, "loss": 0.0, "lr": 6.507530147528888e-07, "epoch": 3.2610607578237607, "percentage": 65.22, "elapsed_time": "2:53:25", "remaining_time": "1:32:28", "throughput": 8646.95, "total_tokens": 89972176} +{"current_steps": 133490, "total_steps": 204665, "loss": 0.0, "lr": 6.506731082936845e-07, "epoch": 3.261182908655608, "percentage": 65.22, "elapsed_time": "2:53:25", "remaining_time": "1:32:28", "throughput": 8646.94, "total_tokens": 89975056} +{"current_steps": 133495, "total_steps": 204665, "loss": 0.0, "lr": 6.505932043748798e-07, "epoch": 3.261305059487455, "percentage": 65.23, "elapsed_time": "2:53:25", "remaining_time": "1:32:27", "throughput": 8646.99, "total_tokens": 89978640} +{"current_steps": 133500, "total_steps": 204665, "loss": 0.0002, "lr": 6.505133029970551e-07, "epoch": 3.2614272103193023, "percentage": 65.23, "elapsed_time": "2:53:26", "remaining_time": "1:32:27", "throughput": 8647.04, "total_tokens": 89982288} +{"current_steps": 133505, "total_steps": 204665, "loss": 0.0, "lr": 6.504334041607914e-07, "epoch": 3.2615493611511495, "percentage": 65.23, "elapsed_time": "2:53:26", "remaining_time": "1:32:26", "throughput": 8647.1, "total_tokens": 89986064} +{"current_steps": 133510, "total_steps": 204665, "loss": 0.0, "lr": 6.503535078666705e-07, "epoch": 3.2616715119829967, "percentage": 65.23, "elapsed_time": "2:53:26", "remaining_time": "1:32:26", "throughput": 8647.12, "total_tokens": 89989328} +{"current_steps": 133515, "total_steps": 204665, "loss": 0.0527, "lr": 6.502736141152724e-07, "epoch": 3.261793662814844, "percentage": 65.24, "elapsed_time": "2:53:27", "remaining_time": "1:32:25", "throughput": 8647.14, "total_tokens": 89992592} +{"current_steps": 133520, "total_steps": 204665, "loss": 0.0548, "lr": 6.501937229071793e-07, "epoch": 3.261915813646691, "percentage": 65.24, "elapsed_time": "2:53:27", "remaining_time": "1:32:25", "throughput": 8647.16, "total_tokens": 89995856} +{"current_steps": 133525, "total_steps": 204665, "loss": 0.0, "lr": 6.501138342429713e-07, "epoch": 3.262037964478538, "percentage": 65.24, "elapsed_time": "2:53:27", "remaining_time": "1:32:25", "throughput": 8647.2, "total_tokens": 89999312} +{"current_steps": 133530, "total_steps": 204665, "loss": 0.0, "lr": 6.500339481232296e-07, "epoch": 3.2621601153103854, "percentage": 65.24, "elapsed_time": "2:53:28", "remaining_time": "1:32:24", "throughput": 8647.22, "total_tokens": 90002512} +{"current_steps": 133535, "total_steps": 204665, "loss": 0.0, "lr": 6.49954064548535e-07, "epoch": 3.2622822661422326, "percentage": 65.25, "elapsed_time": "2:53:28", "remaining_time": "1:32:24", "throughput": 8647.23, "total_tokens": 90005520} +{"current_steps": 133540, "total_steps": 204665, "loss": 0.0, "lr": 6.498741835194684e-07, "epoch": 3.2624044169740793, "percentage": 65.25, "elapsed_time": "2:53:28", "remaining_time": "1:32:23", "throughput": 8647.24, "total_tokens": 90008656} +{"current_steps": 133545, "total_steps": 204665, "loss": 0.0, "lr": 6.497943050366115e-07, "epoch": 3.262526567805927, "percentage": 65.25, "elapsed_time": "2:53:29", "remaining_time": "1:32:23", "throughput": 8647.32, "total_tokens": 90012624} +{"current_steps": 133550, "total_steps": 204665, "loss": 0.0, "lr": 6.49714429100544e-07, "epoch": 3.2626487186377737, "percentage": 65.25, "elapsed_time": "2:53:29", "remaining_time": "1:32:23", "throughput": 8647.32, "total_tokens": 90015632} +{"current_steps": 133555, "total_steps": 204665, "loss": 0.0441, "lr": 6.496345557118478e-07, "epoch": 3.262770869469621, "percentage": 65.26, "elapsed_time": "2:53:30", "remaining_time": "1:32:22", "throughput": 8647.38, "total_tokens": 90019344} +{"current_steps": 133560, "total_steps": 204665, "loss": 0.075, "lr": 6.495546848711031e-07, "epoch": 3.262893020301468, "percentage": 65.26, "elapsed_time": "2:53:30", "remaining_time": "1:32:22", "throughput": 8647.4, "total_tokens": 90022672} +{"current_steps": 133565, "total_steps": 204665, "loss": 0.0, "lr": 6.494748165788912e-07, "epoch": 3.2630151711333153, "percentage": 65.26, "elapsed_time": "2:53:30", "remaining_time": "1:32:21", "throughput": 8647.44, "total_tokens": 90026128} +{"current_steps": 133570, "total_steps": 204665, "loss": 0.0004, "lr": 6.493949508357926e-07, "epoch": 3.2631373219651625, "percentage": 65.26, "elapsed_time": "2:53:31", "remaining_time": "1:32:21", "throughput": 8647.47, "total_tokens": 90029456} +{"current_steps": 133575, "total_steps": 204665, "loss": 0.0, "lr": 6.493150876423882e-07, "epoch": 3.2632594727970097, "percentage": 65.27, "elapsed_time": "2:53:31", "remaining_time": "1:32:21", "throughput": 8647.48, "total_tokens": 90032592} +{"current_steps": 133580, "total_steps": 204665, "loss": 0.0, "lr": 6.492352269992588e-07, "epoch": 3.263381623628857, "percentage": 65.27, "elapsed_time": "2:53:31", "remaining_time": "1:32:20", "throughput": 8647.53, "total_tokens": 90036240} +{"current_steps": 133585, "total_steps": 204665, "loss": 0.0, "lr": 6.491553689069853e-07, "epoch": 3.263503774460704, "percentage": 65.27, "elapsed_time": "2:53:32", "remaining_time": "1:32:20", "throughput": 8647.54, "total_tokens": 90039376} +{"current_steps": 133590, "total_steps": 204665, "loss": 0.1105, "lr": 6.490755133661484e-07, "epoch": 3.2636259252925512, "percentage": 65.27, "elapsed_time": "2:53:32", "remaining_time": "1:32:19", "throughput": 8647.55, "total_tokens": 90042448} +{"current_steps": 133595, "total_steps": 204665, "loss": 0.0406, "lr": 6.489956603773284e-07, "epoch": 3.2637480761243984, "percentage": 65.27, "elapsed_time": "2:53:32", "remaining_time": "1:32:19", "throughput": 8647.56, "total_tokens": 90045648} +{"current_steps": 133600, "total_steps": 204665, "loss": 0.0001, "lr": 6.489158099411062e-07, "epoch": 3.2638702269562456, "percentage": 65.28, "elapsed_time": "2:53:33", "remaining_time": "1:32:19", "throughput": 8647.6, "total_tokens": 90049104} +{"current_steps": 133605, "total_steps": 204665, "loss": 0.056, "lr": 6.488359620580634e-07, "epoch": 3.263992377788093, "percentage": 65.28, "elapsed_time": "2:53:33", "remaining_time": "1:32:18", "throughput": 8647.63, "total_tokens": 90052560} +{"current_steps": 133610, "total_steps": 204665, "loss": 0.0, "lr": 6.487561167287794e-07, "epoch": 3.26411452861994, "percentage": 65.28, "elapsed_time": "2:53:33", "remaining_time": "1:32:18", "throughput": 8647.65, "total_tokens": 90055760} +{"current_steps": 133615, "total_steps": 204665, "loss": 0.0, "lr": 6.486762739538356e-07, "epoch": 3.264236679451787, "percentage": 65.28, "elapsed_time": "2:53:34", "remaining_time": "1:32:17", "throughput": 8647.7, "total_tokens": 90059216} +{"current_steps": 133620, "total_steps": 204665, "loss": 0.0348, "lr": 6.485964337338124e-07, "epoch": 3.2643588302836344, "percentage": 65.29, "elapsed_time": "2:53:34", "remaining_time": "1:32:17", "throughput": 8647.73, "total_tokens": 90062672} +{"current_steps": 133625, "total_steps": 204665, "loss": 0.0686, "lr": 6.485165960692906e-07, "epoch": 3.2644809811154816, "percentage": 65.29, "elapsed_time": "2:53:34", "remaining_time": "1:32:16", "throughput": 8647.75, "total_tokens": 90065808} +{"current_steps": 133630, "total_steps": 204665, "loss": 0.0004, "lr": 6.484367609608502e-07, "epoch": 3.2646031319473288, "percentage": 65.29, "elapsed_time": "2:53:35", "remaining_time": "1:32:16", "throughput": 8647.8, "total_tokens": 90069392} +{"current_steps": 133635, "total_steps": 204665, "loss": 0.0, "lr": 6.483569284090725e-07, "epoch": 3.2647252827791755, "percentage": 65.29, "elapsed_time": "2:53:35", "remaining_time": "1:32:16", "throughput": 8647.81, "total_tokens": 90072400} +{"current_steps": 133640, "total_steps": 204665, "loss": 0.0, "lr": 6.482770984145381e-07, "epoch": 3.2648474336110227, "percentage": 65.3, "elapsed_time": "2:53:35", "remaining_time": "1:32:15", "throughput": 8647.87, "total_tokens": 90076048} +{"current_steps": 133645, "total_steps": 204665, "loss": 0.0, "lr": 6.481972709778267e-07, "epoch": 3.26496958444287, "percentage": 65.3, "elapsed_time": "2:53:36", "remaining_time": "1:32:15", "throughput": 8647.87, "total_tokens": 90078992} +{"current_steps": 133650, "total_steps": 204665, "loss": 0.0, "lr": 6.481174460995198e-07, "epoch": 3.265091735274717, "percentage": 65.3, "elapsed_time": "2:53:36", "remaining_time": "1:32:14", "throughput": 8647.92, "total_tokens": 90082576} +{"current_steps": 133655, "total_steps": 204665, "loss": 0.0, "lr": 6.480376237801973e-07, "epoch": 3.2652138861065643, "percentage": 65.3, "elapsed_time": "2:53:37", "remaining_time": "1:32:14", "throughput": 8647.93, "total_tokens": 90085712} +{"current_steps": 133660, "total_steps": 204665, "loss": 0.0, "lr": 6.479578040204396e-07, "epoch": 3.2653360369384115, "percentage": 65.31, "elapsed_time": "2:53:37", "remaining_time": "1:32:14", "throughput": 8647.95, "total_tokens": 90088848} +{"current_steps": 133665, "total_steps": 204665, "loss": 0.0, "lr": 6.478779868208278e-07, "epoch": 3.2654581877702586, "percentage": 65.31, "elapsed_time": "2:53:37", "remaining_time": "1:32:13", "throughput": 8647.96, "total_tokens": 90091920} +{"current_steps": 133670, "total_steps": 204665, "loss": 0.0, "lr": 6.477981721819416e-07, "epoch": 3.265580338602106, "percentage": 65.31, "elapsed_time": "2:53:38", "remaining_time": "1:32:13", "throughput": 8647.98, "total_tokens": 90095120} +{"current_steps": 133675, "total_steps": 204665, "loss": 0.0, "lr": 6.477183601043625e-07, "epoch": 3.265702489433953, "percentage": 65.31, "elapsed_time": "2:53:38", "remaining_time": "1:32:12", "throughput": 8648.0, "total_tokens": 90098192} +{"current_steps": 133680, "total_steps": 204665, "loss": 0.0, "lr": 6.476385505886698e-07, "epoch": 3.2658246402658, "percentage": 65.32, "elapsed_time": "2:53:38", "remaining_time": "1:32:12", "throughput": 8648.01, "total_tokens": 90101264} +{"current_steps": 133685, "total_steps": 204665, "loss": 0.0, "lr": 6.475587436354443e-07, "epoch": 3.2659467910976474, "percentage": 65.32, "elapsed_time": "2:53:39", "remaining_time": "1:32:12", "throughput": 8648.02, "total_tokens": 90104400} +{"current_steps": 133690, "total_steps": 204665, "loss": 0.0, "lr": 6.474789392452666e-07, "epoch": 3.2660689419294946, "percentage": 65.32, "elapsed_time": "2:53:39", "remaining_time": "1:32:11", "throughput": 8648.09, "total_tokens": 90108112} +{"current_steps": 133695, "total_steps": 204665, "loss": 0.0001, "lr": 6.473991374187166e-07, "epoch": 3.266191092761342, "percentage": 65.32, "elapsed_time": "2:53:39", "remaining_time": "1:32:11", "throughput": 8648.09, "total_tokens": 90111056} +{"current_steps": 133700, "total_steps": 204665, "loss": 0.0, "lr": 6.473193381563753e-07, "epoch": 3.266313243593189, "percentage": 65.33, "elapsed_time": "2:53:40", "remaining_time": "1:32:10", "throughput": 8648.13, "total_tokens": 90114512} +{"current_steps": 133705, "total_steps": 204665, "loss": 0.0, "lr": 6.472395414588222e-07, "epoch": 3.266435394425036, "percentage": 65.33, "elapsed_time": "2:53:40", "remaining_time": "1:32:10", "throughput": 8648.16, "total_tokens": 90117840} +{"current_steps": 133710, "total_steps": 204665, "loss": 0.0359, "lr": 6.471597473266385e-07, "epoch": 3.2665575452568834, "percentage": 65.33, "elapsed_time": "2:53:40", "remaining_time": "1:32:09", "throughput": 8648.21, "total_tokens": 90121424} +{"current_steps": 133715, "total_steps": 204665, "loss": 0.0, "lr": 6.470799557604035e-07, "epoch": 3.2666796960887305, "percentage": 65.33, "elapsed_time": "2:53:41", "remaining_time": "1:32:09", "throughput": 8648.25, "total_tokens": 90124880} +{"current_steps": 133720, "total_steps": 204665, "loss": 0.0, "lr": 6.470001667606986e-07, "epoch": 3.2668018469205773, "percentage": 65.34, "elapsed_time": "2:53:41", "remaining_time": "1:32:09", "throughput": 8648.26, "total_tokens": 90127888} +{"current_steps": 133725, "total_steps": 204665, "loss": 0.0, "lr": 6.469203803281027e-07, "epoch": 3.266923997752425, "percentage": 65.34, "elapsed_time": "2:53:41", "remaining_time": "1:32:08", "throughput": 8648.3, "total_tokens": 90131344} +{"current_steps": 133730, "total_steps": 204665, "loss": 0.0453, "lr": 6.468405964631972e-07, "epoch": 3.2670461485842717, "percentage": 65.34, "elapsed_time": "2:53:42", "remaining_time": "1:32:08", "throughput": 8648.3, "total_tokens": 90134288} +{"current_steps": 133735, "total_steps": 204665, "loss": 0.0, "lr": 6.467608151665618e-07, "epoch": 3.267168299416119, "percentage": 65.34, "elapsed_time": "2:53:42", "remaining_time": "1:32:07", "throughput": 8648.33, "total_tokens": 90137616} +{"current_steps": 133740, "total_steps": 204665, "loss": 0.0, "lr": 6.46681036438777e-07, "epoch": 3.267290450247966, "percentage": 65.35, "elapsed_time": "2:53:42", "remaining_time": "1:32:07", "throughput": 8648.38, "total_tokens": 90141264} +{"current_steps": 133745, "total_steps": 204665, "loss": 0.0, "lr": 6.466012602804225e-07, "epoch": 3.2674126010798132, "percentage": 65.35, "elapsed_time": "2:53:43", "remaining_time": "1:32:07", "throughput": 8648.4, "total_tokens": 90144400} +{"current_steps": 133750, "total_steps": 204665, "loss": 0.0, "lr": 6.465214866920785e-07, "epoch": 3.2675347519116604, "percentage": 65.35, "elapsed_time": "2:53:43", "remaining_time": "1:32:06", "throughput": 8648.44, "total_tokens": 90147856} +{"current_steps": 133755, "total_steps": 204665, "loss": 0.0002, "lr": 6.464417156743253e-07, "epoch": 3.2676569027435076, "percentage": 65.35, "elapsed_time": "2:53:43", "remaining_time": "1:32:06", "throughput": 8648.49, "total_tokens": 90151504} +{"current_steps": 133760, "total_steps": 204665, "loss": 0.0, "lr": 6.463619472277436e-07, "epoch": 3.267779053575355, "percentage": 65.36, "elapsed_time": "2:53:44", "remaining_time": "1:32:05", "throughput": 8648.55, "total_tokens": 90155088} +{"current_steps": 133765, "total_steps": 204665, "loss": 0.0, "lr": 6.462821813529125e-07, "epoch": 3.267901204407202, "percentage": 65.36, "elapsed_time": "2:53:44", "remaining_time": "1:32:05", "throughput": 8648.55, "total_tokens": 90158032} +{"current_steps": 133770, "total_steps": 204665, "loss": 0.0043, "lr": 6.462024180504128e-07, "epoch": 3.268023355239049, "percentage": 65.36, "elapsed_time": "2:53:44", "remaining_time": "1:32:05", "throughput": 8648.55, "total_tokens": 90161040} +{"current_steps": 133775, "total_steps": 204665, "loss": 0.0001, "lr": 6.461226573208239e-07, "epoch": 3.2681455060708964, "percentage": 65.36, "elapsed_time": "2:53:45", "remaining_time": "1:32:04", "throughput": 8648.58, "total_tokens": 90164368} +{"current_steps": 133780, "total_steps": 204665, "loss": 0.0, "lr": 6.460428991647265e-07, "epoch": 3.2682676569027436, "percentage": 65.37, "elapsed_time": "2:53:45", "remaining_time": "1:32:04", "throughput": 8648.6, "total_tokens": 90167504} +{"current_steps": 133785, "total_steps": 204665, "loss": 0.0, "lr": 6.459631435827001e-07, "epoch": 3.2683898077345908, "percentage": 65.37, "elapsed_time": "2:53:46", "remaining_time": "1:32:03", "throughput": 8648.64, "total_tokens": 90170960} +{"current_steps": 133790, "total_steps": 204665, "loss": 0.0, "lr": 6.458833905753251e-07, "epoch": 3.268511958566438, "percentage": 65.37, "elapsed_time": "2:53:46", "remaining_time": "1:32:03", "throughput": 8648.66, "total_tokens": 90174160} +{"current_steps": 133795, "total_steps": 204665, "loss": 0.0, "lr": 6.458036401431816e-07, "epoch": 3.268634109398285, "percentage": 65.37, "elapsed_time": "2:53:46", "remaining_time": "1:32:02", "throughput": 8648.7, "total_tokens": 90177616} +{"current_steps": 133800, "total_steps": 204665, "loss": 0.0, "lr": 6.457238922868487e-07, "epoch": 3.2687562602301323, "percentage": 65.38, "elapsed_time": "2:53:47", "remaining_time": "1:32:02", "throughput": 8648.75, "total_tokens": 90181200} +{"current_steps": 133805, "total_steps": 204665, "loss": 0.0, "lr": 6.456441470069076e-07, "epoch": 3.2688784110619795, "percentage": 65.38, "elapsed_time": "2:53:47", "remaining_time": "1:32:02", "throughput": 8648.79, "total_tokens": 90184656} +{"current_steps": 133810, "total_steps": 204665, "loss": 0.0, "lr": 6.45564404303937e-07, "epoch": 3.2690005618938267, "percentage": 65.38, "elapsed_time": "2:53:47", "remaining_time": "1:32:01", "throughput": 8648.82, "total_tokens": 90187920} +{"current_steps": 133815, "total_steps": 204665, "loss": 0.0, "lr": 6.454846641785174e-07, "epoch": 3.2691227127256735, "percentage": 65.38, "elapsed_time": "2:53:48", "remaining_time": "1:32:01", "throughput": 8648.84, "total_tokens": 90191184} +{"current_steps": 133820, "total_steps": 204665, "loss": 0.0, "lr": 6.454049266312291e-07, "epoch": 3.2692448635575206, "percentage": 65.38, "elapsed_time": "2:53:48", "remaining_time": "1:32:00", "throughput": 8648.87, "total_tokens": 90194448} +{"current_steps": 133825, "total_steps": 204665, "loss": 0.0, "lr": 6.453251916626512e-07, "epoch": 3.269367014389368, "percentage": 65.39, "elapsed_time": "2:53:48", "remaining_time": "1:32:00", "throughput": 8648.88, "total_tokens": 90197584} +{"current_steps": 133830, "total_steps": 204665, "loss": 0.0, "lr": 6.452454592733642e-07, "epoch": 3.269489165221215, "percentage": 65.39, "elapsed_time": "2:53:49", "remaining_time": "1:32:00", "throughput": 8648.9, "total_tokens": 90200720} +{"current_steps": 133835, "total_steps": 204665, "loss": 0.0838, "lr": 6.451657294639475e-07, "epoch": 3.269611316053062, "percentage": 65.39, "elapsed_time": "2:53:49", "remaining_time": "1:31:59", "throughput": 8648.96, "total_tokens": 90204496} +{"current_steps": 133840, "total_steps": 204665, "loss": 0.0318, "lr": 6.450860022349811e-07, "epoch": 3.2697334668849094, "percentage": 65.39, "elapsed_time": "2:53:49", "remaining_time": "1:31:59", "throughput": 8649.02, "total_tokens": 90208144} +{"current_steps": 133845, "total_steps": 204665, "loss": 0.0204, "lr": 6.450062775870446e-07, "epoch": 3.2698556177167566, "percentage": 65.4, "elapsed_time": "2:53:50", "remaining_time": "1:31:58", "throughput": 8649.06, "total_tokens": 90211600} +{"current_steps": 133850, "total_steps": 204665, "loss": 0.0, "lr": 6.44926555520718e-07, "epoch": 3.269977768548604, "percentage": 65.4, "elapsed_time": "2:53:50", "remaining_time": "1:31:58", "throughput": 8649.09, "total_tokens": 90214928} +{"current_steps": 133855, "total_steps": 204665, "loss": 0.0451, "lr": 6.44846836036581e-07, "epoch": 3.270099919380451, "percentage": 65.4, "elapsed_time": "2:53:50", "remaining_time": "1:31:58", "throughput": 8649.15, "total_tokens": 90218640} +{"current_steps": 133860, "total_steps": 204665, "loss": 0.105, "lr": 6.447671191352134e-07, "epoch": 3.270222070212298, "percentage": 65.4, "elapsed_time": "2:53:51", "remaining_time": "1:31:57", "throughput": 8649.18, "total_tokens": 90221840} +{"current_steps": 133865, "total_steps": 204665, "loss": 0.0, "lr": 6.446874048171948e-07, "epoch": 3.2703442210441453, "percentage": 65.41, "elapsed_time": "2:53:51", "remaining_time": "1:31:57", "throughput": 8649.21, "total_tokens": 90225168} +{"current_steps": 133870, "total_steps": 204665, "loss": 0.0, "lr": 6.446076930831049e-07, "epoch": 3.2704663718759925, "percentage": 65.41, "elapsed_time": "2:53:51", "remaining_time": "1:31:56", "throughput": 8649.23, "total_tokens": 90228368} +{"current_steps": 133875, "total_steps": 204665, "loss": 0.0, "lr": 6.445279839335237e-07, "epoch": 3.2705885227078397, "percentage": 65.41, "elapsed_time": "2:53:52", "remaining_time": "1:31:56", "throughput": 8649.25, "total_tokens": 90231504} +{"current_steps": 133880, "total_steps": 204665, "loss": 0.0, "lr": 6.444482773690303e-07, "epoch": 3.270710673539687, "percentage": 65.41, "elapsed_time": "2:53:52", "remaining_time": "1:31:55", "throughput": 8649.28, "total_tokens": 90234832} +{"current_steps": 133885, "total_steps": 204665, "loss": 0.0451, "lr": 6.443685733902046e-07, "epoch": 3.270832824371534, "percentage": 65.42, "elapsed_time": "2:53:52", "remaining_time": "1:31:55", "throughput": 8649.3, "total_tokens": 90238032} +{"current_steps": 133890, "total_steps": 204665, "loss": 0.0, "lr": 6.442888719976266e-07, "epoch": 3.2709549752033813, "percentage": 65.42, "elapsed_time": "2:53:53", "remaining_time": "1:31:55", "throughput": 8649.34, "total_tokens": 90241488} +{"current_steps": 133895, "total_steps": 204665, "loss": 0.0001, "lr": 6.442091731918756e-07, "epoch": 3.2710771260352285, "percentage": 65.42, "elapsed_time": "2:53:53", "remaining_time": "1:31:54", "throughput": 8649.38, "total_tokens": 90244944} +{"current_steps": 133900, "total_steps": 204665, "loss": 0.0, "lr": 6.441294769735312e-07, "epoch": 3.2711992768670752, "percentage": 65.42, "elapsed_time": "2:53:54", "remaining_time": "1:31:54", "throughput": 8649.44, "total_tokens": 90248656} +{"current_steps": 133905, "total_steps": 204665, "loss": 0.0203, "lr": 6.440497833431726e-07, "epoch": 3.271321427698923, "percentage": 65.43, "elapsed_time": "2:53:54", "remaining_time": "1:31:53", "throughput": 8649.47, "total_tokens": 90251984} +{"current_steps": 133910, "total_steps": 204665, "loss": 0.0, "lr": 6.439700923013798e-07, "epoch": 3.2714435785307696, "percentage": 65.43, "elapsed_time": "2:53:54", "remaining_time": "1:31:53", "throughput": 8649.48, "total_tokens": 90254992} +{"current_steps": 133915, "total_steps": 204665, "loss": 0.0, "lr": 6.438904038487325e-07, "epoch": 3.271565729362617, "percentage": 65.43, "elapsed_time": "2:53:55", "remaining_time": "1:31:53", "throughput": 8649.5, "total_tokens": 90258256} +{"current_steps": 133920, "total_steps": 204665, "loss": 0.0, "lr": 6.438107179858097e-07, "epoch": 3.271687880194464, "percentage": 65.43, "elapsed_time": "2:53:55", "remaining_time": "1:31:52", "throughput": 8649.53, "total_tokens": 90261520} +{"current_steps": 133925, "total_steps": 204665, "loss": 0.0688, "lr": 6.437310347131915e-07, "epoch": 3.271810031026311, "percentage": 65.44, "elapsed_time": "2:53:55", "remaining_time": "1:31:52", "throughput": 8649.56, "total_tokens": 90264848} +{"current_steps": 133930, "total_steps": 204665, "loss": 0.0, "lr": 6.436513540314566e-07, "epoch": 3.2719321818581584, "percentage": 65.44, "elapsed_time": "2:53:56", "remaining_time": "1:31:51", "throughput": 8649.61, "total_tokens": 90268304} +{"current_steps": 133935, "total_steps": 204665, "loss": 0.0, "lr": 6.435716759411853e-07, "epoch": 3.2720543326900056, "percentage": 65.44, "elapsed_time": "2:53:56", "remaining_time": "1:31:51", "throughput": 8649.64, "total_tokens": 90271632} +{"current_steps": 133940, "total_steps": 204665, "loss": 0.0115, "lr": 6.434920004429564e-07, "epoch": 3.2721764835218528, "percentage": 65.44, "elapsed_time": "2:53:56", "remaining_time": "1:31:51", "throughput": 8649.69, "total_tokens": 90275280} +{"current_steps": 133945, "total_steps": 204665, "loss": 0.0, "lr": 6.434123275373496e-07, "epoch": 3.2722986343537, "percentage": 65.45, "elapsed_time": "2:53:57", "remaining_time": "1:31:50", "throughput": 8649.72, "total_tokens": 90278544} +{"current_steps": 133950, "total_steps": 204665, "loss": 0.0225, "lr": 6.433326572249446e-07, "epoch": 3.272420785185547, "percentage": 65.45, "elapsed_time": "2:53:57", "remaining_time": "1:31:50", "throughput": 8649.73, "total_tokens": 90281552} +{"current_steps": 133955, "total_steps": 204665, "loss": 0.0, "lr": 6.432529895063199e-07, "epoch": 3.2725429360173943, "percentage": 65.45, "elapsed_time": "2:53:57", "remaining_time": "1:31:49", "throughput": 8649.78, "total_tokens": 90285200} +{"current_steps": 133960, "total_steps": 204665, "loss": 0.0001, "lr": 6.431733243820558e-07, "epoch": 3.2726650868492415, "percentage": 65.45, "elapsed_time": "2:53:58", "remaining_time": "1:31:49", "throughput": 8649.85, "total_tokens": 90289040} +{"current_steps": 133965, "total_steps": 204665, "loss": 0.0, "lr": 6.430936618527311e-07, "epoch": 3.2727872376810887, "percentage": 65.46, "elapsed_time": "2:53:58", "remaining_time": "1:31:48", "throughput": 8649.91, "total_tokens": 90292752} +{"current_steps": 133970, "total_steps": 204665, "loss": 0.0, "lr": 6.430140019189254e-07, "epoch": 3.272909388512936, "percentage": 65.46, "elapsed_time": "2:53:58", "remaining_time": "1:31:48", "throughput": 8649.97, "total_tokens": 90296336} +{"current_steps": 133975, "total_steps": 204665, "loss": 0.0, "lr": 6.429343445812177e-07, "epoch": 3.273031539344783, "percentage": 65.46, "elapsed_time": "2:53:59", "remaining_time": "1:31:48", "throughput": 8649.97, "total_tokens": 90299280} +{"current_steps": 133980, "total_steps": 204665, "loss": 0.0, "lr": 6.428546898401874e-07, "epoch": 3.2731536901766303, "percentage": 65.46, "elapsed_time": "2:53:59", "remaining_time": "1:31:47", "throughput": 8649.98, "total_tokens": 90302416} +{"current_steps": 133985, "total_steps": 204665, "loss": 0.0, "lr": 6.427750376964143e-07, "epoch": 3.2732758410084775, "percentage": 65.47, "elapsed_time": "2:53:59", "remaining_time": "1:31:47", "throughput": 8650.0, "total_tokens": 90305616} +{"current_steps": 133990, "total_steps": 204665, "loss": 0.0, "lr": 6.42695388150477e-07, "epoch": 3.2733979918403247, "percentage": 65.47, "elapsed_time": "2:54:00", "remaining_time": "1:31:46", "throughput": 8650.07, "total_tokens": 90309456} +{"current_steps": 133995, "total_steps": 204665, "loss": 0.0027, "lr": 6.426157412029549e-07, "epoch": 3.2735201426721714, "percentage": 65.47, "elapsed_time": "2:54:00", "remaining_time": "1:31:46", "throughput": 8650.1, "total_tokens": 90312720} +{"current_steps": 134000, "total_steps": 204665, "loss": 0.0, "lr": 6.425360968544272e-07, "epoch": 3.2736422935040186, "percentage": 65.47, "elapsed_time": "2:54:01", "remaining_time": "1:31:46", "throughput": 8650.15, "total_tokens": 90316304} +{"current_steps": 134005, "total_steps": 204665, "loss": 0.0021, "lr": 6.42456455105473e-07, "epoch": 3.2737644443358658, "percentage": 65.48, "elapsed_time": "2:54:01", "remaining_time": "1:31:45", "throughput": 8650.16, "total_tokens": 90319376} +{"current_steps": 134010, "total_steps": 204665, "loss": 0.0, "lr": 6.42376815956672e-07, "epoch": 3.273886595167713, "percentage": 65.48, "elapsed_time": "2:54:01", "remaining_time": "1:31:45", "throughput": 8650.2, "total_tokens": 90322832} +{"current_steps": 134015, "total_steps": 204665, "loss": 0.0, "lr": 6.422971794086028e-07, "epoch": 3.27400874599956, "percentage": 65.48, "elapsed_time": "2:54:02", "remaining_time": "1:31:44", "throughput": 8650.21, "total_tokens": 90325840} +{"current_steps": 134020, "total_steps": 204665, "loss": 0.0455, "lr": 6.422175454618448e-07, "epoch": 3.2741308968314073, "percentage": 65.48, "elapsed_time": "2:54:02", "remaining_time": "1:31:44", "throughput": 8650.27, "total_tokens": 90329488} +{"current_steps": 134025, "total_steps": 204665, "loss": 0.095, "lr": 6.421379141169769e-07, "epoch": 3.2742530476632545, "percentage": 65.49, "elapsed_time": "2:54:02", "remaining_time": "1:31:44", "throughput": 8650.32, "total_tokens": 90333072} +{"current_steps": 134030, "total_steps": 204665, "loss": 0.0001, "lr": 6.420582853745787e-07, "epoch": 3.2743751984951017, "percentage": 65.49, "elapsed_time": "2:54:03", "remaining_time": "1:31:43", "throughput": 8650.33, "total_tokens": 90336144} +{"current_steps": 134035, "total_steps": 204665, "loss": 0.0591, "lr": 6.419786592352283e-07, "epoch": 3.274497349326949, "percentage": 65.49, "elapsed_time": "2:54:03", "remaining_time": "1:31:43", "throughput": 8650.37, "total_tokens": 90339664} +{"current_steps": 134040, "total_steps": 204665, "loss": 0.0, "lr": 6.418990356995058e-07, "epoch": 3.274619500158796, "percentage": 65.49, "elapsed_time": "2:54:03", "remaining_time": "1:31:42", "throughput": 8650.39, "total_tokens": 90342736} +{"current_steps": 134045, "total_steps": 204665, "loss": 0.0, "lr": 6.418194147679898e-07, "epoch": 3.2747416509906433, "percentage": 65.49, "elapsed_time": "2:54:04", "remaining_time": "1:31:42", "throughput": 8650.42, "total_tokens": 90346128} +{"current_steps": 134050, "total_steps": 204665, "loss": 0.0, "lr": 6.417397964412594e-07, "epoch": 3.2748638018224905, "percentage": 65.5, "elapsed_time": "2:54:04", "remaining_time": "1:31:41", "throughput": 8650.46, "total_tokens": 90349520} +{"current_steps": 134055, "total_steps": 204665, "loss": 0.0214, "lr": 6.416601807198936e-07, "epoch": 3.2749859526543377, "percentage": 65.5, "elapsed_time": "2:54:04", "remaining_time": "1:31:41", "throughput": 8650.49, "total_tokens": 90352848} +{"current_steps": 134060, "total_steps": 204665, "loss": 0.0247, "lr": 6.41580567604471e-07, "epoch": 3.275108103486185, "percentage": 65.5, "elapsed_time": "2:54:05", "remaining_time": "1:31:41", "throughput": 8650.54, "total_tokens": 90356432} +{"current_steps": 134065, "total_steps": 204665, "loss": 0.0001, "lr": 6.415009570955709e-07, "epoch": 3.275230254318032, "percentage": 65.5, "elapsed_time": "2:54:05", "remaining_time": "1:31:40", "throughput": 8650.56, "total_tokens": 90359632} +{"current_steps": 134070, "total_steps": 204665, "loss": 0.0348, "lr": 6.414213491937728e-07, "epoch": 3.2753524051498792, "percentage": 65.51, "elapsed_time": "2:54:05", "remaining_time": "1:31:40", "throughput": 8650.64, "total_tokens": 90363536} +{"current_steps": 134075, "total_steps": 204665, "loss": 0.0001, "lr": 6.413417438996547e-07, "epoch": 3.2754745559817264, "percentage": 65.51, "elapsed_time": "2:54:06", "remaining_time": "1:31:39", "throughput": 8650.68, "total_tokens": 90366928} +{"current_steps": 134080, "total_steps": 204665, "loss": 0.0001, "lr": 6.412621412137962e-07, "epoch": 3.275596706813573, "percentage": 65.51, "elapsed_time": "2:54:06", "remaining_time": "1:31:39", "throughput": 8650.71, "total_tokens": 90370256} +{"current_steps": 134085, "total_steps": 204665, "loss": 0.0001, "lr": 6.411825411367755e-07, "epoch": 3.275718857645421, "percentage": 65.51, "elapsed_time": "2:54:06", "remaining_time": "1:31:39", "throughput": 8650.73, "total_tokens": 90373456} +{"current_steps": 134090, "total_steps": 204665, "loss": 0.0, "lr": 6.411029436691723e-07, "epoch": 3.2758410084772676, "percentage": 65.52, "elapsed_time": "2:54:07", "remaining_time": "1:31:38", "throughput": 8650.74, "total_tokens": 90376592} +{"current_steps": 134095, "total_steps": 204665, "loss": 0.0, "lr": 6.410233488115646e-07, "epoch": 3.2759631593091147, "percentage": 65.52, "elapsed_time": "2:54:07", "remaining_time": "1:31:38", "throughput": 8650.79, "total_tokens": 90380112} +{"current_steps": 134100, "total_steps": 204665, "loss": 0.0, "lr": 6.409437565645319e-07, "epoch": 3.276085310140962, "percentage": 65.52, "elapsed_time": "2:54:07", "remaining_time": "1:31:37", "throughput": 8650.83, "total_tokens": 90383568} +{"current_steps": 134105, "total_steps": 204665, "loss": 0.0, "lr": 6.408641669286529e-07, "epoch": 3.276207460972809, "percentage": 65.52, "elapsed_time": "2:54:08", "remaining_time": "1:31:37", "throughput": 8650.87, "total_tokens": 90386960} +{"current_steps": 134110, "total_steps": 204665, "loss": 0.0024, "lr": 6.40784579904506e-07, "epoch": 3.2763296118046563, "percentage": 65.53, "elapsed_time": "2:54:08", "remaining_time": "1:31:37", "throughput": 8650.88, "total_tokens": 90390096} +{"current_steps": 134115, "total_steps": 204665, "loss": 0.0653, "lr": 6.407049954926705e-07, "epoch": 3.2764517626365035, "percentage": 65.53, "elapsed_time": "2:54:09", "remaining_time": "1:31:36", "throughput": 8650.92, "total_tokens": 90393424} +{"current_steps": 134120, "total_steps": 204665, "loss": 0.0, "lr": 6.406254136937246e-07, "epoch": 3.2765739134683507, "percentage": 65.53, "elapsed_time": "2:54:09", "remaining_time": "1:31:36", "throughput": 8650.94, "total_tokens": 90396624} +{"current_steps": 134125, "total_steps": 204665, "loss": 0.0001, "lr": 6.405458345082477e-07, "epoch": 3.276696064300198, "percentage": 65.53, "elapsed_time": "2:54:09", "remaining_time": "1:31:35", "throughput": 8650.96, "total_tokens": 90399824} +{"current_steps": 134130, "total_steps": 204665, "loss": 0.0002, "lr": 6.404662579368178e-07, "epoch": 3.276818215132045, "percentage": 65.54, "elapsed_time": "2:54:10", "remaining_time": "1:31:35", "throughput": 8651.01, "total_tokens": 90403344} +{"current_steps": 134135, "total_steps": 204665, "loss": 0.1229, "lr": 6.403866839800141e-07, "epoch": 3.2769403659638923, "percentage": 65.54, "elapsed_time": "2:54:10", "remaining_time": "1:31:34", "throughput": 8651.02, "total_tokens": 90406416} +{"current_steps": 134140, "total_steps": 204665, "loss": 0.0001, "lr": 6.403071126384154e-07, "epoch": 3.2770625167957395, "percentage": 65.54, "elapsed_time": "2:54:10", "remaining_time": "1:31:34", "throughput": 8651.08, "total_tokens": 90410064} +{"current_steps": 134145, "total_steps": 204665, "loss": 0.0, "lr": 6.402275439126e-07, "epoch": 3.2771846676275866, "percentage": 65.54, "elapsed_time": "2:54:11", "remaining_time": "1:31:34", "throughput": 8651.1, "total_tokens": 90413328} +{"current_steps": 134150, "total_steps": 204665, "loss": 0.0, "lr": 6.401479778031467e-07, "epoch": 3.277306818459434, "percentage": 65.55, "elapsed_time": "2:54:11", "remaining_time": "1:31:33", "throughput": 8651.13, "total_tokens": 90416592} +{"current_steps": 134155, "total_steps": 204665, "loss": 0.0, "lr": 6.40068414310634e-07, "epoch": 3.277428969291281, "percentage": 65.55, "elapsed_time": "2:54:11", "remaining_time": "1:31:33", "throughput": 8651.14, "total_tokens": 90419664} +{"current_steps": 134160, "total_steps": 204665, "loss": 0.0, "lr": 6.399888534356404e-07, "epoch": 3.277551120123128, "percentage": 65.55, "elapsed_time": "2:54:12", "remaining_time": "1:31:32", "throughput": 8651.17, "total_tokens": 90422928} +{"current_steps": 134165, "total_steps": 204665, "loss": 0.0, "lr": 6.399092951787451e-07, "epoch": 3.277673270954975, "percentage": 65.55, "elapsed_time": "2:54:12", "remaining_time": "1:31:32", "throughput": 8651.2, "total_tokens": 90426256} +{"current_steps": 134170, "total_steps": 204665, "loss": 0.0, "lr": 6.398297395405259e-07, "epoch": 3.2777954217868226, "percentage": 65.56, "elapsed_time": "2:54:12", "remaining_time": "1:31:32", "throughput": 8651.24, "total_tokens": 90429712} +{"current_steps": 134175, "total_steps": 204665, "loss": 0.0214, "lr": 6.39750186521562e-07, "epoch": 3.2779175726186693, "percentage": 65.56, "elapsed_time": "2:54:13", "remaining_time": "1:31:31", "throughput": 8651.27, "total_tokens": 90432912} +{"current_steps": 134180, "total_steps": 204665, "loss": 0.0, "lr": 6.396706361224313e-07, "epoch": 3.2780397234505165, "percentage": 65.56, "elapsed_time": "2:54:13", "remaining_time": "1:31:31", "throughput": 8651.33, "total_tokens": 90436624} +{"current_steps": 134185, "total_steps": 204665, "loss": 0.0, "lr": 6.395910883437132e-07, "epoch": 3.2781618742823637, "percentage": 65.56, "elapsed_time": "2:54:13", "remaining_time": "1:31:30", "throughput": 8651.36, "total_tokens": 90440016} +{"current_steps": 134190, "total_steps": 204665, "loss": 0.0, "lr": 6.395115431859851e-07, "epoch": 3.278284025114211, "percentage": 65.57, "elapsed_time": "2:54:14", "remaining_time": "1:31:30", "throughput": 8651.39, "total_tokens": 90443344} +{"current_steps": 134195, "total_steps": 204665, "loss": 0.0, "lr": 6.394320006498262e-07, "epoch": 3.278406175946058, "percentage": 65.57, "elapsed_time": "2:54:14", "remaining_time": "1:31:30", "throughput": 8651.42, "total_tokens": 90446608} +{"current_steps": 134200, "total_steps": 204665, "loss": 0.006, "lr": 6.393524607358149e-07, "epoch": 3.2785283267779053, "percentage": 65.57, "elapsed_time": "2:54:14", "remaining_time": "1:31:29", "throughput": 8651.44, "total_tokens": 90449808} +{"current_steps": 134205, "total_steps": 204665, "loss": 0.0, "lr": 6.392729234445293e-07, "epoch": 3.2786504776097525, "percentage": 65.57, "elapsed_time": "2:54:15", "remaining_time": "1:31:29", "throughput": 8651.56, "total_tokens": 90454352} +{"current_steps": 134210, "total_steps": 204665, "loss": 0.0001, "lr": 6.391933887765484e-07, "epoch": 3.2787726284415997, "percentage": 65.58, "elapsed_time": "2:54:15", "remaining_time": "1:31:28", "throughput": 8651.63, "total_tokens": 90458128} +{"current_steps": 134215, "total_steps": 204665, "loss": 0.0, "lr": 6.391138567324497e-07, "epoch": 3.278894779273447, "percentage": 65.58, "elapsed_time": "2:54:15", "remaining_time": "1:31:28", "throughput": 8651.68, "total_tokens": 90461776} +{"current_steps": 134220, "total_steps": 204665, "loss": 0.0, "lr": 6.390343273128123e-07, "epoch": 3.279016930105294, "percentage": 65.58, "elapsed_time": "2:54:16", "remaining_time": "1:31:27", "throughput": 8651.72, "total_tokens": 90465168} +{"current_steps": 134225, "total_steps": 204665, "loss": 0.0001, "lr": 6.389548005182146e-07, "epoch": 3.2791390809371412, "percentage": 65.58, "elapsed_time": "2:54:16", "remaining_time": "1:31:27", "throughput": 8651.73, "total_tokens": 90468304} +{"current_steps": 134230, "total_steps": 204665, "loss": 0.0544, "lr": 6.388752763492344e-07, "epoch": 3.2792612317689884, "percentage": 65.59, "elapsed_time": "2:54:17", "remaining_time": "1:31:27", "throughput": 8651.77, "total_tokens": 90471760} +{"current_steps": 134235, "total_steps": 204665, "loss": 0.129, "lr": 6.387957548064505e-07, "epoch": 3.2793833826008356, "percentage": 65.59, "elapsed_time": "2:54:17", "remaining_time": "1:31:26", "throughput": 8651.79, "total_tokens": 90474896} +{"current_steps": 134240, "total_steps": 204665, "loss": 0.0568, "lr": 6.387162358904408e-07, "epoch": 3.279505533432683, "percentage": 65.59, "elapsed_time": "2:54:17", "remaining_time": "1:31:26", "throughput": 8651.81, "total_tokens": 90478096} +{"current_steps": 134245, "total_steps": 204665, "loss": 0.0546, "lr": 6.386367196017842e-07, "epoch": 3.27962768426453, "percentage": 65.59, "elapsed_time": "2:54:18", "remaining_time": "1:31:25", "throughput": 8651.89, "total_tokens": 90482064} +{"current_steps": 134250, "total_steps": 204665, "loss": 0.0775, "lr": 6.385572059410583e-07, "epoch": 3.279749835096377, "percentage": 65.59, "elapsed_time": "2:54:18", "remaining_time": "1:31:25", "throughput": 8651.93, "total_tokens": 90485456} +{"current_steps": 134255, "total_steps": 204665, "loss": 0.0155, "lr": 6.384776949088416e-07, "epoch": 3.2798719859282244, "percentage": 65.6, "elapsed_time": "2:54:18", "remaining_time": "1:31:25", "throughput": 8651.94, "total_tokens": 90488464} +{"current_steps": 134260, "total_steps": 204665, "loss": 0.0, "lr": 6.383981865057125e-07, "epoch": 3.279994136760071, "percentage": 65.6, "elapsed_time": "2:54:19", "remaining_time": "1:31:24", "throughput": 8651.98, "total_tokens": 90491920} +{"current_steps": 134265, "total_steps": 204665, "loss": 0.0, "lr": 6.383186807322488e-07, "epoch": 3.2801162875919183, "percentage": 65.6, "elapsed_time": "2:54:19", "remaining_time": "1:31:24", "throughput": 8652.03, "total_tokens": 90495504} +{"current_steps": 134270, "total_steps": 204665, "loss": 0.1083, "lr": 6.382391775890293e-07, "epoch": 3.2802384384237655, "percentage": 65.6, "elapsed_time": "2:54:19", "remaining_time": "1:31:23", "throughput": 8652.06, "total_tokens": 90498896} +{"current_steps": 134275, "total_steps": 204665, "loss": 0.0001, "lr": 6.381596770766313e-07, "epoch": 3.2803605892556127, "percentage": 65.61, "elapsed_time": "2:54:20", "remaining_time": "1:31:23", "throughput": 8652.12, "total_tokens": 90502480} +{"current_steps": 134280, "total_steps": 204665, "loss": 0.0007, "lr": 6.380801791956341e-07, "epoch": 3.28048274008746, "percentage": 65.61, "elapsed_time": "2:54:20", "remaining_time": "1:31:23", "throughput": 8652.13, "total_tokens": 90505552} +{"current_steps": 134285, "total_steps": 204665, "loss": 0.0008, "lr": 6.380006839466146e-07, "epoch": 3.280604890919307, "percentage": 65.61, "elapsed_time": "2:54:20", "remaining_time": "1:31:22", "throughput": 8652.16, "total_tokens": 90509008} +{"current_steps": 134290, "total_steps": 204665, "loss": 0.0192, "lr": 6.379211913301514e-07, "epoch": 3.2807270417511543, "percentage": 65.61, "elapsed_time": "2:54:21", "remaining_time": "1:31:22", "throughput": 8652.22, "total_tokens": 90512592} +{"current_steps": 134295, "total_steps": 204665, "loss": 0.0, "lr": 6.378417013468233e-07, "epoch": 3.2808491925830015, "percentage": 65.62, "elapsed_time": "2:54:21", "remaining_time": "1:31:21", "throughput": 8652.22, "total_tokens": 90515536} +{"current_steps": 134300, "total_steps": 204665, "loss": 0.0, "lr": 6.377622139972074e-07, "epoch": 3.2809713434148486, "percentage": 65.62, "elapsed_time": "2:54:21", "remaining_time": "1:31:21", "throughput": 8652.25, "total_tokens": 90518800} +{"current_steps": 134305, "total_steps": 204665, "loss": 0.0001, "lr": 6.376827292818822e-07, "epoch": 3.281093494246696, "percentage": 65.62, "elapsed_time": "2:54:22", "remaining_time": "1:31:20", "throughput": 8652.27, "total_tokens": 90522000} +{"current_steps": 134310, "total_steps": 204665, "loss": 0.0, "lr": 6.376032472014256e-07, "epoch": 3.281215645078543, "percentage": 65.62, "elapsed_time": "2:54:22", "remaining_time": "1:31:20", "throughput": 8652.29, "total_tokens": 90525200} +{"current_steps": 134315, "total_steps": 204665, "loss": 0.029, "lr": 6.375237677564154e-07, "epoch": 3.28133779591039, "percentage": 65.63, "elapsed_time": "2:54:22", "remaining_time": "1:31:20", "throughput": 8652.31, "total_tokens": 90528272} +{"current_steps": 134320, "total_steps": 204665, "loss": 0.0, "lr": 6.374442909474304e-07, "epoch": 3.2814599467422374, "percentage": 65.63, "elapsed_time": "2:54:23", "remaining_time": "1:31:19", "throughput": 8652.31, "total_tokens": 90531216} +{"current_steps": 134325, "total_steps": 204665, "loss": 0.0001, "lr": 6.373648167750475e-07, "epoch": 3.2815820975740846, "percentage": 65.63, "elapsed_time": "2:54:23", "remaining_time": "1:31:19", "throughput": 8652.35, "total_tokens": 90534672} +{"current_steps": 134330, "total_steps": 204665, "loss": 0.0, "lr": 6.372853452398457e-07, "epoch": 3.281704248405932, "percentage": 65.63, "elapsed_time": "2:54:23", "remaining_time": "1:31:18", "throughput": 8652.38, "total_tokens": 90538000} +{"current_steps": 134335, "total_steps": 204665, "loss": 0.0002, "lr": 6.37205876342402e-07, "epoch": 3.281826399237779, "percentage": 65.64, "elapsed_time": "2:54:24", "remaining_time": "1:31:18", "throughput": 8652.42, "total_tokens": 90541392} +{"current_steps": 134340, "total_steps": 204665, "loss": 0.057, "lr": 6.371264100832951e-07, "epoch": 3.281948550069626, "percentage": 65.64, "elapsed_time": "2:54:24", "remaining_time": "1:31:18", "throughput": 8652.47, "total_tokens": 90544976} +{"current_steps": 134345, "total_steps": 204665, "loss": 0.0, "lr": 6.370469464631021e-07, "epoch": 3.282070700901473, "percentage": 65.64, "elapsed_time": "2:54:24", "remaining_time": "1:31:17", "throughput": 8652.47, "total_tokens": 90547920} +{"current_steps": 134350, "total_steps": 204665, "loss": 0.0, "lr": 6.369674854824017e-07, "epoch": 3.2821928517333205, "percentage": 65.64, "elapsed_time": "2:54:25", "remaining_time": "1:31:17", "throughput": 8652.48, "total_tokens": 90550992} +{"current_steps": 134355, "total_steps": 204665, "loss": 0.0001, "lr": 6.368880271417712e-07, "epoch": 3.2823150025651673, "percentage": 65.65, "elapsed_time": "2:54:25", "remaining_time": "1:31:16", "throughput": 8652.48, "total_tokens": 90553872} +{"current_steps": 134360, "total_steps": 204665, "loss": 0.0, "lr": 6.368085714417888e-07, "epoch": 3.2824371533970145, "percentage": 65.65, "elapsed_time": "2:54:26", "remaining_time": "1:31:16", "throughput": 8652.54, "total_tokens": 90557584} +{"current_steps": 134365, "total_steps": 204665, "loss": 0.0, "lr": 6.367291183830322e-07, "epoch": 3.2825593042288617, "percentage": 65.65, "elapsed_time": "2:54:26", "remaining_time": "1:31:16", "throughput": 8652.58, "total_tokens": 90561104} +{"current_steps": 134370, "total_steps": 204665, "loss": 0.0144, "lr": 6.366496679660789e-07, "epoch": 3.282681455060709, "percentage": 65.65, "elapsed_time": "2:54:26", "remaining_time": "1:31:15", "throughput": 8652.62, "total_tokens": 90564560} +{"current_steps": 134375, "total_steps": 204665, "loss": 0.0507, "lr": 6.365702201915069e-07, "epoch": 3.282803605892556, "percentage": 65.66, "elapsed_time": "2:54:27", "remaining_time": "1:31:15", "throughput": 8652.63, "total_tokens": 90567632} +{"current_steps": 134380, "total_steps": 204665, "loss": 0.0, "lr": 6.364907750598942e-07, "epoch": 3.2829257567244032, "percentage": 65.66, "elapsed_time": "2:54:27", "remaining_time": "1:31:14", "throughput": 8652.67, "total_tokens": 90571088} +{"current_steps": 134385, "total_steps": 204665, "loss": 0.0, "lr": 6.364113325718183e-07, "epoch": 3.2830479075562504, "percentage": 65.66, "elapsed_time": "2:54:27", "remaining_time": "1:31:14", "throughput": 8652.7, "total_tokens": 90574416} +{"current_steps": 134390, "total_steps": 204665, "loss": 0.0, "lr": 6.363318927278571e-07, "epoch": 3.2831700583880976, "percentage": 65.66, "elapsed_time": "2:54:28", "remaining_time": "1:31:13", "throughput": 8652.73, "total_tokens": 90577744} +{"current_steps": 134395, "total_steps": 204665, "loss": 0.0395, "lr": 6.36252455528588e-07, "epoch": 3.283292209219945, "percentage": 65.67, "elapsed_time": "2:54:28", "remaining_time": "1:31:13", "throughput": 8652.74, "total_tokens": 90580752} +{"current_steps": 134400, "total_steps": 204665, "loss": 0.0001, "lr": 6.361730209745891e-07, "epoch": 3.283414360051792, "percentage": 65.67, "elapsed_time": "2:54:28", "remaining_time": "1:31:13", "throughput": 8652.76, "total_tokens": 90583888} +{"current_steps": 134405, "total_steps": 204665, "loss": 0.0, "lr": 6.360935890664376e-07, "epoch": 3.283536510883639, "percentage": 65.67, "elapsed_time": "2:54:29", "remaining_time": "1:31:12", "throughput": 8652.8, "total_tokens": 90587408} +{"current_steps": 134410, "total_steps": 204665, "loss": 0.0, "lr": 6.360141598047115e-07, "epoch": 3.2836586617154864, "percentage": 65.67, "elapsed_time": "2:54:29", "remaining_time": "1:31:12", "throughput": 8652.88, "total_tokens": 90591248} +{"current_steps": 134415, "total_steps": 204665, "loss": 0.0001, "lr": 6.359347331899887e-07, "epoch": 3.2837808125473336, "percentage": 65.68, "elapsed_time": "2:54:29", "remaining_time": "1:31:11", "throughput": 8652.96, "total_tokens": 90595280} +{"current_steps": 134420, "total_steps": 204665, "loss": 0.0, "lr": 6.358553092228458e-07, "epoch": 3.2839029633791808, "percentage": 65.68, "elapsed_time": "2:54:30", "remaining_time": "1:31:11", "throughput": 8652.97, "total_tokens": 90598288} +{"current_steps": 134425, "total_steps": 204665, "loss": 0.0001, "lr": 6.357758879038617e-07, "epoch": 3.284025114211028, "percentage": 65.68, "elapsed_time": "2:54:30", "remaining_time": "1:31:11", "throughput": 8652.99, "total_tokens": 90601488} +{"current_steps": 134430, "total_steps": 204665, "loss": 0.0, "lr": 6.356964692336127e-07, "epoch": 3.284147265042875, "percentage": 65.68, "elapsed_time": "2:54:30", "remaining_time": "1:31:10", "throughput": 8653.03, "total_tokens": 90604944} +{"current_steps": 134435, "total_steps": 204665, "loss": 0.0, "lr": 6.356170532126774e-07, "epoch": 3.2842694158747223, "percentage": 65.69, "elapsed_time": "2:54:31", "remaining_time": "1:31:10", "throughput": 8653.06, "total_tokens": 90608272} +{"current_steps": 134440, "total_steps": 204665, "loss": 0.0718, "lr": 6.355376398416325e-07, "epoch": 3.284391566706569, "percentage": 65.69, "elapsed_time": "2:54:31", "remaining_time": "1:31:09", "throughput": 8653.08, "total_tokens": 90611408} +{"current_steps": 134445, "total_steps": 204665, "loss": 0.0, "lr": 6.354582291210559e-07, "epoch": 3.2845137175384163, "percentage": 65.69, "elapsed_time": "2:54:31", "remaining_time": "1:31:09", "throughput": 8653.07, "total_tokens": 90614288} +{"current_steps": 134450, "total_steps": 204665, "loss": 0.0, "lr": 6.353788210515255e-07, "epoch": 3.2846358683702634, "percentage": 65.69, "elapsed_time": "2:54:32", "remaining_time": "1:31:09", "throughput": 8653.1, "total_tokens": 90617488} +{"current_steps": 134455, "total_steps": 204665, "loss": 0.0, "lr": 6.352994156336182e-07, "epoch": 3.2847580192021106, "percentage": 65.7, "elapsed_time": "2:54:32", "remaining_time": "1:31:08", "throughput": 8653.12, "total_tokens": 90620688} +{"current_steps": 134460, "total_steps": 204665, "loss": 0.0001, "lr": 6.352200128679117e-07, "epoch": 3.284880170033958, "percentage": 65.7, "elapsed_time": "2:54:32", "remaining_time": "1:31:08", "throughput": 8653.14, "total_tokens": 90623888} +{"current_steps": 134465, "total_steps": 204665, "loss": 0.0, "lr": 6.351406127549834e-07, "epoch": 3.285002320865805, "percentage": 65.7, "elapsed_time": "2:54:33", "remaining_time": "1:31:07", "throughput": 8653.16, "total_tokens": 90627088} +{"current_steps": 134470, "total_steps": 204665, "loss": 0.0512, "lr": 6.350612152954105e-07, "epoch": 3.285124471697652, "percentage": 65.7, "elapsed_time": "2:54:33", "remaining_time": "1:31:07", "throughput": 8653.25, "total_tokens": 90631184} +{"current_steps": 134475, "total_steps": 204665, "loss": 0.0382, "lr": 6.349818204897708e-07, "epoch": 3.2852466225294994, "percentage": 65.7, "elapsed_time": "2:54:34", "remaining_time": "1:31:06", "throughput": 8653.29, "total_tokens": 90634576} +{"current_steps": 134480, "total_steps": 204665, "loss": 0.0, "lr": 6.349024283386413e-07, "epoch": 3.2853687733613466, "percentage": 65.71, "elapsed_time": "2:54:34", "remaining_time": "1:31:06", "throughput": 8653.33, "total_tokens": 90638032} +{"current_steps": 134485, "total_steps": 204665, "loss": 0.0, "lr": 6.348230388425999e-07, "epoch": 3.2854909241931938, "percentage": 65.71, "elapsed_time": "2:54:34", "remaining_time": "1:31:06", "throughput": 8653.37, "total_tokens": 90641424} +{"current_steps": 134490, "total_steps": 204665, "loss": 0.0, "lr": 6.347436520022231e-07, "epoch": 3.285613075025041, "percentage": 65.71, "elapsed_time": "2:54:35", "remaining_time": "1:31:05", "throughput": 8653.39, "total_tokens": 90644688} +{"current_steps": 134495, "total_steps": 204665, "loss": 0.0134, "lr": 6.346642678180891e-07, "epoch": 3.285735225856888, "percentage": 65.71, "elapsed_time": "2:54:35", "remaining_time": "1:31:05", "throughput": 8653.42, "total_tokens": 90648016} +{"current_steps": 134500, "total_steps": 204665, "loss": 0.0007, "lr": 6.345848862907746e-07, "epoch": 3.2858573766887353, "percentage": 65.72, "elapsed_time": "2:54:35", "remaining_time": "1:31:04", "throughput": 8653.46, "total_tokens": 90651344} +{"current_steps": 134505, "total_steps": 204665, "loss": 0.0, "lr": 6.34505507420857e-07, "epoch": 3.2859795275205825, "percentage": 65.72, "elapsed_time": "2:54:36", "remaining_time": "1:31:04", "throughput": 8653.5, "total_tokens": 90654864} +{"current_steps": 134510, "total_steps": 204665, "loss": 0.0, "lr": 6.344261312089138e-07, "epoch": 3.2861016783524297, "percentage": 65.72, "elapsed_time": "2:54:36", "remaining_time": "1:31:04", "throughput": 8653.56, "total_tokens": 90658576} +{"current_steps": 134515, "total_steps": 204665, "loss": 0.0002, "lr": 6.343467576555222e-07, "epoch": 3.286223829184277, "percentage": 65.72, "elapsed_time": "2:54:36", "remaining_time": "1:31:03", "throughput": 8653.58, "total_tokens": 90661648} +{"current_steps": 134520, "total_steps": 204665, "loss": 0.0001, "lr": 6.342673867612594e-07, "epoch": 3.286345980016124, "percentage": 65.73, "elapsed_time": "2:54:37", "remaining_time": "1:31:03", "throughput": 8653.64, "total_tokens": 90665424} +{"current_steps": 134525, "total_steps": 204665, "loss": 0.0001, "lr": 6.341880185267021e-07, "epoch": 3.286468130847971, "percentage": 65.73, "elapsed_time": "2:54:37", "remaining_time": "1:31:02", "throughput": 8653.66, "total_tokens": 90668624} +{"current_steps": 134530, "total_steps": 204665, "loss": 0.0, "lr": 6.34108652952428e-07, "epoch": 3.2865902816798185, "percentage": 65.73, "elapsed_time": "2:54:37", "remaining_time": "1:31:02", "throughput": 8653.69, "total_tokens": 90671952} +{"current_steps": 134535, "total_steps": 204665, "loss": 0.0, "lr": 6.340292900390146e-07, "epoch": 3.2867124325116652, "percentage": 65.73, "elapsed_time": "2:54:38", "remaining_time": "1:31:02", "throughput": 8653.75, "total_tokens": 90675536} +{"current_steps": 134540, "total_steps": 204665, "loss": 0.0, "lr": 6.339499297870382e-07, "epoch": 3.2868345833435124, "percentage": 65.74, "elapsed_time": "2:54:38", "remaining_time": "1:31:01", "throughput": 8653.74, "total_tokens": 90678416} +{"current_steps": 134545, "total_steps": 204665, "loss": 0.0, "lr": 6.338705721970768e-07, "epoch": 3.2869567341753596, "percentage": 65.74, "elapsed_time": "2:54:38", "remaining_time": "1:31:01", "throughput": 8653.76, "total_tokens": 90681616} +{"current_steps": 134550, "total_steps": 204665, "loss": 0.0, "lr": 6.337912172697066e-07, "epoch": 3.287078885007207, "percentage": 65.74, "elapsed_time": "2:54:39", "remaining_time": "1:31:00", "throughput": 8653.8, "total_tokens": 90685072} +{"current_steps": 134555, "total_steps": 204665, "loss": 0.0003, "lr": 6.337118650055056e-07, "epoch": 3.287201035839054, "percentage": 65.74, "elapsed_time": "2:54:39", "remaining_time": "1:31:00", "throughput": 8653.81, "total_tokens": 90688080} +{"current_steps": 134560, "total_steps": 204665, "loss": 0.0002, "lr": 6.336325154050502e-07, "epoch": 3.287323186670901, "percentage": 65.75, "elapsed_time": "2:54:39", "remaining_time": "1:30:59", "throughput": 8653.83, "total_tokens": 90691280} +{"current_steps": 134565, "total_steps": 204665, "loss": 0.0, "lr": 6.335531684689177e-07, "epoch": 3.2874453375027484, "percentage": 65.75, "elapsed_time": "2:54:40", "remaining_time": "1:30:59", "throughput": 8653.89, "total_tokens": 90694928} +{"current_steps": 134570, "total_steps": 204665, "loss": 0.0319, "lr": 6.334738241976854e-07, "epoch": 3.2875674883345956, "percentage": 65.75, "elapsed_time": "2:54:40", "remaining_time": "1:30:59", "throughput": 8653.93, "total_tokens": 90698448} +{"current_steps": 134575, "total_steps": 204665, "loss": 0.0001, "lr": 6.333944825919295e-07, "epoch": 3.2876896391664427, "percentage": 65.75, "elapsed_time": "2:54:40", "remaining_time": "1:30:58", "throughput": 8653.96, "total_tokens": 90701648} +{"current_steps": 134580, "total_steps": 204665, "loss": 0.0, "lr": 6.333151436522282e-07, "epoch": 3.28781178999829, "percentage": 65.76, "elapsed_time": "2:54:41", "remaining_time": "1:30:58", "throughput": 8654.02, "total_tokens": 90705424} +{"current_steps": 134585, "total_steps": 204665, "loss": 0.0, "lr": 6.33235807379157e-07, "epoch": 3.287933940830137, "percentage": 65.76, "elapsed_time": "2:54:41", "remaining_time": "1:30:57", "throughput": 8654.07, "total_tokens": 90708880} +{"current_steps": 134590, "total_steps": 204665, "loss": 0.1364, "lr": 6.331564737732944e-07, "epoch": 3.2880560916619843, "percentage": 65.76, "elapsed_time": "2:54:41", "remaining_time": "1:30:57", "throughput": 8654.08, "total_tokens": 90711888} +{"current_steps": 134595, "total_steps": 204665, "loss": 0.0446, "lr": 6.330771428352161e-07, "epoch": 3.2881782424938315, "percentage": 65.76, "elapsed_time": "2:54:42", "remaining_time": "1:30:57", "throughput": 8654.07, "total_tokens": 90714768} +{"current_steps": 134600, "total_steps": 204665, "loss": 0.0, "lr": 6.329978145654994e-07, "epoch": 3.2883003933256787, "percentage": 65.77, "elapsed_time": "2:54:42", "remaining_time": "1:30:56", "throughput": 8654.09, "total_tokens": 90717904} +{"current_steps": 134605, "total_steps": 204665, "loss": 0.0, "lr": 6.329184889647219e-07, "epoch": 3.288422544157526, "percentage": 65.77, "elapsed_time": "2:54:43", "remaining_time": "1:30:56", "throughput": 8654.18, "total_tokens": 90722000} +{"current_steps": 134610, "total_steps": 204665, "loss": 0.0001, "lr": 6.328391660334596e-07, "epoch": 3.2885446949893726, "percentage": 65.77, "elapsed_time": "2:54:43", "remaining_time": "1:30:55", "throughput": 8654.19, "total_tokens": 90725008} +{"current_steps": 134615, "total_steps": 204665, "loss": 0.0378, "lr": 6.327598457722896e-07, "epoch": 3.2886668458212203, "percentage": 65.77, "elapsed_time": "2:54:43", "remaining_time": "1:30:55", "throughput": 8654.22, "total_tokens": 90728336} +{"current_steps": 134620, "total_steps": 204665, "loss": 0.0, "lr": 6.326805281817887e-07, "epoch": 3.288788996653067, "percentage": 65.78, "elapsed_time": "2:54:44", "remaining_time": "1:30:55", "throughput": 8654.27, "total_tokens": 90731792} +{"current_steps": 134625, "total_steps": 204665, "loss": 0.0, "lr": 6.326012132625338e-07, "epoch": 3.288911147484914, "percentage": 65.78, "elapsed_time": "2:54:44", "remaining_time": "1:30:54", "throughput": 8654.33, "total_tokens": 90735504} +{"current_steps": 134630, "total_steps": 204665, "loss": 0.0, "lr": 6.32521901015102e-07, "epoch": 3.2890332983167614, "percentage": 65.78, "elapsed_time": "2:54:44", "remaining_time": "1:30:54", "throughput": 8654.37, "total_tokens": 90738960} +{"current_steps": 134635, "total_steps": 204665, "loss": 0.0, "lr": 6.324425914400693e-07, "epoch": 3.2891554491486086, "percentage": 65.78, "elapsed_time": "2:54:45", "remaining_time": "1:30:53", "throughput": 8654.37, "total_tokens": 90741840} +{"current_steps": 134640, "total_steps": 204665, "loss": 0.0028, "lr": 6.323632845380134e-07, "epoch": 3.2892775999804558, "percentage": 65.79, "elapsed_time": "2:54:45", "remaining_time": "1:30:53", "throughput": 8654.39, "total_tokens": 90745104} +{"current_steps": 134645, "total_steps": 204665, "loss": 0.0, "lr": 6.322839803095102e-07, "epoch": 3.289399750812303, "percentage": 65.79, "elapsed_time": "2:54:45", "remaining_time": "1:30:52", "throughput": 8654.43, "total_tokens": 90748432} +{"current_steps": 134650, "total_steps": 204665, "loss": 0.0, "lr": 6.322046787551372e-07, "epoch": 3.28952190164415, "percentage": 65.79, "elapsed_time": "2:54:46", "remaining_time": "1:30:52", "throughput": 8654.43, "total_tokens": 90751440} +{"current_steps": 134655, "total_steps": 204665, "loss": 0.0, "lr": 6.321253798754702e-07, "epoch": 3.2896440524759973, "percentage": 65.79, "elapsed_time": "2:54:46", "remaining_time": "1:30:52", "throughput": 8654.48, "total_tokens": 90755024} +{"current_steps": 134660, "total_steps": 204665, "loss": 0.0002, "lr": 6.320460836710866e-07, "epoch": 3.2897662033078445, "percentage": 65.8, "elapsed_time": "2:54:46", "remaining_time": "1:30:51", "throughput": 8654.53, "total_tokens": 90758544} +{"current_steps": 134665, "total_steps": 204665, "loss": 0.0, "lr": 6.319667901425629e-07, "epoch": 3.2898883541396917, "percentage": 65.8, "elapsed_time": "2:54:47", "remaining_time": "1:30:51", "throughput": 8654.56, "total_tokens": 90761936} +{"current_steps": 134670, "total_steps": 204665, "loss": 0.0, "lr": 6.318874992904757e-07, "epoch": 3.290010504971539, "percentage": 65.8, "elapsed_time": "2:54:47", "remaining_time": "1:30:50", "throughput": 8654.58, "total_tokens": 90765008} +{"current_steps": 134675, "total_steps": 204665, "loss": 0.0, "lr": 6.318082111154018e-07, "epoch": 3.290132655803386, "percentage": 65.8, "elapsed_time": "2:54:47", "remaining_time": "1:30:50", "throughput": 8654.61, "total_tokens": 90768336} +{"current_steps": 134680, "total_steps": 204665, "loss": 0.0, "lr": 6.317289256179172e-07, "epoch": 3.2902548066352333, "percentage": 65.81, "elapsed_time": "2:54:48", "remaining_time": "1:30:50", "throughput": 8654.63, "total_tokens": 90771536} +{"current_steps": 134685, "total_steps": 204665, "loss": 0.0004, "lr": 6.316496427985995e-07, "epoch": 3.2903769574670805, "percentage": 65.81, "elapsed_time": "2:54:48", "remaining_time": "1:30:49", "throughput": 8654.67, "total_tokens": 90774928} +{"current_steps": 134690, "total_steps": 204665, "loss": 0.0083, "lr": 6.31570362658024e-07, "epoch": 3.2904991082989277, "percentage": 65.81, "elapsed_time": "2:54:48", "remaining_time": "1:30:49", "throughput": 8654.7, "total_tokens": 90778320} +{"current_steps": 134695, "total_steps": 204665, "loss": 0.0, "lr": 6.31491085196768e-07, "epoch": 3.290621259130775, "percentage": 65.81, "elapsed_time": "2:54:49", "remaining_time": "1:30:48", "throughput": 8654.72, "total_tokens": 90781520} +{"current_steps": 134700, "total_steps": 204665, "loss": 0.0, "lr": 6.314118104154084e-07, "epoch": 3.290743409962622, "percentage": 65.81, "elapsed_time": "2:54:49", "remaining_time": "1:30:48", "throughput": 8654.75, "total_tokens": 90784784} +{"current_steps": 134705, "total_steps": 204665, "loss": 0.0001, "lr": 6.313325383145208e-07, "epoch": 3.290865560794469, "percentage": 65.82, "elapsed_time": "2:54:49", "remaining_time": "1:30:48", "throughput": 8654.75, "total_tokens": 90787664} +{"current_steps": 134710, "total_steps": 204665, "loss": 0.0, "lr": 6.312532688946826e-07, "epoch": 3.290987711626316, "percentage": 65.82, "elapsed_time": "2:54:50", "remaining_time": "1:30:47", "throughput": 8654.76, "total_tokens": 90790800} +{"current_steps": 134715, "total_steps": 204665, "loss": 0.0, "lr": 6.311740021564693e-07, "epoch": 3.291109862458163, "percentage": 65.82, "elapsed_time": "2:54:50", "remaining_time": "1:30:47", "throughput": 8654.78, "total_tokens": 90793872} +{"current_steps": 134720, "total_steps": 204665, "loss": 0.0001, "lr": 6.310947381004582e-07, "epoch": 3.2912320132900104, "percentage": 65.82, "elapsed_time": "2:54:50", "remaining_time": "1:30:46", "throughput": 8654.8, "total_tokens": 90797136} +{"current_steps": 134725, "total_steps": 204665, "loss": 0.0, "lr": 6.310154767272255e-07, "epoch": 3.2913541641218576, "percentage": 65.83, "elapsed_time": "2:54:51", "remaining_time": "1:30:46", "throughput": 8654.84, "total_tokens": 90800528} +{"current_steps": 134730, "total_steps": 204665, "loss": 0.0609, "lr": 6.309362180373472e-07, "epoch": 3.2914763149537047, "percentage": 65.83, "elapsed_time": "2:54:51", "remaining_time": "1:30:45", "throughput": 8655.09, "total_tokens": 90806800} +{"current_steps": 134735, "total_steps": 204665, "loss": 0.0, "lr": 6.308569620314003e-07, "epoch": 3.291598465785552, "percentage": 65.83, "elapsed_time": "2:54:52", "remaining_time": "1:30:45", "throughput": 8655.12, "total_tokens": 90810128} +{"current_steps": 134740, "total_steps": 204665, "loss": 0.0, "lr": 6.307777087099603e-07, "epoch": 3.291720616617399, "percentage": 65.83, "elapsed_time": "2:54:52", "remaining_time": "1:30:45", "throughput": 8655.12, "total_tokens": 90813008} +{"current_steps": 134745, "total_steps": 204665, "loss": 0.0299, "lr": 6.306984580736048e-07, "epoch": 3.2918427674492463, "percentage": 65.84, "elapsed_time": "2:54:52", "remaining_time": "1:30:44", "throughput": 8655.14, "total_tokens": 90816208} +{"current_steps": 134750, "total_steps": 204665, "loss": 0.0011, "lr": 6.306192101229089e-07, "epoch": 3.2919649182810935, "percentage": 65.84, "elapsed_time": "2:54:53", "remaining_time": "1:30:44", "throughput": 8655.18, "total_tokens": 90819664} +{"current_steps": 134755, "total_steps": 204665, "loss": 0.0, "lr": 6.305399648584495e-07, "epoch": 3.2920870691129407, "percentage": 65.84, "elapsed_time": "2:54:53", "remaining_time": "1:30:43", "throughput": 8655.2, "total_tokens": 90822864} +{"current_steps": 134760, "total_steps": 204665, "loss": 0.0, "lr": 6.304607222808032e-07, "epoch": 3.292209219944788, "percentage": 65.84, "elapsed_time": "2:54:53", "remaining_time": "1:30:43", "throughput": 8655.22, "total_tokens": 90826064} +{"current_steps": 134765, "total_steps": 204665, "loss": 0.0, "lr": 6.303814823905458e-07, "epoch": 3.292331370776635, "percentage": 65.85, "elapsed_time": "2:54:54", "remaining_time": "1:30:43", "throughput": 8655.23, "total_tokens": 90829072} +{"current_steps": 134770, "total_steps": 204665, "loss": 0.0679, "lr": 6.303022451882536e-07, "epoch": 3.2924535216084823, "percentage": 65.85, "elapsed_time": "2:54:54", "remaining_time": "1:30:42", "throughput": 8655.27, "total_tokens": 90832592} +{"current_steps": 134775, "total_steps": 204665, "loss": 0.0, "lr": 6.30223010674503e-07, "epoch": 3.2925756724403294, "percentage": 65.85, "elapsed_time": "2:54:54", "remaining_time": "1:30:42", "throughput": 8655.31, "total_tokens": 90836048} +{"current_steps": 134780, "total_steps": 204665, "loss": 0.0, "lr": 6.301437788498698e-07, "epoch": 3.2926978232721766, "percentage": 65.85, "elapsed_time": "2:54:55", "remaining_time": "1:30:41", "throughput": 8655.34, "total_tokens": 90839312} +{"current_steps": 134785, "total_steps": 204665, "loss": 0.0002, "lr": 6.30064549714931e-07, "epoch": 3.292819974104024, "percentage": 65.86, "elapsed_time": "2:54:55", "remaining_time": "1:30:41", "throughput": 8655.37, "total_tokens": 90842576} +{"current_steps": 134790, "total_steps": 204665, "loss": 0.0, "lr": 6.299853232702619e-07, "epoch": 3.2929421249358706, "percentage": 65.86, "elapsed_time": "2:54:55", "remaining_time": "1:30:41", "throughput": 8655.38, "total_tokens": 90845648} +{"current_steps": 134795, "total_steps": 204665, "loss": 0.056, "lr": 6.299060995164394e-07, "epoch": 3.293064275767718, "percentage": 65.86, "elapsed_time": "2:54:56", "remaining_time": "1:30:40", "throughput": 8655.4, "total_tokens": 90848784} +{"current_steps": 134800, "total_steps": 204665, "loss": 0.0, "lr": 6.298268784540389e-07, "epoch": 3.293186426599565, "percentage": 65.86, "elapsed_time": "2:54:56", "remaining_time": "1:30:40", "throughput": 8655.44, "total_tokens": 90852304} +{"current_steps": 134805, "total_steps": 204665, "loss": 0.0, "lr": 6.297476600836374e-07, "epoch": 3.293308577431412, "percentage": 65.87, "elapsed_time": "2:54:56", "remaining_time": "1:30:39", "throughput": 8655.45, "total_tokens": 90855312} +{"current_steps": 134810, "total_steps": 204665, "loss": 0.0003, "lr": 6.2966844440581e-07, "epoch": 3.2934307282632593, "percentage": 65.87, "elapsed_time": "2:54:57", "remaining_time": "1:30:39", "throughput": 8655.49, "total_tokens": 90858768} +{"current_steps": 134815, "total_steps": 204665, "loss": 0.0002, "lr": 6.295892314211334e-07, "epoch": 3.2935528790951065, "percentage": 65.87, "elapsed_time": "2:54:57", "remaining_time": "1:30:38", "throughput": 8655.52, "total_tokens": 90862032} +{"current_steps": 134820, "total_steps": 204665, "loss": 0.0001, "lr": 6.295100211301836e-07, "epoch": 3.2936750299269537, "percentage": 65.87, "elapsed_time": "2:54:57", "remaining_time": "1:30:38", "throughput": 8655.56, "total_tokens": 90865552} +{"current_steps": 134825, "total_steps": 204665, "loss": 0.0, "lr": 6.294308135335367e-07, "epoch": 3.293797180758801, "percentage": 65.88, "elapsed_time": "2:54:58", "remaining_time": "1:30:38", "throughput": 8655.58, "total_tokens": 90868688} +{"current_steps": 134830, "total_steps": 204665, "loss": 0.0, "lr": 6.293516086317687e-07, "epoch": 3.293919331590648, "percentage": 65.88, "elapsed_time": "2:54:58", "remaining_time": "1:30:37", "throughput": 8655.61, "total_tokens": 90872016} +{"current_steps": 134835, "total_steps": 204665, "loss": 0.0, "lr": 6.292724064254551e-07, "epoch": 3.2940414824224953, "percentage": 65.88, "elapsed_time": "2:54:58", "remaining_time": "1:30:37", "throughput": 8655.63, "total_tokens": 90875216} +{"current_steps": 134840, "total_steps": 204665, "loss": 0.0001, "lr": 6.291932069151726e-07, "epoch": 3.2941636332543425, "percentage": 65.88, "elapsed_time": "2:54:59", "remaining_time": "1:30:36", "throughput": 8655.65, "total_tokens": 90878288} +{"current_steps": 134845, "total_steps": 204665, "loss": 0.0, "lr": 6.291140101014966e-07, "epoch": 3.2942857840861897, "percentage": 65.89, "elapsed_time": "2:54:59", "remaining_time": "1:30:36", "throughput": 8655.68, "total_tokens": 90881616} +{"current_steps": 134850, "total_steps": 204665, "loss": 0.0536, "lr": 6.290348159850032e-07, "epoch": 3.294407934918037, "percentage": 65.89, "elapsed_time": "2:54:59", "remaining_time": "1:30:36", "throughput": 8655.69, "total_tokens": 90884688} +{"current_steps": 134855, "total_steps": 204665, "loss": 0.0, "lr": 6.289556245662687e-07, "epoch": 3.294530085749884, "percentage": 65.89, "elapsed_time": "2:55:00", "remaining_time": "1:30:35", "throughput": 8655.77, "total_tokens": 90888720} +{"current_steps": 134860, "total_steps": 204665, "loss": 0.0001, "lr": 6.288764358458685e-07, "epoch": 3.2946522365817312, "percentage": 65.89, "elapsed_time": "2:55:00", "remaining_time": "1:30:35", "throughput": 8655.78, "total_tokens": 90891728} +{"current_steps": 134865, "total_steps": 204665, "loss": 0.0, "lr": 6.287972498243788e-07, "epoch": 3.2947743874135784, "percentage": 65.9, "elapsed_time": "2:55:01", "remaining_time": "1:30:34", "throughput": 8655.81, "total_tokens": 90895056} +{"current_steps": 134870, "total_steps": 204665, "loss": 0.0, "lr": 6.287180665023751e-07, "epoch": 3.2948965382454256, "percentage": 65.9, "elapsed_time": "2:55:01", "remaining_time": "1:30:34", "throughput": 8655.84, "total_tokens": 90898320} +{"current_steps": 134875, "total_steps": 204665, "loss": 0.0, "lr": 6.286388858804337e-07, "epoch": 3.295018689077273, "percentage": 65.9, "elapsed_time": "2:55:01", "remaining_time": "1:30:34", "throughput": 8655.88, "total_tokens": 90901840} +{"current_steps": 134880, "total_steps": 204665, "loss": 0.0001, "lr": 6.285597079591305e-07, "epoch": 3.29514083990912, "percentage": 65.9, "elapsed_time": "2:55:02", "remaining_time": "1:30:33", "throughput": 8655.91, "total_tokens": 90905104} +{"current_steps": 134885, "total_steps": 204665, "loss": 0.0, "lr": 6.284805327390404e-07, "epoch": 3.2952629907409667, "percentage": 65.91, "elapsed_time": "2:55:02", "remaining_time": "1:30:33", "throughput": 8655.94, "total_tokens": 90908496} +{"current_steps": 134890, "total_steps": 204665, "loss": 0.0, "lr": 6.284013602207403e-07, "epoch": 3.295385141572814, "percentage": 65.91, "elapsed_time": "2:55:02", "remaining_time": "1:30:32", "throughput": 8655.97, "total_tokens": 90911760} +{"current_steps": 134895, "total_steps": 204665, "loss": 0.0, "lr": 6.283221904048051e-07, "epoch": 3.295507292404661, "percentage": 65.91, "elapsed_time": "2:55:03", "remaining_time": "1:30:32", "throughput": 8655.99, "total_tokens": 90915024} +{"current_steps": 134900, "total_steps": 204665, "loss": 0.0773, "lr": 6.282430232918112e-07, "epoch": 3.2956294432365083, "percentage": 65.91, "elapsed_time": "2:55:03", "remaining_time": "1:30:31", "throughput": 8656.02, "total_tokens": 90918288} +{"current_steps": 134905, "total_steps": 204665, "loss": 0.0365, "lr": 6.281638588823337e-07, "epoch": 3.2957515940683555, "percentage": 65.92, "elapsed_time": "2:55:03", "remaining_time": "1:30:31", "throughput": 8656.03, "total_tokens": 90921360} +{"current_steps": 134910, "total_steps": 204665, "loss": 0.0978, "lr": 6.280846971769486e-07, "epoch": 3.2958737449002027, "percentage": 65.92, "elapsed_time": "2:55:04", "remaining_time": "1:30:31", "throughput": 8656.05, "total_tokens": 90924432} +{"current_steps": 134915, "total_steps": 204665, "loss": 0.0001, "lr": 6.280055381762319e-07, "epoch": 3.29599589573205, "percentage": 65.92, "elapsed_time": "2:55:04", "remaining_time": "1:30:30", "throughput": 8656.11, "total_tokens": 90928144} +{"current_steps": 134920, "total_steps": 204665, "loss": 0.0002, "lr": 6.27926381880759e-07, "epoch": 3.296118046563897, "percentage": 65.92, "elapsed_time": "2:55:04", "remaining_time": "1:30:30", "throughput": 8656.15, "total_tokens": 90931600} +{"current_steps": 134925, "total_steps": 204665, "loss": 0.0003, "lr": 6.278472282911054e-07, "epoch": 3.2962401973957443, "percentage": 65.92, "elapsed_time": "2:55:05", "remaining_time": "1:30:29", "throughput": 8656.16, "total_tokens": 90934672} +{"current_steps": 134930, "total_steps": 204665, "loss": 0.0, "lr": 6.277680774078469e-07, "epoch": 3.2963623482275914, "percentage": 65.93, "elapsed_time": "2:55:05", "remaining_time": "1:30:29", "throughput": 8656.21, "total_tokens": 90938256} +{"current_steps": 134935, "total_steps": 204665, "loss": 0.0643, "lr": 6.276889292315588e-07, "epoch": 3.2964844990594386, "percentage": 65.93, "elapsed_time": "2:55:05", "remaining_time": "1:30:29", "throughput": 8656.23, "total_tokens": 90941456} +{"current_steps": 134940, "total_steps": 204665, "loss": 0.0, "lr": 6.276097837628174e-07, "epoch": 3.296606649891286, "percentage": 65.93, "elapsed_time": "2:55:06", "remaining_time": "1:30:28", "throughput": 8656.25, "total_tokens": 90944656} +{"current_steps": 134945, "total_steps": 204665, "loss": 0.0565, "lr": 6.275306410021974e-07, "epoch": 3.296728800723133, "percentage": 65.93, "elapsed_time": "2:55:06", "remaining_time": "1:30:28", "throughput": 8656.27, "total_tokens": 90947856} +{"current_steps": 134950, "total_steps": 204665, "loss": 0.0555, "lr": 6.274515009502751e-07, "epoch": 3.29685095155498, "percentage": 65.94, "elapsed_time": "2:55:06", "remaining_time": "1:30:27", "throughput": 8656.32, "total_tokens": 90951376} +{"current_steps": 134955, "total_steps": 204665, "loss": 0.0, "lr": 6.273723636076254e-07, "epoch": 3.2969731023868274, "percentage": 65.94, "elapsed_time": "2:55:07", "remaining_time": "1:30:27", "throughput": 8656.36, "total_tokens": 90954832} +{"current_steps": 134960, "total_steps": 204665, "loss": 0.0005, "lr": 6.272932289748244e-07, "epoch": 3.2970952532186746, "percentage": 65.94, "elapsed_time": "2:55:07", "remaining_time": "1:30:27", "throughput": 8656.39, "total_tokens": 90958224} +{"current_steps": 134965, "total_steps": 204665, "loss": 0.0, "lr": 6.272140970524469e-07, "epoch": 3.2972174040505218, "percentage": 65.94, "elapsed_time": "2:55:07", "remaining_time": "1:30:26", "throughput": 8656.42, "total_tokens": 90961552} +{"current_steps": 134970, "total_steps": 204665, "loss": 0.0001, "lr": 6.27134967841069e-07, "epoch": 3.2973395548823685, "percentage": 65.95, "elapsed_time": "2:55:08", "remaining_time": "1:30:26", "throughput": 8656.45, "total_tokens": 90964880} +{"current_steps": 134975, "total_steps": 204665, "loss": 0.0, "lr": 6.270558413412659e-07, "epoch": 3.297461705714216, "percentage": 65.95, "elapsed_time": "2:55:08", "remaining_time": "1:30:25", "throughput": 8656.48, "total_tokens": 90968208} +{"current_steps": 134980, "total_steps": 204665, "loss": 0.0001, "lr": 6.26976717553613e-07, "epoch": 3.297583856546063, "percentage": 65.95, "elapsed_time": "2:55:09", "remaining_time": "1:30:25", "throughput": 8656.51, "total_tokens": 90971536} +{"current_steps": 134985, "total_steps": 204665, "loss": 0.0489, "lr": 6.26897596478686e-07, "epoch": 3.29770600737791, "percentage": 65.95, "elapsed_time": "2:55:09", "remaining_time": "1:30:24", "throughput": 8656.53, "total_tokens": 90974672} +{"current_steps": 134990, "total_steps": 204665, "loss": 0.0003, "lr": 6.268184781170596e-07, "epoch": 3.2978281582097573, "percentage": 65.96, "elapsed_time": "2:55:09", "remaining_time": "1:30:24", "throughput": 8656.59, "total_tokens": 90978320} +{"current_steps": 134995, "total_steps": 204665, "loss": 0.0, "lr": 6.2673936246931e-07, "epoch": 3.2979503090416045, "percentage": 65.96, "elapsed_time": "2:55:10", "remaining_time": "1:30:24", "throughput": 8656.65, "total_tokens": 90981968} +{"current_steps": 135000, "total_steps": 204665, "loss": 0.0, "lr": 6.266602495360116e-07, "epoch": 3.2980724598734517, "percentage": 65.96, "elapsed_time": "2:55:10", "remaining_time": "1:30:23", "throughput": 8656.66, "total_tokens": 90985040} +{"current_steps": 135005, "total_steps": 204665, "loss": 0.0, "lr": 6.265811393177405e-07, "epoch": 3.298194610705299, "percentage": 65.96, "elapsed_time": "2:55:10", "remaining_time": "1:30:23", "throughput": 8656.67, "total_tokens": 90988240} +{"current_steps": 135010, "total_steps": 204665, "loss": 0.0, "lr": 6.265020318150721e-07, "epoch": 3.298316761537146, "percentage": 65.97, "elapsed_time": "2:55:11", "remaining_time": "1:30:22", "throughput": 8656.72, "total_tokens": 90991696} +{"current_steps": 135015, "total_steps": 204665, "loss": 0.0, "lr": 6.26422927028581e-07, "epoch": 3.2984389123689932, "percentage": 65.97, "elapsed_time": "2:55:11", "remaining_time": "1:30:22", "throughput": 8656.75, "total_tokens": 90995024} +{"current_steps": 135020, "total_steps": 204665, "loss": 0.0, "lr": 6.263438249588433e-07, "epoch": 3.2985610632008404, "percentage": 65.97, "elapsed_time": "2:55:11", "remaining_time": "1:30:22", "throughput": 8656.8, "total_tokens": 90998672} +{"current_steps": 135025, "total_steps": 204665, "loss": 0.0, "lr": 6.262647256064333e-07, "epoch": 3.2986832140326876, "percentage": 65.97, "elapsed_time": "2:55:12", "remaining_time": "1:30:21", "throughput": 8656.86, "total_tokens": 91002384} +{"current_steps": 135030, "total_steps": 204665, "loss": 0.0, "lr": 6.26185628971927e-07, "epoch": 3.298805364864535, "percentage": 65.98, "elapsed_time": "2:55:12", "remaining_time": "1:30:21", "throughput": 8656.89, "total_tokens": 91005648} +{"current_steps": 135035, "total_steps": 204665, "loss": 0.0, "lr": 6.261065350558996e-07, "epoch": 3.298927515696382, "percentage": 65.98, "elapsed_time": "2:55:12", "remaining_time": "1:30:20", "throughput": 8656.92, "total_tokens": 91009040} +{"current_steps": 135040, "total_steps": 204665, "loss": 0.0, "lr": 6.260274438589254e-07, "epoch": 3.299049666528229, "percentage": 65.98, "elapsed_time": "2:55:13", "remaining_time": "1:30:20", "throughput": 8656.98, "total_tokens": 91012688} +{"current_steps": 135045, "total_steps": 204665, "loss": 0.0, "lr": 6.25948355381581e-07, "epoch": 3.2991718173600764, "percentage": 65.98, "elapsed_time": "2:55:13", "remaining_time": "1:30:20", "throughput": 8657.03, "total_tokens": 91016272} +{"current_steps": 135050, "total_steps": 204665, "loss": 0.0, "lr": 6.258692696244401e-07, "epoch": 3.2992939681919236, "percentage": 65.99, "elapsed_time": "2:55:13", "remaining_time": "1:30:19", "throughput": 8657.08, "total_tokens": 91019792} +{"current_steps": 135055, "total_steps": 204665, "loss": 0.0, "lr": 6.257901865880791e-07, "epoch": 3.2994161190237707, "percentage": 65.99, "elapsed_time": "2:55:14", "remaining_time": "1:30:19", "throughput": 8657.1, "total_tokens": 91022992} +{"current_steps": 135060, "total_steps": 204665, "loss": 0.0, "lr": 6.257111062730718e-07, "epoch": 3.299538269855618, "percentage": 65.99, "elapsed_time": "2:55:14", "remaining_time": "1:30:18", "throughput": 8657.12, "total_tokens": 91026192} +{"current_steps": 135065, "total_steps": 204665, "loss": 0.0685, "lr": 6.256320286799944e-07, "epoch": 3.2996604206874647, "percentage": 65.99, "elapsed_time": "2:55:14", "remaining_time": "1:30:18", "throughput": 8657.13, "total_tokens": 91029200} +{"current_steps": 135070, "total_steps": 204665, "loss": 0.0, "lr": 6.255529538094216e-07, "epoch": 3.299782571519312, "percentage": 66.0, "elapsed_time": "2:55:15", "remaining_time": "1:30:18", "throughput": 8657.14, "total_tokens": 91032272} +{"current_steps": 135075, "total_steps": 204665, "loss": 0.0, "lr": 6.254738816619285e-07, "epoch": 3.299904722351159, "percentage": 66.0, "elapsed_time": "2:55:15", "remaining_time": "1:30:17", "throughput": 8657.17, "total_tokens": 91035664} +{"current_steps": 135080, "total_steps": 204665, "loss": 0.0, "lr": 6.253948122380898e-07, "epoch": 3.3000268731830062, "percentage": 66.0, "elapsed_time": "2:55:15", "remaining_time": "1:30:17", "throughput": 8657.22, "total_tokens": 91039248} +{"current_steps": 135085, "total_steps": 204665, "loss": 0.0, "lr": 6.25315745538481e-07, "epoch": 3.3001490240148534, "percentage": 66.0, "elapsed_time": "2:55:16", "remaining_time": "1:30:16", "throughput": 8657.3, "total_tokens": 91043216} +{"current_steps": 135090, "total_steps": 204665, "loss": 0.0, "lr": 6.252366815636767e-07, "epoch": 3.3002711748467006, "percentage": 66.01, "elapsed_time": "2:55:16", "remaining_time": "1:30:16", "throughput": 8657.37, "total_tokens": 91046992} +{"current_steps": 135095, "total_steps": 204665, "loss": 0.0, "lr": 6.251576203142524e-07, "epoch": 3.300393325678548, "percentage": 66.01, "elapsed_time": "2:55:17", "remaining_time": "1:30:15", "throughput": 8657.39, "total_tokens": 91050192} +{"current_steps": 135100, "total_steps": 204665, "loss": 0.0, "lr": 6.250785617907822e-07, "epoch": 3.300515476510395, "percentage": 66.01, "elapsed_time": "2:55:17", "remaining_time": "1:30:15", "throughput": 8657.42, "total_tokens": 91053456} +{"current_steps": 135105, "total_steps": 204665, "loss": 0.0001, "lr": 6.249995059938421e-07, "epoch": 3.300637627342242, "percentage": 66.01, "elapsed_time": "2:55:17", "remaining_time": "1:30:15", "throughput": 8657.45, "total_tokens": 91056784} +{"current_steps": 135110, "total_steps": 204665, "loss": 0.0001, "lr": 6.24920452924006e-07, "epoch": 3.3007597781740894, "percentage": 66.02, "elapsed_time": "2:55:18", "remaining_time": "1:30:14", "throughput": 8657.49, "total_tokens": 91060240} +{"current_steps": 135115, "total_steps": 204665, "loss": 0.096, "lr": 6.248414025818496e-07, "epoch": 3.3008819290059366, "percentage": 66.02, "elapsed_time": "2:55:18", "remaining_time": "1:30:14", "throughput": 8657.52, "total_tokens": 91063504} +{"current_steps": 135120, "total_steps": 204665, "loss": 0.0, "lr": 6.247623549679471e-07, "epoch": 3.3010040798377838, "percentage": 66.02, "elapsed_time": "2:55:18", "remaining_time": "1:30:13", "throughput": 8657.54, "total_tokens": 91066768} +{"current_steps": 135125, "total_steps": 204665, "loss": 0.0, "lr": 6.246833100828738e-07, "epoch": 3.301126230669631, "percentage": 66.02, "elapsed_time": "2:55:19", "remaining_time": "1:30:13", "throughput": 8657.57, "total_tokens": 91070032} +{"current_steps": 135130, "total_steps": 204665, "loss": 0.0, "lr": 6.246042679272044e-07, "epoch": 3.301248381501478, "percentage": 66.02, "elapsed_time": "2:55:19", "remaining_time": "1:30:13", "throughput": 8657.61, "total_tokens": 91073552} +{"current_steps": 135135, "total_steps": 204665, "loss": 0.0696, "lr": 6.245252285015139e-07, "epoch": 3.3013705323333253, "percentage": 66.03, "elapsed_time": "2:55:19", "remaining_time": "1:30:12", "throughput": 8657.69, "total_tokens": 91077392} +{"current_steps": 135140, "total_steps": 204665, "loss": 0.0, "lr": 6.24446191806377e-07, "epoch": 3.3014926831651725, "percentage": 66.03, "elapsed_time": "2:55:20", "remaining_time": "1:30:12", "throughput": 8657.71, "total_tokens": 91080592} +{"current_steps": 135145, "total_steps": 204665, "loss": 0.0001, "lr": 6.243671578423679e-07, "epoch": 3.3016148339970197, "percentage": 66.03, "elapsed_time": "2:55:20", "remaining_time": "1:30:11", "throughput": 8657.74, "total_tokens": 91083920} +{"current_steps": 135150, "total_steps": 204665, "loss": 0.0, "lr": 6.242881266100625e-07, "epoch": 3.3017369848288665, "percentage": 66.03, "elapsed_time": "2:55:20", "remaining_time": "1:30:11", "throughput": 8657.75, "total_tokens": 91087056} +{"current_steps": 135155, "total_steps": 204665, "loss": 0.0, "lr": 6.242090981100343e-07, "epoch": 3.301859135660714, "percentage": 66.04, "elapsed_time": "2:55:21", "remaining_time": "1:30:11", "throughput": 8657.77, "total_tokens": 91090192} +{"current_steps": 135160, "total_steps": 204665, "loss": 0.0, "lr": 6.241300723428587e-07, "epoch": 3.301981286492561, "percentage": 66.04, "elapsed_time": "2:55:21", "remaining_time": "1:30:10", "throughput": 8657.79, "total_tokens": 91093328} +{"current_steps": 135165, "total_steps": 204665, "loss": 0.0, "lr": 6.240510493091108e-07, "epoch": 3.302103437324408, "percentage": 66.04, "elapsed_time": "2:55:21", "remaining_time": "1:30:10", "throughput": 8657.81, "total_tokens": 91096464} +{"current_steps": 135170, "total_steps": 204665, "loss": 0.0362, "lr": 6.239720290093642e-07, "epoch": 3.302225588156255, "percentage": 66.04, "elapsed_time": "2:55:22", "remaining_time": "1:30:09", "throughput": 8657.83, "total_tokens": 91099664} +{"current_steps": 135175, "total_steps": 204665, "loss": 0.0, "lr": 6.238930114441947e-07, "epoch": 3.3023477389881024, "percentage": 66.05, "elapsed_time": "2:55:22", "remaining_time": "1:30:09", "throughput": 8657.85, "total_tokens": 91102864} +{"current_steps": 135180, "total_steps": 204665, "loss": 0.0, "lr": 6.23813996614176e-07, "epoch": 3.3024698898199496, "percentage": 66.05, "elapsed_time": "2:55:22", "remaining_time": "1:30:08", "throughput": 8657.89, "total_tokens": 91106320} +{"current_steps": 135185, "total_steps": 204665, "loss": 0.0, "lr": 6.237349845198831e-07, "epoch": 3.302592040651797, "percentage": 66.05, "elapsed_time": "2:55:23", "remaining_time": "1:30:08", "throughput": 8657.92, "total_tokens": 91109648} +{"current_steps": 135190, "total_steps": 204665, "loss": 0.0001, "lr": 6.23655975161891e-07, "epoch": 3.302714191483644, "percentage": 66.05, "elapsed_time": "2:55:23", "remaining_time": "1:30:08", "throughput": 8657.94, "total_tokens": 91112848} +{"current_steps": 135195, "total_steps": 204665, "loss": 0.0875, "lr": 6.235769685407734e-07, "epoch": 3.302836342315491, "percentage": 66.06, "elapsed_time": "2:55:23", "remaining_time": "1:30:07", "throughput": 8657.96, "total_tokens": 91116048} +{"current_steps": 135200, "total_steps": 204665, "loss": 0.0, "lr": 6.234979646571057e-07, "epoch": 3.3029584931473384, "percentage": 66.06, "elapsed_time": "2:55:24", "remaining_time": "1:30:07", "throughput": 8658.01, "total_tokens": 91119568} +{"current_steps": 135205, "total_steps": 204665, "loss": 0.0, "lr": 6.234189635114617e-07, "epoch": 3.3030806439791855, "percentage": 66.06, "elapsed_time": "2:55:24", "remaining_time": "1:30:06", "throughput": 8658.04, "total_tokens": 91122960} +{"current_steps": 135210, "total_steps": 204665, "loss": 0.0, "lr": 6.233399651044167e-07, "epoch": 3.3032027948110327, "percentage": 66.06, "elapsed_time": "2:55:24", "remaining_time": "1:30:06", "throughput": 8658.04, "total_tokens": 91125840} +{"current_steps": 135215, "total_steps": 204665, "loss": 0.0, "lr": 6.232609694365443e-07, "epoch": 3.30332494564288, "percentage": 66.07, "elapsed_time": "2:55:25", "remaining_time": "1:30:06", "throughput": 8658.07, "total_tokens": 91129168} +{"current_steps": 135220, "total_steps": 204665, "loss": 0.0, "lr": 6.231819765084195e-07, "epoch": 3.303447096474727, "percentage": 66.07, "elapsed_time": "2:55:25", "remaining_time": "1:30:05", "throughput": 8658.12, "total_tokens": 91132688} +{"current_steps": 135225, "total_steps": 204665, "loss": 0.005, "lr": 6.231029863206172e-07, "epoch": 3.3035692473065743, "percentage": 66.07, "elapsed_time": "2:55:26", "remaining_time": "1:30:05", "throughput": 8658.16, "total_tokens": 91136208} +{"current_steps": 135230, "total_steps": 204665, "loss": 0.0, "lr": 6.23023998873711e-07, "epoch": 3.3036913981384215, "percentage": 66.07, "elapsed_time": "2:55:26", "remaining_time": "1:30:04", "throughput": 8658.18, "total_tokens": 91139408} +{"current_steps": 135235, "total_steps": 204665, "loss": 0.0001, "lr": 6.229450141682758e-07, "epoch": 3.3038135489702682, "percentage": 66.08, "elapsed_time": "2:55:26", "remaining_time": "1:30:04", "throughput": 8658.19, "total_tokens": 91142480} +{"current_steps": 135240, "total_steps": 204665, "loss": 0.0004, "lr": 6.228660322048858e-07, "epoch": 3.303935699802116, "percentage": 66.08, "elapsed_time": "2:55:27", "remaining_time": "1:30:04", "throughput": 8658.24, "total_tokens": 91146064} +{"current_steps": 135245, "total_steps": 204665, "loss": 0.029, "lr": 6.227870529841155e-07, "epoch": 3.3040578506339626, "percentage": 66.08, "elapsed_time": "2:55:27", "remaining_time": "1:30:03", "throughput": 8658.3, "total_tokens": 91149776} +{"current_steps": 135250, "total_steps": 204665, "loss": 0.0, "lr": 6.227080765065392e-07, "epoch": 3.30418000146581, "percentage": 66.08, "elapsed_time": "2:55:27", "remaining_time": "1:30:03", "throughput": 8658.32, "total_tokens": 91152976} +{"current_steps": 135255, "total_steps": 204665, "loss": 0.0, "lr": 6.226291027727311e-07, "epoch": 3.304302152297657, "percentage": 66.09, "elapsed_time": "2:55:28", "remaining_time": "1:30:02", "throughput": 8658.35, "total_tokens": 91156240} +{"current_steps": 135260, "total_steps": 204665, "loss": 0.0, "lr": 6.22550131783266e-07, "epoch": 3.304424303129504, "percentage": 66.09, "elapsed_time": "2:55:28", "remaining_time": "1:30:02", "throughput": 8658.39, "total_tokens": 91159632} +{"current_steps": 135265, "total_steps": 204665, "loss": 0.0, "lr": 6.224711635387174e-07, "epoch": 3.3045464539613514, "percentage": 66.09, "elapsed_time": "2:55:28", "remaining_time": "1:30:01", "throughput": 8658.4, "total_tokens": 91162704} +{"current_steps": 135270, "total_steps": 204665, "loss": 0.0001, "lr": 6.223921980396606e-07, "epoch": 3.3046686047931986, "percentage": 66.09, "elapsed_time": "2:55:29", "remaining_time": "1:30:01", "throughput": 8658.44, "total_tokens": 91166160} +{"current_steps": 135275, "total_steps": 204665, "loss": 0.0, "lr": 6.223132352866688e-07, "epoch": 3.3047907556250458, "percentage": 66.1, "elapsed_time": "2:55:29", "remaining_time": "1:30:01", "throughput": 8658.46, "total_tokens": 91169424} +{"current_steps": 135280, "total_steps": 204665, "loss": 0.0, "lr": 6.22234275280317e-07, "epoch": 3.304912906456893, "percentage": 66.1, "elapsed_time": "2:55:29", "remaining_time": "1:30:00", "throughput": 8658.47, "total_tokens": 91172432} +{"current_steps": 135285, "total_steps": 204665, "loss": 0.0001, "lr": 6.221553180211791e-07, "epoch": 3.30503505728874, "percentage": 66.1, "elapsed_time": "2:55:30", "remaining_time": "1:30:00", "throughput": 8658.48, "total_tokens": 91175504} +{"current_steps": 135290, "total_steps": 204665, "loss": 0.0339, "lr": 6.220763635098294e-07, "epoch": 3.3051572081205873, "percentage": 66.1, "elapsed_time": "2:55:30", "remaining_time": "1:29:59", "throughput": 8658.51, "total_tokens": 91178768} +{"current_steps": 135295, "total_steps": 204665, "loss": 0.0, "lr": 6.21997411746842e-07, "epoch": 3.3052793589524345, "percentage": 66.11, "elapsed_time": "2:55:30", "remaining_time": "1:29:59", "throughput": 8658.54, "total_tokens": 91182032} +{"current_steps": 135300, "total_steps": 204665, "loss": 0.0001, "lr": 6.21918462732791e-07, "epoch": 3.3054015097842817, "percentage": 66.11, "elapsed_time": "2:55:31", "remaining_time": "1:29:59", "throughput": 8658.59, "total_tokens": 91185616} +{"current_steps": 135305, "total_steps": 204665, "loss": 0.0, "lr": 6.218395164682509e-07, "epoch": 3.305523660616129, "percentage": 66.11, "elapsed_time": "2:55:31", "remaining_time": "1:29:58", "throughput": 8658.59, "total_tokens": 91188624} +{"current_steps": 135310, "total_steps": 204665, "loss": 0.1288, "lr": 6.217605729537952e-07, "epoch": 3.305645811447976, "percentage": 66.11, "elapsed_time": "2:55:31", "remaining_time": "1:29:58", "throughput": 8658.6, "total_tokens": 91191632} +{"current_steps": 135315, "total_steps": 204665, "loss": 0.0002, "lr": 6.216816321899984e-07, "epoch": 3.3057679622798233, "percentage": 66.12, "elapsed_time": "2:55:32", "remaining_time": "1:29:57", "throughput": 8658.63, "total_tokens": 91194960} +{"current_steps": 135320, "total_steps": 204665, "loss": 0.0034, "lr": 6.216026941774348e-07, "epoch": 3.3058901131116705, "percentage": 66.12, "elapsed_time": "2:55:32", "remaining_time": "1:29:57", "throughput": 8658.69, "total_tokens": 91198608} +{"current_steps": 135325, "total_steps": 204665, "loss": 0.0, "lr": 6.215237589166778e-07, "epoch": 3.3060122639435177, "percentage": 66.12, "elapsed_time": "2:55:32", "remaining_time": "1:29:57", "throughput": 8658.69, "total_tokens": 91201488} +{"current_steps": 135330, "total_steps": 204665, "loss": 0.0002, "lr": 6.214448264083024e-07, "epoch": 3.3061344147753644, "percentage": 66.12, "elapsed_time": "2:55:33", "remaining_time": "1:29:56", "throughput": 8658.72, "total_tokens": 91204816} +{"current_steps": 135335, "total_steps": 204665, "loss": 0.0001, "lr": 6.213658966528814e-07, "epoch": 3.3062565656072116, "percentage": 66.13, "elapsed_time": "2:55:33", "remaining_time": "1:29:56", "throughput": 8658.77, "total_tokens": 91208400} +{"current_steps": 135340, "total_steps": 204665, "loss": 0.0, "lr": 6.212869696509896e-07, "epoch": 3.306378716439059, "percentage": 66.13, "elapsed_time": "2:55:33", "remaining_time": "1:29:55", "throughput": 8658.77, "total_tokens": 91211280} +{"current_steps": 135345, "total_steps": 204665, "loss": 0.0, "lr": 6.212080454032013e-07, "epoch": 3.306500867270906, "percentage": 66.13, "elapsed_time": "2:55:34", "remaining_time": "1:29:55", "throughput": 8658.8, "total_tokens": 91214672} +{"current_steps": 135350, "total_steps": 204665, "loss": 0.0, "lr": 6.211291239100893e-07, "epoch": 3.306623018102753, "percentage": 66.13, "elapsed_time": "2:55:34", "remaining_time": "1:29:54", "throughput": 8658.87, "total_tokens": 91218512} +{"current_steps": 135355, "total_steps": 204665, "loss": 0.0576, "lr": 6.210502051722289e-07, "epoch": 3.3067451689346004, "percentage": 66.13, "elapsed_time": "2:55:35", "remaining_time": "1:29:54", "throughput": 8658.94, "total_tokens": 91222288} +{"current_steps": 135360, "total_steps": 204665, "loss": 0.0, "lr": 6.209712891901927e-07, "epoch": 3.3068673197664475, "percentage": 66.14, "elapsed_time": "2:55:35", "remaining_time": "1:29:54", "throughput": 8658.96, "total_tokens": 91225616} +{"current_steps": 135365, "total_steps": 204665, "loss": 0.0, "lr": 6.208923759645557e-07, "epoch": 3.3069894705982947, "percentage": 66.14, "elapsed_time": "2:55:35", "remaining_time": "1:29:53", "throughput": 8659.01, "total_tokens": 91229072} +{"current_steps": 135370, "total_steps": 204665, "loss": 0.0, "lr": 6.20813465495891e-07, "epoch": 3.307111621430142, "percentage": 66.14, "elapsed_time": "2:55:36", "remaining_time": "1:29:53", "throughput": 8659.04, "total_tokens": 91232400} +{"current_steps": 135375, "total_steps": 204665, "loss": 0.0, "lr": 6.207345577847727e-07, "epoch": 3.307233772261989, "percentage": 66.14, "elapsed_time": "2:55:36", "remaining_time": "1:29:52", "throughput": 8659.06, "total_tokens": 91235536} +{"current_steps": 135380, "total_steps": 204665, "loss": 0.0256, "lr": 6.20655652831775e-07, "epoch": 3.3073559230938363, "percentage": 66.15, "elapsed_time": "2:55:36", "remaining_time": "1:29:52", "throughput": 8659.1, "total_tokens": 91238928} +{"current_steps": 135385, "total_steps": 204665, "loss": 0.0, "lr": 6.205767506374713e-07, "epoch": 3.3074780739256835, "percentage": 66.15, "elapsed_time": "2:55:37", "remaining_time": "1:29:52", "throughput": 8659.12, "total_tokens": 91242128} +{"current_steps": 135390, "total_steps": 204665, "loss": 0.0, "lr": 6.204978512024355e-07, "epoch": 3.3076002247575307, "percentage": 66.15, "elapsed_time": "2:55:37", "remaining_time": "1:29:51", "throughput": 8659.15, "total_tokens": 91245520} +{"current_steps": 135395, "total_steps": 204665, "loss": 0.0001, "lr": 6.204189545272415e-07, "epoch": 3.307722375589378, "percentage": 66.15, "elapsed_time": "2:55:37", "remaining_time": "1:29:51", "throughput": 8659.18, "total_tokens": 91248848} +{"current_steps": 135400, "total_steps": 204665, "loss": 0.0418, "lr": 6.203400606124629e-07, "epoch": 3.307844526421225, "percentage": 66.16, "elapsed_time": "2:55:38", "remaining_time": "1:29:50", "throughput": 8659.18, "total_tokens": 91251792} +{"current_steps": 135405, "total_steps": 204665, "loss": 0.0334, "lr": 6.202611694586735e-07, "epoch": 3.3079666772530723, "percentage": 66.16, "elapsed_time": "2:55:38", "remaining_time": "1:29:50", "throughput": 8659.23, "total_tokens": 91255376} +{"current_steps": 135410, "total_steps": 204665, "loss": 0.0397, "lr": 6.201822810664468e-07, "epoch": 3.3080888280849194, "percentage": 66.16, "elapsed_time": "2:55:38", "remaining_time": "1:29:50", "throughput": 8659.24, "total_tokens": 91258448} +{"current_steps": 135415, "total_steps": 204665, "loss": 0.0214, "lr": 6.201033954363571e-07, "epoch": 3.308210978916766, "percentage": 66.16, "elapsed_time": "2:55:39", "remaining_time": "1:29:49", "throughput": 8659.3, "total_tokens": 91262096} +{"current_steps": 135420, "total_steps": 204665, "loss": 0.0, "lr": 6.200245125689774e-07, "epoch": 3.308333129748614, "percentage": 66.17, "elapsed_time": "2:55:39", "remaining_time": "1:29:49", "throughput": 8659.34, "total_tokens": 91265552} +{"current_steps": 135425, "total_steps": 204665, "loss": 0.0, "lr": 6.19945632464882e-07, "epoch": 3.3084552805804606, "percentage": 66.17, "elapsed_time": "2:55:39", "remaining_time": "1:29:48", "throughput": 8659.36, "total_tokens": 91268816} +{"current_steps": 135430, "total_steps": 204665, "loss": 0.0001, "lr": 6.198667551246437e-07, "epoch": 3.3085774314123078, "percentage": 66.17, "elapsed_time": "2:55:40", "remaining_time": "1:29:48", "throughput": 8659.4, "total_tokens": 91272208} +{"current_steps": 135435, "total_steps": 204665, "loss": 0.0, "lr": 6.197878805488368e-07, "epoch": 3.308699582244155, "percentage": 66.17, "elapsed_time": "2:55:40", "remaining_time": "1:29:48", "throughput": 8659.41, "total_tokens": 91275280} +{"current_steps": 135440, "total_steps": 204665, "loss": 0.0004, "lr": 6.197090087380348e-07, "epoch": 3.308821733076002, "percentage": 66.18, "elapsed_time": "2:55:40", "remaining_time": "1:29:47", "throughput": 8659.45, "total_tokens": 91278736} +{"current_steps": 135445, "total_steps": 204665, "loss": 0.0, "lr": 6.196301396928109e-07, "epoch": 3.3089438839078493, "percentage": 66.18, "elapsed_time": "2:55:41", "remaining_time": "1:29:47", "throughput": 8659.51, "total_tokens": 91282512} +{"current_steps": 135450, "total_steps": 204665, "loss": 0.0726, "lr": 6.195512734137395e-07, "epoch": 3.3090660347396965, "percentage": 66.18, "elapsed_time": "2:55:41", "remaining_time": "1:29:46", "throughput": 8659.5, "total_tokens": 91285200} +{"current_steps": 135455, "total_steps": 204665, "loss": 0.0016, "lr": 6.194724099013929e-07, "epoch": 3.3091881855715437, "percentage": 66.18, "elapsed_time": "2:55:41", "remaining_time": "1:29:46", "throughput": 8659.51, "total_tokens": 91288208} +{"current_steps": 135460, "total_steps": 204665, "loss": 0.0001, "lr": 6.193935491563458e-07, "epoch": 3.309310336403391, "percentage": 66.19, "elapsed_time": "2:55:42", "remaining_time": "1:29:45", "throughput": 8659.58, "total_tokens": 91292112} +{"current_steps": 135465, "total_steps": 204665, "loss": 0.0001, "lr": 6.19314691179171e-07, "epoch": 3.309432487235238, "percentage": 66.19, "elapsed_time": "2:55:42", "remaining_time": "1:29:45", "throughput": 8659.61, "total_tokens": 91295376} +{"current_steps": 135470, "total_steps": 204665, "loss": 0.0011, "lr": 6.192358359704417e-07, "epoch": 3.3095546380670853, "percentage": 66.19, "elapsed_time": "2:55:43", "remaining_time": "1:29:45", "throughput": 8659.65, "total_tokens": 91298832} +{"current_steps": 135475, "total_steps": 204665, "loss": 0.0, "lr": 6.191569835307324e-07, "epoch": 3.3096767888989325, "percentage": 66.19, "elapsed_time": "2:55:43", "remaining_time": "1:29:44", "throughput": 8659.72, "total_tokens": 91302672} +{"current_steps": 135480, "total_steps": 204665, "loss": 0.0, "lr": 6.190781338606157e-07, "epoch": 3.3097989397307797, "percentage": 66.2, "elapsed_time": "2:55:43", "remaining_time": "1:29:44", "throughput": 8659.75, "total_tokens": 91306000} +{"current_steps": 135485, "total_steps": 204665, "loss": 0.0, "lr": 6.189992869606655e-07, "epoch": 3.309921090562627, "percentage": 66.2, "elapsed_time": "2:55:44", "remaining_time": "1:29:43", "throughput": 8659.78, "total_tokens": 91309392} +{"current_steps": 135490, "total_steps": 204665, "loss": 0.0001, "lr": 6.189204428314547e-07, "epoch": 3.310043241394474, "percentage": 66.2, "elapsed_time": "2:55:44", "remaining_time": "1:29:43", "throughput": 8659.8, "total_tokens": 91312592} +{"current_steps": 135495, "total_steps": 204665, "loss": 0.0, "lr": 6.18841601473557e-07, "epoch": 3.3101653922263212, "percentage": 66.2, "elapsed_time": "2:55:44", "remaining_time": "1:29:43", "throughput": 8659.83, "total_tokens": 91315920} +{"current_steps": 135500, "total_steps": 204665, "loss": 0.0027, "lr": 6.18762762887546e-07, "epoch": 3.3102875430581684, "percentage": 66.21, "elapsed_time": "2:55:45", "remaining_time": "1:29:42", "throughput": 8659.85, "total_tokens": 91319056} +{"current_steps": 135505, "total_steps": 204665, "loss": 0.0, "lr": 6.186839270739943e-07, "epoch": 3.3104096938900156, "percentage": 66.21, "elapsed_time": "2:55:45", "remaining_time": "1:29:42", "throughput": 8659.85, "total_tokens": 91322000} +{"current_steps": 135510, "total_steps": 204665, "loss": 0.0001, "lr": 6.18605094033476e-07, "epoch": 3.3105318447218623, "percentage": 66.21, "elapsed_time": "2:55:45", "remaining_time": "1:29:41", "throughput": 8659.86, "total_tokens": 91325136} +{"current_steps": 135515, "total_steps": 204665, "loss": 0.0, "lr": 6.185262637665636e-07, "epoch": 3.3106539955537095, "percentage": 66.21, "elapsed_time": "2:55:46", "remaining_time": "1:29:41", "throughput": 8659.86, "total_tokens": 91328016} +{"current_steps": 135520, "total_steps": 204665, "loss": 0.0311, "lr": 6.184474362738314e-07, "epoch": 3.3107761463855567, "percentage": 66.22, "elapsed_time": "2:55:46", "remaining_time": "1:29:41", "throughput": 8659.9, "total_tokens": 91331472} +{"current_steps": 135525, "total_steps": 204665, "loss": 0.1495, "lr": 6.183686115558515e-07, "epoch": 3.310898297217404, "percentage": 66.22, "elapsed_time": "2:55:46", "remaining_time": "1:29:40", "throughput": 8659.93, "total_tokens": 91334864} +{"current_steps": 135530, "total_steps": 204665, "loss": 0.0895, "lr": 6.182897896131977e-07, "epoch": 3.311020448049251, "percentage": 66.22, "elapsed_time": "2:55:47", "remaining_time": "1:29:40", "throughput": 8659.95, "total_tokens": 91338000} +{"current_steps": 135535, "total_steps": 204665, "loss": 0.0, "lr": 6.182109704464438e-07, "epoch": 3.3111425988810983, "percentage": 66.22, "elapsed_time": "2:55:47", "remaining_time": "1:29:39", "throughput": 8659.99, "total_tokens": 91341456} +{"current_steps": 135540, "total_steps": 204665, "loss": 0.0, "lr": 6.181321540561619e-07, "epoch": 3.3112647497129455, "percentage": 66.23, "elapsed_time": "2:55:47", "remaining_time": "1:29:39", "throughput": 8660.01, "total_tokens": 91344656} +{"current_steps": 135545, "total_steps": 204665, "loss": 0.0, "lr": 6.18053340442926e-07, "epoch": 3.3113869005447927, "percentage": 66.23, "elapsed_time": "2:55:48", "remaining_time": "1:29:38", "throughput": 8660.03, "total_tokens": 91347792} +{"current_steps": 135550, "total_steps": 204665, "loss": 0.0, "lr": 6.179745296073087e-07, "epoch": 3.31150905137664, "percentage": 66.23, "elapsed_time": "2:55:48", "remaining_time": "1:29:38", "throughput": 8660.08, "total_tokens": 91351440} +{"current_steps": 135555, "total_steps": 204665, "loss": 0.0, "lr": 6.178957215498836e-07, "epoch": 3.311631202208487, "percentage": 66.23, "elapsed_time": "2:55:48", "remaining_time": "1:29:38", "throughput": 8660.14, "total_tokens": 91355152} +{"current_steps": 135560, "total_steps": 204665, "loss": 0.0366, "lr": 6.178169162712234e-07, "epoch": 3.3117533530403342, "percentage": 66.24, "elapsed_time": "2:55:49", "remaining_time": "1:29:37", "throughput": 8660.15, "total_tokens": 91358160} +{"current_steps": 135565, "total_steps": 204665, "loss": 0.0, "lr": 6.177381137719013e-07, "epoch": 3.3118755038721814, "percentage": 66.24, "elapsed_time": "2:55:49", "remaining_time": "1:29:37", "throughput": 8660.17, "total_tokens": 91361360} +{"current_steps": 135570, "total_steps": 204665, "loss": 0.0002, "lr": 6.176593140524909e-07, "epoch": 3.3119976547040286, "percentage": 66.24, "elapsed_time": "2:55:49", "remaining_time": "1:29:36", "throughput": 8660.23, "total_tokens": 91365072} +{"current_steps": 135575, "total_steps": 204665, "loss": 0.0, "lr": 6.175805171135642e-07, "epoch": 3.312119805535876, "percentage": 66.24, "elapsed_time": "2:55:50", "remaining_time": "1:29:36", "throughput": 8660.29, "total_tokens": 91368784} +{"current_steps": 135580, "total_steps": 204665, "loss": 0.0, "lr": 6.175017229556953e-07, "epoch": 3.312241956367723, "percentage": 66.24, "elapsed_time": "2:55:50", "remaining_time": "1:29:36", "throughput": 8660.31, "total_tokens": 91371984} +{"current_steps": 135585, "total_steps": 204665, "loss": 0.0658, "lr": 6.174229315794564e-07, "epoch": 3.31236410719957, "percentage": 66.25, "elapsed_time": "2:55:50", "remaining_time": "1:29:35", "throughput": 8660.3, "total_tokens": 91374736} +{"current_steps": 135590, "total_steps": 204665, "loss": 0.0009, "lr": 6.17344142985421e-07, "epoch": 3.3124862580314174, "percentage": 66.25, "elapsed_time": "2:55:51", "remaining_time": "1:29:35", "throughput": 8660.32, "total_tokens": 91377936} +{"current_steps": 135595, "total_steps": 204665, "loss": 0.0, "lr": 6.17265357174162e-07, "epoch": 3.312608408863264, "percentage": 66.25, "elapsed_time": "2:55:51", "remaining_time": "1:29:34", "throughput": 8660.35, "total_tokens": 91381200} +{"current_steps": 135600, "total_steps": 204665, "loss": 0.0123, "lr": 6.171865741462522e-07, "epoch": 3.3127305596951118, "percentage": 66.25, "elapsed_time": "2:55:52", "remaining_time": "1:29:34", "throughput": 8660.36, "total_tokens": 91384208} +{"current_steps": 135605, "total_steps": 204665, "loss": 0.0, "lr": 6.171077939022649e-07, "epoch": 3.3128527105269585, "percentage": 66.26, "elapsed_time": "2:55:52", "remaining_time": "1:29:34", "throughput": 8660.4, "total_tokens": 91387728} +{"current_steps": 135610, "total_steps": 204665, "loss": 0.0001, "lr": 6.170290164427721e-07, "epoch": 3.3129748613588057, "percentage": 66.26, "elapsed_time": "2:55:52", "remaining_time": "1:29:33", "throughput": 8660.41, "total_tokens": 91390736} +{"current_steps": 135615, "total_steps": 204665, "loss": 0.0002, "lr": 6.169502417683478e-07, "epoch": 3.313097012190653, "percentage": 66.26, "elapsed_time": "2:55:53", "remaining_time": "1:29:33", "throughput": 8660.44, "total_tokens": 91394000} +{"current_steps": 135620, "total_steps": 204665, "loss": 0.0, "lr": 6.16871469879564e-07, "epoch": 3.3132191630225, "percentage": 66.26, "elapsed_time": "2:55:53", "remaining_time": "1:29:32", "throughput": 8660.47, "total_tokens": 91397392} +{"current_steps": 135625, "total_steps": 204665, "loss": 0.0403, "lr": 6.16792700776994e-07, "epoch": 3.3133413138543473, "percentage": 66.27, "elapsed_time": "2:55:53", "remaining_time": "1:29:32", "throughput": 8660.5, "total_tokens": 91400656} +{"current_steps": 135630, "total_steps": 204665, "loss": 0.0, "lr": 6.167139344612108e-07, "epoch": 3.3134634646861945, "percentage": 66.27, "elapsed_time": "2:55:54", "remaining_time": "1:29:31", "throughput": 8660.53, "total_tokens": 91403984} +{"current_steps": 135635, "total_steps": 204665, "loss": 0.0, "lr": 6.166351709327866e-07, "epoch": 3.3135856155180416, "percentage": 66.27, "elapsed_time": "2:55:54", "remaining_time": "1:29:31", "throughput": 8660.55, "total_tokens": 91407120} +{"current_steps": 135640, "total_steps": 204665, "loss": 0.0, "lr": 6.16556410192295e-07, "epoch": 3.313707766349889, "percentage": 66.27, "elapsed_time": "2:55:54", "remaining_time": "1:29:31", "throughput": 8660.59, "total_tokens": 91410640} +{"current_steps": 135645, "total_steps": 204665, "loss": 0.0575, "lr": 6.164776522403079e-07, "epoch": 3.313829917181736, "percentage": 66.28, "elapsed_time": "2:55:55", "remaining_time": "1:29:30", "throughput": 8660.61, "total_tokens": 91413712} +{"current_steps": 135650, "total_steps": 204665, "loss": 0.041, "lr": 6.163988970773985e-07, "epoch": 3.313952068013583, "percentage": 66.28, "elapsed_time": "2:55:55", "remaining_time": "1:29:30", "throughput": 8660.68, "total_tokens": 91417680} +{"current_steps": 135655, "total_steps": 204665, "loss": 0.0, "lr": 6.163201447041399e-07, "epoch": 3.3140742188454304, "percentage": 66.28, "elapsed_time": "2:55:55", "remaining_time": "1:29:29", "throughput": 8660.69, "total_tokens": 91420752} +{"current_steps": 135660, "total_steps": 204665, "loss": 0.0003, "lr": 6.162413951211041e-07, "epoch": 3.3141963696772776, "percentage": 66.28, "elapsed_time": "2:55:56", "remaining_time": "1:29:29", "throughput": 8660.74, "total_tokens": 91424336} +{"current_steps": 135665, "total_steps": 204665, "loss": 0.0, "lr": 6.161626483288643e-07, "epoch": 3.314318520509125, "percentage": 66.29, "elapsed_time": "2:55:56", "remaining_time": "1:29:29", "throughput": 8660.77, "total_tokens": 91427536} +{"current_steps": 135670, "total_steps": 204665, "loss": 0.0, "lr": 6.160839043279926e-07, "epoch": 3.314440671340972, "percentage": 66.29, "elapsed_time": "2:55:56", "remaining_time": "1:29:28", "throughput": 8660.77, "total_tokens": 91430544} +{"current_steps": 135675, "total_steps": 204665, "loss": 0.0, "lr": 6.160051631190623e-07, "epoch": 3.314562822172819, "percentage": 66.29, "elapsed_time": "2:55:57", "remaining_time": "1:29:28", "throughput": 8660.8, "total_tokens": 91433872} +{"current_steps": 135680, "total_steps": 204665, "loss": 0.0, "lr": 6.159264247026456e-07, "epoch": 3.314684973004666, "percentage": 66.29, "elapsed_time": "2:55:57", "remaining_time": "1:29:27", "throughput": 8660.84, "total_tokens": 91437200} +{"current_steps": 135685, "total_steps": 204665, "loss": 0.0, "lr": 6.158476890793152e-07, "epoch": 3.3148071238365135, "percentage": 66.3, "elapsed_time": "2:55:57", "remaining_time": "1:29:27", "throughput": 8660.87, "total_tokens": 91440528} +{"current_steps": 135690, "total_steps": 204665, "loss": 0.0709, "lr": 6.157689562496439e-07, "epoch": 3.3149292746683603, "percentage": 66.3, "elapsed_time": "2:55:58", "remaining_time": "1:29:27", "throughput": 8660.9, "total_tokens": 91443984} +{"current_steps": 135695, "total_steps": 204665, "loss": 0.0, "lr": 6.15690226214204e-07, "epoch": 3.3150514255002075, "percentage": 66.3, "elapsed_time": "2:55:58", "remaining_time": "1:29:26", "throughput": 8660.93, "total_tokens": 91447248} +{"current_steps": 135700, "total_steps": 204665, "loss": 0.0, "lr": 6.156114989735682e-07, "epoch": 3.3151735763320547, "percentage": 66.3, "elapsed_time": "2:55:58", "remaining_time": "1:29:26", "throughput": 8660.95, "total_tokens": 91450448} +{"current_steps": 135705, "total_steps": 204665, "loss": 0.0, "lr": 6.15532774528309e-07, "epoch": 3.315295727163902, "percentage": 66.31, "elapsed_time": "2:55:59", "remaining_time": "1:29:25", "throughput": 8661.01, "total_tokens": 91454096} +{"current_steps": 135710, "total_steps": 204665, "loss": 0.0, "lr": 6.154540528789988e-07, "epoch": 3.315417877995749, "percentage": 66.31, "elapsed_time": "2:55:59", "remaining_time": "1:29:25", "throughput": 8661.05, "total_tokens": 91457552} +{"current_steps": 135715, "total_steps": 204665, "loss": 0.0, "lr": 6.153753340262101e-07, "epoch": 3.3155400288275962, "percentage": 66.31, "elapsed_time": "2:55:59", "remaining_time": "1:29:25", "throughput": 8661.11, "total_tokens": 91461264} +{"current_steps": 135720, "total_steps": 204665, "loss": 0.0, "lr": 6.152966179705154e-07, "epoch": 3.3156621796594434, "percentage": 66.31, "elapsed_time": "2:56:00", "remaining_time": "1:29:24", "throughput": 8661.14, "total_tokens": 91464656} +{"current_steps": 135725, "total_steps": 204665, "loss": 0.2296, "lr": 6.152179047124875e-07, "epoch": 3.3157843304912906, "percentage": 66.32, "elapsed_time": "2:56:00", "remaining_time": "1:29:24", "throughput": 8661.15, "total_tokens": 91467728} +{"current_steps": 135730, "total_steps": 204665, "loss": 0.0, "lr": 6.15139194252698e-07, "epoch": 3.315906481323138, "percentage": 66.32, "elapsed_time": "2:56:01", "remaining_time": "1:29:23", "throughput": 8661.17, "total_tokens": 91470928} +{"current_steps": 135735, "total_steps": 204665, "loss": 0.0001, "lr": 6.150604865917201e-07, "epoch": 3.316028632154985, "percentage": 66.32, "elapsed_time": "2:56:01", "remaining_time": "1:29:23", "throughput": 8661.22, "total_tokens": 91474448} +{"current_steps": 135740, "total_steps": 204665, "loss": 0.0, "lr": 6.149817817301257e-07, "epoch": 3.316150782986832, "percentage": 66.32, "elapsed_time": "2:56:01", "remaining_time": "1:29:22", "throughput": 8661.27, "total_tokens": 91478032} +{"current_steps": 135745, "total_steps": 204665, "loss": 0.0, "lr": 6.149030796684875e-07, "epoch": 3.3162729338186794, "percentage": 66.33, "elapsed_time": "2:56:02", "remaining_time": "1:29:22", "throughput": 8661.33, "total_tokens": 91481744} +{"current_steps": 135750, "total_steps": 204665, "loss": 0.0, "lr": 6.148243804073776e-07, "epoch": 3.3163950846505266, "percentage": 66.33, "elapsed_time": "2:56:02", "remaining_time": "1:29:22", "throughput": 8661.36, "total_tokens": 91485072} +{"current_steps": 135755, "total_steps": 204665, "loss": 0.0, "lr": 6.147456839473684e-07, "epoch": 3.3165172354823738, "percentage": 66.33, "elapsed_time": "2:56:02", "remaining_time": "1:29:21", "throughput": 8661.4, "total_tokens": 91488464} +{"current_steps": 135760, "total_steps": 204665, "loss": 0.0001, "lr": 6.146669902890324e-07, "epoch": 3.316639386314221, "percentage": 66.33, "elapsed_time": "2:56:03", "remaining_time": "1:29:21", "throughput": 8661.42, "total_tokens": 91491792} +{"current_steps": 135765, "total_steps": 204665, "loss": 0.0, "lr": 6.145882994329414e-07, "epoch": 3.316761537146068, "percentage": 66.34, "elapsed_time": "2:56:03", "remaining_time": "1:29:20", "throughput": 8661.45, "total_tokens": 91495056} +{"current_steps": 135770, "total_steps": 204665, "loss": 0.0, "lr": 6.145096113796684e-07, "epoch": 3.3168836879779153, "percentage": 66.34, "elapsed_time": "2:56:03", "remaining_time": "1:29:20", "throughput": 8661.47, "total_tokens": 91498256} +{"current_steps": 135775, "total_steps": 204665, "loss": 0.0546, "lr": 6.144309261297847e-07, "epoch": 3.317005838809762, "percentage": 66.34, "elapsed_time": "2:56:04", "remaining_time": "1:29:20", "throughput": 8661.52, "total_tokens": 91501776} +{"current_steps": 135780, "total_steps": 204665, "loss": 0.0001, "lr": 6.143522436838628e-07, "epoch": 3.3171279896416097, "percentage": 66.34, "elapsed_time": "2:56:04", "remaining_time": "1:29:19", "throughput": 8661.51, "total_tokens": 91504656} +{"current_steps": 135785, "total_steps": 204665, "loss": 0.0027, "lr": 6.142735640424759e-07, "epoch": 3.3172501404734565, "percentage": 66.35, "elapsed_time": "2:56:04", "remaining_time": "1:29:19", "throughput": 8661.53, "total_tokens": 91507792} +{"current_steps": 135790, "total_steps": 204665, "loss": 0.0001, "lr": 6.141948872061947e-07, "epoch": 3.3173722913053036, "percentage": 66.35, "elapsed_time": "2:56:05", "remaining_time": "1:29:18", "throughput": 8661.56, "total_tokens": 91511184} +{"current_steps": 135795, "total_steps": 204665, "loss": 0.0, "lr": 6.141162131755926e-07, "epoch": 3.317494442137151, "percentage": 66.35, "elapsed_time": "2:56:05", "remaining_time": "1:29:18", "throughput": 8661.6, "total_tokens": 91514576} +{"current_steps": 135800, "total_steps": 204665, "loss": 0.0001, "lr": 6.140375419512406e-07, "epoch": 3.317616592968998, "percentage": 66.35, "elapsed_time": "2:56:05", "remaining_time": "1:29:18", "throughput": 8661.63, "total_tokens": 91517840} +{"current_steps": 135805, "total_steps": 204665, "loss": 0.0, "lr": 6.139588735337118e-07, "epoch": 3.317738743800845, "percentage": 66.35, "elapsed_time": "2:56:06", "remaining_time": "1:29:17", "throughput": 8661.66, "total_tokens": 91521232} +{"current_steps": 135810, "total_steps": 204665, "loss": 0.0, "lr": 6.138802079235781e-07, "epoch": 3.3178608946326924, "percentage": 66.36, "elapsed_time": "2:56:06", "remaining_time": "1:29:17", "throughput": 8661.67, "total_tokens": 91524240} +{"current_steps": 135815, "total_steps": 204665, "loss": 0.0002, "lr": 6.138015451214109e-07, "epoch": 3.3179830454645396, "percentage": 66.36, "elapsed_time": "2:56:06", "remaining_time": "1:29:16", "throughput": 8661.71, "total_tokens": 91527696} +{"current_steps": 135820, "total_steps": 204665, "loss": 0.0001, "lr": 6.137228851277831e-07, "epoch": 3.318105196296387, "percentage": 66.36, "elapsed_time": "2:56:07", "remaining_time": "1:29:16", "throughput": 8661.81, "total_tokens": 91531920} +{"current_steps": 135825, "total_steps": 204665, "loss": 0.058, "lr": 6.136442279432661e-07, "epoch": 3.318227347128234, "percentage": 66.36, "elapsed_time": "2:56:07", "remaining_time": "1:29:15", "throughput": 8661.88, "total_tokens": 91535824} +{"current_steps": 135830, "total_steps": 204665, "loss": 0.0468, "lr": 6.135655735684327e-07, "epoch": 3.318349497960081, "percentage": 66.37, "elapsed_time": "2:56:08", "remaining_time": "1:29:15", "throughput": 8661.91, "total_tokens": 91539216} +{"current_steps": 135835, "total_steps": 204665, "loss": 0.0526, "lr": 6.134869220038537e-07, "epoch": 3.3184716487919284, "percentage": 66.37, "elapsed_time": "2:56:08", "remaining_time": "1:29:15", "throughput": 8661.97, "total_tokens": 91542864} +{"current_steps": 135840, "total_steps": 204665, "loss": 0.0, "lr": 6.134082732501018e-07, "epoch": 3.3185937996237755, "percentage": 66.37, "elapsed_time": "2:56:08", "remaining_time": "1:29:14", "throughput": 8661.98, "total_tokens": 91546000} +{"current_steps": 135845, "total_steps": 204665, "loss": 0.0, "lr": 6.133296273077495e-07, "epoch": 3.3187159504556227, "percentage": 66.37, "elapsed_time": "2:56:09", "remaining_time": "1:29:14", "throughput": 8662.02, "total_tokens": 91549392} +{"current_steps": 135850, "total_steps": 204665, "loss": 0.0001, "lr": 6.132509841773678e-07, "epoch": 3.31883810128747, "percentage": 66.38, "elapsed_time": "2:56:09", "remaining_time": "1:29:13", "throughput": 8662.06, "total_tokens": 91552912} +{"current_steps": 135855, "total_steps": 204665, "loss": 0.0, "lr": 6.131723438595291e-07, "epoch": 3.318960252119317, "percentage": 66.38, "elapsed_time": "2:56:09", "remaining_time": "1:29:13", "throughput": 8662.08, "total_tokens": 91555984} +{"current_steps": 135860, "total_steps": 204665, "loss": 0.0, "lr": 6.13093706354805e-07, "epoch": 3.319082402951164, "percentage": 66.38, "elapsed_time": "2:56:10", "remaining_time": "1:29:13", "throughput": 8662.11, "total_tokens": 91559312} +{"current_steps": 135865, "total_steps": 204665, "loss": 0.0, "lr": 6.130150716637677e-07, "epoch": 3.3192045537830115, "percentage": 66.38, "elapsed_time": "2:56:10", "remaining_time": "1:29:12", "throughput": 8662.19, "total_tokens": 91563280} +{"current_steps": 135870, "total_steps": 204665, "loss": 0.0, "lr": 6.129364397869887e-07, "epoch": 3.3193267046148582, "percentage": 66.39, "elapsed_time": "2:56:10", "remaining_time": "1:29:12", "throughput": 8662.23, "total_tokens": 91566736} +{"current_steps": 135875, "total_steps": 204665, "loss": 0.0, "lr": 6.128578107250399e-07, "epoch": 3.3194488554467054, "percentage": 66.39, "elapsed_time": "2:56:11", "remaining_time": "1:29:11", "throughput": 8662.27, "total_tokens": 91570256} +{"current_steps": 135880, "total_steps": 204665, "loss": 0.0, "lr": 6.127791844784937e-07, "epoch": 3.3195710062785526, "percentage": 66.39, "elapsed_time": "2:56:11", "remaining_time": "1:29:11", "throughput": 8662.34, "total_tokens": 91573968} +{"current_steps": 135885, "total_steps": 204665, "loss": 0.0, "lr": 6.12700561047921e-07, "epoch": 3.3196931571104, "percentage": 66.39, "elapsed_time": "2:56:11", "remaining_time": "1:29:11", "throughput": 8662.38, "total_tokens": 91577424} +{"current_steps": 135890, "total_steps": 204665, "loss": 0.0002, "lr": 6.126219404338944e-07, "epoch": 3.319815307942247, "percentage": 66.4, "elapsed_time": "2:56:12", "remaining_time": "1:29:10", "throughput": 8662.43, "total_tokens": 91581072} +{"current_steps": 135895, "total_steps": 204665, "loss": 0.0095, "lr": 6.125433226369847e-07, "epoch": 3.319937458774094, "percentage": 66.4, "elapsed_time": "2:56:12", "remaining_time": "1:29:10", "throughput": 8662.5, "total_tokens": 91584848} +{"current_steps": 135900, "total_steps": 204665, "loss": 0.0, "lr": 6.124647076577644e-07, "epoch": 3.3200596096059414, "percentage": 66.4, "elapsed_time": "2:56:12", "remaining_time": "1:29:09", "throughput": 8662.53, "total_tokens": 91588240} +{"current_steps": 135905, "total_steps": 204665, "loss": 0.0, "lr": 6.123860954968051e-07, "epoch": 3.3201817604377886, "percentage": 66.4, "elapsed_time": "2:56:13", "remaining_time": "1:29:09", "throughput": 8662.54, "total_tokens": 91591184} +{"current_steps": 135910, "total_steps": 204665, "loss": 0.0477, "lr": 6.123074861546783e-07, "epoch": 3.3203039112696358, "percentage": 66.41, "elapsed_time": "2:56:13", "remaining_time": "1:29:09", "throughput": 8662.57, "total_tokens": 91594576} +{"current_steps": 135915, "total_steps": 204665, "loss": 0.0001, "lr": 6.122288796319559e-07, "epoch": 3.320426062101483, "percentage": 66.41, "elapsed_time": "2:56:13", "remaining_time": "1:29:08", "throughput": 8662.6, "total_tokens": 91597776} +{"current_steps": 135920, "total_steps": 204665, "loss": 0.0001, "lr": 6.121502759292091e-07, "epoch": 3.32054821293333, "percentage": 66.41, "elapsed_time": "2:56:14", "remaining_time": "1:29:08", "throughput": 8662.63, "total_tokens": 91601168} +{"current_steps": 135925, "total_steps": 204665, "loss": 0.0, "lr": 6.120716750470102e-07, "epoch": 3.3206703637651773, "percentage": 66.41, "elapsed_time": "2:56:14", "remaining_time": "1:29:07", "throughput": 8662.66, "total_tokens": 91604496} +{"current_steps": 135930, "total_steps": 204665, "loss": 0.0, "lr": 6.119930769859299e-07, "epoch": 3.3207925145970245, "percentage": 66.42, "elapsed_time": "2:56:14", "remaining_time": "1:29:07", "throughput": 8662.68, "total_tokens": 91607696} +{"current_steps": 135935, "total_steps": 204665, "loss": 0.0001, "lr": 6.119144817465405e-07, "epoch": 3.3209146654288717, "percentage": 66.42, "elapsed_time": "2:56:15", "remaining_time": "1:29:06", "throughput": 8662.72, "total_tokens": 91611216} +{"current_steps": 135940, "total_steps": 204665, "loss": 0.0, "lr": 6.118358893294135e-07, "epoch": 3.321036816260719, "percentage": 66.42, "elapsed_time": "2:56:15", "remaining_time": "1:29:06", "throughput": 8662.76, "total_tokens": 91614608} +{"current_steps": 135945, "total_steps": 204665, "loss": 0.0, "lr": 6.117572997351201e-07, "epoch": 3.321158967092566, "percentage": 66.42, "elapsed_time": "2:56:16", "remaining_time": "1:29:06", "throughput": 8662.77, "total_tokens": 91617680} +{"current_steps": 135950, "total_steps": 204665, "loss": 0.0, "lr": 6.116787129642324e-07, "epoch": 3.3212811179244133, "percentage": 66.43, "elapsed_time": "2:56:16", "remaining_time": "1:29:05", "throughput": 8662.8, "total_tokens": 91620944} +{"current_steps": 135955, "total_steps": 204665, "loss": 0.0, "lr": 6.116001290173211e-07, "epoch": 3.32140326875626, "percentage": 66.43, "elapsed_time": "2:56:16", "remaining_time": "1:29:05", "throughput": 8662.84, "total_tokens": 91624464} +{"current_steps": 135960, "total_steps": 204665, "loss": 0.0, "lr": 6.115215478949587e-07, "epoch": 3.321525419588107, "percentage": 66.43, "elapsed_time": "2:56:17", "remaining_time": "1:29:04", "throughput": 8662.87, "total_tokens": 91627728} +{"current_steps": 135965, "total_steps": 204665, "loss": 0.0, "lr": 6.114429695977157e-07, "epoch": 3.3216475704199544, "percentage": 66.43, "elapsed_time": "2:56:17", "remaining_time": "1:29:04", "throughput": 8662.91, "total_tokens": 91631184} +{"current_steps": 135970, "total_steps": 204665, "loss": 0.0, "lr": 6.113643941261639e-07, "epoch": 3.3217697212518016, "percentage": 66.44, "elapsed_time": "2:56:17", "remaining_time": "1:29:04", "throughput": 8662.93, "total_tokens": 91634384} +{"current_steps": 135975, "total_steps": 204665, "loss": 0.0, "lr": 6.112858214808749e-07, "epoch": 3.3218918720836488, "percentage": 66.44, "elapsed_time": "2:56:18", "remaining_time": "1:29:03", "throughput": 8662.98, "total_tokens": 91638032} +{"current_steps": 135980, "total_steps": 204665, "loss": 0.0, "lr": 6.112072516624198e-07, "epoch": 3.322014022915496, "percentage": 66.44, "elapsed_time": "2:56:18", "remaining_time": "1:29:03", "throughput": 8663.02, "total_tokens": 91641552} +{"current_steps": 135985, "total_steps": 204665, "loss": 0.0, "lr": 6.111286846713704e-07, "epoch": 3.322136173747343, "percentage": 66.44, "elapsed_time": "2:56:18", "remaining_time": "1:29:02", "throughput": 8663.05, "total_tokens": 91644816} +{"current_steps": 135990, "total_steps": 204665, "loss": 0.0001, "lr": 6.110501205082976e-07, "epoch": 3.3222583245791903, "percentage": 66.45, "elapsed_time": "2:56:19", "remaining_time": "1:29:02", "throughput": 8663.07, "total_tokens": 91648016} +{"current_steps": 135995, "total_steps": 204665, "loss": 0.0512, "lr": 6.109715591737727e-07, "epoch": 3.3223804754110375, "percentage": 66.45, "elapsed_time": "2:56:19", "remaining_time": "1:29:02", "throughput": 8663.08, "total_tokens": 91651088} +{"current_steps": 136000, "total_steps": 204665, "loss": 0.0356, "lr": 6.108930006683678e-07, "epoch": 3.3225026262428847, "percentage": 66.45, "elapsed_time": "2:56:19", "remaining_time": "1:29:01", "throughput": 8663.11, "total_tokens": 91654352} +{"current_steps": 136005, "total_steps": 204665, "loss": 0.0, "lr": 6.108144449926533e-07, "epoch": 3.322624777074732, "percentage": 66.45, "elapsed_time": "2:56:20", "remaining_time": "1:29:01", "throughput": 8663.11, "total_tokens": 91657360} +{"current_steps": 136010, "total_steps": 204665, "loss": 0.0, "lr": 6.10735892147201e-07, "epoch": 3.322746927906579, "percentage": 66.45, "elapsed_time": "2:56:20", "remaining_time": "1:29:00", "throughput": 8663.13, "total_tokens": 91660560} +{"current_steps": 136015, "total_steps": 204665, "loss": 0.0407, "lr": 6.10657342132582e-07, "epoch": 3.3228690787384263, "percentage": 66.46, "elapsed_time": "2:56:20", "remaining_time": "1:29:00", "throughput": 8663.16, "total_tokens": 91663824} +{"current_steps": 136020, "total_steps": 204665, "loss": 0.0, "lr": 6.105787949493675e-07, "epoch": 3.3229912295702735, "percentage": 66.46, "elapsed_time": "2:56:21", "remaining_time": "1:29:00", "throughput": 8663.17, "total_tokens": 91666896} +{"current_steps": 136025, "total_steps": 204665, "loss": 0.0, "lr": 6.105002505981287e-07, "epoch": 3.3231133804021207, "percentage": 66.46, "elapsed_time": "2:56:21", "remaining_time": "1:28:59", "throughput": 8663.2, "total_tokens": 91670224} +{"current_steps": 136030, "total_steps": 204665, "loss": 0.0, "lr": 6.104217090794365e-07, "epoch": 3.323235531233968, "percentage": 66.46, "elapsed_time": "2:56:21", "remaining_time": "1:28:59", "throughput": 8663.24, "total_tokens": 91673616} +{"current_steps": 136035, "total_steps": 204665, "loss": 0.0001, "lr": 6.10343170393863e-07, "epoch": 3.323357682065815, "percentage": 66.47, "elapsed_time": "2:56:22", "remaining_time": "1:28:58", "throughput": 8663.28, "total_tokens": 91677136} +{"current_steps": 136040, "total_steps": 204665, "loss": 0.0, "lr": 6.102646345419784e-07, "epoch": 3.323479832897662, "percentage": 66.47, "elapsed_time": "2:56:22", "remaining_time": "1:28:58", "throughput": 8663.29, "total_tokens": 91680208} +{"current_steps": 136045, "total_steps": 204665, "loss": 0.0, "lr": 6.101861015243546e-07, "epoch": 3.3236019837295094, "percentage": 66.47, "elapsed_time": "2:56:22", "remaining_time": "1:28:57", "throughput": 8663.36, "total_tokens": 91683984} +{"current_steps": 136050, "total_steps": 204665, "loss": 0.0256, "lr": 6.101075713415617e-07, "epoch": 3.323724134561356, "percentage": 66.47, "elapsed_time": "2:56:23", "remaining_time": "1:28:57", "throughput": 8663.37, "total_tokens": 91687120} +{"current_steps": 136055, "total_steps": 204665, "loss": 0.0001, "lr": 6.100290439941718e-07, "epoch": 3.3238462853932034, "percentage": 66.48, "elapsed_time": "2:56:23", "remaining_time": "1:28:57", "throughput": 8663.41, "total_tokens": 91690576} +{"current_steps": 136060, "total_steps": 204665, "loss": 0.0653, "lr": 6.099505194827557e-07, "epoch": 3.3239684362250506, "percentage": 66.48, "elapsed_time": "2:56:23", "remaining_time": "1:28:56", "throughput": 8663.43, "total_tokens": 91693648} +{"current_steps": 136065, "total_steps": 204665, "loss": 0.0001, "lr": 6.098719978078841e-07, "epoch": 3.3240905870568977, "percentage": 66.48, "elapsed_time": "2:56:24", "remaining_time": "1:28:56", "throughput": 8663.46, "total_tokens": 91696976} +{"current_steps": 136070, "total_steps": 204665, "loss": 0.0, "lr": 6.097934789701285e-07, "epoch": 3.324212737888745, "percentage": 66.48, "elapsed_time": "2:56:24", "remaining_time": "1:28:55", "throughput": 8663.53, "total_tokens": 91700816} +{"current_steps": 136075, "total_steps": 204665, "loss": 0.0, "lr": 6.097149629700593e-07, "epoch": 3.324334888720592, "percentage": 66.49, "elapsed_time": "2:56:25", "remaining_time": "1:28:55", "throughput": 8663.57, "total_tokens": 91704336} +{"current_steps": 136080, "total_steps": 204665, "loss": 0.0022, "lr": 6.096364498082483e-07, "epoch": 3.3244570395524393, "percentage": 66.49, "elapsed_time": "2:56:25", "remaining_time": "1:28:55", "throughput": 8663.59, "total_tokens": 91707536} +{"current_steps": 136085, "total_steps": 204665, "loss": 0.0, "lr": 6.095579394852657e-07, "epoch": 3.3245791903842865, "percentage": 66.49, "elapsed_time": "2:56:25", "remaining_time": "1:28:54", "throughput": 8663.62, "total_tokens": 91710800} +{"current_steps": 136090, "total_steps": 204665, "loss": 0.0003, "lr": 6.094794320016826e-07, "epoch": 3.3247013412161337, "percentage": 66.49, "elapsed_time": "2:56:26", "remaining_time": "1:28:54", "throughput": 8663.63, "total_tokens": 91713872} +{"current_steps": 136095, "total_steps": 204665, "loss": 0.0, "lr": 6.094009273580707e-07, "epoch": 3.324823492047981, "percentage": 66.5, "elapsed_time": "2:56:26", "remaining_time": "1:28:53", "throughput": 8663.69, "total_tokens": 91717520} +{"current_steps": 136100, "total_steps": 204665, "loss": 0.0, "lr": 6.093224255549998e-07, "epoch": 3.324945642879828, "percentage": 66.5, "elapsed_time": "2:56:26", "remaining_time": "1:28:53", "throughput": 8663.73, "total_tokens": 91720912} +{"current_steps": 136105, "total_steps": 204665, "loss": 0.0385, "lr": 6.092439265930416e-07, "epoch": 3.3250677937116753, "percentage": 66.5, "elapsed_time": "2:56:27", "remaining_time": "1:28:53", "throughput": 8663.76, "total_tokens": 91724240} +{"current_steps": 136110, "total_steps": 204665, "loss": 0.0001, "lr": 6.091654304727665e-07, "epoch": 3.3251899445435225, "percentage": 66.5, "elapsed_time": "2:56:27", "remaining_time": "1:28:52", "throughput": 8663.78, "total_tokens": 91727504} +{"current_steps": 136115, "total_steps": 204665, "loss": 0.0, "lr": 6.090869371947458e-07, "epoch": 3.3253120953753696, "percentage": 66.51, "elapsed_time": "2:56:27", "remaining_time": "1:28:52", "throughput": 8663.81, "total_tokens": 91730704} +{"current_steps": 136120, "total_steps": 204665, "loss": 0.0, "lr": 6.090084467595497e-07, "epoch": 3.325434246207217, "percentage": 66.51, "elapsed_time": "2:56:28", "remaining_time": "1:28:51", "throughput": 8663.85, "total_tokens": 91734224} +{"current_steps": 136125, "total_steps": 204665, "loss": 0.0, "lr": 6.089299591677492e-07, "epoch": 3.325556397039064, "percentage": 66.51, "elapsed_time": "2:56:28", "remaining_time": "1:28:51", "throughput": 8663.87, "total_tokens": 91737360} +{"current_steps": 136130, "total_steps": 204665, "loss": 0.0, "lr": 6.088514744199158e-07, "epoch": 3.325678547870911, "percentage": 66.51, "elapsed_time": "2:56:28", "remaining_time": "1:28:50", "throughput": 8663.88, "total_tokens": 91740432} +{"current_steps": 136135, "total_steps": 204665, "loss": 0.0005, "lr": 6.087729925166191e-07, "epoch": 3.325800698702758, "percentage": 66.52, "elapsed_time": "2:56:29", "remaining_time": "1:28:50", "throughput": 8663.92, "total_tokens": 91743952} +{"current_steps": 136140, "total_steps": 204665, "loss": 0.0, "lr": 6.086945134584311e-07, "epoch": 3.325922849534605, "percentage": 66.52, "elapsed_time": "2:56:29", "remaining_time": "1:28:50", "throughput": 8663.99, "total_tokens": 91747792} +{"current_steps": 136145, "total_steps": 204665, "loss": 0.0, "lr": 6.086160372459211e-07, "epoch": 3.3260450003664523, "percentage": 66.52, "elapsed_time": "2:56:29", "remaining_time": "1:28:49", "throughput": 8664.04, "total_tokens": 91751312} +{"current_steps": 136150, "total_steps": 204665, "loss": 0.0, "lr": 6.085375638796608e-07, "epoch": 3.3261671511982995, "percentage": 66.52, "elapsed_time": "2:56:30", "remaining_time": "1:28:49", "throughput": 8664.07, "total_tokens": 91754576} +{"current_steps": 136155, "total_steps": 204665, "loss": 0.0, "lr": 6.084590933602209e-07, "epoch": 3.3262893020301467, "percentage": 66.53, "elapsed_time": "2:56:30", "remaining_time": "1:28:48", "throughput": 8664.1, "total_tokens": 91757968} +{"current_steps": 136160, "total_steps": 204665, "loss": 0.0, "lr": 6.083806256881716e-07, "epoch": 3.326411452861994, "percentage": 66.53, "elapsed_time": "2:56:30", "remaining_time": "1:28:48", "throughput": 8664.14, "total_tokens": 91761360} +{"current_steps": 136165, "total_steps": 204665, "loss": 0.0, "lr": 6.083021608640837e-07, "epoch": 3.326533603693841, "percentage": 66.53, "elapsed_time": "2:56:31", "remaining_time": "1:28:48", "throughput": 8664.19, "total_tokens": 91764944} +{"current_steps": 136170, "total_steps": 204665, "loss": 0.0, "lr": 6.082236988885279e-07, "epoch": 3.3266557545256883, "percentage": 66.53, "elapsed_time": "2:56:31", "remaining_time": "1:28:47", "throughput": 8664.2, "total_tokens": 91768016} +{"current_steps": 136175, "total_steps": 204665, "loss": 0.0414, "lr": 6.081452397620747e-07, "epoch": 3.3267779053575355, "percentage": 66.54, "elapsed_time": "2:56:31", "remaining_time": "1:28:47", "throughput": 8664.2, "total_tokens": 91770960} +{"current_steps": 136180, "total_steps": 204665, "loss": 0.0, "lr": 6.080667834852948e-07, "epoch": 3.3269000561893827, "percentage": 66.54, "elapsed_time": "2:56:32", "remaining_time": "1:28:46", "throughput": 8664.23, "total_tokens": 91774160} +{"current_steps": 136185, "total_steps": 204665, "loss": 0.0, "lr": 6.079883300587583e-07, "epoch": 3.32702220702123, "percentage": 66.54, "elapsed_time": "2:56:32", "remaining_time": "1:28:46", "throughput": 8664.21, "total_tokens": 91776912} +{"current_steps": 136190, "total_steps": 204665, "loss": 0.0001, "lr": 6.079098794830366e-07, "epoch": 3.327144357853077, "percentage": 66.54, "elapsed_time": "2:56:32", "remaining_time": "1:28:46", "throughput": 8664.24, "total_tokens": 91780176} +{"current_steps": 136195, "total_steps": 204665, "loss": 0.0001, "lr": 6.078314317586992e-07, "epoch": 3.3272665086849242, "percentage": 66.55, "elapsed_time": "2:56:33", "remaining_time": "1:28:45", "throughput": 8664.24, "total_tokens": 91783120} +{"current_steps": 136200, "total_steps": 204665, "loss": 0.0, "lr": 6.077529868863178e-07, "epoch": 3.3273886595167714, "percentage": 66.55, "elapsed_time": "2:56:33", "remaining_time": "1:28:45", "throughput": 8664.28, "total_tokens": 91786512} +{"current_steps": 136205, "total_steps": 204665, "loss": 0.0, "lr": 6.076745448664616e-07, "epoch": 3.3275108103486186, "percentage": 66.55, "elapsed_time": "2:56:34", "remaining_time": "1:28:44", "throughput": 8664.3, "total_tokens": 91789648} +{"current_steps": 136210, "total_steps": 204665, "loss": 0.0, "lr": 6.075961056997017e-07, "epoch": 3.327632961180466, "percentage": 66.55, "elapsed_time": "2:56:34", "remaining_time": "1:28:44", "throughput": 8664.32, "total_tokens": 91792848} +{"current_steps": 136215, "total_steps": 204665, "loss": 0.0433, "lr": 6.075176693866086e-07, "epoch": 3.327755112012313, "percentage": 66.56, "elapsed_time": "2:56:34", "remaining_time": "1:28:43", "throughput": 8664.35, "total_tokens": 91796176} +{"current_steps": 136220, "total_steps": 204665, "loss": 0.0353, "lr": 6.074392359277526e-07, "epoch": 3.3278772628441597, "percentage": 66.56, "elapsed_time": "2:56:35", "remaining_time": "1:28:43", "throughput": 8664.38, "total_tokens": 91799568} +{"current_steps": 136225, "total_steps": 204665, "loss": 0.0001, "lr": 6.073608053237042e-07, "epoch": 3.3279994136760074, "percentage": 66.56, "elapsed_time": "2:56:35", "remaining_time": "1:28:43", "throughput": 8664.42, "total_tokens": 91802960} +{"current_steps": 136230, "total_steps": 204665, "loss": 0.0001, "lr": 6.072823775750333e-07, "epoch": 3.328121564507854, "percentage": 66.56, "elapsed_time": "2:56:35", "remaining_time": "1:28:42", "throughput": 8664.42, "total_tokens": 91805904} +{"current_steps": 136235, "total_steps": 204665, "loss": 0.0, "lr": 6.072039526823109e-07, "epoch": 3.3282437153397013, "percentage": 66.56, "elapsed_time": "2:56:36", "remaining_time": "1:28:42", "throughput": 8664.45, "total_tokens": 91809232} +{"current_steps": 136240, "total_steps": 204665, "loss": 0.0, "lr": 6.071255306461067e-07, "epoch": 3.3283658661715485, "percentage": 66.57, "elapsed_time": "2:56:36", "remaining_time": "1:28:41", "throughput": 8664.46, "total_tokens": 91812368} +{"current_steps": 136245, "total_steps": 204665, "loss": 0.0001, "lr": 6.070471114669913e-07, "epoch": 3.3284880170033957, "percentage": 66.57, "elapsed_time": "2:56:36", "remaining_time": "1:28:41", "throughput": 8664.49, "total_tokens": 91815632} +{"current_steps": 136250, "total_steps": 204665, "loss": 0.0, "lr": 6.069686951455353e-07, "epoch": 3.328610167835243, "percentage": 66.57, "elapsed_time": "2:56:37", "remaining_time": "1:28:41", "throughput": 8664.5, "total_tokens": 91818768} +{"current_steps": 136255, "total_steps": 204665, "loss": 0.0, "lr": 6.068902816823083e-07, "epoch": 3.32873231866709, "percentage": 66.57, "elapsed_time": "2:56:37", "remaining_time": "1:28:40", "throughput": 8664.57, "total_tokens": 91822480} +{"current_steps": 136260, "total_steps": 204665, "loss": 0.0001, "lr": 6.068118710778813e-07, "epoch": 3.3288544694989373, "percentage": 66.58, "elapsed_time": "2:56:37", "remaining_time": "1:28:40", "throughput": 8664.59, "total_tokens": 91825680} +{"current_steps": 136265, "total_steps": 204665, "loss": 0.0001, "lr": 6.067334633328237e-07, "epoch": 3.3289766203307845, "percentage": 66.58, "elapsed_time": "2:56:38", "remaining_time": "1:28:39", "throughput": 8664.64, "total_tokens": 91829392} +{"current_steps": 136270, "total_steps": 204665, "loss": 0.0001, "lr": 6.066550584477065e-07, "epoch": 3.3290987711626316, "percentage": 66.58, "elapsed_time": "2:56:38", "remaining_time": "1:28:39", "throughput": 8664.68, "total_tokens": 91832784} +{"current_steps": 136275, "total_steps": 204665, "loss": 0.0, "lr": 6.065766564230995e-07, "epoch": 3.329220921994479, "percentage": 66.58, "elapsed_time": "2:56:38", "remaining_time": "1:28:39", "throughput": 8664.72, "total_tokens": 91836176} +{"current_steps": 136280, "total_steps": 204665, "loss": 0.0607, "lr": 6.064982572595725e-07, "epoch": 3.329343072826326, "percentage": 66.59, "elapsed_time": "2:56:39", "remaining_time": "1:28:38", "throughput": 8664.73, "total_tokens": 91839312} +{"current_steps": 136285, "total_steps": 204665, "loss": 0.0, "lr": 6.064198609576965e-07, "epoch": 3.329465223658173, "percentage": 66.59, "elapsed_time": "2:56:39", "remaining_time": "1:28:38", "throughput": 8664.81, "total_tokens": 91843216} +{"current_steps": 136290, "total_steps": 204665, "loss": 0.0, "lr": 6.063414675180407e-07, "epoch": 3.3295873744900204, "percentage": 66.59, "elapsed_time": "2:56:39", "remaining_time": "1:28:37", "throughput": 8664.83, "total_tokens": 91846480} +{"current_steps": 136295, "total_steps": 204665, "loss": 0.0, "lr": 6.06263076941176e-07, "epoch": 3.3297095253218676, "percentage": 66.59, "elapsed_time": "2:56:40", "remaining_time": "1:28:37", "throughput": 8664.9, "total_tokens": 91850320} +{"current_steps": 136300, "total_steps": 204665, "loss": 0.0, "lr": 6.061846892276718e-07, "epoch": 3.329831676153715, "percentage": 66.6, "elapsed_time": "2:56:40", "remaining_time": "1:28:37", "throughput": 8664.94, "total_tokens": 91853712} +{"current_steps": 136305, "total_steps": 204665, "loss": 0.0, "lr": 6.061063043780985e-07, "epoch": 3.3299538269855615, "percentage": 66.6, "elapsed_time": "2:56:40", "remaining_time": "1:28:36", "throughput": 8664.97, "total_tokens": 91857040} +{"current_steps": 136310, "total_steps": 204665, "loss": 0.0343, "lr": 6.060279223930263e-07, "epoch": 3.330075977817409, "percentage": 66.6, "elapsed_time": "2:56:41", "remaining_time": "1:28:36", "throughput": 8665.0, "total_tokens": 91860368} +{"current_steps": 136315, "total_steps": 204665, "loss": 0.0, "lr": 6.059495432730248e-07, "epoch": 3.330198128649256, "percentage": 66.6, "elapsed_time": "2:56:41", "remaining_time": "1:28:35", "throughput": 8665.03, "total_tokens": 91863632} +{"current_steps": 136320, "total_steps": 204665, "loss": 0.0, "lr": 6.058711670186645e-07, "epoch": 3.330320279481103, "percentage": 66.61, "elapsed_time": "2:56:42", "remaining_time": "1:28:35", "throughput": 8665.06, "total_tokens": 91867024} +{"current_steps": 136325, "total_steps": 204665, "loss": 0.0006, "lr": 6.057927936305149e-07, "epoch": 3.3304424303129503, "percentage": 66.61, "elapsed_time": "2:56:42", "remaining_time": "1:28:34", "throughput": 8665.08, "total_tokens": 91870160} +{"current_steps": 136330, "total_steps": 204665, "loss": 0.0, "lr": 6.057144231091461e-07, "epoch": 3.3305645811447975, "percentage": 66.61, "elapsed_time": "2:56:42", "remaining_time": "1:28:34", "throughput": 8665.09, "total_tokens": 91873232} +{"current_steps": 136335, "total_steps": 204665, "loss": 0.0, "lr": 6.056360554551281e-07, "epoch": 3.3306867319766447, "percentage": 66.61, "elapsed_time": "2:56:43", "remaining_time": "1:28:34", "throughput": 8665.08, "total_tokens": 91876048} +{"current_steps": 136340, "total_steps": 204665, "loss": 0.0001, "lr": 6.055576906690306e-07, "epoch": 3.330808882808492, "percentage": 66.62, "elapsed_time": "2:56:43", "remaining_time": "1:28:33", "throughput": 8665.1, "total_tokens": 91879120} +{"current_steps": 136345, "total_steps": 204665, "loss": 0.0, "lr": 6.054793287514241e-07, "epoch": 3.330931033640339, "percentage": 66.62, "elapsed_time": "2:56:43", "remaining_time": "1:28:33", "throughput": 8665.12, "total_tokens": 91882320} +{"current_steps": 136350, "total_steps": 204665, "loss": 0.0, "lr": 6.054009697028776e-07, "epoch": 3.3310531844721862, "percentage": 66.62, "elapsed_time": "2:56:44", "remaining_time": "1:28:32", "throughput": 8665.16, "total_tokens": 91885776} +{"current_steps": 136355, "total_steps": 204665, "loss": 0.0, "lr": 6.053226135239618e-07, "epoch": 3.3311753353040334, "percentage": 66.62, "elapsed_time": "2:56:44", "remaining_time": "1:28:32", "throughput": 8665.18, "total_tokens": 91889040} +{"current_steps": 136360, "total_steps": 204665, "loss": 0.0002, "lr": 6.052442602152457e-07, "epoch": 3.3312974861358806, "percentage": 66.63, "elapsed_time": "2:56:44", "remaining_time": "1:28:32", "throughput": 8665.22, "total_tokens": 91892496} +{"current_steps": 136365, "total_steps": 204665, "loss": 0.0, "lr": 6.051659097772996e-07, "epoch": 3.331419636967728, "percentage": 66.63, "elapsed_time": "2:56:45", "remaining_time": "1:28:31", "throughput": 8665.25, "total_tokens": 91895760} +{"current_steps": 136370, "total_steps": 204665, "loss": 0.0, "lr": 6.050875622106932e-07, "epoch": 3.331541787799575, "percentage": 66.63, "elapsed_time": "2:56:45", "remaining_time": "1:28:31", "throughput": 8665.25, "total_tokens": 91898704} +{"current_steps": 136375, "total_steps": 204665, "loss": 0.0667, "lr": 6.050092175159964e-07, "epoch": 3.331663938631422, "percentage": 66.63, "elapsed_time": "2:56:45", "remaining_time": "1:28:30", "throughput": 8665.27, "total_tokens": 91901968} +{"current_steps": 136380, "total_steps": 204665, "loss": 0.0, "lr": 6.04930875693779e-07, "epoch": 3.3317860894632694, "percentage": 66.64, "elapsed_time": "2:56:46", "remaining_time": "1:28:30", "throughput": 8665.28, "total_tokens": 91904976} +{"current_steps": 136385, "total_steps": 204665, "loss": 0.0, "lr": 6.048525367446102e-07, "epoch": 3.3319082402951166, "percentage": 66.64, "elapsed_time": "2:56:46", "remaining_time": "1:28:30", "throughput": 8665.32, "total_tokens": 91908368} +{"current_steps": 136390, "total_steps": 204665, "loss": 0.0527, "lr": 6.047742006690602e-07, "epoch": 3.3320303911269638, "percentage": 66.64, "elapsed_time": "2:56:46", "remaining_time": "1:28:29", "throughput": 8665.32, "total_tokens": 91911312} +{"current_steps": 136395, "total_steps": 204665, "loss": 0.0, "lr": 6.046958674676983e-07, "epoch": 3.332152541958811, "percentage": 66.64, "elapsed_time": "2:56:47", "remaining_time": "1:28:29", "throughput": 8665.34, "total_tokens": 91914512} +{"current_steps": 136400, "total_steps": 204665, "loss": 0.0, "lr": 6.046175371410944e-07, "epoch": 3.3322746927906577, "percentage": 66.65, "elapsed_time": "2:56:47", "remaining_time": "1:28:28", "throughput": 8665.38, "total_tokens": 91917968} +{"current_steps": 136405, "total_steps": 204665, "loss": 0.0, "lr": 6.045392096898184e-07, "epoch": 3.332396843622505, "percentage": 66.65, "elapsed_time": "2:56:47", "remaining_time": "1:28:28", "throughput": 8665.42, "total_tokens": 91921488} +{"current_steps": 136410, "total_steps": 204665, "loss": 0.0, "lr": 6.044608851144392e-07, "epoch": 3.332518994454352, "percentage": 66.65, "elapsed_time": "2:56:48", "remaining_time": "1:28:27", "throughput": 8665.42, "total_tokens": 91924368} +{"current_steps": 136415, "total_steps": 204665, "loss": 0.0453, "lr": 6.043825634155274e-07, "epoch": 3.3326411452861993, "percentage": 66.65, "elapsed_time": "2:56:48", "remaining_time": "1:28:27", "throughput": 8665.44, "total_tokens": 91927568} +{"current_steps": 136420, "total_steps": 204665, "loss": 0.0, "lr": 6.043042445936515e-07, "epoch": 3.3327632961180464, "percentage": 66.66, "elapsed_time": "2:56:48", "remaining_time": "1:28:27", "throughput": 8665.46, "total_tokens": 91930832} +{"current_steps": 136425, "total_steps": 204665, "loss": 0.0, "lr": 6.04225928649382e-07, "epoch": 3.3328854469498936, "percentage": 66.66, "elapsed_time": "2:56:49", "remaining_time": "1:28:26", "throughput": 8665.53, "total_tokens": 91934608} +{"current_steps": 136430, "total_steps": 204665, "loss": 0.0, "lr": 6.041476155832877e-07, "epoch": 3.333007597781741, "percentage": 66.66, "elapsed_time": "2:56:49", "remaining_time": "1:28:26", "throughput": 8665.56, "total_tokens": 91937936} +{"current_steps": 136435, "total_steps": 204665, "loss": 0.0, "lr": 6.040693053959384e-07, "epoch": 3.333129748613588, "percentage": 66.66, "elapsed_time": "2:56:49", "remaining_time": "1:28:25", "throughput": 8665.59, "total_tokens": 91941200} +{"current_steps": 136440, "total_steps": 204665, "loss": 0.0, "lr": 6.039909980879039e-07, "epoch": 3.333251899445435, "percentage": 66.67, "elapsed_time": "2:56:50", "remaining_time": "1:28:25", "throughput": 8665.61, "total_tokens": 91944400} +{"current_steps": 136445, "total_steps": 204665, "loss": 0.0, "lr": 6.039126936597529e-07, "epoch": 3.3333740502772824, "percentage": 66.67, "elapsed_time": "2:56:50", "remaining_time": "1:28:25", "throughput": 8665.65, "total_tokens": 91947920} +{"current_steps": 136450, "total_steps": 204665, "loss": 0.0, "lr": 6.038343921120558e-07, "epoch": 3.3334962011091296, "percentage": 66.67, "elapsed_time": "2:56:50", "remaining_time": "1:28:24", "throughput": 8665.67, "total_tokens": 91951120} +{"current_steps": 136455, "total_steps": 204665, "loss": 0.0, "lr": 6.037560934453812e-07, "epoch": 3.3336183519409768, "percentage": 66.67, "elapsed_time": "2:56:51", "remaining_time": "1:28:24", "throughput": 8665.69, "total_tokens": 91954192} +{"current_steps": 136460, "total_steps": 204665, "loss": 0.0001, "lr": 6.036777976602987e-07, "epoch": 3.333740502772824, "percentage": 66.67, "elapsed_time": "2:56:51", "remaining_time": "1:28:23", "throughput": 8665.71, "total_tokens": 91957392} +{"current_steps": 136465, "total_steps": 204665, "loss": 0.0, "lr": 6.035995047573785e-07, "epoch": 3.333862653604671, "percentage": 66.68, "elapsed_time": "2:56:51", "remaining_time": "1:28:23", "throughput": 8665.72, "total_tokens": 91960464} +{"current_steps": 136470, "total_steps": 204665, "loss": 0.0, "lr": 6.035212147371887e-07, "epoch": 3.3339848044365183, "percentage": 66.68, "elapsed_time": "2:56:52", "remaining_time": "1:28:23", "throughput": 8665.77, "total_tokens": 91964112} +{"current_steps": 136475, "total_steps": 204665, "loss": 0.0, "lr": 6.034429276002996e-07, "epoch": 3.3341069552683655, "percentage": 66.68, "elapsed_time": "2:56:52", "remaining_time": "1:28:22", "throughput": 8665.79, "total_tokens": 91967184} +{"current_steps": 136480, "total_steps": 204665, "loss": 0.0, "lr": 6.033646433472803e-07, "epoch": 3.3342291061002127, "percentage": 66.68, "elapsed_time": "2:56:53", "remaining_time": "1:28:22", "throughput": 8665.83, "total_tokens": 91970704} +{"current_steps": 136485, "total_steps": 204665, "loss": 0.0005, "lr": 6.032863619786999e-07, "epoch": 3.3343512569320595, "percentage": 66.69, "elapsed_time": "2:56:53", "remaining_time": "1:28:21", "throughput": 8665.88, "total_tokens": 91974224} +{"current_steps": 136490, "total_steps": 204665, "loss": 0.0001, "lr": 6.032080834951276e-07, "epoch": 3.334473407763907, "percentage": 66.69, "elapsed_time": "2:56:53", "remaining_time": "1:28:21", "throughput": 8665.9, "total_tokens": 91977488} +{"current_steps": 136495, "total_steps": 204665, "loss": 0.0, "lr": 6.031298078971328e-07, "epoch": 3.334595558595754, "percentage": 66.69, "elapsed_time": "2:56:54", "remaining_time": "1:28:21", "throughput": 8665.96, "total_tokens": 91981136} +{"current_steps": 136500, "total_steps": 204665, "loss": 0.0, "lr": 6.030515351852852e-07, "epoch": 3.334717709427601, "percentage": 66.69, "elapsed_time": "2:56:54", "remaining_time": "1:28:20", "throughput": 8666.0, "total_tokens": 91984656} +{"current_steps": 136505, "total_steps": 204665, "loss": 0.055, "lr": 6.029732653601531e-07, "epoch": 3.3348398602594482, "percentage": 66.7, "elapsed_time": "2:56:54", "remaining_time": "1:28:20", "throughput": 8666.01, "total_tokens": 91987664} +{"current_steps": 136510, "total_steps": 204665, "loss": 0.0001, "lr": 6.028949984223066e-07, "epoch": 3.3349620110912954, "percentage": 66.7, "elapsed_time": "2:56:55", "remaining_time": "1:28:19", "throughput": 8666.05, "total_tokens": 91991120} +{"current_steps": 136515, "total_steps": 204665, "loss": 0.0645, "lr": 6.028167343723142e-07, "epoch": 3.3350841619231426, "percentage": 66.7, "elapsed_time": "2:56:55", "remaining_time": "1:28:19", "throughput": 8666.06, "total_tokens": 91994064} +{"current_steps": 136520, "total_steps": 204665, "loss": 0.0, "lr": 6.027384732107455e-07, "epoch": 3.33520631275499, "percentage": 66.7, "elapsed_time": "2:56:55", "remaining_time": "1:28:18", "throughput": 8666.06, "total_tokens": 91997072} +{"current_steps": 136525, "total_steps": 204665, "loss": 0.0, "lr": 6.026602149381694e-07, "epoch": 3.335328463586837, "percentage": 66.71, "elapsed_time": "2:56:56", "remaining_time": "1:28:18", "throughput": 8666.06, "total_tokens": 91999952} +{"current_steps": 136530, "total_steps": 204665, "loss": 0.0, "lr": 6.025819595551551e-07, "epoch": 3.335450614418684, "percentage": 66.71, "elapsed_time": "2:56:56", "remaining_time": "1:28:18", "throughput": 8666.12, "total_tokens": 92003664} +{"current_steps": 136535, "total_steps": 204665, "loss": 0.0406, "lr": 6.025037070622719e-07, "epoch": 3.3355727652505314, "percentage": 66.71, "elapsed_time": "2:56:56", "remaining_time": "1:28:17", "throughput": 8666.15, "total_tokens": 92006928} +{"current_steps": 136540, "total_steps": 204665, "loss": 0.0, "lr": 6.024254574600884e-07, "epoch": 3.3356949160823786, "percentage": 66.71, "elapsed_time": "2:56:57", "remaining_time": "1:28:17", "throughput": 8666.18, "total_tokens": 92010320} +{"current_steps": 136545, "total_steps": 204665, "loss": 0.0, "lr": 6.02347210749174e-07, "epoch": 3.3358170669142257, "percentage": 66.72, "elapsed_time": "2:56:57", "remaining_time": "1:28:16", "throughput": 8666.2, "total_tokens": 92013456} +{"current_steps": 136550, "total_steps": 204665, "loss": 0.0662, "lr": 6.022689669300976e-07, "epoch": 3.335939217746073, "percentage": 66.72, "elapsed_time": "2:56:57", "remaining_time": "1:28:16", "throughput": 8666.25, "total_tokens": 92016976} +{"current_steps": 136555, "total_steps": 204665, "loss": 0.0001, "lr": 6.02190726003428e-07, "epoch": 3.33606136857792, "percentage": 66.72, "elapsed_time": "2:56:58", "remaining_time": "1:28:16", "throughput": 8666.28, "total_tokens": 92020368} +{"current_steps": 136560, "total_steps": 204665, "loss": 0.0, "lr": 6.021124879697348e-07, "epoch": 3.3361835194097673, "percentage": 66.72, "elapsed_time": "2:56:58", "remaining_time": "1:28:15", "throughput": 8666.28, "total_tokens": 92023376} +{"current_steps": 136565, "total_steps": 204665, "loss": 0.0, "lr": 6.020342528295863e-07, "epoch": 3.3363056702416145, "percentage": 66.73, "elapsed_time": "2:56:58", "remaining_time": "1:28:15", "throughput": 8666.3, "total_tokens": 92026512} +{"current_steps": 136570, "total_steps": 204665, "loss": 0.0, "lr": 6.019560205835522e-07, "epoch": 3.3364278210734617, "percentage": 66.73, "elapsed_time": "2:56:59", "remaining_time": "1:28:14", "throughput": 8666.31, "total_tokens": 92029520} +{"current_steps": 136575, "total_steps": 204665, "loss": 0.0002, "lr": 6.018777912322007e-07, "epoch": 3.336549971905309, "percentage": 66.73, "elapsed_time": "2:56:59", "remaining_time": "1:28:14", "throughput": 8666.34, "total_tokens": 92032784} +{"current_steps": 136580, "total_steps": 204665, "loss": 0.0, "lr": 6.01799564776101e-07, "epoch": 3.3366721227371556, "percentage": 66.73, "elapsed_time": "2:56:59", "remaining_time": "1:28:14", "throughput": 8666.39, "total_tokens": 92036432} +{"current_steps": 136585, "total_steps": 204665, "loss": 0.0, "lr": 6.017213412158221e-07, "epoch": 3.336794273569003, "percentage": 66.74, "elapsed_time": "2:57:00", "remaining_time": "1:28:13", "throughput": 8666.45, "total_tokens": 92040080} +{"current_steps": 136590, "total_steps": 204665, "loss": 0.0001, "lr": 6.016431205519323e-07, "epoch": 3.33691642440085, "percentage": 66.74, "elapsed_time": "2:57:00", "remaining_time": "1:28:13", "throughput": 8666.53, "total_tokens": 92044112} +{"current_steps": 136595, "total_steps": 204665, "loss": 0.0002, "lr": 6.015649027850015e-07, "epoch": 3.337038575232697, "percentage": 66.74, "elapsed_time": "2:57:00", "remaining_time": "1:28:12", "throughput": 8666.57, "total_tokens": 92047504} +{"current_steps": 136600, "total_steps": 204665, "loss": 0.0, "lr": 6.014866879155973e-07, "epoch": 3.3371607260645444, "percentage": 66.74, "elapsed_time": "2:57:01", "remaining_time": "1:28:12", "throughput": 8666.59, "total_tokens": 92050704} +{"current_steps": 136605, "total_steps": 204665, "loss": 0.0, "lr": 6.014084759442897e-07, "epoch": 3.3372828768963916, "percentage": 66.75, "elapsed_time": "2:57:01", "remaining_time": "1:28:11", "throughput": 8666.65, "total_tokens": 92054416} +{"current_steps": 136610, "total_steps": 204665, "loss": 0.0, "lr": 6.013302668716462e-07, "epoch": 3.3374050277282388, "percentage": 66.75, "elapsed_time": "2:57:02", "remaining_time": "1:28:11", "throughput": 8666.68, "total_tokens": 92057808} +{"current_steps": 136615, "total_steps": 204665, "loss": 0.0131, "lr": 6.012520606982365e-07, "epoch": 3.337527178560086, "percentage": 66.75, "elapsed_time": "2:57:02", "remaining_time": "1:28:11", "throughput": 8666.73, "total_tokens": 92061328} +{"current_steps": 136620, "total_steps": 204665, "loss": 0.0003, "lr": 6.011738574246294e-07, "epoch": 3.337649329391933, "percentage": 66.75, "elapsed_time": "2:57:02", "remaining_time": "1:28:10", "throughput": 8666.78, "total_tokens": 92064912} +{"current_steps": 136625, "total_steps": 204665, "loss": 0.0, "lr": 6.010956570513927e-07, "epoch": 3.3377714802237803, "percentage": 66.76, "elapsed_time": "2:57:03", "remaining_time": "1:28:10", "throughput": 8666.8, "total_tokens": 92068112} +{"current_steps": 136630, "total_steps": 204665, "loss": 0.0, "lr": 6.010174595790963e-07, "epoch": 3.3378936310556275, "percentage": 66.76, "elapsed_time": "2:57:03", "remaining_time": "1:28:09", "throughput": 8666.83, "total_tokens": 92071440} +{"current_steps": 136635, "total_steps": 204665, "loss": 0.2199, "lr": 6.009392650083079e-07, "epoch": 3.3380157818874747, "percentage": 66.76, "elapsed_time": "2:57:03", "remaining_time": "1:28:09", "throughput": 8666.88, "total_tokens": 92075024} +{"current_steps": 136640, "total_steps": 204665, "loss": 0.0284, "lr": 6.008610733395965e-07, "epoch": 3.338137932719322, "percentage": 66.76, "elapsed_time": "2:57:04", "remaining_time": "1:28:09", "throughput": 8666.91, "total_tokens": 92078352} +{"current_steps": 136645, "total_steps": 204665, "loss": 0.0, "lr": 6.007828845735308e-07, "epoch": 3.338260083551169, "percentage": 66.77, "elapsed_time": "2:57:04", "remaining_time": "1:28:08", "throughput": 8666.98, "total_tokens": 92082192} +{"current_steps": 136650, "total_steps": 204665, "loss": 0.0, "lr": 6.007046987106792e-07, "epoch": 3.3383822343830163, "percentage": 66.77, "elapsed_time": "2:57:04", "remaining_time": "1:28:08", "throughput": 8666.98, "total_tokens": 92085136} +{"current_steps": 136655, "total_steps": 204665, "loss": 0.0, "lr": 6.006265157516106e-07, "epoch": 3.3385043852148635, "percentage": 66.77, "elapsed_time": "2:57:05", "remaining_time": "1:28:07", "throughput": 8667.01, "total_tokens": 92088400} +{"current_steps": 136660, "total_steps": 204665, "loss": 0.0774, "lr": 6.005483356968932e-07, "epoch": 3.3386265360467107, "percentage": 66.77, "elapsed_time": "2:57:05", "remaining_time": "1:28:07", "throughput": 8667.0, "total_tokens": 92091216} +{"current_steps": 136665, "total_steps": 204665, "loss": 0.0, "lr": 6.004701585470961e-07, "epoch": 3.3387486868785574, "percentage": 66.77, "elapsed_time": "2:57:05", "remaining_time": "1:28:07", "throughput": 8667.03, "total_tokens": 92094544} +{"current_steps": 136670, "total_steps": 204665, "loss": 0.0, "lr": 6.00391984302787e-07, "epoch": 3.338870837710405, "percentage": 66.78, "elapsed_time": "2:57:06", "remaining_time": "1:28:06", "throughput": 8667.07, "total_tokens": 92098000} +{"current_steps": 136675, "total_steps": 204665, "loss": 0.0, "lr": 6.003138129645353e-07, "epoch": 3.338992988542252, "percentage": 66.78, "elapsed_time": "2:57:06", "remaining_time": "1:28:06", "throughput": 8667.09, "total_tokens": 92101200} +{"current_steps": 136680, "total_steps": 204665, "loss": 0.0, "lr": 6.002356445329088e-07, "epoch": 3.339115139374099, "percentage": 66.78, "elapsed_time": "2:57:06", "remaining_time": "1:28:05", "throughput": 8667.15, "total_tokens": 92104848} +{"current_steps": 136685, "total_steps": 204665, "loss": 0.0, "lr": 6.001574790084763e-07, "epoch": 3.339237290205946, "percentage": 66.78, "elapsed_time": "2:57:07", "remaining_time": "1:28:05", "throughput": 8667.18, "total_tokens": 92108240} +{"current_steps": 136690, "total_steps": 204665, "loss": 0.0, "lr": 6.000793163918063e-07, "epoch": 3.3393594410377934, "percentage": 66.79, "elapsed_time": "2:57:07", "remaining_time": "1:28:05", "throughput": 8667.22, "total_tokens": 92111696} +{"current_steps": 136695, "total_steps": 204665, "loss": 0.0, "lr": 6.000011566834667e-07, "epoch": 3.3394815918696406, "percentage": 66.79, "elapsed_time": "2:57:07", "remaining_time": "1:28:04", "throughput": 8667.26, "total_tokens": 92115152} +{"current_steps": 136700, "total_steps": 204665, "loss": 0.0, "lr": 5.999229998840268e-07, "epoch": 3.3396037427014877, "percentage": 66.79, "elapsed_time": "2:57:08", "remaining_time": "1:28:04", "throughput": 8667.28, "total_tokens": 92118352} +{"current_steps": 136705, "total_steps": 204665, "loss": 0.0001, "lr": 5.998448459940539e-07, "epoch": 3.339725893533335, "percentage": 66.79, "elapsed_time": "2:57:08", "remaining_time": "1:28:03", "throughput": 8667.32, "total_tokens": 92121744} +{"current_steps": 136710, "total_steps": 204665, "loss": 0.0, "lr": 5.997666950141169e-07, "epoch": 3.339848044365182, "percentage": 66.8, "elapsed_time": "2:57:08", "remaining_time": "1:28:03", "throughput": 8667.32, "total_tokens": 92124752} +{"current_steps": 136715, "total_steps": 204665, "loss": 0.0, "lr": 5.996885469447847e-07, "epoch": 3.3399701951970293, "percentage": 66.8, "elapsed_time": "2:57:09", "remaining_time": "1:28:02", "throughput": 8667.38, "total_tokens": 92128336} +{"current_steps": 136720, "total_steps": 204665, "loss": 0.0918, "lr": 5.996104017866245e-07, "epoch": 3.3400923460288765, "percentage": 66.8, "elapsed_time": "2:57:09", "remaining_time": "1:28:02", "throughput": 8667.42, "total_tokens": 92131792} +{"current_steps": 136725, "total_steps": 204665, "loss": 0.0001, "lr": 5.995322595402057e-07, "epoch": 3.3402144968607237, "percentage": 66.8, "elapsed_time": "2:57:10", "remaining_time": "1:28:02", "throughput": 8667.45, "total_tokens": 92135056} +{"current_steps": 136730, "total_steps": 204665, "loss": 0.0, "lr": 5.994541202060955e-07, "epoch": 3.340336647692571, "percentage": 66.81, "elapsed_time": "2:57:10", "remaining_time": "1:28:01", "throughput": 8667.47, "total_tokens": 92138384} +{"current_steps": 136735, "total_steps": 204665, "loss": 0.0, "lr": 5.993759837848631e-07, "epoch": 3.340458798524418, "percentage": 66.81, "elapsed_time": "2:57:10", "remaining_time": "1:28:01", "throughput": 8667.56, "total_tokens": 92142480} +{"current_steps": 136740, "total_steps": 204665, "loss": 0.0, "lr": 5.99297850277076e-07, "epoch": 3.3405809493562653, "percentage": 66.81, "elapsed_time": "2:57:11", "remaining_time": "1:28:00", "throughput": 8667.59, "total_tokens": 92145744} +{"current_steps": 136745, "total_steps": 204665, "loss": 0.0, "lr": 5.992197196833026e-07, "epoch": 3.3407031001881125, "percentage": 66.81, "elapsed_time": "2:57:11", "remaining_time": "1:28:00", "throughput": 8667.63, "total_tokens": 92149264} +{"current_steps": 136750, "total_steps": 204665, "loss": 0.0009, "lr": 5.991415920041117e-07, "epoch": 3.340825251019959, "percentage": 66.82, "elapsed_time": "2:57:11", "remaining_time": "1:28:00", "throughput": 8667.67, "total_tokens": 92152656} +{"current_steps": 136755, "total_steps": 204665, "loss": 0.0902, "lr": 5.990634672400705e-07, "epoch": 3.340947401851807, "percentage": 66.82, "elapsed_time": "2:57:12", "remaining_time": "1:27:59", "throughput": 8667.69, "total_tokens": 92155920} +{"current_steps": 136760, "total_steps": 204665, "loss": 0.0001, "lr": 5.98985345391748e-07, "epoch": 3.3410695526836536, "percentage": 66.82, "elapsed_time": "2:57:12", "remaining_time": "1:27:59", "throughput": 8667.7, "total_tokens": 92158928} +{"current_steps": 136765, "total_steps": 204665, "loss": 0.0001, "lr": 5.989072264597115e-07, "epoch": 3.3411917035155008, "percentage": 66.82, "elapsed_time": "2:57:12", "remaining_time": "1:27:58", "throughput": 8667.71, "total_tokens": 92162000} +{"current_steps": 136770, "total_steps": 204665, "loss": 0.0, "lr": 5.988291104445296e-07, "epoch": 3.341313854347348, "percentage": 66.83, "elapsed_time": "2:57:13", "remaining_time": "1:27:58", "throughput": 8667.73, "total_tokens": 92165136} +{"current_steps": 136775, "total_steps": 204665, "loss": 0.0, "lr": 5.987509973467706e-07, "epoch": 3.341436005179195, "percentage": 66.83, "elapsed_time": "2:57:13", "remaining_time": "1:27:58", "throughput": 8667.8, "total_tokens": 92168976} +{"current_steps": 136780, "total_steps": 204665, "loss": 0.0235, "lr": 5.98672887167002e-07, "epoch": 3.3415581560110423, "percentage": 66.83, "elapsed_time": "2:57:13", "remaining_time": "1:27:57", "throughput": 8667.82, "total_tokens": 92172176} +{"current_steps": 136785, "total_steps": 204665, "loss": 0.0001, "lr": 5.985947799057924e-07, "epoch": 3.3416803068428895, "percentage": 66.83, "elapsed_time": "2:57:14", "remaining_time": "1:27:57", "throughput": 8667.85, "total_tokens": 92175504} +{"current_steps": 136790, "total_steps": 204665, "loss": 0.0002, "lr": 5.985166755637092e-07, "epoch": 3.3418024576747367, "percentage": 66.84, "elapsed_time": "2:57:14", "remaining_time": "1:27:56", "throughput": 8667.87, "total_tokens": 92178768} +{"current_steps": 136795, "total_steps": 204665, "loss": 0.0414, "lr": 5.984385741413209e-07, "epoch": 3.341924608506584, "percentage": 66.84, "elapsed_time": "2:57:14", "remaining_time": "1:27:56", "throughput": 8667.89, "total_tokens": 92181904} +{"current_steps": 136800, "total_steps": 204665, "loss": 0.1131, "lr": 5.983604756391954e-07, "epoch": 3.342046759338431, "percentage": 66.84, "elapsed_time": "2:57:15", "remaining_time": "1:27:56", "throughput": 8667.94, "total_tokens": 92185488} +{"current_steps": 136805, "total_steps": 204665, "loss": 0.0001, "lr": 5.982823800579002e-07, "epoch": 3.3421689101702783, "percentage": 66.84, "elapsed_time": "2:57:15", "remaining_time": "1:27:55", "throughput": 8668.03, "total_tokens": 92189584} +{"current_steps": 136810, "total_steps": 204665, "loss": 0.0001, "lr": 5.98204287398004e-07, "epoch": 3.3422910610021255, "percentage": 66.85, "elapsed_time": "2:57:15", "remaining_time": "1:27:55", "throughput": 8668.13, "total_tokens": 92193808} +{"current_steps": 136815, "total_steps": 204665, "loss": 0.0, "lr": 5.981261976600738e-07, "epoch": 3.3424132118339727, "percentage": 66.85, "elapsed_time": "2:57:16", "remaining_time": "1:27:54", "throughput": 8668.15, "total_tokens": 92197072} +{"current_steps": 136820, "total_steps": 204665, "loss": 0.0, "lr": 5.980481108446786e-07, "epoch": 3.34253536266582, "percentage": 66.85, "elapsed_time": "2:57:16", "remaining_time": "1:27:54", "throughput": 8668.25, "total_tokens": 92201296} +{"current_steps": 136825, "total_steps": 204665, "loss": 0.0001, "lr": 5.97970026952385e-07, "epoch": 3.342657513497667, "percentage": 66.85, "elapsed_time": "2:57:17", "remaining_time": "1:27:53", "throughput": 8668.27, "total_tokens": 92204496} +{"current_steps": 136830, "total_steps": 204665, "loss": 0.0001, "lr": 5.978919459837621e-07, "epoch": 3.3427796643295142, "percentage": 66.86, "elapsed_time": "2:57:17", "remaining_time": "1:27:53", "throughput": 8668.31, "total_tokens": 92207888} +{"current_steps": 136835, "total_steps": 204665, "loss": 0.0, "lr": 5.978138679393766e-07, "epoch": 3.3429018151613614, "percentage": 66.86, "elapsed_time": "2:57:17", "remaining_time": "1:27:53", "throughput": 8668.32, "total_tokens": 92210896} +{"current_steps": 136840, "total_steps": 204665, "loss": 0.0, "lr": 5.977357928197971e-07, "epoch": 3.3430239659932086, "percentage": 66.86, "elapsed_time": "2:57:18", "remaining_time": "1:27:52", "throughput": 8668.32, "total_tokens": 92213904} +{"current_steps": 136845, "total_steps": 204665, "loss": 0.0, "lr": 5.976577206255913e-07, "epoch": 3.3431461168250554, "percentage": 66.86, "elapsed_time": "2:57:18", "remaining_time": "1:27:52", "throughput": 8668.35, "total_tokens": 92217232} +{"current_steps": 136850, "total_steps": 204665, "loss": 0.0002, "lr": 5.975796513573263e-07, "epoch": 3.343268267656903, "percentage": 66.87, "elapsed_time": "2:57:18", "remaining_time": "1:27:51", "throughput": 8668.36, "total_tokens": 92220240} +{"current_steps": 136855, "total_steps": 204665, "loss": 0.0, "lr": 5.975015850155708e-07, "epoch": 3.3433904184887497, "percentage": 66.87, "elapsed_time": "2:57:19", "remaining_time": "1:27:51", "throughput": 8668.42, "total_tokens": 92224016} +{"current_steps": 136860, "total_steps": 204665, "loss": 0.0, "lr": 5.974235216008916e-07, "epoch": 3.343512569320597, "percentage": 66.87, "elapsed_time": "2:57:19", "remaining_time": "1:27:51", "throughput": 8668.46, "total_tokens": 92227408} +{"current_steps": 136865, "total_steps": 204665, "loss": 0.0001, "lr": 5.973454611138568e-07, "epoch": 3.343634720152444, "percentage": 66.87, "elapsed_time": "2:57:19", "remaining_time": "1:27:50", "throughput": 8668.47, "total_tokens": 92230480} +{"current_steps": 136870, "total_steps": 204665, "loss": 0.0921, "lr": 5.972674035550345e-07, "epoch": 3.3437568709842913, "percentage": 66.88, "elapsed_time": "2:57:20", "remaining_time": "1:27:50", "throughput": 8668.58, "total_tokens": 92234832} +{"current_steps": 136875, "total_steps": 204665, "loss": 0.0001, "lr": 5.971893489249917e-07, "epoch": 3.3438790218161385, "percentage": 66.88, "elapsed_time": "2:57:20", "remaining_time": "1:27:49", "throughput": 8668.61, "total_tokens": 92238224} +{"current_steps": 136880, "total_steps": 204665, "loss": 0.0, "lr": 5.971112972242966e-07, "epoch": 3.3440011726479857, "percentage": 66.88, "elapsed_time": "2:57:20", "remaining_time": "1:27:49", "throughput": 8668.61, "total_tokens": 92241104} +{"current_steps": 136885, "total_steps": 204665, "loss": 0.0, "lr": 5.970332484535161e-07, "epoch": 3.344123323479833, "percentage": 66.88, "elapsed_time": "2:57:21", "remaining_time": "1:27:49", "throughput": 8668.61, "total_tokens": 92244112} +{"current_steps": 136890, "total_steps": 204665, "loss": 0.0, "lr": 5.969552026132186e-07, "epoch": 3.34424547431168, "percentage": 66.88, "elapsed_time": "2:57:21", "remaining_time": "1:27:48", "throughput": 8668.64, "total_tokens": 92247376} +{"current_steps": 136895, "total_steps": 204665, "loss": 0.0, "lr": 5.968771597039711e-07, "epoch": 3.3443676251435273, "percentage": 66.89, "elapsed_time": "2:57:21", "remaining_time": "1:27:48", "throughput": 8668.69, "total_tokens": 92250960} +{"current_steps": 136900, "total_steps": 204665, "loss": 0.0, "lr": 5.967991197263412e-07, "epoch": 3.3444897759753744, "percentage": 66.89, "elapsed_time": "2:57:22", "remaining_time": "1:27:47", "throughput": 8668.7, "total_tokens": 92254096} +{"current_steps": 136905, "total_steps": 204665, "loss": 0.0, "lr": 5.967210826808968e-07, "epoch": 3.3446119268072216, "percentage": 66.89, "elapsed_time": "2:57:22", "remaining_time": "1:27:47", "throughput": 8668.75, "total_tokens": 92257680} +{"current_steps": 136910, "total_steps": 204665, "loss": 0.0, "lr": 5.966430485682048e-07, "epoch": 3.344734077639069, "percentage": 66.89, "elapsed_time": "2:57:22", "remaining_time": "1:27:47", "throughput": 8668.76, "total_tokens": 92260688} +{"current_steps": 136915, "total_steps": 204665, "loss": 0.0, "lr": 5.965650173888334e-07, "epoch": 3.344856228470916, "percentage": 66.9, "elapsed_time": "2:57:23", "remaining_time": "1:27:46", "throughput": 8668.85, "total_tokens": 92264784} +{"current_steps": 136920, "total_steps": 204665, "loss": 0.0, "lr": 5.964869891433494e-07, "epoch": 3.344978379302763, "percentage": 66.9, "elapsed_time": "2:57:23", "remaining_time": "1:27:46", "throughput": 8668.89, "total_tokens": 92268240} +{"current_steps": 136925, "total_steps": 204665, "loss": 0.0, "lr": 5.964089638323204e-07, "epoch": 3.3451005301346104, "percentage": 66.9, "elapsed_time": "2:57:23", "remaining_time": "1:27:45", "throughput": 8668.93, "total_tokens": 92271632} +{"current_steps": 136930, "total_steps": 204665, "loss": 0.0001, "lr": 5.963309414563146e-07, "epoch": 3.345222680966457, "percentage": 66.9, "elapsed_time": "2:57:24", "remaining_time": "1:27:45", "throughput": 8668.98, "total_tokens": 92275280} +{"current_steps": 136935, "total_steps": 204665, "loss": 0.0, "lr": 5.962529220158983e-07, "epoch": 3.3453448317983048, "percentage": 66.91, "elapsed_time": "2:57:24", "remaining_time": "1:27:44", "throughput": 8669.01, "total_tokens": 92278672} +{"current_steps": 136940, "total_steps": 204665, "loss": 0.0, "lr": 5.961749055116396e-07, "epoch": 3.3454669826301515, "percentage": 66.91, "elapsed_time": "2:57:25", "remaining_time": "1:27:44", "throughput": 8669.06, "total_tokens": 92282192} +{"current_steps": 136945, "total_steps": 204665, "loss": 0.0, "lr": 5.960968919441055e-07, "epoch": 3.3455891334619987, "percentage": 66.91, "elapsed_time": "2:57:25", "remaining_time": "1:27:44", "throughput": 8669.12, "total_tokens": 92285904} +{"current_steps": 136950, "total_steps": 204665, "loss": 0.0001, "lr": 5.960188813138634e-07, "epoch": 3.345711284293846, "percentage": 66.91, "elapsed_time": "2:57:25", "remaining_time": "1:27:43", "throughput": 8669.14, "total_tokens": 92289104} +{"current_steps": 136955, "total_steps": 204665, "loss": 0.0001, "lr": 5.959408736214807e-07, "epoch": 3.345833435125693, "percentage": 66.92, "elapsed_time": "2:57:26", "remaining_time": "1:27:43", "throughput": 8669.17, "total_tokens": 92292368} +{"current_steps": 136960, "total_steps": 204665, "loss": 0.0, "lr": 5.958628688675244e-07, "epoch": 3.3459555859575403, "percentage": 66.92, "elapsed_time": "2:57:26", "remaining_time": "1:27:42", "throughput": 8669.19, "total_tokens": 92295632} +{"current_steps": 136965, "total_steps": 204665, "loss": 0.0, "lr": 5.957848670525624e-07, "epoch": 3.3460777367893875, "percentage": 66.92, "elapsed_time": "2:57:26", "remaining_time": "1:27:42", "throughput": 8669.26, "total_tokens": 92299536} +{"current_steps": 136970, "total_steps": 204665, "loss": 0.0006, "lr": 5.957068681771613e-07, "epoch": 3.3461998876212347, "percentage": 66.92, "elapsed_time": "2:57:27", "remaining_time": "1:27:42", "throughput": 8669.27, "total_tokens": 92302608} +{"current_steps": 136975, "total_steps": 204665, "loss": 0.0, "lr": 5.95628872241889e-07, "epoch": 3.346322038453082, "percentage": 66.93, "elapsed_time": "2:57:27", "remaining_time": "1:27:41", "throughput": 8669.31, "total_tokens": 92306000} +{"current_steps": 136980, "total_steps": 204665, "loss": 0.0, "lr": 5.955508792473118e-07, "epoch": 3.346444189284929, "percentage": 66.93, "elapsed_time": "2:57:27", "remaining_time": "1:27:41", "throughput": 8669.35, "total_tokens": 92309456} +{"current_steps": 136985, "total_steps": 204665, "loss": 0.0, "lr": 5.954728891939977e-07, "epoch": 3.3465663401167762, "percentage": 66.93, "elapsed_time": "2:57:28", "remaining_time": "1:27:40", "throughput": 8669.43, "total_tokens": 92313488} +{"current_steps": 136990, "total_steps": 204665, "loss": 0.0, "lr": 5.953949020825133e-07, "epoch": 3.3466884909486234, "percentage": 66.93, "elapsed_time": "2:57:28", "remaining_time": "1:27:40", "throughput": 8669.45, "total_tokens": 92316688} +{"current_steps": 136995, "total_steps": 204665, "loss": 0.0, "lr": 5.95316917913426e-07, "epoch": 3.3468106417804706, "percentage": 66.94, "elapsed_time": "2:57:28", "remaining_time": "1:27:40", "throughput": 8669.49, "total_tokens": 92320080} +{"current_steps": 137000, "total_steps": 204665, "loss": 0.0, "lr": 5.952389366873034e-07, "epoch": 3.346932792612318, "percentage": 66.94, "elapsed_time": "2:57:29", "remaining_time": "1:27:39", "throughput": 8669.54, "total_tokens": 92323664} +{"current_steps": 137005, "total_steps": 204665, "loss": 0.0, "lr": 5.951609584047117e-07, "epoch": 3.347054943444165, "percentage": 66.94, "elapsed_time": "2:57:29", "remaining_time": "1:27:39", "throughput": 8669.6, "total_tokens": 92327376} +{"current_steps": 137010, "total_steps": 204665, "loss": 0.0, "lr": 5.950829830662186e-07, "epoch": 3.347177094276012, "percentage": 66.94, "elapsed_time": "2:57:29", "remaining_time": "1:27:38", "throughput": 8669.61, "total_tokens": 92330448} +{"current_steps": 137015, "total_steps": 204665, "loss": 0.0, "lr": 5.950050106723907e-07, "epoch": 3.3472992451078594, "percentage": 66.95, "elapsed_time": "2:57:30", "remaining_time": "1:27:38", "throughput": 8669.67, "total_tokens": 92334096} +{"current_steps": 137020, "total_steps": 204665, "loss": 0.039, "lr": 5.949270412237953e-07, "epoch": 3.3474213959397066, "percentage": 66.95, "elapsed_time": "2:57:30", "remaining_time": "1:27:38", "throughput": 8669.7, "total_tokens": 92337488} +{"current_steps": 137025, "total_steps": 204665, "loss": 0.043, "lr": 5.948490747209997e-07, "epoch": 3.3475435467715533, "percentage": 66.95, "elapsed_time": "2:57:30", "remaining_time": "1:27:37", "throughput": 8669.75, "total_tokens": 92341008} +{"current_steps": 137030, "total_steps": 204665, "loss": 0.0001, "lr": 5.947711111645703e-07, "epoch": 3.3476656976034005, "percentage": 66.95, "elapsed_time": "2:57:31", "remaining_time": "1:27:37", "throughput": 8669.76, "total_tokens": 92344016} +{"current_steps": 137035, "total_steps": 204665, "loss": 0.0, "lr": 5.946931505550746e-07, "epoch": 3.3477878484352477, "percentage": 66.96, "elapsed_time": "2:57:31", "remaining_time": "1:27:36", "throughput": 8669.79, "total_tokens": 92347408} +{"current_steps": 137040, "total_steps": 204665, "loss": 0.0, "lr": 5.946151928930792e-07, "epoch": 3.347909999267095, "percentage": 66.96, "elapsed_time": "2:57:31", "remaining_time": "1:27:36", "throughput": 8669.85, "total_tokens": 92351120} +{"current_steps": 137045, "total_steps": 204665, "loss": 0.0, "lr": 5.945372381791513e-07, "epoch": 3.348032150098942, "percentage": 66.96, "elapsed_time": "2:57:32", "remaining_time": "1:27:36", "throughput": 8669.94, "total_tokens": 92355216} +{"current_steps": 137050, "total_steps": 204665, "loss": 0.0, "lr": 5.944592864138575e-07, "epoch": 3.3481543009307893, "percentage": 66.96, "elapsed_time": "2:57:32", "remaining_time": "1:27:35", "throughput": 8669.95, "total_tokens": 92358288} +{"current_steps": 137055, "total_steps": 204665, "loss": 0.0, "lr": 5.943813375977647e-07, "epoch": 3.3482764517626364, "percentage": 66.97, "elapsed_time": "2:57:33", "remaining_time": "1:27:35", "throughput": 8669.96, "total_tokens": 92361296} +{"current_steps": 137060, "total_steps": 204665, "loss": 0.0, "lr": 5.943033917314404e-07, "epoch": 3.3483986025944836, "percentage": 66.97, "elapsed_time": "2:57:33", "remaining_time": "1:27:34", "throughput": 8669.98, "total_tokens": 92364560} +{"current_steps": 137065, "total_steps": 204665, "loss": 0.0, "lr": 5.942254488154504e-07, "epoch": 3.348520753426331, "percentage": 66.97, "elapsed_time": "2:57:33", "remaining_time": "1:27:34", "throughput": 8670.03, "total_tokens": 92368080} +{"current_steps": 137070, "total_steps": 204665, "loss": 0.0, "lr": 5.941475088503627e-07, "epoch": 3.348642904258178, "percentage": 66.97, "elapsed_time": "2:57:34", "remaining_time": "1:27:33", "throughput": 8670.06, "total_tokens": 92371408} +{"current_steps": 137075, "total_steps": 204665, "loss": 0.0, "lr": 5.940695718367428e-07, "epoch": 3.348765055090025, "percentage": 66.98, "elapsed_time": "2:57:34", "remaining_time": "1:27:33", "throughput": 8670.06, "total_tokens": 92374416} +{"current_steps": 137080, "total_steps": 204665, "loss": 0.0, "lr": 5.939916377751584e-07, "epoch": 3.3488872059218724, "percentage": 66.98, "elapsed_time": "2:57:34", "remaining_time": "1:27:33", "throughput": 8670.13, "total_tokens": 92378256} +{"current_steps": 137085, "total_steps": 204665, "loss": 0.0418, "lr": 5.939137066661763e-07, "epoch": 3.3490093567537196, "percentage": 66.98, "elapsed_time": "2:57:35", "remaining_time": "1:27:32", "throughput": 8670.16, "total_tokens": 92381584} +{"current_steps": 137090, "total_steps": 204665, "loss": 0.0, "lr": 5.938357785103625e-07, "epoch": 3.3491315075855668, "percentage": 66.98, "elapsed_time": "2:57:35", "remaining_time": "1:27:32", "throughput": 8670.19, "total_tokens": 92384912} +{"current_steps": 137095, "total_steps": 204665, "loss": 0.0, "lr": 5.937578533082846e-07, "epoch": 3.349253658417414, "percentage": 66.99, "elapsed_time": "2:57:35", "remaining_time": "1:27:31", "throughput": 8670.26, "total_tokens": 92388752} +{"current_steps": 137100, "total_steps": 204665, "loss": 0.0, "lr": 5.936799310605087e-07, "epoch": 3.349375809249261, "percentage": 66.99, "elapsed_time": "2:57:36", "remaining_time": "1:27:31", "throughput": 8670.26, "total_tokens": 92391696} +{"current_steps": 137105, "total_steps": 204665, "loss": 0.0001, "lr": 5.936020117676015e-07, "epoch": 3.3494979600811083, "percentage": 66.99, "elapsed_time": "2:57:36", "remaining_time": "1:27:31", "throughput": 8670.3, "total_tokens": 92395088} +{"current_steps": 137110, "total_steps": 204665, "loss": 0.0, "lr": 5.9352409543013e-07, "epoch": 3.349620110912955, "percentage": 66.99, "elapsed_time": "2:57:36", "remaining_time": "1:27:30", "throughput": 8670.34, "total_tokens": 92398608} +{"current_steps": 137115, "total_steps": 204665, "loss": 0.0, "lr": 5.934461820486603e-07, "epoch": 3.3497422617448027, "percentage": 66.99, "elapsed_time": "2:57:37", "remaining_time": "1:27:30", "throughput": 8670.39, "total_tokens": 92402064} +{"current_steps": 137120, "total_steps": 204665, "loss": 0.0, "lr": 5.933682716237596e-07, "epoch": 3.3498644125766495, "percentage": 67.0, "elapsed_time": "2:57:37", "remaining_time": "1:27:29", "throughput": 8670.43, "total_tokens": 92405520} +{"current_steps": 137125, "total_steps": 204665, "loss": 0.0007, "lr": 5.932903641559939e-07, "epoch": 3.3499865634084967, "percentage": 67.0, "elapsed_time": "2:57:37", "remaining_time": "1:27:29", "throughput": 8670.47, "total_tokens": 92409104} +{"current_steps": 137130, "total_steps": 204665, "loss": 0.0, "lr": 5.932124596459305e-07, "epoch": 3.350108714240344, "percentage": 67.0, "elapsed_time": "2:57:38", "remaining_time": "1:27:29", "throughput": 8670.5, "total_tokens": 92412432} +{"current_steps": 137135, "total_steps": 204665, "loss": 0.0, "lr": 5.93134558094135e-07, "epoch": 3.350230865072191, "percentage": 67.0, "elapsed_time": "2:57:38", "remaining_time": "1:27:28", "throughput": 8670.55, "total_tokens": 92416016} +{"current_steps": 137140, "total_steps": 204665, "loss": 0.0, "lr": 5.930566595011749e-07, "epoch": 3.3503530159040382, "percentage": 67.01, "elapsed_time": "2:57:38", "remaining_time": "1:27:28", "throughput": 8670.56, "total_tokens": 92419152} +{"current_steps": 137145, "total_steps": 204665, "loss": 0.0001, "lr": 5.929787638676158e-07, "epoch": 3.3504751667358854, "percentage": 67.01, "elapsed_time": "2:57:39", "remaining_time": "1:27:27", "throughput": 8670.58, "total_tokens": 92422224} +{"current_steps": 137150, "total_steps": 204665, "loss": 0.0001, "lr": 5.929008711940249e-07, "epoch": 3.3505973175677326, "percentage": 67.01, "elapsed_time": "2:57:39", "remaining_time": "1:27:27", "throughput": 8670.63, "total_tokens": 92425872} +{"current_steps": 137155, "total_steps": 204665, "loss": 0.0001, "lr": 5.928229814809684e-07, "epoch": 3.35071946839958, "percentage": 67.01, "elapsed_time": "2:57:40", "remaining_time": "1:27:27", "throughput": 8670.69, "total_tokens": 92429584} +{"current_steps": 137160, "total_steps": 204665, "loss": 0.0001, "lr": 5.927450947290125e-07, "epoch": 3.350841619231427, "percentage": 67.02, "elapsed_time": "2:57:40", "remaining_time": "1:27:26", "throughput": 8670.7, "total_tokens": 92432592} +{"current_steps": 137165, "total_steps": 204665, "loss": 0.0204, "lr": 5.926672109387241e-07, "epoch": 3.350963770063274, "percentage": 67.02, "elapsed_time": "2:57:40", "remaining_time": "1:27:26", "throughput": 8670.75, "total_tokens": 92436304} +{"current_steps": 137170, "total_steps": 204665, "loss": 0.0, "lr": 5.925893301106688e-07, "epoch": 3.3510859208951214, "percentage": 67.02, "elapsed_time": "2:57:41", "remaining_time": "1:27:25", "throughput": 8670.79, "total_tokens": 92439760} +{"current_steps": 137175, "total_steps": 204665, "loss": 0.0001, "lr": 5.925114522454136e-07, "epoch": 3.3512080717269686, "percentage": 67.02, "elapsed_time": "2:57:41", "remaining_time": "1:27:25", "throughput": 8670.81, "total_tokens": 92442896} +{"current_steps": 137180, "total_steps": 204665, "loss": 0.0, "lr": 5.924335773435251e-07, "epoch": 3.3513302225588157, "percentage": 67.03, "elapsed_time": "2:57:41", "remaining_time": "1:27:24", "throughput": 8670.8, "total_tokens": 92445712} +{"current_steps": 137185, "total_steps": 204665, "loss": 0.0, "lr": 5.923557054055688e-07, "epoch": 3.351452373390663, "percentage": 67.03, "elapsed_time": "2:57:42", "remaining_time": "1:27:24", "throughput": 8670.81, "total_tokens": 92448848} +{"current_steps": 137190, "total_steps": 204665, "loss": 0.0, "lr": 5.922778364321119e-07, "epoch": 3.35157452422251, "percentage": 67.03, "elapsed_time": "2:57:42", "remaining_time": "1:27:24", "throughput": 8670.83, "total_tokens": 92452048} +{"current_steps": 137195, "total_steps": 204665, "loss": 0.0, "lr": 5.921999704237197e-07, "epoch": 3.3516966750543573, "percentage": 67.03, "elapsed_time": "2:57:42", "remaining_time": "1:27:23", "throughput": 8670.84, "total_tokens": 92455056} +{"current_steps": 137200, "total_steps": 204665, "loss": 0.0, "lr": 5.921221073809596e-07, "epoch": 3.3518188258862045, "percentage": 67.04, "elapsed_time": "2:57:43", "remaining_time": "1:27:23", "throughput": 8670.89, "total_tokens": 92458704} +{"current_steps": 137205, "total_steps": 204665, "loss": 0.0004, "lr": 5.92044247304397e-07, "epoch": 3.3519409767180512, "percentage": 67.04, "elapsed_time": "2:57:43", "remaining_time": "1:27:22", "throughput": 8670.9, "total_tokens": 92461712} +{"current_steps": 137210, "total_steps": 204665, "loss": 0.0003, "lr": 5.919663901945982e-07, "epoch": 3.3520631275498984, "percentage": 67.04, "elapsed_time": "2:57:43", "remaining_time": "1:27:22", "throughput": 8670.9, "total_tokens": 92464720} +{"current_steps": 137215, "total_steps": 204665, "loss": 0.0, "lr": 5.918885360521297e-07, "epoch": 3.3521852783817456, "percentage": 67.04, "elapsed_time": "2:57:44", "remaining_time": "1:27:22", "throughput": 8670.91, "total_tokens": 92467728} +{"current_steps": 137220, "total_steps": 204665, "loss": 0.0, "lr": 5.918106848775574e-07, "epoch": 3.352307429213593, "percentage": 67.05, "elapsed_time": "2:57:44", "remaining_time": "1:27:21", "throughput": 8670.93, "total_tokens": 92470928} +{"current_steps": 137225, "total_steps": 204665, "loss": 0.0001, "lr": 5.917328366714479e-07, "epoch": 3.35242958004544, "percentage": 67.05, "elapsed_time": "2:57:44", "remaining_time": "1:27:21", "throughput": 8670.96, "total_tokens": 92474384} +{"current_steps": 137230, "total_steps": 204665, "loss": 0.0, "lr": 5.916549914343667e-07, "epoch": 3.352551730877287, "percentage": 67.05, "elapsed_time": "2:57:45", "remaining_time": "1:27:20", "throughput": 8670.99, "total_tokens": 92477776} +{"current_steps": 137235, "total_steps": 204665, "loss": 0.0838, "lr": 5.915771491668801e-07, "epoch": 3.3526738817091344, "percentage": 67.05, "elapsed_time": "2:57:45", "remaining_time": "1:27:20", "throughput": 8671.04, "total_tokens": 92481296} +{"current_steps": 137240, "total_steps": 204665, "loss": 0.0, "lr": 5.914993098695548e-07, "epoch": 3.3527960325409816, "percentage": 67.06, "elapsed_time": "2:57:45", "remaining_time": "1:27:20", "throughput": 8671.06, "total_tokens": 92484560} +{"current_steps": 137245, "total_steps": 204665, "loss": 0.0, "lr": 5.914214735429559e-07, "epoch": 3.3529181833728288, "percentage": 67.06, "elapsed_time": "2:57:46", "remaining_time": "1:27:19", "throughput": 8671.14, "total_tokens": 92488592} +{"current_steps": 137250, "total_steps": 204665, "loss": 0.0, "lr": 5.913436401876505e-07, "epoch": 3.353040334204676, "percentage": 67.06, "elapsed_time": "2:57:46", "remaining_time": "1:27:19", "throughput": 8671.18, "total_tokens": 92491984} +{"current_steps": 137255, "total_steps": 204665, "loss": 0.0, "lr": 5.912658098042038e-07, "epoch": 3.353162485036523, "percentage": 67.06, "elapsed_time": "2:57:46", "remaining_time": "1:27:18", "throughput": 8671.21, "total_tokens": 92495376} +{"current_steps": 137260, "total_steps": 204665, "loss": 0.0, "lr": 5.91187982393182e-07, "epoch": 3.3532846358683703, "percentage": 67.07, "elapsed_time": "2:57:47", "remaining_time": "1:27:18", "throughput": 8671.21, "total_tokens": 92498256} +{"current_steps": 137265, "total_steps": 204665, "loss": 0.0, "lr": 5.911101579551511e-07, "epoch": 3.3534067867002175, "percentage": 67.07, "elapsed_time": "2:57:47", "remaining_time": "1:27:18", "throughput": 8671.25, "total_tokens": 92501776} +{"current_steps": 137270, "total_steps": 204665, "loss": 0.0, "lr": 5.910323364906771e-07, "epoch": 3.3535289375320647, "percentage": 67.07, "elapsed_time": "2:57:47", "remaining_time": "1:27:17", "throughput": 8671.26, "total_tokens": 92504784} +{"current_steps": 137275, "total_steps": 204665, "loss": 0.0001, "lr": 5.909545180003262e-07, "epoch": 3.353651088363912, "percentage": 67.07, "elapsed_time": "2:57:48", "remaining_time": "1:27:17", "throughput": 8671.28, "total_tokens": 92507984} +{"current_steps": 137280, "total_steps": 204665, "loss": 0.0001, "lr": 5.908767024846637e-07, "epoch": 3.353773239195759, "percentage": 67.08, "elapsed_time": "2:57:48", "remaining_time": "1:27:16", "throughput": 8671.3, "total_tokens": 92511248} +{"current_steps": 137285, "total_steps": 204665, "loss": 0.0, "lr": 5.907988899442565e-07, "epoch": 3.3538953900276063, "percentage": 67.08, "elapsed_time": "2:57:49", "remaining_time": "1:27:16", "throughput": 8671.33, "total_tokens": 92514512} +{"current_steps": 137290, "total_steps": 204665, "loss": 0.0, "lr": 5.90721080379669e-07, "epoch": 3.354017540859453, "percentage": 67.08, "elapsed_time": "2:57:49", "remaining_time": "1:27:15", "throughput": 8671.33, "total_tokens": 92517456} +{"current_steps": 137295, "total_steps": 204665, "loss": 0.0001, "lr": 5.906432737914686e-07, "epoch": 3.3541396916913007, "percentage": 67.08, "elapsed_time": "2:57:49", "remaining_time": "1:27:15", "throughput": 8671.37, "total_tokens": 92520912} +{"current_steps": 137300, "total_steps": 204665, "loss": 0.0, "lr": 5.905654701802198e-07, "epoch": 3.3542618425231474, "percentage": 67.09, "elapsed_time": "2:57:50", "remaining_time": "1:27:15", "throughput": 8671.41, "total_tokens": 92524368} +{"current_steps": 137305, "total_steps": 204665, "loss": 0.0, "lr": 5.904876695464894e-07, "epoch": 3.3543839933549946, "percentage": 67.09, "elapsed_time": "2:57:50", "remaining_time": "1:27:14", "throughput": 8671.44, "total_tokens": 92527696} +{"current_steps": 137310, "total_steps": 204665, "loss": 0.0761, "lr": 5.90409871890843e-07, "epoch": 3.354506144186842, "percentage": 67.09, "elapsed_time": "2:57:50", "remaining_time": "1:27:14", "throughput": 8671.48, "total_tokens": 92531088} +{"current_steps": 137315, "total_steps": 204665, "loss": 0.0, "lr": 5.903320772138458e-07, "epoch": 3.354628295018689, "percentage": 67.09, "elapsed_time": "2:57:51", "remaining_time": "1:27:13", "throughput": 8671.52, "total_tokens": 92534480} +{"current_steps": 137320, "total_steps": 204665, "loss": 0.0, "lr": 5.902542855160641e-07, "epoch": 3.354750445850536, "percentage": 67.1, "elapsed_time": "2:57:51", "remaining_time": "1:27:13", "throughput": 8671.55, "total_tokens": 92537872} +{"current_steps": 137325, "total_steps": 204665, "loss": 0.0, "lr": 5.901764967980634e-07, "epoch": 3.3548725966823834, "percentage": 67.1, "elapsed_time": "2:57:51", "remaining_time": "1:27:13", "throughput": 8671.63, "total_tokens": 92541904} +{"current_steps": 137330, "total_steps": 204665, "loss": 0.0, "lr": 5.900987110604092e-07, "epoch": 3.3549947475142305, "percentage": 67.1, "elapsed_time": "2:57:52", "remaining_time": "1:27:12", "throughput": 8671.65, "total_tokens": 92544976} +{"current_steps": 137335, "total_steps": 204665, "loss": 0.0, "lr": 5.900209283036677e-07, "epoch": 3.3551168983460777, "percentage": 67.1, "elapsed_time": "2:57:52", "remaining_time": "1:27:12", "throughput": 8671.67, "total_tokens": 92548240} +{"current_steps": 137340, "total_steps": 204665, "loss": 0.0, "lr": 5.899431485284041e-07, "epoch": 3.355239049177925, "percentage": 67.1, "elapsed_time": "2:57:52", "remaining_time": "1:27:11", "throughput": 8671.7, "total_tokens": 92551568} +{"current_steps": 137345, "total_steps": 204665, "loss": 0.0003, "lr": 5.898653717351847e-07, "epoch": 3.355361200009772, "percentage": 67.11, "elapsed_time": "2:57:53", "remaining_time": "1:27:11", "throughput": 8671.73, "total_tokens": 92554960} +{"current_steps": 137350, "total_steps": 204665, "loss": 0.0, "lr": 5.89787597924574e-07, "epoch": 3.3554833508416193, "percentage": 67.11, "elapsed_time": "2:57:53", "remaining_time": "1:27:11", "throughput": 8671.79, "total_tokens": 92558608} +{"current_steps": 137355, "total_steps": 204665, "loss": 0.0001, "lr": 5.897098270971388e-07, "epoch": 3.3556055016734665, "percentage": 67.11, "elapsed_time": "2:57:53", "remaining_time": "1:27:10", "throughput": 8671.81, "total_tokens": 92561808} +{"current_steps": 137360, "total_steps": 204665, "loss": 0.0, "lr": 5.896320592534438e-07, "epoch": 3.3557276525053137, "percentage": 67.11, "elapsed_time": "2:57:54", "remaining_time": "1:27:10", "throughput": 8671.82, "total_tokens": 92564880} +{"current_steps": 137365, "total_steps": 204665, "loss": 0.081, "lr": 5.895542943940546e-07, "epoch": 3.355849803337161, "percentage": 67.12, "elapsed_time": "2:57:54", "remaining_time": "1:27:09", "throughput": 8671.83, "total_tokens": 92567888} +{"current_steps": 137370, "total_steps": 204665, "loss": 0.0, "lr": 5.894765325195374e-07, "epoch": 3.355971954169008, "percentage": 67.12, "elapsed_time": "2:57:54", "remaining_time": "1:27:09", "throughput": 8671.86, "total_tokens": 92571280} +{"current_steps": 137375, "total_steps": 204665, "loss": 0.0, "lr": 5.893987736304569e-07, "epoch": 3.356094105000855, "percentage": 67.12, "elapsed_time": "2:57:55", "remaining_time": "1:27:09", "throughput": 8671.91, "total_tokens": 92574864} +{"current_steps": 137380, "total_steps": 204665, "loss": 0.0, "lr": 5.893210177273793e-07, "epoch": 3.3562162558327024, "percentage": 67.12, "elapsed_time": "2:57:55", "remaining_time": "1:27:08", "throughput": 8671.98, "total_tokens": 92578640} +{"current_steps": 137385, "total_steps": 204665, "loss": 0.0, "lr": 5.892432648108694e-07, "epoch": 3.356338406664549, "percentage": 67.13, "elapsed_time": "2:57:55", "remaining_time": "1:27:08", "throughput": 8671.98, "total_tokens": 92581584} +{"current_steps": 137390, "total_steps": 204665, "loss": 0.0001, "lr": 5.891655148814934e-07, "epoch": 3.3564605574963964, "percentage": 67.13, "elapsed_time": "2:57:56", "remaining_time": "1:27:07", "throughput": 8672.08, "total_tokens": 92585872} +{"current_steps": 137395, "total_steps": 204665, "loss": 0.0, "lr": 5.890877679398158e-07, "epoch": 3.3565827083282436, "percentage": 67.13, "elapsed_time": "2:57:56", "remaining_time": "1:27:07", "throughput": 8672.09, "total_tokens": 92588944} +{"current_steps": 137400, "total_steps": 204665, "loss": 0.0879, "lr": 5.890100239864024e-07, "epoch": 3.3567048591600908, "percentage": 67.13, "elapsed_time": "2:57:57", "remaining_time": "1:27:06", "throughput": 8672.13, "total_tokens": 92592400} +{"current_steps": 137405, "total_steps": 204665, "loss": 0.0, "lr": 5.88932283021819e-07, "epoch": 3.356827009991938, "percentage": 67.14, "elapsed_time": "2:57:57", "remaining_time": "1:27:06", "throughput": 8672.17, "total_tokens": 92595920} +{"current_steps": 137410, "total_steps": 204665, "loss": 0.0, "lr": 5.888545450466307e-07, "epoch": 3.356949160823785, "percentage": 67.14, "elapsed_time": "2:57:57", "remaining_time": "1:27:06", "throughput": 8672.22, "total_tokens": 92599504} +{"current_steps": 137415, "total_steps": 204665, "loss": 0.0001, "lr": 5.887768100614026e-07, "epoch": 3.3570713116556323, "percentage": 67.14, "elapsed_time": "2:57:58", "remaining_time": "1:27:05", "throughput": 8672.24, "total_tokens": 92602704} +{"current_steps": 137420, "total_steps": 204665, "loss": 0.0007, "lr": 5.886990780667e-07, "epoch": 3.3571934624874795, "percentage": 67.14, "elapsed_time": "2:57:58", "remaining_time": "1:27:05", "throughput": 8672.25, "total_tokens": 92605776} +{"current_steps": 137425, "total_steps": 204665, "loss": 0.0, "lr": 5.886213490630883e-07, "epoch": 3.3573156133193267, "percentage": 67.15, "elapsed_time": "2:57:58", "remaining_time": "1:27:04", "throughput": 8672.29, "total_tokens": 92609232} +{"current_steps": 137430, "total_steps": 204665, "loss": 0.0012, "lr": 5.885436230511332e-07, "epoch": 3.357437764151174, "percentage": 67.15, "elapsed_time": "2:57:59", "remaining_time": "1:27:04", "throughput": 8672.3, "total_tokens": 92612304} +{"current_steps": 137435, "total_steps": 204665, "loss": 0.0, "lr": 5.884659000313989e-07, "epoch": 3.357559914983021, "percentage": 67.15, "elapsed_time": "2:57:59", "remaining_time": "1:27:04", "throughput": 8672.32, "total_tokens": 92615568} +{"current_steps": 137440, "total_steps": 204665, "loss": 0.0, "lr": 5.883881800044519e-07, "epoch": 3.3576820658148683, "percentage": 67.15, "elapsed_time": "2:57:59", "remaining_time": "1:27:03", "throughput": 8672.39, "total_tokens": 92619472} +{"current_steps": 137445, "total_steps": 204665, "loss": 0.0, "lr": 5.883104629708563e-07, "epoch": 3.3578042166467155, "percentage": 67.16, "elapsed_time": "2:58:00", "remaining_time": "1:27:03", "throughput": 8672.46, "total_tokens": 92623376} +{"current_steps": 137450, "total_steps": 204665, "loss": 0.043, "lr": 5.882327489311781e-07, "epoch": 3.3579263674785627, "percentage": 67.16, "elapsed_time": "2:58:00", "remaining_time": "1:27:02", "throughput": 8672.5, "total_tokens": 92626768} +{"current_steps": 137455, "total_steps": 204665, "loss": 0.0, "lr": 5.881550378859817e-07, "epoch": 3.35804851831041, "percentage": 67.16, "elapsed_time": "2:58:00", "remaining_time": "1:27:02", "throughput": 8672.53, "total_tokens": 92630224} +{"current_steps": 137460, "total_steps": 204665, "loss": 0.0003, "lr": 5.88077329835833e-07, "epoch": 3.358170669142257, "percentage": 67.16, "elapsed_time": "2:58:01", "remaining_time": "1:27:02", "throughput": 8672.56, "total_tokens": 92633552} +{"current_steps": 137465, "total_steps": 204665, "loss": 0.039, "lr": 5.879996247812969e-07, "epoch": 3.3582928199741042, "percentage": 67.17, "elapsed_time": "2:58:01", "remaining_time": "1:27:01", "throughput": 8672.57, "total_tokens": 92636560} +{"current_steps": 137470, "total_steps": 204665, "loss": 0.0353, "lr": 5.879219227229378e-07, "epoch": 3.358414970805951, "percentage": 67.17, "elapsed_time": "2:58:01", "remaining_time": "1:27:01", "throughput": 8672.6, "total_tokens": 92639888} +{"current_steps": 137475, "total_steps": 204665, "loss": 0.0, "lr": 5.878442236613217e-07, "epoch": 3.358537121637798, "percentage": 67.17, "elapsed_time": "2:58:02", "remaining_time": "1:27:00", "throughput": 8672.64, "total_tokens": 92643408} +{"current_steps": 137480, "total_steps": 204665, "loss": 0.0001, "lr": 5.87766527597013e-07, "epoch": 3.3586592724696454, "percentage": 67.17, "elapsed_time": "2:58:02", "remaining_time": "1:27:00", "throughput": 8672.68, "total_tokens": 92646864} +{"current_steps": 137485, "total_steps": 204665, "loss": 0.0, "lr": 5.876888345305769e-07, "epoch": 3.3587814233014925, "percentage": 67.18, "elapsed_time": "2:58:02", "remaining_time": "1:27:00", "throughput": 8672.72, "total_tokens": 92650320} +{"current_steps": 137490, "total_steps": 204665, "loss": 0.0, "lr": 5.87611144462579e-07, "epoch": 3.3589035741333397, "percentage": 67.18, "elapsed_time": "2:58:03", "remaining_time": "1:26:59", "throughput": 8672.72, "total_tokens": 92653264} +{"current_steps": 137495, "total_steps": 204665, "loss": 0.0346, "lr": 5.875334573935833e-07, "epoch": 3.359025724965187, "percentage": 67.18, "elapsed_time": "2:58:03", "remaining_time": "1:26:59", "throughput": 8672.74, "total_tokens": 92656592} +{"current_steps": 137500, "total_steps": 204665, "loss": 0.0, "lr": 5.874557733241557e-07, "epoch": 3.359147875797034, "percentage": 67.18, "elapsed_time": "2:58:04", "remaining_time": "1:26:58", "throughput": 8672.8, "total_tokens": 92660240} +{"current_steps": 137505, "total_steps": 204665, "loss": 0.0, "lr": 5.873780922548602e-07, "epoch": 3.3592700266288813, "percentage": 67.19, "elapsed_time": "2:58:04", "remaining_time": "1:26:58", "throughput": 8672.62, "total_tokens": 92663632} +{"current_steps": 137510, "total_steps": 204665, "loss": 0.0, "lr": 5.873004141862626e-07, "epoch": 3.3593921774607285, "percentage": 67.19, "elapsed_time": "2:58:04", "remaining_time": "1:26:58", "throughput": 8672.66, "total_tokens": 92667088} +{"current_steps": 137515, "total_steps": 204665, "loss": 0.0619, "lr": 5.872227391189273e-07, "epoch": 3.3595143282925757, "percentage": 67.19, "elapsed_time": "2:58:05", "remaining_time": "1:26:57", "throughput": 8672.67, "total_tokens": 92670160} +{"current_steps": 137520, "total_steps": 204665, "loss": 0.0596, "lr": 5.871450670534189e-07, "epoch": 3.359636479124423, "percentage": 67.19, "elapsed_time": "2:58:05", "remaining_time": "1:26:57", "throughput": 8672.7, "total_tokens": 92673488} +{"current_steps": 137525, "total_steps": 204665, "loss": 0.0, "lr": 5.870673979903031e-07, "epoch": 3.35975862995627, "percentage": 67.2, "elapsed_time": "2:58:05", "remaining_time": "1:26:56", "throughput": 8672.71, "total_tokens": 92676496} +{"current_steps": 137530, "total_steps": 204665, "loss": 0.0, "lr": 5.869897319301438e-07, "epoch": 3.3598807807881172, "percentage": 67.2, "elapsed_time": "2:58:06", "remaining_time": "1:26:56", "throughput": 8672.73, "total_tokens": 92679760} +{"current_steps": 137535, "total_steps": 204665, "loss": 0.0, "lr": 5.869120688735067e-07, "epoch": 3.3600029316199644, "percentage": 67.2, "elapsed_time": "2:58:06", "remaining_time": "1:26:56", "throughput": 8672.81, "total_tokens": 92683728} +{"current_steps": 137540, "total_steps": 204665, "loss": 0.0414, "lr": 5.868344088209558e-07, "epoch": 3.3601250824518116, "percentage": 67.2, "elapsed_time": "2:58:07", "remaining_time": "1:26:55", "throughput": 8672.83, "total_tokens": 92686864} +{"current_steps": 137545, "total_steps": 204665, "loss": 0.0477, "lr": 5.867567517730565e-07, "epoch": 3.360247233283659, "percentage": 67.2, "elapsed_time": "2:58:07", "remaining_time": "1:26:55", "throughput": 8672.86, "total_tokens": 92690256} +{"current_steps": 137550, "total_steps": 204665, "loss": 0.0, "lr": 5.866790977303729e-07, "epoch": 3.360369384115506, "percentage": 67.21, "elapsed_time": "2:58:07", "remaining_time": "1:26:54", "throughput": 8672.89, "total_tokens": 92693584} +{"current_steps": 137555, "total_steps": 204665, "loss": 0.0325, "lr": 5.866014466934701e-07, "epoch": 3.3604915349473528, "percentage": 67.21, "elapsed_time": "2:58:08", "remaining_time": "1:26:54", "throughput": 8672.93, "total_tokens": 92697104} +{"current_steps": 137560, "total_steps": 204665, "loss": 0.065, "lr": 5.865237986629132e-07, "epoch": 3.3606136857792004, "percentage": 67.21, "elapsed_time": "2:58:08", "remaining_time": "1:26:54", "throughput": 8672.97, "total_tokens": 92700624} +{"current_steps": 137565, "total_steps": 204665, "loss": 0.0, "lr": 5.864461536392662e-07, "epoch": 3.360735836611047, "percentage": 67.21, "elapsed_time": "2:58:08", "remaining_time": "1:26:53", "throughput": 8673.01, "total_tokens": 92704080} +{"current_steps": 137570, "total_steps": 204665, "loss": 0.0406, "lr": 5.863685116230939e-07, "epoch": 3.3608579874428943, "percentage": 67.22, "elapsed_time": "2:58:09", "remaining_time": "1:26:53", "throughput": 8673.02, "total_tokens": 92707024} +{"current_steps": 137575, "total_steps": 204665, "loss": 0.0, "lr": 5.862908726149611e-07, "epoch": 3.3609801382747415, "percentage": 67.22, "elapsed_time": "2:58:09", "remaining_time": "1:26:52", "throughput": 8673.04, "total_tokens": 92710224} +{"current_steps": 137580, "total_steps": 204665, "loss": 0.0, "lr": 5.862132366154322e-07, "epoch": 3.3611022891065887, "percentage": 67.22, "elapsed_time": "2:58:09", "remaining_time": "1:26:52", "throughput": 8673.08, "total_tokens": 92713744} +{"current_steps": 137585, "total_steps": 204665, "loss": 0.0, "lr": 5.861356036250724e-07, "epoch": 3.361224439938436, "percentage": 67.22, "elapsed_time": "2:58:10", "remaining_time": "1:26:52", "throughput": 8673.09, "total_tokens": 92716816} +{"current_steps": 137590, "total_steps": 204665, "loss": 0.0, "lr": 5.860579736444453e-07, "epoch": 3.361346590770283, "percentage": 67.23, "elapsed_time": "2:58:10", "remaining_time": "1:26:51", "throughput": 8673.13, "total_tokens": 92720272} +{"current_steps": 137595, "total_steps": 204665, "loss": 0.0007, "lr": 5.859803466741164e-07, "epoch": 3.3614687416021303, "percentage": 67.23, "elapsed_time": "2:58:10", "remaining_time": "1:26:51", "throughput": 8673.14, "total_tokens": 92723280} +{"current_steps": 137600, "total_steps": 204665, "loss": 0.0, "lr": 5.859027227146493e-07, "epoch": 3.3615908924339775, "percentage": 67.23, "elapsed_time": "2:58:11", "remaining_time": "1:26:50", "throughput": 8673.21, "total_tokens": 92727120} +{"current_steps": 137605, "total_steps": 204665, "loss": 0.0005, "lr": 5.858251017666095e-07, "epoch": 3.3617130432658247, "percentage": 67.23, "elapsed_time": "2:58:11", "remaining_time": "1:26:50", "throughput": 8673.24, "total_tokens": 92730576} +{"current_steps": 137610, "total_steps": 204665, "loss": 0.0, "lr": 5.857474838305605e-07, "epoch": 3.361835194097672, "percentage": 67.24, "elapsed_time": "2:58:11", "remaining_time": "1:26:49", "throughput": 8673.31, "total_tokens": 92734416} +{"current_steps": 137615, "total_steps": 204665, "loss": 0.0, "lr": 5.856698689070674e-07, "epoch": 3.361957344929519, "percentage": 67.24, "elapsed_time": "2:58:12", "remaining_time": "1:26:49", "throughput": 8673.37, "total_tokens": 92738064} +{"current_steps": 137620, "total_steps": 204665, "loss": 0.0, "lr": 5.855922569966945e-07, "epoch": 3.362079495761366, "percentage": 67.24, "elapsed_time": "2:58:12", "remaining_time": "1:26:49", "throughput": 8673.41, "total_tokens": 92741520} +{"current_steps": 137625, "total_steps": 204665, "loss": 0.0551, "lr": 5.85514648100006e-07, "epoch": 3.3622016465932134, "percentage": 67.24, "elapsed_time": "2:58:12", "remaining_time": "1:26:48", "throughput": 8673.46, "total_tokens": 92745168} +{"current_steps": 137630, "total_steps": 204665, "loss": 0.0, "lr": 5.854370422175668e-07, "epoch": 3.3623237974250606, "percentage": 67.25, "elapsed_time": "2:58:13", "remaining_time": "1:26:48", "throughput": 8673.49, "total_tokens": 92748432} +{"current_steps": 137635, "total_steps": 204665, "loss": 0.039, "lr": 5.853594393499406e-07, "epoch": 3.362445948256908, "percentage": 67.25, "elapsed_time": "2:58:13", "remaining_time": "1:26:47", "throughput": 8673.49, "total_tokens": 92751440} +{"current_steps": 137640, "total_steps": 204665, "loss": 0.0, "lr": 5.852818394976919e-07, "epoch": 3.362568099088755, "percentage": 67.25, "elapsed_time": "2:58:14", "remaining_time": "1:26:47", "throughput": 8673.51, "total_tokens": 92754640} +{"current_steps": 137645, "total_steps": 204665, "loss": 0.0542, "lr": 5.852042426613858e-07, "epoch": 3.362690249920602, "percentage": 67.25, "elapsed_time": "2:58:14", "remaining_time": "1:26:47", "throughput": 8673.53, "total_tokens": 92757776} +{"current_steps": 137650, "total_steps": 204665, "loss": 0.0, "lr": 5.851266488415856e-07, "epoch": 3.362812400752449, "percentage": 67.26, "elapsed_time": "2:58:14", "remaining_time": "1:26:46", "throughput": 8673.54, "total_tokens": 92760912} +{"current_steps": 137655, "total_steps": 204665, "loss": 0.0, "lr": 5.850490580388562e-07, "epoch": 3.362934551584296, "percentage": 67.26, "elapsed_time": "2:58:15", "remaining_time": "1:26:46", "throughput": 8673.58, "total_tokens": 92764240} +{"current_steps": 137660, "total_steps": 204665, "loss": 0.0, "lr": 5.849714702537615e-07, "epoch": 3.3630567024161433, "percentage": 67.26, "elapsed_time": "2:58:15", "remaining_time": "1:26:45", "throughput": 8673.61, "total_tokens": 92767568} +{"current_steps": 137665, "total_steps": 204665, "loss": 0.0, "lr": 5.848938854868661e-07, "epoch": 3.3631788532479905, "percentage": 67.26, "elapsed_time": "2:58:15", "remaining_time": "1:26:45", "throughput": 8673.62, "total_tokens": 92770704} +{"current_steps": 137670, "total_steps": 204665, "loss": 0.0, "lr": 5.848163037387339e-07, "epoch": 3.3633010040798377, "percentage": 67.27, "elapsed_time": "2:58:16", "remaining_time": "1:26:45", "throughput": 8673.63, "total_tokens": 92773776} +{"current_steps": 137675, "total_steps": 204665, "loss": 0.0, "lr": 5.847387250099292e-07, "epoch": 3.363423154911685, "percentage": 67.27, "elapsed_time": "2:58:16", "remaining_time": "1:26:44", "throughput": 8673.68, "total_tokens": 92777296} +{"current_steps": 137680, "total_steps": 204665, "loss": 0.0475, "lr": 5.846611493010163e-07, "epoch": 3.363545305743532, "percentage": 67.27, "elapsed_time": "2:58:16", "remaining_time": "1:26:44", "throughput": 8673.71, "total_tokens": 92780688} +{"current_steps": 137685, "total_steps": 204665, "loss": 0.0, "lr": 5.845835766125589e-07, "epoch": 3.3636674565753792, "percentage": 67.27, "elapsed_time": "2:58:17", "remaining_time": "1:26:43", "throughput": 8673.75, "total_tokens": 92784144} +{"current_steps": 137690, "total_steps": 204665, "loss": 0.0001, "lr": 5.84506006945122e-07, "epoch": 3.3637896074072264, "percentage": 67.28, "elapsed_time": "2:58:17", "remaining_time": "1:26:43", "throughput": 8673.79, "total_tokens": 92787664} +{"current_steps": 137695, "total_steps": 204665, "loss": 0.0, "lr": 5.844284402992685e-07, "epoch": 3.3639117582390736, "percentage": 67.28, "elapsed_time": "2:58:17", "remaining_time": "1:26:43", "throughput": 8673.81, "total_tokens": 92790736} +{"current_steps": 137700, "total_steps": 204665, "loss": 0.0418, "lr": 5.843508766755638e-07, "epoch": 3.364033909070921, "percentage": 67.28, "elapsed_time": "2:58:18", "remaining_time": "1:26:42", "throughput": 8673.82, "total_tokens": 92793808} +{"current_steps": 137705, "total_steps": 204665, "loss": 0.0, "lr": 5.842733160745709e-07, "epoch": 3.364156059902768, "percentage": 67.28, "elapsed_time": "2:58:18", "remaining_time": "1:26:42", "throughput": 8673.89, "total_tokens": 92797712} +{"current_steps": 137710, "total_steps": 204665, "loss": 0.0001, "lr": 5.841957584968542e-07, "epoch": 3.364278210734615, "percentage": 67.29, "elapsed_time": "2:58:18", "remaining_time": "1:26:41", "throughput": 8673.94, "total_tokens": 92801296} +{"current_steps": 137715, "total_steps": 204665, "loss": 0.0001, "lr": 5.841182039429782e-07, "epoch": 3.3644003615664624, "percentage": 67.29, "elapsed_time": "2:58:19", "remaining_time": "1:26:41", "throughput": 8673.99, "total_tokens": 92804880} +{"current_steps": 137720, "total_steps": 204665, "loss": 0.0, "lr": 5.840406524135061e-07, "epoch": 3.3645225123983096, "percentage": 67.29, "elapsed_time": "2:58:19", "remaining_time": "1:26:41", "throughput": 8674.02, "total_tokens": 92808272} +{"current_steps": 137725, "total_steps": 204665, "loss": 0.0002, "lr": 5.839631039090025e-07, "epoch": 3.3646446632301568, "percentage": 67.29, "elapsed_time": "2:58:19", "remaining_time": "1:26:40", "throughput": 8674.04, "total_tokens": 92811408} +{"current_steps": 137730, "total_steps": 204665, "loss": 0.0, "lr": 5.838855584300311e-07, "epoch": 3.364766814062004, "percentage": 67.3, "elapsed_time": "2:58:20", "remaining_time": "1:26:40", "throughput": 8674.1, "total_tokens": 92815120} +{"current_steps": 137735, "total_steps": 204665, "loss": 0.0, "lr": 5.838080159771556e-07, "epoch": 3.3648889648938507, "percentage": 67.3, "elapsed_time": "2:58:20", "remaining_time": "1:26:39", "throughput": 8674.13, "total_tokens": 92818448} +{"current_steps": 137740, "total_steps": 204665, "loss": 0.0, "lr": 5.837304765509405e-07, "epoch": 3.3650111157256983, "percentage": 67.3, "elapsed_time": "2:58:20", "remaining_time": "1:26:39", "throughput": 8674.15, "total_tokens": 92821648} +{"current_steps": 137745, "total_steps": 204665, "loss": 0.0, "lr": 5.83652940151949e-07, "epoch": 3.365133266557545, "percentage": 67.3, "elapsed_time": "2:58:21", "remaining_time": "1:26:38", "throughput": 8674.17, "total_tokens": 92824848} +{"current_steps": 137750, "total_steps": 204665, "loss": 0.0, "lr": 5.835754067807457e-07, "epoch": 3.3652554173893923, "percentage": 67.31, "elapsed_time": "2:58:21", "remaining_time": "1:26:38", "throughput": 8674.16, "total_tokens": 92827664} +{"current_steps": 137755, "total_steps": 204665, "loss": 0.0, "lr": 5.834978764378935e-07, "epoch": 3.3653775682212395, "percentage": 67.31, "elapsed_time": "2:58:21", "remaining_time": "1:26:38", "throughput": 8674.2, "total_tokens": 92831056} +{"current_steps": 137760, "total_steps": 204665, "loss": 0.0, "lr": 5.834203491239574e-07, "epoch": 3.3654997190530866, "percentage": 67.31, "elapsed_time": "2:58:22", "remaining_time": "1:26:37", "throughput": 8674.23, "total_tokens": 92834448} +{"current_steps": 137765, "total_steps": 204665, "loss": 0.0, "lr": 5.833428248395e-07, "epoch": 3.365621869884934, "percentage": 67.31, "elapsed_time": "2:58:22", "remaining_time": "1:26:37", "throughput": 8674.28, "total_tokens": 92838032} +{"current_steps": 137770, "total_steps": 204665, "loss": 0.005, "lr": 5.832653035850856e-07, "epoch": 3.365744020716781, "percentage": 67.31, "elapsed_time": "2:58:23", "remaining_time": "1:26:36", "throughput": 8674.3, "total_tokens": 92841168} +{"current_steps": 137775, "total_steps": 204665, "loss": 0.0, "lr": 5.831877853612785e-07, "epoch": 3.365866171548628, "percentage": 67.32, "elapsed_time": "2:58:23", "remaining_time": "1:26:36", "throughput": 8674.34, "total_tokens": 92844624} +{"current_steps": 137780, "total_steps": 204665, "loss": 0.0257, "lr": 5.831102701686416e-07, "epoch": 3.3659883223804754, "percentage": 67.32, "elapsed_time": "2:58:23", "remaining_time": "1:26:36", "throughput": 8674.37, "total_tokens": 92848080} +{"current_steps": 137785, "total_steps": 204665, "loss": 0.0, "lr": 5.830327580077392e-07, "epoch": 3.3661104732123226, "percentage": 67.32, "elapsed_time": "2:58:24", "remaining_time": "1:26:35", "throughput": 8674.38, "total_tokens": 92851088} +{"current_steps": 137790, "total_steps": 204665, "loss": 0.0, "lr": 5.829552488791345e-07, "epoch": 3.36623262404417, "percentage": 67.32, "elapsed_time": "2:58:24", "remaining_time": "1:26:35", "throughput": 8674.44, "total_tokens": 92854800} +{"current_steps": 137795, "total_steps": 204665, "loss": 0.0478, "lr": 5.828777427833917e-07, "epoch": 3.366354774876017, "percentage": 67.33, "elapsed_time": "2:58:24", "remaining_time": "1:26:34", "throughput": 8674.46, "total_tokens": 92858000} +{"current_steps": 137800, "total_steps": 204665, "loss": 0.0001, "lr": 5.82800239721074e-07, "epoch": 3.366476925707864, "percentage": 67.33, "elapsed_time": "2:58:25", "remaining_time": "1:26:34", "throughput": 8674.52, "total_tokens": 92861776} +{"current_steps": 137805, "total_steps": 204665, "loss": 0.0676, "lr": 5.82722739692745e-07, "epoch": 3.3665990765397114, "percentage": 67.33, "elapsed_time": "2:58:25", "remaining_time": "1:26:34", "throughput": 8674.52, "total_tokens": 92864592} +{"current_steps": 137810, "total_steps": 204665, "loss": 0.0, "lr": 5.826452426989688e-07, "epoch": 3.3667212273715585, "percentage": 67.33, "elapsed_time": "2:58:25", "remaining_time": "1:26:33", "throughput": 8674.54, "total_tokens": 92867792} +{"current_steps": 137815, "total_steps": 204665, "loss": 0.0846, "lr": 5.825677487403082e-07, "epoch": 3.3668433782034057, "percentage": 67.34, "elapsed_time": "2:58:26", "remaining_time": "1:26:33", "throughput": 8674.55, "total_tokens": 92870928} +{"current_steps": 137820, "total_steps": 204665, "loss": 0.0, "lr": 5.824902578173278e-07, "epoch": 3.366965529035253, "percentage": 67.34, "elapsed_time": "2:58:26", "remaining_time": "1:26:32", "throughput": 8674.61, "total_tokens": 92874576} +{"current_steps": 137825, "total_steps": 204665, "loss": 0.0, "lr": 5.824127699305899e-07, "epoch": 3.3670876798671, "percentage": 67.34, "elapsed_time": "2:58:26", "remaining_time": "1:26:32", "throughput": 8674.7, "total_tokens": 92878672} +{"current_steps": 137830, "total_steps": 204665, "loss": 0.0, "lr": 5.823352850806587e-07, "epoch": 3.367209830698947, "percentage": 67.34, "elapsed_time": "2:58:27", "remaining_time": "1:26:32", "throughput": 8674.73, "total_tokens": 92882000} +{"current_steps": 137835, "total_steps": 204665, "loss": 0.0, "lr": 5.822578032680983e-07, "epoch": 3.367331981530794, "percentage": 67.35, "elapsed_time": "2:58:27", "remaining_time": "1:26:31", "throughput": 8674.76, "total_tokens": 92885392} +{"current_steps": 137840, "total_steps": 204665, "loss": 0.0001, "lr": 5.821803244934708e-07, "epoch": 3.3674541323626412, "percentage": 67.35, "elapsed_time": "2:58:27", "remaining_time": "1:26:31", "throughput": 8674.81, "total_tokens": 92888976} +{"current_steps": 137845, "total_steps": 204665, "loss": 0.0001, "lr": 5.821028487573408e-07, "epoch": 3.3675762831944884, "percentage": 67.35, "elapsed_time": "2:58:28", "remaining_time": "1:26:30", "throughput": 8674.86, "total_tokens": 92892560} +{"current_steps": 137850, "total_steps": 204665, "loss": 0.0027, "lr": 5.82025376060271e-07, "epoch": 3.3676984340263356, "percentage": 67.35, "elapsed_time": "2:58:28", "remaining_time": "1:26:30", "throughput": 8674.88, "total_tokens": 92895696} +{"current_steps": 137855, "total_steps": 204665, "loss": 0.0001, "lr": 5.819479064028254e-07, "epoch": 3.367820584858183, "percentage": 67.36, "elapsed_time": "2:58:28", "remaining_time": "1:26:29", "throughput": 8674.9, "total_tokens": 92898960} +{"current_steps": 137860, "total_steps": 204665, "loss": 0.0237, "lr": 5.818704397855667e-07, "epoch": 3.36794273569003, "percentage": 67.36, "elapsed_time": "2:58:29", "remaining_time": "1:26:29", "throughput": 8674.92, "total_tokens": 92902224} +{"current_steps": 137865, "total_steps": 204665, "loss": 0.0, "lr": 5.817929762090588e-07, "epoch": 3.368064886521877, "percentage": 67.36, "elapsed_time": "2:58:29", "remaining_time": "1:26:29", "throughput": 8674.97, "total_tokens": 92905744} +{"current_steps": 137870, "total_steps": 204665, "loss": 0.0, "lr": 5.81715515673865e-07, "epoch": 3.3681870373537244, "percentage": 67.36, "elapsed_time": "2:58:29", "remaining_time": "1:26:28", "throughput": 8675.0, "total_tokens": 92909136} +{"current_steps": 137875, "total_steps": 204665, "loss": 0.0, "lr": 5.816380581805482e-07, "epoch": 3.3683091881855716, "percentage": 67.37, "elapsed_time": "2:58:30", "remaining_time": "1:26:28", "throughput": 8675.04, "total_tokens": 92912528} +{"current_steps": 137880, "total_steps": 204665, "loss": 0.0, "lr": 5.815606037296723e-07, "epoch": 3.3684313390174188, "percentage": 67.37, "elapsed_time": "2:58:30", "remaining_time": "1:26:27", "throughput": 8675.07, "total_tokens": 92915856} +{"current_steps": 137885, "total_steps": 204665, "loss": 0.0001, "lr": 5.814831523217998e-07, "epoch": 3.368553489849266, "percentage": 67.37, "elapsed_time": "2:58:31", "remaining_time": "1:26:27", "throughput": 8675.1, "total_tokens": 92919184} +{"current_steps": 137890, "total_steps": 204665, "loss": 0.0, "lr": 5.814057039574944e-07, "epoch": 3.368675640681113, "percentage": 67.37, "elapsed_time": "2:58:31", "remaining_time": "1:26:27", "throughput": 8675.09, "total_tokens": 92922064} +{"current_steps": 137895, "total_steps": 204665, "loss": 0.0002, "lr": 5.813282586373198e-07, "epoch": 3.3687977915129603, "percentage": 67.38, "elapsed_time": "2:58:31", "remaining_time": "1:26:26", "throughput": 8675.12, "total_tokens": 92925392} +{"current_steps": 137900, "total_steps": 204665, "loss": 0.0, "lr": 5.812508163618389e-07, "epoch": 3.3689199423448075, "percentage": 67.38, "elapsed_time": "2:58:32", "remaining_time": "1:26:26", "throughput": 8675.16, "total_tokens": 92928848} +{"current_steps": 137905, "total_steps": 204665, "loss": 0.0, "lr": 5.811733771316139e-07, "epoch": 3.3690420931766547, "percentage": 67.38, "elapsed_time": "2:58:32", "remaining_time": "1:26:25", "throughput": 8675.18, "total_tokens": 92931984} +{"current_steps": 137910, "total_steps": 204665, "loss": 0.0, "lr": 5.810959409472093e-07, "epoch": 3.369164244008502, "percentage": 67.38, "elapsed_time": "2:58:32", "remaining_time": "1:26:25", "throughput": 8675.22, "total_tokens": 92935440} +{"current_steps": 137915, "total_steps": 204665, "loss": 0.0, "lr": 5.810185078091879e-07, "epoch": 3.3692863948403486, "percentage": 67.39, "elapsed_time": "2:58:33", "remaining_time": "1:26:25", "throughput": 8675.27, "total_tokens": 92939088} +{"current_steps": 137920, "total_steps": 204665, "loss": 0.0, "lr": 5.809410777181118e-07, "epoch": 3.3694085456721963, "percentage": 67.39, "elapsed_time": "2:58:33", "remaining_time": "1:26:24", "throughput": 8675.31, "total_tokens": 92942544} +{"current_steps": 137925, "total_steps": 204665, "loss": 0.0001, "lr": 5.808636506745453e-07, "epoch": 3.369530696504043, "percentage": 67.39, "elapsed_time": "2:58:33", "remaining_time": "1:26:24", "throughput": 8675.3, "total_tokens": 92945360} +{"current_steps": 137930, "total_steps": 204665, "loss": 0.0, "lr": 5.807862266790512e-07, "epoch": 3.36965284733589, "percentage": 67.39, "elapsed_time": "2:58:34", "remaining_time": "1:26:23", "throughput": 8675.35, "total_tokens": 92948944} +{"current_steps": 137935, "total_steps": 204665, "loss": 0.068, "lr": 5.807088057321921e-07, "epoch": 3.3697749981677374, "percentage": 67.4, "elapsed_time": "2:58:34", "remaining_time": "1:26:23", "throughput": 8675.39, "total_tokens": 92952400} +{"current_steps": 137940, "total_steps": 204665, "loss": 0.0, "lr": 5.806313878345317e-07, "epoch": 3.3698971489995846, "percentage": 67.4, "elapsed_time": "2:58:34", "remaining_time": "1:26:23", "throughput": 8675.42, "total_tokens": 92955664} +{"current_steps": 137945, "total_steps": 204665, "loss": 0.0001, "lr": 5.805539729866322e-07, "epoch": 3.370019299831432, "percentage": 67.4, "elapsed_time": "2:58:35", "remaining_time": "1:26:22", "throughput": 8675.48, "total_tokens": 92959376} +{"current_steps": 137950, "total_steps": 204665, "loss": 0.0, "lr": 5.804765611890576e-07, "epoch": 3.370141450663279, "percentage": 67.4, "elapsed_time": "2:58:35", "remaining_time": "1:26:22", "throughput": 8675.51, "total_tokens": 92962768} +{"current_steps": 137955, "total_steps": 204665, "loss": 0.0, "lr": 5.803991524423698e-07, "epoch": 3.370263601495126, "percentage": 67.41, "elapsed_time": "2:58:35", "remaining_time": "1:26:21", "throughput": 8675.52, "total_tokens": 92965776} +{"current_steps": 137960, "total_steps": 204665, "loss": 0.1704, "lr": 5.803217467471322e-07, "epoch": 3.3703857523269733, "percentage": 67.41, "elapsed_time": "2:58:36", "remaining_time": "1:26:21", "throughput": 8675.53, "total_tokens": 92968848} +{"current_steps": 137965, "total_steps": 204665, "loss": 0.0, "lr": 5.802443441039082e-07, "epoch": 3.3705079031588205, "percentage": 67.41, "elapsed_time": "2:58:36", "remaining_time": "1:26:20", "throughput": 8675.53, "total_tokens": 92971728} +{"current_steps": 137970, "total_steps": 204665, "loss": 0.0007, "lr": 5.801669445132597e-07, "epoch": 3.3706300539906677, "percentage": 67.41, "elapsed_time": "2:58:36", "remaining_time": "1:26:20", "throughput": 8675.59, "total_tokens": 92975440} +{"current_steps": 137975, "total_steps": 204665, "loss": 0.0, "lr": 5.800895479757506e-07, "epoch": 3.370752204822515, "percentage": 67.42, "elapsed_time": "2:58:37", "remaining_time": "1:26:20", "throughput": 8675.62, "total_tokens": 92978832} +{"current_steps": 137980, "total_steps": 204665, "loss": 0.0, "lr": 5.800121544919429e-07, "epoch": 3.370874355654362, "percentage": 67.42, "elapsed_time": "2:58:37", "remaining_time": "1:26:19", "throughput": 8675.66, "total_tokens": 92982288} +{"current_steps": 137985, "total_steps": 204665, "loss": 0.0001, "lr": 5.799347640623997e-07, "epoch": 3.3709965064862093, "percentage": 67.42, "elapsed_time": "2:58:37", "remaining_time": "1:26:19", "throughput": 8675.7, "total_tokens": 92985744} +{"current_steps": 137990, "total_steps": 204665, "loss": 0.0, "lr": 5.798573766876841e-07, "epoch": 3.3711186573180565, "percentage": 67.42, "elapsed_time": "2:58:38", "remaining_time": "1:26:18", "throughput": 8675.75, "total_tokens": 92989328} +{"current_steps": 137995, "total_steps": 204665, "loss": 0.0, "lr": 5.797799923683586e-07, "epoch": 3.3712408081499037, "percentage": 67.42, "elapsed_time": "2:58:38", "remaining_time": "1:26:18", "throughput": 8675.78, "total_tokens": 92992720} +{"current_steps": 138000, "total_steps": 204665, "loss": 0.0, "lr": 5.797026111049863e-07, "epoch": 3.3713629589817504, "percentage": 67.43, "elapsed_time": "2:58:38", "remaining_time": "1:26:18", "throughput": 8675.81, "total_tokens": 92995984} +{"current_steps": 138005, "total_steps": 204665, "loss": 0.0, "lr": 5.796252328981295e-07, "epoch": 3.371485109813598, "percentage": 67.43, "elapsed_time": "2:58:39", "remaining_time": "1:26:17", "throughput": 8675.91, "total_tokens": 93000208} +{"current_steps": 138010, "total_steps": 204665, "loss": 0.0003, "lr": 5.795478577483508e-07, "epoch": 3.371607260645445, "percentage": 67.43, "elapsed_time": "2:58:39", "remaining_time": "1:26:17", "throughput": 8675.97, "total_tokens": 93003920} +{"current_steps": 138015, "total_steps": 204665, "loss": 0.0096, "lr": 5.794704856562136e-07, "epoch": 3.371729411477292, "percentage": 67.43, "elapsed_time": "2:58:40", "remaining_time": "1:26:16", "throughput": 8676.02, "total_tokens": 93007504} +{"current_steps": 138020, "total_steps": 204665, "loss": 0.0002, "lr": 5.793931166222798e-07, "epoch": 3.371851562309139, "percentage": 67.44, "elapsed_time": "2:58:40", "remaining_time": "1:26:16", "throughput": 8676.05, "total_tokens": 93010896} +{"current_steps": 138025, "total_steps": 204665, "loss": 0.0, "lr": 5.793157506471127e-07, "epoch": 3.3719737131409864, "percentage": 67.44, "elapsed_time": "2:58:40", "remaining_time": "1:26:16", "throughput": 8676.08, "total_tokens": 93014160} +{"current_steps": 138030, "total_steps": 204665, "loss": 0.0, "lr": 5.79238387731274e-07, "epoch": 3.3720958639728336, "percentage": 67.44, "elapsed_time": "2:58:41", "remaining_time": "1:26:15", "throughput": 8676.11, "total_tokens": 93017424} +{"current_steps": 138035, "total_steps": 204665, "loss": 0.0, "lr": 5.791610278753276e-07, "epoch": 3.3722180148046808, "percentage": 67.44, "elapsed_time": "2:58:41", "remaining_time": "1:26:15", "throughput": 8676.19, "total_tokens": 93021456} +{"current_steps": 138040, "total_steps": 204665, "loss": 0.0334, "lr": 5.79083671079835e-07, "epoch": 3.372340165636528, "percentage": 67.45, "elapsed_time": "2:58:41", "remaining_time": "1:26:14", "throughput": 8676.2, "total_tokens": 93024464} +{"current_steps": 138045, "total_steps": 204665, "loss": 0.0, "lr": 5.79006317345359e-07, "epoch": 3.372462316468375, "percentage": 67.45, "elapsed_time": "2:58:42", "remaining_time": "1:26:14", "throughput": 8676.22, "total_tokens": 93027600} +{"current_steps": 138050, "total_steps": 204665, "loss": 0.0, "lr": 5.789289666724629e-07, "epoch": 3.3725844673002223, "percentage": 67.45, "elapsed_time": "2:58:42", "remaining_time": "1:26:14", "throughput": 8676.27, "total_tokens": 93031248} +{"current_steps": 138055, "total_steps": 204665, "loss": 0.0, "lr": 5.78851619061708e-07, "epoch": 3.3727066181320695, "percentage": 67.45, "elapsed_time": "2:58:42", "remaining_time": "1:26:13", "throughput": 8676.3, "total_tokens": 93034576} +{"current_steps": 138060, "total_steps": 204665, "loss": 0.0, "lr": 5.787742745136579e-07, "epoch": 3.3728287689639167, "percentage": 67.46, "elapsed_time": "2:58:43", "remaining_time": "1:26:13", "throughput": 8676.31, "total_tokens": 93037648} +{"current_steps": 138065, "total_steps": 204665, "loss": 0.0, "lr": 5.786969330288741e-07, "epoch": 3.372950919795764, "percentage": 67.46, "elapsed_time": "2:58:43", "remaining_time": "1:26:12", "throughput": 8676.32, "total_tokens": 93040720} +{"current_steps": 138070, "total_steps": 204665, "loss": 0.0726, "lr": 5.7861959460792e-07, "epoch": 3.373073070627611, "percentage": 67.46, "elapsed_time": "2:58:43", "remaining_time": "1:26:12", "throughput": 8676.34, "total_tokens": 93043984} +{"current_steps": 138075, "total_steps": 204665, "loss": 0.0, "lr": 5.785422592513572e-07, "epoch": 3.3731952214594583, "percentage": 67.46, "elapsed_time": "2:58:44", "remaining_time": "1:26:12", "throughput": 8676.35, "total_tokens": 93046992} +{"current_steps": 138080, "total_steps": 204665, "loss": 0.0, "lr": 5.784649269597482e-07, "epoch": 3.3733173722913055, "percentage": 67.47, "elapsed_time": "2:58:44", "remaining_time": "1:26:11", "throughput": 8676.37, "total_tokens": 93050192} +{"current_steps": 138085, "total_steps": 204665, "loss": 0.0, "lr": 5.783875977336563e-07, "epoch": 3.3734395231231527, "percentage": 67.47, "elapsed_time": "2:58:44", "remaining_time": "1:26:11", "throughput": 8676.4, "total_tokens": 93053456} +{"current_steps": 138090, "total_steps": 204665, "loss": 0.0377, "lr": 5.783102715736426e-07, "epoch": 3.373561673955, "percentage": 67.47, "elapsed_time": "2:58:45", "remaining_time": "1:26:10", "throughput": 8676.45, "total_tokens": 93057040} +{"current_steps": 138095, "total_steps": 204665, "loss": 0.0, "lr": 5.782329484802706e-07, "epoch": 3.3736838247868466, "percentage": 67.47, "elapsed_time": "2:58:45", "remaining_time": "1:26:10", "throughput": 8676.47, "total_tokens": 93060240} +{"current_steps": 138100, "total_steps": 204665, "loss": 0.0, "lr": 5.781556284541015e-07, "epoch": 3.3738059756186938, "percentage": 67.48, "elapsed_time": "2:58:45", "remaining_time": "1:26:09", "throughput": 8676.53, "total_tokens": 93064016} +{"current_steps": 138105, "total_steps": 204665, "loss": 0.0, "lr": 5.780783114956986e-07, "epoch": 3.373928126450541, "percentage": 67.48, "elapsed_time": "2:58:46", "remaining_time": "1:26:09", "throughput": 8676.53, "total_tokens": 93066896} +{"current_steps": 138110, "total_steps": 204665, "loss": 0.056, "lr": 5.780009976056237e-07, "epoch": 3.374050277282388, "percentage": 67.48, "elapsed_time": "2:58:46", "remaining_time": "1:26:09", "throughput": 8676.58, "total_tokens": 93070480} +{"current_steps": 138115, "total_steps": 204665, "loss": 0.0, "lr": 5.779236867844385e-07, "epoch": 3.3741724281142353, "percentage": 67.48, "elapsed_time": "2:58:46", "remaining_time": "1:26:08", "throughput": 8676.63, "total_tokens": 93074128} +{"current_steps": 138120, "total_steps": 204665, "loss": 0.0, "lr": 5.778463790327064e-07, "epoch": 3.3742945789460825, "percentage": 67.49, "elapsed_time": "2:58:47", "remaining_time": "1:26:08", "throughput": 8676.66, "total_tokens": 93077456} +{"current_steps": 138125, "total_steps": 204665, "loss": 0.0, "lr": 5.777690743509885e-07, "epoch": 3.3744167297779297, "percentage": 67.49, "elapsed_time": "2:58:47", "remaining_time": "1:26:07", "throughput": 8676.71, "total_tokens": 93080976} +{"current_steps": 138130, "total_steps": 204665, "loss": 0.0, "lr": 5.776917727398478e-07, "epoch": 3.374538880609777, "percentage": 67.49, "elapsed_time": "2:58:48", "remaining_time": "1:26:07", "throughput": 8676.75, "total_tokens": 93084560} +{"current_steps": 138135, "total_steps": 204665, "loss": 0.0, "lr": 5.776144741998457e-07, "epoch": 3.374661031441624, "percentage": 67.49, "elapsed_time": "2:58:48", "remaining_time": "1:26:07", "throughput": 8676.81, "total_tokens": 93088208} +{"current_steps": 138140, "total_steps": 204665, "loss": 0.0, "lr": 5.775371787315448e-07, "epoch": 3.3747831822734713, "percentage": 67.5, "elapsed_time": "2:58:48", "remaining_time": "1:26:06", "throughput": 8676.86, "total_tokens": 93091792} +{"current_steps": 138145, "total_steps": 204665, "loss": 0.0001, "lr": 5.774598863355077e-07, "epoch": 3.3749053331053185, "percentage": 67.5, "elapsed_time": "2:58:49", "remaining_time": "1:26:06", "throughput": 8676.91, "total_tokens": 93095440} +{"current_steps": 138150, "total_steps": 204665, "loss": 0.0454, "lr": 5.773825970122954e-07, "epoch": 3.3750274839371657, "percentage": 67.5, "elapsed_time": "2:58:49", "remaining_time": "1:26:05", "throughput": 8676.94, "total_tokens": 93098768} +{"current_steps": 138155, "total_steps": 204665, "loss": 0.0569, "lr": 5.773053107624711e-07, "epoch": 3.375149634769013, "percentage": 67.5, "elapsed_time": "2:58:49", "remaining_time": "1:26:05", "throughput": 8676.98, "total_tokens": 93102096} +{"current_steps": 138160, "total_steps": 204665, "loss": 0.0, "lr": 5.772280275865955e-07, "epoch": 3.37527178560086, "percentage": 67.51, "elapsed_time": "2:58:50", "remaining_time": "1:26:05", "throughput": 8677.02, "total_tokens": 93105616} +{"current_steps": 138165, "total_steps": 204665, "loss": 0.0, "lr": 5.771507474852322e-07, "epoch": 3.3753939364327072, "percentage": 67.51, "elapsed_time": "2:58:50", "remaining_time": "1:26:04", "throughput": 8677.07, "total_tokens": 93109200} +{"current_steps": 138170, "total_steps": 204665, "loss": 0.0643, "lr": 5.770734704589417e-07, "epoch": 3.3755160872645544, "percentage": 67.51, "elapsed_time": "2:58:50", "remaining_time": "1:26:04", "throughput": 8677.08, "total_tokens": 93112208} +{"current_steps": 138175, "total_steps": 204665, "loss": 0.0, "lr": 5.769961965082868e-07, "epoch": 3.3756382380964016, "percentage": 67.51, "elapsed_time": "2:58:51", "remaining_time": "1:26:03", "throughput": 8677.13, "total_tokens": 93115856} +{"current_steps": 138180, "total_steps": 204665, "loss": 0.0, "lr": 5.769189256338299e-07, "epoch": 3.3757603889282484, "percentage": 67.52, "elapsed_time": "2:58:51", "remaining_time": "1:26:03", "throughput": 8677.15, "total_tokens": 93119056} +{"current_steps": 138185, "total_steps": 204665, "loss": 0.0, "lr": 5.768416578361317e-07, "epoch": 3.375882539760096, "percentage": 67.52, "elapsed_time": "2:58:51", "remaining_time": "1:26:03", "throughput": 8677.21, "total_tokens": 93122704} +{"current_steps": 138190, "total_steps": 204665, "loss": 0.0, "lr": 5.767643931157552e-07, "epoch": 3.3760046905919427, "percentage": 67.52, "elapsed_time": "2:58:52", "remaining_time": "1:26:02", "throughput": 8677.23, "total_tokens": 93125904} +{"current_steps": 138195, "total_steps": 204665, "loss": 0.0, "lr": 5.766871314732616e-07, "epoch": 3.37612684142379, "percentage": 67.52, "elapsed_time": "2:58:52", "remaining_time": "1:26:02", "throughput": 8677.25, "total_tokens": 93129168} +{"current_steps": 138200, "total_steps": 204665, "loss": 0.0464, "lr": 5.76609872909213e-07, "epoch": 3.376248992255637, "percentage": 67.52, "elapsed_time": "2:58:52", "remaining_time": "1:26:01", "throughput": 8677.28, "total_tokens": 93132496} +{"current_steps": 138205, "total_steps": 204665, "loss": 0.0588, "lr": 5.765326174241716e-07, "epoch": 3.3763711430874843, "percentage": 67.53, "elapsed_time": "2:58:53", "remaining_time": "1:26:01", "throughput": 8677.32, "total_tokens": 93135888} +{"current_steps": 138210, "total_steps": 204665, "loss": 0.0, "lr": 5.76455365018699e-07, "epoch": 3.3764932939193315, "percentage": 67.53, "elapsed_time": "2:58:53", "remaining_time": "1:26:00", "throughput": 8677.34, "total_tokens": 93139088} +{"current_steps": 138215, "total_steps": 204665, "loss": 0.0, "lr": 5.763781156933565e-07, "epoch": 3.3766154447511787, "percentage": 67.53, "elapsed_time": "2:58:53", "remaining_time": "1:26:00", "throughput": 8677.4, "total_tokens": 93142864} +{"current_steps": 138220, "total_steps": 204665, "loss": 0.0001, "lr": 5.763008694487066e-07, "epoch": 3.376737595583026, "percentage": 67.53, "elapsed_time": "2:58:54", "remaining_time": "1:26:00", "throughput": 8677.44, "total_tokens": 93146256} +{"current_steps": 138225, "total_steps": 204665, "loss": 0.0, "lr": 5.762236262853108e-07, "epoch": 3.376859746414873, "percentage": 67.54, "elapsed_time": "2:58:54", "remaining_time": "1:25:59", "throughput": 8677.47, "total_tokens": 93149584} +{"current_steps": 138230, "total_steps": 204665, "loss": 0.0, "lr": 5.761463862037304e-07, "epoch": 3.3769818972467203, "percentage": 67.54, "elapsed_time": "2:58:54", "remaining_time": "1:25:59", "throughput": 8677.5, "total_tokens": 93152976} +{"current_steps": 138235, "total_steps": 204665, "loss": 0.0001, "lr": 5.760691492045275e-07, "epoch": 3.3771040480785675, "percentage": 67.54, "elapsed_time": "2:58:55", "remaining_time": "1:25:58", "throughput": 8677.52, "total_tokens": 93156048} +{"current_steps": 138240, "total_steps": 204665, "loss": 0.0, "lr": 5.75991915288264e-07, "epoch": 3.3772261989104146, "percentage": 67.54, "elapsed_time": "2:58:55", "remaining_time": "1:25:58", "throughput": 8677.53, "total_tokens": 93159184} +{"current_steps": 138245, "total_steps": 204665, "loss": 0.0, "lr": 5.759146844555011e-07, "epoch": 3.377348349742262, "percentage": 67.55, "elapsed_time": "2:58:56", "remaining_time": "1:25:58", "throughput": 8677.59, "total_tokens": 93162960} +{"current_steps": 138250, "total_steps": 204665, "loss": 0.0, "lr": 5.758374567068011e-07, "epoch": 3.377470500574109, "percentage": 67.55, "elapsed_time": "2:58:56", "remaining_time": "1:25:57", "throughput": 8677.64, "total_tokens": 93166544} +{"current_steps": 138255, "total_steps": 204665, "loss": 0.0013, "lr": 5.757602320427248e-07, "epoch": 3.377592651405956, "percentage": 67.55, "elapsed_time": "2:58:56", "remaining_time": "1:25:57", "throughput": 8677.65, "total_tokens": 93169616} +{"current_steps": 138260, "total_steps": 204665, "loss": 0.0, "lr": 5.756830104638345e-07, "epoch": 3.3777148022378034, "percentage": 67.55, "elapsed_time": "2:58:57", "remaining_time": "1:25:56", "throughput": 8677.7, "total_tokens": 93173200} +{"current_steps": 138265, "total_steps": 204665, "loss": 0.0001, "lr": 5.756057919706912e-07, "epoch": 3.3778369530696506, "percentage": 67.56, "elapsed_time": "2:58:57", "remaining_time": "1:25:56", "throughput": 8677.71, "total_tokens": 93176272} +{"current_steps": 138270, "total_steps": 204665, "loss": 0.0, "lr": 5.755285765638565e-07, "epoch": 3.377959103901498, "percentage": 67.56, "elapsed_time": "2:58:57", "remaining_time": "1:25:56", "throughput": 8677.74, "total_tokens": 93179536} +{"current_steps": 138275, "total_steps": 204665, "loss": 0.0, "lr": 5.754513642438928e-07, "epoch": 3.3780812547333445, "percentage": 67.56, "elapsed_time": "2:58:58", "remaining_time": "1:25:55", "throughput": 8677.77, "total_tokens": 93182928} +{"current_steps": 138280, "total_steps": 204665, "loss": 0.0001, "lr": 5.753741550113605e-07, "epoch": 3.3782034055651917, "percentage": 67.56, "elapsed_time": "2:58:58", "remaining_time": "1:25:55", "throughput": 8677.8, "total_tokens": 93186128} +{"current_steps": 138285, "total_steps": 204665, "loss": 0.0246, "lr": 5.752969488668218e-07, "epoch": 3.378325556397039, "percentage": 67.57, "elapsed_time": "2:58:58", "remaining_time": "1:25:54", "throughput": 8677.85, "total_tokens": 93189776} +{"current_steps": 138290, "total_steps": 204665, "loss": 0.0512, "lr": 5.752197458108376e-07, "epoch": 3.378447707228886, "percentage": 67.57, "elapsed_time": "2:58:59", "remaining_time": "1:25:54", "throughput": 8677.9, "total_tokens": 93193424} +{"current_steps": 138295, "total_steps": 204665, "loss": 0.0, "lr": 5.751425458439698e-07, "epoch": 3.3785698580607333, "percentage": 67.57, "elapsed_time": "2:58:59", "remaining_time": "1:25:54", "throughput": 8677.93, "total_tokens": 93196688} +{"current_steps": 138300, "total_steps": 204665, "loss": 0.0001, "lr": 5.750653489667801e-07, "epoch": 3.3786920088925805, "percentage": 67.57, "elapsed_time": "2:58:59", "remaining_time": "1:25:53", "throughput": 8677.96, "total_tokens": 93200080} +{"current_steps": 138305, "total_steps": 204665, "loss": 0.0213, "lr": 5.749881551798288e-07, "epoch": 3.3788141597244277, "percentage": 67.58, "elapsed_time": "2:59:00", "remaining_time": "1:25:53", "throughput": 8677.97, "total_tokens": 93203152} +{"current_steps": 138310, "total_steps": 204665, "loss": 0.0, "lr": 5.749109644836786e-07, "epoch": 3.378936310556275, "percentage": 67.58, "elapsed_time": "2:59:00", "remaining_time": "1:25:52", "throughput": 8677.97, "total_tokens": 93206032} +{"current_steps": 138315, "total_steps": 204665, "loss": 0.0, "lr": 5.748337768788901e-07, "epoch": 3.379058461388122, "percentage": 67.58, "elapsed_time": "2:59:00", "remaining_time": "1:25:52", "throughput": 8677.98, "total_tokens": 93209040} +{"current_steps": 138320, "total_steps": 204665, "loss": 0.0, "lr": 5.747565923660244e-07, "epoch": 3.3791806122199692, "percentage": 67.58, "elapsed_time": "2:59:01", "remaining_time": "1:25:52", "throughput": 8678.04, "total_tokens": 93212816} +{"current_steps": 138325, "total_steps": 204665, "loss": 0.0, "lr": 5.746794109456434e-07, "epoch": 3.3793027630518164, "percentage": 67.59, "elapsed_time": "2:59:01", "remaining_time": "1:25:51", "throughput": 8678.08, "total_tokens": 93216272} +{"current_steps": 138330, "total_steps": 204665, "loss": 0.0, "lr": 5.746022326183079e-07, "epoch": 3.3794249138836636, "percentage": 67.59, "elapsed_time": "2:59:01", "remaining_time": "1:25:51", "throughput": 8678.11, "total_tokens": 93219664} +{"current_steps": 138335, "total_steps": 204665, "loss": 0.0246, "lr": 5.745250573845797e-07, "epoch": 3.379547064715511, "percentage": 67.59, "elapsed_time": "2:59:02", "remaining_time": "1:25:50", "throughput": 8678.14, "total_tokens": 93222864} +{"current_steps": 138340, "total_steps": 204665, "loss": 0.0001, "lr": 5.744478852450192e-07, "epoch": 3.379669215547358, "percentage": 67.59, "elapsed_time": "2:59:02", "remaining_time": "1:25:50", "throughput": 8678.16, "total_tokens": 93226192} +{"current_steps": 138345, "total_steps": 204665, "loss": 0.0002, "lr": 5.743707162001888e-07, "epoch": 3.379791366379205, "percentage": 67.6, "elapsed_time": "2:59:02", "remaining_time": "1:25:49", "throughput": 8678.18, "total_tokens": 93229392} +{"current_steps": 138350, "total_steps": 204665, "loss": 0.0, "lr": 5.742935502506484e-07, "epoch": 3.3799135172110524, "percentage": 67.6, "elapsed_time": "2:59:03", "remaining_time": "1:25:49", "throughput": 8678.22, "total_tokens": 93232784} +{"current_steps": 138355, "total_steps": 204665, "loss": 0.0001, "lr": 5.742163873969599e-07, "epoch": 3.3800356680428996, "percentage": 67.6, "elapsed_time": "2:59:03", "remaining_time": "1:25:49", "throughput": 8678.24, "total_tokens": 93235984} +{"current_steps": 138360, "total_steps": 204665, "loss": 0.0, "lr": 5.741392276396847e-07, "epoch": 3.3801578188747463, "percentage": 67.6, "elapsed_time": "2:59:03", "remaining_time": "1:25:48", "throughput": 8678.26, "total_tokens": 93239248} +{"current_steps": 138365, "total_steps": 204665, "loss": 0.0002, "lr": 5.740620709793832e-07, "epoch": 3.380279969706594, "percentage": 67.61, "elapsed_time": "2:59:04", "remaining_time": "1:25:48", "throughput": 8678.28, "total_tokens": 93242448} +{"current_steps": 138370, "total_steps": 204665, "loss": 0.0, "lr": 5.739849174166173e-07, "epoch": 3.3804021205384407, "percentage": 67.61, "elapsed_time": "2:59:04", "remaining_time": "1:25:47", "throughput": 8678.29, "total_tokens": 93245456} +{"current_steps": 138375, "total_steps": 204665, "loss": 0.0, "lr": 5.739077669519473e-07, "epoch": 3.380524271370288, "percentage": 67.61, "elapsed_time": "2:59:05", "remaining_time": "1:25:47", "throughput": 8678.29, "total_tokens": 93248336} +{"current_steps": 138380, "total_steps": 204665, "loss": 0.0, "lr": 5.738306195859351e-07, "epoch": 3.380646422202135, "percentage": 67.61, "elapsed_time": "2:59:05", "remaining_time": "1:25:47", "throughput": 8678.3, "total_tokens": 93251472} +{"current_steps": 138385, "total_steps": 204665, "loss": 0.0, "lr": 5.737534753191406e-07, "epoch": 3.3807685730339823, "percentage": 67.62, "elapsed_time": "2:59:05", "remaining_time": "1:25:46", "throughput": 8678.32, "total_tokens": 93254672} +{"current_steps": 138390, "total_steps": 204665, "loss": 0.0001, "lr": 5.736763341521256e-07, "epoch": 3.3808907238658295, "percentage": 67.62, "elapsed_time": "2:59:06", "remaining_time": "1:25:46", "throughput": 8678.36, "total_tokens": 93258064} +{"current_steps": 138395, "total_steps": 204665, "loss": 0.0696, "lr": 5.735991960854514e-07, "epoch": 3.3810128746976766, "percentage": 67.62, "elapsed_time": "2:59:06", "remaining_time": "1:25:45", "throughput": 8678.4, "total_tokens": 93261648} +{"current_steps": 138400, "total_steps": 204665, "loss": 0.0008, "lr": 5.735220611196781e-07, "epoch": 3.381135025529524, "percentage": 67.62, "elapsed_time": "2:59:06", "remaining_time": "1:25:45", "throughput": 8678.43, "total_tokens": 93264912} +{"current_steps": 138405, "total_steps": 204665, "loss": 0.0, "lr": 5.734449292553675e-07, "epoch": 3.381257176361371, "percentage": 67.63, "elapsed_time": "2:59:07", "remaining_time": "1:25:45", "throughput": 8678.47, "total_tokens": 93268368} +{"current_steps": 138410, "total_steps": 204665, "loss": 0.0, "lr": 5.733678004930798e-07, "epoch": 3.381379327193218, "percentage": 67.63, "elapsed_time": "2:59:07", "remaining_time": "1:25:44", "throughput": 8678.51, "total_tokens": 93271824} +{"current_steps": 138415, "total_steps": 204665, "loss": 0.0, "lr": 5.732906748333766e-07, "epoch": 3.3815014780250654, "percentage": 67.63, "elapsed_time": "2:59:07", "remaining_time": "1:25:44", "throughput": 8678.55, "total_tokens": 93275344} +{"current_steps": 138420, "total_steps": 204665, "loss": 0.0001, "lr": 5.732135522768182e-07, "epoch": 3.3816236288569126, "percentage": 67.63, "elapsed_time": "2:59:08", "remaining_time": "1:25:43", "throughput": 8678.6, "total_tokens": 93278992} +{"current_steps": 138425, "total_steps": 204665, "loss": 0.0, "lr": 5.731364328239654e-07, "epoch": 3.38174577968876, "percentage": 67.63, "elapsed_time": "2:59:08", "remaining_time": "1:25:43", "throughput": 8678.61, "total_tokens": 93282000} +{"current_steps": 138430, "total_steps": 204665, "loss": 0.1003, "lr": 5.730593164753795e-07, "epoch": 3.381867930520607, "percentage": 67.64, "elapsed_time": "2:59:08", "remaining_time": "1:25:43", "throughput": 8678.66, "total_tokens": 93285584} +{"current_steps": 138435, "total_steps": 204665, "loss": 0.0, "lr": 5.729822032316208e-07, "epoch": 3.381990081352454, "percentage": 67.64, "elapsed_time": "2:59:09", "remaining_time": "1:25:42", "throughput": 8678.68, "total_tokens": 93288912} +{"current_steps": 138440, "total_steps": 204665, "loss": 0.0022, "lr": 5.729050930932508e-07, "epoch": 3.3821122321843013, "percentage": 67.64, "elapsed_time": "2:59:09", "remaining_time": "1:25:42", "throughput": 8678.7, "total_tokens": 93292112} +{"current_steps": 138445, "total_steps": 204665, "loss": 0.0, "lr": 5.728279860608294e-07, "epoch": 3.382234383016148, "percentage": 67.64, "elapsed_time": "2:59:09", "remaining_time": "1:25:41", "throughput": 8678.71, "total_tokens": 93295184} +{"current_steps": 138450, "total_steps": 204665, "loss": 0.0, "lr": 5.727508821349178e-07, "epoch": 3.3823565338479957, "percentage": 67.65, "elapsed_time": "2:59:10", "remaining_time": "1:25:41", "throughput": 8678.77, "total_tokens": 93298896} +{"current_steps": 138455, "total_steps": 204665, "loss": 0.0, "lr": 5.726737813160771e-07, "epoch": 3.3824786846798425, "percentage": 67.65, "elapsed_time": "2:59:10", "remaining_time": "1:25:40", "throughput": 8678.81, "total_tokens": 93302288} +{"current_steps": 138460, "total_steps": 204665, "loss": 0.0, "lr": 5.725966836048671e-07, "epoch": 3.3826008355116897, "percentage": 67.65, "elapsed_time": "2:59:10", "remaining_time": "1:25:40", "throughput": 8678.83, "total_tokens": 93305552} +{"current_steps": 138465, "total_steps": 204665, "loss": 0.0, "lr": 5.725195890018495e-07, "epoch": 3.382722986343537, "percentage": 67.65, "elapsed_time": "2:59:11", "remaining_time": "1:25:40", "throughput": 8678.86, "total_tokens": 93308880} +{"current_steps": 138470, "total_steps": 204665, "loss": 0.0001, "lr": 5.72442497507584e-07, "epoch": 3.382845137175384, "percentage": 67.66, "elapsed_time": "2:59:11", "remaining_time": "1:25:39", "throughput": 8678.89, "total_tokens": 93312272} +{"current_steps": 138475, "total_steps": 204665, "loss": 0.0, "lr": 5.72365409122632e-07, "epoch": 3.3829672880072312, "percentage": 67.66, "elapsed_time": "2:59:11", "remaining_time": "1:25:39", "throughput": 8678.91, "total_tokens": 93315408} +{"current_steps": 138480, "total_steps": 204665, "loss": 0.0002, "lr": 5.722883238475535e-07, "epoch": 3.3830894388390784, "percentage": 67.66, "elapsed_time": "2:59:12", "remaining_time": "1:25:38", "throughput": 8678.97, "total_tokens": 93319120} +{"current_steps": 138485, "total_steps": 204665, "loss": 0.0, "lr": 5.722112416829092e-07, "epoch": 3.3832115896709256, "percentage": 67.66, "elapsed_time": "2:59:12", "remaining_time": "1:25:38", "throughput": 8679.01, "total_tokens": 93322640} +{"current_steps": 138490, "total_steps": 204665, "loss": 0.0, "lr": 5.721341626292603e-07, "epoch": 3.383333740502773, "percentage": 67.67, "elapsed_time": "2:59:13", "remaining_time": "1:25:38", "throughput": 8679.06, "total_tokens": 93326224} +{"current_steps": 138495, "total_steps": 204665, "loss": 0.0452, "lr": 5.720570866871664e-07, "epoch": 3.38345589133462, "percentage": 67.67, "elapsed_time": "2:59:13", "remaining_time": "1:25:37", "throughput": 8679.14, "total_tokens": 93330256} +{"current_steps": 138500, "total_steps": 204665, "loss": 0.0, "lr": 5.719800138571889e-07, "epoch": 3.383578042166467, "percentage": 67.67, "elapsed_time": "2:59:13", "remaining_time": "1:25:37", "throughput": 8679.17, "total_tokens": 93333520} +{"current_steps": 138505, "total_steps": 204665, "loss": 0.0, "lr": 5.719029441398875e-07, "epoch": 3.3837001929983144, "percentage": 67.67, "elapsed_time": "2:59:14", "remaining_time": "1:25:36", "throughput": 8679.2, "total_tokens": 93336848} +{"current_steps": 138510, "total_steps": 204665, "loss": 0.0001, "lr": 5.718258775358229e-07, "epoch": 3.3838223438301616, "percentage": 67.68, "elapsed_time": "2:59:14", "remaining_time": "1:25:36", "throughput": 8679.22, "total_tokens": 93340048} +{"current_steps": 138515, "total_steps": 204665, "loss": 0.0, "lr": 5.717488140455562e-07, "epoch": 3.3839444946620088, "percentage": 67.68, "elapsed_time": "2:59:14", "remaining_time": "1:25:36", "throughput": 8679.26, "total_tokens": 93343504} +{"current_steps": 138520, "total_steps": 204665, "loss": 0.0, "lr": 5.716717536696473e-07, "epoch": 3.384066645493856, "percentage": 67.68, "elapsed_time": "2:59:15", "remaining_time": "1:25:35", "throughput": 8679.3, "total_tokens": 93347024} +{"current_steps": 138525, "total_steps": 204665, "loss": 0.0638, "lr": 5.715946964086562e-07, "epoch": 3.384188796325703, "percentage": 67.68, "elapsed_time": "2:59:15", "remaining_time": "1:25:35", "throughput": 8679.32, "total_tokens": 93350224} +{"current_steps": 138530, "total_steps": 204665, "loss": 0.0, "lr": 5.71517642263144e-07, "epoch": 3.3843109471575503, "percentage": 67.69, "elapsed_time": "2:59:15", "remaining_time": "1:25:34", "throughput": 8679.4, "total_tokens": 93354192} +{"current_steps": 138535, "total_steps": 204665, "loss": 0.0, "lr": 5.714405912336708e-07, "epoch": 3.3844330979893975, "percentage": 67.69, "elapsed_time": "2:59:16", "remaining_time": "1:25:34", "throughput": 8679.44, "total_tokens": 93357712} +{"current_steps": 138540, "total_steps": 204665, "loss": 0.0489, "lr": 5.713635433207966e-07, "epoch": 3.3845552488212443, "percentage": 67.69, "elapsed_time": "2:59:16", "remaining_time": "1:25:34", "throughput": 8679.47, "total_tokens": 93361040} +{"current_steps": 138545, "total_steps": 204665, "loss": 0.0, "lr": 5.71286498525082e-07, "epoch": 3.3846773996530914, "percentage": 67.69, "elapsed_time": "2:59:16", "remaining_time": "1:25:33", "throughput": 8679.51, "total_tokens": 93364560} +{"current_steps": 138550, "total_steps": 204665, "loss": 0.0, "lr": 5.712094568470875e-07, "epoch": 3.3847995504849386, "percentage": 67.7, "elapsed_time": "2:59:17", "remaining_time": "1:25:33", "throughput": 8679.57, "total_tokens": 93368336} +{"current_steps": 138555, "total_steps": 204665, "loss": 0.0, "lr": 5.711324182873729e-07, "epoch": 3.384921701316786, "percentage": 67.7, "elapsed_time": "2:59:17", "remaining_time": "1:25:32", "throughput": 8679.59, "total_tokens": 93371472} +{"current_steps": 138560, "total_steps": 204665, "loss": 0.0546, "lr": 5.710553828464993e-07, "epoch": 3.385043852148633, "percentage": 67.7, "elapsed_time": "2:59:17", "remaining_time": "1:25:32", "throughput": 8679.6, "total_tokens": 93374480} +{"current_steps": 138565, "total_steps": 204665, "loss": 0.0, "lr": 5.709783505250256e-07, "epoch": 3.38516600298048, "percentage": 67.7, "elapsed_time": "2:59:18", "remaining_time": "1:25:32", "throughput": 8679.64, "total_tokens": 93378000} +{"current_steps": 138570, "total_steps": 204665, "loss": 0.0, "lr": 5.709013213235133e-07, "epoch": 3.3852881538123274, "percentage": 67.71, "elapsed_time": "2:59:18", "remaining_time": "1:25:31", "throughput": 8679.66, "total_tokens": 93381200} +{"current_steps": 138575, "total_steps": 204665, "loss": 0.0, "lr": 5.708242952425216e-07, "epoch": 3.3854103046441746, "percentage": 67.71, "elapsed_time": "2:59:18", "remaining_time": "1:25:31", "throughput": 8679.69, "total_tokens": 93384592} +{"current_steps": 138580, "total_steps": 204665, "loss": 0.0, "lr": 5.707472722826109e-07, "epoch": 3.3855324554760218, "percentage": 67.71, "elapsed_time": "2:59:19", "remaining_time": "1:25:30", "throughput": 8679.83, "total_tokens": 93389264} +{"current_steps": 138585, "total_steps": 204665, "loss": 0.0, "lr": 5.706702524443419e-07, "epoch": 3.385654606307869, "percentage": 67.71, "elapsed_time": "2:59:19", "remaining_time": "1:25:30", "throughput": 8679.88, "total_tokens": 93392784} +{"current_steps": 138590, "total_steps": 204665, "loss": 0.0001, "lr": 5.705932357282741e-07, "epoch": 3.385776757139716, "percentage": 67.72, "elapsed_time": "2:59:20", "remaining_time": "1:25:30", "throughput": 8679.91, "total_tokens": 93396176} +{"current_steps": 138595, "total_steps": 204665, "loss": 0.0, "lr": 5.705162221349681e-07, "epoch": 3.3858989079715633, "percentage": 67.72, "elapsed_time": "2:59:20", "remaining_time": "1:25:29", "throughput": 8679.93, "total_tokens": 93399376} +{"current_steps": 138600, "total_steps": 204665, "loss": 0.0, "lr": 5.704392116649832e-07, "epoch": 3.3860210588034105, "percentage": 67.72, "elapsed_time": "2:59:20", "remaining_time": "1:25:29", "throughput": 8680.0, "total_tokens": 93403216} +{"current_steps": 138605, "total_steps": 204665, "loss": 0.0684, "lr": 5.703622043188799e-07, "epoch": 3.3861432096352577, "percentage": 67.72, "elapsed_time": "2:59:21", "remaining_time": "1:25:28", "throughput": 8680.02, "total_tokens": 93406416} +{"current_steps": 138610, "total_steps": 204665, "loss": 0.0371, "lr": 5.702852000972187e-07, "epoch": 3.386265360467105, "percentage": 67.73, "elapsed_time": "2:59:21", "remaining_time": "1:25:28", "throughput": 8680.02, "total_tokens": 93409360} +{"current_steps": 138615, "total_steps": 204665, "loss": 0.0001, "lr": 5.702081990005587e-07, "epoch": 3.386387511298952, "percentage": 67.73, "elapsed_time": "2:59:21", "remaining_time": "1:25:27", "throughput": 8680.02, "total_tokens": 93412240} +{"current_steps": 138620, "total_steps": 204665, "loss": 0.0, "lr": 5.701312010294606e-07, "epoch": 3.3865096621307993, "percentage": 67.73, "elapsed_time": "2:59:22", "remaining_time": "1:25:27", "throughput": 8680.06, "total_tokens": 93415760} +{"current_steps": 138625, "total_steps": 204665, "loss": 0.0003, "lr": 5.700542061844839e-07, "epoch": 3.386631812962646, "percentage": 67.73, "elapsed_time": "2:59:22", "remaining_time": "1:25:27", "throughput": 8680.07, "total_tokens": 93418768} +{"current_steps": 138630, "total_steps": 204665, "loss": 0.0002, "lr": 5.699772144661885e-07, "epoch": 3.3867539637944937, "percentage": 67.74, "elapsed_time": "2:59:22", "remaining_time": "1:25:26", "throughput": 8680.1, "total_tokens": 93422096} +{"current_steps": 138635, "total_steps": 204665, "loss": 0.0419, "lr": 5.699002258751348e-07, "epoch": 3.3868761146263404, "percentage": 67.74, "elapsed_time": "2:59:23", "remaining_time": "1:25:26", "throughput": 8680.13, "total_tokens": 93425488} +{"current_steps": 138640, "total_steps": 204665, "loss": 0.0, "lr": 5.698232404118819e-07, "epoch": 3.3869982654581876, "percentage": 67.74, "elapsed_time": "2:59:23", "remaining_time": "1:25:25", "throughput": 8680.19, "total_tokens": 93429136} +{"current_steps": 138645, "total_steps": 204665, "loss": 0.0, "lr": 5.697462580769905e-07, "epoch": 3.387120416290035, "percentage": 67.74, "elapsed_time": "2:59:23", "remaining_time": "1:25:25", "throughput": 8680.22, "total_tokens": 93432464} +{"current_steps": 138650, "total_steps": 204665, "loss": 0.0001, "lr": 5.696692788710196e-07, "epoch": 3.387242567121882, "percentage": 67.74, "elapsed_time": "2:59:24", "remaining_time": "1:25:25", "throughput": 8680.25, "total_tokens": 93435792} +{"current_steps": 138655, "total_steps": 204665, "loss": 0.0, "lr": 5.6959230279453e-07, "epoch": 3.387364717953729, "percentage": 67.75, "elapsed_time": "2:59:24", "remaining_time": "1:25:24", "throughput": 8680.31, "total_tokens": 93439568} +{"current_steps": 138660, "total_steps": 204665, "loss": 0.0, "lr": 5.695153298480803e-07, "epoch": 3.3874868687855764, "percentage": 67.75, "elapsed_time": "2:59:24", "remaining_time": "1:25:24", "throughput": 8680.36, "total_tokens": 93443152} +{"current_steps": 138665, "total_steps": 204665, "loss": 0.0431, "lr": 5.694383600322314e-07, "epoch": 3.3876090196174236, "percentage": 67.75, "elapsed_time": "2:59:25", "remaining_time": "1:25:23", "throughput": 8680.41, "total_tokens": 93446736} +{"current_steps": 138670, "total_steps": 204665, "loss": 0.0, "lr": 5.693613933475423e-07, "epoch": 3.3877311704492707, "percentage": 67.75, "elapsed_time": "2:59:25", "remaining_time": "1:25:23", "throughput": 8680.42, "total_tokens": 93449872} +{"current_steps": 138675, "total_steps": 204665, "loss": 0.0155, "lr": 5.692844297945728e-07, "epoch": 3.387853321281118, "percentage": 67.76, "elapsed_time": "2:59:25", "remaining_time": "1:25:23", "throughput": 8680.45, "total_tokens": 93453200} +{"current_steps": 138680, "total_steps": 204665, "loss": 0.0, "lr": 5.692074693738833e-07, "epoch": 3.387975472112965, "percentage": 67.76, "elapsed_time": "2:59:26", "remaining_time": "1:25:22", "throughput": 8680.48, "total_tokens": 93456464} +{"current_steps": 138685, "total_steps": 204665, "loss": 0.1295, "lr": 5.691305120860323e-07, "epoch": 3.3880976229448123, "percentage": 67.76, "elapsed_time": "2:59:26", "remaining_time": "1:25:22", "throughput": 8680.5, "total_tokens": 93459664} +{"current_steps": 138690, "total_steps": 204665, "loss": 0.0, "lr": 5.690535579315809e-07, "epoch": 3.3882197737766595, "percentage": 67.76, "elapsed_time": "2:59:26", "remaining_time": "1:25:21", "throughput": 8680.53, "total_tokens": 93463056} +{"current_steps": 138695, "total_steps": 204665, "loss": 0.0, "lr": 5.689766069110873e-07, "epoch": 3.3883419246085067, "percentage": 67.77, "elapsed_time": "2:59:27", "remaining_time": "1:25:21", "throughput": 8680.55, "total_tokens": 93466192} +{"current_steps": 138700, "total_steps": 204665, "loss": 0.0, "lr": 5.688996590251118e-07, "epoch": 3.388464075440354, "percentage": 67.77, "elapsed_time": "2:59:27", "remaining_time": "1:25:21", "throughput": 8680.57, "total_tokens": 93469392} +{"current_steps": 138705, "total_steps": 204665, "loss": 0.0, "lr": 5.688227142742143e-07, "epoch": 3.388586226272201, "percentage": 67.77, "elapsed_time": "2:59:28", "remaining_time": "1:25:20", "throughput": 8680.59, "total_tokens": 93472592} +{"current_steps": 138710, "total_steps": 204665, "loss": 0.0, "lr": 5.687457726589535e-07, "epoch": 3.3887083771040483, "percentage": 67.77, "elapsed_time": "2:59:28", "remaining_time": "1:25:20", "throughput": 8680.61, "total_tokens": 93475792} +{"current_steps": 138715, "total_steps": 204665, "loss": 0.0001, "lr": 5.6866883417989e-07, "epoch": 3.3888305279358955, "percentage": 67.78, "elapsed_time": "2:59:28", "remaining_time": "1:25:19", "throughput": 8680.62, "total_tokens": 93478864} +{"current_steps": 138720, "total_steps": 204665, "loss": 0.0722, "lr": 5.685918988375823e-07, "epoch": 3.388952678767742, "percentage": 67.78, "elapsed_time": "2:59:29", "remaining_time": "1:25:19", "throughput": 8680.68, "total_tokens": 93482640} +{"current_steps": 138725, "total_steps": 204665, "loss": 0.0001, "lr": 5.685149666325907e-07, "epoch": 3.3890748295995894, "percentage": 67.78, "elapsed_time": "2:59:29", "remaining_time": "1:25:19", "throughput": 8680.73, "total_tokens": 93486160} +{"current_steps": 138730, "total_steps": 204665, "loss": 0.0002, "lr": 5.684380375654744e-07, "epoch": 3.3891969804314366, "percentage": 67.78, "elapsed_time": "2:59:29", "remaining_time": "1:25:18", "throughput": 8680.76, "total_tokens": 93489488} +{"current_steps": 138735, "total_steps": 204665, "loss": 0.0, "lr": 5.683611116367924e-07, "epoch": 3.3893191312632838, "percentage": 67.79, "elapsed_time": "2:59:30", "remaining_time": "1:25:18", "throughput": 8680.79, "total_tokens": 93492880} +{"current_steps": 138740, "total_steps": 204665, "loss": 0.0005, "lr": 5.682841888471047e-07, "epoch": 3.389441282095131, "percentage": 67.79, "elapsed_time": "2:59:30", "remaining_time": "1:25:17", "throughput": 8680.82, "total_tokens": 93496144} +{"current_steps": 138745, "total_steps": 204665, "loss": 0.0, "lr": 5.682072691969701e-07, "epoch": 3.389563432926978, "percentage": 67.79, "elapsed_time": "2:59:30", "remaining_time": "1:25:17", "throughput": 8680.84, "total_tokens": 93499408} +{"current_steps": 138750, "total_steps": 204665, "loss": 0.0, "lr": 5.68130352686949e-07, "epoch": 3.3896855837588253, "percentage": 67.79, "elapsed_time": "2:59:31", "remaining_time": "1:25:16", "throughput": 8680.9, "total_tokens": 93503056} +{"current_steps": 138755, "total_steps": 204665, "loss": 0.0, "lr": 5.680534393175997e-07, "epoch": 3.3898077345906725, "percentage": 67.8, "elapsed_time": "2:59:31", "remaining_time": "1:25:16", "throughput": 8680.93, "total_tokens": 93506384} +{"current_steps": 138760, "total_steps": 204665, "loss": 0.0, "lr": 5.679765290894818e-07, "epoch": 3.3899298854225197, "percentage": 67.8, "elapsed_time": "2:59:31", "remaining_time": "1:25:16", "throughput": 8680.96, "total_tokens": 93509776} +{"current_steps": 138765, "total_steps": 204665, "loss": 0.0, "lr": 5.678996220031553e-07, "epoch": 3.390052036254367, "percentage": 67.8, "elapsed_time": "2:59:32", "remaining_time": "1:25:15", "throughput": 8681.01, "total_tokens": 93513296} +{"current_steps": 138770, "total_steps": 204665, "loss": 0.0, "lr": 5.678227180591786e-07, "epoch": 3.390174187086214, "percentage": 67.8, "elapsed_time": "2:59:32", "remaining_time": "1:25:15", "throughput": 8681.04, "total_tokens": 93516624} +{"current_steps": 138775, "total_steps": 204665, "loss": 0.0, "lr": 5.677458172581115e-07, "epoch": 3.3902963379180613, "percentage": 67.81, "elapsed_time": "2:59:32", "remaining_time": "1:25:14", "throughput": 8681.08, "total_tokens": 93520144} +{"current_steps": 138780, "total_steps": 204665, "loss": 0.0, "lr": 5.676689196005129e-07, "epoch": 3.3904184887499085, "percentage": 67.81, "elapsed_time": "2:59:33", "remaining_time": "1:25:14", "throughput": 8681.11, "total_tokens": 93523408} +{"current_steps": 138785, "total_steps": 204665, "loss": 0.0, "lr": 5.675920250869426e-07, "epoch": 3.3905406395817557, "percentage": 67.81, "elapsed_time": "2:59:33", "remaining_time": "1:25:14", "throughput": 8681.15, "total_tokens": 93526928} +{"current_steps": 138790, "total_steps": 204665, "loss": 0.0, "lr": 5.67515133717959e-07, "epoch": 3.390662790413603, "percentage": 67.81, "elapsed_time": "2:59:33", "remaining_time": "1:25:13", "throughput": 8681.18, "total_tokens": 93530192} +{"current_steps": 138795, "total_steps": 204665, "loss": 0.0001, "lr": 5.674382454941215e-07, "epoch": 3.39078494124545, "percentage": 67.82, "elapsed_time": "2:59:34", "remaining_time": "1:25:13", "throughput": 8681.21, "total_tokens": 93533584} +{"current_steps": 138800, "total_steps": 204665, "loss": 0.0, "lr": 5.6736136041599e-07, "epoch": 3.3909070920772972, "percentage": 67.82, "elapsed_time": "2:59:34", "remaining_time": "1:25:12", "throughput": 8681.24, "total_tokens": 93536848} +{"current_steps": 138805, "total_steps": 204665, "loss": 0.0, "lr": 5.672844784841226e-07, "epoch": 3.391029242909144, "percentage": 67.82, "elapsed_time": "2:59:34", "remaining_time": "1:25:12", "throughput": 8681.27, "total_tokens": 93540240} +{"current_steps": 138810, "total_steps": 204665, "loss": 0.0, "lr": 5.672075996990792e-07, "epoch": 3.3911513937409916, "percentage": 67.82, "elapsed_time": "2:59:35", "remaining_time": "1:25:12", "throughput": 8681.32, "total_tokens": 93543760} +{"current_steps": 138815, "total_steps": 204665, "loss": 0.0, "lr": 5.671307240614183e-07, "epoch": 3.3912735445728384, "percentage": 67.83, "elapsed_time": "2:59:35", "remaining_time": "1:25:11", "throughput": 8681.32, "total_tokens": 93546704} +{"current_steps": 138820, "total_steps": 204665, "loss": 0.0, "lr": 5.670538515716996e-07, "epoch": 3.3913956954046856, "percentage": 67.83, "elapsed_time": "2:59:35", "remaining_time": "1:25:11", "throughput": 8681.34, "total_tokens": 93549904} +{"current_steps": 138825, "total_steps": 204665, "loss": 0.0227, "lr": 5.669769822304812e-07, "epoch": 3.3915178462365327, "percentage": 67.83, "elapsed_time": "2:59:36", "remaining_time": "1:25:10", "throughput": 8681.39, "total_tokens": 93553552} +{"current_steps": 138830, "total_steps": 204665, "loss": 0.0, "lr": 5.669001160383231e-07, "epoch": 3.39163999706838, "percentage": 67.83, "elapsed_time": "2:59:36", "remaining_time": "1:25:10", "throughput": 8681.44, "total_tokens": 93557072} +{"current_steps": 138835, "total_steps": 204665, "loss": 0.0002, "lr": 5.668232529957835e-07, "epoch": 3.391762147900227, "percentage": 67.84, "elapsed_time": "2:59:37", "remaining_time": "1:25:10", "throughput": 8681.45, "total_tokens": 93560208} +{"current_steps": 138840, "total_steps": 204665, "loss": 0.0, "lr": 5.667463931034219e-07, "epoch": 3.3918842987320743, "percentage": 67.84, "elapsed_time": "2:59:37", "remaining_time": "1:25:09", "throughput": 8681.47, "total_tokens": 93563344} +{"current_steps": 138845, "total_steps": 204665, "loss": 0.0002, "lr": 5.666695363617972e-07, "epoch": 3.3920064495639215, "percentage": 67.84, "elapsed_time": "2:59:37", "remaining_time": "1:25:09", "throughput": 8681.48, "total_tokens": 93566480} +{"current_steps": 138850, "total_steps": 204665, "loss": 0.0001, "lr": 5.66592682771468e-07, "epoch": 3.3921286003957687, "percentage": 67.84, "elapsed_time": "2:59:38", "remaining_time": "1:25:08", "throughput": 8681.51, "total_tokens": 93569808} +{"current_steps": 138855, "total_steps": 204665, "loss": 0.0001, "lr": 5.66515832332993e-07, "epoch": 3.392250751227616, "percentage": 67.85, "elapsed_time": "2:59:38", "remaining_time": "1:25:08", "throughput": 8681.55, "total_tokens": 93573264} +{"current_steps": 138860, "total_steps": 204665, "loss": 0.0144, "lr": 5.664389850469322e-07, "epoch": 3.392372902059463, "percentage": 67.85, "elapsed_time": "2:59:38", "remaining_time": "1:25:07", "throughput": 8681.56, "total_tokens": 93576400} +{"current_steps": 138865, "total_steps": 204665, "loss": 0.0, "lr": 5.663621409138431e-07, "epoch": 3.3924950528913103, "percentage": 67.85, "elapsed_time": "2:59:39", "remaining_time": "1:25:07", "throughput": 8681.59, "total_tokens": 93579728} +{"current_steps": 138870, "total_steps": 204665, "loss": 0.0, "lr": 5.662852999342856e-07, "epoch": 3.3926172037231574, "percentage": 67.85, "elapsed_time": "2:59:39", "remaining_time": "1:25:07", "throughput": 8681.63, "total_tokens": 93583184} +{"current_steps": 138875, "total_steps": 204665, "loss": 0.0, "lr": 5.662084621088177e-07, "epoch": 3.3927393545550046, "percentage": 67.85, "elapsed_time": "2:59:39", "remaining_time": "1:25:06", "throughput": 8681.65, "total_tokens": 93586256} +{"current_steps": 138880, "total_steps": 204665, "loss": 0.0395, "lr": 5.66131627437999e-07, "epoch": 3.392861505386852, "percentage": 67.86, "elapsed_time": "2:59:40", "remaining_time": "1:25:06", "throughput": 8681.69, "total_tokens": 93589776} +{"current_steps": 138885, "total_steps": 204665, "loss": 0.0, "lr": 5.660547959223871e-07, "epoch": 3.392983656218699, "percentage": 67.86, "elapsed_time": "2:59:40", "remaining_time": "1:25:05", "throughput": 8681.74, "total_tokens": 93593360} +{"current_steps": 138890, "total_steps": 204665, "loss": 0.0, "lr": 5.659779675625418e-07, "epoch": 3.393105807050546, "percentage": 67.86, "elapsed_time": "2:59:40", "remaining_time": "1:25:05", "throughput": 8681.76, "total_tokens": 93596624} +{"current_steps": 138895, "total_steps": 204665, "loss": 0.0004, "lr": 5.659011423590217e-07, "epoch": 3.3932279578823934, "percentage": 67.86, "elapsed_time": "2:59:41", "remaining_time": "1:25:05", "throughput": 8681.79, "total_tokens": 93599952} +{"current_steps": 138900, "total_steps": 204665, "loss": 0.0001, "lr": 5.658243203123848e-07, "epoch": 3.39335010871424, "percentage": 67.87, "elapsed_time": "2:59:41", "remaining_time": "1:25:04", "throughput": 8681.8, "total_tokens": 93603088} +{"current_steps": 138905, "total_steps": 204665, "loss": 0.0, "lr": 5.657475014231908e-07, "epoch": 3.3934722595460873, "percentage": 67.87, "elapsed_time": "2:59:41", "remaining_time": "1:25:04", "throughput": 8681.87, "total_tokens": 93606928} +{"current_steps": 138910, "total_steps": 204665, "loss": 0.0, "lr": 5.656706856919971e-07, "epoch": 3.3935944103779345, "percentage": 67.87, "elapsed_time": "2:59:42", "remaining_time": "1:25:03", "throughput": 8681.94, "total_tokens": 93610768} +{"current_steps": 138915, "total_steps": 204665, "loss": 0.0, "lr": 5.655938731193633e-07, "epoch": 3.3937165612097817, "percentage": 67.87, "elapsed_time": "2:59:42", "remaining_time": "1:25:03", "throughput": 8681.96, "total_tokens": 93614032} +{"current_steps": 138920, "total_steps": 204665, "loss": 0.0, "lr": 5.655170637058479e-07, "epoch": 3.393838712041629, "percentage": 67.88, "elapsed_time": "2:59:42", "remaining_time": "1:25:03", "throughput": 8681.98, "total_tokens": 93617232} +{"current_steps": 138925, "total_steps": 204665, "loss": 0.0, "lr": 5.654402574520088e-07, "epoch": 3.393960862873476, "percentage": 67.88, "elapsed_time": "2:59:43", "remaining_time": "1:25:02", "throughput": 8682.03, "total_tokens": 93620880} +{"current_steps": 138930, "total_steps": 204665, "loss": 0.0, "lr": 5.653634543584056e-07, "epoch": 3.3940830137053233, "percentage": 67.88, "elapsed_time": "2:59:43", "remaining_time": "1:25:02", "throughput": 8682.05, "total_tokens": 93624016} +{"current_steps": 138935, "total_steps": 204665, "loss": 0.0, "lr": 5.652866544255962e-07, "epoch": 3.3942051645371705, "percentage": 67.88, "elapsed_time": "2:59:43", "remaining_time": "1:25:01", "throughput": 8682.07, "total_tokens": 93627216} +{"current_steps": 138940, "total_steps": 204665, "loss": 0.0739, "lr": 5.652098576541387e-07, "epoch": 3.3943273153690177, "percentage": 67.89, "elapsed_time": "2:59:44", "remaining_time": "1:25:01", "throughput": 8682.08, "total_tokens": 93630288} +{"current_steps": 138945, "total_steps": 204665, "loss": 0.0, "lr": 5.651330640445926e-07, "epoch": 3.394449466200865, "percentage": 67.89, "elapsed_time": "2:59:44", "remaining_time": "1:25:01", "throughput": 8682.08, "total_tokens": 93633296} +{"current_steps": 138950, "total_steps": 204665, "loss": 0.0, "lr": 5.650562735975152e-07, "epoch": 3.394571617032712, "percentage": 67.89, "elapsed_time": "2:59:45", "remaining_time": "1:25:00", "throughput": 8682.13, "total_tokens": 93636816} +{"current_steps": 138955, "total_steps": 204665, "loss": 0.1083, "lr": 5.649794863134663e-07, "epoch": 3.3946937678645592, "percentage": 67.89, "elapsed_time": "2:59:45", "remaining_time": "1:25:00", "throughput": 8682.16, "total_tokens": 93640144} +{"current_steps": 138960, "total_steps": 204665, "loss": 0.0, "lr": 5.649027021930031e-07, "epoch": 3.3948159186964064, "percentage": 67.9, "elapsed_time": "2:59:45", "remaining_time": "1:24:59", "throughput": 8682.19, "total_tokens": 93643472} +{"current_steps": 138965, "total_steps": 204665, "loss": 0.0, "lr": 5.648259212366847e-07, "epoch": 3.3949380695282536, "percentage": 67.9, "elapsed_time": "2:59:46", "remaining_time": "1:24:59", "throughput": 8682.23, "total_tokens": 93646928} +{"current_steps": 138970, "total_steps": 204665, "loss": 0.0, "lr": 5.647491434450688e-07, "epoch": 3.395060220360101, "percentage": 67.9, "elapsed_time": "2:59:46", "remaining_time": "1:24:59", "throughput": 8682.27, "total_tokens": 93650512} +{"current_steps": 138975, "total_steps": 204665, "loss": 0.0, "lr": 5.646723688187148e-07, "epoch": 3.395182371191948, "percentage": 67.9, "elapsed_time": "2:59:46", "remaining_time": "1:24:58", "throughput": 8682.29, "total_tokens": 93653712} +{"current_steps": 138980, "total_steps": 204665, "loss": 0.0005, "lr": 5.645955973581799e-07, "epoch": 3.395304522023795, "percentage": 67.91, "elapsed_time": "2:59:47", "remaining_time": "1:24:58", "throughput": 8682.32, "total_tokens": 93656976} +{"current_steps": 138985, "total_steps": 204665, "loss": 0.0, "lr": 5.645188290640231e-07, "epoch": 3.395426672855642, "percentage": 67.91, "elapsed_time": "2:59:47", "remaining_time": "1:24:57", "throughput": 8682.36, "total_tokens": 93660432} +{"current_steps": 138990, "total_steps": 204665, "loss": 0.0, "lr": 5.644420639368028e-07, "epoch": 3.3955488236874896, "percentage": 67.91, "elapsed_time": "2:59:47", "remaining_time": "1:24:57", "throughput": 8682.38, "total_tokens": 93663696} +{"current_steps": 138995, "total_steps": 204665, "loss": 0.0355, "lr": 5.643653019770764e-07, "epoch": 3.3956709745193363, "percentage": 67.91, "elapsed_time": "2:59:48", "remaining_time": "1:24:56", "throughput": 8682.4, "total_tokens": 93666896} +{"current_steps": 139000, "total_steps": 204665, "loss": 0.0, "lr": 5.642885431854034e-07, "epoch": 3.3957931253511835, "percentage": 67.92, "elapsed_time": "2:59:48", "remaining_time": "1:24:56", "throughput": 8682.41, "total_tokens": 93669904} +{"current_steps": 139005, "total_steps": 204665, "loss": 0.0, "lr": 5.642117875623406e-07, "epoch": 3.3959152761830307, "percentage": 67.92, "elapsed_time": "2:59:48", "remaining_time": "1:24:56", "throughput": 8682.45, "total_tokens": 93673424} +{"current_steps": 139010, "total_steps": 204665, "loss": 0.0, "lr": 5.641350351084471e-07, "epoch": 3.396037427014878, "percentage": 67.92, "elapsed_time": "2:59:49", "remaining_time": "1:24:55", "throughput": 8682.45, "total_tokens": 93676432} +{"current_steps": 139015, "total_steps": 204665, "loss": 0.0412, "lr": 5.640582858242812e-07, "epoch": 3.396159577846725, "percentage": 67.92, "elapsed_time": "2:59:49", "remaining_time": "1:24:55", "throughput": 8682.53, "total_tokens": 93680272} +{"current_steps": 139020, "total_steps": 204665, "loss": 0.0, "lr": 5.639815397104004e-07, "epoch": 3.3962817286785723, "percentage": 67.93, "elapsed_time": "2:59:49", "remaining_time": "1:24:54", "throughput": 8682.56, "total_tokens": 93683664} +{"current_steps": 139025, "total_steps": 204665, "loss": 0.0011, "lr": 5.639047967673634e-07, "epoch": 3.3964038795104194, "percentage": 67.93, "elapsed_time": "2:59:50", "remaining_time": "1:24:54", "throughput": 8682.6, "total_tokens": 93687120} +{"current_steps": 139030, "total_steps": 204665, "loss": 0.0, "lr": 5.638280569957277e-07, "epoch": 3.3965260303422666, "percentage": 67.93, "elapsed_time": "2:59:50", "remaining_time": "1:24:54", "throughput": 8682.62, "total_tokens": 93690384} +{"current_steps": 139035, "total_steps": 204665, "loss": 0.0, "lr": 5.637513203960519e-07, "epoch": 3.396648181174114, "percentage": 67.93, "elapsed_time": "2:59:50", "remaining_time": "1:24:53", "throughput": 8682.62, "total_tokens": 93693328} +{"current_steps": 139040, "total_steps": 204665, "loss": 0.0001, "lr": 5.636745869688939e-07, "epoch": 3.396770332005961, "percentage": 67.94, "elapsed_time": "2:59:51", "remaining_time": "1:24:53", "throughput": 8682.65, "total_tokens": 93696592} +{"current_steps": 139045, "total_steps": 204665, "loss": 0.0, "lr": 5.635978567148114e-07, "epoch": 3.396892482837808, "percentage": 67.94, "elapsed_time": "2:59:51", "remaining_time": "1:24:52", "throughput": 8682.67, "total_tokens": 93699856} +{"current_steps": 139050, "total_steps": 204665, "loss": 0.0, "lr": 5.63521129634363e-07, "epoch": 3.3970146336696554, "percentage": 67.94, "elapsed_time": "2:59:51", "remaining_time": "1:24:52", "throughput": 8682.68, "total_tokens": 93702800} +{"current_steps": 139055, "total_steps": 204665, "loss": 0.0, "lr": 5.634444057281058e-07, "epoch": 3.3971367845015026, "percentage": 67.94, "elapsed_time": "2:59:52", "remaining_time": "1:24:52", "throughput": 8682.71, "total_tokens": 93706256} +{"current_steps": 139060, "total_steps": 204665, "loss": 0.0868, "lr": 5.633676849965989e-07, "epoch": 3.3972589353333498, "percentage": 67.95, "elapsed_time": "2:59:52", "remaining_time": "1:24:51", "throughput": 8682.78, "total_tokens": 93710032} +{"current_steps": 139065, "total_steps": 204665, "loss": 0.0, "lr": 5.632909674403991e-07, "epoch": 3.397381086165197, "percentage": 67.95, "elapsed_time": "2:59:52", "remaining_time": "1:24:51", "throughput": 8682.79, "total_tokens": 93713104} +{"current_steps": 139070, "total_steps": 204665, "loss": 0.0, "lr": 5.63214253060065e-07, "epoch": 3.3975032369970437, "percentage": 67.95, "elapsed_time": "2:59:53", "remaining_time": "1:24:50", "throughput": 8682.85, "total_tokens": 93716752} +{"current_steps": 139075, "total_steps": 204665, "loss": 0.0, "lr": 5.631375418561546e-07, "epoch": 3.3976253878288913, "percentage": 67.95, "elapsed_time": "2:59:53", "remaining_time": "1:24:50", "throughput": 8682.9, "total_tokens": 93720400} +{"current_steps": 139080, "total_steps": 204665, "loss": 0.0909, "lr": 5.630608338292251e-07, "epoch": 3.397747538660738, "percentage": 67.95, "elapsed_time": "2:59:54", "remaining_time": "1:24:50", "throughput": 8682.94, "total_tokens": 93723792} +{"current_steps": 139085, "total_steps": 204665, "loss": 0.0, "lr": 5.629841289798352e-07, "epoch": 3.3978696894925853, "percentage": 67.96, "elapsed_time": "2:59:54", "remaining_time": "1:24:49", "throughput": 8682.97, "total_tokens": 93727184} +{"current_steps": 139090, "total_steps": 204665, "loss": 0.0, "lr": 5.629074273085419e-07, "epoch": 3.3979918403244325, "percentage": 67.96, "elapsed_time": "2:59:54", "remaining_time": "1:24:49", "throughput": 8683.04, "total_tokens": 93731024} +{"current_steps": 139095, "total_steps": 204665, "loss": 0.0, "lr": 5.628307288159035e-07, "epoch": 3.3981139911562797, "percentage": 67.96, "elapsed_time": "2:59:55", "remaining_time": "1:24:48", "throughput": 8683.06, "total_tokens": 93734224} +{"current_steps": 139100, "total_steps": 204665, "loss": 0.0, "lr": 5.627540335024776e-07, "epoch": 3.398236141988127, "percentage": 67.96, "elapsed_time": "2:59:55", "remaining_time": "1:24:48", "throughput": 8683.06, "total_tokens": 93737168} +{"current_steps": 139105, "total_steps": 204665, "loss": 0.0, "lr": 5.626773413688218e-07, "epoch": 3.398358292819974, "percentage": 67.97, "elapsed_time": "2:59:55", "remaining_time": "1:24:48", "throughput": 8683.08, "total_tokens": 93740368} +{"current_steps": 139110, "total_steps": 204665, "loss": 0.0448, "lr": 5.626006524154943e-07, "epoch": 3.3984804436518212, "percentage": 67.97, "elapsed_time": "2:59:56", "remaining_time": "1:24:47", "throughput": 8683.1, "total_tokens": 93743568} +{"current_steps": 139115, "total_steps": 204665, "loss": 0.0365, "lr": 5.625239666430521e-07, "epoch": 3.3986025944836684, "percentage": 67.97, "elapsed_time": "2:59:56", "remaining_time": "1:24:47", "throughput": 8683.14, "total_tokens": 93747088} +{"current_steps": 139120, "total_steps": 204665, "loss": 0.0, "lr": 5.624472840520538e-07, "epoch": 3.3987247453155156, "percentage": 67.97, "elapsed_time": "2:59:56", "remaining_time": "1:24:46", "throughput": 8683.26, "total_tokens": 93751568} +{"current_steps": 139125, "total_steps": 204665, "loss": 0.0, "lr": 5.623706046430561e-07, "epoch": 3.398846896147363, "percentage": 67.98, "elapsed_time": "2:59:57", "remaining_time": "1:24:46", "throughput": 8683.28, "total_tokens": 93754704} +{"current_steps": 139130, "total_steps": 204665, "loss": 0.0, "lr": 5.622939284166175e-07, "epoch": 3.39896904697921, "percentage": 67.98, "elapsed_time": "2:59:57", "remaining_time": "1:24:45", "throughput": 8683.32, "total_tokens": 93758288} +{"current_steps": 139135, "total_steps": 204665, "loss": 0.0, "lr": 5.622172553732946e-07, "epoch": 3.399091197811057, "percentage": 67.98, "elapsed_time": "2:59:57", "remaining_time": "1:24:45", "throughput": 8683.51, "total_tokens": 93763856} +{"current_steps": 139140, "total_steps": 204665, "loss": 0.0, "lr": 5.621405855136463e-07, "epoch": 3.3992133486429044, "percentage": 67.98, "elapsed_time": "2:59:58", "remaining_time": "1:24:45", "throughput": 8683.52, "total_tokens": 93766864} +{"current_steps": 139145, "total_steps": 204665, "loss": 0.0001, "lr": 5.620639188382287e-07, "epoch": 3.3993354994747516, "percentage": 67.99, "elapsed_time": "2:59:58", "remaining_time": "1:24:44", "throughput": 8683.54, "total_tokens": 93770064} +{"current_steps": 139150, "total_steps": 204665, "loss": 0.0, "lr": 5.619872553476007e-07, "epoch": 3.3994576503065987, "percentage": 67.99, "elapsed_time": "2:59:58", "remaining_time": "1:24:44", "throughput": 8683.58, "total_tokens": 93773584} +{"current_steps": 139155, "total_steps": 204665, "loss": 0.0001, "lr": 5.619105950423191e-07, "epoch": 3.399579801138446, "percentage": 67.99, "elapsed_time": "2:59:59", "remaining_time": "1:24:43", "throughput": 8683.63, "total_tokens": 93777104} +{"current_steps": 139160, "total_steps": 204665, "loss": 0.0, "lr": 5.618339379229411e-07, "epoch": 3.399701951970293, "percentage": 67.99, "elapsed_time": "2:59:59", "remaining_time": "1:24:43", "throughput": 8683.64, "total_tokens": 93780176} +{"current_steps": 139165, "total_steps": 204665, "loss": 0.0, "lr": 5.617572839900246e-07, "epoch": 3.39982410280214, "percentage": 68.0, "elapsed_time": "2:59:59", "remaining_time": "1:24:43", "throughput": 8683.69, "total_tokens": 93783888} +{"current_steps": 139170, "total_steps": 204665, "loss": 0.0, "lr": 5.616806332441274e-07, "epoch": 3.399946253633987, "percentage": 68.0, "elapsed_time": "3:00:00", "remaining_time": "1:24:42", "throughput": 8683.74, "total_tokens": 93787472} +{"current_steps": 139175, "total_steps": 204665, "loss": 0.0001, "lr": 5.616039856858062e-07, "epoch": 3.4000684044658342, "percentage": 68.0, "elapsed_time": "3:00:00", "remaining_time": "1:24:42", "throughput": 8683.78, "total_tokens": 93790864} +{"current_steps": 139180, "total_steps": 204665, "loss": 0.0, "lr": 5.61527341315619e-07, "epoch": 3.4001905552976814, "percentage": 68.0, "elapsed_time": "3:00:01", "remaining_time": "1:24:41", "throughput": 8683.8, "total_tokens": 93794064} +{"current_steps": 139185, "total_steps": 204665, "loss": 0.0, "lr": 5.614507001341224e-07, "epoch": 3.4003127061295286, "percentage": 68.01, "elapsed_time": "3:00:01", "remaining_time": "1:24:41", "throughput": 8683.82, "total_tokens": 93797264} +{"current_steps": 139190, "total_steps": 204665, "loss": 0.1035, "lr": 5.613740621418748e-07, "epoch": 3.400434856961376, "percentage": 68.01, "elapsed_time": "3:00:01", "remaining_time": "1:24:41", "throughput": 8683.85, "total_tokens": 93800592} +{"current_steps": 139195, "total_steps": 204665, "loss": 0.0, "lr": 5.612974273394327e-07, "epoch": 3.400557007793223, "percentage": 68.01, "elapsed_time": "3:00:02", "remaining_time": "1:24:40", "throughput": 8683.88, "total_tokens": 93803920} +{"current_steps": 139200, "total_steps": 204665, "loss": 0.0002, "lr": 5.612207957273535e-07, "epoch": 3.40067915862507, "percentage": 68.01, "elapsed_time": "3:00:02", "remaining_time": "1:24:40", "throughput": 8683.91, "total_tokens": 93807248} +{"current_steps": 139205, "total_steps": 204665, "loss": 0.0, "lr": 5.611441673061951e-07, "epoch": 3.4008013094569174, "percentage": 68.02, "elapsed_time": "3:00:02", "remaining_time": "1:24:39", "throughput": 8683.95, "total_tokens": 93810704} +{"current_steps": 139210, "total_steps": 204665, "loss": 0.0, "lr": 5.610675420765141e-07, "epoch": 3.4009234602887646, "percentage": 68.02, "elapsed_time": "3:00:03", "remaining_time": "1:24:39", "throughput": 8683.97, "total_tokens": 93813968} +{"current_steps": 139215, "total_steps": 204665, "loss": 0.0002, "lr": 5.609909200388683e-07, "epoch": 3.4010456111206118, "percentage": 68.02, "elapsed_time": "3:00:03", "remaining_time": "1:24:39", "throughput": 8683.99, "total_tokens": 93817104} +{"current_steps": 139220, "total_steps": 204665, "loss": 0.0, "lr": 5.609143011938143e-07, "epoch": 3.401167761952459, "percentage": 68.02, "elapsed_time": "3:00:03", "remaining_time": "1:24:38", "throughput": 8684.17, "total_tokens": 93822544} +{"current_steps": 139225, "total_steps": 204665, "loss": 0.0, "lr": 5.608376855419094e-07, "epoch": 3.401289912784306, "percentage": 68.03, "elapsed_time": "3:00:04", "remaining_time": "1:24:38", "throughput": 8684.2, "total_tokens": 93825872} +{"current_steps": 139230, "total_steps": 204665, "loss": 0.0, "lr": 5.607610730837116e-07, "epoch": 3.4014120636161533, "percentage": 68.03, "elapsed_time": "3:00:04", "remaining_time": "1:24:37", "throughput": 8684.2, "total_tokens": 93828752} +{"current_steps": 139235, "total_steps": 204665, "loss": 0.0003, "lr": 5.60684463819777e-07, "epoch": 3.4015342144480005, "percentage": 68.03, "elapsed_time": "3:00:04", "remaining_time": "1:24:37", "throughput": 8684.24, "total_tokens": 93832272} +{"current_steps": 139240, "total_steps": 204665, "loss": 0.0, "lr": 5.606078577506635e-07, "epoch": 3.4016563652798477, "percentage": 68.03, "elapsed_time": "3:00:05", "remaining_time": "1:24:37", "throughput": 8684.28, "total_tokens": 93835664} +{"current_steps": 139245, "total_steps": 204665, "loss": 0.0, "lr": 5.605312548769278e-07, "epoch": 3.401778516111695, "percentage": 68.04, "elapsed_time": "3:00:05", "remaining_time": "1:24:36", "throughput": 8684.33, "total_tokens": 93839312} +{"current_steps": 139250, "total_steps": 204665, "loss": 0.0, "lr": 5.604546551991266e-07, "epoch": 3.4019006669435417, "percentage": 68.04, "elapsed_time": "3:00:05", "remaining_time": "1:24:36", "throughput": 8684.35, "total_tokens": 93842512} +{"current_steps": 139255, "total_steps": 204665, "loss": 0.0399, "lr": 5.603780587178177e-07, "epoch": 3.4020228177753893, "percentage": 68.04, "elapsed_time": "3:00:06", "remaining_time": "1:24:35", "throughput": 8684.37, "total_tokens": 93845776} +{"current_steps": 139260, "total_steps": 204665, "loss": 0.0, "lr": 5.603014654335576e-07, "epoch": 3.402144968607236, "percentage": 68.04, "elapsed_time": "3:00:06", "remaining_time": "1:24:35", "throughput": 8684.38, "total_tokens": 93848784} +{"current_steps": 139265, "total_steps": 204665, "loss": 0.0, "lr": 5.602248753469039e-07, "epoch": 3.402267119439083, "percentage": 68.05, "elapsed_time": "3:00:06", "remaining_time": "1:24:35", "throughput": 8684.45, "total_tokens": 93852752} +{"current_steps": 139270, "total_steps": 204665, "loss": 0.0, "lr": 5.601482884584125e-07, "epoch": 3.4023892702709304, "percentage": 68.05, "elapsed_time": "3:00:07", "remaining_time": "1:24:34", "throughput": 8684.48, "total_tokens": 93856016} +{"current_steps": 139275, "total_steps": 204665, "loss": 0.0425, "lr": 5.600717047686417e-07, "epoch": 3.4025114211027776, "percentage": 68.05, "elapsed_time": "3:00:07", "remaining_time": "1:24:34", "throughput": 8684.53, "total_tokens": 93859728} +{"current_steps": 139280, "total_steps": 204665, "loss": 0.135, "lr": 5.599951242781473e-07, "epoch": 3.402633571934625, "percentage": 68.05, "elapsed_time": "3:00:08", "remaining_time": "1:24:33", "throughput": 8684.53, "total_tokens": 93862544} +{"current_steps": 139285, "total_steps": 204665, "loss": 0.0, "lr": 5.599185469874872e-07, "epoch": 3.402755722766472, "percentage": 68.06, "elapsed_time": "3:00:08", "remaining_time": "1:24:33", "throughput": 8684.56, "total_tokens": 93866000} +{"current_steps": 139290, "total_steps": 204665, "loss": 0.0763, "lr": 5.598419728972174e-07, "epoch": 3.402877873598319, "percentage": 68.06, "elapsed_time": "3:00:08", "remaining_time": "1:24:33", "throughput": 8684.6, "total_tokens": 93869456} +{"current_steps": 139295, "total_steps": 204665, "loss": 0.0026, "lr": 5.59765402007895e-07, "epoch": 3.4030000244301664, "percentage": 68.06, "elapsed_time": "3:00:09", "remaining_time": "1:24:32", "throughput": 8684.63, "total_tokens": 93872784} +{"current_steps": 139300, "total_steps": 204665, "loss": 0.0, "lr": 5.596888343200776e-07, "epoch": 3.4031221752620135, "percentage": 68.06, "elapsed_time": "3:00:09", "remaining_time": "1:24:32", "throughput": 8684.67, "total_tokens": 93876304} +{"current_steps": 139305, "total_steps": 204665, "loss": 0.0, "lr": 5.596122698343208e-07, "epoch": 3.4032443260938607, "percentage": 68.06, "elapsed_time": "3:00:09", "remaining_time": "1:24:31", "throughput": 8684.71, "total_tokens": 93879760} +{"current_steps": 139310, "total_steps": 204665, "loss": 0.0004, "lr": 5.595357085511827e-07, "epoch": 3.403366476925708, "percentage": 68.07, "elapsed_time": "3:00:10", "remaining_time": "1:24:31", "throughput": 8684.74, "total_tokens": 93883024} +{"current_steps": 139315, "total_steps": 204665, "loss": 0.0003, "lr": 5.594591504712189e-07, "epoch": 3.403488627757555, "percentage": 68.07, "elapsed_time": "3:00:10", "remaining_time": "1:24:30", "throughput": 8684.76, "total_tokens": 93886224} +{"current_steps": 139320, "total_steps": 204665, "loss": 0.0279, "lr": 5.593825955949865e-07, "epoch": 3.4036107785894023, "percentage": 68.07, "elapsed_time": "3:00:10", "remaining_time": "1:24:30", "throughput": 8684.79, "total_tokens": 93889552} +{"current_steps": 139325, "total_steps": 204665, "loss": 0.0, "lr": 5.593060439230429e-07, "epoch": 3.4037329294212495, "percentage": 68.07, "elapsed_time": "3:00:11", "remaining_time": "1:24:30", "throughput": 8684.83, "total_tokens": 93893072} +{"current_steps": 139330, "total_steps": 204665, "loss": 0.0, "lr": 5.592294954559439e-07, "epoch": 3.4038550802530967, "percentage": 68.08, "elapsed_time": "3:00:11", "remaining_time": "1:24:29", "throughput": 8684.87, "total_tokens": 93896656} +{"current_steps": 139335, "total_steps": 204665, "loss": 0.0, "lr": 5.591529501942469e-07, "epoch": 3.403977231084944, "percentage": 68.08, "elapsed_time": "3:00:11", "remaining_time": "1:24:29", "throughput": 8684.93, "total_tokens": 93900304} +{"current_steps": 139340, "total_steps": 204665, "loss": 0.0, "lr": 5.590764081385078e-07, "epoch": 3.404099381916791, "percentage": 68.08, "elapsed_time": "3:00:12", "remaining_time": "1:24:28", "throughput": 8684.97, "total_tokens": 93903760} +{"current_steps": 139345, "total_steps": 204665, "loss": 0.0626, "lr": 5.589998692892841e-07, "epoch": 3.404221532748638, "percentage": 68.08, "elapsed_time": "3:00:12", "remaining_time": "1:24:28", "throughput": 8685.0, "total_tokens": 93907216} +{"current_steps": 139350, "total_steps": 204665, "loss": 0.0524, "lr": 5.589233336471316e-07, "epoch": 3.404343683580485, "percentage": 68.09, "elapsed_time": "3:00:12", "remaining_time": "1:24:28", "throughput": 8685.03, "total_tokens": 93910544} +{"current_steps": 139355, "total_steps": 204665, "loss": 0.0, "lr": 5.588468012126076e-07, "epoch": 3.404465834412332, "percentage": 68.09, "elapsed_time": "3:00:13", "remaining_time": "1:24:27", "throughput": 8685.07, "total_tokens": 93914000} +{"current_steps": 139360, "total_steps": 204665, "loss": 0.0002, "lr": 5.587702719862683e-07, "epoch": 3.4045879852441794, "percentage": 68.09, "elapsed_time": "3:00:13", "remaining_time": "1:24:27", "throughput": 8685.12, "total_tokens": 93917584} +{"current_steps": 139365, "total_steps": 204665, "loss": 0.0134, "lr": 5.586937459686701e-07, "epoch": 3.4047101360760266, "percentage": 68.09, "elapsed_time": "3:00:13", "remaining_time": "1:24:26", "throughput": 8685.14, "total_tokens": 93920720} +{"current_steps": 139370, "total_steps": 204665, "loss": 0.0004, "lr": 5.586172231603697e-07, "epoch": 3.4048322869078738, "percentage": 68.1, "elapsed_time": "3:00:14", "remaining_time": "1:24:26", "throughput": 8685.15, "total_tokens": 93923856} +{"current_steps": 139375, "total_steps": 204665, "loss": 0.0001, "lr": 5.585407035619234e-07, "epoch": 3.404954437739721, "percentage": 68.1, "elapsed_time": "3:00:14", "remaining_time": "1:24:26", "throughput": 8685.17, "total_tokens": 93927056} +{"current_steps": 139380, "total_steps": 204665, "loss": 0.0, "lr": 5.584641871738882e-07, "epoch": 3.405076588571568, "percentage": 68.1, "elapsed_time": "3:00:14", "remaining_time": "1:24:25", "throughput": 8685.22, "total_tokens": 93930640} +{"current_steps": 139385, "total_steps": 204665, "loss": 0.0, "lr": 5.583876739968197e-07, "epoch": 3.4051987394034153, "percentage": 68.1, "elapsed_time": "3:00:15", "remaining_time": "1:24:25", "throughput": 8685.26, "total_tokens": 93934096} +{"current_steps": 139390, "total_steps": 204665, "loss": 0.0005, "lr": 5.58311164031275e-07, "epoch": 3.4053208902352625, "percentage": 68.11, "elapsed_time": "3:00:15", "remaining_time": "1:24:24", "throughput": 8685.27, "total_tokens": 93937168} +{"current_steps": 139395, "total_steps": 204665, "loss": 0.0956, "lr": 5.582346572778104e-07, "epoch": 3.4054430410671097, "percentage": 68.11, "elapsed_time": "3:00:16", "remaining_time": "1:24:24", "throughput": 8685.3, "total_tokens": 93940496} +{"current_steps": 139400, "total_steps": 204665, "loss": 0.0378, "lr": 5.581581537369821e-07, "epoch": 3.405565191898957, "percentage": 68.11, "elapsed_time": "3:00:16", "remaining_time": "1:24:24", "throughput": 8685.32, "total_tokens": 93943696} +{"current_steps": 139405, "total_steps": 204665, "loss": 0.0002, "lr": 5.580816534093468e-07, "epoch": 3.405687342730804, "percentage": 68.11, "elapsed_time": "3:00:16", "remaining_time": "1:24:23", "throughput": 8685.35, "total_tokens": 93947088} +{"current_steps": 139410, "total_steps": 204665, "loss": 0.0, "lr": 5.580051562954602e-07, "epoch": 3.4058094935626513, "percentage": 68.12, "elapsed_time": "3:00:17", "remaining_time": "1:24:23", "throughput": 8685.37, "total_tokens": 93950224} +{"current_steps": 139415, "total_steps": 204665, "loss": 0.0001, "lr": 5.57928662395879e-07, "epoch": 3.4059316443944985, "percentage": 68.12, "elapsed_time": "3:00:17", "remaining_time": "1:24:22", "throughput": 8685.43, "total_tokens": 93954000} +{"current_steps": 139420, "total_steps": 204665, "loss": 0.0, "lr": 5.5785217171116e-07, "epoch": 3.4060537952263457, "percentage": 68.12, "elapsed_time": "3:00:17", "remaining_time": "1:24:22", "throughput": 8685.48, "total_tokens": 93957712} +{"current_steps": 139425, "total_steps": 204665, "loss": 0.0, "lr": 5.577756842418584e-07, "epoch": 3.406175946058193, "percentage": 68.12, "elapsed_time": "3:00:18", "remaining_time": "1:24:22", "throughput": 8685.54, "total_tokens": 93961360} +{"current_steps": 139430, "total_steps": 204665, "loss": 0.0, "lr": 5.576991999885313e-07, "epoch": 3.4062980968900396, "percentage": 68.13, "elapsed_time": "3:00:18", "remaining_time": "1:24:21", "throughput": 8685.55, "total_tokens": 93964496} +{"current_steps": 139435, "total_steps": 204665, "loss": 0.0, "lr": 5.576227189517343e-07, "epoch": 3.4064202477218872, "percentage": 68.13, "elapsed_time": "3:00:18", "remaining_time": "1:24:21", "throughput": 8685.6, "total_tokens": 93968016} +{"current_steps": 139440, "total_steps": 204665, "loss": 0.0, "lr": 5.575462411320241e-07, "epoch": 3.406542398553734, "percentage": 68.13, "elapsed_time": "3:00:19", "remaining_time": "1:24:20", "throughput": 8685.65, "total_tokens": 93971664} +{"current_steps": 139445, "total_steps": 204665, "loss": 0.0, "lr": 5.574697665299565e-07, "epoch": 3.406664549385581, "percentage": 68.13, "elapsed_time": "3:00:19", "remaining_time": "1:24:20", "throughput": 8685.68, "total_tokens": 93974992} +{"current_steps": 139450, "total_steps": 204665, "loss": 0.0514, "lr": 5.573932951460881e-07, "epoch": 3.4067867002174284, "percentage": 68.14, "elapsed_time": "3:00:19", "remaining_time": "1:24:20", "throughput": 8685.69, "total_tokens": 93978064} +{"current_steps": 139455, "total_steps": 204665, "loss": 0.0, "lr": 5.573168269809743e-07, "epoch": 3.4069088510492755, "percentage": 68.14, "elapsed_time": "3:00:20", "remaining_time": "1:24:19", "throughput": 8685.73, "total_tokens": 93981520} +{"current_steps": 139460, "total_steps": 204665, "loss": 0.0, "lr": 5.572403620351718e-07, "epoch": 3.4070310018811227, "percentage": 68.14, "elapsed_time": "3:00:20", "remaining_time": "1:24:19", "throughput": 8685.76, "total_tokens": 93984848} +{"current_steps": 139465, "total_steps": 204665, "loss": 0.0, "lr": 5.571639003092368e-07, "epoch": 3.40715315271297, "percentage": 68.14, "elapsed_time": "3:00:20", "remaining_time": "1:24:18", "throughput": 8685.81, "total_tokens": 93988496} +{"current_steps": 139470, "total_steps": 204665, "loss": 0.0, "lr": 5.570874418037244e-07, "epoch": 3.407275303544817, "percentage": 68.15, "elapsed_time": "3:00:21", "remaining_time": "1:24:18", "throughput": 8685.81, "total_tokens": 93991440} +{"current_steps": 139475, "total_steps": 204665, "loss": 0.0, "lr": 5.570109865191912e-07, "epoch": 3.4073974543766643, "percentage": 68.15, "elapsed_time": "3:00:21", "remaining_time": "1:24:17", "throughput": 8685.85, "total_tokens": 93994896} +{"current_steps": 139480, "total_steps": 204665, "loss": 0.0, "lr": 5.569345344561938e-07, "epoch": 3.4075196052085115, "percentage": 68.15, "elapsed_time": "3:00:21", "remaining_time": "1:24:17", "throughput": 8685.86, "total_tokens": 93998032} +{"current_steps": 139485, "total_steps": 204665, "loss": 0.0, "lr": 5.568580856152873e-07, "epoch": 3.4076417560403587, "percentage": 68.15, "elapsed_time": "3:00:22", "remaining_time": "1:24:17", "throughput": 8685.85, "total_tokens": 94000784} +{"current_steps": 139490, "total_steps": 204665, "loss": 0.0002, "lr": 5.567816399970282e-07, "epoch": 3.407763906872206, "percentage": 68.16, "elapsed_time": "3:00:22", "remaining_time": "1:24:16", "throughput": 8685.88, "total_tokens": 94004048} +{"current_steps": 139495, "total_steps": 204665, "loss": 0.0001, "lr": 5.567051976019719e-07, "epoch": 3.407886057704053, "percentage": 68.16, "elapsed_time": "3:00:22", "remaining_time": "1:24:16", "throughput": 8685.87, "total_tokens": 94006928} +{"current_steps": 139500, "total_steps": 204665, "loss": 0.0569, "lr": 5.566287584306751e-07, "epoch": 3.4080082085359003, "percentage": 68.16, "elapsed_time": "3:00:23", "remaining_time": "1:24:15", "throughput": 8685.89, "total_tokens": 94010128} +{"current_steps": 139505, "total_steps": 204665, "loss": 0.0003, "lr": 5.565523224836928e-07, "epoch": 3.4081303593677474, "percentage": 68.16, "elapsed_time": "3:00:23", "remaining_time": "1:24:15", "throughput": 8685.9, "total_tokens": 94013200} +{"current_steps": 139510, "total_steps": 204665, "loss": 0.0003, "lr": 5.564758897615813e-07, "epoch": 3.4082525101995946, "percentage": 68.17, "elapsed_time": "3:00:24", "remaining_time": "1:24:15", "throughput": 8685.93, "total_tokens": 94016528} +{"current_steps": 139515, "total_steps": 204665, "loss": 0.0001, "lr": 5.563994602648967e-07, "epoch": 3.4083746610314414, "percentage": 68.17, "elapsed_time": "3:00:24", "remaining_time": "1:24:14", "throughput": 8685.95, "total_tokens": 94019728} +{"current_steps": 139520, "total_steps": 204665, "loss": 0.0, "lr": 5.563230339941942e-07, "epoch": 3.408496811863289, "percentage": 68.17, "elapsed_time": "3:00:24", "remaining_time": "1:24:14", "throughput": 8685.98, "total_tokens": 94023184} +{"current_steps": 139525, "total_steps": 204665, "loss": 0.0, "lr": 5.562466109500304e-07, "epoch": 3.4086189626951358, "percentage": 68.17, "elapsed_time": "3:00:25", "remaining_time": "1:24:13", "throughput": 8686.0, "total_tokens": 94026320} +{"current_steps": 139530, "total_steps": 204665, "loss": 0.0, "lr": 5.561701911329602e-07, "epoch": 3.408741113526983, "percentage": 68.17, "elapsed_time": "3:00:25", "remaining_time": "1:24:13", "throughput": 8686.01, "total_tokens": 94029456} +{"current_steps": 139535, "total_steps": 204665, "loss": 0.05, "lr": 5.560937745435401e-07, "epoch": 3.40886326435883, "percentage": 68.18, "elapsed_time": "3:00:25", "remaining_time": "1:24:13", "throughput": 8686.07, "total_tokens": 94033168} +{"current_steps": 139540, "total_steps": 204665, "loss": 0.0001, "lr": 5.560173611823251e-07, "epoch": 3.4089854151906773, "percentage": 68.18, "elapsed_time": "3:00:26", "remaining_time": "1:24:12", "throughput": 8686.11, "total_tokens": 94036624} +{"current_steps": 139545, "total_steps": 204665, "loss": 0.0001, "lr": 5.559409510498711e-07, "epoch": 3.4091075660225245, "percentage": 68.18, "elapsed_time": "3:00:26", "remaining_time": "1:24:12", "throughput": 8686.22, "total_tokens": 94041040} +{"current_steps": 139550, "total_steps": 204665, "loss": 0.0, "lr": 5.558645441467346e-07, "epoch": 3.4092297168543717, "percentage": 68.18, "elapsed_time": "3:00:26", "remaining_time": "1:24:11", "throughput": 8686.24, "total_tokens": 94044240} +{"current_steps": 139555, "total_steps": 204665, "loss": 0.0, "lr": 5.557881404734705e-07, "epoch": 3.409351867686219, "percentage": 68.19, "elapsed_time": "3:00:27", "remaining_time": "1:24:11", "throughput": 8686.24, "total_tokens": 94047184} +{"current_steps": 139560, "total_steps": 204665, "loss": 0.0, "lr": 5.557117400306341e-07, "epoch": 3.409474018518066, "percentage": 68.19, "elapsed_time": "3:00:27", "remaining_time": "1:24:11", "throughput": 8686.26, "total_tokens": 94050448} +{"current_steps": 139565, "total_steps": 204665, "loss": 0.0, "lr": 5.556353428187818e-07, "epoch": 3.4095961693499133, "percentage": 68.19, "elapsed_time": "3:00:27", "remaining_time": "1:24:10", "throughput": 8686.32, "total_tokens": 94054160} +{"current_steps": 139570, "total_steps": 204665, "loss": 0.0, "lr": 5.555589488384685e-07, "epoch": 3.4097183201817605, "percentage": 68.19, "elapsed_time": "3:00:28", "remaining_time": "1:24:10", "throughput": 8686.36, "total_tokens": 94057616} +{"current_steps": 139575, "total_steps": 204665, "loss": 0.0, "lr": 5.554825580902503e-07, "epoch": 3.4098404710136077, "percentage": 68.2, "elapsed_time": "3:00:28", "remaining_time": "1:24:09", "throughput": 8686.37, "total_tokens": 94060752} +{"current_steps": 139580, "total_steps": 204665, "loss": 0.0, "lr": 5.554061705746822e-07, "epoch": 3.409962621845455, "percentage": 68.2, "elapsed_time": "3:00:28", "remaining_time": "1:24:09", "throughput": 8686.44, "total_tokens": 94064656} +{"current_steps": 139585, "total_steps": 204665, "loss": 0.0, "lr": 5.553297862923203e-07, "epoch": 3.410084772677302, "percentage": 68.2, "elapsed_time": "3:00:29", "remaining_time": "1:24:09", "throughput": 8686.5, "total_tokens": 94068368} +{"current_steps": 139590, "total_steps": 204665, "loss": 0.0, "lr": 5.552534052437195e-07, "epoch": 3.4102069235091492, "percentage": 68.2, "elapsed_time": "3:00:29", "remaining_time": "1:24:08", "throughput": 8686.56, "total_tokens": 94072144} +{"current_steps": 139595, "total_steps": 204665, "loss": 0.0, "lr": 5.551770274294361e-07, "epoch": 3.4103290743409964, "percentage": 68.21, "elapsed_time": "3:00:29", "remaining_time": "1:24:08", "throughput": 8686.59, "total_tokens": 94075472} +{"current_steps": 139600, "total_steps": 204665, "loss": 0.0804, "lr": 5.551006528500244e-07, "epoch": 3.4104512251728436, "percentage": 68.21, "elapsed_time": "3:00:30", "remaining_time": "1:24:07", "throughput": 8686.62, "total_tokens": 94078800} +{"current_steps": 139605, "total_steps": 204665, "loss": 0.0001, "lr": 5.550242815060404e-07, "epoch": 3.410573376004691, "percentage": 68.21, "elapsed_time": "3:00:30", "remaining_time": "1:24:07", "throughput": 8686.63, "total_tokens": 94081936} +{"current_steps": 139610, "total_steps": 204665, "loss": 0.0, "lr": 5.5494791339804e-07, "epoch": 3.4106955268365375, "percentage": 68.21, "elapsed_time": "3:00:31", "remaining_time": "1:24:06", "throughput": 8686.69, "total_tokens": 94085584} +{"current_steps": 139615, "total_steps": 204665, "loss": 0.0001, "lr": 5.548715485265776e-07, "epoch": 3.4108176776683847, "percentage": 68.22, "elapsed_time": "3:00:31", "remaining_time": "1:24:06", "throughput": 8686.73, "total_tokens": 94089040} +{"current_steps": 139620, "total_steps": 204665, "loss": 0.0, "lr": 5.547951868922096e-07, "epoch": 3.410939828500232, "percentage": 68.22, "elapsed_time": "3:00:31", "remaining_time": "1:24:06", "throughput": 8686.73, "total_tokens": 94092048} +{"current_steps": 139625, "total_steps": 204665, "loss": 0.0, "lr": 5.547188284954902e-07, "epoch": 3.411061979332079, "percentage": 68.22, "elapsed_time": "3:00:32", "remaining_time": "1:24:05", "throughput": 8686.74, "total_tokens": 94095120} +{"current_steps": 139630, "total_steps": 204665, "loss": 0.0, "lr": 5.546424733369752e-07, "epoch": 3.4111841301639263, "percentage": 68.22, "elapsed_time": "3:00:32", "remaining_time": "1:24:05", "throughput": 8686.8, "total_tokens": 94098768} +{"current_steps": 139635, "total_steps": 204665, "loss": 0.0, "lr": 5.545661214172204e-07, "epoch": 3.4113062809957735, "percentage": 68.23, "elapsed_time": "3:00:32", "remaining_time": "1:24:04", "throughput": 8686.78, "total_tokens": 94101520} +{"current_steps": 139640, "total_steps": 204665, "loss": 0.0694, "lr": 5.544897727367802e-07, "epoch": 3.4114284318276207, "percentage": 68.23, "elapsed_time": "3:00:33", "remaining_time": "1:24:04", "throughput": 8686.79, "total_tokens": 94104528} +{"current_steps": 139645, "total_steps": 204665, "loss": 0.0, "lr": 5.544134272962105e-07, "epoch": 3.411550582659468, "percentage": 68.23, "elapsed_time": "3:00:33", "remaining_time": "1:24:04", "throughput": 8686.82, "total_tokens": 94107856} +{"current_steps": 139650, "total_steps": 204665, "loss": 0.0001, "lr": 5.543370850960659e-07, "epoch": 3.411672733491315, "percentage": 68.23, "elapsed_time": "3:00:33", "remaining_time": "1:24:03", "throughput": 8686.83, "total_tokens": 94110928} +{"current_steps": 139655, "total_steps": 204665, "loss": 0.0, "lr": 5.542607461369022e-07, "epoch": 3.4117948843231622, "percentage": 68.24, "elapsed_time": "3:00:34", "remaining_time": "1:24:03", "throughput": 8686.86, "total_tokens": 94114256} +{"current_steps": 139660, "total_steps": 204665, "loss": 0.0, "lr": 5.54184410419274e-07, "epoch": 3.4119170351550094, "percentage": 68.24, "elapsed_time": "3:00:34", "remaining_time": "1:24:02", "throughput": 8686.91, "total_tokens": 94117776} +{"current_steps": 139665, "total_steps": 204665, "loss": 0.0004, "lr": 5.54108077943737e-07, "epoch": 3.4120391859868566, "percentage": 68.24, "elapsed_time": "3:00:34", "remaining_time": "1:24:02", "throughput": 8686.95, "total_tokens": 94121232} +{"current_steps": 139670, "total_steps": 204665, "loss": 0.0, "lr": 5.540317487108459e-07, "epoch": 3.412161336818704, "percentage": 68.24, "elapsed_time": "3:00:35", "remaining_time": "1:24:02", "throughput": 8686.97, "total_tokens": 94124560} +{"current_steps": 139675, "total_steps": 204665, "loss": 0.0, "lr": 5.539554227211556e-07, "epoch": 3.412283487650551, "percentage": 68.25, "elapsed_time": "3:00:35", "remaining_time": "1:24:01", "throughput": 8687.01, "total_tokens": 94127952} +{"current_steps": 139680, "total_steps": 204665, "loss": 0.0, "lr": 5.538790999752217e-07, "epoch": 3.412405638482398, "percentage": 68.25, "elapsed_time": "3:00:35", "remaining_time": "1:24:01", "throughput": 8687.01, "total_tokens": 94130960} +{"current_steps": 139685, "total_steps": 204665, "loss": 0.0, "lr": 5.538027804735987e-07, "epoch": 3.4125277893142454, "percentage": 68.25, "elapsed_time": "3:00:36", "remaining_time": "1:24:00", "throughput": 8687.04, "total_tokens": 94134288} +{"current_steps": 139690, "total_steps": 204665, "loss": 0.0, "lr": 5.537264642168422e-07, "epoch": 3.4126499401460926, "percentage": 68.25, "elapsed_time": "3:00:36", "remaining_time": "1:24:00", "throughput": 8687.06, "total_tokens": 94137424} +{"current_steps": 139695, "total_steps": 204665, "loss": 0.0, "lr": 5.536501512055065e-07, "epoch": 3.4127720909779393, "percentage": 68.26, "elapsed_time": "3:00:36", "remaining_time": "1:24:00", "throughput": 8687.1, "total_tokens": 94140944} +{"current_steps": 139700, "total_steps": 204665, "loss": 0.065, "lr": 5.535738414401471e-07, "epoch": 3.412894241809787, "percentage": 68.26, "elapsed_time": "3:00:37", "remaining_time": "1:23:59", "throughput": 8687.1, "total_tokens": 94143824} +{"current_steps": 139705, "total_steps": 204665, "loss": 0.0376, "lr": 5.534975349213191e-07, "epoch": 3.4130163926416337, "percentage": 68.26, "elapsed_time": "3:00:37", "remaining_time": "1:23:59", "throughput": 8687.12, "total_tokens": 94147024} +{"current_steps": 139710, "total_steps": 204665, "loss": 0.0, "lr": 5.534212316495767e-07, "epoch": 3.413138543473481, "percentage": 68.26, "elapsed_time": "3:00:37", "remaining_time": "1:23:58", "throughput": 8687.18, "total_tokens": 94150864} +{"current_steps": 139715, "total_steps": 204665, "loss": 0.0, "lr": 5.533449316254756e-07, "epoch": 3.413260694305328, "percentage": 68.27, "elapsed_time": "3:00:38", "remaining_time": "1:23:58", "throughput": 8687.22, "total_tokens": 94154256} +{"current_steps": 139720, "total_steps": 204665, "loss": 0.0, "lr": 5.532686348495699e-07, "epoch": 3.4133828451371753, "percentage": 68.27, "elapsed_time": "3:00:38", "remaining_time": "1:23:58", "throughput": 8687.27, "total_tokens": 94157904} +{"current_steps": 139725, "total_steps": 204665, "loss": 0.0, "lr": 5.531923413224148e-07, "epoch": 3.4135049959690225, "percentage": 68.27, "elapsed_time": "3:00:38", "remaining_time": "1:23:57", "throughput": 8687.3, "total_tokens": 94161168} +{"current_steps": 139730, "total_steps": 204665, "loss": 0.0, "lr": 5.531160510445657e-07, "epoch": 3.4136271468008696, "percentage": 68.27, "elapsed_time": "3:00:39", "remaining_time": "1:23:57", "throughput": 8687.32, "total_tokens": 94164432} +{"current_steps": 139735, "total_steps": 204665, "loss": 0.0004, "lr": 5.530397640165765e-07, "epoch": 3.413749297632717, "percentage": 68.27, "elapsed_time": "3:00:39", "remaining_time": "1:23:56", "throughput": 8687.41, "total_tokens": 94168528} +{"current_steps": 139740, "total_steps": 204665, "loss": 0.0256, "lr": 5.529634802390026e-07, "epoch": 3.413871448464564, "percentage": 68.28, "elapsed_time": "3:00:39", "remaining_time": "1:23:56", "throughput": 8687.41, "total_tokens": 94171536} +{"current_steps": 139745, "total_steps": 204665, "loss": 0.0, "lr": 5.528871997123981e-07, "epoch": 3.413993599296411, "percentage": 68.28, "elapsed_time": "3:00:40", "remaining_time": "1:23:55", "throughput": 8687.42, "total_tokens": 94174608} +{"current_steps": 139750, "total_steps": 204665, "loss": 0.0501, "lr": 5.528109224373186e-07, "epoch": 3.4141157501282584, "percentage": 68.28, "elapsed_time": "3:00:40", "remaining_time": "1:23:55", "throughput": 8687.44, "total_tokens": 94177744} +{"current_steps": 139755, "total_steps": 204665, "loss": 0.0, "lr": 5.52734648414318e-07, "epoch": 3.4142379009601056, "percentage": 68.28, "elapsed_time": "3:00:41", "remaining_time": "1:23:55", "throughput": 8687.47, "total_tokens": 94181072} +{"current_steps": 139760, "total_steps": 204665, "loss": 0.0, "lr": 5.526583776439517e-07, "epoch": 3.414360051791953, "percentage": 68.29, "elapsed_time": "3:00:41", "remaining_time": "1:23:54", "throughput": 8687.51, "total_tokens": 94184656} +{"current_steps": 139765, "total_steps": 204665, "loss": 0.0, "lr": 5.525821101267735e-07, "epoch": 3.4144822026238, "percentage": 68.29, "elapsed_time": "3:00:41", "remaining_time": "1:23:54", "throughput": 8687.58, "total_tokens": 94188496} +{"current_steps": 139770, "total_steps": 204665, "loss": 0.0, "lr": 5.525058458633391e-07, "epoch": 3.414604353455647, "percentage": 68.29, "elapsed_time": "3:00:42", "remaining_time": "1:23:53", "throughput": 8687.6, "total_tokens": 94191696} +{"current_steps": 139775, "total_steps": 204665, "loss": 0.0001, "lr": 5.524295848542025e-07, "epoch": 3.4147265042874944, "percentage": 68.29, "elapsed_time": "3:00:42", "remaining_time": "1:23:53", "throughput": 8687.65, "total_tokens": 94195344} +{"current_steps": 139780, "total_steps": 204665, "loss": 0.0, "lr": 5.523533270999179e-07, "epoch": 3.4148486551193415, "percentage": 68.3, "elapsed_time": "3:00:42", "remaining_time": "1:23:53", "throughput": 8687.69, "total_tokens": 94198800} +{"current_steps": 139785, "total_steps": 204665, "loss": 0.0, "lr": 5.522770726010404e-07, "epoch": 3.4149708059511887, "percentage": 68.3, "elapsed_time": "3:00:43", "remaining_time": "1:23:52", "throughput": 8687.75, "total_tokens": 94202576} +{"current_steps": 139790, "total_steps": 204665, "loss": 0.0, "lr": 5.522008213581249e-07, "epoch": 3.4150929567830355, "percentage": 68.3, "elapsed_time": "3:00:43", "remaining_time": "1:23:52", "throughput": 8687.78, "total_tokens": 94205968} +{"current_steps": 139795, "total_steps": 204665, "loss": 0.0716, "lr": 5.521245733717248e-07, "epoch": 3.4152151076148827, "percentage": 68.3, "elapsed_time": "3:00:43", "remaining_time": "1:23:51", "throughput": 8687.8, "total_tokens": 94209168} +{"current_steps": 139800, "total_steps": 204665, "loss": 0.0, "lr": 5.520483286423958e-07, "epoch": 3.41533725844673, "percentage": 68.31, "elapsed_time": "3:00:44", "remaining_time": "1:23:51", "throughput": 8687.85, "total_tokens": 94212688} +{"current_steps": 139805, "total_steps": 204665, "loss": 0.0002, "lr": 5.519720871706916e-07, "epoch": 3.415459409278577, "percentage": 68.31, "elapsed_time": "3:00:44", "remaining_time": "1:23:51", "throughput": 8687.85, "total_tokens": 94215696} +{"current_steps": 139810, "total_steps": 204665, "loss": 0.0, "lr": 5.51895848957167e-07, "epoch": 3.4155815601104242, "percentage": 68.31, "elapsed_time": "3:00:44", "remaining_time": "1:23:50", "throughput": 8687.9, "total_tokens": 94219280} +{"current_steps": 139815, "total_steps": 204665, "loss": 0.0002, "lr": 5.518196140023761e-07, "epoch": 3.4157037109422714, "percentage": 68.31, "elapsed_time": "3:00:45", "remaining_time": "1:23:50", "throughput": 8687.95, "total_tokens": 94222928} +{"current_steps": 139820, "total_steps": 204665, "loss": 0.0, "lr": 5.517433823068736e-07, "epoch": 3.4158258617741186, "percentage": 68.32, "elapsed_time": "3:00:45", "remaining_time": "1:23:49", "throughput": 8687.97, "total_tokens": 94226128} +{"current_steps": 139825, "total_steps": 204665, "loss": 0.0001, "lr": 5.51667153871214e-07, "epoch": 3.415948012605966, "percentage": 68.32, "elapsed_time": "3:00:45", "remaining_time": "1:23:49", "throughput": 8687.99, "total_tokens": 94229200} +{"current_steps": 139830, "total_steps": 204665, "loss": 0.0, "lr": 5.51590928695951e-07, "epoch": 3.416070163437813, "percentage": 68.32, "elapsed_time": "3:00:46", "remaining_time": "1:23:49", "throughput": 8688.01, "total_tokens": 94232400} +{"current_steps": 139835, "total_steps": 204665, "loss": 0.0002, "lr": 5.515147067816399e-07, "epoch": 3.41619231426966, "percentage": 68.32, "elapsed_time": "3:00:46", "remaining_time": "1:23:48", "throughput": 8688.05, "total_tokens": 94235920} +{"current_steps": 139840, "total_steps": 204665, "loss": 0.0, "lr": 5.514384881288341e-07, "epoch": 3.4163144651015074, "percentage": 68.33, "elapsed_time": "3:00:46", "remaining_time": "1:23:48", "throughput": 8688.1, "total_tokens": 94239568} +{"current_steps": 139845, "total_steps": 204665, "loss": 0.0, "lr": 5.513622727380887e-07, "epoch": 3.4164366159333546, "percentage": 68.33, "elapsed_time": "3:00:47", "remaining_time": "1:23:47", "throughput": 8688.14, "total_tokens": 94242960} +{"current_steps": 139850, "total_steps": 204665, "loss": 0.0, "lr": 5.51286060609957e-07, "epoch": 3.4165587667652018, "percentage": 68.33, "elapsed_time": "3:00:47", "remaining_time": "1:23:47", "throughput": 8688.13, "total_tokens": 94245776} +{"current_steps": 139855, "total_steps": 204665, "loss": 0.0, "lr": 5.512098517449938e-07, "epoch": 3.416680917597049, "percentage": 68.33, "elapsed_time": "3:00:47", "remaining_time": "1:23:47", "throughput": 8688.16, "total_tokens": 94249168} +{"current_steps": 139860, "total_steps": 204665, "loss": 0.0003, "lr": 5.511336461437536e-07, "epoch": 3.416803068428896, "percentage": 68.34, "elapsed_time": "3:00:48", "remaining_time": "1:23:46", "throughput": 8688.19, "total_tokens": 94252496} +{"current_steps": 139865, "total_steps": 204665, "loss": 0.0, "lr": 5.510574438067904e-07, "epoch": 3.4169252192607433, "percentage": 68.34, "elapsed_time": "3:00:48", "remaining_time": "1:23:46", "throughput": 8688.24, "total_tokens": 94256080} +{"current_steps": 139870, "total_steps": 204665, "loss": 0.0001, "lr": 5.509812447346578e-07, "epoch": 3.4170473700925905, "percentage": 68.34, "elapsed_time": "3:00:49", "remaining_time": "1:23:45", "throughput": 8688.3, "total_tokens": 94259792} +{"current_steps": 139875, "total_steps": 204665, "loss": 0.0, "lr": 5.509050489279107e-07, "epoch": 3.4171695209244373, "percentage": 68.34, "elapsed_time": "3:00:49", "remaining_time": "1:23:45", "throughput": 8688.3, "total_tokens": 94262800} +{"current_steps": 139880, "total_steps": 204665, "loss": 0.0, "lr": 5.508288563871024e-07, "epoch": 3.417291671756285, "percentage": 68.35, "elapsed_time": "3:00:49", "remaining_time": "1:23:45", "throughput": 8688.32, "total_tokens": 94266064} +{"current_steps": 139885, "total_steps": 204665, "loss": 0.0, "lr": 5.50752667112788e-07, "epoch": 3.4174138225881316, "percentage": 68.35, "elapsed_time": "3:00:50", "remaining_time": "1:23:44", "throughput": 8688.42, "total_tokens": 94270288} +{"current_steps": 139890, "total_steps": 204665, "loss": 0.0, "lr": 5.506764811055206e-07, "epoch": 3.417535973419979, "percentage": 68.35, "elapsed_time": "3:00:50", "remaining_time": "1:23:44", "throughput": 8688.45, "total_tokens": 94273680} +{"current_steps": 139895, "total_steps": 204665, "loss": 0.0, "lr": 5.506002983658551e-07, "epoch": 3.417658124251826, "percentage": 68.35, "elapsed_time": "3:00:50", "remaining_time": "1:23:43", "throughput": 8688.47, "total_tokens": 94276880} +{"current_steps": 139900, "total_steps": 204665, "loss": 0.0001, "lr": 5.505241188943446e-07, "epoch": 3.417780275083673, "percentage": 68.36, "elapsed_time": "3:00:51", "remaining_time": "1:23:43", "throughput": 8688.52, "total_tokens": 94280464} +{"current_steps": 139905, "total_steps": 204665, "loss": 0.0402, "lr": 5.504479426915441e-07, "epoch": 3.4179024259155204, "percentage": 68.36, "elapsed_time": "3:00:51", "remaining_time": "1:23:43", "throughput": 8688.53, "total_tokens": 94283600} +{"current_steps": 139910, "total_steps": 204665, "loss": 0.0, "lr": 5.503717697580067e-07, "epoch": 3.4180245767473676, "percentage": 68.36, "elapsed_time": "3:00:51", "remaining_time": "1:23:42", "throughput": 8688.59, "total_tokens": 94287248} +{"current_steps": 139915, "total_steps": 204665, "loss": 0.0, "lr": 5.502956000942867e-07, "epoch": 3.418146727579215, "percentage": 68.36, "elapsed_time": "3:00:52", "remaining_time": "1:23:42", "throughput": 8688.6, "total_tokens": 94290320} +{"current_steps": 139920, "total_steps": 204665, "loss": 0.1, "lr": 5.502194337009384e-07, "epoch": 3.418268878411062, "percentage": 68.37, "elapsed_time": "3:00:52", "remaining_time": "1:23:41", "throughput": 8688.6, "total_tokens": 94293328} +{"current_steps": 139925, "total_steps": 204665, "loss": 0.0, "lr": 5.501432705785149e-07, "epoch": 3.418391029242909, "percentage": 68.37, "elapsed_time": "3:00:52", "remaining_time": "1:23:41", "throughput": 8688.78, "total_tokens": 94298704} +{"current_steps": 139930, "total_steps": 204665, "loss": 0.0, "lr": 5.500671107275712e-07, "epoch": 3.4185131800747564, "percentage": 68.37, "elapsed_time": "3:00:53", "remaining_time": "1:23:40", "throughput": 8688.79, "total_tokens": 94301840} +{"current_steps": 139935, "total_steps": 204665, "loss": 0.0, "lr": 5.499909541486598e-07, "epoch": 3.4186353309066035, "percentage": 68.37, "elapsed_time": "3:00:53", "remaining_time": "1:23:40", "throughput": 8688.81, "total_tokens": 94304976} +{"current_steps": 139940, "total_steps": 204665, "loss": 0.0, "lr": 5.499148008423353e-07, "epoch": 3.4187574817384507, "percentage": 68.38, "elapsed_time": "3:00:53", "remaining_time": "1:23:40", "throughput": 8688.84, "total_tokens": 94308368} +{"current_steps": 139945, "total_steps": 204665, "loss": 0.0, "lr": 5.49838650809152e-07, "epoch": 3.418879632570298, "percentage": 68.38, "elapsed_time": "3:00:54", "remaining_time": "1:23:39", "throughput": 8688.87, "total_tokens": 94311760} +{"current_steps": 139950, "total_steps": 204665, "loss": 0.0, "lr": 5.497625040496625e-07, "epoch": 3.419001783402145, "percentage": 68.38, "elapsed_time": "3:00:54", "remaining_time": "1:23:39", "throughput": 8688.89, "total_tokens": 94314960} +{"current_steps": 139955, "total_steps": 204665, "loss": 0.0, "lr": 5.496863605644215e-07, "epoch": 3.4191239342339923, "percentage": 68.38, "elapsed_time": "3:00:55", "remaining_time": "1:23:38", "throughput": 8688.92, "total_tokens": 94318224} +{"current_steps": 139960, "total_steps": 204665, "loss": 0.0, "lr": 5.496102203539823e-07, "epoch": 3.4192460850658395, "percentage": 68.38, "elapsed_time": "3:00:55", "remaining_time": "1:23:38", "throughput": 8688.97, "total_tokens": 94321872} +{"current_steps": 139965, "total_steps": 204665, "loss": 0.0475, "lr": 5.495340834188988e-07, "epoch": 3.4193682358976867, "percentage": 68.39, "elapsed_time": "3:00:55", "remaining_time": "1:23:38", "throughput": 8689.0, "total_tokens": 94325200} +{"current_steps": 139970, "total_steps": 204665, "loss": 0.0, "lr": 5.494579497597245e-07, "epoch": 3.4194903867295334, "percentage": 68.39, "elapsed_time": "3:00:56", "remaining_time": "1:23:37", "throughput": 8689.03, "total_tokens": 94328528} +{"current_steps": 139975, "total_steps": 204665, "loss": 0.0, "lr": 5.493818193770134e-07, "epoch": 3.4196125375613806, "percentage": 68.39, "elapsed_time": "3:00:56", "remaining_time": "1:23:37", "throughput": 8689.05, "total_tokens": 94331792} +{"current_steps": 139980, "total_steps": 204665, "loss": 0.0365, "lr": 5.493056922713189e-07, "epoch": 3.419734688393228, "percentage": 68.39, "elapsed_time": "3:00:56", "remaining_time": "1:23:36", "throughput": 8689.13, "total_tokens": 94335760} +{"current_steps": 139985, "total_steps": 204665, "loss": 0.0001, "lr": 5.492295684431942e-07, "epoch": 3.419856839225075, "percentage": 68.4, "elapsed_time": "3:00:57", "remaining_time": "1:23:36", "throughput": 8689.16, "total_tokens": 94339088} +{"current_steps": 139990, "total_steps": 204665, "loss": 0.0001, "lr": 5.491534478931939e-07, "epoch": 3.419978990056922, "percentage": 68.4, "elapsed_time": "3:00:57", "remaining_time": "1:23:36", "throughput": 8689.18, "total_tokens": 94342352} +{"current_steps": 139995, "total_steps": 204665, "loss": 0.0001, "lr": 5.490773306218705e-07, "epoch": 3.4201011408887694, "percentage": 68.4, "elapsed_time": "3:00:57", "remaining_time": "1:23:35", "throughput": 8689.2, "total_tokens": 94345488} +{"current_steps": 140000, "total_steps": 204665, "loss": 0.0, "lr": 5.490012166297783e-07, "epoch": 3.4202232917206166, "percentage": 68.4, "elapsed_time": "3:00:58", "remaining_time": "1:23:35", "throughput": 8689.25, "total_tokens": 94349136} +{"current_steps": 140005, "total_steps": 204665, "loss": 0.0, "lr": 5.489251059174705e-07, "epoch": 3.4203454425524638, "percentage": 68.41, "elapsed_time": "3:00:58", "remaining_time": "1:23:34", "throughput": 8689.27, "total_tokens": 94352272} +{"current_steps": 140010, "total_steps": 204665, "loss": 0.0001, "lr": 5.488489984855002e-07, "epoch": 3.420467593384311, "percentage": 68.41, "elapsed_time": "3:00:58", "remaining_time": "1:23:34", "throughput": 8689.3, "total_tokens": 94355728} +{"current_steps": 140015, "total_steps": 204665, "loss": 0.0, "lr": 5.487728943344221e-07, "epoch": 3.420589744216158, "percentage": 68.41, "elapsed_time": "3:00:59", "remaining_time": "1:23:34", "throughput": 8689.35, "total_tokens": 94359248} +{"current_steps": 140020, "total_steps": 204665, "loss": 0.0799, "lr": 5.486967934647884e-07, "epoch": 3.4207118950480053, "percentage": 68.41, "elapsed_time": "3:00:59", "remaining_time": "1:23:33", "throughput": 8689.35, "total_tokens": 94362256} +{"current_steps": 140025, "total_steps": 204665, "loss": 0.0, "lr": 5.486206958771534e-07, "epoch": 3.4208340458798525, "percentage": 68.42, "elapsed_time": "3:00:59", "remaining_time": "1:23:33", "throughput": 8689.42, "total_tokens": 94366096} +{"current_steps": 140030, "total_steps": 204665, "loss": 0.002, "lr": 5.485446015720695e-07, "epoch": 3.4209561967116997, "percentage": 68.42, "elapsed_time": "3:01:00", "remaining_time": "1:23:32", "throughput": 8689.43, "total_tokens": 94369104} +{"current_steps": 140035, "total_steps": 204665, "loss": 0.0955, "lr": 5.484685105500908e-07, "epoch": 3.421078347543547, "percentage": 68.42, "elapsed_time": "3:01:00", "remaining_time": "1:23:32", "throughput": 8689.46, "total_tokens": 94372496} +{"current_steps": 140040, "total_steps": 204665, "loss": 0.0, "lr": 5.48392422811771e-07, "epoch": 3.421200498375394, "percentage": 68.42, "elapsed_time": "3:01:00", "remaining_time": "1:23:32", "throughput": 8689.47, "total_tokens": 94375568} +{"current_steps": 140045, "total_steps": 204665, "loss": 0.0001, "lr": 5.483163383576626e-07, "epoch": 3.4213226492072413, "percentage": 68.43, "elapsed_time": "3:01:01", "remaining_time": "1:23:31", "throughput": 8689.52, "total_tokens": 94379216} +{"current_steps": 140050, "total_steps": 204665, "loss": 0.0, "lr": 5.482402571883196e-07, "epoch": 3.4214448000390885, "percentage": 68.43, "elapsed_time": "3:01:01", "remaining_time": "1:23:31", "throughput": 8689.56, "total_tokens": 94382736} +{"current_steps": 140055, "total_steps": 204665, "loss": 0.0, "lr": 5.481641793042945e-07, "epoch": 3.421566950870935, "percentage": 68.43, "elapsed_time": "3:01:01", "remaining_time": "1:23:30", "throughput": 8689.63, "total_tokens": 94386576} +{"current_steps": 140060, "total_steps": 204665, "loss": 0.0001, "lr": 5.480881047061415e-07, "epoch": 3.421689101702783, "percentage": 68.43, "elapsed_time": "3:01:02", "remaining_time": "1:23:30", "throughput": 8689.69, "total_tokens": 94390288} +{"current_steps": 140065, "total_steps": 204665, "loss": 0.0, "lr": 5.48012033394413e-07, "epoch": 3.4218112525346296, "percentage": 68.44, "elapsed_time": "3:01:02", "remaining_time": "1:23:30", "throughput": 8689.72, "total_tokens": 94393680} +{"current_steps": 140070, "total_steps": 204665, "loss": 0.0, "lr": 5.47935965369663e-07, "epoch": 3.421933403366477, "percentage": 68.44, "elapsed_time": "3:01:03", "remaining_time": "1:23:29", "throughput": 8689.78, "total_tokens": 94397392} +{"current_steps": 140075, "total_steps": 204665, "loss": 0.0, "lr": 5.478599006324436e-07, "epoch": 3.422055554198324, "percentage": 68.44, "elapsed_time": "3:01:03", "remaining_time": "1:23:29", "throughput": 8689.81, "total_tokens": 94400720} +{"current_steps": 140080, "total_steps": 204665, "loss": 0.0, "lr": 5.477838391833092e-07, "epoch": 3.422177705030171, "percentage": 68.44, "elapsed_time": "3:01:03", "remaining_time": "1:23:28", "throughput": 8689.84, "total_tokens": 94404048} +{"current_steps": 140085, "total_steps": 204665, "loss": 0.0, "lr": 5.477077810228123e-07, "epoch": 3.4222998558620183, "percentage": 68.45, "elapsed_time": "3:01:04", "remaining_time": "1:23:28", "throughput": 8689.86, "total_tokens": 94407248} +{"current_steps": 140090, "total_steps": 204665, "loss": 0.0001, "lr": 5.476317261515058e-07, "epoch": 3.4224220066938655, "percentage": 68.45, "elapsed_time": "3:01:04", "remaining_time": "1:23:27", "throughput": 8689.89, "total_tokens": 94410576} +{"current_steps": 140095, "total_steps": 204665, "loss": 0.0007, "lr": 5.475556745699433e-07, "epoch": 3.4225441575257127, "percentage": 68.45, "elapsed_time": "3:01:04", "remaining_time": "1:23:27", "throughput": 8689.91, "total_tokens": 94413776} +{"current_steps": 140100, "total_steps": 204665, "loss": 0.0, "lr": 5.474796262786773e-07, "epoch": 3.42266630835756, "percentage": 68.45, "elapsed_time": "3:01:05", "remaining_time": "1:23:27", "throughput": 8689.94, "total_tokens": 94417232} +{"current_steps": 140105, "total_steps": 204665, "loss": 0.0, "lr": 5.474035812782611e-07, "epoch": 3.422788459189407, "percentage": 68.46, "elapsed_time": "3:01:05", "remaining_time": "1:23:26", "throughput": 8689.95, "total_tokens": 94420176} +{"current_steps": 140110, "total_steps": 204665, "loss": 0.0, "lr": 5.473275395692483e-07, "epoch": 3.4229106100212543, "percentage": 68.46, "elapsed_time": "3:01:05", "remaining_time": "1:23:26", "throughput": 8689.96, "total_tokens": 94423312} +{"current_steps": 140115, "total_steps": 204665, "loss": 0.083, "lr": 5.472515011521908e-07, "epoch": 3.4230327608531015, "percentage": 68.46, "elapsed_time": "3:01:06", "remaining_time": "1:23:25", "throughput": 8690.01, "total_tokens": 94426896} +{"current_steps": 140120, "total_steps": 204665, "loss": 0.0, "lr": 5.471754660276428e-07, "epoch": 3.4231549116849487, "percentage": 68.46, "elapsed_time": "3:01:06", "remaining_time": "1:23:25", "throughput": 8690.05, "total_tokens": 94430352} +{"current_steps": 140125, "total_steps": 204665, "loss": 0.0, "lr": 5.470994341961561e-07, "epoch": 3.423277062516796, "percentage": 68.47, "elapsed_time": "3:01:06", "remaining_time": "1:23:25", "throughput": 8690.11, "total_tokens": 94434128} +{"current_steps": 140130, "total_steps": 204665, "loss": 0.0548, "lr": 5.47023405658284e-07, "epoch": 3.423399213348643, "percentage": 68.47, "elapsed_time": "3:01:07", "remaining_time": "1:23:24", "throughput": 8690.13, "total_tokens": 94437200} +{"current_steps": 140135, "total_steps": 204665, "loss": 0.0, "lr": 5.469473804145801e-07, "epoch": 3.4235213641804902, "percentage": 68.47, "elapsed_time": "3:01:07", "remaining_time": "1:23:24", "throughput": 8690.16, "total_tokens": 94440528} +{"current_steps": 140140, "total_steps": 204665, "loss": 0.0, "lr": 5.468713584655963e-07, "epoch": 3.423643515012337, "percentage": 68.47, "elapsed_time": "3:01:07", "remaining_time": "1:23:23", "throughput": 8690.18, "total_tokens": 94443792} +{"current_steps": 140145, "total_steps": 204665, "loss": 0.0489, "lr": 5.467953398118863e-07, "epoch": 3.4237656658441846, "percentage": 68.48, "elapsed_time": "3:01:08", "remaining_time": "1:23:23", "throughput": 8690.19, "total_tokens": 94446864} +{"current_steps": 140150, "total_steps": 204665, "loss": 0.031, "lr": 5.467193244540019e-07, "epoch": 3.4238878166760314, "percentage": 68.48, "elapsed_time": "3:01:08", "remaining_time": "1:23:23", "throughput": 8690.22, "total_tokens": 94450192} +{"current_steps": 140155, "total_steps": 204665, "loss": 0.0366, "lr": 5.466433123924969e-07, "epoch": 3.4240099675078786, "percentage": 68.48, "elapsed_time": "3:01:08", "remaining_time": "1:23:22", "throughput": 8690.28, "total_tokens": 94453968} +{"current_steps": 140160, "total_steps": 204665, "loss": 0.0, "lr": 5.465673036279235e-07, "epoch": 3.4241321183397257, "percentage": 68.48, "elapsed_time": "3:01:09", "remaining_time": "1:23:22", "throughput": 8690.31, "total_tokens": 94457296} +{"current_steps": 140165, "total_steps": 204665, "loss": 0.0, "lr": 5.464912981608345e-07, "epoch": 3.424254269171573, "percentage": 68.49, "elapsed_time": "3:01:09", "remaining_time": "1:23:21", "throughput": 8690.31, "total_tokens": 94460176} +{"current_steps": 140170, "total_steps": 204665, "loss": 0.0, "lr": 5.464152959917831e-07, "epoch": 3.42437642000342, "percentage": 68.49, "elapsed_time": "3:01:09", "remaining_time": "1:23:21", "throughput": 8690.32, "total_tokens": 94463184} +{"current_steps": 140175, "total_steps": 204665, "loss": 0.0001, "lr": 5.463392971213218e-07, "epoch": 3.4244985708352673, "percentage": 68.49, "elapsed_time": "3:01:10", "remaining_time": "1:23:21", "throughput": 8690.33, "total_tokens": 94466320} +{"current_steps": 140180, "total_steps": 204665, "loss": 0.0, "lr": 5.462633015500027e-07, "epoch": 3.4246207216671145, "percentage": 68.49, "elapsed_time": "3:01:10", "remaining_time": "1:23:20", "throughput": 8690.34, "total_tokens": 94469392} +{"current_steps": 140185, "total_steps": 204665, "loss": 0.027, "lr": 5.461873092783792e-07, "epoch": 3.4247428724989617, "percentage": 68.49, "elapsed_time": "3:01:10", "remaining_time": "1:23:20", "throughput": 8690.36, "total_tokens": 94472656} +{"current_steps": 140190, "total_steps": 204665, "loss": 0.0, "lr": 5.461113203070033e-07, "epoch": 3.424865023330809, "percentage": 68.5, "elapsed_time": "3:01:11", "remaining_time": "1:23:19", "throughput": 8690.45, "total_tokens": 94476688} +{"current_steps": 140195, "total_steps": 204665, "loss": 0.001, "lr": 5.460353346364284e-07, "epoch": 3.424987174162656, "percentage": 68.5, "elapsed_time": "3:01:11", "remaining_time": "1:23:19", "throughput": 8690.46, "total_tokens": 94479888} +{"current_steps": 140200, "total_steps": 204665, "loss": 0.0, "lr": 5.459593522672063e-07, "epoch": 3.4251093249945033, "percentage": 68.5, "elapsed_time": "3:01:12", "remaining_time": "1:23:19", "throughput": 8690.49, "total_tokens": 94483152} +{"current_steps": 140205, "total_steps": 204665, "loss": 0.0, "lr": 5.458833731998902e-07, "epoch": 3.4252314758263505, "percentage": 68.5, "elapsed_time": "3:01:12", "remaining_time": "1:23:18", "throughput": 8690.52, "total_tokens": 94486480} +{"current_steps": 140210, "total_steps": 204665, "loss": 0.0388, "lr": 5.45807397435032e-07, "epoch": 3.4253536266581976, "percentage": 68.51, "elapsed_time": "3:01:12", "remaining_time": "1:23:18", "throughput": 8690.54, "total_tokens": 94489744} +{"current_steps": 140215, "total_steps": 204665, "loss": 0.0, "lr": 5.45731424973185e-07, "epoch": 3.425475777490045, "percentage": 68.51, "elapsed_time": "3:01:13", "remaining_time": "1:23:17", "throughput": 8690.57, "total_tokens": 94493072} +{"current_steps": 140220, "total_steps": 204665, "loss": 0.0, "lr": 5.456554558149008e-07, "epoch": 3.425597928321892, "percentage": 68.51, "elapsed_time": "3:01:13", "remaining_time": "1:23:17", "throughput": 8690.6, "total_tokens": 94496400} +{"current_steps": 140225, "total_steps": 204665, "loss": 0.0001, "lr": 5.455794899607324e-07, "epoch": 3.425720079153739, "percentage": 68.51, "elapsed_time": "3:01:13", "remaining_time": "1:23:17", "throughput": 8690.64, "total_tokens": 94499920} +{"current_steps": 140230, "total_steps": 204665, "loss": 0.0644, "lr": 5.455035274112325e-07, "epoch": 3.4258422299855864, "percentage": 68.52, "elapsed_time": "3:01:14", "remaining_time": "1:23:16", "throughput": 8690.72, "total_tokens": 94503888} +{"current_steps": 140235, "total_steps": 204665, "loss": 0.0001, "lr": 5.454275681669529e-07, "epoch": 3.425964380817433, "percentage": 68.52, "elapsed_time": "3:01:14", "remaining_time": "1:23:16", "throughput": 8690.78, "total_tokens": 94507728} +{"current_steps": 140240, "total_steps": 204665, "loss": 0.0, "lr": 5.453516122284465e-07, "epoch": 3.4260865316492803, "percentage": 68.52, "elapsed_time": "3:01:14", "remaining_time": "1:23:15", "throughput": 8690.82, "total_tokens": 94511184} +{"current_steps": 140245, "total_steps": 204665, "loss": 0.0, "lr": 5.452756595962653e-07, "epoch": 3.4262086824811275, "percentage": 68.52, "elapsed_time": "3:01:15", "remaining_time": "1:23:15", "throughput": 8690.86, "total_tokens": 94514576} +{"current_steps": 140250, "total_steps": 204665, "loss": 0.0, "lr": 5.45199710270962e-07, "epoch": 3.4263308333129747, "percentage": 68.53, "elapsed_time": "3:01:15", "remaining_time": "1:23:14", "throughput": 8690.88, "total_tokens": 94517840} +{"current_steps": 140255, "total_steps": 204665, "loss": 0.0, "lr": 5.451237642530884e-07, "epoch": 3.426452984144822, "percentage": 68.53, "elapsed_time": "3:01:15", "remaining_time": "1:23:14", "throughput": 8690.9, "total_tokens": 94521040} +{"current_steps": 140260, "total_steps": 204665, "loss": 0.0, "lr": 5.450478215431973e-07, "epoch": 3.426575134976669, "percentage": 68.53, "elapsed_time": "3:01:16", "remaining_time": "1:23:14", "throughput": 8690.93, "total_tokens": 94524368} +{"current_steps": 140265, "total_steps": 204665, "loss": 0.0576, "lr": 5.44971882141841e-07, "epoch": 3.4266972858085163, "percentage": 68.53, "elapsed_time": "3:01:16", "remaining_time": "1:23:13", "throughput": 8690.95, "total_tokens": 94527632} +{"current_steps": 140270, "total_steps": 204665, "loss": 0.0001, "lr": 5.448959460495712e-07, "epoch": 3.4268194366403635, "percentage": 68.54, "elapsed_time": "3:01:16", "remaining_time": "1:23:13", "throughput": 8690.98, "total_tokens": 94531024} +{"current_steps": 140275, "total_steps": 204665, "loss": 0.0, "lr": 5.44820013266941e-07, "epoch": 3.4269415874722107, "percentage": 68.54, "elapsed_time": "3:01:17", "remaining_time": "1:23:12", "throughput": 8690.99, "total_tokens": 94534032} +{"current_steps": 140280, "total_steps": 204665, "loss": 0.0, "lr": 5.447440837945015e-07, "epoch": 3.427063738304058, "percentage": 68.54, "elapsed_time": "3:01:17", "remaining_time": "1:23:12", "throughput": 8691.04, "total_tokens": 94537680} +{"current_steps": 140285, "total_steps": 204665, "loss": 0.0002, "lr": 5.446681576328061e-07, "epoch": 3.427185889135905, "percentage": 68.54, "elapsed_time": "3:01:17", "remaining_time": "1:23:12", "throughput": 8691.04, "total_tokens": 94540624} +{"current_steps": 140290, "total_steps": 204665, "loss": 0.0001, "lr": 5.445922347824062e-07, "epoch": 3.4273080399677522, "percentage": 68.55, "elapsed_time": "3:01:18", "remaining_time": "1:23:11", "throughput": 8691.07, "total_tokens": 94543952} +{"current_steps": 140295, "total_steps": 204665, "loss": 0.0, "lr": 5.445163152438535e-07, "epoch": 3.4274301907995994, "percentage": 68.55, "elapsed_time": "3:01:18", "remaining_time": "1:23:11", "throughput": 8691.11, "total_tokens": 94547344} +{"current_steps": 140300, "total_steps": 204665, "loss": 0.047, "lr": 5.444403990177013e-07, "epoch": 3.4275523416314466, "percentage": 68.55, "elapsed_time": "3:01:18", "remaining_time": "1:23:10", "throughput": 8691.15, "total_tokens": 94550928} +{"current_steps": 140305, "total_steps": 204665, "loss": 0.0, "lr": 5.443644861045006e-07, "epoch": 3.427674492463294, "percentage": 68.55, "elapsed_time": "3:01:19", "remaining_time": "1:23:10", "throughput": 8691.17, "total_tokens": 94554128} +{"current_steps": 140310, "total_steps": 204665, "loss": 0.0001, "lr": 5.442885765048042e-07, "epoch": 3.427796643295141, "percentage": 68.56, "elapsed_time": "3:01:19", "remaining_time": "1:23:10", "throughput": 8691.21, "total_tokens": 94557584} +{"current_steps": 140315, "total_steps": 204665, "loss": 0.0, "lr": 5.442126702191637e-07, "epoch": 3.427918794126988, "percentage": 68.56, "elapsed_time": "3:01:20", "remaining_time": "1:23:09", "throughput": 8691.22, "total_tokens": 94560720} +{"current_steps": 140320, "total_steps": 204665, "loss": 0.0325, "lr": 5.44136767248131e-07, "epoch": 3.428040944958835, "percentage": 68.56, "elapsed_time": "3:01:20", "remaining_time": "1:23:09", "throughput": 8691.26, "total_tokens": 94564176} +{"current_steps": 140325, "total_steps": 204665, "loss": 0.0366, "lr": 5.440608675922589e-07, "epoch": 3.4281630957906826, "percentage": 68.56, "elapsed_time": "3:01:20", "remaining_time": "1:23:08", "throughput": 8691.3, "total_tokens": 94567568} +{"current_steps": 140330, "total_steps": 204665, "loss": 0.0, "lr": 5.439849712520983e-07, "epoch": 3.4282852466225293, "percentage": 68.57, "elapsed_time": "3:01:21", "remaining_time": "1:23:08", "throughput": 8691.33, "total_tokens": 94570896} +{"current_steps": 140335, "total_steps": 204665, "loss": 0.0, "lr": 5.439090782282021e-07, "epoch": 3.4284073974543765, "percentage": 68.57, "elapsed_time": "3:01:21", "remaining_time": "1:23:08", "throughput": 8691.39, "total_tokens": 94574736} +{"current_steps": 140340, "total_steps": 204665, "loss": 0.0, "lr": 5.438331885211212e-07, "epoch": 3.4285295482862237, "percentage": 68.57, "elapsed_time": "3:01:21", "remaining_time": "1:23:07", "throughput": 8691.4, "total_tokens": 94577744} +{"current_steps": 140345, "total_steps": 204665, "loss": 0.0569, "lr": 5.437573021314083e-07, "epoch": 3.428651699118071, "percentage": 68.57, "elapsed_time": "3:01:22", "remaining_time": "1:23:07", "throughput": 8691.41, "total_tokens": 94580880} +{"current_steps": 140350, "total_steps": 204665, "loss": 0.0248, "lr": 5.436814190596153e-07, "epoch": 3.428773849949918, "percentage": 68.58, "elapsed_time": "3:01:22", "remaining_time": "1:23:06", "throughput": 8691.43, "total_tokens": 94584080} +{"current_steps": 140355, "total_steps": 204665, "loss": 0.0, "lr": 5.436055393062933e-07, "epoch": 3.4288960007817653, "percentage": 68.58, "elapsed_time": "3:01:22", "remaining_time": "1:23:06", "throughput": 8691.47, "total_tokens": 94587536} +{"current_steps": 140360, "total_steps": 204665, "loss": 0.0017, "lr": 5.43529662871995e-07, "epoch": 3.4290181516136125, "percentage": 68.58, "elapsed_time": "3:01:23", "remaining_time": "1:23:06", "throughput": 8691.51, "total_tokens": 94590928} +{"current_steps": 140365, "total_steps": 204665, "loss": 0.0, "lr": 5.434537897572713e-07, "epoch": 3.4291403024454596, "percentage": 68.58, "elapsed_time": "3:01:23", "remaining_time": "1:23:05", "throughput": 8691.54, "total_tokens": 94594320} +{"current_steps": 140370, "total_steps": 204665, "loss": 0.0, "lr": 5.43377919962675e-07, "epoch": 3.429262453277307, "percentage": 68.59, "elapsed_time": "3:01:23", "remaining_time": "1:23:05", "throughput": 8691.58, "total_tokens": 94597776} +{"current_steps": 140375, "total_steps": 204665, "loss": 0.043, "lr": 5.433020534887568e-07, "epoch": 3.429384604109154, "percentage": 68.59, "elapsed_time": "3:01:24", "remaining_time": "1:23:04", "throughput": 8691.63, "total_tokens": 94601424} +{"current_steps": 140380, "total_steps": 204665, "loss": 0.0, "lr": 5.432261903360693e-07, "epoch": 3.429506754941001, "percentage": 68.59, "elapsed_time": "3:01:24", "remaining_time": "1:23:04", "throughput": 8691.65, "total_tokens": 94604624} +{"current_steps": 140385, "total_steps": 204665, "loss": 0.0, "lr": 5.431503305051634e-07, "epoch": 3.4296289057728484, "percentage": 68.59, "elapsed_time": "3:01:24", "remaining_time": "1:23:04", "throughput": 8691.66, "total_tokens": 94607760} +{"current_steps": 140390, "total_steps": 204665, "loss": 0.0572, "lr": 5.430744739965915e-07, "epoch": 3.4297510566046956, "percentage": 68.6, "elapsed_time": "3:01:25", "remaining_time": "1:23:03", "throughput": 8691.7, "total_tokens": 94611152} +{"current_steps": 140395, "total_steps": 204665, "loss": 0.0001, "lr": 5.429986208109052e-07, "epoch": 3.429873207436543, "percentage": 68.6, "elapsed_time": "3:01:25", "remaining_time": "1:23:03", "throughput": 8691.73, "total_tokens": 94614544} +{"current_steps": 140400, "total_steps": 204665, "loss": 0.0, "lr": 5.429227709486552e-07, "epoch": 3.42999535826839, "percentage": 68.6, "elapsed_time": "3:01:25", "remaining_time": "1:23:02", "throughput": 8691.81, "total_tokens": 94618448} +{"current_steps": 140405, "total_steps": 204665, "loss": 0.0549, "lr": 5.428469244103941e-07, "epoch": 3.430117509100237, "percentage": 68.6, "elapsed_time": "3:01:26", "remaining_time": "1:23:02", "throughput": 8691.84, "total_tokens": 94621840} +{"current_steps": 140410, "total_steps": 204665, "loss": 0.0004, "lr": 5.427710811966729e-07, "epoch": 3.4302396599320844, "percentage": 68.6, "elapsed_time": "3:01:26", "remaining_time": "1:23:01", "throughput": 8691.89, "total_tokens": 94625488} +{"current_steps": 140415, "total_steps": 204665, "loss": 0.0058, "lr": 5.426952413080433e-07, "epoch": 3.430361810763931, "percentage": 68.61, "elapsed_time": "3:01:26", "remaining_time": "1:23:01", "throughput": 8691.96, "total_tokens": 94629264} +{"current_steps": 140420, "total_steps": 204665, "loss": 0.0299, "lr": 5.426194047450574e-07, "epoch": 3.4304839615957783, "percentage": 68.61, "elapsed_time": "3:01:27", "remaining_time": "1:23:01", "throughput": 8692.03, "total_tokens": 94633104} +{"current_steps": 140425, "total_steps": 204665, "loss": 0.0656, "lr": 5.425435715082657e-07, "epoch": 3.4306061124276255, "percentage": 68.61, "elapsed_time": "3:01:27", "remaining_time": "1:23:00", "throughput": 8692.04, "total_tokens": 94636176} +{"current_steps": 140430, "total_steps": 204665, "loss": 0.0, "lr": 5.424677415982206e-07, "epoch": 3.4307282632594727, "percentage": 68.61, "elapsed_time": "3:01:28", "remaining_time": "1:23:00", "throughput": 8692.07, "total_tokens": 94639632} +{"current_steps": 140435, "total_steps": 204665, "loss": 0.0444, "lr": 5.423919150154727e-07, "epoch": 3.43085041409132, "percentage": 68.62, "elapsed_time": "3:01:28", "remaining_time": "1:22:59", "throughput": 8692.1, "total_tokens": 94642896} +{"current_steps": 140440, "total_steps": 204665, "loss": 0.0422, "lr": 5.42316091760574e-07, "epoch": 3.430972564923167, "percentage": 68.62, "elapsed_time": "3:01:28", "remaining_time": "1:22:59", "throughput": 8692.14, "total_tokens": 94646480} +{"current_steps": 140445, "total_steps": 204665, "loss": 0.0001, "lr": 5.42240271834076e-07, "epoch": 3.4310947157550142, "percentage": 68.62, "elapsed_time": "3:01:29", "remaining_time": "1:22:59", "throughput": 8692.2, "total_tokens": 94650128} +{"current_steps": 140450, "total_steps": 204665, "loss": 0.0, "lr": 5.421644552365296e-07, "epoch": 3.4312168665868614, "percentage": 68.62, "elapsed_time": "3:01:29", "remaining_time": "1:22:58", "throughput": 8692.25, "total_tokens": 94653776} +{"current_steps": 140455, "total_steps": 204665, "loss": 0.0, "lr": 5.420886419684869e-07, "epoch": 3.4313390174187086, "percentage": 68.63, "elapsed_time": "3:01:29", "remaining_time": "1:22:58", "throughput": 8692.27, "total_tokens": 94656976} +{"current_steps": 140460, "total_steps": 204665, "loss": 0.0, "lr": 5.420128320304982e-07, "epoch": 3.431461168250556, "percentage": 68.63, "elapsed_time": "3:01:30", "remaining_time": "1:22:57", "throughput": 8692.3, "total_tokens": 94660304} +{"current_steps": 140465, "total_steps": 204665, "loss": 0.0001, "lr": 5.419370254231159e-07, "epoch": 3.431583319082403, "percentage": 68.63, "elapsed_time": "3:01:30", "remaining_time": "1:22:57", "throughput": 8692.33, "total_tokens": 94663632} +{"current_steps": 140470, "total_steps": 204665, "loss": 0.0, "lr": 5.418612221468902e-07, "epoch": 3.43170546991425, "percentage": 68.63, "elapsed_time": "3:01:30", "remaining_time": "1:22:57", "throughput": 8692.38, "total_tokens": 94667152} +{"current_steps": 140475, "total_steps": 204665, "loss": 0.0, "lr": 5.41785422202373e-07, "epoch": 3.4318276207460974, "percentage": 68.64, "elapsed_time": "3:01:31", "remaining_time": "1:22:56", "throughput": 8692.41, "total_tokens": 94670544} +{"current_steps": 140480, "total_steps": 204665, "loss": 0.1119, "lr": 5.417096255901159e-07, "epoch": 3.4319497715779446, "percentage": 68.64, "elapsed_time": "3:01:31", "remaining_time": "1:22:56", "throughput": 8692.45, "total_tokens": 94674064} +{"current_steps": 140485, "total_steps": 204665, "loss": 0.0, "lr": 5.416338323106697e-07, "epoch": 3.4320719224097918, "percentage": 68.64, "elapsed_time": "3:01:31", "remaining_time": "1:22:55", "throughput": 8692.49, "total_tokens": 94677584} +{"current_steps": 140490, "total_steps": 204665, "loss": 0.0004, "lr": 5.41558042364585e-07, "epoch": 3.432194073241639, "percentage": 68.64, "elapsed_time": "3:01:32", "remaining_time": "1:22:55", "throughput": 8692.51, "total_tokens": 94680784} +{"current_steps": 140495, "total_steps": 204665, "loss": 0.1002, "lr": 5.41482255752414e-07, "epoch": 3.432316224073486, "percentage": 68.65, "elapsed_time": "3:01:32", "remaining_time": "1:22:55", "throughput": 8692.52, "total_tokens": 94683920} +{"current_steps": 140500, "total_steps": 204665, "loss": 0.0002, "lr": 5.414064724747069e-07, "epoch": 3.432438374905333, "percentage": 68.65, "elapsed_time": "3:01:32", "remaining_time": "1:22:54", "throughput": 8692.52, "total_tokens": 94686800} +{"current_steps": 140505, "total_steps": 204665, "loss": 0.0, "lr": 5.413306925320158e-07, "epoch": 3.4325605257371805, "percentage": 68.65, "elapsed_time": "3:01:33", "remaining_time": "1:22:54", "throughput": 8692.59, "total_tokens": 94690768} +{"current_steps": 140510, "total_steps": 204665, "loss": 0.0006, "lr": 5.412549159248909e-07, "epoch": 3.4326826765690273, "percentage": 68.65, "elapsed_time": "3:01:33", "remaining_time": "1:22:53", "throughput": 8692.6, "total_tokens": 94693776} +{"current_steps": 140515, "total_steps": 204665, "loss": 0.0, "lr": 5.411791426538839e-07, "epoch": 3.4328048274008744, "percentage": 68.66, "elapsed_time": "3:01:33", "remaining_time": "1:22:53", "throughput": 8692.66, "total_tokens": 94697488} +{"current_steps": 140520, "total_steps": 204665, "loss": 0.0203, "lr": 5.411033727195453e-07, "epoch": 3.4329269782327216, "percentage": 68.66, "elapsed_time": "3:01:34", "remaining_time": "1:22:53", "throughput": 8692.68, "total_tokens": 94700688} +{"current_steps": 140525, "total_steps": 204665, "loss": 0.0, "lr": 5.410276061224267e-07, "epoch": 3.433049129064569, "percentage": 68.66, "elapsed_time": "3:01:34", "remaining_time": "1:22:52", "throughput": 8692.73, "total_tokens": 94704336} +{"current_steps": 140530, "total_steps": 204665, "loss": 0.0, "lr": 5.409518428630785e-07, "epoch": 3.433171279896416, "percentage": 68.66, "elapsed_time": "3:01:35", "remaining_time": "1:22:52", "throughput": 8692.75, "total_tokens": 94707600} +{"current_steps": 140535, "total_steps": 204665, "loss": 0.0, "lr": 5.408760829420519e-07, "epoch": 3.433293430728263, "percentage": 68.67, "elapsed_time": "3:01:35", "remaining_time": "1:22:51", "throughput": 8692.78, "total_tokens": 94710864} +{"current_steps": 140540, "total_steps": 204665, "loss": 0.0, "lr": 5.408003263598984e-07, "epoch": 3.4334155815601104, "percentage": 68.67, "elapsed_time": "3:01:35", "remaining_time": "1:22:51", "throughput": 8692.8, "total_tokens": 94714128} +{"current_steps": 140545, "total_steps": 204665, "loss": 0.0, "lr": 5.407245731171679e-07, "epoch": 3.4335377323919576, "percentage": 68.67, "elapsed_time": "3:01:36", "remaining_time": "1:22:51", "throughput": 8692.82, "total_tokens": 94717264} +{"current_steps": 140550, "total_steps": 204665, "loss": 0.0, "lr": 5.406488232144122e-07, "epoch": 3.4336598832238048, "percentage": 68.67, "elapsed_time": "3:01:36", "remaining_time": "1:22:50", "throughput": 8692.87, "total_tokens": 94720848} +{"current_steps": 140555, "total_steps": 204665, "loss": 0.0694, "lr": 5.405730766521815e-07, "epoch": 3.433782034055652, "percentage": 68.68, "elapsed_time": "3:01:36", "remaining_time": "1:22:50", "throughput": 8692.91, "total_tokens": 94724368} +{"current_steps": 140560, "total_steps": 204665, "loss": 0.0606, "lr": 5.404973334310274e-07, "epoch": 3.433904184887499, "percentage": 68.68, "elapsed_time": "3:01:37", "remaining_time": "1:22:49", "throughput": 8692.95, "total_tokens": 94727824} +{"current_steps": 140565, "total_steps": 204665, "loss": 0.0, "lr": 5.404215935514999e-07, "epoch": 3.4340263357193463, "percentage": 68.68, "elapsed_time": "3:01:37", "remaining_time": "1:22:49", "throughput": 8692.96, "total_tokens": 94730960} +{"current_steps": 140570, "total_steps": 204665, "loss": 0.0001, "lr": 5.403458570141502e-07, "epoch": 3.4341484865511935, "percentage": 68.68, "elapsed_time": "3:01:37", "remaining_time": "1:22:49", "throughput": 8692.98, "total_tokens": 94734224} +{"current_steps": 140575, "total_steps": 204665, "loss": 0.0001, "lr": 5.402701238195293e-07, "epoch": 3.4342706373830407, "percentage": 68.69, "elapsed_time": "3:01:38", "remaining_time": "1:22:48", "throughput": 8693.01, "total_tokens": 94737488} +{"current_steps": 140580, "total_steps": 204665, "loss": 0.0, "lr": 5.401943939681875e-07, "epoch": 3.434392788214888, "percentage": 68.69, "elapsed_time": "3:01:38", "remaining_time": "1:22:48", "throughput": 8693.05, "total_tokens": 94741008} +{"current_steps": 140585, "total_steps": 204665, "loss": 0.0004, "lr": 5.401186674606759e-07, "epoch": 3.4345149390467347, "percentage": 68.69, "elapsed_time": "3:01:38", "remaining_time": "1:22:47", "throughput": 8693.09, "total_tokens": 94744400} +{"current_steps": 140590, "total_steps": 204665, "loss": 0.0186, "lr": 5.400429442975448e-07, "epoch": 3.4346370898785823, "percentage": 68.69, "elapsed_time": "3:01:39", "remaining_time": "1:22:47", "throughput": 8693.14, "total_tokens": 94747984} +{"current_steps": 140595, "total_steps": 204665, "loss": 0.0, "lr": 5.399672244793455e-07, "epoch": 3.434759240710429, "percentage": 68.7, "elapsed_time": "3:01:39", "remaining_time": "1:22:46", "throughput": 8693.18, "total_tokens": 94751568} +{"current_steps": 140600, "total_steps": 204665, "loss": 0.0001, "lr": 5.398915080066283e-07, "epoch": 3.4348813915422762, "percentage": 68.7, "elapsed_time": "3:01:39", "remaining_time": "1:22:46", "throughput": 8693.21, "total_tokens": 94754832} +{"current_steps": 140605, "total_steps": 204665, "loss": 0.0, "lr": 5.398157948799434e-07, "epoch": 3.4350035423741234, "percentage": 68.7, "elapsed_time": "3:01:40", "remaining_time": "1:22:46", "throughput": 8693.23, "total_tokens": 94758032} +{"current_steps": 140610, "total_steps": 204665, "loss": 0.0187, "lr": 5.397400850998421e-07, "epoch": 3.4351256932059706, "percentage": 68.7, "elapsed_time": "3:01:40", "remaining_time": "1:22:45", "throughput": 8693.27, "total_tokens": 94761552} +{"current_steps": 140615, "total_steps": 204665, "loss": 0.0, "lr": 5.396643786668744e-07, "epoch": 3.435247844037818, "percentage": 68.7, "elapsed_time": "3:01:40", "remaining_time": "1:22:45", "throughput": 8693.29, "total_tokens": 94764752} +{"current_steps": 140620, "total_steps": 204665, "loss": 0.0, "lr": 5.395886755815918e-07, "epoch": 3.435369994869665, "percentage": 68.71, "elapsed_time": "3:01:41", "remaining_time": "1:22:44", "throughput": 8693.34, "total_tokens": 94768272} +{"current_steps": 140625, "total_steps": 204665, "loss": 0.0, "lr": 5.395129758445433e-07, "epoch": 3.435492145701512, "percentage": 68.71, "elapsed_time": "3:01:41", "remaining_time": "1:22:44", "throughput": 8693.37, "total_tokens": 94771664} +{"current_steps": 140630, "total_steps": 204665, "loss": 0.0005, "lr": 5.394372794562805e-07, "epoch": 3.4356142965333594, "percentage": 68.71, "elapsed_time": "3:01:41", "remaining_time": "1:22:44", "throughput": 8693.39, "total_tokens": 94774928} +{"current_steps": 140635, "total_steps": 204665, "loss": 0.0001, "lr": 5.393615864173542e-07, "epoch": 3.4357364473652066, "percentage": 68.71, "elapsed_time": "3:01:42", "remaining_time": "1:22:43", "throughput": 8693.43, "total_tokens": 94778384} +{"current_steps": 140640, "total_steps": 204665, "loss": 0.0, "lr": 5.392858967283138e-07, "epoch": 3.4358585981970537, "percentage": 68.72, "elapsed_time": "3:01:42", "remaining_time": "1:22:43", "throughput": 8693.48, "total_tokens": 94782032} +{"current_steps": 140645, "total_steps": 204665, "loss": 0.0006, "lr": 5.392102103897108e-07, "epoch": 3.435980749028901, "percentage": 68.72, "elapsed_time": "3:01:43", "remaining_time": "1:22:42", "throughput": 8693.52, "total_tokens": 94785488} +{"current_steps": 140650, "total_steps": 204665, "loss": 0.0, "lr": 5.391345274020946e-07, "epoch": 3.436102899860748, "percentage": 68.72, "elapsed_time": "3:01:43", "remaining_time": "1:22:42", "throughput": 8693.54, "total_tokens": 94788752} +{"current_steps": 140655, "total_steps": 204665, "loss": 0.0001, "lr": 5.390588477660161e-07, "epoch": 3.4362250506925953, "percentage": 68.72, "elapsed_time": "3:01:43", "remaining_time": "1:22:42", "throughput": 8693.57, "total_tokens": 94792016} +{"current_steps": 140660, "total_steps": 204665, "loss": 0.0, "lr": 5.389831714820261e-07, "epoch": 3.4363472015244425, "percentage": 68.73, "elapsed_time": "3:01:44", "remaining_time": "1:22:41", "throughput": 8693.57, "total_tokens": 94794960} +{"current_steps": 140665, "total_steps": 204665, "loss": 0.0001, "lr": 5.389074985506739e-07, "epoch": 3.4364693523562897, "percentage": 68.73, "elapsed_time": "3:01:44", "remaining_time": "1:22:41", "throughput": 8693.58, "total_tokens": 94798032} +{"current_steps": 140670, "total_steps": 204665, "loss": 0.0, "lr": 5.38831828972511e-07, "epoch": 3.436591503188137, "percentage": 68.73, "elapsed_time": "3:01:44", "remaining_time": "1:22:40", "throughput": 8693.6, "total_tokens": 94801232} +{"current_steps": 140675, "total_steps": 204665, "loss": 0.0003, "lr": 5.387561627480865e-07, "epoch": 3.436713654019984, "percentage": 68.73, "elapsed_time": "3:01:45", "remaining_time": "1:22:40", "throughput": 8693.65, "total_tokens": 94804880} +{"current_steps": 140680, "total_steps": 204665, "loss": 0.0421, "lr": 5.386804998779517e-07, "epoch": 3.436835804851831, "percentage": 68.74, "elapsed_time": "3:01:45", "remaining_time": "1:22:40", "throughput": 8693.69, "total_tokens": 94808336} +{"current_steps": 140685, "total_steps": 204665, "loss": 0.0001, "lr": 5.386048403626561e-07, "epoch": 3.436957955683678, "percentage": 68.74, "elapsed_time": "3:01:45", "remaining_time": "1:22:39", "throughput": 8693.71, "total_tokens": 94811600} +{"current_steps": 140690, "total_steps": 204665, "loss": 0.0, "lr": 5.385291842027505e-07, "epoch": 3.437080106515525, "percentage": 68.74, "elapsed_time": "3:01:46", "remaining_time": "1:22:39", "throughput": 8693.76, "total_tokens": 94815248} +{"current_steps": 140695, "total_steps": 204665, "loss": 0.0, "lr": 5.384535313987844e-07, "epoch": 3.4372022573473724, "percentage": 68.74, "elapsed_time": "3:01:46", "remaining_time": "1:22:38", "throughput": 8693.8, "total_tokens": 94818704} +{"current_steps": 140700, "total_steps": 204665, "loss": 0.0, "lr": 5.383778819513088e-07, "epoch": 3.4373244081792196, "percentage": 68.75, "elapsed_time": "3:01:46", "remaining_time": "1:22:38", "throughput": 8693.8, "total_tokens": 94821776} +{"current_steps": 140705, "total_steps": 204665, "loss": 0.0, "lr": 5.383022358608733e-07, "epoch": 3.4374465590110668, "percentage": 68.75, "elapsed_time": "3:01:47", "remaining_time": "1:22:38", "throughput": 8693.88, "total_tokens": 94825680} +{"current_steps": 140710, "total_steps": 204665, "loss": 0.0003, "lr": 5.382265931280279e-07, "epoch": 3.437568709842914, "percentage": 68.75, "elapsed_time": "3:01:47", "remaining_time": "1:22:37", "throughput": 8693.93, "total_tokens": 94829328} +{"current_steps": 140715, "total_steps": 204665, "loss": 0.0, "lr": 5.381509537533231e-07, "epoch": 3.437690860674761, "percentage": 68.75, "elapsed_time": "3:01:47", "remaining_time": "1:22:37", "throughput": 8693.94, "total_tokens": 94832464} +{"current_steps": 140720, "total_steps": 204665, "loss": 0.0002, "lr": 5.380753177373085e-07, "epoch": 3.4378130115066083, "percentage": 68.76, "elapsed_time": "3:01:48", "remaining_time": "1:22:36", "throughput": 8693.94, "total_tokens": 94835408} +{"current_steps": 140725, "total_steps": 204665, "loss": 0.0, "lr": 5.379996850805344e-07, "epoch": 3.4379351623384555, "percentage": 68.76, "elapsed_time": "3:01:48", "remaining_time": "1:22:36", "throughput": 8693.95, "total_tokens": 94838416} +{"current_steps": 140730, "total_steps": 204665, "loss": 0.0001, "lr": 5.379240557835514e-07, "epoch": 3.4380573131703027, "percentage": 68.76, "elapsed_time": "3:01:48", "remaining_time": "1:22:36", "throughput": 8693.99, "total_tokens": 94842000} +{"current_steps": 140735, "total_steps": 204665, "loss": 0.0, "lr": 5.378484298469084e-07, "epoch": 3.43817946400215, "percentage": 68.76, "elapsed_time": "3:01:49", "remaining_time": "1:22:35", "throughput": 8694.0, "total_tokens": 94845008} +{"current_steps": 140740, "total_steps": 204665, "loss": 0.0, "lr": 5.377728072711563e-07, "epoch": 3.438301614833997, "percentage": 68.77, "elapsed_time": "3:01:49", "remaining_time": "1:22:35", "throughput": 8694.03, "total_tokens": 94848464} +{"current_steps": 140745, "total_steps": 204665, "loss": 0.0371, "lr": 5.376971880568444e-07, "epoch": 3.4384237656658443, "percentage": 68.77, "elapsed_time": "3:01:49", "remaining_time": "1:22:34", "throughput": 8694.08, "total_tokens": 94851984} +{"current_steps": 140750, "total_steps": 204665, "loss": 0.0, "lr": 5.376215722045227e-07, "epoch": 3.4385459164976915, "percentage": 68.77, "elapsed_time": "3:01:50", "remaining_time": "1:22:34", "throughput": 8694.09, "total_tokens": 94855120} +{"current_steps": 140755, "total_steps": 204665, "loss": 0.0002, "lr": 5.375459597147419e-07, "epoch": 3.4386680673295387, "percentage": 68.77, "elapsed_time": "3:01:50", "remaining_time": "1:22:33", "throughput": 8694.13, "total_tokens": 94858704} +{"current_steps": 140760, "total_steps": 204665, "loss": 0.0, "lr": 5.374703505880507e-07, "epoch": 3.438790218161386, "percentage": 68.78, "elapsed_time": "3:01:51", "remaining_time": "1:22:33", "throughput": 8694.15, "total_tokens": 94861904} +{"current_steps": 140765, "total_steps": 204665, "loss": 0.039, "lr": 5.373947448250001e-07, "epoch": 3.4389123689932326, "percentage": 68.78, "elapsed_time": "3:01:51", "remaining_time": "1:22:33", "throughput": 8694.18, "total_tokens": 94865296} +{"current_steps": 140770, "total_steps": 204665, "loss": 0.0, "lr": 5.373191424261388e-07, "epoch": 3.4390345198250802, "percentage": 68.78, "elapsed_time": "3:01:51", "remaining_time": "1:22:32", "throughput": 8694.21, "total_tokens": 94868624} +{"current_steps": 140775, "total_steps": 204665, "loss": 0.0, "lr": 5.372435433920175e-07, "epoch": 3.439156670656927, "percentage": 68.78, "elapsed_time": "3:01:52", "remaining_time": "1:22:32", "throughput": 8694.23, "total_tokens": 94871824} +{"current_steps": 140780, "total_steps": 204665, "loss": 0.0001, "lr": 5.371679477231852e-07, "epoch": 3.439278821488774, "percentage": 68.79, "elapsed_time": "3:01:52", "remaining_time": "1:22:31", "throughput": 8694.27, "total_tokens": 94875344} +{"current_steps": 140785, "total_steps": 204665, "loss": 0.0, "lr": 5.370923554201923e-07, "epoch": 3.4394009723206214, "percentage": 68.79, "elapsed_time": "3:01:52", "remaining_time": "1:22:31", "throughput": 8694.34, "total_tokens": 94879120} +{"current_steps": 140790, "total_steps": 204665, "loss": 0.0001, "lr": 5.370167664835885e-07, "epoch": 3.4395231231524686, "percentage": 68.79, "elapsed_time": "3:01:53", "remaining_time": "1:22:31", "throughput": 8694.33, "total_tokens": 94882000} +{"current_steps": 140795, "total_steps": 204665, "loss": 0.005, "lr": 5.369411809139232e-07, "epoch": 3.4396452739843157, "percentage": 68.79, "elapsed_time": "3:01:53", "remaining_time": "1:22:30", "throughput": 8694.42, "total_tokens": 94886160} +{"current_steps": 140800, "total_steps": 204665, "loss": 0.0, "lr": 5.36865598711746e-07, "epoch": 3.439767424816163, "percentage": 68.8, "elapsed_time": "3:01:53", "remaining_time": "1:22:30", "throughput": 8694.45, "total_tokens": 94889552} +{"current_steps": 140805, "total_steps": 204665, "loss": 0.0011, "lr": 5.367900198776072e-07, "epoch": 3.43988957564801, "percentage": 68.8, "elapsed_time": "3:01:54", "remaining_time": "1:22:29", "throughput": 8694.51, "total_tokens": 94893328} +{"current_steps": 140810, "total_steps": 204665, "loss": 0.0001, "lr": 5.367144444120553e-07, "epoch": 3.4400117264798573, "percentage": 68.8, "elapsed_time": "3:01:54", "remaining_time": "1:22:29", "throughput": 8694.53, "total_tokens": 94896528} +{"current_steps": 140815, "total_steps": 204665, "loss": 0.0, "lr": 5.366388723156412e-07, "epoch": 3.4401338773117045, "percentage": 68.8, "elapsed_time": "3:01:54", "remaining_time": "1:22:29", "throughput": 8694.57, "total_tokens": 94900048} +{"current_steps": 140820, "total_steps": 204665, "loss": 0.0685, "lr": 5.365633035889135e-07, "epoch": 3.4402560281435517, "percentage": 68.81, "elapsed_time": "3:01:55", "remaining_time": "1:22:28", "throughput": 8694.58, "total_tokens": 94903056} +{"current_steps": 140825, "total_steps": 204665, "loss": 0.0276, "lr": 5.364877382324222e-07, "epoch": 3.440378178975399, "percentage": 68.81, "elapsed_time": "3:01:55", "remaining_time": "1:22:28", "throughput": 8694.6, "total_tokens": 94906320} +{"current_steps": 140830, "total_steps": 204665, "loss": 0.0, "lr": 5.364121762467165e-07, "epoch": 3.440500329807246, "percentage": 68.81, "elapsed_time": "3:01:55", "remaining_time": "1:22:27", "throughput": 8694.64, "total_tokens": 94909840} +{"current_steps": 140835, "total_steps": 204665, "loss": 0.0737, "lr": 5.363366176323465e-07, "epoch": 3.4406224806390933, "percentage": 68.81, "elapsed_time": "3:01:56", "remaining_time": "1:22:27", "throughput": 8694.68, "total_tokens": 94913360} +{"current_steps": 140840, "total_steps": 204665, "loss": 0.0, "lr": 5.362610623898612e-07, "epoch": 3.4407446314709405, "percentage": 68.81, "elapsed_time": "3:01:56", "remaining_time": "1:22:27", "throughput": 8694.72, "total_tokens": 94916816} +{"current_steps": 140845, "total_steps": 204665, "loss": 0.0, "lr": 5.361855105198098e-07, "epoch": 3.4408667823027876, "percentage": 68.82, "elapsed_time": "3:01:56", "remaining_time": "1:22:26", "throughput": 8694.76, "total_tokens": 94920336} +{"current_steps": 140850, "total_steps": 204665, "loss": 0.0279, "lr": 5.361099620227427e-07, "epoch": 3.440988933134635, "percentage": 68.82, "elapsed_time": "3:01:57", "remaining_time": "1:22:26", "throughput": 8694.77, "total_tokens": 94923408} +{"current_steps": 140855, "total_steps": 204665, "loss": 0.0, "lr": 5.360344168992083e-07, "epoch": 3.441111083966482, "percentage": 68.82, "elapsed_time": "3:01:57", "remaining_time": "1:22:25", "throughput": 8694.78, "total_tokens": 94926416} +{"current_steps": 140860, "total_steps": 204665, "loss": 0.095, "lr": 5.359588751497568e-07, "epoch": 3.4412332347983288, "percentage": 68.82, "elapsed_time": "3:01:57", "remaining_time": "1:22:25", "throughput": 8694.81, "total_tokens": 94929744} +{"current_steps": 140865, "total_steps": 204665, "loss": 0.0, "lr": 5.358833367749368e-07, "epoch": 3.441355385630176, "percentage": 68.83, "elapsed_time": "3:01:58", "remaining_time": "1:22:25", "throughput": 8694.84, "total_tokens": 94933072} +{"current_steps": 140870, "total_steps": 204665, "loss": 0.0, "lr": 5.358078017752984e-07, "epoch": 3.441477536462023, "percentage": 68.83, "elapsed_time": "3:01:58", "remaining_time": "1:22:24", "throughput": 8694.9, "total_tokens": 94936912} +{"current_steps": 140875, "total_steps": 204665, "loss": 0.0, "lr": 5.357322701513901e-07, "epoch": 3.4415996872938703, "percentage": 68.83, "elapsed_time": "3:01:59", "remaining_time": "1:22:24", "throughput": 8694.97, "total_tokens": 94940752} +{"current_steps": 140880, "total_steps": 204665, "loss": 0.0002, "lr": 5.356567419037616e-07, "epoch": 3.4417218381257175, "percentage": 68.83, "elapsed_time": "3:01:59", "remaining_time": "1:22:23", "throughput": 8695.0, "total_tokens": 94944080} +{"current_steps": 140885, "total_steps": 204665, "loss": 0.0, "lr": 5.355812170329626e-07, "epoch": 3.4418439889575647, "percentage": 68.84, "elapsed_time": "3:01:59", "remaining_time": "1:22:23", "throughput": 8695.02, "total_tokens": 94947344} +{"current_steps": 140890, "total_steps": 204665, "loss": 0.0006, "lr": 5.355056955395415e-07, "epoch": 3.441966139789412, "percentage": 68.84, "elapsed_time": "3:02:00", "remaining_time": "1:22:23", "throughput": 8695.04, "total_tokens": 94950544} +{"current_steps": 140895, "total_steps": 204665, "loss": 0.0, "lr": 5.354301774240483e-07, "epoch": 3.442088290621259, "percentage": 68.84, "elapsed_time": "3:02:00", "remaining_time": "1:22:22", "throughput": 8695.04, "total_tokens": 94953488} +{"current_steps": 140900, "total_steps": 204665, "loss": 0.0, "lr": 5.353546626870313e-07, "epoch": 3.4422104414531063, "percentage": 68.84, "elapsed_time": "3:02:00", "remaining_time": "1:22:22", "throughput": 8695.05, "total_tokens": 94956496} +{"current_steps": 140905, "total_steps": 204665, "loss": 0.0001, "lr": 5.352791513290408e-07, "epoch": 3.4423325922849535, "percentage": 68.85, "elapsed_time": "3:02:01", "remaining_time": "1:22:21", "throughput": 8695.08, "total_tokens": 94959952} +{"current_steps": 140910, "total_steps": 204665, "loss": 0.0, "lr": 5.352036433506251e-07, "epoch": 3.4424547431168007, "percentage": 68.85, "elapsed_time": "3:02:01", "remaining_time": "1:22:21", "throughput": 8695.13, "total_tokens": 94963600} +{"current_steps": 140915, "total_steps": 204665, "loss": 0.0, "lr": 5.351281387523332e-07, "epoch": 3.442576893948648, "percentage": 68.85, "elapsed_time": "3:02:01", "remaining_time": "1:22:21", "throughput": 8695.17, "total_tokens": 94967056} +{"current_steps": 140920, "total_steps": 204665, "loss": 0.0, "lr": 5.35052637534715e-07, "epoch": 3.442699044780495, "percentage": 68.85, "elapsed_time": "3:02:02", "remaining_time": "1:22:20", "throughput": 8695.18, "total_tokens": 94970064} +{"current_steps": 140925, "total_steps": 204665, "loss": 0.0, "lr": 5.349771396983185e-07, "epoch": 3.4428211956123422, "percentage": 68.86, "elapsed_time": "3:02:02", "remaining_time": "1:22:20", "throughput": 8695.2, "total_tokens": 94973392} +{"current_steps": 140930, "total_steps": 204665, "loss": 0.0001, "lr": 5.349016452436938e-07, "epoch": 3.4429433464441894, "percentage": 68.86, "elapsed_time": "3:02:02", "remaining_time": "1:22:19", "throughput": 8695.23, "total_tokens": 94976656} +{"current_steps": 140935, "total_steps": 204665, "loss": 0.0, "lr": 5.348261541713891e-07, "epoch": 3.4430654972760366, "percentage": 68.86, "elapsed_time": "3:02:03", "remaining_time": "1:22:19", "throughput": 8695.24, "total_tokens": 94979728} +{"current_steps": 140940, "total_steps": 204665, "loss": 0.0, "lr": 5.347506664819536e-07, "epoch": 3.443187648107884, "percentage": 68.86, "elapsed_time": "3:02:03", "remaining_time": "1:22:18", "throughput": 8695.27, "total_tokens": 94983056} +{"current_steps": 140945, "total_steps": 204665, "loss": 0.043, "lr": 5.346751821759369e-07, "epoch": 3.4433097989397305, "percentage": 68.87, "elapsed_time": "3:02:03", "remaining_time": "1:22:18", "throughput": 8695.33, "total_tokens": 94986832} +{"current_steps": 140950, "total_steps": 204665, "loss": 0.0, "lr": 5.34599701253887e-07, "epoch": 3.443431949771578, "percentage": 68.87, "elapsed_time": "3:02:04", "remaining_time": "1:22:18", "throughput": 8695.35, "total_tokens": 94990096} +{"current_steps": 140955, "total_steps": 204665, "loss": 0.0, "lr": 5.345242237163537e-07, "epoch": 3.443554100603425, "percentage": 68.87, "elapsed_time": "3:02:04", "remaining_time": "1:22:17", "throughput": 8695.39, "total_tokens": 94993552} +{"current_steps": 140960, "total_steps": 204665, "loss": 0.0, "lr": 5.34448749563885e-07, "epoch": 3.443676251435272, "percentage": 68.87, "elapsed_time": "3:02:04", "remaining_time": "1:22:17", "throughput": 8695.47, "total_tokens": 94997584} +{"current_steps": 140965, "total_steps": 204665, "loss": 0.0, "lr": 5.343732787970305e-07, "epoch": 3.4437984022671193, "percentage": 68.88, "elapsed_time": "3:02:05", "remaining_time": "1:22:16", "throughput": 8695.5, "total_tokens": 95000912} +{"current_steps": 140970, "total_steps": 204665, "loss": 0.0001, "lr": 5.342978114163384e-07, "epoch": 3.4439205530989665, "percentage": 68.88, "elapsed_time": "3:02:05", "remaining_time": "1:22:16", "throughput": 8695.53, "total_tokens": 95004240} +{"current_steps": 140975, "total_steps": 204665, "loss": 0.0204, "lr": 5.342223474223579e-07, "epoch": 3.4440427039308137, "percentage": 68.88, "elapsed_time": "3:02:06", "remaining_time": "1:22:16", "throughput": 8695.58, "total_tokens": 95007952} +{"current_steps": 140980, "total_steps": 204665, "loss": 0.081, "lr": 5.341468868156382e-07, "epoch": 3.444164854762661, "percentage": 68.88, "elapsed_time": "3:02:06", "remaining_time": "1:22:15", "throughput": 8695.6, "total_tokens": 95011152} +{"current_steps": 140985, "total_steps": 204665, "loss": 0.0001, "lr": 5.340714295967272e-07, "epoch": 3.444287005594508, "percentage": 68.89, "elapsed_time": "3:02:06", "remaining_time": "1:22:15", "throughput": 8695.63, "total_tokens": 95014480} +{"current_steps": 140990, "total_steps": 204665, "loss": 0.0439, "lr": 5.339959757661745e-07, "epoch": 3.4444091564263553, "percentage": 68.89, "elapsed_time": "3:02:07", "remaining_time": "1:22:14", "throughput": 8695.67, "total_tokens": 95018000} +{"current_steps": 140995, "total_steps": 204665, "loss": 0.0, "lr": 5.339205253245281e-07, "epoch": 3.4445313072582024, "percentage": 68.89, "elapsed_time": "3:02:07", "remaining_time": "1:22:14", "throughput": 8695.69, "total_tokens": 95021264} +{"current_steps": 141000, "total_steps": 204665, "loss": 0.0001, "lr": 5.338450782723373e-07, "epoch": 3.4446534580900496, "percentage": 68.89, "elapsed_time": "3:02:07", "remaining_time": "1:22:14", "throughput": 8695.72, "total_tokens": 95024528} +{"current_steps": 141005, "total_steps": 204665, "loss": 0.0001, "lr": 5.337696346101502e-07, "epoch": 3.444775608921897, "percentage": 68.9, "elapsed_time": "3:02:08", "remaining_time": "1:22:13", "throughput": 8695.77, "total_tokens": 95028176} +{"current_steps": 141010, "total_steps": 204665, "loss": 0.0, "lr": 5.336941943385158e-07, "epoch": 3.444897759753744, "percentage": 68.9, "elapsed_time": "3:02:08", "remaining_time": "1:22:13", "throughput": 8695.78, "total_tokens": 95031312} +{"current_steps": 141015, "total_steps": 204665, "loss": 0.0, "lr": 5.336187574579829e-07, "epoch": 3.445019910585591, "percentage": 68.9, "elapsed_time": "3:02:08", "remaining_time": "1:22:12", "throughput": 8695.81, "total_tokens": 95034640} +{"current_steps": 141020, "total_steps": 204665, "loss": 0.0, "lr": 5.335433239690994e-07, "epoch": 3.4451420614174384, "percentage": 68.9, "elapsed_time": "3:02:09", "remaining_time": "1:22:12", "throughput": 8695.84, "total_tokens": 95037968} +{"current_steps": 141025, "total_steps": 204665, "loss": 0.0, "lr": 5.334678938724147e-07, "epoch": 3.4452642122492856, "percentage": 68.91, "elapsed_time": "3:02:09", "remaining_time": "1:22:12", "throughput": 8695.87, "total_tokens": 95041296} +{"current_steps": 141030, "total_steps": 204665, "loss": 0.0, "lr": 5.333924671684765e-07, "epoch": 3.4453863630811328, "percentage": 68.91, "elapsed_time": "3:02:09", "remaining_time": "1:22:11", "throughput": 8695.89, "total_tokens": 95044624} +{"current_steps": 141035, "total_steps": 204665, "loss": 0.0016, "lr": 5.333170438578338e-07, "epoch": 3.44550851391298, "percentage": 68.91, "elapsed_time": "3:02:10", "remaining_time": "1:22:11", "throughput": 8695.92, "total_tokens": 95047824} +{"current_steps": 141040, "total_steps": 204665, "loss": 0.0, "lr": 5.332416239410355e-07, "epoch": 3.4456306647448267, "percentage": 68.91, "elapsed_time": "3:02:10", "remaining_time": "1:22:10", "throughput": 8695.97, "total_tokens": 95051472} +{"current_steps": 141045, "total_steps": 204665, "loss": 0.0011, "lr": 5.331662074186294e-07, "epoch": 3.445752815576674, "percentage": 68.92, "elapsed_time": "3:02:10", "remaining_time": "1:22:10", "throughput": 8695.98, "total_tokens": 95054544} +{"current_steps": 141050, "total_steps": 204665, "loss": 0.0, "lr": 5.330907942911644e-07, "epoch": 3.445874966408521, "percentage": 68.92, "elapsed_time": "3:02:11", "remaining_time": "1:22:10", "throughput": 8696.01, "total_tokens": 95058000} +{"current_steps": 141055, "total_steps": 204665, "loss": 0.0, "lr": 5.330153845591884e-07, "epoch": 3.4459971172403683, "percentage": 68.92, "elapsed_time": "3:02:11", "remaining_time": "1:22:09", "throughput": 8696.06, "total_tokens": 95061520} +{"current_steps": 141060, "total_steps": 204665, "loss": 0.0, "lr": 5.329399782232501e-07, "epoch": 3.4461192680722155, "percentage": 68.92, "elapsed_time": "3:02:11", "remaining_time": "1:22:09", "throughput": 8696.1, "total_tokens": 95065104} +{"current_steps": 141065, "total_steps": 204665, "loss": 0.0, "lr": 5.328645752838983e-07, "epoch": 3.4462414189040627, "percentage": 68.92, "elapsed_time": "3:02:12", "remaining_time": "1:22:08", "throughput": 8696.13, "total_tokens": 95068496} +{"current_steps": 141070, "total_steps": 204665, "loss": 0.0, "lr": 5.327891757416806e-07, "epoch": 3.44636356973591, "percentage": 68.93, "elapsed_time": "3:02:12", "remaining_time": "1:22:08", "throughput": 8696.16, "total_tokens": 95071760} +{"current_steps": 141075, "total_steps": 204665, "loss": 0.0, "lr": 5.327137795971461e-07, "epoch": 3.446485720567757, "percentage": 68.93, "elapsed_time": "3:02:12", "remaining_time": "1:22:08", "throughput": 8696.18, "total_tokens": 95075024} +{"current_steps": 141080, "total_steps": 204665, "loss": 0.0, "lr": 5.326383868508422e-07, "epoch": 3.4466078713996042, "percentage": 68.93, "elapsed_time": "3:02:13", "remaining_time": "1:22:07", "throughput": 8696.19, "total_tokens": 95078032} +{"current_steps": 141085, "total_steps": 204665, "loss": 0.0024, "lr": 5.325629975033181e-07, "epoch": 3.4467300222314514, "percentage": 68.93, "elapsed_time": "3:02:13", "remaining_time": "1:22:07", "throughput": 8696.28, "total_tokens": 95082320} +{"current_steps": 141090, "total_steps": 204665, "loss": 0.0, "lr": 5.324876115551211e-07, "epoch": 3.4468521730632986, "percentage": 68.94, "elapsed_time": "3:02:14", "remaining_time": "1:22:06", "throughput": 8696.31, "total_tokens": 95085584} +{"current_steps": 141095, "total_steps": 204665, "loss": 0.0002, "lr": 5.324122290068001e-07, "epoch": 3.446974323895146, "percentage": 68.94, "elapsed_time": "3:02:14", "remaining_time": "1:22:06", "throughput": 8696.34, "total_tokens": 95088912} +{"current_steps": 141100, "total_steps": 204665, "loss": 0.0, "lr": 5.323368498589035e-07, "epoch": 3.447096474726993, "percentage": 68.94, "elapsed_time": "3:02:14", "remaining_time": "1:22:06", "throughput": 8696.32, "total_tokens": 95091664} +{"current_steps": 141105, "total_steps": 204665, "loss": 0.0, "lr": 5.322614741119791e-07, "epoch": 3.44721862555884, "percentage": 68.94, "elapsed_time": "3:02:15", "remaining_time": "1:22:05", "throughput": 8696.35, "total_tokens": 95094928} +{"current_steps": 141110, "total_steps": 204665, "loss": 0.0, "lr": 5.321861017665745e-07, "epoch": 3.4473407763906874, "percentage": 68.95, "elapsed_time": "3:02:15", "remaining_time": "1:22:05", "throughput": 8696.37, "total_tokens": 95098256} +{"current_steps": 141115, "total_steps": 204665, "loss": 0.0, "lr": 5.321107328232391e-07, "epoch": 3.4474629272225346, "percentage": 68.95, "elapsed_time": "3:02:15", "remaining_time": "1:22:04", "throughput": 8696.39, "total_tokens": 95101328} +{"current_steps": 141120, "total_steps": 204665, "loss": 0.0256, "lr": 5.320353672825197e-07, "epoch": 3.4475850780543817, "percentage": 68.95, "elapsed_time": "3:02:16", "remaining_time": "1:22:04", "throughput": 8696.41, "total_tokens": 95104656} +{"current_steps": 141125, "total_steps": 204665, "loss": 0.0, "lr": 5.319600051449653e-07, "epoch": 3.4477072288862285, "percentage": 68.95, "elapsed_time": "3:02:16", "remaining_time": "1:22:04", "throughput": 8696.47, "total_tokens": 95108432} +{"current_steps": 141130, "total_steps": 204665, "loss": 0.0, "lr": 5.318846464111235e-07, "epoch": 3.447829379718076, "percentage": 68.96, "elapsed_time": "3:02:16", "remaining_time": "1:22:03", "throughput": 8696.5, "total_tokens": 95111760} +{"current_steps": 141135, "total_steps": 204665, "loss": 0.0, "lr": 5.318092910815426e-07, "epoch": 3.447951530549923, "percentage": 68.96, "elapsed_time": "3:02:17", "remaining_time": "1:22:03", "throughput": 8696.51, "total_tokens": 95114896} +{"current_steps": 141140, "total_steps": 204665, "loss": 0.0, "lr": 5.317339391567702e-07, "epoch": 3.44807368138177, "percentage": 68.96, "elapsed_time": "3:02:17", "remaining_time": "1:22:02", "throughput": 8696.54, "total_tokens": 95118160} +{"current_steps": 141145, "total_steps": 204665, "loss": 0.0, "lr": 5.31658590637355e-07, "epoch": 3.4481958322136173, "percentage": 68.96, "elapsed_time": "3:02:17", "remaining_time": "1:22:02", "throughput": 8696.59, "total_tokens": 95121808} +{"current_steps": 141150, "total_steps": 204665, "loss": 0.0, "lr": 5.315832455238439e-07, "epoch": 3.4483179830454644, "percentage": 68.97, "elapsed_time": "3:02:18", "remaining_time": "1:22:01", "throughput": 8696.63, "total_tokens": 95125328} +{"current_steps": 141155, "total_steps": 204665, "loss": 0.0, "lr": 5.315079038167856e-07, "epoch": 3.4484401338773116, "percentage": 68.97, "elapsed_time": "3:02:18", "remaining_time": "1:22:01", "throughput": 8696.66, "total_tokens": 95128656} +{"current_steps": 141160, "total_steps": 204665, "loss": 0.0, "lr": 5.314325655167282e-07, "epoch": 3.448562284709159, "percentage": 68.97, "elapsed_time": "3:02:18", "remaining_time": "1:22:01", "throughput": 8696.68, "total_tokens": 95131920} +{"current_steps": 141165, "total_steps": 204665, "loss": 0.0, "lr": 5.313572306242187e-07, "epoch": 3.448684435541006, "percentage": 68.97, "elapsed_time": "3:02:19", "remaining_time": "1:22:00", "throughput": 8696.73, "total_tokens": 95135568} +{"current_steps": 141170, "total_steps": 204665, "loss": 0.143, "lr": 5.312818991398061e-07, "epoch": 3.448806586372853, "percentage": 68.98, "elapsed_time": "3:02:19", "remaining_time": "1:22:00", "throughput": 8696.75, "total_tokens": 95138768} +{"current_steps": 141175, "total_steps": 204665, "loss": 0.0, "lr": 5.31206571064037e-07, "epoch": 3.4489287372047004, "percentage": 68.98, "elapsed_time": "3:02:19", "remaining_time": "1:21:59", "throughput": 8696.8, "total_tokens": 95142352} +{"current_steps": 141180, "total_steps": 204665, "loss": 0.0266, "lr": 5.311312463974602e-07, "epoch": 3.4490508880365476, "percentage": 68.98, "elapsed_time": "3:02:20", "remaining_time": "1:21:59", "throughput": 8696.82, "total_tokens": 95145680} +{"current_steps": 141185, "total_steps": 204665, "loss": 0.0, "lr": 5.310559251406228e-07, "epoch": 3.4491730388683948, "percentage": 68.98, "elapsed_time": "3:02:20", "remaining_time": "1:21:59", "throughput": 8696.88, "total_tokens": 95149456} +{"current_steps": 141190, "total_steps": 204665, "loss": 0.0001, "lr": 5.309806072940728e-07, "epoch": 3.449295189700242, "percentage": 68.99, "elapsed_time": "3:02:20", "remaining_time": "1:21:58", "throughput": 8696.9, "total_tokens": 95152592} +{"current_steps": 141195, "total_steps": 204665, "loss": 0.0804, "lr": 5.309052928583583e-07, "epoch": 3.449417340532089, "percentage": 68.99, "elapsed_time": "3:02:21", "remaining_time": "1:21:58", "throughput": 8696.96, "total_tokens": 95156304} +{"current_steps": 141200, "total_steps": 204665, "loss": 0.0568, "lr": 5.308299818340263e-07, "epoch": 3.4495394913639363, "percentage": 68.99, "elapsed_time": "3:02:21", "remaining_time": "1:21:57", "throughput": 8696.98, "total_tokens": 95159568} +{"current_steps": 141205, "total_steps": 204665, "loss": 0.0, "lr": 5.307546742216253e-07, "epoch": 3.4496616421957835, "percentage": 68.99, "elapsed_time": "3:02:22", "remaining_time": "1:21:57", "throughput": 8697.0, "total_tokens": 95162832} +{"current_steps": 141210, "total_steps": 204665, "loss": 0.0, "lr": 5.306793700217021e-07, "epoch": 3.4497837930276303, "percentage": 69.0, "elapsed_time": "3:02:22", "remaining_time": "1:21:57", "throughput": 8697.04, "total_tokens": 95166352} +{"current_steps": 141215, "total_steps": 204665, "loss": 0.0625, "lr": 5.306040692348053e-07, "epoch": 3.449905943859478, "percentage": 69.0, "elapsed_time": "3:02:22", "remaining_time": "1:21:56", "throughput": 8697.09, "total_tokens": 95169936} +{"current_steps": 141220, "total_steps": 204665, "loss": 0.0, "lr": 5.305287718614818e-07, "epoch": 3.4500280946913247, "percentage": 69.0, "elapsed_time": "3:02:23", "remaining_time": "1:21:56", "throughput": 8697.1, "total_tokens": 95173008} +{"current_steps": 141225, "total_steps": 204665, "loss": 0.0888, "lr": 5.304534779022789e-07, "epoch": 3.450150245523172, "percentage": 69.0, "elapsed_time": "3:02:23", "remaining_time": "1:21:55", "throughput": 8697.11, "total_tokens": 95176080} +{"current_steps": 141230, "total_steps": 204665, "loss": 0.0227, "lr": 5.303781873577451e-07, "epoch": 3.450272396355019, "percentage": 69.01, "elapsed_time": "3:02:23", "remaining_time": "1:21:55", "throughput": 8697.12, "total_tokens": 95179152} +{"current_steps": 141235, "total_steps": 204665, "loss": 0.0, "lr": 5.303029002284271e-07, "epoch": 3.4503945471868662, "percentage": 69.01, "elapsed_time": "3:02:24", "remaining_time": "1:21:55", "throughput": 8697.14, "total_tokens": 95182352} +{"current_steps": 141240, "total_steps": 204665, "loss": 0.0002, "lr": 5.30227616514873e-07, "epoch": 3.4505166980187134, "percentage": 69.01, "elapsed_time": "3:02:24", "remaining_time": "1:21:54", "throughput": 8697.18, "total_tokens": 95185808} +{"current_steps": 141245, "total_steps": 204665, "loss": 0.0, "lr": 5.301523362176297e-07, "epoch": 3.4506388488505606, "percentage": 69.01, "elapsed_time": "3:02:24", "remaining_time": "1:21:54", "throughput": 8697.21, "total_tokens": 95189136} +{"current_steps": 141250, "total_steps": 204665, "loss": 0.0, "lr": 5.30077059337245e-07, "epoch": 3.450760999682408, "percentage": 69.02, "elapsed_time": "3:02:25", "remaining_time": "1:21:53", "throughput": 8697.24, "total_tokens": 95192528} +{"current_steps": 141255, "total_steps": 204665, "loss": 0.028, "lr": 5.300017858742667e-07, "epoch": 3.450883150514255, "percentage": 69.02, "elapsed_time": "3:02:25", "remaining_time": "1:21:53", "throughput": 8697.26, "total_tokens": 95195728} +{"current_steps": 141260, "total_steps": 204665, "loss": 0.0001, "lr": 5.299265158292414e-07, "epoch": 3.451005301346102, "percentage": 69.02, "elapsed_time": "3:02:25", "remaining_time": "1:21:53", "throughput": 8697.28, "total_tokens": 95198992} +{"current_steps": 141265, "total_steps": 204665, "loss": 0.0266, "lr": 5.298512492027174e-07, "epoch": 3.4511274521779494, "percentage": 69.02, "elapsed_time": "3:02:26", "remaining_time": "1:21:52", "throughput": 8697.31, "total_tokens": 95202256} +{"current_steps": 141270, "total_steps": 204665, "loss": 0.0006, "lr": 5.297759859952411e-07, "epoch": 3.4512496030097966, "percentage": 69.02, "elapsed_time": "3:02:26", "remaining_time": "1:21:52", "throughput": 8697.31, "total_tokens": 95205264} +{"current_steps": 141275, "total_steps": 204665, "loss": 0.0002, "lr": 5.297007262073607e-07, "epoch": 3.4513717538416437, "percentage": 69.03, "elapsed_time": "3:02:26", "remaining_time": "1:21:51", "throughput": 8697.32, "total_tokens": 95208208} +{"current_steps": 141280, "total_steps": 204665, "loss": 0.1171, "lr": 5.296254698396227e-07, "epoch": 3.451493904673491, "percentage": 69.03, "elapsed_time": "3:02:27", "remaining_time": "1:21:51", "throughput": 8697.35, "total_tokens": 95211600} +{"current_steps": 141285, "total_steps": 204665, "loss": 0.0, "lr": 5.295502168925749e-07, "epoch": 3.451616055505338, "percentage": 69.03, "elapsed_time": "3:02:27", "remaining_time": "1:21:51", "throughput": 8697.36, "total_tokens": 95214608} +{"current_steps": 141290, "total_steps": 204665, "loss": 0.0, "lr": 5.294749673667646e-07, "epoch": 3.4517382063371853, "percentage": 69.03, "elapsed_time": "3:02:27", "remaining_time": "1:21:50", "throughput": 8697.36, "total_tokens": 95217616} +{"current_steps": 141295, "total_steps": 204665, "loss": 0.0, "lr": 5.293997212627388e-07, "epoch": 3.4518603571690325, "percentage": 69.04, "elapsed_time": "3:02:28", "remaining_time": "1:21:50", "throughput": 8697.4, "total_tokens": 95221072} +{"current_steps": 141300, "total_steps": 204665, "loss": 0.0, "lr": 5.293244785810451e-07, "epoch": 3.4519825080008797, "percentage": 69.04, "elapsed_time": "3:02:28", "remaining_time": "1:21:49", "throughput": 8697.42, "total_tokens": 95224208} +{"current_steps": 141305, "total_steps": 204665, "loss": 0.0, "lr": 5.292492393222299e-07, "epoch": 3.4521046588327264, "percentage": 69.04, "elapsed_time": "3:02:28", "remaining_time": "1:21:49", "throughput": 8697.45, "total_tokens": 95227536} +{"current_steps": 141310, "total_steps": 204665, "loss": 0.0001, "lr": 5.291740034868413e-07, "epoch": 3.4522268096645736, "percentage": 69.04, "elapsed_time": "3:02:29", "remaining_time": "1:21:48", "throughput": 8697.46, "total_tokens": 95230736} +{"current_steps": 141315, "total_steps": 204665, "loss": 0.0001, "lr": 5.290987710754256e-07, "epoch": 3.452348960496421, "percentage": 69.05, "elapsed_time": "3:02:29", "remaining_time": "1:21:48", "throughput": 8697.52, "total_tokens": 95234384} +{"current_steps": 141320, "total_steps": 204665, "loss": 0.0, "lr": 5.290235420885307e-07, "epoch": 3.452471111328268, "percentage": 69.05, "elapsed_time": "3:02:29", "remaining_time": "1:21:48", "throughput": 8697.56, "total_tokens": 95237904} +{"current_steps": 141325, "total_steps": 204665, "loss": 0.0, "lr": 5.289483165267033e-07, "epoch": 3.452593262160115, "percentage": 69.05, "elapsed_time": "3:02:30", "remaining_time": "1:21:47", "throughput": 8697.56, "total_tokens": 95240848} +{"current_steps": 141330, "total_steps": 204665, "loss": 0.0348, "lr": 5.2887309439049e-07, "epoch": 3.4527154129919624, "percentage": 69.05, "elapsed_time": "3:02:30", "remaining_time": "1:21:47", "throughput": 8697.58, "total_tokens": 95243984} +{"current_steps": 141335, "total_steps": 204665, "loss": 0.0439, "lr": 5.287978756804387e-07, "epoch": 3.4528375638238096, "percentage": 69.06, "elapsed_time": "3:02:30", "remaining_time": "1:21:46", "throughput": 8697.61, "total_tokens": 95247376} +{"current_steps": 141340, "total_steps": 204665, "loss": 0.0, "lr": 5.287226603970956e-07, "epoch": 3.4529597146556568, "percentage": 69.06, "elapsed_time": "3:02:31", "remaining_time": "1:21:46", "throughput": 8697.63, "total_tokens": 95250576} +{"current_steps": 141345, "total_steps": 204665, "loss": 0.0685, "lr": 5.28647448541008e-07, "epoch": 3.453081865487504, "percentage": 69.06, "elapsed_time": "3:02:31", "remaining_time": "1:21:46", "throughput": 8697.66, "total_tokens": 95253904} +{"current_steps": 141350, "total_steps": 204665, "loss": 0.0, "lr": 5.285722401127234e-07, "epoch": 3.453204016319351, "percentage": 69.06, "elapsed_time": "3:02:31", "remaining_time": "1:21:45", "throughput": 8697.64, "total_tokens": 95256592} +{"current_steps": 141355, "total_steps": 204665, "loss": 0.0, "lr": 5.284970351127878e-07, "epoch": 3.4533261671511983, "percentage": 69.07, "elapsed_time": "3:02:32", "remaining_time": "1:21:45", "throughput": 8697.69, "total_tokens": 95260112} +{"current_steps": 141360, "total_steps": 204665, "loss": 0.0001, "lr": 5.284218335417489e-07, "epoch": 3.4534483179830455, "percentage": 69.07, "elapsed_time": "3:02:32", "remaining_time": "1:21:44", "throughput": 8697.71, "total_tokens": 95263312} +{"current_steps": 141365, "total_steps": 204665, "loss": 0.0, "lr": 5.283466354001531e-07, "epoch": 3.4535704688148927, "percentage": 69.07, "elapsed_time": "3:02:33", "remaining_time": "1:21:44", "throughput": 8697.75, "total_tokens": 95266832} +{"current_steps": 141370, "total_steps": 204665, "loss": 0.0, "lr": 5.282714406885478e-07, "epoch": 3.45369261964674, "percentage": 69.07, "elapsed_time": "3:02:33", "remaining_time": "1:21:44", "throughput": 8697.78, "total_tokens": 95270160} +{"current_steps": 141375, "total_steps": 204665, "loss": 0.044, "lr": 5.28196249407479e-07, "epoch": 3.453814770478587, "percentage": 69.08, "elapsed_time": "3:02:33", "remaining_time": "1:21:43", "throughput": 8697.78, "total_tokens": 95273168} +{"current_steps": 141380, "total_steps": 204665, "loss": 0.0, "lr": 5.281210615574939e-07, "epoch": 3.4539369213104343, "percentage": 69.08, "elapsed_time": "3:02:34", "remaining_time": "1:21:43", "throughput": 8697.81, "total_tokens": 95276496} +{"current_steps": 141385, "total_steps": 204665, "loss": 0.0001, "lr": 5.280458771391398e-07, "epoch": 3.4540590721422815, "percentage": 69.08, "elapsed_time": "3:02:34", "remaining_time": "1:21:42", "throughput": 8697.84, "total_tokens": 95279760} +{"current_steps": 141390, "total_steps": 204665, "loss": 0.0006, "lr": 5.279706961529627e-07, "epoch": 3.454181222974128, "percentage": 69.08, "elapsed_time": "3:02:34", "remaining_time": "1:21:42", "throughput": 8697.91, "total_tokens": 95283728} +{"current_steps": 141395, "total_steps": 204665, "loss": 0.108, "lr": 5.2789551859951e-07, "epoch": 3.454303373805976, "percentage": 69.09, "elapsed_time": "3:02:35", "remaining_time": "1:21:42", "throughput": 8697.94, "total_tokens": 95286992} +{"current_steps": 141400, "total_steps": 204665, "loss": 0.0003, "lr": 5.278203444793276e-07, "epoch": 3.4544255246378226, "percentage": 69.09, "elapsed_time": "3:02:35", "remaining_time": "1:21:41", "throughput": 8697.98, "total_tokens": 95290512} +{"current_steps": 141405, "total_steps": 204665, "loss": 0.0, "lr": 5.277451737929628e-07, "epoch": 3.45454767546967, "percentage": 69.09, "elapsed_time": "3:02:35", "remaining_time": "1:21:41", "throughput": 8698.01, "total_tokens": 95293904} +{"current_steps": 141410, "total_steps": 204665, "loss": 0.0, "lr": 5.276700065409623e-07, "epoch": 3.454669826301517, "percentage": 69.09, "elapsed_time": "3:02:36", "remaining_time": "1:21:40", "throughput": 8698.06, "total_tokens": 95297552} +{"current_steps": 141415, "total_steps": 204665, "loss": 0.0, "lr": 5.275948427238727e-07, "epoch": 3.454791977133364, "percentage": 69.1, "elapsed_time": "3:02:36", "remaining_time": "1:21:40", "throughput": 8698.11, "total_tokens": 95301200} +{"current_steps": 141420, "total_steps": 204665, "loss": 0.0, "lr": 5.275196823422401e-07, "epoch": 3.4549141279652114, "percentage": 69.1, "elapsed_time": "3:02:36", "remaining_time": "1:21:40", "throughput": 8698.18, "total_tokens": 95304976} +{"current_steps": 141425, "total_steps": 204665, "loss": 0.0002, "lr": 5.274445253966118e-07, "epoch": 3.4550362787970585, "percentage": 69.1, "elapsed_time": "3:02:37", "remaining_time": "1:21:39", "throughput": 8698.2, "total_tokens": 95308304} +{"current_steps": 141430, "total_steps": 204665, "loss": 0.0, "lr": 5.273693718875336e-07, "epoch": 3.4551584296289057, "percentage": 69.1, "elapsed_time": "3:02:37", "remaining_time": "1:21:39", "throughput": 8698.24, "total_tokens": 95311696} +{"current_steps": 141435, "total_steps": 204665, "loss": 0.0, "lr": 5.27294221815553e-07, "epoch": 3.455280580460753, "percentage": 69.11, "elapsed_time": "3:02:37", "remaining_time": "1:21:38", "throughput": 8698.26, "total_tokens": 95314960} +{"current_steps": 141440, "total_steps": 204665, "loss": 0.0129, "lr": 5.272190751812154e-07, "epoch": 3.4554027312926, "percentage": 69.11, "elapsed_time": "3:02:38", "remaining_time": "1:21:38", "throughput": 8698.32, "total_tokens": 95318672} +{"current_steps": 141445, "total_steps": 204665, "loss": 0.0, "lr": 5.271439319850682e-07, "epoch": 3.4555248821244473, "percentage": 69.11, "elapsed_time": "3:02:38", "remaining_time": "1:21:38", "throughput": 8698.36, "total_tokens": 95322128} +{"current_steps": 141450, "total_steps": 204665, "loss": 0.0001, "lr": 5.270687922276573e-07, "epoch": 3.4556470329562945, "percentage": 69.11, "elapsed_time": "3:02:38", "remaining_time": "1:21:37", "throughput": 8698.37, "total_tokens": 95325264} +{"current_steps": 141455, "total_steps": 204665, "loss": 0.0002, "lr": 5.269936559095296e-07, "epoch": 3.4557691837881417, "percentage": 69.12, "elapsed_time": "3:02:39", "remaining_time": "1:21:37", "throughput": 8698.38, "total_tokens": 95328336} +{"current_steps": 141460, "total_steps": 204665, "loss": 0.0, "lr": 5.269185230312309e-07, "epoch": 3.455891334619989, "percentage": 69.12, "elapsed_time": "3:02:39", "remaining_time": "1:21:36", "throughput": 8698.41, "total_tokens": 95331664} +{"current_steps": 141465, "total_steps": 204665, "loss": 0.0, "lr": 5.268433935933079e-07, "epoch": 3.456013485451836, "percentage": 69.12, "elapsed_time": "3:02:40", "remaining_time": "1:21:36", "throughput": 8698.41, "total_tokens": 95334608} +{"current_steps": 141470, "total_steps": 204665, "loss": 0.0, "lr": 5.267682675963074e-07, "epoch": 3.4561356362836833, "percentage": 69.12, "elapsed_time": "3:02:40", "remaining_time": "1:21:36", "throughput": 8698.47, "total_tokens": 95338256} +{"current_steps": 141475, "total_steps": 204665, "loss": 0.0064, "lr": 5.26693145040775e-07, "epoch": 3.4562577871155304, "percentage": 69.13, "elapsed_time": "3:02:40", "remaining_time": "1:21:35", "throughput": 8698.49, "total_tokens": 95341584} +{"current_steps": 141480, "total_steps": 204665, "loss": 0.0, "lr": 5.266180259272578e-07, "epoch": 3.4563799379473776, "percentage": 69.13, "elapsed_time": "3:02:41", "remaining_time": "1:21:35", "throughput": 8698.51, "total_tokens": 95344720} +{"current_steps": 141485, "total_steps": 204665, "loss": 0.0002, "lr": 5.265429102563012e-07, "epoch": 3.4565020887792244, "percentage": 69.13, "elapsed_time": "3:02:41", "remaining_time": "1:21:34", "throughput": 8698.51, "total_tokens": 95347728} +{"current_steps": 141490, "total_steps": 204665, "loss": 0.0, "lr": 5.264677980284522e-07, "epoch": 3.4566242396110716, "percentage": 69.13, "elapsed_time": "3:02:41", "remaining_time": "1:21:34", "throughput": 8698.53, "total_tokens": 95350928} +{"current_steps": 141495, "total_steps": 204665, "loss": 0.0, "lr": 5.263926892442565e-07, "epoch": 3.4567463904429188, "percentage": 69.13, "elapsed_time": "3:02:42", "remaining_time": "1:21:33", "throughput": 8698.54, "total_tokens": 95354000} +{"current_steps": 141500, "total_steps": 204665, "loss": 0.0, "lr": 5.263175839042604e-07, "epoch": 3.456868541274766, "percentage": 69.14, "elapsed_time": "3:02:42", "remaining_time": "1:21:33", "throughput": 8698.57, "total_tokens": 95357328} +{"current_steps": 141505, "total_steps": 204665, "loss": 0.0, "lr": 5.262424820090108e-07, "epoch": 3.456990692106613, "percentage": 69.14, "elapsed_time": "3:02:42", "remaining_time": "1:21:33", "throughput": 8698.6, "total_tokens": 95360592} +{"current_steps": 141510, "total_steps": 204665, "loss": 0.0002, "lr": 5.261673835590527e-07, "epoch": 3.4571128429384603, "percentage": 69.14, "elapsed_time": "3:02:43", "remaining_time": "1:21:32", "throughput": 8698.61, "total_tokens": 95363664} +{"current_steps": 141515, "total_steps": 204665, "loss": 0.0, "lr": 5.260922885549333e-07, "epoch": 3.4572349937703075, "percentage": 69.14, "elapsed_time": "3:02:43", "remaining_time": "1:21:32", "throughput": 8698.62, "total_tokens": 95366736} +{"current_steps": 141520, "total_steps": 204665, "loss": 0.0004, "lr": 5.260171969971981e-07, "epoch": 3.4573571446021547, "percentage": 69.15, "elapsed_time": "3:02:43", "remaining_time": "1:21:31", "throughput": 8698.66, "total_tokens": 95370256} +{"current_steps": 141525, "total_steps": 204665, "loss": 0.0, "lr": 5.259421088863934e-07, "epoch": 3.457479295434002, "percentage": 69.15, "elapsed_time": "3:02:44", "remaining_time": "1:21:31", "throughput": 8698.68, "total_tokens": 95373392} +{"current_steps": 141530, "total_steps": 204665, "loss": 0.0, "lr": 5.258670242230653e-07, "epoch": 3.457601446265849, "percentage": 69.15, "elapsed_time": "3:02:44", "remaining_time": "1:21:31", "throughput": 8698.71, "total_tokens": 95376784} +{"current_steps": 141535, "total_steps": 204665, "loss": 0.0, "lr": 5.257919430077594e-07, "epoch": 3.4577235970976963, "percentage": 69.15, "elapsed_time": "3:02:44", "remaining_time": "1:21:30", "throughput": 8698.74, "total_tokens": 95380048} +{"current_steps": 141540, "total_steps": 204665, "loss": 0.0, "lr": 5.257168652410223e-07, "epoch": 3.4578457479295435, "percentage": 69.16, "elapsed_time": "3:02:45", "remaining_time": "1:21:30", "throughput": 8698.77, "total_tokens": 95383440} +{"current_steps": 141545, "total_steps": 204665, "loss": 0.0002, "lr": 5.256417909233993e-07, "epoch": 3.4579678987613907, "percentage": 69.16, "elapsed_time": "3:02:45", "remaining_time": "1:21:29", "throughput": 8698.82, "total_tokens": 95387088} +{"current_steps": 141550, "total_steps": 204665, "loss": 0.0, "lr": 5.255667200554372e-07, "epoch": 3.458090049593238, "percentage": 69.16, "elapsed_time": "3:02:45", "remaining_time": "1:21:29", "throughput": 8698.86, "total_tokens": 95390544} +{"current_steps": 141555, "total_steps": 204665, "loss": 0.0, "lr": 5.254916526376813e-07, "epoch": 3.458212200425085, "percentage": 69.16, "elapsed_time": "3:02:46", "remaining_time": "1:21:29", "throughput": 8698.92, "total_tokens": 95394256} +{"current_steps": 141560, "total_steps": 204665, "loss": 0.0, "lr": 5.254165886706776e-07, "epoch": 3.4583343512569322, "percentage": 69.17, "elapsed_time": "3:02:46", "remaining_time": "1:21:28", "throughput": 8698.94, "total_tokens": 95397584} +{"current_steps": 141565, "total_steps": 204665, "loss": 0.0, "lr": 5.253415281549726e-07, "epoch": 3.4584565020887794, "percentage": 69.17, "elapsed_time": "3:02:46", "remaining_time": "1:21:28", "throughput": 8699.0, "total_tokens": 95401232} +{"current_steps": 141570, "total_steps": 204665, "loss": 0.0395, "lr": 5.252664710911112e-07, "epoch": 3.458578652920626, "percentage": 69.17, "elapsed_time": "3:02:47", "remaining_time": "1:21:27", "throughput": 8699.0, "total_tokens": 95404176} +{"current_steps": 141575, "total_steps": 204665, "loss": 0.1123, "lr": 5.251914174796401e-07, "epoch": 3.458700803752474, "percentage": 69.17, "elapsed_time": "3:02:47", "remaining_time": "1:21:27", "throughput": 8699.06, "total_tokens": 95407952} +{"current_steps": 141580, "total_steps": 204665, "loss": 0.0645, "lr": 5.251163673211043e-07, "epoch": 3.4588229545843205, "percentage": 69.18, "elapsed_time": "3:02:47", "remaining_time": "1:21:27", "throughput": 8699.08, "total_tokens": 95411216} +{"current_steps": 141585, "total_steps": 204665, "loss": 0.0, "lr": 5.250413206160505e-07, "epoch": 3.4589451054161677, "percentage": 69.18, "elapsed_time": "3:02:48", "remaining_time": "1:21:26", "throughput": 8699.11, "total_tokens": 95414480} +{"current_steps": 141590, "total_steps": 204665, "loss": 0.0, "lr": 5.249662773650235e-07, "epoch": 3.459067256248015, "percentage": 69.18, "elapsed_time": "3:02:48", "remaining_time": "1:21:26", "throughput": 8699.14, "total_tokens": 95417808} +{"current_steps": 141595, "total_steps": 204665, "loss": 0.0, "lr": 5.248912375685694e-07, "epoch": 3.459189407079862, "percentage": 69.18, "elapsed_time": "3:02:48", "remaining_time": "1:21:25", "throughput": 8699.18, "total_tokens": 95421264} +{"current_steps": 141600, "total_steps": 204665, "loss": 0.0, "lr": 5.248162012272345e-07, "epoch": 3.4593115579117093, "percentage": 69.19, "elapsed_time": "3:02:49", "remaining_time": "1:21:25", "throughput": 8699.18, "total_tokens": 95424272} +{"current_steps": 141605, "total_steps": 204665, "loss": 0.0, "lr": 5.247411683415637e-07, "epoch": 3.4594337087435565, "percentage": 69.19, "elapsed_time": "3:02:49", "remaining_time": "1:21:25", "throughput": 8699.22, "total_tokens": 95427728} +{"current_steps": 141610, "total_steps": 204665, "loss": 0.0, "lr": 5.246661389121032e-07, "epoch": 3.4595558595754037, "percentage": 69.19, "elapsed_time": "3:02:50", "remaining_time": "1:21:24", "throughput": 8699.25, "total_tokens": 95430992} +{"current_steps": 141615, "total_steps": 204665, "loss": 0.0, "lr": 5.24591112939398e-07, "epoch": 3.459678010407251, "percentage": 69.19, "elapsed_time": "3:02:50", "remaining_time": "1:21:24", "throughput": 8699.28, "total_tokens": 95434448} +{"current_steps": 141620, "total_steps": 204665, "loss": 0.0, "lr": 5.245160904239947e-07, "epoch": 3.459800161239098, "percentage": 69.2, "elapsed_time": "3:02:50", "remaining_time": "1:21:23", "throughput": 8699.33, "total_tokens": 95438032} +{"current_steps": 141625, "total_steps": 204665, "loss": 0.0, "lr": 5.244410713664377e-07, "epoch": 3.4599223120709452, "percentage": 69.2, "elapsed_time": "3:02:51", "remaining_time": "1:21:23", "throughput": 8699.34, "total_tokens": 95441040} +{"current_steps": 141630, "total_steps": 204665, "loss": 0.0, "lr": 5.243660557672736e-07, "epoch": 3.4600444629027924, "percentage": 69.2, "elapsed_time": "3:02:51", "remaining_time": "1:21:23", "throughput": 8699.4, "total_tokens": 95444816} +{"current_steps": 141635, "total_steps": 204665, "loss": 0.0, "lr": 5.242910436270474e-07, "epoch": 3.4601666137346396, "percentage": 69.2, "elapsed_time": "3:02:51", "remaining_time": "1:21:22", "throughput": 8699.39, "total_tokens": 95447696} +{"current_steps": 141640, "total_steps": 204665, "loss": 0.0, "lr": 5.242160349463043e-07, "epoch": 3.460288764566487, "percentage": 69.21, "elapsed_time": "3:02:52", "remaining_time": "1:21:22", "throughput": 8699.42, "total_tokens": 95450896} +{"current_steps": 141645, "total_steps": 204665, "loss": 0.0738, "lr": 5.241410297255906e-07, "epoch": 3.460410915398334, "percentage": 69.21, "elapsed_time": "3:02:52", "remaining_time": "1:21:21", "throughput": 8699.43, "total_tokens": 95454096} +{"current_steps": 141650, "total_steps": 204665, "loss": 0.0, "lr": 5.240660279654509e-07, "epoch": 3.460533066230181, "percentage": 69.21, "elapsed_time": "3:02:52", "remaining_time": "1:21:21", "throughput": 8699.46, "total_tokens": 95457360} +{"current_steps": 141655, "total_steps": 204665, "loss": 0.0108, "lr": 5.239910296664311e-07, "epoch": 3.460655217062028, "percentage": 69.21, "elapsed_time": "3:02:53", "remaining_time": "1:21:20", "throughput": 8699.48, "total_tokens": 95460560} +{"current_steps": 141660, "total_steps": 204665, "loss": 0.0001, "lr": 5.239160348290769e-07, "epoch": 3.4607773678938756, "percentage": 69.22, "elapsed_time": "3:02:53", "remaining_time": "1:21:20", "throughput": 8699.55, "total_tokens": 95464400} +{"current_steps": 141665, "total_steps": 204665, "loss": 0.0395, "lr": 5.238410434539329e-07, "epoch": 3.4608995187257223, "percentage": 69.22, "elapsed_time": "3:02:53", "remaining_time": "1:21:20", "throughput": 8699.58, "total_tokens": 95467792} +{"current_steps": 141670, "total_steps": 204665, "loss": 0.0, "lr": 5.237660555415454e-07, "epoch": 3.4610216695575695, "percentage": 69.22, "elapsed_time": "3:02:54", "remaining_time": "1:21:19", "throughput": 8699.59, "total_tokens": 95470928} +{"current_steps": 141675, "total_steps": 204665, "loss": 0.0003, "lr": 5.236910710924588e-07, "epoch": 3.4611438203894167, "percentage": 69.22, "elapsed_time": "3:02:54", "remaining_time": "1:21:19", "throughput": 8699.61, "total_tokens": 95474128} +{"current_steps": 141680, "total_steps": 204665, "loss": 0.0001, "lr": 5.236160901072193e-07, "epoch": 3.461265971221264, "percentage": 69.23, "elapsed_time": "3:02:54", "remaining_time": "1:21:18", "throughput": 8699.66, "total_tokens": 95477712} +{"current_steps": 141685, "total_steps": 204665, "loss": 0.0, "lr": 5.235411125863713e-07, "epoch": 3.461388122053111, "percentage": 69.23, "elapsed_time": "3:02:55", "remaining_time": "1:21:18", "throughput": 8699.68, "total_tokens": 95480912} +{"current_steps": 141690, "total_steps": 204665, "loss": 0.0, "lr": 5.234661385304603e-07, "epoch": 3.4615102728849583, "percentage": 69.23, "elapsed_time": "3:02:55", "remaining_time": "1:21:18", "throughput": 8699.69, "total_tokens": 95483920} +{"current_steps": 141695, "total_steps": 204665, "loss": 0.0, "lr": 5.233911679400324e-07, "epoch": 3.4616324237168055, "percentage": 69.23, "elapsed_time": "3:02:55", "remaining_time": "1:21:17", "throughput": 8699.7, "total_tokens": 95486992} +{"current_steps": 141700, "total_steps": 204665, "loss": 0.0, "lr": 5.233162008156316e-07, "epoch": 3.4617545745486527, "percentage": 69.24, "elapsed_time": "3:02:56", "remaining_time": "1:21:17", "throughput": 8699.74, "total_tokens": 95490448} +{"current_steps": 141705, "total_steps": 204665, "loss": 0.0, "lr": 5.232412371578038e-07, "epoch": 3.4618767253805, "percentage": 69.24, "elapsed_time": "3:02:56", "remaining_time": "1:21:16", "throughput": 8699.78, "total_tokens": 95494032} +{"current_steps": 141710, "total_steps": 204665, "loss": 0.0, "lr": 5.231662769670939e-07, "epoch": 3.461998876212347, "percentage": 69.24, "elapsed_time": "3:02:56", "remaining_time": "1:21:16", "throughput": 8699.83, "total_tokens": 95497616} +{"current_steps": 141715, "total_steps": 204665, "loss": 0.0917, "lr": 5.230913202440469e-07, "epoch": 3.462121027044194, "percentage": 69.24, "elapsed_time": "3:02:57", "remaining_time": "1:21:16", "throughput": 8699.89, "total_tokens": 95501328} +{"current_steps": 141720, "total_steps": 204665, "loss": 0.0172, "lr": 5.230163669892085e-07, "epoch": 3.4622431778760414, "percentage": 69.24, "elapsed_time": "3:02:57", "remaining_time": "1:21:15", "throughput": 8699.92, "total_tokens": 95504784} +{"current_steps": 141725, "total_steps": 204665, "loss": 0.0, "lr": 5.229414172031235e-07, "epoch": 3.4623653287078886, "percentage": 69.25, "elapsed_time": "3:02:58", "remaining_time": "1:21:15", "throughput": 8699.94, "total_tokens": 95507920} +{"current_steps": 141730, "total_steps": 204665, "loss": 0.0, "lr": 5.228664708863362e-07, "epoch": 3.462487479539736, "percentage": 69.25, "elapsed_time": "3:02:58", "remaining_time": "1:21:14", "throughput": 8699.97, "total_tokens": 95511248} +{"current_steps": 141735, "total_steps": 204665, "loss": 0.0004, "lr": 5.227915280393928e-07, "epoch": 3.462609630371583, "percentage": 69.25, "elapsed_time": "3:02:58", "remaining_time": "1:21:14", "throughput": 8699.97, "total_tokens": 95514256} +{"current_steps": 141740, "total_steps": 204665, "loss": 0.0, "lr": 5.227165886628373e-07, "epoch": 3.46273178120343, "percentage": 69.25, "elapsed_time": "3:02:59", "remaining_time": "1:21:14", "throughput": 8699.98, "total_tokens": 95517392} +{"current_steps": 141745, "total_steps": 204665, "loss": 0.0, "lr": 5.226416527572157e-07, "epoch": 3.4628539320352774, "percentage": 69.26, "elapsed_time": "3:02:59", "remaining_time": "1:21:13", "throughput": 8700.03, "total_tokens": 95520976} +{"current_steps": 141750, "total_steps": 204665, "loss": 0.0, "lr": 5.225667203230719e-07, "epoch": 3.462976082867124, "percentage": 69.26, "elapsed_time": "3:02:59", "remaining_time": "1:21:13", "throughput": 8700.07, "total_tokens": 95524496} +{"current_steps": 141755, "total_steps": 204665, "loss": 0.0, "lr": 5.224917913609517e-07, "epoch": 3.4630982336989717, "percentage": 69.26, "elapsed_time": "3:03:00", "remaining_time": "1:21:12", "throughput": 8700.13, "total_tokens": 95528208} +{"current_steps": 141760, "total_steps": 204665, "loss": 0.0, "lr": 5.224168658713992e-07, "epoch": 3.4632203845308185, "percentage": 69.26, "elapsed_time": "3:03:00", "remaining_time": "1:21:12", "throughput": 8700.18, "total_tokens": 95531856} +{"current_steps": 141765, "total_steps": 204665, "loss": 0.0, "lr": 5.223419438549601e-07, "epoch": 3.4633425353626657, "percentage": 69.27, "elapsed_time": "3:03:00", "remaining_time": "1:21:12", "throughput": 8700.2, "total_tokens": 95535120} +{"current_steps": 141770, "total_steps": 204665, "loss": 0.0, "lr": 5.222670253121786e-07, "epoch": 3.463464686194513, "percentage": 69.27, "elapsed_time": "3:03:01", "remaining_time": "1:21:11", "throughput": 8700.21, "total_tokens": 95538192} +{"current_steps": 141775, "total_steps": 204665, "loss": 0.0, "lr": 5.221921102435995e-07, "epoch": 3.46358683702636, "percentage": 69.27, "elapsed_time": "3:03:01", "remaining_time": "1:21:11", "throughput": 8700.22, "total_tokens": 95541200} +{"current_steps": 141780, "total_steps": 204665, "loss": 0.0001, "lr": 5.221171986497686e-07, "epoch": 3.4637089878582072, "percentage": 69.27, "elapsed_time": "3:03:01", "remaining_time": "1:21:10", "throughput": 8700.27, "total_tokens": 95544784} +{"current_steps": 141785, "total_steps": 204665, "loss": 0.0406, "lr": 5.220422905312293e-07, "epoch": 3.4638311386900544, "percentage": 69.28, "elapsed_time": "3:03:02", "remaining_time": "1:21:10", "throughput": 8700.31, "total_tokens": 95548304} +{"current_steps": 141790, "total_steps": 204665, "loss": 0.0, "lr": 5.219673858885276e-07, "epoch": 3.4639532895219016, "percentage": 69.28, "elapsed_time": "3:03:02", "remaining_time": "1:21:10", "throughput": 8700.31, "total_tokens": 95551312} +{"current_steps": 141795, "total_steps": 204665, "loss": 0.0, "lr": 5.218924847222073e-07, "epoch": 3.464075440353749, "percentage": 69.28, "elapsed_time": "3:03:02", "remaining_time": "1:21:09", "throughput": 8700.34, "total_tokens": 95554640} +{"current_steps": 141800, "total_steps": 204665, "loss": 0.0, "lr": 5.218175870328136e-07, "epoch": 3.464197591185596, "percentage": 69.28, "elapsed_time": "3:03:03", "remaining_time": "1:21:09", "throughput": 8700.34, "total_tokens": 95557520} +{"current_steps": 141805, "total_steps": 204665, "loss": 0.0, "lr": 5.217426928208908e-07, "epoch": 3.464319742017443, "percentage": 69.29, "elapsed_time": "3:03:03", "remaining_time": "1:21:08", "throughput": 8700.35, "total_tokens": 95560592} +{"current_steps": 141810, "total_steps": 204665, "loss": 0.0, "lr": 5.216678020869838e-07, "epoch": 3.4644418928492904, "percentage": 69.29, "elapsed_time": "3:03:03", "remaining_time": "1:21:08", "throughput": 8700.34, "total_tokens": 95563408} +{"current_steps": 141815, "total_steps": 204665, "loss": 0.0001, "lr": 5.215929148316376e-07, "epoch": 3.4645640436811376, "percentage": 69.29, "elapsed_time": "3:03:04", "remaining_time": "1:21:08", "throughput": 8700.39, "total_tokens": 95566992} +{"current_steps": 141820, "total_steps": 204665, "loss": 0.0, "lr": 5.215180310553959e-07, "epoch": 3.4646861945129848, "percentage": 69.29, "elapsed_time": "3:03:04", "remaining_time": "1:21:07", "throughput": 8700.42, "total_tokens": 95570384} +{"current_steps": 141825, "total_steps": 204665, "loss": 0.0781, "lr": 5.214431507588042e-07, "epoch": 3.464808345344832, "percentage": 69.3, "elapsed_time": "3:03:04", "remaining_time": "1:21:07", "throughput": 8700.44, "total_tokens": 95573648} +{"current_steps": 141830, "total_steps": 204665, "loss": 0.0, "lr": 5.213682739424063e-07, "epoch": 3.464930496176679, "percentage": 69.3, "elapsed_time": "3:03:05", "remaining_time": "1:21:06", "throughput": 8700.49, "total_tokens": 95577232} +{"current_steps": 141835, "total_steps": 204665, "loss": 0.0, "lr": 5.212934006067474e-07, "epoch": 3.465052647008526, "percentage": 69.3, "elapsed_time": "3:03:05", "remaining_time": "1:21:06", "throughput": 8700.53, "total_tokens": 95580752} +{"current_steps": 141840, "total_steps": 204665, "loss": 0.0, "lr": 5.212185307523716e-07, "epoch": 3.4651747978403735, "percentage": 69.3, "elapsed_time": "3:03:05", "remaining_time": "1:21:05", "throughput": 8700.55, "total_tokens": 95583888} +{"current_steps": 141845, "total_steps": 204665, "loss": 0.0, "lr": 5.211436643798231e-07, "epoch": 3.4652969486722203, "percentage": 69.31, "elapsed_time": "3:03:06", "remaining_time": "1:21:05", "throughput": 8700.6, "total_tokens": 95587536} +{"current_steps": 141850, "total_steps": 204665, "loss": 0.0236, "lr": 5.21068801489647e-07, "epoch": 3.4654190995040675, "percentage": 69.31, "elapsed_time": "3:03:06", "remaining_time": "1:21:05", "throughput": 8700.64, "total_tokens": 95590992} +{"current_steps": 141855, "total_steps": 204665, "loss": 0.0, "lr": 5.20993942082387e-07, "epoch": 3.4655412503359146, "percentage": 69.31, "elapsed_time": "3:03:07", "remaining_time": "1:21:04", "throughput": 8700.7, "total_tokens": 95594832} +{"current_steps": 141860, "total_steps": 204665, "loss": 0.0001, "lr": 5.209190861585883e-07, "epoch": 3.465663401167762, "percentage": 69.31, "elapsed_time": "3:03:07", "remaining_time": "1:21:04", "throughput": 8700.74, "total_tokens": 95598352} +{"current_steps": 141865, "total_steps": 204665, "loss": 0.0001, "lr": 5.208442337187945e-07, "epoch": 3.465785551999609, "percentage": 69.32, "elapsed_time": "3:03:07", "remaining_time": "1:21:03", "throughput": 8700.76, "total_tokens": 95601616} +{"current_steps": 141870, "total_steps": 204665, "loss": 0.0, "lr": 5.207693847635503e-07, "epoch": 3.465907702831456, "percentage": 69.32, "elapsed_time": "3:03:08", "remaining_time": "1:21:03", "throughput": 8700.8, "total_tokens": 95605072} +{"current_steps": 141875, "total_steps": 204665, "loss": 0.0, "lr": 5.206945392934004e-07, "epoch": 3.4660298536633034, "percentage": 69.32, "elapsed_time": "3:03:08", "remaining_time": "1:21:03", "throughput": 8700.87, "total_tokens": 95608912} +{"current_steps": 141880, "total_steps": 204665, "loss": 0.0096, "lr": 5.206196973088884e-07, "epoch": 3.4661520044951506, "percentage": 69.32, "elapsed_time": "3:03:08", "remaining_time": "1:21:02", "throughput": 8701.05, "total_tokens": 95614480} +{"current_steps": 141885, "total_steps": 204665, "loss": 0.0477, "lr": 5.205448588105592e-07, "epoch": 3.466274155326998, "percentage": 69.33, "elapsed_time": "3:03:09", "remaining_time": "1:21:02", "throughput": 8701.11, "total_tokens": 95618192} +{"current_steps": 141890, "total_steps": 204665, "loss": 0.0, "lr": 5.204700237989563e-07, "epoch": 3.466396306158845, "percentage": 69.33, "elapsed_time": "3:03:09", "remaining_time": "1:21:01", "throughput": 8701.1, "total_tokens": 95620944} +{"current_steps": 141895, "total_steps": 204665, "loss": 0.0, "lr": 5.203951922746249e-07, "epoch": 3.466518456990692, "percentage": 69.33, "elapsed_time": "3:03:09", "remaining_time": "1:21:01", "throughput": 8701.13, "total_tokens": 95624400} +{"current_steps": 141900, "total_steps": 204665, "loss": 0.0, "lr": 5.203203642381083e-07, "epoch": 3.4666406078225394, "percentage": 69.33, "elapsed_time": "3:03:10", "remaining_time": "1:21:01", "throughput": 8701.16, "total_tokens": 95627664} +{"current_steps": 141905, "total_steps": 204665, "loss": 0.0, "lr": 5.202455396899508e-07, "epoch": 3.4667627586543865, "percentage": 69.34, "elapsed_time": "3:03:10", "remaining_time": "1:21:00", "throughput": 8701.18, "total_tokens": 95630928} +{"current_steps": 141910, "total_steps": 204665, "loss": 0.0, "lr": 5.201707186306974e-07, "epoch": 3.4668849094862337, "percentage": 69.34, "elapsed_time": "3:03:10", "remaining_time": "1:21:00", "throughput": 8701.18, "total_tokens": 95633808} +{"current_steps": 141915, "total_steps": 204665, "loss": 0.0, "lr": 5.20095901060891e-07, "epoch": 3.467007060318081, "percentage": 69.34, "elapsed_time": "3:03:11", "remaining_time": "1:20:59", "throughput": 8701.24, "total_tokens": 95637648} +{"current_steps": 141920, "total_steps": 204665, "loss": 0.0002, "lr": 5.200210869810768e-07, "epoch": 3.467129211149928, "percentage": 69.34, "elapsed_time": "3:03:11", "remaining_time": "1:20:59", "throughput": 8701.29, "total_tokens": 95641296} +{"current_steps": 141925, "total_steps": 204665, "loss": 0.0001, "lr": 5.19946276391798e-07, "epoch": 3.4672513619817753, "percentage": 69.35, "elapsed_time": "3:03:11", "remaining_time": "1:20:59", "throughput": 8701.32, "total_tokens": 95644624} +{"current_steps": 141930, "total_steps": 204665, "loss": 0.0, "lr": 5.198714692935993e-07, "epoch": 3.467373512813622, "percentage": 69.35, "elapsed_time": "3:03:12", "remaining_time": "1:20:58", "throughput": 8701.34, "total_tokens": 95647888} +{"current_steps": 141935, "total_steps": 204665, "loss": 0.0, "lr": 5.19796665687024e-07, "epoch": 3.4674956636454692, "percentage": 69.35, "elapsed_time": "3:03:12", "remaining_time": "1:20:58", "throughput": 8701.38, "total_tokens": 95651216} +{"current_steps": 141940, "total_steps": 204665, "loss": 0.0, "lr": 5.197218655726171e-07, "epoch": 3.4676178144773164, "percentage": 69.35, "elapsed_time": "3:03:12", "remaining_time": "1:20:57", "throughput": 8701.4, "total_tokens": 95654544} +{"current_steps": 141945, "total_steps": 204665, "loss": 0.0, "lr": 5.196470689509218e-07, "epoch": 3.4677399653091636, "percentage": 69.35, "elapsed_time": "3:03:13", "remaining_time": "1:20:57", "throughput": 8701.45, "total_tokens": 95658192} +{"current_steps": 141950, "total_steps": 204665, "loss": 0.0, "lr": 5.195722758224819e-07, "epoch": 3.467862116141011, "percentage": 69.36, "elapsed_time": "3:03:13", "remaining_time": "1:20:57", "throughput": 8701.54, "total_tokens": 95662352} +{"current_steps": 141955, "total_steps": 204665, "loss": 0.0, "lr": 5.19497486187842e-07, "epoch": 3.467984266972858, "percentage": 69.36, "elapsed_time": "3:03:14", "remaining_time": "1:20:56", "throughput": 8701.57, "total_tokens": 95665744} +{"current_steps": 141960, "total_steps": 204665, "loss": 0.0, "lr": 5.19422700047545e-07, "epoch": 3.468106417804705, "percentage": 69.36, "elapsed_time": "3:03:14", "remaining_time": "1:20:56", "throughput": 8701.61, "total_tokens": 95669200} +{"current_steps": 141965, "total_steps": 204665, "loss": 0.0, "lr": 5.193479174021358e-07, "epoch": 3.4682285686365524, "percentage": 69.36, "elapsed_time": "3:03:14", "remaining_time": "1:20:55", "throughput": 8701.64, "total_tokens": 95672592} +{"current_steps": 141970, "total_steps": 204665, "loss": 0.0675, "lr": 5.19273138252158e-07, "epoch": 3.4683507194683996, "percentage": 69.37, "elapsed_time": "3:03:15", "remaining_time": "1:20:55", "throughput": 8701.71, "total_tokens": 95676368} +{"current_steps": 141975, "total_steps": 204665, "loss": 0.0, "lr": 5.191983625981549e-07, "epoch": 3.4684728703002468, "percentage": 69.37, "elapsed_time": "3:03:15", "remaining_time": "1:20:55", "throughput": 8701.73, "total_tokens": 95679632} +{"current_steps": 141980, "total_steps": 204665, "loss": 0.0039, "lr": 5.191235904406709e-07, "epoch": 3.468595021132094, "percentage": 69.37, "elapsed_time": "3:03:15", "remaining_time": "1:20:54", "throughput": 8701.77, "total_tokens": 95683088} +{"current_steps": 141985, "total_steps": 204665, "loss": 0.0, "lr": 5.190488217802492e-07, "epoch": 3.468717171963941, "percentage": 69.37, "elapsed_time": "3:03:16", "remaining_time": "1:20:54", "throughput": 8701.82, "total_tokens": 95686672} +{"current_steps": 141990, "total_steps": 204665, "loss": 0.0, "lr": 5.189740566174341e-07, "epoch": 3.4688393227957883, "percentage": 69.38, "elapsed_time": "3:03:16", "remaining_time": "1:20:53", "throughput": 8701.88, "total_tokens": 95690512} +{"current_steps": 141995, "total_steps": 204665, "loss": 0.0399, "lr": 5.188992949527688e-07, "epoch": 3.4689614736276355, "percentage": 69.38, "elapsed_time": "3:03:16", "remaining_time": "1:20:53", "throughput": 8701.93, "total_tokens": 95694096} +{"current_steps": 142000, "total_steps": 204665, "loss": 0.0009, "lr": 5.188245367867971e-07, "epoch": 3.4690836244594827, "percentage": 69.38, "elapsed_time": "3:03:17", "remaining_time": "1:20:53", "throughput": 8701.95, "total_tokens": 95697296} +{"current_steps": 142005, "total_steps": 204665, "loss": 0.0556, "lr": 5.187497821200633e-07, "epoch": 3.46920577529133, "percentage": 69.38, "elapsed_time": "3:03:17", "remaining_time": "1:20:52", "throughput": 8701.98, "total_tokens": 95700688} +{"current_steps": 142010, "total_steps": 204665, "loss": 0.0, "lr": 5.1867503095311e-07, "epoch": 3.469327926123177, "percentage": 69.39, "elapsed_time": "3:03:17", "remaining_time": "1:20:52", "throughput": 8702.01, "total_tokens": 95704080} +{"current_steps": 142015, "total_steps": 204665, "loss": 0.0399, "lr": 5.186002832864819e-07, "epoch": 3.469450076955024, "percentage": 69.39, "elapsed_time": "3:03:18", "remaining_time": "1:20:51", "throughput": 8702.04, "total_tokens": 95707408} +{"current_steps": 142020, "total_steps": 204665, "loss": 0.0001, "lr": 5.185255391207215e-07, "epoch": 3.4695722277868715, "percentage": 69.39, "elapsed_time": "3:03:18", "remaining_time": "1:20:51", "throughput": 8702.07, "total_tokens": 95710800} +{"current_steps": 142025, "total_steps": 204665, "loss": 0.0, "lr": 5.18450798456373e-07, "epoch": 3.469694378618718, "percentage": 69.39, "elapsed_time": "3:03:18", "remaining_time": "1:20:51", "throughput": 8702.1, "total_tokens": 95714192} +{"current_steps": 142030, "total_steps": 204665, "loss": 0.0, "lr": 5.1837606129398e-07, "epoch": 3.4698165294505654, "percentage": 69.4, "elapsed_time": "3:03:19", "remaining_time": "1:20:50", "throughput": 8702.13, "total_tokens": 95717456} +{"current_steps": 142035, "total_steps": 204665, "loss": 0.0, "lr": 5.183013276340859e-07, "epoch": 3.4699386802824126, "percentage": 69.4, "elapsed_time": "3:03:19", "remaining_time": "1:20:50", "throughput": 8702.14, "total_tokens": 95720528} +{"current_steps": 142040, "total_steps": 204665, "loss": 0.0001, "lr": 5.182265974772339e-07, "epoch": 3.47006083111426, "percentage": 69.4, "elapsed_time": "3:03:20", "remaining_time": "1:20:49", "throughput": 8702.18, "total_tokens": 95724048} +{"current_steps": 142045, "total_steps": 204665, "loss": 0.0444, "lr": 5.181518708239679e-07, "epoch": 3.470182981946107, "percentage": 69.4, "elapsed_time": "3:03:20", "remaining_time": "1:20:49", "throughput": 8702.2, "total_tokens": 95727312} +{"current_steps": 142050, "total_steps": 204665, "loss": 0.0, "lr": 5.180771476748307e-07, "epoch": 3.470305132777954, "percentage": 69.41, "elapsed_time": "3:03:20", "remaining_time": "1:20:49", "throughput": 8702.23, "total_tokens": 95730640} +{"current_steps": 142055, "total_steps": 204665, "loss": 0.0, "lr": 5.180024280303665e-07, "epoch": 3.4704272836098013, "percentage": 69.41, "elapsed_time": "3:03:21", "remaining_time": "1:20:48", "throughput": 8702.24, "total_tokens": 95733648} +{"current_steps": 142060, "total_steps": 204665, "loss": 0.0, "lr": 5.17927711891118e-07, "epoch": 3.4705494344416485, "percentage": 69.41, "elapsed_time": "3:03:21", "remaining_time": "1:20:48", "throughput": 8702.28, "total_tokens": 95737232} +{"current_steps": 142065, "total_steps": 204665, "loss": 0.0079, "lr": 5.178529992576291e-07, "epoch": 3.4706715852734957, "percentage": 69.41, "elapsed_time": "3:03:21", "remaining_time": "1:20:47", "throughput": 8702.3, "total_tokens": 95740368} +{"current_steps": 142070, "total_steps": 204665, "loss": 0.0, "lr": 5.177782901304426e-07, "epoch": 3.470793736105343, "percentage": 69.42, "elapsed_time": "3:03:22", "remaining_time": "1:20:47", "throughput": 8702.31, "total_tokens": 95743568} +{"current_steps": 142075, "total_steps": 204665, "loss": 0.0422, "lr": 5.177035845101023e-07, "epoch": 3.47091588693719, "percentage": 69.42, "elapsed_time": "3:03:22", "remaining_time": "1:20:47", "throughput": 8702.35, "total_tokens": 95747088} +{"current_steps": 142080, "total_steps": 204665, "loss": 0.0001, "lr": 5.176288823971511e-07, "epoch": 3.4710380377690373, "percentage": 69.42, "elapsed_time": "3:03:22", "remaining_time": "1:20:46", "throughput": 8702.39, "total_tokens": 95750480} +{"current_steps": 142085, "total_steps": 204665, "loss": 0.0, "lr": 5.175541837921326e-07, "epoch": 3.4711601886008845, "percentage": 69.42, "elapsed_time": "3:03:23", "remaining_time": "1:20:46", "throughput": 8702.39, "total_tokens": 95753488} +{"current_steps": 142090, "total_steps": 204665, "loss": 0.0001, "lr": 5.174794886955895e-07, "epoch": 3.4712823394327317, "percentage": 69.43, "elapsed_time": "3:03:23", "remaining_time": "1:20:45", "throughput": 8702.42, "total_tokens": 95756880} +{"current_steps": 142095, "total_steps": 204665, "loss": 0.0, "lr": 5.174047971080653e-07, "epoch": 3.471404490264579, "percentage": 69.43, "elapsed_time": "3:03:23", "remaining_time": "1:20:45", "throughput": 8702.44, "total_tokens": 95760144} +{"current_steps": 142100, "total_steps": 204665, "loss": 0.0002, "lr": 5.173301090301036e-07, "epoch": 3.471526641096426, "percentage": 69.43, "elapsed_time": "3:03:24", "remaining_time": "1:20:45", "throughput": 8702.52, "total_tokens": 95764112} +{"current_steps": 142105, "total_steps": 204665, "loss": 0.0, "lr": 5.172554244622469e-07, "epoch": 3.4716487919282732, "percentage": 69.43, "elapsed_time": "3:03:24", "remaining_time": "1:20:44", "throughput": 8702.55, "total_tokens": 95767568} +{"current_steps": 142110, "total_steps": 204665, "loss": 0.0, "lr": 5.171807434050389e-07, "epoch": 3.47177094276012, "percentage": 69.44, "elapsed_time": "3:03:24", "remaining_time": "1:20:44", "throughput": 8702.61, "total_tokens": 95771216} +{"current_steps": 142115, "total_steps": 204665, "loss": 0.012, "lr": 5.17106065859022e-07, "epoch": 3.471893093591967, "percentage": 69.44, "elapsed_time": "3:03:25", "remaining_time": "1:20:43", "throughput": 8702.62, "total_tokens": 95774352} +{"current_steps": 142120, "total_steps": 204665, "loss": 0.0, "lr": 5.170313918247397e-07, "epoch": 3.4720152444238144, "percentage": 69.44, "elapsed_time": "3:03:25", "remaining_time": "1:20:43", "throughput": 8702.67, "total_tokens": 95778000} +{"current_steps": 142125, "total_steps": 204665, "loss": 0.0001, "lr": 5.169567213027355e-07, "epoch": 3.4721373952556616, "percentage": 69.44, "elapsed_time": "3:03:25", "remaining_time": "1:20:42", "throughput": 8702.71, "total_tokens": 95781456} +{"current_steps": 142130, "total_steps": 204665, "loss": 0.0001, "lr": 5.168820542935514e-07, "epoch": 3.4722595460875088, "percentage": 69.45, "elapsed_time": "3:03:26", "remaining_time": "1:20:42", "throughput": 8702.77, "total_tokens": 95785168} +{"current_steps": 142135, "total_steps": 204665, "loss": 0.0, "lr": 5.168073907977315e-07, "epoch": 3.472381696919356, "percentage": 69.45, "elapsed_time": "3:03:26", "remaining_time": "1:20:42", "throughput": 8702.93, "total_tokens": 95790480} +{"current_steps": 142140, "total_steps": 204665, "loss": 0.0, "lr": 5.167327308158177e-07, "epoch": 3.472503847751203, "percentage": 69.45, "elapsed_time": "3:03:27", "remaining_time": "1:20:41", "throughput": 8702.96, "total_tokens": 95793744} +{"current_steps": 142145, "total_steps": 204665, "loss": 0.0, "lr": 5.166580743483539e-07, "epoch": 3.4726259985830503, "percentage": 69.45, "elapsed_time": "3:03:27", "remaining_time": "1:20:41", "throughput": 8702.99, "total_tokens": 95797200} +{"current_steps": 142150, "total_steps": 204665, "loss": 0.0, "lr": 5.165834213958825e-07, "epoch": 3.4727481494148975, "percentage": 69.45, "elapsed_time": "3:03:27", "remaining_time": "1:20:41", "throughput": 8703.02, "total_tokens": 95800528} +{"current_steps": 142155, "total_steps": 204665, "loss": 0.0938, "lr": 5.165087719589462e-07, "epoch": 3.4728703002467447, "percentage": 69.46, "elapsed_time": "3:03:28", "remaining_time": "1:20:40", "throughput": 8703.04, "total_tokens": 95803728} +{"current_steps": 142160, "total_steps": 204665, "loss": 0.0001, "lr": 5.164341260380885e-07, "epoch": 3.472992451078592, "percentage": 69.46, "elapsed_time": "3:03:28", "remaining_time": "1:20:40", "throughput": 8703.08, "total_tokens": 95807248} +{"current_steps": 142165, "total_steps": 204665, "loss": 0.0001, "lr": 5.163594836338515e-07, "epoch": 3.473114601910439, "percentage": 69.46, "elapsed_time": "3:03:28", "remaining_time": "1:20:39", "throughput": 8703.09, "total_tokens": 95810320} +{"current_steps": 142170, "total_steps": 204665, "loss": 0.0524, "lr": 5.162848447467789e-07, "epoch": 3.4732367527422863, "percentage": 69.46, "elapsed_time": "3:03:29", "remaining_time": "1:20:39", "throughput": 8703.12, "total_tokens": 95813712} +{"current_steps": 142175, "total_steps": 204665, "loss": 0.0382, "lr": 5.162102093774126e-07, "epoch": 3.4733589035741335, "percentage": 69.47, "elapsed_time": "3:03:29", "remaining_time": "1:20:38", "throughput": 8703.16, "total_tokens": 95817232} +{"current_steps": 142180, "total_steps": 204665, "loss": 0.0341, "lr": 5.161355775262957e-07, "epoch": 3.4734810544059807, "percentage": 69.47, "elapsed_time": "3:03:29", "remaining_time": "1:20:38", "throughput": 8703.18, "total_tokens": 95820432} +{"current_steps": 142185, "total_steps": 204665, "loss": 0.0, "lr": 5.160609491939713e-07, "epoch": 3.473603205237828, "percentage": 69.47, "elapsed_time": "3:03:30", "remaining_time": "1:20:38", "throughput": 8703.25, "total_tokens": 95824272} +{"current_steps": 142190, "total_steps": 204665, "loss": 0.0001, "lr": 5.159863243809816e-07, "epoch": 3.473725356069675, "percentage": 69.47, "elapsed_time": "3:03:30", "remaining_time": "1:20:37", "throughput": 8703.32, "total_tokens": 95828112} +{"current_steps": 142195, "total_steps": 204665, "loss": 0.0, "lr": 5.159117030878699e-07, "epoch": 3.4738475069015218, "percentage": 69.48, "elapsed_time": "3:03:30", "remaining_time": "1:20:37", "throughput": 8703.34, "total_tokens": 95831376} +{"current_steps": 142200, "total_steps": 204665, "loss": 0.0, "lr": 5.158370853151783e-07, "epoch": 3.4739696577333694, "percentage": 69.48, "elapsed_time": "3:03:31", "remaining_time": "1:20:36", "throughput": 8703.36, "total_tokens": 95834640} +{"current_steps": 142205, "total_steps": 204665, "loss": 0.0005, "lr": 5.157624710634499e-07, "epoch": 3.474091808565216, "percentage": 69.48, "elapsed_time": "3:03:31", "remaining_time": "1:20:36", "throughput": 8703.43, "total_tokens": 95838416} +{"current_steps": 142210, "total_steps": 204665, "loss": 0.0, "lr": 5.156878603332265e-07, "epoch": 3.4742139593970633, "percentage": 69.48, "elapsed_time": "3:03:31", "remaining_time": "1:20:36", "throughput": 8703.42, "total_tokens": 95841296} +{"current_steps": 142215, "total_steps": 204665, "loss": 0.0, "lr": 5.156132531250515e-07, "epoch": 3.4743361102289105, "percentage": 69.49, "elapsed_time": "3:03:32", "remaining_time": "1:20:35", "throughput": 8703.47, "total_tokens": 95844880} +{"current_steps": 142220, "total_steps": 204665, "loss": 0.0, "lr": 5.155386494394674e-07, "epoch": 3.4744582610607577, "percentage": 69.49, "elapsed_time": "3:03:32", "remaining_time": "1:20:35", "throughput": 8703.52, "total_tokens": 95848528} +{"current_steps": 142225, "total_steps": 204665, "loss": 0.0, "lr": 5.154640492770161e-07, "epoch": 3.474580411892605, "percentage": 69.49, "elapsed_time": "3:03:32", "remaining_time": "1:20:34", "throughput": 8703.56, "total_tokens": 95851920} +{"current_steps": 142230, "total_steps": 204665, "loss": 0.0, "lr": 5.153894526382412e-07, "epoch": 3.474702562724452, "percentage": 69.49, "elapsed_time": "3:03:33", "remaining_time": "1:20:34", "throughput": 8703.6, "total_tokens": 95855440} +{"current_steps": 142235, "total_steps": 204665, "loss": 0.0001, "lr": 5.153148595236839e-07, "epoch": 3.4748247135562993, "percentage": 69.5, "elapsed_time": "3:03:33", "remaining_time": "1:20:34", "throughput": 8703.65, "total_tokens": 95859088} +{"current_steps": 142240, "total_steps": 204665, "loss": 0.0001, "lr": 5.152402699338878e-07, "epoch": 3.4749468643881465, "percentage": 69.5, "elapsed_time": "3:03:34", "remaining_time": "1:20:33", "throughput": 8703.65, "total_tokens": 95862032} +{"current_steps": 142245, "total_steps": 204665, "loss": 0.0, "lr": 5.151656838693945e-07, "epoch": 3.4750690152199937, "percentage": 69.5, "elapsed_time": "3:03:34", "remaining_time": "1:20:33", "throughput": 8703.7, "total_tokens": 95865616} +{"current_steps": 142250, "total_steps": 204665, "loss": 0.0, "lr": 5.150911013307471e-07, "epoch": 3.475191166051841, "percentage": 69.5, "elapsed_time": "3:03:34", "remaining_time": "1:20:32", "throughput": 8703.75, "total_tokens": 95869264} +{"current_steps": 142255, "total_steps": 204665, "loss": 0.0, "lr": 5.150165223184877e-07, "epoch": 3.475313316883688, "percentage": 69.51, "elapsed_time": "3:03:35", "remaining_time": "1:20:32", "throughput": 8703.85, "total_tokens": 95873488} +{"current_steps": 142260, "total_steps": 204665, "loss": 0.0754, "lr": 5.149419468331582e-07, "epoch": 3.4754354677155352, "percentage": 69.51, "elapsed_time": "3:03:35", "remaining_time": "1:20:32", "throughput": 8703.89, "total_tokens": 95876944} +{"current_steps": 142265, "total_steps": 204665, "loss": 0.0454, "lr": 5.148673748753017e-07, "epoch": 3.4755576185473824, "percentage": 69.51, "elapsed_time": "3:03:35", "remaining_time": "1:20:31", "throughput": 8703.9, "total_tokens": 95880080} +{"current_steps": 142270, "total_steps": 204665, "loss": 0.1032, "lr": 5.147928064454597e-07, "epoch": 3.4756797693792296, "percentage": 69.51, "elapsed_time": "3:03:36", "remaining_time": "1:20:31", "throughput": 8703.94, "total_tokens": 95883472} +{"current_steps": 142275, "total_steps": 204665, "loss": 0.0, "lr": 5.147182415441749e-07, "epoch": 3.475801920211077, "percentage": 69.52, "elapsed_time": "3:03:36", "remaining_time": "1:20:30", "throughput": 8703.98, "total_tokens": 95886992} +{"current_steps": 142280, "total_steps": 204665, "loss": 0.0422, "lr": 5.1464368017199e-07, "epoch": 3.4759240710429236, "percentage": 69.52, "elapsed_time": "3:03:36", "remaining_time": "1:20:30", "throughput": 8704.01, "total_tokens": 95890384} +{"current_steps": 142285, "total_steps": 204665, "loss": 0.0001, "lr": 5.145691223294464e-07, "epoch": 3.476046221874771, "percentage": 69.52, "elapsed_time": "3:03:37", "remaining_time": "1:20:30", "throughput": 8704.04, "total_tokens": 95893712} +{"current_steps": 142290, "total_steps": 204665, "loss": 0.0001, "lr": 5.144945680170871e-07, "epoch": 3.476168372706618, "percentage": 69.52, "elapsed_time": "3:03:37", "remaining_time": "1:20:29", "throughput": 8704.08, "total_tokens": 95897168} +{"current_steps": 142295, "total_steps": 204665, "loss": 0.0, "lr": 5.144200172354534e-07, "epoch": 3.476290523538465, "percentage": 69.53, "elapsed_time": "3:03:37", "remaining_time": "1:20:29", "throughput": 8704.11, "total_tokens": 95900624} +{"current_steps": 142300, "total_steps": 204665, "loss": 0.0002, "lr": 5.143454699850884e-07, "epoch": 3.4764126743703123, "percentage": 69.53, "elapsed_time": "3:03:38", "remaining_time": "1:20:28", "throughput": 8704.15, "total_tokens": 95904016} +{"current_steps": 142305, "total_steps": 204665, "loss": 0.0001, "lr": 5.142709262665334e-07, "epoch": 3.4765348252021595, "percentage": 69.53, "elapsed_time": "3:03:38", "remaining_time": "1:20:28", "throughput": 8704.19, "total_tokens": 95907600} +{"current_steps": 142310, "total_steps": 204665, "loss": 0.0, "lr": 5.14196386080331e-07, "epoch": 3.4766569760340067, "percentage": 69.53, "elapsed_time": "3:03:38", "remaining_time": "1:20:28", "throughput": 8704.21, "total_tokens": 95910736} +{"current_steps": 142315, "total_steps": 204665, "loss": 0.0, "lr": 5.141218494270234e-07, "epoch": 3.476779126865854, "percentage": 69.54, "elapsed_time": "3:03:39", "remaining_time": "1:20:27", "throughput": 8704.28, "total_tokens": 95914640} +{"current_steps": 142320, "total_steps": 204665, "loss": 0.0001, "lr": 5.14047316307152e-07, "epoch": 3.476901277697701, "percentage": 69.54, "elapsed_time": "3:03:39", "remaining_time": "1:20:27", "throughput": 8704.31, "total_tokens": 95918032} +{"current_steps": 142325, "total_steps": 204665, "loss": 0.0, "lr": 5.139727867212596e-07, "epoch": 3.4770234285295483, "percentage": 69.54, "elapsed_time": "3:03:39", "remaining_time": "1:20:26", "throughput": 8704.37, "total_tokens": 95921808} +{"current_steps": 142330, "total_steps": 204665, "loss": 0.0026, "lr": 5.138982606698876e-07, "epoch": 3.4771455793613955, "percentage": 69.54, "elapsed_time": "3:03:40", "remaining_time": "1:20:26", "throughput": 8704.41, "total_tokens": 95925328} +{"current_steps": 142335, "total_steps": 204665, "loss": 0.0, "lr": 5.13823738153578e-07, "epoch": 3.4772677301932426, "percentage": 69.55, "elapsed_time": "3:03:40", "remaining_time": "1:20:26", "throughput": 8704.43, "total_tokens": 95928528} +{"current_steps": 142340, "total_steps": 204665, "loss": 0.0695, "lr": 5.137492191728734e-07, "epoch": 3.47738988102509, "percentage": 69.55, "elapsed_time": "3:03:40", "remaining_time": "1:20:25", "throughput": 8704.43, "total_tokens": 95931408} +{"current_steps": 142345, "total_steps": 204665, "loss": 0.0, "lr": 5.136747037283149e-07, "epoch": 3.477512031856937, "percentage": 69.55, "elapsed_time": "3:03:41", "remaining_time": "1:20:25", "throughput": 8704.49, "total_tokens": 95935120} +{"current_steps": 142350, "total_steps": 204665, "loss": 0.0439, "lr": 5.136001918204451e-07, "epoch": 3.477634182688784, "percentage": 69.55, "elapsed_time": "3:03:41", "remaining_time": "1:20:24", "throughput": 8704.52, "total_tokens": 95938576} +{"current_steps": 142355, "total_steps": 204665, "loss": 0.0, "lr": 5.135256834498054e-07, "epoch": 3.4777563335206314, "percentage": 69.56, "elapsed_time": "3:03:42", "remaining_time": "1:20:24", "throughput": 8704.56, "total_tokens": 95942096} +{"current_steps": 142360, "total_steps": 204665, "loss": 0.0, "lr": 5.134511786169376e-07, "epoch": 3.4778784843524786, "percentage": 69.56, "elapsed_time": "3:03:42", "remaining_time": "1:20:24", "throughput": 8704.59, "total_tokens": 95945360} +{"current_steps": 142365, "total_steps": 204665, "loss": 0.0, "lr": 5.133766773223839e-07, "epoch": 3.478000635184326, "percentage": 69.56, "elapsed_time": "3:03:42", "remaining_time": "1:20:23", "throughput": 8704.62, "total_tokens": 95948688} +{"current_steps": 142370, "total_steps": 204665, "loss": 0.0002, "lr": 5.133021795666858e-07, "epoch": 3.478122786016173, "percentage": 69.56, "elapsed_time": "3:03:43", "remaining_time": "1:20:23", "throughput": 8704.65, "total_tokens": 95952144} +{"current_steps": 142375, "total_steps": 204665, "loss": 0.0, "lr": 5.132276853503853e-07, "epoch": 3.4782449368480197, "percentage": 69.56, "elapsed_time": "3:03:43", "remaining_time": "1:20:22", "throughput": 8704.68, "total_tokens": 95955472} +{"current_steps": 142380, "total_steps": 204665, "loss": 0.0, "lr": 5.131531946740238e-07, "epoch": 3.478367087679867, "percentage": 69.57, "elapsed_time": "3:03:43", "remaining_time": "1:20:22", "throughput": 8704.71, "total_tokens": 95958736} +{"current_steps": 142385, "total_steps": 204665, "loss": 0.0001, "lr": 5.130787075381433e-07, "epoch": 3.478489238511714, "percentage": 69.57, "elapsed_time": "3:03:44", "remaining_time": "1:20:22", "throughput": 8704.75, "total_tokens": 95962256} +{"current_steps": 142390, "total_steps": 204665, "loss": 0.0, "lr": 5.130042239432853e-07, "epoch": 3.4786113893435613, "percentage": 69.57, "elapsed_time": "3:03:44", "remaining_time": "1:20:21", "throughput": 8704.76, "total_tokens": 95965328} +{"current_steps": 142395, "total_steps": 204665, "loss": 0.0, "lr": 5.129297438899918e-07, "epoch": 3.4787335401754085, "percentage": 69.57, "elapsed_time": "3:03:44", "remaining_time": "1:20:21", "throughput": 8704.8, "total_tokens": 95968848} +{"current_steps": 142400, "total_steps": 204665, "loss": 0.0, "lr": 5.128552673788038e-07, "epoch": 3.4788556910072557, "percentage": 69.58, "elapsed_time": "3:03:45", "remaining_time": "1:20:20", "throughput": 8704.83, "total_tokens": 95972176} +{"current_steps": 142405, "total_steps": 204665, "loss": 0.0, "lr": 5.127807944102634e-07, "epoch": 3.478977841839103, "percentage": 69.58, "elapsed_time": "3:03:45", "remaining_time": "1:20:20", "throughput": 8704.83, "total_tokens": 95975056} +{"current_steps": 142410, "total_steps": 204665, "loss": 0.0, "lr": 5.127063249849125e-07, "epoch": 3.47909999267095, "percentage": 69.58, "elapsed_time": "3:03:45", "remaining_time": "1:20:19", "throughput": 8704.88, "total_tokens": 95978704} +{"current_steps": 142415, "total_steps": 204665, "loss": 0.0166, "lr": 5.126318591032919e-07, "epoch": 3.4792221435027972, "percentage": 69.58, "elapsed_time": "3:03:46", "remaining_time": "1:20:19", "throughput": 8704.95, "total_tokens": 95982608} +{"current_steps": 142420, "total_steps": 204665, "loss": 0.0, "lr": 5.125573967659437e-07, "epoch": 3.4793442943346444, "percentage": 69.59, "elapsed_time": "3:03:46", "remaining_time": "1:20:19", "throughput": 8705.03, "total_tokens": 95986640} +{"current_steps": 142425, "total_steps": 204665, "loss": 0.0001, "lr": 5.124829379734091e-07, "epoch": 3.4794664451664916, "percentage": 69.59, "elapsed_time": "3:03:46", "remaining_time": "1:20:18", "throughput": 8705.07, "total_tokens": 95990096} +{"current_steps": 142430, "total_steps": 204665, "loss": 0.0, "lr": 5.124084827262297e-07, "epoch": 3.479588595998339, "percentage": 69.59, "elapsed_time": "3:03:47", "remaining_time": "1:20:18", "throughput": 8705.08, "total_tokens": 95993168} +{"current_steps": 142435, "total_steps": 204665, "loss": 0.0, "lr": 5.123340310249471e-07, "epoch": 3.479710746830186, "percentage": 69.59, "elapsed_time": "3:03:47", "remaining_time": "1:20:17", "throughput": 8705.13, "total_tokens": 95996816} +{"current_steps": 142440, "total_steps": 204665, "loss": 0.0, "lr": 5.122595828701024e-07, "epoch": 3.479832897662033, "percentage": 69.6, "elapsed_time": "3:03:47", "remaining_time": "1:20:17", "throughput": 8705.14, "total_tokens": 95999952} +{"current_steps": 142445, "total_steps": 204665, "loss": 0.0001, "lr": 5.121851382622375e-07, "epoch": 3.4799550484938804, "percentage": 69.6, "elapsed_time": "3:03:48", "remaining_time": "1:20:17", "throughput": 8705.18, "total_tokens": 96003408} +{"current_steps": 142450, "total_steps": 204665, "loss": 0.0, "lr": 5.121106972018931e-07, "epoch": 3.4800771993257276, "percentage": 69.6, "elapsed_time": "3:03:48", "remaining_time": "1:20:16", "throughput": 8705.21, "total_tokens": 96006800} +{"current_steps": 142455, "total_steps": 204665, "loss": 0.0, "lr": 5.120362596896115e-07, "epoch": 3.4801993501575748, "percentage": 69.6, "elapsed_time": "3:03:49", "remaining_time": "1:20:16", "throughput": 8705.26, "total_tokens": 96010384} +{"current_steps": 142460, "total_steps": 204665, "loss": 0.0, "lr": 5.119618257259333e-07, "epoch": 3.4803215009894215, "percentage": 69.61, "elapsed_time": "3:03:49", "remaining_time": "1:20:15", "throughput": 8705.29, "total_tokens": 96013712} +{"current_steps": 142465, "total_steps": 204665, "loss": 0.0192, "lr": 5.118873953113995e-07, "epoch": 3.480443651821269, "percentage": 69.61, "elapsed_time": "3:03:49", "remaining_time": "1:20:15", "throughput": 8705.32, "total_tokens": 96017168} +{"current_steps": 142470, "total_steps": 204665, "loss": 0.0001, "lr": 5.118129684465524e-07, "epoch": 3.480565802653116, "percentage": 69.61, "elapsed_time": "3:03:50", "remaining_time": "1:20:15", "throughput": 8705.37, "total_tokens": 96020688} +{"current_steps": 142475, "total_steps": 204665, "loss": 0.0001, "lr": 5.117385451319322e-07, "epoch": 3.480687953484963, "percentage": 69.61, "elapsed_time": "3:03:50", "remaining_time": "1:20:14", "throughput": 8705.37, "total_tokens": 96023632} +{"current_steps": 142480, "total_steps": 204665, "loss": 0.0, "lr": 5.116641253680811e-07, "epoch": 3.4808101043168103, "percentage": 69.62, "elapsed_time": "3:03:50", "remaining_time": "1:20:14", "throughput": 8705.4, "total_tokens": 96027024} +{"current_steps": 142485, "total_steps": 204665, "loss": 0.0343, "lr": 5.115897091555394e-07, "epoch": 3.4809322551486575, "percentage": 69.62, "elapsed_time": "3:03:51", "remaining_time": "1:20:13", "throughput": 8705.42, "total_tokens": 96030224} +{"current_steps": 142490, "total_steps": 204665, "loss": 0.0, "lr": 5.115152964948487e-07, "epoch": 3.4810544059805046, "percentage": 69.62, "elapsed_time": "3:03:51", "remaining_time": "1:20:13", "throughput": 8705.45, "total_tokens": 96033616} +{"current_steps": 142495, "total_steps": 204665, "loss": 0.0, "lr": 5.114408873865505e-07, "epoch": 3.481176556812352, "percentage": 69.62, "elapsed_time": "3:03:51", "remaining_time": "1:20:13", "throughput": 8705.48, "total_tokens": 96036944} +{"current_steps": 142500, "total_steps": 204665, "loss": 0.0, "lr": 5.113664818311852e-07, "epoch": 3.481298707644199, "percentage": 69.63, "elapsed_time": "3:03:52", "remaining_time": "1:20:12", "throughput": 8705.51, "total_tokens": 96040272} +{"current_steps": 142505, "total_steps": 204665, "loss": 0.0, "lr": 5.112920798292947e-07, "epoch": 3.481420858476046, "percentage": 69.63, "elapsed_time": "3:03:52", "remaining_time": "1:20:12", "throughput": 8705.55, "total_tokens": 96043792} +{"current_steps": 142510, "total_steps": 204665, "loss": 0.0, "lr": 5.112176813814193e-07, "epoch": 3.4815430093078934, "percentage": 69.63, "elapsed_time": "3:03:52", "remaining_time": "1:20:11", "throughput": 8705.56, "total_tokens": 96046928} +{"current_steps": 142515, "total_steps": 204665, "loss": 0.0305, "lr": 5.111432864881007e-07, "epoch": 3.4816651601397406, "percentage": 69.63, "elapsed_time": "3:03:53", "remaining_time": "1:20:11", "throughput": 8705.57, "total_tokens": 96049936} +{"current_steps": 142520, "total_steps": 204665, "loss": 0.0, "lr": 5.110688951498792e-07, "epoch": 3.481787310971588, "percentage": 69.64, "elapsed_time": "3:03:53", "remaining_time": "1:20:11", "throughput": 8705.62, "total_tokens": 96053520} +{"current_steps": 142525, "total_steps": 204665, "loss": 0.0, "lr": 5.109945073672963e-07, "epoch": 3.481909461803435, "percentage": 69.64, "elapsed_time": "3:03:53", "remaining_time": "1:20:10", "throughput": 8705.62, "total_tokens": 96056528} +{"current_steps": 142530, "total_steps": 204665, "loss": 0.0001, "lr": 5.109201231408931e-07, "epoch": 3.482031612635282, "percentage": 69.64, "elapsed_time": "3:03:54", "remaining_time": "1:20:10", "throughput": 8705.67, "total_tokens": 96060240} +{"current_steps": 142535, "total_steps": 204665, "loss": 0.0, "lr": 5.1084574247121e-07, "epoch": 3.4821537634671293, "percentage": 69.64, "elapsed_time": "3:03:54", "remaining_time": "1:20:09", "throughput": 8705.74, "total_tokens": 96064016} +{"current_steps": 142540, "total_steps": 204665, "loss": 0.0, "lr": 5.107713653587886e-07, "epoch": 3.4822759142989765, "percentage": 69.65, "elapsed_time": "3:03:54", "remaining_time": "1:20:09", "throughput": 8705.77, "total_tokens": 96067472} +{"current_steps": 142545, "total_steps": 204665, "loss": 0.0001, "lr": 5.106969918041692e-07, "epoch": 3.4823980651308237, "percentage": 69.65, "elapsed_time": "3:03:55", "remaining_time": "1:20:09", "throughput": 8705.79, "total_tokens": 96070736} +{"current_steps": 142550, "total_steps": 204665, "loss": 0.0, "lr": 5.106226218078931e-07, "epoch": 3.482520215962671, "percentage": 69.65, "elapsed_time": "3:03:55", "remaining_time": "1:20:08", "throughput": 8705.83, "total_tokens": 96074192} +{"current_steps": 142555, "total_steps": 204665, "loss": 0.0321, "lr": 5.105482553705005e-07, "epoch": 3.4826423667945177, "percentage": 69.65, "elapsed_time": "3:03:55", "remaining_time": "1:20:08", "throughput": 8705.84, "total_tokens": 96077328} +{"current_steps": 142560, "total_steps": 204665, "loss": 0.0, "lr": 5.104738924925331e-07, "epoch": 3.482764517626365, "percentage": 69.66, "elapsed_time": "3:03:56", "remaining_time": "1:20:07", "throughput": 8705.86, "total_tokens": 96080464} +{"current_steps": 142565, "total_steps": 204665, "loss": 0.0, "lr": 5.103995331745313e-07, "epoch": 3.482886668458212, "percentage": 69.66, "elapsed_time": "3:03:56", "remaining_time": "1:20:07", "throughput": 8705.89, "total_tokens": 96083792} +{"current_steps": 142570, "total_steps": 204665, "loss": 0.0, "lr": 5.103251774170352e-07, "epoch": 3.4830088192900592, "percentage": 69.66, "elapsed_time": "3:03:56", "remaining_time": "1:20:07", "throughput": 8705.94, "total_tokens": 96087376} +{"current_steps": 142575, "total_steps": 204665, "loss": 0.0, "lr": 5.102508252205866e-07, "epoch": 3.4831309701219064, "percentage": 69.66, "elapsed_time": "3:03:57", "remaining_time": "1:20:06", "throughput": 8705.94, "total_tokens": 96090448} +{"current_steps": 142580, "total_steps": 204665, "loss": 0.0291, "lr": 5.101764765857254e-07, "epoch": 3.4832531209537536, "percentage": 69.67, "elapsed_time": "3:03:57", "remaining_time": "1:20:06", "throughput": 8705.96, "total_tokens": 96093584} +{"current_steps": 142585, "total_steps": 204665, "loss": 0.0, "lr": 5.101021315129925e-07, "epoch": 3.483375271785601, "percentage": 69.67, "elapsed_time": "3:03:58", "remaining_time": "1:20:05", "throughput": 8705.99, "total_tokens": 96096976} +{"current_steps": 142590, "total_steps": 204665, "loss": 0.0, "lr": 5.10027790002929e-07, "epoch": 3.483497422617448, "percentage": 69.67, "elapsed_time": "3:03:58", "remaining_time": "1:20:05", "throughput": 8706.06, "total_tokens": 96100880} +{"current_steps": 142595, "total_steps": 204665, "loss": 0.0, "lr": 5.099534520560751e-07, "epoch": 3.483619573449295, "percentage": 69.67, "elapsed_time": "3:03:58", "remaining_time": "1:20:05", "throughput": 8706.1, "total_tokens": 96104272} +{"current_steps": 142600, "total_steps": 204665, "loss": 0.0524, "lr": 5.098791176729716e-07, "epoch": 3.4837417242811424, "percentage": 69.67, "elapsed_time": "3:03:59", "remaining_time": "1:20:04", "throughput": 8706.1, "total_tokens": 96107216} +{"current_steps": 142605, "total_steps": 204665, "loss": 0.0284, "lr": 5.098047868541587e-07, "epoch": 3.4838638751129896, "percentage": 69.68, "elapsed_time": "3:03:59", "remaining_time": "1:20:04", "throughput": 8706.12, "total_tokens": 96110480} +{"current_steps": 142610, "total_steps": 204665, "loss": 0.0339, "lr": 5.097304596001777e-07, "epoch": 3.4839860259448368, "percentage": 69.68, "elapsed_time": "3:03:59", "remaining_time": "1:20:03", "throughput": 8706.16, "total_tokens": 96113936} +{"current_steps": 142615, "total_steps": 204665, "loss": 0.0001, "lr": 5.096561359115682e-07, "epoch": 3.484108176776684, "percentage": 69.68, "elapsed_time": "3:04:00", "remaining_time": "1:20:03", "throughput": 8706.17, "total_tokens": 96117072} +{"current_steps": 142620, "total_steps": 204665, "loss": 0.0, "lr": 5.095818157888712e-07, "epoch": 3.484230327608531, "percentage": 69.68, "elapsed_time": "3:04:00", "remaining_time": "1:20:03", "throughput": 8706.19, "total_tokens": 96120272} +{"current_steps": 142625, "total_steps": 204665, "loss": 0.0531, "lr": 5.095074992326274e-07, "epoch": 3.4843524784403783, "percentage": 69.69, "elapsed_time": "3:04:00", "remaining_time": "1:20:02", "throughput": 8706.22, "total_tokens": 96123600} +{"current_steps": 142630, "total_steps": 204665, "loss": 0.0, "lr": 5.094331862433768e-07, "epoch": 3.4844746292722255, "percentage": 69.69, "elapsed_time": "3:04:01", "remaining_time": "1:20:02", "throughput": 8706.28, "total_tokens": 96127376} +{"current_steps": 142635, "total_steps": 204665, "loss": 0.0546, "lr": 5.093588768216602e-07, "epoch": 3.4845967801040727, "percentage": 69.69, "elapsed_time": "3:04:01", "remaining_time": "1:20:01", "throughput": 8706.34, "total_tokens": 96131088} +{"current_steps": 142640, "total_steps": 204665, "loss": 0.0001, "lr": 5.092845709680176e-07, "epoch": 3.4847189309359194, "percentage": 69.69, "elapsed_time": "3:04:01", "remaining_time": "1:20:01", "throughput": 8706.35, "total_tokens": 96134224} +{"current_steps": 142645, "total_steps": 204665, "loss": 0.0, "lr": 5.092102686829896e-07, "epoch": 3.484841081767767, "percentage": 69.7, "elapsed_time": "3:04:02", "remaining_time": "1:20:00", "throughput": 8706.36, "total_tokens": 96137296} +{"current_steps": 142650, "total_steps": 204665, "loss": 0.0, "lr": 5.091359699671168e-07, "epoch": 3.484963232599614, "percentage": 69.7, "elapsed_time": "3:04:02", "remaining_time": "1:20:00", "throughput": 8706.41, "total_tokens": 96140944} +{"current_steps": 142655, "total_steps": 204665, "loss": 0.0, "lr": 5.090616748209388e-07, "epoch": 3.485085383431461, "percentage": 69.7, "elapsed_time": "3:04:02", "remaining_time": "1:20:00", "throughput": 8706.45, "total_tokens": 96144464} +{"current_steps": 142660, "total_steps": 204665, "loss": 0.0, "lr": 5.089873832449969e-07, "epoch": 3.485207534263308, "percentage": 69.7, "elapsed_time": "3:04:03", "remaining_time": "1:19:59", "throughput": 8706.48, "total_tokens": 96147792} +{"current_steps": 142665, "total_steps": 204665, "loss": 0.0003, "lr": 5.089130952398308e-07, "epoch": 3.4853296850951554, "percentage": 69.71, "elapsed_time": "3:04:03", "remaining_time": "1:19:59", "throughput": 8706.52, "total_tokens": 96151312} +{"current_steps": 142670, "total_steps": 204665, "loss": 0.0849, "lr": 5.088388108059802e-07, "epoch": 3.4854518359270026, "percentage": 69.71, "elapsed_time": "3:04:03", "remaining_time": "1:19:58", "throughput": 8706.55, "total_tokens": 96154640} +{"current_steps": 142675, "total_steps": 204665, "loss": 0.0001, "lr": 5.087645299439864e-07, "epoch": 3.4855739867588498, "percentage": 69.71, "elapsed_time": "3:04:04", "remaining_time": "1:19:58", "throughput": 8706.59, "total_tokens": 96158032} +{"current_steps": 142680, "total_steps": 204665, "loss": 0.0001, "lr": 5.086902526543889e-07, "epoch": 3.485696137590697, "percentage": 69.71, "elapsed_time": "3:04:04", "remaining_time": "1:19:58", "throughput": 8706.58, "total_tokens": 96160976} +{"current_steps": 142685, "total_steps": 204665, "loss": 0.0, "lr": 5.08615978937728e-07, "epoch": 3.485818288422544, "percentage": 69.72, "elapsed_time": "3:04:04", "remaining_time": "1:19:57", "throughput": 8706.66, "total_tokens": 96164944} +{"current_steps": 142690, "total_steps": 204665, "loss": 0.0001, "lr": 5.085417087945436e-07, "epoch": 3.4859404392543913, "percentage": 69.72, "elapsed_time": "3:04:05", "remaining_time": "1:19:57", "throughput": 8706.69, "total_tokens": 96168272} +{"current_steps": 142695, "total_steps": 204665, "loss": 0.0, "lr": 5.084674422253767e-07, "epoch": 3.4860625900862385, "percentage": 69.72, "elapsed_time": "3:04:05", "remaining_time": "1:19:56", "throughput": 8706.74, "total_tokens": 96171984} +{"current_steps": 142700, "total_steps": 204665, "loss": 0.0, "lr": 5.083931792307661e-07, "epoch": 3.4861847409180857, "percentage": 69.72, "elapsed_time": "3:04:06", "remaining_time": "1:19:56", "throughput": 8706.77, "total_tokens": 96175312} +{"current_steps": 142705, "total_steps": 204665, "loss": 0.049, "lr": 5.08318919811253e-07, "epoch": 3.486306891749933, "percentage": 69.73, "elapsed_time": "3:04:06", "remaining_time": "1:19:56", "throughput": 8706.81, "total_tokens": 96178768} +{"current_steps": 142710, "total_steps": 204665, "loss": 0.0, "lr": 5.082446639673766e-07, "epoch": 3.48642904258178, "percentage": 69.73, "elapsed_time": "3:04:06", "remaining_time": "1:19:55", "throughput": 8706.85, "total_tokens": 96182224} +{"current_steps": 142715, "total_steps": 204665, "loss": 0.0005, "lr": 5.081704116996773e-07, "epoch": 3.4865511934136273, "percentage": 69.73, "elapsed_time": "3:04:07", "remaining_time": "1:19:55", "throughput": 8706.89, "total_tokens": 96185808} +{"current_steps": 142720, "total_steps": 204665, "loss": 0.001, "lr": 5.080961630086954e-07, "epoch": 3.4866733442454745, "percentage": 69.73, "elapsed_time": "3:04:07", "remaining_time": "1:19:54", "throughput": 8706.91, "total_tokens": 96189008} +{"current_steps": 142725, "total_steps": 204665, "loss": 0.0157, "lr": 5.080219178949701e-07, "epoch": 3.4867954950773212, "percentage": 69.74, "elapsed_time": "3:04:07", "remaining_time": "1:19:54", "throughput": 8706.94, "total_tokens": 96192272} +{"current_steps": 142730, "total_steps": 204665, "loss": 0.0, "lr": 5.079476763590422e-07, "epoch": 3.486917645909169, "percentage": 69.74, "elapsed_time": "3:04:08", "remaining_time": "1:19:54", "throughput": 8706.99, "total_tokens": 96195856} +{"current_steps": 142735, "total_steps": 204665, "loss": 0.0663, "lr": 5.078734384014507e-07, "epoch": 3.4870397967410156, "percentage": 69.74, "elapsed_time": "3:04:08", "remaining_time": "1:19:53", "throughput": 8707.01, "total_tokens": 96199184} +{"current_steps": 142740, "total_steps": 204665, "loss": 0.0001, "lr": 5.07799204022736e-07, "epoch": 3.487161947572863, "percentage": 69.74, "elapsed_time": "3:04:08", "remaining_time": "1:19:53", "throughput": 8707.03, "total_tokens": 96202320} +{"current_steps": 142745, "total_steps": 204665, "loss": 0.0, "lr": 5.077249732234381e-07, "epoch": 3.48728409840471, "percentage": 69.75, "elapsed_time": "3:04:09", "remaining_time": "1:19:52", "throughput": 8707.07, "total_tokens": 96205776} +{"current_steps": 142750, "total_steps": 204665, "loss": 0.0, "lr": 5.076507460040964e-07, "epoch": 3.487406249236557, "percentage": 69.75, "elapsed_time": "3:04:09", "remaining_time": "1:19:52", "throughput": 8707.08, "total_tokens": 96208912} +{"current_steps": 142755, "total_steps": 204665, "loss": 0.0, "lr": 5.075765223652511e-07, "epoch": 3.4875284000684044, "percentage": 69.75, "elapsed_time": "3:04:09", "remaining_time": "1:19:52", "throughput": 8707.09, "total_tokens": 96212048} +{"current_steps": 142760, "total_steps": 204665, "loss": 0.0, "lr": 5.075023023074415e-07, "epoch": 3.4876505509002516, "percentage": 69.75, "elapsed_time": "3:04:10", "remaining_time": "1:19:51", "throughput": 8707.17, "total_tokens": 96216016} +{"current_steps": 142765, "total_steps": 204665, "loss": 0.0, "lr": 5.07428085831208e-07, "epoch": 3.4877727017320987, "percentage": 69.76, "elapsed_time": "3:04:10", "remaining_time": "1:19:51", "throughput": 8707.21, "total_tokens": 96219536} +{"current_steps": 142770, "total_steps": 204665, "loss": 0.0, "lr": 5.0735387293709e-07, "epoch": 3.487894852563946, "percentage": 69.76, "elapsed_time": "3:04:10", "remaining_time": "1:19:50", "throughput": 8707.25, "total_tokens": 96223056} +{"current_steps": 142775, "total_steps": 204665, "loss": 0.0693, "lr": 5.072796636256267e-07, "epoch": 3.488017003395793, "percentage": 69.76, "elapsed_time": "3:04:11", "remaining_time": "1:19:50", "throughput": 8707.27, "total_tokens": 96226128} +{"current_steps": 142780, "total_steps": 204665, "loss": 0.0005, "lr": 5.072054578973585e-07, "epoch": 3.4881391542276403, "percentage": 69.76, "elapsed_time": "3:04:11", "remaining_time": "1:19:50", "throughput": 8707.29, "total_tokens": 96229392} +{"current_steps": 142785, "total_steps": 204665, "loss": 0.0003, "lr": 5.071312557528244e-07, "epoch": 3.4882613050594875, "percentage": 69.77, "elapsed_time": "3:04:11", "remaining_time": "1:19:49", "throughput": 8707.29, "total_tokens": 96232336} +{"current_steps": 142790, "total_steps": 204665, "loss": 0.0, "lr": 5.07057057192565e-07, "epoch": 3.4883834558913347, "percentage": 69.77, "elapsed_time": "3:04:12", "remaining_time": "1:19:49", "throughput": 8707.33, "total_tokens": 96235792} +{"current_steps": 142795, "total_steps": 204665, "loss": 0.0001, "lr": 5.069828622171186e-07, "epoch": 3.488505606723182, "percentage": 69.77, "elapsed_time": "3:04:12", "remaining_time": "1:19:48", "throughput": 8707.34, "total_tokens": 96238992} +{"current_steps": 142800, "total_steps": 204665, "loss": 0.0, "lr": 5.06908670827026e-07, "epoch": 3.488627757555029, "percentage": 69.77, "elapsed_time": "3:04:12", "remaining_time": "1:19:48", "throughput": 8707.4, "total_tokens": 96242768} +{"current_steps": 142805, "total_steps": 204665, "loss": 0.0, "lr": 5.068344830228257e-07, "epoch": 3.4887499083868763, "percentage": 69.77, "elapsed_time": "3:04:13", "remaining_time": "1:19:48", "throughput": 8707.42, "total_tokens": 96245968} +{"current_steps": 142810, "total_steps": 204665, "loss": 0.0, "lr": 5.067602988050576e-07, "epoch": 3.4888720592187235, "percentage": 69.78, "elapsed_time": "3:04:13", "remaining_time": "1:19:47", "throughput": 8707.46, "total_tokens": 96249360} +{"current_steps": 142815, "total_steps": 204665, "loss": 0.0, "lr": 5.066861181742619e-07, "epoch": 3.4889942100505706, "percentage": 69.78, "elapsed_time": "3:04:14", "remaining_time": "1:19:47", "throughput": 8707.47, "total_tokens": 96252432} +{"current_steps": 142820, "total_steps": 204665, "loss": 0.0, "lr": 5.066119411309769e-07, "epoch": 3.4891163608824174, "percentage": 69.78, "elapsed_time": "3:04:14", "remaining_time": "1:19:46", "throughput": 8707.5, "total_tokens": 96255824} +{"current_steps": 142825, "total_steps": 204665, "loss": 0.0, "lr": 5.065377676757428e-07, "epoch": 3.489238511714265, "percentage": 69.78, "elapsed_time": "3:04:14", "remaining_time": "1:19:46", "throughput": 8707.52, "total_tokens": 96258960} +{"current_steps": 142830, "total_steps": 204665, "loss": 0.0, "lr": 5.064635978090986e-07, "epoch": 3.4893606625461118, "percentage": 69.79, "elapsed_time": "3:04:15", "remaining_time": "1:19:46", "throughput": 8707.54, "total_tokens": 96262160} +{"current_steps": 142835, "total_steps": 204665, "loss": 0.0245, "lr": 5.063894315315837e-07, "epoch": 3.489482813377959, "percentage": 69.79, "elapsed_time": "3:04:15", "remaining_time": "1:19:45", "throughput": 8707.56, "total_tokens": 96265424} +{"current_steps": 142840, "total_steps": 204665, "loss": 0.0, "lr": 5.063152688437382e-07, "epoch": 3.489604964209806, "percentage": 69.79, "elapsed_time": "3:04:15", "remaining_time": "1:19:45", "throughput": 8707.59, "total_tokens": 96268752} +{"current_steps": 142845, "total_steps": 204665, "loss": 0.0, "lr": 5.062411097461004e-07, "epoch": 3.4897271150416533, "percentage": 69.79, "elapsed_time": "3:04:16", "remaining_time": "1:19:44", "throughput": 8707.63, "total_tokens": 96272208} +{"current_steps": 142850, "total_steps": 204665, "loss": 0.0, "lr": 5.061669542392104e-07, "epoch": 3.4898492658735005, "percentage": 69.8, "elapsed_time": "3:04:16", "remaining_time": "1:19:44", "throughput": 8707.69, "total_tokens": 96276048} +{"current_steps": 142855, "total_steps": 204665, "loss": 0.0918, "lr": 5.060928023236069e-07, "epoch": 3.4899714167053477, "percentage": 69.8, "elapsed_time": "3:04:16", "remaining_time": "1:19:44", "throughput": 8707.72, "total_tokens": 96279312} +{"current_steps": 142860, "total_steps": 204665, "loss": 0.0, "lr": 5.060186539998295e-07, "epoch": 3.490093567537195, "percentage": 69.8, "elapsed_time": "3:04:17", "remaining_time": "1:19:43", "throughput": 8707.72, "total_tokens": 96282256} +{"current_steps": 142865, "total_steps": 204665, "loss": 0.0, "lr": 5.059445092684171e-07, "epoch": 3.490215718369042, "percentage": 69.8, "elapsed_time": "3:04:17", "remaining_time": "1:19:43", "throughput": 8707.71, "total_tokens": 96285136} +{"current_steps": 142870, "total_steps": 204665, "loss": 0.0548, "lr": 5.058703681299094e-07, "epoch": 3.4903378692008893, "percentage": 69.81, "elapsed_time": "3:04:17", "remaining_time": "1:19:42", "throughput": 8707.73, "total_tokens": 96288336} +{"current_steps": 142875, "total_steps": 204665, "loss": 0.0002, "lr": 5.057962305848454e-07, "epoch": 3.4904600200327365, "percentage": 69.81, "elapsed_time": "3:04:18", "remaining_time": "1:19:42", "throughput": 8707.73, "total_tokens": 96291280} +{"current_steps": 142880, "total_steps": 204665, "loss": 0.0, "lr": 5.057220966337638e-07, "epoch": 3.4905821708645837, "percentage": 69.81, "elapsed_time": "3:04:18", "remaining_time": "1:19:41", "throughput": 8707.77, "total_tokens": 96294800} +{"current_steps": 142885, "total_steps": 204665, "loss": 0.0008, "lr": 5.056479662772042e-07, "epoch": 3.490704321696431, "percentage": 69.81, "elapsed_time": "3:04:18", "remaining_time": "1:19:41", "throughput": 8707.77, "total_tokens": 96297808} +{"current_steps": 142890, "total_steps": 204665, "loss": 0.0511, "lr": 5.055738395157055e-07, "epoch": 3.490826472528278, "percentage": 69.82, "elapsed_time": "3:04:19", "remaining_time": "1:19:41", "throughput": 8707.8, "total_tokens": 96301136} +{"current_steps": 142895, "total_steps": 204665, "loss": 0.0, "lr": 5.054997163498065e-07, "epoch": 3.4909486233601252, "percentage": 69.82, "elapsed_time": "3:04:19", "remaining_time": "1:19:40", "throughput": 8707.84, "total_tokens": 96304528} +{"current_steps": 142900, "total_steps": 204665, "loss": 0.0, "lr": 5.054255967800471e-07, "epoch": 3.4910707741919724, "percentage": 69.82, "elapsed_time": "3:04:19", "remaining_time": "1:19:40", "throughput": 8707.85, "total_tokens": 96307600} +{"current_steps": 142905, "total_steps": 204665, "loss": 0.0, "lr": 5.053514808069655e-07, "epoch": 3.491192925023819, "percentage": 69.82, "elapsed_time": "3:04:20", "remaining_time": "1:19:39", "throughput": 8707.88, "total_tokens": 96310928} +{"current_steps": 142910, "total_steps": 204665, "loss": 0.0, "lr": 5.052773684311011e-07, "epoch": 3.491315075855667, "percentage": 69.83, "elapsed_time": "3:04:20", "remaining_time": "1:19:39", "throughput": 8707.92, "total_tokens": 96314512} +{"current_steps": 142915, "total_steps": 204665, "loss": 0.0, "lr": 5.052032596529926e-07, "epoch": 3.4914372266875136, "percentage": 69.83, "elapsed_time": "3:04:20", "remaining_time": "1:19:39", "throughput": 8707.97, "total_tokens": 96318096} +{"current_steps": 142920, "total_steps": 204665, "loss": 0.0268, "lr": 5.051291544731794e-07, "epoch": 3.4915593775193607, "percentage": 69.83, "elapsed_time": "3:04:21", "remaining_time": "1:19:38", "throughput": 8707.99, "total_tokens": 96321296} +{"current_steps": 142925, "total_steps": 204665, "loss": 0.0001, "lr": 5.050550528921998e-07, "epoch": 3.491681528351208, "percentage": 69.83, "elapsed_time": "3:04:21", "remaining_time": "1:19:38", "throughput": 8708.01, "total_tokens": 96324496} +{"current_steps": 142930, "total_steps": 204665, "loss": 0.0001, "lr": 5.049809549105928e-07, "epoch": 3.491803679183055, "percentage": 69.84, "elapsed_time": "3:04:21", "remaining_time": "1:19:37", "throughput": 8708.03, "total_tokens": 96327760} +{"current_steps": 142935, "total_steps": 204665, "loss": 0.0002, "lr": 5.049068605288978e-07, "epoch": 3.4919258300149023, "percentage": 69.84, "elapsed_time": "3:04:22", "remaining_time": "1:19:37", "throughput": 8708.03, "total_tokens": 96330640} +{"current_steps": 142940, "total_steps": 204665, "loss": 0.0, "lr": 5.04832769747653e-07, "epoch": 3.4920479808467495, "percentage": 69.84, "elapsed_time": "3:04:22", "remaining_time": "1:19:37", "throughput": 8708.03, "total_tokens": 96333584} +{"current_steps": 142945, "total_steps": 204665, "loss": 0.0016, "lr": 5.047586825673978e-07, "epoch": 3.4921701316785967, "percentage": 69.84, "elapsed_time": "3:04:22", "remaining_time": "1:19:36", "throughput": 8708.05, "total_tokens": 96336848} +{"current_steps": 142950, "total_steps": 204665, "loss": 0.0, "lr": 5.046845989886703e-07, "epoch": 3.492292282510444, "percentage": 69.85, "elapsed_time": "3:04:23", "remaining_time": "1:19:36", "throughput": 8708.1, "total_tokens": 96340496} +{"current_steps": 142955, "total_steps": 204665, "loss": 0.0, "lr": 5.0461051901201e-07, "epoch": 3.492414433342291, "percentage": 69.85, "elapsed_time": "3:04:23", "remaining_time": "1:19:35", "throughput": 8708.11, "total_tokens": 96343568} +{"current_steps": 142960, "total_steps": 204665, "loss": 0.0, "lr": 5.04536442637955e-07, "epoch": 3.4925365841741383, "percentage": 69.85, "elapsed_time": "3:04:24", "remaining_time": "1:19:35", "throughput": 8708.16, "total_tokens": 96347216} +{"current_steps": 142965, "total_steps": 204665, "loss": 0.0489, "lr": 5.044623698670441e-07, "epoch": 3.4926587350059854, "percentage": 69.85, "elapsed_time": "3:04:24", "remaining_time": "1:19:35", "throughput": 8708.15, "total_tokens": 96349968} +{"current_steps": 142970, "total_steps": 204665, "loss": 0.0, "lr": 5.043883006998166e-07, "epoch": 3.4927808858378326, "percentage": 69.86, "elapsed_time": "3:04:24", "remaining_time": "1:19:34", "throughput": 8708.21, "total_tokens": 96353744} +{"current_steps": 142975, "total_steps": 204665, "loss": 0.0609, "lr": 5.043142351368106e-07, "epoch": 3.49290303666968, "percentage": 69.86, "elapsed_time": "3:04:25", "remaining_time": "1:19:34", "throughput": 8708.22, "total_tokens": 96356816} +{"current_steps": 142980, "total_steps": 204665, "loss": 0.0, "lr": 5.042401731785645e-07, "epoch": 3.493025187501527, "percentage": 69.86, "elapsed_time": "3:04:25", "remaining_time": "1:19:33", "throughput": 8708.27, "total_tokens": 96360400} +{"current_steps": 142985, "total_steps": 204665, "loss": 0.0001, "lr": 5.041661148256175e-07, "epoch": 3.493147338333374, "percentage": 69.86, "elapsed_time": "3:04:25", "remaining_time": "1:19:33", "throughput": 8708.29, "total_tokens": 96363600} +{"current_steps": 142990, "total_steps": 204665, "loss": 0.0, "lr": 5.040920600785075e-07, "epoch": 3.4932694891652214, "percentage": 69.87, "elapsed_time": "3:04:26", "remaining_time": "1:19:33", "throughput": 8708.33, "total_tokens": 96367120} +{"current_steps": 142995, "total_steps": 204665, "loss": 0.0006, "lr": 5.04018008937774e-07, "epoch": 3.4933916399970686, "percentage": 69.87, "elapsed_time": "3:04:26", "remaining_time": "1:19:32", "throughput": 8708.35, "total_tokens": 96370256} +{"current_steps": 143000, "total_steps": 204665, "loss": 0.0, "lr": 5.039439614039543e-07, "epoch": 3.4935137908289153, "percentage": 69.87, "elapsed_time": "3:04:26", "remaining_time": "1:19:32", "throughput": 8708.38, "total_tokens": 96373648} +{"current_steps": 143005, "total_steps": 204665, "loss": 0.0, "lr": 5.03869917477588e-07, "epoch": 3.4936359416607625, "percentage": 69.87, "elapsed_time": "3:04:27", "remaining_time": "1:19:31", "throughput": 8708.43, "total_tokens": 96377232} +{"current_steps": 143010, "total_steps": 204665, "loss": 0.0, "lr": 5.037958771592128e-07, "epoch": 3.4937580924926097, "percentage": 69.88, "elapsed_time": "3:04:27", "remaining_time": "1:19:31", "throughput": 8708.44, "total_tokens": 96380368} +{"current_steps": 143015, "total_steps": 204665, "loss": 0.0001, "lr": 5.037218404493677e-07, "epoch": 3.493880243324457, "percentage": 69.88, "elapsed_time": "3:04:27", "remaining_time": "1:19:31", "throughput": 8708.5, "total_tokens": 96384080} +{"current_steps": 143020, "total_steps": 204665, "loss": 0.0, "lr": 5.036478073485906e-07, "epoch": 3.494002394156304, "percentage": 69.88, "elapsed_time": "3:04:28", "remaining_time": "1:19:30", "throughput": 8708.55, "total_tokens": 96387728} +{"current_steps": 143025, "total_steps": 204665, "loss": 0.0, "lr": 5.035737778574202e-07, "epoch": 3.4941245449881513, "percentage": 69.88, "elapsed_time": "3:04:28", "remaining_time": "1:19:30", "throughput": 8708.59, "total_tokens": 96391184} +{"current_steps": 143030, "total_steps": 204665, "loss": 0.0355, "lr": 5.034997519763951e-07, "epoch": 3.4942466958199985, "percentage": 69.88, "elapsed_time": "3:04:28", "remaining_time": "1:19:29", "throughput": 8708.62, "total_tokens": 96394448} +{"current_steps": 143035, "total_steps": 204665, "loss": 0.0, "lr": 5.034257297060529e-07, "epoch": 3.4943688466518457, "percentage": 69.89, "elapsed_time": "3:04:29", "remaining_time": "1:19:29", "throughput": 8708.66, "total_tokens": 96398032} +{"current_steps": 143040, "total_steps": 204665, "loss": 0.001, "lr": 5.033517110469327e-07, "epoch": 3.494490997483693, "percentage": 69.89, "elapsed_time": "3:04:29", "remaining_time": "1:19:29", "throughput": 8708.68, "total_tokens": 96401168} +{"current_steps": 143045, "total_steps": 204665, "loss": 0.0002, "lr": 5.032776959995721e-07, "epoch": 3.49461314831554, "percentage": 69.89, "elapsed_time": "3:04:29", "remaining_time": "1:19:28", "throughput": 8708.72, "total_tokens": 96404688} +{"current_steps": 143050, "total_steps": 204665, "loss": 0.0, "lr": 5.032036845645099e-07, "epoch": 3.4947352991473872, "percentage": 69.89, "elapsed_time": "3:04:30", "remaining_time": "1:19:28", "throughput": 8708.73, "total_tokens": 96407824} +{"current_steps": 143055, "total_steps": 204665, "loss": 0.0842, "lr": 5.031296767422844e-07, "epoch": 3.4948574499792344, "percentage": 69.9, "elapsed_time": "3:04:30", "remaining_time": "1:19:27", "throughput": 8708.75, "total_tokens": 96411088} +{"current_steps": 143060, "total_steps": 204665, "loss": 0.0, "lr": 5.030556725334331e-07, "epoch": 3.4949796008110816, "percentage": 69.9, "elapsed_time": "3:04:30", "remaining_time": "1:19:27", "throughput": 8708.8, "total_tokens": 96414672} +{"current_steps": 143065, "total_steps": 204665, "loss": 0.0, "lr": 5.029816719384949e-07, "epoch": 3.495101751642929, "percentage": 69.9, "elapsed_time": "3:04:31", "remaining_time": "1:19:27", "throughput": 8708.81, "total_tokens": 96417808} +{"current_steps": 143070, "total_steps": 204665, "loss": 0.0001, "lr": 5.029076749580075e-07, "epoch": 3.495223902474776, "percentage": 69.9, "elapsed_time": "3:04:31", "remaining_time": "1:19:26", "throughput": 8708.85, "total_tokens": 96421264} +{"current_steps": 143075, "total_steps": 204665, "loss": 0.0, "lr": 5.028336815925094e-07, "epoch": 3.495346053306623, "percentage": 69.91, "elapsed_time": "3:04:31", "remaining_time": "1:19:26", "throughput": 8708.9, "total_tokens": 96424848} +{"current_steps": 143080, "total_steps": 204665, "loss": 0.0172, "lr": 5.027596918425386e-07, "epoch": 3.4954682041384704, "percentage": 69.91, "elapsed_time": "3:04:32", "remaining_time": "1:19:25", "throughput": 8708.99, "total_tokens": 96429072} +{"current_steps": 143085, "total_steps": 204665, "loss": 0.0, "lr": 5.026857057086325e-07, "epoch": 3.495590354970317, "percentage": 69.91, "elapsed_time": "3:04:32", "remaining_time": "1:19:25", "throughput": 8709.02, "total_tokens": 96432400} +{"current_steps": 143090, "total_steps": 204665, "loss": 0.0, "lr": 5.026117231913303e-07, "epoch": 3.4957125058021647, "percentage": 69.91, "elapsed_time": "3:04:33", "remaining_time": "1:19:24", "throughput": 8709.04, "total_tokens": 96435600} +{"current_steps": 143095, "total_steps": 204665, "loss": 0.0, "lr": 5.025377442911689e-07, "epoch": 3.4958346566340115, "percentage": 69.92, "elapsed_time": "3:04:33", "remaining_time": "1:19:24", "throughput": 8709.07, "total_tokens": 96439056} +{"current_steps": 143100, "total_steps": 204665, "loss": 0.1003, "lr": 5.024637690086873e-07, "epoch": 3.4959568074658587, "percentage": 69.92, "elapsed_time": "3:04:33", "remaining_time": "1:19:24", "throughput": 8709.09, "total_tokens": 96442192} +{"current_steps": 143105, "total_steps": 204665, "loss": 0.0, "lr": 5.023897973444226e-07, "epoch": 3.496078958297706, "percentage": 69.92, "elapsed_time": "3:04:34", "remaining_time": "1:19:23", "throughput": 8709.13, "total_tokens": 96445776} +{"current_steps": 143110, "total_steps": 204665, "loss": 0.0, "lr": 5.023158292989135e-07, "epoch": 3.496201109129553, "percentage": 69.92, "elapsed_time": "3:04:34", "remaining_time": "1:19:23", "throughput": 8709.16, "total_tokens": 96449104} +{"current_steps": 143115, "total_steps": 204665, "loss": 0.0001, "lr": 5.022418648726972e-07, "epoch": 3.4963232599614003, "percentage": 69.93, "elapsed_time": "3:04:34", "remaining_time": "1:19:22", "throughput": 8709.15, "total_tokens": 96451920} +{"current_steps": 143120, "total_steps": 204665, "loss": 0.0002, "lr": 5.021679040663118e-07, "epoch": 3.4964454107932474, "percentage": 69.93, "elapsed_time": "3:04:35", "remaining_time": "1:19:22", "throughput": 8709.15, "total_tokens": 96454864} +{"current_steps": 143125, "total_steps": 204665, "loss": 0.0, "lr": 5.020939468802958e-07, "epoch": 3.4965675616250946, "percentage": 69.93, "elapsed_time": "3:04:35", "remaining_time": "1:19:22", "throughput": 8709.22, "total_tokens": 96458768} +{"current_steps": 143130, "total_steps": 204665, "loss": 0.0, "lr": 5.020199933151862e-07, "epoch": 3.496689712456942, "percentage": 69.93, "elapsed_time": "3:04:35", "remaining_time": "1:19:21", "throughput": 8709.26, "total_tokens": 96462160} +{"current_steps": 143135, "total_steps": 204665, "loss": 0.0, "lr": 5.019460433715214e-07, "epoch": 3.496811863288789, "percentage": 69.94, "elapsed_time": "3:04:36", "remaining_time": "1:19:21", "throughput": 8709.27, "total_tokens": 96465296} +{"current_steps": 143140, "total_steps": 204665, "loss": 0.0, "lr": 5.018720970498387e-07, "epoch": 3.496934014120636, "percentage": 69.94, "elapsed_time": "3:04:36", "remaining_time": "1:19:20", "throughput": 8709.32, "total_tokens": 96468880} +{"current_steps": 143145, "total_steps": 204665, "loss": 0.0269, "lr": 5.01798154350676e-07, "epoch": 3.4970561649524834, "percentage": 69.94, "elapsed_time": "3:04:36", "remaining_time": "1:19:20", "throughput": 8709.34, "total_tokens": 96472208} +{"current_steps": 143150, "total_steps": 204665, "loss": 0.0, "lr": 5.017242152745715e-07, "epoch": 3.4971783157843306, "percentage": 69.94, "elapsed_time": "3:04:37", "remaining_time": "1:19:20", "throughput": 8709.39, "total_tokens": 96475728} +{"current_steps": 143155, "total_steps": 204665, "loss": 0.0, "lr": 5.016502798220622e-07, "epoch": 3.4973004666161778, "percentage": 69.95, "elapsed_time": "3:04:37", "remaining_time": "1:19:19", "throughput": 8709.42, "total_tokens": 96479184} +{"current_steps": 143160, "total_steps": 204665, "loss": 0.0001, "lr": 5.015763479936865e-07, "epoch": 3.497422617448025, "percentage": 69.95, "elapsed_time": "3:04:37", "remaining_time": "1:19:19", "throughput": 8709.44, "total_tokens": 96482320} +{"current_steps": 143165, "total_steps": 204665, "loss": 0.0, "lr": 5.015024197899812e-07, "epoch": 3.497544768279872, "percentage": 69.95, "elapsed_time": "3:04:38", "remaining_time": "1:19:18", "throughput": 8709.44, "total_tokens": 96485264} +{"current_steps": 143170, "total_steps": 204665, "loss": 0.0001, "lr": 5.014284952114848e-07, "epoch": 3.4976669191117193, "percentage": 69.95, "elapsed_time": "3:04:38", "remaining_time": "1:19:18", "throughput": 8709.48, "total_tokens": 96488848} +{"current_steps": 143175, "total_steps": 204665, "loss": 0.0, "lr": 5.013545742587341e-07, "epoch": 3.4977890699435665, "percentage": 69.96, "elapsed_time": "3:04:38", "remaining_time": "1:19:18", "throughput": 8709.53, "total_tokens": 96492496} +{"current_steps": 143180, "total_steps": 204665, "loss": 0.0344, "lr": 5.012806569322674e-07, "epoch": 3.4979112207754133, "percentage": 69.96, "elapsed_time": "3:04:39", "remaining_time": "1:19:17", "throughput": 8709.59, "total_tokens": 96496208} +{"current_steps": 143185, "total_steps": 204665, "loss": 0.0642, "lr": 5.012067432326219e-07, "epoch": 3.4980333716072605, "percentage": 69.96, "elapsed_time": "3:04:39", "remaining_time": "1:19:17", "throughput": 8709.61, "total_tokens": 96499408} +{"current_steps": 143190, "total_steps": 204665, "loss": 0.0001, "lr": 5.011328331603348e-07, "epoch": 3.4981555224391077, "percentage": 69.96, "elapsed_time": "3:04:39", "remaining_time": "1:19:16", "throughput": 8709.63, "total_tokens": 96502672} +{"current_steps": 143195, "total_steps": 204665, "loss": 0.0, "lr": 5.010589267159443e-07, "epoch": 3.498277673270955, "percentage": 69.97, "elapsed_time": "3:04:40", "remaining_time": "1:19:16", "throughput": 8709.66, "total_tokens": 96506000} +{"current_steps": 143200, "total_steps": 204665, "loss": 0.0, "lr": 5.00985023899987e-07, "epoch": 3.498399824102802, "percentage": 69.97, "elapsed_time": "3:04:40", "remaining_time": "1:19:16", "throughput": 8709.7, "total_tokens": 96509584} +{"current_steps": 143205, "total_steps": 204665, "loss": 0.0, "lr": 5.00911124713001e-07, "epoch": 3.4985219749346492, "percentage": 69.97, "elapsed_time": "3:04:41", "remaining_time": "1:19:15", "throughput": 8709.71, "total_tokens": 96512592} +{"current_steps": 143210, "total_steps": 204665, "loss": 0.0, "lr": 5.008372291555238e-07, "epoch": 3.4986441257664964, "percentage": 69.97, "elapsed_time": "3:04:41", "remaining_time": "1:19:15", "throughput": 8709.76, "total_tokens": 96516176} +{"current_steps": 143215, "total_steps": 204665, "loss": 0.0, "lr": 5.007633372280921e-07, "epoch": 3.4987662765983436, "percentage": 69.98, "elapsed_time": "3:04:41", "remaining_time": "1:19:14", "throughput": 8709.78, "total_tokens": 96519440} +{"current_steps": 143220, "total_steps": 204665, "loss": 0.0, "lr": 5.006894489312442e-07, "epoch": 3.498888427430191, "percentage": 69.98, "elapsed_time": "3:04:42", "remaining_time": "1:19:14", "throughput": 8709.79, "total_tokens": 96522512} +{"current_steps": 143225, "total_steps": 204665, "loss": 0.0, "lr": 5.006155642655165e-07, "epoch": 3.499010578262038, "percentage": 69.98, "elapsed_time": "3:04:42", "remaining_time": "1:19:14", "throughput": 8709.82, "total_tokens": 96525840} +{"current_steps": 143230, "total_steps": 204665, "loss": 0.0, "lr": 5.005416832314471e-07, "epoch": 3.499132729093885, "percentage": 69.98, "elapsed_time": "3:04:42", "remaining_time": "1:19:13", "throughput": 8709.87, "total_tokens": 96529424} +{"current_steps": 143235, "total_steps": 204665, "loss": 0.0, "lr": 5.004678058295726e-07, "epoch": 3.4992548799257324, "percentage": 69.99, "elapsed_time": "3:04:43", "remaining_time": "1:19:13", "throughput": 8709.88, "total_tokens": 96532560} +{"current_steps": 143240, "total_steps": 204665, "loss": 0.0, "lr": 5.003939320604304e-07, "epoch": 3.4993770307575796, "percentage": 69.99, "elapsed_time": "3:04:43", "remaining_time": "1:19:12", "throughput": 8709.9, "total_tokens": 96535760} +{"current_steps": 143245, "total_steps": 204665, "loss": 0.0, "lr": 5.003200619245584e-07, "epoch": 3.4994991815894267, "percentage": 69.99, "elapsed_time": "3:04:43", "remaining_time": "1:19:12", "throughput": 8709.94, "total_tokens": 96539152} +{"current_steps": 143250, "total_steps": 204665, "loss": 0.0, "lr": 5.00246195422493e-07, "epoch": 3.499621332421274, "percentage": 69.99, "elapsed_time": "3:04:44", "remaining_time": "1:19:12", "throughput": 8709.98, "total_tokens": 96542672} +{"current_steps": 143255, "total_steps": 204665, "loss": 0.0421, "lr": 5.00172332554772e-07, "epoch": 3.499743483253121, "percentage": 69.99, "elapsed_time": "3:04:44", "remaining_time": "1:19:11", "throughput": 8710.02, "total_tokens": 96546128} +{"current_steps": 143260, "total_steps": 204665, "loss": 0.0001, "lr": 5.000984733219318e-07, "epoch": 3.4998656340849683, "percentage": 70.0, "elapsed_time": "3:04:44", "remaining_time": "1:19:11", "throughput": 8710.07, "total_tokens": 96549840} +{"current_steps": 143265, "total_steps": 204665, "loss": 0.0, "lr": 5.000246177245104e-07, "epoch": 3.499987784916815, "percentage": 70.0, "elapsed_time": "3:04:45", "remaining_time": "1:19:10", "throughput": 8710.09, "total_tokens": 96553104} +{"current_steps": 143270, "total_steps": 204665, "loss": 0.1349, "lr": 4.999507657630441e-07, "epoch": 3.5001099357486627, "percentage": 70.0, "elapsed_time": "3:04:45", "remaining_time": "1:19:10", "throughput": 8710.16, "total_tokens": 96557008} +{"current_steps": 143275, "total_steps": 204665, "loss": 0.0332, "lr": 4.998769174380703e-07, "epoch": 3.5002320865805094, "percentage": 70.0, "elapsed_time": "3:04:45", "remaining_time": "1:19:10", "throughput": 8710.17, "total_tokens": 96560080} +{"current_steps": 143276, "total_steps": 204665, "eval_loss": 0.24939614534378052, "epoch": 3.5002565167468793, "percentage": 70.01, "elapsed_time": "3:05:33", "remaining_time": "1:19:30", "throughput": 8672.7, "total_tokens": 96560720} +{"current_steps": 143280, "total_steps": 204665, "loss": 0.0, "lr": 4.998030727501263e-07, "epoch": 3.5003542374123566, "percentage": 70.01, "elapsed_time": "3:06:09", "remaining_time": "1:19:45", "throughput": 8644.91, "total_tokens": 96563536} +{"current_steps": 143285, "total_steps": 204665, "loss": 0.0563, "lr": 4.997292316997492e-07, "epoch": 3.500476388244204, "percentage": 70.01, "elapsed_time": "3:06:10", "remaining_time": "1:19:45", "throughput": 8644.93, "total_tokens": 96566736} +{"current_steps": 143290, "total_steps": 204665, "loss": 0.0, "lr": 4.996553942874751e-07, "epoch": 3.500598539076051, "percentage": 70.01, "elapsed_time": "3:06:10", "remaining_time": "1:19:44", "throughput": 8644.96, "total_tokens": 96570192} +{"current_steps": 143295, "total_steps": 204665, "loss": 0.0762, "lr": 4.995815605138419e-07, "epoch": 3.500720689907898, "percentage": 70.01, "elapsed_time": "3:06:11", "remaining_time": "1:19:44", "throughput": 8644.98, "total_tokens": 96573392} +{"current_steps": 143300, "total_steps": 204665, "loss": 0.0, "lr": 4.995077303793859e-07, "epoch": 3.5008428407397454, "percentage": 70.02, "elapsed_time": "3:06:11", "remaining_time": "1:19:43", "throughput": 8645.02, "total_tokens": 96576848} +{"current_steps": 143305, "total_steps": 204665, "loss": 0.0, "lr": 4.994339038846447e-07, "epoch": 3.5009649915715926, "percentage": 70.02, "elapsed_time": "3:06:11", "remaining_time": "1:19:43", "throughput": 8645.05, "total_tokens": 96580112} +{"current_steps": 143310, "total_steps": 204665, "loss": 0.0, "lr": 4.993600810301543e-07, "epoch": 3.5010871424034398, "percentage": 70.02, "elapsed_time": "3:06:12", "remaining_time": "1:19:43", "throughput": 8645.09, "total_tokens": 96583568} +{"current_steps": 143315, "total_steps": 204665, "loss": 0.0, "lr": 4.992862618164525e-07, "epoch": 3.501209293235287, "percentage": 70.02, "elapsed_time": "3:06:12", "remaining_time": "1:19:42", "throughput": 8645.12, "total_tokens": 96586960} +{"current_steps": 143320, "total_steps": 204665, "loss": 0.0016, "lr": 4.992124462440754e-07, "epoch": 3.501331444067134, "percentage": 70.03, "elapsed_time": "3:06:12", "remaining_time": "1:19:42", "throughput": 8645.15, "total_tokens": 96590224} +{"current_steps": 143325, "total_steps": 204665, "loss": 0.0, "lr": 4.991386343135602e-07, "epoch": 3.5014535948989813, "percentage": 70.03, "elapsed_time": "3:06:13", "remaining_time": "1:19:41", "throughput": 8645.18, "total_tokens": 96593680} +{"current_steps": 143330, "total_steps": 204665, "loss": 0.0, "lr": 4.990648260254434e-07, "epoch": 3.5015757457308285, "percentage": 70.03, "elapsed_time": "3:06:13", "remaining_time": "1:19:41", "throughput": 8645.19, "total_tokens": 96596688} +{"current_steps": 143335, "total_steps": 204665, "loss": 0.0, "lr": 4.989910213802618e-07, "epoch": 3.5016978965626757, "percentage": 70.03, "elapsed_time": "3:06:13", "remaining_time": "1:19:41", "throughput": 8645.21, "total_tokens": 96599824} +{"current_steps": 143340, "total_steps": 204665, "loss": 0.0526, "lr": 4.989172203785528e-07, "epoch": 3.501820047394523, "percentage": 70.04, "elapsed_time": "3:06:14", "remaining_time": "1:19:40", "throughput": 8645.24, "total_tokens": 96603216} +{"current_steps": 143345, "total_steps": 204665, "loss": 0.0, "lr": 4.98843423020852e-07, "epoch": 3.50194219822637, "percentage": 70.04, "elapsed_time": "3:06:14", "remaining_time": "1:19:40", "throughput": 8645.32, "total_tokens": 96607248} +{"current_steps": 143350, "total_steps": 204665, "loss": 0.0002, "lr": 4.98769629307697e-07, "epoch": 3.502064349058217, "percentage": 70.04, "elapsed_time": "3:06:14", "remaining_time": "1:19:39", "throughput": 8645.36, "total_tokens": 96610704} +{"current_steps": 143355, "total_steps": 204665, "loss": 0.0377, "lr": 4.986958392396239e-07, "epoch": 3.5021864998900645, "percentage": 70.04, "elapsed_time": "3:06:15", "remaining_time": "1:19:39", "throughput": 8645.38, "total_tokens": 96613904} +{"current_steps": 143360, "total_steps": 204665, "loss": 0.0, "lr": 4.986220528171692e-07, "epoch": 3.502308650721911, "percentage": 70.05, "elapsed_time": "3:06:15", "remaining_time": "1:19:39", "throughput": 8645.41, "total_tokens": 96617360} +{"current_steps": 143365, "total_steps": 204665, "loss": 0.0, "lr": 4.985482700408704e-07, "epoch": 3.502430801553759, "percentage": 70.05, "elapsed_time": "3:06:15", "remaining_time": "1:19:38", "throughput": 8645.47, "total_tokens": 96621008} +{"current_steps": 143370, "total_steps": 204665, "loss": 0.0, "lr": 4.98474490911263e-07, "epoch": 3.5025529523856056, "percentage": 70.05, "elapsed_time": "3:06:16", "remaining_time": "1:19:38", "throughput": 8645.49, "total_tokens": 96624208} +{"current_steps": 143375, "total_steps": 204665, "loss": 0.0, "lr": 4.984007154288843e-07, "epoch": 3.502675103217453, "percentage": 70.05, "elapsed_time": "3:06:16", "remaining_time": "1:19:37", "throughput": 8645.54, "total_tokens": 96627920} +{"current_steps": 143380, "total_steps": 204665, "loss": 0.0, "lr": 4.983269435942702e-07, "epoch": 3.5027972540493, "percentage": 70.06, "elapsed_time": "3:06:16", "remaining_time": "1:19:37", "throughput": 8645.56, "total_tokens": 96631120} +{"current_steps": 143385, "total_steps": 204665, "loss": 0.0626, "lr": 4.98253175407958e-07, "epoch": 3.502919404881147, "percentage": 70.06, "elapsed_time": "3:06:17", "remaining_time": "1:19:36", "throughput": 8645.61, "total_tokens": 96634704} +{"current_steps": 143390, "total_steps": 204665, "loss": 0.087, "lr": 4.981794108704834e-07, "epoch": 3.5030415557129944, "percentage": 70.06, "elapsed_time": "3:06:17", "remaining_time": "1:19:36", "throughput": 8645.63, "total_tokens": 96637968} +{"current_steps": 143395, "total_steps": 204665, "loss": 0.0001, "lr": 4.981056499823829e-07, "epoch": 3.5031637065448415, "percentage": 70.06, "elapsed_time": "3:06:18", "remaining_time": "1:19:36", "throughput": 8645.68, "total_tokens": 96641552} +{"current_steps": 143400, "total_steps": 204665, "loss": 0.0491, "lr": 4.980318927441934e-07, "epoch": 3.5032858573766887, "percentage": 70.07, "elapsed_time": "3:06:18", "remaining_time": "1:19:35", "throughput": 8645.72, "total_tokens": 96645008} +{"current_steps": 143405, "total_steps": 204665, "loss": 0.0, "lr": 4.979581391564507e-07, "epoch": 3.503408008208536, "percentage": 70.07, "elapsed_time": "3:06:18", "remaining_time": "1:19:35", "throughput": 8645.75, "total_tokens": 96648400} +{"current_steps": 143410, "total_steps": 204665, "loss": 0.0, "lr": 4.978843892196918e-07, "epoch": 3.503530159040383, "percentage": 70.07, "elapsed_time": "3:06:19", "remaining_time": "1:19:34", "throughput": 8645.79, "total_tokens": 96651792} +{"current_steps": 143415, "total_steps": 204665, "loss": 0.0548, "lr": 4.978106429344523e-07, "epoch": 3.5036523098722303, "percentage": 70.07, "elapsed_time": "3:06:19", "remaining_time": "1:19:34", "throughput": 8645.83, "total_tokens": 96655248} +{"current_steps": 143420, "total_steps": 204665, "loss": 0.0, "lr": 4.977369003012691e-07, "epoch": 3.5037744607040775, "percentage": 70.08, "elapsed_time": "3:06:19", "remaining_time": "1:19:34", "throughput": 8645.83, "total_tokens": 96658192} +{"current_steps": 143425, "total_steps": 204665, "loss": 0.0, "lr": 4.976631613206781e-07, "epoch": 3.5038966115359247, "percentage": 70.08, "elapsed_time": "3:06:20", "remaining_time": "1:19:33", "throughput": 8645.85, "total_tokens": 96661456} +{"current_steps": 143430, "total_steps": 204665, "loss": 0.0, "lr": 4.975894259932156e-07, "epoch": 3.504018762367772, "percentage": 70.08, "elapsed_time": "3:06:20", "remaining_time": "1:19:33", "throughput": 8645.89, "total_tokens": 96664912} +{"current_steps": 143435, "total_steps": 204665, "loss": 0.0001, "lr": 4.975156943194183e-07, "epoch": 3.5041409131996186, "percentage": 70.08, "elapsed_time": "3:06:20", "remaining_time": "1:19:32", "throughput": 8645.89, "total_tokens": 96667856} +{"current_steps": 143440, "total_steps": 204665, "loss": 0.0, "lr": 4.974419662998216e-07, "epoch": 3.5042630640314663, "percentage": 70.09, "elapsed_time": "3:06:21", "remaining_time": "1:19:32", "throughput": 8645.93, "total_tokens": 96671312} +{"current_steps": 143445, "total_steps": 204665, "loss": 0.0001, "lr": 4.973682419349625e-07, "epoch": 3.504385214863313, "percentage": 70.09, "elapsed_time": "3:06:21", "remaining_time": "1:19:32", "throughput": 8645.97, "total_tokens": 96674832} +{"current_steps": 143450, "total_steps": 204665, "loss": 0.031, "lr": 4.972945212253764e-07, "epoch": 3.5045073656951606, "percentage": 70.09, "elapsed_time": "3:06:21", "remaining_time": "1:19:31", "throughput": 8645.99, "total_tokens": 96678032} +{"current_steps": 143455, "total_steps": 204665, "loss": 0.0, "lr": 4.972208041715997e-07, "epoch": 3.5046295165270074, "percentage": 70.09, "elapsed_time": "3:06:22", "remaining_time": "1:19:31", "throughput": 8645.99, "total_tokens": 96680976} +{"current_steps": 143460, "total_steps": 204665, "loss": 0.0, "lr": 4.971470907741691e-07, "epoch": 3.5047516673588546, "percentage": 70.1, "elapsed_time": "3:06:22", "remaining_time": "1:19:30", "throughput": 8646.04, "total_tokens": 96684560} +{"current_steps": 143465, "total_steps": 204665, "loss": 0.0, "lr": 4.970733810336196e-07, "epoch": 3.5048738181907018, "percentage": 70.1, "elapsed_time": "3:06:22", "remaining_time": "1:19:30", "throughput": 8646.07, "total_tokens": 96687888} +{"current_steps": 143470, "total_steps": 204665, "loss": 0.0, "lr": 4.96999674950488e-07, "epoch": 3.504995969022549, "percentage": 70.1, "elapsed_time": "3:06:23", "remaining_time": "1:19:30", "throughput": 8646.11, "total_tokens": 96691408} +{"current_steps": 143475, "total_steps": 204665, "loss": 0.0, "lr": 4.969259725253098e-07, "epoch": 3.505118119854396, "percentage": 70.1, "elapsed_time": "3:06:23", "remaining_time": "1:19:29", "throughput": 8646.14, "total_tokens": 96694736} +{"current_steps": 143480, "total_steps": 204665, "loss": 0.0, "lr": 4.968522737586216e-07, "epoch": 3.5052402706862433, "percentage": 70.1, "elapsed_time": "3:06:23", "remaining_time": "1:19:29", "throughput": 8646.18, "total_tokens": 96698192} +{"current_steps": 143485, "total_steps": 204665, "loss": 0.0001, "lr": 4.967785786509586e-07, "epoch": 3.5053624215180905, "percentage": 70.11, "elapsed_time": "3:06:24", "remaining_time": "1:19:28", "throughput": 8646.24, "total_tokens": 96701904} +{"current_steps": 143490, "total_steps": 204665, "loss": 0.0, "lr": 4.967048872028575e-07, "epoch": 3.5054845723499377, "percentage": 70.11, "elapsed_time": "3:06:24", "remaining_time": "1:19:28", "throughput": 8646.26, "total_tokens": 96705168} +{"current_steps": 143495, "total_steps": 204665, "loss": 0.0, "lr": 4.966311994148539e-07, "epoch": 3.505606723181785, "percentage": 70.11, "elapsed_time": "3:06:24", "remaining_time": "1:19:28", "throughput": 8646.3, "total_tokens": 96708560} +{"current_steps": 143500, "total_steps": 204665, "loss": 0.0001, "lr": 4.965575152874833e-07, "epoch": 3.505728874013632, "percentage": 70.11, "elapsed_time": "3:06:25", "remaining_time": "1:19:27", "throughput": 8646.31, "total_tokens": 96711632} +{"current_steps": 143505, "total_steps": 204665, "loss": 0.0307, "lr": 4.96483834821282e-07, "epoch": 3.5058510248454793, "percentage": 70.12, "elapsed_time": "3:06:25", "remaining_time": "1:19:27", "throughput": 8646.36, "total_tokens": 96715280} +{"current_steps": 143510, "total_steps": 204665, "loss": 0.0, "lr": 4.964101580167855e-07, "epoch": 3.5059731756773265, "percentage": 70.12, "elapsed_time": "3:06:26", "remaining_time": "1:19:26", "throughput": 8646.37, "total_tokens": 96718352} +{"current_steps": 143515, "total_steps": 204665, "loss": 0.0001, "lr": 4.963364848745301e-07, "epoch": 3.5060953265091737, "percentage": 70.12, "elapsed_time": "3:06:26", "remaining_time": "1:19:26", "throughput": 8646.41, "total_tokens": 96721744} +{"current_steps": 143520, "total_steps": 204665, "loss": 0.092, "lr": 4.962628153950508e-07, "epoch": 3.506217477341021, "percentage": 70.12, "elapsed_time": "3:06:26", "remaining_time": "1:19:25", "throughput": 8646.42, "total_tokens": 96724816} +{"current_steps": 143525, "total_steps": 204665, "loss": 0.0, "lr": 4.961891495788838e-07, "epoch": 3.506339628172868, "percentage": 70.13, "elapsed_time": "3:06:27", "remaining_time": "1:19:25", "throughput": 8646.46, "total_tokens": 96728272} +{"current_steps": 143530, "total_steps": 204665, "loss": 0.0, "lr": 4.961154874265653e-07, "epoch": 3.506461779004715, "percentage": 70.13, "elapsed_time": "3:06:27", "remaining_time": "1:19:25", "throughput": 8646.49, "total_tokens": 96731600} +{"current_steps": 143535, "total_steps": 204665, "loss": 0.0, "lr": 4.9604182893863e-07, "epoch": 3.5065839298365624, "percentage": 70.13, "elapsed_time": "3:06:27", "remaining_time": "1:19:24", "throughput": 8646.54, "total_tokens": 96735312} +{"current_steps": 143540, "total_steps": 204665, "loss": 0.0671, "lr": 4.959681741156146e-07, "epoch": 3.506706080668409, "percentage": 70.13, "elapsed_time": "3:06:28", "remaining_time": "1:19:24", "throughput": 8646.54, "total_tokens": 96738128} +{"current_steps": 143545, "total_steps": 204665, "loss": 0.0001, "lr": 4.958945229580537e-07, "epoch": 3.5068282315002564, "percentage": 70.14, "elapsed_time": "3:06:28", "remaining_time": "1:19:23", "throughput": 8646.57, "total_tokens": 96741520} +{"current_steps": 143550, "total_steps": 204665, "loss": 0.0001, "lr": 4.958208754664834e-07, "epoch": 3.5069503823321035, "percentage": 70.14, "elapsed_time": "3:06:28", "remaining_time": "1:19:23", "throughput": 8646.6, "total_tokens": 96744784} +{"current_steps": 143555, "total_steps": 204665, "loss": 0.0341, "lr": 4.957472316414398e-07, "epoch": 3.5070725331639507, "percentage": 70.14, "elapsed_time": "3:06:29", "remaining_time": "1:19:23", "throughput": 8646.65, "total_tokens": 96748560} +{"current_steps": 143560, "total_steps": 204665, "loss": 0.0359, "lr": 4.956735914834576e-07, "epoch": 3.507194683995798, "percentage": 70.14, "elapsed_time": "3:06:29", "remaining_time": "1:19:22", "throughput": 8646.7, "total_tokens": 96752144} +{"current_steps": 143565, "total_steps": 204665, "loss": 0.0001, "lr": 4.95599954993073e-07, "epoch": 3.507316834827645, "percentage": 70.15, "elapsed_time": "3:06:29", "remaining_time": "1:19:22", "throughput": 8646.73, "total_tokens": 96755536} +{"current_steps": 143570, "total_steps": 204665, "loss": 0.0002, "lr": 4.955263221708206e-07, "epoch": 3.5074389856594923, "percentage": 70.15, "elapsed_time": "3:06:30", "remaining_time": "1:19:21", "throughput": 8646.76, "total_tokens": 96758864} +{"current_steps": 143575, "total_steps": 204665, "loss": 0.0001, "lr": 4.954526930172371e-07, "epoch": 3.5075611364913395, "percentage": 70.15, "elapsed_time": "3:06:30", "remaining_time": "1:19:21", "throughput": 8646.82, "total_tokens": 96762640} +{"current_steps": 143580, "total_steps": 204665, "loss": 0.0271, "lr": 4.953790675328569e-07, "epoch": 3.5076832873231867, "percentage": 70.15, "elapsed_time": "3:06:30", "remaining_time": "1:19:21", "throughput": 8646.86, "total_tokens": 96766160} +{"current_steps": 143585, "total_steps": 204665, "loss": 0.0414, "lr": 4.95305445718216e-07, "epoch": 3.507805438155034, "percentage": 70.16, "elapsed_time": "3:06:31", "remaining_time": "1:19:20", "throughput": 8646.9, "total_tokens": 96769744} +{"current_steps": 143590, "total_steps": 204665, "loss": 0.0, "lr": 4.952318275738499e-07, "epoch": 3.507927588986881, "percentage": 70.16, "elapsed_time": "3:06:31", "remaining_time": "1:19:20", "throughput": 8646.95, "total_tokens": 96773392} +{"current_steps": 143595, "total_steps": 204665, "loss": 0.0001, "lr": 4.951582131002936e-07, "epoch": 3.5080497398187283, "percentage": 70.16, "elapsed_time": "3:06:31", "remaining_time": "1:19:19", "throughput": 8646.97, "total_tokens": 96776592} +{"current_steps": 143600, "total_steps": 204665, "loss": 0.0, "lr": 4.950846022980822e-07, "epoch": 3.5081718906505754, "percentage": 70.16, "elapsed_time": "3:06:32", "remaining_time": "1:19:19", "throughput": 8646.99, "total_tokens": 96779856} +{"current_steps": 143605, "total_steps": 204665, "loss": 0.0011, "lr": 4.950109951677519e-07, "epoch": 3.5082940414824226, "percentage": 70.17, "elapsed_time": "3:06:32", "remaining_time": "1:19:19", "throughput": 8647.01, "total_tokens": 96783056} +{"current_steps": 143610, "total_steps": 204665, "loss": 0.0, "lr": 4.94937391709837e-07, "epoch": 3.50841619231427, "percentage": 70.17, "elapsed_time": "3:06:33", "remaining_time": "1:19:18", "throughput": 8647.09, "total_tokens": 96787088} +{"current_steps": 143615, "total_steps": 204665, "loss": 0.0, "lr": 4.948637919248736e-07, "epoch": 3.5085383431461166, "percentage": 70.17, "elapsed_time": "3:06:33", "remaining_time": "1:19:18", "throughput": 8647.12, "total_tokens": 96790544} +{"current_steps": 143620, "total_steps": 204665, "loss": 0.0, "lr": 4.947901958133962e-07, "epoch": 3.508660493977964, "percentage": 70.17, "elapsed_time": "3:06:33", "remaining_time": "1:19:17", "throughput": 8647.18, "total_tokens": 96794384} +{"current_steps": 143625, "total_steps": 204665, "loss": 0.0, "lr": 4.947166033759408e-07, "epoch": 3.508782644809811, "percentage": 70.18, "elapsed_time": "3:06:34", "remaining_time": "1:19:17", "throughput": 8647.22, "total_tokens": 96797840} +{"current_steps": 143630, "total_steps": 204665, "loss": 0.0, "lr": 4.946430146130419e-07, "epoch": 3.5089047956416586, "percentage": 70.18, "elapsed_time": "3:06:34", "remaining_time": "1:19:17", "throughput": 8647.25, "total_tokens": 96801232} +{"current_steps": 143635, "total_steps": 204665, "loss": 0.0, "lr": 4.94569429525235e-07, "epoch": 3.5090269464735053, "percentage": 70.18, "elapsed_time": "3:06:34", "remaining_time": "1:19:16", "throughput": 8647.28, "total_tokens": 96804688} +{"current_steps": 143640, "total_steps": 204665, "loss": 0.0, "lr": 4.94495848113055e-07, "epoch": 3.5091490973053525, "percentage": 70.18, "elapsed_time": "3:06:35", "remaining_time": "1:19:16", "throughput": 8647.35, "total_tokens": 96808528} +{"current_steps": 143645, "total_steps": 204665, "loss": 0.0, "lr": 4.944222703770371e-07, "epoch": 3.5092712481371997, "percentage": 70.19, "elapsed_time": "3:06:35", "remaining_time": "1:19:15", "throughput": 8647.37, "total_tokens": 96811792} +{"current_steps": 143650, "total_steps": 204665, "loss": 0.0, "lr": 4.943486963177168e-07, "epoch": 3.509393398969047, "percentage": 70.19, "elapsed_time": "3:06:35", "remaining_time": "1:19:15", "throughput": 8647.39, "total_tokens": 96814992} +{"current_steps": 143655, "total_steps": 204665, "loss": 0.0, "lr": 4.942751259356285e-07, "epoch": 3.509515549800894, "percentage": 70.19, "elapsed_time": "3:06:36", "remaining_time": "1:19:15", "throughput": 8647.45, "total_tokens": 96818896} +{"current_steps": 143660, "total_steps": 204665, "loss": 0.0213, "lr": 4.942015592313078e-07, "epoch": 3.5096377006327413, "percentage": 70.19, "elapsed_time": "3:06:36", "remaining_time": "1:19:14", "throughput": 8647.45, "total_tokens": 96821840} +{"current_steps": 143665, "total_steps": 204665, "loss": 0.0001, "lr": 4.94127996205289e-07, "epoch": 3.5097598514645885, "percentage": 70.2, "elapsed_time": "3:06:36", "remaining_time": "1:19:14", "throughput": 8647.48, "total_tokens": 96825232} +{"current_steps": 143670, "total_steps": 204665, "loss": 0.0, "lr": 4.940544368581079e-07, "epoch": 3.5098820022964357, "percentage": 70.2, "elapsed_time": "3:06:37", "remaining_time": "1:19:13", "throughput": 8647.55, "total_tokens": 96829136} +{"current_steps": 143675, "total_steps": 204665, "loss": 0.0003, "lr": 4.939808811902986e-07, "epoch": 3.510004153128283, "percentage": 70.2, "elapsed_time": "3:06:37", "remaining_time": "1:19:13", "throughput": 8647.59, "total_tokens": 96832784} +{"current_steps": 143680, "total_steps": 204665, "loss": 0.0001, "lr": 4.939073292023965e-07, "epoch": 3.51012630396013, "percentage": 70.2, "elapsed_time": "3:06:37", "remaining_time": "1:19:12", "throughput": 8647.6, "total_tokens": 96835792} +{"current_steps": 143685, "total_steps": 204665, "loss": 0.0, "lr": 4.938337808949368e-07, "epoch": 3.5102484547919772, "percentage": 70.2, "elapsed_time": "3:06:38", "remaining_time": "1:19:12", "throughput": 8647.59, "total_tokens": 96838672} +{"current_steps": 143690, "total_steps": 204665, "loss": 0.0007, "lr": 4.937602362684535e-07, "epoch": 3.5103706056238244, "percentage": 70.21, "elapsed_time": "3:06:38", "remaining_time": "1:19:12", "throughput": 8647.6, "total_tokens": 96841744} +{"current_steps": 143695, "total_steps": 204665, "loss": 0.0, "lr": 4.936866953234824e-07, "epoch": 3.5104927564556716, "percentage": 70.21, "elapsed_time": "3:06:39", "remaining_time": "1:19:11", "throughput": 8647.64, "total_tokens": 96845264} +{"current_steps": 143700, "total_steps": 204665, "loss": 0.0, "lr": 4.936131580605578e-07, "epoch": 3.510614907287519, "percentage": 70.21, "elapsed_time": "3:06:39", "remaining_time": "1:19:11", "throughput": 8647.64, "total_tokens": 96848336} +{"current_steps": 143705, "total_steps": 204665, "loss": 0.0691, "lr": 4.935396244802142e-07, "epoch": 3.510737058119366, "percentage": 70.21, "elapsed_time": "3:06:39", "remaining_time": "1:19:10", "throughput": 8647.65, "total_tokens": 96851472} +{"current_steps": 143710, "total_steps": 204665, "loss": 0.0, "lr": 4.934660945829869e-07, "epoch": 3.5108592089512127, "percentage": 70.22, "elapsed_time": "3:06:40", "remaining_time": "1:19:10", "throughput": 8647.69, "total_tokens": 96854992} +{"current_steps": 143715, "total_steps": 204665, "loss": 0.0001, "lr": 4.933925683694101e-07, "epoch": 3.5109813597830604, "percentage": 70.22, "elapsed_time": "3:06:40", "remaining_time": "1:19:10", "throughput": 8647.68, "total_tokens": 96857872} +{"current_steps": 143720, "total_steps": 204665, "loss": 0.0, "lr": 4.933190458400193e-07, "epoch": 3.511103510614907, "percentage": 70.22, "elapsed_time": "3:06:40", "remaining_time": "1:19:09", "throughput": 8647.7, "total_tokens": 96861008} +{"current_steps": 143725, "total_steps": 204665, "loss": 0.0, "lr": 4.932455269953482e-07, "epoch": 3.5112256614467543, "percentage": 70.22, "elapsed_time": "3:06:41", "remaining_time": "1:19:09", "throughput": 8647.71, "total_tokens": 96864208} +{"current_steps": 143730, "total_steps": 204665, "loss": 0.0005, "lr": 4.931720118359323e-07, "epoch": 3.5113478122786015, "percentage": 70.23, "elapsed_time": "3:06:41", "remaining_time": "1:19:08", "throughput": 8647.74, "total_tokens": 96867536} +{"current_steps": 143735, "total_steps": 204665, "loss": 0.0004, "lr": 4.930985003623054e-07, "epoch": 3.5114699631104487, "percentage": 70.23, "elapsed_time": "3:06:41", "remaining_time": "1:19:08", "throughput": 8647.8, "total_tokens": 96871440} +{"current_steps": 143740, "total_steps": 204665, "loss": 0.0906, "lr": 4.930249925750026e-07, "epoch": 3.511592113942296, "percentage": 70.23, "elapsed_time": "3:06:42", "remaining_time": "1:19:08", "throughput": 8647.83, "total_tokens": 96874768} +{"current_steps": 143745, "total_steps": 204665, "loss": 0.0, "lr": 4.929514884745588e-07, "epoch": 3.511714264774143, "percentage": 70.23, "elapsed_time": "3:06:42", "remaining_time": "1:19:07", "throughput": 8647.89, "total_tokens": 96878672} +{"current_steps": 143750, "total_steps": 204665, "loss": 0.0, "lr": 4.928779880615078e-07, "epoch": 3.5118364156059902, "percentage": 70.24, "elapsed_time": "3:06:42", "remaining_time": "1:19:07", "throughput": 8647.9, "total_tokens": 96881808} +{"current_steps": 143755, "total_steps": 204665, "loss": 0.0421, "lr": 4.928044913363849e-07, "epoch": 3.5119585664378374, "percentage": 70.24, "elapsed_time": "3:06:43", "remaining_time": "1:19:06", "throughput": 8647.94, "total_tokens": 96885264} +{"current_steps": 143760, "total_steps": 204665, "loss": 0.0, "lr": 4.927309982997237e-07, "epoch": 3.5120807172696846, "percentage": 70.24, "elapsed_time": "3:06:43", "remaining_time": "1:19:06", "throughput": 8647.99, "total_tokens": 96888912} +{"current_steps": 143765, "total_steps": 204665, "loss": 0.0, "lr": 4.926575089520592e-07, "epoch": 3.512202868101532, "percentage": 70.24, "elapsed_time": "3:06:43", "remaining_time": "1:19:06", "throughput": 8648.02, "total_tokens": 96892304} +{"current_steps": 143770, "total_steps": 204665, "loss": 0.0975, "lr": 4.925840232939261e-07, "epoch": 3.512325018933379, "percentage": 70.25, "elapsed_time": "3:06:44", "remaining_time": "1:19:05", "throughput": 8648.04, "total_tokens": 96895632} +{"current_steps": 143775, "total_steps": 204665, "loss": 0.0002, "lr": 4.92510541325858e-07, "epoch": 3.512447169765226, "percentage": 70.25, "elapsed_time": "3:06:44", "remaining_time": "1:19:05", "throughput": 8648.08, "total_tokens": 96899152} +{"current_steps": 143780, "total_steps": 204665, "loss": 0.0, "lr": 4.924370630483902e-07, "epoch": 3.5125693205970734, "percentage": 70.25, "elapsed_time": "3:06:45", "remaining_time": "1:19:04", "throughput": 8648.12, "total_tokens": 96902736} +{"current_steps": 143785, "total_steps": 204665, "loss": 0.0002, "lr": 4.923635884620561e-07, "epoch": 3.5126914714289206, "percentage": 70.25, "elapsed_time": "3:06:45", "remaining_time": "1:19:04", "throughput": 8648.14, "total_tokens": 96906000} +{"current_steps": 143790, "total_steps": 204665, "loss": 0.0, "lr": 4.92290117567391e-07, "epoch": 3.5128136222607678, "percentage": 70.26, "elapsed_time": "3:06:45", "remaining_time": "1:19:04", "throughput": 8648.19, "total_tokens": 96909584} +{"current_steps": 143795, "total_steps": 204665, "loss": 0.0001, "lr": 4.922166503649284e-07, "epoch": 3.5129357730926145, "percentage": 70.26, "elapsed_time": "3:06:46", "remaining_time": "1:19:03", "throughput": 8648.21, "total_tokens": 96912912} +{"current_steps": 143800, "total_steps": 204665, "loss": 0.0, "lr": 4.921431868552032e-07, "epoch": 3.513057923924462, "percentage": 70.26, "elapsed_time": "3:06:46", "remaining_time": "1:19:03", "throughput": 8648.24, "total_tokens": 96916176} +{"current_steps": 143805, "total_steps": 204665, "loss": 0.0, "lr": 4.920697270387489e-07, "epoch": 3.513180074756309, "percentage": 70.26, "elapsed_time": "3:06:46", "remaining_time": "1:19:02", "throughput": 8648.28, "total_tokens": 96919760} +{"current_steps": 143810, "total_steps": 204665, "loss": 0.0, "lr": 4.919962709161008e-07, "epoch": 3.5133022255881565, "percentage": 70.27, "elapsed_time": "3:06:47", "remaining_time": "1:19:02", "throughput": 8648.32, "total_tokens": 96923344} +{"current_steps": 143815, "total_steps": 204665, "loss": 0.0, "lr": 4.919228184877922e-07, "epoch": 3.5134243764200033, "percentage": 70.27, "elapsed_time": "3:06:47", "remaining_time": "1:19:02", "throughput": 8648.32, "total_tokens": 96926352} +{"current_steps": 143820, "total_steps": 204665, "loss": 0.0001, "lr": 4.918493697543572e-07, "epoch": 3.5135465272518505, "percentage": 70.27, "elapsed_time": "3:06:47", "remaining_time": "1:19:01", "throughput": 8648.37, "total_tokens": 96930000} +{"current_steps": 143825, "total_steps": 204665, "loss": 0.0001, "lr": 4.917759247163307e-07, "epoch": 3.5136686780836976, "percentage": 70.27, "elapsed_time": "3:06:48", "remaining_time": "1:19:01", "throughput": 8648.41, "total_tokens": 96933520} +{"current_steps": 143830, "total_steps": 204665, "loss": 0.0, "lr": 4.917024833742459e-07, "epoch": 3.513790828915545, "percentage": 70.28, "elapsed_time": "3:06:48", "remaining_time": "1:19:00", "throughput": 8648.41, "total_tokens": 96936528} +{"current_steps": 143835, "total_steps": 204665, "loss": 0.0002, "lr": 4.916290457286374e-07, "epoch": 3.513912979747392, "percentage": 70.28, "elapsed_time": "3:06:48", "remaining_time": "1:19:00", "throughput": 8648.43, "total_tokens": 96939792} +{"current_steps": 143840, "total_steps": 204665, "loss": 0.0, "lr": 4.915556117800395e-07, "epoch": 3.514035130579239, "percentage": 70.28, "elapsed_time": "3:06:49", "remaining_time": "1:19:00", "throughput": 8648.44, "total_tokens": 96942928} +{"current_steps": 143845, "total_steps": 204665, "loss": 0.0, "lr": 4.914821815289858e-07, "epoch": 3.5141572814110864, "percentage": 70.28, "elapsed_time": "3:06:49", "remaining_time": "1:18:59", "throughput": 8648.45, "total_tokens": 96946064} +{"current_steps": 143850, "total_steps": 204665, "loss": 0.0001, "lr": 4.914087549760106e-07, "epoch": 3.5142794322429336, "percentage": 70.29, "elapsed_time": "3:06:49", "remaining_time": "1:18:59", "throughput": 8648.46, "total_tokens": 96949136} +{"current_steps": 143855, "total_steps": 204665, "loss": 0.0, "lr": 4.913353321216475e-07, "epoch": 3.514401583074781, "percentage": 70.29, "elapsed_time": "3:06:50", "remaining_time": "1:18:58", "throughput": 8648.48, "total_tokens": 96952464} +{"current_steps": 143860, "total_steps": 204665, "loss": 0.0008, "lr": 4.912619129664306e-07, "epoch": 3.514523733906628, "percentage": 70.29, "elapsed_time": "3:06:50", "remaining_time": "1:18:58", "throughput": 8648.51, "total_tokens": 96955792} +{"current_steps": 143865, "total_steps": 204665, "loss": 0.0001, "lr": 4.911884975108943e-07, "epoch": 3.514645884738475, "percentage": 70.29, "elapsed_time": "3:06:51", "remaining_time": "1:18:57", "throughput": 8648.58, "total_tokens": 96959696} +{"current_steps": 143870, "total_steps": 204665, "loss": 0.0, "lr": 4.911150857555717e-07, "epoch": 3.5147680355703224, "percentage": 70.3, "elapsed_time": "3:06:51", "remaining_time": "1:18:57", "throughput": 8648.62, "total_tokens": 96963280} +{"current_steps": 143875, "total_steps": 204665, "loss": 0.0, "lr": 4.910416777009975e-07, "epoch": 3.5148901864021695, "percentage": 70.3, "elapsed_time": "3:06:51", "remaining_time": "1:18:57", "throughput": 8648.65, "total_tokens": 96966544} +{"current_steps": 143880, "total_steps": 204665, "loss": 0.0613, "lr": 4.909682733477047e-07, "epoch": 3.5150123372340163, "percentage": 70.3, "elapsed_time": "3:06:52", "remaining_time": "1:18:56", "throughput": 8648.69, "total_tokens": 96970064} +{"current_steps": 143885, "total_steps": 204665, "loss": 0.0046, "lr": 4.90894872696228e-07, "epoch": 3.515134488065864, "percentage": 70.3, "elapsed_time": "3:06:52", "remaining_time": "1:18:56", "throughput": 8648.73, "total_tokens": 96973456} +{"current_steps": 143890, "total_steps": 204665, "loss": 0.0, "lr": 4.908214757471002e-07, "epoch": 3.5152566388977107, "percentage": 70.31, "elapsed_time": "3:06:52", "remaining_time": "1:18:55", "throughput": 8648.78, "total_tokens": 96977168} +{"current_steps": 143895, "total_steps": 204665, "loss": 0.0, "lr": 4.907480825008556e-07, "epoch": 3.5153787897295583, "percentage": 70.31, "elapsed_time": "3:06:53", "remaining_time": "1:18:55", "throughput": 8648.81, "total_tokens": 96980496} +{"current_steps": 143900, "total_steps": 204665, "loss": 0.0, "lr": 4.906746929580284e-07, "epoch": 3.515500940561405, "percentage": 70.31, "elapsed_time": "3:06:53", "remaining_time": "1:18:55", "throughput": 8648.87, "total_tokens": 96984208} +{"current_steps": 143905, "total_steps": 204665, "loss": 0.0003, "lr": 4.906013071191517e-07, "epoch": 3.5156230913932522, "percentage": 70.31, "elapsed_time": "3:06:53", "remaining_time": "1:18:54", "throughput": 8648.86, "total_tokens": 96987088} +{"current_steps": 143910, "total_steps": 204665, "loss": 0.0002, "lr": 4.90527924984759e-07, "epoch": 3.5157452422250994, "percentage": 70.31, "elapsed_time": "3:06:54", "remaining_time": "1:18:54", "throughput": 8648.89, "total_tokens": 96990352} +{"current_steps": 143915, "total_steps": 204665, "loss": 0.0, "lr": 4.904545465553847e-07, "epoch": 3.5158673930569466, "percentage": 70.32, "elapsed_time": "3:06:54", "remaining_time": "1:18:53", "throughput": 8648.91, "total_tokens": 96993552} +{"current_steps": 143920, "total_steps": 204665, "loss": 0.0, "lr": 4.903811718315615e-07, "epoch": 3.515989543888794, "percentage": 70.32, "elapsed_time": "3:06:54", "remaining_time": "1:18:53", "throughput": 8648.94, "total_tokens": 96996880} +{"current_steps": 143925, "total_steps": 204665, "loss": 0.0001, "lr": 4.903078008138239e-07, "epoch": 3.516111694720641, "percentage": 70.32, "elapsed_time": "3:06:55", "remaining_time": "1:18:53", "throughput": 8648.97, "total_tokens": 97000272} +{"current_steps": 143930, "total_steps": 204665, "loss": 0.0003, "lr": 4.902344335027047e-07, "epoch": 3.516233845552488, "percentage": 70.32, "elapsed_time": "3:06:55", "remaining_time": "1:18:52", "throughput": 8649.01, "total_tokens": 97003792} +{"current_steps": 143935, "total_steps": 204665, "loss": 0.0, "lr": 4.901610698987381e-07, "epoch": 3.5163559963843354, "percentage": 70.33, "elapsed_time": "3:06:55", "remaining_time": "1:18:52", "throughput": 8649.01, "total_tokens": 97006672} +{"current_steps": 143940, "total_steps": 204665, "loss": 0.0, "lr": 4.900877100024571e-07, "epoch": 3.5164781472161826, "percentage": 70.33, "elapsed_time": "3:06:56", "remaining_time": "1:18:51", "throughput": 8649.03, "total_tokens": 97009936} +{"current_steps": 143945, "total_steps": 204665, "loss": 0.0, "lr": 4.900143538143958e-07, "epoch": 3.5166002980480298, "percentage": 70.33, "elapsed_time": "3:06:56", "remaining_time": "1:18:51", "throughput": 8649.07, "total_tokens": 97013328} +{"current_steps": 143950, "total_steps": 204665, "loss": 0.0009, "lr": 4.899410013350867e-07, "epoch": 3.516722448879877, "percentage": 70.33, "elapsed_time": "3:06:56", "remaining_time": "1:18:51", "throughput": 8649.1, "total_tokens": 97016656} +{"current_steps": 143955, "total_steps": 204665, "loss": 0.0002, "lr": 4.898676525650639e-07, "epoch": 3.516844599711724, "percentage": 70.34, "elapsed_time": "3:06:57", "remaining_time": "1:18:50", "throughput": 8649.1, "total_tokens": 97019536} +{"current_steps": 143960, "total_steps": 204665, "loss": 0.0, "lr": 4.897943075048612e-07, "epoch": 3.5169667505435713, "percentage": 70.34, "elapsed_time": "3:06:57", "remaining_time": "1:18:50", "throughput": 8649.12, "total_tokens": 97022736} +{"current_steps": 143965, "total_steps": 204665, "loss": 0.0, "lr": 4.897209661550111e-07, "epoch": 3.5170889013754185, "percentage": 70.34, "elapsed_time": "3:06:58", "remaining_time": "1:18:49", "throughput": 8649.21, "total_tokens": 97026960} +{"current_steps": 143970, "total_steps": 204665, "loss": 0.0, "lr": 4.896476285160479e-07, "epoch": 3.5172110522072657, "percentage": 70.34, "elapsed_time": "3:06:58", "remaining_time": "1:18:49", "throughput": 8649.26, "total_tokens": 97030544} +{"current_steps": 143975, "total_steps": 204665, "loss": 0.0002, "lr": 4.895742945885038e-07, "epoch": 3.5173332030391125, "percentage": 70.35, "elapsed_time": "3:06:58", "remaining_time": "1:18:49", "throughput": 8649.45, "total_tokens": 97036240} +{"current_steps": 143980, "total_steps": 204665, "loss": 0.0, "lr": 4.895009643729133e-07, "epoch": 3.51745535387096, "percentage": 70.35, "elapsed_time": "3:06:59", "remaining_time": "1:18:48", "throughput": 8649.49, "total_tokens": 97039632} +{"current_steps": 143985, "total_steps": 204665, "loss": 0.0, "lr": 4.894276378698087e-07, "epoch": 3.517577504702807, "percentage": 70.35, "elapsed_time": "3:06:59", "remaining_time": "1:18:48", "throughput": 8649.53, "total_tokens": 97043152} +{"current_steps": 143990, "total_steps": 204665, "loss": 0.0, "lr": 4.893543150797236e-07, "epoch": 3.5176996555346545, "percentage": 70.35, "elapsed_time": "3:06:59", "remaining_time": "1:18:47", "throughput": 8649.59, "total_tokens": 97046928} +{"current_steps": 143995, "total_steps": 204665, "loss": 0.0, "lr": 4.892809960031916e-07, "epoch": 3.517821806366501, "percentage": 70.36, "elapsed_time": "3:07:00", "remaining_time": "1:18:47", "throughput": 8649.63, "total_tokens": 97050384} +{"current_steps": 144000, "total_steps": 204665, "loss": 0.0001, "lr": 4.892076806407451e-07, "epoch": 3.5179439571983484, "percentage": 70.36, "elapsed_time": "3:07:00", "remaining_time": "1:18:47", "throughput": 8649.65, "total_tokens": 97053648} +{"current_steps": 144005, "total_steps": 204665, "loss": 0.0, "lr": 4.891343689929182e-07, "epoch": 3.5180661080301956, "percentage": 70.36, "elapsed_time": "3:07:00", "remaining_time": "1:18:46", "throughput": 8649.68, "total_tokens": 97056912} +{"current_steps": 144010, "total_steps": 204665, "loss": 0.0017, "lr": 4.890610610602437e-07, "epoch": 3.518188258862043, "percentage": 70.36, "elapsed_time": "3:07:01", "remaining_time": "1:18:46", "throughput": 8649.72, "total_tokens": 97060496} +{"current_steps": 144015, "total_steps": 204665, "loss": 0.0, "lr": 4.889877568432541e-07, "epoch": 3.51831040969389, "percentage": 70.37, "elapsed_time": "3:07:01", "remaining_time": "1:18:45", "throughput": 8649.78, "total_tokens": 97064272} +{"current_steps": 144020, "total_steps": 204665, "loss": 0.0, "lr": 4.889144563424834e-07, "epoch": 3.518432560525737, "percentage": 70.37, "elapsed_time": "3:07:01", "remaining_time": "1:18:45", "throughput": 8649.8, "total_tokens": 97067408} +{"current_steps": 144025, "total_steps": 204665, "loss": 0.0, "lr": 4.888411595584639e-07, "epoch": 3.5185547113575844, "percentage": 70.37, "elapsed_time": "3:07:02", "remaining_time": "1:18:45", "throughput": 8649.83, "total_tokens": 97070736} +{"current_steps": 144030, "total_steps": 204665, "loss": 0.0, "lr": 4.887678664917292e-07, "epoch": 3.5186768621894315, "percentage": 70.37, "elapsed_time": "3:07:02", "remaining_time": "1:18:44", "throughput": 8649.87, "total_tokens": 97074192} +{"current_steps": 144035, "total_steps": 204665, "loss": 0.0, "lr": 4.886945771428118e-07, "epoch": 3.5187990130212787, "percentage": 70.38, "elapsed_time": "3:07:02", "remaining_time": "1:18:44", "throughput": 8649.92, "total_tokens": 97077840} +{"current_steps": 144040, "total_steps": 204665, "loss": 0.0003, "lr": 4.886212915122453e-07, "epoch": 3.518921163853126, "percentage": 70.38, "elapsed_time": "3:07:03", "remaining_time": "1:18:43", "throughput": 8649.93, "total_tokens": 97080912} +{"current_steps": 144045, "total_steps": 204665, "loss": 0.0001, "lr": 4.88548009600562e-07, "epoch": 3.519043314684973, "percentage": 70.38, "elapsed_time": "3:07:03", "remaining_time": "1:18:43", "throughput": 8649.96, "total_tokens": 97084240} +{"current_steps": 144050, "total_steps": 204665, "loss": 0.0, "lr": 4.884747314082951e-07, "epoch": 3.5191654655168203, "percentage": 70.38, "elapsed_time": "3:07:04", "remaining_time": "1:18:42", "throughput": 8650.0, "total_tokens": 97087696} +{"current_steps": 144055, "total_steps": 204665, "loss": 0.0, "lr": 4.884014569359779e-07, "epoch": 3.5192876163486675, "percentage": 70.39, "elapsed_time": "3:07:04", "remaining_time": "1:18:42", "throughput": 8650.05, "total_tokens": 97091280} +{"current_steps": 144060, "total_steps": 204665, "loss": 0.0, "lr": 4.883281861841425e-07, "epoch": 3.5194097671805142, "percentage": 70.39, "elapsed_time": "3:07:04", "remaining_time": "1:18:42", "throughput": 8650.07, "total_tokens": 97094544} +{"current_steps": 144065, "total_steps": 204665, "loss": 0.0, "lr": 4.882549191533226e-07, "epoch": 3.519531918012362, "percentage": 70.39, "elapsed_time": "3:07:05", "remaining_time": "1:18:41", "throughput": 8650.1, "total_tokens": 97097808} +{"current_steps": 144070, "total_steps": 204665, "loss": 0.0, "lr": 4.881816558440501e-07, "epoch": 3.5196540688442086, "percentage": 70.39, "elapsed_time": "3:07:05", "remaining_time": "1:18:41", "throughput": 8650.12, "total_tokens": 97101072} +{"current_steps": 144075, "total_steps": 204665, "loss": 0.0, "lr": 4.881083962568587e-07, "epoch": 3.5197762196760563, "percentage": 70.4, "elapsed_time": "3:07:05", "remaining_time": "1:18:40", "throughput": 8650.14, "total_tokens": 97104144} +{"current_steps": 144080, "total_steps": 204665, "loss": 0.0224, "lr": 4.880351403922804e-07, "epoch": 3.519898370507903, "percentage": 70.4, "elapsed_time": "3:07:06", "remaining_time": "1:18:40", "throughput": 8650.15, "total_tokens": 97107280} +{"current_steps": 144085, "total_steps": 204665, "loss": 0.0, "lr": 4.879618882508481e-07, "epoch": 3.52002052133975, "percentage": 70.4, "elapsed_time": "3:07:06", "remaining_time": "1:18:40", "throughput": 8650.17, "total_tokens": 97110480} +{"current_steps": 144090, "total_steps": 204665, "loss": 0.0922, "lr": 4.878886398330952e-07, "epoch": 3.5201426721715974, "percentage": 70.4, "elapsed_time": "3:07:06", "remaining_time": "1:18:39", "throughput": 8650.21, "total_tokens": 97114000} +{"current_steps": 144095, "total_steps": 204665, "loss": 0.0005, "lr": 4.878153951395535e-07, "epoch": 3.5202648230034446, "percentage": 70.41, "elapsed_time": "3:07:07", "remaining_time": "1:18:39", "throughput": 8650.28, "total_tokens": 97117840} +{"current_steps": 144100, "total_steps": 204665, "loss": 0.0, "lr": 4.877421541707563e-07, "epoch": 3.5203869738352918, "percentage": 70.41, "elapsed_time": "3:07:07", "remaining_time": "1:18:38", "throughput": 8650.28, "total_tokens": 97120912} +{"current_steps": 144105, "total_steps": 204665, "loss": 0.0, "lr": 4.876689169272355e-07, "epoch": 3.520509124667139, "percentage": 70.41, "elapsed_time": "3:07:07", "remaining_time": "1:18:38", "throughput": 8650.3, "total_tokens": 97124048} +{"current_steps": 144110, "total_steps": 204665, "loss": 0.0, "lr": 4.875956834095247e-07, "epoch": 3.520631275498986, "percentage": 70.41, "elapsed_time": "3:07:08", "remaining_time": "1:18:38", "throughput": 8650.36, "total_tokens": 97127760} +{"current_steps": 144115, "total_steps": 204665, "loss": 0.0, "lr": 4.875224536181553e-07, "epoch": 3.5207534263308333, "percentage": 70.42, "elapsed_time": "3:07:08", "remaining_time": "1:18:37", "throughput": 8650.42, "total_tokens": 97131536} +{"current_steps": 144120, "total_steps": 204665, "loss": 0.0, "lr": 4.87449227553661e-07, "epoch": 3.5208755771626805, "percentage": 70.42, "elapsed_time": "3:07:08", "remaining_time": "1:18:37", "throughput": 8650.44, "total_tokens": 97134864} +{"current_steps": 144125, "total_steps": 204665, "loss": 0.0, "lr": 4.873760052165737e-07, "epoch": 3.5209977279945277, "percentage": 70.42, "elapsed_time": "3:07:09", "remaining_time": "1:18:36", "throughput": 8650.5, "total_tokens": 97138512} +{"current_steps": 144130, "total_steps": 204665, "loss": 0.0003, "lr": 4.873027866074258e-07, "epoch": 3.521119878826375, "percentage": 70.42, "elapsed_time": "3:07:09", "remaining_time": "1:18:36", "throughput": 8650.56, "total_tokens": 97142288} +{"current_steps": 144135, "total_steps": 204665, "loss": 0.0, "lr": 4.8722957172675e-07, "epoch": 3.521242029658222, "percentage": 70.42, "elapsed_time": "3:07:09", "remaining_time": "1:18:36", "throughput": 8650.58, "total_tokens": 97145616} +{"current_steps": 144140, "total_steps": 204665, "loss": 0.0, "lr": 4.871563605750785e-07, "epoch": 3.5213641804900693, "percentage": 70.43, "elapsed_time": "3:07:10", "remaining_time": "1:18:35", "throughput": 8650.62, "total_tokens": 97149136} +{"current_steps": 144145, "total_steps": 204665, "loss": 0.0001, "lr": 4.870831531529438e-07, "epoch": 3.5214863313219165, "percentage": 70.43, "elapsed_time": "3:07:10", "remaining_time": "1:18:35", "throughput": 8650.66, "total_tokens": 97152656} +{"current_steps": 144150, "total_steps": 204665, "loss": 0.0, "lr": 4.870099494608788e-07, "epoch": 3.5216084821537637, "percentage": 70.43, "elapsed_time": "3:07:11", "remaining_time": "1:18:34", "throughput": 8650.7, "total_tokens": 97156112} +{"current_steps": 144155, "total_steps": 204665, "loss": 0.0, "lr": 4.86936749499415e-07, "epoch": 3.5217306329856104, "percentage": 70.43, "elapsed_time": "3:07:11", "remaining_time": "1:18:34", "throughput": 8650.74, "total_tokens": 97159632} +{"current_steps": 144160, "total_steps": 204665, "loss": 0.0, "lr": 4.868635532690856e-07, "epoch": 3.521852783817458, "percentage": 70.44, "elapsed_time": "3:07:11", "remaining_time": "1:18:34", "throughput": 8650.81, "total_tokens": 97163408} +{"current_steps": 144165, "total_steps": 204665, "loss": 0.0, "lr": 4.867903607704219e-07, "epoch": 3.521974934649305, "percentage": 70.44, "elapsed_time": "3:07:12", "remaining_time": "1:18:33", "throughput": 8650.85, "total_tokens": 97166864} +{"current_steps": 144170, "total_steps": 204665, "loss": 0.0, "lr": 4.867171720039569e-07, "epoch": 3.522097085481152, "percentage": 70.44, "elapsed_time": "3:07:12", "remaining_time": "1:18:33", "throughput": 8650.87, "total_tokens": 97170064} +{"current_steps": 144175, "total_steps": 204665, "loss": 0.0, "lr": 4.86643986970223e-07, "epoch": 3.522219236312999, "percentage": 70.44, "elapsed_time": "3:07:12", "remaining_time": "1:18:32", "throughput": 8650.92, "total_tokens": 97173712} +{"current_steps": 144180, "total_steps": 204665, "loss": 0.0, "lr": 4.865708056697517e-07, "epoch": 3.5223413871448463, "percentage": 70.45, "elapsed_time": "3:07:13", "remaining_time": "1:18:32", "throughput": 8650.98, "total_tokens": 97177424} +{"current_steps": 144185, "total_steps": 204665, "loss": 0.0032, "lr": 4.864976281030761e-07, "epoch": 3.5224635379766935, "percentage": 70.45, "elapsed_time": "3:07:13", "remaining_time": "1:18:31", "throughput": 8650.99, "total_tokens": 97180560} +{"current_steps": 144190, "total_steps": 204665, "loss": 0.0, "lr": 4.864244542707274e-07, "epoch": 3.5225856888085407, "percentage": 70.45, "elapsed_time": "3:07:13", "remaining_time": "1:18:31", "throughput": 8651.0, "total_tokens": 97183504} +{"current_steps": 144195, "total_steps": 204665, "loss": 0.0, "lr": 4.863512841732386e-07, "epoch": 3.522707839640388, "percentage": 70.45, "elapsed_time": "3:07:14", "remaining_time": "1:18:31", "throughput": 8651.02, "total_tokens": 97186640} +{"current_steps": 144200, "total_steps": 204665, "loss": 0.0924, "lr": 4.86278117811141e-07, "epoch": 3.522829990472235, "percentage": 70.46, "elapsed_time": "3:07:14", "remaining_time": "1:18:30", "throughput": 8651.04, "total_tokens": 97189968} +{"current_steps": 144205, "total_steps": 204665, "loss": 0.1, "lr": 4.862049551849671e-07, "epoch": 3.5229521413040823, "percentage": 70.46, "elapsed_time": "3:07:14", "remaining_time": "1:18:30", "throughput": 8651.06, "total_tokens": 97193104} +{"current_steps": 144210, "total_steps": 204665, "loss": 0.0, "lr": 4.861317962952494e-07, "epoch": 3.5230742921359295, "percentage": 70.46, "elapsed_time": "3:07:15", "remaining_time": "1:18:29", "throughput": 8651.11, "total_tokens": 97196752} +{"current_steps": 144215, "total_steps": 204665, "loss": 0.0, "lr": 4.860586411425195e-07, "epoch": 3.5231964429677767, "percentage": 70.46, "elapsed_time": "3:07:15", "remaining_time": "1:18:29", "throughput": 8651.15, "total_tokens": 97200208} +{"current_steps": 144220, "total_steps": 204665, "loss": 0.0, "lr": 4.859854897273089e-07, "epoch": 3.523318593799624, "percentage": 70.47, "elapsed_time": "3:07:15", "remaining_time": "1:18:29", "throughput": 8651.18, "total_tokens": 97203472} +{"current_steps": 144225, "total_steps": 204665, "loss": 0.0308, "lr": 4.859123420501506e-07, "epoch": 3.523440744631471, "percentage": 70.47, "elapsed_time": "3:07:16", "remaining_time": "1:18:28", "throughput": 8651.24, "total_tokens": 97207248} +{"current_steps": 144230, "total_steps": 204665, "loss": 0.0, "lr": 4.858391981115759e-07, "epoch": 3.5235628954633182, "percentage": 70.47, "elapsed_time": "3:07:16", "remaining_time": "1:18:28", "throughput": 8651.31, "total_tokens": 97211152} +{"current_steps": 144235, "total_steps": 204665, "loss": 0.0003, "lr": 4.857660579121164e-07, "epoch": 3.5236850462951654, "percentage": 70.47, "elapsed_time": "3:07:16", "remaining_time": "1:18:27", "throughput": 8651.34, "total_tokens": 97214544} +{"current_steps": 144240, "total_steps": 204665, "loss": 0.0, "lr": 4.856929214523047e-07, "epoch": 3.523807197127012, "percentage": 70.48, "elapsed_time": "3:07:17", "remaining_time": "1:18:27", "throughput": 8651.4, "total_tokens": 97218256} +{"current_steps": 144245, "total_steps": 204665, "loss": 0.0671, "lr": 4.856197887326726e-07, "epoch": 3.52392934795886, "percentage": 70.48, "elapsed_time": "3:07:17", "remaining_time": "1:18:27", "throughput": 8651.42, "total_tokens": 97221456} +{"current_steps": 144250, "total_steps": 204665, "loss": 0.0, "lr": 4.855466597537514e-07, "epoch": 3.5240514987907066, "percentage": 70.48, "elapsed_time": "3:07:17", "remaining_time": "1:18:26", "throughput": 8651.44, "total_tokens": 97224656} +{"current_steps": 144255, "total_steps": 204665, "loss": 0.0, "lr": 4.854735345160736e-07, "epoch": 3.524173649622554, "percentage": 70.48, "elapsed_time": "3:07:18", "remaining_time": "1:18:26", "throughput": 8651.47, "total_tokens": 97227984} +{"current_steps": 144260, "total_steps": 204665, "loss": 0.0965, "lr": 4.854004130201704e-07, "epoch": 3.524295800454401, "percentage": 70.49, "elapsed_time": "3:07:18", "remaining_time": "1:18:25", "throughput": 8651.47, "total_tokens": 97230992} +{"current_steps": 144265, "total_steps": 204665, "loss": 0.0, "lr": 4.853272952665737e-07, "epoch": 3.524417951286248, "percentage": 70.49, "elapsed_time": "3:07:19", "remaining_time": "1:18:25", "throughput": 8651.49, "total_tokens": 97234064} +{"current_steps": 144270, "total_steps": 204665, "loss": 0.0, "lr": 4.852541812558158e-07, "epoch": 3.5245401021180953, "percentage": 70.49, "elapsed_time": "3:07:19", "remaining_time": "1:18:25", "throughput": 8651.51, "total_tokens": 97237328} +{"current_steps": 144275, "total_steps": 204665, "loss": 0.0739, "lr": 4.851810709884274e-07, "epoch": 3.5246622529499425, "percentage": 70.49, "elapsed_time": "3:07:19", "remaining_time": "1:18:24", "throughput": 8651.53, "total_tokens": 97240592} +{"current_steps": 144280, "total_steps": 204665, "loss": 0.0003, "lr": 4.851079644649412e-07, "epoch": 3.5247844037817897, "percentage": 70.5, "elapsed_time": "3:07:20", "remaining_time": "1:18:24", "throughput": 8651.56, "total_tokens": 97243856} +{"current_steps": 144285, "total_steps": 204665, "loss": 0.0, "lr": 4.850348616858881e-07, "epoch": 3.524906554613637, "percentage": 70.5, "elapsed_time": "3:07:20", "remaining_time": "1:18:23", "throughput": 8651.6, "total_tokens": 97247312} +{"current_steps": 144290, "total_steps": 204665, "loss": 0.0653, "lr": 4.849617626518002e-07, "epoch": 3.525028705445484, "percentage": 70.5, "elapsed_time": "3:07:20", "remaining_time": "1:18:23", "throughput": 8651.67, "total_tokens": 97251216} +{"current_steps": 144295, "total_steps": 204665, "loss": 0.0, "lr": 4.848886673632086e-07, "epoch": 3.5251508562773313, "percentage": 70.5, "elapsed_time": "3:07:21", "remaining_time": "1:18:23", "throughput": 8651.69, "total_tokens": 97254416} +{"current_steps": 144300, "total_steps": 204665, "loss": 0.0, "lr": 4.848155758206452e-07, "epoch": 3.5252730071091785, "percentage": 70.51, "elapsed_time": "3:07:21", "remaining_time": "1:18:22", "throughput": 8651.73, "total_tokens": 97257872} +{"current_steps": 144305, "total_steps": 204665, "loss": 0.0738, "lr": 4.847424880246417e-07, "epoch": 3.5253951579410256, "percentage": 70.51, "elapsed_time": "3:07:21", "remaining_time": "1:18:22", "throughput": 8651.74, "total_tokens": 97261072} +{"current_steps": 144310, "total_steps": 204665, "loss": 0.0001, "lr": 4.846694039757292e-07, "epoch": 3.525517308772873, "percentage": 70.51, "elapsed_time": "3:07:22", "remaining_time": "1:18:21", "throughput": 8651.78, "total_tokens": 97264464} +{"current_steps": 144315, "total_steps": 204665, "loss": 0.0001, "lr": 4.845963236744397e-07, "epoch": 3.52563945960472, "percentage": 70.51, "elapsed_time": "3:07:22", "remaining_time": "1:18:21", "throughput": 8651.8, "total_tokens": 97267728} +{"current_steps": 144320, "total_steps": 204665, "loss": 0.0, "lr": 4.845232471213045e-07, "epoch": 3.525761610436567, "percentage": 70.52, "elapsed_time": "3:07:22", "remaining_time": "1:18:20", "throughput": 8651.81, "total_tokens": 97270800} +{"current_steps": 144325, "total_steps": 204665, "loss": 0.0001, "lr": 4.844501743168543e-07, "epoch": 3.525883761268414, "percentage": 70.52, "elapsed_time": "3:07:23", "remaining_time": "1:18:20", "throughput": 8651.89, "total_tokens": 97274768} +{"current_steps": 144330, "total_steps": 204665, "loss": 0.0625, "lr": 4.843771052616216e-07, "epoch": 3.5260059121002616, "percentage": 70.52, "elapsed_time": "3:07:23", "remaining_time": "1:18:20", "throughput": 8651.95, "total_tokens": 97278608} +{"current_steps": 144335, "total_steps": 204665, "loss": 0.0, "lr": 4.843040399561369e-07, "epoch": 3.5261280629321083, "percentage": 70.52, "elapsed_time": "3:07:23", "remaining_time": "1:18:19", "throughput": 8652.05, "total_tokens": 97282960} +{"current_steps": 144340, "total_steps": 204665, "loss": 0.0389, "lr": 4.842309784009323e-07, "epoch": 3.526250213763956, "percentage": 70.53, "elapsed_time": "3:07:24", "remaining_time": "1:18:19", "throughput": 8652.05, "total_tokens": 97285840} +{"current_steps": 144345, "total_steps": 204665, "loss": 0.0, "lr": 4.841579205965384e-07, "epoch": 3.5263723645958027, "percentage": 70.53, "elapsed_time": "3:07:24", "remaining_time": "1:18:18", "throughput": 8652.08, "total_tokens": 97289168} +{"current_steps": 144350, "total_steps": 204665, "loss": 0.0, "lr": 4.840848665434872e-07, "epoch": 3.52649451542765, "percentage": 70.53, "elapsed_time": "3:07:24", "remaining_time": "1:18:18", "throughput": 8652.09, "total_tokens": 97292240} +{"current_steps": 144355, "total_steps": 204665, "loss": 0.0, "lr": 4.840118162423092e-07, "epoch": 3.526616666259497, "percentage": 70.53, "elapsed_time": "3:07:25", "remaining_time": "1:18:18", "throughput": 8652.12, "total_tokens": 97295632} +{"current_steps": 144360, "total_steps": 204665, "loss": 0.0, "lr": 4.839387696935361e-07, "epoch": 3.5267388170913443, "percentage": 70.53, "elapsed_time": "3:07:25", "remaining_time": "1:18:17", "throughput": 8652.14, "total_tokens": 97298832} +{"current_steps": 144365, "total_steps": 204665, "loss": 0.0001, "lr": 4.838657268976994e-07, "epoch": 3.5268609679231915, "percentage": 70.54, "elapsed_time": "3:07:25", "remaining_time": "1:18:17", "throughput": 8652.15, "total_tokens": 97301968} +{"current_steps": 144370, "total_steps": 204665, "loss": 0.0, "lr": 4.837926878553296e-07, "epoch": 3.5269831187550387, "percentage": 70.54, "elapsed_time": "3:07:26", "remaining_time": "1:18:16", "throughput": 8652.16, "total_tokens": 97305040} +{"current_steps": 144375, "total_steps": 204665, "loss": 0.0, "lr": 4.837196525669586e-07, "epoch": 3.527105269586886, "percentage": 70.54, "elapsed_time": "3:07:26", "remaining_time": "1:18:16", "throughput": 8652.24, "total_tokens": 97309008} +{"current_steps": 144380, "total_steps": 204665, "loss": 0.0, "lr": 4.836466210331168e-07, "epoch": 3.527227420418733, "percentage": 70.54, "elapsed_time": "3:07:27", "remaining_time": "1:18:16", "throughput": 8652.29, "total_tokens": 97312656} +{"current_steps": 144385, "total_steps": 204665, "loss": 0.0573, "lr": 4.83573593254336e-07, "epoch": 3.5273495712505802, "percentage": 70.55, "elapsed_time": "3:07:27", "remaining_time": "1:18:15", "throughput": 8652.3, "total_tokens": 97315792} +{"current_steps": 144390, "total_steps": 204665, "loss": 0.0, "lr": 4.835005692311466e-07, "epoch": 3.5274717220824274, "percentage": 70.55, "elapsed_time": "3:07:27", "remaining_time": "1:18:15", "throughput": 8652.39, "total_tokens": 97319952} +{"current_steps": 144395, "total_steps": 204665, "loss": 0.0852, "lr": 4.834275489640799e-07, "epoch": 3.5275938729142746, "percentage": 70.55, "elapsed_time": "3:07:28", "remaining_time": "1:18:14", "throughput": 8652.46, "total_tokens": 97323856} +{"current_steps": 144400, "total_steps": 204665, "loss": 0.0, "lr": 4.833545324536674e-07, "epoch": 3.527716023746122, "percentage": 70.55, "elapsed_time": "3:07:28", "remaining_time": "1:18:14", "throughput": 8652.53, "total_tokens": 97327696} +{"current_steps": 144405, "total_steps": 204665, "loss": 0.0004, "lr": 4.832815197004394e-07, "epoch": 3.527838174577969, "percentage": 70.56, "elapsed_time": "3:07:28", "remaining_time": "1:18:14", "throughput": 8652.53, "total_tokens": 97330640} +{"current_steps": 144410, "total_steps": 204665, "loss": 0.0, "lr": 4.832085107049275e-07, "epoch": 3.527960325409816, "percentage": 70.56, "elapsed_time": "3:07:29", "remaining_time": "1:18:13", "throughput": 8652.55, "total_tokens": 97333840} +{"current_steps": 144415, "total_steps": 204665, "loss": 0.0, "lr": 4.83135505467662e-07, "epoch": 3.5280824762416634, "percentage": 70.56, "elapsed_time": "3:07:29", "remaining_time": "1:18:13", "throughput": 8652.58, "total_tokens": 97337168} +{"current_steps": 144420, "total_steps": 204665, "loss": 0.0, "lr": 4.830625039891744e-07, "epoch": 3.52820462707351, "percentage": 70.56, "elapsed_time": "3:07:29", "remaining_time": "1:18:12", "throughput": 8652.61, "total_tokens": 97340560} +{"current_steps": 144425, "total_steps": 204665, "loss": 0.0, "lr": 4.82989506269995e-07, "epoch": 3.5283267779053578, "percentage": 70.57, "elapsed_time": "3:07:30", "remaining_time": "1:18:12", "throughput": 8652.64, "total_tokens": 97344016} +{"current_steps": 144430, "total_steps": 204665, "loss": 0.0348, "lr": 4.829165123106552e-07, "epoch": 3.5284489287372045, "percentage": 70.57, "elapsed_time": "3:07:30", "remaining_time": "1:18:12", "throughput": 8652.67, "total_tokens": 97347280} +{"current_steps": 144435, "total_steps": 204665, "loss": 0.0001, "lr": 4.828435221116858e-07, "epoch": 3.528571079569052, "percentage": 70.57, "elapsed_time": "3:07:31", "remaining_time": "1:18:11", "throughput": 8652.46, "total_tokens": 97350160} +{"current_steps": 144440, "total_steps": 204665, "loss": 0.0, "lr": 4.827705356736169e-07, "epoch": 3.528693230400899, "percentage": 70.57, "elapsed_time": "3:07:31", "remaining_time": "1:18:11", "throughput": 8652.48, "total_tokens": 97353296} +{"current_steps": 144445, "total_steps": 204665, "loss": 0.0, "lr": 4.826975529969802e-07, "epoch": 3.528815381232746, "percentage": 70.58, "elapsed_time": "3:07:31", "remaining_time": "1:18:10", "throughput": 8652.49, "total_tokens": 97356432} +{"current_steps": 144450, "total_steps": 204665, "loss": 0.0001, "lr": 4.826245740823056e-07, "epoch": 3.5289375320645933, "percentage": 70.58, "elapsed_time": "3:07:32", "remaining_time": "1:18:10", "throughput": 8652.52, "total_tokens": 97359824} +{"current_steps": 144455, "total_steps": 204665, "loss": 0.0, "lr": 4.825515989301244e-07, "epoch": 3.5290596828964405, "percentage": 70.58, "elapsed_time": "3:07:32", "remaining_time": "1:18:10", "throughput": 8652.58, "total_tokens": 97363536} +{"current_steps": 144460, "total_steps": 204665, "loss": 0.0, "lr": 4.824786275409675e-07, "epoch": 3.5291818337282876, "percentage": 70.58, "elapsed_time": "3:07:32", "remaining_time": "1:18:09", "throughput": 8652.6, "total_tokens": 97366800} +{"current_steps": 144465, "total_steps": 204665, "loss": 0.0, "lr": 4.824056599153646e-07, "epoch": 3.529303984560135, "percentage": 70.59, "elapsed_time": "3:07:33", "remaining_time": "1:18:09", "throughput": 8652.64, "total_tokens": 97370256} +{"current_steps": 144470, "total_steps": 204665, "loss": 0.0, "lr": 4.823326960538476e-07, "epoch": 3.529426135391982, "percentage": 70.59, "elapsed_time": "3:07:33", "remaining_time": "1:18:08", "throughput": 8652.65, "total_tokens": 97373392} +{"current_steps": 144475, "total_steps": 204665, "loss": 0.0, "lr": 4.82259735956946e-07, "epoch": 3.529548286223829, "percentage": 70.59, "elapsed_time": "3:07:33", "remaining_time": "1:18:08", "throughput": 8652.71, "total_tokens": 97377104} +{"current_steps": 144480, "total_steps": 204665, "loss": 0.0, "lr": 4.821867796251908e-07, "epoch": 3.5296704370556764, "percentage": 70.59, "elapsed_time": "3:07:34", "remaining_time": "1:18:08", "throughput": 8652.73, "total_tokens": 97380368} +{"current_steps": 144485, "total_steps": 204665, "loss": 0.0, "lr": 4.82113827059113e-07, "epoch": 3.5297925878875236, "percentage": 70.6, "elapsed_time": "3:07:34", "remaining_time": "1:18:07", "throughput": 8652.75, "total_tokens": 97383568} +{"current_steps": 144490, "total_steps": 204665, "loss": 0.0002, "lr": 4.820408782592425e-07, "epoch": 3.529914738719371, "percentage": 70.6, "elapsed_time": "3:07:34", "remaining_time": "1:18:07", "throughput": 8652.79, "total_tokens": 97387024} +{"current_steps": 144495, "total_steps": 204665, "loss": 0.0, "lr": 4.819679332261104e-07, "epoch": 3.530036889551218, "percentage": 70.6, "elapsed_time": "3:07:35", "remaining_time": "1:18:06", "throughput": 8652.81, "total_tokens": 97390224} +{"current_steps": 144500, "total_steps": 204665, "loss": 0.0, "lr": 4.818949919602465e-07, "epoch": 3.530159040383065, "percentage": 70.6, "elapsed_time": "3:07:35", "remaining_time": "1:18:06", "throughput": 8652.82, "total_tokens": 97393296} +{"current_steps": 144505, "total_steps": 204665, "loss": 0.0, "lr": 4.818220544621817e-07, "epoch": 3.530281191214912, "percentage": 70.61, "elapsed_time": "3:07:36", "remaining_time": "1:18:06", "throughput": 8652.84, "total_tokens": 97396496} +{"current_steps": 144510, "total_steps": 204665, "loss": 0.0001, "lr": 4.817491207324461e-07, "epoch": 3.5304033420467595, "percentage": 70.61, "elapsed_time": "3:07:36", "remaining_time": "1:18:05", "throughput": 8652.86, "total_tokens": 97399760} +{"current_steps": 144515, "total_steps": 204665, "loss": 0.0, "lr": 4.816761907715702e-07, "epoch": 3.5305254928786063, "percentage": 70.61, "elapsed_time": "3:07:36", "remaining_time": "1:18:05", "throughput": 8652.9, "total_tokens": 97403152} +{"current_steps": 144520, "total_steps": 204665, "loss": 0.0, "lr": 4.81603264580085e-07, "epoch": 3.530647643710454, "percentage": 70.61, "elapsed_time": "3:07:37", "remaining_time": "1:18:04", "throughput": 8652.94, "total_tokens": 97406736} +{"current_steps": 144525, "total_steps": 204665, "loss": 0.0001, "lr": 4.8153034215852e-07, "epoch": 3.5307697945423007, "percentage": 70.62, "elapsed_time": "3:07:37", "remaining_time": "1:18:04", "throughput": 8652.95, "total_tokens": 97409744} +{"current_steps": 144530, "total_steps": 204665, "loss": 0.0, "lr": 4.814574235074056e-07, "epoch": 3.530891945374148, "percentage": 70.62, "elapsed_time": "3:07:37", "remaining_time": "1:18:04", "throughput": 8653.0, "total_tokens": 97413328} +{"current_steps": 144535, "total_steps": 204665, "loss": 0.0003, "lr": 4.813845086272727e-07, "epoch": 3.531014096205995, "percentage": 70.62, "elapsed_time": "3:07:38", "remaining_time": "1:18:03", "throughput": 8653.03, "total_tokens": 97416720} +{"current_steps": 144540, "total_steps": 204665, "loss": 0.0, "lr": 4.813115975186512e-07, "epoch": 3.5311362470378422, "percentage": 70.62, "elapsed_time": "3:07:38", "remaining_time": "1:18:03", "throughput": 8653.07, "total_tokens": 97420176} +{"current_steps": 144545, "total_steps": 204665, "loss": 0.0, "lr": 4.812386901820708e-07, "epoch": 3.5312583978696894, "percentage": 70.63, "elapsed_time": "3:07:38", "remaining_time": "1:18:02", "throughput": 8653.09, "total_tokens": 97423312} +{"current_steps": 144550, "total_steps": 204665, "loss": 0.0, "lr": 4.811657866180621e-07, "epoch": 3.5313805487015366, "percentage": 70.63, "elapsed_time": "3:07:39", "remaining_time": "1:18:02", "throughput": 8653.14, "total_tokens": 97426960} +{"current_steps": 144555, "total_steps": 204665, "loss": 0.0, "lr": 4.810928868271558e-07, "epoch": 3.531502699533384, "percentage": 70.63, "elapsed_time": "3:07:39", "remaining_time": "1:18:02", "throughput": 8653.14, "total_tokens": 97429968} +{"current_steps": 144560, "total_steps": 204665, "loss": 0.0, "lr": 4.810199908098813e-07, "epoch": 3.531624850365231, "percentage": 70.63, "elapsed_time": "3:07:39", "remaining_time": "1:18:01", "throughput": 8653.2, "total_tokens": 97433616} +{"current_steps": 144565, "total_steps": 204665, "loss": 0.0001, "lr": 4.809470985667692e-07, "epoch": 3.531747001197078, "percentage": 70.63, "elapsed_time": "3:07:40", "remaining_time": "1:18:01", "throughput": 8653.2, "total_tokens": 97436624} +{"current_steps": 144570, "total_steps": 204665, "loss": 0.0, "lr": 4.808742100983492e-07, "epoch": 3.5318691520289254, "percentage": 70.64, "elapsed_time": "3:07:40", "remaining_time": "1:18:00", "throughput": 8653.25, "total_tokens": 97440144} +{"current_steps": 144575, "total_steps": 204665, "loss": 0.0947, "lr": 4.808013254051514e-07, "epoch": 3.5319913028607726, "percentage": 70.64, "elapsed_time": "3:07:40", "remaining_time": "1:18:00", "throughput": 8653.3, "total_tokens": 97443856} +{"current_steps": 144580, "total_steps": 204665, "loss": 0.0, "lr": 4.807284444877066e-07, "epoch": 3.5321134536926198, "percentage": 70.64, "elapsed_time": "3:07:41", "remaining_time": "1:17:59", "throughput": 8653.33, "total_tokens": 97447184} +{"current_steps": 144585, "total_steps": 204665, "loss": 0.0001, "lr": 4.806555673465437e-07, "epoch": 3.532235604524467, "percentage": 70.64, "elapsed_time": "3:07:41", "remaining_time": "1:17:59", "throughput": 8653.35, "total_tokens": 97450448} +{"current_steps": 144590, "total_steps": 204665, "loss": 0.0, "lr": 4.805826939821937e-07, "epoch": 3.532357755356314, "percentage": 70.65, "elapsed_time": "3:07:41", "remaining_time": "1:17:59", "throughput": 8653.36, "total_tokens": 97453520} +{"current_steps": 144595, "total_steps": 204665, "loss": 0.0, "lr": 4.805098243951855e-07, "epoch": 3.5324799061881613, "percentage": 70.65, "elapsed_time": "3:07:42", "remaining_time": "1:17:58", "throughput": 8653.38, "total_tokens": 97456656} +{"current_steps": 144600, "total_steps": 204665, "loss": 0.0, "lr": 4.8043695858605e-07, "epoch": 3.532602057020008, "percentage": 70.65, "elapsed_time": "3:07:42", "remaining_time": "1:17:58", "throughput": 8653.39, "total_tokens": 97459728} +{"current_steps": 144605, "total_steps": 204665, "loss": 0.0002, "lr": 4.803640965553164e-07, "epoch": 3.5327242078518557, "percentage": 70.65, "elapsed_time": "3:07:42", "remaining_time": "1:17:57", "throughput": 8653.39, "total_tokens": 97462736} +{"current_steps": 144610, "total_steps": 204665, "loss": 0.0003, "lr": 4.802912383035148e-07, "epoch": 3.5328463586837024, "percentage": 70.66, "elapsed_time": "3:07:43", "remaining_time": "1:17:57", "throughput": 8653.41, "total_tokens": 97465808} +{"current_steps": 144615, "total_steps": 204665, "loss": 0.0, "lr": 4.802183838311755e-07, "epoch": 3.5329685095155496, "percentage": 70.66, "elapsed_time": "3:07:43", "remaining_time": "1:17:57", "throughput": 8653.46, "total_tokens": 97469456} +{"current_steps": 144620, "total_steps": 204665, "loss": 0.0, "lr": 4.801455331388275e-07, "epoch": 3.533090660347397, "percentage": 70.66, "elapsed_time": "3:07:43", "remaining_time": "1:17:56", "throughput": 8653.48, "total_tokens": 97472720} +{"current_steps": 144625, "total_steps": 204665, "loss": 0.0, "lr": 4.800726862270014e-07, "epoch": 3.533212811179244, "percentage": 70.66, "elapsed_time": "3:07:44", "remaining_time": "1:17:56", "throughput": 8653.5, "total_tokens": 97475920} +{"current_steps": 144630, "total_steps": 204665, "loss": 0.0316, "lr": 4.799998430962267e-07, "epoch": 3.533334962011091, "percentage": 70.67, "elapsed_time": "3:07:44", "remaining_time": "1:17:55", "throughput": 8653.53, "total_tokens": 97479248} +{"current_steps": 144635, "total_steps": 204665, "loss": 0.0, "lr": 4.799270037470324e-07, "epoch": 3.5334571128429384, "percentage": 70.67, "elapsed_time": "3:07:45", "remaining_time": "1:17:55", "throughput": 8653.57, "total_tokens": 97482768} +{"current_steps": 144640, "total_steps": 204665, "loss": 0.0, "lr": 4.798541681799494e-07, "epoch": 3.5335792636747856, "percentage": 70.67, "elapsed_time": "3:07:45", "remaining_time": "1:17:55", "throughput": 8653.59, "total_tokens": 97485968} +{"current_steps": 144645, "total_steps": 204665, "loss": 0.0, "lr": 4.797813363955064e-07, "epoch": 3.5337014145066328, "percentage": 70.67, "elapsed_time": "3:07:45", "remaining_time": "1:17:54", "throughput": 8653.62, "total_tokens": 97489296} +{"current_steps": 144650, "total_steps": 204665, "loss": 0.0, "lr": 4.797085083942336e-07, "epoch": 3.53382356533848, "percentage": 70.68, "elapsed_time": "3:07:46", "remaining_time": "1:17:54", "throughput": 8653.67, "total_tokens": 97492816} +{"current_steps": 144655, "total_steps": 204665, "loss": 0.0001, "lr": 4.796356841766602e-07, "epoch": 3.533945716170327, "percentage": 70.68, "elapsed_time": "3:07:46", "remaining_time": "1:17:53", "throughput": 8653.7, "total_tokens": 97496272} +{"current_steps": 144660, "total_steps": 204665, "loss": 0.0, "lr": 4.795628637433165e-07, "epoch": 3.5340678670021743, "percentage": 70.68, "elapsed_time": "3:07:46", "remaining_time": "1:17:53", "throughput": 8653.75, "total_tokens": 97499792} +{"current_steps": 144665, "total_steps": 204665, "loss": 0.0, "lr": 4.794900470947312e-07, "epoch": 3.5341900178340215, "percentage": 70.68, "elapsed_time": "3:07:47", "remaining_time": "1:17:53", "throughput": 8653.8, "total_tokens": 97503504} +{"current_steps": 144670, "total_steps": 204665, "loss": 0.0, "lr": 4.794172342314345e-07, "epoch": 3.5343121686658687, "percentage": 70.69, "elapsed_time": "3:07:47", "remaining_time": "1:17:52", "throughput": 8653.83, "total_tokens": 97506768} +{"current_steps": 144675, "total_steps": 204665, "loss": 0.1554, "lr": 4.793444251539558e-07, "epoch": 3.534434319497716, "percentage": 70.69, "elapsed_time": "3:07:47", "remaining_time": "1:17:52", "throughput": 8653.85, "total_tokens": 97509968} +{"current_steps": 144680, "total_steps": 204665, "loss": 0.0002, "lr": 4.792716198628242e-07, "epoch": 3.534556470329563, "percentage": 70.69, "elapsed_time": "3:07:48", "remaining_time": "1:17:51", "throughput": 8653.9, "total_tokens": 97513616} +{"current_steps": 144685, "total_steps": 204665, "loss": 0.0001, "lr": 4.791988183585697e-07, "epoch": 3.53467862116141, "percentage": 70.69, "elapsed_time": "3:07:48", "remaining_time": "1:17:51", "throughput": 8653.97, "total_tokens": 97517392} +{"current_steps": 144690, "total_steps": 204665, "loss": 0.0001, "lr": 4.791260206417212e-07, "epoch": 3.5348007719932575, "percentage": 70.7, "elapsed_time": "3:07:48", "remaining_time": "1:17:51", "throughput": 8654.01, "total_tokens": 97520912} +{"current_steps": 144695, "total_steps": 204665, "loss": 0.0, "lr": 4.790532267128088e-07, "epoch": 3.5349229228251042, "percentage": 70.7, "elapsed_time": "3:07:49", "remaining_time": "1:17:50", "throughput": 8654.04, "total_tokens": 97524304} +{"current_steps": 144700, "total_steps": 204665, "loss": 0.0, "lr": 4.78980436572361e-07, "epoch": 3.535045073656952, "percentage": 70.7, "elapsed_time": "3:07:49", "remaining_time": "1:17:50", "throughput": 8654.14, "total_tokens": 97528592} +{"current_steps": 144705, "total_steps": 204665, "loss": 0.0, "lr": 4.789076502209077e-07, "epoch": 3.5351672244887986, "percentage": 70.7, "elapsed_time": "3:07:49", "remaining_time": "1:17:49", "throughput": 8654.22, "total_tokens": 97532624} +{"current_steps": 144710, "total_steps": 204665, "loss": 0.0, "lr": 4.788348676589784e-07, "epoch": 3.535289375320646, "percentage": 70.71, "elapsed_time": "3:07:50", "remaining_time": "1:17:49", "throughput": 8654.23, "total_tokens": 97535760} +{"current_steps": 144715, "total_steps": 204665, "loss": 0.0815, "lr": 4.787620888871018e-07, "epoch": 3.535411526152493, "percentage": 70.71, "elapsed_time": "3:07:50", "remaining_time": "1:17:49", "throughput": 8654.27, "total_tokens": 97539216} +{"current_steps": 144720, "total_steps": 204665, "loss": 0.0, "lr": 4.786893139058078e-07, "epoch": 3.53553367698434, "percentage": 70.71, "elapsed_time": "3:07:50", "remaining_time": "1:17:48", "throughput": 8654.31, "total_tokens": 97542736} +{"current_steps": 144725, "total_steps": 204665, "loss": 0.0, "lr": 4.78616542715625e-07, "epoch": 3.5356558278161874, "percentage": 70.71, "elapsed_time": "3:07:51", "remaining_time": "1:17:48", "throughput": 8654.33, "total_tokens": 97545936} +{"current_steps": 144730, "total_steps": 204665, "loss": 0.0, "lr": 4.785437753170832e-07, "epoch": 3.5357779786480346, "percentage": 70.72, "elapsed_time": "3:07:51", "remaining_time": "1:17:47", "throughput": 8654.35, "total_tokens": 97549136} +{"current_steps": 144735, "total_steps": 204665, "loss": 0.0, "lr": 4.784710117107112e-07, "epoch": 3.5359001294798817, "percentage": 70.72, "elapsed_time": "3:07:52", "remaining_time": "1:17:47", "throughput": 8654.38, "total_tokens": 97552464} +{"current_steps": 144740, "total_steps": 204665, "loss": 0.0, "lr": 4.783982518970384e-07, "epoch": 3.536022280311729, "percentage": 70.72, "elapsed_time": "3:07:52", "remaining_time": "1:17:46", "throughput": 8654.44, "total_tokens": 97556112} +{"current_steps": 144745, "total_steps": 204665, "loss": 0.0, "lr": 4.783254958765939e-07, "epoch": 3.536144431143576, "percentage": 70.72, "elapsed_time": "3:07:52", "remaining_time": "1:17:46", "throughput": 8654.46, "total_tokens": 97559440} +{"current_steps": 144750, "total_steps": 204665, "loss": 0.0, "lr": 4.782527436499063e-07, "epoch": 3.5362665819754233, "percentage": 70.73, "elapsed_time": "3:07:53", "remaining_time": "1:17:46", "throughput": 8654.48, "total_tokens": 97562576} +{"current_steps": 144755, "total_steps": 204665, "loss": 0.0, "lr": 4.781799952175056e-07, "epoch": 3.5363887328072705, "percentage": 70.73, "elapsed_time": "3:07:53", "remaining_time": "1:17:45", "throughput": 8654.52, "total_tokens": 97566160} +{"current_steps": 144760, "total_steps": 204665, "loss": 0.0, "lr": 4.781072505799197e-07, "epoch": 3.5365108836391177, "percentage": 70.73, "elapsed_time": "3:07:53", "remaining_time": "1:17:45", "throughput": 8654.59, "total_tokens": 97570064} +{"current_steps": 144765, "total_steps": 204665, "loss": 0.0003, "lr": 4.780345097376784e-07, "epoch": 3.536633034470965, "percentage": 70.73, "elapsed_time": "3:07:54", "remaining_time": "1:17:44", "throughput": 8654.61, "total_tokens": 97573200} +{"current_steps": 144770, "total_steps": 204665, "loss": 0.0829, "lr": 4.779617726913109e-07, "epoch": 3.536755185302812, "percentage": 70.74, "elapsed_time": "3:07:54", "remaining_time": "1:17:44", "throughput": 8654.64, "total_tokens": 97576528} +{"current_steps": 144775, "total_steps": 204665, "loss": 0.0, "lr": 4.778890394413454e-07, "epoch": 3.5368773361346593, "percentage": 70.74, "elapsed_time": "3:07:54", "remaining_time": "1:17:44", "throughput": 8654.69, "total_tokens": 97580176} +{"current_steps": 144780, "total_steps": 204665, "loss": 0.0003, "lr": 4.778163099883117e-07, "epoch": 3.536999486966506, "percentage": 70.74, "elapsed_time": "3:07:55", "remaining_time": "1:17:43", "throughput": 8654.71, "total_tokens": 97583440} +{"current_steps": 144785, "total_steps": 204665, "loss": 0.0, "lr": 4.777435843327377e-07, "epoch": 3.5371216377983536, "percentage": 70.74, "elapsed_time": "3:07:55", "remaining_time": "1:17:43", "throughput": 8654.75, "total_tokens": 97586896} +{"current_steps": 144790, "total_steps": 204665, "loss": 0.0, "lr": 4.776708624751535e-07, "epoch": 3.5372437886302004, "percentage": 70.74, "elapsed_time": "3:07:55", "remaining_time": "1:17:42", "throughput": 8654.78, "total_tokens": 97590160} +{"current_steps": 144795, "total_steps": 204665, "loss": 0.0846, "lr": 4.775981444160865e-07, "epoch": 3.5373659394620476, "percentage": 70.75, "elapsed_time": "3:07:56", "remaining_time": "1:17:42", "throughput": 8654.81, "total_tokens": 97593552} +{"current_steps": 144800, "total_steps": 204665, "loss": 0.0, "lr": 4.775254301560666e-07, "epoch": 3.5374880902938948, "percentage": 70.75, "elapsed_time": "3:07:56", "remaining_time": "1:17:42", "throughput": 8654.84, "total_tokens": 97596816} +{"current_steps": 144805, "total_steps": 204665, "loss": 0.0005, "lr": 4.774527196956226e-07, "epoch": 3.537610241125742, "percentage": 70.75, "elapsed_time": "3:07:56", "remaining_time": "1:17:41", "throughput": 8654.86, "total_tokens": 97600080} +{"current_steps": 144810, "total_steps": 204665, "loss": 0.0, "lr": 4.773800130352825e-07, "epoch": 3.537732391957589, "percentage": 70.75, "elapsed_time": "3:07:57", "remaining_time": "1:17:41", "throughput": 8654.87, "total_tokens": 97603152} +{"current_steps": 144815, "total_steps": 204665, "loss": 0.0, "lr": 4.77307310175576e-07, "epoch": 3.5378545427894363, "percentage": 70.76, "elapsed_time": "3:07:57", "remaining_time": "1:17:40", "throughput": 8654.91, "total_tokens": 97606608} +{"current_steps": 144820, "total_steps": 204665, "loss": 0.0, "lr": 4.772346111170309e-07, "epoch": 3.5379766936212835, "percentage": 70.76, "elapsed_time": "3:07:57", "remaining_time": "1:17:40", "throughput": 8654.93, "total_tokens": 97609872} +{"current_steps": 144825, "total_steps": 204665, "loss": 0.0, "lr": 4.771619158601764e-07, "epoch": 3.5380988444531307, "percentage": 70.76, "elapsed_time": "3:07:58", "remaining_time": "1:17:40", "throughput": 8654.97, "total_tokens": 97613328} +{"current_steps": 144830, "total_steps": 204665, "loss": 0.0, "lr": 4.770892244055413e-07, "epoch": 3.538220995284978, "percentage": 70.76, "elapsed_time": "3:07:58", "remaining_time": "1:17:39", "throughput": 8654.99, "total_tokens": 97616592} +{"current_steps": 144835, "total_steps": 204665, "loss": 0.0, "lr": 4.770165367536541e-07, "epoch": 3.538343146116825, "percentage": 70.77, "elapsed_time": "3:07:58", "remaining_time": "1:17:39", "throughput": 8655.05, "total_tokens": 97620240} +{"current_steps": 144840, "total_steps": 204665, "loss": 0.0001, "lr": 4.76943852905043e-07, "epoch": 3.5384652969486723, "percentage": 70.77, "elapsed_time": "3:07:59", "remaining_time": "1:17:38", "throughput": 8655.09, "total_tokens": 97623824} +{"current_steps": 144845, "total_steps": 204665, "loss": 0.0, "lr": 4.768711728602371e-07, "epoch": 3.5385874477805195, "percentage": 70.77, "elapsed_time": "3:07:59", "remaining_time": "1:17:38", "throughput": 8655.11, "total_tokens": 97626896} +{"current_steps": 144850, "total_steps": 204665, "loss": 0.0001, "lr": 4.767984966197649e-07, "epoch": 3.5387095986123667, "percentage": 70.77, "elapsed_time": "3:08:00", "remaining_time": "1:17:38", "throughput": 8655.13, "total_tokens": 97630224} +{"current_steps": 144855, "total_steps": 204665, "loss": 0.0, "lr": 4.767258241841543e-07, "epoch": 3.538831749444214, "percentage": 70.78, "elapsed_time": "3:08:00", "remaining_time": "1:17:37", "throughput": 8655.16, "total_tokens": 97633552} +{"current_steps": 144860, "total_steps": 204665, "loss": 0.0, "lr": 4.766531555539343e-07, "epoch": 3.538953900276061, "percentage": 70.78, "elapsed_time": "3:08:00", "remaining_time": "1:17:37", "throughput": 8655.25, "total_tokens": 97637648} +{"current_steps": 144865, "total_steps": 204665, "loss": 0.0, "lr": 4.7658049072963357e-07, "epoch": 3.539076051107908, "percentage": 70.78, "elapsed_time": "3:08:01", "remaining_time": "1:17:36", "throughput": 8655.31, "total_tokens": 97641488} +{"current_steps": 144870, "total_steps": 204665, "loss": 0.0, "lr": 4.7650782971178003e-07, "epoch": 3.5391982019397554, "percentage": 70.78, "elapsed_time": "3:08:01", "remaining_time": "1:17:36", "throughput": 8655.31, "total_tokens": 97644432} +{"current_steps": 144875, "total_steps": 204665, "loss": 0.0698, "lr": 4.764351725009027e-07, "epoch": 3.539320352771602, "percentage": 70.79, "elapsed_time": "3:08:01", "remaining_time": "1:17:36", "throughput": 8655.33, "total_tokens": 97647632} +{"current_steps": 144880, "total_steps": 204665, "loss": 0.0002, "lr": 4.763625190975292e-07, "epoch": 3.53944250360345, "percentage": 70.79, "elapsed_time": "3:08:02", "remaining_time": "1:17:35", "throughput": 8655.35, "total_tokens": 97650896} +{"current_steps": 144885, "total_steps": 204665, "loss": 0.0501, "lr": 4.762898695021884e-07, "epoch": 3.5395646544352966, "percentage": 70.79, "elapsed_time": "3:08:02", "remaining_time": "1:17:35", "throughput": 8655.35, "total_tokens": 97653776} +{"current_steps": 144890, "total_steps": 204665, "loss": 0.0325, "lr": 4.7621722371540884e-07, "epoch": 3.5396868052671437, "percentage": 70.79, "elapsed_time": "3:08:02", "remaining_time": "1:17:34", "throughput": 8655.37, "total_tokens": 97656976} +{"current_steps": 144895, "total_steps": 204665, "loss": 0.0, "lr": 4.7614458173771807e-07, "epoch": 3.539808956098991, "percentage": 70.8, "elapsed_time": "3:08:03", "remaining_time": "1:17:34", "throughput": 8655.42, "total_tokens": 97660560} +{"current_steps": 144900, "total_steps": 204665, "loss": 0.0, "lr": 4.760719435696453e-07, "epoch": 3.539931106930838, "percentage": 70.8, "elapsed_time": "3:08:03", "remaining_time": "1:17:33", "throughput": 8655.46, "total_tokens": 97664080} +{"current_steps": 144905, "total_steps": 204665, "loss": 0.0, "lr": 4.759993092117178e-07, "epoch": 3.5400532577626853, "percentage": 70.8, "elapsed_time": "3:08:03", "remaining_time": "1:17:33", "throughput": 8655.49, "total_tokens": 97667472} +{"current_steps": 144910, "total_steps": 204665, "loss": 0.0371, "lr": 4.759266786644648e-07, "epoch": 3.5401754085945325, "percentage": 70.8, "elapsed_time": "3:08:04", "remaining_time": "1:17:33", "throughput": 8655.51, "total_tokens": 97670672} +{"current_steps": 144915, "total_steps": 204665, "loss": 0.0, "lr": 4.7585405192841343e-07, "epoch": 3.5402975594263797, "percentage": 70.81, "elapsed_time": "3:08:04", "remaining_time": "1:17:32", "throughput": 8655.54, "total_tokens": 97674000} +{"current_steps": 144920, "total_steps": 204665, "loss": 0.0, "lr": 4.7578142900409237e-07, "epoch": 3.540419710258227, "percentage": 70.81, "elapsed_time": "3:08:04", "remaining_time": "1:17:32", "throughput": 8655.58, "total_tokens": 97677392} +{"current_steps": 144925, "total_steps": 204665, "loss": 0.1114, "lr": 4.7570880989203023e-07, "epoch": 3.540541861090074, "percentage": 70.81, "elapsed_time": "3:08:05", "remaining_time": "1:17:31", "throughput": 8655.61, "total_tokens": 97680784} +{"current_steps": 144930, "total_steps": 204665, "loss": 0.0, "lr": 4.756361945927542e-07, "epoch": 3.5406640119219213, "percentage": 70.81, "elapsed_time": "3:08:05", "remaining_time": "1:17:31", "throughput": 8655.64, "total_tokens": 97684048} +{"current_steps": 144935, "total_steps": 204665, "loss": 0.0224, "lr": 4.755635831067931e-07, "epoch": 3.5407861627537685, "percentage": 70.82, "elapsed_time": "3:08:05", "remaining_time": "1:17:31", "throughput": 8655.67, "total_tokens": 97687376} +{"current_steps": 144940, "total_steps": 204665, "loss": 0.0, "lr": 4.754909754346748e-07, "epoch": 3.5409083135856156, "percentage": 70.82, "elapsed_time": "3:08:06", "remaining_time": "1:17:30", "throughput": 8655.73, "total_tokens": 97691088} +{"current_steps": 144945, "total_steps": 204665, "loss": 0.0, "lr": 4.7541837157692676e-07, "epoch": 3.541030464417463, "percentage": 70.82, "elapsed_time": "3:08:06", "remaining_time": "1:17:30", "throughput": 8655.78, "total_tokens": 97694736} +{"current_steps": 144950, "total_steps": 204665, "loss": 0.0002, "lr": 4.753457715340778e-07, "epoch": 3.5411526152493096, "percentage": 70.82, "elapsed_time": "3:08:06", "remaining_time": "1:17:29", "throughput": 8655.81, "total_tokens": 97698064} +{"current_steps": 144955, "total_steps": 204665, "loss": 0.0, "lr": 4.752731753066552e-07, "epoch": 3.541274766081157, "percentage": 70.83, "elapsed_time": "3:08:07", "remaining_time": "1:17:29", "throughput": 8655.84, "total_tokens": 97701392} +{"current_steps": 144960, "total_steps": 204665, "loss": 0.0, "lr": 4.7520058289518747e-07, "epoch": 3.541396916913004, "percentage": 70.83, "elapsed_time": "3:08:07", "remaining_time": "1:17:29", "throughput": 8655.87, "total_tokens": 97704720} +{"current_steps": 144965, "total_steps": 204665, "loss": 0.0, "lr": 4.75127994300202e-07, "epoch": 3.5415190677448516, "percentage": 70.83, "elapsed_time": "3:08:08", "remaining_time": "1:17:28", "throughput": 8655.88, "total_tokens": 97707856} +{"current_steps": 144970, "total_steps": 204665, "loss": 0.0, "lr": 4.7505540952222725e-07, "epoch": 3.5416412185766983, "percentage": 70.83, "elapsed_time": "3:08:08", "remaining_time": "1:17:28", "throughput": 8655.92, "total_tokens": 97711312} +{"current_steps": 144975, "total_steps": 204665, "loss": 0.0, "lr": 4.749828285617904e-07, "epoch": 3.5417633694085455, "percentage": 70.84, "elapsed_time": "3:08:08", "remaining_time": "1:17:27", "throughput": 8655.95, "total_tokens": 97714704} +{"current_steps": 144980, "total_steps": 204665, "loss": 0.0348, "lr": 4.7491025141941955e-07, "epoch": 3.5418855202403927, "percentage": 70.84, "elapsed_time": "3:08:09", "remaining_time": "1:17:27", "throughput": 8655.98, "total_tokens": 97718032} +{"current_steps": 144985, "total_steps": 204665, "loss": 0.0, "lr": 4.74837678095643e-07, "epoch": 3.54200767107224, "percentage": 70.84, "elapsed_time": "3:08:09", "remaining_time": "1:17:27", "throughput": 8656.01, "total_tokens": 97721360} +{"current_steps": 144990, "total_steps": 204665, "loss": 0.0, "lr": 4.7476510859098775e-07, "epoch": 3.542129821904087, "percentage": 70.84, "elapsed_time": "3:08:09", "remaining_time": "1:17:26", "throughput": 8656.03, "total_tokens": 97724560} +{"current_steps": 144995, "total_steps": 204665, "loss": 0.0001, "lr": 4.7469254290598224e-07, "epoch": 3.5422519727359343, "percentage": 70.85, "elapsed_time": "3:08:10", "remaining_time": "1:17:26", "throughput": 8656.09, "total_tokens": 97728272} +{"current_steps": 145000, "total_steps": 204665, "loss": 0.0, "lr": 4.7461998104115355e-07, "epoch": 3.5423741235677815, "percentage": 70.85, "elapsed_time": "3:08:10", "remaining_time": "1:17:25", "throughput": 8656.14, "total_tokens": 97731920} +{"current_steps": 145005, "total_steps": 204665, "loss": 0.0, "lr": 4.7454742299703e-07, "epoch": 3.5424962743996287, "percentage": 70.85, "elapsed_time": "3:08:10", "remaining_time": "1:17:25", "throughput": 8656.14, "total_tokens": 97734928} +{"current_steps": 145010, "total_steps": 204665, "loss": 0.0, "lr": 4.744748687741386e-07, "epoch": 3.542618425231476, "percentage": 70.85, "elapsed_time": "3:08:11", "remaining_time": "1:17:25", "throughput": 8656.15, "total_tokens": 97738000} +{"current_steps": 145015, "total_steps": 204665, "loss": 0.0, "lr": 4.7440231837300716e-07, "epoch": 3.542740576063323, "percentage": 70.85, "elapsed_time": "3:08:11", "remaining_time": "1:17:24", "throughput": 8656.22, "total_tokens": 97741904} +{"current_steps": 145020, "total_steps": 204665, "loss": 0.0, "lr": 4.743297717941639e-07, "epoch": 3.5428627268951702, "percentage": 70.86, "elapsed_time": "3:08:11", "remaining_time": "1:17:24", "throughput": 8656.23, "total_tokens": 97744912} +{"current_steps": 145025, "total_steps": 204665, "loss": 0.0, "lr": 4.7425722903813556e-07, "epoch": 3.5429848777270174, "percentage": 70.86, "elapsed_time": "3:08:12", "remaining_time": "1:17:23", "throughput": 8656.25, "total_tokens": 97748112} +{"current_steps": 145030, "total_steps": 204665, "loss": 0.0, "lr": 4.7418469010545036e-07, "epoch": 3.5431070285588646, "percentage": 70.86, "elapsed_time": "3:08:12", "remaining_time": "1:17:23", "throughput": 8656.28, "total_tokens": 97751440} +{"current_steps": 145035, "total_steps": 204665, "loss": 0.0, "lr": 4.7411215499663525e-07, "epoch": 3.543229179390712, "percentage": 70.86, "elapsed_time": "3:08:12", "remaining_time": "1:17:22", "throughput": 8656.3, "total_tokens": 97754640} +{"current_steps": 145040, "total_steps": 204665, "loss": 0.0, "lr": 4.7403962371221837e-07, "epoch": 3.543351330222559, "percentage": 70.87, "elapsed_time": "3:08:13", "remaining_time": "1:17:22", "throughput": 8656.33, "total_tokens": 97758032} +{"current_steps": 145045, "total_steps": 204665, "loss": 0.0, "lr": 4.7396709625272636e-07, "epoch": 3.5434734810544057, "percentage": 70.87, "elapsed_time": "3:08:13", "remaining_time": "1:17:22", "throughput": 8656.38, "total_tokens": 97761680} +{"current_steps": 145050, "total_steps": 204665, "loss": 0.0, "lr": 4.738945726186875e-07, "epoch": 3.5435956318862534, "percentage": 70.87, "elapsed_time": "3:08:13", "remaining_time": "1:17:21", "throughput": 8656.43, "total_tokens": 97765264} +{"current_steps": 145055, "total_steps": 204665, "loss": 0.0, "lr": 4.738220528106288e-07, "epoch": 3.5437177827181, "percentage": 70.87, "elapsed_time": "3:08:14", "remaining_time": "1:17:21", "throughput": 8656.44, "total_tokens": 97768336} +{"current_steps": 145060, "total_steps": 204665, "loss": 0.0001, "lr": 4.7374953682907736e-07, "epoch": 3.5438399335499478, "percentage": 70.88, "elapsed_time": "3:08:14", "remaining_time": "1:17:20", "throughput": 8656.46, "total_tokens": 97771536} +{"current_steps": 145065, "total_steps": 204665, "loss": 0.0, "lr": 4.7367702467456115e-07, "epoch": 3.5439620843817945, "percentage": 70.88, "elapsed_time": "3:08:14", "remaining_time": "1:17:20", "throughput": 8656.46, "total_tokens": 97774544} +{"current_steps": 145070, "total_steps": 204665, "loss": 0.0, "lr": 4.736045163476068e-07, "epoch": 3.5440842352136417, "percentage": 70.88, "elapsed_time": "3:08:15", "remaining_time": "1:17:20", "throughput": 8656.49, "total_tokens": 97777808} +{"current_steps": 145075, "total_steps": 204665, "loss": 0.0, "lr": 4.73532011848742e-07, "epoch": 3.544206386045489, "percentage": 70.88, "elapsed_time": "3:08:15", "remaining_time": "1:17:19", "throughput": 8656.53, "total_tokens": 97781392} +{"current_steps": 145080, "total_steps": 204665, "loss": 0.0663, "lr": 4.734595111784945e-07, "epoch": 3.544328536877336, "percentage": 70.89, "elapsed_time": "3:08:16", "remaining_time": "1:17:19", "throughput": 8656.55, "total_tokens": 97784528} +{"current_steps": 145085, "total_steps": 204665, "loss": 0.0001, "lr": 4.733870143373905e-07, "epoch": 3.5444506877091833, "percentage": 70.89, "elapsed_time": "3:08:16", "remaining_time": "1:17:18", "throughput": 8656.56, "total_tokens": 97787664} +{"current_steps": 145090, "total_steps": 204665, "loss": 0.0, "lr": 4.7331452132595827e-07, "epoch": 3.5445728385410304, "percentage": 70.89, "elapsed_time": "3:08:16", "remaining_time": "1:17:18", "throughput": 8656.56, "total_tokens": 97790544} +{"current_steps": 145095, "total_steps": 204665, "loss": 0.0, "lr": 4.7324203214472403e-07, "epoch": 3.5446949893728776, "percentage": 70.89, "elapsed_time": "3:08:17", "remaining_time": "1:17:18", "throughput": 8656.6, "total_tokens": 97794000} +{"current_steps": 145100, "total_steps": 204665, "loss": 0.0, "lr": 4.731695467942158e-07, "epoch": 3.544817140204725, "percentage": 70.9, "elapsed_time": "3:08:17", "remaining_time": "1:17:17", "throughput": 8656.63, "total_tokens": 97797392} +{"current_steps": 145105, "total_steps": 204665, "loss": 0.0, "lr": 4.730970652749601e-07, "epoch": 3.544939291036572, "percentage": 70.9, "elapsed_time": "3:08:17", "remaining_time": "1:17:17", "throughput": 8656.63, "total_tokens": 97800400} +{"current_steps": 145110, "total_steps": 204665, "loss": 0.0, "lr": 4.730245875874841e-07, "epoch": 3.545061441868419, "percentage": 70.9, "elapsed_time": "3:08:18", "remaining_time": "1:17:16", "throughput": 8656.65, "total_tokens": 97803536} +{"current_steps": 145115, "total_steps": 204665, "loss": 0.0, "lr": 4.7295211373231546e-07, "epoch": 3.5451835927002664, "percentage": 70.9, "elapsed_time": "3:08:18", "remaining_time": "1:17:16", "throughput": 8656.66, "total_tokens": 97806672} +{"current_steps": 145120, "total_steps": 204665, "loss": 0.0, "lr": 4.7287964370998043e-07, "epoch": 3.5453057435321136, "percentage": 70.91, "elapsed_time": "3:08:18", "remaining_time": "1:17:16", "throughput": 8656.69, "total_tokens": 97810064} +{"current_steps": 145125, "total_steps": 204665, "loss": 0.0, "lr": 4.7280717752100683e-07, "epoch": 3.5454278943639608, "percentage": 70.91, "elapsed_time": "3:08:19", "remaining_time": "1:17:15", "throughput": 8656.74, "total_tokens": 97813584} +{"current_steps": 145130, "total_steps": 204665, "loss": 0.0, "lr": 4.7273471516592076e-07, "epoch": 3.5455500451958075, "percentage": 70.91, "elapsed_time": "3:08:19", "remaining_time": "1:17:15", "throughput": 8656.76, "total_tokens": 97816848} +{"current_steps": 145135, "total_steps": 204665, "loss": 0.0, "lr": 4.726622566452497e-07, "epoch": 3.545672196027655, "percentage": 70.91, "elapsed_time": "3:08:19", "remaining_time": "1:17:14", "throughput": 8656.79, "total_tokens": 97820240} +{"current_steps": 145140, "total_steps": 204665, "loss": 0.0435, "lr": 4.7258980195952103e-07, "epoch": 3.545794346859502, "percentage": 70.92, "elapsed_time": "3:08:20", "remaining_time": "1:17:14", "throughput": 8656.79, "total_tokens": 97823184} +{"current_steps": 145145, "total_steps": 204665, "loss": 0.0, "lr": 4.7251735110926103e-07, "epoch": 3.5459164976913495, "percentage": 70.92, "elapsed_time": "3:08:20", "remaining_time": "1:17:14", "throughput": 8656.82, "total_tokens": 97826512} +{"current_steps": 145150, "total_steps": 204665, "loss": 0.0, "lr": 4.724449040949965e-07, "epoch": 3.5460386485231963, "percentage": 70.92, "elapsed_time": "3:08:20", "remaining_time": "1:17:13", "throughput": 8656.83, "total_tokens": 97829584} +{"current_steps": 145155, "total_steps": 204665, "loss": 0.0001, "lr": 4.723724609172548e-07, "epoch": 3.5461607993550435, "percentage": 70.92, "elapsed_time": "3:08:21", "remaining_time": "1:17:13", "throughput": 8656.83, "total_tokens": 97832464} +{"current_steps": 145160, "total_steps": 204665, "loss": 0.0001, "lr": 4.7230002157656245e-07, "epoch": 3.5462829501868907, "percentage": 70.93, "elapsed_time": "3:08:21", "remaining_time": "1:17:12", "throughput": 8656.84, "total_tokens": 97835600} +{"current_steps": 145165, "total_steps": 204665, "loss": 0.1, "lr": 4.7222758607344593e-07, "epoch": 3.546405101018738, "percentage": 70.93, "elapsed_time": "3:08:21", "remaining_time": "1:17:12", "throughput": 8656.87, "total_tokens": 97838928} +{"current_steps": 145170, "total_steps": 204665, "loss": 0.0841, "lr": 4.7215515440843236e-07, "epoch": 3.546527251850585, "percentage": 70.93, "elapsed_time": "3:08:22", "remaining_time": "1:17:11", "throughput": 8656.91, "total_tokens": 97842384} +{"current_steps": 145175, "total_steps": 204665, "loss": 0.0, "lr": 4.720827265820489e-07, "epoch": 3.5466494026824322, "percentage": 70.93, "elapsed_time": "3:08:22", "remaining_time": "1:17:11", "throughput": 8656.93, "total_tokens": 97845584} +{"current_steps": 145180, "total_steps": 204665, "loss": 0.0, "lr": 4.7201030259482146e-07, "epoch": 3.5467715535142794, "percentage": 70.94, "elapsed_time": "3:08:22", "remaining_time": "1:17:11", "throughput": 8656.97, "total_tokens": 97849168} +{"current_steps": 145185, "total_steps": 204665, "loss": 0.0, "lr": 4.719378824472774e-07, "epoch": 3.5468937043461266, "percentage": 70.94, "elapsed_time": "3:08:23", "remaining_time": "1:17:10", "throughput": 8657.02, "total_tokens": 97852688} +{"current_steps": 145190, "total_steps": 204665, "loss": 0.0002, "lr": 4.7186546613994283e-07, "epoch": 3.547015855177974, "percentage": 70.94, "elapsed_time": "3:08:23", "remaining_time": "1:17:10", "throughput": 8657.04, "total_tokens": 97855888} +{"current_steps": 145195, "total_steps": 204665, "loss": 0.0001, "lr": 4.7179305367334453e-07, "epoch": 3.547138006009821, "percentage": 70.94, "elapsed_time": "3:08:23", "remaining_time": "1:17:09", "throughput": 8657.06, "total_tokens": 97859216} +{"current_steps": 145200, "total_steps": 204665, "loss": 0.0, "lr": 4.7172064504800967e-07, "epoch": 3.547260156841668, "percentage": 70.95, "elapsed_time": "3:08:24", "remaining_time": "1:17:09", "throughput": 8657.08, "total_tokens": 97862416} +{"current_steps": 145205, "total_steps": 204665, "loss": 0.0332, "lr": 4.7164824026446405e-07, "epoch": 3.5473823076735154, "percentage": 70.95, "elapsed_time": "3:08:24", "remaining_time": "1:17:09", "throughput": 8657.09, "total_tokens": 97865488} +{"current_steps": 145210, "total_steps": 204665, "loss": 0.0001, "lr": 4.7157583932323475e-07, "epoch": 3.5475044585053626, "percentage": 70.95, "elapsed_time": "3:08:25", "remaining_time": "1:17:08", "throughput": 8657.11, "total_tokens": 97868624} +{"current_steps": 145215, "total_steps": 204665, "loss": 0.0001, "lr": 4.7150344222484786e-07, "epoch": 3.5476266093372097, "percentage": 70.95, "elapsed_time": "3:08:25", "remaining_time": "1:17:08", "throughput": 8657.11, "total_tokens": 97871568} +{"current_steps": 145220, "total_steps": 204665, "loss": 0.0, "lr": 4.714310489698303e-07, "epoch": 3.547748760169057, "percentage": 70.95, "elapsed_time": "3:08:25", "remaining_time": "1:17:07", "throughput": 8657.13, "total_tokens": 97874832} +{"current_steps": 145225, "total_steps": 204665, "loss": 0.062, "lr": 4.7135865955870803e-07, "epoch": 3.5478709110009037, "percentage": 70.96, "elapsed_time": "3:08:26", "remaining_time": "1:17:07", "throughput": 8657.15, "total_tokens": 97877968} +{"current_steps": 145230, "total_steps": 204665, "loss": 0.0, "lr": 4.7128627399200784e-07, "epoch": 3.5479930618327513, "percentage": 70.96, "elapsed_time": "3:08:26", "remaining_time": "1:17:07", "throughput": 8657.17, "total_tokens": 97881296} +{"current_steps": 145235, "total_steps": 204665, "loss": 0.0001, "lr": 4.712138922702563e-07, "epoch": 3.548115212664598, "percentage": 70.96, "elapsed_time": "3:08:26", "remaining_time": "1:17:06", "throughput": 8657.26, "total_tokens": 97885456} +{"current_steps": 145240, "total_steps": 204665, "loss": 0.0194, "lr": 4.7114151439397933e-07, "epoch": 3.5482373634964453, "percentage": 70.96, "elapsed_time": "3:08:27", "remaining_time": "1:17:06", "throughput": 8657.27, "total_tokens": 97888592} +{"current_steps": 145245, "total_steps": 204665, "loss": 0.0002, "lr": 4.710691403637038e-07, "epoch": 3.5483595143282924, "percentage": 70.97, "elapsed_time": "3:08:27", "remaining_time": "1:17:05", "throughput": 8657.29, "total_tokens": 97891728} +{"current_steps": 145250, "total_steps": 204665, "loss": 0.0393, "lr": 4.7099677017995575e-07, "epoch": 3.5484816651601396, "percentage": 70.97, "elapsed_time": "3:08:27", "remaining_time": "1:17:05", "throughput": 8657.31, "total_tokens": 97894992} +{"current_steps": 145255, "total_steps": 204665, "loss": 0.0001, "lr": 4.7092440384326113e-07, "epoch": 3.548603815991987, "percentage": 70.97, "elapsed_time": "3:08:28", "remaining_time": "1:17:05", "throughput": 8657.32, "total_tokens": 97898064} +{"current_steps": 145260, "total_steps": 204665, "loss": 0.0001, "lr": 4.708520413541469e-07, "epoch": 3.548725966823834, "percentage": 70.97, "elapsed_time": "3:08:28", "remaining_time": "1:17:04", "throughput": 8657.35, "total_tokens": 97901328} +{"current_steps": 145265, "total_steps": 204665, "loss": 0.0477, "lr": 4.7077968271313863e-07, "epoch": 3.548848117655681, "percentage": 70.98, "elapsed_time": "3:08:28", "remaining_time": "1:17:04", "throughput": 8657.36, "total_tokens": 97904464} +{"current_steps": 145270, "total_steps": 204665, "loss": 0.0002, "lr": 4.707073279207632e-07, "epoch": 3.5489702684875284, "percentage": 70.98, "elapsed_time": "3:08:29", "remaining_time": "1:17:03", "throughput": 8657.38, "total_tokens": 97907664} +{"current_steps": 145275, "total_steps": 204665, "loss": 0.0536, "lr": 4.706349769775461e-07, "epoch": 3.5490924193193756, "percentage": 70.98, "elapsed_time": "3:08:29", "remaining_time": "1:17:03", "throughput": 8657.4, "total_tokens": 97910864} +{"current_steps": 145280, "total_steps": 204665, "loss": 0.0001, "lr": 4.705626298840141e-07, "epoch": 3.5492145701512228, "percentage": 70.98, "elapsed_time": "3:08:29", "remaining_time": "1:17:03", "throughput": 8657.43, "total_tokens": 97914256} +{"current_steps": 145285, "total_steps": 204665, "loss": 0.0004, "lr": 4.7049028664069266e-07, "epoch": 3.54933672098307, "percentage": 70.99, "elapsed_time": "3:08:30", "remaining_time": "1:17:02", "throughput": 8657.44, "total_tokens": 97917328} +{"current_steps": 145290, "total_steps": 204665, "loss": 0.0001, "lr": 4.7041794724810846e-07, "epoch": 3.549458871814917, "percentage": 70.99, "elapsed_time": "3:08:30", "remaining_time": "1:17:02", "throughput": 8657.45, "total_tokens": 97920464} +{"current_steps": 145295, "total_steps": 204665, "loss": 0.0001, "lr": 4.703456117067877e-07, "epoch": 3.5495810226467643, "percentage": 70.99, "elapsed_time": "3:08:30", "remaining_time": "1:17:01", "throughput": 8657.47, "total_tokens": 97923600} +{"current_steps": 145300, "total_steps": 204665, "loss": 0.0, "lr": 4.702732800172556e-07, "epoch": 3.5497031734786115, "percentage": 70.99, "elapsed_time": "3:08:31", "remaining_time": "1:17:01", "throughput": 8657.5, "total_tokens": 97926928} +{"current_steps": 145305, "total_steps": 204665, "loss": 0.0365, "lr": 4.702009521800392e-07, "epoch": 3.5498253243104587, "percentage": 71.0, "elapsed_time": "3:08:31", "remaining_time": "1:17:01", "throughput": 8657.52, "total_tokens": 97930192} +{"current_steps": 145310, "total_steps": 204665, "loss": 0.0, "lr": 4.701286281956636e-07, "epoch": 3.5499474751423055, "percentage": 71.0, "elapsed_time": "3:08:31", "remaining_time": "1:17:00", "throughput": 8657.54, "total_tokens": 97933456} +{"current_steps": 145315, "total_steps": 204665, "loss": 0.0001, "lr": 4.7005630806465547e-07, "epoch": 3.550069625974153, "percentage": 71.0, "elapsed_time": "3:08:32", "remaining_time": "1:17:00", "throughput": 8657.56, "total_tokens": 97936592} +{"current_steps": 145320, "total_steps": 204665, "loss": 0.0, "lr": 4.6998399178754e-07, "epoch": 3.550191776806, "percentage": 71.0, "elapsed_time": "3:08:32", "remaining_time": "1:16:59", "throughput": 8657.61, "total_tokens": 97940240} +{"current_steps": 145325, "total_steps": 204665, "loss": 0.0, "lr": 4.6991167936484346e-07, "epoch": 3.5503139276378475, "percentage": 71.01, "elapsed_time": "3:08:32", "remaining_time": "1:16:59", "throughput": 8657.64, "total_tokens": 97943632} +{"current_steps": 145330, "total_steps": 204665, "loss": 0.0006, "lr": 4.698393707970922e-07, "epoch": 3.5504360784696942, "percentage": 71.01, "elapsed_time": "3:08:33", "remaining_time": "1:16:58", "throughput": 8657.68, "total_tokens": 97947088} +{"current_steps": 145335, "total_steps": 204665, "loss": 0.0, "lr": 4.697670660848113e-07, "epoch": 3.5505582293015414, "percentage": 71.01, "elapsed_time": "3:08:33", "remaining_time": "1:16:58", "throughput": 8657.74, "total_tokens": 97950864} +{"current_steps": 145340, "total_steps": 204665, "loss": 0.0001, "lr": 4.6969476522852726e-07, "epoch": 3.5506803801333886, "percentage": 71.01, "elapsed_time": "3:08:34", "remaining_time": "1:16:58", "throughput": 8657.78, "total_tokens": 97954384} +{"current_steps": 145345, "total_steps": 204665, "loss": 0.0, "lr": 4.696224682287652e-07, "epoch": 3.550802530965236, "percentage": 71.02, "elapsed_time": "3:08:34", "remaining_time": "1:16:57", "throughput": 8657.81, "total_tokens": 97957712} +{"current_steps": 145350, "total_steps": 204665, "loss": 0.0, "lr": 4.695501750860514e-07, "epoch": 3.550924681797083, "percentage": 71.02, "elapsed_time": "3:08:34", "remaining_time": "1:16:57", "throughput": 8657.82, "total_tokens": 97960848} +{"current_steps": 145355, "total_steps": 204665, "loss": 0.0, "lr": 4.694778858009112e-07, "epoch": 3.55104683262893, "percentage": 71.02, "elapsed_time": "3:08:35", "remaining_time": "1:16:56", "throughput": 8657.87, "total_tokens": 97964560} +{"current_steps": 145360, "total_steps": 204665, "loss": 0.0, "lr": 4.694056003738708e-07, "epoch": 3.5511689834607774, "percentage": 71.02, "elapsed_time": "3:08:35", "remaining_time": "1:16:56", "throughput": 8657.85, "total_tokens": 97967184} +{"current_steps": 145365, "total_steps": 204665, "loss": 0.0, "lr": 4.693333188054556e-07, "epoch": 3.5512911342926246, "percentage": 71.03, "elapsed_time": "3:08:35", "remaining_time": "1:16:56", "throughput": 8657.85, "total_tokens": 97970064} +{"current_steps": 145370, "total_steps": 204665, "loss": 0.1659, "lr": 4.692610410961909e-07, "epoch": 3.5514132851244717, "percentage": 71.03, "elapsed_time": "3:08:36", "remaining_time": "1:16:55", "throughput": 8657.87, "total_tokens": 97973264} +{"current_steps": 145375, "total_steps": 204665, "loss": 0.0, "lr": 4.6918876724660296e-07, "epoch": 3.551535435956319, "percentage": 71.03, "elapsed_time": "3:08:36", "remaining_time": "1:16:55", "throughput": 8657.91, "total_tokens": 97976784} +{"current_steps": 145380, "total_steps": 204665, "loss": 0.0, "lr": 4.691164972572168e-07, "epoch": 3.551657586788166, "percentage": 71.03, "elapsed_time": "3:08:36", "remaining_time": "1:16:54", "throughput": 8657.93, "total_tokens": 97980048} +{"current_steps": 145385, "total_steps": 204665, "loss": 0.0, "lr": 4.690442311285582e-07, "epoch": 3.5517797376200133, "percentage": 71.04, "elapsed_time": "3:08:37", "remaining_time": "1:16:54", "throughput": 8657.97, "total_tokens": 97983504} +{"current_steps": 145390, "total_steps": 204665, "loss": 0.0235, "lr": 4.689719688611532e-07, "epoch": 3.5519018884518605, "percentage": 71.04, "elapsed_time": "3:08:37", "remaining_time": "1:16:54", "throughput": 8658.03, "total_tokens": 97987216} +{"current_steps": 145395, "total_steps": 204665, "loss": 0.0, "lr": 4.6889971045552636e-07, "epoch": 3.5520240392837072, "percentage": 71.04, "elapsed_time": "3:08:37", "remaining_time": "1:16:53", "throughput": 8658.05, "total_tokens": 97990480} +{"current_steps": 145400, "total_steps": 204665, "loss": 0.0, "lr": 4.6882745591220417e-07, "epoch": 3.552146190115555, "percentage": 71.04, "elapsed_time": "3:08:38", "remaining_time": "1:16:53", "throughput": 8658.07, "total_tokens": 97993680} +{"current_steps": 145405, "total_steps": 204665, "loss": 0.0, "lr": 4.687552052317112e-07, "epoch": 3.5522683409474016, "percentage": 71.05, "elapsed_time": "3:08:38", "remaining_time": "1:16:52", "throughput": 8658.1, "total_tokens": 97997008} +{"current_steps": 145410, "total_steps": 204665, "loss": 0.0, "lr": 4.6868295841457363e-07, "epoch": 3.5523904917792493, "percentage": 71.05, "elapsed_time": "3:08:38", "remaining_time": "1:16:52", "throughput": 8658.12, "total_tokens": 98000272} +{"current_steps": 145415, "total_steps": 204665, "loss": 0.0001, "lr": 4.68610715461316e-07, "epoch": 3.552512642611096, "percentage": 71.05, "elapsed_time": "3:08:39", "remaining_time": "1:16:52", "throughput": 8658.15, "total_tokens": 98003600} +{"current_steps": 145420, "total_steps": 204665, "loss": 0.0001, "lr": 4.6853847637246433e-07, "epoch": 3.552634793442943, "percentage": 71.05, "elapsed_time": "3:08:39", "remaining_time": "1:16:51", "throughput": 8658.16, "total_tokens": 98006800} +{"current_steps": 145425, "total_steps": 204665, "loss": 0.06, "lr": 4.6846624114854415e-07, "epoch": 3.5527569442747904, "percentage": 71.06, "elapsed_time": "3:08:39", "remaining_time": "1:16:51", "throughput": 8658.19, "total_tokens": 98010064} +{"current_steps": 145430, "total_steps": 204665, "loss": 0.0288, "lr": 4.6839400979008005e-07, "epoch": 3.5528790951066376, "percentage": 71.06, "elapsed_time": "3:08:40", "remaining_time": "1:16:50", "throughput": 8658.22, "total_tokens": 98013456} +{"current_steps": 145435, "total_steps": 204665, "loss": 0.0606, "lr": 4.683217822975981e-07, "epoch": 3.5530012459384848, "percentage": 71.06, "elapsed_time": "3:08:40", "remaining_time": "1:16:50", "throughput": 8658.26, "total_tokens": 98016912} +{"current_steps": 145440, "total_steps": 204665, "loss": 0.0246, "lr": 4.6824955867162276e-07, "epoch": 3.553123396770332, "percentage": 71.06, "elapsed_time": "3:08:40", "remaining_time": "1:16:50", "throughput": 8658.3, "total_tokens": 98020496} +{"current_steps": 145445, "total_steps": 204665, "loss": 0.0, "lr": 4.681773389126795e-07, "epoch": 3.553245547602179, "percentage": 71.06, "elapsed_time": "3:08:41", "remaining_time": "1:16:49", "throughput": 8658.32, "total_tokens": 98023632} +{"current_steps": 145450, "total_steps": 204665, "loss": 0.0, "lr": 4.681051230212942e-07, "epoch": 3.5533676984340263, "percentage": 71.07, "elapsed_time": "3:08:41", "remaining_time": "1:16:49", "throughput": 8658.39, "total_tokens": 98027664} +{"current_steps": 145455, "total_steps": 204665, "loss": 0.0, "lr": 4.680329109979916e-07, "epoch": 3.5534898492658735, "percentage": 71.07, "elapsed_time": "3:08:42", "remaining_time": "1:16:48", "throughput": 8658.41, "total_tokens": 98030800} +{"current_steps": 145460, "total_steps": 204665, "loss": 0.0, "lr": 4.679607028432961e-07, "epoch": 3.5536120000977207, "percentage": 71.07, "elapsed_time": "3:08:42", "remaining_time": "1:16:48", "throughput": 8658.47, "total_tokens": 98034576} +{"current_steps": 145465, "total_steps": 204665, "loss": 0.0001, "lr": 4.6788849855773413e-07, "epoch": 3.553734150929568, "percentage": 71.07, "elapsed_time": "3:08:42", "remaining_time": "1:16:48", "throughput": 8658.49, "total_tokens": 98037840} +{"current_steps": 145470, "total_steps": 204665, "loss": 0.0667, "lr": 4.6781629814183e-07, "epoch": 3.553856301761415, "percentage": 71.08, "elapsed_time": "3:08:43", "remaining_time": "1:16:47", "throughput": 8658.51, "total_tokens": 98040976} +{"current_steps": 145475, "total_steps": 204665, "loss": 0.0, "lr": 4.6774410159610847e-07, "epoch": 3.5539784525932623, "percentage": 71.08, "elapsed_time": "3:08:43", "remaining_time": "1:16:47", "throughput": 8658.51, "total_tokens": 98043984} +{"current_steps": 145480, "total_steps": 204665, "loss": 0.0344, "lr": 4.676719089210951e-07, "epoch": 3.5541006034251095, "percentage": 71.08, "elapsed_time": "3:08:43", "remaining_time": "1:16:46", "throughput": 8658.55, "total_tokens": 98047504} +{"current_steps": 145485, "total_steps": 204665, "loss": 0.0, "lr": 4.675997201173151e-07, "epoch": 3.5542227542569567, "percentage": 71.08, "elapsed_time": "3:08:44", "remaining_time": "1:16:46", "throughput": 8658.55, "total_tokens": 98050448} +{"current_steps": 145490, "total_steps": 204665, "loss": 0.0601, "lr": 4.6752753518529276e-07, "epoch": 3.5543449050888034, "percentage": 71.09, "elapsed_time": "3:08:44", "remaining_time": "1:16:45", "throughput": 8658.57, "total_tokens": 98053648} +{"current_steps": 145495, "total_steps": 204665, "loss": 0.0024, "lr": 4.674553541255537e-07, "epoch": 3.554467055920651, "percentage": 71.09, "elapsed_time": "3:08:44", "remaining_time": "1:16:45", "throughput": 8658.59, "total_tokens": 98056912} +{"current_steps": 145500, "total_steps": 204665, "loss": 0.0, "lr": 4.673831769386223e-07, "epoch": 3.554589206752498, "percentage": 71.09, "elapsed_time": "3:08:45", "remaining_time": "1:16:45", "throughput": 8658.62, "total_tokens": 98060304} +{"current_steps": 145505, "total_steps": 204665, "loss": 0.0, "lr": 4.67311003625024e-07, "epoch": 3.5547113575843454, "percentage": 71.09, "elapsed_time": "3:08:45", "remaining_time": "1:16:44", "throughput": 8658.64, "total_tokens": 98063440} +{"current_steps": 145510, "total_steps": 204665, "loss": 0.0002, "lr": 4.67238834185283e-07, "epoch": 3.554833508416192, "percentage": 71.1, "elapsed_time": "3:08:45", "remaining_time": "1:16:44", "throughput": 8658.67, "total_tokens": 98066832} +{"current_steps": 145515, "total_steps": 204665, "loss": 0.0001, "lr": 4.6716666861992447e-07, "epoch": 3.5549556592480394, "percentage": 71.1, "elapsed_time": "3:08:46", "remaining_time": "1:16:43", "throughput": 8658.68, "total_tokens": 98069904} +{"current_steps": 145520, "total_steps": 204665, "loss": 0.0474, "lr": 4.6709450692947363e-07, "epoch": 3.5550778100798865, "percentage": 71.1, "elapsed_time": "3:08:46", "remaining_time": "1:16:43", "throughput": 8658.74, "total_tokens": 98073680} +{"current_steps": 145525, "total_steps": 204665, "loss": 0.0, "lr": 4.670223491144545e-07, "epoch": 3.5551999609117337, "percentage": 71.1, "elapsed_time": "3:08:46", "remaining_time": "1:16:43", "throughput": 8658.75, "total_tokens": 98076752} +{"current_steps": 145530, "total_steps": 204665, "loss": 0.0501, "lr": 4.6695019517539257e-07, "epoch": 3.555322111743581, "percentage": 71.11, "elapsed_time": "3:08:47", "remaining_time": "1:16:42", "throughput": 8658.78, "total_tokens": 98080144} +{"current_steps": 145535, "total_steps": 204665, "loss": 0.0, "lr": 4.6687804511281183e-07, "epoch": 3.555444262575428, "percentage": 71.11, "elapsed_time": "3:08:47", "remaining_time": "1:16:42", "throughput": 8658.8, "total_tokens": 98083344} +{"current_steps": 145540, "total_steps": 204665, "loss": 0.0667, "lr": 4.668058989272373e-07, "epoch": 3.5555664134072753, "percentage": 71.11, "elapsed_time": "3:08:47", "remaining_time": "1:16:41", "throughput": 8658.83, "total_tokens": 98086672} +{"current_steps": 145545, "total_steps": 204665, "loss": 0.0, "lr": 4.667337566191941e-07, "epoch": 3.5556885642391225, "percentage": 71.11, "elapsed_time": "3:08:48", "remaining_time": "1:16:41", "throughput": 8658.85, "total_tokens": 98089872} +{"current_steps": 145550, "total_steps": 204665, "loss": 0.0, "lr": 4.666616181892061e-07, "epoch": 3.5558107150709697, "percentage": 71.12, "elapsed_time": "3:08:48", "remaining_time": "1:16:41", "throughput": 8658.86, "total_tokens": 98092944} +{"current_steps": 145555, "total_steps": 204665, "loss": 0.0352, "lr": 4.665894836377986e-07, "epoch": 3.555932865902817, "percentage": 71.12, "elapsed_time": "3:08:48", "remaining_time": "1:16:40", "throughput": 8658.87, "total_tokens": 98096016} +{"current_steps": 145560, "total_steps": 204665, "loss": 0.0, "lr": 4.665173529654959e-07, "epoch": 3.556055016734664, "percentage": 71.12, "elapsed_time": "3:08:49", "remaining_time": "1:16:40", "throughput": 8658.9, "total_tokens": 98099280} +{"current_steps": 145565, "total_steps": 204665, "loss": 0.0727, "lr": 4.6644522617282203e-07, "epoch": 3.5561771675665113, "percentage": 71.12, "elapsed_time": "3:08:49", "remaining_time": "1:16:39", "throughput": 8658.94, "total_tokens": 98102864} +{"current_steps": 145570, "total_steps": 204665, "loss": 0.0569, "lr": 4.6637310326030243e-07, "epoch": 3.5562993183983584, "percentage": 71.13, "elapsed_time": "3:08:50", "remaining_time": "1:16:39", "throughput": 8658.96, "total_tokens": 98106064} +{"current_steps": 145575, "total_steps": 204665, "loss": 0.0, "lr": 4.663009842284608e-07, "epoch": 3.556421469230205, "percentage": 71.13, "elapsed_time": "3:08:50", "remaining_time": "1:16:39", "throughput": 8658.99, "total_tokens": 98109456} +{"current_steps": 145580, "total_steps": 204665, "loss": 0.0001, "lr": 4.662288690778222e-07, "epoch": 3.556543620062053, "percentage": 71.13, "elapsed_time": "3:08:50", "remaining_time": "1:16:38", "throughput": 8659.02, "total_tokens": 98112784} +{"current_steps": 145585, "total_steps": 204665, "loss": 0.0, "lr": 4.661567578089105e-07, "epoch": 3.5566657708938996, "percentage": 71.13, "elapsed_time": "3:08:51", "remaining_time": "1:16:38", "throughput": 8659.05, "total_tokens": 98116112} +{"current_steps": 145590, "total_steps": 204665, "loss": 0.0003, "lr": 4.660846504222509e-07, "epoch": 3.556787921725747, "percentage": 71.14, "elapsed_time": "3:08:51", "remaining_time": "1:16:37", "throughput": 8659.07, "total_tokens": 98119312} +{"current_steps": 145595, "total_steps": 204665, "loss": 0.0, "lr": 4.660125469183669e-07, "epoch": 3.556910072557594, "percentage": 71.14, "elapsed_time": "3:08:51", "remaining_time": "1:16:37", "throughput": 8659.08, "total_tokens": 98122448} +{"current_steps": 145600, "total_steps": 204665, "loss": 0.0259, "lr": 4.6594044729778336e-07, "epoch": 3.557032223389441, "percentage": 71.14, "elapsed_time": "3:08:52", "remaining_time": "1:16:37", "throughput": 8659.12, "total_tokens": 98125904} +{"current_steps": 145605, "total_steps": 204665, "loss": 0.0, "lr": 4.658683515610248e-07, "epoch": 3.5571543742212883, "percentage": 71.14, "elapsed_time": "3:08:52", "remaining_time": "1:16:36", "throughput": 8659.15, "total_tokens": 98129232} +{"current_steps": 145610, "total_steps": 204665, "loss": 0.0001, "lr": 4.6579625970861494e-07, "epoch": 3.5572765250531355, "percentage": 71.15, "elapsed_time": "3:08:52", "remaining_time": "1:16:36", "throughput": 8659.18, "total_tokens": 98132624} +{"current_steps": 145615, "total_steps": 204665, "loss": 0.0404, "lr": 4.657241717410787e-07, "epoch": 3.5573986758849827, "percentage": 71.15, "elapsed_time": "3:08:53", "remaining_time": "1:16:35", "throughput": 8659.21, "total_tokens": 98135888} +{"current_steps": 145620, "total_steps": 204665, "loss": 0.0, "lr": 4.656520876589397e-07, "epoch": 3.55752082671683, "percentage": 71.15, "elapsed_time": "3:08:53", "remaining_time": "1:16:35", "throughput": 8659.22, "total_tokens": 98139088} +{"current_steps": 145625, "total_steps": 204665, "loss": 0.0001, "lr": 4.6558000746272276e-07, "epoch": 3.557642977548677, "percentage": 71.15, "elapsed_time": "3:08:53", "remaining_time": "1:16:35", "throughput": 8659.26, "total_tokens": 98142544} +{"current_steps": 145630, "total_steps": 204665, "loss": 0.0307, "lr": 4.655079311529513e-07, "epoch": 3.5577651283805243, "percentage": 71.16, "elapsed_time": "3:08:54", "remaining_time": "1:16:34", "throughput": 8659.28, "total_tokens": 98145808} +{"current_steps": 145635, "total_steps": 204665, "loss": 0.0004, "lr": 4.6543585873015e-07, "epoch": 3.5578872792123715, "percentage": 71.16, "elapsed_time": "3:08:54", "remaining_time": "1:16:34", "throughput": 8659.3, "total_tokens": 98149072} +{"current_steps": 145640, "total_steps": 204665, "loss": 0.0, "lr": 4.6536379019484327e-07, "epoch": 3.5580094300442187, "percentage": 71.16, "elapsed_time": "3:08:54", "remaining_time": "1:16:33", "throughput": 8659.34, "total_tokens": 98152528} +{"current_steps": 145645, "total_steps": 204665, "loss": 0.0048, "lr": 4.6529172554755446e-07, "epoch": 3.558131580876066, "percentage": 71.16, "elapsed_time": "3:08:55", "remaining_time": "1:16:33", "throughput": 8659.36, "total_tokens": 98155728} +{"current_steps": 145650, "total_steps": 204665, "loss": 0.0006, "lr": 4.6521966478880846e-07, "epoch": 3.558253731707913, "percentage": 71.17, "elapsed_time": "3:08:55", "remaining_time": "1:16:32", "throughput": 8659.43, "total_tokens": 98159696} +{"current_steps": 145655, "total_steps": 204665, "loss": 0.0, "lr": 4.6514760791912853e-07, "epoch": 3.5583758825397602, "percentage": 71.17, "elapsed_time": "3:08:55", "remaining_time": "1:16:32", "throughput": 8659.47, "total_tokens": 98163152} +{"current_steps": 145660, "total_steps": 204665, "loss": 0.0, "lr": 4.6507555493903936e-07, "epoch": 3.5584980333716074, "percentage": 71.17, "elapsed_time": "3:08:56", "remaining_time": "1:16:32", "throughput": 8659.53, "total_tokens": 98166992} +{"current_steps": 145665, "total_steps": 204665, "loss": 0.0, "lr": 4.6500350584906435e-07, "epoch": 3.5586201842034546, "percentage": 71.17, "elapsed_time": "3:08:56", "remaining_time": "1:16:31", "throughput": 8659.55, "total_tokens": 98170192} +{"current_steps": 145670, "total_steps": 204665, "loss": 0.0488, "lr": 4.649314606497281e-07, "epoch": 3.5587423350353014, "percentage": 71.17, "elapsed_time": "3:08:56", "remaining_time": "1:16:31", "throughput": 8659.56, "total_tokens": 98173264} +{"current_steps": 145675, "total_steps": 204665, "loss": 0.0, "lr": 4.6485941934155413e-07, "epoch": 3.558864485867149, "percentage": 71.18, "elapsed_time": "3:08:57", "remaining_time": "1:16:30", "throughput": 8659.58, "total_tokens": 98176464} +{"current_steps": 145680, "total_steps": 204665, "loss": 0.049, "lr": 4.6478738192506607e-07, "epoch": 3.5589866366989957, "percentage": 71.18, "elapsed_time": "3:08:57", "remaining_time": "1:16:30", "throughput": 8659.62, "total_tokens": 98179984} +{"current_steps": 145685, "total_steps": 204665, "loss": 0.0008, "lr": 4.647153484007884e-07, "epoch": 3.559108787530843, "percentage": 71.18, "elapsed_time": "3:08:58", "remaining_time": "1:16:30", "throughput": 8659.66, "total_tokens": 98183376} +{"current_steps": 145690, "total_steps": 204665, "loss": 0.0477, "lr": 4.6464331876924443e-07, "epoch": 3.55923093836269, "percentage": 71.18, "elapsed_time": "3:08:58", "remaining_time": "1:16:29", "throughput": 8659.72, "total_tokens": 98187216} +{"current_steps": 145695, "total_steps": 204665, "loss": 0.0001, "lr": 4.645712930309582e-07, "epoch": 3.5593530891945373, "percentage": 71.19, "elapsed_time": "3:08:58", "remaining_time": "1:16:29", "throughput": 8659.76, "total_tokens": 98190672} +{"current_steps": 145700, "total_steps": 204665, "loss": 0.0, "lr": 4.6449927118645393e-07, "epoch": 3.5594752400263845, "percentage": 71.19, "elapsed_time": "3:08:59", "remaining_time": "1:16:28", "throughput": 8659.82, "total_tokens": 98194384} +{"current_steps": 145705, "total_steps": 204665, "loss": 0.0, "lr": 4.6442725323625467e-07, "epoch": 3.5595973908582317, "percentage": 71.19, "elapsed_time": "3:08:59", "remaining_time": "1:16:28", "throughput": 8659.86, "total_tokens": 98197904} +{"current_steps": 145710, "total_steps": 204665, "loss": 0.0, "lr": 4.6435523918088473e-07, "epoch": 3.559719541690079, "percentage": 71.19, "elapsed_time": "3:08:59", "remaining_time": "1:16:28", "throughput": 8659.93, "total_tokens": 98201808} +{"current_steps": 145715, "total_steps": 204665, "loss": 0.0, "lr": 4.642832290208672e-07, "epoch": 3.559841692521926, "percentage": 71.2, "elapsed_time": "3:09:00", "remaining_time": "1:16:27", "throughput": 8659.97, "total_tokens": 98205328} +{"current_steps": 145720, "total_steps": 204665, "loss": 0.0, "lr": 4.642112227567265e-07, "epoch": 3.5599638433537732, "percentage": 71.2, "elapsed_time": "3:09:00", "remaining_time": "1:16:27", "throughput": 8660.0, "total_tokens": 98208656} +{"current_steps": 145725, "total_steps": 204665, "loss": 0.0, "lr": 4.641392203889857e-07, "epoch": 3.5600859941856204, "percentage": 71.2, "elapsed_time": "3:09:00", "remaining_time": "1:16:26", "throughput": 8660.01, "total_tokens": 98211792} +{"current_steps": 145730, "total_steps": 204665, "loss": 0.0, "lr": 4.640672219181684e-07, "epoch": 3.5602081450174676, "percentage": 71.2, "elapsed_time": "3:09:01", "remaining_time": "1:16:26", "throughput": 8660.02, "total_tokens": 98214800} +{"current_steps": 145735, "total_steps": 204665, "loss": 0.0001, "lr": 4.639952273447989e-07, "epoch": 3.560330295849315, "percentage": 71.21, "elapsed_time": "3:09:01", "remaining_time": "1:16:26", "throughput": 8660.06, "total_tokens": 98218256} +{"current_steps": 145740, "total_steps": 204665, "loss": 0.0, "lr": 4.639232366693998e-07, "epoch": 3.560452446681162, "percentage": 71.21, "elapsed_time": "3:09:01", "remaining_time": "1:16:25", "throughput": 8660.09, "total_tokens": 98221648} +{"current_steps": 145745, "total_steps": 204665, "loss": 0.0, "lr": 4.638512498924956e-07, "epoch": 3.560574597513009, "percentage": 71.21, "elapsed_time": "3:09:02", "remaining_time": "1:16:25", "throughput": 8660.11, "total_tokens": 98224784} +{"current_steps": 145750, "total_steps": 204665, "loss": 0.0, "lr": 4.637792670146089e-07, "epoch": 3.5606967483448564, "percentage": 71.21, "elapsed_time": "3:09:02", "remaining_time": "1:16:24", "throughput": 8660.15, "total_tokens": 98228368} +{"current_steps": 145755, "total_steps": 204665, "loss": 0.0399, "lr": 4.6370728803626357e-07, "epoch": 3.560818899176703, "percentage": 71.22, "elapsed_time": "3:09:02", "remaining_time": "1:16:24", "throughput": 8660.17, "total_tokens": 98231568} +{"current_steps": 145760, "total_steps": 204665, "loss": 0.0479, "lr": 4.6363531295798344e-07, "epoch": 3.5609410500085508, "percentage": 71.22, "elapsed_time": "3:09:03", "remaining_time": "1:16:24", "throughput": 8660.21, "total_tokens": 98234960} +{"current_steps": 145765, "total_steps": 204665, "loss": 0.0001, "lr": 4.635633417802917e-07, "epoch": 3.5610632008403975, "percentage": 71.22, "elapsed_time": "3:09:03", "remaining_time": "1:16:23", "throughput": 8660.26, "total_tokens": 98238672} +{"current_steps": 145770, "total_steps": 204665, "loss": 0.0, "lr": 4.634913745037111e-07, "epoch": 3.561185351672245, "percentage": 71.22, "elapsed_time": "3:09:03", "remaining_time": "1:16:23", "throughput": 8660.32, "total_tokens": 98242384} +{"current_steps": 145775, "total_steps": 204665, "loss": 0.0, "lr": 4.6341941112876593e-07, "epoch": 3.561307502504092, "percentage": 71.23, "elapsed_time": "3:09:04", "remaining_time": "1:16:22", "throughput": 8660.34, "total_tokens": 98245520} +{"current_steps": 145780, "total_steps": 204665, "loss": 0.0, "lr": 4.633474516559792e-07, "epoch": 3.561429653335939, "percentage": 71.23, "elapsed_time": "3:09:04", "remaining_time": "1:16:22", "throughput": 8660.35, "total_tokens": 98248656} +{"current_steps": 145785, "total_steps": 204665, "loss": 0.0001, "lr": 4.632754960858738e-07, "epoch": 3.5615518041677863, "percentage": 71.23, "elapsed_time": "3:09:04", "remaining_time": "1:16:22", "throughput": 8660.35, "total_tokens": 98251472} +{"current_steps": 145790, "total_steps": 204665, "loss": 0.0001, "lr": 4.6320354441897326e-07, "epoch": 3.5616739549996335, "percentage": 71.23, "elapsed_time": "3:09:05", "remaining_time": "1:16:21", "throughput": 8660.38, "total_tokens": 98254928} +{"current_steps": 145795, "total_steps": 204665, "loss": 0.0454, "lr": 4.6313159665580124e-07, "epoch": 3.5617961058314807, "percentage": 71.24, "elapsed_time": "3:09:05", "remaining_time": "1:16:21", "throughput": 8660.41, "total_tokens": 98258256} +{"current_steps": 145800, "total_steps": 204665, "loss": 0.0, "lr": 4.630596527968804e-07, "epoch": 3.561918256663328, "percentage": 71.24, "elapsed_time": "3:09:06", "remaining_time": "1:16:20", "throughput": 8660.42, "total_tokens": 98261264} +{"current_steps": 145805, "total_steps": 204665, "loss": 0.0, "lr": 4.629877128427345e-07, "epoch": 3.562040407495175, "percentage": 71.24, "elapsed_time": "3:09:06", "remaining_time": "1:16:20", "throughput": 8660.46, "total_tokens": 98264784} +{"current_steps": 145810, "total_steps": 204665, "loss": 0.0, "lr": 4.6291577679388607e-07, "epoch": 3.562162558327022, "percentage": 71.24, "elapsed_time": "3:09:06", "remaining_time": "1:16:20", "throughput": 8660.47, "total_tokens": 98267920} +{"current_steps": 145815, "total_steps": 204665, "loss": 0.0, "lr": 4.628438446508589e-07, "epoch": 3.5622847091588694, "percentage": 71.25, "elapsed_time": "3:09:07", "remaining_time": "1:16:19", "throughput": 8660.52, "total_tokens": 98271440} +{"current_steps": 145820, "total_steps": 204665, "loss": 0.0, "lr": 4.6277191641417547e-07, "epoch": 3.5624068599907166, "percentage": 71.25, "elapsed_time": "3:09:07", "remaining_time": "1:16:19", "throughput": 8660.57, "total_tokens": 98275152} +{"current_steps": 145825, "total_steps": 204665, "loss": 0.0, "lr": 4.6269999208435903e-07, "epoch": 3.562529010822564, "percentage": 71.25, "elapsed_time": "3:09:07", "remaining_time": "1:16:18", "throughput": 8660.6, "total_tokens": 98278480} +{"current_steps": 145830, "total_steps": 204665, "loss": 0.0, "lr": 4.6262807166193316e-07, "epoch": 3.562651161654411, "percentage": 71.25, "elapsed_time": "3:09:08", "remaining_time": "1:16:18", "throughput": 8660.66, "total_tokens": 98282256} +{"current_steps": 145835, "total_steps": 204665, "loss": 0.0, "lr": 4.6255615514742016e-07, "epoch": 3.562773312486258, "percentage": 71.26, "elapsed_time": "3:09:08", "remaining_time": "1:16:17", "throughput": 8660.67, "total_tokens": 98285392} +{"current_steps": 145840, "total_steps": 204665, "loss": 0.0, "lr": 4.6248424254134376e-07, "epoch": 3.5628954633181054, "percentage": 71.26, "elapsed_time": "3:09:08", "remaining_time": "1:16:17", "throughput": 8660.69, "total_tokens": 98288528} +{"current_steps": 145845, "total_steps": 204665, "loss": 0.0, "lr": 4.6241233384422616e-07, "epoch": 3.5630176141499525, "percentage": 71.26, "elapsed_time": "3:09:09", "remaining_time": "1:16:17", "throughput": 8660.71, "total_tokens": 98291792} +{"current_steps": 145850, "total_steps": 204665, "loss": 0.0, "lr": 4.6234042905659066e-07, "epoch": 3.5631397649817993, "percentage": 71.26, "elapsed_time": "3:09:09", "remaining_time": "1:16:16", "throughput": 8660.78, "total_tokens": 98295696} +{"current_steps": 145855, "total_steps": 204665, "loss": 0.0, "lr": 4.6226852817896046e-07, "epoch": 3.563261915813647, "percentage": 71.27, "elapsed_time": "3:09:09", "remaining_time": "1:16:16", "throughput": 8660.82, "total_tokens": 98299216} +{"current_steps": 145860, "total_steps": 204665, "loss": 0.0, "lr": 4.621966312118578e-07, "epoch": 3.5633840666454937, "percentage": 71.27, "elapsed_time": "3:09:10", "remaining_time": "1:16:15", "throughput": 8660.86, "total_tokens": 98302800} +{"current_steps": 145865, "total_steps": 204665, "loss": 0.0002, "lr": 4.621247381558063e-07, "epoch": 3.563506217477341, "percentage": 71.27, "elapsed_time": "3:09:10", "remaining_time": "1:16:15", "throughput": 8660.9, "total_tokens": 98306192} +{"current_steps": 145870, "total_steps": 204665, "loss": 0.0, "lr": 4.620528490113284e-07, "epoch": 3.563628368309188, "percentage": 71.27, "elapsed_time": "3:09:10", "remaining_time": "1:16:15", "throughput": 8660.9, "total_tokens": 98309136} +{"current_steps": 145875, "total_steps": 204665, "loss": 0.0, "lr": 4.6198096377894644e-07, "epoch": 3.5637505191410352, "percentage": 71.28, "elapsed_time": "3:09:11", "remaining_time": "1:16:14", "throughput": 8660.91, "total_tokens": 98312144} +{"current_steps": 145880, "total_steps": 204665, "loss": 0.0, "lr": 4.61909082459184e-07, "epoch": 3.5638726699728824, "percentage": 71.28, "elapsed_time": "3:09:11", "remaining_time": "1:16:14", "throughput": 8660.96, "total_tokens": 98315856} +{"current_steps": 145885, "total_steps": 204665, "loss": 0.0682, "lr": 4.618372050525631e-07, "epoch": 3.5639948208047296, "percentage": 71.28, "elapsed_time": "3:09:11", "remaining_time": "1:16:13", "throughput": 8661.01, "total_tokens": 98319440} +{"current_steps": 145890, "total_steps": 204665, "loss": 0.0, "lr": 4.617653315596072e-07, "epoch": 3.564116971636577, "percentage": 71.28, "elapsed_time": "3:09:12", "remaining_time": "1:16:13", "throughput": 8661.02, "total_tokens": 98322512} +{"current_steps": 145895, "total_steps": 204665, "loss": 0.0, "lr": 4.616934619808381e-07, "epoch": 3.564239122468424, "percentage": 71.28, "elapsed_time": "3:09:12", "remaining_time": "1:16:13", "throughput": 8661.04, "total_tokens": 98325776} +{"current_steps": 145900, "total_steps": 204665, "loss": 0.0, "lr": 4.6162159631677946e-07, "epoch": 3.564361273300271, "percentage": 71.29, "elapsed_time": "3:09:12", "remaining_time": "1:16:12", "throughput": 8661.07, "total_tokens": 98329040} +{"current_steps": 145905, "total_steps": 204665, "loss": 0.0, "lr": 4.615497345679529e-07, "epoch": 3.5644834241321184, "percentage": 71.29, "elapsed_time": "3:09:13", "remaining_time": "1:16:12", "throughput": 8661.1, "total_tokens": 98332496} +{"current_steps": 145910, "total_steps": 204665, "loss": 0.0835, "lr": 4.614778767348815e-07, "epoch": 3.5646055749639656, "percentage": 71.29, "elapsed_time": "3:09:13", "remaining_time": "1:16:11", "throughput": 8661.11, "total_tokens": 98335440} +{"current_steps": 145915, "total_steps": 204665, "loss": 0.0, "lr": 4.6140602281808816e-07, "epoch": 3.5647277257958128, "percentage": 71.29, "elapsed_time": "3:09:14", "remaining_time": "1:16:11", "throughput": 8661.13, "total_tokens": 98338768} +{"current_steps": 145920, "total_steps": 204665, "loss": 0.0225, "lr": 4.613341728180947e-07, "epoch": 3.56484987662766, "percentage": 71.3, "elapsed_time": "3:09:14", "remaining_time": "1:16:11", "throughput": 8661.18, "total_tokens": 98342352} +{"current_steps": 145925, "total_steps": 204665, "loss": 0.0001, "lr": 4.6126232673542456e-07, "epoch": 3.564972027459507, "percentage": 71.3, "elapsed_time": "3:09:14", "remaining_time": "1:16:10", "throughput": 8661.25, "total_tokens": 98346192} +{"current_steps": 145930, "total_steps": 204665, "loss": 0.0, "lr": 4.6119048457059916e-07, "epoch": 3.5650941782913543, "percentage": 71.3, "elapsed_time": "3:09:15", "remaining_time": "1:16:10", "throughput": 8661.27, "total_tokens": 98349456} +{"current_steps": 145935, "total_steps": 204665, "loss": 0.0001, "lr": 4.611186463241419e-07, "epoch": 3.565216329123201, "percentage": 71.3, "elapsed_time": "3:09:15", "remaining_time": "1:16:09", "throughput": 8661.31, "total_tokens": 98352912} +{"current_steps": 145940, "total_steps": 204665, "loss": 0.0, "lr": 4.610468119965744e-07, "epoch": 3.5653384799550487, "percentage": 71.31, "elapsed_time": "3:09:15", "remaining_time": "1:16:09", "throughput": 8661.31, "total_tokens": 98355856} +{"current_steps": 145945, "total_steps": 204665, "loss": 0.0, "lr": 4.609749815884194e-07, "epoch": 3.5654606307868955, "percentage": 71.31, "elapsed_time": "3:09:16", "remaining_time": "1:16:09", "throughput": 8661.35, "total_tokens": 98359376} +{"current_steps": 145950, "total_steps": 204665, "loss": 0.0002, "lr": 4.609031551001997e-07, "epoch": 3.565582781618743, "percentage": 71.31, "elapsed_time": "3:09:16", "remaining_time": "1:16:08", "throughput": 8661.36, "total_tokens": 98362448} +{"current_steps": 145955, "total_steps": 204665, "loss": 0.0235, "lr": 4.608313325324369e-07, "epoch": 3.56570493245059, "percentage": 71.31, "elapsed_time": "3:09:16", "remaining_time": "1:16:08", "throughput": 8661.4, "total_tokens": 98365968} +{"current_steps": 145960, "total_steps": 204665, "loss": 0.0, "lr": 4.6075951388565414e-07, "epoch": 3.565827083282437, "percentage": 71.32, "elapsed_time": "3:09:17", "remaining_time": "1:16:07", "throughput": 8661.42, "total_tokens": 98369104} +{"current_steps": 145965, "total_steps": 204665, "loss": 0.0001, "lr": 4.6068769916037277e-07, "epoch": 3.565949234114284, "percentage": 71.32, "elapsed_time": "3:09:17", "remaining_time": "1:16:07", "throughput": 8661.45, "total_tokens": 98372560} +{"current_steps": 145970, "total_steps": 204665, "loss": 0.0, "lr": 4.6061588835711583e-07, "epoch": 3.5660713849461314, "percentage": 71.32, "elapsed_time": "3:09:17", "remaining_time": "1:16:07", "throughput": 8661.48, "total_tokens": 98375824} +{"current_steps": 145975, "total_steps": 204665, "loss": 0.0, "lr": 4.605440814764049e-07, "epoch": 3.5661935357779786, "percentage": 71.32, "elapsed_time": "3:09:18", "remaining_time": "1:16:06", "throughput": 8661.51, "total_tokens": 98379280} +{"current_steps": 145980, "total_steps": 204665, "loss": 0.0002, "lr": 4.604722785187629e-07, "epoch": 3.566315686609826, "percentage": 71.33, "elapsed_time": "3:09:18", "remaining_time": "1:16:06", "throughput": 8661.54, "total_tokens": 98382608} +{"current_steps": 145985, "total_steps": 204665, "loss": 0.0, "lr": 4.604004794847116e-07, "epoch": 3.566437837441673, "percentage": 71.33, "elapsed_time": "3:09:18", "remaining_time": "1:16:05", "throughput": 8661.57, "total_tokens": 98385872} +{"current_steps": 145990, "total_steps": 204665, "loss": 0.0, "lr": 4.603286843747728e-07, "epoch": 3.56655998827352, "percentage": 71.33, "elapsed_time": "3:09:19", "remaining_time": "1:16:05", "throughput": 8661.62, "total_tokens": 98389584} +{"current_steps": 145995, "total_steps": 204665, "loss": 0.0, "lr": 4.602568931894694e-07, "epoch": 3.5666821391053674, "percentage": 71.33, "elapsed_time": "3:09:19", "remaining_time": "1:16:05", "throughput": 8661.68, "total_tokens": 98393296} +{"current_steps": 146000, "total_steps": 204665, "loss": 0.0, "lr": 4.601851059293225e-07, "epoch": 3.5668042899372145, "percentage": 71.34, "elapsed_time": "3:09:19", "remaining_time": "1:16:04", "throughput": 8661.7, "total_tokens": 98396496} +{"current_steps": 146005, "total_steps": 204665, "loss": 0.0001, "lr": 4.601133225948548e-07, "epoch": 3.5669264407690617, "percentage": 71.34, "elapsed_time": "3:09:20", "remaining_time": "1:16:04", "throughput": 8661.71, "total_tokens": 98399632} +{"current_steps": 146010, "total_steps": 204665, "loss": 0.0003, "lr": 4.600415431865886e-07, "epoch": 3.567048591600909, "percentage": 71.34, "elapsed_time": "3:09:20", "remaining_time": "1:16:03", "throughput": 8661.75, "total_tokens": 98403024} +{"current_steps": 146015, "total_steps": 204665, "loss": 0.0005, "lr": 4.5996976770504514e-07, "epoch": 3.567170742432756, "percentage": 71.34, "elapsed_time": "3:09:20", "remaining_time": "1:16:03", "throughput": 8661.78, "total_tokens": 98406352} +{"current_steps": 146020, "total_steps": 204665, "loss": 0.0, "lr": 4.598979961507471e-07, "epoch": 3.567292893264603, "percentage": 71.35, "elapsed_time": "3:09:21", "remaining_time": "1:16:02", "throughput": 8661.79, "total_tokens": 98409488} +{"current_steps": 146025, "total_steps": 204665, "loss": 0.0, "lr": 4.598262285242158e-07, "epoch": 3.5674150440964505, "percentage": 71.35, "elapsed_time": "3:09:21", "remaining_time": "1:16:02", "throughput": 8661.81, "total_tokens": 98412752} +{"current_steps": 146030, "total_steps": 204665, "loss": 0.0546, "lr": 4.597544648259738e-07, "epoch": 3.5675371949282972, "percentage": 71.35, "elapsed_time": "3:09:22", "remaining_time": "1:16:02", "throughput": 8661.86, "total_tokens": 98416336} +{"current_steps": 146035, "total_steps": 204665, "loss": 0.062, "lr": 4.5968270505654227e-07, "epoch": 3.567659345760145, "percentage": 71.35, "elapsed_time": "3:09:22", "remaining_time": "1:16:01", "throughput": 8661.89, "total_tokens": 98419664} +{"current_steps": 146040, "total_steps": 204665, "loss": 0.0, "lr": 4.596109492164435e-07, "epoch": 3.5677814965919916, "percentage": 71.36, "elapsed_time": "3:09:22", "remaining_time": "1:16:01", "throughput": 8661.9, "total_tokens": 98422800} +{"current_steps": 146045, "total_steps": 204665, "loss": 0.0, "lr": 4.595391973061995e-07, "epoch": 3.567903647423839, "percentage": 71.36, "elapsed_time": "3:09:23", "remaining_time": "1:16:00", "throughput": 8661.91, "total_tokens": 98425872} +{"current_steps": 146050, "total_steps": 204665, "loss": 0.0, "lr": 4.5946744932633155e-07, "epoch": 3.568025798255686, "percentage": 71.36, "elapsed_time": "3:09:23", "remaining_time": "1:16:00", "throughput": 8661.96, "total_tokens": 98429392} +{"current_steps": 146055, "total_steps": 204665, "loss": 0.0001, "lr": 4.5939570527736203e-07, "epoch": 3.568147949087533, "percentage": 71.36, "elapsed_time": "3:09:23", "remaining_time": "1:16:00", "throughput": 8662.01, "total_tokens": 98433040} +{"current_steps": 146060, "total_steps": 204665, "loss": 0.0563, "lr": 4.59323965159812e-07, "epoch": 3.5682700999193804, "percentage": 71.37, "elapsed_time": "3:09:24", "remaining_time": "1:15:59", "throughput": 8662.02, "total_tokens": 98436176} +{"current_steps": 146065, "total_steps": 204665, "loss": 0.0308, "lr": 4.592522289742036e-07, "epoch": 3.5683922507512276, "percentage": 71.37, "elapsed_time": "3:09:24", "remaining_time": "1:15:59", "throughput": 8662.06, "total_tokens": 98439568} +{"current_steps": 146070, "total_steps": 204665, "loss": 0.0, "lr": 4.591804967210586e-07, "epoch": 3.5685144015830748, "percentage": 71.37, "elapsed_time": "3:09:24", "remaining_time": "1:15:58", "throughput": 8662.07, "total_tokens": 98442640} +{"current_steps": 146075, "total_steps": 204665, "loss": 0.0002, "lr": 4.5910876840089865e-07, "epoch": 3.568636552414922, "percentage": 71.37, "elapsed_time": "3:09:25", "remaining_time": "1:15:58", "throughput": 8662.11, "total_tokens": 98446096} +{"current_steps": 146080, "total_steps": 204665, "loss": 0.0, "lr": 4.590370440142448e-07, "epoch": 3.568758703246769, "percentage": 71.38, "elapsed_time": "3:09:25", "remaining_time": "1:15:58", "throughput": 8662.12, "total_tokens": 98449168} +{"current_steps": 146085, "total_steps": 204665, "loss": 0.0001, "lr": 4.5896532356161944e-07, "epoch": 3.5688808540786163, "percentage": 71.38, "elapsed_time": "3:09:25", "remaining_time": "1:15:57", "throughput": 8662.16, "total_tokens": 98452688} +{"current_steps": 146090, "total_steps": 204665, "loss": 0.0, "lr": 4.5889360704354375e-07, "epoch": 3.5690030049104635, "percentage": 71.38, "elapsed_time": "3:09:26", "remaining_time": "1:15:57", "throughput": 8662.18, "total_tokens": 98455952} +{"current_steps": 146095, "total_steps": 204665, "loss": 0.0, "lr": 4.58821894460539e-07, "epoch": 3.5691251557423107, "percentage": 71.38, "elapsed_time": "3:09:26", "remaining_time": "1:15:56", "throughput": 8662.22, "total_tokens": 98459408} +{"current_steps": 146100, "total_steps": 204665, "loss": 0.0504, "lr": 4.5875018581312684e-07, "epoch": 3.569247306574158, "percentage": 71.38, "elapsed_time": "3:09:26", "remaining_time": "1:15:56", "throughput": 8662.25, "total_tokens": 98462736} +{"current_steps": 146105, "total_steps": 204665, "loss": 0.0001, "lr": 4.5867848110182937e-07, "epoch": 3.569369457406005, "percentage": 71.39, "elapsed_time": "3:09:27", "remaining_time": "1:15:56", "throughput": 8662.29, "total_tokens": 98466192} +{"current_steps": 146110, "total_steps": 204665, "loss": 0.0361, "lr": 4.5860678032716724e-07, "epoch": 3.5694916082378523, "percentage": 71.39, "elapsed_time": "3:09:27", "remaining_time": "1:15:55", "throughput": 8662.32, "total_tokens": 98469520} +{"current_steps": 146115, "total_steps": 204665, "loss": 0.0001, "lr": 4.5853508348966253e-07, "epoch": 3.569613759069699, "percentage": 71.39, "elapsed_time": "3:09:27", "remaining_time": "1:15:55", "throughput": 8662.34, "total_tokens": 98472720} +{"current_steps": 146120, "total_steps": 204665, "loss": 0.0, "lr": 4.5846339058983595e-07, "epoch": 3.5697359099015467, "percentage": 71.39, "elapsed_time": "3:09:28", "remaining_time": "1:15:54", "throughput": 8662.39, "total_tokens": 98476304} +{"current_steps": 146125, "total_steps": 204665, "loss": 0.0001, "lr": 4.583917016282097e-07, "epoch": 3.5698580607333934, "percentage": 71.4, "elapsed_time": "3:09:28", "remaining_time": "1:15:54", "throughput": 8662.39, "total_tokens": 98479312} +{"current_steps": 146130, "total_steps": 204665, "loss": 0.0, "lr": 4.583200166053043e-07, "epoch": 3.569980211565241, "percentage": 71.4, "elapsed_time": "3:09:28", "remaining_time": "1:15:54", "throughput": 8662.42, "total_tokens": 98482640} +{"current_steps": 146135, "total_steps": 204665, "loss": 0.0625, "lr": 4.5824833552164134e-07, "epoch": 3.570102362397088, "percentage": 71.4, "elapsed_time": "3:09:29", "remaining_time": "1:15:53", "throughput": 8662.44, "total_tokens": 98485904} +{"current_steps": 146140, "total_steps": 204665, "loss": 0.0, "lr": 4.5817665837774265e-07, "epoch": 3.570224513228935, "percentage": 71.4, "elapsed_time": "3:09:29", "remaining_time": "1:15:53", "throughput": 8662.48, "total_tokens": 98489424} +{"current_steps": 146145, "total_steps": 204665, "loss": 0.0, "lr": 4.581049851741287e-07, "epoch": 3.570346664060782, "percentage": 71.41, "elapsed_time": "3:09:29", "remaining_time": "1:15:52", "throughput": 8662.51, "total_tokens": 98492752} +{"current_steps": 146150, "total_steps": 204665, "loss": 0.0, "lr": 4.580333159113213e-07, "epoch": 3.5704688148926294, "percentage": 71.41, "elapsed_time": "3:09:30", "remaining_time": "1:15:52", "throughput": 8662.56, "total_tokens": 98496336} +{"current_steps": 146155, "total_steps": 204665, "loss": 0.0001, "lr": 4.5796165058984104e-07, "epoch": 3.5705909657244765, "percentage": 71.41, "elapsed_time": "3:09:30", "remaining_time": "1:15:52", "throughput": 8662.58, "total_tokens": 98499536} +{"current_steps": 146160, "total_steps": 204665, "loss": 0.0, "lr": 4.578899892102095e-07, "epoch": 3.5707131165563237, "percentage": 71.41, "elapsed_time": "3:09:31", "remaining_time": "1:15:51", "throughput": 8662.58, "total_tokens": 98502544} +{"current_steps": 146165, "total_steps": 204665, "loss": 0.0, "lr": 4.5781833177294815e-07, "epoch": 3.570835267388171, "percentage": 71.42, "elapsed_time": "3:09:31", "remaining_time": "1:15:51", "throughput": 8662.61, "total_tokens": 98505808} +{"current_steps": 146170, "total_steps": 204665, "loss": 0.0002, "lr": 4.577466782785774e-07, "epoch": 3.570957418220018, "percentage": 71.42, "elapsed_time": "3:09:31", "remaining_time": "1:15:50", "throughput": 8662.65, "total_tokens": 98509392} +{"current_steps": 146175, "total_steps": 204665, "loss": 0.0009, "lr": 4.5767502872761885e-07, "epoch": 3.5710795690518653, "percentage": 71.42, "elapsed_time": "3:09:32", "remaining_time": "1:15:50", "throughput": 8662.68, "total_tokens": 98512656} +{"current_steps": 146180, "total_steps": 204665, "loss": 0.0, "lr": 4.576033831205935e-07, "epoch": 3.5712017198837125, "percentage": 71.42, "elapsed_time": "3:09:32", "remaining_time": "1:15:49", "throughput": 8662.69, "total_tokens": 98515728} +{"current_steps": 146185, "total_steps": 204665, "loss": 0.0, "lr": 4.5753174145802185e-07, "epoch": 3.5713238707155597, "percentage": 71.43, "elapsed_time": "3:09:32", "remaining_time": "1:15:49", "throughput": 8662.7, "total_tokens": 98518800} +{"current_steps": 146190, "total_steps": 204665, "loss": 0.0, "lr": 4.5746010374042567e-07, "epoch": 3.571446021547407, "percentage": 71.43, "elapsed_time": "3:09:33", "remaining_time": "1:15:49", "throughput": 8662.72, "total_tokens": 98521936} +{"current_steps": 146195, "total_steps": 204665, "loss": 0.0306, "lr": 4.5738846996832505e-07, "epoch": 3.571568172379254, "percentage": 71.43, "elapsed_time": "3:09:33", "remaining_time": "1:15:48", "throughput": 8662.73, "total_tokens": 98525072} +{"current_steps": 146200, "total_steps": 204665, "loss": 0.041, "lr": 4.573168401422419e-07, "epoch": 3.571690323211101, "percentage": 71.43, "elapsed_time": "3:09:33", "remaining_time": "1:15:48", "throughput": 8662.77, "total_tokens": 98528592} +{"current_steps": 146205, "total_steps": 204665, "loss": 0.0, "lr": 4.5724521426269626e-07, "epoch": 3.5718124740429484, "percentage": 71.44, "elapsed_time": "3:09:34", "remaining_time": "1:15:47", "throughput": 8662.81, "total_tokens": 98532112} +{"current_steps": 146210, "total_steps": 204665, "loss": 0.0001, "lr": 4.571735923302098e-07, "epoch": 3.571934624874795, "percentage": 71.44, "elapsed_time": "3:09:34", "remaining_time": "1:15:47", "throughput": 8662.85, "total_tokens": 98535568} +{"current_steps": 146215, "total_steps": 204665, "loss": 0.0566, "lr": 4.571019743453025e-07, "epoch": 3.572056775706643, "percentage": 71.44, "elapsed_time": "3:09:34", "remaining_time": "1:15:47", "throughput": 8662.91, "total_tokens": 98539344} +{"current_steps": 146220, "total_steps": 204665, "loss": 0.0, "lr": 4.5703036030849617e-07, "epoch": 3.5721789265384896, "percentage": 71.44, "elapsed_time": "3:09:35", "remaining_time": "1:15:46", "throughput": 8662.97, "total_tokens": 98543056} +{"current_steps": 146225, "total_steps": 204665, "loss": 0.0, "lr": 4.5695875022031073e-07, "epoch": 3.5723010773703368, "percentage": 71.45, "elapsed_time": "3:09:35", "remaining_time": "1:15:46", "throughput": 8663.02, "total_tokens": 98546640} +{"current_steps": 146230, "total_steps": 204665, "loss": 0.0002, "lr": 4.5688714408126717e-07, "epoch": 3.572423228202184, "percentage": 71.45, "elapsed_time": "3:09:35", "remaining_time": "1:15:45", "throughput": 8663.03, "total_tokens": 98549712} +{"current_steps": 146235, "total_steps": 204665, "loss": 0.0, "lr": 4.5681554189188684e-07, "epoch": 3.572545379034031, "percentage": 71.45, "elapsed_time": "3:09:36", "remaining_time": "1:15:45", "throughput": 8663.06, "total_tokens": 98553104} +{"current_steps": 146240, "total_steps": 204665, "loss": 0.0003, "lr": 4.5674394365268966e-07, "epoch": 3.5726675298658783, "percentage": 71.45, "elapsed_time": "3:09:36", "remaining_time": "1:15:45", "throughput": 8663.09, "total_tokens": 98556496} +{"current_steps": 146245, "total_steps": 204665, "loss": 0.0007, "lr": 4.56672349364197e-07, "epoch": 3.5727896806977255, "percentage": 71.46, "elapsed_time": "3:09:36", "remaining_time": "1:15:44", "throughput": 8663.15, "total_tokens": 98560208} +{"current_steps": 146250, "total_steps": 204665, "loss": 0.0001, "lr": 4.5660075902692877e-07, "epoch": 3.5729118315295727, "percentage": 71.46, "elapsed_time": "3:09:37", "remaining_time": "1:15:44", "throughput": 8663.21, "total_tokens": 98563984} +{"current_steps": 146255, "total_steps": 204665, "loss": 0.0001, "lr": 4.565291726414059e-07, "epoch": 3.57303398236142, "percentage": 71.46, "elapsed_time": "3:09:37", "remaining_time": "1:15:43", "throughput": 8663.26, "total_tokens": 98567696} +{"current_steps": 146260, "total_steps": 204665, "loss": 0.0, "lr": 4.5645759020814955e-07, "epoch": 3.573156133193267, "percentage": 71.46, "elapsed_time": "3:09:37", "remaining_time": "1:15:43", "throughput": 8663.27, "total_tokens": 98570704} +{"current_steps": 146265, "total_steps": 204665, "loss": 0.0893, "lr": 4.5638601172767934e-07, "epoch": 3.5732782840251143, "percentage": 71.47, "elapsed_time": "3:09:38", "remaining_time": "1:15:43", "throughput": 8663.3, "total_tokens": 98573968} +{"current_steps": 146270, "total_steps": 204665, "loss": 0.0, "lr": 4.5631443720051667e-07, "epoch": 3.5734004348569615, "percentage": 71.47, "elapsed_time": "3:09:38", "remaining_time": "1:15:42", "throughput": 8663.31, "total_tokens": 98577040} +{"current_steps": 146275, "total_steps": 204665, "loss": 0.0225, "lr": 4.5624286662718124e-07, "epoch": 3.5735225856888087, "percentage": 71.47, "elapsed_time": "3:09:39", "remaining_time": "1:15:42", "throughput": 8663.32, "total_tokens": 98580176} +{"current_steps": 146280, "total_steps": 204665, "loss": 0.0, "lr": 4.5617130000819435e-07, "epoch": 3.573644736520656, "percentage": 71.47, "elapsed_time": "3:09:39", "remaining_time": "1:15:41", "throughput": 8663.33, "total_tokens": 98583184} +{"current_steps": 146285, "total_steps": 204665, "loss": 0.0, "lr": 4.560997373440757e-07, "epoch": 3.573766887352503, "percentage": 71.48, "elapsed_time": "3:09:39", "remaining_time": "1:15:41", "throughput": 8663.36, "total_tokens": 98586640} +{"current_steps": 146290, "total_steps": 204665, "loss": 0.0, "lr": 4.560281786353464e-07, "epoch": 3.57388903818435, "percentage": 71.48, "elapsed_time": "3:09:40", "remaining_time": "1:15:41", "throughput": 8663.38, "total_tokens": 98589776} +{"current_steps": 146295, "total_steps": 204665, "loss": 0.0001, "lr": 4.5595662388252643e-07, "epoch": 3.574011189016197, "percentage": 71.48, "elapsed_time": "3:09:40", "remaining_time": "1:15:40", "throughput": 8663.39, "total_tokens": 98592912} +{"current_steps": 146300, "total_steps": 204665, "loss": 0.0, "lr": 4.55885073086136e-07, "epoch": 3.5741333398480446, "percentage": 71.48, "elapsed_time": "3:09:40", "remaining_time": "1:15:40", "throughput": 8663.42, "total_tokens": 98596176} +{"current_steps": 146305, "total_steps": 204665, "loss": 0.0058, "lr": 4.558135262466959e-07, "epoch": 3.5742554906798913, "percentage": 71.49, "elapsed_time": "3:09:41", "remaining_time": "1:15:39", "throughput": 8663.43, "total_tokens": 98599312} +{"current_steps": 146310, "total_steps": 204665, "loss": 0.0, "lr": 4.557419833647258e-07, "epoch": 3.5743776415117385, "percentage": 71.49, "elapsed_time": "3:09:41", "remaining_time": "1:15:39", "throughput": 8663.45, "total_tokens": 98602512} +{"current_steps": 146315, "total_steps": 204665, "loss": 0.0001, "lr": 4.556704444407465e-07, "epoch": 3.5744997923435857, "percentage": 71.49, "elapsed_time": "3:09:41", "remaining_time": "1:15:39", "throughput": 8663.49, "total_tokens": 98605904} +{"current_steps": 146320, "total_steps": 204665, "loss": 0.0003, "lr": 4.5559890947527843e-07, "epoch": 3.574621943175433, "percentage": 71.49, "elapsed_time": "3:09:42", "remaining_time": "1:15:38", "throughput": 8663.55, "total_tokens": 98609680} +{"current_steps": 146325, "total_steps": 204665, "loss": 0.0657, "lr": 4.555273784688413e-07, "epoch": 3.57474409400728, "percentage": 71.49, "elapsed_time": "3:09:42", "remaining_time": "1:15:38", "throughput": 8663.58, "total_tokens": 98613072} +{"current_steps": 146330, "total_steps": 204665, "loss": 0.0, "lr": 4.554558514219557e-07, "epoch": 3.5748662448391273, "percentage": 71.5, "elapsed_time": "3:09:42", "remaining_time": "1:15:37", "throughput": 8663.62, "total_tokens": 98616656} +{"current_steps": 146335, "total_steps": 204665, "loss": 0.1131, "lr": 4.553843283351413e-07, "epoch": 3.5749883956709745, "percentage": 71.5, "elapsed_time": "3:09:43", "remaining_time": "1:15:37", "throughput": 8663.65, "total_tokens": 98619920} +{"current_steps": 146340, "total_steps": 204665, "loss": 0.0, "lr": 4.553128092089189e-07, "epoch": 3.5751105465028217, "percentage": 71.5, "elapsed_time": "3:09:43", "remaining_time": "1:15:36", "throughput": 8663.65, "total_tokens": 98622864} +{"current_steps": 146345, "total_steps": 204665, "loss": 0.0001, "lr": 4.5524129404380794e-07, "epoch": 3.575232697334669, "percentage": 71.5, "elapsed_time": "3:09:43", "remaining_time": "1:15:36", "throughput": 8663.7, "total_tokens": 98626448} +{"current_steps": 146350, "total_steps": 204665, "loss": 0.0, "lr": 4.551697828403288e-07, "epoch": 3.575354848166516, "percentage": 71.51, "elapsed_time": "3:09:44", "remaining_time": "1:15:36", "throughput": 8663.74, "total_tokens": 98629904} +{"current_steps": 146355, "total_steps": 204665, "loss": 0.0, "lr": 4.5509827559900194e-07, "epoch": 3.5754769989983632, "percentage": 71.51, "elapsed_time": "3:09:44", "remaining_time": "1:15:35", "throughput": 8663.79, "total_tokens": 98633616} +{"current_steps": 146360, "total_steps": 204665, "loss": 0.0, "lr": 4.550267723203466e-07, "epoch": 3.5755991498302104, "percentage": 71.51, "elapsed_time": "3:09:44", "remaining_time": "1:15:35", "throughput": 8663.81, "total_tokens": 98636816} +{"current_steps": 146365, "total_steps": 204665, "loss": 0.0001, "lr": 4.5495527300488346e-07, "epoch": 3.5757213006620576, "percentage": 71.51, "elapsed_time": "3:09:45", "remaining_time": "1:15:34", "throughput": 8663.88, "total_tokens": 98640720} +{"current_steps": 146370, "total_steps": 204665, "loss": 0.0, "lr": 4.548837776531318e-07, "epoch": 3.575843451493905, "percentage": 71.52, "elapsed_time": "3:09:45", "remaining_time": "1:15:34", "throughput": 8663.93, "total_tokens": 98644432} +{"current_steps": 146375, "total_steps": 204665, "loss": 0.0023, "lr": 4.548122862656124e-07, "epoch": 3.575965602325752, "percentage": 71.52, "elapsed_time": "3:09:45", "remaining_time": "1:15:34", "throughput": 8663.97, "total_tokens": 98647824} +{"current_steps": 146380, "total_steps": 204665, "loss": 0.0, "lr": 4.547407988428442e-07, "epoch": 3.5760877531575987, "percentage": 71.52, "elapsed_time": "3:09:46", "remaining_time": "1:15:33", "throughput": 8663.98, "total_tokens": 98651024} +{"current_steps": 146385, "total_steps": 204665, "loss": 0.0001, "lr": 4.5466931538534804e-07, "epoch": 3.5762099039894464, "percentage": 71.52, "elapsed_time": "3:09:46", "remaining_time": "1:15:33", "throughput": 8664.03, "total_tokens": 98654608} +{"current_steps": 146390, "total_steps": 204665, "loss": 0.0, "lr": 4.545978358936429e-07, "epoch": 3.576332054821293, "percentage": 71.53, "elapsed_time": "3:09:47", "remaining_time": "1:15:32", "throughput": 8664.09, "total_tokens": 98658320} +{"current_steps": 146395, "total_steps": 204665, "loss": 0.0, "lr": 4.5452636036824933e-07, "epoch": 3.5764542056531408, "percentage": 71.53, "elapsed_time": "3:09:47", "remaining_time": "1:15:32", "throughput": 8664.12, "total_tokens": 98661712} +{"current_steps": 146400, "total_steps": 204665, "loss": 0.0001, "lr": 4.5445488880968673e-07, "epoch": 3.5765763564849875, "percentage": 71.53, "elapsed_time": "3:09:47", "remaining_time": "1:15:32", "throughput": 8664.13, "total_tokens": 98664720} +{"current_steps": 146405, "total_steps": 204665, "loss": 0.0003, "lr": 4.543834212184746e-07, "epoch": 3.5766985073168347, "percentage": 71.53, "elapsed_time": "3:09:48", "remaining_time": "1:15:31", "throughput": 8664.15, "total_tokens": 98667984} +{"current_steps": 146410, "total_steps": 204665, "loss": 0.0, "lr": 4.543119575951331e-07, "epoch": 3.576820658148682, "percentage": 71.54, "elapsed_time": "3:09:48", "remaining_time": "1:15:31", "throughput": 8664.18, "total_tokens": 98671248} +{"current_steps": 146415, "total_steps": 204665, "loss": 0.0001, "lr": 4.5424049794018203e-07, "epoch": 3.576942808980529, "percentage": 71.54, "elapsed_time": "3:09:48", "remaining_time": "1:15:30", "throughput": 8664.23, "total_tokens": 98674896} +{"current_steps": 146420, "total_steps": 204665, "loss": 0.0, "lr": 4.5416904225414055e-07, "epoch": 3.5770649598123763, "percentage": 71.54, "elapsed_time": "3:09:49", "remaining_time": "1:15:30", "throughput": 8664.26, "total_tokens": 98678288} +{"current_steps": 146425, "total_steps": 204665, "loss": 0.0, "lr": 4.540975905375289e-07, "epoch": 3.5771871106442235, "percentage": 71.54, "elapsed_time": "3:09:49", "remaining_time": "1:15:30", "throughput": 8664.32, "total_tokens": 98682064} +{"current_steps": 146430, "total_steps": 204665, "loss": 0.0019, "lr": 4.5402614279086617e-07, "epoch": 3.5773092614760706, "percentage": 71.55, "elapsed_time": "3:09:49", "remaining_time": "1:15:29", "throughput": 8664.37, "total_tokens": 98685648} +{"current_steps": 146435, "total_steps": 204665, "loss": 0.0334, "lr": 4.539546990146724e-07, "epoch": 3.577431412307918, "percentage": 71.55, "elapsed_time": "3:09:50", "remaining_time": "1:15:29", "throughput": 8664.43, "total_tokens": 98689488} +{"current_steps": 146440, "total_steps": 204665, "loss": 0.0, "lr": 4.538832592094666e-07, "epoch": 3.577553563139765, "percentage": 71.55, "elapsed_time": "3:09:50", "remaining_time": "1:15:28", "throughput": 8664.48, "total_tokens": 98693136} +{"current_steps": 146445, "total_steps": 204665, "loss": 0.0, "lr": 4.538118233757686e-07, "epoch": 3.577675713971612, "percentage": 71.55, "elapsed_time": "3:09:50", "remaining_time": "1:15:28", "throughput": 8664.53, "total_tokens": 98696720} +{"current_steps": 146450, "total_steps": 204665, "loss": 0.0, "lr": 4.5374039151409836e-07, "epoch": 3.5777978648034594, "percentage": 71.56, "elapsed_time": "3:09:51", "remaining_time": "1:15:28", "throughput": 8664.58, "total_tokens": 98700432} +{"current_steps": 146455, "total_steps": 204665, "loss": 0.0001, "lr": 4.5366896362497464e-07, "epoch": 3.5779200156353066, "percentage": 71.56, "elapsed_time": "3:09:51", "remaining_time": "1:15:27", "throughput": 8664.6, "total_tokens": 98703632} +{"current_steps": 146460, "total_steps": 204665, "loss": 0.0, "lr": 4.5359753970891735e-07, "epoch": 3.578042166467154, "percentage": 71.56, "elapsed_time": "3:09:51", "remaining_time": "1:15:27", "throughput": 8664.62, "total_tokens": 98706832} +{"current_steps": 146465, "total_steps": 204665, "loss": 0.111, "lr": 4.535261197664455e-07, "epoch": 3.5781643172990005, "percentage": 71.56, "elapsed_time": "3:09:52", "remaining_time": "1:15:26", "throughput": 8664.65, "total_tokens": 98710160} +{"current_steps": 146470, "total_steps": 204665, "loss": 0.0001, "lr": 4.534547037980786e-07, "epoch": 3.578286468130848, "percentage": 71.57, "elapsed_time": "3:09:52", "remaining_time": "1:15:26", "throughput": 8664.68, "total_tokens": 98713552} +{"current_steps": 146475, "total_steps": 204665, "loss": 0.0, "lr": 4.533832918043364e-07, "epoch": 3.578408618962695, "percentage": 71.57, "elapsed_time": "3:09:52", "remaining_time": "1:15:26", "throughput": 8664.71, "total_tokens": 98716880} +{"current_steps": 146480, "total_steps": 204665, "loss": 0.0, "lr": 4.533118837857377e-07, "epoch": 3.5785307697945425, "percentage": 71.57, "elapsed_time": "3:09:53", "remaining_time": "1:15:25", "throughput": 8664.76, "total_tokens": 98720464} +{"current_steps": 146485, "total_steps": 204665, "loss": 0.0001, "lr": 4.532404797428023e-07, "epoch": 3.5786529206263893, "percentage": 71.57, "elapsed_time": "3:09:53", "remaining_time": "1:15:25", "throughput": 8664.76, "total_tokens": 98723472} +{"current_steps": 146490, "total_steps": 204665, "loss": 0.0, "lr": 4.531690796760492e-07, "epoch": 3.5787750714582365, "percentage": 71.58, "elapsed_time": "3:09:54", "remaining_time": "1:15:24", "throughput": 8664.82, "total_tokens": 98727248} +{"current_steps": 146495, "total_steps": 204665, "loss": 0.0001, "lr": 4.530976835859973e-07, "epoch": 3.5788972222900837, "percentage": 71.58, "elapsed_time": "3:09:54", "remaining_time": "1:15:24", "throughput": 8664.87, "total_tokens": 98730832} +{"current_steps": 146500, "total_steps": 204665, "loss": 0.0268, "lr": 4.530262914731665e-07, "epoch": 3.579019373121931, "percentage": 71.58, "elapsed_time": "3:09:54", "remaining_time": "1:15:24", "throughput": 8664.9, "total_tokens": 98734160} +{"current_steps": 146505, "total_steps": 204665, "loss": 0.0001, "lr": 4.529549033380753e-07, "epoch": 3.579141523953778, "percentage": 71.58, "elapsed_time": "3:09:55", "remaining_time": "1:15:23", "throughput": 8664.93, "total_tokens": 98737552} +{"current_steps": 146510, "total_steps": 204665, "loss": 0.0002, "lr": 4.528835191812435e-07, "epoch": 3.5792636747856252, "percentage": 71.59, "elapsed_time": "3:09:55", "remaining_time": "1:15:23", "throughput": 8664.94, "total_tokens": 98740560} +{"current_steps": 146515, "total_steps": 204665, "loss": 0.0001, "lr": 4.5281213900318947e-07, "epoch": 3.5793858256174724, "percentage": 71.59, "elapsed_time": "3:09:55", "remaining_time": "1:15:22", "throughput": 8664.94, "total_tokens": 98743568} +{"current_steps": 146520, "total_steps": 204665, "loss": 0.0, "lr": 4.527407628044332e-07, "epoch": 3.5795079764493196, "percentage": 71.59, "elapsed_time": "3:09:56", "remaining_time": "1:15:22", "throughput": 8664.97, "total_tokens": 98746832} +{"current_steps": 146525, "total_steps": 204665, "loss": 0.0, "lr": 4.526693905854929e-07, "epoch": 3.579630127281167, "percentage": 71.59, "elapsed_time": "3:09:56", "remaining_time": "1:15:22", "throughput": 8665.02, "total_tokens": 98750544} +{"current_steps": 146530, "total_steps": 204665, "loss": 0.0001, "lr": 4.5259802234688836e-07, "epoch": 3.579752278113014, "percentage": 71.6, "elapsed_time": "3:09:56", "remaining_time": "1:15:21", "throughput": 8665.04, "total_tokens": 98753808} +{"current_steps": 146535, "total_steps": 204665, "loss": 0.0002, "lr": 4.525266580891379e-07, "epoch": 3.579874428944861, "percentage": 71.6, "elapsed_time": "3:09:57", "remaining_time": "1:15:21", "throughput": 8665.06, "total_tokens": 98757008} +{"current_steps": 146540, "total_steps": 204665, "loss": 0.0, "lr": 4.5245529781276083e-07, "epoch": 3.5799965797767084, "percentage": 71.6, "elapsed_time": "3:09:57", "remaining_time": "1:15:20", "throughput": 8665.09, "total_tokens": 98760272} +{"current_steps": 146545, "total_steps": 204665, "loss": 0.0001, "lr": 4.523839415182765e-07, "epoch": 3.5801187306085556, "percentage": 71.6, "elapsed_time": "3:09:57", "remaining_time": "1:15:20", "throughput": 8665.12, "total_tokens": 98763600} +{"current_steps": 146550, "total_steps": 204665, "loss": 0.0, "lr": 4.5231258920620305e-07, "epoch": 3.5802408814404028, "percentage": 71.6, "elapsed_time": "3:09:58", "remaining_time": "1:15:19", "throughput": 8665.15, "total_tokens": 98766992} +{"current_steps": 146555, "total_steps": 204665, "loss": 0.0, "lr": 4.522412408770602e-07, "epoch": 3.58036303227225, "percentage": 71.61, "elapsed_time": "3:09:58", "remaining_time": "1:15:19", "throughput": 8665.17, "total_tokens": 98770256} +{"current_steps": 146560, "total_steps": 204665, "loss": 0.0, "lr": 4.5216989653136584e-07, "epoch": 3.5804851831040967, "percentage": 71.61, "elapsed_time": "3:09:58", "remaining_time": "1:15:19", "throughput": 8665.21, "total_tokens": 98773712} +{"current_steps": 146565, "total_steps": 204665, "loss": 0.0, "lr": 4.5209855616963945e-07, "epoch": 3.5806073339359443, "percentage": 71.61, "elapsed_time": "3:09:59", "remaining_time": "1:15:18", "throughput": 8665.24, "total_tokens": 98777104} +{"current_steps": 146570, "total_steps": 204665, "loss": 0.0631, "lr": 4.520272197924001e-07, "epoch": 3.580729484767791, "percentage": 71.61, "elapsed_time": "3:09:59", "remaining_time": "1:15:18", "throughput": 8665.28, "total_tokens": 98780432} +{"current_steps": 146575, "total_steps": 204665, "loss": 0.0, "lr": 4.519558874001658e-07, "epoch": 3.5808516355996387, "percentage": 71.62, "elapsed_time": "3:09:59", "remaining_time": "1:15:17", "throughput": 8665.33, "total_tokens": 98784080} +{"current_steps": 146580, "total_steps": 204665, "loss": 0.0001, "lr": 4.51884558993456e-07, "epoch": 3.5809737864314855, "percentage": 71.62, "elapsed_time": "3:10:00", "remaining_time": "1:15:17", "throughput": 8665.31, "total_tokens": 98786832} +{"current_steps": 146585, "total_steps": 204665, "loss": 0.0366, "lr": 4.518132345727889e-07, "epoch": 3.5810959372633326, "percentage": 71.62, "elapsed_time": "3:10:00", "remaining_time": "1:15:17", "throughput": 8665.34, "total_tokens": 98790096} +{"current_steps": 146590, "total_steps": 204665, "loss": 0.0001, "lr": 4.5174191413868354e-07, "epoch": 3.58121808809518, "percentage": 71.62, "elapsed_time": "3:10:00", "remaining_time": "1:15:16", "throughput": 8665.38, "total_tokens": 98793680} +{"current_steps": 146595, "total_steps": 204665, "loss": 0.0, "lr": 4.5167059769165827e-07, "epoch": 3.581340238927027, "percentage": 71.63, "elapsed_time": "3:10:01", "remaining_time": "1:15:16", "throughput": 8665.4, "total_tokens": 98796816} +{"current_steps": 146600, "total_steps": 204665, "loss": 0.0, "lr": 4.5159928523223224e-07, "epoch": 3.581462389758874, "percentage": 71.63, "elapsed_time": "3:10:01", "remaining_time": "1:15:15", "throughput": 8665.45, "total_tokens": 98800400} +{"current_steps": 146605, "total_steps": 204665, "loss": 0.0, "lr": 4.5152797676092367e-07, "epoch": 3.5815845405907214, "percentage": 71.63, "elapsed_time": "3:10:01", "remaining_time": "1:15:15", "throughput": 8665.47, "total_tokens": 98803600} +{"current_steps": 146610, "total_steps": 204665, "loss": 0.0, "lr": 4.514566722782508e-07, "epoch": 3.5817066914225686, "percentage": 71.63, "elapsed_time": "3:10:02", "remaining_time": "1:15:15", "throughput": 8665.5, "total_tokens": 98806992} +{"current_steps": 146615, "total_steps": 204665, "loss": 0.0, "lr": 4.513853717847329e-07, "epoch": 3.581828842254416, "percentage": 71.64, "elapsed_time": "3:10:02", "remaining_time": "1:15:14", "throughput": 8665.53, "total_tokens": 98810384} +{"current_steps": 146620, "total_steps": 204665, "loss": 0.0, "lr": 4.513140752808878e-07, "epoch": 3.581950993086263, "percentage": 71.64, "elapsed_time": "3:10:03", "remaining_time": "1:15:14", "throughput": 8665.52, "total_tokens": 98813136} +{"current_steps": 146625, "total_steps": 204665, "loss": 0.0, "lr": 4.512427827672344e-07, "epoch": 3.58207314391811, "percentage": 71.64, "elapsed_time": "3:10:03", "remaining_time": "1:15:13", "throughput": 8665.59, "total_tokens": 98816976} +{"current_steps": 146630, "total_steps": 204665, "loss": 0.0001, "lr": 4.5117149424429135e-07, "epoch": 3.5821952947499573, "percentage": 71.64, "elapsed_time": "3:10:03", "remaining_time": "1:15:13", "throughput": 8665.59, "total_tokens": 98819856} +{"current_steps": 146635, "total_steps": 204665, "loss": 0.0, "lr": 4.5110020971257645e-07, "epoch": 3.5823174455818045, "percentage": 71.65, "elapsed_time": "3:10:04", "remaining_time": "1:15:13", "throughput": 8665.65, "total_tokens": 98823696} +{"current_steps": 146640, "total_steps": 204665, "loss": 0.0001, "lr": 4.510289291726088e-07, "epoch": 3.5824395964136517, "percentage": 71.65, "elapsed_time": "3:10:04", "remaining_time": "1:15:12", "throughput": 8665.7, "total_tokens": 98827280} +{"current_steps": 146645, "total_steps": 204665, "loss": 0.0, "lr": 4.5095765262490614e-07, "epoch": 3.5825617472454985, "percentage": 71.65, "elapsed_time": "3:10:04", "remaining_time": "1:15:12", "throughput": 8665.73, "total_tokens": 98830736} +{"current_steps": 146650, "total_steps": 204665, "loss": 0.0, "lr": 4.5088638006998745e-07, "epoch": 3.582683898077346, "percentage": 71.65, "elapsed_time": "3:10:05", "remaining_time": "1:15:11", "throughput": 8665.75, "total_tokens": 98833872} +{"current_steps": 146655, "total_steps": 204665, "loss": 0.0002, "lr": 4.508151115083703e-07, "epoch": 3.582806048909193, "percentage": 71.66, "elapsed_time": "3:10:05", "remaining_time": "1:15:11", "throughput": 8665.78, "total_tokens": 98837200} +{"current_steps": 146660, "total_steps": 204665, "loss": 0.0, "lr": 4.5074384694057334e-07, "epoch": 3.5829281997410405, "percentage": 71.66, "elapsed_time": "3:10:05", "remaining_time": "1:15:11", "throughput": 8665.81, "total_tokens": 98840656} +{"current_steps": 146665, "total_steps": 204665, "loss": 0.0, "lr": 4.5067258636711536e-07, "epoch": 3.5830503505728872, "percentage": 71.66, "elapsed_time": "3:10:06", "remaining_time": "1:15:10", "throughput": 8665.84, "total_tokens": 98843984} +{"current_steps": 146670, "total_steps": 204665, "loss": 0.0313, "lr": 4.5060132978851364e-07, "epoch": 3.5831725014047344, "percentage": 71.66, "elapsed_time": "3:10:06", "remaining_time": "1:15:10", "throughput": 8665.87, "total_tokens": 98847248} +{"current_steps": 146675, "total_steps": 204665, "loss": 0.0001, "lr": 4.5053007720528713e-07, "epoch": 3.5832946522365816, "percentage": 71.67, "elapsed_time": "3:10:06", "remaining_time": "1:15:09", "throughput": 8665.88, "total_tokens": 98850384} +{"current_steps": 146680, "total_steps": 204665, "loss": 0.0, "lr": 4.5045882861795337e-07, "epoch": 3.583416803068429, "percentage": 71.67, "elapsed_time": "3:10:07", "remaining_time": "1:15:09", "throughput": 8665.91, "total_tokens": 98853776} +{"current_steps": 146685, "total_steps": 204665, "loss": 0.0001, "lr": 4.503875840270311e-07, "epoch": 3.583538953900276, "percentage": 71.67, "elapsed_time": "3:10:07", "remaining_time": "1:15:09", "throughput": 8665.94, "total_tokens": 98857040} +{"current_steps": 146690, "total_steps": 204665, "loss": 0.0, "lr": 4.503163434330379e-07, "epoch": 3.583661104732123, "percentage": 71.67, "elapsed_time": "3:10:07", "remaining_time": "1:15:08", "throughput": 8666.0, "total_tokens": 98860944} +{"current_steps": 146695, "total_steps": 204665, "loss": 0.0, "lr": 4.502451068364925e-07, "epoch": 3.5837832555639704, "percentage": 71.68, "elapsed_time": "3:10:08", "remaining_time": "1:15:08", "throughput": 8666.03, "total_tokens": 98864208} +{"current_steps": 146700, "total_steps": 204665, "loss": 0.0, "lr": 4.501738742379121e-07, "epoch": 3.5839054063958176, "percentage": 71.68, "elapsed_time": "3:10:08", "remaining_time": "1:15:07", "throughput": 8666.04, "total_tokens": 98867344} +{"current_steps": 146705, "total_steps": 204665, "loss": 0.0, "lr": 4.5010264563781554e-07, "epoch": 3.5840275572276648, "percentage": 71.68, "elapsed_time": "3:10:08", "remaining_time": "1:15:07", "throughput": 8666.04, "total_tokens": 98870224} +{"current_steps": 146710, "total_steps": 204665, "loss": 0.0, "lr": 4.5003142103672045e-07, "epoch": 3.584149708059512, "percentage": 71.68, "elapsed_time": "3:10:09", "remaining_time": "1:15:07", "throughput": 8666.04, "total_tokens": 98873232} +{"current_steps": 146715, "total_steps": 204665, "loss": 0.0001, "lr": 4.499602004351445e-07, "epoch": 3.584271858891359, "percentage": 71.69, "elapsed_time": "3:10:09", "remaining_time": "1:15:06", "throughput": 8666.09, "total_tokens": 98876816} +{"current_steps": 146720, "total_steps": 204665, "loss": 0.0002, "lr": 4.4988898383360576e-07, "epoch": 3.5843940097232063, "percentage": 71.69, "elapsed_time": "3:10:09", "remaining_time": "1:15:06", "throughput": 8666.12, "total_tokens": 98880208} +{"current_steps": 146725, "total_steps": 204665, "loss": 0.0, "lr": 4.498177712326228e-07, "epoch": 3.5845161605550535, "percentage": 71.69, "elapsed_time": "3:10:10", "remaining_time": "1:15:05", "throughput": 8666.17, "total_tokens": 98883728} +{"current_steps": 146730, "total_steps": 204665, "loss": 0.0, "lr": 4.4974656263271247e-07, "epoch": 3.5846383113869007, "percentage": 71.69, "elapsed_time": "3:10:10", "remaining_time": "1:15:05", "throughput": 8666.18, "total_tokens": 98886864} +{"current_steps": 146735, "total_steps": 204665, "loss": 0.0, "lr": 4.496753580343937e-07, "epoch": 3.584760462218748, "percentage": 71.7, "elapsed_time": "3:10:11", "remaining_time": "1:15:04", "throughput": 8666.2, "total_tokens": 98890000} +{"current_steps": 146740, "total_steps": 204665, "loss": 0.0, "lr": 4.4960415743818327e-07, "epoch": 3.5848826130505946, "percentage": 71.7, "elapsed_time": "3:10:11", "remaining_time": "1:15:04", "throughput": 8666.24, "total_tokens": 98893520} +{"current_steps": 146745, "total_steps": 204665, "loss": 0.0, "lr": 4.495329608445998e-07, "epoch": 3.5850047638824423, "percentage": 71.7, "elapsed_time": "3:10:11", "remaining_time": "1:15:04", "throughput": 8666.25, "total_tokens": 98896528} +{"current_steps": 146750, "total_steps": 204665, "loss": 0.062, "lr": 4.494617682541604e-07, "epoch": 3.585126914714289, "percentage": 71.7, "elapsed_time": "3:10:12", "remaining_time": "1:15:03", "throughput": 8666.27, "total_tokens": 98899856} +{"current_steps": 146755, "total_steps": 204665, "loss": 0.0, "lr": 4.4939057966738304e-07, "epoch": 3.585249065546136, "percentage": 71.7, "elapsed_time": "3:10:12", "remaining_time": "1:15:03", "throughput": 8666.32, "total_tokens": 98903440} +{"current_steps": 146760, "total_steps": 204665, "loss": 0.0, "lr": 4.4931939508478575e-07, "epoch": 3.5853712163779834, "percentage": 71.71, "elapsed_time": "3:10:12", "remaining_time": "1:15:02", "throughput": 8666.33, "total_tokens": 98906512} +{"current_steps": 146765, "total_steps": 204665, "loss": 0.0, "lr": 4.4924821450688575e-07, "epoch": 3.5854933672098306, "percentage": 71.71, "elapsed_time": "3:10:13", "remaining_time": "1:15:02", "throughput": 8666.34, "total_tokens": 98909520} +{"current_steps": 146770, "total_steps": 204665, "loss": 0.0003, "lr": 4.4917703793420116e-07, "epoch": 3.5856155180416778, "percentage": 71.71, "elapsed_time": "3:10:13", "remaining_time": "1:15:02", "throughput": 8666.33, "total_tokens": 98912400} +{"current_steps": 146775, "total_steps": 204665, "loss": 0.0, "lr": 4.4910586536724893e-07, "epoch": 3.585737668873525, "percentage": 71.71, "elapsed_time": "3:10:13", "remaining_time": "1:15:01", "throughput": 8666.38, "total_tokens": 98915920} +{"current_steps": 146780, "total_steps": 204665, "loss": 0.0, "lr": 4.4903469680654703e-07, "epoch": 3.585859819705372, "percentage": 71.72, "elapsed_time": "3:10:14", "remaining_time": "1:15:01", "throughput": 8666.4, "total_tokens": 98919184} +{"current_steps": 146785, "total_steps": 204665, "loss": 0.0, "lr": 4.489635322526134e-07, "epoch": 3.5859819705372193, "percentage": 71.72, "elapsed_time": "3:10:14", "remaining_time": "1:15:00", "throughput": 8666.44, "total_tokens": 98922640} +{"current_steps": 146790, "total_steps": 204665, "loss": 0.0, "lr": 4.488923717059647e-07, "epoch": 3.5861041213690665, "percentage": 71.72, "elapsed_time": "3:10:14", "remaining_time": "1:15:00", "throughput": 8666.45, "total_tokens": 98925712} +{"current_steps": 146795, "total_steps": 204665, "loss": 0.0, "lr": 4.4882121516711937e-07, "epoch": 3.5862262722009137, "percentage": 71.72, "elapsed_time": "3:10:15", "remaining_time": "1:15:00", "throughput": 8666.48, "total_tokens": 98929168} +{"current_steps": 146800, "total_steps": 204665, "loss": 0.0001, "lr": 4.4875006263659445e-07, "epoch": 3.586348423032761, "percentage": 71.73, "elapsed_time": "3:10:15", "remaining_time": "1:14:59", "throughput": 8666.52, "total_tokens": 98932752} +{"current_steps": 146805, "total_steps": 204665, "loss": 0.083, "lr": 4.486789141149069e-07, "epoch": 3.586470573864608, "percentage": 71.73, "elapsed_time": "3:10:15", "remaining_time": "1:14:59", "throughput": 8666.56, "total_tokens": 98936208} +{"current_steps": 146810, "total_steps": 204665, "loss": 0.0001, "lr": 4.4860776960257495e-07, "epoch": 3.5865927246964553, "percentage": 71.73, "elapsed_time": "3:10:16", "remaining_time": "1:14:58", "throughput": 8666.57, "total_tokens": 98939280} +{"current_steps": 146815, "total_steps": 204665, "loss": 0.0, "lr": 4.4853662910011524e-07, "epoch": 3.5867148755283025, "percentage": 71.73, "elapsed_time": "3:10:16", "remaining_time": "1:14:58", "throughput": 8666.58, "total_tokens": 98942352} +{"current_steps": 146820, "total_steps": 204665, "loss": 0.0002, "lr": 4.484654926080459e-07, "epoch": 3.5868370263601497, "percentage": 71.74, "elapsed_time": "3:10:16", "remaining_time": "1:14:58", "throughput": 8666.61, "total_tokens": 98945680} +{"current_steps": 146825, "total_steps": 204665, "loss": 0.0, "lr": 4.4839436012688336e-07, "epoch": 3.5869591771919964, "percentage": 71.74, "elapsed_time": "3:10:17", "remaining_time": "1:14:57", "throughput": 8666.67, "total_tokens": 98949520} +{"current_steps": 146830, "total_steps": 204665, "loss": 0.0, "lr": 4.483232316571459e-07, "epoch": 3.587081328023844, "percentage": 71.74, "elapsed_time": "3:10:17", "remaining_time": "1:14:57", "throughput": 8666.7, "total_tokens": 98952848} +{"current_steps": 146835, "total_steps": 204665, "loss": 0.0001, "lr": 4.482521071993498e-07, "epoch": 3.587203478855691, "percentage": 71.74, "elapsed_time": "3:10:17", "remaining_time": "1:14:56", "throughput": 8666.72, "total_tokens": 98956112} +{"current_steps": 146840, "total_steps": 204665, "loss": 0.0, "lr": 4.481809867540133e-07, "epoch": 3.5873256296875384, "percentage": 71.75, "elapsed_time": "3:10:18", "remaining_time": "1:14:56", "throughput": 8666.75, "total_tokens": 98959376} +{"current_steps": 146845, "total_steps": 204665, "loss": 0.0, "lr": 4.4810987032165257e-07, "epoch": 3.587447780519385, "percentage": 71.75, "elapsed_time": "3:10:18", "remaining_time": "1:14:56", "throughput": 8666.83, "total_tokens": 98963536} +{"current_steps": 146850, "total_steps": 204665, "loss": 0.0, "lr": 4.480387579027853e-07, "epoch": 3.5875699313512324, "percentage": 71.75, "elapsed_time": "3:10:18", "remaining_time": "1:14:55", "throughput": 8666.86, "total_tokens": 98966800} +{"current_steps": 146855, "total_steps": 204665, "loss": 0.0, "lr": 4.479676494979291e-07, "epoch": 3.5876920821830796, "percentage": 71.75, "elapsed_time": "3:10:19", "remaining_time": "1:14:55", "throughput": 8666.89, "total_tokens": 98970192} +{"current_steps": 146860, "total_steps": 204665, "loss": 0.0, "lr": 4.4789654510760023e-07, "epoch": 3.5878142330149267, "percentage": 71.76, "elapsed_time": "3:10:19", "remaining_time": "1:14:54", "throughput": 8666.89, "total_tokens": 98973072} +{"current_steps": 146865, "total_steps": 204665, "loss": 0.0453, "lr": 4.478254447323165e-07, "epoch": 3.587936383846774, "percentage": 71.76, "elapsed_time": "3:10:20", "remaining_time": "1:14:54", "throughput": 8666.91, "total_tokens": 98976272} +{"current_steps": 146870, "total_steps": 204665, "loss": 0.0, "lr": 4.477543483725944e-07, "epoch": 3.588058534678621, "percentage": 71.76, "elapsed_time": "3:10:20", "remaining_time": "1:14:54", "throughput": 8666.95, "total_tokens": 98979728} +{"current_steps": 146875, "total_steps": 204665, "loss": 0.0, "lr": 4.4768325602895116e-07, "epoch": 3.5881806855104683, "percentage": 71.76, "elapsed_time": "3:10:20", "remaining_time": "1:14:53", "throughput": 8666.99, "total_tokens": 98983312} +{"current_steps": 146880, "total_steps": 204665, "loss": 0.0, "lr": 4.476121677019042e-07, "epoch": 3.5883028363423155, "percentage": 71.77, "elapsed_time": "3:10:21", "remaining_time": "1:14:53", "throughput": 8667.01, "total_tokens": 98986512} +{"current_steps": 146885, "total_steps": 204665, "loss": 0.0224, "lr": 4.4754108339196974e-07, "epoch": 3.5884249871741627, "percentage": 71.77, "elapsed_time": "3:10:21", "remaining_time": "1:14:52", "throughput": 8667.02, "total_tokens": 98989584} +{"current_steps": 146890, "total_steps": 204665, "loss": 0.0, "lr": 4.4747000309966553e-07, "epoch": 3.58854713800601, "percentage": 71.77, "elapsed_time": "3:10:21", "remaining_time": "1:14:52", "throughput": 8667.06, "total_tokens": 98993104} +{"current_steps": 146895, "total_steps": 204665, "loss": 0.0, "lr": 4.4739892682550763e-07, "epoch": 3.588669288837857, "percentage": 71.77, "elapsed_time": "3:10:22", "remaining_time": "1:14:52", "throughput": 8667.08, "total_tokens": 98996240} +{"current_steps": 146900, "total_steps": 204665, "loss": 0.0, "lr": 4.4732785457001375e-07, "epoch": 3.5887914396697043, "percentage": 71.78, "elapsed_time": "3:10:22", "remaining_time": "1:14:51", "throughput": 8667.12, "total_tokens": 98999696} +{"current_steps": 146905, "total_steps": 204665, "loss": 0.0003, "lr": 4.472567863337001e-07, "epoch": 3.5889135905015515, "percentage": 71.78, "elapsed_time": "3:10:22", "remaining_time": "1:14:51", "throughput": 8667.17, "total_tokens": 99003344} +{"current_steps": 146910, "total_steps": 204665, "loss": 0.0001, "lr": 4.4718572211708406e-07, "epoch": 3.5890357413333986, "percentage": 71.78, "elapsed_time": "3:10:23", "remaining_time": "1:14:50", "throughput": 8667.23, "total_tokens": 99007184} +{"current_steps": 146915, "total_steps": 204665, "loss": 0.0, "lr": 4.4711466192068215e-07, "epoch": 3.589157892165246, "percentage": 71.78, "elapsed_time": "3:10:23", "remaining_time": "1:14:50", "throughput": 8667.27, "total_tokens": 99010704} +{"current_steps": 146920, "total_steps": 204665, "loss": 0.0725, "lr": 4.4704360574501075e-07, "epoch": 3.5892800429970926, "percentage": 71.79, "elapsed_time": "3:10:23", "remaining_time": "1:14:50", "throughput": 8667.33, "total_tokens": 99014416} +{"current_steps": 146925, "total_steps": 204665, "loss": 0.0, "lr": 4.469725535905873e-07, "epoch": 3.58940219382894, "percentage": 71.79, "elapsed_time": "3:10:24", "remaining_time": "1:14:49", "throughput": 8667.41, "total_tokens": 99018384} +{"current_steps": 146930, "total_steps": 204665, "loss": 0.0, "lr": 4.4690150545792784e-07, "epoch": 3.589524344660787, "percentage": 71.79, "elapsed_time": "3:10:24", "remaining_time": "1:14:49", "throughput": 8667.41, "total_tokens": 99021392} +{"current_steps": 146935, "total_steps": 204665, "loss": 0.0, "lr": 4.4683046134754976e-07, "epoch": 3.589646495492634, "percentage": 71.79, "elapsed_time": "3:10:24", "remaining_time": "1:14:48", "throughput": 8667.43, "total_tokens": 99024528} +{"current_steps": 146940, "total_steps": 204665, "loss": 0.0545, "lr": 4.46759421259969e-07, "epoch": 3.5897686463244813, "percentage": 71.8, "elapsed_time": "3:10:25", "remaining_time": "1:14:48", "throughput": 8667.43, "total_tokens": 99027536} +{"current_steps": 146945, "total_steps": 204665, "loss": 0.0, "lr": 4.466883851957026e-07, "epoch": 3.5898907971563285, "percentage": 71.8, "elapsed_time": "3:10:25", "remaining_time": "1:14:47", "throughput": 8667.62, "total_tokens": 99033232} +{"current_steps": 146950, "total_steps": 204665, "loss": 0.0001, "lr": 4.466173531552674e-07, "epoch": 3.5900129479881757, "percentage": 71.8, "elapsed_time": "3:10:25", "remaining_time": "1:14:47", "throughput": 8667.64, "total_tokens": 99036368} +{"current_steps": 146955, "total_steps": 204665, "loss": 0.0, "lr": 4.465463251391792e-07, "epoch": 3.590135098820023, "percentage": 71.8, "elapsed_time": "3:10:26", "remaining_time": "1:14:47", "throughput": 8667.66, "total_tokens": 99039632} +{"current_steps": 146960, "total_steps": 204665, "loss": 0.0, "lr": 4.464753011479555e-07, "epoch": 3.59025724965187, "percentage": 71.81, "elapsed_time": "3:10:26", "remaining_time": "1:14:46", "throughput": 8667.7, "total_tokens": 99043152} +{"current_steps": 146965, "total_steps": 204665, "loss": 0.0001, "lr": 4.464042811821118e-07, "epoch": 3.5903794004837173, "percentage": 71.81, "elapsed_time": "3:10:27", "remaining_time": "1:14:46", "throughput": 8667.86, "total_tokens": 99048400} +{"current_steps": 146970, "total_steps": 204665, "loss": 0.0001, "lr": 4.4633326524216517e-07, "epoch": 3.5905015513155645, "percentage": 71.81, "elapsed_time": "3:10:27", "remaining_time": "1:14:45", "throughput": 8667.88, "total_tokens": 99051664} +{"current_steps": 146975, "total_steps": 204665, "loss": 0.0, "lr": 4.462622533286322e-07, "epoch": 3.5906237021474117, "percentage": 71.81, "elapsed_time": "3:10:27", "remaining_time": "1:14:45", "throughput": 8667.88, "total_tokens": 99054608} +{"current_steps": 146980, "total_steps": 204665, "loss": 0.0, "lr": 4.461912454420288e-07, "epoch": 3.590745852979259, "percentage": 71.81, "elapsed_time": "3:10:28", "remaining_time": "1:14:45", "throughput": 8667.93, "total_tokens": 99058320} +{"current_steps": 146985, "total_steps": 204665, "loss": 0.0, "lr": 4.4612024158287196e-07, "epoch": 3.590868003811106, "percentage": 71.82, "elapsed_time": "3:10:28", "remaining_time": "1:14:44", "throughput": 8667.96, "total_tokens": 99061648} +{"current_steps": 146990, "total_steps": 204665, "loss": 0.0, "lr": 4.4604924175167737e-07, "epoch": 3.5909901546429532, "percentage": 71.82, "elapsed_time": "3:10:28", "remaining_time": "1:14:44", "throughput": 8668.0, "total_tokens": 99065104} +{"current_steps": 146995, "total_steps": 204665, "loss": 0.0, "lr": 4.4597824594896194e-07, "epoch": 3.5911123054748004, "percentage": 71.82, "elapsed_time": "3:10:29", "remaining_time": "1:14:43", "throughput": 8668.03, "total_tokens": 99068432} +{"current_steps": 147000, "total_steps": 204665, "loss": 0.0, "lr": 4.459072541752414e-07, "epoch": 3.5912344563066476, "percentage": 71.82, "elapsed_time": "3:10:29", "remaining_time": "1:14:43", "throughput": 8668.07, "total_tokens": 99071888} +{"current_steps": 147005, "total_steps": 204665, "loss": 0.1026, "lr": 4.4583626643103276e-07, "epoch": 3.5913566071384944, "percentage": 71.83, "elapsed_time": "3:10:29", "remaining_time": "1:14:43", "throughput": 8668.09, "total_tokens": 99075152} +{"current_steps": 147010, "total_steps": 204665, "loss": 0.0, "lr": 4.4576528271685143e-07, "epoch": 3.591478757970342, "percentage": 71.83, "elapsed_time": "3:10:30", "remaining_time": "1:14:42", "throughput": 8668.12, "total_tokens": 99078608} +{"current_steps": 147015, "total_steps": 204665, "loss": 0.0, "lr": 4.4569430303321445e-07, "epoch": 3.5916009088021887, "percentage": 71.83, "elapsed_time": "3:10:30", "remaining_time": "1:14:42", "throughput": 8668.17, "total_tokens": 99082256} +{"current_steps": 147020, "total_steps": 204665, "loss": 0.0003, "lr": 4.456233273806376e-07, "epoch": 3.5917230596340364, "percentage": 71.83, "elapsed_time": "3:10:30", "remaining_time": "1:14:41", "throughput": 8668.2, "total_tokens": 99085520} +{"current_steps": 147025, "total_steps": 204665, "loss": 0.0, "lr": 4.4555235575963655e-07, "epoch": 3.591845210465883, "percentage": 71.84, "elapsed_time": "3:10:31", "remaining_time": "1:14:41", "throughput": 8668.24, "total_tokens": 99089040} +{"current_steps": 147030, "total_steps": 204665, "loss": 0.0, "lr": 4.45481388170728e-07, "epoch": 3.5919673612977303, "percentage": 71.84, "elapsed_time": "3:10:31", "remaining_time": "1:14:41", "throughput": 8668.28, "total_tokens": 99092560} +{"current_steps": 147035, "total_steps": 204665, "loss": 0.0414, "lr": 4.4541042461442824e-07, "epoch": 3.5920895121295775, "percentage": 71.84, "elapsed_time": "3:10:31", "remaining_time": "1:14:40", "throughput": 8668.31, "total_tokens": 99095952} +{"current_steps": 147040, "total_steps": 204665, "loss": 0.0, "lr": 4.4533946509125267e-07, "epoch": 3.5922116629614247, "percentage": 71.84, "elapsed_time": "3:10:32", "remaining_time": "1:14:40", "throughput": 8668.36, "total_tokens": 99099536} +{"current_steps": 147045, "total_steps": 204665, "loss": 0.0, "lr": 4.4526850960171813e-07, "epoch": 3.592333813793272, "percentage": 71.85, "elapsed_time": "3:10:32", "remaining_time": "1:14:39", "throughput": 8668.36, "total_tokens": 99102544} +{"current_steps": 147050, "total_steps": 204665, "loss": 0.0, "lr": 4.4519755814633974e-07, "epoch": 3.592455964625119, "percentage": 71.85, "elapsed_time": "3:10:33", "remaining_time": "1:14:39", "throughput": 8668.37, "total_tokens": 99105616} +{"current_steps": 147055, "total_steps": 204665, "loss": 0.0, "lr": 4.451266107256344e-07, "epoch": 3.5925781154569663, "percentage": 71.85, "elapsed_time": "3:10:33", "remaining_time": "1:14:39", "throughput": 8668.4, "total_tokens": 99109008} +{"current_steps": 147060, "total_steps": 204665, "loss": 0.0304, "lr": 4.4505566734011713e-07, "epoch": 3.5927002662888134, "percentage": 71.85, "elapsed_time": "3:10:33", "remaining_time": "1:14:38", "throughput": 8668.4, "total_tokens": 99111888} +{"current_steps": 147065, "total_steps": 204665, "loss": 0.0898, "lr": 4.449847279903044e-07, "epoch": 3.5928224171206606, "percentage": 71.86, "elapsed_time": "3:10:34", "remaining_time": "1:14:38", "throughput": 8668.42, "total_tokens": 99115024} +{"current_steps": 147070, "total_steps": 204665, "loss": 0.0, "lr": 4.4491379267671227e-07, "epoch": 3.592944567952508, "percentage": 71.86, "elapsed_time": "3:10:34", "remaining_time": "1:14:37", "throughput": 8668.44, "total_tokens": 99118288} +{"current_steps": 147075, "total_steps": 204665, "loss": 0.0, "lr": 4.4484286139985604e-07, "epoch": 3.593066718784355, "percentage": 71.86, "elapsed_time": "3:10:34", "remaining_time": "1:14:37", "throughput": 8668.47, "total_tokens": 99121744} +{"current_steps": 147080, "total_steps": 204665, "loss": 0.0, "lr": 4.447719341602522e-07, "epoch": 3.593188869616202, "percentage": 71.86, "elapsed_time": "3:10:35", "remaining_time": "1:14:37", "throughput": 8668.5, "total_tokens": 99125136} +{"current_steps": 147085, "total_steps": 204665, "loss": 0.0, "lr": 4.4470101095841584e-07, "epoch": 3.5933110204480494, "percentage": 71.87, "elapsed_time": "3:10:35", "remaining_time": "1:14:36", "throughput": 8668.52, "total_tokens": 99128336} +{"current_steps": 147090, "total_steps": 204665, "loss": 0.0, "lr": 4.4463009179486344e-07, "epoch": 3.593433171279896, "percentage": 71.87, "elapsed_time": "3:10:35", "remaining_time": "1:14:36", "throughput": 8668.56, "total_tokens": 99131728} +{"current_steps": 147095, "total_steps": 204665, "loss": 0.0, "lr": 4.4455917667011e-07, "epoch": 3.5935553221117438, "percentage": 71.87, "elapsed_time": "3:10:36", "remaining_time": "1:14:35", "throughput": 8668.62, "total_tokens": 99135568} +{"current_steps": 147100, "total_steps": 204665, "loss": 0.0001, "lr": 4.444882655846717e-07, "epoch": 3.5936774729435905, "percentage": 71.87, "elapsed_time": "3:10:36", "remaining_time": "1:14:35", "throughput": 8668.62, "total_tokens": 99138512} +{"current_steps": 147105, "total_steps": 204665, "loss": 0.0, "lr": 4.4441735853906437e-07, "epoch": 3.593799623775438, "percentage": 71.88, "elapsed_time": "3:10:36", "remaining_time": "1:14:35", "throughput": 8668.63, "total_tokens": 99141648} +{"current_steps": 147110, "total_steps": 204665, "loss": 0.0524, "lr": 4.443464555338031e-07, "epoch": 3.593921774607285, "percentage": 71.88, "elapsed_time": "3:10:37", "remaining_time": "1:14:34", "throughput": 8668.63, "total_tokens": 99144592} +{"current_steps": 147115, "total_steps": 204665, "loss": 0.0, "lr": 4.4427555656940443e-07, "epoch": 3.594043925439132, "percentage": 71.88, "elapsed_time": "3:10:37", "remaining_time": "1:14:34", "throughput": 8668.64, "total_tokens": 99147664} +{"current_steps": 147120, "total_steps": 204665, "loss": 0.0001, "lr": 4.4420466164638316e-07, "epoch": 3.5941660762709793, "percentage": 71.88, "elapsed_time": "3:10:37", "remaining_time": "1:14:33", "throughput": 8668.65, "total_tokens": 99150736} +{"current_steps": 147125, "total_steps": 204665, "loss": 0.0, "lr": 4.4413377076525484e-07, "epoch": 3.5942882271028265, "percentage": 71.89, "elapsed_time": "3:10:38", "remaining_time": "1:14:33", "throughput": 8668.67, "total_tokens": 99153936} +{"current_steps": 147130, "total_steps": 204665, "loss": 0.0001, "lr": 4.4406288392653556e-07, "epoch": 3.5944103779346737, "percentage": 71.89, "elapsed_time": "3:10:38", "remaining_time": "1:14:33", "throughput": 8668.69, "total_tokens": 99157136} +{"current_steps": 147135, "total_steps": 204665, "loss": 0.0, "lr": 4.4399200113074006e-07, "epoch": 3.594532528766521, "percentage": 71.89, "elapsed_time": "3:10:38", "remaining_time": "1:14:32", "throughput": 8668.72, "total_tokens": 99160592} +{"current_steps": 147140, "total_steps": 204665, "loss": 0.0286, "lr": 4.439211223783849e-07, "epoch": 3.594654679598368, "percentage": 71.89, "elapsed_time": "3:10:39", "remaining_time": "1:14:32", "throughput": 8668.76, "total_tokens": 99164112} +{"current_steps": 147145, "total_steps": 204665, "loss": 0.0962, "lr": 4.4385024766998426e-07, "epoch": 3.5947768304302152, "percentage": 71.9, "elapsed_time": "3:10:39", "remaining_time": "1:14:31", "throughput": 8668.76, "total_tokens": 99167120} +{"current_steps": 147150, "total_steps": 204665, "loss": 0.0, "lr": 4.4377937700605473e-07, "epoch": 3.5948989812620624, "percentage": 71.9, "elapsed_time": "3:10:39", "remaining_time": "1:14:31", "throughput": 8668.79, "total_tokens": 99170448} +{"current_steps": 147155, "total_steps": 204665, "loss": 0.0, "lr": 4.437085103871108e-07, "epoch": 3.5950211320939096, "percentage": 71.9, "elapsed_time": "3:10:40", "remaining_time": "1:14:31", "throughput": 8668.82, "total_tokens": 99173776} +{"current_steps": 147160, "total_steps": 204665, "loss": 0.0, "lr": 4.4363764781366817e-07, "epoch": 3.595143282925757, "percentage": 71.9, "elapsed_time": "3:10:40", "remaining_time": "1:14:30", "throughput": 8668.87, "total_tokens": 99177360} +{"current_steps": 147165, "total_steps": 204665, "loss": 0.0, "lr": 4.435667892862426e-07, "epoch": 3.595265433757604, "percentage": 71.91, "elapsed_time": "3:10:40", "remaining_time": "1:14:30", "throughput": 8668.9, "total_tokens": 99180752} +{"current_steps": 147170, "total_steps": 204665, "loss": 0.0052, "lr": 4.4349593480534854e-07, "epoch": 3.595387584589451, "percentage": 71.91, "elapsed_time": "3:10:41", "remaining_time": "1:14:29", "throughput": 8668.94, "total_tokens": 99184208} +{"current_steps": 147175, "total_steps": 204665, "loss": 0.0, "lr": 4.4342508437150214e-07, "epoch": 3.5955097354212984, "percentage": 71.91, "elapsed_time": "3:10:41", "remaining_time": "1:14:29", "throughput": 8668.96, "total_tokens": 99187536} +{"current_steps": 147180, "total_steps": 204665, "loss": 0.0, "lr": 4.43354237985218e-07, "epoch": 3.5956318862531456, "percentage": 71.91, "elapsed_time": "3:10:42", "remaining_time": "1:14:28", "throughput": 8668.98, "total_tokens": 99190736} +{"current_steps": 147185, "total_steps": 204665, "loss": 0.0001, "lr": 4.4328339564701143e-07, "epoch": 3.5957540370849923, "percentage": 71.92, "elapsed_time": "3:10:42", "remaining_time": "1:14:28", "throughput": 8669.03, "total_tokens": 99194448} +{"current_steps": 147190, "total_steps": 204665, "loss": 0.0, "lr": 4.4321255735739816e-07, "epoch": 3.59587618791684, "percentage": 71.92, "elapsed_time": "3:10:42", "remaining_time": "1:14:28", "throughput": 8669.05, "total_tokens": 99197584} +{"current_steps": 147195, "total_steps": 204665, "loss": 0.0003, "lr": 4.4314172311689244e-07, "epoch": 3.5959983387486867, "percentage": 71.92, "elapsed_time": "3:10:43", "remaining_time": "1:14:27", "throughput": 8669.12, "total_tokens": 99201616} +{"current_steps": 147200, "total_steps": 204665, "loss": 0.0, "lr": 4.430708929260104e-07, "epoch": 3.5961204895805343, "percentage": 71.92, "elapsed_time": "3:10:43", "remaining_time": "1:14:27", "throughput": 8669.15, "total_tokens": 99204944} +{"current_steps": 147205, "total_steps": 204665, "loss": 0.0, "lr": 4.4300006678526615e-07, "epoch": 3.596242640412381, "percentage": 71.92, "elapsed_time": "3:10:43", "remaining_time": "1:14:26", "throughput": 8669.18, "total_tokens": 99208272} +{"current_steps": 147210, "total_steps": 204665, "loss": 0.0, "lr": 4.429292446951757e-07, "epoch": 3.5963647912442283, "percentage": 71.93, "elapsed_time": "3:10:44", "remaining_time": "1:14:26", "throughput": 8669.21, "total_tokens": 99211664} +{"current_steps": 147215, "total_steps": 204665, "loss": 0.0, "lr": 4.4285842665625317e-07, "epoch": 3.5964869420760754, "percentage": 71.93, "elapsed_time": "3:10:44", "remaining_time": "1:14:26", "throughput": 8669.23, "total_tokens": 99214992} +{"current_steps": 147220, "total_steps": 204665, "loss": 0.0, "lr": 4.4278761266901444e-07, "epoch": 3.5966090929079226, "percentage": 71.93, "elapsed_time": "3:10:44", "remaining_time": "1:14:25", "throughput": 8669.26, "total_tokens": 99218384} +{"current_steps": 147225, "total_steps": 204665, "loss": 0.0567, "lr": 4.4271680273397404e-07, "epoch": 3.59673124373977, "percentage": 71.93, "elapsed_time": "3:10:45", "remaining_time": "1:14:25", "throughput": 8669.3, "total_tokens": 99221840} +{"current_steps": 147230, "total_steps": 204665, "loss": 0.0, "lr": 4.426459968516466e-07, "epoch": 3.596853394571617, "percentage": 71.94, "elapsed_time": "3:10:45", "remaining_time": "1:14:24", "throughput": 8669.3, "total_tokens": 99224784} +{"current_steps": 147235, "total_steps": 204665, "loss": 0.0, "lr": 4.425751950225477e-07, "epoch": 3.596975545403464, "percentage": 71.94, "elapsed_time": "3:10:45", "remaining_time": "1:14:24", "throughput": 8669.29, "total_tokens": 99227536} +{"current_steps": 147240, "total_steps": 204665, "loss": 0.0, "lr": 4.425043972471916e-07, "epoch": 3.5970976962353114, "percentage": 71.94, "elapsed_time": "3:10:46", "remaining_time": "1:14:24", "throughput": 8669.34, "total_tokens": 99231184} +{"current_steps": 147245, "total_steps": 204665, "loss": 0.0, "lr": 4.424336035260937e-07, "epoch": 3.5972198470671586, "percentage": 71.94, "elapsed_time": "3:10:46", "remaining_time": "1:14:23", "throughput": 8669.39, "total_tokens": 99234768} +{"current_steps": 147250, "total_steps": 204665, "loss": 0.0, "lr": 4.423628138597684e-07, "epoch": 3.5973419978990058, "percentage": 71.95, "elapsed_time": "3:10:46", "remaining_time": "1:14:23", "throughput": 8669.41, "total_tokens": 99238032} +{"current_steps": 147255, "total_steps": 204665, "loss": 0.0, "lr": 4.4229202824873056e-07, "epoch": 3.597464148730853, "percentage": 71.95, "elapsed_time": "3:10:47", "remaining_time": "1:14:22", "throughput": 8669.42, "total_tokens": 99241168} +{"current_steps": 147260, "total_steps": 204665, "loss": 0.0035, "lr": 4.422212466934956e-07, "epoch": 3.5975862995627, "percentage": 71.95, "elapsed_time": "3:10:47", "remaining_time": "1:14:22", "throughput": 8669.45, "total_tokens": 99244560} +{"current_steps": 147265, "total_steps": 204665, "loss": 0.0001, "lr": 4.421504691945773e-07, "epoch": 3.5977084503945473, "percentage": 71.95, "elapsed_time": "3:10:47", "remaining_time": "1:14:22", "throughput": 8669.48, "total_tokens": 99247824} +{"current_steps": 147270, "total_steps": 204665, "loss": 0.075, "lr": 4.4207969575249126e-07, "epoch": 3.597830601226394, "percentage": 71.96, "elapsed_time": "3:10:48", "remaining_time": "1:14:21", "throughput": 8669.51, "total_tokens": 99251280} +{"current_steps": 147275, "total_steps": 204665, "loss": 0.0, "lr": 4.420089263677513e-07, "epoch": 3.5979527520582417, "percentage": 71.96, "elapsed_time": "3:10:48", "remaining_time": "1:14:21", "throughput": 8669.55, "total_tokens": 99254800} +{"current_steps": 147280, "total_steps": 204665, "loss": 0.0, "lr": 4.4193816104087257e-07, "epoch": 3.5980749028900885, "percentage": 71.96, "elapsed_time": "3:10:49", "remaining_time": "1:14:20", "throughput": 8669.59, "total_tokens": 99258256} +{"current_steps": 147285, "total_steps": 204665, "loss": 0.0, "lr": 4.4186739977236997e-07, "epoch": 3.598197053721936, "percentage": 71.96, "elapsed_time": "3:10:49", "remaining_time": "1:14:20", "throughput": 8669.6, "total_tokens": 99261392} +{"current_steps": 147290, "total_steps": 204665, "loss": 0.0, "lr": 4.417966425627574e-07, "epoch": 3.598319204553783, "percentage": 71.97, "elapsed_time": "3:10:49", "remaining_time": "1:14:20", "throughput": 8669.62, "total_tokens": 99264592} +{"current_steps": 147295, "total_steps": 204665, "loss": 0.0326, "lr": 4.417258894125502e-07, "epoch": 3.59844135538563, "percentage": 71.97, "elapsed_time": "3:10:50", "remaining_time": "1:14:19", "throughput": 8669.65, "total_tokens": 99268048} +{"current_steps": 147300, "total_steps": 204665, "loss": 0.0001, "lr": 4.4165514032226205e-07, "epoch": 3.5985635062174772, "percentage": 71.97, "elapsed_time": "3:10:50", "remaining_time": "1:14:19", "throughput": 8669.69, "total_tokens": 99271504} +{"current_steps": 147305, "total_steps": 204665, "loss": 0.0003, "lr": 4.4158439529240834e-07, "epoch": 3.5986856570493244, "percentage": 71.97, "elapsed_time": "3:10:50", "remaining_time": "1:14:18", "throughput": 8669.72, "total_tokens": 99274768} +{"current_steps": 147310, "total_steps": 204665, "loss": 0.0, "lr": 4.4151365432350264e-07, "epoch": 3.5988078078811716, "percentage": 71.98, "elapsed_time": "3:10:51", "remaining_time": "1:14:18", "throughput": 8669.76, "total_tokens": 99278416} +{"current_steps": 147315, "total_steps": 204665, "loss": 0.0, "lr": 4.414429174160603e-07, "epoch": 3.598929958713019, "percentage": 71.98, "elapsed_time": "3:10:51", "remaining_time": "1:14:18", "throughput": 8669.79, "total_tokens": 99281680} +{"current_steps": 147320, "total_steps": 204665, "loss": 0.0, "lr": 4.4137218457059487e-07, "epoch": 3.599052109544866, "percentage": 71.98, "elapsed_time": "3:10:51", "remaining_time": "1:14:17", "throughput": 8669.82, "total_tokens": 99285072} +{"current_steps": 147325, "total_steps": 204665, "loss": 0.0224, "lr": 4.413014557876216e-07, "epoch": 3.599174260376713, "percentage": 71.98, "elapsed_time": "3:10:52", "remaining_time": "1:14:17", "throughput": 8669.83, "total_tokens": 99288144} +{"current_steps": 147330, "total_steps": 204665, "loss": 0.0, "lr": 4.412307310676544e-07, "epoch": 3.5992964112085604, "percentage": 71.99, "elapsed_time": "3:10:52", "remaining_time": "1:14:16", "throughput": 8669.86, "total_tokens": 99291472} +{"current_steps": 147335, "total_steps": 204665, "loss": 0.0, "lr": 4.4116001041120723e-07, "epoch": 3.5994185620404076, "percentage": 71.99, "elapsed_time": "3:10:52", "remaining_time": "1:14:16", "throughput": 8669.9, "total_tokens": 99294992} +{"current_steps": 147340, "total_steps": 204665, "loss": 0.0, "lr": 4.410892938187948e-07, "epoch": 3.5995407128722547, "percentage": 71.99, "elapsed_time": "3:10:53", "remaining_time": "1:14:16", "throughput": 8669.93, "total_tokens": 99298448} +{"current_steps": 147345, "total_steps": 204665, "loss": 0.0, "lr": 4.410185812909316e-07, "epoch": 3.599662863704102, "percentage": 71.99, "elapsed_time": "3:10:53", "remaining_time": "1:14:15", "throughput": 8669.95, "total_tokens": 99301584} +{"current_steps": 147350, "total_steps": 204665, "loss": 0.0, "lr": 4.409478728281314e-07, "epoch": 3.599785014535949, "percentage": 72.0, "elapsed_time": "3:10:53", "remaining_time": "1:14:15", "throughput": 8670.01, "total_tokens": 99305424} +{"current_steps": 147355, "total_steps": 204665, "loss": 0.0, "lr": 4.4087716843090897e-07, "epoch": 3.5999071653677963, "percentage": 72.0, "elapsed_time": "3:10:54", "remaining_time": "1:14:14", "throughput": 8670.06, "total_tokens": 99308944} +{"current_steps": 147360, "total_steps": 204665, "loss": 0.0, "lr": 4.408064680997777e-07, "epoch": 3.6000293161996435, "percentage": 72.0, "elapsed_time": "3:10:54", "remaining_time": "1:14:14", "throughput": 8670.14, "total_tokens": 99313040} +{"current_steps": 147365, "total_steps": 204665, "loss": 0.0, "lr": 4.407357718352527e-07, "epoch": 3.6001514670314902, "percentage": 72.0, "elapsed_time": "3:10:54", "remaining_time": "1:14:14", "throughput": 8670.17, "total_tokens": 99316368} +{"current_steps": 147370, "total_steps": 204665, "loss": 0.0, "lr": 4.406650796378472e-07, "epoch": 3.600273617863338, "percentage": 72.01, "elapsed_time": "3:10:55", "remaining_time": "1:14:13", "throughput": 8670.18, "total_tokens": 99319440} +{"current_steps": 147375, "total_steps": 204665, "loss": 0.0, "lr": 4.4059439150807566e-07, "epoch": 3.6003957686951846, "percentage": 72.01, "elapsed_time": "3:10:55", "remaining_time": "1:14:13", "throughput": 8670.22, "total_tokens": 99322960} +{"current_steps": 147380, "total_steps": 204665, "loss": 0.1013, "lr": 4.405237074464525e-07, "epoch": 3.600517919527032, "percentage": 72.01, "elapsed_time": "3:10:55", "remaining_time": "1:14:12", "throughput": 8670.26, "total_tokens": 99326416} +{"current_steps": 147385, "total_steps": 204665, "loss": 0.0, "lr": 4.404530274534911e-07, "epoch": 3.600640070358879, "percentage": 72.01, "elapsed_time": "3:10:56", "remaining_time": "1:14:12", "throughput": 8670.28, "total_tokens": 99329744} +{"current_steps": 147390, "total_steps": 204665, "loss": 0.0, "lr": 4.4038235152970606e-07, "epoch": 3.600762221190726, "percentage": 72.02, "elapsed_time": "3:10:56", "remaining_time": "1:14:12", "throughput": 8670.32, "total_tokens": 99333200} +{"current_steps": 147395, "total_steps": 204665, "loss": 0.0, "lr": 4.403116796756108e-07, "epoch": 3.6008843720225734, "percentage": 72.02, "elapsed_time": "3:10:57", "remaining_time": "1:14:11", "throughput": 8670.37, "total_tokens": 99336784} +{"current_steps": 147400, "total_steps": 204665, "loss": 0.0001, "lr": 4.402410118917199e-07, "epoch": 3.6010065228544206, "percentage": 72.02, "elapsed_time": "3:10:57", "remaining_time": "1:14:11", "throughput": 8670.39, "total_tokens": 99340048} +{"current_steps": 147405, "total_steps": 204665, "loss": 0.0001, "lr": 4.401703481785466e-07, "epoch": 3.6011286736862678, "percentage": 72.02, "elapsed_time": "3:10:57", "remaining_time": "1:14:10", "throughput": 8670.43, "total_tokens": 99343568} +{"current_steps": 147410, "total_steps": 204665, "loss": 0.059, "lr": 4.4009968853660496e-07, "epoch": 3.601250824518115, "percentage": 72.03, "elapsed_time": "3:10:58", "remaining_time": "1:14:10", "throughput": 8670.44, "total_tokens": 99346576} +{"current_steps": 147415, "total_steps": 204665, "loss": 0.0, "lr": 4.4002903296640947e-07, "epoch": 3.601372975349962, "percentage": 72.03, "elapsed_time": "3:10:58", "remaining_time": "1:14:09", "throughput": 8670.46, "total_tokens": 99349840} +{"current_steps": 147420, "total_steps": 204665, "loss": 0.0, "lr": 4.399583814684731e-07, "epoch": 3.6014951261818093, "percentage": 72.03, "elapsed_time": "3:10:58", "remaining_time": "1:14:09", "throughput": 8670.5, "total_tokens": 99353360} +{"current_steps": 147425, "total_steps": 204665, "loss": 0.0001, "lr": 4.3988773404331027e-07, "epoch": 3.6016172770136565, "percentage": 72.03, "elapsed_time": "3:10:59", "remaining_time": "1:14:09", "throughput": 8670.55, "total_tokens": 99356880} +{"current_steps": 147430, "total_steps": 204665, "loss": 0.0, "lr": 4.398170906914346e-07, "epoch": 3.6017394278455037, "percentage": 72.03, "elapsed_time": "3:10:59", "remaining_time": "1:14:08", "throughput": 8670.56, "total_tokens": 99359952} +{"current_steps": 147435, "total_steps": 204665, "loss": 0.0, "lr": 4.397464514133593e-07, "epoch": 3.601861578677351, "percentage": 72.04, "elapsed_time": "3:10:59", "remaining_time": "1:14:08", "throughput": 8670.6, "total_tokens": 99363408} +{"current_steps": 147440, "total_steps": 204665, "loss": 0.0349, "lr": 4.3967581620959893e-07, "epoch": 3.601983729509198, "percentage": 72.04, "elapsed_time": "3:11:00", "remaining_time": "1:14:07", "throughput": 8670.66, "total_tokens": 99367184} +{"current_steps": 147445, "total_steps": 204665, "loss": 0.0002, "lr": 4.3960518508066624e-07, "epoch": 3.6021058803410453, "percentage": 72.04, "elapsed_time": "3:11:00", "remaining_time": "1:14:07", "throughput": 8670.69, "total_tokens": 99370640} +{"current_steps": 147450, "total_steps": 204665, "loss": 0.0122, "lr": 4.3953455802707587e-07, "epoch": 3.602228031172892, "percentage": 72.04, "elapsed_time": "3:11:00", "remaining_time": "1:14:07", "throughput": 8670.71, "total_tokens": 99373904} +{"current_steps": 147455, "total_steps": 204665, "loss": 0.0, "lr": 4.3946393504934054e-07, "epoch": 3.6023501820047397, "percentage": 72.05, "elapsed_time": "3:11:01", "remaining_time": "1:14:06", "throughput": 8670.78, "total_tokens": 99377744} +{"current_steps": 147460, "total_steps": 204665, "loss": 0.0001, "lr": 4.3939331614797457e-07, "epoch": 3.6024723328365864, "percentage": 72.05, "elapsed_time": "3:11:01", "remaining_time": "1:14:06", "throughput": 8670.78, "total_tokens": 99380752} +{"current_steps": 147465, "total_steps": 204665, "loss": 0.0, "lr": 4.393227013234908e-07, "epoch": 3.602594483668434, "percentage": 72.05, "elapsed_time": "3:11:01", "remaining_time": "1:14:05", "throughput": 8670.78, "total_tokens": 99383568} +{"current_steps": 147470, "total_steps": 204665, "loss": 0.0, "lr": 4.392520905764032e-07, "epoch": 3.602716634500281, "percentage": 72.05, "elapsed_time": "3:11:02", "remaining_time": "1:14:05", "throughput": 8670.79, "total_tokens": 99386704} +{"current_steps": 147475, "total_steps": 204665, "loss": 0.0, "lr": 4.391814839072255e-07, "epoch": 3.602838785332128, "percentage": 72.06, "elapsed_time": "3:11:02", "remaining_time": "1:14:05", "throughput": 8670.82, "total_tokens": 99390032} +{"current_steps": 147480, "total_steps": 204665, "loss": 0.0, "lr": 4.3911088131647066e-07, "epoch": 3.602960936163975, "percentage": 72.06, "elapsed_time": "3:11:02", "remaining_time": "1:14:04", "throughput": 8670.82, "total_tokens": 99392976} +{"current_steps": 147485, "total_steps": 204665, "loss": 0.0, "lr": 4.3904028280465267e-07, "epoch": 3.6030830869958224, "percentage": 72.06, "elapsed_time": "3:11:03", "remaining_time": "1:14:04", "throughput": 8670.86, "total_tokens": 99396496} +{"current_steps": 147490, "total_steps": 204665, "loss": 0.0, "lr": 4.389696883722842e-07, "epoch": 3.6032052378276695, "percentage": 72.06, "elapsed_time": "3:11:03", "remaining_time": "1:14:03", "throughput": 8670.93, "total_tokens": 99400400} +{"current_steps": 147495, "total_steps": 204665, "loss": 0.0, "lr": 4.3889909801987955e-07, "epoch": 3.6033273886595167, "percentage": 72.07, "elapsed_time": "3:11:03", "remaining_time": "1:14:03", "throughput": 8671.02, "total_tokens": 99404496} +{"current_steps": 147500, "total_steps": 204665, "loss": 0.0001, "lr": 4.3882851174795113e-07, "epoch": 3.603449539491364, "percentage": 72.07, "elapsed_time": "3:11:04", "remaining_time": "1:14:03", "throughput": 8671.07, "total_tokens": 99408208} +{"current_steps": 147505, "total_steps": 204665, "loss": 0.0001, "lr": 4.387579295570126e-07, "epoch": 3.603571690323211, "percentage": 72.07, "elapsed_time": "3:11:04", "remaining_time": "1:14:02", "throughput": 8671.08, "total_tokens": 99411280} +{"current_steps": 147510, "total_steps": 204665, "loss": 0.0457, "lr": 4.3868735144757787e-07, "epoch": 3.6036938411550583, "percentage": 72.07, "elapsed_time": "3:11:05", "remaining_time": "1:14:02", "throughput": 8671.11, "total_tokens": 99414672} +{"current_steps": 147515, "total_steps": 204665, "loss": 0.0, "lr": 4.3861677742015937e-07, "epoch": 3.6038159919869055, "percentage": 72.08, "elapsed_time": "3:11:05", "remaining_time": "1:14:01", "throughput": 8671.13, "total_tokens": 99417872} +{"current_steps": 147520, "total_steps": 204665, "loss": 0.0001, "lr": 4.3854620747527095e-07, "epoch": 3.6039381428187527, "percentage": 72.08, "elapsed_time": "3:11:05", "remaining_time": "1:14:01", "throughput": 8671.17, "total_tokens": 99421328} +{"current_steps": 147525, "total_steps": 204665, "loss": 0.0, "lr": 4.384756416134251e-07, "epoch": 3.6040602936506, "percentage": 72.08, "elapsed_time": "3:11:06", "remaining_time": "1:14:01", "throughput": 8671.19, "total_tokens": 99424528} +{"current_steps": 147530, "total_steps": 204665, "loss": 0.0, "lr": 4.3840507983513585e-07, "epoch": 3.604182444482447, "percentage": 72.08, "elapsed_time": "3:11:06", "remaining_time": "1:14:00", "throughput": 8671.2, "total_tokens": 99427664} +{"current_steps": 147535, "total_steps": 204665, "loss": 0.0, "lr": 4.383345221409159e-07, "epoch": 3.604304595314294, "percentage": 72.09, "elapsed_time": "3:11:06", "remaining_time": "1:14:00", "throughput": 8671.22, "total_tokens": 99430864} +{"current_steps": 147540, "total_steps": 204665, "loss": 0.1113, "lr": 4.38263968531278e-07, "epoch": 3.6044267461461414, "percentage": 72.09, "elapsed_time": "3:11:07", "remaining_time": "1:13:59", "throughput": 8671.25, "total_tokens": 99434128} +{"current_steps": 147545, "total_steps": 204665, "loss": 0.0002, "lr": 4.38193419006736e-07, "epoch": 3.604548896977988, "percentage": 72.09, "elapsed_time": "3:11:07", "remaining_time": "1:13:59", "throughput": 8671.29, "total_tokens": 99437584} +{"current_steps": 147550, "total_steps": 204665, "loss": 0.0, "lr": 4.381228735678022e-07, "epoch": 3.604671047809836, "percentage": 72.09, "elapsed_time": "3:11:07", "remaining_time": "1:13:59", "throughput": 8671.29, "total_tokens": 99440656} +{"current_steps": 147555, "total_steps": 204665, "loss": 0.0, "lr": 4.380523322149903e-07, "epoch": 3.6047931986416826, "percentage": 72.1, "elapsed_time": "3:11:08", "remaining_time": "1:13:58", "throughput": 8671.36, "total_tokens": 99444560} +{"current_steps": 147560, "total_steps": 204665, "loss": 0.0, "lr": 4.379817949488127e-07, "epoch": 3.6049153494735298, "percentage": 72.1, "elapsed_time": "3:11:08", "remaining_time": "1:13:58", "throughput": 8671.39, "total_tokens": 99447824} +{"current_steps": 147565, "total_steps": 204665, "loss": 0.0001, "lr": 4.3791126176978254e-07, "epoch": 3.605037500305377, "percentage": 72.1, "elapsed_time": "3:11:08", "remaining_time": "1:13:57", "throughput": 8671.49, "total_tokens": 99452304} +{"current_steps": 147570, "total_steps": 204665, "loss": 0.0, "lr": 4.378407326784134e-07, "epoch": 3.605159651137224, "percentage": 72.1, "elapsed_time": "3:11:09", "remaining_time": "1:13:57", "throughput": 8671.54, "total_tokens": 99455888} +{"current_steps": 147575, "total_steps": 204665, "loss": 0.0001, "lr": 4.377702076752171e-07, "epoch": 3.6052818019690713, "percentage": 72.11, "elapsed_time": "3:11:09", "remaining_time": "1:13:57", "throughput": 8671.58, "total_tokens": 99459408} +{"current_steps": 147580, "total_steps": 204665, "loss": 0.0, "lr": 4.3769968676070757e-07, "epoch": 3.6054039528009185, "percentage": 72.11, "elapsed_time": "3:11:09", "remaining_time": "1:13:56", "throughput": 8671.62, "total_tokens": 99462928} +{"current_steps": 147585, "total_steps": 204665, "loss": 0.0, "lr": 4.376291699353968e-07, "epoch": 3.6055261036327657, "percentage": 72.11, "elapsed_time": "3:11:10", "remaining_time": "1:13:56", "throughput": 8671.64, "total_tokens": 99466128} +{"current_steps": 147590, "total_steps": 204665, "loss": 0.0, "lr": 4.375586571997979e-07, "epoch": 3.605648254464613, "percentage": 72.11, "elapsed_time": "3:11:10", "remaining_time": "1:13:55", "throughput": 8671.66, "total_tokens": 99469328} +{"current_steps": 147595, "total_steps": 204665, "loss": 0.0, "lr": 4.374881485544241e-07, "epoch": 3.60577040529646, "percentage": 72.12, "elapsed_time": "3:11:10", "remaining_time": "1:13:55", "throughput": 8671.7, "total_tokens": 99472848} +{"current_steps": 147600, "total_steps": 204665, "loss": 0.0, "lr": 4.3741764399978743e-07, "epoch": 3.6058925561283073, "percentage": 72.12, "elapsed_time": "3:11:11", "remaining_time": "1:13:55", "throughput": 8671.76, "total_tokens": 99476560} +{"current_steps": 147605, "total_steps": 204665, "loss": 0.0, "lr": 4.3734714353640124e-07, "epoch": 3.6060147069601545, "percentage": 72.12, "elapsed_time": "3:11:11", "remaining_time": "1:13:54", "throughput": 8671.8, "total_tokens": 99480016} +{"current_steps": 147610, "total_steps": 204665, "loss": 0.0, "lr": 4.3727664716477773e-07, "epoch": 3.6061368577920017, "percentage": 72.12, "elapsed_time": "3:11:12", "remaining_time": "1:13:54", "throughput": 8671.89, "total_tokens": 99484304} +{"current_steps": 147615, "total_steps": 204665, "loss": 0.0, "lr": 4.372061548854301e-07, "epoch": 3.606259008623849, "percentage": 72.13, "elapsed_time": "3:11:12", "remaining_time": "1:13:53", "throughput": 8671.91, "total_tokens": 99487440} +{"current_steps": 147620, "total_steps": 204665, "loss": 0.0258, "lr": 4.371356666988704e-07, "epoch": 3.606381159455696, "percentage": 72.13, "elapsed_time": "3:11:12", "remaining_time": "1:13:53", "throughput": 8671.92, "total_tokens": 99490640} +{"current_steps": 147625, "total_steps": 204665, "loss": 0.0, "lr": 4.3706518260561175e-07, "epoch": 3.6065033102875432, "percentage": 72.13, "elapsed_time": "3:11:13", "remaining_time": "1:13:53", "throughput": 8671.94, "total_tokens": 99493776} +{"current_steps": 147630, "total_steps": 204665, "loss": 0.0891, "lr": 4.369947026061662e-07, "epoch": 3.60662546111939, "percentage": 72.13, "elapsed_time": "3:11:13", "remaining_time": "1:13:52", "throughput": 8671.96, "total_tokens": 99496912} +{"current_steps": 147635, "total_steps": 204665, "loss": 0.0, "lr": 4.36924226701047e-07, "epoch": 3.6067476119512376, "percentage": 72.13, "elapsed_time": "3:11:13", "remaining_time": "1:13:52", "throughput": 8671.99, "total_tokens": 99500304} +{"current_steps": 147640, "total_steps": 204665, "loss": 0.0, "lr": 4.368537548907663e-07, "epoch": 3.6068697627830844, "percentage": 72.14, "elapsed_time": "3:11:14", "remaining_time": "1:13:51", "throughput": 8672.01, "total_tokens": 99503504} +{"current_steps": 147645, "total_steps": 204665, "loss": 0.0001, "lr": 4.3678328717583614e-07, "epoch": 3.606991913614932, "percentage": 72.14, "elapsed_time": "3:11:14", "remaining_time": "1:13:51", "throughput": 8672.08, "total_tokens": 99507408} +{"current_steps": 147650, "total_steps": 204665, "loss": 0.1092, "lr": 4.3671282355676976e-07, "epoch": 3.6071140644467787, "percentage": 72.14, "elapsed_time": "3:11:14", "remaining_time": "1:13:50", "throughput": 8672.12, "total_tokens": 99510928} +{"current_steps": 147655, "total_steps": 204665, "loss": 0.0476, "lr": 4.3664236403407886e-07, "epoch": 3.607236215278626, "percentage": 72.14, "elapsed_time": "3:11:15", "remaining_time": "1:13:50", "throughput": 8672.14, "total_tokens": 99514192} +{"current_steps": 147660, "total_steps": 204665, "loss": 0.0003, "lr": 4.365719086082762e-07, "epoch": 3.607358366110473, "percentage": 72.15, "elapsed_time": "3:11:15", "remaining_time": "1:13:50", "throughput": 8672.16, "total_tokens": 99517392} +{"current_steps": 147665, "total_steps": 204665, "loss": 0.0533, "lr": 4.365014572798745e-07, "epoch": 3.6074805169423203, "percentage": 72.15, "elapsed_time": "3:11:15", "remaining_time": "1:13:49", "throughput": 8672.19, "total_tokens": 99520784} +{"current_steps": 147670, "total_steps": 204665, "loss": 0.0, "lr": 4.364310100493855e-07, "epoch": 3.6076026677741675, "percentage": 72.15, "elapsed_time": "3:11:16", "remaining_time": "1:13:49", "throughput": 8672.23, "total_tokens": 99524304} +{"current_steps": 147675, "total_steps": 204665, "loss": 0.0, "lr": 4.3636056691732214e-07, "epoch": 3.6077248186060147, "percentage": 72.15, "elapsed_time": "3:11:16", "remaining_time": "1:13:48", "throughput": 8672.26, "total_tokens": 99527632} +{"current_steps": 147680, "total_steps": 204665, "loss": 0.0002, "lr": 4.3629012788419597e-07, "epoch": 3.607846969437862, "percentage": 72.16, "elapsed_time": "3:11:16", "remaining_time": "1:13:48", "throughput": 8672.29, "total_tokens": 99530896} +{"current_steps": 147685, "total_steps": 204665, "loss": 0.0, "lr": 4.3621969295051953e-07, "epoch": 3.607969120269709, "percentage": 72.16, "elapsed_time": "3:11:17", "remaining_time": "1:13:48", "throughput": 8672.3, "total_tokens": 99534032} +{"current_steps": 147690, "total_steps": 204665, "loss": 0.0, "lr": 4.361492621168056e-07, "epoch": 3.6080912711015563, "percentage": 72.16, "elapsed_time": "3:11:17", "remaining_time": "1:13:47", "throughput": 8672.35, "total_tokens": 99537680} +{"current_steps": 147695, "total_steps": 204665, "loss": 0.0, "lr": 4.360788353835654e-07, "epoch": 3.6082134219334034, "percentage": 72.16, "elapsed_time": "3:11:17", "remaining_time": "1:13:47", "throughput": 8672.39, "total_tokens": 99541136} +{"current_steps": 147700, "total_steps": 204665, "loss": 0.0001, "lr": 4.3600841275131204e-07, "epoch": 3.6083355727652506, "percentage": 72.17, "elapsed_time": "3:11:18", "remaining_time": "1:13:46", "throughput": 8672.41, "total_tokens": 99544336} +{"current_steps": 147705, "total_steps": 204665, "loss": 0.0, "lr": 4.359379942205568e-07, "epoch": 3.608457723597098, "percentage": 72.17, "elapsed_time": "3:11:18", "remaining_time": "1:13:46", "throughput": 8672.48, "total_tokens": 99548176} +{"current_steps": 147710, "total_steps": 204665, "loss": 0.0, "lr": 4.3586757979181254e-07, "epoch": 3.608579874428945, "percentage": 72.17, "elapsed_time": "3:11:18", "remaining_time": "1:13:46", "throughput": 8672.5, "total_tokens": 99551376} +{"current_steps": 147715, "total_steps": 204665, "loss": 0.0, "lr": 4.3579716946559064e-07, "epoch": 3.6087020252607918, "percentage": 72.17, "elapsed_time": "3:11:19", "remaining_time": "1:13:45", "throughput": 8672.52, "total_tokens": 99554704} +{"current_steps": 147720, "total_steps": 204665, "loss": 0.0, "lr": 4.357267632424034e-07, "epoch": 3.6088241760926394, "percentage": 72.18, "elapsed_time": "3:11:19", "remaining_time": "1:13:45", "throughput": 8672.56, "total_tokens": 99558224} +{"current_steps": 147725, "total_steps": 204665, "loss": 0.0451, "lr": 4.356563611227634e-07, "epoch": 3.608946326924486, "percentage": 72.18, "elapsed_time": "3:11:20", "remaining_time": "1:13:44", "throughput": 8672.56, "total_tokens": 99561168} +{"current_steps": 147730, "total_steps": 204665, "loss": 0.0, "lr": 4.3558596310718166e-07, "epoch": 3.6090684777563338, "percentage": 72.18, "elapsed_time": "3:11:20", "remaining_time": "1:13:44", "throughput": 8672.6, "total_tokens": 99564560} +{"current_steps": 147735, "total_steps": 204665, "loss": 0.0, "lr": 4.355155691961711e-07, "epoch": 3.6091906285881805, "percentage": 72.18, "elapsed_time": "3:11:20", "remaining_time": "1:13:44", "throughput": 8672.61, "total_tokens": 99567696} +{"current_steps": 147740, "total_steps": 204665, "loss": 0.0, "lr": 4.3544517939024304e-07, "epoch": 3.6093127794200277, "percentage": 72.19, "elapsed_time": "3:11:21", "remaining_time": "1:13:43", "throughput": 8672.61, "total_tokens": 99570640} +{"current_steps": 147745, "total_steps": 204665, "loss": 0.0001, "lr": 4.3537479368990917e-07, "epoch": 3.609434930251875, "percentage": 72.19, "elapsed_time": "3:11:21", "remaining_time": "1:13:43", "throughput": 8672.65, "total_tokens": 99574096} +{"current_steps": 147750, "total_steps": 204665, "loss": 0.0, "lr": 4.353044120956819e-07, "epoch": 3.609557081083722, "percentage": 72.19, "elapsed_time": "3:11:21", "remaining_time": "1:13:42", "throughput": 8672.69, "total_tokens": 99577616} +{"current_steps": 147755, "total_steps": 204665, "loss": 0.0, "lr": 4.352340346080726e-07, "epoch": 3.6096792319155693, "percentage": 72.19, "elapsed_time": "3:11:22", "remaining_time": "1:13:42", "throughput": 8672.76, "total_tokens": 99581456} +{"current_steps": 147760, "total_steps": 204665, "loss": 0.0001, "lr": 4.351636612275936e-07, "epoch": 3.6098013827474165, "percentage": 72.2, "elapsed_time": "3:11:22", "remaining_time": "1:13:42", "throughput": 8672.79, "total_tokens": 99584912} +{"current_steps": 147765, "total_steps": 204665, "loss": 0.0, "lr": 4.350932919547561e-07, "epoch": 3.6099235335792637, "percentage": 72.2, "elapsed_time": "3:11:22", "remaining_time": "1:13:41", "throughput": 8672.81, "total_tokens": 99588112} +{"current_steps": 147770, "total_steps": 204665, "loss": 0.0, "lr": 4.350229267900725e-07, "epoch": 3.610045684411111, "percentage": 72.2, "elapsed_time": "3:11:23", "remaining_time": "1:13:41", "throughput": 8672.91, "total_tokens": 99592400} +{"current_steps": 147775, "total_steps": 204665, "loss": 0.0, "lr": 4.349525657340536e-07, "epoch": 3.610167835242958, "percentage": 72.2, "elapsed_time": "3:11:23", "remaining_time": "1:13:40", "throughput": 8672.93, "total_tokens": 99595664} +{"current_steps": 147780, "total_steps": 204665, "loss": 0.0005, "lr": 4.348822087872117e-07, "epoch": 3.6102899860748052, "percentage": 72.21, "elapsed_time": "3:11:23", "remaining_time": "1:13:40", "throughput": 8672.93, "total_tokens": 99598608} +{"current_steps": 147785, "total_steps": 204665, "loss": 0.0711, "lr": 4.3481185595005875e-07, "epoch": 3.6104121369066524, "percentage": 72.21, "elapsed_time": "3:11:24", "remaining_time": "1:13:40", "throughput": 8672.94, "total_tokens": 99601616} +{"current_steps": 147790, "total_steps": 204665, "loss": 0.0001, "lr": 4.347415072231058e-07, "epoch": 3.6105342877384996, "percentage": 72.21, "elapsed_time": "3:11:24", "remaining_time": "1:13:39", "throughput": 8672.97, "total_tokens": 99604944} +{"current_steps": 147795, "total_steps": 204665, "loss": 0.0344, "lr": 4.3467116260686474e-07, "epoch": 3.610656438570347, "percentage": 72.21, "elapsed_time": "3:11:24", "remaining_time": "1:13:39", "throughput": 8673.0, "total_tokens": 99608272} +{"current_steps": 147800, "total_steps": 204665, "loss": 0.0, "lr": 4.3460082210184677e-07, "epoch": 3.610778589402194, "percentage": 72.22, "elapsed_time": "3:11:25", "remaining_time": "1:13:38", "throughput": 8673.03, "total_tokens": 99611664} +{"current_steps": 147805, "total_steps": 204665, "loss": 0.0, "lr": 4.345304857085642e-07, "epoch": 3.610900740234041, "percentage": 72.22, "elapsed_time": "3:11:25", "remaining_time": "1:13:38", "throughput": 8673.04, "total_tokens": 99614800} +{"current_steps": 147810, "total_steps": 204665, "loss": 0.0, "lr": 4.344601534275275e-07, "epoch": 3.611022891065888, "percentage": 72.22, "elapsed_time": "3:11:25", "remaining_time": "1:13:38", "throughput": 8673.06, "total_tokens": 99618000} +{"current_steps": 147815, "total_steps": 204665, "loss": 0.0, "lr": 4.3438982525924874e-07, "epoch": 3.6111450418977356, "percentage": 72.22, "elapsed_time": "3:11:26", "remaining_time": "1:13:37", "throughput": 8673.1, "total_tokens": 99621456} +{"current_steps": 147820, "total_steps": 204665, "loss": 0.0, "lr": 4.3431950120423963e-07, "epoch": 3.6112671927295823, "percentage": 72.23, "elapsed_time": "3:11:26", "remaining_time": "1:13:37", "throughput": 8673.14, "total_tokens": 99625040} +{"current_steps": 147825, "total_steps": 204665, "loss": 0.0, "lr": 4.342491812630109e-07, "epoch": 3.6113893435614295, "percentage": 72.23, "elapsed_time": "3:11:26", "remaining_time": "1:13:36", "throughput": 8673.2, "total_tokens": 99628752} +{"current_steps": 147830, "total_steps": 204665, "loss": 0.0, "lr": 4.3417886543607474e-07, "epoch": 3.6115114943932767, "percentage": 72.23, "elapsed_time": "3:11:27", "remaining_time": "1:13:36", "throughput": 8673.25, "total_tokens": 99632464} +{"current_steps": 147835, "total_steps": 204665, "loss": 0.0775, "lr": 4.341085537239416e-07, "epoch": 3.611633645225124, "percentage": 72.23, "elapsed_time": "3:11:27", "remaining_time": "1:13:36", "throughput": 8673.26, "total_tokens": 99635472} +{"current_steps": 147840, "total_steps": 204665, "loss": 0.0, "lr": 4.340382461271237e-07, "epoch": 3.611755796056971, "percentage": 72.24, "elapsed_time": "3:11:28", "remaining_time": "1:13:35", "throughput": 8673.29, "total_tokens": 99638928} +{"current_steps": 147845, "total_steps": 204665, "loss": 0.0, "lr": 4.339679426461319e-07, "epoch": 3.6118779468888182, "percentage": 72.24, "elapsed_time": "3:11:28", "remaining_time": "1:13:35", "throughput": 8673.32, "total_tokens": 99642320} +{"current_steps": 147850, "total_steps": 204665, "loss": 0.0, "lr": 4.3389764328147706e-07, "epoch": 3.6120000977206654, "percentage": 72.24, "elapsed_time": "3:11:28", "remaining_time": "1:13:34", "throughput": 8673.4, "total_tokens": 99646352} +{"current_steps": 147855, "total_steps": 204665, "loss": 0.0453, "lr": 4.338273480336712e-07, "epoch": 3.6121222485525126, "percentage": 72.24, "elapsed_time": "3:11:29", "remaining_time": "1:13:34", "throughput": 8673.42, "total_tokens": 99649552} +{"current_steps": 147860, "total_steps": 204665, "loss": 0.0, "lr": 4.3375705690322474e-07, "epoch": 3.61224439938436, "percentage": 72.24, "elapsed_time": "3:11:29", "remaining_time": "1:13:34", "throughput": 8673.47, "total_tokens": 99653136} +{"current_steps": 147865, "total_steps": 204665, "loss": 0.0, "lr": 4.336867698906497e-07, "epoch": 3.612366550216207, "percentage": 72.25, "elapsed_time": "3:11:29", "remaining_time": "1:13:33", "throughput": 8673.45, "total_tokens": 99655888} +{"current_steps": 147870, "total_steps": 204665, "loss": 0.0, "lr": 4.3361648699645623e-07, "epoch": 3.612488701048054, "percentage": 72.25, "elapsed_time": "3:11:30", "remaining_time": "1:13:33", "throughput": 8673.49, "total_tokens": 99659408} +{"current_steps": 147875, "total_steps": 204665, "loss": 0.0, "lr": 4.335462082211563e-07, "epoch": 3.6126108518799014, "percentage": 72.25, "elapsed_time": "3:11:30", "remaining_time": "1:13:32", "throughput": 8673.56, "total_tokens": 99663184} +{"current_steps": 147880, "total_steps": 204665, "loss": 0.0002, "lr": 4.3347593356526093e-07, "epoch": 3.6127330027117486, "percentage": 72.25, "elapsed_time": "3:11:30", "remaining_time": "1:13:32", "throughput": 8673.58, "total_tokens": 99666512} +{"current_steps": 147885, "total_steps": 204665, "loss": 0.0, "lr": 4.334056630292805e-07, "epoch": 3.6128551535435958, "percentage": 72.26, "elapsed_time": "3:11:31", "remaining_time": "1:13:31", "throughput": 8673.61, "total_tokens": 99669840} +{"current_steps": 147890, "total_steps": 204665, "loss": 0.0001, "lr": 4.33335396613727e-07, "epoch": 3.612977304375443, "percentage": 72.26, "elapsed_time": "3:11:31", "remaining_time": "1:13:31", "throughput": 8673.64, "total_tokens": 99673232} +{"current_steps": 147895, "total_steps": 204665, "loss": 0.0001, "lr": 4.332651343191104e-07, "epoch": 3.6130994552072897, "percentage": 72.26, "elapsed_time": "3:11:31", "remaining_time": "1:13:31", "throughput": 8673.65, "total_tokens": 99676304} +{"current_steps": 147900, "total_steps": 204665, "loss": 0.0009, "lr": 4.331948761459423e-07, "epoch": 3.6132216060391373, "percentage": 72.26, "elapsed_time": "3:11:32", "remaining_time": "1:13:30", "throughput": 8673.67, "total_tokens": 99679568} +{"current_steps": 147905, "total_steps": 204665, "loss": 0.0, "lr": 4.331246220947338e-07, "epoch": 3.613343756870984, "percentage": 72.27, "elapsed_time": "3:11:32", "remaining_time": "1:13:30", "throughput": 8673.7, "total_tokens": 99682896} +{"current_steps": 147910, "total_steps": 204665, "loss": 0.0, "lr": 4.3305437216599517e-07, "epoch": 3.6134659077028317, "percentage": 72.27, "elapsed_time": "3:11:32", "remaining_time": "1:13:29", "throughput": 8673.76, "total_tokens": 99686544} +{"current_steps": 147915, "total_steps": 204665, "loss": 0.0001, "lr": 4.3298412636023797e-07, "epoch": 3.6135880585346785, "percentage": 72.27, "elapsed_time": "3:11:33", "remaining_time": "1:13:29", "throughput": 8673.8, "total_tokens": 99690192} +{"current_steps": 147920, "total_steps": 204665, "loss": 0.0, "lr": 4.329138846779724e-07, "epoch": 3.6137102093665256, "percentage": 72.27, "elapsed_time": "3:11:33", "remaining_time": "1:13:29", "throughput": 8673.85, "total_tokens": 99693840} +{"current_steps": 147925, "total_steps": 204665, "loss": 0.0006, "lr": 4.3284364711971e-07, "epoch": 3.613832360198373, "percentage": 72.28, "elapsed_time": "3:11:33", "remaining_time": "1:13:28", "throughput": 8673.87, "total_tokens": 99697040} +{"current_steps": 147930, "total_steps": 204665, "loss": 0.0406, "lr": 4.3277341368596066e-07, "epoch": 3.61395451103022, "percentage": 72.28, "elapsed_time": "3:11:34", "remaining_time": "1:13:28", "throughput": 8673.89, "total_tokens": 99700304} +{"current_steps": 147935, "total_steps": 204665, "loss": 0.049, "lr": 4.327031843772361e-07, "epoch": 3.614076661862067, "percentage": 72.28, "elapsed_time": "3:11:34", "remaining_time": "1:13:27", "throughput": 8673.93, "total_tokens": 99703760} +{"current_steps": 147940, "total_steps": 204665, "loss": 0.0001, "lr": 4.3263295919404605e-07, "epoch": 3.6141988126939144, "percentage": 72.28, "elapsed_time": "3:11:34", "remaining_time": "1:13:27", "throughput": 8673.96, "total_tokens": 99707152} +{"current_steps": 147945, "total_steps": 204665, "loss": 0.0, "lr": 4.3256273813690227e-07, "epoch": 3.6143209635257616, "percentage": 72.29, "elapsed_time": "3:11:35", "remaining_time": "1:13:27", "throughput": 8673.99, "total_tokens": 99710416} +{"current_steps": 147950, "total_steps": 204665, "loss": 0.0001, "lr": 4.3249252120631474e-07, "epoch": 3.614443114357609, "percentage": 72.29, "elapsed_time": "3:11:35", "remaining_time": "1:13:26", "throughput": 8674.02, "total_tokens": 99713872} +{"current_steps": 147955, "total_steps": 204665, "loss": 0.0947, "lr": 4.3242230840279393e-07, "epoch": 3.614565265189456, "percentage": 72.29, "elapsed_time": "3:11:36", "remaining_time": "1:13:26", "throughput": 8674.07, "total_tokens": 99717456} +{"current_steps": 147960, "total_steps": 204665, "loss": 0.0002, "lr": 4.3235209972685117e-07, "epoch": 3.614687416021303, "percentage": 72.29, "elapsed_time": "3:11:36", "remaining_time": "1:13:25", "throughput": 8674.12, "total_tokens": 99721168} +{"current_steps": 147965, "total_steps": 204665, "loss": 0.0001, "lr": 4.3228189517899616e-07, "epoch": 3.6148095668531504, "percentage": 72.3, "elapsed_time": "3:11:36", "remaining_time": "1:13:25", "throughput": 8674.18, "total_tokens": 99724880} +{"current_steps": 147970, "total_steps": 204665, "loss": 0.0, "lr": 4.3221169475973994e-07, "epoch": 3.6149317176849975, "percentage": 72.3, "elapsed_time": "3:11:37", "remaining_time": "1:13:25", "throughput": 8674.2, "total_tokens": 99728144} +{"current_steps": 147975, "total_steps": 204665, "loss": 0.0, "lr": 4.3214149846959336e-07, "epoch": 3.6150538685168447, "percentage": 72.3, "elapsed_time": "3:11:37", "remaining_time": "1:13:24", "throughput": 8674.2, "total_tokens": 99731152} +{"current_steps": 147980, "total_steps": 204665, "loss": 0.0, "lr": 4.320713063090662e-07, "epoch": 3.615176019348692, "percentage": 72.3, "elapsed_time": "3:11:37", "remaining_time": "1:13:24", "throughput": 8674.23, "total_tokens": 99734416} +{"current_steps": 147985, "total_steps": 204665, "loss": 0.0224, "lr": 4.320011182786696e-07, "epoch": 3.615298170180539, "percentage": 72.31, "elapsed_time": "3:11:38", "remaining_time": "1:13:23", "throughput": 8674.26, "total_tokens": 99737872} +{"current_steps": 147990, "total_steps": 204665, "loss": 0.0, "lr": 4.319309343789133e-07, "epoch": 3.615420321012386, "percentage": 72.31, "elapsed_time": "3:11:38", "remaining_time": "1:13:23", "throughput": 8674.29, "total_tokens": 99741136} +{"current_steps": 147995, "total_steps": 204665, "loss": 0.0, "lr": 4.3186075461030803e-07, "epoch": 3.6155424718442335, "percentage": 72.31, "elapsed_time": "3:11:38", "remaining_time": "1:13:23", "throughput": 8674.3, "total_tokens": 99744208} +{"current_steps": 148000, "total_steps": 204665, "loss": 0.0441, "lr": 4.317905789733645e-07, "epoch": 3.6156646226760802, "percentage": 72.31, "elapsed_time": "3:11:39", "remaining_time": "1:13:22", "throughput": 8674.4, "total_tokens": 99748624} +{"current_steps": 148005, "total_steps": 204665, "loss": 0.0, "lr": 4.317204074685924e-07, "epoch": 3.6157867735079274, "percentage": 72.32, "elapsed_time": "3:11:39", "remaining_time": "1:13:22", "throughput": 8674.41, "total_tokens": 99751632} +{"current_steps": 148010, "total_steps": 204665, "loss": 0.0, "lr": 4.316502400965026e-07, "epoch": 3.6159089243397746, "percentage": 72.32, "elapsed_time": "3:11:39", "remaining_time": "1:13:21", "throughput": 8674.43, "total_tokens": 99754896} +{"current_steps": 148015, "total_steps": 204665, "loss": 0.0, "lr": 4.315800768576049e-07, "epoch": 3.616031075171622, "percentage": 72.32, "elapsed_time": "3:11:40", "remaining_time": "1:13:21", "throughput": 8674.46, "total_tokens": 99758288} +{"current_steps": 148020, "total_steps": 204665, "loss": 0.0, "lr": 4.3150991775241e-07, "epoch": 3.616153226003469, "percentage": 72.32, "elapsed_time": "3:11:40", "remaining_time": "1:13:21", "throughput": 8674.46, "total_tokens": 99761104} +{"current_steps": 148025, "total_steps": 204665, "loss": 0.0001, "lr": 4.314397627814276e-07, "epoch": 3.616275376835316, "percentage": 72.33, "elapsed_time": "3:11:40", "remaining_time": "1:13:20", "throughput": 8674.47, "total_tokens": 99764240} +{"current_steps": 148030, "total_steps": 204665, "loss": 0.0235, "lr": 4.3136961194516817e-07, "epoch": 3.6163975276671634, "percentage": 72.33, "elapsed_time": "3:11:41", "remaining_time": "1:13:20", "throughput": 8674.51, "total_tokens": 99767696} +{"current_steps": 148035, "total_steps": 204665, "loss": 0.0382, "lr": 4.312994652441422e-07, "epoch": 3.6165196784990106, "percentage": 72.33, "elapsed_time": "3:11:41", "remaining_time": "1:13:19", "throughput": 8674.51, "total_tokens": 99770704} +{"current_steps": 148040, "total_steps": 204665, "loss": 0.0, "lr": 4.3122932267885917e-07, "epoch": 3.6166418293308578, "percentage": 72.33, "elapsed_time": "3:11:41", "remaining_time": "1:13:19", "throughput": 8674.54, "total_tokens": 99774096} +{"current_steps": 148045, "total_steps": 204665, "loss": 0.0, "lr": 4.311591842498298e-07, "epoch": 3.616763980162705, "percentage": 72.34, "elapsed_time": "3:11:42", "remaining_time": "1:13:19", "throughput": 8674.59, "total_tokens": 99777616} +{"current_steps": 148050, "total_steps": 204665, "loss": 0.0, "lr": 4.310890499575638e-07, "epoch": 3.616886130994552, "percentage": 72.34, "elapsed_time": "3:11:42", "remaining_time": "1:13:18", "throughput": 8674.6, "total_tokens": 99780816} +{"current_steps": 148055, "total_steps": 204665, "loss": 0.0001, "lr": 4.3101891980257086e-07, "epoch": 3.6170082818263993, "percentage": 72.34, "elapsed_time": "3:11:42", "remaining_time": "1:13:18", "throughput": 8674.62, "total_tokens": 99783952} +{"current_steps": 148060, "total_steps": 204665, "loss": 0.0001, "lr": 4.3094879378536185e-07, "epoch": 3.6171304326582465, "percentage": 72.34, "elapsed_time": "3:11:43", "remaining_time": "1:13:17", "throughput": 8674.66, "total_tokens": 99787472} +{"current_steps": 148065, "total_steps": 204665, "loss": 0.0, "lr": 4.308786719064459e-07, "epoch": 3.6172525834900937, "percentage": 72.35, "elapsed_time": "3:11:43", "remaining_time": "1:13:17", "throughput": 8674.66, "total_tokens": 99790416} +{"current_steps": 148070, "total_steps": 204665, "loss": 0.0005, "lr": 4.3080855416633363e-07, "epoch": 3.617374734321941, "percentage": 72.35, "elapsed_time": "3:11:44", "remaining_time": "1:13:17", "throughput": 8674.7, "total_tokens": 99793872} +{"current_steps": 148075, "total_steps": 204665, "loss": 0.0, "lr": 4.307384405655343e-07, "epoch": 3.6174968851537876, "percentage": 72.35, "elapsed_time": "3:11:44", "remaining_time": "1:13:16", "throughput": 8674.7, "total_tokens": 99796816} +{"current_steps": 148080, "total_steps": 204665, "loss": 0.0, "lr": 4.3066833110455845e-07, "epoch": 3.6176190359856353, "percentage": 72.35, "elapsed_time": "3:11:44", "remaining_time": "1:13:16", "throughput": 8674.72, "total_tokens": 99800080} +{"current_steps": 148085, "total_steps": 204665, "loss": 0.0265, "lr": 4.3059822578391536e-07, "epoch": 3.617741186817482, "percentage": 72.35, "elapsed_time": "3:11:45", "remaining_time": "1:13:15", "throughput": 8674.78, "total_tokens": 99803792} +{"current_steps": 148090, "total_steps": 204665, "loss": 0.0, "lr": 4.305281246041151e-07, "epoch": 3.6178633376493297, "percentage": 72.36, "elapsed_time": "3:11:45", "remaining_time": "1:13:15", "throughput": 8674.82, "total_tokens": 99807376} +{"current_steps": 148095, "total_steps": 204665, "loss": 0.0399, "lr": 4.3045802756566787e-07, "epoch": 3.6179854884811764, "percentage": 72.36, "elapsed_time": "3:11:45", "remaining_time": "1:13:15", "throughput": 8674.85, "total_tokens": 99810704} +{"current_steps": 148100, "total_steps": 204665, "loss": 0.0, "lr": 4.3038793466908266e-07, "epoch": 3.6181076393130236, "percentage": 72.36, "elapsed_time": "3:11:46", "remaining_time": "1:13:14", "throughput": 8674.91, "total_tokens": 99814544} +{"current_steps": 148105, "total_steps": 204665, "loss": 0.0001, "lr": 4.303178459148699e-07, "epoch": 3.618229790144871, "percentage": 72.36, "elapsed_time": "3:11:46", "remaining_time": "1:13:14", "throughput": 8674.99, "total_tokens": 99818512} +{"current_steps": 148110, "total_steps": 204665, "loss": 0.0, "lr": 4.3024776130353866e-07, "epoch": 3.618351940976718, "percentage": 72.37, "elapsed_time": "3:11:46", "remaining_time": "1:13:13", "throughput": 8675.04, "total_tokens": 99822160} +{"current_steps": 148115, "total_steps": 204665, "loss": 0.0001, "lr": 4.3017768083559933e-07, "epoch": 3.618474091808565, "percentage": 72.37, "elapsed_time": "3:11:47", "remaining_time": "1:13:13", "throughput": 8675.07, "total_tokens": 99825552} +{"current_steps": 148120, "total_steps": 204665, "loss": 0.0001, "lr": 4.301076045115608e-07, "epoch": 3.6185962426404124, "percentage": 72.37, "elapsed_time": "3:11:47", "remaining_time": "1:13:13", "throughput": 8675.08, "total_tokens": 99828624} +{"current_steps": 148125, "total_steps": 204665, "loss": 0.0, "lr": 4.3003753233193305e-07, "epoch": 3.6187183934722595, "percentage": 72.37, "elapsed_time": "3:11:47", "remaining_time": "1:13:12", "throughput": 8675.1, "total_tokens": 99831888} +{"current_steps": 148130, "total_steps": 204665, "loss": 0.0, "lr": 4.299674642972261e-07, "epoch": 3.6188405443041067, "percentage": 72.38, "elapsed_time": "3:11:48", "remaining_time": "1:13:12", "throughput": 8675.13, "total_tokens": 99835280} +{"current_steps": 148135, "total_steps": 204665, "loss": 0.0, "lr": 4.2989740040794864e-07, "epoch": 3.618962695135954, "percentage": 72.38, "elapsed_time": "3:11:48", "remaining_time": "1:13:11", "throughput": 8675.16, "total_tokens": 99838608} +{"current_steps": 148140, "total_steps": 204665, "loss": 0.0, "lr": 4.29827340664611e-07, "epoch": 3.619084845967801, "percentage": 72.38, "elapsed_time": "3:11:48", "remaining_time": "1:13:11", "throughput": 8675.18, "total_tokens": 99841872} +{"current_steps": 148145, "total_steps": 204665, "loss": 0.0001, "lr": 4.2975728506772193e-07, "epoch": 3.6192069967996483, "percentage": 72.38, "elapsed_time": "3:11:49", "remaining_time": "1:13:10", "throughput": 8675.2, "total_tokens": 99845008} +{"current_steps": 148150, "total_steps": 204665, "loss": 0.0002, "lr": 4.296872336177916e-07, "epoch": 3.6193291476314955, "percentage": 72.39, "elapsed_time": "3:11:49", "remaining_time": "1:13:10", "throughput": 8675.24, "total_tokens": 99848464} +{"current_steps": 148155, "total_steps": 204665, "loss": 0.0, "lr": 4.2961718631532905e-07, "epoch": 3.6194512984633427, "percentage": 72.39, "elapsed_time": "3:11:49", "remaining_time": "1:13:10", "throughput": 8675.28, "total_tokens": 99851984} +{"current_steps": 148160, "total_steps": 204665, "loss": 0.0, "lr": 4.295471431608435e-07, "epoch": 3.6195734492951894, "percentage": 72.39, "elapsed_time": "3:11:50", "remaining_time": "1:13:09", "throughput": 8675.31, "total_tokens": 99855312} +{"current_steps": 148165, "total_steps": 204665, "loss": 0.0001, "lr": 4.2947710415484486e-07, "epoch": 3.619695600127037, "percentage": 72.39, "elapsed_time": "3:11:50", "remaining_time": "1:13:09", "throughput": 8675.35, "total_tokens": 99858896} +{"current_steps": 148170, "total_steps": 204665, "loss": 0.0, "lr": 4.2940706929784176e-07, "epoch": 3.619817750958884, "percentage": 72.4, "elapsed_time": "3:11:50", "remaining_time": "1:13:08", "throughput": 8675.38, "total_tokens": 99862160} +{"current_steps": 148175, "total_steps": 204665, "loss": 0.016, "lr": 4.2933703859034444e-07, "epoch": 3.6199399017907314, "percentage": 72.4, "elapsed_time": "3:11:51", "remaining_time": "1:13:08", "throughput": 8675.42, "total_tokens": 99865680} +{"current_steps": 148180, "total_steps": 204665, "loss": 0.0, "lr": 4.292670120328612e-07, "epoch": 3.620062052622578, "percentage": 72.4, "elapsed_time": "3:11:51", "remaining_time": "1:13:08", "throughput": 8675.43, "total_tokens": 99868816} +{"current_steps": 148185, "total_steps": 204665, "loss": 0.0, "lr": 4.291969896259017e-07, "epoch": 3.6201842034544254, "percentage": 72.4, "elapsed_time": "3:11:52", "remaining_time": "1:13:07", "throughput": 8675.46, "total_tokens": 99872144} +{"current_steps": 148190, "total_steps": 204665, "loss": 0.0001, "lr": 4.291269713699757e-07, "epoch": 3.6203063542862726, "percentage": 72.41, "elapsed_time": "3:11:52", "remaining_time": "1:13:07", "throughput": 8675.5, "total_tokens": 99875664} +{"current_steps": 148195, "total_steps": 204665, "loss": 0.0, "lr": 4.2905695726559145e-07, "epoch": 3.6204285051181198, "percentage": 72.41, "elapsed_time": "3:11:52", "remaining_time": "1:13:06", "throughput": 8675.55, "total_tokens": 99879312} +{"current_steps": 148200, "total_steps": 204665, "loss": 0.0003, "lr": 4.289869473132589e-07, "epoch": 3.620550655949967, "percentage": 72.41, "elapsed_time": "3:11:53", "remaining_time": "1:13:06", "throughput": 8675.57, "total_tokens": 99882640} +{"current_steps": 148205, "total_steps": 204665, "loss": 0.0004, "lr": 4.2891694151348654e-07, "epoch": 3.620672806781814, "percentage": 72.41, "elapsed_time": "3:11:53", "remaining_time": "1:13:06", "throughput": 8675.6, "total_tokens": 99885968} +{"current_steps": 148210, "total_steps": 204665, "loss": 0.0, "lr": 4.288469398667842e-07, "epoch": 3.6207949576136613, "percentage": 72.42, "elapsed_time": "3:11:53", "remaining_time": "1:13:05", "throughput": 8675.62, "total_tokens": 99889104} +{"current_steps": 148215, "total_steps": 204665, "loss": 0.0, "lr": 4.2877694237366014e-07, "epoch": 3.6209171084455085, "percentage": 72.42, "elapsed_time": "3:11:54", "remaining_time": "1:13:05", "throughput": 8675.64, "total_tokens": 99892304} +{"current_steps": 148220, "total_steps": 204665, "loss": 0.0002, "lr": 4.2870694903462377e-07, "epoch": 3.6210392592773557, "percentage": 72.42, "elapsed_time": "3:11:54", "remaining_time": "1:13:04", "throughput": 8675.8, "total_tokens": 99897680} +{"current_steps": 148225, "total_steps": 204665, "loss": 0.0004, "lr": 4.2863695985018453e-07, "epoch": 3.621161410109203, "percentage": 72.42, "elapsed_time": "3:11:54", "remaining_time": "1:13:04", "throughput": 8675.85, "total_tokens": 99901328} +{"current_steps": 148230, "total_steps": 204665, "loss": 0.0, "lr": 4.285669748208507e-07, "epoch": 3.62128356094105, "percentage": 72.43, "elapsed_time": "3:11:55", "remaining_time": "1:13:04", "throughput": 8675.86, "total_tokens": 99904336} +{"current_steps": 148235, "total_steps": 204665, "loss": 0.0, "lr": 4.284969939471318e-07, "epoch": 3.6214057117728973, "percentage": 72.43, "elapsed_time": "3:11:55", "remaining_time": "1:13:03", "throughput": 8675.87, "total_tokens": 99907472} +{"current_steps": 148240, "total_steps": 204665, "loss": 0.0003, "lr": 4.284270172295361e-07, "epoch": 3.6215278626047445, "percentage": 72.43, "elapsed_time": "3:11:55", "remaining_time": "1:13:03", "throughput": 8675.9, "total_tokens": 99910800} +{"current_steps": 148245, "total_steps": 204665, "loss": 0.0, "lr": 4.2835704466857325e-07, "epoch": 3.6216500134365917, "percentage": 72.43, "elapsed_time": "3:11:56", "remaining_time": "1:13:02", "throughput": 8675.96, "total_tokens": 99914512} +{"current_steps": 148250, "total_steps": 204665, "loss": 0.0, "lr": 4.2828707626475133e-07, "epoch": 3.621772164268439, "percentage": 72.44, "elapsed_time": "3:11:56", "remaining_time": "1:13:02", "throughput": 8675.97, "total_tokens": 99917712} +{"current_steps": 148255, "total_steps": 204665, "loss": 0.0, "lr": 4.2821711201858e-07, "epoch": 3.6218943151002856, "percentage": 72.44, "elapsed_time": "3:11:56", "remaining_time": "1:13:02", "throughput": 8675.99, "total_tokens": 99920848} +{"current_steps": 148260, "total_steps": 204665, "loss": 0.0, "lr": 4.281471519305676e-07, "epoch": 3.6220164659321332, "percentage": 72.44, "elapsed_time": "3:11:57", "remaining_time": "1:13:01", "throughput": 8676.02, "total_tokens": 99924304} +{"current_steps": 148265, "total_steps": 204665, "loss": 0.0, "lr": 4.280771960012225e-07, "epoch": 3.62213861676398, "percentage": 72.44, "elapsed_time": "3:11:57", "remaining_time": "1:13:01", "throughput": 8676.06, "total_tokens": 99927824} +{"current_steps": 148270, "total_steps": 204665, "loss": 0.0, "lr": 4.2800724423105427e-07, "epoch": 3.6222607675958276, "percentage": 72.45, "elapsed_time": "3:11:57", "remaining_time": "1:13:00", "throughput": 8676.1, "total_tokens": 99931344} +{"current_steps": 148275, "total_steps": 204665, "loss": 0.0433, "lr": 4.279372966205709e-07, "epoch": 3.6223829184276743, "percentage": 72.45, "elapsed_time": "3:11:58", "remaining_time": "1:13:00", "throughput": 8676.16, "total_tokens": 99935056} +{"current_steps": 148280, "total_steps": 204665, "loss": 0.0, "lr": 4.278673531702812e-07, "epoch": 3.6225050692595215, "percentage": 72.45, "elapsed_time": "3:11:58", "remaining_time": "1:13:00", "throughput": 8676.19, "total_tokens": 99938512} +{"current_steps": 148285, "total_steps": 204665, "loss": 0.0, "lr": 4.2779741388069445e-07, "epoch": 3.6226272200913687, "percentage": 72.45, "elapsed_time": "3:11:59", "remaining_time": "1:12:59", "throughput": 8676.22, "total_tokens": 99941904} +{"current_steps": 148290, "total_steps": 204665, "loss": 0.0, "lr": 4.277274787523185e-07, "epoch": 3.622749370923216, "percentage": 72.45, "elapsed_time": "3:11:59", "remaining_time": "1:12:59", "throughput": 8676.25, "total_tokens": 99945232} +{"current_steps": 148295, "total_steps": 204665, "loss": 0.0001, "lr": 4.2765754778566255e-07, "epoch": 3.622871521755063, "percentage": 72.46, "elapsed_time": "3:11:59", "remaining_time": "1:12:58", "throughput": 8676.29, "total_tokens": 99948688} +{"current_steps": 148300, "total_steps": 204665, "loss": 0.0, "lr": 4.275876209812346e-07, "epoch": 3.6229936725869103, "percentage": 72.46, "elapsed_time": "3:12:00", "remaining_time": "1:12:58", "throughput": 8676.33, "total_tokens": 99952144} +{"current_steps": 148305, "total_steps": 204665, "loss": 0.0001, "lr": 4.2751769833954334e-07, "epoch": 3.6231158234187575, "percentage": 72.46, "elapsed_time": "3:12:00", "remaining_time": "1:12:58", "throughput": 8676.36, "total_tokens": 99955536} +{"current_steps": 148310, "total_steps": 204665, "loss": 0.0, "lr": 4.274477798610977e-07, "epoch": 3.6232379742506047, "percentage": 72.46, "elapsed_time": "3:12:00", "remaining_time": "1:12:57", "throughput": 8676.39, "total_tokens": 99958928} +{"current_steps": 148315, "total_steps": 204665, "loss": 0.0, "lr": 4.2737786554640543e-07, "epoch": 3.623360125082452, "percentage": 72.47, "elapsed_time": "3:12:01", "remaining_time": "1:12:57", "throughput": 8676.42, "total_tokens": 99962256} +{"current_steps": 148320, "total_steps": 204665, "loss": 0.0, "lr": 4.2730795539597575e-07, "epoch": 3.623482275914299, "percentage": 72.47, "elapsed_time": "3:12:01", "remaining_time": "1:12:56", "throughput": 8676.52, "total_tokens": 99966608} +{"current_steps": 148325, "total_steps": 204665, "loss": 0.0688, "lr": 4.272380494103163e-07, "epoch": 3.6236044267461462, "percentage": 72.47, "elapsed_time": "3:12:01", "remaining_time": "1:12:56", "throughput": 8676.53, "total_tokens": 99969744} +{"current_steps": 148330, "total_steps": 204665, "loss": 0.0, "lr": 4.2716814758993614e-07, "epoch": 3.6237265775779934, "percentage": 72.47, "elapsed_time": "3:12:02", "remaining_time": "1:12:56", "throughput": 8676.56, "total_tokens": 99973072} +{"current_steps": 148335, "total_steps": 204665, "loss": 0.0, "lr": 4.270982499353429e-07, "epoch": 3.6238487284098406, "percentage": 72.48, "elapsed_time": "3:12:02", "remaining_time": "1:12:55", "throughput": 8676.59, "total_tokens": 99976528} +{"current_steps": 148340, "total_steps": 204665, "loss": 0.0, "lr": 4.2702835644704535e-07, "epoch": 3.6239708792416874, "percentage": 72.48, "elapsed_time": "3:12:02", "remaining_time": "1:12:55", "throughput": 8676.63, "total_tokens": 99979920} +{"current_steps": 148345, "total_steps": 204665, "loss": 0.0001, "lr": 4.2695846712555204e-07, "epoch": 3.624093030073535, "percentage": 72.48, "elapsed_time": "3:12:03", "remaining_time": "1:12:54", "throughput": 8676.66, "total_tokens": 99983376} +{"current_steps": 148350, "total_steps": 204665, "loss": 0.0, "lr": 4.2688858197137047e-07, "epoch": 3.6242151809053818, "percentage": 72.48, "elapsed_time": "3:12:03", "remaining_time": "1:12:54", "throughput": 8676.73, "total_tokens": 99987216} +{"current_steps": 148355, "total_steps": 204665, "loss": 0.0478, "lr": 4.268187009850097e-07, "epoch": 3.6243373317372294, "percentage": 72.49, "elapsed_time": "3:12:03", "remaining_time": "1:12:54", "throughput": 8676.74, "total_tokens": 99990352} +{"current_steps": 148360, "total_steps": 204665, "loss": 0.0891, "lr": 4.2674882416697746e-07, "epoch": 3.624459482569076, "percentage": 72.49, "elapsed_time": "3:12:04", "remaining_time": "1:12:53", "throughput": 8676.77, "total_tokens": 99993680} +{"current_steps": 148365, "total_steps": 204665, "loss": 0.0, "lr": 4.2667895151778167e-07, "epoch": 3.6245816334009233, "percentage": 72.49, "elapsed_time": "3:12:04", "remaining_time": "1:12:53", "throughput": 8676.77, "total_tokens": 99996560} +{"current_steps": 148370, "total_steps": 204665, "loss": 0.0, "lr": 4.266090830379311e-07, "epoch": 3.6247037842327705, "percentage": 72.49, "elapsed_time": "3:12:04", "remaining_time": "1:12:52", "throughput": 8676.78, "total_tokens": 99999696} +{"current_steps": 148375, "total_steps": 204665, "loss": 0.0, "lr": 4.265392187279331e-07, "epoch": 3.6248259350646177, "percentage": 72.5, "elapsed_time": "3:12:05", "remaining_time": "1:12:52", "throughput": 8676.8, "total_tokens": 100002832} +{"current_steps": 148380, "total_steps": 204665, "loss": 0.0001, "lr": 4.2646935858829644e-07, "epoch": 3.624948085896465, "percentage": 72.5, "elapsed_time": "3:12:05", "remaining_time": "1:12:52", "throughput": 8676.85, "total_tokens": 100006608} +{"current_steps": 148385, "total_steps": 204665, "loss": 0.0001, "lr": 4.2639950261952863e-07, "epoch": 3.625070236728312, "percentage": 72.5, "elapsed_time": "3:12:06", "remaining_time": "1:12:51", "throughput": 8676.89, "total_tokens": 100010000} +{"current_steps": 148390, "total_steps": 204665, "loss": 0.0, "lr": 4.263296508221381e-07, "epoch": 3.6251923875601593, "percentage": 72.5, "elapsed_time": "3:12:06", "remaining_time": "1:12:51", "throughput": 8676.91, "total_tokens": 100013328} +{"current_steps": 148395, "total_steps": 204665, "loss": 0.0, "lr": 4.262598031966325e-07, "epoch": 3.6253145383920065, "percentage": 72.51, "elapsed_time": "3:12:06", "remaining_time": "1:12:50", "throughput": 8676.93, "total_tokens": 100016528} +{"current_steps": 148400, "total_steps": 204665, "loss": 0.0, "lr": 4.261899597435198e-07, "epoch": 3.6254366892238536, "percentage": 72.51, "elapsed_time": "3:12:07", "remaining_time": "1:12:50", "throughput": 8676.95, "total_tokens": 100019728} +{"current_steps": 148405, "total_steps": 204665, "loss": 0.0001, "lr": 4.2612012046330846e-07, "epoch": 3.625558840055701, "percentage": 72.51, "elapsed_time": "3:12:07", "remaining_time": "1:12:50", "throughput": 8676.97, "total_tokens": 100022864} +{"current_steps": 148410, "total_steps": 204665, "loss": 0.0313, "lr": 4.2605028535650553e-07, "epoch": 3.625680990887548, "percentage": 72.51, "elapsed_time": "3:12:07", "remaining_time": "1:12:49", "throughput": 8677.0, "total_tokens": 100026256} +{"current_steps": 148415, "total_steps": 204665, "loss": 0.0, "lr": 4.259804544236197e-07, "epoch": 3.625803141719395, "percentage": 72.52, "elapsed_time": "3:12:08", "remaining_time": "1:12:49", "throughput": 8677.03, "total_tokens": 100029648} +{"current_steps": 148420, "total_steps": 204665, "loss": 0.0667, "lr": 4.2591062766515806e-07, "epoch": 3.6259252925512424, "percentage": 72.52, "elapsed_time": "3:12:08", "remaining_time": "1:12:48", "throughput": 8677.04, "total_tokens": 100032720} +{"current_steps": 148425, "total_steps": 204665, "loss": 0.0, "lr": 4.258408050816291e-07, "epoch": 3.6260474433830896, "percentage": 72.52, "elapsed_time": "3:12:08", "remaining_time": "1:12:48", "throughput": 8677.06, "total_tokens": 100035856} +{"current_steps": 148430, "total_steps": 204665, "loss": 0.0, "lr": 4.2577098667353996e-07, "epoch": 3.626169594214937, "percentage": 72.52, "elapsed_time": "3:12:09", "remaining_time": "1:12:47", "throughput": 8677.09, "total_tokens": 100039248} +{"current_steps": 148435, "total_steps": 204665, "loss": 0.0001, "lr": 4.257011724413985e-07, "epoch": 3.6262917450467835, "percentage": 72.53, "elapsed_time": "3:12:09", "remaining_time": "1:12:47", "throughput": 8677.14, "total_tokens": 100042896} +{"current_steps": 148440, "total_steps": 204665, "loss": 0.0002, "lr": 4.2563136238571307e-07, "epoch": 3.626413895878631, "percentage": 72.53, "elapsed_time": "3:12:09", "remaining_time": "1:12:47", "throughput": 8677.14, "total_tokens": 100045904} +{"current_steps": 148445, "total_steps": 204665, "loss": 0.0, "lr": 4.2556155650699045e-07, "epoch": 3.626536046710478, "percentage": 72.53, "elapsed_time": "3:12:10", "remaining_time": "1:12:46", "throughput": 8677.17, "total_tokens": 100049296} +{"current_steps": 148450, "total_steps": 204665, "loss": 0.0, "lr": 4.2549175480573897e-07, "epoch": 3.626658197542325, "percentage": 72.53, "elapsed_time": "3:12:10", "remaining_time": "1:12:46", "throughput": 8677.2, "total_tokens": 100052624} +{"current_steps": 148455, "total_steps": 204665, "loss": 0.0001, "lr": 4.2542195728246565e-07, "epoch": 3.6267803483741723, "percentage": 72.54, "elapsed_time": "3:12:10", "remaining_time": "1:12:45", "throughput": 8677.23, "total_tokens": 100056016} +{"current_steps": 148460, "total_steps": 204665, "loss": 0.0, "lr": 4.253521639376788e-07, "epoch": 3.6269024992060195, "percentage": 72.54, "elapsed_time": "3:12:11", "remaining_time": "1:12:45", "throughput": 8677.23, "total_tokens": 100058960} +{"current_steps": 148465, "total_steps": 204665, "loss": 0.0, "lr": 4.2528237477188566e-07, "epoch": 3.6270246500378667, "percentage": 72.54, "elapsed_time": "3:12:11", "remaining_time": "1:12:45", "throughput": 8677.29, "total_tokens": 100062672} +{"current_steps": 148470, "total_steps": 204665, "loss": 0.0, "lr": 4.2521258978559315e-07, "epoch": 3.627146800869714, "percentage": 72.54, "elapsed_time": "3:12:11", "remaining_time": "1:12:44", "throughput": 8677.32, "total_tokens": 100066064} +{"current_steps": 148475, "total_steps": 204665, "loss": 0.0, "lr": 4.2514280897930977e-07, "epoch": 3.627268951701561, "percentage": 72.55, "elapsed_time": "3:12:12", "remaining_time": "1:12:44", "throughput": 8677.36, "total_tokens": 100069648} +{"current_steps": 148480, "total_steps": 204665, "loss": 0.0, "lr": 4.250730323535421e-07, "epoch": 3.6273911025334082, "percentage": 72.55, "elapsed_time": "3:12:12", "remaining_time": "1:12:43", "throughput": 8677.38, "total_tokens": 100072784} +{"current_steps": 148485, "total_steps": 204665, "loss": 0.0, "lr": 4.2500325990879835e-07, "epoch": 3.6275132533652554, "percentage": 72.55, "elapsed_time": "3:12:12", "remaining_time": "1:12:43", "throughput": 8677.39, "total_tokens": 100075920} +{"current_steps": 148490, "total_steps": 204665, "loss": 0.0655, "lr": 4.249334916455851e-07, "epoch": 3.6276354041971026, "percentage": 72.55, "elapsed_time": "3:12:13", "remaining_time": "1:12:43", "throughput": 8677.45, "total_tokens": 100079632} +{"current_steps": 148495, "total_steps": 204665, "loss": 0.0257, "lr": 4.2486372756441027e-07, "epoch": 3.62775755502895, "percentage": 72.56, "elapsed_time": "3:12:13", "remaining_time": "1:12:42", "throughput": 8677.48, "total_tokens": 100083024} +{"current_steps": 148500, "total_steps": 204665, "loss": 0.0001, "lr": 4.247939676657815e-07, "epoch": 3.627879705860797, "percentage": 72.56, "elapsed_time": "3:12:13", "remaining_time": "1:12:42", "throughput": 8677.49, "total_tokens": 100086160} +{"current_steps": 148505, "total_steps": 204665, "loss": 0.0002, "lr": 4.2472421195020525e-07, "epoch": 3.628001856692644, "percentage": 72.56, "elapsed_time": "3:12:14", "remaining_time": "1:12:41", "throughput": 8677.53, "total_tokens": 100089552} +{"current_steps": 148510, "total_steps": 204665, "loss": 0.0, "lr": 4.2465446041818966e-07, "epoch": 3.6281240075244914, "percentage": 72.56, "elapsed_time": "3:12:14", "remaining_time": "1:12:41", "throughput": 8677.54, "total_tokens": 100092624} +{"current_steps": 148515, "total_steps": 204665, "loss": 0.0002, "lr": 4.245847130702412e-07, "epoch": 3.6282461583563386, "percentage": 72.56, "elapsed_time": "3:12:15", "remaining_time": "1:12:41", "throughput": 8677.6, "total_tokens": 100096400} +{"current_steps": 148520, "total_steps": 204665, "loss": 0.0, "lr": 4.2451496990686784e-07, "epoch": 3.6283683091881853, "percentage": 72.57, "elapsed_time": "3:12:15", "remaining_time": "1:12:40", "throughput": 8677.66, "total_tokens": 100100240} +{"current_steps": 148525, "total_steps": 204665, "loss": 0.0002, "lr": 4.244452309285761e-07, "epoch": 3.628490460020033, "percentage": 72.57, "elapsed_time": "3:12:15", "remaining_time": "1:12:40", "throughput": 8677.68, "total_tokens": 100103440} +{"current_steps": 148530, "total_steps": 204665, "loss": 0.075, "lr": 4.243754961358733e-07, "epoch": 3.6286126108518797, "percentage": 72.57, "elapsed_time": "3:12:16", "remaining_time": "1:12:39", "throughput": 8677.75, "total_tokens": 100107408} +{"current_steps": 148535, "total_steps": 204665, "loss": 0.0, "lr": 4.243057655292672e-07, "epoch": 3.6287347616837273, "percentage": 72.57, "elapsed_time": "3:12:16", "remaining_time": "1:12:39", "throughput": 8677.76, "total_tokens": 100110416} +{"current_steps": 148540, "total_steps": 204665, "loss": 0.0, "lr": 4.242360391092641e-07, "epoch": 3.628856912515574, "percentage": 72.58, "elapsed_time": "3:12:16", "remaining_time": "1:12:39", "throughput": 8677.79, "total_tokens": 100113808} +{"current_steps": 148545, "total_steps": 204665, "loss": 0.0738, "lr": 4.2416631687637173e-07, "epoch": 3.6289790633474213, "percentage": 72.58, "elapsed_time": "3:12:17", "remaining_time": "1:12:38", "throughput": 8677.8, "total_tokens": 100116880} +{"current_steps": 148550, "total_steps": 204665, "loss": 0.0, "lr": 4.240965988310963e-07, "epoch": 3.6291012141792685, "percentage": 72.58, "elapsed_time": "3:12:17", "remaining_time": "1:12:38", "throughput": 8677.83, "total_tokens": 100120272} +{"current_steps": 148555, "total_steps": 204665, "loss": 0.0739, "lr": 4.240268849739458e-07, "epoch": 3.6292233650111156, "percentage": 72.58, "elapsed_time": "3:12:17", "remaining_time": "1:12:37", "throughput": 8677.85, "total_tokens": 100123472} +{"current_steps": 148560, "total_steps": 204665, "loss": 0.0881, "lr": 4.239571753054263e-07, "epoch": 3.629345515842963, "percentage": 72.59, "elapsed_time": "3:12:18", "remaining_time": "1:12:37", "throughput": 8677.9, "total_tokens": 100127120} +{"current_steps": 148565, "total_steps": 204665, "loss": 0.0, "lr": 4.2388746982604553e-07, "epoch": 3.62946766667481, "percentage": 72.59, "elapsed_time": "3:12:18", "remaining_time": "1:12:37", "throughput": 8677.93, "total_tokens": 100130384} +{"current_steps": 148570, "total_steps": 204665, "loss": 0.0, "lr": 4.2381776853630955e-07, "epoch": 3.629589817506657, "percentage": 72.59, "elapsed_time": "3:12:18", "remaining_time": "1:12:36", "throughput": 8677.96, "total_tokens": 100133776} +{"current_steps": 148575, "total_steps": 204665, "loss": 0.008, "lr": 4.237480714367262e-07, "epoch": 3.6297119683385044, "percentage": 72.59, "elapsed_time": "3:12:19", "remaining_time": "1:12:36", "throughput": 8677.98, "total_tokens": 100137104} +{"current_steps": 148580, "total_steps": 204665, "loss": 0.0, "lr": 4.236783785278019e-07, "epoch": 3.6298341191703516, "percentage": 72.6, "elapsed_time": "3:12:19", "remaining_time": "1:12:35", "throughput": 8678.02, "total_tokens": 100140496} +{"current_steps": 148585, "total_steps": 204665, "loss": 0.0001, "lr": 4.2360868981004305e-07, "epoch": 3.629956270002199, "percentage": 72.6, "elapsed_time": "3:12:19", "remaining_time": "1:12:35", "throughput": 8678.07, "total_tokens": 100144208} +{"current_steps": 148590, "total_steps": 204665, "loss": 0.0, "lr": 4.235390052839568e-07, "epoch": 3.630078420834046, "percentage": 72.6, "elapsed_time": "3:12:20", "remaining_time": "1:12:35", "throughput": 8678.12, "total_tokens": 100147792} +{"current_steps": 148595, "total_steps": 204665, "loss": 0.0004, "lr": 4.2346932495005037e-07, "epoch": 3.630200571665893, "percentage": 72.6, "elapsed_time": "3:12:20", "remaining_time": "1:12:34", "throughput": 8678.13, "total_tokens": 100150928} +{"current_steps": 148600, "total_steps": 204665, "loss": 0.0014, "lr": 4.2339964880882974e-07, "epoch": 3.6303227224977404, "percentage": 72.61, "elapsed_time": "3:12:20", "remaining_time": "1:12:34", "throughput": 8678.16, "total_tokens": 100154192} +{"current_steps": 148605, "total_steps": 204665, "loss": 0.0, "lr": 4.233299768608022e-07, "epoch": 3.6304448733295875, "percentage": 72.61, "elapsed_time": "3:12:21", "remaining_time": "1:12:33", "throughput": 8678.2, "total_tokens": 100157712} +{"current_steps": 148610, "total_steps": 204665, "loss": 0.0001, "lr": 4.232603091064739e-07, "epoch": 3.6305670241614347, "percentage": 72.61, "elapsed_time": "3:12:21", "remaining_time": "1:12:33", "throughput": 8678.24, "total_tokens": 100161232} +{"current_steps": 148615, "total_steps": 204665, "loss": 0.0, "lr": 4.2319064554635174e-07, "epoch": 3.6306891749932815, "percentage": 72.61, "elapsed_time": "3:12:22", "remaining_time": "1:12:33", "throughput": 8678.33, "total_tokens": 100165456} +{"current_steps": 148620, "total_steps": 204665, "loss": 0.0, "lr": 4.231209861809427e-07, "epoch": 3.630811325825129, "percentage": 72.62, "elapsed_time": "3:12:22", "remaining_time": "1:12:32", "throughput": 8678.34, "total_tokens": 100168464} +{"current_steps": 148625, "total_steps": 204665, "loss": 0.0, "lr": 4.2305133101075264e-07, "epoch": 3.630933476656976, "percentage": 72.62, "elapsed_time": "3:12:22", "remaining_time": "1:12:32", "throughput": 8678.44, "total_tokens": 100172880} +{"current_steps": 148630, "total_steps": 204665, "loss": 0.0, "lr": 4.2298168003628885e-07, "epoch": 3.631055627488823, "percentage": 72.62, "elapsed_time": "3:12:23", "remaining_time": "1:12:31", "throughput": 8678.46, "total_tokens": 100176144} +{"current_steps": 148635, "total_steps": 204665, "loss": 0.0001, "lr": 4.2291203325805715e-07, "epoch": 3.6311777783206702, "percentage": 72.62, "elapsed_time": "3:12:23", "remaining_time": "1:12:31", "throughput": 8678.47, "total_tokens": 100179216} +{"current_steps": 148640, "total_steps": 204665, "loss": 0.0, "lr": 4.228423906765647e-07, "epoch": 3.6312999291525174, "percentage": 72.63, "elapsed_time": "3:12:23", "remaining_time": "1:12:31", "throughput": 8678.5, "total_tokens": 100182608} +{"current_steps": 148645, "total_steps": 204665, "loss": 0.0276, "lr": 4.2277275229231726e-07, "epoch": 3.6314220799843646, "percentage": 72.63, "elapsed_time": "3:12:24", "remaining_time": "1:12:30", "throughput": 8678.55, "total_tokens": 100186256} +{"current_steps": 148650, "total_steps": 204665, "loss": 0.0007, "lr": 4.227031181058216e-07, "epoch": 3.631544230816212, "percentage": 72.63, "elapsed_time": "3:12:24", "remaining_time": "1:12:30", "throughput": 8678.56, "total_tokens": 100189328} +{"current_steps": 148655, "total_steps": 204665, "loss": 0.0, "lr": 4.226334881175846e-07, "epoch": 3.631666381648059, "percentage": 72.63, "elapsed_time": "3:12:24", "remaining_time": "1:12:29", "throughput": 8678.59, "total_tokens": 100192592} +{"current_steps": 148660, "total_steps": 204665, "loss": 0.0007, "lr": 4.225638623281117e-07, "epoch": 3.631788532479906, "percentage": 72.64, "elapsed_time": "3:12:25", "remaining_time": "1:12:29", "throughput": 8678.61, "total_tokens": 100195856} +{"current_steps": 148665, "total_steps": 204665, "loss": 0.0017, "lr": 4.2249424073791006e-07, "epoch": 3.6319106833117534, "percentage": 72.64, "elapsed_time": "3:12:25", "remaining_time": "1:12:29", "throughput": 8678.63, "total_tokens": 100199120} +{"current_steps": 148670, "total_steps": 204665, "loss": 0.0001, "lr": 4.224246233474857e-07, "epoch": 3.6320328341436006, "percentage": 72.64, "elapsed_time": "3:12:25", "remaining_time": "1:12:28", "throughput": 8678.66, "total_tokens": 100202448} +{"current_steps": 148675, "total_steps": 204665, "loss": 0.0, "lr": 4.2235501015734445e-07, "epoch": 3.6321549849754478, "percentage": 72.64, "elapsed_time": "3:12:26", "remaining_time": "1:12:28", "throughput": 8678.7, "total_tokens": 100205904} +{"current_steps": 148680, "total_steps": 204665, "loss": 0.0002, "lr": 4.2228540116799326e-07, "epoch": 3.632277135807295, "percentage": 72.65, "elapsed_time": "3:12:26", "remaining_time": "1:12:27", "throughput": 8678.72, "total_tokens": 100209168} +{"current_steps": 148685, "total_steps": 204665, "loss": 0.0, "lr": 4.2221579637993766e-07, "epoch": 3.632399286639142, "percentage": 72.65, "elapsed_time": "3:12:26", "remaining_time": "1:12:27", "throughput": 8678.76, "total_tokens": 100212624} +{"current_steps": 148690, "total_steps": 204665, "loss": 0.0266, "lr": 4.221461957936846e-07, "epoch": 3.6325214374709893, "percentage": 72.65, "elapsed_time": "3:12:27", "remaining_time": "1:12:27", "throughput": 8678.77, "total_tokens": 100215760} +{"current_steps": 148695, "total_steps": 204665, "loss": 0.0, "lr": 4.220765994097395e-07, "epoch": 3.6326435883028365, "percentage": 72.65, "elapsed_time": "3:12:27", "remaining_time": "1:12:26", "throughput": 8678.8, "total_tokens": 100219024} +{"current_steps": 148700, "total_steps": 204665, "loss": 0.0, "lr": 4.2200700722860906e-07, "epoch": 3.6327657391346833, "percentage": 72.66, "elapsed_time": "3:12:27", "remaining_time": "1:12:26", "throughput": 8678.81, "total_tokens": 100222224} +{"current_steps": 148705, "total_steps": 204665, "loss": 0.0001, "lr": 4.219374192507988e-07, "epoch": 3.632887889966531, "percentage": 72.66, "elapsed_time": "3:12:28", "remaining_time": "1:12:25", "throughput": 8678.82, "total_tokens": 100225232} +{"current_steps": 148710, "total_steps": 204665, "loss": 0.0619, "lr": 4.2186783547681516e-07, "epoch": 3.6330100407983776, "percentage": 72.66, "elapsed_time": "3:12:28", "remaining_time": "1:12:25", "throughput": 8678.85, "total_tokens": 100228688} +{"current_steps": 148715, "total_steps": 204665, "loss": 0.0, "lr": 4.2179825590716445e-07, "epoch": 3.6331321916302253, "percentage": 72.66, "elapsed_time": "3:12:28", "remaining_time": "1:12:24", "throughput": 8678.87, "total_tokens": 100231888} +{"current_steps": 148720, "total_steps": 204665, "loss": 0.0, "lr": 4.21728680542352e-07, "epoch": 3.633254342462072, "percentage": 72.67, "elapsed_time": "3:12:29", "remaining_time": "1:12:24", "throughput": 8678.89, "total_tokens": 100235088} +{"current_steps": 148725, "total_steps": 204665, "loss": 0.0, "lr": 4.216591093828844e-07, "epoch": 3.633376493293919, "percentage": 72.67, "elapsed_time": "3:12:29", "remaining_time": "1:12:24", "throughput": 8678.91, "total_tokens": 100238352} +{"current_steps": 148730, "total_steps": 204665, "loss": 0.0, "lr": 4.21589542429267e-07, "epoch": 3.6334986441257664, "percentage": 72.67, "elapsed_time": "3:12:30", "remaining_time": "1:12:23", "throughput": 8679.02, "total_tokens": 100242832} +{"current_steps": 148735, "total_steps": 204665, "loss": 0.0, "lr": 4.215199796820064e-07, "epoch": 3.6336207949576136, "percentage": 72.67, "elapsed_time": "3:12:30", "remaining_time": "1:12:23", "throughput": 8679.04, "total_tokens": 100245968} +{"current_steps": 148740, "total_steps": 204665, "loss": 0.0, "lr": 4.2145042114160776e-07, "epoch": 3.6337429457894608, "percentage": 72.67, "elapsed_time": "3:12:30", "remaining_time": "1:12:22", "throughput": 8679.06, "total_tokens": 100249232} +{"current_steps": 148745, "total_steps": 204665, "loss": 0.0, "lr": 4.213808668085772e-07, "epoch": 3.633865096621308, "percentage": 72.68, "elapsed_time": "3:12:31", "remaining_time": "1:12:22", "throughput": 8679.1, "total_tokens": 100252688} +{"current_steps": 148750, "total_steps": 204665, "loss": 0.0, "lr": 4.2131131668342103e-07, "epoch": 3.633987247453155, "percentage": 72.68, "elapsed_time": "3:12:31", "remaining_time": "1:12:22", "throughput": 8679.12, "total_tokens": 100255952} +{"current_steps": 148755, "total_steps": 204665, "loss": 0.0, "lr": 4.212417707666442e-07, "epoch": 3.6341093982850023, "percentage": 72.68, "elapsed_time": "3:12:31", "remaining_time": "1:12:21", "throughput": 8679.13, "total_tokens": 100259088} +{"current_steps": 148760, "total_steps": 204665, "loss": 0.0, "lr": 4.2117222905875327e-07, "epoch": 3.6342315491168495, "percentage": 72.68, "elapsed_time": "3:12:32", "remaining_time": "1:12:21", "throughput": 8679.16, "total_tokens": 100262416} +{"current_steps": 148765, "total_steps": 204665, "loss": 0.0, "lr": 4.2110269156025327e-07, "epoch": 3.6343536999486967, "percentage": 72.69, "elapsed_time": "3:12:32", "remaining_time": "1:12:20", "throughput": 8679.2, "total_tokens": 100265936} +{"current_steps": 148770, "total_steps": 204665, "loss": 0.0, "lr": 4.2103315827165043e-07, "epoch": 3.634475850780544, "percentage": 72.69, "elapsed_time": "3:12:32", "remaining_time": "1:12:20", "throughput": 8679.27, "total_tokens": 100269776} +{"current_steps": 148775, "total_steps": 204665, "loss": 0.0, "lr": 4.209636291934503e-07, "epoch": 3.634598001612391, "percentage": 72.69, "elapsed_time": "3:12:33", "remaining_time": "1:12:20", "throughput": 8679.33, "total_tokens": 100273616} +{"current_steps": 148780, "total_steps": 204665, "loss": 0.0, "lr": 4.20894104326158e-07, "epoch": 3.6347201524442383, "percentage": 72.69, "elapsed_time": "3:12:33", "remaining_time": "1:12:19", "throughput": 8679.35, "total_tokens": 100276880} +{"current_steps": 148785, "total_steps": 204665, "loss": 0.0, "lr": 4.2082458367027986e-07, "epoch": 3.634842303276085, "percentage": 72.7, "elapsed_time": "3:12:33", "remaining_time": "1:12:19", "throughput": 8679.38, "total_tokens": 100280208} +{"current_steps": 148790, "total_steps": 204665, "loss": 0.0001, "lr": 4.207550672263208e-07, "epoch": 3.6349644541079327, "percentage": 72.7, "elapsed_time": "3:12:34", "remaining_time": "1:12:18", "throughput": 8679.42, "total_tokens": 100283792} +{"current_steps": 148795, "total_steps": 204665, "loss": 0.0, "lr": 4.206855549947871e-07, "epoch": 3.6350866049397794, "percentage": 72.7, "elapsed_time": "3:12:34", "remaining_time": "1:12:18", "throughput": 8679.46, "total_tokens": 100287184} +{"current_steps": 148800, "total_steps": 204665, "loss": 0.0, "lr": 4.2061604697618347e-07, "epoch": 3.635208755771627, "percentage": 72.7, "elapsed_time": "3:12:34", "remaining_time": "1:12:18", "throughput": 8679.48, "total_tokens": 100290448} +{"current_steps": 148805, "total_steps": 204665, "loss": 0.0433, "lr": 4.205465431710158e-07, "epoch": 3.635330906603474, "percentage": 72.71, "elapsed_time": "3:12:35", "remaining_time": "1:12:17", "throughput": 8679.49, "total_tokens": 100293520} +{"current_steps": 148810, "total_steps": 204665, "loss": 0.0, "lr": 4.2047704357978975e-07, "epoch": 3.635453057435321, "percentage": 72.71, "elapsed_time": "3:12:35", "remaining_time": "1:12:17", "throughput": 8679.54, "total_tokens": 100297168} +{"current_steps": 148815, "total_steps": 204665, "loss": 0.0, "lr": 4.204075482030103e-07, "epoch": 3.635575208267168, "percentage": 72.71, "elapsed_time": "3:12:35", "remaining_time": "1:12:16", "throughput": 8679.61, "total_tokens": 100301072} +{"current_steps": 148820, "total_steps": 204665, "loss": 0.0004, "lr": 4.203380570411833e-07, "epoch": 3.6356973590990154, "percentage": 72.71, "elapsed_time": "3:12:36", "remaining_time": "1:12:16", "throughput": 8679.66, "total_tokens": 100304784} +{"current_steps": 148825, "total_steps": 204665, "loss": 0.0922, "lr": 4.2026857009481363e-07, "epoch": 3.6358195099308626, "percentage": 72.72, "elapsed_time": "3:12:36", "remaining_time": "1:12:16", "throughput": 8679.69, "total_tokens": 100308112} +{"current_steps": 148830, "total_steps": 204665, "loss": 0.0, "lr": 4.201990873644071e-07, "epoch": 3.6359416607627097, "percentage": 72.72, "elapsed_time": "3:12:36", "remaining_time": "1:12:15", "throughput": 8679.7, "total_tokens": 100311248} +{"current_steps": 148835, "total_steps": 204665, "loss": 0.0607, "lr": 4.2012960885046846e-07, "epoch": 3.636063811594557, "percentage": 72.72, "elapsed_time": "3:12:37", "remaining_time": "1:12:15", "throughput": 8679.73, "total_tokens": 100314512} +{"current_steps": 148840, "total_steps": 204665, "loss": 0.0953, "lr": 4.200601345535032e-07, "epoch": 3.636185962426404, "percentage": 72.72, "elapsed_time": "3:12:37", "remaining_time": "1:12:14", "throughput": 8679.75, "total_tokens": 100317712} +{"current_steps": 148845, "total_steps": 204665, "loss": 0.0, "lr": 4.1999066447401707e-07, "epoch": 3.6363081132582513, "percentage": 72.73, "elapsed_time": "3:12:38", "remaining_time": "1:12:14", "throughput": 8679.77, "total_tokens": 100321040} +{"current_steps": 148850, "total_steps": 204665, "loss": 0.0567, "lr": 4.1992119861251443e-07, "epoch": 3.6364302640900985, "percentage": 72.73, "elapsed_time": "3:12:38", "remaining_time": "1:12:14", "throughput": 8679.79, "total_tokens": 100324112} +{"current_steps": 148855, "total_steps": 204665, "loss": 0.0, "lr": 4.1985173696950125e-07, "epoch": 3.6365524149219457, "percentage": 72.73, "elapsed_time": "3:12:38", "remaining_time": "1:12:13", "throughput": 8679.82, "total_tokens": 100327504} +{"current_steps": 148860, "total_steps": 204665, "loss": 0.0235, "lr": 4.1978227954548183e-07, "epoch": 3.636674565753793, "percentage": 72.73, "elapsed_time": "3:12:39", "remaining_time": "1:12:13", "throughput": 8679.87, "total_tokens": 100331152} +{"current_steps": 148865, "total_steps": 204665, "loss": 0.0591, "lr": 4.197128263409622e-07, "epoch": 3.63679671658564, "percentage": 72.74, "elapsed_time": "3:12:39", "remaining_time": "1:12:12", "throughput": 8679.88, "total_tokens": 100334288} +{"current_steps": 148870, "total_steps": 204665, "loss": 0.0001, "lr": 4.196433773564465e-07, "epoch": 3.6369188674174873, "percentage": 72.74, "elapsed_time": "3:12:39", "remaining_time": "1:12:12", "throughput": 8679.92, "total_tokens": 100337744} +{"current_steps": 148875, "total_steps": 204665, "loss": 0.0, "lr": 4.195739325924407e-07, "epoch": 3.6370410182493345, "percentage": 72.74, "elapsed_time": "3:12:40", "remaining_time": "1:12:12", "throughput": 8679.94, "total_tokens": 100341008} +{"current_steps": 148880, "total_steps": 204665, "loss": 0.0, "lr": 4.1950449204944905e-07, "epoch": 3.637163169081181, "percentage": 72.74, "elapsed_time": "3:12:40", "remaining_time": "1:12:11", "throughput": 8679.98, "total_tokens": 100344528} +{"current_steps": 148885, "total_steps": 204665, "loss": 0.0, "lr": 4.1943505572797713e-07, "epoch": 3.637285319913029, "percentage": 72.75, "elapsed_time": "3:12:40", "remaining_time": "1:12:11", "throughput": 8680.02, "total_tokens": 100347920} +{"current_steps": 148890, "total_steps": 204665, "loss": 0.0, "lr": 4.1936562362852966e-07, "epoch": 3.6374074707448756, "percentage": 72.75, "elapsed_time": "3:12:41", "remaining_time": "1:12:10", "throughput": 8680.03, "total_tokens": 100351056} +{"current_steps": 148895, "total_steps": 204665, "loss": 0.0, "lr": 4.1929619575161126e-07, "epoch": 3.6375296215767228, "percentage": 72.75, "elapsed_time": "3:12:41", "remaining_time": "1:12:10", "throughput": 8680.03, "total_tokens": 100354000} +{"current_steps": 148900, "total_steps": 204665, "loss": 0.0001, "lr": 4.192267720977271e-07, "epoch": 3.63765177240857, "percentage": 72.75, "elapsed_time": "3:12:41", "remaining_time": "1:12:10", "throughput": 8680.07, "total_tokens": 100357520} +{"current_steps": 148905, "total_steps": 204665, "loss": 0.0, "lr": 4.1915735266738237e-07, "epoch": 3.637773923240417, "percentage": 72.76, "elapsed_time": "3:12:42", "remaining_time": "1:12:09", "throughput": 8680.09, "total_tokens": 100360720} +{"current_steps": 148910, "total_steps": 204665, "loss": 0.043, "lr": 4.190879374610813e-07, "epoch": 3.6378960740722643, "percentage": 72.76, "elapsed_time": "3:12:42", "remaining_time": "1:12:09", "throughput": 8680.11, "total_tokens": 100363984} +{"current_steps": 148915, "total_steps": 204665, "loss": 0.0, "lr": 4.190185264793292e-07, "epoch": 3.6380182249041115, "percentage": 72.76, "elapsed_time": "3:12:42", "remaining_time": "1:12:08", "throughput": 8680.12, "total_tokens": 100367056} +{"current_steps": 148920, "total_steps": 204665, "loss": 0.0, "lr": 4.189491197226305e-07, "epoch": 3.6381403757359587, "percentage": 72.76, "elapsed_time": "3:12:43", "remaining_time": "1:12:08", "throughput": 8680.15, "total_tokens": 100370384} +{"current_steps": 148925, "total_steps": 204665, "loss": 0.0, "lr": 4.188797171914903e-07, "epoch": 3.638262526567806, "percentage": 72.77, "elapsed_time": "3:12:43", "remaining_time": "1:12:08", "throughput": 8680.19, "total_tokens": 100373968} +{"current_steps": 148930, "total_steps": 204665, "loss": 0.0, "lr": 4.1881031888641285e-07, "epoch": 3.638384677399653, "percentage": 72.77, "elapsed_time": "3:12:43", "remaining_time": "1:12:07", "throughput": 8680.24, "total_tokens": 100377616} +{"current_steps": 148935, "total_steps": 204665, "loss": 0.0, "lr": 4.18740924807903e-07, "epoch": 3.6385068282315003, "percentage": 72.77, "elapsed_time": "3:12:44", "remaining_time": "1:12:07", "throughput": 8680.28, "total_tokens": 100381136} +{"current_steps": 148940, "total_steps": 204665, "loss": 0.0, "lr": 4.186715349564658e-07, "epoch": 3.6386289790633475, "percentage": 72.77, "elapsed_time": "3:12:44", "remaining_time": "1:12:06", "throughput": 8680.32, "total_tokens": 100384592} +{"current_steps": 148945, "total_steps": 204665, "loss": 0.0001, "lr": 4.186021493326053e-07, "epoch": 3.6387511298951947, "percentage": 72.78, "elapsed_time": "3:12:44", "remaining_time": "1:12:06", "throughput": 8680.33, "total_tokens": 100387664} +{"current_steps": 148950, "total_steps": 204665, "loss": 0.0, "lr": 4.185327679368267e-07, "epoch": 3.638873280727042, "percentage": 72.78, "elapsed_time": "3:12:45", "remaining_time": "1:12:06", "throughput": 8680.39, "total_tokens": 100391568} +{"current_steps": 148955, "total_steps": 204665, "loss": 0.0002, "lr": 4.184633907696338e-07, "epoch": 3.638995431558889, "percentage": 72.78, "elapsed_time": "3:12:45", "remaining_time": "1:12:05", "throughput": 8680.41, "total_tokens": 100394704} +{"current_steps": 148960, "total_steps": 204665, "loss": 0.0002, "lr": 4.183940178315315e-07, "epoch": 3.6391175823907362, "percentage": 72.78, "elapsed_time": "3:12:46", "remaining_time": "1:12:05", "throughput": 8680.43, "total_tokens": 100398032} +{"current_steps": 148965, "total_steps": 204665, "loss": 0.0, "lr": 4.183246491230248e-07, "epoch": 3.639239733222583, "percentage": 72.78, "elapsed_time": "3:12:46", "remaining_time": "1:12:04", "throughput": 8680.46, "total_tokens": 100401296} +{"current_steps": 148970, "total_steps": 204665, "loss": 0.0002, "lr": 4.1825528464461725e-07, "epoch": 3.6393618840544306, "percentage": 72.79, "elapsed_time": "3:12:46", "remaining_time": "1:12:04", "throughput": 8680.5, "total_tokens": 100404816} +{"current_steps": 148975, "total_steps": 204665, "loss": 0.0001, "lr": 4.1818592439681413e-07, "epoch": 3.6394840348862774, "percentage": 72.79, "elapsed_time": "3:12:47", "remaining_time": "1:12:04", "throughput": 8680.5, "total_tokens": 100407824} +{"current_steps": 148980, "total_steps": 204665, "loss": 0.0, "lr": 4.1811656838011946e-07, "epoch": 3.639606185718125, "percentage": 72.79, "elapsed_time": "3:12:47", "remaining_time": "1:12:03", "throughput": 8680.51, "total_tokens": 100410832} +{"current_steps": 148985, "total_steps": 204665, "loss": 0.0671, "lr": 4.180472165950373e-07, "epoch": 3.6397283365499717, "percentage": 72.79, "elapsed_time": "3:12:47", "remaining_time": "1:12:03", "throughput": 8680.52, "total_tokens": 100413968} +{"current_steps": 148990, "total_steps": 204665, "loss": 0.0833, "lr": 4.1797786904207254e-07, "epoch": 3.639850487381819, "percentage": 72.8, "elapsed_time": "3:12:48", "remaining_time": "1:12:02", "throughput": 8680.62, "total_tokens": 100418256} +{"current_steps": 148995, "total_steps": 204665, "loss": 0.0, "lr": 4.17908525721729e-07, "epoch": 3.639972638213666, "percentage": 72.8, "elapsed_time": "3:12:48", "remaining_time": "1:12:02", "throughput": 8680.71, "total_tokens": 100422416} +{"current_steps": 149000, "total_steps": 204665, "loss": 0.0003, "lr": 4.178391866345116e-07, "epoch": 3.6400947890455133, "percentage": 72.8, "elapsed_time": "3:12:48", "remaining_time": "1:12:01", "throughput": 8680.76, "total_tokens": 100426000} +{"current_steps": 149005, "total_steps": 204665, "loss": 0.0001, "lr": 4.1776985178092383e-07, "epoch": 3.6402169398773605, "percentage": 72.8, "elapsed_time": "3:12:49", "remaining_time": "1:12:01", "throughput": 8680.77, "total_tokens": 100429200} +{"current_steps": 149010, "total_steps": 204665, "loss": 0.001, "lr": 4.177005211614706e-07, "epoch": 3.6403390907092077, "percentage": 72.81, "elapsed_time": "3:12:49", "remaining_time": "1:12:01", "throughput": 8680.78, "total_tokens": 100432144} +{"current_steps": 149015, "total_steps": 204665, "loss": 0.0, "lr": 4.176311947766555e-07, "epoch": 3.640461241541055, "percentage": 72.81, "elapsed_time": "3:12:49", "remaining_time": "1:12:00", "throughput": 8680.81, "total_tokens": 100435600} +{"current_steps": 149020, "total_steps": 204665, "loss": 0.0, "lr": 4.1756187262698305e-07, "epoch": 3.640583392372902, "percentage": 72.81, "elapsed_time": "3:12:50", "remaining_time": "1:12:00", "throughput": 8680.83, "total_tokens": 100438800} +{"current_steps": 149025, "total_steps": 204665, "loss": 0.0, "lr": 4.1749255471295755e-07, "epoch": 3.6407055432047493, "percentage": 72.81, "elapsed_time": "3:12:50", "remaining_time": "1:11:59", "throughput": 8680.83, "total_tokens": 100441680} +{"current_steps": 149030, "total_steps": 204665, "loss": 0.0, "lr": 4.174232410350826e-07, "epoch": 3.6408276940365965, "percentage": 72.82, "elapsed_time": "3:12:50", "remaining_time": "1:11:59", "throughput": 8680.83, "total_tokens": 100444624} +{"current_steps": 149035, "total_steps": 204665, "loss": 0.0, "lr": 4.173539315938629e-07, "epoch": 3.6409498448684436, "percentage": 72.82, "elapsed_time": "3:12:51", "remaining_time": "1:11:59", "throughput": 8680.85, "total_tokens": 100447888} +{"current_steps": 149040, "total_steps": 204665, "loss": 0.0989, "lr": 4.1728462638980164e-07, "epoch": 3.641071995700291, "percentage": 72.82, "elapsed_time": "3:12:51", "remaining_time": "1:11:58", "throughput": 8680.88, "total_tokens": 100451152} +{"current_steps": 149045, "total_steps": 204665, "loss": 0.0, "lr": 4.172153254234038e-07, "epoch": 3.641194146532138, "percentage": 72.82, "elapsed_time": "3:12:51", "remaining_time": "1:11:58", "throughput": 8680.92, "total_tokens": 100454608} +{"current_steps": 149050, "total_steps": 204665, "loss": 0.0001, "lr": 4.171460286951725e-07, "epoch": 3.641316297363985, "percentage": 72.83, "elapsed_time": "3:12:52", "remaining_time": "1:11:57", "throughput": 8680.95, "total_tokens": 100458000} +{"current_steps": 149055, "total_steps": 204665, "loss": 0.1442, "lr": 4.17076736205612e-07, "epoch": 3.6414384481958324, "percentage": 72.83, "elapsed_time": "3:12:52", "remaining_time": "1:11:57", "throughput": 8681.01, "total_tokens": 100461776} +{"current_steps": 149060, "total_steps": 204665, "loss": 0.0476, "lr": 4.170074479552266e-07, "epoch": 3.641560599027679, "percentage": 72.83, "elapsed_time": "3:12:52", "remaining_time": "1:11:57", "throughput": 8681.01, "total_tokens": 100464848} +{"current_steps": 149065, "total_steps": 204665, "loss": 0.0922, "lr": 4.1693816394451954e-07, "epoch": 3.641682749859527, "percentage": 72.83, "elapsed_time": "3:12:53", "remaining_time": "1:11:56", "throughput": 8681.07, "total_tokens": 100468560} +{"current_steps": 149070, "total_steps": 204665, "loss": 0.0001, "lr": 4.1686888417399537e-07, "epoch": 3.6418049006913735, "percentage": 72.84, "elapsed_time": "3:12:53", "remaining_time": "1:11:56", "throughput": 8681.1, "total_tokens": 100471888} +{"current_steps": 149075, "total_steps": 204665, "loss": 0.0, "lr": 4.167996086441571e-07, "epoch": 3.6419270515232207, "percentage": 72.84, "elapsed_time": "3:12:53", "remaining_time": "1:11:55", "throughput": 8681.14, "total_tokens": 100475472} +{"current_steps": 149080, "total_steps": 204665, "loss": 0.0, "lr": 4.167303373555092e-07, "epoch": 3.642049202355068, "percentage": 72.84, "elapsed_time": "3:12:54", "remaining_time": "1:11:55", "throughput": 8681.16, "total_tokens": 100478608} +{"current_steps": 149085, "total_steps": 204665, "loss": 0.0, "lr": 4.1666107030855535e-07, "epoch": 3.642171353186915, "percentage": 72.84, "elapsed_time": "3:12:54", "remaining_time": "1:11:55", "throughput": 8681.22, "total_tokens": 100482384} +{"current_steps": 149090, "total_steps": 204665, "loss": 0.058, "lr": 4.165918075037986e-07, "epoch": 3.6422935040187623, "percentage": 72.85, "elapsed_time": "3:12:55", "remaining_time": "1:11:54", "throughput": 8681.23, "total_tokens": 100485520} +{"current_steps": 149095, "total_steps": 204665, "loss": 0.0, "lr": 4.1652254894174357e-07, "epoch": 3.6424156548506095, "percentage": 72.85, "elapsed_time": "3:12:55", "remaining_time": "1:11:54", "throughput": 8681.24, "total_tokens": 100488656} +{"current_steps": 149100, "total_steps": 204665, "loss": 0.0716, "lr": 4.1645329462289314e-07, "epoch": 3.6425378056824567, "percentage": 72.85, "elapsed_time": "3:12:55", "remaining_time": "1:11:53", "throughput": 8681.29, "total_tokens": 100492240} +{"current_steps": 149105, "total_steps": 204665, "loss": 0.0001, "lr": 4.163840445477517e-07, "epoch": 3.642659956514304, "percentage": 72.85, "elapsed_time": "3:12:56", "remaining_time": "1:11:53", "throughput": 8681.39, "total_tokens": 100496720} +{"current_steps": 149110, "total_steps": 204665, "loss": 0.0526, "lr": 4.1631479871682195e-07, "epoch": 3.642782107346151, "percentage": 72.86, "elapsed_time": "3:12:56", "remaining_time": "1:11:53", "throughput": 8681.42, "total_tokens": 100500048} +{"current_steps": 149115, "total_steps": 204665, "loss": 0.0, "lr": 4.1624555713060815e-07, "epoch": 3.6429042581779982, "percentage": 72.86, "elapsed_time": "3:12:56", "remaining_time": "1:11:52", "throughput": 8681.44, "total_tokens": 100503184} +{"current_steps": 149120, "total_steps": 204665, "loss": 0.0001, "lr": 4.1617631978961396e-07, "epoch": 3.6430264090098454, "percentage": 72.86, "elapsed_time": "3:12:57", "remaining_time": "1:11:52", "throughput": 8681.46, "total_tokens": 100506384} +{"current_steps": 149125, "total_steps": 204665, "loss": 0.0, "lr": 4.1610708669434224e-07, "epoch": 3.6431485598416926, "percentage": 72.86, "elapsed_time": "3:12:57", "remaining_time": "1:11:51", "throughput": 8681.48, "total_tokens": 100509584} +{"current_steps": 149130, "total_steps": 204665, "loss": 0.0002, "lr": 4.1603785784529724e-07, "epoch": 3.64327071067354, "percentage": 72.87, "elapsed_time": "3:12:57", "remaining_time": "1:11:51", "throughput": 8681.48, "total_tokens": 100512592} +{"current_steps": 149135, "total_steps": 204665, "loss": 0.0, "lr": 4.1596863324298157e-07, "epoch": 3.643392861505387, "percentage": 72.87, "elapsed_time": "3:12:58", "remaining_time": "1:11:51", "throughput": 8681.51, "total_tokens": 100515920} +{"current_steps": 149140, "total_steps": 204665, "loss": 0.0557, "lr": 4.158994128878994e-07, "epoch": 3.643515012337234, "percentage": 72.87, "elapsed_time": "3:12:58", "remaining_time": "1:11:50", "throughput": 8681.53, "total_tokens": 100519184} +{"current_steps": 149145, "total_steps": 204665, "loss": 0.0, "lr": 4.158301967805535e-07, "epoch": 3.643637163169081, "percentage": 72.87, "elapsed_time": "3:12:58", "remaining_time": "1:11:50", "throughput": 8681.56, "total_tokens": 100522640} +{"current_steps": 149150, "total_steps": 204665, "loss": 0.0002, "lr": 4.1576098492144763e-07, "epoch": 3.6437593140009286, "percentage": 72.88, "elapsed_time": "3:12:59", "remaining_time": "1:11:49", "throughput": 8681.64, "total_tokens": 100526672} +{"current_steps": 149155, "total_steps": 204665, "loss": 0.0739, "lr": 4.1569177731108526e-07, "epoch": 3.6438814648327753, "percentage": 72.88, "elapsed_time": "3:12:59", "remaining_time": "1:11:49", "throughput": 8681.7, "total_tokens": 100530512} +{"current_steps": 149160, "total_steps": 204665, "loss": 0.0001, "lr": 4.1562257394996913e-07, "epoch": 3.644003615664623, "percentage": 72.88, "elapsed_time": "3:12:59", "remaining_time": "1:11:49", "throughput": 8681.71, "total_tokens": 100533584} +{"current_steps": 149165, "total_steps": 204665, "loss": 0.0, "lr": 4.155533748386032e-07, "epoch": 3.6441257664964697, "percentage": 72.88, "elapsed_time": "3:13:00", "remaining_time": "1:11:48", "throughput": 8681.74, "total_tokens": 100536976} +{"current_steps": 149170, "total_steps": 204665, "loss": 0.0, "lr": 4.1548417997749e-07, "epoch": 3.644247917328317, "percentage": 72.88, "elapsed_time": "3:13:00", "remaining_time": "1:11:48", "throughput": 8681.76, "total_tokens": 100540304} +{"current_steps": 149175, "total_steps": 204665, "loss": 0.0, "lr": 4.154149893671334e-07, "epoch": 3.644370068160164, "percentage": 72.89, "elapsed_time": "3:13:00", "remaining_time": "1:11:47", "throughput": 8681.77, "total_tokens": 100543376} +{"current_steps": 149180, "total_steps": 204665, "loss": 0.0, "lr": 4.153458030080358e-07, "epoch": 3.6444922189920113, "percentage": 72.89, "elapsed_time": "3:13:01", "remaining_time": "1:11:47", "throughput": 8681.81, "total_tokens": 100546832} +{"current_steps": 149185, "total_steps": 204665, "loss": 0.0001, "lr": 4.1527662090070113e-07, "epoch": 3.6446143698238584, "percentage": 72.89, "elapsed_time": "3:13:01", "remaining_time": "1:11:47", "throughput": 8681.85, "total_tokens": 100550352} +{"current_steps": 149190, "total_steps": 204665, "loss": 0.0, "lr": 4.1520744304563185e-07, "epoch": 3.6447365206557056, "percentage": 72.89, "elapsed_time": "3:13:02", "remaining_time": "1:11:46", "throughput": 8681.86, "total_tokens": 100553488} +{"current_steps": 149195, "total_steps": 204665, "loss": 0.0002, "lr": 4.151382694433316e-07, "epoch": 3.644858671487553, "percentage": 72.9, "elapsed_time": "3:13:02", "remaining_time": "1:11:46", "throughput": 8681.88, "total_tokens": 100556688} +{"current_steps": 149200, "total_steps": 204665, "loss": 0.0001, "lr": 4.150691000943033e-07, "epoch": 3.6449808223194, "percentage": 72.9, "elapsed_time": "3:13:02", "remaining_time": "1:11:45", "throughput": 8681.92, "total_tokens": 100560208} +{"current_steps": 149205, "total_steps": 204665, "loss": 0.0347, "lr": 4.149999349990494e-07, "epoch": 3.645102973151247, "percentage": 72.9, "elapsed_time": "3:13:03", "remaining_time": "1:11:45", "throughput": 8681.93, "total_tokens": 100563344} +{"current_steps": 149210, "total_steps": 204665, "loss": 0.0001, "lr": 4.149307741580733e-07, "epoch": 3.6452251239830944, "percentage": 72.9, "elapsed_time": "3:13:03", "remaining_time": "1:11:45", "throughput": 8681.93, "total_tokens": 100566224} +{"current_steps": 149215, "total_steps": 204665, "loss": 0.0001, "lr": 4.148616175718783e-07, "epoch": 3.6453472748149416, "percentage": 72.91, "elapsed_time": "3:13:03", "remaining_time": "1:11:44", "throughput": 8681.94, "total_tokens": 100569488} +{"current_steps": 149220, "total_steps": 204665, "loss": 0.0093, "lr": 4.1479246524096676e-07, "epoch": 3.6454694256467888, "percentage": 72.91, "elapsed_time": "3:13:04", "remaining_time": "1:11:44", "throughput": 8681.98, "total_tokens": 100572880} +{"current_steps": 149225, "total_steps": 204665, "loss": 0.0, "lr": 4.147233171658421e-07, "epoch": 3.645591576478636, "percentage": 72.91, "elapsed_time": "3:13:04", "remaining_time": "1:11:43", "throughput": 8681.98, "total_tokens": 100575824} +{"current_steps": 149230, "total_steps": 204665, "loss": 0.0451, "lr": 4.146541733470066e-07, "epoch": 3.6457137273104827, "percentage": 72.91, "elapsed_time": "3:13:04", "remaining_time": "1:11:43", "throughput": 8682.03, "total_tokens": 100579600} +{"current_steps": 149235, "total_steps": 204665, "loss": 0.0, "lr": 4.145850337849637e-07, "epoch": 3.6458358781423303, "percentage": 72.92, "elapsed_time": "3:13:05", "remaining_time": "1:11:43", "throughput": 8682.09, "total_tokens": 100583376} +{"current_steps": 149240, "total_steps": 204665, "loss": 0.0614, "lr": 4.145158984802155e-07, "epoch": 3.645958028974177, "percentage": 72.92, "elapsed_time": "3:13:05", "remaining_time": "1:11:42", "throughput": 8682.11, "total_tokens": 100586640} +{"current_steps": 149245, "total_steps": 204665, "loss": 0.0, "lr": 4.144467674332651e-07, "epoch": 3.6460801798060247, "percentage": 72.92, "elapsed_time": "3:13:05", "remaining_time": "1:11:42", "throughput": 8682.15, "total_tokens": 100590160} +{"current_steps": 149250, "total_steps": 204665, "loss": 0.0, "lr": 4.143776406446158e-07, "epoch": 3.6462023306378715, "percentage": 72.92, "elapsed_time": "3:13:06", "remaining_time": "1:11:41", "throughput": 8682.18, "total_tokens": 100593488} +{"current_steps": 149255, "total_steps": 204665, "loss": 0.0001, "lr": 4.143085181147694e-07, "epoch": 3.6463244814697187, "percentage": 72.93, "elapsed_time": "3:13:06", "remaining_time": "1:11:41", "throughput": 8682.18, "total_tokens": 100596432} +{"current_steps": 149260, "total_steps": 204665, "loss": 0.0002, "lr": 4.142393998442294e-07, "epoch": 3.646446632301566, "percentage": 72.93, "elapsed_time": "3:13:06", "remaining_time": "1:11:41", "throughput": 8682.2, "total_tokens": 100599696} +{"current_steps": 149265, "total_steps": 204665, "loss": 0.0, "lr": 4.1417028583349766e-07, "epoch": 3.646568783133413, "percentage": 72.93, "elapsed_time": "3:13:07", "remaining_time": "1:11:40", "throughput": 8682.23, "total_tokens": 100603088} +{"current_steps": 149270, "total_steps": 204665, "loss": 0.0, "lr": 4.1410117608307716e-07, "epoch": 3.6466909339652602, "percentage": 72.93, "elapsed_time": "3:13:07", "remaining_time": "1:11:40", "throughput": 8682.25, "total_tokens": 100606352} +{"current_steps": 149275, "total_steps": 204665, "loss": 0.0, "lr": 4.140320705934708e-07, "epoch": 3.6468130847971074, "percentage": 72.94, "elapsed_time": "3:13:07", "remaining_time": "1:11:39", "throughput": 8682.29, "total_tokens": 100609808} +{"current_steps": 149280, "total_steps": 204665, "loss": 0.0, "lr": 4.1396296936518047e-07, "epoch": 3.6469352356289546, "percentage": 72.94, "elapsed_time": "3:13:08", "remaining_time": "1:11:39", "throughput": 8682.31, "total_tokens": 100613072} +{"current_steps": 149285, "total_steps": 204665, "loss": 0.0648, "lr": 4.1389387239870945e-07, "epoch": 3.647057386460802, "percentage": 72.94, "elapsed_time": "3:13:08", "remaining_time": "1:11:39", "throughput": 8682.33, "total_tokens": 100616336} +{"current_steps": 149290, "total_steps": 204665, "loss": 0.0003, "lr": 4.138247796945599e-07, "epoch": 3.647179537292649, "percentage": 72.94, "elapsed_time": "3:13:08", "remaining_time": "1:11:38", "throughput": 8682.34, "total_tokens": 100619408} +{"current_steps": 149295, "total_steps": 204665, "loss": 0.0005, "lr": 4.1375569125323374e-07, "epoch": 3.647301688124496, "percentage": 72.95, "elapsed_time": "3:13:09", "remaining_time": "1:11:38", "throughput": 8682.38, "total_tokens": 100622928} +{"current_steps": 149300, "total_steps": 204665, "loss": 0.0, "lr": 4.136866070752343e-07, "epoch": 3.6474238389563434, "percentage": 72.95, "elapsed_time": "3:13:09", "remaining_time": "1:11:37", "throughput": 8682.4, "total_tokens": 100626128} +{"current_steps": 149305, "total_steps": 204665, "loss": 0.0801, "lr": 4.1361752716106315e-07, "epoch": 3.6475459897881906, "percentage": 72.95, "elapsed_time": "3:13:10", "remaining_time": "1:11:37", "throughput": 8682.43, "total_tokens": 100629584} +{"current_steps": 149310, "total_steps": 204665, "loss": 0.0563, "lr": 4.1354845151122344e-07, "epoch": 3.6476681406200377, "percentage": 72.95, "elapsed_time": "3:13:10", "remaining_time": "1:11:36", "throughput": 8682.45, "total_tokens": 100632848} +{"current_steps": 149315, "total_steps": 204665, "loss": 0.0, "lr": 4.1347938012621675e-07, "epoch": 3.647790291451885, "percentage": 72.96, "elapsed_time": "3:13:10", "remaining_time": "1:11:36", "throughput": 8682.5, "total_tokens": 100636432} +{"current_steps": 149320, "total_steps": 204665, "loss": 0.0, "lr": 4.1341031300654615e-07, "epoch": 3.647912442283732, "percentage": 72.96, "elapsed_time": "3:13:11", "remaining_time": "1:11:36", "throughput": 8682.54, "total_tokens": 100639952} +{"current_steps": 149325, "total_steps": 204665, "loss": 0.0, "lr": 4.1334125015271316e-07, "epoch": 3.648034593115579, "percentage": 72.96, "elapsed_time": "3:13:11", "remaining_time": "1:11:35", "throughput": 8682.57, "total_tokens": 100643344} +{"current_steps": 149330, "total_steps": 204665, "loss": 0.0001, "lr": 4.1327219156522043e-07, "epoch": 3.6481567439474265, "percentage": 72.96, "elapsed_time": "3:13:11", "remaining_time": "1:11:35", "throughput": 8682.59, "total_tokens": 100646608} +{"current_steps": 149335, "total_steps": 204665, "loss": 0.0, "lr": 4.1320313724457046e-07, "epoch": 3.6482788947792733, "percentage": 72.97, "elapsed_time": "3:13:12", "remaining_time": "1:11:34", "throughput": 8682.64, "total_tokens": 100650320} +{"current_steps": 149340, "total_steps": 204665, "loss": 0.0, "lr": 4.1313408719126475e-07, "epoch": 3.648401045611121, "percentage": 72.97, "elapsed_time": "3:13:12", "remaining_time": "1:11:34", "throughput": 8682.67, "total_tokens": 100653648} +{"current_steps": 149345, "total_steps": 204665, "loss": 0.0, "lr": 4.130650414058061e-07, "epoch": 3.6485231964429676, "percentage": 72.97, "elapsed_time": "3:13:12", "remaining_time": "1:11:34", "throughput": 8682.68, "total_tokens": 100656784} +{"current_steps": 149350, "total_steps": 204665, "loss": 0.0001, "lr": 4.1299599988869606e-07, "epoch": 3.648645347274815, "percentage": 72.97, "elapsed_time": "3:13:13", "remaining_time": "1:11:33", "throughput": 8682.72, "total_tokens": 100660304} +{"current_steps": 149355, "total_steps": 204665, "loss": 0.0001, "lr": 4.1292696264043724e-07, "epoch": 3.648767498106662, "percentage": 72.98, "elapsed_time": "3:13:13", "remaining_time": "1:11:33", "throughput": 8682.73, "total_tokens": 100663312} +{"current_steps": 149360, "total_steps": 204665, "loss": 0.0, "lr": 4.128579296615312e-07, "epoch": 3.648889648938509, "percentage": 72.98, "elapsed_time": "3:13:13", "remaining_time": "1:11:32", "throughput": 8682.74, "total_tokens": 100666448} +{"current_steps": 149365, "total_steps": 204665, "loss": 0.0, "lr": 4.127889009524802e-07, "epoch": 3.6490117997703564, "percentage": 72.98, "elapsed_time": "3:13:14", "remaining_time": "1:11:32", "throughput": 8682.76, "total_tokens": 100669712} +{"current_steps": 149370, "total_steps": 204665, "loss": 0.0009, "lr": 4.127198765137866e-07, "epoch": 3.6491339506022036, "percentage": 72.98, "elapsed_time": "3:13:14", "remaining_time": "1:11:32", "throughput": 8682.78, "total_tokens": 100672976} +{"current_steps": 149375, "total_steps": 204665, "loss": 0.0, "lr": 4.1265085634595167e-07, "epoch": 3.6492561014340508, "percentage": 72.99, "elapsed_time": "3:13:14", "remaining_time": "1:11:31", "throughput": 8682.85, "total_tokens": 100676944} +{"current_steps": 149380, "total_steps": 204665, "loss": 0.0, "lr": 4.125818404494781e-07, "epoch": 3.649378252265898, "percentage": 72.99, "elapsed_time": "3:13:15", "remaining_time": "1:11:31", "throughput": 8682.91, "total_tokens": 100680784} +{"current_steps": 149385, "total_steps": 204665, "loss": 0.0009, "lr": 4.125128288248669e-07, "epoch": 3.649500403097745, "percentage": 72.99, "elapsed_time": "3:13:15", "remaining_time": "1:11:30", "throughput": 8682.92, "total_tokens": 100683856} +{"current_steps": 149390, "total_steps": 204665, "loss": 0.0002, "lr": 4.1244382147262093e-07, "epoch": 3.6496225539295923, "percentage": 72.99, "elapsed_time": "3:13:15", "remaining_time": "1:11:30", "throughput": 8682.96, "total_tokens": 100687376} +{"current_steps": 149395, "total_steps": 204665, "loss": 0.0, "lr": 4.123748183932414e-07, "epoch": 3.6497447047614395, "percentage": 72.99, "elapsed_time": "3:13:16", "remaining_time": "1:11:30", "throughput": 8683.0, "total_tokens": 100690832} +{"current_steps": 149400, "total_steps": 204665, "loss": 0.0001, "lr": 4.1230581958723e-07, "epoch": 3.6498668555932867, "percentage": 73.0, "elapsed_time": "3:13:16", "remaining_time": "1:11:29", "throughput": 8683.05, "total_tokens": 100694544} +{"current_steps": 149405, "total_steps": 204665, "loss": 0.0003, "lr": 4.122368250550889e-07, "epoch": 3.649989006425134, "percentage": 73.0, "elapsed_time": "3:13:17", "remaining_time": "1:11:29", "throughput": 8683.06, "total_tokens": 100697616} +{"current_steps": 149410, "total_steps": 204665, "loss": 0.0, "lr": 4.121678347973195e-07, "epoch": 3.6501111572569807, "percentage": 73.0, "elapsed_time": "3:13:17", "remaining_time": "1:11:28", "throughput": 8683.08, "total_tokens": 100700816} +{"current_steps": 149415, "total_steps": 204665, "loss": 0.0162, "lr": 4.1209884881442413e-07, "epoch": 3.6502333080888283, "percentage": 73.0, "elapsed_time": "3:13:17", "remaining_time": "1:11:28", "throughput": 8683.09, "total_tokens": 100703824} +{"current_steps": 149420, "total_steps": 204665, "loss": 0.0, "lr": 4.1202986710690356e-07, "epoch": 3.650355458920675, "percentage": 73.01, "elapsed_time": "3:13:18", "remaining_time": "1:11:28", "throughput": 8683.13, "total_tokens": 100707408} +{"current_steps": 149425, "total_steps": 204665, "loss": 0.0527, "lr": 4.1196088967526e-07, "epoch": 3.6504776097525227, "percentage": 73.01, "elapsed_time": "3:13:18", "remaining_time": "1:11:27", "throughput": 8683.18, "total_tokens": 100711056} +{"current_steps": 149430, "total_steps": 204665, "loss": 0.0, "lr": 4.1189191651999546e-07, "epoch": 3.6505997605843694, "percentage": 73.01, "elapsed_time": "3:13:18", "remaining_time": "1:11:27", "throughput": 8683.21, "total_tokens": 100714448} +{"current_steps": 149435, "total_steps": 204665, "loss": 0.0348, "lr": 4.118229476416106e-07, "epoch": 3.6507219114162166, "percentage": 73.01, "elapsed_time": "3:13:19", "remaining_time": "1:11:26", "throughput": 8683.24, "total_tokens": 100717840} +{"current_steps": 149440, "total_steps": 204665, "loss": 0.0, "lr": 4.1175398304060783e-07, "epoch": 3.650844062248064, "percentage": 73.02, "elapsed_time": "3:13:19", "remaining_time": "1:11:26", "throughput": 8683.29, "total_tokens": 100721424} +{"current_steps": 149445, "total_steps": 204665, "loss": 0.0, "lr": 4.1168502271748794e-07, "epoch": 3.650966213079911, "percentage": 73.02, "elapsed_time": "3:13:19", "remaining_time": "1:11:26", "throughput": 8683.33, "total_tokens": 100725008} +{"current_steps": 149450, "total_steps": 204665, "loss": 0.0, "lr": 4.1161606667275327e-07, "epoch": 3.651088363911758, "percentage": 73.02, "elapsed_time": "3:13:20", "remaining_time": "1:11:25", "throughput": 8683.38, "total_tokens": 100728656} +{"current_steps": 149455, "total_steps": 204665, "loss": 0.0, "lr": 4.1154711490690443e-07, "epoch": 3.6512105147436054, "percentage": 73.02, "elapsed_time": "3:13:20", "remaining_time": "1:11:25", "throughput": 8683.39, "total_tokens": 100731664} +{"current_steps": 149460, "total_steps": 204665, "loss": 0.0, "lr": 4.1147816742044317e-07, "epoch": 3.6513326655754526, "percentage": 73.03, "elapsed_time": "3:13:20", "remaining_time": "1:11:24", "throughput": 8683.4, "total_tokens": 100734800} +{"current_steps": 149465, "total_steps": 204665, "loss": 0.0, "lr": 4.1140922421387137e-07, "epoch": 3.6514548164072997, "percentage": 73.03, "elapsed_time": "3:13:21", "remaining_time": "1:11:24", "throughput": 8683.45, "total_tokens": 100738384} +{"current_steps": 149470, "total_steps": 204665, "loss": 0.0, "lr": 4.113402852876897e-07, "epoch": 3.651576967239147, "percentage": 73.03, "elapsed_time": "3:13:21", "remaining_time": "1:11:24", "throughput": 8683.48, "total_tokens": 100741712} +{"current_steps": 149475, "total_steps": 204665, "loss": 0.0, "lr": 4.1127135064240003e-07, "epoch": 3.651699118070994, "percentage": 73.03, "elapsed_time": "3:13:21", "remaining_time": "1:11:23", "throughput": 8683.49, "total_tokens": 100744848} +{"current_steps": 149480, "total_steps": 204665, "loss": 0.0, "lr": 4.112024202785033e-07, "epoch": 3.6518212689028413, "percentage": 73.04, "elapsed_time": "3:13:22", "remaining_time": "1:11:23", "throughput": 8683.52, "total_tokens": 100748176} +{"current_steps": 149485, "total_steps": 204665, "loss": 0.0, "lr": 4.1113349419650113e-07, "epoch": 3.6519434197346885, "percentage": 73.04, "elapsed_time": "3:13:22", "remaining_time": "1:11:22", "throughput": 8683.56, "total_tokens": 100751696} +{"current_steps": 149490, "total_steps": 204665, "loss": 0.0, "lr": 4.1106457239689417e-07, "epoch": 3.6520655705665357, "percentage": 73.04, "elapsed_time": "3:13:22", "remaining_time": "1:11:22", "throughput": 8683.61, "total_tokens": 100755408} +{"current_steps": 149495, "total_steps": 204665, "loss": 0.0, "lr": 4.109956548801845e-07, "epoch": 3.652187721398383, "percentage": 73.04, "elapsed_time": "3:13:23", "remaining_time": "1:11:22", "throughput": 8683.62, "total_tokens": 100758416} +{"current_steps": 149500, "total_steps": 204665, "loss": 0.0, "lr": 4.1092674164687247e-07, "epoch": 3.65230987223023, "percentage": 73.05, "elapsed_time": "3:13:23", "remaining_time": "1:11:21", "throughput": 8683.62, "total_tokens": 100761296} +{"current_steps": 149505, "total_steps": 204665, "loss": 0.0, "lr": 4.1085783269746e-07, "epoch": 3.652432023062077, "percentage": 73.05, "elapsed_time": "3:13:23", "remaining_time": "1:11:21", "throughput": 8683.63, "total_tokens": 100764432} +{"current_steps": 149510, "total_steps": 204665, "loss": 0.0, "lr": 4.107889280324478e-07, "epoch": 3.6525541738939244, "percentage": 73.05, "elapsed_time": "3:13:24", "remaining_time": "1:11:20", "throughput": 8683.66, "total_tokens": 100767824} +{"current_steps": 149515, "total_steps": 204665, "loss": 0.0, "lr": 4.107200276523367e-07, "epoch": 3.652676324725771, "percentage": 73.05, "elapsed_time": "3:13:24", "remaining_time": "1:11:20", "throughput": 8683.71, "total_tokens": 100771344} +{"current_steps": 149520, "total_steps": 204665, "loss": 0.0001, "lr": 4.10651131557628e-07, "epoch": 3.6527984755576184, "percentage": 73.06, "elapsed_time": "3:13:24", "remaining_time": "1:11:20", "throughput": 8683.75, "total_tokens": 100774864} +{"current_steps": 149525, "total_steps": 204665, "loss": 0.0313, "lr": 4.105822397488231e-07, "epoch": 3.6529206263894656, "percentage": 73.06, "elapsed_time": "3:13:25", "remaining_time": "1:11:19", "throughput": 8683.75, "total_tokens": 100777808} +{"current_steps": 149530, "total_steps": 204665, "loss": 0.0, "lr": 4.1051335222642247e-07, "epoch": 3.6530427772213128, "percentage": 73.06, "elapsed_time": "3:13:25", "remaining_time": "1:11:19", "throughput": 8683.81, "total_tokens": 100781712} +{"current_steps": 149535, "total_steps": 204665, "loss": 0.0, "lr": 4.1044446899092756e-07, "epoch": 3.65316492805316, "percentage": 73.06, "elapsed_time": "3:13:26", "remaining_time": "1:11:18", "throughput": 8683.86, "total_tokens": 100785360} +{"current_steps": 149540, "total_steps": 204665, "loss": 0.0672, "lr": 4.1037559004283863e-07, "epoch": 3.653287078885007, "percentage": 73.07, "elapsed_time": "3:13:26", "remaining_time": "1:11:18", "throughput": 8683.87, "total_tokens": 100788368} +{"current_steps": 149545, "total_steps": 204665, "loss": 0.0004, "lr": 4.103067153826575e-07, "epoch": 3.6534092297168543, "percentage": 73.07, "elapsed_time": "3:13:26", "remaining_time": "1:11:18", "throughput": 8683.9, "total_tokens": 100791824} +{"current_steps": 149550, "total_steps": 204665, "loss": 0.0, "lr": 4.1023784501088407e-07, "epoch": 3.6535313805487015, "percentage": 73.07, "elapsed_time": "3:13:27", "remaining_time": "1:11:17", "throughput": 8683.95, "total_tokens": 100795344} +{"current_steps": 149555, "total_steps": 204665, "loss": 0.0, "lr": 4.101689789280197e-07, "epoch": 3.6536535313805487, "percentage": 73.07, "elapsed_time": "3:13:27", "remaining_time": "1:11:17", "throughput": 8683.98, "total_tokens": 100798800} +{"current_steps": 149560, "total_steps": 204665, "loss": 0.0002, "lr": 4.101001171345655e-07, "epoch": 3.653775682212396, "percentage": 73.08, "elapsed_time": "3:13:27", "remaining_time": "1:11:16", "throughput": 8684.03, "total_tokens": 100802448} +{"current_steps": 149565, "total_steps": 204665, "loss": 0.0, "lr": 4.100312596310216e-07, "epoch": 3.653897833044243, "percentage": 73.08, "elapsed_time": "3:13:28", "remaining_time": "1:11:16", "throughput": 8684.07, "total_tokens": 100806032} +{"current_steps": 149570, "total_steps": 204665, "loss": 0.0, "lr": 4.0996240641788936e-07, "epoch": 3.6540199838760903, "percentage": 73.08, "elapsed_time": "3:13:28", "remaining_time": "1:11:16", "throughput": 8684.1, "total_tokens": 100809360} +{"current_steps": 149575, "total_steps": 204665, "loss": 0.0, "lr": 4.0989355749566887e-07, "epoch": 3.6541421347079375, "percentage": 73.08, "elapsed_time": "3:13:28", "remaining_time": "1:11:15", "throughput": 8684.14, "total_tokens": 100812880} +{"current_steps": 149580, "total_steps": 204665, "loss": 0.0536, "lr": 4.098247128648611e-07, "epoch": 3.6542642855397847, "percentage": 73.09, "elapsed_time": "3:13:29", "remaining_time": "1:11:15", "throughput": 8684.19, "total_tokens": 100816528} +{"current_steps": 149585, "total_steps": 204665, "loss": 0.0, "lr": 4.097558725259672e-07, "epoch": 3.654386436371632, "percentage": 73.09, "elapsed_time": "3:13:29", "remaining_time": "1:11:14", "throughput": 8684.21, "total_tokens": 100819664} +{"current_steps": 149590, "total_steps": 204665, "loss": 0.0, "lr": 4.09687036479487e-07, "epoch": 3.6545085872034786, "percentage": 73.09, "elapsed_time": "3:13:29", "remaining_time": "1:11:14", "throughput": 8684.27, "total_tokens": 100823504} +{"current_steps": 149595, "total_steps": 204665, "loss": 0.0002, "lr": 4.0961820472592167e-07, "epoch": 3.6546307380353262, "percentage": 73.09, "elapsed_time": "3:13:30", "remaining_time": "1:11:14", "throughput": 8684.31, "total_tokens": 100827024} +{"current_steps": 149600, "total_steps": 204665, "loss": 0.0455, "lr": 4.0954937726577165e-07, "epoch": 3.654752888867173, "percentage": 73.1, "elapsed_time": "3:13:30", "remaining_time": "1:11:13", "throughput": 8684.33, "total_tokens": 100830352} +{"current_steps": 149605, "total_steps": 204665, "loss": 0.0, "lr": 4.094805540995371e-07, "epoch": 3.6548750396990206, "percentage": 73.1, "elapsed_time": "3:13:30", "remaining_time": "1:11:13", "throughput": 8684.38, "total_tokens": 100834000} +{"current_steps": 149610, "total_steps": 204665, "loss": 0.0474, "lr": 4.09411735227719e-07, "epoch": 3.6549971905308674, "percentage": 73.1, "elapsed_time": "3:13:31", "remaining_time": "1:11:12", "throughput": 8684.39, "total_tokens": 100837008} +{"current_steps": 149615, "total_steps": 204665, "loss": 0.0267, "lr": 4.0934292065081733e-07, "epoch": 3.6551193413627145, "percentage": 73.1, "elapsed_time": "3:13:31", "remaining_time": "1:11:12", "throughput": 8684.45, "total_tokens": 100840848} +{"current_steps": 149620, "total_steps": 204665, "loss": 0.0, "lr": 4.0927411036933314e-07, "epoch": 3.6552414921945617, "percentage": 73.1, "elapsed_time": "3:13:32", "remaining_time": "1:11:12", "throughput": 8684.46, "total_tokens": 100843984} +{"current_steps": 149625, "total_steps": 204665, "loss": 0.0, "lr": 4.092053043837661e-07, "epoch": 3.655363643026409, "percentage": 73.11, "elapsed_time": "3:13:32", "remaining_time": "1:11:11", "throughput": 8684.5, "total_tokens": 100847504} +{"current_steps": 149630, "total_steps": 204665, "loss": 0.0001, "lr": 4.091365026946174e-07, "epoch": 3.655485793858256, "percentage": 73.11, "elapsed_time": "3:13:32", "remaining_time": "1:11:11", "throughput": 8684.52, "total_tokens": 100850640} +{"current_steps": 149635, "total_steps": 204665, "loss": 0.0, "lr": 4.0906770530238667e-07, "epoch": 3.6556079446901033, "percentage": 73.11, "elapsed_time": "3:13:33", "remaining_time": "1:11:10", "throughput": 8684.6, "total_tokens": 100854736} +{"current_steps": 149640, "total_steps": 204665, "loss": 0.0, "lr": 4.089989122075748e-07, "epoch": 3.6557300955219505, "percentage": 73.11, "elapsed_time": "3:13:33", "remaining_time": "1:11:10", "throughput": 8684.62, "total_tokens": 100858000} +{"current_steps": 149645, "total_steps": 204665, "loss": 0.1434, "lr": 4.0893012341068146e-07, "epoch": 3.6558522463537977, "percentage": 73.12, "elapsed_time": "3:13:33", "remaining_time": "1:11:10", "throughput": 8684.64, "total_tokens": 100861136} +{"current_steps": 149650, "total_steps": 204665, "loss": 0.0, "lr": 4.088613389122072e-07, "epoch": 3.655974397185645, "percentage": 73.12, "elapsed_time": "3:13:34", "remaining_time": "1:11:09", "throughput": 8684.69, "total_tokens": 100864848} +{"current_steps": 149655, "total_steps": 204665, "loss": 0.0012, "lr": 4.087925587126527e-07, "epoch": 3.656096548017492, "percentage": 73.12, "elapsed_time": "3:13:34", "remaining_time": "1:11:09", "throughput": 8684.72, "total_tokens": 100868240} +{"current_steps": 149660, "total_steps": 204665, "loss": 0.0003, "lr": 4.087237828125174e-07, "epoch": 3.6562186988493393, "percentage": 73.12, "elapsed_time": "3:13:34", "remaining_time": "1:11:08", "throughput": 8684.77, "total_tokens": 100871888} +{"current_steps": 149665, "total_steps": 204665, "loss": 0.0, "lr": 4.0865501121230205e-07, "epoch": 3.6563408496811864, "percentage": 73.13, "elapsed_time": "3:13:35", "remaining_time": "1:11:08", "throughput": 8684.81, "total_tokens": 100875344} +{"current_steps": 149670, "total_steps": 204665, "loss": 0.0003, "lr": 4.085862439125063e-07, "epoch": 3.6564630005130336, "percentage": 73.13, "elapsed_time": "3:13:35", "remaining_time": "1:11:08", "throughput": 8684.82, "total_tokens": 100878544} +{"current_steps": 149675, "total_steps": 204665, "loss": 0.0, "lr": 4.0851748091363036e-07, "epoch": 3.656585151344881, "percentage": 73.13, "elapsed_time": "3:13:35", "remaining_time": "1:11:07", "throughput": 8684.84, "total_tokens": 100881616} +{"current_steps": 149680, "total_steps": 204665, "loss": 0.0001, "lr": 4.084487222161748e-07, "epoch": 3.656707302176728, "percentage": 73.13, "elapsed_time": "3:13:36", "remaining_time": "1:11:07", "throughput": 8684.87, "total_tokens": 100885072} +{"current_steps": 149685, "total_steps": 204665, "loss": 0.0, "lr": 4.0837996782063876e-07, "epoch": 3.6568294530085748, "percentage": 73.14, "elapsed_time": "3:13:36", "remaining_time": "1:11:06", "throughput": 8684.91, "total_tokens": 100888592} +{"current_steps": 149690, "total_steps": 204665, "loss": 0.0005, "lr": 4.083112177275232e-07, "epoch": 3.6569516038404224, "percentage": 73.14, "elapsed_time": "3:13:36", "remaining_time": "1:11:06", "throughput": 8684.94, "total_tokens": 100891984} +{"current_steps": 149695, "total_steps": 204665, "loss": 0.0309, "lr": 4.082424719373272e-07, "epoch": 3.657073754672269, "percentage": 73.14, "elapsed_time": "3:13:37", "remaining_time": "1:11:06", "throughput": 8685.0, "total_tokens": 100895760} +{"current_steps": 149700, "total_steps": 204665, "loss": 0.0, "lr": 4.0817373045055134e-07, "epoch": 3.6571959055041163, "percentage": 73.14, "elapsed_time": "3:13:37", "remaining_time": "1:11:05", "throughput": 8685.06, "total_tokens": 100899600} +{"current_steps": 149705, "total_steps": 204665, "loss": 0.0974, "lr": 4.0810499326769557e-07, "epoch": 3.6573180563359635, "percentage": 73.15, "elapsed_time": "3:13:37", "remaining_time": "1:11:05", "throughput": 8685.07, "total_tokens": 100902672} +{"current_steps": 149710, "total_steps": 204665, "loss": 0.0, "lr": 4.080362603892589e-07, "epoch": 3.6574402071678107, "percentage": 73.15, "elapsed_time": "3:13:38", "remaining_time": "1:11:04", "throughput": 8685.09, "total_tokens": 100905872} +{"current_steps": 149715, "total_steps": 204665, "loss": 0.0, "lr": 4.079675318157423e-07, "epoch": 3.657562357999658, "percentage": 73.15, "elapsed_time": "3:13:38", "remaining_time": "1:11:04", "throughput": 8685.12, "total_tokens": 100909200} +{"current_steps": 149720, "total_steps": 204665, "loss": 0.0, "lr": 4.078988075476445e-07, "epoch": 3.657684508831505, "percentage": 73.15, "elapsed_time": "3:13:38", "remaining_time": "1:11:03", "throughput": 8685.12, "total_tokens": 100912144} +{"current_steps": 149725, "total_steps": 204665, "loss": 0.0, "lr": 4.0783008758546633e-07, "epoch": 3.6578066596633523, "percentage": 73.16, "elapsed_time": "3:13:39", "remaining_time": "1:11:03", "throughput": 8685.15, "total_tokens": 100915536} +{"current_steps": 149730, "total_steps": 204665, "loss": 0.0, "lr": 4.0776137192970664e-07, "epoch": 3.6579288104951995, "percentage": 73.16, "elapsed_time": "3:13:39", "remaining_time": "1:11:03", "throughput": 8685.16, "total_tokens": 100918544} +{"current_steps": 149735, "total_steps": 204665, "loss": 0.0, "lr": 4.076926605808655e-07, "epoch": 3.6580509613270467, "percentage": 73.16, "elapsed_time": "3:13:40", "remaining_time": "1:11:02", "throughput": 8685.18, "total_tokens": 100921744} +{"current_steps": 149740, "total_steps": 204665, "loss": 0.0, "lr": 4.0762395353944303e-07, "epoch": 3.658173112158894, "percentage": 73.16, "elapsed_time": "3:13:40", "remaining_time": "1:11:02", "throughput": 8685.19, "total_tokens": 100924944} +{"current_steps": 149745, "total_steps": 204665, "loss": 0.0002, "lr": 4.075552508059382e-07, "epoch": 3.658295262990741, "percentage": 73.17, "elapsed_time": "3:13:40", "remaining_time": "1:11:01", "throughput": 8685.25, "total_tokens": 100928656} +{"current_steps": 149750, "total_steps": 204665, "loss": 0.0, "lr": 4.0748655238085115e-07, "epoch": 3.6584174138225882, "percentage": 73.17, "elapsed_time": "3:13:41", "remaining_time": "1:11:01", "throughput": 8685.27, "total_tokens": 100931920} +{"current_steps": 149755, "total_steps": 204665, "loss": 0.0, "lr": 4.074178582646811e-07, "epoch": 3.6585395646544354, "percentage": 73.17, "elapsed_time": "3:13:41", "remaining_time": "1:11:01", "throughput": 8685.3, "total_tokens": 100935312} +{"current_steps": 149760, "total_steps": 204665, "loss": 0.0001, "lr": 4.07349168457928e-07, "epoch": 3.6586617154862826, "percentage": 73.17, "elapsed_time": "3:13:41", "remaining_time": "1:11:00", "throughput": 8685.34, "total_tokens": 100938832} +{"current_steps": 149765, "total_steps": 204665, "loss": 0.0, "lr": 4.0728048296109084e-07, "epoch": 3.65878386631813, "percentage": 73.18, "elapsed_time": "3:13:42", "remaining_time": "1:11:00", "throughput": 8685.37, "total_tokens": 100942096} +{"current_steps": 149770, "total_steps": 204665, "loss": 0.0001, "lr": 4.072118017746694e-07, "epoch": 3.6589060171499765, "percentage": 73.18, "elapsed_time": "3:13:42", "remaining_time": "1:10:59", "throughput": 8685.4, "total_tokens": 100945424} +{"current_steps": 149775, "total_steps": 204665, "loss": 0.0, "lr": 4.0714312489916347e-07, "epoch": 3.659028167981824, "percentage": 73.18, "elapsed_time": "3:13:42", "remaining_time": "1:10:59", "throughput": 8685.43, "total_tokens": 100948880} +{"current_steps": 149780, "total_steps": 204665, "loss": 0.0, "lr": 4.07074452335072e-07, "epoch": 3.659150318813671, "percentage": 73.18, "elapsed_time": "3:13:43", "remaining_time": "1:10:59", "throughput": 8685.61, "total_tokens": 100954448} +{"current_steps": 149785, "total_steps": 204665, "loss": 0.0, "lr": 4.0700578408289477e-07, "epoch": 3.6592724696455186, "percentage": 73.19, "elapsed_time": "3:13:43", "remaining_time": "1:10:58", "throughput": 8685.67, "total_tokens": 100958288} +{"current_steps": 149790, "total_steps": 204665, "loss": 0.0514, "lr": 4.069371201431308e-07, "epoch": 3.6593946204773653, "percentage": 73.19, "elapsed_time": "3:13:43", "remaining_time": "1:10:58", "throughput": 8685.67, "total_tokens": 100961296} +{"current_steps": 149795, "total_steps": 204665, "loss": 0.0, "lr": 4.068684605162798e-07, "epoch": 3.6595167713092125, "percentage": 73.19, "elapsed_time": "3:13:44", "remaining_time": "1:10:57", "throughput": 8685.69, "total_tokens": 100964432} +{"current_steps": 149800, "total_steps": 204665, "loss": 0.0, "lr": 4.067998052028406e-07, "epoch": 3.6596389221410597, "percentage": 73.19, "elapsed_time": "3:13:44", "remaining_time": "1:10:57", "throughput": 8685.72, "total_tokens": 100967824} +{"current_steps": 149805, "total_steps": 204665, "loss": 0.0001, "lr": 4.0673115420331315e-07, "epoch": 3.659761072972907, "percentage": 73.2, "elapsed_time": "3:13:44", "remaining_time": "1:10:57", "throughput": 8685.77, "total_tokens": 100971472} +{"current_steps": 149810, "total_steps": 204665, "loss": 0.0, "lr": 4.0666250751819597e-07, "epoch": 3.659883223804754, "percentage": 73.2, "elapsed_time": "3:13:45", "remaining_time": "1:10:56", "throughput": 8685.8, "total_tokens": 100974864} +{"current_steps": 149815, "total_steps": 204665, "loss": 0.0, "lr": 4.0659386514798887e-07, "epoch": 3.6600053746366012, "percentage": 73.2, "elapsed_time": "3:13:45", "remaining_time": "1:10:56", "throughput": 8685.85, "total_tokens": 100978576} +{"current_steps": 149820, "total_steps": 204665, "loss": 0.0, "lr": 4.065252270931909e-07, "epoch": 3.6601275254684484, "percentage": 73.2, "elapsed_time": "3:13:45", "remaining_time": "1:10:55", "throughput": 8685.89, "total_tokens": 100982032} +{"current_steps": 149825, "total_steps": 204665, "loss": 0.0, "lr": 4.0645659335430073e-07, "epoch": 3.6602496763002956, "percentage": 73.2, "elapsed_time": "3:13:46", "remaining_time": "1:10:55", "throughput": 8685.91, "total_tokens": 100985296} +{"current_steps": 149830, "total_steps": 204665, "loss": 0.0001, "lr": 4.063879639318178e-07, "epoch": 3.660371827132143, "percentage": 73.21, "elapsed_time": "3:13:46", "remaining_time": "1:10:55", "throughput": 8685.93, "total_tokens": 100988496} +{"current_steps": 149835, "total_steps": 204665, "loss": 0.0, "lr": 4.063193388262417e-07, "epoch": 3.66049397796399, "percentage": 73.21, "elapsed_time": "3:13:47", "remaining_time": "1:10:54", "throughput": 8685.94, "total_tokens": 100991696} +{"current_steps": 149840, "total_steps": 204665, "loss": 0.0006, "lr": 4.062507180380707e-07, "epoch": 3.660616128795837, "percentage": 73.21, "elapsed_time": "3:13:47", "remaining_time": "1:10:54", "throughput": 8685.98, "total_tokens": 100995088} +{"current_steps": 149845, "total_steps": 204665, "loss": 0.0, "lr": 4.061821015678044e-07, "epoch": 3.6607382796276844, "percentage": 73.21, "elapsed_time": "3:13:47", "remaining_time": "1:10:53", "throughput": 8685.99, "total_tokens": 100998224} +{"current_steps": 149850, "total_steps": 204665, "loss": 0.0, "lr": 4.061134894159413e-07, "epoch": 3.6608604304595316, "percentage": 73.22, "elapsed_time": "3:13:48", "remaining_time": "1:10:53", "throughput": 8686.03, "total_tokens": 101001872} +{"current_steps": 149855, "total_steps": 204665, "loss": 0.0001, "lr": 4.06044881582981e-07, "epoch": 3.6609825812913783, "percentage": 73.22, "elapsed_time": "3:13:48", "remaining_time": "1:10:53", "throughput": 8686.07, "total_tokens": 101005328} +{"current_steps": 149860, "total_steps": 204665, "loss": 0.0, "lr": 4.059762780694217e-07, "epoch": 3.661104732123226, "percentage": 73.22, "elapsed_time": "3:13:48", "remaining_time": "1:10:52", "throughput": 8686.1, "total_tokens": 101008720} +{"current_steps": 149865, "total_steps": 204665, "loss": 0.0002, "lr": 4.059076788757627e-07, "epoch": 3.6612268829550727, "percentage": 73.22, "elapsed_time": "3:13:49", "remaining_time": "1:10:52", "throughput": 8686.13, "total_tokens": 101012112} +{"current_steps": 149870, "total_steps": 204665, "loss": 0.0, "lr": 4.058390840025032e-07, "epoch": 3.6613490337869203, "percentage": 73.23, "elapsed_time": "3:13:49", "remaining_time": "1:10:51", "throughput": 8686.14, "total_tokens": 101015248} +{"current_steps": 149875, "total_steps": 204665, "loss": 0.06, "lr": 4.0577049345014137e-07, "epoch": 3.661471184618767, "percentage": 73.23, "elapsed_time": "3:13:49", "remaining_time": "1:10:51", "throughput": 8686.14, "total_tokens": 101018128} +{"current_steps": 149880, "total_steps": 204665, "loss": 0.0, "lr": 4.057019072191766e-07, "epoch": 3.6615933354506143, "percentage": 73.23, "elapsed_time": "3:13:50", "remaining_time": "1:10:51", "throughput": 8686.15, "total_tokens": 101021136} +{"current_steps": 149885, "total_steps": 204665, "loss": 0.0003, "lr": 4.056333253101072e-07, "epoch": 3.6617154862824615, "percentage": 73.23, "elapsed_time": "3:13:50", "remaining_time": "1:10:50", "throughput": 8686.2, "total_tokens": 101024848} +{"current_steps": 149890, "total_steps": 204665, "loss": 0.0003, "lr": 4.0556474772343194e-07, "epoch": 3.6618376371143087, "percentage": 73.24, "elapsed_time": "3:13:50", "remaining_time": "1:10:50", "throughput": 8686.22, "total_tokens": 101028176} +{"current_steps": 149895, "total_steps": 204665, "loss": 0.0, "lr": 4.0549617445965023e-07, "epoch": 3.661959787946156, "percentage": 73.24, "elapsed_time": "3:13:51", "remaining_time": "1:10:49", "throughput": 8686.23, "total_tokens": 101031248} +{"current_steps": 149900, "total_steps": 204665, "loss": 0.0366, "lr": 4.0542760551925983e-07, "epoch": 3.662081938778003, "percentage": 73.24, "elapsed_time": "3:13:51", "remaining_time": "1:10:49", "throughput": 8686.29, "total_tokens": 101034960} +{"current_steps": 149905, "total_steps": 204665, "loss": 0.0, "lr": 4.053590409027602e-07, "epoch": 3.66220408960985, "percentage": 73.24, "elapsed_time": "3:13:51", "remaining_time": "1:10:49", "throughput": 8686.31, "total_tokens": 101038224} +{"current_steps": 149910, "total_steps": 204665, "loss": 0.0, "lr": 4.052904806106495e-07, "epoch": 3.6623262404416974, "percentage": 73.25, "elapsed_time": "3:13:52", "remaining_time": "1:10:48", "throughput": 8686.32, "total_tokens": 101041360} +{"current_steps": 149915, "total_steps": 204665, "loss": 0.0001, "lr": 4.052219246434261e-07, "epoch": 3.6624483912735446, "percentage": 73.25, "elapsed_time": "3:13:52", "remaining_time": "1:10:48", "throughput": 8686.36, "total_tokens": 101044816} +{"current_steps": 149920, "total_steps": 204665, "loss": 0.0, "lr": 4.0515337300158914e-07, "epoch": 3.662570542105392, "percentage": 73.25, "elapsed_time": "3:13:52", "remaining_time": "1:10:47", "throughput": 8686.36, "total_tokens": 101047824} +{"current_steps": 149925, "total_steps": 204665, "loss": 0.0975, "lr": 4.050848256856365e-07, "epoch": 3.662692692937239, "percentage": 73.25, "elapsed_time": "3:13:53", "remaining_time": "1:10:47", "throughput": 8686.4, "total_tokens": 101051280} +{"current_steps": 149930, "total_steps": 204665, "loss": 0.0, "lr": 4.0501628269606735e-07, "epoch": 3.662814843769086, "percentage": 73.26, "elapsed_time": "3:13:53", "remaining_time": "1:10:47", "throughput": 8686.45, "total_tokens": 101055056} +{"current_steps": 149935, "total_steps": 204665, "loss": 0.001, "lr": 4.049477440333795e-07, "epoch": 3.6629369946009334, "percentage": 73.26, "elapsed_time": "3:13:53", "remaining_time": "1:10:46", "throughput": 8686.49, "total_tokens": 101058512} +{"current_steps": 149940, "total_steps": 204665, "loss": 0.0299, "lr": 4.04879209698072e-07, "epoch": 3.6630591454327806, "percentage": 73.26, "elapsed_time": "3:13:54", "remaining_time": "1:10:46", "throughput": 8686.5, "total_tokens": 101061648} +{"current_steps": 149945, "total_steps": 204665, "loss": 0.0, "lr": 4.048106796906426e-07, "epoch": 3.6631812962646277, "percentage": 73.26, "elapsed_time": "3:13:54", "remaining_time": "1:10:45", "throughput": 8686.56, "total_tokens": 101065360} +{"current_steps": 149950, "total_steps": 204665, "loss": 0.0, "lr": 4.047421540115905e-07, "epoch": 3.6633034470964745, "percentage": 73.27, "elapsed_time": "3:13:55", "remaining_time": "1:10:45", "throughput": 8686.6, "total_tokens": 101068880} +{"current_steps": 149955, "total_steps": 204665, "loss": 0.0, "lr": 4.0467363266141317e-07, "epoch": 3.663425597928322, "percentage": 73.27, "elapsed_time": "3:13:55", "remaining_time": "1:10:45", "throughput": 8686.6, "total_tokens": 101071888} +{"current_steps": 149960, "total_steps": 204665, "loss": 0.0, "lr": 4.046051156406093e-07, "epoch": 3.663547748760169, "percentage": 73.27, "elapsed_time": "3:13:55", "remaining_time": "1:10:44", "throughput": 8686.65, "total_tokens": 101075472} +{"current_steps": 149965, "total_steps": 204665, "loss": 0.0, "lr": 4.045366029496774e-07, "epoch": 3.6636698995920165, "percentage": 73.27, "elapsed_time": "3:13:56", "remaining_time": "1:10:44", "throughput": 8686.67, "total_tokens": 101078672} +{"current_steps": 149970, "total_steps": 204665, "loss": 0.0001, "lr": 4.044680945891152e-07, "epoch": 3.6637920504238632, "percentage": 73.28, "elapsed_time": "3:13:56", "remaining_time": "1:10:43", "throughput": 8686.69, "total_tokens": 101082064} +{"current_steps": 149975, "total_steps": 204665, "loss": 0.0, "lr": 4.0439959055942163e-07, "epoch": 3.6639142012557104, "percentage": 73.28, "elapsed_time": "3:13:56", "remaining_time": "1:10:43", "throughput": 8686.73, "total_tokens": 101085520} +{"current_steps": 149980, "total_steps": 204665, "loss": 0.0875, "lr": 4.0433109086109407e-07, "epoch": 3.6640363520875576, "percentage": 73.28, "elapsed_time": "3:13:57", "remaining_time": "1:10:43", "throughput": 8686.77, "total_tokens": 101089168} +{"current_steps": 149985, "total_steps": 204665, "loss": 0.0, "lr": 4.042625954946309e-07, "epoch": 3.664158502919405, "percentage": 73.28, "elapsed_time": "3:13:57", "remaining_time": "1:10:42", "throughput": 8686.81, "total_tokens": 101092560} +{"current_steps": 149990, "total_steps": 204665, "loss": 0.0839, "lr": 4.0419410446053095e-07, "epoch": 3.664280653751252, "percentage": 73.29, "elapsed_time": "3:13:57", "remaining_time": "1:10:42", "throughput": 8686.87, "total_tokens": 101096400} +{"current_steps": 149995, "total_steps": 204665, "loss": 0.0001, "lr": 4.0412561775929123e-07, "epoch": 3.664402804583099, "percentage": 73.29, "elapsed_time": "3:13:58", "remaining_time": "1:10:41", "throughput": 8686.9, "total_tokens": 101099856} +{"current_steps": 150000, "total_steps": 204665, "loss": 0.0, "lr": 4.040571353914106e-07, "epoch": 3.6645249554149464, "percentage": 73.29, "elapsed_time": "3:13:58", "remaining_time": "1:10:41", "throughput": 8686.92, "total_tokens": 101102992} +{"current_steps": 150005, "total_steps": 204665, "loss": 0.0, "lr": 4.039886573573864e-07, "epoch": 3.6646471062467936, "percentage": 73.29, "elapsed_time": "3:13:58", "remaining_time": "1:10:41", "throughput": 8686.94, "total_tokens": 101106320} +{"current_steps": 150010, "total_steps": 204665, "loss": 0.0, "lr": 4.039201836577175e-07, "epoch": 3.6647692570786408, "percentage": 73.3, "elapsed_time": "3:13:59", "remaining_time": "1:10:40", "throughput": 8686.95, "total_tokens": 101109328} +{"current_steps": 150015, "total_steps": 204665, "loss": 0.0001, "lr": 4.038517142929012e-07, "epoch": 3.664891407910488, "percentage": 73.3, "elapsed_time": "3:13:59", "remaining_time": "1:10:40", "throughput": 8686.96, "total_tokens": 101112400} +{"current_steps": 150020, "total_steps": 204665, "loss": 0.0, "lr": 4.037832492634353e-07, "epoch": 3.665013558742335, "percentage": 73.3, "elapsed_time": "3:13:59", "remaining_time": "1:10:39", "throughput": 8686.96, "total_tokens": 101115408} +{"current_steps": 150025, "total_steps": 204665, "loss": 0.0, "lr": 4.0371478856981834e-07, "epoch": 3.6651357095741823, "percentage": 73.3, "elapsed_time": "3:14:00", "remaining_time": "1:10:39", "throughput": 8686.99, "total_tokens": 101118800} +{"current_steps": 150030, "total_steps": 204665, "loss": 0.0, "lr": 4.036463322125474e-07, "epoch": 3.6652578604060295, "percentage": 73.31, "elapsed_time": "3:14:00", "remaining_time": "1:10:39", "throughput": 8687.01, "total_tokens": 101122064} +{"current_steps": 150035, "total_steps": 204665, "loss": 0.0813, "lr": 4.0357788019212116e-07, "epoch": 3.6653800112378763, "percentage": 73.31, "elapsed_time": "3:14:00", "remaining_time": "1:10:38", "throughput": 8687.06, "total_tokens": 101125776} +{"current_steps": 150040, "total_steps": 204665, "loss": 0.0, "lr": 4.0350943250903657e-07, "epoch": 3.665502162069724, "percentage": 73.31, "elapsed_time": "3:14:01", "remaining_time": "1:10:38", "throughput": 8687.1, "total_tokens": 101129168} +{"current_steps": 150045, "total_steps": 204665, "loss": 0.0001, "lr": 4.034409891637919e-07, "epoch": 3.6656243129015706, "percentage": 73.31, "elapsed_time": "3:14:01", "remaining_time": "1:10:37", "throughput": 8687.15, "total_tokens": 101132880} +{"current_steps": 150050, "total_steps": 204665, "loss": 0.0, "lr": 4.033725501568851e-07, "epoch": 3.6657464637334183, "percentage": 73.31, "elapsed_time": "3:14:02", "remaining_time": "1:10:37", "throughput": 8687.18, "total_tokens": 101136272} +{"current_steps": 150055, "total_steps": 204665, "loss": 0.0, "lr": 4.0330411548881325e-07, "epoch": 3.665868614565265, "percentage": 73.32, "elapsed_time": "3:14:02", "remaining_time": "1:10:37", "throughput": 8687.21, "total_tokens": 101139728} +{"current_steps": 150060, "total_steps": 204665, "loss": 0.0, "lr": 4.032356851600748e-07, "epoch": 3.665990765397112, "percentage": 73.32, "elapsed_time": "3:14:02", "remaining_time": "1:10:36", "throughput": 8687.21, "total_tokens": 101142608} +{"current_steps": 150065, "total_steps": 204665, "loss": 0.0004, "lr": 4.0316725917116645e-07, "epoch": 3.6661129162289594, "percentage": 73.32, "elapsed_time": "3:14:03", "remaining_time": "1:10:36", "throughput": 8687.24, "total_tokens": 101145936} +{"current_steps": 150070, "total_steps": 204665, "loss": 0.0, "lr": 4.030988375225868e-07, "epoch": 3.6662350670608066, "percentage": 73.32, "elapsed_time": "3:14:03", "remaining_time": "1:10:35", "throughput": 8687.3, "total_tokens": 101149840} +{"current_steps": 150075, "total_steps": 204665, "loss": 0.0, "lr": 4.0303042021483256e-07, "epoch": 3.666357217892654, "percentage": 73.33, "elapsed_time": "3:14:03", "remaining_time": "1:10:35", "throughput": 8687.33, "total_tokens": 101153168} +{"current_steps": 150080, "total_steps": 204665, "loss": 0.0, "lr": 4.029620072484017e-07, "epoch": 3.666479368724501, "percentage": 73.33, "elapsed_time": "3:14:04", "remaining_time": "1:10:35", "throughput": 8687.36, "total_tokens": 101156560} +{"current_steps": 150085, "total_steps": 204665, "loss": 0.0, "lr": 4.02893598623792e-07, "epoch": 3.666601519556348, "percentage": 73.33, "elapsed_time": "3:14:04", "remaining_time": "1:10:34", "throughput": 8687.41, "total_tokens": 101160208} +{"current_steps": 150090, "total_steps": 204665, "loss": 0.0, "lr": 4.028251943415003e-07, "epoch": 3.6667236703881954, "percentage": 73.33, "elapsed_time": "3:14:04", "remaining_time": "1:10:34", "throughput": 8687.45, "total_tokens": 101163792} +{"current_steps": 150095, "total_steps": 204665, "loss": 0.0, "lr": 4.027567944020248e-07, "epoch": 3.6668458212200425, "percentage": 73.34, "elapsed_time": "3:14:05", "remaining_time": "1:10:33", "throughput": 8687.49, "total_tokens": 101167184} +{"current_steps": 150100, "total_steps": 204665, "loss": 0.0269, "lr": 4.0268839880586214e-07, "epoch": 3.6669679720518897, "percentage": 73.34, "elapsed_time": "3:14:05", "remaining_time": "1:10:33", "throughput": 8687.51, "total_tokens": 101170448} +{"current_steps": 150105, "total_steps": 204665, "loss": 0.0, "lr": 4.026200075535104e-07, "epoch": 3.667090122883737, "percentage": 73.34, "elapsed_time": "3:14:05", "remaining_time": "1:10:33", "throughput": 8687.53, "total_tokens": 101173712} +{"current_steps": 150110, "total_steps": 204665, "loss": 0.0, "lr": 4.0255162064546644e-07, "epoch": 3.667212273715584, "percentage": 73.34, "elapsed_time": "3:14:06", "remaining_time": "1:10:32", "throughput": 8687.57, "total_tokens": 101177296} +{"current_steps": 150115, "total_steps": 204665, "loss": 0.0008, "lr": 4.0248323808222803e-07, "epoch": 3.6673344245474313, "percentage": 73.35, "elapsed_time": "3:14:06", "remaining_time": "1:10:32", "throughput": 8687.63, "total_tokens": 101181072} +{"current_steps": 150120, "total_steps": 204665, "loss": 0.0, "lr": 4.024148598642919e-07, "epoch": 3.6674565753792785, "percentage": 73.35, "elapsed_time": "3:14:06", "remaining_time": "1:10:31", "throughput": 8687.63, "total_tokens": 101184016} +{"current_steps": 150125, "total_steps": 204665, "loss": 0.0, "lr": 4.0234648599215606e-07, "epoch": 3.6675787262111257, "percentage": 73.35, "elapsed_time": "3:14:07", "remaining_time": "1:10:31", "throughput": 8687.62, "total_tokens": 101186832} +{"current_steps": 150130, "total_steps": 204665, "loss": 0.0321, "lr": 4.022781164663173e-07, "epoch": 3.6677008770429724, "percentage": 73.35, "elapsed_time": "3:14:07", "remaining_time": "1:10:31", "throughput": 8687.63, "total_tokens": 101189968} +{"current_steps": 150135, "total_steps": 204665, "loss": 0.0001, "lr": 4.0220975128727244e-07, "epoch": 3.66782302787482, "percentage": 73.36, "elapsed_time": "3:14:07", "remaining_time": "1:10:30", "throughput": 8687.66, "total_tokens": 101193232} +{"current_steps": 150140, "total_steps": 204665, "loss": 0.0334, "lr": 4.02141390455519e-07, "epoch": 3.667945178706667, "percentage": 73.36, "elapsed_time": "3:14:08", "remaining_time": "1:10:30", "throughput": 8687.67, "total_tokens": 101196368} +{"current_steps": 150145, "total_steps": 204665, "loss": 0.0002, "lr": 4.0207303397155467e-07, "epoch": 3.668067329538514, "percentage": 73.36, "elapsed_time": "3:14:08", "remaining_time": "1:10:29", "throughput": 8687.71, "total_tokens": 101199824} +{"current_steps": 150150, "total_steps": 204665, "loss": 0.0001, "lr": 4.020046818358755e-07, "epoch": 3.668189480370361, "percentage": 73.36, "elapsed_time": "3:14:08", "remaining_time": "1:10:29", "throughput": 8687.73, "total_tokens": 101203088} +{"current_steps": 150155, "total_steps": 204665, "loss": 0.1163, "lr": 4.0193633404897973e-07, "epoch": 3.6683116312022084, "percentage": 73.37, "elapsed_time": "3:14:09", "remaining_time": "1:10:28", "throughput": 8687.79, "total_tokens": 101206800} +{"current_steps": 150160, "total_steps": 204665, "loss": 0.1, "lr": 4.0186799061136334e-07, "epoch": 3.6684337820340556, "percentage": 73.37, "elapsed_time": "3:14:09", "remaining_time": "1:10:28", "throughput": 8687.8, "total_tokens": 101209872} +{"current_steps": 150165, "total_steps": 204665, "loss": 0.0268, "lr": 4.0179965152352413e-07, "epoch": 3.6685559328659028, "percentage": 73.37, "elapsed_time": "3:14:10", "remaining_time": "1:10:28", "throughput": 8687.85, "total_tokens": 101213648} +{"current_steps": 150170, "total_steps": 204665, "loss": 0.0001, "lr": 4.0173131678595837e-07, "epoch": 3.66867808369775, "percentage": 73.37, "elapsed_time": "3:14:10", "remaining_time": "1:10:27", "throughput": 8687.87, "total_tokens": 101216912} +{"current_steps": 150175, "total_steps": 204665, "loss": 0.0371, "lr": 4.016629863991634e-07, "epoch": 3.668800234529597, "percentage": 73.38, "elapsed_time": "3:14:10", "remaining_time": "1:10:27", "throughput": 8687.96, "total_tokens": 101221136} +{"current_steps": 150180, "total_steps": 204665, "loss": 0.0526, "lr": 4.0159466036363654e-07, "epoch": 3.6689223853614443, "percentage": 73.38, "elapsed_time": "3:14:11", "remaining_time": "1:10:26", "throughput": 8688.01, "total_tokens": 101224720} +{"current_steps": 150185, "total_steps": 204665, "loss": 0.0, "lr": 4.0152633867987383e-07, "epoch": 3.6690445361932915, "percentage": 73.38, "elapsed_time": "3:14:11", "remaining_time": "1:10:26", "throughput": 8688.05, "total_tokens": 101228368} +{"current_steps": 150190, "total_steps": 204665, "loss": 0.0, "lr": 4.01458021348373e-07, "epoch": 3.6691666870251387, "percentage": 73.38, "elapsed_time": "3:14:11", "remaining_time": "1:10:26", "throughput": 8688.09, "total_tokens": 101231888} +{"current_steps": 150195, "total_steps": 204665, "loss": 0.0383, "lr": 4.0138970836963006e-07, "epoch": 3.669288837856986, "percentage": 73.39, "elapsed_time": "3:14:12", "remaining_time": "1:10:25", "throughput": 8688.11, "total_tokens": 101235088} +{"current_steps": 150200, "total_steps": 204665, "loss": 0.0201, "lr": 4.0132139974414247e-07, "epoch": 3.669410988688833, "percentage": 73.39, "elapsed_time": "3:14:12", "remaining_time": "1:10:25", "throughput": 8688.13, "total_tokens": 101238352} +{"current_steps": 150205, "total_steps": 204665, "loss": 0.0, "lr": 4.012530954724064e-07, "epoch": 3.6695331395206803, "percentage": 73.39, "elapsed_time": "3:14:12", "remaining_time": "1:10:24", "throughput": 8688.15, "total_tokens": 101241552} +{"current_steps": 150210, "total_steps": 204665, "loss": 0.075, "lr": 4.011847955549188e-07, "epoch": 3.6696552903525275, "percentage": 73.39, "elapsed_time": "3:14:13", "remaining_time": "1:10:24", "throughput": 8688.17, "total_tokens": 101244816} +{"current_steps": 150215, "total_steps": 204665, "loss": 0.0001, "lr": 4.011164999921768e-07, "epoch": 3.669777441184374, "percentage": 73.4, "elapsed_time": "3:14:13", "remaining_time": "1:10:24", "throughput": 8688.19, "total_tokens": 101248080} +{"current_steps": 150220, "total_steps": 204665, "loss": 0.0, "lr": 4.010482087846766e-07, "epoch": 3.669899592016222, "percentage": 73.4, "elapsed_time": "3:14:13", "remaining_time": "1:10:23", "throughput": 8688.25, "total_tokens": 101251856} +{"current_steps": 150225, "total_steps": 204665, "loss": 0.0, "lr": 4.0097992193291474e-07, "epoch": 3.6700217428480686, "percentage": 73.4, "elapsed_time": "3:14:14", "remaining_time": "1:10:23", "throughput": 8688.29, "total_tokens": 101255376} +{"current_steps": 150230, "total_steps": 204665, "loss": 0.0, "lr": 4.0091163943738825e-07, "epoch": 3.6701438936799162, "percentage": 73.4, "elapsed_time": "3:14:14", "remaining_time": "1:10:22", "throughput": 8688.31, "total_tokens": 101258640} +{"current_steps": 150235, "total_steps": 204665, "loss": 0.0, "lr": 4.008433612985931e-07, "epoch": 3.670266044511763, "percentage": 73.41, "elapsed_time": "3:14:14", "remaining_time": "1:10:22", "throughput": 8688.31, "total_tokens": 101261648} +{"current_steps": 150240, "total_steps": 204665, "loss": 0.0373, "lr": 4.007750875170266e-07, "epoch": 3.67038819534361, "percentage": 73.41, "elapsed_time": "3:14:15", "remaining_time": "1:10:22", "throughput": 8688.34, "total_tokens": 101264976} +{"current_steps": 150245, "total_steps": 204665, "loss": 0.0002, "lr": 4.007068180931844e-07, "epoch": 3.6705103461754574, "percentage": 73.41, "elapsed_time": "3:14:15", "remaining_time": "1:10:21", "throughput": 8688.36, "total_tokens": 101268176} +{"current_steps": 150250, "total_steps": 204665, "loss": 0.0457, "lr": 4.006385530275638e-07, "epoch": 3.6706324970073045, "percentage": 73.41, "elapsed_time": "3:14:15", "remaining_time": "1:10:21", "throughput": 8688.36, "total_tokens": 101271120} +{"current_steps": 150255, "total_steps": 204665, "loss": 0.0, "lr": 4.005702923206605e-07, "epoch": 3.6707546478391517, "percentage": 73.42, "elapsed_time": "3:14:16", "remaining_time": "1:10:20", "throughput": 8688.38, "total_tokens": 101274320} +{"current_steps": 150260, "total_steps": 204665, "loss": 0.0, "lr": 4.005020359729716e-07, "epoch": 3.670876798670999, "percentage": 73.42, "elapsed_time": "3:14:16", "remaining_time": "1:10:20", "throughput": 8688.42, "total_tokens": 101277840} +{"current_steps": 150265, "total_steps": 204665, "loss": 0.0, "lr": 4.0043378398499286e-07, "epoch": 3.670998949502846, "percentage": 73.42, "elapsed_time": "3:14:16", "remaining_time": "1:10:20", "throughput": 8688.42, "total_tokens": 101280784} +{"current_steps": 150270, "total_steps": 204665, "loss": 0.0001, "lr": 4.0036553635722083e-07, "epoch": 3.6711211003346933, "percentage": 73.42, "elapsed_time": "3:14:17", "remaining_time": "1:10:19", "throughput": 8688.45, "total_tokens": 101284176} +{"current_steps": 150275, "total_steps": 204665, "loss": 0.0, "lr": 4.0029729309015224e-07, "epoch": 3.6712432511665405, "percentage": 73.42, "elapsed_time": "3:14:17", "remaining_time": "1:10:19", "throughput": 8688.45, "total_tokens": 101287184} +{"current_steps": 150280, "total_steps": 204665, "loss": 0.0, "lr": 4.0022905418428275e-07, "epoch": 3.6713654019983877, "percentage": 73.43, "elapsed_time": "3:14:18", "remaining_time": "1:10:18", "throughput": 8688.48, "total_tokens": 101290512} +{"current_steps": 150285, "total_steps": 204665, "loss": 0.0001, "lr": 4.0016081964010927e-07, "epoch": 3.671487552830235, "percentage": 73.43, "elapsed_time": "3:14:18", "remaining_time": "1:10:18", "throughput": 8688.56, "total_tokens": 101294608} +{"current_steps": 150290, "total_steps": 204665, "loss": 0.0, "lr": 4.000925894581272e-07, "epoch": 3.671609703662082, "percentage": 73.43, "elapsed_time": "3:14:18", "remaining_time": "1:10:18", "throughput": 8688.6, "total_tokens": 101298128} +{"current_steps": 150295, "total_steps": 204665, "loss": 0.0, "lr": 4.000243636388332e-07, "epoch": 3.6717318544939292, "percentage": 73.43, "elapsed_time": "3:14:19", "remaining_time": "1:10:17", "throughput": 8688.63, "total_tokens": 101301456} +{"current_steps": 150300, "total_steps": 204665, "loss": 0.0, "lr": 3.9995614218272377e-07, "epoch": 3.671854005325776, "percentage": 73.44, "elapsed_time": "3:14:19", "remaining_time": "1:10:17", "throughput": 8688.67, "total_tokens": 101305040} +{"current_steps": 150305, "total_steps": 204665, "loss": 0.0002, "lr": 3.9988792509029435e-07, "epoch": 3.6719761561576236, "percentage": 73.44, "elapsed_time": "3:14:19", "remaining_time": "1:10:16", "throughput": 8688.69, "total_tokens": 101308240} +{"current_steps": 150310, "total_steps": 204665, "loss": 0.0, "lr": 3.998197123620417e-07, "epoch": 3.6720983069894704, "percentage": 73.44, "elapsed_time": "3:14:20", "remaining_time": "1:10:16", "throughput": 8688.71, "total_tokens": 101311504} +{"current_steps": 150315, "total_steps": 204665, "loss": 0.0, "lr": 3.997515039984611e-07, "epoch": 3.672220457821318, "percentage": 73.44, "elapsed_time": "3:14:20", "remaining_time": "1:10:16", "throughput": 8688.74, "total_tokens": 101314896} +{"current_steps": 150320, "total_steps": 204665, "loss": 0.0001, "lr": 3.9968330000004944e-07, "epoch": 3.6723426086531648, "percentage": 73.45, "elapsed_time": "3:14:20", "remaining_time": "1:10:15", "throughput": 8688.75, "total_tokens": 101318032} +{"current_steps": 150325, "total_steps": 204665, "loss": 0.0002, "lr": 3.9961510036730227e-07, "epoch": 3.672464759485012, "percentage": 73.45, "elapsed_time": "3:14:21", "remaining_time": "1:10:15", "throughput": 8688.81, "total_tokens": 101321808} +{"current_steps": 150330, "total_steps": 204665, "loss": 0.0001, "lr": 3.995469051007152e-07, "epoch": 3.672586910316859, "percentage": 73.45, "elapsed_time": "3:14:21", "remaining_time": "1:10:14", "throughput": 8688.82, "total_tokens": 101325008} +{"current_steps": 150335, "total_steps": 204665, "loss": 0.0, "lr": 3.9947871420078495e-07, "epoch": 3.6727090611487063, "percentage": 73.45, "elapsed_time": "3:14:21", "remaining_time": "1:10:14", "throughput": 8688.84, "total_tokens": 101328144} +{"current_steps": 150340, "total_steps": 204665, "loss": 0.0, "lr": 3.9941052766800656e-07, "epoch": 3.6728312119805535, "percentage": 73.46, "elapsed_time": "3:14:22", "remaining_time": "1:10:14", "throughput": 8688.87, "total_tokens": 101331536} +{"current_steps": 150345, "total_steps": 204665, "loss": 0.0, "lr": 3.993423455028767e-07, "epoch": 3.6729533628124007, "percentage": 73.46, "elapsed_time": "3:14:22", "remaining_time": "1:10:13", "throughput": 8688.91, "total_tokens": 101335120} +{"current_steps": 150350, "total_steps": 204665, "loss": 0.0451, "lr": 3.992741677058906e-07, "epoch": 3.673075513644248, "percentage": 73.46, "elapsed_time": "3:14:22", "remaining_time": "1:10:13", "throughput": 8688.93, "total_tokens": 101338320} +{"current_steps": 150355, "total_steps": 204665, "loss": 0.0, "lr": 3.9920599427754465e-07, "epoch": 3.673197664476095, "percentage": 73.46, "elapsed_time": "3:14:23", "remaining_time": "1:10:12", "throughput": 8688.96, "total_tokens": 101341776} +{"current_steps": 150360, "total_steps": 204665, "loss": 0.0002, "lr": 3.991378252183339e-07, "epoch": 3.6733198153079423, "percentage": 73.47, "elapsed_time": "3:14:23", "remaining_time": "1:10:12", "throughput": 8689.03, "total_tokens": 101345680} +{"current_steps": 150365, "total_steps": 204665, "loss": 0.0, "lr": 3.9906966052875457e-07, "epoch": 3.6734419661397895, "percentage": 73.47, "elapsed_time": "3:14:23", "remaining_time": "1:10:12", "throughput": 8689.06, "total_tokens": 101349008} +{"current_steps": 150370, "total_steps": 204665, "loss": 0.0, "lr": 3.990015002093027e-07, "epoch": 3.6735641169716367, "percentage": 73.47, "elapsed_time": "3:14:24", "remaining_time": "1:10:11", "throughput": 8689.08, "total_tokens": 101352336} +{"current_steps": 150375, "total_steps": 204665, "loss": 0.0, "lr": 3.989333442604731e-07, "epoch": 3.673686267803484, "percentage": 73.47, "elapsed_time": "3:14:24", "remaining_time": "1:10:11", "throughput": 8689.1, "total_tokens": 101355536} +{"current_steps": 150380, "total_steps": 204665, "loss": 0.0466, "lr": 3.988651926827623e-07, "epoch": 3.673808418635331, "percentage": 73.48, "elapsed_time": "3:14:25", "remaining_time": "1:10:10", "throughput": 8689.1, "total_tokens": 101358416} +{"current_steps": 150385, "total_steps": 204665, "loss": 0.0488, "lr": 3.9879704547666517e-07, "epoch": 3.673930569467178, "percentage": 73.48, "elapsed_time": "3:14:25", "remaining_time": "1:10:10", "throughput": 8689.1, "total_tokens": 101361424} +{"current_steps": 150390, "total_steps": 204665, "loss": 0.0, "lr": 3.987289026426776e-07, "epoch": 3.6740527202990254, "percentage": 73.48, "elapsed_time": "3:14:25", "remaining_time": "1:10:10", "throughput": 8689.1, "total_tokens": 101364432} +{"current_steps": 150395, "total_steps": 204665, "loss": 0.0, "lr": 3.9866076418129545e-07, "epoch": 3.674174871130872, "percentage": 73.48, "elapsed_time": "3:14:26", "remaining_time": "1:10:09", "throughput": 8689.17, "total_tokens": 101368272} +{"current_steps": 150400, "total_steps": 204665, "loss": 0.0297, "lr": 3.985926300930137e-07, "epoch": 3.67429702196272, "percentage": 73.49, "elapsed_time": "3:14:26", "remaining_time": "1:10:09", "throughput": 8689.19, "total_tokens": 101371536} +{"current_steps": 150405, "total_steps": 204665, "loss": 0.0001, "lr": 3.985245003783284e-07, "epoch": 3.6744191727945665, "percentage": 73.49, "elapsed_time": "3:14:26", "remaining_time": "1:10:08", "throughput": 8689.26, "total_tokens": 101375568} +{"current_steps": 150410, "total_steps": 204665, "loss": 0.0001, "lr": 3.9845637503773443e-07, "epoch": 3.674541323626414, "percentage": 73.49, "elapsed_time": "3:14:27", "remaining_time": "1:10:08", "throughput": 8689.28, "total_tokens": 101378768} +{"current_steps": 150415, "total_steps": 204665, "loss": 0.062, "lr": 3.9838825407172784e-07, "epoch": 3.674663474458261, "percentage": 73.49, "elapsed_time": "3:14:27", "remaining_time": "1:10:08", "throughput": 8689.28, "total_tokens": 101381776} +{"current_steps": 150420, "total_steps": 204665, "loss": 0.0001, "lr": 3.983201374808033e-07, "epoch": 3.674785625290108, "percentage": 73.5, "elapsed_time": "3:14:27", "remaining_time": "1:10:07", "throughput": 8689.28, "total_tokens": 101384656} +{"current_steps": 150425, "total_steps": 204665, "loss": 0.0001, "lr": 3.982520252654569e-07, "epoch": 3.6749077761219553, "percentage": 73.5, "elapsed_time": "3:14:28", "remaining_time": "1:10:07", "throughput": 8689.31, "total_tokens": 101388048} +{"current_steps": 150430, "total_steps": 204665, "loss": 0.0001, "lr": 3.981839174261833e-07, "epoch": 3.6750299269538025, "percentage": 73.5, "elapsed_time": "3:14:28", "remaining_time": "1:10:06", "throughput": 8689.32, "total_tokens": 101391184} +{"current_steps": 150435, "total_steps": 204665, "loss": 0.0388, "lr": 3.9811581396347835e-07, "epoch": 3.6751520777856497, "percentage": 73.5, "elapsed_time": "3:14:28", "remaining_time": "1:10:06", "throughput": 8689.35, "total_tokens": 101394512} +{"current_steps": 150440, "total_steps": 204665, "loss": 0.0, "lr": 3.9804771487783727e-07, "epoch": 3.675274228617497, "percentage": 73.51, "elapsed_time": "3:14:29", "remaining_time": "1:10:06", "throughput": 8689.38, "total_tokens": 101397904} +{"current_steps": 150445, "total_steps": 204665, "loss": 0.0, "lr": 3.9797962016975463e-07, "epoch": 3.675396379449344, "percentage": 73.51, "elapsed_time": "3:14:29", "remaining_time": "1:10:05", "throughput": 8689.4, "total_tokens": 101401232} +{"current_steps": 150450, "total_steps": 204665, "loss": 0.0004, "lr": 3.979115298397262e-07, "epoch": 3.6755185302811912, "percentage": 73.51, "elapsed_time": "3:14:29", "remaining_time": "1:10:05", "throughput": 8689.46, "total_tokens": 101404944} +{"current_steps": 150455, "total_steps": 204665, "loss": 0.0917, "lr": 3.978434438882474e-07, "epoch": 3.6756406811130384, "percentage": 73.51, "elapsed_time": "3:14:30", "remaining_time": "1:10:04", "throughput": 8689.48, "total_tokens": 101408208} +{"current_steps": 150460, "total_steps": 204665, "loss": 0.0012, "lr": 3.9777536231581265e-07, "epoch": 3.6757628319448856, "percentage": 73.52, "elapsed_time": "3:14:30", "remaining_time": "1:10:04", "throughput": 8689.5, "total_tokens": 101411536} +{"current_steps": 150465, "total_steps": 204665, "loss": 0.0001, "lr": 3.9770728512291785e-07, "epoch": 3.675884982776733, "percentage": 73.52, "elapsed_time": "3:14:30", "remaining_time": "1:10:04", "throughput": 8689.52, "total_tokens": 101414736} +{"current_steps": 150470, "total_steps": 204665, "loss": 0.0001, "lr": 3.9763921231005726e-07, "epoch": 3.67600713360858, "percentage": 73.52, "elapsed_time": "3:14:31", "remaining_time": "1:10:03", "throughput": 8689.55, "total_tokens": 101418128} +{"current_steps": 150475, "total_steps": 204665, "loss": 0.0, "lr": 3.975711438777267e-07, "epoch": 3.676129284440427, "percentage": 73.52, "elapsed_time": "3:14:31", "remaining_time": "1:10:03", "throughput": 8689.58, "total_tokens": 101421520} +{"current_steps": 150480, "total_steps": 204665, "loss": 0.0003, "lr": 3.975030798264205e-07, "epoch": 3.676251435272274, "percentage": 73.53, "elapsed_time": "3:14:31", "remaining_time": "1:10:02", "throughput": 8689.63, "total_tokens": 101425232} +{"current_steps": 150485, "total_steps": 204665, "loss": 0.0001, "lr": 3.974350201566339e-07, "epoch": 3.6763735861041216, "percentage": 73.53, "elapsed_time": "3:14:32", "remaining_time": "1:10:02", "throughput": 8689.64, "total_tokens": 101428304} +{"current_steps": 150490, "total_steps": 204665, "loss": 0.0, "lr": 3.973669648688622e-07, "epoch": 3.6764957369359683, "percentage": 73.53, "elapsed_time": "3:14:32", "remaining_time": "1:10:02", "throughput": 8689.66, "total_tokens": 101431568} +{"current_steps": 150495, "total_steps": 204665, "loss": 0.0365, "lr": 3.9729891396359984e-07, "epoch": 3.676617887767816, "percentage": 73.53, "elapsed_time": "3:14:33", "remaining_time": "1:10:01", "throughput": 8689.71, "total_tokens": 101435216} +{"current_steps": 150500, "total_steps": 204665, "loss": 0.0, "lr": 3.9723086744134216e-07, "epoch": 3.6767400385996627, "percentage": 73.53, "elapsed_time": "3:14:33", "remaining_time": "1:10:01", "throughput": 8689.73, "total_tokens": 101438480} +{"current_steps": 150505, "total_steps": 204665, "loss": 0.1552, "lr": 3.971628253025834e-07, "epoch": 3.67686218943151, "percentage": 73.54, "elapsed_time": "3:14:33", "remaining_time": "1:10:00", "throughput": 8689.78, "total_tokens": 101442064} +{"current_steps": 150510, "total_steps": 204665, "loss": 0.0, "lr": 3.97094787547819e-07, "epoch": 3.676984340263357, "percentage": 73.54, "elapsed_time": "3:14:34", "remaining_time": "1:10:00", "throughput": 8689.8, "total_tokens": 101445328} +{"current_steps": 150515, "total_steps": 204665, "loss": 0.0, "lr": 3.9702675417754317e-07, "epoch": 3.6771064910952043, "percentage": 73.54, "elapsed_time": "3:14:34", "remaining_time": "1:10:00", "throughput": 8689.81, "total_tokens": 101448464} +{"current_steps": 150520, "total_steps": 204665, "loss": 0.0, "lr": 3.969587251922509e-07, "epoch": 3.6772286419270515, "percentage": 73.54, "elapsed_time": "3:14:34", "remaining_time": "1:09:59", "throughput": 8689.85, "total_tokens": 101451984} +{"current_steps": 150525, "total_steps": 204665, "loss": 0.0, "lr": 3.9689070059243745e-07, "epoch": 3.6773507927588986, "percentage": 73.55, "elapsed_time": "3:14:35", "remaining_time": "1:09:59", "throughput": 8689.9, "total_tokens": 101455568} +{"current_steps": 150530, "total_steps": 204665, "loss": 0.0, "lr": 3.96822680378597e-07, "epoch": 3.677472943590746, "percentage": 73.55, "elapsed_time": "3:14:35", "remaining_time": "1:09:58", "throughput": 8689.91, "total_tokens": 101458768} +{"current_steps": 150535, "total_steps": 204665, "loss": 0.0, "lr": 3.967546645512239e-07, "epoch": 3.677595094422593, "percentage": 73.55, "elapsed_time": "3:14:35", "remaining_time": "1:09:58", "throughput": 8689.96, "total_tokens": 101462352} +{"current_steps": 150540, "total_steps": 204665, "loss": 0.0, "lr": 3.9668665311081337e-07, "epoch": 3.67771724525444, "percentage": 73.55, "elapsed_time": "3:14:36", "remaining_time": "1:09:58", "throughput": 8689.97, "total_tokens": 101465424} +{"current_steps": 150545, "total_steps": 204665, "loss": 0.0, "lr": 3.966186460578596e-07, "epoch": 3.6778393960862874, "percentage": 73.56, "elapsed_time": "3:14:36", "remaining_time": "1:09:57", "throughput": 8689.99, "total_tokens": 101468752} +{"current_steps": 150550, "total_steps": 204665, "loss": 0.0332, "lr": 3.965506433928576e-07, "epoch": 3.6779615469181346, "percentage": 73.56, "elapsed_time": "3:14:36", "remaining_time": "1:09:57", "throughput": 8690.0, "total_tokens": 101471824} +{"current_steps": 150555, "total_steps": 204665, "loss": 0.0, "lr": 3.9648264511630125e-07, "epoch": 3.678083697749982, "percentage": 73.56, "elapsed_time": "3:14:37", "remaining_time": "1:09:56", "throughput": 8690.04, "total_tokens": 101475344} +{"current_steps": 150560, "total_steps": 204665, "loss": 0.0, "lr": 3.964146512286858e-07, "epoch": 3.678205848581829, "percentage": 73.56, "elapsed_time": "3:14:37", "remaining_time": "1:09:56", "throughput": 8690.06, "total_tokens": 101478544} +{"current_steps": 150565, "total_steps": 204665, "loss": 0.0, "lr": 3.96346661730505e-07, "epoch": 3.678327999413676, "percentage": 73.57, "elapsed_time": "3:14:37", "remaining_time": "1:09:56", "throughput": 8690.12, "total_tokens": 101482448} +{"current_steps": 150570, "total_steps": 204665, "loss": 0.0, "lr": 3.9627867662225403e-07, "epoch": 3.6784501502455234, "percentage": 73.57, "elapsed_time": "3:14:38", "remaining_time": "1:09:55", "throughput": 8690.15, "total_tokens": 101485776} +{"current_steps": 150575, "total_steps": 204665, "loss": 0.0854, "lr": 3.962106959044265e-07, "epoch": 3.67857230107737, "percentage": 73.57, "elapsed_time": "3:14:38", "remaining_time": "1:09:55", "throughput": 8690.15, "total_tokens": 101488784} +{"current_steps": 150580, "total_steps": 204665, "loss": 0.0001, "lr": 3.961427195775171e-07, "epoch": 3.6786944519092177, "percentage": 73.57, "elapsed_time": "3:14:38", "remaining_time": "1:09:54", "throughput": 8690.17, "total_tokens": 101491984} +{"current_steps": 150585, "total_steps": 204665, "loss": 0.0001, "lr": 3.9607474764202073e-07, "epoch": 3.6788166027410645, "percentage": 73.58, "elapsed_time": "3:14:39", "remaining_time": "1:09:54", "throughput": 8690.22, "total_tokens": 101495632} +{"current_steps": 150590, "total_steps": 204665, "loss": 0.075, "lr": 3.960067800984309e-07, "epoch": 3.6789387535729117, "percentage": 73.58, "elapsed_time": "3:14:39", "remaining_time": "1:09:54", "throughput": 8690.29, "total_tokens": 101499536} +{"current_steps": 150595, "total_steps": 204665, "loss": 0.0002, "lr": 3.9593881694724253e-07, "epoch": 3.679060904404759, "percentage": 73.58, "elapsed_time": "3:14:39", "remaining_time": "1:09:53", "throughput": 8690.31, "total_tokens": 101502800} +{"current_steps": 150600, "total_steps": 204665, "loss": 0.0, "lr": 3.958708581889493e-07, "epoch": 3.679183055236606, "percentage": 73.58, "elapsed_time": "3:14:40", "remaining_time": "1:09:53", "throughput": 8690.35, "total_tokens": 101506384} +{"current_steps": 150605, "total_steps": 204665, "loss": 0.0001, "lr": 3.9580290382404546e-07, "epoch": 3.6793052060684532, "percentage": 73.59, "elapsed_time": "3:14:40", "remaining_time": "1:09:52", "throughput": 8690.41, "total_tokens": 101510160} +{"current_steps": 150610, "total_steps": 204665, "loss": 0.0004, "lr": 3.957349538530259e-07, "epoch": 3.6794273569003004, "percentage": 73.59, "elapsed_time": "3:14:41", "remaining_time": "1:09:52", "throughput": 8690.46, "total_tokens": 101513872} +{"current_steps": 150615, "total_steps": 204665, "loss": 0.0, "lr": 3.95667008276384e-07, "epoch": 3.6795495077321476, "percentage": 73.59, "elapsed_time": "3:14:41", "remaining_time": "1:09:52", "throughput": 8690.46, "total_tokens": 101516816} +{"current_steps": 150620, "total_steps": 204665, "loss": 0.0, "lr": 3.9559906709461445e-07, "epoch": 3.679671658563995, "percentage": 73.59, "elapsed_time": "3:14:41", "remaining_time": "1:09:51", "throughput": 8690.49, "total_tokens": 101520208} +{"current_steps": 150625, "total_steps": 204665, "loss": 0.0, "lr": 3.9553113030821064e-07, "epoch": 3.679793809395842, "percentage": 73.6, "elapsed_time": "3:14:42", "remaining_time": "1:09:51", "throughput": 8690.54, "total_tokens": 101523792} +{"current_steps": 150630, "total_steps": 204665, "loss": 0.0, "lr": 3.954631979176675e-07, "epoch": 3.679915960227689, "percentage": 73.6, "elapsed_time": "3:14:42", "remaining_time": "1:09:50", "throughput": 8690.55, "total_tokens": 101526992} +{"current_steps": 150635, "total_steps": 204665, "loss": 0.0, "lr": 3.953952699234785e-07, "epoch": 3.6800381110595364, "percentage": 73.6, "elapsed_time": "3:14:42", "remaining_time": "1:09:50", "throughput": 8690.58, "total_tokens": 101530320} +{"current_steps": 150640, "total_steps": 204665, "loss": 0.0001, "lr": 3.953273463261374e-07, "epoch": 3.6801602618913836, "percentage": 73.6, "elapsed_time": "3:14:43", "remaining_time": "1:09:50", "throughput": 8690.61, "total_tokens": 101533712} +{"current_steps": 150645, "total_steps": 204665, "loss": 0.0001, "lr": 3.952594271261388e-07, "epoch": 3.6802824127232308, "percentage": 73.61, "elapsed_time": "3:14:43", "remaining_time": "1:09:49", "throughput": 8690.63, "total_tokens": 101536976} +{"current_steps": 150650, "total_steps": 204665, "loss": 0.0, "lr": 3.95191512323976e-07, "epoch": 3.680404563555078, "percentage": 73.61, "elapsed_time": "3:14:43", "remaining_time": "1:09:49", "throughput": 8690.65, "total_tokens": 101540240} +{"current_steps": 150655, "total_steps": 204665, "loss": 0.0, "lr": 3.951236019201435e-07, "epoch": 3.680526714386925, "percentage": 73.61, "elapsed_time": "3:14:44", "remaining_time": "1:09:48", "throughput": 8690.69, "total_tokens": 101543696} +{"current_steps": 150660, "total_steps": 204665, "loss": 0.0, "lr": 3.9505569591513444e-07, "epoch": 3.680648865218772, "percentage": 73.61, "elapsed_time": "3:14:44", "remaining_time": "1:09:48", "throughput": 8690.7, "total_tokens": 101546768} +{"current_steps": 150665, "total_steps": 204665, "loss": 0.0001, "lr": 3.949877943094435e-07, "epoch": 3.6807710160506195, "percentage": 73.62, "elapsed_time": "3:14:44", "remaining_time": "1:09:47", "throughput": 8690.72, "total_tokens": 101550032} +{"current_steps": 150670, "total_steps": 204665, "loss": 0.0, "lr": 3.949198971035638e-07, "epoch": 3.6808931668824663, "percentage": 73.62, "elapsed_time": "3:14:45", "remaining_time": "1:09:47", "throughput": 8690.72, "total_tokens": 101553040} +{"current_steps": 150675, "total_steps": 204665, "loss": 0.0001, "lr": 3.9485200429798914e-07, "epoch": 3.681015317714314, "percentage": 73.62, "elapsed_time": "3:14:45", "remaining_time": "1:09:47", "throughput": 8690.76, "total_tokens": 101556560} +{"current_steps": 150680, "total_steps": 204665, "loss": 0.0, "lr": 3.947841158932139e-07, "epoch": 3.6811374685461606, "percentage": 73.62, "elapsed_time": "3:14:45", "remaining_time": "1:09:46", "throughput": 8690.83, "total_tokens": 101560400} +{"current_steps": 150685, "total_steps": 204665, "loss": 0.0, "lr": 3.9471623188973115e-07, "epoch": 3.681259619378008, "percentage": 73.63, "elapsed_time": "3:14:46", "remaining_time": "1:09:46", "throughput": 8690.9, "total_tokens": 101564432} +{"current_steps": 150690, "total_steps": 204665, "loss": 0.0006, "lr": 3.9464835228803494e-07, "epoch": 3.681381770209855, "percentage": 73.63, "elapsed_time": "3:14:46", "remaining_time": "1:09:45", "throughput": 8690.89, "total_tokens": 101567248} +{"current_steps": 150695, "total_steps": 204665, "loss": 0.0, "lr": 3.945804770886184e-07, "epoch": 3.681503921041702, "percentage": 73.63, "elapsed_time": "3:14:46", "remaining_time": "1:09:45", "throughput": 8690.91, "total_tokens": 101570512} +{"current_steps": 150700, "total_steps": 204665, "loss": 0.0, "lr": 3.9451260629197557e-07, "epoch": 3.6816260718735494, "percentage": 73.63, "elapsed_time": "3:14:47", "remaining_time": "1:09:45", "throughput": 8690.92, "total_tokens": 101573520} +{"current_steps": 150705, "total_steps": 204665, "loss": 0.0, "lr": 3.9444473989860017e-07, "epoch": 3.6817482227053966, "percentage": 73.63, "elapsed_time": "3:14:47", "remaining_time": "1:09:44", "throughput": 8690.93, "total_tokens": 101576720} +{"current_steps": 150710, "total_steps": 204665, "loss": 0.0, "lr": 3.943768779089852e-07, "epoch": 3.681870373537244, "percentage": 73.64, "elapsed_time": "3:14:48", "remaining_time": "1:09:44", "throughput": 8690.98, "total_tokens": 101580368} +{"current_steps": 150715, "total_steps": 204665, "loss": 0.0002, "lr": 3.943090203236248e-07, "epoch": 3.681992524369091, "percentage": 73.64, "elapsed_time": "3:14:48", "remaining_time": "1:09:43", "throughput": 8691.01, "total_tokens": 101583696} +{"current_steps": 150720, "total_steps": 204665, "loss": 0.0, "lr": 3.942411671430118e-07, "epoch": 3.682114675200938, "percentage": 73.64, "elapsed_time": "3:14:48", "remaining_time": "1:09:43", "throughput": 8691.08, "total_tokens": 101587600} +{"current_steps": 150725, "total_steps": 204665, "loss": 0.0, "lr": 3.941733183676402e-07, "epoch": 3.6822368260327853, "percentage": 73.64, "elapsed_time": "3:14:49", "remaining_time": "1:09:43", "throughput": 8691.11, "total_tokens": 101590992} +{"current_steps": 150730, "total_steps": 204665, "loss": 0.0, "lr": 3.94105473998003e-07, "epoch": 3.6823589768646325, "percentage": 73.65, "elapsed_time": "3:14:49", "remaining_time": "1:09:42", "throughput": 8691.14, "total_tokens": 101594512} +{"current_steps": 150735, "total_steps": 204665, "loss": 0.0001, "lr": 3.94037634034594e-07, "epoch": 3.6824811276964797, "percentage": 73.65, "elapsed_time": "3:14:49", "remaining_time": "1:09:42", "throughput": 8691.2, "total_tokens": 101598352} +{"current_steps": 150740, "total_steps": 204665, "loss": 0.0, "lr": 3.9396979847790603e-07, "epoch": 3.682603278528327, "percentage": 73.65, "elapsed_time": "3:14:50", "remaining_time": "1:09:41", "throughput": 8691.24, "total_tokens": 101601872} +{"current_steps": 150745, "total_steps": 204665, "loss": 0.0, "lr": 3.9390196732843294e-07, "epoch": 3.682725429360174, "percentage": 73.65, "elapsed_time": "3:14:50", "remaining_time": "1:09:41", "throughput": 8691.29, "total_tokens": 101605456} +{"current_steps": 150750, "total_steps": 204665, "loss": 0.0001, "lr": 3.9383414058666784e-07, "epoch": 3.6828475801920213, "percentage": 73.66, "elapsed_time": "3:14:50", "remaining_time": "1:09:41", "throughput": 8691.32, "total_tokens": 101608912} +{"current_steps": 150755, "total_steps": 204665, "loss": 0.0692, "lr": 3.9376631825310345e-07, "epoch": 3.682969731023868, "percentage": 73.66, "elapsed_time": "3:14:51", "remaining_time": "1:09:40", "throughput": 8691.35, "total_tokens": 101612240} +{"current_steps": 150760, "total_steps": 204665, "loss": 0.0, "lr": 3.936985003282336e-07, "epoch": 3.6830918818557157, "percentage": 73.66, "elapsed_time": "3:14:51", "remaining_time": "1:09:40", "throughput": 8691.36, "total_tokens": 101615312} +{"current_steps": 150765, "total_steps": 204665, "loss": 0.0, "lr": 3.936306868125516e-07, "epoch": 3.6832140326875624, "percentage": 73.66, "elapsed_time": "3:14:51", "remaining_time": "1:09:39", "throughput": 8691.4, "total_tokens": 101618896} +{"current_steps": 150770, "total_steps": 204665, "loss": 0.0001, "lr": 3.9356287770654993e-07, "epoch": 3.6833361835194096, "percentage": 73.67, "elapsed_time": "3:14:52", "remaining_time": "1:09:39", "throughput": 8691.42, "total_tokens": 101622096} +{"current_steps": 150775, "total_steps": 204665, "loss": 0.0349, "lr": 3.934950730107226e-07, "epoch": 3.683458334351257, "percentage": 73.67, "elapsed_time": "3:14:52", "remaining_time": "1:09:39", "throughput": 8691.46, "total_tokens": 101625680} +{"current_steps": 150780, "total_steps": 204665, "loss": 0.0235, "lr": 3.9342727272556186e-07, "epoch": 3.683580485183104, "percentage": 73.67, "elapsed_time": "3:14:52", "remaining_time": "1:09:38", "throughput": 8691.47, "total_tokens": 101628816} +{"current_steps": 150785, "total_steps": 204665, "loss": 0.0, "lr": 3.933594768515615e-07, "epoch": 3.683702636014951, "percentage": 73.67, "elapsed_time": "3:14:53", "remaining_time": "1:09:38", "throughput": 8691.5, "total_tokens": 101632144} +{"current_steps": 150790, "total_steps": 204665, "loss": 0.0, "lr": 3.932916853892138e-07, "epoch": 3.6838247868467984, "percentage": 73.68, "elapsed_time": "3:14:53", "remaining_time": "1:09:37", "throughput": 8691.58, "total_tokens": 101636304} +{"current_steps": 150795, "total_steps": 204665, "loss": 0.0, "lr": 3.9322389833901205e-07, "epoch": 3.6839469376786456, "percentage": 73.68, "elapsed_time": "3:14:54", "remaining_time": "1:09:37", "throughput": 8691.64, "total_tokens": 101640208} +{"current_steps": 150800, "total_steps": 204665, "loss": 0.0, "lr": 3.931561157014498e-07, "epoch": 3.6840690885104928, "percentage": 73.68, "elapsed_time": "3:14:54", "remaining_time": "1:09:37", "throughput": 8691.67, "total_tokens": 101643472} +{"current_steps": 150805, "total_steps": 204665, "loss": 0.0512, "lr": 3.930883374770191e-07, "epoch": 3.68419123934234, "percentage": 73.68, "elapsed_time": "3:14:54", "remaining_time": "1:09:36", "throughput": 8691.75, "total_tokens": 101647696} +{"current_steps": 150810, "total_steps": 204665, "loss": 0.0, "lr": 3.9302056366621363e-07, "epoch": 3.684313390174187, "percentage": 73.69, "elapsed_time": "3:14:55", "remaining_time": "1:09:36", "throughput": 8691.78, "total_tokens": 101651088} +{"current_steps": 150815, "total_steps": 204665, "loss": 0.0938, "lr": 3.929527942695254e-07, "epoch": 3.6844355410060343, "percentage": 73.69, "elapsed_time": "3:14:55", "remaining_time": "1:09:35", "throughput": 8691.81, "total_tokens": 101654480} +{"current_steps": 150820, "total_steps": 204665, "loss": 0.0, "lr": 3.9288502928744824e-07, "epoch": 3.6845576918378815, "percentage": 73.69, "elapsed_time": "3:14:55", "remaining_time": "1:09:35", "throughput": 8691.82, "total_tokens": 101657552} +{"current_steps": 150825, "total_steps": 204665, "loss": 0.0001, "lr": 3.9281726872047403e-07, "epoch": 3.6846798426697287, "percentage": 73.69, "elapsed_time": "3:14:56", "remaining_time": "1:09:35", "throughput": 8691.85, "total_tokens": 101660816} +{"current_steps": 150830, "total_steps": 204665, "loss": 0.0728, "lr": 3.927495125690959e-07, "epoch": 3.684801993501576, "percentage": 73.7, "elapsed_time": "3:14:56", "remaining_time": "1:09:34", "throughput": 8691.86, "total_tokens": 101663952} +{"current_steps": 150835, "total_steps": 204665, "loss": 0.0, "lr": 3.926817608338071e-07, "epoch": 3.684924144333423, "percentage": 73.7, "elapsed_time": "3:14:56", "remaining_time": "1:09:34", "throughput": 8691.89, "total_tokens": 101667408} +{"current_steps": 150840, "total_steps": 204665, "loss": 0.0, "lr": 3.926140135150998e-07, "epoch": 3.68504629516527, "percentage": 73.7, "elapsed_time": "3:14:57", "remaining_time": "1:09:33", "throughput": 8691.91, "total_tokens": 101670608} +{"current_steps": 150845, "total_steps": 204665, "loss": 0.0, "lr": 3.9254627061346655e-07, "epoch": 3.6851684459971175, "percentage": 73.7, "elapsed_time": "3:14:57", "remaining_time": "1:09:33", "throughput": 8691.95, "total_tokens": 101674128} +{"current_steps": 150850, "total_steps": 204665, "loss": 0.1039, "lr": 3.9247853212940043e-07, "epoch": 3.685290596828964, "percentage": 73.71, "elapsed_time": "3:14:57", "remaining_time": "1:09:33", "throughput": 8691.99, "total_tokens": 101677648} +{"current_steps": 150855, "total_steps": 204665, "loss": 0.0003, "lr": 3.924107980633935e-07, "epoch": 3.685412747660812, "percentage": 73.71, "elapsed_time": "3:14:58", "remaining_time": "1:09:32", "throughput": 8692.0, "total_tokens": 101680784} +{"current_steps": 150860, "total_steps": 204665, "loss": 0.0, "lr": 3.92343068415939e-07, "epoch": 3.6855348984926586, "percentage": 73.71, "elapsed_time": "3:14:58", "remaining_time": "1:09:32", "throughput": 8692.06, "total_tokens": 101684560} +{"current_steps": 150865, "total_steps": 204665, "loss": 0.0004, "lr": 3.9227534318752887e-07, "epoch": 3.6856570493245058, "percentage": 73.71, "elapsed_time": "3:14:58", "remaining_time": "1:09:31", "throughput": 8692.08, "total_tokens": 101687824} +{"current_steps": 150870, "total_steps": 204665, "loss": 0.0002, "lr": 3.922076223786561e-07, "epoch": 3.685779200156353, "percentage": 73.72, "elapsed_time": "3:14:59", "remaining_time": "1:09:31", "throughput": 8692.13, "total_tokens": 101691536} +{"current_steps": 150875, "total_steps": 204665, "loss": 0.0001, "lr": 3.9213990598981283e-07, "epoch": 3.6859013509882, "percentage": 73.72, "elapsed_time": "3:14:59", "remaining_time": "1:09:31", "throughput": 8692.17, "total_tokens": 101695184} +{"current_steps": 150880, "total_steps": 204665, "loss": 0.0548, "lr": 3.9207219402149183e-07, "epoch": 3.6860235018200473, "percentage": 73.72, "elapsed_time": "3:14:59", "remaining_time": "1:09:30", "throughput": 8692.24, "total_tokens": 101699152} +{"current_steps": 150885, "total_steps": 204665, "loss": 0.0, "lr": 3.920044864741852e-07, "epoch": 3.6861456526518945, "percentage": 73.72, "elapsed_time": "3:15:00", "remaining_time": "1:09:30", "throughput": 8692.25, "total_tokens": 101702288} +{"current_steps": 150890, "total_steps": 204665, "loss": 0.0, "lr": 3.919367833483852e-07, "epoch": 3.6862678034837417, "percentage": 73.73, "elapsed_time": "3:15:00", "remaining_time": "1:09:29", "throughput": 8692.28, "total_tokens": 101705616} +{"current_steps": 150895, "total_steps": 204665, "loss": 0.0, "lr": 3.91869084644585e-07, "epoch": 3.686389954315589, "percentage": 73.73, "elapsed_time": "3:15:01", "remaining_time": "1:09:29", "throughput": 8692.3, "total_tokens": 101708944} +{"current_steps": 150900, "total_steps": 204665, "loss": 0.0001, "lr": 3.91801390363276e-07, "epoch": 3.686512105147436, "percentage": 73.73, "elapsed_time": "3:15:01", "remaining_time": "1:09:29", "throughput": 8692.35, "total_tokens": 101712528} +{"current_steps": 150905, "total_steps": 204665, "loss": 0.0001, "lr": 3.9173370050495123e-07, "epoch": 3.6866342559792833, "percentage": 73.73, "elapsed_time": "3:15:01", "remaining_time": "1:09:28", "throughput": 8692.38, "total_tokens": 101715920} +{"current_steps": 150910, "total_steps": 204665, "loss": 0.0225, "lr": 3.916660150701022e-07, "epoch": 3.6867564068111305, "percentage": 73.74, "elapsed_time": "3:15:02", "remaining_time": "1:09:28", "throughput": 8692.4, "total_tokens": 101719184} +{"current_steps": 150915, "total_steps": 204665, "loss": 0.0, "lr": 3.9159833405922193e-07, "epoch": 3.6868785576429777, "percentage": 73.74, "elapsed_time": "3:15:02", "remaining_time": "1:09:27", "throughput": 8692.42, "total_tokens": 101722448} +{"current_steps": 150920, "total_steps": 204665, "loss": 0.0527, "lr": 3.915306574728019e-07, "epoch": 3.687000708474825, "percentage": 73.74, "elapsed_time": "3:15:02", "remaining_time": "1:09:27", "throughput": 8692.44, "total_tokens": 101725712} +{"current_steps": 150925, "total_steps": 204665, "loss": 0.0, "lr": 3.914629853113345e-07, "epoch": 3.6871228593066716, "percentage": 73.74, "elapsed_time": "3:15:03", "remaining_time": "1:09:27", "throughput": 8692.48, "total_tokens": 101729232} +{"current_steps": 150930, "total_steps": 204665, "loss": 0.0489, "lr": 3.913953175753123e-07, "epoch": 3.6872450101385192, "percentage": 73.74, "elapsed_time": "3:15:03", "remaining_time": "1:09:26", "throughput": 8692.55, "total_tokens": 101733200} +{"current_steps": 150935, "total_steps": 204665, "loss": 0.0, "lr": 3.913276542652267e-07, "epoch": 3.687367160970366, "percentage": 73.75, "elapsed_time": "3:15:03", "remaining_time": "1:09:26", "throughput": 8692.56, "total_tokens": 101736272} +{"current_steps": 150940, "total_steps": 204665, "loss": 0.0, "lr": 3.912599953815705e-07, "epoch": 3.6874893118022136, "percentage": 73.75, "elapsed_time": "3:15:04", "remaining_time": "1:09:25", "throughput": 8692.58, "total_tokens": 101739472} +{"current_steps": 150945, "total_steps": 204665, "loss": 0.0001, "lr": 3.911923409248353e-07, "epoch": 3.6876114626340604, "percentage": 73.75, "elapsed_time": "3:15:04", "remaining_time": "1:09:25", "throughput": 8692.6, "total_tokens": 101742800} +{"current_steps": 150950, "total_steps": 204665, "loss": 0.0001, "lr": 3.911246908955129e-07, "epoch": 3.6877336134659076, "percentage": 73.75, "elapsed_time": "3:15:04", "remaining_time": "1:09:25", "throughput": 8692.62, "total_tokens": 101745936} +{"current_steps": 150955, "total_steps": 204665, "loss": 0.0, "lr": 3.9105704529409575e-07, "epoch": 3.6878557642977547, "percentage": 73.76, "elapsed_time": "3:15:05", "remaining_time": "1:09:24", "throughput": 8692.63, "total_tokens": 101749136} +{"current_steps": 150960, "total_steps": 204665, "loss": 0.0003, "lr": 3.909894041210753e-07, "epoch": 3.687977915129602, "percentage": 73.76, "elapsed_time": "3:15:05", "remaining_time": "1:09:24", "throughput": 8692.68, "total_tokens": 101752784} +{"current_steps": 150965, "total_steps": 204665, "loss": 0.0, "lr": 3.90921767376944e-07, "epoch": 3.688100065961449, "percentage": 73.76, "elapsed_time": "3:15:05", "remaining_time": "1:09:23", "throughput": 8692.69, "total_tokens": 101755792} +{"current_steps": 150970, "total_steps": 204665, "loss": 0.0465, "lr": 3.9085413506219313e-07, "epoch": 3.6882222167932963, "percentage": 73.76, "elapsed_time": "3:15:06", "remaining_time": "1:09:23", "throughput": 8692.7, "total_tokens": 101758992} +{"current_steps": 150975, "total_steps": 204665, "loss": 0.0, "lr": 3.907865071773151e-07, "epoch": 3.6883443676251435, "percentage": 73.77, "elapsed_time": "3:15:06", "remaining_time": "1:09:23", "throughput": 8692.75, "total_tokens": 101762640} +{"current_steps": 150980, "total_steps": 204665, "loss": 0.0002, "lr": 3.9071888372280113e-07, "epoch": 3.6884665184569907, "percentage": 73.77, "elapsed_time": "3:15:06", "remaining_time": "1:09:22", "throughput": 8692.77, "total_tokens": 101765904} +{"current_steps": 150985, "total_steps": 204665, "loss": 0.0468, "lr": 3.906512646991433e-07, "epoch": 3.688588669288838, "percentage": 73.77, "elapsed_time": "3:15:07", "remaining_time": "1:09:22", "throughput": 8692.82, "total_tokens": 101769552} +{"current_steps": 150990, "total_steps": 204665, "loss": 0.0, "lr": 3.9058365010683383e-07, "epoch": 3.688710820120685, "percentage": 73.77, "elapsed_time": "3:15:07", "remaining_time": "1:09:21", "throughput": 8692.85, "total_tokens": 101772880} +{"current_steps": 150995, "total_steps": 204665, "loss": 0.0, "lr": 3.905160399463635e-07, "epoch": 3.6888329709525323, "percentage": 73.78, "elapsed_time": "3:15:08", "remaining_time": "1:09:21", "throughput": 8692.89, "total_tokens": 101776400} +{"current_steps": 151000, "total_steps": 204665, "loss": 0.0, "lr": 3.9044843421822485e-07, "epoch": 3.6889551217843795, "percentage": 73.78, "elapsed_time": "3:15:08", "remaining_time": "1:09:21", "throughput": 8692.91, "total_tokens": 101779664} +{"current_steps": 151005, "total_steps": 204665, "loss": 0.0, "lr": 3.903808329229087e-07, "epoch": 3.6890772726162266, "percentage": 73.78, "elapsed_time": "3:15:08", "remaining_time": "1:09:20", "throughput": 8692.92, "total_tokens": 101782800} +{"current_steps": 151010, "total_steps": 204665, "loss": 0.0786, "lr": 3.9031323606090717e-07, "epoch": 3.689199423448074, "percentage": 73.78, "elapsed_time": "3:15:09", "remaining_time": "1:09:20", "throughput": 8692.94, "total_tokens": 101786128} +{"current_steps": 151015, "total_steps": 204665, "loss": 0.0, "lr": 3.902456436327122e-07, "epoch": 3.689321574279921, "percentage": 73.79, "elapsed_time": "3:15:09", "remaining_time": "1:09:19", "throughput": 8692.96, "total_tokens": 101789392} +{"current_steps": 151020, "total_steps": 204665, "loss": 0.0, "lr": 3.9017805563881446e-07, "epoch": 3.6894437251117678, "percentage": 73.79, "elapsed_time": "3:15:09", "remaining_time": "1:09:19", "throughput": 8692.98, "total_tokens": 101792592} +{"current_steps": 151025, "total_steps": 204665, "loss": 0.0001, "lr": 3.901104720797063e-07, "epoch": 3.6895658759436154, "percentage": 73.79, "elapsed_time": "3:15:10", "remaining_time": "1:09:19", "throughput": 8693.0, "total_tokens": 101795792} +{"current_steps": 151030, "total_steps": 204665, "loss": 0.0001, "lr": 3.9004289295587845e-07, "epoch": 3.689688026775462, "percentage": 73.79, "elapsed_time": "3:15:10", "remaining_time": "1:09:18", "throughput": 8693.04, "total_tokens": 101799312} +{"current_steps": 151035, "total_steps": 204665, "loss": 0.0003, "lr": 3.8997531826782315e-07, "epoch": 3.68981017760731, "percentage": 73.8, "elapsed_time": "3:15:10", "remaining_time": "1:09:18", "throughput": 8693.06, "total_tokens": 101802640} +{"current_steps": 151040, "total_steps": 204665, "loss": 0.0, "lr": 3.89907748016031e-07, "epoch": 3.6899323284391565, "percentage": 73.8, "elapsed_time": "3:15:11", "remaining_time": "1:09:17", "throughput": 8693.11, "total_tokens": 101806288} +{"current_steps": 151045, "total_steps": 204665, "loss": 0.0, "lr": 3.898401822009942e-07, "epoch": 3.6900544792710037, "percentage": 73.8, "elapsed_time": "3:15:11", "remaining_time": "1:09:17", "throughput": 8693.23, "total_tokens": 101810896} +{"current_steps": 151050, "total_steps": 204665, "loss": 0.0, "lr": 3.8977262082320337e-07, "epoch": 3.690176630102851, "percentage": 73.8, "elapsed_time": "3:15:11", "remaining_time": "1:09:17", "throughput": 8693.23, "total_tokens": 101813968} +{"current_steps": 151055, "total_steps": 204665, "loss": 0.0667, "lr": 3.897050638831505e-07, "epoch": 3.690298780934698, "percentage": 73.81, "elapsed_time": "3:15:12", "remaining_time": "1:09:16", "throughput": 8693.26, "total_tokens": 101817360} +{"current_steps": 151060, "total_steps": 204665, "loss": 0.0, "lr": 3.896375113813265e-07, "epoch": 3.6904209317665453, "percentage": 73.81, "elapsed_time": "3:15:12", "remaining_time": "1:09:16", "throughput": 8693.26, "total_tokens": 101820304} +{"current_steps": 151065, "total_steps": 204665, "loss": 0.0, "lr": 3.8956996331822243e-07, "epoch": 3.6905430825983925, "percentage": 73.81, "elapsed_time": "3:15:12", "remaining_time": "1:09:15", "throughput": 8693.31, "total_tokens": 101824016} +{"current_steps": 151070, "total_steps": 204665, "loss": 0.0, "lr": 3.895024196943301e-07, "epoch": 3.6906652334302397, "percentage": 73.81, "elapsed_time": "3:15:13", "remaining_time": "1:09:15", "throughput": 8693.37, "total_tokens": 101827856} +{"current_steps": 151075, "total_steps": 204665, "loss": 0.0, "lr": 3.8943488051013997e-07, "epoch": 3.690787384262087, "percentage": 73.82, "elapsed_time": "3:15:13", "remaining_time": "1:09:15", "throughput": 8693.38, "total_tokens": 101830928} +{"current_steps": 151080, "total_steps": 204665, "loss": 0.0371, "lr": 3.8936734576614374e-07, "epoch": 3.690909535093934, "percentage": 73.82, "elapsed_time": "3:15:13", "remaining_time": "1:09:14", "throughput": 8693.48, "total_tokens": 101835280} +{"current_steps": 151085, "total_steps": 204665, "loss": 0.0, "lr": 3.8929981546283266e-07, "epoch": 3.6910316859257812, "percentage": 73.82, "elapsed_time": "3:15:14", "remaining_time": "1:09:14", "throughput": 8693.49, "total_tokens": 101838352} +{"current_steps": 151090, "total_steps": 204665, "loss": 0.0, "lr": 3.8923228960069723e-07, "epoch": 3.6911538367576284, "percentage": 73.82, "elapsed_time": "3:15:14", "remaining_time": "1:09:13", "throughput": 8693.51, "total_tokens": 101841552} +{"current_steps": 151095, "total_steps": 204665, "loss": 0.0001, "lr": 3.8916476818022914e-07, "epoch": 3.6912759875894756, "percentage": 73.83, "elapsed_time": "3:15:15", "remaining_time": "1:09:13", "throughput": 8693.52, "total_tokens": 101844688} +{"current_steps": 151100, "total_steps": 204665, "loss": 0.0515, "lr": 3.8909725120191893e-07, "epoch": 3.691398138421323, "percentage": 73.83, "elapsed_time": "3:15:15", "remaining_time": "1:09:13", "throughput": 8693.54, "total_tokens": 101847952} +{"current_steps": 151105, "total_steps": 204665, "loss": 0.0, "lr": 3.890297386662578e-07, "epoch": 3.6915202892531696, "percentage": 73.83, "elapsed_time": "3:15:15", "remaining_time": "1:09:12", "throughput": 8693.57, "total_tokens": 101851344} +{"current_steps": 151110, "total_steps": 204665, "loss": 0.0, "lr": 3.88962230573737e-07, "epoch": 3.691642440085017, "percentage": 73.83, "elapsed_time": "3:15:16", "remaining_time": "1:09:12", "throughput": 8693.59, "total_tokens": 101854544} +{"current_steps": 151115, "total_steps": 204665, "loss": 0.0002, "lr": 3.8889472692484703e-07, "epoch": 3.691764590916864, "percentage": 73.84, "elapsed_time": "3:15:16", "remaining_time": "1:09:11", "throughput": 8693.61, "total_tokens": 101857744} +{"current_steps": 151120, "total_steps": 204665, "loss": 0.0001, "lr": 3.8882722772007914e-07, "epoch": 3.6918867417487116, "percentage": 73.84, "elapsed_time": "3:15:16", "remaining_time": "1:09:11", "throughput": 8693.64, "total_tokens": 101861136} +{"current_steps": 151125, "total_steps": 204665, "loss": 0.0, "lr": 3.8875973295992383e-07, "epoch": 3.6920088925805583, "percentage": 73.84, "elapsed_time": "3:15:17", "remaining_time": "1:09:11", "throughput": 8693.66, "total_tokens": 101864400} +{"current_steps": 151130, "total_steps": 204665, "loss": 0.0, "lr": 3.8869224264487244e-07, "epoch": 3.6921310434124055, "percentage": 73.84, "elapsed_time": "3:15:17", "remaining_time": "1:09:10", "throughput": 8693.71, "total_tokens": 101867984} +{"current_steps": 151135, "total_steps": 204665, "loss": 0.0001, "lr": 3.886247567754151e-07, "epoch": 3.6922531942442527, "percentage": 73.85, "elapsed_time": "3:15:17", "remaining_time": "1:09:10", "throughput": 8693.74, "total_tokens": 101871440} +{"current_steps": 151140, "total_steps": 204665, "loss": 0.0001, "lr": 3.88557275352043e-07, "epoch": 3.6923753450761, "percentage": 73.85, "elapsed_time": "3:15:18", "remaining_time": "1:09:09", "throughput": 8693.8, "total_tokens": 101875152} +{"current_steps": 151145, "total_steps": 204665, "loss": 0.0, "lr": 3.884897983752472e-07, "epoch": 3.692497495907947, "percentage": 73.85, "elapsed_time": "3:15:18", "remaining_time": "1:09:09", "throughput": 8693.85, "total_tokens": 101878800} +{"current_steps": 151150, "total_steps": 204665, "loss": 0.0, "lr": 3.88422325845518e-07, "epoch": 3.6926196467397943, "percentage": 73.85, "elapsed_time": "3:15:18", "remaining_time": "1:09:09", "throughput": 8693.86, "total_tokens": 101881872} +{"current_steps": 151155, "total_steps": 204665, "loss": 0.0, "lr": 3.88354857763346e-07, "epoch": 3.6927417975716414, "percentage": 73.85, "elapsed_time": "3:15:19", "remaining_time": "1:09:08", "throughput": 8693.89, "total_tokens": 101885264} +{"current_steps": 151160, "total_steps": 204665, "loss": 0.0001, "lr": 3.882873941292221e-07, "epoch": 3.6928639484034886, "percentage": 73.86, "elapsed_time": "3:15:19", "remaining_time": "1:09:08", "throughput": 8693.89, "total_tokens": 101888208} +{"current_steps": 151165, "total_steps": 204665, "loss": 0.0, "lr": 3.8821993494363657e-07, "epoch": 3.692986099235336, "percentage": 73.86, "elapsed_time": "3:15:19", "remaining_time": "1:09:07", "throughput": 8693.88, "total_tokens": 101891088} +{"current_steps": 151170, "total_steps": 204665, "loss": 0.0, "lr": 3.881524802070806e-07, "epoch": 3.693108250067183, "percentage": 73.86, "elapsed_time": "3:15:20", "remaining_time": "1:09:07", "throughput": 8693.91, "total_tokens": 101894352} +{"current_steps": 151175, "total_steps": 204665, "loss": 0.0001, "lr": 3.880850299200439e-07, "epoch": 3.69323040089903, "percentage": 73.86, "elapsed_time": "3:15:20", "remaining_time": "1:09:07", "throughput": 8693.9, "total_tokens": 101897232} +{"current_steps": 151180, "total_steps": 204665, "loss": 0.0, "lr": 3.880175840830179e-07, "epoch": 3.6933525517308774, "percentage": 73.87, "elapsed_time": "3:15:20", "remaining_time": "1:09:06", "throughput": 8693.92, "total_tokens": 101900368} +{"current_steps": 151185, "total_steps": 204665, "loss": 0.0, "lr": 3.879501426964922e-07, "epoch": 3.6934747025627246, "percentage": 73.87, "elapsed_time": "3:15:21", "remaining_time": "1:09:06", "throughput": 8693.93, "total_tokens": 101903504} +{"current_steps": 151190, "total_steps": 204665, "loss": 0.0004, "lr": 3.8788270576095806e-07, "epoch": 3.6935968533945718, "percentage": 73.87, "elapsed_time": "3:15:21", "remaining_time": "1:09:05", "throughput": 8693.96, "total_tokens": 101906832} +{"current_steps": 151195, "total_steps": 204665, "loss": 0.0, "lr": 3.8781527327690523e-07, "epoch": 3.693719004226419, "percentage": 73.87, "elapsed_time": "3:15:21", "remaining_time": "1:09:05", "throughput": 8693.97, "total_tokens": 101909968} +{"current_steps": 151200, "total_steps": 204665, "loss": 0.0, "lr": 3.8774784524482426e-07, "epoch": 3.6938411550582657, "percentage": 73.88, "elapsed_time": "3:15:22", "remaining_time": "1:09:05", "throughput": 8694.0, "total_tokens": 101913296} +{"current_steps": 151205, "total_steps": 204665, "loss": 0.0001, "lr": 3.876804216652061e-07, "epoch": 3.6939633058901133, "percentage": 73.88, "elapsed_time": "3:15:22", "remaining_time": "1:09:04", "throughput": 8694.02, "total_tokens": 101916688} +{"current_steps": 151210, "total_steps": 204665, "loss": 0.0001, "lr": 3.876130025385402e-07, "epoch": 3.69408545672196, "percentage": 73.88, "elapsed_time": "3:15:22", "remaining_time": "1:09:04", "throughput": 8694.07, "total_tokens": 101920336} +{"current_steps": 151215, "total_steps": 204665, "loss": 0.0, "lr": 3.8754558786531767e-07, "epoch": 3.6942076075538073, "percentage": 73.88, "elapsed_time": "3:15:23", "remaining_time": "1:09:03", "throughput": 8694.11, "total_tokens": 101923728} +{"current_steps": 151220, "total_steps": 204665, "loss": 0.0, "lr": 3.87478177646028e-07, "epoch": 3.6943297583856545, "percentage": 73.89, "elapsed_time": "3:15:23", "remaining_time": "1:09:03", "throughput": 8694.15, "total_tokens": 101927312} +{"current_steps": 151225, "total_steps": 204665, "loss": 0.0001, "lr": 3.87410771881162e-07, "epoch": 3.6944519092175017, "percentage": 73.89, "elapsed_time": "3:15:24", "remaining_time": "1:09:03", "throughput": 8694.21, "total_tokens": 101931216} +{"current_steps": 151230, "total_steps": 204665, "loss": 0.0245, "lr": 3.8734337057120945e-07, "epoch": 3.694574060049349, "percentage": 73.89, "elapsed_time": "3:15:24", "remaining_time": "1:09:02", "throughput": 8694.23, "total_tokens": 101934416} +{"current_steps": 151235, "total_steps": 204665, "loss": 0.0, "lr": 3.8727597371666067e-07, "epoch": 3.694696210881196, "percentage": 73.89, "elapsed_time": "3:15:24", "remaining_time": "1:09:02", "throughput": 8694.23, "total_tokens": 101937424} +{"current_steps": 151240, "total_steps": 204665, "loss": 0.0001, "lr": 3.8720858131800605e-07, "epoch": 3.6948183617130432, "percentage": 73.9, "elapsed_time": "3:15:25", "remaining_time": "1:09:01", "throughput": 8694.26, "total_tokens": 101940816} +{"current_steps": 151245, "total_steps": 204665, "loss": 0.0, "lr": 3.8714119337573513e-07, "epoch": 3.6949405125448904, "percentage": 73.9, "elapsed_time": "3:15:25", "remaining_time": "1:09:01", "throughput": 8694.31, "total_tokens": 101944464} +{"current_steps": 151250, "total_steps": 204665, "loss": 0.0001, "lr": 3.8707380989033866e-07, "epoch": 3.6950626633767376, "percentage": 73.9, "elapsed_time": "3:15:25", "remaining_time": "1:09:01", "throughput": 8694.35, "total_tokens": 101947984} +{"current_steps": 151255, "total_steps": 204665, "loss": 0.0001, "lr": 3.870064308623063e-07, "epoch": 3.695184814208585, "percentage": 73.9, "elapsed_time": "3:15:26", "remaining_time": "1:09:00", "throughput": 8694.37, "total_tokens": 101951184} +{"current_steps": 151260, "total_steps": 204665, "loss": 0.0001, "lr": 3.8693905629212775e-07, "epoch": 3.695306965040432, "percentage": 73.91, "elapsed_time": "3:15:26", "remaining_time": "1:09:00", "throughput": 8694.43, "total_tokens": 101955024} +{"current_steps": 151265, "total_steps": 204665, "loss": 0.0, "lr": 3.8687168618029366e-07, "epoch": 3.695429115872279, "percentage": 73.91, "elapsed_time": "3:15:26", "remaining_time": "1:08:59", "throughput": 8694.48, "total_tokens": 101958736} +{"current_steps": 151270, "total_steps": 204665, "loss": 0.0, "lr": 3.8680432052729304e-07, "epoch": 3.6955512667041264, "percentage": 73.91, "elapsed_time": "3:15:27", "remaining_time": "1:08:59", "throughput": 8694.53, "total_tokens": 101962448} +{"current_steps": 151275, "total_steps": 204665, "loss": 0.0, "lr": 3.867369593336168e-07, "epoch": 3.6956734175359736, "percentage": 73.91, "elapsed_time": "3:15:27", "remaining_time": "1:08:59", "throughput": 8694.62, "total_tokens": 101966736} +{"current_steps": 151280, "total_steps": 204665, "loss": 0.115, "lr": 3.86669602599754e-07, "epoch": 3.6957955683678207, "percentage": 73.92, "elapsed_time": "3:15:27", "remaining_time": "1:08:58", "throughput": 8694.63, "total_tokens": 101969808} +{"current_steps": 151285, "total_steps": 204665, "loss": 0.0538, "lr": 3.866022503261952e-07, "epoch": 3.6959177191996675, "percentage": 73.92, "elapsed_time": "3:15:28", "remaining_time": "1:08:58", "throughput": 8694.65, "total_tokens": 101973008} +{"current_steps": 151290, "total_steps": 204665, "loss": 0.031, "lr": 3.8653490251342945e-07, "epoch": 3.696039870031515, "percentage": 73.92, "elapsed_time": "3:15:28", "remaining_time": "1:08:57", "throughput": 8694.67, "total_tokens": 101976272} +{"current_steps": 151295, "total_steps": 204665, "loss": 0.0, "lr": 3.8646755916194685e-07, "epoch": 3.696162020863362, "percentage": 73.92, "elapsed_time": "3:15:28", "remaining_time": "1:08:57", "throughput": 8694.68, "total_tokens": 101979344} +{"current_steps": 151300, "total_steps": 204665, "loss": 0.0001, "lr": 3.864002202722375e-07, "epoch": 3.6962841716952095, "percentage": 73.93, "elapsed_time": "3:15:29", "remaining_time": "1:08:57", "throughput": 8694.72, "total_tokens": 101982928} +{"current_steps": 151305, "total_steps": 204665, "loss": 0.0, "lr": 3.863328858447905e-07, "epoch": 3.6964063225270563, "percentage": 73.93, "elapsed_time": "3:15:29", "remaining_time": "1:08:56", "throughput": 8694.73, "total_tokens": 101986064} +{"current_steps": 151310, "total_steps": 204665, "loss": 0.0075, "lr": 3.8626555588009614e-07, "epoch": 3.6965284733589034, "percentage": 73.93, "elapsed_time": "3:15:29", "remaining_time": "1:08:56", "throughput": 8694.74, "total_tokens": 101989136} +{"current_steps": 151315, "total_steps": 204665, "loss": 0.0001, "lr": 3.861982303786434e-07, "epoch": 3.6966506241907506, "percentage": 73.93, "elapsed_time": "3:15:30", "remaining_time": "1:08:55", "throughput": 8694.75, "total_tokens": 101992272} +{"current_steps": 151320, "total_steps": 204665, "loss": 0.0, "lr": 3.861309093409222e-07, "epoch": 3.696772775022598, "percentage": 73.94, "elapsed_time": "3:15:30", "remaining_time": "1:08:55", "throughput": 8694.75, "total_tokens": 101995280} +{"current_steps": 151325, "total_steps": 204665, "loss": 0.0, "lr": 3.860635927674225e-07, "epoch": 3.696894925854445, "percentage": 73.94, "elapsed_time": "3:15:31", "remaining_time": "1:08:55", "throughput": 8694.81, "total_tokens": 101998992} +{"current_steps": 151330, "total_steps": 204665, "loss": 0.0, "lr": 3.859962806586331e-07, "epoch": 3.697017076686292, "percentage": 73.94, "elapsed_time": "3:15:31", "remaining_time": "1:08:54", "throughput": 8694.84, "total_tokens": 102002448} +{"current_steps": 151335, "total_steps": 204665, "loss": 0.0287, "lr": 3.8592897301504436e-07, "epoch": 3.6971392275181394, "percentage": 73.94, "elapsed_time": "3:15:31", "remaining_time": "1:08:54", "throughput": 8694.88, "total_tokens": 102006032} +{"current_steps": 151340, "total_steps": 204665, "loss": 0.0, "lr": 3.8586166983714475e-07, "epoch": 3.6972613783499866, "percentage": 73.95, "elapsed_time": "3:15:32", "remaining_time": "1:08:53", "throughput": 8694.92, "total_tokens": 102009488} +{"current_steps": 151345, "total_steps": 204665, "loss": 0.0, "lr": 3.8579437112542476e-07, "epoch": 3.6973835291818338, "percentage": 73.95, "elapsed_time": "3:15:32", "remaining_time": "1:08:53", "throughput": 8694.75, "total_tokens": 102012880} +{"current_steps": 151350, "total_steps": 204665, "loss": 0.0, "lr": 3.857270768803729e-07, "epoch": 3.697505680013681, "percentage": 73.95, "elapsed_time": "3:15:33", "remaining_time": "1:08:53", "throughput": 8694.78, "total_tokens": 102016272} +{"current_steps": 151355, "total_steps": 204665, "loss": 0.0001, "lr": 3.856597871024794e-07, "epoch": 3.697627830845528, "percentage": 73.95, "elapsed_time": "3:15:33", "remaining_time": "1:08:52", "throughput": 8694.81, "total_tokens": 102019536} +{"current_steps": 151360, "total_steps": 204665, "loss": 0.0001, "lr": 3.855925017922327e-07, "epoch": 3.6977499816773753, "percentage": 73.95, "elapsed_time": "3:15:33", "remaining_time": "1:08:52", "throughput": 8694.83, "total_tokens": 102022800} +{"current_steps": 151365, "total_steps": 204665, "loss": 0.0, "lr": 3.8552522095012296e-07, "epoch": 3.6978721325092225, "percentage": 73.96, "elapsed_time": "3:15:34", "remaining_time": "1:08:51", "throughput": 8694.85, "total_tokens": 102026064} +{"current_steps": 151370, "total_steps": 204665, "loss": 0.0001, "lr": 3.8545794457663903e-07, "epoch": 3.6979942833410693, "percentage": 73.96, "elapsed_time": "3:15:34", "remaining_time": "1:08:51", "throughput": 8694.86, "total_tokens": 102029136} +{"current_steps": 151375, "total_steps": 204665, "loss": 0.0, "lr": 3.8539067267227e-07, "epoch": 3.698116434172917, "percentage": 73.96, "elapsed_time": "3:15:34", "remaining_time": "1:08:51", "throughput": 8694.89, "total_tokens": 102032464} +{"current_steps": 151380, "total_steps": 204665, "loss": 0.0, "lr": 3.853234052375055e-07, "epoch": 3.6982385850047637, "percentage": 73.96, "elapsed_time": "3:15:35", "remaining_time": "1:08:50", "throughput": 8694.92, "total_tokens": 102035920} +{"current_steps": 151385, "total_steps": 204665, "loss": 0.0001, "lr": 3.852561422728343e-07, "epoch": 3.6983607358366113, "percentage": 73.97, "elapsed_time": "3:15:35", "remaining_time": "1:08:50", "throughput": 8694.93, "total_tokens": 102038992} +{"current_steps": 151390, "total_steps": 204665, "loss": 0.0, "lr": 3.851888837787457e-07, "epoch": 3.698482886668458, "percentage": 73.97, "elapsed_time": "3:15:35", "remaining_time": "1:08:49", "throughput": 8694.93, "total_tokens": 102042064} +{"current_steps": 151395, "total_steps": 204665, "loss": 0.0, "lr": 3.851216297557294e-07, "epoch": 3.6986050375003052, "percentage": 73.97, "elapsed_time": "3:15:36", "remaining_time": "1:08:49", "throughput": 8695.01, "total_tokens": 102046160} +{"current_steps": 151400, "total_steps": 204665, "loss": 0.0, "lr": 3.850543802042735e-07, "epoch": 3.6987271883321524, "percentage": 73.97, "elapsed_time": "3:15:36", "remaining_time": "1:08:49", "throughput": 8695.05, "total_tokens": 102049680} +{"current_steps": 151405, "total_steps": 204665, "loss": 0.0, "lr": 3.849871351248679e-07, "epoch": 3.6988493391639996, "percentage": 73.98, "elapsed_time": "3:15:36", "remaining_time": "1:08:48", "throughput": 8695.1, "total_tokens": 102053392} +{"current_steps": 151410, "total_steps": 204665, "loss": 0.0, "lr": 3.84919894518001e-07, "epoch": 3.698971489995847, "percentage": 73.98, "elapsed_time": "3:15:37", "remaining_time": "1:08:48", "throughput": 8695.13, "total_tokens": 102056784} +{"current_steps": 151415, "total_steps": 204665, "loss": 0.0008, "lr": 3.84852658384162e-07, "epoch": 3.699093640827694, "percentage": 73.98, "elapsed_time": "3:15:37", "remaining_time": "1:08:47", "throughput": 8695.16, "total_tokens": 102060048} +{"current_steps": 151420, "total_steps": 204665, "loss": 0.0, "lr": 3.847854267238403e-07, "epoch": 3.699215791659541, "percentage": 73.98, "elapsed_time": "3:15:37", "remaining_time": "1:08:47", "throughput": 8695.21, "total_tokens": 102063696} +{"current_steps": 151425, "total_steps": 204665, "loss": 0.0, "lr": 3.8471819953752404e-07, "epoch": 3.6993379424913884, "percentage": 73.99, "elapsed_time": "3:15:38", "remaining_time": "1:08:47", "throughput": 8695.26, "total_tokens": 102067408} +{"current_steps": 151430, "total_steps": 204665, "loss": 0.0557, "lr": 3.84650976825703e-07, "epoch": 3.6994600933232356, "percentage": 73.99, "elapsed_time": "3:15:38", "remaining_time": "1:08:46", "throughput": 8695.28, "total_tokens": 102070672} +{"current_steps": 151435, "total_steps": 204665, "loss": 0.0, "lr": 3.8458375858886513e-07, "epoch": 3.6995822441550827, "percentage": 73.99, "elapsed_time": "3:15:38", "remaining_time": "1:08:46", "throughput": 8695.3, "total_tokens": 102073872} +{"current_steps": 151440, "total_steps": 204665, "loss": 0.0, "lr": 3.8451654482750006e-07, "epoch": 3.69970439498693, "percentage": 73.99, "elapsed_time": "3:15:39", "remaining_time": "1:08:45", "throughput": 8695.33, "total_tokens": 102077200} +{"current_steps": 151445, "total_steps": 204665, "loss": 0.0, "lr": 3.844493355420958e-07, "epoch": 3.699826545818777, "percentage": 74.0, "elapsed_time": "3:15:39", "remaining_time": "1:08:45", "throughput": 8695.34, "total_tokens": 102080400} +{"current_steps": 151450, "total_steps": 204665, "loss": 0.0, "lr": 3.8438213073314164e-07, "epoch": 3.6999486966506243, "percentage": 74.0, "elapsed_time": "3:15:40", "remaining_time": "1:08:45", "throughput": 8695.37, "total_tokens": 102083792} +{"current_steps": 151455, "total_steps": 204665, "loss": 0.0001, "lr": 3.843149304011265e-07, "epoch": 3.7000708474824715, "percentage": 74.0, "elapsed_time": "3:15:40", "remaining_time": "1:08:44", "throughput": 8695.4, "total_tokens": 102087120} +{"current_steps": 151460, "total_steps": 204665, "loss": 0.0, "lr": 3.842477345465388e-07, "epoch": 3.7001929983143187, "percentage": 74.0, "elapsed_time": "3:15:40", "remaining_time": "1:08:44", "throughput": 8695.42, "total_tokens": 102090384} +{"current_steps": 151465, "total_steps": 204665, "loss": 0.0, "lr": 3.841805431698669e-07, "epoch": 3.7003151491461654, "percentage": 74.01, "elapsed_time": "3:15:41", "remaining_time": "1:08:43", "throughput": 8695.46, "total_tokens": 102093904} +{"current_steps": 151470, "total_steps": 204665, "loss": 0.0, "lr": 3.841133562716e-07, "epoch": 3.700437299978013, "percentage": 74.01, "elapsed_time": "3:15:41", "remaining_time": "1:08:43", "throughput": 8695.48, "total_tokens": 102097104} +{"current_steps": 151475, "total_steps": 204665, "loss": 0.0168, "lr": 3.8404617385222615e-07, "epoch": 3.70055945080986, "percentage": 74.01, "elapsed_time": "3:15:41", "remaining_time": "1:08:43", "throughput": 8695.51, "total_tokens": 102100432} +{"current_steps": 151480, "total_steps": 204665, "loss": 0.0, "lr": 3.839789959122345e-07, "epoch": 3.7006816016417075, "percentage": 74.01, "elapsed_time": "3:15:42", "remaining_time": "1:08:42", "throughput": 8695.56, "total_tokens": 102104080} +{"current_steps": 151485, "total_steps": 204665, "loss": 0.0, "lr": 3.8391182245211283e-07, "epoch": 3.700803752473554, "percentage": 74.02, "elapsed_time": "3:15:42", "remaining_time": "1:08:42", "throughput": 8695.57, "total_tokens": 102107216} +{"current_steps": 151490, "total_steps": 204665, "loss": 0.0, "lr": 3.8384465347235064e-07, "epoch": 3.7009259033054014, "percentage": 74.02, "elapsed_time": "3:15:42", "remaining_time": "1:08:41", "throughput": 8695.6, "total_tokens": 102110608} +{"current_steps": 151495, "total_steps": 204665, "loss": 0.0, "lr": 3.837774889734353e-07, "epoch": 3.7010480541372486, "percentage": 74.02, "elapsed_time": "3:15:43", "remaining_time": "1:08:41", "throughput": 8695.66, "total_tokens": 102114320} +{"current_steps": 151500, "total_steps": 204665, "loss": 0.0001, "lr": 3.837103289558563e-07, "epoch": 3.7011702049690958, "percentage": 74.02, "elapsed_time": "3:15:43", "remaining_time": "1:08:41", "throughput": 8695.73, "total_tokens": 102118352} +{"current_steps": 151505, "total_steps": 204665, "loss": 0.0, "lr": 3.836431734201012e-07, "epoch": 3.701292355800943, "percentage": 74.03, "elapsed_time": "3:15:43", "remaining_time": "1:08:40", "throughput": 8695.75, "total_tokens": 102121552} +{"current_steps": 151510, "total_steps": 204665, "loss": 0.0007, "lr": 3.8357602236665867e-07, "epoch": 3.70141450663279, "percentage": 74.03, "elapsed_time": "3:15:44", "remaining_time": "1:08:40", "throughput": 8695.78, "total_tokens": 102124944} +{"current_steps": 151515, "total_steps": 204665, "loss": 0.0, "lr": 3.8350887579601744e-07, "epoch": 3.7015366574646373, "percentage": 74.03, "elapsed_time": "3:15:44", "remaining_time": "1:08:39", "throughput": 8695.82, "total_tokens": 102128464} +{"current_steps": 151520, "total_steps": 204665, "loss": 0.0, "lr": 3.834417337086652e-07, "epoch": 3.7016588082964845, "percentage": 74.03, "elapsed_time": "3:15:44", "remaining_time": "1:08:39", "throughput": 8695.84, "total_tokens": 102131664} +{"current_steps": 151525, "total_steps": 204665, "loss": 0.0, "lr": 3.833745961050908e-07, "epoch": 3.7017809591283317, "percentage": 74.04, "elapsed_time": "3:15:45", "remaining_time": "1:08:39", "throughput": 8695.87, "total_tokens": 102135120} +{"current_steps": 151530, "total_steps": 204665, "loss": 0.0001, "lr": 3.833074629857819e-07, "epoch": 3.701903109960179, "percentage": 74.04, "elapsed_time": "3:15:45", "remaining_time": "1:08:38", "throughput": 8695.92, "total_tokens": 102138768} +{"current_steps": 151535, "total_steps": 204665, "loss": 0.0003, "lr": 3.8324033435122727e-07, "epoch": 3.702025260792026, "percentage": 74.04, "elapsed_time": "3:15:45", "remaining_time": "1:08:38", "throughput": 8695.93, "total_tokens": 102141968} +{"current_steps": 151540, "total_steps": 204665, "loss": 0.0282, "lr": 3.831732102019145e-07, "epoch": 3.7021474116238733, "percentage": 74.04, "elapsed_time": "3:15:46", "remaining_time": "1:08:37", "throughput": 8695.98, "total_tokens": 102145616} +{"current_steps": 151545, "total_steps": 204665, "loss": 0.0002, "lr": 3.83106090538332e-07, "epoch": 3.7022695624557205, "percentage": 74.05, "elapsed_time": "3:15:46", "remaining_time": "1:08:37", "throughput": 8696.01, "total_tokens": 102148944} +{"current_steps": 151550, "total_steps": 204665, "loss": 0.0, "lr": 3.830389753609684e-07, "epoch": 3.702391713287567, "percentage": 74.05, "elapsed_time": "3:15:46", "remaining_time": "1:08:37", "throughput": 8696.04, "total_tokens": 102152336} +{"current_steps": 151555, "total_steps": 204665, "loss": 0.0, "lr": 3.8297186467031083e-07, "epoch": 3.702513864119415, "percentage": 74.05, "elapsed_time": "3:15:47", "remaining_time": "1:08:36", "throughput": 8696.06, "total_tokens": 102155600} +{"current_steps": 151560, "total_steps": 204665, "loss": 0.0, "lr": 3.829047584668483e-07, "epoch": 3.7026360149512616, "percentage": 74.05, "elapsed_time": "3:15:47", "remaining_time": "1:08:36", "throughput": 8696.09, "total_tokens": 102158928} +{"current_steps": 151565, "total_steps": 204665, "loss": 0.0, "lr": 3.8283765675106795e-07, "epoch": 3.7027581657831092, "percentage": 74.06, "elapsed_time": "3:15:48", "remaining_time": "1:08:35", "throughput": 8696.08, "total_tokens": 102161808} +{"current_steps": 151570, "total_steps": 204665, "loss": 0.0, "lr": 3.8277055952345847e-07, "epoch": 3.702880316614956, "percentage": 74.06, "elapsed_time": "3:15:48", "remaining_time": "1:08:35", "throughput": 8696.1, "total_tokens": 102165008} +{"current_steps": 151575, "total_steps": 204665, "loss": 0.0001, "lr": 3.827034667845075e-07, "epoch": 3.703002467446803, "percentage": 74.06, "elapsed_time": "3:15:48", "remaining_time": "1:08:35", "throughput": 8696.15, "total_tokens": 102168592} +{"current_steps": 151580, "total_steps": 204665, "loss": 0.0, "lr": 3.8263637853470266e-07, "epoch": 3.7031246182786504, "percentage": 74.06, "elapsed_time": "3:15:49", "remaining_time": "1:08:34", "throughput": 8696.2, "total_tokens": 102172368} +{"current_steps": 151585, "total_steps": 204665, "loss": 0.0, "lr": 3.825692947745324e-07, "epoch": 3.7032467691104975, "percentage": 74.06, "elapsed_time": "3:15:49", "remaining_time": "1:08:34", "throughput": 8696.23, "total_tokens": 102175632} +{"current_steps": 151590, "total_steps": 204665, "loss": 0.0, "lr": 3.82502215504484e-07, "epoch": 3.7033689199423447, "percentage": 74.07, "elapsed_time": "3:15:49", "remaining_time": "1:08:33", "throughput": 8696.26, "total_tokens": 102179024} +{"current_steps": 151595, "total_steps": 204665, "loss": 0.0, "lr": 3.824351407250459e-07, "epoch": 3.703491070774192, "percentage": 74.07, "elapsed_time": "3:15:50", "remaining_time": "1:08:33", "throughput": 8696.29, "total_tokens": 102182352} +{"current_steps": 151600, "total_steps": 204665, "loss": 0.0, "lr": 3.823680704367053e-07, "epoch": 3.703613221606039, "percentage": 74.07, "elapsed_time": "3:15:50", "remaining_time": "1:08:33", "throughput": 8696.32, "total_tokens": 102185808} +{"current_steps": 151605, "total_steps": 204665, "loss": 0.0, "lr": 3.823010046399501e-07, "epoch": 3.7037353724378863, "percentage": 74.07, "elapsed_time": "3:15:50", "remaining_time": "1:08:32", "throughput": 8696.37, "total_tokens": 102189328} +{"current_steps": 151610, "total_steps": 204665, "loss": 0.0, "lr": 3.8223394333526846e-07, "epoch": 3.7038575232697335, "percentage": 74.08, "elapsed_time": "3:15:51", "remaining_time": "1:08:32", "throughput": 8696.4, "total_tokens": 102192784} +{"current_steps": 151615, "total_steps": 204665, "loss": 0.0, "lr": 3.8216688652314753e-07, "epoch": 3.7039796741015807, "percentage": 74.08, "elapsed_time": "3:15:51", "remaining_time": "1:08:31", "throughput": 8696.44, "total_tokens": 102196240} +{"current_steps": 151620, "total_steps": 204665, "loss": 0.0, "lr": 3.8209983420407543e-07, "epoch": 3.704101824933428, "percentage": 74.08, "elapsed_time": "3:15:51", "remaining_time": "1:08:31", "throughput": 8696.45, "total_tokens": 102199312} +{"current_steps": 151625, "total_steps": 204665, "loss": 0.0001, "lr": 3.820327863785392e-07, "epoch": 3.704223975765275, "percentage": 74.08, "elapsed_time": "3:15:52", "remaining_time": "1:08:31", "throughput": 8696.49, "total_tokens": 102202832} +{"current_steps": 151630, "total_steps": 204665, "loss": 0.1065, "lr": 3.819657430470271e-07, "epoch": 3.7043461265971223, "percentage": 74.09, "elapsed_time": "3:15:52", "remaining_time": "1:08:30", "throughput": 8696.51, "total_tokens": 102206032} +{"current_steps": 151635, "total_steps": 204665, "loss": 0.0, "lr": 3.81898704210026e-07, "epoch": 3.7044682774289694, "percentage": 74.09, "elapsed_time": "3:15:52", "remaining_time": "1:08:30", "throughput": 8696.53, "total_tokens": 102209296} +{"current_steps": 151640, "total_steps": 204665, "loss": 0.0, "lr": 3.8183166986802384e-07, "epoch": 3.7045904282608166, "percentage": 74.09, "elapsed_time": "3:15:53", "remaining_time": "1:08:29", "throughput": 8696.58, "total_tokens": 102213008} +{"current_steps": 151645, "total_steps": 204665, "loss": 0.0, "lr": 3.8176464002150833e-07, "epoch": 3.7047125790926634, "percentage": 74.09, "elapsed_time": "3:15:53", "remaining_time": "1:08:29", "throughput": 8696.62, "total_tokens": 102216528} +{"current_steps": 151650, "total_steps": 204665, "loss": 0.0, "lr": 3.816976146709663e-07, "epoch": 3.704834729924511, "percentage": 74.1, "elapsed_time": "3:15:53", "remaining_time": "1:08:29", "throughput": 8696.65, "total_tokens": 102219920} +{"current_steps": 151655, "total_steps": 204665, "loss": 0.0, "lr": 3.8163059381688587e-07, "epoch": 3.7049568807563578, "percentage": 74.1, "elapsed_time": "3:15:54", "remaining_time": "1:08:28", "throughput": 8696.66, "total_tokens": 102222992} +{"current_steps": 151660, "total_steps": 204665, "loss": 0.113, "lr": 3.8156357745975375e-07, "epoch": 3.705079031588205, "percentage": 74.1, "elapsed_time": "3:15:54", "remaining_time": "1:08:28", "throughput": 8696.71, "total_tokens": 102226640} +{"current_steps": 151665, "total_steps": 204665, "loss": 0.0202, "lr": 3.8149656560005794e-07, "epoch": 3.705201182420052, "percentage": 74.1, "elapsed_time": "3:15:54", "remaining_time": "1:08:27", "throughput": 8696.73, "total_tokens": 102229840} +{"current_steps": 151670, "total_steps": 204665, "loss": 0.0, "lr": 3.8142955823828517e-07, "epoch": 3.7053233332518993, "percentage": 74.11, "elapsed_time": "3:15:55", "remaining_time": "1:08:27", "throughput": 8696.79, "total_tokens": 102233616} +{"current_steps": 151675, "total_steps": 204665, "loss": 0.0, "lr": 3.8136255537492333e-07, "epoch": 3.7054454840837465, "percentage": 74.11, "elapsed_time": "3:15:55", "remaining_time": "1:08:27", "throughput": 8696.82, "total_tokens": 102237008} +{"current_steps": 151680, "total_steps": 204665, "loss": 0.0, "lr": 3.8129555701045936e-07, "epoch": 3.7055676349155937, "percentage": 74.11, "elapsed_time": "3:15:56", "remaining_time": "1:08:26", "throughput": 8696.83, "total_tokens": 102240080} +{"current_steps": 151685, "total_steps": 204665, "loss": 0.0, "lr": 3.812285631453802e-07, "epoch": 3.705689785747441, "percentage": 74.11, "elapsed_time": "3:15:56", "remaining_time": "1:08:26", "throughput": 8696.87, "total_tokens": 102243664} +{"current_steps": 151690, "total_steps": 204665, "loss": 0.0, "lr": 3.8116157378017377e-07, "epoch": 3.705811936579288, "percentage": 74.12, "elapsed_time": "3:15:56", "remaining_time": "1:08:25", "throughput": 8696.9, "total_tokens": 102247184} +{"current_steps": 151695, "total_steps": 204665, "loss": 0.0005, "lr": 3.810945889153264e-07, "epoch": 3.7059340874111353, "percentage": 74.12, "elapsed_time": "3:15:57", "remaining_time": "1:08:25", "throughput": 8696.95, "total_tokens": 102250704} +{"current_steps": 151700, "total_steps": 204665, "loss": 0.0, "lr": 3.8102760855132567e-07, "epoch": 3.7060562382429825, "percentage": 74.12, "elapsed_time": "3:15:57", "remaining_time": "1:08:25", "throughput": 8696.99, "total_tokens": 102254352} +{"current_steps": 151705, "total_steps": 204665, "loss": 0.0696, "lr": 3.8096063268865897e-07, "epoch": 3.7061783890748297, "percentage": 74.12, "elapsed_time": "3:15:57", "remaining_time": "1:08:24", "throughput": 8697.06, "total_tokens": 102258192} +{"current_steps": 151710, "total_steps": 204665, "loss": 0.0, "lr": 3.8089366132781277e-07, "epoch": 3.706300539906677, "percentage": 74.13, "elapsed_time": "3:15:58", "remaining_time": "1:08:24", "throughput": 8697.09, "total_tokens": 102261648} +{"current_steps": 151715, "total_steps": 204665, "loss": 0.0001, "lr": 3.808266944692746e-07, "epoch": 3.706422690738524, "percentage": 74.13, "elapsed_time": "3:15:58", "remaining_time": "1:08:23", "throughput": 8697.14, "total_tokens": 102265296} +{"current_steps": 151720, "total_steps": 204665, "loss": 0.085, "lr": 3.8075973211353107e-07, "epoch": 3.7065448415703712, "percentage": 74.13, "elapsed_time": "3:15:58", "remaining_time": "1:08:23", "throughput": 8697.17, "total_tokens": 102268624} +{"current_steps": 151725, "total_steps": 204665, "loss": 0.0, "lr": 3.8069277426106917e-07, "epoch": 3.7066669924022184, "percentage": 74.13, "elapsed_time": "3:15:59", "remaining_time": "1:08:23", "throughput": 8697.21, "total_tokens": 102272208} +{"current_steps": 151730, "total_steps": 204665, "loss": 0.0, "lr": 3.8062582091237637e-07, "epoch": 3.706789143234065, "percentage": 74.14, "elapsed_time": "3:15:59", "remaining_time": "1:08:22", "throughput": 8697.25, "total_tokens": 102275728} +{"current_steps": 151735, "total_steps": 204665, "loss": 0.044, "lr": 3.805588720679389e-07, "epoch": 3.706911294065913, "percentage": 74.14, "elapsed_time": "3:15:59", "remaining_time": "1:08:22", "throughput": 8697.28, "total_tokens": 102279120} +{"current_steps": 151740, "total_steps": 204665, "loss": 0.0, "lr": 3.8049192772824435e-07, "epoch": 3.7070334448977595, "percentage": 74.14, "elapsed_time": "3:16:00", "remaining_time": "1:08:21", "throughput": 8697.29, "total_tokens": 102282192} +{"current_steps": 151745, "total_steps": 204665, "loss": 0.0, "lr": 3.8042498789377863e-07, "epoch": 3.707155595729607, "percentage": 74.14, "elapsed_time": "3:16:00", "remaining_time": "1:08:21", "throughput": 8697.31, "total_tokens": 102285392} +{"current_steps": 151750, "total_steps": 204665, "loss": 0.0, "lr": 3.803580525650294e-07, "epoch": 3.707277746561454, "percentage": 74.15, "elapsed_time": "3:16:00", "remaining_time": "1:08:21", "throughput": 8697.36, "total_tokens": 102289168} +{"current_steps": 151755, "total_steps": 204665, "loss": 0.0001, "lr": 3.802911217424828e-07, "epoch": 3.707399897393301, "percentage": 74.15, "elapsed_time": "3:16:01", "remaining_time": "1:08:20", "throughput": 8697.38, "total_tokens": 102292432} +{"current_steps": 151760, "total_steps": 204665, "loss": 0.0, "lr": 3.8022419542662585e-07, "epoch": 3.7075220482251483, "percentage": 74.15, "elapsed_time": "3:16:01", "remaining_time": "1:08:20", "throughput": 8697.41, "total_tokens": 102295696} +{"current_steps": 151765, "total_steps": 204665, "loss": 0.0, "lr": 3.8015727361794547e-07, "epoch": 3.7076441990569955, "percentage": 74.15, "elapsed_time": "3:16:01", "remaining_time": "1:08:19", "throughput": 8697.44, "total_tokens": 102299152} +{"current_steps": 151770, "total_steps": 204665, "loss": 0.0, "lr": 3.800903563169283e-07, "epoch": 3.7077663498888427, "percentage": 74.16, "elapsed_time": "3:16:02", "remaining_time": "1:08:19", "throughput": 8697.48, "total_tokens": 102302608} +{"current_steps": 151775, "total_steps": 204665, "loss": 0.0001, "lr": 3.800234435240604e-07, "epoch": 3.70788850072069, "percentage": 74.16, "elapsed_time": "3:16:02", "remaining_time": "1:08:19", "throughput": 8697.49, "total_tokens": 102305680} +{"current_steps": 151780, "total_steps": 204665, "loss": 0.0, "lr": 3.7995653523982896e-07, "epoch": 3.708010651552537, "percentage": 74.16, "elapsed_time": "3:16:03", "remaining_time": "1:08:18", "throughput": 8697.5, "total_tokens": 102308752} +{"current_steps": 151785, "total_steps": 204665, "loss": 0.0002, "lr": 3.7988963146472053e-07, "epoch": 3.7081328023843843, "percentage": 74.16, "elapsed_time": "3:16:03", "remaining_time": "1:08:18", "throughput": 8697.52, "total_tokens": 102312080} +{"current_steps": 151790, "total_steps": 204665, "loss": 0.0, "lr": 3.798227321992211e-07, "epoch": 3.7082549532162314, "percentage": 74.17, "elapsed_time": "3:16:03", "remaining_time": "1:08:17", "throughput": 8697.55, "total_tokens": 102315472} +{"current_steps": 151795, "total_steps": 204665, "loss": 0.0443, "lr": 3.7975583744381757e-07, "epoch": 3.7083771040480786, "percentage": 74.17, "elapsed_time": "3:16:04", "remaining_time": "1:08:17", "throughput": 8697.58, "total_tokens": 102318800} +{"current_steps": 151800, "total_steps": 204665, "loss": 0.0822, "lr": 3.796889471989967e-07, "epoch": 3.708499254879926, "percentage": 74.17, "elapsed_time": "3:16:04", "remaining_time": "1:08:17", "throughput": 8697.58, "total_tokens": 102321808} +{"current_steps": 151805, "total_steps": 204665, "loss": 0.0, "lr": 3.7962206146524435e-07, "epoch": 3.708621405711773, "percentage": 74.17, "elapsed_time": "3:16:04", "remaining_time": "1:08:16", "throughput": 8697.61, "total_tokens": 102325200} +{"current_steps": 151810, "total_steps": 204665, "loss": 0.0, "lr": 3.7955518024304756e-07, "epoch": 3.70874355654362, "percentage": 74.17, "elapsed_time": "3:16:05", "remaining_time": "1:08:16", "throughput": 8697.66, "total_tokens": 102328784} +{"current_steps": 151815, "total_steps": 204665, "loss": 0.0, "lr": 3.794883035328921e-07, "epoch": 3.7088657073754674, "percentage": 74.18, "elapsed_time": "3:16:05", "remaining_time": "1:08:15", "throughput": 8697.68, "total_tokens": 102331984} +{"current_steps": 151820, "total_steps": 204665, "loss": 0.0001, "lr": 3.794214313352646e-07, "epoch": 3.7089878582073146, "percentage": 74.18, "elapsed_time": "3:16:05", "remaining_time": "1:08:15", "throughput": 8697.7, "total_tokens": 102335248} +{"current_steps": 151825, "total_steps": 204665, "loss": 0.0001, "lr": 3.7935456365065164e-07, "epoch": 3.7091100090391613, "percentage": 74.18, "elapsed_time": "3:16:06", "remaining_time": "1:08:14", "throughput": 8697.72, "total_tokens": 102338512} +{"current_steps": 151830, "total_steps": 204665, "loss": 0.0001, "lr": 3.7928770047953883e-07, "epoch": 3.709232159871009, "percentage": 74.18, "elapsed_time": "3:16:06", "remaining_time": "1:08:14", "throughput": 8697.76, "total_tokens": 102342032} +{"current_steps": 151835, "total_steps": 204665, "loss": 0.0, "lr": 3.792208418224133e-07, "epoch": 3.7093543107028557, "percentage": 74.19, "elapsed_time": "3:16:06", "remaining_time": "1:08:14", "throughput": 8697.81, "total_tokens": 102345744} +{"current_steps": 151840, "total_steps": 204665, "loss": 0.0, "lr": 3.7915398767976037e-07, "epoch": 3.709476461534703, "percentage": 74.19, "elapsed_time": "3:16:07", "remaining_time": "1:08:13", "throughput": 8697.82, "total_tokens": 102348752} +{"current_steps": 151845, "total_steps": 204665, "loss": 0.0631, "lr": 3.7908713805206694e-07, "epoch": 3.70959861236655, "percentage": 74.19, "elapsed_time": "3:16:07", "remaining_time": "1:08:13", "throughput": 8697.83, "total_tokens": 102351888} +{"current_steps": 151850, "total_steps": 204665, "loss": 0.0, "lr": 3.7902029293981854e-07, "epoch": 3.7097207631983973, "percentage": 74.19, "elapsed_time": "3:16:07", "remaining_time": "1:08:12", "throughput": 8697.87, "total_tokens": 102355472} +{"current_steps": 151855, "total_steps": 204665, "loss": 0.0001, "lr": 3.7895345234350163e-07, "epoch": 3.7098429140302445, "percentage": 74.2, "elapsed_time": "3:16:08", "remaining_time": "1:08:12", "throughput": 8697.9, "total_tokens": 102358800} +{"current_steps": 151860, "total_steps": 204665, "loss": 0.0, "lr": 3.788866162636025e-07, "epoch": 3.7099650648620917, "percentage": 74.2, "elapsed_time": "3:16:08", "remaining_time": "1:08:12", "throughput": 8697.91, "total_tokens": 102361872} +{"current_steps": 151865, "total_steps": 204665, "loss": 0.0, "lr": 3.788197847006067e-07, "epoch": 3.710087215693939, "percentage": 74.2, "elapsed_time": "3:16:08", "remaining_time": "1:08:11", "throughput": 8697.93, "total_tokens": 102365136} +{"current_steps": 151870, "total_steps": 204665, "loss": 0.0, "lr": 3.787529576550008e-07, "epoch": 3.710209366525786, "percentage": 74.2, "elapsed_time": "3:16:09", "remaining_time": "1:08:11", "throughput": 8697.95, "total_tokens": 102368400} +{"current_steps": 151875, "total_steps": 204665, "loss": 0.0, "lr": 3.786861351272702e-07, "epoch": 3.7103315173576332, "percentage": 74.21, "elapsed_time": "3:16:09", "remaining_time": "1:08:10", "throughput": 8697.98, "total_tokens": 102371728} +{"current_steps": 151880, "total_steps": 204665, "loss": 0.0, "lr": 3.786193171179014e-07, "epoch": 3.7104536681894804, "percentage": 74.21, "elapsed_time": "3:16:09", "remaining_time": "1:08:10", "throughput": 8698.0, "total_tokens": 102374992} +{"current_steps": 151885, "total_steps": 204665, "loss": 0.0004, "lr": 3.7855250362738014e-07, "epoch": 3.7105758190213276, "percentage": 74.21, "elapsed_time": "3:16:10", "remaining_time": "1:08:10", "throughput": 8698.04, "total_tokens": 102378576} +{"current_steps": 151890, "total_steps": 204665, "loss": 0.0, "lr": 3.7848569465619187e-07, "epoch": 3.710697969853175, "percentage": 74.21, "elapsed_time": "3:16:10", "remaining_time": "1:08:09", "throughput": 8698.06, "total_tokens": 102381840} +{"current_steps": 151895, "total_steps": 204665, "loss": 0.0, "lr": 3.7841889020482307e-07, "epoch": 3.710820120685022, "percentage": 74.22, "elapsed_time": "3:16:10", "remaining_time": "1:08:09", "throughput": 8698.09, "total_tokens": 102385232} +{"current_steps": 151900, "total_steps": 204665, "loss": 0.0, "lr": 3.7835209027375894e-07, "epoch": 3.710942271516869, "percentage": 74.22, "elapsed_time": "3:16:11", "remaining_time": "1:08:08", "throughput": 8698.1, "total_tokens": 102388304} +{"current_steps": 151905, "total_steps": 204665, "loss": 0.0384, "lr": 3.7828529486348604e-07, "epoch": 3.7110644223487164, "percentage": 74.22, "elapsed_time": "3:16:11", "remaining_time": "1:08:08", "throughput": 8698.13, "total_tokens": 102391632} +{"current_steps": 151910, "total_steps": 204665, "loss": 0.0, "lr": 3.782185039744893e-07, "epoch": 3.711186573180563, "percentage": 74.22, "elapsed_time": "3:16:12", "remaining_time": "1:08:08", "throughput": 8698.14, "total_tokens": 102394768} +{"current_steps": 151915, "total_steps": 204665, "loss": 0.0, "lr": 3.781517176072548e-07, "epoch": 3.7113087240124107, "percentage": 74.23, "elapsed_time": "3:16:12", "remaining_time": "1:08:07", "throughput": 8698.15, "total_tokens": 102397840} +{"current_steps": 151920, "total_steps": 204665, "loss": 0.0, "lr": 3.7808493576226863e-07, "epoch": 3.7114308748442575, "percentage": 74.23, "elapsed_time": "3:16:12", "remaining_time": "1:08:07", "throughput": 8698.17, "total_tokens": 102401104} +{"current_steps": 151925, "total_steps": 204665, "loss": 0.0293, "lr": 3.780181584400158e-07, "epoch": 3.711553025676105, "percentage": 74.23, "elapsed_time": "3:16:13", "remaining_time": "1:08:06", "throughput": 8698.22, "total_tokens": 102404752} +{"current_steps": 151930, "total_steps": 204665, "loss": 0.0, "lr": 3.779513856409825e-07, "epoch": 3.711675176507952, "percentage": 74.23, "elapsed_time": "3:16:13", "remaining_time": "1:08:06", "throughput": 8698.24, "total_tokens": 102407952} +{"current_steps": 151935, "total_steps": 204665, "loss": 0.0002, "lr": 3.778846173656538e-07, "epoch": 3.711797327339799, "percentage": 74.24, "elapsed_time": "3:16:13", "remaining_time": "1:08:06", "throughput": 8698.25, "total_tokens": 102411088} +{"current_steps": 151940, "total_steps": 204665, "loss": 0.0, "lr": 3.778178536145157e-07, "epoch": 3.7119194781716462, "percentage": 74.24, "elapsed_time": "3:16:14", "remaining_time": "1:08:05", "throughput": 8698.25, "total_tokens": 102414096} +{"current_steps": 151945, "total_steps": 204665, "loss": 0.0009, "lr": 3.777510943880532e-07, "epoch": 3.7120416290034934, "percentage": 74.24, "elapsed_time": "3:16:14", "remaining_time": "1:08:05", "throughput": 8698.26, "total_tokens": 102417168} +{"current_steps": 151950, "total_steps": 204665, "loss": 0.0, "lr": 3.776843396867522e-07, "epoch": 3.7121637798353406, "percentage": 74.24, "elapsed_time": "3:16:14", "remaining_time": "1:08:04", "throughput": 8698.31, "total_tokens": 102420752} +{"current_steps": 151955, "total_steps": 204665, "loss": 0.0, "lr": 3.7761758951109836e-07, "epoch": 3.712285930667188, "percentage": 74.25, "elapsed_time": "3:16:15", "remaining_time": "1:08:04", "throughput": 8698.33, "total_tokens": 102424144} +{"current_steps": 151960, "total_steps": 204665, "loss": 0.0, "lr": 3.7755084386157643e-07, "epoch": 3.712408081499035, "percentage": 74.25, "elapsed_time": "3:16:15", "remaining_time": "1:08:04", "throughput": 8698.35, "total_tokens": 102427280} +{"current_steps": 151965, "total_steps": 204665, "loss": 0.0, "lr": 3.7748410273867247e-07, "epoch": 3.712530232330882, "percentage": 74.25, "elapsed_time": "3:16:15", "remaining_time": "1:08:03", "throughput": 8698.41, "total_tokens": 102431184} +{"current_steps": 151970, "total_steps": 204665, "loss": 0.0, "lr": 3.7741736614287135e-07, "epoch": 3.7126523831627294, "percentage": 74.25, "elapsed_time": "3:16:16", "remaining_time": "1:08:03", "throughput": 8698.43, "total_tokens": 102434448} +{"current_steps": 151975, "total_steps": 204665, "loss": 0.0001, "lr": 3.7735063407465886e-07, "epoch": 3.7127745339945766, "percentage": 74.26, "elapsed_time": "3:16:16", "remaining_time": "1:08:02", "throughput": 8698.45, "total_tokens": 102437648} +{"current_steps": 151980, "total_steps": 204665, "loss": 0.0, "lr": 3.772839065345197e-07, "epoch": 3.7128966848264238, "percentage": 74.26, "elapsed_time": "3:16:16", "remaining_time": "1:08:02", "throughput": 8698.47, "total_tokens": 102440912} +{"current_steps": 151985, "total_steps": 204665, "loss": 0.0004, "lr": 3.7721718352293976e-07, "epoch": 3.713018835658271, "percentage": 74.26, "elapsed_time": "3:16:17", "remaining_time": "1:08:02", "throughput": 8698.49, "total_tokens": 102444048} +{"current_steps": 151990, "total_steps": 204665, "loss": 0.0, "lr": 3.7715046504040406e-07, "epoch": 3.713140986490118, "percentage": 74.26, "elapsed_time": "3:16:17", "remaining_time": "1:08:01", "throughput": 8698.52, "total_tokens": 102447568} +{"current_steps": 151995, "total_steps": 204665, "loss": 0.0, "lr": 3.770837510873972e-07, "epoch": 3.713263137321965, "percentage": 74.27, "elapsed_time": "3:16:17", "remaining_time": "1:08:01", "throughput": 8698.55, "total_tokens": 102450960} +{"current_steps": 152000, "total_steps": 204665, "loss": 0.0, "lr": 3.770170416644054e-07, "epoch": 3.7133852881538125, "percentage": 74.27, "elapsed_time": "3:16:18", "remaining_time": "1:08:00", "throughput": 8698.62, "total_tokens": 102454928} +{"current_steps": 152005, "total_steps": 204665, "loss": 0.0, "lr": 3.7695033677191277e-07, "epoch": 3.7135074389856593, "percentage": 74.27, "elapsed_time": "3:16:18", "remaining_time": "1:08:00", "throughput": 8698.66, "total_tokens": 102458384} +{"current_steps": 152010, "total_steps": 204665, "loss": 0.0359, "lr": 3.7688363641040486e-07, "epoch": 3.713629589817507, "percentage": 74.27, "elapsed_time": "3:16:18", "remaining_time": "1:08:00", "throughput": 8698.68, "total_tokens": 102461648} +{"current_steps": 152015, "total_steps": 204665, "loss": 0.0, "lr": 3.7681694058036715e-07, "epoch": 3.7137517406493536, "percentage": 74.28, "elapsed_time": "3:16:19", "remaining_time": "1:07:59", "throughput": 8698.74, "total_tokens": 102465488} +{"current_steps": 152020, "total_steps": 204665, "loss": 0.0, "lr": 3.7675024928228393e-07, "epoch": 3.713873891481201, "percentage": 74.28, "elapsed_time": "3:16:19", "remaining_time": "1:07:59", "throughput": 8698.78, "total_tokens": 102469008} +{"current_steps": 152025, "total_steps": 204665, "loss": 0.0, "lr": 3.7668356251664077e-07, "epoch": 3.713996042313048, "percentage": 74.28, "elapsed_time": "3:16:20", "remaining_time": "1:07:58", "throughput": 8698.82, "total_tokens": 102472656} +{"current_steps": 152030, "total_steps": 204665, "loss": 0.0, "lr": 3.766168802839221e-07, "epoch": 3.714118193144895, "percentage": 74.28, "elapsed_time": "3:16:20", "remaining_time": "1:07:58", "throughput": 8698.86, "total_tokens": 102476112} +{"current_steps": 152035, "total_steps": 204665, "loss": 0.0001, "lr": 3.765502025846132e-07, "epoch": 3.7142403439767424, "percentage": 74.28, "elapsed_time": "3:16:20", "remaining_time": "1:07:58", "throughput": 8698.89, "total_tokens": 102479440} +{"current_steps": 152040, "total_steps": 204665, "loss": 0.0, "lr": 3.7648352941919924e-07, "epoch": 3.7143624948085896, "percentage": 74.29, "elapsed_time": "3:16:21", "remaining_time": "1:07:57", "throughput": 8698.9, "total_tokens": 102482576} +{"current_steps": 152045, "total_steps": 204665, "loss": 0.0001, "lr": 3.764168607881644e-07, "epoch": 3.714484645640437, "percentage": 74.29, "elapsed_time": "3:16:21", "remaining_time": "1:07:57", "throughput": 8698.9, "total_tokens": 102485584} +{"current_steps": 152050, "total_steps": 204665, "loss": 0.0001, "lr": 3.763501966919942e-07, "epoch": 3.714606796472284, "percentage": 74.29, "elapsed_time": "3:16:21", "remaining_time": "1:07:56", "throughput": 8698.92, "total_tokens": 102488848} +{"current_steps": 152055, "total_steps": 204665, "loss": 0.1307, "lr": 3.762835371311728e-07, "epoch": 3.714728947304131, "percentage": 74.29, "elapsed_time": "3:16:22", "remaining_time": "1:07:56", "throughput": 8698.95, "total_tokens": 102492176} +{"current_steps": 152060, "total_steps": 204665, "loss": 0.0003, "lr": 3.762168821061856e-07, "epoch": 3.7148510981359784, "percentage": 74.3, "elapsed_time": "3:16:22", "remaining_time": "1:07:56", "throughput": 8698.95, "total_tokens": 102495184} +{"current_steps": 152065, "total_steps": 204665, "loss": 0.0, "lr": 3.761502316175167e-07, "epoch": 3.7149732489678255, "percentage": 74.3, "elapsed_time": "3:16:22", "remaining_time": "1:07:55", "throughput": 8699.0, "total_tokens": 102498768} +{"current_steps": 152070, "total_steps": 204665, "loss": 0.0, "lr": 3.760835856656511e-07, "epoch": 3.7150953997996727, "percentage": 74.3, "elapsed_time": "3:16:23", "remaining_time": "1:07:55", "throughput": 8699.0, "total_tokens": 102501712} +{"current_steps": 152075, "total_steps": 204665, "loss": 0.0, "lr": 3.760169442510738e-07, "epoch": 3.71521755063152, "percentage": 74.3, "elapsed_time": "3:16:23", "remaining_time": "1:07:54", "throughput": 8699.04, "total_tokens": 102505360} +{"current_steps": 152080, "total_steps": 204665, "loss": 0.062, "lr": 3.7595030737426916e-07, "epoch": 3.715339701463367, "percentage": 74.31, "elapsed_time": "3:16:23", "remaining_time": "1:07:54", "throughput": 8699.06, "total_tokens": 102508560} +{"current_steps": 152085, "total_steps": 204665, "loss": 0.0, "lr": 3.758836750357213e-07, "epoch": 3.7154618522952143, "percentage": 74.31, "elapsed_time": "3:16:24", "remaining_time": "1:07:54", "throughput": 8699.08, "total_tokens": 102511824} +{"current_steps": 152090, "total_steps": 204665, "loss": 0.0, "lr": 3.758170472359156e-07, "epoch": 3.715584003127061, "percentage": 74.31, "elapsed_time": "3:16:24", "remaining_time": "1:07:53", "throughput": 8699.1, "total_tokens": 102515024} +{"current_steps": 152095, "total_steps": 204665, "loss": 0.0008, "lr": 3.7575042397533627e-07, "epoch": 3.7157061539589087, "percentage": 74.31, "elapsed_time": "3:16:24", "remaining_time": "1:07:53", "throughput": 8699.11, "total_tokens": 102518096} +{"current_steps": 152100, "total_steps": 204665, "loss": 0.0433, "lr": 3.756838052544674e-07, "epoch": 3.7158283047907554, "percentage": 74.32, "elapsed_time": "3:16:25", "remaining_time": "1:07:52", "throughput": 8699.14, "total_tokens": 102521488} +{"current_steps": 152105, "total_steps": 204665, "loss": 0.0, "lr": 3.756171910737938e-07, "epoch": 3.715950455622603, "percentage": 74.32, "elapsed_time": "3:16:25", "remaining_time": "1:07:52", "throughput": 8699.18, "total_tokens": 102525008} +{"current_steps": 152110, "total_steps": 204665, "loss": 0.0, "lr": 3.7555058143380024e-07, "epoch": 3.71607260645445, "percentage": 74.32, "elapsed_time": "3:16:25", "remaining_time": "1:07:52", "throughput": 8699.18, "total_tokens": 102528016} +{"current_steps": 152115, "total_steps": 204665, "loss": 0.0, "lr": 3.754839763349704e-07, "epoch": 3.716194757286297, "percentage": 74.32, "elapsed_time": "3:16:26", "remaining_time": "1:07:51", "throughput": 8699.22, "total_tokens": 102531536} +{"current_steps": 152120, "total_steps": 204665, "loss": 0.0, "lr": 3.7541737577778956e-07, "epoch": 3.716316908118144, "percentage": 74.33, "elapsed_time": "3:16:26", "remaining_time": "1:07:51", "throughput": 8699.27, "total_tokens": 102535120} +{"current_steps": 152125, "total_steps": 204665, "loss": 0.0, "lr": 3.753507797627412e-07, "epoch": 3.7164390589499914, "percentage": 74.33, "elapsed_time": "3:16:26", "remaining_time": "1:07:50", "throughput": 8699.31, "total_tokens": 102538640} +{"current_steps": 152130, "total_steps": 204665, "loss": 0.0, "lr": 3.7528418829030986e-07, "epoch": 3.7165612097818386, "percentage": 74.33, "elapsed_time": "3:16:27", "remaining_time": "1:07:50", "throughput": 8699.33, "total_tokens": 102541904} +{"current_steps": 152135, "total_steps": 204665, "loss": 0.0, "lr": 3.752176013609804e-07, "epoch": 3.7166833606136858, "percentage": 74.33, "elapsed_time": "3:16:27", "remaining_time": "1:07:50", "throughput": 8699.36, "total_tokens": 102545360} +{"current_steps": 152140, "total_steps": 204665, "loss": 0.0, "lr": 3.7515101897523616e-07, "epoch": 3.716805511445533, "percentage": 74.34, "elapsed_time": "3:16:28", "remaining_time": "1:07:49", "throughput": 8699.41, "total_tokens": 102548944} +{"current_steps": 152145, "total_steps": 204665, "loss": 0.0, "lr": 3.750844411335622e-07, "epoch": 3.71692766227738, "percentage": 74.34, "elapsed_time": "3:16:28", "remaining_time": "1:07:49", "throughput": 8699.44, "total_tokens": 102552464} +{"current_steps": 152150, "total_steps": 204665, "loss": 0.0, "lr": 3.7501786783644183e-07, "epoch": 3.7170498131092273, "percentage": 74.34, "elapsed_time": "3:16:28", "remaining_time": "1:07:48", "throughput": 8699.49, "total_tokens": 102556048} +{"current_steps": 152155, "total_steps": 204665, "loss": 0.0, "lr": 3.7495129908436e-07, "epoch": 3.7171719639410745, "percentage": 74.34, "elapsed_time": "3:16:29", "remaining_time": "1:07:48", "throughput": 8699.52, "total_tokens": 102559504} +{"current_steps": 152160, "total_steps": 204665, "loss": 0.0001, "lr": 3.7488473487780004e-07, "epoch": 3.7172941147729217, "percentage": 74.35, "elapsed_time": "3:16:29", "remaining_time": "1:07:48", "throughput": 8699.54, "total_tokens": 102562768} +{"current_steps": 152165, "total_steps": 204665, "loss": 0.0, "lr": 3.7481817521724655e-07, "epoch": 3.717416265604769, "percentage": 74.35, "elapsed_time": "3:16:29", "remaining_time": "1:07:47", "throughput": 8699.56, "total_tokens": 102565904} +{"current_steps": 152170, "total_steps": 204665, "loss": 0.0, "lr": 3.7475162010318374e-07, "epoch": 3.717538416436616, "percentage": 74.35, "elapsed_time": "3:16:30", "remaining_time": "1:07:47", "throughput": 8699.58, "total_tokens": 102569232} +{"current_steps": 152175, "total_steps": 204665, "loss": 0.0, "lr": 3.746850695360949e-07, "epoch": 3.717660567268463, "percentage": 74.35, "elapsed_time": "3:16:30", "remaining_time": "1:07:46", "throughput": 8699.6, "total_tokens": 102572496} +{"current_steps": 152180, "total_steps": 204665, "loss": 0.0, "lr": 3.7461852351646483e-07, "epoch": 3.7177827181003105, "percentage": 74.36, "elapsed_time": "3:16:30", "remaining_time": "1:07:46", "throughput": 8699.64, "total_tokens": 102576016} +{"current_steps": 152185, "total_steps": 204665, "loss": 0.0002, "lr": 3.745519820447768e-07, "epoch": 3.717904868932157, "percentage": 74.36, "elapsed_time": "3:16:31", "remaining_time": "1:07:46", "throughput": 8699.66, "total_tokens": 102579216} +{"current_steps": 152190, "total_steps": 204665, "loss": 0.0234, "lr": 3.7448544512151514e-07, "epoch": 3.718027019764005, "percentage": 74.36, "elapsed_time": "3:16:31", "remaining_time": "1:07:45", "throughput": 8699.68, "total_tokens": 102582480} +{"current_steps": 152195, "total_steps": 204665, "loss": 0.0, "lr": 3.7441891274716375e-07, "epoch": 3.7181491705958516, "percentage": 74.36, "elapsed_time": "3:16:31", "remaining_time": "1:07:45", "throughput": 8699.72, "total_tokens": 102586064} +{"current_steps": 152200, "total_steps": 204665, "loss": 0.0, "lr": 3.743523849222059e-07, "epoch": 3.718271321427699, "percentage": 74.37, "elapsed_time": "3:16:32", "remaining_time": "1:07:44", "throughput": 8699.76, "total_tokens": 102589456} +{"current_steps": 152205, "total_steps": 204665, "loss": 0.0, "lr": 3.7428586164712604e-07, "epoch": 3.718393472259546, "percentage": 74.37, "elapsed_time": "3:16:32", "remaining_time": "1:07:44", "throughput": 8699.8, "total_tokens": 102592976} +{"current_steps": 152210, "total_steps": 204665, "loss": 0.0, "lr": 3.742193429224074e-07, "epoch": 3.718515623091393, "percentage": 74.37, "elapsed_time": "3:16:32", "remaining_time": "1:07:44", "throughput": 8699.81, "total_tokens": 102596112} +{"current_steps": 152215, "total_steps": 204665, "loss": 0.0, "lr": 3.741528287485344e-07, "epoch": 3.7186377739232404, "percentage": 74.37, "elapsed_time": "3:16:33", "remaining_time": "1:07:43", "throughput": 8699.84, "total_tokens": 102599504} +{"current_steps": 152220, "total_steps": 204665, "loss": 0.0, "lr": 3.7408631912599e-07, "epoch": 3.7187599247550875, "percentage": 74.38, "elapsed_time": "3:16:33", "remaining_time": "1:07:43", "throughput": 8699.85, "total_tokens": 102602640} +{"current_steps": 152225, "total_steps": 204665, "loss": 0.0, "lr": 3.740198140552582e-07, "epoch": 3.7188820755869347, "percentage": 74.38, "elapsed_time": "3:16:33", "remaining_time": "1:07:42", "throughput": 8699.92, "total_tokens": 102606544} +{"current_steps": 152230, "total_steps": 204665, "loss": 0.0, "lr": 3.7395331353682305e-07, "epoch": 3.719004226418782, "percentage": 74.38, "elapsed_time": "3:16:34", "remaining_time": "1:07:42", "throughput": 8699.92, "total_tokens": 102609488} +{"current_steps": 152235, "total_steps": 204665, "loss": 0.0, "lr": 3.7388681757116736e-07, "epoch": 3.719126377250629, "percentage": 74.38, "elapsed_time": "3:16:34", "remaining_time": "1:07:42", "throughput": 8699.94, "total_tokens": 102612688} +{"current_steps": 152240, "total_steps": 204665, "loss": 0.0631, "lr": 3.7382032615877554e-07, "epoch": 3.7192485280824763, "percentage": 74.38, "elapsed_time": "3:16:35", "remaining_time": "1:07:41", "throughput": 8700.0, "total_tokens": 102616528} +{"current_steps": 152245, "total_steps": 204665, "loss": 0.0462, "lr": 3.7375383930013037e-07, "epoch": 3.7193706789143235, "percentage": 74.39, "elapsed_time": "3:16:35", "remaining_time": "1:07:41", "throughput": 8700.0, "total_tokens": 102619536} +{"current_steps": 152250, "total_steps": 204665, "loss": 0.0383, "lr": 3.736873569957162e-07, "epoch": 3.7194928297461707, "percentage": 74.39, "elapsed_time": "3:16:35", "remaining_time": "1:07:40", "throughput": 8700.01, "total_tokens": 102622608} +{"current_steps": 152255, "total_steps": 204665, "loss": 0.0, "lr": 3.736208792460156e-07, "epoch": 3.719614980578018, "percentage": 74.39, "elapsed_time": "3:16:36", "remaining_time": "1:07:40", "throughput": 8700.06, "total_tokens": 102626320} +{"current_steps": 152260, "total_steps": 204665, "loss": 0.0414, "lr": 3.7355440605151236e-07, "epoch": 3.719737131409865, "percentage": 74.39, "elapsed_time": "3:16:36", "remaining_time": "1:07:40", "throughput": 8700.11, "total_tokens": 102629968} +{"current_steps": 152265, "total_steps": 204665, "loss": 0.0, "lr": 3.7348793741269036e-07, "epoch": 3.7198592822417123, "percentage": 74.4, "elapsed_time": "3:16:36", "remaining_time": "1:07:39", "throughput": 8700.14, "total_tokens": 102633232} +{"current_steps": 152270, "total_steps": 204665, "loss": 0.0, "lr": 3.7342147333003227e-07, "epoch": 3.719981433073559, "percentage": 74.4, "elapsed_time": "3:16:37", "remaining_time": "1:07:39", "throughput": 8700.15, "total_tokens": 102636432} +{"current_steps": 152275, "total_steps": 204665, "loss": 0.081, "lr": 3.733550138040221e-07, "epoch": 3.7201035839054066, "percentage": 74.4, "elapsed_time": "3:16:37", "remaining_time": "1:07:38", "throughput": 8700.18, "total_tokens": 102639696} +{"current_steps": 152280, "total_steps": 204665, "loss": 0.0, "lr": 3.7328855883514244e-07, "epoch": 3.7202257347372534, "percentage": 74.4, "elapsed_time": "3:16:37", "remaining_time": "1:07:38", "throughput": 8700.19, "total_tokens": 102642896} +{"current_steps": 152285, "total_steps": 204665, "loss": 0.0001, "lr": 3.7322210842387734e-07, "epoch": 3.7203478855691006, "percentage": 74.41, "elapsed_time": "3:16:38", "remaining_time": "1:07:38", "throughput": 8700.2, "total_tokens": 102645840} +{"current_steps": 152290, "total_steps": 204665, "loss": 0.0, "lr": 3.731556625707093e-07, "epoch": 3.7204700364009478, "percentage": 74.41, "elapsed_time": "3:16:38", "remaining_time": "1:07:37", "throughput": 8700.22, "total_tokens": 102649104} +{"current_steps": 152295, "total_steps": 204665, "loss": 0.0, "lr": 3.730892212761222e-07, "epoch": 3.720592187232795, "percentage": 74.41, "elapsed_time": "3:16:38", "remaining_time": "1:07:37", "throughput": 8700.23, "total_tokens": 102652176} +{"current_steps": 152300, "total_steps": 204665, "loss": 0.0003, "lr": 3.730227845405989e-07, "epoch": 3.720714338064642, "percentage": 74.41, "elapsed_time": "3:16:39", "remaining_time": "1:07:36", "throughput": 8700.23, "total_tokens": 102655120} +{"current_steps": 152305, "total_steps": 204665, "loss": 0.0, "lr": 3.729563523646222e-07, "epoch": 3.7208364888964893, "percentage": 74.42, "elapsed_time": "3:16:39", "remaining_time": "1:07:36", "throughput": 8700.26, "total_tokens": 102658448} +{"current_steps": 152310, "total_steps": 204665, "loss": 0.0, "lr": 3.72889924748676e-07, "epoch": 3.7209586397283365, "percentage": 74.42, "elapsed_time": "3:16:39", "remaining_time": "1:07:36", "throughput": 8700.27, "total_tokens": 102661520} +{"current_steps": 152315, "total_steps": 204665, "loss": 0.0, "lr": 3.728235016932425e-07, "epoch": 3.7210807905601837, "percentage": 74.42, "elapsed_time": "3:16:40", "remaining_time": "1:07:35", "throughput": 8700.28, "total_tokens": 102664592} +{"current_steps": 152320, "total_steps": 204665, "loss": 0.0, "lr": 3.7275708319880516e-07, "epoch": 3.721202941392031, "percentage": 74.42, "elapsed_time": "3:16:40", "remaining_time": "1:07:35", "throughput": 8700.31, "total_tokens": 102668048} +{"current_steps": 152325, "total_steps": 204665, "loss": 0.0, "lr": 3.7269066926584746e-07, "epoch": 3.721325092223878, "percentage": 74.43, "elapsed_time": "3:16:40", "remaining_time": "1:07:34", "throughput": 8700.35, "total_tokens": 102671504} +{"current_steps": 152330, "total_steps": 204665, "loss": 0.0, "lr": 3.7262425989485145e-07, "epoch": 3.7214472430557253, "percentage": 74.43, "elapsed_time": "3:16:41", "remaining_time": "1:07:34", "throughput": 8700.4, "total_tokens": 102675152} +{"current_steps": 152335, "total_steps": 204665, "loss": 0.0402, "lr": 3.725578550863011e-07, "epoch": 3.7215693938875725, "percentage": 74.43, "elapsed_time": "3:16:41", "remaining_time": "1:07:34", "throughput": 8700.43, "total_tokens": 102678544} +{"current_steps": 152340, "total_steps": 204665, "loss": 0.0, "lr": 3.7249145484067835e-07, "epoch": 3.7216915447194197, "percentage": 74.43, "elapsed_time": "3:16:41", "remaining_time": "1:07:33", "throughput": 8700.46, "total_tokens": 102681872} +{"current_steps": 152345, "total_steps": 204665, "loss": 0.0, "lr": 3.7242505915846677e-07, "epoch": 3.721813695551267, "percentage": 74.44, "elapsed_time": "3:16:42", "remaining_time": "1:07:33", "throughput": 8700.46, "total_tokens": 102684880} +{"current_steps": 152350, "total_steps": 204665, "loss": 0.0, "lr": 3.723586680401487e-07, "epoch": 3.721935846383114, "percentage": 74.44, "elapsed_time": "3:16:42", "remaining_time": "1:07:32", "throughput": 8700.47, "total_tokens": 102687888} +{"current_steps": 152355, "total_steps": 204665, "loss": 0.0, "lr": 3.7229228148620726e-07, "epoch": 3.722057997214961, "percentage": 74.44, "elapsed_time": "3:16:42", "remaining_time": "1:07:32", "throughput": 8700.51, "total_tokens": 102691536} +{"current_steps": 152360, "total_steps": 204665, "loss": 0.0, "lr": 3.722258994971255e-07, "epoch": 3.7221801480468084, "percentage": 74.44, "elapsed_time": "3:16:43", "remaining_time": "1:07:32", "throughput": 8700.54, "total_tokens": 102694800} +{"current_steps": 152365, "total_steps": 204665, "loss": 0.0, "lr": 3.7215952207338543e-07, "epoch": 3.722302298878655, "percentage": 74.45, "elapsed_time": "3:16:43", "remaining_time": "1:07:31", "throughput": 8700.59, "total_tokens": 102698576} +{"current_steps": 152370, "total_steps": 204665, "loss": 0.0, "lr": 3.7209314921547066e-07, "epoch": 3.722424449710503, "percentage": 74.45, "elapsed_time": "3:16:43", "remaining_time": "1:07:31", "throughput": 8700.61, "total_tokens": 102701840} +{"current_steps": 152375, "total_steps": 204665, "loss": 0.0, "lr": 3.7202678092386296e-07, "epoch": 3.7225466005423495, "percentage": 74.45, "elapsed_time": "3:16:44", "remaining_time": "1:07:30", "throughput": 8700.65, "total_tokens": 102705296} +{"current_steps": 152380, "total_steps": 204665, "loss": 0.0, "lr": 3.7196041719904536e-07, "epoch": 3.7226687513741967, "percentage": 74.45, "elapsed_time": "3:16:44", "remaining_time": "1:07:30", "throughput": 8700.76, "total_tokens": 102709776} +{"current_steps": 152385, "total_steps": 204665, "loss": 0.0001, "lr": 3.71894058041501e-07, "epoch": 3.722790902206044, "percentage": 74.46, "elapsed_time": "3:16:45", "remaining_time": "1:07:30", "throughput": 8700.77, "total_tokens": 102712912} +{"current_steps": 152390, "total_steps": 204665, "loss": 0.0, "lr": 3.7182770345171187e-07, "epoch": 3.722913053037891, "percentage": 74.46, "elapsed_time": "3:16:45", "remaining_time": "1:07:29", "throughput": 8700.8, "total_tokens": 102716240} +{"current_steps": 152395, "total_steps": 204665, "loss": 0.0, "lr": 3.7176135343016036e-07, "epoch": 3.7230352038697383, "percentage": 74.46, "elapsed_time": "3:16:45", "remaining_time": "1:07:29", "throughput": 8700.81, "total_tokens": 102719248} +{"current_steps": 152400, "total_steps": 204665, "loss": 0.0, "lr": 3.7169500797732966e-07, "epoch": 3.7231573547015855, "percentage": 74.46, "elapsed_time": "3:16:46", "remaining_time": "1:07:28", "throughput": 8700.82, "total_tokens": 102722384} +{"current_steps": 152405, "total_steps": 204665, "loss": 0.0435, "lr": 3.716286670937018e-07, "epoch": 3.7232795055334327, "percentage": 74.47, "elapsed_time": "3:16:46", "remaining_time": "1:07:28", "throughput": 8700.82, "total_tokens": 102725328} +{"current_steps": 152410, "total_steps": 204665, "loss": 0.0566, "lr": 3.7156233077975895e-07, "epoch": 3.72340165636528, "percentage": 74.47, "elapsed_time": "3:16:46", "remaining_time": "1:07:28", "throughput": 8700.83, "total_tokens": 102728400} +{"current_steps": 152415, "total_steps": 204665, "loss": 0.0031, "lr": 3.714959990359838e-07, "epoch": 3.723523807197127, "percentage": 74.47, "elapsed_time": "3:16:47", "remaining_time": "1:07:27", "throughput": 8700.85, "total_tokens": 102731728} +{"current_steps": 152420, "total_steps": 204665, "loss": 0.0, "lr": 3.7142967186285924e-07, "epoch": 3.7236459580289742, "percentage": 74.47, "elapsed_time": "3:16:47", "remaining_time": "1:07:27", "throughput": 8700.94, "total_tokens": 102735952} +{"current_steps": 152425, "total_steps": 204665, "loss": 0.0001, "lr": 3.7136334926086676e-07, "epoch": 3.7237681088608214, "percentage": 74.48, "elapsed_time": "3:16:47", "remaining_time": "1:07:26", "throughput": 8700.98, "total_tokens": 102739408} +{"current_steps": 152430, "total_steps": 204665, "loss": 0.0001, "lr": 3.712970312304894e-07, "epoch": 3.7238902596926686, "percentage": 74.48, "elapsed_time": "3:16:48", "remaining_time": "1:07:26", "throughput": 8701.01, "total_tokens": 102742800} +{"current_steps": 152435, "total_steps": 204665, "loss": 0.0012, "lr": 3.7123071777220884e-07, "epoch": 3.724012410524516, "percentage": 74.48, "elapsed_time": "3:16:48", "remaining_time": "1:07:26", "throughput": 8701.03, "total_tokens": 102746000} +{"current_steps": 152440, "total_steps": 204665, "loss": 0.0, "lr": 3.711644088865076e-07, "epoch": 3.7241345613563626, "percentage": 74.48, "elapsed_time": "3:16:48", "remaining_time": "1:07:25", "throughput": 8701.05, "total_tokens": 102749392} +{"current_steps": 152445, "total_steps": 204665, "loss": 0.0, "lr": 3.7109810457386825e-07, "epoch": 3.72425671218821, "percentage": 74.49, "elapsed_time": "3:16:49", "remaining_time": "1:07:25", "throughput": 8701.07, "total_tokens": 102752592} +{"current_steps": 152450, "total_steps": 204665, "loss": 0.0001, "lr": 3.7103180483477234e-07, "epoch": 3.724378863020057, "percentage": 74.49, "elapsed_time": "3:16:49", "remaining_time": "1:07:24", "throughput": 8701.1, "total_tokens": 102755920} +{"current_steps": 152455, "total_steps": 204665, "loss": 0.0, "lr": 3.7096550966970264e-07, "epoch": 3.7245010138519046, "percentage": 74.49, "elapsed_time": "3:16:49", "remaining_time": "1:07:24", "throughput": 8701.14, "total_tokens": 102759504} +{"current_steps": 152460, "total_steps": 204665, "loss": 0.0, "lr": 3.7089921907914056e-07, "epoch": 3.7246231646837513, "percentage": 74.49, "elapsed_time": "3:16:50", "remaining_time": "1:07:24", "throughput": 8701.18, "total_tokens": 102762960} +{"current_steps": 152465, "total_steps": 204665, "loss": 0.0882, "lr": 3.70832933063569e-07, "epoch": 3.7247453155155985, "percentage": 74.49, "elapsed_time": "3:16:50", "remaining_time": "1:07:23", "throughput": 8701.2, "total_tokens": 102766224} +{"current_steps": 152470, "total_steps": 204665, "loss": 0.0, "lr": 3.707666516234692e-07, "epoch": 3.7248674663474457, "percentage": 74.5, "elapsed_time": "3:16:50", "remaining_time": "1:07:23", "throughput": 8701.22, "total_tokens": 102769424} +{"current_steps": 152475, "total_steps": 204665, "loss": 0.0, "lr": 3.7070037475932346e-07, "epoch": 3.724989617179293, "percentage": 74.5, "elapsed_time": "3:16:51", "remaining_time": "1:07:22", "throughput": 8701.23, "total_tokens": 102772496} +{"current_steps": 152480, "total_steps": 204665, "loss": 0.0, "lr": 3.706341024716143e-07, "epoch": 3.72511176801114, "percentage": 74.5, "elapsed_time": "3:16:51", "remaining_time": "1:07:22", "throughput": 8701.28, "total_tokens": 102776080} +{"current_steps": 152485, "total_steps": 204665, "loss": 0.0, "lr": 3.705678347608229e-07, "epoch": 3.7252339188429873, "percentage": 74.5, "elapsed_time": "3:16:51", "remaining_time": "1:07:22", "throughput": 8701.3, "total_tokens": 102779408} +{"current_steps": 152490, "total_steps": 204665, "loss": 0.0, "lr": 3.705015716274318e-07, "epoch": 3.7253560696748345, "percentage": 74.51, "elapsed_time": "3:16:52", "remaining_time": "1:07:21", "throughput": 8701.34, "total_tokens": 102782864} +{"current_steps": 152495, "total_steps": 204665, "loss": 0.0, "lr": 3.704353130719222e-07, "epoch": 3.7254782205066816, "percentage": 74.51, "elapsed_time": "3:16:52", "remaining_time": "1:07:21", "throughput": 8701.36, "total_tokens": 102786128} +{"current_steps": 152500, "total_steps": 204665, "loss": 0.1, "lr": 3.7036905909477666e-07, "epoch": 3.725600371338529, "percentage": 74.51, "elapsed_time": "3:16:52", "remaining_time": "1:07:20", "throughput": 8701.4, "total_tokens": 102789520} +{"current_steps": 152505, "total_steps": 204665, "loss": 0.0, "lr": 3.7030280969647676e-07, "epoch": 3.725722522170376, "percentage": 74.51, "elapsed_time": "3:16:53", "remaining_time": "1:07:20", "throughput": 8701.41, "total_tokens": 102792592} +{"current_steps": 152510, "total_steps": 204665, "loss": 0.0383, "lr": 3.702365648775039e-07, "epoch": 3.725844673002223, "percentage": 74.52, "elapsed_time": "3:16:53", "remaining_time": "1:07:20", "throughput": 8701.44, "total_tokens": 102796048} +{"current_steps": 152515, "total_steps": 204665, "loss": 0.0, "lr": 3.701703246383403e-07, "epoch": 3.7259668238340704, "percentage": 74.52, "elapsed_time": "3:16:54", "remaining_time": "1:07:19", "throughput": 8701.47, "total_tokens": 102799504} +{"current_steps": 152520, "total_steps": 204665, "loss": 0.0728, "lr": 3.701040889794673e-07, "epoch": 3.7260889746659176, "percentage": 74.52, "elapsed_time": "3:16:54", "remaining_time": "1:07:19", "throughput": 8701.47, "total_tokens": 102802384} +{"current_steps": 152525, "total_steps": 204665, "loss": 0.0, "lr": 3.700378579013671e-07, "epoch": 3.726211125497765, "percentage": 74.52, "elapsed_time": "3:16:54", "remaining_time": "1:07:18", "throughput": 8701.51, "total_tokens": 102805840} +{"current_steps": 152530, "total_steps": 204665, "loss": 0.0, "lr": 3.699716314045207e-07, "epoch": 3.726333276329612, "percentage": 74.53, "elapsed_time": "3:16:55", "remaining_time": "1:07:18", "throughput": 8701.52, "total_tokens": 102808976} +{"current_steps": 152535, "total_steps": 204665, "loss": 0.0, "lr": 3.6990540948940995e-07, "epoch": 3.7264554271614587, "percentage": 74.53, "elapsed_time": "3:16:55", "remaining_time": "1:07:18", "throughput": 8701.54, "total_tokens": 102812240} +{"current_steps": 152540, "total_steps": 204665, "loss": 0.0, "lr": 3.6983919215651704e-07, "epoch": 3.7265775779933064, "percentage": 74.53, "elapsed_time": "3:16:55", "remaining_time": "1:07:17", "throughput": 8701.56, "total_tokens": 102815504} +{"current_steps": 152545, "total_steps": 204665, "loss": 0.0001, "lr": 3.6977297940632257e-07, "epoch": 3.726699728825153, "percentage": 74.53, "elapsed_time": "3:16:56", "remaining_time": "1:07:17", "throughput": 8701.57, "total_tokens": 102818512} +{"current_steps": 152550, "total_steps": 204665, "loss": 0.0, "lr": 3.697067712393088e-07, "epoch": 3.7268218796570007, "percentage": 74.54, "elapsed_time": "3:16:56", "remaining_time": "1:07:16", "throughput": 8701.58, "total_tokens": 102821584} +{"current_steps": 152555, "total_steps": 204665, "loss": 0.0, "lr": 3.696405676559567e-07, "epoch": 3.7269440304888475, "percentage": 74.54, "elapsed_time": "3:16:56", "remaining_time": "1:07:16", "throughput": 8701.62, "total_tokens": 102825040} +{"current_steps": 152560, "total_steps": 204665, "loss": 0.0, "lr": 3.6957436865674817e-07, "epoch": 3.7270661813206947, "percentage": 74.54, "elapsed_time": "3:16:57", "remaining_time": "1:07:15", "throughput": 8701.64, "total_tokens": 102828304} +{"current_steps": 152565, "total_steps": 204665, "loss": 0.0, "lr": 3.695081742421642e-07, "epoch": 3.727188332152542, "percentage": 74.54, "elapsed_time": "3:16:57", "remaining_time": "1:07:15", "throughput": 8701.68, "total_tokens": 102831824} +{"current_steps": 152570, "total_steps": 204665, "loss": 0.0, "lr": 3.6944198441268626e-07, "epoch": 3.727310482984389, "percentage": 74.55, "elapsed_time": "3:16:57", "remaining_time": "1:07:15", "throughput": 8701.7, "total_tokens": 102835024} +{"current_steps": 152575, "total_steps": 204665, "loss": 0.0, "lr": 3.6937579916879614e-07, "epoch": 3.7274326338162362, "percentage": 74.55, "elapsed_time": "3:16:58", "remaining_time": "1:07:14", "throughput": 8701.72, "total_tokens": 102838288} +{"current_steps": 152580, "total_steps": 204665, "loss": 0.0, "lr": 3.6930961851097454e-07, "epoch": 3.7275547846480834, "percentage": 74.55, "elapsed_time": "3:16:58", "remaining_time": "1:07:14", "throughput": 8701.72, "total_tokens": 102841232} +{"current_steps": 152585, "total_steps": 204665, "loss": 0.0001, "lr": 3.692434424397033e-07, "epoch": 3.7276769354799306, "percentage": 74.55, "elapsed_time": "3:16:58", "remaining_time": "1:07:13", "throughput": 8701.77, "total_tokens": 102844816} +{"current_steps": 152590, "total_steps": 204665, "loss": 0.0, "lr": 3.6917727095546314e-07, "epoch": 3.727799086311778, "percentage": 74.56, "elapsed_time": "3:16:59", "remaining_time": "1:07:13", "throughput": 8701.78, "total_tokens": 102847888} +{"current_steps": 152595, "total_steps": 204665, "loss": 0.0001, "lr": 3.691111040587358e-07, "epoch": 3.727921237143625, "percentage": 74.56, "elapsed_time": "3:16:59", "remaining_time": "1:07:13", "throughput": 8701.78, "total_tokens": 102850896} +{"current_steps": 152600, "total_steps": 204665, "loss": 0.0, "lr": 3.69044941750002e-07, "epoch": 3.728043387975472, "percentage": 74.56, "elapsed_time": "3:16:59", "remaining_time": "1:07:12", "throughput": 8701.8, "total_tokens": 102854032} +{"current_steps": 152605, "total_steps": 204665, "loss": 0.0, "lr": 3.6897878402974324e-07, "epoch": 3.7281655388073194, "percentage": 74.56, "elapsed_time": "3:17:00", "remaining_time": "1:07:12", "throughput": 8701.82, "total_tokens": 102857296} +{"current_steps": 152610, "total_steps": 204665, "loss": 0.0, "lr": 3.689126308984406e-07, "epoch": 3.7282876896391666, "percentage": 74.57, "elapsed_time": "3:17:00", "remaining_time": "1:07:11", "throughput": 8701.85, "total_tokens": 102860624} +{"current_steps": 152615, "total_steps": 204665, "loss": 0.0335, "lr": 3.688464823565747e-07, "epoch": 3.7284098404710138, "percentage": 74.57, "elapsed_time": "3:17:00", "remaining_time": "1:07:11", "throughput": 8701.91, "total_tokens": 102864528} +{"current_steps": 152620, "total_steps": 204665, "loss": 0.0001, "lr": 3.687803384046273e-07, "epoch": 3.7285319913028605, "percentage": 74.57, "elapsed_time": "3:17:01", "remaining_time": "1:07:11", "throughput": 8701.93, "total_tokens": 102867728} +{"current_steps": 152625, "total_steps": 204665, "loss": 0.0, "lr": 3.687141990430787e-07, "epoch": 3.728654142134708, "percentage": 74.57, "elapsed_time": "3:17:01", "remaining_time": "1:07:10", "throughput": 8701.97, "total_tokens": 102871248} +{"current_steps": 152630, "total_steps": 204665, "loss": 0.0, "lr": 3.686480642724102e-07, "epoch": 3.728776292966555, "percentage": 74.58, "elapsed_time": "3:17:01", "remaining_time": "1:07:10", "throughput": 8701.99, "total_tokens": 102874448} +{"current_steps": 152635, "total_steps": 204665, "loss": 0.0, "lr": 3.685819340931031e-07, "epoch": 3.7288984437984025, "percentage": 74.58, "elapsed_time": "3:17:02", "remaining_time": "1:07:09", "throughput": 8702.0, "total_tokens": 102877584} +{"current_steps": 152640, "total_steps": 204665, "loss": 0.0, "lr": 3.685158085056378e-07, "epoch": 3.7290205946302493, "percentage": 74.58, "elapsed_time": "3:17:02", "remaining_time": "1:07:09", "throughput": 8702.03, "total_tokens": 102880976} +{"current_steps": 152645, "total_steps": 204665, "loss": 0.0388, "lr": 3.6844968751049566e-07, "epoch": 3.7291427454620965, "percentage": 74.58, "elapsed_time": "3:17:02", "remaining_time": "1:07:09", "throughput": 8702.06, "total_tokens": 102884432} +{"current_steps": 152650, "total_steps": 204665, "loss": 0.0063, "lr": 3.683835711081569e-07, "epoch": 3.7292648962939436, "percentage": 74.59, "elapsed_time": "3:17:03", "remaining_time": "1:07:08", "throughput": 8702.09, "total_tokens": 102887760} +{"current_steps": 152655, "total_steps": 204665, "loss": 0.0, "lr": 3.6831745929910306e-07, "epoch": 3.729387047125791, "percentage": 74.59, "elapsed_time": "3:17:03", "remaining_time": "1:07:08", "throughput": 8702.1, "total_tokens": 102890896} +{"current_steps": 152660, "total_steps": 204665, "loss": 0.0893, "lr": 3.682513520838142e-07, "epoch": 3.729509197957638, "percentage": 74.59, "elapsed_time": "3:17:04", "remaining_time": "1:07:07", "throughput": 8702.13, "total_tokens": 102894288} +{"current_steps": 152665, "total_steps": 204665, "loss": 0.0728, "lr": 3.681852494627714e-07, "epoch": 3.729631348789485, "percentage": 74.59, "elapsed_time": "3:17:04", "remaining_time": "1:07:07", "throughput": 8702.15, "total_tokens": 102897552} +{"current_steps": 152670, "total_steps": 204665, "loss": 0.0, "lr": 3.681191514364558e-07, "epoch": 3.7297534996213324, "percentage": 74.6, "elapsed_time": "3:17:04", "remaining_time": "1:07:07", "throughput": 8702.19, "total_tokens": 102901072} +{"current_steps": 152675, "total_steps": 204665, "loss": 0.0, "lr": 3.6805305800534726e-07, "epoch": 3.7298756504531796, "percentage": 74.6, "elapsed_time": "3:17:05", "remaining_time": "1:07:06", "throughput": 8702.29, "total_tokens": 102905488} +{"current_steps": 152680, "total_steps": 204665, "loss": 0.0, "lr": 3.679869691699273e-07, "epoch": 3.729997801285027, "percentage": 74.6, "elapsed_time": "3:17:05", "remaining_time": "1:07:06", "throughput": 8702.32, "total_tokens": 102908752} +{"current_steps": 152685, "total_steps": 204665, "loss": 0.0, "lr": 3.6792088493067576e-07, "epoch": 3.730119952116874, "percentage": 74.6, "elapsed_time": "3:17:05", "remaining_time": "1:07:05", "throughput": 8702.32, "total_tokens": 102911760} +{"current_steps": 152690, "total_steps": 204665, "loss": 0.0, "lr": 3.6785480528807343e-07, "epoch": 3.730242102948721, "percentage": 74.6, "elapsed_time": "3:17:06", "remaining_time": "1:07:05", "throughput": 8702.33, "total_tokens": 102914832} +{"current_steps": 152695, "total_steps": 204665, "loss": 0.0598, "lr": 3.677887302426014e-07, "epoch": 3.7303642537805684, "percentage": 74.61, "elapsed_time": "3:17:06", "remaining_time": "1:07:05", "throughput": 8702.35, "total_tokens": 102918096} +{"current_steps": 152700, "total_steps": 204665, "loss": 0.0002, "lr": 3.677226597947398e-07, "epoch": 3.7304864046124155, "percentage": 74.61, "elapsed_time": "3:17:06", "remaining_time": "1:07:04", "throughput": 8702.38, "total_tokens": 102921552} +{"current_steps": 152705, "total_steps": 204665, "loss": 0.0, "lr": 3.676565939449687e-07, "epoch": 3.7306085554442627, "percentage": 74.61, "elapsed_time": "3:17:07", "remaining_time": "1:07:04", "throughput": 8702.42, "total_tokens": 102925072} +{"current_steps": 152710, "total_steps": 204665, "loss": 0.0001, "lr": 3.6759053269376927e-07, "epoch": 3.73073070627611, "percentage": 74.61, "elapsed_time": "3:17:07", "remaining_time": "1:07:03", "throughput": 8702.48, "total_tokens": 102928848} +{"current_steps": 152715, "total_steps": 204665, "loss": 0.0001, "lr": 3.6752447604162165e-07, "epoch": 3.7308528571079567, "percentage": 74.62, "elapsed_time": "3:17:07", "remaining_time": "1:07:03", "throughput": 8702.52, "total_tokens": 102932432} +{"current_steps": 152720, "total_steps": 204665, "loss": 0.0, "lr": 3.674584239890057e-07, "epoch": 3.7309750079398043, "percentage": 74.62, "elapsed_time": "3:17:08", "remaining_time": "1:07:03", "throughput": 8702.54, "total_tokens": 102935568} +{"current_steps": 152725, "total_steps": 204665, "loss": 0.0002, "lr": 3.673923765364022e-07, "epoch": 3.731097158771651, "percentage": 74.62, "elapsed_time": "3:17:08", "remaining_time": "1:07:02", "throughput": 8702.54, "total_tokens": 102938576} +{"current_steps": 152730, "total_steps": 204665, "loss": 0.0001, "lr": 3.67326333684292e-07, "epoch": 3.7312193096034982, "percentage": 74.62, "elapsed_time": "3:17:08", "remaining_time": "1:07:02", "throughput": 8702.58, "total_tokens": 102942160} +{"current_steps": 152735, "total_steps": 204665, "loss": 0.0, "lr": 3.672602954331544e-07, "epoch": 3.7313414604353454, "percentage": 74.63, "elapsed_time": "3:17:09", "remaining_time": "1:07:01", "throughput": 8702.6, "total_tokens": 102945296} +{"current_steps": 152740, "total_steps": 204665, "loss": 0.0001, "lr": 3.671942617834705e-07, "epoch": 3.7314636112671926, "percentage": 74.63, "elapsed_time": "3:17:09", "remaining_time": "1:07:01", "throughput": 8702.63, "total_tokens": 102948816} +{"current_steps": 152745, "total_steps": 204665, "loss": 0.0, "lr": 3.671282327357198e-07, "epoch": 3.73158576209904, "percentage": 74.63, "elapsed_time": "3:17:09", "remaining_time": "1:07:01", "throughput": 8702.67, "total_tokens": 102952336} +{"current_steps": 152750, "total_steps": 204665, "loss": 0.0, "lr": 3.670622082903828e-07, "epoch": 3.731707912930887, "percentage": 74.63, "elapsed_time": "3:17:10", "remaining_time": "1:07:00", "throughput": 8702.69, "total_tokens": 102955536} +{"current_steps": 152755, "total_steps": 204665, "loss": 0.0, "lr": 3.6699618844794e-07, "epoch": 3.731830063762734, "percentage": 74.64, "elapsed_time": "3:17:10", "remaining_time": "1:07:00", "throughput": 8702.77, "total_tokens": 102959632} +{"current_steps": 152760, "total_steps": 204665, "loss": 0.0, "lr": 3.6693017320887076e-07, "epoch": 3.7319522145945814, "percentage": 74.64, "elapsed_time": "3:17:11", "remaining_time": "1:06:59", "throughput": 8702.84, "total_tokens": 102963536} +{"current_steps": 152765, "total_steps": 204665, "loss": 0.0001, "lr": 3.6686416257365603e-07, "epoch": 3.7320743654264286, "percentage": 74.64, "elapsed_time": "3:17:11", "remaining_time": "1:06:59", "throughput": 8702.87, "total_tokens": 102966928} +{"current_steps": 152770, "total_steps": 204665, "loss": 0.0, "lr": 3.66798156542775e-07, "epoch": 3.7321965162582758, "percentage": 74.64, "elapsed_time": "3:17:11", "remaining_time": "1:06:59", "throughput": 8702.93, "total_tokens": 102970704} +{"current_steps": 152775, "total_steps": 204665, "loss": 0.0, "lr": 3.667321551167085e-07, "epoch": 3.732318667090123, "percentage": 74.65, "elapsed_time": "3:17:12", "remaining_time": "1:06:58", "throughput": 8702.93, "total_tokens": 102973648} +{"current_steps": 152780, "total_steps": 204665, "loss": 0.0, "lr": 3.666661582959357e-07, "epoch": 3.73244081792197, "percentage": 74.65, "elapsed_time": "3:17:12", "remaining_time": "1:06:58", "throughput": 8702.97, "total_tokens": 102977104} +{"current_steps": 152785, "total_steps": 204665, "loss": 0.0001, "lr": 3.666001660809369e-07, "epoch": 3.7325629687538173, "percentage": 74.65, "elapsed_time": "3:17:12", "remaining_time": "1:06:57", "throughput": 8702.99, "total_tokens": 102980368} +{"current_steps": 152790, "total_steps": 204665, "loss": 0.0, "lr": 3.665341784721925e-07, "epoch": 3.7326851195856645, "percentage": 74.65, "elapsed_time": "3:17:13", "remaining_time": "1:06:57", "throughput": 8703.01, "total_tokens": 102983632} +{"current_steps": 152795, "total_steps": 204665, "loss": 0.0714, "lr": 3.6646819547018147e-07, "epoch": 3.7328072704175117, "percentage": 74.66, "elapsed_time": "3:17:13", "remaining_time": "1:06:57", "throughput": 8703.03, "total_tokens": 102986896} +{"current_steps": 152800, "total_steps": 204665, "loss": 0.0, "lr": 3.6640221707538455e-07, "epoch": 3.7329294212493584, "percentage": 74.66, "elapsed_time": "3:17:13", "remaining_time": "1:06:56", "throughput": 8703.06, "total_tokens": 102990224} +{"current_steps": 152805, "total_steps": 204665, "loss": 0.0, "lr": 3.6633624328828085e-07, "epoch": 3.733051572081206, "percentage": 74.66, "elapsed_time": "3:17:14", "remaining_time": "1:06:56", "throughput": 8703.09, "total_tokens": 102993552} +{"current_steps": 152810, "total_steps": 204665, "loss": 0.0, "lr": 3.6627027410935063e-07, "epoch": 3.733173722913053, "percentage": 74.66, "elapsed_time": "3:17:14", "remaining_time": "1:06:55", "throughput": 8703.12, "total_tokens": 102996944} +{"current_steps": 152815, "total_steps": 204665, "loss": 0.0, "lr": 3.662043095390736e-07, "epoch": 3.7332958737449005, "percentage": 74.67, "elapsed_time": "3:17:14", "remaining_time": "1:06:55", "throughput": 8703.12, "total_tokens": 102999952} +{"current_steps": 152820, "total_steps": 204665, "loss": 0.0, "lr": 3.661383495779288e-07, "epoch": 3.733418024576747, "percentage": 74.67, "elapsed_time": "3:17:15", "remaining_time": "1:06:55", "throughput": 8703.15, "total_tokens": 103003280} +{"current_steps": 152825, "total_steps": 204665, "loss": 0.0, "lr": 3.660723942263967e-07, "epoch": 3.7335401754085944, "percentage": 74.67, "elapsed_time": "3:17:15", "remaining_time": "1:06:54", "throughput": 8703.16, "total_tokens": 103006416} +{"current_steps": 152830, "total_steps": 204665, "loss": 0.0, "lr": 3.660064434849565e-07, "epoch": 3.7336623262404416, "percentage": 74.67, "elapsed_time": "3:17:15", "remaining_time": "1:06:54", "throughput": 8703.17, "total_tokens": 103009488} +{"current_steps": 152835, "total_steps": 204665, "loss": 0.0845, "lr": 3.6594049735408816e-07, "epoch": 3.7337844770722888, "percentage": 74.68, "elapsed_time": "3:17:16", "remaining_time": "1:06:53", "throughput": 8703.18, "total_tokens": 103012496} +{"current_steps": 152840, "total_steps": 204665, "loss": 0.0, "lr": 3.6587455583427074e-07, "epoch": 3.733906627904136, "percentage": 74.68, "elapsed_time": "3:17:16", "remaining_time": "1:06:53", "throughput": 8703.22, "total_tokens": 103016016} +{"current_steps": 152845, "total_steps": 204665, "loss": 0.0, "lr": 3.6580861892598423e-07, "epoch": 3.734028778735983, "percentage": 74.68, "elapsed_time": "3:17:16", "remaining_time": "1:06:53", "throughput": 8703.23, "total_tokens": 103019088} +{"current_steps": 152850, "total_steps": 204665, "loss": 0.0002, "lr": 3.657426866297082e-07, "epoch": 3.7341509295678303, "percentage": 74.68, "elapsed_time": "3:17:17", "remaining_time": "1:06:52", "throughput": 8703.25, "total_tokens": 103022416} +{"current_steps": 152855, "total_steps": 204665, "loss": 0.0, "lr": 3.6567675894592174e-07, "epoch": 3.7342730803996775, "percentage": 74.69, "elapsed_time": "3:17:17", "remaining_time": "1:06:52", "throughput": 8703.29, "total_tokens": 103025936} +{"current_steps": 152860, "total_steps": 204665, "loss": 0.012, "lr": 3.656108358751048e-07, "epoch": 3.7343952312315247, "percentage": 74.69, "elapsed_time": "3:17:17", "remaining_time": "1:06:51", "throughput": 8703.29, "total_tokens": 103028880} +{"current_steps": 152865, "total_steps": 204665, "loss": 0.0, "lr": 3.655449174177361e-07, "epoch": 3.734517382063372, "percentage": 74.69, "elapsed_time": "3:17:18", "remaining_time": "1:06:51", "throughput": 8703.33, "total_tokens": 103032336} +{"current_steps": 152870, "total_steps": 204665, "loss": 0.0, "lr": 3.6547900357429585e-07, "epoch": 3.734639532895219, "percentage": 74.69, "elapsed_time": "3:17:18", "remaining_time": "1:06:51", "throughput": 8703.36, "total_tokens": 103035728} +{"current_steps": 152875, "total_steps": 204665, "loss": 0.0, "lr": 3.654130943452625e-07, "epoch": 3.7347616837270663, "percentage": 74.7, "elapsed_time": "3:17:18", "remaining_time": "1:06:50", "throughput": 8703.37, "total_tokens": 103038800} +{"current_steps": 152880, "total_steps": 204665, "loss": 0.0001, "lr": 3.653471897311159e-07, "epoch": 3.7348838345589135, "percentage": 74.7, "elapsed_time": "3:17:19", "remaining_time": "1:06:50", "throughput": 8703.43, "total_tokens": 103042576} +{"current_steps": 152885, "total_steps": 204665, "loss": 0.0, "lr": 3.6528128973233554e-07, "epoch": 3.7350059853907607, "percentage": 74.7, "elapsed_time": "3:17:19", "remaining_time": "1:06:49", "throughput": 8703.45, "total_tokens": 103045904} +{"current_steps": 152890, "total_steps": 204665, "loss": 0.0002, "lr": 3.652153943494e-07, "epoch": 3.735128136222608, "percentage": 74.7, "elapsed_time": "3:17:20", "remaining_time": "1:06:49", "throughput": 8703.49, "total_tokens": 103049360} +{"current_steps": 152895, "total_steps": 204665, "loss": 0.0, "lr": 3.6514950358278917e-07, "epoch": 3.7352502870544546, "percentage": 74.71, "elapsed_time": "3:17:20", "remaining_time": "1:06:49", "throughput": 8703.52, "total_tokens": 103052752} +{"current_steps": 152900, "total_steps": 204665, "loss": 0.0001, "lr": 3.6508361743298167e-07, "epoch": 3.7353724378863022, "percentage": 74.71, "elapsed_time": "3:17:20", "remaining_time": "1:06:48", "throughput": 8703.51, "total_tokens": 103055568} +{"current_steps": 152905, "total_steps": 204665, "loss": 0.0643, "lr": 3.6501773590045713e-07, "epoch": 3.735494588718149, "percentage": 74.71, "elapsed_time": "3:17:21", "remaining_time": "1:06:48", "throughput": 8703.55, "total_tokens": 103059088} +{"current_steps": 152910, "total_steps": 204665, "loss": 0.0, "lr": 3.6495185898569405e-07, "epoch": 3.735616739549996, "percentage": 74.71, "elapsed_time": "3:17:21", "remaining_time": "1:06:47", "throughput": 8703.54, "total_tokens": 103061840} +{"current_steps": 152915, "total_steps": 204665, "loss": 0.0001, "lr": 3.6488598668917224e-07, "epoch": 3.7357388903818434, "percentage": 74.71, "elapsed_time": "3:17:21", "remaining_time": "1:06:47", "throughput": 8703.58, "total_tokens": 103065360} +{"current_steps": 152920, "total_steps": 204665, "loss": 0.0, "lr": 3.648201190113703e-07, "epoch": 3.7358610412136906, "percentage": 74.72, "elapsed_time": "3:17:22", "remaining_time": "1:06:47", "throughput": 8703.61, "total_tokens": 103068688} +{"current_steps": 152925, "total_steps": 204665, "loss": 0.0001, "lr": 3.647542559527671e-07, "epoch": 3.7359831920455377, "percentage": 74.72, "elapsed_time": "3:17:22", "remaining_time": "1:06:46", "throughput": 8703.65, "total_tokens": 103072208} +{"current_steps": 152930, "total_steps": 204665, "loss": 0.0, "lr": 3.646883975138421e-07, "epoch": 3.736105342877385, "percentage": 74.72, "elapsed_time": "3:17:22", "remaining_time": "1:06:46", "throughput": 8703.69, "total_tokens": 103075856} +{"current_steps": 152935, "total_steps": 204665, "loss": 0.0001, "lr": 3.646225436950735e-07, "epoch": 3.736227493709232, "percentage": 74.72, "elapsed_time": "3:17:23", "remaining_time": "1:06:45", "throughput": 8703.73, "total_tokens": 103079312} +{"current_steps": 152940, "total_steps": 204665, "loss": 0.0001, "lr": 3.6455669449694073e-07, "epoch": 3.7363496445410793, "percentage": 74.73, "elapsed_time": "3:17:23", "remaining_time": "1:06:45", "throughput": 8703.76, "total_tokens": 103082704} +{"current_steps": 152945, "total_steps": 204665, "loss": 0.0, "lr": 3.6449084991992295e-07, "epoch": 3.7364717953729265, "percentage": 74.73, "elapsed_time": "3:17:23", "remaining_time": "1:06:45", "throughput": 8703.8, "total_tokens": 103086288} +{"current_steps": 152950, "total_steps": 204665, "loss": 0.0003, "lr": 3.644250099644983e-07, "epoch": 3.7365939462047737, "percentage": 74.73, "elapsed_time": "3:17:24", "remaining_time": "1:06:44", "throughput": 8703.82, "total_tokens": 103089424} +{"current_steps": 152955, "total_steps": 204665, "loss": 0.0, "lr": 3.643591746311462e-07, "epoch": 3.736716097036621, "percentage": 74.73, "elapsed_time": "3:17:24", "remaining_time": "1:06:44", "throughput": 8703.9, "total_tokens": 103093584} +{"current_steps": 152960, "total_steps": 204665, "loss": 0.0, "lr": 3.642933439203448e-07, "epoch": 3.736838247868468, "percentage": 74.74, "elapsed_time": "3:17:24", "remaining_time": "1:06:43", "throughput": 8703.9, "total_tokens": 103096592} +{"current_steps": 152965, "total_steps": 204665, "loss": 0.0, "lr": 3.6422751783257364e-07, "epoch": 3.7369603987003153, "percentage": 74.74, "elapsed_time": "3:17:25", "remaining_time": "1:06:43", "throughput": 8703.95, "total_tokens": 103100240} +{"current_steps": 152970, "total_steps": 204665, "loss": 0.0, "lr": 3.6416169636831064e-07, "epoch": 3.7370825495321625, "percentage": 74.74, "elapsed_time": "3:17:25", "remaining_time": "1:06:43", "throughput": 8703.96, "total_tokens": 103103376} +{"current_steps": 152975, "total_steps": 204665, "loss": 0.0, "lr": 3.640958795280347e-07, "epoch": 3.7372047003640096, "percentage": 74.74, "elapsed_time": "3:17:25", "remaining_time": "1:06:42", "throughput": 8703.97, "total_tokens": 103106448} +{"current_steps": 152980, "total_steps": 204665, "loss": 0.0001, "lr": 3.6403006731222496e-07, "epoch": 3.7373268511958564, "percentage": 74.75, "elapsed_time": "3:17:26", "remaining_time": "1:06:42", "throughput": 8704.02, "total_tokens": 103110032} +{"current_steps": 152985, "total_steps": 204665, "loss": 0.0, "lr": 3.6396425972135923e-07, "epoch": 3.737449002027704, "percentage": 74.75, "elapsed_time": "3:17:26", "remaining_time": "1:06:41", "throughput": 8704.05, "total_tokens": 103113360} +{"current_steps": 152990, "total_steps": 204665, "loss": 0.0001, "lr": 3.638984567559169e-07, "epoch": 3.7375711528595508, "percentage": 74.75, "elapsed_time": "3:17:26", "remaining_time": "1:06:41", "throughput": 8704.06, "total_tokens": 103116496} +{"current_steps": 152995, "total_steps": 204665, "loss": 0.0, "lr": 3.6383265841637567e-07, "epoch": 3.7376933036913984, "percentage": 74.75, "elapsed_time": "3:17:27", "remaining_time": "1:06:41", "throughput": 8704.09, "total_tokens": 103119952} +{"current_steps": 153000, "total_steps": 204665, "loss": 0.0, "lr": 3.6376686470321447e-07, "epoch": 3.737815454523245, "percentage": 74.76, "elapsed_time": "3:17:27", "remaining_time": "1:06:40", "throughput": 8704.12, "total_tokens": 103123344} +{"current_steps": 153005, "total_steps": 204665, "loss": 0.0, "lr": 3.637010756169121e-07, "epoch": 3.7379376053550923, "percentage": 74.76, "elapsed_time": "3:17:27", "remaining_time": "1:06:40", "throughput": 8704.14, "total_tokens": 103126480} +{"current_steps": 153010, "total_steps": 204665, "loss": 0.0001, "lr": 3.6363529115794667e-07, "epoch": 3.7380597561869395, "percentage": 74.76, "elapsed_time": "3:17:28", "remaining_time": "1:06:39", "throughput": 8704.17, "total_tokens": 103129936} +{"current_steps": 153015, "total_steps": 204665, "loss": 0.0, "lr": 3.6356951132679626e-07, "epoch": 3.7381819070187867, "percentage": 74.76, "elapsed_time": "3:17:28", "remaining_time": "1:06:39", "throughput": 8704.22, "total_tokens": 103133584} +{"current_steps": 153020, "total_steps": 204665, "loss": 0.0, "lr": 3.635037361239398e-07, "epoch": 3.738304057850634, "percentage": 74.77, "elapsed_time": "3:17:29", "remaining_time": "1:06:39", "throughput": 8704.24, "total_tokens": 103136848} +{"current_steps": 153025, "total_steps": 204665, "loss": 0.0, "lr": 3.6343796554985504e-07, "epoch": 3.738426208682481, "percentage": 74.77, "elapsed_time": "3:17:29", "remaining_time": "1:06:38", "throughput": 8704.26, "total_tokens": 103140048} +{"current_steps": 153030, "total_steps": 204665, "loss": 0.0, "lr": 3.6337219960502106e-07, "epoch": 3.7385483595143283, "percentage": 74.77, "elapsed_time": "3:17:29", "remaining_time": "1:06:38", "throughput": 8704.28, "total_tokens": 103143376} +{"current_steps": 153035, "total_steps": 204665, "loss": 0.0, "lr": 3.633064382899153e-07, "epoch": 3.7386705103461755, "percentage": 74.77, "elapsed_time": "3:17:30", "remaining_time": "1:06:37", "throughput": 8704.31, "total_tokens": 103146640} +{"current_steps": 153040, "total_steps": 204665, "loss": 0.0, "lr": 3.632406816050166e-07, "epoch": 3.7387926611780227, "percentage": 74.78, "elapsed_time": "3:17:30", "remaining_time": "1:06:37", "throughput": 8704.31, "total_tokens": 103149648} +{"current_steps": 153045, "total_steps": 204665, "loss": 0.0, "lr": 3.6317492955080263e-07, "epoch": 3.73891481200987, "percentage": 74.78, "elapsed_time": "3:17:30", "remaining_time": "1:06:37", "throughput": 8704.32, "total_tokens": 103152656} +{"current_steps": 153050, "total_steps": 204665, "loss": 0.0, "lr": 3.6310918212775223e-07, "epoch": 3.739036962841717, "percentage": 74.78, "elapsed_time": "3:17:31", "remaining_time": "1:06:36", "throughput": 8704.35, "total_tokens": 103155984} +{"current_steps": 153055, "total_steps": 204665, "loss": 0.0, "lr": 3.6304343933634284e-07, "epoch": 3.7391591136735642, "percentage": 74.78, "elapsed_time": "3:17:31", "remaining_time": "1:06:36", "throughput": 8704.39, "total_tokens": 103159568} +{"current_steps": 153060, "total_steps": 204665, "loss": 0.0, "lr": 3.629777011770532e-07, "epoch": 3.7392812645054114, "percentage": 74.79, "elapsed_time": "3:17:31", "remaining_time": "1:06:35", "throughput": 8704.42, "total_tokens": 103162896} +{"current_steps": 153065, "total_steps": 204665, "loss": 0.0, "lr": 3.629119676503607e-07, "epoch": 3.739403415337258, "percentage": 74.79, "elapsed_time": "3:17:32", "remaining_time": "1:06:35", "throughput": 8704.42, "total_tokens": 103165840} +{"current_steps": 153070, "total_steps": 204665, "loss": 0.0, "lr": 3.628462387567437e-07, "epoch": 3.739525566169106, "percentage": 74.79, "elapsed_time": "3:17:32", "remaining_time": "1:06:35", "throughput": 8704.44, "total_tokens": 103169168} +{"current_steps": 153075, "total_steps": 204665, "loss": 0.0, "lr": 3.6278051449668067e-07, "epoch": 3.7396477170009526, "percentage": 74.79, "elapsed_time": "3:17:32", "remaining_time": "1:06:34", "throughput": 8704.42, "total_tokens": 103171792} +{"current_steps": 153080, "total_steps": 204665, "loss": 0.0, "lr": 3.627147948706487e-07, "epoch": 3.7397698678328, "percentage": 74.8, "elapsed_time": "3:17:33", "remaining_time": "1:06:34", "throughput": 8704.45, "total_tokens": 103175120} +{"current_steps": 153085, "total_steps": 204665, "loss": 0.0182, "lr": 3.626490798791265e-07, "epoch": 3.739892018664647, "percentage": 74.8, "elapsed_time": "3:17:33", "remaining_time": "1:06:33", "throughput": 8704.45, "total_tokens": 103178000} +{"current_steps": 153090, "total_steps": 204665, "loss": 0.0, "lr": 3.6258336952259127e-07, "epoch": 3.740014169496494, "percentage": 74.8, "elapsed_time": "3:17:33", "remaining_time": "1:06:33", "throughput": 8704.47, "total_tokens": 103181200} +{"current_steps": 153095, "total_steps": 204665, "loss": 0.0001, "lr": 3.6251766380152127e-07, "epoch": 3.7401363203283413, "percentage": 74.8, "elapsed_time": "3:17:34", "remaining_time": "1:06:33", "throughput": 8704.53, "total_tokens": 103185040} +{"current_steps": 153100, "total_steps": 204665, "loss": 0.031, "lr": 3.6245196271639457e-07, "epoch": 3.7402584711601885, "percentage": 74.81, "elapsed_time": "3:17:34", "remaining_time": "1:06:32", "throughput": 8704.59, "total_tokens": 103188880} +{"current_steps": 153105, "total_steps": 204665, "loss": 0.0, "lr": 3.623862662676884e-07, "epoch": 3.7403806219920357, "percentage": 74.81, "elapsed_time": "3:17:34", "remaining_time": "1:06:32", "throughput": 8704.62, "total_tokens": 103192336} +{"current_steps": 153110, "total_steps": 204665, "loss": 0.0955, "lr": 3.6232057445588107e-07, "epoch": 3.740502772823883, "percentage": 74.81, "elapsed_time": "3:17:35", "remaining_time": "1:06:31", "throughput": 8704.67, "total_tokens": 103195984} +{"current_steps": 153115, "total_steps": 204665, "loss": 0.0, "lr": 3.622548872814497e-07, "epoch": 3.74062492365573, "percentage": 74.81, "elapsed_time": "3:17:35", "remaining_time": "1:06:31", "throughput": 8704.69, "total_tokens": 103199120} +{"current_steps": 153120, "total_steps": 204665, "loss": 0.0, "lr": 3.621892047448727e-07, "epoch": 3.7407470744875773, "percentage": 74.81, "elapsed_time": "3:17:35", "remaining_time": "1:06:31", "throughput": 8704.68, "total_tokens": 103202000} +{"current_steps": 153125, "total_steps": 204665, "loss": 0.0, "lr": 3.6212352684662737e-07, "epoch": 3.7408692253194245, "percentage": 74.82, "elapsed_time": "3:17:36", "remaining_time": "1:06:30", "throughput": 8704.71, "total_tokens": 103205392} +{"current_steps": 153130, "total_steps": 204665, "loss": 0.0001, "lr": 3.62057853587191e-07, "epoch": 3.7409913761512716, "percentage": 74.82, "elapsed_time": "3:17:36", "remaining_time": "1:06:30", "throughput": 8704.75, "total_tokens": 103208848} +{"current_steps": 153135, "total_steps": 204665, "loss": 0.0969, "lr": 3.6199218496704175e-07, "epoch": 3.741113526983119, "percentage": 74.82, "elapsed_time": "3:17:36", "remaining_time": "1:06:29", "throughput": 8704.77, "total_tokens": 103211984} +{"current_steps": 153140, "total_steps": 204665, "loss": 0.0, "lr": 3.619265209866567e-07, "epoch": 3.741235677814966, "percentage": 74.82, "elapsed_time": "3:17:37", "remaining_time": "1:06:29", "throughput": 8704.8, "total_tokens": 103215504} +{"current_steps": 153145, "total_steps": 204665, "loss": 0.0, "lr": 3.6186086164651387e-07, "epoch": 3.741357828646813, "percentage": 74.83, "elapsed_time": "3:17:37", "remaining_time": "1:06:29", "throughput": 8704.83, "total_tokens": 103218832} +{"current_steps": 153150, "total_steps": 204665, "loss": 0.0788, "lr": 3.617952069470902e-07, "epoch": 3.7414799794786604, "percentage": 74.83, "elapsed_time": "3:17:38", "remaining_time": "1:06:28", "throughput": 8704.86, "total_tokens": 103222288} +{"current_steps": 153155, "total_steps": 204665, "loss": 0.031, "lr": 3.6172955688886343e-07, "epoch": 3.7416021303105076, "percentage": 74.83, "elapsed_time": "3:17:38", "remaining_time": "1:06:28", "throughput": 8704.88, "total_tokens": 103225552} +{"current_steps": 153160, "total_steps": 204665, "loss": 0.0, "lr": 3.6166391147231126e-07, "epoch": 3.7417242811423543, "percentage": 74.83, "elapsed_time": "3:17:38", "remaining_time": "1:06:27", "throughput": 8705.01, "total_tokens": 103230352} +{"current_steps": 153165, "total_steps": 204665, "loss": 0.0001, "lr": 3.615982706979106e-07, "epoch": 3.741846431974202, "percentage": 74.84, "elapsed_time": "3:17:39", "remaining_time": "1:06:27", "throughput": 8705.02, "total_tokens": 103233488} +{"current_steps": 153170, "total_steps": 204665, "loss": 0.0, "lr": 3.6153263456613925e-07, "epoch": 3.7419685828060487, "percentage": 74.84, "elapsed_time": "3:17:39", "remaining_time": "1:06:27", "throughput": 8705.08, "total_tokens": 103237264} +{"current_steps": 153175, "total_steps": 204665, "loss": 0.0, "lr": 3.6146700307747403e-07, "epoch": 3.7420907336378963, "percentage": 74.84, "elapsed_time": "3:17:39", "remaining_time": "1:06:26", "throughput": 8705.1, "total_tokens": 103240464} +{"current_steps": 153180, "total_steps": 204665, "loss": 0.0001, "lr": 3.6140137623239287e-07, "epoch": 3.742212884469743, "percentage": 74.84, "elapsed_time": "3:17:40", "remaining_time": "1:06:26", "throughput": 8705.11, "total_tokens": 103243600} +{"current_steps": 153185, "total_steps": 204665, "loss": 0.0007, "lr": 3.613357540313723e-07, "epoch": 3.7423350353015903, "percentage": 74.85, "elapsed_time": "3:17:40", "remaining_time": "1:06:25", "throughput": 8705.12, "total_tokens": 103246672} +{"current_steps": 153190, "total_steps": 204665, "loss": 0.0001, "lr": 3.612701364748899e-07, "epoch": 3.7424571861334375, "percentage": 74.85, "elapsed_time": "3:17:40", "remaining_time": "1:06:25", "throughput": 8705.15, "total_tokens": 103250064} +{"current_steps": 153195, "total_steps": 204665, "loss": 0.0, "lr": 3.612045235634232e-07, "epoch": 3.7425793369652847, "percentage": 74.85, "elapsed_time": "3:17:41", "remaining_time": "1:06:25", "throughput": 8705.16, "total_tokens": 103253136} +{"current_steps": 153200, "total_steps": 204665, "loss": 0.0, "lr": 3.6113891529744864e-07, "epoch": 3.742701487797132, "percentage": 74.85, "elapsed_time": "3:17:41", "remaining_time": "1:06:24", "throughput": 8705.2, "total_tokens": 103256592} +{"current_steps": 153205, "total_steps": 204665, "loss": 0.0, "lr": 3.610733116774441e-07, "epoch": 3.742823638628979, "percentage": 74.86, "elapsed_time": "3:17:41", "remaining_time": "1:06:24", "throughput": 8705.21, "total_tokens": 103259728} +{"current_steps": 153210, "total_steps": 204665, "loss": 0.0563, "lr": 3.6100771270388606e-07, "epoch": 3.7429457894608262, "percentage": 74.86, "elapsed_time": "3:17:42", "remaining_time": "1:06:23", "throughput": 8705.23, "total_tokens": 103262864} +{"current_steps": 153215, "total_steps": 204665, "loss": 0.0, "lr": 3.6094211837725197e-07, "epoch": 3.7430679402926734, "percentage": 74.86, "elapsed_time": "3:17:42", "remaining_time": "1:06:23", "throughput": 8705.25, "total_tokens": 103266192} +{"current_steps": 153220, "total_steps": 204665, "loss": 0.0, "lr": 3.6087652869801846e-07, "epoch": 3.7431900911245206, "percentage": 74.86, "elapsed_time": "3:17:42", "remaining_time": "1:06:23", "throughput": 8705.28, "total_tokens": 103269584} +{"current_steps": 153225, "total_steps": 204665, "loss": 0.0, "lr": 3.60810943666663e-07, "epoch": 3.743312241956368, "percentage": 74.87, "elapsed_time": "3:17:43", "remaining_time": "1:06:22", "throughput": 8705.3, "total_tokens": 103272784} +{"current_steps": 153230, "total_steps": 204665, "loss": 0.0, "lr": 3.6074536328366235e-07, "epoch": 3.743434392788215, "percentage": 74.87, "elapsed_time": "3:17:43", "remaining_time": "1:06:22", "throughput": 8705.35, "total_tokens": 103276368} +{"current_steps": 153235, "total_steps": 204665, "loss": 0.0, "lr": 3.606797875494929e-07, "epoch": 3.743556543620062, "percentage": 74.87, "elapsed_time": "3:17:43", "remaining_time": "1:06:21", "throughput": 8705.33, "total_tokens": 103279120} +{"current_steps": 153240, "total_steps": 204665, "loss": 0.0, "lr": 3.606142164646324e-07, "epoch": 3.7436786944519094, "percentage": 74.87, "elapsed_time": "3:17:44", "remaining_time": "1:06:21", "throughput": 8705.35, "total_tokens": 103282320} +{"current_steps": 153245, "total_steps": 204665, "loss": 0.0001, "lr": 3.60548650029557e-07, "epoch": 3.743800845283756, "percentage": 74.88, "elapsed_time": "3:17:44", "remaining_time": "1:06:21", "throughput": 8705.38, "total_tokens": 103285648} +{"current_steps": 153250, "total_steps": 204665, "loss": 0.0, "lr": 3.604830882447438e-07, "epoch": 3.7439229961156038, "percentage": 74.88, "elapsed_time": "3:17:44", "remaining_time": "1:06:20", "throughput": 8705.38, "total_tokens": 103288592} +{"current_steps": 153255, "total_steps": 204665, "loss": 0.0, "lr": 3.6041753111066987e-07, "epoch": 3.7440451469474505, "percentage": 74.88, "elapsed_time": "3:17:45", "remaining_time": "1:06:20", "throughput": 8705.4, "total_tokens": 103291792} +{"current_steps": 153260, "total_steps": 204665, "loss": 0.0, "lr": 3.603519786278114e-07, "epoch": 3.744167297779298, "percentage": 74.88, "elapsed_time": "3:17:45", "remaining_time": "1:06:19", "throughput": 8705.43, "total_tokens": 103295184} +{"current_steps": 153265, "total_steps": 204665, "loss": 0.1206, "lr": 3.602864307966457e-07, "epoch": 3.744289448611145, "percentage": 74.89, "elapsed_time": "3:17:45", "remaining_time": "1:06:19", "throughput": 8705.44, "total_tokens": 103298320} +{"current_steps": 153270, "total_steps": 204665, "loss": 0.0388, "lr": 3.6022088761764877e-07, "epoch": 3.744411599442992, "percentage": 74.89, "elapsed_time": "3:17:46", "remaining_time": "1:06:19", "throughput": 8705.46, "total_tokens": 103301584} +{"current_steps": 153275, "total_steps": 204665, "loss": 0.0449, "lr": 3.6015534909129796e-07, "epoch": 3.7445337502748393, "percentage": 74.89, "elapsed_time": "3:17:46", "remaining_time": "1:06:18", "throughput": 8705.48, "total_tokens": 103304784} +{"current_steps": 153280, "total_steps": 204665, "loss": 0.0, "lr": 3.600898152180692e-07, "epoch": 3.7446559011066864, "percentage": 74.89, "elapsed_time": "3:17:46", "remaining_time": "1:06:18", "throughput": 8705.52, "total_tokens": 103308304} +{"current_steps": 153285, "total_steps": 204665, "loss": 0.0383, "lr": 3.600242859984395e-07, "epoch": 3.7447780519385336, "percentage": 74.9, "elapsed_time": "3:17:47", "remaining_time": "1:06:17", "throughput": 8705.56, "total_tokens": 103311760} +{"current_steps": 153290, "total_steps": 204665, "loss": 0.0001, "lr": 3.599587614328856e-07, "epoch": 3.744900202770381, "percentage": 74.9, "elapsed_time": "3:17:47", "remaining_time": "1:06:17", "throughput": 8705.56, "total_tokens": 103314768} +{"current_steps": 153295, "total_steps": 204665, "loss": 0.0, "lr": 3.598932415218835e-07, "epoch": 3.745022353602228, "percentage": 74.9, "elapsed_time": "3:17:48", "remaining_time": "1:06:17", "throughput": 8705.61, "total_tokens": 103318416} +{"current_steps": 153300, "total_steps": 204665, "loss": 0.0, "lr": 3.598277262659102e-07, "epoch": 3.745144504434075, "percentage": 74.9, "elapsed_time": "3:17:48", "remaining_time": "1:06:16", "throughput": 8705.6, "total_tokens": 103321296} +{"current_steps": 153305, "total_steps": 204665, "loss": 0.0, "lr": 3.597622156654414e-07, "epoch": 3.7452666552659224, "percentage": 74.91, "elapsed_time": "3:17:48", "remaining_time": "1:06:16", "throughput": 8705.67, "total_tokens": 103325264} +{"current_steps": 153310, "total_steps": 204665, "loss": 0.0003, "lr": 3.596967097209541e-07, "epoch": 3.7453888060977696, "percentage": 74.91, "elapsed_time": "3:17:49", "remaining_time": "1:06:15", "throughput": 8705.67, "total_tokens": 103328080} +{"current_steps": 153315, "total_steps": 204665, "loss": 0.0, "lr": 3.596312084329248e-07, "epoch": 3.7455109569296168, "percentage": 74.91, "elapsed_time": "3:17:49", "remaining_time": "1:06:15", "throughput": 8705.71, "total_tokens": 103331664} +{"current_steps": 153320, "total_steps": 204665, "loss": 0.0, "lr": 3.595657118018297e-07, "epoch": 3.745633107761464, "percentage": 74.91, "elapsed_time": "3:17:49", "remaining_time": "1:06:15", "throughput": 8705.75, "total_tokens": 103335184} +{"current_steps": 153325, "total_steps": 204665, "loss": 0.0002, "lr": 3.595002198281446e-07, "epoch": 3.745755258593311, "percentage": 74.92, "elapsed_time": "3:17:50", "remaining_time": "1:06:14", "throughput": 8705.79, "total_tokens": 103338704} +{"current_steps": 153330, "total_steps": 204665, "loss": 0.0651, "lr": 3.5943473251234656e-07, "epoch": 3.7458774094251583, "percentage": 74.92, "elapsed_time": "3:17:50", "remaining_time": "1:06:14", "throughput": 8705.8, "total_tokens": 103341840} +{"current_steps": 153335, "total_steps": 204665, "loss": 0.0, "lr": 3.5936924985491104e-07, "epoch": 3.7459995602570055, "percentage": 74.92, "elapsed_time": "3:17:50", "remaining_time": "1:06:13", "throughput": 8705.89, "total_tokens": 103346000} +{"current_steps": 153340, "total_steps": 204665, "loss": 0.0, "lr": 3.59303771856315e-07, "epoch": 3.7461217110888523, "percentage": 74.92, "elapsed_time": "3:17:51", "remaining_time": "1:06:13", "throughput": 8705.9, "total_tokens": 103349136} +{"current_steps": 153345, "total_steps": 204665, "loss": 0.0348, "lr": 3.592382985170339e-07, "epoch": 3.7462438619207, "percentage": 74.92, "elapsed_time": "3:17:51", "remaining_time": "1:06:13", "throughput": 8705.92, "total_tokens": 103352336} +{"current_steps": 153350, "total_steps": 204665, "loss": 0.0, "lr": 3.591728298375446e-07, "epoch": 3.7463660127525467, "percentage": 74.93, "elapsed_time": "3:17:51", "remaining_time": "1:06:12", "throughput": 8705.93, "total_tokens": 103355408} +{"current_steps": 153355, "total_steps": 204665, "loss": 0.0, "lr": 3.5910736581832246e-07, "epoch": 3.746488163584394, "percentage": 74.93, "elapsed_time": "3:17:52", "remaining_time": "1:06:12", "throughput": 8705.93, "total_tokens": 103358352} +{"current_steps": 153360, "total_steps": 204665, "loss": 0.0, "lr": 3.5904190645984434e-07, "epoch": 3.746610314416241, "percentage": 74.93, "elapsed_time": "3:17:52", "remaining_time": "1:06:11", "throughput": 8705.95, "total_tokens": 103361680} +{"current_steps": 153365, "total_steps": 204665, "loss": 0.0, "lr": 3.589764517625855e-07, "epoch": 3.7467324652480882, "percentage": 74.93, "elapsed_time": "3:17:52", "remaining_time": "1:06:11", "throughput": 8706.01, "total_tokens": 103365456} +{"current_steps": 153370, "total_steps": 204665, "loss": 0.0, "lr": 3.5891100172702273e-07, "epoch": 3.7468546160799354, "percentage": 74.94, "elapsed_time": "3:17:53", "remaining_time": "1:06:11", "throughput": 8706.06, "total_tokens": 103369232} +{"current_steps": 153375, "total_steps": 204665, "loss": 0.0003, "lr": 3.588455563536311e-07, "epoch": 3.7469767669117826, "percentage": 74.94, "elapsed_time": "3:17:53", "remaining_time": "1:06:10", "throughput": 8706.09, "total_tokens": 103372560} +{"current_steps": 153380, "total_steps": 204665, "loss": 0.0, "lr": 3.5878011564288714e-07, "epoch": 3.74709891774363, "percentage": 74.94, "elapsed_time": "3:17:53", "remaining_time": "1:06:10", "throughput": 8706.13, "total_tokens": 103376016} +{"current_steps": 153385, "total_steps": 204665, "loss": 0.0, "lr": 3.587146795952669e-07, "epoch": 3.747221068575477, "percentage": 74.94, "elapsed_time": "3:17:54", "remaining_time": "1:06:09", "throughput": 8706.15, "total_tokens": 103379344} +{"current_steps": 153390, "total_steps": 204665, "loss": 0.0, "lr": 3.5864924821124575e-07, "epoch": 3.747343219407324, "percentage": 74.95, "elapsed_time": "3:17:54", "remaining_time": "1:06:09", "throughput": 8706.17, "total_tokens": 103382480} +{"current_steps": 153395, "total_steps": 204665, "loss": 0.0085, "lr": 3.585838214913001e-07, "epoch": 3.7474653702391714, "percentage": 74.95, "elapsed_time": "3:17:54", "remaining_time": "1:06:09", "throughput": 8706.2, "total_tokens": 103385936} +{"current_steps": 153400, "total_steps": 204665, "loss": 0.0001, "lr": 3.58518399435905e-07, "epoch": 3.7475875210710186, "percentage": 74.95, "elapsed_time": "3:17:55", "remaining_time": "1:06:08", "throughput": 8706.24, "total_tokens": 103389392} +{"current_steps": 153405, "total_steps": 204665, "loss": 0.0, "lr": 3.5845298204553676e-07, "epoch": 3.7477096719028657, "percentage": 74.95, "elapsed_time": "3:17:55", "remaining_time": "1:06:08", "throughput": 8706.33, "total_tokens": 103393680} +{"current_steps": 153410, "total_steps": 204665, "loss": 0.0001, "lr": 3.5838756932067126e-07, "epoch": 3.747831822734713, "percentage": 74.96, "elapsed_time": "3:17:56", "remaining_time": "1:06:07", "throughput": 8706.35, "total_tokens": 103397008} +{"current_steps": 153415, "total_steps": 204665, "loss": 0.0321, "lr": 3.5832216126178363e-07, "epoch": 3.74795397356656, "percentage": 74.96, "elapsed_time": "3:17:56", "remaining_time": "1:06:07", "throughput": 8706.41, "total_tokens": 103400784} +{"current_steps": 153420, "total_steps": 204665, "loss": 0.0738, "lr": 3.5825675786935006e-07, "epoch": 3.7480761243984073, "percentage": 74.96, "elapsed_time": "3:17:56", "remaining_time": "1:06:07", "throughput": 8706.46, "total_tokens": 103404496} +{"current_steps": 153425, "total_steps": 204665, "loss": 0.0, "lr": 3.581913591438457e-07, "epoch": 3.748198275230254, "percentage": 74.96, "elapsed_time": "3:17:57", "remaining_time": "1:06:06", "throughput": 8706.48, "total_tokens": 103407696} +{"current_steps": 153430, "total_steps": 204665, "loss": 0.0, "lr": 3.5812596508574675e-07, "epoch": 3.7483204260621017, "percentage": 74.97, "elapsed_time": "3:17:57", "remaining_time": "1:06:06", "throughput": 8706.52, "total_tokens": 103411216} +{"current_steps": 153435, "total_steps": 204665, "loss": 0.0001, "lr": 3.580605756955284e-07, "epoch": 3.7484425768939484, "percentage": 74.97, "elapsed_time": "3:17:57", "remaining_time": "1:06:05", "throughput": 8706.51, "total_tokens": 103414160} +{"current_steps": 153440, "total_steps": 204665, "loss": 0.0, "lr": 3.5799519097366593e-07, "epoch": 3.748564727725796, "percentage": 74.97, "elapsed_time": "3:17:58", "remaining_time": "1:06:05", "throughput": 8706.53, "total_tokens": 103417360} +{"current_steps": 153445, "total_steps": 204665, "loss": 0.0418, "lr": 3.579298109206353e-07, "epoch": 3.748686878557643, "percentage": 74.97, "elapsed_time": "3:17:58", "remaining_time": "1:06:05", "throughput": 8706.59, "total_tokens": 103421072} +{"current_steps": 153450, "total_steps": 204665, "loss": 0.0001, "lr": 3.578644355369116e-07, "epoch": 3.74880902938949, "percentage": 74.98, "elapsed_time": "3:17:58", "remaining_time": "1:06:04", "throughput": 8706.59, "total_tokens": 103424080} +{"current_steps": 153455, "total_steps": 204665, "loss": 0.0, "lr": 3.5779906482297073e-07, "epoch": 3.748931180221337, "percentage": 74.98, "elapsed_time": "3:17:59", "remaining_time": "1:06:04", "throughput": 8706.6, "total_tokens": 103427088} +{"current_steps": 153460, "total_steps": 204665, "loss": 0.0, "lr": 3.577336987792874e-07, "epoch": 3.7490533310531844, "percentage": 74.98, "elapsed_time": "3:17:59", "remaining_time": "1:06:03", "throughput": 8706.64, "total_tokens": 103430736} +{"current_steps": 153465, "total_steps": 204665, "loss": 0.0418, "lr": 3.576683374063374e-07, "epoch": 3.7491754818850316, "percentage": 74.98, "elapsed_time": "3:17:59", "remaining_time": "1:06:03", "throughput": 8706.65, "total_tokens": 103433744} +{"current_steps": 153470, "total_steps": 204665, "loss": 0.0, "lr": 3.576029807045964e-07, "epoch": 3.7492976327168788, "percentage": 74.99, "elapsed_time": "3:18:00", "remaining_time": "1:06:03", "throughput": 8706.77, "total_tokens": 103438416} +{"current_steps": 153475, "total_steps": 204665, "loss": 0.075, "lr": 3.5753762867453885e-07, "epoch": 3.749419783548726, "percentage": 74.99, "elapsed_time": "3:18:00", "remaining_time": "1:06:02", "throughput": 8706.82, "total_tokens": 103442128} +{"current_steps": 153480, "total_steps": 204665, "loss": 0.0, "lr": 3.574722813166409e-07, "epoch": 3.749541934380573, "percentage": 74.99, "elapsed_time": "3:18:00", "remaining_time": "1:06:02", "throughput": 8706.87, "total_tokens": 103445776} +{"current_steps": 153485, "total_steps": 204665, "loss": 0.0, "lr": 3.5740693863137696e-07, "epoch": 3.7496640852124203, "percentage": 74.99, "elapsed_time": "3:18:01", "remaining_time": "1:06:01", "throughput": 8706.87, "total_tokens": 103448784} +{"current_steps": 153490, "total_steps": 204665, "loss": 0.0, "lr": 3.5734160061922304e-07, "epoch": 3.7497862360442675, "percentage": 75.0, "elapsed_time": "3:18:01", "remaining_time": "1:06:01", "throughput": 8706.9, "total_tokens": 103452112} +{"current_steps": 153495, "total_steps": 204665, "loss": 0.0354, "lr": 3.572762672806534e-07, "epoch": 3.7499083868761147, "percentage": 75.0, "elapsed_time": "3:18:01", "remaining_time": "1:06:01", "throughput": 8706.94, "total_tokens": 103455696} +{"current_steps": 153500, "total_steps": 204665, "loss": 0.0001, "lr": 3.572109386161436e-07, "epoch": 3.750030537707962, "percentage": 75.0, "elapsed_time": "3:18:02", "remaining_time": "1:06:00", "throughput": 8706.96, "total_tokens": 103458896} +{"current_steps": 153505, "total_steps": 204665, "loss": 0.0, "lr": 3.571456146261691e-07, "epoch": 3.750152688539809, "percentage": 75.0, "elapsed_time": "3:18:02", "remaining_time": "1:06:00", "throughput": 8706.99, "total_tokens": 103462352} +{"current_steps": 153510, "total_steps": 204665, "loss": 0.0, "lr": 3.5708029531120433e-07, "epoch": 3.750274839371656, "percentage": 75.01, "elapsed_time": "3:18:03", "remaining_time": "1:05:59", "throughput": 8707.03, "total_tokens": 103465808} +{"current_steps": 153510, "total_steps": 204665, "eval_loss": 0.24138930439949036, "epoch": 3.750274839371656, "percentage": 75.01, "elapsed_time": "3:18:50", "remaining_time": "1:06:15", "throughput": 8672.15, "total_tokens": 103465808} +{"current_steps": 153515, "total_steps": 204665, "loss": 0.0, "lr": 3.5701498067172487e-07, "epoch": 3.7503969902035035, "percentage": 75.01, "elapsed_time": "3:19:24", "remaining_time": "1:06:26", "throughput": 8648.04, "total_tokens": 103468944} +{"current_steps": 153520, "total_steps": 204665, "loss": 0.0, "lr": 3.5694967070820514e-07, "epoch": 3.7505191410353502, "percentage": 75.01, "elapsed_time": "3:19:24", "remaining_time": "1:06:26", "throughput": 8648.09, "total_tokens": 103472464} +{"current_steps": 153525, "total_steps": 204665, "loss": 0.0002, "lr": 3.5688436542112054e-07, "epoch": 3.750641291867198, "percentage": 75.01, "elapsed_time": "3:19:25", "remaining_time": "1:06:25", "throughput": 8648.14, "total_tokens": 103476176} +{"current_steps": 153530, "total_steps": 204665, "loss": 0.0, "lr": 3.5681906481094557e-07, "epoch": 3.7507634426990446, "percentage": 75.02, "elapsed_time": "3:19:25", "remaining_time": "1:06:25", "throughput": 8648.22, "total_tokens": 103480336} +{"current_steps": 153535, "total_steps": 204665, "loss": 0.0002, "lr": 3.5675376887815577e-07, "epoch": 3.750885593530892, "percentage": 75.02, "elapsed_time": "3:19:25", "remaining_time": "1:06:24", "throughput": 8648.26, "total_tokens": 103483728} +{"current_steps": 153540, "total_steps": 204665, "loss": 0.0, "lr": 3.566884776232255e-07, "epoch": 3.751007744362739, "percentage": 75.02, "elapsed_time": "3:19:26", "remaining_time": "1:06:24", "throughput": 8648.3, "total_tokens": 103487248} +{"current_steps": 153545, "total_steps": 204665, "loss": 0.0, "lr": 3.566231910466293e-07, "epoch": 3.751129895194586, "percentage": 75.02, "elapsed_time": "3:19:26", "remaining_time": "1:06:24", "throughput": 8648.33, "total_tokens": 103490704} +{"current_steps": 153550, "total_steps": 204665, "loss": 0.0, "lr": 3.5655790914884264e-07, "epoch": 3.7512520460264334, "percentage": 75.03, "elapsed_time": "3:19:26", "remaining_time": "1:06:23", "throughput": 8648.37, "total_tokens": 103494160} +{"current_steps": 153555, "total_steps": 204665, "loss": 0.0002, "lr": 3.5649263193033964e-07, "epoch": 3.7513741968582806, "percentage": 75.03, "elapsed_time": "3:19:27", "remaining_time": "1:06:23", "throughput": 8648.39, "total_tokens": 103497424} +{"current_steps": 153560, "total_steps": 204665, "loss": 0.0, "lr": 3.564273593915953e-07, "epoch": 3.7514963476901277, "percentage": 75.03, "elapsed_time": "3:19:27", "remaining_time": "1:06:22", "throughput": 8648.43, "total_tokens": 103500944} +{"current_steps": 153565, "total_steps": 204665, "loss": 0.0, "lr": 3.563620915330846e-07, "epoch": 3.751618498521975, "percentage": 75.03, "elapsed_time": "3:19:27", "remaining_time": "1:06:22", "throughput": 8648.49, "total_tokens": 103504720} +{"current_steps": 153570, "total_steps": 204665, "loss": 0.0, "lr": 3.5629682835528153e-07, "epoch": 3.751740649353822, "percentage": 75.03, "elapsed_time": "3:19:28", "remaining_time": "1:06:22", "throughput": 8648.49, "total_tokens": 103507664} +{"current_steps": 153575, "total_steps": 204665, "loss": 0.0563, "lr": 3.562315698586614e-07, "epoch": 3.7518628001856693, "percentage": 75.04, "elapsed_time": "3:19:28", "remaining_time": "1:06:21", "throughput": 8648.51, "total_tokens": 103510928} +{"current_steps": 153580, "total_steps": 204665, "loss": 0.0001, "lr": 3.561663160436982e-07, "epoch": 3.7519849510175165, "percentage": 75.04, "elapsed_time": "3:19:28", "remaining_time": "1:06:21", "throughput": 8648.53, "total_tokens": 103514128} +{"current_steps": 153585, "total_steps": 204665, "loss": 0.0, "lr": 3.5610106691086696e-07, "epoch": 3.7521071018493637, "percentage": 75.04, "elapsed_time": "3:19:29", "remaining_time": "1:06:20", "throughput": 8648.6, "total_tokens": 103518096} +{"current_steps": 153590, "total_steps": 204665, "loss": 0.0297, "lr": 3.5603582246064165e-07, "epoch": 3.752229252681211, "percentage": 75.04, "elapsed_time": "3:19:29", "remaining_time": "1:06:20", "throughput": 8648.63, "total_tokens": 103521424} +{"current_steps": 153595, "total_steps": 204665, "loss": 0.0, "lr": 3.55970582693497e-07, "epoch": 3.752351403513058, "percentage": 75.05, "elapsed_time": "3:19:30", "remaining_time": "1:06:20", "throughput": 8648.66, "total_tokens": 103524880} +{"current_steps": 153600, "total_steps": 204665, "loss": 0.075, "lr": 3.55905347609908e-07, "epoch": 3.7524735543449053, "percentage": 75.05, "elapsed_time": "3:19:30", "remaining_time": "1:06:19", "throughput": 8648.73, "total_tokens": 103528784} +{"current_steps": 153605, "total_steps": 204665, "loss": 0.0, "lr": 3.55840117210348e-07, "epoch": 3.752595705176752, "percentage": 75.05, "elapsed_time": "3:19:30", "remaining_time": "1:06:19", "throughput": 8648.79, "total_tokens": 103532560} +{"current_steps": 153610, "total_steps": 204665, "loss": 0.0, "lr": 3.557748914952924e-07, "epoch": 3.7527178560085996, "percentage": 75.05, "elapsed_time": "3:19:31", "remaining_time": "1:06:18", "throughput": 8648.79, "total_tokens": 103535504} +{"current_steps": 153615, "total_steps": 204665, "loss": 0.0, "lr": 3.557096704652147e-07, "epoch": 3.7528400068404464, "percentage": 75.06, "elapsed_time": "3:19:31", "remaining_time": "1:06:18", "throughput": 8648.79, "total_tokens": 103538448} +{"current_steps": 153620, "total_steps": 204665, "loss": 0.0, "lr": 3.5564445412058984e-07, "epoch": 3.752962157672294, "percentage": 75.06, "elapsed_time": "3:19:31", "remaining_time": "1:06:17", "throughput": 8648.81, "total_tokens": 103541584} +{"current_steps": 153625, "total_steps": 204665, "loss": 0.0, "lr": 3.5557924246189153e-07, "epoch": 3.7530843085041408, "percentage": 75.06, "elapsed_time": "3:19:32", "remaining_time": "1:06:17", "throughput": 8648.81, "total_tokens": 103544528} +{"current_steps": 153630, "total_steps": 204665, "loss": 0.0, "lr": 3.555140354895947e-07, "epoch": 3.753206459335988, "percentage": 75.06, "elapsed_time": "3:19:32", "remaining_time": "1:06:17", "throughput": 8648.83, "total_tokens": 103547728} +{"current_steps": 153635, "total_steps": 204665, "loss": 0.0, "lr": 3.5544883320417276e-07, "epoch": 3.753328610167835, "percentage": 75.07, "elapsed_time": "3:19:32", "remaining_time": "1:06:16", "throughput": 8648.85, "total_tokens": 103550928} +{"current_steps": 153640, "total_steps": 204665, "loss": 0.0, "lr": 3.553836356061005e-07, "epoch": 3.7534507609996823, "percentage": 75.07, "elapsed_time": "3:19:33", "remaining_time": "1:06:16", "throughput": 8648.86, "total_tokens": 103554064} +{"current_steps": 153645, "total_steps": 204665, "loss": 0.0001, "lr": 3.5531844269585164e-07, "epoch": 3.7535729118315295, "percentage": 75.07, "elapsed_time": "3:19:33", "remaining_time": "1:06:15", "throughput": 8648.88, "total_tokens": 103557200} +{"current_steps": 153650, "total_steps": 204665, "loss": 0.0002, "lr": 3.5525325447390075e-07, "epoch": 3.7536950626633767, "percentage": 75.07, "elapsed_time": "3:19:33", "remaining_time": "1:06:15", "throughput": 8648.91, "total_tokens": 103560528} +{"current_steps": 153655, "total_steps": 204665, "loss": 0.0001, "lr": 3.5518807094072123e-07, "epoch": 3.753817213495224, "percentage": 75.08, "elapsed_time": "3:19:34", "remaining_time": "1:06:15", "throughput": 8648.94, "total_tokens": 103563920} +{"current_steps": 153660, "total_steps": 204665, "loss": 0.0536, "lr": 3.5512289209678794e-07, "epoch": 3.753939364327071, "percentage": 75.08, "elapsed_time": "3:19:34", "remaining_time": "1:06:14", "throughput": 8648.94, "total_tokens": 103566864} +{"current_steps": 153665, "total_steps": 204665, "loss": 0.0, "lr": 3.55057717942574e-07, "epoch": 3.7540615151589183, "percentage": 75.08, "elapsed_time": "3:19:34", "remaining_time": "1:06:14", "throughput": 8649.01, "total_tokens": 103570960} +{"current_steps": 153670, "total_steps": 204665, "loss": 0.0003, "lr": 3.549925484785541e-07, "epoch": 3.7541836659907655, "percentage": 75.08, "elapsed_time": "3:19:35", "remaining_time": "1:06:13", "throughput": 8649.06, "total_tokens": 103574608} +{"current_steps": 153675, "total_steps": 204665, "loss": 0.0, "lr": 3.5492738370520157e-07, "epoch": 3.7543058168226127, "percentage": 75.09, "elapsed_time": "3:19:35", "remaining_time": "1:06:13", "throughput": 8649.15, "total_tokens": 103578832} +{"current_steps": 153680, "total_steps": 204665, "loss": 0.0, "lr": 3.54862223622991e-07, "epoch": 3.75442796765446, "percentage": 75.09, "elapsed_time": "3:19:35", "remaining_time": "1:06:13", "throughput": 8649.17, "total_tokens": 103582032} +{"current_steps": 153685, "total_steps": 204665, "loss": 0.0, "lr": 3.5479706823239554e-07, "epoch": 3.754550118486307, "percentage": 75.09, "elapsed_time": "3:19:36", "remaining_time": "1:06:12", "throughput": 8649.17, "total_tokens": 103585040} +{"current_steps": 153690, "total_steps": 204665, "loss": 0.0003, "lr": 3.5473191753388923e-07, "epoch": 3.754672269318154, "percentage": 75.09, "elapsed_time": "3:19:36", "remaining_time": "1:06:12", "throughput": 8649.19, "total_tokens": 103588240} +{"current_steps": 153695, "total_steps": 204665, "loss": 0.0, "lr": 3.5466677152794634e-07, "epoch": 3.7547944201500014, "percentage": 75.1, "elapsed_time": "3:19:36", "remaining_time": "1:06:11", "throughput": 8649.23, "total_tokens": 103591632} +{"current_steps": 153700, "total_steps": 204665, "loss": 0.0, "lr": 3.5460163021503996e-07, "epoch": 3.754916570981848, "percentage": 75.1, "elapsed_time": "3:19:37", "remaining_time": "1:06:11", "throughput": 8649.25, "total_tokens": 103594960} +{"current_steps": 153705, "total_steps": 204665, "loss": 0.0001, "lr": 3.545364935956445e-07, "epoch": 3.755038721813696, "percentage": 75.1, "elapsed_time": "3:19:37", "remaining_time": "1:06:11", "throughput": 8649.3, "total_tokens": 103598544} +{"current_steps": 153710, "total_steps": 204665, "loss": 0.0001, "lr": 3.5447136167023286e-07, "epoch": 3.7551608726455425, "percentage": 75.1, "elapsed_time": "3:19:38", "remaining_time": "1:06:10", "throughput": 8649.33, "total_tokens": 103602064} +{"current_steps": 153715, "total_steps": 204665, "loss": 0.0, "lr": 3.544062344392791e-07, "epoch": 3.7552830234773897, "percentage": 75.11, "elapsed_time": "3:19:38", "remaining_time": "1:06:10", "throughput": 8649.34, "total_tokens": 103605136} +{"current_steps": 153720, "total_steps": 204665, "loss": 0.0, "lr": 3.5434111190325724e-07, "epoch": 3.755405174309237, "percentage": 75.11, "elapsed_time": "3:19:38", "remaining_time": "1:06:09", "throughput": 8649.36, "total_tokens": 103608336} +{"current_steps": 153725, "total_steps": 204665, "loss": 0.0, "lr": 3.542759940626401e-07, "epoch": 3.755527325141084, "percentage": 75.11, "elapsed_time": "3:19:39", "remaining_time": "1:06:09", "throughput": 8649.38, "total_tokens": 103611600} +{"current_steps": 153730, "total_steps": 204665, "loss": 0.0001, "lr": 3.54210880917902e-07, "epoch": 3.7556494759729313, "percentage": 75.11, "elapsed_time": "3:19:39", "remaining_time": "1:06:09", "throughput": 8649.37, "total_tokens": 103614480} +{"current_steps": 153735, "total_steps": 204665, "loss": 0.0002, "lr": 3.541457724695156e-07, "epoch": 3.7557716268047785, "percentage": 75.12, "elapsed_time": "3:19:39", "remaining_time": "1:06:08", "throughput": 8649.4, "total_tokens": 103617872} +{"current_steps": 153740, "total_steps": 204665, "loss": 0.002, "lr": 3.540806687179553e-07, "epoch": 3.7558937776366257, "percentage": 75.12, "elapsed_time": "3:19:40", "remaining_time": "1:06:08", "throughput": 8649.42, "total_tokens": 103621008} +{"current_steps": 153745, "total_steps": 204665, "loss": 0.0, "lr": 3.5401556966369405e-07, "epoch": 3.756015928468473, "percentage": 75.12, "elapsed_time": "3:19:40", "remaining_time": "1:06:07", "throughput": 8649.44, "total_tokens": 103624272} +{"current_steps": 153750, "total_steps": 204665, "loss": 0.0002, "lr": 3.5395047530720513e-07, "epoch": 3.75613807930032, "percentage": 75.12, "elapsed_time": "3:19:40", "remaining_time": "1:06:07", "throughput": 8649.44, "total_tokens": 103627216} +{"current_steps": 153755, "total_steps": 204665, "loss": 0.1025, "lr": 3.5388538564896233e-07, "epoch": 3.7562602301321673, "percentage": 75.13, "elapsed_time": "3:19:41", "remaining_time": "1:06:07", "throughput": 8649.47, "total_tokens": 103630544} +{"current_steps": 153760, "total_steps": 204665, "loss": 0.0, "lr": 3.5382030068943845e-07, "epoch": 3.7563823809640144, "percentage": 75.13, "elapsed_time": "3:19:41", "remaining_time": "1:06:06", "throughput": 8649.49, "total_tokens": 103633680} +{"current_steps": 153765, "total_steps": 204665, "loss": 0.0, "lr": 3.5375522042910756e-07, "epoch": 3.7565045317958616, "percentage": 75.13, "elapsed_time": "3:19:41", "remaining_time": "1:06:06", "throughput": 8649.5, "total_tokens": 103636752} +{"current_steps": 153770, "total_steps": 204665, "loss": 0.0, "lr": 3.5369014486844205e-07, "epoch": 3.756626682627709, "percentage": 75.13, "elapsed_time": "3:19:42", "remaining_time": "1:06:05", "throughput": 8649.51, "total_tokens": 103639888} +{"current_steps": 153775, "total_steps": 204665, "loss": 0.0, "lr": 3.536250740079161e-07, "epoch": 3.756748833459556, "percentage": 75.13, "elapsed_time": "3:19:42", "remaining_time": "1:06:05", "throughput": 8649.53, "total_tokens": 103643024} +{"current_steps": 153780, "total_steps": 204665, "loss": 0.0006, "lr": 3.535600078480021e-07, "epoch": 3.756870984291403, "percentage": 75.14, "elapsed_time": "3:19:42", "remaining_time": "1:06:05", "throughput": 8649.54, "total_tokens": 103646160} +{"current_steps": 153785, "total_steps": 204665, "loss": 0.0691, "lr": 3.5349494638917354e-07, "epoch": 3.75699313512325, "percentage": 75.14, "elapsed_time": "3:19:43", "remaining_time": "1:06:04", "throughput": 8649.53, "total_tokens": 103648976} +{"current_steps": 153790, "total_steps": 204665, "loss": 0.0, "lr": 3.53429889631904e-07, "epoch": 3.7571152859550976, "percentage": 75.14, "elapsed_time": "3:19:43", "remaining_time": "1:06:04", "throughput": 8649.54, "total_tokens": 103652112} +{"current_steps": 153795, "total_steps": 204665, "loss": 0.0, "lr": 3.533648375766659e-07, "epoch": 3.7572374367869443, "percentage": 75.14, "elapsed_time": "3:19:43", "remaining_time": "1:06:03", "throughput": 8649.54, "total_tokens": 103655056} +{"current_steps": 153800, "total_steps": 204665, "loss": 0.0007, "lr": 3.5329979022393296e-07, "epoch": 3.7573595876187915, "percentage": 75.15, "elapsed_time": "3:19:44", "remaining_time": "1:06:03", "throughput": 8649.57, "total_tokens": 103658448} +{"current_steps": 153805, "total_steps": 204665, "loss": 0.0, "lr": 3.532347475741776e-07, "epoch": 3.7574817384506387, "percentage": 75.15, "elapsed_time": "3:19:44", "remaining_time": "1:06:03", "throughput": 8649.59, "total_tokens": 103661584} +{"current_steps": 153810, "total_steps": 204665, "loss": 0.0566, "lr": 3.5316970962787295e-07, "epoch": 3.757603889282486, "percentage": 75.15, "elapsed_time": "3:19:44", "remaining_time": "1:06:02", "throughput": 8649.64, "total_tokens": 103665296} +{"current_steps": 153815, "total_steps": 204665, "loss": 0.0, "lr": 3.5310467638549256e-07, "epoch": 3.757726040114333, "percentage": 75.15, "elapsed_time": "3:19:45", "remaining_time": "1:06:02", "throughput": 8649.64, "total_tokens": 103668240} +{"current_steps": 153820, "total_steps": 204665, "loss": 0.0, "lr": 3.5303964784750875e-07, "epoch": 3.7578481909461803, "percentage": 75.16, "elapsed_time": "3:19:45", "remaining_time": "1:06:01", "throughput": 8649.66, "total_tokens": 103671568} +{"current_steps": 153825, "total_steps": 204665, "loss": 0.0001, "lr": 3.529746240143948e-07, "epoch": 3.7579703417780275, "percentage": 75.16, "elapsed_time": "3:19:45", "remaining_time": "1:06:01", "throughput": 8649.69, "total_tokens": 103674832} +{"current_steps": 153830, "total_steps": 204665, "loss": 0.0, "lr": 3.5290960488662316e-07, "epoch": 3.7580924926098747, "percentage": 75.16, "elapsed_time": "3:19:46", "remaining_time": "1:06:01", "throughput": 8649.79, "total_tokens": 103679312} +{"current_steps": 153835, "total_steps": 204665, "loss": 0.0, "lr": 3.528445904646672e-07, "epoch": 3.758214643441722, "percentage": 75.16, "elapsed_time": "3:19:46", "remaining_time": "1:06:00", "throughput": 8649.85, "total_tokens": 103683088} +{"current_steps": 153840, "total_steps": 204665, "loss": 0.0751, "lr": 3.527795807489992e-07, "epoch": 3.758336794273569, "percentage": 75.17, "elapsed_time": "3:19:47", "remaining_time": "1:06:00", "throughput": 8649.93, "total_tokens": 103687312} +{"current_steps": 153845, "total_steps": 204665, "loss": 0.0, "lr": 3.5271457574009246e-07, "epoch": 3.7584589451054162, "percentage": 75.17, "elapsed_time": "3:19:47", "remaining_time": "1:05:59", "throughput": 8649.97, "total_tokens": 103690832} +{"current_steps": 153850, "total_steps": 204665, "loss": 0.0, "lr": 3.5264957543841935e-07, "epoch": 3.7585810959372634, "percentage": 75.17, "elapsed_time": "3:19:47", "remaining_time": "1:05:59", "throughput": 8649.99, "total_tokens": 103694032} +{"current_steps": 153855, "total_steps": 204665, "loss": 0.0, "lr": 3.5258457984445234e-07, "epoch": 3.7587032467691106, "percentage": 75.17, "elapsed_time": "3:19:48", "remaining_time": "1:05:59", "throughput": 8650.02, "total_tokens": 103697488} +{"current_steps": 153860, "total_steps": 204665, "loss": 0.0, "lr": 3.5251958895866487e-07, "epoch": 3.758825397600958, "percentage": 75.18, "elapsed_time": "3:19:48", "remaining_time": "1:05:58", "throughput": 8650.06, "total_tokens": 103701072} +{"current_steps": 153865, "total_steps": 204665, "loss": 0.0, "lr": 3.5245460278152863e-07, "epoch": 3.758947548432805, "percentage": 75.18, "elapsed_time": "3:19:48", "remaining_time": "1:05:58", "throughput": 8650.06, "total_tokens": 103704016} +{"current_steps": 153870, "total_steps": 204665, "loss": 0.0834, "lr": 3.523896213135167e-07, "epoch": 3.7590696992646517, "percentage": 75.18, "elapsed_time": "3:19:49", "remaining_time": "1:05:57", "throughput": 8650.1, "total_tokens": 103707472} +{"current_steps": 153875, "total_steps": 204665, "loss": 0.0489, "lr": 3.523246445551019e-07, "epoch": 3.7591918500964994, "percentage": 75.18, "elapsed_time": "3:19:49", "remaining_time": "1:05:57", "throughput": 8650.14, "total_tokens": 103711120} +{"current_steps": 153880, "total_steps": 204665, "loss": 0.0, "lr": 3.5225967250675623e-07, "epoch": 3.759314000928346, "percentage": 75.19, "elapsed_time": "3:19:49", "remaining_time": "1:05:57", "throughput": 8650.14, "total_tokens": 103714192} +{"current_steps": 153885, "total_steps": 204665, "loss": 0.0, "lr": 3.521947051689528e-07, "epoch": 3.7594361517601937, "percentage": 75.19, "elapsed_time": "3:19:50", "remaining_time": "1:05:56", "throughput": 8650.18, "total_tokens": 103717648} +{"current_steps": 153890, "total_steps": 204665, "loss": 0.0, "lr": 3.5212974254216343e-07, "epoch": 3.7595583025920405, "percentage": 75.19, "elapsed_time": "3:19:50", "remaining_time": "1:05:56", "throughput": 8650.21, "total_tokens": 103721104} +{"current_steps": 153895, "total_steps": 204665, "loss": 0.0, "lr": 3.5206478462686106e-07, "epoch": 3.7596804534238877, "percentage": 75.19, "elapsed_time": "3:19:50", "remaining_time": "1:05:55", "throughput": 8650.23, "total_tokens": 103724432} +{"current_steps": 153900, "total_steps": 204665, "loss": 0.0, "lr": 3.5199983142351753e-07, "epoch": 3.759802604255735, "percentage": 75.2, "elapsed_time": "3:19:51", "remaining_time": "1:05:55", "throughput": 8650.28, "total_tokens": 103728016} +{"current_steps": 153905, "total_steps": 204665, "loss": 0.0336, "lr": 3.5193488293260554e-07, "epoch": 3.759924755087582, "percentage": 75.2, "elapsed_time": "3:19:51", "remaining_time": "1:05:55", "throughput": 8650.31, "total_tokens": 103731472} +{"current_steps": 153910, "total_steps": 204665, "loss": 0.104, "lr": 3.5186993915459773e-07, "epoch": 3.7600469059194292, "percentage": 75.2, "elapsed_time": "3:19:52", "remaining_time": "1:05:54", "throughput": 8650.33, "total_tokens": 103734864} +{"current_steps": 153915, "total_steps": 204665, "loss": 0.0838, "lr": 3.5180500008996574e-07, "epoch": 3.7601690567512764, "percentage": 75.2, "elapsed_time": "3:19:52", "remaining_time": "1:05:54", "throughput": 8650.37, "total_tokens": 103738384} +{"current_steps": 153920, "total_steps": 204665, "loss": 0.0201, "lr": 3.517400657391824e-07, "epoch": 3.7602912075831236, "percentage": 75.21, "elapsed_time": "3:19:52", "remaining_time": "1:05:53", "throughput": 8650.39, "total_tokens": 103741776} +{"current_steps": 153925, "total_steps": 204665, "loss": 0.0004, "lr": 3.516751361027194e-07, "epoch": 3.760413358414971, "percentage": 75.21, "elapsed_time": "3:19:53", "remaining_time": "1:05:53", "throughput": 8650.4, "total_tokens": 103744848} +{"current_steps": 153930, "total_steps": 204665, "loss": 0.0003, "lr": 3.516102111810494e-07, "epoch": 3.760535509246818, "percentage": 75.21, "elapsed_time": "3:19:53", "remaining_time": "1:05:53", "throughput": 8650.41, "total_tokens": 103747920} +{"current_steps": 153935, "total_steps": 204665, "loss": 0.0715, "lr": 3.5154529097464413e-07, "epoch": 3.760657660078665, "percentage": 75.21, "elapsed_time": "3:19:53", "remaining_time": "1:05:52", "throughput": 8650.42, "total_tokens": 103751056} +{"current_steps": 153940, "total_steps": 204665, "loss": 0.0, "lr": 3.5148037548397616e-07, "epoch": 3.7607798109105124, "percentage": 75.22, "elapsed_time": "3:19:54", "remaining_time": "1:05:52", "throughput": 8650.45, "total_tokens": 103754448} +{"current_steps": 153945, "total_steps": 204665, "loss": 0.0224, "lr": 3.514154647095171e-07, "epoch": 3.7609019617423596, "percentage": 75.22, "elapsed_time": "3:19:54", "remaining_time": "1:05:51", "throughput": 8650.46, "total_tokens": 103757648} +{"current_steps": 153950, "total_steps": 204665, "loss": 0.0, "lr": 3.5135055865173943e-07, "epoch": 3.7610241125742068, "percentage": 75.22, "elapsed_time": "3:19:54", "remaining_time": "1:05:51", "throughput": 8650.45, "total_tokens": 103760528} +{"current_steps": 153955, "total_steps": 204665, "loss": 0.0001, "lr": 3.512856573111147e-07, "epoch": 3.761146263406054, "percentage": 75.22, "elapsed_time": "3:19:55", "remaining_time": "1:05:50", "throughput": 8650.47, "total_tokens": 103763728} +{"current_steps": 153960, "total_steps": 204665, "loss": 0.0001, "lr": 3.512207606881156e-07, "epoch": 3.761268414237901, "percentage": 75.23, "elapsed_time": "3:19:55", "remaining_time": "1:05:50", "throughput": 8650.48, "total_tokens": 103766928} +{"current_steps": 153965, "total_steps": 204665, "loss": 0.0205, "lr": 3.511558687832131e-07, "epoch": 3.761390565069748, "percentage": 75.23, "elapsed_time": "3:19:55", "remaining_time": "1:05:50", "throughput": 8650.58, "total_tokens": 103771280} +{"current_steps": 153970, "total_steps": 204665, "loss": 0.0001, "lr": 3.510909815968801e-07, "epoch": 3.7615127159015955, "percentage": 75.23, "elapsed_time": "3:19:56", "remaining_time": "1:05:49", "throughput": 8650.59, "total_tokens": 103774480} +{"current_steps": 153975, "total_steps": 204665, "loss": 0.0001, "lr": 3.510260991295876e-07, "epoch": 3.7616348667334423, "percentage": 75.23, "elapsed_time": "3:19:56", "remaining_time": "1:05:49", "throughput": 8650.6, "total_tokens": 103777552} +{"current_steps": 153980, "total_steps": 204665, "loss": 0.0, "lr": 3.509612213818083e-07, "epoch": 3.7617570175652895, "percentage": 75.24, "elapsed_time": "3:19:56", "remaining_time": "1:05:48", "throughput": 8650.65, "total_tokens": 103781264} +{"current_steps": 153985, "total_steps": 204665, "loss": 0.0, "lr": 3.5089634835401317e-07, "epoch": 3.7618791683971367, "percentage": 75.24, "elapsed_time": "3:19:57", "remaining_time": "1:05:48", "throughput": 8650.7, "total_tokens": 103785104} +{"current_steps": 153990, "total_steps": 204665, "loss": 0.0003, "lr": 3.5083148004667474e-07, "epoch": 3.762001319228984, "percentage": 75.24, "elapsed_time": "3:19:57", "remaining_time": "1:05:48", "throughput": 8650.74, "total_tokens": 103788688} +{"current_steps": 153995, "total_steps": 204665, "loss": 0.0, "lr": 3.5076661646026396e-07, "epoch": 3.762123470060831, "percentage": 75.24, "elapsed_time": "3:19:58", "remaining_time": "1:05:47", "throughput": 8650.75, "total_tokens": 103791760} +{"current_steps": 154000, "total_steps": 204665, "loss": 0.0002, "lr": 3.507017575952531e-07, "epoch": 3.762245620892678, "percentage": 75.24, "elapsed_time": "3:19:58", "remaining_time": "1:05:47", "throughput": 8650.76, "total_tokens": 103795024} +{"current_steps": 154005, "total_steps": 204665, "loss": 0.0, "lr": 3.5063690345211396e-07, "epoch": 3.7623677717245254, "percentage": 75.25, "elapsed_time": "3:19:58", "remaining_time": "1:05:46", "throughput": 8650.79, "total_tokens": 103798480} +{"current_steps": 154010, "total_steps": 204665, "loss": 0.0002, "lr": 3.505720540313176e-07, "epoch": 3.7624899225563726, "percentage": 75.25, "elapsed_time": "3:19:59", "remaining_time": "1:05:46", "throughput": 8650.81, "total_tokens": 103801616} +{"current_steps": 154015, "total_steps": 204665, "loss": 0.0001, "lr": 3.5050720933333634e-07, "epoch": 3.76261207338822, "percentage": 75.25, "elapsed_time": "3:19:59", "remaining_time": "1:05:46", "throughput": 8650.8, "total_tokens": 103804496} +{"current_steps": 154020, "total_steps": 204665, "loss": 0.0, "lr": 3.504423693586409e-07, "epoch": 3.762734224220067, "percentage": 75.25, "elapsed_time": "3:19:59", "remaining_time": "1:05:45", "throughput": 8650.85, "total_tokens": 103808208} +{"current_steps": 154025, "total_steps": 204665, "loss": 0.0018, "lr": 3.503775341077033e-07, "epoch": 3.762856375051914, "percentage": 75.26, "elapsed_time": "3:20:00", "remaining_time": "1:05:45", "throughput": 8650.9, "total_tokens": 103811920} +{"current_steps": 154030, "total_steps": 204665, "loss": 0.0001, "lr": 3.503127035809953e-07, "epoch": 3.7629785258837614, "percentage": 75.26, "elapsed_time": "3:20:00", "remaining_time": "1:05:44", "throughput": 8650.91, "total_tokens": 103815120} +{"current_steps": 154035, "total_steps": 204665, "loss": 0.0, "lr": 3.502478777789878e-07, "epoch": 3.7631006767156086, "percentage": 75.26, "elapsed_time": "3:20:00", "remaining_time": "1:05:44", "throughput": 8650.95, "total_tokens": 103818640} +{"current_steps": 154040, "total_steps": 204665, "loss": 0.0, "lr": 3.501830567021528e-07, "epoch": 3.7632228275474557, "percentage": 75.26, "elapsed_time": "3:20:01", "remaining_time": "1:05:44", "throughput": 8651.02, "total_tokens": 103822672} +{"current_steps": 154045, "total_steps": 204665, "loss": 0.0, "lr": 3.5011824035096104e-07, "epoch": 3.763344978379303, "percentage": 75.27, "elapsed_time": "3:20:01", "remaining_time": "1:05:43", "throughput": 8651.02, "total_tokens": 103825680} +{"current_steps": 154050, "total_steps": 204665, "loss": 0.0, "lr": 3.500534287258846e-07, "epoch": 3.7634671292111497, "percentage": 75.27, "elapsed_time": "3:20:01", "remaining_time": "1:05:43", "throughput": 8651.04, "total_tokens": 103828880} +{"current_steps": 154055, "total_steps": 204665, "loss": 0.0, "lr": 3.4998862182739444e-07, "epoch": 3.7635892800429973, "percentage": 75.27, "elapsed_time": "3:20:02", "remaining_time": "1:05:42", "throughput": 8651.07, "total_tokens": 103832336} +{"current_steps": 154060, "total_steps": 204665, "loss": 0.0, "lr": 3.499238196559615e-07, "epoch": 3.763711430874844, "percentage": 75.27, "elapsed_time": "3:20:02", "remaining_time": "1:05:42", "throughput": 8651.12, "total_tokens": 103836112} +{"current_steps": 154065, "total_steps": 204665, "loss": 0.0, "lr": 3.4985902221205775e-07, "epoch": 3.7638335817066917, "percentage": 75.28, "elapsed_time": "3:20:02", "remaining_time": "1:05:42", "throughput": 8651.13, "total_tokens": 103839248} +{"current_steps": 154070, "total_steps": 204665, "loss": 0.0001, "lr": 3.497942294961537e-07, "epoch": 3.7639557325385384, "percentage": 75.28, "elapsed_time": "3:20:03", "remaining_time": "1:05:41", "throughput": 8651.15, "total_tokens": 103842512} +{"current_steps": 154075, "total_steps": 204665, "loss": 0.0, "lr": 3.497294415087212e-07, "epoch": 3.7640778833703856, "percentage": 75.28, "elapsed_time": "3:20:03", "remaining_time": "1:05:41", "throughput": 8651.21, "total_tokens": 103846544} +{"current_steps": 154080, "total_steps": 204665, "loss": 0.043, "lr": 3.496646582502308e-07, "epoch": 3.764200034202233, "percentage": 75.28, "elapsed_time": "3:20:04", "remaining_time": "1:05:40", "throughput": 8651.25, "total_tokens": 103850000} +{"current_steps": 154085, "total_steps": 204665, "loss": 0.0001, "lr": 3.4959987972115437e-07, "epoch": 3.76432218503408, "percentage": 75.29, "elapsed_time": "3:20:04", "remaining_time": "1:05:40", "throughput": 8651.28, "total_tokens": 103853520} +{"current_steps": 154090, "total_steps": 204665, "loss": 0.0002, "lr": 3.49535105921962e-07, "epoch": 3.764444335865927, "percentage": 75.29, "elapsed_time": "3:20:04", "remaining_time": "1:05:40", "throughput": 8651.3, "total_tokens": 103856848} +{"current_steps": 154095, "total_steps": 204665, "loss": 0.0337, "lr": 3.494703368531254e-07, "epoch": 3.7645664866977744, "percentage": 75.29, "elapsed_time": "3:20:05", "remaining_time": "1:05:39", "throughput": 8651.32, "total_tokens": 103860112} +{"current_steps": 154100, "total_steps": 204665, "loss": 0.0, "lr": 3.494055725151158e-07, "epoch": 3.7646886375296216, "percentage": 75.29, "elapsed_time": "3:20:05", "remaining_time": "1:05:39", "throughput": 8651.33, "total_tokens": 103863312} +{"current_steps": 154105, "total_steps": 204665, "loss": 0.0, "lr": 3.4934081290840367e-07, "epoch": 3.7648107883614688, "percentage": 75.3, "elapsed_time": "3:20:05", "remaining_time": "1:05:38", "throughput": 8651.35, "total_tokens": 103866576} +{"current_steps": 154110, "total_steps": 204665, "loss": 0.0004, "lr": 3.492760580334603e-07, "epoch": 3.764932939193316, "percentage": 75.3, "elapsed_time": "3:20:06", "remaining_time": "1:05:38", "throughput": 8651.37, "total_tokens": 103869968} +{"current_steps": 154115, "total_steps": 204665, "loss": 0.0001, "lr": 3.492113078907563e-07, "epoch": 3.765055090025163, "percentage": 75.3, "elapsed_time": "3:20:06", "remaining_time": "1:05:38", "throughput": 8651.39, "total_tokens": 103873296} +{"current_steps": 154120, "total_steps": 204665, "loss": 0.0001, "lr": 3.4914656248076256e-07, "epoch": 3.7651772408570103, "percentage": 75.3, "elapsed_time": "3:20:06", "remaining_time": "1:05:37", "throughput": 8651.41, "total_tokens": 103876560} +{"current_steps": 154125, "total_steps": 204665, "loss": 0.0, "lr": 3.490818218039504e-07, "epoch": 3.7652993916888575, "percentage": 75.31, "elapsed_time": "3:20:07", "remaining_time": "1:05:37", "throughput": 8651.42, "total_tokens": 103879632} +{"current_steps": 154130, "total_steps": 204665, "loss": 0.0, "lr": 3.4901708586079003e-07, "epoch": 3.7654215425207047, "percentage": 75.31, "elapsed_time": "3:20:07", "remaining_time": "1:05:36", "throughput": 8651.46, "total_tokens": 103883216} +{"current_steps": 154135, "total_steps": 204665, "loss": 0.0, "lr": 3.4895235465175286e-07, "epoch": 3.7655436933525515, "percentage": 75.31, "elapsed_time": "3:20:07", "remaining_time": "1:05:36", "throughput": 8651.49, "total_tokens": 103886672} +{"current_steps": 154140, "total_steps": 204665, "loss": 0.0, "lr": 3.488876281773089e-07, "epoch": 3.765665844184399, "percentage": 75.31, "elapsed_time": "3:20:08", "remaining_time": "1:05:36", "throughput": 8651.5, "total_tokens": 103889680} +{"current_steps": 154145, "total_steps": 204665, "loss": 0.0001, "lr": 3.4882290643792967e-07, "epoch": 3.765787995016246, "percentage": 75.32, "elapsed_time": "3:20:08", "remaining_time": "1:05:35", "throughput": 8651.57, "total_tokens": 103893584} +{"current_steps": 154150, "total_steps": 204665, "loss": 0.0, "lr": 3.4875818943408496e-07, "epoch": 3.7659101458480935, "percentage": 75.32, "elapsed_time": "3:20:08", "remaining_time": "1:05:35", "throughput": 8651.61, "total_tokens": 103897168} +{"current_steps": 154155, "total_steps": 204665, "loss": 0.0, "lr": 3.486934771662462e-07, "epoch": 3.76603229667994, "percentage": 75.32, "elapsed_time": "3:20:09", "remaining_time": "1:05:34", "throughput": 8651.64, "total_tokens": 103900624} +{"current_steps": 154160, "total_steps": 204665, "loss": 0.0001, "lr": 3.4862876963488375e-07, "epoch": 3.7661544475117874, "percentage": 75.32, "elapsed_time": "3:20:09", "remaining_time": "1:05:34", "throughput": 8651.64, "total_tokens": 103903376} +{"current_steps": 154165, "total_steps": 204665, "loss": 0.0143, "lr": 3.4856406684046767e-07, "epoch": 3.7662765983436346, "percentage": 75.33, "elapsed_time": "3:20:10", "remaining_time": "1:05:34", "throughput": 8651.66, "total_tokens": 103906704} +{"current_steps": 154170, "total_steps": 204665, "loss": 0.0, "lr": 3.484993687834693e-07, "epoch": 3.766398749175482, "percentage": 75.33, "elapsed_time": "3:20:10", "remaining_time": "1:05:33", "throughput": 8651.7, "total_tokens": 103910160} +{"current_steps": 154175, "total_steps": 204665, "loss": 0.0002, "lr": 3.4843467546435836e-07, "epoch": 3.766520900007329, "percentage": 75.33, "elapsed_time": "3:20:10", "remaining_time": "1:05:33", "throughput": 8651.72, "total_tokens": 103913424} +{"current_steps": 154180, "total_steps": 204665, "loss": 0.0001, "lr": 3.4836998688360576e-07, "epoch": 3.766643050839176, "percentage": 75.33, "elapsed_time": "3:20:11", "remaining_time": "1:05:32", "throughput": 8651.74, "total_tokens": 103916688} +{"current_steps": 154185, "total_steps": 204665, "loss": 0.0, "lr": 3.4830530304168216e-07, "epoch": 3.7667652016710234, "percentage": 75.34, "elapsed_time": "3:20:11", "remaining_time": "1:05:32", "throughput": 8651.76, "total_tokens": 103919952} +{"current_steps": 154190, "total_steps": 204665, "loss": 0.0001, "lr": 3.482406239390574e-07, "epoch": 3.7668873525028705, "percentage": 75.34, "elapsed_time": "3:20:11", "remaining_time": "1:05:32", "throughput": 8651.79, "total_tokens": 103923280} +{"current_steps": 154195, "total_steps": 204665, "loss": 0.0, "lr": 3.4817594957620243e-07, "epoch": 3.7670095033347177, "percentage": 75.34, "elapsed_time": "3:20:12", "remaining_time": "1:05:31", "throughput": 8651.81, "total_tokens": 103926544} +{"current_steps": 154200, "total_steps": 204665, "loss": 0.0, "lr": 3.4811127995358693e-07, "epoch": 3.767131654166565, "percentage": 75.34, "elapsed_time": "3:20:12", "remaining_time": "1:05:31", "throughput": 8651.85, "total_tokens": 103929936} +{"current_steps": 154205, "total_steps": 204665, "loss": 0.0, "lr": 3.4804661507168186e-07, "epoch": 3.767253804998412, "percentage": 75.35, "elapsed_time": "3:20:12", "remaining_time": "1:05:30", "throughput": 8651.89, "total_tokens": 103933456} +{"current_steps": 154210, "total_steps": 204665, "loss": 0.075, "lr": 3.4798195493095683e-07, "epoch": 3.7673759558302593, "percentage": 75.35, "elapsed_time": "3:20:13", "remaining_time": "1:05:30", "throughput": 8651.91, "total_tokens": 103936656} +{"current_steps": 154215, "total_steps": 204665, "loss": 0.0, "lr": 3.4791729953188243e-07, "epoch": 3.7674981066621065, "percentage": 75.35, "elapsed_time": "3:20:13", "remaining_time": "1:05:30", "throughput": 8651.94, "total_tokens": 103940048} +{"current_steps": 154220, "total_steps": 204665, "loss": 0.0, "lr": 3.4785264887492914e-07, "epoch": 3.7676202574939537, "percentage": 75.35, "elapsed_time": "3:20:13", "remaining_time": "1:05:29", "throughput": 8651.96, "total_tokens": 103943376} +{"current_steps": 154225, "total_steps": 204665, "loss": 0.0004, "lr": 3.477880029605665e-07, "epoch": 3.767742408325801, "percentage": 75.35, "elapsed_time": "3:20:14", "remaining_time": "1:05:29", "throughput": 8652.03, "total_tokens": 103947216} +{"current_steps": 154230, "total_steps": 204665, "loss": 0.0004, "lr": 3.477233617892652e-07, "epoch": 3.7678645591576476, "percentage": 75.36, "elapsed_time": "3:20:14", "remaining_time": "1:05:28", "throughput": 8652.08, "total_tokens": 103951120} +{"current_steps": 154235, "total_steps": 204665, "loss": 0.0, "lr": 3.476587253614948e-07, "epoch": 3.7679867099894953, "percentage": 75.36, "elapsed_time": "3:20:14", "remaining_time": "1:05:28", "throughput": 8652.12, "total_tokens": 103954576} +{"current_steps": 154240, "total_steps": 204665, "loss": 0.0001, "lr": 3.4759409367772586e-07, "epoch": 3.768108860821342, "percentage": 75.36, "elapsed_time": "3:20:15", "remaining_time": "1:05:28", "throughput": 8652.14, "total_tokens": 103957840} +{"current_steps": 154245, "total_steps": 204665, "loss": 0.0, "lr": 3.475294667384279e-07, "epoch": 3.7682310116531896, "percentage": 75.36, "elapsed_time": "3:20:15", "remaining_time": "1:05:27", "throughput": 8652.17, "total_tokens": 103961296} +{"current_steps": 154250, "total_steps": 204665, "loss": 0.0, "lr": 3.4746484454407135e-07, "epoch": 3.7683531624850364, "percentage": 75.37, "elapsed_time": "3:20:15", "remaining_time": "1:05:27", "throughput": 8652.21, "total_tokens": 103964752} +{"current_steps": 154255, "total_steps": 204665, "loss": 0.0, "lr": 3.4740022709512575e-07, "epoch": 3.7684753133168836, "percentage": 75.37, "elapsed_time": "3:20:16", "remaining_time": "1:05:26", "throughput": 8652.27, "total_tokens": 103968592} +{"current_steps": 154260, "total_steps": 204665, "loss": 0.0869, "lr": 3.473356143920615e-07, "epoch": 3.7685974641487308, "percentage": 75.37, "elapsed_time": "3:20:16", "remaining_time": "1:05:26", "throughput": 8652.3, "total_tokens": 103971984} +{"current_steps": 154265, "total_steps": 204665, "loss": 0.0, "lr": 3.472710064353478e-07, "epoch": 3.768719614980578, "percentage": 75.37, "elapsed_time": "3:20:17", "remaining_time": "1:05:26", "throughput": 8652.32, "total_tokens": 103975120} +{"current_steps": 154270, "total_steps": 204665, "loss": 0.0, "lr": 3.4720640322545537e-07, "epoch": 3.768841765812425, "percentage": 75.38, "elapsed_time": "3:20:17", "remaining_time": "1:05:25", "throughput": 8652.35, "total_tokens": 103978512} +{"current_steps": 154275, "total_steps": 204665, "loss": 0.0, "lr": 3.471418047628532e-07, "epoch": 3.7689639166442723, "percentage": 75.38, "elapsed_time": "3:20:17", "remaining_time": "1:05:25", "throughput": 8652.36, "total_tokens": 103981584} +{"current_steps": 154280, "total_steps": 204665, "loss": 0.0003, "lr": 3.470772110480117e-07, "epoch": 3.7690860674761195, "percentage": 75.38, "elapsed_time": "3:20:18", "remaining_time": "1:05:24", "throughput": 8652.4, "total_tokens": 103985040} +{"current_steps": 154285, "total_steps": 204665, "loss": 0.0, "lr": 3.4701262208140004e-07, "epoch": 3.7692082183079667, "percentage": 75.38, "elapsed_time": "3:20:18", "remaining_time": "1:05:24", "throughput": 8652.42, "total_tokens": 103988304} +{"current_steps": 154290, "total_steps": 204665, "loss": 0.0, "lr": 3.4694803786348857e-07, "epoch": 3.769330369139814, "percentage": 75.39, "elapsed_time": "3:20:18", "remaining_time": "1:05:24", "throughput": 8652.45, "total_tokens": 103991696} +{"current_steps": 154295, "total_steps": 204665, "loss": 0.0, "lr": 3.468834583947462e-07, "epoch": 3.769452519971661, "percentage": 75.39, "elapsed_time": "3:20:19", "remaining_time": "1:05:23", "throughput": 8652.49, "total_tokens": 103995152} +{"current_steps": 154300, "total_steps": 204665, "loss": 0.0, "lr": 3.468188836756435e-07, "epoch": 3.7695746708035083, "percentage": 75.39, "elapsed_time": "3:20:19", "remaining_time": "1:05:23", "throughput": 8652.53, "total_tokens": 103998736} +{"current_steps": 154305, "total_steps": 204665, "loss": 0.0, "lr": 3.467543137066491e-07, "epoch": 3.7696968216353555, "percentage": 75.39, "elapsed_time": "3:20:19", "remaining_time": "1:05:22", "throughput": 8652.55, "total_tokens": 104001872} +{"current_steps": 154310, "total_steps": 204665, "loss": 0.0, "lr": 3.4668974848823294e-07, "epoch": 3.7698189724672027, "percentage": 75.4, "elapsed_time": "3:20:20", "remaining_time": "1:05:22", "throughput": 8652.59, "total_tokens": 104005392} +{"current_steps": 154315, "total_steps": 204665, "loss": 0.0005, "lr": 3.4662518802086516e-07, "epoch": 3.7699411232990494, "percentage": 75.4, "elapsed_time": "3:20:20", "remaining_time": "1:05:22", "throughput": 8652.59, "total_tokens": 104008336} +{"current_steps": 154320, "total_steps": 204665, "loss": 0.0001, "lr": 3.465606323050143e-07, "epoch": 3.770063274130897, "percentage": 75.4, "elapsed_time": "3:20:20", "remaining_time": "1:05:21", "throughput": 8652.6, "total_tokens": 104011408} +{"current_steps": 154325, "total_steps": 204665, "loss": 0.0, "lr": 3.4649608134115074e-07, "epoch": 3.770185424962744, "percentage": 75.4, "elapsed_time": "3:20:21", "remaining_time": "1:05:21", "throughput": 8652.61, "total_tokens": 104014544} +{"current_steps": 154330, "total_steps": 204665, "loss": 0.0, "lr": 3.4643153512974297e-07, "epoch": 3.7703075757945914, "percentage": 75.41, "elapsed_time": "3:20:21", "remaining_time": "1:05:20", "throughput": 8652.62, "total_tokens": 104017616} +{"current_steps": 154335, "total_steps": 204665, "loss": 0.0, "lr": 3.463669936712613e-07, "epoch": 3.770429726626438, "percentage": 75.41, "elapsed_time": "3:20:21", "remaining_time": "1:05:20", "throughput": 8652.65, "total_tokens": 104021008} +{"current_steps": 154340, "total_steps": 204665, "loss": 0.0, "lr": 3.463024569661743e-07, "epoch": 3.7705518774582854, "percentage": 75.41, "elapsed_time": "3:20:22", "remaining_time": "1:05:20", "throughput": 8652.67, "total_tokens": 104024208} +{"current_steps": 154345, "total_steps": 204665, "loss": 0.0, "lr": 3.462379250149516e-07, "epoch": 3.7706740282901325, "percentage": 75.41, "elapsed_time": "3:20:22", "remaining_time": "1:05:19", "throughput": 8652.69, "total_tokens": 104027344} +{"current_steps": 154350, "total_steps": 204665, "loss": 0.0, "lr": 3.4617339781806296e-07, "epoch": 3.7707961791219797, "percentage": 75.42, "elapsed_time": "3:20:22", "remaining_time": "1:05:19", "throughput": 8652.7, "total_tokens": 104030544} +{"current_steps": 154355, "total_steps": 204665, "loss": 0.0, "lr": 3.4610887537597687e-07, "epoch": 3.770918329953827, "percentage": 75.42, "elapsed_time": "3:20:23", "remaining_time": "1:05:18", "throughput": 8652.74, "total_tokens": 104034064} +{"current_steps": 154360, "total_steps": 204665, "loss": 0.0001, "lr": 3.460443576891632e-07, "epoch": 3.771040480785674, "percentage": 75.42, "elapsed_time": "3:20:23", "remaining_time": "1:05:18", "throughput": 8652.77, "total_tokens": 104037328} +{"current_steps": 154365, "total_steps": 204665, "loss": 0.0, "lr": 3.4597984475809094e-07, "epoch": 3.7711626316175213, "percentage": 75.42, "elapsed_time": "3:20:23", "remaining_time": "1:05:18", "throughput": 8652.81, "total_tokens": 104040912} +{"current_steps": 154370, "total_steps": 204665, "loss": 0.0, "lr": 3.459153365832288e-07, "epoch": 3.7712847824493685, "percentage": 75.43, "elapsed_time": "3:20:24", "remaining_time": "1:05:17", "throughput": 8652.87, "total_tokens": 104044688} +{"current_steps": 154375, "total_steps": 204665, "loss": 0.0, "lr": 3.458508331650465e-07, "epoch": 3.7714069332812157, "percentage": 75.43, "elapsed_time": "3:20:24", "remaining_time": "1:05:17", "throughput": 8652.89, "total_tokens": 104047824} +{"current_steps": 154380, "total_steps": 204665, "loss": 0.0002, "lr": 3.457863345040126e-07, "epoch": 3.771529084113063, "percentage": 75.43, "elapsed_time": "3:20:24", "remaining_time": "1:05:16", "throughput": 8652.89, "total_tokens": 104050768} +{"current_steps": 154385, "total_steps": 204665, "loss": 0.0001, "lr": 3.457218406005968e-07, "epoch": 3.77165123494491, "percentage": 75.43, "elapsed_time": "3:20:25", "remaining_time": "1:05:16", "throughput": 8652.94, "total_tokens": 104054480} +{"current_steps": 154390, "total_steps": 204665, "loss": 0.0, "lr": 3.456573514552675e-07, "epoch": 3.7717733857767572, "percentage": 75.44, "elapsed_time": "3:20:25", "remaining_time": "1:05:15", "throughput": 8652.98, "total_tokens": 104057936} +{"current_steps": 154395, "total_steps": 204665, "loss": 0.0002, "lr": 3.4559286706849424e-07, "epoch": 3.7718955366086044, "percentage": 75.44, "elapsed_time": "3:20:26", "remaining_time": "1:05:15", "throughput": 8652.99, "total_tokens": 104060944} +{"current_steps": 154400, "total_steps": 204665, "loss": 0.0, "lr": 3.455283874407452e-07, "epoch": 3.7720176874404516, "percentage": 75.44, "elapsed_time": "3:20:26", "remaining_time": "1:05:15", "throughput": 8653.02, "total_tokens": 104064336} +{"current_steps": 154405, "total_steps": 204665, "loss": 0.0234, "lr": 3.4546391257248985e-07, "epoch": 3.772139838272299, "percentage": 75.44, "elapsed_time": "3:20:26", "remaining_time": "1:05:14", "throughput": 8653.04, "total_tokens": 104067600} +{"current_steps": 154410, "total_steps": 204665, "loss": 0.0, "lr": 3.453994424641973e-07, "epoch": 3.7722619891041456, "percentage": 75.45, "elapsed_time": "3:20:27", "remaining_time": "1:05:14", "throughput": 8653.09, "total_tokens": 104071120} +{"current_steps": 154415, "total_steps": 204665, "loss": 0.0274, "lr": 3.453349771163357e-07, "epoch": 3.772384139935993, "percentage": 75.45, "elapsed_time": "3:20:27", "remaining_time": "1:05:13", "throughput": 8653.11, "total_tokens": 104074384} +{"current_steps": 154420, "total_steps": 204665, "loss": 0.0442, "lr": 3.4527051652937467e-07, "epoch": 3.77250629076784, "percentage": 75.45, "elapsed_time": "3:20:27", "remaining_time": "1:05:13", "throughput": 8653.12, "total_tokens": 104077392} +{"current_steps": 154425, "total_steps": 204665, "loss": 0.0, "lr": 3.452060607037821e-07, "epoch": 3.772628441599687, "percentage": 75.45, "elapsed_time": "3:20:28", "remaining_time": "1:05:13", "throughput": 8653.14, "total_tokens": 104080656} +{"current_steps": 154430, "total_steps": 204665, "loss": 0.0604, "lr": 3.4514160964002725e-07, "epoch": 3.7727505924315343, "percentage": 75.46, "elapsed_time": "3:20:28", "remaining_time": "1:05:12", "throughput": 8653.16, "total_tokens": 104083792} +{"current_steps": 154435, "total_steps": 204665, "loss": 0.0, "lr": 3.450771633385791e-07, "epoch": 3.7728727432633815, "percentage": 75.46, "elapsed_time": "3:20:28", "remaining_time": "1:05:12", "throughput": 8653.18, "total_tokens": 104086992} +{"current_steps": 154440, "total_steps": 204665, "loss": 0.0, "lr": 3.450127217999055e-07, "epoch": 3.7729948940952287, "percentage": 75.46, "elapsed_time": "3:20:29", "remaining_time": "1:05:11", "throughput": 8653.22, "total_tokens": 104090512} +{"current_steps": 154445, "total_steps": 204665, "loss": 0.0, "lr": 3.44948285024476e-07, "epoch": 3.773117044927076, "percentage": 75.46, "elapsed_time": "3:20:29", "remaining_time": "1:05:11", "throughput": 8653.24, "total_tokens": 104093776} +{"current_steps": 154450, "total_steps": 204665, "loss": 0.0, "lr": 3.4488385301275833e-07, "epoch": 3.773239195758923, "percentage": 75.46, "elapsed_time": "3:20:29", "remaining_time": "1:05:11", "throughput": 8653.31, "total_tokens": 104097680} +{"current_steps": 154455, "total_steps": 204665, "loss": 0.0671, "lr": 3.448194257652219e-07, "epoch": 3.7733613465907703, "percentage": 75.47, "elapsed_time": "3:20:30", "remaining_time": "1:05:10", "throughput": 8653.31, "total_tokens": 104100688} +{"current_steps": 154460, "total_steps": 204665, "loss": 0.0, "lr": 3.447550032823345e-07, "epoch": 3.7734834974226175, "percentage": 75.47, "elapsed_time": "3:20:30", "remaining_time": "1:05:10", "throughput": 8653.33, "total_tokens": 104103824} +{"current_steps": 154465, "total_steps": 204665, "loss": 0.0, "lr": 3.446905855645653e-07, "epoch": 3.7736056482544647, "percentage": 75.47, "elapsed_time": "3:20:30", "remaining_time": "1:05:09", "throughput": 8653.36, "total_tokens": 104107216} +{"current_steps": 154470, "total_steps": 204665, "loss": 0.0787, "lr": 3.4462617261238245e-07, "epoch": 3.773727799086312, "percentage": 75.47, "elapsed_time": "3:20:31", "remaining_time": "1:05:09", "throughput": 8653.39, "total_tokens": 104110608} +{"current_steps": 154475, "total_steps": 204665, "loss": 0.0001, "lr": 3.4456176442625393e-07, "epoch": 3.773849949918159, "percentage": 75.48, "elapsed_time": "3:20:31", "remaining_time": "1:05:09", "throughput": 8653.42, "total_tokens": 104114000} +{"current_steps": 154480, "total_steps": 204665, "loss": 0.0, "lr": 3.4449736100664895e-07, "epoch": 3.773972100750006, "percentage": 75.48, "elapsed_time": "3:20:31", "remaining_time": "1:05:08", "throughput": 8653.47, "total_tokens": 104117648} +{"current_steps": 154485, "total_steps": 204665, "loss": 0.0, "lr": 3.4443296235403507e-07, "epoch": 3.7740942515818534, "percentage": 75.48, "elapsed_time": "3:20:32", "remaining_time": "1:05:08", "throughput": 8653.48, "total_tokens": 104120784} +{"current_steps": 154490, "total_steps": 204665, "loss": 0.0403, "lr": 3.443685684688814e-07, "epoch": 3.7742164024137006, "percentage": 75.48, "elapsed_time": "3:20:32", "remaining_time": "1:05:07", "throughput": 8653.53, "total_tokens": 104124432} +{"current_steps": 154495, "total_steps": 204665, "loss": 0.0, "lr": 3.4430417935165547e-07, "epoch": 3.7743385532455473, "percentage": 75.49, "elapsed_time": "3:20:32", "remaining_time": "1:05:07", "throughput": 8653.55, "total_tokens": 104127568} +{"current_steps": 154500, "total_steps": 204665, "loss": 0.0, "lr": 3.44239795002826e-07, "epoch": 3.774460704077395, "percentage": 75.49, "elapsed_time": "3:20:33", "remaining_time": "1:05:07", "throughput": 8653.56, "total_tokens": 104130704} +{"current_steps": 154505, "total_steps": 204665, "loss": 0.0, "lr": 3.4417541542286134e-07, "epoch": 3.7745828549092417, "percentage": 75.49, "elapsed_time": "3:20:33", "remaining_time": "1:05:06", "throughput": 8653.57, "total_tokens": 104133776} +{"current_steps": 154510, "total_steps": 204665, "loss": 0.0, "lr": 3.4411104061222916e-07, "epoch": 3.7747050057410894, "percentage": 75.49, "elapsed_time": "3:20:33", "remaining_time": "1:05:06", "throughput": 8653.6, "total_tokens": 104137104} +{"current_steps": 154515, "total_steps": 204665, "loss": 0.0001, "lr": 3.4404667057139827e-07, "epoch": 3.774827156572936, "percentage": 75.5, "elapsed_time": "3:20:34", "remaining_time": "1:05:05", "throughput": 8653.72, "total_tokens": 104141776} +{"current_steps": 154520, "total_steps": 204665, "loss": 0.0, "lr": 3.4398230530083596e-07, "epoch": 3.7749493074047833, "percentage": 75.5, "elapsed_time": "3:20:34", "remaining_time": "1:05:05", "throughput": 8653.79, "total_tokens": 104145680} +{"current_steps": 154525, "total_steps": 204665, "loss": 0.0, "lr": 3.4391794480101087e-07, "epoch": 3.7750714582366305, "percentage": 75.5, "elapsed_time": "3:20:35", "remaining_time": "1:05:05", "throughput": 8653.8, "total_tokens": 104148880} +{"current_steps": 154530, "total_steps": 204665, "loss": 0.0, "lr": 3.4385358907239135e-07, "epoch": 3.7751936090684777, "percentage": 75.5, "elapsed_time": "3:20:35", "remaining_time": "1:05:04", "throughput": 8653.81, "total_tokens": 104151952} +{"current_steps": 154535, "total_steps": 204665, "loss": 0.06, "lr": 3.437892381154446e-07, "epoch": 3.775315759900325, "percentage": 75.51, "elapsed_time": "3:20:35", "remaining_time": "1:05:04", "throughput": 8653.87, "total_tokens": 104155728} +{"current_steps": 154540, "total_steps": 204665, "loss": 0.0, "lr": 3.4372489193063935e-07, "epoch": 3.775437910732172, "percentage": 75.51, "elapsed_time": "3:20:36", "remaining_time": "1:05:03", "throughput": 8653.87, "total_tokens": 104158672} +{"current_steps": 154545, "total_steps": 204665, "loss": 0.0, "lr": 3.436605505184429e-07, "epoch": 3.7755600615640192, "percentage": 75.51, "elapsed_time": "3:20:36", "remaining_time": "1:05:03", "throughput": 8653.93, "total_tokens": 104162384} +{"current_steps": 154550, "total_steps": 204665, "loss": 0.0, "lr": 3.435962138793237e-07, "epoch": 3.7756822123958664, "percentage": 75.51, "elapsed_time": "3:20:36", "remaining_time": "1:05:03", "throughput": 8653.95, "total_tokens": 104165584} +{"current_steps": 154555, "total_steps": 204665, "loss": 0.0, "lr": 3.4353188201374915e-07, "epoch": 3.7758043632277136, "percentage": 75.52, "elapsed_time": "3:20:37", "remaining_time": "1:05:02", "throughput": 8653.97, "total_tokens": 104168848} +{"current_steps": 154560, "total_steps": 204665, "loss": 0.0313, "lr": 3.434675549221876e-07, "epoch": 3.775926514059561, "percentage": 75.52, "elapsed_time": "3:20:37", "remaining_time": "1:05:02", "throughput": 8653.99, "total_tokens": 104171984} +{"current_steps": 154565, "total_steps": 204665, "loss": 0.0, "lr": 3.434032326051063e-07, "epoch": 3.776048664891408, "percentage": 75.52, "elapsed_time": "3:20:37", "remaining_time": "1:05:01", "throughput": 8653.99, "total_tokens": 104174992} +{"current_steps": 154570, "total_steps": 204665, "loss": 0.0, "lr": 3.4333891506297365e-07, "epoch": 3.776170815723255, "percentage": 75.52, "elapsed_time": "3:20:38", "remaining_time": "1:05:01", "throughput": 8654.03, "total_tokens": 104178512} +{"current_steps": 154575, "total_steps": 204665, "loss": 0.0, "lr": 3.432746022962566e-07, "epoch": 3.7762929665551024, "percentage": 75.53, "elapsed_time": "3:20:38", "remaining_time": "1:05:01", "throughput": 8654.05, "total_tokens": 104181904} +{"current_steps": 154580, "total_steps": 204665, "loss": 0.0, "lr": 3.432102943054237e-07, "epoch": 3.7764151173869496, "percentage": 75.53, "elapsed_time": "3:20:38", "remaining_time": "1:05:00", "throughput": 8654.07, "total_tokens": 104185104} +{"current_steps": 154585, "total_steps": 204665, "loss": 0.0, "lr": 3.4314599109094176e-07, "epoch": 3.7765372682187968, "percentage": 75.53, "elapsed_time": "3:20:39", "remaining_time": "1:05:00", "throughput": 8654.09, "total_tokens": 104188304} +{"current_steps": 154590, "total_steps": 204665, "loss": 0.0, "lr": 3.4308169265327926e-07, "epoch": 3.7766594190506435, "percentage": 75.53, "elapsed_time": "3:20:39", "remaining_time": "1:04:59", "throughput": 8654.11, "total_tokens": 104191504} +{"current_steps": 154595, "total_steps": 204665, "loss": 0.0255, "lr": 3.4301739899290303e-07, "epoch": 3.776781569882491, "percentage": 75.54, "elapsed_time": "3:20:39", "remaining_time": "1:04:59", "throughput": 8654.14, "total_tokens": 104194960} +{"current_steps": 154600, "total_steps": 204665, "loss": 0.0001, "lr": 3.429531101102814e-07, "epoch": 3.776903720714338, "percentage": 75.54, "elapsed_time": "3:20:40", "remaining_time": "1:04:59", "throughput": 8654.21, "total_tokens": 104198864} +{"current_steps": 154605, "total_steps": 204665, "loss": 0.0, "lr": 3.42888826005881e-07, "epoch": 3.777025871546185, "percentage": 75.54, "elapsed_time": "3:20:40", "remaining_time": "1:04:58", "throughput": 8654.25, "total_tokens": 104202448} +{"current_steps": 154610, "total_steps": 204665, "loss": 0.0, "lr": 3.428245466801701e-07, "epoch": 3.7771480223780323, "percentage": 75.54, "elapsed_time": "3:20:40", "remaining_time": "1:04:58", "throughput": 8654.28, "total_tokens": 104205904} +{"current_steps": 154615, "total_steps": 204665, "loss": 0.0, "lr": 3.427602721336157e-07, "epoch": 3.7772701732098795, "percentage": 75.55, "elapsed_time": "3:20:41", "remaining_time": "1:04:57", "throughput": 8654.29, "total_tokens": 104208976} +{"current_steps": 154620, "total_steps": 204665, "loss": 0.0538, "lr": 3.426960023666853e-07, "epoch": 3.7773923240417266, "percentage": 75.55, "elapsed_time": "3:20:41", "remaining_time": "1:04:57", "throughput": 8654.31, "total_tokens": 104212240} +{"current_steps": 154625, "total_steps": 204665, "loss": 0.0, "lr": 3.426317373798466e-07, "epoch": 3.777514474873574, "percentage": 75.55, "elapsed_time": "3:20:42", "remaining_time": "1:04:57", "throughput": 8654.37, "total_tokens": 104216080} +{"current_steps": 154630, "total_steps": 204665, "loss": 0.0001, "lr": 3.425674771735665e-07, "epoch": 3.777636625705421, "percentage": 75.55, "elapsed_time": "3:20:42", "remaining_time": "1:04:56", "throughput": 8654.41, "total_tokens": 104219536} +{"current_steps": 154635, "total_steps": 204665, "loss": 0.0, "lr": 3.4250322174831294e-07, "epoch": 3.777758776537268, "percentage": 75.56, "elapsed_time": "3:20:42", "remaining_time": "1:04:56", "throughput": 8654.44, "total_tokens": 104222864} +{"current_steps": 154640, "total_steps": 204665, "loss": 0.0, "lr": 3.424389711045523e-07, "epoch": 3.7778809273691154, "percentage": 75.56, "elapsed_time": "3:20:43", "remaining_time": "1:04:55", "throughput": 8654.48, "total_tokens": 104226448} +{"current_steps": 154645, "total_steps": 204665, "loss": 0.0001, "lr": 3.4237472524275266e-07, "epoch": 3.7780030782009626, "percentage": 75.56, "elapsed_time": "3:20:43", "remaining_time": "1:04:55", "throughput": 8654.52, "total_tokens": 104229904} +{"current_steps": 154650, "total_steps": 204665, "loss": 0.0, "lr": 3.423104841633807e-07, "epoch": 3.77812522903281, "percentage": 75.56, "elapsed_time": "3:20:43", "remaining_time": "1:04:55", "throughput": 8654.56, "total_tokens": 104233488} +{"current_steps": 154655, "total_steps": 204665, "loss": 0.0002, "lr": 3.422462478669037e-07, "epoch": 3.778247379864657, "percentage": 75.56, "elapsed_time": "3:20:44", "remaining_time": "1:04:54", "throughput": 8654.62, "total_tokens": 104237264} +{"current_steps": 154660, "total_steps": 204665, "loss": 0.0515, "lr": 3.4218201635378927e-07, "epoch": 3.778369530696504, "percentage": 75.57, "elapsed_time": "3:20:44", "remaining_time": "1:04:54", "throughput": 8654.63, "total_tokens": 104240336} +{"current_steps": 154665, "total_steps": 204665, "loss": 0.0001, "lr": 3.4211778962450376e-07, "epoch": 3.7784916815283514, "percentage": 75.57, "elapsed_time": "3:20:44", "remaining_time": "1:04:53", "throughput": 8654.67, "total_tokens": 104243984} +{"current_steps": 154670, "total_steps": 204665, "loss": 0.0, "lr": 3.4205356767951497e-07, "epoch": 3.7786138323601985, "percentage": 75.57, "elapsed_time": "3:20:45", "remaining_time": "1:04:53", "throughput": 8654.68, "total_tokens": 104246992} +{"current_steps": 154675, "total_steps": 204665, "loss": 0.1016, "lr": 3.4198935051928967e-07, "epoch": 3.7787359831920453, "percentage": 75.57, "elapsed_time": "3:20:45", "remaining_time": "1:04:53", "throughput": 8654.71, "total_tokens": 104250320} +{"current_steps": 154680, "total_steps": 204665, "loss": 0.0591, "lr": 3.419251381442945e-07, "epoch": 3.778858134023893, "percentage": 75.58, "elapsed_time": "3:20:45", "remaining_time": "1:04:52", "throughput": 8654.74, "total_tokens": 104253712} +{"current_steps": 154685, "total_steps": 204665, "loss": 0.0, "lr": 3.41860930554997e-07, "epoch": 3.7789802848557397, "percentage": 75.58, "elapsed_time": "3:20:46", "remaining_time": "1:04:52", "throughput": 8654.77, "total_tokens": 104257104} +{"current_steps": 154690, "total_steps": 204665, "loss": 0.0341, "lr": 3.4179672775186344e-07, "epoch": 3.7791024356875873, "percentage": 75.58, "elapsed_time": "3:20:46", "remaining_time": "1:04:51", "throughput": 8654.79, "total_tokens": 104260368} +{"current_steps": 154695, "total_steps": 204665, "loss": 0.0, "lr": 3.417325297353615e-07, "epoch": 3.779224586519434, "percentage": 75.58, "elapsed_time": "3:20:46", "remaining_time": "1:04:51", "throughput": 8654.81, "total_tokens": 104263696} +{"current_steps": 154700, "total_steps": 204665, "loss": 0.0, "lr": 3.4166833650595725e-07, "epoch": 3.7793467373512812, "percentage": 75.59, "elapsed_time": "3:20:47", "remaining_time": "1:04:51", "throughput": 8654.85, "total_tokens": 104267088} +{"current_steps": 154705, "total_steps": 204665, "loss": 0.0, "lr": 3.4160414806411844e-07, "epoch": 3.7794688881831284, "percentage": 75.59, "elapsed_time": "3:20:47", "remaining_time": "1:04:50", "throughput": 8654.84, "total_tokens": 104269968} +{"current_steps": 154710, "total_steps": 204665, "loss": 0.0002, "lr": 3.4153996441031086e-07, "epoch": 3.7795910390149756, "percentage": 75.59, "elapsed_time": "3:20:47", "remaining_time": "1:04:50", "throughput": 8654.84, "total_tokens": 104272848} +{"current_steps": 154715, "total_steps": 204665, "loss": 0.0, "lr": 3.4147578554500177e-07, "epoch": 3.779713189846823, "percentage": 75.59, "elapsed_time": "3:20:48", "remaining_time": "1:04:49", "throughput": 8654.86, "total_tokens": 104276176} +{"current_steps": 154720, "total_steps": 204665, "loss": 0.0, "lr": 3.4141161146865825e-07, "epoch": 3.77983534067867, "percentage": 75.6, "elapsed_time": "3:20:48", "remaining_time": "1:04:49", "throughput": 8654.86, "total_tokens": 104279120} +{"current_steps": 154725, "total_steps": 204665, "loss": 0.0003, "lr": 3.413474421817464e-07, "epoch": 3.779957491510517, "percentage": 75.6, "elapsed_time": "3:20:48", "remaining_time": "1:04:48", "throughput": 8654.91, "total_tokens": 104282704} +{"current_steps": 154730, "total_steps": 204665, "loss": 0.0001, "lr": 3.412832776847333e-07, "epoch": 3.7800796423423644, "percentage": 75.6, "elapsed_time": "3:20:49", "remaining_time": "1:04:48", "throughput": 8654.93, "total_tokens": 104285904} +{"current_steps": 154735, "total_steps": 204665, "loss": 0.0, "lr": 3.412191179780851e-07, "epoch": 3.7802017931742116, "percentage": 75.6, "elapsed_time": "3:20:49", "remaining_time": "1:04:48", "throughput": 8654.94, "total_tokens": 104289040} +{"current_steps": 154740, "total_steps": 204665, "loss": 0.0, "lr": 3.4115496306226863e-07, "epoch": 3.7803239440060588, "percentage": 75.61, "elapsed_time": "3:20:50", "remaining_time": "1:04:47", "throughput": 8654.97, "total_tokens": 104292368} +{"current_steps": 154745, "total_steps": 204665, "loss": 0.0, "lr": 3.410908129377509e-07, "epoch": 3.780446094837906, "percentage": 75.61, "elapsed_time": "3:20:50", "remaining_time": "1:04:47", "throughput": 8655.02, "total_tokens": 104296144} +{"current_steps": 154750, "total_steps": 204665, "loss": 0.0, "lr": 3.4102666760499753e-07, "epoch": 3.780568245669753, "percentage": 75.61, "elapsed_time": "3:20:50", "remaining_time": "1:04:46", "throughput": 8655.06, "total_tokens": 104299600} +{"current_steps": 154755, "total_steps": 204665, "loss": 0.0, "lr": 3.4096252706447595e-07, "epoch": 3.7806903965016003, "percentage": 75.61, "elapsed_time": "3:20:51", "remaining_time": "1:04:46", "throughput": 8655.1, "total_tokens": 104303120} +{"current_steps": 154760, "total_steps": 204665, "loss": 0.0566, "lr": 3.4089839131665175e-07, "epoch": 3.780812547333447, "percentage": 75.62, "elapsed_time": "3:20:51", "remaining_time": "1:04:46", "throughput": 8655.1, "total_tokens": 104306128} +{"current_steps": 154765, "total_steps": 204665, "loss": 0.0, "lr": 3.4083426036199203e-07, "epoch": 3.7809346981652947, "percentage": 75.62, "elapsed_time": "3:20:51", "remaining_time": "1:04:45", "throughput": 8655.12, "total_tokens": 104309328} +{"current_steps": 154770, "total_steps": 204665, "loss": 0.0006, "lr": 3.4077013420096255e-07, "epoch": 3.7810568489971415, "percentage": 75.62, "elapsed_time": "3:20:52", "remaining_time": "1:04:45", "throughput": 8655.13, "total_tokens": 104312400} +{"current_steps": 154775, "total_steps": 204665, "loss": 0.0, "lr": 3.4070601283403033e-07, "epoch": 3.781178999828989, "percentage": 75.62, "elapsed_time": "3:20:52", "remaining_time": "1:04:44", "throughput": 8655.14, "total_tokens": 104315600} +{"current_steps": 154780, "total_steps": 204665, "loss": 0.0, "lr": 3.406418962616612e-07, "epoch": 3.781301150660836, "percentage": 75.63, "elapsed_time": "3:20:52", "remaining_time": "1:04:44", "throughput": 8655.14, "total_tokens": 104318544} +{"current_steps": 154785, "total_steps": 204665, "loss": 0.0, "lr": 3.4057778448432127e-07, "epoch": 3.781423301492683, "percentage": 75.63, "elapsed_time": "3:20:53", "remaining_time": "1:04:44", "throughput": 8655.18, "total_tokens": 104322064} +{"current_steps": 154790, "total_steps": 204665, "loss": 0.0, "lr": 3.405136775024775e-07, "epoch": 3.78154545232453, "percentage": 75.63, "elapsed_time": "3:20:53", "remaining_time": "1:04:43", "throughput": 8655.24, "total_tokens": 104325776} +{"current_steps": 154795, "total_steps": 204665, "loss": 0.0453, "lr": 3.4044957531659514e-07, "epoch": 3.7816676031563774, "percentage": 75.63, "elapsed_time": "3:20:53", "remaining_time": "1:04:43", "throughput": 8655.28, "total_tokens": 104329360} +{"current_steps": 154800, "total_steps": 204665, "loss": 0.0, "lr": 3.4038547792714135e-07, "epoch": 3.7817897539882246, "percentage": 75.64, "elapsed_time": "3:20:54", "remaining_time": "1:04:42", "throughput": 8655.32, "total_tokens": 104332880} +{"current_steps": 154805, "total_steps": 204665, "loss": 0.0607, "lr": 3.403213853345813e-07, "epoch": 3.781911904820072, "percentage": 75.64, "elapsed_time": "3:20:54", "remaining_time": "1:04:42", "throughput": 8655.37, "total_tokens": 104336528} +{"current_steps": 154810, "total_steps": 204665, "loss": 0.0, "lr": 3.402572975393817e-07, "epoch": 3.782034055651919, "percentage": 75.64, "elapsed_time": "3:20:54", "remaining_time": "1:04:42", "throughput": 8655.38, "total_tokens": 104339600} +{"current_steps": 154815, "total_steps": 204665, "loss": 0.0678, "lr": 3.401932145420088e-07, "epoch": 3.782156206483766, "percentage": 75.64, "elapsed_time": "3:20:55", "remaining_time": "1:04:41", "throughput": 8655.43, "total_tokens": 104343248} +{"current_steps": 154820, "total_steps": 204665, "loss": 0.0001, "lr": 3.4012913634292796e-07, "epoch": 3.7822783573156133, "percentage": 75.65, "elapsed_time": "3:20:55", "remaining_time": "1:04:41", "throughput": 8655.46, "total_tokens": 104346640} +{"current_steps": 154825, "total_steps": 204665, "loss": 0.0, "lr": 3.400650629426057e-07, "epoch": 3.7824005081474605, "percentage": 75.65, "elapsed_time": "3:20:55", "remaining_time": "1:04:40", "throughput": 8655.51, "total_tokens": 104350352} +{"current_steps": 154830, "total_steps": 204665, "loss": 0.0001, "lr": 3.400009943415076e-07, "epoch": 3.7825226589793077, "percentage": 75.65, "elapsed_time": "3:20:56", "remaining_time": "1:04:40", "throughput": 8655.51, "total_tokens": 104353424} +{"current_steps": 154835, "total_steps": 204665, "loss": 0.0001, "lr": 3.3993693054009986e-07, "epoch": 3.782644809811155, "percentage": 75.65, "elapsed_time": "3:20:56", "remaining_time": "1:04:40", "throughput": 8655.55, "total_tokens": 104356944} +{"current_steps": 154840, "total_steps": 204665, "loss": 0.0, "lr": 3.3987287153884856e-07, "epoch": 3.782766960643002, "percentage": 75.66, "elapsed_time": "3:20:56", "remaining_time": "1:04:39", "throughput": 8655.59, "total_tokens": 104360400} +{"current_steps": 154845, "total_steps": 204665, "loss": 0.0, "lr": 3.3980881733821895e-07, "epoch": 3.7828891114748493, "percentage": 75.66, "elapsed_time": "3:20:57", "remaining_time": "1:04:39", "throughput": 8655.61, "total_tokens": 104363728} +{"current_steps": 154850, "total_steps": 204665, "loss": 0.0383, "lr": 3.3974476793867755e-07, "epoch": 3.7830112623066965, "percentage": 75.66, "elapsed_time": "3:20:57", "remaining_time": "1:04:38", "throughput": 8655.63, "total_tokens": 104366992} +{"current_steps": 154855, "total_steps": 204665, "loss": 0.0, "lr": 3.396807233406894e-07, "epoch": 3.7831334131385432, "percentage": 75.66, "elapsed_time": "3:20:58", "remaining_time": "1:04:38", "throughput": 8655.65, "total_tokens": 104370128} +{"current_steps": 154860, "total_steps": 204665, "loss": 0.0, "lr": 3.3961668354472107e-07, "epoch": 3.783255563970391, "percentage": 75.67, "elapsed_time": "3:20:58", "remaining_time": "1:04:38", "throughput": 8655.66, "total_tokens": 104373264} +{"current_steps": 154865, "total_steps": 204665, "loss": 0.0, "lr": 3.3955264855123747e-07, "epoch": 3.7833777148022376, "percentage": 75.67, "elapsed_time": "3:20:58", "remaining_time": "1:04:37", "throughput": 8655.67, "total_tokens": 104376336} +{"current_steps": 154870, "total_steps": 204665, "loss": 0.0013, "lr": 3.3948861836070463e-07, "epoch": 3.783499865634085, "percentage": 75.67, "elapsed_time": "3:20:59", "remaining_time": "1:04:37", "throughput": 8655.68, "total_tokens": 104379408} +{"current_steps": 154875, "total_steps": 204665, "loss": 0.0001, "lr": 3.394245929735885e-07, "epoch": 3.783622016465932, "percentage": 75.67, "elapsed_time": "3:20:59", "remaining_time": "1:04:36", "throughput": 8655.72, "total_tokens": 104382864} +{"current_steps": 154880, "total_steps": 204665, "loss": 0.0001, "lr": 3.3936057239035445e-07, "epoch": 3.783744167297779, "percentage": 75.67, "elapsed_time": "3:20:59", "remaining_time": "1:04:36", "throughput": 8655.76, "total_tokens": 104386448} +{"current_steps": 154885, "total_steps": 204665, "loss": 0.0001, "lr": 3.392965566114676e-07, "epoch": 3.7838663181296264, "percentage": 75.68, "elapsed_time": "3:21:00", "remaining_time": "1:04:36", "throughput": 8655.8, "total_tokens": 104389968} +{"current_steps": 154890, "total_steps": 204665, "loss": 0.0, "lr": 3.392325456373943e-07, "epoch": 3.7839884689614736, "percentage": 75.68, "elapsed_time": "3:21:00", "remaining_time": "1:04:35", "throughput": 8655.83, "total_tokens": 104393424} +{"current_steps": 154895, "total_steps": 204665, "loss": 0.0, "lr": 3.3916853946859936e-07, "epoch": 3.7841106197933208, "percentage": 75.68, "elapsed_time": "3:21:00", "remaining_time": "1:04:35", "throughput": 8655.83, "total_tokens": 104396304} +{"current_steps": 154900, "total_steps": 204665, "loss": 0.0, "lr": 3.3910453810554884e-07, "epoch": 3.784232770625168, "percentage": 75.68, "elapsed_time": "3:21:01", "remaining_time": "1:04:34", "throughput": 8655.85, "total_tokens": 104399568} +{"current_steps": 154905, "total_steps": 204665, "loss": 0.0, "lr": 3.390405415487075e-07, "epoch": 3.784354921457015, "percentage": 75.69, "elapsed_time": "3:21:01", "remaining_time": "1:04:34", "throughput": 8655.88, "total_tokens": 104402896} +{"current_steps": 154910, "total_steps": 204665, "loss": 0.0, "lr": 3.389765497985415e-07, "epoch": 3.7844770722888623, "percentage": 75.69, "elapsed_time": "3:21:01", "remaining_time": "1:04:34", "throughput": 8655.88, "total_tokens": 104405904} +{"current_steps": 154915, "total_steps": 204665, "loss": 0.0, "lr": 3.389125628555155e-07, "epoch": 3.7845992231207095, "percentage": 75.69, "elapsed_time": "3:21:02", "remaining_time": "1:04:33", "throughput": 8655.92, "total_tokens": 104409360} +{"current_steps": 154920, "total_steps": 204665, "loss": 0.0, "lr": 3.3884858072009546e-07, "epoch": 3.7847213739525567, "percentage": 75.69, "elapsed_time": "3:21:02", "remaining_time": "1:04:33", "throughput": 8655.91, "total_tokens": 104412112} +{"current_steps": 154925, "total_steps": 204665, "loss": 0.0, "lr": 3.387846033927461e-07, "epoch": 3.784843524784404, "percentage": 75.7, "elapsed_time": "3:21:02", "remaining_time": "1:04:32", "throughput": 8655.94, "total_tokens": 104415632} +{"current_steps": 154930, "total_steps": 204665, "loss": 0.0436, "lr": 3.387206308739329e-07, "epoch": 3.784965675616251, "percentage": 75.7, "elapsed_time": "3:21:03", "remaining_time": "1:04:32", "throughput": 8655.96, "total_tokens": 104418832} +{"current_steps": 154935, "total_steps": 204665, "loss": 0.0, "lr": 3.3865666316412143e-07, "epoch": 3.7850878264480983, "percentage": 75.7, "elapsed_time": "3:21:03", "remaining_time": "1:04:32", "throughput": 8655.99, "total_tokens": 104422224} +{"current_steps": 154940, "total_steps": 204665, "loss": 0.0181, "lr": 3.385927002637763e-07, "epoch": 3.785209977279945, "percentage": 75.7, "elapsed_time": "3:21:03", "remaining_time": "1:04:31", "throughput": 8656.04, "total_tokens": 104425936} +{"current_steps": 154945, "total_steps": 204665, "loss": 0.0, "lr": 3.3852874217336323e-07, "epoch": 3.7853321281117926, "percentage": 75.71, "elapsed_time": "3:21:04", "remaining_time": "1:04:31", "throughput": 8656.07, "total_tokens": 104429328} +{"current_steps": 154950, "total_steps": 204665, "loss": 0.0, "lr": 3.3846478889334673e-07, "epoch": 3.7854542789436394, "percentage": 75.71, "elapsed_time": "3:21:04", "remaining_time": "1:04:30", "throughput": 8656.13, "total_tokens": 104433168} +{"current_steps": 154955, "total_steps": 204665, "loss": 0.092, "lr": 3.384008404241926e-07, "epoch": 3.785576429775487, "percentage": 75.71, "elapsed_time": "3:21:04", "remaining_time": "1:04:30", "throughput": 8656.15, "total_tokens": 104436304} +{"current_steps": 154960, "total_steps": 204665, "loss": 0.0, "lr": 3.3833689676636525e-07, "epoch": 3.7856985806073338, "percentage": 75.71, "elapsed_time": "3:21:05", "remaining_time": "1:04:30", "throughput": 8656.18, "total_tokens": 104439760} +{"current_steps": 154965, "total_steps": 204665, "loss": 0.0001, "lr": 3.3827295792032984e-07, "epoch": 3.785820731439181, "percentage": 75.72, "elapsed_time": "3:21:05", "remaining_time": "1:04:29", "throughput": 8656.21, "total_tokens": 104443088} +{"current_steps": 154970, "total_steps": 204665, "loss": 0.0, "lr": 3.382090238865518e-07, "epoch": 3.785942882271028, "percentage": 75.72, "elapsed_time": "3:21:06", "remaining_time": "1:04:29", "throughput": 8656.26, "total_tokens": 104446800} +{"current_steps": 154975, "total_steps": 204665, "loss": 0.0001, "lr": 3.3814509466549545e-07, "epoch": 3.7860650331028753, "percentage": 75.72, "elapsed_time": "3:21:06", "remaining_time": "1:04:28", "throughput": 8656.33, "total_tokens": 104450768} +{"current_steps": 154980, "total_steps": 204665, "loss": 0.0002, "lr": 3.3808117025762626e-07, "epoch": 3.7861871839347225, "percentage": 75.72, "elapsed_time": "3:21:06", "remaining_time": "1:04:28", "throughput": 8656.36, "total_tokens": 104454032} +{"current_steps": 154985, "total_steps": 204665, "loss": 0.0, "lr": 3.380172506634089e-07, "epoch": 3.7863093347665697, "percentage": 75.73, "elapsed_time": "3:21:07", "remaining_time": "1:04:28", "throughput": 8656.41, "total_tokens": 104457744} +{"current_steps": 154990, "total_steps": 204665, "loss": 0.1035, "lr": 3.379533358833078e-07, "epoch": 3.786431485598417, "percentage": 75.73, "elapsed_time": "3:21:07", "remaining_time": "1:04:27", "throughput": 8656.43, "total_tokens": 104460880} +{"current_steps": 154995, "total_steps": 204665, "loss": 0.0, "lr": 3.3788942591778836e-07, "epoch": 3.786553636430264, "percentage": 75.73, "elapsed_time": "3:21:07", "remaining_time": "1:04:27", "throughput": 8656.45, "total_tokens": 104464208} +{"current_steps": 155000, "total_steps": 204665, "loss": 0.0, "lr": 3.3782552076731487e-07, "epoch": 3.7866757872621113, "percentage": 75.73, "elapsed_time": "3:21:08", "remaining_time": "1:04:26", "throughput": 8656.47, "total_tokens": 104467280} +{"current_steps": 155005, "total_steps": 204665, "loss": 0.0, "lr": 3.377616204323526e-07, "epoch": 3.7867979380939585, "percentage": 75.74, "elapsed_time": "3:21:08", "remaining_time": "1:04:26", "throughput": 8656.51, "total_tokens": 104470800} +{"current_steps": 155010, "total_steps": 204665, "loss": 0.0, "lr": 3.3769772491336554e-07, "epoch": 3.7869200889258057, "percentage": 75.74, "elapsed_time": "3:21:08", "remaining_time": "1:04:26", "throughput": 8656.52, "total_tokens": 104473872} +{"current_steps": 155015, "total_steps": 204665, "loss": 0.0, "lr": 3.3763383421081927e-07, "epoch": 3.787042239757653, "percentage": 75.74, "elapsed_time": "3:21:09", "remaining_time": "1:04:25", "throughput": 8656.54, "total_tokens": 104477136} +{"current_steps": 155020, "total_steps": 204665, "loss": 0.0, "lr": 3.3756994832517737e-07, "epoch": 3.7871643905895, "percentage": 75.74, "elapsed_time": "3:21:09", "remaining_time": "1:04:25", "throughput": 8656.57, "total_tokens": 104480592} +{"current_steps": 155025, "total_steps": 204665, "loss": 0.0, "lr": 3.3750606725690513e-07, "epoch": 3.7872865414213472, "percentage": 75.75, "elapsed_time": "3:21:09", "remaining_time": "1:04:24", "throughput": 8656.6, "total_tokens": 104483920} +{"current_steps": 155030, "total_steps": 204665, "loss": 0.0, "lr": 3.374421910064672e-07, "epoch": 3.7874086922531944, "percentage": 75.75, "elapsed_time": "3:21:10", "remaining_time": "1:04:24", "throughput": 8656.61, "total_tokens": 104487056} +{"current_steps": 155035, "total_steps": 204665, "loss": 0.0, "lr": 3.3737831957432763e-07, "epoch": 3.787530843085041, "percentage": 75.75, "elapsed_time": "3:21:10", "remaining_time": "1:04:24", "throughput": 8656.64, "total_tokens": 104490320} +{"current_steps": 155040, "total_steps": 204665, "loss": 0.0, "lr": 3.373144529609514e-07, "epoch": 3.787652993916889, "percentage": 75.75, "elapsed_time": "3:21:10", "remaining_time": "1:04:23", "throughput": 8656.64, "total_tokens": 104493328} +{"current_steps": 155045, "total_steps": 204665, "loss": 0.0446, "lr": 3.3725059116680245e-07, "epoch": 3.7877751447487356, "percentage": 75.76, "elapsed_time": "3:21:11", "remaining_time": "1:04:23", "throughput": 8656.65, "total_tokens": 104496400} +{"current_steps": 155050, "total_steps": 204665, "loss": 0.0003, "lr": 3.3718673419234565e-07, "epoch": 3.7878972955805827, "percentage": 75.76, "elapsed_time": "3:21:11", "remaining_time": "1:04:22", "throughput": 8656.68, "total_tokens": 104499856} +{"current_steps": 155055, "total_steps": 204665, "loss": 0.0, "lr": 3.37122882038045e-07, "epoch": 3.78801944641243, "percentage": 75.76, "elapsed_time": "3:21:11", "remaining_time": "1:04:22", "throughput": 8656.75, "total_tokens": 104503696} +{"current_steps": 155060, "total_steps": 204665, "loss": 0.0, "lr": 3.3705903470436504e-07, "epoch": 3.788141597244277, "percentage": 75.76, "elapsed_time": "3:21:12", "remaining_time": "1:04:22", "throughput": 8656.76, "total_tokens": 104506768} +{"current_steps": 155065, "total_steps": 204665, "loss": 0.0, "lr": 3.369951921917703e-07, "epoch": 3.7882637480761243, "percentage": 75.77, "elapsed_time": "3:21:12", "remaining_time": "1:04:21", "throughput": 8656.79, "total_tokens": 104510160} +{"current_steps": 155070, "total_steps": 204665, "loss": 0.0001, "lr": 3.369313545007246e-07, "epoch": 3.7883858989079715, "percentage": 75.77, "elapsed_time": "3:21:12", "remaining_time": "1:04:21", "throughput": 8656.83, "total_tokens": 104513680} +{"current_steps": 155075, "total_steps": 204665, "loss": 0.0002, "lr": 3.3686752163169275e-07, "epoch": 3.7885080497398187, "percentage": 75.77, "elapsed_time": "3:21:13", "remaining_time": "1:04:20", "throughput": 8656.87, "total_tokens": 104517200} +{"current_steps": 155080, "total_steps": 204665, "loss": 0.0, "lr": 3.368036935851384e-07, "epoch": 3.788630200571666, "percentage": 75.77, "elapsed_time": "3:21:13", "remaining_time": "1:04:20", "throughput": 8656.91, "total_tokens": 104520784} +{"current_steps": 155085, "total_steps": 204665, "loss": 0.0714, "lr": 3.367398703615262e-07, "epoch": 3.788752351403513, "percentage": 75.78, "elapsed_time": "3:21:14", "remaining_time": "1:04:20", "throughput": 8656.96, "total_tokens": 104524496} +{"current_steps": 155090, "total_steps": 204665, "loss": 0.0, "lr": 3.366760519613201e-07, "epoch": 3.7888745022353603, "percentage": 75.78, "elapsed_time": "3:21:14", "remaining_time": "1:04:19", "throughput": 8656.99, "total_tokens": 104527888} +{"current_steps": 155095, "total_steps": 204665, "loss": 0.0, "lr": 3.3661223838498374e-07, "epoch": 3.7889966530672075, "percentage": 75.78, "elapsed_time": "3:21:14", "remaining_time": "1:04:19", "throughput": 8657.03, "total_tokens": 104531408} +{"current_steps": 155100, "total_steps": 204665, "loss": 0.0001, "lr": 3.36548429632982e-07, "epoch": 3.7891188038990546, "percentage": 75.78, "elapsed_time": "3:21:15", "remaining_time": "1:04:18", "throughput": 8657.07, "total_tokens": 104535056} +{"current_steps": 155105, "total_steps": 204665, "loss": 0.0, "lr": 3.364846257057783e-07, "epoch": 3.789240954730902, "percentage": 75.78, "elapsed_time": "3:21:15", "remaining_time": "1:04:18", "throughput": 8657.09, "total_tokens": 104538320} +{"current_steps": 155110, "total_steps": 204665, "loss": 0.0002, "lr": 3.364208266038371e-07, "epoch": 3.789363105562749, "percentage": 75.79, "elapsed_time": "3:21:15", "remaining_time": "1:04:18", "throughput": 8657.12, "total_tokens": 104541648} +{"current_steps": 155115, "total_steps": 204665, "loss": 0.0, "lr": 3.363570323276218e-07, "epoch": 3.789485256394596, "percentage": 75.79, "elapsed_time": "3:21:16", "remaining_time": "1:04:17", "throughput": 8657.13, "total_tokens": 104544784} +{"current_steps": 155120, "total_steps": 204665, "loss": 0.0, "lr": 3.3629324287759666e-07, "epoch": 3.789607407226443, "percentage": 75.79, "elapsed_time": "3:21:16", "remaining_time": "1:04:17", "throughput": 8657.17, "total_tokens": 104548304} +{"current_steps": 155125, "total_steps": 204665, "loss": 0.0193, "lr": 3.362294582542259e-07, "epoch": 3.7897295580582906, "percentage": 75.79, "elapsed_time": "3:21:16", "remaining_time": "1:04:16", "throughput": 8657.22, "total_tokens": 104551952} +{"current_steps": 155130, "total_steps": 204665, "loss": 0.0, "lr": 3.3616567845797273e-07, "epoch": 3.7898517088901373, "percentage": 75.8, "elapsed_time": "3:21:17", "remaining_time": "1:04:16", "throughput": 8657.28, "total_tokens": 104555728} +{"current_steps": 155135, "total_steps": 204665, "loss": 0.0002, "lr": 3.3610190348930157e-07, "epoch": 3.789973859721985, "percentage": 75.8, "elapsed_time": "3:21:17", "remaining_time": "1:04:16", "throughput": 8657.3, "total_tokens": 104558928} +{"current_steps": 155140, "total_steps": 204665, "loss": 0.0001, "lr": 3.360381333486757e-07, "epoch": 3.7900960105538317, "percentage": 75.8, "elapsed_time": "3:21:17", "remaining_time": "1:04:15", "throughput": 8657.34, "total_tokens": 104562512} +{"current_steps": 155145, "total_steps": 204665, "loss": 0.0, "lr": 3.359743680365591e-07, "epoch": 3.790218161385679, "percentage": 75.8, "elapsed_time": "3:21:18", "remaining_time": "1:04:15", "throughput": 8657.38, "total_tokens": 104566096} +{"current_steps": 155150, "total_steps": 204665, "loss": 0.0, "lr": 3.3591060755341583e-07, "epoch": 3.790340312217526, "percentage": 75.81, "elapsed_time": "3:21:18", "remaining_time": "1:04:14", "throughput": 8657.44, "total_tokens": 104569872} +{"current_steps": 155155, "total_steps": 204665, "loss": 0.0, "lr": 3.3584685189970886e-07, "epoch": 3.7904624630493733, "percentage": 75.81, "elapsed_time": "3:21:18", "remaining_time": "1:04:14", "throughput": 8657.49, "total_tokens": 104573648} +{"current_steps": 155160, "total_steps": 204665, "loss": 0.2054, "lr": 3.357831010759026e-07, "epoch": 3.7905846138812205, "percentage": 75.81, "elapsed_time": "3:21:19", "remaining_time": "1:04:14", "throughput": 8657.58, "total_tokens": 104577936} +{"current_steps": 155165, "total_steps": 204665, "loss": 0.0, "lr": 3.3571935508245986e-07, "epoch": 3.7907067647130677, "percentage": 75.81, "elapsed_time": "3:21:19", "remaining_time": "1:04:13", "throughput": 8657.62, "total_tokens": 104581392} +{"current_steps": 155170, "total_steps": 204665, "loss": 0.0, "lr": 3.35655613919845e-07, "epoch": 3.790828915544915, "percentage": 75.82, "elapsed_time": "3:21:20", "remaining_time": "1:04:13", "throughput": 8657.65, "total_tokens": 104584784} +{"current_steps": 155175, "total_steps": 204665, "loss": 0.0, "lr": 3.355918775885209e-07, "epoch": 3.790951066376762, "percentage": 75.82, "elapsed_time": "3:21:20", "remaining_time": "1:04:12", "throughput": 8657.66, "total_tokens": 104587856} +{"current_steps": 155180, "total_steps": 204665, "loss": 0.0, "lr": 3.355281460889514e-07, "epoch": 3.7910732172086092, "percentage": 75.82, "elapsed_time": "3:21:20", "remaining_time": "1:04:12", "throughput": 8657.68, "total_tokens": 104591056} +{"current_steps": 155185, "total_steps": 204665, "loss": 0.1111, "lr": 3.3546441942160033e-07, "epoch": 3.7911953680404564, "percentage": 75.82, "elapsed_time": "3:21:21", "remaining_time": "1:04:11", "throughput": 8657.69, "total_tokens": 104594192} +{"current_steps": 155190, "total_steps": 204665, "loss": 0.0, "lr": 3.3540069758693056e-07, "epoch": 3.7913175188723036, "percentage": 75.83, "elapsed_time": "3:21:21", "remaining_time": "1:04:11", "throughput": 8657.71, "total_tokens": 104597456} +{"current_steps": 155195, "total_steps": 204665, "loss": 0.0, "lr": 3.353369805854055e-07, "epoch": 3.791439669704151, "percentage": 75.83, "elapsed_time": "3:21:21", "remaining_time": "1:04:11", "throughput": 8657.73, "total_tokens": 104600720} +{"current_steps": 155200, "total_steps": 204665, "loss": 0.0, "lr": 3.3527326841748894e-07, "epoch": 3.791561820535998, "percentage": 75.83, "elapsed_time": "3:21:22", "remaining_time": "1:04:10", "throughput": 8657.75, "total_tokens": 104603920} +{"current_steps": 155205, "total_steps": 204665, "loss": 0.0004, "lr": 3.3520956108364397e-07, "epoch": 3.7916839713678447, "percentage": 75.83, "elapsed_time": "3:21:22", "remaining_time": "1:04:10", "throughput": 8657.76, "total_tokens": 104607056} +{"current_steps": 155210, "total_steps": 204665, "loss": 0.0, "lr": 3.351458585843335e-07, "epoch": 3.7918061221996924, "percentage": 75.84, "elapsed_time": "3:21:22", "remaining_time": "1:04:09", "throughput": 8657.78, "total_tokens": 104610192} +{"current_steps": 155215, "total_steps": 204665, "loss": 0.0, "lr": 3.350821609200213e-07, "epoch": 3.791928273031539, "percentage": 75.84, "elapsed_time": "3:21:23", "remaining_time": "1:04:09", "throughput": 8657.81, "total_tokens": 104613584} +{"current_steps": 155220, "total_steps": 204665, "loss": 0.0, "lr": 3.3501846809117075e-07, "epoch": 3.7920504238633868, "percentage": 75.84, "elapsed_time": "3:21:23", "remaining_time": "1:04:09", "throughput": 8657.85, "total_tokens": 104617104} +{"current_steps": 155225, "total_steps": 204665, "loss": 0.0235, "lr": 3.349547800982444e-07, "epoch": 3.7921725746952335, "percentage": 75.84, "elapsed_time": "3:21:23", "remaining_time": "1:04:08", "throughput": 8657.87, "total_tokens": 104620304} +{"current_steps": 155230, "total_steps": 204665, "loss": 0.0, "lr": 3.3489109694170604e-07, "epoch": 3.7922947255270807, "percentage": 75.85, "elapsed_time": "3:21:24", "remaining_time": "1:04:08", "throughput": 8657.91, "total_tokens": 104623888} +{"current_steps": 155235, "total_steps": 204665, "loss": 0.0, "lr": 3.3482741862201827e-07, "epoch": 3.792416876358928, "percentage": 75.85, "elapsed_time": "3:21:24", "remaining_time": "1:04:07", "throughput": 8657.96, "total_tokens": 104627600} +{"current_steps": 155240, "total_steps": 204665, "loss": 0.0, "lr": 3.3476374513964444e-07, "epoch": 3.792539027190775, "percentage": 75.85, "elapsed_time": "3:21:24", "remaining_time": "1:04:07", "throughput": 8657.98, "total_tokens": 104630928} +{"current_steps": 155245, "total_steps": 204665, "loss": 0.0, "lr": 3.3470007649504783e-07, "epoch": 3.7926611780226223, "percentage": 75.85, "elapsed_time": "3:21:25", "remaining_time": "1:04:07", "throughput": 8658.0, "total_tokens": 104634128} +{"current_steps": 155250, "total_steps": 204665, "loss": 0.0, "lr": 3.3463641268869093e-07, "epoch": 3.7927833288544694, "percentage": 75.86, "elapsed_time": "3:21:25", "remaining_time": "1:04:06", "throughput": 8658.05, "total_tokens": 104637776} +{"current_steps": 155255, "total_steps": 204665, "loss": 0.0, "lr": 3.345727537210373e-07, "epoch": 3.7929054796863166, "percentage": 75.86, "elapsed_time": "3:21:25", "remaining_time": "1:04:06", "throughput": 8658.06, "total_tokens": 104640848} +{"current_steps": 155260, "total_steps": 204665, "loss": 0.0005, "lr": 3.3450909959254937e-07, "epoch": 3.793027630518164, "percentage": 75.86, "elapsed_time": "3:21:26", "remaining_time": "1:04:05", "throughput": 8658.07, "total_tokens": 104643984} +{"current_steps": 155265, "total_steps": 204665, "loss": 0.0006, "lr": 3.344454503036904e-07, "epoch": 3.793149781350011, "percentage": 75.86, "elapsed_time": "3:21:26", "remaining_time": "1:04:05", "throughput": 8658.08, "total_tokens": 104647120} +{"current_steps": 155270, "total_steps": 204665, "loss": 0.0, "lr": 3.3438180585492294e-07, "epoch": 3.793271932181858, "percentage": 75.87, "elapsed_time": "3:21:26", "remaining_time": "1:04:05", "throughput": 8658.12, "total_tokens": 104650640} +{"current_steps": 155275, "total_steps": 204665, "loss": 0.0, "lr": 3.3431816624670995e-07, "epoch": 3.7933940830137054, "percentage": 75.87, "elapsed_time": "3:21:27", "remaining_time": "1:04:04", "throughput": 8658.12, "total_tokens": 104653648} +{"current_steps": 155280, "total_steps": 204665, "loss": 0.0001, "lr": 3.3425453147951466e-07, "epoch": 3.7935162338455526, "percentage": 75.87, "elapsed_time": "3:21:27", "remaining_time": "1:04:04", "throughput": 8658.13, "total_tokens": 104656656} +{"current_steps": 155285, "total_steps": 204665, "loss": 0.0001, "lr": 3.3419090155379913e-07, "epoch": 3.7936383846773998, "percentage": 75.87, "elapsed_time": "3:21:28", "remaining_time": "1:04:03", "throughput": 8658.14, "total_tokens": 104659792} +{"current_steps": 155290, "total_steps": 204665, "loss": 0.0, "lr": 3.341272764700268e-07, "epoch": 3.793760535509247, "percentage": 75.88, "elapsed_time": "3:21:28", "remaining_time": "1:04:03", "throughput": 8658.18, "total_tokens": 104663248} +{"current_steps": 155295, "total_steps": 204665, "loss": 0.0, "lr": 3.340636562286601e-07, "epoch": 3.793882686341094, "percentage": 75.88, "elapsed_time": "3:21:28", "remaining_time": "1:04:03", "throughput": 8658.2, "total_tokens": 104666512} +{"current_steps": 155300, "total_steps": 204665, "loss": 0.0, "lr": 3.340000408301611e-07, "epoch": 3.794004837172941, "percentage": 75.88, "elapsed_time": "3:21:29", "remaining_time": "1:04:02", "throughput": 8658.21, "total_tokens": 104669648} +{"current_steps": 155305, "total_steps": 204665, "loss": 0.0, "lr": 3.339364302749933e-07, "epoch": 3.7941269880047885, "percentage": 75.88, "elapsed_time": "3:21:29", "remaining_time": "1:04:02", "throughput": 8658.24, "total_tokens": 104672912} +{"current_steps": 155310, "total_steps": 204665, "loss": 0.0, "lr": 3.3387282456361867e-07, "epoch": 3.7942491388366353, "percentage": 75.88, "elapsed_time": "3:21:29", "remaining_time": "1:04:01", "throughput": 8658.27, "total_tokens": 104676304} +{"current_steps": 155315, "total_steps": 204665, "loss": 0.0952, "lr": 3.3380922369650035e-07, "epoch": 3.794371289668483, "percentage": 75.89, "elapsed_time": "3:21:30", "remaining_time": "1:04:01", "throughput": 8658.29, "total_tokens": 104679632} +{"current_steps": 155320, "total_steps": 204665, "loss": 0.0, "lr": 3.337456276741002e-07, "epoch": 3.7944934405003297, "percentage": 75.89, "elapsed_time": "3:21:30", "remaining_time": "1:04:01", "throughput": 8658.3, "total_tokens": 104682704} +{"current_steps": 155325, "total_steps": 204665, "loss": 0.0, "lr": 3.336820364968813e-07, "epoch": 3.794615591332177, "percentage": 75.89, "elapsed_time": "3:21:30", "remaining_time": "1:04:00", "throughput": 8658.35, "total_tokens": 104686352} +{"current_steps": 155330, "total_steps": 204665, "loss": 0.05, "lr": 3.3361845016530566e-07, "epoch": 3.794737742164024, "percentage": 75.89, "elapsed_time": "3:21:31", "remaining_time": "1:04:00", "throughput": 8658.39, "total_tokens": 104689872} +{"current_steps": 155335, "total_steps": 204665, "loss": 0.0, "lr": 3.3355486867983573e-07, "epoch": 3.7948598929958712, "percentage": 75.9, "elapsed_time": "3:21:31", "remaining_time": "1:03:59", "throughput": 8658.43, "total_tokens": 104693520} +{"current_steps": 155340, "total_steps": 204665, "loss": 0.0, "lr": 3.334912920409345e-07, "epoch": 3.7949820438277184, "percentage": 75.9, "elapsed_time": "3:21:31", "remaining_time": "1:03:59", "throughput": 8658.48, "total_tokens": 104697104} +{"current_steps": 155345, "total_steps": 204665, "loss": 0.0002, "lr": 3.334277202490635e-07, "epoch": 3.7951041946595656, "percentage": 75.9, "elapsed_time": "3:21:32", "remaining_time": "1:03:59", "throughput": 8658.5, "total_tokens": 104700432} +{"current_steps": 155350, "total_steps": 204665, "loss": 0.0706, "lr": 3.333641533046857e-07, "epoch": 3.795226345491413, "percentage": 75.9, "elapsed_time": "3:21:32", "remaining_time": "1:03:58", "throughput": 8658.54, "total_tokens": 104703888} +{"current_steps": 155355, "total_steps": 204665, "loss": 0.0, "lr": 3.333005912082628e-07, "epoch": 3.79534849632326, "percentage": 75.91, "elapsed_time": "3:21:32", "remaining_time": "1:03:58", "throughput": 8658.55, "total_tokens": 104707024} +{"current_steps": 155360, "total_steps": 204665, "loss": 0.0002, "lr": 3.332370339602576e-07, "epoch": 3.795470647155107, "percentage": 75.91, "elapsed_time": "3:21:33", "remaining_time": "1:03:57", "throughput": 8658.6, "total_tokens": 104710736} +{"current_steps": 155365, "total_steps": 204665, "loss": 0.0, "lr": 3.331734815611318e-07, "epoch": 3.7955927979869544, "percentage": 75.91, "elapsed_time": "3:21:33", "remaining_time": "1:03:57", "throughput": 8658.61, "total_tokens": 104713744} +{"current_steps": 155370, "total_steps": 204665, "loss": 0.0002, "lr": 3.3310993401134767e-07, "epoch": 3.7957149488188016, "percentage": 75.91, "elapsed_time": "3:21:33", "remaining_time": "1:03:57", "throughput": 8658.64, "total_tokens": 104717200} +{"current_steps": 155375, "total_steps": 204665, "loss": 0.0001, "lr": 3.330463913113679e-07, "epoch": 3.7958370996506487, "percentage": 75.92, "elapsed_time": "3:21:34", "remaining_time": "1:03:56", "throughput": 8658.68, "total_tokens": 104720656} +{"current_steps": 155380, "total_steps": 204665, "loss": 0.0, "lr": 3.329828534616538e-07, "epoch": 3.795959250482496, "percentage": 75.92, "elapsed_time": "3:21:34", "remaining_time": "1:03:56", "throughput": 8658.71, "total_tokens": 104723984} +{"current_steps": 155385, "total_steps": 204665, "loss": 0.0, "lr": 3.3291932046266804e-07, "epoch": 3.7960814013143427, "percentage": 75.92, "elapsed_time": "3:21:34", "remaining_time": "1:03:55", "throughput": 8658.7, "total_tokens": 104726800} +{"current_steps": 155390, "total_steps": 204665, "loss": 0.0, "lr": 3.328557923148722e-07, "epoch": 3.7962035521461903, "percentage": 75.92, "elapsed_time": "3:21:35", "remaining_time": "1:03:55", "throughput": 8658.73, "total_tokens": 104730128} +{"current_steps": 155395, "total_steps": 204665, "loss": 0.0418, "lr": 3.327922690187287e-07, "epoch": 3.796325702978037, "percentage": 75.93, "elapsed_time": "3:21:35", "remaining_time": "1:03:55", "throughput": 8658.75, "total_tokens": 104733392} +{"current_steps": 155400, "total_steps": 204665, "loss": 0.0, "lr": 3.327287505746993e-07, "epoch": 3.7964478538098847, "percentage": 75.93, "elapsed_time": "3:21:36", "remaining_time": "1:03:54", "throughput": 8658.77, "total_tokens": 104736656} +{"current_steps": 155405, "total_steps": 204665, "loss": 0.0001, "lr": 3.3266523698324564e-07, "epoch": 3.7965700046417314, "percentage": 75.93, "elapsed_time": "3:21:36", "remaining_time": "1:03:54", "throughput": 8658.79, "total_tokens": 104739984} +{"current_steps": 155410, "total_steps": 204665, "loss": 0.0004, "lr": 3.3260172824483013e-07, "epoch": 3.7966921554735786, "percentage": 75.93, "elapsed_time": "3:21:36", "remaining_time": "1:03:53", "throughput": 8658.84, "total_tokens": 104743504} +{"current_steps": 155415, "total_steps": 204665, "loss": 0.0001, "lr": 3.325382243599141e-07, "epoch": 3.796814306305426, "percentage": 75.94, "elapsed_time": "3:21:37", "remaining_time": "1:03:53", "throughput": 8658.85, "total_tokens": 104746576} +{"current_steps": 155420, "total_steps": 204665, "loss": 0.0, "lr": 3.324747253289599e-07, "epoch": 3.796936457137273, "percentage": 75.94, "elapsed_time": "3:21:37", "remaining_time": "1:03:53", "throughput": 8658.86, "total_tokens": 104749648} +{"current_steps": 155425, "total_steps": 204665, "loss": 0.0, "lr": 3.3241123115242873e-07, "epoch": 3.79705860796912, "percentage": 75.94, "elapsed_time": "3:21:37", "remaining_time": "1:03:52", "throughput": 8658.9, "total_tokens": 104753168} +{"current_steps": 155430, "total_steps": 204665, "loss": 0.0296, "lr": 3.323477418307826e-07, "epoch": 3.7971807588009674, "percentage": 75.94, "elapsed_time": "3:21:38", "remaining_time": "1:03:52", "throughput": 8658.94, "total_tokens": 104756688} +{"current_steps": 155435, "total_steps": 204665, "loss": 0.0027, "lr": 3.322842573644837e-07, "epoch": 3.7973029096328146, "percentage": 75.95, "elapsed_time": "3:21:38", "remaining_time": "1:03:51", "throughput": 8658.99, "total_tokens": 104760400} +{"current_steps": 155440, "total_steps": 204665, "loss": 0.0256, "lr": 3.3222077775399295e-07, "epoch": 3.7974250604646618, "percentage": 75.95, "elapsed_time": "3:21:38", "remaining_time": "1:03:51", "throughput": 8659.05, "total_tokens": 104764176} +{"current_steps": 155445, "total_steps": 204665, "loss": 0.0, "lr": 3.321573029997725e-07, "epoch": 3.797547211296509, "percentage": 75.95, "elapsed_time": "3:21:39", "remaining_time": "1:03:51", "throughput": 8659.09, "total_tokens": 104767696} +{"current_steps": 155450, "total_steps": 204665, "loss": 0.0, "lr": 3.3209383310228355e-07, "epoch": 3.797669362128356, "percentage": 75.95, "elapsed_time": "3:21:39", "remaining_time": "1:03:50", "throughput": 8659.12, "total_tokens": 104771088} +{"current_steps": 155455, "total_steps": 204665, "loss": 0.0, "lr": 3.3203036806198783e-07, "epoch": 3.7977915129602033, "percentage": 75.96, "elapsed_time": "3:21:39", "remaining_time": "1:03:50", "throughput": 8659.13, "total_tokens": 104774160} +{"current_steps": 155460, "total_steps": 204665, "loss": 0.0, "lr": 3.3196690787934734e-07, "epoch": 3.7979136637920505, "percentage": 75.96, "elapsed_time": "3:21:40", "remaining_time": "1:03:49", "throughput": 8659.15, "total_tokens": 104777424} +{"current_steps": 155465, "total_steps": 204665, "loss": 0.0, "lr": 3.3190345255482276e-07, "epoch": 3.7980358146238977, "percentage": 75.96, "elapsed_time": "3:21:40", "remaining_time": "1:03:49", "throughput": 8659.18, "total_tokens": 104780752} +{"current_steps": 155470, "total_steps": 204665, "loss": 0.0, "lr": 3.318400020888764e-07, "epoch": 3.798157965455745, "percentage": 75.96, "elapsed_time": "3:21:40", "remaining_time": "1:03:49", "throughput": 8659.22, "total_tokens": 104784208} +{"current_steps": 155475, "total_steps": 204665, "loss": 0.0343, "lr": 3.317765564819689e-07, "epoch": 3.798280116287592, "percentage": 75.97, "elapsed_time": "3:21:41", "remaining_time": "1:03:48", "throughput": 8659.25, "total_tokens": 104787728} +{"current_steps": 155480, "total_steps": 204665, "loss": 0.0, "lr": 3.317131157345623e-07, "epoch": 3.798402267119439, "percentage": 75.97, "elapsed_time": "3:21:41", "remaining_time": "1:03:48", "throughput": 8659.29, "total_tokens": 104791248} +{"current_steps": 155485, "total_steps": 204665, "loss": 0.0299, "lr": 3.316496798471173e-07, "epoch": 3.7985244179512865, "percentage": 75.97, "elapsed_time": "3:21:41", "remaining_time": "1:03:47", "throughput": 8659.33, "total_tokens": 104794768} +{"current_steps": 155490, "total_steps": 204665, "loss": 0.0, "lr": 3.3158624882009567e-07, "epoch": 3.7986465687831332, "percentage": 75.97, "elapsed_time": "3:21:42", "remaining_time": "1:03:47", "throughput": 8659.37, "total_tokens": 104798224} +{"current_steps": 155495, "total_steps": 204665, "loss": 0.0, "lr": 3.3152282265395895e-07, "epoch": 3.7987687196149804, "percentage": 75.98, "elapsed_time": "3:21:42", "remaining_time": "1:03:47", "throughput": 8659.42, "total_tokens": 104802000} +{"current_steps": 155500, "total_steps": 204665, "loss": 0.0, "lr": 3.314594013491681e-07, "epoch": 3.7988908704468276, "percentage": 75.98, "elapsed_time": "3:21:43", "remaining_time": "1:03:46", "throughput": 8659.43, "total_tokens": 104805136} +{"current_steps": 155505, "total_steps": 204665, "loss": 0.0006, "lr": 3.313959849061838e-07, "epoch": 3.799013021278675, "percentage": 75.98, "elapsed_time": "3:21:43", "remaining_time": "1:03:46", "throughput": 8659.45, "total_tokens": 104808336} +{"current_steps": 155510, "total_steps": 204665, "loss": 0.0, "lr": 3.313325733254682e-07, "epoch": 3.799135172110522, "percentage": 75.98, "elapsed_time": "3:21:43", "remaining_time": "1:03:45", "throughput": 8659.47, "total_tokens": 104811472} +{"current_steps": 155515, "total_steps": 204665, "loss": 0.0, "lr": 3.3126916660748194e-07, "epoch": 3.799257322942369, "percentage": 75.99, "elapsed_time": "3:21:44", "remaining_time": "1:03:45", "throughput": 8659.5, "total_tokens": 104814928} +{"current_steps": 155520, "total_steps": 204665, "loss": 0.0, "lr": 3.312057647526858e-07, "epoch": 3.7993794737742164, "percentage": 75.99, "elapsed_time": "3:21:44", "remaining_time": "1:03:45", "throughput": 8659.51, "total_tokens": 104818000} +{"current_steps": 155525, "total_steps": 204665, "loss": 0.0, "lr": 3.311423677615414e-07, "epoch": 3.7995016246060636, "percentage": 75.99, "elapsed_time": "3:21:44", "remaining_time": "1:03:44", "throughput": 8659.52, "total_tokens": 104821072} +{"current_steps": 155530, "total_steps": 204665, "loss": 0.0444, "lr": 3.310789756345097e-07, "epoch": 3.7996237754379107, "percentage": 75.99, "elapsed_time": "3:21:45", "remaining_time": "1:03:44", "throughput": 8659.54, "total_tokens": 104824336} +{"current_steps": 155535, "total_steps": 204665, "loss": 0.0, "lr": 3.310155883720513e-07, "epoch": 3.799745926269758, "percentage": 75.99, "elapsed_time": "3:21:45", "remaining_time": "1:03:43", "throughput": 8659.56, "total_tokens": 104827472} +{"current_steps": 155540, "total_steps": 204665, "loss": 0.0, "lr": 3.309522059746279e-07, "epoch": 3.799868077101605, "percentage": 76.0, "elapsed_time": "3:21:45", "remaining_time": "1:03:43", "throughput": 8659.57, "total_tokens": 104830672} +{"current_steps": 155545, "total_steps": 204665, "loss": 0.0, "lr": 3.308888284426997e-07, "epoch": 3.7999902279334523, "percentage": 76.0, "elapsed_time": "3:21:46", "remaining_time": "1:03:43", "throughput": 8659.61, "total_tokens": 104834128} +{"current_steps": 155550, "total_steps": 204665, "loss": 0.0, "lr": 3.308254557767279e-07, "epoch": 3.8001123787652995, "percentage": 76.0, "elapsed_time": "3:21:46", "remaining_time": "1:03:42", "throughput": 8659.64, "total_tokens": 104837584} +{"current_steps": 155555, "total_steps": 204665, "loss": 0.0, "lr": 3.3076208797717366e-07, "epoch": 3.8002345295971467, "percentage": 76.0, "elapsed_time": "3:21:46", "remaining_time": "1:03:42", "throughput": 8659.69, "total_tokens": 104841232} +{"current_steps": 155560, "total_steps": 204665, "loss": 0.0213, "lr": 3.3069872504449723e-07, "epoch": 3.800356680428994, "percentage": 76.01, "elapsed_time": "3:21:47", "remaining_time": "1:03:41", "throughput": 8659.73, "total_tokens": 104844752} +{"current_steps": 155565, "total_steps": 204665, "loss": 0.0, "lr": 3.3063536697915995e-07, "epoch": 3.8004788312608406, "percentage": 76.01, "elapsed_time": "3:21:47", "remaining_time": "1:03:41", "throughput": 8659.76, "total_tokens": 104848208} +{"current_steps": 155570, "total_steps": 204665, "loss": 0.0, "lr": 3.30572013781622e-07, "epoch": 3.8006009820926883, "percentage": 76.01, "elapsed_time": "3:21:47", "remaining_time": "1:03:41", "throughput": 8659.81, "total_tokens": 104851792} +{"current_steps": 155575, "total_steps": 204665, "loss": 0.0001, "lr": 3.305086654523449e-07, "epoch": 3.800723132924535, "percentage": 76.01, "elapsed_time": "3:21:48", "remaining_time": "1:03:40", "throughput": 8659.83, "total_tokens": 104855120} +{"current_steps": 155580, "total_steps": 204665, "loss": 0.1219, "lr": 3.304453219917883e-07, "epoch": 3.8008452837563826, "percentage": 76.02, "elapsed_time": "3:21:48", "remaining_time": "1:03:40", "throughput": 8659.84, "total_tokens": 104858192} +{"current_steps": 155585, "total_steps": 204665, "loss": 0.0293, "lr": 3.3038198340041356e-07, "epoch": 3.8009674345882294, "percentage": 76.02, "elapsed_time": "3:21:48", "remaining_time": "1:03:39", "throughput": 8659.92, "total_tokens": 104862352} +{"current_steps": 155590, "total_steps": 204665, "loss": 0.0, "lr": 3.3031864967868153e-07, "epoch": 3.8010895854200766, "percentage": 76.02, "elapsed_time": "3:21:49", "remaining_time": "1:03:39", "throughput": 8659.92, "total_tokens": 104865168} +{"current_steps": 155595, "total_steps": 204665, "loss": 0.016, "lr": 3.302553208270519e-07, "epoch": 3.8012117362519238, "percentage": 76.02, "elapsed_time": "3:21:49", "remaining_time": "1:03:39", "throughput": 8659.98, "total_tokens": 104869008} +{"current_steps": 155600, "total_steps": 204665, "loss": 0.0, "lr": 3.301919968459861e-07, "epoch": 3.801333887083771, "percentage": 76.03, "elapsed_time": "3:21:49", "remaining_time": "1:03:38", "throughput": 8659.99, "total_tokens": 104872144} +{"current_steps": 155605, "total_steps": 204665, "loss": 0.0, "lr": 3.3012867773594434e-07, "epoch": 3.801456037915618, "percentage": 76.03, "elapsed_time": "3:21:50", "remaining_time": "1:03:38", "throughput": 8660.0, "total_tokens": 104875280} +{"current_steps": 155610, "total_steps": 204665, "loss": 0.0, "lr": 3.3006536349738654e-07, "epoch": 3.8015781887474653, "percentage": 76.03, "elapsed_time": "3:21:50", "remaining_time": "1:03:37", "throughput": 8660.05, "total_tokens": 104878864} +{"current_steps": 155615, "total_steps": 204665, "loss": 0.0001, "lr": 3.300020541307741e-07, "epoch": 3.8017003395793125, "percentage": 76.03, "elapsed_time": "3:21:50", "remaining_time": "1:03:37", "throughput": 8660.04, "total_tokens": 104881680} +{"current_steps": 155620, "total_steps": 204665, "loss": 0.0001, "lr": 3.2993874963656645e-07, "epoch": 3.8018224904111597, "percentage": 76.04, "elapsed_time": "3:21:51", "remaining_time": "1:03:36", "throughput": 8660.03, "total_tokens": 104884496} +{"current_steps": 155625, "total_steps": 204665, "loss": 0.0001, "lr": 3.298754500152249e-07, "epoch": 3.801944641243007, "percentage": 76.04, "elapsed_time": "3:21:51", "remaining_time": "1:03:36", "throughput": 8660.07, "total_tokens": 104888016} +{"current_steps": 155630, "total_steps": 204665, "loss": 0.0, "lr": 3.298121552672088e-07, "epoch": 3.802066792074854, "percentage": 76.04, "elapsed_time": "3:21:52", "remaining_time": "1:03:36", "throughput": 8660.09, "total_tokens": 104891216} +{"current_steps": 155635, "total_steps": 204665, "loss": 0.0, "lr": 3.297488653929794e-07, "epoch": 3.8021889429067013, "percentage": 76.04, "elapsed_time": "3:21:52", "remaining_time": "1:03:35", "throughput": 8660.16, "total_tokens": 104895184} +{"current_steps": 155640, "total_steps": 204665, "loss": 0.0, "lr": 3.2968558039299633e-07, "epoch": 3.8023110937385485, "percentage": 76.05, "elapsed_time": "3:21:52", "remaining_time": "1:03:35", "throughput": 8660.23, "total_tokens": 104899152} +{"current_steps": 155645, "total_steps": 204665, "loss": 0.0869, "lr": 3.296223002677199e-07, "epoch": 3.8024332445703957, "percentage": 76.05, "elapsed_time": "3:21:53", "remaining_time": "1:03:34", "throughput": 8660.25, "total_tokens": 104902416} +{"current_steps": 155650, "total_steps": 204665, "loss": 0.0001, "lr": 3.2955902501761067e-07, "epoch": 3.802555395402243, "percentage": 76.05, "elapsed_time": "3:21:53", "remaining_time": "1:03:34", "throughput": 8660.28, "total_tokens": 104905808} +{"current_steps": 155655, "total_steps": 204665, "loss": 0.0, "lr": 3.294957546431283e-07, "epoch": 3.80267754623409, "percentage": 76.05, "elapsed_time": "3:21:53", "remaining_time": "1:03:34", "throughput": 8660.28, "total_tokens": 104908752} +{"current_steps": 155660, "total_steps": 204665, "loss": 0.0559, "lr": 3.294324891447334e-07, "epoch": 3.802799697065937, "percentage": 76.06, "elapsed_time": "3:21:54", "remaining_time": "1:03:33", "throughput": 8660.31, "total_tokens": 104912208} +{"current_steps": 155665, "total_steps": 204665, "loss": 0.0, "lr": 3.293692285228855e-07, "epoch": 3.8029218478977844, "percentage": 76.06, "elapsed_time": "3:21:54", "remaining_time": "1:03:33", "throughput": 8660.37, "total_tokens": 104915920} +{"current_steps": 155670, "total_steps": 204665, "loss": 0.0, "lr": 3.2930597277804537e-07, "epoch": 3.803043998729631, "percentage": 76.06, "elapsed_time": "3:21:54", "remaining_time": "1:03:32", "throughput": 8660.38, "total_tokens": 104919056} +{"current_steps": 155675, "total_steps": 204665, "loss": 0.0, "lr": 3.2924272191067215e-07, "epoch": 3.8031661495614784, "percentage": 76.06, "elapsed_time": "3:21:55", "remaining_time": "1:03:32", "throughput": 8660.41, "total_tokens": 104922384} +{"current_steps": 155680, "total_steps": 204665, "loss": 0.0336, "lr": 3.291794759212263e-07, "epoch": 3.8032883003933255, "percentage": 76.07, "elapsed_time": "3:21:55", "remaining_time": "1:03:32", "throughput": 8660.44, "total_tokens": 104925840} +{"current_steps": 155685, "total_steps": 204665, "loss": 0.0606, "lr": 3.2911623481016814e-07, "epoch": 3.8034104512251727, "percentage": 76.07, "elapsed_time": "3:21:55", "remaining_time": "1:03:31", "throughput": 8660.47, "total_tokens": 104929168} +{"current_steps": 155690, "total_steps": 204665, "loss": 0.0, "lr": 3.2905299857795675e-07, "epoch": 3.80353260205702, "percentage": 76.07, "elapsed_time": "3:21:56", "remaining_time": "1:03:31", "throughput": 8660.51, "total_tokens": 104932752} +{"current_steps": 155695, "total_steps": 204665, "loss": 0.0332, "lr": 3.289897672250528e-07, "epoch": 3.803654752888867, "percentage": 76.07, "elapsed_time": "3:21:56", "remaining_time": "1:03:30", "throughput": 8660.52, "total_tokens": 104935888} +{"current_steps": 155700, "total_steps": 204665, "loss": 0.074, "lr": 3.289265407519154e-07, "epoch": 3.8037769037207143, "percentage": 76.08, "elapsed_time": "3:21:56", "remaining_time": "1:03:30", "throughput": 8660.55, "total_tokens": 104939216} +{"current_steps": 155705, "total_steps": 204665, "loss": 0.0001, "lr": 3.28863319159005e-07, "epoch": 3.8038990545525615, "percentage": 76.08, "elapsed_time": "3:21:57", "remaining_time": "1:03:30", "throughput": 8660.59, "total_tokens": 104942800} +{"current_steps": 155710, "total_steps": 204665, "loss": 0.0, "lr": 3.28800102446781e-07, "epoch": 3.8040212053844087, "percentage": 76.08, "elapsed_time": "3:21:57", "remaining_time": "1:03:29", "throughput": 8660.61, "total_tokens": 104946064} +{"current_steps": 155715, "total_steps": 204665, "loss": 0.0, "lr": 3.2873689061570297e-07, "epoch": 3.804143356216256, "percentage": 76.08, "elapsed_time": "3:21:57", "remaining_time": "1:03:29", "throughput": 8660.66, "total_tokens": 104949776} +{"current_steps": 155720, "total_steps": 204665, "loss": 0.0064, "lr": 3.286736836662311e-07, "epoch": 3.804265507048103, "percentage": 76.09, "elapsed_time": "3:21:58", "remaining_time": "1:03:28", "throughput": 8660.69, "total_tokens": 104953104} +{"current_steps": 155725, "total_steps": 204665, "loss": 0.0001, "lr": 3.286104815988244e-07, "epoch": 3.8043876578799503, "percentage": 76.09, "elapsed_time": "3:21:58", "remaining_time": "1:03:28", "throughput": 8660.71, "total_tokens": 104956240} +{"current_steps": 155730, "total_steps": 204665, "loss": 0.0001, "lr": 3.285472844139432e-07, "epoch": 3.8045098087117974, "percentage": 76.09, "elapsed_time": "3:21:59", "remaining_time": "1:03:28", "throughput": 8660.71, "total_tokens": 104959248} +{"current_steps": 155735, "total_steps": 204665, "loss": 0.0, "lr": 3.2848409211204653e-07, "epoch": 3.8046319595436446, "percentage": 76.09, "elapsed_time": "3:21:59", "remaining_time": "1:03:27", "throughput": 8660.72, "total_tokens": 104962256} +{"current_steps": 155740, "total_steps": 204665, "loss": 0.0001, "lr": 3.2842090469359406e-07, "epoch": 3.804754110375492, "percentage": 76.1, "elapsed_time": "3:21:59", "remaining_time": "1:03:27", "throughput": 8660.73, "total_tokens": 104965328} +{"current_steps": 155745, "total_steps": 204665, "loss": 0.0002, "lr": 3.283577221590457e-07, "epoch": 3.8048762612073386, "percentage": 76.1, "elapsed_time": "3:22:00", "remaining_time": "1:03:26", "throughput": 8660.75, "total_tokens": 104968592} +{"current_steps": 155750, "total_steps": 204665, "loss": 0.0, "lr": 3.282945445088604e-07, "epoch": 3.804998412039186, "percentage": 76.1, "elapsed_time": "3:22:00", "remaining_time": "1:03:26", "throughput": 8660.81, "total_tokens": 104972368} +{"current_steps": 155755, "total_steps": 204665, "loss": 0.0, "lr": 3.28231371743498e-07, "epoch": 3.805120562871033, "percentage": 76.1, "elapsed_time": "3:22:00", "remaining_time": "1:03:26", "throughput": 8660.84, "total_tokens": 104975696} +{"current_steps": 155760, "total_steps": 204665, "loss": 0.0002, "lr": 3.281682038634176e-07, "epoch": 3.8052427137028806, "percentage": 76.1, "elapsed_time": "3:22:01", "remaining_time": "1:03:25", "throughput": 8660.91, "total_tokens": 104979792} +{"current_steps": 155765, "total_steps": 204665, "loss": 0.0, "lr": 3.28105040869079e-07, "epoch": 3.8053648645347273, "percentage": 76.11, "elapsed_time": "3:22:01", "remaining_time": "1:03:25", "throughput": 8660.96, "total_tokens": 104983376} +{"current_steps": 155770, "total_steps": 204665, "loss": 0.0415, "lr": 3.280418827609409e-07, "epoch": 3.8054870153665745, "percentage": 76.11, "elapsed_time": "3:22:01", "remaining_time": "1:03:24", "throughput": 8660.99, "total_tokens": 104986768} +{"current_steps": 155775, "total_steps": 204665, "loss": 0.0, "lr": 3.2797872953946305e-07, "epoch": 3.8056091661984217, "percentage": 76.11, "elapsed_time": "3:22:02", "remaining_time": "1:03:24", "throughput": 8661.04, "total_tokens": 104990416} +{"current_steps": 155780, "total_steps": 204665, "loss": 0.0, "lr": 3.279155812051049e-07, "epoch": 3.805731317030269, "percentage": 76.11, "elapsed_time": "3:22:02", "remaining_time": "1:03:24", "throughput": 8661.09, "total_tokens": 104994064} +{"current_steps": 155785, "total_steps": 204665, "loss": 0.0001, "lr": 3.2785243775832505e-07, "epoch": 3.805853467862116, "percentage": 76.12, "elapsed_time": "3:22:02", "remaining_time": "1:03:23", "throughput": 8661.12, "total_tokens": 104997520} +{"current_steps": 155790, "total_steps": 204665, "loss": 0.0, "lr": 3.277892991995834e-07, "epoch": 3.8059756186939633, "percentage": 76.12, "elapsed_time": "3:22:03", "remaining_time": "1:03:23", "throughput": 8661.14, "total_tokens": 105000720} +{"current_steps": 155795, "total_steps": 204665, "loss": 0.0, "lr": 3.277261655293384e-07, "epoch": 3.8060977695258105, "percentage": 76.12, "elapsed_time": "3:22:03", "remaining_time": "1:03:22", "throughput": 8661.19, "total_tokens": 105004432} +{"current_steps": 155800, "total_steps": 204665, "loss": 0.0001, "lr": 3.2766303674804964e-07, "epoch": 3.8062199203576577, "percentage": 76.12, "elapsed_time": "3:22:03", "remaining_time": "1:03:22", "throughput": 8661.22, "total_tokens": 105007824} +{"current_steps": 155805, "total_steps": 204665, "loss": 0.0619, "lr": 3.275999128561764e-07, "epoch": 3.806342071189505, "percentage": 76.13, "elapsed_time": "3:22:04", "remaining_time": "1:03:22", "throughput": 8661.23, "total_tokens": 105010832} +{"current_steps": 155810, "total_steps": 204665, "loss": 0.0279, "lr": 3.2753679385417745e-07, "epoch": 3.806464222021352, "percentage": 76.13, "elapsed_time": "3:22:04", "remaining_time": "1:03:21", "throughput": 8661.26, "total_tokens": 105014160} +{"current_steps": 155815, "total_steps": 204665, "loss": 0.0, "lr": 3.274736797425115e-07, "epoch": 3.8065863728531992, "percentage": 76.13, "elapsed_time": "3:22:04", "remaining_time": "1:03:21", "throughput": 8661.31, "total_tokens": 105017808} +{"current_steps": 155820, "total_steps": 204665, "loss": 0.0546, "lr": 3.2741057052163814e-07, "epoch": 3.8067085236850464, "percentage": 76.13, "elapsed_time": "3:22:05", "remaining_time": "1:03:20", "throughput": 8661.35, "total_tokens": 105021392} +{"current_steps": 155825, "total_steps": 204665, "loss": 0.0, "lr": 3.27347466192016e-07, "epoch": 3.8068306745168936, "percentage": 76.14, "elapsed_time": "3:22:05", "remaining_time": "1:03:20", "throughput": 8661.38, "total_tokens": 105024848} +{"current_steps": 155830, "total_steps": 204665, "loss": 0.0, "lr": 3.2728436675410376e-07, "epoch": 3.8069528253487404, "percentage": 76.14, "elapsed_time": "3:22:05", "remaining_time": "1:03:20", "throughput": 8661.39, "total_tokens": 105027856} +{"current_steps": 155835, "total_steps": 204665, "loss": 0.0, "lr": 3.2722127220836047e-07, "epoch": 3.807074976180588, "percentage": 76.14, "elapsed_time": "3:22:06", "remaining_time": "1:03:19", "throughput": 8661.41, "total_tokens": 105031120} +{"current_steps": 155840, "total_steps": 204665, "loss": 0.0012, "lr": 3.271581825552454e-07, "epoch": 3.8071971270124347, "percentage": 76.14, "elapsed_time": "3:22:06", "remaining_time": "1:03:19", "throughput": 8661.43, "total_tokens": 105034320} +{"current_steps": 155845, "total_steps": 204665, "loss": 0.0, "lr": 3.270950977952166e-07, "epoch": 3.8073192778442824, "percentage": 76.15, "elapsed_time": "3:22:07", "remaining_time": "1:03:18", "throughput": 8661.44, "total_tokens": 105037328} +{"current_steps": 155850, "total_steps": 204665, "loss": 0.0, "lr": 3.270320179287337e-07, "epoch": 3.807441428676129, "percentage": 76.15, "elapsed_time": "3:22:07", "remaining_time": "1:03:18", "throughput": 8661.46, "total_tokens": 105040592} +{"current_steps": 155855, "total_steps": 204665, "loss": 0.0353, "lr": 3.2696894295625456e-07, "epoch": 3.8075635795079763, "percentage": 76.15, "elapsed_time": "3:22:07", "remaining_time": "1:03:18", "throughput": 8661.48, "total_tokens": 105043856} +{"current_steps": 155860, "total_steps": 204665, "loss": 0.0005, "lr": 3.2690587287823824e-07, "epoch": 3.8076857303398235, "percentage": 76.15, "elapsed_time": "3:22:08", "remaining_time": "1:03:17", "throughput": 8661.52, "total_tokens": 105047376} +{"current_steps": 155865, "total_steps": 204665, "loss": 0.0003, "lr": 3.2684280769514384e-07, "epoch": 3.8078078811716707, "percentage": 76.16, "elapsed_time": "3:22:08", "remaining_time": "1:03:17", "throughput": 8661.56, "total_tokens": 105050832} +{"current_steps": 155870, "total_steps": 204665, "loss": 0.0467, "lr": 3.267797474074293e-07, "epoch": 3.807930032003518, "percentage": 76.16, "elapsed_time": "3:22:08", "remaining_time": "1:03:16", "throughput": 8661.58, "total_tokens": 105054032} +{"current_steps": 155875, "total_steps": 204665, "loss": 0.0, "lr": 3.267166920155537e-07, "epoch": 3.808052182835365, "percentage": 76.16, "elapsed_time": "3:22:09", "remaining_time": "1:03:16", "throughput": 8661.62, "total_tokens": 105057616} +{"current_steps": 155880, "total_steps": 204665, "loss": 0.0264, "lr": 3.266536415199753e-07, "epoch": 3.8081743336672123, "percentage": 76.16, "elapsed_time": "3:22:09", "remaining_time": "1:03:16", "throughput": 8661.64, "total_tokens": 105060880} +{"current_steps": 155885, "total_steps": 204665, "loss": 0.0001, "lr": 3.265905959211529e-07, "epoch": 3.8082964844990594, "percentage": 76.17, "elapsed_time": "3:22:09", "remaining_time": "1:03:15", "throughput": 8661.67, "total_tokens": 105064208} +{"current_steps": 155890, "total_steps": 204665, "loss": 0.1409, "lr": 3.2652755521954456e-07, "epoch": 3.8084186353309066, "percentage": 76.17, "elapsed_time": "3:22:10", "remaining_time": "1:03:15", "throughput": 8661.68, "total_tokens": 105067280} +{"current_steps": 155895, "total_steps": 204665, "loss": 0.0, "lr": 3.2646451941560895e-07, "epoch": 3.808540786162754, "percentage": 76.17, "elapsed_time": "3:22:10", "remaining_time": "1:03:14", "throughput": 8661.73, "total_tokens": 105070864} +{"current_steps": 155900, "total_steps": 204665, "loss": 0.0001, "lr": 3.264014885098049e-07, "epoch": 3.808662936994601, "percentage": 76.17, "elapsed_time": "3:22:10", "remaining_time": "1:03:14", "throughput": 8661.76, "total_tokens": 105074320} +{"current_steps": 155905, "total_steps": 204665, "loss": 0.0, "lr": 3.2633846250259e-07, "epoch": 3.808785087826448, "percentage": 76.18, "elapsed_time": "3:22:11", "remaining_time": "1:03:14", "throughput": 8661.78, "total_tokens": 105077584} +{"current_steps": 155910, "total_steps": 204665, "loss": 0.0, "lr": 3.262754413944233e-07, "epoch": 3.8089072386582954, "percentage": 76.18, "elapsed_time": "3:22:11", "remaining_time": "1:03:13", "throughput": 8661.8, "total_tokens": 105080848} +{"current_steps": 155915, "total_steps": 204665, "loss": 0.0, "lr": 3.2621242518576286e-07, "epoch": 3.8090293894901426, "percentage": 76.18, "elapsed_time": "3:22:11", "remaining_time": "1:03:13", "throughput": 8661.83, "total_tokens": 105084176} +{"current_steps": 155920, "total_steps": 204665, "loss": 0.0, "lr": 3.261494138770665e-07, "epoch": 3.8091515403219898, "percentage": 76.18, "elapsed_time": "3:22:12", "remaining_time": "1:03:12", "throughput": 8661.85, "total_tokens": 105087376} +{"current_steps": 155925, "total_steps": 204665, "loss": 0.0002, "lr": 3.260864074687932e-07, "epoch": 3.8092736911538365, "percentage": 76.19, "elapsed_time": "3:22:12", "remaining_time": "1:03:12", "throughput": 8661.87, "total_tokens": 105090640} +{"current_steps": 155930, "total_steps": 204665, "loss": 0.0, "lr": 3.260234059614005e-07, "epoch": 3.809395841985684, "percentage": 76.19, "elapsed_time": "3:22:12", "remaining_time": "1:03:12", "throughput": 8661.92, "total_tokens": 105094288} +{"current_steps": 155935, "total_steps": 204665, "loss": 0.06, "lr": 3.259604093553472e-07, "epoch": 3.809517992817531, "percentage": 76.19, "elapsed_time": "3:22:13", "remaining_time": "1:03:11", "throughput": 8661.93, "total_tokens": 105097360} +{"current_steps": 155940, "total_steps": 204665, "loss": 0.0, "lr": 3.258974176510908e-07, "epoch": 3.8096401436493785, "percentage": 76.19, "elapsed_time": "3:22:13", "remaining_time": "1:03:11", "throughput": 8662.0, "total_tokens": 105101392} +{"current_steps": 155945, "total_steps": 204665, "loss": 0.0563, "lr": 3.258344308490899e-07, "epoch": 3.8097622944812253, "percentage": 76.2, "elapsed_time": "3:22:13", "remaining_time": "1:03:10", "throughput": 8662.03, "total_tokens": 105104848} +{"current_steps": 155950, "total_steps": 204665, "loss": 0.0378, "lr": 3.2577144894980213e-07, "epoch": 3.8098844453130725, "percentage": 76.2, "elapsed_time": "3:22:14", "remaining_time": "1:03:10", "throughput": 8662.05, "total_tokens": 105108048} +{"current_steps": 155955, "total_steps": 204665, "loss": 0.0, "lr": 3.2570847195368565e-07, "epoch": 3.8100065961449197, "percentage": 76.2, "elapsed_time": "3:22:14", "remaining_time": "1:03:10", "throughput": 8662.05, "total_tokens": 105110928} +{"current_steps": 155960, "total_steps": 204665, "loss": 0.0, "lr": 3.256454998611989e-07, "epoch": 3.810128746976767, "percentage": 76.2, "elapsed_time": "3:22:15", "remaining_time": "1:03:09", "throughput": 8662.1, "total_tokens": 105114640} +{"current_steps": 155965, "total_steps": 204665, "loss": 0.0614, "lr": 3.2558253267279923e-07, "epoch": 3.810250897808614, "percentage": 76.21, "elapsed_time": "3:22:15", "remaining_time": "1:03:09", "throughput": 8662.14, "total_tokens": 105118224} +{"current_steps": 155970, "total_steps": 204665, "loss": 0.0002, "lr": 3.25519570388945e-07, "epoch": 3.8103730486404612, "percentage": 76.21, "elapsed_time": "3:22:15", "remaining_time": "1:03:08", "throughput": 8662.16, "total_tokens": 105121424} +{"current_steps": 155975, "total_steps": 204665, "loss": 0.0, "lr": 3.254566130100935e-07, "epoch": 3.8104951994723084, "percentage": 76.21, "elapsed_time": "3:22:16", "remaining_time": "1:03:08", "throughput": 8662.21, "total_tokens": 105125200} +{"current_steps": 155980, "total_steps": 204665, "loss": 0.0, "lr": 3.253936605367034e-07, "epoch": 3.8106173503041556, "percentage": 76.21, "elapsed_time": "3:22:16", "remaining_time": "1:03:08", "throughput": 8662.22, "total_tokens": 105128272} +{"current_steps": 155985, "total_steps": 204665, "loss": 0.0003, "lr": 3.2533071296923154e-07, "epoch": 3.810739501136003, "percentage": 76.21, "elapsed_time": "3:22:16", "remaining_time": "1:03:07", "throughput": 8662.24, "total_tokens": 105131408} +{"current_steps": 155990, "total_steps": 204665, "loss": 0.0, "lr": 3.2526777030813636e-07, "epoch": 3.81086165196785, "percentage": 76.22, "elapsed_time": "3:22:17", "remaining_time": "1:03:07", "throughput": 8662.26, "total_tokens": 105134672} +{"current_steps": 155995, "total_steps": 204665, "loss": 0.0569, "lr": 3.2520483255387567e-07, "epoch": 3.810983802799697, "percentage": 76.22, "elapsed_time": "3:22:17", "remaining_time": "1:03:06", "throughput": 8662.28, "total_tokens": 105138000} +{"current_steps": 156000, "total_steps": 204665, "loss": 0.0, "lr": 3.2514189970690666e-07, "epoch": 3.8111059536315444, "percentage": 76.22, "elapsed_time": "3:22:17", "remaining_time": "1:03:06", "throughput": 8662.32, "total_tokens": 105141456} +{"current_steps": 156005, "total_steps": 204665, "loss": 0.0, "lr": 3.2507897176768753e-07, "epoch": 3.8112281044633916, "percentage": 76.22, "elapsed_time": "3:22:18", "remaining_time": "1:03:06", "throughput": 8662.35, "total_tokens": 105144912} +{"current_steps": 156010, "total_steps": 204665, "loss": 0.0001, "lr": 3.250160487366753e-07, "epoch": 3.8113502552952383, "percentage": 76.23, "elapsed_time": "3:22:18", "remaining_time": "1:03:05", "throughput": 8662.38, "total_tokens": 105148240} +{"current_steps": 156015, "total_steps": 204665, "loss": 0.0, "lr": 3.2495313061432836e-07, "epoch": 3.811472406127086, "percentage": 76.23, "elapsed_time": "3:22:18", "remaining_time": "1:03:05", "throughput": 8662.38, "total_tokens": 105151184} +{"current_steps": 156020, "total_steps": 204665, "loss": 0.0, "lr": 3.2489021740110344e-07, "epoch": 3.8115945569589327, "percentage": 76.23, "elapsed_time": "3:22:19", "remaining_time": "1:03:04", "throughput": 8662.42, "total_tokens": 105154640} +{"current_steps": 156025, "total_steps": 204665, "loss": 0.0, "lr": 3.2482730909745883e-07, "epoch": 3.8117167077907803, "percentage": 76.23, "elapsed_time": "3:22:19", "remaining_time": "1:03:04", "throughput": 8662.44, "total_tokens": 105157968} +{"current_steps": 156030, "total_steps": 204665, "loss": 0.0001, "lr": 3.2476440570385155e-07, "epoch": 3.811838858622627, "percentage": 76.24, "elapsed_time": "3:22:19", "remaining_time": "1:03:04", "throughput": 8662.46, "total_tokens": 105161168} +{"current_steps": 156035, "total_steps": 204665, "loss": 0.0007, "lr": 3.2470150722073875e-07, "epoch": 3.8119610094544742, "percentage": 76.24, "elapsed_time": "3:22:20", "remaining_time": "1:03:03", "throughput": 8662.48, "total_tokens": 105164432} +{"current_steps": 156040, "total_steps": 204665, "loss": 0.0, "lr": 3.246386136485786e-07, "epoch": 3.8120831602863214, "percentage": 76.24, "elapsed_time": "3:22:20", "remaining_time": "1:03:03", "throughput": 8662.52, "total_tokens": 105167888} +{"current_steps": 156045, "total_steps": 204665, "loss": 0.0, "lr": 3.2457572498782783e-07, "epoch": 3.8122053111181686, "percentage": 76.24, "elapsed_time": "3:22:20", "remaining_time": "1:03:02", "throughput": 8662.56, "total_tokens": 105171472} +{"current_steps": 156050, "total_steps": 204665, "loss": 0.0, "lr": 3.2451284123894394e-07, "epoch": 3.812327461950016, "percentage": 76.25, "elapsed_time": "3:22:21", "remaining_time": "1:03:02", "throughput": 8662.59, "total_tokens": 105174800} +{"current_steps": 156055, "total_steps": 204665, "loss": 0.0, "lr": 3.2444996240238474e-07, "epoch": 3.812449612781863, "percentage": 76.25, "elapsed_time": "3:22:21", "remaining_time": "1:03:02", "throughput": 8662.62, "total_tokens": 105178128} +{"current_steps": 156060, "total_steps": 204665, "loss": 0.0001, "lr": 3.2438708847860684e-07, "epoch": 3.81257176361371, "percentage": 76.25, "elapsed_time": "3:22:21", "remaining_time": "1:03:01", "throughput": 8662.67, "total_tokens": 105181904} +{"current_steps": 156065, "total_steps": 204665, "loss": 0.0879, "lr": 3.243242194680681e-07, "epoch": 3.8126939144455574, "percentage": 76.25, "elapsed_time": "3:22:22", "remaining_time": "1:03:01", "throughput": 8662.73, "total_tokens": 105185744} +{"current_steps": 156070, "total_steps": 204665, "loss": 0.0, "lr": 3.242613553712249e-07, "epoch": 3.8128160652774046, "percentage": 76.26, "elapsed_time": "3:22:22", "remaining_time": "1:03:00", "throughput": 8662.78, "total_tokens": 105189392} +{"current_steps": 156075, "total_steps": 204665, "loss": 0.0671, "lr": 3.2419849618853535e-07, "epoch": 3.8129382161092518, "percentage": 76.26, "elapsed_time": "3:22:23", "remaining_time": "1:03:00", "throughput": 8662.8, "total_tokens": 105192720} +{"current_steps": 156080, "total_steps": 204665, "loss": 0.0, "lr": 3.2413564192045573e-07, "epoch": 3.813060366941099, "percentage": 76.26, "elapsed_time": "3:22:23", "remaining_time": "1:03:00", "throughput": 8662.81, "total_tokens": 105195792} +{"current_steps": 156085, "total_steps": 204665, "loss": 0.0302, "lr": 3.2407279256744344e-07, "epoch": 3.813182517772946, "percentage": 76.26, "elapsed_time": "3:22:23", "remaining_time": "1:02:59", "throughput": 8662.82, "total_tokens": 105198864} +{"current_steps": 156090, "total_steps": 204665, "loss": 0.0001, "lr": 3.2400994812995595e-07, "epoch": 3.8133046686047933, "percentage": 76.27, "elapsed_time": "3:22:24", "remaining_time": "1:02:59", "throughput": 8662.86, "total_tokens": 105202448} +{"current_steps": 156095, "total_steps": 204665, "loss": 0.0, "lr": 3.2394710860844963e-07, "epoch": 3.8134268194366405, "percentage": 76.27, "elapsed_time": "3:22:24", "remaining_time": "1:02:58", "throughput": 8662.89, "total_tokens": 105205840} +{"current_steps": 156100, "total_steps": 204665, "loss": 0.0173, "lr": 3.2388427400338215e-07, "epoch": 3.8135489702684877, "percentage": 76.27, "elapsed_time": "3:22:24", "remaining_time": "1:02:58", "throughput": 8662.93, "total_tokens": 105209360} +{"current_steps": 156105, "total_steps": 204665, "loss": 0.0, "lr": 3.2382144431520975e-07, "epoch": 3.8136711211003345, "percentage": 76.27, "elapsed_time": "3:22:25", "remaining_time": "1:02:58", "throughput": 8662.98, "total_tokens": 105213008} +{"current_steps": 156110, "total_steps": 204665, "loss": 0.0359, "lr": 3.2375861954438967e-07, "epoch": 3.813793271932182, "percentage": 76.28, "elapsed_time": "3:22:25", "remaining_time": "1:02:57", "throughput": 8663.02, "total_tokens": 105216592} +{"current_steps": 156115, "total_steps": 204665, "loss": 0.0, "lr": 3.236957996913791e-07, "epoch": 3.813915422764029, "percentage": 76.28, "elapsed_time": "3:22:25", "remaining_time": "1:02:57", "throughput": 8663.06, "total_tokens": 105220176} +{"current_steps": 156120, "total_steps": 204665, "loss": 0.0003, "lr": 3.236329847566346e-07, "epoch": 3.814037573595876, "percentage": 76.28, "elapsed_time": "3:22:26", "remaining_time": "1:02:56", "throughput": 8663.11, "total_tokens": 105223824} +{"current_steps": 156125, "total_steps": 204665, "loss": 0.0, "lr": 3.2357017474061255e-07, "epoch": 3.814159724427723, "percentage": 76.28, "elapsed_time": "3:22:26", "remaining_time": "1:02:56", "throughput": 8663.13, "total_tokens": 105227024} +{"current_steps": 156130, "total_steps": 204665, "loss": 0.0001, "lr": 3.2350736964377045e-07, "epoch": 3.8142818752595704, "percentage": 76.29, "elapsed_time": "3:22:26", "remaining_time": "1:02:56", "throughput": 8663.14, "total_tokens": 105230224} +{"current_steps": 156135, "total_steps": 204665, "loss": 0.0513, "lr": 3.2344456946656485e-07, "epoch": 3.8144040260914176, "percentage": 76.29, "elapsed_time": "3:22:27", "remaining_time": "1:02:55", "throughput": 8663.18, "total_tokens": 105233616} +{"current_steps": 156140, "total_steps": 204665, "loss": 0.0, "lr": 3.233817742094519e-07, "epoch": 3.814526176923265, "percentage": 76.29, "elapsed_time": "3:22:27", "remaining_time": "1:02:55", "throughput": 8663.2, "total_tokens": 105236880} +{"current_steps": 156145, "total_steps": 204665, "loss": 0.0001, "lr": 3.233189838728887e-07, "epoch": 3.814648327755112, "percentage": 76.29, "elapsed_time": "3:22:27", "remaining_time": "1:02:54", "throughput": 8663.21, "total_tokens": 105239952} +{"current_steps": 156150, "total_steps": 204665, "loss": 0.0, "lr": 3.232561984573321e-07, "epoch": 3.814770478586959, "percentage": 76.3, "elapsed_time": "3:22:28", "remaining_time": "1:02:54", "throughput": 8663.23, "total_tokens": 105243216} +{"current_steps": 156155, "total_steps": 204665, "loss": 0.0002, "lr": 3.2319341796323817e-07, "epoch": 3.8148926294188064, "percentage": 76.3, "elapsed_time": "3:22:28", "remaining_time": "1:02:54", "throughput": 8663.25, "total_tokens": 105246416} +{"current_steps": 156160, "total_steps": 204665, "loss": 0.0, "lr": 3.231306423910641e-07, "epoch": 3.8150147802506535, "percentage": 76.3, "elapsed_time": "3:22:28", "remaining_time": "1:02:53", "throughput": 8663.27, "total_tokens": 105249680} +{"current_steps": 156165, "total_steps": 204665, "loss": 0.0314, "lr": 3.230678717412657e-07, "epoch": 3.8151369310825007, "percentage": 76.3, "elapsed_time": "3:22:29", "remaining_time": "1:02:53", "throughput": 8663.3, "total_tokens": 105253008} +{"current_steps": 156170, "total_steps": 204665, "loss": 0.0001, "lr": 3.230051060142999e-07, "epoch": 3.815259081914348, "percentage": 76.31, "elapsed_time": "3:22:29", "remaining_time": "1:02:52", "throughput": 8663.29, "total_tokens": 105255888} +{"current_steps": 156175, "total_steps": 204665, "loss": 0.0001, "lr": 3.2294234521062325e-07, "epoch": 3.815381232746195, "percentage": 76.31, "elapsed_time": "3:22:29", "remaining_time": "1:02:52", "throughput": 8663.33, "total_tokens": 105259408} +{"current_steps": 156180, "total_steps": 204665, "loss": 0.0, "lr": 3.228795893306917e-07, "epoch": 3.8155033835780423, "percentage": 76.31, "elapsed_time": "3:22:30", "remaining_time": "1:02:51", "throughput": 8663.33, "total_tokens": 105262352} +{"current_steps": 156185, "total_steps": 204665, "loss": 0.0001, "lr": 3.228168383749622e-07, "epoch": 3.8156255344098895, "percentage": 76.31, "elapsed_time": "3:22:30", "remaining_time": "1:02:51", "throughput": 8663.36, "total_tokens": 105265680} +{"current_steps": 156190, "total_steps": 204665, "loss": 0.0323, "lr": 3.2275409234389053e-07, "epoch": 3.8157476852417362, "percentage": 76.31, "elapsed_time": "3:22:31", "remaining_time": "1:02:51", "throughput": 8663.38, "total_tokens": 105269008} +{"current_steps": 156195, "total_steps": 204665, "loss": 0.0, "lr": 3.226913512379336e-07, "epoch": 3.815869836073584, "percentage": 76.32, "elapsed_time": "3:22:31", "remaining_time": "1:02:50", "throughput": 8663.39, "total_tokens": 105272080} +{"current_steps": 156200, "total_steps": 204665, "loss": 0.0, "lr": 3.226286150575469e-07, "epoch": 3.8159919869054306, "percentage": 76.32, "elapsed_time": "3:22:31", "remaining_time": "1:02:50", "throughput": 8663.41, "total_tokens": 105275216} +{"current_steps": 156205, "total_steps": 204665, "loss": 0.0, "lr": 3.225658838031872e-07, "epoch": 3.8161141377372783, "percentage": 76.32, "elapsed_time": "3:22:32", "remaining_time": "1:02:49", "throughput": 8663.44, "total_tokens": 105278672} +{"current_steps": 156210, "total_steps": 204665, "loss": 0.0, "lr": 3.225031574753109e-07, "epoch": 3.816236288569125, "percentage": 76.32, "elapsed_time": "3:22:32", "remaining_time": "1:02:49", "throughput": 8663.45, "total_tokens": 105281744} +{"current_steps": 156215, "total_steps": 204665, "loss": 0.0, "lr": 3.2244043607437353e-07, "epoch": 3.816358439400972, "percentage": 76.33, "elapsed_time": "3:22:32", "remaining_time": "1:02:49", "throughput": 8663.47, "total_tokens": 105284880} +{"current_steps": 156220, "total_steps": 204665, "loss": 0.0, "lr": 3.223777196008318e-07, "epoch": 3.8164805902328194, "percentage": 76.33, "elapsed_time": "3:22:33", "remaining_time": "1:02:48", "throughput": 8663.47, "total_tokens": 105287952} +{"current_steps": 156225, "total_steps": 204665, "loss": 0.0277, "lr": 3.2231500805514167e-07, "epoch": 3.8166027410646666, "percentage": 76.33, "elapsed_time": "3:22:33", "remaining_time": "1:02:48", "throughput": 8663.55, "total_tokens": 105292048} +{"current_steps": 156230, "total_steps": 204665, "loss": 0.0, "lr": 3.222523014377587e-07, "epoch": 3.8167248918965138, "percentage": 76.33, "elapsed_time": "3:22:33", "remaining_time": "1:02:47", "throughput": 8663.58, "total_tokens": 105295376} +{"current_steps": 156235, "total_steps": 204665, "loss": 0.0, "lr": 3.2218959974913963e-07, "epoch": 3.816847042728361, "percentage": 76.34, "elapsed_time": "3:22:34", "remaining_time": "1:02:47", "throughput": 8663.62, "total_tokens": 105298960} +{"current_steps": 156240, "total_steps": 204665, "loss": 0.0, "lr": 3.2212690298973976e-07, "epoch": 3.816969193560208, "percentage": 76.34, "elapsed_time": "3:22:34", "remaining_time": "1:02:47", "throughput": 8663.63, "total_tokens": 105302096} +{"current_steps": 156245, "total_steps": 204665, "loss": 0.0001, "lr": 3.220642111600157e-07, "epoch": 3.8170913443920553, "percentage": 76.34, "elapsed_time": "3:22:34", "remaining_time": "1:02:46", "throughput": 8663.64, "total_tokens": 105305104} +{"current_steps": 156250, "total_steps": 204665, "loss": 0.0, "lr": 3.220015242604227e-07, "epoch": 3.8172134952239025, "percentage": 76.34, "elapsed_time": "3:22:35", "remaining_time": "1:02:46", "throughput": 8663.65, "total_tokens": 105308176} +{"current_steps": 156255, "total_steps": 204665, "loss": 0.0001, "lr": 3.219388422914173e-07, "epoch": 3.8173356460557497, "percentage": 76.35, "elapsed_time": "3:22:35", "remaining_time": "1:02:45", "throughput": 8663.67, "total_tokens": 105311376} +{"current_steps": 156260, "total_steps": 204665, "loss": 0.0, "lr": 3.2187616525345474e-07, "epoch": 3.817457796887597, "percentage": 76.35, "elapsed_time": "3:22:35", "remaining_time": "1:02:45", "throughput": 8663.72, "total_tokens": 105315152} +{"current_steps": 156265, "total_steps": 204665, "loss": 0.0, "lr": 3.2181349314699115e-07, "epoch": 3.817579947719444, "percentage": 76.35, "elapsed_time": "3:22:36", "remaining_time": "1:02:45", "throughput": 8663.76, "total_tokens": 105318544} +{"current_steps": 156270, "total_steps": 204665, "loss": 0.0, "lr": 3.217508259724825e-07, "epoch": 3.8177020985512913, "percentage": 76.35, "elapsed_time": "3:22:36", "remaining_time": "1:02:44", "throughput": 8663.77, "total_tokens": 105321680} +{"current_steps": 156275, "total_steps": 204665, "loss": 0.0002, "lr": 3.216881637303839e-07, "epoch": 3.817824249383138, "percentage": 76.36, "elapsed_time": "3:22:36", "remaining_time": "1:02:44", "throughput": 8663.77, "total_tokens": 105324688} +{"current_steps": 156280, "total_steps": 204665, "loss": 0.0007, "lr": 3.216255064211517e-07, "epoch": 3.8179464002149857, "percentage": 76.36, "elapsed_time": "3:22:37", "remaining_time": "1:02:43", "throughput": 8663.81, "total_tokens": 105328080} +{"current_steps": 156285, "total_steps": 204665, "loss": 0.0, "lr": 3.215628540452411e-07, "epoch": 3.8180685510468324, "percentage": 76.36, "elapsed_time": "3:22:37", "remaining_time": "1:02:43", "throughput": 8663.86, "total_tokens": 105331856} +{"current_steps": 156290, "total_steps": 204665, "loss": 0.0, "lr": 3.215002066031082e-07, "epoch": 3.81819070187868, "percentage": 76.36, "elapsed_time": "3:22:37", "remaining_time": "1:02:43", "throughput": 8663.89, "total_tokens": 105335312} +{"current_steps": 156295, "total_steps": 204665, "loss": 0.0834, "lr": 3.2143756409520783e-07, "epoch": 3.818312852710527, "percentage": 76.37, "elapsed_time": "3:22:38", "remaining_time": "1:02:42", "throughput": 8663.92, "total_tokens": 105338704} +{"current_steps": 156300, "total_steps": 204665, "loss": 0.0001, "lr": 3.213749265219962e-07, "epoch": 3.818435003542374, "percentage": 76.37, "elapsed_time": "3:22:38", "remaining_time": "1:02:42", "throughput": 8663.95, "total_tokens": 105341968} +{"current_steps": 156305, "total_steps": 204665, "loss": 0.0, "lr": 3.2131229388392877e-07, "epoch": 3.818557154374221, "percentage": 76.37, "elapsed_time": "3:22:39", "remaining_time": "1:02:41", "throughput": 8663.98, "total_tokens": 105345360} +{"current_steps": 156310, "total_steps": 204665, "loss": 0.0001, "lr": 3.2124966618146066e-07, "epoch": 3.8186793052060684, "percentage": 76.37, "elapsed_time": "3:22:39", "remaining_time": "1:02:41", "throughput": 8664.04, "total_tokens": 105349264} +{"current_steps": 156315, "total_steps": 204665, "loss": 0.0, "lr": 3.211870434150479e-07, "epoch": 3.8188014560379155, "percentage": 76.38, "elapsed_time": "3:22:39", "remaining_time": "1:02:41", "throughput": 8664.07, "total_tokens": 105352528} +{"current_steps": 156320, "total_steps": 204665, "loss": 0.0, "lr": 3.211244255851452e-07, "epoch": 3.8189236068697627, "percentage": 76.38, "elapsed_time": "3:22:40", "remaining_time": "1:02:40", "throughput": 8664.11, "total_tokens": 105356112} +{"current_steps": 156325, "total_steps": 204665, "loss": 0.0, "lr": 3.2106181269220856e-07, "epoch": 3.81904575770161, "percentage": 76.38, "elapsed_time": "3:22:40", "remaining_time": "1:02:40", "throughput": 8664.14, "total_tokens": 105359568} +{"current_steps": 156330, "total_steps": 204665, "loss": 0.0001, "lr": 3.209992047366927e-07, "epoch": 3.819167908533457, "percentage": 76.38, "elapsed_time": "3:22:40", "remaining_time": "1:02:39", "throughput": 8664.19, "total_tokens": 105363152} +{"current_steps": 156335, "total_steps": 204665, "loss": 0.0, "lr": 3.209366017190536e-07, "epoch": 3.8192900593653043, "percentage": 76.39, "elapsed_time": "3:22:41", "remaining_time": "1:02:39", "throughput": 8664.22, "total_tokens": 105366608} +{"current_steps": 156340, "total_steps": 204665, "loss": 0.0001, "lr": 3.2087400363974615e-07, "epoch": 3.8194122101971515, "percentage": 76.39, "elapsed_time": "3:22:41", "remaining_time": "1:02:39", "throughput": 8664.25, "total_tokens": 105370000} +{"current_steps": 156345, "total_steps": 204665, "loss": 0.0, "lr": 3.2081141049922534e-07, "epoch": 3.8195343610289987, "percentage": 76.39, "elapsed_time": "3:22:41", "remaining_time": "1:02:38", "throughput": 8664.29, "total_tokens": 105373456} +{"current_steps": 156350, "total_steps": 204665, "loss": 0.0, "lr": 3.207488222979469e-07, "epoch": 3.819656511860846, "percentage": 76.39, "elapsed_time": "3:22:42", "remaining_time": "1:02:38", "throughput": 8664.32, "total_tokens": 105376912} +{"current_steps": 156355, "total_steps": 204665, "loss": 0.0, "lr": 3.2068623903636536e-07, "epoch": 3.819778662692693, "percentage": 76.4, "elapsed_time": "3:22:42", "remaining_time": "1:02:37", "throughput": 8664.36, "total_tokens": 105380368} +{"current_steps": 156360, "total_steps": 204665, "loss": 0.0, "lr": 3.206236607149363e-07, "epoch": 3.8199008135245403, "percentage": 76.4, "elapsed_time": "3:22:42", "remaining_time": "1:02:37", "throughput": 8664.41, "total_tokens": 105384144} +{"current_steps": 156365, "total_steps": 204665, "loss": 0.0, "lr": 3.2056108733411504e-07, "epoch": 3.8200229643563874, "percentage": 76.4, "elapsed_time": "3:22:43", "remaining_time": "1:02:37", "throughput": 8664.42, "total_tokens": 105387152} +{"current_steps": 156370, "total_steps": 204665, "loss": 0.0, "lr": 3.2049851889435585e-07, "epoch": 3.820145115188234, "percentage": 76.4, "elapsed_time": "3:22:43", "remaining_time": "1:02:36", "throughput": 8664.45, "total_tokens": 105390608} +{"current_steps": 156375, "total_steps": 204665, "loss": 0.0, "lr": 3.2043595539611455e-07, "epoch": 3.820267266020082, "percentage": 76.41, "elapsed_time": "3:22:43", "remaining_time": "1:02:36", "throughput": 8664.5, "total_tokens": 105394256} +{"current_steps": 156380, "total_steps": 204665, "loss": 0.0, "lr": 3.2037339683984554e-07, "epoch": 3.8203894168519286, "percentage": 76.41, "elapsed_time": "3:22:44", "remaining_time": "1:02:35", "throughput": 8664.53, "total_tokens": 105397648} +{"current_steps": 156385, "total_steps": 204665, "loss": 0.0479, "lr": 3.203108432260042e-07, "epoch": 3.820511567683776, "percentage": 76.41, "elapsed_time": "3:22:44", "remaining_time": "1:02:35", "throughput": 8664.55, "total_tokens": 105400848} +{"current_steps": 156390, "total_steps": 204665, "loss": 0.0, "lr": 3.20248294555045e-07, "epoch": 3.820633718515623, "percentage": 76.41, "elapsed_time": "3:22:44", "remaining_time": "1:02:35", "throughput": 8664.61, "total_tokens": 105404688} +{"current_steps": 156395, "total_steps": 204665, "loss": 0.0, "lr": 3.201857508274231e-07, "epoch": 3.82075586934747, "percentage": 76.42, "elapsed_time": "3:22:45", "remaining_time": "1:02:34", "throughput": 8664.65, "total_tokens": 105408272} +{"current_steps": 156400, "total_steps": 204665, "loss": 0.0166, "lr": 3.201232120435934e-07, "epoch": 3.8208780201793173, "percentage": 76.42, "elapsed_time": "3:22:45", "remaining_time": "1:02:34", "throughput": 8664.66, "total_tokens": 105411344} +{"current_steps": 156405, "total_steps": 204665, "loss": 0.1224, "lr": 3.2006067820401026e-07, "epoch": 3.8210001710111645, "percentage": 76.42, "elapsed_time": "3:22:46", "remaining_time": "1:02:33", "throughput": 8664.7, "total_tokens": 105414864} +{"current_steps": 156410, "total_steps": 204665, "loss": 0.0, "lr": 3.1999814930912914e-07, "epoch": 3.8211223218430117, "percentage": 76.42, "elapsed_time": "3:22:46", "remaining_time": "1:02:33", "throughput": 8664.75, "total_tokens": 105418640} +{"current_steps": 156415, "total_steps": 204665, "loss": 0.0, "lr": 3.1993562535940413e-07, "epoch": 3.821244472674859, "percentage": 76.42, "elapsed_time": "3:22:46", "remaining_time": "1:02:33", "throughput": 8664.8, "total_tokens": 105422288} +{"current_steps": 156420, "total_steps": 204665, "loss": 0.0, "lr": 3.198731063552901e-07, "epoch": 3.821366623506706, "percentage": 76.43, "elapsed_time": "3:22:47", "remaining_time": "1:02:32", "throughput": 8664.84, "total_tokens": 105425744} +{"current_steps": 156425, "total_steps": 204665, "loss": 0.0, "lr": 3.1981059229724205e-07, "epoch": 3.8214887743385533, "percentage": 76.43, "elapsed_time": "3:22:47", "remaining_time": "1:02:32", "throughput": 8664.86, "total_tokens": 105428944} +{"current_steps": 156430, "total_steps": 204665, "loss": 0.0001, "lr": 3.197480831857143e-07, "epoch": 3.8216109251704005, "percentage": 76.43, "elapsed_time": "3:22:47", "remaining_time": "1:02:31", "throughput": 8664.89, "total_tokens": 105432400} +{"current_steps": 156435, "total_steps": 204665, "loss": 0.0, "lr": 3.1968557902116124e-07, "epoch": 3.8217330760022477, "percentage": 76.43, "elapsed_time": "3:22:48", "remaining_time": "1:02:31", "throughput": 8664.92, "total_tokens": 105435664} +{"current_steps": 156440, "total_steps": 204665, "loss": 0.0, "lr": 3.196230798040379e-07, "epoch": 3.821855226834095, "percentage": 76.44, "elapsed_time": "3:22:48", "remaining_time": "1:02:31", "throughput": 8664.96, "total_tokens": 105439248} +{"current_steps": 156445, "total_steps": 204665, "loss": 0.0, "lr": 3.195605855347985e-07, "epoch": 3.821977377665942, "percentage": 76.44, "elapsed_time": "3:22:48", "remaining_time": "1:02:30", "throughput": 8664.98, "total_tokens": 105442576} +{"current_steps": 156450, "total_steps": 204665, "loss": 0.0442, "lr": 3.194980962138972e-07, "epoch": 3.8220995284977892, "percentage": 76.44, "elapsed_time": "3:22:49", "remaining_time": "1:02:30", "throughput": 8665.0, "total_tokens": 105445776} +{"current_steps": 156455, "total_steps": 204665, "loss": 0.0698, "lr": 3.1943561184178893e-07, "epoch": 3.822221679329636, "percentage": 76.44, "elapsed_time": "3:22:49", "remaining_time": "1:02:29", "throughput": 8665.07, "total_tokens": 105449744} +{"current_steps": 156460, "total_steps": 204665, "loss": 0.0703, "lr": 3.1937313241892806e-07, "epoch": 3.8223438301614836, "percentage": 76.45, "elapsed_time": "3:22:49", "remaining_time": "1:02:29", "throughput": 8665.08, "total_tokens": 105452880} +{"current_steps": 156465, "total_steps": 204665, "loss": 0.0317, "lr": 3.1931065794576863e-07, "epoch": 3.8224659809933303, "percentage": 76.45, "elapsed_time": "3:22:50", "remaining_time": "1:02:29", "throughput": 8665.11, "total_tokens": 105456208} +{"current_steps": 156470, "total_steps": 204665, "loss": 0.0, "lr": 3.1924818842276547e-07, "epoch": 3.822588131825178, "percentage": 76.45, "elapsed_time": "3:22:50", "remaining_time": "1:02:28", "throughput": 8665.13, "total_tokens": 105459472} +{"current_steps": 156475, "total_steps": 204665, "loss": 0.0, "lr": 3.1918572385037225e-07, "epoch": 3.8227102826570247, "percentage": 76.45, "elapsed_time": "3:22:50", "remaining_time": "1:02:28", "throughput": 8665.14, "total_tokens": 105462608} +{"current_steps": 156480, "total_steps": 204665, "loss": 0.0, "lr": 3.191232642290439e-07, "epoch": 3.822832433488872, "percentage": 76.46, "elapsed_time": "3:22:51", "remaining_time": "1:02:27", "throughput": 8665.22, "total_tokens": 105466640} +{"current_steps": 156485, "total_steps": 204665, "loss": 0.0001, "lr": 3.19060809559234e-07, "epoch": 3.822954584320719, "percentage": 76.46, "elapsed_time": "3:22:51", "remaining_time": "1:02:27", "throughput": 8665.26, "total_tokens": 105470224} +{"current_steps": 156490, "total_steps": 204665, "loss": 0.0, "lr": 3.18998359841397e-07, "epoch": 3.8230767351525663, "percentage": 76.46, "elapsed_time": "3:22:51", "remaining_time": "1:02:27", "throughput": 8665.27, "total_tokens": 105473360} +{"current_steps": 156495, "total_steps": 204665, "loss": 0.0001, "lr": 3.189359150759875e-07, "epoch": 3.8231988859844135, "percentage": 76.46, "elapsed_time": "3:22:52", "remaining_time": "1:02:26", "throughput": 8665.29, "total_tokens": 105476496} +{"current_steps": 156500, "total_steps": 204665, "loss": 0.0001, "lr": 3.1887347526345885e-07, "epoch": 3.8233210368162607, "percentage": 76.47, "elapsed_time": "3:22:52", "remaining_time": "1:02:26", "throughput": 8665.3, "total_tokens": 105479568} +{"current_steps": 156505, "total_steps": 204665, "loss": 0.0, "lr": 3.1881104040426574e-07, "epoch": 3.823443187648108, "percentage": 76.47, "elapsed_time": "3:22:52", "remaining_time": "1:02:25", "throughput": 8665.3, "total_tokens": 105482512} +{"current_steps": 156510, "total_steps": 204665, "loss": 0.0, "lr": 3.187486104988617e-07, "epoch": 3.823565338479955, "percentage": 76.47, "elapsed_time": "3:22:53", "remaining_time": "1:02:25", "throughput": 8665.33, "total_tokens": 105485968} +{"current_steps": 156515, "total_steps": 204665, "loss": 0.0003, "lr": 3.18686185547701e-07, "epoch": 3.8236874893118022, "percentage": 76.47, "elapsed_time": "3:22:53", "remaining_time": "1:02:25", "throughput": 8665.39, "total_tokens": 105489680} +{"current_steps": 156520, "total_steps": 204665, "loss": 0.0, "lr": 3.1862376555123795e-07, "epoch": 3.8238096401436494, "percentage": 76.48, "elapsed_time": "3:22:54", "remaining_time": "1:02:24", "throughput": 8665.4, "total_tokens": 105492880} +{"current_steps": 156525, "total_steps": 204665, "loss": 0.0, "lr": 3.1856135050992584e-07, "epoch": 3.8239317909754966, "percentage": 76.48, "elapsed_time": "3:22:54", "remaining_time": "1:02:24", "throughput": 8665.42, "total_tokens": 105495952} +{"current_steps": 156530, "total_steps": 204665, "loss": 0.0002, "lr": 3.184989404242191e-07, "epoch": 3.824053941807344, "percentage": 76.48, "elapsed_time": "3:22:54", "remaining_time": "1:02:23", "throughput": 8665.46, "total_tokens": 105499472} +{"current_steps": 156535, "total_steps": 204665, "loss": 0.0001, "lr": 3.184365352945715e-07, "epoch": 3.824176092639191, "percentage": 76.48, "elapsed_time": "3:22:55", "remaining_time": "1:02:23", "throughput": 8665.47, "total_tokens": 105502672} +{"current_steps": 156540, "total_steps": 204665, "loss": 0.0, "lr": 3.183741351214363e-07, "epoch": 3.824298243471038, "percentage": 76.49, "elapsed_time": "3:22:55", "remaining_time": "1:02:23", "throughput": 8665.5, "total_tokens": 105505936} +{"current_steps": 156545, "total_steps": 204665, "loss": 0.0, "lr": 3.1831173990526806e-07, "epoch": 3.8244203943028854, "percentage": 76.49, "elapsed_time": "3:22:55", "remaining_time": "1:02:22", "throughput": 8665.52, "total_tokens": 105509200} +{"current_steps": 156550, "total_steps": 204665, "loss": 0.0, "lr": 3.1824934964652e-07, "epoch": 3.824542545134732, "percentage": 76.49, "elapsed_time": "3:22:56", "remaining_time": "1:02:22", "throughput": 8665.55, "total_tokens": 105512528} +{"current_steps": 156555, "total_steps": 204665, "loss": 0.0016, "lr": 3.1818696434564616e-07, "epoch": 3.8246646959665798, "percentage": 76.49, "elapsed_time": "3:22:56", "remaining_time": "1:02:21", "throughput": 8665.56, "total_tokens": 105515728} +{"current_steps": 156560, "total_steps": 204665, "loss": 0.0001, "lr": 3.1812458400309993e-07, "epoch": 3.8247868467984265, "percentage": 76.5, "elapsed_time": "3:22:56", "remaining_time": "1:02:21", "throughput": 8665.57, "total_tokens": 105518800} +{"current_steps": 156565, "total_steps": 204665, "loss": 0.0, "lr": 3.180622086193354e-07, "epoch": 3.8249089976302737, "percentage": 76.5, "elapsed_time": "3:22:57", "remaining_time": "1:02:21", "throughput": 8665.65, "total_tokens": 105522832} +{"current_steps": 156570, "total_steps": 204665, "loss": 0.0, "lr": 3.1799983819480557e-07, "epoch": 3.825031148462121, "percentage": 76.5, "elapsed_time": "3:22:57", "remaining_time": "1:02:20", "throughput": 8665.68, "total_tokens": 105526224} +{"current_steps": 156575, "total_steps": 204665, "loss": 0.0, "lr": 3.179374727299644e-07, "epoch": 3.825153299293968, "percentage": 76.5, "elapsed_time": "3:22:57", "remaining_time": "1:02:20", "throughput": 8665.72, "total_tokens": 105529744} +{"current_steps": 156580, "total_steps": 204665, "loss": 0.0, "lr": 3.1787511222526565e-07, "epoch": 3.8252754501258153, "percentage": 76.51, "elapsed_time": "3:22:58", "remaining_time": "1:02:19", "throughput": 8665.75, "total_tokens": 105533136} +{"current_steps": 156585, "total_steps": 204665, "loss": 0.0332, "lr": 3.1781275668116225e-07, "epoch": 3.8253976009576625, "percentage": 76.51, "elapsed_time": "3:22:58", "remaining_time": "1:02:19", "throughput": 8665.78, "total_tokens": 105536528} +{"current_steps": 156590, "total_steps": 204665, "loss": 0.0, "lr": 3.177504060981083e-07, "epoch": 3.8255197517895096, "percentage": 76.51, "elapsed_time": "3:22:58", "remaining_time": "1:02:19", "throughput": 8665.81, "total_tokens": 105539984} +{"current_steps": 156595, "total_steps": 204665, "loss": 0.0, "lr": 3.176880604765565e-07, "epoch": 3.825641902621357, "percentage": 76.51, "elapsed_time": "3:22:59", "remaining_time": "1:02:18", "throughput": 8665.84, "total_tokens": 105543376} +{"current_steps": 156600, "total_steps": 204665, "loss": 0.0, "lr": 3.1762571981696106e-07, "epoch": 3.825764053453204, "percentage": 76.52, "elapsed_time": "3:22:59", "remaining_time": "1:02:18", "throughput": 8665.89, "total_tokens": 105547024} +{"current_steps": 156605, "total_steps": 204665, "loss": 0.0, "lr": 3.175633841197746e-07, "epoch": 3.825886204285051, "percentage": 76.52, "elapsed_time": "3:22:59", "remaining_time": "1:02:17", "throughput": 8665.91, "total_tokens": 105550160} +{"current_steps": 156610, "total_steps": 204665, "loss": 0.0, "lr": 3.1750105338545075e-07, "epoch": 3.8260083551168984, "percentage": 76.52, "elapsed_time": "3:23:00", "remaining_time": "1:02:17", "throughput": 8665.93, "total_tokens": 105553424} +{"current_steps": 156615, "total_steps": 204665, "loss": 0.0, "lr": 3.174387276144431e-07, "epoch": 3.8261305059487456, "percentage": 76.52, "elapsed_time": "3:23:00", "remaining_time": "1:02:17", "throughput": 8665.97, "total_tokens": 105556880} +{"current_steps": 156620, "total_steps": 204665, "loss": 0.0, "lr": 3.1737640680720433e-07, "epoch": 3.826252656780593, "percentage": 76.53, "elapsed_time": "3:23:00", "remaining_time": "1:02:16", "throughput": 8665.99, "total_tokens": 105560208} +{"current_steps": 156625, "total_steps": 204665, "loss": 0.0, "lr": 3.173140909641883e-07, "epoch": 3.82637480761244, "percentage": 76.53, "elapsed_time": "3:23:01", "remaining_time": "1:02:16", "throughput": 8666.06, "total_tokens": 105564176} +{"current_steps": 156630, "total_steps": 204665, "loss": 0.0002, "lr": 3.1725178008584743e-07, "epoch": 3.826496958444287, "percentage": 76.53, "elapsed_time": "3:23:01", "remaining_time": "1:02:15", "throughput": 8666.09, "total_tokens": 105567440} +{"current_steps": 156635, "total_steps": 204665, "loss": 0.0, "lr": 3.1718947417263553e-07, "epoch": 3.826619109276134, "percentage": 76.53, "elapsed_time": "3:23:02", "remaining_time": "1:02:15", "throughput": 8666.1, "total_tokens": 105570576} +{"current_steps": 156640, "total_steps": 204665, "loss": 0.0, "lr": 3.1712717322500514e-07, "epoch": 3.8267412601079815, "percentage": 76.53, "elapsed_time": "3:23:02", "remaining_time": "1:02:15", "throughput": 8666.1, "total_tokens": 105573520} +{"current_steps": 156645, "total_steps": 204665, "loss": 0.0, "lr": 3.1706487724341e-07, "epoch": 3.8268634109398283, "percentage": 76.54, "elapsed_time": "3:23:02", "remaining_time": "1:02:14", "throughput": 8666.11, "total_tokens": 105576592} +{"current_steps": 156650, "total_steps": 204665, "loss": 0.0626, "lr": 3.1700258622830265e-07, "epoch": 3.826985561771676, "percentage": 76.54, "elapsed_time": "3:23:03", "remaining_time": "1:02:14", "throughput": 8666.16, "total_tokens": 105580304} +{"current_steps": 156655, "total_steps": 204665, "loss": 0.0, "lr": 3.1694030018013596e-07, "epoch": 3.8271077126035227, "percentage": 76.54, "elapsed_time": "3:23:03", "remaining_time": "1:02:13", "throughput": 8666.2, "total_tokens": 105583888} +{"current_steps": 156660, "total_steps": 204665, "loss": 0.0, "lr": 3.168780190993634e-07, "epoch": 3.82722986343537, "percentage": 76.54, "elapsed_time": "3:23:03", "remaining_time": "1:02:13", "throughput": 8666.22, "total_tokens": 105587024} +{"current_steps": 156665, "total_steps": 204665, "loss": 0.0, "lr": 3.1681574298643743e-07, "epoch": 3.827352014267217, "percentage": 76.55, "elapsed_time": "3:23:04", "remaining_time": "1:02:13", "throughput": 8666.24, "total_tokens": 105590288} +{"current_steps": 156670, "total_steps": 204665, "loss": 0.0, "lr": 3.16753471841811e-07, "epoch": 3.8274741650990642, "percentage": 76.55, "elapsed_time": "3:23:04", "remaining_time": "1:02:12", "throughput": 8666.24, "total_tokens": 105593232} +{"current_steps": 156675, "total_steps": 204665, "loss": 0.0, "lr": 3.1669120566593745e-07, "epoch": 3.8275963159309114, "percentage": 76.55, "elapsed_time": "3:23:04", "remaining_time": "1:02:12", "throughput": 8666.26, "total_tokens": 105596368} +{"current_steps": 156680, "total_steps": 204665, "loss": 0.0, "lr": 3.166289444592689e-07, "epoch": 3.8277184667627586, "percentage": 76.55, "elapsed_time": "3:23:05", "remaining_time": "1:02:11", "throughput": 8666.27, "total_tokens": 105599568} +{"current_steps": 156685, "total_steps": 204665, "loss": 0.0, "lr": 3.1656668822225884e-07, "epoch": 3.827840617594606, "percentage": 76.56, "elapsed_time": "3:23:05", "remaining_time": "1:02:11", "throughput": 8666.32, "total_tokens": 105603152} +{"current_steps": 156690, "total_steps": 204665, "loss": 0.0001, "lr": 3.165044369553592e-07, "epoch": 3.827962768426453, "percentage": 76.56, "elapsed_time": "3:23:05", "remaining_time": "1:02:11", "throughput": 8666.36, "total_tokens": 105606736} +{"current_steps": 156695, "total_steps": 204665, "loss": 0.0001, "lr": 3.1644219065902366e-07, "epoch": 3.8280849192583, "percentage": 76.56, "elapsed_time": "3:23:06", "remaining_time": "1:02:10", "throughput": 8666.41, "total_tokens": 105610448} +{"current_steps": 156700, "total_steps": 204665, "loss": 0.0, "lr": 3.1637994933370393e-07, "epoch": 3.8282070700901474, "percentage": 76.56, "elapsed_time": "3:23:06", "remaining_time": "1:02:10", "throughput": 8666.42, "total_tokens": 105613456} +{"current_steps": 156705, "total_steps": 204665, "loss": 0.0, "lr": 3.16317712979853e-07, "epoch": 3.8283292209219946, "percentage": 76.57, "elapsed_time": "3:23:06", "remaining_time": "1:02:09", "throughput": 8666.44, "total_tokens": 105616784} +{"current_steps": 156710, "total_steps": 204665, "loss": 0.0, "lr": 3.16255481597924e-07, "epoch": 3.8284513717538418, "percentage": 76.57, "elapsed_time": "3:23:07", "remaining_time": "1:02:09", "throughput": 8666.5, "total_tokens": 105620624} +{"current_steps": 156715, "total_steps": 204665, "loss": 0.0, "lr": 3.161932551883687e-07, "epoch": 3.828573522585689, "percentage": 76.57, "elapsed_time": "3:23:07", "remaining_time": "1:02:09", "throughput": 8666.54, "total_tokens": 105624080} +{"current_steps": 156720, "total_steps": 204665, "loss": 0.0001, "lr": 3.161310337516402e-07, "epoch": 3.828695673417536, "percentage": 76.57, "elapsed_time": "3:23:07", "remaining_time": "1:02:08", "throughput": 8666.56, "total_tokens": 105627344} +{"current_steps": 156725, "total_steps": 204665, "loss": 0.0, "lr": 3.1606881728819057e-07, "epoch": 3.8288178242493833, "percentage": 76.58, "elapsed_time": "3:23:08", "remaining_time": "1:02:08", "throughput": 8666.57, "total_tokens": 105630416} +{"current_steps": 156730, "total_steps": 204665, "loss": 0.0, "lr": 3.160066057984724e-07, "epoch": 3.82893997508123, "percentage": 76.58, "elapsed_time": "3:23:08", "remaining_time": "1:02:07", "throughput": 8666.59, "total_tokens": 105633680} +{"current_steps": 156735, "total_steps": 204665, "loss": 0.0, "lr": 3.1594439928293847e-07, "epoch": 3.8290621259130777, "percentage": 76.58, "elapsed_time": "3:23:08", "remaining_time": "1:02:07", "throughput": 8666.63, "total_tokens": 105637136} +{"current_steps": 156740, "total_steps": 204665, "loss": 0.0001, "lr": 3.1588219774204085e-07, "epoch": 3.8291842767449245, "percentage": 76.58, "elapsed_time": "3:23:09", "remaining_time": "1:02:07", "throughput": 8666.63, "total_tokens": 105640080} +{"current_steps": 156745, "total_steps": 204665, "loss": 0.0, "lr": 3.1582000117623154e-07, "epoch": 3.8293064275767716, "percentage": 76.59, "elapsed_time": "3:23:09", "remaining_time": "1:02:06", "throughput": 8666.69, "total_tokens": 105643856} +{"current_steps": 156750, "total_steps": 204665, "loss": 0.0001, "lr": 3.1575780958596353e-07, "epoch": 3.829428578408619, "percentage": 76.59, "elapsed_time": "3:23:09", "remaining_time": "1:02:06", "throughput": 8666.7, "total_tokens": 105646928} +{"current_steps": 156755, "total_steps": 204665, "loss": 0.0, "lr": 3.1569562297168883e-07, "epoch": 3.829550729240466, "percentage": 76.59, "elapsed_time": "3:23:10", "remaining_time": "1:02:05", "throughput": 8666.71, "total_tokens": 105650064} +{"current_steps": 156760, "total_steps": 204665, "loss": 0.0, "lr": 3.1563344133385927e-07, "epoch": 3.829672880072313, "percentage": 76.59, "elapsed_time": "3:23:10", "remaining_time": "1:02:05", "throughput": 8666.76, "total_tokens": 105653648} +{"current_steps": 156765, "total_steps": 204665, "loss": 0.0002, "lr": 3.155712646729275e-07, "epoch": 3.8297950309041604, "percentage": 76.6, "elapsed_time": "3:23:11", "remaining_time": "1:02:05", "throughput": 8666.75, "total_tokens": 105656528} +{"current_steps": 156770, "total_steps": 204665, "loss": 0.0, "lr": 3.155090929893458e-07, "epoch": 3.8299171817360076, "percentage": 76.6, "elapsed_time": "3:23:11", "remaining_time": "1:02:04", "throughput": 8666.8, "total_tokens": 105660112} +{"current_steps": 156775, "total_steps": 204665, "loss": 0.0, "lr": 3.154469262835657e-07, "epoch": 3.830039332567855, "percentage": 76.6, "elapsed_time": "3:23:11", "remaining_time": "1:02:04", "throughput": 8666.83, "total_tokens": 105663504} +{"current_steps": 156780, "total_steps": 204665, "loss": 0.0762, "lr": 3.153847645560401e-07, "epoch": 3.830161483399702, "percentage": 76.6, "elapsed_time": "3:23:12", "remaining_time": "1:02:03", "throughput": 8666.86, "total_tokens": 105666960} +{"current_steps": 156785, "total_steps": 204665, "loss": 0.0838, "lr": 3.153226078072202e-07, "epoch": 3.830283634231549, "percentage": 76.61, "elapsed_time": "3:23:12", "remaining_time": "1:02:03", "throughput": 8666.92, "total_tokens": 105670800} +{"current_steps": 156790, "total_steps": 204665, "loss": 0.0, "lr": 3.152604560375589e-07, "epoch": 3.8304057850633964, "percentage": 76.61, "elapsed_time": "3:23:12", "remaining_time": "1:02:02", "throughput": 8666.93, "total_tokens": 105673872} +{"current_steps": 156795, "total_steps": 204665, "loss": 0.003, "lr": 3.1519830924750734e-07, "epoch": 3.8305279358952435, "percentage": 76.61, "elapsed_time": "3:23:13", "remaining_time": "1:02:02", "throughput": 8666.95, "total_tokens": 105677008} +{"current_steps": 156800, "total_steps": 204665, "loss": 0.05, "lr": 3.151361674375179e-07, "epoch": 3.8306500867270907, "percentage": 76.61, "elapsed_time": "3:23:13", "remaining_time": "1:02:02", "throughput": 8667.01, "total_tokens": 105680912} +{"current_steps": 156805, "total_steps": 204665, "loss": 0.0, "lr": 3.1507403060804274e-07, "epoch": 3.830772237558938, "percentage": 76.62, "elapsed_time": "3:23:13", "remaining_time": "1:02:01", "throughput": 8667.07, "total_tokens": 105684624} +{"current_steps": 156810, "total_steps": 204665, "loss": 0.0, "lr": 3.1501189875953314e-07, "epoch": 3.830894388390785, "percentage": 76.62, "elapsed_time": "3:23:14", "remaining_time": "1:02:01", "throughput": 8667.07, "total_tokens": 105687696} +{"current_steps": 156815, "total_steps": 204665, "loss": 0.0003, "lr": 3.1494977189244166e-07, "epoch": 3.831016539222632, "percentage": 76.62, "elapsed_time": "3:23:14", "remaining_time": "1:02:00", "throughput": 8667.1, "total_tokens": 105690960} +{"current_steps": 156820, "total_steps": 204665, "loss": 0.0, "lr": 3.148876500072193e-07, "epoch": 3.8311386900544795, "percentage": 76.62, "elapsed_time": "3:23:14", "remaining_time": "1:02:00", "throughput": 8667.15, "total_tokens": 105694608} +{"current_steps": 156825, "total_steps": 204665, "loss": 0.0444, "lr": 3.1482553310431816e-07, "epoch": 3.8312608408863262, "percentage": 76.63, "elapsed_time": "3:23:15", "remaining_time": "1:02:00", "throughput": 8667.18, "total_tokens": 105698000} +{"current_steps": 156830, "total_steps": 204665, "loss": 0.0, "lr": 3.147634211841904e-07, "epoch": 3.831382991718174, "percentage": 76.63, "elapsed_time": "3:23:15", "remaining_time": "1:01:59", "throughput": 8667.21, "total_tokens": 105701456} +{"current_steps": 156835, "total_steps": 204665, "loss": 0.0002, "lr": 3.1470131424728707e-07, "epoch": 3.8315051425500206, "percentage": 76.63, "elapsed_time": "3:23:15", "remaining_time": "1:01:59", "throughput": 8667.27, "total_tokens": 105705232} +{"current_steps": 156840, "total_steps": 204665, "loss": 0.0, "lr": 3.146392122940604e-07, "epoch": 3.831627293381868, "percentage": 76.63, "elapsed_time": "3:23:16", "remaining_time": "1:01:58", "throughput": 8667.27, "total_tokens": 105708176} +{"current_steps": 156845, "total_steps": 204665, "loss": 0.0, "lr": 3.145771153249618e-07, "epoch": 3.831749444213715, "percentage": 76.63, "elapsed_time": "3:23:16", "remaining_time": "1:01:58", "throughput": 8667.27, "total_tokens": 105711056} +{"current_steps": 156850, "total_steps": 204665, "loss": 0.0, "lr": 3.145150233404423e-07, "epoch": 3.831871595045562, "percentage": 76.64, "elapsed_time": "3:23:16", "remaining_time": "1:01:58", "throughput": 8667.27, "total_tokens": 105714064} +{"current_steps": 156855, "total_steps": 204665, "loss": 0.0, "lr": 3.144529363409544e-07, "epoch": 3.8319937458774094, "percentage": 76.64, "elapsed_time": "3:23:17", "remaining_time": "1:01:57", "throughput": 8667.29, "total_tokens": 105717328} +{"current_steps": 156860, "total_steps": 204665, "loss": 0.175, "lr": 3.1439085432694866e-07, "epoch": 3.8321158967092566, "percentage": 76.64, "elapsed_time": "3:23:17", "remaining_time": "1:01:57", "throughput": 8667.31, "total_tokens": 105720528} +{"current_steps": 156865, "total_steps": 204665, "loss": 0.0, "lr": 3.143287772988774e-07, "epoch": 3.8322380475411038, "percentage": 76.64, "elapsed_time": "3:23:17", "remaining_time": "1:01:56", "throughput": 8667.33, "total_tokens": 105723728} +{"current_steps": 156870, "total_steps": 204665, "loss": 0.0003, "lr": 3.142667052571915e-07, "epoch": 3.832360198372951, "percentage": 76.65, "elapsed_time": "3:23:18", "remaining_time": "1:01:56", "throughput": 8667.33, "total_tokens": 105726672} +{"current_steps": 156875, "total_steps": 204665, "loss": 0.0, "lr": 3.1420463820234266e-07, "epoch": 3.832482349204798, "percentage": 76.65, "elapsed_time": "3:23:18", "remaining_time": "1:01:56", "throughput": 8667.36, "total_tokens": 105729936} +{"current_steps": 156880, "total_steps": 204665, "loss": 0.0544, "lr": 3.14142576134782e-07, "epoch": 3.8326045000366453, "percentage": 76.65, "elapsed_time": "3:23:18", "remaining_time": "1:01:55", "throughput": 8667.37, "total_tokens": 105733072} +{"current_steps": 156885, "total_steps": 204665, "loss": 0.0538, "lr": 3.140805190549609e-07, "epoch": 3.8327266508684925, "percentage": 76.65, "elapsed_time": "3:23:19", "remaining_time": "1:01:55", "throughput": 8667.38, "total_tokens": 105736208} +{"current_steps": 156890, "total_steps": 204665, "loss": 0.0, "lr": 3.140184669633311e-07, "epoch": 3.8328488017003397, "percentage": 76.66, "elapsed_time": "3:23:19", "remaining_time": "1:01:54", "throughput": 8667.4, "total_tokens": 105739408} +{"current_steps": 156895, "total_steps": 204665, "loss": 0.0, "lr": 3.1395641986034324e-07, "epoch": 3.832970952532187, "percentage": 76.66, "elapsed_time": "3:23:20", "remaining_time": "1:01:54", "throughput": 8667.45, "total_tokens": 105742992} +{"current_steps": 156900, "total_steps": 204665, "loss": 0.0001, "lr": 3.138943777464491e-07, "epoch": 3.8330931033640336, "percentage": 76.66, "elapsed_time": "3:23:20", "remaining_time": "1:01:54", "throughput": 8667.48, "total_tokens": 105746384} +{"current_steps": 156905, "total_steps": 204665, "loss": 0.0245, "lr": 3.138323406220993e-07, "epoch": 3.8332152541958813, "percentage": 76.66, "elapsed_time": "3:23:20", "remaining_time": "1:01:53", "throughput": 8667.52, "total_tokens": 105749904} +{"current_steps": 156910, "total_steps": 204665, "loss": 0.0, "lr": 3.1377030848774565e-07, "epoch": 3.833337405027728, "percentage": 76.67, "elapsed_time": "3:23:21", "remaining_time": "1:01:53", "throughput": 8667.58, "total_tokens": 105753808} +{"current_steps": 156915, "total_steps": 204665, "loss": 0.0, "lr": 3.1370828134383845e-07, "epoch": 3.8334595558595757, "percentage": 76.67, "elapsed_time": "3:23:21", "remaining_time": "1:01:52", "throughput": 8667.61, "total_tokens": 105757136} +{"current_steps": 156920, "total_steps": 204665, "loss": 0.0, "lr": 3.136462591908293e-07, "epoch": 3.8335817066914224, "percentage": 76.67, "elapsed_time": "3:23:21", "remaining_time": "1:01:52", "throughput": 8667.6, "total_tokens": 105759888} +{"current_steps": 156925, "total_steps": 204665, "loss": 0.0361, "lr": 3.1358424202916945e-07, "epoch": 3.8337038575232696, "percentage": 76.67, "elapsed_time": "3:23:22", "remaining_time": "1:01:52", "throughput": 8667.63, "total_tokens": 105763280} +{"current_steps": 156930, "total_steps": 204665, "loss": 0.0, "lr": 3.135222298593093e-07, "epoch": 3.8338260083551168, "percentage": 76.68, "elapsed_time": "3:23:22", "remaining_time": "1:01:51", "throughput": 8667.63, "total_tokens": 105766288} +{"current_steps": 156935, "total_steps": 204665, "loss": 0.0, "lr": 3.134602226817005e-07, "epoch": 3.833948159186964, "percentage": 76.68, "elapsed_time": "3:23:22", "remaining_time": "1:01:51", "throughput": 8667.67, "total_tokens": 105769680} +{"current_steps": 156940, "total_steps": 204665, "loss": 0.0, "lr": 3.1339822049679323e-07, "epoch": 3.834070310018811, "percentage": 76.68, "elapsed_time": "3:23:23", "remaining_time": "1:01:50", "throughput": 8667.73, "total_tokens": 105773584} +{"current_steps": 156945, "total_steps": 204665, "loss": 0.0406, "lr": 3.1333622330503905e-07, "epoch": 3.8341924608506583, "percentage": 76.68, "elapsed_time": "3:23:23", "remaining_time": "1:01:50", "throughput": 8667.74, "total_tokens": 105776656} +{"current_steps": 156950, "total_steps": 204665, "loss": 0.0, "lr": 3.1327423110688835e-07, "epoch": 3.8343146116825055, "percentage": 76.69, "elapsed_time": "3:23:23", "remaining_time": "1:01:50", "throughput": 8667.78, "total_tokens": 105780240} +{"current_steps": 156955, "total_steps": 204665, "loss": 0.0005, "lr": 3.1321224390279235e-07, "epoch": 3.8344367625143527, "percentage": 76.69, "elapsed_time": "3:23:24", "remaining_time": "1:01:49", "throughput": 8667.85, "total_tokens": 105784144} +{"current_steps": 156960, "total_steps": 204665, "loss": 0.0001, "lr": 3.1315026169320167e-07, "epoch": 3.8345589133462, "percentage": 76.69, "elapsed_time": "3:23:24", "remaining_time": "1:01:49", "throughput": 8667.89, "total_tokens": 105787728} +{"current_steps": 156965, "total_steps": 204665, "loss": 0.0, "lr": 3.130882844785667e-07, "epoch": 3.834681064178047, "percentage": 76.69, "elapsed_time": "3:23:24", "remaining_time": "1:01:48", "throughput": 8667.96, "total_tokens": 105791760} +{"current_steps": 156970, "total_steps": 204665, "loss": 0.0, "lr": 3.1302631225933884e-07, "epoch": 3.8348032150098943, "percentage": 76.7, "elapsed_time": "3:23:25", "remaining_time": "1:01:48", "throughput": 8667.99, "total_tokens": 105795216} +{"current_steps": 156975, "total_steps": 204665, "loss": 0.0399, "lr": 3.1296434503596815e-07, "epoch": 3.8349253658417415, "percentage": 76.7, "elapsed_time": "3:23:25", "remaining_time": "1:01:48", "throughput": 8668.04, "total_tokens": 105798736} +{"current_steps": 156980, "total_steps": 204665, "loss": 0.0, "lr": 3.1290238280890547e-07, "epoch": 3.8350475166735887, "percentage": 76.7, "elapsed_time": "3:23:25", "remaining_time": "1:01:47", "throughput": 8668.08, "total_tokens": 105802384} +{"current_steps": 156985, "total_steps": 204665, "loss": 0.0001, "lr": 3.128404255786017e-07, "epoch": 3.835169667505436, "percentage": 76.7, "elapsed_time": "3:23:26", "remaining_time": "1:01:47", "throughput": 8668.11, "total_tokens": 105805776} +{"current_steps": 156990, "total_steps": 204665, "loss": 0.0737, "lr": 3.12778473345507e-07, "epoch": 3.835291818337283, "percentage": 76.71, "elapsed_time": "3:23:26", "remaining_time": "1:01:46", "throughput": 8668.14, "total_tokens": 105809040} +{"current_steps": 156995, "total_steps": 204665, "loss": 0.0001, "lr": 3.1271652611007226e-07, "epoch": 3.83541396916913, "percentage": 76.71, "elapsed_time": "3:23:27", "remaining_time": "1:01:46", "throughput": 8668.16, "total_tokens": 105812304} +{"current_steps": 157000, "total_steps": 204665, "loss": 0.0, "lr": 3.126545838727476e-07, "epoch": 3.8355361200009774, "percentage": 76.71, "elapsed_time": "3:23:27", "remaining_time": "1:01:46", "throughput": 8668.2, "total_tokens": 105815824} +{"current_steps": 157005, "total_steps": 204665, "loss": 0.0, "lr": 3.12592646633984e-07, "epoch": 3.835658270832824, "percentage": 76.71, "elapsed_time": "3:23:27", "remaining_time": "1:01:45", "throughput": 8668.23, "total_tokens": 105819280} +{"current_steps": 157010, "total_steps": 204665, "loss": 0.0, "lr": 3.1253071439423116e-07, "epoch": 3.835780421664672, "percentage": 76.72, "elapsed_time": "3:23:28", "remaining_time": "1:01:45", "throughput": 8668.29, "total_tokens": 105822992} +{"current_steps": 157015, "total_steps": 204665, "loss": 0.0, "lr": 3.1246878715393996e-07, "epoch": 3.8359025724965186, "percentage": 76.72, "elapsed_time": "3:23:28", "remaining_time": "1:01:44", "throughput": 8668.33, "total_tokens": 105826512} +{"current_steps": 157020, "total_steps": 204665, "loss": 0.0, "lr": 3.1240686491356096e-07, "epoch": 3.8360247233283657, "percentage": 76.72, "elapsed_time": "3:23:28", "remaining_time": "1:01:44", "throughput": 8668.36, "total_tokens": 105829904} +{"current_steps": 157025, "total_steps": 204665, "loss": 0.0, "lr": 3.123449476735438e-07, "epoch": 3.836146874160213, "percentage": 76.72, "elapsed_time": "3:23:29", "remaining_time": "1:01:44", "throughput": 8668.36, "total_tokens": 105832912} +{"current_steps": 157030, "total_steps": 204665, "loss": 0.0, "lr": 3.1228303543433953e-07, "epoch": 3.83626902499206, "percentage": 76.73, "elapsed_time": "3:23:29", "remaining_time": "1:01:43", "throughput": 8668.4, "total_tokens": 105836368} +{"current_steps": 157035, "total_steps": 204665, "loss": 0.0, "lr": 3.1222112819639755e-07, "epoch": 3.8363911758239073, "percentage": 76.73, "elapsed_time": "3:23:29", "remaining_time": "1:01:43", "throughput": 8668.43, "total_tokens": 105839760} +{"current_steps": 157040, "total_steps": 204665, "loss": 0.0, "lr": 3.1215922596016897e-07, "epoch": 3.8365133266557545, "percentage": 76.73, "elapsed_time": "3:23:30", "remaining_time": "1:01:42", "throughput": 8668.47, "total_tokens": 105843280} +{"current_steps": 157045, "total_steps": 204665, "loss": 0.0, "lr": 3.1209732872610316e-07, "epoch": 3.8366354774876017, "percentage": 76.73, "elapsed_time": "3:23:30", "remaining_time": "1:01:42", "throughput": 8668.48, "total_tokens": 105846416} +{"current_steps": 157050, "total_steps": 204665, "loss": 0.0174, "lr": 3.1203543649465093e-07, "epoch": 3.836757628319449, "percentage": 76.74, "elapsed_time": "3:23:30", "remaining_time": "1:01:42", "throughput": 8668.5, "total_tokens": 105849680} +{"current_steps": 157055, "total_steps": 204665, "loss": 0.0001, "lr": 3.1197354926626175e-07, "epoch": 3.836879779151296, "percentage": 76.74, "elapsed_time": "3:23:31", "remaining_time": "1:01:41", "throughput": 8668.55, "total_tokens": 105853328} +{"current_steps": 157060, "total_steps": 204665, "loss": 0.0, "lr": 3.1191166704138614e-07, "epoch": 3.8370019299831433, "percentage": 76.74, "elapsed_time": "3:23:31", "remaining_time": "1:01:41", "throughput": 8668.57, "total_tokens": 105856528} +{"current_steps": 157065, "total_steps": 204665, "loss": 0.0, "lr": 3.118497898204742e-07, "epoch": 3.8371240808149905, "percentage": 76.74, "elapsed_time": "3:23:31", "remaining_time": "1:01:40", "throughput": 8668.59, "total_tokens": 105859792} +{"current_steps": 157070, "total_steps": 204665, "loss": 0.0, "lr": 3.117879176039752e-07, "epoch": 3.8372462316468376, "percentage": 76.74, "elapsed_time": "3:23:32", "remaining_time": "1:01:40", "throughput": 8668.63, "total_tokens": 105863312} +{"current_steps": 157075, "total_steps": 204665, "loss": 0.0001, "lr": 3.117260503923396e-07, "epoch": 3.837368382478685, "percentage": 76.75, "elapsed_time": "3:23:32", "remaining_time": "1:01:40", "throughput": 8668.69, "total_tokens": 105867024} +{"current_steps": 157080, "total_steps": 204665, "loss": 0.1088, "lr": 3.1166418818601757e-07, "epoch": 3.8374905333105316, "percentage": 76.75, "elapsed_time": "3:23:32", "remaining_time": "1:01:39", "throughput": 8668.71, "total_tokens": 105870288} +{"current_steps": 157085, "total_steps": 204665, "loss": 0.0, "lr": 3.116023309854584e-07, "epoch": 3.837612684142379, "percentage": 76.75, "elapsed_time": "3:23:33", "remaining_time": "1:01:39", "throughput": 8668.74, "total_tokens": 105873744} +{"current_steps": 157090, "total_steps": 204665, "loss": 0.0, "lr": 3.1154047879111256e-07, "epoch": 3.837734834974226, "percentage": 76.75, "elapsed_time": "3:23:33", "remaining_time": "1:01:38", "throughput": 8668.76, "total_tokens": 105876880} +{"current_steps": 157095, "total_steps": 204665, "loss": 0.0001, "lr": 3.1147863160342923e-07, "epoch": 3.8378569858060736, "percentage": 76.76, "elapsed_time": "3:23:33", "remaining_time": "1:01:38", "throughput": 8668.79, "total_tokens": 105880336} +{"current_steps": 157100, "total_steps": 204665, "loss": 0.0, "lr": 3.114167894228589e-07, "epoch": 3.8379791366379203, "percentage": 76.76, "elapsed_time": "3:23:34", "remaining_time": "1:01:38", "throughput": 8668.83, "total_tokens": 105883792} +{"current_steps": 157105, "total_steps": 204665, "loss": 0.0, "lr": 3.1135495224985043e-07, "epoch": 3.8381012874697675, "percentage": 76.76, "elapsed_time": "3:23:34", "remaining_time": "1:01:37", "throughput": 8668.87, "total_tokens": 105887312} +{"current_steps": 157110, "total_steps": 204665, "loss": 0.0, "lr": 3.11293120084854e-07, "epoch": 3.8382234383016147, "percentage": 76.76, "elapsed_time": "3:23:35", "remaining_time": "1:01:37", "throughput": 8668.9, "total_tokens": 105890704} +{"current_steps": 157115, "total_steps": 204665, "loss": 0.0, "lr": 3.112312929283195e-07, "epoch": 3.838345589133462, "percentage": 76.77, "elapsed_time": "3:23:35", "remaining_time": "1:01:36", "throughput": 8668.92, "total_tokens": 105893904} +{"current_steps": 157120, "total_steps": 204665, "loss": 0.0325, "lr": 3.111694707806961e-07, "epoch": 3.838467739965309, "percentage": 76.77, "elapsed_time": "3:23:35", "remaining_time": "1:01:36", "throughput": 8668.96, "total_tokens": 105897424} +{"current_steps": 157125, "total_steps": 204665, "loss": 0.0, "lr": 3.111076536424337e-07, "epoch": 3.8385898907971563, "percentage": 76.77, "elapsed_time": "3:23:36", "remaining_time": "1:01:36", "throughput": 8668.97, "total_tokens": 105900496} +{"current_steps": 157130, "total_steps": 204665, "loss": 0.0, "lr": 3.1104584151398163e-07, "epoch": 3.8387120416290035, "percentage": 76.77, "elapsed_time": "3:23:36", "remaining_time": "1:01:35", "throughput": 8669.02, "total_tokens": 105904208} +{"current_steps": 157135, "total_steps": 204665, "loss": 0.0, "lr": 3.1098403439578945e-07, "epoch": 3.8388341924608507, "percentage": 76.78, "elapsed_time": "3:23:36", "remaining_time": "1:01:35", "throughput": 8669.06, "total_tokens": 105907728} +{"current_steps": 157140, "total_steps": 204665, "loss": 0.0002, "lr": 3.1092223228830696e-07, "epoch": 3.838956343292698, "percentage": 76.78, "elapsed_time": "3:23:37", "remaining_time": "1:01:34", "throughput": 8669.08, "total_tokens": 105911056} +{"current_steps": 157145, "total_steps": 204665, "loss": 0.0, "lr": 3.1086043519198315e-07, "epoch": 3.839078494124545, "percentage": 76.78, "elapsed_time": "3:23:37", "remaining_time": "1:01:34", "throughput": 8669.08, "total_tokens": 105913936} +{"current_steps": 157150, "total_steps": 204665, "loss": 0.0924, "lr": 3.107986431072678e-07, "epoch": 3.8392006449563922, "percentage": 76.78, "elapsed_time": "3:23:37", "remaining_time": "1:01:34", "throughput": 8669.09, "total_tokens": 105917072} +{"current_steps": 157155, "total_steps": 204665, "loss": 0.0003, "lr": 3.107368560346101e-07, "epoch": 3.8393227957882394, "percentage": 76.79, "elapsed_time": "3:23:38", "remaining_time": "1:01:33", "throughput": 8669.11, "total_tokens": 105920272} +{"current_steps": 157160, "total_steps": 204665, "loss": 0.0001, "lr": 3.1067507397445904e-07, "epoch": 3.8394449466200866, "percentage": 76.79, "elapsed_time": "3:23:38", "remaining_time": "1:01:33", "throughput": 8669.12, "total_tokens": 105923344} +{"current_steps": 157165, "total_steps": 204665, "loss": 0.0001, "lr": 3.106132969272646e-07, "epoch": 3.839567097451934, "percentage": 76.79, "elapsed_time": "3:23:38", "remaining_time": "1:01:32", "throughput": 8669.14, "total_tokens": 105926480} +{"current_steps": 157170, "total_steps": 204665, "loss": 0.0, "lr": 3.1055152489347535e-07, "epoch": 3.839689248283781, "percentage": 76.79, "elapsed_time": "3:23:39", "remaining_time": "1:01:32", "throughput": 8669.17, "total_tokens": 105929872} +{"current_steps": 157175, "total_steps": 204665, "loss": 0.0, "lr": 3.1048975787354126e-07, "epoch": 3.8398113991156277, "percentage": 76.8, "elapsed_time": "3:23:39", "remaining_time": "1:01:32", "throughput": 8669.21, "total_tokens": 105933392} +{"current_steps": 157180, "total_steps": 204665, "loss": 0.0588, "lr": 3.104279958679107e-07, "epoch": 3.8399335499474754, "percentage": 76.8, "elapsed_time": "3:23:39", "remaining_time": "1:01:31", "throughput": 8669.24, "total_tokens": 105936784} +{"current_steps": 157185, "total_steps": 204665, "loss": 0.0, "lr": 3.103662388770335e-07, "epoch": 3.840055700779322, "percentage": 76.8, "elapsed_time": "3:23:40", "remaining_time": "1:01:31", "throughput": 8669.25, "total_tokens": 105939920} +{"current_steps": 157190, "total_steps": 204665, "loss": 0.0001, "lr": 3.1030448690135824e-07, "epoch": 3.8401778516111693, "percentage": 76.8, "elapsed_time": "3:23:40", "remaining_time": "1:01:30", "throughput": 8669.28, "total_tokens": 105943312} +{"current_steps": 157195, "total_steps": 204665, "loss": 0.0, "lr": 3.1024273994133465e-07, "epoch": 3.8403000024430165, "percentage": 76.81, "elapsed_time": "3:23:40", "remaining_time": "1:01:30", "throughput": 8669.32, "total_tokens": 105946832} +{"current_steps": 157200, "total_steps": 204665, "loss": 0.0, "lr": 3.1018099799741095e-07, "epoch": 3.8404221532748637, "percentage": 76.81, "elapsed_time": "3:23:41", "remaining_time": "1:01:30", "throughput": 8669.36, "total_tokens": 105950352} +{"current_steps": 157205, "total_steps": 204665, "loss": 0.0001, "lr": 3.101192610700366e-07, "epoch": 3.840544304106711, "percentage": 76.81, "elapsed_time": "3:23:41", "remaining_time": "1:01:29", "throughput": 8669.39, "total_tokens": 105953680} +{"current_steps": 157210, "total_steps": 204665, "loss": 0.0, "lr": 3.100575291596609e-07, "epoch": 3.840666454938558, "percentage": 76.81, "elapsed_time": "3:23:41", "remaining_time": "1:01:29", "throughput": 8669.42, "total_tokens": 105957072} +{"current_steps": 157215, "total_steps": 204665, "loss": 0.0, "lr": 3.0999580226673207e-07, "epoch": 3.8407886057704053, "percentage": 76.82, "elapsed_time": "3:23:42", "remaining_time": "1:01:28", "throughput": 8669.46, "total_tokens": 105960656} +{"current_steps": 157220, "total_steps": 204665, "loss": 0.0, "lr": 3.0993408039169964e-07, "epoch": 3.8409107566022525, "percentage": 76.82, "elapsed_time": "3:23:42", "remaining_time": "1:01:28", "throughput": 8669.5, "total_tokens": 105964176} +{"current_steps": 157225, "total_steps": 204665, "loss": 0.0, "lr": 3.098723635350119e-07, "epoch": 3.8410329074340996, "percentage": 76.82, "elapsed_time": "3:23:42", "remaining_time": "1:01:28", "throughput": 8669.56, "total_tokens": 105967952} +{"current_steps": 157230, "total_steps": 204665, "loss": 0.0, "lr": 3.0981065169711793e-07, "epoch": 3.841155058265947, "percentage": 76.82, "elapsed_time": "3:23:43", "remaining_time": "1:01:27", "throughput": 8669.59, "total_tokens": 105971344} +{"current_steps": 157235, "total_steps": 204665, "loss": 0.0009, "lr": 3.0974894487846693e-07, "epoch": 3.841277209097794, "percentage": 76.83, "elapsed_time": "3:23:43", "remaining_time": "1:01:27", "throughput": 8669.62, "total_tokens": 105974800} +{"current_steps": 157240, "total_steps": 204665, "loss": 0.0, "lr": 3.096872430795069e-07, "epoch": 3.841399359929641, "percentage": 76.83, "elapsed_time": "3:23:44", "remaining_time": "1:01:26", "throughput": 8669.68, "total_tokens": 105978640} +{"current_steps": 157245, "total_steps": 204665, "loss": 0.0563, "lr": 3.0962554630068716e-07, "epoch": 3.8415215107614884, "percentage": 76.83, "elapsed_time": "3:23:44", "remaining_time": "1:01:26", "throughput": 8669.69, "total_tokens": 105981712} +{"current_steps": 157250, "total_steps": 204665, "loss": 0.0003, "lr": 3.095638545424559e-07, "epoch": 3.8416436615933356, "percentage": 76.83, "elapsed_time": "3:23:44", "remaining_time": "1:01:26", "throughput": 8669.69, "total_tokens": 105984656} +{"current_steps": 157255, "total_steps": 204665, "loss": 0.036, "lr": 3.095021678052623e-07, "epoch": 3.841765812425183, "percentage": 76.84, "elapsed_time": "3:23:45", "remaining_time": "1:01:25", "throughput": 8669.72, "total_tokens": 105987984} +{"current_steps": 157260, "total_steps": 204665, "loss": 0.0, "lr": 3.094404860895543e-07, "epoch": 3.8418879632570295, "percentage": 76.84, "elapsed_time": "3:23:45", "remaining_time": "1:01:25", "throughput": 8669.75, "total_tokens": 105991376} +{"current_steps": 157265, "total_steps": 204665, "loss": 0.0001, "lr": 3.0937880939578123e-07, "epoch": 3.842010114088877, "percentage": 76.84, "elapsed_time": "3:23:45", "remaining_time": "1:01:24", "throughput": 8669.78, "total_tokens": 105994640} +{"current_steps": 157270, "total_steps": 204665, "loss": 0.0, "lr": 3.0931713772439106e-07, "epoch": 3.842132264920724, "percentage": 76.84, "elapsed_time": "3:23:46", "remaining_time": "1:01:24", "throughput": 8669.78, "total_tokens": 105997648} +{"current_steps": 157275, "total_steps": 204665, "loss": 0.0, "lr": 3.0925547107583225e-07, "epoch": 3.8422544157525715, "percentage": 76.85, "elapsed_time": "3:23:46", "remaining_time": "1:01:24", "throughput": 8669.8, "total_tokens": 106000848} +{"current_steps": 157280, "total_steps": 204665, "loss": 0.0, "lr": 3.0919380945055374e-07, "epoch": 3.8423765665844183, "percentage": 76.85, "elapsed_time": "3:23:46", "remaining_time": "1:01:23", "throughput": 8669.86, "total_tokens": 106004624} +{"current_steps": 157285, "total_steps": 204665, "loss": 0.0, "lr": 3.091321528490033e-07, "epoch": 3.8424987174162655, "percentage": 76.85, "elapsed_time": "3:23:47", "remaining_time": "1:01:23", "throughput": 8669.86, "total_tokens": 106007568} +{"current_steps": 157290, "total_steps": 204665, "loss": 0.0, "lr": 3.090705012716297e-07, "epoch": 3.8426208682481127, "percentage": 76.85, "elapsed_time": "3:23:47", "remaining_time": "1:01:22", "throughput": 8669.89, "total_tokens": 106010960} +{"current_steps": 157295, "total_steps": 204665, "loss": 0.0, "lr": 3.090088547188815e-07, "epoch": 3.84274301907996, "percentage": 76.85, "elapsed_time": "3:23:47", "remaining_time": "1:01:22", "throughput": 8669.91, "total_tokens": 106014224} +{"current_steps": 157300, "total_steps": 204665, "loss": 0.0, "lr": 3.0894721319120654e-07, "epoch": 3.842865169911807, "percentage": 76.86, "elapsed_time": "3:23:48", "remaining_time": "1:01:22", "throughput": 8669.93, "total_tokens": 106017424} +{"current_steps": 157305, "total_steps": 204665, "loss": 0.0004, "lr": 3.088855766890536e-07, "epoch": 3.8429873207436542, "percentage": 76.86, "elapsed_time": "3:23:48", "remaining_time": "1:01:21", "throughput": 8669.96, "total_tokens": 106020816} +{"current_steps": 157310, "total_steps": 204665, "loss": 0.0002, "lr": 3.0882394521287023e-07, "epoch": 3.8431094715755014, "percentage": 76.86, "elapsed_time": "3:23:48", "remaining_time": "1:01:21", "throughput": 8669.99, "total_tokens": 106024208} +{"current_steps": 157315, "total_steps": 204665, "loss": 0.0466, "lr": 3.0876231876310544e-07, "epoch": 3.8432316224073486, "percentage": 76.86, "elapsed_time": "3:23:49", "remaining_time": "1:01:20", "throughput": 8670.0, "total_tokens": 106027344} +{"current_steps": 157320, "total_steps": 204665, "loss": 0.0625, "lr": 3.0870069734020665e-07, "epoch": 3.843353773239196, "percentage": 76.87, "elapsed_time": "3:23:49", "remaining_time": "1:01:20", "throughput": 8670.02, "total_tokens": 106030480} +{"current_steps": 157325, "total_steps": 204665, "loss": 0.0002, "lr": 3.086390809446223e-07, "epoch": 3.843475924071043, "percentage": 76.87, "elapsed_time": "3:23:49", "remaining_time": "1:01:20", "throughput": 8670.05, "total_tokens": 106033872} +{"current_steps": 157330, "total_steps": 204665, "loss": 0.0, "lr": 3.0857746957680096e-07, "epoch": 3.84359807490289, "percentage": 76.87, "elapsed_time": "3:23:50", "remaining_time": "1:01:19", "throughput": 8670.08, "total_tokens": 106037328} +{"current_steps": 157335, "total_steps": 204665, "loss": 0.0, "lr": 3.085158632371898e-07, "epoch": 3.8437202257347374, "percentage": 76.87, "elapsed_time": "3:23:50", "remaining_time": "1:01:19", "throughput": 8670.1, "total_tokens": 106040464} +{"current_steps": 157340, "total_steps": 204665, "loss": 0.0, "lr": 3.084542619262376e-07, "epoch": 3.8438423765665846, "percentage": 76.88, "elapsed_time": "3:23:50", "remaining_time": "1:01:18", "throughput": 8670.12, "total_tokens": 106043792} +{"current_steps": 157345, "total_steps": 204665, "loss": 0.0272, "lr": 3.083926656443917e-07, "epoch": 3.8439645273984313, "percentage": 76.88, "elapsed_time": "3:23:51", "remaining_time": "1:01:18", "throughput": 8670.16, "total_tokens": 106047184} +{"current_steps": 157350, "total_steps": 204665, "loss": 0.0, "lr": 3.083310743921006e-07, "epoch": 3.844086678230279, "percentage": 76.88, "elapsed_time": "3:23:51", "remaining_time": "1:01:18", "throughput": 8670.19, "total_tokens": 106050640} +{"current_steps": 157355, "total_steps": 204665, "loss": 0.0002, "lr": 3.082694881698118e-07, "epoch": 3.8442088290621257, "percentage": 76.88, "elapsed_time": "3:23:51", "remaining_time": "1:01:17", "throughput": 8670.25, "total_tokens": 106054416} +{"current_steps": 157360, "total_steps": 204665, "loss": 0.0, "lr": 3.082079069779735e-07, "epoch": 3.8443309798939733, "percentage": 76.89, "elapsed_time": "3:23:52", "remaining_time": "1:01:17", "throughput": 8670.28, "total_tokens": 106057808} +{"current_steps": 157365, "total_steps": 204665, "loss": 0.0, "lr": 3.081463308170331e-07, "epoch": 3.84445313072582, "percentage": 76.89, "elapsed_time": "3:23:52", "remaining_time": "1:01:16", "throughput": 8670.3, "total_tokens": 106061136} +{"current_steps": 157370, "total_steps": 204665, "loss": 0.0, "lr": 3.0808475968743907e-07, "epoch": 3.8445752815576673, "percentage": 76.89, "elapsed_time": "3:23:53", "remaining_time": "1:01:16", "throughput": 8670.32, "total_tokens": 106064272} +{"current_steps": 157375, "total_steps": 204665, "loss": 0.0001, "lr": 3.080231935896387e-07, "epoch": 3.8446974323895144, "percentage": 76.89, "elapsed_time": "3:23:53", "remaining_time": "1:01:16", "throughput": 8670.35, "total_tokens": 106067792} +{"current_steps": 157380, "total_steps": 204665, "loss": 0.0924, "lr": 3.0796163252407946e-07, "epoch": 3.8448195832213616, "percentage": 76.9, "elapsed_time": "3:23:53", "remaining_time": "1:01:15", "throughput": 8670.37, "total_tokens": 106070928} +{"current_steps": 157385, "total_steps": 204665, "loss": 0.0001, "lr": 3.079000764912093e-07, "epoch": 3.844941734053209, "percentage": 76.9, "elapsed_time": "3:23:54", "remaining_time": "1:01:15", "throughput": 8670.38, "total_tokens": 106074064} +{"current_steps": 157390, "total_steps": 204665, "loss": 0.0155, "lr": 3.078385254914764e-07, "epoch": 3.845063884885056, "percentage": 76.9, "elapsed_time": "3:23:54", "remaining_time": "1:01:14", "throughput": 8670.43, "total_tokens": 106077776} +{"current_steps": 157395, "total_steps": 204665, "loss": 0.0001, "lr": 3.077769795253276e-07, "epoch": 3.845186035716903, "percentage": 76.9, "elapsed_time": "3:23:54", "remaining_time": "1:01:14", "throughput": 8670.48, "total_tokens": 106081360} +{"current_steps": 157400, "total_steps": 204665, "loss": 0.0, "lr": 3.077154385932109e-07, "epoch": 3.8453081865487504, "percentage": 76.91, "elapsed_time": "3:23:55", "remaining_time": "1:01:14", "throughput": 8670.48, "total_tokens": 106084368} +{"current_steps": 157405, "total_steps": 204665, "loss": 0.0, "lr": 3.0765390269557356e-07, "epoch": 3.8454303373805976, "percentage": 76.91, "elapsed_time": "3:23:55", "remaining_time": "1:01:13", "throughput": 8670.52, "total_tokens": 106087824} +{"current_steps": 157410, "total_steps": 204665, "loss": 0.0, "lr": 3.0759237183286356e-07, "epoch": 3.8455524882124448, "percentage": 76.91, "elapsed_time": "3:23:55", "remaining_time": "1:01:13", "throughput": 8670.54, "total_tokens": 106091088} +{"current_steps": 157415, "total_steps": 204665, "loss": 0.0002, "lr": 3.075308460055278e-07, "epoch": 3.845674639044292, "percentage": 76.91, "elapsed_time": "3:23:56", "remaining_time": "1:01:12", "throughput": 8670.56, "total_tokens": 106094224} +{"current_steps": 157420, "total_steps": 204665, "loss": 0.0, "lr": 3.074693252140139e-07, "epoch": 3.845796789876139, "percentage": 76.92, "elapsed_time": "3:23:56", "remaining_time": "1:01:12", "throughput": 8670.58, "total_tokens": 106097424} +{"current_steps": 157425, "total_steps": 204665, "loss": 0.0002, "lr": 3.0740780945876963e-07, "epoch": 3.8459189407079863, "percentage": 76.92, "elapsed_time": "3:23:56", "remaining_time": "1:01:12", "throughput": 8670.61, "total_tokens": 106100816} +{"current_steps": 157430, "total_steps": 204665, "loss": 0.0202, "lr": 3.0734629874024187e-07, "epoch": 3.8460410915398335, "percentage": 76.92, "elapsed_time": "3:23:57", "remaining_time": "1:01:11", "throughput": 8670.63, "total_tokens": 106104080} +{"current_steps": 157435, "total_steps": 204665, "loss": 0.0, "lr": 3.072847930588783e-07, "epoch": 3.8461632423716807, "percentage": 76.92, "elapsed_time": "3:23:57", "remaining_time": "1:01:11", "throughput": 8670.67, "total_tokens": 106107664} +{"current_steps": 157440, "total_steps": 204665, "loss": 0.0, "lr": 3.072232924151258e-07, "epoch": 3.8462853932035275, "percentage": 76.93, "elapsed_time": "3:23:57", "remaining_time": "1:01:10", "throughput": 8670.75, "total_tokens": 106111760} +{"current_steps": 157445, "total_steps": 204665, "loss": 0.0, "lr": 3.071617968094319e-07, "epoch": 3.846407544035375, "percentage": 76.93, "elapsed_time": "3:23:58", "remaining_time": "1:01:10", "throughput": 8670.75, "total_tokens": 106114704} +{"current_steps": 157450, "total_steps": 204665, "loss": 0.0, "lr": 3.0710030624224405e-07, "epoch": 3.846529694867222, "percentage": 76.93, "elapsed_time": "3:23:58", "remaining_time": "1:01:10", "throughput": 8670.76, "total_tokens": 106117840} +{"current_steps": 157455, "total_steps": 204665, "loss": 0.0001, "lr": 3.070388207140088e-07, "epoch": 3.8466518456990695, "percentage": 76.93, "elapsed_time": "3:23:58", "remaining_time": "1:01:09", "throughput": 8670.77, "total_tokens": 106120912} +{"current_steps": 157460, "total_steps": 204665, "loss": 0.0, "lr": 3.0697734022517386e-07, "epoch": 3.8467739965309162, "percentage": 76.94, "elapsed_time": "3:23:59", "remaining_time": "1:01:09", "throughput": 8670.83, "total_tokens": 106124624} +{"current_steps": 157465, "total_steps": 204665, "loss": 0.0, "lr": 3.0691586477618614e-07, "epoch": 3.8468961473627634, "percentage": 76.94, "elapsed_time": "3:23:59", "remaining_time": "1:01:08", "throughput": 8670.89, "total_tokens": 106128400} +{"current_steps": 157470, "total_steps": 204665, "loss": 0.0614, "lr": 3.0685439436749237e-07, "epoch": 3.8470182981946106, "percentage": 76.94, "elapsed_time": "3:23:59", "remaining_time": "1:01:08", "throughput": 8670.92, "total_tokens": 106131792} +{"current_steps": 157475, "total_steps": 204665, "loss": 0.0, "lr": 3.067929289995402e-07, "epoch": 3.847140449026458, "percentage": 76.94, "elapsed_time": "3:24:00", "remaining_time": "1:01:08", "throughput": 8670.99, "total_tokens": 106135824} +{"current_steps": 157480, "total_steps": 204665, "loss": 0.0, "lr": 3.067314686727761e-07, "epoch": 3.847262599858305, "percentage": 76.95, "elapsed_time": "3:24:00", "remaining_time": "1:01:07", "throughput": 8671.01, "total_tokens": 106139152} +{"current_steps": 157485, "total_steps": 204665, "loss": 0.0, "lr": 3.066700133876474e-07, "epoch": 3.847384750690152, "percentage": 76.95, "elapsed_time": "3:24:01", "remaining_time": "1:01:07", "throughput": 8671.01, "total_tokens": 106142032} +{"current_steps": 157490, "total_steps": 204665, "loss": 0.0001, "lr": 3.066085631446006e-07, "epoch": 3.8475069015219994, "percentage": 76.95, "elapsed_time": "3:24:01", "remaining_time": "1:01:06", "throughput": 8671.05, "total_tokens": 106145616} +{"current_steps": 157495, "total_steps": 204665, "loss": 0.0, "lr": 3.0654711794408304e-07, "epoch": 3.8476290523538466, "percentage": 76.95, "elapsed_time": "3:24:01", "remaining_time": "1:01:06", "throughput": 8671.08, "total_tokens": 106148944} +{"current_steps": 157500, "total_steps": 204665, "loss": 0.0002, "lr": 3.064856777865412e-07, "epoch": 3.8477512031856937, "percentage": 76.96, "elapsed_time": "3:24:02", "remaining_time": "1:01:06", "throughput": 8671.12, "total_tokens": 106152528} +{"current_steps": 157505, "total_steps": 204665, "loss": 0.0002, "lr": 3.064242426724223e-07, "epoch": 3.847873354017541, "percentage": 76.96, "elapsed_time": "3:24:02", "remaining_time": "1:01:05", "throughput": 8671.16, "total_tokens": 106156048} +{"current_steps": 157510, "total_steps": 204665, "loss": 0.0, "lr": 3.0636281260217243e-07, "epoch": 3.847995504849388, "percentage": 76.96, "elapsed_time": "3:24:02", "remaining_time": "1:01:05", "throughput": 8671.18, "total_tokens": 106159184} +{"current_steps": 157515, "total_steps": 204665, "loss": 0.0433, "lr": 3.0630138757623893e-07, "epoch": 3.8481176556812353, "percentage": 76.96, "elapsed_time": "3:24:03", "remaining_time": "1:01:04", "throughput": 8671.22, "total_tokens": 106162832} +{"current_steps": 157520, "total_steps": 204665, "loss": 0.0, "lr": 3.0623996759506855e-07, "epoch": 3.8482398065130825, "percentage": 76.96, "elapsed_time": "3:24:03", "remaining_time": "1:01:04", "throughput": 8671.25, "total_tokens": 106166096} +{"current_steps": 157525, "total_steps": 204665, "loss": 0.1025, "lr": 3.061785526591073e-07, "epoch": 3.8483619573449293, "percentage": 76.97, "elapsed_time": "3:24:03", "remaining_time": "1:01:04", "throughput": 8671.28, "total_tokens": 106169552} +{"current_steps": 157530, "total_steps": 204665, "loss": 0.0, "lr": 3.0611714276880273e-07, "epoch": 3.848484108176777, "percentage": 76.97, "elapsed_time": "3:24:04", "remaining_time": "1:01:03", "throughput": 8671.32, "total_tokens": 106173008} +{"current_steps": 157535, "total_steps": 204665, "loss": 0.0, "lr": 3.060557379246005e-07, "epoch": 3.8486062590086236, "percentage": 76.97, "elapsed_time": "3:24:04", "remaining_time": "1:01:03", "throughput": 8671.35, "total_tokens": 106176400} +{"current_steps": 157540, "total_steps": 204665, "loss": 0.0283, "lr": 3.0599433812694765e-07, "epoch": 3.8487284098404713, "percentage": 76.97, "elapsed_time": "3:24:04", "remaining_time": "1:01:02", "throughput": 8671.36, "total_tokens": 106179600} +{"current_steps": 157545, "total_steps": 204665, "loss": 0.0, "lr": 3.0593294337629097e-07, "epoch": 3.848850560672318, "percentage": 76.98, "elapsed_time": "3:24:05", "remaining_time": "1:01:02", "throughput": 8671.44, "total_tokens": 106183632} +{"current_steps": 157550, "total_steps": 204665, "loss": 0.0001, "lr": 3.0587155367307614e-07, "epoch": 3.848972711504165, "percentage": 76.98, "elapsed_time": "3:24:05", "remaining_time": "1:01:02", "throughput": 8671.46, "total_tokens": 106186960} +{"current_steps": 157555, "total_steps": 204665, "loss": 0.0713, "lr": 3.0581016901775057e-07, "epoch": 3.8490948623360124, "percentage": 76.98, "elapsed_time": "3:24:05", "remaining_time": "1:01:01", "throughput": 8671.49, "total_tokens": 106190288} +{"current_steps": 157560, "total_steps": 204665, "loss": 0.0002, "lr": 3.057487894107598e-07, "epoch": 3.8492170131678596, "percentage": 76.98, "elapsed_time": "3:24:06", "remaining_time": "1:01:01", "throughput": 8671.53, "total_tokens": 106193808} +{"current_steps": 157565, "total_steps": 204665, "loss": 0.0, "lr": 3.056874148525508e-07, "epoch": 3.8493391639997068, "percentage": 76.99, "elapsed_time": "3:24:06", "remaining_time": "1:01:00", "throughput": 8671.54, "total_tokens": 106196944} +{"current_steps": 157570, "total_steps": 204665, "loss": 0.0001, "lr": 3.056260453435694e-07, "epoch": 3.849461314831554, "percentage": 76.99, "elapsed_time": "3:24:06", "remaining_time": "1:01:00", "throughput": 8671.56, "total_tokens": 106200208} +{"current_steps": 157575, "total_steps": 204665, "loss": 0.0001, "lr": 3.0556468088426256e-07, "epoch": 3.849583465663401, "percentage": 76.99, "elapsed_time": "3:24:07", "remaining_time": "1:01:00", "throughput": 8671.6, "total_tokens": 106203728} +{"current_steps": 157580, "total_steps": 204665, "loss": 0.0, "lr": 3.0550332147507606e-07, "epoch": 3.8497056164952483, "percentage": 76.99, "elapsed_time": "3:24:07", "remaining_time": "1:00:59", "throughput": 8671.62, "total_tokens": 106206864} +{"current_steps": 157585, "total_steps": 204665, "loss": 0.0, "lr": 3.0544196711645586e-07, "epoch": 3.8498277673270955, "percentage": 77.0, "elapsed_time": "3:24:07", "remaining_time": "1:00:59", "throughput": 8671.64, "total_tokens": 106210128} +{"current_steps": 157590, "total_steps": 204665, "loss": 0.0001, "lr": 3.053806178088488e-07, "epoch": 3.8499499181589427, "percentage": 77.0, "elapsed_time": "3:24:08", "remaining_time": "1:00:58", "throughput": 8671.66, "total_tokens": 106213456} +{"current_steps": 157595, "total_steps": 204665, "loss": 0.0001, "lr": 3.053192735527005e-07, "epoch": 3.85007206899079, "percentage": 77.0, "elapsed_time": "3:24:08", "remaining_time": "1:00:58", "throughput": 8671.68, "total_tokens": 106216656} +{"current_steps": 157600, "total_steps": 204665, "loss": 0.0619, "lr": 3.052579343484573e-07, "epoch": 3.850194219822637, "percentage": 77.0, "elapsed_time": "3:24:09", "remaining_time": "1:00:57", "throughput": 8671.71, "total_tokens": 106220048} +{"current_steps": 157605, "total_steps": 204665, "loss": 0.0005, "lr": 3.0519660019656544e-07, "epoch": 3.8503163706544843, "percentage": 77.01, "elapsed_time": "3:24:09", "remaining_time": "1:00:57", "throughput": 8671.73, "total_tokens": 106223312} +{"current_steps": 157610, "total_steps": 204665, "loss": 0.0, "lr": 3.051352710974706e-07, "epoch": 3.8504385214863315, "percentage": 77.01, "elapsed_time": "3:24:09", "remaining_time": "1:00:57", "throughput": 8671.75, "total_tokens": 106226448} +{"current_steps": 157615, "total_steps": 204665, "loss": 0.0, "lr": 3.0507394705161913e-07, "epoch": 3.8505606723181787, "percentage": 77.01, "elapsed_time": "3:24:10", "remaining_time": "1:00:56", "throughput": 8671.78, "total_tokens": 106229776} +{"current_steps": 157620, "total_steps": 204665, "loss": 0.0, "lr": 3.050126280594567e-07, "epoch": 3.8506828231500254, "percentage": 77.01, "elapsed_time": "3:24:10", "remaining_time": "1:00:56", "throughput": 8671.79, "total_tokens": 106232976} +{"current_steps": 157625, "total_steps": 204665, "loss": 0.0, "lr": 3.0495131412142963e-07, "epoch": 3.850804973981873, "percentage": 77.02, "elapsed_time": "3:24:10", "remaining_time": "1:00:55", "throughput": 8671.84, "total_tokens": 106236624} +{"current_steps": 157630, "total_steps": 204665, "loss": 0.1139, "lr": 3.0489000523798316e-07, "epoch": 3.85092712481372, "percentage": 77.02, "elapsed_time": "3:24:11", "remaining_time": "1:00:55", "throughput": 8671.91, "total_tokens": 106240528} +{"current_steps": 157635, "total_steps": 204665, "loss": 0.0, "lr": 3.048287014095635e-07, "epoch": 3.851049275645567, "percentage": 77.02, "elapsed_time": "3:24:11", "remaining_time": "1:00:55", "throughput": 8671.96, "total_tokens": 106244304} +{"current_steps": 157640, "total_steps": 204665, "loss": 0.0, "lr": 3.0476740263661693e-07, "epoch": 3.851171426477414, "percentage": 77.02, "elapsed_time": "3:24:11", "remaining_time": "1:00:54", "throughput": 8672.03, "total_tokens": 106248272} +{"current_steps": 157645, "total_steps": 204665, "loss": 0.0001, "lr": 3.0470610891958836e-07, "epoch": 3.8512935773092614, "percentage": 77.03, "elapsed_time": "3:24:12", "remaining_time": "1:00:54", "throughput": 8672.06, "total_tokens": 106251600} +{"current_steps": 157650, "total_steps": 204665, "loss": 0.0, "lr": 3.0464482025892444e-07, "epoch": 3.8514157281411086, "percentage": 77.03, "elapsed_time": "3:24:12", "remaining_time": "1:00:53", "throughput": 8672.08, "total_tokens": 106254864} +{"current_steps": 157655, "total_steps": 204665, "loss": 0.0, "lr": 3.0458353665507e-07, "epoch": 3.8515378789729557, "percentage": 77.03, "elapsed_time": "3:24:12", "remaining_time": "1:00:53", "throughput": 8672.1, "total_tokens": 106258064} +{"current_steps": 157660, "total_steps": 204665, "loss": 0.0, "lr": 3.0452225810847153e-07, "epoch": 3.851660029804803, "percentage": 77.03, "elapsed_time": "3:24:13", "remaining_time": "1:00:53", "throughput": 8672.13, "total_tokens": 106261584} +{"current_steps": 157665, "total_steps": 204665, "loss": 0.0001, "lr": 3.0446098461957383e-07, "epoch": 3.85178218063665, "percentage": 77.04, "elapsed_time": "3:24:13", "remaining_time": "1:00:52", "throughput": 8672.18, "total_tokens": 106265168} +{"current_steps": 157670, "total_steps": 204665, "loss": 0.0, "lr": 3.043997161888233e-07, "epoch": 3.8519043314684973, "percentage": 77.04, "elapsed_time": "3:24:13", "remaining_time": "1:00:52", "throughput": 8672.23, "total_tokens": 106268880} +{"current_steps": 157675, "total_steps": 204665, "loss": 0.0, "lr": 3.0433845281666484e-07, "epoch": 3.8520264823003445, "percentage": 77.04, "elapsed_time": "3:24:14", "remaining_time": "1:00:51", "throughput": 8672.23, "total_tokens": 106271824} +{"current_steps": 157680, "total_steps": 204665, "loss": 0.0, "lr": 3.0427719450354463e-07, "epoch": 3.8521486331321917, "percentage": 77.04, "elapsed_time": "3:24:14", "remaining_time": "1:00:51", "throughput": 8672.28, "total_tokens": 106275472} +{"current_steps": 157685, "total_steps": 204665, "loss": 0.0001, "lr": 3.042159412499077e-07, "epoch": 3.852270783964039, "percentage": 77.05, "elapsed_time": "3:24:14", "remaining_time": "1:00:51", "throughput": 8672.31, "total_tokens": 106278992} +{"current_steps": 157690, "total_steps": 204665, "loss": 0.0001, "lr": 3.041546930561992e-07, "epoch": 3.852392934795886, "percentage": 77.05, "elapsed_time": "3:24:15", "remaining_time": "1:00:50", "throughput": 8672.34, "total_tokens": 106282320} +{"current_steps": 157695, "total_steps": 204665, "loss": 0.0, "lr": 3.04093449922865e-07, "epoch": 3.8525150856277333, "percentage": 77.05, "elapsed_time": "3:24:15", "remaining_time": "1:00:50", "throughput": 8672.38, "total_tokens": 106285904} +{"current_steps": 157700, "total_steps": 204665, "loss": 0.0, "lr": 3.0403221185035075e-07, "epoch": 3.8526372364595804, "percentage": 77.05, "elapsed_time": "3:24:16", "remaining_time": "1:00:49", "throughput": 8672.39, "total_tokens": 106288912} +{"current_steps": 157705, "total_steps": 204665, "loss": 0.0, "lr": 3.0397097883910116e-07, "epoch": 3.852759387291427, "percentage": 77.06, "elapsed_time": "3:24:16", "remaining_time": "1:00:49", "throughput": 8672.38, "total_tokens": 106291664} +{"current_steps": 157710, "total_steps": 204665, "loss": 0.0, "lr": 3.0390975088956207e-07, "epoch": 3.852881538123275, "percentage": 77.06, "elapsed_time": "3:24:16", "remaining_time": "1:00:49", "throughput": 8672.4, "total_tokens": 106294928} +{"current_steps": 157715, "total_steps": 204665, "loss": 0.0, "lr": 3.038485280021783e-07, "epoch": 3.8530036889551216, "percentage": 77.06, "elapsed_time": "3:24:17", "remaining_time": "1:00:48", "throughput": 8672.44, "total_tokens": 106298448} +{"current_steps": 157720, "total_steps": 204665, "loss": 0.0, "lr": 3.0378731017739547e-07, "epoch": 3.853125839786969, "percentage": 77.06, "elapsed_time": "3:24:17", "remaining_time": "1:00:48", "throughput": 8672.48, "total_tokens": 106301968} +{"current_steps": 157725, "total_steps": 204665, "loss": 0.0, "lr": 3.0372609741565824e-07, "epoch": 3.853247990618816, "percentage": 77.06, "elapsed_time": "3:24:17", "remaining_time": "1:00:47", "throughput": 8672.51, "total_tokens": 106305296} +{"current_steps": 157730, "total_steps": 204665, "loss": 0.0, "lr": 3.0366488971741224e-07, "epoch": 3.853370141450663, "percentage": 77.07, "elapsed_time": "3:24:18", "remaining_time": "1:00:47", "throughput": 8672.51, "total_tokens": 106308304} +{"current_steps": 157735, "total_steps": 204665, "loss": 0.0002, "lr": 3.036036870831027e-07, "epoch": 3.8534922922825103, "percentage": 77.07, "elapsed_time": "3:24:18", "remaining_time": "1:00:47", "throughput": 8672.55, "total_tokens": 106311824} +{"current_steps": 157740, "total_steps": 204665, "loss": 0.022, "lr": 3.0354248951317407e-07, "epoch": 3.8536144431143575, "percentage": 77.07, "elapsed_time": "3:24:18", "remaining_time": "1:00:46", "throughput": 8672.58, "total_tokens": 106315216} +{"current_steps": 157745, "total_steps": 204665, "loss": 0.0, "lr": 3.034812970080721e-07, "epoch": 3.8537365939462047, "percentage": 77.07, "elapsed_time": "3:24:19", "remaining_time": "1:00:46", "throughput": 8672.59, "total_tokens": 106318288} +{"current_steps": 157750, "total_steps": 204665, "loss": 0.0, "lr": 3.034201095682413e-07, "epoch": 3.853858744778052, "percentage": 77.08, "elapsed_time": "3:24:19", "remaining_time": "1:00:45", "throughput": 8672.59, "total_tokens": 106321168} +{"current_steps": 157755, "total_steps": 204665, "loss": 0.0, "lr": 3.0335892719412704e-07, "epoch": 3.853980895609899, "percentage": 77.08, "elapsed_time": "3:24:19", "remaining_time": "1:00:45", "throughput": 8672.66, "total_tokens": 106325072} +{"current_steps": 157760, "total_steps": 204665, "loss": 0.0, "lr": 3.032977498861737e-07, "epoch": 3.8541030464417463, "percentage": 77.08, "elapsed_time": "3:24:20", "remaining_time": "1:00:45", "throughput": 8672.69, "total_tokens": 106328464} +{"current_steps": 157765, "total_steps": 204665, "loss": 0.0, "lr": 3.032365776448266e-07, "epoch": 3.8542251972735935, "percentage": 77.08, "elapsed_time": "3:24:20", "remaining_time": "1:00:44", "throughput": 8672.69, "total_tokens": 106331472} +{"current_steps": 157770, "total_steps": 204665, "loss": 0.0003, "lr": 3.0317541047053074e-07, "epoch": 3.8543473481054407, "percentage": 77.09, "elapsed_time": "3:24:20", "remaining_time": "1:00:44", "throughput": 8672.74, "total_tokens": 106335120} +{"current_steps": 157775, "total_steps": 204665, "loss": 0.0, "lr": 3.031142483637308e-07, "epoch": 3.854469498937288, "percentage": 77.09, "elapsed_time": "3:24:21", "remaining_time": "1:00:43", "throughput": 8672.75, "total_tokens": 106338320} +{"current_steps": 157780, "total_steps": 204665, "loss": 0.0, "lr": 3.030530913248711e-07, "epoch": 3.854591649769135, "percentage": 77.09, "elapsed_time": "3:24:21", "remaining_time": "1:00:43", "throughput": 8672.76, "total_tokens": 106341264} +{"current_steps": 157785, "total_steps": 204665, "loss": 0.0, "lr": 3.0299193935439714e-07, "epoch": 3.8547138006009822, "percentage": 77.09, "elapsed_time": "3:24:21", "remaining_time": "1:00:43", "throughput": 8672.75, "total_tokens": 106344080} +{"current_steps": 157790, "total_steps": 204665, "loss": 0.0, "lr": 3.0293079245275297e-07, "epoch": 3.8548359514328294, "percentage": 77.1, "elapsed_time": "3:24:22", "remaining_time": "1:00:42", "throughput": 8672.79, "total_tokens": 106347536} +{"current_steps": 157795, "total_steps": 204665, "loss": 0.0001, "lr": 3.0286965062038383e-07, "epoch": 3.8549581022646766, "percentage": 77.1, "elapsed_time": "3:24:22", "remaining_time": "1:00:42", "throughput": 8672.79, "total_tokens": 106350608} +{"current_steps": 157800, "total_steps": 204665, "loss": 0.0, "lr": 3.028085138577338e-07, "epoch": 3.8550802530965234, "percentage": 77.1, "elapsed_time": "3:24:22", "remaining_time": "1:00:41", "throughput": 8672.8, "total_tokens": 106353616} +{"current_steps": 157805, "total_steps": 204665, "loss": 0.0, "lr": 3.027473821652481e-07, "epoch": 3.855202403928371, "percentage": 77.1, "elapsed_time": "3:24:23", "remaining_time": "1:00:41", "throughput": 8672.85, "total_tokens": 106357328} +{"current_steps": 157810, "total_steps": 204665, "loss": 0.0001, "lr": 3.0268625554337067e-07, "epoch": 3.8553245547602177, "percentage": 77.11, "elapsed_time": "3:24:23", "remaining_time": "1:00:41", "throughput": 8672.87, "total_tokens": 106360528} +{"current_steps": 157815, "total_steps": 204665, "loss": 0.0, "lr": 3.026251339925466e-07, "epoch": 3.855446705592065, "percentage": 77.11, "elapsed_time": "3:24:23", "remaining_time": "1:00:40", "throughput": 8672.91, "total_tokens": 106364176} +{"current_steps": 157820, "total_steps": 204665, "loss": 0.0, "lr": 3.025640175132199e-07, "epoch": 3.855568856423912, "percentage": 77.11, "elapsed_time": "3:24:24", "remaining_time": "1:00:40", "throughput": 8672.95, "total_tokens": 106367696} +{"current_steps": 157825, "total_steps": 204665, "loss": 0.0, "lr": 3.025029061058352e-07, "epoch": 3.8556910072557593, "percentage": 77.11, "elapsed_time": "3:24:24", "remaining_time": "1:00:39", "throughput": 8672.97, "total_tokens": 106370896} +{"current_steps": 157830, "total_steps": 204665, "loss": 0.0001, "lr": 3.0244179977083727e-07, "epoch": 3.8558131580876065, "percentage": 77.12, "elapsed_time": "3:24:24", "remaining_time": "1:00:39", "throughput": 8673.01, "total_tokens": 106374480} +{"current_steps": 157835, "total_steps": 204665, "loss": 0.0, "lr": 3.023806985086699e-07, "epoch": 3.8559353089194537, "percentage": 77.12, "elapsed_time": "3:24:25", "remaining_time": "1:00:39", "throughput": 8673.04, "total_tokens": 106377872} +{"current_steps": 157840, "total_steps": 204665, "loss": 0.0, "lr": 3.0231960231977803e-07, "epoch": 3.856057459751301, "percentage": 77.12, "elapsed_time": "3:24:25", "remaining_time": "1:00:38", "throughput": 8673.09, "total_tokens": 106381520} +{"current_steps": 157845, "total_steps": 204665, "loss": 0.0001, "lr": 3.022585112046053e-07, "epoch": 3.856179610583148, "percentage": 77.12, "elapsed_time": "3:24:26", "remaining_time": "1:00:38", "throughput": 8673.09, "total_tokens": 106384464} +{"current_steps": 157850, "total_steps": 204665, "loss": 0.0001, "lr": 3.021974251635965e-07, "epoch": 3.8563017614149953, "percentage": 77.13, "elapsed_time": "3:24:26", "remaining_time": "1:00:37", "throughput": 8673.14, "total_tokens": 106388112} +{"current_steps": 157855, "total_steps": 204665, "loss": 0.0001, "lr": 3.021363441971959e-07, "epoch": 3.8564239122468424, "percentage": 77.13, "elapsed_time": "3:24:26", "remaining_time": "1:00:37", "throughput": 8673.16, "total_tokens": 106391312} +{"current_steps": 157860, "total_steps": 204665, "loss": 0.0, "lr": 3.020752683058473e-07, "epoch": 3.8565460630786896, "percentage": 77.13, "elapsed_time": "3:24:27", "remaining_time": "1:00:37", "throughput": 8673.2, "total_tokens": 106394832} +{"current_steps": 157865, "total_steps": 204665, "loss": 0.0, "lr": 3.0201419748999524e-07, "epoch": 3.856668213910537, "percentage": 77.13, "elapsed_time": "3:24:27", "remaining_time": "1:00:36", "throughput": 8673.22, "total_tokens": 106398160} +{"current_steps": 157870, "total_steps": 204665, "loss": 0.0, "lr": 3.019531317500834e-07, "epoch": 3.856790364742384, "percentage": 77.14, "elapsed_time": "3:24:27", "remaining_time": "1:00:36", "throughput": 8673.25, "total_tokens": 106401488} +{"current_steps": 157875, "total_steps": 204665, "loss": 0.0, "lr": 3.0189207108655656e-07, "epoch": 3.856912515574231, "percentage": 77.14, "elapsed_time": "3:24:28", "remaining_time": "1:00:35", "throughput": 8673.28, "total_tokens": 106404816} +{"current_steps": 157880, "total_steps": 204665, "loss": 0.0465, "lr": 3.018310154998579e-07, "epoch": 3.8570346664060784, "percentage": 77.14, "elapsed_time": "3:24:28", "remaining_time": "1:00:35", "throughput": 8673.3, "total_tokens": 106408080} +{"current_steps": 157885, "total_steps": 204665, "loss": 0.0, "lr": 3.017699649904323e-07, "epoch": 3.857156817237925, "percentage": 77.14, "elapsed_time": "3:24:28", "remaining_time": "1:00:35", "throughput": 8673.34, "total_tokens": 106411664} +{"current_steps": 157890, "total_steps": 204665, "loss": 0.0, "lr": 3.017089195587232e-07, "epoch": 3.8572789680697728, "percentage": 77.15, "elapsed_time": "3:24:29", "remaining_time": "1:00:34", "throughput": 8673.36, "total_tokens": 106414800} +{"current_steps": 157895, "total_steps": 204665, "loss": 0.0001, "lr": 3.0164787920517445e-07, "epoch": 3.8574011189016195, "percentage": 77.15, "elapsed_time": "3:24:29", "remaining_time": "1:00:34", "throughput": 8673.41, "total_tokens": 106418448} +{"current_steps": 157900, "total_steps": 204665, "loss": 0.0002, "lr": 3.0158684393023035e-07, "epoch": 3.857523269733467, "percentage": 77.15, "elapsed_time": "3:24:29", "remaining_time": "1:00:33", "throughput": 8673.44, "total_tokens": 106421904} +{"current_steps": 157905, "total_steps": 204665, "loss": 0.0, "lr": 3.015258137343344e-07, "epoch": 3.857645420565314, "percentage": 77.15, "elapsed_time": "3:24:30", "remaining_time": "1:00:33", "throughput": 8673.46, "total_tokens": 106425104} +{"current_steps": 157910, "total_steps": 204665, "loss": 0.0, "lr": 3.0146478861793076e-07, "epoch": 3.857767571397161, "percentage": 77.16, "elapsed_time": "3:24:30", "remaining_time": "1:00:33", "throughput": 8673.5, "total_tokens": 106428752} +{"current_steps": 157915, "total_steps": 204665, "loss": 0.0, "lr": 3.0140376858146286e-07, "epoch": 3.8578897222290083, "percentage": 77.16, "elapsed_time": "3:24:30", "remaining_time": "1:00:32", "throughput": 8673.53, "total_tokens": 106432016} +{"current_steps": 157920, "total_steps": 204665, "loss": 0.0, "lr": 3.0134275362537465e-07, "epoch": 3.8580118730608555, "percentage": 77.16, "elapsed_time": "3:24:31", "remaining_time": "1:00:32", "throughput": 8673.53, "total_tokens": 106435024} +{"current_steps": 157925, "total_steps": 204665, "loss": 0.0006, "lr": 3.012817437501102e-07, "epoch": 3.8581340238927027, "percentage": 77.16, "elapsed_time": "3:24:31", "remaining_time": "1:00:31", "throughput": 8673.57, "total_tokens": 106438544} +{"current_steps": 157930, "total_steps": 204665, "loss": 0.0468, "lr": 3.0122073895611244e-07, "epoch": 3.85825617472455, "percentage": 77.17, "elapsed_time": "3:24:31", "remaining_time": "1:00:31", "throughput": 8673.59, "total_tokens": 106441616} +{"current_steps": 157935, "total_steps": 204665, "loss": 0.0, "lr": 3.011597392438258e-07, "epoch": 3.858378325556397, "percentage": 77.17, "elapsed_time": "3:24:32", "remaining_time": "1:00:31", "throughput": 8673.6, "total_tokens": 106444752} +{"current_steps": 157940, "total_steps": 204665, "loss": 0.0835, "lr": 3.010987446136931e-07, "epoch": 3.8585004763882442, "percentage": 77.17, "elapsed_time": "3:24:32", "remaining_time": "1:00:30", "throughput": 8673.63, "total_tokens": 106448208} +{"current_steps": 157945, "total_steps": 204665, "loss": 0.0, "lr": 3.0103775506615837e-07, "epoch": 3.8586226272200914, "percentage": 77.17, "elapsed_time": "3:24:32", "remaining_time": "1:00:30", "throughput": 8673.7, "total_tokens": 106452176} +{"current_steps": 157950, "total_steps": 204665, "loss": 0.0, "lr": 3.0097677060166536e-07, "epoch": 3.8587447780519386, "percentage": 77.17, "elapsed_time": "3:24:33", "remaining_time": "1:00:29", "throughput": 8673.75, "total_tokens": 106455952} +{"current_steps": 157955, "total_steps": 204665, "loss": 0.0, "lr": 3.00915791220657e-07, "epoch": 3.858866928883786, "percentage": 77.18, "elapsed_time": "3:24:33", "remaining_time": "1:00:29", "throughput": 8673.77, "total_tokens": 106459152} +{"current_steps": 157960, "total_steps": 204665, "loss": 0.0, "lr": 3.008548169235774e-07, "epoch": 3.858989079715633, "percentage": 77.18, "elapsed_time": "3:24:34", "remaining_time": "1:00:29", "throughput": 8673.82, "total_tokens": 106462800} +{"current_steps": 157965, "total_steps": 204665, "loss": 0.0, "lr": 3.0079384771086924e-07, "epoch": 3.85911123054748, "percentage": 77.18, "elapsed_time": "3:24:34", "remaining_time": "1:00:28", "throughput": 8673.86, "total_tokens": 106466320} +{"current_steps": 157970, "total_steps": 204665, "loss": 0.0, "lr": 3.0073288358297656e-07, "epoch": 3.859233381379327, "percentage": 77.18, "elapsed_time": "3:24:34", "remaining_time": "1:00:28", "throughput": 8673.88, "total_tokens": 106469456} +{"current_steps": 157975, "total_steps": 204665, "loss": 0.0, "lr": 3.0067192454034217e-07, "epoch": 3.8593555322111746, "percentage": 77.19, "elapsed_time": "3:24:35", "remaining_time": "1:00:27", "throughput": 8673.89, "total_tokens": 106472656} +{"current_steps": 157980, "total_steps": 204665, "loss": 0.0, "lr": 3.0061097058341e-07, "epoch": 3.8594776830430213, "percentage": 77.19, "elapsed_time": "3:24:35", "remaining_time": "1:00:27", "throughput": 8673.94, "total_tokens": 106476240} +{"current_steps": 157985, "total_steps": 204665, "loss": 0.0003, "lr": 3.005500217126226e-07, "epoch": 3.859599833874869, "percentage": 77.19, "elapsed_time": "3:24:35", "remaining_time": "1:00:27", "throughput": 8673.97, "total_tokens": 106479696} +{"current_steps": 157990, "total_steps": 204665, "loss": 0.0796, "lr": 3.004890779284239e-07, "epoch": 3.8597219847067157, "percentage": 77.19, "elapsed_time": "3:24:36", "remaining_time": "1:00:26", "throughput": 8674.01, "total_tokens": 106483216} +{"current_steps": 157995, "total_steps": 204665, "loss": 0.0003, "lr": 3.0042813923125675e-07, "epoch": 3.859844135538563, "percentage": 77.2, "elapsed_time": "3:24:36", "remaining_time": "1:00:26", "throughput": 8674.05, "total_tokens": 106486864} +{"current_steps": 158000, "total_steps": 204665, "loss": 0.0, "lr": 3.0036720562156406e-07, "epoch": 3.85996628637041, "percentage": 77.2, "elapsed_time": "3:24:36", "remaining_time": "1:00:25", "throughput": 8674.1, "total_tokens": 106490512} +{"current_steps": 158005, "total_steps": 204665, "loss": 0.0, "lr": 3.003062770997892e-07, "epoch": 3.8600884372022572, "percentage": 77.2, "elapsed_time": "3:24:37", "remaining_time": "1:00:25", "throughput": 8674.14, "total_tokens": 106494032} +{"current_steps": 158010, "total_steps": 204665, "loss": 0.0385, "lr": 3.002453536663756e-07, "epoch": 3.8602105880341044, "percentage": 77.2, "elapsed_time": "3:24:37", "remaining_time": "1:00:25", "throughput": 8674.18, "total_tokens": 106497552} +{"current_steps": 158015, "total_steps": 204665, "loss": 0.0, "lr": 3.001844353217657e-07, "epoch": 3.8603327388659516, "percentage": 77.21, "elapsed_time": "3:24:37", "remaining_time": "1:00:24", "throughput": 8674.22, "total_tokens": 106501136} +{"current_steps": 158020, "total_steps": 204665, "loss": 0.0, "lr": 3.0012352206640313e-07, "epoch": 3.860454889697799, "percentage": 77.21, "elapsed_time": "3:24:38", "remaining_time": "1:00:24", "throughput": 8674.22, "total_tokens": 106504080} +{"current_steps": 158025, "total_steps": 204665, "loss": 0.0, "lr": 3.000626139007302e-07, "epoch": 3.860577040529646, "percentage": 77.21, "elapsed_time": "3:24:38", "remaining_time": "1:00:23", "throughput": 8674.24, "total_tokens": 106507280} +{"current_steps": 158030, "total_steps": 204665, "loss": 0.0, "lr": 3.0000171082519056e-07, "epoch": 3.860699191361493, "percentage": 77.21, "elapsed_time": "3:24:38", "remaining_time": "1:00:23", "throughput": 8674.25, "total_tokens": 106510288} +{"current_steps": 158035, "total_steps": 204665, "loss": 0.0, "lr": 2.999408128402264e-07, "epoch": 3.8608213421933404, "percentage": 77.22, "elapsed_time": "3:24:39", "remaining_time": "1:00:23", "throughput": 8674.29, "total_tokens": 106513808} +{"current_steps": 158040, "total_steps": 204665, "loss": 0.0, "lr": 2.9987991994628094e-07, "epoch": 3.8609434930251876, "percentage": 77.22, "elapsed_time": "3:24:39", "remaining_time": "1:00:22", "throughput": 8674.31, "total_tokens": 106517008} +{"current_steps": 158045, "total_steps": 204665, "loss": 0.0, "lr": 2.998190321437973e-07, "epoch": 3.8610656438570348, "percentage": 77.22, "elapsed_time": "3:24:39", "remaining_time": "1:00:22", "throughput": 8674.32, "total_tokens": 106520144} +{"current_steps": 158050, "total_steps": 204665, "loss": 0.0436, "lr": 2.9975814943321774e-07, "epoch": 3.861187794688882, "percentage": 77.22, "elapsed_time": "3:24:40", "remaining_time": "1:00:21", "throughput": 8674.33, "total_tokens": 106523152} +{"current_steps": 158055, "total_steps": 204665, "loss": 0.0, "lr": 2.9969727181498563e-07, "epoch": 3.861309945520729, "percentage": 77.23, "elapsed_time": "3:24:40", "remaining_time": "1:00:21", "throughput": 8674.37, "total_tokens": 106526736} +{"current_steps": 158060, "total_steps": 204665, "loss": 0.0, "lr": 2.996363992895429e-07, "epoch": 3.8614320963525763, "percentage": 77.23, "elapsed_time": "3:24:40", "remaining_time": "1:00:21", "throughput": 8674.41, "total_tokens": 106530256} +{"current_steps": 158065, "total_steps": 204665, "loss": 0.0, "lr": 2.9957553185733295e-07, "epoch": 3.861554247184423, "percentage": 77.23, "elapsed_time": "3:24:41", "remaining_time": "1:00:20", "throughput": 8674.43, "total_tokens": 106533584} +{"current_steps": 158070, "total_steps": 204665, "loss": 0.0, "lr": 2.995146695187979e-07, "epoch": 3.8616763980162707, "percentage": 77.23, "elapsed_time": "3:24:41", "remaining_time": "1:00:20", "throughput": 8674.46, "total_tokens": 106536976} +{"current_steps": 158075, "total_steps": 204665, "loss": 0.0003, "lr": 2.994538122743806e-07, "epoch": 3.8617985488481175, "percentage": 77.24, "elapsed_time": "3:24:42", "remaining_time": "1:00:19", "throughput": 8674.48, "total_tokens": 106540176} +{"current_steps": 158080, "total_steps": 204665, "loss": 0.0, "lr": 2.993929601245239e-07, "epoch": 3.861920699679965, "percentage": 77.24, "elapsed_time": "3:24:42", "remaining_time": "1:00:19", "throughput": 8674.49, "total_tokens": 106543312} +{"current_steps": 158085, "total_steps": 204665, "loss": 0.0, "lr": 2.993321130696699e-07, "epoch": 3.862042850511812, "percentage": 77.24, "elapsed_time": "3:24:42", "remaining_time": "1:00:19", "throughput": 8674.51, "total_tokens": 106546576} +{"current_steps": 158090, "total_steps": 204665, "loss": 0.0, "lr": 2.9927127111026094e-07, "epoch": 3.862165001343659, "percentage": 77.24, "elapsed_time": "3:24:43", "remaining_time": "1:00:18", "throughput": 8674.53, "total_tokens": 106549712} +{"current_steps": 158095, "total_steps": 204665, "loss": 0.0, "lr": 2.992104342467402e-07, "epoch": 3.862287152175506, "percentage": 77.25, "elapsed_time": "3:24:43", "remaining_time": "1:00:18", "throughput": 8674.55, "total_tokens": 106552912} +{"current_steps": 158100, "total_steps": 204665, "loss": 0.0, "lr": 2.9914960247954936e-07, "epoch": 3.8624093030073534, "percentage": 77.25, "elapsed_time": "3:24:43", "remaining_time": "1:00:17", "throughput": 8674.59, "total_tokens": 106556560} +{"current_steps": 158105, "total_steps": 204665, "loss": 0.0, "lr": 2.9908877580913126e-07, "epoch": 3.8625314538392006, "percentage": 77.25, "elapsed_time": "3:24:44", "remaining_time": "1:00:17", "throughput": 8674.6, "total_tokens": 106559568} +{"current_steps": 158110, "total_steps": 204665, "loss": 0.0693, "lr": 2.99027954235928e-07, "epoch": 3.862653604671048, "percentage": 77.25, "elapsed_time": "3:24:44", "remaining_time": "1:00:17", "throughput": 8674.64, "total_tokens": 106563088} +{"current_steps": 158115, "total_steps": 204665, "loss": 0.0, "lr": 2.989671377603822e-07, "epoch": 3.862775755502895, "percentage": 77.26, "elapsed_time": "3:24:44", "remaining_time": "1:00:16", "throughput": 8674.69, "total_tokens": 106566800} +{"current_steps": 158120, "total_steps": 204665, "loss": 0.0002, "lr": 2.989063263829357e-07, "epoch": 3.862897906334742, "percentage": 77.26, "elapsed_time": "3:24:45", "remaining_time": "1:00:16", "throughput": 8674.72, "total_tokens": 106570256} +{"current_steps": 158125, "total_steps": 204665, "loss": 0.0, "lr": 2.9884552010403106e-07, "epoch": 3.8630200571665894, "percentage": 77.26, "elapsed_time": "3:24:45", "remaining_time": "1:00:15", "throughput": 8674.75, "total_tokens": 106573520} +{"current_steps": 158130, "total_steps": 204665, "loss": 0.0, "lr": 2.987847189241103e-07, "epoch": 3.8631422079984366, "percentage": 77.26, "elapsed_time": "3:24:45", "remaining_time": "1:00:15", "throughput": 8674.77, "total_tokens": 106576848} +{"current_steps": 158135, "total_steps": 204665, "loss": 0.0353, "lr": 2.987239228436156e-07, "epoch": 3.8632643588302837, "percentage": 77.27, "elapsed_time": "3:24:46", "remaining_time": "1:00:15", "throughput": 8674.79, "total_tokens": 106580048} +{"current_steps": 158140, "total_steps": 204665, "loss": 0.0775, "lr": 2.9866313186298944e-07, "epoch": 3.863386509662131, "percentage": 77.27, "elapsed_time": "3:24:46", "remaining_time": "1:00:14", "throughput": 8674.83, "total_tokens": 106583632} +{"current_steps": 158145, "total_steps": 204665, "loss": 0.0489, "lr": 2.9860234598267333e-07, "epoch": 3.863508660493978, "percentage": 77.27, "elapsed_time": "3:24:46", "remaining_time": "1:00:14", "throughput": 8674.86, "total_tokens": 106586960} +{"current_steps": 158150, "total_steps": 204665, "loss": 0.0, "lr": 2.985415652031099e-07, "epoch": 3.863630811325825, "percentage": 77.27, "elapsed_time": "3:24:47", "remaining_time": "1:00:13", "throughput": 8674.87, "total_tokens": 106590096} +{"current_steps": 158155, "total_steps": 204665, "loss": 0.0, "lr": 2.9848078952474063e-07, "epoch": 3.8637529621576725, "percentage": 77.28, "elapsed_time": "3:24:47", "remaining_time": "1:00:13", "throughput": 8674.89, "total_tokens": 106593232} +{"current_steps": 158160, "total_steps": 204665, "loss": 0.0, "lr": 2.984200189480077e-07, "epoch": 3.8638751129895192, "percentage": 77.28, "elapsed_time": "3:24:47", "remaining_time": "1:00:13", "throughput": 8674.92, "total_tokens": 106596688} +{"current_steps": 158165, "total_steps": 204665, "loss": 0.0, "lr": 2.983592534733533e-07, "epoch": 3.863997263821367, "percentage": 77.28, "elapsed_time": "3:24:48", "remaining_time": "1:00:12", "throughput": 8674.96, "total_tokens": 106600272} +{"current_steps": 158170, "total_steps": 204665, "loss": 0.0258, "lr": 2.98298493101219e-07, "epoch": 3.8641194146532136, "percentage": 77.28, "elapsed_time": "3:24:48", "remaining_time": "1:00:12", "throughput": 8674.98, "total_tokens": 106603472} +{"current_steps": 158175, "total_steps": 204665, "loss": 0.0, "lr": 2.982377378320471e-07, "epoch": 3.864241565485061, "percentage": 77.28, "elapsed_time": "3:24:48", "remaining_time": "1:00:11", "throughput": 8674.99, "total_tokens": 106606480} +{"current_steps": 158180, "total_steps": 204665, "loss": 0.0001, "lr": 2.981769876662786e-07, "epoch": 3.864363716316908, "percentage": 77.29, "elapsed_time": "3:24:49", "remaining_time": "1:00:11", "throughput": 8675.02, "total_tokens": 106609936} +{"current_steps": 158185, "total_steps": 204665, "loss": 0.0, "lr": 2.981162426043563e-07, "epoch": 3.864485867148755, "percentage": 77.29, "elapsed_time": "3:24:49", "remaining_time": "1:00:11", "throughput": 8675.05, "total_tokens": 106613264} +{"current_steps": 158190, "total_steps": 204665, "loss": 0.0004, "lr": 2.980555026467212e-07, "epoch": 3.8646080179806024, "percentage": 77.29, "elapsed_time": "3:24:49", "remaining_time": "1:00:10", "throughput": 8675.05, "total_tokens": 106616272} +{"current_steps": 158195, "total_steps": 204665, "loss": 0.0, "lr": 2.9799476779381547e-07, "epoch": 3.8647301688124496, "percentage": 77.29, "elapsed_time": "3:24:50", "remaining_time": "1:00:10", "throughput": 8675.09, "total_tokens": 106619728} +{"current_steps": 158200, "total_steps": 204665, "loss": 0.0, "lr": 2.9793403804608066e-07, "epoch": 3.8648523196442968, "percentage": 77.3, "elapsed_time": "3:24:50", "remaining_time": "1:00:09", "throughput": 8675.17, "total_tokens": 106623888} +{"current_steps": 158205, "total_steps": 204665, "loss": 0.0, "lr": 2.9787331340395807e-07, "epoch": 3.864974470476144, "percentage": 77.3, "elapsed_time": "3:24:51", "remaining_time": "1:00:09", "throughput": 8675.22, "total_tokens": 106627664} +{"current_steps": 158210, "total_steps": 204665, "loss": 0.0, "lr": 2.9781259386788984e-07, "epoch": 3.865096621307991, "percentage": 77.3, "elapsed_time": "3:24:51", "remaining_time": "1:00:09", "throughput": 8675.27, "total_tokens": 106631248} +{"current_steps": 158215, "total_steps": 204665, "loss": 0.0, "lr": 2.97751879438317e-07, "epoch": 3.8652187721398383, "percentage": 77.3, "elapsed_time": "3:24:51", "remaining_time": "1:00:08", "throughput": 8675.3, "total_tokens": 106634704} +{"current_steps": 158220, "total_steps": 204665, "loss": 0.001, "lr": 2.976911701156818e-07, "epoch": 3.8653409229716855, "percentage": 77.31, "elapsed_time": "3:24:52", "remaining_time": "1:00:08", "throughput": 8675.17, "total_tokens": 106638416} +{"current_steps": 158225, "total_steps": 204665, "loss": 0.0, "lr": 2.9763046590042487e-07, "epoch": 3.8654630738035327, "percentage": 77.31, "elapsed_time": "3:24:52", "remaining_time": "1:00:07", "throughput": 8675.19, "total_tokens": 106641616} +{"current_steps": 158230, "total_steps": 204665, "loss": 0.0536, "lr": 2.9756976679298805e-07, "epoch": 3.86558522463538, "percentage": 77.31, "elapsed_time": "3:24:53", "remaining_time": "1:00:07", "throughput": 8675.19, "total_tokens": 106644688} +{"current_steps": 158235, "total_steps": 204665, "loss": 0.0, "lr": 2.9750907279381333e-07, "epoch": 3.865707375467227, "percentage": 77.31, "elapsed_time": "3:24:53", "remaining_time": "1:00:07", "throughput": 8675.25, "total_tokens": 106648464} +{"current_steps": 158240, "total_steps": 204665, "loss": 0.0, "lr": 2.97448383903341e-07, "epoch": 3.8658295262990743, "percentage": 77.32, "elapsed_time": "3:24:53", "remaining_time": "1:00:06", "throughput": 8675.27, "total_tokens": 106651792} +{"current_steps": 158245, "total_steps": 204665, "loss": 0.0002, "lr": 2.973877001220135e-07, "epoch": 3.865951677130921, "percentage": 77.32, "elapsed_time": "3:24:54", "remaining_time": "1:00:06", "throughput": 8675.32, "total_tokens": 106655440} +{"current_steps": 158250, "total_steps": 204665, "loss": 0.0, "lr": 2.9732702145027136e-07, "epoch": 3.8660738279627687, "percentage": 77.32, "elapsed_time": "3:24:54", "remaining_time": "1:00:05", "throughput": 8675.33, "total_tokens": 106658576} +{"current_steps": 158255, "total_steps": 204665, "loss": 0.0002, "lr": 2.97266347888556e-07, "epoch": 3.8661959787946154, "percentage": 77.32, "elapsed_time": "3:24:54", "remaining_time": "1:00:05", "throughput": 8675.39, "total_tokens": 106662288} +{"current_steps": 158260, "total_steps": 204665, "loss": 0.0447, "lr": 2.9720567943730913e-07, "epoch": 3.8663181296264626, "percentage": 77.33, "elapsed_time": "3:24:55", "remaining_time": "1:00:05", "throughput": 8675.38, "total_tokens": 106665168} +{"current_steps": 158265, "total_steps": 204665, "loss": 0.0, "lr": 2.971450160969712e-07, "epoch": 3.86644028045831, "percentage": 77.33, "elapsed_time": "3:24:55", "remaining_time": "1:00:04", "throughput": 8675.38, "total_tokens": 106668112} +{"current_steps": 158270, "total_steps": 204665, "loss": 0.0, "lr": 2.9708435786798414e-07, "epoch": 3.866562431290157, "percentage": 77.33, "elapsed_time": "3:24:55", "remaining_time": "1:00:04", "throughput": 8675.46, "total_tokens": 106672208} +{"current_steps": 158275, "total_steps": 204665, "loss": 0.0, "lr": 2.970237047507883e-07, "epoch": 3.866684582122004, "percentage": 77.33, "elapsed_time": "3:24:56", "remaining_time": "1:00:03", "throughput": 8675.48, "total_tokens": 106675408} +{"current_steps": 158280, "total_steps": 204665, "loss": 0.0001, "lr": 2.9696305674582553e-07, "epoch": 3.8668067329538514, "percentage": 77.34, "elapsed_time": "3:24:56", "remaining_time": "1:00:03", "throughput": 8675.52, "total_tokens": 106678992} +{"current_steps": 158285, "total_steps": 204665, "loss": 0.0, "lr": 2.969024138535362e-07, "epoch": 3.8669288837856985, "percentage": 77.34, "elapsed_time": "3:24:56", "remaining_time": "1:00:03", "throughput": 8675.55, "total_tokens": 106682384} +{"current_steps": 158290, "total_steps": 204665, "loss": 0.058, "lr": 2.96841776074362e-07, "epoch": 3.8670510346175457, "percentage": 77.34, "elapsed_time": "3:24:57", "remaining_time": "1:00:02", "throughput": 8675.56, "total_tokens": 106685520} +{"current_steps": 158295, "total_steps": 204665, "loss": 0.0, "lr": 2.9678114340874317e-07, "epoch": 3.867173185449393, "percentage": 77.34, "elapsed_time": "3:24:57", "remaining_time": "1:00:02", "throughput": 8675.6, "total_tokens": 106689040} +{"current_steps": 158300, "total_steps": 204665, "loss": 0.0204, "lr": 2.967205158571212e-07, "epoch": 3.86729533628124, "percentage": 77.35, "elapsed_time": "3:24:57", "remaining_time": "1:00:01", "throughput": 8675.66, "total_tokens": 106692880} +{"current_steps": 158305, "total_steps": 204665, "loss": 0.0001, "lr": 2.966598934199369e-07, "epoch": 3.8674174871130873, "percentage": 77.35, "elapsed_time": "3:24:58", "remaining_time": "1:00:01", "throughput": 8675.71, "total_tokens": 106696528} +{"current_steps": 158310, "total_steps": 204665, "loss": 0.0001, "lr": 2.965992760976308e-07, "epoch": 3.8675396379449345, "percentage": 77.35, "elapsed_time": "3:24:58", "remaining_time": "1:00:01", "throughput": 8675.73, "total_tokens": 106699728} +{"current_steps": 158315, "total_steps": 204665, "loss": 0.0, "lr": 2.9653866389064387e-07, "epoch": 3.8676617887767817, "percentage": 77.35, "elapsed_time": "3:24:58", "remaining_time": "1:00:00", "throughput": 8675.74, "total_tokens": 106702800} +{"current_steps": 158320, "total_steps": 204665, "loss": 0.0, "lr": 2.9647805679941726e-07, "epoch": 3.867783939608629, "percentage": 77.36, "elapsed_time": "3:24:59", "remaining_time": "1:00:00", "throughput": 8675.75, "total_tokens": 106705808} +{"current_steps": 158325, "total_steps": 204665, "loss": 0.0004, "lr": 2.9641745482439115e-07, "epoch": 3.867906090440476, "percentage": 77.36, "elapsed_time": "3:24:59", "remaining_time": "0:59:59", "throughput": 8675.78, "total_tokens": 106709264} +{"current_steps": 158330, "total_steps": 204665, "loss": 0.0, "lr": 2.9635685796600695e-07, "epoch": 3.868028241272323, "percentage": 77.36, "elapsed_time": "3:25:00", "remaining_time": "0:59:59", "throughput": 8675.79, "total_tokens": 106712336} +{"current_steps": 158335, "total_steps": 204665, "loss": 0.0, "lr": 2.962962662247045e-07, "epoch": 3.8681503921041704, "percentage": 77.36, "elapsed_time": "3:25:00", "remaining_time": "0:59:59", "throughput": 8675.81, "total_tokens": 106715600} +{"current_steps": 158340, "total_steps": 204665, "loss": 0.0, "lr": 2.962356796009253e-07, "epoch": 3.868272542936017, "percentage": 77.37, "elapsed_time": "3:25:00", "remaining_time": "0:59:58", "throughput": 8675.85, "total_tokens": 106719184} +{"current_steps": 158345, "total_steps": 204665, "loss": 0.0771, "lr": 2.961750980951091e-07, "epoch": 3.868394693767865, "percentage": 77.37, "elapsed_time": "3:25:01", "remaining_time": "0:59:58", "throughput": 8675.88, "total_tokens": 106722512} +{"current_steps": 158350, "total_steps": 204665, "loss": 0.0329, "lr": 2.9611452170769704e-07, "epoch": 3.8685168445997116, "percentage": 77.37, "elapsed_time": "3:25:01", "remaining_time": "0:59:57", "throughput": 8675.89, "total_tokens": 106725584} +{"current_steps": 158355, "total_steps": 204665, "loss": 0.0001, "lr": 2.960539504391297e-07, "epoch": 3.8686389954315588, "percentage": 77.37, "elapsed_time": "3:25:01", "remaining_time": "0:59:57", "throughput": 8675.91, "total_tokens": 106728784} +{"current_steps": 158360, "total_steps": 204665, "loss": 0.0, "lr": 2.959933842898471e-07, "epoch": 3.868761146263406, "percentage": 77.38, "elapsed_time": "3:25:02", "remaining_time": "0:59:57", "throughput": 8675.93, "total_tokens": 106731984} +{"current_steps": 158365, "total_steps": 204665, "loss": 0.0, "lr": 2.959328232602902e-07, "epoch": 3.868883297095253, "percentage": 77.38, "elapsed_time": "3:25:02", "remaining_time": "0:59:56", "throughput": 8675.95, "total_tokens": 106735312} +{"current_steps": 158370, "total_steps": 204665, "loss": 0.0, "lr": 2.95872267350899e-07, "epoch": 3.8690054479271003, "percentage": 77.38, "elapsed_time": "3:25:02", "remaining_time": "0:59:56", "throughput": 8676.0, "total_tokens": 106739024} +{"current_steps": 158375, "total_steps": 204665, "loss": 0.0439, "lr": 2.9581171656211423e-07, "epoch": 3.8691275987589475, "percentage": 77.38, "elapsed_time": "3:25:03", "remaining_time": "0:59:55", "throughput": 8676.02, "total_tokens": 106742224} +{"current_steps": 158380, "total_steps": 204665, "loss": 0.0, "lr": 2.9575117089437584e-07, "epoch": 3.8692497495907947, "percentage": 77.38, "elapsed_time": "3:25:03", "remaining_time": "0:59:55", "throughput": 8676.04, "total_tokens": 106745360} +{"current_steps": 158385, "total_steps": 204665, "loss": 0.0, "lr": 2.956906303481244e-07, "epoch": 3.869371900422642, "percentage": 77.39, "elapsed_time": "3:25:03", "remaining_time": "0:59:55", "throughput": 8676.07, "total_tokens": 106748880} +{"current_steps": 158390, "total_steps": 204665, "loss": 0.0, "lr": 2.956300949238003e-07, "epoch": 3.869494051254489, "percentage": 77.39, "elapsed_time": "3:25:04", "remaining_time": "0:59:54", "throughput": 8676.11, "total_tokens": 106752272} +{"current_steps": 158395, "total_steps": 204665, "loss": 0.0, "lr": 2.955695646218437e-07, "epoch": 3.8696162020863363, "percentage": 77.39, "elapsed_time": "3:25:04", "remaining_time": "0:59:54", "throughput": 8676.13, "total_tokens": 106755664} +{"current_steps": 158400, "total_steps": 204665, "loss": 0.0489, "lr": 2.9550903944269445e-07, "epoch": 3.8697383529181835, "percentage": 77.39, "elapsed_time": "3:25:04", "remaining_time": "0:59:53", "throughput": 8676.16, "total_tokens": 106758992} +{"current_steps": 158405, "total_steps": 204665, "loss": 0.0544, "lr": 2.9544851938679314e-07, "epoch": 3.8698605037500307, "percentage": 77.4, "elapsed_time": "3:25:05", "remaining_time": "0:59:53", "throughput": 8676.2, "total_tokens": 106762448} +{"current_steps": 158410, "total_steps": 204665, "loss": 0.0005, "lr": 2.9538800445457946e-07, "epoch": 3.869982654581878, "percentage": 77.4, "elapsed_time": "3:25:05", "remaining_time": "0:59:53", "throughput": 8676.26, "total_tokens": 106766352} +{"current_steps": 158415, "total_steps": 204665, "loss": 0.0, "lr": 2.95327494646494e-07, "epoch": 3.8701048054137246, "percentage": 77.4, "elapsed_time": "3:25:05", "remaining_time": "0:59:52", "throughput": 8676.29, "total_tokens": 106769808} +{"current_steps": 158420, "total_steps": 204665, "loss": 0.0536, "lr": 2.9526698996297615e-07, "epoch": 3.8702269562455722, "percentage": 77.4, "elapsed_time": "3:25:06", "remaining_time": "0:59:52", "throughput": 8676.33, "total_tokens": 106773264} +{"current_steps": 158425, "total_steps": 204665, "loss": 0.0002, "lr": 2.952064904044668e-07, "epoch": 3.870349107077419, "percentage": 77.41, "elapsed_time": "3:25:06", "remaining_time": "0:59:51", "throughput": 8676.34, "total_tokens": 106776336} +{"current_steps": 158430, "total_steps": 204665, "loss": 0.0, "lr": 2.951459959714049e-07, "epoch": 3.8704712579092666, "percentage": 77.41, "elapsed_time": "3:25:06", "remaining_time": "0:59:51", "throughput": 8676.34, "total_tokens": 106779280} +{"current_steps": 158435, "total_steps": 204665, "loss": 0.0, "lr": 2.9508550666423136e-07, "epoch": 3.8705934087411134, "percentage": 77.41, "elapsed_time": "3:25:07", "remaining_time": "0:59:51", "throughput": 8676.34, "total_tokens": 106782288} +{"current_steps": 158440, "total_steps": 204665, "loss": 0.0372, "lr": 2.9502502248338525e-07, "epoch": 3.8707155595729605, "percentage": 77.41, "elapsed_time": "3:25:07", "remaining_time": "0:59:50", "throughput": 8676.37, "total_tokens": 106785680} +{"current_steps": 158445, "total_steps": 204665, "loss": 0.0, "lr": 2.9496454342930674e-07, "epoch": 3.8708377104048077, "percentage": 77.42, "elapsed_time": "3:25:07", "remaining_time": "0:59:50", "throughput": 8676.4, "total_tokens": 106788944} +{"current_steps": 158450, "total_steps": 204665, "loss": 0.0, "lr": 2.949040695024361e-07, "epoch": 3.870959861236655, "percentage": 77.42, "elapsed_time": "3:25:08", "remaining_time": "0:59:49", "throughput": 8676.43, "total_tokens": 106792464} +{"current_steps": 158455, "total_steps": 204665, "loss": 0.0006, "lr": 2.9484360070321236e-07, "epoch": 3.871082012068502, "percentage": 77.42, "elapsed_time": "3:25:08", "remaining_time": "0:59:49", "throughput": 8676.47, "total_tokens": 106795920} +{"current_steps": 158460, "total_steps": 204665, "loss": 0.0, "lr": 2.94783137032076e-07, "epoch": 3.8712041629003493, "percentage": 77.42, "elapsed_time": "3:25:09", "remaining_time": "0:59:49", "throughput": 8676.47, "total_tokens": 106798928} +{"current_steps": 158465, "total_steps": 204665, "loss": 0.0427, "lr": 2.94722678489466e-07, "epoch": 3.8713263137321965, "percentage": 77.43, "elapsed_time": "3:25:09", "remaining_time": "0:59:48", "throughput": 8676.5, "total_tokens": 106802256} +{"current_steps": 158470, "total_steps": 204665, "loss": 0.0, "lr": 2.946622250758226e-07, "epoch": 3.8714484645640437, "percentage": 77.43, "elapsed_time": "3:25:09", "remaining_time": "0:59:48", "throughput": 8676.52, "total_tokens": 106805584} +{"current_steps": 158475, "total_steps": 204665, "loss": 0.0, "lr": 2.9460177679158505e-07, "epoch": 3.871570615395891, "percentage": 77.43, "elapsed_time": "3:25:10", "remaining_time": "0:59:47", "throughput": 8676.57, "total_tokens": 106809232} +{"current_steps": 158480, "total_steps": 204665, "loss": 0.0, "lr": 2.9454133363719304e-07, "epoch": 3.871692766227738, "percentage": 77.43, "elapsed_time": "3:25:10", "remaining_time": "0:59:47", "throughput": 8676.56, "total_tokens": 106812112} +{"current_steps": 158485, "total_steps": 204665, "loss": 0.0489, "lr": 2.944808956130864e-07, "epoch": 3.8718149170595852, "percentage": 77.44, "elapsed_time": "3:25:10", "remaining_time": "0:59:47", "throughput": 8676.62, "total_tokens": 106815952} +{"current_steps": 158490, "total_steps": 204665, "loss": 0.0003, "lr": 2.944204627197042e-07, "epoch": 3.8719370678914324, "percentage": 77.44, "elapsed_time": "3:25:11", "remaining_time": "0:59:46", "throughput": 8676.64, "total_tokens": 106819088} +{"current_steps": 158495, "total_steps": 204665, "loss": 0.0, "lr": 2.9436003495748664e-07, "epoch": 3.8720592187232796, "percentage": 77.44, "elapsed_time": "3:25:11", "remaining_time": "0:59:46", "throughput": 8676.63, "total_tokens": 106821968} +{"current_steps": 158500, "total_steps": 204665, "loss": 0.0, "lr": 2.942996123268722e-07, "epoch": 3.872181369555127, "percentage": 77.44, "elapsed_time": "3:25:11", "remaining_time": "0:59:45", "throughput": 8676.68, "total_tokens": 106825616} +{"current_steps": 158505, "total_steps": 204665, "loss": 0.0, "lr": 2.942391948283012e-07, "epoch": 3.872303520386974, "percentage": 77.45, "elapsed_time": "3:25:12", "remaining_time": "0:59:45", "throughput": 8676.72, "total_tokens": 106829072} +{"current_steps": 158510, "total_steps": 204665, "loss": 0.0, "lr": 2.941787824622125e-07, "epoch": 3.8724256712188208, "percentage": 77.45, "elapsed_time": "3:25:12", "remaining_time": "0:59:45", "throughput": 8676.74, "total_tokens": 106832336} +{"current_steps": 158515, "total_steps": 204665, "loss": 0.0, "lr": 2.9411837522904536e-07, "epoch": 3.8725478220506684, "percentage": 77.45, "elapsed_time": "3:25:12", "remaining_time": "0:59:44", "throughput": 8676.78, "total_tokens": 106835920} +{"current_steps": 158520, "total_steps": 204665, "loss": 0.0, "lr": 2.940579731292395e-07, "epoch": 3.872669972882515, "percentage": 77.45, "elapsed_time": "3:25:13", "remaining_time": "0:59:44", "throughput": 8676.82, "total_tokens": 106839504} +{"current_steps": 158525, "total_steps": 204665, "loss": 0.0327, "lr": 2.9399757616323363e-07, "epoch": 3.8727921237143628, "percentage": 77.46, "elapsed_time": "3:25:13", "remaining_time": "0:59:43", "throughput": 8676.85, "total_tokens": 106842832} +{"current_steps": 158530, "total_steps": 204665, "loss": 0.0, "lr": 2.9393718433146766e-07, "epoch": 3.8729142745462095, "percentage": 77.46, "elapsed_time": "3:25:13", "remaining_time": "0:59:43", "throughput": 8676.87, "total_tokens": 106846160} +{"current_steps": 158535, "total_steps": 204665, "loss": 0.0, "lr": 2.938767976343799e-07, "epoch": 3.8730364253780567, "percentage": 77.46, "elapsed_time": "3:25:14", "remaining_time": "0:59:43", "throughput": 8676.89, "total_tokens": 106849296} +{"current_steps": 158540, "total_steps": 204665, "loss": 0.0001, "lr": 2.9381641607241014e-07, "epoch": 3.873158576209904, "percentage": 77.46, "elapsed_time": "3:25:14", "remaining_time": "0:59:42", "throughput": 8676.94, "total_tokens": 106852944} +{"current_steps": 158545, "total_steps": 204665, "loss": 0.0, "lr": 2.937560396459976e-07, "epoch": 3.873280727041751, "percentage": 77.47, "elapsed_time": "3:25:14", "remaining_time": "0:59:42", "throughput": 8676.99, "total_tokens": 106856720} +{"current_steps": 158550, "total_steps": 204665, "loss": 0.0004, "lr": 2.936956683555808e-07, "epoch": 3.8734028778735983, "percentage": 77.47, "elapsed_time": "3:25:15", "remaining_time": "0:59:41", "throughput": 8676.99, "total_tokens": 106859728} +{"current_steps": 158555, "total_steps": 204665, "loss": 0.0, "lr": 2.936353022015994e-07, "epoch": 3.8735250287054455, "percentage": 77.47, "elapsed_time": "3:25:15", "remaining_time": "0:59:41", "throughput": 8677.04, "total_tokens": 106863376} +{"current_steps": 158560, "total_steps": 204665, "loss": 0.0, "lr": 2.935749411844918e-07, "epoch": 3.8736471795372927, "percentage": 77.47, "elapsed_time": "3:25:15", "remaining_time": "0:59:41", "throughput": 8677.06, "total_tokens": 106866576} +{"current_steps": 158565, "total_steps": 204665, "loss": 0.0, "lr": 2.9351458530469707e-07, "epoch": 3.87376933036914, "percentage": 77.48, "elapsed_time": "3:25:16", "remaining_time": "0:59:40", "throughput": 8677.08, "total_tokens": 106869904} +{"current_steps": 158570, "total_steps": 204665, "loss": 0.0325, "lr": 2.9345423456265474e-07, "epoch": 3.873891481200987, "percentage": 77.48, "elapsed_time": "3:25:16", "remaining_time": "0:59:40", "throughput": 8677.11, "total_tokens": 106873232} +{"current_steps": 158575, "total_steps": 204665, "loss": 0.0, "lr": 2.933938889588029e-07, "epoch": 3.874013632032834, "percentage": 77.48, "elapsed_time": "3:25:17", "remaining_time": "0:59:39", "throughput": 8677.14, "total_tokens": 106876688} +{"current_steps": 158580, "total_steps": 204665, "loss": 0.0, "lr": 2.933335484935812e-07, "epoch": 3.8741357828646814, "percentage": 77.48, "elapsed_time": "3:25:17", "remaining_time": "0:59:39", "throughput": 8677.16, "total_tokens": 106879952} +{"current_steps": 158585, "total_steps": 204665, "loss": 0.0953, "lr": 2.932732131674275e-07, "epoch": 3.8742579336965286, "percentage": 77.49, "elapsed_time": "3:25:17", "remaining_time": "0:59:39", "throughput": 8677.21, "total_tokens": 106883600} +{"current_steps": 158590, "total_steps": 204665, "loss": 0.0, "lr": 2.932128829807815e-07, "epoch": 3.874380084528376, "percentage": 77.49, "elapsed_time": "3:25:18", "remaining_time": "0:59:38", "throughput": 8677.23, "total_tokens": 106886864} +{"current_steps": 158595, "total_steps": 204665, "loss": 0.0524, "lr": 2.931525579340811e-07, "epoch": 3.8745022353602225, "percentage": 77.49, "elapsed_time": "3:25:18", "remaining_time": "0:59:38", "throughput": 8677.27, "total_tokens": 106890320} +{"current_steps": 158600, "total_steps": 204665, "loss": 0.0, "lr": 2.9309223802776585e-07, "epoch": 3.87462438619207, "percentage": 77.49, "elapsed_time": "3:25:18", "remaining_time": "0:59:37", "throughput": 8677.33, "total_tokens": 106894096} +{"current_steps": 158605, "total_steps": 204665, "loss": 0.0, "lr": 2.9303192326227365e-07, "epoch": 3.874746537023917, "percentage": 77.49, "elapsed_time": "3:25:19", "remaining_time": "0:59:37", "throughput": 8677.39, "total_tokens": 106898000} +{"current_steps": 158610, "total_steps": 204665, "loss": 0.0004, "lr": 2.929716136380438e-07, "epoch": 3.8748686878557645, "percentage": 77.5, "elapsed_time": "3:25:19", "remaining_time": "0:59:37", "throughput": 8677.4, "total_tokens": 106901008} +{"current_steps": 158615, "total_steps": 204665, "loss": 0.0, "lr": 2.9291130915551443e-07, "epoch": 3.8749908386876113, "percentage": 77.5, "elapsed_time": "3:25:19", "remaining_time": "0:59:36", "throughput": 8677.41, "total_tokens": 106904144} +{"current_steps": 158620, "total_steps": 204665, "loss": 0.0, "lr": 2.928510098151239e-07, "epoch": 3.8751129895194585, "percentage": 77.5, "elapsed_time": "3:25:20", "remaining_time": "0:59:36", "throughput": 8677.44, "total_tokens": 106907536} +{"current_steps": 158625, "total_steps": 204665, "loss": 0.0001, "lr": 2.927907156173114e-07, "epoch": 3.8752351403513057, "percentage": 77.5, "elapsed_time": "3:25:20", "remaining_time": "0:59:35", "throughput": 8677.48, "total_tokens": 106911184} +{"current_steps": 158630, "total_steps": 204665, "loss": 0.003, "lr": 2.927304265625148e-07, "epoch": 3.875357291183153, "percentage": 77.51, "elapsed_time": "3:25:20", "remaining_time": "0:59:35", "throughput": 8677.5, "total_tokens": 106914320} +{"current_steps": 158635, "total_steps": 204665, "loss": 0.0, "lr": 2.9267014265117264e-07, "epoch": 3.875479442015, "percentage": 77.51, "elapsed_time": "3:25:21", "remaining_time": "0:59:35", "throughput": 8677.53, "total_tokens": 106917776} +{"current_steps": 158640, "total_steps": 204665, "loss": 0.0, "lr": 2.9260986388372377e-07, "epoch": 3.8756015928468472, "percentage": 77.51, "elapsed_time": "3:25:21", "remaining_time": "0:59:34", "throughput": 8677.6, "total_tokens": 106921680} +{"current_steps": 158645, "total_steps": 204665, "loss": 0.0001, "lr": 2.92549590260606e-07, "epoch": 3.8757237436786944, "percentage": 77.51, "elapsed_time": "3:25:21", "remaining_time": "0:59:34", "throughput": 8677.61, "total_tokens": 106924816} +{"current_steps": 158650, "total_steps": 204665, "loss": 0.0, "lr": 2.9248932178225813e-07, "epoch": 3.8758458945105416, "percentage": 77.52, "elapsed_time": "3:25:22", "remaining_time": "0:59:33", "throughput": 8677.61, "total_tokens": 106927696} +{"current_steps": 158655, "total_steps": 204665, "loss": 0.0002, "lr": 2.9242905844911794e-07, "epoch": 3.875968045342389, "percentage": 77.52, "elapsed_time": "3:25:22", "remaining_time": "0:59:33", "throughput": 8677.64, "total_tokens": 106931024} +{"current_steps": 158660, "total_steps": 204665, "loss": 0.0001, "lr": 2.923688002616239e-07, "epoch": 3.876090196174236, "percentage": 77.52, "elapsed_time": "3:25:22", "remaining_time": "0:59:33", "throughput": 8677.65, "total_tokens": 106934160} +{"current_steps": 158665, "total_steps": 204665, "loss": 0.0002, "lr": 2.9230854722021456e-07, "epoch": 3.876212347006083, "percentage": 77.52, "elapsed_time": "3:25:23", "remaining_time": "0:59:32", "throughput": 8677.67, "total_tokens": 106937360} +{"current_steps": 158670, "total_steps": 204665, "loss": 0.0001, "lr": 2.922482993253277e-07, "epoch": 3.8763344978379304, "percentage": 77.53, "elapsed_time": "3:25:23", "remaining_time": "0:59:32", "throughput": 8677.69, "total_tokens": 106940624} +{"current_steps": 158675, "total_steps": 204665, "loss": 0.0, "lr": 2.921880565774016e-07, "epoch": 3.8764566486697776, "percentage": 77.53, "elapsed_time": "3:25:23", "remaining_time": "0:59:31", "throughput": 8677.72, "total_tokens": 106944080} +{"current_steps": 158680, "total_steps": 204665, "loss": 0.0, "lr": 2.9212781897687424e-07, "epoch": 3.8765787995016248, "percentage": 77.53, "elapsed_time": "3:25:24", "remaining_time": "0:59:31", "throughput": 8677.75, "total_tokens": 106947408} +{"current_steps": 158685, "total_steps": 204665, "loss": 0.0, "lr": 2.920675865241841e-07, "epoch": 3.876700950333472, "percentage": 77.53, "elapsed_time": "3:25:24", "remaining_time": "0:59:31", "throughput": 8677.81, "total_tokens": 106951312} +{"current_steps": 158690, "total_steps": 204665, "loss": 0.0002, "lr": 2.920073592197684e-07, "epoch": 3.8768231011653187, "percentage": 77.54, "elapsed_time": "3:25:25", "remaining_time": "0:59:30", "throughput": 8677.84, "total_tokens": 106954576} +{"current_steps": 158695, "total_steps": 204665, "loss": 0.0002, "lr": 2.919471370640657e-07, "epoch": 3.8769452519971663, "percentage": 77.54, "elapsed_time": "3:25:25", "remaining_time": "0:59:30", "throughput": 8677.85, "total_tokens": 106957776} +{"current_steps": 158700, "total_steps": 204665, "loss": 0.0444, "lr": 2.918869200575141e-07, "epoch": 3.877067402829013, "percentage": 77.54, "elapsed_time": "3:25:25", "remaining_time": "0:59:29", "throughput": 8677.9, "total_tokens": 106961488} +{"current_steps": 158705, "total_steps": 204665, "loss": 0.0, "lr": 2.918267082005513e-07, "epoch": 3.8771895536608603, "percentage": 77.54, "elapsed_time": "3:25:26", "remaining_time": "0:59:29", "throughput": 8677.94, "total_tokens": 106964944} +{"current_steps": 158710, "total_steps": 204665, "loss": 0.0, "lr": 2.9176650149361495e-07, "epoch": 3.8773117044927075, "percentage": 77.55, "elapsed_time": "3:25:26", "remaining_time": "0:59:29", "throughput": 8677.96, "total_tokens": 106968208} +{"current_steps": 158715, "total_steps": 204665, "loss": 0.0, "lr": 2.9170629993714336e-07, "epoch": 3.8774338553245546, "percentage": 77.55, "elapsed_time": "3:25:26", "remaining_time": "0:59:28", "throughput": 8677.96, "total_tokens": 106971216} +{"current_steps": 158720, "total_steps": 204665, "loss": 0.0001, "lr": 2.9164610353157373e-07, "epoch": 3.877556006156402, "percentage": 77.55, "elapsed_time": "3:25:27", "remaining_time": "0:59:28", "throughput": 8678.0, "total_tokens": 106974672} +{"current_steps": 158725, "total_steps": 204665, "loss": 0.0, "lr": 2.915859122773444e-07, "epoch": 3.877678156988249, "percentage": 77.55, "elapsed_time": "3:25:27", "remaining_time": "0:59:27", "throughput": 8678.0, "total_tokens": 106977744} +{"current_steps": 158730, "total_steps": 204665, "loss": 0.017, "lr": 2.915257261748927e-07, "epoch": 3.877800307820096, "percentage": 77.56, "elapsed_time": "3:25:27", "remaining_time": "0:59:27", "throughput": 8678.05, "total_tokens": 106981392} +{"current_steps": 158735, "total_steps": 204665, "loss": 0.0, "lr": 2.9146554522465674e-07, "epoch": 3.8779224586519434, "percentage": 77.56, "elapsed_time": "3:25:28", "remaining_time": "0:59:27", "throughput": 8678.08, "total_tokens": 106984784} +{"current_steps": 158740, "total_steps": 204665, "loss": 0.0002, "lr": 2.914053694270735e-07, "epoch": 3.8780446094837906, "percentage": 77.56, "elapsed_time": "3:25:28", "remaining_time": "0:59:26", "throughput": 8678.13, "total_tokens": 106988432} +{"current_steps": 158745, "total_steps": 204665, "loss": 0.0001, "lr": 2.9134519878258133e-07, "epoch": 3.878166760315638, "percentage": 77.56, "elapsed_time": "3:25:28", "remaining_time": "0:59:26", "throughput": 8678.18, "total_tokens": 106992080} +{"current_steps": 158750, "total_steps": 204665, "loss": 0.0, "lr": 2.9128503329161724e-07, "epoch": 3.878288911147485, "percentage": 77.57, "elapsed_time": "3:25:29", "remaining_time": "0:59:25", "throughput": 8678.23, "total_tokens": 106995792} +{"current_steps": 158755, "total_steps": 204665, "loss": 0.0, "lr": 2.912248729546191e-07, "epoch": 3.878411061979332, "percentage": 77.57, "elapsed_time": "3:25:29", "remaining_time": "0:59:25", "throughput": 8678.26, "total_tokens": 106999184} +{"current_steps": 158760, "total_steps": 204665, "loss": 0.0623, "lr": 2.9116471777202445e-07, "epoch": 3.8785332128111794, "percentage": 77.57, "elapsed_time": "3:25:29", "remaining_time": "0:59:25", "throughput": 8678.26, "total_tokens": 107002128} +{"current_steps": 158765, "total_steps": 204665, "loss": 0.0002, "lr": 2.911045677442704e-07, "epoch": 3.8786553636430265, "percentage": 77.57, "elapsed_time": "3:25:30", "remaining_time": "0:59:24", "throughput": 8678.31, "total_tokens": 107005840} +{"current_steps": 158770, "total_steps": 204665, "loss": 0.0, "lr": 2.910444228717949e-07, "epoch": 3.8787775144748737, "percentage": 77.58, "elapsed_time": "3:25:30", "remaining_time": "0:59:24", "throughput": 8678.31, "total_tokens": 107008784} +{"current_steps": 158775, "total_steps": 204665, "loss": 0.0001, "lr": 2.9098428315503466e-07, "epoch": 3.8788996653067205, "percentage": 77.58, "elapsed_time": "3:25:30", "remaining_time": "0:59:23", "throughput": 8678.31, "total_tokens": 107011728} +{"current_steps": 158780, "total_steps": 204665, "loss": 0.0, "lr": 2.9092414859442784e-07, "epoch": 3.879021816138568, "percentage": 77.58, "elapsed_time": "3:25:31", "remaining_time": "0:59:23", "throughput": 8678.34, "total_tokens": 107015056} +{"current_steps": 158785, "total_steps": 204665, "loss": 0.0001, "lr": 2.908640191904109e-07, "epoch": 3.879143966970415, "percentage": 77.58, "elapsed_time": "3:25:31", "remaining_time": "0:59:23", "throughput": 8678.35, "total_tokens": 107018128} +{"current_steps": 158790, "total_steps": 204665, "loss": 0.0, "lr": 2.908038949434216e-07, "epoch": 3.8792661178022625, "percentage": 77.59, "elapsed_time": "3:25:31", "remaining_time": "0:59:22", "throughput": 8678.36, "total_tokens": 107021200} +{"current_steps": 158795, "total_steps": 204665, "loss": 0.07, "lr": 2.907437758538975e-07, "epoch": 3.8793882686341092, "percentage": 77.59, "elapsed_time": "3:25:32", "remaining_time": "0:59:22", "throughput": 8678.37, "total_tokens": 107024400} +{"current_steps": 158800, "total_steps": 204665, "loss": 0.0001, "lr": 2.906836619222751e-07, "epoch": 3.8795104194659564, "percentage": 77.59, "elapsed_time": "3:25:32", "remaining_time": "0:59:21", "throughput": 8678.42, "total_tokens": 107028048} +{"current_steps": 158805, "total_steps": 204665, "loss": 0.0, "lr": 2.906235531489921e-07, "epoch": 3.8796325702978036, "percentage": 77.59, "elapsed_time": "3:25:33", "remaining_time": "0:59:21", "throughput": 8678.43, "total_tokens": 107031184} +{"current_steps": 158810, "total_steps": 204665, "loss": 0.0, "lr": 2.905634495344853e-07, "epoch": 3.879754721129651, "percentage": 77.6, "elapsed_time": "3:25:33", "remaining_time": "0:59:21", "throughput": 8678.46, "total_tokens": 107034640} +{"current_steps": 158815, "total_steps": 204665, "loss": 0.0, "lr": 2.905033510791921e-07, "epoch": 3.879876871961498, "percentage": 77.6, "elapsed_time": "3:25:33", "remaining_time": "0:59:20", "throughput": 8678.48, "total_tokens": 107037776} +{"current_steps": 158820, "total_steps": 204665, "loss": 0.0, "lr": 2.9044325778354937e-07, "epoch": 3.879999022793345, "percentage": 77.6, "elapsed_time": "3:25:34", "remaining_time": "0:59:20", "throughput": 8678.49, "total_tokens": 107040976} +{"current_steps": 158825, "total_steps": 204665, "loss": 0.0, "lr": 2.903831696479938e-07, "epoch": 3.8801211736251924, "percentage": 77.6, "elapsed_time": "3:25:34", "remaining_time": "0:59:19", "throughput": 8678.53, "total_tokens": 107044496} +{"current_steps": 158830, "total_steps": 204665, "loss": 0.0, "lr": 2.90323086672963e-07, "epoch": 3.8802433244570396, "percentage": 77.6, "elapsed_time": "3:25:34", "remaining_time": "0:59:19", "throughput": 8678.55, "total_tokens": 107047696} +{"current_steps": 158835, "total_steps": 204665, "loss": 0.0, "lr": 2.9026300885889333e-07, "epoch": 3.8803654752888868, "percentage": 77.61, "elapsed_time": "3:25:35", "remaining_time": "0:59:19", "throughput": 8678.56, "total_tokens": 107050704} +{"current_steps": 158840, "total_steps": 204665, "loss": 0.0631, "lr": 2.9020293620622214e-07, "epoch": 3.880487626120734, "percentage": 77.61, "elapsed_time": "3:25:35", "remaining_time": "0:59:18", "throughput": 8678.58, "total_tokens": 107053968} +{"current_steps": 158845, "total_steps": 204665, "loss": 0.0, "lr": 2.90142868715386e-07, "epoch": 3.880609776952581, "percentage": 77.61, "elapsed_time": "3:25:35", "remaining_time": "0:59:18", "throughput": 8678.59, "total_tokens": 107057104} +{"current_steps": 158850, "total_steps": 204665, "loss": 0.0325, "lr": 2.900828063868216e-07, "epoch": 3.8807319277844283, "percentage": 77.61, "elapsed_time": "3:25:36", "remaining_time": "0:59:17", "throughput": 8678.61, "total_tokens": 107060368} +{"current_steps": 158855, "total_steps": 204665, "loss": 0.0, "lr": 2.9002274922096646e-07, "epoch": 3.8808540786162755, "percentage": 77.62, "elapsed_time": "3:25:36", "remaining_time": "0:59:17", "throughput": 8678.65, "total_tokens": 107063888} +{"current_steps": 158860, "total_steps": 204665, "loss": 0.0, "lr": 2.899626972182565e-07, "epoch": 3.8809762294481227, "percentage": 77.62, "elapsed_time": "3:25:36", "remaining_time": "0:59:17", "throughput": 8678.68, "total_tokens": 107067344} +{"current_steps": 158865, "total_steps": 204665, "loss": 0.0, "lr": 2.899026503791291e-07, "epoch": 3.88109838027997, "percentage": 77.62, "elapsed_time": "3:25:37", "remaining_time": "0:59:16", "throughput": 8678.7, "total_tokens": 107070608} +{"current_steps": 158870, "total_steps": 204665, "loss": 0.0, "lr": 2.898426087040203e-07, "epoch": 3.8812205311118166, "percentage": 77.62, "elapsed_time": "3:25:37", "remaining_time": "0:59:16", "throughput": 8678.73, "total_tokens": 107073872} +{"current_steps": 158875, "total_steps": 204665, "loss": 0.0, "lr": 2.89782572193367e-07, "epoch": 3.8813426819436643, "percentage": 77.63, "elapsed_time": "3:25:37", "remaining_time": "0:59:15", "throughput": 8678.77, "total_tokens": 107077456} +{"current_steps": 158880, "total_steps": 204665, "loss": 0.0, "lr": 2.8972254084760626e-07, "epoch": 3.881464832775511, "percentage": 77.63, "elapsed_time": "3:25:38", "remaining_time": "0:59:15", "throughput": 8678.8, "total_tokens": 107080784} +{"current_steps": 158885, "total_steps": 204665, "loss": 0.0366, "lr": 2.8966251466717395e-07, "epoch": 3.881586983607358, "percentage": 77.63, "elapsed_time": "3:25:38", "remaining_time": "0:59:15", "throughput": 8678.8, "total_tokens": 107083728} +{"current_steps": 158890, "total_steps": 204665, "loss": 0.0002, "lr": 2.896024936525071e-07, "epoch": 3.8817091344392054, "percentage": 77.63, "elapsed_time": "3:25:38", "remaining_time": "0:59:14", "throughput": 8678.81, "total_tokens": 107086800} +{"current_steps": 158895, "total_steps": 204665, "loss": 0.0001, "lr": 2.895424778040417e-07, "epoch": 3.8818312852710526, "percentage": 77.64, "elapsed_time": "3:25:39", "remaining_time": "0:59:14", "throughput": 8678.91, "total_tokens": 107091344} +{"current_steps": 158900, "total_steps": 204665, "loss": 0.0, "lr": 2.894824671222149e-07, "epoch": 3.8819534361029, "percentage": 77.64, "elapsed_time": "3:25:39", "remaining_time": "0:59:13", "throughput": 8678.95, "total_tokens": 107094864} +{"current_steps": 158905, "total_steps": 204665, "loss": 0.0, "lr": 2.894224616074623e-07, "epoch": 3.882075586934747, "percentage": 77.64, "elapsed_time": "3:25:39", "remaining_time": "0:59:13", "throughput": 8678.99, "total_tokens": 107098320} +{"current_steps": 158910, "total_steps": 204665, "loss": 0.0001, "lr": 2.89362461260221e-07, "epoch": 3.882197737766594, "percentage": 77.64, "elapsed_time": "3:25:40", "remaining_time": "0:59:13", "throughput": 8678.99, "total_tokens": 107101264} +{"current_steps": 158915, "total_steps": 204665, "loss": 0.0, "lr": 2.893024660809268e-07, "epoch": 3.8823198885984413, "percentage": 77.65, "elapsed_time": "3:25:40", "remaining_time": "0:59:12", "throughput": 8679.02, "total_tokens": 107104720} +{"current_steps": 158920, "total_steps": 204665, "loss": 0.0, "lr": 2.892424760700164e-07, "epoch": 3.8824420394302885, "percentage": 77.65, "elapsed_time": "3:25:40", "remaining_time": "0:59:12", "throughput": 8679.05, "total_tokens": 107108112} +{"current_steps": 158925, "total_steps": 204665, "loss": 0.0, "lr": 2.89182491227926e-07, "epoch": 3.8825641902621357, "percentage": 77.65, "elapsed_time": "3:25:41", "remaining_time": "0:59:11", "throughput": 8679.07, "total_tokens": 107111312} +{"current_steps": 158930, "total_steps": 204665, "loss": 0.0, "lr": 2.891225115550914e-07, "epoch": 3.882686341093983, "percentage": 77.65, "elapsed_time": "3:25:41", "remaining_time": "0:59:11", "throughput": 8679.14, "total_tokens": 107115280} +{"current_steps": 158935, "total_steps": 204665, "loss": 0.0838, "lr": 2.890625370519493e-07, "epoch": 3.88280849192583, "percentage": 77.66, "elapsed_time": "3:25:42", "remaining_time": "0:59:11", "throughput": 8679.16, "total_tokens": 107118608} +{"current_steps": 158940, "total_steps": 204665, "loss": 0.0, "lr": 2.8900256771893536e-07, "epoch": 3.8829306427576773, "percentage": 77.66, "elapsed_time": "3:25:42", "remaining_time": "0:59:10", "throughput": 8679.17, "total_tokens": 107121616} +{"current_steps": 158945, "total_steps": 204665, "loss": 0.0378, "lr": 2.8894260355648605e-07, "epoch": 3.8830527935895245, "percentage": 77.66, "elapsed_time": "3:25:42", "remaining_time": "0:59:10", "throughput": 8679.17, "total_tokens": 107124624} +{"current_steps": 158950, "total_steps": 204665, "loss": 0.0, "lr": 2.888826445650376e-07, "epoch": 3.8831749444213717, "percentage": 77.66, "elapsed_time": "3:25:43", "remaining_time": "0:59:09", "throughput": 8679.18, "total_tokens": 107127760} +{"current_steps": 158955, "total_steps": 204665, "loss": 0.0, "lr": 2.8882269074502565e-07, "epoch": 3.8832970952532184, "percentage": 77.67, "elapsed_time": "3:25:43", "remaining_time": "0:59:09", "throughput": 8679.2, "total_tokens": 107130960} +{"current_steps": 158960, "total_steps": 204665, "loss": 0.0, "lr": 2.887627420968867e-07, "epoch": 3.883419246085066, "percentage": 77.67, "elapsed_time": "3:25:43", "remaining_time": "0:59:09", "throughput": 8679.21, "total_tokens": 107134096} +{"current_steps": 158965, "total_steps": 204665, "loss": 0.0, "lr": 2.8870279862105596e-07, "epoch": 3.883541396916913, "percentage": 77.67, "elapsed_time": "3:25:44", "remaining_time": "0:59:08", "throughput": 8679.24, "total_tokens": 107137360} +{"current_steps": 158970, "total_steps": 204665, "loss": 0.0, "lr": 2.886428603179698e-07, "epoch": 3.8836635477487604, "percentage": 77.67, "elapsed_time": "3:25:44", "remaining_time": "0:59:08", "throughput": 8679.25, "total_tokens": 107140496} +{"current_steps": 158975, "total_steps": 204665, "loss": 0.0, "lr": 2.8858292718806443e-07, "epoch": 3.883785698580607, "percentage": 77.68, "elapsed_time": "3:25:44", "remaining_time": "0:59:07", "throughput": 8679.29, "total_tokens": 107143952} +{"current_steps": 158980, "total_steps": 204665, "loss": 0.0, "lr": 2.88522999231775e-07, "epoch": 3.8839078494124544, "percentage": 77.68, "elapsed_time": "3:25:45", "remaining_time": "0:59:07", "throughput": 8679.33, "total_tokens": 107147664} +{"current_steps": 158985, "total_steps": 204665, "loss": 0.0, "lr": 2.8846307644953803e-07, "epoch": 3.8840300002443016, "percentage": 77.68, "elapsed_time": "3:25:45", "remaining_time": "0:59:07", "throughput": 8679.38, "total_tokens": 107151376} +{"current_steps": 158990, "total_steps": 204665, "loss": 0.0, "lr": 2.884031588417887e-07, "epoch": 3.8841521510761488, "percentage": 77.68, "elapsed_time": "3:25:45", "remaining_time": "0:59:06", "throughput": 8679.42, "total_tokens": 107154832} +{"current_steps": 158995, "total_steps": 204665, "loss": 0.0, "lr": 2.8834324640896325e-07, "epoch": 3.884274301907996, "percentage": 77.69, "elapsed_time": "3:25:46", "remaining_time": "0:59:06", "throughput": 8679.43, "total_tokens": 107157904} +{"current_steps": 159000, "total_steps": 204665, "loss": 0.0, "lr": 2.8828333915149674e-07, "epoch": 3.884396452739843, "percentage": 77.69, "elapsed_time": "3:25:46", "remaining_time": "0:59:05", "throughput": 8679.46, "total_tokens": 107161296} +{"current_steps": 159005, "total_steps": 204665, "loss": 0.0, "lr": 2.882234370698253e-07, "epoch": 3.8845186035716903, "percentage": 77.69, "elapsed_time": "3:25:46", "remaining_time": "0:59:05", "throughput": 8679.48, "total_tokens": 107164624} +{"current_steps": 159010, "total_steps": 204665, "loss": 0.0, "lr": 2.8816354016438483e-07, "epoch": 3.8846407544035375, "percentage": 77.69, "elapsed_time": "3:25:47", "remaining_time": "0:59:05", "throughput": 8679.54, "total_tokens": 107168528} +{"current_steps": 159015, "total_steps": 204665, "loss": 0.0, "lr": 2.881036484356104e-07, "epoch": 3.8847629052353847, "percentage": 77.7, "elapsed_time": "3:25:47", "remaining_time": "0:59:04", "throughput": 8679.57, "total_tokens": 107171856} +{"current_steps": 159020, "total_steps": 204665, "loss": 0.0001, "lr": 2.8804376188393755e-07, "epoch": 3.884885056067232, "percentage": 77.7, "elapsed_time": "3:25:47", "remaining_time": "0:59:04", "throughput": 8679.58, "total_tokens": 107174992} +{"current_steps": 159025, "total_steps": 204665, "loss": 0.0, "lr": 2.8798388050980225e-07, "epoch": 3.885007206899079, "percentage": 77.7, "elapsed_time": "3:25:48", "remaining_time": "0:59:03", "throughput": 8679.61, "total_tokens": 107178384} +{"current_steps": 159030, "total_steps": 204665, "loss": 0.0001, "lr": 2.8792400431363927e-07, "epoch": 3.8851293577309263, "percentage": 77.7, "elapsed_time": "3:25:48", "remaining_time": "0:59:03", "throughput": 8679.64, "total_tokens": 107181776} +{"current_steps": 159035, "total_steps": 204665, "loss": 0.0, "lr": 2.8786413329588486e-07, "epoch": 3.8852515085627735, "percentage": 77.71, "elapsed_time": "3:25:48", "remaining_time": "0:59:03", "throughput": 8679.65, "total_tokens": 107184976} +{"current_steps": 159040, "total_steps": 204665, "loss": 0.0, "lr": 2.8780426745697374e-07, "epoch": 3.88537365939462, "percentage": 77.71, "elapsed_time": "3:25:49", "remaining_time": "0:59:02", "throughput": 8679.71, "total_tokens": 107188816} +{"current_steps": 159045, "total_steps": 204665, "loss": 0.0, "lr": 2.877444067973418e-07, "epoch": 3.885495810226468, "percentage": 77.71, "elapsed_time": "3:25:49", "remaining_time": "0:59:02", "throughput": 8679.74, "total_tokens": 107192208} +{"current_steps": 159050, "total_steps": 204665, "loss": 0.0566, "lr": 2.8768455131742373e-07, "epoch": 3.8856179610583146, "percentage": 77.71, "elapsed_time": "3:25:50", "remaining_time": "0:59:01", "throughput": 8679.78, "total_tokens": 107195728} +{"current_steps": 159055, "total_steps": 204665, "loss": 0.0353, "lr": 2.876247010176556e-07, "epoch": 3.885740111890162, "percentage": 77.71, "elapsed_time": "3:25:50", "remaining_time": "0:59:01", "throughput": 8679.8, "total_tokens": 107199120} +{"current_steps": 159060, "total_steps": 204665, "loss": 0.0, "lr": 2.875648558984719e-07, "epoch": 3.885862262722009, "percentage": 77.72, "elapsed_time": "3:25:50", "remaining_time": "0:59:01", "throughput": 8679.82, "total_tokens": 107202256} +{"current_steps": 159065, "total_steps": 204665, "loss": 0.0, "lr": 2.875050159603082e-07, "epoch": 3.885984413553856, "percentage": 77.72, "elapsed_time": "3:25:51", "remaining_time": "0:59:00", "throughput": 8679.84, "total_tokens": 107205520} +{"current_steps": 159070, "total_steps": 204665, "loss": 0.0253, "lr": 2.8744518120359997e-07, "epoch": 3.8861065643857033, "percentage": 77.72, "elapsed_time": "3:25:51", "remaining_time": "0:59:00", "throughput": 8679.86, "total_tokens": 107208848} +{"current_steps": 159075, "total_steps": 204665, "loss": 0.0, "lr": 2.8738535162878173e-07, "epoch": 3.8862287152175505, "percentage": 77.72, "elapsed_time": "3:25:51", "remaining_time": "0:58:59", "throughput": 8679.87, "total_tokens": 107211984} +{"current_steps": 159080, "total_steps": 204665, "loss": 0.0002, "lr": 2.873255272362891e-07, "epoch": 3.8863508660493977, "percentage": 77.73, "elapsed_time": "3:25:52", "remaining_time": "0:58:59", "throughput": 8679.91, "total_tokens": 107215504} +{"current_steps": 159085, "total_steps": 204665, "loss": 0.0001, "lr": 2.872657080265567e-07, "epoch": 3.886473016881245, "percentage": 77.73, "elapsed_time": "3:25:52", "remaining_time": "0:58:59", "throughput": 8679.92, "total_tokens": 107218576} +{"current_steps": 159090, "total_steps": 204665, "loss": 0.0005, "lr": 2.8720589400002014e-07, "epoch": 3.886595167713092, "percentage": 77.73, "elapsed_time": "3:25:52", "remaining_time": "0:58:58", "throughput": 8679.92, "total_tokens": 107221584} +{"current_steps": 159095, "total_steps": 204665, "loss": 0.0, "lr": 2.871460851571137e-07, "epoch": 3.8867173185449393, "percentage": 77.73, "elapsed_time": "3:25:53", "remaining_time": "0:58:58", "throughput": 8679.96, "total_tokens": 107225104} +{"current_steps": 159100, "total_steps": 204665, "loss": 0.0, "lr": 2.870862814982726e-07, "epoch": 3.8868394693767865, "percentage": 77.74, "elapsed_time": "3:25:53", "remaining_time": "0:58:57", "throughput": 8680.02, "total_tokens": 107228944} +{"current_steps": 159105, "total_steps": 204665, "loss": 0.0, "lr": 2.8702648302393217e-07, "epoch": 3.8869616202086337, "percentage": 77.74, "elapsed_time": "3:25:53", "remaining_time": "0:58:57", "throughput": 8680.04, "total_tokens": 107232272} +{"current_steps": 159110, "total_steps": 204665, "loss": 0.0, "lr": 2.869666897345265e-07, "epoch": 3.887083771040481, "percentage": 77.74, "elapsed_time": "3:25:54", "remaining_time": "0:58:57", "throughput": 8680.09, "total_tokens": 107235920} +{"current_steps": 159115, "total_steps": 204665, "loss": 0.0, "lr": 2.8690690163049136e-07, "epoch": 3.887205921872328, "percentage": 77.74, "elapsed_time": "3:25:54", "remaining_time": "0:58:56", "throughput": 8680.1, "total_tokens": 107238992} +{"current_steps": 159120, "total_steps": 204665, "loss": 0.0, "lr": 2.868471187122606e-07, "epoch": 3.8873280727041752, "percentage": 77.75, "elapsed_time": "3:25:54", "remaining_time": "0:58:56", "throughput": 8680.16, "total_tokens": 107242832} +{"current_steps": 159125, "total_steps": 204665, "loss": 0.0001, "lr": 2.8678734098026967e-07, "epoch": 3.8874502235360224, "percentage": 77.75, "elapsed_time": "3:25:55", "remaining_time": "0:58:55", "throughput": 8680.17, "total_tokens": 107245904} +{"current_steps": 159130, "total_steps": 204665, "loss": 0.0, "lr": 2.8672756843495316e-07, "epoch": 3.8875723743678696, "percentage": 77.75, "elapsed_time": "3:25:55", "remaining_time": "0:58:55", "throughput": 8680.18, "total_tokens": 107249104} +{"current_steps": 159135, "total_steps": 204665, "loss": 0.0002, "lr": 2.8666780107674524e-07, "epoch": 3.8876945251997164, "percentage": 77.75, "elapsed_time": "3:25:55", "remaining_time": "0:58:55", "throughput": 8680.19, "total_tokens": 107252112} +{"current_steps": 159140, "total_steps": 204665, "loss": 0.0, "lr": 2.8660803890608123e-07, "epoch": 3.887816676031564, "percentage": 77.76, "elapsed_time": "3:25:56", "remaining_time": "0:58:54", "throughput": 8680.22, "total_tokens": 107255440} +{"current_steps": 159145, "total_steps": 204665, "loss": 0.0001, "lr": 2.865482819233951e-07, "epoch": 3.8879388268634107, "percentage": 77.76, "elapsed_time": "3:25:56", "remaining_time": "0:58:54", "throughput": 8680.22, "total_tokens": 107258448} +{"current_steps": 159150, "total_steps": 204665, "loss": 0.0534, "lr": 2.864885301291221e-07, "epoch": 3.8880609776952584, "percentage": 77.76, "elapsed_time": "3:25:56", "remaining_time": "0:58:53", "throughput": 8680.23, "total_tokens": 107261520} +{"current_steps": 159155, "total_steps": 204665, "loss": 0.0, "lr": 2.8642878352369616e-07, "epoch": 3.888183128527105, "percentage": 77.76, "elapsed_time": "3:25:57", "remaining_time": "0:58:53", "throughput": 8680.25, "total_tokens": 107264784} +{"current_steps": 159160, "total_steps": 204665, "loss": 0.1633, "lr": 2.8636904210755196e-07, "epoch": 3.8883052793589523, "percentage": 77.77, "elapsed_time": "3:25:57", "remaining_time": "0:58:53", "throughput": 8680.27, "total_tokens": 107267984} +{"current_steps": 159165, "total_steps": 204665, "loss": 0.0, "lr": 2.8630930588112443e-07, "epoch": 3.8884274301907995, "percentage": 77.77, "elapsed_time": "3:25:58", "remaining_time": "0:58:52", "throughput": 8680.29, "total_tokens": 107271312} +{"current_steps": 159170, "total_steps": 204665, "loss": 0.0002, "lr": 2.8624957484484723e-07, "epoch": 3.8885495810226467, "percentage": 77.77, "elapsed_time": "3:25:58", "remaining_time": "0:58:52", "throughput": 8680.31, "total_tokens": 107274512} +{"current_steps": 159175, "total_steps": 204665, "loss": 0.0, "lr": 2.8618984899915533e-07, "epoch": 3.888671731854494, "percentage": 77.77, "elapsed_time": "3:25:58", "remaining_time": "0:58:51", "throughput": 8680.37, "total_tokens": 107278288} +{"current_steps": 159180, "total_steps": 204665, "loss": 0.0, "lr": 2.861301283444827e-07, "epoch": 3.888793882686341, "percentage": 77.78, "elapsed_time": "3:25:59", "remaining_time": "0:58:51", "throughput": 8680.41, "total_tokens": 107281808} +{"current_steps": 159185, "total_steps": 204665, "loss": 0.0, "lr": 2.8607041288126396e-07, "epoch": 3.8889160335181883, "percentage": 77.78, "elapsed_time": "3:25:59", "remaining_time": "0:58:51", "throughput": 8680.44, "total_tokens": 107285200} +{"current_steps": 159190, "total_steps": 204665, "loss": 0.0, "lr": 2.8601070260993287e-07, "epoch": 3.8890381843500355, "percentage": 77.78, "elapsed_time": "3:25:59", "remaining_time": "0:58:50", "throughput": 8680.45, "total_tokens": 107288272} +{"current_steps": 159195, "total_steps": 204665, "loss": 0.0, "lr": 2.859509975309241e-07, "epoch": 3.8891603351818826, "percentage": 77.78, "elapsed_time": "3:26:00", "remaining_time": "0:58:50", "throughput": 8680.47, "total_tokens": 107291472} +{"current_steps": 159200, "total_steps": 204665, "loss": 0.0, "lr": 2.8589129764467203e-07, "epoch": 3.88928248601373, "percentage": 77.79, "elapsed_time": "3:26:00", "remaining_time": "0:58:49", "throughput": 8680.49, "total_tokens": 107294736} +{"current_steps": 159205, "total_steps": 204665, "loss": 0.0245, "lr": 2.858316029516101e-07, "epoch": 3.889404636845577, "percentage": 77.79, "elapsed_time": "3:26:00", "remaining_time": "0:58:49", "throughput": 8680.52, "total_tokens": 107298128} +{"current_steps": 159210, "total_steps": 204665, "loss": 0.0, "lr": 2.8577191345217324e-07, "epoch": 3.889526787677424, "percentage": 77.79, "elapsed_time": "3:26:01", "remaining_time": "0:58:49", "throughput": 8680.59, "total_tokens": 107302096} +{"current_steps": 159215, "total_steps": 204665, "loss": 0.0, "lr": 2.857122291467948e-07, "epoch": 3.8896489385092714, "percentage": 77.79, "elapsed_time": "3:26:01", "remaining_time": "0:58:48", "throughput": 8680.61, "total_tokens": 107305296} +{"current_steps": 159220, "total_steps": 204665, "loss": 0.0001, "lr": 2.856525500359095e-07, "epoch": 3.889771089341118, "percentage": 77.8, "elapsed_time": "3:26:01", "remaining_time": "0:58:48", "throughput": 8680.67, "total_tokens": 107309136} +{"current_steps": 159225, "total_steps": 204665, "loss": 0.0, "lr": 2.855928761199505e-07, "epoch": 3.889893240172966, "percentage": 77.8, "elapsed_time": "3:26:02", "remaining_time": "0:58:47", "throughput": 8680.68, "total_tokens": 107312272} +{"current_steps": 159230, "total_steps": 204665, "loss": 0.0, "lr": 2.855332073993528e-07, "epoch": 3.8900153910048125, "percentage": 77.8, "elapsed_time": "3:26:02", "remaining_time": "0:58:47", "throughput": 8680.69, "total_tokens": 107315408} +{"current_steps": 159235, "total_steps": 204665, "loss": 0.0, "lr": 2.854735438745497e-07, "epoch": 3.89013754183666, "percentage": 77.8, "elapsed_time": "3:26:02", "remaining_time": "0:58:47", "throughput": 8680.71, "total_tokens": 107318608} +{"current_steps": 159240, "total_steps": 204665, "loss": 0.0, "lr": 2.854138855459748e-07, "epoch": 3.890259692668507, "percentage": 77.81, "elapsed_time": "3:26:03", "remaining_time": "0:58:46", "throughput": 8680.74, "total_tokens": 107321936} +{"current_steps": 159245, "total_steps": 204665, "loss": 0.0, "lr": 2.8535423241406274e-07, "epoch": 3.890381843500354, "percentage": 77.81, "elapsed_time": "3:26:03", "remaining_time": "0:58:46", "throughput": 8680.74, "total_tokens": 107324880} +{"current_steps": 159250, "total_steps": 204665, "loss": 0.0, "lr": 2.8529458447924646e-07, "epoch": 3.8905039943322013, "percentage": 77.81, "elapsed_time": "3:26:03", "remaining_time": "0:58:45", "throughput": 8680.81, "total_tokens": 107328912} +{"current_steps": 159255, "total_steps": 204665, "loss": 0.0204, "lr": 2.852349417419604e-07, "epoch": 3.8906261451640485, "percentage": 77.81, "elapsed_time": "3:26:04", "remaining_time": "0:58:45", "throughput": 8680.82, "total_tokens": 107332048} +{"current_steps": 159260, "total_steps": 204665, "loss": 0.0, "lr": 2.8517530420263826e-07, "epoch": 3.8907482959958957, "percentage": 77.81, "elapsed_time": "3:26:04", "remaining_time": "0:58:45", "throughput": 8680.84, "total_tokens": 107335248} +{"current_steps": 159265, "total_steps": 204665, "loss": 0.0, "lr": 2.8511567186171327e-07, "epoch": 3.890870446827743, "percentage": 77.82, "elapsed_time": "3:26:04", "remaining_time": "0:58:44", "throughput": 8680.85, "total_tokens": 107338256} +{"current_steps": 159270, "total_steps": 204665, "loss": 0.0001, "lr": 2.8505604471961975e-07, "epoch": 3.89099259765959, "percentage": 77.82, "elapsed_time": "3:26:05", "remaining_time": "0:58:44", "throughput": 8680.89, "total_tokens": 107341840} +{"current_steps": 159275, "total_steps": 204665, "loss": 0.0, "lr": 2.849964227767906e-07, "epoch": 3.8911147484914372, "percentage": 77.82, "elapsed_time": "3:26:05", "remaining_time": "0:58:43", "throughput": 8680.93, "total_tokens": 107345424} +{"current_steps": 159280, "total_steps": 204665, "loss": 0.0001, "lr": 2.849368060336599e-07, "epoch": 3.8912368993232844, "percentage": 77.82, "elapsed_time": "3:26:05", "remaining_time": "0:58:43", "throughput": 8680.93, "total_tokens": 107348432} +{"current_steps": 159285, "total_steps": 204665, "loss": 0.1123, "lr": 2.848771944906613e-07, "epoch": 3.8913590501551316, "percentage": 77.83, "elapsed_time": "3:26:06", "remaining_time": "0:58:43", "throughput": 8681.02, "total_tokens": 107352720} +{"current_steps": 159290, "total_steps": 204665, "loss": 0.0, "lr": 2.8481758814822777e-07, "epoch": 3.891481200986979, "percentage": 77.83, "elapsed_time": "3:26:06", "remaining_time": "0:58:42", "throughput": 8681.03, "total_tokens": 107355792} +{"current_steps": 159295, "total_steps": 204665, "loss": 0.0, "lr": 2.8475798700679344e-07, "epoch": 3.891603351818826, "percentage": 77.83, "elapsed_time": "3:26:07", "remaining_time": "0:58:42", "throughput": 8681.08, "total_tokens": 107359568} +{"current_steps": 159300, "total_steps": 204665, "loss": 0.0, "lr": 2.846983910667911e-07, "epoch": 3.891725502650673, "percentage": 77.83, "elapsed_time": "3:26:07", "remaining_time": "0:58:41", "throughput": 8681.11, "total_tokens": 107362896} +{"current_steps": 159305, "total_steps": 204665, "loss": 0.0588, "lr": 2.8463880032865463e-07, "epoch": 3.8918476534825204, "percentage": 77.84, "elapsed_time": "3:26:07", "remaining_time": "0:58:41", "throughput": 8681.16, "total_tokens": 107366544} +{"current_steps": 159310, "total_steps": 204665, "loss": 0.0, "lr": 2.84579214792817e-07, "epoch": 3.8919698043143676, "percentage": 77.84, "elapsed_time": "3:26:08", "remaining_time": "0:58:41", "throughput": 8681.17, "total_tokens": 107369616} +{"current_steps": 159315, "total_steps": 204665, "loss": 0.0002, "lr": 2.8451963445971184e-07, "epoch": 3.8920919551462143, "percentage": 77.84, "elapsed_time": "3:26:08", "remaining_time": "0:58:40", "throughput": 8681.2, "total_tokens": 107373136} +{"current_steps": 159320, "total_steps": 204665, "loss": 0.0, "lr": 2.844600593297726e-07, "epoch": 3.892214105978062, "percentage": 77.84, "elapsed_time": "3:26:08", "remaining_time": "0:58:40", "throughput": 8681.23, "total_tokens": 107376400} +{"current_steps": 159325, "total_steps": 204665, "loss": 0.0002, "lr": 2.8440048940343185e-07, "epoch": 3.8923362568099087, "percentage": 77.85, "elapsed_time": "3:26:09", "remaining_time": "0:58:39", "throughput": 8681.26, "total_tokens": 107379792} +{"current_steps": 159330, "total_steps": 204665, "loss": 0.0, "lr": 2.843409246811236e-07, "epoch": 3.892458407641756, "percentage": 77.85, "elapsed_time": "3:26:09", "remaining_time": "0:58:39", "throughput": 8681.26, "total_tokens": 107382800} +{"current_steps": 159335, "total_steps": 204665, "loss": 0.0, "lr": 2.842813651632806e-07, "epoch": 3.892580558473603, "percentage": 77.85, "elapsed_time": "3:26:09", "remaining_time": "0:58:39", "throughput": 8681.27, "total_tokens": 107385936} +{"current_steps": 159340, "total_steps": 204665, "loss": 0.0256, "lr": 2.8422181085033583e-07, "epoch": 3.8927027093054503, "percentage": 77.85, "elapsed_time": "3:26:10", "remaining_time": "0:58:38", "throughput": 8681.3, "total_tokens": 107389264} +{"current_steps": 159345, "total_steps": 204665, "loss": 0.0488, "lr": 2.841622617427227e-07, "epoch": 3.8928248601372974, "percentage": 77.86, "elapsed_time": "3:26:10", "remaining_time": "0:58:38", "throughput": 8681.31, "total_tokens": 107392400} +{"current_steps": 159350, "total_steps": 204665, "loss": 0.0488, "lr": 2.841027178408739e-07, "epoch": 3.8929470109691446, "percentage": 77.86, "elapsed_time": "3:26:10", "remaining_time": "0:58:37", "throughput": 8681.33, "total_tokens": 107395600} +{"current_steps": 159355, "total_steps": 204665, "loss": 0.0, "lr": 2.8404317914522304e-07, "epoch": 3.893069161800992, "percentage": 77.86, "elapsed_time": "3:26:11", "remaining_time": "0:58:37", "throughput": 8681.35, "total_tokens": 107398864} +{"current_steps": 159360, "total_steps": 204665, "loss": 0.0, "lr": 2.839836456562025e-07, "epoch": 3.893191312632839, "percentage": 77.86, "elapsed_time": "3:26:11", "remaining_time": "0:58:37", "throughput": 8681.43, "total_tokens": 107402960} +{"current_steps": 159365, "total_steps": 204665, "loss": 0.0, "lr": 2.839241173742456e-07, "epoch": 3.893313463464686, "percentage": 77.87, "elapsed_time": "3:26:11", "remaining_time": "0:58:36", "throughput": 8681.44, "total_tokens": 107406096} +{"current_steps": 159370, "total_steps": 204665, "loss": 0.0005, "lr": 2.838645942997849e-07, "epoch": 3.8934356142965334, "percentage": 77.87, "elapsed_time": "3:26:12", "remaining_time": "0:58:36", "throughput": 8681.46, "total_tokens": 107409296} +{"current_steps": 159375, "total_steps": 204665, "loss": 0.0, "lr": 2.8380507643325357e-07, "epoch": 3.8935577651283806, "percentage": 77.87, "elapsed_time": "3:26:12", "remaining_time": "0:58:35", "throughput": 8681.48, "total_tokens": 107412688} +{"current_steps": 159380, "total_steps": 204665, "loss": 0.0441, "lr": 2.8374556377508463e-07, "epoch": 3.8936799159602278, "percentage": 77.87, "elapsed_time": "3:26:12", "remaining_time": "0:58:35", "throughput": 8681.5, "total_tokens": 107415952} +{"current_steps": 159385, "total_steps": 204665, "loss": 0.0048, "lr": 2.8368605632571017e-07, "epoch": 3.893802066792075, "percentage": 77.88, "elapsed_time": "3:26:13", "remaining_time": "0:58:35", "throughput": 8681.54, "total_tokens": 107419472} +{"current_steps": 159390, "total_steps": 204665, "loss": 0.0, "lr": 2.836265540855638e-07, "epoch": 3.893924217623922, "percentage": 77.88, "elapsed_time": "3:26:13", "remaining_time": "0:58:34", "throughput": 8681.56, "total_tokens": 107422736} +{"current_steps": 159395, "total_steps": 204665, "loss": 0.0, "lr": 2.835670570550774e-07, "epoch": 3.8940463684557693, "percentage": 77.88, "elapsed_time": "3:26:14", "remaining_time": "0:58:34", "throughput": 8681.62, "total_tokens": 107426576} +{"current_steps": 159400, "total_steps": 204665, "loss": 0.0, "lr": 2.8350756523468454e-07, "epoch": 3.894168519287616, "percentage": 77.88, "elapsed_time": "3:26:14", "remaining_time": "0:58:33", "throughput": 8681.68, "total_tokens": 107430416} +{"current_steps": 159405, "total_steps": 204665, "loss": 0.0489, "lr": 2.834480786248169e-07, "epoch": 3.8942906701194637, "percentage": 77.89, "elapsed_time": "3:26:14", "remaining_time": "0:58:33", "throughput": 8681.69, "total_tokens": 107433488} +{"current_steps": 159410, "total_steps": 204665, "loss": 0.0476, "lr": 2.833885972259077e-07, "epoch": 3.8944128209513105, "percentage": 77.89, "elapsed_time": "3:26:15", "remaining_time": "0:58:33", "throughput": 8681.7, "total_tokens": 107436496} +{"current_steps": 159415, "total_steps": 204665, "loss": 0.0, "lr": 2.8332912103838957e-07, "epoch": 3.894534971783158, "percentage": 77.89, "elapsed_time": "3:26:15", "remaining_time": "0:58:32", "throughput": 8681.74, "total_tokens": 107440080} +{"current_steps": 159420, "total_steps": 204665, "loss": 0.0, "lr": 2.8326965006269454e-07, "epoch": 3.894657122615005, "percentage": 77.89, "elapsed_time": "3:26:15", "remaining_time": "0:58:32", "throughput": 8681.77, "total_tokens": 107443536} +{"current_steps": 159425, "total_steps": 204665, "loss": 0.0, "lr": 2.832101842992558e-07, "epoch": 3.894779273446852, "percentage": 77.9, "elapsed_time": "3:26:16", "remaining_time": "0:58:31", "throughput": 8681.79, "total_tokens": 107446672} +{"current_steps": 159430, "total_steps": 204665, "loss": 0.0, "lr": 2.8315072374850504e-07, "epoch": 3.8949014242786992, "percentage": 77.9, "elapsed_time": "3:26:16", "remaining_time": "0:58:31", "throughput": 8681.8, "total_tokens": 107449808} +{"current_steps": 159435, "total_steps": 204665, "loss": 0.0, "lr": 2.8309126841087527e-07, "epoch": 3.8950235751105464, "percentage": 77.9, "elapsed_time": "3:26:16", "remaining_time": "0:58:31", "throughput": 8681.84, "total_tokens": 107453200} +{"current_steps": 159440, "total_steps": 204665, "loss": 0.0512, "lr": 2.8303181828679857e-07, "epoch": 3.8951457259423936, "percentage": 77.9, "elapsed_time": "3:26:17", "remaining_time": "0:58:30", "throughput": 8681.89, "total_tokens": 107456976} +{"current_steps": 159445, "total_steps": 204665, "loss": 0.0002, "lr": 2.829723733767071e-07, "epoch": 3.895267876774241, "percentage": 77.91, "elapsed_time": "3:26:17", "remaining_time": "0:58:30", "throughput": 8681.92, "total_tokens": 107460368} +{"current_steps": 159450, "total_steps": 204665, "loss": 0.0, "lr": 2.8291293368103374e-07, "epoch": 3.895390027606088, "percentage": 77.91, "elapsed_time": "3:26:17", "remaining_time": "0:58:29", "throughput": 8681.93, "total_tokens": 107463440} +{"current_steps": 159455, "total_steps": 204665, "loss": 0.0006, "lr": 2.8285349920021006e-07, "epoch": 3.895512178437935, "percentage": 77.91, "elapsed_time": "3:26:18", "remaining_time": "0:58:29", "throughput": 8681.97, "total_tokens": 107466960} +{"current_steps": 159460, "total_steps": 204665, "loss": 0.0, "lr": 2.8279406993466893e-07, "epoch": 3.8956343292697824, "percentage": 77.91, "elapsed_time": "3:26:18", "remaining_time": "0:58:29", "throughput": 8681.99, "total_tokens": 107470224} +{"current_steps": 159465, "total_steps": 204665, "loss": 0.0001, "lr": 2.8273464588484186e-07, "epoch": 3.8957564801016296, "percentage": 77.92, "elapsed_time": "3:26:18", "remaining_time": "0:58:28", "throughput": 8682.06, "total_tokens": 107474192} +{"current_steps": 159470, "total_steps": 204665, "loss": 0.0, "lr": 2.826752270511614e-07, "epoch": 3.8958786309334767, "percentage": 77.92, "elapsed_time": "3:26:19", "remaining_time": "0:58:28", "throughput": 8682.08, "total_tokens": 107477520} +{"current_steps": 159475, "total_steps": 204665, "loss": 0.0, "lr": 2.8261581343406005e-07, "epoch": 3.896000781765324, "percentage": 77.92, "elapsed_time": "3:26:19", "remaining_time": "0:58:27", "throughput": 8682.1, "total_tokens": 107480720} +{"current_steps": 159480, "total_steps": 204665, "loss": 0.0001, "lr": 2.8255640503396903e-07, "epoch": 3.896122932597171, "percentage": 77.92, "elapsed_time": "3:26:19", "remaining_time": "0:58:27", "throughput": 8682.16, "total_tokens": 107484624} +{"current_steps": 159485, "total_steps": 204665, "loss": 0.0, "lr": 2.8249700185132107e-07, "epoch": 3.896245083429018, "percentage": 77.92, "elapsed_time": "3:26:20", "remaining_time": "0:58:27", "throughput": 8682.2, "total_tokens": 107488080} +{"current_steps": 159490, "total_steps": 204665, "loss": 0.0, "lr": 2.824376038865477e-07, "epoch": 3.8963672342608655, "percentage": 77.93, "elapsed_time": "3:26:20", "remaining_time": "0:58:26", "throughput": 8682.22, "total_tokens": 107491408} +{"current_steps": 159495, "total_steps": 204665, "loss": 0.0, "lr": 2.823782111400813e-07, "epoch": 3.8964893850927123, "percentage": 77.93, "elapsed_time": "3:26:20", "remaining_time": "0:58:26", "throughput": 8682.27, "total_tokens": 107494992} +{"current_steps": 159500, "total_steps": 204665, "loss": 0.0, "lr": 2.823188236123533e-07, "epoch": 3.89661153592456, "percentage": 77.93, "elapsed_time": "3:26:21", "remaining_time": "0:58:25", "throughput": 8682.29, "total_tokens": 107498320} +{"current_steps": 159505, "total_steps": 204665, "loss": 0.0001, "lr": 2.8225944130379586e-07, "epoch": 3.8967336867564066, "percentage": 77.93, "elapsed_time": "3:26:21", "remaining_time": "0:58:25", "throughput": 8682.3, "total_tokens": 107501392} +{"current_steps": 159510, "total_steps": 204665, "loss": 0.0, "lr": 2.822000642148411e-07, "epoch": 3.896855837588254, "percentage": 77.94, "elapsed_time": "3:26:22", "remaining_time": "0:58:25", "throughput": 8682.34, "total_tokens": 107504912} +{"current_steps": 159515, "total_steps": 204665, "loss": 0.0, "lr": 2.821406923459202e-07, "epoch": 3.896977988420101, "percentage": 77.94, "elapsed_time": "3:26:22", "remaining_time": "0:58:24", "throughput": 8682.36, "total_tokens": 107508240} +{"current_steps": 159520, "total_steps": 204665, "loss": 0.0399, "lr": 2.8208132569746555e-07, "epoch": 3.897100139251948, "percentage": 77.94, "elapsed_time": "3:26:22", "remaining_time": "0:58:24", "throughput": 8682.37, "total_tokens": 107511248} +{"current_steps": 159525, "total_steps": 204665, "loss": 0.0, "lr": 2.8202196426990844e-07, "epoch": 3.8972222900837954, "percentage": 77.94, "elapsed_time": "3:26:23", "remaining_time": "0:58:23", "throughput": 8682.39, "total_tokens": 107514576} +{"current_steps": 159530, "total_steps": 204665, "loss": 0.0, "lr": 2.819626080636809e-07, "epoch": 3.8973444409156426, "percentage": 77.95, "elapsed_time": "3:26:23", "remaining_time": "0:58:23", "throughput": 8682.46, "total_tokens": 107518608} +{"current_steps": 159535, "total_steps": 204665, "loss": 0.1042, "lr": 2.8190325707921416e-07, "epoch": 3.8974665917474898, "percentage": 77.95, "elapsed_time": "3:26:23", "remaining_time": "0:58:23", "throughput": 8682.5, "total_tokens": 107522128} +{"current_steps": 159540, "total_steps": 204665, "loss": 0.0001, "lr": 2.818439113169403e-07, "epoch": 3.897588742579337, "percentage": 77.95, "elapsed_time": "3:26:24", "remaining_time": "0:58:22", "throughput": 8682.56, "total_tokens": 107525968} +{"current_steps": 159545, "total_steps": 204665, "loss": 0.0001, "lr": 2.817845707772908e-07, "epoch": 3.897710893411184, "percentage": 77.95, "elapsed_time": "3:26:24", "remaining_time": "0:58:22", "throughput": 8682.55, "total_tokens": 107528784} +{"current_steps": 159550, "total_steps": 204665, "loss": 0.0, "lr": 2.817252354606966e-07, "epoch": 3.8978330442430313, "percentage": 77.96, "elapsed_time": "3:26:24", "remaining_time": "0:58:21", "throughput": 8682.56, "total_tokens": 107531856} +{"current_steps": 159555, "total_steps": 204665, "loss": 0.0, "lr": 2.8166590536759015e-07, "epoch": 3.8979551950748785, "percentage": 77.96, "elapsed_time": "3:26:25", "remaining_time": "0:58:21", "throughput": 8682.58, "total_tokens": 107535056} +{"current_steps": 159560, "total_steps": 204665, "loss": 0.1033, "lr": 2.8160658049840205e-07, "epoch": 3.8980773459067257, "percentage": 77.96, "elapsed_time": "3:26:25", "remaining_time": "0:58:21", "throughput": 8682.62, "total_tokens": 107538512} +{"current_steps": 159565, "total_steps": 204665, "loss": 0.0, "lr": 2.815472608535642e-07, "epoch": 3.898199496738573, "percentage": 77.96, "elapsed_time": "3:26:25", "remaining_time": "0:58:20", "throughput": 8682.65, "total_tokens": 107541968} +{"current_steps": 159570, "total_steps": 204665, "loss": 0.0318, "lr": 2.8148794643350816e-07, "epoch": 3.89832164757042, "percentage": 77.97, "elapsed_time": "3:26:26", "remaining_time": "0:58:20", "throughput": 8682.68, "total_tokens": 107545360} +{"current_steps": 159575, "total_steps": 204665, "loss": 0.0, "lr": 2.8142863723866475e-07, "epoch": 3.8984437984022673, "percentage": 77.97, "elapsed_time": "3:26:26", "remaining_time": "0:58:19", "throughput": 8682.68, "total_tokens": 107548368} +{"current_steps": 159580, "total_steps": 204665, "loss": 0.058, "lr": 2.8136933326946574e-07, "epoch": 3.898565949234114, "percentage": 77.97, "elapsed_time": "3:26:26", "remaining_time": "0:58:19", "throughput": 8682.72, "total_tokens": 107551824} +{"current_steps": 159585, "total_steps": 204665, "loss": 0.0, "lr": 2.813100345263421e-07, "epoch": 3.8986881000659617, "percentage": 77.97, "elapsed_time": "3:26:27", "remaining_time": "0:58:19", "throughput": 8682.74, "total_tokens": 107555088} +{"current_steps": 159590, "total_steps": 204665, "loss": 0.0, "lr": 2.812507410097251e-07, "epoch": 3.8988102508978084, "percentage": 77.98, "elapsed_time": "3:26:27", "remaining_time": "0:58:18", "throughput": 8682.78, "total_tokens": 107558608} +{"current_steps": 159595, "total_steps": 204665, "loss": 0.0008, "lr": 2.811914527200463e-07, "epoch": 3.898932401729656, "percentage": 77.98, "elapsed_time": "3:26:27", "remaining_time": "0:58:18", "throughput": 8682.81, "total_tokens": 107562000} +{"current_steps": 159600, "total_steps": 204665, "loss": 0.0, "lr": 2.8113216965773634e-07, "epoch": 3.899054552561503, "percentage": 77.98, "elapsed_time": "3:26:28", "remaining_time": "0:58:17", "throughput": 8682.84, "total_tokens": 107565456} +{"current_steps": 159605, "total_steps": 204665, "loss": 0.0, "lr": 2.810728918232269e-07, "epoch": 3.89917670339335, "percentage": 77.98, "elapsed_time": "3:26:28", "remaining_time": "0:58:17", "throughput": 8682.85, "total_tokens": 107568592} +{"current_steps": 159610, "total_steps": 204665, "loss": 0.0, "lr": 2.8101361921694854e-07, "epoch": 3.899298854225197, "percentage": 77.99, "elapsed_time": "3:26:28", "remaining_time": "0:58:17", "throughput": 8682.91, "total_tokens": 107572432} +{"current_steps": 159615, "total_steps": 204665, "loss": 0.0572, "lr": 2.8095435183933267e-07, "epoch": 3.8994210050570444, "percentage": 77.99, "elapsed_time": "3:26:29", "remaining_time": "0:58:16", "throughput": 8682.92, "total_tokens": 107575504} +{"current_steps": 159620, "total_steps": 204665, "loss": 0.0002, "lr": 2.8089508969081e-07, "epoch": 3.8995431558888916, "percentage": 77.99, "elapsed_time": "3:26:29", "remaining_time": "0:58:16", "throughput": 8682.95, "total_tokens": 107578896} +{"current_steps": 159625, "total_steps": 204665, "loss": 0.0433, "lr": 2.8083583277181154e-07, "epoch": 3.8996653067207387, "percentage": 77.99, "elapsed_time": "3:26:30", "remaining_time": "0:58:15", "throughput": 8682.97, "total_tokens": 107582096} +{"current_steps": 159630, "total_steps": 204665, "loss": 0.0002, "lr": 2.807765810827687e-07, "epoch": 3.899787457552586, "percentage": 78.0, "elapsed_time": "3:26:30", "remaining_time": "0:58:15", "throughput": 8683.01, "total_tokens": 107585616} +{"current_steps": 159635, "total_steps": 204665, "loss": 0.081, "lr": 2.807173346241116e-07, "epoch": 3.899909608384433, "percentage": 78.0, "elapsed_time": "3:26:30", "remaining_time": "0:58:15", "throughput": 8683.01, "total_tokens": 107588624} +{"current_steps": 159640, "total_steps": 204665, "loss": 0.0003, "lr": 2.80658093396272e-07, "epoch": 3.9000317592162803, "percentage": 78.0, "elapsed_time": "3:26:31", "remaining_time": "0:58:14", "throughput": 8683.05, "total_tokens": 107592144} +{"current_steps": 159645, "total_steps": 204665, "loss": 0.0, "lr": 2.8059885739968e-07, "epoch": 3.9001539100481275, "percentage": 78.0, "elapsed_time": "3:26:31", "remaining_time": "0:58:14", "throughput": 8683.07, "total_tokens": 107595344} +{"current_steps": 159650, "total_steps": 204665, "loss": 0.0478, "lr": 2.805396266347665e-07, "epoch": 3.9002760608799747, "percentage": 78.01, "elapsed_time": "3:26:31", "remaining_time": "0:58:13", "throughput": 8683.11, "total_tokens": 107598928} +{"current_steps": 159655, "total_steps": 204665, "loss": 0.0, "lr": 2.804804011019626e-07, "epoch": 3.900398211711822, "percentage": 78.01, "elapsed_time": "3:26:32", "remaining_time": "0:58:13", "throughput": 8683.12, "total_tokens": 107601936} +{"current_steps": 159660, "total_steps": 204665, "loss": 0.0, "lr": 2.8042118080169843e-07, "epoch": 3.900520362543669, "percentage": 78.01, "elapsed_time": "3:26:32", "remaining_time": "0:58:13", "throughput": 8683.16, "total_tokens": 107605456} +{"current_steps": 159665, "total_steps": 204665, "loss": 0.0, "lr": 2.803619657344053e-07, "epoch": 3.900642513375516, "percentage": 78.01, "elapsed_time": "3:26:32", "remaining_time": "0:58:12", "throughput": 8683.17, "total_tokens": 107608656} +{"current_steps": 159670, "total_steps": 204665, "loss": 0.0, "lr": 2.803027559005131e-07, "epoch": 3.9007646642073635, "percentage": 78.02, "elapsed_time": "3:26:33", "remaining_time": "0:58:12", "throughput": 8683.2, "total_tokens": 107611984} +{"current_steps": 159675, "total_steps": 204665, "loss": 0.0439, "lr": 2.8024355130045316e-07, "epoch": 3.90088681503921, "percentage": 78.02, "elapsed_time": "3:26:33", "remaining_time": "0:58:11", "throughput": 8683.21, "total_tokens": 107615056} +{"current_steps": 159680, "total_steps": 204665, "loss": 0.0, "lr": 2.801843519346555e-07, "epoch": 3.901008965871058, "percentage": 78.02, "elapsed_time": "3:26:33", "remaining_time": "0:58:11", "throughput": 8683.24, "total_tokens": 107618512} +{"current_steps": 159685, "total_steps": 204665, "loss": 0.0, "lr": 2.8012515780355084e-07, "epoch": 3.9011311167029046, "percentage": 78.02, "elapsed_time": "3:26:34", "remaining_time": "0:58:11", "throughput": 8683.29, "total_tokens": 107622160} +{"current_steps": 159690, "total_steps": 204665, "loss": 0.0, "lr": 2.8006596890756995e-07, "epoch": 3.9012532675347518, "percentage": 78.03, "elapsed_time": "3:26:34", "remaining_time": "0:58:10", "throughput": 8683.32, "total_tokens": 107625616} +{"current_steps": 159695, "total_steps": 204665, "loss": 0.0, "lr": 2.8000678524714263e-07, "epoch": 3.901375418366599, "percentage": 78.03, "elapsed_time": "3:26:34", "remaining_time": "0:58:10", "throughput": 8683.36, "total_tokens": 107629264} +{"current_steps": 159700, "total_steps": 204665, "loss": 0.0001, "lr": 2.7994760682269993e-07, "epoch": 3.901497569198446, "percentage": 78.03, "elapsed_time": "3:26:35", "remaining_time": "0:58:09", "throughput": 8683.39, "total_tokens": 107632528} +{"current_steps": 159705, "total_steps": 204665, "loss": 0.0, "lr": 2.798884336346716e-07, "epoch": 3.9016197200302933, "percentage": 78.03, "elapsed_time": "3:26:35", "remaining_time": "0:58:09", "throughput": 8683.42, "total_tokens": 107635984} +{"current_steps": 159710, "total_steps": 204665, "loss": 0.0, "lr": 2.7982926568348853e-07, "epoch": 3.9017418708621405, "percentage": 78.03, "elapsed_time": "3:26:35", "remaining_time": "0:58:09", "throughput": 8683.44, "total_tokens": 107639184} +{"current_steps": 159715, "total_steps": 204665, "loss": 0.0, "lr": 2.797701029695805e-07, "epoch": 3.9018640216939877, "percentage": 78.04, "elapsed_time": "3:26:36", "remaining_time": "0:58:08", "throughput": 8683.48, "total_tokens": 107642768} +{"current_steps": 159720, "total_steps": 204665, "loss": 0.0, "lr": 2.7971094549337805e-07, "epoch": 3.901986172525835, "percentage": 78.04, "elapsed_time": "3:26:36", "remaining_time": "0:58:08", "throughput": 8683.51, "total_tokens": 107646160} +{"current_steps": 159725, "total_steps": 204665, "loss": 0.0, "lr": 2.7965179325531154e-07, "epoch": 3.902108323357682, "percentage": 78.04, "elapsed_time": "3:26:36", "remaining_time": "0:58:07", "throughput": 8683.56, "total_tokens": 107649872} +{"current_steps": 159730, "total_steps": 204665, "loss": 0.0, "lr": 2.7959264625581067e-07, "epoch": 3.9022304741895293, "percentage": 78.04, "elapsed_time": "3:26:37", "remaining_time": "0:58:07", "throughput": 8683.61, "total_tokens": 107653648} +{"current_steps": 159735, "total_steps": 204665, "loss": 0.0, "lr": 2.795335044953061e-07, "epoch": 3.9023526250213765, "percentage": 78.05, "elapsed_time": "3:26:37", "remaining_time": "0:58:07", "throughput": 8683.66, "total_tokens": 107657296} +{"current_steps": 159740, "total_steps": 204665, "loss": 0.0, "lr": 2.794743679742274e-07, "epoch": 3.9024747758532237, "percentage": 78.05, "elapsed_time": "3:26:38", "remaining_time": "0:58:06", "throughput": 8683.67, "total_tokens": 107660432} +{"current_steps": 159745, "total_steps": 204665, "loss": 0.0, "lr": 2.7941523669300527e-07, "epoch": 3.902596926685071, "percentage": 78.05, "elapsed_time": "3:26:38", "remaining_time": "0:58:06", "throughput": 8683.72, "total_tokens": 107664144} +{"current_steps": 159750, "total_steps": 204665, "loss": 0.0, "lr": 2.793561106520693e-07, "epoch": 3.902719077516918, "percentage": 78.05, "elapsed_time": "3:26:38", "remaining_time": "0:58:06", "throughput": 8683.74, "total_tokens": 107667344} +{"current_steps": 159755, "total_steps": 204665, "loss": 0.0, "lr": 2.7929698985184923e-07, "epoch": 3.9028412283487652, "percentage": 78.06, "elapsed_time": "3:26:39", "remaining_time": "0:58:05", "throughput": 8683.76, "total_tokens": 107670544} +{"current_steps": 159760, "total_steps": 204665, "loss": 0.0001, "lr": 2.792378742927756e-07, "epoch": 3.902963379180612, "percentage": 78.06, "elapsed_time": "3:26:39", "remaining_time": "0:58:05", "throughput": 8683.81, "total_tokens": 107674320} +{"current_steps": 159765, "total_steps": 204665, "loss": 0.0001, "lr": 2.791787639752776e-07, "epoch": 3.9030855300124596, "percentage": 78.06, "elapsed_time": "3:26:39", "remaining_time": "0:58:04", "throughput": 8683.85, "total_tokens": 107677776} +{"current_steps": 159770, "total_steps": 204665, "loss": 0.0001, "lr": 2.7911965889978595e-07, "epoch": 3.9032076808443064, "percentage": 78.06, "elapsed_time": "3:26:40", "remaining_time": "0:58:04", "throughput": 8683.9, "total_tokens": 107681488} +{"current_steps": 159775, "total_steps": 204665, "loss": 0.0001, "lr": 2.7906055906672965e-07, "epoch": 3.9033298316761535, "percentage": 78.07, "elapsed_time": "3:26:40", "remaining_time": "0:58:04", "throughput": 8683.96, "total_tokens": 107685328} +{"current_steps": 159780, "total_steps": 204665, "loss": 0.0, "lr": 2.7900146447653895e-07, "epoch": 3.9034519825080007, "percentage": 78.07, "elapsed_time": "3:26:40", "remaining_time": "0:58:03", "throughput": 8683.97, "total_tokens": 107688336} +{"current_steps": 159785, "total_steps": 204665, "loss": 0.0, "lr": 2.789423751296438e-07, "epoch": 3.903574133339848, "percentage": 78.07, "elapsed_time": "3:26:41", "remaining_time": "0:58:03", "throughput": 8683.98, "total_tokens": 107691536} +{"current_steps": 159790, "total_steps": 204665, "loss": 0.0, "lr": 2.788832910264732e-07, "epoch": 3.903696284171695, "percentage": 78.07, "elapsed_time": "3:26:41", "remaining_time": "0:58:02", "throughput": 8684.03, "total_tokens": 107695120} +{"current_steps": 159795, "total_steps": 204665, "loss": 0.0, "lr": 2.7882421216745776e-07, "epoch": 3.9038184350035423, "percentage": 78.08, "elapsed_time": "3:26:41", "remaining_time": "0:58:02", "throughput": 8684.07, "total_tokens": 107698768} +{"current_steps": 159800, "total_steps": 204665, "loss": 0.0, "lr": 2.787651385530263e-07, "epoch": 3.9039405858353895, "percentage": 78.08, "elapsed_time": "3:26:42", "remaining_time": "0:58:02", "throughput": 8684.11, "total_tokens": 107702224} +{"current_steps": 159805, "total_steps": 204665, "loss": 0.0, "lr": 2.787060701836089e-07, "epoch": 3.9040627366672367, "percentage": 78.08, "elapsed_time": "3:26:42", "remaining_time": "0:58:01", "throughput": 8684.12, "total_tokens": 107705424} +{"current_steps": 159810, "total_steps": 204665, "loss": 0.0, "lr": 2.7864700705963484e-07, "epoch": 3.904184887499084, "percentage": 78.08, "elapsed_time": "3:26:42", "remaining_time": "0:58:01", "throughput": 8684.15, "total_tokens": 107708688} +{"current_steps": 159815, "total_steps": 204665, "loss": 0.0, "lr": 2.785879491815336e-07, "epoch": 3.904307038330931, "percentage": 78.09, "elapsed_time": "3:26:43", "remaining_time": "0:58:00", "throughput": 8684.17, "total_tokens": 107712016} +{"current_steps": 159820, "total_steps": 204665, "loss": 0.0, "lr": 2.785288965497352e-07, "epoch": 3.9044291891627783, "percentage": 78.09, "elapsed_time": "3:26:43", "remaining_time": "0:58:00", "throughput": 8684.19, "total_tokens": 107715152} +{"current_steps": 159825, "total_steps": 204665, "loss": 0.0, "lr": 2.784698491646684e-07, "epoch": 3.9045513399946254, "percentage": 78.09, "elapsed_time": "3:26:43", "remaining_time": "0:58:00", "throughput": 8684.18, "total_tokens": 107718032} +{"current_steps": 159830, "total_steps": 204665, "loss": 0.0, "lr": 2.7841080702676336e-07, "epoch": 3.9046734908264726, "percentage": 78.09, "elapsed_time": "3:26:44", "remaining_time": "0:57:59", "throughput": 8684.22, "total_tokens": 107721552} +{"current_steps": 159835, "total_steps": 204665, "loss": 0.0, "lr": 2.783517701364485e-07, "epoch": 3.90479564165832, "percentage": 78.1, "elapsed_time": "3:26:44", "remaining_time": "0:57:59", "throughput": 8684.28, "total_tokens": 107725328} +{"current_steps": 159840, "total_steps": 204665, "loss": 0.0, "lr": 2.782927384941541e-07, "epoch": 3.904917792490167, "percentage": 78.1, "elapsed_time": "3:26:44", "remaining_time": "0:57:58", "throughput": 8684.28, "total_tokens": 107728336} +{"current_steps": 159845, "total_steps": 204665, "loss": 0.0, "lr": 2.7823371210030865e-07, "epoch": 3.9050399433220138, "percentage": 78.1, "elapsed_time": "3:26:45", "remaining_time": "0:57:58", "throughput": 8684.3, "total_tokens": 107731472} +{"current_steps": 159850, "total_steps": 204665, "loss": 0.0, "lr": 2.781746909553422e-07, "epoch": 3.9051620941538614, "percentage": 78.1, "elapsed_time": "3:26:45", "remaining_time": "0:57:58", "throughput": 8684.32, "total_tokens": 107734736} +{"current_steps": 159855, "total_steps": 204665, "loss": 0.0, "lr": 2.7811567505968346e-07, "epoch": 3.905284244985708, "percentage": 78.11, "elapsed_time": "3:26:46", "remaining_time": "0:57:57", "throughput": 8684.39, "total_tokens": 107738832} +{"current_steps": 159860, "total_steps": 204665, "loss": 0.0, "lr": 2.780566644137614e-07, "epoch": 3.9054063958175558, "percentage": 78.11, "elapsed_time": "3:26:46", "remaining_time": "0:57:57", "throughput": 8684.42, "total_tokens": 107742224} +{"current_steps": 159865, "total_steps": 204665, "loss": 0.0, "lr": 2.7799765901800576e-07, "epoch": 3.9055285466494025, "percentage": 78.11, "elapsed_time": "3:26:46", "remaining_time": "0:57:56", "throughput": 8684.44, "total_tokens": 107745424} +{"current_steps": 159870, "total_steps": 204665, "loss": 0.0, "lr": 2.779386588728451e-07, "epoch": 3.9056506974812497, "percentage": 78.11, "elapsed_time": "3:26:47", "remaining_time": "0:57:56", "throughput": 8684.45, "total_tokens": 107748560} +{"current_steps": 159875, "total_steps": 204665, "loss": 0.0001, "lr": 2.7787966397870855e-07, "epoch": 3.905772848313097, "percentage": 78.12, "elapsed_time": "3:26:47", "remaining_time": "0:57:56", "throughput": 8684.48, "total_tokens": 107751952} +{"current_steps": 159880, "total_steps": 204665, "loss": 0.0, "lr": 2.7782067433602574e-07, "epoch": 3.905894999144944, "percentage": 78.12, "elapsed_time": "3:26:47", "remaining_time": "0:57:55", "throughput": 8684.5, "total_tokens": 107755216} +{"current_steps": 159885, "total_steps": 204665, "loss": 0.0, "lr": 2.777616899452249e-07, "epoch": 3.9060171499767913, "percentage": 78.12, "elapsed_time": "3:26:48", "remaining_time": "0:57:55", "throughput": 8684.52, "total_tokens": 107758480} +{"current_steps": 159890, "total_steps": 204665, "loss": 0.0002, "lr": 2.7770271080673566e-07, "epoch": 3.9061393008086385, "percentage": 78.12, "elapsed_time": "3:26:48", "remaining_time": "0:57:54", "throughput": 8684.55, "total_tokens": 107761872} +{"current_steps": 159895, "total_steps": 204665, "loss": 0.0, "lr": 2.776437369209862e-07, "epoch": 3.9062614516404857, "percentage": 78.13, "elapsed_time": "3:26:48", "remaining_time": "0:57:54", "throughput": 8684.6, "total_tokens": 107765584} +{"current_steps": 159900, "total_steps": 204665, "loss": 0.0001, "lr": 2.7758476828840615e-07, "epoch": 3.906383602472333, "percentage": 78.13, "elapsed_time": "3:26:49", "remaining_time": "0:57:54", "throughput": 8684.6, "total_tokens": 107768592} +{"current_steps": 159905, "total_steps": 204665, "loss": 0.0, "lr": 2.775258049094236e-07, "epoch": 3.90650575330418, "percentage": 78.13, "elapsed_time": "3:26:49", "remaining_time": "0:57:53", "throughput": 8684.61, "total_tokens": 107771664} +{"current_steps": 159910, "total_steps": 204665, "loss": 0.0, "lr": 2.7746684678446776e-07, "epoch": 3.9066279041360272, "percentage": 78.13, "elapsed_time": "3:26:49", "remaining_time": "0:57:53", "throughput": 8684.63, "total_tokens": 107774800} +{"current_steps": 159915, "total_steps": 204665, "loss": 0.0001, "lr": 2.774078939139677e-07, "epoch": 3.9067500549678744, "percentage": 78.14, "elapsed_time": "3:26:50", "remaining_time": "0:57:52", "throughput": 8684.64, "total_tokens": 107778000} +{"current_steps": 159920, "total_steps": 204665, "loss": 0.0, "lr": 2.773489462983514e-07, "epoch": 3.9068722057997216, "percentage": 78.14, "elapsed_time": "3:26:50", "remaining_time": "0:57:52", "throughput": 8684.66, "total_tokens": 107781264} +{"current_steps": 159925, "total_steps": 204665, "loss": 0.0, "lr": 2.7729000393804825e-07, "epoch": 3.906994356631569, "percentage": 78.14, "elapsed_time": "3:26:50", "remaining_time": "0:57:52", "throughput": 8684.72, "total_tokens": 107785040} +{"current_steps": 159930, "total_steps": 204665, "loss": 0.0, "lr": 2.772310668334863e-07, "epoch": 3.907116507463416, "percentage": 78.14, "elapsed_time": "3:26:51", "remaining_time": "0:57:51", "throughput": 8684.76, "total_tokens": 107788560} +{"current_steps": 159935, "total_steps": 204665, "loss": 0.0001, "lr": 2.771721349850944e-07, "epoch": 3.907238658295263, "percentage": 78.14, "elapsed_time": "3:26:51", "remaining_time": "0:57:51", "throughput": 8684.8, "total_tokens": 107792080} +{"current_steps": 159940, "total_steps": 204665, "loss": 0.0, "lr": 2.7711320839330155e-07, "epoch": 3.90736080912711, "percentage": 78.15, "elapsed_time": "3:26:51", "remaining_time": "0:57:50", "throughput": 8684.82, "total_tokens": 107795472} +{"current_steps": 159945, "total_steps": 204665, "loss": 0.0, "lr": 2.7705428705853537e-07, "epoch": 3.9074829599589576, "percentage": 78.15, "elapsed_time": "3:26:52", "remaining_time": "0:57:50", "throughput": 8684.86, "total_tokens": 107798928} +{"current_steps": 159950, "total_steps": 204665, "loss": 0.0001, "lr": 2.769953709812254e-07, "epoch": 3.9076051107908043, "percentage": 78.15, "elapsed_time": "3:26:52", "remaining_time": "0:57:50", "throughput": 8684.9, "total_tokens": 107802576} +{"current_steps": 159955, "total_steps": 204665, "loss": 0.0, "lr": 2.769364601617994e-07, "epoch": 3.9077272616226515, "percentage": 78.15, "elapsed_time": "3:26:52", "remaining_time": "0:57:49", "throughput": 8684.92, "total_tokens": 107805776} +{"current_steps": 159960, "total_steps": 204665, "loss": 0.0, "lr": 2.7687755460068575e-07, "epoch": 3.9078494124544987, "percentage": 78.16, "elapsed_time": "3:26:53", "remaining_time": "0:57:49", "throughput": 8684.94, "total_tokens": 107809040} +{"current_steps": 159965, "total_steps": 204665, "loss": 0.0667, "lr": 2.768186542983133e-07, "epoch": 3.907971563286346, "percentage": 78.16, "elapsed_time": "3:26:53", "remaining_time": "0:57:48", "throughput": 8684.96, "total_tokens": 107812304} +{"current_steps": 159970, "total_steps": 204665, "loss": 0.0, "lr": 2.767597592551097e-07, "epoch": 3.908093714118193, "percentage": 78.16, "elapsed_time": "3:26:54", "remaining_time": "0:57:48", "throughput": 8684.99, "total_tokens": 107815696} +{"current_steps": 159975, "total_steps": 204665, "loss": 0.0632, "lr": 2.767008694715041e-07, "epoch": 3.9082158649500403, "percentage": 78.16, "elapsed_time": "3:26:54", "remaining_time": "0:57:48", "throughput": 8685.04, "total_tokens": 107819344} +{"current_steps": 159980, "total_steps": 204665, "loss": 0.0001, "lr": 2.766419849479239e-07, "epoch": 3.9083380157818874, "percentage": 78.17, "elapsed_time": "3:26:54", "remaining_time": "0:57:47", "throughput": 8685.06, "total_tokens": 107822544} +{"current_steps": 159985, "total_steps": 204665, "loss": 0.0, "lr": 2.765831056847981e-07, "epoch": 3.9084601666137346, "percentage": 78.17, "elapsed_time": "3:26:55", "remaining_time": "0:57:47", "throughput": 8685.08, "total_tokens": 107825808} +{"current_steps": 159990, "total_steps": 204665, "loss": 0.0001, "lr": 2.765242316825542e-07, "epoch": 3.908582317445582, "percentage": 78.17, "elapsed_time": "3:26:55", "remaining_time": "0:57:46", "throughput": 8685.11, "total_tokens": 107829328} +{"current_steps": 159995, "total_steps": 204665, "loss": 0.0, "lr": 2.764653629416208e-07, "epoch": 3.908704468277429, "percentage": 78.17, "elapsed_time": "3:26:55", "remaining_time": "0:57:46", "throughput": 8685.14, "total_tokens": 107832720} +{"current_steps": 160000, "total_steps": 204665, "loss": 0.0, "lr": 2.7640649946242613e-07, "epoch": 3.908826619109276, "percentage": 78.18, "elapsed_time": "3:26:56", "remaining_time": "0:57:46", "throughput": 8685.16, "total_tokens": 107835920} +{"current_steps": 160005, "total_steps": 204665, "loss": 0.0, "lr": 2.7634764124539765e-07, "epoch": 3.9089487699411234, "percentage": 78.18, "elapsed_time": "3:26:56", "remaining_time": "0:57:45", "throughput": 8685.21, "total_tokens": 107839696} +{"current_steps": 160010, "total_steps": 204665, "loss": 0.0638, "lr": 2.762887882909641e-07, "epoch": 3.9090709207729706, "percentage": 78.18, "elapsed_time": "3:26:56", "remaining_time": "0:57:45", "throughput": 8685.27, "total_tokens": 107843664} +{"current_steps": 160015, "total_steps": 204665, "loss": 0.0, "lr": 2.7622994059955287e-07, "epoch": 3.9091930716048178, "percentage": 78.18, "elapsed_time": "3:26:57", "remaining_time": "0:57:44", "throughput": 8685.31, "total_tokens": 107847120} +{"current_steps": 160020, "total_steps": 204665, "loss": 0.0, "lr": 2.7617109817159244e-07, "epoch": 3.909315222436665, "percentage": 78.19, "elapsed_time": "3:26:57", "remaining_time": "0:57:44", "throughput": 8685.33, "total_tokens": 107850512} +{"current_steps": 160025, "total_steps": 204665, "loss": 0.0001, "lr": 2.7611226100751016e-07, "epoch": 3.9094373732685117, "percentage": 78.19, "elapsed_time": "3:26:57", "remaining_time": "0:57:44", "throughput": 8685.4, "total_tokens": 107854352} +{"current_steps": 160030, "total_steps": 204665, "loss": 0.0, "lr": 2.760534291077343e-07, "epoch": 3.9095595241003593, "percentage": 78.19, "elapsed_time": "3:26:58", "remaining_time": "0:57:43", "throughput": 8685.42, "total_tokens": 107857744} +{"current_steps": 160035, "total_steps": 204665, "loss": 0.0388, "lr": 2.759946024726928e-07, "epoch": 3.909681674932206, "percentage": 78.19, "elapsed_time": "3:26:58", "remaining_time": "0:57:43", "throughput": 8685.45, "total_tokens": 107861136} +{"current_steps": 160040, "total_steps": 204665, "loss": 0.0001, "lr": 2.7593578110281314e-07, "epoch": 3.9098038257640537, "percentage": 78.2, "elapsed_time": "3:26:58", "remaining_time": "0:57:42", "throughput": 8685.47, "total_tokens": 107864400} +{"current_steps": 160045, "total_steps": 204665, "loss": 0.0, "lr": 2.758769649985234e-07, "epoch": 3.9099259765959005, "percentage": 78.2, "elapsed_time": "3:26:59", "remaining_time": "0:57:42", "throughput": 8685.51, "total_tokens": 107867920} +{"current_steps": 160050, "total_steps": 204665, "loss": 0.0, "lr": 2.7581815416025087e-07, "epoch": 3.9100481274277477, "percentage": 78.2, "elapsed_time": "3:26:59", "remaining_time": "0:57:42", "throughput": 8685.54, "total_tokens": 107871376} +{"current_steps": 160055, "total_steps": 204665, "loss": 0.0, "lr": 2.757593485884238e-07, "epoch": 3.910170278259595, "percentage": 78.2, "elapsed_time": "3:26:59", "remaining_time": "0:57:41", "throughput": 8685.56, "total_tokens": 107874576} +{"current_steps": 160060, "total_steps": 204665, "loss": 0.0, "lr": 2.7570054828346957e-07, "epoch": 3.910292429091442, "percentage": 78.21, "elapsed_time": "3:27:00", "remaining_time": "0:57:41", "throughput": 8685.62, "total_tokens": 107878416} +{"current_steps": 160065, "total_steps": 204665, "loss": 0.0, "lr": 2.756417532458154e-07, "epoch": 3.9104145799232892, "percentage": 78.21, "elapsed_time": "3:27:00", "remaining_time": "0:57:40", "throughput": 8685.68, "total_tokens": 107882256} +{"current_steps": 160070, "total_steps": 204665, "loss": 0.0003, "lr": 2.755829634758896e-07, "epoch": 3.9105367307551364, "percentage": 78.21, "elapsed_time": "3:27:01", "remaining_time": "0:57:40", "throughput": 8685.7, "total_tokens": 107885520} +{"current_steps": 160075, "total_steps": 204665, "loss": 0.0, "lr": 2.7552417897411905e-07, "epoch": 3.9106588815869836, "percentage": 78.21, "elapsed_time": "3:27:01", "remaining_time": "0:57:40", "throughput": 8685.73, "total_tokens": 107888976} +{"current_steps": 160080, "total_steps": 204665, "loss": 0.0, "lr": 2.7546539974093175e-07, "epoch": 3.910781032418831, "percentage": 78.22, "elapsed_time": "3:27:01", "remaining_time": "0:57:39", "throughput": 8685.78, "total_tokens": 107892688} +{"current_steps": 160085, "total_steps": 204665, "loss": 0.0001, "lr": 2.7540662577675477e-07, "epoch": 3.910903183250678, "percentage": 78.22, "elapsed_time": "3:27:02", "remaining_time": "0:57:39", "throughput": 8685.79, "total_tokens": 107895824} +{"current_steps": 160090, "total_steps": 204665, "loss": 0.0, "lr": 2.753478570820156e-07, "epoch": 3.911025334082525, "percentage": 78.22, "elapsed_time": "3:27:02", "remaining_time": "0:57:38", "throughput": 8685.81, "total_tokens": 107899024} +{"current_steps": 160095, "total_steps": 204665, "loss": 0.0002, "lr": 2.7528909365714205e-07, "epoch": 3.9111474849143724, "percentage": 78.22, "elapsed_time": "3:27:02", "remaining_time": "0:57:38", "throughput": 8685.82, "total_tokens": 107902096} +{"current_steps": 160100, "total_steps": 204665, "loss": 0.0, "lr": 2.752303355025608e-07, "epoch": 3.9112696357462196, "percentage": 78.23, "elapsed_time": "3:27:03", "remaining_time": "0:57:38", "throughput": 8685.88, "total_tokens": 107905872} +{"current_steps": 160105, "total_steps": 204665, "loss": 0.0, "lr": 2.751715826186998e-07, "epoch": 3.9113917865780667, "percentage": 78.23, "elapsed_time": "3:27:03", "remaining_time": "0:57:37", "throughput": 8685.89, "total_tokens": 107909008} +{"current_steps": 160110, "total_steps": 204665, "loss": 0.0, "lr": 2.751128350059857e-07, "epoch": 3.9115139374099135, "percentage": 78.23, "elapsed_time": "3:27:03", "remaining_time": "0:57:37", "throughput": 8685.92, "total_tokens": 107912400} +{"current_steps": 160115, "total_steps": 204665, "loss": 0.0, "lr": 2.7505409266484636e-07, "epoch": 3.911636088241761, "percentage": 78.23, "elapsed_time": "3:27:04", "remaining_time": "0:57:36", "throughput": 8685.95, "total_tokens": 107915856} +{"current_steps": 160120, "total_steps": 204665, "loss": 0.0, "lr": 2.749953555957083e-07, "epoch": 3.911758239073608, "percentage": 78.24, "elapsed_time": "3:27:04", "remaining_time": "0:57:36", "throughput": 8685.99, "total_tokens": 107919376} +{"current_steps": 160125, "total_steps": 204665, "loss": 0.0, "lr": 2.7493662379899906e-07, "epoch": 3.9118803899054555, "percentage": 78.24, "elapsed_time": "3:27:04", "remaining_time": "0:57:36", "throughput": 8686.01, "total_tokens": 107922704} +{"current_steps": 160130, "total_steps": 204665, "loss": 0.0, "lr": 2.748778972751461e-07, "epoch": 3.9120025407373022, "percentage": 78.24, "elapsed_time": "3:27:05", "remaining_time": "0:57:35", "throughput": 8686.04, "total_tokens": 107926160} +{"current_steps": 160135, "total_steps": 204665, "loss": 0.0003, "lr": 2.748191760245756e-07, "epoch": 3.9121246915691494, "percentage": 78.24, "elapsed_time": "3:27:05", "remaining_time": "0:57:35", "throughput": 8686.06, "total_tokens": 107929360} +{"current_steps": 160140, "total_steps": 204665, "loss": 0.0001, "lr": 2.7476046004771557e-07, "epoch": 3.9122468424009966, "percentage": 78.24, "elapsed_time": "3:27:05", "remaining_time": "0:57:34", "throughput": 8686.09, "total_tokens": 107932688} +{"current_steps": 160145, "total_steps": 204665, "loss": 0.0, "lr": 2.747017493449922e-07, "epoch": 3.912368993232844, "percentage": 78.25, "elapsed_time": "3:27:06", "remaining_time": "0:57:34", "throughput": 8686.12, "total_tokens": 107936144} +{"current_steps": 160150, "total_steps": 204665, "loss": 0.0, "lr": 2.74643043916833e-07, "epoch": 3.912491144064691, "percentage": 78.25, "elapsed_time": "3:27:06", "remaining_time": "0:57:34", "throughput": 8686.17, "total_tokens": 107939856} +{"current_steps": 160155, "total_steps": 204665, "loss": 0.0, "lr": 2.7458434376366457e-07, "epoch": 3.912613294896538, "percentage": 78.25, "elapsed_time": "3:27:06", "remaining_time": "0:57:33", "throughput": 8686.19, "total_tokens": 107943056} +{"current_steps": 160160, "total_steps": 204665, "loss": 0.0, "lr": 2.74525648885914e-07, "epoch": 3.9127354457283854, "percentage": 78.25, "elapsed_time": "3:27:07", "remaining_time": "0:57:33", "throughput": 8686.22, "total_tokens": 107946512} +{"current_steps": 160165, "total_steps": 204665, "loss": 0.0684, "lr": 2.744669592840082e-07, "epoch": 3.9128575965602326, "percentage": 78.26, "elapsed_time": "3:27:07", "remaining_time": "0:57:32", "throughput": 8686.25, "total_tokens": 107949904} +{"current_steps": 160170, "total_steps": 204665, "loss": 0.0, "lr": 2.744082749583734e-07, "epoch": 3.9129797473920798, "percentage": 78.26, "elapsed_time": "3:27:08", "remaining_time": "0:57:32", "throughput": 8686.27, "total_tokens": 107953168} +{"current_steps": 160175, "total_steps": 204665, "loss": 0.0, "lr": 2.743495959094372e-07, "epoch": 3.913101898223927, "percentage": 78.26, "elapsed_time": "3:27:08", "remaining_time": "0:57:32", "throughput": 8686.28, "total_tokens": 107956176} +{"current_steps": 160180, "total_steps": 204665, "loss": 0.0633, "lr": 2.742909221376255e-07, "epoch": 3.913224049055774, "percentage": 78.26, "elapsed_time": "3:27:08", "remaining_time": "0:57:31", "throughput": 8686.31, "total_tokens": 107959696} +{"current_steps": 160185, "total_steps": 204665, "loss": 0.0, "lr": 2.742322536433654e-07, "epoch": 3.9133461998876213, "percentage": 78.27, "elapsed_time": "3:27:09", "remaining_time": "0:57:31", "throughput": 8686.33, "total_tokens": 107962960} +{"current_steps": 160190, "total_steps": 204665, "loss": 0.0, "lr": 2.74173590427084e-07, "epoch": 3.9134683507194685, "percentage": 78.27, "elapsed_time": "3:27:09", "remaining_time": "0:57:30", "throughput": 8686.36, "total_tokens": 107966352} +{"current_steps": 160195, "total_steps": 204665, "loss": 0.0005, "lr": 2.74114932489207e-07, "epoch": 3.9135905015513157, "percentage": 78.27, "elapsed_time": "3:27:09", "remaining_time": "0:57:30", "throughput": 8686.38, "total_tokens": 107969552} +{"current_steps": 160200, "total_steps": 204665, "loss": 0.0001, "lr": 2.7405627983016186e-07, "epoch": 3.913712652383163, "percentage": 78.27, "elapsed_time": "3:27:10", "remaining_time": "0:57:30", "throughput": 8686.39, "total_tokens": 107972560} +{"current_steps": 160205, "total_steps": 204665, "loss": 0.0, "lr": 2.7399763245037444e-07, "epoch": 3.9138348032150097, "percentage": 78.28, "elapsed_time": "3:27:10", "remaining_time": "0:57:29", "throughput": 8686.41, "total_tokens": 107975824} +{"current_steps": 160210, "total_steps": 204665, "loss": 0.0, "lr": 2.739389903502718e-07, "epoch": 3.9139569540468573, "percentage": 78.28, "elapsed_time": "3:27:10", "remaining_time": "0:57:29", "throughput": 8686.44, "total_tokens": 107979280} +{"current_steps": 160215, "total_steps": 204665, "loss": 0.0, "lr": 2.738803535302797e-07, "epoch": 3.914079104878704, "percentage": 78.28, "elapsed_time": "3:27:11", "remaining_time": "0:57:28", "throughput": 8686.46, "total_tokens": 107982544} +{"current_steps": 160220, "total_steps": 204665, "loss": 0.0, "lr": 2.738217219908251e-07, "epoch": 3.9142012557105517, "percentage": 78.28, "elapsed_time": "3:27:11", "remaining_time": "0:57:28", "throughput": 8686.49, "total_tokens": 107985872} +{"current_steps": 160225, "total_steps": 204665, "loss": 0.0, "lr": 2.7376309573233446e-07, "epoch": 3.9143234065423984, "percentage": 78.29, "elapsed_time": "3:27:11", "remaining_time": "0:57:28", "throughput": 8686.55, "total_tokens": 107989776} +{"current_steps": 160230, "total_steps": 204665, "loss": 0.0, "lr": 2.7370447475523363e-07, "epoch": 3.9144455573742456, "percentage": 78.29, "elapsed_time": "3:27:12", "remaining_time": "0:57:27", "throughput": 8686.58, "total_tokens": 107993104} +{"current_steps": 160235, "total_steps": 204665, "loss": 0.0003, "lr": 2.7364585905994953e-07, "epoch": 3.914567708206093, "percentage": 78.29, "elapsed_time": "3:27:12", "remaining_time": "0:57:27", "throughput": 8686.62, "total_tokens": 107996624} +{"current_steps": 160240, "total_steps": 204665, "loss": 0.0, "lr": 2.7358724864690783e-07, "epoch": 3.91468985903794, "percentage": 78.29, "elapsed_time": "3:27:12", "remaining_time": "0:57:26", "throughput": 8686.68, "total_tokens": 108000656} +{"current_steps": 160245, "total_steps": 204665, "loss": 0.0146, "lr": 2.7352864351653503e-07, "epoch": 3.914812009869787, "percentage": 78.3, "elapsed_time": "3:27:13", "remaining_time": "0:57:26", "throughput": 8686.69, "total_tokens": 108003728} +{"current_steps": 160250, "total_steps": 204665, "loss": 0.0, "lr": 2.7347004366925764e-07, "epoch": 3.9149341607016344, "percentage": 78.3, "elapsed_time": "3:27:13", "remaining_time": "0:57:26", "throughput": 8686.7, "total_tokens": 108006800} +{"current_steps": 160255, "total_steps": 204665, "loss": 0.0001, "lr": 2.7341144910550116e-07, "epoch": 3.9150563115334815, "percentage": 78.3, "elapsed_time": "3:27:13", "remaining_time": "0:57:25", "throughput": 8686.76, "total_tokens": 108010576} +{"current_steps": 160260, "total_steps": 204665, "loss": 0.0, "lr": 2.7335285982569247e-07, "epoch": 3.9151784623653287, "percentage": 78.3, "elapsed_time": "3:27:14", "remaining_time": "0:57:25", "throughput": 8686.76, "total_tokens": 108013520} +{"current_steps": 160265, "total_steps": 204665, "loss": 0.0001, "lr": 2.732942758302571e-07, "epoch": 3.915300613197176, "percentage": 78.31, "elapsed_time": "3:27:14", "remaining_time": "0:57:24", "throughput": 8686.76, "total_tokens": 108016528} +{"current_steps": 160270, "total_steps": 204665, "loss": 0.0, "lr": 2.732356971196209e-07, "epoch": 3.915422764029023, "percentage": 78.31, "elapsed_time": "3:27:14", "remaining_time": "0:57:24", "throughput": 8686.77, "total_tokens": 108019600} +{"current_steps": 160275, "total_steps": 204665, "loss": 0.0, "lr": 2.7317712369421053e-07, "epoch": 3.9155449148608703, "percentage": 78.31, "elapsed_time": "3:27:15", "remaining_time": "0:57:24", "throughput": 8686.79, "total_tokens": 108022864} +{"current_steps": 160280, "total_steps": 204665, "loss": 0.0002, "lr": 2.731185555544514e-07, "epoch": 3.9156670656927175, "percentage": 78.31, "elapsed_time": "3:27:15", "remaining_time": "0:57:23", "throughput": 8686.81, "total_tokens": 108026064} +{"current_steps": 160285, "total_steps": 204665, "loss": 0.0024, "lr": 2.7305999270076985e-07, "epoch": 3.9157892165245647, "percentage": 78.32, "elapsed_time": "3:27:16", "remaining_time": "0:57:23", "throughput": 8686.85, "total_tokens": 108029712} +{"current_steps": 160290, "total_steps": 204665, "loss": 0.0609, "lr": 2.730014351335913e-07, "epoch": 3.9159113673564114, "percentage": 78.32, "elapsed_time": "3:27:16", "remaining_time": "0:57:22", "throughput": 8686.86, "total_tokens": 108032784} +{"current_steps": 160295, "total_steps": 204665, "loss": 0.0, "lr": 2.729428828533421e-07, "epoch": 3.916033518188259, "percentage": 78.32, "elapsed_time": "3:27:16", "remaining_time": "0:57:22", "throughput": 8686.86, "total_tokens": 108035728} +{"current_steps": 160300, "total_steps": 204665, "loss": 0.0, "lr": 2.7288433586044746e-07, "epoch": 3.916155669020106, "percentage": 78.32, "elapsed_time": "3:27:17", "remaining_time": "0:57:22", "throughput": 8686.94, "total_tokens": 108039760} +{"current_steps": 160305, "total_steps": 204665, "loss": 0.0, "lr": 2.728257941553336e-07, "epoch": 3.9162778198519534, "percentage": 78.33, "elapsed_time": "3:27:17", "remaining_time": "0:57:21", "throughput": 8686.96, "total_tokens": 108043088} +{"current_steps": 160310, "total_steps": 204665, "loss": 0.0, "lr": 2.7276725773842646e-07, "epoch": 3.9163999706838, "percentage": 78.33, "elapsed_time": "3:27:17", "remaining_time": "0:57:21", "throughput": 8687.02, "total_tokens": 108046864} +{"current_steps": 160315, "total_steps": 204665, "loss": 0.0, "lr": 2.727087266101511e-07, "epoch": 3.9165221215156474, "percentage": 78.33, "elapsed_time": "3:27:18", "remaining_time": "0:57:20", "throughput": 8687.09, "total_tokens": 108050960} +{"current_steps": 160320, "total_steps": 204665, "loss": 0.0, "lr": 2.726502007709338e-07, "epoch": 3.9166442723474946, "percentage": 78.33, "elapsed_time": "3:27:18", "remaining_time": "0:57:20", "throughput": 8687.09, "total_tokens": 108053968} +{"current_steps": 160325, "total_steps": 204665, "loss": 0.0, "lr": 2.725916802211995e-07, "epoch": 3.9167664231793418, "percentage": 78.34, "elapsed_time": "3:27:18", "remaining_time": "0:57:20", "throughput": 8687.11, "total_tokens": 108057232} +{"current_steps": 160330, "total_steps": 204665, "loss": 0.0464, "lr": 2.7253316496137457e-07, "epoch": 3.916888574011189, "percentage": 78.34, "elapsed_time": "3:27:19", "remaining_time": "0:57:19", "throughput": 8687.14, "total_tokens": 108060496} +{"current_steps": 160335, "total_steps": 204665, "loss": 0.0774, "lr": 2.7247465499188373e-07, "epoch": 3.917010724843036, "percentage": 78.34, "elapsed_time": "3:27:19", "remaining_time": "0:57:19", "throughput": 8687.28, "total_tokens": 108065744} +{"current_steps": 160340, "total_steps": 204665, "loss": 0.0, "lr": 2.724161503131529e-07, "epoch": 3.9171328756748833, "percentage": 78.34, "elapsed_time": "3:27:19", "remaining_time": "0:57:18", "throughput": 8687.31, "total_tokens": 108069136} +{"current_steps": 160345, "total_steps": 204665, "loss": 0.0816, "lr": 2.7235765092560794e-07, "epoch": 3.9172550265067305, "percentage": 78.35, "elapsed_time": "3:27:20", "remaining_time": "0:57:18", "throughput": 8687.32, "total_tokens": 108072272} +{"current_steps": 160350, "total_steps": 204665, "loss": 0.0, "lr": 2.722991568296734e-07, "epoch": 3.9173771773385777, "percentage": 78.35, "elapsed_time": "3:27:20", "remaining_time": "0:57:18", "throughput": 8687.32, "total_tokens": 108075216} +{"current_steps": 160355, "total_steps": 204665, "loss": 0.0001, "lr": 2.7224066802577547e-07, "epoch": 3.917499328170425, "percentage": 78.35, "elapsed_time": "3:27:20", "remaining_time": "0:57:17", "throughput": 8687.39, "total_tokens": 108079120} +{"current_steps": 160360, "total_steps": 204665, "loss": 0.019, "lr": 2.721821845143388e-07, "epoch": 3.917621479002272, "percentage": 78.35, "elapsed_time": "3:27:21", "remaining_time": "0:57:17", "throughput": 8687.42, "total_tokens": 108082512} +{"current_steps": 160365, "total_steps": 204665, "loss": 0.0007, "lr": 2.721237062957894e-07, "epoch": 3.9177436298341193, "percentage": 78.35, "elapsed_time": "3:27:21", "remaining_time": "0:57:16", "throughput": 8687.43, "total_tokens": 108085648} +{"current_steps": 160370, "total_steps": 204665, "loss": 0.0, "lr": 2.720652333705522e-07, "epoch": 3.9178657806659665, "percentage": 78.36, "elapsed_time": "3:27:21", "remaining_time": "0:57:16", "throughput": 8687.46, "total_tokens": 108088976} +{"current_steps": 160375, "total_steps": 204665, "loss": 0.0, "lr": 2.720067657390521e-07, "epoch": 3.9179879314978137, "percentage": 78.36, "elapsed_time": "3:27:22", "remaining_time": "0:57:16", "throughput": 8687.49, "total_tokens": 108092432} +{"current_steps": 160380, "total_steps": 204665, "loss": 0.0, "lr": 2.7194830340171494e-07, "epoch": 3.918110082329661, "percentage": 78.36, "elapsed_time": "3:27:22", "remaining_time": "0:57:15", "throughput": 8687.51, "total_tokens": 108095760} +{"current_steps": 160385, "total_steps": 204665, "loss": 0.0, "lr": 2.7188984635896516e-07, "epoch": 3.9182322331615076, "percentage": 78.36, "elapsed_time": "3:27:23", "remaining_time": "0:57:15", "throughput": 8687.54, "total_tokens": 108099024} +{"current_steps": 160390, "total_steps": 204665, "loss": 0.0, "lr": 2.718313946112286e-07, "epoch": 3.9183543839933552, "percentage": 78.37, "elapsed_time": "3:27:23", "remaining_time": "0:57:14", "throughput": 8687.57, "total_tokens": 108102480} +{"current_steps": 160395, "total_steps": 204665, "loss": 0.0, "lr": 2.717729481589297e-07, "epoch": 3.918476534825202, "percentage": 78.37, "elapsed_time": "3:27:23", "remaining_time": "0:57:14", "throughput": 8687.6, "total_tokens": 108105872} +{"current_steps": 160400, "total_steps": 204665, "loss": 0.0, "lr": 2.7171450700249375e-07, "epoch": 3.918598685657049, "percentage": 78.37, "elapsed_time": "3:27:24", "remaining_time": "0:57:14", "throughput": 8687.6, "total_tokens": 108108880} +{"current_steps": 160405, "total_steps": 204665, "loss": 0.0, "lr": 2.7165607114234614e-07, "epoch": 3.9187208364888964, "percentage": 78.37, "elapsed_time": "3:27:24", "remaining_time": "0:57:13", "throughput": 8687.63, "total_tokens": 108112336} +{"current_steps": 160410, "total_steps": 204665, "loss": 0.0, "lr": 2.715976405789111e-07, "epoch": 3.9188429873207435, "percentage": 78.38, "elapsed_time": "3:27:24", "remaining_time": "0:57:13", "throughput": 8687.64, "total_tokens": 108115408} +{"current_steps": 160415, "total_steps": 204665, "loss": 0.0, "lr": 2.7153921531261436e-07, "epoch": 3.9189651381525907, "percentage": 78.38, "elapsed_time": "3:27:25", "remaining_time": "0:57:12", "throughput": 8687.66, "total_tokens": 108118608} +{"current_steps": 160420, "total_steps": 204665, "loss": 0.0, "lr": 2.7148079534388004e-07, "epoch": 3.919087288984438, "percentage": 78.38, "elapsed_time": "3:27:25", "remaining_time": "0:57:12", "throughput": 8687.72, "total_tokens": 108122448} +{"current_steps": 160425, "total_steps": 204665, "loss": 0.0, "lr": 2.714223806731335e-07, "epoch": 3.919209439816285, "percentage": 78.38, "elapsed_time": "3:27:25", "remaining_time": "0:57:12", "throughput": 8687.78, "total_tokens": 108126352} +{"current_steps": 160430, "total_steps": 204665, "loss": 0.0021, "lr": 2.7136397130079926e-07, "epoch": 3.9193315906481323, "percentage": 78.39, "elapsed_time": "3:27:26", "remaining_time": "0:57:11", "throughput": 8687.8, "total_tokens": 108129552} +{"current_steps": 160435, "total_steps": 204665, "loss": 0.0, "lr": 2.71305567227302e-07, "epoch": 3.9194537414799795, "percentage": 78.39, "elapsed_time": "3:27:26", "remaining_time": "0:57:11", "throughput": 8687.83, "total_tokens": 108133008} +{"current_steps": 160440, "total_steps": 204665, "loss": 0.0, "lr": 2.7124716845306717e-07, "epoch": 3.9195758923118267, "percentage": 78.39, "elapsed_time": "3:27:26", "remaining_time": "0:57:10", "throughput": 8687.86, "total_tokens": 108136400} +{"current_steps": 160445, "total_steps": 204665, "loss": 0.0, "lr": 2.7118877497851844e-07, "epoch": 3.919698043143674, "percentage": 78.39, "elapsed_time": "3:27:27", "remaining_time": "0:57:10", "throughput": 8687.91, "total_tokens": 108139984} +{"current_steps": 160450, "total_steps": 204665, "loss": 0.0, "lr": 2.711303868040814e-07, "epoch": 3.919820193975521, "percentage": 78.4, "elapsed_time": "3:27:27", "remaining_time": "0:57:10", "throughput": 8687.93, "total_tokens": 108143248} +{"current_steps": 160455, "total_steps": 204665, "loss": 0.0, "lr": 2.7107200393017994e-07, "epoch": 3.9199423448073683, "percentage": 78.4, "elapsed_time": "3:27:27", "remaining_time": "0:57:09", "throughput": 8687.95, "total_tokens": 108146448} +{"current_steps": 160460, "total_steps": 204665, "loss": 0.0, "lr": 2.710136263572391e-07, "epoch": 3.9200644956392154, "percentage": 78.4, "elapsed_time": "3:27:28", "remaining_time": "0:57:09", "throughput": 8687.97, "total_tokens": 108149776} +{"current_steps": 160465, "total_steps": 204665, "loss": 0.0, "lr": 2.7095525408568297e-07, "epoch": 3.9201866464710626, "percentage": 78.4, "elapsed_time": "3:27:28", "remaining_time": "0:57:08", "throughput": 8687.98, "total_tokens": 108152848} +{"current_steps": 160470, "total_steps": 204665, "loss": 0.0, "lr": 2.7089688711593674e-07, "epoch": 3.9203087973029094, "percentage": 78.41, "elapsed_time": "3:27:28", "remaining_time": "0:57:08", "throughput": 8687.98, "total_tokens": 108155792} +{"current_steps": 160475, "total_steps": 204665, "loss": 0.0, "lr": 2.7083852544842433e-07, "epoch": 3.920430948134757, "percentage": 78.41, "elapsed_time": "3:27:29", "remaining_time": "0:57:08", "throughput": 8687.98, "total_tokens": 108158736} +{"current_steps": 160480, "total_steps": 204665, "loss": 0.0, "lr": 2.7078016908357004e-07, "epoch": 3.9205530989666038, "percentage": 78.41, "elapsed_time": "3:27:29", "remaining_time": "0:57:07", "throughput": 8688.0, "total_tokens": 108162000} +{"current_steps": 160485, "total_steps": 204665, "loss": 0.0, "lr": 2.707218180217988e-07, "epoch": 3.9206752497984514, "percentage": 78.41, "elapsed_time": "3:27:29", "remaining_time": "0:57:07", "throughput": 8688.05, "total_tokens": 108165648} +{"current_steps": 160490, "total_steps": 204665, "loss": 0.0, "lr": 2.7066347226353435e-07, "epoch": 3.920797400630298, "percentage": 78.42, "elapsed_time": "3:27:30", "remaining_time": "0:57:06", "throughput": 8688.09, "total_tokens": 108169296} +{"current_steps": 160495, "total_steps": 204665, "loss": 0.0, "lr": 2.706051318092013e-07, "epoch": 3.9209195514621453, "percentage": 78.42, "elapsed_time": "3:27:30", "remaining_time": "0:57:06", "throughput": 8688.1, "total_tokens": 108172368} +{"current_steps": 160500, "total_steps": 204665, "loss": 0.0001, "lr": 2.705467966592242e-07, "epoch": 3.9210417022939925, "percentage": 78.42, "elapsed_time": "3:27:30", "remaining_time": "0:57:06", "throughput": 8688.12, "total_tokens": 108175568} +{"current_steps": 160505, "total_steps": 204665, "loss": 0.0, "lr": 2.704884668140267e-07, "epoch": 3.9211638531258397, "percentage": 78.42, "elapsed_time": "3:27:31", "remaining_time": "0:57:05", "throughput": 8688.16, "total_tokens": 108179088} +{"current_steps": 160510, "total_steps": 204665, "loss": 0.0, "lr": 2.704301422740336e-07, "epoch": 3.921286003957687, "percentage": 78.43, "elapsed_time": "3:27:31", "remaining_time": "0:57:05", "throughput": 8688.17, "total_tokens": 108182096} +{"current_steps": 160515, "total_steps": 204665, "loss": 0.0, "lr": 2.703718230396683e-07, "epoch": 3.921408154789534, "percentage": 78.43, "elapsed_time": "3:27:32", "remaining_time": "0:57:04", "throughput": 8688.21, "total_tokens": 108185744} +{"current_steps": 160520, "total_steps": 204665, "loss": 0.0, "lr": 2.703135091113559e-07, "epoch": 3.9215303056213813, "percentage": 78.43, "elapsed_time": "3:27:32", "remaining_time": "0:57:04", "throughput": 8688.25, "total_tokens": 108189328} +{"current_steps": 160525, "total_steps": 204665, "loss": 0.0, "lr": 2.7025520048951944e-07, "epoch": 3.9216524564532285, "percentage": 78.43, "elapsed_time": "3:27:32", "remaining_time": "0:57:04", "throughput": 8688.28, "total_tokens": 108192656} +{"current_steps": 160530, "total_steps": 204665, "loss": 0.0, "lr": 2.701968971745835e-07, "epoch": 3.9217746072850757, "percentage": 78.44, "elapsed_time": "3:27:33", "remaining_time": "0:57:03", "throughput": 8688.32, "total_tokens": 108196240} +{"current_steps": 160535, "total_steps": 204665, "loss": 0.074, "lr": 2.701385991669722e-07, "epoch": 3.921896758116923, "percentage": 78.44, "elapsed_time": "3:27:33", "remaining_time": "0:57:03", "throughput": 8688.33, "total_tokens": 108199376} +{"current_steps": 160540, "total_steps": 204665, "loss": 0.0, "lr": 2.7008030646710923e-07, "epoch": 3.92201890894877, "percentage": 78.44, "elapsed_time": "3:27:33", "remaining_time": "0:57:02", "throughput": 8688.34, "total_tokens": 108202384} +{"current_steps": 160545, "total_steps": 204665, "loss": 0.0007, "lr": 2.7002201907541875e-07, "epoch": 3.9221410597806172, "percentage": 78.44, "elapsed_time": "3:27:34", "remaining_time": "0:57:02", "throughput": 8688.36, "total_tokens": 108205712} +{"current_steps": 160550, "total_steps": 204665, "loss": 0.0069, "lr": 2.699637369923242e-07, "epoch": 3.9222632106124644, "percentage": 78.45, "elapsed_time": "3:27:34", "remaining_time": "0:57:02", "throughput": 8688.4, "total_tokens": 108209168} +{"current_steps": 160555, "total_steps": 204665, "loss": 0.0, "lr": 2.699054602182498e-07, "epoch": 3.9223853614443116, "percentage": 78.45, "elapsed_time": "3:27:34", "remaining_time": "0:57:01", "throughput": 8688.43, "total_tokens": 108212688} +{"current_steps": 160560, "total_steps": 204665, "loss": 0.0, "lr": 2.6984718875361947e-07, "epoch": 3.922507512276159, "percentage": 78.45, "elapsed_time": "3:27:35", "remaining_time": "0:57:01", "throughput": 8688.46, "total_tokens": 108216080} +{"current_steps": 160565, "total_steps": 204665, "loss": 0.0321, "lr": 2.6978892259885657e-07, "epoch": 3.9226296631080055, "percentage": 78.45, "elapsed_time": "3:27:35", "remaining_time": "0:57:00", "throughput": 8688.49, "total_tokens": 108219472} +{"current_steps": 160570, "total_steps": 204665, "loss": 0.0, "lr": 2.697306617543852e-07, "epoch": 3.922751813939853, "percentage": 78.46, "elapsed_time": "3:27:35", "remaining_time": "0:57:00", "throughput": 8688.51, "total_tokens": 108222800} +{"current_steps": 160575, "total_steps": 204665, "loss": 0.0, "lr": 2.6967240622062895e-07, "epoch": 3.9228739647717, "percentage": 78.46, "elapsed_time": "3:27:36", "remaining_time": "0:57:00", "throughput": 8688.53, "total_tokens": 108225936} +{"current_steps": 160580, "total_steps": 204665, "loss": 0.0, "lr": 2.696141559980111e-07, "epoch": 3.922996115603547, "percentage": 78.46, "elapsed_time": "3:27:36", "remaining_time": "0:56:59", "throughput": 8688.56, "total_tokens": 108229392} +{"current_steps": 160585, "total_steps": 204665, "loss": 0.0, "lr": 2.6955591108695585e-07, "epoch": 3.9231182664353943, "percentage": 78.46, "elapsed_time": "3:27:36", "remaining_time": "0:56:59", "throughput": 8688.59, "total_tokens": 108232784} +{"current_steps": 160590, "total_steps": 204665, "loss": 0.0, "lr": 2.6949767148788615e-07, "epoch": 3.9232404172672415, "percentage": 78.46, "elapsed_time": "3:27:37", "remaining_time": "0:56:58", "throughput": 8688.64, "total_tokens": 108236496} +{"current_steps": 160595, "total_steps": 204665, "loss": 0.0, "lr": 2.694394372012262e-07, "epoch": 3.9233625680990887, "percentage": 78.47, "elapsed_time": "3:27:37", "remaining_time": "0:56:58", "throughput": 8688.7, "total_tokens": 108240400} +{"current_steps": 160600, "total_steps": 204665, "loss": 0.0, "lr": 2.6938120822739884e-07, "epoch": 3.923484718930936, "percentage": 78.47, "elapsed_time": "3:27:37", "remaining_time": "0:56:58", "throughput": 8688.76, "total_tokens": 108244240} +{"current_steps": 160605, "total_steps": 204665, "loss": 0.0, "lr": 2.693229845668281e-07, "epoch": 3.923606869762783, "percentage": 78.47, "elapsed_time": "3:27:38", "remaining_time": "0:56:57", "throughput": 8688.87, "total_tokens": 108248976} +{"current_steps": 160610, "total_steps": 204665, "loss": 0.0003, "lr": 2.6926476621993697e-07, "epoch": 3.9237290205946302, "percentage": 78.47, "elapsed_time": "3:27:38", "remaining_time": "0:56:57", "throughput": 8688.91, "total_tokens": 108252432} +{"current_steps": 160615, "total_steps": 204665, "loss": 0.0, "lr": 2.6920655318714923e-07, "epoch": 3.9238511714264774, "percentage": 78.48, "elapsed_time": "3:27:39", "remaining_time": "0:56:56", "throughput": 8688.92, "total_tokens": 108255568} +{"current_steps": 160620, "total_steps": 204665, "loss": 0.0, "lr": 2.6914834546888766e-07, "epoch": 3.9239733222583246, "percentage": 78.48, "elapsed_time": "3:27:39", "remaining_time": "0:56:56", "throughput": 8688.97, "total_tokens": 108259280} +{"current_steps": 160625, "total_steps": 204665, "loss": 0.0, "lr": 2.69090143065576e-07, "epoch": 3.924095473090172, "percentage": 78.48, "elapsed_time": "3:27:39", "remaining_time": "0:56:56", "throughput": 8688.99, "total_tokens": 108262608} +{"current_steps": 160630, "total_steps": 204665, "loss": 0.0001, "lr": 2.690319459776376e-07, "epoch": 3.924217623922019, "percentage": 78.48, "elapsed_time": "3:27:40", "remaining_time": "0:56:55", "throughput": 8689.02, "total_tokens": 108266000} +{"current_steps": 160635, "total_steps": 204665, "loss": 0.0, "lr": 2.689737542054953e-07, "epoch": 3.924339774753866, "percentage": 78.49, "elapsed_time": "3:27:40", "remaining_time": "0:56:55", "throughput": 8689.03, "total_tokens": 108269136} +{"current_steps": 160640, "total_steps": 204665, "loss": 0.0, "lr": 2.689155677495727e-07, "epoch": 3.9244619255857134, "percentage": 78.49, "elapsed_time": "3:27:40", "remaining_time": "0:56:54", "throughput": 8689.04, "total_tokens": 108272144} +{"current_steps": 160645, "total_steps": 204665, "loss": 0.0, "lr": 2.6885738661029246e-07, "epoch": 3.9245840764175606, "percentage": 78.49, "elapsed_time": "3:27:41", "remaining_time": "0:56:54", "throughput": 8689.04, "total_tokens": 108275152} +{"current_steps": 160650, "total_steps": 204665, "loss": 0.0, "lr": 2.687992107880779e-07, "epoch": 3.9247062272494073, "percentage": 78.49, "elapsed_time": "3:27:41", "remaining_time": "0:56:54", "throughput": 8689.08, "total_tokens": 108278608} +{"current_steps": 160655, "total_steps": 204665, "loss": 0.0, "lr": 2.6874104028335256e-07, "epoch": 3.924828378081255, "percentage": 78.5, "elapsed_time": "3:27:41", "remaining_time": "0:56:53", "throughput": 8689.09, "total_tokens": 108281808} +{"current_steps": 160660, "total_steps": 204665, "loss": 0.0553, "lr": 2.686828750965386e-07, "epoch": 3.9249505289131017, "percentage": 78.5, "elapsed_time": "3:27:42", "remaining_time": "0:56:53", "throughput": 8689.12, "total_tokens": 108285136} +{"current_steps": 160665, "total_steps": 204665, "loss": 0.0, "lr": 2.6862471522805995e-07, "epoch": 3.9250726797449493, "percentage": 78.5, "elapsed_time": "3:27:42", "remaining_time": "0:56:53", "throughput": 8689.14, "total_tokens": 108288400} +{"current_steps": 160670, "total_steps": 204665, "loss": 0.0822, "lr": 2.685665606783387e-07, "epoch": 3.925194830576796, "percentage": 78.5, "elapsed_time": "3:27:42", "remaining_time": "0:56:52", "throughput": 8689.17, "total_tokens": 108291792} +{"current_steps": 160675, "total_steps": 204665, "loss": 0.0, "lr": 2.6850841144779844e-07, "epoch": 3.9253169814086433, "percentage": 78.51, "elapsed_time": "3:27:43", "remaining_time": "0:56:52", "throughput": 8689.2, "total_tokens": 108295312} +{"current_steps": 160680, "total_steps": 204665, "loss": 0.0002, "lr": 2.684502675368617e-07, "epoch": 3.9254391322404905, "percentage": 78.51, "elapsed_time": "3:27:43", "remaining_time": "0:56:51", "throughput": 8689.24, "total_tokens": 108298768} +{"current_steps": 160685, "total_steps": 204665, "loss": 0.0, "lr": 2.683921289459512e-07, "epoch": 3.9255612830723376, "percentage": 78.51, "elapsed_time": "3:27:43", "remaining_time": "0:56:51", "throughput": 8689.26, "total_tokens": 108302096} +{"current_steps": 160690, "total_steps": 204665, "loss": 0.0, "lr": 2.683339956754902e-07, "epoch": 3.925683433904185, "percentage": 78.51, "elapsed_time": "3:27:44", "remaining_time": "0:56:51", "throughput": 8689.27, "total_tokens": 108305040} +{"current_steps": 160695, "total_steps": 204665, "loss": 0.0, "lr": 2.6827586772590084e-07, "epoch": 3.925805584736032, "percentage": 78.52, "elapsed_time": "3:27:44", "remaining_time": "0:56:50", "throughput": 8689.31, "total_tokens": 108308624} +{"current_steps": 160700, "total_steps": 204665, "loss": 0.0, "lr": 2.6821774509760655e-07, "epoch": 3.925927735567879, "percentage": 78.52, "elapsed_time": "3:27:44", "remaining_time": "0:56:50", "throughput": 8689.32, "total_tokens": 108311696} +{"current_steps": 160705, "total_steps": 204665, "loss": 0.0, "lr": 2.681596277910293e-07, "epoch": 3.9260498863997264, "percentage": 78.52, "elapsed_time": "3:27:45", "remaining_time": "0:56:49", "throughput": 8689.35, "total_tokens": 108315088} +{"current_steps": 160710, "total_steps": 204665, "loss": 0.0, "lr": 2.68101515806592e-07, "epoch": 3.9261720372315736, "percentage": 78.52, "elapsed_time": "3:27:45", "remaining_time": "0:56:49", "throughput": 8689.37, "total_tokens": 108318416} +{"current_steps": 160715, "total_steps": 204665, "loss": 0.0, "lr": 2.680434091447177e-07, "epoch": 3.926294188063421, "percentage": 78.53, "elapsed_time": "3:27:45", "remaining_time": "0:56:49", "throughput": 8689.39, "total_tokens": 108321616} +{"current_steps": 160720, "total_steps": 204665, "loss": 0.0353, "lr": 2.6798530780582826e-07, "epoch": 3.926416338895268, "percentage": 78.53, "elapsed_time": "3:27:46", "remaining_time": "0:56:48", "throughput": 8689.4, "total_tokens": 108324688} +{"current_steps": 160725, "total_steps": 204665, "loss": 0.0, "lr": 2.6792721179034695e-07, "epoch": 3.926538489727115, "percentage": 78.53, "elapsed_time": "3:27:46", "remaining_time": "0:56:48", "throughput": 8689.41, "total_tokens": 108327824} +{"current_steps": 160730, "total_steps": 204665, "loss": 0.0, "lr": 2.678691210986955e-07, "epoch": 3.9266606405589624, "percentage": 78.53, "elapsed_time": "3:27:47", "remaining_time": "0:56:47", "throughput": 8689.46, "total_tokens": 108331472} +{"current_steps": 160735, "total_steps": 204665, "loss": 0.0003, "lr": 2.67811035731297e-07, "epoch": 3.926782791390809, "percentage": 78.54, "elapsed_time": "3:27:47", "remaining_time": "0:56:47", "throughput": 8689.46, "total_tokens": 108334544} +{"current_steps": 160740, "total_steps": 204665, "loss": 0.074, "lr": 2.677529556885734e-07, "epoch": 3.9269049422226567, "percentage": 78.54, "elapsed_time": "3:27:47", "remaining_time": "0:56:47", "throughput": 8689.47, "total_tokens": 108337616} +{"current_steps": 160745, "total_steps": 204665, "loss": 0.0, "lr": 2.6769488097094704e-07, "epoch": 3.9270270930545035, "percentage": 78.54, "elapsed_time": "3:27:48", "remaining_time": "0:56:46", "throughput": 8689.57, "total_tokens": 108342096} +{"current_steps": 160750, "total_steps": 204665, "loss": 0.0, "lr": 2.67636811578841e-07, "epoch": 3.927149243886351, "percentage": 78.54, "elapsed_time": "3:27:48", "remaining_time": "0:56:46", "throughput": 8689.59, "total_tokens": 108345360} +{"current_steps": 160755, "total_steps": 204665, "loss": 0.0, "lr": 2.675787475126766e-07, "epoch": 3.927271394718198, "percentage": 78.55, "elapsed_time": "3:27:48", "remaining_time": "0:56:45", "throughput": 8689.63, "total_tokens": 108348880} +{"current_steps": 160760, "total_steps": 204665, "loss": 0.0, "lr": 2.675206887728769e-07, "epoch": 3.927393545550045, "percentage": 78.55, "elapsed_time": "3:27:49", "remaining_time": "0:56:45", "throughput": 8689.63, "total_tokens": 108351824} +{"current_steps": 160765, "total_steps": 204665, "loss": 0.0003, "lr": 2.6746263535986345e-07, "epoch": 3.9275156963818922, "percentage": 78.55, "elapsed_time": "3:27:49", "remaining_time": "0:56:45", "throughput": 8689.66, "total_tokens": 108355216} +{"current_steps": 160770, "total_steps": 204665, "loss": 0.0, "lr": 2.6740458727405903e-07, "epoch": 3.9276378472137394, "percentage": 78.55, "elapsed_time": "3:27:49", "remaining_time": "0:56:44", "throughput": 8689.69, "total_tokens": 108358672} +{"current_steps": 160775, "total_steps": 204665, "loss": 0.0, "lr": 2.6734654451588524e-07, "epoch": 3.9277599980455866, "percentage": 78.56, "elapsed_time": "3:27:50", "remaining_time": "0:56:44", "throughput": 8689.72, "total_tokens": 108362064} +{"current_steps": 160780, "total_steps": 204665, "loss": 0.0, "lr": 2.6728850708576467e-07, "epoch": 3.927882148877434, "percentage": 78.56, "elapsed_time": "3:27:50", "remaining_time": "0:56:43", "throughput": 8689.74, "total_tokens": 108365392} +{"current_steps": 160785, "total_steps": 204665, "loss": 0.0, "lr": 2.672304749841189e-07, "epoch": 3.928004299709281, "percentage": 78.56, "elapsed_time": "3:27:50", "remaining_time": "0:56:43", "throughput": 8689.76, "total_tokens": 108368656} +{"current_steps": 160790, "total_steps": 204665, "loss": 0.0, "lr": 2.671724482113705e-07, "epoch": 3.928126450541128, "percentage": 78.56, "elapsed_time": "3:27:51", "remaining_time": "0:56:43", "throughput": 8689.82, "total_tokens": 108372496} +{"current_steps": 160795, "total_steps": 204665, "loss": 0.0001, "lr": 2.6711442676794117e-07, "epoch": 3.9282486013729754, "percentage": 78.56, "elapsed_time": "3:27:51", "remaining_time": "0:56:42", "throughput": 8689.85, "total_tokens": 108375824} +{"current_steps": 160800, "total_steps": 204665, "loss": 0.0001, "lr": 2.6705641065425255e-07, "epoch": 3.9283707522048226, "percentage": 78.57, "elapsed_time": "3:27:51", "remaining_time": "0:56:42", "throughput": 8689.86, "total_tokens": 108378960} +{"current_steps": 160805, "total_steps": 204665, "loss": 0.0, "lr": 2.669983998707268e-07, "epoch": 3.9284929030366698, "percentage": 78.57, "elapsed_time": "3:27:52", "remaining_time": "0:56:41", "throughput": 8689.85, "total_tokens": 108381712} +{"current_steps": 160810, "total_steps": 204665, "loss": 0.0, "lr": 2.669403944177863e-07, "epoch": 3.928615053868517, "percentage": 78.57, "elapsed_time": "3:27:52", "remaining_time": "0:56:41", "throughput": 8689.87, "total_tokens": 108385040} +{"current_steps": 160815, "total_steps": 204665, "loss": 0.0, "lr": 2.668823942958519e-07, "epoch": 3.928737204700364, "percentage": 78.57, "elapsed_time": "3:27:52", "remaining_time": "0:56:41", "throughput": 8689.89, "total_tokens": 108388176} +{"current_steps": 160820, "total_steps": 204665, "loss": 0.0, "lr": 2.668243995053464e-07, "epoch": 3.9288593555322113, "percentage": 78.58, "elapsed_time": "3:27:53", "remaining_time": "0:56:40", "throughput": 8689.91, "total_tokens": 108391632} +{"current_steps": 160825, "total_steps": 204665, "loss": 0.0, "lr": 2.667664100466906e-07, "epoch": 3.9289815063640585, "percentage": 78.58, "elapsed_time": "3:27:53", "remaining_time": "0:56:40", "throughput": 8689.97, "total_tokens": 108395472} +{"current_steps": 160830, "total_steps": 204665, "loss": 0.0, "lr": 2.6670842592030706e-07, "epoch": 3.9291036571959053, "percentage": 78.58, "elapsed_time": "3:27:53", "remaining_time": "0:56:39", "throughput": 8690.0, "total_tokens": 108398992} +{"current_steps": 160835, "total_steps": 204665, "loss": 0.0, "lr": 2.6665044712661687e-07, "epoch": 3.929225808027753, "percentage": 78.58, "elapsed_time": "3:27:54", "remaining_time": "0:56:39", "throughput": 8690.04, "total_tokens": 108402512} +{"current_steps": 160840, "total_steps": 204665, "loss": 0.0402, "lr": 2.665924736660418e-07, "epoch": 3.9293479588595996, "percentage": 78.59, "elapsed_time": "3:27:54", "remaining_time": "0:56:39", "throughput": 8690.06, "total_tokens": 108405776} +{"current_steps": 160845, "total_steps": 204665, "loss": 0.0, "lr": 2.6653450553900383e-07, "epoch": 3.929470109691447, "percentage": 78.59, "elapsed_time": "3:27:55", "remaining_time": "0:56:38", "throughput": 8690.08, "total_tokens": 108408976} +{"current_steps": 160850, "total_steps": 204665, "loss": 0.0001, "lr": 2.664765427459239e-07, "epoch": 3.929592260523294, "percentage": 78.59, "elapsed_time": "3:27:55", "remaining_time": "0:56:38", "throughput": 8690.14, "total_tokens": 108412880} +{"current_steps": 160855, "total_steps": 204665, "loss": 0.0489, "lr": 2.6641858528722403e-07, "epoch": 3.929714411355141, "percentage": 78.59, "elapsed_time": "3:27:55", "remaining_time": "0:56:37", "throughput": 8690.16, "total_tokens": 108416080} +{"current_steps": 160860, "total_steps": 204665, "loss": 0.0, "lr": 2.6636063316332535e-07, "epoch": 3.9298365621869884, "percentage": 78.6, "elapsed_time": "3:27:56", "remaining_time": "0:56:37", "throughput": 8690.18, "total_tokens": 108419280} +{"current_steps": 160865, "total_steps": 204665, "loss": 0.0, "lr": 2.663026863746495e-07, "epoch": 3.9299587130188356, "percentage": 78.6, "elapsed_time": "3:27:56", "remaining_time": "0:56:37", "throughput": 8690.22, "total_tokens": 108422864} +{"current_steps": 160870, "total_steps": 204665, "loss": 0.0, "lr": 2.662447449216181e-07, "epoch": 3.930080863850683, "percentage": 78.6, "elapsed_time": "3:27:56", "remaining_time": "0:56:36", "throughput": 8690.23, "total_tokens": 108425936} +{"current_steps": 160875, "total_steps": 204665, "loss": 0.0882, "lr": 2.6618680880465207e-07, "epoch": 3.93020301468253, "percentage": 78.6, "elapsed_time": "3:27:57", "remaining_time": "0:56:36", "throughput": 8690.27, "total_tokens": 108429456} +{"current_steps": 160880, "total_steps": 204665, "loss": 0.0373, "lr": 2.6612887802417307e-07, "epoch": 3.930325165514377, "percentage": 78.61, "elapsed_time": "3:27:57", "remaining_time": "0:56:35", "throughput": 8690.26, "total_tokens": 108432272} +{"current_steps": 160885, "total_steps": 204665, "loss": 0.035, "lr": 2.660709525806024e-07, "epoch": 3.9304473163462244, "percentage": 78.61, "elapsed_time": "3:27:57", "remaining_time": "0:56:35", "throughput": 8690.27, "total_tokens": 108435408} +{"current_steps": 160890, "total_steps": 204665, "loss": 0.0, "lr": 2.660130324743608e-07, "epoch": 3.9305694671780715, "percentage": 78.61, "elapsed_time": "3:27:58", "remaining_time": "0:56:35", "throughput": 8690.3, "total_tokens": 108438736} +{"current_steps": 160895, "total_steps": 204665, "loss": 0.0, "lr": 2.659551177058701e-07, "epoch": 3.9306916180099187, "percentage": 78.61, "elapsed_time": "3:27:58", "remaining_time": "0:56:34", "throughput": 8690.32, "total_tokens": 108442000} +{"current_steps": 160900, "total_steps": 204665, "loss": 0.0, "lr": 2.6589720827555094e-07, "epoch": 3.930813768841766, "percentage": 78.62, "elapsed_time": "3:27:58", "remaining_time": "0:56:34", "throughput": 8690.34, "total_tokens": 108445264} +{"current_steps": 160905, "total_steps": 204665, "loss": 0.0729, "lr": 2.6583930418382507e-07, "epoch": 3.930935919673613, "percentage": 78.62, "elapsed_time": "3:27:59", "remaining_time": "0:56:33", "throughput": 8690.36, "total_tokens": 108448656} +{"current_steps": 160910, "total_steps": 204665, "loss": 0.0, "lr": 2.6578140543111293e-07, "epoch": 3.9310580705054603, "percentage": 78.62, "elapsed_time": "3:27:59", "remaining_time": "0:56:33", "throughput": 8690.41, "total_tokens": 108452304} +{"current_steps": 160915, "total_steps": 204665, "loss": 0.0, "lr": 2.6572351201783625e-07, "epoch": 3.931180221337307, "percentage": 78.62, "elapsed_time": "3:27:59", "remaining_time": "0:56:33", "throughput": 8690.44, "total_tokens": 108455632} +{"current_steps": 160920, "total_steps": 204665, "loss": 0.0, "lr": 2.656656239444153e-07, "epoch": 3.9313023721691547, "percentage": 78.63, "elapsed_time": "3:28:00", "remaining_time": "0:56:32", "throughput": 8690.48, "total_tokens": 108459216} +{"current_steps": 160925, "total_steps": 204665, "loss": 0.0869, "lr": 2.6560774121127185e-07, "epoch": 3.9314245230010014, "percentage": 78.63, "elapsed_time": "3:28:00", "remaining_time": "0:56:32", "throughput": 8690.49, "total_tokens": 108462352} +{"current_steps": 160930, "total_steps": 204665, "loss": 0.0, "lr": 2.6554986381882603e-07, "epoch": 3.931546673832849, "percentage": 78.63, "elapsed_time": "3:28:00", "remaining_time": "0:56:31", "throughput": 8690.54, "total_tokens": 108466000} +{"current_steps": 160935, "total_steps": 204665, "loss": 0.0, "lr": 2.6549199176749915e-07, "epoch": 3.931668824664696, "percentage": 78.63, "elapsed_time": "3:28:01", "remaining_time": "0:56:31", "throughput": 8690.56, "total_tokens": 108469264} +{"current_steps": 160940, "total_steps": 204665, "loss": 0.0, "lr": 2.654341250577125e-07, "epoch": 3.931790975496543, "percentage": 78.64, "elapsed_time": "3:28:01", "remaining_time": "0:56:31", "throughput": 8690.58, "total_tokens": 108472528} +{"current_steps": 160945, "total_steps": 204665, "loss": 0.0, "lr": 2.6537626368988595e-07, "epoch": 3.93191312632839, "percentage": 78.64, "elapsed_time": "3:28:01", "remaining_time": "0:56:30", "throughput": 8690.62, "total_tokens": 108476048} +{"current_steps": 160950, "total_steps": 204665, "loss": 0.0, "lr": 2.6531840766444127e-07, "epoch": 3.9320352771602374, "percentage": 78.64, "elapsed_time": "3:28:02", "remaining_time": "0:56:30", "throughput": 8690.64, "total_tokens": 108479312} +{"current_steps": 160955, "total_steps": 204665, "loss": 0.0, "lr": 2.6526055698179826e-07, "epoch": 3.9321574279920846, "percentage": 78.64, "elapsed_time": "3:28:02", "remaining_time": "0:56:29", "throughput": 8690.65, "total_tokens": 108482448} +{"current_steps": 160960, "total_steps": 204665, "loss": 0.0, "lr": 2.652027116423783e-07, "epoch": 3.9322795788239318, "percentage": 78.65, "elapsed_time": "3:28:03", "remaining_time": "0:56:29", "throughput": 8690.71, "total_tokens": 108486288} +{"current_steps": 160965, "total_steps": 204665, "loss": 0.0, "lr": 2.65144871646602e-07, "epoch": 3.932401729655779, "percentage": 78.65, "elapsed_time": "3:28:03", "remaining_time": "0:56:29", "throughput": 8690.75, "total_tokens": 108489936} +{"current_steps": 160970, "total_steps": 204665, "loss": 0.0, "lr": 2.6508703699488964e-07, "epoch": 3.932523880487626, "percentage": 78.65, "elapsed_time": "3:28:03", "remaining_time": "0:56:28", "throughput": 8690.79, "total_tokens": 108493456} +{"current_steps": 160975, "total_steps": 204665, "loss": 0.0, "lr": 2.6502920768766234e-07, "epoch": 3.9326460313194733, "percentage": 78.65, "elapsed_time": "3:28:04", "remaining_time": "0:56:28", "throughput": 8690.82, "total_tokens": 108496784} +{"current_steps": 160980, "total_steps": 204665, "loss": 0.0, "lr": 2.6497138372534e-07, "epoch": 3.9327681821513205, "percentage": 78.66, "elapsed_time": "3:28:04", "remaining_time": "0:56:27", "throughput": 8690.86, "total_tokens": 108500304} +{"current_steps": 160985, "total_steps": 204665, "loss": 0.0, "lr": 2.6491356510834374e-07, "epoch": 3.9328903329831677, "percentage": 78.66, "elapsed_time": "3:28:04", "remaining_time": "0:56:27", "throughput": 8690.88, "total_tokens": 108503632} +{"current_steps": 160990, "total_steps": 204665, "loss": 0.0, "lr": 2.6485575183709375e-07, "epoch": 3.933012483815015, "percentage": 78.66, "elapsed_time": "3:28:05", "remaining_time": "0:56:27", "throughput": 8690.9, "total_tokens": 108506832} +{"current_steps": 160995, "total_steps": 204665, "loss": 0.0, "lr": 2.6479794391201005e-07, "epoch": 3.933134634646862, "percentage": 78.66, "elapsed_time": "3:28:05", "remaining_time": "0:56:26", "throughput": 8690.94, "total_tokens": 108510352} +{"current_steps": 161000, "total_steps": 204665, "loss": 0.0446, "lr": 2.6474014133351383e-07, "epoch": 3.9332567854787093, "percentage": 78.67, "elapsed_time": "3:28:05", "remaining_time": "0:56:26", "throughput": 8690.99, "total_tokens": 108514128} +{"current_steps": 161005, "total_steps": 204665, "loss": 0.0001, "lr": 2.6468234410202484e-07, "epoch": 3.9333789363105565, "percentage": 78.67, "elapsed_time": "3:28:06", "remaining_time": "0:56:25", "throughput": 8691.01, "total_tokens": 108517328} +{"current_steps": 161010, "total_steps": 204665, "loss": 0.1235, "lr": 2.6462455221796386e-07, "epoch": 3.933501087142403, "percentage": 78.67, "elapsed_time": "3:28:06", "remaining_time": "0:56:25", "throughput": 8691.05, "total_tokens": 108520848} +{"current_steps": 161015, "total_steps": 204665, "loss": 0.0, "lr": 2.645667656817506e-07, "epoch": 3.933623237974251, "percentage": 78.67, "elapsed_time": "3:28:06", "remaining_time": "0:56:25", "throughput": 8691.05, "total_tokens": 108523856} +{"current_steps": 161020, "total_steps": 204665, "loss": 0.0, "lr": 2.6450898449380575e-07, "epoch": 3.9337453888060976, "percentage": 78.67, "elapsed_time": "3:28:07", "remaining_time": "0:56:24", "throughput": 8691.07, "total_tokens": 108527184} +{"current_steps": 161025, "total_steps": 204665, "loss": 0.0, "lr": 2.6445120865454964e-07, "epoch": 3.9338675396379448, "percentage": 78.68, "elapsed_time": "3:28:07", "remaining_time": "0:56:24", "throughput": 8691.1, "total_tokens": 108530576} +{"current_steps": 161030, "total_steps": 204665, "loss": 0.0, "lr": 2.643934381644017e-07, "epoch": 3.933989690469792, "percentage": 78.68, "elapsed_time": "3:28:07", "remaining_time": "0:56:23", "throughput": 8691.18, "total_tokens": 108534672} +{"current_steps": 161035, "total_steps": 204665, "loss": 0.0524, "lr": 2.64335673023783e-07, "epoch": 3.934111841301639, "percentage": 78.68, "elapsed_time": "3:28:08", "remaining_time": "0:56:23", "throughput": 8691.23, "total_tokens": 108538384} +{"current_steps": 161040, "total_steps": 204665, "loss": 0.0, "lr": 2.6427791323311287e-07, "epoch": 3.9342339921334863, "percentage": 78.68, "elapsed_time": "3:28:08", "remaining_time": "0:56:23", "throughput": 8691.29, "total_tokens": 108542288} +{"current_steps": 161045, "total_steps": 204665, "loss": 0.0, "lr": 2.642201587928119e-07, "epoch": 3.9343561429653335, "percentage": 78.69, "elapsed_time": "3:28:08", "remaining_time": "0:56:22", "throughput": 8691.35, "total_tokens": 108546128} +{"current_steps": 161050, "total_steps": 204665, "loss": 0.0, "lr": 2.641624097032995e-07, "epoch": 3.9344782937971807, "percentage": 78.69, "elapsed_time": "3:28:09", "remaining_time": "0:56:22", "throughput": 8691.38, "total_tokens": 108549520} +{"current_steps": 161055, "total_steps": 204665, "loss": 0.0, "lr": 2.64104665964996e-07, "epoch": 3.934600444629028, "percentage": 78.69, "elapsed_time": "3:28:09", "remaining_time": "0:56:21", "throughput": 8691.42, "total_tokens": 108553104} +{"current_steps": 161060, "total_steps": 204665, "loss": 0.0, "lr": 2.640469275783217e-07, "epoch": 3.934722595460875, "percentage": 78.69, "elapsed_time": "3:28:10", "remaining_time": "0:56:21", "throughput": 8691.44, "total_tokens": 108556304} +{"current_steps": 161065, "total_steps": 204665, "loss": 0.0, "lr": 2.6398919454369564e-07, "epoch": 3.9348447462927223, "percentage": 78.7, "elapsed_time": "3:28:10", "remaining_time": "0:56:21", "throughput": 8691.49, "total_tokens": 108560016} +{"current_steps": 161070, "total_steps": 204665, "loss": 0.0, "lr": 2.639314668615384e-07, "epoch": 3.9349668971245695, "percentage": 78.7, "elapsed_time": "3:28:10", "remaining_time": "0:56:20", "throughput": 8691.49, "total_tokens": 108562960} +{"current_steps": 161075, "total_steps": 204665, "loss": 0.0, "lr": 2.638737445322694e-07, "epoch": 3.9350890479564167, "percentage": 78.7, "elapsed_time": "3:28:11", "remaining_time": "0:56:20", "throughput": 8691.52, "total_tokens": 108566480} +{"current_steps": 161080, "total_steps": 204665, "loss": 0.0, "lr": 2.638160275563087e-07, "epoch": 3.935211198788264, "percentage": 78.7, "elapsed_time": "3:28:11", "remaining_time": "0:56:19", "throughput": 8691.51, "total_tokens": 108569232} +{"current_steps": 161085, "total_steps": 204665, "loss": 0.0479, "lr": 2.637583159340756e-07, "epoch": 3.935333349620111, "percentage": 78.71, "elapsed_time": "3:28:11", "remaining_time": "0:56:19", "throughput": 8691.53, "total_tokens": 108572368} +{"current_steps": 161090, "total_steps": 204665, "loss": 0.0, "lr": 2.637006096659903e-07, "epoch": 3.9354555004519582, "percentage": 78.71, "elapsed_time": "3:28:12", "remaining_time": "0:56:19", "throughput": 8691.52, "total_tokens": 108575248} +{"current_steps": 161095, "total_steps": 204665, "loss": 0.0, "lr": 2.6364290875247195e-07, "epoch": 3.935577651283805, "percentage": 78.71, "elapsed_time": "3:28:12", "remaining_time": "0:56:18", "throughput": 8691.55, "total_tokens": 108578640} +{"current_steps": 161100, "total_steps": 204665, "loss": 0.0, "lr": 2.635852131939407e-07, "epoch": 3.9356998021156526, "percentage": 78.71, "elapsed_time": "3:28:12", "remaining_time": "0:56:18", "throughput": 8691.59, "total_tokens": 108582160} +{"current_steps": 161105, "total_steps": 204665, "loss": 0.0, "lr": 2.635275229908158e-07, "epoch": 3.9358219529474994, "percentage": 78.72, "elapsed_time": "3:28:13", "remaining_time": "0:56:17", "throughput": 8691.61, "total_tokens": 108585424} +{"current_steps": 161110, "total_steps": 204665, "loss": 0.0001, "lr": 2.6346983814351667e-07, "epoch": 3.935944103779347, "percentage": 78.72, "elapsed_time": "3:28:13", "remaining_time": "0:56:17", "throughput": 8691.61, "total_tokens": 108588432} +{"current_steps": 161115, "total_steps": 204665, "loss": 0.0, "lr": 2.634121586524629e-07, "epoch": 3.9360662546111937, "percentage": 78.72, "elapsed_time": "3:28:13", "remaining_time": "0:56:17", "throughput": 8691.65, "total_tokens": 108591952} +{"current_steps": 161120, "total_steps": 204665, "loss": 0.0, "lr": 2.633544845180743e-07, "epoch": 3.936188405443041, "percentage": 78.72, "elapsed_time": "3:28:14", "remaining_time": "0:56:16", "throughput": 8691.68, "total_tokens": 108595408} +{"current_steps": 161125, "total_steps": 204665, "loss": 0.0, "lr": 2.632968157407698e-07, "epoch": 3.936310556274888, "percentage": 78.73, "elapsed_time": "3:28:14", "remaining_time": "0:56:16", "throughput": 8691.7, "total_tokens": 108598608} +{"current_steps": 161130, "total_steps": 204665, "loss": 0.0778, "lr": 2.632391523209693e-07, "epoch": 3.9364327071067353, "percentage": 78.73, "elapsed_time": "3:28:14", "remaining_time": "0:56:15", "throughput": 8691.71, "total_tokens": 108601808} +{"current_steps": 161135, "total_steps": 204665, "loss": 0.0, "lr": 2.631814942590914e-07, "epoch": 3.9365548579385825, "percentage": 78.73, "elapsed_time": "3:28:15", "remaining_time": "0:56:15", "throughput": 8691.72, "total_tokens": 108604880} +{"current_steps": 161140, "total_steps": 204665, "loss": 0.0, "lr": 2.631238415555563e-07, "epoch": 3.9366770087704297, "percentage": 78.73, "elapsed_time": "3:28:15", "remaining_time": "0:56:15", "throughput": 8691.74, "total_tokens": 108608016} +{"current_steps": 161145, "total_steps": 204665, "loss": 0.1047, "lr": 2.6306619421078245e-07, "epoch": 3.936799159602277, "percentage": 78.74, "elapsed_time": "3:28:15", "remaining_time": "0:56:14", "throughput": 8691.8, "total_tokens": 108611920} +{"current_steps": 161150, "total_steps": 204665, "loss": 0.0, "lr": 2.630085522251896e-07, "epoch": 3.936921310434124, "percentage": 78.74, "elapsed_time": "3:28:16", "remaining_time": "0:56:14", "throughput": 8691.79, "total_tokens": 108614736} +{"current_steps": 161155, "total_steps": 204665, "loss": 0.0, "lr": 2.629509155991969e-07, "epoch": 3.9370434612659713, "percentage": 78.74, "elapsed_time": "3:28:16", "remaining_time": "0:56:13", "throughput": 8691.81, "total_tokens": 108617936} +{"current_steps": 161160, "total_steps": 204665, "loss": 0.0, "lr": 2.6289328433322323e-07, "epoch": 3.9371656120978185, "percentage": 78.74, "elapsed_time": "3:28:16", "remaining_time": "0:56:13", "throughput": 8691.82, "total_tokens": 108621072} +{"current_steps": 161165, "total_steps": 204665, "loss": 0.0, "lr": 2.6283565842768807e-07, "epoch": 3.9372877629296656, "percentage": 78.75, "elapsed_time": "3:28:17", "remaining_time": "0:56:13", "throughput": 8691.84, "total_tokens": 108624336} +{"current_steps": 161170, "total_steps": 204665, "loss": 0.0, "lr": 2.627780378830099e-07, "epoch": 3.937409913761513, "percentage": 78.75, "elapsed_time": "3:28:17", "remaining_time": "0:56:12", "throughput": 8691.87, "total_tokens": 108627792} +{"current_steps": 161175, "total_steps": 204665, "loss": 0.0001, "lr": 2.6272042269960856e-07, "epoch": 3.93753206459336, "percentage": 78.75, "elapsed_time": "3:28:17", "remaining_time": "0:56:12", "throughput": 8691.9, "total_tokens": 108631120} +{"current_steps": 161180, "total_steps": 204665, "loss": 0.0002, "lr": 2.6266281287790225e-07, "epoch": 3.9376542154252068, "percentage": 78.75, "elapsed_time": "3:28:18", "remaining_time": "0:56:11", "throughput": 8691.92, "total_tokens": 108634384} +{"current_steps": 161185, "total_steps": 204665, "loss": 0.0631, "lr": 2.6260520841831037e-07, "epoch": 3.9377763662570544, "percentage": 78.76, "elapsed_time": "3:28:18", "remaining_time": "0:56:11", "throughput": 8691.94, "total_tokens": 108637584} +{"current_steps": 161190, "total_steps": 204665, "loss": 0.0, "lr": 2.6254760932125184e-07, "epoch": 3.937898517088901, "percentage": 78.76, "elapsed_time": "3:28:19", "remaining_time": "0:56:11", "throughput": 8691.98, "total_tokens": 108641168} +{"current_steps": 161195, "total_steps": 204665, "loss": 0.0001, "lr": 2.624900155871457e-07, "epoch": 3.938020667920749, "percentage": 78.76, "elapsed_time": "3:28:19", "remaining_time": "0:56:10", "throughput": 8692.01, "total_tokens": 108644560} +{"current_steps": 161200, "total_steps": 204665, "loss": 0.0001, "lr": 2.624324272164101e-07, "epoch": 3.9381428187525955, "percentage": 78.76, "elapsed_time": "3:28:19", "remaining_time": "0:56:10", "throughput": 8692.1, "total_tokens": 108648912} +{"current_steps": 161205, "total_steps": 204665, "loss": 0.1652, "lr": 2.6237484420946456e-07, "epoch": 3.9382649695844427, "percentage": 78.77, "elapsed_time": "3:28:20", "remaining_time": "0:56:09", "throughput": 8692.12, "total_tokens": 108652240} +{"current_steps": 161210, "total_steps": 204665, "loss": 0.0, "lr": 2.6231726656672726e-07, "epoch": 3.93838712041629, "percentage": 78.77, "elapsed_time": "3:28:20", "remaining_time": "0:56:09", "throughput": 8692.14, "total_tokens": 108655376} +{"current_steps": 161215, "total_steps": 204665, "loss": 0.0, "lr": 2.622596942886175e-07, "epoch": 3.938509271248137, "percentage": 78.77, "elapsed_time": "3:28:20", "remaining_time": "0:56:09", "throughput": 8692.16, "total_tokens": 108658704} +{"current_steps": 161220, "total_steps": 204665, "loss": 0.0, "lr": 2.622021273755535e-07, "epoch": 3.9386314220799843, "percentage": 78.77, "elapsed_time": "3:28:21", "remaining_time": "0:56:08", "throughput": 8692.17, "total_tokens": 108661776} +{"current_steps": 161225, "total_steps": 204665, "loss": 0.0, "lr": 2.621445658279542e-07, "epoch": 3.9387535729118315, "percentage": 78.78, "elapsed_time": "3:28:21", "remaining_time": "0:56:08", "throughput": 8692.19, "total_tokens": 108665040} +{"current_steps": 161230, "total_steps": 204665, "loss": 0.0001, "lr": 2.6208700964623785e-07, "epoch": 3.9388757237436787, "percentage": 78.78, "elapsed_time": "3:28:21", "remaining_time": "0:56:07", "throughput": 8692.21, "total_tokens": 108668240} +{"current_steps": 161235, "total_steps": 204665, "loss": 0.0, "lr": 2.620294588308235e-07, "epoch": 3.938997874575526, "percentage": 78.78, "elapsed_time": "3:28:22", "remaining_time": "0:56:07", "throughput": 8692.22, "total_tokens": 108671376} +{"current_steps": 161240, "total_steps": 204665, "loss": 0.0, "lr": 2.619719133821292e-07, "epoch": 3.939120025407373, "percentage": 78.78, "elapsed_time": "3:28:22", "remaining_time": "0:56:07", "throughput": 8692.24, "total_tokens": 108674576} +{"current_steps": 161245, "total_steps": 204665, "loss": 0.0, "lr": 2.6191437330057364e-07, "epoch": 3.9392421762392202, "percentage": 78.78, "elapsed_time": "3:28:22", "remaining_time": "0:56:06", "throughput": 8692.28, "total_tokens": 108678160} +{"current_steps": 161250, "total_steps": 204665, "loss": 0.0, "lr": 2.6185683858657546e-07, "epoch": 3.9393643270710674, "percentage": 78.79, "elapsed_time": "3:28:23", "remaining_time": "0:56:06", "throughput": 8692.32, "total_tokens": 108681680} +{"current_steps": 161255, "total_steps": 204665, "loss": 0.0001, "lr": 2.617993092405527e-07, "epoch": 3.9394864779029146, "percentage": 78.79, "elapsed_time": "3:28:23", "remaining_time": "0:56:05", "throughput": 8692.34, "total_tokens": 108685008} +{"current_steps": 161260, "total_steps": 204665, "loss": 0.0, "lr": 2.6174178526292424e-07, "epoch": 3.939608628734762, "percentage": 78.79, "elapsed_time": "3:28:23", "remaining_time": "0:56:05", "throughput": 8692.38, "total_tokens": 108688528} +{"current_steps": 161265, "total_steps": 204665, "loss": 0.0001, "lr": 2.616842666541077e-07, "epoch": 3.939730779566609, "percentage": 78.79, "elapsed_time": "3:28:24", "remaining_time": "0:56:05", "throughput": 8692.41, "total_tokens": 108691984} +{"current_steps": 161270, "total_steps": 204665, "loss": 0.0648, "lr": 2.616267534145218e-07, "epoch": 3.939852930398456, "percentage": 78.8, "elapsed_time": "3:28:24", "remaining_time": "0:56:04", "throughput": 8692.44, "total_tokens": 108695312} +{"current_steps": 161275, "total_steps": 204665, "loss": 0.0, "lr": 2.6156924554458506e-07, "epoch": 3.939975081230303, "percentage": 78.8, "elapsed_time": "3:28:24", "remaining_time": "0:56:04", "throughput": 8692.46, "total_tokens": 108698576} +{"current_steps": 161280, "total_steps": 204665, "loss": 0.0, "lr": 2.61511743044715e-07, "epoch": 3.9400972320621506, "percentage": 78.8, "elapsed_time": "3:28:25", "remaining_time": "0:56:03", "throughput": 8692.46, "total_tokens": 108701584} +{"current_steps": 161285, "total_steps": 204665, "loss": 0.0, "lr": 2.614542459153306e-07, "epoch": 3.9402193828939973, "percentage": 78.8, "elapsed_time": "3:28:25", "remaining_time": "0:56:03", "throughput": 8692.5, "total_tokens": 108705040} +{"current_steps": 161290, "total_steps": 204665, "loss": 0.0, "lr": 2.6139675415684914e-07, "epoch": 3.940341533725845, "percentage": 78.81, "elapsed_time": "3:28:25", "remaining_time": "0:56:03", "throughput": 8692.51, "total_tokens": 108708176} +{"current_steps": 161295, "total_steps": 204665, "loss": 0.0002, "lr": 2.613392677696895e-07, "epoch": 3.9404636845576917, "percentage": 78.81, "elapsed_time": "3:28:26", "remaining_time": "0:56:02", "throughput": 8692.53, "total_tokens": 108711376} +{"current_steps": 161300, "total_steps": 204665, "loss": 0.0001, "lr": 2.612817867542694e-07, "epoch": 3.940585835389539, "percentage": 78.81, "elapsed_time": "3:28:26", "remaining_time": "0:56:02", "throughput": 8692.54, "total_tokens": 108714512} +{"current_steps": 161305, "total_steps": 204665, "loss": 0.0, "lr": 2.612243111110065e-07, "epoch": 3.940707986221386, "percentage": 78.81, "elapsed_time": "3:28:26", "remaining_time": "0:56:01", "throughput": 8692.55, "total_tokens": 108717584} +{"current_steps": 161310, "total_steps": 204665, "loss": 0.0001, "lr": 2.611668408403195e-07, "epoch": 3.9408301370532333, "percentage": 78.82, "elapsed_time": "3:28:27", "remaining_time": "0:56:01", "throughput": 8692.57, "total_tokens": 108720976} +{"current_steps": 161315, "total_steps": 204665, "loss": 0.0, "lr": 2.611093759426256e-07, "epoch": 3.9409522878850805, "percentage": 78.82, "elapsed_time": "3:28:27", "remaining_time": "0:56:01", "throughput": 8692.6, "total_tokens": 108724304} +{"current_steps": 161320, "total_steps": 204665, "loss": 0.0456, "lr": 2.6105191641834337e-07, "epoch": 3.9410744387169276, "percentage": 78.82, "elapsed_time": "3:28:28", "remaining_time": "0:56:00", "throughput": 8692.62, "total_tokens": 108727504} +{"current_steps": 161325, "total_steps": 204665, "loss": 0.0, "lr": 2.6099446226789e-07, "epoch": 3.941196589548775, "percentage": 78.82, "elapsed_time": "3:28:28", "remaining_time": "0:56:00", "throughput": 8692.62, "total_tokens": 108730576} +{"current_steps": 161330, "total_steps": 204665, "loss": 0.0, "lr": 2.6093701349168396e-07, "epoch": 3.941318740380622, "percentage": 78.83, "elapsed_time": "3:28:28", "remaining_time": "0:55:59", "throughput": 8692.66, "total_tokens": 108734096} +{"current_steps": 161335, "total_steps": 204665, "loss": 0.0457, "lr": 2.608795700901425e-07, "epoch": 3.941440891212469, "percentage": 78.83, "elapsed_time": "3:28:29", "remaining_time": "0:55:59", "throughput": 8692.7, "total_tokens": 108737552} +{"current_steps": 161340, "total_steps": 204665, "loss": 0.0, "lr": 2.608221320636836e-07, "epoch": 3.9415630420443164, "percentage": 78.83, "elapsed_time": "3:28:29", "remaining_time": "0:55:59", "throughput": 8692.72, "total_tokens": 108740816} +{"current_steps": 161345, "total_steps": 204665, "loss": 0.0003, "lr": 2.607646994127253e-07, "epoch": 3.9416851928761636, "percentage": 78.83, "elapsed_time": "3:28:29", "remaining_time": "0:55:58", "throughput": 8692.74, "total_tokens": 108744208} +{"current_steps": 161350, "total_steps": 204665, "loss": 0.0418, "lr": 2.6070727213768464e-07, "epoch": 3.941807343708011, "percentage": 78.84, "elapsed_time": "3:28:30", "remaining_time": "0:55:58", "throughput": 8692.76, "total_tokens": 108747408} +{"current_steps": 161355, "total_steps": 204665, "loss": 0.0006, "lr": 2.606498502389798e-07, "epoch": 3.941929494539858, "percentage": 78.84, "elapsed_time": "3:28:30", "remaining_time": "0:55:57", "throughput": 8692.78, "total_tokens": 108750608} +{"current_steps": 161360, "total_steps": 204665, "loss": 0.0001, "lr": 2.6059243371702775e-07, "epoch": 3.9420516453717047, "percentage": 78.84, "elapsed_time": "3:28:30", "remaining_time": "0:55:57", "throughput": 8692.79, "total_tokens": 108753808} +{"current_steps": 161365, "total_steps": 204665, "loss": 0.0, "lr": 2.605350225722465e-07, "epoch": 3.9421737962035523, "percentage": 78.84, "elapsed_time": "3:28:31", "remaining_time": "0:55:57", "throughput": 8692.79, "total_tokens": 108756752} +{"current_steps": 161370, "total_steps": 204665, "loss": 0.0003, "lr": 2.6047761680505367e-07, "epoch": 3.942295947035399, "percentage": 78.85, "elapsed_time": "3:28:31", "remaining_time": "0:55:56", "throughput": 8692.79, "total_tokens": 108759632} +{"current_steps": 161375, "total_steps": 204665, "loss": 0.0, "lr": 2.604202164158663e-07, "epoch": 3.9424180978672467, "percentage": 78.85, "elapsed_time": "3:28:31", "remaining_time": "0:55:56", "throughput": 8692.81, "total_tokens": 108762960} +{"current_steps": 161380, "total_steps": 204665, "loss": 0.0001, "lr": 2.6036282140510224e-07, "epoch": 3.9425402486990935, "percentage": 78.85, "elapsed_time": "3:28:32", "remaining_time": "0:55:55", "throughput": 8692.84, "total_tokens": 108766352} +{"current_steps": 161385, "total_steps": 204665, "loss": 0.0652, "lr": 2.6030543177317853e-07, "epoch": 3.9426623995309407, "percentage": 78.85, "elapsed_time": "3:28:32", "remaining_time": "0:55:55", "throughput": 8692.88, "total_tokens": 108769872} +{"current_steps": 161390, "total_steps": 204665, "loss": 0.0489, "lr": 2.602480475205129e-07, "epoch": 3.942784550362788, "percentage": 78.86, "elapsed_time": "3:28:32", "remaining_time": "0:55:55", "throughput": 8692.9, "total_tokens": 108773200} +{"current_steps": 161395, "total_steps": 204665, "loss": 0.0006, "lr": 2.6019066864752206e-07, "epoch": 3.942906701194635, "percentage": 78.86, "elapsed_time": "3:28:33", "remaining_time": "0:55:54", "throughput": 8692.96, "total_tokens": 108777040} +{"current_steps": 161400, "total_steps": 204665, "loss": 0.0001, "lr": 2.60133295154624e-07, "epoch": 3.9430288520264822, "percentage": 78.86, "elapsed_time": "3:28:33", "remaining_time": "0:55:54", "throughput": 8692.99, "total_tokens": 108780368} +{"current_steps": 161405, "total_steps": 204665, "loss": 0.0001, "lr": 2.600759270422355e-07, "epoch": 3.9431510028583294, "percentage": 78.86, "elapsed_time": "3:28:33", "remaining_time": "0:55:53", "throughput": 8693.0, "total_tokens": 108783440} +{"current_steps": 161410, "total_steps": 204665, "loss": 0.0, "lr": 2.6001856431077395e-07, "epoch": 3.9432731536901766, "percentage": 78.87, "elapsed_time": "3:28:34", "remaining_time": "0:55:53", "throughput": 8693.0, "total_tokens": 108786512} +{"current_steps": 161415, "total_steps": 204665, "loss": 0.0, "lr": 2.599612069606565e-07, "epoch": 3.943395304522024, "percentage": 78.87, "elapsed_time": "3:28:34", "remaining_time": "0:55:53", "throughput": 8693.05, "total_tokens": 108790096} +{"current_steps": 161420, "total_steps": 204665, "loss": 0.011, "lr": 2.5990385499229994e-07, "epoch": 3.943517455353871, "percentage": 78.87, "elapsed_time": "3:28:34", "remaining_time": "0:55:52", "throughput": 8693.07, "total_tokens": 108793360} +{"current_steps": 161425, "total_steps": 204665, "loss": 0.0, "lr": 2.5984650840612157e-07, "epoch": 3.943639606185718, "percentage": 78.87, "elapsed_time": "3:28:35", "remaining_time": "0:55:52", "throughput": 8693.09, "total_tokens": 108796688} +{"current_steps": 161430, "total_steps": 204665, "loss": 0.0014, "lr": 2.5978916720253873e-07, "epoch": 3.9437617570175654, "percentage": 78.88, "elapsed_time": "3:28:35", "remaining_time": "0:55:52", "throughput": 8693.1, "total_tokens": 108799760} +{"current_steps": 161435, "total_steps": 204665, "loss": 0.0, "lr": 2.5973183138196785e-07, "epoch": 3.9438839078494126, "percentage": 78.88, "elapsed_time": "3:28:36", "remaining_time": "0:55:51", "throughput": 8693.14, "total_tokens": 108803344} +{"current_steps": 161440, "total_steps": 204665, "loss": 0.0003, "lr": 2.5967450094482657e-07, "epoch": 3.9440060586812598, "percentage": 78.88, "elapsed_time": "3:28:36", "remaining_time": "0:55:51", "throughput": 8693.15, "total_tokens": 108806480} +{"current_steps": 161445, "total_steps": 204665, "loss": 0.0, "lr": 2.596171758915312e-07, "epoch": 3.944128209513107, "percentage": 78.88, "elapsed_time": "3:28:36", "remaining_time": "0:55:50", "throughput": 8693.21, "total_tokens": 108810320} +{"current_steps": 161450, "total_steps": 204665, "loss": 0.0, "lr": 2.595598562224991e-07, "epoch": 3.944250360344954, "percentage": 78.89, "elapsed_time": "3:28:37", "remaining_time": "0:55:50", "throughput": 8693.24, "total_tokens": 108813712} +{"current_steps": 161455, "total_steps": 204665, "loss": 0.0001, "lr": 2.5950254193814655e-07, "epoch": 3.944372511176801, "percentage": 78.89, "elapsed_time": "3:28:37", "remaining_time": "0:55:50", "throughput": 8693.24, "total_tokens": 108816656} +{"current_steps": 161460, "total_steps": 204665, "loss": 0.0001, "lr": 2.5944523303889065e-07, "epoch": 3.9444946620086485, "percentage": 78.89, "elapsed_time": "3:28:37", "remaining_time": "0:55:49", "throughput": 8693.29, "total_tokens": 108820304} +{"current_steps": 161465, "total_steps": 204665, "loss": 0.0, "lr": 2.593879295251485e-07, "epoch": 3.9446168128404953, "percentage": 78.89, "elapsed_time": "3:28:38", "remaining_time": "0:55:49", "throughput": 8693.3, "total_tokens": 108823440} +{"current_steps": 161470, "total_steps": 204665, "loss": 0.0, "lr": 2.5933063139733637e-07, "epoch": 3.9447389636723424, "percentage": 78.89, "elapsed_time": "3:28:38", "remaining_time": "0:55:48", "throughput": 8693.32, "total_tokens": 108826704} +{"current_steps": 161475, "total_steps": 204665, "loss": 0.0, "lr": 2.592733386558713e-07, "epoch": 3.9448611145041896, "percentage": 78.9, "elapsed_time": "3:28:38", "remaining_time": "0:55:48", "throughput": 8693.4, "total_tokens": 108830864} +{"current_steps": 161480, "total_steps": 204665, "loss": 0.0, "lr": 2.5921605130116954e-07, "epoch": 3.944983265336037, "percentage": 78.9, "elapsed_time": "3:28:39", "remaining_time": "0:55:48", "throughput": 8693.43, "total_tokens": 108834320} +{"current_steps": 161485, "total_steps": 204665, "loss": 0.0, "lr": 2.591587693336481e-07, "epoch": 3.945105416167884, "percentage": 78.9, "elapsed_time": "3:28:39", "remaining_time": "0:55:47", "throughput": 8693.46, "total_tokens": 108837712} +{"current_steps": 161490, "total_steps": 204665, "loss": 0.0, "lr": 2.5910149275372305e-07, "epoch": 3.945227566999731, "percentage": 78.9, "elapsed_time": "3:28:39", "remaining_time": "0:55:47", "throughput": 8693.48, "total_tokens": 108840976} +{"current_steps": 161495, "total_steps": 204665, "loss": 0.0, "lr": 2.5904422156181126e-07, "epoch": 3.9453497178315784, "percentage": 78.91, "elapsed_time": "3:28:40", "remaining_time": "0:55:46", "throughput": 8693.5, "total_tokens": 108844176} +{"current_steps": 161500, "total_steps": 204665, "loss": 0.0, "lr": 2.589869557583294e-07, "epoch": 3.9454718686634256, "percentage": 78.91, "elapsed_time": "3:28:40", "remaining_time": "0:55:46", "throughput": 8693.52, "total_tokens": 108847504} +{"current_steps": 161505, "total_steps": 204665, "loss": 0.0, "lr": 2.589296953436938e-07, "epoch": 3.9455940194952728, "percentage": 78.91, "elapsed_time": "3:28:40", "remaining_time": "0:55:46", "throughput": 8693.53, "total_tokens": 108850576} +{"current_steps": 161510, "total_steps": 204665, "loss": 0.0, "lr": 2.5887244031832043e-07, "epoch": 3.94571617032712, "percentage": 78.91, "elapsed_time": "3:28:41", "remaining_time": "0:55:45", "throughput": 8693.56, "total_tokens": 108854032} +{"current_steps": 161515, "total_steps": 204665, "loss": 0.0, "lr": 2.5881519068262635e-07, "epoch": 3.945838321158967, "percentage": 78.92, "elapsed_time": "3:28:41", "remaining_time": "0:55:45", "throughput": 8693.57, "total_tokens": 108857104} +{"current_steps": 161520, "total_steps": 204665, "loss": 0.0001, "lr": 2.587579464370273e-07, "epoch": 3.9459604719908143, "percentage": 78.92, "elapsed_time": "3:28:41", "remaining_time": "0:55:44", "throughput": 8693.6, "total_tokens": 108860560} +{"current_steps": 161525, "total_steps": 204665, "loss": 0.0, "lr": 2.587007075819401e-07, "epoch": 3.9460826228226615, "percentage": 78.92, "elapsed_time": "3:28:42", "remaining_time": "0:55:44", "throughput": 8693.64, "total_tokens": 108864080} +{"current_steps": 161530, "total_steps": 204665, "loss": 0.0, "lr": 2.586434741177804e-07, "epoch": 3.9462047736545087, "percentage": 78.92, "elapsed_time": "3:28:42", "remaining_time": "0:55:44", "throughput": 8693.66, "total_tokens": 108867344} +{"current_steps": 161535, "total_steps": 204665, "loss": 0.0, "lr": 2.5858624604496504e-07, "epoch": 3.946326924486356, "percentage": 78.93, "elapsed_time": "3:28:42", "remaining_time": "0:55:43", "throughput": 8693.68, "total_tokens": 108870544} +{"current_steps": 161540, "total_steps": 204665, "loss": 0.0001, "lr": 2.585290233639097e-07, "epoch": 3.9464490753182027, "percentage": 78.93, "elapsed_time": "3:28:43", "remaining_time": "0:55:43", "throughput": 8693.71, "total_tokens": 108873936} +{"current_steps": 161545, "total_steps": 204665, "loss": 0.0, "lr": 2.584718060750309e-07, "epoch": 3.9465712261500503, "percentage": 78.93, "elapsed_time": "3:28:43", "remaining_time": "0:55:42", "throughput": 8693.71, "total_tokens": 108876944} +{"current_steps": 161550, "total_steps": 204665, "loss": 0.0153, "lr": 2.584145941787444e-07, "epoch": 3.946693376981897, "percentage": 78.93, "elapsed_time": "3:28:43", "remaining_time": "0:55:42", "throughput": 8693.73, "total_tokens": 108880144} +{"current_steps": 161555, "total_steps": 204665, "loss": 0.0, "lr": 2.5835738767546647e-07, "epoch": 3.9468155278137447, "percentage": 78.94, "elapsed_time": "3:28:44", "remaining_time": "0:55:42", "throughput": 8693.77, "total_tokens": 108883664} +{"current_steps": 161560, "total_steps": 204665, "loss": 0.0, "lr": 2.5830018656561325e-07, "epoch": 3.9469376786455914, "percentage": 78.94, "elapsed_time": "3:28:44", "remaining_time": "0:55:41", "throughput": 8693.79, "total_tokens": 108886992} +{"current_steps": 161565, "total_steps": 204665, "loss": 0.0, "lr": 2.582429908496003e-07, "epoch": 3.9470598294774386, "percentage": 78.94, "elapsed_time": "3:28:45", "remaining_time": "0:55:41", "throughput": 8693.86, "total_tokens": 108891024} +{"current_steps": 161570, "total_steps": 204665, "loss": 0.0, "lr": 2.581858005278442e-07, "epoch": 3.947181980309286, "percentage": 78.94, "elapsed_time": "3:28:45", "remaining_time": "0:55:40", "throughput": 8693.91, "total_tokens": 108894736} +{"current_steps": 161575, "total_steps": 204665, "loss": 0.0, "lr": 2.581286156007602e-07, "epoch": 3.947304131141133, "percentage": 78.95, "elapsed_time": "3:28:45", "remaining_time": "0:55:40", "throughput": 8693.94, "total_tokens": 108898192} +{"current_steps": 161580, "total_steps": 204665, "loss": 0.0, "lr": 2.5807143606876436e-07, "epoch": 3.94742628197298, "percentage": 78.95, "elapsed_time": "3:28:46", "remaining_time": "0:55:40", "throughput": 8694.0, "total_tokens": 108902032} +{"current_steps": 161585, "total_steps": 204665, "loss": 0.0002, "lr": 2.5801426193227296e-07, "epoch": 3.9475484328048274, "percentage": 78.95, "elapsed_time": "3:28:46", "remaining_time": "0:55:39", "throughput": 8694.04, "total_tokens": 108905552} +{"current_steps": 161590, "total_steps": 204665, "loss": 0.0, "lr": 2.5795709319170114e-07, "epoch": 3.9476705836366746, "percentage": 78.95, "elapsed_time": "3:28:46", "remaining_time": "0:55:39", "throughput": 8694.04, "total_tokens": 108908560} +{"current_steps": 161595, "total_steps": 204665, "loss": 0.0875, "lr": 2.578999298474651e-07, "epoch": 3.9477927344685217, "percentage": 78.96, "elapsed_time": "3:28:47", "remaining_time": "0:55:38", "throughput": 8694.04, "total_tokens": 108911440} +{"current_steps": 161600, "total_steps": 204665, "loss": 0.0001, "lr": 2.5784277189998016e-07, "epoch": 3.947914885300369, "percentage": 78.96, "elapsed_time": "3:28:47", "remaining_time": "0:55:38", "throughput": 8694.08, "total_tokens": 108914960} +{"current_steps": 161605, "total_steps": 204665, "loss": 0.0019, "lr": 2.577856193496625e-07, "epoch": 3.948037036132216, "percentage": 78.96, "elapsed_time": "3:28:47", "remaining_time": "0:55:38", "throughput": 8694.1, "total_tokens": 108918288} +{"current_steps": 161610, "total_steps": 204665, "loss": 0.0, "lr": 2.577284721969274e-07, "epoch": 3.9481591869640633, "percentage": 78.96, "elapsed_time": "3:28:48", "remaining_time": "0:55:37", "throughput": 8694.13, "total_tokens": 108921808} +{"current_steps": 161615, "total_steps": 204665, "loss": 0.0001, "lr": 2.576713304421902e-07, "epoch": 3.9482813377959105, "percentage": 78.97, "elapsed_time": "3:28:48", "remaining_time": "0:55:37", "throughput": 8694.14, "total_tokens": 108924752} +{"current_steps": 161620, "total_steps": 204665, "loss": 0.0001, "lr": 2.57614194085867e-07, "epoch": 3.9484034886277577, "percentage": 78.97, "elapsed_time": "3:28:48", "remaining_time": "0:55:36", "throughput": 8694.17, "total_tokens": 108928208} +{"current_steps": 161625, "total_steps": 204665, "loss": 0.0, "lr": 2.575570631283729e-07, "epoch": 3.948525639459605, "percentage": 78.97, "elapsed_time": "3:28:49", "remaining_time": "0:55:36", "throughput": 8694.19, "total_tokens": 108931472} +{"current_steps": 161630, "total_steps": 204665, "loss": 0.0546, "lr": 2.574999375701238e-07, "epoch": 3.948647790291452, "percentage": 78.97, "elapsed_time": "3:28:49", "remaining_time": "0:55:36", "throughput": 8694.21, "total_tokens": 108934736} +{"current_steps": 161635, "total_steps": 204665, "loss": 0.0978, "lr": 2.574428174115345e-07, "epoch": 3.948769941123299, "percentage": 78.98, "elapsed_time": "3:28:49", "remaining_time": "0:55:35", "throughput": 8694.24, "total_tokens": 108938128} +{"current_steps": 161640, "total_steps": 204665, "loss": 0.0001, "lr": 2.573857026530211e-07, "epoch": 3.9488920919551465, "percentage": 78.98, "elapsed_time": "3:28:50", "remaining_time": "0:55:35", "throughput": 8694.28, "total_tokens": 108941712} +{"current_steps": 161645, "total_steps": 204665, "loss": 0.0, "lr": 2.5732859329499825e-07, "epoch": 3.949014242786993, "percentage": 78.98, "elapsed_time": "3:28:50", "remaining_time": "0:55:34", "throughput": 8694.36, "total_tokens": 108945808} +{"current_steps": 161650, "total_steps": 204665, "loss": 0.0, "lr": 2.572714893378817e-07, "epoch": 3.9491363936188404, "percentage": 78.98, "elapsed_time": "3:28:51", "remaining_time": "0:55:34", "throughput": 8694.45, "total_tokens": 108950160} +{"current_steps": 161655, "total_steps": 204665, "loss": 0.0288, "lr": 2.5721439078208686e-07, "epoch": 3.9492585444506876, "percentage": 78.99, "elapsed_time": "3:28:51", "remaining_time": "0:55:34", "throughput": 8694.49, "total_tokens": 108953744} +{"current_steps": 161660, "total_steps": 204665, "loss": 0.075, "lr": 2.571572976280285e-07, "epoch": 3.9493806952825348, "percentage": 78.99, "elapsed_time": "3:28:51", "remaining_time": "0:55:33", "throughput": 8694.49, "total_tokens": 108956752} +{"current_steps": 161665, "total_steps": 204665, "loss": 0.0466, "lr": 2.5710020987612234e-07, "epoch": 3.949502846114382, "percentage": 78.99, "elapsed_time": "3:28:52", "remaining_time": "0:55:33", "throughput": 8694.5, "total_tokens": 108959760} +{"current_steps": 161670, "total_steps": 204665, "loss": 0.0, "lr": 2.57043127526783e-07, "epoch": 3.949624996946229, "percentage": 78.99, "elapsed_time": "3:28:52", "remaining_time": "0:55:32", "throughput": 8694.52, "total_tokens": 108963088} +{"current_steps": 161675, "total_steps": 204665, "loss": 0.0002, "lr": 2.569860505804259e-07, "epoch": 3.9497471477780763, "percentage": 78.99, "elapsed_time": "3:28:52", "remaining_time": "0:55:32", "throughput": 8694.55, "total_tokens": 108966544} +{"current_steps": 161680, "total_steps": 204665, "loss": 0.0003, "lr": 2.5692897903746635e-07, "epoch": 3.9498692986099235, "percentage": 79.0, "elapsed_time": "3:28:53", "remaining_time": "0:55:32", "throughput": 8694.56, "total_tokens": 108969552} +{"current_steps": 161685, "total_steps": 204665, "loss": 0.0001, "lr": 2.568719128983189e-07, "epoch": 3.9499914494417707, "percentage": 79.0, "elapsed_time": "3:28:53", "remaining_time": "0:55:31", "throughput": 8694.59, "total_tokens": 108973008} +{"current_steps": 161690, "total_steps": 204665, "loss": 0.0, "lr": 2.5681485216339907e-07, "epoch": 3.950113600273618, "percentage": 79.0, "elapsed_time": "3:28:53", "remaining_time": "0:55:31", "throughput": 8694.61, "total_tokens": 108976208} +{"current_steps": 161695, "total_steps": 204665, "loss": 0.0001, "lr": 2.5675779683312115e-07, "epoch": 3.950235751105465, "percentage": 79.0, "elapsed_time": "3:28:54", "remaining_time": "0:55:30", "throughput": 8694.63, "total_tokens": 108979472} +{"current_steps": 161700, "total_steps": 204665, "loss": 0.0, "lr": 2.5670074690790065e-07, "epoch": 3.9503579019373123, "percentage": 79.01, "elapsed_time": "3:28:54", "remaining_time": "0:55:30", "throughput": 8694.65, "total_tokens": 108982800} +{"current_steps": 161705, "total_steps": 204665, "loss": 0.0, "lr": 2.5664370238815214e-07, "epoch": 3.9504800527691595, "percentage": 79.01, "elapsed_time": "3:28:54", "remaining_time": "0:55:30", "throughput": 8694.68, "total_tokens": 108986192} +{"current_steps": 161710, "total_steps": 204665, "loss": 0.0, "lr": 2.565866632742908e-07, "epoch": 3.9506022036010067, "percentage": 79.01, "elapsed_time": "3:28:55", "remaining_time": "0:55:29", "throughput": 8694.68, "total_tokens": 108989072} +{"current_steps": 161715, "total_steps": 204665, "loss": 0.0, "lr": 2.5652962956673086e-07, "epoch": 3.950724354432854, "percentage": 79.01, "elapsed_time": "3:28:55", "remaining_time": "0:55:29", "throughput": 8694.72, "total_tokens": 108992656} +{"current_steps": 161720, "total_steps": 204665, "loss": 0.0001, "lr": 2.5647260126588775e-07, "epoch": 3.9508465052647006, "percentage": 79.02, "elapsed_time": "3:28:55", "remaining_time": "0:55:28", "throughput": 8694.71, "total_tokens": 108995536} +{"current_steps": 161725, "total_steps": 204665, "loss": 0.0, "lr": 2.5641557837217586e-07, "epoch": 3.9509686560965482, "percentage": 79.02, "elapsed_time": "3:28:56", "remaining_time": "0:55:28", "throughput": 8694.77, "total_tokens": 108999312} +{"current_steps": 161730, "total_steps": 204665, "loss": 0.1403, "lr": 2.563585608860096e-07, "epoch": 3.951090806928395, "percentage": 79.02, "elapsed_time": "3:28:56", "remaining_time": "0:55:28", "throughput": 8694.79, "total_tokens": 109002576} +{"current_steps": 161735, "total_steps": 204665, "loss": 0.0348, "lr": 2.563015488078039e-07, "epoch": 3.9512129577602426, "percentage": 79.02, "elapsed_time": "3:28:56", "remaining_time": "0:55:27", "throughput": 8694.81, "total_tokens": 109005776} +{"current_steps": 161740, "total_steps": 204665, "loss": 0.0001, "lr": 2.5624454213797366e-07, "epoch": 3.9513351085920894, "percentage": 79.03, "elapsed_time": "3:28:57", "remaining_time": "0:55:27", "throughput": 8694.82, "total_tokens": 109008912} +{"current_steps": 161745, "total_steps": 204665, "loss": 0.0, "lr": 2.5618754087693283e-07, "epoch": 3.9514572594239366, "percentage": 79.03, "elapsed_time": "3:28:57", "remaining_time": "0:55:26", "throughput": 8694.84, "total_tokens": 109012240} +{"current_steps": 161750, "total_steps": 204665, "loss": 0.0001, "lr": 2.5613054502509655e-07, "epoch": 3.9515794102557837, "percentage": 79.03, "elapsed_time": "3:28:57", "remaining_time": "0:55:26", "throughput": 8694.86, "total_tokens": 109015504} +{"current_steps": 161755, "total_steps": 204665, "loss": 0.0, "lr": 2.560735545828787e-07, "epoch": 3.951701561087631, "percentage": 79.03, "elapsed_time": "3:28:58", "remaining_time": "0:55:26", "throughput": 8694.88, "total_tokens": 109018704} +{"current_steps": 161760, "total_steps": 204665, "loss": 0.0002, "lr": 2.560165695506945e-07, "epoch": 3.951823711919478, "percentage": 79.04, "elapsed_time": "3:28:58", "remaining_time": "0:55:25", "throughput": 8694.92, "total_tokens": 109022352} +{"current_steps": 161765, "total_steps": 204665, "loss": 0.0005, "lr": 2.559595899289575e-07, "epoch": 3.9519458627513253, "percentage": 79.04, "elapsed_time": "3:28:58", "remaining_time": "0:55:25", "throughput": 8694.98, "total_tokens": 109026192} +{"current_steps": 161770, "total_steps": 204665, "loss": 0.0, "lr": 2.5590261571808247e-07, "epoch": 3.9520680135831725, "percentage": 79.04, "elapsed_time": "3:28:59", "remaining_time": "0:55:24", "throughput": 8695.01, "total_tokens": 109029520} +{"current_steps": 161775, "total_steps": 204665, "loss": 0.0542, "lr": 2.558456469184841e-07, "epoch": 3.9521901644150197, "percentage": 79.04, "elapsed_time": "3:28:59", "remaining_time": "0:55:24", "throughput": 8695.05, "total_tokens": 109033168} +{"current_steps": 161780, "total_steps": 204665, "loss": 0.0, "lr": 2.55788683530576e-07, "epoch": 3.952312315246867, "percentage": 79.05, "elapsed_time": "3:29:00", "remaining_time": "0:55:24", "throughput": 8695.12, "total_tokens": 109037200} +{"current_steps": 161785, "total_steps": 204665, "loss": 0.0392, "lr": 2.5573172555477316e-07, "epoch": 3.952434466078714, "percentage": 79.05, "elapsed_time": "3:29:00", "remaining_time": "0:55:23", "throughput": 8695.14, "total_tokens": 109040400} +{"current_steps": 161790, "total_steps": 204665, "loss": 0.0001, "lr": 2.55674772991489e-07, "epoch": 3.9525566169105613, "percentage": 79.05, "elapsed_time": "3:29:00", "remaining_time": "0:55:23", "throughput": 8695.16, "total_tokens": 109043664} +{"current_steps": 161795, "total_steps": 204665, "loss": 0.0, "lr": 2.5561782584113845e-07, "epoch": 3.9526787677424084, "percentage": 79.05, "elapsed_time": "3:29:01", "remaining_time": "0:55:22", "throughput": 8695.18, "total_tokens": 109046928} +{"current_steps": 161800, "total_steps": 204665, "loss": 0.0001, "lr": 2.55560884104135e-07, "epoch": 3.9528009185742556, "percentage": 79.06, "elapsed_time": "3:29:01", "remaining_time": "0:55:22", "throughput": 8695.17, "total_tokens": 109049744} +{"current_steps": 161805, "total_steps": 204665, "loss": 0.0, "lr": 2.555039477808929e-07, "epoch": 3.9529230694061024, "percentage": 79.06, "elapsed_time": "3:29:01", "remaining_time": "0:55:22", "throughput": 8695.21, "total_tokens": 109053264} +{"current_steps": 161810, "total_steps": 204665, "loss": 0.0, "lr": 2.5544701687182677e-07, "epoch": 3.95304522023795, "percentage": 79.06, "elapsed_time": "3:29:02", "remaining_time": "0:55:21", "throughput": 8695.21, "total_tokens": 109056272} +{"current_steps": 161815, "total_steps": 204665, "loss": 0.0, "lr": 2.5539009137735013e-07, "epoch": 3.9531673710697968, "percentage": 79.06, "elapsed_time": "3:29:02", "remaining_time": "0:55:21", "throughput": 8695.23, "total_tokens": 109059536} +{"current_steps": 161820, "total_steps": 204665, "loss": 0.0003, "lr": 2.553331712978768e-07, "epoch": 3.9532895219016444, "percentage": 79.07, "elapsed_time": "3:29:02", "remaining_time": "0:55:20", "throughput": 8695.25, "total_tokens": 109062800} +{"current_steps": 161825, "total_steps": 204665, "loss": 0.0, "lr": 2.552762566338211e-07, "epoch": 3.953411672733491, "percentage": 79.07, "elapsed_time": "3:29:03", "remaining_time": "0:55:20", "throughput": 8695.27, "total_tokens": 109066128} +{"current_steps": 161830, "total_steps": 204665, "loss": 0.0, "lr": 2.552193473855966e-07, "epoch": 3.9535338235653383, "percentage": 79.07, "elapsed_time": "3:29:03", "remaining_time": "0:55:20", "throughput": 8695.32, "total_tokens": 109069840} +{"current_steps": 161835, "total_steps": 204665, "loss": 0.0, "lr": 2.551624435536176e-07, "epoch": 3.9536559743971855, "percentage": 79.07, "elapsed_time": "3:29:03", "remaining_time": "0:55:19", "throughput": 8695.35, "total_tokens": 109073168} +{"current_steps": 161840, "total_steps": 204665, "loss": 0.0184, "lr": 2.551055451382973e-07, "epoch": 3.9537781252290327, "percentage": 79.08, "elapsed_time": "3:29:04", "remaining_time": "0:55:19", "throughput": 8695.36, "total_tokens": 109076240} +{"current_steps": 161845, "total_steps": 204665, "loss": 0.0, "lr": 2.550486521400501e-07, "epoch": 3.95390027606088, "percentage": 79.08, "elapsed_time": "3:29:04", "remaining_time": "0:55:18", "throughput": 8695.41, "total_tokens": 109080016} +{"current_steps": 161850, "total_steps": 204665, "loss": 0.0005, "lr": 2.5499176455928927e-07, "epoch": 3.954022426892727, "percentage": 79.08, "elapsed_time": "3:29:04", "remaining_time": "0:55:18", "throughput": 8695.45, "total_tokens": 109083536} +{"current_steps": 161855, "total_steps": 204665, "loss": 0.0354, "lr": 2.5493488239642904e-07, "epoch": 3.9541445777245743, "percentage": 79.08, "elapsed_time": "3:29:05", "remaining_time": "0:55:18", "throughput": 8695.5, "total_tokens": 109087184} +{"current_steps": 161860, "total_steps": 204665, "loss": 0.0001, "lr": 2.5487800565188236e-07, "epoch": 3.9542667285564215, "percentage": 79.09, "elapsed_time": "3:29:05", "remaining_time": "0:55:17", "throughput": 8695.53, "total_tokens": 109090640} +{"current_steps": 161865, "total_steps": 204665, "loss": 0.0554, "lr": 2.548211343260632e-07, "epoch": 3.9543888793882687, "percentage": 79.09, "elapsed_time": "3:29:05", "remaining_time": "0:55:17", "throughput": 8695.54, "total_tokens": 109093776} +{"current_steps": 161870, "total_steps": 204665, "loss": 0.0, "lr": 2.5476426841938545e-07, "epoch": 3.954511030220116, "percentage": 79.09, "elapsed_time": "3:29:06", "remaining_time": "0:55:16", "throughput": 8695.56, "total_tokens": 109096976} +{"current_steps": 161875, "total_steps": 204665, "loss": 0.0, "lr": 2.547074079322622e-07, "epoch": 3.954633181051963, "percentage": 79.09, "elapsed_time": "3:29:06", "remaining_time": "0:55:16", "throughput": 8695.6, "total_tokens": 109100560} +{"current_steps": 161880, "total_steps": 204665, "loss": 0.0, "lr": 2.5465055286510737e-07, "epoch": 3.9547553318838102, "percentage": 79.1, "elapsed_time": "3:29:06", "remaining_time": "0:55:16", "throughput": 8695.62, "total_tokens": 109103888} +{"current_steps": 161885, "total_steps": 204665, "loss": 0.0, "lr": 2.5459370321833396e-07, "epoch": 3.9548774827156574, "percentage": 79.1, "elapsed_time": "3:29:07", "remaining_time": "0:55:15", "throughput": 8695.66, "total_tokens": 109107344} +{"current_steps": 161890, "total_steps": 204665, "loss": 0.0, "lr": 2.545368589923559e-07, "epoch": 3.9549996335475046, "percentage": 79.1, "elapsed_time": "3:29:07", "remaining_time": "0:55:15", "throughput": 8695.68, "total_tokens": 109110544} +{"current_steps": 161895, "total_steps": 204665, "loss": 0.0, "lr": 2.54480020187586e-07, "epoch": 3.955121784379352, "percentage": 79.1, "elapsed_time": "3:29:08", "remaining_time": "0:55:14", "throughput": 8695.69, "total_tokens": 109113680} +{"current_steps": 161900, "total_steps": 204665, "loss": 0.0, "lr": 2.54423186804438e-07, "epoch": 3.9552439352111985, "percentage": 79.1, "elapsed_time": "3:29:08", "remaining_time": "0:55:14", "throughput": 8695.71, "total_tokens": 109117008} +{"current_steps": 161905, "total_steps": 204665, "loss": 0.0, "lr": 2.5436635884332526e-07, "epoch": 3.955366086043046, "percentage": 79.11, "elapsed_time": "3:29:08", "remaining_time": "0:55:14", "throughput": 8695.74, "total_tokens": 109120336} +{"current_steps": 161910, "total_steps": 204665, "loss": 0.0, "lr": 2.5430953630466067e-07, "epoch": 3.955488236874893, "percentage": 79.11, "elapsed_time": "3:29:09", "remaining_time": "0:55:13", "throughput": 8695.77, "total_tokens": 109123792} +{"current_steps": 161915, "total_steps": 204665, "loss": 0.0, "lr": 2.54252719188858e-07, "epoch": 3.95561038770674, "percentage": 79.11, "elapsed_time": "3:29:09", "remaining_time": "0:55:13", "throughput": 8695.83, "total_tokens": 109127568} +{"current_steps": 161920, "total_steps": 204665, "loss": 0.0527, "lr": 2.5419590749633014e-07, "epoch": 3.9557325385385873, "percentage": 79.11, "elapsed_time": "3:29:09", "remaining_time": "0:55:12", "throughput": 8695.86, "total_tokens": 109130960} +{"current_steps": 161925, "total_steps": 204665, "loss": 0.0, "lr": 2.5413910122748996e-07, "epoch": 3.9558546893704345, "percentage": 79.12, "elapsed_time": "3:29:10", "remaining_time": "0:55:12", "throughput": 8695.88, "total_tokens": 109134288} +{"current_steps": 161930, "total_steps": 204665, "loss": 0.0, "lr": 2.5408230038275115e-07, "epoch": 3.9559768402022817, "percentage": 79.12, "elapsed_time": "3:29:10", "remaining_time": "0:55:12", "throughput": 8695.9, "total_tokens": 109137552} +{"current_steps": 161935, "total_steps": 204665, "loss": 0.0, "lr": 2.5402550496252616e-07, "epoch": 3.956098991034129, "percentage": 79.12, "elapsed_time": "3:29:10", "remaining_time": "0:55:11", "throughput": 8695.94, "total_tokens": 109141200} +{"current_steps": 161940, "total_steps": 204665, "loss": 0.0843, "lr": 2.539687149672287e-07, "epoch": 3.956221141865976, "percentage": 79.12, "elapsed_time": "3:29:11", "remaining_time": "0:55:11", "throughput": 8695.95, "total_tokens": 109144272} +{"current_steps": 161945, "total_steps": 204665, "loss": 0.0, "lr": 2.53911930397271e-07, "epoch": 3.9563432926978233, "percentage": 79.13, "elapsed_time": "3:29:11", "remaining_time": "0:55:11", "throughput": 8695.98, "total_tokens": 109147664} +{"current_steps": 161950, "total_steps": 204665, "loss": 0.0, "lr": 2.538551512530668e-07, "epoch": 3.9564654435296704, "percentage": 79.13, "elapsed_time": "3:29:11", "remaining_time": "0:55:10", "throughput": 8696.01, "total_tokens": 109151056} +{"current_steps": 161955, "total_steps": 204665, "loss": 0.0001, "lr": 2.537983775350283e-07, "epoch": 3.9565875943615176, "percentage": 79.13, "elapsed_time": "3:29:12", "remaining_time": "0:55:10", "throughput": 8696.04, "total_tokens": 109154576} +{"current_steps": 161960, "total_steps": 204665, "loss": 0.0002, "lr": 2.5374160924356867e-07, "epoch": 3.956709745193365, "percentage": 79.13, "elapsed_time": "3:29:12", "remaining_time": "0:55:09", "throughput": 8696.07, "total_tokens": 109157968} +{"current_steps": 161965, "total_steps": 204665, "loss": 0.0, "lr": 2.5368484637910117e-07, "epoch": 3.956831896025212, "percentage": 79.14, "elapsed_time": "3:29:12", "remaining_time": "0:55:09", "throughput": 8696.1, "total_tokens": 109161360} +{"current_steps": 161970, "total_steps": 204665, "loss": 0.0, "lr": 2.536280889420378e-07, "epoch": 3.956954046857059, "percentage": 79.14, "elapsed_time": "3:29:13", "remaining_time": "0:55:09", "throughput": 8696.14, "total_tokens": 109164944} +{"current_steps": 161975, "total_steps": 204665, "loss": 0.0, "lr": 2.535713369327921e-07, "epoch": 3.9570761976889064, "percentage": 79.14, "elapsed_time": "3:29:13", "remaining_time": "0:55:08", "throughput": 8696.21, "total_tokens": 109168912} +{"current_steps": 161980, "total_steps": 204665, "loss": 0.0224, "lr": 2.5351459035177604e-07, "epoch": 3.9571983485207536, "percentage": 79.14, "elapsed_time": "3:29:13", "remaining_time": "0:55:08", "throughput": 8696.22, "total_tokens": 109172048} +{"current_steps": 161985, "total_steps": 204665, "loss": 0.0, "lr": 2.534578491994026e-07, "epoch": 3.9573204993526003, "percentage": 79.15, "elapsed_time": "3:29:14", "remaining_time": "0:55:07", "throughput": 8696.23, "total_tokens": 109175184} +{"current_steps": 161990, "total_steps": 204665, "loss": 0.0001, "lr": 2.534011134760848e-07, "epoch": 3.957442650184448, "percentage": 79.15, "elapsed_time": "3:29:14", "remaining_time": "0:55:07", "throughput": 8696.25, "total_tokens": 109178384} +{"current_steps": 161995, "total_steps": 204665, "loss": 0.0, "lr": 2.533443831822347e-07, "epoch": 3.9575648010162947, "percentage": 79.15, "elapsed_time": "3:29:14", "remaining_time": "0:55:07", "throughput": 8696.27, "total_tokens": 109181648} +{"current_steps": 162000, "total_steps": 204665, "loss": 0.0, "lr": 2.5328765831826537e-07, "epoch": 3.9576869518481423, "percentage": 79.15, "elapsed_time": "3:29:15", "remaining_time": "0:55:06", "throughput": 8696.3, "total_tokens": 109185040} +{"current_steps": 162005, "total_steps": 204665, "loss": 0.0, "lr": 2.532309388845887e-07, "epoch": 3.957809102679989, "percentage": 79.16, "elapsed_time": "3:29:15", "remaining_time": "0:55:06", "throughput": 8696.35, "total_tokens": 109188752} +{"current_steps": 162010, "total_steps": 204665, "loss": 0.0, "lr": 2.531742248816178e-07, "epoch": 3.9579312535118363, "percentage": 79.16, "elapsed_time": "3:29:16", "remaining_time": "0:55:05", "throughput": 8696.41, "total_tokens": 109192656} +{"current_steps": 162015, "total_steps": 204665, "loss": 0.0001, "lr": 2.531175163097645e-07, "epoch": 3.9580534043436835, "percentage": 79.16, "elapsed_time": "3:29:16", "remaining_time": "0:55:05", "throughput": 8696.44, "total_tokens": 109196048} +{"current_steps": 162020, "total_steps": 204665, "loss": 0.0, "lr": 2.5306081316944185e-07, "epoch": 3.9581755551755307, "percentage": 79.16, "elapsed_time": "3:29:16", "remaining_time": "0:55:05", "throughput": 8696.45, "total_tokens": 109199120} +{"current_steps": 162025, "total_steps": 204665, "loss": 0.0, "lr": 2.530041154610615e-07, "epoch": 3.958297706007378, "percentage": 79.17, "elapsed_time": "3:29:17", "remaining_time": "0:55:04", "throughput": 8696.48, "total_tokens": 109202448} +{"current_steps": 162030, "total_steps": 204665, "loss": 0.0, "lr": 2.529474231850365e-07, "epoch": 3.958419856839225, "percentage": 79.17, "elapsed_time": "3:29:17", "remaining_time": "0:55:04", "throughput": 8696.48, "total_tokens": 109205456} +{"current_steps": 162035, "total_steps": 204665, "loss": 0.0237, "lr": 2.528907363417787e-07, "epoch": 3.9585420076710722, "percentage": 79.17, "elapsed_time": "3:29:17", "remaining_time": "0:55:03", "throughput": 8696.47, "total_tokens": 109208208} +{"current_steps": 162040, "total_steps": 204665, "loss": 0.0, "lr": 2.528340549317002e-07, "epoch": 3.9586641585029194, "percentage": 79.17, "elapsed_time": "3:29:18", "remaining_time": "0:55:03", "throughput": 8696.51, "total_tokens": 109211792} +{"current_steps": 162045, "total_steps": 204665, "loss": 0.0001, "lr": 2.5277737895521365e-07, "epoch": 3.9587863093347666, "percentage": 79.18, "elapsed_time": "3:29:18", "remaining_time": "0:55:03", "throughput": 8696.52, "total_tokens": 109214992} +{"current_steps": 162050, "total_steps": 204665, "loss": 0.0, "lr": 2.5272070841273076e-07, "epoch": 3.958908460166614, "percentage": 79.18, "elapsed_time": "3:29:18", "remaining_time": "0:55:02", "throughput": 8696.54, "total_tokens": 109218192} +{"current_steps": 162055, "total_steps": 204665, "loss": 0.0, "lr": 2.526640433046638e-07, "epoch": 3.959030610998461, "percentage": 79.18, "elapsed_time": "3:29:19", "remaining_time": "0:55:02", "throughput": 8696.55, "total_tokens": 109221328} +{"current_steps": 162060, "total_steps": 204665, "loss": 0.0, "lr": 2.526073836314252e-07, "epoch": 3.959152761830308, "percentage": 79.18, "elapsed_time": "3:29:19", "remaining_time": "0:55:01", "throughput": 8696.58, "total_tokens": 109224656} +{"current_steps": 162065, "total_steps": 204665, "loss": 0.0, "lr": 2.525507293934265e-07, "epoch": 3.9592749126621554, "percentage": 79.19, "elapsed_time": "3:29:19", "remaining_time": "0:55:01", "throughput": 8696.6, "total_tokens": 109227920} +{"current_steps": 162070, "total_steps": 204665, "loss": 0.0, "lr": 2.524940805910802e-07, "epoch": 3.9593970634940026, "percentage": 79.19, "elapsed_time": "3:29:20", "remaining_time": "0:55:01", "throughput": 8696.65, "total_tokens": 109231632} +{"current_steps": 162075, "total_steps": 204665, "loss": 0.0, "lr": 2.524374372247977e-07, "epoch": 3.9595192143258497, "percentage": 79.19, "elapsed_time": "3:29:20", "remaining_time": "0:55:00", "throughput": 8696.66, "total_tokens": 109234832} +{"current_steps": 162080, "total_steps": 204665, "loss": 0.0576, "lr": 2.523807992949912e-07, "epoch": 3.9596413651576965, "percentage": 79.19, "elapsed_time": "3:29:20", "remaining_time": "0:55:00", "throughput": 8696.68, "total_tokens": 109237968} +{"current_steps": 162085, "total_steps": 204665, "loss": 0.0001, "lr": 2.52324166802073e-07, "epoch": 3.959763515989544, "percentage": 79.2, "elapsed_time": "3:29:21", "remaining_time": "0:54:59", "throughput": 8696.69, "total_tokens": 109241232} +{"current_steps": 162090, "total_steps": 204665, "loss": 0.0824, "lr": 2.5226753974645423e-07, "epoch": 3.959885666821391, "percentage": 79.2, "elapsed_time": "3:29:21", "remaining_time": "0:54:59", "throughput": 8696.71, "total_tokens": 109244496} +{"current_steps": 162095, "total_steps": 204665, "loss": 0.0, "lr": 2.522109181285473e-07, "epoch": 3.960007817653238, "percentage": 79.2, "elapsed_time": "3:29:21", "remaining_time": "0:54:59", "throughput": 8696.73, "total_tokens": 109247696} +{"current_steps": 162100, "total_steps": 204665, "loss": 0.0, "lr": 2.5215430194876343e-07, "epoch": 3.9601299684850853, "percentage": 79.2, "elapsed_time": "3:29:22", "remaining_time": "0:54:58", "throughput": 8696.77, "total_tokens": 109251152} +{"current_steps": 162105, "total_steps": 204665, "loss": 0.0, "lr": 2.520976912075149e-07, "epoch": 3.9602521193169324, "percentage": 79.21, "elapsed_time": "3:29:22", "remaining_time": "0:54:58", "throughput": 8696.8, "total_tokens": 109254608} +{"current_steps": 162110, "total_steps": 204665, "loss": 0.0003, "lr": 2.5204108590521277e-07, "epoch": 3.9603742701487796, "percentage": 79.21, "elapsed_time": "3:29:22", "remaining_time": "0:54:57", "throughput": 8696.83, "total_tokens": 109258000} +{"current_steps": 162115, "total_steps": 204665, "loss": 0.0513, "lr": 2.519844860422692e-07, "epoch": 3.960496420980627, "percentage": 79.21, "elapsed_time": "3:29:23", "remaining_time": "0:54:57", "throughput": 8696.85, "total_tokens": 109261264} +{"current_steps": 162120, "total_steps": 204665, "loss": 0.0, "lr": 2.519278916190958e-07, "epoch": 3.960618571812474, "percentage": 79.21, "elapsed_time": "3:29:23", "remaining_time": "0:54:57", "throughput": 8696.89, "total_tokens": 109264848} +{"current_steps": 162125, "total_steps": 204665, "loss": 0.0, "lr": 2.51871302636104e-07, "epoch": 3.960740722644321, "percentage": 79.21, "elapsed_time": "3:29:24", "remaining_time": "0:54:56", "throughput": 8696.91, "total_tokens": 109268048} +{"current_steps": 162130, "total_steps": 204665, "loss": 0.0, "lr": 2.51814719093705e-07, "epoch": 3.9608628734761684, "percentage": 79.22, "elapsed_time": "3:29:24", "remaining_time": "0:54:56", "throughput": 8696.94, "total_tokens": 109271504} +{"current_steps": 162135, "total_steps": 204665, "loss": 0.0, "lr": 2.5175814099231096e-07, "epoch": 3.9609850243080156, "percentage": 79.22, "elapsed_time": "3:29:24", "remaining_time": "0:54:55", "throughput": 8696.95, "total_tokens": 109274640} +{"current_steps": 162140, "total_steps": 204665, "loss": 0.0001, "lr": 2.5170156833233256e-07, "epoch": 3.9611071751398628, "percentage": 79.22, "elapsed_time": "3:29:25", "remaining_time": "0:54:55", "throughput": 8696.98, "total_tokens": 109278096} +{"current_steps": 162145, "total_steps": 204665, "loss": 0.0019, "lr": 2.516450011141821e-07, "epoch": 3.96122932597171, "percentage": 79.22, "elapsed_time": "3:29:25", "remaining_time": "0:54:55", "throughput": 8697.05, "total_tokens": 109282000} +{"current_steps": 162150, "total_steps": 204665, "loss": 0.0001, "lr": 2.5158843933827e-07, "epoch": 3.961351476803557, "percentage": 79.23, "elapsed_time": "3:29:25", "remaining_time": "0:54:54", "throughput": 8697.09, "total_tokens": 109285648} +{"current_steps": 162155, "total_steps": 204665, "loss": 0.0001, "lr": 2.515318830050085e-07, "epoch": 3.9614736276354043, "percentage": 79.23, "elapsed_time": "3:29:26", "remaining_time": "0:54:54", "throughput": 8697.12, "total_tokens": 109289040} +{"current_steps": 162160, "total_steps": 204665, "loss": 0.0, "lr": 2.514753321148081e-07, "epoch": 3.9615957784672515, "percentage": 79.23, "elapsed_time": "3:29:26", "remaining_time": "0:54:53", "throughput": 8697.15, "total_tokens": 109292432} +{"current_steps": 162165, "total_steps": 204665, "loss": 0.0, "lr": 2.514187866680807e-07, "epoch": 3.9617179292990983, "percentage": 79.23, "elapsed_time": "3:29:26", "remaining_time": "0:54:53", "throughput": 8697.18, "total_tokens": 109295824} +{"current_steps": 162170, "total_steps": 204665, "loss": 0.0, "lr": 2.5136224666523696e-07, "epoch": 3.961840080130946, "percentage": 79.24, "elapsed_time": "3:29:27", "remaining_time": "0:54:53", "throughput": 8697.18, "total_tokens": 109298832} +{"current_steps": 162175, "total_steps": 204665, "loss": 0.0571, "lr": 2.5130571210668825e-07, "epoch": 3.9619622309627927, "percentage": 79.24, "elapsed_time": "3:29:27", "remaining_time": "0:54:52", "throughput": 8697.2, "total_tokens": 109301968} +{"current_steps": 162180, "total_steps": 204665, "loss": 0.0, "lr": 2.5124918299284615e-07, "epoch": 3.9620843817946403, "percentage": 79.24, "elapsed_time": "3:29:27", "remaining_time": "0:54:52", "throughput": 8697.23, "total_tokens": 109305360} +{"current_steps": 162185, "total_steps": 204665, "loss": 0.0, "lr": 2.5119265932412105e-07, "epoch": 3.962206532626487, "percentage": 79.24, "elapsed_time": "3:29:28", "remaining_time": "0:54:51", "throughput": 8697.24, "total_tokens": 109308624} +{"current_steps": 162190, "total_steps": 204665, "loss": 0.0, "lr": 2.511361411009246e-07, "epoch": 3.962328683458334, "percentage": 79.25, "elapsed_time": "3:29:28", "remaining_time": "0:54:51", "throughput": 8697.26, "total_tokens": 109311824} +{"current_steps": 162195, "total_steps": 204665, "loss": 0.0, "lr": 2.5107962832366735e-07, "epoch": 3.9624508342901814, "percentage": 79.25, "elapsed_time": "3:29:28", "remaining_time": "0:54:51", "throughput": 8697.3, "total_tokens": 109315408} +{"current_steps": 162200, "total_steps": 204665, "loss": 0.0, "lr": 2.510231209927608e-07, "epoch": 3.9625729851220286, "percentage": 79.25, "elapsed_time": "3:29:29", "remaining_time": "0:54:50", "throughput": 8697.33, "total_tokens": 109318864} +{"current_steps": 162205, "total_steps": 204665, "loss": 0.0013, "lr": 2.509666191086152e-07, "epoch": 3.962695135953876, "percentage": 79.25, "elapsed_time": "3:29:29", "remaining_time": "0:54:50", "throughput": 8697.35, "total_tokens": 109322000} +{"current_steps": 162210, "total_steps": 204665, "loss": 0.0598, "lr": 2.509101226716418e-07, "epoch": 3.962817286785723, "percentage": 79.26, "elapsed_time": "3:29:29", "remaining_time": "0:54:49", "throughput": 8697.36, "total_tokens": 109325200} +{"current_steps": 162215, "total_steps": 204665, "loss": 0.0, "lr": 2.5085363168225173e-07, "epoch": 3.96293943761757, "percentage": 79.26, "elapsed_time": "3:29:30", "remaining_time": "0:54:49", "throughput": 8697.43, "total_tokens": 109329168} +{"current_steps": 162220, "total_steps": 204665, "loss": 0.0583, "lr": 2.5079714614085535e-07, "epoch": 3.9630615884494174, "percentage": 79.26, "elapsed_time": "3:29:30", "remaining_time": "0:54:49", "throughput": 8697.48, "total_tokens": 109332880} +{"current_steps": 162225, "total_steps": 204665, "loss": 0.0001, "lr": 2.5074066604786383e-07, "epoch": 3.9631837392812646, "percentage": 79.26, "elapsed_time": "3:29:30", "remaining_time": "0:54:48", "throughput": 8697.48, "total_tokens": 109335952} +{"current_steps": 162230, "total_steps": 204665, "loss": 0.0, "lr": 2.506841914036878e-07, "epoch": 3.9633058901131117, "percentage": 79.27, "elapsed_time": "3:29:31", "remaining_time": "0:54:48", "throughput": 8697.49, "total_tokens": 109338960} +{"current_steps": 162235, "total_steps": 204665, "loss": 0.1063, "lr": 2.506277222087375e-07, "epoch": 3.963428040944959, "percentage": 79.27, "elapsed_time": "3:29:31", "remaining_time": "0:54:47", "throughput": 8697.51, "total_tokens": 109342288} +{"current_steps": 162240, "total_steps": 204665, "loss": 0.0, "lr": 2.505712584634243e-07, "epoch": 3.963550191776806, "percentage": 79.27, "elapsed_time": "3:29:32", "remaining_time": "0:54:47", "throughput": 8697.54, "total_tokens": 109345680} +{"current_steps": 162245, "total_steps": 204665, "loss": 0.0, "lr": 2.505148001681582e-07, "epoch": 3.9636723426086533, "percentage": 79.27, "elapsed_time": "3:29:32", "remaining_time": "0:54:47", "throughput": 8697.61, "total_tokens": 109349648} +{"current_steps": 162250, "total_steps": 204665, "loss": 0.0, "lr": 2.5045834732335024e-07, "epoch": 3.9637944934405, "percentage": 79.28, "elapsed_time": "3:29:32", "remaining_time": "0:54:46", "throughput": 8697.65, "total_tokens": 109353360} +{"current_steps": 162255, "total_steps": 204665, "loss": 0.0456, "lr": 2.5040189992941063e-07, "epoch": 3.9639166442723477, "percentage": 79.28, "elapsed_time": "3:29:33", "remaining_time": "0:54:46", "throughput": 8697.66, "total_tokens": 109356368} +{"current_steps": 162260, "total_steps": 204665, "loss": 0.0, "lr": 2.5034545798675024e-07, "epoch": 3.9640387951041944, "percentage": 79.28, "elapsed_time": "3:29:33", "remaining_time": "0:54:45", "throughput": 8697.67, "total_tokens": 109359504} +{"current_steps": 162265, "total_steps": 204665, "loss": 0.0, "lr": 2.50289021495779e-07, "epoch": 3.964160945936042, "percentage": 79.28, "elapsed_time": "3:29:33", "remaining_time": "0:54:45", "throughput": 8697.7, "total_tokens": 109362960} +{"current_steps": 162270, "total_steps": 204665, "loss": 0.0, "lr": 2.502325904569077e-07, "epoch": 3.964283096767889, "percentage": 79.29, "elapsed_time": "3:29:34", "remaining_time": "0:54:45", "throughput": 8697.72, "total_tokens": 109366160} +{"current_steps": 162275, "total_steps": 204665, "loss": 0.0001, "lr": 2.5017616487054694e-07, "epoch": 3.964405247599736, "percentage": 79.29, "elapsed_time": "3:29:34", "remaining_time": "0:54:44", "throughput": 8697.74, "total_tokens": 109369488} +{"current_steps": 162280, "total_steps": 204665, "loss": 0.0, "lr": 2.501197447371065e-07, "epoch": 3.964527398431583, "percentage": 79.29, "elapsed_time": "3:29:34", "remaining_time": "0:54:44", "throughput": 8697.76, "total_tokens": 109372624} +{"current_steps": 162285, "total_steps": 204665, "loss": 0.0, "lr": 2.5006333005699734e-07, "epoch": 3.9646495492634304, "percentage": 79.29, "elapsed_time": "3:29:35", "remaining_time": "0:54:43", "throughput": 8697.77, "total_tokens": 109375824} +{"current_steps": 162290, "total_steps": 204665, "loss": 0.0849, "lr": 2.5000692083062893e-07, "epoch": 3.9647717000952776, "percentage": 79.3, "elapsed_time": "3:29:35", "remaining_time": "0:54:43", "throughput": 8697.81, "total_tokens": 109379280} +{"current_steps": 162295, "total_steps": 204665, "loss": 0.0, "lr": 2.49950517058412e-07, "epoch": 3.9648938509271248, "percentage": 79.3, "elapsed_time": "3:29:35", "remaining_time": "0:54:43", "throughput": 8697.83, "total_tokens": 109382608} +{"current_steps": 162300, "total_steps": 204665, "loss": 0.0, "lr": 2.498941187407568e-07, "epoch": 3.965016001758972, "percentage": 79.3, "elapsed_time": "3:29:36", "remaining_time": "0:54:42", "throughput": 8697.87, "total_tokens": 109386128} +{"current_steps": 162305, "total_steps": 204665, "loss": 0.0407, "lr": 2.498377258780732e-07, "epoch": 3.965138152590819, "percentage": 79.3, "elapsed_time": "3:29:36", "remaining_time": "0:54:42", "throughput": 8697.87, "total_tokens": 109389200} +{"current_steps": 162310, "total_steps": 204665, "loss": 0.0353, "lr": 2.4978133847077163e-07, "epoch": 3.9652603034226663, "percentage": 79.31, "elapsed_time": "3:29:36", "remaining_time": "0:54:41", "throughput": 8697.88, "total_tokens": 109392272} +{"current_steps": 162315, "total_steps": 204665, "loss": 0.0002, "lr": 2.497249565192617e-07, "epoch": 3.9653824542545135, "percentage": 79.31, "elapsed_time": "3:29:37", "remaining_time": "0:54:41", "throughput": 8697.92, "total_tokens": 109395792} +{"current_steps": 162320, "total_steps": 204665, "loss": 0.0, "lr": 2.4966858002395396e-07, "epoch": 3.9655046050863607, "percentage": 79.31, "elapsed_time": "3:29:37", "remaining_time": "0:54:41", "throughput": 8697.92, "total_tokens": 109398672} +{"current_steps": 162325, "total_steps": 204665, "loss": 0.0, "lr": 2.496122089852578e-07, "epoch": 3.965626755918208, "percentage": 79.31, "elapsed_time": "3:29:37", "remaining_time": "0:54:40", "throughput": 8697.94, "total_tokens": 109402000} +{"current_steps": 162330, "total_steps": 204665, "loss": 0.0, "lr": 2.495558434035838e-07, "epoch": 3.965748906750055, "percentage": 79.31, "elapsed_time": "3:29:38", "remaining_time": "0:54:40", "throughput": 8697.98, "total_tokens": 109405584} +{"current_steps": 162335, "total_steps": 204665, "loss": 0.0, "lr": 2.4949948327934134e-07, "epoch": 3.9658710575819023, "percentage": 79.32, "elapsed_time": "3:29:38", "remaining_time": "0:54:39", "throughput": 8697.99, "total_tokens": 109408592} +{"current_steps": 162340, "total_steps": 204665, "loss": 0.0001, "lr": 2.494431286129407e-07, "epoch": 3.9659932084137495, "percentage": 79.32, "elapsed_time": "3:29:38", "remaining_time": "0:54:39", "throughput": 8698.02, "total_tokens": 109411984} +{"current_steps": 162345, "total_steps": 204665, "loss": 0.075, "lr": 2.493867794047916e-07, "epoch": 3.966115359245596, "percentage": 79.32, "elapsed_time": "3:29:39", "remaining_time": "0:54:39", "throughput": 8698.04, "total_tokens": 109415248} +{"current_steps": 162350, "total_steps": 204665, "loss": 0.0, "lr": 2.493304356553033e-07, "epoch": 3.966237510077444, "percentage": 79.32, "elapsed_time": "3:29:39", "remaining_time": "0:54:38", "throughput": 8698.11, "total_tokens": 109419280} +{"current_steps": 162355, "total_steps": 204665, "loss": 0.0, "lr": 2.492740973648864e-07, "epoch": 3.9663596609092906, "percentage": 79.33, "elapsed_time": "3:29:40", "remaining_time": "0:54:38", "throughput": 8698.12, "total_tokens": 109422480} +{"current_steps": 162360, "total_steps": 204665, "loss": 0.0, "lr": 2.492177645339497e-07, "epoch": 3.9664818117411382, "percentage": 79.33, "elapsed_time": "3:29:40", "remaining_time": "0:54:37", "throughput": 8698.15, "total_tokens": 109425808} +{"current_steps": 162365, "total_steps": 204665, "loss": 0.0002, "lr": 2.491614371629035e-07, "epoch": 3.966603962572985, "percentage": 79.33, "elapsed_time": "3:29:40", "remaining_time": "0:54:37", "throughput": 8698.15, "total_tokens": 109428816} +{"current_steps": 162370, "total_steps": 204665, "loss": 0.0725, "lr": 2.491051152521576e-07, "epoch": 3.966726113404832, "percentage": 79.33, "elapsed_time": "3:29:41", "remaining_time": "0:54:37", "throughput": 8698.17, "total_tokens": 109431952} +{"current_steps": 162375, "total_steps": 204665, "loss": 0.0, "lr": 2.4904879880212094e-07, "epoch": 3.9668482642366794, "percentage": 79.34, "elapsed_time": "3:29:41", "remaining_time": "0:54:36", "throughput": 8698.19, "total_tokens": 109435216} +{"current_steps": 162380, "total_steps": 204665, "loss": 0.0001, "lr": 2.489924878132036e-07, "epoch": 3.9669704150685265, "percentage": 79.34, "elapsed_time": "3:29:41", "remaining_time": "0:54:36", "throughput": 8698.21, "total_tokens": 109438544} +{"current_steps": 162385, "total_steps": 204665, "loss": 0.0006, "lr": 2.489361822858147e-07, "epoch": 3.9670925659003737, "percentage": 79.34, "elapsed_time": "3:29:42", "remaining_time": "0:54:35", "throughput": 8698.26, "total_tokens": 109442192} +{"current_steps": 162390, "total_steps": 204665, "loss": 0.0008, "lr": 2.488798822203638e-07, "epoch": 3.967214716732221, "percentage": 79.34, "elapsed_time": "3:29:42", "remaining_time": "0:54:35", "throughput": 8698.28, "total_tokens": 109445456} +{"current_steps": 162395, "total_steps": 204665, "loss": 0.0001, "lr": 2.488235876172609e-07, "epoch": 3.967336867564068, "percentage": 79.35, "elapsed_time": "3:29:42", "remaining_time": "0:54:35", "throughput": 8698.3, "total_tokens": 109448656} +{"current_steps": 162400, "total_steps": 204665, "loss": 0.0, "lr": 2.4876729847691445e-07, "epoch": 3.9674590183959153, "percentage": 79.35, "elapsed_time": "3:29:43", "remaining_time": "0:54:34", "throughput": 8698.33, "total_tokens": 109452176} +{"current_steps": 162405, "total_steps": 204665, "loss": 0.0, "lr": 2.4871101479973456e-07, "epoch": 3.9675811692277625, "percentage": 79.35, "elapsed_time": "3:29:43", "remaining_time": "0:54:34", "throughput": 8698.34, "total_tokens": 109455312} +{"current_steps": 162410, "total_steps": 204665, "loss": 0.0001, "lr": 2.4865473658613e-07, "epoch": 3.9677033200596097, "percentage": 79.35, "elapsed_time": "3:29:43", "remaining_time": "0:54:33", "throughput": 8698.38, "total_tokens": 109458832} +{"current_steps": 162415, "total_steps": 204665, "loss": 0.0, "lr": 2.485984638365106e-07, "epoch": 3.967825470891457, "percentage": 79.36, "elapsed_time": "3:29:44", "remaining_time": "0:54:33", "throughput": 8698.4, "total_tokens": 109462096} +{"current_steps": 162420, "total_steps": 204665, "loss": 0.0, "lr": 2.4854219655128493e-07, "epoch": 3.967947621723304, "percentage": 79.36, "elapsed_time": "3:29:44", "remaining_time": "0:54:33", "throughput": 8698.45, "total_tokens": 109465808} +{"current_steps": 162425, "total_steps": 204665, "loss": 0.0444, "lr": 2.4848593473086253e-07, "epoch": 3.9680697725551513, "percentage": 79.36, "elapsed_time": "3:29:44", "remaining_time": "0:54:32", "throughput": 8698.5, "total_tokens": 109469456} +{"current_steps": 162430, "total_steps": 204665, "loss": 0.0, "lr": 2.4842967837565287e-07, "epoch": 3.968191923386998, "percentage": 79.36, "elapsed_time": "3:29:45", "remaining_time": "0:54:32", "throughput": 8698.57, "total_tokens": 109473488} +{"current_steps": 162435, "total_steps": 204665, "loss": 0.0, "lr": 2.483734274860647e-07, "epoch": 3.9683140742188456, "percentage": 79.37, "elapsed_time": "3:29:45", "remaining_time": "0:54:32", "throughput": 8698.59, "total_tokens": 109476752} +{"current_steps": 162440, "total_steps": 204665, "loss": 0.0003, "lr": 2.4831718206250694e-07, "epoch": 3.9684362250506924, "percentage": 79.37, "elapsed_time": "3:29:45", "remaining_time": "0:54:31", "throughput": 8698.61, "total_tokens": 109480080} +{"current_steps": 162445, "total_steps": 204665, "loss": 0.0002, "lr": 2.4826094210538895e-07, "epoch": 3.96855837588254, "percentage": 79.37, "elapsed_time": "3:29:46", "remaining_time": "0:54:31", "throughput": 8698.63, "total_tokens": 109483344} +{"current_steps": 162450, "total_steps": 204665, "loss": 0.0001, "lr": 2.482047076151197e-07, "epoch": 3.9686805267143868, "percentage": 79.37, "elapsed_time": "3:29:46", "remaining_time": "0:54:30", "throughput": 8698.63, "total_tokens": 109486224} +{"current_steps": 162455, "total_steps": 204665, "loss": 0.0, "lr": 2.4814847859210763e-07, "epoch": 3.968802677546234, "percentage": 79.38, "elapsed_time": "3:29:46", "remaining_time": "0:54:30", "throughput": 8698.63, "total_tokens": 109489232} +{"current_steps": 162460, "total_steps": 204665, "loss": 0.0, "lr": 2.480922550367621e-07, "epoch": 3.968924828378081, "percentage": 79.38, "elapsed_time": "3:29:47", "remaining_time": "0:54:30", "throughput": 8698.65, "total_tokens": 109492368} +{"current_steps": 162465, "total_steps": 204665, "loss": 0.0, "lr": 2.480360369494923e-07, "epoch": 3.9690469792099283, "percentage": 79.38, "elapsed_time": "3:29:47", "remaining_time": "0:54:29", "throughput": 8698.65, "total_tokens": 109495440} +{"current_steps": 162470, "total_steps": 204665, "loss": 0.0, "lr": 2.479798243307063e-07, "epoch": 3.9691691300417755, "percentage": 79.38, "elapsed_time": "3:29:47", "remaining_time": "0:54:29", "throughput": 8698.67, "total_tokens": 109498640} +{"current_steps": 162475, "total_steps": 204665, "loss": 0.0, "lr": 2.479236171808137e-07, "epoch": 3.9692912808736227, "percentage": 79.39, "elapsed_time": "3:29:48", "remaining_time": "0:54:28", "throughput": 8698.72, "total_tokens": 109502288} +{"current_steps": 162480, "total_steps": 204665, "loss": 0.0001, "lr": 2.478674155002224e-07, "epoch": 3.96941343170547, "percentage": 79.39, "elapsed_time": "3:29:48", "remaining_time": "0:54:28", "throughput": 8698.77, "total_tokens": 109506128} +{"current_steps": 162485, "total_steps": 204665, "loss": 0.0, "lr": 2.4781121928934155e-07, "epoch": 3.969535582537317, "percentage": 79.39, "elapsed_time": "3:29:49", "remaining_time": "0:54:28", "throughput": 8698.83, "total_tokens": 109509968} +{"current_steps": 162490, "total_steps": 204665, "loss": 0.1131, "lr": 2.477550285485802e-07, "epoch": 3.9696577333691643, "percentage": 79.39, "elapsed_time": "3:29:49", "remaining_time": "0:54:27", "throughput": 8698.87, "total_tokens": 109513488} +{"current_steps": 162495, "total_steps": 204665, "loss": 0.0001, "lr": 2.476988432783463e-07, "epoch": 3.9697798842010115, "percentage": 79.4, "elapsed_time": "3:29:49", "remaining_time": "0:54:27", "throughput": 8698.92, "total_tokens": 109517200} +{"current_steps": 162500, "total_steps": 204665, "loss": 0.0, "lr": 2.4764266347904905e-07, "epoch": 3.9699020350328587, "percentage": 79.4, "elapsed_time": "3:29:50", "remaining_time": "0:54:26", "throughput": 8698.93, "total_tokens": 109520400} +{"current_steps": 162505, "total_steps": 204665, "loss": 0.0393, "lr": 2.4758648915109636e-07, "epoch": 3.970024185864706, "percentage": 79.4, "elapsed_time": "3:29:50", "remaining_time": "0:54:26", "throughput": 8698.98, "total_tokens": 109524048} +{"current_steps": 162510, "total_steps": 204665, "loss": 0.0, "lr": 2.4753032029489753e-07, "epoch": 3.970146336696553, "percentage": 79.4, "elapsed_time": "3:29:50", "remaining_time": "0:54:26", "throughput": 8699.0, "total_tokens": 109527376} +{"current_steps": 162515, "total_steps": 204665, "loss": 0.0, "lr": 2.4747415691086013e-07, "epoch": 3.9702684875284002, "percentage": 79.41, "elapsed_time": "3:29:51", "remaining_time": "0:54:25", "throughput": 8699.03, "total_tokens": 109530768} +{"current_steps": 162520, "total_steps": 204665, "loss": 0.0001, "lr": 2.474179989993932e-07, "epoch": 3.9703906383602474, "percentage": 79.41, "elapsed_time": "3:29:51", "remaining_time": "0:54:25", "throughput": 8699.05, "total_tokens": 109534032} +{"current_steps": 162525, "total_steps": 204665, "loss": 0.0001, "lr": 2.473618465609053e-07, "epoch": 3.970512789192094, "percentage": 79.41, "elapsed_time": "3:29:51", "remaining_time": "0:54:24", "throughput": 8699.07, "total_tokens": 109537296} +{"current_steps": 162530, "total_steps": 204665, "loss": 0.0, "lr": 2.4730569959580416e-07, "epoch": 3.970634940023942, "percentage": 79.41, "elapsed_time": "3:29:52", "remaining_time": "0:54:24", "throughput": 8699.07, "total_tokens": 109540304} +{"current_steps": 162535, "total_steps": 204665, "loss": 0.0, "lr": 2.4724955810449865e-07, "epoch": 3.9707570908557885, "percentage": 79.42, "elapsed_time": "3:29:52", "remaining_time": "0:54:24", "throughput": 8699.08, "total_tokens": 109543440} +{"current_steps": 162540, "total_steps": 204665, "loss": 0.0, "lr": 2.471934220873969e-07, "epoch": 3.9708792416876357, "percentage": 79.42, "elapsed_time": "3:29:52", "remaining_time": "0:54:23", "throughput": 8699.13, "total_tokens": 109547152} +{"current_steps": 162545, "total_steps": 204665, "loss": 0.0, "lr": 2.471372915449067e-07, "epoch": 3.971001392519483, "percentage": 79.42, "elapsed_time": "3:29:53", "remaining_time": "0:54:23", "throughput": 8699.14, "total_tokens": 109550160} +{"current_steps": 162550, "total_steps": 204665, "loss": 0.0001, "lr": 2.4708116647743696e-07, "epoch": 3.97112354335133, "percentage": 79.42, "elapsed_time": "3:29:53", "remaining_time": "0:54:22", "throughput": 8699.15, "total_tokens": 109553296} +{"current_steps": 162555, "total_steps": 204665, "loss": 0.0893, "lr": 2.4702504688539516e-07, "epoch": 3.9712456941831773, "percentage": 79.42, "elapsed_time": "3:29:53", "remaining_time": "0:54:22", "throughput": 8699.18, "total_tokens": 109556688} +{"current_steps": 162560, "total_steps": 204665, "loss": 0.0817, "lr": 2.469689327691901e-07, "epoch": 3.9713678450150245, "percentage": 79.43, "elapsed_time": "3:29:54", "remaining_time": "0:54:22", "throughput": 8699.22, "total_tokens": 109560336} +{"current_steps": 162565, "total_steps": 204665, "loss": 0.0, "lr": 2.4691282412922923e-07, "epoch": 3.9714899958468717, "percentage": 79.43, "elapsed_time": "3:29:54", "remaining_time": "0:54:21", "throughput": 8699.25, "total_tokens": 109563792} +{"current_steps": 162570, "total_steps": 204665, "loss": 0.0, "lr": 2.4685672096592105e-07, "epoch": 3.971612146678719, "percentage": 79.43, "elapsed_time": "3:29:54", "remaining_time": "0:54:21", "throughput": 8699.27, "total_tokens": 109566992} +{"current_steps": 162575, "total_steps": 204665, "loss": 0.0, "lr": 2.468006232796731e-07, "epoch": 3.971734297510566, "percentage": 79.43, "elapsed_time": "3:29:55", "remaining_time": "0:54:20", "throughput": 8699.3, "total_tokens": 109570384} +{"current_steps": 162580, "total_steps": 204665, "loss": 0.0, "lr": 2.4674453107089356e-07, "epoch": 3.9718564483424132, "percentage": 79.44, "elapsed_time": "3:29:55", "remaining_time": "0:54:20", "throughput": 8699.31, "total_tokens": 109573520} +{"current_steps": 162585, "total_steps": 204665, "loss": 0.0015, "lr": 2.4668844433999083e-07, "epoch": 3.9719785991742604, "percentage": 79.44, "elapsed_time": "3:29:56", "remaining_time": "0:54:20", "throughput": 8699.34, "total_tokens": 109576912} +{"current_steps": 162590, "total_steps": 204665, "loss": 0.0, "lr": 2.466323630873719e-07, "epoch": 3.9721007500061076, "percentage": 79.44, "elapsed_time": "3:29:56", "remaining_time": "0:54:19", "throughput": 8699.36, "total_tokens": 109580240} +{"current_steps": 162595, "total_steps": 204665, "loss": 0.0, "lr": 2.465762873134455e-07, "epoch": 3.972222900837955, "percentage": 79.44, "elapsed_time": "3:29:56", "remaining_time": "0:54:19", "throughput": 8699.38, "total_tokens": 109583440} +{"current_steps": 162600, "total_steps": 204665, "loss": 0.0, "lr": 2.465202170186186e-07, "epoch": 3.972345051669802, "percentage": 79.45, "elapsed_time": "3:29:57", "remaining_time": "0:54:18", "throughput": 8699.42, "total_tokens": 109587024} +{"current_steps": 162605, "total_steps": 204665, "loss": 0.0001, "lr": 2.4646415220329963e-07, "epoch": 3.972467202501649, "percentage": 79.45, "elapsed_time": "3:29:57", "remaining_time": "0:54:18", "throughput": 8699.44, "total_tokens": 109590224} +{"current_steps": 162610, "total_steps": 204665, "loss": 0.0, "lr": 2.4640809286789575e-07, "epoch": 3.972589353333496, "percentage": 79.45, "elapsed_time": "3:29:57", "remaining_time": "0:54:18", "throughput": 8699.44, "total_tokens": 109593232} +{"current_steps": 162615, "total_steps": 204665, "loss": 0.0, "lr": 2.463520390128149e-07, "epoch": 3.9727115041653436, "percentage": 79.45, "elapsed_time": "3:29:58", "remaining_time": "0:54:17", "throughput": 8699.49, "total_tokens": 109596944} +{"current_steps": 162620, "total_steps": 204665, "loss": 0.0, "lr": 2.4629599063846494e-07, "epoch": 3.9728336549971903, "percentage": 79.46, "elapsed_time": "3:29:58", "remaining_time": "0:54:17", "throughput": 8699.5, "total_tokens": 109600080} +{"current_steps": 162625, "total_steps": 204665, "loss": 0.0001, "lr": 2.4623994774525313e-07, "epoch": 3.972955805829038, "percentage": 79.46, "elapsed_time": "3:29:58", "remaining_time": "0:54:16", "throughput": 8699.52, "total_tokens": 109603216} +{"current_steps": 162630, "total_steps": 204665, "loss": 0.0377, "lr": 2.461839103335873e-07, "epoch": 3.9730779566608847, "percentage": 79.46, "elapsed_time": "3:29:59", "remaining_time": "0:54:16", "throughput": 8699.53, "total_tokens": 109606352} +{"current_steps": 162635, "total_steps": 204665, "loss": 0.0, "lr": 2.461278784038747e-07, "epoch": 3.973200107492732, "percentage": 79.46, "elapsed_time": "3:29:59", "remaining_time": "0:54:16", "throughput": 8699.56, "total_tokens": 109609808} +{"current_steps": 162640, "total_steps": 204665, "loss": 0.0, "lr": 2.4607185195652315e-07, "epoch": 3.973322258324579, "percentage": 79.47, "elapsed_time": "3:29:59", "remaining_time": "0:54:15", "throughput": 8699.59, "total_tokens": 109613200} +{"current_steps": 162645, "total_steps": 204665, "loss": 0.0, "lr": 2.460158309919396e-07, "epoch": 3.9734444091564263, "percentage": 79.47, "elapsed_time": "3:30:00", "remaining_time": "0:54:15", "throughput": 8699.62, "total_tokens": 109616784} +{"current_steps": 162650, "total_steps": 204665, "loss": 0.0, "lr": 2.4595981551053193e-07, "epoch": 3.9735665599882735, "percentage": 79.47, "elapsed_time": "3:30:00", "remaining_time": "0:54:14", "throughput": 8699.66, "total_tokens": 109620240} +{"current_steps": 162655, "total_steps": 204665, "loss": 0.0002, "lr": 2.459038055127073e-07, "epoch": 3.9736887108201207, "percentage": 79.47, "elapsed_time": "3:30:00", "remaining_time": "0:54:14", "throughput": 8699.67, "total_tokens": 109623440} +{"current_steps": 162660, "total_steps": 204665, "loss": 0.0, "lr": 2.458478009988728e-07, "epoch": 3.973810861651968, "percentage": 79.48, "elapsed_time": "3:30:01", "remaining_time": "0:54:14", "throughput": 8699.69, "total_tokens": 109626704} +{"current_steps": 162665, "total_steps": 204665, "loss": 0.0001, "lr": 2.4579180196943614e-07, "epoch": 3.973933012483815, "percentage": 79.48, "elapsed_time": "3:30:01", "remaining_time": "0:54:13", "throughput": 8699.74, "total_tokens": 109630416} +{"current_steps": 162670, "total_steps": 204665, "loss": 0.0, "lr": 2.4573580842480424e-07, "epoch": 3.974055163315662, "percentage": 79.48, "elapsed_time": "3:30:01", "remaining_time": "0:54:13", "throughput": 8699.79, "total_tokens": 109634064} +{"current_steps": 162675, "total_steps": 204665, "loss": 0.0, "lr": 2.456798203653843e-07, "epoch": 3.9741773141475094, "percentage": 79.48, "elapsed_time": "3:30:02", "remaining_time": "0:54:12", "throughput": 8699.83, "total_tokens": 109637648} +{"current_steps": 162680, "total_steps": 204665, "loss": 0.0523, "lr": 2.456238377915839e-07, "epoch": 3.9742994649793566, "percentage": 79.49, "elapsed_time": "3:30:02", "remaining_time": "0:54:12", "throughput": 8699.86, "total_tokens": 109641104} +{"current_steps": 162685, "total_steps": 204665, "loss": 0.0, "lr": 2.4556786070380954e-07, "epoch": 3.974421615811204, "percentage": 79.49, "elapsed_time": "3:30:02", "remaining_time": "0:54:12", "throughput": 8699.91, "total_tokens": 109644944} +{"current_steps": 162690, "total_steps": 204665, "loss": 0.0, "lr": 2.455118891024689e-07, "epoch": 3.974543766643051, "percentage": 79.49, "elapsed_time": "3:30:03", "remaining_time": "0:54:11", "throughput": 8699.92, "total_tokens": 109648080} +{"current_steps": 162695, "total_steps": 204665, "loss": 0.0, "lr": 2.454559229879685e-07, "epoch": 3.974665917474898, "percentage": 79.49, "elapsed_time": "3:30:03", "remaining_time": "0:54:11", "throughput": 8699.96, "total_tokens": 109651664} +{"current_steps": 162700, "total_steps": 204665, "loss": 0.0, "lr": 2.453999623607155e-07, "epoch": 3.9747880683067454, "percentage": 79.5, "elapsed_time": "3:30:04", "remaining_time": "0:54:10", "throughput": 8700.0, "total_tokens": 109655120} +{"current_steps": 162705, "total_steps": 204665, "loss": 0.0, "lr": 2.4534400722111724e-07, "epoch": 3.974910219138592, "percentage": 79.5, "elapsed_time": "3:30:04", "remaining_time": "0:54:10", "throughput": 8700.0, "total_tokens": 109658128} +{"current_steps": 162710, "total_steps": 204665, "loss": 0.0, "lr": 2.4528805756958004e-07, "epoch": 3.9750323699704397, "percentage": 79.5, "elapsed_time": "3:30:04", "remaining_time": "0:54:10", "throughput": 8700.05, "total_tokens": 109661776} +{"current_steps": 162715, "total_steps": 204665, "loss": 0.0, "lr": 2.452321134065114e-07, "epoch": 3.9751545208022865, "percentage": 79.5, "elapsed_time": "3:30:05", "remaining_time": "0:54:09", "throughput": 8700.06, "total_tokens": 109664976} +{"current_steps": 162720, "total_steps": 204665, "loss": 0.0, "lr": 2.4517617473231755e-07, "epoch": 3.9752766716341337, "percentage": 79.51, "elapsed_time": "3:30:05", "remaining_time": "0:54:09", "throughput": 8700.09, "total_tokens": 109668304} +{"current_steps": 162725, "total_steps": 204665, "loss": 0.0001, "lr": 2.4512024154740594e-07, "epoch": 3.975398822465981, "percentage": 79.51, "elapsed_time": "3:30:05", "remaining_time": "0:54:08", "throughput": 8700.09, "total_tokens": 109671120} +{"current_steps": 162730, "total_steps": 204665, "loss": 0.0, "lr": 2.450643138521826e-07, "epoch": 3.975520973297828, "percentage": 79.51, "elapsed_time": "3:30:06", "remaining_time": "0:54:08", "throughput": 8700.1, "total_tokens": 109674320} +{"current_steps": 162735, "total_steps": 204665, "loss": 0.0004, "lr": 2.4500839164705464e-07, "epoch": 3.9756431241296752, "percentage": 79.51, "elapsed_time": "3:30:06", "remaining_time": "0:54:08", "throughput": 8700.13, "total_tokens": 109677776} +{"current_steps": 162740, "total_steps": 204665, "loss": 0.0, "lr": 2.4495247493242896e-07, "epoch": 3.9757652749615224, "percentage": 79.52, "elapsed_time": "3:30:06", "remaining_time": "0:54:07", "throughput": 8700.15, "total_tokens": 109680976} +{"current_steps": 162745, "total_steps": 204665, "loss": 0.06, "lr": 2.4489656370871205e-07, "epoch": 3.9758874257933696, "percentage": 79.52, "elapsed_time": "3:30:07", "remaining_time": "0:54:07", "throughput": 8700.15, "total_tokens": 109683920} +{"current_steps": 162750, "total_steps": 204665, "loss": 0.0, "lr": 2.4484065797631015e-07, "epoch": 3.976009576625217, "percentage": 79.52, "elapsed_time": "3:30:07", "remaining_time": "0:54:06", "throughput": 8700.18, "total_tokens": 109687312} +{"current_steps": 162755, "total_steps": 204665, "loss": 0.0, "lr": 2.447847577356303e-07, "epoch": 3.976131727457064, "percentage": 79.52, "elapsed_time": "3:30:07", "remaining_time": "0:54:06", "throughput": 8700.2, "total_tokens": 109690576} +{"current_steps": 162760, "total_steps": 204665, "loss": 0.0001, "lr": 2.4472886298707883e-07, "epoch": 3.976253878288911, "percentage": 79.53, "elapsed_time": "3:30:08", "remaining_time": "0:54:06", "throughput": 8700.22, "total_tokens": 109693776} +{"current_steps": 162765, "total_steps": 204665, "loss": 0.0, "lr": 2.44672973731062e-07, "epoch": 3.9763760291207584, "percentage": 79.53, "elapsed_time": "3:30:08", "remaining_time": "0:54:05", "throughput": 8700.23, "total_tokens": 109696912} +{"current_steps": 162770, "total_steps": 204665, "loss": 0.0001, "lr": 2.4461708996798634e-07, "epoch": 3.9764981799526056, "percentage": 79.53, "elapsed_time": "3:30:08", "remaining_time": "0:54:05", "throughput": 8700.26, "total_tokens": 109700304} +{"current_steps": 162775, "total_steps": 204665, "loss": 0.0, "lr": 2.445612116982588e-07, "epoch": 3.9766203307844528, "percentage": 79.53, "elapsed_time": "3:30:09", "remaining_time": "0:54:04", "throughput": 8700.26, "total_tokens": 109703248} +{"current_steps": 162780, "total_steps": 204665, "loss": 0.0, "lr": 2.44505338922285e-07, "epoch": 3.9767424816163, "percentage": 79.53, "elapsed_time": "3:30:09", "remaining_time": "0:54:04", "throughput": 8700.28, "total_tokens": 109706576} +{"current_steps": 162785, "total_steps": 204665, "loss": 0.0004, "lr": 2.444494716404718e-07, "epoch": 3.976864632448147, "percentage": 79.54, "elapsed_time": "3:30:09", "remaining_time": "0:54:04", "throughput": 8700.29, "total_tokens": 109709648} +{"current_steps": 162790, "total_steps": 204665, "loss": 0.0, "lr": 2.4439360985322497e-07, "epoch": 3.976986783279994, "percentage": 79.54, "elapsed_time": "3:30:10", "remaining_time": "0:54:03", "throughput": 8700.32, "total_tokens": 109713040} +{"current_steps": 162795, "total_steps": 204665, "loss": 0.0, "lr": 2.443377535609511e-07, "epoch": 3.9771089341118415, "percentage": 79.54, "elapsed_time": "3:30:10", "remaining_time": "0:54:03", "throughput": 8700.33, "total_tokens": 109716240} +{"current_steps": 162800, "total_steps": 204665, "loss": 0.0, "lr": 2.4428190276405657e-07, "epoch": 3.9772310849436883, "percentage": 79.54, "elapsed_time": "3:30:10", "remaining_time": "0:54:02", "throughput": 8700.35, "total_tokens": 109719504} +{"current_steps": 162805, "total_steps": 204665, "loss": 0.0578, "lr": 2.4422605746294713e-07, "epoch": 3.977353235775536, "percentage": 79.55, "elapsed_time": "3:30:11", "remaining_time": "0:54:02", "throughput": 8700.37, "total_tokens": 109722768} +{"current_steps": 162810, "total_steps": 204665, "loss": 0.0, "lr": 2.4417021765802923e-07, "epoch": 3.9774753866073826, "percentage": 79.55, "elapsed_time": "3:30:11", "remaining_time": "0:54:02", "throughput": 8700.38, "total_tokens": 109725904} +{"current_steps": 162815, "total_steps": 204665, "loss": 0.0, "lr": 2.4411438334970856e-07, "epoch": 3.97759753743923, "percentage": 79.55, "elapsed_time": "3:30:11", "remaining_time": "0:54:01", "throughput": 8700.45, "total_tokens": 109729808} +{"current_steps": 162820, "total_steps": 204665, "loss": 0.0, "lr": 2.4405855453839174e-07, "epoch": 3.977719688271077, "percentage": 79.55, "elapsed_time": "3:30:12", "remaining_time": "0:54:01", "throughput": 8700.46, "total_tokens": 109732944} +{"current_steps": 162825, "total_steps": 204665, "loss": 0.0, "lr": 2.4400273122448413e-07, "epoch": 3.977841839102924, "percentage": 79.56, "elapsed_time": "3:30:12", "remaining_time": "0:54:00", "throughput": 8700.49, "total_tokens": 109736336} +{"current_steps": 162830, "total_steps": 204665, "loss": 0.0453, "lr": 2.43946913408392e-07, "epoch": 3.9779639899347714, "percentage": 79.56, "elapsed_time": "3:30:13", "remaining_time": "0:54:00", "throughput": 8700.5, "total_tokens": 109739600} +{"current_steps": 162835, "total_steps": 204665, "loss": 0.0, "lr": 2.438911010905216e-07, "epoch": 3.9780861407666186, "percentage": 79.56, "elapsed_time": "3:30:13", "remaining_time": "0:54:00", "throughput": 8700.53, "total_tokens": 109742864} +{"current_steps": 162840, "total_steps": 204665, "loss": 0.0, "lr": 2.4383529427127804e-07, "epoch": 3.978208291598466, "percentage": 79.56, "elapsed_time": "3:30:13", "remaining_time": "0:53:59", "throughput": 8700.57, "total_tokens": 109746448} +{"current_steps": 162845, "total_steps": 204665, "loss": 0.0, "lr": 2.43779492951068e-07, "epoch": 3.978330442430313, "percentage": 79.57, "elapsed_time": "3:30:14", "remaining_time": "0:53:59", "throughput": 8700.61, "total_tokens": 109750032} +{"current_steps": 162850, "total_steps": 204665, "loss": 0.0, "lr": 2.4372369713029683e-07, "epoch": 3.97845259326216, "percentage": 79.57, "elapsed_time": "3:30:14", "remaining_time": "0:53:59", "throughput": 8700.63, "total_tokens": 109753360} +{"current_steps": 162855, "total_steps": 204665, "loss": 0.0, "lr": 2.436679068093701e-07, "epoch": 3.9785747440940074, "percentage": 79.57, "elapsed_time": "3:30:14", "remaining_time": "0:53:58", "throughput": 8700.65, "total_tokens": 109756624} +{"current_steps": 162860, "total_steps": 204665, "loss": 0.0489, "lr": 2.436121219886941e-07, "epoch": 3.9786968949258545, "percentage": 79.57, "elapsed_time": "3:30:15", "remaining_time": "0:53:58", "throughput": 8700.67, "total_tokens": 109759760} +{"current_steps": 162865, "total_steps": 204665, "loss": 0.0, "lr": 2.4355634266867387e-07, "epoch": 3.9788190457577017, "percentage": 79.58, "elapsed_time": "3:30:15", "remaining_time": "0:53:57", "throughput": 8700.69, "total_tokens": 109763152} +{"current_steps": 162870, "total_steps": 204665, "loss": 0.0, "lr": 2.435005688497157e-07, "epoch": 3.978941196589549, "percentage": 79.58, "elapsed_time": "3:30:15", "remaining_time": "0:53:57", "throughput": 8700.71, "total_tokens": 109766288} +{"current_steps": 162875, "total_steps": 204665, "loss": 0.0, "lr": 2.434448005322245e-07, "epoch": 3.9790633474213957, "percentage": 79.58, "elapsed_time": "3:30:16", "remaining_time": "0:53:57", "throughput": 8700.71, "total_tokens": 109769360} +{"current_steps": 162880, "total_steps": 204665, "loss": 0.0001, "lr": 2.4338903771660656e-07, "epoch": 3.9791854982532433, "percentage": 79.58, "elapsed_time": "3:30:16", "remaining_time": "0:53:56", "throughput": 8700.73, "total_tokens": 109772624} +{"current_steps": 162885, "total_steps": 204665, "loss": 0.0001, "lr": 2.433332804032667e-07, "epoch": 3.97930764908509, "percentage": 79.59, "elapsed_time": "3:30:16", "remaining_time": "0:53:56", "throughput": 8700.73, "total_tokens": 109775568} +{"current_steps": 162890, "total_steps": 204665, "loss": 0.0, "lr": 2.4327752859261074e-07, "epoch": 3.9794297999169377, "percentage": 79.59, "elapsed_time": "3:30:17", "remaining_time": "0:53:55", "throughput": 8700.77, "total_tokens": 109779088} +{"current_steps": 162895, "total_steps": 204665, "loss": 0.0, "lr": 2.432217822850445e-07, "epoch": 3.9795519507487844, "percentage": 79.59, "elapsed_time": "3:30:17", "remaining_time": "0:53:55", "throughput": 8700.77, "total_tokens": 109782032} +{"current_steps": 162900, "total_steps": 204665, "loss": 0.0, "lr": 2.4316604148097264e-07, "epoch": 3.9796741015806316, "percentage": 79.59, "elapsed_time": "3:30:17", "remaining_time": "0:53:55", "throughput": 8700.78, "total_tokens": 109785296} +{"current_steps": 162905, "total_steps": 204665, "loss": 0.0479, "lr": 2.431103061808012e-07, "epoch": 3.979796252412479, "percentage": 79.6, "elapsed_time": "3:30:18", "remaining_time": "0:53:54", "throughput": 8700.82, "total_tokens": 109788816} +{"current_steps": 162910, "total_steps": 204665, "loss": 0.0004, "lr": 2.43054576384935e-07, "epoch": 3.979918403244326, "percentage": 79.6, "elapsed_time": "3:30:18", "remaining_time": "0:53:54", "throughput": 8700.84, "total_tokens": 109792080} +{"current_steps": 162915, "total_steps": 204665, "loss": 0.041, "lr": 2.429988520937797e-07, "epoch": 3.980040554076173, "percentage": 79.6, "elapsed_time": "3:30:18", "remaining_time": "0:53:53", "throughput": 8700.85, "total_tokens": 109795216} +{"current_steps": 162920, "total_steps": 204665, "loss": 0.0004, "lr": 2.4294313330773995e-07, "epoch": 3.9801627049080204, "percentage": 79.6, "elapsed_time": "3:30:19", "remaining_time": "0:53:53", "throughput": 8700.9, "total_tokens": 109798800} +{"current_steps": 162925, "total_steps": 204665, "loss": 0.0, "lr": 2.428874200272215e-07, "epoch": 3.9802848557398676, "percentage": 79.61, "elapsed_time": "3:30:19", "remaining_time": "0:53:53", "throughput": 8700.94, "total_tokens": 109802384} +{"current_steps": 162930, "total_steps": 204665, "loss": 0.0, "lr": 2.4283171225262967e-07, "epoch": 3.9804070065717148, "percentage": 79.61, "elapsed_time": "3:30:19", "remaining_time": "0:53:52", "throughput": 8701.0, "total_tokens": 109806416} +{"current_steps": 162935, "total_steps": 204665, "loss": 0.0, "lr": 2.42776009984369e-07, "epoch": 3.980529157403562, "percentage": 79.61, "elapsed_time": "3:30:20", "remaining_time": "0:53:52", "throughput": 8701.05, "total_tokens": 109810192} +{"current_steps": 162940, "total_steps": 204665, "loss": 0.0, "lr": 2.427203132228451e-07, "epoch": 3.980651308235409, "percentage": 79.61, "elapsed_time": "3:30:20", "remaining_time": "0:53:51", "throughput": 8701.08, "total_tokens": 109813584} +{"current_steps": 162945, "total_steps": 204665, "loss": 0.0001, "lr": 2.426646219684625e-07, "epoch": 3.9807734590672563, "percentage": 79.62, "elapsed_time": "3:30:21", "remaining_time": "0:53:51", "throughput": 8701.09, "total_tokens": 109816592} +{"current_steps": 162950, "total_steps": 204665, "loss": 0.0, "lr": 2.426089362216267e-07, "epoch": 3.9808956098991035, "percentage": 79.62, "elapsed_time": "3:30:21", "remaining_time": "0:53:51", "throughput": 8701.1, "total_tokens": 109819728} +{"current_steps": 162955, "total_steps": 204665, "loss": 0.0, "lr": 2.4255325598274225e-07, "epoch": 3.9810177607309507, "percentage": 79.62, "elapsed_time": "3:30:21", "remaining_time": "0:53:50", "throughput": 8701.11, "total_tokens": 109822864} +{"current_steps": 162960, "total_steps": 204665, "loss": 0.0, "lr": 2.424975812522144e-07, "epoch": 3.981139911562798, "percentage": 79.62, "elapsed_time": "3:30:22", "remaining_time": "0:53:50", "throughput": 8701.14, "total_tokens": 109826320} +{"current_steps": 162965, "total_steps": 204665, "loss": 0.0, "lr": 2.424419120304481e-07, "epoch": 3.981262062394645, "percentage": 79.63, "elapsed_time": "3:30:22", "remaining_time": "0:53:49", "throughput": 8701.16, "total_tokens": 109829584} +{"current_steps": 162970, "total_steps": 204665, "loss": 0.0, "lr": 2.423862483178475e-07, "epoch": 3.981384213226492, "percentage": 79.63, "elapsed_time": "3:30:22", "remaining_time": "0:53:49", "throughput": 8701.19, "total_tokens": 109833040} +{"current_steps": 162975, "total_steps": 204665, "loss": 0.0, "lr": 2.4233059011481817e-07, "epoch": 3.9815063640583395, "percentage": 79.63, "elapsed_time": "3:30:23", "remaining_time": "0:53:49", "throughput": 8701.21, "total_tokens": 109836176} +{"current_steps": 162980, "total_steps": 204665, "loss": 0.0, "lr": 2.422749374217643e-07, "epoch": 3.981628514890186, "percentage": 79.63, "elapsed_time": "3:30:23", "remaining_time": "0:53:48", "throughput": 8701.21, "total_tokens": 109839248} +{"current_steps": 162985, "total_steps": 204665, "loss": 0.0, "lr": 2.4221929023909096e-07, "epoch": 3.981750665722034, "percentage": 79.64, "elapsed_time": "3:30:23", "remaining_time": "0:53:48", "throughput": 8701.25, "total_tokens": 109842704} +{"current_steps": 162990, "total_steps": 204665, "loss": 0.0, "lr": 2.4216364856720295e-07, "epoch": 3.9818728165538806, "percentage": 79.64, "elapsed_time": "3:30:24", "remaining_time": "0:53:47", "throughput": 8701.31, "total_tokens": 109846608} +{"current_steps": 162995, "total_steps": 204665, "loss": 0.0093, "lr": 2.421080124065045e-07, "epoch": 3.981994967385728, "percentage": 79.64, "elapsed_time": "3:30:24", "remaining_time": "0:53:47", "throughput": 8701.31, "total_tokens": 109849616} +{"current_steps": 163000, "total_steps": 204665, "loss": 0.0, "lr": 2.4205238175740075e-07, "epoch": 3.982117118217575, "percentage": 79.64, "elapsed_time": "3:30:24", "remaining_time": "0:53:47", "throughput": 8701.33, "total_tokens": 109852880} +{"current_steps": 163005, "total_steps": 204665, "loss": 0.0, "lr": 2.4199675662029563e-07, "epoch": 3.982239269049422, "percentage": 79.64, "elapsed_time": "3:30:25", "remaining_time": "0:53:46", "throughput": 8701.36, "total_tokens": 109856336} +{"current_steps": 163010, "total_steps": 204665, "loss": 0.0, "lr": 2.4194113699559395e-07, "epoch": 3.9823614198812693, "percentage": 79.65, "elapsed_time": "3:30:25", "remaining_time": "0:53:46", "throughput": 8701.39, "total_tokens": 109859600} +{"current_steps": 163015, "total_steps": 204665, "loss": 0.0523, "lr": 2.4188552288370043e-07, "epoch": 3.9824835707131165, "percentage": 79.65, "elapsed_time": "3:30:25", "remaining_time": "0:53:45", "throughput": 8701.41, "total_tokens": 109862928} +{"current_steps": 163020, "total_steps": 204665, "loss": 0.0, "lr": 2.4182991428501906e-07, "epoch": 3.9826057215449637, "percentage": 79.65, "elapsed_time": "3:30:26", "remaining_time": "0:53:45", "throughput": 8701.43, "total_tokens": 109866256} +{"current_steps": 163025, "total_steps": 204665, "loss": 0.0, "lr": 2.4177431119995483e-07, "epoch": 3.982727872376811, "percentage": 79.65, "elapsed_time": "3:30:26", "remaining_time": "0:53:45", "throughput": 8701.46, "total_tokens": 109869584} +{"current_steps": 163030, "total_steps": 204665, "loss": 0.0, "lr": 2.417187136289115e-07, "epoch": 3.982850023208658, "percentage": 79.66, "elapsed_time": "3:30:26", "remaining_time": "0:53:44", "throughput": 8701.48, "total_tokens": 109872912} +{"current_steps": 163035, "total_steps": 204665, "loss": 0.1016, "lr": 2.4166312157229384e-07, "epoch": 3.9829721740405053, "percentage": 79.66, "elapsed_time": "3:30:27", "remaining_time": "0:53:44", "throughput": 8701.49, "total_tokens": 109875984} +{"current_steps": 163040, "total_steps": 204665, "loss": 0.0001, "lr": 2.416075350305056e-07, "epoch": 3.9830943248723525, "percentage": 79.66, "elapsed_time": "3:30:27", "remaining_time": "0:53:43", "throughput": 8701.51, "total_tokens": 109879184} +{"current_steps": 163045, "total_steps": 204665, "loss": 0.0002, "lr": 2.4155195400395144e-07, "epoch": 3.9832164757041997, "percentage": 79.66, "elapsed_time": "3:30:27", "remaining_time": "0:53:43", "throughput": 8701.54, "total_tokens": 109882704} +{"current_steps": 163050, "total_steps": 204665, "loss": 0.0, "lr": 2.414963784930357e-07, "epoch": 3.983338626536047, "percentage": 79.67, "elapsed_time": "3:30:28", "remaining_time": "0:53:43", "throughput": 8701.58, "total_tokens": 109886288} +{"current_steps": 163055, "total_steps": 204665, "loss": 0.0, "lr": 2.414408084981623e-07, "epoch": 3.9834607773678936, "percentage": 79.67, "elapsed_time": "3:30:28", "remaining_time": "0:53:42", "throughput": 8701.64, "total_tokens": 109890064} +{"current_steps": 163060, "total_steps": 204665, "loss": 0.0, "lr": 2.4138524401973515e-07, "epoch": 3.9835829281997412, "percentage": 79.67, "elapsed_time": "3:30:29", "remaining_time": "0:53:42", "throughput": 8701.65, "total_tokens": 109893264} +{"current_steps": 163065, "total_steps": 204665, "loss": 0.0, "lr": 2.4132968505815874e-07, "epoch": 3.983705079031588, "percentage": 79.67, "elapsed_time": "3:30:29", "remaining_time": "0:53:41", "throughput": 8701.65, "total_tokens": 109896208} +{"current_steps": 163070, "total_steps": 204665, "loss": 0.0001, "lr": 2.4127413161383693e-07, "epoch": 3.9838272298634356, "percentage": 79.68, "elapsed_time": "3:30:29", "remaining_time": "0:53:41", "throughput": 8701.67, "total_tokens": 109899472} +{"current_steps": 163075, "total_steps": 204665, "loss": 0.0, "lr": 2.412185836871735e-07, "epoch": 3.9839493806952824, "percentage": 79.68, "elapsed_time": "3:30:30", "remaining_time": "0:53:41", "throughput": 8701.67, "total_tokens": 109902480} +{"current_steps": 163080, "total_steps": 204665, "loss": 0.0, "lr": 2.4116304127857256e-07, "epoch": 3.9840715315271296, "percentage": 79.68, "elapsed_time": "3:30:30", "remaining_time": "0:53:40", "throughput": 8701.69, "total_tokens": 109905744} +{"current_steps": 163085, "total_steps": 204665, "loss": 0.0534, "lr": 2.411075043884384e-07, "epoch": 3.9841936823589768, "percentage": 79.68, "elapsed_time": "3:30:30", "remaining_time": "0:53:40", "throughput": 8701.73, "total_tokens": 109909264} +{"current_steps": 163090, "total_steps": 204665, "loss": 0.0001, "lr": 2.410519730171743e-07, "epoch": 3.984315833190824, "percentage": 79.69, "elapsed_time": "3:30:31", "remaining_time": "0:53:39", "throughput": 8701.77, "total_tokens": 109912784} +{"current_steps": 163095, "total_steps": 204665, "loss": 0.0001, "lr": 2.4099644716518464e-07, "epoch": 3.984437984022671, "percentage": 79.69, "elapsed_time": "3:30:31", "remaining_time": "0:53:39", "throughput": 8701.78, "total_tokens": 109915920} +{"current_steps": 163100, "total_steps": 204665, "loss": 0.0114, "lr": 2.409409268328727e-07, "epoch": 3.9845601348545183, "percentage": 79.69, "elapsed_time": "3:30:31", "remaining_time": "0:53:39", "throughput": 8701.84, "total_tokens": 109919760} +{"current_steps": 163105, "total_steps": 204665, "loss": 0.0, "lr": 2.4088541202064247e-07, "epoch": 3.9846822856863655, "percentage": 79.69, "elapsed_time": "3:30:32", "remaining_time": "0:53:38", "throughput": 8701.89, "total_tokens": 109923536} +{"current_steps": 163110, "total_steps": 204665, "loss": 0.0, "lr": 2.4082990272889804e-07, "epoch": 3.9848044365182127, "percentage": 79.7, "elapsed_time": "3:30:32", "remaining_time": "0:53:38", "throughput": 8701.93, "total_tokens": 109927184} +{"current_steps": 163115, "total_steps": 204665, "loss": 0.0002, "lr": 2.407743989580424e-07, "epoch": 3.98492658735006, "percentage": 79.7, "elapsed_time": "3:30:32", "remaining_time": "0:53:37", "throughput": 8701.95, "total_tokens": 109930384} +{"current_steps": 163120, "total_steps": 204665, "loss": 0.0, "lr": 2.407189007084799e-07, "epoch": 3.985048738181907, "percentage": 79.7, "elapsed_time": "3:30:33", "remaining_time": "0:53:37", "throughput": 8702.0, "total_tokens": 109934096} +{"current_steps": 163125, "total_steps": 204665, "loss": 0.0, "lr": 2.4066340798061344e-07, "epoch": 3.9851708890137543, "percentage": 79.7, "elapsed_time": "3:30:33", "remaining_time": "0:53:37", "throughput": 8702.04, "total_tokens": 109937744} +{"current_steps": 163130, "total_steps": 204665, "loss": 0.0, "lr": 2.4060792077484727e-07, "epoch": 3.9852930398456015, "percentage": 79.71, "elapsed_time": "3:30:33", "remaining_time": "0:53:36", "throughput": 8702.06, "total_tokens": 109940944} +{"current_steps": 163135, "total_steps": 204665, "loss": 0.0001, "lr": 2.405524390915842e-07, "epoch": 3.9854151906774486, "percentage": 79.71, "elapsed_time": "3:30:34", "remaining_time": "0:53:36", "throughput": 8702.08, "total_tokens": 109944336} +{"current_steps": 163140, "total_steps": 204665, "loss": 0.0004, "lr": 2.4049696293122803e-07, "epoch": 3.985537341509296, "percentage": 79.71, "elapsed_time": "3:30:34", "remaining_time": "0:53:35", "throughput": 8702.12, "total_tokens": 109947856} +{"current_steps": 163145, "total_steps": 204665, "loss": 0.0, "lr": 2.4044149229418255e-07, "epoch": 3.985659492341143, "percentage": 79.71, "elapsed_time": "3:30:34", "remaining_time": "0:53:35", "throughput": 8702.15, "total_tokens": 109951184} +{"current_steps": 163150, "total_steps": 204665, "loss": 0.0001, "lr": 2.4038602718085057e-07, "epoch": 3.9857816431729898, "percentage": 79.72, "elapsed_time": "3:30:35", "remaining_time": "0:53:35", "throughput": 8702.17, "total_tokens": 109954512} +{"current_steps": 163155, "total_steps": 204665, "loss": 0.0, "lr": 2.4033056759163597e-07, "epoch": 3.9859037940048374, "percentage": 79.72, "elapsed_time": "3:30:35", "remaining_time": "0:53:34", "throughput": 8702.21, "total_tokens": 109958096} +{"current_steps": 163160, "total_steps": 204665, "loss": 0.0465, "lr": 2.402751135269417e-07, "epoch": 3.986025944836684, "percentage": 79.72, "elapsed_time": "3:30:35", "remaining_time": "0:53:34", "throughput": 8702.23, "total_tokens": 109961296} +{"current_steps": 163165, "total_steps": 204665, "loss": 0.0, "lr": 2.4021966498717107e-07, "epoch": 3.9861480956685313, "percentage": 79.72, "elapsed_time": "3:30:36", "remaining_time": "0:53:33", "throughput": 8702.28, "total_tokens": 109965072} +{"current_steps": 163170, "total_steps": 204665, "loss": 0.0, "lr": 2.4016422197272757e-07, "epoch": 3.9862702465003785, "percentage": 79.73, "elapsed_time": "3:30:36", "remaining_time": "0:53:33", "throughput": 8702.28, "total_tokens": 109967952} +{"current_steps": 163175, "total_steps": 204665, "loss": 0.0, "lr": 2.4010878448401393e-07, "epoch": 3.9863923973322257, "percentage": 79.73, "elapsed_time": "3:30:37", "remaining_time": "0:53:33", "throughput": 8702.29, "total_tokens": 109971088} +{"current_steps": 163180, "total_steps": 204665, "loss": 0.0001, "lr": 2.4005335252143387e-07, "epoch": 3.986514548164073, "percentage": 79.73, "elapsed_time": "3:30:37", "remaining_time": "0:53:32", "throughput": 8702.33, "total_tokens": 109974672} +{"current_steps": 163185, "total_steps": 204665, "loss": 0.0, "lr": 2.3999792608539005e-07, "epoch": 3.98663669899592, "percentage": 79.73, "elapsed_time": "3:30:37", "remaining_time": "0:53:32", "throughput": 8702.35, "total_tokens": 109977808} +{"current_steps": 163190, "total_steps": 204665, "loss": 0.0, "lr": 2.3994250517628587e-07, "epoch": 3.9867588498277673, "percentage": 79.74, "elapsed_time": "3:30:38", "remaining_time": "0:53:31", "throughput": 8702.38, "total_tokens": 109981328} +{"current_steps": 163195, "total_steps": 204665, "loss": 0.0, "lr": 2.398870897945241e-07, "epoch": 3.9868810006596145, "percentage": 79.74, "elapsed_time": "3:30:38", "remaining_time": "0:53:31", "throughput": 8702.43, "total_tokens": 109985040} +{"current_steps": 163200, "total_steps": 204665, "loss": 0.0, "lr": 2.398316799405077e-07, "epoch": 3.9870031514914617, "percentage": 79.74, "elapsed_time": "3:30:38", "remaining_time": "0:53:31", "throughput": 8702.48, "total_tokens": 109988752} +{"current_steps": 163205, "total_steps": 204665, "loss": 0.0, "lr": 2.397762756146402e-07, "epoch": 3.987125302323309, "percentage": 79.74, "elapsed_time": "3:30:39", "remaining_time": "0:53:30", "throughput": 8702.53, "total_tokens": 109992464} +{"current_steps": 163210, "total_steps": 204665, "loss": 0.0, "lr": 2.3972087681732367e-07, "epoch": 3.987247453155156, "percentage": 79.74, "elapsed_time": "3:30:39", "remaining_time": "0:53:30", "throughput": 8702.55, "total_tokens": 109995792} +{"current_steps": 163215, "total_steps": 204665, "loss": 0.0001, "lr": 2.396654835489618e-07, "epoch": 3.9873696039870032, "percentage": 79.75, "elapsed_time": "3:30:39", "remaining_time": "0:53:30", "throughput": 8702.62, "total_tokens": 109999888} +{"current_steps": 163220, "total_steps": 204665, "loss": 0.0, "lr": 2.396100958099567e-07, "epoch": 3.9874917548188504, "percentage": 79.75, "elapsed_time": "3:30:40", "remaining_time": "0:53:29", "throughput": 8702.64, "total_tokens": 110003024} +{"current_steps": 163225, "total_steps": 204665, "loss": 0.0, "lr": 2.395547136007119e-07, "epoch": 3.9876139056506976, "percentage": 79.75, "elapsed_time": "3:30:40", "remaining_time": "0:53:29", "throughput": 8702.66, "total_tokens": 110006288} +{"current_steps": 163230, "total_steps": 204665, "loss": 0.0, "lr": 2.3949933692162936e-07, "epoch": 3.987736056482545, "percentage": 79.75, "elapsed_time": "3:30:40", "remaining_time": "0:53:28", "throughput": 8702.72, "total_tokens": 110010320} +{"current_steps": 163235, "total_steps": 204665, "loss": 0.0, "lr": 2.394439657731122e-07, "epoch": 3.9878582073143916, "percentage": 79.76, "elapsed_time": "3:30:41", "remaining_time": "0:53:28", "throughput": 8702.76, "total_tokens": 110013840} +{"current_steps": 163240, "total_steps": 204665, "loss": 0.0, "lr": 2.393886001555634e-07, "epoch": 3.987980358146239, "percentage": 79.76, "elapsed_time": "3:30:41", "remaining_time": "0:53:28", "throughput": 8702.79, "total_tokens": 110017232} +{"current_steps": 163245, "total_steps": 204665, "loss": 0.0, "lr": 2.3933324006938503e-07, "epoch": 3.988102508978086, "percentage": 79.76, "elapsed_time": "3:30:41", "remaining_time": "0:53:27", "throughput": 8702.82, "total_tokens": 110020624} +{"current_steps": 163250, "total_steps": 204665, "loss": 0.0, "lr": 2.3927788551498016e-07, "epoch": 3.9882246598099336, "percentage": 79.76, "elapsed_time": "3:30:42", "remaining_time": "0:53:27", "throughput": 8702.83, "total_tokens": 110023760} +{"current_steps": 163255, "total_steps": 204665, "loss": 0.0371, "lr": 2.392225364927508e-07, "epoch": 3.9883468106417803, "percentage": 79.77, "elapsed_time": "3:30:42", "remaining_time": "0:53:26", "throughput": 8702.84, "total_tokens": 110026832} +{"current_steps": 163260, "total_steps": 204665, "loss": 0.0371, "lr": 2.3916719300310017e-07, "epoch": 3.9884689614736275, "percentage": 79.77, "elapsed_time": "3:30:42", "remaining_time": "0:53:26", "throughput": 8702.86, "total_tokens": 110030224} +{"current_steps": 163265, "total_steps": 204665, "loss": 0.0679, "lr": 2.3911185504642993e-07, "epoch": 3.9885911123054747, "percentage": 79.77, "elapsed_time": "3:30:43", "remaining_time": "0:53:26", "throughput": 8702.89, "total_tokens": 110033616} +{"current_steps": 163270, "total_steps": 204665, "loss": 0.0, "lr": 2.3905652262314335e-07, "epoch": 3.988713263137322, "percentage": 79.77, "elapsed_time": "3:30:43", "remaining_time": "0:53:25", "throughput": 8702.92, "total_tokens": 110037072} +{"current_steps": 163275, "total_steps": 204665, "loss": 0.0, "lr": 2.390011957336424e-07, "epoch": 3.988835413969169, "percentage": 79.78, "elapsed_time": "3:30:44", "remaining_time": "0:53:25", "throughput": 8702.95, "total_tokens": 110040400} +{"current_steps": 163280, "total_steps": 204665, "loss": 0.0, "lr": 2.3894587437832903e-07, "epoch": 3.9889575648010163, "percentage": 79.78, "elapsed_time": "3:30:44", "remaining_time": "0:53:24", "throughput": 8702.94, "total_tokens": 110043280} +{"current_steps": 163285, "total_steps": 204665, "loss": 0.0, "lr": 2.388905585576063e-07, "epoch": 3.9890797156328635, "percentage": 79.78, "elapsed_time": "3:30:44", "remaining_time": "0:53:24", "throughput": 8702.99, "total_tokens": 110046928} +{"current_steps": 163290, "total_steps": 204665, "loss": 0.0439, "lr": 2.3883524827187593e-07, "epoch": 3.9892018664647106, "percentage": 79.78, "elapsed_time": "3:30:45", "remaining_time": "0:53:24", "throughput": 8703.0, "total_tokens": 110050128} +{"current_steps": 163295, "total_steps": 204665, "loss": 0.0626, "lr": 2.387799435215403e-07, "epoch": 3.989324017296558, "percentage": 79.79, "elapsed_time": "3:30:45", "remaining_time": "0:53:23", "throughput": 8703.04, "total_tokens": 110053712} +{"current_steps": 163300, "total_steps": 204665, "loss": 0.0667, "lr": 2.3872464430700203e-07, "epoch": 3.989446168128405, "percentage": 79.79, "elapsed_time": "3:30:45", "remaining_time": "0:53:23", "throughput": 8703.08, "total_tokens": 110057232} +{"current_steps": 163305, "total_steps": 204665, "loss": 0.0, "lr": 2.3866935062866254e-07, "epoch": 3.989568318960252, "percentage": 79.79, "elapsed_time": "3:30:46", "remaining_time": "0:53:22", "throughput": 8703.1, "total_tokens": 110060432} +{"current_steps": 163310, "total_steps": 204665, "loss": 0.0002, "lr": 2.3861406248692463e-07, "epoch": 3.9896904697920994, "percentage": 79.79, "elapsed_time": "3:30:46", "remaining_time": "0:53:22", "throughput": 8703.12, "total_tokens": 110063824} +{"current_steps": 163315, "total_steps": 204665, "loss": 0.0, "lr": 2.3855877988218974e-07, "epoch": 3.9898126206239466, "percentage": 79.8, "elapsed_time": "3:30:46", "remaining_time": "0:53:22", "throughput": 8703.15, "total_tokens": 110067216} +{"current_steps": 163320, "total_steps": 204665, "loss": 0.0, "lr": 2.3850350281486044e-07, "epoch": 3.9899347714557933, "percentage": 79.8, "elapsed_time": "3:30:47", "remaining_time": "0:53:21", "throughput": 8703.18, "total_tokens": 110070544} +{"current_steps": 163325, "total_steps": 204665, "loss": 0.0, "lr": 2.384482312853383e-07, "epoch": 3.990056922287641, "percentage": 79.8, "elapsed_time": "3:30:47", "remaining_time": "0:53:21", "throughput": 8703.17, "total_tokens": 110073360} +{"current_steps": 163330, "total_steps": 204665, "loss": 0.0565, "lr": 2.383929652940253e-07, "epoch": 3.9901790731194877, "percentage": 79.8, "elapsed_time": "3:30:47", "remaining_time": "0:53:20", "throughput": 8703.2, "total_tokens": 110076688} +{"current_steps": 163335, "total_steps": 204665, "loss": 0.0, "lr": 2.3833770484132398e-07, "epoch": 3.9903012239513354, "percentage": 79.81, "elapsed_time": "3:30:48", "remaining_time": "0:53:20", "throughput": 8703.24, "total_tokens": 110080272} +{"current_steps": 163340, "total_steps": 204665, "loss": 0.0, "lr": 2.3828244992763536e-07, "epoch": 3.990423374783182, "percentage": 79.81, "elapsed_time": "3:30:48", "remaining_time": "0:53:20", "throughput": 8703.26, "total_tokens": 110083664} +{"current_steps": 163345, "total_steps": 204665, "loss": 0.0001, "lr": 2.3822720055336188e-07, "epoch": 3.9905455256150293, "percentage": 79.81, "elapsed_time": "3:30:48", "remaining_time": "0:53:19", "throughput": 8703.28, "total_tokens": 110086928} +{"current_steps": 163350, "total_steps": 204665, "loss": 0.0, "lr": 2.381719567189049e-07, "epoch": 3.9906676764468765, "percentage": 79.81, "elapsed_time": "3:30:49", "remaining_time": "0:53:19", "throughput": 8703.3, "total_tokens": 110090192} +{"current_steps": 163355, "total_steps": 204665, "loss": 0.0001, "lr": 2.381167184246663e-07, "epoch": 3.9907898272787237, "percentage": 79.82, "elapsed_time": "3:30:49", "remaining_time": "0:53:18", "throughput": 8703.33, "total_tokens": 110093584} +{"current_steps": 163360, "total_steps": 204665, "loss": 0.0, "lr": 2.380614856710481e-07, "epoch": 3.990911978110571, "percentage": 79.82, "elapsed_time": "3:30:49", "remaining_time": "0:53:18", "throughput": 8703.34, "total_tokens": 110096656} +{"current_steps": 163365, "total_steps": 204665, "loss": 0.0002, "lr": 2.380062584584518e-07, "epoch": 3.991034128942418, "percentage": 79.82, "elapsed_time": "3:30:50", "remaining_time": "0:53:18", "throughput": 8703.37, "total_tokens": 110100112} +{"current_steps": 163370, "total_steps": 204665, "loss": 0.0002, "lr": 2.3795103678727857e-07, "epoch": 3.9911562797742652, "percentage": 79.82, "elapsed_time": "3:30:50", "remaining_time": "0:53:17", "throughput": 8703.4, "total_tokens": 110103440} +{"current_steps": 163375, "total_steps": 204665, "loss": 0.0607, "lr": 2.3789582065793068e-07, "epoch": 3.9912784306061124, "percentage": 79.83, "elapsed_time": "3:30:50", "remaining_time": "0:53:17", "throughput": 8703.41, "total_tokens": 110106640} +{"current_steps": 163380, "total_steps": 204665, "loss": 0.0003, "lr": 2.3784061007080937e-07, "epoch": 3.9914005814379596, "percentage": 79.83, "elapsed_time": "3:30:51", "remaining_time": "0:53:16", "throughput": 8703.44, "total_tokens": 110110096} +{"current_steps": 163385, "total_steps": 204665, "loss": 0.0, "lr": 2.3778540502631583e-07, "epoch": 3.991522732269807, "percentage": 79.83, "elapsed_time": "3:30:51", "remaining_time": "0:53:16", "throughput": 8703.49, "total_tokens": 110113744} +{"current_steps": 163390, "total_steps": 204665, "loss": 0.0888, "lr": 2.377302055248519e-07, "epoch": 3.991644883101654, "percentage": 79.83, "elapsed_time": "3:30:52", "remaining_time": "0:53:16", "throughput": 8703.55, "total_tokens": 110117648} +{"current_steps": 163395, "total_steps": 204665, "loss": 0.0, "lr": 2.3767501156681923e-07, "epoch": 3.991767033933501, "percentage": 79.84, "elapsed_time": "3:30:52", "remaining_time": "0:53:15", "throughput": 8703.57, "total_tokens": 110120912} +{"current_steps": 163400, "total_steps": 204665, "loss": 0.0372, "lr": 2.3761982315261853e-07, "epoch": 3.9918891847653484, "percentage": 79.84, "elapsed_time": "3:30:52", "remaining_time": "0:53:15", "throughput": 8703.59, "total_tokens": 110124176} +{"current_steps": 163405, "total_steps": 204665, "loss": 0.0, "lr": 2.375646402826519e-07, "epoch": 3.9920113355971956, "percentage": 79.84, "elapsed_time": "3:30:53", "remaining_time": "0:53:14", "throughput": 8703.59, "total_tokens": 110127184} +{"current_steps": 163410, "total_steps": 204665, "loss": 0.0001, "lr": 2.3750946295732e-07, "epoch": 3.9921334864290428, "percentage": 79.84, "elapsed_time": "3:30:53", "remaining_time": "0:53:14", "throughput": 8703.61, "total_tokens": 110130512} +{"current_steps": 163415, "total_steps": 204665, "loss": 0.039, "lr": 2.374542911770243e-07, "epoch": 3.9922556372608895, "percentage": 79.85, "elapsed_time": "3:30:53", "remaining_time": "0:53:14", "throughput": 8703.62, "total_tokens": 110133584} +{"current_steps": 163420, "total_steps": 204665, "loss": 0.0, "lr": 2.3739912494216641e-07, "epoch": 3.992377788092737, "percentage": 79.85, "elapsed_time": "3:30:54", "remaining_time": "0:53:13", "throughput": 8703.67, "total_tokens": 110137360} +{"current_steps": 163425, "total_steps": 204665, "loss": 0.0693, "lr": 2.3734396425314695e-07, "epoch": 3.992499938924584, "percentage": 79.85, "elapsed_time": "3:30:54", "remaining_time": "0:53:13", "throughput": 8703.69, "total_tokens": 110140560} +{"current_steps": 163430, "total_steps": 204665, "loss": 0.0001, "lr": 2.3728880911036752e-07, "epoch": 3.9926220897564315, "percentage": 79.85, "elapsed_time": "3:30:54", "remaining_time": "0:53:12", "throughput": 8703.7, "total_tokens": 110143632} +{"current_steps": 163435, "total_steps": 204665, "loss": 0.0, "lr": 2.372336595142288e-07, "epoch": 3.9927442405882783, "percentage": 79.85, "elapsed_time": "3:30:55", "remaining_time": "0:53:12", "throughput": 8703.72, "total_tokens": 110146896} +{"current_steps": 163440, "total_steps": 204665, "loss": 0.0, "lr": 2.3717851546513234e-07, "epoch": 3.9928663914201254, "percentage": 79.86, "elapsed_time": "3:30:55", "remaining_time": "0:53:12", "throughput": 8703.78, "total_tokens": 110150864} +{"current_steps": 163445, "total_steps": 204665, "loss": 0.0, "lr": 2.3712337696347863e-07, "epoch": 3.9929885422519726, "percentage": 79.86, "elapsed_time": "3:30:55", "remaining_time": "0:53:11", "throughput": 8703.82, "total_tokens": 110154384} +{"current_steps": 163450, "total_steps": 204665, "loss": 0.0, "lr": 2.3706824400966886e-07, "epoch": 3.99311069308382, "percentage": 79.86, "elapsed_time": "3:30:56", "remaining_time": "0:53:11", "throughput": 8703.86, "total_tokens": 110157904} +{"current_steps": 163455, "total_steps": 204665, "loss": 0.0446, "lr": 2.3701311660410438e-07, "epoch": 3.993232843915667, "percentage": 79.86, "elapsed_time": "3:30:56", "remaining_time": "0:53:10", "throughput": 8703.87, "total_tokens": 110161104} +{"current_steps": 163460, "total_steps": 204665, "loss": 0.0, "lr": 2.3695799474718537e-07, "epoch": 3.993354994747514, "percentage": 79.87, "elapsed_time": "3:30:56", "remaining_time": "0:53:10", "throughput": 8703.91, "total_tokens": 110164688} +{"current_steps": 163465, "total_steps": 204665, "loss": 0.0317, "lr": 2.3690287843931334e-07, "epoch": 3.9934771455793614, "percentage": 79.87, "elapsed_time": "3:30:57", "remaining_time": "0:53:10", "throughput": 8703.98, "total_tokens": 110168720} +{"current_steps": 163470, "total_steps": 204665, "loss": 0.0, "lr": 2.3684776768088887e-07, "epoch": 3.9935992964112086, "percentage": 79.87, "elapsed_time": "3:30:57", "remaining_time": "0:53:09", "throughput": 8704.0, "total_tokens": 110172048} +{"current_steps": 163475, "total_steps": 204665, "loss": 0.0001, "lr": 2.3679266247231244e-07, "epoch": 3.9937214472430558, "percentage": 79.87, "elapsed_time": "3:30:58", "remaining_time": "0:53:09", "throughput": 8704.15, "total_tokens": 110177424} +{"current_steps": 163480, "total_steps": 204665, "loss": 0.0, "lr": 2.3673756281398528e-07, "epoch": 3.993843598074903, "percentage": 79.88, "elapsed_time": "3:30:58", "remaining_time": "0:53:08", "throughput": 8704.19, "total_tokens": 110181008} +{"current_steps": 163485, "total_steps": 204665, "loss": 0.0003, "lr": 2.3668246870630759e-07, "epoch": 3.99396574890675, "percentage": 79.88, "elapsed_time": "3:30:58", "remaining_time": "0:53:08", "throughput": 8704.21, "total_tokens": 110184336} +{"current_steps": 163490, "total_steps": 204665, "loss": 0.0, "lr": 2.3662738014968054e-07, "epoch": 3.9940878997385973, "percentage": 79.88, "elapsed_time": "3:30:59", "remaining_time": "0:53:08", "throughput": 8704.25, "total_tokens": 110187856} +{"current_steps": 163495, "total_steps": 204665, "loss": 0.0, "lr": 2.3657229714450422e-07, "epoch": 3.9942100505704445, "percentage": 79.88, "elapsed_time": "3:30:59", "remaining_time": "0:53:07", "throughput": 8704.25, "total_tokens": 110190800} +{"current_steps": 163500, "total_steps": 204665, "loss": 0.0, "lr": 2.365172196911799e-07, "epoch": 3.9943322014022913, "percentage": 79.89, "elapsed_time": "3:30:59", "remaining_time": "0:53:07", "throughput": 8704.29, "total_tokens": 110194384} +{"current_steps": 163505, "total_steps": 204665, "loss": 0.1123, "lr": 2.3646214779010732e-07, "epoch": 3.994454352234139, "percentage": 79.89, "elapsed_time": "3:31:00", "remaining_time": "0:53:07", "throughput": 8704.32, "total_tokens": 110197840} +{"current_steps": 163510, "total_steps": 204665, "loss": 0.0, "lr": 2.364070814416873e-07, "epoch": 3.9945765030659857, "percentage": 79.89, "elapsed_time": "3:31:00", "remaining_time": "0:53:06", "throughput": 8704.39, "total_tokens": 110201872} +{"current_steps": 163515, "total_steps": 204665, "loss": 0.0336, "lr": 2.3635202064632075e-07, "epoch": 3.9946986538978333, "percentage": 79.89, "elapsed_time": "3:31:00", "remaining_time": "0:53:06", "throughput": 8704.39, "total_tokens": 110204752} +{"current_steps": 163520, "total_steps": 204665, "loss": 0.0, "lr": 2.3629696540440735e-07, "epoch": 3.99482080472968, "percentage": 79.9, "elapsed_time": "3:31:01", "remaining_time": "0:53:05", "throughput": 8704.42, "total_tokens": 110208208} +{"current_steps": 163525, "total_steps": 204665, "loss": 0.0, "lr": 2.3624191571634822e-07, "epoch": 3.9949429555615272, "percentage": 79.9, "elapsed_time": "3:31:01", "remaining_time": "0:53:05", "throughput": 8704.44, "total_tokens": 110211472} +{"current_steps": 163530, "total_steps": 204665, "loss": 0.0, "lr": 2.3618687158254292e-07, "epoch": 3.9950651063933744, "percentage": 79.9, "elapsed_time": "3:31:01", "remaining_time": "0:53:05", "throughput": 8704.45, "total_tokens": 110214672} +{"current_steps": 163535, "total_steps": 204665, "loss": 0.0, "lr": 2.3613183300339246e-07, "epoch": 3.9951872572252216, "percentage": 79.9, "elapsed_time": "3:31:02", "remaining_time": "0:53:04", "throughput": 8704.49, "total_tokens": 110218128} +{"current_steps": 163540, "total_steps": 204665, "loss": 0.0, "lr": 2.3607679997929652e-07, "epoch": 3.995309408057069, "percentage": 79.91, "elapsed_time": "3:31:02", "remaining_time": "0:53:04", "throughput": 8704.51, "total_tokens": 110221392} +{"current_steps": 163545, "total_steps": 204665, "loss": 0.0, "lr": 2.3602177251065548e-07, "epoch": 3.995431558888916, "percentage": 79.91, "elapsed_time": "3:31:02", "remaining_time": "0:53:03", "throughput": 8704.52, "total_tokens": 110224464} +{"current_steps": 163550, "total_steps": 204665, "loss": 0.0, "lr": 2.3596675059786998e-07, "epoch": 3.995553709720763, "percentage": 79.91, "elapsed_time": "3:31:03", "remaining_time": "0:53:03", "throughput": 8704.55, "total_tokens": 110227856} +{"current_steps": 163555, "total_steps": 204665, "loss": 0.0, "lr": 2.3591173424133937e-07, "epoch": 3.9956758605526104, "percentage": 79.91, "elapsed_time": "3:31:03", "remaining_time": "0:53:03", "throughput": 8704.57, "total_tokens": 110231248} +{"current_steps": 163560, "total_steps": 204665, "loss": 0.0645, "lr": 2.3585672344146457e-07, "epoch": 3.9957980113844576, "percentage": 79.92, "elapsed_time": "3:31:03", "remaining_time": "0:53:02", "throughput": 8704.6, "total_tokens": 110234640} +{"current_steps": 163565, "total_steps": 204665, "loss": 0.0, "lr": 2.358017181986448e-07, "epoch": 3.9959201622163047, "percentage": 79.92, "elapsed_time": "3:31:04", "remaining_time": "0:53:02", "throughput": 8704.61, "total_tokens": 110237712} +{"current_steps": 163570, "total_steps": 204665, "loss": 0.0, "lr": 2.3574671851328077e-07, "epoch": 3.996042313048152, "percentage": 79.92, "elapsed_time": "3:31:04", "remaining_time": "0:53:01", "throughput": 8704.61, "total_tokens": 110240720} +{"current_steps": 163575, "total_steps": 204665, "loss": 0.0, "lr": 2.3569172438577189e-07, "epoch": 3.996164463879999, "percentage": 79.92, "elapsed_time": "3:31:04", "remaining_time": "0:53:01", "throughput": 8704.66, "total_tokens": 110244368} +{"current_steps": 163580, "total_steps": 204665, "loss": 0.0005, "lr": 2.3563673581651866e-07, "epoch": 3.9962866147118463, "percentage": 79.93, "elapsed_time": "3:31:05", "remaining_time": "0:53:01", "throughput": 8704.66, "total_tokens": 110247376} +{"current_steps": 163585, "total_steps": 204665, "loss": 0.0, "lr": 2.3558175280592075e-07, "epoch": 3.9964087655436935, "percentage": 79.93, "elapsed_time": "3:31:05", "remaining_time": "0:53:00", "throughput": 8704.68, "total_tokens": 110250576} +{"current_steps": 163590, "total_steps": 204665, "loss": 0.0, "lr": 2.355267753543776e-07, "epoch": 3.9965309163755407, "percentage": 79.93, "elapsed_time": "3:31:06", "remaining_time": "0:53:00", "throughput": 8704.69, "total_tokens": 110253776} +{"current_steps": 163595, "total_steps": 204665, "loss": 0.0, "lr": 2.3547180346228957e-07, "epoch": 3.9966530672073874, "percentage": 79.93, "elapsed_time": "3:31:06", "remaining_time": "0:52:59", "throughput": 8704.73, "total_tokens": 110257296} +{"current_steps": 163600, "total_steps": 204665, "loss": 0.0, "lr": 2.35416837130056e-07, "epoch": 3.996775218039235, "percentage": 79.94, "elapsed_time": "3:31:06", "remaining_time": "0:52:59", "throughput": 8704.76, "total_tokens": 110260688} +{"current_steps": 163605, "total_steps": 204665, "loss": 0.0, "lr": 2.353618763580768e-07, "epoch": 3.996897368871082, "percentage": 79.94, "elapsed_time": "3:31:07", "remaining_time": "0:52:59", "throughput": 8704.82, "total_tokens": 110264656} +{"current_steps": 163610, "total_steps": 204665, "loss": 0.0, "lr": 2.353069211467521e-07, "epoch": 3.997019519702929, "percentage": 79.94, "elapsed_time": "3:31:07", "remaining_time": "0:52:58", "throughput": 8704.84, "total_tokens": 110267920} +{"current_steps": 163615, "total_steps": 204665, "loss": 0.0, "lr": 2.352519714964808e-07, "epoch": 3.997141670534776, "percentage": 79.94, "elapsed_time": "3:31:07", "remaining_time": "0:52:58", "throughput": 8704.87, "total_tokens": 110271312} +{"current_steps": 163620, "total_steps": 204665, "loss": 0.0002, "lr": 2.3519702740766312e-07, "epoch": 3.9972638213666234, "percentage": 79.95, "elapsed_time": "3:31:08", "remaining_time": "0:52:57", "throughput": 8704.89, "total_tokens": 110274576} +{"current_steps": 163625, "total_steps": 204665, "loss": 0.0, "lr": 2.3514208888069798e-07, "epoch": 3.9973859721984706, "percentage": 79.95, "elapsed_time": "3:31:08", "remaining_time": "0:52:57", "throughput": 8704.88, "total_tokens": 110277392} +{"current_steps": 163630, "total_steps": 204665, "loss": 0.0, "lr": 2.3508715591598572e-07, "epoch": 3.9975081230303178, "percentage": 79.95, "elapsed_time": "3:31:08", "remaining_time": "0:52:57", "throughput": 8704.9, "total_tokens": 110280592} +{"current_steps": 163635, "total_steps": 204665, "loss": 0.0, "lr": 2.3503222851392513e-07, "epoch": 3.997630273862165, "percentage": 79.95, "elapsed_time": "3:31:09", "remaining_time": "0:52:56", "throughput": 8704.93, "total_tokens": 110283984} +{"current_steps": 163640, "total_steps": 204665, "loss": 0.0663, "lr": 2.3497730667491577e-07, "epoch": 3.997752424694012, "percentage": 79.96, "elapsed_time": "3:31:09", "remaining_time": "0:52:56", "throughput": 8704.95, "total_tokens": 110287376} +{"current_steps": 163645, "total_steps": 204665, "loss": 0.0002, "lr": 2.3492239039935756e-07, "epoch": 3.9978745755258593, "percentage": 79.96, "elapsed_time": "3:31:09", "remaining_time": "0:52:55", "throughput": 8704.99, "total_tokens": 110290960} +{"current_steps": 163650, "total_steps": 204665, "loss": 0.0, "lr": 2.348674796876493e-07, "epoch": 3.9979967263577065, "percentage": 79.96, "elapsed_time": "3:31:10", "remaining_time": "0:52:55", "throughput": 8705.03, "total_tokens": 110294416} +{"current_steps": 163655, "total_steps": 204665, "loss": 0.0, "lr": 2.3481257454019078e-07, "epoch": 3.9981188771895537, "percentage": 79.96, "elapsed_time": "3:31:10", "remaining_time": "0:52:55", "throughput": 8705.1, "total_tokens": 110298512} +{"current_steps": 163660, "total_steps": 204665, "loss": 0.0535, "lr": 2.3475767495738075e-07, "epoch": 3.998241028021401, "percentage": 79.96, "elapsed_time": "3:31:10", "remaining_time": "0:52:54", "throughput": 8705.14, "total_tokens": 110302160} +{"current_steps": 163665, "total_steps": 204665, "loss": 0.0, "lr": 2.347027809396186e-07, "epoch": 3.998363178853248, "percentage": 79.97, "elapsed_time": "3:31:11", "remaining_time": "0:52:54", "throughput": 8705.18, "total_tokens": 110305616} +{"current_steps": 163670, "total_steps": 204665, "loss": 0.0135, "lr": 2.346478924873041e-07, "epoch": 3.9984853296850953, "percentage": 79.97, "elapsed_time": "3:31:11", "remaining_time": "0:52:53", "throughput": 8705.19, "total_tokens": 110308752} +{"current_steps": 163675, "total_steps": 204665, "loss": 0.0, "lr": 2.3459300960083593e-07, "epoch": 3.9986074805169425, "percentage": 79.97, "elapsed_time": "3:31:11", "remaining_time": "0:52:53", "throughput": 8705.21, "total_tokens": 110312080} +{"current_steps": 163680, "total_steps": 204665, "loss": 0.0975, "lr": 2.3453813228061302e-07, "epoch": 3.9987296313487892, "percentage": 79.97, "elapsed_time": "3:31:12", "remaining_time": "0:52:53", "throughput": 8705.23, "total_tokens": 110315280} +{"current_steps": 163685, "total_steps": 204665, "loss": 0.0191, "lr": 2.3448326052703492e-07, "epoch": 3.998851782180637, "percentage": 79.98, "elapsed_time": "3:31:12", "remaining_time": "0:52:52", "throughput": 8705.25, "total_tokens": 110318544} +{"current_steps": 163690, "total_steps": 204665, "loss": 0.0003, "lr": 2.3442839434050043e-07, "epoch": 3.9989739330124836, "percentage": 79.98, "elapsed_time": "3:31:13", "remaining_time": "0:52:52", "throughput": 8705.3, "total_tokens": 110322256} +{"current_steps": 163695, "total_steps": 204665, "loss": 0.0, "lr": 2.3437353372140833e-07, "epoch": 3.9990960838443312, "percentage": 79.98, "elapsed_time": "3:31:13", "remaining_time": "0:52:51", "throughput": 8705.32, "total_tokens": 110325520} +{"current_steps": 163700, "total_steps": 204665, "loss": 0.0, "lr": 2.3431867867015788e-07, "epoch": 3.999218234676178, "percentage": 79.98, "elapsed_time": "3:31:13", "remaining_time": "0:52:51", "throughput": 8705.35, "total_tokens": 110328976} +{"current_steps": 163705, "total_steps": 204665, "loss": 0.0001, "lr": 2.3426382918714815e-07, "epoch": 3.999340385508025, "percentage": 79.99, "elapsed_time": "3:31:14", "remaining_time": "0:52:51", "throughput": 8705.36, "total_tokens": 110332048} +{"current_steps": 163710, "total_steps": 204665, "loss": 0.0, "lr": 2.3420898527277754e-07, "epoch": 3.9994625363398724, "percentage": 79.99, "elapsed_time": "3:31:14", "remaining_time": "0:52:50", "throughput": 8705.39, "total_tokens": 110335568} +{"current_steps": 163715, "total_steps": 204665, "loss": 0.0, "lr": 2.341541469274454e-07, "epoch": 3.9995846871717196, "percentage": 79.99, "elapsed_time": "3:31:14", "remaining_time": "0:52:50", "throughput": 8705.42, "total_tokens": 110339024} +{"current_steps": 163720, "total_steps": 204665, "loss": 0.0002, "lr": 2.3409931415155003e-07, "epoch": 3.9997068380035667, "percentage": 79.99, "elapsed_time": "3:31:15", "remaining_time": "0:52:49", "throughput": 8705.43, "total_tokens": 110342032} +{"current_steps": 163725, "total_steps": 204665, "loss": 0.0001, "lr": 2.340444869454905e-07, "epoch": 3.999828988835414, "percentage": 80.0, "elapsed_time": "3:31:15", "remaining_time": "0:52:49", "throughput": 8705.47, "total_tokens": 110345680} +{"current_steps": 163730, "total_steps": 204665, "loss": 0.0, "lr": 2.339896653096658e-07, "epoch": 3.999951139667261, "percentage": 80.0, "elapsed_time": "3:31:15", "remaining_time": "0:52:49", "throughput": 8705.48, "total_tokens": 110348688} +{"current_steps": 163735, "total_steps": 204665, "loss": 0.0, "lr": 2.3393484924447392e-07, "epoch": 4.000073290499108, "percentage": 80.0, "elapsed_time": "3:31:16", "remaining_time": "0:52:48", "throughput": 8705.35, "total_tokens": 110351272} +{"current_steps": 163740, "total_steps": 204665, "loss": 0.0, "lr": 2.3388003875031415e-07, "epoch": 4.0001954413309555, "percentage": 80.0, "elapsed_time": "3:31:16", "remaining_time": "0:52:48", "throughput": 8705.38, "total_tokens": 110354664} +{"current_steps": 163744, "total_steps": 204665, "eval_loss": 0.247334286570549, "epoch": 4.000293161996433, "percentage": 80.01, "elapsed_time": "3:32:04", "remaining_time": "0:53:00", "throughput": 8672.63, "total_tokens": 110357352} +{"current_steps": 163745, "total_steps": 204665, "loss": 0.0, "lr": 2.3382523382758456e-07, "epoch": 4.000317592162802, "percentage": 80.01, "elapsed_time": "3:32:36", "remaining_time": "0:53:07", "throughput": 8651.07, "total_tokens": 110357928} +{"current_steps": 163750, "total_steps": 204665, "loss": 0.0, "lr": 2.337704344766842e-07, "epoch": 4.00043974299465, "percentage": 80.01, "elapsed_time": "3:32:36", "remaining_time": "0:53:07", "throughput": 8651.1, "total_tokens": 110361320} +{"current_steps": 163755, "total_steps": 204665, "loss": 0.0, "lr": 2.337156406980111e-07, "epoch": 4.000561893826497, "percentage": 80.01, "elapsed_time": "3:32:37", "remaining_time": "0:53:07", "throughput": 8651.12, "total_tokens": 110364584} +{"current_steps": 163760, "total_steps": 204665, "loss": 0.0001, "lr": 2.3366085249196387e-07, "epoch": 4.000684044658344, "percentage": 80.01, "elapsed_time": "3:32:37", "remaining_time": "0:53:06", "throughput": 8651.16, "total_tokens": 110368168} +{"current_steps": 163765, "total_steps": 204665, "loss": 0.0, "lr": 2.3360606985894138e-07, "epoch": 4.000806195490191, "percentage": 80.02, "elapsed_time": "3:32:37", "remaining_time": "0:53:06", "throughput": 8651.17, "total_tokens": 110371176} +{"current_steps": 163770, "total_steps": 204665, "loss": 0.0, "lr": 2.335512927993414e-07, "epoch": 4.000928346322039, "percentage": 80.02, "elapsed_time": "3:32:38", "remaining_time": "0:53:05", "throughput": 8651.18, "total_tokens": 110374312} +{"current_steps": 163775, "total_steps": 204665, "loss": 0.0, "lr": 2.3349652131356278e-07, "epoch": 4.001050497153885, "percentage": 80.02, "elapsed_time": "3:32:38", "remaining_time": "0:53:05", "throughput": 8651.2, "total_tokens": 110377512} +{"current_steps": 163780, "total_steps": 204665, "loss": 0.0419, "lr": 2.334417554020035e-07, "epoch": 4.001172647985733, "percentage": 80.02, "elapsed_time": "3:32:38", "remaining_time": "0:53:05", "throughput": 8651.24, "total_tokens": 110381096} +{"current_steps": 163785, "total_steps": 204665, "loss": 0.0, "lr": 2.333869950650621e-07, "epoch": 4.00129479881758, "percentage": 80.03, "elapsed_time": "3:32:39", "remaining_time": "0:53:04", "throughput": 8651.26, "total_tokens": 110384296} +{"current_steps": 163790, "total_steps": 204665, "loss": 0.0, "lr": 2.333322403031367e-07, "epoch": 4.001416949649427, "percentage": 80.03, "elapsed_time": "3:32:39", "remaining_time": "0:53:04", "throughput": 8651.28, "total_tokens": 110387496} +{"current_steps": 163795, "total_steps": 204665, "loss": 0.0, "lr": 2.332774911166252e-07, "epoch": 4.001539100481274, "percentage": 80.03, "elapsed_time": "3:32:40", "remaining_time": "0:53:03", "throughput": 8651.33, "total_tokens": 110391208} +{"current_steps": 163800, "total_steps": 204665, "loss": 0.0, "lr": 2.332227475059263e-07, "epoch": 4.001661251313122, "percentage": 80.03, "elapsed_time": "3:32:40", "remaining_time": "0:53:03", "throughput": 8651.35, "total_tokens": 110394536} +{"current_steps": 163805, "total_steps": 204665, "loss": 0.0, "lr": 2.3316800947143744e-07, "epoch": 4.0017834021449685, "percentage": 80.04, "elapsed_time": "3:32:40", "remaining_time": "0:53:03", "throughput": 8651.37, "total_tokens": 110397672} +{"current_steps": 163810, "total_steps": 204665, "loss": 0.0, "lr": 2.3311327701355743e-07, "epoch": 4.001905552976816, "percentage": 80.04, "elapsed_time": "3:32:41", "remaining_time": "0:53:02", "throughput": 8651.41, "total_tokens": 110401320} +{"current_steps": 163815, "total_steps": 204665, "loss": 0.0, "lr": 2.3305855013268372e-07, "epoch": 4.002027703808663, "percentage": 80.04, "elapsed_time": "3:32:41", "remaining_time": "0:53:02", "throughput": 8651.45, "total_tokens": 110404776} +{"current_steps": 163820, "total_steps": 204665, "loss": 0.0002, "lr": 2.3300382882921444e-07, "epoch": 4.0021498546405105, "percentage": 80.04, "elapsed_time": "3:32:41", "remaining_time": "0:53:01", "throughput": 8651.49, "total_tokens": 110408296} +{"current_steps": 163825, "total_steps": 204665, "loss": 0.0, "lr": 2.329491131035478e-07, "epoch": 4.002272005472357, "percentage": 80.05, "elapsed_time": "3:32:42", "remaining_time": "0:53:01", "throughput": 8651.49, "total_tokens": 110411240} +{"current_steps": 163830, "total_steps": 204665, "loss": 0.0, "lr": 2.3289440295608142e-07, "epoch": 4.002394156304204, "percentage": 80.05, "elapsed_time": "3:32:42", "remaining_time": "0:53:01", "throughput": 8651.53, "total_tokens": 110414888} +{"current_steps": 163835, "total_steps": 204665, "loss": 0.0, "lr": 2.328396983872134e-07, "epoch": 4.002516307136052, "percentage": 80.05, "elapsed_time": "3:32:42", "remaining_time": "0:53:00", "throughput": 8651.56, "total_tokens": 110418216} +{"current_steps": 163840, "total_steps": 204665, "loss": 0.0, "lr": 2.327849993973413e-07, "epoch": 4.002638457967898, "percentage": 80.05, "elapsed_time": "3:32:43", "remaining_time": "0:53:00", "throughput": 8651.58, "total_tokens": 110421544} +{"current_steps": 163845, "total_steps": 204665, "loss": 0.0, "lr": 2.3273030598686317e-07, "epoch": 4.002760608799746, "percentage": 80.06, "elapsed_time": "3:32:43", "remaining_time": "0:52:59", "throughput": 8651.6, "total_tokens": 110424744} +{"current_steps": 163850, "total_steps": 204665, "loss": 0.0, "lr": 2.3267561815617641e-07, "epoch": 4.002882759631593, "percentage": 80.06, "elapsed_time": "3:32:43", "remaining_time": "0:52:59", "throughput": 8651.66, "total_tokens": 110428648} +{"current_steps": 163855, "total_steps": 204665, "loss": 0.0, "lr": 2.32620935905679e-07, "epoch": 4.00300491046344, "percentage": 80.06, "elapsed_time": "3:32:44", "remaining_time": "0:52:59", "throughput": 8651.71, "total_tokens": 110432424} +{"current_steps": 163860, "total_steps": 204665, "loss": 0.0, "lr": 2.3256625923576877e-07, "epoch": 4.003127061295287, "percentage": 80.06, "elapsed_time": "3:32:44", "remaining_time": "0:52:58", "throughput": 8651.74, "total_tokens": 110435752} +{"current_steps": 163865, "total_steps": 204665, "loss": 0.0, "lr": 2.325115881468428e-07, "epoch": 4.003249212127135, "percentage": 80.06, "elapsed_time": "3:32:44", "remaining_time": "0:52:58", "throughput": 8651.76, "total_tokens": 110438952} +{"current_steps": 163870, "total_steps": 204665, "loss": 0.0, "lr": 2.324569226392994e-07, "epoch": 4.0033713629589815, "percentage": 80.07, "elapsed_time": "3:32:45", "remaining_time": "0:52:57", "throughput": 8651.77, "total_tokens": 110442024} +{"current_steps": 163875, "total_steps": 204665, "loss": 0.0, "lr": 2.3240226271353525e-07, "epoch": 4.003493513790829, "percentage": 80.07, "elapsed_time": "3:32:45", "remaining_time": "0:52:57", "throughput": 8651.83, "total_tokens": 110445928} +{"current_steps": 163880, "total_steps": 204665, "loss": 0.0, "lr": 2.323476083699487e-07, "epoch": 4.003615664622676, "percentage": 80.07, "elapsed_time": "3:32:45", "remaining_time": "0:52:57", "throughput": 8651.84, "total_tokens": 110448936} +{"current_steps": 163885, "total_steps": 204665, "loss": 0.0, "lr": 2.3229295960893647e-07, "epoch": 4.003737815454524, "percentage": 80.07, "elapsed_time": "3:32:46", "remaining_time": "0:52:56", "throughput": 8651.89, "total_tokens": 110452648} +{"current_steps": 163890, "total_steps": 204665, "loss": 0.0, "lr": 2.3223831643089664e-07, "epoch": 4.00385996628637, "percentage": 80.08, "elapsed_time": "3:32:46", "remaining_time": "0:52:56", "throughput": 8651.92, "total_tokens": 110456040} +{"current_steps": 163895, "total_steps": 204665, "loss": 0.0, "lr": 2.3218367883622635e-07, "epoch": 4.003982117118218, "percentage": 80.08, "elapsed_time": "3:32:46", "remaining_time": "0:52:55", "throughput": 8651.93, "total_tokens": 110459176} +{"current_steps": 163900, "total_steps": 204665, "loss": 0.0, "lr": 2.3212904682532242e-07, "epoch": 4.004104267950065, "percentage": 80.08, "elapsed_time": "3:32:47", "remaining_time": "0:52:55", "throughput": 8651.97, "total_tokens": 110462760} +{"current_steps": 163905, "total_steps": 204665, "loss": 0.0001, "lr": 2.3207442039858306e-07, "epoch": 4.004226418781912, "percentage": 80.08, "elapsed_time": "3:32:47", "remaining_time": "0:52:55", "throughput": 8652.0, "total_tokens": 110466152} +{"current_steps": 163910, "total_steps": 204665, "loss": 0.0, "lr": 2.320197995564046e-07, "epoch": 4.004348569613759, "percentage": 80.09, "elapsed_time": "3:32:48", "remaining_time": "0:52:54", "throughput": 8652.03, "total_tokens": 110469544} +{"current_steps": 163915, "total_steps": 204665, "loss": 0.0534, "lr": 2.3196518429918488e-07, "epoch": 4.004470720445606, "percentage": 80.09, "elapsed_time": "3:32:48", "remaining_time": "0:52:54", "throughput": 8652.08, "total_tokens": 110473320} +{"current_steps": 163920, "total_steps": 204665, "loss": 0.0, "lr": 2.319105746273211e-07, "epoch": 4.0045928712774534, "percentage": 80.09, "elapsed_time": "3:32:48", "remaining_time": "0:52:53", "throughput": 8652.1, "total_tokens": 110476520} +{"current_steps": 163925, "total_steps": 204665, "loss": 0.0, "lr": 2.3185597054120999e-07, "epoch": 4.0047150221093, "percentage": 80.09, "elapsed_time": "3:32:49", "remaining_time": "0:52:53", "throughput": 8652.13, "total_tokens": 110479976} +{"current_steps": 163930, "total_steps": 204665, "loss": 0.0, "lr": 2.3180137204124905e-07, "epoch": 4.004837172941148, "percentage": 80.1, "elapsed_time": "3:32:49", "remaining_time": "0:52:53", "throughput": 8652.16, "total_tokens": 110483496} +{"current_steps": 163935, "total_steps": 204665, "loss": 0.0, "lr": 2.317467791278349e-07, "epoch": 4.004959323772995, "percentage": 80.1, "elapsed_time": "3:32:49", "remaining_time": "0:52:52", "throughput": 8652.18, "total_tokens": 110486632} +{"current_steps": 163940, "total_steps": 204665, "loss": 0.0, "lr": 2.3169219180136513e-07, "epoch": 4.005081474604842, "percentage": 80.1, "elapsed_time": "3:32:50", "remaining_time": "0:52:52", "throughput": 8652.21, "total_tokens": 110490152} +{"current_steps": 163945, "total_steps": 204665, "loss": 0.0, "lr": 2.3163761006223616e-07, "epoch": 4.005203625436689, "percentage": 80.1, "elapsed_time": "3:32:50", "remaining_time": "0:52:51", "throughput": 8652.28, "total_tokens": 110494120} +{"current_steps": 163950, "total_steps": 204665, "loss": 0.0, "lr": 2.315830339108451e-07, "epoch": 4.005325776268537, "percentage": 80.11, "elapsed_time": "3:32:50", "remaining_time": "0:52:51", "throughput": 8652.33, "total_tokens": 110497896} +{"current_steps": 163955, "total_steps": 204665, "loss": 0.0, "lr": 2.3152846334758912e-07, "epoch": 4.005447927100383, "percentage": 80.11, "elapsed_time": "3:32:51", "remaining_time": "0:52:51", "throughput": 8652.35, "total_tokens": 110501032} +{"current_steps": 163960, "total_steps": 204665, "loss": 0.0, "lr": 2.314738983728647e-07, "epoch": 4.005570077932231, "percentage": 80.11, "elapsed_time": "3:32:51", "remaining_time": "0:52:50", "throughput": 8652.39, "total_tokens": 110504616} +{"current_steps": 163965, "total_steps": 204665, "loss": 0.0, "lr": 2.31419338987069e-07, "epoch": 4.005692228764078, "percentage": 80.11, "elapsed_time": "3:32:51", "remaining_time": "0:52:50", "throughput": 8652.43, "total_tokens": 110508200} +{"current_steps": 163970, "total_steps": 204665, "loss": 0.0, "lr": 2.3136478519059832e-07, "epoch": 4.005814379595925, "percentage": 80.12, "elapsed_time": "3:32:52", "remaining_time": "0:52:49", "throughput": 8652.46, "total_tokens": 110511720} +{"current_steps": 163975, "total_steps": 204665, "loss": 0.0002, "lr": 2.3131023698384966e-07, "epoch": 4.005936530427772, "percentage": 80.12, "elapsed_time": "3:32:52", "remaining_time": "0:52:49", "throughput": 8652.55, "total_tokens": 110516008} +{"current_steps": 163980, "total_steps": 204665, "loss": 0.0, "lr": 2.3125569436721993e-07, "epoch": 4.00605868125962, "percentage": 80.12, "elapsed_time": "3:32:52", "remaining_time": "0:52:49", "throughput": 8652.55, "total_tokens": 110519016} +{"current_steps": 163985, "total_steps": 204665, "loss": 0.0, "lr": 2.3120115734110556e-07, "epoch": 4.0061808320914665, "percentage": 80.12, "elapsed_time": "3:32:53", "remaining_time": "0:52:48", "throughput": 8652.59, "total_tokens": 110522408} +{"current_steps": 163990, "total_steps": 204665, "loss": 0.0, "lr": 2.3114662590590294e-07, "epoch": 4.006302982923314, "percentage": 80.13, "elapsed_time": "3:32:53", "remaining_time": "0:52:48", "throughput": 8652.63, "total_tokens": 110526056} +{"current_steps": 163995, "total_steps": 204665, "loss": 0.0202, "lr": 2.310921000620092e-07, "epoch": 4.006425133755161, "percentage": 80.13, "elapsed_time": "3:32:54", "remaining_time": "0:52:47", "throughput": 8652.63, "total_tokens": 110528936} +{"current_steps": 164000, "total_steps": 204665, "loss": 0.0, "lr": 2.3103757980982042e-07, "epoch": 4.006547284587008, "percentage": 80.13, "elapsed_time": "3:32:54", "remaining_time": "0:52:47", "throughput": 8652.66, "total_tokens": 110532264} +{"current_steps": 164005, "total_steps": 204665, "loss": 0.0, "lr": 2.3098306514973287e-07, "epoch": 4.006669435418855, "percentage": 80.13, "elapsed_time": "3:32:54", "remaining_time": "0:52:47", "throughput": 8652.67, "total_tokens": 110535400} +{"current_steps": 164010, "total_steps": 204665, "loss": 0.0, "lr": 2.3092855608214345e-07, "epoch": 4.006791586250702, "percentage": 80.14, "elapsed_time": "3:32:55", "remaining_time": "0:52:46", "throughput": 8652.69, "total_tokens": 110538600} +{"current_steps": 164015, "total_steps": 204665, "loss": 0.0467, "lr": 2.3087405260744852e-07, "epoch": 4.00691373708255, "percentage": 80.14, "elapsed_time": "3:32:55", "remaining_time": "0:52:46", "throughput": 8652.7, "total_tokens": 110541928} +{"current_steps": 164020, "total_steps": 204665, "loss": 0.0, "lr": 2.3081955472604419e-07, "epoch": 4.007035887914396, "percentage": 80.14, "elapsed_time": "3:32:55", "remaining_time": "0:52:45", "throughput": 8652.73, "total_tokens": 110545192} +{"current_steps": 164025, "total_steps": 204665, "loss": 0.0, "lr": 2.3076506243832727e-07, "epoch": 4.007158038746244, "percentage": 80.14, "elapsed_time": "3:32:56", "remaining_time": "0:52:45", "throughput": 8652.75, "total_tokens": 110548456} +{"current_steps": 164030, "total_steps": 204665, "loss": 0.0, "lr": 2.3071057574469332e-07, "epoch": 4.007280189578091, "percentage": 80.15, "elapsed_time": "3:32:56", "remaining_time": "0:52:45", "throughput": 8652.76, "total_tokens": 110551592} +{"current_steps": 164035, "total_steps": 204665, "loss": 0.0, "lr": 2.3065609464553937e-07, "epoch": 4.007402340409938, "percentage": 80.15, "elapsed_time": "3:32:56", "remaining_time": "0:52:44", "throughput": 8652.77, "total_tokens": 110554664} +{"current_steps": 164040, "total_steps": 204665, "loss": 0.0, "lr": 2.3060161914126086e-07, "epoch": 4.007524491241785, "percentage": 80.15, "elapsed_time": "3:32:57", "remaining_time": "0:52:44", "throughput": 8652.8, "total_tokens": 110558056} +{"current_steps": 164045, "total_steps": 204665, "loss": 0.0, "lr": 2.305471492322544e-07, "epoch": 4.007646642073633, "percentage": 80.15, "elapsed_time": "3:32:57", "remaining_time": "0:52:43", "throughput": 8652.81, "total_tokens": 110561192} +{"current_steps": 164050, "total_steps": 204665, "loss": 0.0, "lr": 2.3049268491891615e-07, "epoch": 4.0077687929054795, "percentage": 80.16, "elapsed_time": "3:32:57", "remaining_time": "0:52:43", "throughput": 8652.88, "total_tokens": 110565160} +{"current_steps": 164055, "total_steps": 204665, "loss": 0.0003, "lr": 2.3043822620164187e-07, "epoch": 4.007890943737327, "percentage": 80.16, "elapsed_time": "3:32:58", "remaining_time": "0:52:43", "throughput": 8652.91, "total_tokens": 110568616} +{"current_steps": 164060, "total_steps": 204665, "loss": 0.0, "lr": 2.3038377308082812e-07, "epoch": 4.008013094569174, "percentage": 80.16, "elapsed_time": "3:32:58", "remaining_time": "0:52:42", "throughput": 8652.98, "total_tokens": 110572712} +{"current_steps": 164065, "total_steps": 204665, "loss": 0.0003, "lr": 2.3032932555687033e-07, "epoch": 4.0081352454010215, "percentage": 80.16, "elapsed_time": "3:32:58", "remaining_time": "0:52:42", "throughput": 8653.03, "total_tokens": 110576552} +{"current_steps": 164070, "total_steps": 204665, "loss": 0.0, "lr": 2.3027488363016458e-07, "epoch": 4.008257396232868, "percentage": 80.17, "elapsed_time": "3:32:59", "remaining_time": "0:52:41", "throughput": 8653.05, "total_tokens": 110579752} +{"current_steps": 164075, "total_steps": 204665, "loss": 0.0, "lr": 2.3022044730110723e-07, "epoch": 4.008379547064716, "percentage": 80.17, "elapsed_time": "3:32:59", "remaining_time": "0:52:41", "throughput": 8653.07, "total_tokens": 110583144} +{"current_steps": 164080, "total_steps": 204665, "loss": 0.0002, "lr": 2.301660165700936e-07, "epoch": 4.008501697896563, "percentage": 80.17, "elapsed_time": "3:32:59", "remaining_time": "0:52:41", "throughput": 8653.09, "total_tokens": 110586344} +{"current_steps": 164085, "total_steps": 204665, "loss": 0.0, "lr": 2.3011159143752e-07, "epoch": 4.00862384872841, "percentage": 80.17, "elapsed_time": "3:33:00", "remaining_time": "0:52:40", "throughput": 8653.14, "total_tokens": 110590120} +{"current_steps": 164090, "total_steps": 204665, "loss": 0.0, "lr": 2.300571719037817e-07, "epoch": 4.008745999560257, "percentage": 80.17, "elapsed_time": "3:33:00", "remaining_time": "0:52:40", "throughput": 8653.16, "total_tokens": 110593448} +{"current_steps": 164095, "total_steps": 204665, "loss": 0.0, "lr": 2.3000275796927504e-07, "epoch": 4.008868150392104, "percentage": 80.18, "elapsed_time": "3:33:01", "remaining_time": "0:52:39", "throughput": 8653.22, "total_tokens": 110597288} +{"current_steps": 164100, "total_steps": 204665, "loss": 0.0, "lr": 2.2994834963439547e-07, "epoch": 4.008990301223951, "percentage": 80.18, "elapsed_time": "3:33:01", "remaining_time": "0:52:39", "throughput": 8653.21, "total_tokens": 110600168} +{"current_steps": 164105, "total_steps": 204665, "loss": 0.0, "lr": 2.2989394689953824e-07, "epoch": 4.009112452055798, "percentage": 80.18, "elapsed_time": "3:33:01", "remaining_time": "0:52:39", "throughput": 8653.27, "total_tokens": 110603944} +{"current_steps": 164110, "total_steps": 204665, "loss": 0.0, "lr": 2.2983954976509967e-07, "epoch": 4.009234602887646, "percentage": 80.18, "elapsed_time": "3:33:02", "remaining_time": "0:52:38", "throughput": 8653.27, "total_tokens": 110606888} +{"current_steps": 164115, "total_steps": 204665, "loss": 0.0, "lr": 2.2978515823147481e-07, "epoch": 4.0093567537194925, "percentage": 80.19, "elapsed_time": "3:33:02", "remaining_time": "0:52:38", "throughput": 8653.28, "total_tokens": 110610088} +{"current_steps": 164120, "total_steps": 204665, "loss": 0.0, "lr": 2.2973077229905967e-07, "epoch": 4.00947890455134, "percentage": 80.19, "elapsed_time": "3:33:02", "remaining_time": "0:52:37", "throughput": 8653.31, "total_tokens": 110613416} +{"current_steps": 164125, "total_steps": 204665, "loss": 0.0002, "lr": 2.2967639196824928e-07, "epoch": 4.009601055383187, "percentage": 80.19, "elapsed_time": "3:33:03", "remaining_time": "0:52:37", "throughput": 8653.31, "total_tokens": 110616488} +{"current_steps": 164130, "total_steps": 204665, "loss": 0.0, "lr": 2.296220172394394e-07, "epoch": 4.0097232062150345, "percentage": 80.19, "elapsed_time": "3:33:03", "remaining_time": "0:52:37", "throughput": 8653.33, "total_tokens": 110619624} +{"current_steps": 164135, "total_steps": 204665, "loss": 0.0, "lr": 2.2956764811302564e-07, "epoch": 4.009845357046881, "percentage": 80.2, "elapsed_time": "3:33:03", "remaining_time": "0:52:36", "throughput": 8653.35, "total_tokens": 110622952} +{"current_steps": 164140, "total_steps": 204665, "loss": 0.058, "lr": 2.295132845894029e-07, "epoch": 4.009967507878729, "percentage": 80.2, "elapsed_time": "3:33:04", "remaining_time": "0:52:36", "throughput": 8653.38, "total_tokens": 110626280} +{"current_steps": 164145, "total_steps": 204665, "loss": 0.0, "lr": 2.2945892666896705e-07, "epoch": 4.010089658710576, "percentage": 80.2, "elapsed_time": "3:33:04", "remaining_time": "0:52:35", "throughput": 8653.43, "total_tokens": 110630056} +{"current_steps": 164150, "total_steps": 204665, "loss": 0.0001, "lr": 2.2940457435211292e-07, "epoch": 4.010211809542423, "percentage": 80.2, "elapsed_time": "3:33:04", "remaining_time": "0:52:35", "throughput": 8653.45, "total_tokens": 110633320} +{"current_steps": 164155, "total_steps": 204665, "loss": 0.0, "lr": 2.2935022763923618e-07, "epoch": 4.01033396037427, "percentage": 80.21, "elapsed_time": "3:33:05", "remaining_time": "0:52:35", "throughput": 8653.49, "total_tokens": 110636904} +{"current_steps": 164160, "total_steps": 204665, "loss": 0.0, "lr": 2.2929588653073163e-07, "epoch": 4.010456111206118, "percentage": 80.21, "elapsed_time": "3:33:05", "remaining_time": "0:52:34", "throughput": 8653.5, "total_tokens": 110640104} +{"current_steps": 164165, "total_steps": 204665, "loss": 0.0, "lr": 2.2924155102699472e-07, "epoch": 4.010578262037964, "percentage": 80.21, "elapsed_time": "3:33:05", "remaining_time": "0:52:34", "throughput": 8653.52, "total_tokens": 110643304} +{"current_steps": 164170, "total_steps": 204665, "loss": 0.0, "lr": 2.2918722112842082e-07, "epoch": 4.010700412869812, "percentage": 80.21, "elapsed_time": "3:33:06", "remaining_time": "0:52:33", "throughput": 8653.54, "total_tokens": 110646568} +{"current_steps": 164175, "total_steps": 204665, "loss": 0.0001, "lr": 2.291328968354045e-07, "epoch": 4.010822563701659, "percentage": 80.22, "elapsed_time": "3:33:06", "remaining_time": "0:52:33", "throughput": 8653.56, "total_tokens": 110649896} +{"current_steps": 164180, "total_steps": 204665, "loss": 0.0, "lr": 2.2907857814834132e-07, "epoch": 4.0109447145335055, "percentage": 80.22, "elapsed_time": "3:33:06", "remaining_time": "0:52:33", "throughput": 8653.58, "total_tokens": 110653160} +{"current_steps": 164185, "total_steps": 204665, "loss": 0.0, "lr": 2.2902426506762574e-07, "epoch": 4.011066865365353, "percentage": 80.22, "elapsed_time": "3:33:07", "remaining_time": "0:52:32", "throughput": 8653.6, "total_tokens": 110656552} +{"current_steps": 164190, "total_steps": 204665, "loss": 0.0, "lr": 2.2896995759365344e-07, "epoch": 4.0111890161972, "percentage": 80.22, "elapsed_time": "3:33:07", "remaining_time": "0:52:32", "throughput": 8653.61, "total_tokens": 110659688} +{"current_steps": 164195, "total_steps": 204665, "loss": 0.0, "lr": 2.289156557268187e-07, "epoch": 4.0113111670290476, "percentage": 80.23, "elapsed_time": "3:33:08", "remaining_time": "0:52:31", "throughput": 8653.64, "total_tokens": 110663144} +{"current_steps": 164200, "total_steps": 204665, "loss": 0.0, "lr": 2.2886135946751706e-07, "epoch": 4.011433317860894, "percentage": 80.23, "elapsed_time": "3:33:08", "remaining_time": "0:52:31", "throughput": 8653.64, "total_tokens": 110666152} +{"current_steps": 164205, "total_steps": 204665, "loss": 0.0, "lr": 2.2880706881614298e-07, "epoch": 4.011555468692742, "percentage": 80.23, "elapsed_time": "3:33:08", "remaining_time": "0:52:31", "throughput": 8653.66, "total_tokens": 110669352} +{"current_steps": 164210, "total_steps": 204665, "loss": 0.0, "lr": 2.2875278377309093e-07, "epoch": 4.011677619524589, "percentage": 80.23, "elapsed_time": "3:33:09", "remaining_time": "0:52:30", "throughput": 8653.7, "total_tokens": 110673064} +{"current_steps": 164215, "total_steps": 204665, "loss": 0.0, "lr": 2.2869850433875648e-07, "epoch": 4.011799770356436, "percentage": 80.24, "elapsed_time": "3:33:09", "remaining_time": "0:52:30", "throughput": 8653.72, "total_tokens": 110676328} +{"current_steps": 164220, "total_steps": 204665, "loss": 0.0, "lr": 2.2864423051353366e-07, "epoch": 4.011921921188283, "percentage": 80.24, "elapsed_time": "3:33:09", "remaining_time": "0:52:29", "throughput": 8653.74, "total_tokens": 110679784} +{"current_steps": 164225, "total_steps": 204665, "loss": 0.0, "lr": 2.2858996229781745e-07, "epoch": 4.012044072020131, "percentage": 80.24, "elapsed_time": "3:33:10", "remaining_time": "0:52:29", "throughput": 8653.75, "total_tokens": 110682856} +{"current_steps": 164230, "total_steps": 204665, "loss": 0.0, "lr": 2.285356996920028e-07, "epoch": 4.012166222851977, "percentage": 80.24, "elapsed_time": "3:33:10", "remaining_time": "0:52:29", "throughput": 8653.78, "total_tokens": 110686248} +{"current_steps": 164235, "total_steps": 204665, "loss": 0.0, "lr": 2.2848144269648373e-07, "epoch": 4.012288373683825, "percentage": 80.25, "elapsed_time": "3:33:10", "remaining_time": "0:52:28", "throughput": 8653.82, "total_tokens": 110689960} +{"current_steps": 164240, "total_steps": 204665, "loss": 0.0235, "lr": 2.2842719131165544e-07, "epoch": 4.012410524515672, "percentage": 80.25, "elapsed_time": "3:33:11", "remaining_time": "0:52:28", "throughput": 8653.86, "total_tokens": 110693672} +{"current_steps": 164245, "total_steps": 204665, "loss": 0.0, "lr": 2.2837294553791186e-07, "epoch": 4.0125326753475195, "percentage": 80.25, "elapsed_time": "3:33:11", "remaining_time": "0:52:27", "throughput": 8653.88, "total_tokens": 110696936} +{"current_steps": 164250, "total_steps": 204665, "loss": 0.0, "lr": 2.2831870537564814e-07, "epoch": 4.012654826179366, "percentage": 80.25, "elapsed_time": "3:33:11", "remaining_time": "0:52:27", "throughput": 8653.91, "total_tokens": 110700328} +{"current_steps": 164255, "total_steps": 204665, "loss": 0.0, "lr": 2.282644708252579e-07, "epoch": 4.012776977011214, "percentage": 80.26, "elapsed_time": "3:33:12", "remaining_time": "0:52:27", "throughput": 8653.93, "total_tokens": 110703720} +{"current_steps": 164260, "total_steps": 204665, "loss": 0.0, "lr": 2.2821024188713612e-07, "epoch": 4.012899127843061, "percentage": 80.26, "elapsed_time": "3:33:12", "remaining_time": "0:52:26", "throughput": 8653.96, "total_tokens": 110707240} +{"current_steps": 164265, "total_steps": 204665, "loss": 0.0, "lr": 2.2815601856167722e-07, "epoch": 4.013021278674908, "percentage": 80.26, "elapsed_time": "3:33:13", "remaining_time": "0:52:26", "throughput": 8653.99, "total_tokens": 110710568} +{"current_steps": 164270, "total_steps": 204665, "loss": 0.0, "lr": 2.281018008492751e-07, "epoch": 4.013143429506755, "percentage": 80.26, "elapsed_time": "3:33:13", "remaining_time": "0:52:25", "throughput": 8654.03, "total_tokens": 110714280} +{"current_steps": 164275, "total_steps": 204665, "loss": 0.0, "lr": 2.280475887503247e-07, "epoch": 4.013265580338602, "percentage": 80.27, "elapsed_time": "3:33:13", "remaining_time": "0:52:25", "throughput": 8654.06, "total_tokens": 110717736} +{"current_steps": 164280, "total_steps": 204665, "loss": 0.0, "lr": 2.2799338226521947e-07, "epoch": 4.013387731170449, "percentage": 80.27, "elapsed_time": "3:33:14", "remaining_time": "0:52:25", "throughput": 8654.06, "total_tokens": 110720872} +{"current_steps": 164285, "total_steps": 204665, "loss": 0.0, "lr": 2.279391813943541e-07, "epoch": 4.013509882002296, "percentage": 80.27, "elapsed_time": "3:33:14", "remaining_time": "0:52:24", "throughput": 8654.09, "total_tokens": 110724264} +{"current_steps": 164290, "total_steps": 204665, "loss": 0.0, "lr": 2.2788498613812279e-07, "epoch": 4.013632032834144, "percentage": 80.27, "elapsed_time": "3:33:14", "remaining_time": "0:52:24", "throughput": 8654.17, "total_tokens": 110728552} +{"current_steps": 164295, "total_steps": 204665, "loss": 0.0, "lr": 2.278307964969196e-07, "epoch": 4.0137541836659905, "percentage": 80.28, "elapsed_time": "3:33:15", "remaining_time": "0:52:23", "throughput": 8654.2, "total_tokens": 110732136} +{"current_steps": 164300, "total_steps": 204665, "loss": 0.0, "lr": 2.2777661247113832e-07, "epoch": 4.013876334497838, "percentage": 80.28, "elapsed_time": "3:33:15", "remaining_time": "0:52:23", "throughput": 8654.22, "total_tokens": 110735400} +{"current_steps": 164305, "total_steps": 204665, "loss": 0.0, "lr": 2.2772243406117353e-07, "epoch": 4.013998485329685, "percentage": 80.28, "elapsed_time": "3:33:15", "remaining_time": "0:52:23", "throughput": 8654.25, "total_tokens": 110738920} +{"current_steps": 164310, "total_steps": 204665, "loss": 0.0, "lr": 2.2766826126741877e-07, "epoch": 4.0141206361615325, "percentage": 80.28, "elapsed_time": "3:33:16", "remaining_time": "0:52:22", "throughput": 8654.28, "total_tokens": 110742376} +{"current_steps": 164315, "total_steps": 204665, "loss": 0.0, "lr": 2.27614094090268e-07, "epoch": 4.014242786993379, "percentage": 80.28, "elapsed_time": "3:33:16", "remaining_time": "0:52:22", "throughput": 8654.3, "total_tokens": 110745576} +{"current_steps": 164320, "total_steps": 204665, "loss": 0.0, "lr": 2.275599325301153e-07, "epoch": 4.014364937825227, "percentage": 80.29, "elapsed_time": "3:33:16", "remaining_time": "0:52:21", "throughput": 8654.32, "total_tokens": 110748968} +{"current_steps": 164325, "total_steps": 204665, "loss": 0.0, "lr": 2.275057765873547e-07, "epoch": 4.014487088657074, "percentage": 80.29, "elapsed_time": "3:33:17", "remaining_time": "0:52:21", "throughput": 8654.33, "total_tokens": 110752232} +{"current_steps": 164330, "total_steps": 204665, "loss": 0.0004, "lr": 2.274516262623797e-07, "epoch": 4.014609239488921, "percentage": 80.29, "elapsed_time": "3:33:17", "remaining_time": "0:52:21", "throughput": 8654.36, "total_tokens": 110755624} +{"current_steps": 164335, "total_steps": 204665, "loss": 0.0, "lr": 2.2739748155558448e-07, "epoch": 4.014731390320768, "percentage": 80.29, "elapsed_time": "3:33:18", "remaining_time": "0:52:20", "throughput": 8654.37, "total_tokens": 110758760} +{"current_steps": 164340, "total_steps": 204665, "loss": 0.0, "lr": 2.273433424673622e-07, "epoch": 4.014853541152616, "percentage": 80.3, "elapsed_time": "3:33:18", "remaining_time": "0:52:20", "throughput": 8654.38, "total_tokens": 110761960} +{"current_steps": 164345, "total_steps": 204665, "loss": 0.0, "lr": 2.2728920899810734e-07, "epoch": 4.014975691984462, "percentage": 80.3, "elapsed_time": "3:33:18", "remaining_time": "0:52:20", "throughput": 8654.4, "total_tokens": 110765160} +{"current_steps": 164350, "total_steps": 204665, "loss": 0.0001, "lr": 2.27235081148213e-07, "epoch": 4.01509784281631, "percentage": 80.3, "elapsed_time": "3:33:19", "remaining_time": "0:52:19", "throughput": 8654.43, "total_tokens": 110768616} +{"current_steps": 164355, "total_steps": 204665, "loss": 0.0, "lr": 2.2718095891807287e-07, "epoch": 4.015219993648157, "percentage": 80.3, "elapsed_time": "3:33:19", "remaining_time": "0:52:19", "throughput": 8654.45, "total_tokens": 110771880} +{"current_steps": 164360, "total_steps": 204665, "loss": 0.0, "lr": 2.27126842308081e-07, "epoch": 4.0153421444800035, "percentage": 80.31, "elapsed_time": "3:33:19", "remaining_time": "0:52:18", "throughput": 8654.48, "total_tokens": 110775272} +{"current_steps": 164365, "total_steps": 204665, "loss": 0.0001, "lr": 2.2707273131863025e-07, "epoch": 4.015464295311851, "percentage": 80.31, "elapsed_time": "3:33:20", "remaining_time": "0:52:18", "throughput": 8654.48, "total_tokens": 110778216} +{"current_steps": 164370, "total_steps": 204665, "loss": 0.0, "lr": 2.270186259501149e-07, "epoch": 4.015586446143698, "percentage": 80.31, "elapsed_time": "3:33:20", "remaining_time": "0:52:18", "throughput": 8654.51, "total_tokens": 110781608} +{"current_steps": 164375, "total_steps": 204665, "loss": 0.0, "lr": 2.269645262029276e-07, "epoch": 4.0157085969755455, "percentage": 80.31, "elapsed_time": "3:33:20", "remaining_time": "0:52:17", "throughput": 8654.53, "total_tokens": 110784872} +{"current_steps": 164380, "total_steps": 204665, "loss": 0.0, "lr": 2.269104320774623e-07, "epoch": 4.015830747807392, "percentage": 80.32, "elapsed_time": "3:33:21", "remaining_time": "0:52:17", "throughput": 8654.57, "total_tokens": 110788392} +{"current_steps": 164385, "total_steps": 204665, "loss": 0.0, "lr": 2.2685634357411242e-07, "epoch": 4.01595289863924, "percentage": 80.32, "elapsed_time": "3:33:21", "remaining_time": "0:52:16", "throughput": 8654.59, "total_tokens": 110791720} +{"current_steps": 164390, "total_steps": 204665, "loss": 0.0, "lr": 2.2680226069327102e-07, "epoch": 4.016075049471087, "percentage": 80.32, "elapsed_time": "3:33:21", "remaining_time": "0:52:16", "throughput": 8654.6, "total_tokens": 110794792} +{"current_steps": 164395, "total_steps": 204665, "loss": 0.0, "lr": 2.2674818343533175e-07, "epoch": 4.016197200302934, "percentage": 80.32, "elapsed_time": "3:33:22", "remaining_time": "0:52:16", "throughput": 8654.63, "total_tokens": 110798184} +{"current_steps": 164400, "total_steps": 204665, "loss": 0.0, "lr": 2.2669411180068742e-07, "epoch": 4.016319351134781, "percentage": 80.33, "elapsed_time": "3:33:22", "remaining_time": "0:52:15", "throughput": 8654.64, "total_tokens": 110801256} +{"current_steps": 164405, "total_steps": 204665, "loss": 0.0, "lr": 2.2664004578973173e-07, "epoch": 4.016441501966629, "percentage": 80.33, "elapsed_time": "3:33:22", "remaining_time": "0:52:15", "throughput": 8654.68, "total_tokens": 110804712} +{"current_steps": 164410, "total_steps": 204665, "loss": 0.0, "lr": 2.2658598540285767e-07, "epoch": 4.016563652798475, "percentage": 80.33, "elapsed_time": "3:33:23", "remaining_time": "0:52:14", "throughput": 8654.68, "total_tokens": 110807720} +{"current_steps": 164415, "total_steps": 204665, "loss": 0.0, "lr": 2.2653193064045807e-07, "epoch": 4.016685803630323, "percentage": 80.33, "elapsed_time": "3:33:23", "remaining_time": "0:52:14", "throughput": 8654.71, "total_tokens": 110811048} +{"current_steps": 164420, "total_steps": 204665, "loss": 0.0, "lr": 2.2647788150292657e-07, "epoch": 4.01680795446217, "percentage": 80.34, "elapsed_time": "3:33:23", "remaining_time": "0:52:14", "throughput": 8654.74, "total_tokens": 110814440} +{"current_steps": 164425, "total_steps": 204665, "loss": 0.0, "lr": 2.2642383799065578e-07, "epoch": 4.016930105294017, "percentage": 80.34, "elapsed_time": "3:33:24", "remaining_time": "0:52:13", "throughput": 8654.74, "total_tokens": 110817640} +{"current_steps": 164430, "total_steps": 204665, "loss": 0.0, "lr": 2.2636980010403904e-07, "epoch": 4.017052256125864, "percentage": 80.34, "elapsed_time": "3:33:24", "remaining_time": "0:52:13", "throughput": 8654.78, "total_tokens": 110821096} +{"current_steps": 164435, "total_steps": 204665, "loss": 0.0, "lr": 2.2631576784346906e-07, "epoch": 4.017174406957712, "percentage": 80.34, "elapsed_time": "3:33:24", "remaining_time": "0:52:12", "throughput": 8654.81, "total_tokens": 110824616} +{"current_steps": 164440, "total_steps": 204665, "loss": 0.0, "lr": 2.2626174120933882e-07, "epoch": 4.0172965577895585, "percentage": 80.35, "elapsed_time": "3:33:25", "remaining_time": "0:52:12", "throughput": 8654.84, "total_tokens": 110828008} +{"current_steps": 164445, "total_steps": 204665, "loss": 0.0001, "lr": 2.262077202020416e-07, "epoch": 4.017418708621406, "percentage": 80.35, "elapsed_time": "3:33:25", "remaining_time": "0:52:12", "throughput": 8654.85, "total_tokens": 110831144} +{"current_steps": 164450, "total_steps": 204665, "loss": 0.0, "lr": 2.261537048219697e-07, "epoch": 4.017540859453253, "percentage": 80.35, "elapsed_time": "3:33:26", "remaining_time": "0:52:11", "throughput": 8654.86, "total_tokens": 110834216} +{"current_steps": 164455, "total_steps": 204665, "loss": 0.0, "lr": 2.2609969506951655e-07, "epoch": 4.0176630102851, "percentage": 80.35, "elapsed_time": "3:33:26", "remaining_time": "0:52:11", "throughput": 8654.88, "total_tokens": 110837352} +{"current_steps": 164460, "total_steps": 204665, "loss": 0.0, "lr": 2.260456909450742e-07, "epoch": 4.017785161116947, "percentage": 80.36, "elapsed_time": "3:33:26", "remaining_time": "0:52:10", "throughput": 8654.91, "total_tokens": 110840808} +{"current_steps": 164465, "total_steps": 204665, "loss": 0.0, "lr": 2.2599169244903604e-07, "epoch": 4.017907311948794, "percentage": 80.36, "elapsed_time": "3:33:27", "remaining_time": "0:52:10", "throughput": 8654.91, "total_tokens": 110843752} +{"current_steps": 164470, "total_steps": 204665, "loss": 0.0, "lr": 2.259376995817942e-07, "epoch": 4.018029462780642, "percentage": 80.36, "elapsed_time": "3:33:27", "remaining_time": "0:52:10", "throughput": 8654.93, "total_tokens": 110847016} +{"current_steps": 164475, "total_steps": 204665, "loss": 0.0, "lr": 2.258837123437416e-07, "epoch": 4.018151613612488, "percentage": 80.36, "elapsed_time": "3:33:27", "remaining_time": "0:52:09", "throughput": 8654.95, "total_tokens": 110850280} +{"current_steps": 164480, "total_steps": 204665, "loss": 0.0, "lr": 2.258297307352711e-07, "epoch": 4.018273764444336, "percentage": 80.37, "elapsed_time": "3:33:28", "remaining_time": "0:52:09", "throughput": 8654.95, "total_tokens": 110853224} +{"current_steps": 164485, "total_steps": 204665, "loss": 0.0, "lr": 2.257757547567748e-07, "epoch": 4.018395915276183, "percentage": 80.37, "elapsed_time": "3:33:28", "remaining_time": "0:52:08", "throughput": 8655.01, "total_tokens": 110857000} +{"current_steps": 164490, "total_steps": 204665, "loss": 0.0, "lr": 2.2572178440864575e-07, "epoch": 4.01851806610803, "percentage": 80.37, "elapsed_time": "3:33:28", "remaining_time": "0:52:08", "throughput": 8655.02, "total_tokens": 110860136} +{"current_steps": 164495, "total_steps": 204665, "loss": 0.0002, "lr": 2.256678196912758e-07, "epoch": 4.018640216939877, "percentage": 80.37, "elapsed_time": "3:33:29", "remaining_time": "0:52:08", "throughput": 8655.02, "total_tokens": 110863080} +{"current_steps": 164500, "total_steps": 204665, "loss": 0.0, "lr": 2.2561386060505805e-07, "epoch": 4.018762367771725, "percentage": 80.38, "elapsed_time": "3:33:29", "remaining_time": "0:52:07", "throughput": 8655.06, "total_tokens": 110866664} +{"current_steps": 164505, "total_steps": 204665, "loss": 0.0, "lr": 2.2555990715038432e-07, "epoch": 4.0188845186035715, "percentage": 80.38, "elapsed_time": "3:33:29", "remaining_time": "0:52:07", "throughput": 8655.06, "total_tokens": 110869480} +{"current_steps": 164510, "total_steps": 204665, "loss": 0.0, "lr": 2.255059593276476e-07, "epoch": 4.019006669435419, "percentage": 80.38, "elapsed_time": "3:33:30", "remaining_time": "0:52:06", "throughput": 8655.05, "total_tokens": 110872296} +{"current_steps": 164515, "total_steps": 204665, "loss": 0.0, "lr": 2.254520171372397e-07, "epoch": 4.019128820267266, "percentage": 80.38, "elapsed_time": "3:33:30", "remaining_time": "0:52:06", "throughput": 8655.11, "total_tokens": 110876072} +{"current_steps": 164520, "total_steps": 204665, "loss": 0.0, "lr": 2.253980805795529e-07, "epoch": 4.019250971099114, "percentage": 80.39, "elapsed_time": "3:33:30", "remaining_time": "0:52:06", "throughput": 8655.15, "total_tokens": 110879720} +{"current_steps": 164525, "total_steps": 204665, "loss": 0.0, "lr": 2.2534414965497984e-07, "epoch": 4.01937312193096, "percentage": 80.39, "elapsed_time": "3:33:31", "remaining_time": "0:52:05", "throughput": 8655.17, "total_tokens": 110882920} +{"current_steps": 164530, "total_steps": 204665, "loss": 0.0, "lr": 2.2529022436391221e-07, "epoch": 4.019495272762808, "percentage": 80.39, "elapsed_time": "3:33:31", "remaining_time": "0:52:05", "throughput": 8655.22, "total_tokens": 110886568} +{"current_steps": 164535, "total_steps": 204665, "loss": 0.0, "lr": 2.2523630470674238e-07, "epoch": 4.019617423594655, "percentage": 80.39, "elapsed_time": "3:33:31", "remaining_time": "0:52:04", "throughput": 8655.24, "total_tokens": 110889832} +{"current_steps": 164540, "total_steps": 204665, "loss": 0.0, "lr": 2.251823906838629e-07, "epoch": 4.019739574426501, "percentage": 80.39, "elapsed_time": "3:33:32", "remaining_time": "0:52:04", "throughput": 8655.29, "total_tokens": 110893672} +{"current_steps": 164545, "total_steps": 204665, "loss": 0.0, "lr": 2.2512848229566517e-07, "epoch": 4.019861725258349, "percentage": 80.4, "elapsed_time": "3:33:32", "remaining_time": "0:52:04", "throughput": 8655.34, "total_tokens": 110897384} +{"current_steps": 164550, "total_steps": 204665, "loss": 0.0, "lr": 2.2507457954254173e-07, "epoch": 4.019983876090196, "percentage": 80.4, "elapsed_time": "3:33:32", "remaining_time": "0:52:03", "throughput": 8655.36, "total_tokens": 110900584} +{"current_steps": 164555, "total_steps": 204665, "loss": 0.0, "lr": 2.2502068242488414e-07, "epoch": 4.020106026922043, "percentage": 80.4, "elapsed_time": "3:33:33", "remaining_time": "0:52:03", "throughput": 8655.37, "total_tokens": 110903656} +{"current_steps": 164560, "total_steps": 204665, "loss": 0.0, "lr": 2.249667909430849e-07, "epoch": 4.02022817775389, "percentage": 80.4, "elapsed_time": "3:33:33", "remaining_time": "0:52:02", "throughput": 8655.4, "total_tokens": 110906984} +{"current_steps": 164565, "total_steps": 204665, "loss": 0.0, "lr": 2.2491290509753536e-07, "epoch": 4.020350328585738, "percentage": 80.41, "elapsed_time": "3:33:33", "remaining_time": "0:52:02", "throughput": 8655.43, "total_tokens": 110910376} +{"current_steps": 164570, "total_steps": 204665, "loss": 0.0, "lr": 2.2485902488862763e-07, "epoch": 4.020472479417585, "percentage": 80.41, "elapsed_time": "3:33:34", "remaining_time": "0:52:02", "throughput": 8655.43, "total_tokens": 110913384} +{"current_steps": 164575, "total_steps": 204665, "loss": 0.0, "lr": 2.2480515031675384e-07, "epoch": 4.020594630249432, "percentage": 80.41, "elapsed_time": "3:33:34", "remaining_time": "0:52:01", "throughput": 8655.49, "total_tokens": 110917224} +{"current_steps": 164580, "total_steps": 204665, "loss": 0.0, "lr": 2.2475128138230516e-07, "epoch": 4.020716781081279, "percentage": 80.41, "elapsed_time": "3:33:35", "remaining_time": "0:52:01", "throughput": 8655.51, "total_tokens": 110920360} +{"current_steps": 164585, "total_steps": 204665, "loss": 0.0, "lr": 2.24697418085674e-07, "epoch": 4.020838931913127, "percentage": 80.42, "elapsed_time": "3:33:35", "remaining_time": "0:52:00", "throughput": 8655.52, "total_tokens": 110923496} +{"current_steps": 164590, "total_steps": 204665, "loss": 0.0, "lr": 2.2464356042725152e-07, "epoch": 4.020961082744973, "percentage": 80.42, "elapsed_time": "3:33:35", "remaining_time": "0:52:00", "throughput": 8655.55, "total_tokens": 110926888} +{"current_steps": 164595, "total_steps": 204665, "loss": 0.0, "lr": 2.2458970840742976e-07, "epoch": 4.021083233576821, "percentage": 80.42, "elapsed_time": "3:33:36", "remaining_time": "0:52:00", "throughput": 8655.57, "total_tokens": 110930152} +{"current_steps": 164600, "total_steps": 204665, "loss": 0.0001, "lr": 2.2453586202660003e-07, "epoch": 4.021205384408668, "percentage": 80.42, "elapsed_time": "3:33:36", "remaining_time": "0:51:59", "throughput": 8655.61, "total_tokens": 110933672} +{"current_steps": 164605, "total_steps": 204665, "loss": 0.0, "lr": 2.2448202128515436e-07, "epoch": 4.021327535240515, "percentage": 80.43, "elapsed_time": "3:33:36", "remaining_time": "0:51:59", "throughput": 8655.63, "total_tokens": 110936936} +{"current_steps": 164610, "total_steps": 204665, "loss": 0.0, "lr": 2.2442818618348368e-07, "epoch": 4.021449686072362, "percentage": 80.43, "elapsed_time": "3:33:37", "remaining_time": "0:51:58", "throughput": 8655.63, "total_tokens": 110939816} +{"current_steps": 164615, "total_steps": 204665, "loss": 0.0, "lr": 2.2437435672198014e-07, "epoch": 4.02157183690421, "percentage": 80.43, "elapsed_time": "3:33:37", "remaining_time": "0:51:58", "throughput": 8655.66, "total_tokens": 110943272} +{"current_steps": 164620, "total_steps": 204665, "loss": 0.0, "lr": 2.243205329010349e-07, "epoch": 4.0216939877360565, "percentage": 80.43, "elapsed_time": "3:33:37", "remaining_time": "0:51:58", "throughput": 8655.68, "total_tokens": 110946408} +{"current_steps": 164625, "total_steps": 204665, "loss": 0.0002, "lr": 2.242667147210392e-07, "epoch": 4.021816138567903, "percentage": 80.44, "elapsed_time": "3:33:38", "remaining_time": "0:51:57", "throughput": 8655.71, "total_tokens": 110949864} +{"current_steps": 164630, "total_steps": 204665, "loss": 0.0, "lr": 2.2421290218238442e-07, "epoch": 4.021938289399751, "percentage": 80.44, "elapsed_time": "3:33:38", "remaining_time": "0:51:57", "throughput": 8655.78, "total_tokens": 110953832} +{"current_steps": 164635, "total_steps": 204665, "loss": 0.0, "lr": 2.241590952854625e-07, "epoch": 4.022060440231598, "percentage": 80.44, "elapsed_time": "3:33:38", "remaining_time": "0:51:56", "throughput": 8655.79, "total_tokens": 110956968} +{"current_steps": 164640, "total_steps": 204665, "loss": 0.0, "lr": 2.241052940306639e-07, "epoch": 4.022182591063445, "percentage": 80.44, "elapsed_time": "3:33:39", "remaining_time": "0:51:56", "throughput": 8655.81, "total_tokens": 110960168} +{"current_steps": 164645, "total_steps": 204665, "loss": 0.0, "lr": 2.2405149841838068e-07, "epoch": 4.022304741895292, "percentage": 80.45, "elapsed_time": "3:33:39", "remaining_time": "0:51:56", "throughput": 8655.83, "total_tokens": 110963496} +{"current_steps": 164650, "total_steps": 204665, "loss": 0.0, "lr": 2.2399770844900334e-07, "epoch": 4.02242689272714, "percentage": 80.45, "elapsed_time": "3:33:39", "remaining_time": "0:51:55", "throughput": 8655.85, "total_tokens": 110966632} +{"current_steps": 164655, "total_steps": 204665, "loss": 0.0, "lr": 2.2394392412292372e-07, "epoch": 4.022549043558986, "percentage": 80.45, "elapsed_time": "3:33:40", "remaining_time": "0:51:55", "throughput": 8655.87, "total_tokens": 110969896} +{"current_steps": 164660, "total_steps": 204665, "loss": 0.0005, "lr": 2.238901454405323e-07, "epoch": 4.022671194390834, "percentage": 80.45, "elapsed_time": "3:33:40", "remaining_time": "0:51:54", "throughput": 8655.9, "total_tokens": 110973352} +{"current_steps": 164665, "total_steps": 204665, "loss": 0.0, "lr": 2.2383637240222052e-07, "epoch": 4.022793345222681, "percentage": 80.46, "elapsed_time": "3:33:40", "remaining_time": "0:51:54", "throughput": 8655.93, "total_tokens": 110976680} +{"current_steps": 164670, "total_steps": 204665, "loss": 0.0, "lr": 2.2378260500837965e-07, "epoch": 4.022915496054528, "percentage": 80.46, "elapsed_time": "3:33:41", "remaining_time": "0:51:54", "throughput": 8655.96, "total_tokens": 110980136} +{"current_steps": 164675, "total_steps": 204665, "loss": 0.0, "lr": 2.2372884325940013e-07, "epoch": 4.023037646886375, "percentage": 80.46, "elapsed_time": "3:33:41", "remaining_time": "0:51:53", "throughput": 8655.98, "total_tokens": 110983400} +{"current_steps": 164680, "total_steps": 204665, "loss": 0.0, "lr": 2.2367508715567364e-07, "epoch": 4.023159797718223, "percentage": 80.46, "elapsed_time": "3:33:41", "remaining_time": "0:51:53", "throughput": 8656.04, "total_tokens": 110987240} +{"current_steps": 164685, "total_steps": 204665, "loss": 0.0, "lr": 2.236213366975903e-07, "epoch": 4.0232819485500695, "percentage": 80.47, "elapsed_time": "3:33:42", "remaining_time": "0:51:52", "throughput": 8656.06, "total_tokens": 110990568} +{"current_steps": 164690, "total_steps": 204665, "loss": 0.0, "lr": 2.2356759188554153e-07, "epoch": 4.023404099381917, "percentage": 80.47, "elapsed_time": "3:33:42", "remaining_time": "0:51:52", "throughput": 8656.1, "total_tokens": 110994024} +{"current_steps": 164695, "total_steps": 204665, "loss": 0.0, "lr": 2.235138527199184e-07, "epoch": 4.023526250213764, "percentage": 80.47, "elapsed_time": "3:33:42", "remaining_time": "0:51:52", "throughput": 8656.1, "total_tokens": 110997096} +{"current_steps": 164700, "total_steps": 204665, "loss": 0.0, "lr": 2.2346011920111095e-07, "epoch": 4.0236484010456115, "percentage": 80.47, "elapsed_time": "3:33:43", "remaining_time": "0:51:51", "throughput": 8656.15, "total_tokens": 111000744} +{"current_steps": 164705, "total_steps": 204665, "loss": 0.0, "lr": 2.2340639132951077e-07, "epoch": 4.023770551877458, "percentage": 80.48, "elapsed_time": "3:33:43", "remaining_time": "0:51:51", "throughput": 8656.16, "total_tokens": 111004008} +{"current_steps": 164710, "total_steps": 204665, "loss": 0.0, "lr": 2.2335266910550787e-07, "epoch": 4.023892702709306, "percentage": 80.48, "elapsed_time": "3:33:44", "remaining_time": "0:51:50", "throughput": 8656.2, "total_tokens": 111007528} +{"current_steps": 164715, "total_steps": 204665, "loss": 0.0, "lr": 2.2329895252949348e-07, "epoch": 4.024014853541153, "percentage": 80.48, "elapsed_time": "3:33:44", "remaining_time": "0:51:50", "throughput": 8656.21, "total_tokens": 111010600} +{"current_steps": 164720, "total_steps": 204665, "loss": 0.0, "lr": 2.2324524160185808e-07, "epoch": 4.024137004372999, "percentage": 80.48, "elapsed_time": "3:33:44", "remaining_time": "0:51:50", "throughput": 8656.28, "total_tokens": 111014696} +{"current_steps": 164725, "total_steps": 204665, "loss": 0.0698, "lr": 2.2319153632299192e-07, "epoch": 4.024259155204847, "percentage": 80.49, "elapsed_time": "3:33:45", "remaining_time": "0:51:49", "throughput": 8656.29, "total_tokens": 111017832} +{"current_steps": 164730, "total_steps": 204665, "loss": 0.0, "lr": 2.2313783669328613e-07, "epoch": 4.024381306036694, "percentage": 80.49, "elapsed_time": "3:33:45", "remaining_time": "0:51:49", "throughput": 8656.3, "total_tokens": 111020968} +{"current_steps": 164735, "total_steps": 204665, "loss": 0.0, "lr": 2.230841427131307e-07, "epoch": 4.024503456868541, "percentage": 80.49, "elapsed_time": "3:33:45", "remaining_time": "0:51:48", "throughput": 8656.32, "total_tokens": 111024232} +{"current_steps": 164740, "total_steps": 204665, "loss": 0.0, "lr": 2.2303045438291656e-07, "epoch": 4.024625607700388, "percentage": 80.49, "elapsed_time": "3:33:46", "remaining_time": "0:51:48", "throughput": 8656.37, "total_tokens": 111027816} +{"current_steps": 164745, "total_steps": 204665, "loss": 0.0, "lr": 2.2297677170303363e-07, "epoch": 4.024747758532236, "percentage": 80.49, "elapsed_time": "3:33:46", "remaining_time": "0:51:48", "throughput": 8656.35, "total_tokens": 111030568} +{"current_steps": 164750, "total_steps": 204665, "loss": 0.0, "lr": 2.2292309467387293e-07, "epoch": 4.0248699093640825, "percentage": 80.5, "elapsed_time": "3:33:46", "remaining_time": "0:51:47", "throughput": 8656.36, "total_tokens": 111033576} +{"current_steps": 164755, "total_steps": 204665, "loss": 0.0, "lr": 2.2286942329582425e-07, "epoch": 4.02499206019593, "percentage": 80.5, "elapsed_time": "3:33:47", "remaining_time": "0:51:47", "throughput": 8656.41, "total_tokens": 111037352} +{"current_steps": 164760, "total_steps": 204665, "loss": 0.0, "lr": 2.2281575756927818e-07, "epoch": 4.025114211027777, "percentage": 80.5, "elapsed_time": "3:33:47", "remaining_time": "0:51:46", "throughput": 8656.43, "total_tokens": 111040488} +{"current_steps": 164765, "total_steps": 204665, "loss": 0.0, "lr": 2.2276209749462516e-07, "epoch": 4.0252363618596245, "percentage": 80.5, "elapsed_time": "3:33:47", "remaining_time": "0:51:46", "throughput": 8656.45, "total_tokens": 111043752} +{"current_steps": 164770, "total_steps": 204665, "loss": 0.0, "lr": 2.227084430722549e-07, "epoch": 4.025358512691471, "percentage": 80.51, "elapsed_time": "3:33:48", "remaining_time": "0:51:46", "throughput": 8656.47, "total_tokens": 111047080} +{"current_steps": 164775, "total_steps": 204665, "loss": 0.0, "lr": 2.226547943025583e-07, "epoch": 4.025480663523319, "percentage": 80.51, "elapsed_time": "3:33:48", "remaining_time": "0:51:45", "throughput": 8656.49, "total_tokens": 111050216} +{"current_steps": 164780, "total_steps": 204665, "loss": 0.0, "lr": 2.226011511859247e-07, "epoch": 4.025602814355166, "percentage": 80.51, "elapsed_time": "3:33:48", "remaining_time": "0:51:45", "throughput": 8656.51, "total_tokens": 111053416} +{"current_steps": 164785, "total_steps": 204665, "loss": 0.0, "lr": 2.2254751372274473e-07, "epoch": 4.025724965187013, "percentage": 80.51, "elapsed_time": "3:33:49", "remaining_time": "0:51:44", "throughput": 8656.55, "total_tokens": 111057000} +{"current_steps": 164790, "total_steps": 204665, "loss": 0.0, "lr": 2.2249388191340857e-07, "epoch": 4.02584711601886, "percentage": 80.52, "elapsed_time": "3:33:49", "remaining_time": "0:51:44", "throughput": 8656.57, "total_tokens": 111060264} +{"current_steps": 164795, "total_steps": 204665, "loss": 0.0, "lr": 2.2244025575830582e-07, "epoch": 4.025969266850708, "percentage": 80.52, "elapsed_time": "3:33:49", "remaining_time": "0:51:44", "throughput": 8656.58, "total_tokens": 111063336} +{"current_steps": 164800, "total_steps": 204665, "loss": 0.0, "lr": 2.2238663525782687e-07, "epoch": 4.026091417682554, "percentage": 80.52, "elapsed_time": "3:33:50", "remaining_time": "0:51:43", "throughput": 8656.61, "total_tokens": 111066728} +{"current_steps": 164805, "total_steps": 204665, "loss": 0.0, "lr": 2.2233302041236124e-07, "epoch": 4.026213568514401, "percentage": 80.52, "elapsed_time": "3:33:50", "remaining_time": "0:51:43", "throughput": 8656.65, "total_tokens": 111070248} +{"current_steps": 164810, "total_steps": 204665, "loss": 0.0, "lr": 2.222794112222993e-07, "epoch": 4.026335719346249, "percentage": 80.53, "elapsed_time": "3:33:50", "remaining_time": "0:51:42", "throughput": 8656.65, "total_tokens": 111073256} +{"current_steps": 164815, "total_steps": 204665, "loss": 0.0, "lr": 2.2222580768803045e-07, "epoch": 4.0264578701780955, "percentage": 80.53, "elapsed_time": "3:33:51", "remaining_time": "0:51:42", "throughput": 8656.66, "total_tokens": 111076264} +{"current_steps": 164820, "total_steps": 204665, "loss": 0.0, "lr": 2.221722098099449e-07, "epoch": 4.026580021009943, "percentage": 80.53, "elapsed_time": "3:33:51", "remaining_time": "0:51:42", "throughput": 8656.71, "total_tokens": 111080040} +{"current_steps": 164825, "total_steps": 204665, "loss": 0.0, "lr": 2.221186175884323e-07, "epoch": 4.02670217184179, "percentage": 80.53, "elapsed_time": "3:33:52", "remaining_time": "0:51:41", "throughput": 8656.73, "total_tokens": 111083240} +{"current_steps": 164830, "total_steps": 204665, "loss": 0.0, "lr": 2.2206503102388207e-07, "epoch": 4.0268243226736375, "percentage": 80.54, "elapsed_time": "3:33:52", "remaining_time": "0:51:41", "throughput": 8656.73, "total_tokens": 111086184} +{"current_steps": 164835, "total_steps": 204665, "loss": 0.0, "lr": 2.2201145011668443e-07, "epoch": 4.026946473505484, "percentage": 80.54, "elapsed_time": "3:33:52", "remaining_time": "0:51:40", "throughput": 8656.74, "total_tokens": 111089320} +{"current_steps": 164840, "total_steps": 204665, "loss": 0.0, "lr": 2.219578748672285e-07, "epoch": 4.027068624337332, "percentage": 80.54, "elapsed_time": "3:33:53", "remaining_time": "0:51:40", "throughput": 8656.77, "total_tokens": 111092648} +{"current_steps": 164845, "total_steps": 204665, "loss": 0.0, "lr": 2.219043052759042e-07, "epoch": 4.027190775169179, "percentage": 80.54, "elapsed_time": "3:33:53", "remaining_time": "0:51:40", "throughput": 8656.8, "total_tokens": 111096104} +{"current_steps": 164850, "total_steps": 204665, "loss": 0.0, "lr": 2.2185074134310134e-07, "epoch": 4.027312926001026, "percentage": 80.55, "elapsed_time": "3:33:53", "remaining_time": "0:51:39", "throughput": 8656.82, "total_tokens": 111099304} +{"current_steps": 164855, "total_steps": 204665, "loss": 0.0, "lr": 2.217971830692089e-07, "epoch": 4.027435076832873, "percentage": 80.55, "elapsed_time": "3:33:54", "remaining_time": "0:51:39", "throughput": 8656.86, "total_tokens": 111102824} +{"current_steps": 164860, "total_steps": 204665, "loss": 0.0, "lr": 2.2174363045461697e-07, "epoch": 4.027557227664721, "percentage": 80.55, "elapsed_time": "3:33:54", "remaining_time": "0:51:38", "throughput": 8656.89, "total_tokens": 111106280} +{"current_steps": 164865, "total_steps": 204665, "loss": 0.0, "lr": 2.216900834997143e-07, "epoch": 4.027679378496567, "percentage": 80.55, "elapsed_time": "3:33:54", "remaining_time": "0:51:38", "throughput": 8656.91, "total_tokens": 111109416} +{"current_steps": 164870, "total_steps": 204665, "loss": 0.0, "lr": 2.2163654220489102e-07, "epoch": 4.027801529328415, "percentage": 80.56, "elapsed_time": "3:33:55", "remaining_time": "0:51:38", "throughput": 8656.93, "total_tokens": 111112744} +{"current_steps": 164875, "total_steps": 204665, "loss": 0.0, "lr": 2.2158300657053596e-07, "epoch": 4.027923680160262, "percentage": 80.56, "elapsed_time": "3:33:55", "remaining_time": "0:51:37", "throughput": 8656.97, "total_tokens": 111116328} +{"current_steps": 164880, "total_steps": 204665, "loss": 0.0, "lr": 2.215294765970386e-07, "epoch": 4.028045830992109, "percentage": 80.56, "elapsed_time": "3:33:55", "remaining_time": "0:51:37", "throughput": 8656.98, "total_tokens": 111119272} +{"current_steps": 164885, "total_steps": 204665, "loss": 0.0, "lr": 2.2147595228478844e-07, "epoch": 4.028167981823956, "percentage": 80.56, "elapsed_time": "3:33:56", "remaining_time": "0:51:36", "throughput": 8657.0, "total_tokens": 111122600} +{"current_steps": 164890, "total_steps": 204665, "loss": 0.0002, "lr": 2.2142243363417446e-07, "epoch": 4.028290132655804, "percentage": 80.57, "elapsed_time": "3:33:56", "remaining_time": "0:51:36", "throughput": 8657.04, "total_tokens": 111126184} +{"current_steps": 164895, "total_steps": 204665, "loss": 0.0, "lr": 2.213689206455861e-07, "epoch": 4.028412283487651, "percentage": 80.57, "elapsed_time": "3:33:56", "remaining_time": "0:51:36", "throughput": 8657.05, "total_tokens": 111129256} +{"current_steps": 164900, "total_steps": 204665, "loss": 0.0, "lr": 2.2131541331941216e-07, "epoch": 4.028534434319497, "percentage": 80.57, "elapsed_time": "3:33:57", "remaining_time": "0:51:35", "throughput": 8657.05, "total_tokens": 111132136} +{"current_steps": 164905, "total_steps": 204665, "loss": 0.0464, "lr": 2.2126191165604214e-07, "epoch": 4.028656585151345, "percentage": 80.57, "elapsed_time": "3:33:57", "remaining_time": "0:51:35", "throughput": 8657.08, "total_tokens": 111135528} +{"current_steps": 164910, "total_steps": 204665, "loss": 0.0, "lr": 2.2120841565586479e-07, "epoch": 4.028778735983192, "percentage": 80.58, "elapsed_time": "3:33:57", "remaining_time": "0:51:34", "throughput": 8657.1, "total_tokens": 111138792} +{"current_steps": 164915, "total_steps": 204665, "loss": 0.0, "lr": 2.211549253192696e-07, "epoch": 4.028900886815039, "percentage": 80.58, "elapsed_time": "3:33:58", "remaining_time": "0:51:34", "throughput": 8657.11, "total_tokens": 111141864} +{"current_steps": 164920, "total_steps": 204665, "loss": 0.0, "lr": 2.2110144064664493e-07, "epoch": 4.029023037646886, "percentage": 80.58, "elapsed_time": "3:33:58", "remaining_time": "0:51:34", "throughput": 8657.14, "total_tokens": 111145320} +{"current_steps": 164925, "total_steps": 204665, "loss": 0.0, "lr": 2.2104796163838036e-07, "epoch": 4.029145188478734, "percentage": 80.58, "elapsed_time": "3:33:58", "remaining_time": "0:51:33", "throughput": 8657.15, "total_tokens": 111148264} +{"current_steps": 164930, "total_steps": 204665, "loss": 0.0, "lr": 2.2099448829486455e-07, "epoch": 4.0292673393105805, "percentage": 80.59, "elapsed_time": "3:33:59", "remaining_time": "0:51:33", "throughput": 8657.21, "total_tokens": 111152168} +{"current_steps": 164935, "total_steps": 204665, "loss": 0.0, "lr": 2.2094102061648613e-07, "epoch": 4.029389490142428, "percentage": 80.59, "elapsed_time": "3:33:59", "remaining_time": "0:51:32", "throughput": 8657.23, "total_tokens": 111155432} +{"current_steps": 164940, "total_steps": 204665, "loss": 0.0, "lr": 2.2088755860363406e-07, "epoch": 4.029511640974275, "percentage": 80.59, "elapsed_time": "3:33:59", "remaining_time": "0:51:32", "throughput": 8657.27, "total_tokens": 111158952} +{"current_steps": 164945, "total_steps": 204665, "loss": 0.0, "lr": 2.2083410225669752e-07, "epoch": 4.0296337918061225, "percentage": 80.59, "elapsed_time": "3:34:00", "remaining_time": "0:51:32", "throughput": 8657.3, "total_tokens": 111162472} +{"current_steps": 164950, "total_steps": 204665, "loss": 0.0, "lr": 2.2078065157606473e-07, "epoch": 4.029755942637969, "percentage": 80.6, "elapsed_time": "3:34:00", "remaining_time": "0:51:31", "throughput": 8657.34, "total_tokens": 111165928} +{"current_steps": 164955, "total_steps": 204665, "loss": 0.0, "lr": 2.2072720656212483e-07, "epoch": 4.029878093469817, "percentage": 80.6, "elapsed_time": "3:34:01", "remaining_time": "0:51:31", "throughput": 8657.34, "total_tokens": 111168936} +{"current_steps": 164960, "total_steps": 204665, "loss": 0.0, "lr": 2.206737672152661e-07, "epoch": 4.030000244301664, "percentage": 80.6, "elapsed_time": "3:34:01", "remaining_time": "0:51:30", "throughput": 8657.38, "total_tokens": 111172456} +{"current_steps": 164965, "total_steps": 204665, "loss": 0.0, "lr": 2.206203335358776e-07, "epoch": 4.030122395133511, "percentage": 80.6, "elapsed_time": "3:34:01", "remaining_time": "0:51:30", "throughput": 8657.42, "total_tokens": 111176040} +{"current_steps": 164970, "total_steps": 204665, "loss": 0.0, "lr": 2.2056690552434732e-07, "epoch": 4.030244545965358, "percentage": 80.6, "elapsed_time": "3:34:02", "remaining_time": "0:51:30", "throughput": 8657.44, "total_tokens": 111179176} +{"current_steps": 164975, "total_steps": 204665, "loss": 0.0, "lr": 2.2051348318106421e-07, "epoch": 4.030366696797206, "percentage": 80.61, "elapsed_time": "3:34:02", "remaining_time": "0:51:29", "throughput": 8657.46, "total_tokens": 111182376} +{"current_steps": 164980, "total_steps": 204665, "loss": 0.0, "lr": 2.2046006650641692e-07, "epoch": 4.030488847629052, "percentage": 80.61, "elapsed_time": "3:34:02", "remaining_time": "0:51:29", "throughput": 8657.46, "total_tokens": 111185448} +{"current_steps": 164985, "total_steps": 204665, "loss": 0.0, "lr": 2.204066555007935e-07, "epoch": 4.030610998460899, "percentage": 80.61, "elapsed_time": "3:34:03", "remaining_time": "0:51:28", "throughput": 8657.46, "total_tokens": 111188328} +{"current_steps": 164990, "total_steps": 204665, "loss": 0.0, "lr": 2.2035325016458273e-07, "epoch": 4.030733149292747, "percentage": 80.61, "elapsed_time": "3:34:03", "remaining_time": "0:51:28", "throughput": 8657.48, "total_tokens": 111191528} +{"current_steps": 164995, "total_steps": 204665, "loss": 0.0, "lr": 2.2029985049817268e-07, "epoch": 4.0308553001245935, "percentage": 80.62, "elapsed_time": "3:34:03", "remaining_time": "0:51:28", "throughput": 8657.51, "total_tokens": 111194984} +{"current_steps": 165000, "total_steps": 204665, "loss": 0.0, "lr": 2.2024645650195174e-07, "epoch": 4.030977450956441, "percentage": 80.62, "elapsed_time": "3:34:04", "remaining_time": "0:51:27", "throughput": 8657.54, "total_tokens": 111198440} +{"current_steps": 165005, "total_steps": 204665, "loss": 0.0, "lr": 2.2019306817630856e-07, "epoch": 4.031099601788288, "percentage": 80.62, "elapsed_time": "3:34:04", "remaining_time": "0:51:27", "throughput": 8657.57, "total_tokens": 111201704} +{"current_steps": 165010, "total_steps": 204665, "loss": 0.0, "lr": 2.2013968552163098e-07, "epoch": 4.0312217526201355, "percentage": 80.62, "elapsed_time": "3:34:04", "remaining_time": "0:51:26", "throughput": 8657.6, "total_tokens": 111205224} +{"current_steps": 165015, "total_steps": 204665, "loss": 0.0, "lr": 2.2008630853830755e-07, "epoch": 4.031343903451982, "percentage": 80.63, "elapsed_time": "3:34:05", "remaining_time": "0:51:26", "throughput": 8657.61, "total_tokens": 111208296} +{"current_steps": 165020, "total_steps": 204665, "loss": 0.0003, "lr": 2.20032937226726e-07, "epoch": 4.03146605428383, "percentage": 80.63, "elapsed_time": "3:34:05", "remaining_time": "0:51:26", "throughput": 8657.65, "total_tokens": 111211816} +{"current_steps": 165025, "total_steps": 204665, "loss": 0.0, "lr": 2.19979571587275e-07, "epoch": 4.031588205115677, "percentage": 80.63, "elapsed_time": "3:34:05", "remaining_time": "0:51:25", "throughput": 8657.67, "total_tokens": 111215016} +{"current_steps": 165030, "total_steps": 204665, "loss": 0.0, "lr": 2.1992621162034232e-07, "epoch": 4.031710355947524, "percentage": 80.63, "elapsed_time": "3:34:06", "remaining_time": "0:51:25", "throughput": 8657.7, "total_tokens": 111218408} +{"current_steps": 165035, "total_steps": 204665, "loss": 0.0, "lr": 2.1987285732631577e-07, "epoch": 4.031832506779371, "percentage": 80.64, "elapsed_time": "3:34:06", "remaining_time": "0:51:24", "throughput": 8657.71, "total_tokens": 111221544} +{"current_steps": 165040, "total_steps": 204665, "loss": 0.0, "lr": 2.1981950870558385e-07, "epoch": 4.031954657611219, "percentage": 80.64, "elapsed_time": "3:34:06", "remaining_time": "0:51:24", "throughput": 8657.82, "total_tokens": 111226088} +{"current_steps": 165045, "total_steps": 204665, "loss": 0.0, "lr": 2.1976616575853412e-07, "epoch": 4.032076808443065, "percentage": 80.64, "elapsed_time": "3:34:07", "remaining_time": "0:51:24", "throughput": 8657.86, "total_tokens": 111229608} +{"current_steps": 165050, "total_steps": 204665, "loss": 0.0, "lr": 2.1971282848555495e-07, "epoch": 4.032198959274913, "percentage": 80.64, "elapsed_time": "3:34:07", "remaining_time": "0:51:23", "throughput": 8657.86, "total_tokens": 111232552} +{"current_steps": 165055, "total_steps": 204665, "loss": 0.0, "lr": 2.1965949688703368e-07, "epoch": 4.03232111010676, "percentage": 80.65, "elapsed_time": "3:34:07", "remaining_time": "0:51:23", "throughput": 8657.91, "total_tokens": 111236200} +{"current_steps": 165060, "total_steps": 204665, "loss": 0.0, "lr": 2.1960617096335876e-07, "epoch": 4.032443260938607, "percentage": 80.65, "elapsed_time": "3:34:08", "remaining_time": "0:51:22", "throughput": 8657.93, "total_tokens": 111239464} +{"current_steps": 165065, "total_steps": 204665, "loss": 0.0, "lr": 2.1955285071491724e-07, "epoch": 4.032565411770454, "percentage": 80.65, "elapsed_time": "3:34:08", "remaining_time": "0:51:22", "throughput": 8657.94, "total_tokens": 111242600} +{"current_steps": 165070, "total_steps": 204665, "loss": 0.0, "lr": 2.194995361420975e-07, "epoch": 4.032687562602302, "percentage": 80.65, "elapsed_time": "3:34:08", "remaining_time": "0:51:22", "throughput": 8657.97, "total_tokens": 111246056} +{"current_steps": 165075, "total_steps": 204665, "loss": 0.0, "lr": 2.1944622724528716e-07, "epoch": 4.0328097134341485, "percentage": 80.66, "elapsed_time": "3:34:09", "remaining_time": "0:51:21", "throughput": 8657.99, "total_tokens": 111249320} +{"current_steps": 165080, "total_steps": 204665, "loss": 0.0609, "lr": 2.1939292402487363e-07, "epoch": 4.032931864265995, "percentage": 80.66, "elapsed_time": "3:34:09", "remaining_time": "0:51:21", "throughput": 8658.03, "total_tokens": 111252776} +{"current_steps": 165085, "total_steps": 204665, "loss": 0.0, "lr": 2.1933962648124505e-07, "epoch": 4.033054015097843, "percentage": 80.66, "elapsed_time": "3:34:10", "remaining_time": "0:51:20", "throughput": 8658.04, "total_tokens": 111255912} +{"current_steps": 165090, "total_steps": 204665, "loss": 0.062, "lr": 2.1928633461478828e-07, "epoch": 4.03317616592969, "percentage": 80.66, "elapsed_time": "3:34:10", "remaining_time": "0:51:20", "throughput": 8658.07, "total_tokens": 111259240} +{"current_steps": 165095, "total_steps": 204665, "loss": 0.0, "lr": 2.192330484258913e-07, "epoch": 4.033298316761537, "percentage": 80.67, "elapsed_time": "3:34:10", "remaining_time": "0:51:20", "throughput": 8658.1, "total_tokens": 111262760} +{"current_steps": 165100, "total_steps": 204665, "loss": 0.0, "lr": 2.1917976791494186e-07, "epoch": 4.033420467593384, "percentage": 80.67, "elapsed_time": "3:34:11", "remaining_time": "0:51:19", "throughput": 8658.13, "total_tokens": 111266088} +{"current_steps": 165105, "total_steps": 204665, "loss": 0.0, "lr": 2.1912649308232688e-07, "epoch": 4.033542618425232, "percentage": 80.67, "elapsed_time": "3:34:11", "remaining_time": "0:51:19", "throughput": 8658.17, "total_tokens": 111269608} +{"current_steps": 165110, "total_steps": 204665, "loss": 0.0, "lr": 2.190732239284344e-07, "epoch": 4.033664769257078, "percentage": 80.67, "elapsed_time": "3:34:11", "remaining_time": "0:51:18", "throughput": 8658.17, "total_tokens": 111272488} +{"current_steps": 165115, "total_steps": 204665, "loss": 0.0, "lr": 2.1901996045365123e-07, "epoch": 4.033786920088926, "percentage": 80.68, "elapsed_time": "3:34:12", "remaining_time": "0:51:18", "throughput": 8658.2, "total_tokens": 111275944} +{"current_steps": 165120, "total_steps": 204665, "loss": 0.0, "lr": 2.1896670265836516e-07, "epoch": 4.033909070920773, "percentage": 80.68, "elapsed_time": "3:34:12", "remaining_time": "0:51:18", "throughput": 8658.23, "total_tokens": 111279336} +{"current_steps": 165125, "total_steps": 204665, "loss": 0.0, "lr": 2.1891345054296306e-07, "epoch": 4.03403122175262, "percentage": 80.68, "elapsed_time": "3:34:12", "remaining_time": "0:51:17", "throughput": 8658.28, "total_tokens": 111283048} +{"current_steps": 165130, "total_steps": 204665, "loss": 0.0, "lr": 2.188602041078328e-07, "epoch": 4.034153372584467, "percentage": 80.68, "elapsed_time": "3:34:13", "remaining_time": "0:51:17", "throughput": 8658.28, "total_tokens": 111285992} +{"current_steps": 165135, "total_steps": 204665, "loss": 0.0, "lr": 2.1880696335336114e-07, "epoch": 4.034275523416315, "percentage": 80.69, "elapsed_time": "3:34:13", "remaining_time": "0:51:16", "throughput": 8658.29, "total_tokens": 111289064} +{"current_steps": 165140, "total_steps": 204665, "loss": 0.0, "lr": 2.1875372827993499e-07, "epoch": 4.0343976742481615, "percentage": 80.69, "elapsed_time": "3:34:13", "remaining_time": "0:51:16", "throughput": 8658.33, "total_tokens": 111292712} +{"current_steps": 165145, "total_steps": 204665, "loss": 0.0, "lr": 2.1870049888794228e-07, "epoch": 4.034519825080009, "percentage": 80.69, "elapsed_time": "3:34:14", "remaining_time": "0:51:16", "throughput": 8658.19, "total_tokens": 111296232} +{"current_steps": 165150, "total_steps": 204665, "loss": 0.0001, "lr": 2.1864727517776938e-07, "epoch": 4.034641975911856, "percentage": 80.69, "elapsed_time": "3:34:14", "remaining_time": "0:51:15", "throughput": 8658.21, "total_tokens": 111299432} +{"current_steps": 165155, "total_steps": 204665, "loss": 0.0, "lr": 2.1859405714980394e-07, "epoch": 4.0347641267437035, "percentage": 80.7, "elapsed_time": "3:34:15", "remaining_time": "0:51:15", "throughput": 8658.21, "total_tokens": 111302440} +{"current_steps": 165160, "total_steps": 204665, "loss": 0.0, "lr": 2.1854084480443237e-07, "epoch": 4.03488627757555, "percentage": 80.7, "elapsed_time": "3:34:15", "remaining_time": "0:51:14", "throughput": 8658.24, "total_tokens": 111305768} +{"current_steps": 165165, "total_steps": 204665, "loss": 0.0, "lr": 2.1848763814204197e-07, "epoch": 4.035008428407397, "percentage": 80.7, "elapsed_time": "3:34:15", "remaining_time": "0:51:14", "throughput": 8658.26, "total_tokens": 111309032} +{"current_steps": 165170, "total_steps": 204665, "loss": 0.0, "lr": 2.1843443716301991e-07, "epoch": 4.035130579239245, "percentage": 80.7, "elapsed_time": "3:34:16", "remaining_time": "0:51:14", "throughput": 8658.29, "total_tokens": 111312424} +{"current_steps": 165175, "total_steps": 204665, "loss": 0.0, "lr": 2.1838124186775265e-07, "epoch": 4.035252730071091, "percentage": 80.71, "elapsed_time": "3:34:16", "remaining_time": "0:51:13", "throughput": 8658.46, "total_tokens": 111318184} +{"current_steps": 165180, "total_steps": 204665, "loss": 0.0, "lr": 2.1832805225662742e-07, "epoch": 4.035374880902939, "percentage": 80.71, "elapsed_time": "3:34:16", "remaining_time": "0:51:13", "throughput": 8658.48, "total_tokens": 111321320} +{"current_steps": 165185, "total_steps": 204665, "loss": 0.0479, "lr": 2.1827486833003062e-07, "epoch": 4.035497031734786, "percentage": 80.71, "elapsed_time": "3:34:17", "remaining_time": "0:51:12", "throughput": 8658.52, "total_tokens": 111324840} +{"current_steps": 165190, "total_steps": 204665, "loss": 0.0, "lr": 2.1822169008834924e-07, "epoch": 4.035619182566633, "percentage": 80.71, "elapsed_time": "3:34:17", "remaining_time": "0:51:12", "throughput": 8658.57, "total_tokens": 111328616} +{"current_steps": 165195, "total_steps": 204665, "loss": 0.0, "lr": 2.181685175319702e-07, "epoch": 4.03574133339848, "percentage": 80.71, "elapsed_time": "3:34:17", "remaining_time": "0:51:12", "throughput": 8658.59, "total_tokens": 111331752} +{"current_steps": 165200, "total_steps": 204665, "loss": 0.0002, "lr": 2.1811535066127983e-07, "epoch": 4.035863484230328, "percentage": 80.72, "elapsed_time": "3:34:18", "remaining_time": "0:51:11", "throughput": 8658.62, "total_tokens": 111335272} +{"current_steps": 165205, "total_steps": 204665, "loss": 0.0, "lr": 2.180621894766651e-07, "epoch": 4.035985635062175, "percentage": 80.72, "elapsed_time": "3:34:18", "remaining_time": "0:51:11", "throughput": 8658.68, "total_tokens": 111339112} +{"current_steps": 165210, "total_steps": 204665, "loss": 0.0, "lr": 2.1800903397851222e-07, "epoch": 4.036107785894022, "percentage": 80.72, "elapsed_time": "3:34:19", "remaining_time": "0:51:10", "throughput": 8658.7, "total_tokens": 111342312} +{"current_steps": 165215, "total_steps": 204665, "loss": 0.0, "lr": 2.1795588416720822e-07, "epoch": 4.036229936725869, "percentage": 80.72, "elapsed_time": "3:34:19", "remaining_time": "0:51:10", "throughput": 8658.73, "total_tokens": 111345832} +{"current_steps": 165220, "total_steps": 204665, "loss": 0.0, "lr": 2.1790274004313912e-07, "epoch": 4.036352087557717, "percentage": 80.73, "elapsed_time": "3:34:19", "remaining_time": "0:51:10", "throughput": 8658.73, "total_tokens": 111348776} +{"current_steps": 165225, "total_steps": 204665, "loss": 0.0, "lr": 2.1784960160669197e-07, "epoch": 4.036474238389563, "percentage": 80.73, "elapsed_time": "3:34:20", "remaining_time": "0:51:09", "throughput": 8658.79, "total_tokens": 111352552} +{"current_steps": 165230, "total_steps": 204665, "loss": 0.0, "lr": 2.1779646885825264e-07, "epoch": 4.036596389221411, "percentage": 80.73, "elapsed_time": "3:34:20", "remaining_time": "0:51:09", "throughput": 8658.8, "total_tokens": 111355688} +{"current_steps": 165235, "total_steps": 204665, "loss": 0.0, "lr": 2.1774334179820797e-07, "epoch": 4.036718540053258, "percentage": 80.73, "elapsed_time": "3:34:20", "remaining_time": "0:51:08", "throughput": 8658.84, "total_tokens": 111359208} +{"current_steps": 165240, "total_steps": 204665, "loss": 0.0168, "lr": 2.1769022042694385e-07, "epoch": 4.036840690885105, "percentage": 80.74, "elapsed_time": "3:34:21", "remaining_time": "0:51:08", "throughput": 8658.88, "total_tokens": 111362728} +{"current_steps": 165245, "total_steps": 204665, "loss": 0.0, "lr": 2.176371047448472e-07, "epoch": 4.036962841716952, "percentage": 80.74, "elapsed_time": "3:34:21", "remaining_time": "0:51:08", "throughput": 8658.94, "total_tokens": 111366568} +{"current_steps": 165250, "total_steps": 204665, "loss": 0.0, "lr": 2.175839947523036e-07, "epoch": 4.037084992548799, "percentage": 80.74, "elapsed_time": "3:34:21", "remaining_time": "0:51:07", "throughput": 8658.95, "total_tokens": 111369768} +{"current_steps": 165255, "total_steps": 204665, "loss": 0.0001, "lr": 2.1753089044969997e-07, "epoch": 4.0372071433806465, "percentage": 80.74, "elapsed_time": "3:34:22", "remaining_time": "0:51:07", "throughput": 8658.99, "total_tokens": 111373288} +{"current_steps": 165260, "total_steps": 204665, "loss": 0.0004, "lr": 2.1747779183742187e-07, "epoch": 4.037329294212493, "percentage": 80.75, "elapsed_time": "3:34:22", "remaining_time": "0:51:06", "throughput": 8659.05, "total_tokens": 111377128} +{"current_steps": 165265, "total_steps": 204665, "loss": 0.0, "lr": 2.17424698915856e-07, "epoch": 4.037451445044341, "percentage": 80.75, "elapsed_time": "3:34:22", "remaining_time": "0:51:06", "throughput": 8659.11, "total_tokens": 111380968} +{"current_steps": 165270, "total_steps": 204665, "loss": 0.0, "lr": 2.1737161168538787e-07, "epoch": 4.037573595876188, "percentage": 80.75, "elapsed_time": "3:34:23", "remaining_time": "0:51:06", "throughput": 8659.11, "total_tokens": 111383912} +{"current_steps": 165275, "total_steps": 204665, "loss": 0.0, "lr": 2.1731853014640422e-07, "epoch": 4.037695746708035, "percentage": 80.75, "elapsed_time": "3:34:23", "remaining_time": "0:51:05", "throughput": 8659.16, "total_tokens": 111387688} +{"current_steps": 165280, "total_steps": 204665, "loss": 0.0, "lr": 2.1726545429929055e-07, "epoch": 4.037817897539882, "percentage": 80.76, "elapsed_time": "3:34:23", "remaining_time": "0:51:05", "throughput": 8659.16, "total_tokens": 111390632} +{"current_steps": 165285, "total_steps": 204665, "loss": 0.0, "lr": 2.1721238414443287e-07, "epoch": 4.03794004837173, "percentage": 80.76, "elapsed_time": "3:34:24", "remaining_time": "0:51:04", "throughput": 8659.24, "total_tokens": 111394728} +{"current_steps": 165290, "total_steps": 204665, "loss": 0.0, "lr": 2.1715931968221768e-07, "epoch": 4.038062199203576, "percentage": 80.76, "elapsed_time": "3:34:24", "remaining_time": "0:51:04", "throughput": 8659.29, "total_tokens": 111398504} +{"current_steps": 165295, "total_steps": 204665, "loss": 0.0, "lr": 2.1710626091303008e-07, "epoch": 4.038184350035424, "percentage": 80.76, "elapsed_time": "3:34:24", "remaining_time": "0:51:04", "throughput": 8659.28, "total_tokens": 111401320} +{"current_steps": 165300, "total_steps": 204665, "loss": 0.0, "lr": 2.1705320783725667e-07, "epoch": 4.038306500867271, "percentage": 80.77, "elapsed_time": "3:34:25", "remaining_time": "0:51:03", "throughput": 8659.32, "total_tokens": 111404840} +{"current_steps": 165305, "total_steps": 204665, "loss": 0.0, "lr": 2.170001604552827e-07, "epoch": 4.038428651699118, "percentage": 80.77, "elapsed_time": "3:34:25", "remaining_time": "0:51:03", "throughput": 8659.34, "total_tokens": 111408168} +{"current_steps": 165310, "total_steps": 204665, "loss": 0.0, "lr": 2.1694711876749438e-07, "epoch": 4.038550802530965, "percentage": 80.77, "elapsed_time": "3:34:26", "remaining_time": "0:51:02", "throughput": 8659.38, "total_tokens": 111411688} +{"current_steps": 165315, "total_steps": 204665, "loss": 0.0, "lr": 2.168940827742769e-07, "epoch": 4.038672953362813, "percentage": 80.77, "elapsed_time": "3:34:26", "remaining_time": "0:51:02", "throughput": 8659.42, "total_tokens": 111415144} +{"current_steps": 165320, "total_steps": 204665, "loss": 0.0, "lr": 2.1684105247601635e-07, "epoch": 4.0387951041946595, "percentage": 80.78, "elapsed_time": "3:34:26", "remaining_time": "0:51:02", "throughput": 8659.45, "total_tokens": 111418664} +{"current_steps": 165325, "total_steps": 204665, "loss": 0.0, "lr": 2.1678802787309857e-07, "epoch": 4.038917255026507, "percentage": 80.78, "elapsed_time": "3:34:27", "remaining_time": "0:51:01", "throughput": 8659.45, "total_tokens": 111421608} +{"current_steps": 165330, "total_steps": 204665, "loss": 0.0, "lr": 2.167350089659087e-07, "epoch": 4.039039405858354, "percentage": 80.78, "elapsed_time": "3:34:27", "remaining_time": "0:51:01", "throughput": 8659.47, "total_tokens": 111424808} +{"current_steps": 165335, "total_steps": 204665, "loss": 0.0, "lr": 2.166819957548327e-07, "epoch": 4.0391615566902015, "percentage": 80.78, "elapsed_time": "3:34:27", "remaining_time": "0:51:00", "throughput": 8659.52, "total_tokens": 111428456} +{"current_steps": 165340, "total_steps": 204665, "loss": 0.0, "lr": 2.1662898824025588e-07, "epoch": 4.039283707522048, "percentage": 80.79, "elapsed_time": "3:34:28", "remaining_time": "0:51:00", "throughput": 8659.54, "total_tokens": 111431656} +{"current_steps": 165345, "total_steps": 204665, "loss": 0.0, "lr": 2.1657598642256358e-07, "epoch": 4.039405858353895, "percentage": 80.79, "elapsed_time": "3:34:28", "remaining_time": "0:51:00", "throughput": 8659.55, "total_tokens": 111434792} +{"current_steps": 165350, "total_steps": 204665, "loss": 0.0, "lr": 2.165229903021417e-07, "epoch": 4.039528009185743, "percentage": 80.79, "elapsed_time": "3:34:28", "remaining_time": "0:50:59", "throughput": 8659.58, "total_tokens": 111438120} +{"current_steps": 165355, "total_steps": 204665, "loss": 0.0, "lr": 2.1646999987937497e-07, "epoch": 4.039650160017589, "percentage": 80.79, "elapsed_time": "3:34:29", "remaining_time": "0:50:59", "throughput": 8659.6, "total_tokens": 111441448} +{"current_steps": 165360, "total_steps": 204665, "loss": 0.0, "lr": 2.164170151546496e-07, "epoch": 4.039772310849437, "percentage": 80.8, "elapsed_time": "3:34:29", "remaining_time": "0:50:58", "throughput": 8659.61, "total_tokens": 111444520} +{"current_steps": 165365, "total_steps": 204665, "loss": 0.0, "lr": 2.1636403612835007e-07, "epoch": 4.039894461681284, "percentage": 80.8, "elapsed_time": "3:34:29", "remaining_time": "0:50:58", "throughput": 8659.64, "total_tokens": 111447848} +{"current_steps": 165370, "total_steps": 204665, "loss": 0.0349, "lr": 2.1631106280086232e-07, "epoch": 4.040016612513131, "percentage": 80.8, "elapsed_time": "3:34:30", "remaining_time": "0:50:58", "throughput": 8659.67, "total_tokens": 111451240} +{"current_steps": 165375, "total_steps": 204665, "loss": 0.0, "lr": 2.1625809517257098e-07, "epoch": 4.040138763344978, "percentage": 80.8, "elapsed_time": "3:34:30", "remaining_time": "0:50:57", "throughput": 8659.7, "total_tokens": 111454632} +{"current_steps": 165380, "total_steps": 204665, "loss": 0.0, "lr": 2.162051332438617e-07, "epoch": 4.040260914176826, "percentage": 80.81, "elapsed_time": "3:34:30", "remaining_time": "0:50:57", "throughput": 8659.74, "total_tokens": 111458152} +{"current_steps": 165385, "total_steps": 204665, "loss": 0.0, "lr": 2.1615217701511967e-07, "epoch": 4.0403830650086725, "percentage": 80.81, "elapsed_time": "3:34:31", "remaining_time": "0:50:56", "throughput": 8659.76, "total_tokens": 111461480} +{"current_steps": 165390, "total_steps": 204665, "loss": 0.0, "lr": 2.1609922648672962e-07, "epoch": 4.04050521584052, "percentage": 80.81, "elapsed_time": "3:34:31", "remaining_time": "0:50:56", "throughput": 8659.77, "total_tokens": 111464552} +{"current_steps": 165395, "total_steps": 204665, "loss": 0.0, "lr": 2.1604628165907712e-07, "epoch": 4.040627366672367, "percentage": 80.81, "elapsed_time": "3:34:31", "remaining_time": "0:50:56", "throughput": 8659.81, "total_tokens": 111468072} +{"current_steps": 165400, "total_steps": 204665, "loss": 0.0, "lr": 2.1599334253254665e-07, "epoch": 4.0407495175042145, "percentage": 80.81, "elapsed_time": "3:34:32", "remaining_time": "0:50:55", "throughput": 8659.85, "total_tokens": 111471592} +{"current_steps": 165405, "total_steps": 204665, "loss": 0.0, "lr": 2.1594040910752344e-07, "epoch": 4.040871668336061, "percentage": 80.82, "elapsed_time": "3:34:32", "remaining_time": "0:50:55", "throughput": 8659.87, "total_tokens": 111474984} +{"current_steps": 165410, "total_steps": 204665, "loss": 0.0348, "lr": 2.1588748138439271e-07, "epoch": 4.040993819167909, "percentage": 80.82, "elapsed_time": "3:34:32", "remaining_time": "0:50:54", "throughput": 8659.91, "total_tokens": 111478504} +{"current_steps": 165415, "total_steps": 204665, "loss": 0.0, "lr": 2.1583455936353888e-07, "epoch": 4.041115969999756, "percentage": 80.82, "elapsed_time": "3:34:33", "remaining_time": "0:50:54", "throughput": 8659.93, "total_tokens": 111481640} +{"current_steps": 165420, "total_steps": 204665, "loss": 0.0, "lr": 2.157816430453473e-07, "epoch": 4.041238120831603, "percentage": 80.82, "elapsed_time": "3:34:33", "remaining_time": "0:50:54", "throughput": 8659.98, "total_tokens": 111485480} +{"current_steps": 165425, "total_steps": 204665, "loss": 0.0003, "lr": 2.1572873243020228e-07, "epoch": 4.04136027166345, "percentage": 80.83, "elapsed_time": "3:34:33", "remaining_time": "0:50:53", "throughput": 8660.0, "total_tokens": 111488616} +{"current_steps": 165430, "total_steps": 204665, "loss": 0.0, "lr": 2.1567582751848913e-07, "epoch": 4.041482422495297, "percentage": 80.83, "elapsed_time": "3:34:34", "remaining_time": "0:50:53", "throughput": 8660.0, "total_tokens": 111491624} +{"current_steps": 165435, "total_steps": 204665, "loss": 0.0, "lr": 2.1562292831059203e-07, "epoch": 4.041604573327144, "percentage": 80.83, "elapsed_time": "3:34:34", "remaining_time": "0:50:53", "throughput": 8660.02, "total_tokens": 111494952} +{"current_steps": 165440, "total_steps": 204665, "loss": 0.0, "lr": 2.1557003480689627e-07, "epoch": 4.041726724158991, "percentage": 80.83, "elapsed_time": "3:34:35", "remaining_time": "0:50:52", "throughput": 8660.03, "total_tokens": 111497896} +{"current_steps": 165445, "total_steps": 204665, "loss": 0.0, "lr": 2.1551714700778623e-07, "epoch": 4.041848874990839, "percentage": 80.84, "elapsed_time": "3:34:35", "remaining_time": "0:50:52", "throughput": 8660.04, "total_tokens": 111501032} +{"current_steps": 165450, "total_steps": 204665, "loss": 0.0, "lr": 2.1546426491364622e-07, "epoch": 4.0419710258226855, "percentage": 80.84, "elapsed_time": "3:34:35", "remaining_time": "0:50:51", "throughput": 8660.05, "total_tokens": 111504168} +{"current_steps": 165455, "total_steps": 204665, "loss": 0.0882, "lr": 2.154113885248613e-07, "epoch": 4.042093176654533, "percentage": 80.84, "elapsed_time": "3:34:36", "remaining_time": "0:50:51", "throughput": 8660.09, "total_tokens": 111507688} +{"current_steps": 165460, "total_steps": 204665, "loss": 0.0, "lr": 2.1535851784181558e-07, "epoch": 4.04221532748638, "percentage": 80.84, "elapsed_time": "3:34:36", "remaining_time": "0:50:51", "throughput": 8660.1, "total_tokens": 111510760} +{"current_steps": 165465, "total_steps": 204665, "loss": 0.0, "lr": 2.15305652864894e-07, "epoch": 4.0423374783182275, "percentage": 80.85, "elapsed_time": "3:34:36", "remaining_time": "0:50:50", "throughput": 8660.11, "total_tokens": 111513832} +{"current_steps": 165470, "total_steps": 204665, "loss": 0.0, "lr": 2.1525279359448046e-07, "epoch": 4.042459629150074, "percentage": 80.85, "elapsed_time": "3:34:37", "remaining_time": "0:50:50", "throughput": 8660.14, "total_tokens": 111517224} +{"current_steps": 165475, "total_steps": 204665, "loss": 0.0, "lr": 2.1519994003095976e-07, "epoch": 4.042581779981922, "percentage": 80.85, "elapsed_time": "3:34:37", "remaining_time": "0:50:49", "throughput": 8660.17, "total_tokens": 111520680} +{"current_steps": 165480, "total_steps": 204665, "loss": 0.0, "lr": 2.1514709217471638e-07, "epoch": 4.042703930813769, "percentage": 80.85, "elapsed_time": "3:34:37", "remaining_time": "0:50:49", "throughput": 8660.17, "total_tokens": 111523688} +{"current_steps": 165485, "total_steps": 204665, "loss": 0.0, "lr": 2.1509425002613424e-07, "epoch": 4.042826081645616, "percentage": 80.86, "elapsed_time": "3:34:38", "remaining_time": "0:50:49", "throughput": 8660.19, "total_tokens": 111526824} +{"current_steps": 165490, "total_steps": 204665, "loss": 0.0, "lr": 2.1504141358559812e-07, "epoch": 4.042948232477463, "percentage": 80.86, "elapsed_time": "3:34:38", "remaining_time": "0:50:48", "throughput": 8660.22, "total_tokens": 111530216} +{"current_steps": 165495, "total_steps": 204665, "loss": 0.0, "lr": 2.1498858285349164e-07, "epoch": 4.043070383309311, "percentage": 80.86, "elapsed_time": "3:34:38", "remaining_time": "0:50:48", "throughput": 8660.22, "total_tokens": 111533224} +{"current_steps": 165500, "total_steps": 204665, "loss": 0.0, "lr": 2.1493575783019934e-07, "epoch": 4.043192534141157, "percentage": 80.86, "elapsed_time": "3:34:39", "remaining_time": "0:50:47", "throughput": 8660.27, "total_tokens": 111536872} +{"current_steps": 165505, "total_steps": 204665, "loss": 0.0, "lr": 2.148829385161056e-07, "epoch": 4.043314684973005, "percentage": 80.87, "elapsed_time": "3:34:39", "remaining_time": "0:50:47", "throughput": 8660.28, "total_tokens": 111540072} +{"current_steps": 165510, "total_steps": 204665, "loss": 0.0, "lr": 2.1483012491159404e-07, "epoch": 4.043436835804852, "percentage": 80.87, "elapsed_time": "3:34:39", "remaining_time": "0:50:47", "throughput": 8660.32, "total_tokens": 111543656} +{"current_steps": 165515, "total_steps": 204665, "loss": 0.0, "lr": 2.1477731701704927e-07, "epoch": 4.0435589866366985, "percentage": 80.87, "elapsed_time": "3:34:40", "remaining_time": "0:50:46", "throughput": 8660.39, "total_tokens": 111547560} +{"current_steps": 165520, "total_steps": 204665, "loss": 0.0, "lr": 2.147245148328548e-07, "epoch": 4.043681137468546, "percentage": 80.87, "elapsed_time": "3:34:40", "remaining_time": "0:50:46", "throughput": 8660.41, "total_tokens": 111550824} +{"current_steps": 165525, "total_steps": 204665, "loss": 0.0, "lr": 2.1467171835939525e-07, "epoch": 4.043803288300393, "percentage": 80.88, "elapsed_time": "3:34:40", "remaining_time": "0:50:45", "throughput": 8660.44, "total_tokens": 111554344} +{"current_steps": 165530, "total_steps": 204665, "loss": 0.0, "lr": 2.146189275970538e-07, "epoch": 4.043925439132241, "percentage": 80.88, "elapsed_time": "3:34:41", "remaining_time": "0:50:45", "throughput": 8660.47, "total_tokens": 111557608} +{"current_steps": 165535, "total_steps": 204665, "loss": 0.0, "lr": 2.1456614254621497e-07, "epoch": 4.044047589964087, "percentage": 80.88, "elapsed_time": "3:34:41", "remaining_time": "0:50:45", "throughput": 8660.54, "total_tokens": 111561640} +{"current_steps": 165540, "total_steps": 204665, "loss": 0.0, "lr": 2.1451336320726222e-07, "epoch": 4.044169740795935, "percentage": 80.88, "elapsed_time": "3:34:41", "remaining_time": "0:50:44", "throughput": 8660.55, "total_tokens": 111564776} +{"current_steps": 165545, "total_steps": 204665, "loss": 0.0001, "lr": 2.1446058958057978e-07, "epoch": 4.044291891627782, "percentage": 80.89, "elapsed_time": "3:34:42", "remaining_time": "0:50:44", "throughput": 8660.56, "total_tokens": 111567912} +{"current_steps": 165550, "total_steps": 204665, "loss": 0.0001, "lr": 2.1440782166655101e-07, "epoch": 4.044414042459629, "percentage": 80.89, "elapsed_time": "3:34:42", "remaining_time": "0:50:43", "throughput": 8660.58, "total_tokens": 111571176} +{"current_steps": 165555, "total_steps": 204665, "loss": 0.0, "lr": 2.1435505946556008e-07, "epoch": 4.044536193291476, "percentage": 80.89, "elapsed_time": "3:34:42", "remaining_time": "0:50:43", "throughput": 8660.61, "total_tokens": 111574440} +{"current_steps": 165560, "total_steps": 204665, "loss": 0.0, "lr": 2.1430230297799024e-07, "epoch": 4.044658344123324, "percentage": 80.89, "elapsed_time": "3:34:43", "remaining_time": "0:50:43", "throughput": 8660.65, "total_tokens": 111578088} +{"current_steps": 165565, "total_steps": 204665, "loss": 0.0, "lr": 2.142495522042257e-07, "epoch": 4.0447804949551704, "percentage": 80.9, "elapsed_time": "3:34:43", "remaining_time": "0:50:42", "throughput": 8660.69, "total_tokens": 111581544} +{"current_steps": 165570, "total_steps": 204665, "loss": 0.0, "lr": 2.141968071446494e-07, "epoch": 4.044902645787018, "percentage": 80.9, "elapsed_time": "3:34:44", "remaining_time": "0:50:42", "throughput": 8660.73, "total_tokens": 111585128} +{"current_steps": 165575, "total_steps": 204665, "loss": 0.0, "lr": 2.1414406779964555e-07, "epoch": 4.045024796618865, "percentage": 80.9, "elapsed_time": "3:34:44", "remaining_time": "0:50:41", "throughput": 8660.77, "total_tokens": 111588712} +{"current_steps": 165580, "total_steps": 204665, "loss": 0.0, "lr": 2.1409133416959712e-07, "epoch": 4.0451469474507125, "percentage": 80.9, "elapsed_time": "3:34:44", "remaining_time": "0:50:41", "throughput": 8660.84, "total_tokens": 111592744} +{"current_steps": 165585, "total_steps": 204665, "loss": 0.0, "lr": 2.1403860625488823e-07, "epoch": 4.045269098282559, "percentage": 80.91, "elapsed_time": "3:34:45", "remaining_time": "0:50:41", "throughput": 8660.89, "total_tokens": 111596456} +{"current_steps": 165590, "total_steps": 204665, "loss": 0.0, "lr": 2.1398588405590168e-07, "epoch": 4.045391249114407, "percentage": 80.91, "elapsed_time": "3:34:45", "remaining_time": "0:50:40", "throughput": 8660.91, "total_tokens": 111599720} +{"current_steps": 165595, "total_steps": 204665, "loss": 0.0, "lr": 2.1393316757302116e-07, "epoch": 4.045513399946254, "percentage": 80.91, "elapsed_time": "3:34:45", "remaining_time": "0:50:40", "throughput": 8660.96, "total_tokens": 111603496} +{"current_steps": 165600, "total_steps": 204665, "loss": 0.0, "lr": 2.1388045680663047e-07, "epoch": 4.045635550778101, "percentage": 80.91, "elapsed_time": "3:34:46", "remaining_time": "0:50:39", "throughput": 8660.99, "total_tokens": 111606824} +{"current_steps": 165605, "total_steps": 204665, "loss": 0.0, "lr": 2.1382775175711222e-07, "epoch": 4.045757701609948, "percentage": 80.92, "elapsed_time": "3:34:46", "remaining_time": "0:50:39", "throughput": 8661.13, "total_tokens": 111612008} +{"current_steps": 165610, "total_steps": 204665, "loss": 0.0, "lr": 2.1377505242485018e-07, "epoch": 4.045879852441795, "percentage": 80.92, "elapsed_time": "3:34:46", "remaining_time": "0:50:39", "throughput": 8661.13, "total_tokens": 111615016} +{"current_steps": 165615, "total_steps": 204665, "loss": 0.0, "lr": 2.1372235881022726e-07, "epoch": 4.046002003273642, "percentage": 80.92, "elapsed_time": "3:34:47", "remaining_time": "0:50:38", "throughput": 8661.17, "total_tokens": 111618600} +{"current_steps": 165620, "total_steps": 204665, "loss": 0.0, "lr": 2.1366967091362708e-07, "epoch": 4.046124154105489, "percentage": 80.92, "elapsed_time": "3:34:47", "remaining_time": "0:50:38", "throughput": 8661.22, "total_tokens": 111622248} +{"current_steps": 165625, "total_steps": 204665, "loss": 0.0, "lr": 2.136169887354322e-07, "epoch": 4.046246304937337, "percentage": 80.92, "elapsed_time": "3:34:47", "remaining_time": "0:50:37", "throughput": 8661.24, "total_tokens": 111625576} +{"current_steps": 165630, "total_steps": 204665, "loss": 0.0, "lr": 2.1356431227602624e-07, "epoch": 4.0463684557691835, "percentage": 80.93, "elapsed_time": "3:34:48", "remaining_time": "0:50:37", "throughput": 8661.31, "total_tokens": 111629608} +{"current_steps": 165635, "total_steps": 204665, "loss": 0.0, "lr": 2.1351164153579226e-07, "epoch": 4.046490606601031, "percentage": 80.93, "elapsed_time": "3:34:48", "remaining_time": "0:50:37", "throughput": 8661.34, "total_tokens": 111633000} +{"current_steps": 165640, "total_steps": 204665, "loss": 0.0, "lr": 2.1345897651511292e-07, "epoch": 4.046612757432878, "percentage": 80.93, "elapsed_time": "3:34:48", "remaining_time": "0:50:36", "throughput": 8661.35, "total_tokens": 111636072} +{"current_steps": 165645, "total_steps": 204665, "loss": 0.0, "lr": 2.1340631721437174e-07, "epoch": 4.0467349082647255, "percentage": 80.93, "elapsed_time": "3:34:49", "remaining_time": "0:50:36", "throughput": 8661.39, "total_tokens": 111639592} +{"current_steps": 165650, "total_steps": 204665, "loss": 0.0, "lr": 2.1335366363395147e-07, "epoch": 4.046857059096572, "percentage": 80.94, "elapsed_time": "3:34:49", "remaining_time": "0:50:35", "throughput": 8661.45, "total_tokens": 111643432} +{"current_steps": 165655, "total_steps": 204665, "loss": 0.0, "lr": 2.1330101577423453e-07, "epoch": 4.04697920992842, "percentage": 80.94, "elapsed_time": "3:34:50", "remaining_time": "0:50:35", "throughput": 8661.47, "total_tokens": 111646696} +{"current_steps": 165660, "total_steps": 204665, "loss": 0.0, "lr": 2.1324837363560456e-07, "epoch": 4.047101360760267, "percentage": 80.94, "elapsed_time": "3:34:50", "remaining_time": "0:50:35", "throughput": 8661.5, "total_tokens": 111650152} +{"current_steps": 165665, "total_steps": 204665, "loss": 0.0, "lr": 2.1319573721844376e-07, "epoch": 4.047223511592114, "percentage": 80.94, "elapsed_time": "3:34:50", "remaining_time": "0:50:34", "throughput": 8661.52, "total_tokens": 111653416} +{"current_steps": 165670, "total_steps": 204665, "loss": 0.0, "lr": 2.131431065231355e-07, "epoch": 4.047345662423961, "percentage": 80.95, "elapsed_time": "3:34:51", "remaining_time": "0:50:34", "throughput": 8661.57, "total_tokens": 111657064} +{"current_steps": 165675, "total_steps": 204665, "loss": 0.0, "lr": 2.1309048155006183e-07, "epoch": 4.047467813255809, "percentage": 80.95, "elapsed_time": "3:34:51", "remaining_time": "0:50:33", "throughput": 8661.58, "total_tokens": 111660200} +{"current_steps": 165680, "total_steps": 204665, "loss": 0.0, "lr": 2.1303786229960618e-07, "epoch": 4.047589964087655, "percentage": 80.95, "elapsed_time": "3:34:51", "remaining_time": "0:50:33", "throughput": 8661.61, "total_tokens": 111663592} +{"current_steps": 165685, "total_steps": 204665, "loss": 0.0, "lr": 2.1298524877215052e-07, "epoch": 4.047712114919503, "percentage": 80.95, "elapsed_time": "3:34:52", "remaining_time": "0:50:33", "throughput": 8661.62, "total_tokens": 111666664} +{"current_steps": 165690, "total_steps": 204665, "loss": 0.0, "lr": 2.1293264096807783e-07, "epoch": 4.04783426575135, "percentage": 80.96, "elapsed_time": "3:34:52", "remaining_time": "0:50:32", "throughput": 8661.64, "total_tokens": 111669800} +{"current_steps": 165695, "total_steps": 204665, "loss": 0.0004, "lr": 2.1288003888777096e-07, "epoch": 4.0479564165831965, "percentage": 80.96, "elapsed_time": "3:34:52", "remaining_time": "0:50:32", "throughput": 8661.64, "total_tokens": 111672808} +{"current_steps": 165700, "total_steps": 204665, "loss": 0.0, "lr": 2.128274425316119e-07, "epoch": 4.048078567415044, "percentage": 80.96, "elapsed_time": "3:34:53", "remaining_time": "0:50:31", "throughput": 8661.69, "total_tokens": 111676520} +{"current_steps": 165705, "total_steps": 204665, "loss": 0.0, "lr": 2.1277485189998366e-07, "epoch": 4.048200718246891, "percentage": 80.96, "elapsed_time": "3:34:53", "remaining_time": "0:50:31", "throughput": 8661.72, "total_tokens": 111679848} +{"current_steps": 165710, "total_steps": 204665, "loss": 0.0, "lr": 2.127222669932681e-07, "epoch": 4.0483228690787385, "percentage": 80.97, "elapsed_time": "3:34:53", "remaining_time": "0:50:31", "throughput": 8661.75, "total_tokens": 111683304} +{"current_steps": 165715, "total_steps": 204665, "loss": 0.0, "lr": 2.1266968781184814e-07, "epoch": 4.048445019910585, "percentage": 80.97, "elapsed_time": "3:34:54", "remaining_time": "0:50:30", "throughput": 8661.77, "total_tokens": 111686504} +{"current_steps": 165720, "total_steps": 204665, "loss": 0.0, "lr": 2.1261711435610607e-07, "epoch": 4.048567170742433, "percentage": 80.97, "elapsed_time": "3:34:54", "remaining_time": "0:50:30", "throughput": 8661.81, "total_tokens": 111690088} +{"current_steps": 165725, "total_steps": 204665, "loss": 0.0, "lr": 2.1256454662642398e-07, "epoch": 4.04868932157428, "percentage": 80.97, "elapsed_time": "3:34:54", "remaining_time": "0:50:29", "throughput": 8661.84, "total_tokens": 111693544} +{"current_steps": 165730, "total_steps": 204665, "loss": 0.0, "lr": 2.1251198462318444e-07, "epoch": 4.048811472406127, "percentage": 80.98, "elapsed_time": "3:34:55", "remaining_time": "0:50:29", "throughput": 8661.86, "total_tokens": 111696680} +{"current_steps": 165735, "total_steps": 204665, "loss": 0.0, "lr": 2.1245942834676944e-07, "epoch": 4.048933623237974, "percentage": 80.98, "elapsed_time": "3:34:55", "remaining_time": "0:50:29", "throughput": 8661.9, "total_tokens": 111700328} +{"current_steps": 165740, "total_steps": 204665, "loss": 0.0, "lr": 2.124068777975615e-07, "epoch": 4.049055774069822, "percentage": 80.98, "elapsed_time": "3:34:55", "remaining_time": "0:50:28", "throughput": 8661.94, "total_tokens": 111703848} +{"current_steps": 165745, "total_steps": 204665, "loss": 0.0, "lr": 2.123543329759423e-07, "epoch": 4.049177924901668, "percentage": 80.98, "elapsed_time": "3:34:56", "remaining_time": "0:50:28", "throughput": 8661.97, "total_tokens": 111707240} +{"current_steps": 165750, "total_steps": 204665, "loss": 0.0001, "lr": 2.123017938822945e-07, "epoch": 4.049300075733516, "percentage": 80.99, "elapsed_time": "3:34:56", "remaining_time": "0:50:27", "throughput": 8661.96, "total_tokens": 111710056} +{"current_steps": 165755, "total_steps": 204665, "loss": 0.0001, "lr": 2.1224926051699987e-07, "epoch": 4.049422226565363, "percentage": 80.99, "elapsed_time": "3:34:56", "remaining_time": "0:50:27", "throughput": 8662.01, "total_tokens": 111713768} +{"current_steps": 165760, "total_steps": 204665, "loss": 0.0, "lr": 2.121967328804404e-07, "epoch": 4.04954437739721, "percentage": 80.99, "elapsed_time": "3:34:57", "remaining_time": "0:50:27", "throughput": 8662.04, "total_tokens": 111717096} +{"current_steps": 165765, "total_steps": 204665, "loss": 0.0, "lr": 2.1214421097299828e-07, "epoch": 4.049666528229057, "percentage": 80.99, "elapsed_time": "3:34:57", "remaining_time": "0:50:26", "throughput": 8662.05, "total_tokens": 111720232} +{"current_steps": 165770, "total_steps": 204665, "loss": 0.0001, "lr": 2.1209169479505519e-07, "epoch": 4.049788679060905, "percentage": 81.0, "elapsed_time": "3:34:58", "remaining_time": "0:50:26", "throughput": 8662.08, "total_tokens": 111723560} +{"current_steps": 165775, "total_steps": 204665, "loss": 0.0, "lr": 2.1203918434699342e-07, "epoch": 4.0499108298927515, "percentage": 81.0, "elapsed_time": "3:34:58", "remaining_time": "0:50:25", "throughput": 8662.13, "total_tokens": 111727336} +{"current_steps": 165780, "total_steps": 204665, "loss": 0.0, "lr": 2.1198667962919437e-07, "epoch": 4.050032980724599, "percentage": 81.0, "elapsed_time": "3:34:58", "remaining_time": "0:50:25", "throughput": 8662.15, "total_tokens": 111730536} +{"current_steps": 165785, "total_steps": 204665, "loss": 0.0, "lr": 2.1193418064204016e-07, "epoch": 4.050155131556446, "percentage": 81.0, "elapsed_time": "3:34:59", "remaining_time": "0:50:25", "throughput": 8662.21, "total_tokens": 111734440} +{"current_steps": 165790, "total_steps": 204665, "loss": 0.0, "lr": 2.1188168738591284e-07, "epoch": 4.050277282388293, "percentage": 81.01, "elapsed_time": "3:34:59", "remaining_time": "0:50:24", "throughput": 8662.24, "total_tokens": 111737832} +{"current_steps": 165795, "total_steps": 204665, "loss": 0.0591, "lr": 2.1182919986119364e-07, "epoch": 4.05039943322014, "percentage": 81.01, "elapsed_time": "3:34:59", "remaining_time": "0:50:24", "throughput": 8662.26, "total_tokens": 111741160} +{"current_steps": 165800, "total_steps": 204665, "loss": 0.0, "lr": 2.117767180682647e-07, "epoch": 4.050521584051987, "percentage": 81.01, "elapsed_time": "3:35:00", "remaining_time": "0:50:23", "throughput": 8662.29, "total_tokens": 111744488} +{"current_steps": 165805, "total_steps": 204665, "loss": 0.0, "lr": 2.1172424200750715e-07, "epoch": 4.050643734883835, "percentage": 81.01, "elapsed_time": "3:35:00", "remaining_time": "0:50:23", "throughput": 8662.32, "total_tokens": 111747880} +{"current_steps": 165810, "total_steps": 204665, "loss": 0.0, "lr": 2.1167177167930307e-07, "epoch": 4.050765885715681, "percentage": 81.02, "elapsed_time": "3:35:00", "remaining_time": "0:50:23", "throughput": 8662.33, "total_tokens": 111751016} +{"current_steps": 165815, "total_steps": 204665, "loss": 0.0001, "lr": 2.1161930708403407e-07, "epoch": 4.050888036547529, "percentage": 81.02, "elapsed_time": "3:35:01", "remaining_time": "0:50:22", "throughput": 8662.34, "total_tokens": 111754024} +{"current_steps": 165820, "total_steps": 204665, "loss": 0.0, "lr": 2.1156684822208127e-07, "epoch": 4.051010187379376, "percentage": 81.02, "elapsed_time": "3:35:01", "remaining_time": "0:50:22", "throughput": 8662.41, "total_tokens": 111758184} +{"current_steps": 165825, "total_steps": 204665, "loss": 0.0, "lr": 2.1151439509382674e-07, "epoch": 4.051132338211223, "percentage": 81.02, "elapsed_time": "3:35:01", "remaining_time": "0:50:21", "throughput": 8662.43, "total_tokens": 111761384} +{"current_steps": 165830, "total_steps": 204665, "loss": 0.0, "lr": 2.1146194769965132e-07, "epoch": 4.05125448904307, "percentage": 81.03, "elapsed_time": "3:35:02", "remaining_time": "0:50:21", "throughput": 8662.44, "total_tokens": 111764392} +{"current_steps": 165835, "total_steps": 204665, "loss": 0.0, "lr": 2.114095060399369e-07, "epoch": 4.051376639874918, "percentage": 81.03, "elapsed_time": "3:35:02", "remaining_time": "0:50:21", "throughput": 8662.43, "total_tokens": 111767208} +{"current_steps": 165840, "total_steps": 204665, "loss": 0.0, "lr": 2.1135707011506442e-07, "epoch": 4.0514987907067646, "percentage": 81.03, "elapsed_time": "3:35:02", "remaining_time": "0:50:20", "throughput": 8662.43, "total_tokens": 111770088} +{"current_steps": 165845, "total_steps": 204665, "loss": 0.0, "lr": 2.113046399254157e-07, "epoch": 4.051620941538612, "percentage": 81.03, "elapsed_time": "3:35:03", "remaining_time": "0:50:20", "throughput": 8662.45, "total_tokens": 111773416} +{"current_steps": 165850, "total_steps": 204665, "loss": 0.0, "lr": 2.112522154713715e-07, "epoch": 4.051743092370459, "percentage": 81.03, "elapsed_time": "3:35:03", "remaining_time": "0:50:19", "throughput": 8662.46, "total_tokens": 111776488} +{"current_steps": 165855, "total_steps": 204665, "loss": 0.0, "lr": 2.111997967533137e-07, "epoch": 4.051865243202307, "percentage": 81.04, "elapsed_time": "3:35:03", "remaining_time": "0:50:19", "throughput": 8662.48, "total_tokens": 111779624} +{"current_steps": 165860, "total_steps": 204665, "loss": 0.0, "lr": 2.1114738377162279e-07, "epoch": 4.051987394034153, "percentage": 81.04, "elapsed_time": "3:35:04", "remaining_time": "0:50:19", "throughput": 8662.52, "total_tokens": 111783208} +{"current_steps": 165865, "total_steps": 204665, "loss": 0.0, "lr": 2.1109497652668052e-07, "epoch": 4.052109544866001, "percentage": 81.04, "elapsed_time": "3:35:04", "remaining_time": "0:50:18", "throughput": 8662.56, "total_tokens": 111786728} +{"current_steps": 165870, "total_steps": 204665, "loss": 0.0, "lr": 2.110425750188679e-07, "epoch": 4.052231695697848, "percentage": 81.04, "elapsed_time": "3:35:04", "remaining_time": "0:50:18", "throughput": 8662.57, "total_tokens": 111789864} +{"current_steps": 165875, "total_steps": 204665, "loss": 0.0, "lr": 2.1099017924856544e-07, "epoch": 4.052353846529694, "percentage": 81.05, "elapsed_time": "3:35:05", "remaining_time": "0:50:17", "throughput": 8662.6, "total_tokens": 111793256} +{"current_steps": 165880, "total_steps": 204665, "loss": 0.0, "lr": 2.109377892161547e-07, "epoch": 4.052475997361542, "percentage": 81.05, "elapsed_time": "3:35:05", "remaining_time": "0:50:17", "throughput": 8662.65, "total_tokens": 111796840} +{"current_steps": 165885, "total_steps": 204665, "loss": 0.0, "lr": 2.108854049220169e-07, "epoch": 4.052598148193389, "percentage": 81.05, "elapsed_time": "3:35:05", "remaining_time": "0:50:17", "throughput": 8662.67, "total_tokens": 111800104} +{"current_steps": 165890, "total_steps": 204665, "loss": 0.0, "lr": 2.1083302636653234e-07, "epoch": 4.0527202990252365, "percentage": 81.05, "elapsed_time": "3:35:06", "remaining_time": "0:50:16", "throughput": 8662.67, "total_tokens": 111803112} +{"current_steps": 165895, "total_steps": 204665, "loss": 0.0, "lr": 2.1078065355008257e-07, "epoch": 4.052842449857083, "percentage": 81.06, "elapsed_time": "3:35:06", "remaining_time": "0:50:16", "throughput": 8662.73, "total_tokens": 111806888} +{"current_steps": 165900, "total_steps": 204665, "loss": 0.0, "lr": 2.1072828647304795e-07, "epoch": 4.052964600688931, "percentage": 81.06, "elapsed_time": "3:35:06", "remaining_time": "0:50:15", "throughput": 8662.73, "total_tokens": 111809896} +{"current_steps": 165905, "total_steps": 204665, "loss": 0.0001, "lr": 2.1067592513580944e-07, "epoch": 4.053086751520778, "percentage": 81.06, "elapsed_time": "3:35:07", "remaining_time": "0:50:15", "throughput": 8662.77, "total_tokens": 111813352} +{"current_steps": 165910, "total_steps": 204665, "loss": 0.0, "lr": 2.1062356953874815e-07, "epoch": 4.053208902352625, "percentage": 81.06, "elapsed_time": "3:35:07", "remaining_time": "0:50:15", "throughput": 8662.77, "total_tokens": 111816360} +{"current_steps": 165915, "total_steps": 204665, "loss": 0.0, "lr": 2.1057121968224445e-07, "epoch": 4.053331053184472, "percentage": 81.07, "elapsed_time": "3:35:08", "remaining_time": "0:50:14", "throughput": 8662.79, "total_tokens": 111819688} +{"current_steps": 165920, "total_steps": 204665, "loss": 0.0224, "lr": 2.1051887556667937e-07, "epoch": 4.05345320401632, "percentage": 81.07, "elapsed_time": "3:35:08", "remaining_time": "0:50:14", "throughput": 8662.84, "total_tokens": 111823336} +{"current_steps": 165925, "total_steps": 204665, "loss": 0.0, "lr": 2.10466537192433e-07, "epoch": 4.053575354848166, "percentage": 81.07, "elapsed_time": "3:35:08", "remaining_time": "0:50:13", "throughput": 8662.85, "total_tokens": 111826344} +{"current_steps": 165930, "total_steps": 204665, "loss": 0.0, "lr": 2.1041420455988668e-07, "epoch": 4.053697505680014, "percentage": 81.07, "elapsed_time": "3:35:09", "remaining_time": "0:50:13", "throughput": 8662.89, "total_tokens": 111829992} +{"current_steps": 165935, "total_steps": 204665, "loss": 0.0, "lr": 2.1036187766942037e-07, "epoch": 4.053819656511861, "percentage": 81.08, "elapsed_time": "3:35:09", "remaining_time": "0:50:13", "throughput": 8662.93, "total_tokens": 111833512} +{"current_steps": 165940, "total_steps": 204665, "loss": 0.0, "lr": 2.103095565214149e-07, "epoch": 4.053941807343708, "percentage": 81.08, "elapsed_time": "3:35:09", "remaining_time": "0:50:12", "throughput": 8662.96, "total_tokens": 111836904} +{"current_steps": 165945, "total_steps": 204665, "loss": 0.0, "lr": 2.1025724111625099e-07, "epoch": 4.054063958175555, "percentage": 81.08, "elapsed_time": "3:35:10", "remaining_time": "0:50:12", "throughput": 8663.01, "total_tokens": 111840616} +{"current_steps": 165950, "total_steps": 204665, "loss": 0.0, "lr": 2.1020493145430851e-07, "epoch": 4.054186109007403, "percentage": 81.08, "elapsed_time": "3:35:10", "remaining_time": "0:50:11", "throughput": 8663.02, "total_tokens": 111843752} +{"current_steps": 165955, "total_steps": 204665, "loss": 0.0, "lr": 2.1015262753596853e-07, "epoch": 4.0543082598392495, "percentage": 81.09, "elapsed_time": "3:35:10", "remaining_time": "0:50:11", "throughput": 8663.09, "total_tokens": 111847720} +{"current_steps": 165960, "total_steps": 204665, "loss": 0.0007, "lr": 2.1010032936161103e-07, "epoch": 4.054430410671097, "percentage": 81.09, "elapsed_time": "3:35:11", "remaining_time": "0:50:11", "throughput": 8663.12, "total_tokens": 111851176} +{"current_steps": 165965, "total_steps": 204665, "loss": 0.0, "lr": 2.100480369316162e-07, "epoch": 4.054552561502944, "percentage": 81.09, "elapsed_time": "3:35:11", "remaining_time": "0:50:10", "throughput": 8663.14, "total_tokens": 111854376} +{"current_steps": 165970, "total_steps": 204665, "loss": 0.0, "lr": 2.0999575024636474e-07, "epoch": 4.054674712334791, "percentage": 81.09, "elapsed_time": "3:35:11", "remaining_time": "0:50:10", "throughput": 8663.15, "total_tokens": 111857576} +{"current_steps": 165975, "total_steps": 204665, "loss": 0.0, "lr": 2.0994346930623642e-07, "epoch": 4.054796863166638, "percentage": 81.1, "elapsed_time": "3:35:12", "remaining_time": "0:50:09", "throughput": 8663.2, "total_tokens": 111861224} +{"current_steps": 165980, "total_steps": 204665, "loss": 0.0, "lr": 2.0989119411161194e-07, "epoch": 4.054919013998485, "percentage": 81.1, "elapsed_time": "3:35:12", "remaining_time": "0:50:09", "throughput": 8663.22, "total_tokens": 111864424} +{"current_steps": 165985, "total_steps": 204665, "loss": 0.0, "lr": 2.09838924662871e-07, "epoch": 4.055041164830333, "percentage": 81.1, "elapsed_time": "3:35:12", "remaining_time": "0:50:09", "throughput": 8663.23, "total_tokens": 111867496} +{"current_steps": 165990, "total_steps": 204665, "loss": 0.0, "lr": 2.097866609603941e-07, "epoch": 4.055163315662179, "percentage": 81.1, "elapsed_time": "3:35:13", "remaining_time": "0:50:08", "throughput": 8663.24, "total_tokens": 111870568} +{"current_steps": 165995, "total_steps": 204665, "loss": 0.0, "lr": 2.097344030045609e-07, "epoch": 4.055285466494027, "percentage": 81.11, "elapsed_time": "3:35:13", "remaining_time": "0:50:08", "throughput": 8663.26, "total_tokens": 111873960} +{"current_steps": 166000, "total_steps": 204665, "loss": 0.0, "lr": 2.096821507957517e-07, "epoch": 4.055407617325874, "percentage": 81.11, "elapsed_time": "3:35:13", "remaining_time": "0:50:07", "throughput": 8663.3, "total_tokens": 111877416} +{"current_steps": 166005, "total_steps": 204665, "loss": 0.0, "lr": 2.096299043343468e-07, "epoch": 4.055529768157721, "percentage": 81.11, "elapsed_time": "3:35:14", "remaining_time": "0:50:07", "throughput": 8663.3, "total_tokens": 111880360} +{"current_steps": 166010, "total_steps": 204665, "loss": 0.0, "lr": 2.0957766362072548e-07, "epoch": 4.055651918989568, "percentage": 81.11, "elapsed_time": "3:35:14", "remaining_time": "0:50:07", "throughput": 8663.31, "total_tokens": 111883432} +{"current_steps": 166015, "total_steps": 204665, "loss": 0.0, "lr": 2.0952542865526824e-07, "epoch": 4.055774069821416, "percentage": 81.12, "elapsed_time": "3:35:14", "remaining_time": "0:50:06", "throughput": 8663.33, "total_tokens": 111886632} +{"current_steps": 166020, "total_steps": 204665, "loss": 0.0, "lr": 2.094731994383544e-07, "epoch": 4.0558962206532625, "percentage": 81.12, "elapsed_time": "3:35:15", "remaining_time": "0:50:06", "throughput": 8663.34, "total_tokens": 111889832} +{"current_steps": 166025, "total_steps": 204665, "loss": 0.0, "lr": 2.0942097597036446e-07, "epoch": 4.05601837148511, "percentage": 81.12, "elapsed_time": "3:35:15", "remaining_time": "0:50:05", "throughput": 8663.37, "total_tokens": 111893160} +{"current_steps": 166030, "total_steps": 204665, "loss": 0.0005, "lr": 2.0936875825167744e-07, "epoch": 4.056140522316957, "percentage": 81.12, "elapsed_time": "3:35:16", "remaining_time": "0:50:05", "throughput": 8663.42, "total_tokens": 111896936} +{"current_steps": 166035, "total_steps": 204665, "loss": 0.0, "lr": 2.093165462826736e-07, "epoch": 4.0562626731488045, "percentage": 81.13, "elapsed_time": "3:35:16", "remaining_time": "0:50:05", "throughput": 8663.47, "total_tokens": 111900712} +{"current_steps": 166040, "total_steps": 204665, "loss": 0.0, "lr": 2.0926434006373261e-07, "epoch": 4.056384823980651, "percentage": 81.13, "elapsed_time": "3:35:16", "remaining_time": "0:50:04", "throughput": 8663.5, "total_tokens": 111903976} +{"current_steps": 166045, "total_steps": 204665, "loss": 0.0, "lr": 2.0921213959523388e-07, "epoch": 4.056506974812499, "percentage": 81.13, "elapsed_time": "3:35:17", "remaining_time": "0:50:04", "throughput": 8663.51, "total_tokens": 111907176} +{"current_steps": 166050, "total_steps": 204665, "loss": 0.0, "lr": 2.091599448775574e-07, "epoch": 4.056629125644346, "percentage": 81.13, "elapsed_time": "3:35:17", "remaining_time": "0:50:03", "throughput": 8663.54, "total_tokens": 111910440} +{"current_steps": 166055, "total_steps": 204665, "loss": 0.0246, "lr": 2.091077559110822e-07, "epoch": 4.056751276476192, "percentage": 81.14, "elapsed_time": "3:35:17", "remaining_time": "0:50:03", "throughput": 8663.55, "total_tokens": 111913512} +{"current_steps": 166060, "total_steps": 204665, "loss": 0.0, "lr": 2.0905557269618845e-07, "epoch": 4.05687342730804, "percentage": 81.14, "elapsed_time": "3:35:18", "remaining_time": "0:50:03", "throughput": 8663.57, "total_tokens": 111916840} +{"current_steps": 166065, "total_steps": 204665, "loss": 0.0, "lr": 2.0900339523325528e-07, "epoch": 4.056995578139887, "percentage": 81.14, "elapsed_time": "3:35:18", "remaining_time": "0:50:02", "throughput": 8663.61, "total_tokens": 111920296} +{"current_steps": 166070, "total_steps": 204665, "loss": 0.0, "lr": 2.0895122352266194e-07, "epoch": 4.057117728971734, "percentage": 81.14, "elapsed_time": "3:35:18", "remaining_time": "0:50:02", "throughput": 8663.63, "total_tokens": 111923624} +{"current_steps": 166075, "total_steps": 204665, "loss": 0.0, "lr": 2.0889905756478833e-07, "epoch": 4.057239879803581, "percentage": 81.14, "elapsed_time": "3:35:19", "remaining_time": "0:50:01", "throughput": 8663.65, "total_tokens": 111926888} +{"current_steps": 166080, "total_steps": 204665, "loss": 0.0, "lr": 2.0884689736001316e-07, "epoch": 4.057362030635429, "percentage": 81.15, "elapsed_time": "3:35:19", "remaining_time": "0:50:01", "throughput": 8663.67, "total_tokens": 111930152} +{"current_steps": 166085, "total_steps": 204665, "loss": 0.0, "lr": 2.0879474290871656e-07, "epoch": 4.0574841814672755, "percentage": 81.15, "elapsed_time": "3:35:19", "remaining_time": "0:50:01", "throughput": 8663.7, "total_tokens": 111933544} +{"current_steps": 166090, "total_steps": 204665, "loss": 0.0, "lr": 2.0874259421127706e-07, "epoch": 4.057606332299123, "percentage": 81.15, "elapsed_time": "3:35:20", "remaining_time": "0:50:00", "throughput": 8663.71, "total_tokens": 111936616} +{"current_steps": 166095, "total_steps": 204665, "loss": 0.0, "lr": 2.0869045126807427e-07, "epoch": 4.05772848313097, "percentage": 81.15, "elapsed_time": "3:35:20", "remaining_time": "0:50:00", "throughput": 8663.71, "total_tokens": 111939496} +{"current_steps": 166100, "total_steps": 204665, "loss": 0.0, "lr": 2.0863831407948763e-07, "epoch": 4.0578506339628175, "percentage": 81.16, "elapsed_time": "3:35:20", "remaining_time": "0:49:59", "throughput": 8663.74, "total_tokens": 111942888} +{"current_steps": 166105, "total_steps": 204665, "loss": 0.0, "lr": 2.0858618264589577e-07, "epoch": 4.057972784794664, "percentage": 81.16, "elapsed_time": "3:35:21", "remaining_time": "0:49:59", "throughput": 8663.77, "total_tokens": 111946344} +{"current_steps": 166110, "total_steps": 204665, "loss": 0.0293, "lr": 2.0853405696767823e-07, "epoch": 4.058094935626512, "percentage": 81.16, "elapsed_time": "3:35:21", "remaining_time": "0:49:59", "throughput": 8663.78, "total_tokens": 111949288} +{"current_steps": 166115, "total_steps": 204665, "loss": 0.0, "lr": 2.0848193704521378e-07, "epoch": 4.058217086458359, "percentage": 81.16, "elapsed_time": "3:35:21", "remaining_time": "0:49:58", "throughput": 8663.8, "total_tokens": 111952616} +{"current_steps": 166120, "total_steps": 204665, "loss": 0.0, "lr": 2.0842982287888145e-07, "epoch": 4.058339237290206, "percentage": 81.17, "elapsed_time": "3:35:22", "remaining_time": "0:49:58", "throughput": 8663.85, "total_tokens": 111956328} +{"current_steps": 166125, "total_steps": 204665, "loss": 0.0, "lr": 2.0837771446906073e-07, "epoch": 4.058461388122053, "percentage": 81.17, "elapsed_time": "3:35:22", "remaining_time": "0:49:57", "throughput": 8663.86, "total_tokens": 111959400} +{"current_steps": 166130, "total_steps": 204665, "loss": 0.0, "lr": 2.0832561181612985e-07, "epoch": 4.058583538953901, "percentage": 81.17, "elapsed_time": "3:35:22", "remaining_time": "0:49:57", "throughput": 8663.89, "total_tokens": 111962856} +{"current_steps": 166135, "total_steps": 204665, "loss": 0.0, "lr": 2.082735149204683e-07, "epoch": 4.058705689785747, "percentage": 81.17, "elapsed_time": "3:35:23", "remaining_time": "0:49:57", "throughput": 8663.91, "total_tokens": 111966184} +{"current_steps": 166140, "total_steps": 204665, "loss": 0.0, "lr": 2.0822142378245444e-07, "epoch": 4.058827840617594, "percentage": 81.18, "elapsed_time": "3:35:23", "remaining_time": "0:49:56", "throughput": 8663.93, "total_tokens": 111969448} +{"current_steps": 166145, "total_steps": 204665, "loss": 0.0418, "lr": 2.0816933840246776e-07, "epoch": 4.058949991449442, "percentage": 81.18, "elapsed_time": "3:35:23", "remaining_time": "0:49:56", "throughput": 8663.97, "total_tokens": 111972904} +{"current_steps": 166150, "total_steps": 204665, "loss": 0.0, "lr": 2.0811725878088615e-07, "epoch": 4.0590721422812885, "percentage": 81.18, "elapsed_time": "3:35:24", "remaining_time": "0:49:55", "throughput": 8663.98, "total_tokens": 111975976} +{"current_steps": 166155, "total_steps": 204665, "loss": 0.0004, "lr": 2.0806518491808923e-07, "epoch": 4.059194293113136, "percentage": 81.18, "elapsed_time": "3:35:24", "remaining_time": "0:49:55", "throughput": 8664.02, "total_tokens": 111979624} +{"current_steps": 166160, "total_steps": 204665, "loss": 0.0, "lr": 2.08013116814455e-07, "epoch": 4.059316443944983, "percentage": 81.19, "elapsed_time": "3:35:25", "remaining_time": "0:49:55", "throughput": 8664.05, "total_tokens": 111982952} +{"current_steps": 166165, "total_steps": 204665, "loss": 0.0, "lr": 2.079610544703626e-07, "epoch": 4.0594385947768306, "percentage": 81.19, "elapsed_time": "3:35:25", "remaining_time": "0:49:54", "throughput": 8664.06, "total_tokens": 111986088} +{"current_steps": 166170, "total_steps": 204665, "loss": 0.0, "lr": 2.0790899788619033e-07, "epoch": 4.059560745608677, "percentage": 81.19, "elapsed_time": "3:35:25", "remaining_time": "0:49:54", "throughput": 8664.09, "total_tokens": 111989416} +{"current_steps": 166175, "total_steps": 204665, "loss": 0.0, "lr": 2.0785694706231693e-07, "epoch": 4.059682896440525, "percentage": 81.19, "elapsed_time": "3:35:26", "remaining_time": "0:49:53", "throughput": 8664.09, "total_tokens": 111992360} +{"current_steps": 166180, "total_steps": 204665, "loss": 0.0, "lr": 2.0780490199912103e-07, "epoch": 4.059805047272372, "percentage": 81.2, "elapsed_time": "3:35:26", "remaining_time": "0:49:53", "throughput": 8664.14, "total_tokens": 111996072} +{"current_steps": 166185, "total_steps": 204665, "loss": 0.0, "lr": 2.0775286269698066e-07, "epoch": 4.059927198104219, "percentage": 81.2, "elapsed_time": "3:35:26", "remaining_time": "0:49:53", "throughput": 8664.16, "total_tokens": 111999272} +{"current_steps": 166190, "total_steps": 204665, "loss": 0.0, "lr": 2.077008291562745e-07, "epoch": 4.060049348936066, "percentage": 81.2, "elapsed_time": "3:35:27", "remaining_time": "0:49:52", "throughput": 8664.16, "total_tokens": 112002216} +{"current_steps": 166195, "total_steps": 204665, "loss": 0.0, "lr": 2.076488013773814e-07, "epoch": 4.060171499767914, "percentage": 81.2, "elapsed_time": "3:35:27", "remaining_time": "0:49:52", "throughput": 8664.17, "total_tokens": 112005288} +{"current_steps": 166200, "total_steps": 204665, "loss": 0.0, "lr": 2.0759677936067899e-07, "epoch": 4.06029365059976, "percentage": 81.21, "elapsed_time": "3:35:27", "remaining_time": "0:49:51", "throughput": 8664.2, "total_tokens": 112008808} +{"current_steps": 166205, "total_steps": 204665, "loss": 0.0, "lr": 2.0754476310654611e-07, "epoch": 4.060415801431608, "percentage": 81.21, "elapsed_time": "3:35:28", "remaining_time": "0:49:51", "throughput": 8664.21, "total_tokens": 112011880} +{"current_steps": 166210, "total_steps": 204665, "loss": 0.0, "lr": 2.074927526153607e-07, "epoch": 4.060537952263455, "percentage": 81.21, "elapsed_time": "3:35:28", "remaining_time": "0:49:51", "throughput": 8664.25, "total_tokens": 112015336} +{"current_steps": 166215, "total_steps": 204665, "loss": 0.0, "lr": 2.074407478875012e-07, "epoch": 4.0606601030953025, "percentage": 81.21, "elapsed_time": "3:35:28", "remaining_time": "0:49:50", "throughput": 8664.26, "total_tokens": 112018536} +{"current_steps": 166220, "total_steps": 204665, "loss": 0.0, "lr": 2.073887489233459e-07, "epoch": 4.060782253927149, "percentage": 81.22, "elapsed_time": "3:35:29", "remaining_time": "0:49:50", "throughput": 8664.26, "total_tokens": 112021480} +{"current_steps": 166225, "total_steps": 204665, "loss": 0.0, "lr": 2.0733675572327258e-07, "epoch": 4.060904404758997, "percentage": 81.22, "elapsed_time": "3:35:29", "remaining_time": "0:49:49", "throughput": 8664.27, "total_tokens": 112024488} +{"current_steps": 166230, "total_steps": 204665, "loss": 0.0, "lr": 2.0728476828765996e-07, "epoch": 4.061026555590844, "percentage": 81.22, "elapsed_time": "3:35:29", "remaining_time": "0:49:49", "throughput": 8664.29, "total_tokens": 112027688} +{"current_steps": 166235, "total_steps": 204665, "loss": 0.0475, "lr": 2.0723278661688526e-07, "epoch": 4.06114870642269, "percentage": 81.22, "elapsed_time": "3:35:30", "remaining_time": "0:49:49", "throughput": 8664.33, "total_tokens": 112031272} +{"current_steps": 166240, "total_steps": 204665, "loss": 0.0, "lr": 2.0718081071132732e-07, "epoch": 4.061270857254538, "percentage": 81.23, "elapsed_time": "3:35:30", "remaining_time": "0:49:48", "throughput": 8664.35, "total_tokens": 112034600} +{"current_steps": 166245, "total_steps": 204665, "loss": 0.0, "lr": 2.0712884057136348e-07, "epoch": 4.061393008086385, "percentage": 81.23, "elapsed_time": "3:35:30", "remaining_time": "0:49:48", "throughput": 8664.36, "total_tokens": 112037672} +{"current_steps": 166250, "total_steps": 204665, "loss": 0.0, "lr": 2.07076876197372e-07, "epoch": 4.061515158918232, "percentage": 81.23, "elapsed_time": "3:35:31", "remaining_time": "0:49:47", "throughput": 8664.4, "total_tokens": 112041192} +{"current_steps": 166255, "total_steps": 204665, "loss": 0.0, "lr": 2.0702491758973105e-07, "epoch": 4.061637309750079, "percentage": 81.23, "elapsed_time": "3:35:31", "remaining_time": "0:49:47", "throughput": 8664.43, "total_tokens": 112044584} +{"current_steps": 166260, "total_steps": 204665, "loss": 0.0, "lr": 2.0697296474881787e-07, "epoch": 4.061759460581927, "percentage": 81.24, "elapsed_time": "3:35:31", "remaining_time": "0:49:47", "throughput": 8664.45, "total_tokens": 112047784} +{"current_steps": 166265, "total_steps": 204665, "loss": 0.0, "lr": 2.069210176750108e-07, "epoch": 4.0618816114137735, "percentage": 81.24, "elapsed_time": "3:35:32", "remaining_time": "0:49:46", "throughput": 8664.47, "total_tokens": 112051112} +{"current_steps": 166270, "total_steps": 204665, "loss": 0.0, "lr": 2.0686907636868746e-07, "epoch": 4.062003762245621, "percentage": 81.24, "elapsed_time": "3:35:32", "remaining_time": "0:49:46", "throughput": 8664.48, "total_tokens": 112054184} +{"current_steps": 166275, "total_steps": 204665, "loss": 0.0, "lr": 2.0681714083022527e-07, "epoch": 4.062125913077468, "percentage": 81.24, "elapsed_time": "3:35:32", "remaining_time": "0:49:45", "throughput": 8664.5, "total_tokens": 112057448} +{"current_steps": 166280, "total_steps": 204665, "loss": 0.0, "lr": 2.0676521106000245e-07, "epoch": 4.0622480639093155, "percentage": 81.24, "elapsed_time": "3:35:33", "remaining_time": "0:49:45", "throughput": 8664.51, "total_tokens": 112060520} +{"current_steps": 166285, "total_steps": 204665, "loss": 0.0, "lr": 2.0671328705839608e-07, "epoch": 4.062370214741162, "percentage": 81.25, "elapsed_time": "3:35:33", "remaining_time": "0:49:45", "throughput": 8664.53, "total_tokens": 112063848} +{"current_steps": 166290, "total_steps": 204665, "loss": 0.0, "lr": 2.066613688257842e-07, "epoch": 4.06249236557301, "percentage": 81.25, "elapsed_time": "3:35:33", "remaining_time": "0:49:44", "throughput": 8664.56, "total_tokens": 112067240} +{"current_steps": 166295, "total_steps": 204665, "loss": 0.0, "lr": 2.066094563625441e-07, "epoch": 4.062614516404857, "percentage": 81.25, "elapsed_time": "3:35:34", "remaining_time": "0:49:44", "throughput": 8664.56, "total_tokens": 112070248} +{"current_steps": 166300, "total_steps": 204665, "loss": 0.0, "lr": 2.065575496690537e-07, "epoch": 4.062736667236704, "percentage": 81.25, "elapsed_time": "3:35:34", "remaining_time": "0:49:43", "throughput": 8664.61, "total_tokens": 112073832} +{"current_steps": 166305, "total_steps": 204665, "loss": 0.0, "lr": 2.0650564874568988e-07, "epoch": 4.062858818068551, "percentage": 81.26, "elapsed_time": "3:35:35", "remaining_time": "0:49:43", "throughput": 8664.63, "total_tokens": 112077096} +{"current_steps": 166310, "total_steps": 204665, "loss": 0.0, "lr": 2.0645375359283045e-07, "epoch": 4.062980968900399, "percentage": 81.26, "elapsed_time": "3:35:35", "remaining_time": "0:49:43", "throughput": 8664.64, "total_tokens": 112080296} +{"current_steps": 166315, "total_steps": 204665, "loss": 0.0, "lr": 2.0640186421085303e-07, "epoch": 4.063103119732245, "percentage": 81.26, "elapsed_time": "3:35:35", "remaining_time": "0:49:42", "throughput": 8664.68, "total_tokens": 112083880} +{"current_steps": 166320, "total_steps": 204665, "loss": 0.0, "lr": 2.063499806001344e-07, "epoch": 4.063225270564092, "percentage": 81.26, "elapsed_time": "3:35:36", "remaining_time": "0:49:42", "throughput": 8664.69, "total_tokens": 112086824} +{"current_steps": 166325, "total_steps": 204665, "loss": 0.0, "lr": 2.0629810276105252e-07, "epoch": 4.06334742139594, "percentage": 81.27, "elapsed_time": "3:35:36", "remaining_time": "0:49:42", "throughput": 8664.7, "total_tokens": 112090024} +{"current_steps": 166330, "total_steps": 204665, "loss": 0.0, "lr": 2.0624623069398407e-07, "epoch": 4.0634695722277865, "percentage": 81.27, "elapsed_time": "3:35:36", "remaining_time": "0:49:41", "throughput": 8664.73, "total_tokens": 112093416} +{"current_steps": 166335, "total_steps": 204665, "loss": 0.0, "lr": 2.061943643993067e-07, "epoch": 4.063591723059634, "percentage": 81.27, "elapsed_time": "3:35:37", "remaining_time": "0:49:41", "throughput": 8664.77, "total_tokens": 112097000} +{"current_steps": 166340, "total_steps": 204665, "loss": 0.0, "lr": 2.061425038773972e-07, "epoch": 4.063713873891481, "percentage": 81.27, "elapsed_time": "3:35:37", "remaining_time": "0:49:40", "throughput": 8664.8, "total_tokens": 112100456} +{"current_steps": 166345, "total_steps": 204665, "loss": 0.0, "lr": 2.0609064912863284e-07, "epoch": 4.0638360247233285, "percentage": 81.28, "elapsed_time": "3:35:37", "remaining_time": "0:49:40", "throughput": 8664.82, "total_tokens": 112103656} +{"current_steps": 166350, "total_steps": 204665, "loss": 0.0, "lr": 2.0603880015339115e-07, "epoch": 4.063958175555175, "percentage": 81.28, "elapsed_time": "3:35:38", "remaining_time": "0:49:40", "throughput": 8664.85, "total_tokens": 112107048} +{"current_steps": 166355, "total_steps": 204665, "loss": 0.0, "lr": 2.059869569520486e-07, "epoch": 4.064080326387023, "percentage": 81.28, "elapsed_time": "3:35:38", "remaining_time": "0:49:39", "throughput": 8664.9, "total_tokens": 112110696} +{"current_steps": 166360, "total_steps": 204665, "loss": 0.0, "lr": 2.0593511952498277e-07, "epoch": 4.06420247721887, "percentage": 81.28, "elapsed_time": "3:35:38", "remaining_time": "0:49:39", "throughput": 8664.91, "total_tokens": 112113896} +{"current_steps": 166365, "total_steps": 204665, "loss": 0.0, "lr": 2.0588328787257004e-07, "epoch": 4.064324628050717, "percentage": 81.29, "elapsed_time": "3:35:39", "remaining_time": "0:49:38", "throughput": 8664.94, "total_tokens": 112117224} +{"current_steps": 166370, "total_steps": 204665, "loss": 0.0, "lr": 2.0583146199518787e-07, "epoch": 4.064446778882564, "percentage": 81.29, "elapsed_time": "3:35:39", "remaining_time": "0:49:38", "throughput": 8664.96, "total_tokens": 112120552} +{"current_steps": 166375, "total_steps": 204665, "loss": 0.0, "lr": 2.0577964189321284e-07, "epoch": 4.064568929714412, "percentage": 81.29, "elapsed_time": "3:35:39", "remaining_time": "0:49:38", "throughput": 8664.99, "total_tokens": 112123880} +{"current_steps": 166380, "total_steps": 204665, "loss": 0.0, "lr": 2.0572782756702168e-07, "epoch": 4.064691080546258, "percentage": 81.29, "elapsed_time": "3:35:40", "remaining_time": "0:49:37", "throughput": 8665.0, "total_tokens": 112127080} +{"current_steps": 166385, "total_steps": 204665, "loss": 0.0, "lr": 2.0567601901699173e-07, "epoch": 4.064813231378106, "percentage": 81.3, "elapsed_time": "3:35:40", "remaining_time": "0:49:37", "throughput": 8665.04, "total_tokens": 112130664} +{"current_steps": 166390, "total_steps": 204665, "loss": 0.0, "lr": 2.0562421624349903e-07, "epoch": 4.064935382209953, "percentage": 81.3, "elapsed_time": "3:35:40", "remaining_time": "0:49:36", "throughput": 8665.07, "total_tokens": 112133992} +{"current_steps": 166395, "total_steps": 204665, "loss": 0.0, "lr": 2.0557241924692103e-07, "epoch": 4.0650575330418, "percentage": 81.3, "elapsed_time": "3:35:41", "remaining_time": "0:49:36", "throughput": 8665.1, "total_tokens": 112137448} +{"current_steps": 166400, "total_steps": 204665, "loss": 0.0, "lr": 2.0552062802763382e-07, "epoch": 4.065179683873647, "percentage": 81.3, "elapsed_time": "3:35:41", "remaining_time": "0:49:36", "throughput": 8665.12, "total_tokens": 112140584} +{"current_steps": 166405, "total_steps": 204665, "loss": 0.0001, "lr": 2.0546884258601427e-07, "epoch": 4.065301834705495, "percentage": 81.31, "elapsed_time": "3:35:41", "remaining_time": "0:49:35", "throughput": 8665.15, "total_tokens": 112143976} +{"current_steps": 166410, "total_steps": 204665, "loss": 0.0, "lr": 2.0541706292243921e-07, "epoch": 4.0654239855373415, "percentage": 81.31, "elapsed_time": "3:35:42", "remaining_time": "0:49:35", "throughput": 8665.19, "total_tokens": 112147560} +{"current_steps": 166415, "total_steps": 204665, "loss": 0.0, "lr": 2.0536528903728478e-07, "epoch": 4.065546136369188, "percentage": 81.31, "elapsed_time": "3:35:42", "remaining_time": "0:49:34", "throughput": 8665.22, "total_tokens": 112151016} +{"current_steps": 166420, "total_steps": 204665, "loss": 0.0, "lr": 2.053135209309279e-07, "epoch": 4.065668287201036, "percentage": 81.31, "elapsed_time": "3:35:43", "remaining_time": "0:49:34", "throughput": 8665.25, "total_tokens": 112154408} +{"current_steps": 166425, "total_steps": 204665, "loss": 0.0, "lr": 2.0526175860374462e-07, "epoch": 4.065790438032883, "percentage": 81.32, "elapsed_time": "3:35:43", "remaining_time": "0:49:34", "throughput": 8665.28, "total_tokens": 112157864} +{"current_steps": 166430, "total_steps": 204665, "loss": 0.0, "lr": 2.0521000205611162e-07, "epoch": 4.06591258886473, "percentage": 81.32, "elapsed_time": "3:35:43", "remaining_time": "0:49:33", "throughput": 8665.3, "total_tokens": 112161000} +{"current_steps": 166435, "total_steps": 204665, "loss": 0.0, "lr": 2.0515825128840548e-07, "epoch": 4.066034739696577, "percentage": 81.32, "elapsed_time": "3:35:44", "remaining_time": "0:49:33", "throughput": 8665.3, "total_tokens": 112164008} +{"current_steps": 166440, "total_steps": 204665, "loss": 0.0, "lr": 2.0510650630100212e-07, "epoch": 4.066156890528425, "percentage": 81.32, "elapsed_time": "3:35:44", "remaining_time": "0:49:32", "throughput": 8665.32, "total_tokens": 112167272} +{"current_steps": 166445, "total_steps": 204665, "loss": 0.0, "lr": 2.0505476709427827e-07, "epoch": 4.066279041360271, "percentage": 81.33, "elapsed_time": "3:35:44", "remaining_time": "0:49:32", "throughput": 8665.35, "total_tokens": 112170600} +{"current_steps": 166450, "total_steps": 204665, "loss": 0.0, "lr": 2.050030336686097e-07, "epoch": 4.066401192192119, "percentage": 81.33, "elapsed_time": "3:35:45", "remaining_time": "0:49:32", "throughput": 8665.38, "total_tokens": 112174056} +{"current_steps": 166455, "total_steps": 204665, "loss": 0.0, "lr": 2.0495130602437315e-07, "epoch": 4.066523343023966, "percentage": 81.33, "elapsed_time": "3:35:45", "remaining_time": "0:49:31", "throughput": 8665.39, "total_tokens": 112177192} +{"current_steps": 166460, "total_steps": 204665, "loss": 0.0, "lr": 2.048995841619443e-07, "epoch": 4.066645493855813, "percentage": 81.33, "elapsed_time": "3:35:45", "remaining_time": "0:49:31", "throughput": 8665.44, "total_tokens": 112180776} +{"current_steps": 166465, "total_steps": 204665, "loss": 0.0, "lr": 2.0484786808169975e-07, "epoch": 4.06676764468766, "percentage": 81.34, "elapsed_time": "3:35:46", "remaining_time": "0:49:30", "throughput": 8665.46, "total_tokens": 112184168} +{"current_steps": 166470, "total_steps": 204665, "loss": 0.0, "lr": 2.0479615778401517e-07, "epoch": 4.066889795519508, "percentage": 81.34, "elapsed_time": "3:35:46", "remaining_time": "0:49:30", "throughput": 8665.51, "total_tokens": 112187880} +{"current_steps": 166475, "total_steps": 204665, "loss": 0.0, "lr": 2.0474445326926703e-07, "epoch": 4.0670119463513545, "percentage": 81.34, "elapsed_time": "3:35:46", "remaining_time": "0:49:30", "throughput": 8665.56, "total_tokens": 112191592} +{"current_steps": 166480, "total_steps": 204665, "loss": 0.0, "lr": 2.0469275453783098e-07, "epoch": 4.067134097183202, "percentage": 81.34, "elapsed_time": "3:35:47", "remaining_time": "0:49:29", "throughput": 8665.58, "total_tokens": 112194792} +{"current_steps": 166485, "total_steps": 204665, "loss": 0.0, "lr": 2.046410615900832e-07, "epoch": 4.067256248015049, "percentage": 81.35, "elapsed_time": "3:35:47", "remaining_time": "0:49:29", "throughput": 8665.62, "total_tokens": 112198376} +{"current_steps": 166490, "total_steps": 204665, "loss": 0.0, "lr": 2.0458937442639968e-07, "epoch": 4.067378398846897, "percentage": 81.35, "elapsed_time": "3:35:47", "remaining_time": "0:49:28", "throughput": 8665.65, "total_tokens": 112201896} +{"current_steps": 166495, "total_steps": 204665, "loss": 0.0, "lr": 2.0453769304715586e-07, "epoch": 4.067500549678743, "percentage": 81.35, "elapsed_time": "3:35:48", "remaining_time": "0:49:28", "throughput": 8665.68, "total_tokens": 112205160} +{"current_steps": 166500, "total_steps": 204665, "loss": 0.0, "lr": 2.0448601745272797e-07, "epoch": 4.06762270051059, "percentage": 81.35, "elapsed_time": "3:35:48", "remaining_time": "0:49:28", "throughput": 8665.68, "total_tokens": 112208232} +{"current_steps": 166505, "total_steps": 204665, "loss": 0.0, "lr": 2.044343476434919e-07, "epoch": 4.067744851342438, "percentage": 81.35, "elapsed_time": "3:35:48", "remaining_time": "0:49:27", "throughput": 8665.74, "total_tokens": 112212008} +{"current_steps": 166510, "total_steps": 204665, "loss": 0.0, "lr": 2.0438268361982303e-07, "epoch": 4.067867002174284, "percentage": 81.36, "elapsed_time": "3:35:49", "remaining_time": "0:49:27", "throughput": 8665.77, "total_tokens": 112215464} +{"current_steps": 166515, "total_steps": 204665, "loss": 0.0, "lr": 2.0433102538209745e-07, "epoch": 4.067989153006132, "percentage": 81.36, "elapsed_time": "3:35:49", "remaining_time": "0:49:26", "throughput": 8665.8, "total_tokens": 112218856} +{"current_steps": 166520, "total_steps": 204665, "loss": 0.0, "lr": 2.0427937293069042e-07, "epoch": 4.068111303837979, "percentage": 81.36, "elapsed_time": "3:35:49", "remaining_time": "0:49:26", "throughput": 8665.81, "total_tokens": 112221992} +{"current_steps": 166525, "total_steps": 204665, "loss": 0.0512, "lr": 2.0422772626597796e-07, "epoch": 4.068233454669826, "percentage": 81.36, "elapsed_time": "3:35:50", "remaining_time": "0:49:26", "throughput": 8665.82, "total_tokens": 112225128} +{"current_steps": 166530, "total_steps": 204665, "loss": 0.0204, "lr": 2.0417608538833563e-07, "epoch": 4.068355605501673, "percentage": 81.37, "elapsed_time": "3:35:50", "remaining_time": "0:49:25", "throughput": 8665.84, "total_tokens": 112228328} +{"current_steps": 166535, "total_steps": 204665, "loss": 0.0, "lr": 2.0412445029813863e-07, "epoch": 4.068477756333521, "percentage": 81.37, "elapsed_time": "3:35:50", "remaining_time": "0:49:25", "throughput": 8665.86, "total_tokens": 112231528} +{"current_steps": 166540, "total_steps": 204665, "loss": 0.0001, "lr": 2.0407282099576295e-07, "epoch": 4.068599907165368, "percentage": 81.37, "elapsed_time": "3:35:51", "remaining_time": "0:49:24", "throughput": 8665.87, "total_tokens": 112234600} +{"current_steps": 166545, "total_steps": 204665, "loss": 0.0, "lr": 2.0402119748158352e-07, "epoch": 4.068722057997215, "percentage": 81.37, "elapsed_time": "3:35:51", "remaining_time": "0:49:24", "throughput": 8665.89, "total_tokens": 112237800} +{"current_steps": 166550, "total_steps": 204665, "loss": 0.0, "lr": 2.039695797559763e-07, "epoch": 4.068844208829062, "percentage": 81.38, "elapsed_time": "3:35:52", "remaining_time": "0:49:24", "throughput": 8665.9, "total_tokens": 112240872} +{"current_steps": 166555, "total_steps": 204665, "loss": 0.0, "lr": 2.0391796781931615e-07, "epoch": 4.06896635966091, "percentage": 81.38, "elapsed_time": "3:35:52", "remaining_time": "0:49:23", "throughput": 8665.94, "total_tokens": 112244392} +{"current_steps": 166560, "total_steps": 204665, "loss": 0.0, "lr": 2.0386636167197868e-07, "epoch": 4.069088510492756, "percentage": 81.38, "elapsed_time": "3:35:52", "remaining_time": "0:49:23", "throughput": 8665.98, "total_tokens": 112247976} +{"current_steps": 166565, "total_steps": 204665, "loss": 0.0, "lr": 2.038147613143394e-07, "epoch": 4.069210661324604, "percentage": 81.38, "elapsed_time": "3:35:53", "remaining_time": "0:49:22", "throughput": 8665.99, "total_tokens": 112251112} +{"current_steps": 166570, "total_steps": 204665, "loss": 0.0, "lr": 2.0376316674677306e-07, "epoch": 4.069332812156451, "percentage": 81.39, "elapsed_time": "3:35:53", "remaining_time": "0:49:22", "throughput": 8666.0, "total_tokens": 112254184} +{"current_steps": 166575, "total_steps": 204665, "loss": 0.0, "lr": 2.0371157796965544e-07, "epoch": 4.069454962988298, "percentage": 81.39, "elapsed_time": "3:35:53", "remaining_time": "0:49:22", "throughput": 8666.03, "total_tokens": 112257576} +{"current_steps": 166580, "total_steps": 204665, "loss": 0.0, "lr": 2.0365999498336138e-07, "epoch": 4.069577113820145, "percentage": 81.39, "elapsed_time": "3:35:54", "remaining_time": "0:49:21", "throughput": 8666.05, "total_tokens": 112260904} +{"current_steps": 166585, "total_steps": 204665, "loss": 0.0, "lr": 2.0360841778826576e-07, "epoch": 4.069699264651993, "percentage": 81.39, "elapsed_time": "3:35:54", "remaining_time": "0:49:21", "throughput": 8666.06, "total_tokens": 112264040} +{"current_steps": 166590, "total_steps": 204665, "loss": 0.0, "lr": 2.0355684638474412e-07, "epoch": 4.0698214154838395, "percentage": 81.4, "elapsed_time": "3:35:54", "remaining_time": "0:49:20", "throughput": 8666.08, "total_tokens": 112267240} +{"current_steps": 166595, "total_steps": 204665, "loss": 0.0, "lr": 2.035052807731712e-07, "epoch": 4.069943566315686, "percentage": 81.4, "elapsed_time": "3:35:55", "remaining_time": "0:49:20", "throughput": 8666.11, "total_tokens": 112270696} +{"current_steps": 166600, "total_steps": 204665, "loss": 0.0, "lr": 2.034537209539222e-07, "epoch": 4.070065717147534, "percentage": 81.4, "elapsed_time": "3:35:55", "remaining_time": "0:49:20", "throughput": 8666.15, "total_tokens": 112274152} +{"current_steps": 166605, "total_steps": 204665, "loss": 0.0, "lr": 2.0340216692737188e-07, "epoch": 4.070187867979381, "percentage": 81.4, "elapsed_time": "3:35:55", "remaining_time": "0:49:19", "throughput": 8666.21, "total_tokens": 112278056} +{"current_steps": 166610, "total_steps": 204665, "loss": 0.0, "lr": 2.0335061869389547e-07, "epoch": 4.070310018811228, "percentage": 81.41, "elapsed_time": "3:35:56", "remaining_time": "0:49:19", "throughput": 8666.24, "total_tokens": 112281448} +{"current_steps": 166615, "total_steps": 204665, "loss": 0.0, "lr": 2.0329907625386733e-07, "epoch": 4.070432169643075, "percentage": 81.41, "elapsed_time": "3:35:56", "remaining_time": "0:49:18", "throughput": 8666.27, "total_tokens": 112284904} +{"current_steps": 166620, "total_steps": 204665, "loss": 0.0, "lr": 2.0324753960766262e-07, "epoch": 4.070554320474923, "percentage": 81.41, "elapsed_time": "3:35:56", "remaining_time": "0:49:18", "throughput": 8666.32, "total_tokens": 112288616} +{"current_steps": 166625, "total_steps": 204665, "loss": 0.0, "lr": 2.0319600875565635e-07, "epoch": 4.070676471306769, "percentage": 81.41, "elapsed_time": "3:35:57", "remaining_time": "0:49:18", "throughput": 8666.33, "total_tokens": 112291752} +{"current_steps": 166630, "total_steps": 204665, "loss": 0.0, "lr": 2.031444836982228e-07, "epoch": 4.070798622138617, "percentage": 81.42, "elapsed_time": "3:35:57", "remaining_time": "0:49:17", "throughput": 8666.38, "total_tokens": 112295400} +{"current_steps": 166635, "total_steps": 204665, "loss": 0.0, "lr": 2.030929644357371e-07, "epoch": 4.070920772970464, "percentage": 81.42, "elapsed_time": "3:35:57", "remaining_time": "0:49:17", "throughput": 8666.4, "total_tokens": 112298664} +{"current_steps": 166640, "total_steps": 204665, "loss": 0.0, "lr": 2.030414509685734e-07, "epoch": 4.071042923802311, "percentage": 81.42, "elapsed_time": "3:35:58", "remaining_time": "0:49:16", "throughput": 8666.41, "total_tokens": 112301736} +{"current_steps": 166645, "total_steps": 204665, "loss": 0.0, "lr": 2.0298994329710694e-07, "epoch": 4.071165074634158, "percentage": 81.42, "elapsed_time": "3:35:58", "remaining_time": "0:49:16", "throughput": 8666.43, "total_tokens": 112305064} +{"current_steps": 166650, "total_steps": 204665, "loss": 0.0, "lr": 2.029384414217118e-07, "epoch": 4.071287225466006, "percentage": 81.43, "elapsed_time": "3:35:58", "remaining_time": "0:49:16", "throughput": 8666.45, "total_tokens": 112308328} +{"current_steps": 166655, "total_steps": 204665, "loss": 0.0, "lr": 2.0288694534276262e-07, "epoch": 4.0714093762978525, "percentage": 81.43, "elapsed_time": "3:35:59", "remaining_time": "0:49:15", "throughput": 8666.47, "total_tokens": 112311592} +{"current_steps": 166660, "total_steps": 204665, "loss": 0.0, "lr": 2.0283545506063426e-07, "epoch": 4.0715315271297, "percentage": 81.43, "elapsed_time": "3:35:59", "remaining_time": "0:49:15", "throughput": 8666.48, "total_tokens": 112314728} +{"current_steps": 166665, "total_steps": 204665, "loss": 0.0005, "lr": 2.0278397057570063e-07, "epoch": 4.071653677961547, "percentage": 81.43, "elapsed_time": "3:36:00", "remaining_time": "0:49:14", "throughput": 8666.49, "total_tokens": 112317672} +{"current_steps": 166670, "total_steps": 204665, "loss": 0.0, "lr": 2.0273249188833652e-07, "epoch": 4.0717758287933945, "percentage": 81.44, "elapsed_time": "3:36:00", "remaining_time": "0:49:14", "throughput": 8666.52, "total_tokens": 112321128} +{"current_steps": 166675, "total_steps": 204665, "loss": 0.0667, "lr": 2.026810189989161e-07, "epoch": 4.071897979625241, "percentage": 81.44, "elapsed_time": "3:36:00", "remaining_time": "0:49:14", "throughput": 8666.56, "total_tokens": 112324840} +{"current_steps": 166680, "total_steps": 204665, "loss": 0.0, "lr": 2.0262955190781393e-07, "epoch": 4.072020130457088, "percentage": 81.44, "elapsed_time": "3:36:01", "remaining_time": "0:49:13", "throughput": 8666.57, "total_tokens": 112327848} +{"current_steps": 166685, "total_steps": 204665, "loss": 0.0, "lr": 2.025780906154041e-07, "epoch": 4.072142281288936, "percentage": 81.44, "elapsed_time": "3:36:01", "remaining_time": "0:49:13", "throughput": 8666.6, "total_tokens": 112331240} +{"current_steps": 166690, "total_steps": 204665, "loss": 0.0, "lr": 2.025266351220607e-07, "epoch": 4.072264432120782, "percentage": 81.45, "elapsed_time": "3:36:01", "remaining_time": "0:49:12", "throughput": 8666.61, "total_tokens": 112334312} +{"current_steps": 166695, "total_steps": 204665, "loss": 0.0, "lr": 2.0247518542815822e-07, "epoch": 4.07238658295263, "percentage": 81.45, "elapsed_time": "3:36:02", "remaining_time": "0:49:12", "throughput": 8666.61, "total_tokens": 112337256} +{"current_steps": 166700, "total_steps": 204665, "loss": 0.0, "lr": 2.024237415340706e-07, "epoch": 4.072508733784477, "percentage": 81.45, "elapsed_time": "3:36:02", "remaining_time": "0:49:12", "throughput": 8666.64, "total_tokens": 112340648} +{"current_steps": 166705, "total_steps": 204665, "loss": 0.0, "lr": 2.023723034401722e-07, "epoch": 4.072630884616324, "percentage": 81.45, "elapsed_time": "3:36:02", "remaining_time": "0:49:11", "throughput": 8666.65, "total_tokens": 112343720} +{"current_steps": 166710, "total_steps": 204665, "loss": 0.0, "lr": 2.0232087114683672e-07, "epoch": 4.072753035448171, "percentage": 81.46, "elapsed_time": "3:36:03", "remaining_time": "0:49:11", "throughput": 8666.66, "total_tokens": 112346792} +{"current_steps": 166715, "total_steps": 204665, "loss": 0.0, "lr": 2.022694446544385e-07, "epoch": 4.072875186280019, "percentage": 81.46, "elapsed_time": "3:36:03", "remaining_time": "0:49:10", "throughput": 8666.73, "total_tokens": 112350888} +{"current_steps": 166720, "total_steps": 204665, "loss": 0.0003, "lr": 2.0221802396335164e-07, "epoch": 4.0729973371118655, "percentage": 81.46, "elapsed_time": "3:36:03", "remaining_time": "0:49:10", "throughput": 8666.77, "total_tokens": 112354408} +{"current_steps": 166725, "total_steps": 204665, "loss": 0.0, "lr": 2.0216660907394955e-07, "epoch": 4.073119487943713, "percentage": 81.46, "elapsed_time": "3:36:04", "remaining_time": "0:49:10", "throughput": 8666.79, "total_tokens": 112357544} +{"current_steps": 166730, "total_steps": 204665, "loss": 0.0001, "lr": 2.0211519998660687e-07, "epoch": 4.07324163877556, "percentage": 81.46, "elapsed_time": "3:36:04", "remaining_time": "0:49:09", "throughput": 8666.82, "total_tokens": 112361000} +{"current_steps": 166735, "total_steps": 204665, "loss": 0.0, "lr": 2.020637967016967e-07, "epoch": 4.0733637896074075, "percentage": 81.47, "elapsed_time": "3:36:04", "remaining_time": "0:49:09", "throughput": 8666.82, "total_tokens": 112363944} +{"current_steps": 166740, "total_steps": 204665, "loss": 0.0, "lr": 2.0201239921959346e-07, "epoch": 4.073485940439254, "percentage": 81.47, "elapsed_time": "3:36:05", "remaining_time": "0:49:08", "throughput": 8666.83, "total_tokens": 112367080} +{"current_steps": 166745, "total_steps": 204665, "loss": 0.0, "lr": 2.0196100754067046e-07, "epoch": 4.073608091271102, "percentage": 81.47, "elapsed_time": "3:36:05", "remaining_time": "0:49:08", "throughput": 8666.86, "total_tokens": 112370472} +{"current_steps": 166750, "total_steps": 204665, "loss": 0.0, "lr": 2.0190962166530167e-07, "epoch": 4.073730242102949, "percentage": 81.47, "elapsed_time": "3:36:05", "remaining_time": "0:49:08", "throughput": 8666.88, "total_tokens": 112373800} +{"current_steps": 166755, "total_steps": 204665, "loss": 0.0, "lr": 2.018582415938611e-07, "epoch": 4.073852392934796, "percentage": 81.48, "elapsed_time": "3:36:06", "remaining_time": "0:49:07", "throughput": 8666.89, "total_tokens": 112376872} +{"current_steps": 166760, "total_steps": 204665, "loss": 0.0, "lr": 2.018068673267217e-07, "epoch": 4.073974543766643, "percentage": 81.48, "elapsed_time": "3:36:06", "remaining_time": "0:49:07", "throughput": 8666.9, "total_tokens": 112379816} +{"current_steps": 166765, "total_steps": 204665, "loss": 0.0, "lr": 2.017554988642578e-07, "epoch": 4.07409669459849, "percentage": 81.48, "elapsed_time": "3:36:06", "remaining_time": "0:49:06", "throughput": 8666.92, "total_tokens": 112383144} +{"current_steps": 166770, "total_steps": 204665, "loss": 0.0, "lr": 2.0170413620684222e-07, "epoch": 4.074218845430337, "percentage": 81.48, "elapsed_time": "3:36:07", "remaining_time": "0:49:06", "throughput": 8666.94, "total_tokens": 112386344} +{"current_steps": 166775, "total_steps": 204665, "loss": 0.0, "lr": 2.0165277935484926e-07, "epoch": 4.074340996262184, "percentage": 81.49, "elapsed_time": "3:36:07", "remaining_time": "0:49:06", "throughput": 8666.96, "total_tokens": 112389672} +{"current_steps": 166780, "total_steps": 204665, "loss": 0.0, "lr": 2.016014283086518e-07, "epoch": 4.074463147094032, "percentage": 81.49, "elapsed_time": "3:36:07", "remaining_time": "0:49:05", "throughput": 8666.97, "total_tokens": 112392744} +{"current_steps": 166785, "total_steps": 204665, "loss": 0.0, "lr": 2.0155008306862366e-07, "epoch": 4.0745852979258785, "percentage": 81.49, "elapsed_time": "3:36:08", "remaining_time": "0:49:05", "throughput": 8667.0, "total_tokens": 112396136} +{"current_steps": 166790, "total_steps": 204665, "loss": 0.0, "lr": 2.0149874363513775e-07, "epoch": 4.074707448757726, "percentage": 81.49, "elapsed_time": "3:36:08", "remaining_time": "0:49:04", "throughput": 8667.04, "total_tokens": 112399720} +{"current_steps": 166795, "total_steps": 204665, "loss": 0.0, "lr": 2.0144741000856813e-07, "epoch": 4.074829599589573, "percentage": 81.5, "elapsed_time": "3:36:08", "remaining_time": "0:49:04", "throughput": 8667.04, "total_tokens": 112402536} +{"current_steps": 166800, "total_steps": 204665, "loss": 0.0, "lr": 2.0139608218928772e-07, "epoch": 4.0749517504214205, "percentage": 81.5, "elapsed_time": "3:36:09", "remaining_time": "0:49:04", "throughput": 8667.08, "total_tokens": 112406120} +{"current_steps": 166805, "total_steps": 204665, "loss": 0.0, "lr": 2.0134476017766943e-07, "epoch": 4.075073901253267, "percentage": 81.5, "elapsed_time": "3:36:09", "remaining_time": "0:49:03", "throughput": 8667.1, "total_tokens": 112409384} +{"current_steps": 166810, "total_steps": 204665, "loss": 0.0, "lr": 2.0129344397408698e-07, "epoch": 4.075196052085115, "percentage": 81.5, "elapsed_time": "3:36:10", "remaining_time": "0:49:03", "throughput": 8667.12, "total_tokens": 112412648} +{"current_steps": 166815, "total_steps": 204665, "loss": 0.0, "lr": 2.0124213357891362e-07, "epoch": 4.075318202916962, "percentage": 81.51, "elapsed_time": "3:36:10", "remaining_time": "0:49:02", "throughput": 8667.13, "total_tokens": 112415720} +{"current_steps": 166820, "total_steps": 204665, "loss": 0.0, "lr": 2.0119082899252216e-07, "epoch": 4.075440353748809, "percentage": 81.51, "elapsed_time": "3:36:10", "remaining_time": "0:49:02", "throughput": 8667.18, "total_tokens": 112419496} +{"current_steps": 166825, "total_steps": 204665, "loss": 0.0, "lr": 2.0113953021528595e-07, "epoch": 4.075562504580656, "percentage": 81.51, "elapsed_time": "3:36:11", "remaining_time": "0:49:02", "throughput": 8667.21, "total_tokens": 112422824} +{"current_steps": 166830, "total_steps": 204665, "loss": 0.0, "lr": 2.0108823724757772e-07, "epoch": 4.075684655412504, "percentage": 81.51, "elapsed_time": "3:36:11", "remaining_time": "0:49:01", "throughput": 8667.24, "total_tokens": 112426280} +{"current_steps": 166835, "total_steps": 204665, "loss": 0.0, "lr": 2.0103695008977083e-07, "epoch": 4.07580680624435, "percentage": 81.52, "elapsed_time": "3:36:11", "remaining_time": "0:49:01", "throughput": 8667.27, "total_tokens": 112429608} +{"current_steps": 166840, "total_steps": 204665, "loss": 0.0, "lr": 2.0098566874223833e-07, "epoch": 4.075928957076198, "percentage": 81.52, "elapsed_time": "3:36:12", "remaining_time": "0:49:00", "throughput": 8667.28, "total_tokens": 112432744} +{"current_steps": 166845, "total_steps": 204665, "loss": 0.0, "lr": 2.0093439320535267e-07, "epoch": 4.076051107908045, "percentage": 81.52, "elapsed_time": "3:36:12", "remaining_time": "0:49:00", "throughput": 8667.32, "total_tokens": 112436392} +{"current_steps": 166850, "total_steps": 204665, "loss": 0.0, "lr": 2.008831234794872e-07, "epoch": 4.0761732587398924, "percentage": 81.52, "elapsed_time": "3:36:12", "remaining_time": "0:49:00", "throughput": 8667.37, "total_tokens": 112440104} +{"current_steps": 166855, "total_steps": 204665, "loss": 0.0, "lr": 2.0083185956501447e-07, "epoch": 4.076295409571739, "percentage": 81.53, "elapsed_time": "3:36:13", "remaining_time": "0:48:59", "throughput": 8667.38, "total_tokens": 112443112} +{"current_steps": 166860, "total_steps": 204665, "loss": 0.0, "lr": 2.0078060146230758e-07, "epoch": 4.076417560403586, "percentage": 81.53, "elapsed_time": "3:36:13", "remaining_time": "0:48:59", "throughput": 8667.39, "total_tokens": 112446184} +{"current_steps": 166865, "total_steps": 204665, "loss": 0.0, "lr": 2.007293491717389e-07, "epoch": 4.076539711235434, "percentage": 81.53, "elapsed_time": "3:36:13", "remaining_time": "0:48:58", "throughput": 8667.42, "total_tokens": 112449640} +{"current_steps": 166870, "total_steps": 204665, "loss": 0.0, "lr": 2.0067810269368136e-07, "epoch": 4.07666186206728, "percentage": 81.53, "elapsed_time": "3:36:14", "remaining_time": "0:48:58", "throughput": 8667.44, "total_tokens": 112452904} +{"current_steps": 166875, "total_steps": 204665, "loss": 0.0, "lr": 2.0062686202850797e-07, "epoch": 4.076784012899128, "percentage": 81.54, "elapsed_time": "3:36:14", "remaining_time": "0:48:58", "throughput": 8667.48, "total_tokens": 112456488} +{"current_steps": 166880, "total_steps": 204665, "loss": 0.0, "lr": 2.005756271765907e-07, "epoch": 4.076906163730975, "percentage": 81.54, "elapsed_time": "3:36:14", "remaining_time": "0:48:57", "throughput": 8667.5, "total_tokens": 112459688} +{"current_steps": 166885, "total_steps": 204665, "loss": 0.0, "lr": 2.005243981383028e-07, "epoch": 4.077028314562822, "percentage": 81.54, "elapsed_time": "3:36:15", "remaining_time": "0:48:57", "throughput": 8667.55, "total_tokens": 112463400} +{"current_steps": 166890, "total_steps": 204665, "loss": 0.0, "lr": 2.004731749140165e-07, "epoch": 4.077150465394669, "percentage": 81.54, "elapsed_time": "3:36:15", "remaining_time": "0:48:56", "throughput": 8667.56, "total_tokens": 112466600} +{"current_steps": 166895, "total_steps": 204665, "loss": 0.0, "lr": 2.0042195750410406e-07, "epoch": 4.077272616226517, "percentage": 81.55, "elapsed_time": "3:36:15", "remaining_time": "0:48:56", "throughput": 8667.6, "total_tokens": 112470056} +{"current_steps": 166900, "total_steps": 204665, "loss": 0.0, "lr": 2.0037074590893842e-07, "epoch": 4.0773947670583635, "percentage": 81.55, "elapsed_time": "3:36:16", "remaining_time": "0:48:56", "throughput": 8667.63, "total_tokens": 112473576} +{"current_steps": 166905, "total_steps": 204665, "loss": 0.0917, "lr": 2.0031954012889153e-07, "epoch": 4.077516917890211, "percentage": 81.55, "elapsed_time": "3:36:16", "remaining_time": "0:48:55", "throughput": 8667.66, "total_tokens": 112476904} +{"current_steps": 166910, "total_steps": 204665, "loss": 0.0, "lr": 2.0026834016433635e-07, "epoch": 4.077639068722058, "percentage": 81.55, "elapsed_time": "3:36:16", "remaining_time": "0:48:55", "throughput": 8667.7, "total_tokens": 112480488} +{"current_steps": 166915, "total_steps": 204665, "loss": 0.0, "lr": 2.0021714601564464e-07, "epoch": 4.0777612195539055, "percentage": 81.56, "elapsed_time": "3:36:17", "remaining_time": "0:48:54", "throughput": 8667.75, "total_tokens": 112484264} +{"current_steps": 166920, "total_steps": 204665, "loss": 0.0, "lr": 2.0016595768318922e-07, "epoch": 4.077883370385752, "percentage": 81.56, "elapsed_time": "3:36:17", "remaining_time": "0:48:54", "throughput": 8667.77, "total_tokens": 112487464} +{"current_steps": 166925, "total_steps": 204665, "loss": 0.0, "lr": 2.0011477516734175e-07, "epoch": 4.0780055212176, "percentage": 81.56, "elapsed_time": "3:36:18", "remaining_time": "0:48:54", "throughput": 8667.82, "total_tokens": 112491176} +{"current_steps": 166930, "total_steps": 204665, "loss": 0.0, "lr": 2.0006359846847487e-07, "epoch": 4.078127672049447, "percentage": 81.56, "elapsed_time": "3:36:18", "remaining_time": "0:48:53", "throughput": 8667.81, "total_tokens": 112493928} +{"current_steps": 166935, "total_steps": 204665, "loss": 0.0, "lr": 2.000124275869609e-07, "epoch": 4.078249822881294, "percentage": 81.56, "elapsed_time": "3:36:18", "remaining_time": "0:48:53", "throughput": 8667.86, "total_tokens": 112497576} +{"current_steps": 166940, "total_steps": 204665, "loss": 0.0, "lr": 1.9996126252317146e-07, "epoch": 4.078371973713141, "percentage": 81.57, "elapsed_time": "3:36:19", "remaining_time": "0:48:52", "throughput": 8667.86, "total_tokens": 112500584} +{"current_steps": 166945, "total_steps": 204665, "loss": 0.0, "lr": 1.9991010327747915e-07, "epoch": 4.078494124544988, "percentage": 81.57, "elapsed_time": "3:36:19", "remaining_time": "0:48:52", "throughput": 8667.88, "total_tokens": 112503784} +{"current_steps": 166950, "total_steps": 204665, "loss": 0.0, "lr": 1.9985894985025542e-07, "epoch": 4.078616275376835, "percentage": 81.57, "elapsed_time": "3:36:19", "remaining_time": "0:48:52", "throughput": 8667.91, "total_tokens": 112507240} +{"current_steps": 166955, "total_steps": 204665, "loss": 0.0, "lr": 1.99807802241873e-07, "epoch": 4.078738426208682, "percentage": 81.57, "elapsed_time": "3:36:20", "remaining_time": "0:48:51", "throughput": 8667.91, "total_tokens": 112510184} +{"current_steps": 166960, "total_steps": 204665, "loss": 0.0, "lr": 1.9975666045270323e-07, "epoch": 4.07886057704053, "percentage": 81.58, "elapsed_time": "3:36:20", "remaining_time": "0:48:51", "throughput": 8667.99, "total_tokens": 112514280} +{"current_steps": 166965, "total_steps": 204665, "loss": 0.0, "lr": 1.9970552448311818e-07, "epoch": 4.0789827278723765, "percentage": 81.58, "elapsed_time": "3:36:20", "remaining_time": "0:48:51", "throughput": 8668.01, "total_tokens": 112517608} +{"current_steps": 166970, "total_steps": 204665, "loss": 0.0, "lr": 1.9965439433349008e-07, "epoch": 4.079104878704224, "percentage": 81.58, "elapsed_time": "3:36:21", "remaining_time": "0:48:50", "throughput": 8668.03, "total_tokens": 112520808} +{"current_steps": 166975, "total_steps": 204665, "loss": 0.0, "lr": 1.9960327000419032e-07, "epoch": 4.079227029536071, "percentage": 81.58, "elapsed_time": "3:36:21", "remaining_time": "0:48:50", "throughput": 8668.04, "total_tokens": 112524008} +{"current_steps": 166980, "total_steps": 204665, "loss": 0.0, "lr": 1.9955215149559101e-07, "epoch": 4.0793491803679185, "percentage": 81.59, "elapsed_time": "3:36:21", "remaining_time": "0:48:49", "throughput": 8668.04, "total_tokens": 112526952} +{"current_steps": 166985, "total_steps": 204665, "loss": 0.0, "lr": 1.9950103880806357e-07, "epoch": 4.079471331199765, "percentage": 81.59, "elapsed_time": "3:36:22", "remaining_time": "0:48:49", "throughput": 8668.09, "total_tokens": 112530664} +{"current_steps": 166990, "total_steps": 204665, "loss": 0.0018, "lr": 1.9944993194198012e-07, "epoch": 4.079593482031613, "percentage": 81.59, "elapsed_time": "3:36:22", "remaining_time": "0:48:49", "throughput": 8668.13, "total_tokens": 112534184} +{"current_steps": 166995, "total_steps": 204665, "loss": 0.0, "lr": 1.9939883089771203e-07, "epoch": 4.07971563286346, "percentage": 81.59, "elapsed_time": "3:36:22", "remaining_time": "0:48:48", "throughput": 8668.16, "total_tokens": 112537576} +{"current_steps": 167000, "total_steps": 204665, "loss": 0.0308, "lr": 1.9934773567563079e-07, "epoch": 4.079837783695307, "percentage": 81.6, "elapsed_time": "3:36:23", "remaining_time": "0:48:48", "throughput": 8668.18, "total_tokens": 112540840} +{"current_steps": 167005, "total_steps": 204665, "loss": 0.0392, "lr": 1.9929664627610842e-07, "epoch": 4.079959934527154, "percentage": 81.6, "elapsed_time": "3:36:23", "remaining_time": "0:48:47", "throughput": 8668.21, "total_tokens": 112544232} +{"current_steps": 167010, "total_steps": 204665, "loss": 0.0359, "lr": 1.9924556269951587e-07, "epoch": 4.080082085359002, "percentage": 81.6, "elapsed_time": "3:36:23", "remaining_time": "0:48:47", "throughput": 8668.24, "total_tokens": 112547624} +{"current_steps": 167015, "total_steps": 204665, "loss": 0.0, "lr": 1.9919448494622526e-07, "epoch": 4.080204236190848, "percentage": 81.6, "elapsed_time": "3:36:24", "remaining_time": "0:48:47", "throughput": 8668.28, "total_tokens": 112551272} +{"current_steps": 167020, "total_steps": 204665, "loss": 0.0, "lr": 1.9914341301660752e-07, "epoch": 4.080326387022696, "percentage": 81.61, "elapsed_time": "3:36:24", "remaining_time": "0:48:46", "throughput": 8668.29, "total_tokens": 112554344} +{"current_steps": 167025, "total_steps": 204665, "loss": 0.0, "lr": 1.9909234691103426e-07, "epoch": 4.080448537854543, "percentage": 81.61, "elapsed_time": "3:36:24", "remaining_time": "0:48:46", "throughput": 8668.32, "total_tokens": 112557736} +{"current_steps": 167030, "total_steps": 204665, "loss": 0.0, "lr": 1.9904128662987717e-07, "epoch": 4.08057068868639, "percentage": 81.61, "elapsed_time": "3:36:25", "remaining_time": "0:48:45", "throughput": 8668.35, "total_tokens": 112561128} +{"current_steps": 167035, "total_steps": 204665, "loss": 0.0, "lr": 1.9899023217350697e-07, "epoch": 4.080692839518237, "percentage": 81.61, "elapsed_time": "3:36:25", "remaining_time": "0:48:45", "throughput": 8668.36, "total_tokens": 112564264} +{"current_steps": 167040, "total_steps": 204665, "loss": 0.0, "lr": 1.9893918354229554e-07, "epoch": 4.080814990350084, "percentage": 81.62, "elapsed_time": "3:36:25", "remaining_time": "0:48:45", "throughput": 8668.4, "total_tokens": 112567784} +{"current_steps": 167045, "total_steps": 204665, "loss": 0.0, "lr": 1.9888814073661353e-07, "epoch": 4.0809371411819315, "percentage": 81.62, "elapsed_time": "3:36:26", "remaining_time": "0:48:44", "throughput": 8668.42, "total_tokens": 112571112} +{"current_steps": 167050, "total_steps": 204665, "loss": 0.054, "lr": 1.9883710375683273e-07, "epoch": 4.081059292013778, "percentage": 81.62, "elapsed_time": "3:36:26", "remaining_time": "0:48:44", "throughput": 8668.47, "total_tokens": 112574760} +{"current_steps": 167055, "total_steps": 204665, "loss": 0.0, "lr": 1.987860726033237e-07, "epoch": 4.081181442845626, "percentage": 81.62, "elapsed_time": "3:36:27", "remaining_time": "0:48:43", "throughput": 8668.5, "total_tokens": 112578152} +{"current_steps": 167060, "total_steps": 204665, "loss": 0.0, "lr": 1.9873504727645784e-07, "epoch": 4.081303593677473, "percentage": 81.63, "elapsed_time": "3:36:27", "remaining_time": "0:48:43", "throughput": 8668.55, "total_tokens": 112581928} +{"current_steps": 167065, "total_steps": 204665, "loss": 0.0, "lr": 1.9868402777660652e-07, "epoch": 4.08142574450932, "percentage": 81.63, "elapsed_time": "3:36:27", "remaining_time": "0:48:43", "throughput": 8668.58, "total_tokens": 112585320} +{"current_steps": 167070, "total_steps": 204665, "loss": 0.0, "lr": 1.9863301410414024e-07, "epoch": 4.081547895341167, "percentage": 81.63, "elapsed_time": "3:36:28", "remaining_time": "0:48:42", "throughput": 8668.62, "total_tokens": 112588968} +{"current_steps": 167075, "total_steps": 204665, "loss": 0.0, "lr": 1.9858200625943044e-07, "epoch": 4.081670046173015, "percentage": 81.63, "elapsed_time": "3:36:28", "remaining_time": "0:48:42", "throughput": 8668.63, "total_tokens": 112591976} +{"current_steps": 167080, "total_steps": 204665, "loss": 0.0001, "lr": 1.9853100424284764e-07, "epoch": 4.081792197004861, "percentage": 81.64, "elapsed_time": "3:36:28", "remaining_time": "0:48:41", "throughput": 8668.64, "total_tokens": 112595048} +{"current_steps": 167085, "total_steps": 204665, "loss": 0.0, "lr": 1.9848000805476284e-07, "epoch": 4.081914347836709, "percentage": 81.64, "elapsed_time": "3:36:29", "remaining_time": "0:48:41", "throughput": 8668.69, "total_tokens": 112598888} +{"current_steps": 167090, "total_steps": 204665, "loss": 0.0, "lr": 1.9842901769554742e-07, "epoch": 4.082036498668556, "percentage": 81.64, "elapsed_time": "3:36:29", "remaining_time": "0:48:41", "throughput": 8668.69, "total_tokens": 112601768} +{"current_steps": 167095, "total_steps": 204665, "loss": 0.0, "lr": 1.9837803316557167e-07, "epoch": 4.082158649500403, "percentage": 81.64, "elapsed_time": "3:36:29", "remaining_time": "0:48:40", "throughput": 8668.72, "total_tokens": 112605096} +{"current_steps": 167100, "total_steps": 204665, "loss": 0.0, "lr": 1.9832705446520625e-07, "epoch": 4.08228080033225, "percentage": 81.65, "elapsed_time": "3:36:30", "remaining_time": "0:48:40", "throughput": 8668.76, "total_tokens": 112608680} +{"current_steps": 167105, "total_steps": 204665, "loss": 0.0, "lr": 1.9827608159482235e-07, "epoch": 4.082402951164098, "percentage": 81.65, "elapsed_time": "3:36:30", "remaining_time": "0:48:39", "throughput": 8668.79, "total_tokens": 112612136} +{"current_steps": 167110, "total_steps": 204665, "loss": 0.0, "lr": 1.9822511455479041e-07, "epoch": 4.0825251019959445, "percentage": 81.65, "elapsed_time": "3:36:30", "remaining_time": "0:48:39", "throughput": 8668.82, "total_tokens": 112615656} +{"current_steps": 167115, "total_steps": 204665, "loss": 0.0, "lr": 1.9817415334548093e-07, "epoch": 4.082647252827792, "percentage": 81.65, "elapsed_time": "3:36:31", "remaining_time": "0:48:39", "throughput": 8668.83, "total_tokens": 112618728} +{"current_steps": 167120, "total_steps": 204665, "loss": 0.0, "lr": 1.9812319796726452e-07, "epoch": 4.082769403659639, "percentage": 81.66, "elapsed_time": "3:36:31", "remaining_time": "0:48:38", "throughput": 8668.85, "total_tokens": 112621992} +{"current_steps": 167125, "total_steps": 204665, "loss": 0.0, "lr": 1.980722484205123e-07, "epoch": 4.082891554491486, "percentage": 81.66, "elapsed_time": "3:36:31", "remaining_time": "0:48:38", "throughput": 8668.87, "total_tokens": 112625192} +{"current_steps": 167130, "total_steps": 204665, "loss": 0.0, "lr": 1.9802130470559397e-07, "epoch": 4.083013705323333, "percentage": 81.66, "elapsed_time": "3:36:32", "remaining_time": "0:48:37", "throughput": 8668.89, "total_tokens": 112628392} +{"current_steps": 167135, "total_steps": 204665, "loss": 0.0, "lr": 1.9797036682288083e-07, "epoch": 4.08313585615518, "percentage": 81.66, "elapsed_time": "3:36:32", "remaining_time": "0:48:37", "throughput": 8668.93, "total_tokens": 112631976} +{"current_steps": 167140, "total_steps": 204665, "loss": 0.0, "lr": 1.9791943477274255e-07, "epoch": 4.083258006987028, "percentage": 81.67, "elapsed_time": "3:36:32", "remaining_time": "0:48:37", "throughput": 8669.01, "total_tokens": 112636136} +{"current_steps": 167145, "total_steps": 204665, "loss": 0.0, "lr": 1.9786850855554993e-07, "epoch": 4.083380157818874, "percentage": 81.67, "elapsed_time": "3:36:33", "remaining_time": "0:48:36", "throughput": 8669.04, "total_tokens": 112639656} +{"current_steps": 167150, "total_steps": 204665, "loss": 0.0, "lr": 1.9781758817167348e-07, "epoch": 4.083502308650722, "percentage": 81.67, "elapsed_time": "3:36:33", "remaining_time": "0:48:36", "throughput": 8669.06, "total_tokens": 112642792} +{"current_steps": 167155, "total_steps": 204665, "loss": 0.0, "lr": 1.9776667362148303e-07, "epoch": 4.083624459482569, "percentage": 81.67, "elapsed_time": "3:36:34", "remaining_time": "0:48:35", "throughput": 8669.08, "total_tokens": 112646120} +{"current_steps": 167160, "total_steps": 204665, "loss": 0.0, "lr": 1.9771576490534935e-07, "epoch": 4.083746610314416, "percentage": 81.67, "elapsed_time": "3:36:34", "remaining_time": "0:48:35", "throughput": 8669.11, "total_tokens": 112649576} +{"current_steps": 167165, "total_steps": 204665, "loss": 0.0, "lr": 1.976648620236422e-07, "epoch": 4.083868761146263, "percentage": 81.68, "elapsed_time": "3:36:34", "remaining_time": "0:48:35", "throughput": 8669.13, "total_tokens": 112652840} +{"current_steps": 167170, "total_steps": 204665, "loss": 0.0, "lr": 1.976139649767322e-07, "epoch": 4.083990911978111, "percentage": 81.68, "elapsed_time": "3:36:35", "remaining_time": "0:48:34", "throughput": 8669.15, "total_tokens": 112656040} +{"current_steps": 167175, "total_steps": 204665, "loss": 0.0, "lr": 1.9756307376498905e-07, "epoch": 4.084113062809958, "percentage": 81.68, "elapsed_time": "3:36:35", "remaining_time": "0:48:34", "throughput": 8669.16, "total_tokens": 112659112} +{"current_steps": 167180, "total_steps": 204665, "loss": 0.0, "lr": 1.9751218838878304e-07, "epoch": 4.084235213641805, "percentage": 81.68, "elapsed_time": "3:36:35", "remaining_time": "0:48:33", "throughput": 8669.17, "total_tokens": 112662184} +{"current_steps": 167185, "total_steps": 204665, "loss": 0.0, "lr": 1.9746130884848445e-07, "epoch": 4.084357364473652, "percentage": 81.69, "elapsed_time": "3:36:36", "remaining_time": "0:48:33", "throughput": 8669.2, "total_tokens": 112665512} +{"current_steps": 167190, "total_steps": 204665, "loss": 0.0, "lr": 1.9741043514446288e-07, "epoch": 4.0844795153055, "percentage": 81.69, "elapsed_time": "3:36:36", "remaining_time": "0:48:33", "throughput": 8669.22, "total_tokens": 112668840} +{"current_steps": 167195, "total_steps": 204665, "loss": 0.0, "lr": 1.973595672770887e-07, "epoch": 4.084601666137346, "percentage": 81.69, "elapsed_time": "3:36:36", "remaining_time": "0:48:32", "throughput": 8669.25, "total_tokens": 112672168} +{"current_steps": 167200, "total_steps": 204665, "loss": 0.0, "lr": 1.9730870524673172e-07, "epoch": 4.084723816969194, "percentage": 81.69, "elapsed_time": "3:36:37", "remaining_time": "0:48:32", "throughput": 8669.27, "total_tokens": 112675368} +{"current_steps": 167205, "total_steps": 204665, "loss": 0.0, "lr": 1.972578490537614e-07, "epoch": 4.084845967801041, "percentage": 81.7, "elapsed_time": "3:36:37", "remaining_time": "0:48:31", "throughput": 8669.28, "total_tokens": 112678504} +{"current_steps": 167210, "total_steps": 204665, "loss": 0.0, "lr": 1.9720699869854817e-07, "epoch": 4.084968118632888, "percentage": 81.7, "elapsed_time": "3:36:37", "remaining_time": "0:48:31", "throughput": 8669.31, "total_tokens": 112681960} +{"current_steps": 167215, "total_steps": 204665, "loss": 0.0, "lr": 1.9715615418146138e-07, "epoch": 4.085090269464735, "percentage": 81.7, "elapsed_time": "3:36:38", "remaining_time": "0:48:31", "throughput": 8669.34, "total_tokens": 112685288} +{"current_steps": 167220, "total_steps": 204665, "loss": 0.0, "lr": 1.9710531550287112e-07, "epoch": 4.085212420296582, "percentage": 81.7, "elapsed_time": "3:36:38", "remaining_time": "0:48:30", "throughput": 8669.35, "total_tokens": 112688360} +{"current_steps": 167225, "total_steps": 204665, "loss": 0.0, "lr": 1.9705448266314685e-07, "epoch": 4.0853345711284295, "percentage": 81.71, "elapsed_time": "3:36:38", "remaining_time": "0:48:30", "throughput": 8669.4, "total_tokens": 112692136} +{"current_steps": 167230, "total_steps": 204665, "loss": 0.0, "lr": 1.9700365566265852e-07, "epoch": 4.085456721960276, "percentage": 81.71, "elapsed_time": "3:36:39", "remaining_time": "0:48:29", "throughput": 8669.42, "total_tokens": 112695336} +{"current_steps": 167235, "total_steps": 204665, "loss": 0.0004, "lr": 1.9695283450177523e-07, "epoch": 4.085578872792124, "percentage": 81.71, "elapsed_time": "3:36:39", "remaining_time": "0:48:29", "throughput": 8669.43, "total_tokens": 112698536} +{"current_steps": 167240, "total_steps": 204665, "loss": 0.0001, "lr": 1.9690201918086712e-07, "epoch": 4.085701023623971, "percentage": 81.71, "elapsed_time": "3:36:39", "remaining_time": "0:48:29", "throughput": 8669.45, "total_tokens": 112701736} +{"current_steps": 167245, "total_steps": 204665, "loss": 0.0, "lr": 1.9685120970030366e-07, "epoch": 4.085823174455818, "percentage": 81.72, "elapsed_time": "3:36:40", "remaining_time": "0:48:28", "throughput": 8669.47, "total_tokens": 112705064} +{"current_steps": 167250, "total_steps": 204665, "loss": 0.0, "lr": 1.9680040606045402e-07, "epoch": 4.085945325287665, "percentage": 81.72, "elapsed_time": "3:36:40", "remaining_time": "0:48:28", "throughput": 8669.51, "total_tokens": 112708520} +{"current_steps": 167255, "total_steps": 204665, "loss": 0.0, "lr": 1.9674960826168807e-07, "epoch": 4.086067476119513, "percentage": 81.72, "elapsed_time": "3:36:40", "remaining_time": "0:48:27", "throughput": 8669.5, "total_tokens": 112711400} +{"current_steps": 167260, "total_steps": 204665, "loss": 0.0, "lr": 1.966988163043748e-07, "epoch": 4.086189626951359, "percentage": 81.72, "elapsed_time": "3:36:41", "remaining_time": "0:48:27", "throughput": 8669.51, "total_tokens": 112714472} +{"current_steps": 167265, "total_steps": 204665, "loss": 0.0, "lr": 1.966480301888841e-07, "epoch": 4.086311777783207, "percentage": 81.73, "elapsed_time": "3:36:41", "remaining_time": "0:48:27", "throughput": 8669.54, "total_tokens": 112717864} +{"current_steps": 167270, "total_steps": 204665, "loss": 0.0, "lr": 1.9659724991558467e-07, "epoch": 4.086433928615054, "percentage": 81.73, "elapsed_time": "3:36:41", "remaining_time": "0:48:26", "throughput": 8669.55, "total_tokens": 112720872} +{"current_steps": 167275, "total_steps": 204665, "loss": 0.0, "lr": 1.9654647548484615e-07, "epoch": 4.086556079446901, "percentage": 81.73, "elapsed_time": "3:36:42", "remaining_time": "0:48:26", "throughput": 8669.6, "total_tokens": 112724648} +{"current_steps": 167280, "total_steps": 204665, "loss": 0.0, "lr": 1.96495706897038e-07, "epoch": 4.086678230278748, "percentage": 81.73, "elapsed_time": "3:36:42", "remaining_time": "0:48:25", "throughput": 8669.62, "total_tokens": 112727848} +{"current_steps": 167285, "total_steps": 204665, "loss": 0.0, "lr": 1.9644494415252887e-07, "epoch": 4.086800381110596, "percentage": 81.74, "elapsed_time": "3:36:42", "remaining_time": "0:48:25", "throughput": 8669.64, "total_tokens": 112731176} +{"current_steps": 167290, "total_steps": 204665, "loss": 0.0, "lr": 1.9639418725168866e-07, "epoch": 4.0869225319424425, "percentage": 81.74, "elapsed_time": "3:36:43", "remaining_time": "0:48:25", "throughput": 8669.65, "total_tokens": 112734312} +{"current_steps": 167295, "total_steps": 204665, "loss": 0.0, "lr": 1.963434361948857e-07, "epoch": 4.08704468277429, "percentage": 81.74, "elapsed_time": "3:36:43", "remaining_time": "0:48:24", "throughput": 8669.66, "total_tokens": 112737320} +{"current_steps": 167300, "total_steps": 204665, "loss": 0.0, "lr": 1.9629269098248967e-07, "epoch": 4.087166833606137, "percentage": 81.74, "elapsed_time": "3:36:44", "remaining_time": "0:48:24", "throughput": 8669.66, "total_tokens": 112740328} +{"current_steps": 167305, "total_steps": 204665, "loss": 0.0, "lr": 1.9624195161486945e-07, "epoch": 4.087288984437984, "percentage": 81.75, "elapsed_time": "3:36:44", "remaining_time": "0:48:23", "throughput": 8669.67, "total_tokens": 112743336} +{"current_steps": 167310, "total_steps": 204665, "loss": 0.0, "lr": 1.961912180923936e-07, "epoch": 4.087411135269831, "percentage": 81.75, "elapsed_time": "3:36:44", "remaining_time": "0:48:23", "throughput": 8669.7, "total_tokens": 112746792} +{"current_steps": 167315, "total_steps": 204665, "loss": 0.0, "lr": 1.961404904154317e-07, "epoch": 4.087533286101678, "percentage": 81.75, "elapsed_time": "3:36:45", "remaining_time": "0:48:23", "throughput": 8669.74, "total_tokens": 112750312} +{"current_steps": 167320, "total_steps": 204665, "loss": 0.0, "lr": 1.960897685843521e-07, "epoch": 4.087655436933526, "percentage": 81.75, "elapsed_time": "3:36:45", "remaining_time": "0:48:22", "throughput": 8669.75, "total_tokens": 112753448} +{"current_steps": 167325, "total_steps": 204665, "loss": 0.0, "lr": 1.9603905259952426e-07, "epoch": 4.087777587765372, "percentage": 81.76, "elapsed_time": "3:36:45", "remaining_time": "0:48:22", "throughput": 8669.78, "total_tokens": 112756840} +{"current_steps": 167330, "total_steps": 204665, "loss": 0.0, "lr": 1.9598834246131634e-07, "epoch": 4.08789973859722, "percentage": 81.76, "elapsed_time": "3:36:46", "remaining_time": "0:48:21", "throughput": 8669.81, "total_tokens": 112760296} +{"current_steps": 167335, "total_steps": 204665, "loss": 0.0, "lr": 1.9593763817009745e-07, "epoch": 4.088021889429067, "percentage": 81.76, "elapsed_time": "3:36:46", "remaining_time": "0:48:21", "throughput": 8669.83, "total_tokens": 112763496} +{"current_steps": 167340, "total_steps": 204665, "loss": 0.0, "lr": 1.958869397262366e-07, "epoch": 4.088144040260914, "percentage": 81.76, "elapsed_time": "3:36:46", "remaining_time": "0:48:21", "throughput": 8669.87, "total_tokens": 112767016} +{"current_steps": 167345, "total_steps": 204665, "loss": 0.0, "lr": 1.9583624713010183e-07, "epoch": 4.088266191092761, "percentage": 81.77, "elapsed_time": "3:36:47", "remaining_time": "0:48:20", "throughput": 8669.86, "total_tokens": 112769896} +{"current_steps": 167350, "total_steps": 204665, "loss": 0.0, "lr": 1.9578556038206262e-07, "epoch": 4.088388341924609, "percentage": 81.77, "elapsed_time": "3:36:47", "remaining_time": "0:48:20", "throughput": 8669.88, "total_tokens": 112773032} +{"current_steps": 167355, "total_steps": 204665, "loss": 0.0, "lr": 1.9573487948248668e-07, "epoch": 4.0885104927564555, "percentage": 81.77, "elapsed_time": "3:36:47", "remaining_time": "0:48:19", "throughput": 8669.9, "total_tokens": 112776424} +{"current_steps": 167360, "total_steps": 204665, "loss": 0.0, "lr": 1.9568420443174338e-07, "epoch": 4.088632643588303, "percentage": 81.77, "elapsed_time": "3:36:48", "remaining_time": "0:48:19", "throughput": 8669.91, "total_tokens": 112779496} +{"current_steps": 167365, "total_steps": 204665, "loss": 0.0, "lr": 1.9563353523020066e-07, "epoch": 4.08875479442015, "percentage": 81.78, "elapsed_time": "3:36:48", "remaining_time": "0:48:19", "throughput": 8669.92, "total_tokens": 112782504} +{"current_steps": 167370, "total_steps": 204665, "loss": 0.0, "lr": 1.9558287187822707e-07, "epoch": 4.0888769452519975, "percentage": 81.78, "elapsed_time": "3:36:48", "remaining_time": "0:48:18", "throughput": 8669.97, "total_tokens": 112786216} +{"current_steps": 167375, "total_steps": 204665, "loss": 0.0, "lr": 1.955322143761916e-07, "epoch": 4.088999096083844, "percentage": 81.78, "elapsed_time": "3:36:49", "remaining_time": "0:48:18", "throughput": 8670.02, "total_tokens": 112789992} +{"current_steps": 167380, "total_steps": 204665, "loss": 0.0, "lr": 1.9548156272446194e-07, "epoch": 4.089121246915692, "percentage": 81.78, "elapsed_time": "3:36:49", "remaining_time": "0:48:17", "throughput": 8670.04, "total_tokens": 112793192} +{"current_steps": 167385, "total_steps": 204665, "loss": 0.0, "lr": 1.954309169234071e-07, "epoch": 4.089243397747539, "percentage": 81.78, "elapsed_time": "3:36:49", "remaining_time": "0:48:17", "throughput": 8670.07, "total_tokens": 112796648} +{"current_steps": 167390, "total_steps": 204665, "loss": 0.0, "lr": 1.9538027697339455e-07, "epoch": 4.089365548579385, "percentage": 81.79, "elapsed_time": "3:36:50", "remaining_time": "0:48:17", "throughput": 8670.1, "total_tokens": 112800104} +{"current_steps": 167395, "total_steps": 204665, "loss": 0.0, "lr": 1.9532964287479325e-07, "epoch": 4.089487699411233, "percentage": 81.79, "elapsed_time": "3:36:50", "remaining_time": "0:48:16", "throughput": 8670.14, "total_tokens": 112803688} +{"current_steps": 167400, "total_steps": 204665, "loss": 0.0, "lr": 1.9527901462797136e-07, "epoch": 4.08960985024308, "percentage": 81.79, "elapsed_time": "3:36:50", "remaining_time": "0:48:16", "throughput": 8670.19, "total_tokens": 112807400} +{"current_steps": 167405, "total_steps": 204665, "loss": 0.0, "lr": 1.95228392233297e-07, "epoch": 4.089732001074927, "percentage": 81.79, "elapsed_time": "3:36:51", "remaining_time": "0:48:15", "throughput": 8670.22, "total_tokens": 112810792} +{"current_steps": 167410, "total_steps": 204665, "loss": 0.0, "lr": 1.9517777569113792e-07, "epoch": 4.089854151906774, "percentage": 81.8, "elapsed_time": "3:36:51", "remaining_time": "0:48:15", "throughput": 8670.25, "total_tokens": 112814184} +{"current_steps": 167415, "total_steps": 204665, "loss": 0.0, "lr": 1.9512716500186277e-07, "epoch": 4.089976302738622, "percentage": 81.8, "elapsed_time": "3:36:52", "remaining_time": "0:48:15", "throughput": 8670.3, "total_tokens": 112817896} +{"current_steps": 167420, "total_steps": 204665, "loss": 0.0, "lr": 1.950765601658394e-07, "epoch": 4.0900984535704685, "percentage": 81.8, "elapsed_time": "3:36:52", "remaining_time": "0:48:14", "throughput": 8670.36, "total_tokens": 112821800} +{"current_steps": 167425, "total_steps": 204665, "loss": 0.0, "lr": 1.950259611834355e-07, "epoch": 4.090220604402316, "percentage": 81.8, "elapsed_time": "3:36:52", "remaining_time": "0:48:14", "throughput": 8670.38, "total_tokens": 112825064} +{"current_steps": 167430, "total_steps": 204665, "loss": 0.0, "lr": 1.9497536805501934e-07, "epoch": 4.090342755234163, "percentage": 81.81, "elapsed_time": "3:36:53", "remaining_time": "0:48:13", "throughput": 8670.39, "total_tokens": 112828200} +{"current_steps": 167435, "total_steps": 204665, "loss": 0.0, "lr": 1.9492478078095909e-07, "epoch": 4.0904649060660105, "percentage": 81.81, "elapsed_time": "3:36:53", "remaining_time": "0:48:13", "throughput": 8670.45, "total_tokens": 112832104} +{"current_steps": 167440, "total_steps": 204665, "loss": 0.0, "lr": 1.948741993616221e-07, "epoch": 4.090587056897857, "percentage": 81.81, "elapsed_time": "3:36:53", "remaining_time": "0:48:13", "throughput": 8670.48, "total_tokens": 112835496} +{"current_steps": 167445, "total_steps": 204665, "loss": 0.0, "lr": 1.948236237973767e-07, "epoch": 4.090709207729705, "percentage": 81.81, "elapsed_time": "3:36:54", "remaining_time": "0:48:12", "throughput": 8670.51, "total_tokens": 112838888} +{"current_steps": 167450, "total_steps": 204665, "loss": 0.0, "lr": 1.9477305408859023e-07, "epoch": 4.090831358561552, "percentage": 81.82, "elapsed_time": "3:36:54", "remaining_time": "0:48:12", "throughput": 8670.57, "total_tokens": 112842856} +{"current_steps": 167455, "total_steps": 204665, "loss": 0.0, "lr": 1.9472249023563103e-07, "epoch": 4.090953509393399, "percentage": 81.82, "elapsed_time": "3:36:54", "remaining_time": "0:48:12", "throughput": 8670.6, "total_tokens": 112846184} +{"current_steps": 167460, "total_steps": 204665, "loss": 0.0, "lr": 1.9467193223886613e-07, "epoch": 4.091075660225246, "percentage": 81.82, "elapsed_time": "3:36:55", "remaining_time": "0:48:11", "throughput": 8670.62, "total_tokens": 112849512} +{"current_steps": 167465, "total_steps": 204665, "loss": 0.0, "lr": 1.9462138009866357e-07, "epoch": 4.091197811057094, "percentage": 81.82, "elapsed_time": "3:36:55", "remaining_time": "0:48:11", "throughput": 8670.66, "total_tokens": 112853096} +{"current_steps": 167470, "total_steps": 204665, "loss": 0.0011, "lr": 1.945708338153913e-07, "epoch": 4.09131996188894, "percentage": 81.83, "elapsed_time": "3:36:55", "remaining_time": "0:48:10", "throughput": 8670.66, "total_tokens": 112855912} +{"current_steps": 167475, "total_steps": 204665, "loss": 0.0, "lr": 1.9452029338941623e-07, "epoch": 4.091442112720788, "percentage": 81.83, "elapsed_time": "3:36:56", "remaining_time": "0:48:10", "throughput": 8670.7, "total_tokens": 112859560} +{"current_steps": 167480, "total_steps": 204665, "loss": 0.0, "lr": 1.944697588211064e-07, "epoch": 4.091564263552635, "percentage": 81.83, "elapsed_time": "3:36:56", "remaining_time": "0:48:10", "throughput": 8670.74, "total_tokens": 112863080} +{"current_steps": 167485, "total_steps": 204665, "loss": 0.0, "lr": 1.9441923011082905e-07, "epoch": 4.0916864143844816, "percentage": 81.83, "elapsed_time": "3:36:56", "remaining_time": "0:48:09", "throughput": 8670.77, "total_tokens": 112866408} +{"current_steps": 167490, "total_steps": 204665, "loss": 0.0, "lr": 1.943687072589516e-07, "epoch": 4.091808565216329, "percentage": 81.84, "elapsed_time": "3:36:57", "remaining_time": "0:48:09", "throughput": 8670.79, "total_tokens": 112869672} +{"current_steps": 167495, "total_steps": 204665, "loss": 0.0, "lr": 1.9431819026584196e-07, "epoch": 4.091930716048176, "percentage": 81.84, "elapsed_time": "3:36:57", "remaining_time": "0:48:08", "throughput": 8670.8, "total_tokens": 112872808} +{"current_steps": 167500, "total_steps": 204665, "loss": 0.0, "lr": 1.942676791318668e-07, "epoch": 4.092052866880024, "percentage": 81.84, "elapsed_time": "3:36:57", "remaining_time": "0:48:08", "throughput": 8670.82, "total_tokens": 112876072} +{"current_steps": 167505, "total_steps": 204665, "loss": 0.0, "lr": 1.942171738573941e-07, "epoch": 4.09217501771187, "percentage": 81.84, "elapsed_time": "3:36:58", "remaining_time": "0:48:08", "throughput": 8670.85, "total_tokens": 112879464} +{"current_steps": 167510, "total_steps": 204665, "loss": 0.0, "lr": 1.941666744427909e-07, "epoch": 4.092297168543718, "percentage": 81.85, "elapsed_time": "3:36:58", "remaining_time": "0:48:07", "throughput": 8670.87, "total_tokens": 112882792} +{"current_steps": 167515, "total_steps": 204665, "loss": 0.05, "lr": 1.9411618088842396e-07, "epoch": 4.092419319375565, "percentage": 81.85, "elapsed_time": "3:36:58", "remaining_time": "0:48:07", "throughput": 8670.92, "total_tokens": 112886568} +{"current_steps": 167520, "total_steps": 204665, "loss": 0.0, "lr": 1.9406569319466136e-07, "epoch": 4.092541470207412, "percentage": 81.85, "elapsed_time": "3:36:59", "remaining_time": "0:48:06", "throughput": 8670.96, "total_tokens": 112890088} +{"current_steps": 167525, "total_steps": 204665, "loss": 0.0, "lr": 1.9401521136186937e-07, "epoch": 4.092663621039259, "percentage": 81.85, "elapsed_time": "3:36:59", "remaining_time": "0:48:06", "throughput": 8671.0, "total_tokens": 112893608} +{"current_steps": 167530, "total_steps": 204665, "loss": 0.0, "lr": 1.939647353904159e-07, "epoch": 4.092785771871107, "percentage": 81.86, "elapsed_time": "3:37:00", "remaining_time": "0:48:06", "throughput": 8671.0, "total_tokens": 112896552} +{"current_steps": 167535, "total_steps": 204665, "loss": 0.0, "lr": 1.9391426528066744e-07, "epoch": 4.0929079227029534, "percentage": 81.86, "elapsed_time": "3:37:00", "remaining_time": "0:48:05", "throughput": 8671.02, "total_tokens": 112899880} +{"current_steps": 167540, "total_steps": 204665, "loss": 0.0, "lr": 1.938638010329915e-07, "epoch": 4.093030073534801, "percentage": 81.86, "elapsed_time": "3:37:00", "remaining_time": "0:48:05", "throughput": 8671.03, "total_tokens": 112902824} +{"current_steps": 167545, "total_steps": 204665, "loss": 0.0, "lr": 1.9381334264775462e-07, "epoch": 4.093152224366648, "percentage": 81.86, "elapsed_time": "3:37:01", "remaining_time": "0:48:04", "throughput": 8671.06, "total_tokens": 112906344} +{"current_steps": 167550, "total_steps": 204665, "loss": 0.0, "lr": 1.9376289012532388e-07, "epoch": 4.0932743751984955, "percentage": 81.87, "elapsed_time": "3:37:01", "remaining_time": "0:48:04", "throughput": 8671.1, "total_tokens": 112909928} +{"current_steps": 167555, "total_steps": 204665, "loss": 0.0, "lr": 1.937124434660664e-07, "epoch": 4.093396526030342, "percentage": 81.87, "elapsed_time": "3:37:01", "remaining_time": "0:48:04", "throughput": 8671.15, "total_tokens": 112913576} +{"current_steps": 167560, "total_steps": 204665, "loss": 0.0, "lr": 1.9366200267034882e-07, "epoch": 4.09351867686219, "percentage": 81.87, "elapsed_time": "3:37:02", "remaining_time": "0:48:03", "throughput": 8671.17, "total_tokens": 112916776} +{"current_steps": 167565, "total_steps": 204665, "loss": 0.0, "lr": 1.9361156773853826e-07, "epoch": 4.093640827694037, "percentage": 81.87, "elapsed_time": "3:37:02", "remaining_time": "0:48:03", "throughput": 8671.19, "total_tokens": 112920104} +{"current_steps": 167570, "total_steps": 204665, "loss": 0.0526, "lr": 1.9356113867100089e-07, "epoch": 4.093762978525883, "percentage": 81.88, "elapsed_time": "3:37:02", "remaining_time": "0:48:02", "throughput": 8671.24, "total_tokens": 112923752} +{"current_steps": 167575, "total_steps": 204665, "loss": 0.0, "lr": 1.9351071546810428e-07, "epoch": 4.093885129357731, "percentage": 81.88, "elapsed_time": "3:37:03", "remaining_time": "0:48:02", "throughput": 8671.24, "total_tokens": 112926696} +{"current_steps": 167580, "total_steps": 204665, "loss": 0.0, "lr": 1.9346029813021425e-07, "epoch": 4.094007280189578, "percentage": 81.88, "elapsed_time": "3:37:03", "remaining_time": "0:48:02", "throughput": 8671.26, "total_tokens": 112929960} +{"current_steps": 167585, "total_steps": 204665, "loss": 0.0, "lr": 1.9340988665769786e-07, "epoch": 4.094129431021425, "percentage": 81.88, "elapsed_time": "3:37:03", "remaining_time": "0:48:01", "throughput": 8671.26, "total_tokens": 112932904} +{"current_steps": 167590, "total_steps": 204665, "loss": 0.0, "lr": 1.9335948105092203e-07, "epoch": 4.094251581853272, "percentage": 81.89, "elapsed_time": "3:37:04", "remaining_time": "0:48:01", "throughput": 8671.28, "total_tokens": 112936168} +{"current_steps": 167595, "total_steps": 204665, "loss": 0.0, "lr": 1.9330908131025282e-07, "epoch": 4.09437373268512, "percentage": 81.89, "elapsed_time": "3:37:04", "remaining_time": "0:48:00", "throughput": 8671.31, "total_tokens": 112939624} +{"current_steps": 167600, "total_steps": 204665, "loss": 0.0, "lr": 1.9325868743605711e-07, "epoch": 4.0944958835169665, "percentage": 81.89, "elapsed_time": "3:37:04", "remaining_time": "0:48:00", "throughput": 8671.34, "total_tokens": 112943016} +{"current_steps": 167605, "total_steps": 204665, "loss": 0.0, "lr": 1.93208299428701e-07, "epoch": 4.094618034348814, "percentage": 81.89, "elapsed_time": "3:37:05", "remaining_time": "0:48:00", "throughput": 8671.36, "total_tokens": 112946216} +{"current_steps": 167610, "total_steps": 204665, "loss": 0.0, "lr": 1.9315791728855136e-07, "epoch": 4.094740185180661, "percentage": 81.89, "elapsed_time": "3:37:05", "remaining_time": "0:47:59", "throughput": 8671.36, "total_tokens": 112949224} +{"current_steps": 167615, "total_steps": 204665, "loss": 0.0, "lr": 1.9310754101597437e-07, "epoch": 4.0948623360125085, "percentage": 81.9, "elapsed_time": "3:37:05", "remaining_time": "0:47:59", "throughput": 8671.38, "total_tokens": 112952552} +{"current_steps": 167620, "total_steps": 204665, "loss": 0.0, "lr": 1.930571706113362e-07, "epoch": 4.094984486844355, "percentage": 81.9, "elapsed_time": "3:37:06", "remaining_time": "0:47:58", "throughput": 8671.41, "total_tokens": 112955880} +{"current_steps": 167625, "total_steps": 204665, "loss": 0.0, "lr": 1.9300680607500354e-07, "epoch": 4.095106637676203, "percentage": 81.9, "elapsed_time": "3:37:06", "remaining_time": "0:47:58", "throughput": 8671.41, "total_tokens": 112958760} +{"current_steps": 167630, "total_steps": 204665, "loss": 0.0, "lr": 1.9295644740734207e-07, "epoch": 4.09522878850805, "percentage": 81.9, "elapsed_time": "3:37:06", "remaining_time": "0:47:58", "throughput": 8671.44, "total_tokens": 112962216} +{"current_steps": 167635, "total_steps": 204665, "loss": 0.0, "lr": 1.9290609460871876e-07, "epoch": 4.095350939339897, "percentage": 81.91, "elapsed_time": "3:37:07", "remaining_time": "0:47:57", "throughput": 8671.52, "total_tokens": 112966440} +{"current_steps": 167640, "total_steps": 204665, "loss": 0.0, "lr": 1.928557476794991e-07, "epoch": 4.095473090171744, "percentage": 81.91, "elapsed_time": "3:37:07", "remaining_time": "0:47:57", "throughput": 8671.53, "total_tokens": 112969448} +{"current_steps": 167645, "total_steps": 204665, "loss": 0.0698, "lr": 1.928054066200495e-07, "epoch": 4.095595241003592, "percentage": 81.91, "elapsed_time": "3:37:07", "remaining_time": "0:47:56", "throughput": 8671.58, "total_tokens": 112973288} +{"current_steps": 167650, "total_steps": 204665, "loss": 0.0, "lr": 1.9275507143073645e-07, "epoch": 4.095717391835438, "percentage": 81.91, "elapsed_time": "3:37:08", "remaining_time": "0:47:56", "throughput": 8671.61, "total_tokens": 112976744} +{"current_steps": 167655, "total_steps": 204665, "loss": 0.0, "lr": 1.9270474211192534e-07, "epoch": 4.095839542667285, "percentage": 81.92, "elapsed_time": "3:37:08", "remaining_time": "0:47:56", "throughput": 8671.62, "total_tokens": 112979688} +{"current_steps": 167660, "total_steps": 204665, "loss": 0.0, "lr": 1.926544186639828e-07, "epoch": 4.095961693499133, "percentage": 81.92, "elapsed_time": "3:37:09", "remaining_time": "0:47:55", "throughput": 8671.63, "total_tokens": 112982888} +{"current_steps": 167665, "total_steps": 204665, "loss": 0.0, "lr": 1.9260410108727408e-07, "epoch": 4.0960838443309795, "percentage": 81.92, "elapsed_time": "3:37:09", "remaining_time": "0:47:55", "throughput": 8671.64, "total_tokens": 112986024} +{"current_steps": 167670, "total_steps": 204665, "loss": 0.0, "lr": 1.9255378938216583e-07, "epoch": 4.096205995162827, "percentage": 81.92, "elapsed_time": "3:37:09", "remaining_time": "0:47:54", "throughput": 8671.66, "total_tokens": 112989288} +{"current_steps": 167675, "total_steps": 204665, "loss": 0.0, "lr": 1.9250348354902335e-07, "epoch": 4.096328145994674, "percentage": 81.93, "elapsed_time": "3:37:10", "remaining_time": "0:47:54", "throughput": 8671.66, "total_tokens": 112992232} +{"current_steps": 167680, "total_steps": 204665, "loss": 0.0, "lr": 1.9245318358821272e-07, "epoch": 4.0964502968265215, "percentage": 81.93, "elapsed_time": "3:37:10", "remaining_time": "0:47:54", "throughput": 8671.7, "total_tokens": 112995688} +{"current_steps": 167685, "total_steps": 204665, "loss": 0.0, "lr": 1.9240288950010008e-07, "epoch": 4.096572447658368, "percentage": 81.93, "elapsed_time": "3:37:10", "remaining_time": "0:47:53", "throughput": 8671.71, "total_tokens": 112998888} +{"current_steps": 167690, "total_steps": 204665, "loss": 0.0, "lr": 1.923526012850505e-07, "epoch": 4.096694598490216, "percentage": 81.93, "elapsed_time": "3:37:11", "remaining_time": "0:47:53", "throughput": 8671.76, "total_tokens": 113002600} +{"current_steps": 167695, "total_steps": 204665, "loss": 0.0, "lr": 1.9230231894343029e-07, "epoch": 4.096816749322063, "percentage": 81.94, "elapsed_time": "3:37:11", "remaining_time": "0:47:52", "throughput": 8671.77, "total_tokens": 113005672} +{"current_steps": 167700, "total_steps": 204665, "loss": 0.0, "lr": 1.9225204247560467e-07, "epoch": 4.09693890015391, "percentage": 81.94, "elapsed_time": "3:37:11", "remaining_time": "0:47:52", "throughput": 8671.81, "total_tokens": 113009192} +{"current_steps": 167705, "total_steps": 204665, "loss": 0.0, "lr": 1.9220177188193942e-07, "epoch": 4.097061050985757, "percentage": 81.94, "elapsed_time": "3:37:12", "remaining_time": "0:47:52", "throughput": 8671.82, "total_tokens": 113012264} +{"current_steps": 167710, "total_steps": 204665, "loss": 0.0, "lr": 1.9215150716280037e-07, "epoch": 4.097183201817605, "percentage": 81.94, "elapsed_time": "3:37:12", "remaining_time": "0:47:51", "throughput": 8671.83, "total_tokens": 113015336} +{"current_steps": 167715, "total_steps": 204665, "loss": 0.0, "lr": 1.921012483185529e-07, "epoch": 4.097305352649451, "percentage": 81.95, "elapsed_time": "3:37:12", "remaining_time": "0:47:51", "throughput": 8671.87, "total_tokens": 113018984} +{"current_steps": 167720, "total_steps": 204665, "loss": 0.0, "lr": 1.9205099534956214e-07, "epoch": 4.097427503481299, "percentage": 81.95, "elapsed_time": "3:37:13", "remaining_time": "0:47:50", "throughput": 8671.88, "total_tokens": 113021928} +{"current_steps": 167725, "total_steps": 204665, "loss": 0.0002, "lr": 1.9200074825619418e-07, "epoch": 4.097549654313146, "percentage": 81.95, "elapsed_time": "3:37:13", "remaining_time": "0:47:50", "throughput": 8671.91, "total_tokens": 113025384} +{"current_steps": 167730, "total_steps": 204665, "loss": 0.0, "lr": 1.91950507038814e-07, "epoch": 4.097671805144993, "percentage": 81.95, "elapsed_time": "3:37:13", "remaining_time": "0:47:50", "throughput": 8671.94, "total_tokens": 113028904} +{"current_steps": 167735, "total_steps": 204665, "loss": 0.0, "lr": 1.9190027169778688e-07, "epoch": 4.09779395597684, "percentage": 81.96, "elapsed_time": "3:37:14", "remaining_time": "0:47:49", "throughput": 8671.96, "total_tokens": 113032168} +{"current_steps": 167740, "total_steps": 204665, "loss": 0.0, "lr": 1.9185004223347834e-07, "epoch": 4.097916106808688, "percentage": 81.96, "elapsed_time": "3:37:14", "remaining_time": "0:47:49", "throughput": 8671.97, "total_tokens": 113035240} +{"current_steps": 167745, "total_steps": 204665, "loss": 0.0, "lr": 1.9179981864625394e-07, "epoch": 4.0980382576405345, "percentage": 81.96, "elapsed_time": "3:37:14", "remaining_time": "0:47:48", "throughput": 8672.02, "total_tokens": 113038888} +{"current_steps": 167750, "total_steps": 204665, "loss": 0.0, "lr": 1.917496009364784e-07, "epoch": 4.098160408472381, "percentage": 81.96, "elapsed_time": "3:37:15", "remaining_time": "0:47:48", "throughput": 8672.05, "total_tokens": 113042408} +{"current_steps": 167755, "total_steps": 204665, "loss": 0.0, "lr": 1.9169938910451734e-07, "epoch": 4.098282559304229, "percentage": 81.97, "elapsed_time": "3:37:15", "remaining_time": "0:47:48", "throughput": 8672.07, "total_tokens": 113045672} +{"current_steps": 167760, "total_steps": 204665, "loss": 0.0, "lr": 1.9164918315073552e-07, "epoch": 4.098404710136076, "percentage": 81.97, "elapsed_time": "3:37:15", "remaining_time": "0:47:47", "throughput": 8672.09, "total_tokens": 113048872} +{"current_steps": 167765, "total_steps": 204665, "loss": 0.0002, "lr": 1.915989830754985e-07, "epoch": 4.098526860967923, "percentage": 81.97, "elapsed_time": "3:37:16", "remaining_time": "0:47:47", "throughput": 8672.1, "total_tokens": 113051944} +{"current_steps": 167770, "total_steps": 204665, "loss": 0.0, "lr": 1.915487888791708e-07, "epoch": 4.09864901179977, "percentage": 81.97, "elapsed_time": "3:37:16", "remaining_time": "0:47:46", "throughput": 8672.1, "total_tokens": 113054952} +{"current_steps": 167775, "total_steps": 204665, "loss": 0.0, "lr": 1.9149860056211787e-07, "epoch": 4.098771162631618, "percentage": 81.98, "elapsed_time": "3:37:16", "remaining_time": "0:47:46", "throughput": 8672.11, "total_tokens": 113058024} +{"current_steps": 167780, "total_steps": 204665, "loss": 0.0, "lr": 1.9144841812470468e-07, "epoch": 4.098893313463464, "percentage": 81.98, "elapsed_time": "3:37:17", "remaining_time": "0:47:46", "throughput": 8672.13, "total_tokens": 113061288} +{"current_steps": 167785, "total_steps": 204665, "loss": 0.0, "lr": 1.913982415672959e-07, "epoch": 4.099015464295312, "percentage": 81.98, "elapsed_time": "3:37:17", "remaining_time": "0:47:45", "throughput": 8672.2, "total_tokens": 113065448} +{"current_steps": 167790, "total_steps": 204665, "loss": 0.0, "lr": 1.9134807089025695e-07, "epoch": 4.099137615127159, "percentage": 81.98, "elapsed_time": "3:37:18", "remaining_time": "0:47:45", "throughput": 8672.22, "total_tokens": 113068712} +{"current_steps": 167795, "total_steps": 204665, "loss": 0.0, "lr": 1.9129790609395192e-07, "epoch": 4.099259765959006, "percentage": 81.99, "elapsed_time": "3:37:18", "remaining_time": "0:47:44", "throughput": 8672.24, "total_tokens": 113071848} +{"current_steps": 167800, "total_steps": 204665, "loss": 0.0, "lr": 1.9124774717874603e-07, "epoch": 4.099381916790853, "percentage": 81.99, "elapsed_time": "3:37:18", "remaining_time": "0:47:44", "throughput": 8672.23, "total_tokens": 113074728} +{"current_steps": 167805, "total_steps": 204665, "loss": 0.0, "lr": 1.9119759414500447e-07, "epoch": 4.099504067622701, "percentage": 81.99, "elapsed_time": "3:37:19", "remaining_time": "0:47:44", "throughput": 8672.26, "total_tokens": 113078120} +{"current_steps": 167810, "total_steps": 204665, "loss": 0.0, "lr": 1.9114744699309117e-07, "epoch": 4.0996262184545476, "percentage": 81.99, "elapsed_time": "3:37:19", "remaining_time": "0:47:43", "throughput": 8672.31, "total_tokens": 113081832} +{"current_steps": 167815, "total_steps": 204665, "loss": 0.058, "lr": 1.9109730572337146e-07, "epoch": 4.099748369286395, "percentage": 81.99, "elapsed_time": "3:37:19", "remaining_time": "0:47:43", "throughput": 8672.32, "total_tokens": 113085032} +{"current_steps": 167820, "total_steps": 204665, "loss": 0.0, "lr": 1.9104717033620965e-07, "epoch": 4.099870520118242, "percentage": 82.0, "elapsed_time": "3:37:20", "remaining_time": "0:47:42", "throughput": 8672.35, "total_tokens": 113088296} +{"current_steps": 167825, "total_steps": 204665, "loss": 0.0, "lr": 1.9099704083197023e-07, "epoch": 4.09999267095009, "percentage": 82.0, "elapsed_time": "3:37:20", "remaining_time": "0:47:42", "throughput": 8672.36, "total_tokens": 113091496} +{"current_steps": 167830, "total_steps": 204665, "loss": 0.0, "lr": 1.9094691721101818e-07, "epoch": 4.100114821781936, "percentage": 82.0, "elapsed_time": "3:37:20", "remaining_time": "0:47:42", "throughput": 8672.39, "total_tokens": 113094824} +{"current_steps": 167835, "total_steps": 204665, "loss": 0.0001, "lr": 1.908967994737175e-07, "epoch": 4.100236972613784, "percentage": 82.0, "elapsed_time": "3:37:21", "remaining_time": "0:47:41", "throughput": 8672.43, "total_tokens": 113098408} +{"current_steps": 167840, "total_steps": 204665, "loss": 0.0, "lr": 1.908466876204331e-07, "epoch": 4.100359123445631, "percentage": 82.01, "elapsed_time": "3:37:21", "remaining_time": "0:47:41", "throughput": 8672.45, "total_tokens": 113101672} +{"current_steps": 167845, "total_steps": 204665, "loss": 0.0, "lr": 1.90796581651529e-07, "epoch": 4.100481274277477, "percentage": 82.01, "elapsed_time": "3:37:21", "remaining_time": "0:47:40", "throughput": 8672.45, "total_tokens": 113104616} +{"current_steps": 167850, "total_steps": 204665, "loss": 0.0, "lr": 1.9074648156737017e-07, "epoch": 4.100603425109325, "percentage": 82.01, "elapsed_time": "3:37:22", "remaining_time": "0:47:40", "throughput": 8672.48, "total_tokens": 113108136} +{"current_steps": 167855, "total_steps": 204665, "loss": 0.0, "lr": 1.9069638736832016e-07, "epoch": 4.100725575941172, "percentage": 82.01, "elapsed_time": "3:37:22", "remaining_time": "0:47:40", "throughput": 8672.55, "total_tokens": 113112168} +{"current_steps": 167860, "total_steps": 204665, "loss": 0.0, "lr": 1.9064629905474384e-07, "epoch": 4.1008477267730195, "percentage": 82.02, "elapsed_time": "3:37:22", "remaining_time": "0:47:39", "throughput": 8672.55, "total_tokens": 113115112} +{"current_steps": 167865, "total_steps": 204665, "loss": 0.0325, "lr": 1.9059621662700554e-07, "epoch": 4.100969877604866, "percentage": 82.02, "elapsed_time": "3:37:23", "remaining_time": "0:47:39", "throughput": 8672.56, "total_tokens": 113118248} +{"current_steps": 167870, "total_steps": 204665, "loss": 0.0, "lr": 1.9054614008546888e-07, "epoch": 4.101092028436714, "percentage": 82.02, "elapsed_time": "3:37:23", "remaining_time": "0:47:38", "throughput": 8672.61, "total_tokens": 113121896} +{"current_steps": 167875, "total_steps": 204665, "loss": 0.0, "lr": 1.9049606943049878e-07, "epoch": 4.101214179268561, "percentage": 82.02, "elapsed_time": "3:37:23", "remaining_time": "0:47:38", "throughput": 8672.63, "total_tokens": 113125224} +{"current_steps": 167880, "total_steps": 204665, "loss": 0.0, "lr": 1.9044600466245875e-07, "epoch": 4.101336330100408, "percentage": 82.03, "elapsed_time": "3:37:24", "remaining_time": "0:47:38", "throughput": 8672.65, "total_tokens": 113128424} +{"current_steps": 167885, "total_steps": 204665, "loss": 0.0, "lr": 1.9039594578171336e-07, "epoch": 4.101458480932255, "percentage": 82.03, "elapsed_time": "3:37:24", "remaining_time": "0:47:37", "throughput": 8672.67, "total_tokens": 113131752} +{"current_steps": 167890, "total_steps": 204665, "loss": 0.0, "lr": 1.9034589278862612e-07, "epoch": 4.101580631764103, "percentage": 82.03, "elapsed_time": "3:37:24", "remaining_time": "0:47:37", "throughput": 8672.69, "total_tokens": 113134888} +{"current_steps": 167895, "total_steps": 204665, "loss": 0.0, "lr": 1.9029584568356138e-07, "epoch": 4.101702782595949, "percentage": 82.03, "elapsed_time": "3:37:25", "remaining_time": "0:47:36", "throughput": 8672.7, "total_tokens": 113138024} +{"current_steps": 167900, "total_steps": 204665, "loss": 0.0, "lr": 1.902458044668832e-07, "epoch": 4.101824933427797, "percentage": 82.04, "elapsed_time": "3:37:25", "remaining_time": "0:47:36", "throughput": 8672.72, "total_tokens": 113141288} +{"current_steps": 167905, "total_steps": 204665, "loss": 0.0, "lr": 1.901957691389552e-07, "epoch": 4.101947084259644, "percentage": 82.04, "elapsed_time": "3:37:25", "remaining_time": "0:47:36", "throughput": 8672.75, "total_tokens": 113144616} +{"current_steps": 167910, "total_steps": 204665, "loss": 0.0, "lr": 1.9014573970014147e-07, "epoch": 4.102069235091491, "percentage": 82.04, "elapsed_time": "3:37:26", "remaining_time": "0:47:35", "throughput": 8672.81, "total_tokens": 113148520} +{"current_steps": 167915, "total_steps": 204665, "loss": 0.0, "lr": 1.9009571615080555e-07, "epoch": 4.102191385923338, "percentage": 82.04, "elapsed_time": "3:37:26", "remaining_time": "0:47:35", "throughput": 8672.83, "total_tokens": 113151912} +{"current_steps": 167920, "total_steps": 204665, "loss": 0.0, "lr": 1.900456984913117e-07, "epoch": 4.102313536755186, "percentage": 82.05, "elapsed_time": "3:37:27", "remaining_time": "0:47:35", "throughput": 8672.86, "total_tokens": 113155240} +{"current_steps": 167925, "total_steps": 204665, "loss": 0.0, "lr": 1.8999568672202338e-07, "epoch": 4.1024356875870325, "percentage": 82.05, "elapsed_time": "3:37:27", "remaining_time": "0:47:34", "throughput": 8672.89, "total_tokens": 113158696} +{"current_steps": 167930, "total_steps": 204665, "loss": 0.0, "lr": 1.899456808433041e-07, "epoch": 4.102557838418879, "percentage": 82.05, "elapsed_time": "3:37:27", "remaining_time": "0:47:34", "throughput": 8672.9, "total_tokens": 113161832} +{"current_steps": 167935, "total_steps": 204665, "loss": 0.0, "lr": 1.898956808555179e-07, "epoch": 4.102679989250727, "percentage": 82.05, "elapsed_time": "3:37:28", "remaining_time": "0:47:33", "throughput": 8672.92, "total_tokens": 113165096} +{"current_steps": 167940, "total_steps": 204665, "loss": 0.0, "lr": 1.898456867590279e-07, "epoch": 4.102802140082574, "percentage": 82.06, "elapsed_time": "3:37:28", "remaining_time": "0:47:33", "throughput": 8672.94, "total_tokens": 113168360} +{"current_steps": 167945, "total_steps": 204665, "loss": 0.0, "lr": 1.897956985541983e-07, "epoch": 4.102924290914421, "percentage": 82.06, "elapsed_time": "3:37:28", "remaining_time": "0:47:33", "throughput": 8672.98, "total_tokens": 113171880} +{"current_steps": 167950, "total_steps": 204665, "loss": 0.0, "lr": 1.8974571624139201e-07, "epoch": 4.103046441746268, "percentage": 82.06, "elapsed_time": "3:37:29", "remaining_time": "0:47:32", "throughput": 8672.99, "total_tokens": 113174952} +{"current_steps": 167955, "total_steps": 204665, "loss": 0.0, "lr": 1.8969573982097288e-07, "epoch": 4.103168592578116, "percentage": 82.06, "elapsed_time": "3:37:29", "remaining_time": "0:47:32", "throughput": 8673.01, "total_tokens": 113178152} +{"current_steps": 167960, "total_steps": 204665, "loss": 0.0, "lr": 1.8964576929330444e-07, "epoch": 4.103290743409962, "percentage": 82.07, "elapsed_time": "3:37:29", "remaining_time": "0:47:31", "throughput": 8673.03, "total_tokens": 113181480} +{"current_steps": 167965, "total_steps": 204665, "loss": 0.0, "lr": 1.895958046587497e-07, "epoch": 4.10341289424181, "percentage": 82.07, "elapsed_time": "3:37:30", "remaining_time": "0:47:31", "throughput": 8673.1, "total_tokens": 113185576} +{"current_steps": 167970, "total_steps": 204665, "loss": 0.0, "lr": 1.8954584591767241e-07, "epoch": 4.103535045073657, "percentage": 82.07, "elapsed_time": "3:37:30", "remaining_time": "0:47:31", "throughput": 8673.14, "total_tokens": 113189096} +{"current_steps": 167975, "total_steps": 204665, "loss": 0.0, "lr": 1.8949589307043555e-07, "epoch": 4.103657195905504, "percentage": 82.07, "elapsed_time": "3:37:30", "remaining_time": "0:47:30", "throughput": 8673.16, "total_tokens": 113192360} +{"current_steps": 167980, "total_steps": 204665, "loss": 0.0, "lr": 1.8944594611740282e-07, "epoch": 4.103779346737351, "percentage": 82.08, "elapsed_time": "3:37:31", "remaining_time": "0:47:30", "throughput": 8673.17, "total_tokens": 113195368} +{"current_steps": 167985, "total_steps": 204665, "loss": 0.0, "lr": 1.8939600505893693e-07, "epoch": 4.103901497569199, "percentage": 82.08, "elapsed_time": "3:37:31", "remaining_time": "0:47:29", "throughput": 8673.21, "total_tokens": 113199080} +{"current_steps": 167990, "total_steps": 204665, "loss": 0.0, "lr": 1.8934606989540125e-07, "epoch": 4.1040236484010455, "percentage": 82.08, "elapsed_time": "3:37:31", "remaining_time": "0:47:29", "throughput": 8673.27, "total_tokens": 113202920} +{"current_steps": 167995, "total_steps": 204665, "loss": 0.0001, "lr": 1.8929614062715927e-07, "epoch": 4.104145799232893, "percentage": 82.08, "elapsed_time": "3:37:32", "remaining_time": "0:47:29", "throughput": 8673.29, "total_tokens": 113206248} +{"current_steps": 168000, "total_steps": 204665, "loss": 0.0, "lr": 1.8924621725457357e-07, "epoch": 4.10426795006474, "percentage": 82.09, "elapsed_time": "3:37:32", "remaining_time": "0:47:28", "throughput": 8673.35, "total_tokens": 113210152} +{"current_steps": 168005, "total_steps": 204665, "loss": 0.0, "lr": 1.8919629977800767e-07, "epoch": 4.1043901008965875, "percentage": 82.09, "elapsed_time": "3:37:32", "remaining_time": "0:47:28", "throughput": 8673.38, "total_tokens": 113213416} +{"current_steps": 168010, "total_steps": 204665, "loss": 0.0, "lr": 1.8914638819782414e-07, "epoch": 4.104512251728434, "percentage": 82.09, "elapsed_time": "3:37:33", "remaining_time": "0:47:27", "throughput": 8673.41, "total_tokens": 113216936} +{"current_steps": 168015, "total_steps": 204665, "loss": 0.0, "lr": 1.8909648251438648e-07, "epoch": 4.104634402560281, "percentage": 82.09, "elapsed_time": "3:37:33", "remaining_time": "0:47:27", "throughput": 8673.5, "total_tokens": 113221288} +{"current_steps": 168020, "total_steps": 204665, "loss": 0.0, "lr": 1.8904658272805696e-07, "epoch": 4.104756553392129, "percentage": 82.1, "elapsed_time": "3:37:34", "remaining_time": "0:47:27", "throughput": 8673.56, "total_tokens": 113225128} +{"current_steps": 168025, "total_steps": 204665, "loss": 0.0, "lr": 1.8899668883919907e-07, "epoch": 4.104878704223975, "percentage": 82.1, "elapsed_time": "3:37:34", "remaining_time": "0:47:26", "throughput": 8673.57, "total_tokens": 113228328} +{"current_steps": 168030, "total_steps": 204665, "loss": 0.0, "lr": 1.8894680084817516e-07, "epoch": 4.105000855055823, "percentage": 82.1, "elapsed_time": "3:37:34", "remaining_time": "0:47:26", "throughput": 8673.59, "total_tokens": 113231528} +{"current_steps": 168035, "total_steps": 204665, "loss": 0.0, "lr": 1.8889691875534853e-07, "epoch": 4.10512300588767, "percentage": 82.1, "elapsed_time": "3:37:35", "remaining_time": "0:47:25", "throughput": 8673.62, "total_tokens": 113235048} +{"current_steps": 168040, "total_steps": 204665, "loss": 0.0, "lr": 1.8884704256108163e-07, "epoch": 4.105245156719517, "percentage": 82.1, "elapsed_time": "3:37:35", "remaining_time": "0:47:25", "throughput": 8673.66, "total_tokens": 113238504} +{"current_steps": 168045, "total_steps": 204665, "loss": 0.0, "lr": 1.8879717226573698e-07, "epoch": 4.105367307551364, "percentage": 82.11, "elapsed_time": "3:37:35", "remaining_time": "0:47:25", "throughput": 8673.69, "total_tokens": 113241960} +{"current_steps": 168050, "total_steps": 204665, "loss": 0.0, "lr": 1.8874730786967752e-07, "epoch": 4.105489458383212, "percentage": 82.11, "elapsed_time": "3:37:36", "remaining_time": "0:47:24", "throughput": 8673.71, "total_tokens": 113245288} +{"current_steps": 168055, "total_steps": 204665, "loss": 0.0001, "lr": 1.8869744937326603e-07, "epoch": 4.1056116092150585, "percentage": 82.11, "elapsed_time": "3:37:36", "remaining_time": "0:47:24", "throughput": 8673.73, "total_tokens": 113248616} +{"current_steps": 168060, "total_steps": 204665, "loss": 0.0, "lr": 1.886475967768647e-07, "epoch": 4.105733760046906, "percentage": 82.11, "elapsed_time": "3:37:36", "remaining_time": "0:47:23", "throughput": 8673.83, "total_tokens": 113253160} +{"current_steps": 168065, "total_steps": 204665, "loss": 0.0, "lr": 1.8859775008083646e-07, "epoch": 4.105855910878753, "percentage": 82.12, "elapsed_time": "3:37:37", "remaining_time": "0:47:23", "throughput": 8673.84, "total_tokens": 113256296} +{"current_steps": 168070, "total_steps": 204665, "loss": 0.0, "lr": 1.8854790928554343e-07, "epoch": 4.1059780617106005, "percentage": 82.12, "elapsed_time": "3:37:37", "remaining_time": "0:47:23", "throughput": 8673.87, "total_tokens": 113259624} +{"current_steps": 168075, "total_steps": 204665, "loss": 0.0, "lr": 1.8849807439134847e-07, "epoch": 4.106100212542447, "percentage": 82.12, "elapsed_time": "3:37:37", "remaining_time": "0:47:22", "throughput": 8673.89, "total_tokens": 113262952} +{"current_steps": 168080, "total_steps": 204665, "loss": 0.0, "lr": 1.8844824539861348e-07, "epoch": 4.106222363374295, "percentage": 82.12, "elapsed_time": "3:37:38", "remaining_time": "0:47:22", "throughput": 8673.95, "total_tokens": 113266856} +{"current_steps": 168085, "total_steps": 204665, "loss": 0.0, "lr": 1.883984223077012e-07, "epoch": 4.106344514206142, "percentage": 82.13, "elapsed_time": "3:37:38", "remaining_time": "0:47:21", "throughput": 8673.99, "total_tokens": 113270376} +{"current_steps": 168090, "total_steps": 204665, "loss": 0.0466, "lr": 1.883486051189742e-07, "epoch": 4.106466665037989, "percentage": 82.13, "elapsed_time": "3:37:38", "remaining_time": "0:47:21", "throughput": 8674.0, "total_tokens": 113273512} +{"current_steps": 168095, "total_steps": 204665, "loss": 0.0, "lr": 1.882987938327941e-07, "epoch": 4.106588815869836, "percentage": 82.13, "elapsed_time": "3:37:39", "remaining_time": "0:47:21", "throughput": 8674.04, "total_tokens": 113277096} +{"current_steps": 168100, "total_steps": 204665, "loss": 0.0, "lr": 1.8824898844952374e-07, "epoch": 4.106710966701684, "percentage": 82.13, "elapsed_time": "3:37:39", "remaining_time": "0:47:20", "throughput": 8674.06, "total_tokens": 113280296} +{"current_steps": 168105, "total_steps": 204665, "loss": 0.0, "lr": 1.8819918896952492e-07, "epoch": 4.10683311753353, "percentage": 82.14, "elapsed_time": "3:37:40", "remaining_time": "0:47:20", "throughput": 8674.11, "total_tokens": 113284072} +{"current_steps": 168110, "total_steps": 204665, "loss": 0.0, "lr": 1.8814939539315987e-07, "epoch": 4.106955268365377, "percentage": 82.14, "elapsed_time": "3:37:40", "remaining_time": "0:47:19", "throughput": 8674.13, "total_tokens": 113287400} +{"current_steps": 168115, "total_steps": 204665, "loss": 0.0, "lr": 1.8809960772079103e-07, "epoch": 4.107077419197225, "percentage": 82.14, "elapsed_time": "3:37:40", "remaining_time": "0:47:19", "throughput": 8674.15, "total_tokens": 113290600} +{"current_steps": 168120, "total_steps": 204665, "loss": 0.0, "lr": 1.8804982595277995e-07, "epoch": 4.1071995700290715, "percentage": 82.14, "elapsed_time": "3:37:41", "remaining_time": "0:47:19", "throughput": 8674.16, "total_tokens": 113293672} +{"current_steps": 168125, "total_steps": 204665, "loss": 0.0, "lr": 1.8800005008948928e-07, "epoch": 4.107321720860919, "percentage": 82.15, "elapsed_time": "3:37:41", "remaining_time": "0:47:18", "throughput": 8674.15, "total_tokens": 113296488} +{"current_steps": 168130, "total_steps": 204665, "loss": 0.0, "lr": 1.8795028013128055e-07, "epoch": 4.107443871692766, "percentage": 82.15, "elapsed_time": "3:37:41", "remaining_time": "0:47:18", "throughput": 8674.18, "total_tokens": 113299816} +{"current_steps": 168135, "total_steps": 204665, "loss": 0.0, "lr": 1.8790051607851553e-07, "epoch": 4.107566022524614, "percentage": 82.15, "elapsed_time": "3:37:42", "remaining_time": "0:47:17", "throughput": 8674.21, "total_tokens": 113303208} +{"current_steps": 168140, "total_steps": 204665, "loss": 0.0, "lr": 1.878507579315567e-07, "epoch": 4.10768817335646, "percentage": 82.15, "elapsed_time": "3:37:42", "remaining_time": "0:47:17", "throughput": 8674.23, "total_tokens": 113306600} +{"current_steps": 168145, "total_steps": 204665, "loss": 0.0, "lr": 1.878010056907653e-07, "epoch": 4.107810324188308, "percentage": 82.16, "elapsed_time": "3:37:42", "remaining_time": "0:47:17", "throughput": 8674.27, "total_tokens": 113310120} +{"current_steps": 168150, "total_steps": 204665, "loss": 0.0, "lr": 1.877512593565037e-07, "epoch": 4.107932475020155, "percentage": 82.16, "elapsed_time": "3:37:43", "remaining_time": "0:47:16", "throughput": 8674.31, "total_tokens": 113313768} +{"current_steps": 168155, "total_steps": 204665, "loss": 0.0, "lr": 1.8770151892913322e-07, "epoch": 4.108054625852002, "percentage": 82.16, "elapsed_time": "3:37:43", "remaining_time": "0:47:16", "throughput": 8674.35, "total_tokens": 113317352} +{"current_steps": 168160, "total_steps": 204665, "loss": 0.0003, "lr": 1.8765178440901596e-07, "epoch": 4.108176776683849, "percentage": 82.16, "elapsed_time": "3:37:43", "remaining_time": "0:47:15", "throughput": 8674.38, "total_tokens": 113320808} +{"current_steps": 168165, "total_steps": 204665, "loss": 0.0, "lr": 1.8760205579651323e-07, "epoch": 4.108298927515697, "percentage": 82.17, "elapsed_time": "3:37:44", "remaining_time": "0:47:15", "throughput": 8674.41, "total_tokens": 113324328} +{"current_steps": 168170, "total_steps": 204665, "loss": 0.0, "lr": 1.8755233309198704e-07, "epoch": 4.108421078347543, "percentage": 82.17, "elapsed_time": "3:37:44", "remaining_time": "0:47:15", "throughput": 8674.44, "total_tokens": 113327720} +{"current_steps": 168175, "total_steps": 204665, "loss": 0.0, "lr": 1.8750261629579867e-07, "epoch": 4.108543229179391, "percentage": 82.17, "elapsed_time": "3:37:44", "remaining_time": "0:47:14", "throughput": 8674.45, "total_tokens": 113330792} +{"current_steps": 168180, "total_steps": 204665, "loss": 0.0, "lr": 1.8745290540830972e-07, "epoch": 4.108665380011238, "percentage": 82.17, "elapsed_time": "3:37:45", "remaining_time": "0:47:14", "throughput": 8674.47, "total_tokens": 113333992} +{"current_steps": 168185, "total_steps": 204665, "loss": 0.0, "lr": 1.87403200429882e-07, "epoch": 4.1087875308430855, "percentage": 82.18, "elapsed_time": "3:37:45", "remaining_time": "0:47:13", "throughput": 8674.49, "total_tokens": 113337320} +{"current_steps": 168190, "total_steps": 204665, "loss": 0.0, "lr": 1.8735350136087658e-07, "epoch": 4.108909681674932, "percentage": 82.18, "elapsed_time": "3:37:45", "remaining_time": "0:47:13", "throughput": 8674.52, "total_tokens": 113340776} +{"current_steps": 168195, "total_steps": 204665, "loss": 0.0, "lr": 1.8730380820165537e-07, "epoch": 4.109031832506779, "percentage": 82.18, "elapsed_time": "3:37:46", "remaining_time": "0:47:13", "throughput": 8674.55, "total_tokens": 113344168} +{"current_steps": 168200, "total_steps": 204665, "loss": 0.0, "lr": 1.8725412095257908e-07, "epoch": 4.109153983338627, "percentage": 82.18, "elapsed_time": "3:37:46", "remaining_time": "0:47:12", "throughput": 8674.57, "total_tokens": 113347432} +{"current_steps": 168205, "total_steps": 204665, "loss": 0.0, "lr": 1.8720443961400944e-07, "epoch": 4.109276134170473, "percentage": 82.19, "elapsed_time": "3:37:46", "remaining_time": "0:47:12", "throughput": 8674.6, "total_tokens": 113350952} +{"current_steps": 168210, "total_steps": 204665, "loss": 0.0, "lr": 1.8715476418630805e-07, "epoch": 4.109398285002321, "percentage": 82.19, "elapsed_time": "3:37:47", "remaining_time": "0:47:11", "throughput": 8674.63, "total_tokens": 113354344} +{"current_steps": 168215, "total_steps": 204665, "loss": 0.0, "lr": 1.871050946698356e-07, "epoch": 4.109520435834168, "percentage": 82.19, "elapsed_time": "3:37:47", "remaining_time": "0:47:11", "throughput": 8674.66, "total_tokens": 113357800} +{"current_steps": 168220, "total_steps": 204665, "loss": 0.0, "lr": 1.870554310649538e-07, "epoch": 4.109642586666015, "percentage": 82.19, "elapsed_time": "3:37:48", "remaining_time": "0:47:11", "throughput": 8674.68, "total_tokens": 113361064} +{"current_steps": 168225, "total_steps": 204665, "loss": 0.0, "lr": 1.8700577337202327e-07, "epoch": 4.109764737497862, "percentage": 82.2, "elapsed_time": "3:37:48", "remaining_time": "0:47:10", "throughput": 8674.7, "total_tokens": 113364200} +{"current_steps": 168230, "total_steps": 204665, "loss": 0.0, "lr": 1.8695612159140572e-07, "epoch": 4.10988688832971, "percentage": 82.2, "elapsed_time": "3:37:48", "remaining_time": "0:47:10", "throughput": 8674.73, "total_tokens": 113367720} +{"current_steps": 168235, "total_steps": 204665, "loss": 0.0, "lr": 1.8690647572346185e-07, "epoch": 4.1100090391615565, "percentage": 82.2, "elapsed_time": "3:37:49", "remaining_time": "0:47:10", "throughput": 8674.75, "total_tokens": 113370984} +{"current_steps": 168240, "total_steps": 204665, "loss": 0.0, "lr": 1.868568357685526e-07, "epoch": 4.110131189993404, "percentage": 82.2, "elapsed_time": "3:37:49", "remaining_time": "0:47:09", "throughput": 8674.75, "total_tokens": 113373864} +{"current_steps": 168245, "total_steps": 204665, "loss": 0.0, "lr": 1.8680720172703946e-07, "epoch": 4.110253340825251, "percentage": 82.21, "elapsed_time": "3:37:49", "remaining_time": "0:47:09", "throughput": 8674.75, "total_tokens": 113376872} +{"current_steps": 168250, "total_steps": 204665, "loss": 0.0, "lr": 1.867575735992827e-07, "epoch": 4.1103754916570985, "percentage": 82.21, "elapsed_time": "3:37:50", "remaining_time": "0:47:08", "throughput": 8674.81, "total_tokens": 113380840} +{"current_steps": 168255, "total_steps": 204665, "loss": 0.0, "lr": 1.8670795138564387e-07, "epoch": 4.110497642488945, "percentage": 82.21, "elapsed_time": "3:37:50", "remaining_time": "0:47:08", "throughput": 8674.83, "total_tokens": 113384040} +{"current_steps": 168260, "total_steps": 204665, "loss": 0.0, "lr": 1.8665833508648344e-07, "epoch": 4.110619793320793, "percentage": 82.21, "elapsed_time": "3:37:50", "remaining_time": "0:47:08", "throughput": 8674.88, "total_tokens": 113387752} +{"current_steps": 168265, "total_steps": 204665, "loss": 0.0, "lr": 1.8660872470216215e-07, "epoch": 4.11074194415264, "percentage": 82.21, "elapsed_time": "3:37:51", "remaining_time": "0:47:07", "throughput": 8674.91, "total_tokens": 113391208} +{"current_steps": 168270, "total_steps": 204665, "loss": 0.0, "lr": 1.8655912023304143e-07, "epoch": 4.110864094984487, "percentage": 82.22, "elapsed_time": "3:37:51", "remaining_time": "0:47:07", "throughput": 8675.0, "total_tokens": 113395560} +{"current_steps": 168275, "total_steps": 204665, "loss": 0.0001, "lr": 1.865095216794812e-07, "epoch": 4.110986245816334, "percentage": 82.22, "elapsed_time": "3:37:51", "remaining_time": "0:47:06", "throughput": 8675.02, "total_tokens": 113398888} +{"current_steps": 168280, "total_steps": 204665, "loss": 0.0, "lr": 1.8645992904184282e-07, "epoch": 4.111108396648181, "percentage": 82.22, "elapsed_time": "3:37:52", "remaining_time": "0:47:06", "throughput": 8675.03, "total_tokens": 113401960} +{"current_steps": 168285, "total_steps": 204665, "loss": 0.0, "lr": 1.8641034232048647e-07, "epoch": 4.111230547480028, "percentage": 82.22, "elapsed_time": "3:37:52", "remaining_time": "0:47:06", "throughput": 8675.08, "total_tokens": 113405736} +{"current_steps": 168290, "total_steps": 204665, "loss": 0.0, "lr": 1.8636076151577317e-07, "epoch": 4.111352698311875, "percentage": 82.23, "elapsed_time": "3:37:52", "remaining_time": "0:47:05", "throughput": 8675.12, "total_tokens": 113409384} +{"current_steps": 168295, "total_steps": 204665, "loss": 0.0, "lr": 1.8631118662806288e-07, "epoch": 4.111474849143723, "percentage": 82.23, "elapsed_time": "3:37:53", "remaining_time": "0:47:05", "throughput": 8675.18, "total_tokens": 113413224} +{"current_steps": 168300, "total_steps": 204665, "loss": 0.0, "lr": 1.8626161765771665e-07, "epoch": 4.1115969999755695, "percentage": 82.23, "elapsed_time": "3:37:53", "remaining_time": "0:47:04", "throughput": 8675.2, "total_tokens": 113416488} +{"current_steps": 168305, "total_steps": 204665, "loss": 0.0, "lr": 1.8621205460509504e-07, "epoch": 4.111719150807417, "percentage": 82.23, "elapsed_time": "3:37:53", "remaining_time": "0:47:04", "throughput": 8675.22, "total_tokens": 113419688} +{"current_steps": 168310, "total_steps": 204665, "loss": 0.0, "lr": 1.861624974705579e-07, "epoch": 4.111841301639264, "percentage": 82.24, "elapsed_time": "3:37:54", "remaining_time": "0:47:04", "throughput": 8675.27, "total_tokens": 113423528} +{"current_steps": 168315, "total_steps": 204665, "loss": 0.0, "lr": 1.8611294625446628e-07, "epoch": 4.1119634524711115, "percentage": 82.24, "elapsed_time": "3:37:54", "remaining_time": "0:47:03", "throughput": 8675.29, "total_tokens": 113426792} +{"current_steps": 168320, "total_steps": 204665, "loss": 0.0, "lr": 1.8606340095717999e-07, "epoch": 4.112085603302958, "percentage": 82.24, "elapsed_time": "3:37:55", "remaining_time": "0:47:03", "throughput": 8675.33, "total_tokens": 113430312} +{"current_steps": 168325, "total_steps": 204665, "loss": 0.0, "lr": 1.8601386157905974e-07, "epoch": 4.112207754134806, "percentage": 82.24, "elapsed_time": "3:37:55", "remaining_time": "0:47:02", "throughput": 8675.38, "total_tokens": 113434024} +{"current_steps": 168330, "total_steps": 204665, "loss": 0.0, "lr": 1.8596432812046548e-07, "epoch": 4.112329904966653, "percentage": 82.25, "elapsed_time": "3:37:55", "remaining_time": "0:47:02", "throughput": 8675.43, "total_tokens": 113437736} +{"current_steps": 168335, "total_steps": 204665, "loss": 0.0, "lr": 1.859148005817578e-07, "epoch": 4.1124520557985, "percentage": 82.25, "elapsed_time": "3:37:56", "remaining_time": "0:47:02", "throughput": 8675.44, "total_tokens": 113441000} +{"current_steps": 168340, "total_steps": 204665, "loss": 0.0, "lr": 1.858652789632964e-07, "epoch": 4.112574206630347, "percentage": 82.25, "elapsed_time": "3:37:56", "remaining_time": "0:47:01", "throughput": 8675.44, "total_tokens": 113443816} +{"current_steps": 168345, "total_steps": 204665, "loss": 0.0, "lr": 1.858157632654419e-07, "epoch": 4.112696357462195, "percentage": 82.25, "elapsed_time": "3:37:56", "remaining_time": "0:47:01", "throughput": 8675.45, "total_tokens": 113446952} +{"current_steps": 168350, "total_steps": 204665, "loss": 0.0, "lr": 1.8576625348855411e-07, "epoch": 4.112818508294041, "percentage": 82.26, "elapsed_time": "3:37:57", "remaining_time": "0:47:00", "throughput": 8675.46, "total_tokens": 113450088} +{"current_steps": 168355, "total_steps": 204665, "loss": 0.0, "lr": 1.85716749632993e-07, "epoch": 4.112940659125889, "percentage": 82.26, "elapsed_time": "3:37:57", "remaining_time": "0:47:00", "throughput": 8675.49, "total_tokens": 113453416} +{"current_steps": 168360, "total_steps": 204665, "loss": 0.0, "lr": 1.8566725169911858e-07, "epoch": 4.113062809957736, "percentage": 82.26, "elapsed_time": "3:37:57", "remaining_time": "0:47:00", "throughput": 8675.5, "total_tokens": 113456552} +{"current_steps": 168365, "total_steps": 204665, "loss": 0.0, "lr": 1.856177596872913e-07, "epoch": 4.113184960789583, "percentage": 82.26, "elapsed_time": "3:37:58", "remaining_time": "0:46:59", "throughput": 8675.52, "total_tokens": 113459880} +{"current_steps": 168370, "total_steps": 204665, "loss": 0.0, "lr": 1.8556827359787042e-07, "epoch": 4.11330711162143, "percentage": 82.27, "elapsed_time": "3:37:58", "remaining_time": "0:46:59", "throughput": 8675.55, "total_tokens": 113463208} +{"current_steps": 168375, "total_steps": 204665, "loss": 0.0, "lr": 1.8551879343121635e-07, "epoch": 4.113429262453277, "percentage": 82.27, "elapsed_time": "3:37:58", "remaining_time": "0:46:58", "throughput": 8675.56, "total_tokens": 113466344} +{"current_steps": 168380, "total_steps": 204665, "loss": 0.0, "lr": 1.854693191876884e-07, "epoch": 4.1135514132851245, "percentage": 82.27, "elapsed_time": "3:37:59", "remaining_time": "0:46:58", "throughput": 8675.6, "total_tokens": 113469928} +{"current_steps": 168385, "total_steps": 204665, "loss": 0.0, "lr": 1.8541985086764688e-07, "epoch": 4.113673564116971, "percentage": 82.27, "elapsed_time": "3:37:59", "remaining_time": "0:46:58", "throughput": 8675.63, "total_tokens": 113473384} +{"current_steps": 168390, "total_steps": 204665, "loss": 0.0, "lr": 1.8537038847145116e-07, "epoch": 4.113795714948819, "percentage": 82.28, "elapsed_time": "3:37:59", "remaining_time": "0:46:57", "throughput": 8675.66, "total_tokens": 113476840} +{"current_steps": 168395, "total_steps": 204665, "loss": 0.0, "lr": 1.8532093199946098e-07, "epoch": 4.113917865780666, "percentage": 82.28, "elapsed_time": "3:38:00", "remaining_time": "0:46:57", "throughput": 8675.69, "total_tokens": 113480232} +{"current_steps": 168400, "total_steps": 204665, "loss": 0.0, "lr": 1.852714814520364e-07, "epoch": 4.114040016612513, "percentage": 82.28, "elapsed_time": "3:38:00", "remaining_time": "0:46:56", "throughput": 8675.7, "total_tokens": 113483304} +{"current_steps": 168405, "total_steps": 204665, "loss": 0.0, "lr": 1.852220368295364e-07, "epoch": 4.11416216744436, "percentage": 82.28, "elapsed_time": "3:38:00", "remaining_time": "0:46:56", "throughput": 8675.71, "total_tokens": 113486440} +{"current_steps": 168410, "total_steps": 204665, "loss": 0.0, "lr": 1.8517259813232122e-07, "epoch": 4.114284318276208, "percentage": 82.29, "elapsed_time": "3:38:01", "remaining_time": "0:46:56", "throughput": 8675.75, "total_tokens": 113489960} +{"current_steps": 168415, "total_steps": 204665, "loss": 0.0, "lr": 1.851231653607499e-07, "epoch": 4.114406469108054, "percentage": 82.29, "elapsed_time": "3:38:01", "remaining_time": "0:46:55", "throughput": 8675.78, "total_tokens": 113493416} +{"current_steps": 168420, "total_steps": 204665, "loss": 0.0, "lr": 1.8507373851518204e-07, "epoch": 4.114528619939902, "percentage": 82.29, "elapsed_time": "3:38:01", "remaining_time": "0:46:55", "throughput": 8675.79, "total_tokens": 113496488} +{"current_steps": 168425, "total_steps": 204665, "loss": 0.0002, "lr": 1.8502431759597737e-07, "epoch": 4.114650770771749, "percentage": 82.29, "elapsed_time": "3:38:02", "remaining_time": "0:46:54", "throughput": 8675.79, "total_tokens": 113499560} +{"current_steps": 168430, "total_steps": 204665, "loss": 0.0, "lr": 1.849749026034948e-07, "epoch": 4.114772921603596, "percentage": 82.3, "elapsed_time": "3:38:02", "remaining_time": "0:46:54", "throughput": 8675.81, "total_tokens": 113502696} +{"current_steps": 168435, "total_steps": 204665, "loss": 0.0, "lr": 1.8492549353809416e-07, "epoch": 4.114895072435443, "percentage": 82.3, "elapsed_time": "3:38:03", "remaining_time": "0:46:54", "throughput": 8675.84, "total_tokens": 113506216} +{"current_steps": 168440, "total_steps": 204665, "loss": 0.0, "lr": 1.8487609040013463e-07, "epoch": 4.115017223267291, "percentage": 82.3, "elapsed_time": "3:38:03", "remaining_time": "0:46:53", "throughput": 8675.87, "total_tokens": 113509736} +{"current_steps": 168445, "total_steps": 204665, "loss": 0.0, "lr": 1.8482669318997524e-07, "epoch": 4.1151393740991375, "percentage": 82.3, "elapsed_time": "3:38:03", "remaining_time": "0:46:53", "throughput": 8675.93, "total_tokens": 113513512} +{"current_steps": 168450, "total_steps": 204665, "loss": 0.0, "lr": 1.8477730190797548e-07, "epoch": 4.115261524930985, "percentage": 82.31, "elapsed_time": "3:38:04", "remaining_time": "0:46:52", "throughput": 8675.94, "total_tokens": 113516648} +{"current_steps": 168455, "total_steps": 204665, "loss": 0.0, "lr": 1.8472791655449426e-07, "epoch": 4.115383675762832, "percentage": 82.31, "elapsed_time": "3:38:04", "remaining_time": "0:46:52", "throughput": 8676.1, "total_tokens": 113522280} +{"current_steps": 168460, "total_steps": 204665, "loss": 0.0, "lr": 1.8467853712989123e-07, "epoch": 4.115505826594679, "percentage": 82.31, "elapsed_time": "3:38:04", "remaining_time": "0:46:52", "throughput": 8676.1, "total_tokens": 113525160} +{"current_steps": 168465, "total_steps": 204665, "loss": 0.0, "lr": 1.8462916363452486e-07, "epoch": 4.115627977426526, "percentage": 82.31, "elapsed_time": "3:38:05", "remaining_time": "0:46:51", "throughput": 8676.15, "total_tokens": 113528936} +{"current_steps": 168470, "total_steps": 204665, "loss": 0.0001, "lr": 1.8457979606875483e-07, "epoch": 4.115750128258373, "percentage": 82.32, "elapsed_time": "3:38:05", "remaining_time": "0:46:51", "throughput": 8676.15, "total_tokens": 113531880} +{"current_steps": 168475, "total_steps": 204665, "loss": 0.0, "lr": 1.8453043443293946e-07, "epoch": 4.115872279090221, "percentage": 82.32, "elapsed_time": "3:38:05", "remaining_time": "0:46:50", "throughput": 8676.18, "total_tokens": 113535272} +{"current_steps": 168480, "total_steps": 204665, "loss": 0.0, "lr": 1.8448107872743855e-07, "epoch": 4.115994429922067, "percentage": 82.32, "elapsed_time": "3:38:06", "remaining_time": "0:46:50", "throughput": 8676.19, "total_tokens": 113538408} +{"current_steps": 168485, "total_steps": 204665, "loss": 0.0, "lr": 1.8443172895261016e-07, "epoch": 4.116116580753915, "percentage": 82.32, "elapsed_time": "3:38:06", "remaining_time": "0:46:50", "throughput": 8676.22, "total_tokens": 113541928} +{"current_steps": 168490, "total_steps": 204665, "loss": 0.0001, "lr": 1.8438238510881365e-07, "epoch": 4.116238731585762, "percentage": 82.32, "elapsed_time": "3:38:06", "remaining_time": "0:46:49", "throughput": 8676.23, "total_tokens": 113545000} +{"current_steps": 168495, "total_steps": 204665, "loss": 0.0, "lr": 1.8433304719640807e-07, "epoch": 4.1163608824176094, "percentage": 82.33, "elapsed_time": "3:38:07", "remaining_time": "0:46:49", "throughput": 8676.25, "total_tokens": 113548264} +{"current_steps": 168500, "total_steps": 204665, "loss": 0.0, "lr": 1.8428371521575182e-07, "epoch": 4.116483033249456, "percentage": 82.33, "elapsed_time": "3:38:07", "remaining_time": "0:46:48", "throughput": 8676.31, "total_tokens": 113552104} +{"current_steps": 168505, "total_steps": 204665, "loss": 0.0, "lr": 1.842343891672039e-07, "epoch": 4.116605184081304, "percentage": 82.33, "elapsed_time": "3:38:07", "remaining_time": "0:46:48", "throughput": 8676.31, "total_tokens": 113555176} +{"current_steps": 168510, "total_steps": 204665, "loss": 0.0, "lr": 1.8418506905112274e-07, "epoch": 4.116727334913151, "percentage": 82.33, "elapsed_time": "3:38:08", "remaining_time": "0:46:48", "throughput": 8676.36, "total_tokens": 113558824} +{"current_steps": 168515, "total_steps": 204665, "loss": 0.0, "lr": 1.8413575486786713e-07, "epoch": 4.116849485744998, "percentage": 82.34, "elapsed_time": "3:38:08", "remaining_time": "0:46:47", "throughput": 8676.37, "total_tokens": 113562024} +{"current_steps": 168520, "total_steps": 204665, "loss": 0.0, "lr": 1.8408644661779605e-07, "epoch": 4.116971636576845, "percentage": 82.34, "elapsed_time": "3:38:09", "remaining_time": "0:46:47", "throughput": 8676.41, "total_tokens": 113565544} +{"current_steps": 168525, "total_steps": 204665, "loss": 0.0001, "lr": 1.8403714430126748e-07, "epoch": 4.117093787408693, "percentage": 82.34, "elapsed_time": "3:38:09", "remaining_time": "0:46:46", "throughput": 8676.44, "total_tokens": 113568936} +{"current_steps": 168530, "total_steps": 204665, "loss": 0.0, "lr": 1.8398784791864052e-07, "epoch": 4.117215938240539, "percentage": 82.34, "elapsed_time": "3:38:09", "remaining_time": "0:46:46", "throughput": 8676.48, "total_tokens": 113572584} +{"current_steps": 168535, "total_steps": 204665, "loss": 0.0, "lr": 1.839385574702732e-07, "epoch": 4.117338089072387, "percentage": 82.35, "elapsed_time": "3:38:10", "remaining_time": "0:46:46", "throughput": 8676.49, "total_tokens": 113575720} +{"current_steps": 168540, "total_steps": 204665, "loss": 0.0, "lr": 1.8388927295652444e-07, "epoch": 4.117460239904234, "percentage": 82.35, "elapsed_time": "3:38:10", "remaining_time": "0:46:45", "throughput": 8676.53, "total_tokens": 113579368} +{"current_steps": 168545, "total_steps": 204665, "loss": 0.0, "lr": 1.838399943777521e-07, "epoch": 4.117582390736081, "percentage": 82.35, "elapsed_time": "3:38:10", "remaining_time": "0:46:45", "throughput": 8676.57, "total_tokens": 113582824} +{"current_steps": 168550, "total_steps": 204665, "loss": 0.0, "lr": 1.837907217343151e-07, "epoch": 4.117704541567928, "percentage": 82.35, "elapsed_time": "3:38:11", "remaining_time": "0:46:45", "throughput": 8676.6, "total_tokens": 113586344} +{"current_steps": 168555, "total_steps": 204665, "loss": 0.0, "lr": 1.8374145502657157e-07, "epoch": 4.117826692399775, "percentage": 82.36, "elapsed_time": "3:38:11", "remaining_time": "0:46:44", "throughput": 8676.62, "total_tokens": 113589608} +{"current_steps": 168560, "total_steps": 204665, "loss": 0.0, "lr": 1.8369219425487935e-07, "epoch": 4.1179488432316225, "percentage": 82.36, "elapsed_time": "3:38:11", "remaining_time": "0:46:44", "throughput": 8676.68, "total_tokens": 113593576} +{"current_steps": 168565, "total_steps": 204665, "loss": 0.0, "lr": 1.836429394195974e-07, "epoch": 4.118070994063469, "percentage": 82.36, "elapsed_time": "3:38:12", "remaining_time": "0:46:43", "throughput": 8676.72, "total_tokens": 113597160} +{"current_steps": 168570, "total_steps": 204665, "loss": 0.0, "lr": 1.8359369052108332e-07, "epoch": 4.118193144895317, "percentage": 82.36, "elapsed_time": "3:38:12", "remaining_time": "0:46:43", "throughput": 8676.79, "total_tokens": 113601128} +{"current_steps": 168575, "total_steps": 204665, "loss": 0.0, "lr": 1.8354444755969579e-07, "epoch": 4.118315295727164, "percentage": 82.37, "elapsed_time": "3:38:12", "remaining_time": "0:46:43", "throughput": 8676.81, "total_tokens": 113604456} +{"current_steps": 168580, "total_steps": 204665, "loss": 0.0, "lr": 1.8349521053579232e-07, "epoch": 4.118437446559011, "percentage": 82.37, "elapsed_time": "3:38:13", "remaining_time": "0:46:42", "throughput": 8676.85, "total_tokens": 113607976} +{"current_steps": 168585, "total_steps": 204665, "loss": 0.0, "lr": 1.8344597944973129e-07, "epoch": 4.118559597390858, "percentage": 82.37, "elapsed_time": "3:38:13", "remaining_time": "0:46:42", "throughput": 8676.91, "total_tokens": 113612008} +{"current_steps": 168590, "total_steps": 204665, "loss": 0.0, "lr": 1.8339675430187097e-07, "epoch": 4.118681748222706, "percentage": 82.37, "elapsed_time": "3:38:13", "remaining_time": "0:46:41", "throughput": 8676.93, "total_tokens": 113615272} +{"current_steps": 168595, "total_steps": 204665, "loss": 0.0, "lr": 1.8334753509256883e-07, "epoch": 4.118803899054552, "percentage": 82.38, "elapsed_time": "3:38:14", "remaining_time": "0:46:41", "throughput": 8676.95, "total_tokens": 113618408} +{"current_steps": 168600, "total_steps": 204665, "loss": 0.0, "lr": 1.8329832182218341e-07, "epoch": 4.1189260498864, "percentage": 82.38, "elapsed_time": "3:38:14", "remaining_time": "0:46:41", "throughput": 8676.97, "total_tokens": 113621800} +{"current_steps": 168605, "total_steps": 204665, "loss": 0.0, "lr": 1.8324911449107195e-07, "epoch": 4.119048200718247, "percentage": 82.38, "elapsed_time": "3:38:14", "remaining_time": "0:46:40", "throughput": 8677.02, "total_tokens": 113625448} +{"current_steps": 168610, "total_steps": 204665, "loss": 0.0, "lr": 1.831999130995926e-07, "epoch": 4.119170351550094, "percentage": 82.38, "elapsed_time": "3:38:15", "remaining_time": "0:46:40", "throughput": 8677.04, "total_tokens": 113628840} +{"current_steps": 168615, "total_steps": 204665, "loss": 0.0, "lr": 1.8315071764810342e-07, "epoch": 4.119292502381941, "percentage": 82.39, "elapsed_time": "3:38:15", "remaining_time": "0:46:39", "throughput": 8677.04, "total_tokens": 113631720} +{"current_steps": 168620, "total_steps": 204665, "loss": 0.0, "lr": 1.8310152813696166e-07, "epoch": 4.119414653213789, "percentage": 82.39, "elapsed_time": "3:38:16", "remaining_time": "0:46:39", "throughput": 8677.04, "total_tokens": 113634664} +{"current_steps": 168625, "total_steps": 204665, "loss": 0.0003, "lr": 1.8305234456652562e-07, "epoch": 4.1195368040456355, "percentage": 82.39, "elapsed_time": "3:38:16", "remaining_time": "0:46:39", "throughput": 8677.11, "total_tokens": 113638824} +{"current_steps": 168630, "total_steps": 204665, "loss": 0.0, "lr": 1.8300316693715234e-07, "epoch": 4.119658954877483, "percentage": 82.39, "elapsed_time": "3:38:16", "remaining_time": "0:46:38", "throughput": 8677.14, "total_tokens": 113642216} +{"current_steps": 168635, "total_steps": 204665, "loss": 0.0, "lr": 1.8295399524920008e-07, "epoch": 4.11978110570933, "percentage": 82.4, "elapsed_time": "3:38:17", "remaining_time": "0:46:38", "throughput": 8677.16, "total_tokens": 113645480} +{"current_steps": 168640, "total_steps": 204665, "loss": 0.0, "lr": 1.829048295030259e-07, "epoch": 4.119903256541177, "percentage": 82.4, "elapsed_time": "3:38:17", "remaining_time": "0:46:37", "throughput": 8677.19, "total_tokens": 113649000} +{"current_steps": 168645, "total_steps": 204665, "loss": 0.0, "lr": 1.828556696989878e-07, "epoch": 4.120025407373024, "percentage": 82.4, "elapsed_time": "3:38:17", "remaining_time": "0:46:37", "throughput": 8677.21, "total_tokens": 113652200} +{"current_steps": 168650, "total_steps": 204665, "loss": 0.0477, "lr": 1.8280651583744278e-07, "epoch": 4.120147558204871, "percentage": 82.4, "elapsed_time": "3:38:18", "remaining_time": "0:46:37", "throughput": 8677.22, "total_tokens": 113655336} +{"current_steps": 168655, "total_steps": 204665, "loss": 0.0, "lr": 1.8275736791874885e-07, "epoch": 4.120269709036719, "percentage": 82.41, "elapsed_time": "3:38:18", "remaining_time": "0:46:36", "throughput": 8677.25, "total_tokens": 113658728} +{"current_steps": 168660, "total_steps": 204665, "loss": 0.0, "lr": 1.8270822594326308e-07, "epoch": 4.120391859868565, "percentage": 82.41, "elapsed_time": "3:38:18", "remaining_time": "0:46:36", "throughput": 8677.26, "total_tokens": 113661928} +{"current_steps": 168665, "total_steps": 204665, "loss": 0.0, "lr": 1.826590899113427e-07, "epoch": 4.120514010700413, "percentage": 82.41, "elapsed_time": "3:38:19", "remaining_time": "0:46:35", "throughput": 8677.28, "total_tokens": 113665128} +{"current_steps": 168670, "total_steps": 204665, "loss": 0.0, "lr": 1.8260995982334538e-07, "epoch": 4.12063616153226, "percentage": 82.41, "elapsed_time": "3:38:19", "remaining_time": "0:46:35", "throughput": 8677.3, "total_tokens": 113668456} +{"current_steps": 168675, "total_steps": 204665, "loss": 0.0, "lr": 1.8256083567962843e-07, "epoch": 4.120758312364107, "percentage": 82.42, "elapsed_time": "3:38:19", "remaining_time": "0:46:35", "throughput": 8677.29, "total_tokens": 113671272} +{"current_steps": 168680, "total_steps": 204665, "loss": 0.0, "lr": 1.825117174805486e-07, "epoch": 4.120880463195954, "percentage": 82.42, "elapsed_time": "3:38:20", "remaining_time": "0:46:34", "throughput": 8677.34, "total_tokens": 113674920} +{"current_steps": 168685, "total_steps": 204665, "loss": 0.0, "lr": 1.8246260522646385e-07, "epoch": 4.121002614027802, "percentage": 82.42, "elapsed_time": "3:38:20", "remaining_time": "0:46:34", "throughput": 8677.34, "total_tokens": 113677928} +{"current_steps": 168690, "total_steps": 204665, "loss": 0.0, "lr": 1.8241349891773062e-07, "epoch": 4.1211247648596485, "percentage": 82.42, "elapsed_time": "3:38:20", "remaining_time": "0:46:33", "throughput": 8677.35, "total_tokens": 113681064} +{"current_steps": 168695, "total_steps": 204665, "loss": 0.0, "lr": 1.8236439855470654e-07, "epoch": 4.121246915691496, "percentage": 82.42, "elapsed_time": "3:38:21", "remaining_time": "0:46:33", "throughput": 8677.36, "total_tokens": 113684264} +{"current_steps": 168700, "total_steps": 204665, "loss": 0.0, "lr": 1.8231530413774833e-07, "epoch": 4.121369066523343, "percentage": 82.43, "elapsed_time": "3:38:21", "remaining_time": "0:46:33", "throughput": 8677.37, "total_tokens": 113687400} +{"current_steps": 168705, "total_steps": 204665, "loss": 0.0, "lr": 1.82266215667213e-07, "epoch": 4.1214912173551905, "percentage": 82.43, "elapsed_time": "3:38:21", "remaining_time": "0:46:32", "throughput": 8677.41, "total_tokens": 113690920} +{"current_steps": 168710, "total_steps": 204665, "loss": 0.0001, "lr": 1.822171331434581e-07, "epoch": 4.121613368187037, "percentage": 82.43, "elapsed_time": "3:38:22", "remaining_time": "0:46:32", "throughput": 8677.44, "total_tokens": 113694376} +{"current_steps": 168715, "total_steps": 204665, "loss": 0.0, "lr": 1.8216805656683986e-07, "epoch": 4.121735519018885, "percentage": 82.43, "elapsed_time": "3:38:22", "remaining_time": "0:46:31", "throughput": 8677.46, "total_tokens": 113697576} +{"current_steps": 168720, "total_steps": 204665, "loss": 0.0, "lr": 1.8211898593771568e-07, "epoch": 4.121857669850732, "percentage": 82.44, "elapsed_time": "3:38:22", "remaining_time": "0:46:31", "throughput": 8677.51, "total_tokens": 113701352} +{"current_steps": 168725, "total_steps": 204665, "loss": 0.0001, "lr": 1.8206992125644205e-07, "epoch": 4.121979820682579, "percentage": 82.44, "elapsed_time": "3:38:23", "remaining_time": "0:46:31", "throughput": 8677.53, "total_tokens": 113704616} +{"current_steps": 168730, "total_steps": 204665, "loss": 0.0, "lr": 1.8202086252337611e-07, "epoch": 4.122101971514426, "percentage": 82.44, "elapsed_time": "3:38:23", "remaining_time": "0:46:30", "throughput": 8677.54, "total_tokens": 113707752} +{"current_steps": 168735, "total_steps": 204665, "loss": 0.0, "lr": 1.8197180973887428e-07, "epoch": 4.122224122346273, "percentage": 82.44, "elapsed_time": "3:38:24", "remaining_time": "0:46:30", "throughput": 8677.56, "total_tokens": 113711080} +{"current_steps": 168740, "total_steps": 204665, "loss": 0.0, "lr": 1.8192276290329345e-07, "epoch": 4.12234627317812, "percentage": 82.45, "elapsed_time": "3:38:24", "remaining_time": "0:46:29", "throughput": 8677.58, "total_tokens": 113714344} +{"current_steps": 168745, "total_steps": 204665, "loss": 0.0, "lr": 1.8187372201699058e-07, "epoch": 4.122468424009967, "percentage": 82.45, "elapsed_time": "3:38:24", "remaining_time": "0:46:29", "throughput": 8677.6, "total_tokens": 113717544} +{"current_steps": 168750, "total_steps": 204665, "loss": 0.0, "lr": 1.8182468708032205e-07, "epoch": 4.122590574841815, "percentage": 82.45, "elapsed_time": "3:38:25", "remaining_time": "0:46:29", "throughput": 8677.63, "total_tokens": 113721000} +{"current_steps": 168755, "total_steps": 204665, "loss": 0.0, "lr": 1.8177565809364426e-07, "epoch": 4.1227127256736615, "percentage": 82.45, "elapsed_time": "3:38:25", "remaining_time": "0:46:28", "throughput": 8677.68, "total_tokens": 113724712} +{"current_steps": 168760, "total_steps": 204665, "loss": 0.0, "lr": 1.817266350573141e-07, "epoch": 4.122834876505509, "percentage": 82.46, "elapsed_time": "3:38:25", "remaining_time": "0:46:28", "throughput": 8677.73, "total_tokens": 113728552} +{"current_steps": 168765, "total_steps": 204665, "loss": 0.0, "lr": 1.816776179716878e-07, "epoch": 4.122957027337356, "percentage": 82.46, "elapsed_time": "3:38:26", "remaining_time": "0:46:27", "throughput": 8677.77, "total_tokens": 113732136} +{"current_steps": 168770, "total_steps": 204665, "loss": 0.0, "lr": 1.8162860683712212e-07, "epoch": 4.1230791781692036, "percentage": 82.46, "elapsed_time": "3:38:26", "remaining_time": "0:46:27", "throughput": 8677.78, "total_tokens": 113735272} +{"current_steps": 168775, "total_steps": 204665, "loss": 0.0, "lr": 1.8157960165397312e-07, "epoch": 4.12320132900105, "percentage": 82.46, "elapsed_time": "3:38:26", "remaining_time": "0:46:27", "throughput": 8677.82, "total_tokens": 113738728} +{"current_steps": 168780, "total_steps": 204665, "loss": 0.0021, "lr": 1.8153060242259765e-07, "epoch": 4.123323479832898, "percentage": 82.47, "elapsed_time": "3:38:27", "remaining_time": "0:46:26", "throughput": 8677.84, "total_tokens": 113742056} +{"current_steps": 168785, "total_steps": 204665, "loss": 0.0, "lr": 1.8148160914335153e-07, "epoch": 4.123445630664745, "percentage": 82.47, "elapsed_time": "3:38:27", "remaining_time": "0:46:26", "throughput": 8677.88, "total_tokens": 113745640} +{"current_steps": 168790, "total_steps": 204665, "loss": 0.0, "lr": 1.8143262181659157e-07, "epoch": 4.123567781496592, "percentage": 82.47, "elapsed_time": "3:38:27", "remaining_time": "0:46:25", "throughput": 8677.92, "total_tokens": 113749160} +{"current_steps": 168795, "total_steps": 204665, "loss": 0.0, "lr": 1.813836404426734e-07, "epoch": 4.123689932328439, "percentage": 82.47, "elapsed_time": "3:38:28", "remaining_time": "0:46:25", "throughput": 8677.96, "total_tokens": 113752808} +{"current_steps": 168800, "total_steps": 204665, "loss": 0.0, "lr": 1.8133466502195372e-07, "epoch": 4.123812083160287, "percentage": 82.48, "elapsed_time": "3:38:28", "remaining_time": "0:46:25", "throughput": 8677.97, "total_tokens": 113756008} +{"current_steps": 168805, "total_steps": 204665, "loss": 0.0, "lr": 1.8128569555478867e-07, "epoch": 4.123934233992133, "percentage": 82.48, "elapsed_time": "3:38:28", "remaining_time": "0:46:24", "throughput": 8677.97, "total_tokens": 113758952} +{"current_steps": 168810, "total_steps": 204665, "loss": 0.0, "lr": 1.812367320415341e-07, "epoch": 4.124056384823981, "percentage": 82.48, "elapsed_time": "3:38:29", "remaining_time": "0:46:24", "throughput": 8677.99, "total_tokens": 113762280} +{"current_steps": 168815, "total_steps": 204665, "loss": 0.0, "lr": 1.811877744825464e-07, "epoch": 4.124178535655828, "percentage": 82.48, "elapsed_time": "3:38:29", "remaining_time": "0:46:23", "throughput": 8678.03, "total_tokens": 113765736} +{"current_steps": 168820, "total_steps": 204665, "loss": 0.0, "lr": 1.8113882287818127e-07, "epoch": 4.124300686487675, "percentage": 82.49, "elapsed_time": "3:38:29", "remaining_time": "0:46:23", "throughput": 8678.05, "total_tokens": 113769128} +{"current_steps": 168825, "total_steps": 204665, "loss": 0.0, "lr": 1.8108987722879487e-07, "epoch": 4.124422837319522, "percentage": 82.49, "elapsed_time": "3:38:30", "remaining_time": "0:46:23", "throughput": 8678.12, "total_tokens": 113773160} +{"current_steps": 168830, "total_steps": 204665, "loss": 0.0, "lr": 1.8104093753474336e-07, "epoch": 4.124544988151369, "percentage": 82.49, "elapsed_time": "3:38:30", "remaining_time": "0:46:22", "throughput": 8678.15, "total_tokens": 113776552} +{"current_steps": 168835, "total_steps": 204665, "loss": 0.0, "lr": 1.8099200379638212e-07, "epoch": 4.124667138983217, "percentage": 82.49, "elapsed_time": "3:38:31", "remaining_time": "0:46:22", "throughput": 8678.18, "total_tokens": 113779944} +{"current_steps": 168840, "total_steps": 204665, "loss": 0.0028, "lr": 1.8094307601406767e-07, "epoch": 4.124789289815063, "percentage": 82.5, "elapsed_time": "3:38:31", "remaining_time": "0:46:22", "throughput": 8678.22, "total_tokens": 113783592} +{"current_steps": 168845, "total_steps": 204665, "loss": 0.0, "lr": 1.8089415418815512e-07, "epoch": 4.124911440646911, "percentage": 82.5, "elapsed_time": "3:38:31", "remaining_time": "0:46:21", "throughput": 8678.26, "total_tokens": 113787240} +{"current_steps": 168850, "total_steps": 204665, "loss": 0.0, "lr": 1.8084523831900089e-07, "epoch": 4.125033591478758, "percentage": 82.5, "elapsed_time": "3:38:32", "remaining_time": "0:46:21", "throughput": 8678.31, "total_tokens": 113790888} +{"current_steps": 168855, "total_steps": 204665, "loss": 0.0, "lr": 1.8079632840696023e-07, "epoch": 4.125155742310605, "percentage": 82.5, "elapsed_time": "3:38:32", "remaining_time": "0:46:20", "throughput": 8678.34, "total_tokens": 113794280} +{"current_steps": 168860, "total_steps": 204665, "loss": 0.0, "lr": 1.807474244523891e-07, "epoch": 4.125277893142452, "percentage": 82.51, "elapsed_time": "3:38:32", "remaining_time": "0:46:20", "throughput": 8678.35, "total_tokens": 113797480} +{"current_steps": 168865, "total_steps": 204665, "loss": 0.0, "lr": 1.806985264556432e-07, "epoch": 4.1254000439743, "percentage": 82.51, "elapsed_time": "3:38:33", "remaining_time": "0:46:20", "throughput": 8678.37, "total_tokens": 113800744} +{"current_steps": 168870, "total_steps": 204665, "loss": 0.0001, "lr": 1.806496344170777e-07, "epoch": 4.1255221948061465, "percentage": 82.51, "elapsed_time": "3:38:33", "remaining_time": "0:46:19", "throughput": 8678.39, "total_tokens": 113804008} +{"current_steps": 168875, "total_steps": 204665, "loss": 0.0, "lr": 1.8060074833704863e-07, "epoch": 4.125644345637994, "percentage": 82.51, "elapsed_time": "3:38:33", "remaining_time": "0:46:19", "throughput": 8678.44, "total_tokens": 113807784} +{"current_steps": 168880, "total_steps": 204665, "loss": 0.0, "lr": 1.8055186821591107e-07, "epoch": 4.125766496469841, "percentage": 82.52, "elapsed_time": "3:38:34", "remaining_time": "0:46:18", "throughput": 8678.48, "total_tokens": 113811240} +{"current_steps": 168885, "total_steps": 204665, "loss": 0.0526, "lr": 1.8050299405402102e-07, "epoch": 4.1258886473016885, "percentage": 82.52, "elapsed_time": "3:38:34", "remaining_time": "0:46:18", "throughput": 8678.51, "total_tokens": 113814696} +{"current_steps": 168890, "total_steps": 204665, "loss": 0.0, "lr": 1.8045412585173336e-07, "epoch": 4.126010798133535, "percentage": 82.52, "elapsed_time": "3:38:34", "remaining_time": "0:46:18", "throughput": 8678.53, "total_tokens": 113818024} +{"current_steps": 168895, "total_steps": 204665, "loss": 0.0, "lr": 1.804052636094038e-07, "epoch": 4.126132948965383, "percentage": 82.52, "elapsed_time": "3:38:35", "remaining_time": "0:46:17", "throughput": 8678.54, "total_tokens": 113821032} +{"current_steps": 168900, "total_steps": 204665, "loss": 0.0, "lr": 1.8035640732738766e-07, "epoch": 4.12625509979723, "percentage": 82.53, "elapsed_time": "3:38:35", "remaining_time": "0:46:17", "throughput": 8678.57, "total_tokens": 113824488} +{"current_steps": 168905, "total_steps": 204665, "loss": 0.0, "lr": 1.8030755700604007e-07, "epoch": 4.126377250629076, "percentage": 82.53, "elapsed_time": "3:38:35", "remaining_time": "0:46:16", "throughput": 8678.59, "total_tokens": 113827816} +{"current_steps": 168910, "total_steps": 204665, "loss": 0.0, "lr": 1.8025871264571668e-07, "epoch": 4.126499401460924, "percentage": 82.53, "elapsed_time": "3:38:36", "remaining_time": "0:46:16", "throughput": 8678.64, "total_tokens": 113831592} +{"current_steps": 168915, "total_steps": 204665, "loss": 0.0, "lr": 1.8020987424677203e-07, "epoch": 4.126621552292771, "percentage": 82.53, "elapsed_time": "3:38:36", "remaining_time": "0:46:16", "throughput": 8678.66, "total_tokens": 113834856} +{"current_steps": 168920, "total_steps": 204665, "loss": 0.0, "lr": 1.801610418095618e-07, "epoch": 4.126743703124618, "percentage": 82.53, "elapsed_time": "3:38:36", "remaining_time": "0:46:15", "throughput": 8678.7, "total_tokens": 113838440} +{"current_steps": 168925, "total_steps": 204665, "loss": 0.0, "lr": 1.801122153344412e-07, "epoch": 4.126865853956465, "percentage": 82.54, "elapsed_time": "3:38:37", "remaining_time": "0:46:15", "throughput": 8678.71, "total_tokens": 113841512} +{"current_steps": 168930, "total_steps": 204665, "loss": 0.0, "lr": 1.800633948217648e-07, "epoch": 4.126988004788313, "percentage": 82.54, "elapsed_time": "3:38:37", "remaining_time": "0:46:14", "throughput": 8678.74, "total_tokens": 113845032} +{"current_steps": 168935, "total_steps": 204665, "loss": 0.0, "lr": 1.800145802718882e-07, "epoch": 4.1271101556201595, "percentage": 82.54, "elapsed_time": "3:38:38", "remaining_time": "0:46:14", "throughput": 8678.76, "total_tokens": 113848232} +{"current_steps": 168940, "total_steps": 204665, "loss": 0.0, "lr": 1.799657716851659e-07, "epoch": 4.127232306452007, "percentage": 82.54, "elapsed_time": "3:38:38", "remaining_time": "0:46:14", "throughput": 8678.76, "total_tokens": 113851304} +{"current_steps": 168945, "total_steps": 204665, "loss": 0.0, "lr": 1.7991696906195332e-07, "epoch": 4.127354457283854, "percentage": 82.55, "elapsed_time": "3:38:38", "remaining_time": "0:46:13", "throughput": 8678.8, "total_tokens": 113854824} +{"current_steps": 168950, "total_steps": 204665, "loss": 0.0, "lr": 1.7986817240260487e-07, "epoch": 4.1274766081157015, "percentage": 82.55, "elapsed_time": "3:38:39", "remaining_time": "0:46:13", "throughput": 8678.84, "total_tokens": 113858472} +{"current_steps": 168955, "total_steps": 204665, "loss": 0.0, "lr": 1.7981938170747591e-07, "epoch": 4.127598758947548, "percentage": 82.55, "elapsed_time": "3:38:39", "remaining_time": "0:46:12", "throughput": 8678.87, "total_tokens": 113861800} +{"current_steps": 168960, "total_steps": 204665, "loss": 0.0, "lr": 1.7977059697692065e-07, "epoch": 4.127720909779396, "percentage": 82.55, "elapsed_time": "3:38:39", "remaining_time": "0:46:12", "throughput": 8678.89, "total_tokens": 113865192} +{"current_steps": 168965, "total_steps": 204665, "loss": 0.0, "lr": 1.7972181821129462e-07, "epoch": 4.127843060611243, "percentage": 82.56, "elapsed_time": "3:38:40", "remaining_time": "0:46:12", "throughput": 8678.91, "total_tokens": 113868456} +{"current_steps": 168970, "total_steps": 204665, "loss": 0.0, "lr": 1.7967304541095206e-07, "epoch": 4.12796521144309, "percentage": 82.56, "elapsed_time": "3:38:40", "remaining_time": "0:46:11", "throughput": 8678.94, "total_tokens": 113871784} +{"current_steps": 168975, "total_steps": 204665, "loss": 0.0, "lr": 1.7962427857624752e-07, "epoch": 4.128087362274937, "percentage": 82.56, "elapsed_time": "3:38:40", "remaining_time": "0:46:11", "throughput": 8678.95, "total_tokens": 113874984} +{"current_steps": 168980, "total_steps": 204665, "loss": 0.0, "lr": 1.7957551770753598e-07, "epoch": 4.128209513106785, "percentage": 82.56, "elapsed_time": "3:38:41", "remaining_time": "0:46:10", "throughput": 8678.96, "total_tokens": 113878120} +{"current_steps": 168985, "total_steps": 204665, "loss": 0.0012, "lr": 1.795267628051721e-07, "epoch": 4.128331663938631, "percentage": 82.57, "elapsed_time": "3:38:41", "remaining_time": "0:46:10", "throughput": 8678.97, "total_tokens": 113881128} +{"current_steps": 168990, "total_steps": 204665, "loss": 0.0, "lr": 1.7947801386951e-07, "epoch": 4.128453814770479, "percentage": 82.57, "elapsed_time": "3:38:41", "remaining_time": "0:46:10", "throughput": 8678.97, "total_tokens": 113884200} +{"current_steps": 168995, "total_steps": 204665, "loss": 0.0, "lr": 1.7942927090090476e-07, "epoch": 4.128575965602326, "percentage": 82.57, "elapsed_time": "3:38:42", "remaining_time": "0:46:09", "throughput": 8679.04, "total_tokens": 113888232} +{"current_steps": 169000, "total_steps": 204665, "loss": 0.0, "lr": 1.793805338997104e-07, "epoch": 4.1286981164341725, "percentage": 82.57, "elapsed_time": "3:38:42", "remaining_time": "0:46:09", "throughput": 8679.05, "total_tokens": 113891304} +{"current_steps": 169005, "total_steps": 204665, "loss": 0.0, "lr": 1.793318028662817e-07, "epoch": 4.12882026726602, "percentage": 82.58, "elapsed_time": "3:38:42", "remaining_time": "0:46:08", "throughput": 8679.07, "total_tokens": 113894568} +{"current_steps": 169010, "total_steps": 204665, "loss": 0.0, "lr": 1.7928307780097264e-07, "epoch": 4.128942418097867, "percentage": 82.58, "elapsed_time": "3:38:43", "remaining_time": "0:46:08", "throughput": 8679.08, "total_tokens": 113897768} +{"current_steps": 169015, "total_steps": 204665, "loss": 0.0, "lr": 1.7923435870413773e-07, "epoch": 4.1290645689297145, "percentage": 82.58, "elapsed_time": "3:38:43", "remaining_time": "0:46:08", "throughput": 8679.11, "total_tokens": 113901096} +{"current_steps": 169020, "total_steps": 204665, "loss": 0.0, "lr": 1.7918564557613157e-07, "epoch": 4.129186719761561, "percentage": 82.58, "elapsed_time": "3:38:43", "remaining_time": "0:46:07", "throughput": 8679.13, "total_tokens": 113904424} +{"current_steps": 169025, "total_steps": 204665, "loss": 0.0, "lr": 1.7913693841730792e-07, "epoch": 4.129308870593409, "percentage": 82.59, "elapsed_time": "3:38:44", "remaining_time": "0:46:07", "throughput": 8679.13, "total_tokens": 113907304} +{"current_steps": 169030, "total_steps": 204665, "loss": 0.0, "lr": 1.7908823722802157e-07, "epoch": 4.129431021425256, "percentage": 82.59, "elapsed_time": "3:38:44", "remaining_time": "0:46:06", "throughput": 8679.14, "total_tokens": 113910440} +{"current_steps": 169035, "total_steps": 204665, "loss": 0.0, "lr": 1.7903954200862602e-07, "epoch": 4.129553172257103, "percentage": 82.59, "elapsed_time": "3:38:44", "remaining_time": "0:46:06", "throughput": 8679.15, "total_tokens": 113913512} +{"current_steps": 169040, "total_steps": 204665, "loss": 0.0, "lr": 1.78990852759476e-07, "epoch": 4.12967532308895, "percentage": 82.59, "elapsed_time": "3:38:45", "remaining_time": "0:46:06", "throughput": 8679.15, "total_tokens": 113916456} +{"current_steps": 169045, "total_steps": 204665, "loss": 0.0, "lr": 1.789421694809251e-07, "epoch": 4.129797473920798, "percentage": 82.6, "elapsed_time": "3:38:45", "remaining_time": "0:46:05", "throughput": 8679.16, "total_tokens": 113919656} +{"current_steps": 169050, "total_steps": 204665, "loss": 0.0, "lr": 1.788934921733276e-07, "epoch": 4.129919624752644, "percentage": 82.6, "elapsed_time": "3:38:45", "remaining_time": "0:46:05", "throughput": 8679.2, "total_tokens": 113923112} +{"current_steps": 169055, "total_steps": 204665, "loss": 0.0, "lr": 1.7884482083703766e-07, "epoch": 4.130041775584492, "percentage": 82.6, "elapsed_time": "3:38:46", "remaining_time": "0:46:04", "throughput": 8679.23, "total_tokens": 113926568} +{"current_steps": 169060, "total_steps": 204665, "loss": 0.0, "lr": 1.787961554724091e-07, "epoch": 4.130163926416339, "percentage": 82.6, "elapsed_time": "3:38:46", "remaining_time": "0:46:04", "throughput": 8679.23, "total_tokens": 113929640} +{"current_steps": 169065, "total_steps": 204665, "loss": 0.0, "lr": 1.7874749607979556e-07, "epoch": 4.130286077248186, "percentage": 82.61, "elapsed_time": "3:38:47", "remaining_time": "0:46:04", "throughput": 8679.28, "total_tokens": 113933288} +{"current_steps": 169070, "total_steps": 204665, "loss": 0.0, "lr": 1.7869884265955127e-07, "epoch": 4.130408228080033, "percentage": 82.61, "elapsed_time": "3:38:47", "remaining_time": "0:46:03", "throughput": 8679.28, "total_tokens": 113936232} +{"current_steps": 169075, "total_steps": 204665, "loss": 0.0, "lr": 1.7865019521202973e-07, "epoch": 4.130530378911881, "percentage": 82.61, "elapsed_time": "3:38:47", "remaining_time": "0:46:03", "throughput": 8679.3, "total_tokens": 113939560} +{"current_steps": 169080, "total_steps": 204665, "loss": 0.0, "lr": 1.7860155373758511e-07, "epoch": 4.1306525297437275, "percentage": 82.61, "elapsed_time": "3:38:48", "remaining_time": "0:46:02", "throughput": 8679.32, "total_tokens": 113942888} +{"current_steps": 169085, "total_steps": 204665, "loss": 0.0, "lr": 1.785529182365707e-07, "epoch": 4.130774680575575, "percentage": 82.62, "elapsed_time": "3:38:48", "remaining_time": "0:46:02", "throughput": 8679.33, "total_tokens": 113945896} +{"current_steps": 169090, "total_steps": 204665, "loss": 0.0, "lr": 1.7850428870934052e-07, "epoch": 4.130896831407422, "percentage": 82.62, "elapsed_time": "3:38:48", "remaining_time": "0:46:02", "throughput": 8679.36, "total_tokens": 113949288} +{"current_steps": 169095, "total_steps": 204665, "loss": 0.0, "lr": 1.7845566515624798e-07, "epoch": 4.131018982239269, "percentage": 82.62, "elapsed_time": "3:38:49", "remaining_time": "0:46:01", "throughput": 8679.37, "total_tokens": 113952488} +{"current_steps": 169100, "total_steps": 204665, "loss": 0.0, "lr": 1.7840704757764712e-07, "epoch": 4.131141133071116, "percentage": 82.62, "elapsed_time": "3:38:49", "remaining_time": "0:46:01", "throughput": 8679.37, "total_tokens": 113955432} +{"current_steps": 169105, "total_steps": 204665, "loss": 0.0002, "lr": 1.7835843597389088e-07, "epoch": 4.131263283902963, "percentage": 82.63, "elapsed_time": "3:38:49", "remaining_time": "0:46:00", "throughput": 8679.42, "total_tokens": 113959208} +{"current_steps": 169110, "total_steps": 204665, "loss": 0.0, "lr": 1.783098303453331e-07, "epoch": 4.131385434734811, "percentage": 82.63, "elapsed_time": "3:38:50", "remaining_time": "0:46:00", "throughput": 8679.47, "total_tokens": 113962920} +{"current_steps": 169115, "total_steps": 204665, "loss": 0.0, "lr": 1.7826123069232746e-07, "epoch": 4.131507585566657, "percentage": 82.63, "elapsed_time": "3:38:50", "remaining_time": "0:46:00", "throughput": 8679.49, "total_tokens": 113966184} +{"current_steps": 169120, "total_steps": 204665, "loss": 0.0, "lr": 1.7821263701522694e-07, "epoch": 4.131629736398505, "percentage": 82.63, "elapsed_time": "3:38:50", "remaining_time": "0:45:59", "throughput": 8679.5, "total_tokens": 113969320} +{"current_steps": 169125, "total_steps": 204665, "loss": 0.0, "lr": 1.7816404931438533e-07, "epoch": 4.131751887230352, "percentage": 82.64, "elapsed_time": "3:38:51", "remaining_time": "0:45:59", "throughput": 8679.52, "total_tokens": 113972584} +{"current_steps": 169130, "total_steps": 204665, "loss": 0.0, "lr": 1.781154675901556e-07, "epoch": 4.131874038062199, "percentage": 82.64, "elapsed_time": "3:38:51", "remaining_time": "0:45:59", "throughput": 8679.53, "total_tokens": 113975656} +{"current_steps": 169135, "total_steps": 204665, "loss": 0.0, "lr": 1.7806689184289136e-07, "epoch": 4.131996188894046, "percentage": 82.64, "elapsed_time": "3:38:51", "remaining_time": "0:45:58", "throughput": 8679.58, "total_tokens": 113979432} +{"current_steps": 169140, "total_steps": 204665, "loss": 0.0, "lr": 1.7801832207294587e-07, "epoch": 4.132118339725894, "percentage": 82.64, "elapsed_time": "3:38:52", "remaining_time": "0:45:58", "throughput": 8679.6, "total_tokens": 113982760} +{"current_steps": 169145, "total_steps": 204665, "loss": 0.0, "lr": 1.7796975828067206e-07, "epoch": 4.132240490557741, "percentage": 82.64, "elapsed_time": "3:38:52", "remaining_time": "0:45:57", "throughput": 8679.67, "total_tokens": 113986856} +{"current_steps": 169150, "total_steps": 204665, "loss": 0.0, "lr": 1.7792120046642344e-07, "epoch": 4.132362641389588, "percentage": 82.65, "elapsed_time": "3:38:52", "remaining_time": "0:45:57", "throughput": 8679.72, "total_tokens": 113990568} +{"current_steps": 169155, "total_steps": 204665, "loss": 0.0, "lr": 1.7787264863055273e-07, "epoch": 4.132484792221435, "percentage": 82.65, "elapsed_time": "3:38:53", "remaining_time": "0:45:57", "throughput": 8679.78, "total_tokens": 113994536} +{"current_steps": 169160, "total_steps": 204665, "loss": 0.0, "lr": 1.7782410277341352e-07, "epoch": 4.132606943053283, "percentage": 82.65, "elapsed_time": "3:38:53", "remaining_time": "0:45:56", "throughput": 8679.8, "total_tokens": 113997672} +{"current_steps": 169165, "total_steps": 204665, "loss": 0.0, "lr": 1.7777556289535834e-07, "epoch": 4.132729093885129, "percentage": 82.65, "elapsed_time": "3:38:54", "remaining_time": "0:45:56", "throughput": 8679.82, "total_tokens": 114001064} +{"current_steps": 169170, "total_steps": 204665, "loss": 0.0, "lr": 1.7772702899674063e-07, "epoch": 4.132851244716976, "percentage": 82.66, "elapsed_time": "3:38:54", "remaining_time": "0:45:55", "throughput": 8679.85, "total_tokens": 114004392} +{"current_steps": 169175, "total_steps": 204665, "loss": 0.0, "lr": 1.7767850107791316e-07, "epoch": 4.132973395548824, "percentage": 82.66, "elapsed_time": "3:38:54", "remaining_time": "0:45:55", "throughput": 8679.9, "total_tokens": 114008168} +{"current_steps": 169180, "total_steps": 204665, "loss": 0.0, "lr": 1.7762997913922862e-07, "epoch": 4.1330955463806704, "percentage": 82.66, "elapsed_time": "3:38:55", "remaining_time": "0:45:55", "throughput": 8679.94, "total_tokens": 114011752} +{"current_steps": 169185, "total_steps": 204665, "loss": 0.0, "lr": 1.7758146318104018e-07, "epoch": 4.133217697212518, "percentage": 82.66, "elapsed_time": "3:38:55", "remaining_time": "0:45:54", "throughput": 8679.96, "total_tokens": 114015144} +{"current_steps": 169190, "total_steps": 204665, "loss": 0.0, "lr": 1.7753295320370043e-07, "epoch": 4.133339848044365, "percentage": 82.67, "elapsed_time": "3:38:55", "remaining_time": "0:45:54", "throughput": 8679.97, "total_tokens": 114018152} +{"current_steps": 169195, "total_steps": 204665, "loss": 0.0003, "lr": 1.7748444920756245e-07, "epoch": 4.1334619988762125, "percentage": 82.67, "elapsed_time": "3:38:56", "remaining_time": "0:45:53", "throughput": 8679.98, "total_tokens": 114021416} +{"current_steps": 169200, "total_steps": 204665, "loss": 0.0, "lr": 1.774359511929785e-07, "epoch": 4.133584149708059, "percentage": 82.67, "elapsed_time": "3:38:56", "remaining_time": "0:45:53", "throughput": 8680.0, "total_tokens": 114024616} +{"current_steps": 169205, "total_steps": 204665, "loss": 0.0, "lr": 1.773874591603015e-07, "epoch": 4.133706300539907, "percentage": 82.67, "elapsed_time": "3:38:56", "remaining_time": "0:45:53", "throughput": 8680.02, "total_tokens": 114027944} +{"current_steps": 169210, "total_steps": 204665, "loss": 0.0, "lr": 1.7733897310988456e-07, "epoch": 4.133828451371754, "percentage": 82.68, "elapsed_time": "3:38:57", "remaining_time": "0:45:52", "throughput": 8680.06, "total_tokens": 114031400} +{"current_steps": 169215, "total_steps": 204665, "loss": 0.0, "lr": 1.7729049304207955e-07, "epoch": 4.133950602203601, "percentage": 82.68, "elapsed_time": "3:38:57", "remaining_time": "0:45:52", "throughput": 8680.08, "total_tokens": 114034792} +{"current_steps": 169220, "total_steps": 204665, "loss": 0.0, "lr": 1.7724201895723956e-07, "epoch": 4.134072753035448, "percentage": 82.68, "elapsed_time": "3:38:57", "remaining_time": "0:45:51", "throughput": 8680.13, "total_tokens": 114038568} +{"current_steps": 169225, "total_steps": 204665, "loss": 0.0, "lr": 1.7719355085571676e-07, "epoch": 4.134194903867296, "percentage": 82.68, "elapsed_time": "3:38:58", "remaining_time": "0:45:51", "throughput": 8680.16, "total_tokens": 114041896} +{"current_steps": 169230, "total_steps": 204665, "loss": 0.0, "lr": 1.771450887378637e-07, "epoch": 4.134317054699142, "percentage": 82.69, "elapsed_time": "3:38:58", "remaining_time": "0:45:51", "throughput": 8680.18, "total_tokens": 114045224} +{"current_steps": 169235, "total_steps": 204665, "loss": 0.0, "lr": 1.7709663260403307e-07, "epoch": 4.13443920553099, "percentage": 82.69, "elapsed_time": "3:38:58", "remaining_time": "0:45:50", "throughput": 8680.2, "total_tokens": 114048552} +{"current_steps": 169240, "total_steps": 204665, "loss": 0.0, "lr": 1.7704818245457686e-07, "epoch": 4.134561356362837, "percentage": 82.69, "elapsed_time": "3:38:59", "remaining_time": "0:45:50", "throughput": 8680.22, "total_tokens": 114051816} +{"current_steps": 169245, "total_steps": 204665, "loss": 0.0, "lr": 1.7699973828984794e-07, "epoch": 4.134683507194684, "percentage": 82.69, "elapsed_time": "3:38:59", "remaining_time": "0:45:49", "throughput": 8680.23, "total_tokens": 114054952} +{"current_steps": 169250, "total_steps": 204665, "loss": 0.0, "lr": 1.76951300110198e-07, "epoch": 4.134805658026531, "percentage": 82.7, "elapsed_time": "3:38:59", "remaining_time": "0:45:49", "throughput": 8680.24, "total_tokens": 114058024} +{"current_steps": 169255, "total_steps": 204665, "loss": 0.0, "lr": 1.7690286791597973e-07, "epoch": 4.134927808858379, "percentage": 82.7, "elapsed_time": "3:39:00", "remaining_time": "0:45:49", "throughput": 8680.26, "total_tokens": 114061352} +{"current_steps": 169260, "total_steps": 204665, "loss": 0.0, "lr": 1.7685444170754503e-07, "epoch": 4.1350499596902255, "percentage": 82.7, "elapsed_time": "3:39:00", "remaining_time": "0:45:48", "throughput": 8680.28, "total_tokens": 114064552} +{"current_steps": 169265, "total_steps": 204665, "loss": 0.0, "lr": 1.7680602148524649e-07, "epoch": 4.135172110522072, "percentage": 82.7, "elapsed_time": "3:39:01", "remaining_time": "0:45:48", "throughput": 8680.28, "total_tokens": 114067624} +{"current_steps": 169270, "total_steps": 204665, "loss": 0.0, "lr": 1.7675760724943567e-07, "epoch": 4.13529426135392, "percentage": 82.71, "elapsed_time": "3:39:01", "remaining_time": "0:45:47", "throughput": 8680.32, "total_tokens": 114071144} +{"current_steps": 169275, "total_steps": 204665, "loss": 0.0, "lr": 1.767091990004652e-07, "epoch": 4.135416412185767, "percentage": 82.71, "elapsed_time": "3:39:01", "remaining_time": "0:45:47", "throughput": 8680.34, "total_tokens": 114074472} +{"current_steps": 169280, "total_steps": 204665, "loss": 0.0, "lr": 1.7666079673868695e-07, "epoch": 4.135538563017614, "percentage": 82.71, "elapsed_time": "3:39:02", "remaining_time": "0:45:47", "throughput": 8680.44, "total_tokens": 114078952} +{"current_steps": 169285, "total_steps": 204665, "loss": 0.0, "lr": 1.7661240046445259e-07, "epoch": 4.135660713849461, "percentage": 82.71, "elapsed_time": "3:39:02", "remaining_time": "0:45:46", "throughput": 8680.47, "total_tokens": 114082536} +{"current_steps": 169290, "total_steps": 204665, "loss": 0.0, "lr": 1.7656401017811451e-07, "epoch": 4.135782864681309, "percentage": 82.72, "elapsed_time": "3:39:02", "remaining_time": "0:45:46", "throughput": 8680.51, "total_tokens": 114085992} +{"current_steps": 169295, "total_steps": 204665, "loss": 0.0, "lr": 1.7651562588002412e-07, "epoch": 4.135905015513155, "percentage": 82.72, "elapsed_time": "3:39:03", "remaining_time": "0:45:45", "throughput": 8680.52, "total_tokens": 114089192} +{"current_steps": 169300, "total_steps": 204665, "loss": 0.0, "lr": 1.7646724757053366e-07, "epoch": 4.136027166345003, "percentage": 82.72, "elapsed_time": "3:39:03", "remaining_time": "0:45:45", "throughput": 8680.54, "total_tokens": 114092456} +{"current_steps": 169305, "total_steps": 204665, "loss": 0.0, "lr": 1.7641887524999511e-07, "epoch": 4.13614931717685, "percentage": 82.72, "elapsed_time": "3:39:03", "remaining_time": "0:45:45", "throughput": 8680.56, "total_tokens": 114095592} +{"current_steps": 169310, "total_steps": 204665, "loss": 0.0, "lr": 1.7637050891875983e-07, "epoch": 4.136271468008697, "percentage": 82.73, "elapsed_time": "3:39:04", "remaining_time": "0:45:44", "throughput": 8680.58, "total_tokens": 114099048} +{"current_steps": 169315, "total_steps": 204665, "loss": 0.0, "lr": 1.7632214857717997e-07, "epoch": 4.136393618840544, "percentage": 82.73, "elapsed_time": "3:39:04", "remaining_time": "0:45:44", "throughput": 8680.6, "total_tokens": 114102248} +{"current_steps": 169320, "total_steps": 204665, "loss": 0.0, "lr": 1.7627379422560672e-07, "epoch": 4.136515769672392, "percentage": 82.73, "elapsed_time": "3:39:04", "remaining_time": "0:45:43", "throughput": 8680.65, "total_tokens": 114105960} +{"current_steps": 169325, "total_steps": 204665, "loss": 0.0, "lr": 1.762254458643919e-07, "epoch": 4.1366379205042385, "percentage": 82.73, "elapsed_time": "3:39:05", "remaining_time": "0:45:43", "throughput": 8680.67, "total_tokens": 114109352} +{"current_steps": 169330, "total_steps": 204665, "loss": 0.0005, "lr": 1.7617710349388758e-07, "epoch": 4.136760071336086, "percentage": 82.74, "elapsed_time": "3:39:05", "remaining_time": "0:45:43", "throughput": 8680.72, "total_tokens": 114113000} +{"current_steps": 169335, "total_steps": 204665, "loss": 0.0, "lr": 1.761287671144447e-07, "epoch": 4.136882222167933, "percentage": 82.74, "elapsed_time": "3:39:05", "remaining_time": "0:45:42", "throughput": 8680.72, "total_tokens": 114116072} +{"current_steps": 169340, "total_steps": 204665, "loss": 0.0, "lr": 1.7608043672641516e-07, "epoch": 4.1370043729997805, "percentage": 82.74, "elapsed_time": "3:39:06", "remaining_time": "0:45:42", "throughput": 8680.74, "total_tokens": 114119272} +{"current_steps": 169345, "total_steps": 204665, "loss": 0.0, "lr": 1.7603211233015013e-07, "epoch": 4.137126523831627, "percentage": 82.74, "elapsed_time": "3:39:06", "remaining_time": "0:45:41", "throughput": 8680.76, "total_tokens": 114122664} +{"current_steps": 169350, "total_steps": 204665, "loss": 0.0, "lr": 1.7598379392600137e-07, "epoch": 4.137248674663475, "percentage": 82.74, "elapsed_time": "3:39:06", "remaining_time": "0:45:41", "throughput": 8680.8, "total_tokens": 114126120} +{"current_steps": 169355, "total_steps": 204665, "loss": 0.0, "lr": 1.759354815143199e-07, "epoch": 4.137370825495322, "percentage": 82.75, "elapsed_time": "3:39:07", "remaining_time": "0:45:41", "throughput": 8680.82, "total_tokens": 114129448} +{"current_steps": 169360, "total_steps": 204665, "loss": 0.0, "lr": 1.7588717509545738e-07, "epoch": 4.137492976327168, "percentage": 82.75, "elapsed_time": "3:39:07", "remaining_time": "0:45:40", "throughput": 8680.83, "total_tokens": 114132584} +{"current_steps": 169365, "total_steps": 204665, "loss": 0.0, "lr": 1.75838874669765e-07, "epoch": 4.137615127159016, "percentage": 82.75, "elapsed_time": "3:39:08", "remaining_time": "0:45:40", "throughput": 8680.86, "total_tokens": 114135912} +{"current_steps": 169370, "total_steps": 204665, "loss": 0.0, "lr": 1.757905802375942e-07, "epoch": 4.137737277990863, "percentage": 82.75, "elapsed_time": "3:39:08", "remaining_time": "0:45:39", "throughput": 8680.88, "total_tokens": 114139304} +{"current_steps": 169375, "total_steps": 204665, "loss": 0.0, "lr": 1.7574229179929556e-07, "epoch": 4.13785942882271, "percentage": 82.76, "elapsed_time": "3:39:08", "remaining_time": "0:45:39", "throughput": 8680.9, "total_tokens": 114142504} +{"current_steps": 169380, "total_steps": 204665, "loss": 0.0, "lr": 1.7569400935522105e-07, "epoch": 4.137981579654557, "percentage": 82.76, "elapsed_time": "3:39:09", "remaining_time": "0:45:39", "throughput": 8680.95, "total_tokens": 114146344} +{"current_steps": 169385, "total_steps": 204665, "loss": 0.0, "lr": 1.7564573290572115e-07, "epoch": 4.138103730486405, "percentage": 82.76, "elapsed_time": "3:39:09", "remaining_time": "0:45:38", "throughput": 8680.98, "total_tokens": 114149800} +{"current_steps": 169390, "total_steps": 204665, "loss": 0.0, "lr": 1.7559746245114737e-07, "epoch": 4.1382258813182515, "percentage": 82.76, "elapsed_time": "3:39:09", "remaining_time": "0:45:38", "throughput": 8680.99, "total_tokens": 114152872} +{"current_steps": 169395, "total_steps": 204665, "loss": 0.0, "lr": 1.7554919799185041e-07, "epoch": 4.138348032150099, "percentage": 82.77, "elapsed_time": "3:39:10", "remaining_time": "0:45:38", "throughput": 8681.01, "total_tokens": 114156136} +{"current_steps": 169400, "total_steps": 204665, "loss": 0.0, "lr": 1.7550093952818168e-07, "epoch": 4.138470182981946, "percentage": 82.77, "elapsed_time": "3:39:10", "remaining_time": "0:45:37", "throughput": 8681.02, "total_tokens": 114159272} +{"current_steps": 169405, "total_steps": 204665, "loss": 0.0, "lr": 1.7545268706049155e-07, "epoch": 4.1385923338137935, "percentage": 82.77, "elapsed_time": "3:39:10", "remaining_time": "0:45:37", "throughput": 8681.07, "total_tokens": 114162920} +{"current_steps": 169410, "total_steps": 204665, "loss": 0.0, "lr": 1.7540444058913162e-07, "epoch": 4.13871448464564, "percentage": 82.77, "elapsed_time": "3:39:11", "remaining_time": "0:45:36", "throughput": 8681.09, "total_tokens": 114166248} +{"current_steps": 169415, "total_steps": 204665, "loss": 0.0419, "lr": 1.7535620011445208e-07, "epoch": 4.138836635477488, "percentage": 82.78, "elapsed_time": "3:39:11", "remaining_time": "0:45:36", "throughput": 8681.12, "total_tokens": 114169640} +{"current_steps": 169420, "total_steps": 204665, "loss": 0.0, "lr": 1.7530796563680406e-07, "epoch": 4.138958786309335, "percentage": 82.78, "elapsed_time": "3:39:11", "remaining_time": "0:45:36", "throughput": 8681.16, "total_tokens": 114173224} +{"current_steps": 169425, "total_steps": 204665, "loss": 0.0, "lr": 1.752597371565385e-07, "epoch": 4.139080937141182, "percentage": 82.78, "elapsed_time": "3:39:12", "remaining_time": "0:45:35", "throughput": 8681.18, "total_tokens": 114176424} +{"current_steps": 169430, "total_steps": 204665, "loss": 0.0, "lr": 1.7521151467400585e-07, "epoch": 4.139203087973029, "percentage": 82.78, "elapsed_time": "3:39:12", "remaining_time": "0:45:35", "throughput": 8681.21, "total_tokens": 114179880} +{"current_steps": 169435, "total_steps": 204665, "loss": 0.0, "lr": 1.7516329818955712e-07, "epoch": 4.139325238804877, "percentage": 82.79, "elapsed_time": "3:39:12", "remaining_time": "0:45:34", "throughput": 8681.22, "total_tokens": 114183016} +{"current_steps": 169440, "total_steps": 204665, "loss": 0.0, "lr": 1.7511508770354243e-07, "epoch": 4.139447389636723, "percentage": 82.79, "elapsed_time": "3:39:13", "remaining_time": "0:45:34", "throughput": 8681.23, "total_tokens": 114186152} +{"current_steps": 169445, "total_steps": 204665, "loss": 0.0, "lr": 1.75066883216313e-07, "epoch": 4.13956954046857, "percentage": 82.79, "elapsed_time": "3:39:13", "remaining_time": "0:45:34", "throughput": 8681.26, "total_tokens": 114189544} +{"current_steps": 169450, "total_steps": 204665, "loss": 0.0, "lr": 1.750186847282188e-07, "epoch": 4.139691691300418, "percentage": 82.79, "elapsed_time": "3:39:13", "remaining_time": "0:45:33", "throughput": 8681.29, "total_tokens": 114193000} +{"current_steps": 169455, "total_steps": 204665, "loss": 0.0, "lr": 1.7497049223961058e-07, "epoch": 4.1398138421322646, "percentage": 82.8, "elapsed_time": "3:39:14", "remaining_time": "0:45:33", "throughput": 8681.34, "total_tokens": 114196648} +{"current_steps": 169460, "total_steps": 204665, "loss": 0.0, "lr": 1.749223057508391e-07, "epoch": 4.139935992964112, "percentage": 82.8, "elapsed_time": "3:39:14", "remaining_time": "0:45:32", "throughput": 8681.37, "total_tokens": 114200104} +{"current_steps": 169465, "total_steps": 204665, "loss": 0.0, "lr": 1.748741252622543e-07, "epoch": 4.140058143795959, "percentage": 82.8, "elapsed_time": "3:39:14", "remaining_time": "0:45:32", "throughput": 8681.43, "total_tokens": 114204008} +{"current_steps": 169470, "total_steps": 204665, "loss": 0.0, "lr": 1.7482595077420713e-07, "epoch": 4.140180294627807, "percentage": 82.8, "elapsed_time": "3:39:15", "remaining_time": "0:45:32", "throughput": 8681.43, "total_tokens": 114206888} +{"current_steps": 169475, "total_steps": 204665, "loss": 0.0, "lr": 1.7477778228704732e-07, "epoch": 4.140302445459653, "percentage": 82.81, "elapsed_time": "3:39:15", "remaining_time": "0:45:31", "throughput": 8681.46, "total_tokens": 114210408} +{"current_steps": 169480, "total_steps": 204665, "loss": 0.0, "lr": 1.7472961980112556e-07, "epoch": 4.140424596291501, "percentage": 82.81, "elapsed_time": "3:39:16", "remaining_time": "0:45:31", "throughput": 8681.47, "total_tokens": 114213416} +{"current_steps": 169485, "total_steps": 204665, "loss": 0.0, "lr": 1.746814633167921e-07, "epoch": 4.140546747123348, "percentage": 82.81, "elapsed_time": "3:39:16", "remaining_time": "0:45:30", "throughput": 8681.51, "total_tokens": 114217000} +{"current_steps": 169490, "total_steps": 204665, "loss": 0.0, "lr": 1.7463331283439664e-07, "epoch": 4.140668897955195, "percentage": 82.81, "elapsed_time": "3:39:16", "remaining_time": "0:45:30", "throughput": 8681.52, "total_tokens": 114220072} +{"current_steps": 169495, "total_steps": 204665, "loss": 0.0, "lr": 1.7458516835429016e-07, "epoch": 4.140791048787042, "percentage": 82.82, "elapsed_time": "3:39:17", "remaining_time": "0:45:30", "throughput": 8681.55, "total_tokens": 114223592} +{"current_steps": 169500, "total_steps": 204665, "loss": 0.0, "lr": 1.7453702987682195e-07, "epoch": 4.14091319961889, "percentage": 82.82, "elapsed_time": "3:39:17", "remaining_time": "0:45:29", "throughput": 8681.6, "total_tokens": 114227304} +{"current_steps": 169505, "total_steps": 204665, "loss": 0.0, "lr": 1.7448889740234273e-07, "epoch": 4.1410353504507365, "percentage": 82.82, "elapsed_time": "3:39:17", "remaining_time": "0:45:29", "throughput": 8681.66, "total_tokens": 114231208} +{"current_steps": 169510, "total_steps": 204665, "loss": 0.0, "lr": 1.7444077093120214e-07, "epoch": 4.141157501282584, "percentage": 82.82, "elapsed_time": "3:39:18", "remaining_time": "0:45:28", "throughput": 8681.68, "total_tokens": 114234408} +{"current_steps": 169515, "total_steps": 204665, "loss": 0.0, "lr": 1.743926504637503e-07, "epoch": 4.141279652114431, "percentage": 82.83, "elapsed_time": "3:39:18", "remaining_time": "0:45:28", "throughput": 8681.71, "total_tokens": 114237736} +{"current_steps": 169520, "total_steps": 204665, "loss": 0.0, "lr": 1.7434453600033728e-07, "epoch": 4.1414018029462785, "percentage": 82.83, "elapsed_time": "3:39:18", "remaining_time": "0:45:28", "throughput": 8681.73, "total_tokens": 114241128} +{"current_steps": 169525, "total_steps": 204665, "loss": 0.0, "lr": 1.742964275413128e-07, "epoch": 4.141523953778125, "percentage": 82.83, "elapsed_time": "3:39:19", "remaining_time": "0:45:27", "throughput": 8681.77, "total_tokens": 114244584} +{"current_steps": 169530, "total_steps": 204665, "loss": 0.0, "lr": 1.7424832508702692e-07, "epoch": 4.141646104609972, "percentage": 82.83, "elapsed_time": "3:39:19", "remaining_time": "0:45:27", "throughput": 8681.79, "total_tokens": 114247912} +{"current_steps": 169535, "total_steps": 204665, "loss": 0.0, "lr": 1.74200228637829e-07, "epoch": 4.14176825544182, "percentage": 82.84, "elapsed_time": "3:39:19", "remaining_time": "0:45:26", "throughput": 8681.8, "total_tokens": 114250984} +{"current_steps": 169540, "total_steps": 204665, "loss": 0.0, "lr": 1.7415213819406926e-07, "epoch": 4.141890406273666, "percentage": 82.84, "elapsed_time": "3:39:20", "remaining_time": "0:45:26", "throughput": 8681.82, "total_tokens": 114254248} +{"current_steps": 169545, "total_steps": 204665, "loss": 0.0001, "lr": 1.741040537560976e-07, "epoch": 4.142012557105514, "percentage": 82.84, "elapsed_time": "3:39:20", "remaining_time": "0:45:26", "throughput": 8681.83, "total_tokens": 114257384} +{"current_steps": 169550, "total_steps": 204665, "loss": 0.0, "lr": 1.7405597532426297e-07, "epoch": 4.142134707937361, "percentage": 82.84, "elapsed_time": "3:39:20", "remaining_time": "0:45:25", "throughput": 8681.83, "total_tokens": 114260392} +{"current_steps": 169555, "total_steps": 204665, "loss": 0.0, "lr": 1.7400790289891588e-07, "epoch": 4.142256858769208, "percentage": 82.85, "elapsed_time": "3:39:21", "remaining_time": "0:45:25", "throughput": 8681.87, "total_tokens": 114263848} +{"current_steps": 169560, "total_steps": 204665, "loss": 0.0, "lr": 1.7395983648040513e-07, "epoch": 4.142379009601055, "percentage": 82.85, "elapsed_time": "3:39:21", "remaining_time": "0:45:24", "throughput": 8681.91, "total_tokens": 114267432} +{"current_steps": 169565, "total_steps": 204665, "loss": 0.0, "lr": 1.7391177606908081e-07, "epoch": 4.142501160432903, "percentage": 82.85, "elapsed_time": "3:39:21", "remaining_time": "0:45:24", "throughput": 8681.94, "total_tokens": 114270824} +{"current_steps": 169570, "total_steps": 204665, "loss": 0.0, "lr": 1.7386372166529218e-07, "epoch": 4.1426233112647495, "percentage": 82.85, "elapsed_time": "3:39:22", "remaining_time": "0:45:24", "throughput": 8681.97, "total_tokens": 114274280} +{"current_steps": 169575, "total_steps": 204665, "loss": 0.0, "lr": 1.7381567326938883e-07, "epoch": 4.142745462096597, "percentage": 82.85, "elapsed_time": "3:39:22", "remaining_time": "0:45:23", "throughput": 8681.99, "total_tokens": 114277608} +{"current_steps": 169580, "total_steps": 204665, "loss": 0.0, "lr": 1.7376763088171998e-07, "epoch": 4.142867612928444, "percentage": 82.86, "elapsed_time": "3:39:22", "remaining_time": "0:45:23", "throughput": 8682.02, "total_tokens": 114281064} +{"current_steps": 169585, "total_steps": 204665, "loss": 0.0, "lr": 1.737195945026354e-07, "epoch": 4.1429897637602915, "percentage": 82.86, "elapsed_time": "3:39:23", "remaining_time": "0:45:22", "throughput": 8682.04, "total_tokens": 114284200} +{"current_steps": 169590, "total_steps": 204665, "loss": 0.0, "lr": 1.7367156413248408e-07, "epoch": 4.143111914592138, "percentage": 82.86, "elapsed_time": "3:39:23", "remaining_time": "0:45:22", "throughput": 8682.05, "total_tokens": 114287400} +{"current_steps": 169595, "total_steps": 204665, "loss": 0.0, "lr": 1.7362353977161527e-07, "epoch": 4.143234065423986, "percentage": 82.86, "elapsed_time": "3:39:23", "remaining_time": "0:45:22", "throughput": 8682.1, "total_tokens": 114291112} +{"current_steps": 169600, "total_steps": 204665, "loss": 0.0, "lr": 1.7357552142037856e-07, "epoch": 4.143356216255833, "percentage": 82.87, "elapsed_time": "3:39:24", "remaining_time": "0:45:21", "throughput": 8682.17, "total_tokens": 114295144} +{"current_steps": 169605, "total_steps": 204665, "loss": 0.0, "lr": 1.735275090791226e-07, "epoch": 4.14347836708768, "percentage": 82.87, "elapsed_time": "3:39:24", "remaining_time": "0:45:21", "throughput": 8682.22, "total_tokens": 114298856} +{"current_steps": 169610, "total_steps": 204665, "loss": 0.0, "lr": 1.73479502748197e-07, "epoch": 4.143600517919527, "percentage": 82.87, "elapsed_time": "3:39:25", "remaining_time": "0:45:20", "throughput": 8682.23, "total_tokens": 114301992} +{"current_steps": 169615, "total_steps": 204665, "loss": 0.0, "lr": 1.7343150242795102e-07, "epoch": 4.143722668751375, "percentage": 82.87, "elapsed_time": "3:39:25", "remaining_time": "0:45:20", "throughput": 8682.24, "total_tokens": 114305064} +{"current_steps": 169620, "total_steps": 204665, "loss": 0.0, "lr": 1.7338350811873314e-07, "epoch": 4.143844819583221, "percentage": 82.88, "elapsed_time": "3:39:25", "remaining_time": "0:45:20", "throughput": 8682.25, "total_tokens": 114308200} +{"current_steps": 169625, "total_steps": 204665, "loss": 0.0, "lr": 1.73335519820893e-07, "epoch": 4.143966970415068, "percentage": 82.88, "elapsed_time": "3:39:26", "remaining_time": "0:45:19", "throughput": 8682.24, "total_tokens": 114310952} +{"current_steps": 169630, "total_steps": 204665, "loss": 0.0, "lr": 1.732875375347791e-07, "epoch": 4.144089121246916, "percentage": 82.88, "elapsed_time": "3:39:26", "remaining_time": "0:45:19", "throughput": 8682.27, "total_tokens": 114314408} +{"current_steps": 169635, "total_steps": 204665, "loss": 0.0, "lr": 1.7323956126074057e-07, "epoch": 4.1442112720787625, "percentage": 82.88, "elapsed_time": "3:39:26", "remaining_time": "0:45:18", "throughput": 8682.3, "total_tokens": 114317928} +{"current_steps": 169640, "total_steps": 204665, "loss": 0.0, "lr": 1.731915909991265e-07, "epoch": 4.14433342291061, "percentage": 82.89, "elapsed_time": "3:39:27", "remaining_time": "0:45:18", "throughput": 8682.3, "total_tokens": 114320808} +{"current_steps": 169645, "total_steps": 204665, "loss": 0.0, "lr": 1.7314362675028537e-07, "epoch": 4.144455573742457, "percentage": 82.89, "elapsed_time": "3:39:27", "remaining_time": "0:45:18", "throughput": 8682.36, "total_tokens": 114324584} +{"current_steps": 169650, "total_steps": 204665, "loss": 0.0, "lr": 1.7309566851456647e-07, "epoch": 4.1445777245743045, "percentage": 82.89, "elapsed_time": "3:39:27", "remaining_time": "0:45:17", "throughput": 8682.4, "total_tokens": 114328296} +{"current_steps": 169655, "total_steps": 204665, "loss": 0.0, "lr": 1.7304771629231796e-07, "epoch": 4.144699875406151, "percentage": 82.89, "elapsed_time": "3:39:28", "remaining_time": "0:45:17", "throughput": 8682.42, "total_tokens": 114331624} +{"current_steps": 169660, "total_steps": 204665, "loss": 0.0, "lr": 1.7299977008388923e-07, "epoch": 4.144822026237999, "percentage": 82.9, "elapsed_time": "3:39:28", "remaining_time": "0:45:16", "throughput": 8682.44, "total_tokens": 114334824} +{"current_steps": 169665, "total_steps": 204665, "loss": 0.0, "lr": 1.729518298896282e-07, "epoch": 4.144944177069846, "percentage": 82.9, "elapsed_time": "3:39:28", "remaining_time": "0:45:16", "throughput": 8682.44, "total_tokens": 114337768} +{"current_steps": 169670, "total_steps": 204665, "loss": 0.0, "lr": 1.7290389570988406e-07, "epoch": 4.145066327901693, "percentage": 82.9, "elapsed_time": "3:39:29", "remaining_time": "0:45:16", "throughput": 8682.44, "total_tokens": 114340712} +{"current_steps": 169675, "total_steps": 204665, "loss": 0.0, "lr": 1.728559675450054e-07, "epoch": 4.14518847873354, "percentage": 82.9, "elapsed_time": "3:39:29", "remaining_time": "0:45:15", "throughput": 8682.48, "total_tokens": 114344232} +{"current_steps": 169680, "total_steps": 204665, "loss": 0.0, "lr": 1.7280804539534066e-07, "epoch": 4.145310629565388, "percentage": 82.91, "elapsed_time": "3:39:29", "remaining_time": "0:45:15", "throughput": 8682.51, "total_tokens": 114347752} +{"current_steps": 169685, "total_steps": 204665, "loss": 0.0, "lr": 1.7276012926123807e-07, "epoch": 4.145432780397234, "percentage": 82.91, "elapsed_time": "3:39:30", "remaining_time": "0:45:15", "throughput": 8682.54, "total_tokens": 114351144} +{"current_steps": 169690, "total_steps": 204665, "loss": 0.0, "lr": 1.7271221914304657e-07, "epoch": 4.145554931229082, "percentage": 82.91, "elapsed_time": "3:39:30", "remaining_time": "0:45:14", "throughput": 8682.56, "total_tokens": 114354408} +{"current_steps": 169695, "total_steps": 204665, "loss": 0.0, "lr": 1.7266431504111413e-07, "epoch": 4.145677082060929, "percentage": 82.91, "elapsed_time": "3:39:30", "remaining_time": "0:45:14", "throughput": 8682.58, "total_tokens": 114357544} +{"current_steps": 169700, "total_steps": 204665, "loss": 0.0, "lr": 1.7261641695578943e-07, "epoch": 4.145799232892776, "percentage": 82.92, "elapsed_time": "3:39:31", "remaining_time": "0:45:13", "throughput": 8682.6, "total_tokens": 114360872} +{"current_steps": 169705, "total_steps": 204665, "loss": 0.0, "lr": 1.7256852488742057e-07, "epoch": 4.145921383724623, "percentage": 82.92, "elapsed_time": "3:39:31", "remaining_time": "0:45:13", "throughput": 8682.61, "total_tokens": 114363944} +{"current_steps": 169710, "total_steps": 204665, "loss": 0.0, "lr": 1.7252063883635604e-07, "epoch": 4.14604353455647, "percentage": 82.92, "elapsed_time": "3:39:31", "remaining_time": "0:45:13", "throughput": 8682.62, "total_tokens": 114367080} +{"current_steps": 169715, "total_steps": 204665, "loss": 0.0, "lr": 1.7247275880294388e-07, "epoch": 4.1461656853883175, "percentage": 82.92, "elapsed_time": "3:39:32", "remaining_time": "0:45:12", "throughput": 8682.63, "total_tokens": 114370152} +{"current_steps": 169720, "total_steps": 204665, "loss": 0.0, "lr": 1.7242488478753258e-07, "epoch": 4.146287836220164, "percentage": 82.93, "elapsed_time": "3:39:32", "remaining_time": "0:45:12", "throughput": 8682.68, "total_tokens": 114373928} +{"current_steps": 169725, "total_steps": 204665, "loss": 0.0, "lr": 1.723770167904699e-07, "epoch": 4.146409987052012, "percentage": 82.93, "elapsed_time": "3:39:32", "remaining_time": "0:45:11", "throughput": 8682.69, "total_tokens": 114377000} +{"current_steps": 169730, "total_steps": 204665, "loss": 0.0, "lr": 1.723291548121042e-07, "epoch": 4.146532137883859, "percentage": 82.93, "elapsed_time": "3:39:33", "remaining_time": "0:45:11", "throughput": 8682.73, "total_tokens": 114380584} +{"current_steps": 169735, "total_steps": 204665, "loss": 0.0, "lr": 1.7228129885278364e-07, "epoch": 4.146654288715706, "percentage": 82.93, "elapsed_time": "3:39:33", "remaining_time": "0:45:11", "throughput": 8682.76, "total_tokens": 114383976} +{"current_steps": 169740, "total_steps": 204665, "loss": 0.0, "lr": 1.7223344891285584e-07, "epoch": 4.146776439547553, "percentage": 82.94, "elapsed_time": "3:39:34", "remaining_time": "0:45:10", "throughput": 8682.77, "total_tokens": 114387112} +{"current_steps": 169745, "total_steps": 204665, "loss": 0.0, "lr": 1.7218560499266943e-07, "epoch": 4.146898590379401, "percentage": 82.94, "elapsed_time": "3:39:34", "remaining_time": "0:45:10", "throughput": 8682.79, "total_tokens": 114390248} +{"current_steps": 169750, "total_steps": 204665, "loss": 0.0, "lr": 1.7213776709257165e-07, "epoch": 4.147020741211247, "percentage": 82.94, "elapsed_time": "3:39:34", "remaining_time": "0:45:09", "throughput": 8682.82, "total_tokens": 114393704} +{"current_steps": 169755, "total_steps": 204665, "loss": 0.0, "lr": 1.7208993521291092e-07, "epoch": 4.147142892043095, "percentage": 82.94, "elapsed_time": "3:39:35", "remaining_time": "0:45:09", "throughput": 8682.86, "total_tokens": 114397224} +{"current_steps": 169760, "total_steps": 204665, "loss": 0.0, "lr": 1.7204210935403462e-07, "epoch": 4.147265042874942, "percentage": 82.95, "elapsed_time": "3:39:35", "remaining_time": "0:45:09", "throughput": 8682.86, "total_tokens": 114400168} +{"current_steps": 169765, "total_steps": 204665, "loss": 0.0, "lr": 1.7199428951629082e-07, "epoch": 4.147387193706789, "percentage": 82.95, "elapsed_time": "3:39:35", "remaining_time": "0:45:08", "throughput": 8682.87, "total_tokens": 114403240} +{"current_steps": 169770, "total_steps": 204665, "loss": 0.0, "lr": 1.7194647570002741e-07, "epoch": 4.147509344538636, "percentage": 82.95, "elapsed_time": "3:39:36", "remaining_time": "0:45:08", "throughput": 8682.87, "total_tokens": 114406120} +{"current_steps": 169775, "total_steps": 204665, "loss": 0.0, "lr": 1.718986679055918e-07, "epoch": 4.147631495370484, "percentage": 82.95, "elapsed_time": "3:39:36", "remaining_time": "0:45:07", "throughput": 8682.89, "total_tokens": 114409512} +{"current_steps": 169780, "total_steps": 204665, "loss": 0.0, "lr": 1.71850866133332e-07, "epoch": 4.147753646202331, "percentage": 82.96, "elapsed_time": "3:39:36", "remaining_time": "0:45:07", "throughput": 8682.93, "total_tokens": 114412968} +{"current_steps": 169785, "total_steps": 204665, "loss": 0.0, "lr": 1.718030703835952e-07, "epoch": 4.147875797034178, "percentage": 82.96, "elapsed_time": "3:39:37", "remaining_time": "0:45:07", "throughput": 8682.92, "total_tokens": 114415784} +{"current_steps": 169790, "total_steps": 204665, "loss": 0.0, "lr": 1.717552806567295e-07, "epoch": 4.147997947866025, "percentage": 82.96, "elapsed_time": "3:39:37", "remaining_time": "0:45:06", "throughput": 8682.93, "total_tokens": 114418920} +{"current_steps": 169795, "total_steps": 204665, "loss": 0.0, "lr": 1.7170749695308228e-07, "epoch": 4.148120098697872, "percentage": 82.96, "elapsed_time": "3:39:37", "remaining_time": "0:45:06", "throughput": 8682.94, "total_tokens": 114421992} +{"current_steps": 169800, "total_steps": 204665, "loss": 0.0, "lr": 1.716597192730005e-07, "epoch": 4.148242249529719, "percentage": 82.96, "elapsed_time": "3:39:38", "remaining_time": "0:45:05", "throughput": 8682.96, "total_tokens": 114425192} +{"current_steps": 169805, "total_steps": 204665, "loss": 0.0, "lr": 1.716119476168324e-07, "epoch": 4.148364400361566, "percentage": 82.97, "elapsed_time": "3:39:38", "remaining_time": "0:45:05", "throughput": 8682.98, "total_tokens": 114428520} +{"current_steps": 169810, "total_steps": 204665, "loss": 0.0, "lr": 1.7156418198492473e-07, "epoch": 4.148486551193414, "percentage": 82.97, "elapsed_time": "3:39:38", "remaining_time": "0:45:05", "throughput": 8683.03, "total_tokens": 114432296} +{"current_steps": 169815, "total_steps": 204665, "loss": 0.0, "lr": 1.7151642237762543e-07, "epoch": 4.14860870202526, "percentage": 82.97, "elapsed_time": "3:39:39", "remaining_time": "0:45:04", "throughput": 8683.07, "total_tokens": 114435880} +{"current_steps": 169820, "total_steps": 204665, "loss": 0.0, "lr": 1.7146866879528122e-07, "epoch": 4.148730852857108, "percentage": 82.97, "elapsed_time": "3:39:39", "remaining_time": "0:45:04", "throughput": 8683.1, "total_tokens": 114439208} +{"current_steps": 169825, "total_steps": 204665, "loss": 0.0, "lr": 1.714209212382398e-07, "epoch": 4.148853003688955, "percentage": 82.98, "elapsed_time": "3:39:39", "remaining_time": "0:45:03", "throughput": 8683.15, "total_tokens": 114442984} +{"current_steps": 169830, "total_steps": 204665, "loss": 0.0, "lr": 1.7137317970684851e-07, "epoch": 4.1489751545208025, "percentage": 82.98, "elapsed_time": "3:39:40", "remaining_time": "0:45:03", "throughput": 8683.15, "total_tokens": 114445992} +{"current_steps": 169835, "total_steps": 204665, "loss": 0.0, "lr": 1.71325444201454e-07, "epoch": 4.149097305352649, "percentage": 82.98, "elapsed_time": "3:39:40", "remaining_time": "0:45:03", "throughput": 8683.17, "total_tokens": 114449192} +{"current_steps": 169840, "total_steps": 204665, "loss": 0.0, "lr": 1.7127771472240404e-07, "epoch": 4.149219456184497, "percentage": 82.98, "elapsed_time": "3:39:40", "remaining_time": "0:45:02", "throughput": 8683.18, "total_tokens": 114452392} +{"current_steps": 169845, "total_steps": 204665, "loss": 0.0, "lr": 1.7122999127004522e-07, "epoch": 4.149341607016344, "percentage": 82.99, "elapsed_time": "3:39:41", "remaining_time": "0:45:02", "throughput": 8683.23, "total_tokens": 114456104} +{"current_steps": 169850, "total_steps": 204665, "loss": 0.0, "lr": 1.7118227384472482e-07, "epoch": 4.149463757848191, "percentage": 82.99, "elapsed_time": "3:39:41", "remaining_time": "0:45:01", "throughput": 8683.24, "total_tokens": 114459112} +{"current_steps": 169855, "total_steps": 204665, "loss": 0.0, "lr": 1.7113456244679014e-07, "epoch": 4.149585908680038, "percentage": 82.99, "elapsed_time": "3:39:41", "remaining_time": "0:45:01", "throughput": 8683.3, "total_tokens": 114463016} +{"current_steps": 169860, "total_steps": 204665, "loss": 0.0716, "lr": 1.7108685707658754e-07, "epoch": 4.149708059511886, "percentage": 82.99, "elapsed_time": "3:39:42", "remaining_time": "0:45:01", "throughput": 8683.3, "total_tokens": 114466088} +{"current_steps": 169865, "total_steps": 204665, "loss": 0.0, "lr": 1.7103915773446453e-07, "epoch": 4.149830210343732, "percentage": 83.0, "elapsed_time": "3:39:42", "remaining_time": "0:45:00", "throughput": 8683.33, "total_tokens": 114469416} +{"current_steps": 169870, "total_steps": 204665, "loss": 0.0, "lr": 1.709914644207675e-07, "epoch": 4.14995236117558, "percentage": 83.0, "elapsed_time": "3:39:43", "remaining_time": "0:45:00", "throughput": 8683.34, "total_tokens": 114472552} +{"current_steps": 169875, "total_steps": 204665, "loss": 0.0, "lr": 1.7094377713584374e-07, "epoch": 4.150074512007427, "percentage": 83.0, "elapsed_time": "3:39:43", "remaining_time": "0:44:59", "throughput": 8683.34, "total_tokens": 114475560} +{"current_steps": 169880, "total_steps": 204665, "loss": 0.0, "lr": 1.7089609588003962e-07, "epoch": 4.150196662839274, "percentage": 83.0, "elapsed_time": "3:39:43", "remaining_time": "0:44:59", "throughput": 8683.37, "total_tokens": 114478888} +{"current_steps": 169885, "total_steps": 204665, "loss": 0.0, "lr": 1.7084842065370232e-07, "epoch": 4.150318813671121, "percentage": 83.01, "elapsed_time": "3:39:44", "remaining_time": "0:44:59", "throughput": 8683.38, "total_tokens": 114482088} +{"current_steps": 169890, "total_steps": 204665, "loss": 0.0, "lr": 1.7080075145717798e-07, "epoch": 4.150440964502968, "percentage": 83.01, "elapsed_time": "3:39:44", "remaining_time": "0:44:58", "throughput": 8683.44, "total_tokens": 114485992} +{"current_steps": 169895, "total_steps": 204665, "loss": 0.0, "lr": 1.707530882908139e-07, "epoch": 4.1505631153348155, "percentage": 83.01, "elapsed_time": "3:39:44", "remaining_time": "0:44:58", "throughput": 8683.44, "total_tokens": 114488936} +{"current_steps": 169900, "total_steps": 204665, "loss": 0.0001, "lr": 1.707054311549565e-07, "epoch": 4.150685266166662, "percentage": 83.01, "elapsed_time": "3:39:45", "remaining_time": "0:44:57", "throughput": 8683.48, "total_tokens": 114492584} +{"current_steps": 169905, "total_steps": 204665, "loss": 0.0, "lr": 1.706577800499519e-07, "epoch": 4.15080741699851, "percentage": 83.02, "elapsed_time": "3:39:45", "remaining_time": "0:44:57", "throughput": 8683.51, "total_tokens": 114495912} +{"current_steps": 169910, "total_steps": 204665, "loss": 0.0, "lr": 1.706101349761473e-07, "epoch": 4.150929567830357, "percentage": 83.02, "elapsed_time": "3:39:45", "remaining_time": "0:44:57", "throughput": 8683.54, "total_tokens": 114499432} +{"current_steps": 169915, "total_steps": 204665, "loss": 0.0, "lr": 1.7056249593388862e-07, "epoch": 4.151051718662204, "percentage": 83.02, "elapsed_time": "3:39:46", "remaining_time": "0:44:56", "throughput": 8683.54, "total_tokens": 114502248} +{"current_steps": 169920, "total_steps": 204665, "loss": 0.0, "lr": 1.7051486292352258e-07, "epoch": 4.151173869494051, "percentage": 83.02, "elapsed_time": "3:39:46", "remaining_time": "0:44:56", "throughput": 8683.55, "total_tokens": 114505448} +{"current_steps": 169925, "total_steps": 204665, "loss": 0.0, "lr": 1.704672359453958e-07, "epoch": 4.151296020325899, "percentage": 83.03, "elapsed_time": "3:39:46", "remaining_time": "0:44:55", "throughput": 8683.56, "total_tokens": 114508520} +{"current_steps": 169930, "total_steps": 204665, "loss": 0.0, "lr": 1.7041961499985414e-07, "epoch": 4.151418171157745, "percentage": 83.03, "elapsed_time": "3:39:47", "remaining_time": "0:44:55", "throughput": 8683.57, "total_tokens": 114511656} +{"current_steps": 169935, "total_steps": 204665, "loss": 0.0, "lr": 1.703720000872444e-07, "epoch": 4.151540321989593, "percentage": 83.03, "elapsed_time": "3:39:47", "remaining_time": "0:44:55", "throughput": 8683.64, "total_tokens": 114515560} +{"current_steps": 169940, "total_steps": 204665, "loss": 0.0, "lr": 1.703243912079123e-07, "epoch": 4.15166247282144, "percentage": 83.03, "elapsed_time": "3:39:47", "remaining_time": "0:44:54", "throughput": 8683.65, "total_tokens": 114518760} +{"current_steps": 169945, "total_steps": 204665, "loss": 0.0, "lr": 1.702767883622045e-07, "epoch": 4.151784623653287, "percentage": 83.04, "elapsed_time": "3:39:48", "remaining_time": "0:44:54", "throughput": 8683.7, "total_tokens": 114522472} +{"current_steps": 169950, "total_steps": 204665, "loss": 0.0, "lr": 1.7022919155046722e-07, "epoch": 4.151906774485134, "percentage": 83.04, "elapsed_time": "3:39:48", "remaining_time": "0:44:53", "throughput": 8683.75, "total_tokens": 114526184} +{"current_steps": 169955, "total_steps": 204665, "loss": 0.0, "lr": 1.7018160077304633e-07, "epoch": 4.152028925316982, "percentage": 83.04, "elapsed_time": "3:39:48", "remaining_time": "0:44:53", "throughput": 8683.78, "total_tokens": 114529576} +{"current_steps": 169960, "total_steps": 204665, "loss": 0.0, "lr": 1.7013401603028822e-07, "epoch": 4.1521510761488285, "percentage": 83.04, "elapsed_time": "3:39:49", "remaining_time": "0:44:53", "throughput": 8683.78, "total_tokens": 114532584} +{"current_steps": 169965, "total_steps": 204665, "loss": 0.0, "lr": 1.7008643732253848e-07, "epoch": 4.152273226980676, "percentage": 83.05, "elapsed_time": "3:39:49", "remaining_time": "0:44:52", "throughput": 8683.79, "total_tokens": 114535592} +{"current_steps": 169970, "total_steps": 204665, "loss": 0.0, "lr": 1.7003886465014362e-07, "epoch": 4.152395377812523, "percentage": 83.05, "elapsed_time": "3:39:49", "remaining_time": "0:44:52", "throughput": 8683.8, "total_tokens": 114538728} +{"current_steps": 169975, "total_steps": 204665, "loss": 0.0, "lr": 1.6999129801344914e-07, "epoch": 4.1525175286443705, "percentage": 83.05, "elapsed_time": "3:39:50", "remaining_time": "0:44:51", "throughput": 8683.82, "total_tokens": 114541992} +{"current_steps": 169980, "total_steps": 204665, "loss": 0.0, "lr": 1.699437374128011e-07, "epoch": 4.152639679476217, "percentage": 83.05, "elapsed_time": "3:39:50", "remaining_time": "0:44:51", "throughput": 8683.87, "total_tokens": 114545768} +{"current_steps": 169985, "total_steps": 204665, "loss": 0.0001, "lr": 1.698961828485458e-07, "epoch": 4.152761830308064, "percentage": 83.06, "elapsed_time": "3:39:50", "remaining_time": "0:44:51", "throughput": 8683.9, "total_tokens": 114549096} +{"current_steps": 169990, "total_steps": 204665, "loss": 0.0, "lr": 1.698486343210288e-07, "epoch": 4.152883981139912, "percentage": 83.06, "elapsed_time": "3:39:51", "remaining_time": "0:44:50", "throughput": 8683.92, "total_tokens": 114552424} +{"current_steps": 169995, "total_steps": 204665, "loss": 0.0, "lr": 1.6980109183059544e-07, "epoch": 4.153006131971758, "percentage": 83.06, "elapsed_time": "3:39:51", "remaining_time": "0:44:50", "throughput": 8683.92, "total_tokens": 114555304} +{"current_steps": 170000, "total_steps": 204665, "loss": 0.0, "lr": 1.6975355537759217e-07, "epoch": 4.153128282803606, "percentage": 83.06, "elapsed_time": "3:39:51", "remaining_time": "0:44:50", "throughput": 8683.94, "total_tokens": 114558440} +{"current_steps": 170005, "total_steps": 204665, "loss": 0.0, "lr": 1.6970602496236409e-07, "epoch": 4.153250433635453, "percentage": 83.07, "elapsed_time": "3:39:52", "remaining_time": "0:44:49", "throughput": 8683.98, "total_tokens": 114562088} +{"current_steps": 170010, "total_steps": 204665, "loss": 0.0, "lr": 1.6965850058525732e-07, "epoch": 4.1533725844673, "percentage": 83.07, "elapsed_time": "3:39:52", "remaining_time": "0:44:49", "throughput": 8683.99, "total_tokens": 114565160} +{"current_steps": 170015, "total_steps": 204665, "loss": 0.0, "lr": 1.6961098224661707e-07, "epoch": 4.153494735299147, "percentage": 83.07, "elapsed_time": "3:39:53", "remaining_time": "0:44:48", "throughput": 8684.0, "total_tokens": 114568232} +{"current_steps": 170020, "total_steps": 204665, "loss": 0.0, "lr": 1.6956346994678926e-07, "epoch": 4.153616886130995, "percentage": 83.07, "elapsed_time": "3:39:53", "remaining_time": "0:44:48", "throughput": 8684.05, "total_tokens": 114572072} +{"current_steps": 170025, "total_steps": 204665, "loss": 0.0, "lr": 1.695159636861191e-07, "epoch": 4.1537390369628415, "percentage": 83.07, "elapsed_time": "3:39:53", "remaining_time": "0:44:48", "throughput": 8684.06, "total_tokens": 114575208} +{"current_steps": 170030, "total_steps": 204665, "loss": 0.0, "lr": 1.6946846346495248e-07, "epoch": 4.153861187794689, "percentage": 83.08, "elapsed_time": "3:39:54", "remaining_time": "0:44:47", "throughput": 8684.1, "total_tokens": 114578728} +{"current_steps": 170035, "total_steps": 204665, "loss": 0.0, "lr": 1.6942096928363426e-07, "epoch": 4.153983338626536, "percentage": 83.08, "elapsed_time": "3:39:54", "remaining_time": "0:44:47", "throughput": 8684.14, "total_tokens": 114582312} +{"current_steps": 170040, "total_steps": 204665, "loss": 0.0, "lr": 1.6937348114251026e-07, "epoch": 4.1541054894583835, "percentage": 83.08, "elapsed_time": "3:39:54", "remaining_time": "0:44:46", "throughput": 8684.17, "total_tokens": 114585768} +{"current_steps": 170045, "total_steps": 204665, "loss": 0.0, "lr": 1.693259990419259e-07, "epoch": 4.15422764029023, "percentage": 83.08, "elapsed_time": "3:39:55", "remaining_time": "0:44:46", "throughput": 8684.21, "total_tokens": 114589224} +{"current_steps": 170050, "total_steps": 204665, "loss": 0.0, "lr": 1.69278522982226e-07, "epoch": 4.154349791122078, "percentage": 83.09, "elapsed_time": "3:39:55", "remaining_time": "0:44:46", "throughput": 8684.24, "total_tokens": 114592616} +{"current_steps": 170055, "total_steps": 204665, "loss": 0.0, "lr": 1.6923105296375638e-07, "epoch": 4.154471941953925, "percentage": 83.09, "elapsed_time": "3:39:55", "remaining_time": "0:44:45", "throughput": 8684.27, "total_tokens": 114596136} +{"current_steps": 170060, "total_steps": 204665, "loss": 0.0, "lr": 1.691835889868618e-07, "epoch": 4.154594092785772, "percentage": 83.09, "elapsed_time": "3:39:56", "remaining_time": "0:44:45", "throughput": 8684.36, "total_tokens": 114600488} +{"current_steps": 170065, "total_steps": 204665, "loss": 0.0, "lr": 1.6913613105188785e-07, "epoch": 4.154716243617619, "percentage": 83.09, "elapsed_time": "3:39:56", "remaining_time": "0:44:44", "throughput": 8684.38, "total_tokens": 114603816} +{"current_steps": 170070, "total_steps": 204665, "loss": 0.0, "lr": 1.6908867915917924e-07, "epoch": 4.154838394449466, "percentage": 83.1, "elapsed_time": "3:39:56", "remaining_time": "0:44:44", "throughput": 8684.43, "total_tokens": 114607528} +{"current_steps": 170075, "total_steps": 204665, "loss": 0.0, "lr": 1.6904123330908117e-07, "epoch": 4.154960545281313, "percentage": 83.1, "elapsed_time": "3:39:57", "remaining_time": "0:44:44", "throughput": 8684.45, "total_tokens": 114610856} +{"current_steps": 170080, "total_steps": 204665, "loss": 0.0, "lr": 1.68993793501939e-07, "epoch": 4.15508269611316, "percentage": 83.1, "elapsed_time": "3:39:57", "remaining_time": "0:44:43", "throughput": 8684.48, "total_tokens": 114614248} +{"current_steps": 170085, "total_steps": 204665, "loss": 0.0, "lr": 1.6894635973809725e-07, "epoch": 4.155204846945008, "percentage": 83.1, "elapsed_time": "3:39:57", "remaining_time": "0:44:43", "throughput": 8684.5, "total_tokens": 114617512} +{"current_steps": 170090, "total_steps": 204665, "loss": 0.0, "lr": 1.688989320179014e-07, "epoch": 4.1553269977768545, "percentage": 83.11, "elapsed_time": "3:39:58", "remaining_time": "0:44:42", "throughput": 8684.52, "total_tokens": 114620776} +{"current_steps": 170095, "total_steps": 204665, "loss": 0.0, "lr": 1.6885151034169577e-07, "epoch": 4.155449148608702, "percentage": 83.11, "elapsed_time": "3:39:58", "remaining_time": "0:44:42", "throughput": 8684.53, "total_tokens": 114623848} +{"current_steps": 170100, "total_steps": 204665, "loss": 0.0, "lr": 1.688040947098257e-07, "epoch": 4.155571299440549, "percentage": 83.11, "elapsed_time": "3:39:58", "remaining_time": "0:44:42", "throughput": 8684.55, "total_tokens": 114627176} +{"current_steps": 170105, "total_steps": 204665, "loss": 0.0, "lr": 1.6875668512263587e-07, "epoch": 4.155693450272397, "percentage": 83.11, "elapsed_time": "3:39:59", "remaining_time": "0:44:41", "throughput": 8684.6, "total_tokens": 114630952} +{"current_steps": 170110, "total_steps": 204665, "loss": 0.1071, "lr": 1.6870928158047072e-07, "epoch": 4.155815601104243, "percentage": 83.12, "elapsed_time": "3:39:59", "remaining_time": "0:44:41", "throughput": 8684.61, "total_tokens": 114634024} +{"current_steps": 170115, "total_steps": 204665, "loss": 0.0, "lr": 1.6866188408367553e-07, "epoch": 4.155937751936091, "percentage": 83.12, "elapsed_time": "3:40:00", "remaining_time": "0:44:40", "throughput": 8684.63, "total_tokens": 114637288} +{"current_steps": 170120, "total_steps": 204665, "loss": 0.0, "lr": 1.6861449263259453e-07, "epoch": 4.156059902767938, "percentage": 83.12, "elapsed_time": "3:40:00", "remaining_time": "0:44:40", "throughput": 8684.66, "total_tokens": 114640680} +{"current_steps": 170125, "total_steps": 204665, "loss": 0.0, "lr": 1.6856710722757273e-07, "epoch": 4.156182053599785, "percentage": 83.12, "elapsed_time": "3:40:00", "remaining_time": "0:44:40", "throughput": 8684.69, "total_tokens": 114644072} +{"current_steps": 170130, "total_steps": 204665, "loss": 0.0, "lr": 1.685197278689543e-07, "epoch": 4.156304204431632, "percentage": 83.13, "elapsed_time": "3:40:01", "remaining_time": "0:44:39", "throughput": 8684.72, "total_tokens": 114647528} +{"current_steps": 170135, "total_steps": 204665, "loss": 0.0, "lr": 1.6847235455708408e-07, "epoch": 4.15642635526348, "percentage": 83.13, "elapsed_time": "3:40:01", "remaining_time": "0:44:39", "throughput": 8684.74, "total_tokens": 114650792} +{"current_steps": 170140, "total_steps": 204665, "loss": 0.0, "lr": 1.6842498729230682e-07, "epoch": 4.1565485060953264, "percentage": 83.13, "elapsed_time": "3:40:01", "remaining_time": "0:44:38", "throughput": 8684.75, "total_tokens": 114653928} +{"current_steps": 170145, "total_steps": 204665, "loss": 0.0, "lr": 1.6837762607496654e-07, "epoch": 4.156670656927174, "percentage": 83.13, "elapsed_time": "3:40:02", "remaining_time": "0:44:38", "throughput": 8684.83, "total_tokens": 114658088} +{"current_steps": 170150, "total_steps": 204665, "loss": 0.0, "lr": 1.6833027090540797e-07, "epoch": 4.156792807759021, "percentage": 83.14, "elapsed_time": "3:40:02", "remaining_time": "0:44:38", "throughput": 8684.84, "total_tokens": 114661288} +{"current_steps": 170155, "total_steps": 204665, "loss": 0.0, "lr": 1.6828292178397508e-07, "epoch": 4.156914958590868, "percentage": 83.14, "elapsed_time": "3:40:02", "remaining_time": "0:44:37", "throughput": 8684.86, "total_tokens": 114664552} +{"current_steps": 170160, "total_steps": 204665, "loss": 0.0, "lr": 1.682355787110128e-07, "epoch": 4.157037109422715, "percentage": 83.14, "elapsed_time": "3:40:03", "remaining_time": "0:44:37", "throughput": 8684.95, "total_tokens": 114668840} +{"current_steps": 170165, "total_steps": 204665, "loss": 0.0, "lr": 1.6818824168686486e-07, "epoch": 4.157159260254562, "percentage": 83.14, "elapsed_time": "3:40:03", "remaining_time": "0:44:36", "throughput": 8684.95, "total_tokens": 114671848} +{"current_steps": 170170, "total_steps": 204665, "loss": 0.0, "lr": 1.6814091071187586e-07, "epoch": 4.15728141108641, "percentage": 83.15, "elapsed_time": "3:40:03", "remaining_time": "0:44:36", "throughput": 8684.98, "total_tokens": 114675240} +{"current_steps": 170175, "total_steps": 204665, "loss": 0.0224, "lr": 1.6809358578639e-07, "epoch": 4.157403561918256, "percentage": 83.15, "elapsed_time": "3:40:04", "remaining_time": "0:44:36", "throughput": 8685.03, "total_tokens": 114679016} +{"current_steps": 170180, "total_steps": 204665, "loss": 0.0, "lr": 1.680462669107512e-07, "epoch": 4.157525712750104, "percentage": 83.15, "elapsed_time": "3:40:04", "remaining_time": "0:44:35", "throughput": 8685.06, "total_tokens": 114682344} +{"current_steps": 170185, "total_steps": 204665, "loss": 0.0, "lr": 1.6799895408530385e-07, "epoch": 4.157647863581951, "percentage": 83.15, "elapsed_time": "3:40:04", "remaining_time": "0:44:35", "throughput": 8685.08, "total_tokens": 114685672} +{"current_steps": 170190, "total_steps": 204665, "loss": 0.0, "lr": 1.679516473103917e-07, "epoch": 4.157770014413798, "percentage": 83.16, "elapsed_time": "3:40:05", "remaining_time": "0:44:34", "throughput": 8685.11, "total_tokens": 114689128} +{"current_steps": 170195, "total_steps": 204665, "loss": 0.0, "lr": 1.6790434658635922e-07, "epoch": 4.157892165245645, "percentage": 83.16, "elapsed_time": "3:40:05", "remaining_time": "0:44:34", "throughput": 8685.12, "total_tokens": 114692136} +{"current_steps": 170200, "total_steps": 204665, "loss": 0.0, "lr": 1.6785705191354983e-07, "epoch": 4.158014316077493, "percentage": 83.16, "elapsed_time": "3:40:05", "remaining_time": "0:44:34", "throughput": 8685.15, "total_tokens": 114695592} +{"current_steps": 170205, "total_steps": 204665, "loss": 0.0, "lr": 1.678097632923081e-07, "epoch": 4.1581364669093395, "percentage": 83.16, "elapsed_time": "3:40:06", "remaining_time": "0:44:33", "throughput": 8685.17, "total_tokens": 114698856} +{"current_steps": 170210, "total_steps": 204665, "loss": 0.0, "lr": 1.677624807229776e-07, "epoch": 4.158258617741187, "percentage": 83.17, "elapsed_time": "3:40:06", "remaining_time": "0:44:33", "throughput": 8685.19, "total_tokens": 114702248} +{"current_steps": 170215, "total_steps": 204665, "loss": 0.0, "lr": 1.677152042059019e-07, "epoch": 4.158380768573034, "percentage": 83.17, "elapsed_time": "3:40:06", "remaining_time": "0:44:32", "throughput": 8685.21, "total_tokens": 114705448} +{"current_steps": 170220, "total_steps": 204665, "loss": 0.0, "lr": 1.676679337414254e-07, "epoch": 4.1585029194048815, "percentage": 83.17, "elapsed_time": "3:40:07", "remaining_time": "0:44:32", "throughput": 8685.26, "total_tokens": 114709096} +{"current_steps": 170225, "total_steps": 204665, "loss": 0.0, "lr": 1.6762066932989128e-07, "epoch": 4.158625070236728, "percentage": 83.17, "elapsed_time": "3:40:07", "remaining_time": "0:44:32", "throughput": 8685.28, "total_tokens": 114712488} +{"current_steps": 170230, "total_steps": 204665, "loss": 0.0, "lr": 1.6757341097164345e-07, "epoch": 4.158747221068576, "percentage": 83.17, "elapsed_time": "3:40:08", "remaining_time": "0:44:31", "throughput": 8685.3, "total_tokens": 114715752} +{"current_steps": 170235, "total_steps": 204665, "loss": 0.0, "lr": 1.67526158667026e-07, "epoch": 4.158869371900423, "percentage": 83.18, "elapsed_time": "3:40:08", "remaining_time": "0:44:31", "throughput": 8685.31, "total_tokens": 114718760} +{"current_steps": 170240, "total_steps": 204665, "loss": 0.0, "lr": 1.67478912416382e-07, "epoch": 4.15899152273227, "percentage": 83.18, "elapsed_time": "3:40:08", "remaining_time": "0:44:30", "throughput": 8685.35, "total_tokens": 114722472} +{"current_steps": 170245, "total_steps": 204665, "loss": 0.0, "lr": 1.674316722200555e-07, "epoch": 4.159113673564117, "percentage": 83.18, "elapsed_time": "3:40:09", "remaining_time": "0:44:30", "throughput": 8685.38, "total_tokens": 114725928} +{"current_steps": 170250, "total_steps": 204665, "loss": 0.0, "lr": 1.6738443807838952e-07, "epoch": 4.159235824395964, "percentage": 83.18, "elapsed_time": "3:40:09", "remaining_time": "0:44:30", "throughput": 8685.44, "total_tokens": 114729768} +{"current_steps": 170255, "total_steps": 204665, "loss": 0.0, "lr": 1.6733720999172786e-07, "epoch": 4.159357975227811, "percentage": 83.19, "elapsed_time": "3:40:09", "remaining_time": "0:44:29", "throughput": 8685.47, "total_tokens": 114733224} +{"current_steps": 170260, "total_steps": 204665, "loss": 0.0, "lr": 1.6728998796041428e-07, "epoch": 4.159480126059658, "percentage": 83.19, "elapsed_time": "3:40:10", "remaining_time": "0:44:29", "throughput": 8685.49, "total_tokens": 114736424} +{"current_steps": 170265, "total_steps": 204665, "loss": 0.0, "lr": 1.6724277198479163e-07, "epoch": 4.159602276891506, "percentage": 83.19, "elapsed_time": "3:40:10", "remaining_time": "0:44:29", "throughput": 8685.49, "total_tokens": 114739368} +{"current_steps": 170270, "total_steps": 204665, "loss": 0.0, "lr": 1.6719556206520368e-07, "epoch": 4.1597244277233525, "percentage": 83.19, "elapsed_time": "3:40:10", "remaining_time": "0:44:28", "throughput": 8685.49, "total_tokens": 114742376} +{"current_steps": 170275, "total_steps": 204665, "loss": 0.0, "lr": 1.6714835820199347e-07, "epoch": 4.1598465785552, "percentage": 83.2, "elapsed_time": "3:40:11", "remaining_time": "0:44:28", "throughput": 8685.51, "total_tokens": 114745576} +{"current_steps": 170280, "total_steps": 204665, "loss": 0.0, "lr": 1.671011603955046e-07, "epoch": 4.159968729387047, "percentage": 83.2, "elapsed_time": "3:40:11", "remaining_time": "0:44:27", "throughput": 8685.54, "total_tokens": 114749032} +{"current_steps": 170285, "total_steps": 204665, "loss": 0.0, "lr": 1.670539686460799e-07, "epoch": 4.1600908802188945, "percentage": 83.2, "elapsed_time": "3:40:11", "remaining_time": "0:44:27", "throughput": 8685.57, "total_tokens": 114752360} +{"current_steps": 170290, "total_steps": 204665, "loss": 0.0, "lr": 1.6700678295406267e-07, "epoch": 4.160213031050741, "percentage": 83.2, "elapsed_time": "3:40:12", "remaining_time": "0:44:27", "throughput": 8685.6, "total_tokens": 114755752} +{"current_steps": 170295, "total_steps": 204665, "loss": 0.0, "lr": 1.6695960331979652e-07, "epoch": 4.160335181882589, "percentage": 83.21, "elapsed_time": "3:40:12", "remaining_time": "0:44:26", "throughput": 8685.61, "total_tokens": 114758952} +{"current_steps": 170300, "total_steps": 204665, "loss": 0.0, "lr": 1.6691242974362417e-07, "epoch": 4.160457332714436, "percentage": 83.21, "elapsed_time": "3:40:12", "remaining_time": "0:44:26", "throughput": 8685.63, "total_tokens": 114762152} +{"current_steps": 170305, "total_steps": 204665, "loss": 0.0, "lr": 1.6686526222588847e-07, "epoch": 4.160579483546283, "percentage": 83.21, "elapsed_time": "3:40:13", "remaining_time": "0:44:25", "throughput": 8685.65, "total_tokens": 114765480} +{"current_steps": 170310, "total_steps": 204665, "loss": 0.0, "lr": 1.6681810076693282e-07, "epoch": 4.16070163437813, "percentage": 83.21, "elapsed_time": "3:40:13", "remaining_time": "0:44:25", "throughput": 8685.7, "total_tokens": 114769192} +{"current_steps": 170315, "total_steps": 204665, "loss": 0.0436, "lr": 1.6677094536709991e-07, "epoch": 4.160823785209978, "percentage": 83.22, "elapsed_time": "3:40:13", "remaining_time": "0:44:25", "throughput": 8685.71, "total_tokens": 114772328} +{"current_steps": 170320, "total_steps": 204665, "loss": 0.0, "lr": 1.6672379602673303e-07, "epoch": 4.160945936041824, "percentage": 83.22, "elapsed_time": "3:40:14", "remaining_time": "0:44:24", "throughput": 8685.74, "total_tokens": 114775720} +{"current_steps": 170325, "total_steps": 204665, "loss": 0.0001, "lr": 1.666766527461745e-07, "epoch": 4.161068086873672, "percentage": 83.22, "elapsed_time": "3:40:14", "remaining_time": "0:44:24", "throughput": 8685.76, "total_tokens": 114778856} +{"current_steps": 170330, "total_steps": 204665, "loss": 0.0, "lr": 1.6662951552576787e-07, "epoch": 4.161190237705519, "percentage": 83.22, "elapsed_time": "3:40:14", "remaining_time": "0:44:23", "throughput": 8685.8, "total_tokens": 114782504} +{"current_steps": 170335, "total_steps": 204665, "loss": 0.0, "lr": 1.6658238436585515e-07, "epoch": 4.1613123885373655, "percentage": 83.23, "elapsed_time": "3:40:15", "remaining_time": "0:44:23", "throughput": 8685.82, "total_tokens": 114785704} +{"current_steps": 170340, "total_steps": 204665, "loss": 0.0, "lr": 1.665352592667798e-07, "epoch": 4.161434539369213, "percentage": 83.23, "elapsed_time": "3:40:15", "remaining_time": "0:44:23", "throughput": 8685.82, "total_tokens": 114788648} +{"current_steps": 170345, "total_steps": 204665, "loss": 0.0, "lr": 1.6648814022888403e-07, "epoch": 4.16155669020106, "percentage": 83.23, "elapsed_time": "3:40:15", "remaining_time": "0:44:22", "throughput": 8685.84, "total_tokens": 114791912} +{"current_steps": 170350, "total_steps": 204665, "loss": 0.0, "lr": 1.6644102725251063e-07, "epoch": 4.1616788410329075, "percentage": 83.23, "elapsed_time": "3:40:16", "remaining_time": "0:44:22", "throughput": 8685.87, "total_tokens": 114795368} +{"current_steps": 170355, "total_steps": 204665, "loss": 0.0, "lr": 1.663939203380026e-07, "epoch": 4.161800991864754, "percentage": 83.24, "elapsed_time": "3:40:16", "remaining_time": "0:44:21", "throughput": 8685.9, "total_tokens": 114798696} +{"current_steps": 170360, "total_steps": 204665, "loss": 0.0, "lr": 1.6634681948570183e-07, "epoch": 4.161923142696602, "percentage": 83.24, "elapsed_time": "3:40:17", "remaining_time": "0:44:21", "throughput": 8685.9, "total_tokens": 114801704} +{"current_steps": 170365, "total_steps": 204665, "loss": 0.0, "lr": 1.6629972469595155e-07, "epoch": 4.162045293528449, "percentage": 83.24, "elapsed_time": "3:40:17", "remaining_time": "0:44:21", "throughput": 8685.92, "total_tokens": 114804904} +{"current_steps": 170370, "total_steps": 204665, "loss": 0.0, "lr": 1.6625263596909368e-07, "epoch": 4.162167444360296, "percentage": 83.24, "elapsed_time": "3:40:17", "remaining_time": "0:44:20", "throughput": 8685.94, "total_tokens": 114808168} +{"current_steps": 170375, "total_steps": 204665, "loss": 0.0, "lr": 1.6620555330547104e-07, "epoch": 4.162289595192143, "percentage": 83.25, "elapsed_time": "3:40:18", "remaining_time": "0:44:20", "throughput": 8685.96, "total_tokens": 114811432} +{"current_steps": 170380, "total_steps": 204665, "loss": 0.0, "lr": 1.6615847670542572e-07, "epoch": 4.162411746023991, "percentage": 83.25, "elapsed_time": "3:40:18", "remaining_time": "0:44:19", "throughput": 8686.0, "total_tokens": 114815016} +{"current_steps": 170385, "total_steps": 204665, "loss": 0.0, "lr": 1.661114061693002e-07, "epoch": 4.162533896855837, "percentage": 83.25, "elapsed_time": "3:40:18", "remaining_time": "0:44:19", "throughput": 8686.02, "total_tokens": 114818408} +{"current_steps": 170390, "total_steps": 204665, "loss": 0.0, "lr": 1.660643416974371e-07, "epoch": 4.162656047687685, "percentage": 83.25, "elapsed_time": "3:40:19", "remaining_time": "0:44:19", "throughput": 8686.04, "total_tokens": 114821544} +{"current_steps": 170395, "total_steps": 204665, "loss": 0.0, "lr": 1.6601728329017818e-07, "epoch": 4.162778198519532, "percentage": 83.26, "elapsed_time": "3:40:19", "remaining_time": "0:44:18", "throughput": 8686.08, "total_tokens": 114825192} +{"current_steps": 170400, "total_steps": 204665, "loss": 0.0122, "lr": 1.6597023094786612e-07, "epoch": 4.162900349351379, "percentage": 83.26, "elapsed_time": "3:40:19", "remaining_time": "0:44:18", "throughput": 8686.12, "total_tokens": 114828776} +{"current_steps": 170405, "total_steps": 204665, "loss": 0.0, "lr": 1.6592318467084255e-07, "epoch": 4.163022500183226, "percentage": 83.26, "elapsed_time": "3:40:20", "remaining_time": "0:44:17", "throughput": 8686.13, "total_tokens": 114831848} +{"current_steps": 170410, "total_steps": 204665, "loss": 0.0, "lr": 1.658761444594502e-07, "epoch": 4.163144651015074, "percentage": 83.26, "elapsed_time": "3:40:20", "remaining_time": "0:44:17", "throughput": 8686.17, "total_tokens": 114835304} +{"current_steps": 170415, "total_steps": 204665, "loss": 0.0, "lr": 1.658291103140309e-07, "epoch": 4.1632668018469206, "percentage": 83.27, "elapsed_time": "3:40:20", "remaining_time": "0:44:17", "throughput": 8686.19, "total_tokens": 114838696} +{"current_steps": 170420, "total_steps": 204665, "loss": 0.0, "lr": 1.657820822349264e-07, "epoch": 4.163388952678767, "percentage": 83.27, "elapsed_time": "3:40:21", "remaining_time": "0:44:16", "throughput": 8686.2, "total_tokens": 114841768} +{"current_steps": 170425, "total_steps": 204665, "loss": 0.0794, "lr": 1.657350602224793e-07, "epoch": 4.163511103510615, "percentage": 83.27, "elapsed_time": "3:40:21", "remaining_time": "0:44:16", "throughput": 8686.23, "total_tokens": 114845032} +{"current_steps": 170430, "total_steps": 204665, "loss": 0.0, "lr": 1.6568804427703088e-07, "epoch": 4.163633254342462, "percentage": 83.27, "elapsed_time": "3:40:21", "remaining_time": "0:44:15", "throughput": 8686.25, "total_tokens": 114848424} +{"current_steps": 170435, "total_steps": 204665, "loss": 0.0, "lr": 1.6564103439892373e-07, "epoch": 4.163755405174309, "percentage": 83.28, "elapsed_time": "3:40:22", "remaining_time": "0:44:15", "throughput": 8686.28, "total_tokens": 114851816} +{"current_steps": 170440, "total_steps": 204665, "loss": 0.0, "lr": 1.6559403058849909e-07, "epoch": 4.163877556006156, "percentage": 83.28, "elapsed_time": "3:40:22", "remaining_time": "0:44:15", "throughput": 8686.32, "total_tokens": 114855464} +{"current_steps": 170445, "total_steps": 204665, "loss": 0.0, "lr": 1.6554703284609918e-07, "epoch": 4.163999706838004, "percentage": 83.28, "elapsed_time": "3:40:22", "remaining_time": "0:44:14", "throughput": 8686.35, "total_tokens": 114858856} +{"current_steps": 170450, "total_steps": 204665, "loss": 0.0, "lr": 1.6550004117206583e-07, "epoch": 4.16412185766985, "percentage": 83.28, "elapsed_time": "3:40:23", "remaining_time": "0:44:14", "throughput": 8686.39, "total_tokens": 114862376} +{"current_steps": 170455, "total_steps": 204665, "loss": 0.0, "lr": 1.6545305556674038e-07, "epoch": 4.164244008501698, "percentage": 83.28, "elapsed_time": "3:40:23", "remaining_time": "0:44:13", "throughput": 8686.38, "total_tokens": 114865256} +{"current_steps": 170460, "total_steps": 204665, "loss": 0.0, "lr": 1.6540607603046508e-07, "epoch": 4.164366159333545, "percentage": 83.29, "elapsed_time": "3:40:23", "remaining_time": "0:44:13", "throughput": 8686.41, "total_tokens": 114868520} +{"current_steps": 170465, "total_steps": 204665, "loss": 0.0, "lr": 1.653591025635811e-07, "epoch": 4.1644883101653924, "percentage": 83.29, "elapsed_time": "3:40:24", "remaining_time": "0:44:13", "throughput": 8686.41, "total_tokens": 114871592} +{"current_steps": 170470, "total_steps": 204665, "loss": 0.0, "lr": 1.6531213516643028e-07, "epoch": 4.164610460997239, "percentage": 83.29, "elapsed_time": "3:40:24", "remaining_time": "0:44:12", "throughput": 8686.43, "total_tokens": 114874856} +{"current_steps": 170475, "total_steps": 204665, "loss": 0.0, "lr": 1.6526517383935402e-07, "epoch": 4.164732611829087, "percentage": 83.29, "elapsed_time": "3:40:24", "remaining_time": "0:44:12", "throughput": 8686.46, "total_tokens": 114878248} +{"current_steps": 170480, "total_steps": 204665, "loss": 0.0, "lr": 1.652182185826939e-07, "epoch": 4.164854762660934, "percentage": 83.3, "elapsed_time": "3:40:25", "remaining_time": "0:44:11", "throughput": 8686.48, "total_tokens": 114881576} +{"current_steps": 170485, "total_steps": 204665, "loss": 0.0, "lr": 1.651712693967916e-07, "epoch": 4.164976913492781, "percentage": 83.3, "elapsed_time": "3:40:25", "remaining_time": "0:44:11", "throughput": 8686.54, "total_tokens": 114885352} +{"current_steps": 170490, "total_steps": 204665, "loss": 0.0, "lr": 1.6512432628198823e-07, "epoch": 4.165099064324628, "percentage": 83.3, "elapsed_time": "3:40:26", "remaining_time": "0:44:11", "throughput": 8686.57, "total_tokens": 114888872} +{"current_steps": 170495, "total_steps": 204665, "loss": 0.0, "lr": 1.6507738923862546e-07, "epoch": 4.165221215156476, "percentage": 83.3, "elapsed_time": "3:40:26", "remaining_time": "0:44:10", "throughput": 8686.59, "total_tokens": 114892072} +{"current_steps": 170500, "total_steps": 204665, "loss": 0.0, "lr": 1.6503045826704433e-07, "epoch": 4.165343365988322, "percentage": 83.31, "elapsed_time": "3:40:26", "remaining_time": "0:44:10", "throughput": 8686.62, "total_tokens": 114895528} +{"current_steps": 170505, "total_steps": 204665, "loss": 0.0, "lr": 1.6498353336758653e-07, "epoch": 4.16546551682017, "percentage": 83.31, "elapsed_time": "3:40:27", "remaining_time": "0:44:09", "throughput": 8686.68, "total_tokens": 114899432} +{"current_steps": 170510, "total_steps": 204665, "loss": 0.0, "lr": 1.649366145405927e-07, "epoch": 4.165587667652017, "percentage": 83.31, "elapsed_time": "3:40:27", "remaining_time": "0:44:09", "throughput": 8686.71, "total_tokens": 114902888} +{"current_steps": 170515, "total_steps": 204665, "loss": 0.0, "lr": 1.6488970178640483e-07, "epoch": 4.1657098184838635, "percentage": 83.31, "elapsed_time": "3:40:27", "remaining_time": "0:44:09", "throughput": 8686.73, "total_tokens": 114906216} +{"current_steps": 170520, "total_steps": 204665, "loss": 0.0, "lr": 1.6484279510536358e-07, "epoch": 4.165831969315711, "percentage": 83.32, "elapsed_time": "3:40:28", "remaining_time": "0:44:08", "throughput": 8686.75, "total_tokens": 114909480} +{"current_steps": 170525, "total_steps": 204665, "loss": 0.0, "lr": 1.6479589449780984e-07, "epoch": 4.165954120147558, "percentage": 83.32, "elapsed_time": "3:40:28", "remaining_time": "0:44:08", "throughput": 8686.77, "total_tokens": 114912680} +{"current_steps": 170530, "total_steps": 204665, "loss": 0.0, "lr": 1.6474899996408532e-07, "epoch": 4.1660762709794055, "percentage": 83.32, "elapsed_time": "3:40:28", "remaining_time": "0:44:08", "throughput": 8686.78, "total_tokens": 114915752} +{"current_steps": 170535, "total_steps": 204665, "loss": 0.0, "lr": 1.647021115045305e-07, "epoch": 4.166198421811252, "percentage": 83.32, "elapsed_time": "3:40:29", "remaining_time": "0:44:07", "throughput": 8686.85, "total_tokens": 114919784} +{"current_steps": 170540, "total_steps": 204665, "loss": 0.0, "lr": 1.646552291194866e-07, "epoch": 4.1663205726431, "percentage": 83.33, "elapsed_time": "3:40:29", "remaining_time": "0:44:07", "throughput": 8686.89, "total_tokens": 114923496} +{"current_steps": 170545, "total_steps": 204665, "loss": 0.0, "lr": 1.6460835280929474e-07, "epoch": 4.166442723474947, "percentage": 83.33, "elapsed_time": "3:40:29", "remaining_time": "0:44:06", "throughput": 8686.92, "total_tokens": 114926888} +{"current_steps": 170550, "total_steps": 204665, "loss": 0.0, "lr": 1.6456148257429537e-07, "epoch": 4.166564874306794, "percentage": 83.33, "elapsed_time": "3:40:30", "remaining_time": "0:44:06", "throughput": 8686.93, "total_tokens": 114930024} +{"current_steps": 170555, "total_steps": 204665, "loss": 0.0002, "lr": 1.6451461841482994e-07, "epoch": 4.166687025138641, "percentage": 83.33, "elapsed_time": "3:40:30", "remaining_time": "0:44:06", "throughput": 8686.95, "total_tokens": 114933352} +{"current_steps": 170560, "total_steps": 204665, "loss": 0.0, "lr": 1.6446776033123866e-07, "epoch": 4.166809175970489, "percentage": 83.34, "elapsed_time": "3:40:30", "remaining_time": "0:44:05", "throughput": 8686.96, "total_tokens": 114936424} +{"current_steps": 170565, "total_steps": 204665, "loss": 0.0, "lr": 1.6442090832386246e-07, "epoch": 4.166931326802335, "percentage": 83.34, "elapsed_time": "3:40:31", "remaining_time": "0:44:05", "throughput": 8686.98, "total_tokens": 114939688} +{"current_steps": 170570, "total_steps": 204665, "loss": 0.0, "lr": 1.6437406239304253e-07, "epoch": 4.167053477634183, "percentage": 83.34, "elapsed_time": "3:40:31", "remaining_time": "0:44:04", "throughput": 8686.99, "total_tokens": 114942824} +{"current_steps": 170575, "total_steps": 204665, "loss": 0.0, "lr": 1.643272225391188e-07, "epoch": 4.16717562846603, "percentage": 83.34, "elapsed_time": "3:40:31", "remaining_time": "0:44:04", "throughput": 8687.04, "total_tokens": 114946472} +{"current_steps": 170580, "total_steps": 204665, "loss": 0.0, "lr": 1.6428038876243266e-07, "epoch": 4.167297779297877, "percentage": 83.35, "elapsed_time": "3:40:32", "remaining_time": "0:44:04", "throughput": 8687.06, "total_tokens": 114949800} +{"current_steps": 170585, "total_steps": 204665, "loss": 0.0, "lr": 1.6423356106332398e-07, "epoch": 4.167419930129724, "percentage": 83.35, "elapsed_time": "3:40:32", "remaining_time": "0:44:03", "throughput": 8687.08, "total_tokens": 114953064} +{"current_steps": 170590, "total_steps": 204665, "loss": 0.0, "lr": 1.641867394421339e-07, "epoch": 4.167542080961572, "percentage": 83.35, "elapsed_time": "3:40:33", "remaining_time": "0:44:03", "throughput": 8687.13, "total_tokens": 114956840} +{"current_steps": 170595, "total_steps": 204665, "loss": 0.0, "lr": 1.641399238992024e-07, "epoch": 4.1676642317934185, "percentage": 83.35, "elapsed_time": "3:40:33", "remaining_time": "0:44:02", "throughput": 8687.14, "total_tokens": 114959848} +{"current_steps": 170600, "total_steps": 204665, "loss": 0.0, "lr": 1.640931144348703e-07, "epoch": 4.167786382625266, "percentage": 83.36, "elapsed_time": "3:40:33", "remaining_time": "0:44:02", "throughput": 8687.16, "total_tokens": 114963176} +{"current_steps": 170605, "total_steps": 204665, "loss": 0.0, "lr": 1.6404631104947798e-07, "epoch": 4.167908533457113, "percentage": 83.36, "elapsed_time": "3:40:34", "remaining_time": "0:44:02", "throughput": 8687.21, "total_tokens": 114966888} +{"current_steps": 170610, "total_steps": 204665, "loss": 0.0, "lr": 1.6399951374336585e-07, "epoch": 4.16803068428896, "percentage": 83.36, "elapsed_time": "3:40:34", "remaining_time": "0:44:01", "throughput": 8687.24, "total_tokens": 114970344} +{"current_steps": 170615, "total_steps": 204665, "loss": 0.0, "lr": 1.6395272251687386e-07, "epoch": 4.168152835120807, "percentage": 83.36, "elapsed_time": "3:40:34", "remaining_time": "0:44:01", "throughput": 8687.27, "total_tokens": 114973864} +{"current_steps": 170620, "total_steps": 204665, "loss": 0.0, "lr": 1.6390593737034276e-07, "epoch": 4.168274985952654, "percentage": 83.37, "elapsed_time": "3:40:35", "remaining_time": "0:44:00", "throughput": 8687.27, "total_tokens": 114976744} +{"current_steps": 170625, "total_steps": 204665, "loss": 0.0, "lr": 1.6385915830411223e-07, "epoch": 4.168397136784502, "percentage": 83.37, "elapsed_time": "3:40:35", "remaining_time": "0:44:00", "throughput": 8687.29, "total_tokens": 114979944} +{"current_steps": 170630, "total_steps": 204665, "loss": 0.0, "lr": 1.6381238531852314e-07, "epoch": 4.168519287616348, "percentage": 83.37, "elapsed_time": "3:40:35", "remaining_time": "0:44:00", "throughput": 8687.3, "total_tokens": 114983144} +{"current_steps": 170635, "total_steps": 204665, "loss": 0.0, "lr": 1.6376561841391501e-07, "epoch": 4.168641438448196, "percentage": 83.37, "elapsed_time": "3:40:36", "remaining_time": "0:43:59", "throughput": 8687.33, "total_tokens": 114986472} +{"current_steps": 170640, "total_steps": 204665, "loss": 0.0, "lr": 1.6371885759062853e-07, "epoch": 4.168763589280043, "percentage": 83.38, "elapsed_time": "3:40:36", "remaining_time": "0:43:59", "throughput": 8687.34, "total_tokens": 114989608} +{"current_steps": 170645, "total_steps": 204665, "loss": 0.0, "lr": 1.6367210284900324e-07, "epoch": 4.16888574011189, "percentage": 83.38, "elapsed_time": "3:40:36", "remaining_time": "0:43:58", "throughput": 8687.35, "total_tokens": 114992808} +{"current_steps": 170650, "total_steps": 204665, "loss": 0.0, "lr": 1.636253541893795e-07, "epoch": 4.169007890943737, "percentage": 83.38, "elapsed_time": "3:40:37", "remaining_time": "0:43:58", "throughput": 8687.36, "total_tokens": 114995880} +{"current_steps": 170655, "total_steps": 204665, "loss": 0.0, "lr": 1.6357861161209695e-07, "epoch": 4.169130041775585, "percentage": 83.38, "elapsed_time": "3:40:37", "remaining_time": "0:43:58", "throughput": 8687.39, "total_tokens": 114999272} +{"current_steps": 170660, "total_steps": 204665, "loss": 0.0001, "lr": 1.6353187511749565e-07, "epoch": 4.1692521926074315, "percentage": 83.39, "elapsed_time": "3:40:37", "remaining_time": "0:43:57", "throughput": 8687.42, "total_tokens": 115002664} +{"current_steps": 170665, "total_steps": 204665, "loss": 0.0, "lr": 1.6348514470591578e-07, "epoch": 4.169374343439279, "percentage": 83.39, "elapsed_time": "3:40:38", "remaining_time": "0:43:57", "throughput": 8687.43, "total_tokens": 115005800} +{"current_steps": 170670, "total_steps": 204665, "loss": 0.0, "lr": 1.6343842037769673e-07, "epoch": 4.169496494271126, "percentage": 83.39, "elapsed_time": "3:40:38", "remaining_time": "0:43:56", "throughput": 8687.46, "total_tokens": 115009256} +{"current_steps": 170675, "total_steps": 204665, "loss": 0.0, "lr": 1.6339170213317877e-07, "epoch": 4.1696186451029735, "percentage": 83.39, "elapsed_time": "3:40:38", "remaining_time": "0:43:56", "throughput": 8687.5, "total_tokens": 115012776} +{"current_steps": 170680, "total_steps": 204665, "loss": 0.0, "lr": 1.6334498997270108e-07, "epoch": 4.16974079593482, "percentage": 83.39, "elapsed_time": "3:40:39", "remaining_time": "0:43:56", "throughput": 8687.51, "total_tokens": 115015912} +{"current_steps": 170685, "total_steps": 204665, "loss": 0.0, "lr": 1.6329828389660394e-07, "epoch": 4.169862946766668, "percentage": 83.4, "elapsed_time": "3:40:39", "remaining_time": "0:43:55", "throughput": 8687.55, "total_tokens": 115019432} +{"current_steps": 170690, "total_steps": 204665, "loss": 0.0, "lr": 1.6325158390522642e-07, "epoch": 4.169985097598515, "percentage": 83.4, "elapsed_time": "3:40:39", "remaining_time": "0:43:55", "throughput": 8687.56, "total_tokens": 115022568} +{"current_steps": 170695, "total_steps": 204665, "loss": 0.0, "lr": 1.6320488999890847e-07, "epoch": 4.170107248430361, "percentage": 83.4, "elapsed_time": "3:40:40", "remaining_time": "0:43:54", "throughput": 8687.59, "total_tokens": 115026088} +{"current_steps": 170700, "total_steps": 204665, "loss": 0.0001, "lr": 1.6315820217798992e-07, "epoch": 4.170229399262209, "percentage": 83.4, "elapsed_time": "3:40:40", "remaining_time": "0:43:54", "throughput": 8687.62, "total_tokens": 115029480} +{"current_steps": 170705, "total_steps": 204665, "loss": 0.0, "lr": 1.6311152044280973e-07, "epoch": 4.170351550094056, "percentage": 83.41, "elapsed_time": "3:40:40", "remaining_time": "0:43:54", "throughput": 8687.66, "total_tokens": 115033128} +{"current_steps": 170710, "total_steps": 204665, "loss": 0.0, "lr": 1.6306484479370786e-07, "epoch": 4.170473700925903, "percentage": 83.41, "elapsed_time": "3:40:41", "remaining_time": "0:43:53", "throughput": 8687.68, "total_tokens": 115036392} +{"current_steps": 170715, "total_steps": 204665, "loss": 0.0, "lr": 1.6301817523102335e-07, "epoch": 4.17059585175775, "percentage": 83.41, "elapsed_time": "3:40:41", "remaining_time": "0:43:53", "throughput": 8687.73, "total_tokens": 115040104} +{"current_steps": 170720, "total_steps": 204665, "loss": 0.0, "lr": 1.6297151175509606e-07, "epoch": 4.170718002589598, "percentage": 83.41, "elapsed_time": "3:40:42", "remaining_time": "0:43:52", "throughput": 8687.76, "total_tokens": 115043560} +{"current_steps": 170725, "total_steps": 204665, "loss": 0.0, "lr": 1.6292485436626502e-07, "epoch": 4.1708401534214445, "percentage": 83.42, "elapsed_time": "3:40:42", "remaining_time": "0:43:52", "throughput": 8687.79, "total_tokens": 115046888} +{"current_steps": 170730, "total_steps": 204665, "loss": 0.0, "lr": 1.6287820306486944e-07, "epoch": 4.170962304253292, "percentage": 83.42, "elapsed_time": "3:40:42", "remaining_time": "0:43:52", "throughput": 8687.8, "total_tokens": 115050024} +{"current_steps": 170735, "total_steps": 204665, "loss": 0.0, "lr": 1.628315578512488e-07, "epoch": 4.171084455085139, "percentage": 83.42, "elapsed_time": "3:40:43", "remaining_time": "0:43:51", "throughput": 8687.81, "total_tokens": 115053224} +{"current_steps": 170740, "total_steps": 204665, "loss": 0.0, "lr": 1.6278491872574218e-07, "epoch": 4.1712066059169866, "percentage": 83.42, "elapsed_time": "3:40:43", "remaining_time": "0:43:51", "throughput": 8687.83, "total_tokens": 115056424} +{"current_steps": 170745, "total_steps": 204665, "loss": 0.0, "lr": 1.6273828568868886e-07, "epoch": 4.171328756748833, "percentage": 83.43, "elapsed_time": "3:40:43", "remaining_time": "0:43:50", "throughput": 8687.84, "total_tokens": 115059624} +{"current_steps": 170750, "total_steps": 204665, "loss": 0.0, "lr": 1.6269165874042788e-07, "epoch": 4.171450907580681, "percentage": 83.43, "elapsed_time": "3:40:44", "remaining_time": "0:43:50", "throughput": 8687.86, "total_tokens": 115062888} +{"current_steps": 170755, "total_steps": 204665, "loss": 0.0, "lr": 1.6264503788129825e-07, "epoch": 4.171573058412528, "percentage": 83.43, "elapsed_time": "3:40:44", "remaining_time": "0:43:50", "throughput": 8687.88, "total_tokens": 115066152} +{"current_steps": 170760, "total_steps": 204665, "loss": 0.0001, "lr": 1.625984231116394e-07, "epoch": 4.171695209244375, "percentage": 83.43, "elapsed_time": "3:40:44", "remaining_time": "0:43:49", "throughput": 8687.93, "total_tokens": 115069864} +{"current_steps": 170765, "total_steps": 204665, "loss": 0.0, "lr": 1.625518144317898e-07, "epoch": 4.171817360076222, "percentage": 83.44, "elapsed_time": "3:40:45", "remaining_time": "0:43:49", "throughput": 8687.95, "total_tokens": 115073128} +{"current_steps": 170770, "total_steps": 204665, "loss": 0.0, "lr": 1.6250521184208888e-07, "epoch": 4.17193951090807, "percentage": 83.44, "elapsed_time": "3:40:45", "remaining_time": "0:43:49", "throughput": 8687.96, "total_tokens": 115076264} +{"current_steps": 170775, "total_steps": 204665, "loss": 0.0397, "lr": 1.624586153428751e-07, "epoch": 4.172061661739916, "percentage": 83.44, "elapsed_time": "3:40:45", "remaining_time": "0:43:48", "throughput": 8688.01, "total_tokens": 115079976} +{"current_steps": 170780, "total_steps": 204665, "loss": 0.0, "lr": 1.624120249344878e-07, "epoch": 4.172183812571763, "percentage": 83.44, "elapsed_time": "3:40:46", "remaining_time": "0:43:48", "throughput": 8688.05, "total_tokens": 115083624} +{"current_steps": 170785, "total_steps": 204665, "loss": 0.0, "lr": 1.623654406172652e-07, "epoch": 4.172305963403611, "percentage": 83.45, "elapsed_time": "3:40:46", "remaining_time": "0:43:47", "throughput": 8688.07, "total_tokens": 115086824} +{"current_steps": 170790, "total_steps": 204665, "loss": 0.0, "lr": 1.6231886239154647e-07, "epoch": 4.172428114235458, "percentage": 83.45, "elapsed_time": "3:40:46", "remaining_time": "0:43:47", "throughput": 8688.11, "total_tokens": 115090536} +{"current_steps": 170795, "total_steps": 204665, "loss": 0.0, "lr": 1.6227229025767052e-07, "epoch": 4.172550265067305, "percentage": 83.45, "elapsed_time": "3:40:47", "remaining_time": "0:43:47", "throughput": 8688.11, "total_tokens": 115093480} +{"current_steps": 170800, "total_steps": 204665, "loss": 0.0, "lr": 1.6222572421597558e-07, "epoch": 4.172672415899152, "percentage": 83.45, "elapsed_time": "3:40:47", "remaining_time": "0:43:46", "throughput": 8688.13, "total_tokens": 115096680} +{"current_steps": 170805, "total_steps": 204665, "loss": 0.0, "lr": 1.621791642668008e-07, "epoch": 4.172794566731, "percentage": 83.46, "elapsed_time": "3:40:47", "remaining_time": "0:43:46", "throughput": 8688.19, "total_tokens": 115100584} +{"current_steps": 170810, "total_steps": 204665, "loss": 0.0, "lr": 1.621326104104842e-07, "epoch": 4.172916717562846, "percentage": 83.46, "elapsed_time": "3:40:48", "remaining_time": "0:43:45", "throughput": 8688.22, "total_tokens": 115104104} +{"current_steps": 170815, "total_steps": 204665, "loss": 0.0, "lr": 1.620860626473648e-07, "epoch": 4.173038868394694, "percentage": 83.46, "elapsed_time": "3:40:48", "remaining_time": "0:43:45", "throughput": 8688.24, "total_tokens": 115107304} +{"current_steps": 170820, "total_steps": 204665, "loss": 0.0, "lr": 1.6203952097778073e-07, "epoch": 4.173161019226541, "percentage": 83.46, "elapsed_time": "3:40:48", "remaining_time": "0:43:45", "throughput": 8688.27, "total_tokens": 115110696} +{"current_steps": 170825, "total_steps": 204665, "loss": 0.0, "lr": 1.6199298540207086e-07, "epoch": 4.173283170058388, "percentage": 83.47, "elapsed_time": "3:40:49", "remaining_time": "0:43:44", "throughput": 8688.28, "total_tokens": 115113896} +{"current_steps": 170830, "total_steps": 204665, "loss": 0.0, "lr": 1.6194645592057343e-07, "epoch": 4.173405320890235, "percentage": 83.47, "elapsed_time": "3:40:49", "remaining_time": "0:43:44", "throughput": 8688.31, "total_tokens": 115117288} +{"current_steps": 170835, "total_steps": 204665, "loss": 0.0, "lr": 1.6189993253362655e-07, "epoch": 4.173527471722083, "percentage": 83.47, "elapsed_time": "3:40:50", "remaining_time": "0:43:43", "throughput": 8688.33, "total_tokens": 115120488} +{"current_steps": 170840, "total_steps": 204665, "loss": 0.0, "lr": 1.6185341524156904e-07, "epoch": 4.1736496225539295, "percentage": 83.47, "elapsed_time": "3:40:50", "remaining_time": "0:43:43", "throughput": 8688.35, "total_tokens": 115123880} +{"current_steps": 170845, "total_steps": 204665, "loss": 0.0, "lr": 1.6180690404473862e-07, "epoch": 4.173771773385777, "percentage": 83.48, "elapsed_time": "3:40:50", "remaining_time": "0:43:43", "throughput": 8688.37, "total_tokens": 115127016} +{"current_steps": 170850, "total_steps": 204665, "loss": 0.0, "lr": 1.6176039894347382e-07, "epoch": 4.173893924217624, "percentage": 83.48, "elapsed_time": "3:40:51", "remaining_time": "0:43:42", "throughput": 8688.39, "total_tokens": 115130408} +{"current_steps": 170855, "total_steps": 204665, "loss": 0.0, "lr": 1.6171389993811323e-07, "epoch": 4.1740160750494715, "percentage": 83.48, "elapsed_time": "3:40:51", "remaining_time": "0:43:42", "throughput": 8688.43, "total_tokens": 115133928} +{"current_steps": 170860, "total_steps": 204665, "loss": 0.0, "lr": 1.616674070289943e-07, "epoch": 4.174138225881318, "percentage": 83.48, "elapsed_time": "3:40:51", "remaining_time": "0:43:41", "throughput": 8688.48, "total_tokens": 115137704} +{"current_steps": 170865, "total_steps": 204665, "loss": 0.0, "lr": 1.6162092021645569e-07, "epoch": 4.174260376713166, "percentage": 83.49, "elapsed_time": "3:40:52", "remaining_time": "0:43:41", "throughput": 8688.52, "total_tokens": 115141288} +{"current_steps": 170870, "total_steps": 204665, "loss": 0.0, "lr": 1.6157443950083504e-07, "epoch": 4.174382527545013, "percentage": 83.49, "elapsed_time": "3:40:52", "remaining_time": "0:43:41", "throughput": 8688.54, "total_tokens": 115144616} +{"current_steps": 170875, "total_steps": 204665, "loss": 0.0, "lr": 1.6152796488247078e-07, "epoch": 4.174504678376859, "percentage": 83.49, "elapsed_time": "3:40:52", "remaining_time": "0:43:40", "throughput": 8688.58, "total_tokens": 115148072} +{"current_steps": 170880, "total_steps": 204665, "loss": 0.0, "lr": 1.614814963617005e-07, "epoch": 4.174626829208707, "percentage": 83.49, "elapsed_time": "3:40:53", "remaining_time": "0:43:40", "throughput": 8688.61, "total_tokens": 115151464} +{"current_steps": 170885, "total_steps": 204665, "loss": 0.0, "lr": 1.6143503393886225e-07, "epoch": 4.174748980040554, "percentage": 83.49, "elapsed_time": "3:40:53", "remaining_time": "0:43:39", "throughput": 8688.64, "total_tokens": 115154920} +{"current_steps": 170890, "total_steps": 204665, "loss": 0.0, "lr": 1.6138857761429436e-07, "epoch": 4.174871130872401, "percentage": 83.5, "elapsed_time": "3:40:53", "remaining_time": "0:43:39", "throughput": 8688.66, "total_tokens": 115158184} +{"current_steps": 170895, "total_steps": 204665, "loss": 0.0, "lr": 1.6134212738833385e-07, "epoch": 4.174993281704248, "percentage": 83.5, "elapsed_time": "3:40:54", "remaining_time": "0:43:39", "throughput": 8688.67, "total_tokens": 115161256} +{"current_steps": 170900, "total_steps": 204665, "loss": 0.0, "lr": 1.6129568326131936e-07, "epoch": 4.175115432536096, "percentage": 83.5, "elapsed_time": "3:40:54", "remaining_time": "0:43:38", "throughput": 8688.69, "total_tokens": 115164520} +{"current_steps": 170905, "total_steps": 204665, "loss": 0.0002, "lr": 1.6124924523358795e-07, "epoch": 4.1752375833679425, "percentage": 83.5, "elapsed_time": "3:40:54", "remaining_time": "0:43:38", "throughput": 8688.71, "total_tokens": 115167848} +{"current_steps": 170910, "total_steps": 204665, "loss": 0.0, "lr": 1.612028133054776e-07, "epoch": 4.17535973419979, "percentage": 83.51, "elapsed_time": "3:40:55", "remaining_time": "0:43:37", "throughput": 8688.76, "total_tokens": 115171624} +{"current_steps": 170915, "total_steps": 204665, "loss": 0.0, "lr": 1.611563874773263e-07, "epoch": 4.175481885031637, "percentage": 83.51, "elapsed_time": "3:40:55", "remaining_time": "0:43:37", "throughput": 8688.79, "total_tokens": 115175016} +{"current_steps": 170920, "total_steps": 204665, "loss": 0.0, "lr": 1.6110996774947127e-07, "epoch": 4.1756040358634845, "percentage": 83.51, "elapsed_time": "3:40:55", "remaining_time": "0:43:37", "throughput": 8688.83, "total_tokens": 115178600} +{"current_steps": 170925, "total_steps": 204665, "loss": 0.0, "lr": 1.6106355412225003e-07, "epoch": 4.175726186695331, "percentage": 83.51, "elapsed_time": "3:40:56", "remaining_time": "0:43:36", "throughput": 8688.86, "total_tokens": 115181928} +{"current_steps": 170930, "total_steps": 204665, "loss": 0.0, "lr": 1.610171465960005e-07, "epoch": 4.175848337527179, "percentage": 83.52, "elapsed_time": "3:40:56", "remaining_time": "0:43:36", "throughput": 8688.88, "total_tokens": 115185192} +{"current_steps": 170935, "total_steps": 204665, "loss": 0.0307, "lr": 1.6097074517105967e-07, "epoch": 4.175970488359026, "percentage": 83.52, "elapsed_time": "3:40:56", "remaining_time": "0:43:35", "throughput": 8688.93, "total_tokens": 115188904} +{"current_steps": 170940, "total_steps": 204665, "loss": 0.0, "lr": 1.609243498477656e-07, "epoch": 4.176092639190873, "percentage": 83.52, "elapsed_time": "3:40:57", "remaining_time": "0:43:35", "throughput": 8688.94, "total_tokens": 115192040} +{"current_steps": 170945, "total_steps": 204665, "loss": 0.0, "lr": 1.6087796062645499e-07, "epoch": 4.17621479002272, "percentage": 83.52, "elapsed_time": "3:40:57", "remaining_time": "0:43:35", "throughput": 8688.95, "total_tokens": 115195240} +{"current_steps": 170950, "total_steps": 204665, "loss": 0.0, "lr": 1.608315775074658e-07, "epoch": 4.176336940854568, "percentage": 83.53, "elapsed_time": "3:40:58", "remaining_time": "0:43:34", "throughput": 8688.99, "total_tokens": 115198760} +{"current_steps": 170955, "total_steps": 204665, "loss": 0.0, "lr": 1.6078520049113485e-07, "epoch": 4.176459091686414, "percentage": 83.53, "elapsed_time": "3:40:58", "remaining_time": "0:43:34", "throughput": 8689.01, "total_tokens": 115201960} +{"current_steps": 170960, "total_steps": 204665, "loss": 0.0, "lr": 1.6073882957779993e-07, "epoch": 4.176581242518261, "percentage": 83.53, "elapsed_time": "3:40:58", "remaining_time": "0:43:33", "throughput": 8689.02, "total_tokens": 115205096} +{"current_steps": 170965, "total_steps": 204665, "loss": 0.0, "lr": 1.6069246476779774e-07, "epoch": 4.176703393350109, "percentage": 83.53, "elapsed_time": "3:40:59", "remaining_time": "0:43:33", "throughput": 8689.07, "total_tokens": 115208808} +{"current_steps": 170970, "total_steps": 204665, "loss": 0.0, "lr": 1.6064610606146567e-07, "epoch": 4.1768255441819555, "percentage": 83.54, "elapsed_time": "3:40:59", "remaining_time": "0:43:33", "throughput": 8689.07, "total_tokens": 115211816} +{"current_steps": 170975, "total_steps": 204665, "loss": 0.0, "lr": 1.60599753459141e-07, "epoch": 4.176947695013803, "percentage": 83.54, "elapsed_time": "3:40:59", "remaining_time": "0:43:32", "throughput": 8689.14, "total_tokens": 115215912} +{"current_steps": 170980, "total_steps": 204665, "loss": 0.0, "lr": 1.605534069611606e-07, "epoch": 4.17706984584565, "percentage": 83.54, "elapsed_time": "3:41:00", "remaining_time": "0:43:32", "throughput": 8689.16, "total_tokens": 115219240} +{"current_steps": 170985, "total_steps": 204665, "loss": 0.0, "lr": 1.6050706656786184e-07, "epoch": 4.1771919966774975, "percentage": 83.54, "elapsed_time": "3:41:00", "remaining_time": "0:43:31", "throughput": 8689.19, "total_tokens": 115222632} +{"current_steps": 170990, "total_steps": 204665, "loss": 0.0, "lr": 1.6046073227958123e-07, "epoch": 4.177314147509344, "percentage": 83.55, "elapsed_time": "3:41:00", "remaining_time": "0:43:31", "throughput": 8689.22, "total_tokens": 115226088} +{"current_steps": 170995, "total_steps": 204665, "loss": 0.0, "lr": 1.6041440409665618e-07, "epoch": 4.177436298341192, "percentage": 83.55, "elapsed_time": "3:41:01", "remaining_time": "0:43:31", "throughput": 8689.27, "total_tokens": 115229736} +{"current_steps": 171000, "total_steps": 204665, "loss": 0.0, "lr": 1.603680820194232e-07, "epoch": 4.177558449173039, "percentage": 83.55, "elapsed_time": "3:41:01", "remaining_time": "0:43:30", "throughput": 8689.29, "total_tokens": 115233128} +{"current_steps": 171005, "total_steps": 204665, "loss": 0.0, "lr": 1.6032176604821933e-07, "epoch": 4.177680600004886, "percentage": 83.55, "elapsed_time": "3:41:01", "remaining_time": "0:43:30", "throughput": 8689.32, "total_tokens": 115236584} +{"current_steps": 171010, "total_steps": 204665, "loss": 0.0, "lr": 1.6027545618338166e-07, "epoch": 4.177802750836733, "percentage": 83.56, "elapsed_time": "3:41:02", "remaining_time": "0:43:30", "throughput": 8689.37, "total_tokens": 115240296} +{"current_steps": 171015, "total_steps": 204665, "loss": 0.0, "lr": 1.6022915242524659e-07, "epoch": 4.177924901668581, "percentage": 83.56, "elapsed_time": "3:41:02", "remaining_time": "0:43:29", "throughput": 8689.46, "total_tokens": 115244648} +{"current_steps": 171020, "total_steps": 204665, "loss": 0.0, "lr": 1.6018285477415116e-07, "epoch": 4.178047052500427, "percentage": 83.56, "elapsed_time": "3:41:02", "remaining_time": "0:43:29", "throughput": 8689.52, "total_tokens": 115248488} +{"current_steps": 171025, "total_steps": 204665, "loss": 0.0, "lr": 1.6013656323043166e-07, "epoch": 4.178169203332275, "percentage": 83.56, "elapsed_time": "3:41:03", "remaining_time": "0:43:28", "throughput": 8689.55, "total_tokens": 115252072} +{"current_steps": 171030, "total_steps": 204665, "loss": 0.0, "lr": 1.6009027779442519e-07, "epoch": 4.178291354164122, "percentage": 83.57, "elapsed_time": "3:41:03", "remaining_time": "0:43:28", "throughput": 8689.59, "total_tokens": 115255592} +{"current_steps": 171035, "total_steps": 204665, "loss": 0.0, "lr": 1.600439984664681e-07, "epoch": 4.178413504995969, "percentage": 83.57, "elapsed_time": "3:41:03", "remaining_time": "0:43:28", "throughput": 8689.63, "total_tokens": 115259112} +{"current_steps": 171040, "total_steps": 204665, "loss": 0.0, "lr": 1.599977252468968e-07, "epoch": 4.178535655827816, "percentage": 83.57, "elapsed_time": "3:41:04", "remaining_time": "0:43:27", "throughput": 8689.62, "total_tokens": 115261864} +{"current_steps": 171045, "total_steps": 204665, "loss": 0.0, "lr": 1.5995145813604815e-07, "epoch": 4.178657806659663, "percentage": 83.57, "elapsed_time": "3:41:04", "remaining_time": "0:43:27", "throughput": 8689.63, "total_tokens": 115265000} +{"current_steps": 171050, "total_steps": 204665, "loss": 0.0, "lr": 1.5990519713425832e-07, "epoch": 4.1787799574915105, "percentage": 83.58, "elapsed_time": "3:41:05", "remaining_time": "0:43:26", "throughput": 8689.67, "total_tokens": 115268520} +{"current_steps": 171055, "total_steps": 204665, "loss": 0.0, "lr": 1.5985894224186401e-07, "epoch": 4.178902108323357, "percentage": 83.58, "elapsed_time": "3:41:05", "remaining_time": "0:43:26", "throughput": 8689.68, "total_tokens": 115271656} +{"current_steps": 171060, "total_steps": 204665, "loss": 0.0, "lr": 1.5981269345920123e-07, "epoch": 4.179024259155205, "percentage": 83.58, "elapsed_time": "3:41:05", "remaining_time": "0:43:26", "throughput": 8689.69, "total_tokens": 115274728} +{"current_steps": 171065, "total_steps": 204665, "loss": 0.0, "lr": 1.5976645078660643e-07, "epoch": 4.179146409987052, "percentage": 83.58, "elapsed_time": "3:41:06", "remaining_time": "0:43:25", "throughput": 8689.73, "total_tokens": 115278440} +{"current_steps": 171070, "total_steps": 204665, "loss": 0.0, "lr": 1.597202142244164e-07, "epoch": 4.179268560818899, "percentage": 83.59, "elapsed_time": "3:41:06", "remaining_time": "0:43:25", "throughput": 8689.76, "total_tokens": 115281832} +{"current_steps": 171075, "total_steps": 204665, "loss": 0.0, "lr": 1.5967398377296658e-07, "epoch": 4.179390711650746, "percentage": 83.59, "elapsed_time": "3:41:06", "remaining_time": "0:43:24", "throughput": 8689.78, "total_tokens": 115285160} +{"current_steps": 171080, "total_steps": 204665, "loss": 0.0002, "lr": 1.59627759432594e-07, "epoch": 4.179512862482594, "percentage": 83.59, "elapsed_time": "3:41:07", "remaining_time": "0:43:24", "throughput": 8689.79, "total_tokens": 115288232} +{"current_steps": 171085, "total_steps": 204665, "loss": 0.0, "lr": 1.5958154120363398e-07, "epoch": 4.17963501331444, "percentage": 83.59, "elapsed_time": "3:41:07", "remaining_time": "0:43:24", "throughput": 8689.8, "total_tokens": 115291304} +{"current_steps": 171090, "total_steps": 204665, "loss": 0.058, "lr": 1.595353290864233e-07, "epoch": 4.179757164146288, "percentage": 83.6, "elapsed_time": "3:41:07", "remaining_time": "0:43:23", "throughput": 8689.8, "total_tokens": 115294184} +{"current_steps": 171095, "total_steps": 204665, "loss": 0.0, "lr": 1.594891230812976e-07, "epoch": 4.179879314978135, "percentage": 83.6, "elapsed_time": "3:41:08", "remaining_time": "0:43:23", "throughput": 8689.82, "total_tokens": 115297512} +{"current_steps": 171100, "total_steps": 204665, "loss": 0.0, "lr": 1.59442923188593e-07, "epoch": 4.180001465809982, "percentage": 83.6, "elapsed_time": "3:41:08", "remaining_time": "0:43:22", "throughput": 8689.86, "total_tokens": 115301032} +{"current_steps": 171105, "total_steps": 204665, "loss": 0.0, "lr": 1.5939672940864578e-07, "epoch": 4.180123616641829, "percentage": 83.6, "elapsed_time": "3:41:08", "remaining_time": "0:43:22", "throughput": 8689.86, "total_tokens": 115303912} +{"current_steps": 171110, "total_steps": 204665, "loss": 0.0, "lr": 1.5935054174179142e-07, "epoch": 4.180245767473677, "percentage": 83.6, "elapsed_time": "3:41:09", "remaining_time": "0:43:22", "throughput": 8689.86, "total_tokens": 115306856} +{"current_steps": 171115, "total_steps": 204665, "loss": 0.0, "lr": 1.5930436018836635e-07, "epoch": 4.180367918305524, "percentage": 83.61, "elapsed_time": "3:41:09", "remaining_time": "0:43:21", "throughput": 8689.88, "total_tokens": 115310184} +{"current_steps": 171120, "total_steps": 204665, "loss": 0.0693, "lr": 1.5925818474870578e-07, "epoch": 4.180490069137371, "percentage": 83.61, "elapsed_time": "3:41:09", "remaining_time": "0:43:21", "throughput": 8689.88, "total_tokens": 115313064} +{"current_steps": 171125, "total_steps": 204665, "loss": 0.0, "lr": 1.59212015423146e-07, "epoch": 4.180612219969218, "percentage": 83.61, "elapsed_time": "3:41:10", "remaining_time": "0:43:20", "throughput": 8689.91, "total_tokens": 115316584} +{"current_steps": 171130, "total_steps": 204665, "loss": 0.0, "lr": 1.5916585221202238e-07, "epoch": 4.180734370801066, "percentage": 83.61, "elapsed_time": "3:41:10", "remaining_time": "0:43:20", "throughput": 8689.94, "total_tokens": 115319912} +{"current_steps": 171135, "total_steps": 204665, "loss": 0.0, "lr": 1.5911969511567113e-07, "epoch": 4.180856521632912, "percentage": 83.62, "elapsed_time": "3:41:10", "remaining_time": "0:43:20", "throughput": 8689.95, "total_tokens": 115323112} +{"current_steps": 171140, "total_steps": 204665, "loss": 0.0, "lr": 1.5907354413442765e-07, "epoch": 4.180978672464759, "percentage": 83.62, "elapsed_time": "3:41:11", "remaining_time": "0:43:19", "throughput": 8689.95, "total_tokens": 115325992} +{"current_steps": 171145, "total_steps": 204665, "loss": 0.0, "lr": 1.590273992686273e-07, "epoch": 4.181100823296607, "percentage": 83.62, "elapsed_time": "3:41:11", "remaining_time": "0:43:19", "throughput": 8689.99, "total_tokens": 115329576} +{"current_steps": 171150, "total_steps": 204665, "loss": 0.0, "lr": 1.5898126051860606e-07, "epoch": 4.1812229741284535, "percentage": 83.62, "elapsed_time": "3:41:11", "remaining_time": "0:43:18", "throughput": 8690.02, "total_tokens": 115332968} +{"current_steps": 171155, "total_steps": 204665, "loss": 0.0, "lr": 1.589351278846991e-07, "epoch": 4.181345124960301, "percentage": 83.63, "elapsed_time": "3:41:12", "remaining_time": "0:43:18", "throughput": 8690.03, "total_tokens": 115336168} +{"current_steps": 171160, "total_steps": 204665, "loss": 0.0, "lr": 1.5888900136724203e-07, "epoch": 4.181467275792148, "percentage": 83.63, "elapsed_time": "3:41:12", "remaining_time": "0:43:18", "throughput": 8690.06, "total_tokens": 115339624} +{"current_steps": 171165, "total_steps": 204665, "loss": 0.0, "lr": 1.5884288096657071e-07, "epoch": 4.1815894266239955, "percentage": 83.63, "elapsed_time": "3:41:12", "remaining_time": "0:43:17", "throughput": 8690.1, "total_tokens": 115343208} +{"current_steps": 171170, "total_steps": 204665, "loss": 0.0, "lr": 1.5879676668302e-07, "epoch": 4.181711577455842, "percentage": 83.63, "elapsed_time": "3:41:13", "remaining_time": "0:43:17", "throughput": 8690.11, "total_tokens": 115346344} +{"current_steps": 171175, "total_steps": 204665, "loss": 0.0, "lr": 1.587506585169256e-07, "epoch": 4.18183372828769, "percentage": 83.64, "elapsed_time": "3:41:13", "remaining_time": "0:43:16", "throughput": 8690.13, "total_tokens": 115349608} +{"current_steps": 171180, "total_steps": 204665, "loss": 0.0, "lr": 1.5870455646862246e-07, "epoch": 4.181955879119537, "percentage": 83.64, "elapsed_time": "3:41:13", "remaining_time": "0:43:16", "throughput": 8690.17, "total_tokens": 115353192} +{"current_steps": 171185, "total_steps": 204665, "loss": 0.0, "lr": 1.5865846053844634e-07, "epoch": 4.182078029951384, "percentage": 83.64, "elapsed_time": "3:41:14", "remaining_time": "0:43:16", "throughput": 8690.18, "total_tokens": 115356328} +{"current_steps": 171190, "total_steps": 204665, "loss": 0.0, "lr": 1.5861237072673194e-07, "epoch": 4.182200180783231, "percentage": 83.64, "elapsed_time": "3:41:14", "remaining_time": "0:43:15", "throughput": 8690.21, "total_tokens": 115359720} +{"current_steps": 171195, "total_steps": 204665, "loss": 0.0, "lr": 1.585662870338147e-07, "epoch": 4.182322331615079, "percentage": 83.65, "elapsed_time": "3:41:15", "remaining_time": "0:43:15", "throughput": 8690.24, "total_tokens": 115363048} +{"current_steps": 171200, "total_steps": 204665, "loss": 0.0, "lr": 1.5852020946002998e-07, "epoch": 4.182444482446925, "percentage": 83.65, "elapsed_time": "3:41:15", "remaining_time": "0:43:14", "throughput": 8690.27, "total_tokens": 115366568} +{"current_steps": 171205, "total_steps": 204665, "loss": 0.0, "lr": 1.584741380057123e-07, "epoch": 4.182566633278773, "percentage": 83.65, "elapsed_time": "3:41:15", "remaining_time": "0:43:14", "throughput": 8690.29, "total_tokens": 115369832} +{"current_steps": 171210, "total_steps": 204665, "loss": 0.0, "lr": 1.584280726711974e-07, "epoch": 4.18268878411062, "percentage": 83.65, "elapsed_time": "3:41:16", "remaining_time": "0:43:14", "throughput": 8690.31, "total_tokens": 115373096} +{"current_steps": 171215, "total_steps": 204665, "loss": 0.0, "lr": 1.5838201345681957e-07, "epoch": 4.182810934942467, "percentage": 83.66, "elapsed_time": "3:41:16", "remaining_time": "0:43:13", "throughput": 8690.31, "total_tokens": 115376040} +{"current_steps": 171220, "total_steps": 204665, "loss": 0.0, "lr": 1.5833596036291408e-07, "epoch": 4.182933085774314, "percentage": 83.66, "elapsed_time": "3:41:16", "remaining_time": "0:43:13", "throughput": 8690.38, "total_tokens": 115380072} +{"current_steps": 171225, "total_steps": 204665, "loss": 0.0, "lr": 1.5828991338981623e-07, "epoch": 4.183055236606162, "percentage": 83.66, "elapsed_time": "3:41:17", "remaining_time": "0:43:13", "throughput": 8690.4, "total_tokens": 115383464} +{"current_steps": 171230, "total_steps": 204665, "loss": 0.0, "lr": 1.5824387253786043e-07, "epoch": 4.1831773874380085, "percentage": 83.66, "elapsed_time": "3:41:17", "remaining_time": "0:43:12", "throughput": 8690.43, "total_tokens": 115386920} +{"current_steps": 171235, "total_steps": 204665, "loss": 0.001, "lr": 1.581978378073814e-07, "epoch": 4.183299538269855, "percentage": 83.67, "elapsed_time": "3:41:17", "remaining_time": "0:43:12", "throughput": 8690.48, "total_tokens": 115390632} +{"current_steps": 171240, "total_steps": 204665, "loss": 0.0, "lr": 1.581518091987144e-07, "epoch": 4.183421689101703, "percentage": 83.67, "elapsed_time": "3:41:18", "remaining_time": "0:43:11", "throughput": 8690.48, "total_tokens": 115393512} +{"current_steps": 171245, "total_steps": 204665, "loss": 0.0, "lr": 1.5810578671219355e-07, "epoch": 4.18354383993355, "percentage": 83.67, "elapsed_time": "3:41:18", "remaining_time": "0:43:11", "throughput": 8690.5, "total_tokens": 115396776} +{"current_steps": 171250, "total_steps": 204665, "loss": 0.0, "lr": 1.5805977034815409e-07, "epoch": 4.183665990765397, "percentage": 83.67, "elapsed_time": "3:41:18", "remaining_time": "0:43:11", "throughput": 8690.51, "total_tokens": 115399912} +{"current_steps": 171255, "total_steps": 204665, "loss": 0.0, "lr": 1.5801376010693024e-07, "epoch": 4.183788141597244, "percentage": 83.68, "elapsed_time": "3:41:19", "remaining_time": "0:43:10", "throughput": 8690.52, "total_tokens": 115403048} +{"current_steps": 171260, "total_steps": 204665, "loss": 0.0, "lr": 1.5796775598885703e-07, "epoch": 4.183910292429092, "percentage": 83.68, "elapsed_time": "3:41:19", "remaining_time": "0:43:10", "throughput": 8690.54, "total_tokens": 115406248} +{"current_steps": 171265, "total_steps": 204665, "loss": 0.0, "lr": 1.5792175799426855e-07, "epoch": 4.184032443260938, "percentage": 83.68, "elapsed_time": "3:41:19", "remaining_time": "0:43:09", "throughput": 8690.56, "total_tokens": 115409512} +{"current_steps": 171270, "total_steps": 204665, "loss": 0.0, "lr": 1.5787576612349973e-07, "epoch": 4.184154594092786, "percentage": 83.68, "elapsed_time": "3:41:20", "remaining_time": "0:43:09", "throughput": 8690.58, "total_tokens": 115412840} +{"current_steps": 171275, "total_steps": 204665, "loss": 0.0, "lr": 1.5782978037688466e-07, "epoch": 4.184276744924633, "percentage": 83.69, "elapsed_time": "3:41:20", "remaining_time": "0:43:09", "throughput": 8690.59, "total_tokens": 115415976} +{"current_steps": 171280, "total_steps": 204665, "loss": 0.0, "lr": 1.5778380075475818e-07, "epoch": 4.18439889575648, "percentage": 83.69, "elapsed_time": "3:41:20", "remaining_time": "0:43:08", "throughput": 8690.62, "total_tokens": 115419432} +{"current_steps": 171285, "total_steps": 204665, "loss": 0.0, "lr": 1.5773782725745412e-07, "epoch": 4.184521046588327, "percentage": 83.69, "elapsed_time": "3:41:21", "remaining_time": "0:43:08", "throughput": 8690.66, "total_tokens": 115422952} +{"current_steps": 171290, "total_steps": 204665, "loss": 0.0, "lr": 1.576918598853072e-07, "epoch": 4.184643197420175, "percentage": 83.69, "elapsed_time": "3:41:21", "remaining_time": "0:43:07", "throughput": 8690.68, "total_tokens": 115426216} +{"current_steps": 171295, "total_steps": 204665, "loss": 0.0, "lr": 1.5764589863865187e-07, "epoch": 4.1847653482520215, "percentage": 83.7, "elapsed_time": "3:41:21", "remaining_time": "0:43:07", "throughput": 8690.72, "total_tokens": 115429864} +{"current_steps": 171300, "total_steps": 204665, "loss": 0.0002, "lr": 1.575999435178218e-07, "epoch": 4.184887499083869, "percentage": 83.7, "elapsed_time": "3:41:22", "remaining_time": "0:43:07", "throughput": 8690.77, "total_tokens": 115433640} +{"current_steps": 171305, "total_steps": 204665, "loss": 0.0, "lr": 1.5755399452315176e-07, "epoch": 4.185009649915716, "percentage": 83.7, "elapsed_time": "3:41:22", "remaining_time": "0:43:06", "throughput": 8690.79, "total_tokens": 115436840} +{"current_steps": 171310, "total_steps": 204665, "loss": 0.0, "lr": 1.575080516549755e-07, "epoch": 4.185131800747563, "percentage": 83.7, "elapsed_time": "3:41:23", "remaining_time": "0:43:06", "throughput": 8690.82, "total_tokens": 115440360} +{"current_steps": 171315, "total_steps": 204665, "loss": 0.0, "lr": 1.5746211491362726e-07, "epoch": 4.18525395157941, "percentage": 83.71, "elapsed_time": "3:41:23", "remaining_time": "0:43:05", "throughput": 8690.86, "total_tokens": 115443880} +{"current_steps": 171320, "total_steps": 204665, "loss": 0.0, "lr": 1.5741618429944136e-07, "epoch": 4.185376102411257, "percentage": 83.71, "elapsed_time": "3:41:23", "remaining_time": "0:43:05", "throughput": 8690.88, "total_tokens": 115447144} +{"current_steps": 171325, "total_steps": 204665, "loss": 0.0, "lr": 1.5737025981275143e-07, "epoch": 4.185498253243105, "percentage": 83.71, "elapsed_time": "3:41:24", "remaining_time": "0:43:05", "throughput": 8690.91, "total_tokens": 115450600} +{"current_steps": 171330, "total_steps": 204665, "loss": 0.0, "lr": 1.5732434145389185e-07, "epoch": 4.185620404074951, "percentage": 83.71, "elapsed_time": "3:41:24", "remaining_time": "0:43:04", "throughput": 8690.93, "total_tokens": 115453864} +{"current_steps": 171335, "total_steps": 204665, "loss": 0.0, "lr": 1.57278429223196e-07, "epoch": 4.185742554906799, "percentage": 83.71, "elapsed_time": "3:41:24", "remaining_time": "0:43:04", "throughput": 8690.94, "total_tokens": 115457064} +{"current_steps": 171340, "total_steps": 204665, "loss": 0.0, "lr": 1.5723252312099832e-07, "epoch": 4.185864705738646, "percentage": 83.72, "elapsed_time": "3:41:25", "remaining_time": "0:43:03", "throughput": 8690.96, "total_tokens": 115460264} +{"current_steps": 171345, "total_steps": 204665, "loss": 0.0, "lr": 1.5718662314763242e-07, "epoch": 4.185986856570493, "percentage": 83.72, "elapsed_time": "3:41:25", "remaining_time": "0:43:03", "throughput": 8690.98, "total_tokens": 115463528} +{"current_steps": 171350, "total_steps": 204665, "loss": 0.0, "lr": 1.571407293034319e-07, "epoch": 4.18610900740234, "percentage": 83.72, "elapsed_time": "3:41:25", "remaining_time": "0:43:03", "throughput": 8690.99, "total_tokens": 115466600} +{"current_steps": 171355, "total_steps": 204665, "loss": 0.0, "lr": 1.5709484158873088e-07, "epoch": 4.186231158234188, "percentage": 83.72, "elapsed_time": "3:41:26", "remaining_time": "0:43:02", "throughput": 8691.03, "total_tokens": 115470184} +{"current_steps": 171360, "total_steps": 204665, "loss": 0.0, "lr": 1.570489600038627e-07, "epoch": 4.1863533090660345, "percentage": 83.73, "elapsed_time": "3:41:26", "remaining_time": "0:43:02", "throughput": 8691.07, "total_tokens": 115473832} +{"current_steps": 171365, "total_steps": 204665, "loss": 0.0, "lr": 1.5700308454916132e-07, "epoch": 4.186475459897882, "percentage": 83.73, "elapsed_time": "3:41:26", "remaining_time": "0:43:01", "throughput": 8691.09, "total_tokens": 115477096} +{"current_steps": 171370, "total_steps": 204665, "loss": 0.0, "lr": 1.5695721522496007e-07, "epoch": 4.186597610729729, "percentage": 83.73, "elapsed_time": "3:41:27", "remaining_time": "0:43:01", "throughput": 8691.14, "total_tokens": 115480808} +{"current_steps": 171375, "total_steps": 204665, "loss": 0.0, "lr": 1.5691135203159277e-07, "epoch": 4.1867197615615765, "percentage": 83.73, "elapsed_time": "3:41:27", "remaining_time": "0:43:01", "throughput": 8691.18, "total_tokens": 115484392} +{"current_steps": 171380, "total_steps": 204665, "loss": 0.0, "lr": 1.5686549496939306e-07, "epoch": 4.186841912393423, "percentage": 83.74, "elapsed_time": "3:41:27", "remaining_time": "0:43:00", "throughput": 8691.21, "total_tokens": 115487912} +{"current_steps": 171385, "total_steps": 204665, "loss": 0.0, "lr": 1.5681964403869408e-07, "epoch": 4.186964063225271, "percentage": 83.74, "elapsed_time": "3:41:28", "remaining_time": "0:43:00", "throughput": 8691.22, "total_tokens": 115490984} +{"current_steps": 171390, "total_steps": 204665, "loss": 0.0162, "lr": 1.5677379923982958e-07, "epoch": 4.187086214057118, "percentage": 83.74, "elapsed_time": "3:41:28", "remaining_time": "0:42:59", "throughput": 8691.25, "total_tokens": 115494376} +{"current_steps": 171395, "total_steps": 204665, "loss": 0.0, "lr": 1.5672796057313265e-07, "epoch": 4.187208364888965, "percentage": 83.74, "elapsed_time": "3:41:28", "remaining_time": "0:42:59", "throughput": 8691.26, "total_tokens": 115497576} +{"current_steps": 171400, "total_steps": 204665, "loss": 0.0, "lr": 1.5668212803893698e-07, "epoch": 4.187330515720812, "percentage": 83.75, "elapsed_time": "3:41:29", "remaining_time": "0:42:59", "throughput": 8691.28, "total_tokens": 115500776} +{"current_steps": 171405, "total_steps": 204665, "loss": 0.0, "lr": 1.5663630163757558e-07, "epoch": 4.187452666552659, "percentage": 83.75, "elapsed_time": "3:41:29", "remaining_time": "0:42:58", "throughput": 8691.33, "total_tokens": 115504552} +{"current_steps": 171410, "total_steps": 204665, "loss": 0.0, "lr": 1.565904813693817e-07, "epoch": 4.187574817384506, "percentage": 83.75, "elapsed_time": "3:41:29", "remaining_time": "0:42:58", "throughput": 8691.37, "total_tokens": 115508136} +{"current_steps": 171415, "total_steps": 204665, "loss": 0.0, "lr": 1.5654466723468897e-07, "epoch": 4.187696968216353, "percentage": 83.75, "elapsed_time": "3:41:30", "remaining_time": "0:42:57", "throughput": 8691.39, "total_tokens": 115511400} +{"current_steps": 171420, "total_steps": 204665, "loss": 0.0, "lr": 1.5649885923383e-07, "epoch": 4.187819119048201, "percentage": 83.76, "elapsed_time": "3:41:30", "remaining_time": "0:42:57", "throughput": 8691.41, "total_tokens": 115514600} +{"current_steps": 171425, "total_steps": 204665, "loss": 0.0, "lr": 1.5645305736713854e-07, "epoch": 4.187941269880048, "percentage": 83.76, "elapsed_time": "3:41:31", "remaining_time": "0:42:57", "throughput": 8691.43, "total_tokens": 115517864} +{"current_steps": 171430, "total_steps": 204665, "loss": 0.0, "lr": 1.56407261634947e-07, "epoch": 4.188063420711895, "percentage": 83.76, "elapsed_time": "3:41:31", "remaining_time": "0:42:56", "throughput": 8691.45, "total_tokens": 115521128} +{"current_steps": 171435, "total_steps": 204665, "loss": 0.0, "lr": 1.563614720375891e-07, "epoch": 4.188185571543742, "percentage": 83.76, "elapsed_time": "3:41:31", "remaining_time": "0:42:56", "throughput": 8691.51, "total_tokens": 115525160} +{"current_steps": 171440, "total_steps": 204665, "loss": 0.0, "lr": 1.5631568857539712e-07, "epoch": 4.18830772237559, "percentage": 83.77, "elapsed_time": "3:41:32", "remaining_time": "0:42:55", "throughput": 8691.54, "total_tokens": 115528488} +{"current_steps": 171445, "total_steps": 204665, "loss": 0.0, "lr": 1.562699112487047e-07, "epoch": 4.188429873207436, "percentage": 83.77, "elapsed_time": "3:41:32", "remaining_time": "0:42:55", "throughput": 8691.55, "total_tokens": 115531688} +{"current_steps": 171450, "total_steps": 204665, "loss": 0.0, "lr": 1.5622414005784434e-07, "epoch": 4.188552024039284, "percentage": 83.77, "elapsed_time": "3:41:32", "remaining_time": "0:42:55", "throughput": 8691.58, "total_tokens": 115535016} +{"current_steps": 171455, "total_steps": 204665, "loss": 0.0, "lr": 1.5617837500314879e-07, "epoch": 4.188674174871131, "percentage": 83.77, "elapsed_time": "3:41:33", "remaining_time": "0:42:54", "throughput": 8691.6, "total_tokens": 115538280} +{"current_steps": 171460, "total_steps": 204665, "loss": 0.0, "lr": 1.561326160849513e-07, "epoch": 4.188796325702978, "percentage": 83.78, "elapsed_time": "3:41:33", "remaining_time": "0:42:54", "throughput": 8691.61, "total_tokens": 115541352} +{"current_steps": 171465, "total_steps": 204665, "loss": 0.0, "lr": 1.5608686330358422e-07, "epoch": 4.188918476534825, "percentage": 83.78, "elapsed_time": "3:41:33", "remaining_time": "0:42:54", "throughput": 8691.63, "total_tokens": 115544744} +{"current_steps": 171470, "total_steps": 204665, "loss": 0.0, "lr": 1.5604111665938035e-07, "epoch": 4.189040627366673, "percentage": 83.78, "elapsed_time": "3:41:34", "remaining_time": "0:42:53", "throughput": 8691.65, "total_tokens": 115547944} +{"current_steps": 171475, "total_steps": 204665, "loss": 0.0, "lr": 1.5599537615267277e-07, "epoch": 4.1891627781985195, "percentage": 83.78, "elapsed_time": "3:41:34", "remaining_time": "0:42:53", "throughput": 8691.66, "total_tokens": 115551016} +{"current_steps": 171480, "total_steps": 204665, "loss": 0.0, "lr": 1.5594964178379366e-07, "epoch": 4.189284929030367, "percentage": 83.79, "elapsed_time": "3:41:34", "remaining_time": "0:42:52", "throughput": 8691.69, "total_tokens": 115554472} +{"current_steps": 171485, "total_steps": 204665, "loss": 0.0, "lr": 1.5590391355307587e-07, "epoch": 4.189407079862214, "percentage": 83.79, "elapsed_time": "3:41:35", "remaining_time": "0:42:52", "throughput": 8691.7, "total_tokens": 115557480} +{"current_steps": 171490, "total_steps": 204665, "loss": 0.0, "lr": 1.5585819146085178e-07, "epoch": 4.1895292306940615, "percentage": 83.79, "elapsed_time": "3:41:35", "remaining_time": "0:42:52", "throughput": 8691.7, "total_tokens": 115560424} +{"current_steps": 171495, "total_steps": 204665, "loss": 0.0, "lr": 1.5581247550745402e-07, "epoch": 4.189651381525908, "percentage": 83.79, "elapsed_time": "3:41:35", "remaining_time": "0:42:51", "throughput": 8691.74, "total_tokens": 115564008} +{"current_steps": 171500, "total_steps": 204665, "loss": 0.0, "lr": 1.557667656932149e-07, "epoch": 4.189773532357755, "percentage": 83.8, "elapsed_time": "3:41:36", "remaining_time": "0:42:51", "throughput": 8691.76, "total_tokens": 115567400} +{"current_steps": 171505, "total_steps": 204665, "loss": 0.0, "lr": 1.5572106201846691e-07, "epoch": 4.189895683189603, "percentage": 83.8, "elapsed_time": "3:41:36", "remaining_time": "0:42:50", "throughput": 8691.79, "total_tokens": 115570792} +{"current_steps": 171510, "total_steps": 204665, "loss": 0.0, "lr": 1.5567536448354257e-07, "epoch": 4.190017834021449, "percentage": 83.8, "elapsed_time": "3:41:36", "remaining_time": "0:42:50", "throughput": 8691.8, "total_tokens": 115573864} +{"current_steps": 171515, "total_steps": 204665, "loss": 0.0, "lr": 1.5562967308877395e-07, "epoch": 4.190139984853297, "percentage": 83.8, "elapsed_time": "3:41:37", "remaining_time": "0:42:50", "throughput": 8691.83, "total_tokens": 115577256} +{"current_steps": 171520, "total_steps": 204665, "loss": 0.0501, "lr": 1.5558398783449366e-07, "epoch": 4.190262135685144, "percentage": 83.81, "elapsed_time": "3:41:37", "remaining_time": "0:42:49", "throughput": 8691.86, "total_tokens": 115580776} +{"current_steps": 171525, "total_steps": 204665, "loss": 0.0001, "lr": 1.5553830872103347e-07, "epoch": 4.190384286516991, "percentage": 83.81, "elapsed_time": "3:41:37", "remaining_time": "0:42:49", "throughput": 8691.93, "total_tokens": 115584872} +{"current_steps": 171530, "total_steps": 204665, "loss": 0.0444, "lr": 1.5549263574872585e-07, "epoch": 4.190506437348838, "percentage": 83.81, "elapsed_time": "3:41:38", "remaining_time": "0:42:48", "throughput": 8691.94, "total_tokens": 115587880} +{"current_steps": 171535, "total_steps": 204665, "loss": 0.0, "lr": 1.554469689179032e-07, "epoch": 4.190628588180686, "percentage": 83.81, "elapsed_time": "3:41:38", "remaining_time": "0:42:48", "throughput": 8691.95, "total_tokens": 115590952} +{"current_steps": 171540, "total_steps": 204665, "loss": 0.0, "lr": 1.5540130822889708e-07, "epoch": 4.1907507390125325, "percentage": 83.82, "elapsed_time": "3:41:38", "remaining_time": "0:42:48", "throughput": 8691.98, "total_tokens": 115594536} +{"current_steps": 171545, "total_steps": 204665, "loss": 0.0, "lr": 1.5535565368204008e-07, "epoch": 4.19087288984438, "percentage": 83.82, "elapsed_time": "3:41:39", "remaining_time": "0:42:47", "throughput": 8692.02, "total_tokens": 115598056} +{"current_steps": 171550, "total_steps": 204665, "loss": 0.0, "lr": 1.553100052776639e-07, "epoch": 4.190995040676227, "percentage": 83.82, "elapsed_time": "3:41:39", "remaining_time": "0:42:47", "throughput": 8692.04, "total_tokens": 115601384} +{"current_steps": 171555, "total_steps": 204665, "loss": 0.0, "lr": 1.5526436301610035e-07, "epoch": 4.1911171915080745, "percentage": 83.82, "elapsed_time": "3:41:40", "remaining_time": "0:42:46", "throughput": 8692.05, "total_tokens": 115604392} +{"current_steps": 171560, "total_steps": 204665, "loss": 0.0, "lr": 1.5521872689768178e-07, "epoch": 4.191239342339921, "percentage": 83.82, "elapsed_time": "3:41:40", "remaining_time": "0:42:46", "throughput": 8692.06, "total_tokens": 115607592} +{"current_steps": 171565, "total_steps": 204665, "loss": 0.0, "lr": 1.551730969227396e-07, "epoch": 4.191361493171769, "percentage": 83.83, "elapsed_time": "3:41:40", "remaining_time": "0:42:46", "throughput": 8692.09, "total_tokens": 115611048} +{"current_steps": 171570, "total_steps": 204665, "loss": 0.0, "lr": 1.5512747309160622e-07, "epoch": 4.191483644003616, "percentage": 83.83, "elapsed_time": "3:41:41", "remaining_time": "0:42:45", "throughput": 8692.13, "total_tokens": 115614568} +{"current_steps": 171575, "total_steps": 204665, "loss": 0.0, "lr": 1.5508185540461283e-07, "epoch": 4.191605794835463, "percentage": 83.83, "elapsed_time": "3:41:41", "remaining_time": "0:42:45", "throughput": 8692.16, "total_tokens": 115618088} +{"current_steps": 171580, "total_steps": 204665, "loss": 0.0, "lr": 1.5503624386209157e-07, "epoch": 4.19172794566731, "percentage": 83.83, "elapsed_time": "3:41:41", "remaining_time": "0:42:44", "throughput": 8692.19, "total_tokens": 115621544} +{"current_steps": 171585, "total_steps": 204665, "loss": 0.0, "lr": 1.5499063846437387e-07, "epoch": 4.191850096499157, "percentage": 83.84, "elapsed_time": "3:41:42", "remaining_time": "0:42:44", "throughput": 8692.24, "total_tokens": 115625192} +{"current_steps": 171590, "total_steps": 204665, "loss": 0.0, "lr": 1.549450392117917e-07, "epoch": 4.191972247331004, "percentage": 83.84, "elapsed_time": "3:41:42", "remaining_time": "0:42:44", "throughput": 8692.29, "total_tokens": 115628904} +{"current_steps": 171595, "total_steps": 204665, "loss": 0.0, "lr": 1.5489944610467632e-07, "epoch": 4.192094398162851, "percentage": 83.84, "elapsed_time": "3:41:42", "remaining_time": "0:42:43", "throughput": 8692.31, "total_tokens": 115632232} +{"current_steps": 171600, "total_steps": 204665, "loss": 0.0, "lr": 1.5485385914335946e-07, "epoch": 4.192216548994699, "percentage": 83.84, "elapsed_time": "3:41:43", "remaining_time": "0:42:43", "throughput": 8692.34, "total_tokens": 115635624} +{"current_steps": 171605, "total_steps": 204665, "loss": 0.0, "lr": 1.548082783281729e-07, "epoch": 4.1923386998265455, "percentage": 83.85, "elapsed_time": "3:41:43", "remaining_time": "0:42:42", "throughput": 8692.35, "total_tokens": 115638760} +{"current_steps": 171610, "total_steps": 204665, "loss": 0.0, "lr": 1.5476270365944766e-07, "epoch": 4.192460850658393, "percentage": 83.85, "elapsed_time": "3:41:43", "remaining_time": "0:42:42", "throughput": 8692.34, "total_tokens": 115641512} +{"current_steps": 171615, "total_steps": 204665, "loss": 0.0, "lr": 1.547171351375155e-07, "epoch": 4.19258300149024, "percentage": 83.85, "elapsed_time": "3:41:44", "remaining_time": "0:42:42", "throughput": 8692.38, "total_tokens": 115645096} +{"current_steps": 171620, "total_steps": 204665, "loss": 0.0, "lr": 1.546715727627076e-07, "epoch": 4.1927051523220875, "percentage": 83.85, "elapsed_time": "3:41:44", "remaining_time": "0:42:41", "throughput": 8692.43, "total_tokens": 115648872} +{"current_steps": 171625, "total_steps": 204665, "loss": 0.0, "lr": 1.5462601653535524e-07, "epoch": 4.192827303153934, "percentage": 83.86, "elapsed_time": "3:41:44", "remaining_time": "0:42:41", "throughput": 8692.46, "total_tokens": 115652328} +{"current_steps": 171630, "total_steps": 204665, "loss": 0.0, "lr": 1.5458046645579014e-07, "epoch": 4.192949453985782, "percentage": 83.86, "elapsed_time": "3:41:45", "remaining_time": "0:42:40", "throughput": 8692.47, "total_tokens": 115655464} +{"current_steps": 171635, "total_steps": 204665, "loss": 0.0, "lr": 1.5453492252434308e-07, "epoch": 4.193071604817629, "percentage": 83.86, "elapsed_time": "3:41:45", "remaining_time": "0:42:40", "throughput": 8692.53, "total_tokens": 115659304} +{"current_steps": 171640, "total_steps": 204665, "loss": 0.0, "lr": 1.5448938474134575e-07, "epoch": 4.193193755649476, "percentage": 83.86, "elapsed_time": "3:41:45", "remaining_time": "0:42:40", "throughput": 8692.57, "total_tokens": 115663016} +{"current_steps": 171645, "total_steps": 204665, "loss": 0.0, "lr": 1.544438531071287e-07, "epoch": 4.193315906481323, "percentage": 83.87, "elapsed_time": "3:41:46", "remaining_time": "0:42:39", "throughput": 8692.62, "total_tokens": 115666664} +{"current_steps": 171650, "total_steps": 204665, "loss": 0.0607, "lr": 1.5439832762202375e-07, "epoch": 4.193438057313171, "percentage": 83.87, "elapsed_time": "3:41:46", "remaining_time": "0:42:39", "throughput": 8692.67, "total_tokens": 115670440} +{"current_steps": 171655, "total_steps": 204665, "loss": 0.0, "lr": 1.5435280828636143e-07, "epoch": 4.193560208145017, "percentage": 83.87, "elapsed_time": "3:41:47", "remaining_time": "0:42:38", "throughput": 8692.71, "total_tokens": 115674024} +{"current_steps": 171660, "total_steps": 204665, "loss": 0.0, "lr": 1.543072951004728e-07, "epoch": 4.193682358976865, "percentage": 83.87, "elapsed_time": "3:41:47", "remaining_time": "0:42:38", "throughput": 8692.71, "total_tokens": 115677032} +{"current_steps": 171665, "total_steps": 204665, "loss": 0.0, "lr": 1.5426178806468926e-07, "epoch": 4.193804509808712, "percentage": 83.88, "elapsed_time": "3:41:47", "remaining_time": "0:42:38", "throughput": 8692.72, "total_tokens": 115680104} +{"current_steps": 171670, "total_steps": 204665, "loss": 0.0, "lr": 1.5421628717934109e-07, "epoch": 4.1939266606405585, "percentage": 83.88, "elapsed_time": "3:41:48", "remaining_time": "0:42:37", "throughput": 8692.74, "total_tokens": 115683304} +{"current_steps": 171675, "total_steps": 204665, "loss": 0.0, "lr": 1.5417079244475995e-07, "epoch": 4.194048811472406, "percentage": 83.88, "elapsed_time": "3:41:48", "remaining_time": "0:42:37", "throughput": 8692.76, "total_tokens": 115686568} +{"current_steps": 171680, "total_steps": 204665, "loss": 0.0, "lr": 1.54125303861276e-07, "epoch": 4.194170962304253, "percentage": 83.88, "elapsed_time": "3:41:48", "remaining_time": "0:42:37", "throughput": 8692.79, "total_tokens": 115689960} +{"current_steps": 171685, "total_steps": 204665, "loss": 0.0, "lr": 1.540798214292204e-07, "epoch": 4.1942931131361005, "percentage": 83.89, "elapsed_time": "3:41:49", "remaining_time": "0:42:36", "throughput": 8692.8, "total_tokens": 115693096} +{"current_steps": 171690, "total_steps": 204665, "loss": 0.0, "lr": 1.54034345148924e-07, "epoch": 4.194415263967947, "percentage": 83.89, "elapsed_time": "3:41:49", "remaining_time": "0:42:36", "throughput": 8692.81, "total_tokens": 115696232} +{"current_steps": 171695, "total_steps": 204665, "loss": 0.0, "lr": 1.5398887502071722e-07, "epoch": 4.194537414799795, "percentage": 83.89, "elapsed_time": "3:41:49", "remaining_time": "0:42:35", "throughput": 8692.87, "total_tokens": 115700072} +{"current_steps": 171700, "total_steps": 204665, "loss": 0.0, "lr": 1.5394341104493113e-07, "epoch": 4.194659565631642, "percentage": 83.89, "elapsed_time": "3:41:50", "remaining_time": "0:42:35", "throughput": 8692.89, "total_tokens": 115703336} +{"current_steps": 171705, "total_steps": 204665, "loss": 0.0, "lr": 1.538979532218959e-07, "epoch": 4.194781716463489, "percentage": 83.9, "elapsed_time": "3:41:50", "remaining_time": "0:42:35", "throughput": 8692.92, "total_tokens": 115706792} +{"current_steps": 171710, "total_steps": 204665, "loss": 0.0477, "lr": 1.538525015519425e-07, "epoch": 4.194903867295336, "percentage": 83.9, "elapsed_time": "3:41:50", "remaining_time": "0:42:34", "throughput": 8692.95, "total_tokens": 115710248} +{"current_steps": 171715, "total_steps": 204665, "loss": 0.0, "lr": 1.5380705603540112e-07, "epoch": 4.195026018127184, "percentage": 83.9, "elapsed_time": "3:41:51", "remaining_time": "0:42:34", "throughput": 8692.98, "total_tokens": 115713704} +{"current_steps": 171720, "total_steps": 204665, "loss": 0.0, "lr": 1.5376161667260235e-07, "epoch": 4.19514816895903, "percentage": 83.9, "elapsed_time": "3:41:51", "remaining_time": "0:42:33", "throughput": 8693.03, "total_tokens": 115717480} +{"current_steps": 171725, "total_steps": 204665, "loss": 0.0, "lr": 1.5371618346387704e-07, "epoch": 4.195270319790878, "percentage": 83.91, "elapsed_time": "3:41:51", "remaining_time": "0:42:33", "throughput": 8693.08, "total_tokens": 115721256} +{"current_steps": 171730, "total_steps": 204665, "loss": 0.0, "lr": 1.5367075640955495e-07, "epoch": 4.195392470622725, "percentage": 83.91, "elapsed_time": "3:41:52", "remaining_time": "0:42:33", "throughput": 8693.09, "total_tokens": 115724264} +{"current_steps": 171735, "total_steps": 204665, "loss": 0.0, "lr": 1.5362533550996704e-07, "epoch": 4.195514621454572, "percentage": 83.91, "elapsed_time": "3:41:52", "remaining_time": "0:42:32", "throughput": 8693.12, "total_tokens": 115727656} +{"current_steps": 171740, "total_steps": 204665, "loss": 0.0, "lr": 1.5357992076544314e-07, "epoch": 4.195636772286419, "percentage": 83.91, "elapsed_time": "3:41:52", "remaining_time": "0:42:32", "throughput": 8693.14, "total_tokens": 115731048} +{"current_steps": 171745, "total_steps": 204665, "loss": 0.0, "lr": 1.5353451217631386e-07, "epoch": 4.195758923118267, "percentage": 83.92, "elapsed_time": "3:41:53", "remaining_time": "0:42:31", "throughput": 8693.19, "total_tokens": 115734696} +{"current_steps": 171750, "total_steps": 204665, "loss": 0.0, "lr": 1.5348910974290907e-07, "epoch": 4.195881073950114, "percentage": 83.92, "elapsed_time": "3:41:53", "remaining_time": "0:42:31", "throughput": 8693.22, "total_tokens": 115738216} +{"current_steps": 171755, "total_steps": 204665, "loss": 0.0002, "lr": 1.534437134655595e-07, "epoch": 4.196003224781961, "percentage": 83.92, "elapsed_time": "3:41:53", "remaining_time": "0:42:31", "throughput": 8693.28, "total_tokens": 115742184} +{"current_steps": 171760, "total_steps": 204665, "loss": 0.0, "lr": 1.533983233445948e-07, "epoch": 4.196125375613808, "percentage": 83.92, "elapsed_time": "3:41:54", "remaining_time": "0:42:30", "throughput": 8693.3, "total_tokens": 115745448} +{"current_steps": 171765, "total_steps": 204665, "loss": 0.0, "lr": 1.53352939380345e-07, "epoch": 4.196247526445655, "percentage": 83.92, "elapsed_time": "3:41:54", "remaining_time": "0:42:30", "throughput": 8693.33, "total_tokens": 115748776} +{"current_steps": 171770, "total_steps": 204665, "loss": 0.0, "lr": 1.5330756157314062e-07, "epoch": 4.196369677277502, "percentage": 83.93, "elapsed_time": "3:41:55", "remaining_time": "0:42:29", "throughput": 8693.34, "total_tokens": 115751976} +{"current_steps": 171775, "total_steps": 204665, "loss": 0.0001, "lr": 1.5326218992331119e-07, "epoch": 4.196491828109349, "percentage": 83.93, "elapsed_time": "3:41:55", "remaining_time": "0:42:29", "throughput": 8693.37, "total_tokens": 115755432} +{"current_steps": 171780, "total_steps": 204665, "loss": 0.0, "lr": 1.5321682443118677e-07, "epoch": 4.196613978941197, "percentage": 83.93, "elapsed_time": "3:41:55", "remaining_time": "0:42:29", "throughput": 8693.41, "total_tokens": 115758952} +{"current_steps": 171785, "total_steps": 204665, "loss": 0.0, "lr": 1.5317146509709767e-07, "epoch": 4.1967361297730434, "percentage": 83.93, "elapsed_time": "3:41:56", "remaining_time": "0:42:28", "throughput": 8693.41, "total_tokens": 115761960} +{"current_steps": 171790, "total_steps": 204665, "loss": 0.0, "lr": 1.5312611192137313e-07, "epoch": 4.196858280604891, "percentage": 83.94, "elapsed_time": "3:41:56", "remaining_time": "0:42:28", "throughput": 8693.45, "total_tokens": 115765480} +{"current_steps": 171795, "total_steps": 204665, "loss": 0.0, "lr": 1.5308076490434352e-07, "epoch": 4.196980431436738, "percentage": 83.94, "elapsed_time": "3:41:56", "remaining_time": "0:42:27", "throughput": 8693.5, "total_tokens": 115769256} +{"current_steps": 171800, "total_steps": 204665, "loss": 0.0, "lr": 1.5303542404633818e-07, "epoch": 4.1971025822685855, "percentage": 83.94, "elapsed_time": "3:41:57", "remaining_time": "0:42:27", "throughput": 8693.51, "total_tokens": 115772392} +{"current_steps": 171805, "total_steps": 204665, "loss": 0.0, "lr": 1.529900893476873e-07, "epoch": 4.197224733100432, "percentage": 83.94, "elapsed_time": "3:41:57", "remaining_time": "0:42:27", "throughput": 8693.53, "total_tokens": 115775592} +{"current_steps": 171810, "total_steps": 204665, "loss": 0.0, "lr": 1.5294476080872009e-07, "epoch": 4.19734688393228, "percentage": 83.95, "elapsed_time": "3:41:57", "remaining_time": "0:42:26", "throughput": 8693.52, "total_tokens": 115778408} +{"current_steps": 171815, "total_steps": 204665, "loss": 0.0, "lr": 1.5289943842976638e-07, "epoch": 4.197469034764127, "percentage": 83.95, "elapsed_time": "3:41:58", "remaining_time": "0:42:26", "throughput": 8693.54, "total_tokens": 115781672} +{"current_steps": 171820, "total_steps": 204665, "loss": 0.0, "lr": 1.5285412221115602e-07, "epoch": 4.197591185595974, "percentage": 83.95, "elapsed_time": "3:41:58", "remaining_time": "0:42:25", "throughput": 8693.56, "total_tokens": 115785000} +{"current_steps": 171825, "total_steps": 204665, "loss": 0.0, "lr": 1.5280881215321805e-07, "epoch": 4.197713336427821, "percentage": 83.95, "elapsed_time": "3:41:58", "remaining_time": "0:42:25", "throughput": 8693.58, "total_tokens": 115788136} +{"current_steps": 171830, "total_steps": 204665, "loss": 0.0, "lr": 1.527635082562826e-07, "epoch": 4.197835487259669, "percentage": 83.96, "elapsed_time": "3:41:59", "remaining_time": "0:42:25", "throughput": 8693.6, "total_tokens": 115791400} +{"current_steps": 171835, "total_steps": 204665, "loss": 0.0, "lr": 1.5271821052067846e-07, "epoch": 4.197957638091515, "percentage": 83.96, "elapsed_time": "3:41:59", "remaining_time": "0:42:24", "throughput": 8693.62, "total_tokens": 115794664} +{"current_steps": 171840, "total_steps": 204665, "loss": 0.0009, "lr": 1.526729189467355e-07, "epoch": 4.198079788923363, "percentage": 83.96, "elapsed_time": "3:41:59", "remaining_time": "0:42:24", "throughput": 8693.64, "total_tokens": 115797992} +{"current_steps": 171845, "total_steps": 204665, "loss": 0.0, "lr": 1.5262763353478315e-07, "epoch": 4.19820193975521, "percentage": 83.96, "elapsed_time": "3:42:00", "remaining_time": "0:42:23", "throughput": 8693.66, "total_tokens": 115801256} +{"current_steps": 171850, "total_steps": 204665, "loss": 0.0, "lr": 1.5258235428515033e-07, "epoch": 4.198324090587057, "percentage": 83.97, "elapsed_time": "3:42:00", "remaining_time": "0:42:23", "throughput": 8693.68, "total_tokens": 115804520} +{"current_steps": 171855, "total_steps": 204665, "loss": 0.0, "lr": 1.5253708119816676e-07, "epoch": 4.198446241418904, "percentage": 83.97, "elapsed_time": "3:42:00", "remaining_time": "0:42:23", "throughput": 8693.72, "total_tokens": 115808040} +{"current_steps": 171860, "total_steps": 204665, "loss": 0.0, "lr": 1.524918142741616e-07, "epoch": 4.198568392250751, "percentage": 83.97, "elapsed_time": "3:42:01", "remaining_time": "0:42:22", "throughput": 8693.72, "total_tokens": 115811112} +{"current_steps": 171865, "total_steps": 204665, "loss": 0.0, "lr": 1.5244655351346357e-07, "epoch": 4.1986905430825985, "percentage": 83.97, "elapsed_time": "3:42:01", "remaining_time": "0:42:22", "throughput": 8693.75, "total_tokens": 115814440} +{"current_steps": 171870, "total_steps": 204665, "loss": 0.0, "lr": 1.5240129891640242e-07, "epoch": 4.198812693914445, "percentage": 83.98, "elapsed_time": "3:42:01", "remaining_time": "0:42:21", "throughput": 8693.76, "total_tokens": 115817640} +{"current_steps": 171875, "total_steps": 204665, "loss": 0.0524, "lr": 1.523560504833068e-07, "epoch": 4.198934844746293, "percentage": 83.98, "elapsed_time": "3:42:02", "remaining_time": "0:42:21", "throughput": 8693.77, "total_tokens": 115820712} +{"current_steps": 171880, "total_steps": 204665, "loss": 0.0, "lr": 1.5231080821450616e-07, "epoch": 4.19905699557814, "percentage": 83.98, "elapsed_time": "3:42:02", "remaining_time": "0:42:21", "throughput": 8693.77, "total_tokens": 115823656} +{"current_steps": 171885, "total_steps": 204665, "loss": 0.0, "lr": 1.522655721103291e-07, "epoch": 4.199179146409987, "percentage": 83.98, "elapsed_time": "3:42:02", "remaining_time": "0:42:20", "throughput": 8693.8, "total_tokens": 115827048} +{"current_steps": 171890, "total_steps": 204665, "loss": 0.0, "lr": 1.5222034217110502e-07, "epoch": 4.199301297241834, "percentage": 83.99, "elapsed_time": "3:42:03", "remaining_time": "0:42:20", "throughput": 8693.83, "total_tokens": 115830504} +{"current_steps": 171895, "total_steps": 204665, "loss": 0.0, "lr": 1.5217511839716245e-07, "epoch": 4.199423448073682, "percentage": 83.99, "elapsed_time": "3:42:03", "remaining_time": "0:42:20", "throughput": 8693.86, "total_tokens": 115833896} +{"current_steps": 171900, "total_steps": 204665, "loss": 0.125, "lr": 1.521299007888307e-07, "epoch": 4.199545598905528, "percentage": 83.99, "elapsed_time": "3:42:03", "remaining_time": "0:42:19", "throughput": 8693.91, "total_tokens": 115837672} +{"current_steps": 171905, "total_steps": 204665, "loss": 0.0, "lr": 1.5208468934643815e-07, "epoch": 4.199667749737376, "percentage": 83.99, "elapsed_time": "3:42:04", "remaining_time": "0:42:19", "throughput": 8693.95, "total_tokens": 115841256} +{"current_steps": 171910, "total_steps": 204665, "loss": 0.0, "lr": 1.5203948407031375e-07, "epoch": 4.199789900569223, "percentage": 84.0, "elapsed_time": "3:42:04", "remaining_time": "0:42:18", "throughput": 8693.96, "total_tokens": 115844456} +{"current_steps": 171915, "total_steps": 204665, "loss": 0.0, "lr": 1.5199428496078648e-07, "epoch": 4.19991205140107, "percentage": 84.0, "elapsed_time": "3:42:05", "remaining_time": "0:42:18", "throughput": 8693.97, "total_tokens": 115847656} +{"current_steps": 171920, "total_steps": 204665, "loss": 0.0, "lr": 1.5194909201818473e-07, "epoch": 4.200034202232917, "percentage": 84.0, "elapsed_time": "3:42:05", "remaining_time": "0:42:18", "throughput": 8693.99, "total_tokens": 115850856} +{"current_steps": 171925, "total_steps": 204665, "loss": 0.0, "lr": 1.5190390524283747e-07, "epoch": 4.200156353064765, "percentage": 84.0, "elapsed_time": "3:42:05", "remaining_time": "0:42:17", "throughput": 8694.0, "total_tokens": 115853928} +{"current_steps": 171930, "total_steps": 204665, "loss": 0.0, "lr": 1.5185872463507287e-07, "epoch": 4.2002785038966115, "percentage": 84.01, "elapsed_time": "3:42:06", "remaining_time": "0:42:17", "throughput": 8694.03, "total_tokens": 115857448} +{"current_steps": 171935, "total_steps": 204665, "loss": 0.0, "lr": 1.518135501952198e-07, "epoch": 4.200400654728458, "percentage": 84.01, "elapsed_time": "3:42:06", "remaining_time": "0:42:16", "throughput": 8694.03, "total_tokens": 115860264} +{"current_steps": 171940, "total_steps": 204665, "loss": 0.0, "lr": 1.5176838192360686e-07, "epoch": 4.200522805560306, "percentage": 84.01, "elapsed_time": "3:42:06", "remaining_time": "0:42:16", "throughput": 8694.05, "total_tokens": 115863720} +{"current_steps": 171945, "total_steps": 204665, "loss": 0.0, "lr": 1.5172321982056223e-07, "epoch": 4.200644956392153, "percentage": 84.01, "elapsed_time": "3:42:07", "remaining_time": "0:42:16", "throughput": 8694.07, "total_tokens": 115866920} +{"current_steps": 171950, "total_steps": 204665, "loss": 0.0, "lr": 1.516780638864148e-07, "epoch": 4.200767107224, "percentage": 84.02, "elapsed_time": "3:42:07", "remaining_time": "0:42:15", "throughput": 8694.1, "total_tokens": 115870312} +{"current_steps": 171955, "total_steps": 204665, "loss": 0.0, "lr": 1.5163291412149226e-07, "epoch": 4.200889258055847, "percentage": 84.02, "elapsed_time": "3:42:07", "remaining_time": "0:42:15", "throughput": 8694.11, "total_tokens": 115873512} +{"current_steps": 171960, "total_steps": 204665, "loss": 0.0, "lr": 1.515877705261237e-07, "epoch": 4.201011408887695, "percentage": 84.02, "elapsed_time": "3:42:08", "remaining_time": "0:42:14", "throughput": 8694.15, "total_tokens": 115877032} +{"current_steps": 171965, "total_steps": 204665, "loss": 0.0, "lr": 1.5154263310063708e-07, "epoch": 4.201133559719541, "percentage": 84.02, "elapsed_time": "3:42:08", "remaining_time": "0:42:14", "throughput": 8694.17, "total_tokens": 115880360} +{"current_steps": 171970, "total_steps": 204665, "loss": 0.0, "lr": 1.5149750184536036e-07, "epoch": 4.201255710551389, "percentage": 84.03, "elapsed_time": "3:42:08", "remaining_time": "0:42:14", "throughput": 8694.19, "total_tokens": 115883560} +{"current_steps": 171975, "total_steps": 204665, "loss": 0.0, "lr": 1.5145237676062228e-07, "epoch": 4.201377861383236, "percentage": 84.03, "elapsed_time": "3:42:09", "remaining_time": "0:42:13", "throughput": 8694.21, "total_tokens": 115886824} +{"current_steps": 171980, "total_steps": 204665, "loss": 0.0, "lr": 1.5140725784675057e-07, "epoch": 4.201500012215083, "percentage": 84.03, "elapsed_time": "3:42:09", "remaining_time": "0:42:13", "throughput": 8694.24, "total_tokens": 115890216} +{"current_steps": 171985, "total_steps": 204665, "loss": 0.0, "lr": 1.5136214510407364e-07, "epoch": 4.20162216304693, "percentage": 84.03, "elapsed_time": "3:42:09", "remaining_time": "0:42:12", "throughput": 8694.27, "total_tokens": 115893736} +{"current_steps": 171990, "total_steps": 204665, "loss": 0.0, "lr": 1.5131703853291934e-07, "epoch": 4.201744313878778, "percentage": 84.03, "elapsed_time": "3:42:10", "remaining_time": "0:42:12", "throughput": 8694.32, "total_tokens": 115897448} +{"current_steps": 171995, "total_steps": 204665, "loss": 0.0, "lr": 1.5127193813361595e-07, "epoch": 4.2018664647106245, "percentage": 84.04, "elapsed_time": "3:42:10", "remaining_time": "0:42:12", "throughput": 8694.35, "total_tokens": 115900968} +{"current_steps": 172000, "total_steps": 204665, "loss": 0.0, "lr": 1.512268439064911e-07, "epoch": 4.201988615542472, "percentage": 84.04, "elapsed_time": "3:42:10", "remaining_time": "0:42:11", "throughput": 8694.37, "total_tokens": 115904104} +{"current_steps": 172005, "total_steps": 204665, "loss": 0.0, "lr": 1.5118175585187286e-07, "epoch": 4.202110766374319, "percentage": 84.04, "elapsed_time": "3:42:11", "remaining_time": "0:42:11", "throughput": 8694.39, "total_tokens": 115907432} +{"current_steps": 172010, "total_steps": 204665, "loss": 0.0, "lr": 1.5113667397008957e-07, "epoch": 4.2022329172061665, "percentage": 84.04, "elapsed_time": "3:42:11", "remaining_time": "0:42:10", "throughput": 8694.41, "total_tokens": 115910696} +{"current_steps": 172015, "total_steps": 204665, "loss": 0.0, "lr": 1.5109159826146834e-07, "epoch": 4.202355068038013, "percentage": 84.05, "elapsed_time": "3:42:11", "remaining_time": "0:42:10", "throughput": 8694.46, "total_tokens": 115914408} +{"current_steps": 172020, "total_steps": 204665, "loss": 0.0, "lr": 1.510465287263376e-07, "epoch": 4.202477218869861, "percentage": 84.05, "elapsed_time": "3:42:12", "remaining_time": "0:42:10", "throughput": 8694.48, "total_tokens": 115917672} +{"current_steps": 172025, "total_steps": 204665, "loss": 0.0001, "lr": 1.5100146536502468e-07, "epoch": 4.202599369701708, "percentage": 84.05, "elapsed_time": "3:42:12", "remaining_time": "0:42:09", "throughput": 8694.51, "total_tokens": 115921064} +{"current_steps": 172030, "total_steps": 204665, "loss": 0.0, "lr": 1.5095640817785737e-07, "epoch": 4.202721520533554, "percentage": 84.05, "elapsed_time": "3:42:13", "remaining_time": "0:42:09", "throughput": 8694.52, "total_tokens": 115924264} +{"current_steps": 172035, "total_steps": 204665, "loss": 0.0, "lr": 1.509113571651638e-07, "epoch": 4.202843671365402, "percentage": 84.06, "elapsed_time": "3:42:13", "remaining_time": "0:42:08", "throughput": 8694.54, "total_tokens": 115927528} +{"current_steps": 172040, "total_steps": 204665, "loss": 0.0, "lr": 1.5086631232727086e-07, "epoch": 4.202965822197249, "percentage": 84.06, "elapsed_time": "3:42:13", "remaining_time": "0:42:08", "throughput": 8694.56, "total_tokens": 115930664} +{"current_steps": 172045, "total_steps": 204665, "loss": 0.0, "lr": 1.508212736645067e-07, "epoch": 4.203087973029096, "percentage": 84.06, "elapsed_time": "3:42:14", "remaining_time": "0:42:08", "throughput": 8694.58, "total_tokens": 115933928} +{"current_steps": 172050, "total_steps": 204665, "loss": 0.0, "lr": 1.5077624117719845e-07, "epoch": 4.203210123860943, "percentage": 84.06, "elapsed_time": "3:42:14", "remaining_time": "0:42:07", "throughput": 8694.6, "total_tokens": 115937192} +{"current_steps": 172055, "total_steps": 204665, "loss": 0.0, "lr": 1.507312148656741e-07, "epoch": 4.203332274692791, "percentage": 84.07, "elapsed_time": "3:42:14", "remaining_time": "0:42:07", "throughput": 8694.61, "total_tokens": 115940456} +{"current_steps": 172060, "total_steps": 204665, "loss": 0.0, "lr": 1.5068619473026045e-07, "epoch": 4.2034544255246375, "percentage": 84.07, "elapsed_time": "3:42:15", "remaining_time": "0:42:06", "throughput": 8694.66, "total_tokens": 115944040} +{"current_steps": 172065, "total_steps": 204665, "loss": 0.0, "lr": 1.506411807712854e-07, "epoch": 4.203576576356485, "percentage": 84.07, "elapsed_time": "3:42:15", "remaining_time": "0:42:06", "throughput": 8694.7, "total_tokens": 115947688} +{"current_steps": 172070, "total_steps": 204665, "loss": 0.0, "lr": 1.5059617298907624e-07, "epoch": 4.203698727188332, "percentage": 84.07, "elapsed_time": "3:42:15", "remaining_time": "0:42:06", "throughput": 8694.72, "total_tokens": 115951016} +{"current_steps": 172075, "total_steps": 204665, "loss": 0.0, "lr": 1.505511713839599e-07, "epoch": 4.20382087802018, "percentage": 84.08, "elapsed_time": "3:42:16", "remaining_time": "0:42:05", "throughput": 8694.79, "total_tokens": 115955048} +{"current_steps": 172080, "total_steps": 204665, "loss": 0.0, "lr": 1.5050617595626424e-07, "epoch": 4.203943028852026, "percentage": 84.08, "elapsed_time": "3:42:16", "remaining_time": "0:42:05", "throughput": 8694.64, "total_tokens": 115958248} +{"current_steps": 172085, "total_steps": 204665, "loss": 0.0, "lr": 1.5046118670631581e-07, "epoch": 4.204065179683874, "percentage": 84.08, "elapsed_time": "3:42:17", "remaining_time": "0:42:05", "throughput": 8694.64, "total_tokens": 115961320} +{"current_steps": 172090, "total_steps": 204665, "loss": 0.0004, "lr": 1.504162036344422e-07, "epoch": 4.204187330515721, "percentage": 84.08, "elapsed_time": "3:42:17", "remaining_time": "0:42:04", "throughput": 8694.66, "total_tokens": 115964584} +{"current_steps": 172095, "total_steps": 204665, "loss": 0.0, "lr": 1.503712267409707e-07, "epoch": 4.204309481347568, "percentage": 84.09, "elapsed_time": "3:42:17", "remaining_time": "0:42:04", "throughput": 8694.68, "total_tokens": 115967912} +{"current_steps": 172100, "total_steps": 204665, "loss": 0.0, "lr": 1.5032625602622784e-07, "epoch": 4.204431632179415, "percentage": 84.09, "elapsed_time": "3:42:18", "remaining_time": "0:42:03", "throughput": 8694.69, "total_tokens": 115970984} +{"current_steps": 172105, "total_steps": 204665, "loss": 0.0, "lr": 1.5028129149054126e-07, "epoch": 4.204553783011263, "percentage": 84.09, "elapsed_time": "3:42:18", "remaining_time": "0:42:03", "throughput": 8694.71, "total_tokens": 115974248} +{"current_steps": 172110, "total_steps": 204665, "loss": 0.0, "lr": 1.5023633313423745e-07, "epoch": 4.2046759338431094, "percentage": 84.09, "elapsed_time": "3:42:18", "remaining_time": "0:42:03", "throughput": 8694.73, "total_tokens": 115977512} +{"current_steps": 172115, "total_steps": 204665, "loss": 0.0, "lr": 1.5019138095764383e-07, "epoch": 4.204798084674957, "percentage": 84.1, "elapsed_time": "3:42:19", "remaining_time": "0:42:02", "throughput": 8694.74, "total_tokens": 115980648} +{"current_steps": 172120, "total_steps": 204665, "loss": 0.0, "lr": 1.5014643496108682e-07, "epoch": 4.204920235506804, "percentage": 84.1, "elapsed_time": "3:42:19", "remaining_time": "0:42:02", "throughput": 8694.76, "total_tokens": 115983976} +{"current_steps": 172125, "total_steps": 204665, "loss": 0.0, "lr": 1.5010149514489356e-07, "epoch": 4.205042386338651, "percentage": 84.1, "elapsed_time": "3:42:19", "remaining_time": "0:42:01", "throughput": 8694.77, "total_tokens": 115987048} +{"current_steps": 172130, "total_steps": 204665, "loss": 0.0, "lr": 1.5005656150939095e-07, "epoch": 4.205164537170498, "percentage": 84.1, "elapsed_time": "3:42:20", "remaining_time": "0:42:01", "throughput": 8694.78, "total_tokens": 115990184} +{"current_steps": 172135, "total_steps": 204665, "loss": 0.0, "lr": 1.5001163405490547e-07, "epoch": 4.205286688002345, "percentage": 84.11, "elapsed_time": "3:42:20", "remaining_time": "0:42:01", "throughput": 8694.81, "total_tokens": 115993640} +{"current_steps": 172140, "total_steps": 204665, "loss": 0.0, "lr": 1.499667127817642e-07, "epoch": 4.205408838834193, "percentage": 84.11, "elapsed_time": "3:42:20", "remaining_time": "0:42:00", "throughput": 8694.84, "total_tokens": 115997096} +{"current_steps": 172145, "total_steps": 204665, "loss": 0.0, "lr": 1.4992179769029346e-07, "epoch": 4.205530989666039, "percentage": 84.11, "elapsed_time": "3:42:21", "remaining_time": "0:42:00", "throughput": 8694.86, "total_tokens": 116000296} +{"current_steps": 172150, "total_steps": 204665, "loss": 0.0, "lr": 1.4987688878082028e-07, "epoch": 4.205653140497887, "percentage": 84.11, "elapsed_time": "3:42:21", "remaining_time": "0:41:59", "throughput": 8694.86, "total_tokens": 116003368} +{"current_steps": 172155, "total_steps": 204665, "loss": 0.0, "lr": 1.4983198605367075e-07, "epoch": 4.205775291329734, "percentage": 84.12, "elapsed_time": "3:42:21", "remaining_time": "0:41:59", "throughput": 8694.86, "total_tokens": 116006248} +{"current_steps": 172160, "total_steps": 204665, "loss": 0.0, "lr": 1.4978708950917162e-07, "epoch": 4.205897442161581, "percentage": 84.12, "elapsed_time": "3:42:22", "remaining_time": "0:41:59", "throughput": 8694.87, "total_tokens": 116009448} +{"current_steps": 172165, "total_steps": 204665, "loss": 0.0, "lr": 1.4974219914764986e-07, "epoch": 4.206019592993428, "percentage": 84.12, "elapsed_time": "3:42:22", "remaining_time": "0:41:58", "throughput": 8694.89, "total_tokens": 116012712} +{"current_steps": 172170, "total_steps": 204665, "loss": 0.0, "lr": 1.496973149694314e-07, "epoch": 4.206141743825276, "percentage": 84.12, "elapsed_time": "3:42:22", "remaining_time": "0:41:58", "throughput": 8694.93, "total_tokens": 116016424} +{"current_steps": 172175, "total_steps": 204665, "loss": 0.0, "lr": 1.4965243697484253e-07, "epoch": 4.2062638946571225, "percentage": 84.13, "elapsed_time": "3:42:23", "remaining_time": "0:41:57", "throughput": 8694.96, "total_tokens": 116019816} +{"current_steps": 172180, "total_steps": 204665, "loss": 0.0, "lr": 1.4960756516421013e-07, "epoch": 4.20638604548897, "percentage": 84.13, "elapsed_time": "3:42:23", "remaining_time": "0:41:57", "throughput": 8694.97, "total_tokens": 116022888} +{"current_steps": 172185, "total_steps": 204665, "loss": 0.0, "lr": 1.4956269953785993e-07, "epoch": 4.206508196320817, "percentage": 84.13, "elapsed_time": "3:42:24", "remaining_time": "0:41:57", "throughput": 8694.99, "total_tokens": 116026216} +{"current_steps": 172190, "total_steps": 204665, "loss": 0.0, "lr": 1.495178400961188e-07, "epoch": 4.2066303471526645, "percentage": 84.13, "elapsed_time": "3:42:24", "remaining_time": "0:41:56", "throughput": 8695.01, "total_tokens": 116029480} +{"current_steps": 172195, "total_steps": 204665, "loss": 0.0, "lr": 1.4947298683931254e-07, "epoch": 4.206752497984511, "percentage": 84.14, "elapsed_time": "3:42:24", "remaining_time": "0:41:56", "throughput": 8695.04, "total_tokens": 116033000} +{"current_steps": 172200, "total_steps": 204665, "loss": 0.0, "lr": 1.4942813976776759e-07, "epoch": 4.206874648816359, "percentage": 84.14, "elapsed_time": "3:42:25", "remaining_time": "0:41:55", "throughput": 8695.07, "total_tokens": 116036392} +{"current_steps": 172205, "total_steps": 204665, "loss": 0.0, "lr": 1.493832988818098e-07, "epoch": 4.206996799648206, "percentage": 84.14, "elapsed_time": "3:42:25", "remaining_time": "0:41:55", "throughput": 8695.1, "total_tokens": 116039976} +{"current_steps": 172210, "total_steps": 204665, "loss": 0.0, "lr": 1.4933846418176578e-07, "epoch": 4.207118950480052, "percentage": 84.14, "elapsed_time": "3:42:25", "remaining_time": "0:41:55", "throughput": 8695.13, "total_tokens": 116043368} +{"current_steps": 172215, "total_steps": 204665, "loss": 0.0, "lr": 1.4929363566796082e-07, "epoch": 4.2072411013119, "percentage": 84.14, "elapsed_time": "3:42:26", "remaining_time": "0:41:54", "throughput": 8695.16, "total_tokens": 116046824} +{"current_steps": 172220, "total_steps": 204665, "loss": 0.0, "lr": 1.492488133407215e-07, "epoch": 4.207363252143747, "percentage": 84.15, "elapsed_time": "3:42:26", "remaining_time": "0:41:54", "throughput": 8695.17, "total_tokens": 116049896} +{"current_steps": 172225, "total_steps": 204665, "loss": 0.0426, "lr": 1.492039972003738e-07, "epoch": 4.207485402975594, "percentage": 84.15, "elapsed_time": "3:42:26", "remaining_time": "0:41:53", "throughput": 8695.18, "total_tokens": 116053096} +{"current_steps": 172230, "total_steps": 204665, "loss": 0.0, "lr": 1.491591872472433e-07, "epoch": 4.207607553807441, "percentage": 84.15, "elapsed_time": "3:42:27", "remaining_time": "0:41:53", "throughput": 8695.19, "total_tokens": 116056168} +{"current_steps": 172235, "total_steps": 204665, "loss": 0.0, "lr": 1.491143834816563e-07, "epoch": 4.207729704639289, "percentage": 84.15, "elapsed_time": "3:42:27", "remaining_time": "0:41:53", "throughput": 8695.21, "total_tokens": 116059560} +{"current_steps": 172240, "total_steps": 204665, "loss": 0.0, "lr": 1.4906958590393802e-07, "epoch": 4.2078518554711355, "percentage": 84.16, "elapsed_time": "3:42:27", "remaining_time": "0:41:52", "throughput": 8695.23, "total_tokens": 116062824} +{"current_steps": 172245, "total_steps": 204665, "loss": 0.0, "lr": 1.4902479451441464e-07, "epoch": 4.207974006302983, "percentage": 84.16, "elapsed_time": "3:42:28", "remaining_time": "0:41:52", "throughput": 8695.27, "total_tokens": 116066344} +{"current_steps": 172250, "total_steps": 204665, "loss": 0.0, "lr": 1.4898000931341204e-07, "epoch": 4.20809615713483, "percentage": 84.16, "elapsed_time": "3:42:28", "remaining_time": "0:41:52", "throughput": 8695.27, "total_tokens": 116069352} +{"current_steps": 172255, "total_steps": 204665, "loss": 0.0, "lr": 1.4893523030125544e-07, "epoch": 4.2082183079666775, "percentage": 84.16, "elapsed_time": "3:42:28", "remaining_time": "0:41:51", "throughput": 8695.31, "total_tokens": 116072872} +{"current_steps": 172260, "total_steps": 204665, "loss": 0.0, "lr": 1.4889045747827111e-07, "epoch": 4.208340458798524, "percentage": 84.17, "elapsed_time": "3:42:29", "remaining_time": "0:41:51", "throughput": 8695.31, "total_tokens": 116075880} +{"current_steps": 172265, "total_steps": 204665, "loss": 0.0, "lr": 1.4884569084478394e-07, "epoch": 4.208462609630372, "percentage": 84.17, "elapsed_time": "3:42:29", "remaining_time": "0:41:50", "throughput": 8695.34, "total_tokens": 116079336} +{"current_steps": 172270, "total_steps": 204665, "loss": 0.0, "lr": 1.4880093040112018e-07, "epoch": 4.208584760462219, "percentage": 84.17, "elapsed_time": "3:42:29", "remaining_time": "0:41:50", "throughput": 8695.34, "total_tokens": 116082280} +{"current_steps": 172275, "total_steps": 204665, "loss": 0.0, "lr": 1.4875617614760493e-07, "epoch": 4.208706911294066, "percentage": 84.17, "elapsed_time": "3:42:30", "remaining_time": "0:41:50", "throughput": 8695.36, "total_tokens": 116085608} +{"current_steps": 172280, "total_steps": 204665, "loss": 0.0, "lr": 1.4871142808456349e-07, "epoch": 4.208829062125913, "percentage": 84.18, "elapsed_time": "3:42:30", "remaining_time": "0:41:49", "throughput": 8695.37, "total_tokens": 116088744} +{"current_steps": 172285, "total_steps": 204665, "loss": 0.0, "lr": 1.4866668621232182e-07, "epoch": 4.208951212957761, "percentage": 84.18, "elapsed_time": "3:42:30", "remaining_time": "0:41:49", "throughput": 8695.39, "total_tokens": 116092008} +{"current_steps": 172290, "total_steps": 204665, "loss": 0.0, "lr": 1.4862195053120464e-07, "epoch": 4.209073363789607, "percentage": 84.18, "elapsed_time": "3:42:31", "remaining_time": "0:41:48", "throughput": 8695.4, "total_tokens": 116095144} +{"current_steps": 172295, "total_steps": 204665, "loss": 0.0, "lr": 1.4857722104153792e-07, "epoch": 4.209195514621454, "percentage": 84.18, "elapsed_time": "3:42:31", "remaining_time": "0:41:48", "throughput": 8695.46, "total_tokens": 116099048} +{"current_steps": 172300, "total_steps": 204665, "loss": 0.0, "lr": 1.485324977436464e-07, "epoch": 4.209317665453302, "percentage": 84.19, "elapsed_time": "3:42:32", "remaining_time": "0:41:48", "throughput": 8695.49, "total_tokens": 116102504} +{"current_steps": 172305, "total_steps": 204665, "loss": 0.0, "lr": 1.4848778063785583e-07, "epoch": 4.2094398162851485, "percentage": 84.19, "elapsed_time": "3:42:32", "remaining_time": "0:41:47", "throughput": 8695.49, "total_tokens": 116105512} +{"current_steps": 172310, "total_steps": 204665, "loss": 0.0, "lr": 1.4844306972449093e-07, "epoch": 4.209561967116996, "percentage": 84.19, "elapsed_time": "3:42:32", "remaining_time": "0:41:47", "throughput": 8695.51, "total_tokens": 116108648} +{"current_steps": 172315, "total_steps": 204665, "loss": 0.0, "lr": 1.4839836500387703e-07, "epoch": 4.209684117948843, "percentage": 84.19, "elapsed_time": "3:42:33", "remaining_time": "0:41:46", "throughput": 8695.53, "total_tokens": 116111912} +{"current_steps": 172320, "total_steps": 204665, "loss": 0.0, "lr": 1.4835366647633963e-07, "epoch": 4.2098062687806905, "percentage": 84.2, "elapsed_time": "3:42:33", "remaining_time": "0:41:46", "throughput": 8695.55, "total_tokens": 116115176} +{"current_steps": 172325, "total_steps": 204665, "loss": 0.0002, "lr": 1.48308974142203e-07, "epoch": 4.209928419612537, "percentage": 84.2, "elapsed_time": "3:42:33", "remaining_time": "0:41:46", "throughput": 8695.56, "total_tokens": 116118312} +{"current_steps": 172330, "total_steps": 204665, "loss": 0.0, "lr": 1.4826428800179303e-07, "epoch": 4.210050570444385, "percentage": 84.2, "elapsed_time": "3:42:34", "remaining_time": "0:41:45", "throughput": 8695.58, "total_tokens": 116121704} +{"current_steps": 172335, "total_steps": 204665, "loss": 0.0, "lr": 1.4821960805543388e-07, "epoch": 4.210172721276232, "percentage": 84.2, "elapsed_time": "3:42:34", "remaining_time": "0:41:45", "throughput": 8695.62, "total_tokens": 116125352} +{"current_steps": 172340, "total_steps": 204665, "loss": 0.0, "lr": 1.4817493430345084e-07, "epoch": 4.210294872108079, "percentage": 84.21, "elapsed_time": "3:42:34", "remaining_time": "0:41:44", "throughput": 8695.62, "total_tokens": 116128296} +{"current_steps": 172345, "total_steps": 204665, "loss": 0.0, "lr": 1.48130266746169e-07, "epoch": 4.210417022939926, "percentage": 84.21, "elapsed_time": "3:42:35", "remaining_time": "0:41:44", "throughput": 8695.67, "total_tokens": 116131944} +{"current_steps": 172350, "total_steps": 204665, "loss": 0.0489, "lr": 1.480856053839129e-07, "epoch": 4.210539173771774, "percentage": 84.21, "elapsed_time": "3:42:35", "remaining_time": "0:41:44", "throughput": 8695.7, "total_tokens": 116135528} +{"current_steps": 172355, "total_steps": 204665, "loss": 0.0, "lr": 1.4804095021700746e-07, "epoch": 4.21066132460362, "percentage": 84.21, "elapsed_time": "3:42:35", "remaining_time": "0:41:43", "throughput": 8695.75, "total_tokens": 116139304} +{"current_steps": 172360, "total_steps": 204665, "loss": 0.0, "lr": 1.4799630124577733e-07, "epoch": 4.210783475435468, "percentage": 84.22, "elapsed_time": "3:42:36", "remaining_time": "0:41:43", "throughput": 8695.78, "total_tokens": 116142760} +{"current_steps": 172365, "total_steps": 204665, "loss": 0.0, "lr": 1.4795165847054735e-07, "epoch": 4.210905626267315, "percentage": 84.22, "elapsed_time": "3:42:36", "remaining_time": "0:41:42", "throughput": 8695.79, "total_tokens": 116145832} +{"current_steps": 172370, "total_steps": 204665, "loss": 0.0, "lr": 1.4790702189164194e-07, "epoch": 4.211027777099162, "percentage": 84.22, "elapsed_time": "3:42:36", "remaining_time": "0:41:42", "throughput": 8695.8, "total_tokens": 116148904} +{"current_steps": 172375, "total_steps": 204665, "loss": 0.0, "lr": 1.4786239150938594e-07, "epoch": 4.211149927931009, "percentage": 84.22, "elapsed_time": "3:42:37", "remaining_time": "0:41:42", "throughput": 8695.81, "total_tokens": 116152104} +{"current_steps": 172380, "total_steps": 204665, "loss": 0.0, "lr": 1.47817767324104e-07, "epoch": 4.211272078762857, "percentage": 84.23, "elapsed_time": "3:42:37", "remaining_time": "0:41:41", "throughput": 8695.86, "total_tokens": 116155816} +{"current_steps": 172385, "total_steps": 204665, "loss": 0.0, "lr": 1.4777314933612016e-07, "epoch": 4.2113942295947036, "percentage": 84.23, "elapsed_time": "3:42:37", "remaining_time": "0:41:41", "throughput": 8695.87, "total_tokens": 116159016} +{"current_steps": 172390, "total_steps": 204665, "loss": 0.0, "lr": 1.4772853754575942e-07, "epoch": 4.21151638042655, "percentage": 84.23, "elapsed_time": "3:42:38", "remaining_time": "0:41:40", "throughput": 8695.89, "total_tokens": 116162344} +{"current_steps": 172395, "total_steps": 204665, "loss": 0.0001, "lr": 1.4768393195334583e-07, "epoch": 4.211638531258398, "percentage": 84.23, "elapsed_time": "3:42:38", "remaining_time": "0:41:40", "throughput": 8695.9, "total_tokens": 116165480} +{"current_steps": 172400, "total_steps": 204665, "loss": 0.0, "lr": 1.476393325592038e-07, "epoch": 4.211760682090245, "percentage": 84.24, "elapsed_time": "3:42:38", "remaining_time": "0:41:40", "throughput": 8695.94, "total_tokens": 116169000} +{"current_steps": 172405, "total_steps": 204665, "loss": 0.0, "lr": 1.475947393636582e-07, "epoch": 4.211882832922092, "percentage": 84.24, "elapsed_time": "3:42:39", "remaining_time": "0:41:39", "throughput": 8695.96, "total_tokens": 116172264} +{"current_steps": 172410, "total_steps": 204665, "loss": 0.0, "lr": 1.475501523670325e-07, "epoch": 4.212004983753939, "percentage": 84.24, "elapsed_time": "3:42:39", "remaining_time": "0:41:39", "throughput": 8695.99, "total_tokens": 116175848} +{"current_steps": 172415, "total_steps": 204665, "loss": 0.0, "lr": 1.475055715696517e-07, "epoch": 4.212127134585787, "percentage": 84.24, "elapsed_time": "3:42:40", "remaining_time": "0:41:38", "throughput": 8696.04, "total_tokens": 116179560} +{"current_steps": 172420, "total_steps": 204665, "loss": 0.0, "lr": 1.4746099697183945e-07, "epoch": 4.212249285417633, "percentage": 84.24, "elapsed_time": "3:42:40", "remaining_time": "0:41:38", "throughput": 8696.06, "total_tokens": 116182824} +{"current_steps": 172425, "total_steps": 204665, "loss": 0.0, "lr": 1.4741642857392045e-07, "epoch": 4.212371436249481, "percentage": 84.25, "elapsed_time": "3:42:40", "remaining_time": "0:41:38", "throughput": 8696.09, "total_tokens": 116186280} +{"current_steps": 172430, "total_steps": 204665, "loss": 0.0, "lr": 1.4737186637621812e-07, "epoch": 4.212493587081328, "percentage": 84.25, "elapsed_time": "3:42:41", "remaining_time": "0:41:37", "throughput": 8696.12, "total_tokens": 116189736} +{"current_steps": 172435, "total_steps": 204665, "loss": 0.0, "lr": 1.4732731037905698e-07, "epoch": 4.2126157379131755, "percentage": 84.25, "elapsed_time": "3:42:41", "remaining_time": "0:41:37", "throughput": 8696.13, "total_tokens": 116192872} +{"current_steps": 172440, "total_steps": 204665, "loss": 0.0, "lr": 1.4728276058276122e-07, "epoch": 4.212737888745022, "percentage": 84.25, "elapsed_time": "3:42:41", "remaining_time": "0:41:37", "throughput": 8696.12, "total_tokens": 116195752} +{"current_steps": 172445, "total_steps": 204665, "loss": 0.0, "lr": 1.4723821698765437e-07, "epoch": 4.21286003957687, "percentage": 84.26, "elapsed_time": "3:42:42", "remaining_time": "0:41:36", "throughput": 8696.13, "total_tokens": 116198824} +{"current_steps": 172450, "total_steps": 204665, "loss": 0.0, "lr": 1.471936795940607e-07, "epoch": 4.212982190408717, "percentage": 84.26, "elapsed_time": "3:42:42", "remaining_time": "0:41:36", "throughput": 8696.14, "total_tokens": 116202024} +{"current_steps": 172455, "total_steps": 204665, "loss": 0.0, "lr": 1.4714914840230385e-07, "epoch": 4.213104341240564, "percentage": 84.26, "elapsed_time": "3:42:42", "remaining_time": "0:41:35", "throughput": 8696.15, "total_tokens": 116205032} +{"current_steps": 172460, "total_steps": 204665, "loss": 0.0, "lr": 1.471046234127079e-07, "epoch": 4.213226492072411, "percentage": 84.26, "elapsed_time": "3:42:43", "remaining_time": "0:41:35", "throughput": 8696.16, "total_tokens": 116208296} +{"current_steps": 172465, "total_steps": 204665, "loss": 0.0, "lr": 1.4706010462559638e-07, "epoch": 4.213348642904259, "percentage": 84.27, "elapsed_time": "3:42:43", "remaining_time": "0:41:35", "throughput": 8696.19, "total_tokens": 116211688} +{"current_steps": 172470, "total_steps": 204665, "loss": 0.0, "lr": 1.470155920412932e-07, "epoch": 4.213470793736105, "percentage": 84.27, "elapsed_time": "3:42:43", "remaining_time": "0:41:34", "throughput": 8696.2, "total_tokens": 116214824} +{"current_steps": 172475, "total_steps": 204665, "loss": 0.0, "lr": 1.4697108566012228e-07, "epoch": 4.213592944567952, "percentage": 84.27, "elapsed_time": "3:42:44", "remaining_time": "0:41:34", "throughput": 8696.22, "total_tokens": 116218088} +{"current_steps": 172480, "total_steps": 204665, "loss": 0.0, "lr": 1.46926585482407e-07, "epoch": 4.2137150953998, "percentage": 84.27, "elapsed_time": "3:42:44", "remaining_time": "0:41:33", "throughput": 8696.22, "total_tokens": 116221096} +{"current_steps": 172485, "total_steps": 204665, "loss": 0.0, "lr": 1.4688209150847085e-07, "epoch": 4.2138372462316465, "percentage": 84.28, "elapsed_time": "3:42:44", "remaining_time": "0:41:33", "throughput": 8696.22, "total_tokens": 116224040} +{"current_steps": 172490, "total_steps": 204665, "loss": 0.0, "lr": 1.4683760373863785e-07, "epoch": 4.213959397063494, "percentage": 84.28, "elapsed_time": "3:42:45", "remaining_time": "0:41:33", "throughput": 8696.22, "total_tokens": 116227048} +{"current_steps": 172495, "total_steps": 204665, "loss": 0.0, "lr": 1.4679312217323102e-07, "epoch": 4.214081547895341, "percentage": 84.28, "elapsed_time": "3:42:45", "remaining_time": "0:41:32", "throughput": 8696.28, "total_tokens": 116230824} +{"current_steps": 172500, "total_steps": 204665, "loss": 0.0, "lr": 1.4674864681257438e-07, "epoch": 4.2142036987271885, "percentage": 84.28, "elapsed_time": "3:42:45", "remaining_time": "0:41:32", "throughput": 8696.29, "total_tokens": 116233960} +{"current_steps": 172505, "total_steps": 204665, "loss": 0.0, "lr": 1.4670417765699072e-07, "epoch": 4.214325849559035, "percentage": 84.29, "elapsed_time": "3:42:46", "remaining_time": "0:41:31", "throughput": 8696.29, "total_tokens": 116236904} +{"current_steps": 172510, "total_steps": 204665, "loss": 0.0, "lr": 1.4665971470680417e-07, "epoch": 4.214448000390883, "percentage": 84.29, "elapsed_time": "3:42:46", "remaining_time": "0:41:31", "throughput": 8696.31, "total_tokens": 116240296} +{"current_steps": 172515, "total_steps": 204665, "loss": 0.0, "lr": 1.4661525796233732e-07, "epoch": 4.21457015122273, "percentage": 84.29, "elapsed_time": "3:42:46", "remaining_time": "0:41:31", "throughput": 8696.36, "total_tokens": 116244072} +{"current_steps": 172520, "total_steps": 204665, "loss": 0.0, "lr": 1.4657080742391414e-07, "epoch": 4.214692302054577, "percentage": 84.29, "elapsed_time": "3:42:47", "remaining_time": "0:41:30", "throughput": 8696.38, "total_tokens": 116247400} +{"current_steps": 172525, "total_steps": 204665, "loss": 0.0, "lr": 1.465263630918574e-07, "epoch": 4.214814452886424, "percentage": 84.3, "elapsed_time": "3:42:47", "remaining_time": "0:41:30", "throughput": 8696.41, "total_tokens": 116250792} +{"current_steps": 172530, "total_steps": 204665, "loss": 0.0, "lr": 1.4648192496649047e-07, "epoch": 4.214936603718272, "percentage": 84.3, "elapsed_time": "3:42:48", "remaining_time": "0:41:29", "throughput": 8696.42, "total_tokens": 116253928} +{"current_steps": 172535, "total_steps": 204665, "loss": 0.0, "lr": 1.464374930481368e-07, "epoch": 4.215058754550118, "percentage": 84.3, "elapsed_time": "3:42:48", "remaining_time": "0:41:29", "throughput": 8696.47, "total_tokens": 116257768} +{"current_steps": 172540, "total_steps": 204665, "loss": 0.0, "lr": 1.46393067337119e-07, "epoch": 4.215180905381966, "percentage": 84.3, "elapsed_time": "3:42:48", "remaining_time": "0:41:29", "throughput": 8696.52, "total_tokens": 116261480} +{"current_steps": 172545, "total_steps": 204665, "loss": 0.0, "lr": 1.4634864783376055e-07, "epoch": 4.215303056213813, "percentage": 84.31, "elapsed_time": "3:42:49", "remaining_time": "0:41:28", "throughput": 8696.54, "total_tokens": 116264808} +{"current_steps": 172550, "total_steps": 204665, "loss": 0.0, "lr": 1.4630423453838427e-07, "epoch": 4.21542520704566, "percentage": 84.31, "elapsed_time": "3:42:49", "remaining_time": "0:41:28", "throughput": 8696.59, "total_tokens": 116268520} +{"current_steps": 172555, "total_steps": 204665, "loss": 0.0, "lr": 1.4625982745131315e-07, "epoch": 4.215547357877507, "percentage": 84.31, "elapsed_time": "3:42:49", "remaining_time": "0:41:27", "throughput": 8696.6, "total_tokens": 116271720} +{"current_steps": 172560, "total_steps": 204665, "loss": 0.0, "lr": 1.4621542657287033e-07, "epoch": 4.215669508709354, "percentage": 84.31, "elapsed_time": "3:42:50", "remaining_time": "0:41:27", "throughput": 8696.62, "total_tokens": 116274984} +{"current_steps": 172565, "total_steps": 204665, "loss": 0.0, "lr": 1.4617103190337853e-07, "epoch": 4.2157916595412015, "percentage": 84.32, "elapsed_time": "3:42:50", "remaining_time": "0:41:27", "throughput": 8696.66, "total_tokens": 116278504} +{"current_steps": 172570, "total_steps": 204665, "loss": 0.0, "lr": 1.4612664344316073e-07, "epoch": 4.215913810373048, "percentage": 84.32, "elapsed_time": "3:42:50", "remaining_time": "0:41:26", "throughput": 8696.68, "total_tokens": 116281896} +{"current_steps": 172575, "total_steps": 204665, "loss": 0.0, "lr": 1.4608226119253942e-07, "epoch": 4.216035961204896, "percentage": 84.32, "elapsed_time": "3:42:51", "remaining_time": "0:41:26", "throughput": 8696.72, "total_tokens": 116285416} +{"current_steps": 172580, "total_steps": 204665, "loss": 0.0, "lr": 1.4603788515183792e-07, "epoch": 4.216158112036743, "percentage": 84.32, "elapsed_time": "3:42:51", "remaining_time": "0:41:25", "throughput": 8696.74, "total_tokens": 116288808} +{"current_steps": 172585, "total_steps": 204665, "loss": 0.0, "lr": 1.4599351532137848e-07, "epoch": 4.21628026286859, "percentage": 84.33, "elapsed_time": "3:42:51", "remaining_time": "0:41:25", "throughput": 8696.76, "total_tokens": 116292072} +{"current_steps": 172590, "total_steps": 204665, "loss": 0.0, "lr": 1.459491517014837e-07, "epoch": 4.216402413700437, "percentage": 84.33, "elapsed_time": "3:42:52", "remaining_time": "0:41:25", "throughput": 8696.81, "total_tokens": 116295784} +{"current_steps": 172595, "total_steps": 204665, "loss": 0.1056, "lr": 1.4590479429247672e-07, "epoch": 4.216524564532285, "percentage": 84.33, "elapsed_time": "3:42:52", "remaining_time": "0:41:24", "throughput": 8696.86, "total_tokens": 116299624} +{"current_steps": 172600, "total_steps": 204665, "loss": 0.0, "lr": 1.458604430946795e-07, "epoch": 4.216646715364131, "percentage": 84.33, "elapsed_time": "3:42:52", "remaining_time": "0:41:24", "throughput": 8696.87, "total_tokens": 116302696} +{"current_steps": 172605, "total_steps": 204665, "loss": 0.0, "lr": 1.45816098108415e-07, "epoch": 4.216768866195979, "percentage": 84.34, "elapsed_time": "3:42:53", "remaining_time": "0:41:23", "throughput": 8696.91, "total_tokens": 116306408} +{"current_steps": 172610, "total_steps": 204665, "loss": 0.0, "lr": 1.4577175933400554e-07, "epoch": 4.216891017027826, "percentage": 84.34, "elapsed_time": "3:42:53", "remaining_time": "0:41:23", "throughput": 8696.92, "total_tokens": 116309416} +{"current_steps": 172615, "total_steps": 204665, "loss": 0.0, "lr": 1.4572742677177375e-07, "epoch": 4.217013167859673, "percentage": 84.34, "elapsed_time": "3:42:53", "remaining_time": "0:41:23", "throughput": 8696.91, "total_tokens": 116312168} +{"current_steps": 172620, "total_steps": 204665, "loss": 0.0, "lr": 1.4568310042204156e-07, "epoch": 4.21713531869152, "percentage": 84.34, "elapsed_time": "3:42:54", "remaining_time": "0:41:22", "throughput": 8696.93, "total_tokens": 116315496} +{"current_steps": 172625, "total_steps": 204665, "loss": 0.0, "lr": 1.4563878028513177e-07, "epoch": 4.217257469523368, "percentage": 84.35, "elapsed_time": "3:42:54", "remaining_time": "0:41:22", "throughput": 8696.95, "total_tokens": 116318888} +{"current_steps": 172630, "total_steps": 204665, "loss": 0.0, "lr": 1.4559446636136675e-07, "epoch": 4.2173796203552145, "percentage": 84.35, "elapsed_time": "3:42:55", "remaining_time": "0:41:22", "throughput": 8696.97, "total_tokens": 116322088} +{"current_steps": 172635, "total_steps": 204665, "loss": 0.0, "lr": 1.4555015865106835e-07, "epoch": 4.217501771187062, "percentage": 84.35, "elapsed_time": "3:42:55", "remaining_time": "0:41:21", "throughput": 8696.98, "total_tokens": 116325288} +{"current_steps": 172640, "total_steps": 204665, "loss": 0.0, "lr": 1.455058571545593e-07, "epoch": 4.217623922018909, "percentage": 84.35, "elapsed_time": "3:42:55", "remaining_time": "0:41:21", "throughput": 8697.01, "total_tokens": 116328744} +{"current_steps": 172645, "total_steps": 204665, "loss": 0.0, "lr": 1.454615618721612e-07, "epoch": 4.2177460728507565, "percentage": 84.35, "elapsed_time": "3:42:56", "remaining_time": "0:41:20", "throughput": 8697.06, "total_tokens": 116332584} +{"current_steps": 172650, "total_steps": 204665, "loss": 0.0, "lr": 1.4541727280419647e-07, "epoch": 4.217868223682603, "percentage": 84.36, "elapsed_time": "3:42:56", "remaining_time": "0:41:20", "throughput": 8697.1, "total_tokens": 116336232} +{"current_steps": 172655, "total_steps": 204665, "loss": 0.0, "lr": 1.4537298995098745e-07, "epoch": 4.21799037451445, "percentage": 84.36, "elapsed_time": "3:42:56", "remaining_time": "0:41:20", "throughput": 8697.11, "total_tokens": 116339240} +{"current_steps": 172660, "total_steps": 204665, "loss": 0.0, "lr": 1.4532871331285568e-07, "epoch": 4.218112525346298, "percentage": 84.36, "elapsed_time": "3:42:57", "remaining_time": "0:41:19", "throughput": 8697.14, "total_tokens": 116342760} +{"current_steps": 172665, "total_steps": 204665, "loss": 0.0, "lr": 1.4528444289012353e-07, "epoch": 4.218234676178144, "percentage": 84.36, "elapsed_time": "3:42:57", "remaining_time": "0:41:19", "throughput": 8697.15, "total_tokens": 116345896} +{"current_steps": 172670, "total_steps": 204665, "loss": 0.0001, "lr": 1.4524017868311268e-07, "epoch": 4.218356827009992, "percentage": 84.37, "elapsed_time": "3:42:57", "remaining_time": "0:41:18", "throughput": 8697.17, "total_tokens": 116349160} +{"current_steps": 172675, "total_steps": 204665, "loss": 0.0, "lr": 1.4519592069214538e-07, "epoch": 4.218478977841839, "percentage": 84.37, "elapsed_time": "3:42:58", "remaining_time": "0:41:18", "throughput": 8697.18, "total_tokens": 116352296} +{"current_steps": 172680, "total_steps": 204665, "loss": 0.0, "lr": 1.4515166891754292e-07, "epoch": 4.218601128673686, "percentage": 84.37, "elapsed_time": "3:42:58", "remaining_time": "0:41:18", "throughput": 8697.18, "total_tokens": 116355304} +{"current_steps": 172685, "total_steps": 204665, "loss": 0.0, "lr": 1.4510742335962777e-07, "epoch": 4.218723279505533, "percentage": 84.37, "elapsed_time": "3:42:58", "remaining_time": "0:41:17", "throughput": 8697.2, "total_tokens": 116358568} +{"current_steps": 172690, "total_steps": 204665, "loss": 0.0, "lr": 1.4506318401872143e-07, "epoch": 4.218845430337381, "percentage": 84.38, "elapsed_time": "3:42:59", "remaining_time": "0:41:17", "throughput": 8697.25, "total_tokens": 116362408} +{"current_steps": 172695, "total_steps": 204665, "loss": 0.0, "lr": 1.4501895089514525e-07, "epoch": 4.2189675811692275, "percentage": 84.38, "elapsed_time": "3:42:59", "remaining_time": "0:41:16", "throughput": 8697.3, "total_tokens": 116366120} +{"current_steps": 172700, "total_steps": 204665, "loss": 0.0, "lr": 1.449747239892215e-07, "epoch": 4.219089732001075, "percentage": 84.38, "elapsed_time": "3:42:59", "remaining_time": "0:41:16", "throughput": 8697.33, "total_tokens": 116369640} +{"current_steps": 172705, "total_steps": 204665, "loss": 0.0, "lr": 1.449305033012712e-07, "epoch": 4.219211882832922, "percentage": 84.38, "elapsed_time": "3:43:00", "remaining_time": "0:41:16", "throughput": 8697.33, "total_tokens": 116372648} +{"current_steps": 172710, "total_steps": 204665, "loss": 0.0, "lr": 1.4488628883161658e-07, "epoch": 4.21933403366477, "percentage": 84.39, "elapsed_time": "3:43:00", "remaining_time": "0:41:15", "throughput": 8697.37, "total_tokens": 116376168} +{"current_steps": 172715, "total_steps": 204665, "loss": 0.0, "lr": 1.4484208058057866e-07, "epoch": 4.219456184496616, "percentage": 84.39, "elapsed_time": "3:43:00", "remaining_time": "0:41:15", "throughput": 8697.37, "total_tokens": 116379240} +{"current_steps": 172720, "total_steps": 204665, "loss": 0.0002, "lr": 1.4479787854847904e-07, "epoch": 4.219578335328464, "percentage": 84.39, "elapsed_time": "3:43:01", "remaining_time": "0:41:14", "throughput": 8697.39, "total_tokens": 116382440} +{"current_steps": 172725, "total_steps": 204665, "loss": 0.0, "lr": 1.447536827356396e-07, "epoch": 4.219700486160311, "percentage": 84.39, "elapsed_time": "3:43:01", "remaining_time": "0:41:14", "throughput": 8697.41, "total_tokens": 116385640} +{"current_steps": 172730, "total_steps": 204665, "loss": 0.0, "lr": 1.4470949314238112e-07, "epoch": 4.219822636992158, "percentage": 84.4, "elapsed_time": "3:43:02", "remaining_time": "0:41:14", "throughput": 8697.43, "total_tokens": 116388968} +{"current_steps": 172735, "total_steps": 204665, "loss": 0.0, "lr": 1.4466530976902557e-07, "epoch": 4.219944787824005, "percentage": 84.4, "elapsed_time": "3:43:02", "remaining_time": "0:41:13", "throughput": 8697.45, "total_tokens": 116392296} +{"current_steps": 172740, "total_steps": 204665, "loss": 0.0, "lr": 1.446211326158936e-07, "epoch": 4.220066938655853, "percentage": 84.4, "elapsed_time": "3:43:02", "remaining_time": "0:41:13", "throughput": 8697.46, "total_tokens": 116395368} +{"current_steps": 172745, "total_steps": 204665, "loss": 0.0, "lr": 1.445769616833069e-07, "epoch": 4.220189089487699, "percentage": 84.4, "elapsed_time": "3:43:03", "remaining_time": "0:41:12", "throughput": 8697.5, "total_tokens": 116399080} +{"current_steps": 172750, "total_steps": 204665, "loss": 0.0, "lr": 1.4453279697158683e-07, "epoch": 4.220311240319546, "percentage": 84.41, "elapsed_time": "3:43:03", "remaining_time": "0:41:12", "throughput": 8697.51, "total_tokens": 116402152} +{"current_steps": 172755, "total_steps": 204665, "loss": 0.0, "lr": 1.4448863848105407e-07, "epoch": 4.220433391151394, "percentage": 84.41, "elapsed_time": "3:43:03", "remaining_time": "0:41:12", "throughput": 8697.55, "total_tokens": 116405736} +{"current_steps": 172760, "total_steps": 204665, "loss": 0.0, "lr": 1.444444862120303e-07, "epoch": 4.220555541983241, "percentage": 84.41, "elapsed_time": "3:43:04", "remaining_time": "0:41:11", "throughput": 8697.54, "total_tokens": 116408488} +{"current_steps": 172765, "total_steps": 204665, "loss": 0.0, "lr": 1.4440034016483614e-07, "epoch": 4.220677692815088, "percentage": 84.41, "elapsed_time": "3:43:04", "remaining_time": "0:41:11", "throughput": 8697.58, "total_tokens": 116412136} +{"current_steps": 172770, "total_steps": 204665, "loss": 0.0, "lr": 1.4435620033979302e-07, "epoch": 4.220799843646935, "percentage": 84.42, "elapsed_time": "3:43:04", "remaining_time": "0:41:10", "throughput": 8697.61, "total_tokens": 116415656} +{"current_steps": 172775, "total_steps": 204665, "loss": 0.0, "lr": 1.443120667372215e-07, "epoch": 4.220921994478783, "percentage": 84.42, "elapsed_time": "3:43:05", "remaining_time": "0:41:10", "throughput": 8697.64, "total_tokens": 116419112} +{"current_steps": 172780, "total_steps": 204665, "loss": 0.0, "lr": 1.4426793935744287e-07, "epoch": 4.221044145310629, "percentage": 84.42, "elapsed_time": "3:43:05", "remaining_time": "0:41:10", "throughput": 8697.69, "total_tokens": 116422952} +{"current_steps": 172785, "total_steps": 204665, "loss": 0.0, "lr": 1.442238182007781e-07, "epoch": 4.221166296142477, "percentage": 84.42, "elapsed_time": "3:43:05", "remaining_time": "0:41:09", "throughput": 8697.71, "total_tokens": 116426280} +{"current_steps": 172790, "total_steps": 204665, "loss": 0.0, "lr": 1.4417970326754803e-07, "epoch": 4.221288446974324, "percentage": 84.43, "elapsed_time": "3:43:06", "remaining_time": "0:41:09", "throughput": 8697.72, "total_tokens": 116429352} +{"current_steps": 172795, "total_steps": 204665, "loss": 0.0305, "lr": 1.44135594558073e-07, "epoch": 4.221410597806171, "percentage": 84.43, "elapsed_time": "3:43:06", "remaining_time": "0:41:08", "throughput": 8697.76, "total_tokens": 116433000} +{"current_steps": 172800, "total_steps": 204665, "loss": 0.0, "lr": 1.4409149207267434e-07, "epoch": 4.221532748638018, "percentage": 84.43, "elapsed_time": "3:43:06", "remaining_time": "0:41:08", "throughput": 8697.79, "total_tokens": 116436392} +{"current_steps": 172805, "total_steps": 204665, "loss": 0.0, "lr": 1.4404739581167236e-07, "epoch": 4.221654899469866, "percentage": 84.43, "elapsed_time": "3:43:07", "remaining_time": "0:41:08", "throughput": 8697.81, "total_tokens": 116439720} +{"current_steps": 172810, "total_steps": 204665, "loss": 0.0, "lr": 1.4400330577538822e-07, "epoch": 4.2217770503017125, "percentage": 84.44, "elapsed_time": "3:43:07", "remaining_time": "0:41:07", "throughput": 8697.85, "total_tokens": 116443240} +{"current_steps": 172815, "total_steps": 204665, "loss": 0.0, "lr": 1.43959221964142e-07, "epoch": 4.22189920113356, "percentage": 84.44, "elapsed_time": "3:43:07", "remaining_time": "0:41:07", "throughput": 8697.88, "total_tokens": 116446696} +{"current_steps": 172820, "total_steps": 204665, "loss": 0.0, "lr": 1.439151443782548e-07, "epoch": 4.222021351965407, "percentage": 84.44, "elapsed_time": "3:43:08", "remaining_time": "0:41:07", "throughput": 8697.94, "total_tokens": 116450792} +{"current_steps": 172825, "total_steps": 204665, "loss": 0.0, "lr": 1.4387107301804668e-07, "epoch": 4.2221435027972545, "percentage": 84.44, "elapsed_time": "3:43:08", "remaining_time": "0:41:06", "throughput": 8697.97, "total_tokens": 116454184} +{"current_steps": 172830, "total_steps": 204665, "loss": 0.0, "lr": 1.4382700788383873e-07, "epoch": 4.222265653629101, "percentage": 84.45, "elapsed_time": "3:43:09", "remaining_time": "0:41:06", "throughput": 8697.98, "total_tokens": 116457320} +{"current_steps": 172835, "total_steps": 204665, "loss": 0.0, "lr": 1.4378294897595068e-07, "epoch": 4.222387804460948, "percentage": 84.45, "elapsed_time": "3:43:09", "remaining_time": "0:41:05", "throughput": 8698.05, "total_tokens": 116461480} +{"current_steps": 172840, "total_steps": 204665, "loss": 0.0, "lr": 1.4373889629470336e-07, "epoch": 4.222509955292796, "percentage": 84.45, "elapsed_time": "3:43:09", "remaining_time": "0:41:05", "throughput": 8698.08, "total_tokens": 116464872} +{"current_steps": 172845, "total_steps": 204665, "loss": 0.0, "lr": 1.4369484984041735e-07, "epoch": 4.222632106124642, "percentage": 84.45, "elapsed_time": "3:43:10", "remaining_time": "0:41:05", "throughput": 8698.1, "total_tokens": 116468200} +{"current_steps": 172850, "total_steps": 204665, "loss": 0.0, "lr": 1.4365080961341246e-07, "epoch": 4.22275425695649, "percentage": 84.46, "elapsed_time": "3:43:10", "remaining_time": "0:41:04", "throughput": 8698.13, "total_tokens": 116471592} +{"current_steps": 172855, "total_steps": 204665, "loss": 0.0, "lr": 1.4360677561400947e-07, "epoch": 4.222876407788337, "percentage": 84.46, "elapsed_time": "3:43:10", "remaining_time": "0:41:04", "throughput": 8698.17, "total_tokens": 116475240} +{"current_steps": 172860, "total_steps": 204665, "loss": 0.0, "lr": 1.435627478425282e-07, "epoch": 4.222998558620184, "percentage": 84.46, "elapsed_time": "3:43:11", "remaining_time": "0:41:03", "throughput": 8698.23, "total_tokens": 116479144} +{"current_steps": 172865, "total_steps": 204665, "loss": 0.0, "lr": 1.4351872629928907e-07, "epoch": 4.223120709452031, "percentage": 84.46, "elapsed_time": "3:43:11", "remaining_time": "0:41:03", "throughput": 8698.25, "total_tokens": 116482472} +{"current_steps": 172870, "total_steps": 204665, "loss": 0.0, "lr": 1.4347471098461194e-07, "epoch": 4.223242860283879, "percentage": 84.46, "elapsed_time": "3:43:11", "remaining_time": "0:41:03", "throughput": 8698.26, "total_tokens": 116485544} +{"current_steps": 172875, "total_steps": 204665, "loss": 0.0, "lr": 1.434307018988171e-07, "epoch": 4.2233650111157255, "percentage": 84.47, "elapsed_time": "3:43:12", "remaining_time": "0:41:02", "throughput": 8698.29, "total_tokens": 116488936} +{"current_steps": 172880, "total_steps": 204665, "loss": 0.0, "lr": 1.4338669904222478e-07, "epoch": 4.223487161947573, "percentage": 84.47, "elapsed_time": "3:43:12", "remaining_time": "0:41:02", "throughput": 8698.3, "total_tokens": 116492008} +{"current_steps": 172885, "total_steps": 204665, "loss": 0.0, "lr": 1.4334270241515466e-07, "epoch": 4.22360931277942, "percentage": 84.47, "elapsed_time": "3:43:12", "remaining_time": "0:41:01", "throughput": 8698.32, "total_tokens": 116495400} +{"current_steps": 172890, "total_steps": 204665, "loss": 0.0, "lr": 1.4329871201792698e-07, "epoch": 4.2237314636112675, "percentage": 84.47, "elapsed_time": "3:43:13", "remaining_time": "0:41:01", "throughput": 8698.36, "total_tokens": 116498920} +{"current_steps": 172895, "total_steps": 204665, "loss": 0.0, "lr": 1.4325472785086147e-07, "epoch": 4.223853614443114, "percentage": 84.48, "elapsed_time": "3:43:13", "remaining_time": "0:41:01", "throughput": 8698.37, "total_tokens": 116502056} +{"current_steps": 172900, "total_steps": 204665, "loss": 0.0002, "lr": 1.4321074991427785e-07, "epoch": 4.223975765274962, "percentage": 84.48, "elapsed_time": "3:43:13", "remaining_time": "0:41:00", "throughput": 8698.39, "total_tokens": 116505320} +{"current_steps": 172905, "total_steps": 204665, "loss": 0.0, "lr": 1.431667782084962e-07, "epoch": 4.224097916106809, "percentage": 84.48, "elapsed_time": "3:43:14", "remaining_time": "0:41:00", "throughput": 8698.41, "total_tokens": 116508520} +{"current_steps": 172910, "total_steps": 204665, "loss": 0.0572, "lr": 1.4312281273383608e-07, "epoch": 4.224220066938656, "percentage": 84.48, "elapsed_time": "3:43:14", "remaining_time": "0:40:59", "throughput": 8698.42, "total_tokens": 116511720} +{"current_steps": 172915, "total_steps": 204665, "loss": 0.0, "lr": 1.4307885349061755e-07, "epoch": 4.224342217770503, "percentage": 84.49, "elapsed_time": "3:43:14", "remaining_time": "0:40:59", "throughput": 8698.43, "total_tokens": 116514792} +{"current_steps": 172920, "total_steps": 204665, "loss": 0.0, "lr": 1.4303490047915989e-07, "epoch": 4.22446436860235, "percentage": 84.49, "elapsed_time": "3:43:15", "remaining_time": "0:40:59", "throughput": 8698.45, "total_tokens": 116518056} +{"current_steps": 172925, "total_steps": 204665, "loss": 0.0, "lr": 1.429909536997831e-07, "epoch": 4.224586519434197, "percentage": 84.49, "elapsed_time": "3:43:15", "remaining_time": "0:40:58", "throughput": 8698.46, "total_tokens": 116521192} +{"current_steps": 172930, "total_steps": 204665, "loss": 0.0, "lr": 1.4294701315280645e-07, "epoch": 4.224708670266044, "percentage": 84.49, "elapsed_time": "3:43:15", "remaining_time": "0:40:58", "throughput": 8698.47, "total_tokens": 116524264} +{"current_steps": 172935, "total_steps": 204665, "loss": 0.0, "lr": 1.4290307883854958e-07, "epoch": 4.224830821097892, "percentage": 84.5, "elapsed_time": "3:43:16", "remaining_time": "0:40:57", "throughput": 8698.5, "total_tokens": 116527784} +{"current_steps": 172940, "total_steps": 204665, "loss": 0.0, "lr": 1.4285915075733225e-07, "epoch": 4.2249529719297385, "percentage": 84.5, "elapsed_time": "3:43:16", "remaining_time": "0:40:57", "throughput": 8698.51, "total_tokens": 116530920} +{"current_steps": 172945, "total_steps": 204665, "loss": 0.0, "lr": 1.428152289094735e-07, "epoch": 4.225075122761586, "percentage": 84.5, "elapsed_time": "3:43:16", "remaining_time": "0:40:57", "throughput": 8698.53, "total_tokens": 116534056} +{"current_steps": 172950, "total_steps": 204665, "loss": 0.0, "lr": 1.4277131329529323e-07, "epoch": 4.225197273593433, "percentage": 84.5, "elapsed_time": "3:43:17", "remaining_time": "0:40:56", "throughput": 8698.55, "total_tokens": 116537384} +{"current_steps": 172955, "total_steps": 204665, "loss": 0.0, "lr": 1.427274039151103e-07, "epoch": 4.2253194244252805, "percentage": 84.51, "elapsed_time": "3:43:17", "remaining_time": "0:40:56", "throughput": 8698.59, "total_tokens": 116540968} +{"current_steps": 172960, "total_steps": 204665, "loss": 0.0, "lr": 1.426835007692443e-07, "epoch": 4.225441575257127, "percentage": 84.51, "elapsed_time": "3:43:18", "remaining_time": "0:40:55", "throughput": 8698.6, "total_tokens": 116544168} +{"current_steps": 172965, "total_steps": 204665, "loss": 0.0, "lr": 1.4263960385801465e-07, "epoch": 4.225563726088975, "percentage": 84.51, "elapsed_time": "3:43:18", "remaining_time": "0:40:55", "throughput": 8698.6, "total_tokens": 116547112} +{"current_steps": 172970, "total_steps": 204665, "loss": 0.0, "lr": 1.4259571318174014e-07, "epoch": 4.225685876920822, "percentage": 84.51, "elapsed_time": "3:43:18", "remaining_time": "0:40:55", "throughput": 8698.61, "total_tokens": 116550120} +{"current_steps": 172975, "total_steps": 204665, "loss": 0.0, "lr": 1.4255182874074045e-07, "epoch": 4.225808027752669, "percentage": 84.52, "elapsed_time": "3:43:19", "remaining_time": "0:40:54", "throughput": 8698.63, "total_tokens": 116553320} +{"current_steps": 172980, "total_steps": 204665, "loss": 0.0, "lr": 1.4250795053533438e-07, "epoch": 4.225930178584516, "percentage": 84.52, "elapsed_time": "3:43:19", "remaining_time": "0:40:54", "throughput": 8698.65, "total_tokens": 116556776} +{"current_steps": 172985, "total_steps": 204665, "loss": 0.0, "lr": 1.4246407856584132e-07, "epoch": 4.226052329416364, "percentage": 84.52, "elapsed_time": "3:43:19", "remaining_time": "0:40:53", "throughput": 8698.67, "total_tokens": 116559912} +{"current_steps": 172990, "total_steps": 204665, "loss": 0.0, "lr": 1.4242021283257976e-07, "epoch": 4.22617448024821, "percentage": 84.52, "elapsed_time": "3:43:20", "remaining_time": "0:40:53", "throughput": 8698.68, "total_tokens": 116563112} +{"current_steps": 172995, "total_steps": 204665, "loss": 0.0, "lr": 1.4237635333586938e-07, "epoch": 4.226296631080058, "percentage": 84.53, "elapsed_time": "3:43:20", "remaining_time": "0:40:53", "throughput": 8698.7, "total_tokens": 116566312} +{"current_steps": 173000, "total_steps": 204665, "loss": 0.0529, "lr": 1.423325000760287e-07, "epoch": 4.226418781911905, "percentage": 84.53, "elapsed_time": "3:43:20", "remaining_time": "0:40:52", "throughput": 8698.73, "total_tokens": 116569768} +{"current_steps": 173005, "total_steps": 204665, "loss": 0.0, "lr": 1.422886530533769e-07, "epoch": 4.226540932743752, "percentage": 84.53, "elapsed_time": "3:43:21", "remaining_time": "0:40:52", "throughput": 8698.75, "total_tokens": 116573032} +{"current_steps": 173010, "total_steps": 204665, "loss": 0.0, "lr": 1.422448122682327e-07, "epoch": 4.226663083575599, "percentage": 84.53, "elapsed_time": "3:43:21", "remaining_time": "0:40:52", "throughput": 8698.77, "total_tokens": 116576296} +{"current_steps": 173015, "total_steps": 204665, "loss": 0.0002, "lr": 1.4220097772091478e-07, "epoch": 4.226785234407446, "percentage": 84.54, "elapsed_time": "3:43:21", "remaining_time": "0:40:51", "throughput": 8698.81, "total_tokens": 116580008} +{"current_steps": 173020, "total_steps": 204665, "loss": 0.0, "lr": 1.4215714941174227e-07, "epoch": 4.2269073852392935, "percentage": 84.54, "elapsed_time": "3:43:22", "remaining_time": "0:40:51", "throughput": 8698.86, "total_tokens": 116583720} +{"current_steps": 173025, "total_steps": 204665, "loss": 0.0, "lr": 1.4211332734103343e-07, "epoch": 4.22702953607114, "percentage": 84.54, "elapsed_time": "3:43:22", "remaining_time": "0:40:50", "throughput": 8698.88, "total_tokens": 116586920} +{"current_steps": 173030, "total_steps": 204665, "loss": 0.0, "lr": 1.4206951150910727e-07, "epoch": 4.227151686902988, "percentage": 84.54, "elapsed_time": "3:43:22", "remaining_time": "0:40:50", "throughput": 8698.91, "total_tokens": 116590440} +{"current_steps": 173035, "total_steps": 204665, "loss": 0.0, "lr": 1.420257019162826e-07, "epoch": 4.227273837734835, "percentage": 84.55, "elapsed_time": "3:43:23", "remaining_time": "0:40:50", "throughput": 8698.93, "total_tokens": 116593576} +{"current_steps": 173040, "total_steps": 204665, "loss": 0.0, "lr": 1.4198189856287746e-07, "epoch": 4.227395988566682, "percentage": 84.55, "elapsed_time": "3:43:23", "remaining_time": "0:40:49", "throughput": 8698.96, "total_tokens": 116597096} +{"current_steps": 173045, "total_steps": 204665, "loss": 0.0, "lr": 1.4193810144921114e-07, "epoch": 4.227518139398529, "percentage": 84.55, "elapsed_time": "3:43:24", "remaining_time": "0:40:49", "throughput": 8698.36, "total_tokens": 116600488} +{"current_steps": 173050, "total_steps": 204665, "loss": 0.0, "lr": 1.4189431057560142e-07, "epoch": 4.227640290230377, "percentage": 84.55, "elapsed_time": "3:43:25", "remaining_time": "0:40:49", "throughput": 8698.4, "total_tokens": 116604072} +{"current_steps": 173055, "total_steps": 204665, "loss": 0.0, "lr": 1.4185052594236702e-07, "epoch": 4.227762441062223, "percentage": 84.56, "elapsed_time": "3:43:25", "remaining_time": "0:40:48", "throughput": 8698.43, "total_tokens": 116607464} +{"current_steps": 173060, "total_steps": 204665, "loss": 0.0, "lr": 1.418067475498267e-07, "epoch": 4.227884591894071, "percentage": 84.56, "elapsed_time": "3:43:25", "remaining_time": "0:40:48", "throughput": 8698.46, "total_tokens": 116610856} +{"current_steps": 173065, "total_steps": 204665, "loss": 0.0, "lr": 1.417629753982983e-07, "epoch": 4.228006742725918, "percentage": 84.56, "elapsed_time": "3:43:26", "remaining_time": "0:40:47", "throughput": 8698.46, "total_tokens": 116613864} +{"current_steps": 173070, "total_steps": 204665, "loss": 0.0, "lr": 1.4171920948810056e-07, "epoch": 4.2281288935577654, "percentage": 84.56, "elapsed_time": "3:43:26", "remaining_time": "0:40:47", "throughput": 8698.47, "total_tokens": 116616872} +{"current_steps": 173075, "total_steps": 204665, "loss": 0.0, "lr": 1.4167544981955148e-07, "epoch": 4.228251044389612, "percentage": 84.57, "elapsed_time": "3:43:26", "remaining_time": "0:40:47", "throughput": 8698.5, "total_tokens": 116620392} +{"current_steps": 173080, "total_steps": 204665, "loss": 0.0, "lr": 1.4163169639296946e-07, "epoch": 4.22837319522146, "percentage": 84.57, "elapsed_time": "3:43:27", "remaining_time": "0:40:46", "throughput": 8698.54, "total_tokens": 116623976} +{"current_steps": 173085, "total_steps": 204665, "loss": 0.0, "lr": 1.4158794920867245e-07, "epoch": 4.228495346053307, "percentage": 84.57, "elapsed_time": "3:43:27", "remaining_time": "0:40:46", "throughput": 8698.59, "total_tokens": 116627752} +{"current_steps": 173090, "total_steps": 204665, "loss": 0.0, "lr": 1.4154420826697888e-07, "epoch": 4.228617496885154, "percentage": 84.57, "elapsed_time": "3:43:28", "remaining_time": "0:40:45", "throughput": 8698.62, "total_tokens": 116631144} +{"current_steps": 173095, "total_steps": 204665, "loss": 0.0, "lr": 1.415004735682068e-07, "epoch": 4.228739647717001, "percentage": 84.57, "elapsed_time": "3:43:28", "remaining_time": "0:40:45", "throughput": 8698.66, "total_tokens": 116634792} +{"current_steps": 173100, "total_steps": 204665, "loss": 0.0, "lr": 1.4145674511267425e-07, "epoch": 4.228861798548848, "percentage": 84.58, "elapsed_time": "3:43:28", "remaining_time": "0:40:45", "throughput": 8698.69, "total_tokens": 116638248} +{"current_steps": 173105, "total_steps": 204665, "loss": 0.0, "lr": 1.414130229006989e-07, "epoch": 4.228983949380695, "percentage": 84.58, "elapsed_time": "3:43:29", "remaining_time": "0:40:44", "throughput": 8698.71, "total_tokens": 116641448} +{"current_steps": 173110, "total_steps": 204665, "loss": 0.0, "lr": 1.4136930693259918e-07, "epoch": 4.229106100212542, "percentage": 84.58, "elapsed_time": "3:43:29", "remaining_time": "0:40:44", "throughput": 8698.75, "total_tokens": 116645096} +{"current_steps": 173115, "total_steps": 204665, "loss": 0.0, "lr": 1.4132559720869264e-07, "epoch": 4.22922825104439, "percentage": 84.58, "elapsed_time": "3:43:29", "remaining_time": "0:40:43", "throughput": 8698.79, "total_tokens": 116648680} +{"current_steps": 173120, "total_steps": 204665, "loss": 0.0, "lr": 1.4128189372929755e-07, "epoch": 4.2293504018762365, "percentage": 84.59, "elapsed_time": "3:43:30", "remaining_time": "0:40:43", "throughput": 8698.8, "total_tokens": 116651880} +{"current_steps": 173125, "total_steps": 204665, "loss": 0.0, "lr": 1.4123819649473123e-07, "epoch": 4.229472552708084, "percentage": 84.59, "elapsed_time": "3:43:30", "remaining_time": "0:40:43", "throughput": 8698.83, "total_tokens": 116655208} +{"current_steps": 173130, "total_steps": 204665, "loss": 0.0, "lr": 1.4119450550531198e-07, "epoch": 4.229594703539931, "percentage": 84.59, "elapsed_time": "3:43:30", "remaining_time": "0:40:42", "throughput": 8698.88, "total_tokens": 116658920} +{"current_steps": 173135, "total_steps": 204665, "loss": 0.0, "lr": 1.411508207613571e-07, "epoch": 4.2297168543717785, "percentage": 84.59, "elapsed_time": "3:43:31", "remaining_time": "0:40:42", "throughput": 8698.89, "total_tokens": 116662120} +{"current_steps": 173140, "total_steps": 204665, "loss": 0.0, "lr": 1.4110714226318455e-07, "epoch": 4.229839005203625, "percentage": 84.6, "elapsed_time": "3:43:31", "remaining_time": "0:40:41", "throughput": 8698.92, "total_tokens": 116665512} +{"current_steps": 173145, "total_steps": 204665, "loss": 0.0, "lr": 1.4106347001111173e-07, "epoch": 4.229961156035473, "percentage": 84.6, "elapsed_time": "3:43:31", "remaining_time": "0:40:41", "throughput": 8698.96, "total_tokens": 116669160} +{"current_steps": 173150, "total_steps": 204665, "loss": 0.0, "lr": 1.4101980400545643e-07, "epoch": 4.23008330686732, "percentage": 84.6, "elapsed_time": "3:43:32", "remaining_time": "0:40:41", "throughput": 8698.99, "total_tokens": 116672488} +{"current_steps": 173155, "total_steps": 204665, "loss": 0.0, "lr": 1.4097614424653624e-07, "epoch": 4.230205457699167, "percentage": 84.6, "elapsed_time": "3:43:32", "remaining_time": "0:40:40", "throughput": 8699.01, "total_tokens": 116675752} +{"current_steps": 173160, "total_steps": 204665, "loss": 0.0, "lr": 1.409324907346685e-07, "epoch": 4.230327608531014, "percentage": 84.61, "elapsed_time": "3:43:32", "remaining_time": "0:40:40", "throughput": 8699.02, "total_tokens": 116678952} +{"current_steps": 173165, "total_steps": 204665, "loss": 0.0, "lr": 1.4088884347017094e-07, "epoch": 4.230449759362862, "percentage": 84.61, "elapsed_time": "3:43:33", "remaining_time": "0:40:39", "throughput": 8699.05, "total_tokens": 116682280} +{"current_steps": 173170, "total_steps": 204665, "loss": 0.0, "lr": 1.4084520245336052e-07, "epoch": 4.230571910194708, "percentage": 84.61, "elapsed_time": "3:43:33", "remaining_time": "0:40:39", "throughput": 8699.07, "total_tokens": 116685480} +{"current_steps": 173175, "total_steps": 204665, "loss": 0.0, "lr": 1.408015676845551e-07, "epoch": 4.230694061026556, "percentage": 84.61, "elapsed_time": "3:43:33", "remaining_time": "0:40:39", "throughput": 8699.09, "total_tokens": 116688808} +{"current_steps": 173180, "total_steps": 204665, "loss": 0.0, "lr": 1.4075793916407154e-07, "epoch": 4.230816211858403, "percentage": 84.62, "elapsed_time": "3:43:34", "remaining_time": "0:40:38", "throughput": 8699.1, "total_tokens": 116692008} +{"current_steps": 173185, "total_steps": 204665, "loss": 0.0, "lr": 1.4071431689222735e-07, "epoch": 4.2309383626902495, "percentage": 84.62, "elapsed_time": "3:43:34", "remaining_time": "0:40:38", "throughput": 8699.12, "total_tokens": 116695208} +{"current_steps": 173190, "total_steps": 204665, "loss": 0.0, "lr": 1.4067070086933996e-07, "epoch": 4.231060513522097, "percentage": 84.62, "elapsed_time": "3:43:34", "remaining_time": "0:40:37", "throughput": 8699.14, "total_tokens": 116698536} +{"current_steps": 173195, "total_steps": 204665, "loss": 0.0, "lr": 1.4062709109572623e-07, "epoch": 4.231182664353944, "percentage": 84.62, "elapsed_time": "3:43:35", "remaining_time": "0:40:37", "throughput": 8699.22, "total_tokens": 116702760} +{"current_steps": 173200, "total_steps": 204665, "loss": 0.0, "lr": 1.4058348757170367e-07, "epoch": 4.2313048151857915, "percentage": 84.63, "elapsed_time": "3:43:35", "remaining_time": "0:40:37", "throughput": 8699.25, "total_tokens": 116706216} +{"current_steps": 173205, "total_steps": 204665, "loss": 0.0, "lr": 1.4053989029758905e-07, "epoch": 4.231426966017638, "percentage": 84.63, "elapsed_time": "3:43:36", "remaining_time": "0:40:36", "throughput": 8699.27, "total_tokens": 116709416} +{"current_steps": 173210, "total_steps": 204665, "loss": 0.0, "lr": 1.4049629927369934e-07, "epoch": 4.231549116849486, "percentage": 84.63, "elapsed_time": "3:43:36", "remaining_time": "0:40:36", "throughput": 8699.32, "total_tokens": 116713256} +{"current_steps": 173215, "total_steps": 204665, "loss": 0.0, "lr": 1.40452714500352e-07, "epoch": 4.231671267681333, "percentage": 84.63, "elapsed_time": "3:43:36", "remaining_time": "0:40:36", "throughput": 8699.35, "total_tokens": 116716648} +{"current_steps": 173220, "total_steps": 204665, "loss": 0.0, "lr": 1.4040913597786342e-07, "epoch": 4.23179341851318, "percentage": 84.64, "elapsed_time": "3:43:37", "remaining_time": "0:40:35", "throughput": 8699.35, "total_tokens": 116719592} +{"current_steps": 173225, "total_steps": 204665, "loss": 0.0, "lr": 1.4036556370655105e-07, "epoch": 4.231915569345027, "percentage": 84.64, "elapsed_time": "3:43:37", "remaining_time": "0:40:35", "throughput": 8699.38, "total_tokens": 116722984} +{"current_steps": 173230, "total_steps": 204665, "loss": 0.0, "lr": 1.4032199768673124e-07, "epoch": 4.232037720176875, "percentage": 84.64, "elapsed_time": "3:43:37", "remaining_time": "0:40:34", "throughput": 8699.44, "total_tokens": 116726888} +{"current_steps": 173235, "total_steps": 204665, "loss": 0.0, "lr": 1.402784379187213e-07, "epoch": 4.232159871008721, "percentage": 84.64, "elapsed_time": "3:43:38", "remaining_time": "0:40:34", "throughput": 8699.48, "total_tokens": 116730536} +{"current_steps": 173240, "total_steps": 204665, "loss": 0.0001, "lr": 1.4023488440283771e-07, "epoch": 4.232282021840569, "percentage": 84.65, "elapsed_time": "3:43:38", "remaining_time": "0:40:34", "throughput": 8699.5, "total_tokens": 116733800} +{"current_steps": 173245, "total_steps": 204665, "loss": 0.0, "lr": 1.4019133713939713e-07, "epoch": 4.232404172672416, "percentage": 84.65, "elapsed_time": "3:43:38", "remaining_time": "0:40:33", "throughput": 8699.54, "total_tokens": 116737448} +{"current_steps": 173250, "total_steps": 204665, "loss": 0.0, "lr": 1.4014779612871673e-07, "epoch": 4.232526323504263, "percentage": 84.65, "elapsed_time": "3:43:39", "remaining_time": "0:40:33", "throughput": 8699.54, "total_tokens": 116740392} +{"current_steps": 173255, "total_steps": 204665, "loss": 0.0451, "lr": 1.4010426137111265e-07, "epoch": 4.23264847433611, "percentage": 84.65, "elapsed_time": "3:43:39", "remaining_time": "0:40:32", "throughput": 8699.55, "total_tokens": 116743464} +{"current_steps": 173260, "total_steps": 204665, "loss": 0.0, "lr": 1.4006073286690178e-07, "epoch": 4.232770625167958, "percentage": 84.66, "elapsed_time": "3:43:39", "remaining_time": "0:40:32", "throughput": 8699.6, "total_tokens": 116747240} +{"current_steps": 173265, "total_steps": 204665, "loss": 0.0, "lr": 1.4001721061640038e-07, "epoch": 4.2328927759998045, "percentage": 84.66, "elapsed_time": "3:43:40", "remaining_time": "0:40:32", "throughput": 8699.64, "total_tokens": 116750760} +{"current_steps": 173270, "total_steps": 204665, "loss": 0.0, "lr": 1.3997369461992513e-07, "epoch": 4.233014926831652, "percentage": 84.66, "elapsed_time": "3:43:40", "remaining_time": "0:40:31", "throughput": 8699.65, "total_tokens": 116753960} +{"current_steps": 173275, "total_steps": 204665, "loss": 0.0, "lr": 1.3993018487779262e-07, "epoch": 4.233137077663499, "percentage": 84.66, "elapsed_time": "3:43:40", "remaining_time": "0:40:31", "throughput": 8699.7, "total_tokens": 116757672} +{"current_steps": 173280, "total_steps": 204665, "loss": 0.0005, "lr": 1.39886681390319e-07, "epoch": 4.233259228495346, "percentage": 84.67, "elapsed_time": "3:43:41", "remaining_time": "0:40:30", "throughput": 8699.71, "total_tokens": 116760808} +{"current_steps": 173285, "total_steps": 204665, "loss": 0.0, "lr": 1.3984318415782103e-07, "epoch": 4.233381379327193, "percentage": 84.67, "elapsed_time": "3:43:41", "remaining_time": "0:40:30", "throughput": 8699.73, "total_tokens": 116764008} +{"current_steps": 173290, "total_steps": 204665, "loss": 0.0, "lr": 1.3979969318061457e-07, "epoch": 4.23350353015904, "percentage": 84.67, "elapsed_time": "3:43:41", "remaining_time": "0:40:30", "throughput": 8699.76, "total_tokens": 116767528} +{"current_steps": 173295, "total_steps": 204665, "loss": 0.0, "lr": 1.3975620845901624e-07, "epoch": 4.233625680990888, "percentage": 84.67, "elapsed_time": "3:43:42", "remaining_time": "0:40:29", "throughput": 8699.81, "total_tokens": 116771176} +{"current_steps": 173300, "total_steps": 204665, "loss": 0.0, "lr": 1.3971272999334206e-07, "epoch": 4.233747831822734, "percentage": 84.67, "elapsed_time": "3:43:42", "remaining_time": "0:40:29", "throughput": 8699.84, "total_tokens": 116774568} +{"current_steps": 173305, "total_steps": 204665, "loss": 0.0, "lr": 1.3966925778390836e-07, "epoch": 4.233869982654582, "percentage": 84.68, "elapsed_time": "3:43:42", "remaining_time": "0:40:28", "throughput": 8699.85, "total_tokens": 116777768} +{"current_steps": 173310, "total_steps": 204665, "loss": 0.0, "lr": 1.3962579183103106e-07, "epoch": 4.233992133486429, "percentage": 84.68, "elapsed_time": "3:43:43", "remaining_time": "0:40:28", "throughput": 8699.89, "total_tokens": 116781352} +{"current_steps": 173315, "total_steps": 204665, "loss": 0.0, "lr": 1.3958233213502669e-07, "epoch": 4.234114284318276, "percentage": 84.68, "elapsed_time": "3:43:43", "remaining_time": "0:40:28", "throughput": 8699.91, "total_tokens": 116784680} +{"current_steps": 173320, "total_steps": 204665, "loss": 0.0584, "lr": 1.3953887869621095e-07, "epoch": 4.234236435150123, "percentage": 84.68, "elapsed_time": "3:43:44", "remaining_time": "0:40:27", "throughput": 8699.93, "total_tokens": 116787880} +{"current_steps": 173325, "total_steps": 204665, "loss": 0.0, "lr": 1.3949543151489973e-07, "epoch": 4.234358585981971, "percentage": 84.69, "elapsed_time": "3:43:44", "remaining_time": "0:40:27", "throughput": 8699.93, "total_tokens": 116790824} +{"current_steps": 173330, "total_steps": 204665, "loss": 0.0, "lr": 1.3945199059140932e-07, "epoch": 4.2344807368138175, "percentage": 84.69, "elapsed_time": "3:43:44", "remaining_time": "0:40:26", "throughput": 8699.94, "total_tokens": 116793896} +{"current_steps": 173335, "total_steps": 204665, "loss": 0.0, "lr": 1.3940855592605538e-07, "epoch": 4.234602887645665, "percentage": 84.69, "elapsed_time": "3:43:45", "remaining_time": "0:40:26", "throughput": 8699.94, "total_tokens": 116796840} +{"current_steps": 173340, "total_steps": 204665, "loss": 0.0, "lr": 1.3936512751915387e-07, "epoch": 4.234725038477512, "percentage": 84.69, "elapsed_time": "3:43:45", "remaining_time": "0:40:26", "throughput": 8699.95, "total_tokens": 116799976} +{"current_steps": 173345, "total_steps": 204665, "loss": 0.0, "lr": 1.3932170537102084e-07, "epoch": 4.2348471893093595, "percentage": 84.7, "elapsed_time": "3:43:45", "remaining_time": "0:40:25", "throughput": 8699.98, "total_tokens": 116803432} +{"current_steps": 173350, "total_steps": 204665, "loss": 0.0, "lr": 1.3927828948197162e-07, "epoch": 4.234969340141206, "percentage": 84.7, "elapsed_time": "3:43:46", "remaining_time": "0:40:25", "throughput": 8699.99, "total_tokens": 116806504} +{"current_steps": 173355, "total_steps": 204665, "loss": 0.0, "lr": 1.392348798523225e-07, "epoch": 4.235091490973054, "percentage": 84.7, "elapsed_time": "3:43:46", "remaining_time": "0:40:24", "throughput": 8700.03, "total_tokens": 116810152} +{"current_steps": 173360, "total_steps": 204665, "loss": 0.0002, "lr": 1.391914764823885e-07, "epoch": 4.235213641804901, "percentage": 84.7, "elapsed_time": "3:43:46", "remaining_time": "0:40:24", "throughput": 8700.05, "total_tokens": 116813352} +{"current_steps": 173365, "total_steps": 204665, "loss": 0.0, "lr": 1.3914807937248575e-07, "epoch": 4.235335792636748, "percentage": 84.71, "elapsed_time": "3:43:47", "remaining_time": "0:40:24", "throughput": 8700.08, "total_tokens": 116816936} +{"current_steps": 173370, "total_steps": 204665, "loss": 0.0, "lr": 1.3910468852292977e-07, "epoch": 4.235457943468595, "percentage": 84.71, "elapsed_time": "3:43:47", "remaining_time": "0:40:23", "throughput": 8700.1, "total_tokens": 116820072} +{"current_steps": 173375, "total_steps": 204665, "loss": 0.0, "lr": 1.3906130393403593e-07, "epoch": 4.235580094300442, "percentage": 84.71, "elapsed_time": "3:43:47", "remaining_time": "0:40:23", "throughput": 8700.13, "total_tokens": 116823592} +{"current_steps": 173380, "total_steps": 204665, "loss": 0.0, "lr": 1.3901792560612002e-07, "epoch": 4.235702245132289, "percentage": 84.71, "elapsed_time": "3:43:48", "remaining_time": "0:40:23", "throughput": 8700.17, "total_tokens": 116827240} +{"current_steps": 173385, "total_steps": 204665, "loss": 0.0, "lr": 1.3897455353949715e-07, "epoch": 4.235824395964136, "percentage": 84.72, "elapsed_time": "3:43:48", "remaining_time": "0:40:22", "throughput": 8700.2, "total_tokens": 116830632} +{"current_steps": 173390, "total_steps": 204665, "loss": 0.0, "lr": 1.389311877344832e-07, "epoch": 4.235946546795984, "percentage": 84.72, "elapsed_time": "3:43:48", "remaining_time": "0:40:22", "throughput": 8700.23, "total_tokens": 116834152} +{"current_steps": 173395, "total_steps": 204665, "loss": 0.0, "lr": 1.38887828191393e-07, "epoch": 4.236068697627831, "percentage": 84.72, "elapsed_time": "3:43:49", "remaining_time": "0:40:21", "throughput": 8700.24, "total_tokens": 116837224} +{"current_steps": 173400, "total_steps": 204665, "loss": 0.0, "lr": 1.3884447491054207e-07, "epoch": 4.236190848459678, "percentage": 84.72, "elapsed_time": "3:43:49", "remaining_time": "0:40:21", "throughput": 8700.26, "total_tokens": 116840488} +{"current_steps": 173405, "total_steps": 204665, "loss": 0.0, "lr": 1.3880112789224596e-07, "epoch": 4.236312999291525, "percentage": 84.73, "elapsed_time": "3:43:49", "remaining_time": "0:40:21", "throughput": 8700.32, "total_tokens": 116844392} +{"current_steps": 173410, "total_steps": 204665, "loss": 0.0, "lr": 1.3875778713681975e-07, "epoch": 4.236435150123373, "percentage": 84.73, "elapsed_time": "3:43:50", "remaining_time": "0:40:20", "throughput": 8700.34, "total_tokens": 116847720} +{"current_steps": 173415, "total_steps": 204665, "loss": 0.0, "lr": 1.3871445264457826e-07, "epoch": 4.236557300955219, "percentage": 84.73, "elapsed_time": "3:43:50", "remaining_time": "0:40:20", "throughput": 8700.36, "total_tokens": 116850984} +{"current_steps": 173420, "total_steps": 204665, "loss": 0.0536, "lr": 1.3867112441583718e-07, "epoch": 4.236679451787067, "percentage": 84.73, "elapsed_time": "3:43:50", "remaining_time": "0:40:19", "throughput": 8700.37, "total_tokens": 116854120} +{"current_steps": 173425, "total_steps": 204665, "loss": 0.0, "lr": 1.3862780245091133e-07, "epoch": 4.236801602618914, "percentage": 84.74, "elapsed_time": "3:43:51", "remaining_time": "0:40:19", "throughput": 8700.39, "total_tokens": 116857448} +{"current_steps": 173430, "total_steps": 204665, "loss": 0.0, "lr": 1.3858448675011558e-07, "epoch": 4.236923753450761, "percentage": 84.74, "elapsed_time": "3:43:51", "remaining_time": "0:40:19", "throughput": 8700.4, "total_tokens": 116860648} +{"current_steps": 173435, "total_steps": 204665, "loss": 0.0, "lr": 1.3854117731376515e-07, "epoch": 4.237045904282608, "percentage": 84.74, "elapsed_time": "3:43:51", "remaining_time": "0:40:18", "throughput": 8700.45, "total_tokens": 116864296} +{"current_steps": 173440, "total_steps": 204665, "loss": 0.0, "lr": 1.384978741421752e-07, "epoch": 4.237168055114456, "percentage": 84.74, "elapsed_time": "3:43:52", "remaining_time": "0:40:18", "throughput": 8700.46, "total_tokens": 116867496} +{"current_steps": 173445, "total_steps": 204665, "loss": 0.0563, "lr": 1.3845457723566024e-07, "epoch": 4.2372902059463025, "percentage": 84.75, "elapsed_time": "3:43:52", "remaining_time": "0:40:17", "throughput": 8700.47, "total_tokens": 116870632} +{"current_steps": 173450, "total_steps": 204665, "loss": 0.0, "lr": 1.3841128659453548e-07, "epoch": 4.237412356778149, "percentage": 84.75, "elapsed_time": "3:43:53", "remaining_time": "0:40:17", "throughput": 8700.5, "total_tokens": 116874024} +{"current_steps": 173455, "total_steps": 204665, "loss": 0.0, "lr": 1.3836800221911537e-07, "epoch": 4.237534507609997, "percentage": 84.75, "elapsed_time": "3:43:53", "remaining_time": "0:40:17", "throughput": 8700.54, "total_tokens": 116877544} +{"current_steps": 173460, "total_steps": 204665, "loss": 0.0, "lr": 1.3832472410971485e-07, "epoch": 4.237656658441844, "percentage": 84.75, "elapsed_time": "3:43:53", "remaining_time": "0:40:16", "throughput": 8700.55, "total_tokens": 116880744} +{"current_steps": 173465, "total_steps": 204665, "loss": 0.0, "lr": 1.38281452266649e-07, "epoch": 4.237778809273691, "percentage": 84.76, "elapsed_time": "3:43:54", "remaining_time": "0:40:16", "throughput": 8700.56, "total_tokens": 116883816} +{"current_steps": 173470, "total_steps": 204665, "loss": 0.0, "lr": 1.3823818669023202e-07, "epoch": 4.237900960105538, "percentage": 84.76, "elapsed_time": "3:43:54", "remaining_time": "0:40:15", "throughput": 8700.58, "total_tokens": 116887144} +{"current_steps": 173475, "total_steps": 204665, "loss": 0.0, "lr": 1.3819492738077887e-07, "epoch": 4.238023110937386, "percentage": 84.76, "elapsed_time": "3:43:54", "remaining_time": "0:40:15", "throughput": 8700.6, "total_tokens": 116890472} +{"current_steps": 173480, "total_steps": 204665, "loss": 0.0004, "lr": 1.3815167433860387e-07, "epoch": 4.238145261769232, "percentage": 84.76, "elapsed_time": "3:43:55", "remaining_time": "0:40:15", "throughput": 8700.61, "total_tokens": 116893480} +{"current_steps": 173485, "total_steps": 204665, "loss": 0.0, "lr": 1.3810842756402184e-07, "epoch": 4.23826741260108, "percentage": 84.77, "elapsed_time": "3:43:55", "remaining_time": "0:40:14", "throughput": 8700.63, "total_tokens": 116896744} +{"current_steps": 173490, "total_steps": 204665, "loss": 0.0, "lr": 1.3806518705734694e-07, "epoch": 4.238389563432927, "percentage": 84.77, "elapsed_time": "3:43:55", "remaining_time": "0:40:14", "throughput": 8700.65, "total_tokens": 116900072} +{"current_steps": 173495, "total_steps": 204665, "loss": 0.0, "lr": 1.3802195281889383e-07, "epoch": 4.238511714264774, "percentage": 84.77, "elapsed_time": "3:43:56", "remaining_time": "0:40:13", "throughput": 8700.67, "total_tokens": 116903272} +{"current_steps": 173500, "total_steps": 204665, "loss": 0.0, "lr": 1.379787248489771e-07, "epoch": 4.238633865096621, "percentage": 84.77, "elapsed_time": "3:43:56", "remaining_time": "0:40:13", "throughput": 8700.7, "total_tokens": 116906728} +{"current_steps": 173505, "total_steps": 204665, "loss": 0.0, "lr": 1.379355031479108e-07, "epoch": 4.238756015928469, "percentage": 84.78, "elapsed_time": "3:43:56", "remaining_time": "0:40:13", "throughput": 8700.74, "total_tokens": 116910248} +{"current_steps": 173510, "total_steps": 204665, "loss": 0.0, "lr": 1.3789228771600959e-07, "epoch": 4.2388781667603155, "percentage": 84.78, "elapsed_time": "3:43:57", "remaining_time": "0:40:12", "throughput": 8700.75, "total_tokens": 116913384} +{"current_steps": 173515, "total_steps": 204665, "loss": 0.0, "lr": 1.378490785535875e-07, "epoch": 4.239000317592163, "percentage": 84.78, "elapsed_time": "3:43:57", "remaining_time": "0:40:12", "throughput": 8700.91, "total_tokens": 116919144} +{"current_steps": 173520, "total_steps": 204665, "loss": 0.0, "lr": 1.378058756609587e-07, "epoch": 4.23912246842401, "percentage": 84.78, "elapsed_time": "3:43:57", "remaining_time": "0:40:11", "throughput": 8700.93, "total_tokens": 116922408} +{"current_steps": 173525, "total_steps": 204665, "loss": 0.0, "lr": 1.3776267903843763e-07, "epoch": 4.2392446192558575, "percentage": 84.78, "elapsed_time": "3:43:58", "remaining_time": "0:40:11", "throughput": 8700.97, "total_tokens": 116926056} +{"current_steps": 173530, "total_steps": 204665, "loss": 0.0, "lr": 1.3771948868633797e-07, "epoch": 4.239366770087704, "percentage": 84.79, "elapsed_time": "3:43:58", "remaining_time": "0:40:11", "throughput": 8701.0, "total_tokens": 116929448} +{"current_steps": 173535, "total_steps": 204665, "loss": 0.0, "lr": 1.3767630460497447e-07, "epoch": 4.239488920919552, "percentage": 84.79, "elapsed_time": "3:43:58", "remaining_time": "0:40:10", "throughput": 8701.01, "total_tokens": 116932648} +{"current_steps": 173540, "total_steps": 204665, "loss": 0.0001, "lr": 1.3763312679466054e-07, "epoch": 4.239611071751399, "percentage": 84.79, "elapsed_time": "3:43:59", "remaining_time": "0:40:10", "throughput": 8701.04, "total_tokens": 116936104} +{"current_steps": 173545, "total_steps": 204665, "loss": 0.0, "lr": 1.375899552557106e-07, "epoch": 4.239733222583245, "percentage": 84.79, "elapsed_time": "3:43:59", "remaining_time": "0:40:09", "throughput": 8701.07, "total_tokens": 116939496} +{"current_steps": 173550, "total_steps": 204665, "loss": 0.0, "lr": 1.3754678998843838e-07, "epoch": 4.239855373415093, "percentage": 84.8, "elapsed_time": "3:44:00", "remaining_time": "0:40:09", "throughput": 8701.1, "total_tokens": 116942888} +{"current_steps": 173555, "total_steps": 204665, "loss": 0.0, "lr": 1.3750363099315777e-07, "epoch": 4.23997752424694, "percentage": 84.8, "elapsed_time": "3:44:00", "remaining_time": "0:40:09", "throughput": 8701.12, "total_tokens": 116946216} +{"current_steps": 173560, "total_steps": 204665, "loss": 0.0, "lr": 1.3746047827018302e-07, "epoch": 4.240099675078787, "percentage": 84.8, "elapsed_time": "3:44:00", "remaining_time": "0:40:08", "throughput": 8701.13, "total_tokens": 116949288} +{"current_steps": 173565, "total_steps": 204665, "loss": 0.0, "lr": 1.374173318198274e-07, "epoch": 4.240221825910634, "percentage": 84.8, "elapsed_time": "3:44:01", "remaining_time": "0:40:08", "throughput": 8701.17, "total_tokens": 116952872} +{"current_steps": 173570, "total_steps": 204665, "loss": 0.0, "lr": 1.3737419164240527e-07, "epoch": 4.240343976742482, "percentage": 84.81, "elapsed_time": "3:44:01", "remaining_time": "0:40:08", "throughput": 8701.22, "total_tokens": 116956776} +{"current_steps": 173575, "total_steps": 204665, "loss": 0.0, "lr": 1.3733105773822973e-07, "epoch": 4.2404661275743285, "percentage": 84.81, "elapsed_time": "3:44:01", "remaining_time": "0:40:07", "throughput": 8701.25, "total_tokens": 116960104} +{"current_steps": 173580, "total_steps": 204665, "loss": 0.0139, "lr": 1.3728793010761497e-07, "epoch": 4.240588278406176, "percentage": 84.81, "elapsed_time": "3:44:02", "remaining_time": "0:40:07", "throughput": 8701.26, "total_tokens": 116963240} +{"current_steps": 173585, "total_steps": 204665, "loss": 0.0, "lr": 1.372448087508742e-07, "epoch": 4.240710429238023, "percentage": 84.81, "elapsed_time": "3:44:02", "remaining_time": "0:40:06", "throughput": 8701.28, "total_tokens": 116966504} +{"current_steps": 173590, "total_steps": 204665, "loss": 0.0, "lr": 1.3720169366832134e-07, "epoch": 4.2408325800698705, "percentage": 84.82, "elapsed_time": "3:44:02", "remaining_time": "0:40:06", "throughput": 8701.28, "total_tokens": 116969512} +{"current_steps": 173595, "total_steps": 204665, "loss": 0.0005, "lr": 1.3715858486027e-07, "epoch": 4.240954730901717, "percentage": 84.82, "elapsed_time": "3:44:03", "remaining_time": "0:40:06", "throughput": 8701.28, "total_tokens": 116972456} +{"current_steps": 173600, "total_steps": 204665, "loss": 0.0, "lr": 1.371154823270332e-07, "epoch": 4.241076881733565, "percentage": 84.82, "elapsed_time": "3:44:03", "remaining_time": "0:40:05", "throughput": 8701.29, "total_tokens": 116975592} +{"current_steps": 173605, "total_steps": 204665, "loss": 0.0343, "lr": 1.3707238606892503e-07, "epoch": 4.241199032565412, "percentage": 84.82, "elapsed_time": "3:44:03", "remaining_time": "0:40:05", "throughput": 8701.31, "total_tokens": 116978920} +{"current_steps": 173610, "total_steps": 204665, "loss": 0.0, "lr": 1.3702929608625823e-07, "epoch": 4.241321183397259, "percentage": 84.83, "elapsed_time": "3:44:04", "remaining_time": "0:40:04", "throughput": 8701.32, "total_tokens": 116981928} +{"current_steps": 173615, "total_steps": 204665, "loss": 0.0, "lr": 1.369862123793468e-07, "epoch": 4.241443334229106, "percentage": 84.83, "elapsed_time": "3:44:04", "remaining_time": "0:40:04", "throughput": 8701.36, "total_tokens": 116985576} +{"current_steps": 173620, "total_steps": 204665, "loss": 0.0, "lr": 1.3694313494850362e-07, "epoch": 4.241565485060954, "percentage": 84.83, "elapsed_time": "3:44:04", "remaining_time": "0:40:04", "throughput": 8701.39, "total_tokens": 116988968} +{"current_steps": 173625, "total_steps": 204665, "loss": 0.0, "lr": 1.3690006379404217e-07, "epoch": 4.2416876358928, "percentage": 84.83, "elapsed_time": "3:44:05", "remaining_time": "0:40:03", "throughput": 8701.43, "total_tokens": 116992680} +{"current_steps": 173630, "total_steps": 204665, "loss": 0.0003, "lr": 1.3685699891627568e-07, "epoch": 4.241809786724648, "percentage": 84.84, "elapsed_time": "3:44:05", "remaining_time": "0:40:03", "throughput": 8701.44, "total_tokens": 116995752} +{"current_steps": 173635, "total_steps": 204665, "loss": 0.0, "lr": 1.3681394031551706e-07, "epoch": 4.241931937556495, "percentage": 84.84, "elapsed_time": "3:44:05", "remaining_time": "0:40:02", "throughput": 8701.49, "total_tokens": 116999528} +{"current_steps": 173640, "total_steps": 204665, "loss": 0.0, "lr": 1.367708879920798e-07, "epoch": 4.2420540883883415, "percentage": 84.84, "elapsed_time": "3:44:06", "remaining_time": "0:40:02", "throughput": 8701.52, "total_tokens": 117002920} +{"current_steps": 173645, "total_steps": 204665, "loss": 0.0, "lr": 1.3672784194627663e-07, "epoch": 4.242176239220189, "percentage": 84.84, "elapsed_time": "3:44:06", "remaining_time": "0:40:02", "throughput": 8701.56, "total_tokens": 117006568} +{"current_steps": 173650, "total_steps": 204665, "loss": 0.0, "lr": 1.3668480217842072e-07, "epoch": 4.242298390052036, "percentage": 84.85, "elapsed_time": "3:44:06", "remaining_time": "0:40:01", "throughput": 8701.56, "total_tokens": 117009640} +{"current_steps": 173655, "total_steps": 204665, "loss": 0.0, "lr": 1.3664176868882537e-07, "epoch": 4.2424205408838835, "percentage": 84.85, "elapsed_time": "3:44:07", "remaining_time": "0:40:01", "throughput": 8701.58, "total_tokens": 117012840} +{"current_steps": 173660, "total_steps": 204665, "loss": 0.0, "lr": 1.3659874147780314e-07, "epoch": 4.24254269171573, "percentage": 84.85, "elapsed_time": "3:44:07", "remaining_time": "0:40:00", "throughput": 8701.61, "total_tokens": 117016296} +{"current_steps": 173665, "total_steps": 204665, "loss": 0.0, "lr": 1.365557205456672e-07, "epoch": 4.242664842547578, "percentage": 84.85, "elapsed_time": "3:44:08", "remaining_time": "0:40:00", "throughput": 8701.65, "total_tokens": 117019944} +{"current_steps": 173670, "total_steps": 204665, "loss": 0.0, "lr": 1.3651270589273023e-07, "epoch": 4.242786993379425, "percentage": 84.86, "elapsed_time": "3:44:08", "remaining_time": "0:40:00", "throughput": 8701.71, "total_tokens": 117023848} +{"current_steps": 173675, "total_steps": 204665, "loss": 0.0, "lr": 1.3646969751930504e-07, "epoch": 4.242909144211272, "percentage": 84.86, "elapsed_time": "3:44:08", "remaining_time": "0:39:59", "throughput": 8701.73, "total_tokens": 117027176} +{"current_steps": 173680, "total_steps": 204665, "loss": 0.0001, "lr": 1.364266954257046e-07, "epoch": 4.243031295043119, "percentage": 84.86, "elapsed_time": "3:44:09", "remaining_time": "0:39:59", "throughput": 8701.75, "total_tokens": 117030440} +{"current_steps": 173685, "total_steps": 204665, "loss": 0.0001, "lr": 1.3638369961224138e-07, "epoch": 4.243153445874967, "percentage": 84.86, "elapsed_time": "3:44:09", "remaining_time": "0:39:58", "throughput": 8701.78, "total_tokens": 117033896} +{"current_steps": 173690, "total_steps": 204665, "loss": 0.0, "lr": 1.3634071007922841e-07, "epoch": 4.243275596706813, "percentage": 84.87, "elapsed_time": "3:44:09", "remaining_time": "0:39:58", "throughput": 8701.82, "total_tokens": 117037480} +{"current_steps": 173695, "total_steps": 204665, "loss": 0.0, "lr": 1.3629772682697794e-07, "epoch": 4.243397747538661, "percentage": 84.87, "elapsed_time": "3:44:10", "remaining_time": "0:39:58", "throughput": 8701.82, "total_tokens": 117040552} +{"current_steps": 173700, "total_steps": 204665, "loss": 0.0, "lr": 1.3625474985580277e-07, "epoch": 4.243519898370508, "percentage": 84.87, "elapsed_time": "3:44:10", "remaining_time": "0:39:57", "throughput": 8701.85, "total_tokens": 117044008} +{"current_steps": 173705, "total_steps": 204665, "loss": 0.0, "lr": 1.3621177916601522e-07, "epoch": 4.243642049202355, "percentage": 84.87, "elapsed_time": "3:44:10", "remaining_time": "0:39:57", "throughput": 8701.89, "total_tokens": 117047528} +{"current_steps": 173710, "total_steps": 204665, "loss": 0.0, "lr": 1.3616881475792796e-07, "epoch": 4.243764200034202, "percentage": 84.88, "elapsed_time": "3:44:11", "remaining_time": "0:39:56", "throughput": 8701.93, "total_tokens": 117051048} +{"current_steps": 173715, "total_steps": 204665, "loss": 0.0, "lr": 1.3612585663185372e-07, "epoch": 4.24388635086605, "percentage": 84.88, "elapsed_time": "3:44:11", "remaining_time": "0:39:56", "throughput": 8701.94, "total_tokens": 117054184} +{"current_steps": 173720, "total_steps": 204665, "loss": 0.0, "lr": 1.3608290478810448e-07, "epoch": 4.244008501697897, "percentage": 84.88, "elapsed_time": "3:44:11", "remaining_time": "0:39:56", "throughput": 8701.93, "total_tokens": 117057064} +{"current_steps": 173725, "total_steps": 204665, "loss": 0.0, "lr": 1.3603995922699252e-07, "epoch": 4.244130652529743, "percentage": 84.88, "elapsed_time": "3:44:12", "remaining_time": "0:39:55", "throughput": 8701.97, "total_tokens": 117060648} +{"current_steps": 173730, "total_steps": 204665, "loss": 0.0, "lr": 1.3599701994883062e-07, "epoch": 4.244252803361591, "percentage": 84.89, "elapsed_time": "3:44:12", "remaining_time": "0:39:55", "throughput": 8701.99, "total_tokens": 117063912} +{"current_steps": 173735, "total_steps": 204665, "loss": 0.0, "lr": 1.3595408695393072e-07, "epoch": 4.244374954193438, "percentage": 84.89, "elapsed_time": "3:44:12", "remaining_time": "0:39:55", "throughput": 8702.01, "total_tokens": 117067240} +{"current_steps": 173740, "total_steps": 204665, "loss": 0.0, "lr": 1.3591116024260496e-07, "epoch": 4.244497105025285, "percentage": 84.89, "elapsed_time": "3:44:13", "remaining_time": "0:39:54", "throughput": 8702.02, "total_tokens": 117070376} +{"current_steps": 173745, "total_steps": 204665, "loss": 0.0, "lr": 1.3586823981516559e-07, "epoch": 4.244619255857132, "percentage": 84.89, "elapsed_time": "3:44:13", "remaining_time": "0:39:54", "throughput": 8702.05, "total_tokens": 117073832} +{"current_steps": 173750, "total_steps": 204665, "loss": 0.0, "lr": 1.3582532567192506e-07, "epoch": 4.24474140668898, "percentage": 84.89, "elapsed_time": "3:44:13", "remaining_time": "0:39:53", "throughput": 8702.08, "total_tokens": 117077224} +{"current_steps": 173755, "total_steps": 204665, "loss": 0.0399, "lr": 1.3578241781319498e-07, "epoch": 4.2448635575208264, "percentage": 84.9, "elapsed_time": "3:44:14", "remaining_time": "0:39:53", "throughput": 8702.11, "total_tokens": 117080680} +{"current_steps": 173760, "total_steps": 204665, "loss": 0.0, "lr": 1.357395162392878e-07, "epoch": 4.244985708352674, "percentage": 84.9, "elapsed_time": "3:44:14", "remaining_time": "0:39:53", "throughput": 8702.13, "total_tokens": 117083944} +{"current_steps": 173765, "total_steps": 204665, "loss": 0.0, "lr": 1.3569662095051504e-07, "epoch": 4.245107859184521, "percentage": 84.9, "elapsed_time": "3:44:14", "remaining_time": "0:39:52", "throughput": 8702.15, "total_tokens": 117087144} +{"current_steps": 173770, "total_steps": 204665, "loss": 0.0, "lr": 1.35653731947189e-07, "epoch": 4.2452300100163685, "percentage": 84.9, "elapsed_time": "3:44:15", "remaining_time": "0:39:52", "throughput": 8702.2, "total_tokens": 117091048} +{"current_steps": 173775, "total_steps": 204665, "loss": 0.0, "lr": 1.3561084922962173e-07, "epoch": 4.245352160848215, "percentage": 84.91, "elapsed_time": "3:44:15", "remaining_time": "0:39:51", "throughput": 8702.22, "total_tokens": 117094312} +{"current_steps": 173780, "total_steps": 204665, "loss": 0.0, "lr": 1.355679727981246e-07, "epoch": 4.245474311680063, "percentage": 84.91, "elapsed_time": "3:44:16", "remaining_time": "0:39:51", "throughput": 8702.25, "total_tokens": 117097704} +{"current_steps": 173785, "total_steps": 204665, "loss": 0.0, "lr": 1.3552510265300988e-07, "epoch": 4.24559646251191, "percentage": 84.91, "elapsed_time": "3:44:16", "remaining_time": "0:39:51", "throughput": 8702.26, "total_tokens": 117100904} +{"current_steps": 173790, "total_steps": 204665, "loss": 0.0, "lr": 1.3548223879458897e-07, "epoch": 4.245718613343757, "percentage": 84.91, "elapsed_time": "3:44:16", "remaining_time": "0:39:50", "throughput": 8702.3, "total_tokens": 117104488} +{"current_steps": 173795, "total_steps": 204665, "loss": 0.0, "lr": 1.35439381223174e-07, "epoch": 4.245840764175604, "percentage": 84.92, "elapsed_time": "3:44:17", "remaining_time": "0:39:50", "throughput": 8702.3, "total_tokens": 117107432} +{"current_steps": 173800, "total_steps": 204665, "loss": 0.0, "lr": 1.35396529939076e-07, "epoch": 4.245962915007452, "percentage": 84.92, "elapsed_time": "3:44:17", "remaining_time": "0:39:49", "throughput": 8702.32, "total_tokens": 117110696} +{"current_steps": 173805, "total_steps": 204665, "loss": 0.0, "lr": 1.3535368494260712e-07, "epoch": 4.246085065839298, "percentage": 84.92, "elapsed_time": "3:44:17", "remaining_time": "0:39:49", "throughput": 8702.34, "total_tokens": 117114024} +{"current_steps": 173810, "total_steps": 204665, "loss": 0.0, "lr": 1.3531084623407897e-07, "epoch": 4.246207216671145, "percentage": 84.92, "elapsed_time": "3:44:18", "remaining_time": "0:39:49", "throughput": 8702.37, "total_tokens": 117117480} +{"current_steps": 173815, "total_steps": 204665, "loss": 0.0, "lr": 1.3526801381380272e-07, "epoch": 4.246329367502993, "percentage": 84.93, "elapsed_time": "3:44:18", "remaining_time": "0:39:48", "throughput": 8702.4, "total_tokens": 117120872} +{"current_steps": 173820, "total_steps": 204665, "loss": 0.0, "lr": 1.3522518768209034e-07, "epoch": 4.2464515183348395, "percentage": 84.93, "elapsed_time": "3:44:18", "remaining_time": "0:39:48", "throughput": 8702.4, "total_tokens": 117123880} +{"current_steps": 173825, "total_steps": 204665, "loss": 0.0, "lr": 1.3518236783925296e-07, "epoch": 4.246573669166687, "percentage": 84.93, "elapsed_time": "3:44:19", "remaining_time": "0:39:47", "throughput": 8702.42, "total_tokens": 117127144} +{"current_steps": 173830, "total_steps": 204665, "loss": 0.0, "lr": 1.3513955428560175e-07, "epoch": 4.246695819998534, "percentage": 84.93, "elapsed_time": "3:44:19", "remaining_time": "0:39:47", "throughput": 8702.48, "total_tokens": 117131048} +{"current_steps": 173835, "total_steps": 204665, "loss": 0.0, "lr": 1.3509674702144859e-07, "epoch": 4.2468179708303815, "percentage": 84.94, "elapsed_time": "3:44:19", "remaining_time": "0:39:47", "throughput": 8702.5, "total_tokens": 117134376} +{"current_steps": 173840, "total_steps": 204665, "loss": 0.0, "lr": 1.350539460471042e-07, "epoch": 4.246940121662228, "percentage": 84.94, "elapsed_time": "3:44:20", "remaining_time": "0:39:46", "throughput": 8702.52, "total_tokens": 117137704} +{"current_steps": 173845, "total_steps": 204665, "loss": 0.0, "lr": 1.3501115136288044e-07, "epoch": 4.247062272494076, "percentage": 84.94, "elapsed_time": "3:44:20", "remaining_time": "0:39:46", "throughput": 8702.56, "total_tokens": 117141352} +{"current_steps": 173850, "total_steps": 204665, "loss": 0.0, "lr": 1.3496836296908797e-07, "epoch": 4.247184423325923, "percentage": 84.94, "elapsed_time": "3:44:20", "remaining_time": "0:39:45", "throughput": 8702.57, "total_tokens": 117144552} +{"current_steps": 173855, "total_steps": 204665, "loss": 0.0, "lr": 1.3492558086603855e-07, "epoch": 4.24730657415777, "percentage": 84.95, "elapsed_time": "3:44:21", "remaining_time": "0:39:45", "throughput": 8702.6, "total_tokens": 117147944} +{"current_steps": 173860, "total_steps": 204665, "loss": 0.0, "lr": 1.348828050540427e-07, "epoch": 4.247428724989617, "percentage": 84.95, "elapsed_time": "3:44:21", "remaining_time": "0:39:45", "throughput": 8702.62, "total_tokens": 117151272} +{"current_steps": 173865, "total_steps": 204665, "loss": 0.0, "lr": 1.3484003553341183e-07, "epoch": 4.247550875821465, "percentage": 84.95, "elapsed_time": "3:44:21", "remaining_time": "0:39:44", "throughput": 8702.64, "total_tokens": 117154536} +{"current_steps": 173870, "total_steps": 204665, "loss": 0.0, "lr": 1.3479727230445704e-07, "epoch": 4.247673026653311, "percentage": 84.95, "elapsed_time": "3:44:22", "remaining_time": "0:39:44", "throughput": 8702.69, "total_tokens": 117158248} +{"current_steps": 173875, "total_steps": 204665, "loss": 0.0, "lr": 1.3475451536748906e-07, "epoch": 4.247795177485159, "percentage": 84.96, "elapsed_time": "3:44:22", "remaining_time": "0:39:43", "throughput": 8702.7, "total_tokens": 117161320} +{"current_steps": 173880, "total_steps": 204665, "loss": 0.0513, "lr": 1.347117647228192e-07, "epoch": 4.247917328317006, "percentage": 84.96, "elapsed_time": "3:44:22", "remaining_time": "0:39:43", "throughput": 8702.73, "total_tokens": 117164840} +{"current_steps": 173885, "total_steps": 204665, "loss": 0.0, "lr": 1.3466902037075788e-07, "epoch": 4.248039479148853, "percentage": 84.96, "elapsed_time": "3:44:23", "remaining_time": "0:39:43", "throughput": 8702.74, "total_tokens": 117168040} +{"current_steps": 173890, "total_steps": 204665, "loss": 0.0, "lr": 1.3462628231161632e-07, "epoch": 4.2481616299807, "percentage": 84.96, "elapsed_time": "3:44:23", "remaining_time": "0:39:42", "throughput": 8702.76, "total_tokens": 117171304} +{"current_steps": 173895, "total_steps": 204665, "loss": 0.0, "lr": 1.3458355054570515e-07, "epoch": 4.248283780812548, "percentage": 84.97, "elapsed_time": "3:44:24", "remaining_time": "0:39:42", "throughput": 8702.83, "total_tokens": 117175400} +{"current_steps": 173900, "total_steps": 204665, "loss": 0.0, "lr": 1.3454082507333496e-07, "epoch": 4.2484059316443945, "percentage": 84.97, "elapsed_time": "3:44:24", "remaining_time": "0:39:42", "throughput": 8702.83, "total_tokens": 117178344} +{"current_steps": 173905, "total_steps": 204665, "loss": 0.0, "lr": 1.3449810589481702e-07, "epoch": 4.248528082476241, "percentage": 84.97, "elapsed_time": "3:44:24", "remaining_time": "0:39:41", "throughput": 8702.84, "total_tokens": 117181480} +{"current_steps": 173910, "total_steps": 204665, "loss": 0.0, "lr": 1.3445539301046148e-07, "epoch": 4.248650233308089, "percentage": 84.97, "elapsed_time": "3:44:25", "remaining_time": "0:39:41", "throughput": 8702.86, "total_tokens": 117184872} +{"current_steps": 173915, "total_steps": 204665, "loss": 0.0, "lr": 1.3441268642057923e-07, "epoch": 4.248772384139936, "percentage": 84.98, "elapsed_time": "3:44:25", "remaining_time": "0:39:40", "throughput": 8702.89, "total_tokens": 117188200} +{"current_steps": 173920, "total_steps": 204665, "loss": 0.0372, "lr": 1.3436998612548055e-07, "epoch": 4.248894534971783, "percentage": 84.98, "elapsed_time": "3:44:25", "remaining_time": "0:39:40", "throughput": 8702.92, "total_tokens": 117191656} +{"current_steps": 173925, "total_steps": 204665, "loss": 0.0, "lr": 1.3432729212547645e-07, "epoch": 4.24901668580363, "percentage": 84.98, "elapsed_time": "3:44:26", "remaining_time": "0:39:40", "throughput": 8702.95, "total_tokens": 117195176} +{"current_steps": 173930, "total_steps": 204665, "loss": 0.0, "lr": 1.3428460442087686e-07, "epoch": 4.249138836635478, "percentage": 84.98, "elapsed_time": "3:44:26", "remaining_time": "0:39:39", "throughput": 8702.96, "total_tokens": 117198312} +{"current_steps": 173935, "total_steps": 204665, "loss": 0.0001, "lr": 1.3424192301199267e-07, "epoch": 4.249260987467324, "percentage": 84.99, "elapsed_time": "3:44:26", "remaining_time": "0:39:39", "throughput": 8702.98, "total_tokens": 117201512} +{"current_steps": 173940, "total_steps": 204665, "loss": 0.0, "lr": 1.3419924789913407e-07, "epoch": 4.249383138299172, "percentage": 84.99, "elapsed_time": "3:44:27", "remaining_time": "0:39:38", "throughput": 8703.01, "total_tokens": 117205032} +{"current_steps": 173945, "total_steps": 204665, "loss": 0.0, "lr": 1.3415657908261113e-07, "epoch": 4.249505289131019, "percentage": 84.99, "elapsed_time": "3:44:27", "remaining_time": "0:39:38", "throughput": 8703.04, "total_tokens": 117208424} +{"current_steps": 173950, "total_steps": 204665, "loss": 0.0, "lr": 1.3411391656273475e-07, "epoch": 4.249627439962866, "percentage": 84.99, "elapsed_time": "3:44:27", "remaining_time": "0:39:38", "throughput": 8703.05, "total_tokens": 117211560} +{"current_steps": 173955, "total_steps": 204665, "loss": 0.0, "lr": 1.3407126033981464e-07, "epoch": 4.249749590794713, "percentage": 84.99, "elapsed_time": "3:44:28", "remaining_time": "0:39:37", "throughput": 8703.1, "total_tokens": 117215336} +{"current_steps": 173960, "total_steps": 204665, "loss": 0.0, "lr": 1.3402861041416124e-07, "epoch": 4.249871741626561, "percentage": 85.0, "elapsed_time": "3:44:28", "remaining_time": "0:39:37", "throughput": 8703.1, "total_tokens": 117218344} +{"current_steps": 173965, "total_steps": 204665, "loss": 0.0, "lr": 1.3398596678608488e-07, "epoch": 4.2499938924584075, "percentage": 85.0, "elapsed_time": "3:44:28", "remaining_time": "0:39:36", "throughput": 8703.14, "total_tokens": 117221928} +{"current_steps": 173970, "total_steps": 204665, "loss": 0.0, "lr": 1.3394332945589526e-07, "epoch": 4.250116043290255, "percentage": 85.0, "elapsed_time": "3:44:29", "remaining_time": "0:39:36", "throughput": 8703.17, "total_tokens": 117225384} +{"current_steps": 173975, "total_steps": 204665, "loss": 0.0, "lr": 1.3390069842390295e-07, "epoch": 4.250238194122102, "percentage": 85.0, "elapsed_time": "3:44:29", "remaining_time": "0:39:36", "throughput": 8703.18, "total_tokens": 117228520} +{"current_steps": 173978, "total_steps": 204665, "eval_loss": 0.337473601102829, "epoch": 4.250311484621211, "percentage": 85.01, "elapsed_time": "3:45:17", "remaining_time": "0:39:44", "throughput": 8672.47, "total_tokens": 117230952} +{"current_steps": 173980, "total_steps": 204665, "loss": 0.0, "lr": 1.3385807369041746e-07, "epoch": 4.2503603449539495, "percentage": 85.01, "elapsed_time": "3:45:53", "remaining_time": "0:39:50", "throughput": 8649.44, "total_tokens": 117232040} +{"current_steps": 173985, "total_steps": 204665, "loss": 0.0, "lr": 1.338154552557491e-07, "epoch": 4.250482495785796, "percentage": 85.01, "elapsed_time": "3:45:54", "remaining_time": "0:39:50", "throughput": 8649.44, "total_tokens": 117235048} +{"current_steps": 173990, "total_steps": 204665, "loss": 0.05, "lr": 1.3377284312020787e-07, "epoch": 4.250604646617644, "percentage": 85.01, "elapsed_time": "3:45:54", "remaining_time": "0:39:49", "throughput": 8649.47, "total_tokens": 117238376} +{"current_steps": 173995, "total_steps": 204665, "loss": 0.0, "lr": 1.3373023728410338e-07, "epoch": 4.250726797449491, "percentage": 85.01, "elapsed_time": "3:45:54", "remaining_time": "0:39:49", "throughput": 8649.53, "total_tokens": 117242408} +{"current_steps": 174000, "total_steps": 204665, "loss": 0.0, "lr": 1.336876377477457e-07, "epoch": 4.250848948281337, "percentage": 85.02, "elapsed_time": "3:45:55", "remaining_time": "0:39:48", "throughput": 8649.55, "total_tokens": 117245672} +{"current_steps": 174005, "total_steps": 204665, "loss": 0.0, "lr": 1.3364504451144443e-07, "epoch": 4.250971099113185, "percentage": 85.02, "elapsed_time": "3:45:55", "remaining_time": "0:39:48", "throughput": 8649.57, "total_tokens": 117248872} +{"current_steps": 174010, "total_steps": 204665, "loss": 0.0, "lr": 1.3360245757550947e-07, "epoch": 4.251093249945032, "percentage": 85.02, "elapsed_time": "3:45:55", "remaining_time": "0:39:48", "throughput": 8649.57, "total_tokens": 117251880} +{"current_steps": 174015, "total_steps": 204665, "loss": 0.0, "lr": 1.335598769402504e-07, "epoch": 4.251215400776879, "percentage": 85.02, "elapsed_time": "3:45:56", "remaining_time": "0:39:47", "throughput": 8649.62, "total_tokens": 117255528} +{"current_steps": 174020, "total_steps": 204665, "loss": 0.0, "lr": 1.3351730260597693e-07, "epoch": 4.251337551608726, "percentage": 85.03, "elapsed_time": "3:45:56", "remaining_time": "0:39:47", "throughput": 8649.61, "total_tokens": 117258408} +{"current_steps": 174025, "total_steps": 204665, "loss": 0.0, "lr": 1.3347473457299885e-07, "epoch": 4.251459702440574, "percentage": 85.03, "elapsed_time": "3:45:56", "remaining_time": "0:39:46", "throughput": 8649.66, "total_tokens": 117262056} +{"current_steps": 174030, "total_steps": 204665, "loss": 0.0, "lr": 1.3343217284162566e-07, "epoch": 4.2515818532724206, "percentage": 85.03, "elapsed_time": "3:45:57", "remaining_time": "0:39:46", "throughput": 8649.68, "total_tokens": 117265448} +{"current_steps": 174035, "total_steps": 204665, "loss": 0.0, "lr": 1.333896174121665e-07, "epoch": 4.251704004104268, "percentage": 85.03, "elapsed_time": "3:45:57", "remaining_time": "0:39:46", "throughput": 8649.69, "total_tokens": 117268392} +{"current_steps": 174040, "total_steps": 204665, "loss": 0.0, "lr": 1.3334706828493137e-07, "epoch": 4.251826154936115, "percentage": 85.04, "elapsed_time": "3:45:57", "remaining_time": "0:39:45", "throughput": 8649.71, "total_tokens": 117271720} +{"current_steps": 174045, "total_steps": 204665, "loss": 0.0, "lr": 1.333045254602294e-07, "epoch": 4.251948305767963, "percentage": 85.04, "elapsed_time": "3:45:58", "remaining_time": "0:39:45", "throughput": 8649.72, "total_tokens": 117274856} +{"current_steps": 174050, "total_steps": 204665, "loss": 0.0, "lr": 1.3326198893836994e-07, "epoch": 4.252070456599809, "percentage": 85.04, "elapsed_time": "3:45:58", "remaining_time": "0:39:44", "throughput": 8649.74, "total_tokens": 117278120} +{"current_steps": 174055, "total_steps": 204665, "loss": 0.0, "lr": 1.3321945871966234e-07, "epoch": 4.252192607431657, "percentage": 85.04, "elapsed_time": "3:45:58", "remaining_time": "0:39:44", "throughput": 8649.76, "total_tokens": 117281256} +{"current_steps": 174060, "total_steps": 204665, "loss": 0.0, "lr": 1.3317693480441615e-07, "epoch": 4.252314758263504, "percentage": 85.05, "elapsed_time": "3:45:59", "remaining_time": "0:39:44", "throughput": 8649.79, "total_tokens": 117284648} +{"current_steps": 174065, "total_steps": 204665, "loss": 0.0, "lr": 1.3313441719294027e-07, "epoch": 4.252436909095351, "percentage": 85.05, "elapsed_time": "3:45:59", "remaining_time": "0:39:43", "throughput": 8649.84, "total_tokens": 117288552} +{"current_steps": 174070, "total_steps": 204665, "loss": 0.0, "lr": 1.3309190588554432e-07, "epoch": 4.252559059927198, "percentage": 85.05, "elapsed_time": "3:45:59", "remaining_time": "0:39:43", "throughput": 8649.89, "total_tokens": 117292200} +{"current_steps": 174075, "total_steps": 204665, "loss": 0.0, "lr": 1.330494008825369e-07, "epoch": 4.252681210759045, "percentage": 85.05, "elapsed_time": "3:46:00", "remaining_time": "0:39:42", "throughput": 8649.91, "total_tokens": 117295528} +{"current_steps": 174080, "total_steps": 204665, "loss": 0.0, "lr": 1.330069021842275e-07, "epoch": 4.2528033615908925, "percentage": 85.06, "elapsed_time": "3:46:00", "remaining_time": "0:39:42", "throughput": 8649.94, "total_tokens": 117298984} +{"current_steps": 174085, "total_steps": 204665, "loss": 0.0, "lr": 1.3296440979092527e-07, "epoch": 4.252925512422739, "percentage": 85.06, "elapsed_time": "3:46:01", "remaining_time": "0:39:42", "throughput": 8649.99, "total_tokens": 117302760} +{"current_steps": 174090, "total_steps": 204665, "loss": 0.0, "lr": 1.3292192370293887e-07, "epoch": 4.253047663254587, "percentage": 85.06, "elapsed_time": "3:46:01", "remaining_time": "0:39:41", "throughput": 8650.0, "total_tokens": 117305768} +{"current_steps": 174095, "total_steps": 204665, "loss": 0.0, "lr": 1.328794439205777e-07, "epoch": 4.253169814086434, "percentage": 85.06, "elapsed_time": "3:46:01", "remaining_time": "0:39:41", "throughput": 8650.03, "total_tokens": 117309224} +{"current_steps": 174100, "total_steps": 204665, "loss": 0.0, "lr": 1.328369704441501e-07, "epoch": 4.253291964918281, "percentage": 85.07, "elapsed_time": "3:46:02", "remaining_time": "0:39:40", "throughput": 8650.05, "total_tokens": 117312488} +{"current_steps": 174105, "total_steps": 204665, "loss": 0.0, "lr": 1.3279450327396568e-07, "epoch": 4.253414115750128, "percentage": 85.07, "elapsed_time": "3:46:02", "remaining_time": "0:39:40", "throughput": 8650.06, "total_tokens": 117315688} +{"current_steps": 174110, "total_steps": 204665, "loss": 0.0, "lr": 1.3275204241033255e-07, "epoch": 4.253536266581976, "percentage": 85.07, "elapsed_time": "3:46:02", "remaining_time": "0:39:40", "throughput": 8650.11, "total_tokens": 117319336} +{"current_steps": 174115, "total_steps": 204665, "loss": 0.0, "lr": 1.327095878535598e-07, "epoch": 4.253658417413822, "percentage": 85.07, "elapsed_time": "3:46:03", "remaining_time": "0:39:39", "throughput": 8650.11, "total_tokens": 117322344} +{"current_steps": 174120, "total_steps": 204665, "loss": 0.0, "lr": 1.3266713960395647e-07, "epoch": 4.25378056824567, "percentage": 85.08, "elapsed_time": "3:46:03", "remaining_time": "0:39:39", "throughput": 8650.12, "total_tokens": 117325288} +{"current_steps": 174125, "total_steps": 204665, "loss": 0.0, "lr": 1.3262469766183083e-07, "epoch": 4.253902719077517, "percentage": 85.08, "elapsed_time": "3:46:03", "remaining_time": "0:39:38", "throughput": 8650.15, "total_tokens": 117328744} +{"current_steps": 174130, "total_steps": 204665, "loss": 0.0, "lr": 1.325822620274918e-07, "epoch": 4.254024869909364, "percentage": 85.08, "elapsed_time": "3:46:04", "remaining_time": "0:39:38", "throughput": 8650.16, "total_tokens": 117331880} +{"current_steps": 174135, "total_steps": 204665, "loss": 0.0, "lr": 1.325398327012479e-07, "epoch": 4.254147020741211, "percentage": 85.08, "elapsed_time": "3:46:04", "remaining_time": "0:39:38", "throughput": 8650.19, "total_tokens": 117335208} +{"current_steps": 174140, "total_steps": 204665, "loss": 0.0, "lr": 1.324974096834075e-07, "epoch": 4.254269171573059, "percentage": 85.09, "elapsed_time": "3:46:04", "remaining_time": "0:39:37", "throughput": 8650.2, "total_tokens": 117338344} +{"current_steps": 174145, "total_steps": 204665, "loss": 0.0, "lr": 1.3245499297427943e-07, "epoch": 4.2543913224049055, "percentage": 85.09, "elapsed_time": "3:46:05", "remaining_time": "0:39:37", "throughput": 8650.23, "total_tokens": 117341800} +{"current_steps": 174150, "total_steps": 204665, "loss": 0.0001, "lr": 1.3241258257417177e-07, "epoch": 4.254513473236753, "percentage": 85.09, "elapsed_time": "3:46:05", "remaining_time": "0:39:36", "throughput": 8650.25, "total_tokens": 117345064} +{"current_steps": 174155, "total_steps": 204665, "loss": 0.0, "lr": 1.323701784833934e-07, "epoch": 4.2546356240686, "percentage": 85.09, "elapsed_time": "3:46:05", "remaining_time": "0:39:36", "throughput": 8650.28, "total_tokens": 117348520} +{"current_steps": 174160, "total_steps": 204665, "loss": 0.0, "lr": 1.3232778070225227e-07, "epoch": 4.2547577749004475, "percentage": 85.1, "elapsed_time": "3:46:06", "remaining_time": "0:39:36", "throughput": 8650.31, "total_tokens": 117351976} +{"current_steps": 174165, "total_steps": 204665, "loss": 0.0, "lr": 1.3228538923105704e-07, "epoch": 4.254879925732294, "percentage": 85.1, "elapsed_time": "3:46:06", "remaining_time": "0:39:35", "throughput": 8650.35, "total_tokens": 117355432} +{"current_steps": 174170, "total_steps": 204665, "loss": 0.0, "lr": 1.3224300407011558e-07, "epoch": 4.255002076564141, "percentage": 85.1, "elapsed_time": "3:46:06", "remaining_time": "0:39:35", "throughput": 8650.39, "total_tokens": 117359016} +{"current_steps": 174175, "total_steps": 204665, "loss": 0.0, "lr": 1.3220062521973652e-07, "epoch": 4.255124227395989, "percentage": 85.1, "elapsed_time": "3:46:07", "remaining_time": "0:39:35", "throughput": 8650.43, "total_tokens": 117362728} +{"current_steps": 174180, "total_steps": 204665, "loss": 0.0, "lr": 1.3215825268022807e-07, "epoch": 4.255246378227835, "percentage": 85.1, "elapsed_time": "3:46:07", "remaining_time": "0:39:34", "throughput": 8650.45, "total_tokens": 117365928} +{"current_steps": 174185, "total_steps": 204665, "loss": 0.0, "lr": 1.3211588645189809e-07, "epoch": 4.255368529059683, "percentage": 85.11, "elapsed_time": "3:46:07", "remaining_time": "0:39:34", "throughput": 8650.51, "total_tokens": 117369896} +{"current_steps": 174190, "total_steps": 204665, "loss": 0.0, "lr": 1.3207352653505488e-07, "epoch": 4.25549067989153, "percentage": 85.11, "elapsed_time": "3:46:08", "remaining_time": "0:39:33", "throughput": 8650.55, "total_tokens": 117373480} +{"current_steps": 174195, "total_steps": 204665, "loss": 0.0, "lr": 1.3203117293000632e-07, "epoch": 4.255612830723377, "percentage": 85.11, "elapsed_time": "3:46:08", "remaining_time": "0:39:33", "throughput": 8650.56, "total_tokens": 117376488} +{"current_steps": 174200, "total_steps": 204665, "loss": 0.0, "lr": 1.3198882563706082e-07, "epoch": 4.255734981555224, "percentage": 85.11, "elapsed_time": "3:46:09", "remaining_time": "0:39:33", "throughput": 8650.57, "total_tokens": 117379560} +{"current_steps": 174205, "total_steps": 204665, "loss": 0.0, "lr": 1.319464846565257e-07, "epoch": 4.255857132387072, "percentage": 85.12, "elapsed_time": "3:46:09", "remaining_time": "0:39:32", "throughput": 8650.6, "total_tokens": 117383016} +{"current_steps": 174210, "total_steps": 204665, "loss": 0.0, "lr": 1.3190414998870924e-07, "epoch": 4.2559792832189185, "percentage": 85.12, "elapsed_time": "3:46:09", "remaining_time": "0:39:32", "throughput": 8650.65, "total_tokens": 117386792} +{"current_steps": 174215, "total_steps": 204665, "loss": 0.0, "lr": 1.3186182163391957e-07, "epoch": 4.256101434050766, "percentage": 85.12, "elapsed_time": "3:46:10", "remaining_time": "0:39:31", "throughput": 8650.67, "total_tokens": 117390056} +{"current_steps": 174220, "total_steps": 204665, "loss": 0.0, "lr": 1.3181949959246398e-07, "epoch": 4.256223584882613, "percentage": 85.12, "elapsed_time": "3:46:10", "remaining_time": "0:39:31", "throughput": 8650.7, "total_tokens": 117393512} +{"current_steps": 174225, "total_steps": 204665, "loss": 0.0, "lr": 1.3177718386465065e-07, "epoch": 4.2563457357144605, "percentage": 85.13, "elapsed_time": "3:46:10", "remaining_time": "0:39:31", "throughput": 8650.74, "total_tokens": 117397224} +{"current_steps": 174230, "total_steps": 204665, "loss": 0.0, "lr": 1.3173487445078702e-07, "epoch": 4.256467886546307, "percentage": 85.13, "elapsed_time": "3:46:11", "remaining_time": "0:39:30", "throughput": 8650.8, "total_tokens": 117401064} +{"current_steps": 174235, "total_steps": 204665, "loss": 0.0, "lr": 1.3169257135118118e-07, "epoch": 4.256590037378155, "percentage": 85.13, "elapsed_time": "3:46:11", "remaining_time": "0:39:30", "throughput": 8650.82, "total_tokens": 117404392} +{"current_steps": 174240, "total_steps": 204665, "loss": 0.0, "lr": 1.316502745661402e-07, "epoch": 4.256712188210002, "percentage": 85.13, "elapsed_time": "3:46:11", "remaining_time": "0:39:29", "throughput": 8650.84, "total_tokens": 117407720} +{"current_steps": 174245, "total_steps": 204665, "loss": 0.0, "lr": 1.316079840959723e-07, "epoch": 4.256834339041849, "percentage": 85.14, "elapsed_time": "3:46:12", "remaining_time": "0:39:29", "throughput": 8650.88, "total_tokens": 117411240} +{"current_steps": 174250, "total_steps": 204665, "loss": 0.0, "lr": 1.3156569994098465e-07, "epoch": 4.256956489873696, "percentage": 85.14, "elapsed_time": "3:46:12", "remaining_time": "0:39:29", "throughput": 8650.9, "total_tokens": 117414504} +{"current_steps": 174255, "total_steps": 204665, "loss": 0.0, "lr": 1.3152342210148447e-07, "epoch": 4.257078640705544, "percentage": 85.14, "elapsed_time": "3:46:12", "remaining_time": "0:39:28", "throughput": 8650.95, "total_tokens": 117418408} +{"current_steps": 174260, "total_steps": 204665, "loss": 0.0, "lr": 1.3148115057777997e-07, "epoch": 4.25720079153739, "percentage": 85.14, "elapsed_time": "3:46:13", "remaining_time": "0:39:28", "throughput": 8651.02, "total_tokens": 117422568} +{"current_steps": 174265, "total_steps": 204665, "loss": 0.0, "lr": 1.3143888537017788e-07, "epoch": 4.257322942369237, "percentage": 85.15, "elapsed_time": "3:46:13", "remaining_time": "0:39:27", "throughput": 8651.04, "total_tokens": 117425768} +{"current_steps": 174270, "total_steps": 204665, "loss": 0.0, "lr": 1.3139662647898574e-07, "epoch": 4.257445093201085, "percentage": 85.15, "elapsed_time": "3:46:13", "remaining_time": "0:39:27", "throughput": 8651.06, "total_tokens": 117429096} +{"current_steps": 174275, "total_steps": 204665, "loss": 0.0, "lr": 1.313543739045113e-07, "epoch": 4.2575672440329315, "percentage": 85.15, "elapsed_time": "3:46:14", "remaining_time": "0:39:27", "throughput": 8651.07, "total_tokens": 117432168} +{"current_steps": 174280, "total_steps": 204665, "loss": 0.0, "lr": 1.313121276470611e-07, "epoch": 4.257689394864779, "percentage": 85.15, "elapsed_time": "3:46:14", "remaining_time": "0:39:26", "throughput": 8651.1, "total_tokens": 117435688} +{"current_steps": 174285, "total_steps": 204665, "loss": 0.0, "lr": 1.3126988770694314e-07, "epoch": 4.257811545696626, "percentage": 85.16, "elapsed_time": "3:46:15", "remaining_time": "0:39:26", "throughput": 8651.12, "total_tokens": 117439016} +{"current_steps": 174290, "total_steps": 204665, "loss": 0.0, "lr": 1.31227654084464e-07, "epoch": 4.2579336965284735, "percentage": 85.16, "elapsed_time": "3:46:15", "remaining_time": "0:39:25", "throughput": 8651.16, "total_tokens": 117442664} +{"current_steps": 174295, "total_steps": 204665, "loss": 0.0, "lr": 1.3118542677993116e-07, "epoch": 4.25805584736032, "percentage": 85.16, "elapsed_time": "3:46:15", "remaining_time": "0:39:25", "throughput": 8651.18, "total_tokens": 117445992} +{"current_steps": 174300, "total_steps": 204665, "loss": 0.0, "lr": 1.3114320579365134e-07, "epoch": 4.258177998192168, "percentage": 85.16, "elapsed_time": "3:46:16", "remaining_time": "0:39:25", "throughput": 8651.21, "total_tokens": 117449384} +{"current_steps": 174305, "total_steps": 204665, "loss": 0.0, "lr": 1.3110099112593199e-07, "epoch": 4.258300149024015, "percentage": 85.17, "elapsed_time": "3:46:16", "remaining_time": "0:39:24", "throughput": 8651.22, "total_tokens": 117452456} +{"current_steps": 174310, "total_steps": 204665, "loss": 0.0, "lr": 1.3105878277707992e-07, "epoch": 4.258422299855862, "percentage": 85.17, "elapsed_time": "3:46:16", "remaining_time": "0:39:24", "throughput": 8651.24, "total_tokens": 117455784} +{"current_steps": 174315, "total_steps": 204665, "loss": 0.0, "lr": 1.3101658074740207e-07, "epoch": 4.258544450687709, "percentage": 85.17, "elapsed_time": "3:46:17", "remaining_time": "0:39:23", "throughput": 8651.25, "total_tokens": 117459048} +{"current_steps": 174320, "total_steps": 204665, "loss": 0.0, "lr": 1.3097438503720548e-07, "epoch": 4.258666601519557, "percentage": 85.17, "elapsed_time": "3:46:17", "remaining_time": "0:39:23", "throughput": 8651.29, "total_tokens": 117462568} +{"current_steps": 174325, "total_steps": 204665, "loss": 0.0, "lr": 1.309321956467968e-07, "epoch": 4.258788752351403, "percentage": 85.18, "elapsed_time": "3:46:17", "remaining_time": "0:39:23", "throughput": 8651.29, "total_tokens": 117465576} +{"current_steps": 174330, "total_steps": 204665, "loss": 0.0, "lr": 1.308900125764828e-07, "epoch": 4.258910903183251, "percentage": 85.18, "elapsed_time": "3:46:18", "remaining_time": "0:39:22", "throughput": 8651.32, "total_tokens": 117469160} +{"current_steps": 174335, "total_steps": 204665, "loss": 0.0, "lr": 1.3084783582657077e-07, "epoch": 4.259033054015098, "percentage": 85.18, "elapsed_time": "3:46:18", "remaining_time": "0:39:22", "throughput": 8651.36, "total_tokens": 117472744} +{"current_steps": 174340, "total_steps": 204665, "loss": 0.0, "lr": 1.3080566539736691e-07, "epoch": 4.2591552048469445, "percentage": 85.18, "elapsed_time": "3:46:18", "remaining_time": "0:39:21", "throughput": 8651.39, "total_tokens": 117476136} +{"current_steps": 174345, "total_steps": 204665, "loss": 0.0, "lr": 1.307635012891779e-07, "epoch": 4.259277355678792, "percentage": 85.19, "elapsed_time": "3:46:19", "remaining_time": "0:39:21", "throughput": 8651.41, "total_tokens": 117479528} +{"current_steps": 174350, "total_steps": 204665, "loss": 0.0, "lr": 1.3072134350231068e-07, "epoch": 4.259399506510639, "percentage": 85.19, "elapsed_time": "3:46:19", "remaining_time": "0:39:21", "throughput": 8651.43, "total_tokens": 117482920} +{"current_steps": 174355, "total_steps": 204665, "loss": 0.0, "lr": 1.3067919203707168e-07, "epoch": 4.2595216573424866, "percentage": 85.19, "elapsed_time": "3:46:19", "remaining_time": "0:39:20", "throughput": 8651.45, "total_tokens": 117486248} +{"current_steps": 174360, "total_steps": 204665, "loss": 0.0, "lr": 1.306370468937672e-07, "epoch": 4.259643808174333, "percentage": 85.19, "elapsed_time": "3:46:20", "remaining_time": "0:39:20", "throughput": 8651.45, "total_tokens": 117489192} +{"current_steps": 174365, "total_steps": 204665, "loss": 0.0, "lr": 1.305949080727039e-07, "epoch": 4.259765959006181, "percentage": 85.2, "elapsed_time": "3:46:20", "remaining_time": "0:39:19", "throughput": 8651.46, "total_tokens": 117492264} +{"current_steps": 174370, "total_steps": 204665, "loss": 0.0, "lr": 1.3055277557418854e-07, "epoch": 4.259888109838028, "percentage": 85.2, "elapsed_time": "3:46:20", "remaining_time": "0:39:19", "throughput": 8651.48, "total_tokens": 117495592} +{"current_steps": 174375, "total_steps": 204665, "loss": 0.0, "lr": 1.3051064939852706e-07, "epoch": 4.260010260669875, "percentage": 85.2, "elapsed_time": "3:46:21", "remaining_time": "0:39:19", "throughput": 8651.49, "total_tokens": 117498856} +{"current_steps": 174380, "total_steps": 204665, "loss": 0.0, "lr": 1.3046852954602617e-07, "epoch": 4.260132411501722, "percentage": 85.2, "elapsed_time": "3:46:21", "remaining_time": "0:39:18", "throughput": 8651.49, "total_tokens": 117501736} +{"current_steps": 174385, "total_steps": 204665, "loss": 0.0, "lr": 1.3042641601699178e-07, "epoch": 4.26025456233357, "percentage": 85.21, "elapsed_time": "3:46:22", "remaining_time": "0:39:18", "throughput": 8651.53, "total_tokens": 117505384} +{"current_steps": 174390, "total_steps": 204665, "loss": 0.0, "lr": 1.3038430881173035e-07, "epoch": 4.260376713165416, "percentage": 85.21, "elapsed_time": "3:46:22", "remaining_time": "0:39:17", "throughput": 8651.54, "total_tokens": 117508648} +{"current_steps": 174395, "total_steps": 204665, "loss": 0.0, "lr": 1.303422079305484e-07, "epoch": 4.260498863997264, "percentage": 85.21, "elapsed_time": "3:46:22", "remaining_time": "0:39:17", "throughput": 8651.56, "total_tokens": 117511976} +{"current_steps": 174400, "total_steps": 204665, "loss": 0.0, "lr": 1.3030011337375158e-07, "epoch": 4.260621014829111, "percentage": 85.21, "elapsed_time": "3:46:23", "remaining_time": "0:39:17", "throughput": 8651.58, "total_tokens": 117515240} +{"current_steps": 174405, "total_steps": 204665, "loss": 0.0, "lr": 1.3025802514164653e-07, "epoch": 4.2607431656609585, "percentage": 85.21, "elapsed_time": "3:46:23", "remaining_time": "0:39:16", "throughput": 8651.58, "total_tokens": 117518376} +{"current_steps": 174410, "total_steps": 204665, "loss": 0.0, "lr": 1.3021594323453878e-07, "epoch": 4.260865316492805, "percentage": 85.22, "elapsed_time": "3:46:23", "remaining_time": "0:39:16", "throughput": 8651.59, "total_tokens": 117521512} +{"current_steps": 174415, "total_steps": 204665, "loss": 0.0, "lr": 1.3017386765273487e-07, "epoch": 4.260987467324653, "percentage": 85.22, "elapsed_time": "3:46:24", "remaining_time": "0:39:15", "throughput": 8651.61, "total_tokens": 117524776} +{"current_steps": 174420, "total_steps": 204665, "loss": 0.0, "lr": 1.3013179839654033e-07, "epoch": 4.2611096181565, "percentage": 85.22, "elapsed_time": "3:46:24", "remaining_time": "0:39:15", "throughput": 8651.62, "total_tokens": 117527912} +{"current_steps": 174425, "total_steps": 204665, "loss": 0.0, "lr": 1.3008973546626134e-07, "epoch": 4.261231768988347, "percentage": 85.22, "elapsed_time": "3:46:24", "remaining_time": "0:39:15", "throughput": 8651.64, "total_tokens": 117531240} +{"current_steps": 174430, "total_steps": 204665, "loss": 0.0001, "lr": 1.3004767886220391e-07, "epoch": 4.261353919820194, "percentage": 85.23, "elapsed_time": "3:46:25", "remaining_time": "0:39:14", "throughput": 8651.68, "total_tokens": 117535016} +{"current_steps": 174435, "total_steps": 204665, "loss": 0.0, "lr": 1.3000562858467368e-07, "epoch": 4.261476070652041, "percentage": 85.23, "elapsed_time": "3:46:25", "remaining_time": "0:39:14", "throughput": 8651.73, "total_tokens": 117538856} +{"current_steps": 174440, "total_steps": 204665, "loss": 0.0, "lr": 1.2996358463397662e-07, "epoch": 4.261598221483888, "percentage": 85.23, "elapsed_time": "3:46:25", "remaining_time": "0:39:14", "throughput": 8651.76, "total_tokens": 117542312} +{"current_steps": 174445, "total_steps": 204665, "loss": 0.0, "lr": 1.2992154701041836e-07, "epoch": 4.261720372315735, "percentage": 85.23, "elapsed_time": "3:46:26", "remaining_time": "0:39:13", "throughput": 8651.77, "total_tokens": 117545512} +{"current_steps": 174450, "total_steps": 204665, "loss": 0.0522, "lr": 1.2987951571430456e-07, "epoch": 4.261842523147583, "percentage": 85.24, "elapsed_time": "3:46:26", "remaining_time": "0:39:13", "throughput": 8651.81, "total_tokens": 117549160} +{"current_steps": 174455, "total_steps": 204665, "loss": 0.0, "lr": 1.2983749074594097e-07, "epoch": 4.2619646739794295, "percentage": 85.24, "elapsed_time": "3:46:27", "remaining_time": "0:39:12", "throughput": 8651.84, "total_tokens": 117552616} +{"current_steps": 174460, "total_steps": 204665, "loss": 0.0, "lr": 1.2979547210563313e-07, "epoch": 4.262086824811277, "percentage": 85.24, "elapsed_time": "3:46:27", "remaining_time": "0:39:12", "throughput": 8651.85, "total_tokens": 117555752} +{"current_steps": 174465, "total_steps": 204665, "loss": 0.0, "lr": 1.297534597936869e-07, "epoch": 4.262208975643124, "percentage": 85.24, "elapsed_time": "3:46:27", "remaining_time": "0:39:12", "throughput": 8651.85, "total_tokens": 117558824} +{"current_steps": 174470, "total_steps": 204665, "loss": 0.0, "lr": 1.2971145381040726e-07, "epoch": 4.2623311264749715, "percentage": 85.25, "elapsed_time": "3:46:28", "remaining_time": "0:39:11", "throughput": 8651.87, "total_tokens": 117562088} +{"current_steps": 174475, "total_steps": 204665, "loss": 0.0, "lr": 1.296694541561003e-07, "epoch": 4.262453277306818, "percentage": 85.25, "elapsed_time": "3:46:28", "remaining_time": "0:39:11", "throughput": 8651.89, "total_tokens": 117565416} +{"current_steps": 174480, "total_steps": 204665, "loss": 0.0, "lr": 1.296274608310709e-07, "epoch": 4.262575428138666, "percentage": 85.25, "elapsed_time": "3:46:28", "remaining_time": "0:39:10", "throughput": 8651.91, "total_tokens": 117568808} +{"current_steps": 174485, "total_steps": 204665, "loss": 0.0, "lr": 1.2958547383562468e-07, "epoch": 4.262697578970513, "percentage": 85.25, "elapsed_time": "3:46:29", "remaining_time": "0:39:10", "throughput": 8651.93, "total_tokens": 117572072} +{"current_steps": 174490, "total_steps": 204665, "loss": 0.0, "lr": 1.295434931700673e-07, "epoch": 4.26281972980236, "percentage": 85.26, "elapsed_time": "3:46:29", "remaining_time": "0:39:10", "throughput": 8651.97, "total_tokens": 117575784} +{"current_steps": 174495, "total_steps": 204665, "loss": 0.0, "lr": 1.295015188347035e-07, "epoch": 4.262941880634207, "percentage": 85.26, "elapsed_time": "3:46:29", "remaining_time": "0:39:09", "throughput": 8652.01, "total_tokens": 117579432} +{"current_steps": 174500, "total_steps": 204665, "loss": 0.0, "lr": 1.2945955082983906e-07, "epoch": 4.263064031466055, "percentage": 85.26, "elapsed_time": "3:46:30", "remaining_time": "0:39:09", "throughput": 8652.04, "total_tokens": 117583016} +{"current_steps": 174505, "total_steps": 204665, "loss": 0.0318, "lr": 1.2941758915577862e-07, "epoch": 4.263186182297901, "percentage": 85.26, "elapsed_time": "3:46:30", "remaining_time": "0:39:08", "throughput": 8652.06, "total_tokens": 117586408} +{"current_steps": 174510, "total_steps": 204665, "loss": 0.0, "lr": 1.293756338128279e-07, "epoch": 4.263308333129749, "percentage": 85.27, "elapsed_time": "3:46:30", "remaining_time": "0:39:08", "throughput": 8652.09, "total_tokens": 117589736} +{"current_steps": 174515, "total_steps": 204665, "loss": 0.0, "lr": 1.2933368480129148e-07, "epoch": 4.263430483961596, "percentage": 85.27, "elapsed_time": "3:46:31", "remaining_time": "0:39:08", "throughput": 8652.09, "total_tokens": 117592872} +{"current_steps": 174520, "total_steps": 204665, "loss": 0.0, "lr": 1.2929174212147475e-07, "epoch": 4.263552634793443, "percentage": 85.27, "elapsed_time": "3:46:31", "remaining_time": "0:39:07", "throughput": 8652.13, "total_tokens": 117596456} +{"current_steps": 174525, "total_steps": 204665, "loss": 0.0, "lr": 1.2924980577368284e-07, "epoch": 4.26367478562529, "percentage": 85.27, "elapsed_time": "3:46:31", "remaining_time": "0:39:07", "throughput": 8652.16, "total_tokens": 117599912} +{"current_steps": 174530, "total_steps": 204665, "loss": 0.0, "lr": 1.2920787575822035e-07, "epoch": 4.263796936457137, "percentage": 85.28, "elapsed_time": "3:46:32", "remaining_time": "0:39:06", "throughput": 8652.18, "total_tokens": 117603240} +{"current_steps": 174535, "total_steps": 204665, "loss": 0.0, "lr": 1.291659520753926e-07, "epoch": 4.2639190872889845, "percentage": 85.28, "elapsed_time": "3:46:32", "remaining_time": "0:39:06", "throughput": 8652.22, "total_tokens": 117606952} +{"current_steps": 174540, "total_steps": 204665, "loss": 0.0, "lr": 1.2912403472550405e-07, "epoch": 4.264041238120831, "percentage": 85.28, "elapsed_time": "3:46:33", "remaining_time": "0:39:06", "throughput": 8652.25, "total_tokens": 117610472} +{"current_steps": 174545, "total_steps": 204665, "loss": 0.0, "lr": 1.2908212370885997e-07, "epoch": 4.264163388952679, "percentage": 85.28, "elapsed_time": "3:46:33", "remaining_time": "0:39:05", "throughput": 8652.28, "total_tokens": 117613928} +{"current_steps": 174550, "total_steps": 204665, "loss": 0.0, "lr": 1.2904021902576467e-07, "epoch": 4.264285539784526, "percentage": 85.29, "elapsed_time": "3:46:33", "remaining_time": "0:39:05", "throughput": 8652.3, "total_tokens": 117617256} +{"current_steps": 174555, "total_steps": 204665, "loss": 0.0, "lr": 1.289983206765235e-07, "epoch": 4.264407690616373, "percentage": 85.29, "elapsed_time": "3:46:34", "remaining_time": "0:39:04", "throughput": 8652.32, "total_tokens": 117620584} +{"current_steps": 174560, "total_steps": 204665, "loss": 0.0, "lr": 1.2895642866144075e-07, "epoch": 4.26452984144822, "percentage": 85.29, "elapsed_time": "3:46:34", "remaining_time": "0:39:04", "throughput": 8652.37, "total_tokens": 117624360} +{"current_steps": 174565, "total_steps": 204665, "loss": 0.0, "lr": 1.2891454298082084e-07, "epoch": 4.264651992280068, "percentage": 85.29, "elapsed_time": "3:46:34", "remaining_time": "0:39:04", "throughput": 8652.39, "total_tokens": 117627688} +{"current_steps": 174570, "total_steps": 204665, "loss": 0.0, "lr": 1.2887266363496897e-07, "epoch": 4.264774143111914, "percentage": 85.3, "elapsed_time": "3:46:35", "remaining_time": "0:39:03", "throughput": 8652.42, "total_tokens": 117631208} +{"current_steps": 174575, "total_steps": 204665, "loss": 0.0, "lr": 1.2883079062418922e-07, "epoch": 4.264896293943762, "percentage": 85.3, "elapsed_time": "3:46:35", "remaining_time": "0:39:03", "throughput": 8652.45, "total_tokens": 117634728} +{"current_steps": 174580, "total_steps": 204665, "loss": 0.0, "lr": 1.2878892394878616e-07, "epoch": 4.265018444775609, "percentage": 85.3, "elapsed_time": "3:46:35", "remaining_time": "0:39:02", "throughput": 8652.49, "total_tokens": 117638248} +{"current_steps": 174585, "total_steps": 204665, "loss": 0.0, "lr": 1.2874706360906462e-07, "epoch": 4.265140595607456, "percentage": 85.3, "elapsed_time": "3:46:36", "remaining_time": "0:39:02", "throughput": 8652.55, "total_tokens": 117642216} +{"current_steps": 174590, "total_steps": 204665, "loss": 0.0, "lr": 1.287052096053286e-07, "epoch": 4.265262746439303, "percentage": 85.31, "elapsed_time": "3:46:36", "remaining_time": "0:39:02", "throughput": 8652.56, "total_tokens": 117645352} +{"current_steps": 174595, "total_steps": 204665, "loss": 0.0, "lr": 1.2866336193788285e-07, "epoch": 4.265384897271151, "percentage": 85.31, "elapsed_time": "3:46:36", "remaining_time": "0:39:01", "throughput": 8652.56, "total_tokens": 117648360} +{"current_steps": 174600, "total_steps": 204665, "loss": 0.0, "lr": 1.2862152060703135e-07, "epoch": 4.2655070481029975, "percentage": 85.31, "elapsed_time": "3:46:37", "remaining_time": "0:39:01", "throughput": 8652.56, "total_tokens": 117651304} +{"current_steps": 174605, "total_steps": 204665, "loss": 0.0105, "lr": 1.2857968561307864e-07, "epoch": 4.265629198934845, "percentage": 85.31, "elapsed_time": "3:46:37", "remaining_time": "0:39:00", "throughput": 8652.58, "total_tokens": 117654632} +{"current_steps": 174610, "total_steps": 204665, "loss": 0.0, "lr": 1.285378569563287e-07, "epoch": 4.265751349766692, "percentage": 85.32, "elapsed_time": "3:46:37", "remaining_time": "0:39:00", "throughput": 8652.59, "total_tokens": 117657768} +{"current_steps": 174615, "total_steps": 204665, "loss": 0.0, "lr": 1.284960346370858e-07, "epoch": 4.2658735005985395, "percentage": 85.32, "elapsed_time": "3:46:38", "remaining_time": "0:39:00", "throughput": 8652.61, "total_tokens": 117660968} +{"current_steps": 174620, "total_steps": 204665, "loss": 0.0, "lr": 1.284542186556543e-07, "epoch": 4.265995651430386, "percentage": 85.32, "elapsed_time": "3:46:38", "remaining_time": "0:38:59", "throughput": 8652.64, "total_tokens": 117664360} +{"current_steps": 174625, "total_steps": 204665, "loss": 0.0001, "lr": 1.2841240901233796e-07, "epoch": 4.266117802262233, "percentage": 85.32, "elapsed_time": "3:46:39", "remaining_time": "0:38:59", "throughput": 8652.67, "total_tokens": 117667880} +{"current_steps": 174630, "total_steps": 204665, "loss": 0.0, "lr": 1.2837060570744128e-07, "epoch": 4.266239953094081, "percentage": 85.32, "elapsed_time": "3:46:39", "remaining_time": "0:38:58", "throughput": 8652.7, "total_tokens": 117671336} +{"current_steps": 174635, "total_steps": 204665, "loss": 0.0, "lr": 1.2832880874126784e-07, "epoch": 4.266362103925927, "percentage": 85.33, "elapsed_time": "3:46:39", "remaining_time": "0:38:58", "throughput": 8652.74, "total_tokens": 117674856} +{"current_steps": 174640, "total_steps": 204665, "loss": 0.0, "lr": 1.2828701811412168e-07, "epoch": 4.266484254757775, "percentage": 85.33, "elapsed_time": "3:46:40", "remaining_time": "0:38:58", "throughput": 8652.76, "total_tokens": 117678120} +{"current_steps": 174645, "total_steps": 204665, "loss": 0.0, "lr": 1.2824523382630692e-07, "epoch": 4.266606405589622, "percentage": 85.33, "elapsed_time": "3:46:40", "remaining_time": "0:38:57", "throughput": 8652.78, "total_tokens": 117681448} +{"current_steps": 174650, "total_steps": 204665, "loss": 0.0, "lr": 1.2820345587812743e-07, "epoch": 4.266728556421469, "percentage": 85.33, "elapsed_time": "3:46:40", "remaining_time": "0:38:57", "throughput": 8652.79, "total_tokens": 117684520} +{"current_steps": 174655, "total_steps": 204665, "loss": 0.0, "lr": 1.281616842698866e-07, "epoch": 4.266850707253316, "percentage": 85.34, "elapsed_time": "3:46:41", "remaining_time": "0:38:57", "throughput": 8652.83, "total_tokens": 117688424} +{"current_steps": 174660, "total_steps": 204665, "loss": 0.0, "lr": 1.2811991900188868e-07, "epoch": 4.266972858085164, "percentage": 85.34, "elapsed_time": "3:46:41", "remaining_time": "0:38:56", "throughput": 8652.84, "total_tokens": 117691432} +{"current_steps": 174665, "total_steps": 204665, "loss": 0.0, "lr": 1.2807816007443727e-07, "epoch": 4.2670950089170105, "percentage": 85.34, "elapsed_time": "3:46:41", "remaining_time": "0:38:56", "throughput": 8652.86, "total_tokens": 117694760} +{"current_steps": 174670, "total_steps": 204665, "loss": 0.0, "lr": 1.2803640748783583e-07, "epoch": 4.267217159748858, "percentage": 85.34, "elapsed_time": "3:46:42", "remaining_time": "0:38:55", "throughput": 8652.87, "total_tokens": 117697896} +{"current_steps": 174675, "total_steps": 204665, "loss": 0.0, "lr": 1.279946612423881e-07, "epoch": 4.267339310580705, "percentage": 85.35, "elapsed_time": "3:46:42", "remaining_time": "0:38:55", "throughput": 8652.96, "total_tokens": 117702312} +{"current_steps": 174680, "total_steps": 204665, "loss": 0.0, "lr": 1.2795292133839796e-07, "epoch": 4.267461461412553, "percentage": 85.35, "elapsed_time": "3:46:42", "remaining_time": "0:38:55", "throughput": 8652.98, "total_tokens": 117705576} +{"current_steps": 174685, "total_steps": 204665, "loss": 0.0, "lr": 1.279111877761685e-07, "epoch": 4.267583612244399, "percentage": 85.35, "elapsed_time": "3:46:43", "remaining_time": "0:38:54", "throughput": 8653.01, "total_tokens": 117708904} +{"current_steps": 174690, "total_steps": 204665, "loss": 0.0, "lr": 1.2786946055600367e-07, "epoch": 4.267705763076247, "percentage": 85.35, "elapsed_time": "3:46:43", "remaining_time": "0:38:54", "throughput": 8653.04, "total_tokens": 117712424} +{"current_steps": 174695, "total_steps": 204665, "loss": 0.0, "lr": 1.2782773967820647e-07, "epoch": 4.267827913908094, "percentage": 85.36, "elapsed_time": "3:46:43", "remaining_time": "0:38:53", "throughput": 8653.1, "total_tokens": 117716328} +{"current_steps": 174700, "total_steps": 204665, "loss": 0.0, "lr": 1.277860251430808e-07, "epoch": 4.26795006473994, "percentage": 85.36, "elapsed_time": "3:46:44", "remaining_time": "0:38:53", "throughput": 8653.11, "total_tokens": 117719528} +{"current_steps": 174705, "total_steps": 204665, "loss": 0.0, "lr": 1.2774431695092958e-07, "epoch": 4.268072215571788, "percentage": 85.36, "elapsed_time": "3:46:44", "remaining_time": "0:38:53", "throughput": 8653.12, "total_tokens": 117722600} +{"current_steps": 174710, "total_steps": 204665, "loss": 0.0, "lr": 1.2770261510205616e-07, "epoch": 4.268194366403635, "percentage": 85.36, "elapsed_time": "3:46:44", "remaining_time": "0:38:52", "throughput": 8653.17, "total_tokens": 117726248} +{"current_steps": 174715, "total_steps": 204665, "loss": 0.0, "lr": 1.2766091959676427e-07, "epoch": 4.268316517235482, "percentage": 85.37, "elapsed_time": "3:46:45", "remaining_time": "0:38:52", "throughput": 8653.22, "total_tokens": 117730024} +{"current_steps": 174720, "total_steps": 204665, "loss": 0.0, "lr": 1.276192304353565e-07, "epoch": 4.268438668067329, "percentage": 85.37, "elapsed_time": "3:46:45", "remaining_time": "0:38:51", "throughput": 8653.23, "total_tokens": 117733288} +{"current_steps": 174725, "total_steps": 204665, "loss": 0.0, "lr": 1.2757754761813667e-07, "epoch": 4.268560818899177, "percentage": 85.37, "elapsed_time": "3:46:46", "remaining_time": "0:38:51", "throughput": 8653.27, "total_tokens": 117736872} +{"current_steps": 174730, "total_steps": 204665, "loss": 0.0, "lr": 1.275358711454072e-07, "epoch": 4.268682969731024, "percentage": 85.37, "elapsed_time": "3:46:46", "remaining_time": "0:38:51", "throughput": 8653.31, "total_tokens": 117740392} +{"current_steps": 174735, "total_steps": 204665, "loss": 0.0256, "lr": 1.2749420101747165e-07, "epoch": 4.268805120562871, "percentage": 85.38, "elapsed_time": "3:46:46", "remaining_time": "0:38:50", "throughput": 8653.32, "total_tokens": 117743464} +{"current_steps": 174740, "total_steps": 204665, "loss": 0.0, "lr": 1.2745253723463311e-07, "epoch": 4.268927271394718, "percentage": 85.38, "elapsed_time": "3:46:47", "remaining_time": "0:38:50", "throughput": 8653.35, "total_tokens": 117746984} +{"current_steps": 174745, "total_steps": 204665, "loss": 0.0, "lr": 1.2741087979719412e-07, "epoch": 4.269049422226566, "percentage": 85.38, "elapsed_time": "3:46:47", "remaining_time": "0:38:49", "throughput": 8653.42, "total_tokens": 117750952} +{"current_steps": 174750, "total_steps": 204665, "loss": 0.0, "lr": 1.2736922870545829e-07, "epoch": 4.269171573058412, "percentage": 85.38, "elapsed_time": "3:46:47", "remaining_time": "0:38:49", "throughput": 8653.45, "total_tokens": 117754472} +{"current_steps": 174755, "total_steps": 204665, "loss": 0.0, "lr": 1.27327583959728e-07, "epoch": 4.26929372389026, "percentage": 85.39, "elapsed_time": "3:46:48", "remaining_time": "0:38:49", "throughput": 8653.46, "total_tokens": 117757608} +{"current_steps": 174760, "total_steps": 204665, "loss": 0.0, "lr": 1.2728594556030613e-07, "epoch": 4.269415874722107, "percentage": 85.39, "elapsed_time": "3:46:48", "remaining_time": "0:38:48", "throughput": 8653.48, "total_tokens": 117760808} +{"current_steps": 174765, "total_steps": 204665, "loss": 0.0, "lr": 1.2724431350749576e-07, "epoch": 4.269538025553954, "percentage": 85.39, "elapsed_time": "3:46:48", "remaining_time": "0:38:48", "throughput": 8653.5, "total_tokens": 117764072} +{"current_steps": 174770, "total_steps": 204665, "loss": 0.0, "lr": 1.2720268780159927e-07, "epoch": 4.269660176385801, "percentage": 85.39, "elapsed_time": "3:46:49", "remaining_time": "0:38:47", "throughput": 8653.54, "total_tokens": 117767656} +{"current_steps": 174775, "total_steps": 204665, "loss": 0.0, "lr": 1.2716106844291974e-07, "epoch": 4.269782327217649, "percentage": 85.4, "elapsed_time": "3:46:49", "remaining_time": "0:38:47", "throughput": 8653.55, "total_tokens": 117770792} +{"current_steps": 174780, "total_steps": 204665, "loss": 0.0, "lr": 1.271194554317595e-07, "epoch": 4.2699044780494955, "percentage": 85.4, "elapsed_time": "3:46:49", "remaining_time": "0:38:47", "throughput": 8653.57, "total_tokens": 117774056} +{"current_steps": 174785, "total_steps": 204665, "loss": 0.0, "lr": 1.2707784876842165e-07, "epoch": 4.270026628881343, "percentage": 85.4, "elapsed_time": "3:46:50", "remaining_time": "0:38:46", "throughput": 8653.59, "total_tokens": 117777320} +{"current_steps": 174790, "total_steps": 204665, "loss": 0.0, "lr": 1.2703624845320826e-07, "epoch": 4.27014877971319, "percentage": 85.4, "elapsed_time": "3:46:50", "remaining_time": "0:38:46", "throughput": 8653.61, "total_tokens": 117780584} +{"current_steps": 174795, "total_steps": 204665, "loss": 0.0, "lr": 1.2699465448642198e-07, "epoch": 4.270270930545037, "percentage": 85.41, "elapsed_time": "3:46:50", "remaining_time": "0:38:45", "throughput": 8653.63, "total_tokens": 117783848} +{"current_steps": 174800, "total_steps": 204665, "loss": 0.0, "lr": 1.269530668683656e-07, "epoch": 4.270393081376884, "percentage": 85.41, "elapsed_time": "3:46:51", "remaining_time": "0:38:45", "throughput": 8653.68, "total_tokens": 117787560} +{"current_steps": 174805, "total_steps": 204665, "loss": 0.0, "lr": 1.2691148559934117e-07, "epoch": 4.270515232208731, "percentage": 85.41, "elapsed_time": "3:46:51", "remaining_time": "0:38:45", "throughput": 8653.7, "total_tokens": 117790760} +{"current_steps": 174810, "total_steps": 204665, "loss": 0.0, "lr": 1.2686991067965147e-07, "epoch": 4.270637383040579, "percentage": 85.41, "elapsed_time": "3:46:51", "remaining_time": "0:38:44", "throughput": 8653.7, "total_tokens": 117793704} +{"current_steps": 174815, "total_steps": 204665, "loss": 0.0, "lr": 1.2682834210959847e-07, "epoch": 4.270759533872425, "percentage": 85.42, "elapsed_time": "3:46:52", "remaining_time": "0:38:44", "throughput": 8653.71, "total_tokens": 117796712} +{"current_steps": 174820, "total_steps": 204665, "loss": 0.0001, "lr": 1.2678677988948473e-07, "epoch": 4.270881684704273, "percentage": 85.42, "elapsed_time": "3:46:52", "remaining_time": "0:38:43", "throughput": 8653.72, "total_tokens": 117799912} +{"current_steps": 174825, "total_steps": 204665, "loss": 0.0, "lr": 1.2674522401961218e-07, "epoch": 4.27100383553612, "percentage": 85.42, "elapsed_time": "3:46:52", "remaining_time": "0:38:43", "throughput": 8653.74, "total_tokens": 117803112} +{"current_steps": 174830, "total_steps": 204665, "loss": 0.0, "lr": 1.2670367450028328e-07, "epoch": 4.271125986367967, "percentage": 85.42, "elapsed_time": "3:46:53", "remaining_time": "0:38:43", "throughput": 8653.76, "total_tokens": 117806376} +{"current_steps": 174835, "total_steps": 204665, "loss": 0.0, "lr": 1.2666213133180038e-07, "epoch": 4.271248137199814, "percentage": 85.42, "elapsed_time": "3:46:53", "remaining_time": "0:38:42", "throughput": 8653.79, "total_tokens": 117809896} +{"current_steps": 174840, "total_steps": 204665, "loss": 0.0, "lr": 1.2662059451446506e-07, "epoch": 4.271370288031662, "percentage": 85.43, "elapsed_time": "3:46:54", "remaining_time": "0:38:42", "throughput": 8653.82, "total_tokens": 117813224} +{"current_steps": 174845, "total_steps": 204665, "loss": 0.0, "lr": 1.2657906404858e-07, "epoch": 4.2714924388635085, "percentage": 85.43, "elapsed_time": "3:46:54", "remaining_time": "0:38:41", "throughput": 8653.84, "total_tokens": 117816488} +{"current_steps": 174850, "total_steps": 204665, "loss": 0.0, "lr": 1.265375399344466e-07, "epoch": 4.271614589695356, "percentage": 85.43, "elapsed_time": "3:46:54", "remaining_time": "0:38:41", "throughput": 8653.84, "total_tokens": 117819368} +{"current_steps": 174855, "total_steps": 204665, "loss": 0.0, "lr": 1.2649602217236744e-07, "epoch": 4.271736740527203, "percentage": 85.43, "elapsed_time": "3:46:55", "remaining_time": "0:38:41", "throughput": 8653.84, "total_tokens": 117822312} +{"current_steps": 174860, "total_steps": 204665, "loss": 0.0005, "lr": 1.264545107626439e-07, "epoch": 4.2718588913590505, "percentage": 85.44, "elapsed_time": "3:46:55", "remaining_time": "0:38:40", "throughput": 8653.86, "total_tokens": 117825576} +{"current_steps": 174865, "total_steps": 204665, "loss": 0.0, "lr": 1.2641300570557834e-07, "epoch": 4.271981042190897, "percentage": 85.44, "elapsed_time": "3:46:55", "remaining_time": "0:38:40", "throughput": 8653.89, "total_tokens": 117828968} +{"current_steps": 174870, "total_steps": 204665, "loss": 0.0, "lr": 1.2637150700147235e-07, "epoch": 4.272103193022745, "percentage": 85.44, "elapsed_time": "3:46:56", "remaining_time": "0:38:39", "throughput": 8653.94, "total_tokens": 117832808} +{"current_steps": 174875, "total_steps": 204665, "loss": 0.0, "lr": 1.2633001465062754e-07, "epoch": 4.272225343854592, "percentage": 85.44, "elapsed_time": "3:46:56", "remaining_time": "0:38:39", "throughput": 8653.97, "total_tokens": 117836136} +{"current_steps": 174880, "total_steps": 204665, "loss": 0.0, "lr": 1.2628852865334606e-07, "epoch": 4.272347494686439, "percentage": 85.45, "elapsed_time": "3:46:56", "remaining_time": "0:38:39", "throughput": 8653.98, "total_tokens": 117839336} +{"current_steps": 174885, "total_steps": 204665, "loss": 0.0, "lr": 1.262470490099292e-07, "epoch": 4.272469645518286, "percentage": 85.45, "elapsed_time": "3:46:57", "remaining_time": "0:38:38", "throughput": 8654.0, "total_tokens": 117842536} +{"current_steps": 174890, "total_steps": 204665, "loss": 0.0, "lr": 1.262055757206788e-07, "epoch": 4.272591796350133, "percentage": 85.45, "elapsed_time": "3:46:57", "remaining_time": "0:38:38", "throughput": 8654.0, "total_tokens": 117845352} +{"current_steps": 174895, "total_steps": 204665, "loss": 0.0, "lr": 1.2616410878589666e-07, "epoch": 4.27271394718198, "percentage": 85.45, "elapsed_time": "3:46:57", "remaining_time": "0:38:37", "throughput": 8654.03, "total_tokens": 117848872} +{"current_steps": 174900, "total_steps": 204665, "loss": 0.0, "lr": 1.2612264820588403e-07, "epoch": 4.272836098013827, "percentage": 85.46, "elapsed_time": "3:46:58", "remaining_time": "0:38:37", "throughput": 8654.06, "total_tokens": 117852328} +{"current_steps": 174905, "total_steps": 204665, "loss": 0.0, "lr": 1.2608119398094276e-07, "epoch": 4.272958248845675, "percentage": 85.46, "elapsed_time": "3:46:58", "remaining_time": "0:38:37", "throughput": 8654.1, "total_tokens": 117855848} +{"current_steps": 174910, "total_steps": 204665, "loss": 0.0, "lr": 1.260397461113738e-07, "epoch": 4.2730803996775215, "percentage": 85.46, "elapsed_time": "3:46:58", "remaining_time": "0:38:36", "throughput": 8654.12, "total_tokens": 117859240} +{"current_steps": 174915, "total_steps": 204665, "loss": 0.0, "lr": 1.2599830459747907e-07, "epoch": 4.273202550509369, "percentage": 85.46, "elapsed_time": "3:46:59", "remaining_time": "0:38:36", "throughput": 8654.15, "total_tokens": 117862632} +{"current_steps": 174920, "total_steps": 204665, "loss": 0.0, "lr": 1.2595686943955964e-07, "epoch": 4.273324701341216, "percentage": 85.47, "elapsed_time": "3:46:59", "remaining_time": "0:38:35", "throughput": 8654.19, "total_tokens": 117866152} +{"current_steps": 174925, "total_steps": 204665, "loss": 0.0, "lr": 1.2591544063791683e-07, "epoch": 4.2734468521730635, "percentage": 85.47, "elapsed_time": "3:46:59", "remaining_time": "0:38:35", "throughput": 8654.21, "total_tokens": 117869544} +{"current_steps": 174930, "total_steps": 204665, "loss": 0.0, "lr": 1.2587401819285237e-07, "epoch": 4.27356900300491, "percentage": 85.47, "elapsed_time": "3:47:00", "remaining_time": "0:38:35", "throughput": 8654.21, "total_tokens": 117872488} +{"current_steps": 174935, "total_steps": 204665, "loss": 0.0, "lr": 1.2583260210466685e-07, "epoch": 4.273691153836758, "percentage": 85.47, "elapsed_time": "3:47:00", "remaining_time": "0:38:34", "throughput": 8654.24, "total_tokens": 117875880} +{"current_steps": 174940, "total_steps": 204665, "loss": 0.0, "lr": 1.25791192373662e-07, "epoch": 4.273813304668605, "percentage": 85.48, "elapsed_time": "3:47:00", "remaining_time": "0:38:34", "throughput": 8654.26, "total_tokens": 117879144} +{"current_steps": 174945, "total_steps": 204665, "loss": 0.0738, "lr": 1.2574978900013854e-07, "epoch": 4.273935455500452, "percentage": 85.48, "elapsed_time": "3:47:01", "remaining_time": "0:38:34", "throughput": 8654.28, "total_tokens": 117882280} +{"current_steps": 174950, "total_steps": 204665, "loss": 0.0, "lr": 1.2570839198439775e-07, "epoch": 4.274057606332299, "percentage": 85.48, "elapsed_time": "3:47:01", "remaining_time": "0:38:33", "throughput": 8654.3, "total_tokens": 117885608} +{"current_steps": 174955, "total_steps": 204665, "loss": 0.0, "lr": 1.256670013267409e-07, "epoch": 4.274179757164147, "percentage": 85.48, "elapsed_time": "3:47:01", "remaining_time": "0:38:33", "throughput": 8654.32, "total_tokens": 117888808} +{"current_steps": 174960, "total_steps": 204665, "loss": 0.0, "lr": 1.2562561702746888e-07, "epoch": 4.274301907995993, "percentage": 85.49, "elapsed_time": "3:47:02", "remaining_time": "0:38:32", "throughput": 8654.31, "total_tokens": 117891624} +{"current_steps": 174965, "total_steps": 204665, "loss": 0.0, "lr": 1.255842390868822e-07, "epoch": 4.27442405882784, "percentage": 85.49, "elapsed_time": "3:47:02", "remaining_time": "0:38:32", "throughput": 8654.35, "total_tokens": 117895080} +{"current_steps": 174970, "total_steps": 204665, "loss": 0.0, "lr": 1.255428675052824e-07, "epoch": 4.274546209659688, "percentage": 85.49, "elapsed_time": "3:47:02", "remaining_time": "0:38:32", "throughput": 8654.36, "total_tokens": 117898280} +{"current_steps": 174975, "total_steps": 204665, "loss": 0.0, "lr": 1.2550150228297007e-07, "epoch": 4.2746683604915345, "percentage": 85.49, "elapsed_time": "3:47:03", "remaining_time": "0:38:31", "throughput": 8654.38, "total_tokens": 117901480} +{"current_steps": 174980, "total_steps": 204665, "loss": 0.0, "lr": 1.254601434202458e-07, "epoch": 4.274790511323382, "percentage": 85.5, "elapsed_time": "3:47:03", "remaining_time": "0:38:31", "throughput": 8654.39, "total_tokens": 117904616} +{"current_steps": 174985, "total_steps": 204665, "loss": 0.0, "lr": 1.2541879091741058e-07, "epoch": 4.274912662155229, "percentage": 85.5, "elapsed_time": "3:47:04", "remaining_time": "0:38:30", "throughput": 8654.41, "total_tokens": 117907752} +{"current_steps": 174990, "total_steps": 204665, "loss": 0.0, "lr": 1.253774447747653e-07, "epoch": 4.2750348129870765, "percentage": 85.5, "elapsed_time": "3:47:04", "remaining_time": "0:38:30", "throughput": 8654.4, "total_tokens": 117910632} +{"current_steps": 174995, "total_steps": 204665, "loss": 0.0, "lr": 1.253361049926104e-07, "epoch": 4.275156963818923, "percentage": 85.5, "elapsed_time": "3:47:04", "remaining_time": "0:38:30", "throughput": 8654.46, "total_tokens": 117914472} +{"current_steps": 175000, "total_steps": 204665, "loss": 0.0, "lr": 1.252947715712468e-07, "epoch": 4.275279114650771, "percentage": 85.51, "elapsed_time": "3:47:05", "remaining_time": "0:38:29", "throughput": 8654.46, "total_tokens": 117917416} +{"current_steps": 175005, "total_steps": 204665, "loss": 0.0, "lr": 1.2525344451097465e-07, "epoch": 4.275401265482618, "percentage": 85.51, "elapsed_time": "3:47:05", "remaining_time": "0:38:29", "throughput": 8654.47, "total_tokens": 117920488} +{"current_steps": 175010, "total_steps": 204665, "loss": 0.0, "lr": 1.252121238120949e-07, "epoch": 4.275523416314465, "percentage": 85.51, "elapsed_time": "3:47:05", "remaining_time": "0:38:28", "throughput": 8654.5, "total_tokens": 117923944} +{"current_steps": 175015, "total_steps": 204665, "loss": 0.0, "lr": 1.2517080947490765e-07, "epoch": 4.275645567146312, "percentage": 85.51, "elapsed_time": "3:47:06", "remaining_time": "0:38:28", "throughput": 8654.51, "total_tokens": 117927016} +{"current_steps": 175020, "total_steps": 204665, "loss": 0.0174, "lr": 1.2512950149971357e-07, "epoch": 4.27576771797816, "percentage": 85.52, "elapsed_time": "3:47:06", "remaining_time": "0:38:28", "throughput": 8654.52, "total_tokens": 117930152} +{"current_steps": 175025, "total_steps": 204665, "loss": 0.0, "lr": 1.250881998868134e-07, "epoch": 4.275889868810006, "percentage": 85.52, "elapsed_time": "3:47:06", "remaining_time": "0:38:27", "throughput": 8654.57, "total_tokens": 117933864} +{"current_steps": 175030, "total_steps": 204665, "loss": 0.0, "lr": 1.250469046365068e-07, "epoch": 4.276012019641854, "percentage": 85.52, "elapsed_time": "3:47:07", "remaining_time": "0:38:27", "throughput": 8654.6, "total_tokens": 117937320} +{"current_steps": 175035, "total_steps": 204665, "loss": 0.0, "lr": 1.2500561574909474e-07, "epoch": 4.276134170473701, "percentage": 85.52, "elapsed_time": "3:47:07", "remaining_time": "0:38:26", "throughput": 8654.62, "total_tokens": 117940520} +{"current_steps": 175040, "total_steps": 204665, "loss": 0.0, "lr": 1.2496433322487697e-07, "epoch": 4.2762563213055484, "percentage": 85.53, "elapsed_time": "3:47:07", "remaining_time": "0:38:26", "throughput": 8654.65, "total_tokens": 117943976} +{"current_steps": 175045, "total_steps": 204665, "loss": 0.0, "lr": 1.2492305706415397e-07, "epoch": 4.276378472137395, "percentage": 85.53, "elapsed_time": "3:47:08", "remaining_time": "0:38:26", "throughput": 8654.68, "total_tokens": 117947368} +{"current_steps": 175050, "total_steps": 204665, "loss": 0.0, "lr": 1.24881787267226e-07, "epoch": 4.276500622969243, "percentage": 85.53, "elapsed_time": "3:47:08", "remaining_time": "0:38:25", "throughput": 8654.68, "total_tokens": 117950376} +{"current_steps": 175055, "total_steps": 204665, "loss": 0.0, "lr": 1.2484052383439293e-07, "epoch": 4.27662277380109, "percentage": 85.53, "elapsed_time": "3:47:08", "remaining_time": "0:38:25", "throughput": 8654.7, "total_tokens": 117953704} +{"current_steps": 175060, "total_steps": 204665, "loss": 0.0, "lr": 1.247992667659551e-07, "epoch": 4.276744924632936, "percentage": 85.53, "elapsed_time": "3:47:09", "remaining_time": "0:38:24", "throughput": 8654.72, "total_tokens": 117956840} +{"current_steps": 175065, "total_steps": 204665, "loss": 0.0, "lr": 1.2475801606221236e-07, "epoch": 4.276867075464784, "percentage": 85.54, "elapsed_time": "3:47:09", "remaining_time": "0:38:24", "throughput": 8654.75, "total_tokens": 117960232} +{"current_steps": 175070, "total_steps": 204665, "loss": 0.0, "lr": 1.247167717234646e-07, "epoch": 4.276989226296631, "percentage": 85.54, "elapsed_time": "3:47:09", "remaining_time": "0:38:24", "throughput": 8654.78, "total_tokens": 117963688} +{"current_steps": 175075, "total_steps": 204665, "loss": 0.0, "lr": 1.2467553375001204e-07, "epoch": 4.277111377128478, "percentage": 85.54, "elapsed_time": "3:47:10", "remaining_time": "0:38:23", "throughput": 8654.77, "total_tokens": 117966440} +{"current_steps": 175080, "total_steps": 204665, "loss": 0.0, "lr": 1.2463430214215432e-07, "epoch": 4.277233527960325, "percentage": 85.54, "elapsed_time": "3:47:10", "remaining_time": "0:38:23", "throughput": 8654.77, "total_tokens": 117969448} +{"current_steps": 175085, "total_steps": 204665, "loss": 0.0, "lr": 1.2459307690019162e-07, "epoch": 4.277355678792173, "percentage": 85.55, "elapsed_time": "3:47:10", "remaining_time": "0:38:22", "throughput": 8654.78, "total_tokens": 117972520} +{"current_steps": 175090, "total_steps": 204665, "loss": 0.0, "lr": 1.2455185802442314e-07, "epoch": 4.2774778296240195, "percentage": 85.55, "elapsed_time": "3:47:11", "remaining_time": "0:38:22", "throughput": 8654.82, "total_tokens": 117976040} +{"current_steps": 175095, "total_steps": 204665, "loss": 0.0, "lr": 1.2451064551514946e-07, "epoch": 4.277599980455867, "percentage": 85.55, "elapsed_time": "3:47:11", "remaining_time": "0:38:22", "throughput": 8654.83, "total_tokens": 117979048} +{"current_steps": 175100, "total_steps": 204665, "loss": 0.0, "lr": 1.244694393726694e-07, "epoch": 4.277722131287714, "percentage": 85.55, "elapsed_time": "3:47:11", "remaining_time": "0:38:21", "throughput": 8654.84, "total_tokens": 117982248} +{"current_steps": 175105, "total_steps": 204665, "loss": 0.0, "lr": 1.2442823959728322e-07, "epoch": 4.2778442821195615, "percentage": 85.56, "elapsed_time": "3:47:12", "remaining_time": "0:38:21", "throughput": 8654.88, "total_tokens": 117985832} +{"current_steps": 175110, "total_steps": 204665, "loss": 0.0, "lr": 1.2438704618929052e-07, "epoch": 4.277966432951408, "percentage": 85.56, "elapsed_time": "3:47:12", "remaining_time": "0:38:20", "throughput": 8654.91, "total_tokens": 117989224} +{"current_steps": 175115, "total_steps": 204665, "loss": 0.0, "lr": 1.2434585914899054e-07, "epoch": 4.278088583783256, "percentage": 85.56, "elapsed_time": "3:47:12", "remaining_time": "0:38:20", "throughput": 8654.93, "total_tokens": 117992552} +{"current_steps": 175120, "total_steps": 204665, "loss": 0.0, "lr": 1.2430467847668325e-07, "epoch": 4.278210734615103, "percentage": 85.56, "elapsed_time": "3:47:13", "remaining_time": "0:38:20", "throughput": 8654.96, "total_tokens": 117995880} +{"current_steps": 175125, "total_steps": 204665, "loss": 0.0, "lr": 1.2426350417266762e-07, "epoch": 4.27833288544695, "percentage": 85.57, "elapsed_time": "3:47:13", "remaining_time": "0:38:19", "throughput": 8654.96, "total_tokens": 117998888} +{"current_steps": 175130, "total_steps": 204665, "loss": 0.0, "lr": 1.2422233623724354e-07, "epoch": 4.278455036278797, "percentage": 85.57, "elapsed_time": "3:47:14", "remaining_time": "0:38:19", "throughput": 8654.98, "total_tokens": 118002088} +{"current_steps": 175135, "total_steps": 204665, "loss": 0.0, "lr": 1.2418117467070998e-07, "epoch": 4.278577187110645, "percentage": 85.57, "elapsed_time": "3:47:14", "remaining_time": "0:38:18", "throughput": 8654.97, "total_tokens": 118004904} +{"current_steps": 175140, "total_steps": 204665, "loss": 0.0, "lr": 1.241400194733665e-07, "epoch": 4.278699337942491, "percentage": 85.57, "elapsed_time": "3:47:14", "remaining_time": "0:38:18", "throughput": 8654.98, "total_tokens": 118007976} +{"current_steps": 175145, "total_steps": 204665, "loss": 0.0, "lr": 1.2409887064551262e-07, "epoch": 4.278821488774339, "percentage": 85.58, "elapsed_time": "3:47:15", "remaining_time": "0:38:18", "throughput": 8654.99, "total_tokens": 118011112} +{"current_steps": 175150, "total_steps": 204665, "loss": 0.0, "lr": 1.240577281874471e-07, "epoch": 4.278943639606186, "percentage": 85.58, "elapsed_time": "3:47:15", "remaining_time": "0:38:17", "throughput": 8655.02, "total_tokens": 118014504} +{"current_steps": 175155, "total_steps": 204665, "loss": 0.0182, "lr": 1.240165920994696e-07, "epoch": 4.2790657904380325, "percentage": 85.58, "elapsed_time": "3:47:15", "remaining_time": "0:38:17", "throughput": 8655.05, "total_tokens": 118017896} +{"current_steps": 175160, "total_steps": 204665, "loss": 0.0, "lr": 1.2397546238187883e-07, "epoch": 4.27918794126988, "percentage": 85.58, "elapsed_time": "3:47:16", "remaining_time": "0:38:16", "throughput": 8655.08, "total_tokens": 118021416} +{"current_steps": 175165, "total_steps": 204665, "loss": 0.0, "lr": 1.239343390349743e-07, "epoch": 4.279310092101727, "percentage": 85.59, "elapsed_time": "3:47:16", "remaining_time": "0:38:16", "throughput": 8655.16, "total_tokens": 118025704} +{"current_steps": 175170, "total_steps": 204665, "loss": 0.0, "lr": 1.2389322205905473e-07, "epoch": 4.2794322429335745, "percentage": 85.59, "elapsed_time": "3:47:16", "remaining_time": "0:38:16", "throughput": 8655.18, "total_tokens": 118028968} +{"current_steps": 175175, "total_steps": 204665, "loss": 0.0, "lr": 1.2385211145441943e-07, "epoch": 4.279554393765421, "percentage": 85.59, "elapsed_time": "3:47:17", "remaining_time": "0:38:15", "throughput": 8655.19, "total_tokens": 118031976} +{"current_steps": 175180, "total_steps": 204665, "loss": 0.0, "lr": 1.238110072213673e-07, "epoch": 4.279676544597269, "percentage": 85.59, "elapsed_time": "3:47:17", "remaining_time": "0:38:15", "throughput": 8655.18, "total_tokens": 118034792} +{"current_steps": 175185, "total_steps": 204665, "loss": 0.0, "lr": 1.2376990936019694e-07, "epoch": 4.279798695429116, "percentage": 85.6, "elapsed_time": "3:47:17", "remaining_time": "0:38:14", "throughput": 8655.18, "total_tokens": 118037800} +{"current_steps": 175190, "total_steps": 204665, "loss": 0.0, "lr": 1.237288178712077e-07, "epoch": 4.279920846260963, "percentage": 85.6, "elapsed_time": "3:47:18", "remaining_time": "0:38:14", "throughput": 8655.22, "total_tokens": 118041320} +{"current_steps": 175195, "total_steps": 204665, "loss": 0.0, "lr": 1.2368773275469801e-07, "epoch": 4.28004299709281, "percentage": 85.6, "elapsed_time": "3:47:18", "remaining_time": "0:38:14", "throughput": 8655.25, "total_tokens": 118044776} +{"current_steps": 175200, "total_steps": 204665, "loss": 0.0, "lr": 1.2364665401096686e-07, "epoch": 4.280165147924658, "percentage": 85.6, "elapsed_time": "3:47:18", "remaining_time": "0:38:13", "throughput": 8655.28, "total_tokens": 118048232} +{"current_steps": 175205, "total_steps": 204665, "loss": 0.0, "lr": 1.236055816403131e-07, "epoch": 4.280287298756504, "percentage": 85.61, "elapsed_time": "3:47:19", "remaining_time": "0:38:13", "throughput": 8655.33, "total_tokens": 118051944} +{"current_steps": 175210, "total_steps": 204665, "loss": 0.0, "lr": 1.2356451564303504e-07, "epoch": 4.280409449588352, "percentage": 85.61, "elapsed_time": "3:47:19", "remaining_time": "0:38:12", "throughput": 8655.33, "total_tokens": 118054952} +{"current_steps": 175215, "total_steps": 204665, "loss": 0.0, "lr": 1.235234560194318e-07, "epoch": 4.280531600420199, "percentage": 85.61, "elapsed_time": "3:47:19", "remaining_time": "0:38:12", "throughput": 8655.35, "total_tokens": 118058152} +{"current_steps": 175220, "total_steps": 204665, "loss": 0.0305, "lr": 1.2348240276980148e-07, "epoch": 4.280653751252046, "percentage": 85.61, "elapsed_time": "3:47:20", "remaining_time": "0:38:12", "throughput": 8655.35, "total_tokens": 118061160} +{"current_steps": 175225, "total_steps": 204665, "loss": 0.0, "lr": 1.2344135589444315e-07, "epoch": 4.280775902083893, "percentage": 85.62, "elapsed_time": "3:47:20", "remaining_time": "0:38:11", "throughput": 8655.4, "total_tokens": 118064872} +{"current_steps": 175230, "total_steps": 204665, "loss": 0.0, "lr": 1.2340031539365481e-07, "epoch": 4.280898052915741, "percentage": 85.62, "elapsed_time": "3:47:20", "remaining_time": "0:38:11", "throughput": 8655.43, "total_tokens": 118068328} +{"current_steps": 175235, "total_steps": 204665, "loss": 0.0, "lr": 1.233592812677352e-07, "epoch": 4.2810202037475875, "percentage": 85.62, "elapsed_time": "3:47:21", "remaining_time": "0:38:10", "throughput": 8655.42, "total_tokens": 118071080} +{"current_steps": 175240, "total_steps": 204665, "loss": 0.0, "lr": 1.2331825351698278e-07, "epoch": 4.281142354579435, "percentage": 85.62, "elapsed_time": "3:47:21", "remaining_time": "0:38:10", "throughput": 8655.45, "total_tokens": 118074472} +{"current_steps": 175245, "total_steps": 204665, "loss": 0.0, "lr": 1.2327723214169572e-07, "epoch": 4.281264505411282, "percentage": 85.63, "elapsed_time": "3:47:21", "remaining_time": "0:38:10", "throughput": 8655.48, "total_tokens": 118077864} +{"current_steps": 175250, "total_steps": 204665, "loss": 0.0, "lr": 1.2323621714217257e-07, "epoch": 4.281386656243129, "percentage": 85.63, "elapsed_time": "3:47:22", "remaining_time": "0:38:09", "throughput": 8655.5, "total_tokens": 118081192} +{"current_steps": 175255, "total_steps": 204665, "loss": 0.0, "lr": 1.2319520851871136e-07, "epoch": 4.281508807074976, "percentage": 85.63, "elapsed_time": "3:47:22", "remaining_time": "0:38:09", "throughput": 8655.54, "total_tokens": 118084712} +{"current_steps": 175260, "total_steps": 204665, "loss": 0.0, "lr": 1.2315420627161032e-07, "epoch": 4.281630957906823, "percentage": 85.63, "elapsed_time": "3:47:23", "remaining_time": "0:38:09", "throughput": 8655.55, "total_tokens": 118087848} +{"current_steps": 175265, "total_steps": 204665, "loss": 0.0, "lr": 1.2311321040116795e-07, "epoch": 4.281753108738671, "percentage": 85.64, "elapsed_time": "3:47:23", "remaining_time": "0:38:08", "throughput": 8655.57, "total_tokens": 118091112} +{"current_steps": 175270, "total_steps": 204665, "loss": 0.0, "lr": 1.230722209076822e-07, "epoch": 4.281875259570517, "percentage": 85.64, "elapsed_time": "3:47:23", "remaining_time": "0:38:08", "throughput": 8655.58, "total_tokens": 118094120} +{"current_steps": 175275, "total_steps": 204665, "loss": 0.0, "lr": 1.2303123779145096e-07, "epoch": 4.281997410402365, "percentage": 85.64, "elapsed_time": "3:47:24", "remaining_time": "0:38:07", "throughput": 8655.6, "total_tokens": 118097384} +{"current_steps": 175280, "total_steps": 204665, "loss": 0.0, "lr": 1.2299026105277265e-07, "epoch": 4.282119561234212, "percentage": 85.64, "elapsed_time": "3:47:24", "remaining_time": "0:38:07", "throughput": 8655.63, "total_tokens": 118100776} +{"current_steps": 175285, "total_steps": 204665, "loss": 0.0, "lr": 1.2294929069194494e-07, "epoch": 4.282241712066059, "percentage": 85.64, "elapsed_time": "3:47:24", "remaining_time": "0:38:07", "throughput": 8655.65, "total_tokens": 118104168} +{"current_steps": 175290, "total_steps": 204665, "loss": 0.0, "lr": 1.2290832670926576e-07, "epoch": 4.282363862897906, "percentage": 85.65, "elapsed_time": "3:47:25", "remaining_time": "0:38:06", "throughput": 8655.67, "total_tokens": 118107368} +{"current_steps": 175295, "total_steps": 204665, "loss": 0.0, "lr": 1.2286736910503314e-07, "epoch": 4.282486013729754, "percentage": 85.65, "elapsed_time": "3:47:25", "remaining_time": "0:38:06", "throughput": 8655.68, "total_tokens": 118110440} +{"current_steps": 175300, "total_steps": 204665, "loss": 0.0, "lr": 1.2282641787954506e-07, "epoch": 4.2826081645616005, "percentage": 85.65, "elapsed_time": "3:47:25", "remaining_time": "0:38:05", "throughput": 8655.69, "total_tokens": 118113448} +{"current_steps": 175305, "total_steps": 204665, "loss": 0.0, "lr": 1.2278547303309905e-07, "epoch": 4.282730315393448, "percentage": 85.65, "elapsed_time": "3:47:26", "remaining_time": "0:38:05", "throughput": 8655.72, "total_tokens": 118116968} +{"current_steps": 175310, "total_steps": 204665, "loss": 0.0, "lr": 1.2274453456599333e-07, "epoch": 4.282852466225295, "percentage": 85.66, "elapsed_time": "3:47:26", "remaining_time": "0:38:05", "throughput": 8655.74, "total_tokens": 118120296} +{"current_steps": 175315, "total_steps": 204665, "loss": 0.0, "lr": 1.2270360247852496e-07, "epoch": 4.2829746170571426, "percentage": 85.66, "elapsed_time": "3:47:26", "remaining_time": "0:38:04", "throughput": 8655.76, "total_tokens": 118123432} +{"current_steps": 175320, "total_steps": 204665, "loss": 0.0, "lr": 1.2266267677099219e-07, "epoch": 4.283096767888989, "percentage": 85.66, "elapsed_time": "3:47:27", "remaining_time": "0:38:04", "throughput": 8655.8, "total_tokens": 118127080} +{"current_steps": 175325, "total_steps": 204665, "loss": 0.0553, "lr": 1.2262175744369218e-07, "epoch": 4.283218918720836, "percentage": 85.66, "elapsed_time": "3:47:27", "remaining_time": "0:38:03", "throughput": 8655.84, "total_tokens": 118130728} +{"current_steps": 175330, "total_steps": 204665, "loss": 0.0, "lr": 1.2258084449692286e-07, "epoch": 4.283341069552684, "percentage": 85.67, "elapsed_time": "3:47:27", "remaining_time": "0:38:03", "throughput": 8655.88, "total_tokens": 118134248} +{"current_steps": 175335, "total_steps": 204665, "loss": 0.0, "lr": 1.2253993793098171e-07, "epoch": 4.28346322038453, "percentage": 85.67, "elapsed_time": "3:47:28", "remaining_time": "0:38:03", "throughput": 8655.89, "total_tokens": 118137448} +{"current_steps": 175340, "total_steps": 204665, "loss": 0.0002, "lr": 1.2249903774616598e-07, "epoch": 4.283585371216378, "percentage": 85.67, "elapsed_time": "3:47:28", "remaining_time": "0:38:02", "throughput": 8655.93, "total_tokens": 118140968} +{"current_steps": 175345, "total_steps": 204665, "loss": 0.0, "lr": 1.2245814394277354e-07, "epoch": 4.283707522048225, "percentage": 85.67, "elapsed_time": "3:47:28", "remaining_time": "0:38:02", "throughput": 8655.96, "total_tokens": 118144360} +{"current_steps": 175350, "total_steps": 204665, "loss": 0.0, "lr": 1.2241725652110124e-07, "epoch": 4.283829672880072, "percentage": 85.68, "elapsed_time": "3:47:29", "remaining_time": "0:38:01", "throughput": 8655.97, "total_tokens": 118147560} +{"current_steps": 175355, "total_steps": 204665, "loss": 0.0, "lr": 1.2237637548144664e-07, "epoch": 4.283951823711919, "percentage": 85.68, "elapsed_time": "3:47:29", "remaining_time": "0:38:01", "throughput": 8656.0, "total_tokens": 118151016} +{"current_steps": 175360, "total_steps": 204665, "loss": 0.0, "lr": 1.2233550082410737e-07, "epoch": 4.284073974543767, "percentage": 85.68, "elapsed_time": "3:47:29", "remaining_time": "0:38:01", "throughput": 8656.02, "total_tokens": 118154216} +{"current_steps": 175365, "total_steps": 204665, "loss": 0.0, "lr": 1.222946325493801e-07, "epoch": 4.284196125375614, "percentage": 85.68, "elapsed_time": "3:47:30", "remaining_time": "0:38:00", "throughput": 8656.06, "total_tokens": 118157800} +{"current_steps": 175370, "total_steps": 204665, "loss": 0.0, "lr": 1.222537706575627e-07, "epoch": 4.284318276207461, "percentage": 85.69, "elapsed_time": "3:47:30", "remaining_time": "0:38:00", "throughput": 8656.06, "total_tokens": 118160744} +{"current_steps": 175375, "total_steps": 204665, "loss": 0.0, "lr": 1.2221291514895182e-07, "epoch": 4.284440427039308, "percentage": 85.69, "elapsed_time": "3:47:30", "remaining_time": "0:37:59", "throughput": 8656.09, "total_tokens": 118164136} +{"current_steps": 175380, "total_steps": 204665, "loss": 0.0, "lr": 1.2217206602384455e-07, "epoch": 4.284562577871156, "percentage": 85.69, "elapsed_time": "3:47:31", "remaining_time": "0:37:59", "throughput": 8656.13, "total_tokens": 118167784} +{"current_steps": 175385, "total_steps": 204665, "loss": 0.0, "lr": 1.2213122328253833e-07, "epoch": 4.284684728703002, "percentage": 85.69, "elapsed_time": "3:47:31", "remaining_time": "0:37:59", "throughput": 8656.16, "total_tokens": 118171240} +{"current_steps": 175390, "total_steps": 204665, "loss": 0.0004, "lr": 1.2209038692532981e-07, "epoch": 4.28480687953485, "percentage": 85.7, "elapsed_time": "3:47:32", "remaining_time": "0:37:58", "throughput": 8656.2, "total_tokens": 118174888} +{"current_steps": 175395, "total_steps": 204665, "loss": 0.0, "lr": 1.2204955695251628e-07, "epoch": 4.284929030366697, "percentage": 85.7, "elapsed_time": "3:47:32", "remaining_time": "0:37:58", "throughput": 8656.2, "total_tokens": 118177768} +{"current_steps": 175400, "total_steps": 204665, "loss": 0.0, "lr": 1.2200873336439442e-07, "epoch": 4.285051181198544, "percentage": 85.7, "elapsed_time": "3:47:32", "remaining_time": "0:37:57", "throughput": 8656.2, "total_tokens": 118180520} +{"current_steps": 175405, "total_steps": 204665, "loss": 0.0317, "lr": 1.2196791616126135e-07, "epoch": 4.285173332030391, "percentage": 85.7, "elapsed_time": "3:47:33", "remaining_time": "0:37:57", "throughput": 8656.23, "total_tokens": 118183976} +{"current_steps": 175410, "total_steps": 204665, "loss": 0.0, "lr": 1.2192710534341343e-07, "epoch": 4.285295482862239, "percentage": 85.71, "elapsed_time": "3:47:33", "remaining_time": "0:37:57", "throughput": 8656.23, "total_tokens": 118186920} +{"current_steps": 175415, "total_steps": 204665, "loss": 0.0, "lr": 1.2188630091114817e-07, "epoch": 4.2854176336940855, "percentage": 85.71, "elapsed_time": "3:47:33", "remaining_time": "0:37:56", "throughput": 8656.26, "total_tokens": 118190440} +{"current_steps": 175420, "total_steps": 204665, "loss": 0.0, "lr": 1.218455028647616e-07, "epoch": 4.285539784525932, "percentage": 85.71, "elapsed_time": "3:47:34", "remaining_time": "0:37:56", "throughput": 8656.27, "total_tokens": 118193448} +{"current_steps": 175425, "total_steps": 204665, "loss": 0.0, "lr": 1.218047112045507e-07, "epoch": 4.28566193535778, "percentage": 85.71, "elapsed_time": "3:47:34", "remaining_time": "0:37:55", "throughput": 8656.28, "total_tokens": 118196520} +{"current_steps": 175430, "total_steps": 204665, "loss": 0.0, "lr": 1.2176392593081242e-07, "epoch": 4.285784086189627, "percentage": 85.72, "elapsed_time": "3:47:34", "remaining_time": "0:37:55", "throughput": 8656.3, "total_tokens": 118199784} +{"current_steps": 175435, "total_steps": 204665, "loss": 0.0, "lr": 1.2172314704384278e-07, "epoch": 4.285906237021474, "percentage": 85.72, "elapsed_time": "3:47:35", "remaining_time": "0:37:55", "throughput": 8656.32, "total_tokens": 118202984} +{"current_steps": 175440, "total_steps": 204665, "loss": 0.0, "lr": 1.2168237454393893e-07, "epoch": 4.286028387853321, "percentage": 85.72, "elapsed_time": "3:47:35", "remaining_time": "0:37:54", "throughput": 8656.35, "total_tokens": 118206504} +{"current_steps": 175445, "total_steps": 204665, "loss": 0.0, "lr": 1.2164160843139693e-07, "epoch": 4.286150538685169, "percentage": 85.72, "elapsed_time": "3:47:35", "remaining_time": "0:37:54", "throughput": 8656.37, "total_tokens": 118209704} +{"current_steps": 175450, "total_steps": 204665, "loss": 0.0, "lr": 1.2160084870651331e-07, "epoch": 4.286272689517015, "percentage": 85.73, "elapsed_time": "3:47:36", "remaining_time": "0:37:53", "throughput": 8656.38, "total_tokens": 118212776} +{"current_steps": 175455, "total_steps": 204665, "loss": 0.0, "lr": 1.2156009536958479e-07, "epoch": 4.286394840348863, "percentage": 85.73, "elapsed_time": "3:47:36", "remaining_time": "0:37:53", "throughput": 8656.43, "total_tokens": 118216552} +{"current_steps": 175460, "total_steps": 204665, "loss": 0.0325, "lr": 1.2151934842090738e-07, "epoch": 4.28651699118071, "percentage": 85.73, "elapsed_time": "3:47:36", "remaining_time": "0:37:53", "throughput": 8656.45, "total_tokens": 118219944} +{"current_steps": 175465, "total_steps": 204665, "loss": 0.0, "lr": 1.2147860786077767e-07, "epoch": 4.286639142012557, "percentage": 85.73, "elapsed_time": "3:47:37", "remaining_time": "0:37:52", "throughput": 8656.51, "total_tokens": 118223784} +{"current_steps": 175470, "total_steps": 204665, "loss": 0.0, "lr": 1.2143787368949178e-07, "epoch": 4.286761292844404, "percentage": 85.74, "elapsed_time": "3:47:37", "remaining_time": "0:37:52", "throughput": 8656.53, "total_tokens": 118227048} +{"current_steps": 175475, "total_steps": 204665, "loss": 0.0, "lr": 1.2139714590734607e-07, "epoch": 4.286883443676252, "percentage": 85.74, "elapsed_time": "3:47:37", "remaining_time": "0:37:51", "throughput": 8656.54, "total_tokens": 118230056} +{"current_steps": 175480, "total_steps": 204665, "loss": 0.0, "lr": 1.2135642451463635e-07, "epoch": 4.2870055945080985, "percentage": 85.74, "elapsed_time": "3:47:38", "remaining_time": "0:37:51", "throughput": 8656.55, "total_tokens": 118233256} +{"current_steps": 175485, "total_steps": 204665, "loss": 0.0, "lr": 1.2131570951165936e-07, "epoch": 4.287127745339946, "percentage": 85.74, "elapsed_time": "3:47:38", "remaining_time": "0:37:51", "throughput": 8656.56, "total_tokens": 118236200} +{"current_steps": 175490, "total_steps": 204665, "loss": 0.0, "lr": 1.212750008987109e-07, "epoch": 4.287249896171793, "percentage": 85.74, "elapsed_time": "3:47:38", "remaining_time": "0:37:50", "throughput": 8656.56, "total_tokens": 118239208} +{"current_steps": 175495, "total_steps": 204665, "loss": 0.0, "lr": 1.212342986760867e-07, "epoch": 4.2873720470036405, "percentage": 85.75, "elapsed_time": "3:47:39", "remaining_time": "0:37:50", "throughput": 8656.61, "total_tokens": 118242920} +{"current_steps": 175500, "total_steps": 204665, "loss": 0.0, "lr": 1.211936028440832e-07, "epoch": 4.287494197835487, "percentage": 85.75, "elapsed_time": "3:47:39", "remaining_time": "0:37:49", "throughput": 8656.65, "total_tokens": 118246632} +{"current_steps": 175505, "total_steps": 204665, "loss": 0.0, "lr": 1.2115291340299604e-07, "epoch": 4.287616348667335, "percentage": 85.75, "elapsed_time": "3:47:39", "remaining_time": "0:37:49", "throughput": 8656.67, "total_tokens": 118249896} +{"current_steps": 175510, "total_steps": 204665, "loss": 0.0, "lr": 1.2111223035312136e-07, "epoch": 4.287738499499182, "percentage": 85.75, "elapsed_time": "3:47:40", "remaining_time": "0:37:49", "throughput": 8656.7, "total_tokens": 118253224} +{"current_steps": 175515, "total_steps": 204665, "loss": 0.0, "lr": 1.2107155369475496e-07, "epoch": 4.287860650331028, "percentage": 85.76, "elapsed_time": "3:47:40", "remaining_time": "0:37:48", "throughput": 8656.71, "total_tokens": 118256360} +{"current_steps": 175520, "total_steps": 204665, "loss": 0.0, "lr": 1.2103088342819256e-07, "epoch": 4.287982801162876, "percentage": 85.76, "elapsed_time": "3:47:40", "remaining_time": "0:37:48", "throughput": 8656.72, "total_tokens": 118259368} +{"current_steps": 175525, "total_steps": 204665, "loss": 0.0, "lr": 1.2099021955373013e-07, "epoch": 4.288104951994723, "percentage": 85.76, "elapsed_time": "3:47:41", "remaining_time": "0:37:48", "throughput": 8656.72, "total_tokens": 118262440} +{"current_steps": 175530, "total_steps": 204665, "loss": 0.0004, "lr": 1.2094956207166307e-07, "epoch": 4.28822710282657, "percentage": 85.76, "elapsed_time": "3:47:41", "remaining_time": "0:37:47", "throughput": 8656.74, "total_tokens": 118265576} +{"current_steps": 175535, "total_steps": 204665, "loss": 0.0, "lr": 1.2090891098228739e-07, "epoch": 4.288349253658417, "percentage": 85.77, "elapsed_time": "3:47:42", "remaining_time": "0:37:47", "throughput": 8656.77, "total_tokens": 118269096} +{"current_steps": 175540, "total_steps": 204665, "loss": 0.0, "lr": 1.208682662858984e-07, "epoch": 4.288471404490265, "percentage": 85.77, "elapsed_time": "3:47:42", "remaining_time": "0:37:46", "throughput": 8656.81, "total_tokens": 118272616} +{"current_steps": 175545, "total_steps": 204665, "loss": 0.0, "lr": 1.208276279827919e-07, "epoch": 4.2885935553221115, "percentage": 85.77, "elapsed_time": "3:47:42", "remaining_time": "0:37:46", "throughput": 8656.83, "total_tokens": 118276008} +{"current_steps": 175550, "total_steps": 204665, "loss": 0.0, "lr": 1.2078699607326347e-07, "epoch": 4.288715706153959, "percentage": 85.77, "elapsed_time": "3:47:43", "remaining_time": "0:37:46", "throughput": 8656.85, "total_tokens": 118279144} +{"current_steps": 175555, "total_steps": 204665, "loss": 0.0, "lr": 1.2074637055760828e-07, "epoch": 4.288837856985806, "percentage": 85.78, "elapsed_time": "3:47:43", "remaining_time": "0:37:45", "throughput": 8656.85, "total_tokens": 118282152} +{"current_steps": 175560, "total_steps": 204665, "loss": 0.0, "lr": 1.2070575143612217e-07, "epoch": 4.2889600078176535, "percentage": 85.78, "elapsed_time": "3:47:43", "remaining_time": "0:37:45", "throughput": 8656.85, "total_tokens": 118284968} +{"current_steps": 175565, "total_steps": 204665, "loss": 0.0, "lr": 1.2066513870910022e-07, "epoch": 4.2890821586495, "percentage": 85.78, "elapsed_time": "3:47:44", "remaining_time": "0:37:44", "throughput": 8656.85, "total_tokens": 118287912} +{"current_steps": 175570, "total_steps": 204665, "loss": 0.0, "lr": 1.20624532376838e-07, "epoch": 4.289204309481348, "percentage": 85.78, "elapsed_time": "3:47:44", "remaining_time": "0:37:44", "throughput": 8656.86, "total_tokens": 118291112} +{"current_steps": 175575, "total_steps": 204665, "loss": 0.0, "lr": 1.205839324396305e-07, "epoch": 4.289326460313195, "percentage": 85.79, "elapsed_time": "3:47:44", "remaining_time": "0:37:44", "throughput": 8656.89, "total_tokens": 118294504} +{"current_steps": 175580, "total_steps": 204665, "loss": 0.0, "lr": 1.2054333889777345e-07, "epoch": 4.289448611145042, "percentage": 85.79, "elapsed_time": "3:47:45", "remaining_time": "0:37:43", "throughput": 8656.93, "total_tokens": 118298024} +{"current_steps": 175585, "total_steps": 204665, "loss": 0.0, "lr": 1.2050275175156156e-07, "epoch": 4.289570761976889, "percentage": 85.79, "elapsed_time": "3:47:45", "remaining_time": "0:37:43", "throughput": 8656.95, "total_tokens": 118301352} +{"current_steps": 175590, "total_steps": 204665, "loss": 0.0001, "lr": 1.2046217100129042e-07, "epoch": 4.289692912808736, "percentage": 85.79, "elapsed_time": "3:47:45", "remaining_time": "0:37:42", "throughput": 8656.98, "total_tokens": 118304808} +{"current_steps": 175595, "total_steps": 204665, "loss": 0.0, "lr": 1.2042159664725494e-07, "epoch": 4.289815063640583, "percentage": 85.8, "elapsed_time": "3:47:46", "remaining_time": "0:37:42", "throughput": 8657.0, "total_tokens": 118308072} +{"current_steps": 175600, "total_steps": 204665, "loss": 0.0, "lr": 1.2038102868975e-07, "epoch": 4.28993721447243, "percentage": 85.8, "elapsed_time": "3:47:46", "remaining_time": "0:37:42", "throughput": 8657.04, "total_tokens": 118311720} +{"current_steps": 175605, "total_steps": 204665, "loss": 0.0, "lr": 1.2034046712907075e-07, "epoch": 4.290059365304278, "percentage": 85.8, "elapsed_time": "3:47:46", "remaining_time": "0:37:41", "throughput": 8657.06, "total_tokens": 118314984} +{"current_steps": 175610, "total_steps": 204665, "loss": 0.0, "lr": 1.2029991196551248e-07, "epoch": 4.2901815161361245, "percentage": 85.8, "elapsed_time": "3:47:47", "remaining_time": "0:37:41", "throughput": 8657.07, "total_tokens": 118318056} +{"current_steps": 175615, "total_steps": 204665, "loss": 0.0, "lr": 1.2025936319936957e-07, "epoch": 4.290303666967972, "percentage": 85.81, "elapsed_time": "3:47:47", "remaining_time": "0:37:40", "throughput": 8657.11, "total_tokens": 118321640} +{"current_steps": 175620, "total_steps": 204665, "loss": 0.0, "lr": 1.2021882083093748e-07, "epoch": 4.290425817799819, "percentage": 85.81, "elapsed_time": "3:47:47", "remaining_time": "0:37:40", "throughput": 8657.12, "total_tokens": 118324840} +{"current_steps": 175625, "total_steps": 204665, "loss": 0.0, "lr": 1.2017828486051052e-07, "epoch": 4.2905479686316665, "percentage": 85.81, "elapsed_time": "3:47:48", "remaining_time": "0:37:40", "throughput": 8657.14, "total_tokens": 118328104} +{"current_steps": 175630, "total_steps": 204665, "loss": 0.0, "lr": 1.2013775528838399e-07, "epoch": 4.290670119463513, "percentage": 85.81, "elapsed_time": "3:47:48", "remaining_time": "0:37:39", "throughput": 8657.17, "total_tokens": 118331496} +{"current_steps": 175635, "total_steps": 204665, "loss": 0.0001, "lr": 1.2009723211485212e-07, "epoch": 4.290792270295361, "percentage": 85.82, "elapsed_time": "3:47:48", "remaining_time": "0:37:39", "throughput": 8657.19, "total_tokens": 118334696} +{"current_steps": 175640, "total_steps": 204665, "loss": 0.0, "lr": 1.2005671534020978e-07, "epoch": 4.290914421127208, "percentage": 85.82, "elapsed_time": "3:47:49", "remaining_time": "0:37:38", "throughput": 8657.2, "total_tokens": 118337832} +{"current_steps": 175645, "total_steps": 204665, "loss": 0.0, "lr": 1.2001620496475195e-07, "epoch": 4.291036571959055, "percentage": 85.82, "elapsed_time": "3:47:49", "remaining_time": "0:37:38", "throughput": 8657.2, "total_tokens": 118340776} +{"current_steps": 175650, "total_steps": 204665, "loss": 0.0, "lr": 1.1997570098877275e-07, "epoch": 4.291158722790902, "percentage": 85.82, "elapsed_time": "3:47:49", "remaining_time": "0:37:38", "throughput": 8657.21, "total_tokens": 118343912} +{"current_steps": 175655, "total_steps": 204665, "loss": 0.0, "lr": 1.1993520341256713e-07, "epoch": 4.29128087362275, "percentage": 85.83, "elapsed_time": "3:47:50", "remaining_time": "0:37:37", "throughput": 8657.25, "total_tokens": 118347432} +{"current_steps": 175660, "total_steps": 204665, "loss": 0.0, "lr": 1.1989471223642923e-07, "epoch": 4.291403024454596, "percentage": 85.83, "elapsed_time": "3:47:50", "remaining_time": "0:37:37", "throughput": 8657.25, "total_tokens": 118350312} +{"current_steps": 175665, "total_steps": 204665, "loss": 0.0, "lr": 1.1985422746065367e-07, "epoch": 4.291525175286444, "percentage": 85.83, "elapsed_time": "3:47:51", "remaining_time": "0:37:36", "throughput": 8657.29, "total_tokens": 118353832} +{"current_steps": 175670, "total_steps": 204665, "loss": 0.0, "lr": 1.1981374908553522e-07, "epoch": 4.291647326118291, "percentage": 85.83, "elapsed_time": "3:47:51", "remaining_time": "0:37:36", "throughput": 8657.32, "total_tokens": 118357288} +{"current_steps": 175675, "total_steps": 204665, "loss": 0.0, "lr": 1.1977327711136754e-07, "epoch": 4.291769476950138, "percentage": 85.84, "elapsed_time": "3:47:51", "remaining_time": "0:37:36", "throughput": 8657.33, "total_tokens": 118360488} +{"current_steps": 175680, "total_steps": 204665, "loss": 0.0, "lr": 1.1973281153844572e-07, "epoch": 4.291891627781985, "percentage": 85.84, "elapsed_time": "3:47:52", "remaining_time": "0:37:35", "throughput": 8657.36, "total_tokens": 118363816} +{"current_steps": 175685, "total_steps": 204665, "loss": 0.0784, "lr": 1.1969235236706354e-07, "epoch": 4.292013778613832, "percentage": 85.84, "elapsed_time": "3:47:52", "remaining_time": "0:37:35", "throughput": 8657.39, "total_tokens": 118367208} +{"current_steps": 175690, "total_steps": 204665, "loss": 0.0, "lr": 1.196518995975152e-07, "epoch": 4.29213592944568, "percentage": 85.84, "elapsed_time": "3:47:52", "remaining_time": "0:37:34", "throughput": 8657.43, "total_tokens": 118370920} +{"current_steps": 175695, "total_steps": 204665, "loss": 0.0, "lr": 1.1961145323009526e-07, "epoch": 4.292258080277526, "percentage": 85.85, "elapsed_time": "3:47:53", "remaining_time": "0:37:34", "throughput": 8657.45, "total_tokens": 118374120} +{"current_steps": 175700, "total_steps": 204665, "loss": 0.0, "lr": 1.1957101326509733e-07, "epoch": 4.292380231109374, "percentage": 85.85, "elapsed_time": "3:47:53", "remaining_time": "0:37:34", "throughput": 8657.46, "total_tokens": 118377256} +{"current_steps": 175705, "total_steps": 204665, "loss": 0.0, "lr": 1.195305797028161e-07, "epoch": 4.292502381941221, "percentage": 85.85, "elapsed_time": "3:47:53", "remaining_time": "0:37:33", "throughput": 8657.49, "total_tokens": 118380648} +{"current_steps": 175710, "total_steps": 204665, "loss": 0.0, "lr": 1.194901525435451e-07, "epoch": 4.292624532773068, "percentage": 85.85, "elapsed_time": "3:47:54", "remaining_time": "0:37:33", "throughput": 8657.51, "total_tokens": 118383912} +{"current_steps": 175715, "total_steps": 204665, "loss": 0.0, "lr": 1.1944973178757868e-07, "epoch": 4.292746683604915, "percentage": 85.85, "elapsed_time": "3:47:54", "remaining_time": "0:37:32", "throughput": 8657.54, "total_tokens": 118387304} +{"current_steps": 175720, "total_steps": 204665, "loss": 0.0, "lr": 1.1940931743521044e-07, "epoch": 4.292868834436763, "percentage": 85.86, "elapsed_time": "3:47:54", "remaining_time": "0:37:32", "throughput": 8657.58, "total_tokens": 118390888} +{"current_steps": 175725, "total_steps": 204665, "loss": 0.0, "lr": 1.1936890948673473e-07, "epoch": 4.2929909852686094, "percentage": 85.86, "elapsed_time": "3:47:55", "remaining_time": "0:37:32", "throughput": 8657.62, "total_tokens": 118394536} +{"current_steps": 175730, "total_steps": 204665, "loss": 0.0, "lr": 1.1932850794244497e-07, "epoch": 4.293113136100457, "percentage": 85.86, "elapsed_time": "3:47:55", "remaining_time": "0:37:31", "throughput": 8657.63, "total_tokens": 118397672} +{"current_steps": 175735, "total_steps": 204665, "loss": 0.0, "lr": 1.1928811280263517e-07, "epoch": 4.293235286932304, "percentage": 85.86, "elapsed_time": "3:47:55", "remaining_time": "0:37:31", "throughput": 8657.67, "total_tokens": 118401320} +{"current_steps": 175740, "total_steps": 204665, "loss": 0.0, "lr": 1.192477240675993e-07, "epoch": 4.2933574377641515, "percentage": 85.87, "elapsed_time": "3:47:56", "remaining_time": "0:37:30", "throughput": 8657.71, "total_tokens": 118404840} +{"current_steps": 175745, "total_steps": 204665, "loss": 0.0, "lr": 1.192073417376307e-07, "epoch": 4.293479588595998, "percentage": 85.87, "elapsed_time": "3:47:56", "remaining_time": "0:37:30", "throughput": 8657.74, "total_tokens": 118408232} +{"current_steps": 175750, "total_steps": 204665, "loss": 0.0, "lr": 1.1916696581302344e-07, "epoch": 4.293601739427846, "percentage": 85.87, "elapsed_time": "3:47:56", "remaining_time": "0:37:30", "throughput": 8657.75, "total_tokens": 118411432} +{"current_steps": 175755, "total_steps": 204665, "loss": 0.0, "lr": 1.1912659629407063e-07, "epoch": 4.293723890259693, "percentage": 85.87, "elapsed_time": "3:47:57", "remaining_time": "0:37:29", "throughput": 8657.79, "total_tokens": 118415016} +{"current_steps": 175760, "total_steps": 204665, "loss": 0.0, "lr": 1.1908623318106626e-07, "epoch": 4.29384604109154, "percentage": 85.88, "elapsed_time": "3:47:57", "remaining_time": "0:37:29", "throughput": 8657.81, "total_tokens": 118418216} +{"current_steps": 175765, "total_steps": 204665, "loss": 0.0, "lr": 1.1904587647430386e-07, "epoch": 4.293968191923387, "percentage": 85.88, "elapsed_time": "3:47:57", "remaining_time": "0:37:28", "throughput": 8657.86, "total_tokens": 118422056} +{"current_steps": 175770, "total_steps": 204665, "loss": 0.0, "lr": 1.1900552617407655e-07, "epoch": 4.294090342755235, "percentage": 85.88, "elapsed_time": "3:47:58", "remaining_time": "0:37:28", "throughput": 8657.9, "total_tokens": 118425512} +{"current_steps": 175775, "total_steps": 204665, "loss": 0.0, "lr": 1.1896518228067831e-07, "epoch": 4.294212493587081, "percentage": 85.88, "elapsed_time": "3:47:58", "remaining_time": "0:37:28", "throughput": 8657.9, "total_tokens": 118428456} +{"current_steps": 175780, "total_steps": 204665, "loss": 0.0, "lr": 1.189248447944019e-07, "epoch": 4.294334644418928, "percentage": 85.89, "elapsed_time": "3:47:59", "remaining_time": "0:37:27", "throughput": 8657.91, "total_tokens": 118431656} +{"current_steps": 175785, "total_steps": 204665, "loss": 0.0, "lr": 1.1888451371554132e-07, "epoch": 4.294456795250776, "percentage": 85.89, "elapsed_time": "3:47:59", "remaining_time": "0:37:27", "throughput": 8657.98, "total_tokens": 118435688} +{"current_steps": 175790, "total_steps": 204665, "loss": 0.0, "lr": 1.188441890443893e-07, "epoch": 4.2945789460826225, "percentage": 85.89, "elapsed_time": "3:47:59", "remaining_time": "0:37:27", "throughput": 8657.99, "total_tokens": 118438824} +{"current_steps": 175795, "total_steps": 204665, "loss": 0.0, "lr": 1.1880387078123955e-07, "epoch": 4.29470109691447, "percentage": 85.89, "elapsed_time": "3:48:00", "remaining_time": "0:37:26", "throughput": 8658.02, "total_tokens": 118442152} +{"current_steps": 175800, "total_steps": 204665, "loss": 0.0, "lr": 1.1876355892638513e-07, "epoch": 4.294823247746317, "percentage": 85.9, "elapsed_time": "3:48:00", "remaining_time": "0:37:26", "throughput": 8658.03, "total_tokens": 118445288} +{"current_steps": 175805, "total_steps": 204665, "loss": 0.0, "lr": 1.187232534801188e-07, "epoch": 4.2949453985781645, "percentage": 85.9, "elapsed_time": "3:48:00", "remaining_time": "0:37:25", "throughput": 8658.05, "total_tokens": 118448552} +{"current_steps": 175810, "total_steps": 204665, "loss": 0.0761, "lr": 1.1868295444273435e-07, "epoch": 4.295067549410011, "percentage": 85.9, "elapsed_time": "3:48:01", "remaining_time": "0:37:25", "throughput": 8658.06, "total_tokens": 118451624} +{"current_steps": 175815, "total_steps": 204665, "loss": 0.0, "lr": 1.1864266181452421e-07, "epoch": 4.295189700241859, "percentage": 85.9, "elapsed_time": "3:48:01", "remaining_time": "0:37:25", "throughput": 8658.08, "total_tokens": 118454952} +{"current_steps": 175820, "total_steps": 204665, "loss": 0.0, "lr": 1.1860237559578168e-07, "epoch": 4.295311851073706, "percentage": 85.91, "elapsed_time": "3:48:01", "remaining_time": "0:37:24", "throughput": 8658.12, "total_tokens": 118458536} +{"current_steps": 175825, "total_steps": 204665, "loss": 0.0, "lr": 1.1856209578679998e-07, "epoch": 4.295434001905553, "percentage": 85.91, "elapsed_time": "3:48:02", "remaining_time": "0:37:24", "throughput": 8658.14, "total_tokens": 118461864} +{"current_steps": 175830, "total_steps": 204665, "loss": 0.0, "lr": 1.1852182238787156e-07, "epoch": 4.2955561527374, "percentage": 85.91, "elapsed_time": "3:48:02", "remaining_time": "0:37:23", "throughput": 8658.17, "total_tokens": 118465192} +{"current_steps": 175835, "total_steps": 204665, "loss": 0.0, "lr": 1.1848155539928972e-07, "epoch": 4.295678303569248, "percentage": 85.91, "elapsed_time": "3:48:02", "remaining_time": "0:37:23", "throughput": 8658.2, "total_tokens": 118468648} +{"current_steps": 175840, "total_steps": 204665, "loss": 0.0, "lr": 1.1844129482134702e-07, "epoch": 4.295800454401094, "percentage": 85.92, "elapsed_time": "3:48:03", "remaining_time": "0:37:23", "throughput": 8658.23, "total_tokens": 118472104} +{"current_steps": 175845, "total_steps": 204665, "loss": 0.0, "lr": 1.1840104065433642e-07, "epoch": 4.295922605232942, "percentage": 85.92, "elapsed_time": "3:48:03", "remaining_time": "0:37:22", "throughput": 8658.25, "total_tokens": 118475368} +{"current_steps": 175850, "total_steps": 204665, "loss": 0.0, "lr": 1.183607928985505e-07, "epoch": 4.296044756064789, "percentage": 85.92, "elapsed_time": "3:48:03", "remaining_time": "0:37:22", "throughput": 8658.3, "total_tokens": 118479144} +{"current_steps": 175855, "total_steps": 204665, "loss": 0.0, "lr": 1.1832055155428189e-07, "epoch": 4.2961669068966355, "percentage": 85.92, "elapsed_time": "3:48:04", "remaining_time": "0:37:21", "throughput": 8658.32, "total_tokens": 118482408} +{"current_steps": 175860, "total_steps": 204665, "loss": 0.0, "lr": 1.1828031662182358e-07, "epoch": 4.296289057728483, "percentage": 85.93, "elapsed_time": "3:48:04", "remaining_time": "0:37:21", "throughput": 8658.36, "total_tokens": 118485992} +{"current_steps": 175865, "total_steps": 204665, "loss": 0.0, "lr": 1.1824008810146791e-07, "epoch": 4.296411208560331, "percentage": 85.93, "elapsed_time": "3:48:04", "remaining_time": "0:37:21", "throughput": 8658.37, "total_tokens": 118489128} +{"current_steps": 175870, "total_steps": 204665, "loss": 0.0, "lr": 1.1819986599350751e-07, "epoch": 4.2965333593921775, "percentage": 85.93, "elapsed_time": "3:48:05", "remaining_time": "0:37:20", "throughput": 8658.39, "total_tokens": 118492264} +{"current_steps": 175875, "total_steps": 204665, "loss": 0.0, "lr": 1.1815965029823471e-07, "epoch": 4.296655510224024, "percentage": 85.93, "elapsed_time": "3:48:05", "remaining_time": "0:37:20", "throughput": 8658.42, "total_tokens": 118495848} +{"current_steps": 175880, "total_steps": 204665, "loss": 0.0, "lr": 1.181194410159424e-07, "epoch": 4.296777661055872, "percentage": 85.94, "elapsed_time": "3:48:05", "remaining_time": "0:37:19", "throughput": 8658.45, "total_tokens": 118499176} +{"current_steps": 175885, "total_steps": 204665, "loss": 0.0017, "lr": 1.1807923814692244e-07, "epoch": 4.296899811887719, "percentage": 85.94, "elapsed_time": "3:48:06", "remaining_time": "0:37:19", "throughput": 8658.47, "total_tokens": 118502440} +{"current_steps": 175890, "total_steps": 204665, "loss": 0.0, "lr": 1.1803904169146773e-07, "epoch": 4.297021962719566, "percentage": 85.94, "elapsed_time": "3:48:06", "remaining_time": "0:37:19", "throughput": 8658.52, "total_tokens": 118506216} +{"current_steps": 175895, "total_steps": 204665, "loss": 0.0, "lr": 1.179988516498701e-07, "epoch": 4.297144113551413, "percentage": 85.94, "elapsed_time": "3:48:07", "remaining_time": "0:37:18", "throughput": 8658.54, "total_tokens": 118509416} +{"current_steps": 175900, "total_steps": 204665, "loss": 0.0346, "lr": 1.1795866802242216e-07, "epoch": 4.297266264383261, "percentage": 85.95, "elapsed_time": "3:48:07", "remaining_time": "0:37:18", "throughput": 8658.54, "total_tokens": 118512488} +{"current_steps": 175905, "total_steps": 204665, "loss": 0.0, "lr": 1.1791849080941618e-07, "epoch": 4.297388415215107, "percentage": 85.95, "elapsed_time": "3:48:07", "remaining_time": "0:37:17", "throughput": 8658.57, "total_tokens": 118515816} +{"current_steps": 175910, "total_steps": 204665, "loss": 0.0, "lr": 1.1787832001114384e-07, "epoch": 4.297510566046955, "percentage": 85.95, "elapsed_time": "3:48:08", "remaining_time": "0:37:17", "throughput": 8658.6, "total_tokens": 118519144} +{"current_steps": 175915, "total_steps": 204665, "loss": 0.0, "lr": 1.1783815562789767e-07, "epoch": 4.297632716878802, "percentage": 85.95, "elapsed_time": "3:48:08", "remaining_time": "0:37:17", "throughput": 8658.64, "total_tokens": 118522792} +{"current_steps": 175920, "total_steps": 204665, "loss": 0.0, "lr": 1.1779799765997e-07, "epoch": 4.297754867710649, "percentage": 85.96, "elapsed_time": "3:48:08", "remaining_time": "0:37:16", "throughput": 8658.64, "total_tokens": 118525736} +{"current_steps": 175925, "total_steps": 204665, "loss": 0.0, "lr": 1.1775784610765227e-07, "epoch": 4.297877018542496, "percentage": 85.96, "elapsed_time": "3:48:09", "remaining_time": "0:37:16", "throughput": 8658.66, "total_tokens": 118529064} +{"current_steps": 175930, "total_steps": 204665, "loss": 0.0, "lr": 1.1771770097123701e-07, "epoch": 4.297999169374344, "percentage": 85.96, "elapsed_time": "3:48:09", "remaining_time": "0:37:15", "throughput": 8658.69, "total_tokens": 118532392} +{"current_steps": 175935, "total_steps": 204665, "loss": 0.0, "lr": 1.1767756225101566e-07, "epoch": 4.2981213202061905, "percentage": 85.96, "elapsed_time": "3:48:09", "remaining_time": "0:37:15", "throughput": 8658.72, "total_tokens": 118535912} +{"current_steps": 175940, "total_steps": 204665, "loss": 0.0, "lr": 1.1763742994728077e-07, "epoch": 4.298243471038038, "percentage": 85.96, "elapsed_time": "3:48:10", "remaining_time": "0:37:15", "throughput": 8658.75, "total_tokens": 118539368} +{"current_steps": 175945, "total_steps": 204665, "loss": 0.0, "lr": 1.1759730406032342e-07, "epoch": 4.298365621869885, "percentage": 85.97, "elapsed_time": "3:48:10", "remaining_time": "0:37:14", "throughput": 8658.77, "total_tokens": 118542568} +{"current_steps": 175950, "total_steps": 204665, "loss": 0.0, "lr": 1.1755718459043595e-07, "epoch": 4.298487772701732, "percentage": 85.97, "elapsed_time": "3:48:10", "remaining_time": "0:37:14", "throughput": 8658.77, "total_tokens": 118545512} +{"current_steps": 175955, "total_steps": 204665, "loss": 0.0, "lr": 1.1751707153791012e-07, "epoch": 4.298609923533579, "percentage": 85.97, "elapsed_time": "3:48:11", "remaining_time": "0:37:13", "throughput": 8658.82, "total_tokens": 118549160} +{"current_steps": 175960, "total_steps": 204665, "loss": 0.0, "lr": 1.1747696490303727e-07, "epoch": 4.298732074365426, "percentage": 85.97, "elapsed_time": "3:48:11", "remaining_time": "0:37:13", "throughput": 8658.87, "total_tokens": 118553000} +{"current_steps": 175965, "total_steps": 204665, "loss": 0.0, "lr": 1.1743686468610958e-07, "epoch": 4.298854225197274, "percentage": 85.98, "elapsed_time": "3:48:11", "remaining_time": "0:37:13", "throughput": 8658.93, "total_tokens": 118556968} +{"current_steps": 175970, "total_steps": 204665, "loss": 0.0, "lr": 1.1739677088741817e-07, "epoch": 4.29897637602912, "percentage": 85.98, "elapsed_time": "3:48:12", "remaining_time": "0:37:12", "throughput": 8658.94, "total_tokens": 118559976} +{"current_steps": 175975, "total_steps": 204665, "loss": 0.0001, "lr": 1.1735668350725481e-07, "epoch": 4.299098526860968, "percentage": 85.98, "elapsed_time": "3:48:12", "remaining_time": "0:37:12", "throughput": 8658.96, "total_tokens": 118563368} +{"current_steps": 175980, "total_steps": 204665, "loss": 0.0, "lr": 1.1731660254591124e-07, "epoch": 4.299220677692815, "percentage": 85.98, "elapsed_time": "3:48:12", "remaining_time": "0:37:11", "throughput": 8658.99, "total_tokens": 118566824} +{"current_steps": 175985, "total_steps": 204665, "loss": 0.0, "lr": 1.172765280036786e-07, "epoch": 4.299342828524662, "percentage": 85.99, "elapsed_time": "3:48:13", "remaining_time": "0:37:11", "throughput": 8659.0, "total_tokens": 118569832} +{"current_steps": 175990, "total_steps": 204665, "loss": 0.0, "lr": 1.1723645988084862e-07, "epoch": 4.299464979356509, "percentage": 85.99, "elapsed_time": "3:48:13", "remaining_time": "0:37:11", "throughput": 8659.01, "total_tokens": 118572968} +{"current_steps": 175995, "total_steps": 204665, "loss": 0.0, "lr": 1.1719639817771244e-07, "epoch": 4.299587130188357, "percentage": 85.99, "elapsed_time": "3:48:13", "remaining_time": "0:37:10", "throughput": 8659.06, "total_tokens": 118576680} +{"current_steps": 176000, "total_steps": 204665, "loss": 0.0, "lr": 1.1715634289456156e-07, "epoch": 4.2997092810202036, "percentage": 85.99, "elapsed_time": "3:48:14", "remaining_time": "0:37:10", "throughput": 8659.11, "total_tokens": 118580456} +{"current_steps": 176005, "total_steps": 204665, "loss": 0.0, "lr": 1.1711629403168733e-07, "epoch": 4.299831431852051, "percentage": 86.0, "elapsed_time": "3:48:14", "remaining_time": "0:37:09", "throughput": 8659.11, "total_tokens": 118583272} +{"current_steps": 176010, "total_steps": 204665, "loss": 0.0, "lr": 1.1707625158938062e-07, "epoch": 4.299953582683898, "percentage": 86.0, "elapsed_time": "3:48:14", "remaining_time": "0:37:09", "throughput": 8659.15, "total_tokens": 118586984} +{"current_steps": 176015, "total_steps": 204665, "loss": 0.0, "lr": 1.1703621556793308e-07, "epoch": 4.300075733515746, "percentage": 86.0, "elapsed_time": "3:48:15", "remaining_time": "0:37:09", "throughput": 8659.19, "total_tokens": 118590440} +{"current_steps": 176020, "total_steps": 204665, "loss": 0.0, "lr": 1.1699618596763549e-07, "epoch": 4.300197884347592, "percentage": 86.0, "elapsed_time": "3:48:15", "remaining_time": "0:37:08", "throughput": 8659.22, "total_tokens": 118593896} +{"current_steps": 176025, "total_steps": 204665, "loss": 0.0001, "lr": 1.1695616278877929e-07, "epoch": 4.30032003517944, "percentage": 86.01, "elapsed_time": "3:48:16", "remaining_time": "0:37:08", "throughput": 8659.23, "total_tokens": 118597032} +{"current_steps": 176030, "total_steps": 204665, "loss": 0.0, "lr": 1.1691614603165522e-07, "epoch": 4.300442186011287, "percentage": 86.01, "elapsed_time": "3:48:16", "remaining_time": "0:37:08", "throughput": 8659.26, "total_tokens": 118600552} +{"current_steps": 176035, "total_steps": 204665, "loss": 0.0, "lr": 1.1687613569655464e-07, "epoch": 4.300564336843134, "percentage": 86.01, "elapsed_time": "3:48:16", "remaining_time": "0:37:07", "throughput": 8659.3, "total_tokens": 118604072} +{"current_steps": 176040, "total_steps": 204665, "loss": 0.0, "lr": 1.1683613178376816e-07, "epoch": 4.300686487674981, "percentage": 86.01, "elapsed_time": "3:48:17", "remaining_time": "0:37:07", "throughput": 8659.32, "total_tokens": 118607336} +{"current_steps": 176045, "total_steps": 204665, "loss": 0.0, "lr": 1.1679613429358681e-07, "epoch": 4.300808638506828, "percentage": 86.02, "elapsed_time": "3:48:17", "remaining_time": "0:37:06", "throughput": 8659.34, "total_tokens": 118610728} +{"current_steps": 176050, "total_steps": 204665, "loss": 0.0, "lr": 1.1675614322630179e-07, "epoch": 4.3009307893386755, "percentage": 86.02, "elapsed_time": "3:48:17", "remaining_time": "0:37:06", "throughput": 8659.35, "total_tokens": 118613736} +{"current_steps": 176055, "total_steps": 204665, "loss": 0.0, "lr": 1.1671615858220352e-07, "epoch": 4.301052940170522, "percentage": 86.02, "elapsed_time": "3:48:18", "remaining_time": "0:37:06", "throughput": 8659.37, "total_tokens": 118617000} +{"current_steps": 176060, "total_steps": 204665, "loss": 0.0001, "lr": 1.16676180361583e-07, "epoch": 4.30117509100237, "percentage": 86.02, "elapsed_time": "3:48:18", "remaining_time": "0:37:05", "throughput": 8659.39, "total_tokens": 118620264} +{"current_steps": 176065, "total_steps": 204665, "loss": 0.0, "lr": 1.1663620856473078e-07, "epoch": 4.301297241834217, "percentage": 86.03, "elapsed_time": "3:48:18", "remaining_time": "0:37:05", "throughput": 8659.41, "total_tokens": 118623464} +{"current_steps": 176070, "total_steps": 204665, "loss": 0.0, "lr": 1.1659624319193751e-07, "epoch": 4.301419392666064, "percentage": 86.03, "elapsed_time": "3:48:19", "remaining_time": "0:37:04", "throughput": 8659.45, "total_tokens": 118627240} +{"current_steps": 176075, "total_steps": 204665, "loss": 0.0, "lr": 1.1655628424349428e-07, "epoch": 4.301541543497911, "percentage": 86.03, "elapsed_time": "3:48:19", "remaining_time": "0:37:04", "throughput": 8659.47, "total_tokens": 118630376} +{"current_steps": 176080, "total_steps": 204665, "loss": 0.0852, "lr": 1.165163317196911e-07, "epoch": 4.301663694329759, "percentage": 86.03, "elapsed_time": "3:48:19", "remaining_time": "0:37:04", "throughput": 8659.49, "total_tokens": 118633704} +{"current_steps": 176085, "total_steps": 204665, "loss": 0.0, "lr": 1.1647638562081907e-07, "epoch": 4.301785845161605, "percentage": 86.04, "elapsed_time": "3:48:20", "remaining_time": "0:37:03", "throughput": 8659.54, "total_tokens": 118637352} +{"current_steps": 176090, "total_steps": 204665, "loss": 0.0, "lr": 1.1643644594716817e-07, "epoch": 4.301907995993453, "percentage": 86.04, "elapsed_time": "3:48:20", "remaining_time": "0:37:03", "throughput": 8659.56, "total_tokens": 118640616} +{"current_steps": 176095, "total_steps": 204665, "loss": 0.0, "lr": 1.1639651269902928e-07, "epoch": 4.3020301468253, "percentage": 86.04, "elapsed_time": "3:48:20", "remaining_time": "0:37:02", "throughput": 8659.58, "total_tokens": 118643944} +{"current_steps": 176100, "total_steps": 204665, "loss": 0.0, "lr": 1.1635658587669239e-07, "epoch": 4.302152297657147, "percentage": 86.04, "elapsed_time": "3:48:21", "remaining_time": "0:37:02", "throughput": 8659.61, "total_tokens": 118647400} +{"current_steps": 176105, "total_steps": 204665, "loss": 0.0, "lr": 1.1631666548044827e-07, "epoch": 4.302274448488994, "percentage": 86.05, "elapsed_time": "3:48:21", "remaining_time": "0:37:02", "throughput": 8659.63, "total_tokens": 118650664} +{"current_steps": 176110, "total_steps": 204665, "loss": 0.0, "lr": 1.1627675151058703e-07, "epoch": 4.302396599320842, "percentage": 86.05, "elapsed_time": "3:48:21", "remaining_time": "0:37:01", "throughput": 8659.64, "total_tokens": 118653800} +{"current_steps": 176115, "total_steps": 204665, "loss": 0.0, "lr": 1.1623684396739885e-07, "epoch": 4.3025187501526885, "percentage": 86.05, "elapsed_time": "3:48:22", "remaining_time": "0:37:01", "throughput": 8659.65, "total_tokens": 118656872} +{"current_steps": 176120, "total_steps": 204665, "loss": 0.0001, "lr": 1.161969428511741e-07, "epoch": 4.302640900984536, "percentage": 86.05, "elapsed_time": "3:48:22", "remaining_time": "0:37:00", "throughput": 8659.69, "total_tokens": 118660392} +{"current_steps": 176125, "total_steps": 204665, "loss": 0.0, "lr": 1.1615704816220284e-07, "epoch": 4.302763051816383, "percentage": 86.06, "elapsed_time": "3:48:22", "remaining_time": "0:37:00", "throughput": 8659.71, "total_tokens": 118663656} +{"current_steps": 176130, "total_steps": 204665, "loss": 0.0, "lr": 1.1611715990077531e-07, "epoch": 4.3028852026482305, "percentage": 86.06, "elapsed_time": "3:48:23", "remaining_time": "0:37:00", "throughput": 8659.72, "total_tokens": 118666856} +{"current_steps": 176135, "total_steps": 204665, "loss": 0.0, "lr": 1.1607727806718138e-07, "epoch": 4.303007353480077, "percentage": 86.06, "elapsed_time": "3:48:23", "remaining_time": "0:36:59", "throughput": 8659.75, "total_tokens": 118670184} +{"current_steps": 176140, "total_steps": 204665, "loss": 0.0, "lr": 1.1603740266171124e-07, "epoch": 4.303129504311924, "percentage": 86.06, "elapsed_time": "3:48:23", "remaining_time": "0:36:59", "throughput": 8659.77, "total_tokens": 118673384} +{"current_steps": 176145, "total_steps": 204665, "loss": 0.0, "lr": 1.1599753368465515e-07, "epoch": 4.303251655143772, "percentage": 86.07, "elapsed_time": "3:48:24", "remaining_time": "0:36:58", "throughput": 8659.79, "total_tokens": 118676776} +{"current_steps": 176150, "total_steps": 204665, "loss": 0.0, "lr": 1.159576711363025e-07, "epoch": 4.303373805975618, "percentage": 86.07, "elapsed_time": "3:48:24", "remaining_time": "0:36:58", "throughput": 8659.81, "total_tokens": 118679912} +{"current_steps": 176155, "total_steps": 204665, "loss": 0.0, "lr": 1.1591781501694365e-07, "epoch": 4.303495956807466, "percentage": 86.07, "elapsed_time": "3:48:25", "remaining_time": "0:36:58", "throughput": 8659.81, "total_tokens": 118682984} +{"current_steps": 176160, "total_steps": 204665, "loss": 0.0, "lr": 1.15877965326868e-07, "epoch": 4.303618107639313, "percentage": 86.07, "elapsed_time": "3:48:25", "remaining_time": "0:36:57", "throughput": 8659.84, "total_tokens": 118686248} +{"current_steps": 176165, "total_steps": 204665, "loss": 0.0, "lr": 1.1583812206636556e-07, "epoch": 4.30374025847116, "percentage": 86.07, "elapsed_time": "3:48:25", "remaining_time": "0:36:57", "throughput": 8659.85, "total_tokens": 118689448} +{"current_steps": 176170, "total_steps": 204665, "loss": 0.0, "lr": 1.1579828523572632e-07, "epoch": 4.303862409303007, "percentage": 86.08, "elapsed_time": "3:48:26", "remaining_time": "0:36:56", "throughput": 8659.88, "total_tokens": 118692840} +{"current_steps": 176175, "total_steps": 204665, "loss": 0.0, "lr": 1.157584548352396e-07, "epoch": 4.303984560134855, "percentage": 86.08, "elapsed_time": "3:48:26", "remaining_time": "0:36:56", "throughput": 8659.9, "total_tokens": 118696040} +{"current_steps": 176180, "total_steps": 204665, "loss": 0.0, "lr": 1.157186308651955e-07, "epoch": 4.3041067109667015, "percentage": 86.08, "elapsed_time": "3:48:26", "remaining_time": "0:36:56", "throughput": 8659.91, "total_tokens": 118699240} +{"current_steps": 176185, "total_steps": 204665, "loss": 0.0, "lr": 1.1567881332588303e-07, "epoch": 4.304228861798549, "percentage": 86.08, "elapsed_time": "3:48:27", "remaining_time": "0:36:55", "throughput": 8659.93, "total_tokens": 118702376} +{"current_steps": 176190, "total_steps": 204665, "loss": 0.0, "lr": 1.1563900221759238e-07, "epoch": 4.304351012630396, "percentage": 86.09, "elapsed_time": "3:48:27", "remaining_time": "0:36:55", "throughput": 8659.95, "total_tokens": 118705704} +{"current_steps": 176195, "total_steps": 204665, "loss": 0.0, "lr": 1.1559919754061253e-07, "epoch": 4.3044731634622435, "percentage": 86.09, "elapsed_time": "3:48:27", "remaining_time": "0:36:54", "throughput": 8659.98, "total_tokens": 118709032} +{"current_steps": 176200, "total_steps": 204665, "loss": 0.0, "lr": 1.155593992952334e-07, "epoch": 4.30459531429409, "percentage": 86.09, "elapsed_time": "3:48:28", "remaining_time": "0:36:54", "throughput": 8660.02, "total_tokens": 118712680} +{"current_steps": 176205, "total_steps": 204665, "loss": 0.0, "lr": 1.1551960748174405e-07, "epoch": 4.304717465125938, "percentage": 86.09, "elapsed_time": "3:48:28", "remaining_time": "0:36:54", "throughput": 8660.04, "total_tokens": 118716008} +{"current_steps": 176210, "total_steps": 204665, "loss": 0.0, "lr": 1.1547982210043417e-07, "epoch": 4.304839615957785, "percentage": 86.1, "elapsed_time": "3:48:28", "remaining_time": "0:36:53", "throughput": 8660.08, "total_tokens": 118719528} +{"current_steps": 176215, "total_steps": 204665, "loss": 0.0, "lr": 1.1544004315159284e-07, "epoch": 4.304961766789631, "percentage": 86.1, "elapsed_time": "3:48:29", "remaining_time": "0:36:53", "throughput": 8660.13, "total_tokens": 118723240} +{"current_steps": 176220, "total_steps": 204665, "loss": 0.0, "lr": 1.1540027063550939e-07, "epoch": 4.305083917621479, "percentage": 86.1, "elapsed_time": "3:48:29", "remaining_time": "0:36:52", "throughput": 8660.13, "total_tokens": 118726248} +{"current_steps": 176225, "total_steps": 204665, "loss": 0.0, "lr": 1.1536050455247304e-07, "epoch": 4.305206068453326, "percentage": 86.1, "elapsed_time": "3:48:29", "remaining_time": "0:36:52", "throughput": 8660.15, "total_tokens": 118729512} +{"current_steps": 176230, "total_steps": 204665, "loss": 0.0, "lr": 1.1532074490277321e-07, "epoch": 4.305328219285173, "percentage": 86.11, "elapsed_time": "3:48:30", "remaining_time": "0:36:52", "throughput": 8660.16, "total_tokens": 118732584} +{"current_steps": 176235, "total_steps": 204665, "loss": 0.0, "lr": 1.152809916866987e-07, "epoch": 4.30545037011702, "percentage": 86.11, "elapsed_time": "3:48:30", "remaining_time": "0:36:51", "throughput": 8660.17, "total_tokens": 118735656} +{"current_steps": 176240, "total_steps": 204665, "loss": 0.0, "lr": 1.152412449045389e-07, "epoch": 4.305572520948868, "percentage": 86.11, "elapsed_time": "3:48:30", "remaining_time": "0:36:51", "throughput": 8660.22, "total_tokens": 118739368} +{"current_steps": 176245, "total_steps": 204665, "loss": 0.0794, "lr": 1.1520150455658261e-07, "epoch": 4.3056946717807145, "percentage": 86.11, "elapsed_time": "3:48:31", "remaining_time": "0:36:50", "throughput": 8660.24, "total_tokens": 118742632} +{"current_steps": 176250, "total_steps": 204665, "loss": 0.0, "lr": 1.1516177064311916e-07, "epoch": 4.305816822612562, "percentage": 86.12, "elapsed_time": "3:48:31", "remaining_time": "0:36:50", "throughput": 8660.28, "total_tokens": 118746280} +{"current_steps": 176255, "total_steps": 204665, "loss": 0.0, "lr": 1.1512204316443719e-07, "epoch": 4.305938973444409, "percentage": 86.12, "elapsed_time": "3:48:31", "remaining_time": "0:36:50", "throughput": 8660.31, "total_tokens": 118749608} +{"current_steps": 176260, "total_steps": 204665, "loss": 0.0, "lr": 1.1508232212082559e-07, "epoch": 4.3060611242762565, "percentage": 86.12, "elapsed_time": "3:48:32", "remaining_time": "0:36:49", "throughput": 8660.33, "total_tokens": 118752936} +{"current_steps": 176265, "total_steps": 204665, "loss": 0.0, "lr": 1.1504260751257366e-07, "epoch": 4.306183275108103, "percentage": 86.12, "elapsed_time": "3:48:32", "remaining_time": "0:36:49", "throughput": 8660.38, "total_tokens": 118756648} +{"current_steps": 176270, "total_steps": 204665, "loss": 0.0, "lr": 1.1500289933996965e-07, "epoch": 4.306305425939951, "percentage": 86.13, "elapsed_time": "3:48:33", "remaining_time": "0:36:49", "throughput": 8660.43, "total_tokens": 118760488} +{"current_steps": 176275, "total_steps": 204665, "loss": 0.0, "lr": 1.1496319760330276e-07, "epoch": 4.306427576771798, "percentage": 86.13, "elapsed_time": "3:48:33", "remaining_time": "0:36:48", "throughput": 8660.46, "total_tokens": 118763944} +{"current_steps": 176280, "total_steps": 204665, "loss": 0.0, "lr": 1.149235023028614e-07, "epoch": 4.306549727603645, "percentage": 86.13, "elapsed_time": "3:48:33", "remaining_time": "0:36:48", "throughput": 8660.48, "total_tokens": 118767272} +{"current_steps": 176285, "total_steps": 204665, "loss": 0.0001, "lr": 1.1488381343893461e-07, "epoch": 4.306671878435492, "percentage": 86.13, "elapsed_time": "3:48:34", "remaining_time": "0:36:47", "throughput": 8660.53, "total_tokens": 118771048} +{"current_steps": 176290, "total_steps": 204665, "loss": 0.0, "lr": 1.1484413101181057e-07, "epoch": 4.30679402926734, "percentage": 86.14, "elapsed_time": "3:48:34", "remaining_time": "0:36:47", "throughput": 8660.54, "total_tokens": 118774120} +{"current_steps": 176295, "total_steps": 204665, "loss": 0.0, "lr": 1.1480445502177805e-07, "epoch": 4.306916180099186, "percentage": 86.14, "elapsed_time": "3:48:34", "remaining_time": "0:36:47", "throughput": 8660.57, "total_tokens": 118777576} +{"current_steps": 176300, "total_steps": 204665, "loss": 0.0, "lr": 1.1476478546912582e-07, "epoch": 4.307038330931034, "percentage": 86.14, "elapsed_time": "3:48:35", "remaining_time": "0:36:46", "throughput": 8660.65, "total_tokens": 118781864} +{"current_steps": 176305, "total_steps": 204665, "loss": 0.0, "lr": 1.14725122354142e-07, "epoch": 4.307160481762881, "percentage": 86.14, "elapsed_time": "3:48:35", "remaining_time": "0:36:46", "throughput": 8660.69, "total_tokens": 118785448} +{"current_steps": 176310, "total_steps": 204665, "loss": 0.0, "lr": 1.1468546567711545e-07, "epoch": 4.3072826325947275, "percentage": 86.15, "elapsed_time": "3:48:35", "remaining_time": "0:36:45", "throughput": 8660.69, "total_tokens": 118788392} +{"current_steps": 176315, "total_steps": 204665, "loss": 0.0, "lr": 1.1464581543833429e-07, "epoch": 4.307404783426575, "percentage": 86.15, "elapsed_time": "3:48:36", "remaining_time": "0:36:45", "throughput": 8660.7, "total_tokens": 118791464} +{"current_steps": 176320, "total_steps": 204665, "loss": 0.0, "lr": 1.1460617163808661e-07, "epoch": 4.307526934258422, "percentage": 86.15, "elapsed_time": "3:48:36", "remaining_time": "0:36:45", "throughput": 8660.76, "total_tokens": 118795432} +{"current_steps": 176325, "total_steps": 204665, "loss": 0.0465, "lr": 1.145665342766613e-07, "epoch": 4.30764908509027, "percentage": 86.15, "elapsed_time": "3:48:36", "remaining_time": "0:36:44", "throughput": 8660.8, "total_tokens": 118798952} +{"current_steps": 176330, "total_steps": 204665, "loss": 0.0, "lr": 1.14526903354346e-07, "epoch": 4.307771235922116, "percentage": 86.16, "elapsed_time": "3:48:37", "remaining_time": "0:36:44", "throughput": 8660.8, "total_tokens": 118801960} +{"current_steps": 176335, "total_steps": 204665, "loss": 0.0, "lr": 1.1448727887142951e-07, "epoch": 4.307893386753964, "percentage": 86.16, "elapsed_time": "3:48:37", "remaining_time": "0:36:43", "throughput": 8660.83, "total_tokens": 118805288} +{"current_steps": 176340, "total_steps": 204665, "loss": 0.0, "lr": 1.1444766082819945e-07, "epoch": 4.308015537585811, "percentage": 86.16, "elapsed_time": "3:48:37", "remaining_time": "0:36:43", "throughput": 8660.84, "total_tokens": 118808488} +{"current_steps": 176345, "total_steps": 204665, "loss": 0.0, "lr": 1.144080492249444e-07, "epoch": 4.308137688417658, "percentage": 86.16, "elapsed_time": "3:48:38", "remaining_time": "0:36:43", "throughput": 8660.85, "total_tokens": 118811624} +{"current_steps": 176350, "total_steps": 204665, "loss": 0.0, "lr": 1.1436844406195211e-07, "epoch": 4.308259839249505, "percentage": 86.17, "elapsed_time": "3:48:38", "remaining_time": "0:36:42", "throughput": 8660.87, "total_tokens": 118814824} +{"current_steps": 176355, "total_steps": 204665, "loss": 0.0213, "lr": 1.1432884533951059e-07, "epoch": 4.308381990081353, "percentage": 86.17, "elapsed_time": "3:48:38", "remaining_time": "0:36:42", "throughput": 8660.87, "total_tokens": 118817896} +{"current_steps": 176360, "total_steps": 204665, "loss": 0.0, "lr": 1.1428925305790815e-07, "epoch": 4.308504140913199, "percentage": 86.17, "elapsed_time": "3:48:39", "remaining_time": "0:36:41", "throughput": 8660.91, "total_tokens": 118821416} +{"current_steps": 176365, "total_steps": 204665, "loss": 0.0, "lr": 1.1424966721743224e-07, "epoch": 4.308626291745047, "percentage": 86.17, "elapsed_time": "3:48:39", "remaining_time": "0:36:41", "throughput": 8660.93, "total_tokens": 118824808} +{"current_steps": 176370, "total_steps": 204665, "loss": 0.0, "lr": 1.1421008781837127e-07, "epoch": 4.308748442576894, "percentage": 86.17, "elapsed_time": "3:48:39", "remaining_time": "0:36:41", "throughput": 8660.97, "total_tokens": 118828392} +{"current_steps": 176375, "total_steps": 204665, "loss": 0.0, "lr": 1.1417051486101248e-07, "epoch": 4.3088705934087415, "percentage": 86.18, "elapsed_time": "3:48:40", "remaining_time": "0:36:40", "throughput": 8660.99, "total_tokens": 118831656} +{"current_steps": 176380, "total_steps": 204665, "loss": 0.0, "lr": 1.1413094834564408e-07, "epoch": 4.308992744240588, "percentage": 86.18, "elapsed_time": "3:48:40", "remaining_time": "0:36:40", "throughput": 8661.02, "total_tokens": 118835048} +{"current_steps": 176385, "total_steps": 204665, "loss": 0.0, "lr": 1.1409138827255382e-07, "epoch": 4.309114895072436, "percentage": 86.18, "elapsed_time": "3:48:41", "remaining_time": "0:36:39", "throughput": 8661.04, "total_tokens": 118838376} +{"current_steps": 176390, "total_steps": 204665, "loss": 0.0, "lr": 1.1405183464202916e-07, "epoch": 4.309237045904283, "percentage": 86.18, "elapsed_time": "3:48:41", "remaining_time": "0:36:39", "throughput": 8661.06, "total_tokens": 118841512} +{"current_steps": 176395, "total_steps": 204665, "loss": 0.0, "lr": 1.1401228745435799e-07, "epoch": 4.30935919673613, "percentage": 86.19, "elapsed_time": "3:48:41", "remaining_time": "0:36:39", "throughput": 8661.07, "total_tokens": 118844648} +{"current_steps": 176400, "total_steps": 204665, "loss": 0.0, "lr": 1.1397274670982748e-07, "epoch": 4.309481347567977, "percentage": 86.19, "elapsed_time": "3:48:42", "remaining_time": "0:36:38", "throughput": 8661.09, "total_tokens": 118847848} +{"current_steps": 176405, "total_steps": 204665, "loss": 0.0, "lr": 1.1393321240872578e-07, "epoch": 4.309603498399824, "percentage": 86.19, "elapsed_time": "3:48:42", "remaining_time": "0:36:38", "throughput": 8661.1, "total_tokens": 118850984} +{"current_steps": 176410, "total_steps": 204665, "loss": 0.0, "lr": 1.1389368455133985e-07, "epoch": 4.309725649231671, "percentage": 86.19, "elapsed_time": "3:48:42", "remaining_time": "0:36:37", "throughput": 8661.14, "total_tokens": 118854568} +{"current_steps": 176415, "total_steps": 204665, "loss": 0.0, "lr": 1.138541631379576e-07, "epoch": 4.309847800063518, "percentage": 86.2, "elapsed_time": "3:48:43", "remaining_time": "0:36:37", "throughput": 8661.17, "total_tokens": 118858088} +{"current_steps": 176420, "total_steps": 204665, "loss": 0.0, "lr": 1.138146481688662e-07, "epoch": 4.309969950895366, "percentage": 86.2, "elapsed_time": "3:48:43", "remaining_time": "0:36:37", "throughput": 8661.19, "total_tokens": 118861416} +{"current_steps": 176425, "total_steps": 204665, "loss": 0.0, "lr": 1.1377513964435292e-07, "epoch": 4.3100921017272125, "percentage": 86.2, "elapsed_time": "3:48:43", "remaining_time": "0:36:36", "throughput": 8661.23, "total_tokens": 118865064} +{"current_steps": 176430, "total_steps": 204665, "loss": 0.0, "lr": 1.1373563756470527e-07, "epoch": 4.31021425255906, "percentage": 86.2, "elapsed_time": "3:48:44", "remaining_time": "0:36:36", "throughput": 8661.26, "total_tokens": 118868520} +{"current_steps": 176435, "total_steps": 204665, "loss": 0.0, "lr": 1.1369614193021027e-07, "epoch": 4.310336403390907, "percentage": 86.21, "elapsed_time": "3:48:44", "remaining_time": "0:36:35", "throughput": 8661.28, "total_tokens": 118871720} +{"current_steps": 176440, "total_steps": 204665, "loss": 0.0, "lr": 1.1365665274115554e-07, "epoch": 4.3104585542227545, "percentage": 86.21, "elapsed_time": "3:48:44", "remaining_time": "0:36:35", "throughput": 8661.29, "total_tokens": 118874792} +{"current_steps": 176445, "total_steps": 204665, "loss": 0.0, "lr": 1.1361716999782778e-07, "epoch": 4.310580705054601, "percentage": 86.21, "elapsed_time": "3:48:45", "remaining_time": "0:36:35", "throughput": 8661.32, "total_tokens": 118878248} +{"current_steps": 176450, "total_steps": 204665, "loss": 0.0, "lr": 1.135776937005144e-07, "epoch": 4.310702855886449, "percentage": 86.21, "elapsed_time": "3:48:45", "remaining_time": "0:36:34", "throughput": 8661.33, "total_tokens": 118881448} +{"current_steps": 176455, "total_steps": 204665, "loss": 0.0, "lr": 1.1353822384950263e-07, "epoch": 4.310825006718296, "percentage": 86.22, "elapsed_time": "3:48:45", "remaining_time": "0:36:34", "throughput": 8661.37, "total_tokens": 118885096} +{"current_steps": 176460, "total_steps": 204665, "loss": 0.0, "lr": 1.1349876044507922e-07, "epoch": 4.310947157550143, "percentage": 86.22, "elapsed_time": "3:48:46", "remaining_time": "0:36:33", "throughput": 8661.41, "total_tokens": 118888616} +{"current_steps": 176465, "total_steps": 204665, "loss": 0.0, "lr": 1.134593034875313e-07, "epoch": 4.31106930838199, "percentage": 86.22, "elapsed_time": "3:48:46", "remaining_time": "0:36:33", "throughput": 8661.44, "total_tokens": 118892072} +{"current_steps": 176470, "total_steps": 204665, "loss": 0.0001, "lr": 1.1341985297714573e-07, "epoch": 4.311191459213838, "percentage": 86.22, "elapsed_time": "3:48:46", "remaining_time": "0:36:33", "throughput": 8661.46, "total_tokens": 118895272} +{"current_steps": 176475, "total_steps": 204665, "loss": 0.0, "lr": 1.1338040891420941e-07, "epoch": 4.311313610045684, "percentage": 86.23, "elapsed_time": "3:48:47", "remaining_time": "0:36:32", "throughput": 8661.5, "total_tokens": 118898920} +{"current_steps": 176480, "total_steps": 204665, "loss": 0.0, "lr": 1.1334097129900932e-07, "epoch": 4.311435760877531, "percentage": 86.23, "elapsed_time": "3:48:47", "remaining_time": "0:36:32", "throughput": 8661.54, "total_tokens": 118902568} +{"current_steps": 176485, "total_steps": 204665, "loss": 0.0, "lr": 1.1330154013183213e-07, "epoch": 4.311557911709379, "percentage": 86.23, "elapsed_time": "3:48:48", "remaining_time": "0:36:31", "throughput": 8661.57, "total_tokens": 118906024} +{"current_steps": 176490, "total_steps": 204665, "loss": 0.0, "lr": 1.1326211541296471e-07, "epoch": 4.311680062541226, "percentage": 86.23, "elapsed_time": "3:48:48", "remaining_time": "0:36:31", "throughput": 8661.59, "total_tokens": 118909288} +{"current_steps": 176495, "total_steps": 204665, "loss": 0.0, "lr": 1.1322269714269361e-07, "epoch": 4.311802213373073, "percentage": 86.24, "elapsed_time": "3:48:48", "remaining_time": "0:36:31", "throughput": 8661.59, "total_tokens": 118912360} +{"current_steps": 176500, "total_steps": 204665, "loss": 0.0, "lr": 1.1318328532130561e-07, "epoch": 4.31192436420492, "percentage": 86.24, "elapsed_time": "3:48:49", "remaining_time": "0:36:30", "throughput": 8661.61, "total_tokens": 118915496} +{"current_steps": 176505, "total_steps": 204665, "loss": 0.0, "lr": 1.1314387994908726e-07, "epoch": 4.3120465150367675, "percentage": 86.24, "elapsed_time": "3:48:49", "remaining_time": "0:36:30", "throughput": 8661.62, "total_tokens": 118918632} +{"current_steps": 176510, "total_steps": 204665, "loss": 0.0, "lr": 1.1310448102632519e-07, "epoch": 4.312168665868614, "percentage": 86.24, "elapsed_time": "3:48:49", "remaining_time": "0:36:30", "throughput": 8661.62, "total_tokens": 118921640} +{"current_steps": 176515, "total_steps": 204665, "loss": 0.0, "lr": 1.1306508855330576e-07, "epoch": 4.312290816700462, "percentage": 86.25, "elapsed_time": "3:48:50", "remaining_time": "0:36:29", "throughput": 8661.66, "total_tokens": 118925224} +{"current_steps": 176520, "total_steps": 204665, "loss": 0.0, "lr": 1.1302570253031573e-07, "epoch": 4.312412967532309, "percentage": 86.25, "elapsed_time": "3:48:50", "remaining_time": "0:36:29", "throughput": 8661.67, "total_tokens": 118928360} +{"current_steps": 176525, "total_steps": 204665, "loss": 0.0, "lr": 1.1298632295764143e-07, "epoch": 4.312535118364156, "percentage": 86.25, "elapsed_time": "3:48:50", "remaining_time": "0:36:28", "throughput": 8661.68, "total_tokens": 118931432} +{"current_steps": 176530, "total_steps": 204665, "loss": 0.0, "lr": 1.1294694983556896e-07, "epoch": 4.312657269196003, "percentage": 86.25, "elapsed_time": "3:48:51", "remaining_time": "0:36:28", "throughput": 8661.71, "total_tokens": 118934824} +{"current_steps": 176535, "total_steps": 204665, "loss": 0.0, "lr": 1.1290758316438476e-07, "epoch": 4.312779420027851, "percentage": 86.26, "elapsed_time": "3:48:51", "remaining_time": "0:36:28", "throughput": 8661.71, "total_tokens": 118937832} +{"current_steps": 176540, "total_steps": 204665, "loss": 0.0, "lr": 1.1286822294437548e-07, "epoch": 4.312901570859697, "percentage": 86.26, "elapsed_time": "3:48:51", "remaining_time": "0:36:27", "throughput": 8661.72, "total_tokens": 118940904} +{"current_steps": 176545, "total_steps": 204665, "loss": 0.0, "lr": 1.128288691758269e-07, "epoch": 4.313023721691545, "percentage": 86.26, "elapsed_time": "3:48:52", "remaining_time": "0:36:27", "throughput": 8661.76, "total_tokens": 118944552} +{"current_steps": 176550, "total_steps": 204665, "loss": 0.0001, "lr": 1.1278952185902557e-07, "epoch": 4.313145872523392, "percentage": 86.26, "elapsed_time": "3:48:52", "remaining_time": "0:36:26", "throughput": 8661.8, "total_tokens": 118948136} +{"current_steps": 176555, "total_steps": 204665, "loss": 0.0, "lr": 1.1275018099425738e-07, "epoch": 4.313268023355239, "percentage": 86.27, "elapsed_time": "3:48:52", "remaining_time": "0:36:26", "throughput": 8661.84, "total_tokens": 118951656} +{"current_steps": 176560, "total_steps": 204665, "loss": 0.0203, "lr": 1.1271084658180862e-07, "epoch": 4.313390174187086, "percentage": 86.27, "elapsed_time": "3:48:53", "remaining_time": "0:36:26", "throughput": 8661.86, "total_tokens": 118954856} +{"current_steps": 176565, "total_steps": 204665, "loss": 0.0, "lr": 1.1267151862196501e-07, "epoch": 4.313512325018934, "percentage": 86.27, "elapsed_time": "3:48:53", "remaining_time": "0:36:25", "throughput": 8661.87, "total_tokens": 118958056} +{"current_steps": 176570, "total_steps": 204665, "loss": 0.0002, "lr": 1.1263219711501282e-07, "epoch": 4.3136344758507805, "percentage": 86.27, "elapsed_time": "3:48:53", "remaining_time": "0:36:25", "throughput": 8661.9, "total_tokens": 118961448} +{"current_steps": 176575, "total_steps": 204665, "loss": 0.0, "lr": 1.1259288206123818e-07, "epoch": 4.313756626682627, "percentage": 86.28, "elapsed_time": "3:48:54", "remaining_time": "0:36:24", "throughput": 8661.96, "total_tokens": 118965416} +{"current_steps": 176580, "total_steps": 204665, "loss": 0.0641, "lr": 1.1255357346092653e-07, "epoch": 4.313878777514475, "percentage": 86.28, "elapsed_time": "3:48:54", "remaining_time": "0:36:24", "throughput": 8661.97, "total_tokens": 118968552} +{"current_steps": 176585, "total_steps": 204665, "loss": 0.0, "lr": 1.125142713143642e-07, "epoch": 4.314000928346322, "percentage": 86.28, "elapsed_time": "3:48:54", "remaining_time": "0:36:24", "throughput": 8661.99, "total_tokens": 118971880} +{"current_steps": 176590, "total_steps": 204665, "loss": 0.0, "lr": 1.1247497562183661e-07, "epoch": 4.314123079178169, "percentage": 86.28, "elapsed_time": "3:48:55", "remaining_time": "0:36:23", "throughput": 8662.04, "total_tokens": 118975592} +{"current_steps": 176595, "total_steps": 204665, "loss": 0.0001, "lr": 1.1243568638362988e-07, "epoch": 4.314245230010016, "percentage": 86.28, "elapsed_time": "3:48:55", "remaining_time": "0:36:23", "throughput": 8662.09, "total_tokens": 118979432} +{"current_steps": 176600, "total_steps": 204665, "loss": 0.0, "lr": 1.1239640360002945e-07, "epoch": 4.314367380841864, "percentage": 86.29, "elapsed_time": "3:48:56", "remaining_time": "0:36:22", "throughput": 8662.12, "total_tokens": 118982952} +{"current_steps": 176605, "total_steps": 204665, "loss": 0.0, "lr": 1.1235712727132107e-07, "epoch": 4.31448953167371, "percentage": 86.29, "elapsed_time": "3:48:56", "remaining_time": "0:36:22", "throughput": 8662.13, "total_tokens": 118986024} +{"current_steps": 176610, "total_steps": 204665, "loss": 0.0, "lr": 1.1231785739779065e-07, "epoch": 4.314611682505558, "percentage": 86.29, "elapsed_time": "3:48:56", "remaining_time": "0:36:22", "throughput": 8662.13, "total_tokens": 118988904} +{"current_steps": 176615, "total_steps": 204665, "loss": 0.0, "lr": 1.1227859397972328e-07, "epoch": 4.314733833337405, "percentage": 86.29, "elapsed_time": "3:48:57", "remaining_time": "0:36:21", "throughput": 8662.15, "total_tokens": 118992232} +{"current_steps": 176620, "total_steps": 204665, "loss": 0.0, "lr": 1.1223933701740484e-07, "epoch": 4.314855984169252, "percentage": 86.3, "elapsed_time": "3:48:57", "remaining_time": "0:36:21", "throughput": 8662.17, "total_tokens": 118995432} +{"current_steps": 176625, "total_steps": 204665, "loss": 0.0, "lr": 1.1220008651112089e-07, "epoch": 4.314978135001099, "percentage": 86.3, "elapsed_time": "3:48:57", "remaining_time": "0:36:20", "throughput": 8662.19, "total_tokens": 118998760} +{"current_steps": 176630, "total_steps": 204665, "loss": 0.0, "lr": 1.1216084246115642e-07, "epoch": 4.315100285832947, "percentage": 86.3, "elapsed_time": "3:48:58", "remaining_time": "0:36:20", "throughput": 8662.22, "total_tokens": 119002216} +{"current_steps": 176635, "total_steps": 204665, "loss": 0.0002, "lr": 1.1212160486779732e-07, "epoch": 4.3152224366647935, "percentage": 86.3, "elapsed_time": "3:48:58", "remaining_time": "0:36:20", "throughput": 8662.23, "total_tokens": 119005288} +{"current_steps": 176640, "total_steps": 204665, "loss": 0.0, "lr": 1.1208237373132845e-07, "epoch": 4.315344587496641, "percentage": 86.31, "elapsed_time": "3:48:58", "remaining_time": "0:36:19", "throughput": 8662.25, "total_tokens": 119008552} +{"current_steps": 176645, "total_steps": 204665, "loss": 0.0, "lr": 1.1204314905203571e-07, "epoch": 4.315466738328488, "percentage": 86.31, "elapsed_time": "3:48:59", "remaining_time": "0:36:19", "throughput": 8662.28, "total_tokens": 119011880} +{"current_steps": 176650, "total_steps": 204665, "loss": 0.0, "lr": 1.1200393083020376e-07, "epoch": 4.315588889160336, "percentage": 86.31, "elapsed_time": "3:48:59", "remaining_time": "0:36:18", "throughput": 8662.31, "total_tokens": 119015336} +{"current_steps": 176655, "total_steps": 204665, "loss": 0.0, "lr": 1.1196471906611826e-07, "epoch": 4.315711039992182, "percentage": 86.31, "elapsed_time": "3:48:59", "remaining_time": "0:36:18", "throughput": 8662.33, "total_tokens": 119018728} +{"current_steps": 176660, "total_steps": 204665, "loss": 0.0, "lr": 1.1192551376006398e-07, "epoch": 4.31583319082403, "percentage": 86.32, "elapsed_time": "3:49:00", "remaining_time": "0:36:18", "throughput": 8662.36, "total_tokens": 119022056} +{"current_steps": 176665, "total_steps": 204665, "loss": 0.0, "lr": 1.1188631491232626e-07, "epoch": 4.315955341655877, "percentage": 86.32, "elapsed_time": "3:49:00", "remaining_time": "0:36:17", "throughput": 8662.38, "total_tokens": 119025448} +{"current_steps": 176670, "total_steps": 204665, "loss": 0.0, "lr": 1.1184712252319028e-07, "epoch": 4.316077492487723, "percentage": 86.32, "elapsed_time": "3:49:00", "remaining_time": "0:36:17", "throughput": 8662.45, "total_tokens": 119029544} +{"current_steps": 176675, "total_steps": 204665, "loss": 0.0, "lr": 1.1180793659294074e-07, "epoch": 4.316199643319571, "percentage": 86.32, "elapsed_time": "3:49:01", "remaining_time": "0:36:16", "throughput": 8662.47, "total_tokens": 119032808} +{"current_steps": 176680, "total_steps": 204665, "loss": 0.0, "lr": 1.1176875712186295e-07, "epoch": 4.316321794151418, "percentage": 86.33, "elapsed_time": "3:49:01", "remaining_time": "0:36:16", "throughput": 8662.51, "total_tokens": 119036328} +{"current_steps": 176685, "total_steps": 204665, "loss": 0.0029, "lr": 1.1172958411024147e-07, "epoch": 4.3164439449832654, "percentage": 86.33, "elapsed_time": "3:49:01", "remaining_time": "0:36:16", "throughput": 8662.54, "total_tokens": 119039848} +{"current_steps": 176690, "total_steps": 204665, "loss": 0.0, "lr": 1.116904175583614e-07, "epoch": 4.316566095815112, "percentage": 86.33, "elapsed_time": "3:49:02", "remaining_time": "0:36:15", "throughput": 8662.56, "total_tokens": 119042984} +{"current_steps": 176695, "total_steps": 204665, "loss": 0.0143, "lr": 1.1165125746650771e-07, "epoch": 4.31668824664696, "percentage": 86.33, "elapsed_time": "3:49:02", "remaining_time": "0:36:15", "throughput": 8662.6, "total_tokens": 119046760} +{"current_steps": 176700, "total_steps": 204665, "loss": 0.0001, "lr": 1.1161210383496478e-07, "epoch": 4.316810397478807, "percentage": 86.34, "elapsed_time": "3:49:02", "remaining_time": "0:36:14", "throughput": 8662.62, "total_tokens": 119049960} +{"current_steps": 176705, "total_steps": 204665, "loss": 0.0, "lr": 1.1157295666401789e-07, "epoch": 4.316932548310654, "percentage": 86.34, "elapsed_time": "3:49:03", "remaining_time": "0:36:14", "throughput": 8662.63, "total_tokens": 119052968} +{"current_steps": 176710, "total_steps": 204665, "loss": 0.0, "lr": 1.1153381595395117e-07, "epoch": 4.317054699142501, "percentage": 86.34, "elapsed_time": "3:49:03", "remaining_time": "0:36:14", "throughput": 8662.65, "total_tokens": 119056232} +{"current_steps": 176715, "total_steps": 204665, "loss": 0.0169, "lr": 1.114946817050496e-07, "epoch": 4.317176849974349, "percentage": 86.34, "elapsed_time": "3:49:03", "remaining_time": "0:36:13", "throughput": 8662.69, "total_tokens": 119059880} +{"current_steps": 176720, "total_steps": 204665, "loss": 0.0, "lr": 1.1145555391759764e-07, "epoch": 4.317299000806195, "percentage": 86.35, "elapsed_time": "3:49:04", "remaining_time": "0:36:13", "throughput": 8662.7, "total_tokens": 119062952} +{"current_steps": 176725, "total_steps": 204665, "loss": 0.0, "lr": 1.1141643259187994e-07, "epoch": 4.317421151638043, "percentage": 86.35, "elapsed_time": "3:49:04", "remaining_time": "0:36:13", "throughput": 8662.71, "total_tokens": 119066088} +{"current_steps": 176730, "total_steps": 204665, "loss": 0.0, "lr": 1.1137731772818105e-07, "epoch": 4.31754330246989, "percentage": 86.35, "elapsed_time": "3:49:05", "remaining_time": "0:36:12", "throughput": 8662.73, "total_tokens": 119069480} +{"current_steps": 176735, "total_steps": 204665, "loss": 0.0, "lr": 1.1133820932678506e-07, "epoch": 4.317665453301737, "percentage": 86.35, "elapsed_time": "3:49:05", "remaining_time": "0:36:12", "throughput": 8662.75, "total_tokens": 119072680} +{"current_steps": 176740, "total_steps": 204665, "loss": 0.0, "lr": 1.1129910738797688e-07, "epoch": 4.317787604133584, "percentage": 86.36, "elapsed_time": "3:49:05", "remaining_time": "0:36:11", "throughput": 8662.78, "total_tokens": 119076200} +{"current_steps": 176745, "total_steps": 204665, "loss": 0.0, "lr": 1.1126001191204038e-07, "epoch": 4.317909754965432, "percentage": 86.36, "elapsed_time": "3:49:06", "remaining_time": "0:36:11", "throughput": 8662.78, "total_tokens": 119079016} +{"current_steps": 176750, "total_steps": 204665, "loss": 0.0, "lr": 1.1122092289926033e-07, "epoch": 4.3180319057972785, "percentage": 86.36, "elapsed_time": "3:49:06", "remaining_time": "0:36:11", "throughput": 8662.8, "total_tokens": 119082280} +{"current_steps": 176755, "total_steps": 204665, "loss": 0.0, "lr": 1.1118184034992062e-07, "epoch": 4.318154056629126, "percentage": 86.36, "elapsed_time": "3:49:06", "remaining_time": "0:36:10", "throughput": 8662.82, "total_tokens": 119085544} +{"current_steps": 176760, "total_steps": 204665, "loss": 0.0, "lr": 1.1114276426430558e-07, "epoch": 4.318276207460973, "percentage": 86.37, "elapsed_time": "3:49:07", "remaining_time": "0:36:10", "throughput": 8662.82, "total_tokens": 119088616} +{"current_steps": 176765, "total_steps": 204665, "loss": 0.0001, "lr": 1.1110369464269964e-07, "epoch": 4.31839835829282, "percentage": 86.37, "elapsed_time": "3:49:07", "remaining_time": "0:36:09", "throughput": 8662.9, "total_tokens": 119092776} +{"current_steps": 176770, "total_steps": 204665, "loss": 0.0, "lr": 1.1106463148538659e-07, "epoch": 4.318520509124667, "percentage": 86.37, "elapsed_time": "3:49:07", "remaining_time": "0:36:09", "throughput": 8662.95, "total_tokens": 119096616} +{"current_steps": 176775, "total_steps": 204665, "loss": 0.0, "lr": 1.1102557479265074e-07, "epoch": 4.318642659956514, "percentage": 86.37, "elapsed_time": "3:49:08", "remaining_time": "0:36:09", "throughput": 8662.99, "total_tokens": 119100264} +{"current_steps": 176780, "total_steps": 204665, "loss": 0.0, "lr": 1.1098652456477586e-07, "epoch": 4.318764810788362, "percentage": 86.38, "elapsed_time": "3:49:08", "remaining_time": "0:36:08", "throughput": 8663.01, "total_tokens": 119103464} +{"current_steps": 176785, "total_steps": 204665, "loss": 0.0, "lr": 1.1094748080204608e-07, "epoch": 4.318886961620208, "percentage": 86.38, "elapsed_time": "3:49:08", "remaining_time": "0:36:08", "throughput": 8663.01, "total_tokens": 119106472} +{"current_steps": 176790, "total_steps": 204665, "loss": 0.0, "lr": 1.1090844350474559e-07, "epoch": 4.319009112452056, "percentage": 86.38, "elapsed_time": "3:49:09", "remaining_time": "0:36:07", "throughput": 8663.03, "total_tokens": 119109736} +{"current_steps": 176795, "total_steps": 204665, "loss": 0.0, "lr": 1.1086941267315775e-07, "epoch": 4.319131263283903, "percentage": 86.38, "elapsed_time": "3:49:09", "remaining_time": "0:36:07", "throughput": 8663.05, "total_tokens": 119112936} +{"current_steps": 176800, "total_steps": 204665, "loss": 0.0632, "lr": 1.1083038830756697e-07, "epoch": 4.31925341411575, "percentage": 86.39, "elapsed_time": "3:49:09", "remaining_time": "0:36:07", "throughput": 8663.07, "total_tokens": 119116264} +{"current_steps": 176805, "total_steps": 204665, "loss": 0.0, "lr": 1.1079137040825648e-07, "epoch": 4.319375564947597, "percentage": 86.39, "elapsed_time": "3:49:10", "remaining_time": "0:36:06", "throughput": 8663.09, "total_tokens": 119119528} +{"current_steps": 176810, "total_steps": 204665, "loss": 0.0595, "lr": 1.107523589755105e-07, "epoch": 4.319497715779445, "percentage": 86.39, "elapsed_time": "3:49:10", "remaining_time": "0:36:06", "throughput": 8663.12, "total_tokens": 119122920} +{"current_steps": 176815, "total_steps": 204665, "loss": 0.0, "lr": 1.1071335400961245e-07, "epoch": 4.3196198666112915, "percentage": 86.39, "elapsed_time": "3:49:10", "remaining_time": "0:36:05", "throughput": 8663.15, "total_tokens": 119126376} +{"current_steps": 176820, "total_steps": 204665, "loss": 0.0, "lr": 1.1067435551084625e-07, "epoch": 4.319742017443139, "percentage": 86.39, "elapsed_time": "3:49:11", "remaining_time": "0:36:05", "throughput": 8663.16, "total_tokens": 119129448} +{"current_steps": 176825, "total_steps": 204665, "loss": 0.0, "lr": 1.1063536347949509e-07, "epoch": 4.319864168274986, "percentage": 86.4, "elapsed_time": "3:49:11", "remaining_time": "0:36:05", "throughput": 8663.2, "total_tokens": 119133096} +{"current_steps": 176830, "total_steps": 204665, "loss": 0.0, "lr": 1.1059637791584298e-07, "epoch": 4.3199863191068335, "percentage": 86.4, "elapsed_time": "3:49:11", "remaining_time": "0:36:04", "throughput": 8663.21, "total_tokens": 119136104} +{"current_steps": 176835, "total_steps": 204665, "loss": 0.0, "lr": 1.1055739882017323e-07, "epoch": 4.32010846993868, "percentage": 86.4, "elapsed_time": "3:49:12", "remaining_time": "0:36:04", "throughput": 8663.23, "total_tokens": 119139432} +{"current_steps": 176840, "total_steps": 204665, "loss": 0.0, "lr": 1.1051842619276918e-07, "epoch": 4.320230620770527, "percentage": 86.4, "elapsed_time": "3:49:12", "remaining_time": "0:36:03", "throughput": 8663.25, "total_tokens": 119142760} +{"current_steps": 176845, "total_steps": 204665, "loss": 0.0, "lr": 1.104794600339145e-07, "epoch": 4.320352771602375, "percentage": 86.41, "elapsed_time": "3:49:13", "remaining_time": "0:36:03", "throughput": 8663.27, "total_tokens": 119145960} +{"current_steps": 176850, "total_steps": 204665, "loss": 0.0638, "lr": 1.104405003438923e-07, "epoch": 4.320474922434221, "percentage": 86.41, "elapsed_time": "3:49:13", "remaining_time": "0:36:03", "throughput": 8663.28, "total_tokens": 119149032} +{"current_steps": 176855, "total_steps": 204665, "loss": 0.0, "lr": 1.1040154712298599e-07, "epoch": 4.320597073266069, "percentage": 86.41, "elapsed_time": "3:49:13", "remaining_time": "0:36:02", "throughput": 8663.31, "total_tokens": 119152552} +{"current_steps": 176860, "total_steps": 204665, "loss": 0.0, "lr": 1.1036260037147915e-07, "epoch": 4.320719224097916, "percentage": 86.41, "elapsed_time": "3:49:14", "remaining_time": "0:36:02", "throughput": 8663.33, "total_tokens": 119155752} +{"current_steps": 176865, "total_steps": 204665, "loss": 0.0, "lr": 1.1032366008965455e-07, "epoch": 4.320841374929763, "percentage": 86.42, "elapsed_time": "3:49:14", "remaining_time": "0:36:01", "throughput": 8663.34, "total_tokens": 119158888} +{"current_steps": 176870, "total_steps": 204665, "loss": 0.0, "lr": 1.1028472627779573e-07, "epoch": 4.32096352576161, "percentage": 86.42, "elapsed_time": "3:49:14", "remaining_time": "0:36:01", "throughput": 8663.35, "total_tokens": 119162088} +{"current_steps": 176875, "total_steps": 204665, "loss": 0.0, "lr": 1.1024579893618547e-07, "epoch": 4.321085676593458, "percentage": 86.42, "elapsed_time": "3:49:15", "remaining_time": "0:36:01", "throughput": 8663.35, "total_tokens": 119165032} +{"current_steps": 176880, "total_steps": 204665, "loss": 0.0, "lr": 1.102068780651072e-07, "epoch": 4.3212078274253045, "percentage": 86.42, "elapsed_time": "3:49:15", "remaining_time": "0:36:00", "throughput": 8663.38, "total_tokens": 119168424} +{"current_steps": 176885, "total_steps": 204665, "loss": 0.0, "lr": 1.1016796366484394e-07, "epoch": 4.321329978257152, "percentage": 86.43, "elapsed_time": "3:49:15", "remaining_time": "0:36:00", "throughput": 8663.39, "total_tokens": 119171560} +{"current_steps": 176890, "total_steps": 204665, "loss": 0.0, "lr": 1.1012905573567843e-07, "epoch": 4.321452129088999, "percentage": 86.43, "elapsed_time": "3:49:16", "remaining_time": "0:35:59", "throughput": 8663.42, "total_tokens": 119174888} +{"current_steps": 176895, "total_steps": 204665, "loss": 0.0, "lr": 1.1009015427789393e-07, "epoch": 4.3215742799208465, "percentage": 86.43, "elapsed_time": "3:49:16", "remaining_time": "0:35:59", "throughput": 8663.46, "total_tokens": 119178536} +{"current_steps": 176900, "total_steps": 204665, "loss": 0.0, "lr": 1.1005125929177306e-07, "epoch": 4.321696430752693, "percentage": 86.43, "elapsed_time": "3:49:16", "remaining_time": "0:35:59", "throughput": 8663.48, "total_tokens": 119181800} +{"current_steps": 176905, "total_steps": 204665, "loss": 0.0, "lr": 1.1001237077759895e-07, "epoch": 4.321818581584541, "percentage": 86.44, "elapsed_time": "3:49:17", "remaining_time": "0:35:58", "throughput": 8663.48, "total_tokens": 119184808} +{"current_steps": 176910, "total_steps": 204665, "loss": 0.0, "lr": 1.0997348873565404e-07, "epoch": 4.321940732416388, "percentage": 86.44, "elapsed_time": "3:49:17", "remaining_time": "0:35:58", "throughput": 8663.51, "total_tokens": 119188264} +{"current_steps": 176915, "total_steps": 204665, "loss": 0.0, "lr": 1.0993461316622132e-07, "epoch": 4.322062883248235, "percentage": 86.44, "elapsed_time": "3:49:17", "remaining_time": "0:35:57", "throughput": 8663.54, "total_tokens": 119191656} +{"current_steps": 176920, "total_steps": 204665, "loss": 0.0, "lr": 1.0989574406958368e-07, "epoch": 4.322185034080082, "percentage": 86.44, "elapsed_time": "3:49:18", "remaining_time": "0:35:57", "throughput": 8663.56, "total_tokens": 119194856} +{"current_steps": 176925, "total_steps": 204665, "loss": 0.0, "lr": 1.0985688144602346e-07, "epoch": 4.32230718491193, "percentage": 86.45, "elapsed_time": "3:49:18", "remaining_time": "0:35:57", "throughput": 8663.61, "total_tokens": 119198760} +{"current_steps": 176930, "total_steps": 204665, "loss": 0.0, "lr": 1.0981802529582362e-07, "epoch": 4.322429335743776, "percentage": 86.45, "elapsed_time": "3:49:18", "remaining_time": "0:35:56", "throughput": 8663.65, "total_tokens": 119202280} +{"current_steps": 176935, "total_steps": 204665, "loss": 0.0, "lr": 1.0977917561926642e-07, "epoch": 4.322551486575623, "percentage": 86.45, "elapsed_time": "3:49:19", "remaining_time": "0:35:56", "throughput": 8663.67, "total_tokens": 119205544} +{"current_steps": 176940, "total_steps": 204665, "loss": 0.0, "lr": 1.0974033241663439e-07, "epoch": 4.322673637407471, "percentage": 86.45, "elapsed_time": "3:49:19", "remaining_time": "0:35:56", "throughput": 8663.69, "total_tokens": 119208808} +{"current_steps": 176945, "total_steps": 204665, "loss": 0.0, "lr": 1.097014956882103e-07, "epoch": 4.3227957882393175, "percentage": 86.46, "elapsed_time": "3:49:19", "remaining_time": "0:35:55", "throughput": 8663.72, "total_tokens": 119212264} +{"current_steps": 176950, "total_steps": 204665, "loss": 0.0, "lr": 1.0966266543427616e-07, "epoch": 4.322917939071165, "percentage": 86.46, "elapsed_time": "3:49:20", "remaining_time": "0:35:55", "throughput": 8663.76, "total_tokens": 119215976} +{"current_steps": 176955, "total_steps": 204665, "loss": 0.0, "lr": 1.0962384165511485e-07, "epoch": 4.323040089903012, "percentage": 86.46, "elapsed_time": "3:49:20", "remaining_time": "0:35:54", "throughput": 8663.8, "total_tokens": 119219560} +{"current_steps": 176960, "total_steps": 204665, "loss": 0.0, "lr": 1.0958502435100814e-07, "epoch": 4.3231622407348596, "percentage": 86.46, "elapsed_time": "3:49:21", "remaining_time": "0:35:54", "throughput": 8663.83, "total_tokens": 119222952} +{"current_steps": 176965, "total_steps": 204665, "loss": 0.0, "lr": 1.0954621352223892e-07, "epoch": 4.323284391566706, "percentage": 86.47, "elapsed_time": "3:49:21", "remaining_time": "0:35:54", "throughput": 8663.86, "total_tokens": 119226472} +{"current_steps": 176970, "total_steps": 204665, "loss": 0.0, "lr": 1.0950740916908896e-07, "epoch": 4.323406542398554, "percentage": 86.47, "elapsed_time": "3:49:21", "remaining_time": "0:35:53", "throughput": 8663.87, "total_tokens": 119229544} +{"current_steps": 176975, "total_steps": 204665, "loss": 0.0, "lr": 1.0946861129184048e-07, "epoch": 4.323528693230401, "percentage": 86.47, "elapsed_time": "3:49:22", "remaining_time": "0:35:53", "throughput": 8663.89, "total_tokens": 119232808} +{"current_steps": 176980, "total_steps": 204665, "loss": 0.0, "lr": 1.0942981989077615e-07, "epoch": 4.323650844062248, "percentage": 86.47, "elapsed_time": "3:49:22", "remaining_time": "0:35:52", "throughput": 8663.9, "total_tokens": 119235880} +{"current_steps": 176985, "total_steps": 204665, "loss": 0.0, "lr": 1.093910349661774e-07, "epoch": 4.323772994894095, "percentage": 86.48, "elapsed_time": "3:49:22", "remaining_time": "0:35:52", "throughput": 8663.93, "total_tokens": 119239336} +{"current_steps": 176990, "total_steps": 204665, "loss": 0.0, "lr": 1.0935225651832691e-07, "epoch": 4.323895145725943, "percentage": 86.48, "elapsed_time": "3:49:23", "remaining_time": "0:35:52", "throughput": 8663.97, "total_tokens": 119242984} +{"current_steps": 176995, "total_steps": 204665, "loss": 0.0, "lr": 1.0931348454750599e-07, "epoch": 4.324017296557789, "percentage": 86.48, "elapsed_time": "3:49:23", "remaining_time": "0:35:51", "throughput": 8663.98, "total_tokens": 119245992} +{"current_steps": 177000, "total_steps": 204665, "loss": 0.0, "lr": 1.0927471905399732e-07, "epoch": 4.324139447389637, "percentage": 86.48, "elapsed_time": "3:49:23", "remaining_time": "0:35:51", "throughput": 8663.99, "total_tokens": 119249192} +{"current_steps": 177005, "total_steps": 204665, "loss": 0.0, "lr": 1.0923596003808222e-07, "epoch": 4.324261598221484, "percentage": 86.49, "elapsed_time": "3:49:24", "remaining_time": "0:35:50", "throughput": 8664.0, "total_tokens": 119252264} +{"current_steps": 177010, "total_steps": 204665, "loss": 0.0, "lr": 1.091972075000428e-07, "epoch": 4.3243837490533314, "percentage": 86.49, "elapsed_time": "3:49:24", "remaining_time": "0:35:50", "throughput": 8664.02, "total_tokens": 119255464} +{"current_steps": 177015, "total_steps": 204665, "loss": 0.0, "lr": 1.0915846144016117e-07, "epoch": 4.324505899885178, "percentage": 86.49, "elapsed_time": "3:49:24", "remaining_time": "0:35:50", "throughput": 8664.08, "total_tokens": 119259432} +{"current_steps": 177020, "total_steps": 204665, "loss": 0.0, "lr": 1.0911972185871842e-07, "epoch": 4.324628050717026, "percentage": 86.49, "elapsed_time": "3:49:25", "remaining_time": "0:35:49", "throughput": 8664.1, "total_tokens": 119262632} +{"current_steps": 177025, "total_steps": 204665, "loss": 0.0, "lr": 1.0908098875599703e-07, "epoch": 4.324750201548873, "percentage": 86.5, "elapsed_time": "3:49:25", "remaining_time": "0:35:49", "throughput": 8664.14, "total_tokens": 119266280} +{"current_steps": 177030, "total_steps": 204665, "loss": 0.0, "lr": 1.0904226213227807e-07, "epoch": 4.324872352380719, "percentage": 86.5, "elapsed_time": "3:49:25", "remaining_time": "0:35:48", "throughput": 8664.15, "total_tokens": 119269352} +{"current_steps": 177035, "total_steps": 204665, "loss": 0.0, "lr": 1.0900354198784367e-07, "epoch": 4.324994503212567, "percentage": 86.5, "elapsed_time": "3:49:26", "remaining_time": "0:35:48", "throughput": 8664.15, "total_tokens": 119272360} +{"current_steps": 177040, "total_steps": 204665, "loss": 0.0, "lr": 1.0896482832297515e-07, "epoch": 4.325116654044414, "percentage": 86.5, "elapsed_time": "3:49:26", "remaining_time": "0:35:48", "throughput": 8664.16, "total_tokens": 119275368} +{"current_steps": 177045, "total_steps": 204665, "loss": 0.0, "lr": 1.0892612113795374e-07, "epoch": 4.325238804876261, "percentage": 86.5, "elapsed_time": "3:49:26", "remaining_time": "0:35:47", "throughput": 8664.16, "total_tokens": 119278312} +{"current_steps": 177050, "total_steps": 204665, "loss": 0.0, "lr": 1.0888742043306154e-07, "epoch": 4.325360955708108, "percentage": 86.51, "elapsed_time": "3:49:27", "remaining_time": "0:35:47", "throughput": 8664.19, "total_tokens": 119281704} +{"current_steps": 177055, "total_steps": 204665, "loss": 0.0, "lr": 1.0884872620857954e-07, "epoch": 4.325483106539956, "percentage": 86.51, "elapsed_time": "3:49:27", "remaining_time": "0:35:46", "throughput": 8664.19, "total_tokens": 119284712} +{"current_steps": 177060, "total_steps": 204665, "loss": 0.1786, "lr": 1.0881003846478942e-07, "epoch": 4.3256052573718025, "percentage": 86.51, "elapsed_time": "3:49:27", "remaining_time": "0:35:46", "throughput": 8664.21, "total_tokens": 119287912} +{"current_steps": 177065, "total_steps": 204665, "loss": 0.0, "lr": 1.0877135720197228e-07, "epoch": 4.32572740820365, "percentage": 86.51, "elapsed_time": "3:49:28", "remaining_time": "0:35:46", "throughput": 8664.24, "total_tokens": 119291368} +{"current_steps": 177070, "total_steps": 204665, "loss": 0.0, "lr": 1.0873268242040945e-07, "epoch": 4.325849559035497, "percentage": 86.52, "elapsed_time": "3:49:28", "remaining_time": "0:35:45", "throughput": 8664.26, "total_tokens": 119294696} +{"current_steps": 177075, "total_steps": 204665, "loss": 0.0, "lr": 1.0869401412038248e-07, "epoch": 4.3259717098673445, "percentage": 86.52, "elapsed_time": "3:49:28", "remaining_time": "0:35:45", "throughput": 8664.28, "total_tokens": 119297960} +{"current_steps": 177080, "total_steps": 204665, "loss": 0.0, "lr": 1.0865535230217226e-07, "epoch": 4.326093860699191, "percentage": 86.52, "elapsed_time": "3:49:29", "remaining_time": "0:35:44", "throughput": 8664.31, "total_tokens": 119301416} +{"current_steps": 177085, "total_steps": 204665, "loss": 0.0, "lr": 1.0861669696606024e-07, "epoch": 4.326216011531039, "percentage": 86.52, "elapsed_time": "3:49:29", "remaining_time": "0:35:44", "throughput": 8664.35, "total_tokens": 119305000} +{"current_steps": 177090, "total_steps": 204665, "loss": 0.0, "lr": 1.0857804811232707e-07, "epoch": 4.326338162362886, "percentage": 86.53, "elapsed_time": "3:49:29", "remaining_time": "0:35:44", "throughput": 8664.37, "total_tokens": 119308328} +{"current_steps": 177095, "total_steps": 204665, "loss": 0.0, "lr": 1.0853940574125419e-07, "epoch": 4.326460313194733, "percentage": 86.53, "elapsed_time": "3:49:30", "remaining_time": "0:35:43", "throughput": 8664.37, "total_tokens": 119311208} +{"current_steps": 177100, "total_steps": 204665, "loss": 0.0, "lr": 1.0850076985312262e-07, "epoch": 4.32658246402658, "percentage": 86.53, "elapsed_time": "3:49:30", "remaining_time": "0:35:43", "throughput": 8664.37, "total_tokens": 119314152} +{"current_steps": 177105, "total_steps": 204665, "loss": 0.0, "lr": 1.0846214044821311e-07, "epoch": 4.326704614858427, "percentage": 86.53, "elapsed_time": "3:49:31", "remaining_time": "0:35:42", "throughput": 8664.4, "total_tokens": 119317544} +{"current_steps": 177110, "total_steps": 204665, "loss": 0.0, "lr": 1.084235175268069e-07, "epoch": 4.326826765690274, "percentage": 86.54, "elapsed_time": "3:49:31", "remaining_time": "0:35:42", "throughput": 8664.41, "total_tokens": 119320616} +{"current_steps": 177115, "total_steps": 204665, "loss": 0.0, "lr": 1.0838490108918452e-07, "epoch": 4.326948916522121, "percentage": 86.54, "elapsed_time": "3:49:31", "remaining_time": "0:35:42", "throughput": 8664.44, "total_tokens": 119324136} +{"current_steps": 177120, "total_steps": 204665, "loss": 0.0, "lr": 1.083462911356271e-07, "epoch": 4.327071067353969, "percentage": 86.54, "elapsed_time": "3:49:32", "remaining_time": "0:35:41", "throughput": 8664.49, "total_tokens": 119327912} +{"current_steps": 177125, "total_steps": 204665, "loss": 0.0, "lr": 1.0830768766641507e-07, "epoch": 4.3271932181858155, "percentage": 86.54, "elapsed_time": "3:49:32", "remaining_time": "0:35:41", "throughput": 8664.52, "total_tokens": 119331432} +{"current_steps": 177130, "total_steps": 204665, "loss": 0.0, "lr": 1.0826909068182954e-07, "epoch": 4.327315369017663, "percentage": 86.55, "elapsed_time": "3:49:32", "remaining_time": "0:35:40", "throughput": 8664.56, "total_tokens": 119334888} +{"current_steps": 177135, "total_steps": 204665, "loss": 0.0, "lr": 1.0823050018215097e-07, "epoch": 4.32743751984951, "percentage": 86.55, "elapsed_time": "3:49:33", "remaining_time": "0:35:40", "throughput": 8664.59, "total_tokens": 119338408} +{"current_steps": 177140, "total_steps": 204665, "loss": 0.0, "lr": 1.0819191616766011e-07, "epoch": 4.3275596706813575, "percentage": 86.55, "elapsed_time": "3:49:33", "remaining_time": "0:35:40", "throughput": 8664.64, "total_tokens": 119342184} +{"current_steps": 177145, "total_steps": 204665, "loss": 0.0, "lr": 1.0815333863863763e-07, "epoch": 4.327681821513204, "percentage": 86.55, "elapsed_time": "3:49:33", "remaining_time": "0:35:39", "throughput": 8664.67, "total_tokens": 119345768} +{"current_steps": 177150, "total_steps": 204665, "loss": 0.0, "lr": 1.0811476759536364e-07, "epoch": 4.327803972345052, "percentage": 86.56, "elapsed_time": "3:49:34", "remaining_time": "0:35:39", "throughput": 8664.69, "total_tokens": 119348968} +{"current_steps": 177155, "total_steps": 204665, "loss": 0.0, "lr": 1.0807620303811915e-07, "epoch": 4.327926123176899, "percentage": 86.56, "elapsed_time": "3:49:34", "remaining_time": "0:35:39", "throughput": 8664.69, "total_tokens": 119351912} +{"current_steps": 177160, "total_steps": 204665, "loss": 0.0, "lr": 1.0803764496718426e-07, "epoch": 4.328048274008746, "percentage": 86.56, "elapsed_time": "3:49:34", "remaining_time": "0:35:38", "throughput": 8664.71, "total_tokens": 119355176} +{"current_steps": 177165, "total_steps": 204665, "loss": 0.0, "lr": 1.0799909338283952e-07, "epoch": 4.328170424840593, "percentage": 86.56, "elapsed_time": "3:49:35", "remaining_time": "0:35:38", "throughput": 8664.72, "total_tokens": 119358248} +{"current_steps": 177170, "total_steps": 204665, "loss": 0.0, "lr": 1.0796054828536549e-07, "epoch": 4.328292575672441, "percentage": 86.57, "elapsed_time": "3:49:35", "remaining_time": "0:35:37", "throughput": 8664.74, "total_tokens": 119361576} +{"current_steps": 177175, "total_steps": 204665, "loss": 0.0, "lr": 1.0792200967504206e-07, "epoch": 4.328414726504287, "percentage": 86.57, "elapsed_time": "3:49:35", "remaining_time": "0:35:37", "throughput": 8664.75, "total_tokens": 119364648} +{"current_steps": 177180, "total_steps": 204665, "loss": 0.0, "lr": 1.0788347755214999e-07, "epoch": 4.328536877336135, "percentage": 86.57, "elapsed_time": "3:49:36", "remaining_time": "0:35:37", "throughput": 8664.77, "total_tokens": 119368040} +{"current_steps": 177185, "total_steps": 204665, "loss": 0.0, "lr": 1.0784495191696897e-07, "epoch": 4.328659028167982, "percentage": 86.57, "elapsed_time": "3:49:36", "remaining_time": "0:35:36", "throughput": 8664.81, "total_tokens": 119371624} +{"current_steps": 177190, "total_steps": 204665, "loss": 0.0, "lr": 1.0780643276977941e-07, "epoch": 4.328781178999829, "percentage": 86.58, "elapsed_time": "3:49:36", "remaining_time": "0:35:36", "throughput": 8664.83, "total_tokens": 119374888} +{"current_steps": 177195, "total_steps": 204665, "loss": 0.0, "lr": 1.0776792011086166e-07, "epoch": 4.328903329831676, "percentage": 86.58, "elapsed_time": "3:49:37", "remaining_time": "0:35:35", "throughput": 8664.85, "total_tokens": 119378088} +{"current_steps": 177200, "total_steps": 204665, "loss": 0.0, "lr": 1.0772941394049528e-07, "epoch": 4.329025480663523, "percentage": 86.58, "elapsed_time": "3:49:37", "remaining_time": "0:35:35", "throughput": 8664.88, "total_tokens": 119381480} +{"current_steps": 177205, "total_steps": 204665, "loss": 0.0002, "lr": 1.0769091425896093e-07, "epoch": 4.3291476314953705, "percentage": 86.58, "elapsed_time": "3:49:37", "remaining_time": "0:35:35", "throughput": 8664.9, "total_tokens": 119384744} +{"current_steps": 177210, "total_steps": 204665, "loss": 0.0002, "lr": 1.0765242106653805e-07, "epoch": 4.329269782327217, "percentage": 86.59, "elapsed_time": "3:49:38", "remaining_time": "0:35:34", "throughput": 8664.91, "total_tokens": 119387880} +{"current_steps": 177215, "total_steps": 204665, "loss": 0.0, "lr": 1.0761393436350685e-07, "epoch": 4.329391933159065, "percentage": 86.59, "elapsed_time": "3:49:38", "remaining_time": "0:35:34", "throughput": 8664.93, "total_tokens": 119391144} +{"current_steps": 177220, "total_steps": 204665, "loss": 0.0, "lr": 1.0757545415014702e-07, "epoch": 4.329514083990912, "percentage": 86.59, "elapsed_time": "3:49:39", "remaining_time": "0:35:33", "throughput": 8664.94, "total_tokens": 119394216} +{"current_steps": 177225, "total_steps": 204665, "loss": 0.0, "lr": 1.0753698042673853e-07, "epoch": 4.329636234822759, "percentage": 86.59, "elapsed_time": "3:49:39", "remaining_time": "0:35:33", "throughput": 8664.96, "total_tokens": 119397480} +{"current_steps": 177230, "total_steps": 204665, "loss": 0.0, "lr": 1.074985131935614e-07, "epoch": 4.329758385654606, "percentage": 86.6, "elapsed_time": "3:49:39", "remaining_time": "0:35:33", "throughput": 8664.97, "total_tokens": 119400680} +{"current_steps": 177235, "total_steps": 204665, "loss": 0.0, "lr": 1.0746005245089484e-07, "epoch": 4.329880536486454, "percentage": 86.6, "elapsed_time": "3:49:40", "remaining_time": "0:35:32", "throughput": 8665.04, "total_tokens": 119404840} +{"current_steps": 177240, "total_steps": 204665, "loss": 0.0, "lr": 1.0742159819901908e-07, "epoch": 4.3300026873183, "percentage": 86.6, "elapsed_time": "3:49:40", "remaining_time": "0:35:32", "throughput": 8665.05, "total_tokens": 119407912} +{"current_steps": 177245, "total_steps": 204665, "loss": 0.0, "lr": 1.0738315043821356e-07, "epoch": 4.330124838150148, "percentage": 86.6, "elapsed_time": "3:49:40", "remaining_time": "0:35:31", "throughput": 8665.07, "total_tokens": 119411176} +{"current_steps": 177250, "total_steps": 204665, "loss": 0.0, "lr": 1.0734470916875771e-07, "epoch": 4.330246988981995, "percentage": 86.6, "elapsed_time": "3:49:41", "remaining_time": "0:35:31", "throughput": 8665.07, "total_tokens": 119414184} +{"current_steps": 177255, "total_steps": 204665, "loss": 0.0, "lr": 1.0730627439093131e-07, "epoch": 4.330369139813842, "percentage": 86.61, "elapsed_time": "3:49:41", "remaining_time": "0:35:31", "throughput": 8665.1, "total_tokens": 119417512} +{"current_steps": 177260, "total_steps": 204665, "loss": 0.0, "lr": 1.0726784610501372e-07, "epoch": 4.330491290645689, "percentage": 86.61, "elapsed_time": "3:49:41", "remaining_time": "0:35:30", "throughput": 8665.09, "total_tokens": 119420392} +{"current_steps": 177265, "total_steps": 204665, "loss": 0.0, "lr": 1.0722942431128457e-07, "epoch": 4.330613441477537, "percentage": 86.61, "elapsed_time": "3:49:42", "remaining_time": "0:35:30", "throughput": 8665.12, "total_tokens": 119423848} +{"current_steps": 177270, "total_steps": 204665, "loss": 0.0, "lr": 1.0719100901002298e-07, "epoch": 4.3307355923093835, "percentage": 86.61, "elapsed_time": "3:49:42", "remaining_time": "0:35:29", "throughput": 8665.15, "total_tokens": 119427240} +{"current_steps": 177275, "total_steps": 204665, "loss": 0.0, "lr": 1.0715260020150874e-07, "epoch": 4.330857743141231, "percentage": 86.62, "elapsed_time": "3:49:42", "remaining_time": "0:35:29", "throughput": 8665.16, "total_tokens": 119430248} +{"current_steps": 177280, "total_steps": 204665, "loss": 0.0, "lr": 1.0711419788602072e-07, "epoch": 4.330979893973078, "percentage": 86.62, "elapsed_time": "3:49:43", "remaining_time": "0:35:29", "throughput": 8665.17, "total_tokens": 119433448} +{"current_steps": 177285, "total_steps": 204665, "loss": 0.0, "lr": 1.0707580206383837e-07, "epoch": 4.3311020448049256, "percentage": 86.62, "elapsed_time": "3:49:43", "remaining_time": "0:35:28", "throughput": 8665.2, "total_tokens": 119436840} +{"current_steps": 177290, "total_steps": 204665, "loss": 0.0, "lr": 1.0703741273524125e-07, "epoch": 4.331224195636772, "percentage": 86.62, "elapsed_time": "3:49:43", "remaining_time": "0:35:28", "throughput": 8665.23, "total_tokens": 119440232} +{"current_steps": 177295, "total_steps": 204665, "loss": 0.0, "lr": 1.0699902990050791e-07, "epoch": 4.331346346468619, "percentage": 86.63, "elapsed_time": "3:49:44", "remaining_time": "0:35:27", "throughput": 8665.27, "total_tokens": 119443880} +{"current_steps": 177300, "total_steps": 204665, "loss": 0.0, "lr": 1.0696065355991812e-07, "epoch": 4.331468497300467, "percentage": 86.63, "elapsed_time": "3:49:44", "remaining_time": "0:35:27", "throughput": 8665.31, "total_tokens": 119447528} +{"current_steps": 177305, "total_steps": 204665, "loss": 0.0, "lr": 1.0692228371375045e-07, "epoch": 4.331590648132313, "percentage": 86.63, "elapsed_time": "3:49:44", "remaining_time": "0:35:27", "throughput": 8665.35, "total_tokens": 119451176} +{"current_steps": 177310, "total_steps": 204665, "loss": 0.0, "lr": 1.0688392036228434e-07, "epoch": 4.331712798964161, "percentage": 86.63, "elapsed_time": "3:49:45", "remaining_time": "0:35:26", "throughput": 8665.37, "total_tokens": 119454440} +{"current_steps": 177315, "total_steps": 204665, "loss": 0.0, "lr": 1.0684556350579832e-07, "epoch": 4.331834949796008, "percentage": 86.64, "elapsed_time": "3:49:45", "remaining_time": "0:35:26", "throughput": 8665.42, "total_tokens": 119458152} +{"current_steps": 177320, "total_steps": 204665, "loss": 0.0, "lr": 1.0680721314457164e-07, "epoch": 4.331957100627855, "percentage": 86.64, "elapsed_time": "3:49:45", "remaining_time": "0:35:25", "throughput": 8665.48, "total_tokens": 119462120} +{"current_steps": 177325, "total_steps": 204665, "loss": 0.0, "lr": 1.0676886927888329e-07, "epoch": 4.332079251459702, "percentage": 86.64, "elapsed_time": "3:49:46", "remaining_time": "0:35:25", "throughput": 8665.5, "total_tokens": 119465320} +{"current_steps": 177330, "total_steps": 204665, "loss": 0.0, "lr": 1.0673053190901183e-07, "epoch": 4.33220140229155, "percentage": 86.64, "elapsed_time": "3:49:46", "remaining_time": "0:35:25", "throughput": 8665.53, "total_tokens": 119468776} +{"current_steps": 177335, "total_steps": 204665, "loss": 0.0, "lr": 1.0669220103523647e-07, "epoch": 4.332323553123397, "percentage": 86.65, "elapsed_time": "3:49:47", "remaining_time": "0:35:24", "throughput": 8665.56, "total_tokens": 119472360} +{"current_steps": 177340, "total_steps": 204665, "loss": 0.0, "lr": 1.0665387665783532e-07, "epoch": 4.332445703955244, "percentage": 86.65, "elapsed_time": "3:49:47", "remaining_time": "0:35:24", "throughput": 8665.58, "total_tokens": 119475496} +{"current_steps": 177345, "total_steps": 204665, "loss": 0.0, "lr": 1.0661555877708783e-07, "epoch": 4.332567854787091, "percentage": 86.65, "elapsed_time": "3:49:47", "remaining_time": "0:35:23", "throughput": 8665.61, "total_tokens": 119478952} +{"current_steps": 177350, "total_steps": 204665, "loss": 0.0, "lr": 1.0657724739327223e-07, "epoch": 4.332690005618939, "percentage": 86.65, "elapsed_time": "3:49:48", "remaining_time": "0:35:23", "throughput": 8665.62, "total_tokens": 119482152} +{"current_steps": 177355, "total_steps": 204665, "loss": 0.0, "lr": 1.0653894250666695e-07, "epoch": 4.332812156450785, "percentage": 86.66, "elapsed_time": "3:49:48", "remaining_time": "0:35:23", "throughput": 8665.72, "total_tokens": 119486696} +{"current_steps": 177360, "total_steps": 204665, "loss": 0.0, "lr": 1.06500644117551e-07, "epoch": 4.332934307282633, "percentage": 86.66, "elapsed_time": "3:49:48", "remaining_time": "0:35:22", "throughput": 8665.75, "total_tokens": 119490216} +{"current_steps": 177365, "total_steps": 204665, "loss": 0.0002, "lr": 1.0646235222620247e-07, "epoch": 4.33305645811448, "percentage": 86.66, "elapsed_time": "3:49:49", "remaining_time": "0:35:22", "throughput": 8665.79, "total_tokens": 119493800} +{"current_steps": 177370, "total_steps": 204665, "loss": 0.0, "lr": 1.0642406683290028e-07, "epoch": 4.333178608946327, "percentage": 86.66, "elapsed_time": "3:49:49", "remaining_time": "0:35:22", "throughput": 8665.82, "total_tokens": 119497192} +{"current_steps": 177375, "total_steps": 204665, "loss": 0.0929, "lr": 1.0638578793792253e-07, "epoch": 4.333300759778174, "percentage": 86.67, "elapsed_time": "3:49:49", "remaining_time": "0:35:21", "throughput": 8665.84, "total_tokens": 119500584} +{"current_steps": 177380, "total_steps": 204665, "loss": 0.0, "lr": 1.0634751554154753e-07, "epoch": 4.333422910610022, "percentage": 86.67, "elapsed_time": "3:49:50", "remaining_time": "0:35:21", "throughput": 8665.85, "total_tokens": 119503656} +{"current_steps": 177385, "total_steps": 204665, "loss": 0.0, "lr": 1.0630924964405396e-07, "epoch": 4.3335450614418685, "percentage": 86.67, "elapsed_time": "3:49:50", "remaining_time": "0:35:20", "throughput": 8665.89, "total_tokens": 119507240} +{"current_steps": 177390, "total_steps": 204665, "loss": 0.0, "lr": 1.0627099024571984e-07, "epoch": 4.333667212273715, "percentage": 86.67, "elapsed_time": "3:49:50", "remaining_time": "0:35:20", "throughput": 8665.91, "total_tokens": 119510568} +{"current_steps": 177395, "total_steps": 204665, "loss": 0.0, "lr": 1.0623273734682347e-07, "epoch": 4.333789363105563, "percentage": 86.68, "elapsed_time": "3:49:51", "remaining_time": "0:35:20", "throughput": 8665.93, "total_tokens": 119513768} +{"current_steps": 177400, "total_steps": 204665, "loss": 0.0, "lr": 1.0619449094764299e-07, "epoch": 4.33391151393741, "percentage": 86.68, "elapsed_time": "3:49:51", "remaining_time": "0:35:19", "throughput": 8665.96, "total_tokens": 119517224} +{"current_steps": 177405, "total_steps": 204665, "loss": 0.0, "lr": 1.0615625104845672e-07, "epoch": 4.334033664769257, "percentage": 86.68, "elapsed_time": "3:49:51", "remaining_time": "0:35:19", "throughput": 8666.0, "total_tokens": 119520808} +{"current_steps": 177410, "total_steps": 204665, "loss": 0.0, "lr": 1.0611801764954242e-07, "epoch": 4.334155815601104, "percentage": 86.68, "elapsed_time": "3:49:52", "remaining_time": "0:35:18", "throughput": 8666.02, "total_tokens": 119524072} +{"current_steps": 177415, "total_steps": 204665, "loss": 0.0354, "lr": 1.0607979075117824e-07, "epoch": 4.334277966432952, "percentage": 86.69, "elapsed_time": "3:49:52", "remaining_time": "0:35:18", "throughput": 8666.04, "total_tokens": 119527336} +{"current_steps": 177420, "total_steps": 204665, "loss": 0.0, "lr": 1.0604157035364259e-07, "epoch": 4.334400117264798, "percentage": 86.69, "elapsed_time": "3:49:52", "remaining_time": "0:35:18", "throughput": 8666.07, "total_tokens": 119530856} +{"current_steps": 177425, "total_steps": 204665, "loss": 0.0, "lr": 1.0600335645721281e-07, "epoch": 4.334522268096646, "percentage": 86.69, "elapsed_time": "3:49:53", "remaining_time": "0:35:17", "throughput": 8666.08, "total_tokens": 119533864} +{"current_steps": 177430, "total_steps": 204665, "loss": 0.0, "lr": 1.0596514906216725e-07, "epoch": 4.334644418928493, "percentage": 86.69, "elapsed_time": "3:49:53", "remaining_time": "0:35:17", "throughput": 8666.08, "total_tokens": 119536808} +{"current_steps": 177435, "total_steps": 204665, "loss": 0.0, "lr": 1.0592694816878345e-07, "epoch": 4.33476656976034, "percentage": 86.7, "elapsed_time": "3:49:53", "remaining_time": "0:35:16", "throughput": 8666.09, "total_tokens": 119539880} +{"current_steps": 177440, "total_steps": 204665, "loss": 0.0, "lr": 1.0588875377733952e-07, "epoch": 4.334888720592187, "percentage": 86.7, "elapsed_time": "3:49:54", "remaining_time": "0:35:16", "throughput": 8666.12, "total_tokens": 119543272} +{"current_steps": 177445, "total_steps": 204665, "loss": 0.0, "lr": 1.0585056588811292e-07, "epoch": 4.335010871424035, "percentage": 86.7, "elapsed_time": "3:49:54", "remaining_time": "0:35:16", "throughput": 8666.15, "total_tokens": 119546792} +{"current_steps": 177450, "total_steps": 204665, "loss": 0.0, "lr": 1.0581238450138163e-07, "epoch": 4.3351330222558815, "percentage": 86.7, "elapsed_time": "3:49:55", "remaining_time": "0:35:15", "throughput": 8666.15, "total_tokens": 119549800} +{"current_steps": 177455, "total_steps": 204665, "loss": 0.0004, "lr": 1.0577420961742301e-07, "epoch": 4.335255173087729, "percentage": 86.71, "elapsed_time": "3:49:55", "remaining_time": "0:35:15", "throughput": 8666.18, "total_tokens": 119553192} +{"current_steps": 177460, "total_steps": 204665, "loss": 0.0, "lr": 1.0573604123651503e-07, "epoch": 4.335377323919576, "percentage": 86.71, "elapsed_time": "3:49:55", "remaining_time": "0:35:14", "throughput": 8666.22, "total_tokens": 119556712} +{"current_steps": 177465, "total_steps": 204665, "loss": 0.0, "lr": 1.0569787935893514e-07, "epoch": 4.335499474751423, "percentage": 86.71, "elapsed_time": "3:49:56", "remaining_time": "0:35:14", "throughput": 8666.23, "total_tokens": 119559912} +{"current_steps": 177470, "total_steps": 204665, "loss": 0.0, "lr": 1.0565972398496059e-07, "epoch": 4.33562162558327, "percentage": 86.71, "elapsed_time": "3:49:56", "remaining_time": "0:35:14", "throughput": 8666.26, "total_tokens": 119563240} +{"current_steps": 177475, "total_steps": 204665, "loss": 0.003, "lr": 1.0562157511486902e-07, "epoch": 4.335743776415117, "percentage": 86.71, "elapsed_time": "3:49:56", "remaining_time": "0:35:13", "throughput": 8666.27, "total_tokens": 119566440} +{"current_steps": 177480, "total_steps": 204665, "loss": 0.0, "lr": 1.0558343274893821e-07, "epoch": 4.335865927246965, "percentage": 86.72, "elapsed_time": "3:49:57", "remaining_time": "0:35:13", "throughput": 8666.29, "total_tokens": 119569768} +{"current_steps": 177485, "total_steps": 204665, "loss": 0.0, "lr": 1.0554529688744507e-07, "epoch": 4.335988078078811, "percentage": 86.72, "elapsed_time": "3:49:57", "remaining_time": "0:35:12", "throughput": 8666.34, "total_tokens": 119573416} +{"current_steps": 177490, "total_steps": 204665, "loss": 0.0, "lr": 1.0550716753066724e-07, "epoch": 4.336110228910659, "percentage": 86.72, "elapsed_time": "3:49:57", "remaining_time": "0:35:12", "throughput": 8666.37, "total_tokens": 119576872} +{"current_steps": 177495, "total_steps": 204665, "loss": 0.0, "lr": 1.0546904467888162e-07, "epoch": 4.336232379742506, "percentage": 86.72, "elapsed_time": "3:49:58", "remaining_time": "0:35:12", "throughput": 8666.4, "total_tokens": 119580328} +{"current_steps": 177500, "total_steps": 204665, "loss": 0.0, "lr": 1.0543092833236578e-07, "epoch": 4.336354530574353, "percentage": 86.73, "elapsed_time": "3:49:58", "remaining_time": "0:35:11", "throughput": 8666.41, "total_tokens": 119583464} +{"current_steps": 177505, "total_steps": 204665, "loss": 0.0, "lr": 1.0539281849139703e-07, "epoch": 4.3364766814062, "percentage": 86.73, "elapsed_time": "3:49:58", "remaining_time": "0:35:11", "throughput": 8666.43, "total_tokens": 119586664} +{"current_steps": 177510, "total_steps": 204665, "loss": 0.0, "lr": 1.0535471515625216e-07, "epoch": 4.336598832238048, "percentage": 86.73, "elapsed_time": "3:49:59", "remaining_time": "0:35:10", "throughput": 8666.46, "total_tokens": 119590184} +{"current_steps": 177515, "total_steps": 204665, "loss": 0.0, "lr": 1.0531661832720862e-07, "epoch": 4.3367209830698945, "percentage": 86.73, "elapsed_time": "3:49:59", "remaining_time": "0:35:10", "throughput": 8666.47, "total_tokens": 119593192} +{"current_steps": 177520, "total_steps": 204665, "loss": 0.0, "lr": 1.0527852800454295e-07, "epoch": 4.336843133901742, "percentage": 86.74, "elapsed_time": "3:49:59", "remaining_time": "0:35:10", "throughput": 8666.48, "total_tokens": 119596456} +{"current_steps": 177525, "total_steps": 204665, "loss": 0.0, "lr": 1.0524044418853284e-07, "epoch": 4.336965284733589, "percentage": 86.74, "elapsed_time": "3:50:00", "remaining_time": "0:35:09", "throughput": 8666.54, "total_tokens": 119600424} +{"current_steps": 177530, "total_steps": 204665, "loss": 0.0, "lr": 1.0520236687945461e-07, "epoch": 4.3370874355654365, "percentage": 86.74, "elapsed_time": "3:50:00", "remaining_time": "0:35:09", "throughput": 8666.57, "total_tokens": 119603752} +{"current_steps": 177535, "total_steps": 204665, "loss": 0.0, "lr": 1.0516429607758548e-07, "epoch": 4.337209586397283, "percentage": 86.74, "elapsed_time": "3:50:00", "remaining_time": "0:35:08", "throughput": 8666.59, "total_tokens": 119607016} +{"current_steps": 177540, "total_steps": 204665, "loss": 0.0, "lr": 1.0512623178320235e-07, "epoch": 4.337331737229131, "percentage": 86.75, "elapsed_time": "3:50:01", "remaining_time": "0:35:08", "throughput": 8666.61, "total_tokens": 119610408} +{"current_steps": 177545, "total_steps": 204665, "loss": 0.0, "lr": 1.0508817399658187e-07, "epoch": 4.337453888060978, "percentage": 86.75, "elapsed_time": "3:50:01", "remaining_time": "0:35:08", "throughput": 8666.64, "total_tokens": 119613800} +{"current_steps": 177550, "total_steps": 204665, "loss": 0.0, "lr": 1.0505012271800107e-07, "epoch": 4.337576038892825, "percentage": 86.75, "elapsed_time": "3:50:01", "remaining_time": "0:35:07", "throughput": 8666.65, "total_tokens": 119616936} +{"current_steps": 177555, "total_steps": 204665, "loss": 0.0, "lr": 1.0501207794773647e-07, "epoch": 4.337698189724672, "percentage": 86.75, "elapsed_time": "3:50:02", "remaining_time": "0:35:07", "throughput": 8666.69, "total_tokens": 119620520} +{"current_steps": 177560, "total_steps": 204665, "loss": 0.0, "lr": 1.0497403968606455e-07, "epoch": 4.337820340556519, "percentage": 86.76, "elapsed_time": "3:50:02", "remaining_time": "0:35:07", "throughput": 8666.71, "total_tokens": 119623720} +{"current_steps": 177565, "total_steps": 204665, "loss": 0.0, "lr": 1.049360079332624e-07, "epoch": 4.337942491388366, "percentage": 86.76, "elapsed_time": "3:50:03", "remaining_time": "0:35:06", "throughput": 8666.78, "total_tokens": 119627816} +{"current_steps": 177570, "total_steps": 204665, "loss": 0.0, "lr": 1.0489798268960615e-07, "epoch": 4.338064642220213, "percentage": 86.76, "elapsed_time": "3:50:03", "remaining_time": "0:35:06", "throughput": 8666.79, "total_tokens": 119631016} +{"current_steps": 177575, "total_steps": 204665, "loss": 0.0, "lr": 1.0485996395537267e-07, "epoch": 4.338186793052061, "percentage": 86.76, "elapsed_time": "3:50:03", "remaining_time": "0:35:05", "throughput": 8666.82, "total_tokens": 119634472} +{"current_steps": 177580, "total_steps": 204665, "loss": 0.0, "lr": 1.0482195173083807e-07, "epoch": 4.3383089438839075, "percentage": 86.77, "elapsed_time": "3:50:04", "remaining_time": "0:35:05", "throughput": 8666.83, "total_tokens": 119637608} +{"current_steps": 177585, "total_steps": 204665, "loss": 0.0, "lr": 1.0478394601627927e-07, "epoch": 4.338431094715755, "percentage": 86.77, "elapsed_time": "3:50:04", "remaining_time": "0:35:05", "throughput": 8666.87, "total_tokens": 119641064} +{"current_steps": 177590, "total_steps": 204665, "loss": 0.0, "lr": 1.0474594681197213e-07, "epoch": 4.338553245547602, "percentage": 86.77, "elapsed_time": "3:50:04", "remaining_time": "0:35:04", "throughput": 8666.9, "total_tokens": 119644520} +{"current_steps": 177595, "total_steps": 204665, "loss": 0.0, "lr": 1.0470795411819333e-07, "epoch": 4.3386753963794495, "percentage": 86.77, "elapsed_time": "3:50:05", "remaining_time": "0:35:04", "throughput": 8666.93, "total_tokens": 119648104} +{"current_steps": 177600, "total_steps": 204665, "loss": 0.0, "lr": 1.0466996793521932e-07, "epoch": 4.338797547211296, "percentage": 86.78, "elapsed_time": "3:50:05", "remaining_time": "0:35:03", "throughput": 8666.97, "total_tokens": 119651624} +{"current_steps": 177605, "total_steps": 204665, "loss": 0.0, "lr": 1.0463198826332587e-07, "epoch": 4.338919698043144, "percentage": 86.78, "elapsed_time": "3:50:05", "remaining_time": "0:35:03", "throughput": 8667.0, "total_tokens": 119655080} +{"current_steps": 177610, "total_steps": 204665, "loss": 0.0, "lr": 1.0459401510278965e-07, "epoch": 4.339041848874991, "percentage": 86.78, "elapsed_time": "3:50:06", "remaining_time": "0:35:03", "throughput": 8667.03, "total_tokens": 119658408} +{"current_steps": 177615, "total_steps": 204665, "loss": 0.0, "lr": 1.0455604845388633e-07, "epoch": 4.339163999706838, "percentage": 86.78, "elapsed_time": "3:50:06", "remaining_time": "0:35:02", "throughput": 8667.05, "total_tokens": 119661800} +{"current_steps": 177620, "total_steps": 204665, "loss": 0.0, "lr": 1.0451808831689247e-07, "epoch": 4.339286150538685, "percentage": 86.79, "elapsed_time": "3:50:06", "remaining_time": "0:35:02", "throughput": 8667.06, "total_tokens": 119664872} +{"current_steps": 177625, "total_steps": 204665, "loss": 0.0, "lr": 1.0448013469208384e-07, "epoch": 4.339408301370533, "percentage": 86.79, "elapsed_time": "3:50:07", "remaining_time": "0:35:01", "throughput": 8667.06, "total_tokens": 119667816} +{"current_steps": 177630, "total_steps": 204665, "loss": 0.0, "lr": 1.0444218757973643e-07, "epoch": 4.339530452202379, "percentage": 86.79, "elapsed_time": "3:50:07", "remaining_time": "0:35:01", "throughput": 8667.12, "total_tokens": 119671720} +{"current_steps": 177635, "total_steps": 204665, "loss": 0.0, "lr": 1.0440424698012651e-07, "epoch": 4.339652603034227, "percentage": 86.79, "elapsed_time": "3:50:07", "remaining_time": "0:35:01", "throughput": 8667.14, "total_tokens": 119675112} +{"current_steps": 177640, "total_steps": 204665, "loss": 0.0001, "lr": 1.0436631289352959e-07, "epoch": 4.339774753866074, "percentage": 86.8, "elapsed_time": "3:50:08", "remaining_time": "0:35:00", "throughput": 8667.16, "total_tokens": 119678312} +{"current_steps": 177645, "total_steps": 204665, "loss": 0.0, "lr": 1.0432838532022204e-07, "epoch": 4.339896904697921, "percentage": 86.8, "elapsed_time": "3:50:08", "remaining_time": "0:35:00", "throughput": 8667.19, "total_tokens": 119681832} +{"current_steps": 177650, "total_steps": 204665, "loss": 0.0, "lr": 1.0429046426047905e-07, "epoch": 4.340019055529768, "percentage": 86.8, "elapsed_time": "3:50:08", "remaining_time": "0:34:59", "throughput": 8667.24, "total_tokens": 119685544} +{"current_steps": 177655, "total_steps": 204665, "loss": 0.0, "lr": 1.0425254971457697e-07, "epoch": 4.340141206361615, "percentage": 86.8, "elapsed_time": "3:50:09", "remaining_time": "0:34:59", "throughput": 8667.25, "total_tokens": 119688616} +{"current_steps": 177660, "total_steps": 204665, "loss": 0.0, "lr": 1.0421464168279137e-07, "epoch": 4.340263357193463, "percentage": 86.81, "elapsed_time": "3:50:09", "remaining_time": "0:34:59", "throughput": 8667.28, "total_tokens": 119692072} +{"current_steps": 177665, "total_steps": 204665, "loss": 0.0, "lr": 1.0417674016539745e-07, "epoch": 4.340385508025309, "percentage": 86.81, "elapsed_time": "3:50:09", "remaining_time": "0:34:58", "throughput": 8667.29, "total_tokens": 119695272} +{"current_steps": 177670, "total_steps": 204665, "loss": 0.0, "lr": 1.0413884516267158e-07, "epoch": 4.340507658857157, "percentage": 86.81, "elapsed_time": "3:50:10", "remaining_time": "0:34:58", "throughput": 8667.34, "total_tokens": 119699048} +{"current_steps": 177675, "total_steps": 204665, "loss": 0.0, "lr": 1.0410095667488872e-07, "epoch": 4.340629809689004, "percentage": 86.81, "elapsed_time": "3:50:10", "remaining_time": "0:34:57", "throughput": 8667.37, "total_tokens": 119702504} +{"current_steps": 177680, "total_steps": 204665, "loss": 0.0, "lr": 1.040630747023249e-07, "epoch": 4.340751960520851, "percentage": 86.82, "elapsed_time": "3:50:11", "remaining_time": "0:34:57", "throughput": 8667.43, "total_tokens": 119706344} +{"current_steps": 177685, "total_steps": 204665, "loss": 0.0, "lr": 1.0402519924525511e-07, "epoch": 4.340874111352698, "percentage": 86.82, "elapsed_time": "3:50:11", "remaining_time": "0:34:57", "throughput": 8667.49, "total_tokens": 119710376} +{"current_steps": 177690, "total_steps": 204665, "loss": 0.0, "lr": 1.0398733030395512e-07, "epoch": 4.340996262184546, "percentage": 86.82, "elapsed_time": "3:50:11", "remaining_time": "0:34:56", "throughput": 8667.53, "total_tokens": 119713896} +{"current_steps": 177695, "total_steps": 204665, "loss": 0.0, "lr": 1.0394946787870052e-07, "epoch": 4.3411184130163925, "percentage": 86.82, "elapsed_time": "3:50:12", "remaining_time": "0:34:56", "throughput": 8667.54, "total_tokens": 119717096} +{"current_steps": 177700, "total_steps": 204665, "loss": 0.0, "lr": 1.0391161196976605e-07, "epoch": 4.34124056384824, "percentage": 86.82, "elapsed_time": "3:50:12", "remaining_time": "0:34:55", "throughput": 8667.55, "total_tokens": 119720104} +{"current_steps": 177705, "total_steps": 204665, "loss": 0.0, "lr": 1.0387376257742763e-07, "epoch": 4.341362714680087, "percentage": 86.83, "elapsed_time": "3:50:12", "remaining_time": "0:34:55", "throughput": 8667.58, "total_tokens": 119723688} +{"current_steps": 177710, "total_steps": 204665, "loss": 0.0, "lr": 1.0383591970196004e-07, "epoch": 4.3414848655119345, "percentage": 86.83, "elapsed_time": "3:50:13", "remaining_time": "0:34:55", "throughput": 8667.6, "total_tokens": 119726888} +{"current_steps": 177715, "total_steps": 204665, "loss": 0.0, "lr": 1.0379808334363893e-07, "epoch": 4.341607016343781, "percentage": 86.83, "elapsed_time": "3:50:13", "remaining_time": "0:34:54", "throughput": 8667.63, "total_tokens": 119730408} +{"current_steps": 177720, "total_steps": 204665, "loss": 0.0, "lr": 1.0376025350273898e-07, "epoch": 4.341729167175629, "percentage": 86.83, "elapsed_time": "3:50:13", "remaining_time": "0:34:54", "throughput": 8667.65, "total_tokens": 119733608} +{"current_steps": 177725, "total_steps": 204665, "loss": 0.0, "lr": 1.0372243017953541e-07, "epoch": 4.341851318007476, "percentage": 86.84, "elapsed_time": "3:50:14", "remaining_time": "0:34:53", "throughput": 8667.67, "total_tokens": 119736872} +{"current_steps": 177730, "total_steps": 204665, "loss": 0.0, "lr": 1.0368461337430378e-07, "epoch": 4.341973468839322, "percentage": 86.84, "elapsed_time": "3:50:14", "remaining_time": "0:34:53", "throughput": 8667.68, "total_tokens": 119739944} +{"current_steps": 177735, "total_steps": 204665, "loss": 0.0, "lr": 1.0364680308731843e-07, "epoch": 4.34209561967117, "percentage": 86.84, "elapsed_time": "3:50:14", "remaining_time": "0:34:53", "throughput": 8667.73, "total_tokens": 119743848} +{"current_steps": 177740, "total_steps": 204665, "loss": 0.0, "lr": 1.036089993188548e-07, "epoch": 4.342217770503017, "percentage": 86.84, "elapsed_time": "3:50:15", "remaining_time": "0:34:52", "throughput": 8667.79, "total_tokens": 119747752} +{"current_steps": 177745, "total_steps": 204665, "loss": 0.0, "lr": 1.0357120206918746e-07, "epoch": 4.342339921334864, "percentage": 86.85, "elapsed_time": "3:50:15", "remaining_time": "0:34:52", "throughput": 8667.8, "total_tokens": 119750824} +{"current_steps": 177750, "total_steps": 204665, "loss": 0.0, "lr": 1.0353341133859161e-07, "epoch": 4.342462072166711, "percentage": 86.85, "elapsed_time": "3:50:15", "remaining_time": "0:34:52", "throughput": 8667.83, "total_tokens": 119754280} +{"current_steps": 177755, "total_steps": 204665, "loss": 0.0, "lr": 1.0349562712734173e-07, "epoch": 4.342584222998559, "percentage": 86.85, "elapsed_time": "3:50:16", "remaining_time": "0:34:51", "throughput": 8667.85, "total_tokens": 119757544} +{"current_steps": 177760, "total_steps": 204665, "loss": 0.0, "lr": 1.03457849435713e-07, "epoch": 4.3427063738304055, "percentage": 86.85, "elapsed_time": "3:50:16", "remaining_time": "0:34:51", "throughput": 8667.85, "total_tokens": 119760552} +{"current_steps": 177765, "total_steps": 204665, "loss": 0.0, "lr": 1.034200782639797e-07, "epoch": 4.342828524662253, "percentage": 86.86, "elapsed_time": "3:50:16", "remaining_time": "0:34:50", "throughput": 8667.88, "total_tokens": 119763944} +{"current_steps": 177770, "total_steps": 204665, "loss": 0.0, "lr": 1.03382313612417e-07, "epoch": 4.3429506754941, "percentage": 86.86, "elapsed_time": "3:50:17", "remaining_time": "0:34:50", "throughput": 8667.9, "total_tokens": 119767272} +{"current_steps": 177775, "total_steps": 204665, "loss": 0.0, "lr": 1.0334455548129928e-07, "epoch": 4.3430728263259475, "percentage": 86.86, "elapsed_time": "3:50:17", "remaining_time": "0:34:50", "throughput": 8667.98, "total_tokens": 119771560} +{"current_steps": 177780, "total_steps": 204665, "loss": 0.0, "lr": 1.0330680387090085e-07, "epoch": 4.343194977157794, "percentage": 86.86, "elapsed_time": "3:50:18", "remaining_time": "0:34:49", "throughput": 8668.01, "total_tokens": 119774952} +{"current_steps": 177785, "total_steps": 204665, "loss": 0.0, "lr": 1.032690587814965e-07, "epoch": 4.343317127989642, "percentage": 86.87, "elapsed_time": "3:50:18", "remaining_time": "0:34:49", "throughput": 8668.03, "total_tokens": 119778280} +{"current_steps": 177790, "total_steps": 204665, "loss": 0.0, "lr": 1.0323132021336101e-07, "epoch": 4.343439278821489, "percentage": 86.87, "elapsed_time": "3:50:18", "remaining_time": "0:34:48", "throughput": 8668.05, "total_tokens": 119781544} +{"current_steps": 177795, "total_steps": 204665, "loss": 0.0, "lr": 1.0319358816676838e-07, "epoch": 4.343561429653336, "percentage": 86.87, "elapsed_time": "3:50:19", "remaining_time": "0:34:48", "throughput": 8668.06, "total_tokens": 119784680} +{"current_steps": 177800, "total_steps": 204665, "loss": 0.0, "lr": 1.0315586264199338e-07, "epoch": 4.343683580485183, "percentage": 86.87, "elapsed_time": "3:50:19", "remaining_time": "0:34:48", "throughput": 8668.07, "total_tokens": 119787688} +{"current_steps": 177805, "total_steps": 204665, "loss": 0.0, "lr": 1.0311814363930994e-07, "epoch": 4.343805731317031, "percentage": 86.88, "elapsed_time": "3:50:19", "remaining_time": "0:34:47", "throughput": 8668.1, "total_tokens": 119791208} +{"current_steps": 177810, "total_steps": 204665, "loss": 0.0, "lr": 1.0308043115899257e-07, "epoch": 4.343927882148877, "percentage": 86.88, "elapsed_time": "3:50:20", "remaining_time": "0:34:47", "throughput": 8668.11, "total_tokens": 119794344} +{"current_steps": 177815, "total_steps": 204665, "loss": 0.0, "lr": 1.0304272520131586e-07, "epoch": 4.344050032980725, "percentage": 86.88, "elapsed_time": "3:50:20", "remaining_time": "0:34:46", "throughput": 8668.14, "total_tokens": 119797736} +{"current_steps": 177820, "total_steps": 204665, "loss": 0.0, "lr": 1.0300502576655334e-07, "epoch": 4.344172183812572, "percentage": 86.88, "elapsed_time": "3:50:20", "remaining_time": "0:34:46", "throughput": 8668.17, "total_tokens": 119801256} +{"current_steps": 177825, "total_steps": 204665, "loss": 0.0, "lr": 1.0296733285497982e-07, "epoch": 4.3442943346444185, "percentage": 86.89, "elapsed_time": "3:50:21", "remaining_time": "0:34:46", "throughput": 8668.19, "total_tokens": 119804520} +{"current_steps": 177830, "total_steps": 204665, "loss": 0.0, "lr": 1.0292964646686897e-07, "epoch": 4.344416485476266, "percentage": 86.89, "elapsed_time": "3:50:21", "remaining_time": "0:34:45", "throughput": 8668.22, "total_tokens": 119807848} +{"current_steps": 177835, "total_steps": 204665, "loss": 0.0, "lr": 1.0289196660249521e-07, "epoch": 4.344538636308113, "percentage": 86.89, "elapsed_time": "3:50:21", "remaining_time": "0:34:45", "throughput": 8668.23, "total_tokens": 119811048} +{"current_steps": 177840, "total_steps": 204665, "loss": 0.0, "lr": 1.0285429326213213e-07, "epoch": 4.3446607871399605, "percentage": 86.89, "elapsed_time": "3:50:22", "remaining_time": "0:34:44", "throughput": 8668.25, "total_tokens": 119814248} +{"current_steps": 177845, "total_steps": 204665, "loss": 0.0, "lr": 1.0281662644605394e-07, "epoch": 4.344782937971807, "percentage": 86.9, "elapsed_time": "3:50:22", "remaining_time": "0:34:44", "throughput": 8668.27, "total_tokens": 119817512} +{"current_steps": 177850, "total_steps": 204665, "loss": 0.0, "lr": 1.0277896615453473e-07, "epoch": 4.344905088803655, "percentage": 86.9, "elapsed_time": "3:50:22", "remaining_time": "0:34:44", "throughput": 8668.28, "total_tokens": 119820648} +{"current_steps": 177855, "total_steps": 204665, "loss": 0.0, "lr": 1.02741312387848e-07, "epoch": 4.345027239635502, "percentage": 86.9, "elapsed_time": "3:50:23", "remaining_time": "0:34:43", "throughput": 8668.3, "total_tokens": 119823976} +{"current_steps": 177860, "total_steps": 204665, "loss": 0.0, "lr": 1.0270366514626793e-07, "epoch": 4.345149390467349, "percentage": 86.9, "elapsed_time": "3:50:23", "remaining_time": "0:34:43", "throughput": 8668.36, "total_tokens": 119827944} +{"current_steps": 177865, "total_steps": 204665, "loss": 0.0, "lr": 1.0266602443006822e-07, "epoch": 4.345271541299196, "percentage": 86.91, "elapsed_time": "3:50:23", "remaining_time": "0:34:42", "throughput": 8668.39, "total_tokens": 119831336} +{"current_steps": 177870, "total_steps": 204665, "loss": 0.0, "lr": 1.0262839023952241e-07, "epoch": 4.345393692131044, "percentage": 86.91, "elapsed_time": "3:50:24", "remaining_time": "0:34:42", "throughput": 8668.44, "total_tokens": 119835112} +{"current_steps": 177875, "total_steps": 204665, "loss": 0.0, "lr": 1.025907625749044e-07, "epoch": 4.34551584296289, "percentage": 86.91, "elapsed_time": "3:50:24", "remaining_time": "0:34:42", "throughput": 8668.47, "total_tokens": 119838568} +{"current_steps": 177880, "total_steps": 204665, "loss": 0.0, "lr": 1.0255314143648753e-07, "epoch": 4.345637993794738, "percentage": 86.91, "elapsed_time": "3:50:24", "remaining_time": "0:34:41", "throughput": 8668.49, "total_tokens": 119841832} +{"current_steps": 177885, "total_steps": 204665, "loss": 0.0, "lr": 1.025155268245459e-07, "epoch": 4.345760144626585, "percentage": 86.92, "elapsed_time": "3:50:25", "remaining_time": "0:34:41", "throughput": 8668.49, "total_tokens": 119844840} +{"current_steps": 177890, "total_steps": 204665, "loss": 0.0, "lr": 1.0247791873935241e-07, "epoch": 4.345882295458432, "percentage": 86.92, "elapsed_time": "3:50:25", "remaining_time": "0:34:40", "throughput": 8668.57, "total_tokens": 119849192} +{"current_steps": 177895, "total_steps": 204665, "loss": 0.0, "lr": 1.0244031718118118e-07, "epoch": 4.346004446290279, "percentage": 86.92, "elapsed_time": "3:50:26", "remaining_time": "0:34:40", "throughput": 8668.59, "total_tokens": 119852456} +{"current_steps": 177900, "total_steps": 204665, "loss": 0.0816, "lr": 1.0240272215030521e-07, "epoch": 4.346126597122127, "percentage": 86.92, "elapsed_time": "3:50:26", "remaining_time": "0:34:40", "throughput": 8668.59, "total_tokens": 119855400} +{"current_steps": 177905, "total_steps": 204665, "loss": 0.0, "lr": 1.0236513364699805e-07, "epoch": 4.3462487479539735, "percentage": 86.92, "elapsed_time": "3:50:26", "remaining_time": "0:34:39", "throughput": 8668.6, "total_tokens": 119858472} +{"current_steps": 177910, "total_steps": 204665, "loss": 0.0, "lr": 1.0232755167153328e-07, "epoch": 4.346370898785821, "percentage": 86.93, "elapsed_time": "3:50:27", "remaining_time": "0:34:39", "throughput": 8668.62, "total_tokens": 119861672} +{"current_steps": 177915, "total_steps": 204665, "loss": 0.0, "lr": 1.0228997622418378e-07, "epoch": 4.346493049617668, "percentage": 86.93, "elapsed_time": "3:50:27", "remaining_time": "0:34:38", "throughput": 8668.67, "total_tokens": 119865384} +{"current_steps": 177920, "total_steps": 204665, "loss": 0.0213, "lr": 1.0225240730522322e-07, "epoch": 4.346615200449515, "percentage": 86.93, "elapsed_time": "3:50:27", "remaining_time": "0:34:38", "throughput": 8668.67, "total_tokens": 119868328} +{"current_steps": 177925, "total_steps": 204665, "loss": 0.0, "lr": 1.0221484491492438e-07, "epoch": 4.346737351281362, "percentage": 86.93, "elapsed_time": "3:50:28", "remaining_time": "0:34:38", "throughput": 8668.71, "total_tokens": 119872040} +{"current_steps": 177930, "total_steps": 204665, "loss": 0.0, "lr": 1.0217728905356093e-07, "epoch": 4.346859502113209, "percentage": 86.94, "elapsed_time": "3:50:28", "remaining_time": "0:34:37", "throughput": 8668.72, "total_tokens": 119875048} +{"current_steps": 177935, "total_steps": 204665, "loss": 0.0, "lr": 1.0213973972140555e-07, "epoch": 4.346981652945057, "percentage": 86.94, "elapsed_time": "3:50:28", "remaining_time": "0:34:37", "throughput": 8668.74, "total_tokens": 119878248} +{"current_steps": 177940, "total_steps": 204665, "loss": 0.0, "lr": 1.0210219691873145e-07, "epoch": 4.347103803776903, "percentage": 86.94, "elapsed_time": "3:50:29", "remaining_time": "0:34:37", "throughput": 8668.77, "total_tokens": 119881704} +{"current_steps": 177945, "total_steps": 204665, "loss": 0.0, "lr": 1.0206466064581177e-07, "epoch": 4.347225954608751, "percentage": 86.94, "elapsed_time": "3:50:29", "remaining_time": "0:34:36", "throughput": 8668.8, "total_tokens": 119885288} +{"current_steps": 177950, "total_steps": 204665, "loss": 0.0, "lr": 1.0202713090291937e-07, "epoch": 4.347348105440598, "percentage": 86.95, "elapsed_time": "3:50:29", "remaining_time": "0:34:36", "throughput": 8668.83, "total_tokens": 119888744} +{"current_steps": 177955, "total_steps": 204665, "loss": 0.0, "lr": 1.0198960769032728e-07, "epoch": 4.347470256272445, "percentage": 86.95, "elapsed_time": "3:50:30", "remaining_time": "0:34:35", "throughput": 8668.86, "total_tokens": 119892072} +{"current_steps": 177960, "total_steps": 204665, "loss": 0.0001, "lr": 1.0195209100830815e-07, "epoch": 4.347592407104292, "percentage": 86.95, "elapsed_time": "3:50:30", "remaining_time": "0:34:35", "throughput": 8668.86, "total_tokens": 119895080} +{"current_steps": 177965, "total_steps": 204665, "loss": 0.0, "lr": 1.0191458085713511e-07, "epoch": 4.34771455793614, "percentage": 86.95, "elapsed_time": "3:50:30", "remaining_time": "0:34:35", "throughput": 8668.88, "total_tokens": 119898408} +{"current_steps": 177970, "total_steps": 204665, "loss": 0.0, "lr": 1.0187707723708084e-07, "epoch": 4.347836708767987, "percentage": 86.96, "elapsed_time": "3:50:31", "remaining_time": "0:34:34", "throughput": 8668.89, "total_tokens": 119901544} +{"current_steps": 177975, "total_steps": 204665, "loss": 0.0, "lr": 1.0183958014841776e-07, "epoch": 4.347958859599834, "percentage": 86.96, "elapsed_time": "3:50:31", "remaining_time": "0:34:34", "throughput": 8668.94, "total_tokens": 119905192} +{"current_steps": 177980, "total_steps": 204665, "loss": 0.0002, "lr": 1.0180208959141912e-07, "epoch": 4.348081010431681, "percentage": 86.96, "elapsed_time": "3:50:31", "remaining_time": "0:34:33", "throughput": 8668.96, "total_tokens": 119908456} +{"current_steps": 177985, "total_steps": 204665, "loss": 0.0001, "lr": 1.0176460556635702e-07, "epoch": 4.348203161263529, "percentage": 86.96, "elapsed_time": "3:50:32", "remaining_time": "0:34:33", "throughput": 8668.97, "total_tokens": 119911656} +{"current_steps": 177990, "total_steps": 204665, "loss": 0.0536, "lr": 1.0172712807350447e-07, "epoch": 4.348325312095375, "percentage": 86.97, "elapsed_time": "3:50:32", "remaining_time": "0:34:33", "throughput": 8669.02, "total_tokens": 119915304} +{"current_steps": 177995, "total_steps": 204665, "loss": 0.0, "lr": 1.0168965711313371e-07, "epoch": 4.348447462927223, "percentage": 86.97, "elapsed_time": "3:50:32", "remaining_time": "0:34:32", "throughput": 8669.03, "total_tokens": 119918568} +{"current_steps": 178000, "total_steps": 204665, "loss": 0.0, "lr": 1.016521926855174e-07, "epoch": 4.34856961375907, "percentage": 86.97, "elapsed_time": "3:50:33", "remaining_time": "0:34:32", "throughput": 8669.06, "total_tokens": 119921832} +{"current_steps": 178005, "total_steps": 204665, "loss": 0.0, "lr": 1.0161473479092819e-07, "epoch": 4.348691764590917, "percentage": 86.97, "elapsed_time": "3:50:33", "remaining_time": "0:34:31", "throughput": 8669.06, "total_tokens": 119924904} +{"current_steps": 178010, "total_steps": 204665, "loss": 0.0, "lr": 1.0157728342963801e-07, "epoch": 4.348813915422764, "percentage": 86.98, "elapsed_time": "3:50:34", "remaining_time": "0:34:31", "throughput": 8669.12, "total_tokens": 119928744} +{"current_steps": 178015, "total_steps": 204665, "loss": 0.0, "lr": 1.0153983860191961e-07, "epoch": 4.348936066254611, "percentage": 86.98, "elapsed_time": "3:50:34", "remaining_time": "0:34:31", "throughput": 8669.15, "total_tokens": 119932200} +{"current_steps": 178020, "total_steps": 204665, "loss": 0.0, "lr": 1.0150240030804502e-07, "epoch": 4.3490582170864585, "percentage": 86.98, "elapsed_time": "3:50:34", "remaining_time": "0:34:30", "throughput": 8669.18, "total_tokens": 119935592} +{"current_steps": 178025, "total_steps": 204665, "loss": 0.0, "lr": 1.014649685482869e-07, "epoch": 4.349180367918305, "percentage": 86.98, "elapsed_time": "3:50:35", "remaining_time": "0:34:30", "throughput": 8669.19, "total_tokens": 119938728} +{"current_steps": 178030, "total_steps": 204665, "loss": 0.0, "lr": 1.0142754332291692e-07, "epoch": 4.349302518750153, "percentage": 86.99, "elapsed_time": "3:50:35", "remaining_time": "0:34:29", "throughput": 8669.23, "total_tokens": 119942376} +{"current_steps": 178035, "total_steps": 204665, "loss": 0.0, "lr": 1.0139012463220764e-07, "epoch": 4.349424669582, "percentage": 86.99, "elapsed_time": "3:50:35", "remaining_time": "0:34:29", "throughput": 8669.25, "total_tokens": 119945704} +{"current_steps": 178040, "total_steps": 204665, "loss": 0.0, "lr": 1.0135271247643117e-07, "epoch": 4.349546820413847, "percentage": 86.99, "elapsed_time": "3:50:36", "remaining_time": "0:34:29", "throughput": 8669.27, "total_tokens": 119948968} +{"current_steps": 178045, "total_steps": 204665, "loss": 0.0, "lr": 1.0131530685585931e-07, "epoch": 4.349668971245694, "percentage": 86.99, "elapsed_time": "3:50:36", "remaining_time": "0:34:28", "throughput": 8669.29, "total_tokens": 119952232} +{"current_steps": 178050, "total_steps": 204665, "loss": 0.0, "lr": 1.0127790777076439e-07, "epoch": 4.349791122077542, "percentage": 87.0, "elapsed_time": "3:50:36", "remaining_time": "0:34:28", "throughput": 8669.35, "total_tokens": 119956136} +{"current_steps": 178055, "total_steps": 204665, "loss": 0.0, "lr": 1.0124051522141819e-07, "epoch": 4.349913272909388, "percentage": 87.0, "elapsed_time": "3:50:37", "remaining_time": "0:34:27", "throughput": 8669.39, "total_tokens": 119959656} +{"current_steps": 178060, "total_steps": 204665, "loss": 0.0, "lr": 1.0120312920809282e-07, "epoch": 4.350035423741236, "percentage": 87.0, "elapsed_time": "3:50:37", "remaining_time": "0:34:27", "throughput": 8669.4, "total_tokens": 119962792} +{"current_steps": 178065, "total_steps": 204665, "loss": 0.0, "lr": 1.0116574973105984e-07, "epoch": 4.350157574573083, "percentage": 87.0, "elapsed_time": "3:50:37", "remaining_time": "0:34:27", "throughput": 8669.41, "total_tokens": 119965992} +{"current_steps": 178070, "total_steps": 204665, "loss": 0.0, "lr": 1.011283767905915e-07, "epoch": 4.35027972540493, "percentage": 87.01, "elapsed_time": "3:50:38", "remaining_time": "0:34:26", "throughput": 8669.44, "total_tokens": 119969448} +{"current_steps": 178075, "total_steps": 204665, "loss": 0.0, "lr": 1.0109101038695911e-07, "epoch": 4.350401876236777, "percentage": 87.01, "elapsed_time": "3:50:38", "remaining_time": "0:34:26", "throughput": 8669.45, "total_tokens": 119972520} +{"current_steps": 178080, "total_steps": 204665, "loss": 0.0, "lr": 1.0105365052043491e-07, "epoch": 4.350524027068625, "percentage": 87.01, "elapsed_time": "3:50:38", "remaining_time": "0:34:25", "throughput": 8669.49, "total_tokens": 119976104} +{"current_steps": 178085, "total_steps": 204665, "loss": 0.0, "lr": 1.0101629719129045e-07, "epoch": 4.3506461779004715, "percentage": 87.01, "elapsed_time": "3:50:39", "remaining_time": "0:34:25", "throughput": 8669.49, "total_tokens": 119978984} +{"current_steps": 178090, "total_steps": 204665, "loss": 0.0, "lr": 1.0097895039979698e-07, "epoch": 4.350768328732318, "percentage": 87.02, "elapsed_time": "3:50:39", "remaining_time": "0:34:25", "throughput": 8669.5, "total_tokens": 119982056} +{"current_steps": 178095, "total_steps": 204665, "loss": 0.0, "lr": 1.0094161014622637e-07, "epoch": 4.350890479564166, "percentage": 87.02, "elapsed_time": "3:50:39", "remaining_time": "0:34:24", "throughput": 8669.52, "total_tokens": 119985384} +{"current_steps": 178100, "total_steps": 204665, "loss": 0.0, "lr": 1.0090427643085043e-07, "epoch": 4.351012630396013, "percentage": 87.02, "elapsed_time": "3:50:40", "remaining_time": "0:34:24", "throughput": 8669.54, "total_tokens": 119988584} +{"current_steps": 178105, "total_steps": 204665, "loss": 0.0019, "lr": 1.0086694925394024e-07, "epoch": 4.35113478122786, "percentage": 87.02, "elapsed_time": "3:50:40", "remaining_time": "0:34:23", "throughput": 8669.55, "total_tokens": 119991720} +{"current_steps": 178110, "total_steps": 204665, "loss": 0.0, "lr": 1.008296286157676e-07, "epoch": 4.351256932059707, "percentage": 87.03, "elapsed_time": "3:50:40", "remaining_time": "0:34:23", "throughput": 8669.59, "total_tokens": 119995432} +{"current_steps": 178115, "total_steps": 204665, "loss": 0.0, "lr": 1.0079231451660352e-07, "epoch": 4.351379082891555, "percentage": 87.03, "elapsed_time": "3:50:41", "remaining_time": "0:34:23", "throughput": 8669.62, "total_tokens": 119998952} +{"current_steps": 178120, "total_steps": 204665, "loss": 0.0259, "lr": 1.007550069567198e-07, "epoch": 4.351501233723401, "percentage": 87.03, "elapsed_time": "3:50:41", "remaining_time": "0:34:22", "throughput": 8669.63, "total_tokens": 120001960} +{"current_steps": 178125, "total_steps": 204665, "loss": 0.0, "lr": 1.007177059363874e-07, "epoch": 4.351623384555249, "percentage": 87.03, "elapsed_time": "3:50:41", "remaining_time": "0:34:22", "throughput": 8669.64, "total_tokens": 120005160} +{"current_steps": 178130, "total_steps": 204665, "loss": 0.0403, "lr": 1.0068041145587769e-07, "epoch": 4.351745535387096, "percentage": 87.03, "elapsed_time": "3:50:42", "remaining_time": "0:34:22", "throughput": 8669.7, "total_tokens": 120009064} +{"current_steps": 178135, "total_steps": 204665, "loss": 0.0, "lr": 1.00643123515462e-07, "epoch": 4.351867686218943, "percentage": 87.04, "elapsed_time": "3:50:42", "remaining_time": "0:34:21", "throughput": 8669.73, "total_tokens": 120012392} +{"current_steps": 178140, "total_steps": 204665, "loss": 0.0134, "lr": 1.0060584211541134e-07, "epoch": 4.35198983705079, "percentage": 87.04, "elapsed_time": "3:50:43", "remaining_time": "0:34:21", "throughput": 8669.75, "total_tokens": 120015720} +{"current_steps": 178145, "total_steps": 204665, "loss": 0.0, "lr": 1.0056856725599704e-07, "epoch": 4.352111987882638, "percentage": 87.04, "elapsed_time": "3:50:43", "remaining_time": "0:34:20", "throughput": 8669.79, "total_tokens": 120019304} +{"current_steps": 178150, "total_steps": 204665, "loss": 0.0, "lr": 1.0053129893748991e-07, "epoch": 4.3522341387144845, "percentage": 87.04, "elapsed_time": "3:50:43", "remaining_time": "0:34:20", "throughput": 8669.79, "total_tokens": 120022248} +{"current_steps": 178155, "total_steps": 204665, "loss": 0.0336, "lr": 1.0049403716016113e-07, "epoch": 4.352356289546332, "percentage": 87.05, "elapsed_time": "3:50:44", "remaining_time": "0:34:20", "throughput": 8669.8, "total_tokens": 120025384} +{"current_steps": 178160, "total_steps": 204665, "loss": 0.0, "lr": 1.0045678192428175e-07, "epoch": 4.352478440378179, "percentage": 87.05, "elapsed_time": "3:50:44", "remaining_time": "0:34:19", "throughput": 8669.85, "total_tokens": 120029032} +{"current_steps": 178165, "total_steps": 204665, "loss": 0.0, "lr": 1.0041953323012242e-07, "epoch": 4.3526005912100265, "percentage": 87.05, "elapsed_time": "3:50:44", "remaining_time": "0:34:19", "throughput": 8669.86, "total_tokens": 120032232} +{"current_steps": 178170, "total_steps": 204665, "loss": 0.0, "lr": 1.0038229107795448e-07, "epoch": 4.352722742041873, "percentage": 87.05, "elapsed_time": "3:50:45", "remaining_time": "0:34:18", "throughput": 8669.87, "total_tokens": 120035368} +{"current_steps": 178175, "total_steps": 204665, "loss": 0.0, "lr": 1.0034505546804839e-07, "epoch": 4.352844892873721, "percentage": 87.06, "elapsed_time": "3:50:45", "remaining_time": "0:34:18", "throughput": 8669.91, "total_tokens": 120038888} +{"current_steps": 178180, "total_steps": 204665, "loss": 0.0, "lr": 1.003078264006748e-07, "epoch": 4.352967043705568, "percentage": 87.06, "elapsed_time": "3:50:45", "remaining_time": "0:34:18", "throughput": 8669.96, "total_tokens": 120042664} +{"current_steps": 178185, "total_steps": 204665, "loss": 0.0, "lr": 1.0027060387610497e-07, "epoch": 4.353089194537414, "percentage": 87.06, "elapsed_time": "3:50:46", "remaining_time": "0:34:17", "throughput": 8669.96, "total_tokens": 120045672} +{"current_steps": 178190, "total_steps": 204665, "loss": 0.0, "lr": 1.0023338789460912e-07, "epoch": 4.353211345369262, "percentage": 87.06, "elapsed_time": "3:50:46", "remaining_time": "0:34:17", "throughput": 8669.97, "total_tokens": 120048744} +{"current_steps": 178195, "total_steps": 204665, "loss": 0.0, "lr": 1.0019617845645822e-07, "epoch": 4.353333496201109, "percentage": 87.07, "elapsed_time": "3:50:46", "remaining_time": "0:34:16", "throughput": 8670.0, "total_tokens": 120052200} +{"current_steps": 178200, "total_steps": 204665, "loss": 0.0, "lr": 1.0015897556192266e-07, "epoch": 4.353455647032956, "percentage": 87.07, "elapsed_time": "3:50:47", "remaining_time": "0:34:16", "throughput": 8669.99, "total_tokens": 120054952} +{"current_steps": 178205, "total_steps": 204665, "loss": 0.0, "lr": 1.0012177921127307e-07, "epoch": 4.353577797864803, "percentage": 87.07, "elapsed_time": "3:50:47", "remaining_time": "0:34:16", "throughput": 8670.0, "total_tokens": 120058024} +{"current_steps": 178210, "total_steps": 204665, "loss": 0.0, "lr": 1.0008458940477992e-07, "epoch": 4.353699948696651, "percentage": 87.07, "elapsed_time": "3:50:47", "remaining_time": "0:34:15", "throughput": 8670.02, "total_tokens": 120061288} +{"current_steps": 178215, "total_steps": 204665, "loss": 0.0, "lr": 1.0004740614271356e-07, "epoch": 4.3538220995284975, "percentage": 87.08, "elapsed_time": "3:50:48", "remaining_time": "0:34:15", "throughput": 8670.03, "total_tokens": 120064424} +{"current_steps": 178220, "total_steps": 204665, "loss": 0.0001, "lr": 1.0001022942534476e-07, "epoch": 4.353944250360345, "percentage": 87.08, "elapsed_time": "3:50:48", "remaining_time": "0:34:14", "throughput": 8670.05, "total_tokens": 120067688} +{"current_steps": 178225, "total_steps": 204665, "loss": 0.0, "lr": 9.997305925294342e-08, "epoch": 4.354066401192192, "percentage": 87.08, "elapsed_time": "3:50:48", "remaining_time": "0:34:14", "throughput": 8670.1, "total_tokens": 120071464} +{"current_steps": 178230, "total_steps": 204665, "loss": 0.0, "lr": 9.993589562578031e-08, "epoch": 4.3541885520240395, "percentage": 87.08, "elapsed_time": "3:50:49", "remaining_time": "0:34:14", "throughput": 8670.12, "total_tokens": 120074856} +{"current_steps": 178235, "total_steps": 204665, "loss": 0.0, "lr": 9.989873854412523e-08, "epoch": 4.354310702855886, "percentage": 87.09, "elapsed_time": "3:50:49", "remaining_time": "0:34:13", "throughput": 8670.14, "total_tokens": 120078056} +{"current_steps": 178240, "total_steps": 204665, "loss": 0.0, "lr": 9.986158800824884e-08, "epoch": 4.354432853687734, "percentage": 87.09, "elapsed_time": "3:50:49", "remaining_time": "0:34:13", "throughput": 8670.16, "total_tokens": 120081448} +{"current_steps": 178245, "total_steps": 204665, "loss": 0.0, "lr": 9.982444401842083e-08, "epoch": 4.354555004519581, "percentage": 87.09, "elapsed_time": "3:50:50", "remaining_time": "0:34:12", "throughput": 8670.18, "total_tokens": 120084648} +{"current_steps": 178250, "total_steps": 204665, "loss": 0.0, "lr": 9.978730657491164e-08, "epoch": 4.354677155351428, "percentage": 87.09, "elapsed_time": "3:50:50", "remaining_time": "0:34:12", "throughput": 8670.2, "total_tokens": 120087912} +{"current_steps": 178255, "total_steps": 204665, "loss": 0.0, "lr": 9.975017567799148e-08, "epoch": 4.354799306183275, "percentage": 87.1, "elapsed_time": "3:50:50", "remaining_time": "0:34:12", "throughput": 8670.2, "total_tokens": 120090920} +{"current_steps": 178260, "total_steps": 204665, "loss": 0.0, "lr": 9.971305132792996e-08, "epoch": 4.354921457015123, "percentage": 87.1, "elapsed_time": "3:50:51", "remaining_time": "0:34:11", "throughput": 8670.22, "total_tokens": 120094248} +{"current_steps": 178265, "total_steps": 204665, "loss": 0.0, "lr": 9.967593352499747e-08, "epoch": 4.355043607846969, "percentage": 87.1, "elapsed_time": "3:50:51", "remaining_time": "0:34:11", "throughput": 8670.22, "total_tokens": 120097128} +{"current_steps": 178270, "total_steps": 204665, "loss": 0.0, "lr": 9.963882226946363e-08, "epoch": 4.355165758678817, "percentage": 87.1, "elapsed_time": "3:50:52", "remaining_time": "0:34:10", "throughput": 8670.24, "total_tokens": 120100328} +{"current_steps": 178275, "total_steps": 204665, "loss": 0.0, "lr": 9.960171756159851e-08, "epoch": 4.355287909510664, "percentage": 87.11, "elapsed_time": "3:50:52", "remaining_time": "0:34:10", "throughput": 8670.29, "total_tokens": 120104232} +{"current_steps": 178280, "total_steps": 204665, "loss": 0.0, "lr": 9.956461940167193e-08, "epoch": 4.3554100603425105, "percentage": 87.11, "elapsed_time": "3:50:52", "remaining_time": "0:34:10", "throughput": 8670.33, "total_tokens": 120107880} +{"current_steps": 178285, "total_steps": 204665, "loss": 0.0001, "lr": 9.952752778995343e-08, "epoch": 4.355532211174358, "percentage": 87.11, "elapsed_time": "3:50:53", "remaining_time": "0:34:09", "throughput": 8670.35, "total_tokens": 120111080} +{"current_steps": 178290, "total_steps": 204665, "loss": 0.0313, "lr": 9.949044272671326e-08, "epoch": 4.355654362006205, "percentage": 87.11, "elapsed_time": "3:50:53", "remaining_time": "0:34:09", "throughput": 8670.37, "total_tokens": 120114344} +{"current_steps": 178295, "total_steps": 204665, "loss": 0.0, "lr": 9.945336421222039e-08, "epoch": 4.355776512838053, "percentage": 87.12, "elapsed_time": "3:50:53", "remaining_time": "0:34:08", "throughput": 8670.4, "total_tokens": 120117736} +{"current_steps": 178300, "total_steps": 204665, "loss": 0.0, "lr": 9.941629224674519e-08, "epoch": 4.355898663669899, "percentage": 87.12, "elapsed_time": "3:50:54", "remaining_time": "0:34:08", "throughput": 8670.42, "total_tokens": 120121064} +{"current_steps": 178305, "total_steps": 204665, "loss": 0.0, "lr": 9.937922683055677e-08, "epoch": 4.356020814501747, "percentage": 87.12, "elapsed_time": "3:50:54", "remaining_time": "0:34:08", "throughput": 8670.46, "total_tokens": 120124776} +{"current_steps": 178310, "total_steps": 204665, "loss": 0.0, "lr": 9.93421679639248e-08, "epoch": 4.356142965333594, "percentage": 87.12, "elapsed_time": "3:50:54", "remaining_time": "0:34:07", "throughput": 8670.49, "total_tokens": 120128168} +{"current_steps": 178315, "total_steps": 204665, "loss": 0.0, "lr": 9.930511564711907e-08, "epoch": 4.356265116165441, "percentage": 87.13, "elapsed_time": "3:50:55", "remaining_time": "0:34:07", "throughput": 8670.53, "total_tokens": 120131752} +{"current_steps": 178320, "total_steps": 204665, "loss": 0.0, "lr": 9.926806988040858e-08, "epoch": 4.356387266997288, "percentage": 87.13, "elapsed_time": "3:50:55", "remaining_time": "0:34:07", "throughput": 8670.56, "total_tokens": 120135272} +{"current_steps": 178325, "total_steps": 204665, "loss": 0.0, "lr": 9.923103066406314e-08, "epoch": 4.356509417829136, "percentage": 87.13, "elapsed_time": "3:50:55", "remaining_time": "0:34:06", "throughput": 8670.57, "total_tokens": 120138408} +{"current_steps": 178330, "total_steps": 204665, "loss": 0.0, "lr": 9.919399799835171e-08, "epoch": 4.3566315686609824, "percentage": 87.13, "elapsed_time": "3:50:56", "remaining_time": "0:34:06", "throughput": 8670.61, "total_tokens": 120141928} +{"current_steps": 178335, "total_steps": 204665, "loss": 0.0, "lr": 9.915697188354399e-08, "epoch": 4.35675371949283, "percentage": 87.14, "elapsed_time": "3:50:56", "remaining_time": "0:34:05", "throughput": 8670.64, "total_tokens": 120145448} +{"current_steps": 178340, "total_steps": 204665, "loss": 0.0004, "lr": 9.911995231990899e-08, "epoch": 4.356875870324677, "percentage": 87.14, "elapsed_time": "3:50:56", "remaining_time": "0:34:05", "throughput": 8670.67, "total_tokens": 120148904} +{"current_steps": 178345, "total_steps": 204665, "loss": 0.0, "lr": 9.908293930771594e-08, "epoch": 4.3569980211565245, "percentage": 87.14, "elapsed_time": "3:50:57", "remaining_time": "0:34:05", "throughput": 8670.7, "total_tokens": 120152360} +{"current_steps": 178350, "total_steps": 204665, "loss": 0.0, "lr": 9.904593284723417e-08, "epoch": 4.357120171988371, "percentage": 87.14, "elapsed_time": "3:50:57", "remaining_time": "0:34:04", "throughput": 8670.73, "total_tokens": 120155752} +{"current_steps": 178355, "total_steps": 204665, "loss": 0.0, "lr": 9.90089329387327e-08, "epoch": 4.357242322820218, "percentage": 87.14, "elapsed_time": "3:50:57", "remaining_time": "0:34:04", "throughput": 8670.75, "total_tokens": 120159144} +{"current_steps": 178360, "total_steps": 204665, "loss": 0.0, "lr": 9.897193958248063e-08, "epoch": 4.357364473652066, "percentage": 87.15, "elapsed_time": "3:50:58", "remaining_time": "0:34:03", "throughput": 8670.78, "total_tokens": 120162536} +{"current_steps": 178365, "total_steps": 204665, "loss": 0.0, "lr": 9.893495277874686e-08, "epoch": 4.357486624483912, "percentage": 87.15, "elapsed_time": "3:50:58", "remaining_time": "0:34:03", "throughput": 8670.8, "total_tokens": 120165736} +{"current_steps": 178370, "total_steps": 204665, "loss": 0.0, "lr": 9.889797252780064e-08, "epoch": 4.35760877531576, "percentage": 87.15, "elapsed_time": "3:50:59", "remaining_time": "0:34:03", "throughput": 8670.81, "total_tokens": 120168808} +{"current_steps": 178375, "total_steps": 204665, "loss": 0.0, "lr": 9.88609988299105e-08, "epoch": 4.357730926147607, "percentage": 87.15, "elapsed_time": "3:50:59", "remaining_time": "0:34:02", "throughput": 8670.83, "total_tokens": 120172200} +{"current_steps": 178380, "total_steps": 204665, "loss": 0.0, "lr": 9.882403168534581e-08, "epoch": 4.357853076979454, "percentage": 87.16, "elapsed_time": "3:50:59", "remaining_time": "0:34:02", "throughput": 8670.85, "total_tokens": 120175400} +{"current_steps": 178385, "total_steps": 204665, "loss": 0.0, "lr": 9.878707109437489e-08, "epoch": 4.357975227811301, "percentage": 87.16, "elapsed_time": "3:51:00", "remaining_time": "0:34:01", "throughput": 8670.88, "total_tokens": 120178856} +{"current_steps": 178390, "total_steps": 204665, "loss": 0.0, "lr": 9.875011705726699e-08, "epoch": 4.358097378643149, "percentage": 87.16, "elapsed_time": "3:51:00", "remaining_time": "0:34:01", "throughput": 8670.88, "total_tokens": 120181736} +{"current_steps": 178395, "total_steps": 204665, "loss": 0.0, "lr": 9.871316957429077e-08, "epoch": 4.3582195294749955, "percentage": 87.16, "elapsed_time": "3:51:00", "remaining_time": "0:34:01", "throughput": 8670.9, "total_tokens": 120185000} +{"current_steps": 178400, "total_steps": 204665, "loss": 0.0, "lr": 9.867622864571445e-08, "epoch": 4.358341680306843, "percentage": 87.17, "elapsed_time": "3:51:01", "remaining_time": "0:34:00", "throughput": 8670.9, "total_tokens": 120188072} +{"current_steps": 178405, "total_steps": 204665, "loss": 0.0, "lr": 9.863929427180706e-08, "epoch": 4.35846383113869, "percentage": 87.17, "elapsed_time": "3:51:01", "remaining_time": "0:34:00", "throughput": 8670.93, "total_tokens": 120191464} +{"current_steps": 178410, "total_steps": 204665, "loss": 0.0, "lr": 9.860236645283737e-08, "epoch": 4.3585859819705375, "percentage": 87.17, "elapsed_time": "3:51:01", "remaining_time": "0:33:59", "throughput": 8670.96, "total_tokens": 120194920} +{"current_steps": 178415, "total_steps": 204665, "loss": 0.0536, "lr": 9.856544518907362e-08, "epoch": 4.358708132802384, "percentage": 87.17, "elapsed_time": "3:51:02", "remaining_time": "0:33:59", "throughput": 8670.99, "total_tokens": 120198376} +{"current_steps": 178420, "total_steps": 204665, "loss": 0.0655, "lr": 9.852853048078446e-08, "epoch": 4.358830283634232, "percentage": 87.18, "elapsed_time": "3:51:02", "remaining_time": "0:33:59", "throughput": 8671.0, "total_tokens": 120201512} +{"current_steps": 178425, "total_steps": 204665, "loss": 0.0, "lr": 9.849162232823816e-08, "epoch": 4.358952434466079, "percentage": 87.18, "elapsed_time": "3:51:02", "remaining_time": "0:33:58", "throughput": 8671.06, "total_tokens": 120205608} +{"current_steps": 178430, "total_steps": 204665, "loss": 0.0, "lr": 9.845472073170346e-08, "epoch": 4.359074585297926, "percentage": 87.18, "elapsed_time": "3:51:03", "remaining_time": "0:33:58", "throughput": 8671.1, "total_tokens": 120209128} +{"current_steps": 178435, "total_steps": 204665, "loss": 0.0, "lr": 9.84178256914483e-08, "epoch": 4.359196736129773, "percentage": 87.18, "elapsed_time": "3:51:03", "remaining_time": "0:33:57", "throughput": 8671.12, "total_tokens": 120212520} +{"current_steps": 178440, "total_steps": 204665, "loss": 0.0, "lr": 9.83809372077412e-08, "epoch": 4.359318886961621, "percentage": 87.19, "elapsed_time": "3:51:03", "remaining_time": "0:33:57", "throughput": 8671.13, "total_tokens": 120215528} +{"current_steps": 178445, "total_steps": 204665, "loss": 0.0, "lr": 9.834405528085066e-08, "epoch": 4.359441037793467, "percentage": 87.19, "elapsed_time": "3:51:04", "remaining_time": "0:33:57", "throughput": 8671.16, "total_tokens": 120219112} +{"current_steps": 178450, "total_steps": 204665, "loss": 0.0, "lr": 9.830717991104443e-08, "epoch": 4.359563188625314, "percentage": 87.19, "elapsed_time": "3:51:04", "remaining_time": "0:33:56", "throughput": 8671.21, "total_tokens": 120222760} +{"current_steps": 178455, "total_steps": 204665, "loss": 0.0, "lr": 9.827031109859107e-08, "epoch": 4.359685339457162, "percentage": 87.19, "elapsed_time": "3:51:04", "remaining_time": "0:33:56", "throughput": 8671.21, "total_tokens": 120225832} +{"current_steps": 178460, "total_steps": 204665, "loss": 0.0, "lr": 9.82334488437585e-08, "epoch": 4.3598074902890085, "percentage": 87.2, "elapsed_time": "3:51:05", "remaining_time": "0:33:55", "throughput": 8671.23, "total_tokens": 120229096} +{"current_steps": 178465, "total_steps": 204665, "loss": 0.0, "lr": 9.819659314681472e-08, "epoch": 4.359929641120856, "percentage": 87.2, "elapsed_time": "3:51:05", "remaining_time": "0:33:55", "throughput": 8671.26, "total_tokens": 120232552} +{"current_steps": 178470, "total_steps": 204665, "loss": 0.0, "lr": 9.815974400802807e-08, "epoch": 4.360051791952703, "percentage": 87.2, "elapsed_time": "3:51:05", "remaining_time": "0:33:55", "throughput": 8671.33, "total_tokens": 120236584} +{"current_steps": 178475, "total_steps": 204665, "loss": 0.0, "lr": 9.812290142766622e-08, "epoch": 4.3601739427845505, "percentage": 87.2, "elapsed_time": "3:51:06", "remaining_time": "0:33:54", "throughput": 8671.34, "total_tokens": 120239656} +{"current_steps": 178480, "total_steps": 204665, "loss": 0.0, "lr": 9.808606540599728e-08, "epoch": 4.360296093616397, "percentage": 87.21, "elapsed_time": "3:51:06", "remaining_time": "0:33:54", "throughput": 8671.37, "total_tokens": 120243112} +{"current_steps": 178485, "total_steps": 204665, "loss": 0.0, "lr": 9.804923594328907e-08, "epoch": 4.360418244448245, "percentage": 87.21, "elapsed_time": "3:51:07", "remaining_time": "0:33:54", "throughput": 8671.4, "total_tokens": 120246568} +{"current_steps": 178490, "total_steps": 204665, "loss": 0.0011, "lr": 9.801241303980934e-08, "epoch": 4.360540395280092, "percentage": 87.21, "elapsed_time": "3:51:07", "remaining_time": "0:33:53", "throughput": 8671.42, "total_tokens": 120249960} +{"current_steps": 178495, "total_steps": 204665, "loss": 0.0, "lr": 9.7975596695826e-08, "epoch": 4.360662546111939, "percentage": 87.21, "elapsed_time": "3:51:07", "remaining_time": "0:33:53", "throughput": 8671.44, "total_tokens": 120253224} +{"current_steps": 178500, "total_steps": 204665, "loss": 0.0, "lr": 9.793878691160662e-08, "epoch": 4.360784696943786, "percentage": 87.22, "elapsed_time": "3:51:08", "remaining_time": "0:33:52", "throughput": 8671.45, "total_tokens": 120256360} +{"current_steps": 178505, "total_steps": 204665, "loss": 0.0, "lr": 9.79019836874192e-08, "epoch": 4.360906847775634, "percentage": 87.22, "elapsed_time": "3:51:08", "remaining_time": "0:33:52", "throughput": 8671.46, "total_tokens": 120259496} +{"current_steps": 178510, "total_steps": 204665, "loss": 0.0, "lr": 9.786518702353097e-08, "epoch": 4.36102899860748, "percentage": 87.22, "elapsed_time": "3:51:08", "remaining_time": "0:33:52", "throughput": 8671.5, "total_tokens": 120263080} +{"current_steps": 178515, "total_steps": 204665, "loss": 0.0, "lr": 9.782839692020994e-08, "epoch": 4.361151149439328, "percentage": 87.22, "elapsed_time": "3:51:09", "remaining_time": "0:33:51", "throughput": 8671.58, "total_tokens": 120267368} +{"current_steps": 178520, "total_steps": 204665, "loss": 0.0001, "lr": 9.779161337772323e-08, "epoch": 4.361273300271175, "percentage": 87.23, "elapsed_time": "3:51:09", "remaining_time": "0:33:51", "throughput": 8671.63, "total_tokens": 120271144} +{"current_steps": 178525, "total_steps": 204665, "loss": 0.0, "lr": 9.775483639633863e-08, "epoch": 4.361395451103022, "percentage": 87.23, "elapsed_time": "3:51:09", "remaining_time": "0:33:50", "throughput": 8671.62, "total_tokens": 120274024} +{"current_steps": 178530, "total_steps": 204665, "loss": 0.0, "lr": 9.771806597632382e-08, "epoch": 4.361517601934869, "percentage": 87.23, "elapsed_time": "3:51:10", "remaining_time": "0:33:50", "throughput": 8671.66, "total_tokens": 120277480} +{"current_steps": 178535, "total_steps": 204665, "loss": 0.0, "lr": 9.768130211794556e-08, "epoch": 4.361639752766717, "percentage": 87.23, "elapsed_time": "3:51:10", "remaining_time": "0:33:50", "throughput": 8671.66, "total_tokens": 120280424} +{"current_steps": 178540, "total_steps": 204665, "loss": 0.0, "lr": 9.764454482147189e-08, "epoch": 4.3617619035985635, "percentage": 87.24, "elapsed_time": "3:51:10", "remaining_time": "0:33:49", "throughput": 8671.65, "total_tokens": 120283304} +{"current_steps": 178545, "total_steps": 204665, "loss": 0.0, "lr": 9.760779408716946e-08, "epoch": 4.36188405443041, "percentage": 87.24, "elapsed_time": "3:51:11", "remaining_time": "0:33:49", "throughput": 8671.7, "total_tokens": 120287080} +{"current_steps": 178550, "total_steps": 204665, "loss": 0.0, "lr": 9.757104991530618e-08, "epoch": 4.362006205262258, "percentage": 87.24, "elapsed_time": "3:51:11", "remaining_time": "0:33:48", "throughput": 8671.74, "total_tokens": 120290728} +{"current_steps": 178555, "total_steps": 204665, "loss": 0.0001, "lr": 9.753431230614873e-08, "epoch": 4.362128356094105, "percentage": 87.24, "elapsed_time": "3:51:11", "remaining_time": "0:33:48", "throughput": 8671.79, "total_tokens": 120294504} +{"current_steps": 178560, "total_steps": 204665, "loss": 0.0435, "lr": 9.749758125996444e-08, "epoch": 4.362250506925952, "percentage": 87.25, "elapsed_time": "3:51:12", "remaining_time": "0:33:48", "throughput": 8671.81, "total_tokens": 120297832} +{"current_steps": 178565, "total_steps": 204665, "loss": 0.0, "lr": 9.746085677702065e-08, "epoch": 4.362372657757799, "percentage": 87.25, "elapsed_time": "3:51:12", "remaining_time": "0:33:47", "throughput": 8671.83, "total_tokens": 120301096} +{"current_steps": 178570, "total_steps": 204665, "loss": 0.0, "lr": 9.742413885758416e-08, "epoch": 4.362494808589647, "percentage": 87.25, "elapsed_time": "3:51:12", "remaining_time": "0:33:47", "throughput": 8671.86, "total_tokens": 120304424} +{"current_steps": 178575, "total_steps": 204665, "loss": 0.0, "lr": 9.73874275019223e-08, "epoch": 4.362616959421493, "percentage": 87.25, "elapsed_time": "3:51:13", "remaining_time": "0:33:46", "throughput": 8671.87, "total_tokens": 120307688} +{"current_steps": 178580, "total_steps": 204665, "loss": 0.0, "lr": 9.735072271030165e-08, "epoch": 4.362739110253341, "percentage": 87.25, "elapsed_time": "3:51:13", "remaining_time": "0:33:46", "throughput": 8671.9, "total_tokens": 120311144} +{"current_steps": 178585, "total_steps": 204665, "loss": 0.0, "lr": 9.731402448298942e-08, "epoch": 4.362861261085188, "percentage": 87.26, "elapsed_time": "3:51:14", "remaining_time": "0:33:46", "throughput": 8671.91, "total_tokens": 120314216} +{"current_steps": 178590, "total_steps": 204665, "loss": 0.0, "lr": 9.727733282025242e-08, "epoch": 4.362983411917035, "percentage": 87.26, "elapsed_time": "3:51:14", "remaining_time": "0:33:45", "throughput": 8671.95, "total_tokens": 120317800} +{"current_steps": 178595, "total_steps": 204665, "loss": 0.0, "lr": 9.724064772235729e-08, "epoch": 4.363105562748882, "percentage": 87.26, "elapsed_time": "3:51:14", "remaining_time": "0:33:45", "throughput": 8672.0, "total_tokens": 120321576} +{"current_steps": 178600, "total_steps": 204665, "loss": 0.0, "lr": 9.720396918957118e-08, "epoch": 4.36322771358073, "percentage": 87.26, "elapsed_time": "3:51:15", "remaining_time": "0:33:44", "throughput": 8672.03, "total_tokens": 120325096} +{"current_steps": 178605, "total_steps": 204665, "loss": 0.0, "lr": 9.716729722216055e-08, "epoch": 4.3633498644125766, "percentage": 87.27, "elapsed_time": "3:51:15", "remaining_time": "0:33:44", "throughput": 8672.05, "total_tokens": 120328360} +{"current_steps": 178610, "total_steps": 204665, "loss": 0.0, "lr": 9.713063182039216e-08, "epoch": 4.363472015244424, "percentage": 87.27, "elapsed_time": "3:51:15", "remaining_time": "0:33:44", "throughput": 8672.06, "total_tokens": 120331496} +{"current_steps": 178615, "total_steps": 204665, "loss": 0.0, "lr": 9.709397298453259e-08, "epoch": 4.363594166076271, "percentage": 87.27, "elapsed_time": "3:51:16", "remaining_time": "0:33:43", "throughput": 8672.07, "total_tokens": 120334632} +{"current_steps": 178620, "total_steps": 204665, "loss": 0.0, "lr": 9.705732071484851e-08, "epoch": 4.363716316908118, "percentage": 87.27, "elapsed_time": "3:51:16", "remaining_time": "0:33:43", "throughput": 8672.09, "total_tokens": 120337832} +{"current_steps": 178625, "total_steps": 204665, "loss": 0.0, "lr": 9.70206750116066e-08, "epoch": 4.363838467739965, "percentage": 87.28, "elapsed_time": "3:51:16", "remaining_time": "0:33:42", "throughput": 8672.12, "total_tokens": 120341288} +{"current_steps": 178630, "total_steps": 204665, "loss": 0.0, "lr": 9.698403587507298e-08, "epoch": 4.363960618571813, "percentage": 87.28, "elapsed_time": "3:51:17", "remaining_time": "0:33:42", "throughput": 8672.14, "total_tokens": 120344616} +{"current_steps": 178635, "total_steps": 204665, "loss": 0.083, "lr": 9.694740330551465e-08, "epoch": 4.36408276940366, "percentage": 87.28, "elapsed_time": "3:51:17", "remaining_time": "0:33:42", "throughput": 8672.14, "total_tokens": 120347624} +{"current_steps": 178640, "total_steps": 204665, "loss": 0.0, "lr": 9.691077730319741e-08, "epoch": 4.364204920235506, "percentage": 87.28, "elapsed_time": "3:51:17", "remaining_time": "0:33:41", "throughput": 8672.19, "total_tokens": 120351336} +{"current_steps": 178645, "total_steps": 204665, "loss": 0.0, "lr": 9.687415786838804e-08, "epoch": 4.364327071067354, "percentage": 87.29, "elapsed_time": "3:51:18", "remaining_time": "0:33:41", "throughput": 8672.21, "total_tokens": 120354664} +{"current_steps": 178650, "total_steps": 204665, "loss": 0.0, "lr": 9.683754500135266e-08, "epoch": 4.364449221899201, "percentage": 87.29, "elapsed_time": "3:51:18", "remaining_time": "0:33:40", "throughput": 8672.24, "total_tokens": 120358184} +{"current_steps": 178655, "total_steps": 204665, "loss": 0.0, "lr": 9.68009387023575e-08, "epoch": 4.3645713727310484, "percentage": 87.29, "elapsed_time": "3:51:18", "remaining_time": "0:33:40", "throughput": 8672.25, "total_tokens": 120361192} +{"current_steps": 178660, "total_steps": 204665, "loss": 0.0, "lr": 9.676433897166903e-08, "epoch": 4.364693523562895, "percentage": 87.29, "elapsed_time": "3:51:19", "remaining_time": "0:33:40", "throughput": 8672.26, "total_tokens": 120364328} +{"current_steps": 178665, "total_steps": 204665, "loss": 0.0, "lr": 9.6727745809553e-08, "epoch": 4.364815674394743, "percentage": 87.3, "elapsed_time": "3:51:19", "remaining_time": "0:33:39", "throughput": 8672.27, "total_tokens": 120367400} +{"current_steps": 178670, "total_steps": 204665, "loss": 0.0, "lr": 9.669115921627602e-08, "epoch": 4.36493782522659, "percentage": 87.3, "elapsed_time": "3:51:19", "remaining_time": "0:33:39", "throughput": 8672.27, "total_tokens": 120370408} +{"current_steps": 178675, "total_steps": 204665, "loss": 0.0, "lr": 9.665457919210363e-08, "epoch": 4.365059976058437, "percentage": 87.3, "elapsed_time": "3:51:20", "remaining_time": "0:33:39", "throughput": 8672.29, "total_tokens": 120373672} +{"current_steps": 178680, "total_steps": 204665, "loss": 0.0, "lr": 9.661800573730239e-08, "epoch": 4.365182126890284, "percentage": 87.3, "elapsed_time": "3:51:20", "remaining_time": "0:33:38", "throughput": 8672.29, "total_tokens": 120376744} +{"current_steps": 178685, "total_steps": 204665, "loss": 0.0, "lr": 9.658143885213776e-08, "epoch": 4.365304277722132, "percentage": 87.31, "elapsed_time": "3:51:20", "remaining_time": "0:33:38", "throughput": 8672.32, "total_tokens": 120380072} +{"current_steps": 178690, "total_steps": 204665, "loss": 0.0, "lr": 9.65448785368761e-08, "epoch": 4.365426428553978, "percentage": 87.31, "elapsed_time": "3:51:21", "remaining_time": "0:33:37", "throughput": 8672.33, "total_tokens": 120383272} +{"current_steps": 178695, "total_steps": 204665, "loss": 0.0, "lr": 9.650832479178283e-08, "epoch": 4.365548579385826, "percentage": 87.31, "elapsed_time": "3:51:21", "remaining_time": "0:33:37", "throughput": 8672.35, "total_tokens": 120386536} +{"current_steps": 178700, "total_steps": 204665, "loss": 0.0, "lr": 9.647177761712421e-08, "epoch": 4.365670730217673, "percentage": 87.31, "elapsed_time": "3:51:21", "remaining_time": "0:33:37", "throughput": 8672.38, "total_tokens": 120389928} +{"current_steps": 178705, "total_steps": 204665, "loss": 0.0, "lr": 9.643523701316591e-08, "epoch": 4.36579288104952, "percentage": 87.32, "elapsed_time": "3:51:22", "remaining_time": "0:33:36", "throughput": 8672.41, "total_tokens": 120393384} +{"current_steps": 178710, "total_steps": 204665, "loss": 0.0, "lr": 9.639870298017339e-08, "epoch": 4.365915031881367, "percentage": 87.32, "elapsed_time": "3:51:22", "remaining_time": "0:33:36", "throughput": 8672.42, "total_tokens": 120396456} +{"current_steps": 178715, "total_steps": 204665, "loss": 0.0, "lr": 9.636217551841253e-08, "epoch": 4.366037182713214, "percentage": 87.32, "elapsed_time": "3:51:23", "remaining_time": "0:33:35", "throughput": 8672.43, "total_tokens": 120399656} +{"current_steps": 178720, "total_steps": 204665, "loss": 0.0, "lr": 9.632565462814923e-08, "epoch": 4.3661593335450615, "percentage": 87.32, "elapsed_time": "3:51:23", "remaining_time": "0:33:35", "throughput": 8672.44, "total_tokens": 120402728} +{"current_steps": 178725, "total_steps": 204665, "loss": 0.0, "lr": 9.628914030964863e-08, "epoch": 4.366281484376908, "percentage": 87.33, "elapsed_time": "3:51:23", "remaining_time": "0:33:35", "throughput": 8672.44, "total_tokens": 120405736} +{"current_steps": 178730, "total_steps": 204665, "loss": 0.0, "lr": 9.625263256317661e-08, "epoch": 4.366403635208756, "percentage": 87.33, "elapsed_time": "3:51:24", "remaining_time": "0:33:34", "throughput": 8672.47, "total_tokens": 120409192} +{"current_steps": 178735, "total_steps": 204665, "loss": 0.0, "lr": 9.621613138899831e-08, "epoch": 4.366525786040603, "percentage": 87.33, "elapsed_time": "3:51:24", "remaining_time": "0:33:34", "throughput": 8672.5, "total_tokens": 120412520} +{"current_steps": 178740, "total_steps": 204665, "loss": 0.0, "lr": 9.617963678737961e-08, "epoch": 4.36664793687245, "percentage": 87.33, "elapsed_time": "3:51:24", "remaining_time": "0:33:33", "throughput": 8672.51, "total_tokens": 120415656} +{"current_steps": 178745, "total_steps": 204665, "loss": 0.0, "lr": 9.614314875858554e-08, "epoch": 4.366770087704297, "percentage": 87.34, "elapsed_time": "3:51:25", "remaining_time": "0:33:33", "throughput": 8672.52, "total_tokens": 120418856} +{"current_steps": 178750, "total_steps": 204665, "loss": 0.0, "lr": 9.610666730288152e-08, "epoch": 4.366892238536145, "percentage": 87.34, "elapsed_time": "3:51:25", "remaining_time": "0:33:33", "throughput": 8672.54, "total_tokens": 120422056} +{"current_steps": 178755, "total_steps": 204665, "loss": 0.0, "lr": 9.607019242053315e-08, "epoch": 4.367014389367991, "percentage": 87.34, "elapsed_time": "3:51:25", "remaining_time": "0:33:32", "throughput": 8672.56, "total_tokens": 120425384} +{"current_steps": 178760, "total_steps": 204665, "loss": 0.0, "lr": 9.603372411180532e-08, "epoch": 4.367136540199839, "percentage": 87.34, "elapsed_time": "3:51:26", "remaining_time": "0:33:32", "throughput": 8672.61, "total_tokens": 120429160} +{"current_steps": 178765, "total_steps": 204665, "loss": 0.0, "lr": 9.599726237696359e-08, "epoch": 4.367258691031686, "percentage": 87.35, "elapsed_time": "3:51:26", "remaining_time": "0:33:31", "throughput": 8672.63, "total_tokens": 120432488} +{"current_steps": 178770, "total_steps": 204665, "loss": 0.0, "lr": 9.596080721627264e-08, "epoch": 4.367380841863533, "percentage": 87.35, "elapsed_time": "3:51:26", "remaining_time": "0:33:31", "throughput": 8672.64, "total_tokens": 120435624} +{"current_steps": 178775, "total_steps": 204665, "loss": 0.0, "lr": 9.592435862999793e-08, "epoch": 4.36750299269538, "percentage": 87.35, "elapsed_time": "3:51:27", "remaining_time": "0:33:31", "throughput": 8672.69, "total_tokens": 120439272} +{"current_steps": 178780, "total_steps": 204665, "loss": 0.0, "lr": 9.588791661840468e-08, "epoch": 4.367625143527228, "percentage": 87.35, "elapsed_time": "3:51:27", "remaining_time": "0:33:30", "throughput": 8672.69, "total_tokens": 120442280} +{"current_steps": 178785, "total_steps": 204665, "loss": 0.0, "lr": 9.585148118175746e-08, "epoch": 4.3677472943590745, "percentage": 87.35, "elapsed_time": "3:51:27", "remaining_time": "0:33:30", "throughput": 8672.75, "total_tokens": 120446248} +{"current_steps": 178790, "total_steps": 204665, "loss": 0.0, "lr": 9.581505232032161e-08, "epoch": 4.367869445190922, "percentage": 87.36, "elapsed_time": "3:51:28", "remaining_time": "0:33:29", "throughput": 8672.74, "total_tokens": 120449064} +{"current_steps": 178795, "total_steps": 204665, "loss": 0.0, "lr": 9.577863003436182e-08, "epoch": 4.367991596022769, "percentage": 87.36, "elapsed_time": "3:51:28", "remaining_time": "0:33:29", "throughput": 8672.76, "total_tokens": 120452328} +{"current_steps": 178800, "total_steps": 204665, "loss": 0.0, "lr": 9.574221432414297e-08, "epoch": 4.3681137468546165, "percentage": 87.36, "elapsed_time": "3:51:28", "remaining_time": "0:33:29", "throughput": 8672.76, "total_tokens": 120455208} +{"current_steps": 178805, "total_steps": 204665, "loss": 0.0, "lr": 9.57058051899301e-08, "epoch": 4.368235897686463, "percentage": 87.36, "elapsed_time": "3:51:29", "remaining_time": "0:33:28", "throughput": 8672.77, "total_tokens": 120458472} +{"current_steps": 178810, "total_steps": 204665, "loss": 0.0, "lr": 9.566940263198764e-08, "epoch": 4.36835804851831, "percentage": 87.37, "elapsed_time": "3:51:29", "remaining_time": "0:33:28", "throughput": 8672.81, "total_tokens": 120462056} +{"current_steps": 178815, "total_steps": 204665, "loss": 0.0, "lr": 9.563300665058072e-08, "epoch": 4.368480199350158, "percentage": 87.37, "elapsed_time": "3:51:29", "remaining_time": "0:33:27", "throughput": 8672.85, "total_tokens": 120465640} +{"current_steps": 178820, "total_steps": 204665, "loss": 0.0, "lr": 9.559661724597368e-08, "epoch": 4.368602350182004, "percentage": 87.37, "elapsed_time": "3:51:30", "remaining_time": "0:33:27", "throughput": 8672.85, "total_tokens": 120468520} +{"current_steps": 178825, "total_steps": 204665, "loss": 0.0, "lr": 9.556023441843142e-08, "epoch": 4.368724501013852, "percentage": 87.37, "elapsed_time": "3:51:30", "remaining_time": "0:33:27", "throughput": 8672.87, "total_tokens": 120471848} +{"current_steps": 178830, "total_steps": 204665, "loss": 0.0003, "lr": 9.552385816821818e-08, "epoch": 4.368846651845699, "percentage": 87.38, "elapsed_time": "3:51:31", "remaining_time": "0:33:26", "throughput": 8672.9, "total_tokens": 120475240} +{"current_steps": 178835, "total_steps": 204665, "loss": 0.0, "lr": 9.548748849559896e-08, "epoch": 4.368968802677546, "percentage": 87.38, "elapsed_time": "3:51:31", "remaining_time": "0:33:26", "throughput": 8672.96, "total_tokens": 120479208} +{"current_steps": 178840, "total_steps": 204665, "loss": 0.0, "lr": 9.545112540083788e-08, "epoch": 4.369090953509393, "percentage": 87.38, "elapsed_time": "3:51:31", "remaining_time": "0:33:26", "throughput": 8672.96, "total_tokens": 120482152} +{"current_steps": 178845, "total_steps": 204665, "loss": 0.0, "lr": 9.541476888419942e-08, "epoch": 4.369213104341241, "percentage": 87.38, "elapsed_time": "3:51:32", "remaining_time": "0:33:25", "throughput": 8672.97, "total_tokens": 120485416} +{"current_steps": 178850, "total_steps": 204665, "loss": 0.0, "lr": 9.537841894594823e-08, "epoch": 4.3693352551730875, "percentage": 87.39, "elapsed_time": "3:51:32", "remaining_time": "0:33:25", "throughput": 8673.01, "total_tokens": 120488936} +{"current_steps": 178855, "total_steps": 204665, "loss": 0.0, "lr": 9.534207558634833e-08, "epoch": 4.369457406004935, "percentage": 87.39, "elapsed_time": "3:51:32", "remaining_time": "0:33:24", "throughput": 8673.05, "total_tokens": 120492584} +{"current_steps": 178860, "total_steps": 204665, "loss": 0.0931, "lr": 9.53057388056644e-08, "epoch": 4.369579556836782, "percentage": 87.39, "elapsed_time": "3:51:33", "remaining_time": "0:33:24", "throughput": 8673.04, "total_tokens": 120495272} +{"current_steps": 178865, "total_steps": 204665, "loss": 0.0, "lr": 9.526940860416033e-08, "epoch": 4.3697017076686295, "percentage": 87.39, "elapsed_time": "3:51:33", "remaining_time": "0:33:24", "throughput": 8673.12, "total_tokens": 120499560} +{"current_steps": 178870, "total_steps": 204665, "loss": 0.0, "lr": 9.523308498210036e-08, "epoch": 4.369823858500476, "percentage": 87.4, "elapsed_time": "3:51:33", "remaining_time": "0:33:23", "throughput": 8673.14, "total_tokens": 120502824} +{"current_steps": 178875, "total_steps": 204665, "loss": 0.0, "lr": 9.519676793974907e-08, "epoch": 4.369946009332324, "percentage": 87.4, "elapsed_time": "3:51:34", "remaining_time": "0:33:23", "throughput": 8673.14, "total_tokens": 120505896} +{"current_steps": 178880, "total_steps": 204665, "loss": 0.0, "lr": 9.516045747736989e-08, "epoch": 4.370068160164171, "percentage": 87.4, "elapsed_time": "3:51:34", "remaining_time": "0:33:22", "throughput": 8673.14, "total_tokens": 120508840} +{"current_steps": 178885, "total_steps": 204665, "loss": 0.0, "lr": 9.512415359522752e-08, "epoch": 4.370190310996018, "percentage": 87.4, "elapsed_time": "3:51:34", "remaining_time": "0:33:22", "throughput": 8673.16, "total_tokens": 120512040} +{"current_steps": 178890, "total_steps": 204665, "loss": 0.0, "lr": 9.508785629358552e-08, "epoch": 4.370312461827865, "percentage": 87.41, "elapsed_time": "3:51:35", "remaining_time": "0:33:22", "throughput": 8673.19, "total_tokens": 120515560} +{"current_steps": 178895, "total_steps": 204665, "loss": 0.0, "lr": 9.50515655727081e-08, "epoch": 4.370434612659713, "percentage": 87.41, "elapsed_time": "3:51:35", "remaining_time": "0:33:21", "throughput": 8673.23, "total_tokens": 120519208} +{"current_steps": 178900, "total_steps": 204665, "loss": 0.0, "lr": 9.50152814328592e-08, "epoch": 4.370556763491559, "percentage": 87.41, "elapsed_time": "3:51:35", "remaining_time": "0:33:21", "throughput": 8673.24, "total_tokens": 120522344} +{"current_steps": 178905, "total_steps": 204665, "loss": 0.0, "lr": 9.497900387430236e-08, "epoch": 4.370678914323406, "percentage": 87.41, "elapsed_time": "3:51:36", "remaining_time": "0:33:20", "throughput": 8673.28, "total_tokens": 120525864} +{"current_steps": 178910, "total_steps": 204665, "loss": 0.0, "lr": 9.494273289730181e-08, "epoch": 4.370801065155254, "percentage": 87.42, "elapsed_time": "3:51:36", "remaining_time": "0:33:20", "throughput": 8673.31, "total_tokens": 120529320} +{"current_steps": 178915, "total_steps": 204665, "loss": 0.0, "lr": 9.490646850212103e-08, "epoch": 4.3709232159871005, "percentage": 87.42, "elapsed_time": "3:51:36", "remaining_time": "0:33:20", "throughput": 8673.31, "total_tokens": 120532328} +{"current_steps": 178920, "total_steps": 204665, "loss": 0.0, "lr": 9.487021068902402e-08, "epoch": 4.371045366818948, "percentage": 87.42, "elapsed_time": "3:51:37", "remaining_time": "0:33:19", "throughput": 8673.35, "total_tokens": 120535976} +{"current_steps": 178925, "total_steps": 204665, "loss": 0.0, "lr": 9.483395945827399e-08, "epoch": 4.371167517650795, "percentage": 87.42, "elapsed_time": "3:51:37", "remaining_time": "0:33:19", "throughput": 8673.37, "total_tokens": 120539176} +{"current_steps": 178930, "total_steps": 204665, "loss": 0.0, "lr": 9.479771481013488e-08, "epoch": 4.3712896684826426, "percentage": 87.43, "elapsed_time": "3:51:37", "remaining_time": "0:33:18", "throughput": 8673.4, "total_tokens": 120542696} +{"current_steps": 178935, "total_steps": 204665, "loss": 0.0, "lr": 9.476147674487056e-08, "epoch": 4.371411819314489, "percentage": 87.43, "elapsed_time": "3:51:38", "remaining_time": "0:33:18", "throughput": 8673.43, "total_tokens": 120546152} +{"current_steps": 178940, "total_steps": 204665, "loss": 0.0, "lr": 9.472524526274394e-08, "epoch": 4.371533970146337, "percentage": 87.43, "elapsed_time": "3:51:38", "remaining_time": "0:33:18", "throughput": 8673.47, "total_tokens": 120549800} +{"current_steps": 178945, "total_steps": 204665, "loss": 0.0, "lr": 9.468902036401916e-08, "epoch": 4.371656120978184, "percentage": 87.43, "elapsed_time": "3:51:39", "remaining_time": "0:33:17", "throughput": 8673.51, "total_tokens": 120553448} +{"current_steps": 178950, "total_steps": 204665, "loss": 0.0, "lr": 9.465280204895909e-08, "epoch": 4.371778271810031, "percentage": 87.44, "elapsed_time": "3:51:39", "remaining_time": "0:33:17", "throughput": 8673.53, "total_tokens": 120556712} +{"current_steps": 178955, "total_steps": 204665, "loss": 0.0, "lr": 9.461659031782742e-08, "epoch": 4.371900422641878, "percentage": 87.44, "elapsed_time": "3:51:39", "remaining_time": "0:33:16", "throughput": 8673.56, "total_tokens": 120560168} +{"current_steps": 178960, "total_steps": 204665, "loss": 0.0, "lr": 9.45803851708874e-08, "epoch": 4.372022573473726, "percentage": 87.44, "elapsed_time": "3:51:40", "remaining_time": "0:33:16", "throughput": 8673.57, "total_tokens": 120563304} +{"current_steps": 178965, "total_steps": 204665, "loss": 0.0, "lr": 9.454418660840225e-08, "epoch": 4.372144724305572, "percentage": 87.44, "elapsed_time": "3:51:40", "remaining_time": "0:33:16", "throughput": 8673.59, "total_tokens": 120566504} +{"current_steps": 178970, "total_steps": 204665, "loss": 0.0, "lr": 9.450799463063552e-08, "epoch": 4.37226687513742, "percentage": 87.45, "elapsed_time": "3:51:40", "remaining_time": "0:33:15", "throughput": 8673.63, "total_tokens": 120570152} +{"current_steps": 178975, "total_steps": 204665, "loss": 0.0, "lr": 9.447180923785004e-08, "epoch": 4.372389025969267, "percentage": 87.45, "elapsed_time": "3:51:41", "remaining_time": "0:33:15", "throughput": 8673.67, "total_tokens": 120573800} +{"current_steps": 178980, "total_steps": 204665, "loss": 0.0, "lr": 9.443563043030922e-08, "epoch": 4.372511176801114, "percentage": 87.45, "elapsed_time": "3:51:41", "remaining_time": "0:33:14", "throughput": 8673.72, "total_tokens": 120577576} +{"current_steps": 178985, "total_steps": 204665, "loss": 0.0, "lr": 9.4399458208276e-08, "epoch": 4.372633327632961, "percentage": 87.45, "elapsed_time": "3:51:41", "remaining_time": "0:33:14", "throughput": 8673.76, "total_tokens": 120581288} +{"current_steps": 178990, "total_steps": 204665, "loss": 0.0, "lr": 9.436329257201359e-08, "epoch": 4.372755478464808, "percentage": 87.46, "elapsed_time": "3:51:42", "remaining_time": "0:33:14", "throughput": 8673.78, "total_tokens": 120584488} +{"current_steps": 178995, "total_steps": 204665, "loss": 0.0, "lr": 9.432713352178479e-08, "epoch": 4.372877629296656, "percentage": 87.46, "elapsed_time": "3:51:42", "remaining_time": "0:33:13", "throughput": 8673.63, "total_tokens": 120587688} +{"current_steps": 179000, "total_steps": 204665, "loss": 0.0, "lr": 9.429098105785283e-08, "epoch": 4.372999780128502, "percentage": 87.46, "elapsed_time": "3:51:43", "remaining_time": "0:33:13", "throughput": 8673.64, "total_tokens": 120590888} +{"current_steps": 179005, "total_steps": 204665, "loss": 0.0, "lr": 9.425483518048028e-08, "epoch": 4.37312193096035, "percentage": 87.46, "elapsed_time": "3:51:43", "remaining_time": "0:33:13", "throughput": 8673.66, "total_tokens": 120594152} +{"current_steps": 179010, "total_steps": 204665, "loss": 0.0, "lr": 9.421869588993025e-08, "epoch": 4.373244081792197, "percentage": 87.46, "elapsed_time": "3:51:43", "remaining_time": "0:33:12", "throughput": 8673.67, "total_tokens": 120597352} +{"current_steps": 179015, "total_steps": 204665, "loss": 0.0, "lr": 9.418256318646567e-08, "epoch": 4.373366232624044, "percentage": 87.47, "elapsed_time": "3:51:44", "remaining_time": "0:33:12", "throughput": 8673.69, "total_tokens": 120600616} +{"current_steps": 179020, "total_steps": 204665, "loss": 0.0, "lr": 9.414643707034886e-08, "epoch": 4.373488383455891, "percentage": 87.47, "elapsed_time": "3:51:44", "remaining_time": "0:33:11", "throughput": 8673.71, "total_tokens": 120603880} +{"current_steps": 179025, "total_steps": 204665, "loss": 0.0, "lr": 9.411031754184285e-08, "epoch": 4.373610534287739, "percentage": 87.47, "elapsed_time": "3:51:44", "remaining_time": "0:33:11", "throughput": 8673.76, "total_tokens": 120607656} +{"current_steps": 179030, "total_steps": 204665, "loss": 0.0, "lr": 9.40742046012104e-08, "epoch": 4.3737326851195855, "percentage": 87.47, "elapsed_time": "3:51:45", "remaining_time": "0:33:11", "throughput": 8673.78, "total_tokens": 120610920} +{"current_steps": 179035, "total_steps": 204665, "loss": 0.0, "lr": 9.403809824871378e-08, "epoch": 4.373854835951433, "percentage": 87.48, "elapsed_time": "3:51:45", "remaining_time": "0:33:10", "throughput": 8673.81, "total_tokens": 120614440} +{"current_steps": 179040, "total_steps": 204665, "loss": 0.0, "lr": 9.400199848461598e-08, "epoch": 4.37397698678328, "percentage": 87.48, "elapsed_time": "3:51:45", "remaining_time": "0:33:10", "throughput": 8673.84, "total_tokens": 120617832} +{"current_steps": 179045, "total_steps": 204665, "loss": 0.0, "lr": 9.396590530917925e-08, "epoch": 4.3740991376151275, "percentage": 87.48, "elapsed_time": "3:51:46", "remaining_time": "0:33:09", "throughput": 8673.87, "total_tokens": 120621416} +{"current_steps": 179050, "total_steps": 204665, "loss": 0.0, "lr": 9.392981872266626e-08, "epoch": 4.374221288446974, "percentage": 87.48, "elapsed_time": "3:51:46", "remaining_time": "0:33:09", "throughput": 8673.93, "total_tokens": 120625192} +{"current_steps": 179055, "total_steps": 204665, "loss": 0.0, "lr": 9.389373872533912e-08, "epoch": 4.374343439278822, "percentage": 87.49, "elapsed_time": "3:51:46", "remaining_time": "0:33:09", "throughput": 8673.94, "total_tokens": 120628328} +{"current_steps": 179060, "total_steps": 204665, "loss": 0.0001, "lr": 9.385766531746053e-08, "epoch": 4.374465590110669, "percentage": 87.49, "elapsed_time": "3:51:47", "remaining_time": "0:33:08", "throughput": 8673.95, "total_tokens": 120631528} +{"current_steps": 179065, "total_steps": 204665, "loss": 0.0302, "lr": 9.382159849929284e-08, "epoch": 4.374587740942516, "percentage": 87.49, "elapsed_time": "3:51:47", "remaining_time": "0:33:08", "throughput": 8673.98, "total_tokens": 120634856} +{"current_steps": 179070, "total_steps": 204665, "loss": 0.0, "lr": 9.378553827109803e-08, "epoch": 4.374709891774363, "percentage": 87.49, "elapsed_time": "3:51:48", "remaining_time": "0:33:07", "throughput": 8673.98, "total_tokens": 120637928} +{"current_steps": 179075, "total_steps": 204665, "loss": 0.0, "lr": 9.37494846331387e-08, "epoch": 4.37483204260621, "percentage": 87.5, "elapsed_time": "3:51:48", "remaining_time": "0:33:07", "throughput": 8673.99, "total_tokens": 120641064} +{"current_steps": 179080, "total_steps": 204665, "loss": 0.0, "lr": 9.371343758567663e-08, "epoch": 4.374954193438057, "percentage": 87.5, "elapsed_time": "3:51:48", "remaining_time": "0:33:07", "throughput": 8674.0, "total_tokens": 120644136} +{"current_steps": 179085, "total_steps": 204665, "loss": 0.0, "lr": 9.367739712897426e-08, "epoch": 4.375076344269904, "percentage": 87.5, "elapsed_time": "3:51:49", "remaining_time": "0:33:06", "throughput": 8674.03, "total_tokens": 120647592} +{"current_steps": 179090, "total_steps": 204665, "loss": 0.0, "lr": 9.364136326329386e-08, "epoch": 4.375198495101752, "percentage": 87.5, "elapsed_time": "3:51:49", "remaining_time": "0:33:06", "throughput": 8674.05, "total_tokens": 120650792} +{"current_steps": 179095, "total_steps": 204665, "loss": 0.0913, "lr": 9.360533598889708e-08, "epoch": 4.3753206459335985, "percentage": 87.51, "elapsed_time": "3:51:49", "remaining_time": "0:33:05", "throughput": 8674.07, "total_tokens": 120654120} +{"current_steps": 179100, "total_steps": 204665, "loss": 0.0555, "lr": 9.356931530604617e-08, "epoch": 4.375442796765446, "percentage": 87.51, "elapsed_time": "3:51:50", "remaining_time": "0:33:05", "throughput": 8674.08, "total_tokens": 120657256} +{"current_steps": 179105, "total_steps": 204665, "loss": 0.0, "lr": 9.353330121500302e-08, "epoch": 4.375564947597293, "percentage": 87.51, "elapsed_time": "3:51:50", "remaining_time": "0:33:05", "throughput": 8674.09, "total_tokens": 120660392} +{"current_steps": 179110, "total_steps": 204665, "loss": 0.0, "lr": 9.349729371602944e-08, "epoch": 4.3756870984291405, "percentage": 87.51, "elapsed_time": "3:51:50", "remaining_time": "0:33:04", "throughput": 8674.12, "total_tokens": 120663720} +{"current_steps": 179115, "total_steps": 204665, "loss": 0.0, "lr": 9.346129280938742e-08, "epoch": 4.375809249260987, "percentage": 87.52, "elapsed_time": "3:51:51", "remaining_time": "0:33:04", "throughput": 8674.15, "total_tokens": 120667176} +{"current_steps": 179120, "total_steps": 204665, "loss": 0.0, "lr": 9.342529849533853e-08, "epoch": 4.375931400092835, "percentage": 87.52, "elapsed_time": "3:51:51", "remaining_time": "0:33:03", "throughput": 8674.19, "total_tokens": 120670888} +{"current_steps": 179125, "total_steps": 204665, "loss": 0.0, "lr": 9.338931077414492e-08, "epoch": 4.376053550924682, "percentage": 87.52, "elapsed_time": "3:51:51", "remaining_time": "0:33:03", "throughput": 8674.22, "total_tokens": 120674280} +{"current_steps": 179130, "total_steps": 204665, "loss": 0.0, "lr": 9.335332964606802e-08, "epoch": 4.376175701756529, "percentage": 87.52, "elapsed_time": "3:51:52", "remaining_time": "0:33:03", "throughput": 8674.24, "total_tokens": 120677608} +{"current_steps": 179135, "total_steps": 204665, "loss": 0.0, "lr": 9.331735511136962e-08, "epoch": 4.376297852588376, "percentage": 87.53, "elapsed_time": "3:51:52", "remaining_time": "0:33:02", "throughput": 8674.27, "total_tokens": 120681064} +{"current_steps": 179140, "total_steps": 204665, "loss": 0.0, "lr": 9.328138717031109e-08, "epoch": 4.376420003420224, "percentage": 87.53, "elapsed_time": "3:51:52", "remaining_time": "0:33:02", "throughput": 8674.29, "total_tokens": 120684328} +{"current_steps": 179145, "total_steps": 204665, "loss": 0.0489, "lr": 9.324542582315442e-08, "epoch": 4.37654215425207, "percentage": 87.53, "elapsed_time": "3:51:53", "remaining_time": "0:33:02", "throughput": 8674.29, "total_tokens": 120687400} +{"current_steps": 179150, "total_steps": 204665, "loss": 0.0, "lr": 9.320947107016074e-08, "epoch": 4.376664305083918, "percentage": 87.53, "elapsed_time": "3:51:53", "remaining_time": "0:33:01", "throughput": 8674.31, "total_tokens": 120690600} +{"current_steps": 179155, "total_steps": 204665, "loss": 0.0, "lr": 9.317352291159164e-08, "epoch": 4.376786455915765, "percentage": 87.54, "elapsed_time": "3:51:53", "remaining_time": "0:33:01", "throughput": 8674.35, "total_tokens": 120694312} +{"current_steps": 179160, "total_steps": 204665, "loss": 0.0, "lr": 9.313758134770877e-08, "epoch": 4.376908606747612, "percentage": 87.54, "elapsed_time": "3:51:54", "remaining_time": "0:33:00", "throughput": 8674.37, "total_tokens": 120697576} +{"current_steps": 179165, "total_steps": 204665, "loss": 0.0, "lr": 9.310164637877316e-08, "epoch": 4.377030757579459, "percentage": 87.54, "elapsed_time": "3:51:54", "remaining_time": "0:33:00", "throughput": 8674.41, "total_tokens": 120701160} +{"current_steps": 179170, "total_steps": 204665, "loss": 0.0, "lr": 9.306571800504648e-08, "epoch": 4.377152908411306, "percentage": 87.54, "elapsed_time": "3:51:54", "remaining_time": "0:33:00", "throughput": 8674.46, "total_tokens": 120705000} +{"current_steps": 179175, "total_steps": 204665, "loss": 0.0001, "lr": 9.302979622678964e-08, "epoch": 4.3772750592431535, "percentage": 87.55, "elapsed_time": "3:51:55", "remaining_time": "0:32:59", "throughput": 8674.48, "total_tokens": 120708264} +{"current_steps": 179180, "total_steps": 204665, "loss": 0.0, "lr": 9.299388104426409e-08, "epoch": 4.377397210075, "percentage": 87.55, "elapsed_time": "3:51:55", "remaining_time": "0:32:59", "throughput": 8674.49, "total_tokens": 120711400} +{"current_steps": 179185, "total_steps": 204665, "loss": 0.0, "lr": 9.295797245773119e-08, "epoch": 4.377519360906848, "percentage": 87.55, "elapsed_time": "3:51:56", "remaining_time": "0:32:58", "throughput": 8674.49, "total_tokens": 120714344} +{"current_steps": 179190, "total_steps": 204665, "loss": 0.0, "lr": 9.29220704674516e-08, "epoch": 4.377641511738695, "percentage": 87.55, "elapsed_time": "3:51:56", "remaining_time": "0:32:58", "throughput": 8674.49, "total_tokens": 120717352} +{"current_steps": 179195, "total_steps": 204665, "loss": 0.0, "lr": 9.288617507368701e-08, "epoch": 4.377763662570542, "percentage": 87.56, "elapsed_time": "3:51:56", "remaining_time": "0:32:58", "throughput": 8674.51, "total_tokens": 120720488} +{"current_steps": 179200, "total_steps": 204665, "loss": 0.0, "lr": 9.285028627669789e-08, "epoch": 4.377885813402389, "percentage": 87.56, "elapsed_time": "3:51:57", "remaining_time": "0:32:57", "throughput": 8674.53, "total_tokens": 120723752} +{"current_steps": 179205, "total_steps": 204665, "loss": 0.0, "lr": 9.281440407674556e-08, "epoch": 4.378007964234237, "percentage": 87.56, "elapsed_time": "3:51:57", "remaining_time": "0:32:57", "throughput": 8674.54, "total_tokens": 120726888} +{"current_steps": 179210, "total_steps": 204665, "loss": 0.0, "lr": 9.277852847409107e-08, "epoch": 4.378130115066083, "percentage": 87.56, "elapsed_time": "3:51:57", "remaining_time": "0:32:56", "throughput": 8674.58, "total_tokens": 120730536} +{"current_steps": 179215, "total_steps": 204665, "loss": 0.0, "lr": 9.274265946899484e-08, "epoch": 4.378252265897931, "percentage": 87.57, "elapsed_time": "3:51:58", "remaining_time": "0:32:56", "throughput": 8674.64, "total_tokens": 120734504} +{"current_steps": 179220, "total_steps": 204665, "loss": 0.0, "lr": 9.270679706171825e-08, "epoch": 4.378374416729778, "percentage": 87.57, "elapsed_time": "3:51:58", "remaining_time": "0:32:56", "throughput": 8674.64, "total_tokens": 120737448} +{"current_steps": 179225, "total_steps": 204665, "loss": 0.0001, "lr": 9.267094125252161e-08, "epoch": 4.378496567561625, "percentage": 87.57, "elapsed_time": "3:51:58", "remaining_time": "0:32:55", "throughput": 8674.64, "total_tokens": 120740328} +{"current_steps": 179230, "total_steps": 204665, "loss": 0.0, "lr": 9.26350920416662e-08, "epoch": 4.378618718393472, "percentage": 87.57, "elapsed_time": "3:51:59", "remaining_time": "0:32:55", "throughput": 8674.66, "total_tokens": 120743592} +{"current_steps": 179235, "total_steps": 204665, "loss": 0.0, "lr": 9.259924942941222e-08, "epoch": 4.37874086922532, "percentage": 87.57, "elapsed_time": "3:51:59", "remaining_time": "0:32:54", "throughput": 8674.68, "total_tokens": 120746856} +{"current_steps": 179240, "total_steps": 204665, "loss": 0.0003, "lr": 9.256341341602059e-08, "epoch": 4.3788630200571665, "percentage": 87.58, "elapsed_time": "3:51:59", "remaining_time": "0:32:54", "throughput": 8674.72, "total_tokens": 120750504} +{"current_steps": 179245, "total_steps": 204665, "loss": 0.0, "lr": 9.25275840017521e-08, "epoch": 4.378985170889013, "percentage": 87.58, "elapsed_time": "3:52:00", "remaining_time": "0:32:54", "throughput": 8674.77, "total_tokens": 120754344} +{"current_steps": 179250, "total_steps": 204665, "loss": 0.0, "lr": 9.249176118686686e-08, "epoch": 4.379107321720861, "percentage": 87.58, "elapsed_time": "3:52:00", "remaining_time": "0:32:53", "throughput": 8674.8, "total_tokens": 120757736} +{"current_steps": 179255, "total_steps": 204665, "loss": 0.0, "lr": 9.245594497162579e-08, "epoch": 4.379229472552708, "percentage": 87.58, "elapsed_time": "3:52:00", "remaining_time": "0:32:53", "throughput": 8674.82, "total_tokens": 120761064} +{"current_steps": 179260, "total_steps": 204665, "loss": 0.0, "lr": 9.242013535628901e-08, "epoch": 4.379351623384555, "percentage": 87.59, "elapsed_time": "3:52:01", "remaining_time": "0:32:52", "throughput": 8674.85, "total_tokens": 120764456} +{"current_steps": 179265, "total_steps": 204665, "loss": 0.0, "lr": 9.238433234111731e-08, "epoch": 4.379473774216402, "percentage": 87.59, "elapsed_time": "3:52:01", "remaining_time": "0:32:52", "throughput": 8674.86, "total_tokens": 120767656} +{"current_steps": 179270, "total_steps": 204665, "loss": 0.0, "lr": 9.234853592637071e-08, "epoch": 4.37959592504825, "percentage": 87.59, "elapsed_time": "3:52:01", "remaining_time": "0:32:52", "throughput": 8674.88, "total_tokens": 120770792} +{"current_steps": 179275, "total_steps": 204665, "loss": 0.084, "lr": 9.231274611230965e-08, "epoch": 4.379718075880096, "percentage": 87.59, "elapsed_time": "3:52:02", "remaining_time": "0:32:51", "throughput": 8674.89, "total_tokens": 120773992} +{"current_steps": 179280, "total_steps": 204665, "loss": 0.0, "lr": 9.227696289919462e-08, "epoch": 4.379840226711944, "percentage": 87.6, "elapsed_time": "3:52:02", "remaining_time": "0:32:51", "throughput": 8674.91, "total_tokens": 120777128} +{"current_steps": 179285, "total_steps": 204665, "loss": 0.0, "lr": 9.22411862872855e-08, "epoch": 4.379962377543791, "percentage": 87.6, "elapsed_time": "3:52:02", "remaining_time": "0:32:50", "throughput": 8674.92, "total_tokens": 120780264} +{"current_steps": 179290, "total_steps": 204665, "loss": 0.0, "lr": 9.220541627684286e-08, "epoch": 4.380084528375638, "percentage": 87.6, "elapsed_time": "3:52:03", "remaining_time": "0:32:50", "throughput": 8674.96, "total_tokens": 120783848} +{"current_steps": 179295, "total_steps": 204665, "loss": 0.0, "lr": 9.216965286812628e-08, "epoch": 4.380206679207485, "percentage": 87.6, "elapsed_time": "3:52:03", "remaining_time": "0:32:50", "throughput": 8674.98, "total_tokens": 120787304} +{"current_steps": 179300, "total_steps": 204665, "loss": 0.0182, "lr": 9.213389606139643e-08, "epoch": 4.380328830039333, "percentage": 87.61, "elapsed_time": "3:52:03", "remaining_time": "0:32:49", "throughput": 8675.03, "total_tokens": 120791016} +{"current_steps": 179305, "total_steps": 204665, "loss": 0.0001, "lr": 9.2098145856913e-08, "epoch": 4.38045098087118, "percentage": 87.61, "elapsed_time": "3:52:04", "remaining_time": "0:32:49", "throughput": 8675.05, "total_tokens": 120794344} +{"current_steps": 179310, "total_steps": 204665, "loss": 0.0, "lr": 9.206240225493611e-08, "epoch": 4.380573131703027, "percentage": 87.61, "elapsed_time": "3:52:04", "remaining_time": "0:32:48", "throughput": 8675.08, "total_tokens": 120797736} +{"current_steps": 179315, "total_steps": 204665, "loss": 0.0, "lr": 9.202666525572545e-08, "epoch": 4.380695282534874, "percentage": 87.61, "elapsed_time": "3:52:05", "remaining_time": "0:32:48", "throughput": 8675.09, "total_tokens": 120800936} +{"current_steps": 179320, "total_steps": 204665, "loss": 0.0, "lr": 9.199093485954123e-08, "epoch": 4.380817433366722, "percentage": 87.62, "elapsed_time": "3:52:05", "remaining_time": "0:32:48", "throughput": 8675.15, "total_tokens": 120804840} +{"current_steps": 179325, "total_steps": 204665, "loss": 0.0, "lr": 9.195521106664328e-08, "epoch": 4.380939584198568, "percentage": 87.62, "elapsed_time": "3:52:05", "remaining_time": "0:32:47", "throughput": 8675.17, "total_tokens": 120808168} +{"current_steps": 179330, "total_steps": 204665, "loss": 0.0, "lr": 9.191949387729103e-08, "epoch": 4.381061735030416, "percentage": 87.62, "elapsed_time": "3:52:06", "remaining_time": "0:32:47", "throughput": 8675.19, "total_tokens": 120811496} +{"current_steps": 179335, "total_steps": 204665, "loss": 0.0, "lr": 9.188378329174451e-08, "epoch": 4.381183885862263, "percentage": 87.62, "elapsed_time": "3:52:06", "remaining_time": "0:32:47", "throughput": 8675.21, "total_tokens": 120814760} +{"current_steps": 179340, "total_steps": 204665, "loss": 0.0, "lr": 9.184807931026351e-08, "epoch": 4.3813060366941095, "percentage": 87.63, "elapsed_time": "3:52:06", "remaining_time": "0:32:46", "throughput": 8675.26, "total_tokens": 120818472} +{"current_steps": 179345, "total_steps": 204665, "loss": 0.0, "lr": 9.181238193310736e-08, "epoch": 4.381428187525957, "percentage": 87.63, "elapsed_time": "3:52:07", "remaining_time": "0:32:46", "throughput": 8675.26, "total_tokens": 120821352} +{"current_steps": 179350, "total_steps": 204665, "loss": 0.0, "lr": 9.177669116053599e-08, "epoch": 4.381550338357804, "percentage": 87.63, "elapsed_time": "3:52:07", "remaining_time": "0:32:45", "throughput": 8675.27, "total_tokens": 120824488} +{"current_steps": 179355, "total_steps": 204665, "loss": 0.0, "lr": 9.174100699280862e-08, "epoch": 4.3816724891896515, "percentage": 87.63, "elapsed_time": "3:52:07", "remaining_time": "0:32:45", "throughput": 8675.29, "total_tokens": 120827752} +{"current_steps": 179360, "total_steps": 204665, "loss": 0.0302, "lr": 9.170532943018517e-08, "epoch": 4.381794640021498, "percentage": 87.64, "elapsed_time": "3:52:08", "remaining_time": "0:32:45", "throughput": 8675.35, "total_tokens": 120831656} +{"current_steps": 179365, "total_steps": 204665, "loss": 0.0, "lr": 9.166965847292474e-08, "epoch": 4.381916790853346, "percentage": 87.64, "elapsed_time": "3:52:08", "remaining_time": "0:32:44", "throughput": 8675.36, "total_tokens": 120834792} +{"current_steps": 179370, "total_steps": 204665, "loss": 0.0, "lr": 9.163399412128681e-08, "epoch": 4.382038941685193, "percentage": 87.64, "elapsed_time": "3:52:08", "remaining_time": "0:32:44", "throughput": 8675.45, "total_tokens": 120839272} +{"current_steps": 179375, "total_steps": 204665, "loss": 0.0, "lr": 9.159833637553094e-08, "epoch": 4.38216109251704, "percentage": 87.64, "elapsed_time": "3:52:09", "remaining_time": "0:32:43", "throughput": 8675.47, "total_tokens": 120842600} +{"current_steps": 179380, "total_steps": 204665, "loss": 0.0, "lr": 9.156268523591615e-08, "epoch": 4.382283243348887, "percentage": 87.65, "elapsed_time": "3:52:09", "remaining_time": "0:32:43", "throughput": 8675.49, "total_tokens": 120845864} +{"current_steps": 179385, "total_steps": 204665, "loss": 0.0, "lr": 9.152704070270201e-08, "epoch": 4.382405394180735, "percentage": 87.65, "elapsed_time": "3:52:09", "remaining_time": "0:32:43", "throughput": 8675.51, "total_tokens": 120849192} +{"current_steps": 179390, "total_steps": 204665, "loss": 0.0, "lr": 9.149140277614742e-08, "epoch": 4.382527545012581, "percentage": 87.65, "elapsed_time": "3:52:10", "remaining_time": "0:32:42", "throughput": 8675.57, "total_tokens": 120853096} +{"current_steps": 179395, "total_steps": 204665, "loss": 0.0, "lr": 9.145577145651173e-08, "epoch": 4.382649695844429, "percentage": 87.65, "elapsed_time": "3:52:10", "remaining_time": "0:32:42", "throughput": 8675.59, "total_tokens": 120856360} +{"current_steps": 179400, "total_steps": 204665, "loss": 0.0, "lr": 9.142014674405418e-08, "epoch": 4.382771846676276, "percentage": 87.66, "elapsed_time": "3:52:10", "remaining_time": "0:32:41", "throughput": 8675.63, "total_tokens": 120860072} +{"current_steps": 179405, "total_steps": 204665, "loss": 0.0, "lr": 9.138452863903357e-08, "epoch": 4.382893997508123, "percentage": 87.66, "elapsed_time": "3:52:11", "remaining_time": "0:32:41", "throughput": 8675.63, "total_tokens": 120863016} +{"current_steps": 179410, "total_steps": 204665, "loss": 0.0, "lr": 9.134891714170911e-08, "epoch": 4.38301614833997, "percentage": 87.66, "elapsed_time": "3:52:11", "remaining_time": "0:32:41", "throughput": 8675.65, "total_tokens": 120866280} +{"current_steps": 179415, "total_steps": 204665, "loss": 0.0, "lr": 9.131331225233985e-08, "epoch": 4.383138299171818, "percentage": 87.66, "elapsed_time": "3:52:12", "remaining_time": "0:32:40", "throughput": 8675.68, "total_tokens": 120869800} +{"current_steps": 179420, "total_steps": 204665, "loss": 0.0, "lr": 9.127771397118434e-08, "epoch": 4.3832604500036645, "percentage": 87.67, "elapsed_time": "3:52:12", "remaining_time": "0:32:40", "throughput": 8675.67, "total_tokens": 120872552} +{"current_steps": 179425, "total_steps": 204665, "loss": 0.0, "lr": 9.124212229850192e-08, "epoch": 4.383382600835512, "percentage": 87.67, "elapsed_time": "3:52:12", "remaining_time": "0:32:39", "throughput": 8675.71, "total_tokens": 120876200} +{"current_steps": 179430, "total_steps": 204665, "loss": 0.0, "lr": 9.120653723455108e-08, "epoch": 4.383504751667359, "percentage": 87.67, "elapsed_time": "3:52:13", "remaining_time": "0:32:39", "throughput": 8675.71, "total_tokens": 120879016} +{"current_steps": 179435, "total_steps": 204665, "loss": 0.0, "lr": 9.117095877959091e-08, "epoch": 4.383626902499206, "percentage": 87.67, "elapsed_time": "3:52:13", "remaining_time": "0:32:39", "throughput": 8675.73, "total_tokens": 120882408} +{"current_steps": 179440, "total_steps": 204665, "loss": 0.0, "lr": 9.113538693387967e-08, "epoch": 4.383749053331053, "percentage": 87.67, "elapsed_time": "3:52:13", "remaining_time": "0:32:38", "throughput": 8675.75, "total_tokens": 120885608} +{"current_steps": 179445, "total_steps": 204665, "loss": 0.0, "lr": 9.109982169767671e-08, "epoch": 4.3838712041629, "percentage": 87.68, "elapsed_time": "3:52:14", "remaining_time": "0:32:38", "throughput": 8675.76, "total_tokens": 120888680} +{"current_steps": 179450, "total_steps": 204665, "loss": 0.0, "lr": 9.106426307124004e-08, "epoch": 4.383993354994748, "percentage": 87.68, "elapsed_time": "3:52:14", "remaining_time": "0:32:37", "throughput": 8675.78, "total_tokens": 120891944} +{"current_steps": 179455, "total_steps": 204665, "loss": 0.0, "lr": 9.102871105482868e-08, "epoch": 4.384115505826594, "percentage": 87.68, "elapsed_time": "3:52:14", "remaining_time": "0:32:37", "throughput": 8675.8, "total_tokens": 120895336} +{"current_steps": 179460, "total_steps": 204665, "loss": 0.0, "lr": 9.099316564870086e-08, "epoch": 4.384237656658442, "percentage": 87.68, "elapsed_time": "3:52:15", "remaining_time": "0:32:37", "throughput": 8675.82, "total_tokens": 120898664} +{"current_steps": 179465, "total_steps": 204665, "loss": 0.058, "lr": 9.095762685311526e-08, "epoch": 4.384359807490289, "percentage": 87.69, "elapsed_time": "3:52:15", "remaining_time": "0:32:36", "throughput": 8675.83, "total_tokens": 120901800} +{"current_steps": 179470, "total_steps": 204665, "loss": 0.0, "lr": 9.092209466833046e-08, "epoch": 4.384481958322136, "percentage": 87.69, "elapsed_time": "3:52:15", "remaining_time": "0:32:36", "throughput": 8675.84, "total_tokens": 120904808} +{"current_steps": 179475, "total_steps": 204665, "loss": 0.0, "lr": 9.088656909460446e-08, "epoch": 4.384604109153983, "percentage": 87.69, "elapsed_time": "3:52:16", "remaining_time": "0:32:35", "throughput": 8675.87, "total_tokens": 120908264} +{"current_steps": 179480, "total_steps": 204665, "loss": 0.0, "lr": 9.085105013219607e-08, "epoch": 4.384726259985831, "percentage": 87.69, "elapsed_time": "3:52:16", "remaining_time": "0:32:35", "throughput": 8675.88, "total_tokens": 120911272} +{"current_steps": 179485, "total_steps": 204665, "loss": 0.0, "lr": 9.08155377813633e-08, "epoch": 4.3848484108176775, "percentage": 87.7, "elapsed_time": "3:52:16", "remaining_time": "0:32:35", "throughput": 8675.9, "total_tokens": 120914664} +{"current_steps": 179490, "total_steps": 204665, "loss": 0.0, "lr": 9.07800320423644e-08, "epoch": 4.384970561649525, "percentage": 87.7, "elapsed_time": "3:52:17", "remaining_time": "0:32:34", "throughput": 8675.93, "total_tokens": 120917992} +{"current_steps": 179495, "total_steps": 204665, "loss": 0.0, "lr": 9.074453291545781e-08, "epoch": 4.385092712481372, "percentage": 87.7, "elapsed_time": "3:52:17", "remaining_time": "0:32:34", "throughput": 8675.94, "total_tokens": 120921192} +{"current_steps": 179500, "total_steps": 204665, "loss": 0.0, "lr": 9.070904040090132e-08, "epoch": 4.3852148633132195, "percentage": 87.7, "elapsed_time": "3:52:17", "remaining_time": "0:32:34", "throughput": 8675.95, "total_tokens": 120924328} +{"current_steps": 179505, "total_steps": 204665, "loss": 0.0, "lr": 9.067355449895352e-08, "epoch": 4.385337014145066, "percentage": 87.71, "elapsed_time": "3:52:18", "remaining_time": "0:32:33", "throughput": 8676.0, "total_tokens": 120928104} +{"current_steps": 179510, "total_steps": 204665, "loss": 0.0, "lr": 9.063807520987199e-08, "epoch": 4.385459164976914, "percentage": 87.71, "elapsed_time": "3:52:18", "remaining_time": "0:32:33", "throughput": 8676.01, "total_tokens": 120931304} +{"current_steps": 179515, "total_steps": 204665, "loss": 0.0, "lr": 9.060260253391517e-08, "epoch": 4.385581315808761, "percentage": 87.71, "elapsed_time": "3:52:18", "remaining_time": "0:32:32", "throughput": 8676.03, "total_tokens": 120934568} +{"current_steps": 179520, "total_steps": 204665, "loss": 0.0, "lr": 9.056713647134084e-08, "epoch": 4.385703466640608, "percentage": 87.71, "elapsed_time": "3:52:19", "remaining_time": "0:32:32", "throughput": 8676.06, "total_tokens": 120937832} +{"current_steps": 179525, "total_steps": 204665, "loss": 0.0, "lr": 9.053167702240672e-08, "epoch": 4.385825617472455, "percentage": 87.72, "elapsed_time": "3:52:19", "remaining_time": "0:32:32", "throughput": 8676.06, "total_tokens": 120940904} +{"current_steps": 179530, "total_steps": 204665, "loss": 0.0, "lr": 9.049622418737102e-08, "epoch": 4.385947768304302, "percentage": 87.72, "elapsed_time": "3:52:19", "remaining_time": "0:32:31", "throughput": 8676.12, "total_tokens": 120944808} +{"current_steps": 179535, "total_steps": 204665, "loss": 0.0, "lr": 9.046077796649121e-08, "epoch": 4.386069919136149, "percentage": 87.72, "elapsed_time": "3:52:20", "remaining_time": "0:32:31", "throughput": 8676.14, "total_tokens": 120948200} +{"current_steps": 179540, "total_steps": 204665, "loss": 0.0, "lr": 9.042533836002541e-08, "epoch": 4.386192069967996, "percentage": 87.72, "elapsed_time": "3:52:20", "remaining_time": "0:32:30", "throughput": 8676.19, "total_tokens": 120951912} +{"current_steps": 179545, "total_steps": 204665, "loss": 0.0, "lr": 9.03899053682311e-08, "epoch": 4.386314220799844, "percentage": 87.73, "elapsed_time": "3:52:21", "remaining_time": "0:32:30", "throughput": 8676.22, "total_tokens": 120955496} +{"current_steps": 179550, "total_steps": 204665, "loss": 0.0, "lr": 9.035447899136617e-08, "epoch": 4.3864363716316905, "percentage": 87.73, "elapsed_time": "3:52:21", "remaining_time": "0:32:30", "throughput": 8676.28, "total_tokens": 120959336} +{"current_steps": 179555, "total_steps": 204665, "loss": 0.0, "lr": 9.031905922968797e-08, "epoch": 4.386558522463538, "percentage": 87.73, "elapsed_time": "3:52:21", "remaining_time": "0:32:29", "throughput": 8676.33, "total_tokens": 120963112} +{"current_steps": 179560, "total_steps": 204665, "loss": 0.0, "lr": 9.02836460834543e-08, "epoch": 4.386680673295385, "percentage": 87.73, "elapsed_time": "3:52:22", "remaining_time": "0:32:29", "throughput": 8676.35, "total_tokens": 120966376} +{"current_steps": 179565, "total_steps": 204665, "loss": 0.0, "lr": 9.024823955292271e-08, "epoch": 4.3868028241272325, "percentage": 87.74, "elapsed_time": "3:52:22", "remaining_time": "0:32:28", "throughput": 8676.38, "total_tokens": 120969896} +{"current_steps": 179570, "total_steps": 204665, "loss": 0.0, "lr": 9.021283963835058e-08, "epoch": 4.386924974959079, "percentage": 87.74, "elapsed_time": "3:52:22", "remaining_time": "0:32:28", "throughput": 8676.43, "total_tokens": 120973608} +{"current_steps": 179575, "total_steps": 204665, "loss": 0.0, "lr": 9.017744633999547e-08, "epoch": 4.387047125790927, "percentage": 87.74, "elapsed_time": "3:52:23", "remaining_time": "0:32:28", "throughput": 8676.45, "total_tokens": 120977000} +{"current_steps": 179580, "total_steps": 204665, "loss": 0.0, "lr": 9.01420596581145e-08, "epoch": 4.387169276622774, "percentage": 87.74, "elapsed_time": "3:52:23", "remaining_time": "0:32:27", "throughput": 8676.49, "total_tokens": 120980584} +{"current_steps": 179585, "total_steps": 204665, "loss": 0.0, "lr": 9.010667959296526e-08, "epoch": 4.387291427454621, "percentage": 87.75, "elapsed_time": "3:52:23", "remaining_time": "0:32:27", "throughput": 8676.53, "total_tokens": 120984168} +{"current_steps": 179590, "total_steps": 204665, "loss": 0.0, "lr": 9.007130614480507e-08, "epoch": 4.387413578286468, "percentage": 87.75, "elapsed_time": "3:52:24", "remaining_time": "0:32:26", "throughput": 8676.55, "total_tokens": 120987560} +{"current_steps": 179595, "total_steps": 204665, "loss": 0.0, "lr": 9.003593931389087e-08, "epoch": 4.387535729118316, "percentage": 87.75, "elapsed_time": "3:52:24", "remaining_time": "0:32:26", "throughput": 8676.57, "total_tokens": 120990696} +{"current_steps": 179600, "total_steps": 204665, "loss": 0.0, "lr": 9.000057910048042e-08, "epoch": 4.387657879950162, "percentage": 87.75, "elapsed_time": "3:52:24", "remaining_time": "0:32:26", "throughput": 8676.58, "total_tokens": 120993960} +{"current_steps": 179605, "total_steps": 204665, "loss": 0.0, "lr": 8.996522550483021e-08, "epoch": 4.387780030782009, "percentage": 87.76, "elapsed_time": "3:52:25", "remaining_time": "0:32:25", "throughput": 8676.61, "total_tokens": 120997224} +{"current_steps": 179610, "total_steps": 204665, "loss": 0.0, "lr": 8.99298785271978e-08, "epoch": 4.387902181613857, "percentage": 87.76, "elapsed_time": "3:52:25", "remaining_time": "0:32:25", "throughput": 8676.64, "total_tokens": 121000680} +{"current_steps": 179615, "total_steps": 204665, "loss": 0.0, "lr": 8.989453816783998e-08, "epoch": 4.388024332445704, "percentage": 87.76, "elapsed_time": "3:52:25", "remaining_time": "0:32:24", "throughput": 8676.64, "total_tokens": 121003752} +{"current_steps": 179620, "total_steps": 204665, "loss": 0.0, "lr": 8.985920442701411e-08, "epoch": 4.388146483277551, "percentage": 87.76, "elapsed_time": "3:52:26", "remaining_time": "0:32:24", "throughput": 8676.67, "total_tokens": 121007144} +{"current_steps": 179625, "total_steps": 204665, "loss": 0.0001, "lr": 8.982387730497665e-08, "epoch": 4.388268634109398, "percentage": 87.77, "elapsed_time": "3:52:26", "remaining_time": "0:32:24", "throughput": 8676.68, "total_tokens": 121010152} +{"current_steps": 179630, "total_steps": 204665, "loss": 0.0, "lr": 8.978855680198494e-08, "epoch": 4.388390784941246, "percentage": 87.77, "elapsed_time": "3:52:26", "remaining_time": "0:32:23", "throughput": 8676.69, "total_tokens": 121013288} +{"current_steps": 179635, "total_steps": 204665, "loss": 0.0, "lr": 8.975324291829567e-08, "epoch": 4.388512935773092, "percentage": 87.77, "elapsed_time": "3:52:27", "remaining_time": "0:32:23", "throughput": 8676.72, "total_tokens": 121016680} +{"current_steps": 179640, "total_steps": 204665, "loss": 0.0214, "lr": 8.971793565416541e-08, "epoch": 4.38863508660494, "percentage": 87.77, "elapsed_time": "3:52:27", "remaining_time": "0:32:22", "throughput": 8676.74, "total_tokens": 121020072} +{"current_steps": 179645, "total_steps": 204665, "loss": 0.0, "lr": 8.96826350098513e-08, "epoch": 4.388757237436787, "percentage": 87.78, "elapsed_time": "3:52:27", "remaining_time": "0:32:22", "throughput": 8676.76, "total_tokens": 121023336} +{"current_steps": 179650, "total_steps": 204665, "loss": 0.0, "lr": 8.964734098561e-08, "epoch": 4.388879388268634, "percentage": 87.78, "elapsed_time": "3:52:28", "remaining_time": "0:32:22", "throughput": 8676.78, "total_tokens": 121026664} +{"current_steps": 179655, "total_steps": 204665, "loss": 0.0001, "lr": 8.961205358169788e-08, "epoch": 4.389001539100481, "percentage": 87.78, "elapsed_time": "3:52:28", "remaining_time": "0:32:21", "throughput": 8676.79, "total_tokens": 121029672} +{"current_steps": 179660, "total_steps": 204665, "loss": 0.0325, "lr": 8.957677279837195e-08, "epoch": 4.389123689932329, "percentage": 87.78, "elapsed_time": "3:52:29", "remaining_time": "0:32:21", "throughput": 8676.8, "total_tokens": 121032744} +{"current_steps": 179665, "total_steps": 204665, "loss": 0.0001, "lr": 8.954149863588844e-08, "epoch": 4.3892458407641755, "percentage": 87.78, "elapsed_time": "3:52:29", "remaining_time": "0:32:21", "throughput": 8676.82, "total_tokens": 121036136} +{"current_steps": 179670, "total_steps": 204665, "loss": 0.0, "lr": 8.950623109450428e-08, "epoch": 4.389367991596023, "percentage": 87.79, "elapsed_time": "3:52:29", "remaining_time": "0:32:20", "throughput": 8676.86, "total_tokens": 121039592} +{"current_steps": 179675, "total_steps": 204665, "loss": 0.0002, "lr": 8.947097017447546e-08, "epoch": 4.38949014242787, "percentage": 87.79, "elapsed_time": "3:52:30", "remaining_time": "0:32:20", "throughput": 8676.88, "total_tokens": 121042920} +{"current_steps": 179680, "total_steps": 204665, "loss": 0.0, "lr": 8.94357158760587e-08, "epoch": 4.3896122932597175, "percentage": 87.79, "elapsed_time": "3:52:30", "remaining_time": "0:32:19", "throughput": 8676.92, "total_tokens": 121046568} +{"current_steps": 179685, "total_steps": 204665, "loss": 0.0, "lr": 8.940046819951052e-08, "epoch": 4.389734444091564, "percentage": 87.79, "elapsed_time": "3:52:30", "remaining_time": "0:32:19", "throughput": 8676.95, "total_tokens": 121050024} +{"current_steps": 179690, "total_steps": 204665, "loss": 0.0, "lr": 8.936522714508676e-08, "epoch": 4.389856594923412, "percentage": 87.8, "elapsed_time": "3:52:31", "remaining_time": "0:32:19", "throughput": 8676.96, "total_tokens": 121053096} +{"current_steps": 179695, "total_steps": 204665, "loss": 0.0, "lr": 8.932999271304432e-08, "epoch": 4.389978745755259, "percentage": 87.8, "elapsed_time": "3:52:31", "remaining_time": "0:32:18", "throughput": 8676.98, "total_tokens": 121056360} +{"current_steps": 179700, "total_steps": 204665, "loss": 0.0, "lr": 8.929476490363885e-08, "epoch": 4.390100896587105, "percentage": 87.8, "elapsed_time": "3:52:31", "remaining_time": "0:32:18", "throughput": 8677.0, "total_tokens": 121059688} +{"current_steps": 179705, "total_steps": 204665, "loss": 0.0, "lr": 8.925954371712696e-08, "epoch": 4.390223047418953, "percentage": 87.8, "elapsed_time": "3:52:32", "remaining_time": "0:32:17", "throughput": 8677.06, "total_tokens": 121063720} +{"current_steps": 179710, "total_steps": 204665, "loss": 0.0, "lr": 8.922432915376443e-08, "epoch": 4.3903451982508, "percentage": 87.81, "elapsed_time": "3:52:32", "remaining_time": "0:32:17", "throughput": 8677.08, "total_tokens": 121067048} +{"current_steps": 179715, "total_steps": 204665, "loss": 0.0, "lr": 8.918912121380761e-08, "epoch": 4.390467349082647, "percentage": 87.81, "elapsed_time": "3:52:32", "remaining_time": "0:32:17", "throughput": 8677.12, "total_tokens": 121070568} +{"current_steps": 179720, "total_steps": 204665, "loss": 0.0, "lr": 8.915391989751265e-08, "epoch": 4.390589499914494, "percentage": 87.81, "elapsed_time": "3:52:33", "remaining_time": "0:32:16", "throughput": 8677.14, "total_tokens": 121073832} +{"current_steps": 179725, "total_steps": 204665, "loss": 0.0002, "lr": 8.911872520513542e-08, "epoch": 4.390711650746342, "percentage": 87.81, "elapsed_time": "3:52:33", "remaining_time": "0:32:16", "throughput": 8677.15, "total_tokens": 121076968} +{"current_steps": 179730, "total_steps": 204665, "loss": 0.0014, "lr": 8.908353713693162e-08, "epoch": 4.3908338015781885, "percentage": 87.82, "elapsed_time": "3:52:33", "remaining_time": "0:32:15", "throughput": 8677.18, "total_tokens": 121080552} +{"current_steps": 179735, "total_steps": 204665, "loss": 0.0, "lr": 8.90483556931575e-08, "epoch": 4.390955952410036, "percentage": 87.82, "elapsed_time": "3:52:34", "remaining_time": "0:32:15", "throughput": 8677.23, "total_tokens": 121084392} +{"current_steps": 179740, "total_steps": 204665, "loss": 0.0, "lr": 8.90131808740685e-08, "epoch": 4.391078103241883, "percentage": 87.82, "elapsed_time": "3:52:34", "remaining_time": "0:32:15", "throughput": 8677.28, "total_tokens": 121088040} +{"current_steps": 179745, "total_steps": 204665, "loss": 0.0, "lr": 8.8978012679921e-08, "epoch": 4.3912002540737305, "percentage": 87.82, "elapsed_time": "3:52:34", "remaining_time": "0:32:14", "throughput": 8677.33, "total_tokens": 121092008} +{"current_steps": 179750, "total_steps": 204665, "loss": 0.0318, "lr": 8.894285111097011e-08, "epoch": 4.391322404905577, "percentage": 87.83, "elapsed_time": "3:52:35", "remaining_time": "0:32:14", "throughput": 8677.36, "total_tokens": 121095400} +{"current_steps": 179755, "total_steps": 204665, "loss": 0.0, "lr": 8.890769616747207e-08, "epoch": 4.391444555737425, "percentage": 87.83, "elapsed_time": "3:52:35", "remaining_time": "0:32:13", "throughput": 8677.36, "total_tokens": 121098344} +{"current_steps": 179760, "total_steps": 204665, "loss": 0.0, "lr": 8.887254784968223e-08, "epoch": 4.391566706569272, "percentage": 87.83, "elapsed_time": "3:52:36", "remaining_time": "0:32:13", "throughput": 8677.38, "total_tokens": 121101736} +{"current_steps": 179765, "total_steps": 204665, "loss": 0.0, "lr": 8.88374061578564e-08, "epoch": 4.391688857401119, "percentage": 87.83, "elapsed_time": "3:52:36", "remaining_time": "0:32:13", "throughput": 8677.4, "total_tokens": 121104936} +{"current_steps": 179770, "total_steps": 204665, "loss": 0.0, "lr": 8.880227109224981e-08, "epoch": 4.391811008232966, "percentage": 87.84, "elapsed_time": "3:52:36", "remaining_time": "0:32:12", "throughput": 8677.45, "total_tokens": 121108648} +{"current_steps": 179775, "total_steps": 204665, "loss": 0.0, "lr": 8.876714265311824e-08, "epoch": 4.391933159064814, "percentage": 87.84, "elapsed_time": "3:52:37", "remaining_time": "0:32:12", "throughput": 8677.45, "total_tokens": 121111720} +{"current_steps": 179780, "total_steps": 204665, "loss": 0.0, "lr": 8.873202084071717e-08, "epoch": 4.39205530989666, "percentage": 87.84, "elapsed_time": "3:52:37", "remaining_time": "0:32:11", "throughput": 8677.49, "total_tokens": 121115368} +{"current_steps": 179785, "total_steps": 204665, "loss": 0.0, "lr": 8.869690565530185e-08, "epoch": 4.392177460728508, "percentage": 87.84, "elapsed_time": "3:52:37", "remaining_time": "0:32:11", "throughput": 8677.51, "total_tokens": 121118632} +{"current_steps": 179790, "total_steps": 204665, "loss": 0.0, "lr": 8.866179709712795e-08, "epoch": 4.392299611560355, "percentage": 87.85, "elapsed_time": "3:52:38", "remaining_time": "0:32:11", "throughput": 8677.51, "total_tokens": 121121512} +{"current_steps": 179795, "total_steps": 204665, "loss": 0.0288, "lr": 8.862669516645039e-08, "epoch": 4.3924217623922015, "percentage": 87.85, "elapsed_time": "3:52:38", "remaining_time": "0:32:10", "throughput": 8677.58, "total_tokens": 121125672} +{"current_steps": 179800, "total_steps": 204665, "loss": 0.0, "lr": 8.85915998635246e-08, "epoch": 4.392543913224049, "percentage": 87.85, "elapsed_time": "3:52:38", "remaining_time": "0:32:10", "throughput": 8677.58, "total_tokens": 121128680} +{"current_steps": 179805, "total_steps": 204665, "loss": 0.0299, "lr": 8.855651118860608e-08, "epoch": 4.392666064055896, "percentage": 87.85, "elapsed_time": "3:52:39", "remaining_time": "0:32:10", "throughput": 8677.62, "total_tokens": 121132264} +{"current_steps": 179810, "total_steps": 204665, "loss": 0.0, "lr": 8.85214291419496e-08, "epoch": 4.3927882148877435, "percentage": 87.86, "elapsed_time": "3:52:39", "remaining_time": "0:32:09", "throughput": 8677.63, "total_tokens": 121135528} +{"current_steps": 179815, "total_steps": 204665, "loss": 0.0, "lr": 8.848635372381052e-08, "epoch": 4.39291036571959, "percentage": 87.86, "elapsed_time": "3:52:39", "remaining_time": "0:32:09", "throughput": 8677.68, "total_tokens": 121139304} +{"current_steps": 179820, "total_steps": 204665, "loss": 0.0, "lr": 8.845128493444365e-08, "epoch": 4.393032516551438, "percentage": 87.86, "elapsed_time": "3:52:40", "remaining_time": "0:32:08", "throughput": 8677.7, "total_tokens": 121142568} +{"current_steps": 179825, "total_steps": 204665, "loss": 0.0, "lr": 8.841622277410455e-08, "epoch": 4.393154667383285, "percentage": 87.86, "elapsed_time": "3:52:40", "remaining_time": "0:32:08", "throughput": 8677.74, "total_tokens": 121146216} +{"current_steps": 179830, "total_steps": 204665, "loss": 0.0, "lr": 8.838116724304767e-08, "epoch": 4.393276818215132, "percentage": 87.87, "elapsed_time": "3:52:40", "remaining_time": "0:32:08", "throughput": 8677.75, "total_tokens": 121149480} +{"current_steps": 179835, "total_steps": 204665, "loss": 0.0, "lr": 8.834611834152806e-08, "epoch": 4.393398969046979, "percentage": 87.87, "elapsed_time": "3:52:41", "remaining_time": "0:32:07", "throughput": 8677.77, "total_tokens": 121152616} +{"current_steps": 179840, "total_steps": 204665, "loss": 0.0, "lr": 8.831107606980093e-08, "epoch": 4.393521119878827, "percentage": 87.87, "elapsed_time": "3:52:41", "remaining_time": "0:32:07", "throughput": 8677.81, "total_tokens": 121156392} +{"current_steps": 179845, "total_steps": 204665, "loss": 0.0, "lr": 8.827604042812054e-08, "epoch": 4.393643270710673, "percentage": 87.87, "elapsed_time": "3:52:41", "remaining_time": "0:32:06", "throughput": 8677.83, "total_tokens": 121159656} +{"current_steps": 179850, "total_steps": 204665, "loss": 0.0, "lr": 8.824101141674234e-08, "epoch": 4.393765421542521, "percentage": 87.88, "elapsed_time": "3:52:42", "remaining_time": "0:32:06", "throughput": 8677.84, "total_tokens": 121162728} +{"current_steps": 179855, "total_steps": 204665, "loss": 0.0, "lr": 8.820598903592047e-08, "epoch": 4.393887572374368, "percentage": 87.88, "elapsed_time": "3:52:42", "remaining_time": "0:32:06", "throughput": 8677.87, "total_tokens": 121166248} +{"current_steps": 179860, "total_steps": 204665, "loss": 0.0, "lr": 8.817097328591005e-08, "epoch": 4.394009723206215, "percentage": 87.88, "elapsed_time": "3:52:43", "remaining_time": "0:32:05", "throughput": 8677.88, "total_tokens": 121169256} +{"current_steps": 179865, "total_steps": 204665, "loss": 0.0001, "lr": 8.813596416696544e-08, "epoch": 4.394131874038062, "percentage": 87.88, "elapsed_time": "3:52:43", "remaining_time": "0:32:05", "throughput": 8677.9, "total_tokens": 121172584} +{"current_steps": 179870, "total_steps": 204665, "loss": 0.0, "lr": 8.810096167934133e-08, "epoch": 4.394254024869909, "percentage": 87.89, "elapsed_time": "3:52:43", "remaining_time": "0:32:04", "throughput": 8677.92, "total_tokens": 121175848} +{"current_steps": 179875, "total_steps": 204665, "loss": 0.0, "lr": 8.806596582329251e-08, "epoch": 4.3943761757017565, "percentage": 87.89, "elapsed_time": "3:52:44", "remaining_time": "0:32:04", "throughput": 8677.92, "total_tokens": 121178856} +{"current_steps": 179880, "total_steps": 204665, "loss": 0.0, "lr": 8.8030976599073e-08, "epoch": 4.394498326533603, "percentage": 87.89, "elapsed_time": "3:52:44", "remaining_time": "0:32:04", "throughput": 8677.94, "total_tokens": 121182056} +{"current_steps": 179885, "total_steps": 204665, "loss": 0.0, "lr": 8.79959940069378e-08, "epoch": 4.394620477365451, "percentage": 87.89, "elapsed_time": "3:52:44", "remaining_time": "0:32:03", "throughput": 8677.99, "total_tokens": 121185832} +{"current_steps": 179890, "total_steps": 204665, "loss": 0.0, "lr": 8.796101804714084e-08, "epoch": 4.394742628197298, "percentage": 87.89, "elapsed_time": "3:52:45", "remaining_time": "0:32:03", "throughput": 8678.02, "total_tokens": 121189288} +{"current_steps": 179895, "total_steps": 204665, "loss": 0.0, "lr": 8.792604871993658e-08, "epoch": 4.394864779029145, "percentage": 87.9, "elapsed_time": "3:52:45", "remaining_time": "0:32:02", "throughput": 8678.04, "total_tokens": 121192680} +{"current_steps": 179900, "total_steps": 204665, "loss": 0.0003, "lr": 8.78910860255797e-08, "epoch": 4.394986929860992, "percentage": 87.9, "elapsed_time": "3:52:45", "remaining_time": "0:32:02", "throughput": 8678.08, "total_tokens": 121196200} +{"current_steps": 179905, "total_steps": 204665, "loss": 0.0, "lr": 8.78561299643239e-08, "epoch": 4.39510908069284, "percentage": 87.9, "elapsed_time": "3:52:46", "remaining_time": "0:32:02", "throughput": 8678.09, "total_tokens": 121199336} +{"current_steps": 179910, "total_steps": 204665, "loss": 0.0, "lr": 8.782118053642384e-08, "epoch": 4.395231231524686, "percentage": 87.9, "elapsed_time": "3:52:46", "remaining_time": "0:32:01", "throughput": 8678.1, "total_tokens": 121202472} +{"current_steps": 179915, "total_steps": 204665, "loss": 0.0, "lr": 8.778623774213345e-08, "epoch": 4.395353382356534, "percentage": 87.91, "elapsed_time": "3:52:46", "remaining_time": "0:32:01", "throughput": 8678.12, "total_tokens": 121205864} +{"current_steps": 179920, "total_steps": 204665, "loss": 0.0, "lr": 8.775130158170697e-08, "epoch": 4.395475533188381, "percentage": 87.91, "elapsed_time": "3:52:47", "remaining_time": "0:32:00", "throughput": 8678.13, "total_tokens": 121208936} +{"current_steps": 179925, "total_steps": 204665, "loss": 0.0, "lr": 8.771637205539817e-08, "epoch": 4.395597684020228, "percentage": 87.91, "elapsed_time": "3:52:47", "remaining_time": "0:32:00", "throughput": 8678.14, "total_tokens": 121212136} +{"current_steps": 179930, "total_steps": 204665, "loss": 0.0, "lr": 8.768144916346155e-08, "epoch": 4.395719834852075, "percentage": 87.91, "elapsed_time": "3:52:47", "remaining_time": "0:32:00", "throughput": 8678.19, "total_tokens": 121215784} +{"current_steps": 179935, "total_steps": 204665, "loss": 0.0, "lr": 8.764653290615065e-08, "epoch": 4.395841985683923, "percentage": 87.92, "elapsed_time": "3:52:48", "remaining_time": "0:31:59", "throughput": 8678.2, "total_tokens": 121218920} +{"current_steps": 179940, "total_steps": 204665, "loss": 0.0, "lr": 8.761162328371963e-08, "epoch": 4.39596413651577, "percentage": 87.92, "elapsed_time": "3:52:48", "remaining_time": "0:31:59", "throughput": 8678.25, "total_tokens": 121222696} +{"current_steps": 179945, "total_steps": 204665, "loss": 0.0, "lr": 8.757672029642238e-08, "epoch": 4.396086287347617, "percentage": 87.92, "elapsed_time": "3:52:48", "remaining_time": "0:31:58", "throughput": 8678.24, "total_tokens": 121225576} +{"current_steps": 179950, "total_steps": 204665, "loss": 0.0, "lr": 8.754182394451249e-08, "epoch": 4.396208438179464, "percentage": 87.92, "elapsed_time": "3:52:49", "remaining_time": "0:31:58", "throughput": 8678.29, "total_tokens": 121229288} +{"current_steps": 179955, "total_steps": 204665, "loss": 0.0336, "lr": 8.750693422824374e-08, "epoch": 4.396330589011312, "percentage": 87.93, "elapsed_time": "3:52:49", "remaining_time": "0:31:58", "throughput": 8678.3, "total_tokens": 121232488} +{"current_steps": 179960, "total_steps": 204665, "loss": 0.0, "lr": 8.747205114787026e-08, "epoch": 4.396452739843158, "percentage": 87.93, "elapsed_time": "3:52:49", "remaining_time": "0:31:57", "throughput": 8678.35, "total_tokens": 121236200} +{"current_steps": 179965, "total_steps": 204665, "loss": 0.0, "lr": 8.743717470364532e-08, "epoch": 4.396574890675005, "percentage": 87.93, "elapsed_time": "3:52:50", "remaining_time": "0:31:57", "throughput": 8678.37, "total_tokens": 121239592} +{"current_steps": 179970, "total_steps": 204665, "loss": 0.0, "lr": 8.740230489582278e-08, "epoch": 4.396697041506853, "percentage": 87.93, "elapsed_time": "3:52:50", "remaining_time": "0:31:57", "throughput": 8678.41, "total_tokens": 121243240} +{"current_steps": 179975, "total_steps": 204665, "loss": 0.0, "lr": 8.736744172465604e-08, "epoch": 4.3968191923386994, "percentage": 87.94, "elapsed_time": "3:52:51", "remaining_time": "0:31:56", "throughput": 8678.45, "total_tokens": 121246760} +{"current_steps": 179980, "total_steps": 204665, "loss": 0.0, "lr": 8.733258519039888e-08, "epoch": 4.396941343170547, "percentage": 87.94, "elapsed_time": "3:52:51", "remaining_time": "0:31:56", "throughput": 8678.48, "total_tokens": 121250280} +{"current_steps": 179985, "total_steps": 204665, "loss": 0.0, "lr": 8.729773529330442e-08, "epoch": 4.397063494002394, "percentage": 87.94, "elapsed_time": "3:52:51", "remaining_time": "0:31:55", "throughput": 8678.47, "total_tokens": 121252968} +{"current_steps": 179990, "total_steps": 204665, "loss": 0.0, "lr": 8.726289203362636e-08, "epoch": 4.3971856448342415, "percentage": 87.94, "elapsed_time": "3:52:52", "remaining_time": "0:31:55", "throughput": 8678.47, "total_tokens": 121255912} +{"current_steps": 179995, "total_steps": 204665, "loss": 0.0, "lr": 8.722805541161826e-08, "epoch": 4.397307795666088, "percentage": 87.95, "elapsed_time": "3:52:52", "remaining_time": "0:31:55", "throughput": 8678.49, "total_tokens": 121259176} +{"current_steps": 180000, "total_steps": 204665, "loss": 0.0, "lr": 8.719322542753305e-08, "epoch": 4.397429946497936, "percentage": 87.95, "elapsed_time": "3:52:52", "remaining_time": "0:31:54", "throughput": 8678.49, "total_tokens": 121262248} +{"current_steps": 180005, "total_steps": 204665, "loss": 0.0, "lr": 8.715840208162439e-08, "epoch": 4.397552097329783, "percentage": 87.95, "elapsed_time": "3:52:53", "remaining_time": "0:31:54", "throughput": 8678.52, "total_tokens": 121265576} +{"current_steps": 180010, "total_steps": 204665, "loss": 0.0, "lr": 8.71235853741451e-08, "epoch": 4.39767424816163, "percentage": 87.95, "elapsed_time": "3:52:53", "remaining_time": "0:31:53", "throughput": 8678.55, "total_tokens": 121269096} +{"current_steps": 180015, "total_steps": 204665, "loss": 0.0, "lr": 8.708877530534897e-08, "epoch": 4.397796398993477, "percentage": 87.96, "elapsed_time": "3:52:53", "remaining_time": "0:31:53", "throughput": 8678.57, "total_tokens": 121272360} +{"current_steps": 180020, "total_steps": 204665, "loss": 0.0, "lr": 8.705397187548846e-08, "epoch": 4.397918549825325, "percentage": 87.96, "elapsed_time": "3:52:54", "remaining_time": "0:31:53", "throughput": 8678.58, "total_tokens": 121275496} +{"current_steps": 180025, "total_steps": 204665, "loss": 0.0, "lr": 8.701917508481715e-08, "epoch": 4.398040700657171, "percentage": 87.96, "elapsed_time": "3:52:54", "remaining_time": "0:31:52", "throughput": 8678.63, "total_tokens": 121279272} +{"current_steps": 180030, "total_steps": 204665, "loss": 0.0, "lr": 8.698438493358806e-08, "epoch": 4.398162851489019, "percentage": 87.96, "elapsed_time": "3:52:54", "remaining_time": "0:31:52", "throughput": 8678.64, "total_tokens": 121282472} +{"current_steps": 180035, "total_steps": 204665, "loss": 0.0, "lr": 8.69496014220541e-08, "epoch": 4.398285002320866, "percentage": 87.97, "elapsed_time": "3:52:55", "remaining_time": "0:31:51", "throughput": 8678.68, "total_tokens": 121286056} +{"current_steps": 180040, "total_steps": 204665, "loss": 0.0, "lr": 8.691482455046806e-08, "epoch": 4.398407153152713, "percentage": 87.97, "elapsed_time": "3:52:55", "remaining_time": "0:31:51", "throughput": 8678.71, "total_tokens": 121289448} +{"current_steps": 180045, "total_steps": 204665, "loss": 0.0, "lr": 8.688005431908318e-08, "epoch": 4.39852930398456, "percentage": 87.97, "elapsed_time": "3:52:55", "remaining_time": "0:31:51", "throughput": 8678.73, "total_tokens": 121292840} +{"current_steps": 180050, "total_steps": 204665, "loss": 0.0, "lr": 8.684529072815206e-08, "epoch": 4.398651454816408, "percentage": 87.97, "elapsed_time": "3:52:56", "remaining_time": "0:31:50", "throughput": 8678.77, "total_tokens": 121296360} +{"current_steps": 180055, "total_steps": 204665, "loss": 0.0, "lr": 8.681053377792768e-08, "epoch": 4.3987736056482545, "percentage": 87.98, "elapsed_time": "3:52:56", "remaining_time": "0:31:50", "throughput": 8678.8, "total_tokens": 121299752} +{"current_steps": 180060, "total_steps": 204665, "loss": 0.0, "lr": 8.677578346866254e-08, "epoch": 4.398895756480101, "percentage": 87.98, "elapsed_time": "3:52:56", "remaining_time": "0:31:49", "throughput": 8678.82, "total_tokens": 121303144} +{"current_steps": 180065, "total_steps": 204665, "loss": 0.0, "lr": 8.674103980060964e-08, "epoch": 4.399017907311949, "percentage": 87.98, "elapsed_time": "3:52:57", "remaining_time": "0:31:49", "throughput": 8678.86, "total_tokens": 121306856} +{"current_steps": 180070, "total_steps": 204665, "loss": 0.0, "lr": 8.670630277402147e-08, "epoch": 4.399140058143796, "percentage": 87.98, "elapsed_time": "3:52:57", "remaining_time": "0:31:49", "throughput": 8678.89, "total_tokens": 121310248} +{"current_steps": 180075, "total_steps": 204665, "loss": 0.0, "lr": 8.667157238915079e-08, "epoch": 4.399262208975643, "percentage": 87.99, "elapsed_time": "3:52:57", "remaining_time": "0:31:48", "throughput": 8678.92, "total_tokens": 121313768} +{"current_steps": 180080, "total_steps": 204665, "loss": 0.0, "lr": 8.663684864624998e-08, "epoch": 4.39938435980749, "percentage": 87.99, "elapsed_time": "3:52:58", "remaining_time": "0:31:48", "throughput": 8678.98, "total_tokens": 121317736} +{"current_steps": 180085, "total_steps": 204665, "loss": 0.0, "lr": 8.660213154557172e-08, "epoch": 4.399506510639338, "percentage": 87.99, "elapsed_time": "3:52:58", "remaining_time": "0:31:47", "throughput": 8678.98, "total_tokens": 121320616} +{"current_steps": 180090, "total_steps": 204665, "loss": 0.0, "lr": 8.656742108736859e-08, "epoch": 4.399628661471184, "percentage": 87.99, "elapsed_time": "3:52:59", "remaining_time": "0:31:47", "throughput": 8679.02, "total_tokens": 121324328} +{"current_steps": 180095, "total_steps": 204665, "loss": 0.0, "lr": 8.653271727189259e-08, "epoch": 4.399750812303032, "percentage": 88.0, "elapsed_time": "3:52:59", "remaining_time": "0:31:47", "throughput": 8679.08, "total_tokens": 121328296} +{"current_steps": 180100, "total_steps": 204665, "loss": 0.0, "lr": 8.649802009939666e-08, "epoch": 4.399872963134879, "percentage": 88.0, "elapsed_time": "3:52:59", "remaining_time": "0:31:46", "throughput": 8679.11, "total_tokens": 121331752} +{"current_steps": 180105, "total_steps": 204665, "loss": 0.0, "lr": 8.646332957013258e-08, "epoch": 4.399995113966726, "percentage": 88.0, "elapsed_time": "3:53:00", "remaining_time": "0:31:46", "throughput": 8679.13, "total_tokens": 121334952} +{"current_steps": 180110, "total_steps": 204665, "loss": 0.0, "lr": 8.642864568435281e-08, "epoch": 4.400117264798573, "percentage": 88.0, "elapsed_time": "3:53:00", "remaining_time": "0:31:46", "throughput": 8679.18, "total_tokens": 121338792} +{"current_steps": 180115, "total_steps": 204665, "loss": 0.09, "lr": 8.639396844230995e-08, "epoch": 4.400239415630421, "percentage": 88.0, "elapsed_time": "3:53:00", "remaining_time": "0:31:45", "throughput": 8679.2, "total_tokens": 121342120} +{"current_steps": 180120, "total_steps": 204665, "loss": 0.105, "lr": 8.635929784425566e-08, "epoch": 4.4003615664622675, "percentage": 88.01, "elapsed_time": "3:53:01", "remaining_time": "0:31:45", "throughput": 8679.23, "total_tokens": 121345576} +{"current_steps": 180125, "total_steps": 204665, "loss": 0.0, "lr": 8.632463389044253e-08, "epoch": 4.400483717294115, "percentage": 88.01, "elapsed_time": "3:53:01", "remaining_time": "0:31:44", "throughput": 8679.28, "total_tokens": 121349288} +{"current_steps": 180130, "total_steps": 204665, "loss": 0.0, "lr": 8.628997658112214e-08, "epoch": 4.400605868125962, "percentage": 88.01, "elapsed_time": "3:53:01", "remaining_time": "0:31:44", "throughput": 8679.35, "total_tokens": 121353448} +{"current_steps": 180135, "total_steps": 204665, "loss": 0.0, "lr": 8.625532591654705e-08, "epoch": 4.4007280189578095, "percentage": 88.01, "elapsed_time": "3:53:02", "remaining_time": "0:31:44", "throughput": 8679.36, "total_tokens": 121356648} +{"current_steps": 180140, "total_steps": 204665, "loss": 0.0, "lr": 8.622068189696896e-08, "epoch": 4.400850169789656, "percentage": 88.02, "elapsed_time": "3:53:02", "remaining_time": "0:31:43", "throughput": 8679.38, "total_tokens": 121359912} +{"current_steps": 180145, "total_steps": 204665, "loss": 0.0, "lr": 8.618604452263967e-08, "epoch": 4.400972320621504, "percentage": 88.02, "elapsed_time": "3:53:02", "remaining_time": "0:31:43", "throughput": 8679.4, "total_tokens": 121363176} +{"current_steps": 180150, "total_steps": 204665, "loss": 0.0, "lr": 8.615141379381141e-08, "epoch": 4.401094471453351, "percentage": 88.02, "elapsed_time": "3:53:03", "remaining_time": "0:31:42", "throughput": 8679.42, "total_tokens": 121366504} +{"current_steps": 180155, "total_steps": 204665, "loss": 0.0, "lr": 8.611678971073577e-08, "epoch": 4.401216622285197, "percentage": 88.02, "elapsed_time": "3:53:03", "remaining_time": "0:31:42", "throughput": 8679.46, "total_tokens": 121370088} +{"current_steps": 180160, "total_steps": 204665, "loss": 0.0, "lr": 8.608217227366465e-08, "epoch": 4.401338773117045, "percentage": 88.03, "elapsed_time": "3:53:03", "remaining_time": "0:31:42", "throughput": 8679.5, "total_tokens": 121373736} +{"current_steps": 180165, "total_steps": 204665, "loss": 0.0, "lr": 8.604756148284975e-08, "epoch": 4.401460923948892, "percentage": 88.03, "elapsed_time": "3:53:04", "remaining_time": "0:31:41", "throughput": 8679.51, "total_tokens": 121376936} +{"current_steps": 180170, "total_steps": 204665, "loss": 0.0, "lr": 8.601295733854297e-08, "epoch": 4.401583074780739, "percentage": 88.03, "elapsed_time": "3:53:04", "remaining_time": "0:31:41", "throughput": 8679.54, "total_tokens": 121380264} +{"current_steps": 180175, "total_steps": 204665, "loss": 0.0001, "lr": 8.597835984099566e-08, "epoch": 4.401705225612586, "percentage": 88.03, "elapsed_time": "3:53:05", "remaining_time": "0:31:40", "throughput": 8679.57, "total_tokens": 121383848} +{"current_steps": 180180, "total_steps": 204665, "loss": 0.0, "lr": 8.59437689904594e-08, "epoch": 4.401827376444434, "percentage": 88.04, "elapsed_time": "3:53:05", "remaining_time": "0:31:40", "throughput": 8679.59, "total_tokens": 121387112} +{"current_steps": 180185, "total_steps": 204665, "loss": 0.0, "lr": 8.590918478718623e-08, "epoch": 4.4019495272762805, "percentage": 88.04, "elapsed_time": "3:53:05", "remaining_time": "0:31:40", "throughput": 8679.63, "total_tokens": 121390760} +{"current_steps": 180190, "total_steps": 204665, "loss": 0.0, "lr": 8.587460723142715e-08, "epoch": 4.402071678108128, "percentage": 88.04, "elapsed_time": "3:53:06", "remaining_time": "0:31:39", "throughput": 8679.66, "total_tokens": 121394088} +{"current_steps": 180195, "total_steps": 204665, "loss": 0.0, "lr": 8.584003632343384e-08, "epoch": 4.402193828939975, "percentage": 88.04, "elapsed_time": "3:53:06", "remaining_time": "0:31:39", "throughput": 8679.69, "total_tokens": 121397608} +{"current_steps": 180200, "total_steps": 204665, "loss": 0.0, "lr": 8.580547206345767e-08, "epoch": 4.4023159797718225, "percentage": 88.05, "elapsed_time": "3:53:06", "remaining_time": "0:31:38", "throughput": 8679.77, "total_tokens": 121401960} +{"current_steps": 180205, "total_steps": 204665, "loss": 0.0, "lr": 8.577091445174988e-08, "epoch": 4.402438130603669, "percentage": 88.05, "elapsed_time": "3:53:07", "remaining_time": "0:31:38", "throughput": 8679.81, "total_tokens": 121405544} +{"current_steps": 180210, "total_steps": 204665, "loss": 0.0, "lr": 8.573636348856205e-08, "epoch": 4.402560281435517, "percentage": 88.05, "elapsed_time": "3:53:07", "remaining_time": "0:31:38", "throughput": 8679.84, "total_tokens": 121409128} +{"current_steps": 180215, "total_steps": 204665, "loss": 0.0, "lr": 8.570181917414521e-08, "epoch": 4.402682432267364, "percentage": 88.05, "elapsed_time": "3:53:07", "remaining_time": "0:31:37", "throughput": 8679.84, "total_tokens": 121412008} +{"current_steps": 180220, "total_steps": 204665, "loss": 0.0, "lr": 8.56672815087508e-08, "epoch": 4.402804583099211, "percentage": 88.06, "elapsed_time": "3:53:08", "remaining_time": "0:31:37", "throughput": 8679.88, "total_tokens": 121415656} +{"current_steps": 180225, "total_steps": 204665, "loss": 0.0, "lr": 8.563275049262964e-08, "epoch": 4.402926733931058, "percentage": 88.06, "elapsed_time": "3:53:08", "remaining_time": "0:31:36", "throughput": 8679.92, "total_tokens": 121419240} +{"current_steps": 180230, "total_steps": 204665, "loss": 0.0, "lr": 8.559822612603318e-08, "epoch": 4.403048884762905, "percentage": 88.06, "elapsed_time": "3:53:08", "remaining_time": "0:31:36", "throughput": 8679.93, "total_tokens": 121422312} +{"current_steps": 180235, "total_steps": 204665, "loss": 0.0, "lr": 8.556370840921235e-08, "epoch": 4.403171035594752, "percentage": 88.06, "elapsed_time": "3:53:09", "remaining_time": "0:31:36", "throughput": 8679.94, "total_tokens": 121425576} +{"current_steps": 180240, "total_steps": 204665, "loss": 0.0, "lr": 8.552919734241825e-08, "epoch": 4.403293186426599, "percentage": 88.07, "elapsed_time": "3:53:09", "remaining_time": "0:31:35", "throughput": 8679.96, "total_tokens": 121428776} +{"current_steps": 180245, "total_steps": 204665, "loss": 0.0, "lr": 8.549469292590161e-08, "epoch": 4.403415337258447, "percentage": 88.07, "elapsed_time": "3:53:09", "remaining_time": "0:31:35", "throughput": 8679.98, "total_tokens": 121431976} +{"current_steps": 180250, "total_steps": 204665, "loss": 0.0, "lr": 8.546019515991376e-08, "epoch": 4.4035374880902936, "percentage": 88.07, "elapsed_time": "3:53:10", "remaining_time": "0:31:34", "throughput": 8679.97, "total_tokens": 121434856} +{"current_steps": 180255, "total_steps": 204665, "loss": 0.0, "lr": 8.542570404470529e-08, "epoch": 4.403659638922141, "percentage": 88.07, "elapsed_time": "3:53:10", "remaining_time": "0:31:34", "throughput": 8679.99, "total_tokens": 121438184} +{"current_steps": 180260, "total_steps": 204665, "loss": 0.0, "lr": 8.539121958052697e-08, "epoch": 4.403781789753988, "percentage": 88.08, "elapsed_time": "3:53:10", "remaining_time": "0:31:34", "throughput": 8680.03, "total_tokens": 121441768} +{"current_steps": 180265, "total_steps": 204665, "loss": 0.0, "lr": 8.535674176762986e-08, "epoch": 4.403903940585836, "percentage": 88.08, "elapsed_time": "3:53:11", "remaining_time": "0:31:33", "throughput": 8680.05, "total_tokens": 121444968} +{"current_steps": 180270, "total_steps": 204665, "loss": 0.0, "lr": 8.53222706062643e-08, "epoch": 4.404026091417682, "percentage": 88.08, "elapsed_time": "3:53:11", "remaining_time": "0:31:33", "throughput": 8680.07, "total_tokens": 121448296} +{"current_steps": 180275, "total_steps": 204665, "loss": 0.0001, "lr": 8.52878060966813e-08, "epoch": 4.40414824224953, "percentage": 88.08, "elapsed_time": "3:53:11", "remaining_time": "0:31:33", "throughput": 8680.12, "total_tokens": 121452072} +{"current_steps": 180280, "total_steps": 204665, "loss": 0.0003, "lr": 8.525334823913156e-08, "epoch": 4.404270393081377, "percentage": 88.09, "elapsed_time": "3:53:12", "remaining_time": "0:31:32", "throughput": 8680.14, "total_tokens": 121455272} +{"current_steps": 180285, "total_steps": 204665, "loss": 0.0, "lr": 8.521889703386532e-08, "epoch": 4.404392543913224, "percentage": 88.09, "elapsed_time": "3:53:12", "remaining_time": "0:31:32", "throughput": 8680.15, "total_tokens": 121458408} +{"current_steps": 180290, "total_steps": 204665, "loss": 0.0, "lr": 8.518445248113338e-08, "epoch": 4.404514694745071, "percentage": 88.09, "elapsed_time": "3:53:13", "remaining_time": "0:31:31", "throughput": 8680.17, "total_tokens": 121461736} +{"current_steps": 180295, "total_steps": 204665, "loss": 0.0, "lr": 8.515001458118609e-08, "epoch": 4.404636845576919, "percentage": 88.09, "elapsed_time": "3:53:13", "remaining_time": "0:31:31", "throughput": 8680.23, "total_tokens": 121465640} +{"current_steps": 180300, "total_steps": 204665, "loss": 0.0, "lr": 8.511558333427393e-08, "epoch": 4.4047589964087654, "percentage": 88.1, "elapsed_time": "3:53:13", "remaining_time": "0:31:31", "throughput": 8680.25, "total_tokens": 121468968} +{"current_steps": 180305, "total_steps": 204665, "loss": 0.0, "lr": 8.508115874064759e-08, "epoch": 4.404881147240613, "percentage": 88.1, "elapsed_time": "3:53:14", "remaining_time": "0:31:30", "throughput": 8680.26, "total_tokens": 121472232} +{"current_steps": 180310, "total_steps": 204665, "loss": 0.0001, "lr": 8.504674080055685e-08, "epoch": 4.40500329807246, "percentage": 88.1, "elapsed_time": "3:53:14", "remaining_time": "0:31:30", "throughput": 8680.3, "total_tokens": 121475752} +{"current_steps": 180315, "total_steps": 204665, "loss": 0.0, "lr": 8.501232951425252e-08, "epoch": 4.4051254489043075, "percentage": 88.1, "elapsed_time": "3:53:14", "remaining_time": "0:31:29", "throughput": 8680.34, "total_tokens": 121479464} +{"current_steps": 180320, "total_steps": 204665, "loss": 0.0, "lr": 8.497792488198452e-08, "epoch": 4.405247599736154, "percentage": 88.1, "elapsed_time": "3:53:15", "remaining_time": "0:31:29", "throughput": 8680.38, "total_tokens": 121483048} +{"current_steps": 180325, "total_steps": 204665, "loss": 0.0, "lr": 8.494352690400319e-08, "epoch": 4.405369750568001, "percentage": 88.11, "elapsed_time": "3:53:15", "remaining_time": "0:31:29", "throughput": 8680.4, "total_tokens": 121486376} +{"current_steps": 180330, "total_steps": 204665, "loss": 0.0, "lr": 8.490913558055856e-08, "epoch": 4.405491901399849, "percentage": 88.11, "elapsed_time": "3:53:15", "remaining_time": "0:31:28", "throughput": 8680.41, "total_tokens": 121489448} +{"current_steps": 180335, "total_steps": 204665, "loss": 0.0, "lr": 8.487475091190088e-08, "epoch": 4.405614052231695, "percentage": 88.11, "elapsed_time": "3:53:16", "remaining_time": "0:31:28", "throughput": 8680.44, "total_tokens": 121492904} +{"current_steps": 180340, "total_steps": 204665, "loss": 0.0, "lr": 8.484037289828028e-08, "epoch": 4.405736203063543, "percentage": 88.11, "elapsed_time": "3:53:16", "remaining_time": "0:31:27", "throughput": 8680.47, "total_tokens": 121496424} +{"current_steps": 180345, "total_steps": 204665, "loss": 0.0, "lr": 8.480600153994666e-08, "epoch": 4.40585835389539, "percentage": 88.12, "elapsed_time": "3:53:16", "remaining_time": "0:31:27", "throughput": 8680.48, "total_tokens": 121499496} +{"current_steps": 180350, "total_steps": 204665, "loss": 0.0, "lr": 8.477163683714972e-08, "epoch": 4.405980504727237, "percentage": 88.12, "elapsed_time": "3:53:17", "remaining_time": "0:31:27", "throughput": 8680.5, "total_tokens": 121502696} +{"current_steps": 180355, "total_steps": 204665, "loss": 0.0, "lr": 8.473727879013981e-08, "epoch": 4.406102655559084, "percentage": 88.12, "elapsed_time": "3:53:17", "remaining_time": "0:31:26", "throughput": 8680.52, "total_tokens": 121506088} +{"current_steps": 180360, "total_steps": 204665, "loss": 0.0, "lr": 8.470292739916641e-08, "epoch": 4.406224806390932, "percentage": 88.12, "elapsed_time": "3:53:17", "remaining_time": "0:31:26", "throughput": 8680.56, "total_tokens": 121509672} +{"current_steps": 180365, "total_steps": 204665, "loss": 0.0004, "lr": 8.466858266447962e-08, "epoch": 4.4063469572227785, "percentage": 88.13, "elapsed_time": "3:53:18", "remaining_time": "0:31:25", "throughput": 8680.59, "total_tokens": 121513064} +{"current_steps": 180370, "total_steps": 204665, "loss": 0.0, "lr": 8.463424458632906e-08, "epoch": 4.406469108054626, "percentage": 88.13, "elapsed_time": "3:53:18", "remaining_time": "0:31:25", "throughput": 8680.61, "total_tokens": 121516456} +{"current_steps": 180375, "total_steps": 204665, "loss": 0.0, "lr": 8.459991316496452e-08, "epoch": 4.406591258886473, "percentage": 88.13, "elapsed_time": "3:53:18", "remaining_time": "0:31:25", "throughput": 8680.63, "total_tokens": 121519656} +{"current_steps": 180380, "total_steps": 204665, "loss": 0.0, "lr": 8.456558840063555e-08, "epoch": 4.4067134097183205, "percentage": 88.13, "elapsed_time": "3:53:19", "remaining_time": "0:31:24", "throughput": 8680.64, "total_tokens": 121522728} +{"current_steps": 180385, "total_steps": 204665, "loss": 0.0, "lr": 8.453127029359197e-08, "epoch": 4.406835560550167, "percentage": 88.14, "elapsed_time": "3:53:19", "remaining_time": "0:31:24", "throughput": 8680.63, "total_tokens": 121525608} +{"current_steps": 180390, "total_steps": 204665, "loss": 0.0, "lr": 8.449695884408303e-08, "epoch": 4.406957711382015, "percentage": 88.14, "elapsed_time": "3:53:19", "remaining_time": "0:31:23", "throughput": 8680.66, "total_tokens": 121528936} +{"current_steps": 180395, "total_steps": 204665, "loss": 0.0435, "lr": 8.446265405235842e-08, "epoch": 4.407079862213862, "percentage": 88.14, "elapsed_time": "3:53:20", "remaining_time": "0:31:23", "throughput": 8680.66, "total_tokens": 121532008} +{"current_steps": 180400, "total_steps": 204665, "loss": 0.0286, "lr": 8.442835591866792e-08, "epoch": 4.407202013045709, "percentage": 88.14, "elapsed_time": "3:53:20", "remaining_time": "0:31:23", "throughput": 8680.68, "total_tokens": 121535144} +{"current_steps": 180405, "total_steps": 204665, "loss": 0.0, "lr": 8.439406444326047e-08, "epoch": 4.407324163877556, "percentage": 88.15, "elapsed_time": "3:53:21", "remaining_time": "0:31:22", "throughput": 8680.72, "total_tokens": 121538920} +{"current_steps": 180410, "total_steps": 204665, "loss": 0.0, "lr": 8.435977962638574e-08, "epoch": 4.407446314709404, "percentage": 88.15, "elapsed_time": "3:53:21", "remaining_time": "0:31:22", "throughput": 8680.74, "total_tokens": 121542120} +{"current_steps": 180415, "total_steps": 204665, "loss": 0.0, "lr": 8.432550146829287e-08, "epoch": 4.40756846554125, "percentage": 88.15, "elapsed_time": "3:53:21", "remaining_time": "0:31:22", "throughput": 8680.76, "total_tokens": 121545512} +{"current_steps": 180420, "total_steps": 204665, "loss": 0.0, "lr": 8.429122996923143e-08, "epoch": 4.407690616373097, "percentage": 88.15, "elapsed_time": "3:53:22", "remaining_time": "0:31:21", "throughput": 8680.78, "total_tokens": 121548776} +{"current_steps": 180425, "total_steps": 204665, "loss": 0.0, "lr": 8.425696512945024e-08, "epoch": 4.407812767204945, "percentage": 88.16, "elapsed_time": "3:53:22", "remaining_time": "0:31:21", "throughput": 8680.81, "total_tokens": 121552232} +{"current_steps": 180430, "total_steps": 204665, "loss": 0.0001, "lr": 8.422270694919864e-08, "epoch": 4.4079349180367915, "percentage": 88.16, "elapsed_time": "3:53:22", "remaining_time": "0:31:20", "throughput": 8680.83, "total_tokens": 121555432} +{"current_steps": 180435, "total_steps": 204665, "loss": 0.0, "lr": 8.41884554287261e-08, "epoch": 4.408057068868639, "percentage": 88.16, "elapsed_time": "3:53:23", "remaining_time": "0:31:20", "throughput": 8680.85, "total_tokens": 121558696} +{"current_steps": 180440, "total_steps": 204665, "loss": 0.0, "lr": 8.415421056828132e-08, "epoch": 4.408179219700486, "percentage": 88.16, "elapsed_time": "3:53:23", "remaining_time": "0:31:20", "throughput": 8680.87, "total_tokens": 121562088} +{"current_steps": 180445, "total_steps": 204665, "loss": 0.0, "lr": 8.411997236811352e-08, "epoch": 4.4083013705323335, "percentage": 88.17, "elapsed_time": "3:53:23", "remaining_time": "0:31:19", "throughput": 8680.89, "total_tokens": 121565416} +{"current_steps": 180450, "total_steps": 204665, "loss": 0.0001, "lr": 8.408574082847164e-08, "epoch": 4.40842352136418, "percentage": 88.17, "elapsed_time": "3:53:24", "remaining_time": "0:31:19", "throughput": 8680.91, "total_tokens": 121568616} +{"current_steps": 180455, "total_steps": 204665, "loss": 0.0, "lr": 8.405151594960447e-08, "epoch": 4.408545672196028, "percentage": 88.17, "elapsed_time": "3:53:24", "remaining_time": "0:31:18", "throughput": 8680.95, "total_tokens": 121572264} +{"current_steps": 180460, "total_steps": 204665, "loss": 0.0, "lr": 8.401729773176114e-08, "epoch": 4.408667823027875, "percentage": 88.17, "elapsed_time": "3:53:24", "remaining_time": "0:31:18", "throughput": 8680.97, "total_tokens": 121575528} +{"current_steps": 180465, "total_steps": 204665, "loss": 0.0, "lr": 8.398308617519024e-08, "epoch": 4.408789973859722, "percentage": 88.18, "elapsed_time": "3:53:25", "remaining_time": "0:31:18", "throughput": 8680.97, "total_tokens": 121578536} +{"current_steps": 180470, "total_steps": 204665, "loss": 0.0, "lr": 8.394888128014099e-08, "epoch": 4.408912124691569, "percentage": 88.18, "elapsed_time": "3:53:25", "remaining_time": "0:31:17", "throughput": 8681.01, "total_tokens": 121582184} +{"current_steps": 180475, "total_steps": 204665, "loss": 0.0, "lr": 8.391468304686155e-08, "epoch": 4.409034275523417, "percentage": 88.18, "elapsed_time": "3:53:25", "remaining_time": "0:31:17", "throughput": 8681.02, "total_tokens": 121585256} +{"current_steps": 180480, "total_steps": 204665, "loss": 0.0, "lr": 8.388049147560117e-08, "epoch": 4.409156426355263, "percentage": 88.18, "elapsed_time": "3:53:26", "remaining_time": "0:31:16", "throughput": 8681.05, "total_tokens": 121588712} +{"current_steps": 180485, "total_steps": 204665, "loss": 0.0, "lr": 8.384630656660807e-08, "epoch": 4.409278577187111, "percentage": 88.19, "elapsed_time": "3:53:26", "remaining_time": "0:31:16", "throughput": 8681.08, "total_tokens": 121592104} +{"current_steps": 180490, "total_steps": 204665, "loss": 0.0, "lr": 8.381212832013107e-08, "epoch": 4.409400728018958, "percentage": 88.19, "elapsed_time": "3:53:26", "remaining_time": "0:31:16", "throughput": 8681.09, "total_tokens": 121595240} +{"current_steps": 180495, "total_steps": 204665, "loss": 0.0, "lr": 8.377795673641886e-08, "epoch": 4.4095228788508045, "percentage": 88.19, "elapsed_time": "3:53:27", "remaining_time": "0:31:15", "throughput": 8681.1, "total_tokens": 121598440} +{"current_steps": 180500, "total_steps": 204665, "loss": 0.0, "lr": 8.374379181571967e-08, "epoch": 4.409645029682652, "percentage": 88.19, "elapsed_time": "3:53:27", "remaining_time": "0:31:15", "throughput": 8681.12, "total_tokens": 121601704} +{"current_steps": 180505, "total_steps": 204665, "loss": 0.0, "lr": 8.37096335582822e-08, "epoch": 4.409767180514499, "percentage": 88.2, "elapsed_time": "3:53:27", "remaining_time": "0:31:14", "throughput": 8681.14, "total_tokens": 121604968} +{"current_steps": 180510, "total_steps": 204665, "loss": 0.0, "lr": 8.367548196435447e-08, "epoch": 4.4098893313463465, "percentage": 88.2, "elapsed_time": "3:53:28", "remaining_time": "0:31:14", "throughput": 8681.15, "total_tokens": 121608104} +{"current_steps": 180515, "total_steps": 204665, "loss": 0.0, "lr": 8.364133703418518e-08, "epoch": 4.410011482178193, "percentage": 88.2, "elapsed_time": "3:53:28", "remaining_time": "0:31:14", "throughput": 8681.15, "total_tokens": 121611112} +{"current_steps": 180520, "total_steps": 204665, "loss": 0.0388, "lr": 8.360719876802269e-08, "epoch": 4.410133633010041, "percentage": 88.2, "elapsed_time": "3:53:28", "remaining_time": "0:31:13", "throughput": 8681.18, "total_tokens": 121614504} +{"current_steps": 180525, "total_steps": 204665, "loss": 0.0, "lr": 8.357306716611501e-08, "epoch": 4.410255783841888, "percentage": 88.21, "elapsed_time": "3:53:29", "remaining_time": "0:31:13", "throughput": 8681.2, "total_tokens": 121617896} +{"current_steps": 180530, "total_steps": 204665, "loss": 0.0, "lr": 8.35389422287106e-08, "epoch": 4.410377934673735, "percentage": 88.21, "elapsed_time": "3:53:29", "remaining_time": "0:31:12", "throughput": 8681.22, "total_tokens": 121621096} +{"current_steps": 180535, "total_steps": 204665, "loss": 0.0, "lr": 8.35048239560574e-08, "epoch": 4.410500085505582, "percentage": 88.21, "elapsed_time": "3:53:30", "remaining_time": "0:31:12", "throughput": 8681.25, "total_tokens": 121624616} +{"current_steps": 180540, "total_steps": 204665, "loss": 0.0, "lr": 8.347071234840374e-08, "epoch": 4.41062223633743, "percentage": 88.21, "elapsed_time": "3:53:30", "remaining_time": "0:31:12", "throughput": 8681.26, "total_tokens": 121627688} +{"current_steps": 180545, "total_steps": 204665, "loss": 0.0, "lr": 8.343660740599745e-08, "epoch": 4.410744387169276, "percentage": 88.21, "elapsed_time": "3:53:30", "remaining_time": "0:31:11", "throughput": 8681.29, "total_tokens": 121631144} +{"current_steps": 180550, "total_steps": 204665, "loss": 0.0, "lr": 8.340250912908675e-08, "epoch": 4.410866538001124, "percentage": 88.22, "elapsed_time": "3:53:31", "remaining_time": "0:31:11", "throughput": 8681.3, "total_tokens": 121634280} +{"current_steps": 180555, "total_steps": 204665, "loss": 0.0, "lr": 8.336841751791946e-08, "epoch": 4.410988688832971, "percentage": 88.22, "elapsed_time": "3:53:31", "remaining_time": "0:31:10", "throughput": 8681.34, "total_tokens": 121637864} +{"current_steps": 180560, "total_steps": 204665, "loss": 0.0, "lr": 8.333433257274369e-08, "epoch": 4.411110839664818, "percentage": 88.22, "elapsed_time": "3:53:31", "remaining_time": "0:31:10", "throughput": 8681.37, "total_tokens": 121641320} +{"current_steps": 180565, "total_steps": 204665, "loss": 0.0, "lr": 8.330025429380727e-08, "epoch": 4.411232990496665, "percentage": 88.22, "elapsed_time": "3:53:32", "remaining_time": "0:31:10", "throughput": 8681.39, "total_tokens": 121644584} +{"current_steps": 180570, "total_steps": 204665, "loss": 0.0, "lr": 8.326618268135776e-08, "epoch": 4.411355141328513, "percentage": 88.23, "elapsed_time": "3:53:32", "remaining_time": "0:31:09", "throughput": 8681.4, "total_tokens": 121647592} +{"current_steps": 180575, "total_steps": 204665, "loss": 0.0, "lr": 8.323211773564331e-08, "epoch": 4.4114772921603596, "percentage": 88.23, "elapsed_time": "3:53:32", "remaining_time": "0:31:09", "throughput": 8681.41, "total_tokens": 121650664} +{"current_steps": 180580, "total_steps": 204665, "loss": 0.0421, "lr": 8.319805945691127e-08, "epoch": 4.411599442992207, "percentage": 88.23, "elapsed_time": "3:53:33", "remaining_time": "0:31:09", "throughput": 8681.42, "total_tokens": 121653928} +{"current_steps": 180585, "total_steps": 204665, "loss": 0.0, "lr": 8.316400784540966e-08, "epoch": 4.411721593824054, "percentage": 88.23, "elapsed_time": "3:53:33", "remaining_time": "0:31:08", "throughput": 8681.46, "total_tokens": 121657576} +{"current_steps": 180590, "total_steps": 204665, "loss": 0.0, "lr": 8.312996290138607e-08, "epoch": 4.411843744655901, "percentage": 88.24, "elapsed_time": "3:53:33", "remaining_time": "0:31:08", "throughput": 8681.48, "total_tokens": 121660904} +{"current_steps": 180595, "total_steps": 204665, "loss": 0.0, "lr": 8.309592462508774e-08, "epoch": 4.411965895487748, "percentage": 88.24, "elapsed_time": "3:53:34", "remaining_time": "0:31:07", "throughput": 8681.51, "total_tokens": 121664232} +{"current_steps": 180600, "total_steps": 204665, "loss": 0.0, "lr": 8.306189301676281e-08, "epoch": 4.412088046319595, "percentage": 88.24, "elapsed_time": "3:53:34", "remaining_time": "0:31:07", "throughput": 8681.52, "total_tokens": 121667496} +{"current_steps": 180605, "total_steps": 204665, "loss": 0.0, "lr": 8.302786807665807e-08, "epoch": 4.412210197151443, "percentage": 88.24, "elapsed_time": "3:53:34", "remaining_time": "0:31:07", "throughput": 8681.53, "total_tokens": 121670440} +{"current_steps": 180610, "total_steps": 204665, "loss": 0.0003, "lr": 8.299384980502144e-08, "epoch": 4.412332347983289, "percentage": 88.25, "elapsed_time": "3:53:35", "remaining_time": "0:31:06", "throughput": 8681.54, "total_tokens": 121673576} +{"current_steps": 180615, "total_steps": 204665, "loss": 0.0, "lr": 8.295983820210028e-08, "epoch": 4.412454498815137, "percentage": 88.25, "elapsed_time": "3:53:35", "remaining_time": "0:31:06", "throughput": 8681.56, "total_tokens": 121676840} +{"current_steps": 180620, "total_steps": 204665, "loss": 0.0003, "lr": 8.292583326814173e-08, "epoch": 4.412576649646984, "percentage": 88.25, "elapsed_time": "3:53:35", "remaining_time": "0:31:05", "throughput": 8681.58, "total_tokens": 121680104} +{"current_steps": 180625, "total_steps": 204665, "loss": 0.0, "lr": 8.289183500339337e-08, "epoch": 4.4126988004788315, "percentage": 88.25, "elapsed_time": "3:53:36", "remaining_time": "0:31:05", "throughput": 8681.6, "total_tokens": 121683496} +{"current_steps": 180630, "total_steps": 204665, "loss": 0.0, "lr": 8.285784340810198e-08, "epoch": 4.412820951310678, "percentage": 88.26, "elapsed_time": "3:53:36", "remaining_time": "0:31:05", "throughput": 8681.63, "total_tokens": 121687016} +{"current_steps": 180635, "total_steps": 204665, "loss": 0.0, "lr": 8.282385848251527e-08, "epoch": 4.412943102142526, "percentage": 88.26, "elapsed_time": "3:53:36", "remaining_time": "0:31:04", "throughput": 8681.64, "total_tokens": 121690152} +{"current_steps": 180640, "total_steps": 204665, "loss": 0.0, "lr": 8.278988022687994e-08, "epoch": 4.413065252974373, "percentage": 88.26, "elapsed_time": "3:53:37", "remaining_time": "0:31:04", "throughput": 8681.66, "total_tokens": 121693416} +{"current_steps": 180645, "total_steps": 204665, "loss": 0.0001, "lr": 8.275590864144333e-08, "epoch": 4.41318740380622, "percentage": 88.26, "elapsed_time": "3:53:37", "remaining_time": "0:31:03", "throughput": 8681.69, "total_tokens": 121696808} +{"current_steps": 180650, "total_steps": 204665, "loss": 0.0, "lr": 8.27219437264527e-08, "epoch": 4.413309554638067, "percentage": 88.27, "elapsed_time": "3:53:38", "remaining_time": "0:31:03", "throughput": 8681.74, "total_tokens": 121700712} +{"current_steps": 180655, "total_steps": 204665, "loss": 0.0001, "lr": 8.268798548215472e-08, "epoch": 4.413431705469915, "percentage": 88.27, "elapsed_time": "3:53:38", "remaining_time": "0:31:03", "throughput": 8681.81, "total_tokens": 121704808} +{"current_steps": 180660, "total_steps": 204665, "loss": 0.0, "lr": 8.265403390879633e-08, "epoch": 4.413553856301761, "percentage": 88.27, "elapsed_time": "3:53:38", "remaining_time": "0:31:02", "throughput": 8681.82, "total_tokens": 121707880} +{"current_steps": 180665, "total_steps": 204665, "loss": 0.0354, "lr": 8.262008900662464e-08, "epoch": 4.413676007133609, "percentage": 88.27, "elapsed_time": "3:53:39", "remaining_time": "0:31:02", "throughput": 8681.83, "total_tokens": 121711080} +{"current_steps": 180670, "total_steps": 204665, "loss": 0.0, "lr": 8.258615077588627e-08, "epoch": 4.413798157965456, "percentage": 88.28, "elapsed_time": "3:53:39", "remaining_time": "0:31:01", "throughput": 8681.86, "total_tokens": 121714536} +{"current_steps": 180675, "total_steps": 204665, "loss": 0.0, "lr": 8.255221921682831e-08, "epoch": 4.413920308797303, "percentage": 88.28, "elapsed_time": "3:53:39", "remaining_time": "0:31:01", "throughput": 8681.9, "total_tokens": 121718120} +{"current_steps": 180680, "total_steps": 204665, "loss": 0.0, "lr": 8.251829432969726e-08, "epoch": 4.41404245962915, "percentage": 88.28, "elapsed_time": "3:53:40", "remaining_time": "0:31:01", "throughput": 8681.92, "total_tokens": 121721320} +{"current_steps": 180685, "total_steps": 204665, "loss": 0.0, "lr": 8.248437611474013e-08, "epoch": 4.414164610460997, "percentage": 88.28, "elapsed_time": "3:53:40", "remaining_time": "0:31:00", "throughput": 8681.95, "total_tokens": 121724776} +{"current_steps": 180690, "total_steps": 204665, "loss": 0.0, "lr": 8.245046457220317e-08, "epoch": 4.4142867612928445, "percentage": 88.29, "elapsed_time": "3:53:40", "remaining_time": "0:31:00", "throughput": 8681.99, "total_tokens": 121728424} +{"current_steps": 180695, "total_steps": 204665, "loss": 0.0, "lr": 8.241655970233341e-08, "epoch": 4.414408912124691, "percentage": 88.29, "elapsed_time": "3:53:41", "remaining_time": "0:30:59", "throughput": 8681.99, "total_tokens": 121731496} +{"current_steps": 180700, "total_steps": 204665, "loss": 0.0, "lr": 8.238266150537699e-08, "epoch": 4.414531062956539, "percentage": 88.29, "elapsed_time": "3:53:41", "remaining_time": "0:30:59", "throughput": 8682.01, "total_tokens": 121734696} +{"current_steps": 180705, "total_steps": 204665, "loss": 0.0, "lr": 8.23487699815808e-08, "epoch": 4.414653213788386, "percentage": 88.29, "elapsed_time": "3:53:41", "remaining_time": "0:30:59", "throughput": 8682.07, "total_tokens": 121738664} +{"current_steps": 180710, "total_steps": 204665, "loss": 0.0, "lr": 8.231488513119123e-08, "epoch": 4.414775364620233, "percentage": 88.3, "elapsed_time": "3:53:42", "remaining_time": "0:30:58", "throughput": 8682.1, "total_tokens": 121742120} +{"current_steps": 180715, "total_steps": 204665, "loss": 0.0, "lr": 8.22810069544545e-08, "epoch": 4.41489751545208, "percentage": 88.3, "elapsed_time": "3:53:42", "remaining_time": "0:30:58", "throughput": 8682.15, "total_tokens": 121745960} +{"current_steps": 180720, "total_steps": 204665, "loss": 0.0, "lr": 8.224713545161732e-08, "epoch": 4.415019666283928, "percentage": 88.3, "elapsed_time": "3:53:42", "remaining_time": "0:30:58", "throughput": 8682.2, "total_tokens": 121749800} +{"current_steps": 180725, "total_steps": 204665, "loss": 0.0, "lr": 8.221327062292571e-08, "epoch": 4.415141817115774, "percentage": 88.3, "elapsed_time": "3:53:43", "remaining_time": "0:30:57", "throughput": 8682.26, "total_tokens": 121753704} +{"current_steps": 180730, "total_steps": 204665, "loss": 0.0, "lr": 8.217941246862614e-08, "epoch": 4.415263967947622, "percentage": 88.31, "elapsed_time": "3:53:43", "remaining_time": "0:30:57", "throughput": 8682.27, "total_tokens": 121756840} +{"current_steps": 180735, "total_steps": 204665, "loss": 0.0, "lr": 8.214556098896464e-08, "epoch": 4.415386118779469, "percentage": 88.31, "elapsed_time": "3:53:43", "remaining_time": "0:30:56", "throughput": 8682.3, "total_tokens": 121760296} +{"current_steps": 180740, "total_steps": 204665, "loss": 0.0, "lr": 8.211171618418744e-08, "epoch": 4.415508269611316, "percentage": 88.31, "elapsed_time": "3:53:44", "remaining_time": "0:30:56", "throughput": 8682.34, "total_tokens": 121764008} +{"current_steps": 180745, "total_steps": 204665, "loss": 0.0001, "lr": 8.20778780545408e-08, "epoch": 4.415630420443163, "percentage": 88.31, "elapsed_time": "3:53:44", "remaining_time": "0:30:56", "throughput": 8682.37, "total_tokens": 121767336} +{"current_steps": 180750, "total_steps": 204665, "loss": 0.0, "lr": 8.204404660027065e-08, "epoch": 4.415752571275011, "percentage": 88.32, "elapsed_time": "3:53:45", "remaining_time": "0:30:55", "throughput": 8682.38, "total_tokens": 121770600} +{"current_steps": 180755, "total_steps": 204665, "loss": 0.0, "lr": 8.201022182162332e-08, "epoch": 4.4158747221068575, "percentage": 88.32, "elapsed_time": "3:53:45", "remaining_time": "0:30:55", "throughput": 8682.4, "total_tokens": 121773864} +{"current_steps": 180760, "total_steps": 204665, "loss": 0.0, "lr": 8.197640371884429e-08, "epoch": 4.415996872938704, "percentage": 88.32, "elapsed_time": "3:53:45", "remaining_time": "0:30:54", "throughput": 8682.43, "total_tokens": 121777256} +{"current_steps": 180765, "total_steps": 204665, "loss": 0.0, "lr": 8.194259229218003e-08, "epoch": 4.416119023770552, "percentage": 88.32, "elapsed_time": "3:53:46", "remaining_time": "0:30:54", "throughput": 8682.48, "total_tokens": 121781096} +{"current_steps": 180770, "total_steps": 204665, "loss": 0.0, "lr": 8.190878754187614e-08, "epoch": 4.4162411746023995, "percentage": 88.32, "elapsed_time": "3:53:46", "remaining_time": "0:30:54", "throughput": 8682.52, "total_tokens": 121784616} +{"current_steps": 180775, "total_steps": 204665, "loss": 0.0, "lr": 8.187498946817828e-08, "epoch": 4.416363325434246, "percentage": 88.33, "elapsed_time": "3:53:46", "remaining_time": "0:30:53", "throughput": 8682.52, "total_tokens": 121787624} +{"current_steps": 180780, "total_steps": 204665, "loss": 0.0002, "lr": 8.18411980713326e-08, "epoch": 4.416485476266093, "percentage": 88.33, "elapsed_time": "3:53:47", "remaining_time": "0:30:53", "throughput": 8682.57, "total_tokens": 121791272} +{"current_steps": 180785, "total_steps": 204665, "loss": 0.0, "lr": 8.180741335158458e-08, "epoch": 4.416607627097941, "percentage": 88.33, "elapsed_time": "3:53:47", "remaining_time": "0:30:52", "throughput": 8682.6, "total_tokens": 121794792} +{"current_steps": 180790, "total_steps": 204665, "loss": 0.0, "lr": 8.177363530918013e-08, "epoch": 4.416729777929787, "percentage": 88.33, "elapsed_time": "3:53:47", "remaining_time": "0:30:52", "throughput": 8682.65, "total_tokens": 121798632} +{"current_steps": 180795, "total_steps": 204665, "loss": 0.0, "lr": 8.173986394436461e-08, "epoch": 4.416851928761635, "percentage": 88.34, "elapsed_time": "3:53:48", "remaining_time": "0:30:52", "throughput": 8682.68, "total_tokens": 121802024} +{"current_steps": 180800, "total_steps": 204665, "loss": 0.0, "lr": 8.17060992573838e-08, "epoch": 4.416974079593482, "percentage": 88.34, "elapsed_time": "3:53:48", "remaining_time": "0:30:51", "throughput": 8682.72, "total_tokens": 121805736} +{"current_steps": 180805, "total_steps": 204665, "loss": 0.0, "lr": 8.167234124848344e-08, "epoch": 4.417096230425329, "percentage": 88.34, "elapsed_time": "3:53:48", "remaining_time": "0:30:51", "throughput": 8682.74, "total_tokens": 121808936} +{"current_steps": 180810, "total_steps": 204665, "loss": 0.0, "lr": 8.163858991790861e-08, "epoch": 4.417218381257176, "percentage": 88.34, "elapsed_time": "3:53:49", "remaining_time": "0:30:50", "throughput": 8682.74, "total_tokens": 121812008} +{"current_steps": 180815, "total_steps": 204665, "loss": 0.0, "lr": 8.160484526590516e-08, "epoch": 4.417340532089024, "percentage": 88.35, "elapsed_time": "3:53:49", "remaining_time": "0:30:50", "throughput": 8682.78, "total_tokens": 121815592} +{"current_steps": 180820, "total_steps": 204665, "loss": 0.0, "lr": 8.157110729271799e-08, "epoch": 4.4174626829208705, "percentage": 88.35, "elapsed_time": "3:53:49", "remaining_time": "0:30:50", "throughput": 8682.78, "total_tokens": 121818600} +{"current_steps": 180825, "total_steps": 204665, "loss": 0.0, "lr": 8.153737599859312e-08, "epoch": 4.417584833752718, "percentage": 88.35, "elapsed_time": "3:53:50", "remaining_time": "0:30:49", "throughput": 8682.79, "total_tokens": 121821672} +{"current_steps": 180830, "total_steps": 204665, "loss": 0.0, "lr": 8.150365138377513e-08, "epoch": 4.417706984584565, "percentage": 88.35, "elapsed_time": "3:53:50", "remaining_time": "0:30:49", "throughput": 8682.84, "total_tokens": 121825512} +{"current_steps": 180835, "total_steps": 204665, "loss": 0.0, "lr": 8.146993344850973e-08, "epoch": 4.4178291354164125, "percentage": 88.36, "elapsed_time": "3:53:50", "remaining_time": "0:30:48", "throughput": 8682.86, "total_tokens": 121828712} +{"current_steps": 180840, "total_steps": 204665, "loss": 0.0, "lr": 8.143622219304225e-08, "epoch": 4.417951286248259, "percentage": 88.36, "elapsed_time": "3:53:51", "remaining_time": "0:30:48", "throughput": 8682.87, "total_tokens": 121831784} +{"current_steps": 180845, "total_steps": 204665, "loss": 0.0, "lr": 8.140251761761741e-08, "epoch": 4.418073437080107, "percentage": 88.36, "elapsed_time": "3:53:51", "remaining_time": "0:30:48", "throughput": 8682.92, "total_tokens": 121835688} +{"current_steps": 180850, "total_steps": 204665, "loss": 0.0, "lr": 8.136881972248067e-08, "epoch": 4.418195587911954, "percentage": 88.36, "elapsed_time": "3:53:51", "remaining_time": "0:30:47", "throughput": 8682.94, "total_tokens": 121839016} +{"current_steps": 180855, "total_steps": 204665, "loss": 0.0, "lr": 8.133512850787682e-08, "epoch": 4.4183177387438, "percentage": 88.37, "elapsed_time": "3:53:52", "remaining_time": "0:30:47", "throughput": 8683.0, "total_tokens": 121842984} +{"current_steps": 180860, "total_steps": 204665, "loss": 0.0392, "lr": 8.130144397405114e-08, "epoch": 4.418439889575648, "percentage": 88.37, "elapsed_time": "3:53:52", "remaining_time": "0:30:47", "throughput": 8683.03, "total_tokens": 121846504} +{"current_steps": 180865, "total_steps": 204665, "loss": 0.0, "lr": 8.12677661212483e-08, "epoch": 4.418562040407495, "percentage": 88.37, "elapsed_time": "3:53:53", "remaining_time": "0:30:46", "throughput": 8683.06, "total_tokens": 121849832} +{"current_steps": 180870, "total_steps": 204665, "loss": 0.0, "lr": 8.123409494971356e-08, "epoch": 4.418684191239342, "percentage": 88.37, "elapsed_time": "3:53:53", "remaining_time": "0:30:46", "throughput": 8683.08, "total_tokens": 121853224} +{"current_steps": 180875, "total_steps": 204665, "loss": 0.0, "lr": 8.120043045969161e-08, "epoch": 4.418806342071189, "percentage": 88.38, "elapsed_time": "3:53:53", "remaining_time": "0:30:45", "throughput": 8683.11, "total_tokens": 121856616} +{"current_steps": 180880, "total_steps": 204665, "loss": 0.0, "lr": 8.116677265142713e-08, "epoch": 4.418928492903037, "percentage": 88.38, "elapsed_time": "3:53:54", "remaining_time": "0:30:45", "throughput": 8683.12, "total_tokens": 121859752} +{"current_steps": 180885, "total_steps": 204665, "loss": 0.0, "lr": 8.113312152516516e-08, "epoch": 4.4190506437348835, "percentage": 88.38, "elapsed_time": "3:53:54", "remaining_time": "0:30:45", "throughput": 8683.13, "total_tokens": 121862888} +{"current_steps": 180890, "total_steps": 204665, "loss": 0.0, "lr": 8.109947708115006e-08, "epoch": 4.419172794566731, "percentage": 88.38, "elapsed_time": "3:53:54", "remaining_time": "0:30:44", "throughput": 8683.16, "total_tokens": 121866344} +{"current_steps": 180895, "total_steps": 204665, "loss": 0.0, "lr": 8.106583931962674e-08, "epoch": 4.419294945398578, "percentage": 88.39, "elapsed_time": "3:53:55", "remaining_time": "0:30:44", "throughput": 8683.18, "total_tokens": 121869800} +{"current_steps": 180900, "total_steps": 204665, "loss": 0.0, "lr": 8.103220824083989e-08, "epoch": 4.419417096230426, "percentage": 88.39, "elapsed_time": "3:53:55", "remaining_time": "0:30:43", "throughput": 8683.22, "total_tokens": 121873384} +{"current_steps": 180905, "total_steps": 204665, "loss": 0.0, "lr": 8.0998583845034e-08, "epoch": 4.419539247062272, "percentage": 88.39, "elapsed_time": "3:53:55", "remaining_time": "0:30:43", "throughput": 8683.27, "total_tokens": 121877224} +{"current_steps": 180910, "total_steps": 204665, "loss": 0.0, "lr": 8.096496613245363e-08, "epoch": 4.41966139789412, "percentage": 88.39, "elapsed_time": "3:53:56", "remaining_time": "0:30:43", "throughput": 8683.27, "total_tokens": 121880040} +{"current_steps": 180915, "total_steps": 204665, "loss": 0.0, "lr": 8.093135510334304e-08, "epoch": 4.419783548725967, "percentage": 88.4, "elapsed_time": "3:53:56", "remaining_time": "0:30:42", "throughput": 8683.3, "total_tokens": 121883496} +{"current_steps": 180920, "total_steps": 204665, "loss": 0.0, "lr": 8.089775075794691e-08, "epoch": 4.419905699557814, "percentage": 88.4, "elapsed_time": "3:53:56", "remaining_time": "0:30:42", "throughput": 8683.31, "total_tokens": 121886696} +{"current_steps": 180925, "total_steps": 204665, "loss": 0.0, "lr": 8.086415309650962e-08, "epoch": 4.420027850389661, "percentage": 88.4, "elapsed_time": "3:53:57", "remaining_time": "0:30:41", "throughput": 8683.33, "total_tokens": 121890024} +{"current_steps": 180930, "total_steps": 204665, "loss": 0.0, "lr": 8.08305621192753e-08, "epoch": 4.420150001221509, "percentage": 88.4, "elapsed_time": "3:53:57", "remaining_time": "0:30:41", "throughput": 8683.35, "total_tokens": 121893288} +{"current_steps": 180935, "total_steps": 204665, "loss": 0.0, "lr": 8.079697782648864e-08, "epoch": 4.420272152053355, "percentage": 88.41, "elapsed_time": "3:53:57", "remaining_time": "0:30:41", "throughput": 8683.37, "total_tokens": 121896680} +{"current_steps": 180940, "total_steps": 204665, "loss": 0.0, "lr": 8.076340021839323e-08, "epoch": 4.420394302885203, "percentage": 88.41, "elapsed_time": "3:53:58", "remaining_time": "0:30:40", "throughput": 8683.39, "total_tokens": 121899880} +{"current_steps": 180945, "total_steps": 204665, "loss": 0.0, "lr": 8.072982929523398e-08, "epoch": 4.42051645371705, "percentage": 88.41, "elapsed_time": "3:53:58", "remaining_time": "0:30:40", "throughput": 8683.4, "total_tokens": 121903080} +{"current_steps": 180950, "total_steps": 204665, "loss": 0.0, "lr": 8.069626505725435e-08, "epoch": 4.420638604548897, "percentage": 88.41, "elapsed_time": "3:53:58", "remaining_time": "0:30:39", "throughput": 8683.41, "total_tokens": 121906216} +{"current_steps": 180955, "total_steps": 204665, "loss": 0.0, "lr": 8.066270750469883e-08, "epoch": 4.420760755380744, "percentage": 88.42, "elapsed_time": "3:53:59", "remaining_time": "0:30:39", "throughput": 8683.41, "total_tokens": 121909160} +{"current_steps": 180960, "total_steps": 204665, "loss": 0.0, "lr": 8.062915663781145e-08, "epoch": 4.420882906212591, "percentage": 88.42, "elapsed_time": "3:53:59", "remaining_time": "0:30:39", "throughput": 8683.45, "total_tokens": 121912744} +{"current_steps": 180965, "total_steps": 204665, "loss": 0.0, "lr": 8.059561245683622e-08, "epoch": 4.421005057044439, "percentage": 88.42, "elapsed_time": "3:54:00", "remaining_time": "0:30:38", "throughput": 8683.49, "total_tokens": 121916456} +{"current_steps": 180970, "total_steps": 204665, "loss": 0.0, "lr": 8.056207496201672e-08, "epoch": 4.421127207876285, "percentage": 88.42, "elapsed_time": "3:54:00", "remaining_time": "0:30:38", "throughput": 8683.55, "total_tokens": 121920424} +{"current_steps": 180975, "total_steps": 204665, "loss": 0.0, "lr": 8.052854415359744e-08, "epoch": 4.421249358708133, "percentage": 88.42, "elapsed_time": "3:54:00", "remaining_time": "0:30:37", "throughput": 8683.57, "total_tokens": 121923752} +{"current_steps": 180980, "total_steps": 204665, "loss": 0.0, "lr": 8.049502003182173e-08, "epoch": 4.42137150953998, "percentage": 88.43, "elapsed_time": "3:54:01", "remaining_time": "0:30:37", "throughput": 8683.58, "total_tokens": 121926760} +{"current_steps": 180985, "total_steps": 204665, "loss": 0.0, "lr": 8.046150259693341e-08, "epoch": 4.421493660371827, "percentage": 88.43, "elapsed_time": "3:54:01", "remaining_time": "0:30:37", "throughput": 8683.61, "total_tokens": 121930216} +{"current_steps": 180990, "total_steps": 204665, "loss": 0.0, "lr": 8.042799184917647e-08, "epoch": 4.421615811203674, "percentage": 88.43, "elapsed_time": "3:54:01", "remaining_time": "0:30:36", "throughput": 8683.63, "total_tokens": 121933544} +{"current_steps": 180995, "total_steps": 204665, "loss": 0.0, "lr": 8.039448778879465e-08, "epoch": 4.421737962035522, "percentage": 88.43, "elapsed_time": "3:54:02", "remaining_time": "0:30:36", "throughput": 8683.66, "total_tokens": 121937064} +{"current_steps": 181000, "total_steps": 204665, "loss": 0.0, "lr": 8.036099041603117e-08, "epoch": 4.4218601128673685, "percentage": 88.44, "elapsed_time": "3:54:02", "remaining_time": "0:30:35", "throughput": 8683.68, "total_tokens": 121940200} +{"current_steps": 181005, "total_steps": 204665, "loss": 0.0, "lr": 8.032749973113017e-08, "epoch": 4.421982263699216, "percentage": 88.44, "elapsed_time": "3:54:02", "remaining_time": "0:30:35", "throughput": 8683.71, "total_tokens": 121943784} +{"current_steps": 181010, "total_steps": 204665, "loss": 0.0, "lr": 8.029401573433481e-08, "epoch": 4.422104414531063, "percentage": 88.44, "elapsed_time": "3:54:03", "remaining_time": "0:30:35", "throughput": 8683.72, "total_tokens": 121946920} +{"current_steps": 181015, "total_steps": 204665, "loss": 0.0, "lr": 8.026053842588876e-08, "epoch": 4.4222265653629105, "percentage": 88.44, "elapsed_time": "3:54:03", "remaining_time": "0:30:34", "throughput": 8683.74, "total_tokens": 121950120} +{"current_steps": 181020, "total_steps": 204665, "loss": 0.0, "lr": 8.022706780603549e-08, "epoch": 4.422348716194757, "percentage": 88.45, "elapsed_time": "3:54:03", "remaining_time": "0:30:34", "throughput": 8683.78, "total_tokens": 121953768} +{"current_steps": 181025, "total_steps": 204665, "loss": 0.0, "lr": 8.019360387501839e-08, "epoch": 4.422470867026605, "percentage": 88.45, "elapsed_time": "3:54:04", "remaining_time": "0:30:34", "throughput": 8683.8, "total_tokens": 121957032} +{"current_steps": 181030, "total_steps": 204665, "loss": 0.0, "lr": 8.01601466330808e-08, "epoch": 4.422593017858452, "percentage": 88.45, "elapsed_time": "3:54:04", "remaining_time": "0:30:33", "throughput": 8683.85, "total_tokens": 121960872} +{"current_steps": 181035, "total_steps": 204665, "loss": 0.0, "lr": 8.012669608046596e-08, "epoch": 4.422715168690299, "percentage": 88.45, "elapsed_time": "3:54:04", "remaining_time": "0:30:33", "throughput": 8683.86, "total_tokens": 121964136} +{"current_steps": 181040, "total_steps": 204665, "loss": 0.0, "lr": 8.009325221741736e-08, "epoch": 4.422837319522146, "percentage": 88.46, "elapsed_time": "3:54:05", "remaining_time": "0:30:32", "throughput": 8683.88, "total_tokens": 121967400} +{"current_steps": 181045, "total_steps": 204665, "loss": 0.0, "lr": 8.00598150441778e-08, "epoch": 4.422959470353993, "percentage": 88.46, "elapsed_time": "3:54:05", "remaining_time": "0:30:32", "throughput": 8683.9, "total_tokens": 121970600} +{"current_steps": 181050, "total_steps": 204665, "loss": 0.0, "lr": 8.002638456099087e-08, "epoch": 4.42308162118584, "percentage": 88.46, "elapsed_time": "3:54:05", "remaining_time": "0:30:32", "throughput": 8683.93, "total_tokens": 121974120} +{"current_steps": 181055, "total_steps": 204665, "loss": 0.0, "lr": 7.999296076809958e-08, "epoch": 4.423203772017687, "percentage": 88.46, "elapsed_time": "3:54:06", "remaining_time": "0:30:31", "throughput": 8683.96, "total_tokens": 121977640} +{"current_steps": 181060, "total_steps": 204665, "loss": 0.0, "lr": 7.995954366574675e-08, "epoch": 4.423325922849535, "percentage": 88.47, "elapsed_time": "3:54:06", "remaining_time": "0:30:31", "throughput": 8683.97, "total_tokens": 121980776} +{"current_steps": 181065, "total_steps": 204665, "loss": 0.0, "lr": 7.992613325417574e-08, "epoch": 4.4234480736813815, "percentage": 88.47, "elapsed_time": "3:54:07", "remaining_time": "0:30:30", "throughput": 8684.0, "total_tokens": 121984168} +{"current_steps": 181070, "total_steps": 204665, "loss": 0.0, "lr": 7.989272953362924e-08, "epoch": 4.423570224513229, "percentage": 88.47, "elapsed_time": "3:54:07", "remaining_time": "0:30:30", "throughput": 8684.03, "total_tokens": 121987624} +{"current_steps": 181075, "total_steps": 204665, "loss": 0.0, "lr": 7.985933250435039e-08, "epoch": 4.423692375345076, "percentage": 88.47, "elapsed_time": "3:54:07", "remaining_time": "0:30:30", "throughput": 8684.05, "total_tokens": 121990952} +{"current_steps": 181080, "total_steps": 204665, "loss": 0.0, "lr": 7.9825942166582e-08, "epoch": 4.4238145261769235, "percentage": 88.48, "elapsed_time": "3:54:08", "remaining_time": "0:30:29", "throughput": 8684.08, "total_tokens": 121994344} +{"current_steps": 181085, "total_steps": 204665, "loss": 0.0, "lr": 7.979255852056677e-08, "epoch": 4.42393667700877, "percentage": 88.48, "elapsed_time": "3:54:08", "remaining_time": "0:30:29", "throughput": 8684.08, "total_tokens": 121997416} +{"current_steps": 181090, "total_steps": 204665, "loss": 0.0, "lr": 7.97591815665476e-08, "epoch": 4.424058827840618, "percentage": 88.48, "elapsed_time": "3:54:08", "remaining_time": "0:30:28", "throughput": 8684.09, "total_tokens": 122000552} +{"current_steps": 181095, "total_steps": 204665, "loss": 0.0, "lr": 7.972581130476707e-08, "epoch": 4.424180978672465, "percentage": 88.48, "elapsed_time": "3:54:09", "remaining_time": "0:30:28", "throughput": 8684.11, "total_tokens": 122003688} +{"current_steps": 181100, "total_steps": 204665, "loss": 0.0224, "lr": 7.969244773546812e-08, "epoch": 4.424303129504312, "percentage": 88.49, "elapsed_time": "3:54:09", "remaining_time": "0:30:28", "throughput": 8684.12, "total_tokens": 122006824} +{"current_steps": 181105, "total_steps": 204665, "loss": 0.0, "lr": 7.965909085889299e-08, "epoch": 4.424425280336159, "percentage": 88.49, "elapsed_time": "3:54:09", "remaining_time": "0:30:27", "throughput": 8684.15, "total_tokens": 122010344} +{"current_steps": 181110, "total_steps": 204665, "loss": 0.0, "lr": 7.96257406752846e-08, "epoch": 4.424547431168007, "percentage": 88.49, "elapsed_time": "3:54:10", "remaining_time": "0:30:27", "throughput": 8684.15, "total_tokens": 122013352} +{"current_steps": 181115, "total_steps": 204665, "loss": 0.0, "lr": 7.959239718488553e-08, "epoch": 4.424669581999853, "percentage": 88.49, "elapsed_time": "3:54:10", "remaining_time": "0:30:26", "throughput": 8684.2, "total_tokens": 122017064} +{"current_steps": 181120, "total_steps": 204665, "loss": 0.0, "lr": 7.955906038793791e-08, "epoch": 4.4247917328317, "percentage": 88.5, "elapsed_time": "3:54:10", "remaining_time": "0:30:26", "throughput": 8684.21, "total_tokens": 122020200} +{"current_steps": 181125, "total_steps": 204665, "loss": 0.0, "lr": 7.952573028468456e-08, "epoch": 4.424913883663548, "percentage": 88.5, "elapsed_time": "3:54:11", "remaining_time": "0:30:26", "throughput": 8684.23, "total_tokens": 122023528} +{"current_steps": 181130, "total_steps": 204665, "loss": 0.0001, "lr": 7.949240687536751e-08, "epoch": 4.4250360344953945, "percentage": 88.5, "elapsed_time": "3:54:11", "remaining_time": "0:30:25", "throughput": 8684.24, "total_tokens": 122026600} +{"current_steps": 181135, "total_steps": 204665, "loss": 0.0, "lr": 7.945909016022934e-08, "epoch": 4.425158185327242, "percentage": 88.5, "elapsed_time": "3:54:11", "remaining_time": "0:30:25", "throughput": 8684.26, "total_tokens": 122029992} +{"current_steps": 181140, "total_steps": 204665, "loss": 0.0, "lr": 7.942578013951217e-08, "epoch": 4.425280336159089, "percentage": 88.51, "elapsed_time": "3:54:12", "remaining_time": "0:30:24", "throughput": 8684.26, "total_tokens": 122033000} +{"current_steps": 181145, "total_steps": 204665, "loss": 0.0, "lr": 7.93924768134584e-08, "epoch": 4.4254024869909365, "percentage": 88.51, "elapsed_time": "3:54:12", "remaining_time": "0:30:24", "throughput": 8684.28, "total_tokens": 122036200} +{"current_steps": 181150, "total_steps": 204665, "loss": 0.0, "lr": 7.935918018231024e-08, "epoch": 4.425524637822783, "percentage": 88.51, "elapsed_time": "3:54:12", "remaining_time": "0:30:24", "throughput": 8684.32, "total_tokens": 122039848} +{"current_steps": 181155, "total_steps": 204665, "loss": 0.0, "lr": 7.932589024630953e-08, "epoch": 4.425646788654631, "percentage": 88.51, "elapsed_time": "3:54:13", "remaining_time": "0:30:23", "throughput": 8684.37, "total_tokens": 122043688} +{"current_steps": 181160, "total_steps": 204665, "loss": 0.0, "lr": 7.929260700569884e-08, "epoch": 4.425768939486478, "percentage": 88.52, "elapsed_time": "3:54:13", "remaining_time": "0:30:23", "throughput": 8684.4, "total_tokens": 122047208} +{"current_steps": 181165, "total_steps": 204665, "loss": 0.0, "lr": 7.925933046071975e-08, "epoch": 4.425891090318325, "percentage": 88.52, "elapsed_time": "3:54:13", "remaining_time": "0:30:23", "throughput": 8684.43, "total_tokens": 122050600} +{"current_steps": 181170, "total_steps": 204665, "loss": 0.0, "lr": 7.922606061161464e-08, "epoch": 4.426013241150172, "percentage": 88.52, "elapsed_time": "3:54:14", "remaining_time": "0:30:22", "throughput": 8684.43, "total_tokens": 122053608} +{"current_steps": 181175, "total_steps": 204665, "loss": 0.0, "lr": 7.919279745862505e-08, "epoch": 4.42613539198202, "percentage": 88.52, "elapsed_time": "3:54:14", "remaining_time": "0:30:22", "throughput": 8684.45, "total_tokens": 122056872} +{"current_steps": 181180, "total_steps": 204665, "loss": 0.0, "lr": 7.915954100199328e-08, "epoch": 4.426257542813866, "percentage": 88.53, "elapsed_time": "3:54:14", "remaining_time": "0:30:21", "throughput": 8684.47, "total_tokens": 122060200} +{"current_steps": 181185, "total_steps": 204665, "loss": 0.0, "lr": 7.912629124196113e-08, "epoch": 4.426379693645714, "percentage": 88.53, "elapsed_time": "3:54:15", "remaining_time": "0:30:21", "throughput": 8684.49, "total_tokens": 122063464} +{"current_steps": 181190, "total_steps": 204665, "loss": 0.0, "lr": 7.909304817876994e-08, "epoch": 4.426501844477561, "percentage": 88.53, "elapsed_time": "3:54:15", "remaining_time": "0:30:21", "throughput": 8684.5, "total_tokens": 122066536} +{"current_steps": 181195, "total_steps": 204665, "loss": 0.0, "lr": 7.905981181266208e-08, "epoch": 4.426623995309408, "percentage": 88.53, "elapsed_time": "3:54:16", "remaining_time": "0:30:20", "throughput": 8684.51, "total_tokens": 122069736} +{"current_steps": 181200, "total_steps": 204665, "loss": 0.0, "lr": 7.90265821438788e-08, "epoch": 4.426746146141255, "percentage": 88.53, "elapsed_time": "3:54:16", "remaining_time": "0:30:20", "throughput": 8684.57, "total_tokens": 122073576} +{"current_steps": 181205, "total_steps": 204665, "loss": 0.0, "lr": 7.89933591726618e-08, "epoch": 4.426868296973103, "percentage": 88.54, "elapsed_time": "3:54:16", "remaining_time": "0:30:19", "throughput": 8684.58, "total_tokens": 122076776} +{"current_steps": 181210, "total_steps": 204665, "loss": 0.0, "lr": 7.896014289925312e-08, "epoch": 4.4269904478049495, "percentage": 88.54, "elapsed_time": "3:54:17", "remaining_time": "0:30:19", "throughput": 8684.59, "total_tokens": 122079848} +{"current_steps": 181215, "total_steps": 204665, "loss": 0.0, "lr": 7.892693332389378e-08, "epoch": 4.427112598636796, "percentage": 88.54, "elapsed_time": "3:54:17", "remaining_time": "0:30:19", "throughput": 8684.62, "total_tokens": 122083304} +{"current_steps": 181220, "total_steps": 204665, "loss": 0.0, "lr": 7.889373044682567e-08, "epoch": 4.427234749468644, "percentage": 88.54, "elapsed_time": "3:54:17", "remaining_time": "0:30:18", "throughput": 8684.63, "total_tokens": 122086504} +{"current_steps": 181225, "total_steps": 204665, "loss": 0.0, "lr": 7.886053426828998e-08, "epoch": 4.427356900300491, "percentage": 88.55, "elapsed_time": "3:54:18", "remaining_time": "0:30:18", "throughput": 8684.65, "total_tokens": 122089704} +{"current_steps": 181230, "total_steps": 204665, "loss": 0.0, "lr": 7.882734478852826e-08, "epoch": 4.427479051132338, "percentage": 88.55, "elapsed_time": "3:54:18", "remaining_time": "0:30:17", "throughput": 8684.68, "total_tokens": 122093224} +{"current_steps": 181235, "total_steps": 204665, "loss": 0.0, "lr": 7.8794162007782e-08, "epoch": 4.427601201964185, "percentage": 88.55, "elapsed_time": "3:54:18", "remaining_time": "0:30:17", "throughput": 8684.7, "total_tokens": 122096616} +{"current_steps": 181240, "total_steps": 204665, "loss": 0.0, "lr": 7.876098592629221e-08, "epoch": 4.427723352796033, "percentage": 88.55, "elapsed_time": "3:54:19", "remaining_time": "0:30:17", "throughput": 8684.73, "total_tokens": 122100136} +{"current_steps": 181245, "total_steps": 204665, "loss": 0.0, "lr": 7.872781654430039e-08, "epoch": 4.427845503627879, "percentage": 88.56, "elapsed_time": "3:54:19", "remaining_time": "0:30:16", "throughput": 8684.75, "total_tokens": 122103336} +{"current_steps": 181250, "total_steps": 204665, "loss": 0.0, "lr": 7.869465386204765e-08, "epoch": 4.427967654459727, "percentage": 88.56, "elapsed_time": "3:54:19", "remaining_time": "0:30:16", "throughput": 8684.77, "total_tokens": 122106792} +{"current_steps": 181255, "total_steps": 204665, "loss": 0.0, "lr": 7.866149787977528e-08, "epoch": 4.428089805291574, "percentage": 88.56, "elapsed_time": "3:54:20", "remaining_time": "0:30:15", "throughput": 8684.78, "total_tokens": 122109928} +{"current_steps": 181260, "total_steps": 204665, "loss": 0.0, "lr": 7.862834859772416e-08, "epoch": 4.4282119561234214, "percentage": 88.56, "elapsed_time": "3:54:20", "remaining_time": "0:30:15", "throughput": 8684.8, "total_tokens": 122113128} +{"current_steps": 181265, "total_steps": 204665, "loss": 0.0, "lr": 7.859520601613545e-08, "epoch": 4.428334106955268, "percentage": 88.57, "elapsed_time": "3:54:20", "remaining_time": "0:30:15", "throughput": 8684.82, "total_tokens": 122116392} +{"current_steps": 181270, "total_steps": 204665, "loss": 0.0, "lr": 7.85620701352504e-08, "epoch": 4.428456257787116, "percentage": 88.57, "elapsed_time": "3:54:21", "remaining_time": "0:30:14", "throughput": 8684.85, "total_tokens": 122119848} +{"current_steps": 181275, "total_steps": 204665, "loss": 0.0, "lr": 7.852894095530993e-08, "epoch": 4.428578408618963, "percentage": 88.57, "elapsed_time": "3:54:21", "remaining_time": "0:30:14", "throughput": 8684.87, "total_tokens": 122123240} +{"current_steps": 181280, "total_steps": 204665, "loss": 0.0002, "lr": 7.849581847655462e-08, "epoch": 4.42870055945081, "percentage": 88.57, "elapsed_time": "3:54:21", "remaining_time": "0:30:13", "throughput": 8684.9, "total_tokens": 122126632} +{"current_steps": 181285, "total_steps": 204665, "loss": 0.0, "lr": 7.846270269922572e-08, "epoch": 4.428822710282657, "percentage": 88.58, "elapsed_time": "3:54:22", "remaining_time": "0:30:13", "throughput": 8684.96, "total_tokens": 122130600} +{"current_steps": 181290, "total_steps": 204665, "loss": 0.0, "lr": 7.842959362356394e-08, "epoch": 4.428944861114505, "percentage": 88.58, "elapsed_time": "3:54:22", "remaining_time": "0:30:13", "throughput": 8684.97, "total_tokens": 122133736} +{"current_steps": 181295, "total_steps": 204665, "loss": 0.0, "lr": 7.839649124980985e-08, "epoch": 4.429067011946351, "percentage": 88.58, "elapsed_time": "3:54:22", "remaining_time": "0:30:12", "throughput": 8684.99, "total_tokens": 122136936} +{"current_steps": 181300, "total_steps": 204665, "loss": 0.0, "lr": 7.836339557820427e-08, "epoch": 4.429189162778199, "percentage": 88.58, "elapsed_time": "3:54:23", "remaining_time": "0:30:12", "throughput": 8685.03, "total_tokens": 122140648} +{"current_steps": 181305, "total_steps": 204665, "loss": 0.0, "lr": 7.83303066089882e-08, "epoch": 4.429311313610046, "percentage": 88.59, "elapsed_time": "3:54:23", "remaining_time": "0:30:12", "throughput": 8685.06, "total_tokens": 122143976} +{"current_steps": 181310, "total_steps": 204665, "loss": 0.0, "lr": 7.829722434240193e-08, "epoch": 4.4294334644418925, "percentage": 88.59, "elapsed_time": "3:54:24", "remaining_time": "0:30:11", "throughput": 8685.09, "total_tokens": 122147496} +{"current_steps": 181315, "total_steps": 204665, "loss": 0.0, "lr": 7.826414877868626e-08, "epoch": 4.42955561527374, "percentage": 88.59, "elapsed_time": "3:54:24", "remaining_time": "0:30:11", "throughput": 8685.12, "total_tokens": 122151016} +{"current_steps": 181320, "total_steps": 204665, "loss": 0.0, "lr": 7.823107991808143e-08, "epoch": 4.429677766105587, "percentage": 88.59, "elapsed_time": "3:54:24", "remaining_time": "0:30:10", "throughput": 8685.15, "total_tokens": 122154344} +{"current_steps": 181325, "total_steps": 204665, "loss": 0.0, "lr": 7.819801776082813e-08, "epoch": 4.4297999169374345, "percentage": 88.6, "elapsed_time": "3:54:25", "remaining_time": "0:30:10", "throughput": 8685.18, "total_tokens": 122157864} +{"current_steps": 181330, "total_steps": 204665, "loss": 0.0, "lr": 7.816496230716696e-08, "epoch": 4.429922067769281, "percentage": 88.6, "elapsed_time": "3:54:25", "remaining_time": "0:30:10", "throughput": 8685.21, "total_tokens": 122161448} +{"current_steps": 181335, "total_steps": 204665, "loss": 0.0, "lr": 7.813191355733806e-08, "epoch": 4.430044218601129, "percentage": 88.6, "elapsed_time": "3:54:25", "remaining_time": "0:30:09", "throughput": 8685.27, "total_tokens": 122165288} +{"current_steps": 181340, "total_steps": 204665, "loss": 0.0, "lr": 7.809887151158189e-08, "epoch": 4.430166369432976, "percentage": 88.6, "elapsed_time": "3:54:26", "remaining_time": "0:30:09", "throughput": 8685.27, "total_tokens": 122168360} +{"current_steps": 181345, "total_steps": 204665, "loss": 0.0, "lr": 7.806583617013851e-08, "epoch": 4.430288520264823, "percentage": 88.61, "elapsed_time": "3:54:26", "remaining_time": "0:30:08", "throughput": 8685.28, "total_tokens": 122171368} +{"current_steps": 181350, "total_steps": 204665, "loss": 0.0, "lr": 7.80328075332486e-08, "epoch": 4.43041067109667, "percentage": 88.61, "elapsed_time": "3:54:26", "remaining_time": "0:30:08", "throughput": 8685.32, "total_tokens": 122175144} +{"current_steps": 181355, "total_steps": 204665, "loss": 0.0, "lr": 7.799978560115184e-08, "epoch": 4.430532821928518, "percentage": 88.61, "elapsed_time": "3:54:27", "remaining_time": "0:30:08", "throughput": 8685.34, "total_tokens": 122178344} +{"current_steps": 181360, "total_steps": 204665, "loss": 0.0, "lr": 7.79667703740886e-08, "epoch": 4.430654972760364, "percentage": 88.61, "elapsed_time": "3:54:27", "remaining_time": "0:30:07", "throughput": 8685.36, "total_tokens": 122181608} +{"current_steps": 181365, "total_steps": 204665, "loss": 0.0, "lr": 7.793376185229928e-08, "epoch": 4.430777123592212, "percentage": 88.62, "elapsed_time": "3:54:27", "remaining_time": "0:30:07", "throughput": 8685.38, "total_tokens": 122184872} +{"current_steps": 181370, "total_steps": 204665, "loss": 0.0, "lr": 7.790076003602342e-08, "epoch": 4.430899274424059, "percentage": 88.62, "elapsed_time": "3:54:28", "remaining_time": "0:30:06", "throughput": 8685.4, "total_tokens": 122188136} +{"current_steps": 181375, "total_steps": 204665, "loss": 0.0, "lr": 7.78677649255014e-08, "epoch": 4.431021425255906, "percentage": 88.62, "elapsed_time": "3:54:28", "remaining_time": "0:30:06", "throughput": 8685.41, "total_tokens": 122191272} +{"current_steps": 181380, "total_steps": 204665, "loss": 0.0, "lr": 7.78347765209728e-08, "epoch": 4.431143576087753, "percentage": 88.62, "elapsed_time": "3:54:28", "remaining_time": "0:30:06", "throughput": 8685.42, "total_tokens": 122194408} +{"current_steps": 181385, "total_steps": 204665, "loss": 0.0, "lr": 7.780179482267802e-08, "epoch": 4.4312657269196, "percentage": 88.63, "elapsed_time": "3:54:29", "remaining_time": "0:30:05", "throughput": 8685.47, "total_tokens": 122198120} +{"current_steps": 181390, "total_steps": 204665, "loss": 0.0, "lr": 7.77688198308566e-08, "epoch": 4.4313878777514475, "percentage": 88.63, "elapsed_time": "3:54:29", "remaining_time": "0:30:05", "throughput": 8685.49, "total_tokens": 122201512} +{"current_steps": 181395, "total_steps": 204665, "loss": 0.0, "lr": 7.773585154574814e-08, "epoch": 4.431510028583294, "percentage": 88.63, "elapsed_time": "3:54:29", "remaining_time": "0:30:04", "throughput": 8685.52, "total_tokens": 122204904} +{"current_steps": 181400, "total_steps": 204665, "loss": 0.0, "lr": 7.770288996759289e-08, "epoch": 4.431632179415142, "percentage": 88.63, "elapsed_time": "3:54:30", "remaining_time": "0:30:04", "throughput": 8685.56, "total_tokens": 122208488} +{"current_steps": 181405, "total_steps": 204665, "loss": 0.0, "lr": 7.766993509663e-08, "epoch": 4.431754330246989, "percentage": 88.64, "elapsed_time": "3:54:30", "remaining_time": "0:30:04", "throughput": 8685.56, "total_tokens": 122211496} +{"current_steps": 181410, "total_steps": 204665, "loss": 0.0, "lr": 7.763698693309972e-08, "epoch": 4.431876481078836, "percentage": 88.64, "elapsed_time": "3:54:30", "remaining_time": "0:30:03", "throughput": 8685.58, "total_tokens": 122214696} +{"current_steps": 181415, "total_steps": 204665, "loss": 0.0, "lr": 7.760404547724109e-08, "epoch": 4.431998631910683, "percentage": 88.64, "elapsed_time": "3:54:31", "remaining_time": "0:30:03", "throughput": 8685.6, "total_tokens": 122218024} +{"current_steps": 181420, "total_steps": 204665, "loss": 0.0, "lr": 7.7571110729294e-08, "epoch": 4.432120782742531, "percentage": 88.64, "elapsed_time": "3:54:31", "remaining_time": "0:30:02", "throughput": 8685.65, "total_tokens": 122221864} +{"current_steps": 181425, "total_steps": 204665, "loss": 0.0, "lr": 7.753818268949808e-08, "epoch": 4.432242933574377, "percentage": 88.64, "elapsed_time": "3:54:32", "remaining_time": "0:30:02", "throughput": 8685.67, "total_tokens": 122225128} +{"current_steps": 181430, "total_steps": 204665, "loss": 0.0, "lr": 7.750526135809232e-08, "epoch": 4.432365084406225, "percentage": 88.65, "elapsed_time": "3:54:32", "remaining_time": "0:30:02", "throughput": 8685.7, "total_tokens": 122228648} +{"current_steps": 181435, "total_steps": 204665, "loss": 0.0, "lr": 7.747234673531667e-08, "epoch": 4.432487235238072, "percentage": 88.65, "elapsed_time": "3:54:32", "remaining_time": "0:30:01", "throughput": 8685.73, "total_tokens": 122232040} +{"current_steps": 181440, "total_steps": 204665, "loss": 0.0, "lr": 7.743943882141013e-08, "epoch": 4.432609386069919, "percentage": 88.65, "elapsed_time": "3:54:33", "remaining_time": "0:30:01", "throughput": 8685.76, "total_tokens": 122235496} +{"current_steps": 181445, "total_steps": 204665, "loss": 0.0, "lr": 7.740653761661219e-08, "epoch": 4.432731536901766, "percentage": 88.65, "elapsed_time": "3:54:33", "remaining_time": "0:30:01", "throughput": 8685.77, "total_tokens": 122238696} +{"current_steps": 181450, "total_steps": 204665, "loss": 0.0, "lr": 7.737364312116202e-08, "epoch": 4.432853687733614, "percentage": 88.66, "elapsed_time": "3:54:33", "remaining_time": "0:30:00", "throughput": 8685.81, "total_tokens": 122242280} +{"current_steps": 181455, "total_steps": 204665, "loss": 0.0, "lr": 7.734075533529871e-08, "epoch": 4.4329758385654605, "percentage": 88.66, "elapsed_time": "3:54:34", "remaining_time": "0:30:00", "throughput": 8685.82, "total_tokens": 122245480} +{"current_steps": 181460, "total_steps": 204665, "loss": 0.0, "lr": 7.730787425926188e-08, "epoch": 4.433097989397308, "percentage": 88.66, "elapsed_time": "3:54:34", "remaining_time": "0:29:59", "throughput": 8685.87, "total_tokens": 122249320} +{"current_steps": 181465, "total_steps": 204665, "loss": 0.0, "lr": 7.727499989329023e-08, "epoch": 4.433220140229155, "percentage": 88.66, "elapsed_time": "3:54:34", "remaining_time": "0:29:59", "throughput": 8685.9, "total_tokens": 122252776} +{"current_steps": 181470, "total_steps": 204665, "loss": 0.0286, "lr": 7.7242132237623e-08, "epoch": 4.4333422910610025, "percentage": 88.67, "elapsed_time": "3:54:35", "remaining_time": "0:29:59", "throughput": 8685.95, "total_tokens": 122256552} +{"current_steps": 181475, "total_steps": 204665, "loss": 0.0, "lr": 7.72092712924991e-08, "epoch": 4.433464441892849, "percentage": 88.67, "elapsed_time": "3:54:35", "remaining_time": "0:29:58", "throughput": 8685.97, "total_tokens": 122259880} +{"current_steps": 181480, "total_steps": 204665, "loss": 0.0, "lr": 7.71764170581577e-08, "epoch": 4.433586592724696, "percentage": 88.67, "elapsed_time": "3:54:35", "remaining_time": "0:29:58", "throughput": 8686.0, "total_tokens": 122263208} +{"current_steps": 181485, "total_steps": 204665, "loss": 0.0, "lr": 7.714356953483747e-08, "epoch": 4.433708743556544, "percentage": 88.67, "elapsed_time": "3:54:36", "remaining_time": "0:29:57", "throughput": 8686.02, "total_tokens": 122266472} +{"current_steps": 181490, "total_steps": 204665, "loss": 0.0, "lr": 7.711072872277757e-08, "epoch": 4.43383089438839, "percentage": 88.68, "elapsed_time": "3:54:36", "remaining_time": "0:29:57", "throughput": 8686.04, "total_tokens": 122269800} +{"current_steps": 181495, "total_steps": 204665, "loss": 0.0, "lr": 7.70778946222167e-08, "epoch": 4.433953045220238, "percentage": 88.68, "elapsed_time": "3:54:36", "remaining_time": "0:29:57", "throughput": 8686.06, "total_tokens": 122273128} +{"current_steps": 181500, "total_steps": 204665, "loss": 0.0, "lr": 7.704506723339343e-08, "epoch": 4.434075196052085, "percentage": 88.68, "elapsed_time": "3:54:37", "remaining_time": "0:29:56", "throughput": 8686.07, "total_tokens": 122276200} +{"current_steps": 181505, "total_steps": 204665, "loss": 0.0, "lr": 7.701224655654682e-08, "epoch": 4.434197346883932, "percentage": 88.68, "elapsed_time": "3:54:37", "remaining_time": "0:29:56", "throughput": 8686.08, "total_tokens": 122279400} +{"current_steps": 181510, "total_steps": 204665, "loss": 0.0, "lr": 7.69794325919153e-08, "epoch": 4.434319497715779, "percentage": 88.69, "elapsed_time": "3:54:37", "remaining_time": "0:29:55", "throughput": 8686.11, "total_tokens": 122282856} +{"current_steps": 181515, "total_steps": 204665, "loss": 0.0, "lr": 7.694662533973762e-08, "epoch": 4.434441648547627, "percentage": 88.69, "elapsed_time": "3:54:38", "remaining_time": "0:29:55", "throughput": 8686.16, "total_tokens": 122286568} +{"current_steps": 181520, "total_steps": 204665, "loss": 0.0, "lr": 7.691382480025244e-08, "epoch": 4.4345637993794735, "percentage": 88.69, "elapsed_time": "3:54:38", "remaining_time": "0:29:55", "throughput": 8686.17, "total_tokens": 122289704} +{"current_steps": 181525, "total_steps": 204665, "loss": 0.0002, "lr": 7.688103097369803e-08, "epoch": 4.434685950211321, "percentage": 88.69, "elapsed_time": "3:54:39", "remaining_time": "0:29:54", "throughput": 8686.18, "total_tokens": 122292904} +{"current_steps": 181530, "total_steps": 204665, "loss": 0.0, "lr": 7.68482438603133e-08, "epoch": 4.434808101043168, "percentage": 88.7, "elapsed_time": "3:54:39", "remaining_time": "0:29:54", "throughput": 8686.2, "total_tokens": 122296232} +{"current_steps": 181535, "total_steps": 204665, "loss": 0.0, "lr": 7.681546346033618e-08, "epoch": 4.4349302518750155, "percentage": 88.7, "elapsed_time": "3:54:39", "remaining_time": "0:29:53", "throughput": 8686.23, "total_tokens": 122299688} +{"current_steps": 181540, "total_steps": 204665, "loss": 0.0, "lr": 7.67826897740056e-08, "epoch": 4.435052402706862, "percentage": 88.7, "elapsed_time": "3:54:40", "remaining_time": "0:29:53", "throughput": 8686.24, "total_tokens": 122302696} +{"current_steps": 181545, "total_steps": 204665, "loss": 0.0, "lr": 7.674992280155934e-08, "epoch": 4.43517455353871, "percentage": 88.7, "elapsed_time": "3:54:40", "remaining_time": "0:29:53", "throughput": 8686.24, "total_tokens": 122305768} +{"current_steps": 181550, "total_steps": 204665, "loss": 0.0, "lr": 7.671716254323601e-08, "epoch": 4.435296704370557, "percentage": 88.71, "elapsed_time": "3:54:40", "remaining_time": "0:29:52", "throughput": 8686.26, "total_tokens": 122308904} +{"current_steps": 181555, "total_steps": 204665, "loss": 0.0, "lr": 7.668440899927398e-08, "epoch": 4.435418855202404, "percentage": 88.71, "elapsed_time": "3:54:41", "remaining_time": "0:29:52", "throughput": 8686.27, "total_tokens": 122312168} +{"current_steps": 181560, "total_steps": 204665, "loss": 0.0, "lr": 7.665166216991115e-08, "epoch": 4.435541006034251, "percentage": 88.71, "elapsed_time": "3:54:41", "remaining_time": "0:29:51", "throughput": 8686.32, "total_tokens": 122315816} +{"current_steps": 181565, "total_steps": 204665, "loss": 0.0, "lr": 7.66189220553859e-08, "epoch": 4.435663156866099, "percentage": 88.71, "elapsed_time": "3:54:41", "remaining_time": "0:29:51", "throughput": 8686.32, "total_tokens": 122318888} +{"current_steps": 181570, "total_steps": 204665, "loss": 0.0, "lr": 7.658618865593603e-08, "epoch": 4.435785307697945, "percentage": 88.72, "elapsed_time": "3:54:42", "remaining_time": "0:29:51", "throughput": 8686.34, "total_tokens": 122322088} +{"current_steps": 181575, "total_steps": 204665, "loss": 0.0, "lr": 7.655346197179979e-08, "epoch": 4.435907458529792, "percentage": 88.72, "elapsed_time": "3:54:42", "remaining_time": "0:29:50", "throughput": 8686.37, "total_tokens": 122325736} +{"current_steps": 181580, "total_steps": 204665, "loss": 0.0, "lr": 7.652074200321524e-08, "epoch": 4.43602960936164, "percentage": 88.72, "elapsed_time": "3:54:42", "remaining_time": "0:29:50", "throughput": 8686.39, "total_tokens": 122328936} +{"current_steps": 181585, "total_steps": 204665, "loss": 0.0, "lr": 7.648802875042038e-08, "epoch": 4.436151760193487, "percentage": 88.72, "elapsed_time": "3:54:43", "remaining_time": "0:29:50", "throughput": 8686.42, "total_tokens": 122332328} +{"current_steps": 181590, "total_steps": 204665, "loss": 0.0003, "lr": 7.64553222136527e-08, "epoch": 4.436273911025334, "percentage": 88.73, "elapsed_time": "3:54:43", "remaining_time": "0:29:49", "throughput": 8686.42, "total_tokens": 122335272} +{"current_steps": 181595, "total_steps": 204665, "loss": 0.0, "lr": 7.642262239315055e-08, "epoch": 4.436396061857181, "percentage": 88.73, "elapsed_time": "3:54:43", "remaining_time": "0:29:49", "throughput": 8686.45, "total_tokens": 122338792} +{"current_steps": 181600, "total_steps": 204665, "loss": 0.0, "lr": 7.638992928915144e-08, "epoch": 4.436518212689029, "percentage": 88.73, "elapsed_time": "3:54:44", "remaining_time": "0:29:48", "throughput": 8686.46, "total_tokens": 122341928} +{"current_steps": 181605, "total_steps": 204665, "loss": 0.0, "lr": 7.635724290189305e-08, "epoch": 4.436640363520875, "percentage": 88.73, "elapsed_time": "3:54:44", "remaining_time": "0:29:48", "throughput": 8686.48, "total_tokens": 122345320} +{"current_steps": 181610, "total_steps": 204665, "loss": 0.0, "lr": 7.632456323161319e-08, "epoch": 4.436762514352723, "percentage": 88.74, "elapsed_time": "3:54:44", "remaining_time": "0:29:48", "throughput": 8686.55, "total_tokens": 122349416} +{"current_steps": 181615, "total_steps": 204665, "loss": 0.0, "lr": 7.629189027854977e-08, "epoch": 4.43688466518457, "percentage": 88.74, "elapsed_time": "3:54:45", "remaining_time": "0:29:47", "throughput": 8686.57, "total_tokens": 122352616} +{"current_steps": 181620, "total_steps": 204665, "loss": 0.0, "lr": 7.625922404293994e-08, "epoch": 4.437006816016417, "percentage": 88.74, "elapsed_time": "3:54:45", "remaining_time": "0:29:47", "throughput": 8686.61, "total_tokens": 122356264} +{"current_steps": 181625, "total_steps": 204665, "loss": 0.0663, "lr": 7.622656452502174e-08, "epoch": 4.437128966848264, "percentage": 88.74, "elapsed_time": "3:54:45", "remaining_time": "0:29:46", "throughput": 8686.63, "total_tokens": 122359656} +{"current_steps": 181630, "total_steps": 204665, "loss": 0.0, "lr": 7.61939117250322e-08, "epoch": 4.437251117680112, "percentage": 88.75, "elapsed_time": "3:54:46", "remaining_time": "0:29:46", "throughput": 8686.64, "total_tokens": 122362856} +{"current_steps": 181635, "total_steps": 204665, "loss": 0.0, "lr": 7.616126564320901e-08, "epoch": 4.4373732685119585, "percentage": 88.75, "elapsed_time": "3:54:46", "remaining_time": "0:29:46", "throughput": 8686.66, "total_tokens": 122366120} +{"current_steps": 181640, "total_steps": 204665, "loss": 0.0, "lr": 7.612862627978978e-08, "epoch": 4.437495419343806, "percentage": 88.75, "elapsed_time": "3:54:47", "remaining_time": "0:29:45", "throughput": 8686.73, "total_tokens": 122370216} +{"current_steps": 181645, "total_steps": 204665, "loss": 0.0, "lr": 7.60959936350114e-08, "epoch": 4.437617570175653, "percentage": 88.75, "elapsed_time": "3:54:47", "remaining_time": "0:29:45", "throughput": 8686.76, "total_tokens": 122373672} +{"current_steps": 181650, "total_steps": 204665, "loss": 0.0, "lr": 7.60633677091117e-08, "epoch": 4.4377397210075005, "percentage": 88.75, "elapsed_time": "3:54:47", "remaining_time": "0:29:44", "throughput": 8686.82, "total_tokens": 122377704} +{"current_steps": 181655, "total_steps": 204665, "loss": 0.0, "lr": 7.60307485023276e-08, "epoch": 4.437861871839347, "percentage": 88.76, "elapsed_time": "3:54:48", "remaining_time": "0:29:44", "throughput": 8686.83, "total_tokens": 122380904} +{"current_steps": 181660, "total_steps": 204665, "loss": 0.0, "lr": 7.599813601489646e-08, "epoch": 4.437984022671195, "percentage": 88.76, "elapsed_time": "3:54:48", "remaining_time": "0:29:44", "throughput": 8686.84, "total_tokens": 122384040} +{"current_steps": 181665, "total_steps": 204665, "loss": 0.0, "lr": 7.596553024705533e-08, "epoch": 4.438106173503042, "percentage": 88.76, "elapsed_time": "3:54:48", "remaining_time": "0:29:43", "throughput": 8686.86, "total_tokens": 122387176} +{"current_steps": 181670, "total_steps": 204665, "loss": 0.0, "lr": 7.593293119904132e-08, "epoch": 4.438228324334888, "percentage": 88.76, "elapsed_time": "3:54:49", "remaining_time": "0:29:43", "throughput": 8686.88, "total_tokens": 122390632} +{"current_steps": 181675, "total_steps": 204665, "loss": 0.0, "lr": 7.590033887109181e-08, "epoch": 4.438350475166736, "percentage": 88.77, "elapsed_time": "3:54:49", "remaining_time": "0:29:42", "throughput": 8686.9, "total_tokens": 122393960} +{"current_steps": 181680, "total_steps": 204665, "loss": 0.0, "lr": 7.586775326344341e-08, "epoch": 4.438472625998583, "percentage": 88.77, "elapsed_time": "3:54:49", "remaining_time": "0:29:42", "throughput": 8686.89, "total_tokens": 122396648} +{"current_steps": 181685, "total_steps": 204665, "loss": 0.0, "lr": 7.583517437633335e-08, "epoch": 4.43859477683043, "percentage": 88.77, "elapsed_time": "3:54:50", "remaining_time": "0:29:42", "throughput": 8686.92, "total_tokens": 122400104} +{"current_steps": 181690, "total_steps": 204665, "loss": 0.0, "lr": 7.580260220999845e-08, "epoch": 4.438716927662277, "percentage": 88.77, "elapsed_time": "3:54:50", "remaining_time": "0:29:41", "throughput": 8686.95, "total_tokens": 122403688} +{"current_steps": 181695, "total_steps": 204665, "loss": 0.0, "lr": 7.577003676467564e-08, "epoch": 4.438839078494125, "percentage": 88.78, "elapsed_time": "3:54:50", "remaining_time": "0:29:41", "throughput": 8686.97, "total_tokens": 122406888} +{"current_steps": 181700, "total_steps": 204665, "loss": 0.0, "lr": 7.573747804060182e-08, "epoch": 4.4389612293259715, "percentage": 88.78, "elapsed_time": "3:54:51", "remaining_time": "0:29:40", "throughput": 8686.97, "total_tokens": 122409832} +{"current_steps": 181705, "total_steps": 204665, "loss": 0.0, "lr": 7.570492603801337e-08, "epoch": 4.439083380157819, "percentage": 88.78, "elapsed_time": "3:54:51", "remaining_time": "0:29:40", "throughput": 8686.99, "total_tokens": 122413224} +{"current_steps": 181710, "total_steps": 204665, "loss": 0.0, "lr": 7.567238075714755e-08, "epoch": 4.439205530989666, "percentage": 88.78, "elapsed_time": "3:54:51", "remaining_time": "0:29:40", "throughput": 8687.01, "total_tokens": 122416552} +{"current_steps": 181715, "total_steps": 204665, "loss": 0.0, "lr": 7.56398421982406e-08, "epoch": 4.4393276818215135, "percentage": 88.79, "elapsed_time": "3:54:52", "remaining_time": "0:29:39", "throughput": 8687.03, "total_tokens": 122419752} +{"current_steps": 181720, "total_steps": 204665, "loss": 0.0, "lr": 7.560731036152957e-08, "epoch": 4.43944983265336, "percentage": 88.79, "elapsed_time": "3:54:52", "remaining_time": "0:29:39", "throughput": 8687.04, "total_tokens": 122422824} +{"current_steps": 181725, "total_steps": 204665, "loss": 0.0, "lr": 7.557478524725059e-08, "epoch": 4.439571983485208, "percentage": 88.79, "elapsed_time": "3:54:52", "remaining_time": "0:29:39", "throughput": 8687.05, "total_tokens": 122426024} +{"current_steps": 181730, "total_steps": 204665, "loss": 0.0, "lr": 7.554226685564047e-08, "epoch": 4.439694134317055, "percentage": 88.79, "elapsed_time": "3:54:53", "remaining_time": "0:29:38", "throughput": 8687.06, "total_tokens": 122429160} +{"current_steps": 181735, "total_steps": 204665, "loss": 0.0384, "lr": 7.55097551869357e-08, "epoch": 4.439816285148902, "percentage": 88.8, "elapsed_time": "3:54:53", "remaining_time": "0:29:38", "throughput": 8687.08, "total_tokens": 122432488} +{"current_steps": 181740, "total_steps": 204665, "loss": 0.0, "lr": 7.547725024137252e-08, "epoch": 4.439938435980749, "percentage": 88.8, "elapsed_time": "3:54:53", "remaining_time": "0:29:37", "throughput": 8687.09, "total_tokens": 122435624} +{"current_steps": 181745, "total_steps": 204665, "loss": 0.0, "lr": 7.544475201918765e-08, "epoch": 4.440060586812596, "percentage": 88.8, "elapsed_time": "3:54:54", "remaining_time": "0:29:37", "throughput": 8687.09, "total_tokens": 122438504} +{"current_steps": 181750, "total_steps": 204665, "loss": 0.0, "lr": 7.5412260520617e-08, "epoch": 4.440182737644443, "percentage": 88.8, "elapsed_time": "3:54:54", "remaining_time": "0:29:37", "throughput": 8687.1, "total_tokens": 122441640} +{"current_steps": 181755, "total_steps": 204665, "loss": 0.0, "lr": 7.537977574589726e-08, "epoch": 4.44030488847629, "percentage": 88.81, "elapsed_time": "3:54:54", "remaining_time": "0:29:36", "throughput": 8687.11, "total_tokens": 122444776} +{"current_steps": 181760, "total_steps": 204665, "loss": 0.0, "lr": 7.534729769526437e-08, "epoch": 4.440427039308138, "percentage": 88.81, "elapsed_time": "3:54:55", "remaining_time": "0:29:36", "throughput": 8687.13, "total_tokens": 122448040} +{"current_steps": 181765, "total_steps": 204665, "loss": 0.0, "lr": 7.531482636895458e-08, "epoch": 4.4405491901399845, "percentage": 88.81, "elapsed_time": "3:54:55", "remaining_time": "0:29:35", "throughput": 8687.16, "total_tokens": 122451496} +{"current_steps": 181770, "total_steps": 204665, "loss": 0.0, "lr": 7.528236176720426e-08, "epoch": 4.440671340971832, "percentage": 88.81, "elapsed_time": "3:54:56", "remaining_time": "0:29:35", "throughput": 8687.17, "total_tokens": 122454696} +{"current_steps": 181775, "total_steps": 204665, "loss": 0.0, "lr": 7.52499038902491e-08, "epoch": 4.440793491803679, "percentage": 88.82, "elapsed_time": "3:54:56", "remaining_time": "0:29:35", "throughput": 8687.2, "total_tokens": 122458088} +{"current_steps": 181780, "total_steps": 204665, "loss": 0.0, "lr": 7.521745273832558e-08, "epoch": 4.4409156426355265, "percentage": 88.82, "elapsed_time": "3:54:56", "remaining_time": "0:29:34", "throughput": 8687.2, "total_tokens": 122461096} +{"current_steps": 181785, "total_steps": 204665, "loss": 0.0, "lr": 7.518500831166929e-08, "epoch": 4.441037793467373, "percentage": 88.82, "elapsed_time": "3:54:57", "remaining_time": "0:29:34", "throughput": 8687.21, "total_tokens": 122464168} +{"current_steps": 181790, "total_steps": 204665, "loss": 0.0, "lr": 7.515257061051661e-08, "epoch": 4.441159944299221, "percentage": 88.82, "elapsed_time": "3:54:57", "remaining_time": "0:29:33", "throughput": 8687.22, "total_tokens": 122467368} +{"current_steps": 181795, "total_steps": 204665, "loss": 0.0, "lr": 7.5120139635103e-08, "epoch": 4.441282095131068, "percentage": 88.83, "elapsed_time": "3:54:57", "remaining_time": "0:29:33", "throughput": 8687.24, "total_tokens": 122470568} +{"current_steps": 181800, "total_steps": 204665, "loss": 0.0, "lr": 7.508771538566461e-08, "epoch": 4.441404245962915, "percentage": 88.83, "elapsed_time": "3:54:58", "remaining_time": "0:29:33", "throughput": 8687.25, "total_tokens": 122473768} +{"current_steps": 181805, "total_steps": 204665, "loss": 0.0, "lr": 7.505529786243714e-08, "epoch": 4.441526396794762, "percentage": 88.83, "elapsed_time": "3:54:58", "remaining_time": "0:29:32", "throughput": 8687.26, "total_tokens": 122476776} +{"current_steps": 181810, "total_steps": 204665, "loss": 0.0, "lr": 7.502288706565618e-08, "epoch": 4.44164854762661, "percentage": 88.83, "elapsed_time": "3:54:58", "remaining_time": "0:29:32", "throughput": 8687.29, "total_tokens": 122480296} +{"current_steps": 181815, "total_steps": 204665, "loss": 0.0, "lr": 7.499048299555777e-08, "epoch": 4.441770698458456, "percentage": 88.84, "elapsed_time": "3:54:59", "remaining_time": "0:29:31", "throughput": 8687.3, "total_tokens": 122483432} +{"current_steps": 181820, "total_steps": 204665, "loss": 0.0308, "lr": 7.495808565237716e-08, "epoch": 4.441892849290304, "percentage": 88.84, "elapsed_time": "3:54:59", "remaining_time": "0:29:31", "throughput": 8687.33, "total_tokens": 122486888} +{"current_steps": 181825, "total_steps": 204665, "loss": 0.0, "lr": 7.492569503635015e-08, "epoch": 4.442015000122151, "percentage": 88.84, "elapsed_time": "3:54:59", "remaining_time": "0:29:31", "throughput": 8687.34, "total_tokens": 122490088} +{"current_steps": 181830, "total_steps": 204665, "loss": 0.0, "lr": 7.489331114771247e-08, "epoch": 4.442137150953998, "percentage": 88.84, "elapsed_time": "3:55:00", "remaining_time": "0:29:30", "throughput": 8687.37, "total_tokens": 122493416} +{"current_steps": 181835, "total_steps": 204665, "loss": 0.0, "lr": 7.486093398669934e-08, "epoch": 4.442259301785845, "percentage": 88.85, "elapsed_time": "3:55:00", "remaining_time": "0:29:30", "throughput": 8687.44, "total_tokens": 122497576} +{"current_steps": 181840, "total_steps": 204665, "loss": 0.0, "lr": 7.482856355354638e-08, "epoch": 4.442381452617692, "percentage": 88.85, "elapsed_time": "3:55:00", "remaining_time": "0:29:29", "throughput": 8687.49, "total_tokens": 122501480} +{"current_steps": 181845, "total_steps": 204665, "loss": 0.0, "lr": 7.479619984848884e-08, "epoch": 4.4425036034495395, "percentage": 88.85, "elapsed_time": "3:55:01", "remaining_time": "0:29:29", "throughput": 8687.51, "total_tokens": 122504744} +{"current_steps": 181850, "total_steps": 204665, "loss": 0.0, "lr": 7.476384287176241e-08, "epoch": 4.442625754281386, "percentage": 88.85, "elapsed_time": "3:55:01", "remaining_time": "0:29:29", "throughput": 8687.52, "total_tokens": 122507944} +{"current_steps": 181855, "total_steps": 204665, "loss": 0.0, "lr": 7.473149262360201e-08, "epoch": 4.442747905113234, "percentage": 88.85, "elapsed_time": "3:55:01", "remaining_time": "0:29:28", "throughput": 8687.54, "total_tokens": 122511080} +{"current_steps": 181860, "total_steps": 204665, "loss": 0.0, "lr": 7.469914910424291e-08, "epoch": 4.442870055945081, "percentage": 88.86, "elapsed_time": "3:55:02", "remaining_time": "0:29:28", "throughput": 8687.57, "total_tokens": 122514600} +{"current_steps": 181865, "total_steps": 204665, "loss": 0.0, "lr": 7.46668123139208e-08, "epoch": 4.442992206776928, "percentage": 88.86, "elapsed_time": "3:55:02", "remaining_time": "0:29:28", "throughput": 8687.59, "total_tokens": 122517992} +{"current_steps": 181870, "total_steps": 204665, "loss": 0.0, "lr": 7.463448225287028e-08, "epoch": 4.443114357608775, "percentage": 88.86, "elapsed_time": "3:55:02", "remaining_time": "0:29:27", "throughput": 8687.59, "total_tokens": 122520872} +{"current_steps": 181875, "total_steps": 204665, "loss": 0.0, "lr": 7.460215892132693e-08, "epoch": 4.443236508440623, "percentage": 88.86, "elapsed_time": "3:55:03", "remaining_time": "0:29:27", "throughput": 8687.6, "total_tokens": 122524008} +{"current_steps": 181880, "total_steps": 204665, "loss": 0.0, "lr": 7.456984231952535e-08, "epoch": 4.443358659272469, "percentage": 88.87, "elapsed_time": "3:55:03", "remaining_time": "0:29:26", "throughput": 8687.61, "total_tokens": 122527144} +{"current_steps": 181885, "total_steps": 204665, "loss": 0.0, "lr": 7.453753244770078e-08, "epoch": 4.443480810104317, "percentage": 88.87, "elapsed_time": "3:55:04", "remaining_time": "0:29:26", "throughput": 8687.65, "total_tokens": 122530856} +{"current_steps": 181890, "total_steps": 204665, "loss": 0.0, "lr": 7.450522930608838e-08, "epoch": 4.443602960936164, "percentage": 88.87, "elapsed_time": "3:55:04", "remaining_time": "0:29:26", "throughput": 8687.68, "total_tokens": 122534248} +{"current_steps": 181895, "total_steps": 204665, "loss": 0.0, "lr": 7.447293289492285e-08, "epoch": 4.443725111768011, "percentage": 88.87, "elapsed_time": "3:55:04", "remaining_time": "0:29:25", "throughput": 8687.68, "total_tokens": 122537192} +{"current_steps": 181900, "total_steps": 204665, "loss": 0.0, "lr": 7.444064321443899e-08, "epoch": 4.443847262599858, "percentage": 88.88, "elapsed_time": "3:55:05", "remaining_time": "0:29:25", "throughput": 8687.68, "total_tokens": 122540200} +{"current_steps": 181905, "total_steps": 204665, "loss": 0.0, "lr": 7.440836026487184e-08, "epoch": 4.443969413431706, "percentage": 88.88, "elapsed_time": "3:55:05", "remaining_time": "0:29:24", "throughput": 8687.71, "total_tokens": 122543656} +{"current_steps": 181910, "total_steps": 204665, "loss": 0.0, "lr": 7.43760840464559e-08, "epoch": 4.444091564263553, "percentage": 88.88, "elapsed_time": "3:55:05", "remaining_time": "0:29:24", "throughput": 8687.7, "total_tokens": 122546472} +{"current_steps": 181915, "total_steps": 204665, "loss": 0.0, "lr": 7.434381455942617e-08, "epoch": 4.4442137150954, "percentage": 88.88, "elapsed_time": "3:55:06", "remaining_time": "0:29:24", "throughput": 8687.75, "total_tokens": 122550184} +{"current_steps": 181920, "total_steps": 204665, "loss": 0.0, "lr": 7.431155180401705e-08, "epoch": 4.444335865927247, "percentage": 88.89, "elapsed_time": "3:55:06", "remaining_time": "0:29:23", "throughput": 8687.76, "total_tokens": 122553320} +{"current_steps": 181925, "total_steps": 204665, "loss": 0.0, "lr": 7.427929578046354e-08, "epoch": 4.444458016759095, "percentage": 88.89, "elapsed_time": "3:55:06", "remaining_time": "0:29:23", "throughput": 8687.78, "total_tokens": 122556584} +{"current_steps": 181930, "total_steps": 204665, "loss": 0.0, "lr": 7.424704648899972e-08, "epoch": 4.444580167590941, "percentage": 88.89, "elapsed_time": "3:55:07", "remaining_time": "0:29:22", "throughput": 8687.78, "total_tokens": 122559592} +{"current_steps": 181935, "total_steps": 204665, "loss": 0.0, "lr": 7.421480392986057e-08, "epoch": 4.444702318422788, "percentage": 88.89, "elapsed_time": "3:55:07", "remaining_time": "0:29:22", "throughput": 8687.79, "total_tokens": 122562664} +{"current_steps": 181940, "total_steps": 204665, "loss": 0.0, "lr": 7.418256810328016e-08, "epoch": 4.444824469254636, "percentage": 88.9, "elapsed_time": "3:55:07", "remaining_time": "0:29:22", "throughput": 8687.79, "total_tokens": 122565672} +{"current_steps": 181945, "total_steps": 204665, "loss": 0.0, "lr": 7.415033900949319e-08, "epoch": 4.4449466200864824, "percentage": 88.9, "elapsed_time": "3:55:08", "remaining_time": "0:29:21", "throughput": 8687.87, "total_tokens": 122569896} +{"current_steps": 181950, "total_steps": 204665, "loss": 0.0, "lr": 7.411811664873413e-08, "epoch": 4.44506877091833, "percentage": 88.9, "elapsed_time": "3:55:08", "remaining_time": "0:29:21", "throughput": 8687.9, "total_tokens": 122573480} +{"current_steps": 181955, "total_steps": 204665, "loss": 0.0, "lr": 7.408590102123701e-08, "epoch": 4.445190921750177, "percentage": 88.9, "elapsed_time": "3:55:08", "remaining_time": "0:29:20", "throughput": 8687.91, "total_tokens": 122576680} +{"current_steps": 181960, "total_steps": 204665, "loss": 0.0, "lr": 7.405369212723645e-08, "epoch": 4.4453130725820245, "percentage": 88.91, "elapsed_time": "3:55:09", "remaining_time": "0:29:20", "throughput": 8687.93, "total_tokens": 122579944} +{"current_steps": 181965, "total_steps": 204665, "loss": 0.0, "lr": 7.402148996696622e-08, "epoch": 4.445435223413871, "percentage": 88.91, "elapsed_time": "3:55:09", "remaining_time": "0:29:20", "throughput": 8687.95, "total_tokens": 122583208} +{"current_steps": 181970, "total_steps": 204665, "loss": 0.0, "lr": 7.398929454066105e-08, "epoch": 4.445557374245719, "percentage": 88.91, "elapsed_time": "3:55:09", "remaining_time": "0:29:19", "throughput": 8687.96, "total_tokens": 122586472} +{"current_steps": 181975, "total_steps": 204665, "loss": 0.0631, "lr": 7.395710584855452e-08, "epoch": 4.445679525077566, "percentage": 88.91, "elapsed_time": "3:55:10", "remaining_time": "0:29:19", "throughput": 8688.01, "total_tokens": 122590312} +{"current_steps": 181980, "total_steps": 204665, "loss": 0.0, "lr": 7.392492389088112e-08, "epoch": 4.445801675909413, "percentage": 88.92, "elapsed_time": "3:55:10", "remaining_time": "0:29:18", "throughput": 8688.02, "total_tokens": 122593448} +{"current_steps": 181985, "total_steps": 204665, "loss": 0.0, "lr": 7.389274866787488e-08, "epoch": 4.44592382674126, "percentage": 88.92, "elapsed_time": "3:55:10", "remaining_time": "0:29:18", "throughput": 8688.08, "total_tokens": 122597352} +{"current_steps": 181990, "total_steps": 204665, "loss": 0.0002, "lr": 7.386058017976938e-08, "epoch": 4.446045977573108, "percentage": 88.92, "elapsed_time": "3:55:11", "remaining_time": "0:29:18", "throughput": 8688.12, "total_tokens": 122600936} +{"current_steps": 181995, "total_steps": 204665, "loss": 0.0, "lr": 7.38284184267991e-08, "epoch": 4.446168128404954, "percentage": 88.92, "elapsed_time": "3:55:11", "remaining_time": "0:29:17", "throughput": 8688.15, "total_tokens": 122604520} +{"current_steps": 182000, "total_steps": 204665, "loss": 0.0, "lr": 7.379626340919754e-08, "epoch": 4.446290279236802, "percentage": 88.93, "elapsed_time": "3:55:12", "remaining_time": "0:29:17", "throughput": 8688.17, "total_tokens": 122607784} +{"current_steps": 182005, "total_steps": 204665, "loss": 0.0, "lr": 7.376411512719882e-08, "epoch": 4.446412430068649, "percentage": 88.93, "elapsed_time": "3:55:12", "remaining_time": "0:29:17", "throughput": 8688.16, "total_tokens": 122610536} +{"current_steps": 182010, "total_steps": 204665, "loss": 0.0, "lr": 7.373197358103655e-08, "epoch": 4.4465345809004955, "percentage": 88.93, "elapsed_time": "3:55:12", "remaining_time": "0:29:16", "throughput": 8688.23, "total_tokens": 122614568} +{"current_steps": 182015, "total_steps": 204665, "loss": 0.0, "lr": 7.369983877094432e-08, "epoch": 4.446656731732343, "percentage": 88.93, "elapsed_time": "3:55:13", "remaining_time": "0:29:16", "throughput": 8688.24, "total_tokens": 122617768} +{"current_steps": 182020, "total_steps": 204665, "loss": 0.0, "lr": 7.366771069715627e-08, "epoch": 4.44677888256419, "percentage": 88.94, "elapsed_time": "3:55:13", "remaining_time": "0:29:15", "throughput": 8688.26, "total_tokens": 122621096} +{"current_steps": 182025, "total_steps": 204665, "loss": 0.0, "lr": 7.363558935990555e-08, "epoch": 4.4469010333960375, "percentage": 88.94, "elapsed_time": "3:55:13", "remaining_time": "0:29:15", "throughput": 8688.29, "total_tokens": 122624616} +{"current_steps": 182030, "total_steps": 204665, "loss": 0.0, "lr": 7.360347475942618e-08, "epoch": 4.447023184227884, "percentage": 88.94, "elapsed_time": "3:55:14", "remaining_time": "0:29:15", "throughput": 8688.31, "total_tokens": 122627752} +{"current_steps": 182035, "total_steps": 204665, "loss": 0.0, "lr": 7.357136689595133e-08, "epoch": 4.447145335059732, "percentage": 88.94, "elapsed_time": "3:55:14", "remaining_time": "0:29:14", "throughput": 8688.35, "total_tokens": 122631400} +{"current_steps": 182040, "total_steps": 204665, "loss": 0.0, "lr": 7.35392657697147e-08, "epoch": 4.447267485891579, "percentage": 88.95, "elapsed_time": "3:55:14", "remaining_time": "0:29:14", "throughput": 8688.36, "total_tokens": 122634664} +{"current_steps": 182045, "total_steps": 204665, "loss": 0.0, "lr": 7.350717138094976e-08, "epoch": 4.447389636723426, "percentage": 88.95, "elapsed_time": "3:55:15", "remaining_time": "0:29:13", "throughput": 8688.39, "total_tokens": 122638056} +{"current_steps": 182050, "total_steps": 204665, "loss": 0.0, "lr": 7.347508372988986e-08, "epoch": 4.447511787555273, "percentage": 88.95, "elapsed_time": "3:55:15", "remaining_time": "0:29:13", "throughput": 8688.41, "total_tokens": 122641384} +{"current_steps": 182055, "total_steps": 204665, "loss": 0.0, "lr": 7.34430028167684e-08, "epoch": 4.447633938387121, "percentage": 88.95, "elapsed_time": "3:55:15", "remaining_time": "0:29:13", "throughput": 8688.42, "total_tokens": 122644520} +{"current_steps": 182060, "total_steps": 204665, "loss": 0.0, "lr": 7.341092864181853e-08, "epoch": 4.447756089218967, "percentage": 88.96, "elapsed_time": "3:55:16", "remaining_time": "0:29:12", "throughput": 8688.48, "total_tokens": 122648360} +{"current_steps": 182065, "total_steps": 204665, "loss": 0.0, "lr": 7.337886120527381e-08, "epoch": 4.447878240050815, "percentage": 88.96, "elapsed_time": "3:55:16", "remaining_time": "0:29:12", "throughput": 8688.49, "total_tokens": 122651560} +{"current_steps": 182070, "total_steps": 204665, "loss": 0.0, "lr": 7.334680050736707e-08, "epoch": 4.448000390882662, "percentage": 88.96, "elapsed_time": "3:55:16", "remaining_time": "0:29:11", "throughput": 8688.5, "total_tokens": 122654696} +{"current_steps": 182075, "total_steps": 204665, "loss": 0.0, "lr": 7.331474654833158e-08, "epoch": 4.448122541714509, "percentage": 88.96, "elapsed_time": "3:55:17", "remaining_time": "0:29:11", "throughput": 8688.51, "total_tokens": 122657832} +{"current_steps": 182080, "total_steps": 204665, "loss": 0.0, "lr": 7.32826993284007e-08, "epoch": 4.448244692546356, "percentage": 88.96, "elapsed_time": "3:55:17", "remaining_time": "0:29:11", "throughput": 8688.52, "total_tokens": 122660840} +{"current_steps": 182085, "total_steps": 204665, "loss": 0.0, "lr": 7.325065884780712e-08, "epoch": 4.448366843378204, "percentage": 88.97, "elapsed_time": "3:55:17", "remaining_time": "0:29:10", "throughput": 8688.54, "total_tokens": 122664168} +{"current_steps": 182090, "total_steps": 204665, "loss": 0.0, "lr": 7.321862510678423e-08, "epoch": 4.4484889942100505, "percentage": 88.97, "elapsed_time": "3:55:18", "remaining_time": "0:29:10", "throughput": 8688.58, "total_tokens": 122667880} +{"current_steps": 182095, "total_steps": 204665, "loss": 0.0, "lr": 7.318659810556449e-08, "epoch": 4.448611145041898, "percentage": 88.97, "elapsed_time": "3:55:18", "remaining_time": "0:29:09", "throughput": 8688.63, "total_tokens": 122671720} +{"current_steps": 182100, "total_steps": 204665, "loss": 0.0, "lr": 7.31545778443814e-08, "epoch": 4.448733295873745, "percentage": 88.97, "elapsed_time": "3:55:18", "remaining_time": "0:29:09", "throughput": 8688.66, "total_tokens": 122675112} +{"current_steps": 182105, "total_steps": 204665, "loss": 0.0224, "lr": 7.31225643234672e-08, "epoch": 4.448855446705592, "percentage": 88.98, "elapsed_time": "3:55:19", "remaining_time": "0:29:09", "throughput": 8688.67, "total_tokens": 122678312} +{"current_steps": 182110, "total_steps": 204665, "loss": 0.0, "lr": 7.309055754305527e-08, "epoch": 4.448977597537439, "percentage": 88.98, "elapsed_time": "3:55:19", "remaining_time": "0:29:08", "throughput": 8688.67, "total_tokens": 122681320} +{"current_steps": 182115, "total_steps": 204665, "loss": 0.0, "lr": 7.305855750337809e-08, "epoch": 4.449099748369286, "percentage": 88.98, "elapsed_time": "3:55:20", "remaining_time": "0:29:08", "throughput": 8688.7, "total_tokens": 122684840} +{"current_steps": 182120, "total_steps": 204665, "loss": 0.0, "lr": 7.302656420466824e-08, "epoch": 4.449221899201134, "percentage": 88.98, "elapsed_time": "3:55:20", "remaining_time": "0:29:07", "throughput": 8688.75, "total_tokens": 122688552} +{"current_steps": 182125, "total_steps": 204665, "loss": 0.0, "lr": 7.299457764715866e-08, "epoch": 4.44934405003298, "percentage": 88.99, "elapsed_time": "3:55:20", "remaining_time": "0:29:07", "throughput": 8688.77, "total_tokens": 122691880} +{"current_steps": 182130, "total_steps": 204665, "loss": 0.0, "lr": 7.296259783108171e-08, "epoch": 4.449466200864828, "percentage": 88.99, "elapsed_time": "3:55:21", "remaining_time": "0:29:07", "throughput": 8688.79, "total_tokens": 122695144} +{"current_steps": 182135, "total_steps": 204665, "loss": 0.0, "lr": 7.293062475667011e-08, "epoch": 4.449588351696675, "percentage": 88.99, "elapsed_time": "3:55:21", "remaining_time": "0:29:06", "throughput": 8688.81, "total_tokens": 122698536} +{"current_steps": 182140, "total_steps": 204665, "loss": 0.0, "lr": 7.289865842415654e-08, "epoch": 4.449710502528522, "percentage": 88.99, "elapsed_time": "3:55:21", "remaining_time": "0:29:06", "throughput": 8688.83, "total_tokens": 122701736} +{"current_steps": 182145, "total_steps": 204665, "loss": 0.0, "lr": 7.286669883377306e-08, "epoch": 4.449832653360369, "percentage": 89.0, "elapsed_time": "3:55:22", "remaining_time": "0:29:06", "throughput": 8688.85, "total_tokens": 122705128} +{"current_steps": 182150, "total_steps": 204665, "loss": 0.0, "lr": 7.283474598575257e-08, "epoch": 4.449954804192217, "percentage": 89.0, "elapsed_time": "3:55:22", "remaining_time": "0:29:05", "throughput": 8688.86, "total_tokens": 122708072} +{"current_steps": 182155, "total_steps": 204665, "loss": 0.0, "lr": 7.280279988032689e-08, "epoch": 4.4500769550240635, "percentage": 89.0, "elapsed_time": "3:55:22", "remaining_time": "0:29:05", "throughput": 8688.88, "total_tokens": 122711528} +{"current_steps": 182160, "total_steps": 204665, "loss": 0.0, "lr": 7.277086051772896e-08, "epoch": 4.450199105855911, "percentage": 89.0, "elapsed_time": "3:55:23", "remaining_time": "0:29:04", "throughput": 8688.89, "total_tokens": 122714536} +{"current_steps": 182165, "total_steps": 204665, "loss": 0.0, "lr": 7.273892789819047e-08, "epoch": 4.450321256687758, "percentage": 89.01, "elapsed_time": "3:55:23", "remaining_time": "0:29:04", "throughput": 8688.91, "total_tokens": 122717864} +{"current_steps": 182170, "total_steps": 204665, "loss": 0.0, "lr": 7.270700202194391e-08, "epoch": 4.4504434075196055, "percentage": 89.01, "elapsed_time": "3:55:23", "remaining_time": "0:29:04", "throughput": 8688.94, "total_tokens": 122721256} +{"current_steps": 182175, "total_steps": 204665, "loss": 0.0, "lr": 7.267508288922153e-08, "epoch": 4.450565558351452, "percentage": 89.01, "elapsed_time": "3:55:24", "remaining_time": "0:29:03", "throughput": 8688.95, "total_tokens": 122724328} +{"current_steps": 182180, "total_steps": 204665, "loss": 0.0, "lr": 7.264317050025537e-08, "epoch": 4.4506877091833, "percentage": 89.01, "elapsed_time": "3:55:24", "remaining_time": "0:29:03", "throughput": 8688.97, "total_tokens": 122727656} +{"current_steps": 182185, "total_steps": 204665, "loss": 0.0, "lr": 7.261126485527757e-08, "epoch": 4.450809860015147, "percentage": 89.02, "elapsed_time": "3:55:24", "remaining_time": "0:29:02", "throughput": 8689.0, "total_tokens": 122731112} +{"current_steps": 182190, "total_steps": 204665, "loss": 0.0, "lr": 7.257936595451986e-08, "epoch": 4.450932010846994, "percentage": 89.02, "elapsed_time": "3:55:25", "remaining_time": "0:29:02", "throughput": 8689.01, "total_tokens": 122734312} +{"current_steps": 182195, "total_steps": 204665, "loss": 0.0, "lr": 7.254747379821458e-08, "epoch": 4.451054161678841, "percentage": 89.02, "elapsed_time": "3:55:25", "remaining_time": "0:29:02", "throughput": 8689.08, "total_tokens": 122738472} +{"current_steps": 182200, "total_steps": 204665, "loss": 0.0, "lr": 7.251558838659355e-08, "epoch": 4.451176312510688, "percentage": 89.02, "elapsed_time": "3:55:25", "remaining_time": "0:29:01", "throughput": 8689.09, "total_tokens": 122741608} +{"current_steps": 182205, "total_steps": 204665, "loss": 0.0, "lr": 7.24837097198887e-08, "epoch": 4.451298463342535, "percentage": 89.03, "elapsed_time": "3:55:26", "remaining_time": "0:29:01", "throughput": 8689.11, "total_tokens": 122744872} +{"current_steps": 182210, "total_steps": 204665, "loss": 0.0, "lr": 7.245183779833163e-08, "epoch": 4.451420614174382, "percentage": 89.03, "elapsed_time": "3:55:26", "remaining_time": "0:29:00", "throughput": 8689.14, "total_tokens": 122748392} +{"current_steps": 182215, "total_steps": 204665, "loss": 0.0, "lr": 7.241997262215449e-08, "epoch": 4.45154276500623, "percentage": 89.03, "elapsed_time": "3:55:26", "remaining_time": "0:29:00", "throughput": 8689.18, "total_tokens": 122751912} +{"current_steps": 182220, "total_steps": 204665, "loss": 0.0, "lr": 7.238811419158852e-08, "epoch": 4.4516649158380766, "percentage": 89.03, "elapsed_time": "3:55:27", "remaining_time": "0:29:00", "throughput": 8689.2, "total_tokens": 122755240} +{"current_steps": 182225, "total_steps": 204665, "loss": 0.0, "lr": 7.2356262506866e-08, "epoch": 4.451787066669924, "percentage": 89.04, "elapsed_time": "3:55:27", "remaining_time": "0:28:59", "throughput": 8689.22, "total_tokens": 122758632} +{"current_steps": 182230, "total_steps": 204665, "loss": 0.0, "lr": 7.232441756821794e-08, "epoch": 4.451909217501771, "percentage": 89.04, "elapsed_time": "3:55:28", "remaining_time": "0:28:59", "throughput": 8689.24, "total_tokens": 122761960} +{"current_steps": 182235, "total_steps": 204665, "loss": 0.0, "lr": 7.229257937587641e-08, "epoch": 4.452031368333619, "percentage": 89.04, "elapsed_time": "3:55:28", "remaining_time": "0:28:58", "throughput": 8689.29, "total_tokens": 122765672} +{"current_steps": 182240, "total_steps": 204665, "loss": 0.0, "lr": 7.226074793007264e-08, "epoch": 4.452153519165465, "percentage": 89.04, "elapsed_time": "3:55:28", "remaining_time": "0:28:58", "throughput": 8689.3, "total_tokens": 122768744} +{"current_steps": 182245, "total_steps": 204665, "loss": 0.0, "lr": 7.222892323103846e-08, "epoch": 4.452275669997313, "percentage": 89.05, "elapsed_time": "3:55:29", "remaining_time": "0:28:58", "throughput": 8689.32, "total_tokens": 122772136} +{"current_steps": 182250, "total_steps": 204665, "loss": 0.0, "lr": 7.21971052790048e-08, "epoch": 4.45239782082916, "percentage": 89.05, "elapsed_time": "3:55:29", "remaining_time": "0:28:57", "throughput": 8689.35, "total_tokens": 122775528} +{"current_steps": 182255, "total_steps": 204665, "loss": 0.0, "lr": 7.216529407420357e-08, "epoch": 4.452519971661007, "percentage": 89.05, "elapsed_time": "3:55:29", "remaining_time": "0:28:57", "throughput": 8689.38, "total_tokens": 122779048} +{"current_steps": 182260, "total_steps": 204665, "loss": 0.0, "lr": 7.213348961686572e-08, "epoch": 4.452642122492854, "percentage": 89.05, "elapsed_time": "3:55:30", "remaining_time": "0:28:57", "throughput": 8689.41, "total_tokens": 122782568} +{"current_steps": 182265, "total_steps": 204665, "loss": 0.0523, "lr": 7.210169190722271e-08, "epoch": 4.452764273324702, "percentage": 89.06, "elapsed_time": "3:55:30", "remaining_time": "0:28:56", "throughput": 8689.44, "total_tokens": 122786088} +{"current_steps": 182270, "total_steps": 204665, "loss": 0.0002, "lr": 7.206990094550592e-08, "epoch": 4.4528864241565485, "percentage": 89.06, "elapsed_time": "3:55:30", "remaining_time": "0:28:56", "throughput": 8689.48, "total_tokens": 122789608} +{"current_steps": 182275, "total_steps": 204665, "loss": 0.0, "lr": 7.203811673194615e-08, "epoch": 4.453008574988396, "percentage": 89.06, "elapsed_time": "3:55:31", "remaining_time": "0:28:55", "throughput": 8689.5, "total_tokens": 122793000} +{"current_steps": 182280, "total_steps": 204665, "loss": 0.0, "lr": 7.200633926677513e-08, "epoch": 4.453130725820243, "percentage": 89.06, "elapsed_time": "3:55:31", "remaining_time": "0:28:55", "throughput": 8689.53, "total_tokens": 122796456} +{"current_steps": 182285, "total_steps": 204665, "loss": 0.0, "lr": 7.197456855022333e-08, "epoch": 4.4532528766520905, "percentage": 89.07, "elapsed_time": "3:55:31", "remaining_time": "0:28:55", "throughput": 8689.54, "total_tokens": 122799528} +{"current_steps": 182290, "total_steps": 204665, "loss": 0.0, "lr": 7.194280458252211e-08, "epoch": 4.453375027483937, "percentage": 89.07, "elapsed_time": "3:55:32", "remaining_time": "0:28:54", "throughput": 8689.54, "total_tokens": 122802472} +{"current_steps": 182295, "total_steps": 204665, "loss": 0.0, "lr": 7.191104736390252e-08, "epoch": 4.453497178315784, "percentage": 89.07, "elapsed_time": "3:55:32", "remaining_time": "0:28:54", "throughput": 8689.54, "total_tokens": 122805416} +{"current_steps": 182300, "total_steps": 204665, "loss": 0.0005, "lr": 7.187929689459527e-08, "epoch": 4.453619329147632, "percentage": 89.07, "elapsed_time": "3:55:32", "remaining_time": "0:28:53", "throughput": 8689.55, "total_tokens": 122808424} +{"current_steps": 182305, "total_steps": 204665, "loss": 0.001, "lr": 7.18475531748317e-08, "epoch": 4.453741479979478, "percentage": 89.07, "elapsed_time": "3:55:33", "remaining_time": "0:28:53", "throughput": 8689.57, "total_tokens": 122811752} +{"current_steps": 182310, "total_steps": 204665, "loss": 0.0, "lr": 7.181581620484211e-08, "epoch": 4.453863630811326, "percentage": 89.08, "elapsed_time": "3:55:33", "remaining_time": "0:28:53", "throughput": 8689.59, "total_tokens": 122815080} +{"current_steps": 182315, "total_steps": 204665, "loss": 0.0, "lr": 7.178408598485775e-08, "epoch": 4.453985781643173, "percentage": 89.08, "elapsed_time": "3:55:33", "remaining_time": "0:28:52", "throughput": 8689.61, "total_tokens": 122818344} +{"current_steps": 182320, "total_steps": 204665, "loss": 0.0, "lr": 7.175236251510908e-08, "epoch": 4.45410793247502, "percentage": 89.08, "elapsed_time": "3:55:34", "remaining_time": "0:28:52", "throughput": 8689.67, "total_tokens": 122822312} +{"current_steps": 182325, "total_steps": 204665, "loss": 0.0, "lr": 7.172064579582682e-08, "epoch": 4.454230083306867, "percentage": 89.08, "elapsed_time": "3:55:34", "remaining_time": "0:28:51", "throughput": 8689.68, "total_tokens": 122825512} +{"current_steps": 182330, "total_steps": 204665, "loss": 0.0, "lr": 7.16889358272419e-08, "epoch": 4.454352234138715, "percentage": 89.09, "elapsed_time": "3:55:34", "remaining_time": "0:28:51", "throughput": 8689.71, "total_tokens": 122828904} +{"current_steps": 182335, "total_steps": 204665, "loss": 0.1163, "lr": 7.165723260958445e-08, "epoch": 4.4544743849705615, "percentage": 89.09, "elapsed_time": "3:55:35", "remaining_time": "0:28:51", "throughput": 8689.76, "total_tokens": 122832808} +{"current_steps": 182340, "total_steps": 204665, "loss": 0.0, "lr": 7.162553614308552e-08, "epoch": 4.454596535802409, "percentage": 89.09, "elapsed_time": "3:55:35", "remaining_time": "0:28:50", "throughput": 8689.77, "total_tokens": 122835880} +{"current_steps": 182345, "total_steps": 204665, "loss": 0.0, "lr": 7.159384642797528e-08, "epoch": 4.454718686634256, "percentage": 89.09, "elapsed_time": "3:55:36", "remaining_time": "0:28:50", "throughput": 8689.78, "total_tokens": 122839016} +{"current_steps": 182350, "total_steps": 204665, "loss": 0.0, "lr": 7.156216346448419e-08, "epoch": 4.4548408374661035, "percentage": 89.1, "elapsed_time": "3:55:36", "remaining_time": "0:28:49", "throughput": 8689.85, "total_tokens": 122843048} +{"current_steps": 182355, "total_steps": 204665, "loss": 0.0, "lr": 7.153048725284305e-08, "epoch": 4.45496298829795, "percentage": 89.1, "elapsed_time": "3:55:36", "remaining_time": "0:28:49", "throughput": 8689.9, "total_tokens": 122847016} +{"current_steps": 182360, "total_steps": 204665, "loss": 0.0, "lr": 7.14988177932817e-08, "epoch": 4.455085139129798, "percentage": 89.1, "elapsed_time": "3:55:37", "remaining_time": "0:28:49", "throughput": 8689.91, "total_tokens": 122850152} +{"current_steps": 182365, "total_steps": 204665, "loss": 0.0, "lr": 7.146715508603085e-08, "epoch": 4.455207289961645, "percentage": 89.1, "elapsed_time": "3:55:37", "remaining_time": "0:28:48", "throughput": 8689.92, "total_tokens": 122853096} +{"current_steps": 182370, "total_steps": 204665, "loss": 0.0, "lr": 7.143549913132052e-08, "epoch": 4.455329440793491, "percentage": 89.11, "elapsed_time": "3:55:37", "remaining_time": "0:28:48", "throughput": 8689.94, "total_tokens": 122856488} +{"current_steps": 182375, "total_steps": 204665, "loss": 0.0, "lr": 7.140384992938108e-08, "epoch": 4.455451591625339, "percentage": 89.11, "elapsed_time": "3:55:38", "remaining_time": "0:28:47", "throughput": 8689.96, "total_tokens": 122859752} +{"current_steps": 182380, "total_steps": 204665, "loss": 0.0, "lr": 7.137220748044236e-08, "epoch": 4.455573742457186, "percentage": 89.11, "elapsed_time": "3:55:38", "remaining_time": "0:28:47", "throughput": 8690.0, "total_tokens": 122863336} +{"current_steps": 182385, "total_steps": 204665, "loss": 0.0, "lr": 7.134057178473485e-08, "epoch": 4.455695893289033, "percentage": 89.11, "elapsed_time": "3:55:38", "remaining_time": "0:28:47", "throughput": 8690.01, "total_tokens": 122866472} +{"current_steps": 182390, "total_steps": 204665, "loss": 0.0, "lr": 7.130894284248856e-08, "epoch": 4.45581804412088, "percentage": 89.12, "elapsed_time": "3:55:39", "remaining_time": "0:28:46", "throughput": 8690.06, "total_tokens": 122870248} +{"current_steps": 182395, "total_steps": 204665, "loss": 0.0383, "lr": 7.127732065393333e-08, "epoch": 4.455940194952728, "percentage": 89.12, "elapsed_time": "3:55:39", "remaining_time": "0:28:46", "throughput": 8690.06, "total_tokens": 122873256} +{"current_steps": 182400, "total_steps": 204665, "loss": 0.0, "lr": 7.12457052192994e-08, "epoch": 4.4560623457845745, "percentage": 89.12, "elapsed_time": "3:55:39", "remaining_time": "0:28:46", "throughput": 8690.09, "total_tokens": 122876584} +{"current_steps": 182405, "total_steps": 204665, "loss": 0.0, "lr": 7.121409653881628e-08, "epoch": 4.456184496616422, "percentage": 89.12, "elapsed_time": "3:55:40", "remaining_time": "0:28:45", "throughput": 8690.11, "total_tokens": 122879976} +{"current_steps": 182410, "total_steps": 204665, "loss": 0.0, "lr": 7.11824946127142e-08, "epoch": 4.456306647448269, "percentage": 89.13, "elapsed_time": "3:55:40", "remaining_time": "0:28:45", "throughput": 8690.13, "total_tokens": 122883240} +{"current_steps": 182415, "total_steps": 204665, "loss": 0.0, "lr": 7.115089944122276e-08, "epoch": 4.4564287982801165, "percentage": 89.13, "elapsed_time": "3:55:40", "remaining_time": "0:28:44", "throughput": 8690.14, "total_tokens": 122886312} +{"current_steps": 182420, "total_steps": 204665, "loss": 0.0, "lr": 7.111931102457192e-08, "epoch": 4.456550949111963, "percentage": 89.13, "elapsed_time": "3:55:41", "remaining_time": "0:28:44", "throughput": 8690.16, "total_tokens": 122889640} +{"current_steps": 182425, "total_steps": 204665, "loss": 0.0, "lr": 7.108772936299134e-08, "epoch": 4.456673099943811, "percentage": 89.13, "elapsed_time": "3:55:41", "remaining_time": "0:28:44", "throughput": 8690.2, "total_tokens": 122893288} +{"current_steps": 182430, "total_steps": 204665, "loss": 0.0, "lr": 7.105615445671042e-08, "epoch": 4.456795250775658, "percentage": 89.14, "elapsed_time": "3:55:41", "remaining_time": "0:28:43", "throughput": 8690.22, "total_tokens": 122896552} +{"current_steps": 182435, "total_steps": 204665, "loss": 0.0, "lr": 7.10245863059592e-08, "epoch": 4.456917401607505, "percentage": 89.14, "elapsed_time": "3:55:42", "remaining_time": "0:28:43", "throughput": 8690.24, "total_tokens": 122899880} +{"current_steps": 182440, "total_steps": 204665, "loss": 0.0, "lr": 7.099302491096681e-08, "epoch": 4.457039552439352, "percentage": 89.14, "elapsed_time": "3:55:42", "remaining_time": "0:28:42", "throughput": 8690.28, "total_tokens": 122903528} +{"current_steps": 182445, "total_steps": 204665, "loss": 0.0, "lr": 7.096147027196308e-08, "epoch": 4.4571617032712, "percentage": 89.14, "elapsed_time": "3:55:42", "remaining_time": "0:28:42", "throughput": 8690.32, "total_tokens": 122907176} +{"current_steps": 182450, "total_steps": 204665, "loss": 0.0, "lr": 7.092992238917761e-08, "epoch": 4.457283854103046, "percentage": 89.15, "elapsed_time": "3:55:43", "remaining_time": "0:28:42", "throughput": 8690.36, "total_tokens": 122910760} +{"current_steps": 182455, "total_steps": 204665, "loss": 0.0, "lr": 7.089838126283943e-08, "epoch": 4.457406004934894, "percentage": 89.15, "elapsed_time": "3:55:43", "remaining_time": "0:28:41", "throughput": 8690.37, "total_tokens": 122913960} +{"current_steps": 182460, "total_steps": 204665, "loss": 0.0, "lr": 7.086684689317834e-08, "epoch": 4.457528155766741, "percentage": 89.15, "elapsed_time": "3:55:44", "remaining_time": "0:28:41", "throughput": 8690.37, "total_tokens": 122916904} +{"current_steps": 182465, "total_steps": 204665, "loss": 0.0, "lr": 7.083531928042319e-08, "epoch": 4.4576503065985875, "percentage": 89.15, "elapsed_time": "3:55:44", "remaining_time": "0:28:40", "throughput": 8690.39, "total_tokens": 122920168} +{"current_steps": 182470, "total_steps": 204665, "loss": 0.0, "lr": 7.080379842480378e-08, "epoch": 4.457772457430435, "percentage": 89.16, "elapsed_time": "3:55:44", "remaining_time": "0:28:40", "throughput": 8690.42, "total_tokens": 122923688} +{"current_steps": 182475, "total_steps": 204665, "loss": 0.0, "lr": 7.077228432654881e-08, "epoch": 4.457894608262282, "percentage": 89.16, "elapsed_time": "3:55:45", "remaining_time": "0:28:40", "throughput": 8690.42, "total_tokens": 122926632} +{"current_steps": 182480, "total_steps": 204665, "loss": 0.0, "lr": 7.074077698588777e-08, "epoch": 4.4580167590941295, "percentage": 89.16, "elapsed_time": "3:55:45", "remaining_time": "0:28:39", "throughput": 8690.46, "total_tokens": 122930152} +{"current_steps": 182485, "total_steps": 204665, "loss": 0.0, "lr": 7.070927640304992e-08, "epoch": 4.458138909925976, "percentage": 89.16, "elapsed_time": "3:55:45", "remaining_time": "0:28:39", "throughput": 8690.47, "total_tokens": 122933288} +{"current_steps": 182490, "total_steps": 204665, "loss": 0.0, "lr": 7.067778257826395e-08, "epoch": 4.458261060757824, "percentage": 89.17, "elapsed_time": "3:55:46", "remaining_time": "0:28:38", "throughput": 8690.5, "total_tokens": 122936808} +{"current_steps": 182495, "total_steps": 204665, "loss": 0.0, "lr": 7.064629551175928e-08, "epoch": 4.458383211589671, "percentage": 89.17, "elapsed_time": "3:55:46", "remaining_time": "0:28:38", "throughput": 8690.51, "total_tokens": 122939880} +{"current_steps": 182500, "total_steps": 204665, "loss": 0.0, "lr": 7.061481520376455e-08, "epoch": 4.458505362421518, "percentage": 89.17, "elapsed_time": "3:55:46", "remaining_time": "0:28:38", "throughput": 8690.52, "total_tokens": 122943080} +{"current_steps": 182505, "total_steps": 204665, "loss": 0.0, "lr": 7.058334165450885e-08, "epoch": 4.458627513253365, "percentage": 89.17, "elapsed_time": "3:55:47", "remaining_time": "0:28:37", "throughput": 8690.55, "total_tokens": 122946472} +{"current_steps": 182510, "total_steps": 204665, "loss": 0.0, "lr": 7.055187486422131e-08, "epoch": 4.458749664085213, "percentage": 89.17, "elapsed_time": "3:55:47", "remaining_time": "0:28:37", "throughput": 8690.56, "total_tokens": 122949672} +{"current_steps": 182515, "total_steps": 204665, "loss": 0.0, "lr": 7.052041483313043e-08, "epoch": 4.458871814917059, "percentage": 89.18, "elapsed_time": "3:55:47", "remaining_time": "0:28:36", "throughput": 8690.58, "total_tokens": 122952936} +{"current_steps": 182520, "total_steps": 204665, "loss": 0.0, "lr": 7.0488961561465e-08, "epoch": 4.458993965748907, "percentage": 89.18, "elapsed_time": "3:55:48", "remaining_time": "0:28:36", "throughput": 8690.6, "total_tokens": 122956136} +{"current_steps": 182525, "total_steps": 204665, "loss": 0.0, "lr": 7.045751504945396e-08, "epoch": 4.459116116580754, "percentage": 89.18, "elapsed_time": "3:55:48", "remaining_time": "0:28:36", "throughput": 8690.61, "total_tokens": 122959336} +{"current_steps": 182530, "total_steps": 204665, "loss": 0.0, "lr": 7.04260752973258e-08, "epoch": 4.459238267412601, "percentage": 89.18, "elapsed_time": "3:55:48", "remaining_time": "0:28:35", "throughput": 8690.65, "total_tokens": 122962984} +{"current_steps": 182535, "total_steps": 204665, "loss": 0.0, "lr": 7.039464230530933e-08, "epoch": 4.459360418244448, "percentage": 89.19, "elapsed_time": "3:55:49", "remaining_time": "0:28:35", "throughput": 8690.66, "total_tokens": 122966056} +{"current_steps": 182540, "total_steps": 204665, "loss": 0.0286, "lr": 7.036321607363294e-08, "epoch": 4.459482569076296, "percentage": 89.19, "elapsed_time": "3:55:49", "remaining_time": "0:28:35", "throughput": 8690.67, "total_tokens": 122969128} +{"current_steps": 182545, "total_steps": 204665, "loss": 0.0, "lr": 7.033179660252541e-08, "epoch": 4.459604719908143, "percentage": 89.19, "elapsed_time": "3:55:49", "remaining_time": "0:28:34", "throughput": 8690.7, "total_tokens": 122972712} +{"current_steps": 182550, "total_steps": 204665, "loss": 0.0, "lr": 7.030038389221493e-08, "epoch": 4.45972687073999, "percentage": 89.19, "elapsed_time": "3:55:50", "remaining_time": "0:28:34", "throughput": 8690.73, "total_tokens": 122976104} +{"current_steps": 182555, "total_steps": 204665, "loss": 0.0, "lr": 7.02689779429304e-08, "epoch": 4.459849021571837, "percentage": 89.2, "elapsed_time": "3:55:50", "remaining_time": "0:28:33", "throughput": 8690.74, "total_tokens": 122979304} +{"current_steps": 182560, "total_steps": 204665, "loss": 0.0, "lr": 7.023757875489967e-08, "epoch": 4.459971172403684, "percentage": 89.2, "elapsed_time": "3:55:50", "remaining_time": "0:28:33", "throughput": 8690.78, "total_tokens": 122982888} +{"current_steps": 182565, "total_steps": 204665, "loss": 0.0, "lr": 7.020618632835151e-08, "epoch": 4.460093323235531, "percentage": 89.2, "elapsed_time": "3:55:51", "remaining_time": "0:28:33", "throughput": 8690.78, "total_tokens": 122985896} +{"current_steps": 182570, "total_steps": 204665, "loss": 0.0001, "lr": 7.017480066351388e-08, "epoch": 4.460215474067378, "percentage": 89.2, "elapsed_time": "3:55:51", "remaining_time": "0:28:32", "throughput": 8690.8, "total_tokens": 122989160} +{"current_steps": 182575, "total_steps": 204665, "loss": 0.1477, "lr": 7.014342176061517e-08, "epoch": 4.460337624899226, "percentage": 89.21, "elapsed_time": "3:55:52", "remaining_time": "0:28:32", "throughput": 8690.81, "total_tokens": 122992424} +{"current_steps": 182580, "total_steps": 204665, "loss": 0.0, "lr": 7.011204961988382e-08, "epoch": 4.460459775731072, "percentage": 89.21, "elapsed_time": "3:55:52", "remaining_time": "0:28:31", "throughput": 8690.84, "total_tokens": 122995880} +{"current_steps": 182585, "total_steps": 204665, "loss": 0.0, "lr": 7.008068424154756e-08, "epoch": 4.46058192656292, "percentage": 89.21, "elapsed_time": "3:55:52", "remaining_time": "0:28:31", "throughput": 8690.85, "total_tokens": 122998952} +{"current_steps": 182590, "total_steps": 204665, "loss": 0.0, "lr": 7.004932562583488e-08, "epoch": 4.460704077394767, "percentage": 89.21, "elapsed_time": "3:55:53", "remaining_time": "0:28:31", "throughput": 8690.86, "total_tokens": 123002024} +{"current_steps": 182595, "total_steps": 204665, "loss": 0.0, "lr": 7.001797377297348e-08, "epoch": 4.4608262282266145, "percentage": 89.22, "elapsed_time": "3:55:53", "remaining_time": "0:28:30", "throughput": 8690.89, "total_tokens": 123005480} +{"current_steps": 182600, "total_steps": 204665, "loss": 0.0, "lr": 6.998662868319138e-08, "epoch": 4.460948379058461, "percentage": 89.22, "elapsed_time": "3:55:53", "remaining_time": "0:28:30", "throughput": 8690.91, "total_tokens": 123008808} +{"current_steps": 182605, "total_steps": 204665, "loss": 0.0, "lr": 6.9955290356717e-08, "epoch": 4.461070529890309, "percentage": 89.22, "elapsed_time": "3:55:54", "remaining_time": "0:28:29", "throughput": 8690.94, "total_tokens": 123012328} +{"current_steps": 182610, "total_steps": 204665, "loss": 0.0001, "lr": 6.992395879377766e-08, "epoch": 4.461192680722156, "percentage": 89.22, "elapsed_time": "3:55:54", "remaining_time": "0:28:29", "throughput": 8690.97, "total_tokens": 123015720} +{"current_steps": 182615, "total_steps": 204665, "loss": 0.0, "lr": 6.989263399460155e-08, "epoch": 4.461314831554003, "percentage": 89.23, "elapsed_time": "3:55:54", "remaining_time": "0:28:29", "throughput": 8690.99, "total_tokens": 123019048} +{"current_steps": 182620, "total_steps": 204665, "loss": 0.0001, "lr": 6.986131595941624e-08, "epoch": 4.46143698238585, "percentage": 89.23, "elapsed_time": "3:55:55", "remaining_time": "0:28:28", "throughput": 8691.03, "total_tokens": 123022568} +{"current_steps": 182625, "total_steps": 204665, "loss": 0.0, "lr": 6.98300046884498e-08, "epoch": 4.461559133217698, "percentage": 89.23, "elapsed_time": "3:55:55", "remaining_time": "0:28:28", "throughput": 8691.04, "total_tokens": 123025640} +{"current_steps": 182630, "total_steps": 204665, "loss": 0.0, "lr": 6.97987001819298e-08, "epoch": 4.461681284049544, "percentage": 89.23, "elapsed_time": "3:55:55", "remaining_time": "0:28:27", "throughput": 8691.07, "total_tokens": 123029160} +{"current_steps": 182635, "total_steps": 204665, "loss": 0.0, "lr": 6.976740244008361e-08, "epoch": 4.461803434881391, "percentage": 89.24, "elapsed_time": "3:55:56", "remaining_time": "0:28:27", "throughput": 8691.09, "total_tokens": 123032360} +{"current_steps": 182640, "total_steps": 204665, "loss": 0.0, "lr": 6.973611146313929e-08, "epoch": 4.461925585713239, "percentage": 89.24, "elapsed_time": "3:55:56", "remaining_time": "0:28:27", "throughput": 8691.12, "total_tokens": 123035880} +{"current_steps": 182645, "total_steps": 204665, "loss": 0.0001, "lr": 6.970482725132399e-08, "epoch": 4.4620477365450855, "percentage": 89.24, "elapsed_time": "3:55:56", "remaining_time": "0:28:26", "throughput": 8691.12, "total_tokens": 123038888} +{"current_steps": 182650, "total_steps": 204665, "loss": 0.0, "lr": 6.967354980486562e-08, "epoch": 4.462169887376933, "percentage": 89.24, "elapsed_time": "3:55:57", "remaining_time": "0:28:26", "throughput": 8691.14, "total_tokens": 123042088} +{"current_steps": 182655, "total_steps": 204665, "loss": 0.0, "lr": 6.964227912399123e-08, "epoch": 4.46229203820878, "percentage": 89.25, "elapsed_time": "3:55:57", "remaining_time": "0:28:25", "throughput": 8691.16, "total_tokens": 123045416} +{"current_steps": 182660, "total_steps": 204665, "loss": 0.0, "lr": 6.961101520892831e-08, "epoch": 4.4624141890406275, "percentage": 89.25, "elapsed_time": "3:55:57", "remaining_time": "0:28:25", "throughput": 8691.17, "total_tokens": 123048616} +{"current_steps": 182665, "total_steps": 204665, "loss": 0.0, "lr": 6.957975805990469e-08, "epoch": 4.462536339872474, "percentage": 89.25, "elapsed_time": "3:55:58", "remaining_time": "0:28:25", "throughput": 8691.18, "total_tokens": 123051560} +{"current_steps": 182670, "total_steps": 204665, "loss": 0.0, "lr": 6.954850767714704e-08, "epoch": 4.462658490704322, "percentage": 89.25, "elapsed_time": "3:55:58", "remaining_time": "0:28:24", "throughput": 8691.21, "total_tokens": 123055144} +{"current_steps": 182675, "total_steps": 204665, "loss": 0.0, "lr": 6.951726406088309e-08, "epoch": 4.462780641536169, "percentage": 89.26, "elapsed_time": "3:55:58", "remaining_time": "0:28:24", "throughput": 8691.3, "total_tokens": 123059624} +{"current_steps": 182680, "total_steps": 204665, "loss": 0.0, "lr": 6.948602721133967e-08, "epoch": 4.462902792368016, "percentage": 89.26, "elapsed_time": "3:55:59", "remaining_time": "0:28:24", "throughput": 8691.34, "total_tokens": 123063336} +{"current_steps": 182685, "total_steps": 204665, "loss": 0.0, "lr": 6.945479712874436e-08, "epoch": 4.463024943199863, "percentage": 89.26, "elapsed_time": "3:55:59", "remaining_time": "0:28:23", "throughput": 8691.37, "total_tokens": 123066728} +{"current_steps": 182690, "total_steps": 204665, "loss": 0.0, "lr": 6.942357381332387e-08, "epoch": 4.463147094031711, "percentage": 89.26, "elapsed_time": "3:56:00", "remaining_time": "0:28:23", "throughput": 8691.41, "total_tokens": 123070376} +{"current_steps": 182695, "total_steps": 204665, "loss": 0.0, "lr": 6.939235726530535e-08, "epoch": 4.463269244863557, "percentage": 89.27, "elapsed_time": "3:56:00", "remaining_time": "0:28:22", "throughput": 8691.42, "total_tokens": 123073512} +{"current_steps": 182700, "total_steps": 204665, "loss": 0.0005, "lr": 6.936114748491617e-08, "epoch": 4.463391395695405, "percentage": 89.27, "elapsed_time": "3:56:00", "remaining_time": "0:28:22", "throughput": 8691.42, "total_tokens": 123076520} +{"current_steps": 182705, "total_steps": 204665, "loss": 0.0, "lr": 6.932994447238294e-08, "epoch": 4.463513546527252, "percentage": 89.27, "elapsed_time": "3:56:01", "remaining_time": "0:28:22", "throughput": 8691.47, "total_tokens": 123080360} +{"current_steps": 182710, "total_steps": 204665, "loss": 0.0, "lr": 6.929874822793269e-08, "epoch": 4.463635697359099, "percentage": 89.27, "elapsed_time": "3:56:01", "remaining_time": "0:28:21", "throughput": 8691.51, "total_tokens": 123084072} +{"current_steps": 182715, "total_steps": 204665, "loss": 0.0, "lr": 6.926755875179224e-08, "epoch": 4.463757848190946, "percentage": 89.28, "elapsed_time": "3:56:01", "remaining_time": "0:28:21", "throughput": 8691.53, "total_tokens": 123087208} +{"current_steps": 182720, "total_steps": 204665, "loss": 0.0, "lr": 6.923637604418853e-08, "epoch": 4.463879999022794, "percentage": 89.28, "elapsed_time": "3:56:02", "remaining_time": "0:28:20", "throughput": 8691.54, "total_tokens": 123090344} +{"current_steps": 182725, "total_steps": 204665, "loss": 0.0, "lr": 6.920520010534803e-08, "epoch": 4.4640021498546405, "percentage": 89.28, "elapsed_time": "3:56:02", "remaining_time": "0:28:20", "throughput": 8691.56, "total_tokens": 123093608} +{"current_steps": 182730, "total_steps": 204665, "loss": 0.0, "lr": 6.91740309354979e-08, "epoch": 4.464124300686487, "percentage": 89.28, "elapsed_time": "3:56:02", "remaining_time": "0:28:20", "throughput": 8691.61, "total_tokens": 123097448} +{"current_steps": 182735, "total_steps": 204665, "loss": 0.0, "lr": 6.914286853486462e-08, "epoch": 4.464246451518335, "percentage": 89.28, "elapsed_time": "3:56:03", "remaining_time": "0:28:19", "throughput": 8691.66, "total_tokens": 123101352} +{"current_steps": 182740, "total_steps": 204665, "loss": 0.0, "lr": 6.911171290367457e-08, "epoch": 4.464368602350182, "percentage": 89.29, "elapsed_time": "3:56:03", "remaining_time": "0:28:19", "throughput": 8691.67, "total_tokens": 123104488} +{"current_steps": 182745, "total_steps": 204665, "loss": 0.0, "lr": 6.908056404215467e-08, "epoch": 4.464490753182029, "percentage": 89.29, "elapsed_time": "3:56:03", "remaining_time": "0:28:18", "throughput": 8691.69, "total_tokens": 123107752} +{"current_steps": 182750, "total_steps": 204665, "loss": 0.0, "lr": 6.90494219505311e-08, "epoch": 4.464612904013876, "percentage": 89.29, "elapsed_time": "3:56:04", "remaining_time": "0:28:18", "throughput": 8691.73, "total_tokens": 123111336} +{"current_steps": 182755, "total_steps": 204665, "loss": 0.0172, "lr": 6.901828662903054e-08, "epoch": 4.464735054845724, "percentage": 89.29, "elapsed_time": "3:56:04", "remaining_time": "0:28:18", "throughput": 8691.75, "total_tokens": 123114536} +{"current_steps": 182760, "total_steps": 204665, "loss": 0.0003, "lr": 6.898715807787958e-08, "epoch": 4.46485720567757, "percentage": 89.3, "elapsed_time": "3:56:04", "remaining_time": "0:28:17", "throughput": 8691.77, "total_tokens": 123117864} +{"current_steps": 182765, "total_steps": 204665, "loss": 0.0, "lr": 6.895603629730429e-08, "epoch": 4.464979356509418, "percentage": 89.3, "elapsed_time": "3:56:05", "remaining_time": "0:28:17", "throughput": 8691.76, "total_tokens": 123120680} +{"current_steps": 182770, "total_steps": 204665, "loss": 0.0, "lr": 6.892492128753124e-08, "epoch": 4.465101507341265, "percentage": 89.3, "elapsed_time": "3:56:05", "remaining_time": "0:28:16", "throughput": 8691.79, "total_tokens": 123124136} +{"current_steps": 182775, "total_steps": 204665, "loss": 0.0, "lr": 6.88938130487865e-08, "epoch": 4.465223658173112, "percentage": 89.3, "elapsed_time": "3:56:05", "remaining_time": "0:28:16", "throughput": 8691.82, "total_tokens": 123127528} +{"current_steps": 182780, "total_steps": 204665, "loss": 0.0, "lr": 6.886271158129642e-08, "epoch": 4.465345809004959, "percentage": 89.31, "elapsed_time": "3:56:06", "remaining_time": "0:28:16", "throughput": 8691.82, "total_tokens": 123130600} +{"current_steps": 182785, "total_steps": 204665, "loss": 0.0, "lr": 6.883161688528715e-08, "epoch": 4.465467959836807, "percentage": 89.31, "elapsed_time": "3:56:06", "remaining_time": "0:28:15", "throughput": 8691.9, "total_tokens": 123134888} +{"current_steps": 182790, "total_steps": 204665, "loss": 0.0, "lr": 6.880052896098465e-08, "epoch": 4.4655901106686535, "percentage": 89.31, "elapsed_time": "3:56:06", "remaining_time": "0:28:15", "throughput": 8691.93, "total_tokens": 123138344} +{"current_steps": 182795, "total_steps": 204665, "loss": 0.0, "lr": 6.876944780861548e-08, "epoch": 4.465712261500501, "percentage": 89.31, "elapsed_time": "3:56:07", "remaining_time": "0:28:15", "throughput": 8691.97, "total_tokens": 123142056} +{"current_steps": 182800, "total_steps": 204665, "loss": 0.0, "lr": 6.873837342840516e-08, "epoch": 4.465834412332348, "percentage": 89.32, "elapsed_time": "3:56:07", "remaining_time": "0:28:14", "throughput": 8691.99, "total_tokens": 123145384} +{"current_steps": 182805, "total_steps": 204665, "loss": 0.0001, "lr": 6.870730582057993e-08, "epoch": 4.4659565631641955, "percentage": 89.32, "elapsed_time": "3:56:08", "remaining_time": "0:28:14", "throughput": 8692.02, "total_tokens": 123148712} +{"current_steps": 182810, "total_steps": 204665, "loss": 0.0, "lr": 6.867624498536561e-08, "epoch": 4.466078713996042, "percentage": 89.32, "elapsed_time": "3:56:08", "remaining_time": "0:28:13", "throughput": 8692.03, "total_tokens": 123151848} +{"current_steps": 182815, "total_steps": 204665, "loss": 0.0, "lr": 6.864519092298804e-08, "epoch": 4.46620086482789, "percentage": 89.32, "elapsed_time": "3:56:08", "remaining_time": "0:28:13", "throughput": 8692.04, "total_tokens": 123155112} +{"current_steps": 182820, "total_steps": 204665, "loss": 0.0, "lr": 6.861414363367335e-08, "epoch": 4.466323015659737, "percentage": 89.33, "elapsed_time": "3:56:09", "remaining_time": "0:28:13", "throughput": 8692.06, "total_tokens": 123158312} +{"current_steps": 182825, "total_steps": 204665, "loss": 0.0, "lr": 6.858310311764715e-08, "epoch": 4.466445166491583, "percentage": 89.33, "elapsed_time": "3:56:09", "remaining_time": "0:28:12", "throughput": 8692.08, "total_tokens": 123161640} +{"current_steps": 182830, "total_steps": 204665, "loss": 0.0, "lr": 6.855206937513491e-08, "epoch": 4.466567317323431, "percentage": 89.33, "elapsed_time": "3:56:09", "remaining_time": "0:28:12", "throughput": 8692.12, "total_tokens": 123165160} +{"current_steps": 182835, "total_steps": 204665, "loss": 0.0, "lr": 6.85210424063628e-08, "epoch": 4.466689468155278, "percentage": 89.33, "elapsed_time": "3:56:10", "remaining_time": "0:28:11", "throughput": 8692.16, "total_tokens": 123168808} +{"current_steps": 182840, "total_steps": 204665, "loss": 0.0, "lr": 6.849002221155598e-08, "epoch": 4.466811618987125, "percentage": 89.34, "elapsed_time": "3:56:10", "remaining_time": "0:28:11", "throughput": 8692.19, "total_tokens": 123172264} +{"current_steps": 182845, "total_steps": 204665, "loss": 0.0, "lr": 6.845900879094046e-08, "epoch": 4.466933769818972, "percentage": 89.34, "elapsed_time": "3:56:10", "remaining_time": "0:28:11", "throughput": 8692.2, "total_tokens": 123175528} +{"current_steps": 182850, "total_steps": 204665, "loss": 0.0, "lr": 6.842800214474143e-08, "epoch": 4.46705592065082, "percentage": 89.34, "elapsed_time": "3:56:11", "remaining_time": "0:28:10", "throughput": 8692.24, "total_tokens": 123179176} +{"current_steps": 182855, "total_steps": 204665, "loss": 0.0, "lr": 6.839700227318468e-08, "epoch": 4.4671780714826665, "percentage": 89.34, "elapsed_time": "3:56:11", "remaining_time": "0:28:10", "throughput": 8692.26, "total_tokens": 123182376} +{"current_steps": 182860, "total_steps": 204665, "loss": 0.0, "lr": 6.836600917649538e-08, "epoch": 4.467300222314514, "percentage": 89.35, "elapsed_time": "3:56:11", "remaining_time": "0:28:09", "throughput": 8692.28, "total_tokens": 123185704} +{"current_steps": 182865, "total_steps": 204665, "loss": 0.0, "lr": 6.833502285489911e-08, "epoch": 4.467422373146361, "percentage": 89.35, "elapsed_time": "3:56:12", "remaining_time": "0:28:09", "throughput": 8692.3, "total_tokens": 123189032} +{"current_steps": 182870, "total_steps": 204665, "loss": 0.0, "lr": 6.830404330862104e-08, "epoch": 4.467544523978209, "percentage": 89.35, "elapsed_time": "3:56:12", "remaining_time": "0:28:09", "throughput": 8692.35, "total_tokens": 123192808} +{"current_steps": 182875, "total_steps": 204665, "loss": 0.0, "lr": 6.827307053788667e-08, "epoch": 4.467666674810055, "percentage": 89.35, "elapsed_time": "3:56:12", "remaining_time": "0:28:08", "throughput": 8692.37, "total_tokens": 123196200} +{"current_steps": 182880, "total_steps": 204665, "loss": 0.0, "lr": 6.8242104542921e-08, "epoch": 4.467788825641903, "percentage": 89.36, "elapsed_time": "3:56:13", "remaining_time": "0:28:08", "throughput": 8692.37, "total_tokens": 123199144} +{"current_steps": 182885, "total_steps": 204665, "loss": 0.0, "lr": 6.821114532394944e-08, "epoch": 4.46791097647375, "percentage": 89.36, "elapsed_time": "3:56:13", "remaining_time": "0:28:07", "throughput": 8692.43, "total_tokens": 123203048} +{"current_steps": 182890, "total_steps": 204665, "loss": 0.0, "lr": 6.818019288119714e-08, "epoch": 4.468033127305597, "percentage": 89.36, "elapsed_time": "3:56:13", "remaining_time": "0:28:07", "throughput": 8692.43, "total_tokens": 123205992} +{"current_steps": 182895, "total_steps": 204665, "loss": 0.0, "lr": 6.81492472148889e-08, "epoch": 4.468155278137444, "percentage": 89.36, "elapsed_time": "3:56:14", "remaining_time": "0:28:07", "throughput": 8692.45, "total_tokens": 123209256} +{"current_steps": 182900, "total_steps": 204665, "loss": 0.0, "lr": 6.811830832525023e-08, "epoch": 4.468277428969291, "percentage": 89.37, "elapsed_time": "3:56:14", "remaining_time": "0:28:06", "throughput": 8692.46, "total_tokens": 123212392} +{"current_steps": 182905, "total_steps": 204665, "loss": 0.0, "lr": 6.808737621250571e-08, "epoch": 4.468399579801138, "percentage": 89.37, "elapsed_time": "3:56:14", "remaining_time": "0:28:06", "throughput": 8692.48, "total_tokens": 123215784} +{"current_steps": 182910, "total_steps": 204665, "loss": 0.0, "lr": 6.805645087688039e-08, "epoch": 4.468521730632986, "percentage": 89.37, "elapsed_time": "3:56:15", "remaining_time": "0:28:05", "throughput": 8692.51, "total_tokens": 123219176} +{"current_steps": 182915, "total_steps": 204665, "loss": 0.0, "lr": 6.80255323185993e-08, "epoch": 4.468643881464833, "percentage": 89.37, "elapsed_time": "3:56:15", "remaining_time": "0:28:05", "throughput": 8692.52, "total_tokens": 123222440} +{"current_steps": 182920, "total_steps": 204665, "loss": 0.0, "lr": 6.799462053788718e-08, "epoch": 4.46876603229668, "percentage": 89.38, "elapsed_time": "3:56:16", "remaining_time": "0:28:05", "throughput": 8692.53, "total_tokens": 123225448} +{"current_steps": 182925, "total_steps": 204665, "loss": 0.0, "lr": 6.796371553496904e-08, "epoch": 4.468888183128527, "percentage": 89.38, "elapsed_time": "3:56:16", "remaining_time": "0:28:04", "throughput": 8692.53, "total_tokens": 123228456} +{"current_steps": 182930, "total_steps": 204665, "loss": 0.0348, "lr": 6.793281731006917e-08, "epoch": 4.469010333960374, "percentage": 89.38, "elapsed_time": "3:56:16", "remaining_time": "0:28:04", "throughput": 8692.55, "total_tokens": 123231720} +{"current_steps": 182935, "total_steps": 204665, "loss": 0.0, "lr": 6.790192586341282e-08, "epoch": 4.469132484792222, "percentage": 89.38, "elapsed_time": "3:56:17", "remaining_time": "0:28:04", "throughput": 8692.59, "total_tokens": 123235432} +{"current_steps": 182940, "total_steps": 204665, "loss": 0.0, "lr": 6.787104119522425e-08, "epoch": 4.469254635624068, "percentage": 89.39, "elapsed_time": "3:56:17", "remaining_time": "0:28:03", "throughput": 8692.63, "total_tokens": 123239144} +{"current_steps": 182945, "total_steps": 204665, "loss": 0.0, "lr": 6.784016330572816e-08, "epoch": 4.469376786455916, "percentage": 89.39, "elapsed_time": "3:56:17", "remaining_time": "0:28:03", "throughput": 8692.66, "total_tokens": 123242472} +{"current_steps": 182950, "total_steps": 204665, "loss": 0.0, "lr": 6.780929219514919e-08, "epoch": 4.469498937287763, "percentage": 89.39, "elapsed_time": "3:56:18", "remaining_time": "0:28:02", "throughput": 8692.67, "total_tokens": 123245608} +{"current_steps": 182955, "total_steps": 204665, "loss": 0.0, "lr": 6.777842786371157e-08, "epoch": 4.46962108811961, "percentage": 89.39, "elapsed_time": "3:56:18", "remaining_time": "0:28:02", "throughput": 8692.7, "total_tokens": 123249064} +{"current_steps": 182960, "total_steps": 204665, "loss": 0.0, "lr": 6.774757031164025e-08, "epoch": 4.469743238951457, "percentage": 89.39, "elapsed_time": "3:56:18", "remaining_time": "0:28:02", "throughput": 8692.7, "total_tokens": 123252136} +{"current_steps": 182965, "total_steps": 204665, "loss": 0.0, "lr": 6.771671953915914e-08, "epoch": 4.469865389783305, "percentage": 89.4, "elapsed_time": "3:56:19", "remaining_time": "0:28:01", "throughput": 8692.74, "total_tokens": 123255784} +{"current_steps": 182970, "total_steps": 204665, "loss": 0.0, "lr": 6.768587554649286e-08, "epoch": 4.4699875406151515, "percentage": 89.4, "elapsed_time": "3:56:19", "remaining_time": "0:28:01", "throughput": 8692.78, "total_tokens": 123259304} +{"current_steps": 182975, "total_steps": 204665, "loss": 0.0, "lr": 6.765503833386566e-08, "epoch": 4.470109691446999, "percentage": 89.4, "elapsed_time": "3:56:19", "remaining_time": "0:28:00", "throughput": 8692.78, "total_tokens": 123262376} +{"current_steps": 182980, "total_steps": 204665, "loss": 0.0, "lr": 6.76242079015017e-08, "epoch": 4.470231842278846, "percentage": 89.4, "elapsed_time": "3:56:20", "remaining_time": "0:28:00", "throughput": 8692.79, "total_tokens": 123265384} +{"current_steps": 182985, "total_steps": 204665, "loss": 0.0, "lr": 6.759338424962546e-08, "epoch": 4.4703539931106935, "percentage": 89.41, "elapsed_time": "3:56:20", "remaining_time": "0:28:00", "throughput": 8692.82, "total_tokens": 123268904} +{"current_steps": 182990, "total_steps": 204665, "loss": 0.0, "lr": 6.756256737846067e-08, "epoch": 4.47047614394254, "percentage": 89.41, "elapsed_time": "3:56:20", "remaining_time": "0:27:59", "throughput": 8692.83, "total_tokens": 123272040} +{"current_steps": 182995, "total_steps": 204665, "loss": 0.0, "lr": 6.753175728823191e-08, "epoch": 4.470598294774387, "percentage": 89.41, "elapsed_time": "3:56:21", "remaining_time": "0:27:59", "throughput": 8692.87, "total_tokens": 123275624} +{"current_steps": 183000, "total_steps": 204665, "loss": 0.0, "lr": 6.750095397916289e-08, "epoch": 4.470720445606235, "percentage": 89.41, "elapsed_time": "3:56:21", "remaining_time": "0:27:58", "throughput": 8692.91, "total_tokens": 123279144} +{"current_steps": 183005, "total_steps": 204665, "loss": 0.0, "lr": 6.747015745147777e-08, "epoch": 4.470842596438081, "percentage": 89.42, "elapsed_time": "3:56:21", "remaining_time": "0:27:58", "throughput": 8692.93, "total_tokens": 123282536} +{"current_steps": 183010, "total_steps": 204665, "loss": 0.0, "lr": 6.74393677054006e-08, "epoch": 4.470964747269929, "percentage": 89.42, "elapsed_time": "3:56:22", "remaining_time": "0:27:58", "throughput": 8692.96, "total_tokens": 123285928} +{"current_steps": 183015, "total_steps": 204665, "loss": 0.0, "lr": 6.740858474115496e-08, "epoch": 4.471086898101776, "percentage": 89.42, "elapsed_time": "3:56:22", "remaining_time": "0:27:57", "throughput": 8693.0, "total_tokens": 123289576} +{"current_steps": 183020, "total_steps": 204665, "loss": 0.0, "lr": 6.737780855896513e-08, "epoch": 4.471209048933623, "percentage": 89.42, "elapsed_time": "3:56:22", "remaining_time": "0:27:57", "throughput": 8693.04, "total_tokens": 123293160} +{"current_steps": 183025, "total_steps": 204665, "loss": 0.0, "lr": 6.73470391590546e-08, "epoch": 4.47133119976547, "percentage": 89.43, "elapsed_time": "3:56:23", "remaining_time": "0:27:56", "throughput": 8693.08, "total_tokens": 123296808} +{"current_steps": 183030, "total_steps": 204665, "loss": 0.0, "lr": 6.73162765416474e-08, "epoch": 4.471453350597318, "percentage": 89.43, "elapsed_time": "3:56:23", "remaining_time": "0:27:56", "throughput": 8693.08, "total_tokens": 123299752} +{"current_steps": 183035, "total_steps": 204665, "loss": 0.0, "lr": 6.728552070696691e-08, "epoch": 4.4715755014291645, "percentage": 89.43, "elapsed_time": "3:56:24", "remaining_time": "0:27:56", "throughput": 8693.13, "total_tokens": 123303592} +{"current_steps": 183040, "total_steps": 204665, "loss": 0.0, "lr": 6.725477165523719e-08, "epoch": 4.471697652261012, "percentage": 89.43, "elapsed_time": "3:56:24", "remaining_time": "0:27:55", "throughput": 8693.18, "total_tokens": 123307304} +{"current_steps": 183045, "total_steps": 204665, "loss": 0.0, "lr": 6.722402938668158e-08, "epoch": 4.471819803092859, "percentage": 89.44, "elapsed_time": "3:56:24", "remaining_time": "0:27:55", "throughput": 8693.21, "total_tokens": 123310824} +{"current_steps": 183050, "total_steps": 204665, "loss": 0.0, "lr": 6.719329390152361e-08, "epoch": 4.4719419539247065, "percentage": 89.44, "elapsed_time": "3:56:25", "remaining_time": "0:27:55", "throughput": 8693.24, "total_tokens": 123314280} +{"current_steps": 183055, "total_steps": 204665, "loss": 0.0, "lr": 6.716256519998698e-08, "epoch": 4.472064104756553, "percentage": 89.44, "elapsed_time": "3:56:25", "remaining_time": "0:27:54", "throughput": 8693.29, "total_tokens": 123318056} +{"current_steps": 183060, "total_steps": 204665, "loss": 0.0, "lr": 6.713184328229505e-08, "epoch": 4.472186255588401, "percentage": 89.44, "elapsed_time": "3:56:25", "remaining_time": "0:27:54", "throughput": 8693.32, "total_tokens": 123321512} +{"current_steps": 183065, "total_steps": 204665, "loss": 0.0, "lr": 6.71011281486712e-08, "epoch": 4.472308406420248, "percentage": 89.45, "elapsed_time": "3:56:26", "remaining_time": "0:27:53", "throughput": 8693.34, "total_tokens": 123324840} +{"current_steps": 183070, "total_steps": 204665, "loss": 0.0606, "lr": 6.707041979933903e-08, "epoch": 4.472430557252095, "percentage": 89.45, "elapsed_time": "3:56:26", "remaining_time": "0:27:53", "throughput": 8693.35, "total_tokens": 123327976} +{"current_steps": 183075, "total_steps": 204665, "loss": 0.0, "lr": 6.703971823452149e-08, "epoch": 4.472552708083942, "percentage": 89.45, "elapsed_time": "3:56:26", "remaining_time": "0:27:53", "throughput": 8693.36, "total_tokens": 123331112} +{"current_steps": 183080, "total_steps": 204665, "loss": 0.0, "lr": 6.700902345444226e-08, "epoch": 4.47267485891579, "percentage": 89.45, "elapsed_time": "3:56:27", "remaining_time": "0:27:52", "throughput": 8693.38, "total_tokens": 123334440} +{"current_steps": 183085, "total_steps": 204665, "loss": 0.0, "lr": 6.697833545932419e-08, "epoch": 4.472797009747636, "percentage": 89.46, "elapsed_time": "3:56:27", "remaining_time": "0:27:52", "throughput": 8693.52, "total_tokens": 123339944} +{"current_steps": 183090, "total_steps": 204665, "loss": 0.0, "lr": 6.694765424939075e-08, "epoch": 4.472919160579483, "percentage": 89.46, "elapsed_time": "3:56:27", "remaining_time": "0:27:51", "throughput": 8693.54, "total_tokens": 123343208} +{"current_steps": 183095, "total_steps": 204665, "loss": 0.0, "lr": 6.691697982486478e-08, "epoch": 4.473041311411331, "percentage": 89.46, "elapsed_time": "3:56:28", "remaining_time": "0:27:51", "throughput": 8693.58, "total_tokens": 123346728} +{"current_steps": 183100, "total_steps": 204665, "loss": 0.0003, "lr": 6.688631218596951e-08, "epoch": 4.4731634622431775, "percentage": 89.46, "elapsed_time": "3:56:28", "remaining_time": "0:27:51", "throughput": 8693.59, "total_tokens": 123349992} +{"current_steps": 183105, "total_steps": 204665, "loss": 0.0, "lr": 6.685565133292814e-08, "epoch": 4.473285613075025, "percentage": 89.47, "elapsed_time": "3:56:28", "remaining_time": "0:27:50", "throughput": 8693.6, "total_tokens": 123353064} +{"current_steps": 183110, "total_steps": 204665, "loss": 0.0, "lr": 6.682499726596336e-08, "epoch": 4.473407763906872, "percentage": 89.47, "elapsed_time": "3:56:29", "remaining_time": "0:27:50", "throughput": 8693.61, "total_tokens": 123356072} +{"current_steps": 183115, "total_steps": 204665, "loss": 0.0, "lr": 6.67943499852982e-08, "epoch": 4.4735299147387195, "percentage": 89.47, "elapsed_time": "3:56:29", "remaining_time": "0:27:49", "throughput": 8693.63, "total_tokens": 123359336} +{"current_steps": 183120, "total_steps": 204665, "loss": 0.0, "lr": 6.67637094911555e-08, "epoch": 4.473652065570566, "percentage": 89.47, "elapsed_time": "3:56:29", "remaining_time": "0:27:49", "throughput": 8693.65, "total_tokens": 123362664} +{"current_steps": 183125, "total_steps": 204665, "loss": 0.0193, "lr": 6.67330757837582e-08, "epoch": 4.473774216402414, "percentage": 89.48, "elapsed_time": "3:56:30", "remaining_time": "0:27:49", "throughput": 8693.67, "total_tokens": 123365992} +{"current_steps": 183130, "total_steps": 204665, "loss": 0.0, "lr": 6.670244886332888e-08, "epoch": 4.473896367234261, "percentage": 89.48, "elapsed_time": "3:56:30", "remaining_time": "0:27:48", "throughput": 8693.68, "total_tokens": 123369128} +{"current_steps": 183135, "total_steps": 204665, "loss": 0.0, "lr": 6.667182873009047e-08, "epoch": 4.474018518066108, "percentage": 89.48, "elapsed_time": "3:56:31", "remaining_time": "0:27:48", "throughput": 8693.71, "total_tokens": 123372648} +{"current_steps": 183140, "total_steps": 204665, "loss": 0.0002, "lr": 6.664121538426548e-08, "epoch": 4.474140668897955, "percentage": 89.48, "elapsed_time": "3:56:31", "remaining_time": "0:27:47", "throughput": 8693.75, "total_tokens": 123376168} +{"current_steps": 183145, "total_steps": 204665, "loss": 0.0, "lr": 6.661060882607672e-08, "epoch": 4.474262819729803, "percentage": 89.49, "elapsed_time": "3:56:31", "remaining_time": "0:27:47", "throughput": 8693.79, "total_tokens": 123379880} +{"current_steps": 183150, "total_steps": 204665, "loss": 0.0, "lr": 6.658000905574657e-08, "epoch": 4.474384970561649, "percentage": 89.49, "elapsed_time": "3:56:32", "remaining_time": "0:27:47", "throughput": 8693.81, "total_tokens": 123383208} +{"current_steps": 183155, "total_steps": 204665, "loss": 0.0, "lr": 6.654941607349773e-08, "epoch": 4.474507121393497, "percentage": 89.49, "elapsed_time": "3:56:32", "remaining_time": "0:27:46", "throughput": 8693.81, "total_tokens": 123386088} +{"current_steps": 183160, "total_steps": 204665, "loss": 0.0, "lr": 6.651882987955249e-08, "epoch": 4.474629272225344, "percentage": 89.49, "elapsed_time": "3:56:32", "remaining_time": "0:27:46", "throughput": 8693.82, "total_tokens": 123389288} +{"current_steps": 183165, "total_steps": 204665, "loss": 0.0, "lr": 6.648825047413353e-08, "epoch": 4.474751423057191, "percentage": 89.5, "elapsed_time": "3:56:33", "remaining_time": "0:27:45", "throughput": 8693.83, "total_tokens": 123392488} +{"current_steps": 183170, "total_steps": 204665, "loss": 0.0, "lr": 6.645767785746292e-08, "epoch": 4.474873573889038, "percentage": 89.5, "elapsed_time": "3:56:33", "remaining_time": "0:27:45", "throughput": 8693.86, "total_tokens": 123395944} +{"current_steps": 183175, "total_steps": 204665, "loss": 0.0, "lr": 6.642711202976336e-08, "epoch": 4.474995724720886, "percentage": 89.5, "elapsed_time": "3:56:33", "remaining_time": "0:27:45", "throughput": 8693.89, "total_tokens": 123399400} +{"current_steps": 183180, "total_steps": 204665, "loss": 0.0, "lr": 6.63965529912568e-08, "epoch": 4.4751178755527325, "percentage": 89.5, "elapsed_time": "3:56:34", "remaining_time": "0:27:44", "throughput": 8693.93, "total_tokens": 123402920} +{"current_steps": 183185, "total_steps": 204665, "loss": 0.0, "lr": 6.636600074216569e-08, "epoch": 4.475240026384579, "percentage": 89.5, "elapsed_time": "3:56:34", "remaining_time": "0:27:44", "throughput": 8693.95, "total_tokens": 123406248} +{"current_steps": 183190, "total_steps": 204665, "loss": 0.0, "lr": 6.633545528271211e-08, "epoch": 4.475362177216427, "percentage": 89.51, "elapsed_time": "3:56:34", "remaining_time": "0:27:44", "throughput": 8693.99, "total_tokens": 123410024} +{"current_steps": 183195, "total_steps": 204665, "loss": 0.0399, "lr": 6.63049166131181e-08, "epoch": 4.475484328048274, "percentage": 89.51, "elapsed_time": "3:56:35", "remaining_time": "0:27:43", "throughput": 8694.03, "total_tokens": 123413608} +{"current_steps": 183200, "total_steps": 204665, "loss": 0.0, "lr": 6.627438473360614e-08, "epoch": 4.475606478880121, "percentage": 89.51, "elapsed_time": "3:56:35", "remaining_time": "0:27:43", "throughput": 8694.03, "total_tokens": 123416552} +{"current_steps": 183205, "total_steps": 204665, "loss": 0.0, "lr": 6.624385964439782e-08, "epoch": 4.475728629711968, "percentage": 89.51, "elapsed_time": "3:56:35", "remaining_time": "0:27:42", "throughput": 8694.04, "total_tokens": 123419752} +{"current_steps": 183210, "total_steps": 204665, "loss": 0.0, "lr": 6.621334134571543e-08, "epoch": 4.475850780543816, "percentage": 89.52, "elapsed_time": "3:56:36", "remaining_time": "0:27:42", "throughput": 8694.07, "total_tokens": 123423144} +{"current_steps": 183215, "total_steps": 204665, "loss": 0.0, "lr": 6.618282983778056e-08, "epoch": 4.475972931375662, "percentage": 89.52, "elapsed_time": "3:56:36", "remaining_time": "0:27:42", "throughput": 8694.09, "total_tokens": 123426536} +{"current_steps": 183220, "total_steps": 204665, "loss": 0.0, "lr": 6.615232512081536e-08, "epoch": 4.47609508220751, "percentage": 89.52, "elapsed_time": "3:56:36", "remaining_time": "0:27:41", "throughput": 8694.13, "total_tokens": 123430184} +{"current_steps": 183225, "total_steps": 204665, "loss": 0.0, "lr": 6.612182719504189e-08, "epoch": 4.476217233039357, "percentage": 89.52, "elapsed_time": "3:56:37", "remaining_time": "0:27:41", "throughput": 8694.15, "total_tokens": 123433512} +{"current_steps": 183230, "total_steps": 204665, "loss": 0.0, "lr": 6.609133606068141e-08, "epoch": 4.4763393838712044, "percentage": 89.53, "elapsed_time": "3:56:37", "remaining_time": "0:27:40", "throughput": 8694.16, "total_tokens": 123436584} +{"current_steps": 183235, "total_steps": 204665, "loss": 0.0, "lr": 6.606085171795606e-08, "epoch": 4.476461534703051, "percentage": 89.53, "elapsed_time": "3:56:37", "remaining_time": "0:27:40", "throughput": 8694.19, "total_tokens": 123440040} +{"current_steps": 183240, "total_steps": 204665, "loss": 0.0007, "lr": 6.603037416708734e-08, "epoch": 4.476583685534899, "percentage": 89.53, "elapsed_time": "3:56:38", "remaining_time": "0:27:40", "throughput": 8694.21, "total_tokens": 123443368} +{"current_steps": 183245, "total_steps": 204665, "loss": 0.0, "lr": 6.599990340829697e-08, "epoch": 4.476705836366746, "percentage": 89.53, "elapsed_time": "3:56:38", "remaining_time": "0:27:39", "throughput": 8694.24, "total_tokens": 123446760} +{"current_steps": 183250, "total_steps": 204665, "loss": 0.0, "lr": 6.596943944180666e-08, "epoch": 4.476827987198593, "percentage": 89.54, "elapsed_time": "3:56:39", "remaining_time": "0:27:39", "throughput": 8694.28, "total_tokens": 123450408} +{"current_steps": 183255, "total_steps": 204665, "loss": 0.0, "lr": 6.593898226783757e-08, "epoch": 4.47695013803044, "percentage": 89.54, "elapsed_time": "3:56:39", "remaining_time": "0:27:38", "throughput": 8694.3, "total_tokens": 123453736} +{"current_steps": 183260, "total_steps": 204665, "loss": 0.0, "lr": 6.590853188661161e-08, "epoch": 4.477072288862287, "percentage": 89.54, "elapsed_time": "3:56:39", "remaining_time": "0:27:38", "throughput": 8694.32, "total_tokens": 123457064} +{"current_steps": 183265, "total_steps": 204665, "loss": 0.0, "lr": 6.587808829834984e-08, "epoch": 4.477194439694134, "percentage": 89.54, "elapsed_time": "3:56:40", "remaining_time": "0:27:38", "throughput": 8694.36, "total_tokens": 123460712} +{"current_steps": 183270, "total_steps": 204665, "loss": 0.0, "lr": 6.584765150327409e-08, "epoch": 4.477316590525981, "percentage": 89.55, "elapsed_time": "3:56:40", "remaining_time": "0:27:37", "throughput": 8694.37, "total_tokens": 123463848} +{"current_steps": 183275, "total_steps": 204665, "loss": 0.0, "lr": 6.58172215016053e-08, "epoch": 4.477438741357829, "percentage": 89.55, "elapsed_time": "3:56:40", "remaining_time": "0:27:37", "throughput": 8694.39, "total_tokens": 123467176} +{"current_steps": 183280, "total_steps": 204665, "loss": 0.0, "lr": 6.578679829356514e-08, "epoch": 4.4775608921896755, "percentage": 89.55, "elapsed_time": "3:56:41", "remaining_time": "0:27:36", "throughput": 8694.41, "total_tokens": 123470440} +{"current_steps": 183285, "total_steps": 204665, "loss": 0.0, "lr": 6.575638187937437e-08, "epoch": 4.477683043021523, "percentage": 89.55, "elapsed_time": "3:56:41", "remaining_time": "0:27:36", "throughput": 8694.43, "total_tokens": 123473832} +{"current_steps": 183290, "total_steps": 204665, "loss": 0.0, "lr": 6.572597225925458e-08, "epoch": 4.47780519385337, "percentage": 89.56, "elapsed_time": "3:56:41", "remaining_time": "0:27:36", "throughput": 8694.5, "total_tokens": 123477928} +{"current_steps": 183295, "total_steps": 204665, "loss": 0.0, "lr": 6.569556943342691e-08, "epoch": 4.4779273446852175, "percentage": 89.56, "elapsed_time": "3:56:42", "remaining_time": "0:27:35", "throughput": 8694.53, "total_tokens": 123481512} +{"current_steps": 183300, "total_steps": 204665, "loss": 0.0009, "lr": 6.56651734021123e-08, "epoch": 4.478049495517064, "percentage": 89.56, "elapsed_time": "3:56:42", "remaining_time": "0:27:35", "throughput": 8694.58, "total_tokens": 123485352} +{"current_steps": 183305, "total_steps": 204665, "loss": 0.0, "lr": 6.563478416553192e-08, "epoch": 4.478171646348912, "percentage": 89.56, "elapsed_time": "3:56:42", "remaining_time": "0:27:35", "throughput": 8694.58, "total_tokens": 123488232} +{"current_steps": 183310, "total_steps": 204665, "loss": 0.0, "lr": 6.560440172390658e-08, "epoch": 4.478293797180759, "percentage": 89.57, "elapsed_time": "3:56:43", "remaining_time": "0:27:34", "throughput": 8694.61, "total_tokens": 123491688} +{"current_steps": 183315, "total_steps": 204665, "loss": 0.0, "lr": 6.557402607745733e-08, "epoch": 4.478415948012606, "percentage": 89.57, "elapsed_time": "3:56:43", "remaining_time": "0:27:34", "throughput": 8694.62, "total_tokens": 123494888} +{"current_steps": 183320, "total_steps": 204665, "loss": 0.0, "lr": 6.55436572264052e-08, "epoch": 4.478538098844453, "percentage": 89.57, "elapsed_time": "3:56:43", "remaining_time": "0:27:33", "throughput": 8694.65, "total_tokens": 123498344} +{"current_steps": 183325, "total_steps": 204665, "loss": 0.0, "lr": 6.551329517097092e-08, "epoch": 4.478660249676301, "percentage": 89.57, "elapsed_time": "3:56:44", "remaining_time": "0:27:33", "throughput": 8694.68, "total_tokens": 123501800} +{"current_steps": 183330, "total_steps": 204665, "loss": 0.0, "lr": 6.548293991137533e-08, "epoch": 4.478782400508147, "percentage": 89.58, "elapsed_time": "3:56:44", "remaining_time": "0:27:33", "throughput": 8694.72, "total_tokens": 123505448} +{"current_steps": 183335, "total_steps": 204665, "loss": 0.0, "lr": 6.54525914478391e-08, "epoch": 4.478904551339995, "percentage": 89.58, "elapsed_time": "3:56:45", "remaining_time": "0:27:32", "throughput": 8694.72, "total_tokens": 123508520} +{"current_steps": 183340, "total_steps": 204665, "loss": 0.0, "lr": 6.542224978058309e-08, "epoch": 4.479026702171842, "percentage": 89.58, "elapsed_time": "3:56:45", "remaining_time": "0:27:32", "throughput": 8694.74, "total_tokens": 123511912} +{"current_steps": 183345, "total_steps": 204665, "loss": 0.0, "lr": 6.539191490982766e-08, "epoch": 4.479148853003689, "percentage": 89.58, "elapsed_time": "3:56:45", "remaining_time": "0:27:31", "throughput": 8694.75, "total_tokens": 123514984} +{"current_steps": 183350, "total_steps": 204665, "loss": 0.0, "lr": 6.536158683579374e-08, "epoch": 4.479271003835536, "percentage": 89.59, "elapsed_time": "3:56:46", "remaining_time": "0:27:31", "throughput": 8694.75, "total_tokens": 123517928} +{"current_steps": 183355, "total_steps": 204665, "loss": 0.0001, "lr": 6.533126555870183e-08, "epoch": 4.479393154667383, "percentage": 89.59, "elapsed_time": "3:56:46", "remaining_time": "0:27:31", "throughput": 8694.77, "total_tokens": 123521192} +{"current_steps": 183360, "total_steps": 204665, "loss": 0.0, "lr": 6.53009510787722e-08, "epoch": 4.4795153054992305, "percentage": 89.59, "elapsed_time": "3:56:46", "remaining_time": "0:27:30", "throughput": 8694.78, "total_tokens": 123524264} +{"current_steps": 183365, "total_steps": 204665, "loss": 0.0, "lr": 6.527064339622557e-08, "epoch": 4.479637456331077, "percentage": 89.59, "elapsed_time": "3:56:47", "remaining_time": "0:27:30", "throughput": 8694.79, "total_tokens": 123527464} +{"current_steps": 183370, "total_steps": 204665, "loss": 0.0, "lr": 6.524034251128207e-08, "epoch": 4.479759607162925, "percentage": 89.6, "elapsed_time": "3:56:47", "remaining_time": "0:27:29", "throughput": 8694.82, "total_tokens": 123530984} +{"current_steps": 183375, "total_steps": 204665, "loss": 0.0, "lr": 6.521004842416222e-08, "epoch": 4.479881757994772, "percentage": 89.6, "elapsed_time": "3:56:47", "remaining_time": "0:27:29", "throughput": 8694.85, "total_tokens": 123534376} +{"current_steps": 183380, "total_steps": 204665, "loss": 0.0, "lr": 6.51797611350865e-08, "epoch": 4.480003908826619, "percentage": 89.6, "elapsed_time": "3:56:48", "remaining_time": "0:27:29", "throughput": 8694.89, "total_tokens": 123538024} +{"current_steps": 183385, "total_steps": 204665, "loss": 0.0, "lr": 6.514948064427484e-08, "epoch": 4.480126059658466, "percentage": 89.6, "elapsed_time": "3:56:48", "remaining_time": "0:27:28", "throughput": 8694.91, "total_tokens": 123541352} +{"current_steps": 183390, "total_steps": 204665, "loss": 0.0, "lr": 6.511920695194783e-08, "epoch": 4.480248210490314, "percentage": 89.6, "elapsed_time": "3:56:48", "remaining_time": "0:27:28", "throughput": 8694.92, "total_tokens": 123544552} +{"current_steps": 183395, "total_steps": 204665, "loss": 0.0, "lr": 6.50889400583251e-08, "epoch": 4.48037036132216, "percentage": 89.61, "elapsed_time": "3:56:49", "remaining_time": "0:27:27", "throughput": 8694.95, "total_tokens": 123548072} +{"current_steps": 183400, "total_steps": 204665, "loss": 0.0, "lr": 6.505867996362735e-08, "epoch": 4.480492512154008, "percentage": 89.61, "elapsed_time": "3:56:49", "remaining_time": "0:27:27", "throughput": 8694.99, "total_tokens": 123551656} +{"current_steps": 183405, "total_steps": 204665, "loss": 0.0, "lr": 6.502842666807406e-08, "epoch": 4.480614662985855, "percentage": 89.61, "elapsed_time": "3:56:49", "remaining_time": "0:27:27", "throughput": 8695.01, "total_tokens": 123555048} +{"current_steps": 183410, "total_steps": 204665, "loss": 0.05, "lr": 6.499818017188563e-08, "epoch": 4.480736813817702, "percentage": 89.61, "elapsed_time": "3:56:50", "remaining_time": "0:27:26", "throughput": 8695.02, "total_tokens": 123558184} +{"current_steps": 183415, "total_steps": 204665, "loss": 0.0, "lr": 6.49679404752821e-08, "epoch": 4.480858964649549, "percentage": 89.62, "elapsed_time": "3:56:50", "remaining_time": "0:27:26", "throughput": 8695.04, "total_tokens": 123561512} +{"current_steps": 183420, "total_steps": 204665, "loss": 0.0, "lr": 6.493770757848294e-08, "epoch": 4.480981115481397, "percentage": 89.62, "elapsed_time": "3:56:50", "remaining_time": "0:27:26", "throughput": 8695.09, "total_tokens": 123565288} +{"current_steps": 183425, "total_steps": 204665, "loss": 0.0, "lr": 6.490748148170844e-08, "epoch": 4.4811032663132435, "percentage": 89.62, "elapsed_time": "3:56:51", "remaining_time": "0:27:25", "throughput": 8695.11, "total_tokens": 123568552} +{"current_steps": 183430, "total_steps": 204665, "loss": 0.0, "lr": 6.48772621851782e-08, "epoch": 4.481225417145091, "percentage": 89.62, "elapsed_time": "3:56:51", "remaining_time": "0:27:25", "throughput": 8695.14, "total_tokens": 123572136} +{"current_steps": 183435, "total_steps": 204665, "loss": 0.0002, "lr": 6.484704968911204e-08, "epoch": 4.481347567976938, "percentage": 89.63, "elapsed_time": "3:56:51", "remaining_time": "0:27:24", "throughput": 8695.16, "total_tokens": 123575464} +{"current_steps": 183440, "total_steps": 204665, "loss": 0.0, "lr": 6.481684399372955e-08, "epoch": 4.4814697188087855, "percentage": 89.63, "elapsed_time": "3:56:52", "remaining_time": "0:27:24", "throughput": 8695.2, "total_tokens": 123579176} +{"current_steps": 183445, "total_steps": 204665, "loss": 0.0, "lr": 6.47866450992507e-08, "epoch": 4.481591869640632, "percentage": 89.63, "elapsed_time": "3:56:52", "remaining_time": "0:27:24", "throughput": 8695.23, "total_tokens": 123582504} +{"current_steps": 183450, "total_steps": 204665, "loss": 0.0, "lr": 6.475645300589472e-08, "epoch": 4.481714020472479, "percentage": 89.63, "elapsed_time": "3:56:53", "remaining_time": "0:27:23", "throughput": 8695.25, "total_tokens": 123585832} +{"current_steps": 183455, "total_steps": 204665, "loss": 0.0, "lr": 6.472626771388156e-08, "epoch": 4.481836171304327, "percentage": 89.64, "elapsed_time": "3:56:53", "remaining_time": "0:27:23", "throughput": 8695.28, "total_tokens": 123589352} +{"current_steps": 183460, "total_steps": 204665, "loss": 0.0005, "lr": 6.46960892234304e-08, "epoch": 4.481958322136173, "percentage": 89.64, "elapsed_time": "3:56:53", "remaining_time": "0:27:22", "throughput": 8695.31, "total_tokens": 123592808} +{"current_steps": 183465, "total_steps": 204665, "loss": 0.0, "lr": 6.466591753476092e-08, "epoch": 4.482080472968021, "percentage": 89.64, "elapsed_time": "3:56:54", "remaining_time": "0:27:22", "throughput": 8695.32, "total_tokens": 123596008} +{"current_steps": 183470, "total_steps": 204665, "loss": 0.0, "lr": 6.463575264809229e-08, "epoch": 4.482202623799868, "percentage": 89.64, "elapsed_time": "3:56:54", "remaining_time": "0:27:22", "throughput": 8695.36, "total_tokens": 123599592} +{"current_steps": 183475, "total_steps": 204665, "loss": 0.0, "lr": 6.460559456364434e-08, "epoch": 4.482324774631715, "percentage": 89.65, "elapsed_time": "3:56:54", "remaining_time": "0:27:21", "throughput": 8695.39, "total_tokens": 123603048} +{"current_steps": 183480, "total_steps": 204665, "loss": 0.0, "lr": 6.457544328163578e-08, "epoch": 4.482446925463562, "percentage": 89.65, "elapsed_time": "3:56:55", "remaining_time": "0:27:21", "throughput": 8695.4, "total_tokens": 123606120} +{"current_steps": 183485, "total_steps": 204665, "loss": 0.0, "lr": 6.454529880228643e-08, "epoch": 4.48256907629541, "percentage": 89.65, "elapsed_time": "3:56:55", "remaining_time": "0:27:20", "throughput": 8695.41, "total_tokens": 123609256} +{"current_steps": 183490, "total_steps": 204665, "loss": 0.0, "lr": 6.451516112581512e-08, "epoch": 4.4826912271272565, "percentage": 89.65, "elapsed_time": "3:56:55", "remaining_time": "0:27:20", "throughput": 8695.43, "total_tokens": 123612520} +{"current_steps": 183495, "total_steps": 204665, "loss": 0.0, "lr": 6.448503025244134e-08, "epoch": 4.482813377959104, "percentage": 89.66, "elapsed_time": "3:56:56", "remaining_time": "0:27:20", "throughput": 8695.45, "total_tokens": 123615912} +{"current_steps": 183500, "total_steps": 204665, "loss": 0.0, "lr": 6.445490618238392e-08, "epoch": 4.482935528790951, "percentage": 89.66, "elapsed_time": "3:56:56", "remaining_time": "0:27:19", "throughput": 8695.47, "total_tokens": 123619176} +{"current_steps": 183505, "total_steps": 204665, "loss": 0.0, "lr": 6.4424788915862e-08, "epoch": 4.4830576796227986, "percentage": 89.66, "elapsed_time": "3:56:56", "remaining_time": "0:27:19", "throughput": 8695.5, "total_tokens": 123622696} +{"current_steps": 183510, "total_steps": 204665, "loss": 0.0, "lr": 6.439467845309488e-08, "epoch": 4.483179830454645, "percentage": 89.66, "elapsed_time": "3:56:57", "remaining_time": "0:27:18", "throughput": 8695.52, "total_tokens": 123625960} +{"current_steps": 183515, "total_steps": 204665, "loss": 0.0, "lr": 6.436457479430101e-08, "epoch": 4.483301981286493, "percentage": 89.67, "elapsed_time": "3:56:57", "remaining_time": "0:27:18", "throughput": 8695.53, "total_tokens": 123629032} +{"current_steps": 183520, "total_steps": 204665, "loss": 0.0, "lr": 6.433447793969982e-08, "epoch": 4.48342413211834, "percentage": 89.67, "elapsed_time": "3:56:57", "remaining_time": "0:27:18", "throughput": 8695.56, "total_tokens": 123632488} +{"current_steps": 183525, "total_steps": 204665, "loss": 0.0, "lr": 6.430438788950987e-08, "epoch": 4.483546282950186, "percentage": 89.67, "elapsed_time": "3:56:58", "remaining_time": "0:27:17", "throughput": 8695.59, "total_tokens": 123635944} +{"current_steps": 183530, "total_steps": 204665, "loss": 0.0, "lr": 6.427430464395022e-08, "epoch": 4.483668433782034, "percentage": 89.67, "elapsed_time": "3:56:58", "remaining_time": "0:27:17", "throughput": 8695.62, "total_tokens": 123639464} +{"current_steps": 183535, "total_steps": 204665, "loss": 0.0001, "lr": 6.424422820323938e-08, "epoch": 4.483790584613882, "percentage": 89.68, "elapsed_time": "3:56:58", "remaining_time": "0:27:16", "throughput": 8695.67, "total_tokens": 123643304} +{"current_steps": 183540, "total_steps": 204665, "loss": 0.0, "lr": 6.421415856759616e-08, "epoch": 4.483912735445728, "percentage": 89.68, "elapsed_time": "3:56:59", "remaining_time": "0:27:16", "throughput": 8695.71, "total_tokens": 123646952} +{"current_steps": 183545, "total_steps": 204665, "loss": 0.0, "lr": 6.418409573723937e-08, "epoch": 4.484034886277575, "percentage": 89.68, "elapsed_time": "3:56:59", "remaining_time": "0:27:16", "throughput": 8695.73, "total_tokens": 123650280} +{"current_steps": 183550, "total_steps": 204665, "loss": 0.0, "lr": 6.415403971238741e-08, "epoch": 4.484157037109423, "percentage": 89.68, "elapsed_time": "3:56:59", "remaining_time": "0:27:15", "throughput": 8695.74, "total_tokens": 123653352} +{"current_steps": 183555, "total_steps": 204665, "loss": 0.0005, "lr": 6.412399049325922e-08, "epoch": 4.48427918794127, "percentage": 89.69, "elapsed_time": "3:57:00", "remaining_time": "0:27:15", "throughput": 8695.75, "total_tokens": 123656552} +{"current_steps": 183560, "total_steps": 204665, "loss": 0.0, "lr": 6.409394808007307e-08, "epoch": 4.484401338773117, "percentage": 89.69, "elapsed_time": "3:57:00", "remaining_time": "0:27:15", "throughput": 8695.77, "total_tokens": 123659752} +{"current_steps": 183565, "total_steps": 204665, "loss": 0.0, "lr": 6.406391247304732e-08, "epoch": 4.484523489604964, "percentage": 89.69, "elapsed_time": "3:57:01", "remaining_time": "0:27:14", "throughput": 8695.82, "total_tokens": 123663592} +{"current_steps": 183570, "total_steps": 204665, "loss": 0.0, "lr": 6.403388367240059e-08, "epoch": 4.484645640436812, "percentage": 89.69, "elapsed_time": "3:57:01", "remaining_time": "0:27:14", "throughput": 8695.83, "total_tokens": 123666728} +{"current_steps": 183575, "total_steps": 204665, "loss": 0.0, "lr": 6.400386167835115e-08, "epoch": 4.484767791268658, "percentage": 89.7, "elapsed_time": "3:57:01", "remaining_time": "0:27:13", "throughput": 8695.85, "total_tokens": 123669992} +{"current_steps": 183580, "total_steps": 204665, "loss": 0.0, "lr": 6.397384649111748e-08, "epoch": 4.484889942100506, "percentage": 89.7, "elapsed_time": "3:57:02", "remaining_time": "0:27:13", "throughput": 8695.89, "total_tokens": 123673640} +{"current_steps": 183585, "total_steps": 204665, "loss": 0.0, "lr": 6.394383811091764e-08, "epoch": 4.485012092932353, "percentage": 89.7, "elapsed_time": "3:57:02", "remaining_time": "0:27:13", "throughput": 8695.91, "total_tokens": 123677032} +{"current_steps": 183590, "total_steps": 204665, "loss": 0.0, "lr": 6.39138365379701e-08, "epoch": 4.4851342437642, "percentage": 89.7, "elapsed_time": "3:57:02", "remaining_time": "0:27:12", "throughput": 8695.92, "total_tokens": 123680168} +{"current_steps": 183595, "total_steps": 204665, "loss": 0.0, "lr": 6.388384177249273e-08, "epoch": 4.485256394596047, "percentage": 89.71, "elapsed_time": "3:57:03", "remaining_time": "0:27:12", "throughput": 8695.94, "total_tokens": 123683432} +{"current_steps": 183600, "total_steps": 204665, "loss": 0.0, "lr": 6.385385381470388e-08, "epoch": 4.485378545427895, "percentage": 89.71, "elapsed_time": "3:57:03", "remaining_time": "0:27:11", "throughput": 8695.97, "total_tokens": 123686888} +{"current_steps": 183605, "total_steps": 204665, "loss": 0.0002, "lr": 6.382387266482182e-08, "epoch": 4.4855006962597415, "percentage": 89.71, "elapsed_time": "3:57:03", "remaining_time": "0:27:11", "throughput": 8696.0, "total_tokens": 123690280} +{"current_steps": 183610, "total_steps": 204665, "loss": 0.0, "lr": 6.379389832306415e-08, "epoch": 4.485622847091589, "percentage": 89.71, "elapsed_time": "3:57:04", "remaining_time": "0:27:11", "throughput": 8696.02, "total_tokens": 123693608} +{"current_steps": 183615, "total_steps": 204665, "loss": 0.0, "lr": 6.376393078964915e-08, "epoch": 4.485744997923436, "percentage": 89.71, "elapsed_time": "3:57:04", "remaining_time": "0:27:10", "throughput": 8696.04, "total_tokens": 123696936} +{"current_steps": 183620, "total_steps": 204665, "loss": 0.0, "lr": 6.373397006479464e-08, "epoch": 4.485867148755283, "percentage": 89.72, "elapsed_time": "3:57:04", "remaining_time": "0:27:10", "throughput": 8696.05, "total_tokens": 123700072} +{"current_steps": 183625, "total_steps": 204665, "loss": 0.0, "lr": 6.370401614871845e-08, "epoch": 4.48598929958713, "percentage": 89.72, "elapsed_time": "3:57:05", "remaining_time": "0:27:09", "throughput": 8696.07, "total_tokens": 123703336} +{"current_steps": 183630, "total_steps": 204665, "loss": 0.0, "lr": 6.367406904163863e-08, "epoch": 4.486111450418977, "percentage": 89.72, "elapsed_time": "3:57:05", "remaining_time": "0:27:09", "throughput": 8696.07, "total_tokens": 123706280} +{"current_steps": 183635, "total_steps": 204665, "loss": 0.0, "lr": 6.364412874377267e-08, "epoch": 4.486233601250825, "percentage": 89.72, "elapsed_time": "3:57:05", "remaining_time": "0:27:09", "throughput": 8696.09, "total_tokens": 123709672} +{"current_steps": 183640, "total_steps": 204665, "loss": 0.0, "lr": 6.361419525533862e-08, "epoch": 4.486355752082671, "percentage": 89.73, "elapsed_time": "3:57:06", "remaining_time": "0:27:08", "throughput": 8696.09, "total_tokens": 123712680} +{"current_steps": 183645, "total_steps": 204665, "loss": 0.0, "lr": 6.358426857655386e-08, "epoch": 4.486477902914519, "percentage": 89.73, "elapsed_time": "3:57:06", "remaining_time": "0:27:08", "throughput": 8696.1, "total_tokens": 123715752} +{"current_steps": 183650, "total_steps": 204665, "loss": 0.0, "lr": 6.355434870763632e-08, "epoch": 4.486600053746366, "percentage": 89.73, "elapsed_time": "3:57:06", "remaining_time": "0:27:07", "throughput": 8696.14, "total_tokens": 123719400} +{"current_steps": 183655, "total_steps": 204665, "loss": 0.0, "lr": 6.352443564880328e-08, "epoch": 4.486722204578213, "percentage": 89.73, "elapsed_time": "3:57:07", "remaining_time": "0:27:07", "throughput": 8696.14, "total_tokens": 123722344} +{"current_steps": 183660, "total_steps": 204665, "loss": 0.0, "lr": 6.349452940027255e-08, "epoch": 4.48684435541006, "percentage": 89.74, "elapsed_time": "3:57:07", "remaining_time": "0:27:07", "throughput": 8696.17, "total_tokens": 123725864} +{"current_steps": 183665, "total_steps": 204665, "loss": 0.0, "lr": 6.346462996226155e-08, "epoch": 4.486966506241908, "percentage": 89.74, "elapsed_time": "3:57:07", "remaining_time": "0:27:06", "throughput": 8696.21, "total_tokens": 123729512} +{"current_steps": 183670, "total_steps": 204665, "loss": 0.0, "lr": 6.343473733498739e-08, "epoch": 4.4870886570737545, "percentage": 89.74, "elapsed_time": "3:57:08", "remaining_time": "0:27:06", "throughput": 8696.24, "total_tokens": 123732904} +{"current_steps": 183675, "total_steps": 204665, "loss": 0.0, "lr": 6.340485151866793e-08, "epoch": 4.487210807905602, "percentage": 89.74, "elapsed_time": "3:57:08", "remaining_time": "0:27:06", "throughput": 8696.25, "total_tokens": 123736168} +{"current_steps": 183680, "total_steps": 204665, "loss": 0.0, "lr": 6.33749725135202e-08, "epoch": 4.487332958737449, "percentage": 89.75, "elapsed_time": "3:57:09", "remaining_time": "0:27:05", "throughput": 8696.26, "total_tokens": 123739240} +{"current_steps": 183685, "total_steps": 204665, "loss": 0.0, "lr": 6.33451003197617e-08, "epoch": 4.4874551095692965, "percentage": 89.75, "elapsed_time": "3:57:09", "remaining_time": "0:27:05", "throughput": 8696.28, "total_tokens": 123742504} +{"current_steps": 183690, "total_steps": 204665, "loss": 0.0, "lr": 6.331523493760949e-08, "epoch": 4.487577260401143, "percentage": 89.75, "elapsed_time": "3:57:09", "remaining_time": "0:27:04", "throughput": 8696.3, "total_tokens": 123745896} +{"current_steps": 183695, "total_steps": 204665, "loss": 0.0, "lr": 6.328537636728071e-08, "epoch": 4.487699411232991, "percentage": 89.75, "elapsed_time": "3:57:10", "remaining_time": "0:27:04", "throughput": 8696.33, "total_tokens": 123749224} +{"current_steps": 183700, "total_steps": 204665, "loss": 0.0, "lr": 6.325552460899285e-08, "epoch": 4.487821562064838, "percentage": 89.76, "elapsed_time": "3:57:10", "remaining_time": "0:27:04", "throughput": 8696.34, "total_tokens": 123752360} +{"current_steps": 183705, "total_steps": 204665, "loss": 0.0, "lr": 6.322567966296255e-08, "epoch": 4.487943712896685, "percentage": 89.76, "elapsed_time": "3:57:10", "remaining_time": "0:27:03", "throughput": 8696.36, "total_tokens": 123755752} +{"current_steps": 183710, "total_steps": 204665, "loss": 0.0, "lr": 6.319584152940726e-08, "epoch": 4.488065863728532, "percentage": 89.76, "elapsed_time": "3:57:11", "remaining_time": "0:27:03", "throughput": 8696.41, "total_tokens": 123759528} +{"current_steps": 183715, "total_steps": 204665, "loss": 0.0, "lr": 6.316601020854362e-08, "epoch": 4.488188014560379, "percentage": 89.76, "elapsed_time": "3:57:11", "remaining_time": "0:27:02", "throughput": 8696.44, "total_tokens": 123762984} +{"current_steps": 183720, "total_steps": 204665, "loss": 0.0, "lr": 6.313618570058876e-08, "epoch": 4.488310165392226, "percentage": 89.77, "elapsed_time": "3:57:11", "remaining_time": "0:27:02", "throughput": 8696.45, "total_tokens": 123766120} +{"current_steps": 183725, "total_steps": 204665, "loss": 0.0, "lr": 6.310636800575975e-08, "epoch": 4.488432316224073, "percentage": 89.77, "elapsed_time": "3:57:12", "remaining_time": "0:27:02", "throughput": 8696.51, "total_tokens": 123770152} +{"current_steps": 183730, "total_steps": 204665, "loss": 0.0, "lr": 6.307655712427295e-08, "epoch": 4.488554467055921, "percentage": 89.77, "elapsed_time": "3:57:12", "remaining_time": "0:27:01", "throughput": 8696.52, "total_tokens": 123773288} +{"current_steps": 183735, "total_steps": 204665, "loss": 0.0, "lr": 6.304675305634566e-08, "epoch": 4.4886766178877675, "percentage": 89.77, "elapsed_time": "3:57:12", "remaining_time": "0:27:01", "throughput": 8696.54, "total_tokens": 123776552} +{"current_steps": 183740, "total_steps": 204665, "loss": 0.0, "lr": 6.301695580219424e-08, "epoch": 4.488798768719615, "percentage": 89.78, "elapsed_time": "3:57:13", "remaining_time": "0:27:00", "throughput": 8696.55, "total_tokens": 123779688} +{"current_steps": 183745, "total_steps": 204665, "loss": 0.0, "lr": 6.298716536203563e-08, "epoch": 4.488920919551462, "percentage": 89.78, "elapsed_time": "3:57:13", "remaining_time": "0:27:00", "throughput": 8696.58, "total_tokens": 123783208} +{"current_steps": 183750, "total_steps": 204665, "loss": 0.0, "lr": 6.295738173608622e-08, "epoch": 4.4890430703833095, "percentage": 89.78, "elapsed_time": "3:57:13", "remaining_time": "0:27:00", "throughput": 8696.59, "total_tokens": 123786344} +{"current_steps": 183755, "total_steps": 204665, "loss": 0.0, "lr": 6.292760492456284e-08, "epoch": 4.489165221215156, "percentage": 89.78, "elapsed_time": "3:57:14", "remaining_time": "0:26:59", "throughput": 8696.65, "total_tokens": 123790184} +{"current_steps": 183760, "total_steps": 204665, "loss": 0.0, "lr": 6.289783492768208e-08, "epoch": 4.489287372047004, "percentage": 89.79, "elapsed_time": "3:57:14", "remaining_time": "0:26:59", "throughput": 8696.66, "total_tokens": 123793320} +{"current_steps": 183765, "total_steps": 204665, "loss": 0.0, "lr": 6.286807174566033e-08, "epoch": 4.489409522878851, "percentage": 89.79, "elapsed_time": "3:57:14", "remaining_time": "0:26:58", "throughput": 8696.68, "total_tokens": 123796584} +{"current_steps": 183770, "total_steps": 204665, "loss": 0.0, "lr": 6.283831537871375e-08, "epoch": 4.489531673710698, "percentage": 89.79, "elapsed_time": "3:57:15", "remaining_time": "0:26:58", "throughput": 8696.68, "total_tokens": 123799528} +{"current_steps": 183775, "total_steps": 204665, "loss": 0.0, "lr": 6.280856582705929e-08, "epoch": 4.489653824542545, "percentage": 89.79, "elapsed_time": "3:57:15", "remaining_time": "0:26:58", "throughput": 8696.71, "total_tokens": 123802984} +{"current_steps": 183780, "total_steps": 204665, "loss": 0.0, "lr": 6.277882309091287e-08, "epoch": 4.489775975374393, "percentage": 89.8, "elapsed_time": "3:57:15", "remaining_time": "0:26:57", "throughput": 8696.72, "total_tokens": 123806184} +{"current_steps": 183785, "total_steps": 204665, "loss": 0.0, "lr": 6.274908717049099e-08, "epoch": 4.489898126206239, "percentage": 89.8, "elapsed_time": "3:57:16", "remaining_time": "0:26:57", "throughput": 8696.74, "total_tokens": 123809384} +{"current_steps": 183790, "total_steps": 204665, "loss": 0.0, "lr": 6.27193580660098e-08, "epoch": 4.490020277038087, "percentage": 89.8, "elapsed_time": "3:57:16", "remaining_time": "0:26:57", "throughput": 8696.75, "total_tokens": 123812520} +{"current_steps": 183795, "total_steps": 204665, "loss": 0.0, "lr": 6.26896357776856e-08, "epoch": 4.490142427869934, "percentage": 89.8, "elapsed_time": "3:57:16", "remaining_time": "0:26:56", "throughput": 8696.78, "total_tokens": 123815912} +{"current_steps": 183800, "total_steps": 204665, "loss": 0.0, "lr": 6.26599203057343e-08, "epoch": 4.490264578701781, "percentage": 89.81, "elapsed_time": "3:57:17", "remaining_time": "0:26:56", "throughput": 8696.8, "total_tokens": 123819240} +{"current_steps": 183805, "total_steps": 204665, "loss": 0.0, "lr": 6.263021165037241e-08, "epoch": 4.490386729533628, "percentage": 89.81, "elapsed_time": "3:57:17", "remaining_time": "0:26:55", "throughput": 8696.81, "total_tokens": 123822504} +{"current_steps": 183810, "total_steps": 204665, "loss": 0.0, "lr": 6.260050981181553e-08, "epoch": 4.490508880365475, "percentage": 89.81, "elapsed_time": "3:57:18", "remaining_time": "0:26:55", "throughput": 8696.84, "total_tokens": 123825832} +{"current_steps": 183815, "total_steps": 204665, "loss": 0.0, "lr": 6.257081479027993e-08, "epoch": 4.4906310311973225, "percentage": 89.81, "elapsed_time": "3:57:18", "remaining_time": "0:26:55", "throughput": 8696.86, "total_tokens": 123829160} +{"current_steps": 183820, "total_steps": 204665, "loss": 0.0, "lr": 6.254112658598165e-08, "epoch": 4.490753182029169, "percentage": 89.82, "elapsed_time": "3:57:18", "remaining_time": "0:26:54", "throughput": 8696.87, "total_tokens": 123832296} +{"current_steps": 183825, "total_steps": 204665, "loss": 0.0, "lr": 6.25114451991362e-08, "epoch": 4.490875332861017, "percentage": 89.82, "elapsed_time": "3:57:19", "remaining_time": "0:26:54", "throughput": 8696.91, "total_tokens": 123836072} +{"current_steps": 183830, "total_steps": 204665, "loss": 0.0, "lr": 6.248177062995997e-08, "epoch": 4.490997483692864, "percentage": 89.82, "elapsed_time": "3:57:19", "remaining_time": "0:26:53", "throughput": 8696.92, "total_tokens": 123839144} +{"current_steps": 183835, "total_steps": 204665, "loss": 0.0, "lr": 6.245210287866821e-08, "epoch": 4.491119634524711, "percentage": 89.82, "elapsed_time": "3:57:19", "remaining_time": "0:26:53", "throughput": 8696.94, "total_tokens": 123842408} +{"current_steps": 183840, "total_steps": 204665, "loss": 0.0, "lr": 6.242244194547707e-08, "epoch": 4.491241785356558, "percentage": 89.82, "elapsed_time": "3:57:20", "remaining_time": "0:26:53", "throughput": 8697.0, "total_tokens": 123846440} +{"current_steps": 183845, "total_steps": 204665, "loss": 0.0, "lr": 6.239278783060198e-08, "epoch": 4.491363936188406, "percentage": 89.83, "elapsed_time": "3:57:20", "remaining_time": "0:26:52", "throughput": 8697.0, "total_tokens": 123849448} +{"current_steps": 183850, "total_steps": 204665, "loss": 0.0, "lr": 6.236314053425873e-08, "epoch": 4.491486087020252, "percentage": 89.83, "elapsed_time": "3:57:20", "remaining_time": "0:26:52", "throughput": 8697.05, "total_tokens": 123853288} +{"current_steps": 183855, "total_steps": 204665, "loss": 0.0, "lr": 6.233350005666304e-08, "epoch": 4.4916082378521, "percentage": 89.83, "elapsed_time": "3:57:21", "remaining_time": "0:26:51", "throughput": 8697.09, "total_tokens": 123856872} +{"current_steps": 183860, "total_steps": 204665, "loss": 0.0, "lr": 6.230386639803031e-08, "epoch": 4.491730388683947, "percentage": 89.83, "elapsed_time": "3:57:21", "remaining_time": "0:26:51", "throughput": 8697.09, "total_tokens": 123859816} +{"current_steps": 183865, "total_steps": 204665, "loss": 0.0, "lr": 6.227423955857614e-08, "epoch": 4.491852539515794, "percentage": 89.84, "elapsed_time": "3:57:21", "remaining_time": "0:26:51", "throughput": 8697.14, "total_tokens": 123863528} +{"current_steps": 183870, "total_steps": 204665, "loss": 0.0354, "lr": 6.22446195385159e-08, "epoch": 4.491974690347641, "percentage": 89.84, "elapsed_time": "3:57:22", "remaining_time": "0:26:50", "throughput": 8697.17, "total_tokens": 123867112} +{"current_steps": 183875, "total_steps": 204665, "loss": 0.0, "lr": 6.221500633806487e-08, "epoch": 4.492096841179489, "percentage": 89.84, "elapsed_time": "3:57:22", "remaining_time": "0:26:50", "throughput": 8697.21, "total_tokens": 123870760} +{"current_steps": 183880, "total_steps": 204665, "loss": 0.0, "lr": 6.218539995743865e-08, "epoch": 4.492218992011336, "percentage": 89.84, "elapsed_time": "3:57:22", "remaining_time": "0:26:49", "throughput": 8697.22, "total_tokens": 123873768} +{"current_steps": 183885, "total_steps": 204665, "loss": 0.0, "lr": 6.215580039685243e-08, "epoch": 4.492341142843182, "percentage": 89.85, "elapsed_time": "3:57:23", "remaining_time": "0:26:49", "throughput": 8697.22, "total_tokens": 123876776} +{"current_steps": 183890, "total_steps": 204665, "loss": 0.0, "lr": 6.212620765652155e-08, "epoch": 4.49246329367503, "percentage": 89.85, "elapsed_time": "3:57:23", "remaining_time": "0:26:49", "throughput": 8697.24, "total_tokens": 123880040} +{"current_steps": 183895, "total_steps": 204665, "loss": 0.0, "lr": 6.209662173666097e-08, "epoch": 4.492585444506877, "percentage": 89.85, "elapsed_time": "3:57:23", "remaining_time": "0:26:48", "throughput": 8697.27, "total_tokens": 123883560} +{"current_steps": 183900, "total_steps": 204665, "loss": 0.0, "lr": 6.206704263748618e-08, "epoch": 4.492707595338724, "percentage": 89.85, "elapsed_time": "3:57:24", "remaining_time": "0:26:48", "throughput": 8697.3, "total_tokens": 123886952} +{"current_steps": 183905, "total_steps": 204665, "loss": 0.0, "lr": 6.203747035921192e-08, "epoch": 4.492829746170571, "percentage": 89.86, "elapsed_time": "3:57:24", "remaining_time": "0:26:47", "throughput": 8697.3, "total_tokens": 123889960} +{"current_steps": 183910, "total_steps": 204665, "loss": 0.0, "lr": 6.200790490205354e-08, "epoch": 4.492951897002419, "percentage": 89.86, "elapsed_time": "3:57:24", "remaining_time": "0:26:47", "throughput": 8697.33, "total_tokens": 123893480} +{"current_steps": 183915, "total_steps": 204665, "loss": 0.0, "lr": 6.197834626622611e-08, "epoch": 4.4930740478342654, "percentage": 89.86, "elapsed_time": "3:57:25", "remaining_time": "0:26:47", "throughput": 8697.35, "total_tokens": 123896616} +{"current_steps": 183920, "total_steps": 204665, "loss": 0.0, "lr": 6.194879445194434e-08, "epoch": 4.493196198666113, "percentage": 89.86, "elapsed_time": "3:57:25", "remaining_time": "0:26:46", "throughput": 8697.37, "total_tokens": 123899944} +{"current_steps": 183925, "total_steps": 204665, "loss": 0.0, "lr": 6.191924945942329e-08, "epoch": 4.49331834949796, "percentage": 89.87, "elapsed_time": "3:57:26", "remaining_time": "0:26:46", "throughput": 8697.4, "total_tokens": 123903400} +{"current_steps": 183930, "total_steps": 204665, "loss": 0.0, "lr": 6.188971128887777e-08, "epoch": 4.4934405003298075, "percentage": 89.87, "elapsed_time": "3:57:26", "remaining_time": "0:26:46", "throughput": 8697.45, "total_tokens": 123907304} +{"current_steps": 183935, "total_steps": 204665, "loss": 0.0, "lr": 6.18601799405224e-08, "epoch": 4.493562651161654, "percentage": 89.87, "elapsed_time": "3:57:26", "remaining_time": "0:26:45", "throughput": 8697.47, "total_tokens": 123910568} +{"current_steps": 183940, "total_steps": 204665, "loss": 0.0, "lr": 6.183065541457244e-08, "epoch": 4.493684801993502, "percentage": 89.87, "elapsed_time": "3:57:27", "remaining_time": "0:26:45", "throughput": 8697.56, "total_tokens": 123915112} +{"current_steps": 183945, "total_steps": 204665, "loss": 0.0, "lr": 6.180113771124207e-08, "epoch": 4.493806952825349, "percentage": 89.88, "elapsed_time": "3:57:27", "remaining_time": "0:26:44", "throughput": 8697.59, "total_tokens": 123918632} +{"current_steps": 183950, "total_steps": 204665, "loss": 0.0, "lr": 6.177162683074633e-08, "epoch": 4.493929103657196, "percentage": 89.88, "elapsed_time": "3:57:27", "remaining_time": "0:26:44", "throughput": 8697.61, "total_tokens": 123921896} +{"current_steps": 183955, "total_steps": 204665, "loss": 0.0, "lr": 6.174212277329949e-08, "epoch": 4.494051254489043, "percentage": 89.88, "elapsed_time": "3:57:28", "remaining_time": "0:26:44", "throughput": 8697.64, "total_tokens": 123925352} +{"current_steps": 183960, "total_steps": 204665, "loss": 0.0, "lr": 6.17126255391165e-08, "epoch": 4.494173405320891, "percentage": 89.88, "elapsed_time": "3:57:28", "remaining_time": "0:26:43", "throughput": 8697.67, "total_tokens": 123928872} +{"current_steps": 183965, "total_steps": 204665, "loss": 0.0001, "lr": 6.16831351284115e-08, "epoch": 4.494295556152737, "percentage": 89.89, "elapsed_time": "3:57:28", "remaining_time": "0:26:43", "throughput": 8697.71, "total_tokens": 123932520} +{"current_steps": 183970, "total_steps": 204665, "loss": 0.0394, "lr": 6.165365154139924e-08, "epoch": 4.494417706984585, "percentage": 89.89, "elapsed_time": "3:57:29", "remaining_time": "0:26:42", "throughput": 8697.73, "total_tokens": 123935848} +{"current_steps": 183975, "total_steps": 204665, "loss": 0.0, "lr": 6.162417477829407e-08, "epoch": 4.494539857816432, "percentage": 89.89, "elapsed_time": "3:57:29", "remaining_time": "0:26:42", "throughput": 8697.74, "total_tokens": 123938984} +{"current_steps": 183980, "total_steps": 204665, "loss": 0.0863, "lr": 6.159470483931006e-08, "epoch": 4.4946620086482785, "percentage": 89.89, "elapsed_time": "3:57:29", "remaining_time": "0:26:42", "throughput": 8697.78, "total_tokens": 123942632} +{"current_steps": 183985, "total_steps": 204665, "loss": 0.0, "lr": 6.156524172466194e-08, "epoch": 4.494784159480126, "percentage": 89.9, "elapsed_time": "3:57:30", "remaining_time": "0:26:41", "throughput": 8697.79, "total_tokens": 123945832} +{"current_steps": 183990, "total_steps": 204665, "loss": 0.029, "lr": 6.153578543456361e-08, "epoch": 4.494906310311973, "percentage": 89.9, "elapsed_time": "3:57:30", "remaining_time": "0:26:41", "throughput": 8697.83, "total_tokens": 123949416} +{"current_steps": 183995, "total_steps": 204665, "loss": 0.0568, "lr": 6.150633596922971e-08, "epoch": 4.4950284611438205, "percentage": 89.9, "elapsed_time": "3:57:30", "remaining_time": "0:26:40", "throughput": 8697.86, "total_tokens": 123952936} +{"current_steps": 184000, "total_steps": 204665, "loss": 0.0, "lr": 6.147689332887396e-08, "epoch": 4.495150611975667, "percentage": 89.9, "elapsed_time": "3:57:31", "remaining_time": "0:26:40", "throughput": 8697.88, "total_tokens": 123956200} +{"current_steps": 184005, "total_steps": 204665, "loss": 0.0, "lr": 6.14474575137106e-08, "epoch": 4.495272762807515, "percentage": 89.91, "elapsed_time": "3:57:31", "remaining_time": "0:26:40", "throughput": 8697.9, "total_tokens": 123959400} +{"current_steps": 184010, "total_steps": 204665, "loss": 0.0, "lr": 6.141802852395406e-08, "epoch": 4.495394913639362, "percentage": 89.91, "elapsed_time": "3:57:32", "remaining_time": "0:26:39", "throughput": 8697.93, "total_tokens": 123962856} +{"current_steps": 184015, "total_steps": 204665, "loss": 0.0, "lr": 6.138860635981779e-08, "epoch": 4.495517064471209, "percentage": 89.91, "elapsed_time": "3:57:32", "remaining_time": "0:26:39", "throughput": 8697.95, "total_tokens": 123966184} +{"current_steps": 184020, "total_steps": 204665, "loss": 0.0, "lr": 6.135919102151632e-08, "epoch": 4.495639215303056, "percentage": 89.91, "elapsed_time": "3:57:32", "remaining_time": "0:26:38", "throughput": 8697.96, "total_tokens": 123969448} +{"current_steps": 184025, "total_steps": 204665, "loss": 0.0, "lr": 6.132978250926302e-08, "epoch": 4.495761366134904, "percentage": 89.92, "elapsed_time": "3:57:33", "remaining_time": "0:26:38", "throughput": 8698.0, "total_tokens": 123973032} +{"current_steps": 184030, "total_steps": 204665, "loss": 0.0, "lr": 6.130038082327205e-08, "epoch": 4.49588351696675, "percentage": 89.92, "elapsed_time": "3:57:33", "remaining_time": "0:26:38", "throughput": 8698.04, "total_tokens": 123976744} +{"current_steps": 184035, "total_steps": 204665, "loss": 0.0, "lr": 6.127098596375724e-08, "epoch": 4.496005667798598, "percentage": 89.92, "elapsed_time": "3:57:33", "remaining_time": "0:26:37", "throughput": 8698.06, "total_tokens": 123979880} +{"current_steps": 184040, "total_steps": 204665, "loss": 0.0, "lr": 6.124159793093231e-08, "epoch": 4.496127818630445, "percentage": 89.92, "elapsed_time": "3:57:34", "remaining_time": "0:26:37", "throughput": 8698.08, "total_tokens": 123983336} +{"current_steps": 184045, "total_steps": 204665, "loss": 0.0, "lr": 6.121221672501108e-08, "epoch": 4.496249969462292, "percentage": 89.92, "elapsed_time": "3:57:34", "remaining_time": "0:26:37", "throughput": 8698.09, "total_tokens": 123986408} +{"current_steps": 184050, "total_steps": 204665, "loss": 0.0257, "lr": 6.118284234620686e-08, "epoch": 4.496372120294139, "percentage": 89.93, "elapsed_time": "3:57:34", "remaining_time": "0:26:36", "throughput": 8698.17, "total_tokens": 123990632} +{"current_steps": 184055, "total_steps": 204665, "loss": 0.0, "lr": 6.115347479473376e-08, "epoch": 4.496494271125987, "percentage": 89.93, "elapsed_time": "3:57:35", "remaining_time": "0:26:36", "throughput": 8698.17, "total_tokens": 123993704} +{"current_steps": 184060, "total_steps": 204665, "loss": 0.0, "lr": 6.112411407080498e-08, "epoch": 4.4966164219578335, "percentage": 89.93, "elapsed_time": "3:57:35", "remaining_time": "0:26:35", "throughput": 8698.21, "total_tokens": 123997224} +{"current_steps": 184065, "total_steps": 204665, "loss": 0.0, "lr": 6.10947601746341e-08, "epoch": 4.496738572789681, "percentage": 89.93, "elapsed_time": "3:57:35", "remaining_time": "0:26:35", "throughput": 8698.25, "total_tokens": 124000872} +{"current_steps": 184070, "total_steps": 204665, "loss": 0.0, "lr": 6.106541310643487e-08, "epoch": 4.496860723621528, "percentage": 89.94, "elapsed_time": "3:57:36", "remaining_time": "0:26:35", "throughput": 8698.27, "total_tokens": 124004200} +{"current_steps": 184075, "total_steps": 204665, "loss": 0.0, "lr": 6.103607286642054e-08, "epoch": 4.496982874453375, "percentage": 89.94, "elapsed_time": "3:57:36", "remaining_time": "0:26:34", "throughput": 8698.3, "total_tokens": 124007656} +{"current_steps": 184080, "total_steps": 204665, "loss": 0.0, "lr": 6.100673945480417e-08, "epoch": 4.497105025285222, "percentage": 89.94, "elapsed_time": "3:57:36", "remaining_time": "0:26:34", "throughput": 8698.32, "total_tokens": 124011112} +{"current_steps": 184085, "total_steps": 204665, "loss": 0.0, "lr": 6.097741287179958e-08, "epoch": 4.497227176117069, "percentage": 89.94, "elapsed_time": "3:57:37", "remaining_time": "0:26:33", "throughput": 8698.35, "total_tokens": 124014440} +{"current_steps": 184090, "total_steps": 204665, "loss": 0.0099, "lr": 6.094809311761961e-08, "epoch": 4.497349326948917, "percentage": 89.95, "elapsed_time": "3:57:37", "remaining_time": "0:26:33", "throughput": 8698.37, "total_tokens": 124017832} +{"current_steps": 184095, "total_steps": 204665, "loss": 0.0, "lr": 6.091878019247787e-08, "epoch": 4.497471477780763, "percentage": 89.95, "elapsed_time": "3:57:37", "remaining_time": "0:26:33", "throughput": 8698.39, "total_tokens": 124021160} +{"current_steps": 184100, "total_steps": 204665, "loss": 0.0, "lr": 6.088947409658718e-08, "epoch": 4.497593628612611, "percentage": 89.95, "elapsed_time": "3:57:38", "remaining_time": "0:26:32", "throughput": 8698.41, "total_tokens": 124024488} +{"current_steps": 184105, "total_steps": 204665, "loss": 0.0, "lr": 6.086017483016104e-08, "epoch": 4.497715779444458, "percentage": 89.95, "elapsed_time": "3:57:38", "remaining_time": "0:26:32", "throughput": 8698.45, "total_tokens": 124028072} +{"current_steps": 184110, "total_steps": 204665, "loss": 0.0, "lr": 6.083088239341206e-08, "epoch": 4.497837930276305, "percentage": 89.96, "elapsed_time": "3:57:38", "remaining_time": "0:26:31", "throughput": 8698.47, "total_tokens": 124031400} +{"current_steps": 184115, "total_steps": 204665, "loss": 0.0, "lr": 6.080159678655372e-08, "epoch": 4.497960081108152, "percentage": 89.96, "elapsed_time": "3:57:39", "remaining_time": "0:26:31", "throughput": 8698.48, "total_tokens": 124034600} +{"current_steps": 184120, "total_steps": 204665, "loss": 0.0, "lr": 6.077231800979865e-08, "epoch": 4.49808223194, "percentage": 89.96, "elapsed_time": "3:57:39", "remaining_time": "0:26:31", "throughput": 8698.49, "total_tokens": 124037672} +{"current_steps": 184125, "total_steps": 204665, "loss": 0.0, "lr": 6.074304606335978e-08, "epoch": 4.4982043827718465, "percentage": 89.96, "elapsed_time": "3:57:40", "remaining_time": "0:26:30", "throughput": 8698.49, "total_tokens": 124040616} +{"current_steps": 184130, "total_steps": 204665, "loss": 0.0, "lr": 6.071378094745039e-08, "epoch": 4.498326533603694, "percentage": 89.97, "elapsed_time": "3:57:40", "remaining_time": "0:26:30", "throughput": 8698.51, "total_tokens": 124043816} +{"current_steps": 184135, "total_steps": 204665, "loss": 0.0, "lr": 6.068452266228285e-08, "epoch": 4.498448684435541, "percentage": 89.97, "elapsed_time": "3:57:40", "remaining_time": "0:26:29", "throughput": 8698.53, "total_tokens": 124047272} +{"current_steps": 184140, "total_steps": 204665, "loss": 0.0, "lr": 6.065527120807024e-08, "epoch": 4.4985708352673885, "percentage": 89.97, "elapsed_time": "3:57:41", "remaining_time": "0:26:29", "throughput": 8698.57, "total_tokens": 124050856} +{"current_steps": 184145, "total_steps": 204665, "loss": 0.0, "lr": 6.062602658502491e-08, "epoch": 4.498692986099235, "percentage": 89.97, "elapsed_time": "3:57:41", "remaining_time": "0:26:29", "throughput": 8698.6, "total_tokens": 124054312} +{"current_steps": 184150, "total_steps": 204665, "loss": 0.0, "lr": 6.059678879336005e-08, "epoch": 4.498815136931082, "percentage": 89.98, "elapsed_time": "3:57:41", "remaining_time": "0:26:28", "throughput": 8698.59, "total_tokens": 124057128} +{"current_steps": 184155, "total_steps": 204665, "loss": 0.0, "lr": 6.056755783328782e-08, "epoch": 4.49893728776293, "percentage": 89.98, "elapsed_time": "3:57:42", "remaining_time": "0:26:28", "throughput": 8698.59, "total_tokens": 124060136} +{"current_steps": 184160, "total_steps": 204665, "loss": 0.0, "lr": 6.053833370502104e-08, "epoch": 4.499059438594776, "percentage": 89.98, "elapsed_time": "3:57:42", "remaining_time": "0:26:28", "throughput": 8698.65, "total_tokens": 124064040} +{"current_steps": 184165, "total_steps": 204665, "loss": 0.0, "lr": 6.050911640877221e-08, "epoch": 4.499181589426624, "percentage": 89.98, "elapsed_time": "3:57:42", "remaining_time": "0:26:27", "throughput": 8698.7, "total_tokens": 124067880} +{"current_steps": 184170, "total_steps": 204665, "loss": 0.0, "lr": 6.047990594475372e-08, "epoch": 4.499303740258471, "percentage": 89.99, "elapsed_time": "3:57:43", "remaining_time": "0:26:27", "throughput": 8698.69, "total_tokens": 124070696} +{"current_steps": 184175, "total_steps": 204665, "loss": 0.0, "lr": 6.045070231317817e-08, "epoch": 4.499425891090318, "percentage": 89.99, "elapsed_time": "3:57:43", "remaining_time": "0:26:26", "throughput": 8698.73, "total_tokens": 124074344} +{"current_steps": 184180, "total_steps": 204665, "loss": 0.0, "lr": 6.042150551425784e-08, "epoch": 4.499548041922165, "percentage": 89.99, "elapsed_time": "3:57:43", "remaining_time": "0:26:26", "throughput": 8698.75, "total_tokens": 124077608} +{"current_steps": 184185, "total_steps": 204665, "loss": 0.0, "lr": 6.039231554820489e-08, "epoch": 4.499670192754013, "percentage": 89.99, "elapsed_time": "3:57:44", "remaining_time": "0:26:26", "throughput": 8698.8, "total_tokens": 124081384} +{"current_steps": 184190, "total_steps": 204665, "loss": 0.0, "lr": 6.036313241523183e-08, "epoch": 4.4997923435858596, "percentage": 90.0, "elapsed_time": "3:57:44", "remaining_time": "0:26:25", "throughput": 8698.83, "total_tokens": 124084968} +{"current_steps": 184195, "total_steps": 204665, "loss": 0.0538, "lr": 6.033395611555081e-08, "epoch": 4.499914494417707, "percentage": 90.0, "elapsed_time": "3:57:44", "remaining_time": "0:26:25", "throughput": 8698.84, "total_tokens": 124088040} +{"current_steps": 184200, "total_steps": 204665, "loss": 0.0, "lr": 6.0304786649374e-08, "epoch": 4.500036645249554, "percentage": 90.0, "elapsed_time": "3:57:45", "remaining_time": "0:26:24", "throughput": 8698.85, "total_tokens": 124091240} +{"current_steps": 184205, "total_steps": 204665, "loss": 0.0, "lr": 6.027562401691344e-08, "epoch": 4.500158796081402, "percentage": 90.0, "elapsed_time": "3:57:45", "remaining_time": "0:26:24", "throughput": 8698.9, "total_tokens": 124095144} +{"current_steps": 184210, "total_steps": 204665, "loss": 0.0, "lr": 6.024646821838142e-08, "epoch": 4.500280946913248, "percentage": 90.01, "elapsed_time": "3:57:45", "remaining_time": "0:26:24", "throughput": 8698.94, "total_tokens": 124098728} +{"current_steps": 184212, "total_steps": 204665, "eval_loss": 0.3127802014350891, "epoch": 4.500329807245987, "percentage": 90.01, "elapsed_time": "3:58:33", "remaining_time": "0:26:29", "throughput": 8669.91, "total_tokens": 124100264} +{"current_steps": 184215, "total_steps": 204665, "loss": 0.0, "lr": 6.021731925398976e-08, "epoch": 4.500403097745096, "percentage": 90.01, "elapsed_time": "3:59:09", "remaining_time": "0:26:32", "throughput": 8648.51, "total_tokens": 124102632} +{"current_steps": 184220, "total_steps": 204665, "loss": 0.0, "lr": 6.018817712395041e-08, "epoch": 4.500525248576943, "percentage": 90.01, "elapsed_time": "3:59:09", "remaining_time": "0:26:32", "throughput": 8648.52, "total_tokens": 124105832} +{"current_steps": 184225, "total_steps": 204665, "loss": 0.0, "lr": 6.015904182847564e-08, "epoch": 4.50064739940879, "percentage": 90.01, "elapsed_time": "3:59:10", "remaining_time": "0:26:32", "throughput": 8648.56, "total_tokens": 124109352} +{"current_steps": 184230, "total_steps": 204665, "loss": 0.0, "lr": 6.012991336777695e-08, "epoch": 4.500769550240637, "percentage": 90.02, "elapsed_time": "3:59:10", "remaining_time": "0:26:31", "throughput": 8648.57, "total_tokens": 124112552} +{"current_steps": 184235, "total_steps": 204665, "loss": 0.0, "lr": 6.01007917420665e-08, "epoch": 4.500891701072485, "percentage": 90.02, "elapsed_time": "3:59:10", "remaining_time": "0:26:31", "throughput": 8648.59, "total_tokens": 124115816} +{"current_steps": 184240, "total_steps": 204665, "loss": 0.0001, "lr": 6.007167695155569e-08, "epoch": 4.5010138519043315, "percentage": 90.02, "elapsed_time": "3:59:11", "remaining_time": "0:26:31", "throughput": 8648.64, "total_tokens": 124119528} +{"current_steps": 184245, "total_steps": 204665, "loss": 0.0, "lr": 6.004256899645665e-08, "epoch": 4.501136002736178, "percentage": 90.02, "elapsed_time": "3:59:11", "remaining_time": "0:26:30", "throughput": 8648.63, "total_tokens": 124122344} +{"current_steps": 184250, "total_steps": 204665, "loss": 0.0, "lr": 6.001346787698069e-08, "epoch": 4.501258153568026, "percentage": 90.03, "elapsed_time": "3:59:12", "remaining_time": "0:26:30", "throughput": 8648.64, "total_tokens": 124125416} +{"current_steps": 184255, "total_steps": 204665, "loss": 0.0, "lr": 5.998437359333964e-08, "epoch": 4.501380304399873, "percentage": 90.03, "elapsed_time": "3:59:12", "remaining_time": "0:26:29", "throughput": 8648.68, "total_tokens": 124129000} +{"current_steps": 184260, "total_steps": 204665, "loss": 0.0, "lr": 5.995528614574519e-08, "epoch": 4.50150245523172, "percentage": 90.03, "elapsed_time": "3:59:12", "remaining_time": "0:26:29", "throughput": 8648.68, "total_tokens": 124131880} +{"current_steps": 184265, "total_steps": 204665, "loss": 0.0, "lr": 5.992620553440863e-08, "epoch": 4.501624606063567, "percentage": 90.03, "elapsed_time": "3:59:13", "remaining_time": "0:26:29", "throughput": 8648.68, "total_tokens": 124134760} +{"current_steps": 184270, "total_steps": 204665, "loss": 0.0, "lr": 5.989713175954169e-08, "epoch": 4.501746756895415, "percentage": 90.03, "elapsed_time": "3:59:13", "remaining_time": "0:26:28", "throughput": 8648.7, "total_tokens": 124138152} +{"current_steps": 184275, "total_steps": 204665, "loss": 0.0, "lr": 5.986806482135542e-08, "epoch": 4.501868907727261, "percentage": 90.04, "elapsed_time": "3:59:13", "remaining_time": "0:26:28", "throughput": 8648.72, "total_tokens": 124141352} +{"current_steps": 184280, "total_steps": 204665, "loss": 0.0, "lr": 5.983900472006175e-08, "epoch": 4.501991058559109, "percentage": 90.04, "elapsed_time": "3:59:14", "remaining_time": "0:26:27", "throughput": 8648.73, "total_tokens": 124144616} +{"current_steps": 184285, "total_steps": 204665, "loss": 0.0, "lr": 5.980995145587165e-08, "epoch": 4.502113209390956, "percentage": 90.04, "elapsed_time": "3:59:14", "remaining_time": "0:26:27", "throughput": 8648.76, "total_tokens": 124147944} +{"current_steps": 184290, "total_steps": 204665, "loss": 0.0, "lr": 5.978090502899624e-08, "epoch": 4.502235360222803, "percentage": 90.04, "elapsed_time": "3:59:14", "remaining_time": "0:26:27", "throughput": 8648.78, "total_tokens": 124151400} +{"current_steps": 184295, "total_steps": 204665, "loss": 0.0, "lr": 5.975186543964716e-08, "epoch": 4.50235751105465, "percentage": 90.05, "elapsed_time": "3:59:15", "remaining_time": "0:26:26", "throughput": 8648.84, "total_tokens": 124155368} +{"current_steps": 184300, "total_steps": 204665, "loss": 0.0, "lr": 5.972283268803536e-08, "epoch": 4.502479661886498, "percentage": 90.05, "elapsed_time": "3:59:15", "remaining_time": "0:26:26", "throughput": 8648.86, "total_tokens": 124158632} +{"current_steps": 184305, "total_steps": 204665, "loss": 0.0, "lr": 5.969380677437208e-08, "epoch": 4.5026018127183445, "percentage": 90.05, "elapsed_time": "3:59:15", "remaining_time": "0:26:25", "throughput": 8648.9, "total_tokens": 124162344} +{"current_steps": 184310, "total_steps": 204665, "loss": 0.0, "lr": 5.966478769886818e-08, "epoch": 4.502723963550192, "percentage": 90.05, "elapsed_time": "3:59:16", "remaining_time": "0:26:25", "throughput": 8648.92, "total_tokens": 124165672} +{"current_steps": 184315, "total_steps": 204665, "loss": 0.0, "lr": 5.963577546173493e-08, "epoch": 4.502846114382039, "percentage": 90.06, "elapsed_time": "3:59:16", "remaining_time": "0:26:25", "throughput": 8648.96, "total_tokens": 124169320} +{"current_steps": 184320, "total_steps": 204665, "loss": 0.0, "lr": 5.960677006318338e-08, "epoch": 4.5029682652138865, "percentage": 90.06, "elapsed_time": "3:59:16", "remaining_time": "0:26:24", "throughput": 8649.04, "total_tokens": 124173608} +{"current_steps": 184325, "total_steps": 204665, "loss": 0.0, "lr": 5.9577771503424135e-08, "epoch": 4.503090416045733, "percentage": 90.06, "elapsed_time": "3:59:17", "remaining_time": "0:26:24", "throughput": 8649.1, "total_tokens": 124177576} +{"current_steps": 184330, "total_steps": 204665, "loss": 0.0, "lr": 5.954877978266848e-08, "epoch": 4.503212566877581, "percentage": 90.06, "elapsed_time": "3:59:17", "remaining_time": "0:26:23", "throughput": 8649.14, "total_tokens": 124181288} +{"current_steps": 184335, "total_steps": 204665, "loss": 0.0, "lr": 5.9519794901126907e-08, "epoch": 4.503334717709428, "percentage": 90.07, "elapsed_time": "3:59:17", "remaining_time": "0:26:23", "throughput": 8649.14, "total_tokens": 124184296} +{"current_steps": 184340, "total_steps": 204665, "loss": 0.0, "lr": 5.9490816859010364e-08, "epoch": 4.503456868541274, "percentage": 90.07, "elapsed_time": "3:59:18", "remaining_time": "0:26:23", "throughput": 8649.18, "total_tokens": 124187944} +{"current_steps": 184345, "total_steps": 204665, "loss": 0.0, "lr": 5.946184565652967e-08, "epoch": 4.503579019373122, "percentage": 90.07, "elapsed_time": "3:59:18", "remaining_time": "0:26:22", "throughput": 8649.2, "total_tokens": 124191208} +{"current_steps": 184350, "total_steps": 204665, "loss": 0.0, "lr": 5.943288129389523e-08, "epoch": 4.503701170204969, "percentage": 90.07, "elapsed_time": "3:59:19", "remaining_time": "0:26:22", "throughput": 8649.23, "total_tokens": 124194664} +{"current_steps": 184355, "total_steps": 204665, "loss": 0.0, "lr": 5.940392377131809e-08, "epoch": 4.503823321036816, "percentage": 90.08, "elapsed_time": "3:59:19", "remaining_time": "0:26:21", "throughput": 8649.26, "total_tokens": 124198184} +{"current_steps": 184360, "total_steps": 204665, "loss": 0.0, "lr": 5.937497308900841e-08, "epoch": 4.503945471868663, "percentage": 90.08, "elapsed_time": "3:59:19", "remaining_time": "0:26:21", "throughput": 8649.3, "total_tokens": 124201768} +{"current_steps": 184365, "total_steps": 204665, "loss": 0.0, "lr": 5.934602924717702e-08, "epoch": 4.504067622700511, "percentage": 90.08, "elapsed_time": "3:59:20", "remaining_time": "0:26:21", "throughput": 8649.32, "total_tokens": 124205096} +{"current_steps": 184370, "total_steps": 204665, "loss": 0.0, "lr": 5.93170922460342e-08, "epoch": 4.5041897735323575, "percentage": 90.08, "elapsed_time": "3:59:20", "remaining_time": "0:26:20", "throughput": 8649.33, "total_tokens": 124208104} +{"current_steps": 184375, "total_steps": 204665, "loss": 0.0, "lr": 5.9288162085790574e-08, "epoch": 4.504311924364205, "percentage": 90.09, "elapsed_time": "3:59:20", "remaining_time": "0:26:20", "throughput": 8649.33, "total_tokens": 124211112} +{"current_steps": 184380, "total_steps": 204665, "loss": 0.0, "lr": 5.9259238766656506e-08, "epoch": 4.504434075196052, "percentage": 90.09, "elapsed_time": "3:59:21", "remaining_time": "0:26:19", "throughput": 8649.36, "total_tokens": 124214568} +{"current_steps": 184385, "total_steps": 204665, "loss": 0.0, "lr": 5.923032228884228e-08, "epoch": 4.5045562260278995, "percentage": 90.09, "elapsed_time": "3:59:21", "remaining_time": "0:26:19", "throughput": 8649.38, "total_tokens": 124217704} +{"current_steps": 184390, "total_steps": 204665, "loss": 0.0, "lr": 5.920141265255818e-08, "epoch": 4.504678376859746, "percentage": 90.09, "elapsed_time": "3:59:21", "remaining_time": "0:26:19", "throughput": 8649.38, "total_tokens": 124220776} +{"current_steps": 184395, "total_steps": 204665, "loss": 0.0, "lr": 5.9172509858014474e-08, "epoch": 4.504800527691594, "percentage": 90.1, "elapsed_time": "3:59:22", "remaining_time": "0:26:18", "throughput": 8649.41, "total_tokens": 124224168} +{"current_steps": 184400, "total_steps": 204665, "loss": 0.0, "lr": 5.914361390542133e-08, "epoch": 4.504922678523441, "percentage": 90.1, "elapsed_time": "3:59:22", "remaining_time": "0:26:18", "throughput": 8649.42, "total_tokens": 124227240} +{"current_steps": 184405, "total_steps": 204665, "loss": 0.0, "lr": 5.91147247949888e-08, "epoch": 4.505044829355288, "percentage": 90.1, "elapsed_time": "3:59:22", "remaining_time": "0:26:18", "throughput": 8649.43, "total_tokens": 124230312} +{"current_steps": 184410, "total_steps": 204665, "loss": 0.0, "lr": 5.908584252692694e-08, "epoch": 4.505166980187135, "percentage": 90.1, "elapsed_time": "3:59:23", "remaining_time": "0:26:17", "throughput": 8649.46, "total_tokens": 124233960} +{"current_steps": 184415, "total_steps": 204665, "loss": 0.0, "lr": 5.905696710144614e-08, "epoch": 4.505289131018982, "percentage": 90.11, "elapsed_time": "3:59:23", "remaining_time": "0:26:17", "throughput": 8649.47, "total_tokens": 124236968} +{"current_steps": 184420, "total_steps": 204665, "loss": 0.0, "lr": 5.902809851875601e-08, "epoch": 4.505411281850829, "percentage": 90.11, "elapsed_time": "3:59:23", "remaining_time": "0:26:16", "throughput": 8649.49, "total_tokens": 124240232} +{"current_steps": 184425, "total_steps": 204665, "loss": 0.0, "lr": 5.899923677906682e-08, "epoch": 4.505533432682677, "percentage": 90.11, "elapsed_time": "3:59:24", "remaining_time": "0:26:16", "throughput": 8649.5, "total_tokens": 124243432} +{"current_steps": 184430, "total_steps": 204665, "loss": 0.0, "lr": 5.897038188258807e-08, "epoch": 4.505655583514524, "percentage": 90.11, "elapsed_time": "3:59:24", "remaining_time": "0:26:16", "throughput": 8649.52, "total_tokens": 124246696} +{"current_steps": 184435, "total_steps": 204665, "loss": 0.0, "lr": 5.894153382952993e-08, "epoch": 4.5057777343463705, "percentage": 90.12, "elapsed_time": "3:59:24", "remaining_time": "0:26:15", "throughput": 8649.53, "total_tokens": 124249768} +{"current_steps": 184440, "total_steps": 204665, "loss": 0.0, "lr": 5.891269262010212e-08, "epoch": 4.505899885178218, "percentage": 90.12, "elapsed_time": "3:59:25", "remaining_time": "0:26:15", "throughput": 8649.56, "total_tokens": 124253224} +{"current_steps": 184445, "total_steps": 204665, "loss": 0.0, "lr": 5.888385825451414e-08, "epoch": 4.506022036010065, "percentage": 90.12, "elapsed_time": "3:59:25", "remaining_time": "0:26:14", "throughput": 8649.56, "total_tokens": 124256168} +{"current_steps": 184450, "total_steps": 204665, "loss": 0.0, "lr": 5.885503073297604e-08, "epoch": 4.5061441868419125, "percentage": 90.12, "elapsed_time": "3:59:25", "remaining_time": "0:26:14", "throughput": 8649.62, "total_tokens": 124260328} +{"current_steps": 184455, "total_steps": 204665, "loss": 0.0, "lr": 5.8826210055697215e-08, "epoch": 4.506266337673759, "percentage": 90.13, "elapsed_time": "3:59:26", "remaining_time": "0:26:14", "throughput": 8649.66, "total_tokens": 124263912} +{"current_steps": 184460, "total_steps": 204665, "loss": 0.0117, "lr": 5.879739622288748e-08, "epoch": 4.506388488505607, "percentage": 90.13, "elapsed_time": "3:59:26", "remaining_time": "0:26:13", "throughput": 8649.68, "total_tokens": 124267240} +{"current_steps": 184465, "total_steps": 204665, "loss": 0.0, "lr": 5.876858923475603e-08, "epoch": 4.506510639337454, "percentage": 90.13, "elapsed_time": "3:59:27", "remaining_time": "0:26:13", "throughput": 8649.7, "total_tokens": 124270504} +{"current_steps": 184470, "total_steps": 204665, "loss": 0.0, "lr": 5.873978909151256e-08, "epoch": 4.506632790169301, "percentage": 90.13, "elapsed_time": "3:59:27", "remaining_time": "0:26:12", "throughput": 8649.71, "total_tokens": 124273512} +{"current_steps": 184475, "total_steps": 204665, "loss": 0.0, "lr": 5.871099579336669e-08, "epoch": 4.506754941001148, "percentage": 90.14, "elapsed_time": "3:59:27", "remaining_time": "0:26:12", "throughput": 8649.74, "total_tokens": 124277032} +{"current_steps": 184480, "total_steps": 204665, "loss": 0.0, "lr": 5.868220934052748e-08, "epoch": 4.506877091832996, "percentage": 90.14, "elapsed_time": "3:59:28", "remaining_time": "0:26:12", "throughput": 8649.79, "total_tokens": 124280808} +{"current_steps": 184485, "total_steps": 204665, "loss": 0.0, "lr": 5.865342973320453e-08, "epoch": 4.506999242664842, "percentage": 90.14, "elapsed_time": "3:59:28", "remaining_time": "0:26:11", "throughput": 8649.82, "total_tokens": 124284328} +{"current_steps": 184490, "total_steps": 204665, "loss": 0.0, "lr": 5.862465697160712e-08, "epoch": 4.50712139349669, "percentage": 90.14, "elapsed_time": "3:59:28", "remaining_time": "0:26:11", "throughput": 8649.84, "total_tokens": 124287656} +{"current_steps": 184495, "total_steps": 204665, "loss": 0.0, "lr": 5.85958910559442e-08, "epoch": 4.507243544328537, "percentage": 90.14, "elapsed_time": "3:59:29", "remaining_time": "0:26:10", "throughput": 8649.89, "total_tokens": 124291432} +{"current_steps": 184500, "total_steps": 204665, "loss": 0.0, "lr": 5.8567131986425265e-08, "epoch": 4.507365695160384, "percentage": 90.15, "elapsed_time": "3:59:29", "remaining_time": "0:26:10", "throughput": 8649.94, "total_tokens": 124295272} +{"current_steps": 184505, "total_steps": 204665, "loss": 0.0, "lr": 5.853837976325926e-08, "epoch": 4.507487845992231, "percentage": 90.15, "elapsed_time": "3:59:29", "remaining_time": "0:26:10", "throughput": 8649.96, "total_tokens": 124298536} +{"current_steps": 184510, "total_steps": 204665, "loss": 0.0, "lr": 5.8509634386655573e-08, "epoch": 4.507609996824078, "percentage": 90.15, "elapsed_time": "3:59:30", "remaining_time": "0:26:09", "throughput": 8649.99, "total_tokens": 124301992} +{"current_steps": 184515, "total_steps": 204665, "loss": 0.0, "lr": 5.848089585682292e-08, "epoch": 4.507732147655926, "percentage": 90.15, "elapsed_time": "3:59:30", "remaining_time": "0:26:09", "throughput": 8650.01, "total_tokens": 124305384} +{"current_steps": 184520, "total_steps": 204665, "loss": 0.0, "lr": 5.8452164173970475e-08, "epoch": 4.507854298487773, "percentage": 90.16, "elapsed_time": "3:59:30", "remaining_time": "0:26:08", "throughput": 8650.04, "total_tokens": 124308840} +{"current_steps": 184525, "total_steps": 204665, "loss": 0.0, "lr": 5.842343933830707e-08, "epoch": 4.50797644931962, "percentage": 90.16, "elapsed_time": "3:59:31", "remaining_time": "0:26:08", "throughput": 8650.08, "total_tokens": 124312552} +{"current_steps": 184530, "total_steps": 204665, "loss": 0.0, "lr": 5.839472135004164e-08, "epoch": 4.508098600151467, "percentage": 90.16, "elapsed_time": "3:59:31", "remaining_time": "0:26:08", "throughput": 8650.09, "total_tokens": 124315752} +{"current_steps": 184535, "total_steps": 204665, "loss": 0.0, "lr": 5.8366010209383254e-08, "epoch": 4.508220750983314, "percentage": 90.16, "elapsed_time": "3:59:31", "remaining_time": "0:26:07", "throughput": 8650.11, "total_tokens": 124319016} +{"current_steps": 184540, "total_steps": 204665, "loss": 0.0, "lr": 5.833730591654029e-08, "epoch": 4.508342901815161, "percentage": 90.17, "elapsed_time": "3:59:32", "remaining_time": "0:26:07", "throughput": 8650.14, "total_tokens": 124322664} +{"current_steps": 184545, "total_steps": 204665, "loss": 0.0, "lr": 5.830860847172192e-08, "epoch": 4.508465052647009, "percentage": 90.17, "elapsed_time": "3:59:32", "remaining_time": "0:26:06", "throughput": 8650.14, "total_tokens": 124325608} +{"current_steps": 184550, "total_steps": 204665, "loss": 0.0014, "lr": 5.827991787513642e-08, "epoch": 4.508587203478855, "percentage": 90.17, "elapsed_time": "3:59:33", "remaining_time": "0:26:06", "throughput": 8650.15, "total_tokens": 124328744} +{"current_steps": 184555, "total_steps": 204665, "loss": 0.0, "lr": 5.825123412699284e-08, "epoch": 4.508709354310703, "percentage": 90.17, "elapsed_time": "3:59:33", "remaining_time": "0:26:06", "throughput": 8650.19, "total_tokens": 124332264} +{"current_steps": 184560, "total_steps": 204665, "loss": 0.0, "lr": 5.822255722749947e-08, "epoch": 4.50883150514255, "percentage": 90.18, "elapsed_time": "3:59:33", "remaining_time": "0:26:05", "throughput": 8650.22, "total_tokens": 124335784} +{"current_steps": 184565, "total_steps": 204665, "loss": 0.0, "lr": 5.819388717686491e-08, "epoch": 4.5089536559743975, "percentage": 90.18, "elapsed_time": "3:59:34", "remaining_time": "0:26:05", "throughput": 8650.25, "total_tokens": 124339304} +{"current_steps": 184570, "total_steps": 204665, "loss": 0.0, "lr": 5.816522397529788e-08, "epoch": 4.509075806806244, "percentage": 90.18, "elapsed_time": "3:59:34", "remaining_time": "0:26:05", "throughput": 8650.26, "total_tokens": 124342440} +{"current_steps": 184575, "total_steps": 204665, "loss": 0.0, "lr": 5.813656762300656e-08, "epoch": 4.509197957638092, "percentage": 90.18, "elapsed_time": "3:59:34", "remaining_time": "0:26:04", "throughput": 8650.29, "total_tokens": 124345896} +{"current_steps": 184580, "total_steps": 204665, "loss": 0.0, "lr": 5.810791812019955e-08, "epoch": 4.509320108469939, "percentage": 90.19, "elapsed_time": "3:59:35", "remaining_time": "0:26:04", "throughput": 8650.31, "total_tokens": 124349352} +{"current_steps": 184585, "total_steps": 204665, "loss": 0.0341, "lr": 5.807927546708491e-08, "epoch": 4.509442259301786, "percentage": 90.19, "elapsed_time": "3:59:35", "remaining_time": "0:26:03", "throughput": 8650.32, "total_tokens": 124352424} +{"current_steps": 184590, "total_steps": 204665, "loss": 0.0, "lr": 5.805063966387136e-08, "epoch": 4.509564410133633, "percentage": 90.19, "elapsed_time": "3:59:35", "remaining_time": "0:26:03", "throughput": 8650.33, "total_tokens": 124355688} +{"current_steps": 184595, "total_steps": 204665, "loss": 0.0, "lr": 5.8022010710766844e-08, "epoch": 4.509686560965481, "percentage": 90.19, "elapsed_time": "3:59:36", "remaining_time": "0:26:03", "throughput": 8650.35, "total_tokens": 124359016} +{"current_steps": 184600, "total_steps": 204665, "loss": 0.0, "lr": 5.799338860797953e-08, "epoch": 4.509808711797327, "percentage": 90.2, "elapsed_time": "3:59:36", "remaining_time": "0:26:02", "throughput": 8650.39, "total_tokens": 124362728} +{"current_steps": 184605, "total_steps": 204665, "loss": 0.0, "lr": 5.796477335571781e-08, "epoch": 4.509930862629174, "percentage": 90.2, "elapsed_time": "3:59:36", "remaining_time": "0:26:02", "throughput": 8650.4, "total_tokens": 124365864} +{"current_steps": 184610, "total_steps": 204665, "loss": 0.0, "lr": 5.793616495418951e-08, "epoch": 4.510053013461022, "percentage": 90.2, "elapsed_time": "3:59:37", "remaining_time": "0:26:01", "throughput": 8650.42, "total_tokens": 124369320} +{"current_steps": 184615, "total_steps": 204665, "loss": 0.0003, "lr": 5.790756340360292e-08, "epoch": 4.5101751642928685, "percentage": 90.2, "elapsed_time": "3:59:37", "remaining_time": "0:26:01", "throughput": 8650.42, "total_tokens": 124372200} +{"current_steps": 184620, "total_steps": 204665, "loss": 0.0, "lr": 5.7878968704165754e-08, "epoch": 4.510297315124716, "percentage": 90.21, "elapsed_time": "3:59:37", "remaining_time": "0:26:01", "throughput": 8650.42, "total_tokens": 124375208} +{"current_steps": 184625, "total_steps": 204665, "loss": 0.0, "lr": 5.785038085608607e-08, "epoch": 4.510419465956563, "percentage": 90.21, "elapsed_time": "3:59:38", "remaining_time": "0:26:00", "throughput": 8650.45, "total_tokens": 124378728} +{"current_steps": 184630, "total_steps": 204665, "loss": 0.0, "lr": 5.782179985957214e-08, "epoch": 4.5105416167884105, "percentage": 90.21, "elapsed_time": "3:59:38", "remaining_time": "0:26:00", "throughput": 8650.5, "total_tokens": 124382568} +{"current_steps": 184635, "total_steps": 204665, "loss": 0.0, "lr": 5.779322571483125e-08, "epoch": 4.510663767620257, "percentage": 90.21, "elapsed_time": "3:59:39", "remaining_time": "0:25:59", "throughput": 8650.52, "total_tokens": 124385896} +{"current_steps": 184640, "total_steps": 204665, "loss": 0.0, "lr": 5.7764658422071566e-08, "epoch": 4.510785918452105, "percentage": 90.22, "elapsed_time": "3:59:39", "remaining_time": "0:25:59", "throughput": 8650.53, "total_tokens": 124389032} +{"current_steps": 184645, "total_steps": 204665, "loss": 0.0, "lr": 5.773609798150059e-08, "epoch": 4.510908069283952, "percentage": 90.22, "elapsed_time": "3:59:39", "remaining_time": "0:25:59", "throughput": 8650.56, "total_tokens": 124392488} +{"current_steps": 184650, "total_steps": 204665, "loss": 0.0, "lr": 5.7707544393326145e-08, "epoch": 4.511030220115799, "percentage": 90.22, "elapsed_time": "3:59:40", "remaining_time": "0:25:58", "throughput": 8650.58, "total_tokens": 124395816} +{"current_steps": 184655, "total_steps": 204665, "loss": 0.0414, "lr": 5.767899765775608e-08, "epoch": 4.511152370947646, "percentage": 90.22, "elapsed_time": "3:59:40", "remaining_time": "0:25:58", "throughput": 8650.6, "total_tokens": 124399272} +{"current_steps": 184660, "total_steps": 204665, "loss": 0.0, "lr": 5.765045777499755e-08, "epoch": 4.511274521779494, "percentage": 90.23, "elapsed_time": "3:59:40", "remaining_time": "0:25:57", "throughput": 8650.63, "total_tokens": 124402600} +{"current_steps": 184665, "total_steps": 204665, "loss": 0.0, "lr": 5.7621924745258624e-08, "epoch": 4.51139667261134, "percentage": 90.23, "elapsed_time": "3:59:41", "remaining_time": "0:25:57", "throughput": 8650.65, "total_tokens": 124405928} +{"current_steps": 184670, "total_steps": 204665, "loss": 0.0, "lr": 5.759339856874634e-08, "epoch": 4.511518823443188, "percentage": 90.23, "elapsed_time": "3:59:41", "remaining_time": "0:25:57", "throughput": 8650.66, "total_tokens": 124409128} +{"current_steps": 184675, "total_steps": 204665, "loss": 0.0, "lr": 5.7564879245668444e-08, "epoch": 4.511640974275035, "percentage": 90.23, "elapsed_time": "3:59:41", "remaining_time": "0:25:56", "throughput": 8650.7, "total_tokens": 124412904} +{"current_steps": 184680, "total_steps": 204665, "loss": 0.0, "lr": 5.753636677623208e-08, "epoch": 4.5117631251068815, "percentage": 90.24, "elapsed_time": "3:59:42", "remaining_time": "0:25:56", "throughput": 8650.73, "total_tokens": 124416360} +{"current_steps": 184685, "total_steps": 204665, "loss": 0.0, "lr": 5.750786116064477e-08, "epoch": 4.511885275938729, "percentage": 90.24, "elapsed_time": "3:59:42", "remaining_time": "0:25:55", "throughput": 8650.75, "total_tokens": 124419688} +{"current_steps": 184690, "total_steps": 204665, "loss": 0.0, "lr": 5.7479362399113995e-08, "epoch": 4.512007426770577, "percentage": 90.24, "elapsed_time": "3:59:42", "remaining_time": "0:25:55", "throughput": 8650.77, "total_tokens": 124423080} +{"current_steps": 184695, "total_steps": 204665, "loss": 0.0, "lr": 5.7450870491846715e-08, "epoch": 4.5121295776024235, "percentage": 90.24, "elapsed_time": "3:59:43", "remaining_time": "0:25:55", "throughput": 8650.79, "total_tokens": 124426344} +{"current_steps": 184700, "total_steps": 204665, "loss": 0.0, "lr": 5.7422385439050095e-08, "epoch": 4.51225172843427, "percentage": 90.25, "elapsed_time": "3:59:43", "remaining_time": "0:25:54", "throughput": 8650.82, "total_tokens": 124429800} +{"current_steps": 184705, "total_steps": 204665, "loss": 0.0, "lr": 5.7393907240931624e-08, "epoch": 4.512373879266118, "percentage": 90.25, "elapsed_time": "3:59:43", "remaining_time": "0:25:54", "throughput": 8650.83, "total_tokens": 124433000} +{"current_steps": 184710, "total_steps": 204665, "loss": 0.0, "lr": 5.736543589769816e-08, "epoch": 4.512496030097965, "percentage": 90.25, "elapsed_time": "3:59:44", "remaining_time": "0:25:53", "throughput": 8650.86, "total_tokens": 124436392} +{"current_steps": 184715, "total_steps": 204665, "loss": 0.0, "lr": 5.733697140955662e-08, "epoch": 4.512618180929812, "percentage": 90.25, "elapsed_time": "3:59:44", "remaining_time": "0:25:53", "throughput": 8650.87, "total_tokens": 124439656} +{"current_steps": 184720, "total_steps": 204665, "loss": 0.0003, "lr": 5.73085137767142e-08, "epoch": 4.512740331761659, "percentage": 90.25, "elapsed_time": "3:59:44", "remaining_time": "0:25:53", "throughput": 8650.89, "total_tokens": 124442984} +{"current_steps": 184725, "total_steps": 204665, "loss": 0.0, "lr": 5.728006299937793e-08, "epoch": 4.512862482593507, "percentage": 90.26, "elapsed_time": "3:59:45", "remaining_time": "0:25:52", "throughput": 8650.89, "total_tokens": 124445992} +{"current_steps": 184730, "total_steps": 204665, "loss": 0.0, "lr": 5.7251619077754445e-08, "epoch": 4.512984633425353, "percentage": 90.26, "elapsed_time": "3:59:45", "remaining_time": "0:25:52", "throughput": 8650.9, "total_tokens": 124449256} +{"current_steps": 184735, "total_steps": 204665, "loss": 0.0, "lr": 5.72231820120509e-08, "epoch": 4.513106784257201, "percentage": 90.26, "elapsed_time": "3:59:46", "remaining_time": "0:25:52", "throughput": 8650.91, "total_tokens": 124452264} +{"current_steps": 184740, "total_steps": 204665, "loss": 0.0, "lr": 5.7194751802473793e-08, "epoch": 4.513228935089048, "percentage": 90.26, "elapsed_time": "3:59:46", "remaining_time": "0:25:51", "throughput": 8650.95, "total_tokens": 124455976} +{"current_steps": 184745, "total_steps": 204665, "loss": 0.0, "lr": 5.716632844923008e-08, "epoch": 4.513351085920895, "percentage": 90.27, "elapsed_time": "3:59:46", "remaining_time": "0:25:51", "throughput": 8650.95, "total_tokens": 124458920} +{"current_steps": 184750, "total_steps": 204665, "loss": 0.0, "lr": 5.71379119525266e-08, "epoch": 4.513473236752742, "percentage": 90.27, "elapsed_time": "3:59:47", "remaining_time": "0:25:50", "throughput": 8650.99, "total_tokens": 124462504} +{"current_steps": 184755, "total_steps": 204665, "loss": 0.0, "lr": 5.710950231256961e-08, "epoch": 4.51359538758459, "percentage": 90.27, "elapsed_time": "3:59:47", "remaining_time": "0:25:50", "throughput": 8651.01, "total_tokens": 124465960} +{"current_steps": 184760, "total_steps": 204665, "loss": 0.0, "lr": 5.708109952956608e-08, "epoch": 4.5137175384164365, "percentage": 90.27, "elapsed_time": "3:59:47", "remaining_time": "0:25:50", "throughput": 8651.05, "total_tokens": 124469672} +{"current_steps": 184765, "total_steps": 204665, "loss": 0.0, "lr": 5.705270360372227e-08, "epoch": 4.513839689248284, "percentage": 90.28, "elapsed_time": "3:59:48", "remaining_time": "0:25:49", "throughput": 8651.07, "total_tokens": 124473000} +{"current_steps": 184770, "total_steps": 204665, "loss": 0.0402, "lr": 5.702431453524503e-08, "epoch": 4.513961840080131, "percentage": 90.28, "elapsed_time": "3:59:48", "remaining_time": "0:25:49", "throughput": 8651.1, "total_tokens": 124476328} +{"current_steps": 184775, "total_steps": 204665, "loss": 0.0008, "lr": 5.699593232434041e-08, "epoch": 4.514083990911978, "percentage": 90.28, "elapsed_time": "3:59:48", "remaining_time": "0:25:48", "throughput": 8651.13, "total_tokens": 124479848} +{"current_steps": 184780, "total_steps": 204665, "loss": 0.0, "lr": 5.6967556971215027e-08, "epoch": 4.514206141743825, "percentage": 90.28, "elapsed_time": "3:59:49", "remaining_time": "0:25:48", "throughput": 8651.15, "total_tokens": 124483240} +{"current_steps": 184785, "total_steps": 204665, "loss": 0.0, "lr": 5.693918847607526e-08, "epoch": 4.514328292575673, "percentage": 90.29, "elapsed_time": "3:59:49", "remaining_time": "0:25:48", "throughput": 8651.18, "total_tokens": 124486696} +{"current_steps": 184790, "total_steps": 204665, "loss": 0.0, "lr": 5.691082683912729e-08, "epoch": 4.51445044340752, "percentage": 90.29, "elapsed_time": "3:59:49", "remaining_time": "0:25:47", "throughput": 8651.21, "total_tokens": 124490152} +{"current_steps": 184795, "total_steps": 204665, "loss": 0.0, "lr": 5.688247206057761e-08, "epoch": 4.514572594239366, "percentage": 90.29, "elapsed_time": "3:59:50", "remaining_time": "0:25:47", "throughput": 8651.28, "total_tokens": 124494376} +{"current_steps": 184800, "total_steps": 204665, "loss": 0.0, "lr": 5.6854124140632285e-08, "epoch": 4.514694745071214, "percentage": 90.29, "elapsed_time": "3:59:50", "remaining_time": "0:25:46", "throughput": 8651.31, "total_tokens": 124497960} +{"current_steps": 184805, "total_steps": 204665, "loss": 0.0, "lr": 5.682578307949726e-08, "epoch": 4.514816895903061, "percentage": 90.3, "elapsed_time": "3:59:50", "remaining_time": "0:25:46", "throughput": 8651.31, "total_tokens": 124500904} +{"current_steps": 184810, "total_steps": 204665, "loss": 0.0, "lr": 5.679744887737903e-08, "epoch": 4.514939046734908, "percentage": 90.3, "elapsed_time": "3:59:51", "remaining_time": "0:25:46", "throughput": 8651.33, "total_tokens": 124504360} +{"current_steps": 184815, "total_steps": 204665, "loss": 0.0, "lr": 5.676912153448321e-08, "epoch": 4.515061197566755, "percentage": 90.3, "elapsed_time": "3:59:51", "remaining_time": "0:25:45", "throughput": 8651.37, "total_tokens": 124508072} +{"current_steps": 184820, "total_steps": 204665, "loss": 0.0, "lr": 5.6740801051016197e-08, "epoch": 4.515183348398603, "percentage": 90.3, "elapsed_time": "3:59:52", "remaining_time": "0:25:45", "throughput": 8651.39, "total_tokens": 124511336} +{"current_steps": 184825, "total_steps": 204665, "loss": 0.0, "lr": 5.671248742718371e-08, "epoch": 4.5153054992304495, "percentage": 90.31, "elapsed_time": "3:59:52", "remaining_time": "0:25:44", "throughput": 8651.45, "total_tokens": 124515432} +{"current_steps": 184830, "total_steps": 204665, "loss": 0.0, "lr": 5.66841806631918e-08, "epoch": 4.515427650062297, "percentage": 90.31, "elapsed_time": "3:59:52", "remaining_time": "0:25:44", "throughput": 8651.48, "total_tokens": 124519016} +{"current_steps": 184835, "total_steps": 204665, "loss": 0.0, "lr": 5.66558807592461e-08, "epoch": 4.515549800894144, "percentage": 90.31, "elapsed_time": "3:59:53", "remaining_time": "0:25:44", "throughput": 8651.5, "total_tokens": 124522152} +{"current_steps": 184840, "total_steps": 204665, "loss": 0.0, "lr": 5.662758771555265e-08, "epoch": 4.515671951725992, "percentage": 90.31, "elapsed_time": "3:59:53", "remaining_time": "0:25:43", "throughput": 8651.52, "total_tokens": 124525480} +{"current_steps": 184845, "total_steps": 204665, "loss": 0.0, "lr": 5.659930153231718e-08, "epoch": 4.515794102557838, "percentage": 90.32, "elapsed_time": "3:59:53", "remaining_time": "0:25:43", "throughput": 8651.55, "total_tokens": 124528808} +{"current_steps": 184850, "total_steps": 204665, "loss": 0.0, "lr": 5.657102220974519e-08, "epoch": 4.515916253389686, "percentage": 90.32, "elapsed_time": "3:59:54", "remaining_time": "0:25:42", "throughput": 8651.59, "total_tokens": 124532456} +{"current_steps": 184855, "total_steps": 204665, "loss": 0.0, "lr": 5.654274974804263e-08, "epoch": 4.516038404221533, "percentage": 90.32, "elapsed_time": "3:59:54", "remaining_time": "0:25:42", "throughput": 8651.62, "total_tokens": 124535976} +{"current_steps": 184860, "total_steps": 204665, "loss": 0.0, "lr": 5.651448414741489e-08, "epoch": 4.51616055505338, "percentage": 90.32, "elapsed_time": "3:59:54", "remaining_time": "0:25:42", "throughput": 8651.63, "total_tokens": 124539048} +{"current_steps": 184865, "total_steps": 204665, "loss": 0.0, "lr": 5.648622540806758e-08, "epoch": 4.516282705885227, "percentage": 90.33, "elapsed_time": "3:59:55", "remaining_time": "0:25:41", "throughput": 8651.66, "total_tokens": 124542568} +{"current_steps": 184870, "total_steps": 204665, "loss": 0.0, "lr": 5.6457973530206206e-08, "epoch": 4.516404856717074, "percentage": 90.33, "elapsed_time": "3:59:55", "remaining_time": "0:25:41", "throughput": 8651.68, "total_tokens": 124545832} +{"current_steps": 184875, "total_steps": 204665, "loss": 0.0, "lr": 5.6429728514036154e-08, "epoch": 4.5165270075489214, "percentage": 90.33, "elapsed_time": "3:59:55", "remaining_time": "0:25:41", "throughput": 8651.7, "total_tokens": 124549032} +{"current_steps": 184880, "total_steps": 204665, "loss": 0.0, "lr": 5.640149035976305e-08, "epoch": 4.516649158380768, "percentage": 90.33, "elapsed_time": "3:59:56", "remaining_time": "0:25:40", "throughput": 8651.74, "total_tokens": 124552616} +{"current_steps": 184885, "total_steps": 204665, "loss": 0.0, "lr": 5.637325906759205e-08, "epoch": 4.516771309212616, "percentage": 90.34, "elapsed_time": "3:59:56", "remaining_time": "0:25:40", "throughput": 8651.74, "total_tokens": 124555624} +{"current_steps": 184890, "total_steps": 204665, "loss": 0.0, "lr": 5.634503463772855e-08, "epoch": 4.516893460044463, "percentage": 90.34, "elapsed_time": "3:59:56", "remaining_time": "0:25:39", "throughput": 8651.74, "total_tokens": 124558632} +{"current_steps": 184895, "total_steps": 204665, "loss": 0.0, "lr": 5.631681707037772e-08, "epoch": 4.51701561087631, "percentage": 90.34, "elapsed_time": "3:59:57", "remaining_time": "0:25:39", "throughput": 8651.76, "total_tokens": 124561832} +{"current_steps": 184900, "total_steps": 204665, "loss": 0.0, "lr": 5.628860636574495e-08, "epoch": 4.517137761708157, "percentage": 90.34, "elapsed_time": "3:59:57", "remaining_time": "0:25:39", "throughput": 8651.79, "total_tokens": 124565352} +{"current_steps": 184905, "total_steps": 204665, "loss": 0.0, "lr": 5.626040252403519e-08, "epoch": 4.517259912540005, "percentage": 90.35, "elapsed_time": "3:59:57", "remaining_time": "0:25:38", "throughput": 8651.81, "total_tokens": 124568680} +{"current_steps": 184910, "total_steps": 204665, "loss": 0.0, "lr": 5.623220554545349e-08, "epoch": 4.517382063371851, "percentage": 90.35, "elapsed_time": "3:59:58", "remaining_time": "0:25:38", "throughput": 8651.83, "total_tokens": 124571944} +{"current_steps": 184915, "total_steps": 204665, "loss": 0.0, "lr": 5.6204015430205254e-08, "epoch": 4.517504214203699, "percentage": 90.35, "elapsed_time": "3:59:58", "remaining_time": "0:25:37", "throughput": 8651.89, "total_tokens": 124575848} +{"current_steps": 184920, "total_steps": 204665, "loss": 0.0, "lr": 5.6175832178495086e-08, "epoch": 4.517626365035546, "percentage": 90.35, "elapsed_time": "3:59:59", "remaining_time": "0:25:37", "throughput": 8651.91, "total_tokens": 124579048} +{"current_steps": 184925, "total_steps": 204665, "loss": 0.0, "lr": 5.614765579052827e-08, "epoch": 4.517748515867393, "percentage": 90.35, "elapsed_time": "3:59:59", "remaining_time": "0:25:37", "throughput": 8651.92, "total_tokens": 124582312} +{"current_steps": 184930, "total_steps": 204665, "loss": 0.0, "lr": 5.6119486266509306e-08, "epoch": 4.51787066669924, "percentage": 90.36, "elapsed_time": "3:59:59", "remaining_time": "0:25:36", "throughput": 8651.95, "total_tokens": 124585896} +{"current_steps": 184935, "total_steps": 204665, "loss": 0.0, "lr": 5.6091323606643484e-08, "epoch": 4.517992817531088, "percentage": 90.36, "elapsed_time": "4:00:00", "remaining_time": "0:25:36", "throughput": 8652.03, "total_tokens": 124590248} +{"current_steps": 184940, "total_steps": 204665, "loss": 0.0, "lr": 5.606316781113551e-08, "epoch": 4.5181149683629345, "percentage": 90.36, "elapsed_time": "4:00:00", "remaining_time": "0:25:35", "throughput": 8652.07, "total_tokens": 124593960} +{"current_steps": 184945, "total_steps": 204665, "loss": 0.0, "lr": 5.603501888018991e-08, "epoch": 4.518237119194782, "percentage": 90.36, "elapsed_time": "4:00:00", "remaining_time": "0:25:35", "throughput": 8652.07, "total_tokens": 124596968} +{"current_steps": 184950, "total_steps": 204665, "loss": 0.0, "lr": 5.6006876814011725e-08, "epoch": 4.518359270026629, "percentage": 90.37, "elapsed_time": "4:00:01", "remaining_time": "0:25:35", "throughput": 8652.09, "total_tokens": 124600232} +{"current_steps": 184955, "total_steps": 204665, "loss": 0.0, "lr": 5.5978741612805244e-08, "epoch": 4.5184814208584765, "percentage": 90.37, "elapsed_time": "4:00:01", "remaining_time": "0:25:34", "throughput": 8652.12, "total_tokens": 124603624} +{"current_steps": 184960, "total_steps": 204665, "loss": 0.0, "lr": 5.5950613276775415e-08, "epoch": 4.518603571690323, "percentage": 90.37, "elapsed_time": "4:00:01", "remaining_time": "0:25:34", "throughput": 8652.14, "total_tokens": 124607016} +{"current_steps": 184965, "total_steps": 204665, "loss": 0.0, "lr": 5.5922491806126514e-08, "epoch": 4.51872572252217, "percentage": 90.37, "elapsed_time": "4:00:02", "remaining_time": "0:25:33", "throughput": 8652.17, "total_tokens": 124610344} +{"current_steps": 184970, "total_steps": 204665, "loss": 0.0, "lr": 5.589437720106327e-08, "epoch": 4.518847873354018, "percentage": 90.38, "elapsed_time": "4:00:02", "remaining_time": "0:25:33", "throughput": 8652.19, "total_tokens": 124613736} +{"current_steps": 184975, "total_steps": 204665, "loss": 0.0, "lr": 5.586626946179007e-08, "epoch": 4.518970024185864, "percentage": 90.38, "elapsed_time": "4:00:02", "remaining_time": "0:25:33", "throughput": 8652.22, "total_tokens": 124617128} +{"current_steps": 184980, "total_steps": 204665, "loss": 0.0, "lr": 5.58381685885112e-08, "epoch": 4.519092175017712, "percentage": 90.38, "elapsed_time": "4:00:03", "remaining_time": "0:25:32", "throughput": 8652.24, "total_tokens": 124620392} +{"current_steps": 184985, "total_steps": 204665, "loss": 0.0, "lr": 5.581007458143128e-08, "epoch": 4.519214325849559, "percentage": 90.38, "elapsed_time": "4:00:03", "remaining_time": "0:25:32", "throughput": 8652.26, "total_tokens": 124623784} +{"current_steps": 184990, "total_steps": 204665, "loss": 0.0, "lr": 5.578198744075424e-08, "epoch": 4.519336476681406, "percentage": 90.39, "elapsed_time": "4:00:03", "remaining_time": "0:25:31", "throughput": 8652.29, "total_tokens": 124627112} +{"current_steps": 184995, "total_steps": 204665, "loss": 0.0, "lr": 5.57539071666846e-08, "epoch": 4.519458627513253, "percentage": 90.39, "elapsed_time": "4:00:04", "remaining_time": "0:25:31", "throughput": 8652.3, "total_tokens": 124630376} +{"current_steps": 185000, "total_steps": 204665, "loss": 0.0, "lr": 5.572583375942675e-08, "epoch": 4.519580778345101, "percentage": 90.39, "elapsed_time": "4:00:04", "remaining_time": "0:25:31", "throughput": 8652.33, "total_tokens": 124633832} +{"current_steps": 185005, "total_steps": 204665, "loss": 0.0, "lr": 5.569776721918451e-08, "epoch": 4.5197029291769475, "percentage": 90.39, "elapsed_time": "4:00:04", "remaining_time": "0:25:30", "throughput": 8652.36, "total_tokens": 124637288} +{"current_steps": 185010, "total_steps": 204665, "loss": 0.0001, "lr": 5.566970754616196e-08, "epoch": 4.519825080008795, "percentage": 90.4, "elapsed_time": "4:00:05", "remaining_time": "0:25:30", "throughput": 8652.37, "total_tokens": 124640424} +{"current_steps": 185015, "total_steps": 204665, "loss": 0.0, "lr": 5.564165474056337e-08, "epoch": 4.519947230840642, "percentage": 90.4, "elapsed_time": "4:00:05", "remaining_time": "0:25:29", "throughput": 8652.41, "total_tokens": 124643944} +{"current_steps": 185020, "total_steps": 204665, "loss": 0.0, "lr": 5.5613608802592806e-08, "epoch": 4.5200693816724895, "percentage": 90.4, "elapsed_time": "4:00:06", "remaining_time": "0:25:29", "throughput": 8652.45, "total_tokens": 124647528} +{"current_steps": 185025, "total_steps": 204665, "loss": 0.0, "lr": 5.558556973245387e-08, "epoch": 4.520191532504336, "percentage": 90.4, "elapsed_time": "4:00:06", "remaining_time": "0:25:29", "throughput": 8652.48, "total_tokens": 124651048} +{"current_steps": 185030, "total_steps": 204665, "loss": 0.0, "lr": 5.555753753035064e-08, "epoch": 4.520313683336184, "percentage": 90.41, "elapsed_time": "4:00:06", "remaining_time": "0:25:28", "throughput": 8652.51, "total_tokens": 124654568} +{"current_steps": 185035, "total_steps": 204665, "loss": 0.0, "lr": 5.552951219648727e-08, "epoch": 4.520435834168031, "percentage": 90.41, "elapsed_time": "4:00:07", "remaining_time": "0:25:28", "throughput": 8652.52, "total_tokens": 124657704} +{"current_steps": 185040, "total_steps": 204665, "loss": 0.0, "lr": 5.550149373106716e-08, "epoch": 4.520557984999877, "percentage": 90.41, "elapsed_time": "4:00:07", "remaining_time": "0:25:28", "throughput": 8652.58, "total_tokens": 124661672} +{"current_steps": 185045, "total_steps": 204665, "loss": 0.0, "lr": 5.547348213429437e-08, "epoch": 4.520680135831725, "percentage": 90.41, "elapsed_time": "4:00:07", "remaining_time": "0:25:27", "throughput": 8652.61, "total_tokens": 124665128} +{"current_steps": 185050, "total_steps": 204665, "loss": 0.0, "lr": 5.544547740637229e-08, "epoch": 4.520802286663573, "percentage": 90.42, "elapsed_time": "4:00:08", "remaining_time": "0:25:27", "throughput": 8652.65, "total_tokens": 124668776} +{"current_steps": 185055, "total_steps": 204665, "loss": 0.0, "lr": 5.5417479547504756e-08, "epoch": 4.520924437495419, "percentage": 90.42, "elapsed_time": "4:00:08", "remaining_time": "0:25:26", "throughput": 8652.7, "total_tokens": 124672488} +{"current_steps": 185060, "total_steps": 204665, "loss": 0.0, "lr": 5.538948855789549e-08, "epoch": 4.521046588327266, "percentage": 90.42, "elapsed_time": "4:00:08", "remaining_time": "0:25:26", "throughput": 8652.71, "total_tokens": 124675688} +{"current_steps": 185065, "total_steps": 204665, "loss": 0.0, "lr": 5.536150443774779e-08, "epoch": 4.521168739159114, "percentage": 90.42, "elapsed_time": "4:00:09", "remaining_time": "0:25:26", "throughput": 8652.73, "total_tokens": 124679016} +{"current_steps": 185070, "total_steps": 204665, "loss": 0.0, "lr": 5.5333527187265474e-08, "epoch": 4.5212908899909605, "percentage": 90.43, "elapsed_time": "4:00:09", "remaining_time": "0:25:25", "throughput": 8652.77, "total_tokens": 124682664} +{"current_steps": 185075, "total_steps": 204665, "loss": 0.0, "lr": 5.530555680665172e-08, "epoch": 4.521413040822808, "percentage": 90.43, "elapsed_time": "4:00:09", "remaining_time": "0:25:25", "throughput": 8652.77, "total_tokens": 124685544} +{"current_steps": 185080, "total_steps": 204665, "loss": 0.0, "lr": 5.5277593296110145e-08, "epoch": 4.521535191654655, "percentage": 90.43, "elapsed_time": "4:00:10", "remaining_time": "0:25:24", "throughput": 8652.82, "total_tokens": 124689384} +{"current_steps": 185085, "total_steps": 204665, "loss": 0.0, "lr": 5.5249636655843924e-08, "epoch": 4.5216573424865025, "percentage": 90.43, "elapsed_time": "4:00:10", "remaining_time": "0:25:24", "throughput": 8652.83, "total_tokens": 124692456} +{"current_steps": 185090, "total_steps": 204665, "loss": 0.0, "lr": 5.5221686886056326e-08, "epoch": 4.521779493318349, "percentage": 90.44, "elapsed_time": "4:00:10", "remaining_time": "0:25:24", "throughput": 8652.85, "total_tokens": 124695784} +{"current_steps": 185095, "total_steps": 204665, "loss": 0.0, "lr": 5.519374398695098e-08, "epoch": 4.521901644150197, "percentage": 90.44, "elapsed_time": "4:00:11", "remaining_time": "0:25:23", "throughput": 8652.9, "total_tokens": 124699624} +{"current_steps": 185100, "total_steps": 204665, "loss": 0.0713, "lr": 5.516580795873071e-08, "epoch": 4.522023794982044, "percentage": 90.44, "elapsed_time": "4:00:11", "remaining_time": "0:25:23", "throughput": 8652.92, "total_tokens": 124702888} +{"current_steps": 185105, "total_steps": 204665, "loss": 0.0463, "lr": 5.513787880159892e-08, "epoch": 4.522145945813891, "percentage": 90.44, "elapsed_time": "4:00:11", "remaining_time": "0:25:22", "throughput": 8652.93, "total_tokens": 124705960} +{"current_steps": 185110, "total_steps": 204665, "loss": 0.0, "lr": 5.5109956515758674e-08, "epoch": 4.522268096645738, "percentage": 90.45, "elapsed_time": "4:00:12", "remaining_time": "0:25:22", "throughput": 8652.95, "total_tokens": 124709160} +{"current_steps": 185115, "total_steps": 204665, "loss": 0.0, "lr": 5.508204110141279e-08, "epoch": 4.522390247477586, "percentage": 90.45, "elapsed_time": "4:00:12", "remaining_time": "0:25:22", "throughput": 8652.96, "total_tokens": 124712296} +{"current_steps": 185120, "total_steps": 204665, "loss": 0.0, "lr": 5.505413255876457e-08, "epoch": 4.522512398309432, "percentage": 90.45, "elapsed_time": "4:00:13", "remaining_time": "0:25:21", "throughput": 8653.03, "total_tokens": 124716456} +{"current_steps": 185125, "total_steps": 204665, "loss": 0.0, "lr": 5.502623088801672e-08, "epoch": 4.52263454914128, "percentage": 90.45, "elapsed_time": "4:00:13", "remaining_time": "0:25:21", "throughput": 8653.05, "total_tokens": 124719784} +{"current_steps": 185130, "total_steps": 204665, "loss": 0.0, "lr": 5.4998336089372546e-08, "epoch": 4.522756699973127, "percentage": 90.46, "elapsed_time": "4:00:13", "remaining_time": "0:25:20", "throughput": 8653.09, "total_tokens": 124723368} +{"current_steps": 185135, "total_steps": 204665, "loss": 0.0, "lr": 5.497044816303442e-08, "epoch": 4.5228788508049735, "percentage": 90.46, "elapsed_time": "4:00:14", "remaining_time": "0:25:20", "throughput": 8653.12, "total_tokens": 124726888} +{"current_steps": 185140, "total_steps": 204665, "loss": 0.0, "lr": 5.494256710920542e-08, "epoch": 4.523001001636821, "percentage": 90.46, "elapsed_time": "4:00:14", "remaining_time": "0:25:20", "throughput": 8653.14, "total_tokens": 124730216} +{"current_steps": 185145, "total_steps": 204665, "loss": 0.0, "lr": 5.4914692928088257e-08, "epoch": 4.523123152468669, "percentage": 90.46, "elapsed_time": "4:00:14", "remaining_time": "0:25:19", "throughput": 8653.16, "total_tokens": 124733544} +{"current_steps": 185150, "total_steps": 204665, "loss": 0.0, "lr": 5.488682561988556e-08, "epoch": 4.5232453033005156, "percentage": 90.46, "elapsed_time": "4:00:15", "remaining_time": "0:25:19", "throughput": 8653.17, "total_tokens": 124736552} +{"current_steps": 185155, "total_steps": 204665, "loss": 0.0, "lr": 5.485896518480026e-08, "epoch": 4.523367454132362, "percentage": 90.47, "elapsed_time": "4:00:15", "remaining_time": "0:25:18", "throughput": 8653.19, "total_tokens": 124739944} +{"current_steps": 185160, "total_steps": 204665, "loss": 0.0, "lr": 5.483111162303466e-08, "epoch": 4.52348960496421, "percentage": 90.47, "elapsed_time": "4:00:15", "remaining_time": "0:25:18", "throughput": 8653.21, "total_tokens": 124743144} +{"current_steps": 185165, "total_steps": 204665, "loss": 0.0, "lr": 5.480326493479148e-08, "epoch": 4.523611755796057, "percentage": 90.47, "elapsed_time": "4:00:16", "remaining_time": "0:25:18", "throughput": 8653.25, "total_tokens": 124746856} +{"current_steps": 185170, "total_steps": 204665, "loss": 0.0, "lr": 5.477542512027311e-08, "epoch": 4.523733906627904, "percentage": 90.47, "elapsed_time": "4:00:16", "remaining_time": "0:25:17", "throughput": 8653.26, "total_tokens": 124749992} +{"current_steps": 185175, "total_steps": 204665, "loss": 0.0, "lr": 5.474759217968228e-08, "epoch": 4.523856057459751, "percentage": 90.48, "elapsed_time": "4:00:16", "remaining_time": "0:25:17", "throughput": 8653.3, "total_tokens": 124753512} +{"current_steps": 185180, "total_steps": 204665, "loss": 0.0, "lr": 5.4719766113220936e-08, "epoch": 4.523978208291599, "percentage": 90.48, "elapsed_time": "4:00:17", "remaining_time": "0:25:17", "throughput": 8653.32, "total_tokens": 124756904} +{"current_steps": 185185, "total_steps": 204665, "loss": 0.0, "lr": 5.4691946921091804e-08, "epoch": 4.524100359123445, "percentage": 90.48, "elapsed_time": "4:00:17", "remaining_time": "0:25:16", "throughput": 8653.33, "total_tokens": 124759976} +{"current_steps": 185190, "total_steps": 204665, "loss": 0.0, "lr": 5.4664134603497166e-08, "epoch": 4.524222509955293, "percentage": 90.48, "elapsed_time": "4:00:17", "remaining_time": "0:25:16", "throughput": 8653.34, "total_tokens": 124763112} +{"current_steps": 185195, "total_steps": 204665, "loss": 0.0, "lr": 5.463632916063909e-08, "epoch": 4.52434466078714, "percentage": 90.49, "elapsed_time": "4:00:18", "remaining_time": "0:25:15", "throughput": 8653.37, "total_tokens": 124766568} +{"current_steps": 185200, "total_steps": 204665, "loss": 0.0, "lr": 5.460853059272008e-08, "epoch": 4.5244668116189874, "percentage": 90.49, "elapsed_time": "4:00:18", "remaining_time": "0:25:15", "throughput": 8653.38, "total_tokens": 124769640} +{"current_steps": 185205, "total_steps": 204665, "loss": 0.0, "lr": 5.458073889994197e-08, "epoch": 4.524588962450834, "percentage": 90.49, "elapsed_time": "4:00:18", "remaining_time": "0:25:15", "throughput": 8653.4, "total_tokens": 124772904} +{"current_steps": 185210, "total_steps": 204665, "loss": 0.0, "lr": 5.4552954082507154e-08, "epoch": 4.524711113282682, "percentage": 90.49, "elapsed_time": "4:00:19", "remaining_time": "0:25:14", "throughput": 8653.44, "total_tokens": 124776552} +{"current_steps": 185215, "total_steps": 204665, "loss": 0.0, "lr": 5.452517614061736e-08, "epoch": 4.524833264114529, "percentage": 90.5, "elapsed_time": "4:00:19", "remaining_time": "0:25:14", "throughput": 8653.47, "total_tokens": 124780072} +{"current_steps": 185220, "total_steps": 204665, "loss": 0.0, "lr": 5.4497405074474976e-08, "epoch": 4.524955414946376, "percentage": 90.5, "elapsed_time": "4:00:19", "remaining_time": "0:25:13", "throughput": 8653.49, "total_tokens": 124783208} +{"current_steps": 185225, "total_steps": 204665, "loss": 0.0, "lr": 5.446964088428174e-08, "epoch": 4.525077565778223, "percentage": 90.5, "elapsed_time": "4:00:20", "remaining_time": "0:25:13", "throughput": 8653.5, "total_tokens": 124786408} +{"current_steps": 185230, "total_steps": 204665, "loss": 0.0, "lr": 5.444188357023938e-08, "epoch": 4.52519971661007, "percentage": 90.5, "elapsed_time": "4:00:20", "remaining_time": "0:25:13", "throughput": 8653.53, "total_tokens": 124789800} +{"current_steps": 185235, "total_steps": 204665, "loss": 0.0, "lr": 5.441413313255028e-08, "epoch": 4.525321867441917, "percentage": 90.51, "elapsed_time": "4:00:21", "remaining_time": "0:25:12", "throughput": 8653.55, "total_tokens": 124793192} +{"current_steps": 185240, "total_steps": 204665, "loss": 0.0, "lr": 5.4386389571415616e-08, "epoch": 4.525444018273764, "percentage": 90.51, "elapsed_time": "4:00:21", "remaining_time": "0:25:12", "throughput": 8653.57, "total_tokens": 124796392} +{"current_steps": 185245, "total_steps": 204665, "loss": 0.0, "lr": 5.435865288703756e-08, "epoch": 4.525566169105612, "percentage": 90.51, "elapsed_time": "4:00:21", "remaining_time": "0:25:11", "throughput": 8653.59, "total_tokens": 124799592} +{"current_steps": 185250, "total_steps": 204665, "loss": 0.0, "lr": 5.433092307961784e-08, "epoch": 4.5256883199374585, "percentage": 90.51, "elapsed_time": "4:00:22", "remaining_time": "0:25:11", "throughput": 8653.58, "total_tokens": 124802472} +{"current_steps": 185255, "total_steps": 204665, "loss": 0.0, "lr": 5.4303200149357966e-08, "epoch": 4.525810470769306, "percentage": 90.52, "elapsed_time": "4:00:22", "remaining_time": "0:25:11", "throughput": 8653.59, "total_tokens": 124805480} +{"current_steps": 185260, "total_steps": 204665, "loss": 0.0, "lr": 5.4275484096459546e-08, "epoch": 4.525932621601153, "percentage": 90.52, "elapsed_time": "4:00:22", "remaining_time": "0:25:10", "throughput": 8653.67, "total_tokens": 124809896} +{"current_steps": 185265, "total_steps": 204665, "loss": 0.0, "lr": 5.42477749211242e-08, "epoch": 4.5260547724330005, "percentage": 90.52, "elapsed_time": "4:00:23", "remaining_time": "0:25:10", "throughput": 8653.72, "total_tokens": 124813672} +{"current_steps": 185270, "total_steps": 204665, "loss": 0.0, "lr": 5.422007262355344e-08, "epoch": 4.526176923264847, "percentage": 90.52, "elapsed_time": "4:00:23", "remaining_time": "0:25:09", "throughput": 8653.72, "total_tokens": 124816616} +{"current_steps": 185275, "total_steps": 204665, "loss": 0.0, "lr": 5.419237720394865e-08, "epoch": 4.526299074096695, "percentage": 90.53, "elapsed_time": "4:00:23", "remaining_time": "0:25:09", "throughput": 8653.77, "total_tokens": 124820456} +{"current_steps": 185280, "total_steps": 204665, "loss": 0.0, "lr": 5.416468866251123e-08, "epoch": 4.526421224928542, "percentage": 90.53, "elapsed_time": "4:00:24", "remaining_time": "0:25:09", "throughput": 8653.78, "total_tokens": 124823656} +{"current_steps": 185285, "total_steps": 204665, "loss": 0.0305, "lr": 5.413700699944268e-08, "epoch": 4.526543375760389, "percentage": 90.53, "elapsed_time": "4:00:24", "remaining_time": "0:25:08", "throughput": 8653.79, "total_tokens": 124826792} +{"current_steps": 185290, "total_steps": 204665, "loss": 0.0001, "lr": 5.4109332214944184e-08, "epoch": 4.526665526592236, "percentage": 90.53, "elapsed_time": "4:00:24", "remaining_time": "0:25:08", "throughput": 8653.81, "total_tokens": 124829928} +{"current_steps": 185295, "total_steps": 204665, "loss": 0.0, "lr": 5.4081664309217126e-08, "epoch": 4.526787677424084, "percentage": 90.54, "elapsed_time": "4:00:25", "remaining_time": "0:25:07", "throughput": 8653.8, "total_tokens": 124832808} +{"current_steps": 185300, "total_steps": 204665, "loss": 0.0, "lr": 5.405400328246246e-08, "epoch": 4.52690982825593, "percentage": 90.54, "elapsed_time": "4:00:25", "remaining_time": "0:25:07", "throughput": 8653.82, "total_tokens": 124835944} +{"current_steps": 185305, "total_steps": 204665, "loss": 0.0, "lr": 5.402634913488158e-08, "epoch": 4.527031979087777, "percentage": 90.54, "elapsed_time": "4:00:25", "remaining_time": "0:25:07", "throughput": 8653.86, "total_tokens": 124839656} +{"current_steps": 185310, "total_steps": 204665, "loss": 0.0, "lr": 5.399870186667554e-08, "epoch": 4.527154129919625, "percentage": 90.54, "elapsed_time": "4:00:26", "remaining_time": "0:25:06", "throughput": 8653.87, "total_tokens": 124842728} +{"current_steps": 185315, "total_steps": 204665, "loss": 0.0, "lr": 5.3971061478045533e-08, "epoch": 4.527276280751472, "percentage": 90.55, "elapsed_time": "4:00:26", "remaining_time": "0:25:06", "throughput": 8653.88, "total_tokens": 124845864} +{"current_steps": 185320, "total_steps": 204665, "loss": 0.0, "lr": 5.3943427969192154e-08, "epoch": 4.527398431583319, "percentage": 90.55, "elapsed_time": "4:00:26", "remaining_time": "0:25:05", "throughput": 8653.92, "total_tokens": 124849512} +{"current_steps": 185325, "total_steps": 204665, "loss": 0.0, "lr": 5.391580134031681e-08, "epoch": 4.527520582415166, "percentage": 90.55, "elapsed_time": "4:00:27", "remaining_time": "0:25:05", "throughput": 8653.95, "total_tokens": 124852904} +{"current_steps": 185330, "total_steps": 204665, "loss": 0.0, "lr": 5.388818159162034e-08, "epoch": 4.5276427332470135, "percentage": 90.55, "elapsed_time": "4:00:27", "remaining_time": "0:25:05", "throughput": 8653.99, "total_tokens": 124856552} +{"current_steps": 185335, "total_steps": 204665, "loss": 0.0, "lr": 5.386056872330325e-08, "epoch": 4.52776488407886, "percentage": 90.56, "elapsed_time": "4:00:27", "remaining_time": "0:25:04", "throughput": 8654.01, "total_tokens": 124859880} +{"current_steps": 185340, "total_steps": 204665, "loss": 0.0072, "lr": 5.383296273556648e-08, "epoch": 4.527887034910708, "percentage": 90.56, "elapsed_time": "4:00:28", "remaining_time": "0:25:04", "throughput": 8654.02, "total_tokens": 124862952} +{"current_steps": 185345, "total_steps": 204665, "loss": 0.0, "lr": 5.380536362861121e-08, "epoch": 4.528009185742555, "percentage": 90.56, "elapsed_time": "4:00:28", "remaining_time": "0:25:04", "throughput": 8654.03, "total_tokens": 124866024} +{"current_steps": 185350, "total_steps": 204665, "loss": 0.0, "lr": 5.377777140263762e-08, "epoch": 4.528131336574402, "percentage": 90.56, "elapsed_time": "4:00:29", "remaining_time": "0:25:03", "throughput": 8654.07, "total_tokens": 124869672} +{"current_steps": 185355, "total_steps": 204665, "loss": 0.0, "lr": 5.375018605784665e-08, "epoch": 4.528253487406249, "percentage": 90.57, "elapsed_time": "4:00:29", "remaining_time": "0:25:03", "throughput": 8654.1, "total_tokens": 124873064} +{"current_steps": 185360, "total_steps": 204665, "loss": 0.0, "lr": 5.372260759443881e-08, "epoch": 4.528375638238097, "percentage": 90.57, "elapsed_time": "4:00:29", "remaining_time": "0:25:02", "throughput": 8654.11, "total_tokens": 124876136} +{"current_steps": 185365, "total_steps": 204665, "loss": 0.0, "lr": 5.36950360126146e-08, "epoch": 4.528497789069943, "percentage": 90.57, "elapsed_time": "4:00:30", "remaining_time": "0:25:02", "throughput": 8654.14, "total_tokens": 124879592} +{"current_steps": 185370, "total_steps": 204665, "loss": 0.0, "lr": 5.3667471312574766e-08, "epoch": 4.528619939901791, "percentage": 90.57, "elapsed_time": "4:00:30", "remaining_time": "0:25:02", "throughput": 8654.17, "total_tokens": 124883048} +{"current_steps": 185375, "total_steps": 204665, "loss": 0.0, "lr": 5.363991349451957e-08, "epoch": 4.528742090733638, "percentage": 90.57, "elapsed_time": "4:00:30", "remaining_time": "0:25:01", "throughput": 8654.23, "total_tokens": 124887080} +{"current_steps": 185380, "total_steps": 204665, "loss": 0.0, "lr": 5.3612362558649536e-08, "epoch": 4.528864241565485, "percentage": 90.58, "elapsed_time": "4:00:31", "remaining_time": "0:25:01", "throughput": 8654.24, "total_tokens": 124890152} +{"current_steps": 185385, "total_steps": 204665, "loss": 0.0, "lr": 5.358481850516483e-08, "epoch": 4.528986392397332, "percentage": 90.58, "elapsed_time": "4:00:31", "remaining_time": "0:25:00", "throughput": 8654.24, "total_tokens": 124893096} +{"current_steps": 185390, "total_steps": 204665, "loss": 0.0, "lr": 5.3557281334265957e-08, "epoch": 4.52910854322918, "percentage": 90.58, "elapsed_time": "4:00:31", "remaining_time": "0:25:00", "throughput": 8654.26, "total_tokens": 124896360} +{"current_steps": 185395, "total_steps": 204665, "loss": 0.0489, "lr": 5.352975104615298e-08, "epoch": 4.5292306940610265, "percentage": 90.58, "elapsed_time": "4:00:32", "remaining_time": "0:25:00", "throughput": 8654.27, "total_tokens": 124899560} +{"current_steps": 185400, "total_steps": 204665, "loss": 0.0, "lr": 5.35022276410263e-08, "epoch": 4.529352844892873, "percentage": 90.59, "elapsed_time": "4:00:32", "remaining_time": "0:24:59", "throughput": 8654.31, "total_tokens": 124903144} +{"current_steps": 185405, "total_steps": 204665, "loss": 0.0, "lr": 5.347471111908608e-08, "epoch": 4.529474995724721, "percentage": 90.59, "elapsed_time": "4:00:32", "remaining_time": "0:24:59", "throughput": 8654.32, "total_tokens": 124906280} +{"current_steps": 185410, "total_steps": 204665, "loss": 0.0, "lr": 5.3447201480532164e-08, "epoch": 4.5295971465565685, "percentage": 90.59, "elapsed_time": "4:00:33", "remaining_time": "0:24:58", "throughput": 8654.35, "total_tokens": 124909800} +{"current_steps": 185415, "total_steps": 204665, "loss": 0.0, "lr": 5.3419698725564956e-08, "epoch": 4.529719297388415, "percentage": 90.59, "elapsed_time": "4:00:33", "remaining_time": "0:24:58", "throughput": 8654.37, "total_tokens": 124912936} +{"current_steps": 185420, "total_steps": 204665, "loss": 0.0, "lr": 5.3392202854384284e-08, "epoch": 4.529841448220262, "percentage": 90.6, "elapsed_time": "4:00:33", "remaining_time": "0:24:58", "throughput": 8654.39, "total_tokens": 124916264} +{"current_steps": 185425, "total_steps": 204665, "loss": 0.0, "lr": 5.3364713867189995e-08, "epoch": 4.52996359905211, "percentage": 90.6, "elapsed_time": "4:00:34", "remaining_time": "0:24:57", "throughput": 8654.39, "total_tokens": 124919208} +{"current_steps": 185430, "total_steps": 204665, "loss": 0.0, "lr": 5.3337231764182366e-08, "epoch": 4.530085749883956, "percentage": 90.6, "elapsed_time": "4:00:34", "remaining_time": "0:24:57", "throughput": 8654.42, "total_tokens": 124922664} +{"current_steps": 185435, "total_steps": 204665, "loss": 0.0, "lr": 5.3309756545560694e-08, "epoch": 4.530207900715804, "percentage": 90.6, "elapsed_time": "4:00:34", "remaining_time": "0:24:56", "throughput": 8654.44, "total_tokens": 124926056} +{"current_steps": 185440, "total_steps": 204665, "loss": 0.0, "lr": 5.328228821152536e-08, "epoch": 4.530330051547651, "percentage": 90.61, "elapsed_time": "4:00:35", "remaining_time": "0:24:56", "throughput": 8654.48, "total_tokens": 124929576} +{"current_steps": 185445, "total_steps": 204665, "loss": 0.0, "lr": 5.325482676227566e-08, "epoch": 4.530452202379498, "percentage": 90.61, "elapsed_time": "4:00:35", "remaining_time": "0:24:56", "throughput": 8654.48, "total_tokens": 124932584} +{"current_steps": 185450, "total_steps": 204665, "loss": 0.0, "lr": 5.3227372198011657e-08, "epoch": 4.530574353211345, "percentage": 90.61, "elapsed_time": "4:00:35", "remaining_time": "0:24:55", "throughput": 8654.51, "total_tokens": 124935976} +{"current_steps": 185455, "total_steps": 204665, "loss": 0.0, "lr": 5.319992451893274e-08, "epoch": 4.530696504043193, "percentage": 90.61, "elapsed_time": "4:00:36", "remaining_time": "0:24:55", "throughput": 8654.52, "total_tokens": 124939240} +{"current_steps": 185460, "total_steps": 204665, "loss": 0.0, "lr": 5.3172483725238635e-08, "epoch": 4.5308186548750395, "percentage": 90.62, "elapsed_time": "4:00:36", "remaining_time": "0:24:54", "throughput": 8654.53, "total_tokens": 124942312} +{"current_steps": 185465, "total_steps": 204665, "loss": 0.0, "lr": 5.3145049817128975e-08, "epoch": 4.530940805706887, "percentage": 90.62, "elapsed_time": "4:00:37", "remaining_time": "0:24:54", "throughput": 8654.61, "total_tokens": 124946600} +{"current_steps": 185470, "total_steps": 204665, "loss": 0.0, "lr": 5.311762279480314e-08, "epoch": 4.531062956538734, "percentage": 90.62, "elapsed_time": "4:00:37", "remaining_time": "0:24:54", "throughput": 8654.63, "total_tokens": 124949928} +{"current_steps": 185475, "total_steps": 204665, "loss": 0.0, "lr": 5.309020265846076e-08, "epoch": 4.5311851073705816, "percentage": 90.62, "elapsed_time": "4:00:37", "remaining_time": "0:24:53", "throughput": 8654.64, "total_tokens": 124953128} +{"current_steps": 185480, "total_steps": 204665, "loss": 0.0, "lr": 5.306278940830089e-08, "epoch": 4.531307258202428, "percentage": 90.63, "elapsed_time": "4:00:38", "remaining_time": "0:24:53", "throughput": 8654.68, "total_tokens": 124956712} +{"current_steps": 185485, "total_steps": 204665, "loss": 0.0, "lr": 5.3035383044523266e-08, "epoch": 4.531429409034276, "percentage": 90.63, "elapsed_time": "4:00:38", "remaining_time": "0:24:52", "throughput": 8654.73, "total_tokens": 124960552} +{"current_steps": 185490, "total_steps": 204665, "loss": 0.0, "lr": 5.3007983567326943e-08, "epoch": 4.531551559866123, "percentage": 90.63, "elapsed_time": "4:00:38", "remaining_time": "0:24:52", "throughput": 8654.75, "total_tokens": 124963816} +{"current_steps": 185495, "total_steps": 204665, "loss": 0.0, "lr": 5.298059097691132e-08, "epoch": 4.531673710697969, "percentage": 90.63, "elapsed_time": "4:00:39", "remaining_time": "0:24:52", "throughput": 8654.77, "total_tokens": 124967080} +{"current_steps": 185500, "total_steps": 204665, "loss": 0.0, "lr": 5.295320527347558e-08, "epoch": 4.531795861529817, "percentage": 90.64, "elapsed_time": "4:00:39", "remaining_time": "0:24:51", "throughput": 8654.82, "total_tokens": 124970856} +{"current_steps": 185505, "total_steps": 204665, "loss": 0.0, "lr": 5.292582645721877e-08, "epoch": 4.531918012361664, "percentage": 90.64, "elapsed_time": "4:00:39", "remaining_time": "0:24:51", "throughput": 8654.89, "total_tokens": 124975208} +{"current_steps": 185510, "total_steps": 204665, "loss": 0.0, "lr": 5.2898454528340296e-08, "epoch": 4.532040163193511, "percentage": 90.64, "elapsed_time": "4:00:40", "remaining_time": "0:24:51", "throughput": 8655.03, "total_tokens": 124980648} +{"current_steps": 185515, "total_steps": 204665, "loss": 0.0254, "lr": 5.287108948703878e-08, "epoch": 4.532162314025358, "percentage": 90.64, "elapsed_time": "4:00:40", "remaining_time": "0:24:50", "throughput": 8655.07, "total_tokens": 124984168} +{"current_steps": 185520, "total_steps": 204665, "loss": 0.0, "lr": 5.284373133351361e-08, "epoch": 4.532284464857206, "percentage": 90.65, "elapsed_time": "4:00:40", "remaining_time": "0:24:50", "throughput": 8655.11, "total_tokens": 124987816} +{"current_steps": 185525, "total_steps": 204665, "loss": 0.0, "lr": 5.2816380067963406e-08, "epoch": 4.532406615689053, "percentage": 90.65, "elapsed_time": "4:00:41", "remaining_time": "0:24:49", "throughput": 8655.13, "total_tokens": 124991144} +{"current_steps": 185530, "total_steps": 204665, "loss": 0.0, "lr": 5.278903569058735e-08, "epoch": 4.5325287665209, "percentage": 90.65, "elapsed_time": "4:00:41", "remaining_time": "0:24:49", "throughput": 8655.15, "total_tokens": 124994344} +{"current_steps": 185535, "total_steps": 204665, "loss": 0.0, "lr": 5.276169820158427e-08, "epoch": 4.532650917352747, "percentage": 90.65, "elapsed_time": "4:00:41", "remaining_time": "0:24:49", "throughput": 8655.15, "total_tokens": 124997288} +{"current_steps": 185540, "total_steps": 204665, "loss": 0.0, "lr": 5.27343676011528e-08, "epoch": 4.532773068184595, "percentage": 90.66, "elapsed_time": "4:00:42", "remaining_time": "0:24:48", "throughput": 8655.2, "total_tokens": 125001192} +{"current_steps": 185545, "total_steps": 204665, "loss": 0.0, "lr": 5.270704388949188e-08, "epoch": 4.532895219016441, "percentage": 90.66, "elapsed_time": "4:00:42", "remaining_time": "0:24:48", "throughput": 8655.22, "total_tokens": 125004456} +{"current_steps": 185550, "total_steps": 204665, "loss": 0.0, "lr": 5.2679727066799905e-08, "epoch": 4.533017369848289, "percentage": 90.66, "elapsed_time": "4:00:43", "remaining_time": "0:24:47", "throughput": 8655.25, "total_tokens": 125008040} +{"current_steps": 185555, "total_steps": 204665, "loss": 0.0, "lr": 5.265241713327584e-08, "epoch": 4.533139520680136, "percentage": 90.66, "elapsed_time": "4:00:43", "remaining_time": "0:24:47", "throughput": 8655.29, "total_tokens": 125011496} +{"current_steps": 185560, "total_steps": 204665, "loss": 0.0, "lr": 5.262511408911841e-08, "epoch": 4.533261671511983, "percentage": 90.67, "elapsed_time": "4:00:43", "remaining_time": "0:24:47", "throughput": 8655.32, "total_tokens": 125015016} +{"current_steps": 185565, "total_steps": 204665, "loss": 0.0, "lr": 5.2597817934525776e-08, "epoch": 4.53338382234383, "percentage": 90.67, "elapsed_time": "4:00:44", "remaining_time": "0:24:46", "throughput": 8655.35, "total_tokens": 125018408} +{"current_steps": 185570, "total_steps": 204665, "loss": 0.0, "lr": 5.257052866969669e-08, "epoch": 4.533505973175677, "percentage": 90.67, "elapsed_time": "4:00:44", "remaining_time": "0:24:46", "throughput": 8655.38, "total_tokens": 125021992} +{"current_steps": 185575, "total_steps": 204665, "loss": 0.0, "lr": 5.2543246294829426e-08, "epoch": 4.5336281240075245, "percentage": 90.67, "elapsed_time": "4:00:44", "remaining_time": "0:24:45", "throughput": 8655.41, "total_tokens": 125025384} +{"current_steps": 185580, "total_steps": 204665, "loss": 0.0, "lr": 5.2515970810122715e-08, "epoch": 4.533750274839372, "percentage": 90.68, "elapsed_time": "4:00:45", "remaining_time": "0:24:45", "throughput": 8655.43, "total_tokens": 125028712} +{"current_steps": 185585, "total_steps": 204665, "loss": 0.0, "lr": 5.248870221577451e-08, "epoch": 4.533872425671219, "percentage": 90.68, "elapsed_time": "4:00:45", "remaining_time": "0:24:45", "throughput": 8655.44, "total_tokens": 125031848} +{"current_steps": 185590, "total_steps": 204665, "loss": 0.0, "lr": 5.2461440511983424e-08, "epoch": 4.533994576503066, "percentage": 90.68, "elapsed_time": "4:00:45", "remaining_time": "0:24:44", "throughput": 8655.45, "total_tokens": 125034984} +{"current_steps": 185595, "total_steps": 204665, "loss": 0.0, "lr": 5.243418569894764e-08, "epoch": 4.534116727334913, "percentage": 90.68, "elapsed_time": "4:00:46", "remaining_time": "0:24:44", "throughput": 8655.47, "total_tokens": 125038248} +{"current_steps": 185600, "total_steps": 204665, "loss": 0.0, "lr": 5.2406937776865225e-08, "epoch": 4.53423887816676, "percentage": 90.68, "elapsed_time": "4:00:46", "remaining_time": "0:24:43", "throughput": 8655.5, "total_tokens": 125041640} +{"current_steps": 185605, "total_steps": 204665, "loss": 0.0, "lr": 5.2379696745934455e-08, "epoch": 4.534361028998608, "percentage": 90.69, "elapsed_time": "4:00:46", "remaining_time": "0:24:43", "throughput": 8655.51, "total_tokens": 125044776} +{"current_steps": 185610, "total_steps": 204665, "loss": 0.0003, "lr": 5.23524626063534e-08, "epoch": 4.534483179830454, "percentage": 90.69, "elapsed_time": "4:00:47", "remaining_time": "0:24:43", "throughput": 8655.52, "total_tokens": 125047912} +{"current_steps": 185615, "total_steps": 204665, "loss": 0.0, "lr": 5.232523535832012e-08, "epoch": 4.534605330662302, "percentage": 90.69, "elapsed_time": "4:00:47", "remaining_time": "0:24:42", "throughput": 8655.56, "total_tokens": 125051496} +{"current_steps": 185620, "total_steps": 204665, "loss": 0.0, "lr": 5.229801500203268e-08, "epoch": 4.534727481494149, "percentage": 90.69, "elapsed_time": "4:00:47", "remaining_time": "0:24:42", "throughput": 8655.58, "total_tokens": 125054888} +{"current_steps": 185625, "total_steps": 204665, "loss": 0.0, "lr": 5.2270801537689035e-08, "epoch": 4.534849632325996, "percentage": 90.7, "elapsed_time": "4:00:48", "remaining_time": "0:24:41", "throughput": 8655.62, "total_tokens": 125058536} +{"current_steps": 185630, "total_steps": 204665, "loss": 0.0, "lr": 5.2243594965486916e-08, "epoch": 4.534971783157843, "percentage": 90.7, "elapsed_time": "4:00:48", "remaining_time": "0:24:41", "throughput": 8655.65, "total_tokens": 125061928} +{"current_steps": 185635, "total_steps": 204665, "loss": 0.0, "lr": 5.221639528562438e-08, "epoch": 4.535093933989691, "percentage": 90.7, "elapsed_time": "4:00:48", "remaining_time": "0:24:41", "throughput": 8655.69, "total_tokens": 125065704} +{"current_steps": 185640, "total_steps": 204665, "loss": 0.0, "lr": 5.218920249829906e-08, "epoch": 4.5352160848215375, "percentage": 90.7, "elapsed_time": "4:00:49", "remaining_time": "0:24:40", "throughput": 8655.71, "total_tokens": 125068968} +{"current_steps": 185645, "total_steps": 204665, "loss": 0.0, "lr": 5.216201660370878e-08, "epoch": 4.535338235653385, "percentage": 90.71, "elapsed_time": "4:00:49", "remaining_time": "0:24:40", "throughput": 8655.73, "total_tokens": 125072168} +{"current_steps": 185650, "total_steps": 204665, "loss": 0.0, "lr": 5.2134837602051174e-08, "epoch": 4.535460386485232, "percentage": 90.71, "elapsed_time": "4:00:50", "remaining_time": "0:24:40", "throughput": 8655.78, "total_tokens": 125076008} +{"current_steps": 185655, "total_steps": 204665, "loss": 0.0, "lr": 5.210766549352419e-08, "epoch": 4.5355825373170795, "percentage": 90.71, "elapsed_time": "4:00:50", "remaining_time": "0:24:39", "throughput": 8655.78, "total_tokens": 125078888} +{"current_steps": 185660, "total_steps": 204665, "loss": 0.0, "lr": 5.2080500278325e-08, "epoch": 4.535704688148926, "percentage": 90.71, "elapsed_time": "4:00:50", "remaining_time": "0:24:39", "throughput": 8655.8, "total_tokens": 125082152} +{"current_steps": 185665, "total_steps": 204665, "loss": 0.0, "lr": 5.2053341956651566e-08, "epoch": 4.535826838980773, "percentage": 90.72, "elapsed_time": "4:00:51", "remaining_time": "0:24:38", "throughput": 8655.84, "total_tokens": 125085864} +{"current_steps": 185670, "total_steps": 204665, "loss": 0.0, "lr": 5.202619052870105e-08, "epoch": 4.535948989812621, "percentage": 90.72, "elapsed_time": "4:00:51", "remaining_time": "0:24:38", "throughput": 8655.87, "total_tokens": 125089256} +{"current_steps": 185675, "total_steps": 204665, "loss": 0.0, "lr": 5.19990459946712e-08, "epoch": 4.536071140644468, "percentage": 90.72, "elapsed_time": "4:00:51", "remaining_time": "0:24:38", "throughput": 8655.91, "total_tokens": 125092968} +{"current_steps": 185680, "total_steps": 204665, "loss": 0.0, "lr": 5.1971908354759065e-08, "epoch": 4.536193291476315, "percentage": 90.72, "elapsed_time": "4:00:52", "remaining_time": "0:24:37", "throughput": 8655.93, "total_tokens": 125096232} +{"current_steps": 185685, "total_steps": 204665, "loss": 0.0, "lr": 5.194477760916227e-08, "epoch": 4.536315442308162, "percentage": 90.73, "elapsed_time": "4:00:52", "remaining_time": "0:24:37", "throughput": 8655.97, "total_tokens": 125099880} +{"current_steps": 185690, "total_steps": 204665, "loss": 0.0256, "lr": 5.1917653758078216e-08, "epoch": 4.536437593140009, "percentage": 90.73, "elapsed_time": "4:00:52", "remaining_time": "0:24:36", "throughput": 8655.99, "total_tokens": 125103080} +{"current_steps": 185695, "total_steps": 204665, "loss": 0.0, "lr": 5.189053680170374e-08, "epoch": 4.536559743971856, "percentage": 90.73, "elapsed_time": "4:00:53", "remaining_time": "0:24:36", "throughput": 8656.04, "total_tokens": 125106856} +{"current_steps": 185700, "total_steps": 204665, "loss": 0.0, "lr": 5.186342674023647e-08, "epoch": 4.536681894803704, "percentage": 90.73, "elapsed_time": "4:00:53", "remaining_time": "0:24:36", "throughput": 8656.06, "total_tokens": 125110248} +{"current_steps": 185705, "total_steps": 204665, "loss": 0.0, "lr": 5.1836323573873354e-08, "epoch": 4.5368040456355505, "percentage": 90.74, "elapsed_time": "4:00:53", "remaining_time": "0:24:35", "throughput": 8656.06, "total_tokens": 125113256} +{"current_steps": 185710, "total_steps": 204665, "loss": 0.0006, "lr": 5.180922730281134e-08, "epoch": 4.536926196467398, "percentage": 90.74, "elapsed_time": "4:00:54", "remaining_time": "0:24:35", "throughput": 8656.08, "total_tokens": 125116392} +{"current_steps": 185715, "total_steps": 204665, "loss": 0.0, "lr": 5.178213792724795e-08, "epoch": 4.537048347299245, "percentage": 90.74, "elapsed_time": "4:00:54", "remaining_time": "0:24:34", "throughput": 8656.09, "total_tokens": 125119592} +{"current_steps": 185720, "total_steps": 204665, "loss": 0.0, "lr": 5.175505544737968e-08, "epoch": 4.5371704981310925, "percentage": 90.74, "elapsed_time": "4:00:54", "remaining_time": "0:24:34", "throughput": 8656.11, "total_tokens": 125122856} +{"current_steps": 185725, "total_steps": 204665, "loss": 0.0, "lr": 5.1727979863403826e-08, "epoch": 4.537292648962939, "percentage": 90.75, "elapsed_time": "4:00:55", "remaining_time": "0:24:34", "throughput": 8656.14, "total_tokens": 125126312} +{"current_steps": 185730, "total_steps": 204665, "loss": 0.0474, "lr": 5.1700911175517114e-08, "epoch": 4.537414799794787, "percentage": 90.75, "elapsed_time": "4:00:55", "remaining_time": "0:24:33", "throughput": 8656.15, "total_tokens": 125129448} +{"current_steps": 185735, "total_steps": 204665, "loss": 0.0, "lr": 5.167384938391639e-08, "epoch": 4.537536950626634, "percentage": 90.75, "elapsed_time": "4:00:55", "remaining_time": "0:24:33", "throughput": 8656.19, "total_tokens": 125133096} +{"current_steps": 185740, "total_steps": 204665, "loss": 0.0, "lr": 5.1646794488798606e-08, "epoch": 4.537659101458481, "percentage": 90.75, "elapsed_time": "4:00:56", "remaining_time": "0:24:32", "throughput": 8656.21, "total_tokens": 125136424} +{"current_steps": 185745, "total_steps": 204665, "loss": 0.0, "lr": 5.161974649036027e-08, "epoch": 4.537781252290328, "percentage": 90.76, "elapsed_time": "4:00:56", "remaining_time": "0:24:32", "throughput": 8656.24, "total_tokens": 125139880} +{"current_steps": 185750, "total_steps": 204665, "loss": 0.0, "lr": 5.159270538879834e-08, "epoch": 4.537903403122176, "percentage": 90.76, "elapsed_time": "4:00:56", "remaining_time": "0:24:32", "throughput": 8656.27, "total_tokens": 125143400} +{"current_steps": 185755, "total_steps": 204665, "loss": 0.0, "lr": 5.156567118430921e-08, "epoch": 4.538025553954022, "percentage": 90.76, "elapsed_time": "4:00:57", "remaining_time": "0:24:31", "throughput": 8656.3, "total_tokens": 125146856} +{"current_steps": 185760, "total_steps": 204665, "loss": 0.0, "lr": 5.1538643877089724e-08, "epoch": 4.538147704785869, "percentage": 90.76, "elapsed_time": "4:00:57", "remaining_time": "0:24:31", "throughput": 8656.32, "total_tokens": 125150120} +{"current_steps": 185765, "total_steps": 204665, "loss": 0.0, "lr": 5.151162346733629e-08, "epoch": 4.538269855617717, "percentage": 90.77, "elapsed_time": "4:00:57", "remaining_time": "0:24:30", "throughput": 8656.32, "total_tokens": 125153000} +{"current_steps": 185770, "total_steps": 204665, "loss": 0.0, "lr": 5.1484609955245395e-08, "epoch": 4.538392006449564, "percentage": 90.77, "elapsed_time": "4:00:58", "remaining_time": "0:24:30", "throughput": 8656.35, "total_tokens": 125156584} +{"current_steps": 185775, "total_steps": 204665, "loss": 0.0, "lr": 5.145760334101368e-08, "epoch": 4.538514157281411, "percentage": 90.77, "elapsed_time": "4:00:58", "remaining_time": "0:24:30", "throughput": 8656.37, "total_tokens": 125159848} +{"current_steps": 185780, "total_steps": 204665, "loss": 0.0, "lr": 5.14306036248372e-08, "epoch": 4.538636308113258, "percentage": 90.77, "elapsed_time": "4:00:59", "remaining_time": "0:24:29", "throughput": 8656.4, "total_tokens": 125163240} +{"current_steps": 185785, "total_steps": 204665, "loss": 0.0001, "lr": 5.140361080691269e-08, "epoch": 4.5387584589451055, "percentage": 90.78, "elapsed_time": "4:00:59", "remaining_time": "0:24:29", "throughput": 8656.41, "total_tokens": 125166440} +{"current_steps": 185790, "total_steps": 204665, "loss": 0.0, "lr": 5.1376624887436105e-08, "epoch": 4.538880609776952, "percentage": 90.78, "elapsed_time": "4:00:59", "remaining_time": "0:24:29", "throughput": 8656.42, "total_tokens": 125169576} +{"current_steps": 185795, "total_steps": 204665, "loss": 0.0, "lr": 5.134964586660406e-08, "epoch": 4.5390027606088, "percentage": 90.78, "elapsed_time": "4:01:00", "remaining_time": "0:24:28", "throughput": 8656.45, "total_tokens": 125173032} +{"current_steps": 185800, "total_steps": 204665, "loss": 0.0, "lr": 5.13226737446123e-08, "epoch": 4.539124911440647, "percentage": 90.78, "elapsed_time": "4:01:00", "remaining_time": "0:24:28", "throughput": 8656.5, "total_tokens": 125176872} +{"current_steps": 185805, "total_steps": 204665, "loss": 0.0, "lr": 5.129570852165732e-08, "epoch": 4.539247062272494, "percentage": 90.78, "elapsed_time": "4:01:00", "remaining_time": "0:24:27", "throughput": 8656.51, "total_tokens": 125179880} +{"current_steps": 185810, "total_steps": 204665, "loss": 0.0, "lr": 5.1268750197935196e-08, "epoch": 4.539369213104341, "percentage": 90.79, "elapsed_time": "4:01:01", "remaining_time": "0:24:27", "throughput": 8656.57, "total_tokens": 125183848} +{"current_steps": 185815, "total_steps": 204665, "loss": 0.0, "lr": 5.124179877364176e-08, "epoch": 4.539491363936189, "percentage": 90.79, "elapsed_time": "4:01:01", "remaining_time": "0:24:27", "throughput": 8656.59, "total_tokens": 125187304} +{"current_steps": 185820, "total_steps": 204665, "loss": 0.0, "lr": 5.1214854248973316e-08, "epoch": 4.539613514768035, "percentage": 90.79, "elapsed_time": "4:01:01", "remaining_time": "0:24:26", "throughput": 8656.66, "total_tokens": 125191400} +{"current_steps": 185825, "total_steps": 204665, "loss": 0.0, "lr": 5.118791662412558e-08, "epoch": 4.539735665599883, "percentage": 90.79, "elapsed_time": "4:01:02", "remaining_time": "0:24:26", "throughput": 8656.72, "total_tokens": 125195432} +{"current_steps": 185830, "total_steps": 204665, "loss": 0.0, "lr": 5.116098589929452e-08, "epoch": 4.53985781643173, "percentage": 90.8, "elapsed_time": "4:01:02", "remaining_time": "0:24:25", "throughput": 8656.75, "total_tokens": 125198888} +{"current_steps": 185835, "total_steps": 204665, "loss": 0.0, "lr": 5.1134062074675966e-08, "epoch": 4.539979967263577, "percentage": 90.8, "elapsed_time": "4:01:02", "remaining_time": "0:24:25", "throughput": 8656.79, "total_tokens": 125202536} +{"current_steps": 185840, "total_steps": 204665, "loss": 0.0, "lr": 5.110714515046577e-08, "epoch": 4.540102118095424, "percentage": 90.8, "elapsed_time": "4:01:03", "remaining_time": "0:24:25", "throughput": 8656.8, "total_tokens": 125205672} +{"current_steps": 185845, "total_steps": 204665, "loss": 0.0, "lr": 5.108023512685966e-08, "epoch": 4.540224268927272, "percentage": 90.8, "elapsed_time": "4:01:03", "remaining_time": "0:24:24", "throughput": 8656.83, "total_tokens": 125209128} +{"current_steps": 185850, "total_steps": 204665, "loss": 0.0, "lr": 5.105333200405315e-08, "epoch": 4.540346419759119, "percentage": 90.81, "elapsed_time": "4:01:03", "remaining_time": "0:24:24", "throughput": 8656.84, "total_tokens": 125212200} +{"current_steps": 185855, "total_steps": 204665, "loss": 0.0, "lr": 5.102643578224219e-08, "epoch": 4.540468570590965, "percentage": 90.81, "elapsed_time": "4:01:04", "remaining_time": "0:24:23", "throughput": 8656.9, "total_tokens": 125216296} +{"current_steps": 185860, "total_steps": 204665, "loss": 0.0, "lr": 5.099954646162208e-08, "epoch": 4.540590721422813, "percentage": 90.81, "elapsed_time": "4:01:04", "remaining_time": "0:24:23", "throughput": 8656.91, "total_tokens": 125219368} +{"current_steps": 185865, "total_steps": 204665, "loss": 0.0, "lr": 5.0972664042388534e-08, "epoch": 4.54071287225466, "percentage": 90.81, "elapsed_time": "4:01:05", "remaining_time": "0:24:23", "throughput": 8656.93, "total_tokens": 125222696} +{"current_steps": 185870, "total_steps": 204665, "loss": 0.0, "lr": 5.0945788524737186e-08, "epoch": 4.540835023086507, "percentage": 90.82, "elapsed_time": "4:01:05", "remaining_time": "0:24:22", "throughput": 8656.95, "total_tokens": 125226024} +{"current_steps": 185875, "total_steps": 204665, "loss": 0.0, "lr": 5.0918919908863214e-08, "epoch": 4.540957173918354, "percentage": 90.82, "elapsed_time": "4:01:05", "remaining_time": "0:24:22", "throughput": 8656.81, "total_tokens": 125229224} +{"current_steps": 185880, "total_steps": 204665, "loss": 0.0, "lr": 5.089205819496223e-08, "epoch": 4.541079324750202, "percentage": 90.82, "elapsed_time": "4:01:06", "remaining_time": "0:24:21", "throughput": 8656.84, "total_tokens": 125232616} +{"current_steps": 185885, "total_steps": 204665, "loss": 0.0402, "lr": 5.0865203383229305e-08, "epoch": 4.5412014755820485, "percentage": 90.82, "elapsed_time": "4:01:06", "remaining_time": "0:24:21", "throughput": 8656.84, "total_tokens": 125235560} +{"current_steps": 185890, "total_steps": 204665, "loss": 0.0, "lr": 5.0838355473860174e-08, "epoch": 4.541323626413896, "percentage": 90.83, "elapsed_time": "4:01:07", "remaining_time": "0:24:21", "throughput": 8656.88, "total_tokens": 125239272} +{"current_steps": 185895, "total_steps": 204665, "loss": 0.0, "lr": 5.081151446704956e-08, "epoch": 4.541445777245743, "percentage": 90.83, "elapsed_time": "4:01:07", "remaining_time": "0:24:20", "throughput": 8656.91, "total_tokens": 125242600} +{"current_steps": 185900, "total_steps": 204665, "loss": 0.0, "lr": 5.0784680362992884e-08, "epoch": 4.5415679280775905, "percentage": 90.83, "elapsed_time": "4:01:07", "remaining_time": "0:24:20", "throughput": 8656.93, "total_tokens": 125246056} +{"current_steps": 185905, "total_steps": 204665, "loss": 0.0, "lr": 5.075785316188552e-08, "epoch": 4.541690078909437, "percentage": 90.83, "elapsed_time": "4:01:08", "remaining_time": "0:24:19", "throughput": 8656.96, "total_tokens": 125249448} +{"current_steps": 185910, "total_steps": 204665, "loss": 0.0, "lr": 5.073103286392222e-08, "epoch": 4.541812229741285, "percentage": 90.84, "elapsed_time": "4:01:08", "remaining_time": "0:24:19", "throughput": 8656.98, "total_tokens": 125252776} +{"current_steps": 185915, "total_steps": 204665, "loss": 0.0, "lr": 5.070421946929837e-08, "epoch": 4.541934380573132, "percentage": 90.84, "elapsed_time": "4:01:08", "remaining_time": "0:24:19", "throughput": 8656.99, "total_tokens": 125255976} +{"current_steps": 185920, "total_steps": 204665, "loss": 0.0, "lr": 5.06774129782086e-08, "epoch": 4.542056531404979, "percentage": 90.84, "elapsed_time": "4:01:09", "remaining_time": "0:24:18", "throughput": 8657.01, "total_tokens": 125259240} +{"current_steps": 185925, "total_steps": 204665, "loss": 0.0, "lr": 5.0650613390847975e-08, "epoch": 4.542178682236826, "percentage": 90.84, "elapsed_time": "4:01:09", "remaining_time": "0:24:18", "throughput": 8657.04, "total_tokens": 125262632} +{"current_steps": 185930, "total_steps": 204665, "loss": 0.0, "lr": 5.0623820707411556e-08, "epoch": 4.542300833068673, "percentage": 90.85, "elapsed_time": "4:01:09", "remaining_time": "0:24:18", "throughput": 8657.03, "total_tokens": 125265448} +{"current_steps": 185935, "total_steps": 204665, "loss": 0.0, "lr": 5.0597034928094084e-08, "epoch": 4.54242298390052, "percentage": 90.85, "elapsed_time": "4:01:10", "remaining_time": "0:24:17", "throughput": 8657.07, "total_tokens": 125268904} +{"current_steps": 185940, "total_steps": 204665, "loss": 0.0, "lr": 5.057025605309029e-08, "epoch": 4.542545134732368, "percentage": 90.85, "elapsed_time": "4:01:10", "remaining_time": "0:24:17", "throughput": 8657.1, "total_tokens": 125272424} +{"current_steps": 185945, "total_steps": 204665, "loss": 0.0, "lr": 5.054348408259501e-08, "epoch": 4.542667285564215, "percentage": 90.85, "elapsed_time": "4:01:10", "remaining_time": "0:24:16", "throughput": 8657.11, "total_tokens": 125275624} +{"current_steps": 185950, "total_steps": 204665, "loss": 0.0, "lr": 5.051671901680288e-08, "epoch": 4.5427894363960615, "percentage": 90.86, "elapsed_time": "4:01:11", "remaining_time": "0:24:16", "throughput": 8657.14, "total_tokens": 125279080} +{"current_steps": 185955, "total_steps": 204665, "loss": 0.0, "lr": 5.0489960855908395e-08, "epoch": 4.542911587227909, "percentage": 90.86, "elapsed_time": "4:01:11", "remaining_time": "0:24:16", "throughput": 8657.16, "total_tokens": 125282344} +{"current_steps": 185960, "total_steps": 204665, "loss": 0.0, "lr": 5.04632096001063e-08, "epoch": 4.543033738059756, "percentage": 90.86, "elapsed_time": "4:01:11", "remaining_time": "0:24:15", "throughput": 8657.18, "total_tokens": 125285608} +{"current_steps": 185965, "total_steps": 204665, "loss": 0.0, "lr": 5.043646524959133e-08, "epoch": 4.5431558888916035, "percentage": 90.86, "elapsed_time": "4:01:12", "remaining_time": "0:24:15", "throughput": 8657.24, "total_tokens": 125289640} +{"current_steps": 185970, "total_steps": 204665, "loss": 0.0, "lr": 5.0409727804557655e-08, "epoch": 4.54327803972345, "percentage": 90.87, "elapsed_time": "4:01:12", "remaining_time": "0:24:14", "throughput": 8657.26, "total_tokens": 125292968} +{"current_steps": 185975, "total_steps": 204665, "loss": 0.0, "lr": 5.03829972651999e-08, "epoch": 4.543400190555298, "percentage": 90.87, "elapsed_time": "4:01:12", "remaining_time": "0:24:14", "throughput": 8657.29, "total_tokens": 125296360} +{"current_steps": 185980, "total_steps": 204665, "loss": 0.0, "lr": 5.0356273631712357e-08, "epoch": 4.543522341387145, "percentage": 90.87, "elapsed_time": "4:01:13", "remaining_time": "0:24:14", "throughput": 8657.31, "total_tokens": 125299624} +{"current_steps": 185985, "total_steps": 204665, "loss": 0.0, "lr": 5.032955690428953e-08, "epoch": 4.543644492218992, "percentage": 90.87, "elapsed_time": "4:01:13", "remaining_time": "0:24:13", "throughput": 8657.33, "total_tokens": 125302888} +{"current_steps": 185990, "total_steps": 204665, "loss": 0.0, "lr": 5.030284708312549e-08, "epoch": 4.543766643050839, "percentage": 90.88, "elapsed_time": "4:01:13", "remaining_time": "0:24:13", "throughput": 8657.34, "total_tokens": 125306024} +{"current_steps": 185995, "total_steps": 204665, "loss": 0.0, "lr": 5.027614416841453e-08, "epoch": 4.543888793882687, "percentage": 90.88, "elapsed_time": "4:01:14", "remaining_time": "0:24:12", "throughput": 8657.36, "total_tokens": 125309352} +{"current_steps": 186000, "total_steps": 204665, "loss": 0.0, "lr": 5.024944816035104e-08, "epoch": 4.544010944714533, "percentage": 90.88, "elapsed_time": "4:01:14", "remaining_time": "0:24:12", "throughput": 8657.38, "total_tokens": 125312680} +{"current_steps": 186005, "total_steps": 204665, "loss": 0.0, "lr": 5.0222759059128874e-08, "epoch": 4.544133095546381, "percentage": 90.88, "elapsed_time": "4:01:15", "remaining_time": "0:24:12", "throughput": 8657.39, "total_tokens": 125315752} +{"current_steps": 186010, "total_steps": 204665, "loss": 0.0, "lr": 5.0196076864942426e-08, "epoch": 4.544255246378228, "percentage": 90.89, "elapsed_time": "4:01:15", "remaining_time": "0:24:11", "throughput": 8657.4, "total_tokens": 125318760} +{"current_steps": 186015, "total_steps": 204665, "loss": 0.0, "lr": 5.0169401577985435e-08, "epoch": 4.544377397210075, "percentage": 90.89, "elapsed_time": "4:01:15", "remaining_time": "0:24:11", "throughput": 8657.41, "total_tokens": 125321832} +{"current_steps": 186020, "total_steps": 204665, "loss": 0.0, "lr": 5.014273319845197e-08, "epoch": 4.544499548041922, "percentage": 90.89, "elapsed_time": "4:01:16", "remaining_time": "0:24:10", "throughput": 8657.42, "total_tokens": 125324968} +{"current_steps": 186025, "total_steps": 204665, "loss": 0.0, "lr": 5.01160717265362e-08, "epoch": 4.544621698873769, "percentage": 90.89, "elapsed_time": "4:01:16", "remaining_time": "0:24:10", "throughput": 8657.45, "total_tokens": 125328488} +{"current_steps": 186030, "total_steps": 204665, "loss": 0.0, "lr": 5.008941716243176e-08, "epoch": 4.5447438497056165, "percentage": 90.89, "elapsed_time": "4:01:16", "remaining_time": "0:24:10", "throughput": 8657.51, "total_tokens": 125332520} +{"current_steps": 186035, "total_steps": 204665, "loss": 0.0, "lr": 5.0062769506332704e-08, "epoch": 4.544866000537464, "percentage": 90.9, "elapsed_time": "4:01:17", "remaining_time": "0:24:09", "throughput": 8657.52, "total_tokens": 125335656} +{"current_steps": 186040, "total_steps": 204665, "loss": 0.0, "lr": 5.003612875843266e-08, "epoch": 4.544988151369311, "percentage": 90.9, "elapsed_time": "4:01:17", "remaining_time": "0:24:09", "throughput": 8657.54, "total_tokens": 125338920} +{"current_steps": 186045, "total_steps": 204665, "loss": 0.0, "lr": 5.000949491892525e-08, "epoch": 4.545110302201158, "percentage": 90.9, "elapsed_time": "4:01:17", "remaining_time": "0:24:08", "throughput": 8657.56, "total_tokens": 125342120} +{"current_steps": 186050, "total_steps": 204665, "loss": 0.0, "lr": 4.998286798800444e-08, "epoch": 4.545232453033005, "percentage": 90.9, "elapsed_time": "4:01:18", "remaining_time": "0:24:08", "throughput": 8657.59, "total_tokens": 125345576} +{"current_steps": 186055, "total_steps": 204665, "loss": 0.0, "lr": 4.995624796586362e-08, "epoch": 4.545354603864852, "percentage": 90.91, "elapsed_time": "4:01:18", "remaining_time": "0:24:08", "throughput": 8657.62, "total_tokens": 125349096} +{"current_steps": 186060, "total_steps": 204665, "loss": 0.0, "lr": 4.992963485269663e-08, "epoch": 4.5454767546967, "percentage": 90.91, "elapsed_time": "4:01:18", "remaining_time": "0:24:07", "throughput": 8657.62, "total_tokens": 125352104} +{"current_steps": 186065, "total_steps": 204665, "loss": 0.0, "lr": 4.990302864869678e-08, "epoch": 4.545598905528546, "percentage": 90.91, "elapsed_time": "4:01:19", "remaining_time": "0:24:07", "throughput": 8657.67, "total_tokens": 125355752} +{"current_steps": 186070, "total_steps": 204665, "loss": 0.0, "lr": 4.987642935405767e-08, "epoch": 4.545721056360394, "percentage": 90.91, "elapsed_time": "4:01:19", "remaining_time": "0:24:07", "throughput": 8657.69, "total_tokens": 125359144} +{"current_steps": 186075, "total_steps": 204665, "loss": 0.0, "lr": 4.984983696897271e-08, "epoch": 4.545843207192241, "percentage": 90.92, "elapsed_time": "4:01:19", "remaining_time": "0:24:06", "throughput": 8657.73, "total_tokens": 125362728} +{"current_steps": 186080, "total_steps": 204665, "loss": 0.0, "lr": 4.98232514936352e-08, "epoch": 4.545965358024088, "percentage": 90.92, "elapsed_time": "4:01:20", "remaining_time": "0:24:06", "throughput": 8657.74, "total_tokens": 125365992} +{"current_steps": 186085, "total_steps": 204665, "loss": 0.0, "lr": 4.979667292823875e-08, "epoch": 4.546087508855935, "percentage": 90.92, "elapsed_time": "4:01:20", "remaining_time": "0:24:05", "throughput": 8657.81, "total_tokens": 125370088} +{"current_steps": 186090, "total_steps": 204665, "loss": 0.0, "lr": 4.9770101272976316e-08, "epoch": 4.546209659687783, "percentage": 90.92, "elapsed_time": "4:01:20", "remaining_time": "0:24:05", "throughput": 8657.84, "total_tokens": 125373544} +{"current_steps": 186095, "total_steps": 204665, "loss": 0.0, "lr": 4.974353652804142e-08, "epoch": 4.5463318105196295, "percentage": 90.93, "elapsed_time": "4:01:21", "remaining_time": "0:24:05", "throughput": 8657.86, "total_tokens": 125376872} +{"current_steps": 186100, "total_steps": 204665, "loss": 0.0, "lr": 4.971697869362701e-08, "epoch": 4.546453961351477, "percentage": 90.93, "elapsed_time": "4:01:21", "remaining_time": "0:24:04", "throughput": 8657.88, "total_tokens": 125380264} +{"current_steps": 186105, "total_steps": 204665, "loss": 0.0, "lr": 4.969042776992649e-08, "epoch": 4.546576112183324, "percentage": 90.93, "elapsed_time": "4:01:21", "remaining_time": "0:24:04", "throughput": 8657.9, "total_tokens": 125383400} +{"current_steps": 186110, "total_steps": 204665, "loss": 0.0512, "lr": 4.9663883757132596e-08, "epoch": 4.5466982630151715, "percentage": 90.93, "elapsed_time": "4:01:22", "remaining_time": "0:24:03", "throughput": 8657.92, "total_tokens": 125386728} +{"current_steps": 186115, "total_steps": 204665, "loss": 0.0, "lr": 4.96373466554385e-08, "epoch": 4.546820413847018, "percentage": 90.94, "elapsed_time": "4:01:22", "remaining_time": "0:24:03", "throughput": 8657.96, "total_tokens": 125390440} +{"current_steps": 186120, "total_steps": 204665, "loss": 0.0, "lr": 4.961081646503751e-08, "epoch": 4.546942564678865, "percentage": 90.94, "elapsed_time": "4:01:23", "remaining_time": "0:24:03", "throughput": 8657.99, "total_tokens": 125393960} +{"current_steps": 186125, "total_steps": 204665, "loss": 0.0, "lr": 4.9584293186122004e-08, "epoch": 4.547064715510713, "percentage": 90.94, "elapsed_time": "4:01:23", "remaining_time": "0:24:02", "throughput": 8658.04, "total_tokens": 125397800} +{"current_steps": 186130, "total_steps": 204665, "loss": 0.0, "lr": 4.95577768188854e-08, "epoch": 4.547186866342559, "percentage": 90.94, "elapsed_time": "4:01:23", "remaining_time": "0:24:02", "throughput": 8658.09, "total_tokens": 125401512} +{"current_steps": 186135, "total_steps": 204665, "loss": 0.0, "lr": 4.953126736352009e-08, "epoch": 4.547309017174407, "percentage": 90.95, "elapsed_time": "4:01:24", "remaining_time": "0:24:01", "throughput": 8658.12, "total_tokens": 125404968} +{"current_steps": 186140, "total_steps": 204665, "loss": 0.0, "lr": 4.950476482021915e-08, "epoch": 4.547431168006254, "percentage": 90.95, "elapsed_time": "4:01:24", "remaining_time": "0:24:01", "throughput": 8658.14, "total_tokens": 125408296} +{"current_steps": 186145, "total_steps": 204665, "loss": 0.0, "lr": 4.947826918917519e-08, "epoch": 4.547553318838101, "percentage": 90.95, "elapsed_time": "4:01:24", "remaining_time": "0:24:01", "throughput": 8658.14, "total_tokens": 125411176} +{"current_steps": 186150, "total_steps": 204665, "loss": 0.0, "lr": 4.945178047058096e-08, "epoch": 4.547675469669948, "percentage": 90.95, "elapsed_time": "4:01:25", "remaining_time": "0:24:00", "throughput": 8658.17, "total_tokens": 125414760} +{"current_steps": 186155, "total_steps": 204665, "loss": 0.0, "lr": 4.942529866462908e-08, "epoch": 4.547797620501796, "percentage": 90.96, "elapsed_time": "4:01:25", "remaining_time": "0:24:00", "throughput": 8658.2, "total_tokens": 125418152} +{"current_steps": 186160, "total_steps": 204665, "loss": 0.0, "lr": 4.9398823771511944e-08, "epoch": 4.547919771333643, "percentage": 90.96, "elapsed_time": "4:01:25", "remaining_time": "0:23:59", "throughput": 8658.21, "total_tokens": 125421224} +{"current_steps": 186165, "total_steps": 204665, "loss": 0.0, "lr": 4.9372355791422406e-08, "epoch": 4.54804192216549, "percentage": 90.96, "elapsed_time": "4:01:26", "remaining_time": "0:23:59", "throughput": 8658.24, "total_tokens": 125424808} +{"current_steps": 186170, "total_steps": 204665, "loss": 0.0, "lr": 4.934589472455264e-08, "epoch": 4.548164072997337, "percentage": 90.96, "elapsed_time": "4:01:26", "remaining_time": "0:23:59", "throughput": 8658.28, "total_tokens": 125428392} +{"current_steps": 186175, "total_steps": 204665, "loss": 0.0, "lr": 4.9319440571095164e-08, "epoch": 4.548286223829185, "percentage": 90.97, "elapsed_time": "4:01:26", "remaining_time": "0:23:58", "throughput": 8658.3, "total_tokens": 125431784} +{"current_steps": 186180, "total_steps": 204665, "loss": 0.0, "lr": 4.9292993331242595e-08, "epoch": 4.548408374661031, "percentage": 90.97, "elapsed_time": "4:01:27", "remaining_time": "0:23:58", "throughput": 8658.32, "total_tokens": 125435048} +{"current_steps": 186185, "total_steps": 204665, "loss": 0.0, "lr": 4.9266553005187005e-08, "epoch": 4.548530525492879, "percentage": 90.97, "elapsed_time": "4:01:27", "remaining_time": "0:23:57", "throughput": 8658.35, "total_tokens": 125438504} +{"current_steps": 186190, "total_steps": 204665, "loss": 0.0, "lr": 4.924011959312091e-08, "epoch": 4.548652676324726, "percentage": 90.97, "elapsed_time": "4:01:27", "remaining_time": "0:23:57", "throughput": 8658.37, "total_tokens": 125441832} +{"current_steps": 186195, "total_steps": 204665, "loss": 0.0, "lr": 4.9213693095236154e-08, "epoch": 4.548774827156572, "percentage": 90.98, "elapsed_time": "4:01:28", "remaining_time": "0:23:57", "throughput": 8658.4, "total_tokens": 125445288} +{"current_steps": 186200, "total_steps": 204665, "loss": 0.0359, "lr": 4.918727351172536e-08, "epoch": 4.54889697798842, "percentage": 90.98, "elapsed_time": "4:01:28", "remaining_time": "0:23:56", "throughput": 8658.4, "total_tokens": 125448168} +{"current_steps": 186205, "total_steps": 204665, "loss": 0.0, "lr": 4.916086084278026e-08, "epoch": 4.549019128820268, "percentage": 90.98, "elapsed_time": "4:01:28", "remaining_time": "0:23:56", "throughput": 8658.44, "total_tokens": 125451816} +{"current_steps": 186210, "total_steps": 204665, "loss": 0.0, "lr": 4.913445508859315e-08, "epoch": 4.5491412796521145, "percentage": 90.98, "elapsed_time": "4:01:29", "remaining_time": "0:23:56", "throughput": 8658.46, "total_tokens": 125455208} +{"current_steps": 186215, "total_steps": 204665, "loss": 0.0, "lr": 4.91080562493561e-08, "epoch": 4.549263430483961, "percentage": 90.99, "elapsed_time": "4:01:29", "remaining_time": "0:23:55", "throughput": 8658.49, "total_tokens": 125458664} +{"current_steps": 186220, "total_steps": 204665, "loss": 0.0, "lr": 4.908166432526106e-08, "epoch": 4.549385581315809, "percentage": 90.99, "elapsed_time": "4:01:30", "remaining_time": "0:23:55", "throughput": 8658.5, "total_tokens": 125461672} +{"current_steps": 186225, "total_steps": 204665, "loss": 0.0, "lr": 4.905527931649989e-08, "epoch": 4.549507732147656, "percentage": 90.99, "elapsed_time": "4:01:30", "remaining_time": "0:23:54", "throughput": 8658.53, "total_tokens": 125465256} +{"current_steps": 186230, "total_steps": 204665, "loss": 0.0, "lr": 4.902890122326442e-08, "epoch": 4.549629882979503, "percentage": 90.99, "elapsed_time": "4:01:30", "remaining_time": "0:23:54", "throughput": 8658.59, "total_tokens": 125469224} +{"current_steps": 186235, "total_steps": 204665, "loss": 0.0, "lr": 4.900253004574673e-08, "epoch": 4.54975203381135, "percentage": 91.0, "elapsed_time": "4:01:31", "remaining_time": "0:23:54", "throughput": 8658.63, "total_tokens": 125472808} +{"current_steps": 186240, "total_steps": 204665, "loss": 0.0, "lr": 4.8976165784138327e-08, "epoch": 4.549874184643198, "percentage": 91.0, "elapsed_time": "4:01:31", "remaining_time": "0:23:53", "throughput": 8658.64, "total_tokens": 125475944} +{"current_steps": 186245, "total_steps": 204665, "loss": 0.0, "lr": 4.894980843863106e-08, "epoch": 4.549996335475044, "percentage": 91.0, "elapsed_time": "4:01:31", "remaining_time": "0:23:53", "throughput": 8658.66, "total_tokens": 125479336} +{"current_steps": 186250, "total_steps": 204665, "loss": 0.0, "lr": 4.892345800941655e-08, "epoch": 4.550118486306892, "percentage": 91.0, "elapsed_time": "4:01:32", "remaining_time": "0:23:52", "throughput": 8658.68, "total_tokens": 125482536} +{"current_steps": 186255, "total_steps": 204665, "loss": 0.0, "lr": 4.889711449668654e-08, "epoch": 4.550240637138739, "percentage": 91.0, "elapsed_time": "4:01:32", "remaining_time": "0:23:52", "throughput": 8658.68, "total_tokens": 125485608} +{"current_steps": 186260, "total_steps": 204665, "loss": 0.0, "lr": 4.8870777900632543e-08, "epoch": 4.550362787970586, "percentage": 91.01, "elapsed_time": "4:01:32", "remaining_time": "0:23:52", "throughput": 8658.7, "total_tokens": 125488808} +{"current_steps": 186265, "total_steps": 204665, "loss": 0.0, "lr": 4.884444822144595e-08, "epoch": 4.550484938802433, "percentage": 91.01, "elapsed_time": "4:01:33", "remaining_time": "0:23:51", "throughput": 8658.7, "total_tokens": 125491880} +{"current_steps": 186270, "total_steps": 204665, "loss": 0.0, "lr": 4.88181254593184e-08, "epoch": 4.550607089634281, "percentage": 91.01, "elapsed_time": "4:01:33", "remaining_time": "0:23:51", "throughput": 8658.7, "total_tokens": 125494696} +{"current_steps": 186275, "total_steps": 204665, "loss": 0.0001, "lr": 4.8791809614441405e-08, "epoch": 4.5507292404661275, "percentage": 91.01, "elapsed_time": "4:01:33", "remaining_time": "0:23:50", "throughput": 8658.75, "total_tokens": 125498600} +{"current_steps": 186280, "total_steps": 204665, "loss": 0.0, "lr": 4.8765500687006024e-08, "epoch": 4.550851391297975, "percentage": 91.02, "elapsed_time": "4:01:34", "remaining_time": "0:23:50", "throughput": 8658.77, "total_tokens": 125501800} +{"current_steps": 186285, "total_steps": 204665, "loss": 0.0, "lr": 4.873919867720389e-08, "epoch": 4.550973542129822, "percentage": 91.02, "elapsed_time": "4:01:34", "remaining_time": "0:23:50", "throughput": 8658.82, "total_tokens": 125505576} +{"current_steps": 186290, "total_steps": 204665, "loss": 0.0, "lr": 4.871290358522606e-08, "epoch": 4.551095692961669, "percentage": 91.02, "elapsed_time": "4:01:34", "remaining_time": "0:23:49", "throughput": 8658.87, "total_tokens": 125509480} +{"current_steps": 186295, "total_steps": 204665, "loss": 0.0, "lr": 4.868661541126407e-08, "epoch": 4.551217843793516, "percentage": 91.02, "elapsed_time": "4:01:35", "remaining_time": "0:23:49", "throughput": 8658.89, "total_tokens": 125512808} +{"current_steps": 186300, "total_steps": 204665, "loss": 0.0, "lr": 4.866033415550863e-08, "epoch": 4.551339994625364, "percentage": 91.03, "elapsed_time": "4:01:35", "remaining_time": "0:23:48", "throughput": 8658.9, "total_tokens": 125515944} +{"current_steps": 186305, "total_steps": 204665, "loss": 0.0, "lr": 4.863405981815116e-08, "epoch": 4.551462145457211, "percentage": 91.03, "elapsed_time": "4:01:35", "remaining_time": "0:23:48", "throughput": 8658.96, "total_tokens": 125519848} +{"current_steps": 186310, "total_steps": 204665, "loss": 0.0, "lr": 4.860779239938284e-08, "epoch": 4.551584296289057, "percentage": 91.03, "elapsed_time": "4:01:36", "remaining_time": "0:23:48", "throughput": 8658.99, "total_tokens": 125523432} +{"current_steps": 186315, "total_steps": 204665, "loss": 0.0, "lr": 4.8581531899394404e-08, "epoch": 4.551706447120905, "percentage": 91.03, "elapsed_time": "4:01:36", "remaining_time": "0:23:47", "throughput": 8659.05, "total_tokens": 125527336} +{"current_steps": 186320, "total_steps": 204665, "loss": 0.0, "lr": 4.8555278318377136e-08, "epoch": 4.551828597952752, "percentage": 91.04, "elapsed_time": "4:01:37", "remaining_time": "0:23:47", "throughput": 8659.09, "total_tokens": 125531048} +{"current_steps": 186325, "total_steps": 204665, "loss": 0.0, "lr": 4.852903165652167e-08, "epoch": 4.551950748784599, "percentage": 91.04, "elapsed_time": "4:01:37", "remaining_time": "0:23:46", "throughput": 8659.12, "total_tokens": 125534440} +{"current_steps": 186330, "total_steps": 204665, "loss": 0.0, "lr": 4.850279191401896e-08, "epoch": 4.552072899616446, "percentage": 91.04, "elapsed_time": "4:01:37", "remaining_time": "0:23:46", "throughput": 8659.15, "total_tokens": 125538024} +{"current_steps": 186335, "total_steps": 204665, "loss": 0.0, "lr": 4.8476559091059966e-08, "epoch": 4.552195050448294, "percentage": 91.04, "elapsed_time": "4:01:38", "remaining_time": "0:23:46", "throughput": 8659.17, "total_tokens": 125541352} +{"current_steps": 186340, "total_steps": 204665, "loss": 0.0, "lr": 4.845033318783531e-08, "epoch": 4.5523172012801405, "percentage": 91.05, "elapsed_time": "4:01:38", "remaining_time": "0:23:45", "throughput": 8659.22, "total_tokens": 125545000} +{"current_steps": 186345, "total_steps": 204665, "loss": 0.0, "lr": 4.8424114204535846e-08, "epoch": 4.552439352111988, "percentage": 91.05, "elapsed_time": "4:01:38", "remaining_time": "0:23:45", "throughput": 8659.23, "total_tokens": 125548136} +{"current_steps": 186350, "total_steps": 204665, "loss": 0.0, "lr": 4.83979021413522e-08, "epoch": 4.552561502943835, "percentage": 91.05, "elapsed_time": "4:01:39", "remaining_time": "0:23:45", "throughput": 8659.25, "total_tokens": 125551528} +{"current_steps": 186355, "total_steps": 204665, "loss": 0.0, "lr": 4.837169699847476e-08, "epoch": 4.5526836537756825, "percentage": 91.05, "elapsed_time": "4:01:39", "remaining_time": "0:23:44", "throughput": 8659.26, "total_tokens": 125554600} +{"current_steps": 186360, "total_steps": 204665, "loss": 0.0, "lr": 4.834549877609451e-08, "epoch": 4.552805804607529, "percentage": 91.06, "elapsed_time": "4:01:39", "remaining_time": "0:23:44", "throughput": 8659.29, "total_tokens": 125558056} +{"current_steps": 186365, "total_steps": 204665, "loss": 0.0, "lr": 4.831930747440161e-08, "epoch": 4.552927955439377, "percentage": 91.06, "elapsed_time": "4:01:40", "remaining_time": "0:23:43", "throughput": 8659.31, "total_tokens": 125561320} +{"current_steps": 186370, "total_steps": 204665, "loss": 0.0, "lr": 4.8293123093586795e-08, "epoch": 4.553050106271224, "percentage": 91.06, "elapsed_time": "4:01:40", "remaining_time": "0:23:43", "throughput": 8659.34, "total_tokens": 125564840} +{"current_steps": 186375, "total_steps": 204665, "loss": 0.0, "lr": 4.8266945633840264e-08, "epoch": 4.553172257103071, "percentage": 91.06, "elapsed_time": "4:01:40", "remaining_time": "0:23:43", "throughput": 8659.35, "total_tokens": 125568040} +{"current_steps": 186380, "total_steps": 204665, "loss": 0.0, "lr": 4.8240775095352517e-08, "epoch": 4.553294407934918, "percentage": 91.07, "elapsed_time": "4:01:41", "remaining_time": "0:23:42", "throughput": 8659.35, "total_tokens": 125570984} +{"current_steps": 186385, "total_steps": 204665, "loss": 0.0, "lr": 4.821461147831385e-08, "epoch": 4.553416558766765, "percentage": 91.07, "elapsed_time": "4:01:41", "remaining_time": "0:23:42", "throughput": 8659.39, "total_tokens": 125574440} +{"current_steps": 186390, "total_steps": 204665, "loss": 0.0, "lr": 4.818845478291456e-08, "epoch": 4.553538709598612, "percentage": 91.07, "elapsed_time": "4:01:41", "remaining_time": "0:23:41", "throughput": 8659.43, "total_tokens": 125578152} +{"current_steps": 186395, "total_steps": 204665, "loss": 0.0, "lr": 4.8162305009344705e-08, "epoch": 4.553660860430459, "percentage": 91.07, "elapsed_time": "4:01:42", "remaining_time": "0:23:41", "throughput": 8659.43, "total_tokens": 125581160} +{"current_steps": 186400, "total_steps": 204665, "loss": 0.0489, "lr": 4.81361621577947e-08, "epoch": 4.553783011262307, "percentage": 91.08, "elapsed_time": "4:01:42", "remaining_time": "0:23:41", "throughput": 8659.48, "total_tokens": 125584936} +{"current_steps": 186405, "total_steps": 204665, "loss": 0.0, "lr": 4.81100262284545e-08, "epoch": 4.5539051620941535, "percentage": 91.08, "elapsed_time": "4:01:42", "remaining_time": "0:23:40", "throughput": 8659.52, "total_tokens": 125588520} +{"current_steps": 186410, "total_steps": 204665, "loss": 0.0, "lr": 4.808389722151418e-08, "epoch": 4.554027312926001, "percentage": 91.08, "elapsed_time": "4:01:43", "remaining_time": "0:23:40", "throughput": 8659.56, "total_tokens": 125592296} +{"current_steps": 186415, "total_steps": 204665, "loss": 0.0, "lr": 4.8057775137163913e-08, "epoch": 4.554149463757848, "percentage": 91.08, "elapsed_time": "4:01:43", "remaining_time": "0:23:39", "throughput": 8659.58, "total_tokens": 125595624} +{"current_steps": 186420, "total_steps": 204665, "loss": 0.0, "lr": 4.803165997559344e-08, "epoch": 4.5542716145896955, "percentage": 91.09, "elapsed_time": "4:01:44", "remaining_time": "0:23:39", "throughput": 8659.61, "total_tokens": 125598952} +{"current_steps": 186425, "total_steps": 204665, "loss": 0.0, "lr": 4.800555173699283e-08, "epoch": 4.554393765421542, "percentage": 91.09, "elapsed_time": "4:01:44", "remaining_time": "0:23:39", "throughput": 8659.61, "total_tokens": 125601896} +{"current_steps": 186430, "total_steps": 204665, "loss": 0.0, "lr": 4.797945042155194e-08, "epoch": 4.55451591625339, "percentage": 91.09, "elapsed_time": "4:01:44", "remaining_time": "0:23:38", "throughput": 8659.63, "total_tokens": 125605224} +{"current_steps": 186435, "total_steps": 204665, "loss": 0.0, "lr": 4.795335602946049e-08, "epoch": 4.554638067085237, "percentage": 91.09, "elapsed_time": "4:01:45", "remaining_time": "0:23:38", "throughput": 8659.65, "total_tokens": 125608552} +{"current_steps": 186440, "total_steps": 204665, "loss": 0.0, "lr": 4.7927268560908343e-08, "epoch": 4.554760217917084, "percentage": 91.1, "elapsed_time": "4:01:45", "remaining_time": "0:23:37", "throughput": 8659.69, "total_tokens": 125612072} +{"current_steps": 186445, "total_steps": 204665, "loss": 0.0, "lr": 4.7901188016085116e-08, "epoch": 4.554882368748931, "percentage": 91.1, "elapsed_time": "4:01:45", "remaining_time": "0:23:37", "throughput": 8659.71, "total_tokens": 125615464} +{"current_steps": 186450, "total_steps": 204665, "loss": 0.0, "lr": 4.787511439518066e-08, "epoch": 4.555004519580779, "percentage": 91.1, "elapsed_time": "4:01:46", "remaining_time": "0:23:37", "throughput": 8659.73, "total_tokens": 125618792} +{"current_steps": 186455, "total_steps": 204665, "loss": 0.0, "lr": 4.784904769838427e-08, "epoch": 4.555126670412625, "percentage": 91.1, "elapsed_time": "4:01:46", "remaining_time": "0:23:36", "throughput": 8659.76, "total_tokens": 125622248} +{"current_steps": 186460, "total_steps": 204665, "loss": 0.0686, "lr": 4.782298792588591e-08, "epoch": 4.555248821244473, "percentage": 91.1, "elapsed_time": "4:01:46", "remaining_time": "0:23:36", "throughput": 8659.8, "total_tokens": 125625896} +{"current_steps": 186465, "total_steps": 204665, "loss": 0.0, "lr": 4.7796935077874856e-08, "epoch": 4.55537097207632, "percentage": 91.11, "elapsed_time": "4:01:47", "remaining_time": "0:23:35", "throughput": 8659.83, "total_tokens": 125629288} +{"current_steps": 186470, "total_steps": 204665, "loss": 0.0, "lr": 4.7770889154540525e-08, "epoch": 4.555493122908167, "percentage": 91.11, "elapsed_time": "4:01:47", "remaining_time": "0:23:35", "throughput": 8659.84, "total_tokens": 125632488} +{"current_steps": 186475, "total_steps": 204665, "loss": 0.0, "lr": 4.774485015607244e-08, "epoch": 4.555615273740014, "percentage": 91.11, "elapsed_time": "4:01:47", "remaining_time": "0:23:35", "throughput": 8659.85, "total_tokens": 125635624} +{"current_steps": 186480, "total_steps": 204665, "loss": 0.0, "lr": 4.7718818082659874e-08, "epoch": 4.555737424571861, "percentage": 91.11, "elapsed_time": "4:01:48", "remaining_time": "0:23:34", "throughput": 8659.86, "total_tokens": 125638696} +{"current_steps": 186485, "total_steps": 204665, "loss": 0.0, "lr": 4.769279293449213e-08, "epoch": 4.555859575403709, "percentage": 91.12, "elapsed_time": "4:01:48", "remaining_time": "0:23:34", "throughput": 8659.91, "total_tokens": 125642536} +{"current_steps": 186490, "total_steps": 204665, "loss": 0.0, "lr": 4.766677471175873e-08, "epoch": 4.555981726235555, "percentage": 91.12, "elapsed_time": "4:01:48", "remaining_time": "0:23:34", "throughput": 8659.92, "total_tokens": 125645608} +{"current_steps": 186495, "total_steps": 204665, "loss": 0.0, "lr": 4.7640763414648624e-08, "epoch": 4.556103877067403, "percentage": 91.12, "elapsed_time": "4:01:49", "remaining_time": "0:23:33", "throughput": 8659.94, "total_tokens": 125649000} +{"current_steps": 186500, "total_steps": 204665, "loss": 0.0, "lr": 4.761475904335099e-08, "epoch": 4.55622602789925, "percentage": 91.12, "elapsed_time": "4:01:49", "remaining_time": "0:23:33", "throughput": 8659.97, "total_tokens": 125652392} +{"current_steps": 186505, "total_steps": 204665, "loss": 0.0, "lr": 4.758876159805503e-08, "epoch": 4.556348178731097, "percentage": 91.13, "elapsed_time": "4:01:49", "remaining_time": "0:23:32", "throughput": 8659.99, "total_tokens": 125655720} +{"current_steps": 186510, "total_steps": 204665, "loss": 0.0, "lr": 4.7562771078949794e-08, "epoch": 4.556470329562944, "percentage": 91.13, "elapsed_time": "4:01:50", "remaining_time": "0:23:32", "throughput": 8660.01, "total_tokens": 125659048} +{"current_steps": 186515, "total_steps": 204665, "loss": 0.0, "lr": 4.753678748622414e-08, "epoch": 4.556592480394792, "percentage": 91.13, "elapsed_time": "4:01:50", "remaining_time": "0:23:32", "throughput": 8660.06, "total_tokens": 125662824} +{"current_steps": 186520, "total_steps": 204665, "loss": 0.0, "lr": 4.751081082006714e-08, "epoch": 4.5567146312266384, "percentage": 91.13, "elapsed_time": "4:01:50", "remaining_time": "0:23:31", "throughput": 8660.07, "total_tokens": 125665832} +{"current_steps": 186525, "total_steps": 204665, "loss": 0.0, "lr": 4.748484108066786e-08, "epoch": 4.556836782058486, "percentage": 91.14, "elapsed_time": "4:01:51", "remaining_time": "0:23:31", "throughput": 8660.1, "total_tokens": 125669416} +{"current_steps": 186530, "total_steps": 204665, "loss": 0.0, "lr": 4.745887826821493e-08, "epoch": 4.556958932890333, "percentage": 91.14, "elapsed_time": "4:01:51", "remaining_time": "0:23:30", "throughput": 8660.11, "total_tokens": 125672424} +{"current_steps": 186535, "total_steps": 204665, "loss": 0.0305, "lr": 4.743292238289731e-08, "epoch": 4.5570810837221805, "percentage": 91.14, "elapsed_time": "4:01:51", "remaining_time": "0:23:30", "throughput": 8660.12, "total_tokens": 125675688} +{"current_steps": 186540, "total_steps": 204665, "loss": 0.0, "lr": 4.7406973424903626e-08, "epoch": 4.557203234554027, "percentage": 91.14, "elapsed_time": "4:01:52", "remaining_time": "0:23:30", "throughput": 8660.13, "total_tokens": 125678696} +{"current_steps": 186545, "total_steps": 204665, "loss": 0.0, "lr": 4.738103139442273e-08, "epoch": 4.557325385385875, "percentage": 91.15, "elapsed_time": "4:01:52", "remaining_time": "0:23:29", "throughput": 8660.13, "total_tokens": 125681768} +{"current_steps": 186550, "total_steps": 204665, "loss": 0.0001, "lr": 4.7355096291643026e-08, "epoch": 4.557447536217722, "percentage": 91.15, "elapsed_time": "4:01:53", "remaining_time": "0:23:29", "throughput": 8660.19, "total_tokens": 125685608} +{"current_steps": 186555, "total_steps": 204665, "loss": 0.0, "lr": 4.7329168116753473e-08, "epoch": 4.557569687049568, "percentage": 91.15, "elapsed_time": "4:01:53", "remaining_time": "0:23:28", "throughput": 8660.21, "total_tokens": 125689000} +{"current_steps": 186560, "total_steps": 204665, "loss": 0.0, "lr": 4.7303246869942246e-08, "epoch": 4.557691837881416, "percentage": 91.15, "elapsed_time": "4:01:53", "remaining_time": "0:23:28", "throughput": 8660.22, "total_tokens": 125692008} +{"current_steps": 186565, "total_steps": 204665, "loss": 0.0001, "lr": 4.727733255139832e-08, "epoch": 4.557813988713264, "percentage": 91.16, "elapsed_time": "4:01:54", "remaining_time": "0:23:28", "throughput": 8660.25, "total_tokens": 125695464} +{"current_steps": 186570, "total_steps": 204665, "loss": 0.0, "lr": 4.725142516130975e-08, "epoch": 4.55793613954511, "percentage": 91.16, "elapsed_time": "4:01:54", "remaining_time": "0:23:27", "throughput": 8660.27, "total_tokens": 125698728} +{"current_steps": 186575, "total_steps": 204665, "loss": 0.0, "lr": 4.722552469986507e-08, "epoch": 4.558058290376957, "percentage": 91.16, "elapsed_time": "4:01:54", "remaining_time": "0:23:27", "throughput": 8660.29, "total_tokens": 125701992} +{"current_steps": 186580, "total_steps": 204665, "loss": 0.0, "lr": 4.719963116725256e-08, "epoch": 4.558180441208805, "percentage": 91.16, "elapsed_time": "4:01:55", "remaining_time": "0:23:26", "throughput": 8660.31, "total_tokens": 125705320} +{"current_steps": 186585, "total_steps": 204665, "loss": 0.0, "lr": 4.717374456366074e-08, "epoch": 4.5583025920406515, "percentage": 91.17, "elapsed_time": "4:01:55", "remaining_time": "0:23:26", "throughput": 8660.32, "total_tokens": 125708520} +{"current_steps": 186590, "total_steps": 204665, "loss": 0.0, "lr": 4.714786488927758e-08, "epoch": 4.558424742872499, "percentage": 91.17, "elapsed_time": "4:01:55", "remaining_time": "0:23:26", "throughput": 8660.35, "total_tokens": 125711912} +{"current_steps": 186595, "total_steps": 204665, "loss": 0.0, "lr": 4.712199214429158e-08, "epoch": 4.558546893704346, "percentage": 91.17, "elapsed_time": "4:01:56", "remaining_time": "0:23:25", "throughput": 8660.37, "total_tokens": 125715176} +{"current_steps": 186600, "total_steps": 204665, "loss": 0.0, "lr": 4.709612632889059e-08, "epoch": 4.5586690445361935, "percentage": 91.17, "elapsed_time": "4:01:56", "remaining_time": "0:23:25", "throughput": 8660.37, "total_tokens": 125718248} +{"current_steps": 186605, "total_steps": 204665, "loss": 0.0, "lr": 4.7070267443263035e-08, "epoch": 4.55879119536804, "percentage": 91.18, "elapsed_time": "4:01:56", "remaining_time": "0:23:24", "throughput": 8660.42, "total_tokens": 125722088} +{"current_steps": 186610, "total_steps": 204665, "loss": 0.0, "lr": 4.7044415487596744e-08, "epoch": 4.558913346199888, "percentage": 91.18, "elapsed_time": "4:01:57", "remaining_time": "0:23:24", "throughput": 8660.45, "total_tokens": 125725544} +{"current_steps": 186615, "total_steps": 204665, "loss": 0.0, "lr": 4.701857046207969e-08, "epoch": 4.559035497031735, "percentage": 91.18, "elapsed_time": "4:01:57", "remaining_time": "0:23:24", "throughput": 8660.47, "total_tokens": 125728872} +{"current_steps": 186620, "total_steps": 204665, "loss": 0.0, "lr": 4.699273236690005e-08, "epoch": 4.559157647863582, "percentage": 91.18, "elapsed_time": "4:01:57", "remaining_time": "0:23:23", "throughput": 8660.49, "total_tokens": 125732136} +{"current_steps": 186625, "total_steps": 204665, "loss": 0.0, "lr": 4.6966901202245446e-08, "epoch": 4.559279798695429, "percentage": 91.19, "elapsed_time": "4:01:58", "remaining_time": "0:23:23", "throughput": 8660.51, "total_tokens": 125735400} +{"current_steps": 186630, "total_steps": 204665, "loss": 0.0, "lr": 4.694107696830407e-08, "epoch": 4.559401949527277, "percentage": 91.19, "elapsed_time": "4:01:58", "remaining_time": "0:23:23", "throughput": 8660.55, "total_tokens": 125739048} +{"current_steps": 186635, "total_steps": 204665, "loss": 0.0, "lr": 4.691525966526333e-08, "epoch": 4.559524100359123, "percentage": 91.19, "elapsed_time": "4:01:58", "remaining_time": "0:23:22", "throughput": 8660.56, "total_tokens": 125742120} +{"current_steps": 186640, "total_steps": 204665, "loss": 0.0, "lr": 4.6889449293311176e-08, "epoch": 4.559646251190971, "percentage": 91.19, "elapsed_time": "4:01:59", "remaining_time": "0:23:22", "throughput": 8660.6, "total_tokens": 125745704} +{"current_steps": 186645, "total_steps": 204665, "loss": 0.0004, "lr": 4.686364585263547e-08, "epoch": 4.559768402022818, "percentage": 91.2, "elapsed_time": "4:01:59", "remaining_time": "0:23:21", "throughput": 8660.62, "total_tokens": 125748968} +{"current_steps": 186650, "total_steps": 204665, "loss": 0.0, "lr": 4.6837849343423494e-08, "epoch": 4.5598905528546645, "percentage": 91.2, "elapsed_time": "4:01:59", "remaining_time": "0:23:21", "throughput": 8660.62, "total_tokens": 125751976} +{"current_steps": 186655, "total_steps": 204665, "loss": 0.0, "lr": 4.681205976586322e-08, "epoch": 4.560012703686512, "percentage": 91.2, "elapsed_time": "4:02:00", "remaining_time": "0:23:21", "throughput": 8660.62, "total_tokens": 125754920} +{"current_steps": 186660, "total_steps": 204665, "loss": 0.0, "lr": 4.6786277120142047e-08, "epoch": 4.56013485451836, "percentage": 91.2, "elapsed_time": "4:02:00", "remaining_time": "0:23:20", "throughput": 8660.66, "total_tokens": 125758504} +{"current_steps": 186665, "total_steps": 204665, "loss": 0.0, "lr": 4.676050140644727e-08, "epoch": 4.5602570053502065, "percentage": 91.21, "elapsed_time": "4:02:01", "remaining_time": "0:23:20", "throughput": 8660.68, "total_tokens": 125761768} +{"current_steps": 186670, "total_steps": 204665, "loss": 0.0, "lr": 4.673473262496663e-08, "epoch": 4.560379156182053, "percentage": 91.21, "elapsed_time": "4:02:01", "remaining_time": "0:23:19", "throughput": 8660.71, "total_tokens": 125765288} +{"current_steps": 186675, "total_steps": 204665, "loss": 0.0, "lr": 4.670897077588731e-08, "epoch": 4.560501307013901, "percentage": 91.21, "elapsed_time": "4:02:01", "remaining_time": "0:23:19", "throughput": 8660.8, "total_tokens": 125769832} +{"current_steps": 186680, "total_steps": 204665, "loss": 0.0, "lr": 4.668321585939694e-08, "epoch": 4.560623457845748, "percentage": 91.21, "elapsed_time": "4:02:02", "remaining_time": "0:23:19", "throughput": 8660.81, "total_tokens": 125773032} +{"current_steps": 186685, "total_steps": 204665, "loss": 0.0, "lr": 4.665746787568248e-08, "epoch": 4.560745608677595, "percentage": 91.21, "elapsed_time": "4:02:02", "remaining_time": "0:23:18", "throughput": 8660.84, "total_tokens": 125776360} +{"current_steps": 186690, "total_steps": 204665, "loss": 0.0, "lr": 4.663172682493144e-08, "epoch": 4.560867759509442, "percentage": 91.22, "elapsed_time": "4:02:02", "remaining_time": "0:23:18", "throughput": 8660.87, "total_tokens": 125779944} +{"current_steps": 186695, "total_steps": 204665, "loss": 0.0, "lr": 4.660599270733079e-08, "epoch": 4.56098991034129, "percentage": 91.22, "elapsed_time": "4:02:03", "remaining_time": "0:23:17", "throughput": 8660.9, "total_tokens": 125783464} +{"current_steps": 186700, "total_steps": 204665, "loss": 0.0, "lr": 4.658026552306793e-08, "epoch": 4.561112061173136, "percentage": 91.22, "elapsed_time": "4:02:03", "remaining_time": "0:23:17", "throughput": 8660.93, "total_tokens": 125786792} +{"current_steps": 186705, "total_steps": 204665, "loss": 0.0, "lr": 4.6554545272329715e-08, "epoch": 4.561234212004984, "percentage": 91.22, "elapsed_time": "4:02:03", "remaining_time": "0:23:17", "throughput": 8660.95, "total_tokens": 125790056} +{"current_steps": 186710, "total_steps": 204665, "loss": 0.0, "lr": 4.6528831955303215e-08, "epoch": 4.561356362836831, "percentage": 91.23, "elapsed_time": "4:02:04", "remaining_time": "0:23:16", "throughput": 8660.96, "total_tokens": 125793320} +{"current_steps": 186715, "total_steps": 204665, "loss": 0.0, "lr": 4.6503125572175725e-08, "epoch": 4.561478513668678, "percentage": 91.23, "elapsed_time": "4:02:04", "remaining_time": "0:23:16", "throughput": 8660.99, "total_tokens": 125796648} +{"current_steps": 186720, "total_steps": 204665, "loss": 0.0, "lr": 4.6477426123133765e-08, "epoch": 4.561600664500525, "percentage": 91.23, "elapsed_time": "4:02:04", "remaining_time": "0:23:15", "throughput": 8661.01, "total_tokens": 125799912} +{"current_steps": 186725, "total_steps": 204665, "loss": 0.0, "lr": 4.645173360836463e-08, "epoch": 4.561722815332373, "percentage": 91.23, "elapsed_time": "4:02:05", "remaining_time": "0:23:15", "throughput": 8661.01, "total_tokens": 125802920} +{"current_steps": 186730, "total_steps": 204665, "loss": 0.0, "lr": 4.642604802805472e-08, "epoch": 4.5618449661642195, "percentage": 91.24, "elapsed_time": "4:02:05", "remaining_time": "0:23:15", "throughput": 8661.02, "total_tokens": 125805928} +{"current_steps": 186735, "total_steps": 204665, "loss": 0.0, "lr": 4.640036938239111e-08, "epoch": 4.561967116996067, "percentage": 91.24, "elapsed_time": "4:02:05", "remaining_time": "0:23:14", "throughput": 8661.02, "total_tokens": 125808936} +{"current_steps": 186740, "total_steps": 204665, "loss": 0.0, "lr": 4.637469767156066e-08, "epoch": 4.562089267827914, "percentage": 91.24, "elapsed_time": "4:02:06", "remaining_time": "0:23:14", "throughput": 8661.04, "total_tokens": 125812264} +{"current_steps": 186745, "total_steps": 204665, "loss": 0.0, "lr": 4.634903289574976e-08, "epoch": 4.562211418659761, "percentage": 91.24, "elapsed_time": "4:02:06", "remaining_time": "0:23:13", "throughput": 8661.11, "total_tokens": 125816424} +{"current_steps": 186750, "total_steps": 204665, "loss": 0.0, "lr": 4.6323375055145386e-08, "epoch": 4.562333569491608, "percentage": 91.25, "elapsed_time": "4:02:06", "remaining_time": "0:23:13", "throughput": 8661.14, "total_tokens": 125819880} +{"current_steps": 186755, "total_steps": 204665, "loss": 0.0, "lr": 4.629772414993371e-08, "epoch": 4.562455720323455, "percentage": 91.25, "elapsed_time": "4:02:07", "remaining_time": "0:23:13", "throughput": 8661.17, "total_tokens": 125823336} +{"current_steps": 186760, "total_steps": 204665, "loss": 0.0, "lr": 4.627208018030171e-08, "epoch": 4.562577871155303, "percentage": 91.25, "elapsed_time": "4:02:07", "remaining_time": "0:23:12", "throughput": 8661.18, "total_tokens": 125826536} +{"current_steps": 186765, "total_steps": 204665, "loss": 0.0, "lr": 4.6246443146435554e-08, "epoch": 4.562700021987149, "percentage": 91.25, "elapsed_time": "4:02:07", "remaining_time": "0:23:12", "throughput": 8661.18, "total_tokens": 125829352} +{"current_steps": 186770, "total_steps": 204665, "loss": 0.0, "lr": 4.622081304852177e-08, "epoch": 4.562822172818997, "percentage": 91.26, "elapsed_time": "4:02:08", "remaining_time": "0:23:12", "throughput": 8661.21, "total_tokens": 125832872} +{"current_steps": 186775, "total_steps": 204665, "loss": 0.0, "lr": 4.619518988674686e-08, "epoch": 4.562944323650844, "percentage": 91.26, "elapsed_time": "4:02:08", "remaining_time": "0:23:11", "throughput": 8661.24, "total_tokens": 125836456} +{"current_steps": 186780, "total_steps": 204665, "loss": 0.0, "lr": 4.6169573661297034e-08, "epoch": 4.563066474482691, "percentage": 91.26, "elapsed_time": "4:02:09", "remaining_time": "0:23:11", "throughput": 8661.25, "total_tokens": 125839464} +{"current_steps": 186785, "total_steps": 204665, "loss": 0.0, "lr": 4.6143964372358676e-08, "epoch": 4.563188625314538, "percentage": 91.26, "elapsed_time": "4:02:09", "remaining_time": "0:23:10", "throughput": 8661.28, "total_tokens": 125842920} +{"current_steps": 186790, "total_steps": 204665, "loss": 0.0, "lr": 4.611836202011776e-08, "epoch": 4.563310776146386, "percentage": 91.27, "elapsed_time": "4:02:09", "remaining_time": "0:23:10", "throughput": 8661.29, "total_tokens": 125846056} +{"current_steps": 186795, "total_steps": 204665, "loss": 0.0001, "lr": 4.609276660476069e-08, "epoch": 4.5634329269782326, "percentage": 91.27, "elapsed_time": "4:02:10", "remaining_time": "0:23:10", "throughput": 8661.32, "total_tokens": 125849512} +{"current_steps": 186800, "total_steps": 204665, "loss": 0.0, "lr": 4.606717812647387e-08, "epoch": 4.56355507781008, "percentage": 91.27, "elapsed_time": "4:02:10", "remaining_time": "0:23:09", "throughput": 8661.36, "total_tokens": 125853224} +{"current_steps": 186805, "total_steps": 204665, "loss": 0.0, "lr": 4.604159658544282e-08, "epoch": 4.563677228641927, "percentage": 91.27, "elapsed_time": "4:02:10", "remaining_time": "0:23:09", "throughput": 8661.38, "total_tokens": 125856424} +{"current_steps": 186810, "total_steps": 204665, "loss": 0.0, "lr": 4.601602198185406e-08, "epoch": 4.563799379473775, "percentage": 91.28, "elapsed_time": "4:02:11", "remaining_time": "0:23:08", "throughput": 8661.41, "total_tokens": 125859816} +{"current_steps": 186815, "total_steps": 204665, "loss": 0.0, "lr": 4.599045431589321e-08, "epoch": 4.563921530305621, "percentage": 91.28, "elapsed_time": "4:02:11", "remaining_time": "0:23:08", "throughput": 8661.45, "total_tokens": 125863656} +{"current_steps": 186820, "total_steps": 204665, "loss": 0.0, "lr": 4.596489358774658e-08, "epoch": 4.564043681137468, "percentage": 91.28, "elapsed_time": "4:02:11", "remaining_time": "0:23:08", "throughput": 8661.47, "total_tokens": 125866984} +{"current_steps": 186825, "total_steps": 204665, "loss": 0.0, "lr": 4.593933979759967e-08, "epoch": 4.564165831969316, "percentage": 91.28, "elapsed_time": "4:02:12", "remaining_time": "0:23:07", "throughput": 8661.5, "total_tokens": 125870376} +{"current_steps": 186830, "total_steps": 204665, "loss": 0.0, "lr": 4.5913792945638465e-08, "epoch": 4.564287982801163, "percentage": 91.29, "elapsed_time": "4:02:12", "remaining_time": "0:23:07", "throughput": 8661.53, "total_tokens": 125873832} +{"current_steps": 186835, "total_steps": 204665, "loss": 0.0, "lr": 4.5888253032048906e-08, "epoch": 4.56441013363301, "percentage": 91.29, "elapsed_time": "4:02:12", "remaining_time": "0:23:06", "throughput": 8661.56, "total_tokens": 125877352} +{"current_steps": 186840, "total_steps": 204665, "loss": 0.0, "lr": 4.586272005701652e-08, "epoch": 4.564532284464857, "percentage": 91.29, "elapsed_time": "4:02:13", "remaining_time": "0:23:06", "throughput": 8661.58, "total_tokens": 125880616} +{"current_steps": 186845, "total_steps": 204665, "loss": 0.0, "lr": 4.5837194020727165e-08, "epoch": 4.5646544352967044, "percentage": 91.29, "elapsed_time": "4:02:13", "remaining_time": "0:23:06", "throughput": 8661.63, "total_tokens": 125884456} +{"current_steps": 186850, "total_steps": 204665, "loss": 0.0, "lr": 4.581167492336624e-08, "epoch": 4.564776586128551, "percentage": 91.3, "elapsed_time": "4:02:13", "remaining_time": "0:23:05", "throughput": 8661.67, "total_tokens": 125888104} +{"current_steps": 186855, "total_steps": 204665, "loss": 0.0, "lr": 4.5786162765119596e-08, "epoch": 4.564898736960399, "percentage": 91.3, "elapsed_time": "4:02:14", "remaining_time": "0:23:05", "throughput": 8661.71, "total_tokens": 125891816} +{"current_steps": 186860, "total_steps": 204665, "loss": 0.0, "lr": 4.576065754617253e-08, "epoch": 4.565020887792246, "percentage": 91.3, "elapsed_time": "4:02:14", "remaining_time": "0:23:04", "throughput": 8661.71, "total_tokens": 125894760} +{"current_steps": 186865, "total_steps": 204665, "loss": 0.0, "lr": 4.573515926671079e-08, "epoch": 4.565143038624093, "percentage": 91.3, "elapsed_time": "4:02:14", "remaining_time": "0:23:04", "throughput": 8661.72, "total_tokens": 125897768} +{"current_steps": 186870, "total_steps": 204665, "loss": 0.0, "lr": 4.570966792691944e-08, "epoch": 4.56526518945594, "percentage": 91.31, "elapsed_time": "4:02:15", "remaining_time": "0:23:04", "throughput": 8661.73, "total_tokens": 125900840} +{"current_steps": 186875, "total_steps": 204665, "loss": 0.0, "lr": 4.568418352698411e-08, "epoch": 4.565387340287788, "percentage": 91.31, "elapsed_time": "4:02:15", "remaining_time": "0:23:03", "throughput": 8661.74, "total_tokens": 125904040} +{"current_steps": 186880, "total_steps": 204665, "loss": 0.0, "lr": 4.5658706067090215e-08, "epoch": 4.565509491119634, "percentage": 91.31, "elapsed_time": "4:02:16", "remaining_time": "0:23:03", "throughput": 8661.78, "total_tokens": 125907752} +{"current_steps": 186885, "total_steps": 204665, "loss": 0.0, "lr": 4.563323554742271e-08, "epoch": 4.565631641951482, "percentage": 91.31, "elapsed_time": "4:02:16", "remaining_time": "0:23:02", "throughput": 8661.83, "total_tokens": 125911464} +{"current_steps": 186890, "total_steps": 204665, "loss": 0.0, "lr": 4.560777196816701e-08, "epoch": 4.565753792783329, "percentage": 91.32, "elapsed_time": "4:02:16", "remaining_time": "0:23:02", "throughput": 8661.86, "total_tokens": 125914920} +{"current_steps": 186895, "total_steps": 204665, "loss": 0.0, "lr": 4.5582315329508405e-08, "epoch": 4.565875943615176, "percentage": 91.32, "elapsed_time": "4:02:17", "remaining_time": "0:23:02", "throughput": 8661.89, "total_tokens": 125918376} +{"current_steps": 186900, "total_steps": 204665, "loss": 0.0001, "lr": 4.5556865631631856e-08, "epoch": 4.565998094447023, "percentage": 91.32, "elapsed_time": "4:02:17", "remaining_time": "0:23:01", "throughput": 8661.93, "total_tokens": 125922088} +{"current_steps": 186905, "total_steps": 204665, "loss": 0.0, "lr": 4.5531422874722555e-08, "epoch": 4.566120245278871, "percentage": 91.32, "elapsed_time": "4:02:17", "remaining_time": "0:23:01", "throughput": 8661.94, "total_tokens": 125925224} +{"current_steps": 186910, "total_steps": 204665, "loss": 0.0, "lr": 4.5505987058965355e-08, "epoch": 4.5662423961107175, "percentage": 91.32, "elapsed_time": "4:02:18", "remaining_time": "0:23:01", "throughput": 8661.95, "total_tokens": 125928360} +{"current_steps": 186915, "total_steps": 204665, "loss": 0.0, "lr": 4.548055818454544e-08, "epoch": 4.566364546942564, "percentage": 91.33, "elapsed_time": "4:02:18", "remaining_time": "0:23:00", "throughput": 8661.95, "total_tokens": 125931304} +{"current_steps": 186920, "total_steps": 204665, "loss": 0.0, "lr": 4.545513625164754e-08, "epoch": 4.566486697774412, "percentage": 91.33, "elapsed_time": "4:02:18", "remaining_time": "0:23:00", "throughput": 8661.97, "total_tokens": 125934504} +{"current_steps": 186925, "total_steps": 204665, "loss": 0.0246, "lr": 4.5429721260456633e-08, "epoch": 4.5666088486062595, "percentage": 91.33, "elapsed_time": "4:02:19", "remaining_time": "0:22:59", "throughput": 8662.01, "total_tokens": 125938280} +{"current_steps": 186930, "total_steps": 204665, "loss": 0.0, "lr": 4.5404313211157675e-08, "epoch": 4.566730999438106, "percentage": 91.33, "elapsed_time": "4:02:19", "remaining_time": "0:22:59", "throughput": 8662.02, "total_tokens": 125941416} +{"current_steps": 186935, "total_steps": 204665, "loss": 0.0, "lr": 4.537891210393519e-08, "epoch": 4.566853150269953, "percentage": 91.34, "elapsed_time": "4:02:19", "remaining_time": "0:22:59", "throughput": 8662.09, "total_tokens": 125945576} +{"current_steps": 186940, "total_steps": 204665, "loss": 0.0, "lr": 4.535351793897413e-08, "epoch": 4.566975301101801, "percentage": 91.34, "elapsed_time": "4:02:20", "remaining_time": "0:22:58", "throughput": 8662.1, "total_tokens": 125948648} +{"current_steps": 186945, "total_steps": 204665, "loss": 0.0, "lr": 4.532813071645891e-08, "epoch": 4.567097451933647, "percentage": 91.34, "elapsed_time": "4:02:20", "remaining_time": "0:22:58", "throughput": 8662.12, "total_tokens": 125952040} +{"current_steps": 186950, "total_steps": 204665, "loss": 0.0, "lr": 4.53027504365745e-08, "epoch": 4.567219602765495, "percentage": 91.34, "elapsed_time": "4:02:20", "remaining_time": "0:22:57", "throughput": 8662.15, "total_tokens": 125955432} +{"current_steps": 186955, "total_steps": 204665, "loss": 0.0, "lr": 4.5277377099505076e-08, "epoch": 4.567341753597342, "percentage": 91.35, "elapsed_time": "4:02:21", "remaining_time": "0:22:57", "throughput": 8662.18, "total_tokens": 125958824} +{"current_steps": 186960, "total_steps": 204665, "loss": 0.0, "lr": 4.5252010705435386e-08, "epoch": 4.567463904429189, "percentage": 91.35, "elapsed_time": "4:02:21", "remaining_time": "0:22:57", "throughput": 8662.19, "total_tokens": 125961896} +{"current_steps": 186965, "total_steps": 204665, "loss": 0.0, "lr": 4.522665125454994e-08, "epoch": 4.567586055261036, "percentage": 91.35, "elapsed_time": "4:02:21", "remaining_time": "0:22:56", "throughput": 8662.21, "total_tokens": 125965288} +{"current_steps": 186970, "total_steps": 204665, "loss": 0.0, "lr": 4.5201298747033155e-08, "epoch": 4.567708206092884, "percentage": 91.35, "elapsed_time": "4:02:22", "remaining_time": "0:22:56", "throughput": 8662.24, "total_tokens": 125968680} +{"current_steps": 186975, "total_steps": 204665, "loss": 0.0, "lr": 4.517595318306911e-08, "epoch": 4.5678303569247305, "percentage": 91.36, "elapsed_time": "4:02:22", "remaining_time": "0:22:55", "throughput": 8662.25, "total_tokens": 125971816} +{"current_steps": 186980, "total_steps": 204665, "loss": 0.0, "lr": 4.5150614562842635e-08, "epoch": 4.567952507756578, "percentage": 91.36, "elapsed_time": "4:02:22", "remaining_time": "0:22:55", "throughput": 8662.27, "total_tokens": 125975144} +{"current_steps": 186985, "total_steps": 204665, "loss": 0.0, "lr": 4.51252828865375e-08, "epoch": 4.568074658588425, "percentage": 91.36, "elapsed_time": "4:02:23", "remaining_time": "0:22:55", "throughput": 8662.29, "total_tokens": 125978408} +{"current_steps": 186990, "total_steps": 204665, "loss": 0.0, "lr": 4.5099958154338204e-08, "epoch": 4.5681968094202725, "percentage": 91.36, "elapsed_time": "4:02:23", "remaining_time": "0:22:54", "throughput": 8662.31, "total_tokens": 125981544} +{"current_steps": 186995, "total_steps": 204665, "loss": 0.0, "lr": 4.507464036642883e-08, "epoch": 4.568318960252119, "percentage": 91.37, "elapsed_time": "4:02:23", "remaining_time": "0:22:54", "throughput": 8662.33, "total_tokens": 125984872} +{"current_steps": 187000, "total_steps": 204665, "loss": 0.0, "lr": 4.504932952299356e-08, "epoch": 4.568441111083967, "percentage": 91.37, "elapsed_time": "4:02:24", "remaining_time": "0:22:53", "throughput": 8662.36, "total_tokens": 125988328} +{"current_steps": 187005, "total_steps": 204665, "loss": 0.0, "lr": 4.502402562421637e-08, "epoch": 4.568563261915814, "percentage": 91.37, "elapsed_time": "4:02:24", "remaining_time": "0:22:53", "throughput": 8662.36, "total_tokens": 125991208} +{"current_steps": 187010, "total_steps": 204665, "loss": 0.0, "lr": 4.499872867028143e-08, "epoch": 4.56868541274766, "percentage": 91.37, "elapsed_time": "4:02:25", "remaining_time": "0:22:53", "throughput": 8662.42, "total_tokens": 125995304} +{"current_steps": 187015, "total_steps": 204665, "loss": 0.0, "lr": 4.4973438661372374e-08, "epoch": 4.568807563579508, "percentage": 91.38, "elapsed_time": "4:02:25", "remaining_time": "0:22:52", "throughput": 8662.44, "total_tokens": 125998504} +{"current_steps": 187020, "total_steps": 204665, "loss": 0.0, "lr": 4.494815559767351e-08, "epoch": 4.568929714411355, "percentage": 91.38, "elapsed_time": "4:02:25", "remaining_time": "0:22:52", "throughput": 8662.47, "total_tokens": 126002024} +{"current_steps": 187025, "total_steps": 204665, "loss": 0.0, "lr": 4.492287947936857e-08, "epoch": 4.569051865243202, "percentage": 91.38, "elapsed_time": "4:02:26", "remaining_time": "0:22:51", "throughput": 8662.48, "total_tokens": 126005096} +{"current_steps": 187030, "total_steps": 204665, "loss": 0.0, "lr": 4.4897610306641184e-08, "epoch": 4.569174016075049, "percentage": 91.38, "elapsed_time": "4:02:26", "remaining_time": "0:22:51", "throughput": 8662.49, "total_tokens": 126008232} +{"current_steps": 187035, "total_steps": 204665, "loss": 0.0, "lr": 4.487234807967544e-08, "epoch": 4.569296166906897, "percentage": 91.39, "elapsed_time": "4:02:26", "remaining_time": "0:22:51", "throughput": 8662.54, "total_tokens": 126012136} +{"current_steps": 187040, "total_steps": 204665, "loss": 0.0, "lr": 4.484709279865473e-08, "epoch": 4.5694183177387435, "percentage": 91.39, "elapsed_time": "4:02:27", "remaining_time": "0:22:50", "throughput": 8662.59, "total_tokens": 126015848} +{"current_steps": 187045, "total_steps": 204665, "loss": 0.0, "lr": 4.482184446376291e-08, "epoch": 4.569540468570591, "percentage": 91.39, "elapsed_time": "4:02:27", "remaining_time": "0:22:50", "throughput": 8662.6, "total_tokens": 126019048} +{"current_steps": 187050, "total_steps": 204665, "loss": 0.0, "lr": 4.479660307518363e-08, "epoch": 4.569662619402438, "percentage": 91.39, "elapsed_time": "4:02:27", "remaining_time": "0:22:50", "throughput": 8662.64, "total_tokens": 126022632} +{"current_steps": 187055, "total_steps": 204665, "loss": 0.0, "lr": 4.477136863310016e-08, "epoch": 4.5697847702342855, "percentage": 91.4, "elapsed_time": "4:02:28", "remaining_time": "0:22:49", "throughput": 8662.68, "total_tokens": 126026216} +{"current_steps": 187060, "total_steps": 204665, "loss": 0.0, "lr": 4.474614113769648e-08, "epoch": 4.569906921066132, "percentage": 91.4, "elapsed_time": "4:02:28", "remaining_time": "0:22:49", "throughput": 8662.69, "total_tokens": 126029352} +{"current_steps": 187065, "total_steps": 204665, "loss": 0.0, "lr": 4.472092058915567e-08, "epoch": 4.57002907189798, "percentage": 91.4, "elapsed_time": "4:02:28", "remaining_time": "0:22:48", "throughput": 8662.71, "total_tokens": 126032616} +{"current_steps": 187070, "total_steps": 204665, "loss": 0.0, "lr": 4.469570698766134e-08, "epoch": 4.570151222729827, "percentage": 91.4, "elapsed_time": "4:02:29", "remaining_time": "0:22:48", "throughput": 8662.72, "total_tokens": 126035880} +{"current_steps": 187075, "total_steps": 204665, "loss": 0.0, "lr": 4.46705003333967e-08, "epoch": 4.570273373561674, "percentage": 91.41, "elapsed_time": "4:02:29", "remaining_time": "0:22:48", "throughput": 8662.74, "total_tokens": 126039080} +{"current_steps": 187080, "total_steps": 204665, "loss": 0.0, "lr": 4.4645300626545146e-08, "epoch": 4.570395524393521, "percentage": 91.41, "elapsed_time": "4:02:29", "remaining_time": "0:22:47", "throughput": 8662.77, "total_tokens": 126042600} +{"current_steps": 187085, "total_steps": 204665, "loss": 0.0, "lr": 4.462010786728998e-08, "epoch": 4.570517675225369, "percentage": 91.41, "elapsed_time": "4:02:30", "remaining_time": "0:22:47", "throughput": 8662.78, "total_tokens": 126045736} +{"current_steps": 187090, "total_steps": 204665, "loss": 0.0, "lr": 4.4594922055814275e-08, "epoch": 4.570639826057215, "percentage": 91.41, "elapsed_time": "4:02:30", "remaining_time": "0:22:46", "throughput": 8662.8, "total_tokens": 126049000} +{"current_steps": 187095, "total_steps": 204665, "loss": 0.0, "lr": 4.456974319230145e-08, "epoch": 4.570761976889063, "percentage": 91.42, "elapsed_time": "4:02:30", "remaining_time": "0:22:46", "throughput": 8662.87, "total_tokens": 126053288} +{"current_steps": 187100, "total_steps": 204665, "loss": 0.0, "lr": 4.454457127693412e-08, "epoch": 4.57088412772091, "percentage": 91.42, "elapsed_time": "4:02:31", "remaining_time": "0:22:46", "throughput": 8662.88, "total_tokens": 126056424} +{"current_steps": 187105, "total_steps": 204665, "loss": 0.0, "lr": 4.4519406309895924e-08, "epoch": 4.5710062785527565, "percentage": 91.42, "elapsed_time": "4:02:31", "remaining_time": "0:22:45", "throughput": 8662.91, "total_tokens": 126059944} +{"current_steps": 187110, "total_steps": 204665, "loss": 0.0001, "lr": 4.4494248291369495e-08, "epoch": 4.571128429384604, "percentage": 91.42, "elapsed_time": "4:02:32", "remaining_time": "0:22:45", "throughput": 8662.93, "total_tokens": 126063208} +{"current_steps": 187115, "total_steps": 204665, "loss": 0.0, "lr": 4.44690972215378e-08, "epoch": 4.571250580216451, "percentage": 91.43, "elapsed_time": "4:02:32", "remaining_time": "0:22:44", "throughput": 8662.93, "total_tokens": 126066088} +{"current_steps": 187120, "total_steps": 204665, "loss": 0.0, "lr": 4.444395310058402e-08, "epoch": 4.5713727310482986, "percentage": 91.43, "elapsed_time": "4:02:32", "remaining_time": "0:22:44", "throughput": 8662.94, "total_tokens": 126069288} +{"current_steps": 187125, "total_steps": 204665, "loss": 0.0, "lr": 4.441881592869068e-08, "epoch": 4.571494881880145, "percentage": 91.43, "elapsed_time": "4:02:33", "remaining_time": "0:22:44", "throughput": 8662.97, "total_tokens": 126072680} +{"current_steps": 187130, "total_steps": 204665, "loss": 0.0, "lr": 4.439368570604085e-08, "epoch": 4.571617032711993, "percentage": 91.43, "elapsed_time": "4:02:33", "remaining_time": "0:22:43", "throughput": 8662.99, "total_tokens": 126076008} +{"current_steps": 187135, "total_steps": 204665, "loss": 0.0, "lr": 4.436856243281706e-08, "epoch": 4.57173918354384, "percentage": 91.43, "elapsed_time": "4:02:33", "remaining_time": "0:22:43", "throughput": 8663.02, "total_tokens": 126079400} +{"current_steps": 187140, "total_steps": 204665, "loss": 0.0, "lr": 4.434344610920204e-08, "epoch": 4.571861334375687, "percentage": 91.44, "elapsed_time": "4:02:34", "remaining_time": "0:22:42", "throughput": 8663.04, "total_tokens": 126082792} +{"current_steps": 187145, "total_steps": 204665, "loss": 0.0, "lr": 4.431833673537877e-08, "epoch": 4.571983485207534, "percentage": 91.44, "elapsed_time": "4:02:34", "remaining_time": "0:22:42", "throughput": 8663.07, "total_tokens": 126086184} +{"current_steps": 187150, "total_steps": 204665, "loss": 0.0, "lr": 4.4293234311529315e-08, "epoch": 4.572105636039382, "percentage": 91.44, "elapsed_time": "4:02:34", "remaining_time": "0:22:42", "throughput": 8663.1, "total_tokens": 126089576} +{"current_steps": 187155, "total_steps": 204665, "loss": 0.0, "lr": 4.426813883783676e-08, "epoch": 4.572227786871228, "percentage": 91.44, "elapsed_time": "4:02:35", "remaining_time": "0:22:41", "throughput": 8663.11, "total_tokens": 126092776} +{"current_steps": 187160, "total_steps": 204665, "loss": 0.0, "lr": 4.424305031448328e-08, "epoch": 4.572349937703076, "percentage": 91.45, "elapsed_time": "4:02:35", "remaining_time": "0:22:41", "throughput": 8663.15, "total_tokens": 126096360} +{"current_steps": 187165, "total_steps": 204665, "loss": 0.0, "lr": 4.4217968741651403e-08, "epoch": 4.572472088534923, "percentage": 91.45, "elapsed_time": "4:02:35", "remaining_time": "0:22:40", "throughput": 8663.17, "total_tokens": 126099752} +{"current_steps": 187170, "total_steps": 204665, "loss": 0.0, "lr": 4.419289411952354e-08, "epoch": 4.5725942393667705, "percentage": 91.45, "elapsed_time": "4:02:36", "remaining_time": "0:22:40", "throughput": 8663.19, "total_tokens": 126102952} +{"current_steps": 187175, "total_steps": 204665, "loss": 0.0, "lr": 4.4167826448282095e-08, "epoch": 4.572716390198617, "percentage": 91.45, "elapsed_time": "4:02:36", "remaining_time": "0:22:40", "throughput": 8663.23, "total_tokens": 126106536} +{"current_steps": 187180, "total_steps": 204665, "loss": 0.0, "lr": 4.414276572810915e-08, "epoch": 4.572838541030464, "percentage": 91.46, "elapsed_time": "4:02:36", "remaining_time": "0:22:39", "throughput": 8663.28, "total_tokens": 126110376} +{"current_steps": 187185, "total_steps": 204665, "loss": 0.0, "lr": 4.411771195918723e-08, "epoch": 4.572960691862312, "percentage": 91.46, "elapsed_time": "4:02:37", "remaining_time": "0:22:39", "throughput": 8663.31, "total_tokens": 126113896} +{"current_steps": 187190, "total_steps": 204665, "loss": 0.0, "lr": 4.409266514169841e-08, "epoch": 4.573082842694159, "percentage": 91.46, "elapsed_time": "4:02:37", "remaining_time": "0:22:39", "throughput": 8663.33, "total_tokens": 126117224} +{"current_steps": 187195, "total_steps": 204665, "loss": 0.0017, "lr": 4.406762527582475e-08, "epoch": 4.573204993526006, "percentage": 91.46, "elapsed_time": "4:02:37", "remaining_time": "0:22:38", "throughput": 8663.36, "total_tokens": 126120552} +{"current_steps": 187200, "total_steps": 204665, "loss": 0.0, "lr": 4.404259236174846e-08, "epoch": 4.573327144357853, "percentage": 91.47, "elapsed_time": "4:02:38", "remaining_time": "0:22:38", "throughput": 8663.39, "total_tokens": 126124136} +{"current_steps": 187205, "total_steps": 204665, "loss": 0.0, "lr": 4.4017566399651596e-08, "epoch": 4.5734492951897, "percentage": 91.47, "elapsed_time": "4:02:38", "remaining_time": "0:22:37", "throughput": 8663.41, "total_tokens": 126127464} +{"current_steps": 187210, "total_steps": 204665, "loss": 0.0, "lr": 4.399254738971603e-08, "epoch": 4.573571446021547, "percentage": 91.47, "elapsed_time": "4:02:38", "remaining_time": "0:22:37", "throughput": 8663.43, "total_tokens": 126130728} +{"current_steps": 187215, "total_steps": 204665, "loss": 0.0, "lr": 4.396753533212394e-08, "epoch": 4.573693596853395, "percentage": 91.47, "elapsed_time": "4:02:39", "remaining_time": "0:22:37", "throughput": 8663.47, "total_tokens": 126134376} +{"current_steps": 187220, "total_steps": 204665, "loss": 0.0, "lr": 4.394253022705696e-08, "epoch": 4.5738157476852415, "percentage": 91.48, "elapsed_time": "4:02:39", "remaining_time": "0:22:36", "throughput": 8663.52, "total_tokens": 126138152} +{"current_steps": 187225, "total_steps": 204665, "loss": 0.0, "lr": 4.3917532074697175e-08, "epoch": 4.573937898517089, "percentage": 91.48, "elapsed_time": "4:02:40", "remaining_time": "0:22:36", "throughput": 8663.53, "total_tokens": 126141224} +{"current_steps": 187230, "total_steps": 204665, "loss": 0.0, "lr": 4.389254087522609e-08, "epoch": 4.574060049348936, "percentage": 91.48, "elapsed_time": "4:02:40", "remaining_time": "0:22:35", "throughput": 8663.55, "total_tokens": 126144552} +{"current_steps": 187235, "total_steps": 204665, "loss": 0.0, "lr": 4.386755662882558e-08, "epoch": 4.5741822001807835, "percentage": 91.48, "elapsed_time": "4:02:40", "remaining_time": "0:22:35", "throughput": 8663.57, "total_tokens": 126147880} +{"current_steps": 187240, "total_steps": 204665, "loss": 0.0, "lr": 4.384257933567759e-08, "epoch": 4.57430435101263, "percentage": 91.49, "elapsed_time": "4:02:41", "remaining_time": "0:22:35", "throughput": 8663.58, "total_tokens": 126151016} +{"current_steps": 187245, "total_steps": 204665, "loss": 0.0, "lr": 4.381760899596332e-08, "epoch": 4.574426501844478, "percentage": 91.49, "elapsed_time": "4:02:41", "remaining_time": "0:22:34", "throughput": 8663.64, "total_tokens": 126154984} +{"current_steps": 187250, "total_steps": 204665, "loss": 0.0, "lr": 4.379264560986473e-08, "epoch": 4.574548652676325, "percentage": 91.49, "elapsed_time": "4:02:41", "remaining_time": "0:22:34", "throughput": 8663.65, "total_tokens": 126158120} +{"current_steps": 187255, "total_steps": 204665, "loss": 0.0, "lr": 4.376768917756313e-08, "epoch": 4.574670803508172, "percentage": 91.49, "elapsed_time": "4:02:42", "remaining_time": "0:22:33", "throughput": 8663.67, "total_tokens": 126161320} +{"current_steps": 187260, "total_steps": 204665, "loss": 0.0, "lr": 4.374273969924014e-08, "epoch": 4.574792954340019, "percentage": 91.5, "elapsed_time": "4:02:42", "remaining_time": "0:22:33", "throughput": 8663.69, "total_tokens": 126164584} +{"current_steps": 187265, "total_steps": 204665, "loss": 0.0, "lr": 4.3717797175077064e-08, "epoch": 4.574915105171867, "percentage": 91.5, "elapsed_time": "4:02:42", "remaining_time": "0:22:33", "throughput": 8663.71, "total_tokens": 126167976} +{"current_steps": 187270, "total_steps": 204665, "loss": 0.0, "lr": 4.3692861605255424e-08, "epoch": 4.575037256003713, "percentage": 91.5, "elapsed_time": "4:02:43", "remaining_time": "0:22:32", "throughput": 8663.73, "total_tokens": 126171176} +{"current_steps": 187275, "total_steps": 204665, "loss": 0.0, "lr": 4.366793298995664e-08, "epoch": 4.57515940683556, "percentage": 91.5, "elapsed_time": "4:02:43", "remaining_time": "0:22:32", "throughput": 8663.76, "total_tokens": 126174696} +{"current_steps": 187280, "total_steps": 204665, "loss": 0.0, "lr": 4.364301132936177e-08, "epoch": 4.575281557667408, "percentage": 91.51, "elapsed_time": "4:02:43", "remaining_time": "0:22:31", "throughput": 8663.8, "total_tokens": 126178408} +{"current_steps": 187285, "total_steps": 204665, "loss": 0.0001, "lr": 4.3618096623652126e-08, "epoch": 4.575403708499255, "percentage": 91.51, "elapsed_time": "4:02:44", "remaining_time": "0:22:31", "throughput": 8663.82, "total_tokens": 126181672} +{"current_steps": 187290, "total_steps": 204665, "loss": 0.0, "lr": 4.3593188873009e-08, "epoch": 4.575525859331102, "percentage": 91.51, "elapsed_time": "4:02:44", "remaining_time": "0:22:31", "throughput": 8663.83, "total_tokens": 126184808} +{"current_steps": 187295, "total_steps": 204665, "loss": 0.0, "lr": 4.356828807761326e-08, "epoch": 4.575648010162949, "percentage": 91.51, "elapsed_time": "4:02:44", "remaining_time": "0:22:30", "throughput": 8663.84, "total_tokens": 126187880} +{"current_steps": 187300, "total_steps": 204665, "loss": 0.0667, "lr": 4.354339423764641e-08, "epoch": 4.5757701609947965, "percentage": 91.52, "elapsed_time": "4:02:45", "remaining_time": "0:22:30", "throughput": 8663.87, "total_tokens": 126191400} +{"current_steps": 187305, "total_steps": 204665, "loss": 0.0, "lr": 4.3518507353289103e-08, "epoch": 4.575892311826643, "percentage": 91.52, "elapsed_time": "4:02:45", "remaining_time": "0:22:29", "throughput": 8663.89, "total_tokens": 126194728} +{"current_steps": 187310, "total_steps": 204665, "loss": 0.0, "lr": 4.349362742472251e-08, "epoch": 4.576014462658491, "percentage": 91.52, "elapsed_time": "4:02:45", "remaining_time": "0:22:29", "throughput": 8663.92, "total_tokens": 126198120} +{"current_steps": 187315, "total_steps": 204665, "loss": 0.0, "lr": 4.34687544521275e-08, "epoch": 4.576136613490338, "percentage": 91.52, "elapsed_time": "4:02:46", "remaining_time": "0:22:29", "throughput": 8663.92, "total_tokens": 126201192} +{"current_steps": 187320, "total_steps": 204665, "loss": 0.0, "lr": 4.344388843568503e-08, "epoch": 4.576258764322185, "percentage": 91.53, "elapsed_time": "4:02:46", "remaining_time": "0:22:28", "throughput": 8663.92, "total_tokens": 126204008} +{"current_steps": 187325, "total_steps": 204665, "loss": 0.0, "lr": 4.3419029375575844e-08, "epoch": 4.576380915154032, "percentage": 91.53, "elapsed_time": "4:02:46", "remaining_time": "0:22:28", "throughput": 8663.97, "total_tokens": 126207912} +{"current_steps": 187330, "total_steps": 204665, "loss": 0.0, "lr": 4.339417727198069e-08, "epoch": 4.57650306598588, "percentage": 91.53, "elapsed_time": "4:02:47", "remaining_time": "0:22:28", "throughput": 8663.98, "total_tokens": 126210856} +{"current_steps": 187335, "total_steps": 204665, "loss": 0.0, "lr": 4.336933212508054e-08, "epoch": 4.576625216817726, "percentage": 91.53, "elapsed_time": "4:02:47", "remaining_time": "0:22:27", "throughput": 8664.02, "total_tokens": 126214632} +{"current_steps": 187340, "total_steps": 204665, "loss": 0.0001, "lr": 4.334449393505579e-08, "epoch": 4.576747367649574, "percentage": 91.53, "elapsed_time": "4:02:48", "remaining_time": "0:22:27", "throughput": 8664.11, "total_tokens": 126219176} +{"current_steps": 187345, "total_steps": 204665, "loss": 0.0, "lr": 4.331966270208731e-08, "epoch": 4.576869518481421, "percentage": 91.54, "elapsed_time": "4:02:48", "remaining_time": "0:22:26", "throughput": 8664.12, "total_tokens": 126222312} +{"current_steps": 187350, "total_steps": 204665, "loss": 0.0, "lr": 4.329483842635551e-08, "epoch": 4.576991669313268, "percentage": 91.54, "elapsed_time": "4:02:48", "remaining_time": "0:22:26", "throughput": 8664.2, "total_tokens": 126226600} +{"current_steps": 187355, "total_steps": 204665, "loss": 0.0, "lr": 4.3270021108040786e-08, "epoch": 4.577113820145115, "percentage": 91.54, "elapsed_time": "4:02:49", "remaining_time": "0:22:26", "throughput": 8664.24, "total_tokens": 126230248} +{"current_steps": 187360, "total_steps": 204665, "loss": 0.0, "lr": 4.324521074732412e-08, "epoch": 4.577235970976963, "percentage": 91.54, "elapsed_time": "4:02:49", "remaining_time": "0:22:25", "throughput": 8664.26, "total_tokens": 126233704} +{"current_steps": 187365, "total_steps": 204665, "loss": 0.0, "lr": 4.3220407344385365e-08, "epoch": 4.5773581218088095, "percentage": 91.55, "elapsed_time": "4:02:49", "remaining_time": "0:22:25", "throughput": 8664.29, "total_tokens": 126237096} +{"current_steps": 187370, "total_steps": 204665, "loss": 0.0, "lr": 4.3195610899405266e-08, "epoch": 4.577480272640656, "percentage": 91.55, "elapsed_time": "4:02:50", "remaining_time": "0:22:24", "throughput": 8664.32, "total_tokens": 126240552} +{"current_steps": 187375, "total_steps": 204665, "loss": 0.0, "lr": 4.317082141256401e-08, "epoch": 4.577602423472504, "percentage": 91.55, "elapsed_time": "4:02:50", "remaining_time": "0:22:24", "throughput": 8664.32, "total_tokens": 126243496} +{"current_steps": 187380, "total_steps": 204665, "loss": 0.0, "lr": 4.314603888404189e-08, "epoch": 4.577724574304351, "percentage": 91.55, "elapsed_time": "4:02:50", "remaining_time": "0:22:24", "throughput": 8664.35, "total_tokens": 126246952} +{"current_steps": 187385, "total_steps": 204665, "loss": 0.0, "lr": 4.312126331401911e-08, "epoch": 4.577846725136198, "percentage": 91.56, "elapsed_time": "4:02:51", "remaining_time": "0:22:23", "throughput": 8664.37, "total_tokens": 126250344} +{"current_steps": 187390, "total_steps": 204665, "loss": 0.0, "lr": 4.309649470267596e-08, "epoch": 4.577968875968045, "percentage": 91.56, "elapsed_time": "4:02:51", "remaining_time": "0:22:23", "throughput": 8664.39, "total_tokens": 126253608} +{"current_steps": 187395, "total_steps": 204665, "loss": 0.0, "lr": 4.3071733050192513e-08, "epoch": 4.578091026799893, "percentage": 91.56, "elapsed_time": "4:02:51", "remaining_time": "0:22:22", "throughput": 8664.42, "total_tokens": 126257064} +{"current_steps": 187400, "total_steps": 204665, "loss": 0.0, "lr": 4.304697835674864e-08, "epoch": 4.578213177631739, "percentage": 91.56, "elapsed_time": "4:02:52", "remaining_time": "0:22:22", "throughput": 8664.46, "total_tokens": 126260584} +{"current_steps": 187405, "total_steps": 204665, "loss": 0.0, "lr": 4.302223062252475e-08, "epoch": 4.578335328463587, "percentage": 91.57, "elapsed_time": "4:02:52", "remaining_time": "0:22:22", "throughput": 8664.47, "total_tokens": 126263848} +{"current_steps": 187410, "total_steps": 204665, "loss": 0.0, "lr": 4.2997489847700354e-08, "epoch": 4.578457479295434, "percentage": 91.57, "elapsed_time": "4:02:52", "remaining_time": "0:22:21", "throughput": 8664.48, "total_tokens": 126266984} +{"current_steps": 187415, "total_steps": 204665, "loss": 0.0, "lr": 4.297275603245576e-08, "epoch": 4.578579630127281, "percentage": 91.57, "elapsed_time": "4:02:53", "remaining_time": "0:22:21", "throughput": 8664.48, "total_tokens": 126269800} +{"current_steps": 187420, "total_steps": 204665, "loss": 0.0, "lr": 4.29480291769706e-08, "epoch": 4.578701780959128, "percentage": 91.57, "elapsed_time": "4:02:53", "remaining_time": "0:22:20", "throughput": 8664.49, "total_tokens": 126273000} +{"current_steps": 187425, "total_steps": 204665, "loss": 0.0, "lr": 4.2923309281424734e-08, "epoch": 4.578823931790976, "percentage": 91.58, "elapsed_time": "4:02:53", "remaining_time": "0:22:20", "throughput": 8664.53, "total_tokens": 126276584} +{"current_steps": 187430, "total_steps": 204665, "loss": 0.0, "lr": 4.289859634599824e-08, "epoch": 4.5789460826228225, "percentage": 91.58, "elapsed_time": "4:02:54", "remaining_time": "0:22:20", "throughput": 8664.58, "total_tokens": 126280488} +{"current_steps": 187435, "total_steps": 204665, "loss": 0.0, "lr": 4.28738903708703e-08, "epoch": 4.57906823345467, "percentage": 91.58, "elapsed_time": "4:02:54", "remaining_time": "0:22:19", "throughput": 8664.64, "total_tokens": 126284328} +{"current_steps": 187440, "total_steps": 204665, "loss": 0.0, "lr": 4.2849191356221116e-08, "epoch": 4.579190384286517, "percentage": 91.58, "elapsed_time": "4:02:55", "remaining_time": "0:22:19", "throughput": 8664.65, "total_tokens": 126287464} +{"current_steps": 187445, "total_steps": 204665, "loss": 0.0, "lr": 4.282449930222987e-08, "epoch": 4.579312535118364, "percentage": 91.59, "elapsed_time": "4:02:55", "remaining_time": "0:22:18", "throughput": 8664.69, "total_tokens": 126291112} +{"current_steps": 187450, "total_steps": 204665, "loss": 0.0001, "lr": 4.27998142090763e-08, "epoch": 4.579434685950211, "percentage": 91.59, "elapsed_time": "4:02:55", "remaining_time": "0:22:18", "throughput": 8664.73, "total_tokens": 126294760} +{"current_steps": 187455, "total_steps": 204665, "loss": 0.0, "lr": 4.2775136076940054e-08, "epoch": 4.579556836782059, "percentage": 91.59, "elapsed_time": "4:02:56", "remaining_time": "0:22:18", "throughput": 8664.77, "total_tokens": 126298408} +{"current_steps": 187460, "total_steps": 204665, "loss": 0.0, "lr": 4.275046490600043e-08, "epoch": 4.579678987613906, "percentage": 91.59, "elapsed_time": "4:02:56", "remaining_time": "0:22:17", "throughput": 8664.82, "total_tokens": 126302184} +{"current_steps": 187465, "total_steps": 204665, "loss": 0.0, "lr": 4.2725800696436945e-08, "epoch": 4.579801138445752, "percentage": 91.6, "elapsed_time": "4:02:56", "remaining_time": "0:22:17", "throughput": 8664.87, "total_tokens": 126306152} +{"current_steps": 187470, "total_steps": 204665, "loss": 0.0, "lr": 4.270114344842879e-08, "epoch": 4.5799232892776, "percentage": 91.6, "elapsed_time": "4:02:57", "remaining_time": "0:22:17", "throughput": 8664.91, "total_tokens": 126309736} +{"current_steps": 187475, "total_steps": 204665, "loss": 0.0, "lr": 4.26764931621556e-08, "epoch": 4.580045440109447, "percentage": 91.6, "elapsed_time": "4:02:57", "remaining_time": "0:22:16", "throughput": 8664.94, "total_tokens": 126313256} +{"current_steps": 187480, "total_steps": 204665, "loss": 0.0, "lr": 4.265184983779624e-08, "epoch": 4.580167590941294, "percentage": 91.6, "elapsed_time": "4:02:57", "remaining_time": "0:22:16", "throughput": 8664.96, "total_tokens": 126316456} +{"current_steps": 187485, "total_steps": 204665, "loss": 0.0, "lr": 4.262721347553033e-08, "epoch": 4.580289741773141, "percentage": 91.61, "elapsed_time": "4:02:58", "remaining_time": "0:22:15", "throughput": 8664.99, "total_tokens": 126319976} +{"current_steps": 187490, "total_steps": 204665, "loss": 0.0, "lr": 4.260258407553663e-08, "epoch": 4.580411892604989, "percentage": 91.61, "elapsed_time": "4:02:58", "remaining_time": "0:22:15", "throughput": 8665.02, "total_tokens": 126323496} +{"current_steps": 187495, "total_steps": 204665, "loss": 0.0, "lr": 4.257796163799454e-08, "epoch": 4.580534043436836, "percentage": 91.61, "elapsed_time": "4:02:58", "remaining_time": "0:22:15", "throughput": 8665.05, "total_tokens": 126326888} +{"current_steps": 187500, "total_steps": 204665, "loss": 0.0, "lr": 4.2553346163083146e-08, "epoch": 4.580656194268683, "percentage": 91.61, "elapsed_time": "4:02:59", "remaining_time": "0:22:14", "throughput": 8665.07, "total_tokens": 126330280} +{"current_steps": 187505, "total_steps": 204665, "loss": 0.0, "lr": 4.2528737650981086e-08, "epoch": 4.58077834510053, "percentage": 91.62, "elapsed_time": "4:02:59", "remaining_time": "0:22:14", "throughput": 8665.07, "total_tokens": 126333160} +{"current_steps": 187510, "total_steps": 204665, "loss": 0.0, "lr": 4.250413610186765e-08, "epoch": 4.580900495932378, "percentage": 91.62, "elapsed_time": "4:02:59", "remaining_time": "0:22:13", "throughput": 8665.09, "total_tokens": 126336488} +{"current_steps": 187515, "total_steps": 204665, "loss": 0.0, "lr": 4.2479541515921816e-08, "epoch": 4.581022646764224, "percentage": 91.62, "elapsed_time": "4:03:00", "remaining_time": "0:22:13", "throughput": 8665.09, "total_tokens": 126339368} +{"current_steps": 187520, "total_steps": 204665, "loss": 0.0, "lr": 4.24549538933221e-08, "epoch": 4.581144797596072, "percentage": 91.62, "elapsed_time": "4:03:00", "remaining_time": "0:22:13", "throughput": 8665.1, "total_tokens": 126342440} +{"current_steps": 187525, "total_steps": 204665, "loss": 0.0, "lr": 4.2430373234247696e-08, "epoch": 4.581266948427919, "percentage": 91.63, "elapsed_time": "4:03:00", "remaining_time": "0:22:12", "throughput": 8665.16, "total_tokens": 126346536} +{"current_steps": 187530, "total_steps": 204665, "loss": 0.0, "lr": 4.2405799538877016e-08, "epoch": 4.581389099259766, "percentage": 91.63, "elapsed_time": "4:03:01", "remaining_time": "0:22:12", "throughput": 8665.19, "total_tokens": 126350056} +{"current_steps": 187535, "total_steps": 204665, "loss": 0.0, "lr": 4.2381232807389035e-08, "epoch": 4.581511250091613, "percentage": 91.63, "elapsed_time": "4:03:01", "remaining_time": "0:22:11", "throughput": 8665.23, "total_tokens": 126353768} +{"current_steps": 187540, "total_steps": 204665, "loss": 0.0, "lr": 4.2356673039962265e-08, "epoch": 4.58163340092346, "percentage": 91.63, "elapsed_time": "4:03:02", "remaining_time": "0:22:11", "throughput": 8665.26, "total_tokens": 126357096} +{"current_steps": 187545, "total_steps": 204665, "loss": 0.0, "lr": 4.233212023677524e-08, "epoch": 4.5817555517553075, "percentage": 91.64, "elapsed_time": "4:03:02", "remaining_time": "0:22:11", "throughput": 8665.27, "total_tokens": 126360360} +{"current_steps": 187550, "total_steps": 204665, "loss": 0.0, "lr": 4.2307574398006806e-08, "epoch": 4.581877702587155, "percentage": 91.64, "elapsed_time": "4:03:02", "remaining_time": "0:22:10", "throughput": 8665.33, "total_tokens": 126364328} +{"current_steps": 187555, "total_steps": 204665, "loss": 0.0, "lr": 4.228303552383516e-08, "epoch": 4.581999853419002, "percentage": 91.64, "elapsed_time": "4:03:03", "remaining_time": "0:22:10", "throughput": 8665.34, "total_tokens": 126367464} +{"current_steps": 187560, "total_steps": 204665, "loss": 0.0, "lr": 4.225850361443894e-08, "epoch": 4.582122004250849, "percentage": 91.64, "elapsed_time": "4:03:03", "remaining_time": "0:22:09", "throughput": 8665.37, "total_tokens": 126370856} +{"current_steps": 187565, "total_steps": 204665, "loss": 0.0, "lr": 4.223397866999634e-08, "epoch": 4.582244155082696, "percentage": 91.64, "elapsed_time": "4:03:03", "remaining_time": "0:22:09", "throughput": 8665.39, "total_tokens": 126374248} +{"current_steps": 187570, "total_steps": 204665, "loss": 0.0, "lr": 4.2209460690686096e-08, "epoch": 4.582366305914543, "percentage": 91.65, "elapsed_time": "4:03:04", "remaining_time": "0:22:09", "throughput": 8665.43, "total_tokens": 126377832} +{"current_steps": 187575, "total_steps": 204665, "loss": 0.0, "lr": 4.218494967668607e-08, "epoch": 4.582488456746391, "percentage": 91.65, "elapsed_time": "4:03:04", "remaining_time": "0:22:08", "throughput": 8665.46, "total_tokens": 126381352} +{"current_steps": 187580, "total_steps": 204665, "loss": 0.0, "lr": 4.216044562817467e-08, "epoch": 4.582610607578237, "percentage": 91.65, "elapsed_time": "4:03:04", "remaining_time": "0:22:08", "throughput": 8665.49, "total_tokens": 126384872} +{"current_steps": 187585, "total_steps": 204665, "loss": 0.029, "lr": 4.213594854533031e-08, "epoch": 4.582732758410085, "percentage": 91.65, "elapsed_time": "4:03:05", "remaining_time": "0:22:08", "throughput": 8665.52, "total_tokens": 126388264} +{"current_steps": 187590, "total_steps": 204665, "loss": 0.0, "lr": 4.211145842833097e-08, "epoch": 4.582854909241932, "percentage": 91.66, "elapsed_time": "4:03:05", "remaining_time": "0:22:07", "throughput": 8665.53, "total_tokens": 126391400} +{"current_steps": 187595, "total_steps": 204665, "loss": 0.0, "lr": 4.2086975277354606e-08, "epoch": 4.582977060073779, "percentage": 91.66, "elapsed_time": "4:03:05", "remaining_time": "0:22:07", "throughput": 8665.53, "total_tokens": 126394344} +{"current_steps": 187600, "total_steps": 204665, "loss": 0.0, "lr": 4.206249909257953e-08, "epoch": 4.583099210905626, "percentage": 91.66, "elapsed_time": "4:03:06", "remaining_time": "0:22:06", "throughput": 8665.55, "total_tokens": 126397544} +{"current_steps": 187605, "total_steps": 204665, "loss": 0.0, "lr": 4.203802987418348e-08, "epoch": 4.583221361737474, "percentage": 91.66, "elapsed_time": "4:03:06", "remaining_time": "0:22:06", "throughput": 8665.6, "total_tokens": 126401320} +{"current_steps": 187610, "total_steps": 204665, "loss": 0.0, "lr": 4.201356762234476e-08, "epoch": 4.5833435125693205, "percentage": 91.67, "elapsed_time": "4:03:06", "remaining_time": "0:22:06", "throughput": 8665.63, "total_tokens": 126404904} +{"current_steps": 187615, "total_steps": 204665, "loss": 0.0005, "lr": 4.1989112337240784e-08, "epoch": 4.583465663401168, "percentage": 91.67, "elapsed_time": "4:03:07", "remaining_time": "0:22:05", "throughput": 8665.66, "total_tokens": 126408360} +{"current_steps": 187620, "total_steps": 204665, "loss": 0.0, "lr": 4.1964664019049855e-08, "epoch": 4.583587814233015, "percentage": 91.67, "elapsed_time": "4:03:07", "remaining_time": "0:22:05", "throughput": 8665.68, "total_tokens": 126411560} +{"current_steps": 187625, "total_steps": 204665, "loss": 0.0, "lr": 4.1940222667949385e-08, "epoch": 4.5837099650648625, "percentage": 91.67, "elapsed_time": "4:03:07", "remaining_time": "0:22:04", "throughput": 8665.7, "total_tokens": 126414888} +{"current_steps": 187630, "total_steps": 204665, "loss": 0.0, "lr": 4.191578828411746e-08, "epoch": 4.583832115896709, "percentage": 91.68, "elapsed_time": "4:03:08", "remaining_time": "0:22:04", "throughput": 8665.74, "total_tokens": 126418600} +{"current_steps": 187635, "total_steps": 204665, "loss": 0.0, "lr": 4.18913608677316e-08, "epoch": 4.583954266728556, "percentage": 91.68, "elapsed_time": "4:03:08", "remaining_time": "0:22:04", "throughput": 8665.75, "total_tokens": 126421736} +{"current_steps": 187640, "total_steps": 204665, "loss": 0.0, "lr": 4.1866940418969324e-08, "epoch": 4.584076417560404, "percentage": 91.68, "elapsed_time": "4:03:09", "remaining_time": "0:22:03", "throughput": 8665.78, "total_tokens": 126425064} +{"current_steps": 187645, "total_steps": 204665, "loss": 0.0, "lr": 4.1842526938008495e-08, "epoch": 4.58419856839225, "percentage": 91.68, "elapsed_time": "4:03:09", "remaining_time": "0:22:03", "throughput": 8665.8, "total_tokens": 126428456} +{"current_steps": 187650, "total_steps": 204665, "loss": 0.0, "lr": 4.181812042502641e-08, "epoch": 4.584320719224098, "percentage": 91.69, "elapsed_time": "4:03:09", "remaining_time": "0:22:02", "throughput": 8665.83, "total_tokens": 126431848} +{"current_steps": 187655, "total_steps": 204665, "loss": 0.0, "lr": 4.179372088020083e-08, "epoch": 4.584442870055945, "percentage": 91.69, "elapsed_time": "4:03:10", "remaining_time": "0:22:02", "throughput": 8665.87, "total_tokens": 126435496} +{"current_steps": 187660, "total_steps": 204665, "loss": 0.0, "lr": 4.176932830370894e-08, "epoch": 4.584565020887792, "percentage": 91.69, "elapsed_time": "4:03:10", "remaining_time": "0:22:02", "throughput": 8665.9, "total_tokens": 126438952} +{"current_steps": 187665, "total_steps": 204665, "loss": 0.0, "lr": 4.174494269572837e-08, "epoch": 4.584687171719639, "percentage": 91.69, "elapsed_time": "4:03:10", "remaining_time": "0:22:01", "throughput": 8665.92, "total_tokens": 126442216} +{"current_steps": 187670, "total_steps": 204665, "loss": 0.0, "lr": 4.172056405643609e-08, "epoch": 4.584809322551487, "percentage": 91.7, "elapsed_time": "4:03:11", "remaining_time": "0:22:01", "throughput": 8665.95, "total_tokens": 126445736} +{"current_steps": 187675, "total_steps": 204665, "loss": 0.0, "lr": 4.169619238600963e-08, "epoch": 4.5849314733833335, "percentage": 91.7, "elapsed_time": "4:03:11", "remaining_time": "0:22:00", "throughput": 8665.98, "total_tokens": 126449064} +{"current_steps": 187680, "total_steps": 204665, "loss": 0.0, "lr": 4.16718276846264e-08, "epoch": 4.585053624215181, "percentage": 91.7, "elapsed_time": "4:03:11", "remaining_time": "0:22:00", "throughput": 8666.0, "total_tokens": 126452328} +{"current_steps": 187685, "total_steps": 204665, "loss": 0.0, "lr": 4.164746995246327e-08, "epoch": 4.585175775047028, "percentage": 91.7, "elapsed_time": "4:03:12", "remaining_time": "0:22:00", "throughput": 8666.03, "total_tokens": 126455912} +{"current_steps": 187690, "total_steps": 204665, "loss": 0.0, "lr": 4.162311918969763e-08, "epoch": 4.5852979258788755, "percentage": 91.71, "elapsed_time": "4:03:12", "remaining_time": "0:21:59", "throughput": 8666.06, "total_tokens": 126459368} +{"current_steps": 187695, "total_steps": 204665, "loss": 0.0, "lr": 4.1598775396506246e-08, "epoch": 4.585420076710722, "percentage": 91.71, "elapsed_time": "4:03:12", "remaining_time": "0:21:59", "throughput": 8666.09, "total_tokens": 126462952} +{"current_steps": 187700, "total_steps": 204665, "loss": 0.0028, "lr": 4.1574438573066526e-08, "epoch": 4.58554222754257, "percentage": 91.71, "elapsed_time": "4:03:13", "remaining_time": "0:21:58", "throughput": 8666.11, "total_tokens": 126466088} +{"current_steps": 187705, "total_steps": 204665, "loss": 0.0, "lr": 4.155010871955522e-08, "epoch": 4.585664378374417, "percentage": 91.71, "elapsed_time": "4:03:13", "remaining_time": "0:21:58", "throughput": 8666.12, "total_tokens": 126469288} +{"current_steps": 187710, "total_steps": 204665, "loss": 0.0, "lr": 4.1525785836149294e-08, "epoch": 4.585786529206263, "percentage": 91.72, "elapsed_time": "4:03:13", "remaining_time": "0:21:58", "throughput": 8666.15, "total_tokens": 126472744} +{"current_steps": 187715, "total_steps": 204665, "loss": 0.0, "lr": 4.150146992302572e-08, "epoch": 4.585908680038111, "percentage": 91.72, "elapsed_time": "4:03:14", "remaining_time": "0:21:57", "throughput": 8666.19, "total_tokens": 126476456} +{"current_steps": 187720, "total_steps": 204665, "loss": 0.0, "lr": 4.147716098036103e-08, "epoch": 4.586030830869959, "percentage": 91.72, "elapsed_time": "4:03:14", "remaining_time": "0:21:57", "throughput": 8666.23, "total_tokens": 126479976} +{"current_steps": 187725, "total_steps": 204665, "loss": 0.0, "lr": 4.145285900833251e-08, "epoch": 4.586152981701805, "percentage": 91.72, "elapsed_time": "4:03:14", "remaining_time": "0:21:57", "throughput": 8666.23, "total_tokens": 126482984} +{"current_steps": 187730, "total_steps": 204665, "loss": 0.0, "lr": 4.142856400711647e-08, "epoch": 4.586275132533652, "percentage": 91.73, "elapsed_time": "4:03:15", "remaining_time": "0:21:56", "throughput": 8666.26, "total_tokens": 126486504} +{"current_steps": 187735, "total_steps": 204665, "loss": 0.0, "lr": 4.1404275976889666e-08, "epoch": 4.5863972833655, "percentage": 91.73, "elapsed_time": "4:03:15", "remaining_time": "0:21:56", "throughput": 8666.28, "total_tokens": 126489704} +{"current_steps": 187740, "total_steps": 204665, "loss": 0.0001, "lr": 4.1379994917828956e-08, "epoch": 4.5865194341973465, "percentage": 91.73, "elapsed_time": "4:03:15", "remaining_time": "0:21:55", "throughput": 8666.28, "total_tokens": 126492712} +{"current_steps": 187745, "total_steps": 204665, "loss": 0.0, "lr": 4.135572083011074e-08, "epoch": 4.586641585029194, "percentage": 91.73, "elapsed_time": "4:03:16", "remaining_time": "0:21:55", "throughput": 8666.3, "total_tokens": 126495976} +{"current_steps": 187750, "total_steps": 204665, "loss": 0.0, "lr": 4.133145371391156e-08, "epoch": 4.586763735861041, "percentage": 91.74, "elapsed_time": "4:03:16", "remaining_time": "0:21:55", "throughput": 8666.32, "total_tokens": 126499304} +{"current_steps": 187755, "total_steps": 204665, "loss": 0.0, "lr": 4.130719356940782e-08, "epoch": 4.5868858866928885, "percentage": 91.74, "elapsed_time": "4:03:17", "remaining_time": "0:21:54", "throughput": 8666.35, "total_tokens": 126502760} +{"current_steps": 187760, "total_steps": 204665, "loss": 0.0, "lr": 4.128294039677605e-08, "epoch": 4.587008037524735, "percentage": 91.74, "elapsed_time": "4:03:17", "remaining_time": "0:21:54", "throughput": 8666.38, "total_tokens": 126506152} +{"current_steps": 187765, "total_steps": 204665, "loss": 0.0, "lr": 4.125869419619266e-08, "epoch": 4.587130188356583, "percentage": 91.74, "elapsed_time": "4:03:17", "remaining_time": "0:21:53", "throughput": 8666.4, "total_tokens": 126509608} +{"current_steps": 187770, "total_steps": 204665, "loss": 0.0, "lr": 4.1234454967833844e-08, "epoch": 4.58725233918843, "percentage": 91.75, "elapsed_time": "4:03:18", "remaining_time": "0:21:53", "throughput": 8666.42, "total_tokens": 126512808} +{"current_steps": 187775, "total_steps": 204665, "loss": 0.0, "lr": 4.121022271187602e-08, "epoch": 4.587374490020277, "percentage": 91.75, "elapsed_time": "4:03:18", "remaining_time": "0:21:53", "throughput": 8666.44, "total_tokens": 126516136} +{"current_steps": 187780, "total_steps": 204665, "loss": 0.0001, "lr": 4.1185997428495265e-08, "epoch": 4.587496640852124, "percentage": 91.75, "elapsed_time": "4:03:18", "remaining_time": "0:21:52", "throughput": 8666.46, "total_tokens": 126519336} +{"current_steps": 187785, "total_steps": 204665, "loss": 0.0, "lr": 4.1161779117868004e-08, "epoch": 4.587618791683972, "percentage": 91.75, "elapsed_time": "4:03:19", "remaining_time": "0:21:52", "throughput": 8666.49, "total_tokens": 126522856} +{"current_steps": 187790, "total_steps": 204665, "loss": 0.0, "lr": 4.113756778016997e-08, "epoch": 4.587740942515818, "percentage": 91.75, "elapsed_time": "4:03:19", "remaining_time": "0:21:51", "throughput": 8666.5, "total_tokens": 126525928} +{"current_steps": 187795, "total_steps": 204665, "loss": 0.0, "lr": 4.1113363415577583e-08, "epoch": 4.587863093347666, "percentage": 91.76, "elapsed_time": "4:03:19", "remaining_time": "0:21:51", "throughput": 8666.53, "total_tokens": 126529448} +{"current_steps": 187800, "total_steps": 204665, "loss": 0.0, "lr": 4.10891660242666e-08, "epoch": 4.587985244179513, "percentage": 91.76, "elapsed_time": "4:03:20", "remaining_time": "0:21:51", "throughput": 8666.56, "total_tokens": 126532904} +{"current_steps": 187805, "total_steps": 204665, "loss": 0.0, "lr": 4.10649756064132e-08, "epoch": 4.58810739501136, "percentage": 91.76, "elapsed_time": "4:03:20", "remaining_time": "0:21:50", "throughput": 8666.57, "total_tokens": 126536104} +{"current_steps": 187810, "total_steps": 204665, "loss": 0.0, "lr": 4.104079216219336e-08, "epoch": 4.588229545843207, "percentage": 91.76, "elapsed_time": "4:03:20", "remaining_time": "0:21:50", "throughput": 8666.59, "total_tokens": 126539304} +{"current_steps": 187815, "total_steps": 204665, "loss": 0.0, "lr": 4.101661569178261e-08, "epoch": 4.588351696675055, "percentage": 91.77, "elapsed_time": "4:03:21", "remaining_time": "0:21:49", "throughput": 8666.6, "total_tokens": 126542504} +{"current_steps": 187820, "total_steps": 204665, "loss": 0.0, "lr": 4.099244619535702e-08, "epoch": 4.588473847506902, "percentage": 91.77, "elapsed_time": "4:03:21", "remaining_time": "0:21:49", "throughput": 8666.61, "total_tokens": 126545576} +{"current_steps": 187825, "total_steps": 204665, "loss": 0.0, "lr": 4.0968283673092244e-08, "epoch": 4.588595998338748, "percentage": 91.77, "elapsed_time": "4:03:21", "remaining_time": "0:21:49", "throughput": 8666.65, "total_tokens": 126549224} +{"current_steps": 187830, "total_steps": 204665, "loss": 0.0, "lr": 4.0944128125164014e-08, "epoch": 4.588718149170596, "percentage": 91.77, "elapsed_time": "4:03:22", "remaining_time": "0:21:48", "throughput": 8666.68, "total_tokens": 126552744} +{"current_steps": 187835, "total_steps": 204665, "loss": 0.0, "lr": 4.091997955174831e-08, "epoch": 4.588840300002443, "percentage": 91.78, "elapsed_time": "4:03:22", "remaining_time": "0:21:48", "throughput": 8666.75, "total_tokens": 126556840} +{"current_steps": 187840, "total_steps": 204665, "loss": 0.0, "lr": 4.089583795302021e-08, "epoch": 4.58896245083429, "percentage": 91.78, "elapsed_time": "4:03:22", "remaining_time": "0:21:47", "throughput": 8666.8, "total_tokens": 126560680} +{"current_steps": 187845, "total_steps": 204665, "loss": 0.0, "lr": 4.0871703329155685e-08, "epoch": 4.589084601666137, "percentage": 91.78, "elapsed_time": "4:03:23", "remaining_time": "0:21:47", "throughput": 8666.82, "total_tokens": 126563880} +{"current_steps": 187850, "total_steps": 204665, "loss": 0.0, "lr": 4.084757568033004e-08, "epoch": 4.589206752497985, "percentage": 91.78, "elapsed_time": "4:03:23", "remaining_time": "0:21:47", "throughput": 8666.86, "total_tokens": 126567720} +{"current_steps": 187855, "total_steps": 204665, "loss": 0.0, "lr": 4.082345500671869e-08, "epoch": 4.5893289033298315, "percentage": 91.79, "elapsed_time": "4:03:23", "remaining_time": "0:21:46", "throughput": 8666.87, "total_tokens": 126570856} +{"current_steps": 187860, "total_steps": 204665, "loss": 0.0, "lr": 4.079934130849738e-08, "epoch": 4.589451054161679, "percentage": 91.79, "elapsed_time": "4:03:24", "remaining_time": "0:21:46", "throughput": 8666.88, "total_tokens": 126573800} +{"current_steps": 187865, "total_steps": 204665, "loss": 0.0, "lr": 4.077523458584109e-08, "epoch": 4.589573204993526, "percentage": 91.79, "elapsed_time": "4:03:24", "remaining_time": "0:21:46", "throughput": 8666.94, "total_tokens": 126577960} +{"current_steps": 187870, "total_steps": 204665, "loss": 0.0436, "lr": 4.075113483892545e-08, "epoch": 4.5896953558253735, "percentage": 91.79, "elapsed_time": "4:03:25", "remaining_time": "0:21:45", "throughput": 8666.95, "total_tokens": 126581032} +{"current_steps": 187875, "total_steps": 204665, "loss": 0.0, "lr": 4.072704206792543e-08, "epoch": 4.58981750665722, "percentage": 91.8, "elapsed_time": "4:03:25", "remaining_time": "0:21:45", "throughput": 8667.0, "total_tokens": 126584808} +{"current_steps": 187880, "total_steps": 204665, "loss": 0.0, "lr": 4.070295627301656e-08, "epoch": 4.589939657489068, "percentage": 91.8, "elapsed_time": "4:03:25", "remaining_time": "0:21:44", "throughput": 8667.06, "total_tokens": 126588840} +{"current_steps": 187885, "total_steps": 204665, "loss": 0.0, "lr": 4.067887745437359e-08, "epoch": 4.590061808320915, "percentage": 91.8, "elapsed_time": "4:03:26", "remaining_time": "0:21:44", "throughput": 8667.09, "total_tokens": 126592232} +{"current_steps": 187890, "total_steps": 204665, "loss": 0.0, "lr": 4.0654805612171936e-08, "epoch": 4.590183959152762, "percentage": 91.8, "elapsed_time": "4:03:26", "remaining_time": "0:21:44", "throughput": 8667.11, "total_tokens": 126595560} +{"current_steps": 187895, "total_steps": 204665, "loss": 0.0, "lr": 4.0630740746586564e-08, "epoch": 4.590306109984609, "percentage": 91.81, "elapsed_time": "4:03:26", "remaining_time": "0:21:43", "throughput": 8667.14, "total_tokens": 126598952} +{"current_steps": 187900, "total_steps": 204665, "loss": 0.0, "lr": 4.060668285779256e-08, "epoch": 4.590428260816456, "percentage": 91.81, "elapsed_time": "4:03:27", "remaining_time": "0:21:43", "throughput": 8667.16, "total_tokens": 126602344} +{"current_steps": 187905, "total_steps": 204665, "loss": 0.0, "lr": 4.0582631945964786e-08, "epoch": 4.590550411648303, "percentage": 91.81, "elapsed_time": "4:03:27", "remaining_time": "0:21:42", "throughput": 8667.19, "total_tokens": 126605736} +{"current_steps": 187910, "total_steps": 204665, "loss": 0.0002, "lr": 4.055858801127809e-08, "epoch": 4.590672562480151, "percentage": 91.81, "elapsed_time": "4:03:27", "remaining_time": "0:21:42", "throughput": 8667.21, "total_tokens": 126609064} +{"current_steps": 187915, "total_steps": 204665, "loss": 0.0, "lr": 4.0534551053907464e-08, "epoch": 4.590794713311998, "percentage": 91.82, "elapsed_time": "4:03:28", "remaining_time": "0:21:42", "throughput": 8667.23, "total_tokens": 126612392} +{"current_steps": 187920, "total_steps": 204665, "loss": 0.0, "lr": 4.0510521074027636e-08, "epoch": 4.5909168641438445, "percentage": 91.82, "elapsed_time": "4:03:28", "remaining_time": "0:21:41", "throughput": 8667.24, "total_tokens": 126615528} +{"current_steps": 187925, "total_steps": 204665, "loss": 0.0, "lr": 4.0486498071813256e-08, "epoch": 4.591039014975692, "percentage": 91.82, "elapsed_time": "4:03:28", "remaining_time": "0:21:41", "throughput": 8667.27, "total_tokens": 126618920} +{"current_steps": 187930, "total_steps": 204665, "loss": 0.0, "lr": 4.0462482047439295e-08, "epoch": 4.591161165807539, "percentage": 91.82, "elapsed_time": "4:03:29", "remaining_time": "0:21:40", "throughput": 8667.28, "total_tokens": 126622120} +{"current_steps": 187935, "total_steps": 204665, "loss": 0.0, "lr": 4.043847300108016e-08, "epoch": 4.5912833166393865, "percentage": 91.83, "elapsed_time": "4:03:29", "remaining_time": "0:21:40", "throughput": 8667.31, "total_tokens": 126625576} +{"current_steps": 187940, "total_steps": 204665, "loss": 0.0, "lr": 4.041447093291062e-08, "epoch": 4.591405467471233, "percentage": 91.83, "elapsed_time": "4:03:29", "remaining_time": "0:21:40", "throughput": 8667.34, "total_tokens": 126629096} +{"current_steps": 187945, "total_steps": 204665, "loss": 0.0, "lr": 4.0390475843105066e-08, "epoch": 4.591527618303081, "percentage": 91.83, "elapsed_time": "4:03:30", "remaining_time": "0:21:39", "throughput": 8667.36, "total_tokens": 126632424} +{"current_steps": 187950, "total_steps": 204665, "loss": 0.0, "lr": 4.036648773183804e-08, "epoch": 4.591649769134928, "percentage": 91.83, "elapsed_time": "4:03:30", "remaining_time": "0:21:39", "throughput": 8667.39, "total_tokens": 126635880} +{"current_steps": 187955, "total_steps": 204665, "loss": 0.0, "lr": 4.0342506599284175e-08, "epoch": 4.591771919966775, "percentage": 91.84, "elapsed_time": "4:03:30", "remaining_time": "0:21:38", "throughput": 8667.41, "total_tokens": 126639208} +{"current_steps": 187960, "total_steps": 204665, "loss": 0.0, "lr": 4.0318532445617557e-08, "epoch": 4.591894070798622, "percentage": 91.84, "elapsed_time": "4:03:31", "remaining_time": "0:21:38", "throughput": 8667.46, "total_tokens": 126643112} +{"current_steps": 187965, "total_steps": 204665, "loss": 0.0, "lr": 4.0294565271012825e-08, "epoch": 4.59201622163047, "percentage": 91.84, "elapsed_time": "4:03:31", "remaining_time": "0:21:38", "throughput": 8667.49, "total_tokens": 126646440} +{"current_steps": 187970, "total_steps": 204665, "loss": 0.0, "lr": 4.027060507564406e-08, "epoch": 4.592138372462316, "percentage": 91.84, "elapsed_time": "4:03:32", "remaining_time": "0:21:37", "throughput": 8667.52, "total_tokens": 126650024} +{"current_steps": 187975, "total_steps": 204665, "loss": 0.0, "lr": 4.0246651859685675e-08, "epoch": 4.592260523294164, "percentage": 91.85, "elapsed_time": "4:03:32", "remaining_time": "0:21:37", "throughput": 8667.54, "total_tokens": 126653352} +{"current_steps": 187980, "total_steps": 204665, "loss": 0.0354, "lr": 4.0222705623311645e-08, "epoch": 4.592382674126011, "percentage": 91.85, "elapsed_time": "4:03:32", "remaining_time": "0:21:37", "throughput": 8667.56, "total_tokens": 126656552} +{"current_steps": 187985, "total_steps": 204665, "loss": 0.0, "lr": 4.019876636669628e-08, "epoch": 4.592504824957858, "percentage": 91.85, "elapsed_time": "4:03:33", "remaining_time": "0:21:36", "throughput": 8667.59, "total_tokens": 126660072} +{"current_steps": 187990, "total_steps": 204665, "loss": 0.0, "lr": 4.017483409001376e-08, "epoch": 4.592626975789705, "percentage": 91.85, "elapsed_time": "4:03:33", "remaining_time": "0:21:36", "throughput": 8667.61, "total_tokens": 126663272} +{"current_steps": 187995, "total_steps": 204665, "loss": 0.0, "lr": 4.0150908793437854e-08, "epoch": 4.592749126621552, "percentage": 91.85, "elapsed_time": "4:03:33", "remaining_time": "0:21:35", "throughput": 8667.65, "total_tokens": 126666984} +{"current_steps": 188000, "total_steps": 204665, "loss": 0.0, "lr": 4.0126990477142854e-08, "epoch": 4.5928712774533995, "percentage": 91.86, "elapsed_time": "4:03:34", "remaining_time": "0:21:35", "throughput": 8667.68, "total_tokens": 126670504} +{"current_steps": 188005, "total_steps": 204665, "loss": 0.0, "lr": 4.0103079141302507e-08, "epoch": 4.592993428285246, "percentage": 91.86, "elapsed_time": "4:03:34", "remaining_time": "0:21:35", "throughput": 8667.71, "total_tokens": 126673896} +{"current_steps": 188010, "total_steps": 204665, "loss": 0.0, "lr": 4.00791747860908e-08, "epoch": 4.593115579117094, "percentage": 91.86, "elapsed_time": "4:03:34", "remaining_time": "0:21:34", "throughput": 8667.72, "total_tokens": 126677096} +{"current_steps": 188015, "total_steps": 204665, "loss": 0.0, "lr": 4.005527741168147e-08, "epoch": 4.593237729948941, "percentage": 91.86, "elapsed_time": "4:03:35", "remaining_time": "0:21:34", "throughput": 8667.76, "total_tokens": 126680616} +{"current_steps": 188020, "total_steps": 204665, "loss": 0.0, "lr": 4.003138701824826e-08, "epoch": 4.593359880780788, "percentage": 91.87, "elapsed_time": "4:03:35", "remaining_time": "0:21:33", "throughput": 8667.8, "total_tokens": 126684264} +{"current_steps": 188025, "total_steps": 204665, "loss": 0.0, "lr": 4.000750360596517e-08, "epoch": 4.593482031612635, "percentage": 91.87, "elapsed_time": "4:03:35", "remaining_time": "0:21:33", "throughput": 8667.81, "total_tokens": 126687336} +{"current_steps": 188030, "total_steps": 204665, "loss": 0.0, "lr": 3.998362717500558e-08, "epoch": 4.593604182444483, "percentage": 91.87, "elapsed_time": "4:03:36", "remaining_time": "0:21:33", "throughput": 8667.84, "total_tokens": 126690856} +{"current_steps": 188035, "total_steps": 204665, "loss": 0.0394, "lr": 3.995975772554339e-08, "epoch": 4.593726333276329, "percentage": 91.87, "elapsed_time": "4:03:36", "remaining_time": "0:21:32", "throughput": 8667.88, "total_tokens": 126694632} +{"current_steps": 188040, "total_steps": 204665, "loss": 0.0, "lr": 3.9935895257751984e-08, "epoch": 4.593848484108177, "percentage": 91.88, "elapsed_time": "4:03:36", "remaining_time": "0:21:32", "throughput": 8667.91, "total_tokens": 126698024} +{"current_steps": 188045, "total_steps": 204665, "loss": 0.0, "lr": 3.9912039771804903e-08, "epoch": 4.593970634940024, "percentage": 91.88, "elapsed_time": "4:03:37", "remaining_time": "0:21:31", "throughput": 8667.94, "total_tokens": 126701544} +{"current_steps": 188050, "total_steps": 204665, "loss": 0.0, "lr": 3.98881912678759e-08, "epoch": 4.594092785771871, "percentage": 91.88, "elapsed_time": "4:03:37", "remaining_time": "0:21:31", "throughput": 8667.97, "total_tokens": 126704936} +{"current_steps": 188055, "total_steps": 204665, "loss": 0.0, "lr": 3.986434974613806e-08, "epoch": 4.594214936603718, "percentage": 91.88, "elapsed_time": "4:03:37", "remaining_time": "0:21:31", "throughput": 8667.99, "total_tokens": 126708264} +{"current_steps": 188060, "total_steps": 204665, "loss": 0.0, "lr": 3.984051520676501e-08, "epoch": 4.594337087435566, "percentage": 91.89, "elapsed_time": "4:03:38", "remaining_time": "0:21:30", "throughput": 8668.0, "total_tokens": 126711272} +{"current_steps": 188065, "total_steps": 204665, "loss": 0.0, "lr": 3.981668764992985e-08, "epoch": 4.5944592382674125, "percentage": 91.89, "elapsed_time": "4:03:38", "remaining_time": "0:21:30", "throughput": 8668.02, "total_tokens": 126714728} +{"current_steps": 188070, "total_steps": 204665, "loss": 0.0, "lr": 3.979286707580598e-08, "epoch": 4.594581389099259, "percentage": 91.89, "elapsed_time": "4:03:38", "remaining_time": "0:21:29", "throughput": 8668.06, "total_tokens": 126718376} +{"current_steps": 188075, "total_steps": 204665, "loss": 0.0, "lr": 3.976905348456683e-08, "epoch": 4.594703539931107, "percentage": 91.89, "elapsed_time": "4:03:39", "remaining_time": "0:21:29", "throughput": 8668.07, "total_tokens": 126721384} +{"current_steps": 188080, "total_steps": 204665, "loss": 0.0001, "lr": 3.9745246876385255e-08, "epoch": 4.5948256907629546, "percentage": 91.9, "elapsed_time": "4:03:39", "remaining_time": "0:21:29", "throughput": 8668.09, "total_tokens": 126724776} +{"current_steps": 188085, "total_steps": 204665, "loss": 0.0, "lr": 3.972144725143456e-08, "epoch": 4.594947841594801, "percentage": 91.9, "elapsed_time": "4:03:40", "remaining_time": "0:21:28", "throughput": 8668.12, "total_tokens": 126728168} +{"current_steps": 188090, "total_steps": 204665, "loss": 0.0, "lr": 3.969765460988772e-08, "epoch": 4.595069992426648, "percentage": 91.9, "elapsed_time": "4:03:40", "remaining_time": "0:21:28", "throughput": 8668.12, "total_tokens": 126731176} +{"current_steps": 188095, "total_steps": 204665, "loss": 0.0, "lr": 3.9673868951918045e-08, "epoch": 4.595192143258496, "percentage": 91.9, "elapsed_time": "4:03:40", "remaining_time": "0:21:27", "throughput": 8668.14, "total_tokens": 126734504} +{"current_steps": 188100, "total_steps": 204665, "loss": 0.0, "lr": 3.9650090277698054e-08, "epoch": 4.595314294090342, "percentage": 91.91, "elapsed_time": "4:03:41", "remaining_time": "0:21:27", "throughput": 8668.15, "total_tokens": 126737512} +{"current_steps": 188105, "total_steps": 204665, "loss": 0.0, "lr": 3.9626318587401066e-08, "epoch": 4.59543644492219, "percentage": 91.91, "elapsed_time": "4:03:41", "remaining_time": "0:21:27", "throughput": 8668.15, "total_tokens": 126740456} +{"current_steps": 188110, "total_steps": 204665, "loss": 0.0, "lr": 3.960255388119971e-08, "epoch": 4.595558595754037, "percentage": 91.91, "elapsed_time": "4:03:41", "remaining_time": "0:21:26", "throughput": 8668.16, "total_tokens": 126743592} +{"current_steps": 188115, "total_steps": 204665, "loss": 0.0, "lr": 3.957879615926696e-08, "epoch": 4.595680746585884, "percentage": 91.91, "elapsed_time": "4:03:42", "remaining_time": "0:21:26", "throughput": 8668.2, "total_tokens": 126747176} +{"current_steps": 188120, "total_steps": 204665, "loss": 0.0, "lr": 3.9555045421775566e-08, "epoch": 4.595802897417731, "percentage": 91.92, "elapsed_time": "4:03:42", "remaining_time": "0:21:26", "throughput": 8668.26, "total_tokens": 126751144} +{"current_steps": 188125, "total_steps": 204665, "loss": 0.0, "lr": 3.9531301668898066e-08, "epoch": 4.595925048249579, "percentage": 91.92, "elapsed_time": "4:03:42", "remaining_time": "0:21:25", "throughput": 8668.29, "total_tokens": 126754664} +{"current_steps": 188130, "total_steps": 204665, "loss": 0.0, "lr": 3.950756490080742e-08, "epoch": 4.596047199081426, "percentage": 91.92, "elapsed_time": "4:03:43", "remaining_time": "0:21:25", "throughput": 8668.31, "total_tokens": 126757864} +{"current_steps": 188135, "total_steps": 204665, "loss": 0.0, "lr": 3.9483835117675947e-08, "epoch": 4.596169349913273, "percentage": 91.92, "elapsed_time": "4:03:43", "remaining_time": "0:21:24", "throughput": 8668.35, "total_tokens": 126761576} +{"current_steps": 188140, "total_steps": 204665, "loss": 0.0, "lr": 3.946011231967639e-08, "epoch": 4.59629150074512, "percentage": 91.93, "elapsed_time": "4:03:43", "remaining_time": "0:21:24", "throughput": 8668.37, "total_tokens": 126764968} +{"current_steps": 188145, "total_steps": 204665, "loss": 0.0, "lr": 3.9436396506981383e-08, "epoch": 4.596413651576968, "percentage": 91.93, "elapsed_time": "4:03:44", "remaining_time": "0:21:24", "throughput": 8668.4, "total_tokens": 126768360} +{"current_steps": 188150, "total_steps": 204665, "loss": 0.0, "lr": 3.941268767976314e-08, "epoch": 4.596535802408814, "percentage": 91.93, "elapsed_time": "4:03:44", "remaining_time": "0:21:23", "throughput": 8668.44, "total_tokens": 126771944} +{"current_steps": 188155, "total_steps": 204665, "loss": 0.0, "lr": 3.938898583819428e-08, "epoch": 4.596657953240662, "percentage": 91.93, "elapsed_time": "4:03:44", "remaining_time": "0:21:23", "throughput": 8668.47, "total_tokens": 126775400} +{"current_steps": 188160, "total_steps": 204665, "loss": 0.0, "lr": 3.936529098244701e-08, "epoch": 4.596780104072509, "percentage": 91.94, "elapsed_time": "4:03:45", "remaining_time": "0:21:22", "throughput": 8668.49, "total_tokens": 126778856} +{"current_steps": 188165, "total_steps": 204665, "loss": 0.0, "lr": 3.934160311269374e-08, "epoch": 4.5969022549043554, "percentage": 91.94, "elapsed_time": "4:03:45", "remaining_time": "0:21:22", "throughput": 8668.5, "total_tokens": 126781864} +{"current_steps": 188170, "total_steps": 204665, "loss": 0.0, "lr": 3.931792222910679e-08, "epoch": 4.597024405736203, "percentage": 91.94, "elapsed_time": "4:03:45", "remaining_time": "0:21:22", "throughput": 8668.56, "total_tokens": 126785832} +{"current_steps": 188175, "total_steps": 204665, "loss": 0.0, "lr": 3.929424833185824e-08, "epoch": 4.597146556568051, "percentage": 91.94, "elapsed_time": "4:03:46", "remaining_time": "0:21:21", "throughput": 8668.59, "total_tokens": 126789352} +{"current_steps": 188180, "total_steps": 204665, "loss": 0.0, "lr": 3.9270581421120386e-08, "epoch": 4.5972687073998975, "percentage": 91.95, "elapsed_time": "4:03:46", "remaining_time": "0:21:21", "throughput": 8668.6, "total_tokens": 126792424} +{"current_steps": 188185, "total_steps": 204665, "loss": 0.0002, "lr": 3.92469214970651e-08, "epoch": 4.597390858231744, "percentage": 91.95, "elapsed_time": "4:03:46", "remaining_time": "0:21:20", "throughput": 8668.6, "total_tokens": 126795432} +{"current_steps": 188190, "total_steps": 204665, "loss": 0.0, "lr": 3.9223268559864796e-08, "epoch": 4.597513009063592, "percentage": 91.95, "elapsed_time": "4:03:47", "remaining_time": "0:21:20", "throughput": 8668.62, "total_tokens": 126798568} +{"current_steps": 188195, "total_steps": 204665, "loss": 0.0, "lr": 3.919962260969123e-08, "epoch": 4.597635159895439, "percentage": 91.95, "elapsed_time": "4:03:47", "remaining_time": "0:21:20", "throughput": 8668.63, "total_tokens": 126801832} +{"current_steps": 188200, "total_steps": 204665, "loss": 0.0, "lr": 3.917598364671637e-08, "epoch": 4.597757310727286, "percentage": 91.96, "elapsed_time": "4:03:48", "remaining_time": "0:21:19", "throughput": 8668.67, "total_tokens": 126805416} +{"current_steps": 188205, "total_steps": 204665, "loss": 0.0, "lr": 3.9152351671112305e-08, "epoch": 4.597879461559133, "percentage": 91.96, "elapsed_time": "4:03:48", "remaining_time": "0:21:19", "throughput": 8668.69, "total_tokens": 126808680} +{"current_steps": 188210, "total_steps": 204665, "loss": 0.0, "lr": 3.9128726683050675e-08, "epoch": 4.598001612390981, "percentage": 91.96, "elapsed_time": "4:03:48", "remaining_time": "0:21:18", "throughput": 8668.71, "total_tokens": 126812072} +{"current_steps": 188215, "total_steps": 204665, "loss": 0.0, "lr": 3.9105108682703447e-08, "epoch": 4.598123763222827, "percentage": 91.96, "elapsed_time": "4:03:49", "remaining_time": "0:21:18", "throughput": 8668.74, "total_tokens": 126815528} +{"current_steps": 188220, "total_steps": 204665, "loss": 0.0, "lr": 3.908149767024238e-08, "epoch": 4.598245914054675, "percentage": 91.96, "elapsed_time": "4:03:49", "remaining_time": "0:21:18", "throughput": 8668.74, "total_tokens": 126818344} +{"current_steps": 188225, "total_steps": 204665, "loss": 0.0488, "lr": 3.9057893645839005e-08, "epoch": 4.598368064886522, "percentage": 91.97, "elapsed_time": "4:03:49", "remaining_time": "0:21:17", "throughput": 8668.77, "total_tokens": 126821800} +{"current_steps": 188230, "total_steps": 204665, "loss": 0.0001, "lr": 3.903429660966517e-08, "epoch": 4.598490215718369, "percentage": 91.97, "elapsed_time": "4:03:50", "remaining_time": "0:21:17", "throughput": 8668.8, "total_tokens": 126825384} +{"current_steps": 188235, "total_steps": 204665, "loss": 0.0, "lr": 3.901070656189231e-08, "epoch": 4.598612366550216, "percentage": 91.97, "elapsed_time": "4:03:50", "remaining_time": "0:21:17", "throughput": 8668.82, "total_tokens": 126828584} +{"current_steps": 188240, "total_steps": 204665, "loss": 0.0, "lr": 3.898712350269218e-08, "epoch": 4.598734517382064, "percentage": 91.97, "elapsed_time": "4:03:50", "remaining_time": "0:21:16", "throughput": 8668.86, "total_tokens": 126832232} +{"current_steps": 188245, "total_steps": 204665, "loss": 0.0, "lr": 3.8963547432236064e-08, "epoch": 4.5988566682139105, "percentage": 91.98, "elapsed_time": "4:03:51", "remaining_time": "0:21:16", "throughput": 8668.88, "total_tokens": 126835624} +{"current_steps": 188250, "total_steps": 204665, "loss": 0.0, "lr": 3.8939978350695625e-08, "epoch": 4.598978819045758, "percentage": 91.98, "elapsed_time": "4:03:51", "remaining_time": "0:21:15", "throughput": 8668.93, "total_tokens": 126839336} +{"current_steps": 188255, "total_steps": 204665, "loss": 0.0, "lr": 3.8916416258242045e-08, "epoch": 4.599100969877605, "percentage": 91.98, "elapsed_time": "4:03:51", "remaining_time": "0:21:15", "throughput": 8668.95, "total_tokens": 126842600} +{"current_steps": 188260, "total_steps": 204665, "loss": 0.0, "lr": 3.889286115504686e-08, "epoch": 4.599223120709452, "percentage": 91.98, "elapsed_time": "4:03:52", "remaining_time": "0:21:15", "throughput": 8668.95, "total_tokens": 126845608} +{"current_steps": 188265, "total_steps": 204665, "loss": 0.0, "lr": 3.886931304128127e-08, "epoch": 4.599345271541299, "percentage": 91.99, "elapsed_time": "4:03:52", "remaining_time": "0:21:14", "throughput": 8668.97, "total_tokens": 126848936} +{"current_steps": 188270, "total_steps": 204665, "loss": 0.0, "lr": 3.8845771917116466e-08, "epoch": 4.599467422373146, "percentage": 91.99, "elapsed_time": "4:03:52", "remaining_time": "0:21:14", "throughput": 8668.97, "total_tokens": 126851816} +{"current_steps": 188275, "total_steps": 204665, "loss": 0.0, "lr": 3.882223778272398e-08, "epoch": 4.599589573204994, "percentage": 91.99, "elapsed_time": "4:03:53", "remaining_time": "0:21:13", "throughput": 8669.0, "total_tokens": 126855272} +{"current_steps": 188280, "total_steps": 204665, "loss": 0.0, "lr": 3.879871063827445e-08, "epoch": 4.59971172403684, "percentage": 91.99, "elapsed_time": "4:03:53", "remaining_time": "0:21:13", "throughput": 8669.03, "total_tokens": 126858792} +{"current_steps": 188285, "total_steps": 204665, "loss": 0.0, "lr": 3.87751904839394e-08, "epoch": 4.599833874868688, "percentage": 92.0, "elapsed_time": "4:03:53", "remaining_time": "0:21:13", "throughput": 8669.08, "total_tokens": 126862504} +{"current_steps": 188290, "total_steps": 204665, "loss": 0.0, "lr": 3.8751677319889485e-08, "epoch": 4.599956025700535, "percentage": 92.0, "elapsed_time": "4:03:54", "remaining_time": "0:21:12", "throughput": 8669.07, "total_tokens": 126865320} +{"current_steps": 188295, "total_steps": 204665, "loss": 0.0, "lr": 3.872817114629601e-08, "epoch": 4.600078176532382, "percentage": 92.0, "elapsed_time": "4:03:54", "remaining_time": "0:21:12", "throughput": 8669.07, "total_tokens": 126868264} +{"current_steps": 188300, "total_steps": 204665, "loss": 0.0, "lr": 3.8704671963329935e-08, "epoch": 4.600200327364229, "percentage": 92.0, "elapsed_time": "4:03:54", "remaining_time": "0:21:11", "throughput": 8669.09, "total_tokens": 126871464} +{"current_steps": 188305, "total_steps": 204665, "loss": 0.0, "lr": 3.868117977116192e-08, "epoch": 4.600322478196077, "percentage": 92.01, "elapsed_time": "4:03:55", "remaining_time": "0:21:11", "throughput": 8669.11, "total_tokens": 126874728} +{"current_steps": 188310, "total_steps": 204665, "loss": 0.0, "lr": 3.865769456996304e-08, "epoch": 4.6004446290279235, "percentage": 92.01, "elapsed_time": "4:03:55", "remaining_time": "0:21:11", "throughput": 8669.13, "total_tokens": 126878120} +{"current_steps": 188315, "total_steps": 204665, "loss": 0.0682, "lr": 3.863421635990394e-08, "epoch": 4.600566779859771, "percentage": 92.01, "elapsed_time": "4:03:55", "remaining_time": "0:21:10", "throughput": 8669.15, "total_tokens": 126881448} +{"current_steps": 188320, "total_steps": 204665, "loss": 0.0, "lr": 3.861074514115536e-08, "epoch": 4.600688930691618, "percentage": 92.01, "elapsed_time": "4:03:56", "remaining_time": "0:21:10", "throughput": 8669.17, "total_tokens": 126884584} +{"current_steps": 188325, "total_steps": 204665, "loss": 0.0, "lr": 3.858728091388819e-08, "epoch": 4.6008110815234655, "percentage": 92.02, "elapsed_time": "4:03:56", "remaining_time": "0:21:09", "throughput": 8669.18, "total_tokens": 126887784} +{"current_steps": 188330, "total_steps": 204665, "loss": 0.0, "lr": 3.85638236782726e-08, "epoch": 4.600933232355312, "percentage": 92.02, "elapsed_time": "4:03:56", "remaining_time": "0:21:09", "throughput": 8669.19, "total_tokens": 126890920} +{"current_steps": 188335, "total_steps": 204665, "loss": 0.0, "lr": 3.854037343447969e-08, "epoch": 4.601055383187159, "percentage": 92.02, "elapsed_time": "4:03:57", "remaining_time": "0:21:09", "throughput": 8669.2, "total_tokens": 126893928} +{"current_steps": 188340, "total_steps": 204665, "loss": 0.0, "lr": 3.8516930182679765e-08, "epoch": 4.601177534019007, "percentage": 92.02, "elapsed_time": "4:03:57", "remaining_time": "0:21:08", "throughput": 8669.22, "total_tokens": 126897256} +{"current_steps": 188345, "total_steps": 204665, "loss": 0.0, "lr": 3.849349392304335e-08, "epoch": 4.601299684850854, "percentage": 92.03, "elapsed_time": "4:03:58", "remaining_time": "0:21:08", "throughput": 8669.23, "total_tokens": 126900392} +{"current_steps": 188350, "total_steps": 204665, "loss": 0.0, "lr": 3.8470064655740655e-08, "epoch": 4.601421835682701, "percentage": 92.03, "elapsed_time": "4:03:58", "remaining_time": "0:21:07", "throughput": 8669.25, "total_tokens": 126903656} +{"current_steps": 188355, "total_steps": 204665, "loss": 0.0, "lr": 3.844664238094242e-08, "epoch": 4.601543986514548, "percentage": 92.03, "elapsed_time": "4:03:58", "remaining_time": "0:21:07", "throughput": 8669.26, "total_tokens": 126906792} +{"current_steps": 188360, "total_steps": 204665, "loss": 0.0, "lr": 3.842322709881884e-08, "epoch": 4.601666137346395, "percentage": 92.03, "elapsed_time": "4:03:59", "remaining_time": "0:21:07", "throughput": 8669.3, "total_tokens": 126910440} +{"current_steps": 188365, "total_steps": 204665, "loss": 0.0, "lr": 3.839981880954013e-08, "epoch": 4.601788288178242, "percentage": 92.04, "elapsed_time": "4:03:59", "remaining_time": "0:21:06", "throughput": 8669.32, "total_tokens": 126913704} +{"current_steps": 188370, "total_steps": 204665, "loss": 0.0, "lr": 3.837641751327669e-08, "epoch": 4.60191043901009, "percentage": 92.04, "elapsed_time": "4:03:59", "remaining_time": "0:21:06", "throughput": 8669.33, "total_tokens": 126916904} +{"current_steps": 188375, "total_steps": 204665, "loss": 0.0, "lr": 3.835302321019851e-08, "epoch": 4.6020325898419365, "percentage": 92.04, "elapsed_time": "4:04:00", "remaining_time": "0:21:06", "throughput": 8669.36, "total_tokens": 126920296} +{"current_steps": 188380, "total_steps": 204665, "loss": 0.0, "lr": 3.832963590047589e-08, "epoch": 4.602154740673784, "percentage": 92.04, "elapsed_time": "4:04:00", "remaining_time": "0:21:05", "throughput": 8669.43, "total_tokens": 126924456} +{"current_steps": 188385, "total_steps": 204665, "loss": 0.0005, "lr": 3.830625558427869e-08, "epoch": 4.602276891505631, "percentage": 92.05, "elapsed_time": "4:04:00", "remaining_time": "0:21:05", "throughput": 8669.44, "total_tokens": 126927656} +{"current_steps": 188390, "total_steps": 204665, "loss": 0.0, "lr": 3.8282882261777e-08, "epoch": 4.6023990423374785, "percentage": 92.05, "elapsed_time": "4:04:01", "remaining_time": "0:21:04", "throughput": 8669.48, "total_tokens": 126931240} +{"current_steps": 188395, "total_steps": 204665, "loss": 0.0, "lr": 3.825951593314103e-08, "epoch": 4.602521193169325, "percentage": 92.05, "elapsed_time": "4:04:01", "remaining_time": "0:21:04", "throughput": 8669.5, "total_tokens": 126934568} +{"current_steps": 188400, "total_steps": 204665, "loss": 0.0, "lr": 3.82361565985404e-08, "epoch": 4.602643344001173, "percentage": 92.05, "elapsed_time": "4:04:01", "remaining_time": "0:21:04", "throughput": 8669.53, "total_tokens": 126938024} +{"current_steps": 188405, "total_steps": 204665, "loss": 0.0, "lr": 3.8212804258145324e-08, "epoch": 4.60276549483302, "percentage": 92.06, "elapsed_time": "4:04:02", "remaining_time": "0:21:03", "throughput": 8669.56, "total_tokens": 126941544} +{"current_steps": 188410, "total_steps": 204665, "loss": 0.0, "lr": 3.818945891212522e-08, "epoch": 4.602887645664867, "percentage": 92.06, "elapsed_time": "4:04:02", "remaining_time": "0:21:03", "throughput": 8669.56, "total_tokens": 126944488} +{"current_steps": 188415, "total_steps": 204665, "loss": 0.0, "lr": 3.816612056065016e-08, "epoch": 4.603009796496714, "percentage": 92.06, "elapsed_time": "4:04:02", "remaining_time": "0:21:02", "throughput": 8669.58, "total_tokens": 126947816} +{"current_steps": 188420, "total_steps": 204665, "loss": 0.0, "lr": 3.814278920388969e-08, "epoch": 4.603131947328562, "percentage": 92.06, "elapsed_time": "4:04:03", "remaining_time": "0:21:02", "throughput": 8669.62, "total_tokens": 126951400} +{"current_steps": 188425, "total_steps": 204665, "loss": 0.0764, "lr": 3.811946484201378e-08, "epoch": 4.603254098160408, "percentage": 92.07, "elapsed_time": "4:04:03", "remaining_time": "0:21:02", "throughput": 8669.63, "total_tokens": 126954536} +{"current_steps": 188430, "total_steps": 204665, "loss": 0.0, "lr": 3.809614747519174e-08, "epoch": 4.603376248992255, "percentage": 92.07, "elapsed_time": "4:04:03", "remaining_time": "0:21:01", "throughput": 8669.62, "total_tokens": 126957288} +{"current_steps": 188435, "total_steps": 204665, "loss": 0.0, "lr": 3.8072837103593106e-08, "epoch": 4.603498399824103, "percentage": 92.07, "elapsed_time": "4:04:04", "remaining_time": "0:21:01", "throughput": 8669.65, "total_tokens": 126960744} +{"current_steps": 188440, "total_steps": 204665, "loss": 0.0, "lr": 3.804953372738762e-08, "epoch": 4.60362055065595, "percentage": 92.07, "elapsed_time": "4:04:04", "remaining_time": "0:21:00", "throughput": 8669.65, "total_tokens": 126963688} +{"current_steps": 188445, "total_steps": 204665, "loss": 0.0, "lr": 3.8026237346744596e-08, "epoch": 4.603742701487797, "percentage": 92.07, "elapsed_time": "4:04:04", "remaining_time": "0:21:00", "throughput": 8669.7, "total_tokens": 126967528} +{"current_steps": 188450, "total_steps": 204665, "loss": 0.0, "lr": 3.8002947961833565e-08, "epoch": 4.603864852319644, "percentage": 92.08, "elapsed_time": "4:04:05", "remaining_time": "0:21:00", "throughput": 8669.72, "total_tokens": 126970856} +{"current_steps": 188455, "total_steps": 204665, "loss": 0.062, "lr": 3.797966557282384e-08, "epoch": 4.603987003151492, "percentage": 92.08, "elapsed_time": "4:04:05", "remaining_time": "0:20:59", "throughput": 8669.73, "total_tokens": 126974056} +{"current_steps": 188460, "total_steps": 204665, "loss": 0.0, "lr": 3.795639017988472e-08, "epoch": 4.604109153983338, "percentage": 92.08, "elapsed_time": "4:04:06", "remaining_time": "0:20:59", "throughput": 8669.81, "total_tokens": 126978408} +{"current_steps": 188465, "total_steps": 204665, "loss": 0.0, "lr": 3.793312178318553e-08, "epoch": 4.604231304815186, "percentage": 92.08, "elapsed_time": "4:04:06", "remaining_time": "0:20:58", "throughput": 8669.84, "total_tokens": 126981736} +{"current_steps": 188470, "total_steps": 204665, "loss": 0.0002, "lr": 3.7909860382895455e-08, "epoch": 4.604353455647033, "percentage": 92.09, "elapsed_time": "4:04:06", "remaining_time": "0:20:58", "throughput": 8669.85, "total_tokens": 126984936} +{"current_steps": 188475, "total_steps": 204665, "loss": 0.0, "lr": 3.788660597918347e-08, "epoch": 4.60447560647888, "percentage": 92.09, "elapsed_time": "4:04:07", "remaining_time": "0:20:58", "throughput": 8669.86, "total_tokens": 126988072} +{"current_steps": 188480, "total_steps": 204665, "loss": 0.0, "lr": 3.786335857221912e-08, "epoch": 4.604597757310727, "percentage": 92.09, "elapsed_time": "4:04:07", "remaining_time": "0:20:57", "throughput": 8669.89, "total_tokens": 126991464} +{"current_steps": 188485, "total_steps": 204665, "loss": 0.0, "lr": 3.7840118162171033e-08, "epoch": 4.604719908142575, "percentage": 92.09, "elapsed_time": "4:04:07", "remaining_time": "0:20:57", "throughput": 8669.91, "total_tokens": 126994792} +{"current_steps": 188490, "total_steps": 204665, "loss": 0.0, "lr": 3.781688474920852e-08, "epoch": 4.6048420589744214, "percentage": 92.1, "elapsed_time": "4:04:08", "remaining_time": "0:20:57", "throughput": 8669.91, "total_tokens": 126997800} +{"current_steps": 188495, "total_steps": 204665, "loss": 0.0001, "lr": 3.779365833350035e-08, "epoch": 4.604964209806269, "percentage": 92.1, "elapsed_time": "4:04:08", "remaining_time": "0:20:56", "throughput": 8669.93, "total_tokens": 127001000} +{"current_steps": 188500, "total_steps": 204665, "loss": 0.0, "lr": 3.7770438915215586e-08, "epoch": 4.605086360638116, "percentage": 92.1, "elapsed_time": "4:04:08", "remaining_time": "0:20:56", "throughput": 8669.97, "total_tokens": 127004648} +{"current_steps": 188505, "total_steps": 204665, "loss": 0.0, "lr": 3.7747226494522775e-08, "epoch": 4.6052085114699635, "percentage": 92.1, "elapsed_time": "4:04:09", "remaining_time": "0:20:55", "throughput": 8669.99, "total_tokens": 127007976} +{"current_steps": 188510, "total_steps": 204665, "loss": 0.0, "lr": 3.7724021071591116e-08, "epoch": 4.60533066230181, "percentage": 92.11, "elapsed_time": "4:04:09", "remaining_time": "0:20:55", "throughput": 8670.03, "total_tokens": 127011624} +{"current_steps": 188515, "total_steps": 204665, "loss": 0.0, "lr": 3.770082264658925e-08, "epoch": 4.605452813133658, "percentage": 92.11, "elapsed_time": "4:04:09", "remaining_time": "0:20:55", "throughput": 8670.04, "total_tokens": 127014760} +{"current_steps": 188520, "total_steps": 204665, "loss": 0.0, "lr": 3.7677631219685704e-08, "epoch": 4.605574963965505, "percentage": 92.11, "elapsed_time": "4:04:10", "remaining_time": "0:20:54", "throughput": 8670.07, "total_tokens": 127018152} +{"current_steps": 188525, "total_steps": 204665, "loss": 0.0, "lr": 3.765444679104934e-08, "epoch": 4.605697114797351, "percentage": 92.11, "elapsed_time": "4:04:10", "remaining_time": "0:20:54", "throughput": 8670.09, "total_tokens": 127021544} +{"current_steps": 188530, "total_steps": 204665, "loss": 0.0, "lr": 3.7631269360848706e-08, "epoch": 4.605819265629199, "percentage": 92.12, "elapsed_time": "4:04:10", "remaining_time": "0:20:53", "throughput": 8670.12, "total_tokens": 127024936} +{"current_steps": 188535, "total_steps": 204665, "loss": 0.0, "lr": 3.7608098929252205e-08, "epoch": 4.605941416461046, "percentage": 92.12, "elapsed_time": "4:04:11", "remaining_time": "0:20:53", "throughput": 8670.14, "total_tokens": 127028136} +{"current_steps": 188540, "total_steps": 204665, "loss": 0.0, "lr": 3.7584935496428604e-08, "epoch": 4.606063567292893, "percentage": 92.12, "elapsed_time": "4:04:11", "remaining_time": "0:20:53", "throughput": 8670.15, "total_tokens": 127031336} +{"current_steps": 188545, "total_steps": 204665, "loss": 0.0, "lr": 3.756177906254609e-08, "epoch": 4.60618571812474, "percentage": 92.12, "elapsed_time": "4:04:11", "remaining_time": "0:20:52", "throughput": 8670.19, "total_tokens": 127035048} +{"current_steps": 188550, "total_steps": 204665, "loss": 0.0, "lr": 3.753862962777321e-08, "epoch": 4.606307868956588, "percentage": 92.13, "elapsed_time": "4:04:12", "remaining_time": "0:20:52", "throughput": 8670.22, "total_tokens": 127038568} +{"current_steps": 188555, "total_steps": 204665, "loss": 0.0, "lr": 3.751548719227826e-08, "epoch": 4.6064300197884345, "percentage": 92.13, "elapsed_time": "4:04:12", "remaining_time": "0:20:51", "throughput": 8670.24, "total_tokens": 127041704} +{"current_steps": 188560, "total_steps": 204665, "loss": 0.0, "lr": 3.749235175622967e-08, "epoch": 4.606552170620282, "percentage": 92.13, "elapsed_time": "4:04:12", "remaining_time": "0:20:51", "throughput": 8670.28, "total_tokens": 127045416} +{"current_steps": 188565, "total_steps": 204665, "loss": 0.0, "lr": 3.746922331979552e-08, "epoch": 4.606674321452129, "percentage": 92.13, "elapsed_time": "4:04:13", "remaining_time": "0:20:51", "throughput": 8670.3, "total_tokens": 127048744} +{"current_steps": 188570, "total_steps": 204665, "loss": 0.0, "lr": 3.744610188314401e-08, "epoch": 4.6067964722839765, "percentage": 92.14, "elapsed_time": "4:04:13", "remaining_time": "0:20:50", "throughput": 8670.33, "total_tokens": 127052264} +{"current_steps": 188575, "total_steps": 204665, "loss": 0.0005, "lr": 3.7422987446443455e-08, "epoch": 4.606918623115823, "percentage": 92.14, "elapsed_time": "4:04:14", "remaining_time": "0:20:50", "throughput": 8670.35, "total_tokens": 127055528} +{"current_steps": 188580, "total_steps": 204665, "loss": 0.0, "lr": 3.739988000986172e-08, "epoch": 4.607040773947671, "percentage": 92.14, "elapsed_time": "4:04:14", "remaining_time": "0:20:49", "throughput": 8670.38, "total_tokens": 127059048} +{"current_steps": 188585, "total_steps": 204665, "loss": 0.0, "lr": 3.7376779573567106e-08, "epoch": 4.607162924779518, "percentage": 92.14, "elapsed_time": "4:04:14", "remaining_time": "0:20:49", "throughput": 8670.41, "total_tokens": 127062504} +{"current_steps": 188590, "total_steps": 204665, "loss": 0.0, "lr": 3.735368613772727e-08, "epoch": 4.607285075611365, "percentage": 92.15, "elapsed_time": "4:04:15", "remaining_time": "0:20:49", "throughput": 8670.44, "total_tokens": 127065896} +{"current_steps": 188595, "total_steps": 204665, "loss": 0.0, "lr": 3.733059970251051e-08, "epoch": 4.607407226443212, "percentage": 92.15, "elapsed_time": "4:04:15", "remaining_time": "0:20:48", "throughput": 8670.46, "total_tokens": 127069160} +{"current_steps": 188600, "total_steps": 204665, "loss": 0.0, "lr": 3.7307520268084483e-08, "epoch": 4.60752937727506, "percentage": 92.15, "elapsed_time": "4:04:15", "remaining_time": "0:20:48", "throughput": 8670.48, "total_tokens": 127072552} +{"current_steps": 188605, "total_steps": 204665, "loss": 0.0, "lr": 3.728444783461704e-08, "epoch": 4.607651528106906, "percentage": 92.15, "elapsed_time": "4:04:16", "remaining_time": "0:20:47", "throughput": 8670.51, "total_tokens": 127076008} +{"current_steps": 188610, "total_steps": 204665, "loss": 0.0, "lr": 3.726138240227628e-08, "epoch": 4.607773678938754, "percentage": 92.16, "elapsed_time": "4:04:16", "remaining_time": "0:20:47", "throughput": 8670.53, "total_tokens": 127079336} +{"current_steps": 188615, "total_steps": 204665, "loss": 0.0, "lr": 3.723832397122939e-08, "epoch": 4.607895829770601, "percentage": 92.16, "elapsed_time": "4:04:16", "remaining_time": "0:20:47", "throughput": 8670.55, "total_tokens": 127082664} +{"current_steps": 188620, "total_steps": 204665, "loss": 0.0, "lr": 3.721527254164458e-08, "epoch": 4.6080179806024475, "percentage": 92.16, "elapsed_time": "4:04:17", "remaining_time": "0:20:46", "throughput": 8670.55, "total_tokens": 127085672} +{"current_steps": 188625, "total_steps": 204665, "loss": 0.0, "lr": 3.719222811368916e-08, "epoch": 4.608140131434295, "percentage": 92.16, "elapsed_time": "4:04:17", "remaining_time": "0:20:46", "throughput": 8670.57, "total_tokens": 127088872} +{"current_steps": 188630, "total_steps": 204665, "loss": 0.0, "lr": 3.716919068753088e-08, "epoch": 4.608262282266142, "percentage": 92.17, "elapsed_time": "4:04:17", "remaining_time": "0:20:46", "throughput": 8670.6, "total_tokens": 127092328} +{"current_steps": 188635, "total_steps": 204665, "loss": 0.0, "lr": 3.714616026333728e-08, "epoch": 4.6083844330979895, "percentage": 92.17, "elapsed_time": "4:04:18", "remaining_time": "0:20:45", "throughput": 8670.62, "total_tokens": 127095656} +{"current_steps": 188640, "total_steps": 204665, "loss": 0.0446, "lr": 3.712313684127566e-08, "epoch": 4.608506583929836, "percentage": 92.17, "elapsed_time": "4:04:18", "remaining_time": "0:20:45", "throughput": 8670.65, "total_tokens": 127099176} +{"current_steps": 188645, "total_steps": 204665, "loss": 0.0, "lr": 3.710012042151367e-08, "epoch": 4.608628734761684, "percentage": 92.17, "elapsed_time": "4:04:18", "remaining_time": "0:20:44", "throughput": 8670.67, "total_tokens": 127102440} +{"current_steps": 188650, "total_steps": 204665, "loss": 0.0, "lr": 3.70771110042184e-08, "epoch": 4.608750885593531, "percentage": 92.18, "elapsed_time": "4:04:19", "remaining_time": "0:20:44", "throughput": 8670.71, "total_tokens": 127106216} +{"current_steps": 188655, "total_steps": 204665, "loss": 0.0, "lr": 3.70541085895576e-08, "epoch": 4.608873036425378, "percentage": 92.18, "elapsed_time": "4:04:19", "remaining_time": "0:20:44", "throughput": 8670.74, "total_tokens": 127109672} +{"current_steps": 188660, "total_steps": 204665, "loss": 0.0329, "lr": 3.703111317769814e-08, "epoch": 4.608995187257225, "percentage": 92.18, "elapsed_time": "4:04:19", "remaining_time": "0:20:43", "throughput": 8670.73, "total_tokens": 127112424} +{"current_steps": 188665, "total_steps": 204665, "loss": 0.0, "lr": 3.700812476880744e-08, "epoch": 4.609117338089073, "percentage": 92.18, "elapsed_time": "4:04:20", "remaining_time": "0:20:43", "throughput": 8670.75, "total_tokens": 127115624} +{"current_steps": 188670, "total_steps": 204665, "loss": 0.0, "lr": 3.6985143363052806e-08, "epoch": 4.609239488920919, "percentage": 92.18, "elapsed_time": "4:04:20", "remaining_time": "0:20:42", "throughput": 8670.77, "total_tokens": 127119016} +{"current_steps": 188675, "total_steps": 204665, "loss": 0.0, "lr": 3.696216896060112e-08, "epoch": 4.609361639752767, "percentage": 92.19, "elapsed_time": "4:04:20", "remaining_time": "0:20:42", "throughput": 8670.81, "total_tokens": 127122600} +{"current_steps": 188680, "total_steps": 204665, "loss": 0.0, "lr": 3.693920156161967e-08, "epoch": 4.609483790584614, "percentage": 92.19, "elapsed_time": "4:04:21", "remaining_time": "0:20:42", "throughput": 8670.85, "total_tokens": 127126312} +{"current_steps": 188685, "total_steps": 204665, "loss": 0.0, "lr": 3.691624116627523e-08, "epoch": 4.609605941416461, "percentage": 92.19, "elapsed_time": "4:04:21", "remaining_time": "0:20:41", "throughput": 8670.92, "total_tokens": 127130536} +{"current_steps": 188690, "total_steps": 204665, "loss": 0.0512, "lr": 3.6893287774735106e-08, "epoch": 4.609728092248308, "percentage": 92.19, "elapsed_time": "4:04:22", "remaining_time": "0:20:41", "throughput": 8670.94, "total_tokens": 127133800} +{"current_steps": 188695, "total_steps": 204665, "loss": 0.0, "lr": 3.687034138716594e-08, "epoch": 4.609850243080155, "percentage": 92.2, "elapsed_time": "4:04:22", "remaining_time": "0:20:40", "throughput": 8671.0, "total_tokens": 127137832} +{"current_steps": 188700, "total_steps": 204665, "loss": 0.0001, "lr": 3.6847402003734596e-08, "epoch": 4.6099723939120025, "percentage": 92.2, "elapsed_time": "4:04:22", "remaining_time": "0:20:40", "throughput": 8671.02, "total_tokens": 127141160} +{"current_steps": 188705, "total_steps": 204665, "loss": 0.0001, "lr": 3.682446962460817e-08, "epoch": 4.61009454474385, "percentage": 92.2, "elapsed_time": "4:04:23", "remaining_time": "0:20:40", "throughput": 8671.04, "total_tokens": 127144552} +{"current_steps": 188710, "total_steps": 204665, "loss": 0.0, "lr": 3.680154424995319e-08, "epoch": 4.610216695575697, "percentage": 92.2, "elapsed_time": "4:04:23", "remaining_time": "0:20:39", "throughput": 8671.08, "total_tokens": 127148136} +{"current_steps": 188715, "total_steps": 204665, "loss": 0.0, "lr": 3.677862587993652e-08, "epoch": 4.610338846407544, "percentage": 92.21, "elapsed_time": "4:04:23", "remaining_time": "0:20:39", "throughput": 8671.11, "total_tokens": 127151656} +{"current_steps": 188720, "total_steps": 204665, "loss": 0.0, "lr": 3.675571451472459e-08, "epoch": 4.610460997239391, "percentage": 92.21, "elapsed_time": "4:04:24", "remaining_time": "0:20:38", "throughput": 8671.14, "total_tokens": 127155112} +{"current_steps": 188725, "total_steps": 204665, "loss": 0.0, "lr": 3.673281015448437e-08, "epoch": 4.610583148071238, "percentage": 92.21, "elapsed_time": "4:04:24", "remaining_time": "0:20:38", "throughput": 8671.17, "total_tokens": 127158632} +{"current_steps": 188730, "total_steps": 204665, "loss": 0.0, "lr": 3.670991279938218e-08, "epoch": 4.610705298903086, "percentage": 92.21, "elapsed_time": "4:04:24", "remaining_time": "0:20:38", "throughput": 8671.21, "total_tokens": 127162216} +{"current_steps": 188735, "total_steps": 204665, "loss": 0.0, "lr": 3.668702244958466e-08, "epoch": 4.610827449734932, "percentage": 92.22, "elapsed_time": "4:04:25", "remaining_time": "0:20:37", "throughput": 8671.25, "total_tokens": 127165864} +{"current_steps": 188740, "total_steps": 204665, "loss": 0.0, "lr": 3.6664139105258115e-08, "epoch": 4.61094960056678, "percentage": 92.22, "elapsed_time": "4:04:25", "remaining_time": "0:20:37", "throughput": 8671.27, "total_tokens": 127169256} +{"current_steps": 188745, "total_steps": 204665, "loss": 0.0, "lr": 3.664126276656909e-08, "epoch": 4.611071751398627, "percentage": 92.22, "elapsed_time": "4:04:25", "remaining_time": "0:20:37", "throughput": 8671.3, "total_tokens": 127172648} +{"current_steps": 188750, "total_steps": 204665, "loss": 0.0, "lr": 3.6618393433684006e-08, "epoch": 4.611193902230474, "percentage": 92.22, "elapsed_time": "4:04:26", "remaining_time": "0:20:36", "throughput": 8671.32, "total_tokens": 127176040} +{"current_steps": 188755, "total_steps": 204665, "loss": 0.0001, "lr": 3.6595531106768826e-08, "epoch": 4.611316053062321, "percentage": 92.23, "elapsed_time": "4:04:26", "remaining_time": "0:20:36", "throughput": 8671.33, "total_tokens": 127179176} +{"current_steps": 188760, "total_steps": 204665, "loss": 0.0001, "lr": 3.657267578599021e-08, "epoch": 4.611438203894169, "percentage": 92.23, "elapsed_time": "4:04:26", "remaining_time": "0:20:35", "throughput": 8671.34, "total_tokens": 127182184} +{"current_steps": 188765, "total_steps": 204665, "loss": 0.0, "lr": 3.654982747151436e-08, "epoch": 4.6115603547260156, "percentage": 92.23, "elapsed_time": "4:04:27", "remaining_time": "0:20:35", "throughput": 8671.36, "total_tokens": 127185512} +{"current_steps": 188770, "total_steps": 204665, "loss": 0.0002, "lr": 3.652698616350713e-08, "epoch": 4.611682505557863, "percentage": 92.23, "elapsed_time": "4:04:27", "remaining_time": "0:20:35", "throughput": 8671.4, "total_tokens": 127189096} +{"current_steps": 188775, "total_steps": 204665, "loss": 0.0, "lr": 3.650415186213485e-08, "epoch": 4.61180465638971, "percentage": 92.24, "elapsed_time": "4:04:28", "remaining_time": "0:20:34", "throughput": 8671.44, "total_tokens": 127192744} +{"current_steps": 188780, "total_steps": 204665, "loss": 0.0, "lr": 3.648132456756348e-08, "epoch": 4.611926807221558, "percentage": 92.24, "elapsed_time": "4:04:28", "remaining_time": "0:20:34", "throughput": 8671.45, "total_tokens": 127195944} +{"current_steps": 188785, "total_steps": 204665, "loss": 0.0002, "lr": 3.645850427995911e-08, "epoch": 4.612048958053404, "percentage": 92.24, "elapsed_time": "4:04:28", "remaining_time": "0:20:33", "throughput": 8671.51, "total_tokens": 127199912} +{"current_steps": 188790, "total_steps": 204665, "loss": 0.0, "lr": 3.643569099948773e-08, "epoch": 4.612171108885251, "percentage": 92.24, "elapsed_time": "4:04:29", "remaining_time": "0:20:33", "throughput": 8671.55, "total_tokens": 127203560} +{"current_steps": 188795, "total_steps": 204665, "loss": 0.0, "lr": 3.6412884726315095e-08, "epoch": 4.612293259717099, "percentage": 92.25, "elapsed_time": "4:04:29", "remaining_time": "0:20:33", "throughput": 8671.56, "total_tokens": 127206760} +{"current_steps": 188800, "total_steps": 204665, "loss": 0.0, "lr": 3.639008546060718e-08, "epoch": 4.612415410548946, "percentage": 92.25, "elapsed_time": "4:04:29", "remaining_time": "0:20:32", "throughput": 8671.6, "total_tokens": 127210344} +{"current_steps": 188805, "total_steps": 204665, "loss": 0.0, "lr": 3.636729320252962e-08, "epoch": 4.612537561380793, "percentage": 92.25, "elapsed_time": "4:04:30", "remaining_time": "0:20:32", "throughput": 8671.66, "total_tokens": 127214504} +{"current_steps": 188810, "total_steps": 204665, "loss": 0.0002, "lr": 3.6344507952248525e-08, "epoch": 4.61265971221264, "percentage": 92.25, "elapsed_time": "4:04:30", "remaining_time": "0:20:31", "throughput": 8671.69, "total_tokens": 127217896} +{"current_steps": 188815, "total_steps": 204665, "loss": 0.0, "lr": 3.632172970992919e-08, "epoch": 4.6127818630444875, "percentage": 92.26, "elapsed_time": "4:04:30", "remaining_time": "0:20:31", "throughput": 8671.72, "total_tokens": 127221288} +{"current_steps": 188820, "total_steps": 204665, "loss": 0.0, "lr": 3.6298958475737384e-08, "epoch": 4.612904013876334, "percentage": 92.26, "elapsed_time": "4:04:31", "remaining_time": "0:20:31", "throughput": 8671.75, "total_tokens": 127224808} +{"current_steps": 188825, "total_steps": 204665, "loss": 0.0, "lr": 3.6276194249838855e-08, "epoch": 4.613026164708182, "percentage": 92.26, "elapsed_time": "4:04:31", "remaining_time": "0:20:30", "throughput": 8671.79, "total_tokens": 127228456} +{"current_steps": 188830, "total_steps": 204665, "loss": 0.0, "lr": 3.625343703239903e-08, "epoch": 4.613148315540029, "percentage": 92.26, "elapsed_time": "4:04:31", "remaining_time": "0:20:30", "throughput": 8671.82, "total_tokens": 127231976} +{"current_steps": 188835, "total_steps": 204665, "loss": 0.0, "lr": 3.623068682358354e-08, "epoch": 4.613270466371876, "percentage": 92.27, "elapsed_time": "4:04:32", "remaining_time": "0:20:29", "throughput": 8671.84, "total_tokens": 127235368} +{"current_steps": 188840, "total_steps": 204665, "loss": 0.0, "lr": 3.620794362355761e-08, "epoch": 4.613392617203723, "percentage": 92.27, "elapsed_time": "4:04:32", "remaining_time": "0:20:29", "throughput": 8671.86, "total_tokens": 127238568} +{"current_steps": 188845, "total_steps": 204665, "loss": 0.0, "lr": 3.6185207432486764e-08, "epoch": 4.613514768035571, "percentage": 92.27, "elapsed_time": "4:04:32", "remaining_time": "0:20:29", "throughput": 8671.88, "total_tokens": 127241896} +{"current_steps": 188850, "total_steps": 204665, "loss": 0.0, "lr": 3.616247825053631e-08, "epoch": 4.613636918867417, "percentage": 92.27, "elapsed_time": "4:04:33", "remaining_time": "0:20:28", "throughput": 8671.9, "total_tokens": 127245224} +{"current_steps": 188855, "total_steps": 204665, "loss": 0.0164, "lr": 3.6139756077871563e-08, "epoch": 4.613759069699265, "percentage": 92.28, "elapsed_time": "4:04:33", "remaining_time": "0:20:28", "throughput": 8671.92, "total_tokens": 127248488} +{"current_steps": 188860, "total_steps": 204665, "loss": 0.0, "lr": 3.6117040914657726e-08, "epoch": 4.613881220531112, "percentage": 92.28, "elapsed_time": "4:04:33", "remaining_time": "0:20:28", "throughput": 8671.93, "total_tokens": 127251624} +{"current_steps": 188865, "total_steps": 204665, "loss": 0.0, "lr": 3.6094332761059995e-08, "epoch": 4.614003371362959, "percentage": 92.28, "elapsed_time": "4:04:34", "remaining_time": "0:20:27", "throughput": 8671.95, "total_tokens": 127254824} +{"current_steps": 188870, "total_steps": 204665, "loss": 0.0, "lr": 3.6071631617243694e-08, "epoch": 4.614125522194806, "percentage": 92.28, "elapsed_time": "4:04:34", "remaining_time": "0:20:27", "throughput": 8671.97, "total_tokens": 127258216} +{"current_steps": 188875, "total_steps": 204665, "loss": 0.0, "lr": 3.604893748337356e-08, "epoch": 4.614247673026654, "percentage": 92.28, "elapsed_time": "4:04:35", "remaining_time": "0:20:26", "throughput": 8672.0, "total_tokens": 127261736} +{"current_steps": 188880, "total_steps": 204665, "loss": 0.0, "lr": 3.6026250359614926e-08, "epoch": 4.6143698238585005, "percentage": 92.29, "elapsed_time": "4:04:35", "remaining_time": "0:20:26", "throughput": 8672.04, "total_tokens": 127265256} +{"current_steps": 188885, "total_steps": 204665, "loss": 0.0, "lr": 3.600357024613265e-08, "epoch": 4.614491974690347, "percentage": 92.29, "elapsed_time": "4:04:35", "remaining_time": "0:20:26", "throughput": 8672.09, "total_tokens": 127269096} +{"current_steps": 188890, "total_steps": 204665, "loss": 0.0, "lr": 3.598089714309172e-08, "epoch": 4.614614125522195, "percentage": 92.29, "elapsed_time": "4:04:36", "remaining_time": "0:20:25", "throughput": 8672.13, "total_tokens": 127272744} +{"current_steps": 188895, "total_steps": 204665, "loss": 0.0, "lr": 3.5958231050656985e-08, "epoch": 4.614736276354042, "percentage": 92.29, "elapsed_time": "4:04:36", "remaining_time": "0:20:25", "throughput": 8672.15, "total_tokens": 127276136} +{"current_steps": 188900, "total_steps": 204665, "loss": 0.0, "lr": 3.5935571968993215e-08, "epoch": 4.614858427185889, "percentage": 92.3, "elapsed_time": "4:04:36", "remaining_time": "0:20:24", "throughput": 8672.17, "total_tokens": 127279400} +{"current_steps": 188905, "total_steps": 204665, "loss": 0.0, "lr": 3.5912919898265394e-08, "epoch": 4.614980578017736, "percentage": 92.3, "elapsed_time": "4:04:37", "remaining_time": "0:20:24", "throughput": 8672.21, "total_tokens": 127283112} +{"current_steps": 188910, "total_steps": 204665, "loss": 0.0, "lr": 3.5890274838638044e-08, "epoch": 4.615102728849584, "percentage": 92.3, "elapsed_time": "4:04:37", "remaining_time": "0:20:24", "throughput": 8672.25, "total_tokens": 127286632} +{"current_steps": 188915, "total_steps": 204665, "loss": 0.0, "lr": 3.5867636790275933e-08, "epoch": 4.61522487968143, "percentage": 92.3, "elapsed_time": "4:04:37", "remaining_time": "0:20:23", "throughput": 8672.26, "total_tokens": 127289832} +{"current_steps": 188920, "total_steps": 204665, "loss": 0.0, "lr": 3.5845005753343704e-08, "epoch": 4.615347030513278, "percentage": 92.31, "elapsed_time": "4:04:38", "remaining_time": "0:20:23", "throughput": 8672.27, "total_tokens": 127292840} +{"current_steps": 188925, "total_steps": 204665, "loss": 0.0, "lr": 3.582238172800589e-08, "epoch": 4.615469181345125, "percentage": 92.31, "elapsed_time": "4:04:38", "remaining_time": "0:20:22", "throughput": 8672.28, "total_tokens": 127296104} +{"current_steps": 188930, "total_steps": 204665, "loss": 0.0, "lr": 3.579976471442714e-08, "epoch": 4.615591332176972, "percentage": 92.31, "elapsed_time": "4:04:38", "remaining_time": "0:20:22", "throughput": 8672.32, "total_tokens": 127299624} +{"current_steps": 188935, "total_steps": 204665, "loss": 0.0, "lr": 3.577715471277176e-08, "epoch": 4.615713483008819, "percentage": 92.31, "elapsed_time": "4:04:39", "remaining_time": "0:20:22", "throughput": 8672.35, "total_tokens": 127303272} +{"current_steps": 188940, "total_steps": 204665, "loss": 0.0001, "lr": 3.5754551723204404e-08, "epoch": 4.615835633840667, "percentage": 92.32, "elapsed_time": "4:04:39", "remaining_time": "0:20:21", "throughput": 8672.4, "total_tokens": 127307048} +{"current_steps": 188945, "total_steps": 204665, "loss": 0.0, "lr": 3.573195574588917e-08, "epoch": 4.6159577846725135, "percentage": 92.32, "elapsed_time": "4:04:39", "remaining_time": "0:20:21", "throughput": 8672.43, "total_tokens": 127310568} +{"current_steps": 188950, "total_steps": 204665, "loss": 0.0, "lr": 3.570936678099046e-08, "epoch": 4.616079935504361, "percentage": 92.32, "elapsed_time": "4:04:40", "remaining_time": "0:20:20", "throughput": 8672.47, "total_tokens": 127314280} +{"current_steps": 188955, "total_steps": 204665, "loss": 0.0, "lr": 3.568678482867271e-08, "epoch": 4.616202086336208, "percentage": 92.32, "elapsed_time": "4:04:40", "remaining_time": "0:20:20", "throughput": 8672.52, "total_tokens": 127318056} +{"current_steps": 188960, "total_steps": 204665, "loss": 0.0383, "lr": 3.5664209889099904e-08, "epoch": 4.616324237168055, "percentage": 92.33, "elapsed_time": "4:04:40", "remaining_time": "0:20:20", "throughput": 8672.56, "total_tokens": 127321768} +{"current_steps": 188965, "total_steps": 204665, "loss": 0.0, "lr": 3.564164196243658e-08, "epoch": 4.616446387999902, "percentage": 92.33, "elapsed_time": "4:04:41", "remaining_time": "0:20:19", "throughput": 8672.6, "total_tokens": 127325352} +{"current_steps": 188970, "total_steps": 204665, "loss": 0.0, "lr": 3.5619081048846364e-08, "epoch": 4.61656853883175, "percentage": 92.33, "elapsed_time": "4:04:41", "remaining_time": "0:20:19", "throughput": 8672.62, "total_tokens": 127328616} +{"current_steps": 188975, "total_steps": 204665, "loss": 0.0, "lr": 3.559652714849359e-08, "epoch": 4.616690689663597, "percentage": 92.33, "elapsed_time": "4:04:42", "remaining_time": "0:20:19", "throughput": 8672.63, "total_tokens": 127331816} +{"current_steps": 188980, "total_steps": 204665, "loss": 0.0, "lr": 3.5573980261542345e-08, "epoch": 4.616812840495443, "percentage": 92.34, "elapsed_time": "4:04:42", "remaining_time": "0:20:18", "throughput": 8672.64, "total_tokens": 127334952} +{"current_steps": 188985, "total_steps": 204665, "loss": 0.0, "lr": 3.5551440388156494e-08, "epoch": 4.616934991327291, "percentage": 92.34, "elapsed_time": "4:04:42", "remaining_time": "0:20:18", "throughput": 8672.67, "total_tokens": 127338344} +{"current_steps": 188990, "total_steps": 204665, "loss": 0.0, "lr": 3.552890752850002e-08, "epoch": 4.617057142159138, "percentage": 92.34, "elapsed_time": "4:04:43", "remaining_time": "0:20:17", "throughput": 8672.67, "total_tokens": 127341288} +{"current_steps": 188995, "total_steps": 204665, "loss": 0.0, "lr": 3.550638168273667e-08, "epoch": 4.617179292990985, "percentage": 92.34, "elapsed_time": "4:04:43", "remaining_time": "0:20:17", "throughput": 8672.7, "total_tokens": 127344744} +{"current_steps": 189000, "total_steps": 204665, "loss": 0.0, "lr": 3.5483862851030444e-08, "epoch": 4.617301443822832, "percentage": 92.35, "elapsed_time": "4:04:43", "remaining_time": "0:20:17", "throughput": 8672.71, "total_tokens": 127347944} +{"current_steps": 189005, "total_steps": 204665, "loss": 0.0, "lr": 3.546135103354486e-08, "epoch": 4.61742359465468, "percentage": 92.35, "elapsed_time": "4:04:44", "remaining_time": "0:20:16", "throughput": 8672.73, "total_tokens": 127351208} +{"current_steps": 189010, "total_steps": 204665, "loss": 0.0001, "lr": 3.54388462304438e-08, "epoch": 4.6175457454865265, "percentage": 92.35, "elapsed_time": "4:04:44", "remaining_time": "0:20:16", "throughput": 8672.74, "total_tokens": 127354344} +{"current_steps": 189015, "total_steps": 204665, "loss": 0.0, "lr": 3.54163484418909e-08, "epoch": 4.617667896318374, "percentage": 92.35, "elapsed_time": "4:04:44", "remaining_time": "0:20:15", "throughput": 8672.75, "total_tokens": 127357416} +{"current_steps": 189020, "total_steps": 204665, "loss": 0.0, "lr": 3.53938576680497e-08, "epoch": 4.617790047150221, "percentage": 92.36, "elapsed_time": "4:04:45", "remaining_time": "0:20:15", "throughput": 8672.75, "total_tokens": 127360488} +{"current_steps": 189025, "total_steps": 204665, "loss": 0.0, "lr": 3.5371373909083956e-08, "epoch": 4.6179121979820685, "percentage": 92.36, "elapsed_time": "4:04:45", "remaining_time": "0:20:15", "throughput": 8672.78, "total_tokens": 127363816} +{"current_steps": 189030, "total_steps": 204665, "loss": 0.0, "lr": 3.534889716515688e-08, "epoch": 4.618034348813915, "percentage": 92.36, "elapsed_time": "4:04:45", "remaining_time": "0:20:14", "throughput": 8672.8, "total_tokens": 127367144} +{"current_steps": 189035, "total_steps": 204665, "loss": 0.0, "lr": 3.532642743643221e-08, "epoch": 4.618156499645763, "percentage": 92.36, "elapsed_time": "4:04:46", "remaining_time": "0:20:14", "throughput": 8672.79, "total_tokens": 127369960} +{"current_steps": 189040, "total_steps": 204665, "loss": 0.0, "lr": 3.5303964723073174e-08, "epoch": 4.61827865047761, "percentage": 92.37, "elapsed_time": "4:04:46", "remaining_time": "0:20:13", "throughput": 8672.87, "total_tokens": 127374248} +{"current_steps": 189045, "total_steps": 204665, "loss": 0.0, "lr": 3.528150902524319e-08, "epoch": 4.618400801309457, "percentage": 92.37, "elapsed_time": "4:04:46", "remaining_time": "0:20:13", "throughput": 8672.87, "total_tokens": 127377256} +{"current_steps": 189050, "total_steps": 204665, "loss": 0.0, "lr": 3.525906034310555e-08, "epoch": 4.618522952141304, "percentage": 92.37, "elapsed_time": "4:04:47", "remaining_time": "0:20:13", "throughput": 8672.91, "total_tokens": 127380904} +{"current_steps": 189055, "total_steps": 204665, "loss": 0.0, "lr": 3.523661867682348e-08, "epoch": 4.618645102973151, "percentage": 92.37, "elapsed_time": "4:04:47", "remaining_time": "0:20:12", "throughput": 8672.95, "total_tokens": 127384424} +{"current_steps": 189060, "total_steps": 204665, "loss": 0.0, "lr": 3.521418402656029e-08, "epoch": 4.618767253804998, "percentage": 92.38, "elapsed_time": "4:04:47", "remaining_time": "0:20:12", "throughput": 8672.95, "total_tokens": 127387496} +{"current_steps": 189065, "total_steps": 204665, "loss": 0.0, "lr": 3.519175639247895e-08, "epoch": 4.618889404636846, "percentage": 92.38, "elapsed_time": "4:04:48", "remaining_time": "0:20:11", "throughput": 8672.96, "total_tokens": 127390632} +{"current_steps": 189070, "total_steps": 204665, "loss": 0.0, "lr": 3.516933577474257e-08, "epoch": 4.619011555468693, "percentage": 92.38, "elapsed_time": "4:04:48", "remaining_time": "0:20:11", "throughput": 8672.97, "total_tokens": 127393640} +{"current_steps": 189075, "total_steps": 204665, "loss": 0.0008, "lr": 3.514692217351456e-08, "epoch": 4.6191337063005395, "percentage": 92.38, "elapsed_time": "4:04:48", "remaining_time": "0:20:11", "throughput": 8672.98, "total_tokens": 127396840} +{"current_steps": 189080, "total_steps": 204665, "loss": 0.0, "lr": 3.512451558895735e-08, "epoch": 4.619255857132387, "percentage": 92.39, "elapsed_time": "4:04:49", "remaining_time": "0:20:10", "throughput": 8673.03, "total_tokens": 127400616} +{"current_steps": 189085, "total_steps": 204665, "loss": 0.0, "lr": 3.510211602123447e-08, "epoch": 4.619378007964234, "percentage": 92.39, "elapsed_time": "4:04:49", "remaining_time": "0:20:10", "throughput": 8673.05, "total_tokens": 127403880} +{"current_steps": 189090, "total_steps": 204665, "loss": 0.0, "lr": 3.507972347050825e-08, "epoch": 4.619500158796082, "percentage": 92.39, "elapsed_time": "4:04:49", "remaining_time": "0:20:09", "throughput": 8673.08, "total_tokens": 127407464} +{"current_steps": 189095, "total_steps": 204665, "loss": 0.0, "lr": 3.50573379369421e-08, "epoch": 4.619622309627928, "percentage": 92.39, "elapsed_time": "4:04:50", "remaining_time": "0:20:09", "throughput": 8673.1, "total_tokens": 127410728} +{"current_steps": 189100, "total_steps": 204665, "loss": 0.0, "lr": 3.503495942069834e-08, "epoch": 4.619744460459776, "percentage": 92.39, "elapsed_time": "4:04:50", "remaining_time": "0:20:09", "throughput": 8673.12, "total_tokens": 127413992} +{"current_steps": 189105, "total_steps": 204665, "loss": 0.0, "lr": 3.501258792193984e-08, "epoch": 4.619866611291623, "percentage": 92.4, "elapsed_time": "4:04:51", "remaining_time": "0:20:08", "throughput": 8673.13, "total_tokens": 127417064} +{"current_steps": 189110, "total_steps": 204665, "loss": 0.0, "lr": 3.499022344082947e-08, "epoch": 4.61998876212347, "percentage": 92.4, "elapsed_time": "4:04:51", "remaining_time": "0:20:08", "throughput": 8673.15, "total_tokens": 127420392} +{"current_steps": 189115, "total_steps": 204665, "loss": 0.0, "lr": 3.4967865977529655e-08, "epoch": 4.620110912955317, "percentage": 92.4, "elapsed_time": "4:04:51", "remaining_time": "0:20:08", "throughput": 8673.17, "total_tokens": 127423784} +{"current_steps": 189120, "total_steps": 204665, "loss": 0.0, "lr": 3.494551553220326e-08, "epoch": 4.620233063787165, "percentage": 92.4, "elapsed_time": "4:04:52", "remaining_time": "0:20:07", "throughput": 8673.17, "total_tokens": 127426600} +{"current_steps": 189125, "total_steps": 204665, "loss": 0.0, "lr": 3.49231721050125e-08, "epoch": 4.620355214619011, "percentage": 92.41, "elapsed_time": "4:04:52", "remaining_time": "0:20:07", "throughput": 8673.19, "total_tokens": 127430056} +{"current_steps": 189130, "total_steps": 204665, "loss": 0.0, "lr": 3.490083569612001e-08, "epoch": 4.620477365450859, "percentage": 92.41, "elapsed_time": "4:04:52", "remaining_time": "0:20:06", "throughput": 8673.22, "total_tokens": 127433384} +{"current_steps": 189135, "total_steps": 204665, "loss": 0.0, "lr": 3.487850630568834e-08, "epoch": 4.620599516282706, "percentage": 92.41, "elapsed_time": "4:04:53", "remaining_time": "0:20:06", "throughput": 8673.24, "total_tokens": 127436776} +{"current_steps": 189140, "total_steps": 204665, "loss": 0.0, "lr": 3.4856183933879566e-08, "epoch": 4.6207216671145535, "percentage": 92.41, "elapsed_time": "4:04:53", "remaining_time": "0:20:06", "throughput": 8673.28, "total_tokens": 127440488} +{"current_steps": 189145, "total_steps": 204665, "loss": 0.0, "lr": 3.483386858085646e-08, "epoch": 4.6208438179464, "percentage": 92.42, "elapsed_time": "4:04:53", "remaining_time": "0:20:05", "throughput": 8673.31, "total_tokens": 127443880} +{"current_steps": 189150, "total_steps": 204665, "loss": 0.0, "lr": 3.481156024678089e-08, "epoch": 4.620965968778247, "percentage": 92.42, "elapsed_time": "4:04:54", "remaining_time": "0:20:05", "throughput": 8673.32, "total_tokens": 127447016} +{"current_steps": 189155, "total_steps": 204665, "loss": 0.0, "lr": 3.478925893181528e-08, "epoch": 4.621088119610095, "percentage": 92.42, "elapsed_time": "4:04:54", "remaining_time": "0:20:04", "throughput": 8673.35, "total_tokens": 127450536} +{"current_steps": 189160, "total_steps": 204665, "loss": 0.0, "lr": 3.4766964636121834e-08, "epoch": 4.621210270441941, "percentage": 92.42, "elapsed_time": "4:04:54", "remaining_time": "0:20:04", "throughput": 8673.43, "total_tokens": 127454824} +{"current_steps": 189165, "total_steps": 204665, "loss": 0.0, "lr": 3.474467735986264e-08, "epoch": 4.621332421273789, "percentage": 92.43, "elapsed_time": "4:04:55", "remaining_time": "0:20:04", "throughput": 8673.45, "total_tokens": 127458216} +{"current_steps": 189170, "total_steps": 204665, "loss": 0.0, "lr": 3.4722397103199797e-08, "epoch": 4.621454572105636, "percentage": 92.43, "elapsed_time": "4:04:55", "remaining_time": "0:20:03", "throughput": 8673.48, "total_tokens": 127461672} +{"current_steps": 189175, "total_steps": 204665, "loss": 0.0, "lr": 3.4700123866295174e-08, "epoch": 4.621576722937483, "percentage": 92.43, "elapsed_time": "4:04:55", "remaining_time": "0:20:03", "throughput": 8673.49, "total_tokens": 127464872} +{"current_steps": 189180, "total_steps": 204665, "loss": 0.0, "lr": 3.4677857649311084e-08, "epoch": 4.62169887376933, "percentage": 92.43, "elapsed_time": "4:04:56", "remaining_time": "0:20:02", "throughput": 8673.61, "total_tokens": 127469992} +{"current_steps": 189185, "total_steps": 204665, "loss": 0.0, "lr": 3.4655598452409066e-08, "epoch": 4.621821024601178, "percentage": 92.44, "elapsed_time": "4:04:56", "remaining_time": "0:20:02", "throughput": 8673.64, "total_tokens": 127473448} +{"current_steps": 189190, "total_steps": 204665, "loss": 0.0, "lr": 3.4633346275751206e-08, "epoch": 4.6219431754330245, "percentage": 92.44, "elapsed_time": "4:04:57", "remaining_time": "0:20:02", "throughput": 8673.66, "total_tokens": 127476904} +{"current_steps": 189195, "total_steps": 204665, "loss": 0.0, "lr": 3.461110111949939e-08, "epoch": 4.622065326264872, "percentage": 92.44, "elapsed_time": "4:04:57", "remaining_time": "0:20:01", "throughput": 8673.68, "total_tokens": 127480168} +{"current_steps": 189200, "total_steps": 204665, "loss": 0.0, "lr": 3.458886298381525e-08, "epoch": 4.622187477096719, "percentage": 92.44, "elapsed_time": "4:04:57", "remaining_time": "0:20:01", "throughput": 8673.73, "total_tokens": 127483944} +{"current_steps": 189205, "total_steps": 204665, "loss": 0.0, "lr": 3.456663186886055e-08, "epoch": 4.6223096279285665, "percentage": 92.45, "elapsed_time": "4:04:58", "remaining_time": "0:20:00", "throughput": 8673.74, "total_tokens": 127487080} +{"current_steps": 189210, "total_steps": 204665, "loss": 0.0, "lr": 3.454440777479695e-08, "epoch": 4.622431778760413, "percentage": 92.45, "elapsed_time": "4:04:58", "remaining_time": "0:20:00", "throughput": 8673.73, "total_tokens": 127489896} +{"current_steps": 189215, "total_steps": 204665, "loss": 0.0, "lr": 3.45221907017863e-08, "epoch": 4.622553929592261, "percentage": 92.45, "elapsed_time": "4:04:58", "remaining_time": "0:20:00", "throughput": 8673.73, "total_tokens": 127492840} +{"current_steps": 189220, "total_steps": 204665, "loss": 0.0, "lr": 3.4499980649989716e-08, "epoch": 4.622676080424108, "percentage": 92.45, "elapsed_time": "4:04:59", "remaining_time": "0:19:59", "throughput": 8673.75, "total_tokens": 127496168} +{"current_steps": 189225, "total_steps": 204665, "loss": 0.0343, "lr": 3.447777761956894e-08, "epoch": 4.622798231255955, "percentage": 92.46, "elapsed_time": "4:04:59", "remaining_time": "0:19:59", "throughput": 8673.78, "total_tokens": 127499560} +{"current_steps": 189230, "total_steps": 204665, "loss": 0.0, "lr": 3.445558161068574e-08, "epoch": 4.622920382087802, "percentage": 92.46, "elapsed_time": "4:04:59", "remaining_time": "0:19:59", "throughput": 8673.79, "total_tokens": 127502696} +{"current_steps": 189235, "total_steps": 204665, "loss": 0.0, "lr": 3.443339262350098e-08, "epoch": 4.62304253291965, "percentage": 92.46, "elapsed_time": "4:05:00", "remaining_time": "0:19:58", "throughput": 8673.82, "total_tokens": 127506216} +{"current_steps": 189240, "total_steps": 204665, "loss": 0.0004, "lr": 3.441121065817643e-08, "epoch": 4.623164683751496, "percentage": 92.46, "elapsed_time": "4:05:00", "remaining_time": "0:19:58", "throughput": 8673.87, "total_tokens": 127510056} +{"current_steps": 189245, "total_steps": 204665, "loss": 0.0, "lr": 3.438903571487317e-08, "epoch": 4.623286834583343, "percentage": 92.47, "elapsed_time": "4:05:00", "remaining_time": "0:19:57", "throughput": 8673.94, "total_tokens": 127514280} +{"current_steps": 189250, "total_steps": 204665, "loss": 0.0001, "lr": 3.4366867793752755e-08, "epoch": 4.623408985415191, "percentage": 92.47, "elapsed_time": "4:05:01", "remaining_time": "0:19:57", "throughput": 8673.98, "total_tokens": 127517864} +{"current_steps": 189255, "total_steps": 204665, "loss": 0.0, "lr": 3.434470689497615e-08, "epoch": 4.6235311362470375, "percentage": 92.47, "elapsed_time": "4:05:01", "remaining_time": "0:19:57", "throughput": 8674.0, "total_tokens": 127521256} +{"current_steps": 189260, "total_steps": 204665, "loss": 0.0, "lr": 3.432255301870435e-08, "epoch": 4.623653287078885, "percentage": 92.47, "elapsed_time": "4:05:01", "remaining_time": "0:19:56", "throughput": 8674.03, "total_tokens": 127524584} +{"current_steps": 189265, "total_steps": 204665, "loss": 0.0, "lr": 3.4300406165098884e-08, "epoch": 4.623775437910732, "percentage": 92.48, "elapsed_time": "4:05:02", "remaining_time": "0:19:56", "throughput": 8674.05, "total_tokens": 127528040} +{"current_steps": 189270, "total_steps": 204665, "loss": 0.0, "lr": 3.427826633432051e-08, "epoch": 4.6238975887425795, "percentage": 92.48, "elapsed_time": "4:05:02", "remaining_time": "0:19:55", "throughput": 8674.06, "total_tokens": 127531112} +{"current_steps": 189275, "total_steps": 204665, "loss": 0.0, "lr": 3.425613352653045e-08, "epoch": 4.624019739574426, "percentage": 92.48, "elapsed_time": "4:05:02", "remaining_time": "0:19:55", "throughput": 8674.09, "total_tokens": 127534440} +{"current_steps": 189280, "total_steps": 204665, "loss": 0.0, "lr": 3.423400774188945e-08, "epoch": 4.624141890406274, "percentage": 92.48, "elapsed_time": "4:05:03", "remaining_time": "0:19:55", "throughput": 8674.12, "total_tokens": 127538024} +{"current_steps": 189285, "total_steps": 204665, "loss": 0.0001, "lr": 3.421188898055838e-08, "epoch": 4.624264041238121, "percentage": 92.49, "elapsed_time": "4:05:03", "remaining_time": "0:19:54", "throughput": 8674.14, "total_tokens": 127541224} +{"current_steps": 189290, "total_steps": 204665, "loss": 0.0001, "lr": 3.4189777242698447e-08, "epoch": 4.624386192069968, "percentage": 92.49, "elapsed_time": "4:05:03", "remaining_time": "0:19:54", "throughput": 8674.17, "total_tokens": 127544808} +{"current_steps": 189295, "total_steps": 204665, "loss": 0.0, "lr": 3.416767252847008e-08, "epoch": 4.624508342901815, "percentage": 92.49, "elapsed_time": "4:05:04", "remaining_time": "0:19:53", "throughput": 8674.21, "total_tokens": 127548328} +{"current_steps": 189300, "total_steps": 204665, "loss": 0.0, "lr": 3.4145574838034264e-08, "epoch": 4.624630493733663, "percentage": 92.49, "elapsed_time": "4:05:04", "remaining_time": "0:19:53", "throughput": 8674.22, "total_tokens": 127551464} +{"current_steps": 189305, "total_steps": 204665, "loss": 0.0, "lr": 3.412348417155153e-08, "epoch": 4.624752644565509, "percentage": 92.5, "elapsed_time": "4:05:05", "remaining_time": "0:19:53", "throughput": 8674.24, "total_tokens": 127554792} +{"current_steps": 189310, "total_steps": 204665, "loss": 0.0, "lr": 3.410140052918275e-08, "epoch": 4.624874795397357, "percentage": 92.5, "elapsed_time": "4:05:05", "remaining_time": "0:19:52", "throughput": 8674.26, "total_tokens": 127558056} +{"current_steps": 189315, "total_steps": 204665, "loss": 0.0, "lr": 3.4079323911088256e-08, "epoch": 4.624996946229204, "percentage": 92.5, "elapsed_time": "4:05:05", "remaining_time": "0:19:52", "throughput": 8674.26, "total_tokens": 127561064} +{"current_steps": 189320, "total_steps": 204665, "loss": 0.0, "lr": 3.4057254317428674e-08, "epoch": 4.6251190970610505, "percentage": 92.5, "elapsed_time": "4:05:06", "remaining_time": "0:19:51", "throughput": 8674.27, "total_tokens": 127564200} +{"current_steps": 189325, "total_steps": 204665, "loss": 0.0, "lr": 3.4035191748364665e-08, "epoch": 4.625241247892898, "percentage": 92.5, "elapsed_time": "4:05:06", "remaining_time": "0:19:51", "throughput": 8674.29, "total_tokens": 127567400} +{"current_steps": 189330, "total_steps": 204665, "loss": 0.0, "lr": 3.4013136204056436e-08, "epoch": 4.625363398724746, "percentage": 92.51, "elapsed_time": "4:05:06", "remaining_time": "0:19:51", "throughput": 8674.3, "total_tokens": 127570600} +{"current_steps": 189335, "total_steps": 204665, "loss": 0.0, "lr": 3.3991087684664634e-08, "epoch": 4.6254855495565925, "percentage": 92.51, "elapsed_time": "4:05:07", "remaining_time": "0:19:50", "throughput": 8674.35, "total_tokens": 127574312} +{"current_steps": 189340, "total_steps": 204665, "loss": 0.0, "lr": 3.3969046190349346e-08, "epoch": 4.625607700388439, "percentage": 92.51, "elapsed_time": "4:05:07", "remaining_time": "0:19:50", "throughput": 8674.37, "total_tokens": 127577704} +{"current_steps": 189345, "total_steps": 204665, "loss": 0.0, "lr": 3.3947011721271126e-08, "epoch": 4.625729851220287, "percentage": 92.51, "elapsed_time": "4:05:07", "remaining_time": "0:19:50", "throughput": 8674.41, "total_tokens": 127581352} +{"current_steps": 189350, "total_steps": 204665, "loss": 0.0, "lr": 3.392498427758994e-08, "epoch": 4.625852002052134, "percentage": 92.52, "elapsed_time": "4:05:08", "remaining_time": "0:19:49", "throughput": 8674.45, "total_tokens": 127585000} +{"current_steps": 189355, "total_steps": 204665, "loss": 0.0, "lr": 3.390296385946623e-08, "epoch": 4.625974152883981, "percentage": 92.52, "elapsed_time": "4:05:08", "remaining_time": "0:19:49", "throughput": 8674.46, "total_tokens": 127588136} +{"current_steps": 189360, "total_steps": 204665, "loss": 0.0, "lr": 3.3880950467059964e-08, "epoch": 4.626096303715828, "percentage": 92.52, "elapsed_time": "4:05:08", "remaining_time": "0:19:48", "throughput": 8674.47, "total_tokens": 127591336} +{"current_steps": 189365, "total_steps": 204665, "loss": 0.0, "lr": 3.385894410053125e-08, "epoch": 4.626218454547676, "percentage": 92.52, "elapsed_time": "4:05:09", "remaining_time": "0:19:48", "throughput": 8674.5, "total_tokens": 127594856} +{"current_steps": 189370, "total_steps": 204665, "loss": 0.0, "lr": 3.383694476004018e-08, "epoch": 4.626340605379522, "percentage": 92.53, "elapsed_time": "4:05:09", "remaining_time": "0:19:48", "throughput": 8674.52, "total_tokens": 127597992} +{"current_steps": 189375, "total_steps": 204665, "loss": 0.0, "lr": 3.381495244574673e-08, "epoch": 4.62646275621137, "percentage": 92.53, "elapsed_time": "4:05:09", "remaining_time": "0:19:47", "throughput": 8674.52, "total_tokens": 127601000} +{"current_steps": 189380, "total_steps": 204665, "loss": 0.0, "lr": 3.379296715781066e-08, "epoch": 4.626584907043217, "percentage": 92.53, "elapsed_time": "4:05:10", "remaining_time": "0:19:47", "throughput": 8674.54, "total_tokens": 127604328} +{"current_steps": 189385, "total_steps": 204665, "loss": 0.0, "lr": 3.377098889639229e-08, "epoch": 4.626707057875064, "percentage": 92.53, "elapsed_time": "4:05:10", "remaining_time": "0:19:46", "throughput": 8674.54, "total_tokens": 127607336} +{"current_steps": 189390, "total_steps": 204665, "loss": 0.0, "lr": 3.374901766165095e-08, "epoch": 4.626829208706911, "percentage": 92.54, "elapsed_time": "4:05:10", "remaining_time": "0:19:46", "throughput": 8674.56, "total_tokens": 127610728} +{"current_steps": 189395, "total_steps": 204665, "loss": 0.0001, "lr": 3.372705345374671e-08, "epoch": 4.626951359538759, "percentage": 92.54, "elapsed_time": "4:05:11", "remaining_time": "0:19:46", "throughput": 8674.58, "total_tokens": 127613928} +{"current_steps": 189400, "total_steps": 204665, "loss": 0.0, "lr": 3.370509627283924e-08, "epoch": 4.6270735103706055, "percentage": 92.54, "elapsed_time": "4:05:11", "remaining_time": "0:19:45", "throughput": 8674.58, "total_tokens": 127616936} +{"current_steps": 189405, "total_steps": 204665, "loss": 0.0, "lr": 3.368314611908829e-08, "epoch": 4.627195661202453, "percentage": 92.54, "elapsed_time": "4:05:11", "remaining_time": "0:19:45", "throughput": 8674.6, "total_tokens": 127620200} +{"current_steps": 189410, "total_steps": 204665, "loss": 0.0, "lr": 3.3661202992653294e-08, "epoch": 4.6273178120343, "percentage": 92.55, "elapsed_time": "4:05:12", "remaining_time": "0:19:44", "throughput": 8674.62, "total_tokens": 127623400} +{"current_steps": 189415, "total_steps": 204665, "loss": 0.0, "lr": 3.3639266893693894e-08, "epoch": 4.627439962866147, "percentage": 92.55, "elapsed_time": "4:05:12", "remaining_time": "0:19:44", "throughput": 8674.63, "total_tokens": 127626664} +{"current_steps": 189420, "total_steps": 204665, "loss": 0.0, "lr": 3.361733782236986e-08, "epoch": 4.627562113697994, "percentage": 92.55, "elapsed_time": "4:05:12", "remaining_time": "0:19:44", "throughput": 8674.65, "total_tokens": 127629864} +{"current_steps": 189425, "total_steps": 204665, "loss": 0.0, "lr": 3.3595415778840284e-08, "epoch": 4.627684264529842, "percentage": 92.55, "elapsed_time": "4:05:13", "remaining_time": "0:19:43", "throughput": 8674.69, "total_tokens": 127633512} +{"current_steps": 189430, "total_steps": 204665, "loss": 0.0, "lr": 3.357350076326493e-08, "epoch": 4.627806415361689, "percentage": 92.56, "elapsed_time": "4:05:13", "remaining_time": "0:19:43", "throughput": 8674.71, "total_tokens": 127636840} +{"current_steps": 189435, "total_steps": 204665, "loss": 0.0, "lr": 3.355159277580289e-08, "epoch": 4.627928566193535, "percentage": 92.56, "elapsed_time": "4:05:14", "remaining_time": "0:19:42", "throughput": 8674.76, "total_tokens": 127640680} +{"current_steps": 189440, "total_steps": 204665, "loss": 0.0, "lr": 3.352969181661358e-08, "epoch": 4.628050717025383, "percentage": 92.56, "elapsed_time": "4:05:14", "remaining_time": "0:19:42", "throughput": 8674.79, "total_tokens": 127644072} +{"current_steps": 189445, "total_steps": 204665, "loss": 0.0, "lr": 3.350779788585645e-08, "epoch": 4.62817286785723, "percentage": 92.56, "elapsed_time": "4:05:14", "remaining_time": "0:19:42", "throughput": 8674.81, "total_tokens": 127647336} +{"current_steps": 189450, "total_steps": 204665, "loss": 0.0, "lr": 3.348591098369047e-08, "epoch": 4.6282950186890774, "percentage": 92.57, "elapsed_time": "4:05:15", "remaining_time": "0:19:41", "throughput": 8674.85, "total_tokens": 127651048} +{"current_steps": 189455, "total_steps": 204665, "loss": 0.0, "lr": 3.346403111027507e-08, "epoch": 4.628417169520924, "percentage": 92.57, "elapsed_time": "4:05:15", "remaining_time": "0:19:41", "throughput": 8674.87, "total_tokens": 127654312} +{"current_steps": 189460, "total_steps": 204665, "loss": 0.0, "lr": 3.344215826576913e-08, "epoch": 4.628539320352772, "percentage": 92.57, "elapsed_time": "4:05:15", "remaining_time": "0:19:41", "throughput": 8674.87, "total_tokens": 127657320} +{"current_steps": 189465, "total_steps": 204665, "loss": 0.0, "lr": 3.342029245033162e-08, "epoch": 4.628661471184619, "percentage": 92.57, "elapsed_time": "4:05:16", "remaining_time": "0:19:40", "throughput": 8674.9, "total_tokens": 127660712} +{"current_steps": 189470, "total_steps": 204665, "loss": 0.0, "lr": 3.339843366412198e-08, "epoch": 4.628783622016466, "percentage": 92.58, "elapsed_time": "4:05:16", "remaining_time": "0:19:40", "throughput": 8674.93, "total_tokens": 127664296} +{"current_steps": 189475, "total_steps": 204665, "loss": 0.0313, "lr": 3.337658190729864e-08, "epoch": 4.628905772848313, "percentage": 92.58, "elapsed_time": "4:05:16", "remaining_time": "0:19:39", "throughput": 8674.94, "total_tokens": 127667432} +{"current_steps": 189480, "total_steps": 204665, "loss": 0.0, "lr": 3.335473718002102e-08, "epoch": 4.629027923680161, "percentage": 92.58, "elapsed_time": "4:05:17", "remaining_time": "0:19:39", "throughput": 8674.95, "total_tokens": 127670568} +{"current_steps": 189485, "total_steps": 204665, "loss": 0.0, "lr": 3.333289948244755e-08, "epoch": 4.629150074512007, "percentage": 92.58, "elapsed_time": "4:05:17", "remaining_time": "0:19:39", "throughput": 8675.01, "total_tokens": 127674472} +{"current_steps": 189490, "total_steps": 204665, "loss": 0.0, "lr": 3.331106881473744e-08, "epoch": 4.629272225343855, "percentage": 92.59, "elapsed_time": "4:05:17", "remaining_time": "0:19:38", "throughput": 8675.03, "total_tokens": 127677736} +{"current_steps": 189495, "total_steps": 204665, "loss": 0.0631, "lr": 3.328924517704912e-08, "epoch": 4.629394376175702, "percentage": 92.59, "elapsed_time": "4:05:18", "remaining_time": "0:19:38", "throughput": 8675.05, "total_tokens": 127681128} +{"current_steps": 189500, "total_steps": 204665, "loss": 0.0, "lr": 3.326742856954135e-08, "epoch": 4.629516527007549, "percentage": 92.59, "elapsed_time": "4:05:18", "remaining_time": "0:19:37", "throughput": 8675.05, "total_tokens": 127684072} +{"current_steps": 189505, "total_steps": 204665, "loss": 0.0, "lr": 3.324561899237299e-08, "epoch": 4.629638677839396, "percentage": 92.59, "elapsed_time": "4:05:18", "remaining_time": "0:19:37", "throughput": 8675.06, "total_tokens": 127687208} +{"current_steps": 189510, "total_steps": 204665, "loss": 0.0, "lr": 3.322381644570238e-08, "epoch": 4.629760828671243, "percentage": 92.6, "elapsed_time": "4:05:19", "remaining_time": "0:19:37", "throughput": 8675.09, "total_tokens": 127690600} +{"current_steps": 189515, "total_steps": 204665, "loss": 0.0, "lr": 3.3202020929688376e-08, "epoch": 4.6298829795030905, "percentage": 92.6, "elapsed_time": "4:05:19", "remaining_time": "0:19:36", "throughput": 8675.12, "total_tokens": 127694120} +{"current_steps": 189520, "total_steps": 204665, "loss": 0.0, "lr": 3.31802324444892e-08, "epoch": 4.630005130334937, "percentage": 92.6, "elapsed_time": "4:05:19", "remaining_time": "0:19:36", "throughput": 8675.13, "total_tokens": 127697128} +{"current_steps": 189525, "total_steps": 204665, "loss": 0.0, "lr": 3.31584509902636e-08, "epoch": 4.630127281166785, "percentage": 92.6, "elapsed_time": "4:05:20", "remaining_time": "0:19:35", "throughput": 8675.15, "total_tokens": 127700456} +{"current_steps": 189530, "total_steps": 204665, "loss": 0.0, "lr": 3.3136676567169566e-08, "epoch": 4.630249431998632, "percentage": 92.6, "elapsed_time": "4:05:20", "remaining_time": "0:19:35", "throughput": 8675.17, "total_tokens": 127703848} +{"current_steps": 189535, "total_steps": 204665, "loss": 0.0, "lr": 3.3114909175365635e-08, "epoch": 4.630371582830479, "percentage": 92.61, "elapsed_time": "4:05:20", "remaining_time": "0:19:35", "throughput": 8675.2, "total_tokens": 127707240} +{"current_steps": 189540, "total_steps": 204665, "loss": 0.0, "lr": 3.3093148815010355e-08, "epoch": 4.630493733662326, "percentage": 92.61, "elapsed_time": "4:05:21", "remaining_time": "0:19:34", "throughput": 8675.26, "total_tokens": 127711208} +{"current_steps": 189545, "total_steps": 204665, "loss": 0.0, "lr": 3.30713954862617e-08, "epoch": 4.630615884494174, "percentage": 92.61, "elapsed_time": "4:05:21", "remaining_time": "0:19:34", "throughput": 8675.28, "total_tokens": 127714664} +{"current_steps": 189550, "total_steps": 204665, "loss": 0.0619, "lr": 3.304964918927788e-08, "epoch": 4.63073803532602, "percentage": 92.61, "elapsed_time": "4:05:22", "remaining_time": "0:19:33", "throughput": 8675.31, "total_tokens": 127718120} +{"current_steps": 189555, "total_steps": 204665, "loss": 0.0, "lr": 3.3027909924217114e-08, "epoch": 4.630860186157868, "percentage": 92.62, "elapsed_time": "4:05:22", "remaining_time": "0:19:33", "throughput": 8675.37, "total_tokens": 127722088} +{"current_steps": 189560, "total_steps": 204665, "loss": 0.0, "lr": 3.300617769123748e-08, "epoch": 4.630982336989715, "percentage": 92.62, "elapsed_time": "4:05:22", "remaining_time": "0:19:33", "throughput": 8675.39, "total_tokens": 127725416} +{"current_steps": 189565, "total_steps": 204665, "loss": 0.0, "lr": 3.2984452490497084e-08, "epoch": 4.631104487821562, "percentage": 92.62, "elapsed_time": "4:05:23", "remaining_time": "0:19:32", "throughput": 8675.38, "total_tokens": 127728232} +{"current_steps": 189570, "total_steps": 204665, "loss": 0.0, "lr": 3.29627343221538e-08, "epoch": 4.631226638653409, "percentage": 92.62, "elapsed_time": "4:05:23", "remaining_time": "0:19:32", "throughput": 8675.4, "total_tokens": 127731496} +{"current_steps": 189575, "total_steps": 204665, "loss": 0.0, "lr": 3.29410231863656e-08, "epoch": 4.631348789485257, "percentage": 92.63, "elapsed_time": "4:05:23", "remaining_time": "0:19:31", "throughput": 8675.4, "total_tokens": 127734440} +{"current_steps": 189580, "total_steps": 204665, "loss": 0.0, "lr": 3.291931908329026e-08, "epoch": 4.6314709403171035, "percentage": 92.63, "elapsed_time": "4:05:24", "remaining_time": "0:19:31", "throughput": 8675.43, "total_tokens": 127737832} +{"current_steps": 189585, "total_steps": 204665, "loss": 0.0, "lr": 3.289762201308599e-08, "epoch": 4.63159309114895, "percentage": 92.63, "elapsed_time": "4:05:24", "remaining_time": "0:19:31", "throughput": 8675.47, "total_tokens": 127741480} +{"current_steps": 189590, "total_steps": 204665, "loss": 0.0, "lr": 3.28759319759101e-08, "epoch": 4.631715241980798, "percentage": 92.63, "elapsed_time": "4:05:24", "remaining_time": "0:19:30", "throughput": 8675.47, "total_tokens": 127744424} +{"current_steps": 189595, "total_steps": 204665, "loss": 0.0584, "lr": 3.2854248971920574e-08, "epoch": 4.6318373928126455, "percentage": 92.64, "elapsed_time": "4:05:25", "remaining_time": "0:19:30", "throughput": 8675.47, "total_tokens": 127747304} +{"current_steps": 189600, "total_steps": 204665, "loss": 0.0, "lr": 3.283257300127529e-08, "epoch": 4.631959543644492, "percentage": 92.64, "elapsed_time": "4:05:25", "remaining_time": "0:19:30", "throughput": 8675.48, "total_tokens": 127750376} +{"current_steps": 189605, "total_steps": 204665, "loss": 0.0, "lr": 3.281090406413145e-08, "epoch": 4.632081694476339, "percentage": 92.64, "elapsed_time": "4:05:25", "remaining_time": "0:19:29", "throughput": 8675.52, "total_tokens": 127754152} +{"current_steps": 189610, "total_steps": 204665, "loss": 0.0, "lr": 3.2789242160647046e-08, "epoch": 4.632203845308187, "percentage": 92.64, "elapsed_time": "4:05:26", "remaining_time": "0:19:29", "throughput": 8675.54, "total_tokens": 127757480} +{"current_steps": 189615, "total_steps": 204665, "loss": 0.0, "lr": 3.276758729097928e-08, "epoch": 4.632325996140033, "percentage": 92.65, "elapsed_time": "4:05:26", "remaining_time": "0:19:28", "throughput": 8675.56, "total_tokens": 127760680} +{"current_steps": 189620, "total_steps": 204665, "loss": 0.0, "lr": 3.274593945528581e-08, "epoch": 4.632448146971881, "percentage": 92.65, "elapsed_time": "4:05:26", "remaining_time": "0:19:28", "throughput": 8675.57, "total_tokens": 127763880} +{"current_steps": 189625, "total_steps": 204665, "loss": 0.0, "lr": 3.272429865372406e-08, "epoch": 4.632570297803728, "percentage": 92.65, "elapsed_time": "4:05:27", "remaining_time": "0:19:28", "throughput": 8675.62, "total_tokens": 127767592} +{"current_steps": 189630, "total_steps": 204665, "loss": 0.0, "lr": 3.270266488645124e-08, "epoch": 4.632692448635575, "percentage": 92.65, "elapsed_time": "4:05:27", "remaining_time": "0:19:27", "throughput": 8675.62, "total_tokens": 127770664} +{"current_steps": 189635, "total_steps": 204665, "loss": 0.0, "lr": 3.2681038153624995e-08, "epoch": 4.632814599467422, "percentage": 92.66, "elapsed_time": "4:05:27", "remaining_time": "0:19:27", "throughput": 8675.63, "total_tokens": 127773736} +{"current_steps": 189640, "total_steps": 204665, "loss": 0.0, "lr": 3.265941845540232e-08, "epoch": 4.63293675029927, "percentage": 92.66, "elapsed_time": "4:05:28", "remaining_time": "0:19:26", "throughput": 8675.66, "total_tokens": 127777128} +{"current_steps": 189645, "total_steps": 204665, "loss": 0.0, "lr": 3.2637805791940645e-08, "epoch": 4.6330589011311165, "percentage": 92.66, "elapsed_time": "4:05:28", "remaining_time": "0:19:26", "throughput": 8675.69, "total_tokens": 127780648} +{"current_steps": 189650, "total_steps": 204665, "loss": 0.0, "lr": 3.2616200163396834e-08, "epoch": 4.633181051962964, "percentage": 92.66, "elapsed_time": "4:05:28", "remaining_time": "0:19:26", "throughput": 8675.73, "total_tokens": 127784360} +{"current_steps": 189655, "total_steps": 204665, "loss": 0.0001, "lr": 3.259460156992844e-08, "epoch": 4.633303202794811, "percentage": 92.67, "elapsed_time": "4:05:29", "remaining_time": "0:19:25", "throughput": 8675.74, "total_tokens": 127787496} +{"current_steps": 189660, "total_steps": 204665, "loss": 0.0003, "lr": 3.2573010011692105e-08, "epoch": 4.6334253536266585, "percentage": 92.67, "elapsed_time": "4:05:29", "remaining_time": "0:19:25", "throughput": 8675.75, "total_tokens": 127790504} +{"current_steps": 189665, "total_steps": 204665, "loss": 0.0, "lr": 3.2551425488845264e-08, "epoch": 4.633547504458505, "percentage": 92.67, "elapsed_time": "4:05:29", "remaining_time": "0:19:24", "throughput": 8675.78, "total_tokens": 127793960} +{"current_steps": 189670, "total_steps": 204665, "loss": 0.0, "lr": 3.252984800154446e-08, "epoch": 4.633669655290353, "percentage": 92.67, "elapsed_time": "4:05:30", "remaining_time": "0:19:24", "throughput": 8675.82, "total_tokens": 127797736} +{"current_steps": 189675, "total_steps": 204665, "loss": 0.0, "lr": 3.250827754994701e-08, "epoch": 4.6337918061222, "percentage": 92.68, "elapsed_time": "4:05:30", "remaining_time": "0:19:24", "throughput": 8675.86, "total_tokens": 127801320} +{"current_steps": 189680, "total_steps": 204665, "loss": 0.0, "lr": 3.248671413420956e-08, "epoch": 4.633913956954046, "percentage": 92.68, "elapsed_time": "4:05:31", "remaining_time": "0:19:23", "throughput": 8675.88, "total_tokens": 127804584} +{"current_steps": 189685, "total_steps": 204665, "loss": 0.0, "lr": 3.246515775448877e-08, "epoch": 4.634036107785894, "percentage": 92.68, "elapsed_time": "4:05:31", "remaining_time": "0:19:23", "throughput": 8675.89, "total_tokens": 127807784} +{"current_steps": 189690, "total_steps": 204665, "loss": 0.0, "lr": 3.2443608410941624e-08, "epoch": 4.634158258617742, "percentage": 92.68, "elapsed_time": "4:05:31", "remaining_time": "0:19:22", "throughput": 8675.9, "total_tokens": 127810920} +{"current_steps": 189695, "total_steps": 204665, "loss": 0.0, "lr": 3.2422066103725e-08, "epoch": 4.634280409449588, "percentage": 92.69, "elapsed_time": "4:05:32", "remaining_time": "0:19:22", "throughput": 8675.93, "total_tokens": 127814440} +{"current_steps": 189700, "total_steps": 204665, "loss": 0.0, "lr": 3.24005308329951e-08, "epoch": 4.634402560281435, "percentage": 92.69, "elapsed_time": "4:05:32", "remaining_time": "0:19:22", "throughput": 8675.96, "total_tokens": 127817832} +{"current_steps": 189705, "total_steps": 204665, "loss": 0.0183, "lr": 3.237900259890902e-08, "epoch": 4.634524711113283, "percentage": 92.69, "elapsed_time": "4:05:32", "remaining_time": "0:19:21", "throughput": 8675.97, "total_tokens": 127820968} +{"current_steps": 189710, "total_steps": 204665, "loss": 0.0, "lr": 3.2357481401622976e-08, "epoch": 4.6346468619451295, "percentage": 92.69, "elapsed_time": "4:05:33", "remaining_time": "0:19:21", "throughput": 8675.99, "total_tokens": 127824232} +{"current_steps": 189715, "total_steps": 204665, "loss": 0.0, "lr": 3.233596724129361e-08, "epoch": 4.634769012776977, "percentage": 92.7, "elapsed_time": "4:05:33", "remaining_time": "0:19:21", "throughput": 8676.0, "total_tokens": 127827368} +{"current_steps": 189720, "total_steps": 204665, "loss": 0.0, "lr": 3.231446011807737e-08, "epoch": 4.634891163608824, "percentage": 92.7, "elapsed_time": "4:05:33", "remaining_time": "0:19:20", "throughput": 8676.02, "total_tokens": 127830760} +{"current_steps": 189725, "total_steps": 204665, "loss": 0.0, "lr": 3.229296003213056e-08, "epoch": 4.6350133144406716, "percentage": 92.7, "elapsed_time": "4:05:34", "remaining_time": "0:19:20", "throughput": 8676.04, "total_tokens": 127834024} +{"current_steps": 189730, "total_steps": 204665, "loss": 0.0, "lr": 3.227146698360983e-08, "epoch": 4.635135465272518, "percentage": 92.7, "elapsed_time": "4:05:34", "remaining_time": "0:19:19", "throughput": 8676.06, "total_tokens": 127837352} +{"current_steps": 189735, "total_steps": 204665, "loss": 0.0, "lr": 3.224998097267106e-08, "epoch": 4.635257616104366, "percentage": 92.71, "elapsed_time": "4:05:34", "remaining_time": "0:19:19", "throughput": 8676.1, "total_tokens": 127841064} +{"current_steps": 189740, "total_steps": 204665, "loss": 0.0, "lr": 3.222850199947091e-08, "epoch": 4.635379766936213, "percentage": 92.71, "elapsed_time": "4:05:35", "remaining_time": "0:19:19", "throughput": 8676.12, "total_tokens": 127844264} +{"current_steps": 189745, "total_steps": 204665, "loss": 0.0, "lr": 3.2207030064165255e-08, "epoch": 4.63550191776806, "percentage": 92.71, "elapsed_time": "4:05:35", "remaining_time": "0:19:18", "throughput": 8676.14, "total_tokens": 127847656} +{"current_steps": 189750, "total_steps": 204665, "loss": 0.0, "lr": 3.21855651669104e-08, "epoch": 4.635624068599907, "percentage": 92.71, "elapsed_time": "4:05:35", "remaining_time": "0:19:18", "throughput": 8676.16, "total_tokens": 127850920} +{"current_steps": 189755, "total_steps": 204665, "loss": 0.0, "lr": 3.2164107307862456e-08, "epoch": 4.635746219431755, "percentage": 92.71, "elapsed_time": "4:05:36", "remaining_time": "0:19:17", "throughput": 8676.2, "total_tokens": 127854632} +{"current_steps": 189760, "total_steps": 204665, "loss": 0.0, "lr": 3.21426564871774e-08, "epoch": 4.635868370263601, "percentage": 92.72, "elapsed_time": "4:05:36", "remaining_time": "0:19:17", "throughput": 8676.22, "total_tokens": 127857896} +{"current_steps": 189765, "total_steps": 204665, "loss": 0.0, "lr": 3.212121270501134e-08, "epoch": 4.635990521095449, "percentage": 92.72, "elapsed_time": "4:05:36", "remaining_time": "0:19:17", "throughput": 8676.25, "total_tokens": 127861416} +{"current_steps": 189770, "total_steps": 204665, "loss": 0.0002, "lr": 3.209977596152025e-08, "epoch": 4.636112671927296, "percentage": 92.72, "elapsed_time": "4:05:37", "remaining_time": "0:19:16", "throughput": 8676.27, "total_tokens": 127864616} +{"current_steps": 189775, "total_steps": 204665, "loss": 0.0, "lr": 3.207834625685968e-08, "epoch": 4.636234822759143, "percentage": 92.72, "elapsed_time": "4:05:37", "remaining_time": "0:19:16", "throughput": 8676.28, "total_tokens": 127867816} +{"current_steps": 189780, "total_steps": 204665, "loss": 0.0, "lr": 3.205692359118595e-08, "epoch": 4.63635697359099, "percentage": 92.73, "elapsed_time": "4:05:37", "remaining_time": "0:19:15", "throughput": 8676.32, "total_tokens": 127871464} +{"current_steps": 189785, "total_steps": 204665, "loss": 0.0, "lr": 3.203550796465437e-08, "epoch": 4.636479124422837, "percentage": 92.73, "elapsed_time": "4:05:38", "remaining_time": "0:19:15", "throughput": 8676.33, "total_tokens": 127874664} +{"current_steps": 189790, "total_steps": 204665, "loss": 0.0, "lr": 3.2014099377421165e-08, "epoch": 4.636601275254685, "percentage": 92.73, "elapsed_time": "4:05:38", "remaining_time": "0:19:15", "throughput": 8676.35, "total_tokens": 127877864} +{"current_steps": 189795, "total_steps": 204665, "loss": 0.0, "lr": 3.199269782964165e-08, "epoch": 4.636723426086531, "percentage": 92.73, "elapsed_time": "4:05:39", "remaining_time": "0:19:14", "throughput": 8676.35, "total_tokens": 127880872} +{"current_steps": 189800, "total_steps": 204665, "loss": 0.0, "lr": 3.197130332147169e-08, "epoch": 4.636845576918379, "percentage": 92.74, "elapsed_time": "4:05:39", "remaining_time": "0:19:14", "throughput": 8676.38, "total_tokens": 127884328} +{"current_steps": 189805, "total_steps": 204665, "loss": 0.0, "lr": 3.194991585306661e-08, "epoch": 4.636967727750226, "percentage": 92.74, "elapsed_time": "4:05:39", "remaining_time": "0:19:13", "throughput": 8676.4, "total_tokens": 127887528} +{"current_steps": 189810, "total_steps": 204665, "loss": 0.0, "lr": 3.1928535424582294e-08, "epoch": 4.637089878582073, "percentage": 92.74, "elapsed_time": "4:05:40", "remaining_time": "0:19:13", "throughput": 8676.39, "total_tokens": 127890408} +{"current_steps": 189815, "total_steps": 204665, "loss": 0.0, "lr": 3.190716203617394e-08, "epoch": 4.63721202941392, "percentage": 92.74, "elapsed_time": "4:05:40", "remaining_time": "0:19:13", "throughput": 8676.43, "total_tokens": 127893992} +{"current_steps": 189820, "total_steps": 204665, "loss": 0.0675, "lr": 3.1885795687997094e-08, "epoch": 4.637334180245768, "percentage": 92.75, "elapsed_time": "4:05:40", "remaining_time": "0:19:12", "throughput": 8676.44, "total_tokens": 127897192} +{"current_steps": 189825, "total_steps": 204665, "loss": 0.0, "lr": 3.1864436380207196e-08, "epoch": 4.6374563310776145, "percentage": 92.75, "elapsed_time": "4:05:41", "remaining_time": "0:19:12", "throughput": 8676.46, "total_tokens": 127900520} +{"current_steps": 189830, "total_steps": 204665, "loss": 0.0, "lr": 3.1843084112959437e-08, "epoch": 4.637578481909462, "percentage": 92.75, "elapsed_time": "4:05:41", "remaining_time": "0:19:12", "throughput": 8676.5, "total_tokens": 127904168} +{"current_steps": 189835, "total_steps": 204665, "loss": 0.0, "lr": 3.182173888640927e-08, "epoch": 4.637700632741309, "percentage": 92.75, "elapsed_time": "4:05:41", "remaining_time": "0:19:11", "throughput": 8676.53, "total_tokens": 127907560} +{"current_steps": 189840, "total_steps": 204665, "loss": 0.0, "lr": 3.180040070071166e-08, "epoch": 4.6378227835731565, "percentage": 92.76, "elapsed_time": "4:05:42", "remaining_time": "0:19:11", "throughput": 8676.55, "total_tokens": 127910824} +{"current_steps": 189845, "total_steps": 204665, "loss": 0.0, "lr": 3.1779069556022055e-08, "epoch": 4.637944934405003, "percentage": 92.76, "elapsed_time": "4:05:42", "remaining_time": "0:19:10", "throughput": 8676.57, "total_tokens": 127914152} +{"current_steps": 189850, "total_steps": 204665, "loss": 0.0, "lr": 3.175774545249554e-08, "epoch": 4.638067085236851, "percentage": 92.76, "elapsed_time": "4:05:42", "remaining_time": "0:19:10", "throughput": 8676.58, "total_tokens": 127917416} +{"current_steps": 189855, "total_steps": 204665, "loss": 0.0, "lr": 3.1736428390287005e-08, "epoch": 4.638189236068698, "percentage": 92.76, "elapsed_time": "4:05:43", "remaining_time": "0:19:10", "throughput": 8676.64, "total_tokens": 127921256} +{"current_steps": 189860, "total_steps": 204665, "loss": 0.0, "lr": 3.171511836955176e-08, "epoch": 4.638311386900545, "percentage": 92.77, "elapsed_time": "4:05:43", "remaining_time": "0:19:09", "throughput": 8676.65, "total_tokens": 127924456} +{"current_steps": 189865, "total_steps": 204665, "loss": 0.0, "lr": 3.1693815390444466e-08, "epoch": 4.638433537732392, "percentage": 92.77, "elapsed_time": "4:05:43", "remaining_time": "0:19:09", "throughput": 8676.68, "total_tokens": 127927848} +{"current_steps": 189870, "total_steps": 204665, "loss": 0.0, "lr": 3.1672519453120325e-08, "epoch": 4.638555688564239, "percentage": 92.77, "elapsed_time": "4:05:44", "remaining_time": "0:19:08", "throughput": 8676.71, "total_tokens": 127931368} +{"current_steps": 189875, "total_steps": 204665, "loss": 0.0, "lr": 3.1651230557733996e-08, "epoch": 4.638677839396086, "percentage": 92.77, "elapsed_time": "4:05:44", "remaining_time": "0:19:08", "throughput": 8676.72, "total_tokens": 127934568} +{"current_steps": 189880, "total_steps": 204665, "loss": 0.0, "lr": 3.162994870444036e-08, "epoch": 4.638799990227933, "percentage": 92.78, "elapsed_time": "4:05:44", "remaining_time": "0:19:08", "throughput": 8676.72, "total_tokens": 127937384} +{"current_steps": 189885, "total_steps": 204665, "loss": 0.0, "lr": 3.1608673893394164e-08, "epoch": 4.638922141059781, "percentage": 92.78, "elapsed_time": "4:05:45", "remaining_time": "0:19:07", "throughput": 8676.77, "total_tokens": 127941352} +{"current_steps": 189890, "total_steps": 204665, "loss": 0.0, "lr": 3.158740612475019e-08, "epoch": 4.6390442918916275, "percentage": 92.78, "elapsed_time": "4:05:45", "remaining_time": "0:19:07", "throughput": 8676.78, "total_tokens": 127944424} +{"current_steps": 189895, "total_steps": 204665, "loss": 0.0, "lr": 3.1566145398663094e-08, "epoch": 4.639166442723475, "percentage": 92.78, "elapsed_time": "4:05:45", "remaining_time": "0:19:06", "throughput": 8676.82, "total_tokens": 127948136} +{"current_steps": 189900, "total_steps": 204665, "loss": 0.0007, "lr": 3.1544891715287405e-08, "epoch": 4.639288593555322, "percentage": 92.79, "elapsed_time": "4:05:46", "remaining_time": "0:19:06", "throughput": 8676.85, "total_tokens": 127951592} +{"current_steps": 189905, "total_steps": 204665, "loss": 0.0001, "lr": 3.1523645074777783e-08, "epoch": 4.6394107443871695, "percentage": 92.79, "elapsed_time": "4:05:46", "remaining_time": "0:19:06", "throughput": 8676.86, "total_tokens": 127954728} +{"current_steps": 189910, "total_steps": 204665, "loss": 0.0, "lr": 3.150240547728877e-08, "epoch": 4.639532895219016, "percentage": 92.79, "elapsed_time": "4:05:47", "remaining_time": "0:19:05", "throughput": 8676.92, "total_tokens": 127958696} +{"current_steps": 189915, "total_steps": 204665, "loss": 0.0, "lr": 3.1481172922974584e-08, "epoch": 4.639655046050864, "percentage": 92.79, "elapsed_time": "4:05:47", "remaining_time": "0:19:05", "throughput": 8676.95, "total_tokens": 127962152} +{"current_steps": 189920, "total_steps": 204665, "loss": 0.0, "lr": 3.145994741198998e-08, "epoch": 4.639777196882711, "percentage": 92.8, "elapsed_time": "4:05:47", "remaining_time": "0:19:04", "throughput": 8676.99, "total_tokens": 127965864} +{"current_steps": 189925, "total_steps": 204665, "loss": 0.0, "lr": 3.143872894448907e-08, "epoch": 4.639899347714558, "percentage": 92.8, "elapsed_time": "4:05:48", "remaining_time": "0:19:04", "throughput": 8677.01, "total_tokens": 127969192} +{"current_steps": 189930, "total_steps": 204665, "loss": 0.0537, "lr": 3.141751752062627e-08, "epoch": 4.640021498546405, "percentage": 92.8, "elapsed_time": "4:05:48", "remaining_time": "0:19:04", "throughput": 8677.02, "total_tokens": 127972328} +{"current_steps": 189935, "total_steps": 204665, "loss": 0.0, "lr": 3.1396313140555684e-08, "epoch": 4.640143649378253, "percentage": 92.8, "elapsed_time": "4:05:48", "remaining_time": "0:19:03", "throughput": 8677.04, "total_tokens": 127975592} +{"current_steps": 189940, "total_steps": 204665, "loss": 0.0164, "lr": 3.137511580443175e-08, "epoch": 4.640265800210099, "percentage": 92.81, "elapsed_time": "4:05:49", "remaining_time": "0:19:03", "throughput": 8677.06, "total_tokens": 127978984} +{"current_steps": 189945, "total_steps": 204665, "loss": 0.0, "lr": 3.135392551240856e-08, "epoch": 4.640387951041946, "percentage": 92.81, "elapsed_time": "4:05:49", "remaining_time": "0:19:03", "throughput": 8677.09, "total_tokens": 127982312} +{"current_steps": 189950, "total_steps": 204665, "loss": 0.0, "lr": 3.1332742264639996e-08, "epoch": 4.640510101873794, "percentage": 92.81, "elapsed_time": "4:05:49", "remaining_time": "0:19:02", "throughput": 8677.11, "total_tokens": 127985704} +{"current_steps": 189955, "total_steps": 204665, "loss": 0.0, "lr": 3.131156606128038e-08, "epoch": 4.640632252705641, "percentage": 92.81, "elapsed_time": "4:05:50", "remaining_time": "0:19:02", "throughput": 8677.12, "total_tokens": 127988712} +{"current_steps": 189960, "total_steps": 204665, "loss": 0.0, "lr": 3.129039690248359e-08, "epoch": 4.640754403537488, "percentage": 92.82, "elapsed_time": "4:05:50", "remaining_time": "0:19:01", "throughput": 8677.15, "total_tokens": 127992296} +{"current_steps": 189965, "total_steps": 204665, "loss": 0.0, "lr": 3.12692347884036e-08, "epoch": 4.640876554369335, "percentage": 92.82, "elapsed_time": "4:05:50", "remaining_time": "0:19:01", "throughput": 8677.17, "total_tokens": 127995496} +{"current_steps": 189970, "total_steps": 204665, "loss": 0.0001, "lr": 3.124807971919419e-08, "epoch": 4.6409987052011825, "percentage": 92.82, "elapsed_time": "4:05:51", "remaining_time": "0:19:01", "throughput": 8677.19, "total_tokens": 127998824} +{"current_steps": 189975, "total_steps": 204665, "loss": 0.0, "lr": 3.122693169500945e-08, "epoch": 4.641120856033029, "percentage": 92.82, "elapsed_time": "4:05:51", "remaining_time": "0:19:00", "throughput": 8677.21, "total_tokens": 128002088} +{"current_steps": 189980, "total_steps": 204665, "loss": 0.0, "lr": 3.120579071600282e-08, "epoch": 4.641243006864877, "percentage": 92.82, "elapsed_time": "4:05:51", "remaining_time": "0:19:00", "throughput": 8677.21, "total_tokens": 128005032} +{"current_steps": 189985, "total_steps": 204665, "loss": 0.0, "lr": 3.118465678232851e-08, "epoch": 4.641365157696724, "percentage": 92.83, "elapsed_time": "4:05:52", "remaining_time": "0:18:59", "throughput": 8677.26, "total_tokens": 128008872} +{"current_steps": 189990, "total_steps": 204665, "loss": 0.0, "lr": 3.1163529894139837e-08, "epoch": 4.641487308528571, "percentage": 92.83, "elapsed_time": "4:05:52", "remaining_time": "0:18:59", "throughput": 8677.28, "total_tokens": 128012200} +{"current_steps": 189995, "total_steps": 204665, "loss": 0.0, "lr": 3.1142410051590573e-08, "epoch": 4.641609459360418, "percentage": 92.83, "elapsed_time": "4:05:52", "remaining_time": "0:18:59", "throughput": 8677.29, "total_tokens": 128015400} +{"current_steps": 190000, "total_steps": 204665, "loss": 0.0, "lr": 3.112129725483425e-08, "epoch": 4.641731610192266, "percentage": 92.83, "elapsed_time": "4:05:53", "remaining_time": "0:18:58", "throughput": 8677.31, "total_tokens": 128018664} +{"current_steps": 190005, "total_steps": 204665, "loss": 0.0, "lr": 3.1100191504024545e-08, "epoch": 4.641853761024112, "percentage": 92.84, "elapsed_time": "4:05:53", "remaining_time": "0:18:58", "throughput": 8677.32, "total_tokens": 128021864} +{"current_steps": 190010, "total_steps": 204665, "loss": 0.0, "lr": 3.1079092799314757e-08, "epoch": 4.64197591185596, "percentage": 92.84, "elapsed_time": "4:05:53", "remaining_time": "0:18:57", "throughput": 8677.35, "total_tokens": 128025320} +{"current_steps": 190015, "total_steps": 204665, "loss": 0.0, "lr": 3.105800114085844e-08, "epoch": 4.642098062687807, "percentage": 92.84, "elapsed_time": "4:05:54", "remaining_time": "0:18:57", "throughput": 8677.38, "total_tokens": 128028840} +{"current_steps": 190020, "total_steps": 204665, "loss": 0.0, "lr": 3.103691652880891e-08, "epoch": 4.642220213519654, "percentage": 92.84, "elapsed_time": "4:05:54", "remaining_time": "0:18:57", "throughput": 8677.42, "total_tokens": 128032360} +{"current_steps": 190025, "total_steps": 204665, "loss": 0.0, "lr": 3.1015838963319605e-08, "epoch": 4.642342364351501, "percentage": 92.85, "elapsed_time": "4:05:55", "remaining_time": "0:18:56", "throughput": 8677.43, "total_tokens": 128035560} +{"current_steps": 190030, "total_steps": 204665, "loss": 0.0, "lr": 3.099476844454374e-08, "epoch": 4.642464515183349, "percentage": 92.85, "elapsed_time": "4:05:55", "remaining_time": "0:18:56", "throughput": 8677.47, "total_tokens": 128039208} +{"current_steps": 190035, "total_steps": 204665, "loss": 0.0, "lr": 3.0973704972634515e-08, "epoch": 4.6425866660151955, "percentage": 92.85, "elapsed_time": "4:05:55", "remaining_time": "0:18:55", "throughput": 8677.49, "total_tokens": 128042536} +{"current_steps": 190040, "total_steps": 204665, "loss": 0.0, "lr": 3.095264854774515e-08, "epoch": 4.642708816847042, "percentage": 92.85, "elapsed_time": "4:05:56", "remaining_time": "0:18:55", "throughput": 8677.5, "total_tokens": 128045672} +{"current_steps": 190045, "total_steps": 204665, "loss": 0.0, "lr": 3.0931599170028745e-08, "epoch": 4.64283096767889, "percentage": 92.86, "elapsed_time": "4:05:56", "remaining_time": "0:18:55", "throughput": 8677.52, "total_tokens": 128048872} +{"current_steps": 190050, "total_steps": 204665, "loss": 0.0, "lr": 3.0910556839638504e-08, "epoch": 4.6429531185107376, "percentage": 92.86, "elapsed_time": "4:05:56", "remaining_time": "0:18:54", "throughput": 8677.55, "total_tokens": 128052392} +{"current_steps": 190055, "total_steps": 204665, "loss": 0.0, "lr": 3.0889521556727304e-08, "epoch": 4.643075269342584, "percentage": 92.86, "elapsed_time": "4:05:57", "remaining_time": "0:18:54", "throughput": 8677.57, "total_tokens": 128055784} +{"current_steps": 190060, "total_steps": 204665, "loss": 0.0, "lr": 3.086849332144803e-08, "epoch": 4.643197420174431, "percentage": 92.86, "elapsed_time": "4:05:57", "remaining_time": "0:18:54", "throughput": 8677.61, "total_tokens": 128059432} +{"current_steps": 190065, "total_steps": 204665, "loss": 0.0, "lr": 3.084747213395389e-08, "epoch": 4.643319571006279, "percentage": 92.87, "elapsed_time": "4:05:57", "remaining_time": "0:18:53", "throughput": 8677.61, "total_tokens": 128062312} +{"current_steps": 190070, "total_steps": 204665, "loss": 0.0, "lr": 3.0826457994397533e-08, "epoch": 4.643441721838125, "percentage": 92.87, "elapsed_time": "4:05:58", "remaining_time": "0:18:53", "throughput": 8677.62, "total_tokens": 128065384} +{"current_steps": 190075, "total_steps": 204665, "loss": 0.0, "lr": 3.080545090293196e-08, "epoch": 4.643563872669973, "percentage": 92.87, "elapsed_time": "4:05:58", "remaining_time": "0:18:52", "throughput": 8677.66, "total_tokens": 128069096} +{"current_steps": 190080, "total_steps": 204665, "loss": 0.0, "lr": 3.078445085970982e-08, "epoch": 4.64368602350182, "percentage": 92.87, "elapsed_time": "4:05:58", "remaining_time": "0:18:52", "throughput": 8677.7, "total_tokens": 128072744} +{"current_steps": 190085, "total_steps": 204665, "loss": 0.0, "lr": 3.076345786488377e-08, "epoch": 4.643808174333667, "percentage": 92.88, "elapsed_time": "4:05:59", "remaining_time": "0:18:52", "throughput": 8677.73, "total_tokens": 128076200} +{"current_steps": 190090, "total_steps": 204665, "loss": 0.0, "lr": 3.074247191860657e-08, "epoch": 4.643930325165514, "percentage": 92.88, "elapsed_time": "4:05:59", "remaining_time": "0:18:51", "throughput": 8677.75, "total_tokens": 128079464} +{"current_steps": 190095, "total_steps": 204665, "loss": 0.0, "lr": 3.072149302103078e-08, "epoch": 4.644052475997362, "percentage": 92.88, "elapsed_time": "4:05:59", "remaining_time": "0:18:51", "throughput": 8677.79, "total_tokens": 128083240} +{"current_steps": 190100, "total_steps": 204665, "loss": 0.0042, "lr": 3.0700521172309035e-08, "epoch": 4.644174626829209, "percentage": 92.88, "elapsed_time": "4:06:00", "remaining_time": "0:18:50", "throughput": 8677.83, "total_tokens": 128086824} +{"current_steps": 190105, "total_steps": 204665, "loss": 0.0, "lr": 3.067955637259367e-08, "epoch": 4.644296777661056, "percentage": 92.89, "elapsed_time": "4:06:00", "remaining_time": "0:18:50", "throughput": 8677.86, "total_tokens": 128090472} +{"current_steps": 190110, "total_steps": 204665, "loss": 0.0, "lr": 3.065859862203746e-08, "epoch": 4.644418928492903, "percentage": 92.89, "elapsed_time": "4:06:00", "remaining_time": "0:18:50", "throughput": 8677.88, "total_tokens": 128093736} +{"current_steps": 190115, "total_steps": 204665, "loss": 0.0, "lr": 3.06376479207926e-08, "epoch": 4.644541079324751, "percentage": 92.89, "elapsed_time": "4:06:01", "remaining_time": "0:18:49", "throughput": 8677.89, "total_tokens": 128096744} +{"current_steps": 190120, "total_steps": 204665, "loss": 0.0, "lr": 3.061670426901153e-08, "epoch": 4.644663230156597, "percentage": 92.89, "elapsed_time": "4:06:01", "remaining_time": "0:18:49", "throughput": 8677.89, "total_tokens": 128099752} +{"current_steps": 190125, "total_steps": 204665, "loss": 0.0, "lr": 3.059576766684635e-08, "epoch": 4.644785380988445, "percentage": 92.9, "elapsed_time": "4:06:01", "remaining_time": "0:18:48", "throughput": 8677.89, "total_tokens": 128102568} +{"current_steps": 190130, "total_steps": 204665, "loss": 0.0, "lr": 3.0574838114449605e-08, "epoch": 4.644907531820292, "percentage": 92.9, "elapsed_time": "4:06:02", "remaining_time": "0:18:48", "throughput": 8677.88, "total_tokens": 128105448} +{"current_steps": 190135, "total_steps": 204665, "loss": 0.0, "lr": 3.05539156119734e-08, "epoch": 4.6450296826521384, "percentage": 92.9, "elapsed_time": "4:06:02", "remaining_time": "0:18:48", "throughput": 8677.89, "total_tokens": 128108520} +{"current_steps": 190140, "total_steps": 204665, "loss": 0.0, "lr": 3.053300015956983e-08, "epoch": 4.645151833483986, "percentage": 92.9, "elapsed_time": "4:06:02", "remaining_time": "0:18:47", "throughput": 8677.92, "total_tokens": 128111912} +{"current_steps": 190145, "total_steps": 204665, "loss": 0.0, "lr": 3.0512091757391114e-08, "epoch": 4.645273984315833, "percentage": 92.91, "elapsed_time": "4:06:03", "remaining_time": "0:18:47", "throughput": 8677.94, "total_tokens": 128115240} +{"current_steps": 190150, "total_steps": 204665, "loss": 0.0, "lr": 3.049119040558912e-08, "epoch": 4.6453961351476805, "percentage": 92.91, "elapsed_time": "4:06:03", "remaining_time": "0:18:46", "throughput": 8677.97, "total_tokens": 128118696} +{"current_steps": 190155, "total_steps": 204665, "loss": 0.0, "lr": 3.047029610431595e-08, "epoch": 4.645518285979527, "percentage": 92.91, "elapsed_time": "4:06:04", "remaining_time": "0:18:46", "throughput": 8677.97, "total_tokens": 128121640} +{"current_steps": 190160, "total_steps": 204665, "loss": 0.0, "lr": 3.0449408853723715e-08, "epoch": 4.645640436811375, "percentage": 92.91, "elapsed_time": "4:06:04", "remaining_time": "0:18:46", "throughput": 8677.98, "total_tokens": 128124904} +{"current_steps": 190165, "total_steps": 204665, "loss": 0.0, "lr": 3.0428528653963946e-08, "epoch": 4.645762587643222, "percentage": 92.92, "elapsed_time": "4:06:04", "remaining_time": "0:18:45", "throughput": 8678.01, "total_tokens": 128128232} +{"current_steps": 190170, "total_steps": 204665, "loss": 0.0, "lr": 3.040765550518887e-08, "epoch": 4.645884738475069, "percentage": 92.92, "elapsed_time": "4:06:05", "remaining_time": "0:18:45", "throughput": 8678.0, "total_tokens": 128131176} +{"current_steps": 190175, "total_steps": 204665, "loss": 0.0, "lr": 3.0386789407550017e-08, "epoch": 4.646006889306916, "percentage": 92.92, "elapsed_time": "4:06:05", "remaining_time": "0:18:45", "throughput": 8678.03, "total_tokens": 128134568} +{"current_steps": 190180, "total_steps": 204665, "loss": 0.0, "lr": 3.036593036119928e-08, "epoch": 4.646129040138764, "percentage": 92.92, "elapsed_time": "4:06:05", "remaining_time": "0:18:44", "throughput": 8678.04, "total_tokens": 128137704} +{"current_steps": 190185, "total_steps": 204665, "loss": 0.0, "lr": 3.034507836628841e-08, "epoch": 4.64625119097061, "percentage": 92.93, "elapsed_time": "4:06:06", "remaining_time": "0:18:44", "throughput": 8678.07, "total_tokens": 128141160} +{"current_steps": 190190, "total_steps": 204665, "loss": 0.0, "lr": 3.0324233422968747e-08, "epoch": 4.646373341802458, "percentage": 92.93, "elapsed_time": "4:06:06", "remaining_time": "0:18:43", "throughput": 8678.12, "total_tokens": 128145128} +{"current_steps": 190195, "total_steps": 204665, "loss": 0.0, "lr": 3.030339553139216e-08, "epoch": 4.646495492634305, "percentage": 92.93, "elapsed_time": "4:06:06", "remaining_time": "0:18:43", "throughput": 8678.15, "total_tokens": 128148648} +{"current_steps": 190200, "total_steps": 204665, "loss": 0.0, "lr": 3.0282564691709975e-08, "epoch": 4.646617643466152, "percentage": 92.93, "elapsed_time": "4:06:07", "remaining_time": "0:18:43", "throughput": 8678.2, "total_tokens": 128152360} +{"current_steps": 190205, "total_steps": 204665, "loss": 0.0, "lr": 3.0261740904073965e-08, "epoch": 4.646739794297999, "percentage": 92.93, "elapsed_time": "4:06:07", "remaining_time": "0:18:42", "throughput": 8678.22, "total_tokens": 128155688} +{"current_steps": 190210, "total_steps": 204665, "loss": 0.0, "lr": 3.024092416863533e-08, "epoch": 4.646861945129846, "percentage": 92.94, "elapsed_time": "4:06:07", "remaining_time": "0:18:42", "throughput": 8678.23, "total_tokens": 128158888} +{"current_steps": 190215, "total_steps": 204665, "loss": 0.0, "lr": 3.02201144855454e-08, "epoch": 4.6469840959616935, "percentage": 92.94, "elapsed_time": "4:06:08", "remaining_time": "0:18:41", "throughput": 8678.25, "total_tokens": 128162216} +{"current_steps": 190220, "total_steps": 204665, "loss": 0.0, "lr": 3.0199311854955837e-08, "epoch": 4.647106246793541, "percentage": 92.94, "elapsed_time": "4:06:08", "remaining_time": "0:18:41", "throughput": 8678.28, "total_tokens": 128165736} +{"current_steps": 190225, "total_steps": 204665, "loss": 0.0, "lr": 3.017851627701762e-08, "epoch": 4.647228397625388, "percentage": 92.94, "elapsed_time": "4:06:08", "remaining_time": "0:18:41", "throughput": 8678.29, "total_tokens": 128168808} +{"current_steps": 190230, "total_steps": 204665, "loss": 0.0, "lr": 3.015772775188219e-08, "epoch": 4.647350548457235, "percentage": 92.95, "elapsed_time": "4:06:09", "remaining_time": "0:18:40", "throughput": 8678.34, "total_tokens": 128172584} +{"current_steps": 190235, "total_steps": 204665, "loss": 0.0, "lr": 3.013694627970054e-08, "epoch": 4.647472699289082, "percentage": 92.95, "elapsed_time": "4:06:09", "remaining_time": "0:18:40", "throughput": 8678.36, "total_tokens": 128175912} +{"current_steps": 190240, "total_steps": 204665, "loss": 0.0, "lr": 3.011617186062387e-08, "epoch": 4.647594850120929, "percentage": 92.95, "elapsed_time": "4:06:09", "remaining_time": "0:18:39", "throughput": 8678.38, "total_tokens": 128179240} +{"current_steps": 190245, "total_steps": 204665, "loss": 0.0, "lr": 3.00954044948033e-08, "epoch": 4.647717000952777, "percentage": 92.95, "elapsed_time": "4:06:10", "remaining_time": "0:18:39", "throughput": 8678.39, "total_tokens": 128182376} +{"current_steps": 190250, "total_steps": 204665, "loss": 0.065, "lr": 3.0074644182389694e-08, "epoch": 4.647839151784623, "percentage": 92.96, "elapsed_time": "4:06:10", "remaining_time": "0:18:39", "throughput": 8678.41, "total_tokens": 128185640} +{"current_steps": 190255, "total_steps": 204665, "loss": 0.0, "lr": 3.0053890923534273e-08, "epoch": 4.647961302616471, "percentage": 92.96, "elapsed_time": "4:06:10", "remaining_time": "0:18:38", "throughput": 8678.43, "total_tokens": 128188904} +{"current_steps": 190260, "total_steps": 204665, "loss": 0.0, "lr": 3.00331447183878e-08, "epoch": 4.648083453448318, "percentage": 92.96, "elapsed_time": "4:06:11", "remaining_time": "0:18:38", "throughput": 8678.44, "total_tokens": 128192104} +{"current_steps": 190265, "total_steps": 204665, "loss": 0.0, "lr": 3.0012405567101275e-08, "epoch": 4.648205604280165, "percentage": 92.96, "elapsed_time": "4:06:11", "remaining_time": "0:18:37", "throughput": 8678.46, "total_tokens": 128195368} +{"current_steps": 190270, "total_steps": 204665, "loss": 0.0, "lr": 2.999167346982534e-08, "epoch": 4.648327755112012, "percentage": 92.97, "elapsed_time": "4:06:12", "remaining_time": "0:18:37", "throughput": 8678.49, "total_tokens": 128198824} +{"current_steps": 190275, "total_steps": 204665, "loss": 0.0, "lr": 2.997094842671099e-08, "epoch": 4.64844990594386, "percentage": 92.97, "elapsed_time": "4:06:12", "remaining_time": "0:18:37", "throughput": 8678.49, "total_tokens": 128201832} +{"current_steps": 190280, "total_steps": 204665, "loss": 0.0, "lr": 2.9950230437908676e-08, "epoch": 4.6485720567757065, "percentage": 92.97, "elapsed_time": "4:06:12", "remaining_time": "0:18:36", "throughput": 8678.49, "total_tokens": 128204776} +{"current_steps": 190285, "total_steps": 204665, "loss": 0.0, "lr": 2.992951950356926e-08, "epoch": 4.648694207607554, "percentage": 92.97, "elapsed_time": "4:06:13", "remaining_time": "0:18:36", "throughput": 8678.53, "total_tokens": 128208488} +{"current_steps": 190290, "total_steps": 204665, "loss": 0.0563, "lr": 2.990881562384318e-08, "epoch": 4.648816358439401, "percentage": 92.98, "elapsed_time": "4:06:13", "remaining_time": "0:18:36", "throughput": 8678.57, "total_tokens": 128212136} +{"current_steps": 190295, "total_steps": 204665, "loss": 0.0, "lr": 2.9888118798881315e-08, "epoch": 4.6489385092712485, "percentage": 92.98, "elapsed_time": "4:06:13", "remaining_time": "0:18:35", "throughput": 8678.59, "total_tokens": 128215400} +{"current_steps": 190300, "total_steps": 204665, "loss": 0.0, "lr": 2.986742902883388e-08, "epoch": 4.649060660103095, "percentage": 92.98, "elapsed_time": "4:06:14", "remaining_time": "0:18:35", "throughput": 8678.62, "total_tokens": 128218856} +{"current_steps": 190305, "total_steps": 204665, "loss": 0.0001, "lr": 2.984674631385142e-08, "epoch": 4.649182810934942, "percentage": 92.98, "elapsed_time": "4:06:14", "remaining_time": "0:18:34", "throughput": 8678.62, "total_tokens": 128221736} +{"current_steps": 190310, "total_steps": 204665, "loss": 0.0, "lr": 2.982607065408427e-08, "epoch": 4.64930496176679, "percentage": 92.99, "elapsed_time": "4:06:14", "remaining_time": "0:18:34", "throughput": 8678.67, "total_tokens": 128225576} +{"current_steps": 190315, "total_steps": 204665, "loss": 0.0001, "lr": 2.980540204968307e-08, "epoch": 4.649427112598637, "percentage": 92.99, "elapsed_time": "4:06:15", "remaining_time": "0:18:34", "throughput": 8678.68, "total_tokens": 128228776} +{"current_steps": 190320, "total_steps": 204665, "loss": 0.0, "lr": 2.9784740500797822e-08, "epoch": 4.649549263430484, "percentage": 92.99, "elapsed_time": "4:06:15", "remaining_time": "0:18:33", "throughput": 8678.71, "total_tokens": 128232232} +{"current_steps": 190325, "total_steps": 204665, "loss": 0.0, "lr": 2.9764086007578958e-08, "epoch": 4.649671414262331, "percentage": 92.99, "elapsed_time": "4:06:15", "remaining_time": "0:18:33", "throughput": 8678.74, "total_tokens": 128235560} +{"current_steps": 190330, "total_steps": 204665, "loss": 0.0, "lr": 2.974343857017647e-08, "epoch": 4.649793565094178, "percentage": 93.0, "elapsed_time": "4:06:16", "remaining_time": "0:18:32", "throughput": 8678.77, "total_tokens": 128239080} +{"current_steps": 190335, "total_steps": 204665, "loss": 0.0, "lr": 2.9722798188740907e-08, "epoch": 4.649915715926025, "percentage": 93.0, "elapsed_time": "4:06:16", "remaining_time": "0:18:32", "throughput": 8678.77, "total_tokens": 128242088} +{"current_steps": 190340, "total_steps": 204665, "loss": 0.0, "lr": 2.970216486342192e-08, "epoch": 4.650037866757873, "percentage": 93.0, "elapsed_time": "4:06:16", "remaining_time": "0:18:32", "throughput": 8678.78, "total_tokens": 128245224} +{"current_steps": 190345, "total_steps": 204665, "loss": 0.0, "lr": 2.9681538594369837e-08, "epoch": 4.6501600175897195, "percentage": 93.0, "elapsed_time": "4:06:17", "remaining_time": "0:18:31", "throughput": 8678.81, "total_tokens": 128248552} +{"current_steps": 190350, "total_steps": 204665, "loss": 0.0, "lr": 2.9660919381734652e-08, "epoch": 4.650282168421567, "percentage": 93.01, "elapsed_time": "4:06:17", "remaining_time": "0:18:31", "throughput": 8678.84, "total_tokens": 128252072} +{"current_steps": 190355, "total_steps": 204665, "loss": 0.0, "lr": 2.964030722566613e-08, "epoch": 4.650404319253414, "percentage": 93.01, "elapsed_time": "4:06:17", "remaining_time": "0:18:30", "throughput": 8678.87, "total_tokens": 128255528} +{"current_steps": 190360, "total_steps": 204665, "loss": 0.0317, "lr": 2.961970212631437e-08, "epoch": 4.6505264700852615, "percentage": 93.01, "elapsed_time": "4:06:18", "remaining_time": "0:18:30", "throughput": 8678.9, "total_tokens": 128259176} +{"current_steps": 190365, "total_steps": 204665, "loss": 0.0, "lr": 2.9599104083829153e-08, "epoch": 4.650648620917108, "percentage": 93.01, "elapsed_time": "4:06:18", "remaining_time": "0:18:30", "throughput": 8678.93, "total_tokens": 128262504} +{"current_steps": 190370, "total_steps": 204665, "loss": 0.0, "lr": 2.9578513098360235e-08, "epoch": 4.650770771748956, "percentage": 93.02, "elapsed_time": "4:06:18", "remaining_time": "0:18:29", "throughput": 8678.93, "total_tokens": 128265512} +{"current_steps": 190375, "total_steps": 204665, "loss": 0.0, "lr": 2.9557929170057282e-08, "epoch": 4.650892922580803, "percentage": 93.02, "elapsed_time": "4:06:19", "remaining_time": "0:18:29", "throughput": 8678.95, "total_tokens": 128268776} +{"current_steps": 190380, "total_steps": 204665, "loss": 0.0, "lr": 2.9537352299070173e-08, "epoch": 4.65101507341265, "percentage": 93.02, "elapsed_time": "4:06:19", "remaining_time": "0:18:28", "throughput": 8678.97, "total_tokens": 128272232} +{"current_steps": 190385, "total_steps": 204665, "loss": 0.0, "lr": 2.9516782485548563e-08, "epoch": 4.651137224244497, "percentage": 93.02, "elapsed_time": "4:06:20", "remaining_time": "0:18:28", "throughput": 8679.0, "total_tokens": 128275624} +{"current_steps": 190390, "total_steps": 204665, "loss": 0.0, "lr": 2.9496219729641892e-08, "epoch": 4.651259375076345, "percentage": 93.03, "elapsed_time": "4:06:20", "remaining_time": "0:18:28", "throughput": 8679.02, "total_tokens": 128278888} +{"current_steps": 190395, "total_steps": 204665, "loss": 0.1665, "lr": 2.947566403149959e-08, "epoch": 4.651381525908191, "percentage": 93.03, "elapsed_time": "4:06:20", "remaining_time": "0:18:27", "throughput": 8679.04, "total_tokens": 128282152} +{"current_steps": 190400, "total_steps": 204665, "loss": 0.0, "lr": 2.9455115391271546e-08, "epoch": 4.651503676740038, "percentage": 93.03, "elapsed_time": "4:06:21", "remaining_time": "0:18:27", "throughput": 8679.07, "total_tokens": 128285800} +{"current_steps": 190405, "total_steps": 204665, "loss": 0.0, "lr": 2.9434573809106744e-08, "epoch": 4.651625827571886, "percentage": 93.03, "elapsed_time": "4:06:21", "remaining_time": "0:18:27", "throughput": 8679.08, "total_tokens": 128288936} +{"current_steps": 190410, "total_steps": 204665, "loss": 0.0, "lr": 2.9414039285154846e-08, "epoch": 4.6517479784037326, "percentage": 93.03, "elapsed_time": "4:06:21", "remaining_time": "0:18:26", "throughput": 8679.09, "total_tokens": 128292008} +{"current_steps": 190415, "total_steps": 204665, "loss": 0.0, "lr": 2.9393511819565063e-08, "epoch": 4.65187012923558, "percentage": 93.04, "elapsed_time": "4:06:22", "remaining_time": "0:18:26", "throughput": 8679.13, "total_tokens": 128295592} +{"current_steps": 190420, "total_steps": 204665, "loss": 0.0, "lr": 2.9372991412486836e-08, "epoch": 4.651992280067427, "percentage": 93.04, "elapsed_time": "4:06:22", "remaining_time": "0:18:25", "throughput": 8679.16, "total_tokens": 128299112} +{"current_steps": 190425, "total_steps": 204665, "loss": 0.0, "lr": 2.9352478064069152e-08, "epoch": 4.652114430899275, "percentage": 93.04, "elapsed_time": "4:06:22", "remaining_time": "0:18:25", "throughput": 8679.19, "total_tokens": 128302568} +{"current_steps": 190430, "total_steps": 204665, "loss": 0.0, "lr": 2.933197177446145e-08, "epoch": 4.652236581731121, "percentage": 93.04, "elapsed_time": "4:06:23", "remaining_time": "0:18:25", "throughput": 8679.22, "total_tokens": 128306024} +{"current_steps": 190435, "total_steps": 204665, "loss": 0.0, "lr": 2.931147254381261e-08, "epoch": 4.652358732562969, "percentage": 93.05, "elapsed_time": "4:06:23", "remaining_time": "0:18:24", "throughput": 8679.25, "total_tokens": 128309480} +{"current_steps": 190440, "total_steps": 204665, "loss": 0.0006, "lr": 2.9290980372271736e-08, "epoch": 4.652480883394816, "percentage": 93.05, "elapsed_time": "4:06:23", "remaining_time": "0:18:24", "throughput": 8679.27, "total_tokens": 128312808} +{"current_steps": 190445, "total_steps": 204665, "loss": 0.0, "lr": 2.927049525998815e-08, "epoch": 4.652603034226663, "percentage": 93.05, "elapsed_time": "4:06:24", "remaining_time": "0:18:23", "throughput": 8679.29, "total_tokens": 128316072} +{"current_steps": 190450, "total_steps": 204665, "loss": 0.0002, "lr": 2.925001720711051e-08, "epoch": 4.65272518505851, "percentage": 93.05, "elapsed_time": "4:06:24", "remaining_time": "0:18:23", "throughput": 8679.33, "total_tokens": 128319720} +{"current_steps": 190455, "total_steps": 204665, "loss": 0.0, "lr": 2.9229546213787925e-08, "epoch": 4.652847335890358, "percentage": 93.06, "elapsed_time": "4:06:24", "remaining_time": "0:18:23", "throughput": 8679.34, "total_tokens": 128322984} +{"current_steps": 190460, "total_steps": 204665, "loss": 0.0, "lr": 2.920908228016916e-08, "epoch": 4.6529694867222045, "percentage": 93.06, "elapsed_time": "4:06:25", "remaining_time": "0:18:22", "throughput": 8679.39, "total_tokens": 128326824} +{"current_steps": 190465, "total_steps": 204665, "loss": 0.0, "lr": 2.918862540640299e-08, "epoch": 4.653091637554052, "percentage": 93.06, "elapsed_time": "4:06:25", "remaining_time": "0:18:22", "throughput": 8679.4, "total_tokens": 128329896} +{"current_steps": 190470, "total_steps": 204665, "loss": 0.0, "lr": 2.9168175592638288e-08, "epoch": 4.653213788385899, "percentage": 93.06, "elapsed_time": "4:06:25", "remaining_time": "0:18:21", "throughput": 8679.4, "total_tokens": 128332776} +{"current_steps": 190475, "total_steps": 204665, "loss": 0.0, "lr": 2.914773283902372e-08, "epoch": 4.653335939217746, "percentage": 93.07, "elapsed_time": "4:06:26", "remaining_time": "0:18:21", "throughput": 8679.43, "total_tokens": 128336360} +{"current_steps": 190480, "total_steps": 204665, "loss": 0.0, "lr": 2.9127297145708052e-08, "epoch": 4.653458090049593, "percentage": 93.07, "elapsed_time": "4:06:26", "remaining_time": "0:18:21", "throughput": 8679.45, "total_tokens": 128339624} +{"current_steps": 190485, "total_steps": 204665, "loss": 0.0, "lr": 2.9106868512839722e-08, "epoch": 4.653580240881441, "percentage": 93.07, "elapsed_time": "4:06:26", "remaining_time": "0:18:20", "throughput": 8679.48, "total_tokens": 128343080} +{"current_steps": 190490, "total_steps": 204665, "loss": 0.0, "lr": 2.90864469405675e-08, "epoch": 4.653702391713288, "percentage": 93.07, "elapsed_time": "4:06:27", "remaining_time": "0:18:20", "throughput": 8679.5, "total_tokens": 128346280} +{"current_steps": 190495, "total_steps": 204665, "loss": 0.0, "lr": 2.906603242903971e-08, "epoch": 4.653824542545134, "percentage": 93.08, "elapsed_time": "4:06:27", "remaining_time": "0:18:19", "throughput": 8679.49, "total_tokens": 128349096} +{"current_steps": 190500, "total_steps": 204665, "loss": 0.0, "lr": 2.9045624978404793e-08, "epoch": 4.653946693376982, "percentage": 93.08, "elapsed_time": "4:06:27", "remaining_time": "0:18:19", "throughput": 8679.5, "total_tokens": 128352168} +{"current_steps": 190505, "total_steps": 204665, "loss": 0.0, "lr": 2.9025224588811402e-08, "epoch": 4.654068844208829, "percentage": 93.08, "elapsed_time": "4:06:28", "remaining_time": "0:18:19", "throughput": 8679.53, "total_tokens": 128355560} +{"current_steps": 190510, "total_steps": 204665, "loss": 0.0, "lr": 2.9004831260407647e-08, "epoch": 4.654190995040676, "percentage": 93.08, "elapsed_time": "4:06:28", "remaining_time": "0:18:18", "throughput": 8679.55, "total_tokens": 128359016} +{"current_steps": 190515, "total_steps": 204665, "loss": 0.0, "lr": 2.898444499334196e-08, "epoch": 4.654313145872523, "percentage": 93.09, "elapsed_time": "4:06:29", "remaining_time": "0:18:18", "throughput": 8679.58, "total_tokens": 128362344} +{"current_steps": 190520, "total_steps": 204665, "loss": 0.0, "lr": 2.896406578776256e-08, "epoch": 4.654435296704371, "percentage": 93.09, "elapsed_time": "4:06:29", "remaining_time": "0:18:18", "throughput": 8679.58, "total_tokens": 128365416} +{"current_steps": 190525, "total_steps": 204665, "loss": 0.0, "lr": 2.8943693643817656e-08, "epoch": 4.6545574475362175, "percentage": 93.09, "elapsed_time": "4:06:29", "remaining_time": "0:18:17", "throughput": 8679.61, "total_tokens": 128368872} +{"current_steps": 190530, "total_steps": 204665, "loss": 0.0, "lr": 2.8923328561655357e-08, "epoch": 4.654679598368065, "percentage": 93.09, "elapsed_time": "4:06:30", "remaining_time": "0:18:17", "throughput": 8679.64, "total_tokens": 128372392} +{"current_steps": 190535, "total_steps": 204665, "loss": 0.0467, "lr": 2.8902970541423765e-08, "epoch": 4.654801749199912, "percentage": 93.1, "elapsed_time": "4:06:30", "remaining_time": "0:18:16", "throughput": 8679.67, "total_tokens": 128375720} +{"current_steps": 190540, "total_steps": 204665, "loss": 0.0, "lr": 2.8882619583270983e-08, "epoch": 4.6549239000317595, "percentage": 93.1, "elapsed_time": "4:06:30", "remaining_time": "0:18:16", "throughput": 8679.71, "total_tokens": 128379496} +{"current_steps": 190545, "total_steps": 204665, "loss": 0.0, "lr": 2.8862275687345004e-08, "epoch": 4.655046050863606, "percentage": 93.1, "elapsed_time": "4:06:31", "remaining_time": "0:18:16", "throughput": 8679.77, "total_tokens": 128383400} +{"current_steps": 190550, "total_steps": 204665, "loss": 0.0, "lr": 2.8841938853793823e-08, "epoch": 4.655168201695454, "percentage": 93.1, "elapsed_time": "4:06:31", "remaining_time": "0:18:15", "throughput": 8679.79, "total_tokens": 128386792} +{"current_steps": 190555, "total_steps": 204665, "loss": 0.0, "lr": 2.8821609082765207e-08, "epoch": 4.655290352527301, "percentage": 93.11, "elapsed_time": "4:06:31", "remaining_time": "0:18:15", "throughput": 8679.81, "total_tokens": 128390120} +{"current_steps": 190560, "total_steps": 204665, "loss": 0.0, "lr": 2.880128637440704e-08, "epoch": 4.655412503359148, "percentage": 93.11, "elapsed_time": "4:06:32", "remaining_time": "0:18:14", "throughput": 8679.84, "total_tokens": 128393576} +{"current_steps": 190565, "total_steps": 204665, "loss": 0.0, "lr": 2.8780970728867204e-08, "epoch": 4.655534654190995, "percentage": 93.11, "elapsed_time": "4:06:32", "remaining_time": "0:18:14", "throughput": 8679.87, "total_tokens": 128397032} +{"current_steps": 190570, "total_steps": 204665, "loss": 0.0, "lr": 2.8760662146293357e-08, "epoch": 4.655656805022842, "percentage": 93.11, "elapsed_time": "4:06:32", "remaining_time": "0:18:14", "throughput": 8679.88, "total_tokens": 128400168} +{"current_steps": 190575, "total_steps": 204665, "loss": 0.0, "lr": 2.874036062683327e-08, "epoch": 4.655778955854689, "percentage": 93.12, "elapsed_time": "4:06:33", "remaining_time": "0:18:13", "throughput": 8679.9, "total_tokens": 128403560} +{"current_steps": 190580, "total_steps": 204665, "loss": 0.0625, "lr": 2.8720066170634383e-08, "epoch": 4.655901106686537, "percentage": 93.12, "elapsed_time": "4:06:33", "remaining_time": "0:18:13", "throughput": 8679.91, "total_tokens": 128406632} +{"current_steps": 190585, "total_steps": 204665, "loss": 0.0, "lr": 2.8699778777844574e-08, "epoch": 4.656023257518384, "percentage": 93.12, "elapsed_time": "4:06:33", "remaining_time": "0:18:12", "throughput": 8679.95, "total_tokens": 128410280} +{"current_steps": 190590, "total_steps": 204665, "loss": 0.0, "lr": 2.867949844861106e-08, "epoch": 4.6561454083502305, "percentage": 93.12, "elapsed_time": "4:06:34", "remaining_time": "0:18:12", "throughput": 8680.02, "total_tokens": 128414568} +{"current_steps": 190595, "total_steps": 204665, "loss": 0.0, "lr": 2.8659225183081613e-08, "epoch": 4.656267559182078, "percentage": 93.13, "elapsed_time": "4:06:34", "remaining_time": "0:18:12", "throughput": 8680.04, "total_tokens": 128417768} +{"current_steps": 190600, "total_steps": 204665, "loss": 0.0, "lr": 2.863895898140345e-08, "epoch": 4.656389710013925, "percentage": 93.13, "elapsed_time": "4:06:34", "remaining_time": "0:18:11", "throughput": 8680.06, "total_tokens": 128421032} +{"current_steps": 190605, "total_steps": 204665, "loss": 0.0, "lr": 2.8618699843724115e-08, "epoch": 4.6565118608457725, "percentage": 93.13, "elapsed_time": "4:06:35", "remaining_time": "0:18:11", "throughput": 8680.07, "total_tokens": 128424168} +{"current_steps": 190610, "total_steps": 204665, "loss": 0.0, "lr": 2.8598447770190938e-08, "epoch": 4.656634011677619, "percentage": 93.13, "elapsed_time": "4:06:35", "remaining_time": "0:18:10", "throughput": 8680.09, "total_tokens": 128427496} +{"current_steps": 190615, "total_steps": 204665, "loss": 0.0, "lr": 2.857820276095091e-08, "epoch": 4.656756162509467, "percentage": 93.14, "elapsed_time": "4:06:35", "remaining_time": "0:18:10", "throughput": 8680.11, "total_tokens": 128430824} +{"current_steps": 190620, "total_steps": 204665, "loss": 0.0, "lr": 2.855796481615158e-08, "epoch": 4.656878313341314, "percentage": 93.14, "elapsed_time": "4:06:36", "remaining_time": "0:18:10", "throughput": 8680.13, "total_tokens": 128434088} +{"current_steps": 190625, "total_steps": 204665, "loss": 0.0, "lr": 2.8537733935940055e-08, "epoch": 4.657000464173161, "percentage": 93.14, "elapsed_time": "4:06:36", "remaining_time": "0:18:09", "throughput": 8680.17, "total_tokens": 128437864} +{"current_steps": 190630, "total_steps": 204665, "loss": 0.0, "lr": 2.851751012046333e-08, "epoch": 4.657122615005008, "percentage": 93.14, "elapsed_time": "4:06:37", "remaining_time": "0:18:09", "throughput": 8680.22, "total_tokens": 128441576} +{"current_steps": 190635, "total_steps": 204665, "loss": 0.0, "lr": 2.8497293369868723e-08, "epoch": 4.657244765836856, "percentage": 93.14, "elapsed_time": "4:06:37", "remaining_time": "0:18:09", "throughput": 8680.22, "total_tokens": 128444520} +{"current_steps": 190640, "total_steps": 204665, "loss": 0.0, "lr": 2.8477083684302904e-08, "epoch": 4.657366916668702, "percentage": 93.15, "elapsed_time": "4:06:37", "remaining_time": "0:18:08", "throughput": 8680.26, "total_tokens": 128448296} +{"current_steps": 190645, "total_steps": 204665, "loss": 0.0, "lr": 2.8456881063913195e-08, "epoch": 4.65748906750055, "percentage": 93.15, "elapsed_time": "4:06:38", "remaining_time": "0:18:08", "throughput": 8680.26, "total_tokens": 128451240} +{"current_steps": 190650, "total_steps": 204665, "loss": 0.0, "lr": 2.843668550884626e-08, "epoch": 4.657611218332397, "percentage": 93.15, "elapsed_time": "4:06:38", "remaining_time": "0:18:07", "throughput": 8680.28, "total_tokens": 128454504} +{"current_steps": 190655, "total_steps": 204665, "loss": 0.0, "lr": 2.8416497019249086e-08, "epoch": 4.657733369164244, "percentage": 93.15, "elapsed_time": "4:06:38", "remaining_time": "0:18:07", "throughput": 8680.29, "total_tokens": 128457640} +{"current_steps": 190660, "total_steps": 204665, "loss": 0.0, "lr": 2.839631559526856e-08, "epoch": 4.657855519996091, "percentage": 93.16, "elapsed_time": "4:06:39", "remaining_time": "0:18:07", "throughput": 8680.32, "total_tokens": 128461096} +{"current_steps": 190665, "total_steps": 204665, "loss": 0.0, "lr": 2.8376141237051234e-08, "epoch": 4.657977670827938, "percentage": 93.16, "elapsed_time": "4:06:39", "remaining_time": "0:18:06", "throughput": 8680.34, "total_tokens": 128464424} +{"current_steps": 190670, "total_steps": 204665, "loss": 0.0, "lr": 2.8355973944743982e-08, "epoch": 4.6580998216597855, "percentage": 93.16, "elapsed_time": "4:06:39", "remaining_time": "0:18:06", "throughput": 8680.37, "total_tokens": 128467944} +{"current_steps": 190675, "total_steps": 204665, "loss": 0.0, "lr": 2.8335813718493474e-08, "epoch": 4.658221972491633, "percentage": 93.16, "elapsed_time": "4:06:40", "remaining_time": "0:18:05", "throughput": 8680.38, "total_tokens": 128471016} +{"current_steps": 190680, "total_steps": 204665, "loss": 0.0, "lr": 2.8315660558446252e-08, "epoch": 4.65834412332348, "percentage": 93.17, "elapsed_time": "4:06:40", "remaining_time": "0:18:05", "throughput": 8680.42, "total_tokens": 128474600} +{"current_steps": 190685, "total_steps": 204665, "loss": 0.0, "lr": 2.829551446474887e-08, "epoch": 4.658466274155327, "percentage": 93.17, "elapsed_time": "4:06:40", "remaining_time": "0:18:05", "throughput": 8680.44, "total_tokens": 128478056} +{"current_steps": 190690, "total_steps": 204665, "loss": 0.0001, "lr": 2.8275375437547876e-08, "epoch": 4.658588424987174, "percentage": 93.17, "elapsed_time": "4:06:41", "remaining_time": "0:18:04", "throughput": 8680.46, "total_tokens": 128481256} +{"current_steps": 190695, "total_steps": 204665, "loss": 0.0, "lr": 2.825524347698971e-08, "epoch": 4.658710575819021, "percentage": 93.17, "elapsed_time": "4:06:41", "remaining_time": "0:18:04", "throughput": 8680.47, "total_tokens": 128484456} +{"current_steps": 190700, "total_steps": 204665, "loss": 0.0, "lr": 2.8235118583220918e-08, "epoch": 4.658832726650869, "percentage": 93.18, "elapsed_time": "4:06:41", "remaining_time": "0:18:03", "throughput": 8680.48, "total_tokens": 128487464} +{"current_steps": 190705, "total_steps": 204665, "loss": 0.0, "lr": 2.8215000756387496e-08, "epoch": 4.658954877482715, "percentage": 93.18, "elapsed_time": "4:06:42", "remaining_time": "0:18:03", "throughput": 8680.49, "total_tokens": 128490728} +{"current_steps": 190710, "total_steps": 204665, "loss": 0.0, "lr": 2.8194889996636217e-08, "epoch": 4.659077028314563, "percentage": 93.18, "elapsed_time": "4:06:42", "remaining_time": "0:18:03", "throughput": 8680.52, "total_tokens": 128494120} +{"current_steps": 190715, "total_steps": 204665, "loss": 0.0, "lr": 2.8174786304112853e-08, "epoch": 4.65919917914641, "percentage": 93.18, "elapsed_time": "4:06:42", "remaining_time": "0:18:02", "throughput": 8680.52, "total_tokens": 128497192} +{"current_steps": 190720, "total_steps": 204665, "loss": 0.0002, "lr": 2.8154689678963948e-08, "epoch": 4.659321329978257, "percentage": 93.19, "elapsed_time": "4:06:43", "remaining_time": "0:18:02", "throughput": 8680.54, "total_tokens": 128500456} +{"current_steps": 190725, "total_steps": 204665, "loss": 0.0, "lr": 2.8134600121335506e-08, "epoch": 4.659443480810104, "percentage": 93.19, "elapsed_time": "4:06:43", "remaining_time": "0:18:01", "throughput": 8680.55, "total_tokens": 128503528} +{"current_steps": 190730, "total_steps": 204665, "loss": 0.0002, "lr": 2.8114517631373623e-08, "epoch": 4.659565631641952, "percentage": 93.19, "elapsed_time": "4:06:43", "remaining_time": "0:18:01", "throughput": 8680.55, "total_tokens": 128506536} +{"current_steps": 190735, "total_steps": 204665, "loss": 0.0, "lr": 2.8094442209224412e-08, "epoch": 4.659687782473799, "percentage": 93.19, "elapsed_time": "4:06:44", "remaining_time": "0:18:01", "throughput": 8680.58, "total_tokens": 128509992} +{"current_steps": 190740, "total_steps": 204665, "loss": 0.0, "lr": 2.8074373855033862e-08, "epoch": 4.659809933305646, "percentage": 93.2, "elapsed_time": "4:06:44", "remaining_time": "0:18:00", "throughput": 8680.61, "total_tokens": 128513576} +{"current_steps": 190745, "total_steps": 204665, "loss": 0.0001, "lr": 2.8054312568947747e-08, "epoch": 4.659932084137493, "percentage": 93.2, "elapsed_time": "4:06:45", "remaining_time": "0:18:00", "throughput": 8680.64, "total_tokens": 128516968} +{"current_steps": 190750, "total_steps": 204665, "loss": 0.0, "lr": 2.803425835111217e-08, "epoch": 4.660054234969341, "percentage": 93.2, "elapsed_time": "4:06:45", "remaining_time": "0:18:00", "throughput": 8680.66, "total_tokens": 128520232} +{"current_steps": 190755, "total_steps": 204665, "loss": 0.0, "lr": 2.801421120167291e-08, "epoch": 4.660176385801187, "percentage": 93.2, "elapsed_time": "4:06:45", "remaining_time": "0:17:59", "throughput": 8680.68, "total_tokens": 128523560} +{"current_steps": 190760, "total_steps": 204665, "loss": 0.0, "lr": 2.7994171120775732e-08, "epoch": 4.660298536633034, "percentage": 93.21, "elapsed_time": "4:06:46", "remaining_time": "0:17:59", "throughput": 8680.69, "total_tokens": 128526696} +{"current_steps": 190765, "total_steps": 204665, "loss": 0.0, "lr": 2.7974138108566414e-08, "epoch": 4.660420687464882, "percentage": 93.21, "elapsed_time": "4:06:46", "remaining_time": "0:17:58", "throughput": 8680.7, "total_tokens": 128529832} +{"current_steps": 190770, "total_steps": 204665, "loss": 0.0, "lr": 2.7954112165190502e-08, "epoch": 4.660542838296728, "percentage": 93.21, "elapsed_time": "4:06:46", "remaining_time": "0:17:58", "throughput": 8680.75, "total_tokens": 128533736} +{"current_steps": 190775, "total_steps": 204665, "loss": 0.0, "lr": 2.793409329079377e-08, "epoch": 4.660664989128576, "percentage": 93.21, "elapsed_time": "4:06:47", "remaining_time": "0:17:58", "throughput": 8680.8, "total_tokens": 128537448} +{"current_steps": 190780, "total_steps": 204665, "loss": 0.0, "lr": 2.791408148552188e-08, "epoch": 4.660787139960423, "percentage": 93.22, "elapsed_time": "4:06:47", "remaining_time": "0:17:57", "throughput": 8680.82, "total_tokens": 128540840} +{"current_steps": 190785, "total_steps": 204665, "loss": 0.0001, "lr": 2.7894076749520158e-08, "epoch": 4.6609092907922705, "percentage": 93.22, "elapsed_time": "4:06:47", "remaining_time": "0:17:57", "throughput": 8680.85, "total_tokens": 128544360} +{"current_steps": 190790, "total_steps": 204665, "loss": 0.0, "lr": 2.7874079082934155e-08, "epoch": 4.661031441624117, "percentage": 93.22, "elapsed_time": "4:06:48", "remaining_time": "0:17:56", "throughput": 8680.92, "total_tokens": 128548456} +{"current_steps": 190795, "total_steps": 204665, "loss": 0.0, "lr": 2.7854088485909312e-08, "epoch": 4.661153592455965, "percentage": 93.22, "elapsed_time": "4:06:48", "remaining_time": "0:17:56", "throughput": 8680.94, "total_tokens": 128551784} +{"current_steps": 190800, "total_steps": 204665, "loss": 0.0, "lr": 2.7834104958591176e-08, "epoch": 4.661275743287812, "percentage": 93.23, "elapsed_time": "4:06:48", "remaining_time": "0:17:56", "throughput": 8680.94, "total_tokens": 128554792} +{"current_steps": 190805, "total_steps": 204665, "loss": 0.0, "lr": 2.7814128501124856e-08, "epoch": 4.661397894119659, "percentage": 93.23, "elapsed_time": "4:06:49", "remaining_time": "0:17:55", "throughput": 8680.94, "total_tokens": 128557736} +{"current_steps": 190810, "total_steps": 204665, "loss": 0.0, "lr": 2.7794159113655567e-08, "epoch": 4.661520044951506, "percentage": 93.23, "elapsed_time": "4:06:49", "remaining_time": "0:17:55", "throughput": 8680.95, "total_tokens": 128560808} +{"current_steps": 190815, "total_steps": 204665, "loss": 0.0, "lr": 2.7774196796328752e-08, "epoch": 4.661642195783354, "percentage": 93.23, "elapsed_time": "4:06:49", "remaining_time": "0:17:54", "throughput": 8680.96, "total_tokens": 128563944} +{"current_steps": 190820, "total_steps": 204665, "loss": 0.0, "lr": 2.775424154928929e-08, "epoch": 4.6617643466152, "percentage": 93.24, "elapsed_time": "4:06:50", "remaining_time": "0:17:54", "throughput": 8681.02, "total_tokens": 128567976} +{"current_steps": 190825, "total_steps": 204665, "loss": 0.0002, "lr": 2.7734293372682737e-08, "epoch": 4.661886497447048, "percentage": 93.24, "elapsed_time": "4:06:50", "remaining_time": "0:17:54", "throughput": 8681.05, "total_tokens": 128571496} +{"current_steps": 190830, "total_steps": 204665, "loss": 0.0, "lr": 2.771435226665364e-08, "epoch": 4.662008648278895, "percentage": 93.24, "elapsed_time": "4:06:50", "remaining_time": "0:17:53", "throughput": 8681.06, "total_tokens": 128574568} +{"current_steps": 190835, "total_steps": 204665, "loss": 0.0, "lr": 2.769441823134755e-08, "epoch": 4.6621307991107415, "percentage": 93.24, "elapsed_time": "4:06:51", "remaining_time": "0:17:53", "throughput": 8681.07, "total_tokens": 128577640} +{"current_steps": 190840, "total_steps": 204665, "loss": 0.0, "lr": 2.7674491266909016e-08, "epoch": 4.662252949942589, "percentage": 93.25, "elapsed_time": "4:06:51", "remaining_time": "0:17:52", "throughput": 8681.1, "total_tokens": 128581160} +{"current_steps": 190845, "total_steps": 204665, "loss": 0.0, "lr": 2.765457137348304e-08, "epoch": 4.662375100774437, "percentage": 93.25, "elapsed_time": "4:06:51", "remaining_time": "0:17:52", "throughput": 8681.1, "total_tokens": 128584104} +{"current_steps": 190850, "total_steps": 204665, "loss": 0.0, "lr": 2.7634658551214717e-08, "epoch": 4.6624972516062835, "percentage": 93.25, "elapsed_time": "4:06:52", "remaining_time": "0:17:52", "throughput": 8681.12, "total_tokens": 128587368} +{"current_steps": 190855, "total_steps": 204665, "loss": 0.0, "lr": 2.7614752800248608e-08, "epoch": 4.66261940243813, "percentage": 93.25, "elapsed_time": "4:06:52", "remaining_time": "0:17:51", "throughput": 8681.14, "total_tokens": 128590696} +{"current_steps": 190860, "total_steps": 204665, "loss": 0.0, "lr": 2.7594854120729594e-08, "epoch": 4.662741553269978, "percentage": 93.25, "elapsed_time": "4:06:52", "remaining_time": "0:17:51", "throughput": 8681.16, "total_tokens": 128594024} +{"current_steps": 190865, "total_steps": 204665, "loss": 0.0, "lr": 2.7574962512802334e-08, "epoch": 4.662863704101825, "percentage": 93.26, "elapsed_time": "4:06:53", "remaining_time": "0:17:51", "throughput": 8681.2, "total_tokens": 128597544} +{"current_steps": 190870, "total_steps": 204665, "loss": 0.0, "lr": 2.7555077976611385e-08, "epoch": 4.662985854933672, "percentage": 93.26, "elapsed_time": "4:06:53", "remaining_time": "0:17:50", "throughput": 8681.34, "total_tokens": 128603112} +{"current_steps": 190875, "total_steps": 204665, "loss": 0.0, "lr": 2.7535200512301626e-08, "epoch": 4.663108005765519, "percentage": 93.26, "elapsed_time": "4:06:54", "remaining_time": "0:17:50", "throughput": 8681.37, "total_tokens": 128606760} +{"current_steps": 190880, "total_steps": 204665, "loss": 0.0, "lr": 2.7515330120017387e-08, "epoch": 4.663230156597367, "percentage": 93.26, "elapsed_time": "4:06:54", "remaining_time": "0:17:49", "throughput": 8681.39, "total_tokens": 128610024} +{"current_steps": 190885, "total_steps": 204665, "loss": 0.0, "lr": 2.7495466799903222e-08, "epoch": 4.663352307429213, "percentage": 93.27, "elapsed_time": "4:06:54", "remaining_time": "0:17:49", "throughput": 8681.42, "total_tokens": 128613480} +{"current_steps": 190890, "total_steps": 204665, "loss": 0.0, "lr": 2.7475610552103568e-08, "epoch": 4.663474458261061, "percentage": 93.27, "elapsed_time": "4:06:55", "remaining_time": "0:17:49", "throughput": 8681.44, "total_tokens": 128616808} +{"current_steps": 190895, "total_steps": 204665, "loss": 0.0, "lr": 2.7455761376762976e-08, "epoch": 4.663596609092908, "percentage": 93.27, "elapsed_time": "4:06:55", "remaining_time": "0:17:48", "throughput": 8681.45, "total_tokens": 128619944} +{"current_steps": 190900, "total_steps": 204665, "loss": 0.0, "lr": 2.7435919274025553e-08, "epoch": 4.663718759924755, "percentage": 93.27, "elapsed_time": "4:06:55", "remaining_time": "0:17:48", "throughput": 8681.46, "total_tokens": 128623016} +{"current_steps": 190905, "total_steps": 204665, "loss": 0.0, "lr": 2.7416084244035852e-08, "epoch": 4.663840910756602, "percentage": 93.28, "elapsed_time": "4:06:56", "remaining_time": "0:17:47", "throughput": 8681.47, "total_tokens": 128626152} +{"current_steps": 190910, "total_steps": 204665, "loss": 0.0, "lr": 2.739625628693776e-08, "epoch": 4.66396306158845, "percentage": 93.28, "elapsed_time": "4:06:56", "remaining_time": "0:17:47", "throughput": 8681.48, "total_tokens": 128629160} +{"current_steps": 190915, "total_steps": 204665, "loss": 0.0, "lr": 2.737643540287593e-08, "epoch": 4.6640852124202965, "percentage": 93.28, "elapsed_time": "4:06:56", "remaining_time": "0:17:47", "throughput": 8681.48, "total_tokens": 128632232} +{"current_steps": 190920, "total_steps": 204665, "loss": 0.0, "lr": 2.7356621591994146e-08, "epoch": 4.664207363252144, "percentage": 93.28, "elapsed_time": "4:06:57", "remaining_time": "0:17:46", "throughput": 8681.51, "total_tokens": 128635688} +{"current_steps": 190925, "total_steps": 204665, "loss": 0.0, "lr": 2.733681485443662e-08, "epoch": 4.664329514083991, "percentage": 93.29, "elapsed_time": "4:06:57", "remaining_time": "0:17:46", "throughput": 8681.55, "total_tokens": 128639336} +{"current_steps": 190930, "total_steps": 204665, "loss": 0.0, "lr": 2.731701519034735e-08, "epoch": 4.664451664915838, "percentage": 93.29, "elapsed_time": "4:06:57", "remaining_time": "0:17:45", "throughput": 8681.59, "total_tokens": 128642856} +{"current_steps": 190935, "total_steps": 204665, "loss": 0.0, "lr": 2.729722259987044e-08, "epoch": 4.664573815747685, "percentage": 93.29, "elapsed_time": "4:06:58", "remaining_time": "0:17:45", "throughput": 8681.61, "total_tokens": 128646184} +{"current_steps": 190940, "total_steps": 204665, "loss": 0.0, "lr": 2.7277437083149668e-08, "epoch": 4.664695966579533, "percentage": 93.29, "elapsed_time": "4:06:58", "remaining_time": "0:17:45", "throughput": 8681.63, "total_tokens": 128649576} +{"current_steps": 190945, "total_steps": 204665, "loss": 0.0, "lr": 2.725765864032914e-08, "epoch": 4.66481811741138, "percentage": 93.3, "elapsed_time": "4:06:58", "remaining_time": "0:17:44", "throughput": 8681.66, "total_tokens": 128653032} +{"current_steps": 190950, "total_steps": 204665, "loss": 0.0, "lr": 2.7237887271552406e-08, "epoch": 4.664940268243226, "percentage": 93.3, "elapsed_time": "4:06:59", "remaining_time": "0:17:44", "throughput": 8681.68, "total_tokens": 128656232} +{"current_steps": 190955, "total_steps": 204665, "loss": 0.0, "lr": 2.7218122976963465e-08, "epoch": 4.665062419075074, "percentage": 93.3, "elapsed_time": "4:06:59", "remaining_time": "0:17:44", "throughput": 8681.7, "total_tokens": 128659688} +{"current_steps": 190960, "total_steps": 204665, "loss": 0.0, "lr": 2.7198365756705976e-08, "epoch": 4.665184569906921, "percentage": 93.3, "elapsed_time": "4:06:59", "remaining_time": "0:17:43", "throughput": 8681.76, "total_tokens": 128663656} +{"current_steps": 190965, "total_steps": 204665, "loss": 0.0, "lr": 2.7178615610923606e-08, "epoch": 4.665306720738768, "percentage": 93.31, "elapsed_time": "4:07:00", "remaining_time": "0:17:43", "throughput": 8681.78, "total_tokens": 128666920} +{"current_steps": 190970, "total_steps": 204665, "loss": 0.0, "lr": 2.7158872539760014e-08, "epoch": 4.665428871570615, "percentage": 93.31, "elapsed_time": "4:07:00", "remaining_time": "0:17:42", "throughput": 8681.82, "total_tokens": 128670632} +{"current_steps": 190975, "total_steps": 204665, "loss": 0.0, "lr": 2.7139136543358754e-08, "epoch": 4.665551022402463, "percentage": 93.31, "elapsed_time": "4:07:01", "remaining_time": "0:17:42", "throughput": 8681.84, "total_tokens": 128673896} +{"current_steps": 190980, "total_steps": 204665, "loss": 0.0, "lr": 2.711940762186349e-08, "epoch": 4.6656731732343095, "percentage": 93.31, "elapsed_time": "4:07:01", "remaining_time": "0:17:42", "throughput": 8681.85, "total_tokens": 128677096} +{"current_steps": 190985, "total_steps": 204665, "loss": 0.041, "lr": 2.7099685775417324e-08, "epoch": 4.665795324066157, "percentage": 93.32, "elapsed_time": "4:07:01", "remaining_time": "0:17:41", "throughput": 8681.86, "total_tokens": 128680232} +{"current_steps": 190990, "total_steps": 204665, "loss": 0.0, "lr": 2.707997100416415e-08, "epoch": 4.665917474898004, "percentage": 93.32, "elapsed_time": "4:07:02", "remaining_time": "0:17:41", "throughput": 8681.89, "total_tokens": 128683752} +{"current_steps": 190995, "total_steps": 204665, "loss": 0.0, "lr": 2.7060263308246956e-08, "epoch": 4.6660396257298515, "percentage": 93.32, "elapsed_time": "4:07:02", "remaining_time": "0:17:40", "throughput": 8681.93, "total_tokens": 128687400} +{"current_steps": 191000, "total_steps": 204665, "loss": 0.0, "lr": 2.704056268780919e-08, "epoch": 4.666161776561698, "percentage": 93.32, "elapsed_time": "4:07:02", "remaining_time": "0:17:40", "throughput": 8681.94, "total_tokens": 128690472} +{"current_steps": 191005, "total_steps": 204665, "loss": 0.0, "lr": 2.7020869142994284e-08, "epoch": 4.666283927393546, "percentage": 93.33, "elapsed_time": "4:07:03", "remaining_time": "0:17:40", "throughput": 8681.97, "total_tokens": 128693992} +{"current_steps": 191010, "total_steps": 204665, "loss": 0.0, "lr": 2.7001182673945354e-08, "epoch": 4.666406078225393, "percentage": 93.33, "elapsed_time": "4:07:03", "remaining_time": "0:17:39", "throughput": 8681.98, "total_tokens": 128697064} +{"current_steps": 191015, "total_steps": 204665, "loss": 0.0, "lr": 2.6981503280805395e-08, "epoch": 4.66652822905724, "percentage": 93.33, "elapsed_time": "4:07:03", "remaining_time": "0:17:39", "throughput": 8682.01, "total_tokens": 128700456} +{"current_steps": 191020, "total_steps": 204665, "loss": 0.0, "lr": 2.6961830963717737e-08, "epoch": 4.666650379889087, "percentage": 93.33, "elapsed_time": "4:07:04", "remaining_time": "0:17:38", "throughput": 8682.01, "total_tokens": 128703464} +{"current_steps": 191025, "total_steps": 204665, "loss": 0.0, "lr": 2.694216572282526e-08, "epoch": 4.666772530720934, "percentage": 93.34, "elapsed_time": "4:07:04", "remaining_time": "0:17:38", "throughput": 8682.04, "total_tokens": 128706856} +{"current_steps": 191030, "total_steps": 204665, "loss": 0.0, "lr": 2.692250755827119e-08, "epoch": 4.666894681552781, "percentage": 93.34, "elapsed_time": "4:07:04", "remaining_time": "0:17:38", "throughput": 8682.1, "total_tokens": 128710888} +{"current_steps": 191035, "total_steps": 204665, "loss": 0.0, "lr": 2.69028564701983e-08, "epoch": 4.667016832384628, "percentage": 93.34, "elapsed_time": "4:07:05", "remaining_time": "0:17:37", "throughput": 8682.13, "total_tokens": 128714408} +{"current_steps": 191040, "total_steps": 204665, "loss": 0.0, "lr": 2.6883212458749694e-08, "epoch": 4.667138983216476, "percentage": 93.34, "elapsed_time": "4:07:05", "remaining_time": "0:17:37", "throughput": 8682.14, "total_tokens": 128717544} +{"current_steps": 191045, "total_steps": 204665, "loss": 0.0, "lr": 2.686357552406793e-08, "epoch": 4.6672611340483225, "percentage": 93.35, "elapsed_time": "4:07:05", "remaining_time": "0:17:36", "throughput": 8682.17, "total_tokens": 128721000} +{"current_steps": 191050, "total_steps": 204665, "loss": 0.0, "lr": 2.684394566629611e-08, "epoch": 4.66738328488017, "percentage": 93.35, "elapsed_time": "4:07:06", "remaining_time": "0:17:36", "throughput": 8682.18, "total_tokens": 128724200} +{"current_steps": 191055, "total_steps": 204665, "loss": 0.0, "lr": 2.682432288557679e-08, "epoch": 4.667505435712017, "percentage": 93.35, "elapsed_time": "4:07:06", "remaining_time": "0:17:36", "throughput": 8682.2, "total_tokens": 128727464} +{"current_steps": 191060, "total_steps": 204665, "loss": 0.0, "lr": 2.6804707182052633e-08, "epoch": 4.667627586543865, "percentage": 93.35, "elapsed_time": "4:07:06", "remaining_time": "0:17:35", "throughput": 8682.21, "total_tokens": 128730536} +{"current_steps": 191065, "total_steps": 204665, "loss": 0.0, "lr": 2.6785098555866635e-08, "epoch": 4.667749737375711, "percentage": 93.35, "elapsed_time": "4:07:07", "remaining_time": "0:17:35", "throughput": 8682.24, "total_tokens": 128734120} +{"current_steps": 191070, "total_steps": 204665, "loss": 0.0, "lr": 2.676549700716102e-08, "epoch": 4.667871888207559, "percentage": 93.36, "elapsed_time": "4:07:07", "remaining_time": "0:17:35", "throughput": 8682.27, "total_tokens": 128737640} +{"current_steps": 191075, "total_steps": 204665, "loss": 0.0, "lr": 2.6745902536078558e-08, "epoch": 4.667994039039406, "percentage": 93.36, "elapsed_time": "4:07:07", "remaining_time": "0:17:34", "throughput": 8682.29, "total_tokens": 128740904} +{"current_steps": 191080, "total_steps": 204665, "loss": 0.0, "lr": 2.6726315142761578e-08, "epoch": 4.668116189871253, "percentage": 93.36, "elapsed_time": "4:07:08", "remaining_time": "0:17:34", "throughput": 8682.31, "total_tokens": 128744296} +{"current_steps": 191085, "total_steps": 204665, "loss": 0.0, "lr": 2.670673482735275e-08, "epoch": 4.6682383407031, "percentage": 93.36, "elapsed_time": "4:07:08", "remaining_time": "0:17:33", "throughput": 8682.34, "total_tokens": 128747624} +{"current_steps": 191090, "total_steps": 204665, "loss": 0.0, "lr": 2.668716158999418e-08, "epoch": 4.668360491534948, "percentage": 93.37, "elapsed_time": "4:07:09", "remaining_time": "0:17:33", "throughput": 8682.37, "total_tokens": 128751208} +{"current_steps": 191095, "total_steps": 204665, "loss": 0.0, "lr": 2.6667595430828417e-08, "epoch": 4.668482642366794, "percentage": 93.37, "elapsed_time": "4:07:09", "remaining_time": "0:17:33", "throughput": 8682.4, "total_tokens": 128754600} +{"current_steps": 191100, "total_steps": 204665, "loss": 0.0, "lr": 2.6648036349997792e-08, "epoch": 4.668604793198641, "percentage": 93.37, "elapsed_time": "4:07:09", "remaining_time": "0:17:32", "throughput": 8682.43, "total_tokens": 128757992} +{"current_steps": 191105, "total_steps": 204665, "loss": 0.0, "lr": 2.662848434764431e-08, "epoch": 4.668726944030489, "percentage": 93.37, "elapsed_time": "4:07:10", "remaining_time": "0:17:32", "throughput": 8682.46, "total_tokens": 128761640} +{"current_steps": 191110, "total_steps": 204665, "loss": 0.0, "lr": 2.6608939423910404e-08, "epoch": 4.6688490948623365, "percentage": 93.38, "elapsed_time": "4:07:10", "remaining_time": "0:17:31", "throughput": 8682.46, "total_tokens": 128764584} +{"current_steps": 191115, "total_steps": 204665, "loss": 0.0, "lr": 2.6589401578938075e-08, "epoch": 4.668971245694183, "percentage": 93.38, "elapsed_time": "4:07:10", "remaining_time": "0:17:31", "throughput": 8682.49, "total_tokens": 128768040} +{"current_steps": 191120, "total_steps": 204665, "loss": 0.0, "lr": 2.6569870812869323e-08, "epoch": 4.66909339652603, "percentage": 93.38, "elapsed_time": "4:07:11", "remaining_time": "0:17:31", "throughput": 8682.51, "total_tokens": 128771368} +{"current_steps": 191125, "total_steps": 204665, "loss": 0.0, "lr": 2.6550347125846472e-08, "epoch": 4.669215547357878, "percentage": 93.38, "elapsed_time": "4:07:11", "remaining_time": "0:17:30", "throughput": 8682.53, "total_tokens": 128774632} +{"current_steps": 191130, "total_steps": 204665, "loss": 0.0, "lr": 2.6530830518011194e-08, "epoch": 4.669337698189724, "percentage": 93.39, "elapsed_time": "4:07:11", "remaining_time": "0:17:30", "throughput": 8682.54, "total_tokens": 128777704} +{"current_steps": 191135, "total_steps": 204665, "loss": 0.0, "lr": 2.651132098950559e-08, "epoch": 4.669459849021572, "percentage": 93.39, "elapsed_time": "4:07:12", "remaining_time": "0:17:29", "throughput": 8682.57, "total_tokens": 128781160} +{"current_steps": 191140, "total_steps": 204665, "loss": 0.0001, "lr": 2.6491818540471446e-08, "epoch": 4.669581999853419, "percentage": 93.39, "elapsed_time": "4:07:12", "remaining_time": "0:17:29", "throughput": 8682.57, "total_tokens": 128784104} +{"current_steps": 191145, "total_steps": 204665, "loss": 0.0, "lr": 2.6472323171050747e-08, "epoch": 4.669704150685266, "percentage": 93.39, "elapsed_time": "4:07:12", "remaining_time": "0:17:29", "throughput": 8682.61, "total_tokens": 128787752} +{"current_steps": 191150, "total_steps": 204665, "loss": 0.0, "lr": 2.6452834881385055e-08, "epoch": 4.669826301517113, "percentage": 93.4, "elapsed_time": "4:07:13", "remaining_time": "0:17:28", "throughput": 8682.62, "total_tokens": 128790888} +{"current_steps": 191155, "total_steps": 204665, "loss": 0.0, "lr": 2.6433353671616142e-08, "epoch": 4.669948452348961, "percentage": 93.4, "elapsed_time": "4:07:13", "remaining_time": "0:17:28", "throughput": 8682.63, "total_tokens": 128793960} +{"current_steps": 191160, "total_steps": 204665, "loss": 0.0, "lr": 2.64138795418859e-08, "epoch": 4.6700706031808075, "percentage": 93.4, "elapsed_time": "4:07:13", "remaining_time": "0:17:27", "throughput": 8682.66, "total_tokens": 128797416} +{"current_steps": 191165, "total_steps": 204665, "loss": 0.0, "lr": 2.6394412492335648e-08, "epoch": 4.670192754012655, "percentage": 93.4, "elapsed_time": "4:07:14", "remaining_time": "0:17:27", "throughput": 8682.7, "total_tokens": 128801128} +{"current_steps": 191170, "total_steps": 204665, "loss": 0.0, "lr": 2.6374952523107286e-08, "epoch": 4.670314904844502, "percentage": 93.41, "elapsed_time": "4:07:14", "remaining_time": "0:17:27", "throughput": 8682.7, "total_tokens": 128804136} +{"current_steps": 191175, "total_steps": 204665, "loss": 0.0, "lr": 2.6355499634341916e-08, "epoch": 4.6704370556763495, "percentage": 93.41, "elapsed_time": "4:07:14", "remaining_time": "0:17:26", "throughput": 8682.72, "total_tokens": 128807400} +{"current_steps": 191180, "total_steps": 204665, "loss": 0.0, "lr": 2.6336053826181314e-08, "epoch": 4.670559206508196, "percentage": 93.41, "elapsed_time": "4:07:15", "remaining_time": "0:17:26", "throughput": 8682.77, "total_tokens": 128811240} +{"current_steps": 191185, "total_steps": 204665, "loss": 0.0, "lr": 2.6316615098766927e-08, "epoch": 4.670681357340044, "percentage": 93.41, "elapsed_time": "4:07:15", "remaining_time": "0:17:26", "throughput": 8682.78, "total_tokens": 128814440} +{"current_steps": 191190, "total_steps": 204665, "loss": 0.0, "lr": 2.6297183452239856e-08, "epoch": 4.670803508171891, "percentage": 93.42, "elapsed_time": "4:07:15", "remaining_time": "0:17:25", "throughput": 8682.81, "total_tokens": 128817960} +{"current_steps": 191195, "total_steps": 204665, "loss": 0.0, "lr": 2.6277758886741664e-08, "epoch": 4.670925659003737, "percentage": 93.42, "elapsed_time": "4:07:16", "remaining_time": "0:17:25", "throughput": 8682.84, "total_tokens": 128821416} +{"current_steps": 191200, "total_steps": 204665, "loss": 0.0, "lr": 2.6258341402413454e-08, "epoch": 4.671047809835585, "percentage": 93.42, "elapsed_time": "4:07:16", "remaining_time": "0:17:24", "throughput": 8682.87, "total_tokens": 128824872} +{"current_steps": 191205, "total_steps": 204665, "loss": 0.0, "lr": 2.6238930999396557e-08, "epoch": 4.671169960667433, "percentage": 93.42, "elapsed_time": "4:07:17", "remaining_time": "0:17:24", "throughput": 8682.89, "total_tokens": 128828136} +{"current_steps": 191210, "total_steps": 204665, "loss": 0.0, "lr": 2.6219527677831976e-08, "epoch": 4.671292111499279, "percentage": 93.43, "elapsed_time": "4:07:17", "remaining_time": "0:17:24", "throughput": 8682.92, "total_tokens": 128831720} +{"current_steps": 191215, "total_steps": 204665, "loss": 0.0, "lr": 2.6200131437861038e-08, "epoch": 4.671414262331126, "percentage": 93.43, "elapsed_time": "4:07:17", "remaining_time": "0:17:23", "throughput": 8682.93, "total_tokens": 128834792} +{"current_steps": 191220, "total_steps": 204665, "loss": 0.0001, "lr": 2.6180742279624523e-08, "epoch": 4.671536413162974, "percentage": 93.43, "elapsed_time": "4:07:18", "remaining_time": "0:17:23", "throughput": 8682.94, "total_tokens": 128837800} +{"current_steps": 191225, "total_steps": 204665, "loss": 0.0, "lr": 2.616136020326365e-08, "epoch": 4.6716585639948205, "percentage": 93.43, "elapsed_time": "4:07:18", "remaining_time": "0:17:22", "throughput": 8682.95, "total_tokens": 128840936} +{"current_steps": 191230, "total_steps": 204665, "loss": 0.0, "lr": 2.6141985208919305e-08, "epoch": 4.671780714826668, "percentage": 93.44, "elapsed_time": "4:07:18", "remaining_time": "0:17:22", "throughput": 8682.99, "total_tokens": 128844648} +{"current_steps": 191235, "total_steps": 204665, "loss": 0.0, "lr": 2.6122617296732376e-08, "epoch": 4.671902865658515, "percentage": 93.44, "elapsed_time": "4:07:19", "remaining_time": "0:17:22", "throughput": 8683.01, "total_tokens": 128847976} +{"current_steps": 191240, "total_steps": 204665, "loss": 0.0, "lr": 2.610325646684375e-08, "epoch": 4.6720250164903625, "percentage": 93.44, "elapsed_time": "4:07:19", "remaining_time": "0:17:21", "throughput": 8683.01, "total_tokens": 128850984} +{"current_steps": 191245, "total_steps": 204665, "loss": 0.1687, "lr": 2.6083902719393978e-08, "epoch": 4.672147167322209, "percentage": 93.44, "elapsed_time": "4:07:19", "remaining_time": "0:17:21", "throughput": 8683.06, "total_tokens": 128854760} +{"current_steps": 191250, "total_steps": 204665, "loss": 0.0, "lr": 2.606455605452418e-08, "epoch": 4.672269318154057, "percentage": 93.45, "elapsed_time": "4:07:20", "remaining_time": "0:17:20", "throughput": 8683.08, "total_tokens": 128858088} +{"current_steps": 191255, "total_steps": 204665, "loss": 0.0, "lr": 2.6045216472374898e-08, "epoch": 4.672391468985904, "percentage": 93.45, "elapsed_time": "4:07:20", "remaining_time": "0:17:20", "throughput": 8683.12, "total_tokens": 128861672} +{"current_steps": 191260, "total_steps": 204665, "loss": 0.0, "lr": 2.6025883973086693e-08, "epoch": 4.672513619817751, "percentage": 93.45, "elapsed_time": "4:07:20", "remaining_time": "0:17:20", "throughput": 8683.17, "total_tokens": 128865576} +{"current_steps": 191265, "total_steps": 204665, "loss": 0.0, "lr": 2.600655855680034e-08, "epoch": 4.672635770649598, "percentage": 93.45, "elapsed_time": "4:07:21", "remaining_time": "0:17:19", "throughput": 8683.19, "total_tokens": 128868904} +{"current_steps": 191270, "total_steps": 204665, "loss": 0.0365, "lr": 2.598724022365617e-08, "epoch": 4.672757921481446, "percentage": 93.46, "elapsed_time": "4:07:21", "remaining_time": "0:17:19", "throughput": 8683.22, "total_tokens": 128872488} +{"current_steps": 191275, "total_steps": 204665, "loss": 0.0, "lr": 2.5967928973794738e-08, "epoch": 4.672880072313292, "percentage": 93.46, "elapsed_time": "4:07:21", "remaining_time": "0:17:18", "throughput": 8683.25, "total_tokens": 128875944} +{"current_steps": 191280, "total_steps": 204665, "loss": 0.0, "lr": 2.59486248073566e-08, "epoch": 4.67300222314514, "percentage": 93.46, "elapsed_time": "4:07:22", "remaining_time": "0:17:18", "throughput": 8683.26, "total_tokens": 128879016} +{"current_steps": 191285, "total_steps": 204665, "loss": 0.0, "lr": 2.5929327724481976e-08, "epoch": 4.673124373976987, "percentage": 93.46, "elapsed_time": "4:07:22", "remaining_time": "0:17:18", "throughput": 8683.29, "total_tokens": 128882472} +{"current_steps": 191290, "total_steps": 204665, "loss": 0.0, "lr": 2.5910037725311418e-08, "epoch": 4.6732465248088335, "percentage": 93.46, "elapsed_time": "4:07:22", "remaining_time": "0:17:17", "throughput": 8683.28, "total_tokens": 128885352} +{"current_steps": 191295, "total_steps": 204665, "loss": 0.0, "lr": 2.5890754809984928e-08, "epoch": 4.673368675640681, "percentage": 93.47, "elapsed_time": "4:07:23", "remaining_time": "0:17:17", "throughput": 8683.33, "total_tokens": 128889128} +{"current_steps": 191300, "total_steps": 204665, "loss": 0.0, "lr": 2.5871478978642945e-08, "epoch": 4.673490826472528, "percentage": 93.47, "elapsed_time": "4:07:23", "remaining_time": "0:17:17", "throughput": 8683.34, "total_tokens": 128892200} +{"current_steps": 191305, "total_steps": 204665, "loss": 0.0, "lr": 2.5852210231425475e-08, "epoch": 4.6736129773043755, "percentage": 93.47, "elapsed_time": "4:07:23", "remaining_time": "0:17:16", "throughput": 8683.39, "total_tokens": 128896104} +{"current_steps": 191310, "total_steps": 204665, "loss": 0.0, "lr": 2.5832948568472733e-08, "epoch": 4.673735128136222, "percentage": 93.47, "elapsed_time": "4:07:24", "remaining_time": "0:17:16", "throughput": 8683.4, "total_tokens": 128899304} +{"current_steps": 191315, "total_steps": 204665, "loss": 0.0, "lr": 2.5813693989924944e-08, "epoch": 4.67385727896807, "percentage": 93.48, "elapsed_time": "4:07:24", "remaining_time": "0:17:15", "throughput": 8683.42, "total_tokens": 128902568} +{"current_steps": 191320, "total_steps": 204665, "loss": 0.0, "lr": 2.5794446495921994e-08, "epoch": 4.673979429799917, "percentage": 93.48, "elapsed_time": "4:07:25", "remaining_time": "0:17:15", "throughput": 8683.48, "total_tokens": 128906600} +{"current_steps": 191325, "total_steps": 204665, "loss": 0.0, "lr": 2.5775206086603772e-08, "epoch": 4.674101580631764, "percentage": 93.48, "elapsed_time": "4:07:25", "remaining_time": "0:17:15", "throughput": 8683.51, "total_tokens": 128910120} +{"current_steps": 191330, "total_steps": 204665, "loss": 0.0, "lr": 2.575597276211039e-08, "epoch": 4.674223731463611, "percentage": 93.48, "elapsed_time": "4:07:25", "remaining_time": "0:17:14", "throughput": 8683.51, "total_tokens": 128913064} +{"current_steps": 191335, "total_steps": 204665, "loss": 0.0, "lr": 2.573674652258151e-08, "epoch": 4.674345882295459, "percentage": 93.49, "elapsed_time": "4:07:26", "remaining_time": "0:17:14", "throughput": 8683.54, "total_tokens": 128916456} +{"current_steps": 191340, "total_steps": 204665, "loss": 0.0, "lr": 2.5717527368157134e-08, "epoch": 4.674468033127305, "percentage": 93.49, "elapsed_time": "4:07:26", "remaining_time": "0:17:13", "throughput": 8683.54, "total_tokens": 128919528} +{"current_steps": 191345, "total_steps": 204665, "loss": 0.0, "lr": 2.5698315298976813e-08, "epoch": 4.674590183959153, "percentage": 93.49, "elapsed_time": "4:07:26", "remaining_time": "0:17:13", "throughput": 8683.59, "total_tokens": 128923304} +{"current_steps": 191350, "total_steps": 204665, "loss": 0.0, "lr": 2.5679110315180553e-08, "epoch": 4.674712334791, "percentage": 93.49, "elapsed_time": "4:07:27", "remaining_time": "0:17:13", "throughput": 8683.61, "total_tokens": 128926696} +{"current_steps": 191355, "total_steps": 204665, "loss": 0.0, "lr": 2.565991241690779e-08, "epoch": 4.674834485622847, "percentage": 93.5, "elapsed_time": "4:07:27", "remaining_time": "0:17:12", "throughput": 8683.66, "total_tokens": 128930472} +{"current_steps": 191360, "total_steps": 204665, "loss": 0.0, "lr": 2.564072160429831e-08, "epoch": 4.674956636454694, "percentage": 93.5, "elapsed_time": "4:07:27", "remaining_time": "0:17:12", "throughput": 8683.68, "total_tokens": 128933800} +{"current_steps": 191365, "total_steps": 204665, "loss": 0.0, "lr": 2.562153787749144e-08, "epoch": 4.675078787286542, "percentage": 93.5, "elapsed_time": "4:07:28", "remaining_time": "0:17:11", "throughput": 8683.69, "total_tokens": 128936936} +{"current_steps": 191370, "total_steps": 204665, "loss": 0.0, "lr": 2.5602361236626736e-08, "epoch": 4.6752009381183885, "percentage": 93.5, "elapsed_time": "4:07:28", "remaining_time": "0:17:11", "throughput": 8683.76, "total_tokens": 128941032} +{"current_steps": 191375, "total_steps": 204665, "loss": 0.0, "lr": 2.5583191681843973e-08, "epoch": 4.675323088950236, "percentage": 93.51, "elapsed_time": "4:07:28", "remaining_time": "0:17:11", "throughput": 8683.77, "total_tokens": 128944296} +{"current_steps": 191380, "total_steps": 204665, "loss": 0.0, "lr": 2.5564029213282157e-08, "epoch": 4.675445239782083, "percentage": 93.51, "elapsed_time": "4:07:29", "remaining_time": "0:17:10", "throughput": 8683.79, "total_tokens": 128947496} +{"current_steps": 191385, "total_steps": 204665, "loss": 0.0, "lr": 2.554487383108095e-08, "epoch": 4.67556739061393, "percentage": 93.51, "elapsed_time": "4:07:29", "remaining_time": "0:17:10", "throughput": 8683.79, "total_tokens": 128950568} +{"current_steps": 191390, "total_steps": 204665, "loss": 0.0, "lr": 2.552572553537935e-08, "epoch": 4.675689541445777, "percentage": 93.51, "elapsed_time": "4:07:29", "remaining_time": "0:17:10", "throughput": 8683.84, "total_tokens": 128954408} +{"current_steps": 191395, "total_steps": 204665, "loss": 0.0, "lr": 2.5506584326316916e-08, "epoch": 4.675811692277624, "percentage": 93.52, "elapsed_time": "4:07:30", "remaining_time": "0:17:09", "throughput": 8683.89, "total_tokens": 128958248} +{"current_steps": 191400, "total_steps": 204665, "loss": 0.0, "lr": 2.5487450204032644e-08, "epoch": 4.675933843109472, "percentage": 93.52, "elapsed_time": "4:07:30", "remaining_time": "0:17:09", "throughput": 8683.9, "total_tokens": 128961320} +{"current_steps": 191405, "total_steps": 204665, "loss": 0.0, "lr": 2.546832316866576e-08, "epoch": 4.676055993941318, "percentage": 93.52, "elapsed_time": "4:07:30", "remaining_time": "0:17:08", "throughput": 8683.92, "total_tokens": 128964776} +{"current_steps": 191410, "total_steps": 204665, "loss": 0.0, "lr": 2.5449203220355377e-08, "epoch": 4.676178144773166, "percentage": 93.52, "elapsed_time": "4:07:31", "remaining_time": "0:17:08", "throughput": 8683.96, "total_tokens": 128968296} +{"current_steps": 191415, "total_steps": 204665, "loss": 0.0, "lr": 2.5430090359240486e-08, "epoch": 4.676300295605013, "percentage": 93.53, "elapsed_time": "4:07:31", "remaining_time": "0:17:08", "throughput": 8683.98, "total_tokens": 128971752} +{"current_steps": 191420, "total_steps": 204665, "loss": 0.0, "lr": 2.5410984585460203e-08, "epoch": 4.6764224464368604, "percentage": 93.53, "elapsed_time": "4:07:32", "remaining_time": "0:17:07", "throughput": 8683.98, "total_tokens": 128974696} +{"current_steps": 191425, "total_steps": 204665, "loss": 0.0001, "lr": 2.539188589915331e-08, "epoch": 4.676544597268707, "percentage": 93.53, "elapsed_time": "4:07:32", "remaining_time": "0:17:07", "throughput": 8684.01, "total_tokens": 128978088} +{"current_steps": 191430, "total_steps": 204665, "loss": 0.0, "lr": 2.537279430045869e-08, "epoch": 4.676666748100555, "percentage": 93.53, "elapsed_time": "4:07:32", "remaining_time": "0:17:06", "throughput": 8684.02, "total_tokens": 128981288} +{"current_steps": 191435, "total_steps": 204665, "loss": 0.0, "lr": 2.5353709789515344e-08, "epoch": 4.676788898932402, "percentage": 93.54, "elapsed_time": "4:07:33", "remaining_time": "0:17:06", "throughput": 8684.07, "total_tokens": 128985064} +{"current_steps": 191440, "total_steps": 204665, "loss": 0.0, "lr": 2.5334632366461827e-08, "epoch": 4.676911049764249, "percentage": 93.54, "elapsed_time": "4:07:33", "remaining_time": "0:17:06", "throughput": 8684.1, "total_tokens": 128988584} +{"current_steps": 191445, "total_steps": 204665, "loss": 0.0, "lr": 2.5315562031437144e-08, "epoch": 4.677033200596096, "percentage": 93.54, "elapsed_time": "4:07:33", "remaining_time": "0:17:05", "throughput": 8684.12, "total_tokens": 128991784} +{"current_steps": 191450, "total_steps": 204665, "loss": 0.0, "lr": 2.5296498784579845e-08, "epoch": 4.677155351427944, "percentage": 93.54, "elapsed_time": "4:07:34", "remaining_time": "0:17:05", "throughput": 8684.15, "total_tokens": 128995432} +{"current_steps": 191455, "total_steps": 204665, "loss": 0.0, "lr": 2.52774426260286e-08, "epoch": 4.67727750225979, "percentage": 93.55, "elapsed_time": "4:07:34", "remaining_time": "0:17:04", "throughput": 8684.19, "total_tokens": 128999016} +{"current_steps": 191460, "total_steps": 204665, "loss": 0.0, "lr": 2.5258393555921855e-08, "epoch": 4.677399653091637, "percentage": 93.55, "elapsed_time": "4:07:34", "remaining_time": "0:17:04", "throughput": 8684.21, "total_tokens": 129002408} +{"current_steps": 191465, "total_steps": 204665, "loss": 0.0, "lr": 2.523935157439816e-08, "epoch": 4.677521803923485, "percentage": 93.55, "elapsed_time": "4:07:35", "remaining_time": "0:17:04", "throughput": 8684.26, "total_tokens": 129006248} +{"current_steps": 191470, "total_steps": 204665, "loss": 0.0, "lr": 2.52203166815963e-08, "epoch": 4.677643954755332, "percentage": 93.55, "elapsed_time": "4:07:35", "remaining_time": "0:17:03", "throughput": 8684.28, "total_tokens": 129009640} +{"current_steps": 191475, "total_steps": 204665, "loss": 0.0, "lr": 2.520128887765438e-08, "epoch": 4.677766105587179, "percentage": 93.56, "elapsed_time": "4:07:35", "remaining_time": "0:17:03", "throughput": 8684.3, "total_tokens": 129012968} +{"current_steps": 191480, "total_steps": 204665, "loss": 0.0, "lr": 2.5182268162710962e-08, "epoch": 4.677888256419026, "percentage": 93.56, "elapsed_time": "4:07:36", "remaining_time": "0:17:02", "throughput": 8684.31, "total_tokens": 129016104} +{"current_steps": 191485, "total_steps": 204665, "loss": 0.0, "lr": 2.5163254536904155e-08, "epoch": 4.6780104072508735, "percentage": 93.56, "elapsed_time": "4:07:36", "remaining_time": "0:17:02", "throughput": 8684.32, "total_tokens": 129019304} +{"current_steps": 191490, "total_steps": 204665, "loss": 0.0, "lr": 2.5144248000372403e-08, "epoch": 4.67813255808272, "percentage": 93.56, "elapsed_time": "4:07:36", "remaining_time": "0:17:02", "throughput": 8684.35, "total_tokens": 129022696} +{"current_steps": 191495, "total_steps": 204665, "loss": 0.0, "lr": 2.512524855325393e-08, "epoch": 4.678254708914568, "percentage": 93.57, "elapsed_time": "4:07:37", "remaining_time": "0:17:01", "throughput": 8684.38, "total_tokens": 129026152} +{"current_steps": 191500, "total_steps": 204665, "loss": 0.0, "lr": 2.510625619568674e-08, "epoch": 4.678376859746415, "percentage": 93.57, "elapsed_time": "4:07:37", "remaining_time": "0:17:01", "throughput": 8684.4, "total_tokens": 129029608} +{"current_steps": 191505, "total_steps": 204665, "loss": 0.0, "lr": 2.5087270927809266e-08, "epoch": 4.678499010578262, "percentage": 93.57, "elapsed_time": "4:07:37", "remaining_time": "0:17:01", "throughput": 8684.46, "total_tokens": 129033576} +{"current_steps": 191510, "total_steps": 204665, "loss": 0.0852, "lr": 2.506829274975919e-08, "epoch": 4.678621161410109, "percentage": 93.57, "elapsed_time": "4:07:38", "remaining_time": "0:17:00", "throughput": 8684.52, "total_tokens": 129037672} +{"current_steps": 191515, "total_steps": 204665, "loss": 0.0, "lr": 2.504932166167484e-08, "epoch": 4.678743312241957, "percentage": 93.57, "elapsed_time": "4:07:38", "remaining_time": "0:17:00", "throughput": 8684.54, "total_tokens": 129041000} +{"current_steps": 191520, "total_steps": 204665, "loss": 0.0, "lr": 2.503035766369399e-08, "epoch": 4.678865463073803, "percentage": 93.58, "elapsed_time": "4:07:39", "remaining_time": "0:16:59", "throughput": 8684.56, "total_tokens": 129044328} +{"current_steps": 191525, "total_steps": 204665, "loss": 0.0005, "lr": 2.5011400755954648e-08, "epoch": 4.678987613905651, "percentage": 93.58, "elapsed_time": "4:07:39", "remaining_time": "0:16:59", "throughput": 8684.6, "total_tokens": 129047976} +{"current_steps": 191530, "total_steps": 204665, "loss": 0.0, "lr": 2.499245093859459e-08, "epoch": 4.679109764737498, "percentage": 93.58, "elapsed_time": "4:07:39", "remaining_time": "0:16:59", "throughput": 8684.62, "total_tokens": 129051304} +{"current_steps": 191535, "total_steps": 204665, "loss": 0.0, "lr": 2.4973508211751816e-08, "epoch": 4.679231915569345, "percentage": 93.58, "elapsed_time": "4:07:40", "remaining_time": "0:16:58", "throughput": 8684.64, "total_tokens": 129054504} +{"current_steps": 191540, "total_steps": 204665, "loss": 0.0, "lr": 2.495457257556388e-08, "epoch": 4.679354066401192, "percentage": 93.59, "elapsed_time": "4:07:40", "remaining_time": "0:16:58", "throughput": 8684.64, "total_tokens": 129057512} +{"current_steps": 191545, "total_steps": 204665, "loss": 0.0, "lr": 2.4935644030168456e-08, "epoch": 4.67947621723304, "percentage": 93.59, "elapsed_time": "4:07:40", "remaining_time": "0:16:57", "throughput": 8684.67, "total_tokens": 129061032} +{"current_steps": 191550, "total_steps": 204665, "loss": 0.0, "lr": 2.491672257570343e-08, "epoch": 4.6795983680648865, "percentage": 93.59, "elapsed_time": "4:07:41", "remaining_time": "0:16:57", "throughput": 8684.69, "total_tokens": 129064296} +{"current_steps": 191555, "total_steps": 204665, "loss": 0.0, "lr": 2.4897808212306026e-08, "epoch": 4.679720518896733, "percentage": 93.59, "elapsed_time": "4:07:41", "remaining_time": "0:16:57", "throughput": 8684.72, "total_tokens": 129067752} +{"current_steps": 191560, "total_steps": 204665, "loss": 0.0, "lr": 2.4878900940114134e-08, "epoch": 4.679842669728581, "percentage": 93.6, "elapsed_time": "4:07:41", "remaining_time": "0:16:56", "throughput": 8684.73, "total_tokens": 129070952} +{"current_steps": 191565, "total_steps": 204665, "loss": 0.0501, "lr": 2.4860000759265308e-08, "epoch": 4.6799648205604285, "percentage": 93.6, "elapsed_time": "4:07:42", "remaining_time": "0:16:56", "throughput": 8684.76, "total_tokens": 129074280} +{"current_steps": 191570, "total_steps": 204665, "loss": 0.0, "lr": 2.4841107669896668e-08, "epoch": 4.680086971392275, "percentage": 93.6, "elapsed_time": "4:07:42", "remaining_time": "0:16:55", "throughput": 8684.78, "total_tokens": 129077672} +{"current_steps": 191575, "total_steps": 204665, "loss": 0.0, "lr": 2.4822221672145872e-08, "epoch": 4.680209122224122, "percentage": 93.6, "elapsed_time": "4:07:42", "remaining_time": "0:16:55", "throughput": 8684.82, "total_tokens": 129081384} +{"current_steps": 191580, "total_steps": 204665, "loss": 0.0, "lr": 2.4803342766150036e-08, "epoch": 4.68033127305597, "percentage": 93.61, "elapsed_time": "4:07:43", "remaining_time": "0:16:55", "throughput": 8684.84, "total_tokens": 129084648} +{"current_steps": 191585, "total_steps": 204665, "loss": 0.0, "lr": 2.4784470952046722e-08, "epoch": 4.680453423887816, "percentage": 93.61, "elapsed_time": "4:07:43", "remaining_time": "0:16:54", "throughput": 8684.85, "total_tokens": 129087848} +{"current_steps": 191590, "total_steps": 204665, "loss": 0.0, "lr": 2.4765606229973034e-08, "epoch": 4.680575574719664, "percentage": 93.61, "elapsed_time": "4:07:43", "remaining_time": "0:16:54", "throughput": 8684.9, "total_tokens": 129091624} +{"current_steps": 191595, "total_steps": 204665, "loss": 0.0, "lr": 2.474674860006609e-08, "epoch": 4.680697725551511, "percentage": 93.61, "elapsed_time": "4:07:44", "remaining_time": "0:16:53", "throughput": 8684.9, "total_tokens": 129094504} +{"current_steps": 191600, "total_steps": 204665, "loss": 0.0, "lr": 2.4727898062463226e-08, "epoch": 4.680819876383358, "percentage": 93.62, "elapsed_time": "4:07:44", "remaining_time": "0:16:53", "throughput": 8684.96, "total_tokens": 129098536} +{"current_steps": 191605, "total_steps": 204665, "loss": 0.0, "lr": 2.4709054617301218e-08, "epoch": 4.680942027215205, "percentage": 93.62, "elapsed_time": "4:07:44", "remaining_time": "0:16:53", "throughput": 8684.98, "total_tokens": 129101800} +{"current_steps": 191610, "total_steps": 204665, "loss": 0.0, "lr": 2.4690218264717398e-08, "epoch": 4.681064178047053, "percentage": 93.62, "elapsed_time": "4:07:45", "remaining_time": "0:16:52", "throughput": 8685.01, "total_tokens": 129105448} +{"current_steps": 191615, "total_steps": 204665, "loss": 0.0001, "lr": 2.4671389004848663e-08, "epoch": 4.6811863288788995, "percentage": 93.62, "elapsed_time": "4:07:45", "remaining_time": "0:16:52", "throughput": 8685.04, "total_tokens": 129108904} +{"current_steps": 191620, "total_steps": 204665, "loss": 0.0, "lr": 2.465256683783179e-08, "epoch": 4.681308479710747, "percentage": 93.63, "elapsed_time": "4:07:46", "remaining_time": "0:16:52", "throughput": 8685.08, "total_tokens": 129112616} +{"current_steps": 191625, "total_steps": 204665, "loss": 0.0, "lr": 2.4633751763804e-08, "epoch": 4.681430630542594, "percentage": 93.63, "elapsed_time": "4:07:46", "remaining_time": "0:16:51", "throughput": 8685.12, "total_tokens": 129116264} +{"current_steps": 191630, "total_steps": 204665, "loss": 0.0, "lr": 2.461494378290174e-08, "epoch": 4.6815527813744415, "percentage": 93.63, "elapsed_time": "4:07:46", "remaining_time": "0:16:51", "throughput": 8685.13, "total_tokens": 129119336} +{"current_steps": 191635, "total_steps": 204665, "loss": 0.0488, "lr": 2.4596142895262017e-08, "epoch": 4.681674932206288, "percentage": 93.63, "elapsed_time": "4:07:47", "remaining_time": "0:16:50", "throughput": 8685.15, "total_tokens": 129122728} +{"current_steps": 191640, "total_steps": 204665, "loss": 0.0, "lr": 2.4577349101021495e-08, "epoch": 4.681797083038136, "percentage": 93.64, "elapsed_time": "4:07:47", "remaining_time": "0:16:50", "throughput": 8685.19, "total_tokens": 129126376} +{"current_steps": 191645, "total_steps": 204665, "loss": 0.0, "lr": 2.455856240031684e-08, "epoch": 4.681919233869983, "percentage": 93.64, "elapsed_time": "4:07:47", "remaining_time": "0:16:50", "throughput": 8685.19, "total_tokens": 129129256} +{"current_steps": 191650, "total_steps": 204665, "loss": 0.0, "lr": 2.4539782793284723e-08, "epoch": 4.682041384701829, "percentage": 93.64, "elapsed_time": "4:07:48", "remaining_time": "0:16:49", "throughput": 8685.21, "total_tokens": 129132648} +{"current_steps": 191655, "total_steps": 204665, "loss": 0.0, "lr": 2.4521010280061592e-08, "epoch": 4.682163535533677, "percentage": 93.64, "elapsed_time": "4:07:48", "remaining_time": "0:16:49", "throughput": 8685.25, "total_tokens": 129136168} +{"current_steps": 191660, "total_steps": 204665, "loss": 0.0, "lr": 2.4502244860784115e-08, "epoch": 4.682285686365524, "percentage": 93.65, "elapsed_time": "4:07:48", "remaining_time": "0:16:48", "throughput": 8685.26, "total_tokens": 129139240} +{"current_steps": 191665, "total_steps": 204665, "loss": 0.0, "lr": 2.4483486535588628e-08, "epoch": 4.682407837197371, "percentage": 93.65, "elapsed_time": "4:07:49", "remaining_time": "0:16:48", "throughput": 8685.28, "total_tokens": 129142696} +{"current_steps": 191670, "total_steps": 204665, "loss": 0.0402, "lr": 2.4464735304611682e-08, "epoch": 4.682529988029218, "percentage": 93.65, "elapsed_time": "4:07:49", "remaining_time": "0:16:48", "throughput": 8685.29, "total_tokens": 129145704} +{"current_steps": 191675, "total_steps": 204665, "loss": 0.0, "lr": 2.444599116798951e-08, "epoch": 4.682652138861066, "percentage": 93.65, "elapsed_time": "4:07:49", "remaining_time": "0:16:47", "throughput": 8685.3, "total_tokens": 129148904} +{"current_steps": 191680, "total_steps": 204665, "loss": 0.0, "lr": 2.4427254125858444e-08, "epoch": 4.6827742896929125, "percentage": 93.66, "elapsed_time": "4:07:50", "remaining_time": "0:16:47", "throughput": 8685.35, "total_tokens": 129152744} +{"current_steps": 191685, "total_steps": 204665, "loss": 0.0, "lr": 2.440852417835482e-08, "epoch": 4.68289644052476, "percentage": 93.66, "elapsed_time": "4:07:50", "remaining_time": "0:16:46", "throughput": 8685.38, "total_tokens": 129156264} +{"current_steps": 191690, "total_steps": 204665, "loss": 0.0, "lr": 2.4389801325614855e-08, "epoch": 4.683018591356607, "percentage": 93.66, "elapsed_time": "4:07:50", "remaining_time": "0:16:46", "throughput": 8685.42, "total_tokens": 129159912} +{"current_steps": 191695, "total_steps": 204665, "loss": 0.0, "lr": 2.4371085567774676e-08, "epoch": 4.6831407421884546, "percentage": 93.66, "elapsed_time": "4:07:51", "remaining_time": "0:16:46", "throughput": 8685.44, "total_tokens": 129163112} +{"current_steps": 191700, "total_steps": 204665, "loss": 0.0348, "lr": 2.4352376904970275e-08, "epoch": 4.683262893020301, "percentage": 93.67, "elapsed_time": "4:07:51", "remaining_time": "0:16:45", "throughput": 8685.44, "total_tokens": 129166120} +{"current_steps": 191705, "total_steps": 204665, "loss": 0.0, "lr": 2.4333675337337876e-08, "epoch": 4.683385043852149, "percentage": 93.67, "elapsed_time": "4:07:51", "remaining_time": "0:16:45", "throughput": 8685.47, "total_tokens": 129169576} +{"current_steps": 191710, "total_steps": 204665, "loss": 0.0001, "lr": 2.431498086501338e-08, "epoch": 4.683507194683996, "percentage": 93.67, "elapsed_time": "4:07:52", "remaining_time": "0:16:45", "throughput": 8685.51, "total_tokens": 129173160} +{"current_steps": 191715, "total_steps": 204665, "loss": 0.0, "lr": 2.429629348813278e-08, "epoch": 4.683629345515843, "percentage": 93.67, "elapsed_time": "4:07:52", "remaining_time": "0:16:44", "throughput": 8685.53, "total_tokens": 129176424} +{"current_steps": 191720, "total_steps": 204665, "loss": 0.0, "lr": 2.427761320683208e-08, "epoch": 4.68375149634769, "percentage": 93.68, "elapsed_time": "4:07:52", "remaining_time": "0:16:44", "throughput": 8685.55, "total_tokens": 129179880} +{"current_steps": 191725, "total_steps": 204665, "loss": 0.0, "lr": 2.4258940021246842e-08, "epoch": 4.683873647179537, "percentage": 93.68, "elapsed_time": "4:07:53", "remaining_time": "0:16:43", "throughput": 8685.6, "total_tokens": 129183656} +{"current_steps": 191730, "total_steps": 204665, "loss": 0.0, "lr": 2.4240273931513176e-08, "epoch": 4.683995798011384, "percentage": 93.68, "elapsed_time": "4:07:53", "remaining_time": "0:16:43", "throughput": 8685.63, "total_tokens": 129187112} +{"current_steps": 191735, "total_steps": 204665, "loss": 0.0, "lr": 2.4221614937766643e-08, "epoch": 4.684117948843232, "percentage": 93.68, "elapsed_time": "4:07:54", "remaining_time": "0:16:43", "throughput": 8685.65, "total_tokens": 129190376} +{"current_steps": 191740, "total_steps": 204665, "loss": 0.0, "lr": 2.420296304014291e-08, "epoch": 4.684240099675079, "percentage": 93.68, "elapsed_time": "4:07:54", "remaining_time": "0:16:42", "throughput": 8685.66, "total_tokens": 129193576} +{"current_steps": 191745, "total_steps": 204665, "loss": 0.0, "lr": 2.4184318238777756e-08, "epoch": 4.684362250506926, "percentage": 93.69, "elapsed_time": "4:07:54", "remaining_time": "0:16:42", "throughput": 8685.67, "total_tokens": 129196584} +{"current_steps": 191750, "total_steps": 204665, "loss": 0.0, "lr": 2.4165680533806632e-08, "epoch": 4.684484401338773, "percentage": 93.69, "elapsed_time": "4:07:55", "remaining_time": "0:16:41", "throughput": 8685.7, "total_tokens": 129200104} +{"current_steps": 191755, "total_steps": 204665, "loss": 0.0, "lr": 2.4147049925365314e-08, "epoch": 4.68460655217062, "percentage": 93.69, "elapsed_time": "4:07:55", "remaining_time": "0:16:41", "throughput": 8685.73, "total_tokens": 129203688} +{"current_steps": 191760, "total_steps": 204665, "loss": 0.0265, "lr": 2.412842641358892e-08, "epoch": 4.684728703002468, "percentage": 93.69, "elapsed_time": "4:07:55", "remaining_time": "0:16:41", "throughput": 8685.76, "total_tokens": 129207080} +{"current_steps": 191765, "total_steps": 204665, "loss": 0.0, "lr": 2.410980999861323e-08, "epoch": 4.684850853834314, "percentage": 93.7, "elapsed_time": "4:07:56", "remaining_time": "0:16:40", "throughput": 8685.77, "total_tokens": 129210152} +{"current_steps": 191770, "total_steps": 204665, "loss": 0.0, "lr": 2.4091200680573352e-08, "epoch": 4.684973004666162, "percentage": 93.7, "elapsed_time": "4:07:56", "remaining_time": "0:16:40", "throughput": 8685.8, "total_tokens": 129213608} +{"current_steps": 191775, "total_steps": 204665, "loss": 0.0, "lr": 2.4072598459604743e-08, "epoch": 4.685095155498009, "percentage": 93.7, "elapsed_time": "4:07:56", "remaining_time": "0:16:39", "throughput": 8685.84, "total_tokens": 129217384} +{"current_steps": 191780, "total_steps": 204665, "loss": 0.0001, "lr": 2.4054003335842842e-08, "epoch": 4.685217306329856, "percentage": 93.7, "elapsed_time": "4:07:57", "remaining_time": "0:16:39", "throughput": 8685.87, "total_tokens": 129220840} +{"current_steps": 191785, "total_steps": 204665, "loss": 0.0, "lr": 2.4035415309422657e-08, "epoch": 4.685339457161703, "percentage": 93.71, "elapsed_time": "4:07:57", "remaining_time": "0:16:39", "throughput": 8685.88, "total_tokens": 129223912} +{"current_steps": 191790, "total_steps": 204665, "loss": 0.0, "lr": 2.401683438047941e-08, "epoch": 4.685461607993551, "percentage": 93.71, "elapsed_time": "4:07:57", "remaining_time": "0:16:38", "throughput": 8685.9, "total_tokens": 129227304} +{"current_steps": 191795, "total_steps": 204665, "loss": 0.0, "lr": 2.399826054914822e-08, "epoch": 4.6855837588253975, "percentage": 93.71, "elapsed_time": "4:07:58", "remaining_time": "0:16:38", "throughput": 8685.93, "total_tokens": 129230824} +{"current_steps": 191800, "total_steps": 204665, "loss": 0.0, "lr": 2.3979693815564305e-08, "epoch": 4.685705909657245, "percentage": 93.71, "elapsed_time": "4:07:58", "remaining_time": "0:16:37", "throughput": 8685.95, "total_tokens": 129234024} +{"current_steps": 191805, "total_steps": 204665, "loss": 0.0, "lr": 2.3961134179862564e-08, "epoch": 4.685828060489092, "percentage": 93.72, "elapsed_time": "4:07:58", "remaining_time": "0:16:37", "throughput": 8685.97, "total_tokens": 129237288} +{"current_steps": 191810, "total_steps": 204665, "loss": 0.0, "lr": 2.3942581642177884e-08, "epoch": 4.6859502113209395, "percentage": 93.72, "elapsed_time": "4:07:59", "remaining_time": "0:16:37", "throughput": 8685.99, "total_tokens": 129240680} +{"current_steps": 191815, "total_steps": 204665, "loss": 0.0, "lr": 2.392403620264538e-08, "epoch": 4.686072362152786, "percentage": 93.72, "elapsed_time": "4:07:59", "remaining_time": "0:16:36", "throughput": 8686.0, "total_tokens": 129243880} +{"current_steps": 191820, "total_steps": 204665, "loss": 0.0, "lr": 2.3905497861399616e-08, "epoch": 4.686194512984633, "percentage": 93.72, "elapsed_time": "4:07:59", "remaining_time": "0:16:36", "throughput": 8686.05, "total_tokens": 129247592} +{"current_steps": 191825, "total_steps": 204665, "loss": 0.0, "lr": 2.388696661857581e-08, "epoch": 4.686316663816481, "percentage": 93.73, "elapsed_time": "4:08:00", "remaining_time": "0:16:36", "throughput": 8686.05, "total_tokens": 129250600} +{"current_steps": 191830, "total_steps": 204665, "loss": 0.0, "lr": 2.3868442474308524e-08, "epoch": 4.686438814648328, "percentage": 93.73, "elapsed_time": "4:08:00", "remaining_time": "0:16:35", "throughput": 8686.06, "total_tokens": 129253672} +{"current_steps": 191835, "total_steps": 204665, "loss": 0.0, "lr": 2.384992542873243e-08, "epoch": 4.686560965480175, "percentage": 93.73, "elapsed_time": "4:08:00", "remaining_time": "0:16:35", "throughput": 8686.07, "total_tokens": 129256808} +{"current_steps": 191840, "total_steps": 204665, "loss": 0.0, "lr": 2.3831415481982198e-08, "epoch": 4.686683116312022, "percentage": 93.73, "elapsed_time": "4:08:01", "remaining_time": "0:16:34", "throughput": 8686.08, "total_tokens": 129259944} +{"current_steps": 191845, "total_steps": 204665, "loss": 0.0, "lr": 2.3812912634192495e-08, "epoch": 4.686805267143869, "percentage": 93.74, "elapsed_time": "4:08:01", "remaining_time": "0:16:34", "throughput": 8686.11, "total_tokens": 129263336} +{"current_steps": 191850, "total_steps": 204665, "loss": 0.0246, "lr": 2.379441688549788e-08, "epoch": 4.686927417975716, "percentage": 93.74, "elapsed_time": "4:08:01", "remaining_time": "0:16:34", "throughput": 8686.12, "total_tokens": 129266408} +{"current_steps": 191855, "total_steps": 204665, "loss": 0.0, "lr": 2.3775928236032806e-08, "epoch": 4.687049568807564, "percentage": 93.74, "elapsed_time": "4:08:02", "remaining_time": "0:16:33", "throughput": 8686.15, "total_tokens": 129269928} +{"current_steps": 191860, "total_steps": 204665, "loss": 0.0, "lr": 2.3757446685931826e-08, "epoch": 4.6871717196394105, "percentage": 93.74, "elapsed_time": "4:08:02", "remaining_time": "0:16:33", "throughput": 8686.17, "total_tokens": 129273256} +{"current_steps": 191865, "total_steps": 204665, "loss": 0.0024, "lr": 2.3738972235329168e-08, "epoch": 4.687293870471258, "percentage": 93.75, "elapsed_time": "4:08:02", "remaining_time": "0:16:32", "throughput": 8686.17, "total_tokens": 129276200} +{"current_steps": 191870, "total_steps": 204665, "loss": 0.0, "lr": 2.3720504884359282e-08, "epoch": 4.687416021303105, "percentage": 93.75, "elapsed_time": "4:08:03", "remaining_time": "0:16:32", "throughput": 8686.19, "total_tokens": 129279400} +{"current_steps": 191875, "total_steps": 204665, "loss": 0.0, "lr": 2.3702044633156503e-08, "epoch": 4.6875381721349525, "percentage": 93.75, "elapsed_time": "4:08:03", "remaining_time": "0:16:32", "throughput": 8686.21, "total_tokens": 129282664} +{"current_steps": 191880, "total_steps": 204665, "loss": 0.0, "lr": 2.3683591481855058e-08, "epoch": 4.687660322966799, "percentage": 93.75, "elapsed_time": "4:08:04", "remaining_time": "0:16:31", "throughput": 8686.22, "total_tokens": 129285864} +{"current_steps": 191885, "total_steps": 204665, "loss": 0.0, "lr": 2.3665145430589173e-08, "epoch": 4.687782473798647, "percentage": 93.76, "elapsed_time": "4:08:04", "remaining_time": "0:16:31", "throughput": 8686.24, "total_tokens": 129289128} +{"current_steps": 191890, "total_steps": 204665, "loss": 0.0, "lr": 2.364670647949285e-08, "epoch": 4.687904624630494, "percentage": 93.76, "elapsed_time": "4:08:04", "remaining_time": "0:16:30", "throughput": 8686.24, "total_tokens": 129292008} +{"current_steps": 191895, "total_steps": 204665, "loss": 0.0, "lr": 2.3628274628700318e-08, "epoch": 4.688026775462341, "percentage": 93.76, "elapsed_time": "4:08:05", "remaining_time": "0:16:30", "throughput": 8686.26, "total_tokens": 129295336} +{"current_steps": 191900, "total_steps": 204665, "loss": 0.0185, "lr": 2.3609849878345577e-08, "epoch": 4.688148926294188, "percentage": 93.76, "elapsed_time": "4:08:05", "remaining_time": "0:16:30", "throughput": 8686.3, "total_tokens": 129298984} +{"current_steps": 191905, "total_steps": 204665, "loss": 0.0, "lr": 2.3591432228562634e-08, "epoch": 4.688271077126036, "percentage": 93.77, "elapsed_time": "4:08:05", "remaining_time": "0:16:29", "throughput": 8686.35, "total_tokens": 129302824} +{"current_steps": 191910, "total_steps": 204665, "loss": 0.0, "lr": 2.3573021679485495e-08, "epoch": 4.688393227957882, "percentage": 93.77, "elapsed_time": "4:08:06", "remaining_time": "0:16:29", "throughput": 8686.38, "total_tokens": 129306408} +{"current_steps": 191915, "total_steps": 204665, "loss": 0.0, "lr": 2.3554618231247934e-08, "epoch": 4.688515378789729, "percentage": 93.77, "elapsed_time": "4:08:06", "remaining_time": "0:16:28", "throughput": 8686.42, "total_tokens": 129309992} +{"current_steps": 191920, "total_steps": 204665, "loss": 0.0001, "lr": 2.3536221883983854e-08, "epoch": 4.688637529621577, "percentage": 93.77, "elapsed_time": "4:08:06", "remaining_time": "0:16:28", "throughput": 8686.45, "total_tokens": 129313576} +{"current_steps": 191925, "total_steps": 204665, "loss": 0.0, "lr": 2.3517832637826806e-08, "epoch": 4.6887596804534235, "percentage": 93.78, "elapsed_time": "4:08:07", "remaining_time": "0:16:28", "throughput": 8686.49, "total_tokens": 129317160} +{"current_steps": 191930, "total_steps": 204665, "loss": 0.0, "lr": 2.349945049291091e-08, "epoch": 4.688881831285271, "percentage": 93.78, "elapsed_time": "4:08:07", "remaining_time": "0:16:27", "throughput": 8686.5, "total_tokens": 129320232} +{"current_steps": 191935, "total_steps": 204665, "loss": 0.0, "lr": 2.3481075449369614e-08, "epoch": 4.689003982117118, "percentage": 93.78, "elapsed_time": "4:08:07", "remaining_time": "0:16:27", "throughput": 8686.49, "total_tokens": 129323112} +{"current_steps": 191940, "total_steps": 204665, "loss": 0.0022, "lr": 2.34627075073367e-08, "epoch": 4.6891261329489655, "percentage": 93.78, "elapsed_time": "4:08:08", "remaining_time": "0:16:27", "throughput": 8686.52, "total_tokens": 129326504} +{"current_steps": 191945, "total_steps": 204665, "loss": 0.0, "lr": 2.3444346666945503e-08, "epoch": 4.689248283780812, "percentage": 93.78, "elapsed_time": "4:08:08", "remaining_time": "0:16:26", "throughput": 8686.52, "total_tokens": 129329512} +{"current_steps": 191950, "total_steps": 204665, "loss": 0.0, "lr": 2.3425992928329695e-08, "epoch": 4.68937043461266, "percentage": 93.79, "elapsed_time": "4:08:08", "remaining_time": "0:16:26", "throughput": 8686.58, "total_tokens": 129333480} +{"current_steps": 191955, "total_steps": 204665, "loss": 0.0, "lr": 2.340764629162284e-08, "epoch": 4.689492585444507, "percentage": 93.79, "elapsed_time": "4:08:09", "remaining_time": "0:16:25", "throughput": 8686.6, "total_tokens": 129336872} +{"current_steps": 191960, "total_steps": 204665, "loss": 0.0, "lr": 2.338930675695805e-08, "epoch": 4.689614736276354, "percentage": 93.79, "elapsed_time": "4:08:09", "remaining_time": "0:16:25", "throughput": 8686.62, "total_tokens": 129340072} +{"current_steps": 191965, "total_steps": 204665, "loss": 0.0, "lr": 2.3370974324468997e-08, "epoch": 4.689736887108201, "percentage": 93.79, "elapsed_time": "4:08:09", "remaining_time": "0:16:25", "throughput": 8686.66, "total_tokens": 129343848} +{"current_steps": 191970, "total_steps": 204665, "loss": 0.0224, "lr": 2.3352648994288905e-08, "epoch": 4.689859037940049, "percentage": 93.8, "elapsed_time": "4:08:10", "remaining_time": "0:16:24", "throughput": 8686.69, "total_tokens": 129347368} +{"current_steps": 191975, "total_steps": 204665, "loss": 0.0, "lr": 2.3334330766551002e-08, "epoch": 4.689981188771895, "percentage": 93.8, "elapsed_time": "4:08:10", "remaining_time": "0:16:24", "throughput": 8686.73, "total_tokens": 129350888} +{"current_steps": 191980, "total_steps": 204665, "loss": 0.0, "lr": 2.331601964138863e-08, "epoch": 4.690103339603743, "percentage": 93.8, "elapsed_time": "4:08:10", "remaining_time": "0:16:23", "throughput": 8686.78, "total_tokens": 129354728} +{"current_steps": 191985, "total_steps": 204665, "loss": 0.0, "lr": 2.329771561893479e-08, "epoch": 4.69022549043559, "percentage": 93.8, "elapsed_time": "4:08:11", "remaining_time": "0:16:23", "throughput": 8686.79, "total_tokens": 129357864} +{"current_steps": 191990, "total_steps": 204665, "loss": 0.0, "lr": 2.3279418699322594e-08, "epoch": 4.690347641267437, "percentage": 93.81, "elapsed_time": "4:08:11", "remaining_time": "0:16:23", "throughput": 8686.84, "total_tokens": 129361832} +{"current_steps": 191995, "total_steps": 204665, "loss": 0.0, "lr": 2.3261128882685275e-08, "epoch": 4.690469792099284, "percentage": 93.81, "elapsed_time": "4:08:12", "remaining_time": "0:16:22", "throughput": 8686.86, "total_tokens": 129365096} +{"current_steps": 192000, "total_steps": 204665, "loss": 0.0, "lr": 2.3242846169155728e-08, "epoch": 4.690591942931132, "percentage": 93.81, "elapsed_time": "4:08:12", "remaining_time": "0:16:22", "throughput": 8686.9, "total_tokens": 129368872} +{"current_steps": 192005, "total_steps": 204665, "loss": 0.0, "lr": 2.3224570558866952e-08, "epoch": 4.6907140937629785, "percentage": 93.81, "elapsed_time": "4:08:12", "remaining_time": "0:16:21", "throughput": 8686.91, "total_tokens": 129371944} +{"current_steps": 192010, "total_steps": 204665, "loss": 0.0, "lr": 2.320630205195173e-08, "epoch": 4.690836244594825, "percentage": 93.82, "elapsed_time": "4:08:13", "remaining_time": "0:16:21", "throughput": 8686.93, "total_tokens": 129375272} +{"current_steps": 192015, "total_steps": 204665, "loss": 0.0, "lr": 2.3188040648543073e-08, "epoch": 4.690958395426673, "percentage": 93.82, "elapsed_time": "4:08:13", "remaining_time": "0:16:21", "throughput": 8686.96, "total_tokens": 129378792} +{"current_steps": 192020, "total_steps": 204665, "loss": 0.0, "lr": 2.3169786348773644e-08, "epoch": 4.69108054625852, "percentage": 93.82, "elapsed_time": "4:08:13", "remaining_time": "0:16:20", "throughput": 8687.01, "total_tokens": 129382568} +{"current_steps": 192025, "total_steps": 204665, "loss": 0.0, "lr": 2.3151539152776345e-08, "epoch": 4.691202697090367, "percentage": 93.82, "elapsed_time": "4:08:14", "remaining_time": "0:16:20", "throughput": 8687.02, "total_tokens": 129385832} +{"current_steps": 192030, "total_steps": 204665, "loss": 0.0, "lr": 2.3133299060683732e-08, "epoch": 4.691324847922214, "percentage": 93.83, "elapsed_time": "4:08:14", "remaining_time": "0:16:20", "throughput": 8687.05, "total_tokens": 129389224} +{"current_steps": 192035, "total_steps": 204665, "loss": 0.0, "lr": 2.3115066072628585e-08, "epoch": 4.691446998754062, "percentage": 93.83, "elapsed_time": "4:08:14", "remaining_time": "0:16:19", "throughput": 8687.06, "total_tokens": 129392424} +{"current_steps": 192040, "total_steps": 204665, "loss": 0.0, "lr": 2.309684018874336e-08, "epoch": 4.691569149585908, "percentage": 93.83, "elapsed_time": "4:08:15", "remaining_time": "0:16:19", "throughput": 8687.09, "total_tokens": 129395880} +{"current_steps": 192045, "total_steps": 204665, "loss": 0.0, "lr": 2.3078621409160727e-08, "epoch": 4.691691300417756, "percentage": 93.83, "elapsed_time": "4:08:15", "remaining_time": "0:16:18", "throughput": 8687.12, "total_tokens": 129399464} +{"current_steps": 192050, "total_steps": 204665, "loss": 0.0, "lr": 2.306040973401313e-08, "epoch": 4.691813451249603, "percentage": 93.84, "elapsed_time": "4:08:15", "remaining_time": "0:16:18", "throughput": 8687.14, "total_tokens": 129402792} +{"current_steps": 192055, "total_steps": 204665, "loss": 0.0675, "lr": 2.3042205163432914e-08, "epoch": 4.69193560208145, "percentage": 93.84, "elapsed_time": "4:08:16", "remaining_time": "0:16:18", "throughput": 8687.17, "total_tokens": 129406248} +{"current_steps": 192060, "total_steps": 204665, "loss": 0.0, "lr": 2.302400769755264e-08, "epoch": 4.692057752913297, "percentage": 93.84, "elapsed_time": "4:08:16", "remaining_time": "0:16:17", "throughput": 8687.2, "total_tokens": 129409640} +{"current_steps": 192065, "total_steps": 204665, "loss": 0.0, "lr": 2.300581733650453e-08, "epoch": 4.692179903745145, "percentage": 93.84, "elapsed_time": "4:08:16", "remaining_time": "0:16:17", "throughput": 8687.23, "total_tokens": 129413224} +{"current_steps": 192070, "total_steps": 204665, "loss": 0.0, "lr": 2.2987634080420815e-08, "epoch": 4.692302054576992, "percentage": 93.85, "elapsed_time": "4:08:17", "remaining_time": "0:16:16", "throughput": 8687.27, "total_tokens": 129416808} +{"current_steps": 192075, "total_steps": 204665, "loss": 0.0, "lr": 2.2969457929433946e-08, "epoch": 4.692424205408839, "percentage": 93.85, "elapsed_time": "4:08:17", "remaining_time": "0:16:16", "throughput": 8687.32, "total_tokens": 129420712} +{"current_steps": 192080, "total_steps": 204665, "loss": 0.0, "lr": 2.295128888367581e-08, "epoch": 4.692546356240686, "percentage": 93.85, "elapsed_time": "4:08:18", "remaining_time": "0:16:16", "throughput": 8687.38, "total_tokens": 129424680} +{"current_steps": 192085, "total_steps": 204665, "loss": 0.0, "lr": 2.2933126943278758e-08, "epoch": 4.692668507072533, "percentage": 93.85, "elapsed_time": "4:08:18", "remaining_time": "0:16:15", "throughput": 8687.4, "total_tokens": 129428072} +{"current_steps": 192090, "total_steps": 204665, "loss": 0.0, "lr": 2.2914972108374896e-08, "epoch": 4.69279065790438, "percentage": 93.86, "elapsed_time": "4:08:18", "remaining_time": "0:16:15", "throughput": 8687.43, "total_tokens": 129431528} +{"current_steps": 192095, "total_steps": 204665, "loss": 0.0, "lr": 2.2896824379096014e-08, "epoch": 4.692912808736228, "percentage": 93.86, "elapsed_time": "4:08:19", "remaining_time": "0:16:14", "throughput": 8687.46, "total_tokens": 129434984} +{"current_steps": 192100, "total_steps": 204665, "loss": 0.0, "lr": 2.2878683755574446e-08, "epoch": 4.693034959568075, "percentage": 93.86, "elapsed_time": "4:08:19", "remaining_time": "0:16:14", "throughput": 8687.49, "total_tokens": 129438568} +{"current_steps": 192105, "total_steps": 204665, "loss": 0.0, "lr": 2.2860550237941644e-08, "epoch": 4.6931571103999215, "percentage": 93.86, "elapsed_time": "4:08:19", "remaining_time": "0:16:14", "throughput": 8687.54, "total_tokens": 129442408} +{"current_steps": 192110, "total_steps": 204665, "loss": 0.0, "lr": 2.284242382632995e-08, "epoch": 4.693279261231769, "percentage": 93.87, "elapsed_time": "4:08:20", "remaining_time": "0:16:13", "throughput": 8687.57, "total_tokens": 129445736} +{"current_steps": 192115, "total_steps": 204665, "loss": 0.0, "lr": 2.2824304520870808e-08, "epoch": 4.693401412063616, "percentage": 93.87, "elapsed_time": "4:08:20", "remaining_time": "0:16:13", "throughput": 8687.57, "total_tokens": 129448808} +{"current_steps": 192120, "total_steps": 204665, "loss": 0.0001, "lr": 2.2806192321696225e-08, "epoch": 4.6935235628954635, "percentage": 93.87, "elapsed_time": "4:08:20", "remaining_time": "0:16:12", "throughput": 8687.59, "total_tokens": 129452136} +{"current_steps": 192125, "total_steps": 204665, "loss": 0.0, "lr": 2.278808722893788e-08, "epoch": 4.69364571372731, "percentage": 93.87, "elapsed_time": "4:08:21", "remaining_time": "0:16:12", "throughput": 8687.62, "total_tokens": 129455464} +{"current_steps": 192130, "total_steps": 204665, "loss": 0.0, "lr": 2.2769989242727328e-08, "epoch": 4.693767864559158, "percentage": 93.88, "elapsed_time": "4:08:21", "remaining_time": "0:16:12", "throughput": 8687.62, "total_tokens": 129458472} +{"current_steps": 192135, "total_steps": 204665, "loss": 0.0, "lr": 2.2751898363196354e-08, "epoch": 4.693890015391005, "percentage": 93.88, "elapsed_time": "4:08:21", "remaining_time": "0:16:11", "throughput": 8687.66, "total_tokens": 129462120} +{"current_steps": 192140, "total_steps": 204665, "loss": 0.0, "lr": 2.273381459047641e-08, "epoch": 4.694012166222852, "percentage": 93.88, "elapsed_time": "4:08:22", "remaining_time": "0:16:11", "throughput": 8687.7, "total_tokens": 129465832} +{"current_steps": 192145, "total_steps": 204665, "loss": 0.0707, "lr": 2.271573792469905e-08, "epoch": 4.694134317054699, "percentage": 93.88, "elapsed_time": "4:08:22", "remaining_time": "0:16:11", "throughput": 8687.71, "total_tokens": 129468968} +{"current_steps": 192150, "total_steps": 204665, "loss": 0.0, "lr": 2.2697668365995514e-08, "epoch": 4.694256467886547, "percentage": 93.89, "elapsed_time": "4:08:22", "remaining_time": "0:16:10", "throughput": 8687.74, "total_tokens": 129472296} +{"current_steps": 192155, "total_steps": 204665, "loss": 0.0002, "lr": 2.2679605914497578e-08, "epoch": 4.694378618718393, "percentage": 93.89, "elapsed_time": "4:08:23", "remaining_time": "0:16:10", "throughput": 8687.74, "total_tokens": 129475368} +{"current_steps": 192160, "total_steps": 204665, "loss": 0.0, "lr": 2.2661550570336473e-08, "epoch": 4.694500769550241, "percentage": 93.89, "elapsed_time": "4:08:23", "remaining_time": "0:16:09", "throughput": 8687.76, "total_tokens": 129478696} +{"current_steps": 192165, "total_steps": 204665, "loss": 0.0, "lr": 2.2643502333643205e-08, "epoch": 4.694622920382088, "percentage": 93.89, "elapsed_time": "4:08:23", "remaining_time": "0:16:09", "throughput": 8687.79, "total_tokens": 129482088} +{"current_steps": 192170, "total_steps": 204665, "loss": 0.0, "lr": 2.2625461204549444e-08, "epoch": 4.694745071213935, "percentage": 93.89, "elapsed_time": "4:08:24", "remaining_time": "0:16:09", "throughput": 8687.81, "total_tokens": 129485416} +{"current_steps": 192175, "total_steps": 204665, "loss": 0.0, "lr": 2.26074271831862e-08, "epoch": 4.694867222045782, "percentage": 93.9, "elapsed_time": "4:08:24", "remaining_time": "0:16:08", "throughput": 8687.83, "total_tokens": 129488808} +{"current_steps": 192180, "total_steps": 204665, "loss": 0.0359, "lr": 2.2589400269684477e-08, "epoch": 4.694989372877629, "percentage": 93.9, "elapsed_time": "4:08:24", "remaining_time": "0:16:08", "throughput": 8687.86, "total_tokens": 129492200} +{"current_steps": 192185, "total_steps": 204665, "loss": 0.0, "lr": 2.2571380464175725e-08, "epoch": 4.6951115237094765, "percentage": 93.9, "elapsed_time": "4:08:25", "remaining_time": "0:16:07", "throughput": 8687.88, "total_tokens": 129495464} +{"current_steps": 192190, "total_steps": 204665, "loss": 0.0, "lr": 2.2553367766790622e-08, "epoch": 4.695233674541324, "percentage": 93.9, "elapsed_time": "4:08:25", "remaining_time": "0:16:07", "throughput": 8687.92, "total_tokens": 129499112} +{"current_steps": 192195, "total_steps": 204665, "loss": 0.0, "lr": 2.25353621776605e-08, "epoch": 4.695355825373171, "percentage": 93.91, "elapsed_time": "4:08:26", "remaining_time": "0:16:07", "throughput": 8687.96, "total_tokens": 129502824} +{"current_steps": 192200, "total_steps": 204665, "loss": 0.0667, "lr": 2.2517363696916037e-08, "epoch": 4.695477976205018, "percentage": 93.91, "elapsed_time": "4:08:26", "remaining_time": "0:16:06", "throughput": 8687.98, "total_tokens": 129506088} +{"current_steps": 192205, "total_steps": 204665, "loss": 0.0, "lr": 2.2499372324688125e-08, "epoch": 4.695600127036865, "percentage": 93.91, "elapsed_time": "4:08:26", "remaining_time": "0:16:06", "throughput": 8687.99, "total_tokens": 129509352} +{"current_steps": 192210, "total_steps": 204665, "loss": 0.0029, "lr": 2.2481388061107888e-08, "epoch": 4.695722277868712, "percentage": 93.91, "elapsed_time": "4:08:27", "remaining_time": "0:16:05", "throughput": 8688.04, "total_tokens": 129513064} +{"current_steps": 192215, "total_steps": 204665, "loss": 0.0354, "lr": 2.2463410906305768e-08, "epoch": 4.69584442870056, "percentage": 93.92, "elapsed_time": "4:08:27", "remaining_time": "0:16:05", "throughput": 8688.05, "total_tokens": 129516264} +{"current_steps": 192220, "total_steps": 204665, "loss": 0.0, "lr": 2.2445440860412777e-08, "epoch": 4.695966579532406, "percentage": 93.92, "elapsed_time": "4:08:27", "remaining_time": "0:16:05", "throughput": 8688.09, "total_tokens": 129519976} +{"current_steps": 192225, "total_steps": 204665, "loss": 0.0002, "lr": 2.242747792355937e-08, "epoch": 4.696088730364254, "percentage": 93.92, "elapsed_time": "4:08:28", "remaining_time": "0:16:04", "throughput": 8688.14, "total_tokens": 129523816} +{"current_steps": 192230, "total_steps": 204665, "loss": 0.0, "lr": 2.240952209587632e-08, "epoch": 4.696210881196101, "percentage": 93.92, "elapsed_time": "4:08:28", "remaining_time": "0:16:04", "throughput": 8688.16, "total_tokens": 129527080} +{"current_steps": 192235, "total_steps": 204665, "loss": 0.0, "lr": 2.239157337749409e-08, "epoch": 4.696333032027948, "percentage": 93.93, "elapsed_time": "4:08:28", "remaining_time": "0:16:04", "throughput": 8688.18, "total_tokens": 129530408} +{"current_steps": 192240, "total_steps": 204665, "loss": 0.0, "lr": 2.2373631768543344e-08, "epoch": 4.696455182859795, "percentage": 93.93, "elapsed_time": "4:08:29", "remaining_time": "0:16:03", "throughput": 8688.18, "total_tokens": 129533416} +{"current_steps": 192245, "total_steps": 204665, "loss": 0.0, "lr": 2.2355697269154537e-08, "epoch": 4.696577333691643, "percentage": 93.93, "elapsed_time": "4:08:29", "remaining_time": "0:16:03", "throughput": 8688.22, "total_tokens": 129537064} +{"current_steps": 192250, "total_steps": 204665, "loss": 0.0, "lr": 2.2337769879458014e-08, "epoch": 4.6966994845234895, "percentage": 93.93, "elapsed_time": "4:08:29", "remaining_time": "0:16:02", "throughput": 8688.24, "total_tokens": 129540264} +{"current_steps": 192255, "total_steps": 204665, "loss": 0.0, "lr": 2.231984959958422e-08, "epoch": 4.696821635355337, "percentage": 93.94, "elapsed_time": "4:08:30", "remaining_time": "0:16:02", "throughput": 8688.29, "total_tokens": 129544104} +{"current_steps": 192260, "total_steps": 204665, "loss": 0.0, "lr": 2.230193642966338e-08, "epoch": 4.696943786187184, "percentage": 93.94, "elapsed_time": "4:08:30", "remaining_time": "0:16:02", "throughput": 8688.29, "total_tokens": 129547176} +{"current_steps": 192265, "total_steps": 204665, "loss": 0.0, "lr": 2.2284030369825956e-08, "epoch": 4.6970659370190315, "percentage": 93.94, "elapsed_time": "4:08:30", "remaining_time": "0:16:01", "throughput": 8688.29, "total_tokens": 129550056} +{"current_steps": 192270, "total_steps": 204665, "loss": 0.0004, "lr": 2.226613142020195e-08, "epoch": 4.697188087850878, "percentage": 93.94, "elapsed_time": "4:08:31", "remaining_time": "0:16:01", "throughput": 8688.31, "total_tokens": 129553448} +{"current_steps": 192275, "total_steps": 204665, "loss": 0.0, "lr": 2.2248239580921478e-08, "epoch": 4.697310238682725, "percentage": 93.95, "elapsed_time": "4:08:31", "remaining_time": "0:16:00", "throughput": 8688.35, "total_tokens": 129556968} +{"current_steps": 192280, "total_steps": 204665, "loss": 0.0, "lr": 2.2230354852114998e-08, "epoch": 4.697432389514573, "percentage": 93.95, "elapsed_time": "4:08:31", "remaining_time": "0:16:00", "throughput": 8688.37, "total_tokens": 129560296} +{"current_steps": 192285, "total_steps": 204665, "loss": 0.0, "lr": 2.2212477233912285e-08, "epoch": 4.697554540346419, "percentage": 93.95, "elapsed_time": "4:08:32", "remaining_time": "0:16:00", "throughput": 8688.42, "total_tokens": 129564136} +{"current_steps": 192290, "total_steps": 204665, "loss": 0.0, "lr": 2.2194606726443465e-08, "epoch": 4.697676691178267, "percentage": 93.95, "elapsed_time": "4:08:32", "remaining_time": "0:15:59", "throughput": 8688.44, "total_tokens": 129567528} +{"current_steps": 192295, "total_steps": 204665, "loss": 0.0, "lr": 2.2176743329838433e-08, "epoch": 4.697798842010114, "percentage": 93.96, "elapsed_time": "4:08:32", "remaining_time": "0:15:59", "throughput": 8688.47, "total_tokens": 129570984} +{"current_steps": 192300, "total_steps": 204665, "loss": 0.0, "lr": 2.215888704422708e-08, "epoch": 4.697920992841961, "percentage": 93.96, "elapsed_time": "4:08:33", "remaining_time": "0:15:58", "throughput": 8688.47, "total_tokens": 129573992} +{"current_steps": 192305, "total_steps": 204665, "loss": 0.0, "lr": 2.214103786973931e-08, "epoch": 4.698043143673808, "percentage": 93.96, "elapsed_time": "4:08:33", "remaining_time": "0:15:58", "throughput": 8688.55, "total_tokens": 129578472} +{"current_steps": 192310, "total_steps": 204665, "loss": 0.0, "lr": 2.2123195806505013e-08, "epoch": 4.698165294505656, "percentage": 93.96, "elapsed_time": "4:08:34", "remaining_time": "0:15:58", "throughput": 8688.56, "total_tokens": 129581544} +{"current_steps": 192315, "total_steps": 204665, "loss": 0.0, "lr": 2.2105360854653865e-08, "epoch": 4.6982874453375025, "percentage": 93.97, "elapsed_time": "4:08:34", "remaining_time": "0:15:57", "throughput": 8688.58, "total_tokens": 129584744} +{"current_steps": 192320, "total_steps": 204665, "loss": 0.0, "lr": 2.2087533014315428e-08, "epoch": 4.69840959616935, "percentage": 93.97, "elapsed_time": "4:08:34", "remaining_time": "0:15:57", "throughput": 8688.6, "total_tokens": 129588072} +{"current_steps": 192325, "total_steps": 204665, "loss": 0.0, "lr": 2.2069712285619602e-08, "epoch": 4.698531747001197, "percentage": 93.97, "elapsed_time": "4:08:35", "remaining_time": "0:15:56", "throughput": 8688.6, "total_tokens": 129591016} +{"current_steps": 192330, "total_steps": 204665, "loss": 0.0001, "lr": 2.2051898668695724e-08, "epoch": 4.6986538978330445, "percentage": 93.97, "elapsed_time": "4:08:35", "remaining_time": "0:15:56", "throughput": 8688.63, "total_tokens": 129594536} +{"current_steps": 192335, "total_steps": 204665, "loss": 0.0, "lr": 2.203409216367358e-08, "epoch": 4.698776048664891, "percentage": 93.98, "elapsed_time": "4:08:35", "remaining_time": "0:15:56", "throughput": 8688.66, "total_tokens": 129597928} +{"current_steps": 192340, "total_steps": 204665, "loss": 0.0, "lr": 2.201629277068251e-08, "epoch": 4.698898199496739, "percentage": 93.98, "elapsed_time": "4:08:36", "remaining_time": "0:15:55", "throughput": 8688.66, "total_tokens": 129600872} +{"current_steps": 192345, "total_steps": 204665, "loss": 0.0, "lr": 2.1998500489852077e-08, "epoch": 4.699020350328586, "percentage": 93.98, "elapsed_time": "4:08:36", "remaining_time": "0:15:55", "throughput": 8688.71, "total_tokens": 129604776} +{"current_steps": 192350, "total_steps": 204665, "loss": 0.0, "lr": 2.1980715321311515e-08, "epoch": 4.699142501160432, "percentage": 93.98, "elapsed_time": "4:08:36", "remaining_time": "0:15:55", "throughput": 8688.72, "total_tokens": 129607848} +{"current_steps": 192355, "total_steps": 204665, "loss": 0.0, "lr": 2.1962937265190385e-08, "epoch": 4.69926465199228, "percentage": 93.99, "elapsed_time": "4:08:37", "remaining_time": "0:15:54", "throughput": 8688.74, "total_tokens": 129611240} +{"current_steps": 192360, "total_steps": 204665, "loss": 0.0, "lr": 2.194516632161769e-08, "epoch": 4.699386802824128, "percentage": 93.99, "elapsed_time": "4:08:37", "remaining_time": "0:15:54", "throughput": 8688.76, "total_tokens": 129614504} +{"current_steps": 192365, "total_steps": 204665, "loss": 0.0, "lr": 2.192740249072289e-08, "epoch": 4.699508953655974, "percentage": 93.99, "elapsed_time": "4:08:37", "remaining_time": "0:15:53", "throughput": 8688.8, "total_tokens": 129618088} +{"current_steps": 192370, "total_steps": 204665, "loss": 0.0, "lr": 2.1909645772634988e-08, "epoch": 4.699631104487821, "percentage": 93.99, "elapsed_time": "4:08:38", "remaining_time": "0:15:53", "throughput": 8688.83, "total_tokens": 129621608} +{"current_steps": 192375, "total_steps": 204665, "loss": 0.0, "lr": 2.1891896167483327e-08, "epoch": 4.699753255319669, "percentage": 94.0, "elapsed_time": "4:08:38", "remaining_time": "0:15:53", "throughput": 8688.87, "total_tokens": 129625192} +{"current_steps": 192380, "total_steps": 204665, "loss": 0.0, "lr": 2.1874153675396802e-08, "epoch": 4.6998754061515156, "percentage": 94.0, "elapsed_time": "4:08:38", "remaining_time": "0:15:52", "throughput": 8688.88, "total_tokens": 129628328} +{"current_steps": 192385, "total_steps": 204665, "loss": 0.0, "lr": 2.1856418296504642e-08, "epoch": 4.699997556983363, "percentage": 94.0, "elapsed_time": "4:08:39", "remaining_time": "0:15:52", "throughput": 8688.9, "total_tokens": 129631720} +{"current_steps": 192390, "total_steps": 204665, "loss": 0.0, "lr": 2.1838690030935524e-08, "epoch": 4.70011970781521, "percentage": 94.0, "elapsed_time": "4:08:39", "remaining_time": "0:15:51", "throughput": 8688.97, "total_tokens": 129635880} +{"current_steps": 192395, "total_steps": 204665, "loss": 0.0, "lr": 2.1820968878818567e-08, "epoch": 4.700241858647058, "percentage": 94.0, "elapsed_time": "4:08:39", "remaining_time": "0:15:51", "throughput": 8688.98, "total_tokens": 129639144} +{"current_steps": 192400, "total_steps": 204665, "loss": 0.0, "lr": 2.180325484028278e-08, "epoch": 4.700364009478904, "percentage": 94.01, "elapsed_time": "4:08:40", "remaining_time": "0:15:51", "throughput": 8689.0, "total_tokens": 129642344} +{"current_steps": 192405, "total_steps": 204665, "loss": 0.0, "lr": 2.1785547915456727e-08, "epoch": 4.700486160310752, "percentage": 94.01, "elapsed_time": "4:08:40", "remaining_time": "0:15:50", "throughput": 8689.01, "total_tokens": 129645416} +{"current_steps": 192410, "total_steps": 204665, "loss": 0.0, "lr": 2.17678481044693e-08, "epoch": 4.700608311142599, "percentage": 94.01, "elapsed_time": "4:08:40", "remaining_time": "0:15:50", "throughput": 8689.05, "total_tokens": 129649128} +{"current_steps": 192415, "total_steps": 204665, "loss": 0.0, "lr": 2.1750155407449178e-08, "epoch": 4.700730461974446, "percentage": 94.01, "elapsed_time": "4:08:41", "remaining_time": "0:15:49", "throughput": 8689.07, "total_tokens": 129652456} +{"current_steps": 192420, "total_steps": 204665, "loss": 0.0, "lr": 2.1732469824525035e-08, "epoch": 4.700852612806293, "percentage": 94.02, "elapsed_time": "4:08:41", "remaining_time": "0:15:49", "throughput": 8689.07, "total_tokens": 129655464} +{"current_steps": 192425, "total_steps": 204665, "loss": 0.0, "lr": 2.1714791355825434e-08, "epoch": 4.700974763638141, "percentage": 94.02, "elapsed_time": "4:08:42", "remaining_time": "0:15:49", "throughput": 8689.11, "total_tokens": 129659112} +{"current_steps": 192430, "total_steps": 204665, "loss": 0.0, "lr": 2.1697120001479053e-08, "epoch": 4.7010969144699875, "percentage": 94.02, "elapsed_time": "4:08:42", "remaining_time": "0:15:48", "throughput": 8689.15, "total_tokens": 129662888} +{"current_steps": 192435, "total_steps": 204665, "loss": 0.0, "lr": 2.167945576161434e-08, "epoch": 4.701219065301835, "percentage": 94.02, "elapsed_time": "4:08:42", "remaining_time": "0:15:48", "throughput": 8689.17, "total_tokens": 129666216} +{"current_steps": 192440, "total_steps": 204665, "loss": 0.017, "lr": 2.166179863635975e-08, "epoch": 4.701341216133682, "percentage": 94.03, "elapsed_time": "4:08:43", "remaining_time": "0:15:48", "throughput": 8689.19, "total_tokens": 129669608} +{"current_steps": 192445, "total_steps": 204665, "loss": 0.0, "lr": 2.164414862584385e-08, "epoch": 4.701463366965529, "percentage": 94.03, "elapsed_time": "4:08:43", "remaining_time": "0:15:47", "throughput": 8689.2, "total_tokens": 129672744} +{"current_steps": 192450, "total_steps": 204665, "loss": 0.0, "lr": 2.1626505730194645e-08, "epoch": 4.701585517797376, "percentage": 94.03, "elapsed_time": "4:08:43", "remaining_time": "0:15:47", "throughput": 8689.25, "total_tokens": 129676648} +{"current_steps": 192455, "total_steps": 204665, "loss": 0.0, "lr": 2.1608869949540808e-08, "epoch": 4.701707668629224, "percentage": 94.03, "elapsed_time": "4:08:44", "remaining_time": "0:15:46", "throughput": 8689.28, "total_tokens": 129680104} +{"current_steps": 192460, "total_steps": 204665, "loss": 0.0724, "lr": 2.1591241284010242e-08, "epoch": 4.701829819461071, "percentage": 94.04, "elapsed_time": "4:08:44", "remaining_time": "0:15:46", "throughput": 8689.29, "total_tokens": 129683304} +{"current_steps": 192465, "total_steps": 204665, "loss": 0.0002, "lr": 2.1573619733731507e-08, "epoch": 4.701951970292917, "percentage": 94.04, "elapsed_time": "4:08:44", "remaining_time": "0:15:46", "throughput": 8689.3, "total_tokens": 129686312} +{"current_steps": 192470, "total_steps": 204665, "loss": 0.0, "lr": 2.1556005298832502e-08, "epoch": 4.702074121124765, "percentage": 94.04, "elapsed_time": "4:08:45", "remaining_time": "0:15:45", "throughput": 8689.36, "total_tokens": 129690408} +{"current_steps": 192475, "total_steps": 204665, "loss": 0.0, "lr": 2.1538397979441348e-08, "epoch": 4.702196271956612, "percentage": 94.04, "elapsed_time": "4:08:45", "remaining_time": "0:15:45", "throughput": 8689.38, "total_tokens": 129693800} +{"current_steps": 192480, "total_steps": 204665, "loss": 0.0, "lr": 2.1520797775686273e-08, "epoch": 4.702318422788459, "percentage": 94.05, "elapsed_time": "4:08:45", "remaining_time": "0:15:44", "throughput": 8689.4, "total_tokens": 129697064} +{"current_steps": 192485, "total_steps": 204665, "loss": 0.0, "lr": 2.1503204687694952e-08, "epoch": 4.702440573620306, "percentage": 94.05, "elapsed_time": "4:08:46", "remaining_time": "0:15:44", "throughput": 8689.41, "total_tokens": 129700264} +{"current_steps": 192490, "total_steps": 204665, "loss": 0.0005, "lr": 2.148561871559562e-08, "epoch": 4.702562724452154, "percentage": 94.05, "elapsed_time": "4:08:46", "remaining_time": "0:15:44", "throughput": 8689.42, "total_tokens": 129703400} +{"current_steps": 192495, "total_steps": 204665, "loss": 0.0, "lr": 2.1468039859516062e-08, "epoch": 4.7026848752840005, "percentage": 94.05, "elapsed_time": "4:08:46", "remaining_time": "0:15:43", "throughput": 8689.44, "total_tokens": 129706600} +{"current_steps": 192500, "total_steps": 204665, "loss": 0.0, "lr": 2.1450468119584066e-08, "epoch": 4.702807026115848, "percentage": 94.06, "elapsed_time": "4:08:47", "remaining_time": "0:15:43", "throughput": 8689.46, "total_tokens": 129710056} +{"current_steps": 192505, "total_steps": 204665, "loss": 0.0, "lr": 2.1432903495927523e-08, "epoch": 4.702929176947695, "percentage": 94.06, "elapsed_time": "4:08:47", "remaining_time": "0:15:42", "throughput": 8689.49, "total_tokens": 129713576} +{"current_steps": 192510, "total_steps": 204665, "loss": 0.0, "lr": 2.1415345988674006e-08, "epoch": 4.7030513277795425, "percentage": 94.06, "elapsed_time": "4:08:47", "remaining_time": "0:15:42", "throughput": 8689.51, "total_tokens": 129716840} +{"current_steps": 192515, "total_steps": 204665, "loss": 0.0, "lr": 2.1397795597951406e-08, "epoch": 4.703173478611389, "percentage": 94.06, "elapsed_time": "4:08:48", "remaining_time": "0:15:42", "throughput": 8689.53, "total_tokens": 129720040} +{"current_steps": 192520, "total_steps": 204665, "loss": 0.0, "lr": 2.1380252323887182e-08, "epoch": 4.703295629443237, "percentage": 94.07, "elapsed_time": "4:08:48", "remaining_time": "0:15:41", "throughput": 8689.53, "total_tokens": 129723048} +{"current_steps": 192525, "total_steps": 204665, "loss": 0.0, "lr": 2.1362716166609008e-08, "epoch": 4.703417780275084, "percentage": 94.07, "elapsed_time": "4:08:49", "remaining_time": "0:15:41", "throughput": 8689.57, "total_tokens": 129726760} +{"current_steps": 192530, "total_steps": 204665, "loss": 0.0, "lr": 2.1345187126244335e-08, "epoch": 4.703539931106931, "percentage": 94.07, "elapsed_time": "4:08:49", "remaining_time": "0:15:40", "throughput": 8689.59, "total_tokens": 129730088} +{"current_steps": 192535, "total_steps": 204665, "loss": 0.0, "lr": 2.1327665202920732e-08, "epoch": 4.703662081938778, "percentage": 94.07, "elapsed_time": "4:08:49", "remaining_time": "0:15:40", "throughput": 8689.62, "total_tokens": 129733544} +{"current_steps": 192540, "total_steps": 204665, "loss": 0.0, "lr": 2.1310150396765646e-08, "epoch": 4.703784232770625, "percentage": 94.08, "elapsed_time": "4:08:50", "remaining_time": "0:15:40", "throughput": 8689.61, "total_tokens": 129736424} +{"current_steps": 192545, "total_steps": 204665, "loss": 0.0, "lr": 2.1292642707906316e-08, "epoch": 4.703906383602472, "percentage": 94.08, "elapsed_time": "4:08:50", "remaining_time": "0:15:39", "throughput": 8689.62, "total_tokens": 129739432} +{"current_steps": 192550, "total_steps": 204665, "loss": 0.0, "lr": 2.127514213647008e-08, "epoch": 4.704028534434319, "percentage": 94.08, "elapsed_time": "4:08:50", "remaining_time": "0:15:39", "throughput": 8689.67, "total_tokens": 129743336} +{"current_steps": 192555, "total_steps": 204665, "loss": 0.0, "lr": 2.1257648682584284e-08, "epoch": 4.704150685266167, "percentage": 94.08, "elapsed_time": "4:08:51", "remaining_time": "0:15:39", "throughput": 8689.72, "total_tokens": 129747112} +{"current_steps": 192560, "total_steps": 204665, "loss": 0.0, "lr": 2.1240162346376266e-08, "epoch": 4.7042728360980135, "percentage": 94.09, "elapsed_time": "4:08:51", "remaining_time": "0:15:38", "throughput": 8689.74, "total_tokens": 129750504} +{"current_steps": 192565, "total_steps": 204665, "loss": 0.0, "lr": 2.1222683127972817e-08, "epoch": 4.704394986929861, "percentage": 94.09, "elapsed_time": "4:08:51", "remaining_time": "0:15:38", "throughput": 8689.75, "total_tokens": 129753640} +{"current_steps": 192570, "total_steps": 204665, "loss": 0.0001, "lr": 2.120521102750139e-08, "epoch": 4.704517137761708, "percentage": 94.09, "elapsed_time": "4:08:52", "remaining_time": "0:15:37", "throughput": 8689.78, "total_tokens": 129757160} +{"current_steps": 192575, "total_steps": 204665, "loss": 0.0, "lr": 2.1187746045088996e-08, "epoch": 4.7046392885935555, "percentage": 94.09, "elapsed_time": "4:08:52", "remaining_time": "0:15:37", "throughput": 8689.79, "total_tokens": 129760296} +{"current_steps": 192580, "total_steps": 204665, "loss": 0.0, "lr": 2.1170288180862528e-08, "epoch": 4.704761439425402, "percentage": 94.1, "elapsed_time": "4:08:52", "remaining_time": "0:15:37", "throughput": 8689.83, "total_tokens": 129763880} +{"current_steps": 192585, "total_steps": 204665, "loss": 0.0, "lr": 2.115283743494889e-08, "epoch": 4.70488359025725, "percentage": 94.1, "elapsed_time": "4:08:53", "remaining_time": "0:15:36", "throughput": 8689.84, "total_tokens": 129766952} +{"current_steps": 192590, "total_steps": 204665, "loss": 0.0, "lr": 2.1135393807475198e-08, "epoch": 4.705005741089097, "percentage": 94.1, "elapsed_time": "4:08:53", "remaining_time": "0:15:36", "throughput": 8689.85, "total_tokens": 129770152} +{"current_steps": 192595, "total_steps": 204665, "loss": 0.0, "lr": 2.1117957298568133e-08, "epoch": 4.705127891920944, "percentage": 94.1, "elapsed_time": "4:08:53", "remaining_time": "0:15:35", "throughput": 8689.89, "total_tokens": 129773800} +{"current_steps": 192600, "total_steps": 204665, "loss": 0.0, "lr": 2.1100527908354704e-08, "epoch": 4.705250042752791, "percentage": 94.11, "elapsed_time": "4:08:54", "remaining_time": "0:15:35", "throughput": 8689.9, "total_tokens": 129776936} +{"current_steps": 192605, "total_steps": 204665, "loss": 0.0, "lr": 2.1083105636961363e-08, "epoch": 4.705372193584639, "percentage": 94.11, "elapsed_time": "4:08:54", "remaining_time": "0:15:35", "throughput": 8689.92, "total_tokens": 129780328} +{"current_steps": 192610, "total_steps": 204665, "loss": 0.0, "lr": 2.1065690484515007e-08, "epoch": 4.705494344416485, "percentage": 94.11, "elapsed_time": "4:08:54", "remaining_time": "0:15:34", "throughput": 8689.93, "total_tokens": 129783400} +{"current_steps": 192615, "total_steps": 204665, "loss": 0.0, "lr": 2.1048282451142428e-08, "epoch": 4.705616495248332, "percentage": 94.11, "elapsed_time": "4:08:55", "remaining_time": "0:15:34", "throughput": 8689.94, "total_tokens": 129786536} +{"current_steps": 192620, "total_steps": 204665, "loss": 0.0, "lr": 2.1030881536969857e-08, "epoch": 4.70573864608018, "percentage": 94.11, "elapsed_time": "4:08:55", "remaining_time": "0:15:33", "throughput": 8689.95, "total_tokens": 129789736} +{"current_steps": 192625, "total_steps": 204665, "loss": 0.0, "lr": 2.1013487742124192e-08, "epoch": 4.705860796912027, "percentage": 94.12, "elapsed_time": "4:08:55", "remaining_time": "0:15:33", "throughput": 8689.97, "total_tokens": 129793000} +{"current_steps": 192630, "total_steps": 204665, "loss": 0.0, "lr": 2.0996101066731552e-08, "epoch": 4.705982947743874, "percentage": 94.12, "elapsed_time": "4:08:56", "remaining_time": "0:15:33", "throughput": 8689.99, "total_tokens": 129796456} +{"current_steps": 192635, "total_steps": 204665, "loss": 0.0, "lr": 2.097872151091873e-08, "epoch": 4.706105098575721, "percentage": 94.12, "elapsed_time": "4:08:56", "remaining_time": "0:15:32", "throughput": 8690.01, "total_tokens": 129799784} +{"current_steps": 192640, "total_steps": 204665, "loss": 0.0, "lr": 2.0961349074811952e-08, "epoch": 4.7062272494075685, "percentage": 94.12, "elapsed_time": "4:08:57", "remaining_time": "0:15:32", "throughput": 8690.02, "total_tokens": 129802792} +{"current_steps": 192645, "total_steps": 204665, "loss": 0.0, "lr": 2.0943983758537453e-08, "epoch": 4.706349400239415, "percentage": 94.13, "elapsed_time": "4:08:57", "remaining_time": "0:15:32", "throughput": 8690.04, "total_tokens": 129806184} +{"current_steps": 192650, "total_steps": 204665, "loss": 0.0, "lr": 2.09266255622218e-08, "epoch": 4.706471551071263, "percentage": 94.13, "elapsed_time": "4:08:57", "remaining_time": "0:15:31", "throughput": 8690.09, "total_tokens": 129809960} +{"current_steps": 192655, "total_steps": 204665, "loss": 0.0, "lr": 2.0909274485991003e-08, "epoch": 4.70659370190311, "percentage": 94.13, "elapsed_time": "4:08:58", "remaining_time": "0:15:31", "throughput": 8690.09, "total_tokens": 129812904} +{"current_steps": 192660, "total_steps": 204665, "loss": 0.0, "lr": 2.089193052997129e-08, "epoch": 4.706715852734957, "percentage": 94.13, "elapsed_time": "4:08:58", "remaining_time": "0:15:30", "throughput": 8690.12, "total_tokens": 129816552} +{"current_steps": 192665, "total_steps": 204665, "loss": 0.0, "lr": 2.08745936942889e-08, "epoch": 4.706838003566804, "percentage": 94.14, "elapsed_time": "4:08:58", "remaining_time": "0:15:30", "throughput": 8690.14, "total_tokens": 129819816} +{"current_steps": 192670, "total_steps": 204665, "loss": 0.0, "lr": 2.0857263979069727e-08, "epoch": 4.706960154398652, "percentage": 94.14, "elapsed_time": "4:08:59", "remaining_time": "0:15:30", "throughput": 8690.18, "total_tokens": 129823400} +{"current_steps": 192675, "total_steps": 204665, "loss": 0.0043, "lr": 2.0839941384439897e-08, "epoch": 4.707082305230498, "percentage": 94.14, "elapsed_time": "4:08:59", "remaining_time": "0:15:29", "throughput": 8690.2, "total_tokens": 129826856} +{"current_steps": 192680, "total_steps": 204665, "loss": 0.0, "lr": 2.0822625910525415e-08, "epoch": 4.707204456062346, "percentage": 94.14, "elapsed_time": "4:08:59", "remaining_time": "0:15:29", "throughput": 8690.24, "total_tokens": 129830440} +{"current_steps": 192685, "total_steps": 204665, "loss": 0.0, "lr": 2.0805317557452184e-08, "epoch": 4.707326606894193, "percentage": 94.15, "elapsed_time": "4:09:00", "remaining_time": "0:15:28", "throughput": 8690.31, "total_tokens": 129834664} +{"current_steps": 192690, "total_steps": 204665, "loss": 0.0, "lr": 2.078801632534588e-08, "epoch": 4.70744875772604, "percentage": 94.15, "elapsed_time": "4:09:00", "remaining_time": "0:15:28", "throughput": 8690.33, "total_tokens": 129837992} +{"current_steps": 192695, "total_steps": 204665, "loss": 0.0, "lr": 2.0770722214332736e-08, "epoch": 4.707570908557887, "percentage": 94.15, "elapsed_time": "4:09:00", "remaining_time": "0:15:28", "throughput": 8690.34, "total_tokens": 129841192} +{"current_steps": 192700, "total_steps": 204665, "loss": 0.0, "lr": 2.0753435224538095e-08, "epoch": 4.707693059389735, "percentage": 94.15, "elapsed_time": "4:09:01", "remaining_time": "0:15:27", "throughput": 8690.37, "total_tokens": 129844648} +{"current_steps": 192705, "total_steps": 204665, "loss": 0.0, "lr": 2.0736155356087858e-08, "epoch": 4.707815210221582, "percentage": 94.16, "elapsed_time": "4:09:01", "remaining_time": "0:15:27", "throughput": 8690.4, "total_tokens": 129848232} +{"current_steps": 192710, "total_steps": 204665, "loss": 0.0, "lr": 2.0718882609107812e-08, "epoch": 4.707937361053428, "percentage": 94.16, "elapsed_time": "4:09:01", "remaining_time": "0:15:26", "throughput": 8690.43, "total_tokens": 129851816} +{"current_steps": 192715, "total_steps": 204665, "loss": 0.0, "lr": 2.0701616983723414e-08, "epoch": 4.708059511885276, "percentage": 94.16, "elapsed_time": "4:09:02", "remaining_time": "0:15:26", "throughput": 8690.45, "total_tokens": 129855144} +{"current_steps": 192720, "total_steps": 204665, "loss": 0.0, "lr": 2.0684358480060228e-08, "epoch": 4.708181662717124, "percentage": 94.16, "elapsed_time": "4:09:02", "remaining_time": "0:15:26", "throughput": 8690.47, "total_tokens": 129858408} +{"current_steps": 192725, "total_steps": 204665, "loss": 0.0, "lr": 2.0667107098243818e-08, "epoch": 4.70830381354897, "percentage": 94.17, "elapsed_time": "4:09:02", "remaining_time": "0:15:25", "throughput": 8690.52, "total_tokens": 129862248} +{"current_steps": 192730, "total_steps": 204665, "loss": 0.0, "lr": 2.0649862838399645e-08, "epoch": 4.708425964380817, "percentage": 94.17, "elapsed_time": "4:09:03", "remaining_time": "0:15:25", "throughput": 8690.54, "total_tokens": 129865640} +{"current_steps": 192735, "total_steps": 204665, "loss": 0.0, "lr": 2.0632625700652938e-08, "epoch": 4.708548115212665, "percentage": 94.17, "elapsed_time": "4:09:03", "remaining_time": "0:15:24", "throughput": 8690.59, "total_tokens": 129869416} +{"current_steps": 192740, "total_steps": 204665, "loss": 0.0, "lr": 2.0615395685129266e-08, "epoch": 4.708670266044511, "percentage": 94.17, "elapsed_time": "4:09:04", "remaining_time": "0:15:24", "throughput": 8690.6, "total_tokens": 129872680} +{"current_steps": 192745, "total_steps": 204665, "loss": 0.0, "lr": 2.059817279195397e-08, "epoch": 4.708792416876359, "percentage": 94.18, "elapsed_time": "4:09:04", "remaining_time": "0:15:24", "throughput": 8690.62, "total_tokens": 129875944} +{"current_steps": 192750, "total_steps": 204665, "loss": 0.0, "lr": 2.0580957021252067e-08, "epoch": 4.708914567708206, "percentage": 94.18, "elapsed_time": "4:09:04", "remaining_time": "0:15:23", "throughput": 8690.64, "total_tokens": 129879336} +{"current_steps": 192755, "total_steps": 204665, "loss": 0.0, "lr": 2.0563748373148894e-08, "epoch": 4.7090367185400535, "percentage": 94.18, "elapsed_time": "4:09:05", "remaining_time": "0:15:23", "throughput": 8690.67, "total_tokens": 129882728} +{"current_steps": 192760, "total_steps": 204665, "loss": 0.0, "lr": 2.0546546847769574e-08, "epoch": 4.7091588693719, "percentage": 94.18, "elapsed_time": "4:09:05", "remaining_time": "0:15:23", "throughput": 8690.68, "total_tokens": 129885800} +{"current_steps": 192765, "total_steps": 204665, "loss": 0.0, "lr": 2.0529352445239234e-08, "epoch": 4.709281020203748, "percentage": 94.19, "elapsed_time": "4:09:05", "remaining_time": "0:15:22", "throughput": 8690.7, "total_tokens": 129889128} +{"current_steps": 192770, "total_steps": 204665, "loss": 0.0, "lr": 2.0512165165682882e-08, "epoch": 4.709403171035595, "percentage": 94.19, "elapsed_time": "4:09:06", "remaining_time": "0:15:22", "throughput": 8690.76, "total_tokens": 129893160} +{"current_steps": 192775, "total_steps": 204665, "loss": 0.0001, "lr": 2.049498500922553e-08, "epoch": 4.709525321867442, "percentage": 94.19, "elapsed_time": "4:09:06", "remaining_time": "0:15:21", "throughput": 8690.76, "total_tokens": 129896168} +{"current_steps": 192780, "total_steps": 204665, "loss": 0.0, "lr": 2.0477811975992187e-08, "epoch": 4.709647472699289, "percentage": 94.19, "elapsed_time": "4:09:06", "remaining_time": "0:15:21", "throughput": 8690.77, "total_tokens": 129899176} +{"current_steps": 192785, "total_steps": 204665, "loss": 0.0245, "lr": 2.0460646066107533e-08, "epoch": 4.709769623531137, "percentage": 94.2, "elapsed_time": "4:09:07", "remaining_time": "0:15:21", "throughput": 8690.81, "total_tokens": 129902888} +{"current_steps": 192790, "total_steps": 204665, "loss": 0.0, "lr": 2.0443487279696582e-08, "epoch": 4.709891774362983, "percentage": 94.2, "elapsed_time": "4:09:07", "remaining_time": "0:15:20", "throughput": 8690.81, "total_tokens": 129905896} +{"current_steps": 192795, "total_steps": 204665, "loss": 0.0, "lr": 2.0426335616884005e-08, "epoch": 4.710013925194831, "percentage": 94.2, "elapsed_time": "4:09:07", "remaining_time": "0:15:20", "throughput": 8690.84, "total_tokens": 129909352} +{"current_steps": 192800, "total_steps": 204665, "loss": 0.0, "lr": 2.0409191077794595e-08, "epoch": 4.710136076026678, "percentage": 94.2, "elapsed_time": "4:09:08", "remaining_time": "0:15:19", "throughput": 8690.85, "total_tokens": 129912424} +{"current_steps": 192805, "total_steps": 204665, "loss": 0.0, "lr": 2.039205366255303e-08, "epoch": 4.7102582268585245, "percentage": 94.21, "elapsed_time": "4:09:08", "remaining_time": "0:15:19", "throughput": 8690.87, "total_tokens": 129915752} +{"current_steps": 192810, "total_steps": 204665, "loss": 0.0, "lr": 2.0374923371283992e-08, "epoch": 4.710380377690372, "percentage": 94.21, "elapsed_time": "4:09:08", "remaining_time": "0:15:19", "throughput": 8690.89, "total_tokens": 129919208} +{"current_steps": 192815, "total_steps": 204665, "loss": 0.0001, "lr": 2.035780020411193e-08, "epoch": 4.71050252852222, "percentage": 94.21, "elapsed_time": "4:09:09", "remaining_time": "0:15:18", "throughput": 8690.93, "total_tokens": 129922792} +{"current_steps": 192820, "total_steps": 204665, "loss": 0.0, "lr": 2.0340684161161414e-08, "epoch": 4.7106246793540665, "percentage": 94.21, "elapsed_time": "4:09:09", "remaining_time": "0:15:18", "throughput": 8690.96, "total_tokens": 129926312} +{"current_steps": 192825, "total_steps": 204665, "loss": 0.0, "lr": 2.0323575242557123e-08, "epoch": 4.710746830185913, "percentage": 94.21, "elapsed_time": "4:09:09", "remaining_time": "0:15:17", "throughput": 8691.0, "total_tokens": 129929896} +{"current_steps": 192830, "total_steps": 204665, "loss": 0.0, "lr": 2.0306473448423066e-08, "epoch": 4.710868981017761, "percentage": 94.22, "elapsed_time": "4:09:10", "remaining_time": "0:15:17", "throughput": 8691.01, "total_tokens": 129933096} +{"current_steps": 192835, "total_steps": 204665, "loss": 0.0, "lr": 2.0289378778883924e-08, "epoch": 4.710991131849608, "percentage": 94.22, "elapsed_time": "4:09:10", "remaining_time": "0:15:17", "throughput": 8691.04, "total_tokens": 129936488} +{"current_steps": 192840, "total_steps": 204665, "loss": 0.0, "lr": 2.027229123406393e-08, "epoch": 4.711113282681455, "percentage": 94.22, "elapsed_time": "4:09:10", "remaining_time": "0:15:16", "throughput": 8691.07, "total_tokens": 129939944} +{"current_steps": 192845, "total_steps": 204665, "loss": 0.0, "lr": 2.025521081408732e-08, "epoch": 4.711235433513302, "percentage": 94.22, "elapsed_time": "4:09:11", "remaining_time": "0:15:16", "throughput": 8691.09, "total_tokens": 129943400} +{"current_steps": 192850, "total_steps": 204665, "loss": 0.0, "lr": 2.0238137519078436e-08, "epoch": 4.71135758434515, "percentage": 94.23, "elapsed_time": "4:09:11", "remaining_time": "0:15:16", "throughput": 8691.12, "total_tokens": 129946728} +{"current_steps": 192855, "total_steps": 204665, "loss": 0.0, "lr": 2.022107134916129e-08, "epoch": 4.711479735176996, "percentage": 94.23, "elapsed_time": "4:09:12", "remaining_time": "0:15:15", "throughput": 8691.12, "total_tokens": 129949736} +{"current_steps": 192860, "total_steps": 204665, "loss": 0.0, "lr": 2.0204012304460005e-08, "epoch": 4.711601886008844, "percentage": 94.23, "elapsed_time": "4:09:12", "remaining_time": "0:15:15", "throughput": 8690.99, "total_tokens": 129952936} +{"current_steps": 192865, "total_steps": 204665, "loss": 0.0, "lr": 2.0186960385098707e-08, "epoch": 4.711724036840691, "percentage": 94.23, "elapsed_time": "4:09:12", "remaining_time": "0:15:14", "throughput": 8691.0, "total_tokens": 129956136} +{"current_steps": 192870, "total_steps": 204665, "loss": 0.0, "lr": 2.0169915591201403e-08, "epoch": 4.711846187672538, "percentage": 94.24, "elapsed_time": "4:09:13", "remaining_time": "0:15:14", "throughput": 8691.02, "total_tokens": 129959528} +{"current_steps": 192875, "total_steps": 204665, "loss": 0.0, "lr": 2.0152877922891996e-08, "epoch": 4.711968338504385, "percentage": 94.24, "elapsed_time": "4:09:13", "remaining_time": "0:15:14", "throughput": 8691.04, "total_tokens": 129962728} +{"current_steps": 192880, "total_steps": 204665, "loss": 0.0, "lr": 2.013584738029439e-08, "epoch": 4.712090489336233, "percentage": 94.24, "elapsed_time": "4:09:13", "remaining_time": "0:15:13", "throughput": 8691.05, "total_tokens": 129965928} +{"current_steps": 192885, "total_steps": 204665, "loss": 0.0, "lr": 2.0118823963532482e-08, "epoch": 4.7122126401680795, "percentage": 94.24, "elapsed_time": "4:09:14", "remaining_time": "0:15:13", "throughput": 8691.1, "total_tokens": 129969704} +{"current_steps": 192890, "total_steps": 204665, "loss": 0.0, "lr": 2.0101807672729953e-08, "epoch": 4.712334790999927, "percentage": 94.25, "elapsed_time": "4:09:14", "remaining_time": "0:15:12", "throughput": 8691.12, "total_tokens": 129972968} +{"current_steps": 192895, "total_steps": 204665, "loss": 0.0, "lr": 2.0084798508010703e-08, "epoch": 4.712456941831774, "percentage": 94.25, "elapsed_time": "4:09:15", "remaining_time": "0:15:12", "throughput": 8691.13, "total_tokens": 129976168} +{"current_steps": 192900, "total_steps": 204665, "loss": 0.0, "lr": 2.006779646949841e-08, "epoch": 4.712579092663621, "percentage": 94.25, "elapsed_time": "4:09:15", "remaining_time": "0:15:12", "throughput": 8691.16, "total_tokens": 129979752} +{"current_steps": 192905, "total_steps": 204665, "loss": 0.0, "lr": 2.0050801557316532e-08, "epoch": 4.712701243495468, "percentage": 94.25, "elapsed_time": "4:09:15", "remaining_time": "0:15:11", "throughput": 8691.18, "total_tokens": 129983016} +{"current_steps": 192910, "total_steps": 204665, "loss": 0.0, "lr": 2.003381377158897e-08, "epoch": 4.712823394327315, "percentage": 94.26, "elapsed_time": "4:09:16", "remaining_time": "0:15:11", "throughput": 8691.21, "total_tokens": 129986472} +{"current_steps": 192915, "total_steps": 204665, "loss": 0.0321, "lr": 2.0016833112438958e-08, "epoch": 4.712945545159163, "percentage": 94.26, "elapsed_time": "4:09:16", "remaining_time": "0:15:10", "throughput": 8691.23, "total_tokens": 129989672} +{"current_steps": 192920, "total_steps": 204665, "loss": 0.0, "lr": 1.9999859579990175e-08, "epoch": 4.713067695991009, "percentage": 94.26, "elapsed_time": "4:09:16", "remaining_time": "0:15:10", "throughput": 8691.25, "total_tokens": 129993000} +{"current_steps": 192925, "total_steps": 204665, "loss": 0.0, "lr": 1.9982893174366077e-08, "epoch": 4.713189846822857, "percentage": 94.26, "elapsed_time": "4:09:17", "remaining_time": "0:15:10", "throughput": 8691.26, "total_tokens": 129996136} +{"current_steps": 192930, "total_steps": 204665, "loss": 0.0, "lr": 1.996593389568979e-08, "epoch": 4.713311997654704, "percentage": 94.27, "elapsed_time": "4:09:17", "remaining_time": "0:15:09", "throughput": 8691.3, "total_tokens": 129999784} +{"current_steps": 192935, "total_steps": 204665, "loss": 0.0, "lr": 1.9948981744084992e-08, "epoch": 4.713434148486551, "percentage": 94.27, "elapsed_time": "4:09:17", "remaining_time": "0:15:09", "throughput": 8691.33, "total_tokens": 130003368} +{"current_steps": 192940, "total_steps": 204665, "loss": 0.0, "lr": 1.9932036719674696e-08, "epoch": 4.713556299318398, "percentage": 94.27, "elapsed_time": "4:09:18", "remaining_time": "0:15:09", "throughput": 8691.35, "total_tokens": 130006632} +{"current_steps": 192945, "total_steps": 204665, "loss": 0.0, "lr": 1.9915098822582353e-08, "epoch": 4.713678450150246, "percentage": 94.27, "elapsed_time": "4:09:18", "remaining_time": "0:15:08", "throughput": 8691.36, "total_tokens": 130009704} +{"current_steps": 192950, "total_steps": 204665, "loss": 0.0, "lr": 1.9898168052930987e-08, "epoch": 4.7138006009820925, "percentage": 94.28, "elapsed_time": "4:09:18", "remaining_time": "0:15:08", "throughput": 8691.4, "total_tokens": 130013352} +{"current_steps": 192955, "total_steps": 204665, "loss": 0.0, "lr": 1.98812444108436e-08, "epoch": 4.71392275181394, "percentage": 94.28, "elapsed_time": "4:09:19", "remaining_time": "0:15:07", "throughput": 8691.41, "total_tokens": 130016552} +{"current_steps": 192960, "total_steps": 204665, "loss": 0.0, "lr": 1.9864327896443655e-08, "epoch": 4.714044902645787, "percentage": 94.28, "elapsed_time": "4:09:19", "remaining_time": "0:15:07", "throughput": 8691.43, "total_tokens": 130019752} +{"current_steps": 192965, "total_steps": 204665, "loss": 0.0, "lr": 1.984741850985383e-08, "epoch": 4.7141670534776345, "percentage": 94.28, "elapsed_time": "4:09:19", "remaining_time": "0:15:07", "throughput": 8691.49, "total_tokens": 130023720} +{"current_steps": 192970, "total_steps": 204665, "loss": 0.0, "lr": 1.9830516251197247e-08, "epoch": 4.714289204309481, "percentage": 94.29, "elapsed_time": "4:09:20", "remaining_time": "0:15:06", "throughput": 8691.49, "total_tokens": 130026728} +{"current_steps": 192975, "total_steps": 204665, "loss": 0.0, "lr": 1.9813621120596703e-08, "epoch": 4.714411355141328, "percentage": 94.29, "elapsed_time": "4:09:20", "remaining_time": "0:15:06", "throughput": 8691.51, "total_tokens": 130029992} +{"current_steps": 192980, "total_steps": 204665, "loss": 0.0, "lr": 1.97967331181752e-08, "epoch": 4.714533505973176, "percentage": 94.29, "elapsed_time": "4:09:20", "remaining_time": "0:15:05", "throughput": 8691.52, "total_tokens": 130033192} +{"current_steps": 192985, "total_steps": 204665, "loss": 0.0, "lr": 1.977985224405554e-08, "epoch": 4.714655656805023, "percentage": 94.29, "elapsed_time": "4:09:21", "remaining_time": "0:15:05", "throughput": 8691.57, "total_tokens": 130036968} +{"current_steps": 192990, "total_steps": 204665, "loss": 0.0, "lr": 1.9762978498360393e-08, "epoch": 4.71477780763687, "percentage": 94.3, "elapsed_time": "4:09:21", "remaining_time": "0:15:05", "throughput": 8691.57, "total_tokens": 130039912} +{"current_steps": 192995, "total_steps": 204665, "loss": 0.0, "lr": 1.9746111881212556e-08, "epoch": 4.714899958468717, "percentage": 94.3, "elapsed_time": "4:09:21", "remaining_time": "0:15:04", "throughput": 8691.6, "total_tokens": 130043368} +{"current_steps": 193000, "total_steps": 204665, "loss": 0.0, "lr": 1.9729252392734597e-08, "epoch": 4.715022109300564, "percentage": 94.3, "elapsed_time": "4:09:22", "remaining_time": "0:15:04", "throughput": 8691.62, "total_tokens": 130046568} +{"current_steps": 193005, "total_steps": 204665, "loss": 0.0, "lr": 1.9712400033049194e-08, "epoch": 4.715144260132411, "percentage": 94.3, "elapsed_time": "4:09:22", "remaining_time": "0:15:03", "throughput": 8691.62, "total_tokens": 130049640} +{"current_steps": 193010, "total_steps": 204665, "loss": 0.0001, "lr": 1.9695554802278803e-08, "epoch": 4.715266410964259, "percentage": 94.31, "elapsed_time": "4:09:22", "remaining_time": "0:15:03", "throughput": 8691.65, "total_tokens": 130053096} +{"current_steps": 193015, "total_steps": 204665, "loss": 0.0, "lr": 1.9678716700546106e-08, "epoch": 4.7153885617961055, "percentage": 94.31, "elapsed_time": "4:09:23", "remaining_time": "0:15:03", "throughput": 8691.67, "total_tokens": 130056424} +{"current_steps": 193020, "total_steps": 204665, "loss": 0.0, "lr": 1.9661885727973448e-08, "epoch": 4.715510712627953, "percentage": 94.31, "elapsed_time": "4:09:23", "remaining_time": "0:15:02", "throughput": 8691.7, "total_tokens": 130059816} +{"current_steps": 193025, "total_steps": 204665, "loss": 0.0, "lr": 1.964506188468318e-08, "epoch": 4.7156328634598, "percentage": 94.31, "elapsed_time": "4:09:24", "remaining_time": "0:15:02", "throughput": 8691.71, "total_tokens": 130062888} +{"current_steps": 193030, "total_steps": 204665, "loss": 0.0, "lr": 1.9628245170797865e-08, "epoch": 4.715755014291648, "percentage": 94.32, "elapsed_time": "4:09:24", "remaining_time": "0:15:01", "throughput": 8691.72, "total_tokens": 130065960} +{"current_steps": 193035, "total_steps": 204665, "loss": 0.0005, "lr": 1.9611435586439405e-08, "epoch": 4.715877165123494, "percentage": 94.32, "elapsed_time": "4:09:24", "remaining_time": "0:15:01", "throughput": 8691.74, "total_tokens": 130069416} +{"current_steps": 193040, "total_steps": 204665, "loss": 0.0224, "lr": 1.9594633131730488e-08, "epoch": 4.715999315955342, "percentage": 94.32, "elapsed_time": "4:09:25", "remaining_time": "0:15:01", "throughput": 8691.8, "total_tokens": 130073320} +{"current_steps": 193045, "total_steps": 204665, "loss": 0.0, "lr": 1.957783780679301e-08, "epoch": 4.716121466787189, "percentage": 94.32, "elapsed_time": "4:09:25", "remaining_time": "0:15:00", "throughput": 8691.81, "total_tokens": 130076456} +{"current_steps": 193050, "total_steps": 204665, "loss": 0.0, "lr": 1.9561049611749093e-08, "epoch": 4.716243617619036, "percentage": 94.32, "elapsed_time": "4:09:25", "remaining_time": "0:15:00", "throughput": 8691.81, "total_tokens": 130079528} +{"current_steps": 193055, "total_steps": 204665, "loss": 0.0, "lr": 1.9544268546721087e-08, "epoch": 4.716365768450883, "percentage": 94.33, "elapsed_time": "4:09:26", "remaining_time": "0:15:00", "throughput": 8691.85, "total_tokens": 130083048} +{"current_steps": 193060, "total_steps": 204665, "loss": 0.0004, "lr": 1.9527494611830786e-08, "epoch": 4.716487919282731, "percentage": 94.33, "elapsed_time": "4:09:26", "remaining_time": "0:14:59", "throughput": 8691.86, "total_tokens": 130086184} +{"current_steps": 193065, "total_steps": 204665, "loss": 0.001, "lr": 1.951072780720031e-08, "epoch": 4.7166100701145774, "percentage": 94.33, "elapsed_time": "4:09:26", "remaining_time": "0:14:59", "throughput": 8691.88, "total_tokens": 130089512} +{"current_steps": 193070, "total_steps": 204665, "loss": 0.0, "lr": 1.9493968132951455e-08, "epoch": 4.716732220946424, "percentage": 94.33, "elapsed_time": "4:09:27", "remaining_time": "0:14:58", "throughput": 8691.89, "total_tokens": 130092520} +{"current_steps": 193075, "total_steps": 204665, "loss": 0.0, "lr": 1.947721558920634e-08, "epoch": 4.716854371778272, "percentage": 94.34, "elapsed_time": "4:09:27", "remaining_time": "0:14:58", "throughput": 8691.91, "total_tokens": 130095912} +{"current_steps": 193080, "total_steps": 204665, "loss": 0.0, "lr": 1.946047017608654e-08, "epoch": 4.7169765226101195, "percentage": 94.34, "elapsed_time": "4:09:27", "remaining_time": "0:14:58", "throughput": 8691.93, "total_tokens": 130099304} +{"current_steps": 193085, "total_steps": 204665, "loss": 0.0, "lr": 1.9443731893713954e-08, "epoch": 4.717098673441966, "percentage": 94.34, "elapsed_time": "4:09:28", "remaining_time": "0:14:57", "throughput": 8691.95, "total_tokens": 130102568} +{"current_steps": 193090, "total_steps": 204665, "loss": 0.0, "lr": 1.9427000742210376e-08, "epoch": 4.717220824273813, "percentage": 94.34, "elapsed_time": "4:09:28", "remaining_time": "0:14:57", "throughput": 8691.99, "total_tokens": 130106216} +{"current_steps": 193095, "total_steps": 204665, "loss": 0.0, "lr": 1.9410276721697262e-08, "epoch": 4.717342975105661, "percentage": 94.35, "elapsed_time": "4:09:28", "remaining_time": "0:14:56", "throughput": 8692.01, "total_tokens": 130109544} +{"current_steps": 193100, "total_steps": 204665, "loss": 0.0, "lr": 1.9393559832296404e-08, "epoch": 4.717465125937507, "percentage": 94.35, "elapsed_time": "4:09:29", "remaining_time": "0:14:56", "throughput": 8692.03, "total_tokens": 130112744} +{"current_steps": 193105, "total_steps": 204665, "loss": 0.0, "lr": 1.9376850074129257e-08, "epoch": 4.717587276769355, "percentage": 94.35, "elapsed_time": "4:09:29", "remaining_time": "0:14:56", "throughput": 8692.06, "total_tokens": 130116264} +{"current_steps": 193110, "total_steps": 204665, "loss": 0.0, "lr": 1.9360147447317398e-08, "epoch": 4.717709427601202, "percentage": 94.35, "elapsed_time": "4:09:29", "remaining_time": "0:14:55", "throughput": 8692.09, "total_tokens": 130119720} +{"current_steps": 193115, "total_steps": 204665, "loss": 0.0, "lr": 1.9343451951982505e-08, "epoch": 4.717831578433049, "percentage": 94.36, "elapsed_time": "4:09:30", "remaining_time": "0:14:55", "throughput": 8692.12, "total_tokens": 130123240} +{"current_steps": 193120, "total_steps": 204665, "loss": 0.0, "lr": 1.9326763588245587e-08, "epoch": 4.717953729264896, "percentage": 94.36, "elapsed_time": "4:09:30", "remaining_time": "0:14:54", "throughput": 8692.17, "total_tokens": 130127080} +{"current_steps": 193125, "total_steps": 204665, "loss": 0.0, "lr": 1.931008235622844e-08, "epoch": 4.718075880096744, "percentage": 94.36, "elapsed_time": "4:09:30", "remaining_time": "0:14:54", "throughput": 8692.21, "total_tokens": 130130792} +{"current_steps": 193130, "total_steps": 204665, "loss": 0.0, "lr": 1.929340825605197e-08, "epoch": 4.7181980309285905, "percentage": 94.36, "elapsed_time": "4:09:31", "remaining_time": "0:14:54", "throughput": 8692.24, "total_tokens": 130134184} +{"current_steps": 193135, "total_steps": 204665, "loss": 0.0, "lr": 1.927674128783763e-08, "epoch": 4.718320181760438, "percentage": 94.37, "elapsed_time": "4:09:31", "remaining_time": "0:14:53", "throughput": 8692.26, "total_tokens": 130137448} +{"current_steps": 193140, "total_steps": 204665, "loss": 0.0, "lr": 1.9260081451706654e-08, "epoch": 4.718442332592285, "percentage": 94.37, "elapsed_time": "4:09:31", "remaining_time": "0:14:53", "throughput": 8692.28, "total_tokens": 130140776} +{"current_steps": 193145, "total_steps": 204665, "loss": 0.0, "lr": 1.9243428747780065e-08, "epoch": 4.7185644834241325, "percentage": 94.37, "elapsed_time": "4:09:32", "remaining_time": "0:14:53", "throughput": 8692.33, "total_tokens": 130144744} +{"current_steps": 193150, "total_steps": 204665, "loss": 0.0, "lr": 1.9226783176179208e-08, "epoch": 4.718686634255979, "percentage": 94.37, "elapsed_time": "4:09:32", "remaining_time": "0:14:52", "throughput": 8692.36, "total_tokens": 130148200} +{"current_steps": 193155, "total_steps": 204665, "loss": 0.0002, "lr": 1.921014473702476e-08, "epoch": 4.718808785087827, "percentage": 94.38, "elapsed_time": "4:09:33", "remaining_time": "0:14:52", "throughput": 8692.38, "total_tokens": 130151464} +{"current_steps": 193160, "total_steps": 204665, "loss": 0.0, "lr": 1.919351343043818e-08, "epoch": 4.718930935919674, "percentage": 94.38, "elapsed_time": "4:09:33", "remaining_time": "0:14:51", "throughput": 8692.39, "total_tokens": 130154664} +{"current_steps": 193165, "total_steps": 204665, "loss": 0.0, "lr": 1.917688925654004e-08, "epoch": 4.71905308675152, "percentage": 94.38, "elapsed_time": "4:09:33", "remaining_time": "0:14:51", "throughput": 8692.42, "total_tokens": 130158184} +{"current_steps": 193170, "total_steps": 204665, "loss": 0.0001, "lr": 1.9160272215451355e-08, "epoch": 4.719175237583368, "percentage": 94.38, "elapsed_time": "4:09:34", "remaining_time": "0:14:51", "throughput": 8692.46, "total_tokens": 130161704} +{"current_steps": 193175, "total_steps": 204665, "loss": 0.0, "lr": 1.914366230729303e-08, "epoch": 4.719297388415215, "percentage": 94.39, "elapsed_time": "4:09:34", "remaining_time": "0:14:50", "throughput": 8692.51, "total_tokens": 130165736} +{"current_steps": 193180, "total_steps": 204665, "loss": 0.0, "lr": 1.9127059532185853e-08, "epoch": 4.719419539247062, "percentage": 94.39, "elapsed_time": "4:09:34", "remaining_time": "0:14:50", "throughput": 8692.53, "total_tokens": 130169000} +{"current_steps": 193185, "total_steps": 204665, "loss": 0.0, "lr": 1.9110463890250506e-08, "epoch": 4.719541690078909, "percentage": 94.39, "elapsed_time": "4:09:35", "remaining_time": "0:14:49", "throughput": 8692.57, "total_tokens": 130172712} +{"current_steps": 193190, "total_steps": 204665, "loss": 0.0, "lr": 1.909387538160767e-08, "epoch": 4.719663840910757, "percentage": 94.39, "elapsed_time": "4:09:35", "remaining_time": "0:14:49", "throughput": 8692.64, "total_tokens": 130176872} +{"current_steps": 193195, "total_steps": 204665, "loss": 0.0002, "lr": 1.907729400637803e-08, "epoch": 4.7197859917426035, "percentage": 94.4, "elapsed_time": "4:09:35", "remaining_time": "0:14:49", "throughput": 8692.66, "total_tokens": 130180136} +{"current_steps": 193200, "total_steps": 204665, "loss": 0.0, "lr": 1.9060719764682155e-08, "epoch": 4.719908142574451, "percentage": 94.4, "elapsed_time": "4:09:36", "remaining_time": "0:14:48", "throughput": 8692.69, "total_tokens": 130183656} +{"current_steps": 193205, "total_steps": 204665, "loss": 0.0, "lr": 1.9044152656640498e-08, "epoch": 4.720030293406298, "percentage": 94.4, "elapsed_time": "4:09:36", "remaining_time": "0:14:48", "throughput": 8692.69, "total_tokens": 130186728} +{"current_steps": 193210, "total_steps": 204665, "loss": 0.0, "lr": 1.902759268237364e-08, "epoch": 4.7201524442381455, "percentage": 94.4, "elapsed_time": "4:09:36", "remaining_time": "0:14:47", "throughput": 8692.71, "total_tokens": 130189992} +{"current_steps": 193215, "total_steps": 204665, "loss": 0.0, "lr": 1.901103984200192e-08, "epoch": 4.720274595069992, "percentage": 94.41, "elapsed_time": "4:09:37", "remaining_time": "0:14:47", "throughput": 8692.72, "total_tokens": 130193192} +{"current_steps": 193220, "total_steps": 204665, "loss": 0.0, "lr": 1.899449413564591e-08, "epoch": 4.72039674590184, "percentage": 94.41, "elapsed_time": "4:09:37", "remaining_time": "0:14:47", "throughput": 8692.76, "total_tokens": 130196712} +{"current_steps": 193225, "total_steps": 204665, "loss": 0.0, "lr": 1.897795556342563e-08, "epoch": 4.720518896733687, "percentage": 94.41, "elapsed_time": "4:09:37", "remaining_time": "0:14:46", "throughput": 8692.78, "total_tokens": 130200104} +{"current_steps": 193230, "total_steps": 204665, "loss": 0.0, "lr": 1.8961424125461756e-08, "epoch": 4.720641047565534, "percentage": 94.41, "elapsed_time": "4:09:38", "remaining_time": "0:14:46", "throughput": 8692.8, "total_tokens": 130203496} +{"current_steps": 193235, "total_steps": 204665, "loss": 0.0, "lr": 1.8944899821874083e-08, "epoch": 4.720763198397381, "percentage": 94.42, "elapsed_time": "4:09:38", "remaining_time": "0:14:45", "throughput": 8692.83, "total_tokens": 130206952} +{"current_steps": 193240, "total_steps": 204665, "loss": 0.0, "lr": 1.892838265278296e-08, "epoch": 4.720885349229228, "percentage": 94.42, "elapsed_time": "4:09:38", "remaining_time": "0:14:45", "throughput": 8692.84, "total_tokens": 130210088} +{"current_steps": 193245, "total_steps": 204665, "loss": 0.0, "lr": 1.891187261830862e-08, "epoch": 4.721007500061075, "percentage": 94.42, "elapsed_time": "4:09:39", "remaining_time": "0:14:45", "throughput": 8692.87, "total_tokens": 130213608} +{"current_steps": 193250, "total_steps": 204665, "loss": 0.0, "lr": 1.8895369718570865e-08, "epoch": 4.721129650892923, "percentage": 94.42, "elapsed_time": "4:09:39", "remaining_time": "0:14:44", "throughput": 8692.91, "total_tokens": 130217128} +{"current_steps": 193255, "total_steps": 204665, "loss": 0.0, "lr": 1.8878873953690035e-08, "epoch": 4.72125180172477, "percentage": 94.43, "elapsed_time": "4:09:40", "remaining_time": "0:14:44", "throughput": 8692.92, "total_tokens": 130220456} +{"current_steps": 193260, "total_steps": 204665, "loss": 0.0, "lr": 1.8862385323785813e-08, "epoch": 4.7213739525566165, "percentage": 94.43, "elapsed_time": "4:09:40", "remaining_time": "0:14:44", "throughput": 8692.94, "total_tokens": 130223784} +{"current_steps": 193265, "total_steps": 204665, "loss": 0.0, "lr": 1.8845903828978216e-08, "epoch": 4.721496103388464, "percentage": 94.43, "elapsed_time": "4:09:40", "remaining_time": "0:14:43", "throughput": 8692.98, "total_tokens": 130227304} +{"current_steps": 193270, "total_steps": 204665, "loss": 0.0, "lr": 1.882942946938726e-08, "epoch": 4.721618254220311, "percentage": 94.43, "elapsed_time": "4:09:41", "remaining_time": "0:14:43", "throughput": 8693.0, "total_tokens": 130230632} +{"current_steps": 193275, "total_steps": 204665, "loss": 0.0, "lr": 1.8812962245132403e-08, "epoch": 4.7217404050521585, "percentage": 94.43, "elapsed_time": "4:09:41", "remaining_time": "0:14:42", "throughput": 8693.04, "total_tokens": 130234280} +{"current_steps": 193280, "total_steps": 204665, "loss": 0.0, "lr": 1.879650215633377e-08, "epoch": 4.721862555884005, "percentage": 94.44, "elapsed_time": "4:09:41", "remaining_time": "0:14:42", "throughput": 8693.07, "total_tokens": 130237800} +{"current_steps": 193285, "total_steps": 204665, "loss": 0.0002, "lr": 1.8780049203110714e-08, "epoch": 4.721984706715853, "percentage": 94.44, "elapsed_time": "4:09:42", "remaining_time": "0:14:42", "throughput": 8693.1, "total_tokens": 130241320} +{"current_steps": 193290, "total_steps": 204665, "loss": 0.0, "lr": 1.876360338558325e-08, "epoch": 4.7221068575477, "percentage": 94.44, "elapsed_time": "4:09:42", "remaining_time": "0:14:41", "throughput": 8693.12, "total_tokens": 130244712} +{"current_steps": 193295, "total_steps": 204665, "loss": 0.0, "lr": 1.8747164703870722e-08, "epoch": 4.722229008379547, "percentage": 94.44, "elapsed_time": "4:09:42", "remaining_time": "0:14:41", "throughput": 8693.13, "total_tokens": 130247848} +{"current_steps": 193300, "total_steps": 204665, "loss": 0.0, "lr": 1.8730733158092593e-08, "epoch": 4.722351159211394, "percentage": 94.45, "elapsed_time": "4:09:43", "remaining_time": "0:14:40", "throughput": 8693.17, "total_tokens": 130251432} +{"current_steps": 193305, "total_steps": 204665, "loss": 0.0, "lr": 1.8714308748368657e-08, "epoch": 4.722473310043242, "percentage": 94.45, "elapsed_time": "4:09:43", "remaining_time": "0:14:40", "throughput": 8693.18, "total_tokens": 130254696} +{"current_steps": 193310, "total_steps": 204665, "loss": 0.0, "lr": 1.869789147481815e-08, "epoch": 4.722595460875088, "percentage": 94.45, "elapsed_time": "4:09:43", "remaining_time": "0:14:40", "throughput": 8693.2, "total_tokens": 130257896} +{"current_steps": 193315, "total_steps": 204665, "loss": 0.0, "lr": 1.8681481337560534e-08, "epoch": 4.722717611706936, "percentage": 94.45, "elapsed_time": "4:09:44", "remaining_time": "0:14:39", "throughput": 8693.22, "total_tokens": 130261224} +{"current_steps": 193320, "total_steps": 204665, "loss": 0.0, "lr": 1.8665078336715046e-08, "epoch": 4.722839762538783, "percentage": 94.46, "elapsed_time": "4:09:44", "remaining_time": "0:14:39", "throughput": 8693.23, "total_tokens": 130264424} +{"current_steps": 193325, "total_steps": 204665, "loss": 0.0, "lr": 1.8648682472401033e-08, "epoch": 4.72296191337063, "percentage": 94.46, "elapsed_time": "4:09:44", "remaining_time": "0:14:38", "throughput": 8693.25, "total_tokens": 130267752} +{"current_steps": 193330, "total_steps": 204665, "loss": 0.0, "lr": 1.8632293744737958e-08, "epoch": 4.723084064202477, "percentage": 94.46, "elapsed_time": "4:09:45", "remaining_time": "0:14:38", "throughput": 8693.27, "total_tokens": 130270952} +{"current_steps": 193335, "total_steps": 204665, "loss": 0.0, "lr": 1.8615912153844617e-08, "epoch": 4.723206215034324, "percentage": 94.46, "elapsed_time": "4:09:45", "remaining_time": "0:14:38", "throughput": 8693.27, "total_tokens": 130274024} +{"current_steps": 193340, "total_steps": 204665, "loss": 0.0, "lr": 1.859953769984046e-08, "epoch": 4.7233283658661716, "percentage": 94.47, "elapsed_time": "4:09:45", "remaining_time": "0:14:37", "throughput": 8693.31, "total_tokens": 130277672} +{"current_steps": 193345, "total_steps": 204665, "loss": 0.0, "lr": 1.8583170382844294e-08, "epoch": 4.723450516698019, "percentage": 94.47, "elapsed_time": "4:09:46", "remaining_time": "0:14:37", "throughput": 8693.33, "total_tokens": 130281064} +{"current_steps": 193350, "total_steps": 204665, "loss": 0.0, "lr": 1.8566810202975348e-08, "epoch": 4.723572667529866, "percentage": 94.47, "elapsed_time": "4:09:46", "remaining_time": "0:14:37", "throughput": 8693.36, "total_tokens": 130284584} +{"current_steps": 193355, "total_steps": 204665, "loss": 0.0, "lr": 1.855045716035253e-08, "epoch": 4.723694818361713, "percentage": 94.47, "elapsed_time": "4:09:47", "remaining_time": "0:14:36", "throughput": 8693.4, "total_tokens": 130288232} +{"current_steps": 193360, "total_steps": 204665, "loss": 0.0, "lr": 1.853411125509463e-08, "epoch": 4.72381696919356, "percentage": 94.48, "elapsed_time": "4:09:47", "remaining_time": "0:14:36", "throughput": 8693.43, "total_tokens": 130291688} +{"current_steps": 193365, "total_steps": 204665, "loss": 0.0, "lr": 1.8517772487320892e-08, "epoch": 4.723939120025407, "percentage": 94.48, "elapsed_time": "4:09:47", "remaining_time": "0:14:35", "throughput": 8693.44, "total_tokens": 130294888} +{"current_steps": 193370, "total_steps": 204665, "loss": 0.0, "lr": 1.8501440857149776e-08, "epoch": 4.724061270857255, "percentage": 94.48, "elapsed_time": "4:09:48", "remaining_time": "0:14:35", "throughput": 8693.46, "total_tokens": 130298216} +{"current_steps": 193375, "total_steps": 204665, "loss": 0.0, "lr": 1.848511636470018e-08, "epoch": 4.724183421689101, "percentage": 94.48, "elapsed_time": "4:09:48", "remaining_time": "0:14:35", "throughput": 8693.48, "total_tokens": 130301608} +{"current_steps": 193380, "total_steps": 204665, "loss": 0.0, "lr": 1.8468799010090796e-08, "epoch": 4.724305572520949, "percentage": 94.49, "elapsed_time": "4:09:48", "remaining_time": "0:14:34", "throughput": 8693.51, "total_tokens": 130305064} +{"current_steps": 193385, "total_steps": 204665, "loss": 0.0, "lr": 1.8452488793440414e-08, "epoch": 4.724427723352796, "percentage": 94.49, "elapsed_time": "4:09:49", "remaining_time": "0:14:34", "throughput": 8693.52, "total_tokens": 130308328} +{"current_steps": 193390, "total_steps": 204665, "loss": 0.0, "lr": 1.8436185714867382e-08, "epoch": 4.7245498741846434, "percentage": 94.49, "elapsed_time": "4:09:49", "remaining_time": "0:14:33", "throughput": 8693.57, "total_tokens": 130312104} +{"current_steps": 193395, "total_steps": 204665, "loss": 0.0, "lr": 1.8419889774490494e-08, "epoch": 4.72467202501649, "percentage": 94.49, "elapsed_time": "4:09:49", "remaining_time": "0:14:33", "throughput": 8693.59, "total_tokens": 130315432} +{"current_steps": 193400, "total_steps": 204665, "loss": 0.0, "lr": 1.8403600972428322e-08, "epoch": 4.724794175848338, "percentage": 94.5, "elapsed_time": "4:09:50", "remaining_time": "0:14:33", "throughput": 8693.63, "total_tokens": 130319080} +{"current_steps": 193405, "total_steps": 204665, "loss": 0.0, "lr": 1.8387319308799e-08, "epoch": 4.724916326680185, "percentage": 94.5, "elapsed_time": "4:09:50", "remaining_time": "0:14:32", "throughput": 8693.64, "total_tokens": 130322216} +{"current_steps": 193410, "total_steps": 204665, "loss": 0.0, "lr": 1.837104478372109e-08, "epoch": 4.725038477512032, "percentage": 94.5, "elapsed_time": "4:09:50", "remaining_time": "0:14:32", "throughput": 8693.66, "total_tokens": 130325480} +{"current_steps": 193415, "total_steps": 204665, "loss": 0.0, "lr": 1.835477739731306e-08, "epoch": 4.725160628343879, "percentage": 94.5, "elapsed_time": "4:09:51", "remaining_time": "0:14:31", "throughput": 8693.69, "total_tokens": 130329128} +{"current_steps": 193420, "total_steps": 204665, "loss": 0.0, "lr": 1.8338517149693034e-08, "epoch": 4.725282779175727, "percentage": 94.51, "elapsed_time": "4:09:51", "remaining_time": "0:14:31", "throughput": 8693.72, "total_tokens": 130332584} +{"current_steps": 193425, "total_steps": 204665, "loss": 0.0, "lr": 1.8322264040979472e-08, "epoch": 4.725404930007573, "percentage": 94.51, "elapsed_time": "4:09:51", "remaining_time": "0:14:31", "throughput": 8693.74, "total_tokens": 130335848} +{"current_steps": 193430, "total_steps": 204665, "loss": 0.0184, "lr": 1.8306018071290284e-08, "epoch": 4.72552708083942, "percentage": 94.51, "elapsed_time": "4:09:52", "remaining_time": "0:14:30", "throughput": 8693.75, "total_tokens": 130339112} +{"current_steps": 193435, "total_steps": 204665, "loss": 0.0, "lr": 1.828977924074393e-08, "epoch": 4.725649231671268, "percentage": 94.51, "elapsed_time": "4:09:52", "remaining_time": "0:14:30", "throughput": 8693.78, "total_tokens": 130342568} +{"current_steps": 193440, "total_steps": 204665, "loss": 0.0002, "lr": 1.8273547549458203e-08, "epoch": 4.7257713825031145, "percentage": 94.52, "elapsed_time": "4:09:52", "remaining_time": "0:14:30", "throughput": 8693.82, "total_tokens": 130346152} +{"current_steps": 193445, "total_steps": 204665, "loss": 0.0, "lr": 1.825732299755145e-08, "epoch": 4.725893533334962, "percentage": 94.52, "elapsed_time": "4:09:53", "remaining_time": "0:14:29", "throughput": 8693.82, "total_tokens": 130349224} +{"current_steps": 193450, "total_steps": 204665, "loss": 0.0, "lr": 1.824110558514136e-08, "epoch": 4.726015684166809, "percentage": 94.52, "elapsed_time": "4:09:53", "remaining_time": "0:14:29", "throughput": 8693.84, "total_tokens": 130352488} +{"current_steps": 193455, "total_steps": 204665, "loss": 0.0, "lr": 1.8224895312346055e-08, "epoch": 4.7261378349986565, "percentage": 94.52, "elapsed_time": "4:09:54", "remaining_time": "0:14:28", "throughput": 8693.86, "total_tokens": 130355816} +{"current_steps": 193460, "total_steps": 204665, "loss": 0.0, "lr": 1.8208692179283446e-08, "epoch": 4.726259985830503, "percentage": 94.53, "elapsed_time": "4:09:54", "remaining_time": "0:14:28", "throughput": 8693.88, "total_tokens": 130359144} +{"current_steps": 193465, "total_steps": 204665, "loss": 0.0, "lr": 1.8192496186071216e-08, "epoch": 4.726382136662351, "percentage": 94.53, "elapsed_time": "4:09:54", "remaining_time": "0:14:28", "throughput": 8693.89, "total_tokens": 130362216} +{"current_steps": 193470, "total_steps": 204665, "loss": 0.0, "lr": 1.8176307332827378e-08, "epoch": 4.726504287494198, "percentage": 94.53, "elapsed_time": "4:09:55", "remaining_time": "0:14:27", "throughput": 8693.88, "total_tokens": 130365032} +{"current_steps": 193475, "total_steps": 204665, "loss": 0.0, "lr": 1.8160125619669285e-08, "epoch": 4.726626438326045, "percentage": 94.53, "elapsed_time": "4:09:55", "remaining_time": "0:14:27", "throughput": 8693.92, "total_tokens": 130368680} +{"current_steps": 193480, "total_steps": 204665, "loss": 0.0001, "lr": 1.8143951046714957e-08, "epoch": 4.726748589157892, "percentage": 94.53, "elapsed_time": "4:09:55", "remaining_time": "0:14:26", "throughput": 8693.94, "total_tokens": 130372008} +{"current_steps": 193485, "total_steps": 204665, "loss": 0.0, "lr": 1.8127783614081958e-08, "epoch": 4.72687073998974, "percentage": 94.54, "elapsed_time": "4:09:56", "remaining_time": "0:14:26", "throughput": 8693.98, "total_tokens": 130375784} +{"current_steps": 193490, "total_steps": 204665, "loss": 0.0, "lr": 1.811162332188776e-08, "epoch": 4.726992890821586, "percentage": 94.54, "elapsed_time": "4:09:56", "remaining_time": "0:14:26", "throughput": 8694.04, "total_tokens": 130379816} +{"current_steps": 193495, "total_steps": 204665, "loss": 0.0, "lr": 1.809547017024993e-08, "epoch": 4.727115041653434, "percentage": 94.54, "elapsed_time": "4:09:56", "remaining_time": "0:14:25", "throughput": 8694.07, "total_tokens": 130383208} +{"current_steps": 193500, "total_steps": 204665, "loss": 0.0, "lr": 1.8079324159285925e-08, "epoch": 4.727237192485281, "percentage": 94.54, "elapsed_time": "4:09:57", "remaining_time": "0:14:25", "throughput": 8694.11, "total_tokens": 130386920} +{"current_steps": 193505, "total_steps": 204665, "loss": 0.0, "lr": 1.8063185289113326e-08, "epoch": 4.727359343317128, "percentage": 94.55, "elapsed_time": "4:09:57", "remaining_time": "0:14:24", "throughput": 8694.16, "total_tokens": 130390824} +{"current_steps": 193510, "total_steps": 204665, "loss": 0.0, "lr": 1.8047053559849146e-08, "epoch": 4.727481494148975, "percentage": 94.55, "elapsed_time": "4:09:57", "remaining_time": "0:14:24", "throughput": 8694.17, "total_tokens": 130393960} +{"current_steps": 193515, "total_steps": 204665, "loss": 0.0305, "lr": 1.8030928971610958e-08, "epoch": 4.727603644980823, "percentage": 94.55, "elapsed_time": "4:09:58", "remaining_time": "0:14:24", "throughput": 8694.18, "total_tokens": 130397160} +{"current_steps": 193520, "total_steps": 204665, "loss": 0.0, "lr": 1.8014811524516006e-08, "epoch": 4.7277257958126695, "percentage": 94.55, "elapsed_time": "4:09:58", "remaining_time": "0:14:23", "throughput": 8694.21, "total_tokens": 130400552} +{"current_steps": 193525, "total_steps": 204665, "loss": 0.0, "lr": 1.7998701218681413e-08, "epoch": 4.727847946644516, "percentage": 94.56, "elapsed_time": "4:09:58", "remaining_time": "0:14:23", "throughput": 8694.23, "total_tokens": 130403816} +{"current_steps": 193530, "total_steps": 204665, "loss": 0.0, "lr": 1.798259805422453e-08, "epoch": 4.727970097476364, "percentage": 94.56, "elapsed_time": "4:09:59", "remaining_time": "0:14:23", "throughput": 8694.25, "total_tokens": 130407080} +{"current_steps": 193535, "total_steps": 204665, "loss": 0.0, "lr": 1.7966502031262154e-08, "epoch": 4.728092248308211, "percentage": 94.56, "elapsed_time": "4:09:59", "remaining_time": "0:14:22", "throughput": 8694.25, "total_tokens": 130410088} +{"current_steps": 193540, "total_steps": 204665, "loss": 0.0, "lr": 1.7950413149911638e-08, "epoch": 4.728214399140058, "percentage": 94.56, "elapsed_time": "4:09:59", "remaining_time": "0:14:22", "throughput": 8694.3, "total_tokens": 130413992} +{"current_steps": 193545, "total_steps": 204665, "loss": 0.0, "lr": 1.7934331410289773e-08, "epoch": 4.728336549971905, "percentage": 94.57, "elapsed_time": "4:10:00", "remaining_time": "0:14:21", "throughput": 8694.33, "total_tokens": 130417576} +{"current_steps": 193550, "total_steps": 204665, "loss": 0.0, "lr": 1.7918256812513576e-08, "epoch": 4.728458700803753, "percentage": 94.57, "elapsed_time": "4:10:00", "remaining_time": "0:14:21", "throughput": 8694.36, "total_tokens": 130420968} +{"current_steps": 193555, "total_steps": 204665, "loss": 0.0, "lr": 1.790218935670007e-08, "epoch": 4.728580851635599, "percentage": 94.57, "elapsed_time": "4:10:00", "remaining_time": "0:14:21", "throughput": 8694.38, "total_tokens": 130424424} +{"current_steps": 193560, "total_steps": 204665, "loss": 0.0, "lr": 1.7886129042965826e-08, "epoch": 4.728703002467447, "percentage": 94.57, "elapsed_time": "4:10:01", "remaining_time": "0:14:20", "throughput": 8694.42, "total_tokens": 130428008} +{"current_steps": 193565, "total_steps": 204665, "loss": 0.0, "lr": 1.787007587142797e-08, "epoch": 4.728825153299294, "percentage": 94.58, "elapsed_time": "4:10:01", "remaining_time": "0:14:20", "throughput": 8694.43, "total_tokens": 130431080} +{"current_steps": 193570, "total_steps": 204665, "loss": 0.0, "lr": 1.7854029842203078e-08, "epoch": 4.728947304131141, "percentage": 94.58, "elapsed_time": "4:10:02", "remaining_time": "0:14:19", "throughput": 8694.45, "total_tokens": 130434536} +{"current_steps": 193575, "total_steps": 204665, "loss": 0.0, "lr": 1.7837990955407723e-08, "epoch": 4.729069454962988, "percentage": 94.58, "elapsed_time": "4:10:02", "remaining_time": "0:14:19", "throughput": 8694.47, "total_tokens": 130437864} +{"current_steps": 193580, "total_steps": 204665, "loss": 0.0, "lr": 1.782195921115881e-08, "epoch": 4.729191605794836, "percentage": 94.58, "elapsed_time": "4:10:02", "remaining_time": "0:14:19", "throughput": 8694.48, "total_tokens": 130440872} +{"current_steps": 193585, "total_steps": 204665, "loss": 0.0, "lr": 1.7805934609572693e-08, "epoch": 4.7293137566266825, "percentage": 94.59, "elapsed_time": "4:10:03", "remaining_time": "0:14:18", "throughput": 8694.48, "total_tokens": 130443880} +{"current_steps": 193590, "total_steps": 204665, "loss": 0.0, "lr": 1.7789917150766054e-08, "epoch": 4.72943590745853, "percentage": 94.59, "elapsed_time": "4:10:03", "remaining_time": "0:14:18", "throughput": 8694.51, "total_tokens": 130447400} +{"current_steps": 193595, "total_steps": 204665, "loss": 0.0, "lr": 1.7773906834855245e-08, "epoch": 4.729558058290377, "percentage": 94.59, "elapsed_time": "4:10:03", "remaining_time": "0:14:17", "throughput": 8694.56, "total_tokens": 130451240} +{"current_steps": 193600, "total_steps": 204665, "loss": 0.0, "lr": 1.7757903661956842e-08, "epoch": 4.729680209122224, "percentage": 94.59, "elapsed_time": "4:10:04", "remaining_time": "0:14:17", "throughput": 8694.59, "total_tokens": 130454760} +{"current_steps": 193605, "total_steps": 204665, "loss": 0.0, "lr": 1.774190763218719e-08, "epoch": 4.729802359954071, "percentage": 94.6, "elapsed_time": "4:10:04", "remaining_time": "0:14:17", "throughput": 8694.65, "total_tokens": 130458728} +{"current_steps": 193610, "total_steps": 204665, "loss": 0.0, "lr": 1.7725918745662426e-08, "epoch": 4.729924510785919, "percentage": 94.6, "elapsed_time": "4:10:04", "remaining_time": "0:14:16", "throughput": 8694.68, "total_tokens": 130462120} +{"current_steps": 193615, "total_steps": 204665, "loss": 0.0, "lr": 1.770993700249912e-08, "epoch": 4.730046661617766, "percentage": 94.6, "elapsed_time": "4:10:05", "remaining_time": "0:14:16", "throughput": 8694.71, "total_tokens": 130465768} +{"current_steps": 193620, "total_steps": 204665, "loss": 0.0, "lr": 1.7693962402813288e-08, "epoch": 4.730168812449612, "percentage": 94.6, "elapsed_time": "4:10:05", "remaining_time": "0:14:15", "throughput": 8694.74, "total_tokens": 130469288} +{"current_steps": 193625, "total_steps": 204665, "loss": 0.0, "lr": 1.7677994946721286e-08, "epoch": 4.73029096328146, "percentage": 94.61, "elapsed_time": "4:10:05", "remaining_time": "0:14:15", "throughput": 8694.78, "total_tokens": 130472808} +{"current_steps": 193630, "total_steps": 204665, "loss": 0.0, "lr": 1.7662034634339017e-08, "epoch": 4.730413114113307, "percentage": 94.61, "elapsed_time": "4:10:06", "remaining_time": "0:14:15", "throughput": 8694.8, "total_tokens": 130476264} +{"current_steps": 193635, "total_steps": 204665, "loss": 0.0, "lr": 1.7646081465782614e-08, "epoch": 4.730535264945154, "percentage": 94.61, "elapsed_time": "4:10:06", "remaining_time": "0:14:14", "throughput": 8694.81, "total_tokens": 130479272} +{"current_steps": 193640, "total_steps": 204665, "loss": 0.0, "lr": 1.7630135441168203e-08, "epoch": 4.730657415777001, "percentage": 94.61, "elapsed_time": "4:10:06", "remaining_time": "0:14:14", "throughput": 8694.83, "total_tokens": 130482728} +{"current_steps": 193645, "total_steps": 204665, "loss": 0.0, "lr": 1.7614196560611583e-08, "epoch": 4.730779566608849, "percentage": 94.62, "elapsed_time": "4:10:07", "remaining_time": "0:14:14", "throughput": 8694.86, "total_tokens": 130486056} +{"current_steps": 193650, "total_steps": 204665, "loss": 0.0, "lr": 1.7598264824228883e-08, "epoch": 4.7309017174406955, "percentage": 94.62, "elapsed_time": "4:10:07", "remaining_time": "0:14:13", "throughput": 8694.86, "total_tokens": 130489128} +{"current_steps": 193655, "total_steps": 204665, "loss": 0.0, "lr": 1.7582340232135782e-08, "epoch": 4.731023868272543, "percentage": 94.62, "elapsed_time": "4:10:07", "remaining_time": "0:14:13", "throughput": 8694.88, "total_tokens": 130492456} +{"current_steps": 193660, "total_steps": 204665, "loss": 0.0, "lr": 1.7566422784448087e-08, "epoch": 4.73114601910439, "percentage": 94.62, "elapsed_time": "4:10:08", "remaining_time": "0:14:12", "throughput": 8694.91, "total_tokens": 130495912} +{"current_steps": 193665, "total_steps": 204665, "loss": 0.0, "lr": 1.7550512481281698e-08, "epoch": 4.7312681699362376, "percentage": 94.63, "elapsed_time": "4:10:08", "remaining_time": "0:14:12", "throughput": 8694.94, "total_tokens": 130499368} +{"current_steps": 193670, "total_steps": 204665, "loss": 0.0, "lr": 1.753460932275208e-08, "epoch": 4.731390320768084, "percentage": 94.63, "elapsed_time": "4:10:09", "remaining_time": "0:14:12", "throughput": 8694.98, "total_tokens": 130503016} +{"current_steps": 193675, "total_steps": 204665, "loss": 0.0, "lr": 1.751871330897514e-08, "epoch": 4.731512471599932, "percentage": 94.63, "elapsed_time": "4:10:09", "remaining_time": "0:14:11", "throughput": 8694.99, "total_tokens": 130506216} +{"current_steps": 193680, "total_steps": 204665, "loss": 0.0, "lr": 1.7502824440066344e-08, "epoch": 4.731634622431779, "percentage": 94.63, "elapsed_time": "4:10:09", "remaining_time": "0:14:11", "throughput": 8695.01, "total_tokens": 130509544} +{"current_steps": 193685, "total_steps": 204665, "loss": 0.0, "lr": 1.7486942716141374e-08, "epoch": 4.731756773263626, "percentage": 94.64, "elapsed_time": "4:10:10", "remaining_time": "0:14:10", "throughput": 8695.02, "total_tokens": 130512552} +{"current_steps": 193690, "total_steps": 204665, "loss": 0.0825, "lr": 1.747106813731547e-08, "epoch": 4.731878924095473, "percentage": 94.64, "elapsed_time": "4:10:10", "remaining_time": "0:14:10", "throughput": 8695.02, "total_tokens": 130515688} +{"current_steps": 193695, "total_steps": 204665, "loss": 0.0, "lr": 1.7455200703704432e-08, "epoch": 4.73200107492732, "percentage": 94.64, "elapsed_time": "4:10:10", "remaining_time": "0:14:10", "throughput": 8695.04, "total_tokens": 130518888} +{"current_steps": 193700, "total_steps": 204665, "loss": 0.0, "lr": 1.7439340415423164e-08, "epoch": 4.732123225759167, "percentage": 94.64, "elapsed_time": "4:10:11", "remaining_time": "0:14:09", "throughput": 8695.06, "total_tokens": 130522216} +{"current_steps": 193705, "total_steps": 204665, "loss": 0.0, "lr": 1.7423487272587577e-08, "epoch": 4.732245376591015, "percentage": 94.64, "elapsed_time": "4:10:11", "remaining_time": "0:14:09", "throughput": 8695.07, "total_tokens": 130525352} +{"current_steps": 193710, "total_steps": 204665, "loss": 0.0, "lr": 1.740764127531258e-08, "epoch": 4.732367527422862, "percentage": 94.65, "elapsed_time": "4:10:11", "remaining_time": "0:14:08", "throughput": 8695.12, "total_tokens": 130529128} +{"current_steps": 193715, "total_steps": 204665, "loss": 0.0, "lr": 1.739180242371341e-08, "epoch": 4.732489678254709, "percentage": 94.65, "elapsed_time": "4:10:12", "remaining_time": "0:14:08", "throughput": 8695.11, "total_tokens": 130532008} +{"current_steps": 193720, "total_steps": 204665, "loss": 0.0, "lr": 1.7375970717905418e-08, "epoch": 4.732611829086556, "percentage": 94.65, "elapsed_time": "4:10:12", "remaining_time": "0:14:08", "throughput": 8695.13, "total_tokens": 130535208} +{"current_steps": 193725, "total_steps": 204665, "loss": 0.0, "lr": 1.736014615800352e-08, "epoch": 4.732733979918403, "percentage": 94.65, "elapsed_time": "4:10:12", "remaining_time": "0:14:07", "throughput": 8695.14, "total_tokens": 130538344} +{"current_steps": 193730, "total_steps": 204665, "loss": 0.0, "lr": 1.734432874412306e-08, "epoch": 4.732856130750251, "percentage": 94.66, "elapsed_time": "4:10:13", "remaining_time": "0:14:07", "throughput": 8695.14, "total_tokens": 130541416} +{"current_steps": 193735, "total_steps": 204665, "loss": 0.0, "lr": 1.732851847637895e-08, "epoch": 4.732978281582097, "percentage": 94.66, "elapsed_time": "4:10:13", "remaining_time": "0:14:07", "throughput": 8695.15, "total_tokens": 130544552} +{"current_steps": 193740, "total_steps": 204665, "loss": 0.0, "lr": 1.7312715354886098e-08, "epoch": 4.733100432413945, "percentage": 94.66, "elapsed_time": "4:10:13", "remaining_time": "0:14:06", "throughput": 8695.18, "total_tokens": 130548008} +{"current_steps": 193745, "total_steps": 204665, "loss": 0.0, "lr": 1.7296919379759635e-08, "epoch": 4.733222583245792, "percentage": 94.66, "elapsed_time": "4:10:14", "remaining_time": "0:14:06", "throughput": 8695.2, "total_tokens": 130551208} +{"current_steps": 193750, "total_steps": 204665, "loss": 0.0, "lr": 1.7281130551114132e-08, "epoch": 4.733344734077639, "percentage": 94.67, "elapsed_time": "4:10:14", "remaining_time": "0:14:05", "throughput": 8695.23, "total_tokens": 130554792} +{"current_steps": 193755, "total_steps": 204665, "loss": 0.0, "lr": 1.726534886906461e-08, "epoch": 4.733466884909486, "percentage": 94.67, "elapsed_time": "4:10:14", "remaining_time": "0:14:05", "throughput": 8695.26, "total_tokens": 130558376} +{"current_steps": 193760, "total_steps": 204665, "loss": 0.0, "lr": 1.7249574333725868e-08, "epoch": 4.733589035741334, "percentage": 94.67, "elapsed_time": "4:10:15", "remaining_time": "0:14:05", "throughput": 8695.3, "total_tokens": 130561896} +{"current_steps": 193765, "total_steps": 204665, "loss": 0.0, "lr": 1.723380694521237e-08, "epoch": 4.7337111865731805, "percentage": 94.67, "elapsed_time": "4:10:15", "remaining_time": "0:14:04", "throughput": 8695.33, "total_tokens": 130565480} +{"current_steps": 193770, "total_steps": 204665, "loss": 0.0, "lr": 1.7218046703639134e-08, "epoch": 4.733833337405028, "percentage": 94.68, "elapsed_time": "4:10:15", "remaining_time": "0:14:04", "throughput": 8695.34, "total_tokens": 130568616} +{"current_steps": 193775, "total_steps": 204665, "loss": 0.0, "lr": 1.7202293609120512e-08, "epoch": 4.733955488236875, "percentage": 94.68, "elapsed_time": "4:10:16", "remaining_time": "0:14:03", "throughput": 8695.38, "total_tokens": 130572264} +{"current_steps": 193780, "total_steps": 204665, "loss": 0.0, "lr": 1.71865476617713e-08, "epoch": 4.7340776390687225, "percentage": 94.68, "elapsed_time": "4:10:16", "remaining_time": "0:14:03", "throughput": 8695.41, "total_tokens": 130575848} +{"current_steps": 193785, "total_steps": 204665, "loss": 0.0, "lr": 1.7170808861705633e-08, "epoch": 4.734199789900569, "percentage": 94.68, "elapsed_time": "4:10:16", "remaining_time": "0:14:03", "throughput": 8695.45, "total_tokens": 130579496} +{"current_steps": 193790, "total_steps": 204665, "loss": 0.0, "lr": 1.7155077209038416e-08, "epoch": 4.734321940732416, "percentage": 94.69, "elapsed_time": "4:10:17", "remaining_time": "0:14:02", "throughput": 8695.46, "total_tokens": 130582568} +{"current_steps": 193795, "total_steps": 204665, "loss": 0.0, "lr": 1.713935270388367e-08, "epoch": 4.734444091564264, "percentage": 94.69, "elapsed_time": "4:10:17", "remaining_time": "0:14:02", "throughput": 8695.47, "total_tokens": 130585640} +{"current_steps": 193800, "total_steps": 204665, "loss": 0.0, "lr": 1.712363534635597e-08, "epoch": 4.73456624239611, "percentage": 94.69, "elapsed_time": "4:10:18", "remaining_time": "0:14:01", "throughput": 8695.49, "total_tokens": 130588904} +{"current_steps": 193805, "total_steps": 204665, "loss": 0.0, "lr": 1.7107925136569557e-08, "epoch": 4.734688393227958, "percentage": 94.69, "elapsed_time": "4:10:18", "remaining_time": "0:14:01", "throughput": 8695.51, "total_tokens": 130592296} +{"current_steps": 193810, "total_steps": 204665, "loss": 0.0, "lr": 1.7092222074638674e-08, "epoch": 4.734810544059805, "percentage": 94.7, "elapsed_time": "4:10:18", "remaining_time": "0:14:01", "throughput": 8695.52, "total_tokens": 130595432} +{"current_steps": 193815, "total_steps": 204665, "loss": 0.0, "lr": 1.7076526160677563e-08, "epoch": 4.734932694891652, "percentage": 94.7, "elapsed_time": "4:10:19", "remaining_time": "0:14:00", "throughput": 8695.53, "total_tokens": 130598568} +{"current_steps": 193820, "total_steps": 204665, "loss": 0.0, "lr": 1.7060837394800244e-08, "epoch": 4.735054845723499, "percentage": 94.7, "elapsed_time": "4:10:19", "remaining_time": "0:14:00", "throughput": 8695.6, "total_tokens": 130602856} +{"current_steps": 193825, "total_steps": 204665, "loss": 0.0, "lr": 1.7045155777120844e-08, "epoch": 4.735176996555347, "percentage": 94.7, "elapsed_time": "4:10:19", "remaining_time": "0:14:00", "throughput": 8695.63, "total_tokens": 130606248} +{"current_steps": 193830, "total_steps": 204665, "loss": 0.0, "lr": 1.7029481307753613e-08, "epoch": 4.7352991473871935, "percentage": 94.71, "elapsed_time": "4:10:20", "remaining_time": "0:13:59", "throughput": 8695.69, "total_tokens": 130610408} +{"current_steps": 193835, "total_steps": 204665, "loss": 0.0, "lr": 1.7013813986812233e-08, "epoch": 4.735421298219041, "percentage": 94.71, "elapsed_time": "4:10:20", "remaining_time": "0:13:59", "throughput": 8695.78, "total_tokens": 130615080} +{"current_steps": 193840, "total_steps": 204665, "loss": 0.0, "lr": 1.6998153814410943e-08, "epoch": 4.735543449050888, "percentage": 94.71, "elapsed_time": "4:10:20", "remaining_time": "0:13:58", "throughput": 8695.79, "total_tokens": 130618152} +{"current_steps": 193845, "total_steps": 204665, "loss": 0.0, "lr": 1.6982500790663325e-08, "epoch": 4.7356655998827355, "percentage": 94.71, "elapsed_time": "4:10:21", "remaining_time": "0:13:58", "throughput": 8695.82, "total_tokens": 130621672} +{"current_steps": 193850, "total_steps": 204665, "loss": 0.0, "lr": 1.6966854915683504e-08, "epoch": 4.735787750714582, "percentage": 94.72, "elapsed_time": "4:10:21", "remaining_time": "0:13:58", "throughput": 8695.84, "total_tokens": 130625064} +{"current_steps": 193855, "total_steps": 204665, "loss": 0.0, "lr": 1.6951216189585062e-08, "epoch": 4.73590990154643, "percentage": 94.72, "elapsed_time": "4:10:21", "remaining_time": "0:13:57", "throughput": 8695.87, "total_tokens": 130628456} +{"current_steps": 193860, "total_steps": 204665, "loss": 0.0, "lr": 1.693558461248168e-08, "epoch": 4.736032052378277, "percentage": 94.72, "elapsed_time": "4:10:22", "remaining_time": "0:13:57", "throughput": 8695.89, "total_tokens": 130631848} +{"current_steps": 193865, "total_steps": 204665, "loss": 0.0, "lr": 1.691996018448727e-08, "epoch": 4.736154203210123, "percentage": 94.72, "elapsed_time": "4:10:22", "remaining_time": "0:13:56", "throughput": 8695.9, "total_tokens": 130634920} +{"current_steps": 193870, "total_steps": 204665, "loss": 0.0, "lr": 1.6904342905715297e-08, "epoch": 4.736276354041971, "percentage": 94.73, "elapsed_time": "4:10:22", "remaining_time": "0:13:56", "throughput": 8695.91, "total_tokens": 130638120} +{"current_steps": 193875, "total_steps": 204665, "loss": 0.0, "lr": 1.6888732776279336e-08, "epoch": 4.736398504873819, "percentage": 94.73, "elapsed_time": "4:10:23", "remaining_time": "0:13:56", "throughput": 8695.93, "total_tokens": 130641448} +{"current_steps": 193880, "total_steps": 204665, "loss": 0.0, "lr": 1.6873129796292964e-08, "epoch": 4.736520655705665, "percentage": 94.73, "elapsed_time": "4:10:23", "remaining_time": "0:13:55", "throughput": 8695.97, "total_tokens": 130645096} +{"current_steps": 193885, "total_steps": 204665, "loss": 0.0, "lr": 1.685753396586953e-08, "epoch": 4.736642806537512, "percentage": 94.73, "elapsed_time": "4:10:23", "remaining_time": "0:13:55", "throughput": 8696.01, "total_tokens": 130648744} +{"current_steps": 193890, "total_steps": 204665, "loss": 0.0, "lr": 1.6841945285122727e-08, "epoch": 4.73676495736936, "percentage": 94.74, "elapsed_time": "4:10:24", "remaining_time": "0:13:54", "throughput": 8696.03, "total_tokens": 130652200} +{"current_steps": 193895, "total_steps": 204665, "loss": 0.0, "lr": 1.6826363754165573e-08, "epoch": 4.7368871082012065, "percentage": 94.74, "elapsed_time": "4:10:24", "remaining_time": "0:13:54", "throughput": 8696.05, "total_tokens": 130655528} +{"current_steps": 193900, "total_steps": 204665, "loss": 0.0, "lr": 1.6810789373111644e-08, "epoch": 4.737009259033054, "percentage": 94.74, "elapsed_time": "4:10:25", "remaining_time": "0:13:54", "throughput": 8696.05, "total_tokens": 130658472} +{"current_steps": 193905, "total_steps": 204665, "loss": 0.0, "lr": 1.6795222142073962e-08, "epoch": 4.737131409864901, "percentage": 94.74, "elapsed_time": "4:10:25", "remaining_time": "0:13:53", "throughput": 8696.08, "total_tokens": 130661928} +{"current_steps": 193910, "total_steps": 204665, "loss": 0.0, "lr": 1.677966206116599e-08, "epoch": 4.7372535606967485, "percentage": 94.75, "elapsed_time": "4:10:25", "remaining_time": "0:13:53", "throughput": 8696.11, "total_tokens": 130665448} +{"current_steps": 193915, "total_steps": 204665, "loss": 0.0, "lr": 1.676410913050086e-08, "epoch": 4.737375711528595, "percentage": 94.75, "elapsed_time": "4:10:26", "remaining_time": "0:13:52", "throughput": 8696.13, "total_tokens": 130668712} +{"current_steps": 193920, "total_steps": 204665, "loss": 0.0, "lr": 1.674856335019137e-08, "epoch": 4.737497862360443, "percentage": 94.75, "elapsed_time": "4:10:26", "remaining_time": "0:13:52", "throughput": 8696.14, "total_tokens": 130671976} +{"current_steps": 193925, "total_steps": 204665, "loss": 0.0, "lr": 1.6733024720350987e-08, "epoch": 4.73762001319229, "percentage": 94.75, "elapsed_time": "4:10:26", "remaining_time": "0:13:52", "throughput": 8696.16, "total_tokens": 130675240} +{"current_steps": 193930, "total_steps": 204665, "loss": 0.0, "lr": 1.6717493241092396e-08, "epoch": 4.737742164024137, "percentage": 94.75, "elapsed_time": "4:10:27", "remaining_time": "0:13:51", "throughput": 8696.17, "total_tokens": 130678376} +{"current_steps": 193935, "total_steps": 204665, "loss": 0.0, "lr": 1.670196891252873e-08, "epoch": 4.737864314855984, "percentage": 94.76, "elapsed_time": "4:10:27", "remaining_time": "0:13:51", "throughput": 8696.2, "total_tokens": 130681768} +{"current_steps": 193940, "total_steps": 204665, "loss": 0.0, "lr": 1.668645173477279e-08, "epoch": 4.737986465687832, "percentage": 94.76, "elapsed_time": "4:10:27", "remaining_time": "0:13:51", "throughput": 8696.21, "total_tokens": 130684968} +{"current_steps": 193945, "total_steps": 204665, "loss": 0.0, "lr": 1.667094170793748e-08, "epoch": 4.738108616519678, "percentage": 94.76, "elapsed_time": "4:10:28", "remaining_time": "0:13:50", "throughput": 8696.25, "total_tokens": 130688744} +{"current_steps": 193950, "total_steps": 204665, "loss": 0.0, "lr": 1.6655438832135494e-08, "epoch": 4.738230767351526, "percentage": 94.76, "elapsed_time": "4:10:28", "remaining_time": "0:13:50", "throughput": 8696.29, "total_tokens": 130692392} +{"current_steps": 193955, "total_steps": 204665, "loss": 0.0, "lr": 1.6639943107479627e-08, "epoch": 4.738352918183373, "percentage": 94.77, "elapsed_time": "4:10:28", "remaining_time": "0:13:49", "throughput": 8696.32, "total_tokens": 130695976} +{"current_steps": 193960, "total_steps": 204665, "loss": 0.0, "lr": 1.6624454534082678e-08, "epoch": 4.7384750690152195, "percentage": 94.77, "elapsed_time": "4:10:29", "remaining_time": "0:13:49", "throughput": 8696.36, "total_tokens": 130699560} +{"current_steps": 193965, "total_steps": 204665, "loss": 0.0, "lr": 1.6608973112057113e-08, "epoch": 4.738597219847067, "percentage": 94.77, "elapsed_time": "4:10:29", "remaining_time": "0:13:49", "throughput": 8696.41, "total_tokens": 130703528} +{"current_steps": 193970, "total_steps": 204665, "loss": 0.0, "lr": 1.659349884151573e-08, "epoch": 4.738719370678915, "percentage": 94.77, "elapsed_time": "4:10:29", "remaining_time": "0:13:48", "throughput": 8696.44, "total_tokens": 130706920} +{"current_steps": 193975, "total_steps": 204665, "loss": 0.0004, "lr": 1.6578031722570774e-08, "epoch": 4.7388415215107615, "percentage": 94.78, "elapsed_time": "4:10:30", "remaining_time": "0:13:48", "throughput": 8696.46, "total_tokens": 130710376} +{"current_steps": 193980, "total_steps": 204665, "loss": 0.0, "lr": 1.656257175533493e-08, "epoch": 4.738963672342608, "percentage": 94.78, "elapsed_time": "4:10:30", "remaining_time": "0:13:47", "throughput": 8696.49, "total_tokens": 130713704} +{"current_steps": 193985, "total_steps": 204665, "loss": 0.0, "lr": 1.6547118939920556e-08, "epoch": 4.739085823174456, "percentage": 94.78, "elapsed_time": "4:10:30", "remaining_time": "0:13:47", "throughput": 8696.51, "total_tokens": 130717032} +{"current_steps": 193990, "total_steps": 204665, "loss": 0.0, "lr": 1.6531673276440118e-08, "epoch": 4.739207974006303, "percentage": 94.78, "elapsed_time": "4:10:31", "remaining_time": "0:13:47", "throughput": 8696.56, "total_tokens": 130720936} +{"current_steps": 193995, "total_steps": 204665, "loss": 0.0001, "lr": 1.6516234765005855e-08, "epoch": 4.73933012483815, "percentage": 94.79, "elapsed_time": "4:10:31", "remaining_time": "0:13:46", "throughput": 8696.57, "total_tokens": 130724136} +{"current_steps": 194000, "total_steps": 204665, "loss": 0.0, "lr": 1.6500803405730013e-08, "epoch": 4.739452275669997, "percentage": 94.79, "elapsed_time": "4:10:32", "remaining_time": "0:13:46", "throughput": 8696.61, "total_tokens": 130727784} +{"current_steps": 194005, "total_steps": 204665, "loss": 0.0, "lr": 1.6485379198724948e-08, "epoch": 4.739574426501845, "percentage": 94.79, "elapsed_time": "4:10:32", "remaining_time": "0:13:45", "throughput": 8696.62, "total_tokens": 130730856} +{"current_steps": 194010, "total_steps": 204665, "loss": 0.0, "lr": 1.6469962144102568e-08, "epoch": 4.739696577333691, "percentage": 94.79, "elapsed_time": "4:10:32", "remaining_time": "0:13:45", "throughput": 8696.64, "total_tokens": 130734312} +{"current_steps": 194015, "total_steps": 204665, "loss": 0.0, "lr": 1.645455224197534e-08, "epoch": 4.739818728165539, "percentage": 94.8, "elapsed_time": "4:10:33", "remaining_time": "0:13:45", "throughput": 8696.64, "total_tokens": 130737256} +{"current_steps": 194020, "total_steps": 204665, "loss": 0.0, "lr": 1.6439149492455172e-08, "epoch": 4.739940878997386, "percentage": 94.8, "elapsed_time": "4:10:33", "remaining_time": "0:13:44", "throughput": 8696.65, "total_tokens": 130740456} +{"current_steps": 194025, "total_steps": 204665, "loss": 0.0, "lr": 1.642375389565387e-08, "epoch": 4.740063029829233, "percentage": 94.8, "elapsed_time": "4:10:33", "remaining_time": "0:13:44", "throughput": 8696.66, "total_tokens": 130743528} +{"current_steps": 194030, "total_steps": 204665, "loss": 0.0, "lr": 1.640836545168378e-08, "epoch": 4.74018518066108, "percentage": 94.8, "elapsed_time": "4:10:34", "remaining_time": "0:13:44", "throughput": 8696.68, "total_tokens": 130746792} +{"current_steps": 194035, "total_steps": 204665, "loss": 0.0, "lr": 1.6392984160656486e-08, "epoch": 4.740307331492928, "percentage": 94.81, "elapsed_time": "4:10:34", "remaining_time": "0:13:43", "throughput": 8696.69, "total_tokens": 130750056} +{"current_steps": 194040, "total_steps": 204665, "loss": 0.0, "lr": 1.6377610022683897e-08, "epoch": 4.740429482324775, "percentage": 94.81, "elapsed_time": "4:10:34", "remaining_time": "0:13:43", "throughput": 8696.68, "total_tokens": 130752744} +{"current_steps": 194045, "total_steps": 204665, "loss": 0.0, "lr": 1.6362243037878032e-08, "epoch": 4.740551633156622, "percentage": 94.81, "elapsed_time": "4:10:35", "remaining_time": "0:13:42", "throughput": 8696.7, "total_tokens": 130755944} +{"current_steps": 194050, "total_steps": 204665, "loss": 0.0403, "lr": 1.634688320635047e-08, "epoch": 4.740673783988469, "percentage": 94.81, "elapsed_time": "4:10:35", "remaining_time": "0:13:42", "throughput": 8696.73, "total_tokens": 130759464} +{"current_steps": 194055, "total_steps": 204665, "loss": 0.0, "lr": 1.6331530528212902e-08, "epoch": 4.740795934820316, "percentage": 94.82, "elapsed_time": "4:10:35", "remaining_time": "0:13:42", "throughput": 8696.76, "total_tokens": 130762984} +{"current_steps": 194060, "total_steps": 204665, "loss": 0.0, "lr": 1.6316185003577008e-08, "epoch": 4.740918085652163, "percentage": 94.82, "elapsed_time": "4:10:36", "remaining_time": "0:13:41", "throughput": 8696.78, "total_tokens": 130766248} +{"current_steps": 194065, "total_steps": 204665, "loss": 0.0, "lr": 1.630084663255449e-08, "epoch": 4.74104023648401, "percentage": 94.82, "elapsed_time": "4:10:36", "remaining_time": "0:13:41", "throughput": 8696.79, "total_tokens": 130769448} +{"current_steps": 194070, "total_steps": 204665, "loss": 0.0, "lr": 1.628551541525669e-08, "epoch": 4.741162387315858, "percentage": 94.82, "elapsed_time": "4:10:36", "remaining_time": "0:13:40", "throughput": 8696.82, "total_tokens": 130773032} +{"current_steps": 194075, "total_steps": 204665, "loss": 0.0, "lr": 1.6270191351795194e-08, "epoch": 4.7412845381477045, "percentage": 94.83, "elapsed_time": "4:10:37", "remaining_time": "0:13:40", "throughput": 8696.85, "total_tokens": 130776424} +{"current_steps": 194080, "total_steps": 204665, "loss": 0.0, "lr": 1.6254874442281574e-08, "epoch": 4.741406688979552, "percentage": 94.83, "elapsed_time": "4:10:37", "remaining_time": "0:13:40", "throughput": 8696.88, "total_tokens": 130779944} +{"current_steps": 194085, "total_steps": 204665, "loss": 0.0, "lr": 1.623956468682708e-08, "epoch": 4.741528839811399, "percentage": 94.83, "elapsed_time": "4:10:37", "remaining_time": "0:13:39", "throughput": 8696.89, "total_tokens": 130783144} +{"current_steps": 194090, "total_steps": 204665, "loss": 0.0, "lr": 1.6224262085543063e-08, "epoch": 4.7416509906432465, "percentage": 94.83, "elapsed_time": "4:10:38", "remaining_time": "0:13:39", "throughput": 8696.9, "total_tokens": 130786216} +{"current_steps": 194095, "total_steps": 204665, "loss": 0.0, "lr": 1.6208966638540766e-08, "epoch": 4.741773141475093, "percentage": 94.84, "elapsed_time": "4:10:38", "remaining_time": "0:13:38", "throughput": 8696.91, "total_tokens": 130789416} +{"current_steps": 194100, "total_steps": 204665, "loss": 0.0, "lr": 1.619367834593155e-08, "epoch": 4.741895292306941, "percentage": 94.84, "elapsed_time": "4:10:38", "remaining_time": "0:13:38", "throughput": 8696.93, "total_tokens": 130792680} +{"current_steps": 194105, "total_steps": 204665, "loss": 0.0, "lr": 1.6178397207826434e-08, "epoch": 4.742017443138788, "percentage": 94.84, "elapsed_time": "4:10:39", "remaining_time": "0:13:38", "throughput": 8696.95, "total_tokens": 130795944} +{"current_steps": 194110, "total_steps": 204665, "loss": 0.0, "lr": 1.616312322433666e-08, "epoch": 4.742139593970635, "percentage": 94.84, "elapsed_time": "4:10:39", "remaining_time": "0:13:37", "throughput": 8696.97, "total_tokens": 130799400} +{"current_steps": 194115, "total_steps": 204665, "loss": 0.0, "lr": 1.6147856395573258e-08, "epoch": 4.742261744802482, "percentage": 94.85, "elapsed_time": "4:10:40", "remaining_time": "0:13:37", "throughput": 8697.01, "total_tokens": 130802984} +{"current_steps": 194120, "total_steps": 204665, "loss": 0.0, "lr": 1.613259672164735e-08, "epoch": 4.74238389563433, "percentage": 94.85, "elapsed_time": "4:10:40", "remaining_time": "0:13:37", "throughput": 8697.07, "total_tokens": 130807016} +{"current_steps": 194125, "total_steps": 204665, "loss": 0.0, "lr": 1.611734420266986e-08, "epoch": 4.742506046466176, "percentage": 94.85, "elapsed_time": "4:10:40", "remaining_time": "0:13:36", "throughput": 8697.09, "total_tokens": 130810344} +{"current_steps": 194130, "total_steps": 204665, "loss": 0.0477, "lr": 1.6102098838751465e-08, "epoch": 4.742628197298024, "percentage": 94.85, "elapsed_time": "4:10:41", "remaining_time": "0:13:36", "throughput": 8697.12, "total_tokens": 130813928} +{"current_steps": 194135, "total_steps": 204665, "loss": 0.0, "lr": 1.6086860630003418e-08, "epoch": 4.742750348129871, "percentage": 94.86, "elapsed_time": "4:10:41", "remaining_time": "0:13:35", "throughput": 8697.13, "total_tokens": 130817064} +{"current_steps": 194140, "total_steps": 204665, "loss": 0.0, "lr": 1.6071629576536295e-08, "epoch": 4.742872498961718, "percentage": 94.86, "elapsed_time": "4:10:41", "remaining_time": "0:13:35", "throughput": 8697.16, "total_tokens": 130820456} +{"current_steps": 194145, "total_steps": 204665, "loss": 0.0, "lr": 1.6056405678460892e-08, "epoch": 4.742994649793565, "percentage": 94.86, "elapsed_time": "4:10:42", "remaining_time": "0:13:35", "throughput": 8697.18, "total_tokens": 130823848} +{"current_steps": 194150, "total_steps": 204665, "loss": 0.0, "lr": 1.604118893588802e-08, "epoch": 4.743116800625412, "percentage": 94.86, "elapsed_time": "4:10:42", "remaining_time": "0:13:34", "throughput": 8697.21, "total_tokens": 130827240} +{"current_steps": 194155, "total_steps": 204665, "loss": 0.0, "lr": 1.6025979348928242e-08, "epoch": 4.7432389514572595, "percentage": 94.86, "elapsed_time": "4:10:42", "remaining_time": "0:13:34", "throughput": 8697.25, "total_tokens": 130830952} +{"current_steps": 194160, "total_steps": 204665, "loss": 0.0, "lr": 1.601077691769226e-08, "epoch": 4.743361102289106, "percentage": 94.87, "elapsed_time": "4:10:43", "remaining_time": "0:13:33", "throughput": 8697.26, "total_tokens": 130834088} +{"current_steps": 194165, "total_steps": 204665, "loss": 0.0, "lr": 1.5995581642290534e-08, "epoch": 4.743483253120954, "percentage": 94.87, "elapsed_time": "4:10:43", "remaining_time": "0:13:33", "throughput": 8697.28, "total_tokens": 130837352} +{"current_steps": 194170, "total_steps": 204665, "loss": 0.0, "lr": 1.5980393522833536e-08, "epoch": 4.743605403952801, "percentage": 94.87, "elapsed_time": "4:10:43", "remaining_time": "0:13:33", "throughput": 8697.28, "total_tokens": 130840296} +{"current_steps": 194175, "total_steps": 204665, "loss": 0.0, "lr": 1.596521255943184e-08, "epoch": 4.743727554784648, "percentage": 94.87, "elapsed_time": "4:10:44", "remaining_time": "0:13:32", "throughput": 8697.33, "total_tokens": 130844200} +{"current_steps": 194180, "total_steps": 204665, "loss": 0.0, "lr": 1.5950038752195806e-08, "epoch": 4.743849705616495, "percentage": 94.88, "elapsed_time": "4:10:44", "remaining_time": "0:13:32", "throughput": 8697.33, "total_tokens": 130847208} +{"current_steps": 194185, "total_steps": 204665, "loss": 0.0, "lr": 1.5934872101235785e-08, "epoch": 4.743971856448343, "percentage": 94.88, "elapsed_time": "4:10:44", "remaining_time": "0:13:31", "throughput": 8697.4, "total_tokens": 130851368} +{"current_steps": 194190, "total_steps": 204665, "loss": 0.0, "lr": 1.5919712606662027e-08, "epoch": 4.744094007280189, "percentage": 94.88, "elapsed_time": "4:10:45", "remaining_time": "0:13:31", "throughput": 8697.41, "total_tokens": 130854568} +{"current_steps": 194195, "total_steps": 204665, "loss": 0.0, "lr": 1.590456026858478e-08, "epoch": 4.744216158112037, "percentage": 94.88, "elapsed_time": "4:10:45", "remaining_time": "0:13:31", "throughput": 8697.47, "total_tokens": 130858600} +{"current_steps": 194200, "total_steps": 204665, "loss": 0.0, "lr": 1.588941508711428e-08, "epoch": 4.744338308943884, "percentage": 94.89, "elapsed_time": "4:10:45", "remaining_time": "0:13:30", "throughput": 8697.49, "total_tokens": 130861864} +{"current_steps": 194205, "total_steps": 204665, "loss": 0.0, "lr": 1.5874277062360663e-08, "epoch": 4.744460459775731, "percentage": 94.89, "elapsed_time": "4:10:46", "remaining_time": "0:13:30", "throughput": 8697.51, "total_tokens": 130865192} +{"current_steps": 194210, "total_steps": 204665, "loss": 0.0, "lr": 1.5859146194433958e-08, "epoch": 4.744582610607578, "percentage": 94.89, "elapsed_time": "4:10:46", "remaining_time": "0:13:30", "throughput": 8697.52, "total_tokens": 130868392} +{"current_steps": 194215, "total_steps": 204665, "loss": 0.0, "lr": 1.5844022483444296e-08, "epoch": 4.744704761439426, "percentage": 94.89, "elapsed_time": "4:10:46", "remaining_time": "0:13:29", "throughput": 8697.53, "total_tokens": 130871592} +{"current_steps": 194220, "total_steps": 204665, "loss": 0.0, "lr": 1.582890592950159e-08, "epoch": 4.7448269122712725, "percentage": 94.9, "elapsed_time": "4:10:47", "remaining_time": "0:13:29", "throughput": 8697.54, "total_tokens": 130874664} +{"current_steps": 194225, "total_steps": 204665, "loss": 0.0, "lr": 1.581379653271586e-08, "epoch": 4.744949063103119, "percentage": 94.9, "elapsed_time": "4:10:47", "remaining_time": "0:13:28", "throughput": 8697.56, "total_tokens": 130878056} +{"current_steps": 194230, "total_steps": 204665, "loss": 0.0, "lr": 1.57986942931968e-08, "epoch": 4.745071213934967, "percentage": 94.9, "elapsed_time": "4:10:48", "remaining_time": "0:13:28", "throughput": 8697.6, "total_tokens": 130881768} +{"current_steps": 194235, "total_steps": 204665, "loss": 0.0, "lr": 1.5783599211054434e-08, "epoch": 4.7451933647668145, "percentage": 94.9, "elapsed_time": "4:10:48", "remaining_time": "0:13:28", "throughput": 8697.63, "total_tokens": 130885224} +{"current_steps": 194240, "total_steps": 204665, "loss": 0.0, "lr": 1.5768511286398446e-08, "epoch": 4.745315515598661, "percentage": 94.91, "elapsed_time": "4:10:48", "remaining_time": "0:13:27", "throughput": 8697.64, "total_tokens": 130888424} +{"current_steps": 194245, "total_steps": 204665, "loss": 0.0, "lr": 1.575343051933853e-08, "epoch": 4.745437666430508, "percentage": 94.91, "elapsed_time": "4:10:49", "remaining_time": "0:13:27", "throughput": 8697.66, "total_tokens": 130891624} +{"current_steps": 194250, "total_steps": 204665, "loss": 0.0, "lr": 1.5738356909984372e-08, "epoch": 4.745559817262356, "percentage": 94.91, "elapsed_time": "4:10:49", "remaining_time": "0:13:26", "throughput": 8697.68, "total_tokens": 130895016} +{"current_steps": 194255, "total_steps": 204665, "loss": 0.0, "lr": 1.572329045844578e-08, "epoch": 4.745681968094202, "percentage": 94.91, "elapsed_time": "4:10:49", "remaining_time": "0:13:26", "throughput": 8697.73, "total_tokens": 130898856} +{"current_steps": 194260, "total_steps": 204665, "loss": 0.0, "lr": 1.5708231164831998e-08, "epoch": 4.74580411892605, "percentage": 94.92, "elapsed_time": "4:10:50", "remaining_time": "0:13:26", "throughput": 8697.74, "total_tokens": 130902056} +{"current_steps": 194265, "total_steps": 204665, "loss": 0.0, "lr": 1.569317902925271e-08, "epoch": 4.745926269757897, "percentage": 94.92, "elapsed_time": "4:10:50", "remaining_time": "0:13:25", "throughput": 8697.76, "total_tokens": 130905384} +{"current_steps": 194270, "total_steps": 204665, "loss": 0.0, "lr": 1.5678134051817392e-08, "epoch": 4.746048420589744, "percentage": 94.92, "elapsed_time": "4:10:50", "remaining_time": "0:13:25", "throughput": 8697.78, "total_tokens": 130908712} +{"current_steps": 194275, "total_steps": 204665, "loss": 0.0, "lr": 1.56630962326354e-08, "epoch": 4.746170571421591, "percentage": 94.92, "elapsed_time": "4:10:51", "remaining_time": "0:13:24", "throughput": 8697.8, "total_tokens": 130911976} +{"current_steps": 194280, "total_steps": 204665, "loss": 0.0, "lr": 1.5648065571816193e-08, "epoch": 4.746292722253439, "percentage": 94.93, "elapsed_time": "4:10:51", "remaining_time": "0:13:24", "throughput": 8697.79, "total_tokens": 130914728} +{"current_steps": 194285, "total_steps": 204665, "loss": 0.0, "lr": 1.5633042069469025e-08, "epoch": 4.7464148730852855, "percentage": 94.93, "elapsed_time": "4:10:51", "remaining_time": "0:13:24", "throughput": 8697.81, "total_tokens": 130917992} +{"current_steps": 194290, "total_steps": 204665, "loss": 0.0, "lr": 1.561802572570303e-08, "epoch": 4.746537023917133, "percentage": 94.93, "elapsed_time": "4:10:52", "remaining_time": "0:13:23", "throughput": 8697.84, "total_tokens": 130921512} +{"current_steps": 194295, "total_steps": 204665, "loss": 0.0, "lr": 1.560301654062768e-08, "epoch": 4.74665917474898, "percentage": 94.93, "elapsed_time": "4:10:52", "remaining_time": "0:13:23", "throughput": 8697.88, "total_tokens": 130925224} +{"current_steps": 194300, "total_steps": 204665, "loss": 0.0, "lr": 1.5588014514351766e-08, "epoch": 4.7467813255808275, "percentage": 94.94, "elapsed_time": "4:10:52", "remaining_time": "0:13:23", "throughput": 8697.89, "total_tokens": 130928424} +{"current_steps": 194305, "total_steps": 204665, "loss": 0.0001, "lr": 1.5573019646984765e-08, "epoch": 4.746903476412674, "percentage": 94.94, "elapsed_time": "4:10:53", "remaining_time": "0:13:22", "throughput": 8697.95, "total_tokens": 130932520} +{"current_steps": 194310, "total_steps": 204665, "loss": 0.0, "lr": 1.5558031938635474e-08, "epoch": 4.747025627244522, "percentage": 94.94, "elapsed_time": "4:10:53", "remaining_time": "0:13:22", "throughput": 8697.99, "total_tokens": 130936168} +{"current_steps": 194315, "total_steps": 204665, "loss": 0.0002, "lr": 1.554305138941292e-08, "epoch": 4.747147778076369, "percentage": 94.94, "elapsed_time": "4:10:53", "remaining_time": "0:13:21", "throughput": 8698.0, "total_tokens": 130939240} +{"current_steps": 194320, "total_steps": 204665, "loss": 0.0, "lr": 1.5528077999426125e-08, "epoch": 4.747269928908215, "percentage": 94.95, "elapsed_time": "4:10:54", "remaining_time": "0:13:21", "throughput": 8698.05, "total_tokens": 130943080} +{"current_steps": 194325, "total_steps": 204665, "loss": 0.0, "lr": 1.5513111768784004e-08, "epoch": 4.747392079740063, "percentage": 94.95, "elapsed_time": "4:10:54", "remaining_time": "0:13:21", "throughput": 8698.08, "total_tokens": 130946536} +{"current_steps": 194330, "total_steps": 204665, "loss": 0.0, "lr": 1.5498152697595245e-08, "epoch": 4.747514230571911, "percentage": 94.95, "elapsed_time": "4:10:55", "remaining_time": "0:13:20", "throughput": 8698.12, "total_tokens": 130950312} +{"current_steps": 194335, "total_steps": 204665, "loss": 0.0, "lr": 1.5483200785968765e-08, "epoch": 4.747636381403757, "percentage": 94.95, "elapsed_time": "4:10:55", "remaining_time": "0:13:20", "throughput": 8698.17, "total_tokens": 130954088} +{"current_steps": 194340, "total_steps": 204665, "loss": 0.0, "lr": 1.546825603401325e-08, "epoch": 4.747758532235604, "percentage": 94.96, "elapsed_time": "4:10:55", "remaining_time": "0:13:19", "throughput": 8698.2, "total_tokens": 130957608} +{"current_steps": 194345, "total_steps": 204665, "loss": 0.0, "lr": 1.5453318441837282e-08, "epoch": 4.747880683067452, "percentage": 94.96, "elapsed_time": "4:10:56", "remaining_time": "0:13:19", "throughput": 8698.24, "total_tokens": 130961384} +{"current_steps": 194350, "total_steps": 204665, "loss": 0.0, "lr": 1.5438388009549665e-08, "epoch": 4.748002833899299, "percentage": 94.96, "elapsed_time": "4:10:56", "remaining_time": "0:13:19", "throughput": 8698.28, "total_tokens": 130965096} +{"current_steps": 194355, "total_steps": 204665, "loss": 0.0, "lr": 1.5423464737258974e-08, "epoch": 4.748124984731146, "percentage": 94.96, "elapsed_time": "4:10:56", "remaining_time": "0:13:18", "throughput": 8698.31, "total_tokens": 130968488} +{"current_steps": 194360, "total_steps": 204665, "loss": 0.0, "lr": 1.540854862507357e-08, "epoch": 4.748247135562993, "percentage": 94.96, "elapsed_time": "4:10:57", "remaining_time": "0:13:18", "throughput": 8698.37, "total_tokens": 130972520} +{"current_steps": 194365, "total_steps": 204665, "loss": 0.0001, "lr": 1.5393639673102033e-08, "epoch": 4.748369286394841, "percentage": 94.97, "elapsed_time": "4:10:57", "remaining_time": "0:13:17", "throughput": 8698.38, "total_tokens": 130975720} +{"current_steps": 194370, "total_steps": 204665, "loss": 0.0, "lr": 1.537873788145283e-08, "epoch": 4.748491437226687, "percentage": 94.97, "elapsed_time": "4:10:57", "remaining_time": "0:13:17", "throughput": 8698.38, "total_tokens": 130978600} +{"current_steps": 194375, "total_steps": 204665, "loss": 0.0, "lr": 1.536384325023421e-08, "epoch": 4.748613588058535, "percentage": 94.97, "elapsed_time": "4:10:58", "remaining_time": "0:13:17", "throughput": 8698.41, "total_tokens": 130982120} +{"current_steps": 194380, "total_steps": 204665, "loss": 0.0, "lr": 1.534895577955464e-08, "epoch": 4.748735738890382, "percentage": 94.97, "elapsed_time": "4:10:58", "remaining_time": "0:13:16", "throughput": 8698.42, "total_tokens": 130985384} +{"current_steps": 194385, "total_steps": 204665, "loss": 0.0, "lr": 1.5334075469522146e-08, "epoch": 4.748857889722229, "percentage": 94.98, "elapsed_time": "4:10:58", "remaining_time": "0:13:16", "throughput": 8698.46, "total_tokens": 130988968} +{"current_steps": 194390, "total_steps": 204665, "loss": 0.0, "lr": 1.5319202320245305e-08, "epoch": 4.748980040554076, "percentage": 94.98, "elapsed_time": "4:10:59", "remaining_time": "0:13:15", "throughput": 8698.5, "total_tokens": 130992616} +{"current_steps": 194395, "total_steps": 204665, "loss": 0.0, "lr": 1.5304336331831924e-08, "epoch": 4.749102191385924, "percentage": 94.98, "elapsed_time": "4:10:59", "remaining_time": "0:13:15", "throughput": 8698.53, "total_tokens": 130996072} +{"current_steps": 194400, "total_steps": 204665, "loss": 0.0, "lr": 1.5289477504390358e-08, "epoch": 4.7492243422177705, "percentage": 94.98, "elapsed_time": "4:10:59", "remaining_time": "0:13:15", "throughput": 8698.53, "total_tokens": 130999144} +{"current_steps": 194405, "total_steps": 204665, "loss": 0.0, "lr": 1.527462583802852e-08, "epoch": 4.749346493049618, "percentage": 94.99, "elapsed_time": "4:11:00", "remaining_time": "0:13:14", "throughput": 8698.55, "total_tokens": 131002408} +{"current_steps": 194410, "total_steps": 204665, "loss": 0.0, "lr": 1.5259781332854436e-08, "epoch": 4.749468643881465, "percentage": 94.99, "elapsed_time": "4:11:00", "remaining_time": "0:13:14", "throughput": 8698.58, "total_tokens": 131005864} +{"current_steps": 194415, "total_steps": 204665, "loss": 0.0, "lr": 1.5244943988976135e-08, "epoch": 4.749590794713312, "percentage": 94.99, "elapsed_time": "4:11:00", "remaining_time": "0:13:14", "throughput": 8698.6, "total_tokens": 131009256} +{"current_steps": 194420, "total_steps": 204665, "loss": 0.0, "lr": 1.523011380650141e-08, "epoch": 4.749712945545159, "percentage": 94.99, "elapsed_time": "4:11:01", "remaining_time": "0:13:13", "throughput": 8698.64, "total_tokens": 131012904} +{"current_steps": 194425, "total_steps": 204665, "loss": 0.0, "lr": 1.521529078553818e-08, "epoch": 4.749835096377006, "percentage": 95.0, "elapsed_time": "4:11:01", "remaining_time": "0:13:13", "throughput": 8698.66, "total_tokens": 131016232} +{"current_steps": 194430, "total_steps": 204665, "loss": 0.0006, "lr": 1.5200474926194363e-08, "epoch": 4.749957247208854, "percentage": 95.0, "elapsed_time": "4:11:02", "remaining_time": "0:13:12", "throughput": 8698.68, "total_tokens": 131019624} +{"current_steps": 194435, "total_steps": 204665, "loss": 0.0058, "lr": 1.518566622857742e-08, "epoch": 4.7500793980407, "percentage": 95.0, "elapsed_time": "4:11:02", "remaining_time": "0:13:12", "throughput": 8698.7, "total_tokens": 131022824} +{"current_steps": 194440, "total_steps": 204665, "loss": 0.0, "lr": 1.5170864692795272e-08, "epoch": 4.750201548872548, "percentage": 95.0, "elapsed_time": "4:11:02", "remaining_time": "0:13:12", "throughput": 8698.75, "total_tokens": 131026792} +{"current_steps": 194445, "total_steps": 204665, "loss": 0.0727, "lr": 1.5156070318955384e-08, "epoch": 4.750323699704395, "percentage": 95.01, "elapsed_time": "4:11:03", "remaining_time": "0:13:11", "throughput": 8698.75, "total_tokens": 131029672} +{"current_steps": 194446, "total_steps": 204665, "eval_loss": 0.3178049325942993, "epoch": 4.750348129870765, "percentage": 95.01, "elapsed_time": "4:11:50", "remaining_time": "0:13:14", "throughput": 8671.38, "total_tokens": 131030440} +{"current_steps": 194450, "total_steps": 204665, "loss": 0.0, "lr": 1.514128310716556e-08, "epoch": 4.750445850536242, "percentage": 95.01, "elapsed_time": "4:12:24", "remaining_time": "0:13:15", "throughput": 8652.05, "total_tokens": 131033192} +{"current_steps": 194455, "total_steps": 204665, "loss": 0.0, "lr": 1.512650305753316e-08, "epoch": 4.750568001368089, "percentage": 95.01, "elapsed_time": "4:12:25", "remaining_time": "0:13:15", "throughput": 8652.06, "total_tokens": 131036264} +{"current_steps": 194460, "total_steps": 204665, "loss": 0.0, "lr": 1.511173017016576e-08, "epoch": 4.750690152199937, "percentage": 95.01, "elapsed_time": "4:12:25", "remaining_time": "0:13:14", "throughput": 8652.06, "total_tokens": 131039144} +{"current_steps": 194465, "total_steps": 204665, "loss": 0.0, "lr": 1.5096964445170723e-08, "epoch": 4.7508123030317835, "percentage": 95.02, "elapsed_time": "4:12:25", "remaining_time": "0:13:14", "throughput": 8652.11, "total_tokens": 131042984} +{"current_steps": 194470, "total_steps": 204665, "loss": 0.0, "lr": 1.5082205882655518e-08, "epoch": 4.750934453863631, "percentage": 95.02, "elapsed_time": "4:12:26", "remaining_time": "0:13:14", "throughput": 8652.14, "total_tokens": 131046504} +{"current_steps": 194475, "total_steps": 204665, "loss": 0.0, "lr": 1.506745448272728e-08, "epoch": 4.751056604695478, "percentage": 95.02, "elapsed_time": "4:12:26", "remaining_time": "0:13:13", "throughput": 8652.16, "total_tokens": 131049896} +{"current_steps": 194480, "total_steps": 204665, "loss": 0.0, "lr": 1.5052710245493593e-08, "epoch": 4.7511787555273255, "percentage": 95.02, "elapsed_time": "4:12:26", "remaining_time": "0:13:13", "throughput": 8652.19, "total_tokens": 131053288} +{"current_steps": 194485, "total_steps": 204665, "loss": 0.0, "lr": 1.503797317106148e-08, "epoch": 4.751300906359172, "percentage": 95.03, "elapsed_time": "4:12:27", "remaining_time": "0:13:12", "throughput": 8652.21, "total_tokens": 131056680} +{"current_steps": 194490, "total_steps": 204665, "loss": 0.0, "lr": 1.5023243259538078e-08, "epoch": 4.751423057191019, "percentage": 95.03, "elapsed_time": "4:12:27", "remaining_time": "0:13:12", "throughput": 8652.28, "total_tokens": 131060840} +{"current_steps": 194495, "total_steps": 204665, "loss": 0.0, "lr": 1.5008520511030632e-08, "epoch": 4.751545208022867, "percentage": 95.03, "elapsed_time": "4:12:27", "remaining_time": "0:13:12", "throughput": 8652.29, "total_tokens": 131064104} +{"current_steps": 194500, "total_steps": 204665, "loss": 0.0, "lr": 1.499380492564617e-08, "epoch": 4.751667358854714, "percentage": 95.03, "elapsed_time": "4:12:28", "remaining_time": "0:13:11", "throughput": 8652.32, "total_tokens": 131067624} +{"current_steps": 194505, "total_steps": 204665, "loss": 0.0, "lr": 1.497909650349172e-08, "epoch": 4.751789509686561, "percentage": 95.04, "elapsed_time": "4:12:28", "remaining_time": "0:13:11", "throughput": 8652.35, "total_tokens": 131071144} +{"current_steps": 194510, "total_steps": 204665, "loss": 0.0489, "lr": 1.4964395244674077e-08, "epoch": 4.751911660518408, "percentage": 95.04, "elapsed_time": "4:12:28", "remaining_time": "0:13:10", "throughput": 8652.39, "total_tokens": 131074728} +{"current_steps": 194515, "total_steps": 204665, "loss": 0.0, "lr": 1.4949701149300385e-08, "epoch": 4.752033811350255, "percentage": 95.04, "elapsed_time": "4:12:29", "remaining_time": "0:13:10", "throughput": 8652.4, "total_tokens": 131077992} +{"current_steps": 194520, "total_steps": 204665, "loss": 0.0, "lr": 1.493501421747745e-08, "epoch": 4.752155962182102, "percentage": 95.04, "elapsed_time": "4:12:29", "remaining_time": "0:13:10", "throughput": 8652.42, "total_tokens": 131081192} +{"current_steps": 194525, "total_steps": 204665, "loss": 0.0, "lr": 1.4920334449311957e-08, "epoch": 4.75227811301395, "percentage": 95.05, "elapsed_time": "4:12:30", "remaining_time": "0:13:09", "throughput": 8652.43, "total_tokens": 131084328} +{"current_steps": 194530, "total_steps": 204665, "loss": 0.0, "lr": 1.4905661844910934e-08, "epoch": 4.7524002638457965, "percentage": 95.05, "elapsed_time": "4:12:30", "remaining_time": "0:13:09", "throughput": 8652.46, "total_tokens": 131087784} +{"current_steps": 194535, "total_steps": 204665, "loss": 0.0, "lr": 1.489099640438074e-08, "epoch": 4.752522414677644, "percentage": 95.05, "elapsed_time": "4:12:30", "remaining_time": "0:13:08", "throughput": 8652.48, "total_tokens": 131091112} +{"current_steps": 194540, "total_steps": 204665, "loss": 0.0, "lr": 1.4876338127828181e-08, "epoch": 4.752644565509491, "percentage": 95.05, "elapsed_time": "4:12:31", "remaining_time": "0:13:08", "throughput": 8652.49, "total_tokens": 131094248} +{"current_steps": 194545, "total_steps": 204665, "loss": 0.0, "lr": 1.4861687015359947e-08, "epoch": 4.7527667163413385, "percentage": 95.06, "elapsed_time": "4:12:31", "remaining_time": "0:13:08", "throughput": 8652.49, "total_tokens": 131097192} +{"current_steps": 194550, "total_steps": 204665, "loss": 0.0266, "lr": 1.4847043067082398e-08, "epoch": 4.752888867173185, "percentage": 95.06, "elapsed_time": "4:12:31", "remaining_time": "0:13:07", "throughput": 8652.51, "total_tokens": 131100456} +{"current_steps": 194555, "total_steps": 204665, "loss": 0.0313, "lr": 1.4832406283102228e-08, "epoch": 4.753011018005033, "percentage": 95.06, "elapsed_time": "4:12:32", "remaining_time": "0:13:07", "throughput": 8652.52, "total_tokens": 131103720} +{"current_steps": 194560, "total_steps": 204665, "loss": 0.0, "lr": 1.4817776663525683e-08, "epoch": 4.75313316883688, "percentage": 95.06, "elapsed_time": "4:12:32", "remaining_time": "0:13:06", "throughput": 8652.54, "total_tokens": 131106920} +{"current_steps": 194565, "total_steps": 204665, "loss": 0.0, "lr": 1.4803154208459233e-08, "epoch": 4.753255319668727, "percentage": 95.07, "elapsed_time": "4:12:32", "remaining_time": "0:13:06", "throughput": 8652.54, "total_tokens": 131109992} +{"current_steps": 194570, "total_steps": 204665, "loss": 0.0, "lr": 1.4788538918009242e-08, "epoch": 4.753377470500574, "percentage": 95.07, "elapsed_time": "4:12:33", "remaining_time": "0:13:06", "throughput": 8652.58, "total_tokens": 131113640} +{"current_steps": 194575, "total_steps": 204665, "loss": 0.0, "lr": 1.4773930792282064e-08, "epoch": 4.753499621332422, "percentage": 95.07, "elapsed_time": "4:12:33", "remaining_time": "0:13:05", "throughput": 8652.59, "total_tokens": 131116776} +{"current_steps": 194580, "total_steps": 204665, "loss": 0.0, "lr": 1.4759329831383837e-08, "epoch": 4.753621772164268, "percentage": 95.07, "elapsed_time": "4:12:33", "remaining_time": "0:13:05", "throughput": 8652.63, "total_tokens": 131120424} +{"current_steps": 194585, "total_steps": 204665, "loss": 0.0, "lr": 1.4744736035420702e-08, "epoch": 4.753743922996115, "percentage": 95.07, "elapsed_time": "4:12:34", "remaining_time": "0:13:05", "throughput": 8652.65, "total_tokens": 131123752} +{"current_steps": 194590, "total_steps": 204665, "loss": 0.0, "lr": 1.4730149404498905e-08, "epoch": 4.753866073827963, "percentage": 95.08, "elapsed_time": "4:12:34", "remaining_time": "0:13:04", "throughput": 8652.66, "total_tokens": 131126888} +{"current_steps": 194595, "total_steps": 204665, "loss": 0.0, "lr": 1.4715569938724359e-08, "epoch": 4.75398822465981, "percentage": 95.08, "elapsed_time": "4:12:34", "remaining_time": "0:13:04", "throughput": 8652.69, "total_tokens": 131130344} +{"current_steps": 194600, "total_steps": 204665, "loss": 0.0, "lr": 1.4700997638203316e-08, "epoch": 4.754110375491657, "percentage": 95.08, "elapsed_time": "4:12:35", "remaining_time": "0:13:03", "throughput": 8652.72, "total_tokens": 131133800} +{"current_steps": 194605, "total_steps": 204665, "loss": 0.0, "lr": 1.468643250304158e-08, "epoch": 4.754232526323504, "percentage": 95.08, "elapsed_time": "4:12:35", "remaining_time": "0:13:03", "throughput": 8652.73, "total_tokens": 131136872} +{"current_steps": 194610, "total_steps": 204665, "loss": 0.0, "lr": 1.4671874533345064e-08, "epoch": 4.7543546771553515, "percentage": 95.09, "elapsed_time": "4:12:35", "remaining_time": "0:13:03", "throughput": 8652.75, "total_tokens": 131140200} +{"current_steps": 194615, "total_steps": 204665, "loss": 0.0, "lr": 1.4657323729219906e-08, "epoch": 4.754476827987198, "percentage": 95.09, "elapsed_time": "4:12:36", "remaining_time": "0:13:02", "throughput": 8652.79, "total_tokens": 131143912} +{"current_steps": 194620, "total_steps": 204665, "loss": 0.0, "lr": 1.4642780090771467e-08, "epoch": 4.754598978819046, "percentage": 95.09, "elapsed_time": "4:12:36", "remaining_time": "0:13:02", "throughput": 8652.84, "total_tokens": 131147944} +{"current_steps": 194625, "total_steps": 204665, "loss": 0.0, "lr": 1.4628243618105996e-08, "epoch": 4.754721129650893, "percentage": 95.09, "elapsed_time": "4:12:36", "remaining_time": "0:13:01", "throughput": 8652.87, "total_tokens": 131151272} +{"current_steps": 194630, "total_steps": 204665, "loss": 0.0, "lr": 1.4613714311328739e-08, "epoch": 4.75484328048274, "percentage": 95.1, "elapsed_time": "4:12:37", "remaining_time": "0:13:01", "throughput": 8652.91, "total_tokens": 131155112} +{"current_steps": 194635, "total_steps": 204665, "loss": 0.0, "lr": 1.4599192170545838e-08, "epoch": 4.754965431314587, "percentage": 95.1, "elapsed_time": "4:12:37", "remaining_time": "0:13:01", "throughput": 8652.94, "total_tokens": 131158504} +{"current_steps": 194640, "total_steps": 204665, "loss": 0.0, "lr": 1.4584677195862538e-08, "epoch": 4.755087582146435, "percentage": 95.1, "elapsed_time": "4:12:38", "remaining_time": "0:13:00", "throughput": 8652.96, "total_tokens": 131161832} +{"current_steps": 194645, "total_steps": 204665, "loss": 0.0, "lr": 1.4570169387384424e-08, "epoch": 4.755209732978281, "percentage": 95.1, "elapsed_time": "4:12:38", "remaining_time": "0:13:00", "throughput": 8653.01, "total_tokens": 131165864} +{"current_steps": 194650, "total_steps": 204665, "loss": 0.0, "lr": 1.4555668745217186e-08, "epoch": 4.755331883810129, "percentage": 95.11, "elapsed_time": "4:12:38", "remaining_time": "0:12:59", "throughput": 8653.05, "total_tokens": 131169448} +{"current_steps": 194655, "total_steps": 204665, "loss": 0.0, "lr": 1.4541175269466078e-08, "epoch": 4.755454034641976, "percentage": 95.11, "elapsed_time": "4:12:39", "remaining_time": "0:12:59", "throughput": 8653.08, "total_tokens": 131173096} +{"current_steps": 194660, "total_steps": 204665, "loss": 0.0, "lr": 1.4526688960236788e-08, "epoch": 4.755576185473823, "percentage": 95.11, "elapsed_time": "4:12:39", "remaining_time": "0:12:59", "throughput": 8653.09, "total_tokens": 131176232} +{"current_steps": 194665, "total_steps": 204665, "loss": 0.0, "lr": 1.4512209817634235e-08, "epoch": 4.75569833630567, "percentage": 95.11, "elapsed_time": "4:12:39", "remaining_time": "0:12:58", "throughput": 8653.11, "total_tokens": 131179560} +{"current_steps": 194670, "total_steps": 204665, "loss": 0.0, "lr": 1.4497737841764114e-08, "epoch": 4.755820487137518, "percentage": 95.12, "elapsed_time": "4:12:40", "remaining_time": "0:12:58", "throughput": 8653.12, "total_tokens": 131182696} +{"current_steps": 194675, "total_steps": 204665, "loss": 0.0, "lr": 1.4483273032731447e-08, "epoch": 4.755942637969365, "percentage": 95.12, "elapsed_time": "4:12:40", "remaining_time": "0:12:57", "throughput": 8653.15, "total_tokens": 131186024} +{"current_steps": 194680, "total_steps": 204665, "loss": 0.0, "lr": 1.4468815390641486e-08, "epoch": 4.756064788801211, "percentage": 95.12, "elapsed_time": "4:12:40", "remaining_time": "0:12:57", "throughput": 8653.17, "total_tokens": 131189416} +{"current_steps": 194685, "total_steps": 204665, "loss": 0.0, "lr": 1.4454364915599482e-08, "epoch": 4.756186939633059, "percentage": 95.12, "elapsed_time": "4:12:41", "remaining_time": "0:12:57", "throughput": 8653.21, "total_tokens": 131193064} +{"current_steps": 194690, "total_steps": 204665, "loss": 0.0, "lr": 1.4439921607710348e-08, "epoch": 4.756309090464906, "percentage": 95.13, "elapsed_time": "4:12:41", "remaining_time": "0:12:56", "throughput": 8653.23, "total_tokens": 131196392} +{"current_steps": 194695, "total_steps": 204665, "loss": 0.0, "lr": 1.4425485467079113e-08, "epoch": 4.756431241296753, "percentage": 95.13, "elapsed_time": "4:12:41", "remaining_time": "0:12:56", "throughput": 8653.24, "total_tokens": 131199528} +{"current_steps": 194700, "total_steps": 204665, "loss": 0.0, "lr": 1.4411056493810913e-08, "epoch": 4.7565533921286, "percentage": 95.13, "elapsed_time": "4:12:42", "remaining_time": "0:12:56", "throughput": 8653.27, "total_tokens": 131203176} +{"current_steps": 194705, "total_steps": 204665, "loss": 0.0, "lr": 1.4396634688010556e-08, "epoch": 4.756675542960448, "percentage": 95.13, "elapsed_time": "4:12:42", "remaining_time": "0:12:55", "throughput": 8653.28, "total_tokens": 131206248} +{"current_steps": 194710, "total_steps": 204665, "loss": 0.0, "lr": 1.4382220049783068e-08, "epoch": 4.7567976937922944, "percentage": 95.14, "elapsed_time": "4:12:42", "remaining_time": "0:12:55", "throughput": 8653.33, "total_tokens": 131210088} +{"current_steps": 194715, "total_steps": 204665, "loss": 0.0, "lr": 1.436781257923303e-08, "epoch": 4.756919844624142, "percentage": 95.14, "elapsed_time": "4:12:43", "remaining_time": "0:12:54", "throughput": 8653.34, "total_tokens": 131213224} +{"current_steps": 194720, "total_steps": 204665, "loss": 0.0, "lr": 1.4353412276465471e-08, "epoch": 4.757041995455989, "percentage": 95.14, "elapsed_time": "4:12:43", "remaining_time": "0:12:54", "throughput": 8653.35, "total_tokens": 131216488} +{"current_steps": 194725, "total_steps": 204665, "loss": 0.0, "lr": 1.4339019141584973e-08, "epoch": 4.7571641462878365, "percentage": 95.14, "elapsed_time": "4:12:44", "remaining_time": "0:12:54", "throughput": 8653.38, "total_tokens": 131219944} +{"current_steps": 194730, "total_steps": 204665, "loss": 0.0, "lr": 1.4324633174696343e-08, "epoch": 4.757286297119683, "percentage": 95.15, "elapsed_time": "4:12:44", "remaining_time": "0:12:53", "throughput": 8653.5, "total_tokens": 131225256} +{"current_steps": 194735, "total_steps": 204665, "loss": 0.0, "lr": 1.4310254375903941e-08, "epoch": 4.757408447951531, "percentage": 95.15, "elapsed_time": "4:12:44", "remaining_time": "0:12:53", "throughput": 8653.53, "total_tokens": 131228712} +{"current_steps": 194740, "total_steps": 204665, "loss": 0.0, "lr": 1.429588274531257e-08, "epoch": 4.757530598783378, "percentage": 95.15, "elapsed_time": "4:12:45", "remaining_time": "0:12:52", "throughput": 8653.54, "total_tokens": 131231912} +{"current_steps": 194745, "total_steps": 204665, "loss": 0.0004, "lr": 1.4281518283026595e-08, "epoch": 4.757652749615225, "percentage": 95.15, "elapsed_time": "4:12:45", "remaining_time": "0:12:52", "throughput": 8653.55, "total_tokens": 131234984} +{"current_steps": 194750, "total_steps": 204665, "loss": 0.0, "lr": 1.4267160989150595e-08, "epoch": 4.757774900447072, "percentage": 95.16, "elapsed_time": "4:12:45", "remaining_time": "0:12:52", "throughput": 8653.55, "total_tokens": 131237992} +{"current_steps": 194755, "total_steps": 204665, "loss": 0.0, "lr": 1.4252810863788932e-08, "epoch": 4.75789705127892, "percentage": 95.16, "elapsed_time": "4:12:46", "remaining_time": "0:12:51", "throughput": 8653.53, "total_tokens": 131240744} +{"current_steps": 194760, "total_steps": 204665, "loss": 0.0, "lr": 1.4238467907045971e-08, "epoch": 4.758019202110766, "percentage": 95.16, "elapsed_time": "4:12:46", "remaining_time": "0:12:51", "throughput": 8653.54, "total_tokens": 131243752} +{"current_steps": 194765, "total_steps": 204665, "loss": 0.0, "lr": 1.4224132119025956e-08, "epoch": 4.758141352942614, "percentage": 95.16, "elapsed_time": "4:12:46", "remaining_time": "0:12:50", "throughput": 8653.55, "total_tokens": 131246952} +{"current_steps": 194770, "total_steps": 204665, "loss": 0.0, "lr": 1.420980349983325e-08, "epoch": 4.758263503774461, "percentage": 95.17, "elapsed_time": "4:12:47", "remaining_time": "0:12:50", "throughput": 8653.55, "total_tokens": 131250024} +{"current_steps": 194775, "total_steps": 204665, "loss": 0.0, "lr": 1.4195482049571993e-08, "epoch": 4.7583856546063075, "percentage": 95.17, "elapsed_time": "4:12:47", "remaining_time": "0:12:50", "throughput": 8653.55, "total_tokens": 131253032} +{"current_steps": 194780, "total_steps": 204665, "loss": 0.0, "lr": 1.4181167768346324e-08, "epoch": 4.758507805438155, "percentage": 95.17, "elapsed_time": "4:12:47", "remaining_time": "0:12:49", "throughput": 8653.57, "total_tokens": 131256360} +{"current_steps": 194785, "total_steps": 204665, "loss": 0.0, "lr": 1.4166860656260271e-08, "epoch": 4.758629956270002, "percentage": 95.17, "elapsed_time": "4:12:48", "remaining_time": "0:12:49", "throughput": 8653.6, "total_tokens": 131259880} +{"current_steps": 194790, "total_steps": 204665, "loss": 0.0, "lr": 1.4152560713418082e-08, "epoch": 4.7587521071018495, "percentage": 95.18, "elapsed_time": "4:12:48", "remaining_time": "0:12:48", "throughput": 8653.63, "total_tokens": 131263464} +{"current_steps": 194795, "total_steps": 204665, "loss": 0.0, "lr": 1.4138267939923565e-08, "epoch": 4.758874257933696, "percentage": 95.18, "elapsed_time": "4:12:48", "remaining_time": "0:12:48", "throughput": 8653.65, "total_tokens": 131266984} +{"current_steps": 194800, "total_steps": 204665, "loss": 0.0, "lr": 1.4123982335880746e-08, "epoch": 4.758996408765544, "percentage": 95.18, "elapsed_time": "4:12:49", "remaining_time": "0:12:48", "throughput": 8653.66, "total_tokens": 131270184} +{"current_steps": 194805, "total_steps": 204665, "loss": 0.0, "lr": 1.4109703901393543e-08, "epoch": 4.759118559597391, "percentage": 95.18, "elapsed_time": "4:12:49", "remaining_time": "0:12:47", "throughput": 8653.74, "total_tokens": 131274536} +{"current_steps": 194810, "total_steps": 204665, "loss": 0.0, "lr": 1.4095432636565763e-08, "epoch": 4.759240710429238, "percentage": 95.18, "elapsed_time": "4:12:50", "remaining_time": "0:12:47", "throughput": 8653.76, "total_tokens": 131278056} +{"current_steps": 194815, "total_steps": 204665, "loss": 0.0, "lr": 1.4081168541501099e-08, "epoch": 4.759362861261085, "percentage": 95.19, "elapsed_time": "4:12:50", "remaining_time": "0:12:47", "throughput": 8653.79, "total_tokens": 131281512} +{"current_steps": 194820, "total_steps": 204665, "loss": 0.0, "lr": 1.4066911616303357e-08, "epoch": 4.759485012092933, "percentage": 95.19, "elapsed_time": "4:12:50", "remaining_time": "0:12:46", "throughput": 8653.82, "total_tokens": 131285096} +{"current_steps": 194825, "total_steps": 204665, "loss": 0.0, "lr": 1.4052661861076232e-08, "epoch": 4.759607162924779, "percentage": 95.19, "elapsed_time": "4:12:51", "remaining_time": "0:12:46", "throughput": 8653.85, "total_tokens": 131288680} +{"current_steps": 194830, "total_steps": 204665, "loss": 0.0, "lr": 1.4038419275923419e-08, "epoch": 4.759729313756627, "percentage": 95.19, "elapsed_time": "4:12:51", "remaining_time": "0:12:45", "throughput": 8653.84, "total_tokens": 131291496} +{"current_steps": 194835, "total_steps": 204665, "loss": 0.0, "lr": 1.402418386094828e-08, "epoch": 4.759851464588474, "percentage": 95.2, "elapsed_time": "4:12:51", "remaining_time": "0:12:45", "throughput": 8653.84, "total_tokens": 131294440} +{"current_steps": 194840, "total_steps": 204665, "loss": 0.0, "lr": 1.400995561625451e-08, "epoch": 4.759973615420321, "percentage": 95.2, "elapsed_time": "4:12:52", "remaining_time": "0:12:45", "throughput": 8653.87, "total_tokens": 131297960} +{"current_steps": 194845, "total_steps": 204665, "loss": 0.0, "lr": 1.3995734541945692e-08, "epoch": 4.760095766252168, "percentage": 95.2, "elapsed_time": "4:12:52", "remaining_time": "0:12:44", "throughput": 8653.9, "total_tokens": 131301480} +{"current_steps": 194850, "total_steps": 204665, "loss": 0.0, "lr": 1.3981520638124855e-08, "epoch": 4.760217917084015, "percentage": 95.2, "elapsed_time": "4:12:52", "remaining_time": "0:12:44", "throughput": 8653.92, "total_tokens": 131304872} +{"current_steps": 194855, "total_steps": 204665, "loss": 0.0, "lr": 1.3967313904895805e-08, "epoch": 4.7603400679158625, "percentage": 95.21, "elapsed_time": "4:12:53", "remaining_time": "0:12:43", "throughput": 8653.95, "total_tokens": 131308392} +{"current_steps": 194860, "total_steps": 204665, "loss": 0.001, "lr": 1.3953114342361571e-08, "epoch": 4.76046221874771, "percentage": 95.21, "elapsed_time": "4:12:53", "remaining_time": "0:12:43", "throughput": 8653.97, "total_tokens": 131311912} +{"current_steps": 194865, "total_steps": 204665, "loss": 0.0, "lr": 1.3938921950625515e-08, "epoch": 4.760584369579557, "percentage": 95.21, "elapsed_time": "4:12:53", "remaining_time": "0:12:43", "throughput": 8654.0, "total_tokens": 131315368} +{"current_steps": 194870, "total_steps": 204665, "loss": 0.0, "lr": 1.3924736729790775e-08, "epoch": 4.760706520411404, "percentage": 95.21, "elapsed_time": "4:12:54", "remaining_time": "0:12:42", "throughput": 8654.07, "total_tokens": 131319592} +{"current_steps": 194875, "total_steps": 204665, "loss": 0.0, "lr": 1.3910558679960715e-08, "epoch": 4.760828671243251, "percentage": 95.22, "elapsed_time": "4:12:54", "remaining_time": "0:12:42", "throughput": 8654.12, "total_tokens": 131323624} +{"current_steps": 194880, "total_steps": 204665, "loss": 0.0087, "lr": 1.3896387801238141e-08, "epoch": 4.760950822075098, "percentage": 95.22, "elapsed_time": "4:12:55", "remaining_time": "0:12:41", "throughput": 8654.17, "total_tokens": 131327400} +{"current_steps": 194885, "total_steps": 204665, "loss": 0.0, "lr": 1.3882224093726302e-08, "epoch": 4.761072972906946, "percentage": 95.22, "elapsed_time": "4:12:55", "remaining_time": "0:12:41", "throughput": 8654.18, "total_tokens": 131330600} +{"current_steps": 194890, "total_steps": 204665, "loss": 0.0, "lr": 1.3868067557528228e-08, "epoch": 4.761195123738792, "percentage": 95.22, "elapsed_time": "4:12:55", "remaining_time": "0:12:41", "throughput": 8654.19, "total_tokens": 131333800} +{"current_steps": 194895, "total_steps": 204665, "loss": 0.0, "lr": 1.3853918192746839e-08, "epoch": 4.76131727457064, "percentage": 95.23, "elapsed_time": "4:12:56", "remaining_time": "0:12:40", "throughput": 8654.2, "total_tokens": 131336936} +{"current_steps": 194900, "total_steps": 204665, "loss": 0.0, "lr": 1.3839775999484938e-08, "epoch": 4.761439425402487, "percentage": 95.23, "elapsed_time": "4:12:56", "remaining_time": "0:12:40", "throughput": 8654.23, "total_tokens": 131340456} +{"current_steps": 194905, "total_steps": 204665, "loss": 0.0, "lr": 1.3825640977845333e-08, "epoch": 4.761561576234334, "percentage": 95.23, "elapsed_time": "4:12:56", "remaining_time": "0:12:39", "throughput": 8654.27, "total_tokens": 131344168} +{"current_steps": 194910, "total_steps": 204665, "loss": 0.0, "lr": 1.3811513127931052e-08, "epoch": 4.761683727066181, "percentage": 95.23, "elapsed_time": "4:12:57", "remaining_time": "0:12:39", "throughput": 8654.3, "total_tokens": 131347752} +{"current_steps": 194915, "total_steps": 204665, "loss": 0.0002, "lr": 1.379739244984468e-08, "epoch": 4.761805877898029, "percentage": 95.24, "elapsed_time": "4:12:57", "remaining_time": "0:12:39", "throughput": 8654.3, "total_tokens": 131350696} +{"current_steps": 194920, "total_steps": 204665, "loss": 0.0, "lr": 1.3783278943688914e-08, "epoch": 4.7619280287298755, "percentage": 95.24, "elapsed_time": "4:12:57", "remaining_time": "0:12:38", "throughput": 8654.33, "total_tokens": 131354216} +{"current_steps": 194925, "total_steps": 204665, "loss": 0.0, "lr": 1.3769172609566337e-08, "epoch": 4.762050179561723, "percentage": 95.24, "elapsed_time": "4:12:58", "remaining_time": "0:12:38", "throughput": 8654.34, "total_tokens": 131357352} +{"current_steps": 194930, "total_steps": 204665, "loss": 0.0, "lr": 1.3755073447579646e-08, "epoch": 4.76217233039357, "percentage": 95.24, "elapsed_time": "4:12:58", "remaining_time": "0:12:38", "throughput": 8654.34, "total_tokens": 131360360} +{"current_steps": 194935, "total_steps": 204665, "loss": 0.0, "lr": 1.3740981457831424e-08, "epoch": 4.7622944812254175, "percentage": 95.25, "elapsed_time": "4:12:58", "remaining_time": "0:12:37", "throughput": 8654.36, "total_tokens": 131363688} +{"current_steps": 194940, "total_steps": 204665, "loss": 0.0, "lr": 1.3726896640423924e-08, "epoch": 4.762416632057264, "percentage": 95.25, "elapsed_time": "4:12:59", "remaining_time": "0:12:37", "throughput": 8654.38, "total_tokens": 131367016} +{"current_steps": 194945, "total_steps": 204665, "loss": 0.0, "lr": 1.3712818995459841e-08, "epoch": 4.762538782889111, "percentage": 95.25, "elapsed_time": "4:12:59", "remaining_time": "0:12:36", "throughput": 8654.42, "total_tokens": 131370600} +{"current_steps": 194950, "total_steps": 204665, "loss": 0.0, "lr": 1.3698748523041314e-08, "epoch": 4.762660933720959, "percentage": 95.25, "elapsed_time": "4:12:59", "remaining_time": "0:12:36", "throughput": 8654.43, "total_tokens": 131373864} +{"current_steps": 194955, "total_steps": 204665, "loss": 0.0, "lr": 1.368468522327082e-08, "epoch": 4.762783084552806, "percentage": 95.26, "elapsed_time": "4:13:00", "remaining_time": "0:12:36", "throughput": 8654.46, "total_tokens": 131377384} +{"current_steps": 194960, "total_steps": 204665, "loss": 0.0, "lr": 1.3670629096250496e-08, "epoch": 4.762905235384653, "percentage": 95.26, "elapsed_time": "4:13:00", "remaining_time": "0:12:35", "throughput": 8654.47, "total_tokens": 131380520} +{"current_steps": 194965, "total_steps": 204665, "loss": 0.0, "lr": 1.3656580142082707e-08, "epoch": 4.7630273862165, "percentage": 95.26, "elapsed_time": "4:13:01", "remaining_time": "0:12:35", "throughput": 8654.49, "total_tokens": 131383912} +{"current_steps": 194970, "total_steps": 204665, "loss": 0.0, "lr": 1.3642538360869593e-08, "epoch": 4.763149537048347, "percentage": 95.26, "elapsed_time": "4:13:01", "remaining_time": "0:12:34", "throughput": 8654.51, "total_tokens": 131387240} +{"current_steps": 194975, "total_steps": 204665, "loss": 0.0, "lr": 1.3628503752713183e-08, "epoch": 4.763271687880194, "percentage": 95.27, "elapsed_time": "4:13:01", "remaining_time": "0:12:34", "throughput": 8654.52, "total_tokens": 131390312} +{"current_steps": 194980, "total_steps": 204665, "loss": 0.0, "lr": 1.3614476317715618e-08, "epoch": 4.763393838712042, "percentage": 95.27, "elapsed_time": "4:13:02", "remaining_time": "0:12:34", "throughput": 8654.54, "total_tokens": 131393704} +{"current_steps": 194985, "total_steps": 204665, "loss": 0.0, "lr": 1.3600456055978926e-08, "epoch": 4.7635159895438886, "percentage": 95.27, "elapsed_time": "4:13:02", "remaining_time": "0:12:33", "throughput": 8654.56, "total_tokens": 131397160} +{"current_steps": 194990, "total_steps": 204665, "loss": 0.0, "lr": 1.3586442967604916e-08, "epoch": 4.763638140375736, "percentage": 95.27, "elapsed_time": "4:13:02", "remaining_time": "0:12:33", "throughput": 8654.6, "total_tokens": 131400872} +{"current_steps": 194995, "total_steps": 204665, "loss": 0.0, "lr": 1.3572437052695729e-08, "epoch": 4.763760291207583, "percentage": 95.28, "elapsed_time": "4:13:03", "remaining_time": "0:12:32", "throughput": 8654.6, "total_tokens": 131403944} +{"current_steps": 195000, "total_steps": 204665, "loss": 0.0, "lr": 1.355843831135306e-08, "epoch": 4.763882442039431, "percentage": 95.28, "elapsed_time": "4:13:03", "remaining_time": "0:12:32", "throughput": 8654.61, "total_tokens": 131407144} +{"current_steps": 195005, "total_steps": 204665, "loss": 0.0, "lr": 1.3544446743678717e-08, "epoch": 4.764004592871277, "percentage": 95.28, "elapsed_time": "4:13:03", "remaining_time": "0:12:32", "throughput": 8654.64, "total_tokens": 131410728} +{"current_steps": 195010, "total_steps": 204665, "loss": 0.0, "lr": 1.3530462349774508e-08, "epoch": 4.764126743703125, "percentage": 95.28, "elapsed_time": "4:13:04", "remaining_time": "0:12:31", "throughput": 8654.68, "total_tokens": 131414440} +{"current_steps": 195015, "total_steps": 204665, "loss": 0.0, "lr": 1.3516485129742018e-08, "epoch": 4.764248894534972, "percentage": 95.28, "elapsed_time": "4:13:04", "remaining_time": "0:12:31", "throughput": 8654.71, "total_tokens": 131417896} +{"current_steps": 195020, "total_steps": 204665, "loss": 0.0, "lr": 1.3502515083683164e-08, "epoch": 4.764371045366819, "percentage": 95.29, "elapsed_time": "4:13:04", "remaining_time": "0:12:30", "throughput": 8654.71, "total_tokens": 131420968} +{"current_steps": 195025, "total_steps": 204665, "loss": 0.0, "lr": 1.34885522116992e-08, "epoch": 4.764493196198666, "percentage": 95.29, "elapsed_time": "4:13:05", "remaining_time": "0:12:30", "throughput": 8654.72, "total_tokens": 131424104} +{"current_steps": 195030, "total_steps": 204665, "loss": 0.0, "lr": 1.3474596513891935e-08, "epoch": 4.764615347030514, "percentage": 95.29, "elapsed_time": "4:13:05", "remaining_time": "0:12:30", "throughput": 8654.73, "total_tokens": 131427304} +{"current_steps": 195035, "total_steps": 204665, "loss": 0.0, "lr": 1.3460647990362617e-08, "epoch": 4.7647374978623604, "percentage": 95.29, "elapsed_time": "4:13:05", "remaining_time": "0:12:29", "throughput": 8654.76, "total_tokens": 131430760} +{"current_steps": 195040, "total_steps": 204665, "loss": 0.0, "lr": 1.3446706641212946e-08, "epoch": 4.764859648694207, "percentage": 95.3, "elapsed_time": "4:13:06", "remaining_time": "0:12:29", "throughput": 8654.76, "total_tokens": 131433768} +{"current_steps": 195045, "total_steps": 204665, "loss": 0.0, "lr": 1.3432772466544062e-08, "epoch": 4.764981799526055, "percentage": 95.3, "elapsed_time": "4:13:06", "remaining_time": "0:12:29", "throughput": 8654.78, "total_tokens": 131437096} +{"current_steps": 195050, "total_steps": 204665, "loss": 0.0, "lr": 1.341884546645744e-08, "epoch": 4.765103950357902, "percentage": 95.3, "elapsed_time": "4:13:06", "remaining_time": "0:12:28", "throughput": 8654.79, "total_tokens": 131440296} +{"current_steps": 195055, "total_steps": 204665, "loss": 0.0, "lr": 1.3404925641054331e-08, "epoch": 4.765226101189749, "percentage": 95.3, "elapsed_time": "4:13:07", "remaining_time": "0:12:28", "throughput": 8654.82, "total_tokens": 131443752} +{"current_steps": 195060, "total_steps": 204665, "loss": 0.0039, "lr": 1.3391012990436101e-08, "epoch": 4.765348252021596, "percentage": 95.31, "elapsed_time": "4:13:07", "remaining_time": "0:12:27", "throughput": 8654.83, "total_tokens": 131446952} +{"current_steps": 195065, "total_steps": 204665, "loss": 0.0, "lr": 1.3377107514703667e-08, "epoch": 4.765470402853444, "percentage": 95.31, "elapsed_time": "4:13:08", "remaining_time": "0:12:27", "throughput": 8654.87, "total_tokens": 131450536} +{"current_steps": 195070, "total_steps": 204665, "loss": 0.0, "lr": 1.3363209213958282e-08, "epoch": 4.76559255368529, "percentage": 95.31, "elapsed_time": "4:13:08", "remaining_time": "0:12:27", "throughput": 8654.91, "total_tokens": 131454184} +{"current_steps": 195075, "total_steps": 204665, "loss": 0.0, "lr": 1.3349318088300976e-08, "epoch": 4.765714704517138, "percentage": 95.31, "elapsed_time": "4:13:08", "remaining_time": "0:12:26", "throughput": 8654.92, "total_tokens": 131457320} +{"current_steps": 195080, "total_steps": 204665, "loss": 0.0, "lr": 1.333543413783289e-08, "epoch": 4.765836855348985, "percentage": 95.32, "elapsed_time": "4:13:09", "remaining_time": "0:12:26", "throughput": 8654.93, "total_tokens": 131460520} +{"current_steps": 195085, "total_steps": 204665, "loss": 0.0, "lr": 1.3321557362654833e-08, "epoch": 4.765959006180832, "percentage": 95.32, "elapsed_time": "4:13:09", "remaining_time": "0:12:25", "throughput": 8654.96, "total_tokens": 131463912} +{"current_steps": 195090, "total_steps": 204665, "loss": 0.0, "lr": 1.3307687762867836e-08, "epoch": 4.766081157012679, "percentage": 95.32, "elapsed_time": "4:13:09", "remaining_time": "0:12:25", "throughput": 8654.98, "total_tokens": 131467304} +{"current_steps": 195095, "total_steps": 204665, "loss": 0.0, "lr": 1.3293825338572706e-08, "epoch": 4.766203307844527, "percentage": 95.32, "elapsed_time": "4:13:10", "remaining_time": "0:12:25", "throughput": 8655.02, "total_tokens": 131470888} +{"current_steps": 195100, "total_steps": 204665, "loss": 0.0, "lr": 1.3279970089870251e-08, "epoch": 4.7663254586763735, "percentage": 95.33, "elapsed_time": "4:13:10", "remaining_time": "0:12:24", "throughput": 8655.05, "total_tokens": 131474472} +{"current_steps": 195105, "total_steps": 204665, "loss": 0.0, "lr": 1.3266122016861392e-08, "epoch": 4.766447609508221, "percentage": 95.33, "elapsed_time": "4:13:10", "remaining_time": "0:12:24", "throughput": 8655.07, "total_tokens": 131477736} +{"current_steps": 195110, "total_steps": 204665, "loss": 0.0, "lr": 1.3252281119646491e-08, "epoch": 4.766569760340068, "percentage": 95.33, "elapsed_time": "4:13:11", "remaining_time": "0:12:23", "throughput": 8655.1, "total_tokens": 131481256} +{"current_steps": 195115, "total_steps": 204665, "loss": 0.0, "lr": 1.323844739832658e-08, "epoch": 4.766691911171915, "percentage": 95.33, "elapsed_time": "4:13:11", "remaining_time": "0:12:23", "throughput": 8655.09, "total_tokens": 131484072} +{"current_steps": 195120, "total_steps": 204665, "loss": 0.0, "lr": 1.3224620853001911e-08, "epoch": 4.766814062003762, "percentage": 95.34, "elapsed_time": "4:13:11", "remaining_time": "0:12:23", "throughput": 8655.08, "total_tokens": 131486952} +{"current_steps": 195125, "total_steps": 204665, "loss": 0.0, "lr": 1.3210801483773404e-08, "epoch": 4.76693621283561, "percentage": 95.34, "elapsed_time": "4:13:12", "remaining_time": "0:12:22", "throughput": 8655.11, "total_tokens": 131490344} +{"current_steps": 195130, "total_steps": 204665, "loss": 0.0, "lr": 1.3196989290741201e-08, "epoch": 4.767058363667457, "percentage": 95.34, "elapsed_time": "4:13:12", "remaining_time": "0:12:22", "throughput": 8655.14, "total_tokens": 131493864} +{"current_steps": 195135, "total_steps": 204665, "loss": 0.0, "lr": 1.3183184274005888e-08, "epoch": 4.767180514499303, "percentage": 95.34, "elapsed_time": "4:13:12", "remaining_time": "0:12:21", "throughput": 8655.18, "total_tokens": 131497576} +{"current_steps": 195140, "total_steps": 204665, "loss": 0.0, "lr": 1.316938643366805e-08, "epoch": 4.767302665331151, "percentage": 95.35, "elapsed_time": "4:13:13", "remaining_time": "0:12:21", "throughput": 8655.2, "total_tokens": 131500840} +{"current_steps": 195145, "total_steps": 204665, "loss": 0.0, "lr": 1.3155595769827721e-08, "epoch": 4.767424816162998, "percentage": 95.35, "elapsed_time": "4:13:13", "remaining_time": "0:12:21", "throughput": 8655.22, "total_tokens": 131504168} +{"current_steps": 195150, "total_steps": 204665, "loss": 0.0, "lr": 1.3141812282585374e-08, "epoch": 4.767546966994845, "percentage": 95.35, "elapsed_time": "4:13:13", "remaining_time": "0:12:20", "throughput": 8655.24, "total_tokens": 131507432} +{"current_steps": 195155, "total_steps": 204665, "loss": 0.0, "lr": 1.3128035972041262e-08, "epoch": 4.767669117826692, "percentage": 95.35, "elapsed_time": "4:13:14", "remaining_time": "0:12:20", "throughput": 8655.26, "total_tokens": 131510888} +{"current_steps": 195160, "total_steps": 204665, "loss": 0.0, "lr": 1.3114266838295418e-08, "epoch": 4.76779126865854, "percentage": 95.36, "elapsed_time": "4:13:14", "remaining_time": "0:12:20", "throughput": 8655.3, "total_tokens": 131514408} +{"current_steps": 195165, "total_steps": 204665, "loss": 0.0, "lr": 1.3100504881448093e-08, "epoch": 4.7679134194903865, "percentage": 95.36, "elapsed_time": "4:13:15", "remaining_time": "0:12:19", "throughput": 8655.32, "total_tokens": 131517800} +{"current_steps": 195170, "total_steps": 204665, "loss": 0.0, "lr": 1.308675010159932e-08, "epoch": 4.768035570322234, "percentage": 95.36, "elapsed_time": "4:13:15", "remaining_time": "0:12:19", "throughput": 8655.35, "total_tokens": 131521384} +{"current_steps": 195175, "total_steps": 204665, "loss": 0.0, "lr": 1.307300249884924e-08, "epoch": 4.768157721154081, "percentage": 95.36, "elapsed_time": "4:13:15", "remaining_time": "0:12:18", "throughput": 8655.36, "total_tokens": 131524392} +{"current_steps": 195180, "total_steps": 204665, "loss": 0.0, "lr": 1.305926207329766e-08, "epoch": 4.7682798719859285, "percentage": 95.37, "elapsed_time": "4:13:16", "remaining_time": "0:12:18", "throughput": 8655.4, "total_tokens": 131528168} +{"current_steps": 195185, "total_steps": 204665, "loss": 0.0, "lr": 1.3045528825044615e-08, "epoch": 4.768402022817775, "percentage": 95.37, "elapsed_time": "4:13:16", "remaining_time": "0:12:18", "throughput": 8655.42, "total_tokens": 131531368} +{"current_steps": 195190, "total_steps": 204665, "loss": 0.0, "lr": 1.3031802754189913e-08, "epoch": 4.768524173649623, "percentage": 95.37, "elapsed_time": "4:13:16", "remaining_time": "0:12:17", "throughput": 8655.42, "total_tokens": 131534376} +{"current_steps": 195195, "total_steps": 204665, "loss": 0.0004, "lr": 1.3018083860833362e-08, "epoch": 4.76864632448147, "percentage": 95.37, "elapsed_time": "4:13:17", "remaining_time": "0:12:17", "throughput": 8655.43, "total_tokens": 131537448} +{"current_steps": 195200, "total_steps": 204665, "loss": 0.0, "lr": 1.3004372145074883e-08, "epoch": 4.768768475313317, "percentage": 95.38, "elapsed_time": "4:13:17", "remaining_time": "0:12:16", "throughput": 8655.44, "total_tokens": 131540584} +{"current_steps": 195205, "total_steps": 204665, "loss": 0.0, "lr": 1.299066760701395e-08, "epoch": 4.768890626145164, "percentage": 95.38, "elapsed_time": "4:13:17", "remaining_time": "0:12:16", "throughput": 8655.49, "total_tokens": 131544360} +{"current_steps": 195210, "total_steps": 204665, "loss": 0.0, "lr": 1.2976970246750484e-08, "epoch": 4.769012776977011, "percentage": 95.38, "elapsed_time": "4:13:18", "remaining_time": "0:12:16", "throughput": 8655.51, "total_tokens": 131547816} +{"current_steps": 195215, "total_steps": 204665, "loss": 0.0, "lr": 1.2963280064383853e-08, "epoch": 4.769134927808858, "percentage": 95.38, "elapsed_time": "4:13:18", "remaining_time": "0:12:15", "throughput": 8655.52, "total_tokens": 131550824} +{"current_steps": 195220, "total_steps": 204665, "loss": 0.0, "lr": 1.2949597060013862e-08, "epoch": 4.769257078640706, "percentage": 95.39, "elapsed_time": "4:13:18", "remaining_time": "0:12:15", "throughput": 8655.55, "total_tokens": 131554280} +{"current_steps": 195225, "total_steps": 204665, "loss": 0.0, "lr": 1.2935921233739766e-08, "epoch": 4.769379229472553, "percentage": 95.39, "elapsed_time": "4:13:19", "remaining_time": "0:12:14", "throughput": 8655.56, "total_tokens": 131557480} +{"current_steps": 195230, "total_steps": 204665, "loss": 0.0, "lr": 1.2922252585661153e-08, "epoch": 4.7695013803043995, "percentage": 95.39, "elapsed_time": "4:13:19", "remaining_time": "0:12:14", "throughput": 8655.59, "total_tokens": 131560872} +{"current_steps": 195235, "total_steps": 204665, "loss": 0.0, "lr": 1.2908591115877499e-08, "epoch": 4.769623531136247, "percentage": 95.39, "elapsed_time": "4:13:19", "remaining_time": "0:12:14", "throughput": 8655.59, "total_tokens": 131563944} +{"current_steps": 195240, "total_steps": 204665, "loss": 0.0, "lr": 1.2894936824487945e-08, "epoch": 4.769745681968094, "percentage": 95.39, "elapsed_time": "4:13:20", "remaining_time": "0:12:13", "throughput": 8655.6, "total_tokens": 131567080} +{"current_steps": 195245, "total_steps": 204665, "loss": 0.0, "lr": 1.288128971159197e-08, "epoch": 4.7698678327999415, "percentage": 95.4, "elapsed_time": "4:13:20", "remaining_time": "0:12:13", "throughput": 8655.62, "total_tokens": 131570408} +{"current_steps": 195250, "total_steps": 204665, "loss": 0.0, "lr": 1.2867649777288826e-08, "epoch": 4.769989983631788, "percentage": 95.4, "elapsed_time": "4:13:20", "remaining_time": "0:12:12", "throughput": 8655.64, "total_tokens": 131573608} +{"current_steps": 195255, "total_steps": 204665, "loss": 0.0, "lr": 1.2854017021677543e-08, "epoch": 4.770112134463636, "percentage": 95.4, "elapsed_time": "4:13:21", "remaining_time": "0:12:12", "throughput": 8655.76, "total_tokens": 131578984} +{"current_steps": 195260, "total_steps": 204665, "loss": 0.0, "lr": 1.284039144485749e-08, "epoch": 4.770234285295483, "percentage": 95.4, "elapsed_time": "4:13:21", "remaining_time": "0:12:12", "throughput": 8655.77, "total_tokens": 131582120} +{"current_steps": 195265, "total_steps": 204665, "loss": 0.0, "lr": 1.2826773046927475e-08, "epoch": 4.77035643612733, "percentage": 95.41, "elapsed_time": "4:13:22", "remaining_time": "0:12:11", "throughput": 8655.8, "total_tokens": 131585448} +{"current_steps": 195270, "total_steps": 204665, "loss": 0.0, "lr": 1.2813161827986752e-08, "epoch": 4.770478586959177, "percentage": 95.41, "elapsed_time": "4:13:22", "remaining_time": "0:12:11", "throughput": 8655.84, "total_tokens": 131589160} +{"current_steps": 195275, "total_steps": 204665, "loss": 0.0, "lr": 1.2799557788134241e-08, "epoch": 4.770600737791025, "percentage": 95.41, "elapsed_time": "4:13:22", "remaining_time": "0:12:11", "throughput": 8655.84, "total_tokens": 131592168} +{"current_steps": 195280, "total_steps": 204665, "loss": 0.0, "lr": 1.2785960927468863e-08, "epoch": 4.770722888622871, "percentage": 95.41, "elapsed_time": "4:13:23", "remaining_time": "0:12:10", "throughput": 8655.85, "total_tokens": 131595176} +{"current_steps": 195285, "total_steps": 204665, "loss": 0.0, "lr": 1.2772371246089431e-08, "epoch": 4.770845039454719, "percentage": 95.42, "elapsed_time": "4:13:23", "remaining_time": "0:12:10", "throughput": 8655.86, "total_tokens": 131598376} +{"current_steps": 195290, "total_steps": 204665, "loss": 0.0, "lr": 1.2758788744094862e-08, "epoch": 4.770967190286566, "percentage": 95.42, "elapsed_time": "4:13:23", "remaining_time": "0:12:09", "throughput": 8655.95, "total_tokens": 131602984} +{"current_steps": 195295, "total_steps": 204665, "loss": 0.0, "lr": 1.2745213421583967e-08, "epoch": 4.771089341118413, "percentage": 95.42, "elapsed_time": "4:13:24", "remaining_time": "0:12:09", "throughput": 8655.97, "total_tokens": 131606248} +{"current_steps": 195300, "total_steps": 204665, "loss": 0.0, "lr": 1.2731645278655445e-08, "epoch": 4.77121149195026, "percentage": 95.42, "elapsed_time": "4:13:24", "remaining_time": "0:12:09", "throughput": 8655.99, "total_tokens": 131609512} +{"current_steps": 195305, "total_steps": 204665, "loss": 0.0, "lr": 1.2718084315407995e-08, "epoch": 4.771333642782107, "percentage": 95.43, "elapsed_time": "4:13:24", "remaining_time": "0:12:08", "throughput": 8656.02, "total_tokens": 131612968} +{"current_steps": 195310, "total_steps": 204665, "loss": 0.0, "lr": 1.2704530531939982e-08, "epoch": 4.7714557936139546, "percentage": 95.43, "elapsed_time": "4:13:25", "remaining_time": "0:12:08", "throughput": 8656.06, "total_tokens": 131616808} +{"current_steps": 195315, "total_steps": 204665, "loss": 0.0, "lr": 1.269098392835033e-08, "epoch": 4.771577944445801, "percentage": 95.43, "elapsed_time": "4:13:25", "remaining_time": "0:12:07", "throughput": 8656.08, "total_tokens": 131620008} +{"current_steps": 195320, "total_steps": 204665, "loss": 0.0, "lr": 1.26774445047374e-08, "epoch": 4.771700095277649, "percentage": 95.43, "elapsed_time": "4:13:25", "remaining_time": "0:12:07", "throughput": 8656.12, "total_tokens": 131623720} +{"current_steps": 195325, "total_steps": 204665, "loss": 0.0, "lr": 1.266391226119956e-08, "epoch": 4.771822246109496, "percentage": 95.44, "elapsed_time": "4:13:26", "remaining_time": "0:12:07", "throughput": 8656.15, "total_tokens": 131627176} +{"current_steps": 195330, "total_steps": 204665, "loss": 0.0, "lr": 1.2650387197835399e-08, "epoch": 4.771944396941343, "percentage": 95.44, "elapsed_time": "4:13:26", "remaining_time": "0:12:06", "throughput": 8656.19, "total_tokens": 131630888} +{"current_steps": 195335, "total_steps": 204665, "loss": 0.0, "lr": 1.2636869314743059e-08, "epoch": 4.77206654777319, "percentage": 95.44, "elapsed_time": "4:13:26", "remaining_time": "0:12:06", "throughput": 8656.23, "total_tokens": 131634536} +{"current_steps": 195340, "total_steps": 204665, "loss": 0.0001, "lr": 1.2623358612021128e-08, "epoch": 4.772188698605038, "percentage": 95.44, "elapsed_time": "4:13:27", "remaining_time": "0:12:05", "throughput": 8656.24, "total_tokens": 131637736} +{"current_steps": 195345, "total_steps": 204665, "loss": 0.0, "lr": 1.260985508976764e-08, "epoch": 4.772310849436884, "percentage": 95.45, "elapsed_time": "4:13:27", "remaining_time": "0:12:05", "throughput": 8656.26, "total_tokens": 131641000} +{"current_steps": 195350, "total_steps": 204665, "loss": 0.0, "lr": 1.2596358748080848e-08, "epoch": 4.772433000268732, "percentage": 95.45, "elapsed_time": "4:13:27", "remaining_time": "0:12:05", "throughput": 8656.3, "total_tokens": 131644712} +{"current_steps": 195355, "total_steps": 204665, "loss": 0.0, "lr": 1.2582869587059008e-08, "epoch": 4.772555151100579, "percentage": 95.45, "elapsed_time": "4:13:28", "remaining_time": "0:12:04", "throughput": 8656.33, "total_tokens": 131648104} +{"current_steps": 195360, "total_steps": 204665, "loss": 0.0, "lr": 1.2569387606800041e-08, "epoch": 4.7726773019324265, "percentage": 95.45, "elapsed_time": "4:13:28", "remaining_time": "0:12:04", "throughput": 8656.34, "total_tokens": 131651368} +{"current_steps": 195365, "total_steps": 204665, "loss": 0.0, "lr": 1.2555912807402092e-08, "epoch": 4.772799452764273, "percentage": 95.46, "elapsed_time": "4:13:29", "remaining_time": "0:12:03", "throughput": 8656.36, "total_tokens": 131654632} +{"current_steps": 195370, "total_steps": 204665, "loss": 0.0, "lr": 1.254244518896308e-08, "epoch": 4.772921603596121, "percentage": 95.46, "elapsed_time": "4:13:29", "remaining_time": "0:12:03", "throughput": 8656.37, "total_tokens": 131657768} +{"current_steps": 195375, "total_steps": 204665, "loss": 0.0, "lr": 1.2528984751581151e-08, "epoch": 4.773043754427968, "percentage": 95.46, "elapsed_time": "4:13:29", "remaining_time": "0:12:03", "throughput": 8656.4, "total_tokens": 131661160} +{"current_steps": 195380, "total_steps": 204665, "loss": 0.0, "lr": 1.2515531495353893e-08, "epoch": 4.773165905259814, "percentage": 95.46, "elapsed_time": "4:13:30", "remaining_time": "0:12:02", "throughput": 8656.44, "total_tokens": 131664808} +{"current_steps": 195385, "total_steps": 204665, "loss": 0.0, "lr": 1.2502085420379337e-08, "epoch": 4.773288056091662, "percentage": 95.47, "elapsed_time": "4:13:30", "remaining_time": "0:12:02", "throughput": 8656.43, "total_tokens": 131667688} +{"current_steps": 195390, "total_steps": 204665, "loss": 0.0, "lr": 1.2488646526755187e-08, "epoch": 4.77341020692351, "percentage": 95.47, "elapsed_time": "4:13:30", "remaining_time": "0:12:02", "throughput": 8656.46, "total_tokens": 131671080} +{"current_steps": 195395, "total_steps": 204665, "loss": 0.0, "lr": 1.2475214814579248e-08, "epoch": 4.773532357755356, "percentage": 95.47, "elapsed_time": "4:13:31", "remaining_time": "0:12:01", "throughput": 8656.46, "total_tokens": 131674152} +{"current_steps": 195400, "total_steps": 204665, "loss": 0.0, "lr": 1.2461790283949115e-08, "epoch": 4.773654508587203, "percentage": 95.47, "elapsed_time": "4:13:31", "remaining_time": "0:12:01", "throughput": 8656.48, "total_tokens": 131677416} +{"current_steps": 195405, "total_steps": 204665, "loss": 0.0, "lr": 1.244837293496248e-08, "epoch": 4.773776659419051, "percentage": 95.48, "elapsed_time": "4:13:31", "remaining_time": "0:12:00", "throughput": 8656.5, "total_tokens": 131680616} +{"current_steps": 195410, "total_steps": 204665, "loss": 0.0, "lr": 1.2434962767716939e-08, "epoch": 4.7738988102508975, "percentage": 95.48, "elapsed_time": "4:13:32", "remaining_time": "0:12:00", "throughput": 8656.52, "total_tokens": 131683944} +{"current_steps": 195415, "total_steps": 204665, "loss": 0.0, "lr": 1.2421559782309966e-08, "epoch": 4.774020961082745, "percentage": 95.48, "elapsed_time": "4:13:32", "remaining_time": "0:12:00", "throughput": 8656.52, "total_tokens": 131686952} +{"current_steps": 195420, "total_steps": 204665, "loss": 0.0, "lr": 1.2408163978839036e-08, "epoch": 4.774143111914592, "percentage": 95.48, "elapsed_time": "4:13:32", "remaining_time": "0:11:59", "throughput": 8656.52, "total_tokens": 131689896} +{"current_steps": 195425, "total_steps": 204665, "loss": 0.0, "lr": 1.2394775357401521e-08, "epoch": 4.7742652627464395, "percentage": 95.49, "elapsed_time": "4:13:33", "remaining_time": "0:11:59", "throughput": 8656.57, "total_tokens": 131693608} +{"current_steps": 195430, "total_steps": 204665, "loss": 0.0348, "lr": 1.2381393918094784e-08, "epoch": 4.774387413578286, "percentage": 95.49, "elapsed_time": "4:13:33", "remaining_time": "0:11:58", "throughput": 8656.6, "total_tokens": 131697064} +{"current_steps": 195435, "total_steps": 204665, "loss": 0.0, "lr": 1.2368019661016304e-08, "epoch": 4.774509564410134, "percentage": 95.49, "elapsed_time": "4:13:33", "remaining_time": "0:11:58", "throughput": 8656.6, "total_tokens": 131700072} +{"current_steps": 195440, "total_steps": 204665, "loss": 0.0, "lr": 1.2354652586263226e-08, "epoch": 4.774631715241981, "percentage": 95.49, "elapsed_time": "4:13:34", "remaining_time": "0:11:58", "throughput": 8656.61, "total_tokens": 131703144} +{"current_steps": 195445, "total_steps": 204665, "loss": 0.0, "lr": 1.2341292693932692e-08, "epoch": 4.774753866073828, "percentage": 95.5, "elapsed_time": "4:13:34", "remaining_time": "0:11:57", "throughput": 8656.64, "total_tokens": 131706664} +{"current_steps": 195450, "total_steps": 204665, "loss": 0.0, "lr": 1.232793998412196e-08, "epoch": 4.774876016905675, "percentage": 95.5, "elapsed_time": "4:13:34", "remaining_time": "0:11:57", "throughput": 8656.66, "total_tokens": 131709992} +{"current_steps": 195455, "total_steps": 204665, "loss": 0.0, "lr": 1.2314594456928063e-08, "epoch": 4.774998167737523, "percentage": 95.5, "elapsed_time": "4:13:35", "remaining_time": "0:11:56", "throughput": 8656.69, "total_tokens": 131713576} +{"current_steps": 195460, "total_steps": 204665, "loss": 0.0, "lr": 1.2301256112448144e-08, "epoch": 4.775120318569369, "percentage": 95.5, "elapsed_time": "4:13:35", "remaining_time": "0:11:56", "throughput": 8656.7, "total_tokens": 131716584} +{"current_steps": 195465, "total_steps": 204665, "loss": 0.0, "lr": 1.2287924950779238e-08, "epoch": 4.775242469401217, "percentage": 95.5, "elapsed_time": "4:13:35", "remaining_time": "0:11:56", "throughput": 8656.72, "total_tokens": 131719912} +{"current_steps": 195470, "total_steps": 204665, "loss": 0.0, "lr": 1.2274600972018046e-08, "epoch": 4.775364620233064, "percentage": 95.51, "elapsed_time": "4:13:36", "remaining_time": "0:11:55", "throughput": 8656.76, "total_tokens": 131723688} +{"current_steps": 195475, "total_steps": 204665, "loss": 0.0, "lr": 1.226128417626171e-08, "epoch": 4.7754867710649105, "percentage": 95.51, "elapsed_time": "4:13:36", "remaining_time": "0:11:55", "throughput": 8656.78, "total_tokens": 131726888} +{"current_steps": 195480, "total_steps": 204665, "loss": 0.0, "lr": 1.2247974563606823e-08, "epoch": 4.775608921896758, "percentage": 95.51, "elapsed_time": "4:13:36", "remaining_time": "0:11:54", "throughput": 8656.79, "total_tokens": 131730024} +{"current_steps": 195485, "total_steps": 204665, "loss": 0.0, "lr": 1.2234672134150525e-08, "epoch": 4.775731072728606, "percentage": 95.51, "elapsed_time": "4:13:37", "remaining_time": "0:11:54", "throughput": 8656.79, "total_tokens": 131733032} +{"current_steps": 195490, "total_steps": 204665, "loss": 0.0, "lr": 1.2221376887989298e-08, "epoch": 4.7758532235604525, "percentage": 95.52, "elapsed_time": "4:13:37", "remaining_time": "0:11:54", "throughput": 8656.84, "total_tokens": 131736808} +{"current_steps": 195495, "total_steps": 204665, "loss": 0.0, "lr": 1.2208088825219954e-08, "epoch": 4.775975374392299, "percentage": 95.52, "elapsed_time": "4:13:38", "remaining_time": "0:11:53", "throughput": 8656.87, "total_tokens": 131740328} +{"current_steps": 195500, "total_steps": 204665, "loss": 0.0, "lr": 1.2194807945938967e-08, "epoch": 4.776097525224147, "percentage": 95.52, "elapsed_time": "4:13:38", "remaining_time": "0:11:53", "throughput": 8656.88, "total_tokens": 131743592} +{"current_steps": 195505, "total_steps": 204665, "loss": 0.0, "lr": 1.2181534250243041e-08, "epoch": 4.776219676055994, "percentage": 95.52, "elapsed_time": "4:13:38", "remaining_time": "0:11:53", "throughput": 8656.91, "total_tokens": 131746984} +{"current_steps": 195510, "total_steps": 204665, "loss": 0.0, "lr": 1.2168267738228765e-08, "epoch": 4.776341826887841, "percentage": 95.53, "elapsed_time": "4:13:39", "remaining_time": "0:11:52", "throughput": 8656.92, "total_tokens": 131750248} +{"current_steps": 195515, "total_steps": 204665, "loss": 0.0, "lr": 1.2155008409992507e-08, "epoch": 4.776463977719688, "percentage": 95.53, "elapsed_time": "4:13:39", "remaining_time": "0:11:52", "throughput": 8656.96, "total_tokens": 131753768} +{"current_steps": 195520, "total_steps": 204665, "loss": 0.0, "lr": 1.2141756265630742e-08, "epoch": 4.776586128551536, "percentage": 95.53, "elapsed_time": "4:13:39", "remaining_time": "0:11:51", "throughput": 8656.97, "total_tokens": 131756904} +{"current_steps": 195525, "total_steps": 204665, "loss": 0.0, "lr": 1.212851130523973e-08, "epoch": 4.776708279383382, "percentage": 95.53, "elapsed_time": "4:13:40", "remaining_time": "0:11:51", "throughput": 8656.99, "total_tokens": 131760296} +{"current_steps": 195530, "total_steps": 204665, "loss": 0.0, "lr": 1.211527352891606e-08, "epoch": 4.77683043021523, "percentage": 95.54, "elapsed_time": "4:13:40", "remaining_time": "0:11:51", "throughput": 8657.02, "total_tokens": 131763688} +{"current_steps": 195535, "total_steps": 204665, "loss": 0.0, "lr": 1.2102042936755652e-08, "epoch": 4.776952581047077, "percentage": 95.54, "elapsed_time": "4:13:40", "remaining_time": "0:11:50", "throughput": 8657.06, "total_tokens": 131767400} +{"current_steps": 195540, "total_steps": 204665, "loss": 0.0, "lr": 1.2088819528854988e-08, "epoch": 4.777074731878924, "percentage": 95.54, "elapsed_time": "4:13:41", "remaining_time": "0:11:50", "throughput": 8657.09, "total_tokens": 131770920} +{"current_steps": 195545, "total_steps": 204665, "loss": 0.0, "lr": 1.2075603305310211e-08, "epoch": 4.777196882710771, "percentage": 95.54, "elapsed_time": "4:13:41", "remaining_time": "0:11:49", "throughput": 8657.1, "total_tokens": 131774056} +{"current_steps": 195550, "total_steps": 204665, "loss": 0.0001, "lr": 1.2062394266217246e-08, "epoch": 4.777319033542619, "percentage": 95.55, "elapsed_time": "4:13:41", "remaining_time": "0:11:49", "throughput": 8657.13, "total_tokens": 131777640} +{"current_steps": 195555, "total_steps": 204665, "loss": 0.0, "lr": 1.2049192411672348e-08, "epoch": 4.7774411843744655, "percentage": 95.55, "elapsed_time": "4:13:42", "remaining_time": "0:11:49", "throughput": 8657.16, "total_tokens": 131781032} +{"current_steps": 195560, "total_steps": 204665, "loss": 0.0, "lr": 1.2035997741771442e-08, "epoch": 4.777563335206313, "percentage": 95.55, "elapsed_time": "4:13:42", "remaining_time": "0:11:48", "throughput": 8657.17, "total_tokens": 131784168} +{"current_steps": 195565, "total_steps": 204665, "loss": 0.0, "lr": 1.202281025661045e-08, "epoch": 4.77768548603816, "percentage": 95.55, "elapsed_time": "4:13:42", "remaining_time": "0:11:48", "throughput": 8657.19, "total_tokens": 131787496} +{"current_steps": 195570, "total_steps": 204665, "loss": 0.0, "lr": 1.2009629956285405e-08, "epoch": 4.777807636870007, "percentage": 95.56, "elapsed_time": "4:13:43", "remaining_time": "0:11:47", "throughput": 8657.21, "total_tokens": 131790888} +{"current_steps": 195575, "total_steps": 204665, "loss": 0.0, "lr": 1.1996456840892011e-08, "epoch": 4.777929787701854, "percentage": 95.56, "elapsed_time": "4:13:43", "remaining_time": "0:11:47", "throughput": 8657.21, "total_tokens": 131793768} +{"current_steps": 195580, "total_steps": 204665, "loss": 0.0, "lr": 1.1983290910526079e-08, "epoch": 4.778051938533701, "percentage": 95.56, "elapsed_time": "4:13:43", "remaining_time": "0:11:47", "throughput": 8657.25, "total_tokens": 131797416} +{"current_steps": 195585, "total_steps": 204665, "loss": 0.0, "lr": 1.197013216528342e-08, "epoch": 4.778174089365549, "percentage": 95.56, "elapsed_time": "4:13:44", "remaining_time": "0:11:46", "throughput": 8657.27, "total_tokens": 131800744} +{"current_steps": 195590, "total_steps": 204665, "loss": 0.0, "lr": 1.1956980605259737e-08, "epoch": 4.778296240197395, "percentage": 95.57, "elapsed_time": "4:13:44", "remaining_time": "0:11:46", "throughput": 8657.28, "total_tokens": 131803944} +{"current_steps": 195595, "total_steps": 204665, "loss": 0.0, "lr": 1.1943836230550619e-08, "epoch": 4.778418391029243, "percentage": 95.57, "elapsed_time": "4:13:44", "remaining_time": "0:11:46", "throughput": 8657.3, "total_tokens": 131807208} +{"current_steps": 195600, "total_steps": 204665, "loss": 0.0, "lr": 1.1930699041251657e-08, "epoch": 4.77854054186109, "percentage": 95.57, "elapsed_time": "4:13:45", "remaining_time": "0:11:45", "throughput": 8657.34, "total_tokens": 131810856} +{"current_steps": 195605, "total_steps": 204665, "loss": 0.0007, "lr": 1.1917569037458553e-08, "epoch": 4.778662692692937, "percentage": 95.57, "elapsed_time": "4:13:45", "remaining_time": "0:11:45", "throughput": 8657.34, "total_tokens": 131813800} +{"current_steps": 195610, "total_steps": 204665, "loss": 0.0, "lr": 1.1904446219266451e-08, "epoch": 4.778784843524784, "percentage": 95.58, "elapsed_time": "4:13:46", "remaining_time": "0:11:44", "throughput": 8657.37, "total_tokens": 131817384} +{"current_steps": 195615, "total_steps": 204665, "loss": 0.0, "lr": 1.1891330586771165e-08, "epoch": 4.778906994356632, "percentage": 95.58, "elapsed_time": "4:13:46", "remaining_time": "0:11:44", "throughput": 8657.39, "total_tokens": 131820648} +{"current_steps": 195620, "total_steps": 204665, "loss": 0.0, "lr": 1.1878222140067729e-08, "epoch": 4.7790291451884785, "percentage": 95.58, "elapsed_time": "4:13:46", "remaining_time": "0:11:44", "throughput": 8657.42, "total_tokens": 131824168} +{"current_steps": 195625, "total_steps": 204665, "loss": 0.0, "lr": 1.1865120879251734e-08, "epoch": 4.779151296020326, "percentage": 95.58, "elapsed_time": "4:13:47", "remaining_time": "0:11:43", "throughput": 8657.43, "total_tokens": 131827240} +{"current_steps": 195630, "total_steps": 204665, "loss": 0.0, "lr": 1.1852026804418325e-08, "epoch": 4.779273446852173, "percentage": 95.59, "elapsed_time": "4:13:47", "remaining_time": "0:11:43", "throughput": 8657.43, "total_tokens": 131830184} +{"current_steps": 195635, "total_steps": 204665, "loss": 0.0, "lr": 1.1838939915662761e-08, "epoch": 4.779395597684021, "percentage": 95.59, "elapsed_time": "4:13:47", "remaining_time": "0:11:42", "throughput": 8657.44, "total_tokens": 131833384} +{"current_steps": 195640, "total_steps": 204665, "loss": 0.0, "lr": 1.1825860213080185e-08, "epoch": 4.779517748515867, "percentage": 95.59, "elapsed_time": "4:13:48", "remaining_time": "0:11:42", "throughput": 8657.48, "total_tokens": 131836968} +{"current_steps": 195645, "total_steps": 204665, "loss": 0.0, "lr": 1.1812787696765747e-08, "epoch": 4.779639899347715, "percentage": 95.59, "elapsed_time": "4:13:48", "remaining_time": "0:11:42", "throughput": 8657.53, "total_tokens": 131840872} +{"current_steps": 195650, "total_steps": 204665, "loss": 0.0, "lr": 1.1799722366814591e-08, "epoch": 4.779762050179562, "percentage": 95.6, "elapsed_time": "4:13:48", "remaining_time": "0:11:41", "throughput": 8657.54, "total_tokens": 131844008} +{"current_steps": 195655, "total_steps": 204665, "loss": 0.0004, "lr": 1.1786664223321529e-08, "epoch": 4.779884201011409, "percentage": 95.6, "elapsed_time": "4:13:49", "remaining_time": "0:11:41", "throughput": 8657.56, "total_tokens": 131847336} +{"current_steps": 195660, "total_steps": 204665, "loss": 0.0, "lr": 1.177361326638171e-08, "epoch": 4.780006351843256, "percentage": 95.6, "elapsed_time": "4:13:49", "remaining_time": "0:11:40", "throughput": 8657.57, "total_tokens": 131850344} +{"current_steps": 195665, "total_steps": 204665, "loss": 0.0, "lr": 1.1760569496089946e-08, "epoch": 4.780128502675103, "percentage": 95.6, "elapsed_time": "4:13:49", "remaining_time": "0:11:40", "throughput": 8657.57, "total_tokens": 131853224} +{"current_steps": 195670, "total_steps": 204665, "loss": 0.0, "lr": 1.1747532912541159e-08, "epoch": 4.78025065350695, "percentage": 95.61, "elapsed_time": "4:13:50", "remaining_time": "0:11:40", "throughput": 8657.57, "total_tokens": 131856168} +{"current_steps": 195675, "total_steps": 204665, "loss": 0.0, "lr": 1.1734503515830053e-08, "epoch": 4.780372804338797, "percentage": 95.61, "elapsed_time": "4:13:50", "remaining_time": "0:11:39", "throughput": 8657.58, "total_tokens": 131859368} +{"current_steps": 195680, "total_steps": 204665, "loss": 0.0, "lr": 1.172148130605155e-08, "epoch": 4.780494955170645, "percentage": 95.61, "elapsed_time": "4:13:50", "remaining_time": "0:11:39", "throughput": 8657.6, "total_tokens": 131862760} +{"current_steps": 195685, "total_steps": 204665, "loss": 0.0, "lr": 1.1708466283300245e-08, "epoch": 4.780617106002492, "percentage": 95.61, "elapsed_time": "4:13:51", "remaining_time": "0:11:38", "throughput": 8657.63, "total_tokens": 131866280} +{"current_steps": 195690, "total_steps": 204665, "loss": 0.0, "lr": 1.1695458447670725e-08, "epoch": 4.780739256834339, "percentage": 95.61, "elapsed_time": "4:13:51", "remaining_time": "0:11:38", "throughput": 8657.67, "total_tokens": 131869992} +{"current_steps": 195695, "total_steps": 204665, "loss": 0.0336, "lr": 1.1682457799257584e-08, "epoch": 4.780861407666186, "percentage": 95.62, "elapsed_time": "4:13:51", "remaining_time": "0:11:38", "throughput": 8657.72, "total_tokens": 131873768} +{"current_steps": 195700, "total_steps": 204665, "loss": 0.0, "lr": 1.1669464338155632e-08, "epoch": 4.780983558498034, "percentage": 95.62, "elapsed_time": "4:13:52", "remaining_time": "0:11:37", "throughput": 8657.77, "total_tokens": 131877672} +{"current_steps": 195705, "total_steps": 204665, "loss": 0.0, "lr": 1.1656478064459019e-08, "epoch": 4.78110570932988, "percentage": 95.62, "elapsed_time": "4:13:52", "remaining_time": "0:11:37", "throughput": 8657.77, "total_tokens": 131880616} +{"current_steps": 195710, "total_steps": 204665, "loss": 0.0, "lr": 1.1643498978262334e-08, "epoch": 4.781227860161728, "percentage": 95.62, "elapsed_time": "4:13:52", "remaining_time": "0:11:37", "throughput": 8657.8, "total_tokens": 131884072} +{"current_steps": 195715, "total_steps": 204665, "loss": 0.0, "lr": 1.1630527079660057e-08, "epoch": 4.781350010993575, "percentage": 95.63, "elapsed_time": "4:13:53", "remaining_time": "0:11:36", "throughput": 8657.82, "total_tokens": 131887464} +{"current_steps": 195720, "total_steps": 204665, "loss": 0.0, "lr": 1.1617562368746226e-08, "epoch": 4.781472161825422, "percentage": 95.63, "elapsed_time": "4:13:53", "remaining_time": "0:11:36", "throughput": 8657.86, "total_tokens": 131891048} +{"current_steps": 195725, "total_steps": 204665, "loss": 0.0, "lr": 1.160460484561554e-08, "epoch": 4.781594312657269, "percentage": 95.63, "elapsed_time": "4:13:54", "remaining_time": "0:11:35", "throughput": 8657.87, "total_tokens": 131894312} +{"current_steps": 195730, "total_steps": 204665, "loss": 0.0, "lr": 1.1591654510361926e-08, "epoch": 4.781716463489117, "percentage": 95.63, "elapsed_time": "4:13:54", "remaining_time": "0:11:35", "throughput": 8657.91, "total_tokens": 131897960} +{"current_steps": 195735, "total_steps": 204665, "loss": 0.0001, "lr": 1.157871136307964e-08, "epoch": 4.7818386143209635, "percentage": 95.64, "elapsed_time": "4:13:54", "remaining_time": "0:11:35", "throughput": 8657.95, "total_tokens": 131901544} +{"current_steps": 195740, "total_steps": 204665, "loss": 0.0, "lr": 1.1565775403862831e-08, "epoch": 4.78196076515281, "percentage": 95.64, "elapsed_time": "4:13:55", "remaining_time": "0:11:34", "throughput": 8657.96, "total_tokens": 131904680} +{"current_steps": 195745, "total_steps": 204665, "loss": 0.0, "lr": 1.1552846632805646e-08, "epoch": 4.782082915984658, "percentage": 95.64, "elapsed_time": "4:13:55", "remaining_time": "0:11:34", "throughput": 8657.98, "total_tokens": 131908008} +{"current_steps": 195750, "total_steps": 204665, "loss": 0.0, "lr": 1.1539925050001897e-08, "epoch": 4.7822050668165055, "percentage": 95.64, "elapsed_time": "4:13:55", "remaining_time": "0:11:33", "throughput": 8658.0, "total_tokens": 131911400} +{"current_steps": 195755, "total_steps": 204665, "loss": 0.0436, "lr": 1.1527010655545621e-08, "epoch": 4.782327217648352, "percentage": 95.65, "elapsed_time": "4:13:56", "remaining_time": "0:11:33", "throughput": 8658.05, "total_tokens": 131915304} +{"current_steps": 195760, "total_steps": 204665, "loss": 0.0, "lr": 1.1514103449530966e-08, "epoch": 4.782449368480199, "percentage": 95.65, "elapsed_time": "4:13:56", "remaining_time": "0:11:33", "throughput": 8658.06, "total_tokens": 131918312} +{"current_steps": 195765, "total_steps": 204665, "loss": 0.0, "lr": 1.150120343205152e-08, "epoch": 4.782571519312047, "percentage": 95.65, "elapsed_time": "4:13:56", "remaining_time": "0:11:32", "throughput": 8658.11, "total_tokens": 131922280} +{"current_steps": 195770, "total_steps": 204665, "loss": 0.0538, "lr": 1.1488310603201323e-08, "epoch": 4.782693670143893, "percentage": 95.65, "elapsed_time": "4:13:57", "remaining_time": "0:11:32", "throughput": 8658.12, "total_tokens": 131925352} +{"current_steps": 195775, "total_steps": 204665, "loss": 0.0, "lr": 1.1475424963073853e-08, "epoch": 4.782815820975741, "percentage": 95.66, "elapsed_time": "4:13:57", "remaining_time": "0:11:31", "throughput": 8658.13, "total_tokens": 131928488} +{"current_steps": 195780, "total_steps": 204665, "loss": 0.0, "lr": 1.1462546511763039e-08, "epoch": 4.782937971807588, "percentage": 95.66, "elapsed_time": "4:13:57", "remaining_time": "0:11:31", "throughput": 8658.14, "total_tokens": 131931560} +{"current_steps": 195785, "total_steps": 204665, "loss": 0.0, "lr": 1.1449675249362467e-08, "epoch": 4.783060122639435, "percentage": 95.66, "elapsed_time": "4:13:58", "remaining_time": "0:11:31", "throughput": 8658.18, "total_tokens": 131935400} +{"current_steps": 195790, "total_steps": 204665, "loss": 0.0, "lr": 1.1436811175965732e-08, "epoch": 4.783182273471282, "percentage": 95.66, "elapsed_time": "4:13:58", "remaining_time": "0:11:30", "throughput": 8658.22, "total_tokens": 131938920} +{"current_steps": 195795, "total_steps": 204665, "loss": 0.0, "lr": 1.1423954291666427e-08, "epoch": 4.78330442430313, "percentage": 95.67, "elapsed_time": "4:13:58", "remaining_time": "0:11:30", "throughput": 8658.23, "total_tokens": 131942120} +{"current_steps": 195800, "total_steps": 204665, "loss": 0.0, "lr": 1.141110459655803e-08, "epoch": 4.7834265751349765, "percentage": 95.67, "elapsed_time": "4:13:59", "remaining_time": "0:11:29", "throughput": 8658.23, "total_tokens": 131945064} +{"current_steps": 195805, "total_steps": 204665, "loss": 0.0, "lr": 1.1398262090733913e-08, "epoch": 4.783548725966824, "percentage": 95.67, "elapsed_time": "4:13:59", "remaining_time": "0:11:29", "throughput": 8658.27, "total_tokens": 131948776} +{"current_steps": 195810, "total_steps": 204665, "loss": 0.0, "lr": 1.1385426774287555e-08, "epoch": 4.783670876798671, "percentage": 95.67, "elapsed_time": "4:13:59", "remaining_time": "0:11:29", "throughput": 8658.29, "total_tokens": 131951976} +{"current_steps": 195815, "total_steps": 204665, "loss": 0.0, "lr": 1.1372598647312325e-08, "epoch": 4.7837930276305185, "percentage": 95.68, "elapsed_time": "4:14:00", "remaining_time": "0:11:28", "throughput": 8658.32, "total_tokens": 131955560} +{"current_steps": 195820, "total_steps": 204665, "loss": 0.0, "lr": 1.1359777709901374e-08, "epoch": 4.783915178462365, "percentage": 95.68, "elapsed_time": "4:14:00", "remaining_time": "0:11:28", "throughput": 8658.34, "total_tokens": 131958888} +{"current_steps": 195825, "total_steps": 204665, "loss": 0.0, "lr": 1.134696396214807e-08, "epoch": 4.784037329294213, "percentage": 95.68, "elapsed_time": "4:14:01", "remaining_time": "0:11:28", "throughput": 8658.36, "total_tokens": 131962088} +{"current_steps": 195830, "total_steps": 204665, "loss": 0.0, "lr": 1.1334157404145672e-08, "epoch": 4.78415948012606, "percentage": 95.68, "elapsed_time": "4:14:01", "remaining_time": "0:11:27", "throughput": 8658.38, "total_tokens": 131965352} +{"current_steps": 195835, "total_steps": 204665, "loss": 0.0, "lr": 1.1321358035987106e-08, "epoch": 4.784281630957906, "percentage": 95.69, "elapsed_time": "4:14:01", "remaining_time": "0:11:27", "throughput": 8658.4, "total_tokens": 131968808} +{"current_steps": 195840, "total_steps": 204665, "loss": 0.0, "lr": 1.1308565857765517e-08, "epoch": 4.784403781789754, "percentage": 95.69, "elapsed_time": "4:14:02", "remaining_time": "0:11:26", "throughput": 8658.44, "total_tokens": 131972520} +{"current_steps": 195845, "total_steps": 204665, "loss": 0.0, "lr": 1.1295780869574056e-08, "epoch": 4.784525932621602, "percentage": 95.69, "elapsed_time": "4:14:02", "remaining_time": "0:11:26", "throughput": 8658.45, "total_tokens": 131975528} +{"current_steps": 195850, "total_steps": 204665, "loss": 0.0, "lr": 1.1283003071505426e-08, "epoch": 4.784648083453448, "percentage": 95.69, "elapsed_time": "4:14:02", "remaining_time": "0:11:26", "throughput": 8658.45, "total_tokens": 131978408} +{"current_steps": 195855, "total_steps": 204665, "loss": 0.0, "lr": 1.1270232463652884e-08, "epoch": 4.784770234285295, "percentage": 95.7, "elapsed_time": "4:14:03", "remaining_time": "0:11:25", "throughput": 8658.47, "total_tokens": 131981736} +{"current_steps": 195860, "total_steps": 204665, "loss": 0.0, "lr": 1.1257469046109135e-08, "epoch": 4.784892385117143, "percentage": 95.7, "elapsed_time": "4:14:03", "remaining_time": "0:11:25", "throughput": 8658.49, "total_tokens": 131985192} +{"current_steps": 195865, "total_steps": 204665, "loss": 0.0, "lr": 1.1244712818966995e-08, "epoch": 4.7850145359489895, "percentage": 95.7, "elapsed_time": "4:14:03", "remaining_time": "0:11:24", "throughput": 8658.5, "total_tokens": 131988264} +{"current_steps": 195870, "total_steps": 204665, "loss": 0.0527, "lr": 1.1231963782319275e-08, "epoch": 4.785136686780837, "percentage": 95.7, "elapsed_time": "4:14:04", "remaining_time": "0:11:24", "throughput": 8658.57, "total_tokens": 131992424} +{"current_steps": 195875, "total_steps": 204665, "loss": 0.0, "lr": 1.1219221936258682e-08, "epoch": 4.785258837612684, "percentage": 95.71, "elapsed_time": "4:14:04", "remaining_time": "0:11:24", "throughput": 8658.58, "total_tokens": 131995560} +{"current_steps": 195880, "total_steps": 204665, "loss": 0.0, "lr": 1.1206487280877807e-08, "epoch": 4.7853809884445315, "percentage": 95.71, "elapsed_time": "4:14:04", "remaining_time": "0:11:23", "throughput": 8658.58, "total_tokens": 131998504} +{"current_steps": 195885, "total_steps": 204665, "loss": 0.0, "lr": 1.1193759816269243e-08, "epoch": 4.785503139276378, "percentage": 95.71, "elapsed_time": "4:14:05", "remaining_time": "0:11:23", "throughput": 8658.59, "total_tokens": 132001704} +{"current_steps": 195890, "total_steps": 204665, "loss": 0.0, "lr": 1.1181039542525806e-08, "epoch": 4.785625290108226, "percentage": 95.71, "elapsed_time": "4:14:05", "remaining_time": "0:11:22", "throughput": 8658.63, "total_tokens": 132005416} +{"current_steps": 195895, "total_steps": 204665, "loss": 0.0, "lr": 1.1168326459739642e-08, "epoch": 4.785747440940073, "percentage": 95.71, "elapsed_time": "4:14:05", "remaining_time": "0:11:22", "throughput": 8658.66, "total_tokens": 132008808} +{"current_steps": 195900, "total_steps": 204665, "loss": 0.0, "lr": 1.1155620568003455e-08, "epoch": 4.78586959177192, "percentage": 95.72, "elapsed_time": "4:14:06", "remaining_time": "0:11:22", "throughput": 8658.66, "total_tokens": 132011880} +{"current_steps": 195905, "total_steps": 204665, "loss": 0.0, "lr": 1.1142921867409505e-08, "epoch": 4.785991742603767, "percentage": 95.72, "elapsed_time": "4:14:06", "remaining_time": "0:11:21", "throughput": 8658.68, "total_tokens": 132015080} +{"current_steps": 195910, "total_steps": 204665, "loss": 0.0, "lr": 1.1130230358050164e-08, "epoch": 4.786113893435615, "percentage": 95.72, "elapsed_time": "4:14:06", "remaining_time": "0:11:21", "throughput": 8658.7, "total_tokens": 132018344} +{"current_steps": 195915, "total_steps": 204665, "loss": 0.0, "lr": 1.11175460400178e-08, "epoch": 4.786236044267461, "percentage": 95.72, "elapsed_time": "4:14:07", "remaining_time": "0:11:20", "throughput": 8658.7, "total_tokens": 132021352} +{"current_steps": 195920, "total_steps": 204665, "loss": 0.0, "lr": 1.1104868913404563e-08, "epoch": 4.786358195099309, "percentage": 95.73, "elapsed_time": "4:14:07", "remaining_time": "0:11:20", "throughput": 8658.74, "total_tokens": 132025064} +{"current_steps": 195925, "total_steps": 204665, "loss": 0.0, "lr": 1.1092198978302824e-08, "epoch": 4.786480345931156, "percentage": 95.73, "elapsed_time": "4:14:07", "remaining_time": "0:11:20", "throughput": 8658.77, "total_tokens": 132028520} +{"current_steps": 195930, "total_steps": 204665, "loss": 0.0, "lr": 1.107953623480451e-08, "epoch": 4.7866024967630025, "percentage": 95.73, "elapsed_time": "4:14:08", "remaining_time": "0:11:19", "throughput": 8658.81, "total_tokens": 132032232} +{"current_steps": 195935, "total_steps": 204665, "loss": 0.0, "lr": 1.1066880683001878e-08, "epoch": 4.78672464759485, "percentage": 95.73, "elapsed_time": "4:14:08", "remaining_time": "0:11:19", "throughput": 8658.83, "total_tokens": 132035496} +{"current_steps": 195940, "total_steps": 204665, "loss": 0.0001, "lr": 1.1054232322986857e-08, "epoch": 4.786846798426697, "percentage": 95.74, "elapsed_time": "4:14:09", "remaining_time": "0:11:19", "throughput": 8658.84, "total_tokens": 132038760} +{"current_steps": 195945, "total_steps": 204665, "loss": 0.0, "lr": 1.1041591154851371e-08, "epoch": 4.7869689492585445, "percentage": 95.74, "elapsed_time": "4:14:09", "remaining_time": "0:11:18", "throughput": 8658.85, "total_tokens": 132041768} +{"current_steps": 195950, "total_steps": 204665, "loss": 0.0, "lr": 1.102895717868757e-08, "epoch": 4.787091100090391, "percentage": 95.74, "elapsed_time": "4:14:09", "remaining_time": "0:11:18", "throughput": 8658.89, "total_tokens": 132045416} +{"current_steps": 195955, "total_steps": 204665, "loss": 0.0, "lr": 1.1016330394587048e-08, "epoch": 4.787213250922239, "percentage": 95.74, "elapsed_time": "4:14:10", "remaining_time": "0:11:17", "throughput": 8658.9, "total_tokens": 132048680} +{"current_steps": 195960, "total_steps": 204665, "loss": 0.0, "lr": 1.1003710802641842e-08, "epoch": 4.787335401754086, "percentage": 95.75, "elapsed_time": "4:14:10", "remaining_time": "0:11:17", "throughput": 8658.93, "total_tokens": 132052136} +{"current_steps": 195965, "total_steps": 204665, "loss": 0.0, "lr": 1.0991098402943655e-08, "epoch": 4.787457552585933, "percentage": 95.75, "elapsed_time": "4:14:10", "remaining_time": "0:11:17", "throughput": 8658.98, "total_tokens": 132056040} +{"current_steps": 195970, "total_steps": 204665, "loss": 0.0, "lr": 1.0978493195584193e-08, "epoch": 4.78757970341778, "percentage": 95.75, "elapsed_time": "4:14:11", "remaining_time": "0:11:16", "throughput": 8659.04, "total_tokens": 132060008} +{"current_steps": 195975, "total_steps": 204665, "loss": 0.0, "lr": 1.096589518065516e-08, "epoch": 4.787701854249628, "percentage": 95.75, "elapsed_time": "4:14:11", "remaining_time": "0:11:16", "throughput": 8659.07, "total_tokens": 132063528} +{"current_steps": 195980, "total_steps": 204665, "loss": 0.0, "lr": 1.095330435824826e-08, "epoch": 4.787824005081474, "percentage": 95.76, "elapsed_time": "4:14:11", "remaining_time": "0:11:15", "throughput": 8659.09, "total_tokens": 132066856} +{"current_steps": 195985, "total_steps": 204665, "loss": 0.0, "lr": 1.0940720728454755e-08, "epoch": 4.787946155913322, "percentage": 95.76, "elapsed_time": "4:14:12", "remaining_time": "0:11:15", "throughput": 8659.1, "total_tokens": 132070120} +{"current_steps": 195990, "total_steps": 204665, "loss": 0.0, "lr": 1.092814429136646e-08, "epoch": 4.788068306745169, "percentage": 95.76, "elapsed_time": "4:14:12", "remaining_time": "0:11:15", "throughput": 8659.15, "total_tokens": 132073896} +{"current_steps": 195995, "total_steps": 204665, "loss": 0.0, "lr": 1.0915575047074854e-08, "epoch": 4.7881904575770164, "percentage": 95.76, "elapsed_time": "4:14:12", "remaining_time": "0:11:14", "throughput": 8659.16, "total_tokens": 132077096} +{"current_steps": 196000, "total_steps": 204665, "loss": 0.0, "lr": 1.090301299567098e-08, "epoch": 4.788312608408863, "percentage": 95.77, "elapsed_time": "4:14:13", "remaining_time": "0:11:14", "throughput": 8659.19, "total_tokens": 132080680} +{"current_steps": 196005, "total_steps": 204665, "loss": 0.0, "lr": 1.0890458137246539e-08, "epoch": 4.78843475924071, "percentage": 95.77, "elapsed_time": "4:14:13", "remaining_time": "0:11:13", "throughput": 8659.21, "total_tokens": 132083816} +{"current_steps": 196010, "total_steps": 204665, "loss": 0.0, "lr": 1.0877910471892793e-08, "epoch": 4.788556910072558, "percentage": 95.77, "elapsed_time": "4:14:13", "remaining_time": "0:11:13", "throughput": 8659.23, "total_tokens": 132087208} +{"current_steps": 196015, "total_steps": 204665, "loss": 0.0, "lr": 1.086536999970078e-08, "epoch": 4.788679060904405, "percentage": 95.77, "elapsed_time": "4:14:14", "remaining_time": "0:11:13", "throughput": 8659.25, "total_tokens": 132090472} +{"current_steps": 196020, "total_steps": 204665, "loss": 0.0, "lr": 1.0852836720761982e-08, "epoch": 4.788801211736252, "percentage": 95.78, "elapsed_time": "4:14:14", "remaining_time": "0:11:12", "throughput": 8659.28, "total_tokens": 132093928} +{"current_steps": 196025, "total_steps": 204665, "loss": 0.0, "lr": 1.0840310635167216e-08, "epoch": 4.788923362568099, "percentage": 95.78, "elapsed_time": "4:14:14", "remaining_time": "0:11:12", "throughput": 8659.3, "total_tokens": 132097320} +{"current_steps": 196030, "total_steps": 204665, "loss": 0.0, "lr": 1.0827791743007852e-08, "epoch": 4.789045513399946, "percentage": 95.78, "elapsed_time": "4:14:15", "remaining_time": "0:11:11", "throughput": 8659.3, "total_tokens": 132100264} +{"current_steps": 196035, "total_steps": 204665, "loss": 0.0, "lr": 1.081528004437493e-08, "epoch": 4.789167664231793, "percentage": 95.78, "elapsed_time": "4:14:15", "remaining_time": "0:11:11", "throughput": 8659.33, "total_tokens": 132103848} +{"current_steps": 196040, "total_steps": 204665, "loss": 0.0, "lr": 1.0802775539359266e-08, "epoch": 4.789289815063641, "percentage": 95.79, "elapsed_time": "4:14:16", "remaining_time": "0:11:11", "throughput": 8659.35, "total_tokens": 132107176} +{"current_steps": 196045, "total_steps": 204665, "loss": 0.0, "lr": 1.0790278228051897e-08, "epoch": 4.7894119658954875, "percentage": 95.79, "elapsed_time": "4:14:16", "remaining_time": "0:11:10", "throughput": 8659.37, "total_tokens": 132110376} +{"current_steps": 196050, "total_steps": 204665, "loss": 0.0, "lr": 1.0777788110543751e-08, "epoch": 4.789534116727335, "percentage": 95.79, "elapsed_time": "4:14:16", "remaining_time": "0:11:10", "throughput": 8659.39, "total_tokens": 132113704} +{"current_steps": 196055, "total_steps": 204665, "loss": 0.0, "lr": 1.0765305186925532e-08, "epoch": 4.789656267559182, "percentage": 95.79, "elapsed_time": "4:14:17", "remaining_time": "0:11:10", "throughput": 8659.42, "total_tokens": 132117288} +{"current_steps": 196060, "total_steps": 204665, "loss": 0.0, "lr": 1.075282945728806e-08, "epoch": 4.7897784183910295, "percentage": 95.8, "elapsed_time": "4:14:17", "remaining_time": "0:11:09", "throughput": 8659.44, "total_tokens": 132120488} +{"current_steps": 196065, "total_steps": 204665, "loss": 0.0, "lr": 1.0740360921722146e-08, "epoch": 4.789900569222876, "percentage": 95.8, "elapsed_time": "4:14:17", "remaining_time": "0:11:09", "throughput": 8659.46, "total_tokens": 132123816} +{"current_steps": 196070, "total_steps": 204665, "loss": 0.0, "lr": 1.0727899580318388e-08, "epoch": 4.790022720054724, "percentage": 95.8, "elapsed_time": "4:14:18", "remaining_time": "0:11:08", "throughput": 8659.47, "total_tokens": 132126952} +{"current_steps": 196075, "total_steps": 204665, "loss": 0.0, "lr": 1.071544543316738e-08, "epoch": 4.790144870886571, "percentage": 95.8, "elapsed_time": "4:14:18", "remaining_time": "0:11:08", "throughput": 8659.5, "total_tokens": 132130536} +{"current_steps": 196080, "total_steps": 204665, "loss": 0.0, "lr": 1.0702998480359827e-08, "epoch": 4.790267021718418, "percentage": 95.81, "elapsed_time": "4:14:18", "remaining_time": "0:11:08", "throughput": 8659.52, "total_tokens": 132133800} +{"current_steps": 196085, "total_steps": 204665, "loss": 0.0, "lr": 1.0690558721986209e-08, "epoch": 4.790389172550265, "percentage": 95.81, "elapsed_time": "4:14:19", "remaining_time": "0:11:07", "throughput": 8659.53, "total_tokens": 132136872} +{"current_steps": 196090, "total_steps": 204665, "loss": 0.0001, "lr": 1.0678126158136791e-08, "epoch": 4.790511323382113, "percentage": 95.81, "elapsed_time": "4:14:19", "remaining_time": "0:11:07", "throughput": 8659.56, "total_tokens": 132140392} +{"current_steps": 196095, "total_steps": 204665, "loss": 0.0, "lr": 1.0665700788902277e-08, "epoch": 4.790633474213959, "percentage": 95.81, "elapsed_time": "4:14:19", "remaining_time": "0:11:06", "throughput": 8659.56, "total_tokens": 132143336} +{"current_steps": 196100, "total_steps": 204665, "loss": 0.0, "lr": 1.0653282614372705e-08, "epoch": 4.790755625045806, "percentage": 95.82, "elapsed_time": "4:14:20", "remaining_time": "0:11:06", "throughput": 8659.58, "total_tokens": 132146664} +{"current_steps": 196105, "total_steps": 204665, "loss": 0.0, "lr": 1.064087163463867e-08, "epoch": 4.790877775877654, "percentage": 95.82, "elapsed_time": "4:14:20", "remaining_time": "0:11:06", "throughput": 8659.6, "total_tokens": 132149928} +{"current_steps": 196110, "total_steps": 204665, "loss": 0.0, "lr": 1.0628467849790323e-08, "epoch": 4.790999926709501, "percentage": 95.82, "elapsed_time": "4:14:20", "remaining_time": "0:11:05", "throughput": 8659.62, "total_tokens": 132153320} +{"current_steps": 196115, "total_steps": 204665, "loss": 0.0, "lr": 1.0616071259917925e-08, "epoch": 4.791122077541348, "percentage": 95.82, "elapsed_time": "4:14:21", "remaining_time": "0:11:05", "throughput": 8659.62, "total_tokens": 132156328} +{"current_steps": 196120, "total_steps": 204665, "loss": 0.0, "lr": 1.0603681865111402e-08, "epoch": 4.791244228373195, "percentage": 95.82, "elapsed_time": "4:14:21", "remaining_time": "0:11:04", "throughput": 8659.63, "total_tokens": 132159272} +{"current_steps": 196125, "total_steps": 204665, "loss": 0.0, "lr": 1.0591299665461128e-08, "epoch": 4.7913663792050425, "percentage": 95.83, "elapsed_time": "4:14:21", "remaining_time": "0:11:04", "throughput": 8659.65, "total_tokens": 132162600} +{"current_steps": 196130, "total_steps": 204665, "loss": 0.0001, "lr": 1.057892466105703e-08, "epoch": 4.791488530036889, "percentage": 95.83, "elapsed_time": "4:14:22", "remaining_time": "0:11:04", "throughput": 8659.67, "total_tokens": 132165864} +{"current_steps": 196135, "total_steps": 204665, "loss": 0.0, "lr": 1.056655685198915e-08, "epoch": 4.791610680868737, "percentage": 95.83, "elapsed_time": "4:14:22", "remaining_time": "0:11:03", "throughput": 8659.7, "total_tokens": 132169512} +{"current_steps": 196140, "total_steps": 204665, "loss": 0.0, "lr": 1.0554196238347302e-08, "epoch": 4.791732831700584, "percentage": 95.83, "elapsed_time": "4:14:22", "remaining_time": "0:11:03", "throughput": 8659.71, "total_tokens": 132172520} +{"current_steps": 196145, "total_steps": 204665, "loss": 0.0, "lr": 1.0541842820221524e-08, "epoch": 4.791854982532431, "percentage": 95.84, "elapsed_time": "4:14:23", "remaining_time": "0:11:02", "throughput": 8659.72, "total_tokens": 132175656} +{"current_steps": 196150, "total_steps": 204665, "loss": 0.0, "lr": 1.0529496597701636e-08, "epoch": 4.791977133364278, "percentage": 95.84, "elapsed_time": "4:14:23", "remaining_time": "0:11:02", "throughput": 8659.79, "total_tokens": 132180008} +{"current_steps": 196155, "total_steps": 204665, "loss": 0.0, "lr": 1.0517157570877344e-08, "epoch": 4.792099284196126, "percentage": 95.84, "elapsed_time": "4:14:23", "remaining_time": "0:11:02", "throughput": 8659.82, "total_tokens": 132183400} +{"current_steps": 196160, "total_steps": 204665, "loss": 0.0, "lr": 1.0504825739838353e-08, "epoch": 4.792221435027972, "percentage": 95.84, "elapsed_time": "4:14:24", "remaining_time": "0:11:01", "throughput": 8659.84, "total_tokens": 132186856} +{"current_steps": 196165, "total_steps": 204665, "loss": 0.0, "lr": 1.049250110467459e-08, "epoch": 4.79234358585982, "percentage": 95.85, "elapsed_time": "4:14:24", "remaining_time": "0:11:01", "throughput": 8659.87, "total_tokens": 132190248} +{"current_steps": 196170, "total_steps": 204665, "loss": 0.0, "lr": 1.0480183665475317e-08, "epoch": 4.792465736691667, "percentage": 95.85, "elapsed_time": "4:14:25", "remaining_time": "0:11:01", "throughput": 8659.89, "total_tokens": 132193640} +{"current_steps": 196175, "total_steps": 204665, "loss": 0.0, "lr": 1.0467873422330464e-08, "epoch": 4.792587887523514, "percentage": 95.85, "elapsed_time": "4:14:25", "remaining_time": "0:11:00", "throughput": 8659.9, "total_tokens": 132196712} +{"current_steps": 196180, "total_steps": 204665, "loss": 0.0, "lr": 1.0455570375329181e-08, "epoch": 4.792710038355361, "percentage": 95.85, "elapsed_time": "4:14:25", "remaining_time": "0:11:00", "throughput": 8659.93, "total_tokens": 132200232} +{"current_steps": 196185, "total_steps": 204665, "loss": 0.0, "lr": 1.0443274524561396e-08, "epoch": 4.792832189187209, "percentage": 95.86, "elapsed_time": "4:14:26", "remaining_time": "0:10:59", "throughput": 8659.95, "total_tokens": 132203432} +{"current_steps": 196190, "total_steps": 204665, "loss": 0.0, "lr": 1.043098587011615e-08, "epoch": 4.7929543400190555, "percentage": 95.86, "elapsed_time": "4:14:26", "remaining_time": "0:10:59", "throughput": 8659.97, "total_tokens": 132206824} +{"current_steps": 196195, "total_steps": 204665, "loss": 0.0001, "lr": 1.0418704412082924e-08, "epoch": 4.793076490850902, "percentage": 95.86, "elapsed_time": "4:14:26", "remaining_time": "0:10:59", "throughput": 8659.99, "total_tokens": 132210088} +{"current_steps": 196200, "total_steps": 204665, "loss": 0.0003, "lr": 1.0406430150551094e-08, "epoch": 4.79319864168275, "percentage": 95.86, "elapsed_time": "4:14:27", "remaining_time": "0:10:58", "throughput": 8660.01, "total_tokens": 132213480} +{"current_steps": 196205, "total_steps": 204665, "loss": 0.0, "lr": 1.0394163085609808e-08, "epoch": 4.793320792514597, "percentage": 95.87, "elapsed_time": "4:14:27", "remaining_time": "0:10:58", "throughput": 8660.03, "total_tokens": 132216616} +{"current_steps": 196210, "total_steps": 204665, "loss": 0.0, "lr": 1.038190321734833e-08, "epoch": 4.793442943346444, "percentage": 95.87, "elapsed_time": "4:14:27", "remaining_time": "0:10:57", "throughput": 8660.05, "total_tokens": 132220008} +{"current_steps": 196215, "total_steps": 204665, "loss": 0.0003, "lr": 1.0369650545855813e-08, "epoch": 4.793565094178291, "percentage": 95.87, "elapsed_time": "4:14:28", "remaining_time": "0:10:57", "throughput": 8660.05, "total_tokens": 132222952} +{"current_steps": 196220, "total_steps": 204665, "loss": 0.0, "lr": 1.0357405071221404e-08, "epoch": 4.793687245010139, "percentage": 95.87, "elapsed_time": "4:14:28", "remaining_time": "0:10:57", "throughput": 8660.07, "total_tokens": 132226216} +{"current_steps": 196225, "total_steps": 204665, "loss": 0.0, "lr": 1.0345166793534255e-08, "epoch": 4.793809395841985, "percentage": 95.88, "elapsed_time": "4:14:28", "remaining_time": "0:10:56", "throughput": 8660.07, "total_tokens": 132229224} +{"current_steps": 196230, "total_steps": 204665, "loss": 0.0, "lr": 1.0332935712883073e-08, "epoch": 4.793931546673833, "percentage": 95.88, "elapsed_time": "4:14:29", "remaining_time": "0:10:56", "throughput": 8660.09, "total_tokens": 132232488} +{"current_steps": 196235, "total_steps": 204665, "loss": 0.0, "lr": 1.032071182935701e-08, "epoch": 4.79405369750568, "percentage": 95.88, "elapsed_time": "4:14:29", "remaining_time": "0:10:55", "throughput": 8660.08, "total_tokens": 132235304} +{"current_steps": 196240, "total_steps": 204665, "loss": 0.0, "lr": 1.0308495143044993e-08, "epoch": 4.794175848337527, "percentage": 95.88, "elapsed_time": "4:14:29", "remaining_time": "0:10:55", "throughput": 8660.11, "total_tokens": 132238696} +{"current_steps": 196245, "total_steps": 204665, "loss": 0.0001, "lr": 1.029628565403573e-08, "epoch": 4.794297999169374, "percentage": 95.89, "elapsed_time": "4:14:30", "remaining_time": "0:10:55", "throughput": 8660.13, "total_tokens": 132242024} +{"current_steps": 196250, "total_steps": 204665, "loss": 0.0, "lr": 1.028408336241804e-08, "epoch": 4.794420150001222, "percentage": 95.89, "elapsed_time": "4:14:30", "remaining_time": "0:10:54", "throughput": 8660.16, "total_tokens": 132245480} +{"current_steps": 196255, "total_steps": 204665, "loss": 0.0, "lr": 1.0271888268280737e-08, "epoch": 4.7945423008330685, "percentage": 95.89, "elapsed_time": "4:14:30", "remaining_time": "0:10:54", "throughput": 8660.18, "total_tokens": 132248936} +{"current_steps": 196260, "total_steps": 204665, "loss": 0.0, "lr": 1.0259700371712532e-08, "epoch": 4.794664451664916, "percentage": 95.89, "elapsed_time": "4:14:31", "remaining_time": "0:10:54", "throughput": 8660.22, "total_tokens": 132252584} +{"current_steps": 196265, "total_steps": 204665, "loss": 0.0, "lr": 1.0247519672801907e-08, "epoch": 4.794786602496763, "percentage": 95.9, "elapsed_time": "4:14:31", "remaining_time": "0:10:53", "throughput": 8660.23, "total_tokens": 132255784} +{"current_steps": 196270, "total_steps": 204665, "loss": 0.0, "lr": 1.0235346171637571e-08, "epoch": 4.7949087533286106, "percentage": 95.9, "elapsed_time": "4:14:31", "remaining_time": "0:10:53", "throughput": 8660.27, "total_tokens": 132259368} +{"current_steps": 196275, "total_steps": 204665, "loss": 0.0, "lr": 1.0223179868308007e-08, "epoch": 4.795030904160457, "percentage": 95.9, "elapsed_time": "4:14:32", "remaining_time": "0:10:52", "throughput": 8660.29, "total_tokens": 132262824} +{"current_steps": 196280, "total_steps": 204665, "loss": 0.0, "lr": 1.02110207629017e-08, "epoch": 4.795153054992305, "percentage": 95.9, "elapsed_time": "4:14:32", "remaining_time": "0:10:52", "throughput": 8660.31, "total_tokens": 132266024} +{"current_steps": 196285, "total_steps": 204665, "loss": 0.0, "lr": 1.0198868855507026e-08, "epoch": 4.795275205824152, "percentage": 95.91, "elapsed_time": "4:14:33", "remaining_time": "0:10:52", "throughput": 8660.35, "total_tokens": 132269672} +{"current_steps": 196290, "total_steps": 204665, "loss": 0.0, "lr": 1.0186724146212467e-08, "epoch": 4.795397356655998, "percentage": 95.91, "elapsed_time": "4:14:33", "remaining_time": "0:10:51", "throughput": 8660.36, "total_tokens": 132272872} +{"current_steps": 196295, "total_steps": 204665, "loss": 0.0, "lr": 1.0174586635106285e-08, "epoch": 4.795519507487846, "percentage": 95.91, "elapsed_time": "4:14:33", "remaining_time": "0:10:51", "throughput": 8660.37, "total_tokens": 132275944} +{"current_steps": 196300, "total_steps": 204665, "loss": 0.0, "lr": 1.0162456322276747e-08, "epoch": 4.795641658319693, "percentage": 95.91, "elapsed_time": "4:14:34", "remaining_time": "0:10:50", "throughput": 8660.4, "total_tokens": 132279400} +{"current_steps": 196305, "total_steps": 204665, "loss": 0.0, "lr": 1.0150333207812001e-08, "epoch": 4.79576380915154, "percentage": 95.92, "elapsed_time": "4:14:34", "remaining_time": "0:10:50", "throughput": 8660.44, "total_tokens": 132283240} +{"current_steps": 196310, "total_steps": 204665, "loss": 0.0, "lr": 1.013821729180031e-08, "epoch": 4.795885959983387, "percentage": 95.92, "elapsed_time": "4:14:34", "remaining_time": "0:10:50", "throughput": 8660.45, "total_tokens": 132286248} +{"current_steps": 196315, "total_steps": 204665, "loss": 0.0, "lr": 1.0126108574329718e-08, "epoch": 4.796008110815235, "percentage": 95.92, "elapsed_time": "4:14:35", "remaining_time": "0:10:49", "throughput": 8660.48, "total_tokens": 132289768} +{"current_steps": 196320, "total_steps": 204665, "loss": 0.0453, "lr": 1.0114007055488261e-08, "epoch": 4.796130261647082, "percentage": 95.92, "elapsed_time": "4:14:35", "remaining_time": "0:10:49", "throughput": 8660.48, "total_tokens": 132292840} +{"current_steps": 196325, "total_steps": 204665, "loss": 0.0275, "lr": 1.0101912735364092e-08, "epoch": 4.796252412478929, "percentage": 95.93, "elapsed_time": "4:14:35", "remaining_time": "0:10:48", "throughput": 8660.51, "total_tokens": 132296168} +{"current_steps": 196330, "total_steps": 204665, "loss": 0.0, "lr": 1.0089825614045032e-08, "epoch": 4.796374563310776, "percentage": 95.93, "elapsed_time": "4:14:36", "remaining_time": "0:10:48", "throughput": 8660.54, "total_tokens": 132299688} +{"current_steps": 196335, "total_steps": 204665, "loss": 0.0, "lr": 1.00777456916189e-08, "epoch": 4.796496714142624, "percentage": 95.93, "elapsed_time": "4:14:36", "remaining_time": "0:10:48", "throughput": 8660.56, "total_tokens": 132302952} +{"current_steps": 196340, "total_steps": 204665, "loss": 0.0, "lr": 1.0065672968173734e-08, "epoch": 4.79661886497447, "percentage": 95.93, "elapsed_time": "4:14:36", "remaining_time": "0:10:47", "throughput": 8660.56, "total_tokens": 132305960} +{"current_steps": 196345, "total_steps": 204665, "loss": 0.0, "lr": 1.0053607443797351e-08, "epoch": 4.796741015806318, "percentage": 95.93, "elapsed_time": "4:14:37", "remaining_time": "0:10:47", "throughput": 8660.57, "total_tokens": 132309096} +{"current_steps": 196350, "total_steps": 204665, "loss": 0.0, "lr": 1.0041549118577353e-08, "epoch": 4.796863166638165, "percentage": 95.94, "elapsed_time": "4:14:37", "remaining_time": "0:10:46", "throughput": 8660.6, "total_tokens": 132312552} +{"current_steps": 196355, "total_steps": 204665, "loss": 0.0003, "lr": 1.0029497992601443e-08, "epoch": 4.796985317470012, "percentage": 95.94, "elapsed_time": "4:14:37", "remaining_time": "0:10:46", "throughput": 8660.6, "total_tokens": 132315624} +{"current_steps": 196360, "total_steps": 204665, "loss": 0.0, "lr": 1.001745406595722e-08, "epoch": 4.797107468301859, "percentage": 95.94, "elapsed_time": "4:14:38", "remaining_time": "0:10:46", "throughput": 8660.63, "total_tokens": 132319144} +{"current_steps": 196365, "total_steps": 204665, "loss": 0.0, "lr": 1.0005417338732502e-08, "epoch": 4.797229619133706, "percentage": 95.94, "elapsed_time": "4:14:38", "remaining_time": "0:10:45", "throughput": 8660.65, "total_tokens": 132322472} +{"current_steps": 196370, "total_steps": 204665, "loss": 0.0, "lr": 9.993387811014553e-09, "epoch": 4.7973517699655535, "percentage": 95.95, "elapsed_time": "4:14:38", "remaining_time": "0:10:45", "throughput": 8660.67, "total_tokens": 132325672} +{"current_steps": 196375, "total_steps": 204665, "loss": 0.0, "lr": 9.98136548289097e-09, "epoch": 4.797473920797401, "percentage": 95.95, "elapsed_time": "4:14:39", "remaining_time": "0:10:45", "throughput": 8660.67, "total_tokens": 132328744} +{"current_steps": 196380, "total_steps": 204665, "loss": 0.0, "lr": 9.969350354449236e-09, "epoch": 4.797596071629248, "percentage": 95.95, "elapsed_time": "4:14:39", "remaining_time": "0:10:44", "throughput": 8660.68, "total_tokens": 132331880} +{"current_steps": 196385, "total_steps": 204665, "loss": 0.0, "lr": 9.957342425776617e-09, "epoch": 4.797718222461095, "percentage": 95.95, "elapsed_time": "4:14:39", "remaining_time": "0:10:44", "throughput": 8660.71, "total_tokens": 132335208} +{"current_steps": 196390, "total_steps": 204665, "loss": 0.0, "lr": 9.945341696960596e-09, "epoch": 4.797840373292942, "percentage": 95.96, "elapsed_time": "4:14:40", "remaining_time": "0:10:43", "throughput": 8660.73, "total_tokens": 132338536} +{"current_steps": 196395, "total_steps": 204665, "loss": 0.0, "lr": 9.933348168088329e-09, "epoch": 4.797962524124789, "percentage": 95.96, "elapsed_time": "4:14:40", "remaining_time": "0:10:43", "throughput": 8660.75, "total_tokens": 132341928} +{"current_steps": 196400, "total_steps": 204665, "loss": 0.0, "lr": 9.921361839246967e-09, "epoch": 4.798084674956637, "percentage": 95.96, "elapsed_time": "4:14:41", "remaining_time": "0:10:43", "throughput": 8660.77, "total_tokens": 132345256} +{"current_steps": 196405, "total_steps": 204665, "loss": 0.0, "lr": 9.909382710523773e-09, "epoch": 4.798206825788483, "percentage": 95.96, "elapsed_time": "4:14:41", "remaining_time": "0:10:42", "throughput": 8660.78, "total_tokens": 132348456} +{"current_steps": 196410, "total_steps": 204665, "loss": 0.0, "lr": 9.897410782005789e-09, "epoch": 4.798328976620331, "percentage": 95.97, "elapsed_time": "4:14:41", "remaining_time": "0:10:42", "throughput": 8660.79, "total_tokens": 132351464} +{"current_steps": 196415, "total_steps": 204665, "loss": 0.0, "lr": 9.885446053780278e-09, "epoch": 4.798451127452178, "percentage": 95.97, "elapsed_time": "4:14:42", "remaining_time": "0:10:41", "throughput": 8660.81, "total_tokens": 132354728} +{"current_steps": 196420, "total_steps": 204665, "loss": 0.0, "lr": 9.87348852593406e-09, "epoch": 4.798573278284025, "percentage": 95.97, "elapsed_time": "4:14:42", "remaining_time": "0:10:41", "throughput": 8660.85, "total_tokens": 132358376} +{"current_steps": 196425, "total_steps": 204665, "loss": 0.0, "lr": 9.861538198554175e-09, "epoch": 4.798695429115872, "percentage": 95.97, "elapsed_time": "4:14:42", "remaining_time": "0:10:41", "throughput": 8660.85, "total_tokens": 132361448} +{"current_steps": 196430, "total_steps": 204665, "loss": 0.0, "lr": 9.849595071727445e-09, "epoch": 4.79881757994772, "percentage": 95.98, "elapsed_time": "4:14:43", "remaining_time": "0:10:40", "throughput": 8660.87, "total_tokens": 132364712} +{"current_steps": 196435, "total_steps": 204665, "loss": 0.0, "lr": 9.837659145540689e-09, "epoch": 4.7989397307795665, "percentage": 95.98, "elapsed_time": "4:14:43", "remaining_time": "0:10:40", "throughput": 8660.87, "total_tokens": 132367656} +{"current_steps": 196440, "total_steps": 204665, "loss": 0.0, "lr": 9.825730420080946e-09, "epoch": 4.799061881611414, "percentage": 95.98, "elapsed_time": "4:14:43", "remaining_time": "0:10:39", "throughput": 8660.89, "total_tokens": 132370920} +{"current_steps": 196445, "total_steps": 204665, "loss": 0.0, "lr": 9.813808895434706e-09, "epoch": 4.799184032443261, "percentage": 95.98, "elapsed_time": "4:14:44", "remaining_time": "0:10:39", "throughput": 8660.92, "total_tokens": 132374440} +{"current_steps": 196450, "total_steps": 204665, "loss": 0.0001, "lr": 9.801894571688895e-09, "epoch": 4.7993061832751085, "percentage": 95.99, "elapsed_time": "4:14:44", "remaining_time": "0:10:39", "throughput": 8660.94, "total_tokens": 132377832} +{"current_steps": 196455, "total_steps": 204665, "loss": 0.0, "lr": 9.789987448930004e-09, "epoch": 4.799428334106955, "percentage": 95.99, "elapsed_time": "4:14:44", "remaining_time": "0:10:38", "throughput": 8660.95, "total_tokens": 132380904} +{"current_steps": 196460, "total_steps": 204665, "loss": 0.0, "lr": 9.778087527244628e-09, "epoch": 4.799550484938802, "percentage": 95.99, "elapsed_time": "4:14:45", "remaining_time": "0:10:38", "throughput": 8660.96, "total_tokens": 132384104} +{"current_steps": 196465, "total_steps": 204665, "loss": 0.0, "lr": 9.766194806719364e-09, "epoch": 4.79967263577065, "percentage": 95.99, "elapsed_time": "4:14:45", "remaining_time": "0:10:37", "throughput": 8661.0, "total_tokens": 132387624} +{"current_steps": 196470, "total_steps": 204665, "loss": 0.0, "lr": 9.754309287440588e-09, "epoch": 4.799794786602497, "percentage": 96.0, "elapsed_time": "4:14:45", "remaining_time": "0:10:37", "throughput": 8661.0, "total_tokens": 132390568} +{"current_steps": 196475, "total_steps": 204665, "loss": 0.0, "lr": 9.742430969494896e-09, "epoch": 4.799916937434344, "percentage": 96.0, "elapsed_time": "4:14:46", "remaining_time": "0:10:37", "throughput": 8661.01, "total_tokens": 132393704} +{"current_steps": 196480, "total_steps": 204665, "loss": 0.0, "lr": 9.730559852968557e-09, "epoch": 4.800039088266191, "percentage": 96.0, "elapsed_time": "4:14:46", "remaining_time": "0:10:36", "throughput": 8661.02, "total_tokens": 132396968} +{"current_steps": 196485, "total_steps": 204665, "loss": 0.0, "lr": 9.718695937948052e-09, "epoch": 4.800161239098038, "percentage": 96.0, "elapsed_time": "4:14:46", "remaining_time": "0:10:36", "throughput": 8661.05, "total_tokens": 132400488} +{"current_steps": 196490, "total_steps": 204665, "loss": 0.0, "lr": 9.706839224519426e-09, "epoch": 4.800283389929885, "percentage": 96.01, "elapsed_time": "4:14:47", "remaining_time": "0:10:36", "throughput": 8661.05, "total_tokens": 132403368} +{"current_steps": 196495, "total_steps": 204665, "loss": 0.0, "lr": 9.694989712769053e-09, "epoch": 4.800405540761733, "percentage": 96.01, "elapsed_time": "4:14:47", "remaining_time": "0:10:35", "throughput": 8661.07, "total_tokens": 132406632} +{"current_steps": 196500, "total_steps": 204665, "loss": 0.0, "lr": 9.683147402783088e-09, "epoch": 4.8005276915935795, "percentage": 96.01, "elapsed_time": "4:14:47", "remaining_time": "0:10:35", "throughput": 8661.07, "total_tokens": 132409640} +{"current_steps": 196505, "total_steps": 204665, "loss": 0.0, "lr": 9.671312294647683e-09, "epoch": 4.800649842425427, "percentage": 96.01, "elapsed_time": "4:14:48", "remaining_time": "0:10:34", "throughput": 8661.09, "total_tokens": 132412840} +{"current_steps": 196510, "total_steps": 204665, "loss": 0.0001, "lr": 9.659484388448768e-09, "epoch": 4.800771993257274, "percentage": 96.02, "elapsed_time": "4:14:48", "remaining_time": "0:10:34", "throughput": 8661.1, "total_tokens": 132415976} +{"current_steps": 196515, "total_steps": 204665, "loss": 0.0, "lr": 9.6476636842725e-09, "epoch": 4.8008941440891215, "percentage": 96.02, "elapsed_time": "4:14:48", "remaining_time": "0:10:34", "throughput": 8661.14, "total_tokens": 132419688} +{"current_steps": 196520, "total_steps": 204665, "loss": 0.0, "lr": 9.635850182204809e-09, "epoch": 4.801016294920968, "percentage": 96.02, "elapsed_time": "4:14:49", "remaining_time": "0:10:33", "throughput": 8661.17, "total_tokens": 132423272} +{"current_steps": 196525, "total_steps": 204665, "loss": 0.0, "lr": 9.624043882331511e-09, "epoch": 4.801138445752816, "percentage": 96.02, "elapsed_time": "4:14:49", "remaining_time": "0:10:33", "throughput": 8661.21, "total_tokens": 132426920} +{"current_steps": 196530, "total_steps": 204665, "loss": 0.0, "lr": 9.612244784738543e-09, "epoch": 4.801260596584663, "percentage": 96.03, "elapsed_time": "4:14:49", "remaining_time": "0:10:32", "throughput": 8661.22, "total_tokens": 132430056} +{"current_steps": 196535, "total_steps": 204665, "loss": 0.0, "lr": 9.600452889511835e-09, "epoch": 4.80138274741651, "percentage": 96.03, "elapsed_time": "4:14:50", "remaining_time": "0:10:32", "throughput": 8661.23, "total_tokens": 132433128} +{"current_steps": 196540, "total_steps": 204665, "loss": 0.0, "lr": 9.588668196736871e-09, "epoch": 4.801504898248357, "percentage": 96.03, "elapsed_time": "4:14:50", "remaining_time": "0:10:32", "throughput": 8661.24, "total_tokens": 132436264} +{"current_steps": 196545, "total_steps": 204665, "loss": 0.0, "lr": 9.576890706499696e-09, "epoch": 4.801627049080205, "percentage": 96.03, "elapsed_time": "4:14:51", "remaining_time": "0:10:31", "throughput": 8661.27, "total_tokens": 132439720} +{"current_steps": 196550, "total_steps": 204665, "loss": 0.0, "lr": 9.565120418885574e-09, "epoch": 4.801749199912051, "percentage": 96.03, "elapsed_time": "4:14:51", "remaining_time": "0:10:31", "throughput": 8661.3, "total_tokens": 132443176} +{"current_steps": 196555, "total_steps": 204665, "loss": 0.0, "lr": 9.553357333980438e-09, "epoch": 4.801871350743898, "percentage": 96.04, "elapsed_time": "4:14:51", "remaining_time": "0:10:30", "throughput": 8661.34, "total_tokens": 132446888} +{"current_steps": 196560, "total_steps": 204665, "loss": 0.0354, "lr": 9.541601451869552e-09, "epoch": 4.801993501575746, "percentage": 96.04, "elapsed_time": "4:14:52", "remaining_time": "0:10:30", "throughput": 8661.35, "total_tokens": 132450024} +{"current_steps": 196565, "total_steps": 204665, "loss": 0.0, "lr": 9.529852772638625e-09, "epoch": 4.8021156524075925, "percentage": 96.04, "elapsed_time": "4:14:52", "remaining_time": "0:10:30", "throughput": 8661.39, "total_tokens": 132453800} +{"current_steps": 196570, "total_steps": 204665, "loss": 0.0, "lr": 9.518111296372921e-09, "epoch": 4.80223780323944, "percentage": 96.04, "elapsed_time": "4:14:52", "remaining_time": "0:10:29", "throughput": 8661.4, "total_tokens": 132456936} +{"current_steps": 196575, "total_steps": 204665, "loss": 0.0, "lr": 9.506377023158042e-09, "epoch": 4.802359954071287, "percentage": 96.05, "elapsed_time": "4:14:53", "remaining_time": "0:10:29", "throughput": 8661.41, "total_tokens": 132459944} +{"current_steps": 196580, "total_steps": 204665, "loss": 0.0, "lr": 9.494649953079137e-09, "epoch": 4.8024821049031345, "percentage": 96.05, "elapsed_time": "4:14:53", "remaining_time": "0:10:28", "throughput": 8661.46, "total_tokens": 132463784} +{"current_steps": 196585, "total_steps": 204665, "loss": 0.0, "lr": 9.482930086221585e-09, "epoch": 4.802604255734981, "percentage": 96.05, "elapsed_time": "4:14:53", "remaining_time": "0:10:28", "throughput": 8661.46, "total_tokens": 132466792} +{"current_steps": 196590, "total_steps": 204665, "loss": 0.0, "lr": 9.471217422670541e-09, "epoch": 4.802726406566829, "percentage": 96.05, "elapsed_time": "4:14:54", "remaining_time": "0:10:28", "throughput": 8661.51, "total_tokens": 132470696} +{"current_steps": 196595, "total_steps": 204665, "loss": 0.0, "lr": 9.459511962511268e-09, "epoch": 4.802848557398676, "percentage": 96.06, "elapsed_time": "4:14:54", "remaining_time": "0:10:27", "throughput": 8661.55, "total_tokens": 132474408} +{"current_steps": 196600, "total_steps": 204665, "loss": 0.0, "lr": 9.44781370582881e-09, "epoch": 4.802970708230523, "percentage": 96.06, "elapsed_time": "4:14:54", "remaining_time": "0:10:27", "throughput": 8661.58, "total_tokens": 132477864} +{"current_steps": 196605, "total_steps": 204665, "loss": 0.0, "lr": 9.436122652708212e-09, "epoch": 4.80309285906237, "percentage": 96.06, "elapsed_time": "4:14:55", "remaining_time": "0:10:27", "throughput": 8661.59, "total_tokens": 132480872} +{"current_steps": 196610, "total_steps": 204665, "loss": 0.0, "lr": 9.424438803234736e-09, "epoch": 4.803215009894218, "percentage": 96.06, "elapsed_time": "4:14:55", "remaining_time": "0:10:26", "throughput": 8661.61, "total_tokens": 132484264} +{"current_steps": 196615, "total_steps": 204665, "loss": 0.0, "lr": 9.412762157493092e-09, "epoch": 4.803337160726064, "percentage": 96.07, "elapsed_time": "4:14:55", "remaining_time": "0:10:26", "throughput": 8661.64, "total_tokens": 132487848} +{"current_steps": 196620, "total_steps": 204665, "loss": 0.0, "lr": 9.401092715568215e-09, "epoch": 4.803459311557912, "percentage": 96.07, "elapsed_time": "4:14:56", "remaining_time": "0:10:25", "throughput": 8661.67, "total_tokens": 132491304} +{"current_steps": 196625, "total_steps": 204665, "loss": 0.0, "lr": 9.389430477545035e-09, "epoch": 4.803581462389759, "percentage": 96.07, "elapsed_time": "4:14:56", "remaining_time": "0:10:25", "throughput": 8661.7, "total_tokens": 132494824} +{"current_steps": 196630, "total_steps": 204665, "loss": 0.0, "lr": 9.377775443508485e-09, "epoch": 4.8037036132216056, "percentage": 96.07, "elapsed_time": "4:14:56", "remaining_time": "0:10:25", "throughput": 8661.73, "total_tokens": 132498280} +{"current_steps": 196635, "total_steps": 204665, "loss": 0.0, "lr": 9.366127613543051e-09, "epoch": 4.803825764053453, "percentage": 96.08, "elapsed_time": "4:14:57", "remaining_time": "0:10:24", "throughput": 8661.76, "total_tokens": 132501672} +{"current_steps": 196640, "total_steps": 204665, "loss": 0.0, "lr": 9.354486987733668e-09, "epoch": 4.803947914885301, "percentage": 96.08, "elapsed_time": "4:14:57", "remaining_time": "0:10:24", "throughput": 8661.78, "total_tokens": 132505000} +{"current_steps": 196645, "total_steps": 204665, "loss": 0.0, "lr": 9.342853566164932e-09, "epoch": 4.804070065717148, "percentage": 96.08, "elapsed_time": "4:14:58", "remaining_time": "0:10:23", "throughput": 8661.79, "total_tokens": 132508328} +{"current_steps": 196650, "total_steps": 204665, "loss": 0.0, "lr": 9.331227348921333e-09, "epoch": 4.804192216548994, "percentage": 96.08, "elapsed_time": "4:14:58", "remaining_time": "0:10:23", "throughput": 8661.82, "total_tokens": 132511720} +{"current_steps": 196655, "total_steps": 204665, "loss": 0.0, "lr": 9.319608336087582e-09, "epoch": 4.804314367380842, "percentage": 96.09, "elapsed_time": "4:14:58", "remaining_time": "0:10:23", "throughput": 8661.85, "total_tokens": 132515176} +{"current_steps": 196660, "total_steps": 204665, "loss": 0.0, "lr": 9.307996527747941e-09, "epoch": 4.804436518212689, "percentage": 96.09, "elapsed_time": "4:14:59", "remaining_time": "0:10:22", "throughput": 8661.88, "total_tokens": 132518760} +{"current_steps": 196665, "total_steps": 204665, "loss": 0.0, "lr": 9.296391923987235e-09, "epoch": 4.804558669044536, "percentage": 96.09, "elapsed_time": "4:14:59", "remaining_time": "0:10:22", "throughput": 8661.89, "total_tokens": 132521832} +{"current_steps": 196670, "total_steps": 204665, "loss": 0.0, "lr": 9.284794524889505e-09, "epoch": 4.804680819876383, "percentage": 96.09, "elapsed_time": "4:14:59", "remaining_time": "0:10:21", "throughput": 8661.9, "total_tokens": 132524968} +{"current_steps": 196675, "total_steps": 204665, "loss": 0.0, "lr": 9.273204330539242e-09, "epoch": 4.804802970708231, "percentage": 96.1, "elapsed_time": "4:15:00", "remaining_time": "0:10:21", "throughput": 8661.94, "total_tokens": 132528616} +{"current_steps": 196680, "total_steps": 204665, "loss": 0.0, "lr": 9.26162134102071e-09, "epoch": 4.8049251215400774, "percentage": 96.1, "elapsed_time": "4:15:00", "remaining_time": "0:10:21", "throughput": 8661.96, "total_tokens": 132531880} +{"current_steps": 196685, "total_steps": 204665, "loss": 0.0, "lr": 9.250045556418173e-09, "epoch": 4.805047272371925, "percentage": 96.1, "elapsed_time": "4:15:00", "remaining_time": "0:10:20", "throughput": 8661.96, "total_tokens": 132534888} +{"current_steps": 196690, "total_steps": 204665, "loss": 0.0, "lr": 9.23847697681579e-09, "epoch": 4.805169423203772, "percentage": 96.1, "elapsed_time": "4:15:01", "remaining_time": "0:10:20", "throughput": 8661.97, "total_tokens": 132537960} +{"current_steps": 196695, "total_steps": 204665, "loss": 0.0, "lr": 9.226915602297602e-09, "epoch": 4.8052915740356195, "percentage": 96.11, "elapsed_time": "4:15:01", "remaining_time": "0:10:20", "throughput": 8661.98, "total_tokens": 132541096} +{"current_steps": 196700, "total_steps": 204665, "loss": 0.0, "lr": 9.215361432947877e-09, "epoch": 4.805413724867466, "percentage": 96.11, "elapsed_time": "4:15:01", "remaining_time": "0:10:19", "throughput": 8662.01, "total_tokens": 132544616} +{"current_steps": 196705, "total_steps": 204665, "loss": 0.0, "lr": 9.203814468850547e-09, "epoch": 4.805535875699314, "percentage": 96.11, "elapsed_time": "4:15:02", "remaining_time": "0:10:19", "throughput": 8662.04, "total_tokens": 132548008} +{"current_steps": 196710, "total_steps": 204665, "loss": 0.0, "lr": 9.192274710089432e-09, "epoch": 4.805658026531161, "percentage": 96.11, "elapsed_time": "4:15:02", "remaining_time": "0:10:18", "throughput": 8662.08, "total_tokens": 132551784} +{"current_steps": 196715, "total_steps": 204665, "loss": 0.0, "lr": 9.180742156748688e-09, "epoch": 4.805780177363008, "percentage": 96.12, "elapsed_time": "4:15:02", "remaining_time": "0:10:18", "throughput": 8662.1, "total_tokens": 132555112} +{"current_steps": 196720, "total_steps": 204665, "loss": 0.0536, "lr": 9.169216808912028e-09, "epoch": 4.805902328194855, "percentage": 96.12, "elapsed_time": "4:15:03", "remaining_time": "0:10:18", "throughput": 8662.12, "total_tokens": 132558376} +{"current_steps": 196725, "total_steps": 204665, "loss": 0.0, "lr": 9.157698666663382e-09, "epoch": 4.806024479026702, "percentage": 96.12, "elapsed_time": "4:15:03", "remaining_time": "0:10:17", "throughput": 8662.15, "total_tokens": 132561896} +{"current_steps": 196730, "total_steps": 204665, "loss": 0.0, "lr": 9.146187730086463e-09, "epoch": 4.806146629858549, "percentage": 96.12, "elapsed_time": "4:15:03", "remaining_time": "0:10:17", "throughput": 8662.16, "total_tokens": 132564968} +{"current_steps": 196735, "total_steps": 204665, "loss": 0.0, "lr": 9.134683999264981e-09, "epoch": 4.806268780690397, "percentage": 96.13, "elapsed_time": "4:15:04", "remaining_time": "0:10:16", "throughput": 8662.17, "total_tokens": 132568168} +{"current_steps": 196740, "total_steps": 204665, "loss": 0.0, "lr": 9.123187474282535e-09, "epoch": 4.806390931522244, "percentage": 96.13, "elapsed_time": "4:15:04", "remaining_time": "0:10:16", "throughput": 8662.18, "total_tokens": 132571240} +{"current_steps": 196745, "total_steps": 204665, "loss": 0.0, "lr": 9.111698155222724e-09, "epoch": 4.8065130823540905, "percentage": 96.13, "elapsed_time": "4:15:04", "remaining_time": "0:10:16", "throughput": 8662.22, "total_tokens": 132575016} +{"current_steps": 196750, "total_steps": 204665, "loss": 0.0, "lr": 9.100216042169262e-09, "epoch": 4.806635233185938, "percentage": 96.13, "elapsed_time": "4:15:05", "remaining_time": "0:10:15", "throughput": 8662.22, "total_tokens": 132577832} +{"current_steps": 196755, "total_steps": 204665, "loss": 0.0, "lr": 9.088741135205525e-09, "epoch": 4.806757384017785, "percentage": 96.14, "elapsed_time": "4:15:05", "remaining_time": "0:10:15", "throughput": 8662.24, "total_tokens": 132581224} +{"current_steps": 196760, "total_steps": 204665, "loss": 0.0001, "lr": 9.077273434415e-09, "epoch": 4.8068795348496325, "percentage": 96.14, "elapsed_time": "4:15:05", "remaining_time": "0:10:14", "throughput": 8662.26, "total_tokens": 132584424} +{"current_steps": 196765, "total_steps": 204665, "loss": 0.0, "lr": 9.065812939881067e-09, "epoch": 4.807001685681479, "percentage": 96.14, "elapsed_time": "4:15:06", "remaining_time": "0:10:14", "throughput": 8662.27, "total_tokens": 132587560} +{"current_steps": 196770, "total_steps": 204665, "loss": 0.0, "lr": 9.054359651687105e-09, "epoch": 4.807123836513327, "percentage": 96.14, "elapsed_time": "4:15:06", "remaining_time": "0:10:14", "throughput": 8662.27, "total_tokens": 132590568} +{"current_steps": 196775, "total_steps": 204665, "loss": 0.0, "lr": 9.042913569916266e-09, "epoch": 4.807245987345174, "percentage": 96.14, "elapsed_time": "4:15:07", "remaining_time": "0:10:13", "throughput": 8662.3, "total_tokens": 132594024} +{"current_steps": 196780, "total_steps": 204665, "loss": 0.0, "lr": 9.03147469465193e-09, "epoch": 4.807368138177021, "percentage": 96.15, "elapsed_time": "4:15:07", "remaining_time": "0:10:13", "throughput": 8662.34, "total_tokens": 132597608} +{"current_steps": 196785, "total_steps": 204665, "loss": 0.0, "lr": 9.020043025977253e-09, "epoch": 4.807490289008868, "percentage": 96.15, "elapsed_time": "4:15:07", "remaining_time": "0:10:12", "throughput": 8662.36, "total_tokens": 132601000} +{"current_steps": 196790, "total_steps": 204665, "loss": 0.0, "lr": 9.00861856397539e-09, "epoch": 4.807612439840716, "percentage": 96.15, "elapsed_time": "4:15:08", "remaining_time": "0:10:12", "throughput": 8662.4, "total_tokens": 132604584} +{"current_steps": 196795, "total_steps": 204665, "loss": 0.0, "lr": 8.997201308729385e-09, "epoch": 4.807734590672562, "percentage": 96.15, "elapsed_time": "4:15:08", "remaining_time": "0:10:12", "throughput": 8662.4, "total_tokens": 132607656} +{"current_steps": 196800, "total_steps": 204665, "loss": 0.0, "lr": 8.985791260322283e-09, "epoch": 4.80785674150441, "percentage": 96.16, "elapsed_time": "4:15:08", "remaining_time": "0:10:11", "throughput": 8662.44, "total_tokens": 132611304} +{"current_steps": 196805, "total_steps": 204665, "loss": 0.0, "lr": 8.97438841883713e-09, "epoch": 4.807978892336257, "percentage": 96.16, "elapsed_time": "4:15:09", "remaining_time": "0:10:11", "throughput": 8662.46, "total_tokens": 132614632} +{"current_steps": 196810, "total_steps": 204665, "loss": 0.001, "lr": 8.962992784356749e-09, "epoch": 4.808101043168104, "percentage": 96.16, "elapsed_time": "4:15:09", "remaining_time": "0:10:11", "throughput": 8662.49, "total_tokens": 132618152} +{"current_steps": 196815, "total_steps": 204665, "loss": 0.0, "lr": 8.95160435696396e-09, "epoch": 4.808223193999951, "percentage": 96.16, "elapsed_time": "4:15:09", "remaining_time": "0:10:10", "throughput": 8662.52, "total_tokens": 132621544} +{"current_steps": 196820, "total_steps": 204665, "loss": 0.0, "lr": 8.940223136741698e-09, "epoch": 4.808345344831798, "percentage": 96.17, "elapsed_time": "4:15:10", "remaining_time": "0:10:10", "throughput": 8662.55, "total_tokens": 132625128} +{"current_steps": 196825, "total_steps": 204665, "loss": 0.0, "lr": 8.928849123772674e-09, "epoch": 4.8084674956636455, "percentage": 96.17, "elapsed_time": "4:15:10", "remaining_time": "0:10:09", "throughput": 8662.56, "total_tokens": 132628200} +{"current_steps": 196830, "total_steps": 204665, "loss": 0.0, "lr": 8.917482318139713e-09, "epoch": 4.808589646495492, "percentage": 96.17, "elapsed_time": "4:15:10", "remaining_time": "0:10:09", "throughput": 8662.6, "total_tokens": 132631848} +{"current_steps": 196835, "total_steps": 204665, "loss": 0.0, "lr": 8.906122719925302e-09, "epoch": 4.80871179732734, "percentage": 96.17, "elapsed_time": "4:15:11", "remaining_time": "0:10:09", "throughput": 8662.61, "total_tokens": 132634984} +{"current_steps": 196840, "total_steps": 204665, "loss": 0.0, "lr": 8.894770329212154e-09, "epoch": 4.808833948159187, "percentage": 96.18, "elapsed_time": "4:15:11", "remaining_time": "0:10:08", "throughput": 8662.64, "total_tokens": 132638440} +{"current_steps": 196845, "total_steps": 204665, "loss": 0.0, "lr": 8.883425146082868e-09, "epoch": 4.808956098991034, "percentage": 96.18, "elapsed_time": "4:15:11", "remaining_time": "0:10:08", "throughput": 8662.66, "total_tokens": 132641704} +{"current_steps": 196850, "total_steps": 204665, "loss": 0.0, "lr": 8.872087170619825e-09, "epoch": 4.809078249822881, "percentage": 96.18, "elapsed_time": "4:15:12", "remaining_time": "0:10:07", "throughput": 8662.69, "total_tokens": 132645288} +{"current_steps": 196855, "total_steps": 204665, "loss": 0.0001, "lr": 8.860756402905623e-09, "epoch": 4.809200400654729, "percentage": 96.18, "elapsed_time": "4:15:12", "remaining_time": "0:10:07", "throughput": 8662.71, "total_tokens": 132648552} +{"current_steps": 196860, "total_steps": 204665, "loss": 0.0, "lr": 8.84943284302253e-09, "epoch": 4.809322551486575, "percentage": 96.19, "elapsed_time": "4:15:12", "remaining_time": "0:10:07", "throughput": 8662.7, "total_tokens": 132651368} +{"current_steps": 196865, "total_steps": 204665, "loss": 0.0, "lr": 8.838116491052927e-09, "epoch": 4.809444702318423, "percentage": 96.19, "elapsed_time": "4:15:13", "remaining_time": "0:10:06", "throughput": 8662.72, "total_tokens": 132654632} +{"current_steps": 196870, "total_steps": 204665, "loss": 0.0, "lr": 8.82680734707919e-09, "epoch": 4.80956685315027, "percentage": 96.19, "elapsed_time": "4:15:13", "remaining_time": "0:10:06", "throughput": 8662.74, "total_tokens": 132657960} +{"current_steps": 196875, "total_steps": 204665, "loss": 0.0, "lr": 8.815505411183367e-09, "epoch": 4.809689003982117, "percentage": 96.19, "elapsed_time": "4:15:13", "remaining_time": "0:10:05", "throughput": 8662.76, "total_tokens": 132661288} +{"current_steps": 196880, "total_steps": 204665, "loss": 0.0, "lr": 8.804210683447944e-09, "epoch": 4.809811154813964, "percentage": 96.2, "elapsed_time": "4:15:14", "remaining_time": "0:10:05", "throughput": 8662.78, "total_tokens": 132664552} +{"current_steps": 196885, "total_steps": 204665, "loss": 0.0, "lr": 8.792923163954857e-09, "epoch": 4.809933305645812, "percentage": 96.2, "elapsed_time": "4:15:14", "remaining_time": "0:10:05", "throughput": 8662.79, "total_tokens": 132667624} +{"current_steps": 196890, "total_steps": 204665, "loss": 0.0, "lr": 8.781642852786264e-09, "epoch": 4.8100554564776585, "percentage": 96.2, "elapsed_time": "4:15:15", "remaining_time": "0:10:04", "throughput": 8662.81, "total_tokens": 132670888} +{"current_steps": 196895, "total_steps": 204665, "loss": 0.0, "lr": 8.770369750024099e-09, "epoch": 4.810177607309506, "percentage": 96.2, "elapsed_time": "4:15:15", "remaining_time": "0:10:04", "throughput": 8662.82, "total_tokens": 132674152} +{"current_steps": 196900, "total_steps": 204665, "loss": 0.0, "lr": 8.759103855750404e-09, "epoch": 4.810299758141353, "percentage": 96.21, "elapsed_time": "4:15:15", "remaining_time": "0:10:03", "throughput": 8662.83, "total_tokens": 132677160} +{"current_steps": 196905, "total_steps": 204665, "loss": 0.0, "lr": 8.747845170047119e-09, "epoch": 4.8104219089732005, "percentage": 96.21, "elapsed_time": "4:15:16", "remaining_time": "0:10:03", "throughput": 8662.84, "total_tokens": 132680360} +{"current_steps": 196910, "total_steps": 204665, "loss": 0.0, "lr": 8.736593692996174e-09, "epoch": 4.810544059805047, "percentage": 96.21, "elapsed_time": "4:15:16", "remaining_time": "0:10:03", "throughput": 8662.88, "total_tokens": 132683944} +{"current_steps": 196915, "total_steps": 204665, "loss": 0.0, "lr": 8.725349424679396e-09, "epoch": 4.810666210636894, "percentage": 96.21, "elapsed_time": "4:15:16", "remaining_time": "0:10:02", "throughput": 8662.89, "total_tokens": 132687080} +{"current_steps": 196920, "total_steps": 204665, "loss": 0.0, "lr": 8.714112365178383e-09, "epoch": 4.810788361468742, "percentage": 96.22, "elapsed_time": "4:15:17", "remaining_time": "0:10:02", "throughput": 8662.91, "total_tokens": 132690344} +{"current_steps": 196925, "total_steps": 204665, "loss": 0.0, "lr": 8.702882514575072e-09, "epoch": 4.810910512300588, "percentage": 96.22, "elapsed_time": "4:15:17", "remaining_time": "0:10:02", "throughput": 8662.91, "total_tokens": 132693288} +{"current_steps": 196930, "total_steps": 204665, "loss": 0.0, "lr": 8.691659872950951e-09, "epoch": 4.811032663132436, "percentage": 96.22, "elapsed_time": "4:15:17", "remaining_time": "0:10:01", "throughput": 8662.92, "total_tokens": 132696424} +{"current_steps": 196935, "total_steps": 204665, "loss": 0.0, "lr": 8.680444440387624e-09, "epoch": 4.811154813964283, "percentage": 96.22, "elapsed_time": "4:15:18", "remaining_time": "0:10:01", "throughput": 8662.91, "total_tokens": 132699304} +{"current_steps": 196940, "total_steps": 204665, "loss": 0.0, "lr": 8.669236216966913e-09, "epoch": 4.81127696479613, "percentage": 96.23, "elapsed_time": "4:15:18", "remaining_time": "0:10:00", "throughput": 8662.95, "total_tokens": 132702888} +{"current_steps": 196945, "total_steps": 204665, "loss": 0.0, "lr": 8.658035202770086e-09, "epoch": 4.811399115627977, "percentage": 96.23, "elapsed_time": "4:15:18", "remaining_time": "0:10:00", "throughput": 8663.0, "total_tokens": 132706728} +{"current_steps": 196950, "total_steps": 204665, "loss": 0.0, "lr": 8.646841397878634e-09, "epoch": 4.811521266459825, "percentage": 96.23, "elapsed_time": "4:15:19", "remaining_time": "0:10:00", "throughput": 8663.03, "total_tokens": 132710248} +{"current_steps": 196955, "total_steps": 204665, "loss": 0.0, "lr": 8.635654802374048e-09, "epoch": 4.8116434172916716, "percentage": 96.23, "elapsed_time": "4:15:19", "remaining_time": "0:09:59", "throughput": 8663.05, "total_tokens": 132713640} +{"current_steps": 196960, "total_steps": 204665, "loss": 0.0, "lr": 8.624475416337596e-09, "epoch": 4.811765568123519, "percentage": 96.24, "elapsed_time": "4:15:19", "remaining_time": "0:09:59", "throughput": 8663.08, "total_tokens": 132717096} +{"current_steps": 196965, "total_steps": 204665, "loss": 0.0, "lr": 8.613303239850544e-09, "epoch": 4.811887718955366, "percentage": 96.24, "elapsed_time": "4:15:20", "remaining_time": "0:09:58", "throughput": 8663.11, "total_tokens": 132720552} +{"current_steps": 196970, "total_steps": 204665, "loss": 0.0, "lr": 8.602138272994274e-09, "epoch": 4.812009869787214, "percentage": 96.24, "elapsed_time": "4:15:20", "remaining_time": "0:09:58", "throughput": 8663.12, "total_tokens": 132723624} +{"current_steps": 196975, "total_steps": 204665, "loss": 0.0436, "lr": 8.590980515849945e-09, "epoch": 4.81213202061906, "percentage": 96.24, "elapsed_time": "4:15:20", "remaining_time": "0:09:58", "throughput": 8663.16, "total_tokens": 132727336} +{"current_steps": 196980, "total_steps": 204665, "loss": 0.0, "lr": 8.579829968498486e-09, "epoch": 4.812254171450908, "percentage": 96.25, "elapsed_time": "4:15:21", "remaining_time": "0:09:57", "throughput": 8663.16, "total_tokens": 132730280} +{"current_steps": 196985, "total_steps": 204665, "loss": 0.0, "lr": 8.568686631021394e-09, "epoch": 4.812376322282755, "percentage": 96.25, "elapsed_time": "4:15:21", "remaining_time": "0:09:57", "throughput": 8663.16, "total_tokens": 132733288} +{"current_steps": 196990, "total_steps": 204665, "loss": 0.0, "lr": 8.557550503499378e-09, "epoch": 4.812498473114601, "percentage": 96.25, "elapsed_time": "4:15:21", "remaining_time": "0:09:56", "throughput": 8663.2, "total_tokens": 132736872} +{"current_steps": 196995, "total_steps": 204665, "loss": 0.0, "lr": 8.546421586013486e-09, "epoch": 4.812620623946449, "percentage": 96.25, "elapsed_time": "4:15:22", "remaining_time": "0:09:56", "throughput": 8663.22, "total_tokens": 132740264} +{"current_steps": 197000, "total_steps": 204665, "loss": 0.0, "lr": 8.535299878644653e-09, "epoch": 4.812742774778297, "percentage": 96.25, "elapsed_time": "4:15:22", "remaining_time": "0:09:56", "throughput": 8663.26, "total_tokens": 132743912} +{"current_steps": 197005, "total_steps": 204665, "loss": 0.0, "lr": 8.524185381473815e-09, "epoch": 4.8128649256101435, "percentage": 96.26, "elapsed_time": "4:15:22", "remaining_time": "0:09:55", "throughput": 8663.28, "total_tokens": 132747368} +{"current_steps": 197010, "total_steps": 204665, "loss": 0.0, "lr": 8.513078094581904e-09, "epoch": 4.81298707644199, "percentage": 96.26, "elapsed_time": "4:15:23", "remaining_time": "0:09:55", "throughput": 8663.29, "total_tokens": 132750376} +{"current_steps": 197015, "total_steps": 204665, "loss": 0.0, "lr": 8.501978018049528e-09, "epoch": 4.813109227273838, "percentage": 96.26, "elapsed_time": "4:15:23", "remaining_time": "0:09:55", "throughput": 8663.31, "total_tokens": 132753640} +{"current_steps": 197020, "total_steps": 204665, "loss": 0.0, "lr": 8.490885151957283e-09, "epoch": 4.813231378105685, "percentage": 96.26, "elapsed_time": "4:15:24", "remaining_time": "0:09:54", "throughput": 8663.37, "total_tokens": 132757736} +{"current_steps": 197025, "total_steps": 204665, "loss": 0.0, "lr": 8.47979949638622e-09, "epoch": 4.813353528937532, "percentage": 96.27, "elapsed_time": "4:15:24", "remaining_time": "0:09:54", "throughput": 8663.38, "total_tokens": 132761000} +{"current_steps": 197030, "total_steps": 204665, "loss": 0.0, "lr": 8.468721051416606e-09, "epoch": 4.813475679769379, "percentage": 96.27, "elapsed_time": "4:15:24", "remaining_time": "0:09:53", "throughput": 8663.41, "total_tokens": 132764392} +{"current_steps": 197035, "total_steps": 204665, "loss": 0.0, "lr": 8.457649817129153e-09, "epoch": 4.813597830601227, "percentage": 96.27, "elapsed_time": "4:15:25", "remaining_time": "0:09:53", "throughput": 8663.43, "total_tokens": 132767656} +{"current_steps": 197040, "total_steps": 204665, "loss": 0.0, "lr": 8.446585793604355e-09, "epoch": 4.813719981433073, "percentage": 96.27, "elapsed_time": "4:15:25", "remaining_time": "0:09:53", "throughput": 8663.45, "total_tokens": 132771048} +{"current_steps": 197045, "total_steps": 204665, "loss": 0.0, "lr": 8.435528980922812e-09, "epoch": 4.813842132264921, "percentage": 96.28, "elapsed_time": "4:15:25", "remaining_time": "0:09:52", "throughput": 8663.47, "total_tokens": 132774312} +{"current_steps": 197050, "total_steps": 204665, "loss": 0.0, "lr": 8.424479379164684e-09, "epoch": 4.813964283096768, "percentage": 96.28, "elapsed_time": "4:15:26", "remaining_time": "0:09:52", "throughput": 8663.48, "total_tokens": 132777448} +{"current_steps": 197055, "total_steps": 204665, "loss": 0.0, "lr": 8.41343698841035e-09, "epoch": 4.814086433928615, "percentage": 96.28, "elapsed_time": "4:15:26", "remaining_time": "0:09:51", "throughput": 8663.49, "total_tokens": 132780584} +{"current_steps": 197060, "total_steps": 204665, "loss": 0.0, "lr": 8.402401808740411e-09, "epoch": 4.814208584760462, "percentage": 96.28, "elapsed_time": "4:15:26", "remaining_time": "0:09:51", "throughput": 8663.52, "total_tokens": 132784168} +{"current_steps": 197065, "total_steps": 204665, "loss": 0.0, "lr": 8.391373840234805e-09, "epoch": 4.81433073559231, "percentage": 96.29, "elapsed_time": "4:15:27", "remaining_time": "0:09:51", "throughput": 8663.55, "total_tokens": 132787624} +{"current_steps": 197070, "total_steps": 204665, "loss": 0.0, "lr": 8.380353082973913e-09, "epoch": 4.8144528864241565, "percentage": 96.29, "elapsed_time": "4:15:27", "remaining_time": "0:09:50", "throughput": 8663.58, "total_tokens": 132791144} +{"current_steps": 197075, "total_steps": 204665, "loss": 0.0, "lr": 8.369339537037668e-09, "epoch": 4.814575037256004, "percentage": 96.29, "elapsed_time": "4:15:27", "remaining_time": "0:09:50", "throughput": 8663.6, "total_tokens": 132794472} +{"current_steps": 197080, "total_steps": 204665, "loss": 0.0, "lr": 8.358333202506451e-09, "epoch": 4.814697188087851, "percentage": 96.29, "elapsed_time": "4:15:28", "remaining_time": "0:09:49", "throughput": 8663.63, "total_tokens": 132797992} +{"current_steps": 197085, "total_steps": 204665, "loss": 0.0, "lr": 8.347334079459978e-09, "epoch": 4.814819338919698, "percentage": 96.3, "elapsed_time": "4:15:28", "remaining_time": "0:09:49", "throughput": 8663.66, "total_tokens": 132801512} +{"current_steps": 197090, "total_steps": 204665, "loss": 0.0, "lr": 8.336342167978516e-09, "epoch": 4.814941489751545, "percentage": 96.3, "elapsed_time": "4:15:28", "remaining_time": "0:09:49", "throughput": 8663.69, "total_tokens": 132805032} +{"current_steps": 197095, "total_steps": 204665, "loss": 0.0, "lr": 8.325357468142002e-09, "epoch": 4.815063640583393, "percentage": 96.3, "elapsed_time": "4:15:29", "remaining_time": "0:09:48", "throughput": 8663.73, "total_tokens": 132808744} +{"current_steps": 197100, "total_steps": 204665, "loss": 0.0012, "lr": 8.31437998003004e-09, "epoch": 4.81518579141524, "percentage": 96.3, "elapsed_time": "4:15:29", "remaining_time": "0:09:48", "throughput": 8663.75, "total_tokens": 132812136} +{"current_steps": 197105, "total_steps": 204665, "loss": 0.0, "lr": 8.303409703722786e-09, "epoch": 4.815307942247086, "percentage": 96.31, "elapsed_time": "4:15:29", "remaining_time": "0:09:47", "throughput": 8663.79, "total_tokens": 132815848} +{"current_steps": 197110, "total_steps": 204665, "loss": 0.0, "lr": 8.292446639299732e-09, "epoch": 4.815430093078934, "percentage": 96.31, "elapsed_time": "4:15:30", "remaining_time": "0:09:47", "throughput": 8663.8, "total_tokens": 132818920} +{"current_steps": 197115, "total_steps": 204665, "loss": 0.0006, "lr": 8.281490786840927e-09, "epoch": 4.815552243910781, "percentage": 96.31, "elapsed_time": "4:15:30", "remaining_time": "0:09:47", "throughput": 8663.82, "total_tokens": 132822312} +{"current_steps": 197120, "total_steps": 204665, "loss": 0.0, "lr": 8.270542146425751e-09, "epoch": 4.815674394742628, "percentage": 96.31, "elapsed_time": "4:15:31", "remaining_time": "0:09:46", "throughput": 8663.87, "total_tokens": 132826152} +{"current_steps": 197125, "total_steps": 204665, "loss": 0.0, "lr": 8.25960071813392e-09, "epoch": 4.815796545574475, "percentage": 96.32, "elapsed_time": "4:15:31", "remaining_time": "0:09:46", "throughput": 8663.9, "total_tokens": 132829672} +{"current_steps": 197130, "total_steps": 204665, "loss": 0.0, "lr": 8.248666502045032e-09, "epoch": 4.815918696406323, "percentage": 96.32, "elapsed_time": "4:15:31", "remaining_time": "0:09:46", "throughput": 8663.9, "total_tokens": 132832552} +{"current_steps": 197135, "total_steps": 204665, "loss": 0.0, "lr": 8.237739498238582e-09, "epoch": 4.8160408472381695, "percentage": 96.32, "elapsed_time": "4:15:32", "remaining_time": "0:09:45", "throughput": 8663.92, "total_tokens": 132835880} +{"current_steps": 197140, "total_steps": 204665, "loss": 0.0, "lr": 8.226819706794063e-09, "epoch": 4.816162998070017, "percentage": 96.32, "elapsed_time": "4:15:32", "remaining_time": "0:09:45", "throughput": 8663.93, "total_tokens": 132839080} +{"current_steps": 197145, "total_steps": 204665, "loss": 0.0, "lr": 8.215907127790856e-09, "epoch": 4.816285148901864, "percentage": 96.33, "elapsed_time": "4:15:32", "remaining_time": "0:09:44", "throughput": 8663.96, "total_tokens": 132842536} +{"current_steps": 197150, "total_steps": 204665, "loss": 0.0, "lr": 8.205001761308228e-09, "epoch": 4.8164072997337115, "percentage": 96.33, "elapsed_time": "4:15:33", "remaining_time": "0:09:44", "throughput": 8663.98, "total_tokens": 132845736} +{"current_steps": 197155, "total_steps": 204665, "loss": 0.0, "lr": 8.194103607425784e-09, "epoch": 4.816529450565558, "percentage": 96.33, "elapsed_time": "4:15:33", "remaining_time": "0:09:44", "throughput": 8664.0, "total_tokens": 132849192} +{"current_steps": 197160, "total_steps": 204665, "loss": 0.0, "lr": 8.183212666222461e-09, "epoch": 4.816651601397406, "percentage": 96.33, "elapsed_time": "4:15:33", "remaining_time": "0:09:43", "throughput": 8664.0, "total_tokens": 132851944} +{"current_steps": 197165, "total_steps": 204665, "loss": 0.0, "lr": 8.172328937777639e-09, "epoch": 4.816773752229253, "percentage": 96.34, "elapsed_time": "4:15:34", "remaining_time": "0:09:43", "throughput": 8664.02, "total_tokens": 132855272} +{"current_steps": 197170, "total_steps": 204665, "loss": 0.0, "lr": 8.161452422170367e-09, "epoch": 4.8168959030611, "percentage": 96.34, "elapsed_time": "4:15:34", "remaining_time": "0:09:42", "throughput": 8664.03, "total_tokens": 132858408} +{"current_steps": 197175, "total_steps": 204665, "loss": 0.0, "lr": 8.150583119479803e-09, "epoch": 4.817018053892947, "percentage": 96.34, "elapsed_time": "4:15:34", "remaining_time": "0:09:42", "throughput": 8664.05, "total_tokens": 132861736} +{"current_steps": 197180, "total_steps": 204665, "loss": 0.0, "lr": 8.139721029784996e-09, "epoch": 4.817140204724794, "percentage": 96.34, "elapsed_time": "4:15:35", "remaining_time": "0:09:42", "throughput": 8664.08, "total_tokens": 132865256} +{"current_steps": 197185, "total_steps": 204665, "loss": 0.0, "lr": 8.12886615316477e-09, "epoch": 4.817262355556641, "percentage": 96.35, "elapsed_time": "4:15:35", "remaining_time": "0:09:41", "throughput": 8664.11, "total_tokens": 132868712} +{"current_steps": 197190, "total_steps": 204665, "loss": 0.0, "lr": 8.118018489698396e-09, "epoch": 4.817384506388488, "percentage": 96.35, "elapsed_time": "4:15:35", "remaining_time": "0:09:41", "throughput": 8664.12, "total_tokens": 132871720} +{"current_steps": 197195, "total_steps": 204665, "loss": 0.0, "lr": 8.10717803946448e-09, "epoch": 4.817506657220336, "percentage": 96.35, "elapsed_time": "4:15:36", "remaining_time": "0:09:40", "throughput": 8664.13, "total_tokens": 132874856} +{"current_steps": 197200, "total_steps": 204665, "loss": 0.0, "lr": 8.096344802542066e-09, "epoch": 4.8176288080521825, "percentage": 96.35, "elapsed_time": "4:15:36", "remaining_time": "0:09:40", "throughput": 8664.15, "total_tokens": 132878184} +{"current_steps": 197205, "total_steps": 204665, "loss": 0.0, "lr": 8.085518779009648e-09, "epoch": 4.81775095888403, "percentage": 96.36, "elapsed_time": "4:15:36", "remaining_time": "0:09:40", "throughput": 8664.17, "total_tokens": 132881448} +{"current_steps": 197210, "total_steps": 204665, "loss": 0.0, "lr": 8.074699968946275e-09, "epoch": 4.817873109715877, "percentage": 96.36, "elapsed_time": "4:15:37", "remaining_time": "0:09:39", "throughput": 8664.22, "total_tokens": 132885288} +{"current_steps": 197215, "total_steps": 204665, "loss": 0.0, "lr": 8.063888372430439e-09, "epoch": 4.8179952605477245, "percentage": 96.36, "elapsed_time": "4:15:37", "remaining_time": "0:09:39", "throughput": 8664.27, "total_tokens": 132889256} +{"current_steps": 197220, "total_steps": 204665, "loss": 0.0, "lr": 8.053083989540743e-09, "epoch": 4.818117411379571, "percentage": 96.36, "elapsed_time": "4:15:37", "remaining_time": "0:09:39", "throughput": 8664.3, "total_tokens": 132892712} +{"current_steps": 197225, "total_steps": 204665, "loss": 0.0, "lr": 8.042286820355903e-09, "epoch": 4.818239562211419, "percentage": 96.36, "elapsed_time": "4:15:38", "remaining_time": "0:09:38", "throughput": 8664.32, "total_tokens": 132895976} +{"current_steps": 197230, "total_steps": 204665, "loss": 0.0, "lr": 8.031496864954302e-09, "epoch": 4.818361713043266, "percentage": 96.37, "elapsed_time": "4:15:38", "remaining_time": "0:09:38", "throughput": 8664.31, "total_tokens": 132898792} +{"current_steps": 197235, "total_steps": 204665, "loss": 0.0, "lr": 8.020714123414541e-09, "epoch": 4.818483863875113, "percentage": 96.37, "elapsed_time": "4:15:38", "remaining_time": "0:09:37", "throughput": 8664.34, "total_tokens": 132902248} +{"current_steps": 197240, "total_steps": 204665, "loss": 0.0, "lr": 8.009938595814892e-09, "epoch": 4.81860601470696, "percentage": 96.37, "elapsed_time": "4:15:39", "remaining_time": "0:09:37", "throughput": 8664.37, "total_tokens": 132905640} +{"current_steps": 197245, "total_steps": 204665, "loss": 0.0, "lr": 7.999170282233736e-09, "epoch": 4.818728165538808, "percentage": 96.37, "elapsed_time": "4:15:39", "remaining_time": "0:09:37", "throughput": 8664.39, "total_tokens": 132909032} +{"current_steps": 197250, "total_steps": 204665, "loss": 0.0, "lr": 7.988409182749567e-09, "epoch": 4.818850316370654, "percentage": 96.38, "elapsed_time": "4:15:40", "remaining_time": "0:09:36", "throughput": 8664.4, "total_tokens": 132912232} +{"current_steps": 197255, "total_steps": 204665, "loss": 0.0, "lr": 7.977655297440433e-09, "epoch": 4.818972467202501, "percentage": 96.38, "elapsed_time": "4:15:40", "remaining_time": "0:09:36", "throughput": 8664.44, "total_tokens": 132915752} +{"current_steps": 197260, "total_steps": 204665, "loss": 0.0, "lr": 7.966908626384605e-09, "epoch": 4.819094618034349, "percentage": 96.38, "elapsed_time": "4:15:40", "remaining_time": "0:09:35", "throughput": 8664.44, "total_tokens": 132918824} +{"current_steps": 197265, "total_steps": 204665, "loss": 0.0, "lr": 7.956169169660242e-09, "epoch": 4.819216768866196, "percentage": 96.38, "elapsed_time": "4:15:41", "remaining_time": "0:09:35", "throughput": 8664.46, "total_tokens": 132922024} +{"current_steps": 197270, "total_steps": 204665, "loss": 0.0, "lr": 7.945436927345395e-09, "epoch": 4.819338919698043, "percentage": 96.39, "elapsed_time": "4:15:41", "remaining_time": "0:09:35", "throughput": 8664.5, "total_tokens": 132925736} +{"current_steps": 197275, "total_steps": 204665, "loss": 0.0, "lr": 7.93471189951822e-09, "epoch": 4.81946107052989, "percentage": 96.39, "elapsed_time": "4:15:41", "remaining_time": "0:09:34", "throughput": 8664.55, "total_tokens": 132929576} +{"current_steps": 197280, "total_steps": 204665, "loss": 0.0, "lr": 7.923994086256657e-09, "epoch": 4.819583221361738, "percentage": 96.39, "elapsed_time": "4:15:42", "remaining_time": "0:09:34", "throughput": 8664.56, "total_tokens": 132932776} +{"current_steps": 197285, "total_steps": 204665, "loss": 0.0, "lr": 7.913283487638645e-09, "epoch": 4.819705372193584, "percentage": 96.39, "elapsed_time": "4:15:42", "remaining_time": "0:09:33", "throughput": 8664.58, "total_tokens": 132936104} +{"current_steps": 197290, "total_steps": 204665, "loss": 0.0, "lr": 7.902580103742008e-09, "epoch": 4.819827523025432, "percentage": 96.4, "elapsed_time": "4:15:42", "remaining_time": "0:09:33", "throughput": 8664.62, "total_tokens": 132939816} +{"current_steps": 197295, "total_steps": 204665, "loss": 0.0, "lr": 7.891883934644794e-09, "epoch": 4.819949673857279, "percentage": 96.4, "elapsed_time": "4:15:43", "remaining_time": "0:09:33", "throughput": 8664.65, "total_tokens": 132943336} +{"current_steps": 197300, "total_steps": 204665, "loss": 0.0, "lr": 7.8811949804245e-09, "epoch": 4.820071824689126, "percentage": 96.4, "elapsed_time": "4:15:43", "remaining_time": "0:09:32", "throughput": 8664.67, "total_tokens": 132946536} +{"current_steps": 197305, "total_steps": 204665, "loss": 0.0, "lr": 7.87051324115906e-09, "epoch": 4.820193975520973, "percentage": 96.4, "elapsed_time": "4:15:43", "remaining_time": "0:09:32", "throughput": 8664.69, "total_tokens": 132949864} +{"current_steps": 197310, "total_steps": 204665, "loss": 0.0, "lr": 7.859838716926081e-09, "epoch": 4.820316126352821, "percentage": 96.41, "elapsed_time": "4:15:44", "remaining_time": "0:09:31", "throughput": 8664.7, "total_tokens": 132953064} +{"current_steps": 197315, "total_steps": 204665, "loss": 0.0, "lr": 7.849171407803168e-09, "epoch": 4.820438277184667, "percentage": 96.41, "elapsed_time": "4:15:44", "remaining_time": "0:09:31", "throughput": 8664.7, "total_tokens": 132956008} +{"current_steps": 197320, "total_steps": 204665, "loss": 0.0, "lr": 7.838511313868035e-09, "epoch": 4.820560428016515, "percentage": 96.41, "elapsed_time": "4:15:44", "remaining_time": "0:09:31", "throughput": 8664.73, "total_tokens": 132959400} +{"current_steps": 197325, "total_steps": 204665, "loss": 0.0, "lr": 7.827858435198176e-09, "epoch": 4.820682578848362, "percentage": 96.41, "elapsed_time": "4:15:45", "remaining_time": "0:09:30", "throughput": 8664.75, "total_tokens": 132962728} +{"current_steps": 197330, "total_steps": 204665, "loss": 0.0, "lr": 7.817212771870863e-09, "epoch": 4.8208047296802095, "percentage": 96.42, "elapsed_time": "4:15:45", "remaining_time": "0:09:30", "throughput": 8664.78, "total_tokens": 132966312} +{"current_steps": 197335, "total_steps": 204665, "loss": 0.0, "lr": 7.806574323963699e-09, "epoch": 4.820926880512056, "percentage": 96.42, "elapsed_time": "4:15:45", "remaining_time": "0:09:30", "throughput": 8664.82, "total_tokens": 132970024} +{"current_steps": 197340, "total_steps": 204665, "loss": 0.0, "lr": 7.795943091553847e-09, "epoch": 4.821049031343904, "percentage": 96.42, "elapsed_time": "4:15:46", "remaining_time": "0:09:29", "throughput": 8664.84, "total_tokens": 132973224} +{"current_steps": 197345, "total_steps": 204665, "loss": 0.0, "lr": 7.78531907471891e-09, "epoch": 4.821171182175751, "percentage": 96.42, "elapsed_time": "4:15:46", "remaining_time": "0:09:29", "throughput": 8664.86, "total_tokens": 132976680} +{"current_steps": 197350, "total_steps": 204665, "loss": 0.0, "lr": 7.774702273535937e-09, "epoch": 4.821293333007597, "percentage": 96.43, "elapsed_time": "4:15:47", "remaining_time": "0:09:28", "throughput": 8664.9, "total_tokens": 132980264} +{"current_steps": 197355, "total_steps": 204665, "loss": 0.0, "lr": 7.764092688082313e-09, "epoch": 4.821415483839445, "percentage": 96.43, "elapsed_time": "4:15:47", "remaining_time": "0:09:28", "throughput": 8664.94, "total_tokens": 132984040} +{"current_steps": 197360, "total_steps": 204665, "loss": 0.0, "lr": 7.753490318434975e-09, "epoch": 4.821537634671293, "percentage": 96.43, "elapsed_time": "4:15:47", "remaining_time": "0:09:28", "throughput": 8664.97, "total_tokens": 132987560} +{"current_steps": 197365, "total_steps": 204665, "loss": 0.0, "lr": 7.742895164671303e-09, "epoch": 4.821659785503139, "percentage": 96.43, "elapsed_time": "4:15:48", "remaining_time": "0:09:27", "throughput": 8665.0, "total_tokens": 132990952} +{"current_steps": 197370, "total_steps": 204665, "loss": 0.0, "lr": 7.732307226868017e-09, "epoch": 4.821781936334986, "percentage": 96.44, "elapsed_time": "4:15:48", "remaining_time": "0:09:27", "throughput": 8665.0, "total_tokens": 132993960} +{"current_steps": 197375, "total_steps": 204665, "loss": 0.0004, "lr": 7.721726505102277e-09, "epoch": 4.821904087166834, "percentage": 96.44, "elapsed_time": "4:15:48", "remaining_time": "0:09:26", "throughput": 8665.03, "total_tokens": 132997288} +{"current_steps": 197380, "total_steps": 204665, "loss": 0.0, "lr": 7.711152999451132e-09, "epoch": 4.8220262379986805, "percentage": 96.44, "elapsed_time": "4:15:49", "remaining_time": "0:09:26", "throughput": 8665.07, "total_tokens": 133001192} +{"current_steps": 197385, "total_steps": 204665, "loss": 0.0001, "lr": 7.700586709991297e-09, "epoch": 4.822148388830528, "percentage": 96.44, "elapsed_time": "4:15:49", "remaining_time": "0:09:26", "throughput": 8665.1, "total_tokens": 133004520} +{"current_steps": 197390, "total_steps": 204665, "loss": 0.0, "lr": 7.690027636799712e-09, "epoch": 4.822270539662375, "percentage": 96.45, "elapsed_time": "4:15:49", "remaining_time": "0:09:25", "throughput": 8665.14, "total_tokens": 133008232} +{"current_steps": 197395, "total_steps": 204665, "loss": 0.0, "lr": 7.679475779953093e-09, "epoch": 4.8223926904942225, "percentage": 96.45, "elapsed_time": "4:15:50", "remaining_time": "0:09:25", "throughput": 8665.15, "total_tokens": 133011432} +{"current_steps": 197400, "total_steps": 204665, "loss": 0.0, "lr": 7.668931139528267e-09, "epoch": 4.822514841326069, "percentage": 96.45, "elapsed_time": "4:15:50", "remaining_time": "0:09:24", "throughput": 8665.19, "total_tokens": 133015080} +{"current_steps": 197405, "total_steps": 204665, "loss": 0.0, "lr": 7.658393715601951e-09, "epoch": 4.822636992157917, "percentage": 96.45, "elapsed_time": "4:15:50", "remaining_time": "0:09:24", "throughput": 8665.23, "total_tokens": 133018792} +{"current_steps": 197410, "total_steps": 204665, "loss": 0.0, "lr": 7.64786350825064e-09, "epoch": 4.822759142989764, "percentage": 96.46, "elapsed_time": "4:15:51", "remaining_time": "0:09:24", "throughput": 8665.26, "total_tokens": 133022248} +{"current_steps": 197415, "total_steps": 204665, "loss": 0.0, "lr": 7.637340517551049e-09, "epoch": 4.822881293821611, "percentage": 96.46, "elapsed_time": "4:15:51", "remaining_time": "0:09:23", "throughput": 8665.29, "total_tokens": 133025896} +{"current_steps": 197420, "total_steps": 204665, "loss": 0.0, "lr": 7.626824743579564e-09, "epoch": 4.823003444653458, "percentage": 96.46, "elapsed_time": "4:15:51", "remaining_time": "0:09:23", "throughput": 8665.35, "total_tokens": 133029864} +{"current_steps": 197425, "total_steps": 204665, "loss": 0.0, "lr": 7.616316186412675e-09, "epoch": 4.823125595485306, "percentage": 96.46, "elapsed_time": "4:15:52", "remaining_time": "0:09:23", "throughput": 8665.38, "total_tokens": 133033448} +{"current_steps": 197430, "total_steps": 204665, "loss": 0.0, "lr": 7.60581484612699e-09, "epoch": 4.823247746317152, "percentage": 96.46, "elapsed_time": "4:15:52", "remaining_time": "0:09:22", "throughput": 8665.42, "total_tokens": 133037224} +{"current_steps": 197435, "total_steps": 204665, "loss": 0.0, "lr": 7.59532072279867e-09, "epoch": 4.823369897149, "percentage": 96.47, "elapsed_time": "4:15:53", "remaining_time": "0:09:22", "throughput": 8665.47, "total_tokens": 133041000} +{"current_steps": 197440, "total_steps": 204665, "loss": 0.0, "lr": 7.5848338165041e-09, "epoch": 4.823492047980847, "percentage": 96.47, "elapsed_time": "4:15:53", "remaining_time": "0:09:21", "throughput": 8665.49, "total_tokens": 133044392} +{"current_steps": 197445, "total_steps": 204665, "loss": 0.0, "lr": 7.574354127319548e-09, "epoch": 4.8236141988126935, "percentage": 96.47, "elapsed_time": "4:15:53", "remaining_time": "0:09:21", "throughput": 8665.53, "total_tokens": 133047976} +{"current_steps": 197450, "total_steps": 204665, "loss": 0.0, "lr": 7.56388165532118e-09, "epoch": 4.823736349644541, "percentage": 96.47, "elapsed_time": "4:15:54", "remaining_time": "0:09:21", "throughput": 8665.54, "total_tokens": 133051240} +{"current_steps": 197455, "total_steps": 204665, "loss": 0.0, "lr": 7.553416400585267e-09, "epoch": 4.823858500476388, "percentage": 96.48, "elapsed_time": "4:15:54", "remaining_time": "0:09:20", "throughput": 8665.56, "total_tokens": 133054504} +{"current_steps": 197460, "total_steps": 204665, "loss": 0.0, "lr": 7.542958363187746e-09, "epoch": 4.8239806513082355, "percentage": 96.48, "elapsed_time": "4:15:54", "remaining_time": "0:09:20", "throughput": 8665.59, "total_tokens": 133057896} +{"current_steps": 197465, "total_steps": 204665, "loss": 0.0, "lr": 7.532507543204891e-09, "epoch": 4.824102802140082, "percentage": 96.48, "elapsed_time": "4:15:55", "remaining_time": "0:09:19", "throughput": 8665.59, "total_tokens": 133060904} +{"current_steps": 197470, "total_steps": 204665, "loss": 0.0, "lr": 7.522063940712531e-09, "epoch": 4.82422495297193, "percentage": 96.48, "elapsed_time": "4:15:55", "remaining_time": "0:09:19", "throughput": 8665.61, "total_tokens": 133064296} +{"current_steps": 197475, "total_steps": 204665, "loss": 0.0, "lr": 7.511627555786715e-09, "epoch": 4.824347103803777, "percentage": 96.49, "elapsed_time": "4:15:55", "remaining_time": "0:09:19", "throughput": 8665.63, "total_tokens": 133067624} +{"current_steps": 197480, "total_steps": 204665, "loss": 0.0, "lr": 7.50119838850316e-09, "epoch": 4.824469254635624, "percentage": 96.49, "elapsed_time": "4:15:56", "remaining_time": "0:09:18", "throughput": 8665.69, "total_tokens": 133071720} +{"current_steps": 197485, "total_steps": 204665, "loss": 0.0, "lr": 7.490776438937918e-09, "epoch": 4.824591405467471, "percentage": 96.49, "elapsed_time": "4:15:56", "remaining_time": "0:09:18", "throughput": 8665.73, "total_tokens": 133075304} +{"current_steps": 197490, "total_steps": 204665, "loss": 0.0, "lr": 7.480361707166705e-09, "epoch": 4.824713556299319, "percentage": 96.49, "elapsed_time": "4:15:56", "remaining_time": "0:09:17", "throughput": 8665.74, "total_tokens": 133078568} +{"current_steps": 197495, "total_steps": 204665, "loss": 0.0, "lr": 7.469954193265238e-09, "epoch": 4.824835707131165, "percentage": 96.5, "elapsed_time": "4:15:57", "remaining_time": "0:09:17", "throughput": 8665.77, "total_tokens": 133082024} +{"current_steps": 197500, "total_steps": 204665, "loss": 0.0, "lr": 7.459553897309346e-09, "epoch": 4.824957857963013, "percentage": 96.5, "elapsed_time": "4:15:57", "remaining_time": "0:09:17", "throughput": 8665.8, "total_tokens": 133085544} +{"current_steps": 197505, "total_steps": 204665, "loss": 0.0, "lr": 7.4491608193744115e-09, "epoch": 4.82508000879486, "percentage": 96.5, "elapsed_time": "4:15:57", "remaining_time": "0:09:16", "throughput": 8665.81, "total_tokens": 133088680} +{"current_steps": 197510, "total_steps": 204665, "loss": 0.0, "lr": 7.438774959536154e-09, "epoch": 4.825202159626707, "percentage": 96.5, "elapsed_time": "4:15:58", "remaining_time": "0:09:16", "throughput": 8665.82, "total_tokens": 133091752} +{"current_steps": 197515, "total_steps": 204665, "loss": 0.0, "lr": 7.428396317870067e-09, "epoch": 4.825324310458554, "percentage": 96.51, "elapsed_time": "4:15:58", "remaining_time": "0:09:15", "throughput": 8665.83, "total_tokens": 133094888} +{"current_steps": 197520, "total_steps": 204665, "loss": 0.0, "lr": 7.4180248944517575e-09, "epoch": 4.825446461290401, "percentage": 96.51, "elapsed_time": "4:15:58", "remaining_time": "0:09:15", "throughput": 8665.85, "total_tokens": 133098152} +{"current_steps": 197525, "total_steps": 204665, "loss": 0.0, "lr": 7.407660689356388e-09, "epoch": 4.8255686121222485, "percentage": 96.51, "elapsed_time": "4:15:59", "remaining_time": "0:09:15", "throughput": 8665.86, "total_tokens": 133101224} +{"current_steps": 197530, "total_steps": 204665, "loss": 0.0, "lr": 7.397303702659674e-09, "epoch": 4.825690762954096, "percentage": 96.51, "elapsed_time": "4:15:59", "remaining_time": "0:09:14", "throughput": 8665.89, "total_tokens": 133104744} +{"current_steps": 197535, "total_steps": 204665, "loss": 0.0, "lr": 7.3869539344365575e-09, "epoch": 4.825812913785943, "percentage": 96.52, "elapsed_time": "4:15:59", "remaining_time": "0:09:14", "throughput": 8665.91, "total_tokens": 133108072} +{"current_steps": 197540, "total_steps": 204665, "loss": 0.0, "lr": 7.376611384762643e-09, "epoch": 4.82593506461779, "percentage": 96.52, "elapsed_time": "4:16:00", "remaining_time": "0:09:14", "throughput": 8665.95, "total_tokens": 133111784} +{"current_steps": 197545, "total_steps": 204665, "loss": 0.0001, "lr": 7.366276053712983e-09, "epoch": 4.826057215449637, "percentage": 96.52, "elapsed_time": "4:16:00", "remaining_time": "0:09:13", "throughput": 8665.97, "total_tokens": 133115176} +{"current_steps": 197550, "total_steps": 204665, "loss": 0.0, "lr": 7.355947941362628e-09, "epoch": 4.826179366281484, "percentage": 96.52, "elapsed_time": "4:16:01", "remaining_time": "0:09:13", "throughput": 8665.99, "total_tokens": 133118312} +{"current_steps": 197555, "total_steps": 204665, "loss": 0.0, "lr": 7.345627047786851e-09, "epoch": 4.826301517113332, "percentage": 96.53, "elapsed_time": "4:16:01", "remaining_time": "0:09:12", "throughput": 8666.02, "total_tokens": 133121960} +{"current_steps": 197560, "total_steps": 204665, "loss": 0.0, "lr": 7.335313373060703e-09, "epoch": 4.826423667945178, "percentage": 96.53, "elapsed_time": "4:16:01", "remaining_time": "0:09:12", "throughput": 8666.03, "total_tokens": 133125096} +{"current_steps": 197565, "total_steps": 204665, "loss": 0.0, "lr": 7.325006917259124e-09, "epoch": 4.826545818777026, "percentage": 96.53, "elapsed_time": "4:16:02", "remaining_time": "0:09:12", "throughput": 8666.06, "total_tokens": 133128488} +{"current_steps": 197570, "total_steps": 204665, "loss": 0.0, "lr": 7.3147076804571665e-09, "epoch": 4.826667969608873, "percentage": 96.53, "elapsed_time": "4:16:02", "remaining_time": "0:09:11", "throughput": 8666.09, "total_tokens": 133132072} +{"current_steps": 197575, "total_steps": 204665, "loss": 0.0, "lr": 7.304415662729546e-09, "epoch": 4.82679012044072, "percentage": 96.54, "elapsed_time": "4:16:02", "remaining_time": "0:09:11", "throughput": 8666.11, "total_tokens": 133135400} +{"current_steps": 197580, "total_steps": 204665, "loss": 0.0, "lr": 7.294130864151315e-09, "epoch": 4.826912271272567, "percentage": 96.54, "elapsed_time": "4:16:03", "remaining_time": "0:09:10", "throughput": 8666.14, "total_tokens": 133138856} +{"current_steps": 197585, "total_steps": 204665, "loss": 0.0, "lr": 7.2838532847971926e-09, "epoch": 4.827034422104415, "percentage": 96.54, "elapsed_time": "4:16:03", "remaining_time": "0:09:10", "throughput": 8666.15, "total_tokens": 133141992} +{"current_steps": 197590, "total_steps": 204665, "loss": 0.0, "lr": 7.273582924741783e-09, "epoch": 4.8271565729362615, "percentage": 96.54, "elapsed_time": "4:16:03", "remaining_time": "0:09:10", "throughput": 8666.16, "total_tokens": 133145256} +{"current_steps": 197595, "total_steps": 204665, "loss": 0.0, "lr": 7.263319784059918e-09, "epoch": 4.827278723768109, "percentage": 96.55, "elapsed_time": "4:16:04", "remaining_time": "0:09:09", "throughput": 8666.19, "total_tokens": 133148712} +{"current_steps": 197600, "total_steps": 204665, "loss": 0.0, "lr": 7.253063862826203e-09, "epoch": 4.827400874599956, "percentage": 96.55, "elapsed_time": "4:16:04", "remaining_time": "0:09:09", "throughput": 8666.22, "total_tokens": 133152104} +{"current_steps": 197605, "total_steps": 204665, "loss": 0.0, "lr": 7.242815161115246e-09, "epoch": 4.827523025431804, "percentage": 96.55, "elapsed_time": "4:16:04", "remaining_time": "0:09:08", "throughput": 8666.24, "total_tokens": 133155432} +{"current_steps": 197610, "total_steps": 204665, "loss": 0.0256, "lr": 7.232573679001541e-09, "epoch": 4.82764517626365, "percentage": 96.55, "elapsed_time": "4:16:05", "remaining_time": "0:09:08", "throughput": 8666.26, "total_tokens": 133158760} +{"current_steps": 197615, "total_steps": 204665, "loss": 0.0762, "lr": 7.222339416559587e-09, "epoch": 4.827767327095497, "percentage": 96.56, "elapsed_time": "4:16:05", "remaining_time": "0:09:08", "throughput": 8666.33, "total_tokens": 133163176} +{"current_steps": 197620, "total_steps": 204665, "loss": 0.0001, "lr": 7.212112373863877e-09, "epoch": 4.827889477927345, "percentage": 96.56, "elapsed_time": "4:16:05", "remaining_time": "0:09:07", "throughput": 8666.34, "total_tokens": 133166312} +{"current_steps": 197625, "total_steps": 204665, "loss": 0.0, "lr": 7.201892550988686e-09, "epoch": 4.828011628759192, "percentage": 96.56, "elapsed_time": "4:16:06", "remaining_time": "0:09:07", "throughput": 8666.36, "total_tokens": 133169640} +{"current_steps": 197630, "total_steps": 204665, "loss": 0.0, "lr": 7.191679948008289e-09, "epoch": 4.828133779591039, "percentage": 96.56, "elapsed_time": "4:16:06", "remaining_time": "0:09:07", "throughput": 8666.39, "total_tokens": 133173224} +{"current_steps": 197635, "total_steps": 204665, "loss": 0.0, "lr": 7.1814745649971805e-09, "epoch": 4.828255930422886, "percentage": 96.57, "elapsed_time": "4:16:06", "remaining_time": "0:09:06", "throughput": 8666.42, "total_tokens": 133176680} +{"current_steps": 197640, "total_steps": 204665, "loss": 0.0, "lr": 7.171276402029191e-09, "epoch": 4.8283780812547334, "percentage": 96.57, "elapsed_time": "4:16:07", "remaining_time": "0:09:06", "throughput": 8666.41, "total_tokens": 133179496} +{"current_steps": 197645, "total_steps": 204665, "loss": 0.0001, "lr": 7.161085459178928e-09, "epoch": 4.82850023208658, "percentage": 96.57, "elapsed_time": "4:16:07", "remaining_time": "0:09:05", "throughput": 8666.43, "total_tokens": 133182760} +{"current_steps": 197650, "total_steps": 204665, "loss": 0.0, "lr": 7.150901736520221e-09, "epoch": 4.828622382918428, "percentage": 96.57, "elapsed_time": "4:16:08", "remaining_time": "0:09:05", "throughput": 8666.46, "total_tokens": 133186216} +{"current_steps": 197655, "total_steps": 204665, "loss": 0.0, "lr": 7.140725234127231e-09, "epoch": 4.828744533750275, "percentage": 96.57, "elapsed_time": "4:16:08", "remaining_time": "0:09:05", "throughput": 8666.47, "total_tokens": 133189416} +{"current_steps": 197660, "total_steps": 204665, "loss": 0.0, "lr": 7.130555952073792e-09, "epoch": 4.828866684582122, "percentage": 96.58, "elapsed_time": "4:16:08", "remaining_time": "0:09:04", "throughput": 8666.54, "total_tokens": 133193704} +{"current_steps": 197665, "total_steps": 204665, "loss": 0.0, "lr": 7.120393890434173e-09, "epoch": 4.828988835413969, "percentage": 96.58, "elapsed_time": "4:16:09", "remaining_time": "0:09:04", "throughput": 8666.61, "total_tokens": 133197992} +{"current_steps": 197670, "total_steps": 204665, "loss": 0.0, "lr": 7.1102390492819855e-09, "epoch": 4.829110986245817, "percentage": 96.58, "elapsed_time": "4:16:09", "remaining_time": "0:09:03", "throughput": 8666.65, "total_tokens": 133201640} +{"current_steps": 197675, "total_steps": 204665, "loss": 0.0, "lr": 7.100091428691279e-09, "epoch": 4.829233137077663, "percentage": 96.58, "elapsed_time": "4:16:09", "remaining_time": "0:09:03", "throughput": 8666.7, "total_tokens": 133205544} +{"current_steps": 197680, "total_steps": 204665, "loss": 0.0, "lr": 7.089951028735663e-09, "epoch": 4.829355287909511, "percentage": 96.59, "elapsed_time": "4:16:10", "remaining_time": "0:09:03", "throughput": 8666.72, "total_tokens": 133208872} +{"current_steps": 197685, "total_steps": 204665, "loss": 0.0, "lr": 7.079817849489078e-09, "epoch": 4.829477438741358, "percentage": 96.59, "elapsed_time": "4:16:10", "remaining_time": "0:09:02", "throughput": 8666.73, "total_tokens": 133212008} +{"current_steps": 197690, "total_steps": 204665, "loss": 0.0, "lr": 7.069691891025132e-09, "epoch": 4.829599589573205, "percentage": 96.59, "elapsed_time": "4:16:10", "remaining_time": "0:09:02", "throughput": 8666.76, "total_tokens": 133215464} +{"current_steps": 197695, "total_steps": 204665, "loss": 0.0, "lr": 7.05957315341732e-09, "epoch": 4.829721740405052, "percentage": 96.59, "elapsed_time": "4:16:11", "remaining_time": "0:09:01", "throughput": 8666.77, "total_tokens": 133218536} +{"current_steps": 197700, "total_steps": 204665, "loss": 0.0, "lr": 7.049461636739473e-09, "epoch": 4.8298438912369, "percentage": 96.6, "elapsed_time": "4:16:11", "remaining_time": "0:09:01", "throughput": 8666.77, "total_tokens": 133221544} +{"current_steps": 197705, "total_steps": 204665, "loss": 0.0, "lr": 7.039357341064978e-09, "epoch": 4.8299660420687465, "percentage": 96.6, "elapsed_time": "4:16:11", "remaining_time": "0:09:01", "throughput": 8666.78, "total_tokens": 133224744} +{"current_steps": 197710, "total_steps": 204665, "loss": 0.0, "lr": 7.0292602664673295e-09, "epoch": 4.830088192900593, "percentage": 96.6, "elapsed_time": "4:16:12", "remaining_time": "0:09:00", "throughput": 8666.8, "total_tokens": 133227944} +{"current_steps": 197715, "total_steps": 204665, "loss": 0.0174, "lr": 7.019170413020026e-09, "epoch": 4.830210343732441, "percentage": 96.6, "elapsed_time": "4:16:12", "remaining_time": "0:09:00", "throughput": 8666.84, "total_tokens": 133231784} +{"current_steps": 197720, "total_steps": 204665, "loss": 0.0, "lr": 7.009087780796452e-09, "epoch": 4.8303324945642885, "percentage": 96.61, "elapsed_time": "4:16:12", "remaining_time": "0:08:59", "throughput": 8666.86, "total_tokens": 133235112} +{"current_steps": 197725, "total_steps": 204665, "loss": 0.0, "lr": 6.999012369869773e-09, "epoch": 4.830454645396135, "percentage": 96.61, "elapsed_time": "4:16:13", "remaining_time": "0:08:59", "throughput": 8666.89, "total_tokens": 133238632} +{"current_steps": 197730, "total_steps": 204665, "loss": 0.0, "lr": 6.988944180313372e-09, "epoch": 4.830576796227982, "percentage": 96.61, "elapsed_time": "4:16:13", "remaining_time": "0:08:59", "throughput": 8666.91, "total_tokens": 133241896} +{"current_steps": 197735, "total_steps": 204665, "loss": 0.0, "lr": 6.978883212200526e-09, "epoch": 4.83069894705983, "percentage": 96.61, "elapsed_time": "4:16:13", "remaining_time": "0:08:58", "throughput": 8666.92, "total_tokens": 133244904} +{"current_steps": 197740, "total_steps": 204665, "loss": 0.0, "lr": 6.968829465604287e-09, "epoch": 4.830821097891676, "percentage": 96.62, "elapsed_time": "4:16:14", "remaining_time": "0:08:58", "throughput": 8666.91, "total_tokens": 133247656} +{"current_steps": 197745, "total_steps": 204665, "loss": 0.0, "lr": 6.9587829405978184e-09, "epoch": 4.830943248723524, "percentage": 96.62, "elapsed_time": "4:16:14", "remaining_time": "0:08:58", "throughput": 8666.93, "total_tokens": 133250984} +{"current_steps": 197750, "total_steps": 204665, "loss": 0.0, "lr": 6.948743637254173e-09, "epoch": 4.831065399555371, "percentage": 96.62, "elapsed_time": "4:16:15", "remaining_time": "0:08:57", "throughput": 8667.0, "total_tokens": 133255208} +{"current_steps": 197755, "total_steps": 204665, "loss": 0.0, "lr": 6.938711555646293e-09, "epoch": 4.831187550387218, "percentage": 96.62, "elapsed_time": "4:16:15", "remaining_time": "0:08:57", "throughput": 8667.02, "total_tokens": 133258600} +{"current_steps": 197760, "total_steps": 204665, "loss": 0.0009, "lr": 6.928686695847341e-09, "epoch": 4.831309701219065, "percentage": 96.63, "elapsed_time": "4:16:15", "remaining_time": "0:08:56", "throughput": 8667.05, "total_tokens": 133262120} +{"current_steps": 197765, "total_steps": 204665, "loss": 0.0, "lr": 6.918669057929927e-09, "epoch": 4.831431852050913, "percentage": 96.63, "elapsed_time": "4:16:16", "remaining_time": "0:08:56", "throughput": 8667.07, "total_tokens": 133265448} +{"current_steps": 197770, "total_steps": 204665, "loss": 0.0, "lr": 6.908658641967102e-09, "epoch": 4.8315540028827595, "percentage": 96.63, "elapsed_time": "4:16:16", "remaining_time": "0:08:56", "throughput": 8667.09, "total_tokens": 133268584} +{"current_steps": 197775, "total_steps": 204665, "loss": 0.0, "lr": 6.8986554480316985e-09, "epoch": 4.831676153714607, "percentage": 96.63, "elapsed_time": "4:16:16", "remaining_time": "0:08:55", "throughput": 8667.11, "total_tokens": 133271848} +{"current_steps": 197780, "total_steps": 204665, "loss": 0.0, "lr": 6.888659476196323e-09, "epoch": 4.831798304546454, "percentage": 96.64, "elapsed_time": "4:16:17", "remaining_time": "0:08:55", "throughput": 8667.16, "total_tokens": 133275880} +{"current_steps": 197785, "total_steps": 204665, "loss": 0.0, "lr": 6.878670726533808e-09, "epoch": 4.8319204553783015, "percentage": 96.64, "elapsed_time": "4:16:17", "remaining_time": "0:08:54", "throughput": 8667.21, "total_tokens": 133279656} +{"current_steps": 197790, "total_steps": 204665, "loss": 0.0, "lr": 6.868689199116651e-09, "epoch": 4.832042606210148, "percentage": 96.64, "elapsed_time": "4:16:17", "remaining_time": "0:08:54", "throughput": 8667.24, "total_tokens": 133283112} +{"current_steps": 197795, "total_steps": 204665, "loss": 0.0, "lr": 6.85871489401757e-09, "epoch": 4.832164757041996, "percentage": 96.64, "elapsed_time": "4:16:18", "remaining_time": "0:08:54", "throughput": 8667.24, "total_tokens": 133285992} +{"current_steps": 197800, "total_steps": 204665, "loss": 0.0, "lr": 6.8487478113089524e-09, "epoch": 4.832286907873843, "percentage": 96.65, "elapsed_time": "4:16:18", "remaining_time": "0:08:53", "throughput": 8667.27, "total_tokens": 133289512} +{"current_steps": 197805, "total_steps": 204665, "loss": 0.0, "lr": 6.838787951063407e-09, "epoch": 4.832409058705689, "percentage": 96.65, "elapsed_time": "4:16:18", "remaining_time": "0:08:53", "throughput": 8667.28, "total_tokens": 133292712} +{"current_steps": 197810, "total_steps": 204665, "loss": 0.0, "lr": 6.8288353133533205e-09, "epoch": 4.832531209537537, "percentage": 96.65, "elapsed_time": "4:16:19", "remaining_time": "0:08:52", "throughput": 8667.32, "total_tokens": 133296360} +{"current_steps": 197815, "total_steps": 204665, "loss": 0.0, "lr": 6.818889898250968e-09, "epoch": 4.832653360369384, "percentage": 96.65, "elapsed_time": "4:16:19", "remaining_time": "0:08:52", "throughput": 8667.35, "total_tokens": 133299752} +{"current_steps": 197820, "total_steps": 204665, "loss": 0.0019, "lr": 6.8089517058289584e-09, "epoch": 4.832775511201231, "percentage": 96.66, "elapsed_time": "4:16:19", "remaining_time": "0:08:52", "throughput": 8667.38, "total_tokens": 133303272} +{"current_steps": 197825, "total_steps": 204665, "loss": 0.0, "lr": 6.7990207361593445e-09, "epoch": 4.832897662033078, "percentage": 96.66, "elapsed_time": "4:16:20", "remaining_time": "0:08:51", "throughput": 8667.4, "total_tokens": 133306664} +{"current_steps": 197830, "total_steps": 204665, "loss": 0.0, "lr": 6.789096989314291e-09, "epoch": 4.833019812864926, "percentage": 96.66, "elapsed_time": "4:16:20", "remaining_time": "0:08:51", "throughput": 8667.44, "total_tokens": 133310312} +{"current_steps": 197835, "total_steps": 204665, "loss": 0.0, "lr": 6.7791804653661855e-09, "epoch": 4.8331419636967725, "percentage": 96.66, "elapsed_time": "4:16:20", "remaining_time": "0:08:51", "throughput": 8667.48, "total_tokens": 133313960} +{"current_steps": 197840, "total_steps": 204665, "loss": 0.0, "lr": 6.769271164386969e-09, "epoch": 4.83326411452862, "percentage": 96.67, "elapsed_time": "4:16:21", "remaining_time": "0:08:50", "throughput": 8667.48, "total_tokens": 133316968} +{"current_steps": 197845, "total_steps": 204665, "loss": 0.0001, "lr": 6.759369086448696e-09, "epoch": 4.833386265360467, "percentage": 96.67, "elapsed_time": "4:16:21", "remaining_time": "0:08:50", "throughput": 8667.48, "total_tokens": 133319848} +{"current_steps": 197850, "total_steps": 204665, "loss": 0.0, "lr": 6.749474231623531e-09, "epoch": 4.8335084161923145, "percentage": 96.67, "elapsed_time": "4:16:21", "remaining_time": "0:08:49", "throughput": 8667.49, "total_tokens": 133323048} +{"current_steps": 197855, "total_steps": 204665, "loss": 0.0, "lr": 6.739586599983416e-09, "epoch": 4.833630567024161, "percentage": 96.67, "elapsed_time": "4:16:22", "remaining_time": "0:08:49", "throughput": 8667.5, "total_tokens": 133326184} +{"current_steps": 197860, "total_steps": 204665, "loss": 0.0, "lr": 6.7297061916000706e-09, "epoch": 4.833752717856009, "percentage": 96.68, "elapsed_time": "4:16:22", "remaining_time": "0:08:49", "throughput": 8667.53, "total_tokens": 133329512} +{"current_steps": 197865, "total_steps": 204665, "loss": 0.0, "lr": 6.719833006545439e-09, "epoch": 4.833874868687856, "percentage": 96.68, "elapsed_time": "4:16:22", "remaining_time": "0:08:48", "throughput": 8667.55, "total_tokens": 133332840} +{"current_steps": 197870, "total_steps": 204665, "loss": 0.0, "lr": 6.709967044891351e-09, "epoch": 4.833997019519703, "percentage": 96.68, "elapsed_time": "4:16:23", "remaining_time": "0:08:48", "throughput": 8667.58, "total_tokens": 133336424} +{"current_steps": 197875, "total_steps": 204665, "loss": 0.0, "lr": 6.7001083067095285e-09, "epoch": 4.83411917035155, "percentage": 96.68, "elapsed_time": "4:16:23", "remaining_time": "0:08:47", "throughput": 8667.61, "total_tokens": 133339880} +{"current_steps": 197880, "total_steps": 204665, "loss": 0.0, "lr": 6.690256792071802e-09, "epoch": 4.834241321183397, "percentage": 96.68, "elapsed_time": "4:16:24", "remaining_time": "0:08:47", "throughput": 8667.63, "total_tokens": 133343208} +{"current_steps": 197885, "total_steps": 204665, "loss": 0.0, "lr": 6.680412501049559e-09, "epoch": 4.834363472015244, "percentage": 96.69, "elapsed_time": "4:16:24", "remaining_time": "0:08:47", "throughput": 8667.66, "total_tokens": 133346664} +{"current_steps": 197890, "total_steps": 204665, "loss": 0.0, "lr": 6.670575433714631e-09, "epoch": 4.834485622847092, "percentage": 96.69, "elapsed_time": "4:16:24", "remaining_time": "0:08:46", "throughput": 8667.7, "total_tokens": 133350440} +{"current_steps": 197895, "total_steps": 204665, "loss": 0.0, "lr": 6.660745590138406e-09, "epoch": 4.834607773678939, "percentage": 96.69, "elapsed_time": "4:16:25", "remaining_time": "0:08:46", "throughput": 8667.72, "total_tokens": 133353704} +{"current_steps": 197900, "total_steps": 204665, "loss": 0.0, "lr": 6.650922970392381e-09, "epoch": 4.8347299245107855, "percentage": 96.69, "elapsed_time": "4:16:25", "remaining_time": "0:08:45", "throughput": 8667.72, "total_tokens": 133356648} +{"current_steps": 197905, "total_steps": 204665, "loss": 0.0001, "lr": 6.641107574548055e-09, "epoch": 4.834852075342633, "percentage": 96.7, "elapsed_time": "4:16:25", "remaining_time": "0:08:45", "throughput": 8667.8, "total_tokens": 133361064} +{"current_steps": 197910, "total_steps": 204665, "loss": 0.0, "lr": 6.6312994026768155e-09, "epoch": 4.83497422617448, "percentage": 96.7, "elapsed_time": "4:16:26", "remaining_time": "0:08:45", "throughput": 8667.86, "total_tokens": 133365160} +{"current_steps": 197915, "total_steps": 204665, "loss": 0.0, "lr": 6.621498454849939e-09, "epoch": 4.8350963770063276, "percentage": 96.7, "elapsed_time": "4:16:26", "remaining_time": "0:08:44", "throughput": 8667.87, "total_tokens": 133368296} +{"current_steps": 197920, "total_steps": 204665, "loss": 0.0, "lr": 6.6117047311387006e-09, "epoch": 4.835218527838174, "percentage": 96.7, "elapsed_time": "4:16:26", "remaining_time": "0:08:44", "throughput": 8667.87, "total_tokens": 133371304} +{"current_steps": 197925, "total_steps": 204665, "loss": 0.0279, "lr": 6.601918231614267e-09, "epoch": 4.835340678670022, "percentage": 96.71, "elapsed_time": "4:16:27", "remaining_time": "0:08:43", "throughput": 8667.9, "total_tokens": 133374760} +{"current_steps": 197930, "total_steps": 204665, "loss": 0.0001, "lr": 6.592138956347915e-09, "epoch": 4.835462829501869, "percentage": 96.71, "elapsed_time": "4:16:27", "remaining_time": "0:08:43", "throughput": 8667.95, "total_tokens": 133378728} +{"current_steps": 197935, "total_steps": 204665, "loss": 0.0, "lr": 6.582366905410808e-09, "epoch": 4.835584980333716, "percentage": 96.71, "elapsed_time": "4:16:27", "remaining_time": "0:08:43", "throughput": 8667.99, "total_tokens": 133382376} +{"current_steps": 197940, "total_steps": 204665, "loss": 0.0, "lr": 6.57260207887389e-09, "epoch": 4.835707131165563, "percentage": 96.71, "elapsed_time": "4:16:28", "remaining_time": "0:08:42", "throughput": 8668.0, "total_tokens": 133385576} +{"current_steps": 197945, "total_steps": 204665, "loss": 0.0, "lr": 6.562844476808216e-09, "epoch": 4.835829281997411, "percentage": 96.72, "elapsed_time": "4:16:28", "remaining_time": "0:08:42", "throughput": 8668.0, "total_tokens": 133388456} +{"current_steps": 197950, "total_steps": 204665, "loss": 0.0, "lr": 6.553094099284617e-09, "epoch": 4.835951432829257, "percentage": 96.72, "elapsed_time": "4:16:29", "remaining_time": "0:08:42", "throughput": 8668.12, "total_tokens": 133393704} +{"current_steps": 197955, "total_steps": 204665, "loss": 0.0, "lr": 6.543350946374259e-09, "epoch": 4.836073583661105, "percentage": 96.72, "elapsed_time": "4:16:29", "remaining_time": "0:08:41", "throughput": 8668.14, "total_tokens": 133397096} +{"current_steps": 197960, "total_steps": 204665, "loss": 0.0, "lr": 6.533615018147753e-09, "epoch": 4.836195734492952, "percentage": 96.72, "elapsed_time": "4:16:29", "remaining_time": "0:08:41", "throughput": 8668.14, "total_tokens": 133399976} +{"current_steps": 197965, "total_steps": 204665, "loss": 0.0, "lr": 6.523886314676152e-09, "epoch": 4.8363178853247994, "percentage": 96.73, "elapsed_time": "4:16:30", "remaining_time": "0:08:40", "throughput": 8668.18, "total_tokens": 133403688} +{"current_steps": 197970, "total_steps": 204665, "loss": 0.0, "lr": 6.514164836029956e-09, "epoch": 4.836440036156646, "percentage": 96.73, "elapsed_time": "4:16:30", "remaining_time": "0:08:40", "throughput": 8668.22, "total_tokens": 133407336} +{"current_steps": 197975, "total_steps": 204665, "loss": 0.0002, "lr": 6.504450582279997e-09, "epoch": 4.836562186988493, "percentage": 96.73, "elapsed_time": "4:16:30", "remaining_time": "0:08:40", "throughput": 8668.23, "total_tokens": 133410536} +{"current_steps": 197980, "total_steps": 204665, "loss": 0.0, "lr": 6.494743553496884e-09, "epoch": 4.836684337820341, "percentage": 96.73, "elapsed_time": "4:16:31", "remaining_time": "0:08:39", "throughput": 8668.24, "total_tokens": 133413480} +{"current_steps": 197985, "total_steps": 204665, "loss": 0.0, "lr": 6.485043749751229e-09, "epoch": 4.836806488652188, "percentage": 96.74, "elapsed_time": "4:16:31", "remaining_time": "0:08:39", "throughput": 8668.27, "total_tokens": 133417000} +{"current_steps": 197990, "total_steps": 204665, "loss": 0.0, "lr": 6.47535117111353e-09, "epoch": 4.836928639484035, "percentage": 96.74, "elapsed_time": "4:16:31", "remaining_time": "0:08:38", "throughput": 8668.28, "total_tokens": 133420200} +{"current_steps": 197995, "total_steps": 204665, "loss": 0.0, "lr": 6.465665817654287e-09, "epoch": 4.837050790315882, "percentage": 96.74, "elapsed_time": "4:16:32", "remaining_time": "0:08:38", "throughput": 8668.3, "total_tokens": 133423528} +{"current_steps": 198000, "total_steps": 204665, "loss": 0.0, "lr": 6.455987689443998e-09, "epoch": 4.837172941147729, "percentage": 96.74, "elapsed_time": "4:16:32", "remaining_time": "0:08:38", "throughput": 8668.33, "total_tokens": 133426984} +{"current_steps": 198005, "total_steps": 204665, "loss": 0.0, "lr": 6.446316786552941e-09, "epoch": 4.837295091979576, "percentage": 96.75, "elapsed_time": "4:16:32", "remaining_time": "0:08:37", "throughput": 8668.35, "total_tokens": 133430312} +{"current_steps": 198010, "total_steps": 204665, "loss": 0.0, "lr": 6.436653109051615e-09, "epoch": 4.837417242811424, "percentage": 96.75, "elapsed_time": "4:16:33", "remaining_time": "0:08:37", "throughput": 8668.39, "total_tokens": 133434024} +{"current_steps": 198015, "total_steps": 204665, "loss": 0.0, "lr": 6.426996657010075e-09, "epoch": 4.8375393936432705, "percentage": 96.75, "elapsed_time": "4:16:33", "remaining_time": "0:08:36", "throughput": 8668.42, "total_tokens": 133437480} +{"current_steps": 198020, "total_steps": 204665, "loss": 0.0, "lr": 6.4173474304987096e-09, "epoch": 4.837661544475118, "percentage": 96.75, "elapsed_time": "4:16:33", "remaining_time": "0:08:36", "throughput": 8668.45, "total_tokens": 133440936} +{"current_steps": 198025, "total_steps": 204665, "loss": 0.0, "lr": 6.407705429587573e-09, "epoch": 4.837783695306965, "percentage": 96.76, "elapsed_time": "4:16:34", "remaining_time": "0:08:36", "throughput": 8668.46, "total_tokens": 133444072} +{"current_steps": 198030, "total_steps": 204665, "loss": 0.0, "lr": 6.398070654346943e-09, "epoch": 4.8379058461388125, "percentage": 96.76, "elapsed_time": "4:16:34", "remaining_time": "0:08:35", "throughput": 8668.5, "total_tokens": 133447848} +{"current_steps": 198035, "total_steps": 204665, "loss": 0.0, "lr": 6.3884431048467635e-09, "epoch": 4.838027996970659, "percentage": 96.76, "elapsed_time": "4:16:34", "remaining_time": "0:08:35", "throughput": 8668.53, "total_tokens": 133451432} +{"current_steps": 198040, "total_steps": 204665, "loss": 0.0, "lr": 6.378822781156978e-09, "epoch": 4.838150147802507, "percentage": 96.76, "elapsed_time": "4:16:35", "remaining_time": "0:08:35", "throughput": 8668.54, "total_tokens": 133454440} +{"current_steps": 198045, "total_steps": 204665, "loss": 0.0, "lr": 6.369209683347754e-09, "epoch": 4.838272298634354, "percentage": 96.77, "elapsed_time": "4:16:35", "remaining_time": "0:08:34", "throughput": 8668.55, "total_tokens": 133457704} +{"current_steps": 198050, "total_steps": 204665, "loss": 0.0, "lr": 6.3596038114888114e-09, "epoch": 4.838394449466201, "percentage": 96.77, "elapsed_time": "4:16:35", "remaining_time": "0:08:34", "throughput": 8668.58, "total_tokens": 133461096} +{"current_steps": 198055, "total_steps": 204665, "loss": 0.0, "lr": 6.350005165650207e-09, "epoch": 4.838516600298048, "percentage": 96.77, "elapsed_time": "4:16:36", "remaining_time": "0:08:33", "throughput": 8668.6, "total_tokens": 133464360} +{"current_steps": 198060, "total_steps": 204665, "loss": 0.0, "lr": 6.340413745901551e-09, "epoch": 4.838638751129896, "percentage": 96.77, "elapsed_time": "4:16:36", "remaining_time": "0:08:33", "throughput": 8668.63, "total_tokens": 133467816} +{"current_steps": 198065, "total_steps": 204665, "loss": 0.0, "lr": 6.330829552312678e-09, "epoch": 4.838760901961742, "percentage": 96.78, "elapsed_time": "4:16:36", "remaining_time": "0:08:33", "throughput": 8668.64, "total_tokens": 133471080} +{"current_steps": 198070, "total_steps": 204665, "loss": 0.0001, "lr": 6.321252584953307e-09, "epoch": 4.838883052793589, "percentage": 96.78, "elapsed_time": "4:16:37", "remaining_time": "0:08:32", "throughput": 8668.66, "total_tokens": 133474408} +{"current_steps": 198075, "total_steps": 204665, "loss": 0.0, "lr": 6.31168284389294e-09, "epoch": 4.839005203625437, "percentage": 96.78, "elapsed_time": "4:16:37", "remaining_time": "0:08:32", "throughput": 8668.69, "total_tokens": 133477800} +{"current_steps": 198080, "total_steps": 204665, "loss": 0.0, "lr": 6.302120329201411e-09, "epoch": 4.8391273544572835, "percentage": 96.78, "elapsed_time": "4:16:38", "remaining_time": "0:08:31", "throughput": 8668.7, "total_tokens": 133480936} +{"current_steps": 198085, "total_steps": 204665, "loss": 0.0, "lr": 6.292565040947995e-09, "epoch": 4.839249505289131, "percentage": 96.78, "elapsed_time": "4:16:38", "remaining_time": "0:08:31", "throughput": 8668.73, "total_tokens": 133484392} +{"current_steps": 198090, "total_steps": 204665, "loss": 0.0, "lr": 6.283016979202416e-09, "epoch": 4.839371656120978, "percentage": 96.79, "elapsed_time": "4:16:38", "remaining_time": "0:08:31", "throughput": 8668.76, "total_tokens": 133488040} +{"current_steps": 198095, "total_steps": 204665, "loss": 0.0, "lr": 6.273476144034062e-09, "epoch": 4.8394938069528255, "percentage": 96.79, "elapsed_time": "4:16:39", "remaining_time": "0:08:30", "throughput": 8668.77, "total_tokens": 133491112} +{"current_steps": 198100, "total_steps": 204665, "loss": 0.0, "lr": 6.2639425355122126e-09, "epoch": 4.839615957784672, "percentage": 96.79, "elapsed_time": "4:16:39", "remaining_time": "0:08:30", "throughput": 8668.79, "total_tokens": 133494504} +{"current_steps": 198105, "total_steps": 204665, "loss": 0.0, "lr": 6.254416153706254e-09, "epoch": 4.83973810861652, "percentage": 96.79, "elapsed_time": "4:16:39", "remaining_time": "0:08:29", "throughput": 8668.81, "total_tokens": 133497640} +{"current_steps": 198110, "total_steps": 204665, "loss": 0.0, "lr": 6.244896998685467e-09, "epoch": 4.839860259448367, "percentage": 96.8, "elapsed_time": "4:16:40", "remaining_time": "0:08:29", "throughput": 8668.83, "total_tokens": 133500968} +{"current_steps": 198115, "total_steps": 204665, "loss": 0.0, "lr": 6.235385070519017e-09, "epoch": 4.839982410280214, "percentage": 96.8, "elapsed_time": "4:16:40", "remaining_time": "0:08:29", "throughput": 8668.86, "total_tokens": 133504488} +{"current_steps": 198120, "total_steps": 204665, "loss": 0.0, "lr": 6.225880369276293e-09, "epoch": 4.840104561112061, "percentage": 96.8, "elapsed_time": "4:16:40", "remaining_time": "0:08:28", "throughput": 8668.86, "total_tokens": 133507560} +{"current_steps": 198125, "total_steps": 204665, "loss": 0.0001, "lr": 6.216382895026129e-09, "epoch": 4.840226711943909, "percentage": 96.8, "elapsed_time": "4:16:41", "remaining_time": "0:08:28", "throughput": 8668.89, "total_tokens": 133510952} +{"current_steps": 198130, "total_steps": 204665, "loss": 0.0, "lr": 6.206892647837802e-09, "epoch": 4.840348862775755, "percentage": 96.81, "elapsed_time": "4:16:41", "remaining_time": "0:08:27", "throughput": 8668.88, "total_tokens": 133513768} +{"current_steps": 198135, "total_steps": 204665, "loss": 0.0, "lr": 6.197409627780148e-09, "epoch": 4.840471013607603, "percentage": 96.81, "elapsed_time": "4:16:41", "remaining_time": "0:08:27", "throughput": 8668.92, "total_tokens": 133517480} +{"current_steps": 198140, "total_steps": 204665, "loss": 0.0, "lr": 6.187933834922332e-09, "epoch": 4.84059316443945, "percentage": 96.81, "elapsed_time": "4:16:42", "remaining_time": "0:08:27", "throughput": 8668.94, "total_tokens": 133520744} +{"current_steps": 198145, "total_steps": 204665, "loss": 0.0, "lr": 6.178465269333188e-09, "epoch": 4.8407153152712965, "percentage": 96.81, "elapsed_time": "4:16:42", "remaining_time": "0:08:26", "throughput": 8668.95, "total_tokens": 133523944} +{"current_steps": 198150, "total_steps": 204665, "loss": 0.0, "lr": 6.16900393108144e-09, "epoch": 4.840837466103144, "percentage": 96.82, "elapsed_time": "4:16:42", "remaining_time": "0:08:26", "throughput": 8668.96, "total_tokens": 133527016} +{"current_steps": 198155, "total_steps": 204665, "loss": 0.0, "lr": 6.159549820236032e-09, "epoch": 4.840959616934992, "percentage": 96.82, "elapsed_time": "4:16:43", "remaining_time": "0:08:26", "throughput": 8668.97, "total_tokens": 133530152} +{"current_steps": 198160, "total_steps": 204665, "loss": 0.0, "lr": 6.150102936865797e-09, "epoch": 4.8410817677668385, "percentage": 96.82, "elapsed_time": "4:16:43", "remaining_time": "0:08:25", "throughput": 8668.98, "total_tokens": 133533352} +{"current_steps": 198165, "total_steps": 204665, "loss": 0.0, "lr": 6.140663281039238e-09, "epoch": 4.841203918598685, "percentage": 96.82, "elapsed_time": "4:16:43", "remaining_time": "0:08:25", "throughput": 8668.99, "total_tokens": 133536488} +{"current_steps": 198170, "total_steps": 204665, "loss": 0.0, "lr": 6.131230852825075e-09, "epoch": 4.841326069430533, "percentage": 96.83, "elapsed_time": "4:16:44", "remaining_time": "0:08:24", "throughput": 8669.02, "total_tokens": 133539944} +{"current_steps": 198175, "total_steps": 204665, "loss": 0.0, "lr": 6.1218056522919225e-09, "epoch": 4.84144822026238, "percentage": 96.83, "elapsed_time": "4:16:44", "remaining_time": "0:08:24", "throughput": 8669.03, "total_tokens": 133542952} +{"current_steps": 198180, "total_steps": 204665, "loss": 0.0, "lr": 6.11238767950839e-09, "epoch": 4.841570371094227, "percentage": 96.83, "elapsed_time": "4:16:44", "remaining_time": "0:08:24", "throughput": 8669.05, "total_tokens": 133546472} +{"current_steps": 198185, "total_steps": 204665, "loss": 0.0, "lr": 6.102976934542758e-09, "epoch": 4.841692521926074, "percentage": 96.83, "elapsed_time": "4:16:45", "remaining_time": "0:08:23", "throughput": 8669.07, "total_tokens": 133549608} +{"current_steps": 198190, "total_steps": 204665, "loss": 0.0, "lr": 6.0935734174637485e-09, "epoch": 4.841814672757922, "percentage": 96.84, "elapsed_time": "4:16:45", "remaining_time": "0:08:23", "throughput": 8669.09, "total_tokens": 133553064} +{"current_steps": 198195, "total_steps": 204665, "loss": 0.0, "lr": 6.084177128339529e-09, "epoch": 4.841936823589768, "percentage": 96.84, "elapsed_time": "4:16:46", "remaining_time": "0:08:22", "throughput": 8669.13, "total_tokens": 133556776} +{"current_steps": 198200, "total_steps": 204665, "loss": 0.0, "lr": 6.074788067238601e-09, "epoch": 4.842058974421616, "percentage": 96.84, "elapsed_time": "4:16:46", "remaining_time": "0:08:22", "throughput": 8669.17, "total_tokens": 133560488} +{"current_steps": 198205, "total_steps": 204665, "loss": 0.0, "lr": 6.0654062342290204e-09, "epoch": 4.842181125253463, "percentage": 96.84, "elapsed_time": "4:16:46", "remaining_time": "0:08:22", "throughput": 8669.18, "total_tokens": 133563496} +{"current_steps": 198210, "total_steps": 204665, "loss": 0.0, "lr": 6.056031629379177e-09, "epoch": 4.84230327608531, "percentage": 96.85, "elapsed_time": "4:16:47", "remaining_time": "0:08:21", "throughput": 8669.18, "total_tokens": 133566504} +{"current_steps": 198215, "total_steps": 204665, "loss": 0.0, "lr": 6.046664252757239e-09, "epoch": 4.842425426917157, "percentage": 96.85, "elapsed_time": "4:16:47", "remaining_time": "0:08:21", "throughput": 8669.2, "total_tokens": 133569832} +{"current_steps": 198220, "total_steps": 204665, "loss": 0.0, "lr": 6.037304104431262e-09, "epoch": 4.842547577749005, "percentage": 96.85, "elapsed_time": "4:16:47", "remaining_time": "0:08:20", "throughput": 8669.21, "total_tokens": 133572904} +{"current_steps": 198225, "total_steps": 204665, "loss": 0.0, "lr": 6.027951184469416e-09, "epoch": 4.8426697285808515, "percentage": 96.85, "elapsed_time": "4:16:48", "remaining_time": "0:08:20", "throughput": 8669.25, "total_tokens": 133576744} +{"current_steps": 198230, "total_steps": 204665, "loss": 0.0, "lr": 6.018605492939533e-09, "epoch": 4.842791879412699, "percentage": 96.86, "elapsed_time": "4:16:48", "remaining_time": "0:08:20", "throughput": 8669.31, "total_tokens": 133580648} +{"current_steps": 198235, "total_steps": 204665, "loss": 0.0, "lr": 6.009267029909892e-09, "epoch": 4.842914030244546, "percentage": 96.86, "elapsed_time": "4:16:48", "remaining_time": "0:08:19", "throughput": 8669.33, "total_tokens": 133584104} +{"current_steps": 198240, "total_steps": 204665, "loss": 0.0, "lr": 5.999935795447997e-09, "epoch": 4.843036181076393, "percentage": 96.86, "elapsed_time": "4:16:49", "remaining_time": "0:08:19", "throughput": 8669.36, "total_tokens": 133587496} +{"current_steps": 198245, "total_steps": 204665, "loss": 0.0, "lr": 5.990611789622013e-09, "epoch": 4.84315833190824, "percentage": 96.86, "elapsed_time": "4:16:49", "remaining_time": "0:08:19", "throughput": 8669.38, "total_tokens": 133590888} +{"current_steps": 198250, "total_steps": 204665, "loss": 0.0, "lr": 5.9812950124997765e-09, "epoch": 4.843280482740088, "percentage": 96.87, "elapsed_time": "4:16:49", "remaining_time": "0:08:18", "throughput": 8669.41, "total_tokens": 133594344} +{"current_steps": 198255, "total_steps": 204665, "loss": 0.0, "lr": 5.971985464148788e-09, "epoch": 4.843402633571935, "percentage": 96.87, "elapsed_time": "4:16:50", "remaining_time": "0:08:18", "throughput": 8669.42, "total_tokens": 133597608} +{"current_steps": 198260, "total_steps": 204665, "loss": 0.0, "lr": 5.962683144636882e-09, "epoch": 4.843524784403781, "percentage": 96.87, "elapsed_time": "4:16:50", "remaining_time": "0:08:17", "throughput": 8669.44, "total_tokens": 133600872} +{"current_steps": 198265, "total_steps": 204665, "loss": 0.0, "lr": 5.9533880540317826e-09, "epoch": 4.843646935235629, "percentage": 96.87, "elapsed_time": "4:16:50", "remaining_time": "0:08:17", "throughput": 8669.47, "total_tokens": 133604392} +{"current_steps": 198270, "total_steps": 204665, "loss": 0.0, "lr": 5.944100192400992e-09, "epoch": 4.843769086067476, "percentage": 96.88, "elapsed_time": "4:16:51", "remaining_time": "0:08:17", "throughput": 8669.5, "total_tokens": 133607912} +{"current_steps": 198275, "total_steps": 204665, "loss": 0.0, "lr": 5.93481955981201e-09, "epoch": 4.843891236899323, "percentage": 96.88, "elapsed_time": "4:16:51", "remaining_time": "0:08:16", "throughput": 8669.51, "total_tokens": 133610920} +{"current_steps": 198280, "total_steps": 204665, "loss": 0.0, "lr": 5.92554615633245e-09, "epoch": 4.84401338773117, "percentage": 96.88, "elapsed_time": "4:16:51", "remaining_time": "0:08:16", "throughput": 8669.55, "total_tokens": 133614760} +{"current_steps": 198285, "total_steps": 204665, "loss": 0.0, "lr": 5.916279982029704e-09, "epoch": 4.844135538563018, "percentage": 96.88, "elapsed_time": "4:16:52", "remaining_time": "0:08:15", "throughput": 8669.6, "total_tokens": 133618664} +{"current_steps": 198290, "total_steps": 204665, "loss": 0.0, "lr": 5.90702103697105e-09, "epoch": 4.844257689394865, "percentage": 96.89, "elapsed_time": "4:16:52", "remaining_time": "0:08:15", "throughput": 8669.62, "total_tokens": 133621992} +{"current_steps": 198295, "total_steps": 204665, "loss": 0.0, "lr": 5.897769321223989e-09, "epoch": 4.844379840226712, "percentage": 96.89, "elapsed_time": "4:16:53", "remaining_time": "0:08:15", "throughput": 8669.64, "total_tokens": 133625192} +{"current_steps": 198300, "total_steps": 204665, "loss": 0.0, "lr": 5.888524834855802e-09, "epoch": 4.844501991058559, "percentage": 96.89, "elapsed_time": "4:16:53", "remaining_time": "0:08:14", "throughput": 8669.67, "total_tokens": 133628840} +{"current_steps": 198305, "total_steps": 204665, "loss": 0.0, "lr": 5.879287577933545e-09, "epoch": 4.844624141890407, "percentage": 96.89, "elapsed_time": "4:16:53", "remaining_time": "0:08:14", "throughput": 8669.7, "total_tokens": 133632296} +{"current_steps": 198310, "total_steps": 204665, "loss": 0.0, "lr": 5.870057550524499e-09, "epoch": 4.844746292722253, "percentage": 96.89, "elapsed_time": "4:16:54", "remaining_time": "0:08:13", "throughput": 8669.72, "total_tokens": 133635560} +{"current_steps": 198315, "total_steps": 204665, "loss": 0.0, "lr": 5.860834752695831e-09, "epoch": 4.844868443554101, "percentage": 96.9, "elapsed_time": "4:16:54", "remaining_time": "0:08:13", "throughput": 8669.72, "total_tokens": 133638568} +{"current_steps": 198320, "total_steps": 204665, "loss": 0.0, "lr": 5.851619184514489e-09, "epoch": 4.844990594385948, "percentage": 96.9, "elapsed_time": "4:16:54", "remaining_time": "0:08:13", "throughput": 8669.75, "total_tokens": 133642024} +{"current_steps": 198325, "total_steps": 204665, "loss": 0.0, "lr": 5.842410846047641e-09, "epoch": 4.845112745217795, "percentage": 96.9, "elapsed_time": "4:16:55", "remaining_time": "0:08:12", "throughput": 8669.78, "total_tokens": 133645544} +{"current_steps": 198330, "total_steps": 204665, "loss": 0.0, "lr": 5.833209737362121e-09, "epoch": 4.845234896049642, "percentage": 96.9, "elapsed_time": "4:16:55", "remaining_time": "0:08:12", "throughput": 8669.8, "total_tokens": 133648936} +{"current_steps": 198335, "total_steps": 204665, "loss": 0.0, "lr": 5.8240158585249886e-09, "epoch": 4.845357046881489, "percentage": 96.91, "elapsed_time": "4:16:55", "remaining_time": "0:08:12", "throughput": 8669.84, "total_tokens": 133652584} +{"current_steps": 198340, "total_steps": 204665, "loss": 0.0, "lr": 5.814829209602856e-09, "epoch": 4.8454791977133365, "percentage": 96.91, "elapsed_time": "4:16:56", "remaining_time": "0:08:11", "throughput": 8669.89, "total_tokens": 133656424} +{"current_steps": 198345, "total_steps": 204665, "loss": 0.0, "lr": 5.805649790662892e-09, "epoch": 4.845601348545183, "percentage": 96.91, "elapsed_time": "4:16:56", "remaining_time": "0:08:11", "throughput": 8669.94, "total_tokens": 133660328} +{"current_steps": 198350, "total_steps": 204665, "loss": 0.0, "lr": 5.796477601771488e-09, "epoch": 4.845723499377031, "percentage": 96.91, "elapsed_time": "4:16:56", "remaining_time": "0:08:10", "throughput": 8669.98, "total_tokens": 133664040} +{"current_steps": 198355, "total_steps": 204665, "loss": 0.0, "lr": 5.78731264299559e-09, "epoch": 4.845845650208878, "percentage": 96.92, "elapsed_time": "4:16:57", "remaining_time": "0:08:10", "throughput": 8670.0, "total_tokens": 133667496} +{"current_steps": 198360, "total_steps": 204665, "loss": 0.0, "lr": 5.7781549144017e-09, "epoch": 4.845967801040725, "percentage": 96.92, "elapsed_time": "4:16:57", "remaining_time": "0:08:10", "throughput": 8670.06, "total_tokens": 133671464} +{"current_steps": 198365, "total_steps": 204665, "loss": 0.0, "lr": 5.769004416056544e-09, "epoch": 4.846089951872572, "percentage": 96.92, "elapsed_time": "4:16:57", "remaining_time": "0:08:09", "throughput": 8670.09, "total_tokens": 133674920} +{"current_steps": 198370, "total_steps": 204665, "loss": 0.0, "lr": 5.759861148026624e-09, "epoch": 4.84621210270442, "percentage": 96.92, "elapsed_time": "4:16:58", "remaining_time": "0:08:09", "throughput": 8670.11, "total_tokens": 133678312} +{"current_steps": 198375, "total_steps": 204665, "loss": 0.0, "lr": 5.75072511037833e-09, "epoch": 4.846334253536266, "percentage": 96.93, "elapsed_time": "4:16:58", "remaining_time": "0:08:08", "throughput": 8670.14, "total_tokens": 133681832} +{"current_steps": 198380, "total_steps": 204665, "loss": 0.0, "lr": 5.741596303178276e-09, "epoch": 4.846456404368114, "percentage": 96.93, "elapsed_time": "4:16:58", "remaining_time": "0:08:08", "throughput": 8670.17, "total_tokens": 133685288} +{"current_steps": 198385, "total_steps": 204665, "loss": 0.0, "lr": 5.732474726492631e-09, "epoch": 4.846578555199961, "percentage": 96.93, "elapsed_time": "4:16:59", "remaining_time": "0:08:08", "throughput": 8670.19, "total_tokens": 133688616} +{"current_steps": 198390, "total_steps": 204665, "loss": 0.0, "lr": 5.723360380388009e-09, "epoch": 4.846700706031808, "percentage": 96.93, "elapsed_time": "4:16:59", "remaining_time": "0:08:07", "throughput": 8670.2, "total_tokens": 133691880} +{"current_steps": 198395, "total_steps": 204665, "loss": 0.0, "lr": 5.714253264930357e-09, "epoch": 4.846822856863655, "percentage": 96.94, "elapsed_time": "4:17:00", "remaining_time": "0:08:07", "throughput": 8670.24, "total_tokens": 133695400} +{"current_steps": 198400, "total_steps": 204665, "loss": 0.0, "lr": 5.7051533801861786e-09, "epoch": 4.846945007695503, "percentage": 96.94, "elapsed_time": "4:17:00", "remaining_time": "0:08:06", "throughput": 8670.25, "total_tokens": 133698664} +{"current_steps": 198405, "total_steps": 204665, "loss": 0.0, "lr": 5.696060726221641e-09, "epoch": 4.8470671585273495, "percentage": 96.94, "elapsed_time": "4:17:00", "remaining_time": "0:08:06", "throughput": 8670.29, "total_tokens": 133702248} +{"current_steps": 198410, "total_steps": 204665, "loss": 0.0, "lr": 5.686975303102693e-09, "epoch": 4.847189309359197, "percentage": 96.94, "elapsed_time": "4:17:01", "remaining_time": "0:08:06", "throughput": 8670.32, "total_tokens": 133705832} +{"current_steps": 198415, "total_steps": 204665, "loss": 0.0818, "lr": 5.677897110895502e-09, "epoch": 4.847311460191044, "percentage": 96.95, "elapsed_time": "4:17:01", "remaining_time": "0:08:05", "throughput": 8670.34, "total_tokens": 133709224} +{"current_steps": 198420, "total_steps": 204665, "loss": 0.0, "lr": 5.6688261496661286e-09, "epoch": 4.8474336110228915, "percentage": 96.95, "elapsed_time": "4:17:01", "remaining_time": "0:08:05", "throughput": 8670.36, "total_tokens": 133712488} +{"current_steps": 198425, "total_steps": 204665, "loss": 0.0, "lr": 5.659762419480407e-09, "epoch": 4.847555761854738, "percentage": 96.95, "elapsed_time": "4:17:02", "remaining_time": "0:08:04", "throughput": 8670.39, "total_tokens": 133715880} +{"current_steps": 198430, "total_steps": 204665, "loss": 0.0, "lr": 5.650705920404397e-09, "epoch": 4.847677912686585, "percentage": 96.95, "elapsed_time": "4:17:02", "remaining_time": "0:08:04", "throughput": 8670.4, "total_tokens": 133719016} +{"current_steps": 198435, "total_steps": 204665, "loss": 0.0, "lr": 5.641656652503934e-09, "epoch": 4.847800063518433, "percentage": 96.96, "elapsed_time": "4:17:02", "remaining_time": "0:08:04", "throughput": 8670.44, "total_tokens": 133722728} +{"current_steps": 198440, "total_steps": 204665, "loss": 0.0, "lr": 5.632614615844744e-09, "epoch": 4.847922214350279, "percentage": 96.96, "elapsed_time": "4:17:03", "remaining_time": "0:08:03", "throughput": 8670.45, "total_tokens": 133725864} +{"current_steps": 198445, "total_steps": 204665, "loss": 0.0, "lr": 5.6235798104926625e-09, "epoch": 4.848044365182127, "percentage": 96.96, "elapsed_time": "4:17:03", "remaining_time": "0:08:03", "throughput": 8670.46, "total_tokens": 133729064} +{"current_steps": 198450, "total_steps": 204665, "loss": 0.0, "lr": 5.614552236513304e-09, "epoch": 4.848166516013974, "percentage": 96.96, "elapsed_time": "4:17:03", "remaining_time": "0:08:03", "throughput": 8670.47, "total_tokens": 133732200} +{"current_steps": 198455, "total_steps": 204665, "loss": 0.0, "lr": 5.605531893972393e-09, "epoch": 4.848288666845821, "percentage": 96.97, "elapsed_time": "4:17:04", "remaining_time": "0:08:02", "throughput": 8670.5, "total_tokens": 133735784} +{"current_steps": 198460, "total_steps": 204665, "loss": 0.0, "lr": 5.596518782935655e-09, "epoch": 4.848410817677668, "percentage": 96.97, "elapsed_time": "4:17:04", "remaining_time": "0:08:02", "throughput": 8670.53, "total_tokens": 133739304} +{"current_steps": 198465, "total_steps": 204665, "loss": 0.0, "lr": 5.587512903468372e-09, "epoch": 4.848532968509516, "percentage": 96.97, "elapsed_time": "4:17:04", "remaining_time": "0:08:01", "throughput": 8670.55, "total_tokens": 133742696} +{"current_steps": 198470, "total_steps": 204665, "loss": 0.0, "lr": 5.578514255636158e-09, "epoch": 4.8486551193413625, "percentage": 96.97, "elapsed_time": "4:17:05", "remaining_time": "0:08:01", "throughput": 8670.6, "total_tokens": 133746472} +{"current_steps": 198475, "total_steps": 204665, "loss": 0.0, "lr": 5.5695228395045145e-09, "epoch": 4.84877727017321, "percentage": 96.98, "elapsed_time": "4:17:05", "remaining_time": "0:08:01", "throughput": 8670.6, "total_tokens": 133749416} +{"current_steps": 198480, "total_steps": 204665, "loss": 0.0, "lr": 5.560538655138724e-09, "epoch": 4.848899421005057, "percentage": 96.98, "elapsed_time": "4:17:05", "remaining_time": "0:08:00", "throughput": 8670.63, "total_tokens": 133753000} +{"current_steps": 198485, "total_steps": 204665, "loss": 0.0, "lr": 5.5515617026041796e-09, "epoch": 4.8490215718369045, "percentage": 96.98, "elapsed_time": "4:17:06", "remaining_time": "0:08:00", "throughput": 8670.64, "total_tokens": 133756008} +{"current_steps": 198490, "total_steps": 204665, "loss": 0.0, "lr": 5.542591981966049e-09, "epoch": 4.849143722668751, "percentage": 96.98, "elapsed_time": "4:17:06", "remaining_time": "0:07:59", "throughput": 8670.67, "total_tokens": 133759528} +{"current_steps": 198495, "total_steps": 204665, "loss": 0.0, "lr": 5.5336294932898376e-09, "epoch": 4.849265873500599, "percentage": 96.99, "elapsed_time": "4:17:07", "remaining_time": "0:07:59", "throughput": 8670.69, "total_tokens": 133762920} +{"current_steps": 198500, "total_steps": 204665, "loss": 0.0, "lr": 5.5246742366404915e-09, "epoch": 4.849388024332446, "percentage": 96.99, "elapsed_time": "4:17:07", "remaining_time": "0:07:59", "throughput": 8670.73, "total_tokens": 133766632} +{"current_steps": 198505, "total_steps": 204665, "loss": 0.0, "lr": 5.515726212083071e-09, "epoch": 4.849510175164292, "percentage": 96.99, "elapsed_time": "4:17:07", "remaining_time": "0:07:58", "throughput": 8670.74, "total_tokens": 133769832} +{"current_steps": 198510, "total_steps": 204665, "loss": 0.0, "lr": 5.506785419682969e-09, "epoch": 4.84963232599614, "percentage": 96.99, "elapsed_time": "4:17:08", "remaining_time": "0:07:58", "throughput": 8670.77, "total_tokens": 133773224} +{"current_steps": 198515, "total_steps": 204665, "loss": 0.0, "lr": 5.49785185950491e-09, "epoch": 4.849754476827988, "percentage": 97.0, "elapsed_time": "4:17:08", "remaining_time": "0:07:57", "throughput": 8670.79, "total_tokens": 133776616} +{"current_steps": 198520, "total_steps": 204665, "loss": 0.0, "lr": 5.488925531613953e-09, "epoch": 4.849876627659834, "percentage": 97.0, "elapsed_time": "4:17:08", "remaining_time": "0:07:57", "throughput": 8670.8, "total_tokens": 133779752} +{"current_steps": 198525, "total_steps": 204665, "loss": 0.0, "lr": 5.480006436075046e-09, "epoch": 4.849998778491681, "percentage": 97.0, "elapsed_time": "4:17:09", "remaining_time": "0:07:57", "throughput": 8670.84, "total_tokens": 133783528} +{"current_steps": 198530, "total_steps": 204665, "loss": 0.0, "lr": 5.471094572953028e-09, "epoch": 4.850120929323529, "percentage": 97.0, "elapsed_time": "4:17:09", "remaining_time": "0:07:56", "throughput": 8670.87, "total_tokens": 133786920} +{"current_steps": 198535, "total_steps": 204665, "loss": 0.0, "lr": 5.462189942312734e-09, "epoch": 4.8502430801553755, "percentage": 97.0, "elapsed_time": "4:17:09", "remaining_time": "0:07:56", "throughput": 8670.89, "total_tokens": 133790248} +{"current_steps": 198540, "total_steps": 204665, "loss": 0.0, "lr": 5.453292544218779e-09, "epoch": 4.850365230987223, "percentage": 97.01, "elapsed_time": "4:17:10", "remaining_time": "0:07:56", "throughput": 8670.91, "total_tokens": 133793576} +{"current_steps": 198545, "total_steps": 204665, "loss": 0.0001, "lr": 5.444402378736113e-09, "epoch": 4.85048738181907, "percentage": 97.01, "elapsed_time": "4:17:10", "remaining_time": "0:07:55", "throughput": 8670.95, "total_tokens": 133797224} +{"current_steps": 198550, "total_steps": 204665, "loss": 0.1013, "lr": 5.435519445929237e-09, "epoch": 4.8506095326509175, "percentage": 97.01, "elapsed_time": "4:17:10", "remaining_time": "0:07:55", "throughput": 8670.96, "total_tokens": 133800360} +{"current_steps": 198555, "total_steps": 204665, "loss": 0.0, "lr": 5.426643745862658e-09, "epoch": 4.850731683482764, "percentage": 97.01, "elapsed_time": "4:17:11", "remaining_time": "0:07:54", "throughput": 8670.98, "total_tokens": 133803880} +{"current_steps": 198560, "total_steps": 204665, "loss": 0.0, "lr": 5.4177752786011e-09, "epoch": 4.850853834314612, "percentage": 97.02, "elapsed_time": "4:17:11", "remaining_time": "0:07:54", "throughput": 8671.02, "total_tokens": 133807400} +{"current_steps": 198565, "total_steps": 204665, "loss": 0.0, "lr": 5.408914044209068e-09, "epoch": 4.850975985146459, "percentage": 97.02, "elapsed_time": "4:17:11", "remaining_time": "0:07:54", "throughput": 8671.05, "total_tokens": 133810984} +{"current_steps": 198570, "total_steps": 204665, "loss": 0.0, "lr": 5.400060042750843e-09, "epoch": 4.851098135978306, "percentage": 97.02, "elapsed_time": "4:17:12", "remaining_time": "0:07:53", "throughput": 8671.09, "total_tokens": 133814632} +{"current_steps": 198575, "total_steps": 204665, "loss": 0.0, "lr": 5.391213274290929e-09, "epoch": 4.851220286810153, "percentage": 97.02, "elapsed_time": "4:17:12", "remaining_time": "0:07:53", "throughput": 8671.1, "total_tokens": 133817896} +{"current_steps": 198580, "total_steps": 204665, "loss": 0.0001, "lr": 5.382373738893609e-09, "epoch": 4.851342437642001, "percentage": 97.03, "elapsed_time": "4:17:12", "remaining_time": "0:07:52", "throughput": 8671.1, "total_tokens": 133820840} +{"current_steps": 198585, "total_steps": 204665, "loss": 0.0, "lr": 5.3735414366232745e-09, "epoch": 4.851464588473847, "percentage": 97.03, "elapsed_time": "4:17:13", "remaining_time": "0:07:52", "throughput": 8671.13, "total_tokens": 133824232} +{"current_steps": 198590, "total_steps": 204665, "loss": 0.0, "lr": 5.3647163675439864e-09, "epoch": 4.851586739305695, "percentage": 97.03, "elapsed_time": "4:17:13", "remaining_time": "0:07:52", "throughput": 8671.15, "total_tokens": 133827688} +{"current_steps": 198595, "total_steps": 204665, "loss": 0.0, "lr": 5.3558985317200265e-09, "epoch": 4.851708890137542, "percentage": 97.03, "elapsed_time": "4:17:14", "remaining_time": "0:07:51", "throughput": 8671.19, "total_tokens": 133831272} +{"current_steps": 198600, "total_steps": 204665, "loss": 0.0004, "lr": 5.347087929215455e-09, "epoch": 4.8518310409693886, "percentage": 97.04, "elapsed_time": "4:17:14", "remaining_time": "0:07:51", "throughput": 8671.23, "total_tokens": 133835048} +{"current_steps": 198605, "total_steps": 204665, "loss": 0.0, "lr": 5.338284560094442e-09, "epoch": 4.851953191801236, "percentage": 97.04, "elapsed_time": "4:17:14", "remaining_time": "0:07:50", "throughput": 8671.23, "total_tokens": 133837992} +{"current_steps": 198610, "total_steps": 204665, "loss": 0.0, "lr": 5.3294884244208246e-09, "epoch": 4.852075342633084, "percentage": 97.04, "elapsed_time": "4:17:15", "remaining_time": "0:07:50", "throughput": 8671.25, "total_tokens": 133841256} +{"current_steps": 198615, "total_steps": 204665, "loss": 0.0, "lr": 5.320699522258887e-09, "epoch": 4.852197493464931, "percentage": 97.04, "elapsed_time": "4:17:15", "remaining_time": "0:07:50", "throughput": 8671.26, "total_tokens": 133844520} +{"current_steps": 198620, "total_steps": 204665, "loss": 0.0, "lr": 5.311917853672243e-09, "epoch": 4.852319644296777, "percentage": 97.05, "elapsed_time": "4:17:15", "remaining_time": "0:07:49", "throughput": 8671.28, "total_tokens": 133847848} +{"current_steps": 198625, "total_steps": 204665, "loss": 0.0, "lr": 5.303143418724843e-09, "epoch": 4.852441795128625, "percentage": 97.05, "elapsed_time": "4:17:16", "remaining_time": "0:07:49", "throughput": 8671.31, "total_tokens": 133851496} +{"current_steps": 198630, "total_steps": 204665, "loss": 0.0, "lr": 5.294376217480634e-09, "epoch": 4.852563945960472, "percentage": 97.05, "elapsed_time": "4:17:16", "remaining_time": "0:07:49", "throughput": 8671.32, "total_tokens": 133854568} +{"current_steps": 198635, "total_steps": 204665, "loss": 0.0001, "lr": 5.285616250003233e-09, "epoch": 4.852686096792319, "percentage": 97.05, "elapsed_time": "4:17:16", "remaining_time": "0:07:48", "throughput": 8671.35, "total_tokens": 133858024} +{"current_steps": 198640, "total_steps": 204665, "loss": 0.0, "lr": 5.276863516356367e-09, "epoch": 4.852808247624166, "percentage": 97.06, "elapsed_time": "4:17:17", "remaining_time": "0:07:48", "throughput": 8671.36, "total_tokens": 133861224} +{"current_steps": 198645, "total_steps": 204665, "loss": 0.0, "lr": 5.268118016603651e-09, "epoch": 4.852930398456014, "percentage": 97.06, "elapsed_time": "4:17:17", "remaining_time": "0:07:47", "throughput": 8671.39, "total_tokens": 133864616} +{"current_steps": 198650, "total_steps": 204665, "loss": 0.0, "lr": 5.259379750808812e-09, "epoch": 4.8530525492878605, "percentage": 97.06, "elapsed_time": "4:17:17", "remaining_time": "0:07:47", "throughput": 8671.39, "total_tokens": 133867688} +{"current_steps": 198655, "total_steps": 204665, "loss": 0.0, "lr": 5.250648719035245e-09, "epoch": 4.853174700119708, "percentage": 97.06, "elapsed_time": "4:17:18", "remaining_time": "0:07:47", "throughput": 8671.43, "total_tokens": 133871336} +{"current_steps": 198660, "total_steps": 204665, "loss": 0.0, "lr": 5.241924921346564e-09, "epoch": 4.853296850951555, "percentage": 97.07, "elapsed_time": "4:17:18", "remaining_time": "0:07:46", "throughput": 8671.49, "total_tokens": 133875432} +{"current_steps": 198665, "total_steps": 204665, "loss": 0.0, "lr": 5.233208357806163e-09, "epoch": 4.8534190017834025, "percentage": 97.07, "elapsed_time": "4:17:18", "remaining_time": "0:07:46", "throughput": 8671.51, "total_tokens": 133878760} +{"current_steps": 198670, "total_steps": 204665, "loss": 0.0, "lr": 5.224499028477436e-09, "epoch": 4.853541152615249, "percentage": 97.07, "elapsed_time": "4:17:19", "remaining_time": "0:07:45", "throughput": 8671.53, "total_tokens": 133882024} +{"current_steps": 198675, "total_steps": 204665, "loss": 0.0, "lr": 5.215796933423666e-09, "epoch": 4.853663303447097, "percentage": 97.07, "elapsed_time": "4:17:19", "remaining_time": "0:07:45", "throughput": 8671.56, "total_tokens": 133885544} +{"current_steps": 198680, "total_steps": 204665, "loss": 0.0, "lr": 5.207102072708247e-09, "epoch": 4.853785454278944, "percentage": 97.08, "elapsed_time": "4:17:19", "remaining_time": "0:07:45", "throughput": 8671.57, "total_tokens": 133888808} +{"current_steps": 198685, "total_steps": 204665, "loss": 0.0, "lr": 5.1984144463943505e-09, "epoch": 4.853907605110791, "percentage": 97.08, "elapsed_time": "4:17:20", "remaining_time": "0:07:44", "throughput": 8671.62, "total_tokens": 133892648} +{"current_steps": 198690, "total_steps": 204665, "loss": 0.0, "lr": 5.1897340545451474e-09, "epoch": 4.854029755942638, "percentage": 97.08, "elapsed_time": "4:17:20", "remaining_time": "0:07:44", "throughput": 8671.66, "total_tokens": 133896360} +{"current_steps": 198695, "total_steps": 204665, "loss": 0.0, "lr": 5.181060897223699e-09, "epoch": 4.854151906774485, "percentage": 97.08, "elapsed_time": "4:17:21", "remaining_time": "0:07:43", "throughput": 8671.68, "total_tokens": 133899816} +{"current_steps": 198700, "total_steps": 204665, "loss": 0.0, "lr": 5.172394974493177e-09, "epoch": 4.854274057606332, "percentage": 97.09, "elapsed_time": "4:17:21", "remaining_time": "0:07:43", "throughput": 8671.7, "total_tokens": 133903144} +{"current_steps": 198705, "total_steps": 204665, "loss": 0.0, "lr": 5.1637362864166424e-09, "epoch": 4.854396208438179, "percentage": 97.09, "elapsed_time": "4:17:21", "remaining_time": "0:07:43", "throughput": 8671.72, "total_tokens": 133906408} +{"current_steps": 198710, "total_steps": 204665, "loss": 0.0, "lr": 5.155084833056933e-09, "epoch": 4.854518359270027, "percentage": 97.09, "elapsed_time": "4:17:22", "remaining_time": "0:07:42", "throughput": 8671.74, "total_tokens": 133909672} +{"current_steps": 198715, "total_steps": 204665, "loss": 0.0, "lr": 5.146440614476999e-09, "epoch": 4.8546405101018735, "percentage": 97.09, "elapsed_time": "4:17:22", "remaining_time": "0:07:42", "throughput": 8671.77, "total_tokens": 133913192} +{"current_steps": 198720, "total_steps": 204665, "loss": 0.0, "lr": 5.13780363073979e-09, "epoch": 4.854762660933721, "percentage": 97.1, "elapsed_time": "4:17:22", "remaining_time": "0:07:41", "throughput": 8671.79, "total_tokens": 133916584} +{"current_steps": 198725, "total_steps": 204665, "loss": 0.0, "lr": 5.129173881908033e-09, "epoch": 4.854884811765568, "percentage": 97.1, "elapsed_time": "4:17:23", "remaining_time": "0:07:41", "throughput": 8671.8, "total_tokens": 133919720} +{"current_steps": 198730, "total_steps": 204665, "loss": 0.0, "lr": 5.120551368044568e-09, "epoch": 4.8550069625974155, "percentage": 97.1, "elapsed_time": "4:17:23", "remaining_time": "0:07:41", "throughput": 8671.81, "total_tokens": 133922856} +{"current_steps": 198735, "total_steps": 204665, "loss": 0.0, "lr": 5.11193608921201e-09, "epoch": 4.855129113429262, "percentage": 97.1, "elapsed_time": "4:17:23", "remaining_time": "0:07:40", "throughput": 8671.84, "total_tokens": 133926312} +{"current_steps": 198740, "total_steps": 204665, "loss": 0.0, "lr": 5.103328045472977e-09, "epoch": 4.85525126426111, "percentage": 97.11, "elapsed_time": "4:17:24", "remaining_time": "0:07:40", "throughput": 8671.85, "total_tokens": 133929512} +{"current_steps": 198745, "total_steps": 204665, "loss": 0.0, "lr": 5.094727236890195e-09, "epoch": 4.855373415092957, "percentage": 97.11, "elapsed_time": "4:17:24", "remaining_time": "0:07:40", "throughput": 8671.88, "total_tokens": 133932968} +{"current_steps": 198750, "total_steps": 204665, "loss": 0.0695, "lr": 5.086133663526171e-09, "epoch": 4.855495565924804, "percentage": 97.11, "elapsed_time": "4:17:24", "remaining_time": "0:07:39", "throughput": 8671.9, "total_tokens": 133936360} +{"current_steps": 198755, "total_steps": 204665, "loss": 0.0, "lr": 5.0775473254434094e-09, "epoch": 4.855617716756651, "percentage": 97.11, "elapsed_time": "4:17:25", "remaining_time": "0:07:39", "throughput": 8671.92, "total_tokens": 133939560} +{"current_steps": 198760, "total_steps": 204665, "loss": 0.0, "lr": 5.068968222704307e-09, "epoch": 4.855739867588499, "percentage": 97.11, "elapsed_time": "4:17:25", "remaining_time": "0:07:38", "throughput": 8671.94, "total_tokens": 133942952} +{"current_steps": 198765, "total_steps": 204665, "loss": 0.0, "lr": 5.0603963553711435e-09, "epoch": 4.855862018420345, "percentage": 97.12, "elapsed_time": "4:17:25", "remaining_time": "0:07:38", "throughput": 8671.95, "total_tokens": 133945960} +{"current_steps": 198770, "total_steps": 204665, "loss": 0.0, "lr": 5.051831723506539e-09, "epoch": 4.855984169252192, "percentage": 97.12, "elapsed_time": "4:17:26", "remaining_time": "0:07:38", "throughput": 8671.96, "total_tokens": 133949096} +{"current_steps": 198775, "total_steps": 204665, "loss": 0.0, "lr": 5.043274327172553e-09, "epoch": 4.85610632008404, "percentage": 97.12, "elapsed_time": "4:17:26", "remaining_time": "0:07:37", "throughput": 8671.99, "total_tokens": 133952680} +{"current_steps": 198780, "total_steps": 204665, "loss": 0.0, "lr": 5.034724166431581e-09, "epoch": 4.856228470915887, "percentage": 97.12, "elapsed_time": "4:17:26", "remaining_time": "0:07:37", "throughput": 8672.0, "total_tokens": 133955816} +{"current_steps": 198785, "total_steps": 204665, "loss": 0.0, "lr": 5.026181241345573e-09, "epoch": 4.856350621747734, "percentage": 97.13, "elapsed_time": "4:17:27", "remaining_time": "0:07:36", "throughput": 8672.02, "total_tokens": 133959080} +{"current_steps": 198790, "total_steps": 204665, "loss": 0.0, "lr": 5.017645551976812e-09, "epoch": 4.856472772579581, "percentage": 97.13, "elapsed_time": "4:17:27", "remaining_time": "0:07:36", "throughput": 8672.04, "total_tokens": 133962344} +{"current_steps": 198795, "total_steps": 204665, "loss": 0.0, "lr": 5.009117098387472e-09, "epoch": 4.8565949234114285, "percentage": 97.13, "elapsed_time": "4:17:27", "remaining_time": "0:07:36", "throughput": 8672.05, "total_tokens": 133965544} +{"current_steps": 198800, "total_steps": 204665, "loss": 0.0, "lr": 5.000595880639391e-09, "epoch": 4.856717074243275, "percentage": 97.13, "elapsed_time": "4:17:28", "remaining_time": "0:07:35", "throughput": 8672.06, "total_tokens": 133968680} +{"current_steps": 198805, "total_steps": 204665, "loss": 0.0, "lr": 4.9920818987945205e-09, "epoch": 4.856839225075123, "percentage": 97.14, "elapsed_time": "4:17:28", "remaining_time": "0:07:35", "throughput": 8672.09, "total_tokens": 133972072} +{"current_steps": 198810, "total_steps": 204665, "loss": 0.0, "lr": 4.98357515291492e-09, "epoch": 4.85696137590697, "percentage": 97.14, "elapsed_time": "4:17:29", "remaining_time": "0:07:34", "throughput": 8672.14, "total_tokens": 133975976} +{"current_steps": 198815, "total_steps": 204665, "loss": 0.0, "lr": 4.975075643062321e-09, "epoch": 4.857083526738817, "percentage": 97.14, "elapsed_time": "4:17:29", "remaining_time": "0:07:34", "throughput": 8672.14, "total_tokens": 133978920} +{"current_steps": 198820, "total_steps": 204665, "loss": 0.0706, "lr": 4.966583369298782e-09, "epoch": 4.857205677570664, "percentage": 97.14, "elapsed_time": "4:17:29", "remaining_time": "0:07:34", "throughput": 8672.16, "total_tokens": 133982312} +{"current_steps": 198825, "total_steps": 204665, "loss": 0.0, "lr": 4.9580983316857005e-09, "epoch": 4.857327828402512, "percentage": 97.15, "elapsed_time": "4:17:30", "remaining_time": "0:07:33", "throughput": 8672.21, "total_tokens": 133986216} +{"current_steps": 198830, "total_steps": 204665, "loss": 0.0, "lr": 4.9496205302850256e-09, "epoch": 4.857449979234358, "percentage": 97.15, "elapsed_time": "4:17:30", "remaining_time": "0:07:33", "throughput": 8672.22, "total_tokens": 133989288} +{"current_steps": 198835, "total_steps": 204665, "loss": 0.0, "lr": 4.941149965158375e-09, "epoch": 4.857572130066206, "percentage": 97.15, "elapsed_time": "4:17:30", "remaining_time": "0:07:33", "throughput": 8672.25, "total_tokens": 133992680} +{"current_steps": 198840, "total_steps": 204665, "loss": 0.0, "lr": 4.932686636367256e-09, "epoch": 4.857694280898053, "percentage": 97.15, "elapsed_time": "4:17:31", "remaining_time": "0:07:32", "throughput": 8672.27, "total_tokens": 133996008} +{"current_steps": 198845, "total_steps": 204665, "loss": 0.0, "lr": 4.924230543973284e-09, "epoch": 4.8578164317299, "percentage": 97.16, "elapsed_time": "4:17:31", "remaining_time": "0:07:32", "throughput": 8672.3, "total_tokens": 133999528} +{"current_steps": 198850, "total_steps": 204665, "loss": 0.0, "lr": 4.915781688037967e-09, "epoch": 4.857938582561747, "percentage": 97.16, "elapsed_time": "4:17:31", "remaining_time": "0:07:31", "throughput": 8672.33, "total_tokens": 134002984} +{"current_steps": 198855, "total_steps": 204665, "loss": 0.0, "lr": 4.9073400686228115e-09, "epoch": 4.858060733393595, "percentage": 97.16, "elapsed_time": "4:17:32", "remaining_time": "0:07:31", "throughput": 8672.38, "total_tokens": 134007016} +{"current_steps": 198860, "total_steps": 204665, "loss": 0.0, "lr": 4.89890568578899e-09, "epoch": 4.8581828842254415, "percentage": 97.16, "elapsed_time": "4:17:32", "remaining_time": "0:07:31", "throughput": 8672.4, "total_tokens": 134010216} +{"current_steps": 198865, "total_steps": 204665, "loss": 0.0, "lr": 4.890478539598008e-09, "epoch": 4.858305035057288, "percentage": 97.17, "elapsed_time": "4:17:32", "remaining_time": "0:07:30", "throughput": 8672.41, "total_tokens": 134013416} +{"current_steps": 198870, "total_steps": 204665, "loss": 0.0, "lr": 4.8820586301112635e-09, "epoch": 4.858427185889136, "percentage": 97.17, "elapsed_time": "4:17:33", "remaining_time": "0:07:30", "throughput": 8672.43, "total_tokens": 134016680} +{"current_steps": 198875, "total_steps": 204665, "loss": 0.0, "lr": 4.873645957389705e-09, "epoch": 4.8585493367209835, "percentage": 97.17, "elapsed_time": "4:17:33", "remaining_time": "0:07:29", "throughput": 8672.44, "total_tokens": 134019816} +{"current_steps": 198880, "total_steps": 204665, "loss": 0.0, "lr": 4.865240521494729e-09, "epoch": 4.85867148755283, "percentage": 97.17, "elapsed_time": "4:17:33", "remaining_time": "0:07:29", "throughput": 8672.47, "total_tokens": 134023400} +{"current_steps": 198885, "total_steps": 204665, "loss": 0.0134, "lr": 4.8568423224872866e-09, "epoch": 4.858793638384677, "percentage": 97.18, "elapsed_time": "4:17:34", "remaining_time": "0:07:29", "throughput": 8672.49, "total_tokens": 134026664} +{"current_steps": 198890, "total_steps": 204665, "loss": 0.0, "lr": 4.848451360428551e-09, "epoch": 4.858915789216525, "percentage": 97.18, "elapsed_time": "4:17:34", "remaining_time": "0:07:28", "throughput": 8672.5, "total_tokens": 134029736} +{"current_steps": 198895, "total_steps": 204665, "loss": 0.0, "lr": 4.840067635379697e-09, "epoch": 4.859037940048371, "percentage": 97.18, "elapsed_time": "4:17:34", "remaining_time": "0:07:28", "throughput": 8672.51, "total_tokens": 134032808} +{"current_steps": 198900, "total_steps": 204665, "loss": 0.0, "lr": 4.83169114740134e-09, "epoch": 4.859160090880219, "percentage": 97.18, "elapsed_time": "4:17:35", "remaining_time": "0:07:27", "throughput": 8672.54, "total_tokens": 134036392} +{"current_steps": 198905, "total_steps": 204665, "loss": 0.0, "lr": 4.823321896554766e-09, "epoch": 4.859282241712066, "percentage": 97.19, "elapsed_time": "4:17:35", "remaining_time": "0:07:27", "throughput": 8672.58, "total_tokens": 134040168} +{"current_steps": 198910, "total_steps": 204665, "loss": 0.0, "lr": 4.814959882900482e-09, "epoch": 4.859404392543913, "percentage": 97.19, "elapsed_time": "4:17:35", "remaining_time": "0:07:27", "throughput": 8672.61, "total_tokens": 134043624} +{"current_steps": 198915, "total_steps": 204665, "loss": 0.0, "lr": 4.806605106499661e-09, "epoch": 4.85952654337576, "percentage": 97.19, "elapsed_time": "4:17:36", "remaining_time": "0:07:26", "throughput": 8672.61, "total_tokens": 134046440} +{"current_steps": 198920, "total_steps": 204665, "loss": 0.0001, "lr": 4.7982575674128115e-09, "epoch": 4.859648694207608, "percentage": 97.19, "elapsed_time": "4:17:36", "remaining_time": "0:07:26", "throughput": 8672.61, "total_tokens": 134049384} +{"current_steps": 198925, "total_steps": 204665, "loss": 0.0, "lr": 4.78991726570066e-09, "epoch": 4.859770845039455, "percentage": 97.2, "elapsed_time": "4:17:36", "remaining_time": "0:07:26", "throughput": 8672.61, "total_tokens": 134052392} +{"current_steps": 198930, "total_steps": 204665, "loss": 0.0, "lr": 4.7815842014239385e-09, "epoch": 4.859892995871302, "percentage": 97.2, "elapsed_time": "4:17:37", "remaining_time": "0:07:25", "throughput": 8672.64, "total_tokens": 134055976} +{"current_steps": 198935, "total_steps": 204665, "loss": 0.0, "lr": 4.7732583746432635e-09, "epoch": 4.860015146703149, "percentage": 97.2, "elapsed_time": "4:17:37", "remaining_time": "0:07:25", "throughput": 8672.7, "total_tokens": 134060008} +{"current_steps": 198940, "total_steps": 204665, "loss": 0.0, "lr": 4.764939785419031e-09, "epoch": 4.860137297534997, "percentage": 97.2, "elapsed_time": "4:17:38", "remaining_time": "0:07:24", "throughput": 8672.73, "total_tokens": 134063400} +{"current_steps": 198945, "total_steps": 204665, "loss": 0.0, "lr": 4.756628433811971e-09, "epoch": 4.860259448366843, "percentage": 97.21, "elapsed_time": "4:17:38", "remaining_time": "0:07:24", "throughput": 8672.76, "total_tokens": 134066920} +{"current_steps": 198950, "total_steps": 204665, "loss": 0.0, "lr": 4.7483243198823685e-09, "epoch": 4.860381599198691, "percentage": 97.21, "elapsed_time": "4:17:38", "remaining_time": "0:07:24", "throughput": 8672.78, "total_tokens": 134070312} +{"current_steps": 198955, "total_steps": 204665, "loss": 0.0, "lr": 4.740027443690509e-09, "epoch": 4.860503750030538, "percentage": 97.21, "elapsed_time": "4:17:39", "remaining_time": "0:07:23", "throughput": 8672.8, "total_tokens": 134073576} +{"current_steps": 198960, "total_steps": 204665, "loss": 0.0, "lr": 4.731737805297009e-09, "epoch": 4.860625900862384, "percentage": 97.21, "elapsed_time": "4:17:39", "remaining_time": "0:07:23", "throughput": 8672.81, "total_tokens": 134076840} +{"current_steps": 198965, "total_steps": 204665, "loss": 0.0001, "lr": 4.723455404761933e-09, "epoch": 4.860748051694232, "percentage": 97.21, "elapsed_time": "4:17:39", "remaining_time": "0:07:22", "throughput": 8672.83, "total_tokens": 134080040} +{"current_steps": 198970, "total_steps": 204665, "loss": 0.0, "lr": 4.715180242145678e-09, "epoch": 4.860870202526079, "percentage": 97.22, "elapsed_time": "4:17:40", "remaining_time": "0:07:22", "throughput": 8672.83, "total_tokens": 134082984} +{"current_steps": 198975, "total_steps": 204665, "loss": 0.0, "lr": 4.706912317508305e-09, "epoch": 4.8609923533579265, "percentage": 97.22, "elapsed_time": "4:17:40", "remaining_time": "0:07:22", "throughput": 8672.86, "total_tokens": 134086440} +{"current_steps": 198980, "total_steps": 204665, "loss": 0.0, "lr": 4.698651630909878e-09, "epoch": 4.861114504189773, "percentage": 97.22, "elapsed_time": "4:17:40", "remaining_time": "0:07:21", "throughput": 8672.88, "total_tokens": 134089768} +{"current_steps": 198985, "total_steps": 204665, "loss": 0.0, "lr": 4.690398182410682e-09, "epoch": 4.861236655021621, "percentage": 97.22, "elapsed_time": "4:17:41", "remaining_time": "0:07:21", "throughput": 8672.89, "total_tokens": 134092968} +{"current_steps": 198990, "total_steps": 204665, "loss": 0.0, "lr": 4.682151972070558e-09, "epoch": 4.861358805853468, "percentage": 97.23, "elapsed_time": "4:17:41", "remaining_time": "0:07:20", "throughput": 8672.9, "total_tokens": 134096168} +{"current_steps": 198995, "total_steps": 204665, "loss": 0.0, "lr": 4.673912999949459e-09, "epoch": 4.861480956685315, "percentage": 97.23, "elapsed_time": "4:17:41", "remaining_time": "0:07:20", "throughput": 8672.92, "total_tokens": 134099368} +{"current_steps": 199000, "total_steps": 204665, "loss": 0.0, "lr": 4.665681266107446e-09, "epoch": 4.861603107517162, "percentage": 97.23, "elapsed_time": "4:17:42", "remaining_time": "0:07:20", "throughput": 8672.98, "total_tokens": 134103528} +{"current_steps": 199005, "total_steps": 204665, "loss": 0.0, "lr": 4.657456770604362e-09, "epoch": 4.86172525834901, "percentage": 97.23, "elapsed_time": "4:17:42", "remaining_time": "0:07:19", "throughput": 8672.99, "total_tokens": 134106600} +{"current_steps": 199010, "total_steps": 204665, "loss": 0.0, "lr": 4.649239513499936e-09, "epoch": 4.861847409180856, "percentage": 97.24, "elapsed_time": "4:17:42", "remaining_time": "0:07:19", "throughput": 8673.01, "total_tokens": 134109864} +{"current_steps": 199015, "total_steps": 204665, "loss": 0.0, "lr": 4.641029494853899e-09, "epoch": 4.861969560012704, "percentage": 97.24, "elapsed_time": "4:17:43", "remaining_time": "0:07:18", "throughput": 8673.04, "total_tokens": 134113448} +{"current_steps": 199020, "total_steps": 204665, "loss": 0.0, "lr": 4.632826714725979e-09, "epoch": 4.862091710844551, "percentage": 97.24, "elapsed_time": "4:17:43", "remaining_time": "0:07:18", "throughput": 8673.05, "total_tokens": 134116648} +{"current_steps": 199025, "total_steps": 204665, "loss": 0.0, "lr": 4.624631173176019e-09, "epoch": 4.862213861676398, "percentage": 97.24, "elapsed_time": "4:17:43", "remaining_time": "0:07:18", "throughput": 8673.09, "total_tokens": 134120296} +{"current_steps": 199030, "total_steps": 204665, "loss": 0.0, "lr": 4.616442870263304e-09, "epoch": 4.862336012508245, "percentage": 97.25, "elapsed_time": "4:17:44", "remaining_time": "0:07:17", "throughput": 8673.11, "total_tokens": 134123624} +{"current_steps": 199035, "total_steps": 204665, "loss": 0.0, "lr": 4.608261806047675e-09, "epoch": 4.862458163340093, "percentage": 97.25, "elapsed_time": "4:17:44", "remaining_time": "0:07:17", "throughput": 8673.13, "total_tokens": 134126824} +{"current_steps": 199040, "total_steps": 204665, "loss": 0.0, "lr": 4.600087980588418e-09, "epoch": 4.8625803141719395, "percentage": 97.25, "elapsed_time": "4:17:44", "remaining_time": "0:07:17", "throughput": 8673.15, "total_tokens": 134130152} +{"current_steps": 199045, "total_steps": 204665, "loss": 0.0, "lr": 4.591921393945042e-09, "epoch": 4.862702465003787, "percentage": 97.25, "elapsed_time": "4:17:45", "remaining_time": "0:07:16", "throughput": 8673.18, "total_tokens": 134133800} +{"current_steps": 199050, "total_steps": 204665, "loss": 0.0, "lr": 4.583762046177053e-09, "epoch": 4.862824615835634, "percentage": 97.26, "elapsed_time": "4:17:45", "remaining_time": "0:07:16", "throughput": 8673.21, "total_tokens": 134137256} +{"current_steps": 199055, "total_steps": 204665, "loss": 0.0, "lr": 4.575609937343517e-09, "epoch": 4.862946766667481, "percentage": 97.26, "elapsed_time": "4:17:46", "remaining_time": "0:07:15", "throughput": 8673.24, "total_tokens": 134140648} +{"current_steps": 199060, "total_steps": 204665, "loss": 0.0, "lr": 4.567465067504051e-09, "epoch": 4.863068917499328, "percentage": 97.26, "elapsed_time": "4:17:46", "remaining_time": "0:07:15", "throughput": 8673.26, "total_tokens": 134144040} +{"current_steps": 199065, "total_steps": 204665, "loss": 0.0, "lr": 4.559327436717608e-09, "epoch": 4.863191068331175, "percentage": 97.26, "elapsed_time": "4:17:46", "remaining_time": "0:07:15", "throughput": 8673.32, "total_tokens": 134148200} +{"current_steps": 199070, "total_steps": 204665, "loss": 0.0, "lr": 4.5511970450434755e-09, "epoch": 4.863313219163023, "percentage": 97.27, "elapsed_time": "4:17:47", "remaining_time": "0:07:14", "throughput": 8673.35, "total_tokens": 134151528} +{"current_steps": 199075, "total_steps": 204665, "loss": 0.0, "lr": 4.543073892540828e-09, "epoch": 4.863435369994869, "percentage": 97.27, "elapsed_time": "4:17:47", "remaining_time": "0:07:14", "throughput": 8673.36, "total_tokens": 134154664} +{"current_steps": 199080, "total_steps": 204665, "loss": 0.0, "lr": 4.534957979268728e-09, "epoch": 4.863557520826717, "percentage": 97.27, "elapsed_time": "4:17:47", "remaining_time": "0:07:13", "throughput": 8673.36, "total_tokens": 134157672} +{"current_steps": 199085, "total_steps": 204665, "loss": 0.0, "lr": 4.526849305286129e-09, "epoch": 4.863679671658564, "percentage": 97.27, "elapsed_time": "4:17:48", "remaining_time": "0:07:13", "throughput": 8673.37, "total_tokens": 134160744} +{"current_steps": 199090, "total_steps": 204665, "loss": 0.0, "lr": 4.518747870651985e-09, "epoch": 4.863801822490411, "percentage": 97.28, "elapsed_time": "4:17:48", "remaining_time": "0:07:13", "throughput": 8673.41, "total_tokens": 134164392} +{"current_steps": 199095, "total_steps": 204665, "loss": 0.0, "lr": 4.510653675425358e-09, "epoch": 4.863923973322258, "percentage": 97.28, "elapsed_time": "4:17:48", "remaining_time": "0:07:12", "throughput": 8673.4, "total_tokens": 134167208} +{"current_steps": 199100, "total_steps": 204665, "loss": 0.0, "lr": 4.502566719664869e-09, "epoch": 4.864046124154106, "percentage": 97.28, "elapsed_time": "4:17:49", "remaining_time": "0:07:12", "throughput": 8673.41, "total_tokens": 134170280} +{"current_steps": 199105, "total_steps": 204665, "loss": 0.0, "lr": 4.494487003429581e-09, "epoch": 4.8641682749859525, "percentage": 97.28, "elapsed_time": "4:17:49", "remaining_time": "0:07:11", "throughput": 8673.43, "total_tokens": 134173736} +{"current_steps": 199110, "total_steps": 204665, "loss": 0.0, "lr": 4.486414526778115e-09, "epoch": 4.8642904258178, "percentage": 97.29, "elapsed_time": "4:17:49", "remaining_time": "0:07:11", "throughput": 8673.45, "total_tokens": 134177000} +{"current_steps": 199115, "total_steps": 204665, "loss": 0.0, "lr": 4.478349289769201e-09, "epoch": 4.864412576649647, "percentage": 97.29, "elapsed_time": "4:17:50", "remaining_time": "0:07:11", "throughput": 8673.5, "total_tokens": 134180904} +{"current_steps": 199120, "total_steps": 204665, "loss": 0.0, "lr": 4.470291292461459e-09, "epoch": 4.8645347274814945, "percentage": 97.29, "elapsed_time": "4:17:50", "remaining_time": "0:07:10", "throughput": 8673.52, "total_tokens": 134184168} +{"current_steps": 199125, "total_steps": 204665, "loss": 0.0, "lr": 4.462240534913508e-09, "epoch": 4.864656878313341, "percentage": 97.29, "elapsed_time": "4:17:50", "remaining_time": "0:07:10", "throughput": 8673.56, "total_tokens": 134187944} +{"current_steps": 199130, "total_steps": 204665, "loss": 0.0, "lr": 4.45419701718397e-09, "epoch": 4.864779029145188, "percentage": 97.3, "elapsed_time": "4:17:51", "remaining_time": "0:07:10", "throughput": 8673.58, "total_tokens": 134191208} +{"current_steps": 199135, "total_steps": 204665, "loss": 0.0, "lr": 4.446160739331239e-09, "epoch": 4.864901179977036, "percentage": 97.3, "elapsed_time": "4:17:51", "remaining_time": "0:07:09", "throughput": 8673.6, "total_tokens": 134194472} +{"current_steps": 199140, "total_steps": 204665, "loss": 0.0, "lr": 4.4381317014138274e-09, "epoch": 4.865023330808883, "percentage": 97.3, "elapsed_time": "4:17:51", "remaining_time": "0:07:09", "throughput": 8673.6, "total_tokens": 134197480} +{"current_steps": 199145, "total_steps": 204665, "loss": 0.0, "lr": 4.4301099034901315e-09, "epoch": 4.86514548164073, "percentage": 97.3, "elapsed_time": "4:17:52", "remaining_time": "0:07:08", "throughput": 8673.64, "total_tokens": 134201128} +{"current_steps": 199150, "total_steps": 204665, "loss": 0.0, "lr": 4.422095345618437e-09, "epoch": 4.865267632472577, "percentage": 97.31, "elapsed_time": "4:17:52", "remaining_time": "0:07:08", "throughput": 8673.69, "total_tokens": 134204968} +{"current_steps": 199155, "total_steps": 204665, "loss": 0.0, "lr": 4.414088027857032e-09, "epoch": 4.865389783304424, "percentage": 97.31, "elapsed_time": "4:17:53", "remaining_time": "0:07:08", "throughput": 8673.72, "total_tokens": 134208616} +{"current_steps": 199160, "total_steps": 204665, "loss": 0.0, "lr": 4.406087950264092e-09, "epoch": 4.865511934136271, "percentage": 97.31, "elapsed_time": "4:17:53", "remaining_time": "0:07:07", "throughput": 8673.74, "total_tokens": 134211752} +{"current_steps": 199165, "total_steps": 204665, "loss": 0.0, "lr": 4.398095112898015e-09, "epoch": 4.865634084968119, "percentage": 97.31, "elapsed_time": "4:17:53", "remaining_time": "0:07:07", "throughput": 8673.76, "total_tokens": 134215144} +{"current_steps": 199170, "total_steps": 204665, "loss": 0.0, "lr": 4.390109515816642e-09, "epoch": 4.8657562357999655, "percentage": 97.32, "elapsed_time": "4:17:54", "remaining_time": "0:07:06", "throughput": 8673.79, "total_tokens": 134218728} +{"current_steps": 199175, "total_steps": 204665, "loss": 0.0, "lr": 4.3821311590781505e-09, "epoch": 4.865878386631813, "percentage": 97.32, "elapsed_time": "4:17:54", "remaining_time": "0:07:06", "throughput": 8673.83, "total_tokens": 134222440} +{"current_steps": 199180, "total_steps": 204665, "loss": 0.0, "lr": 4.374160042740716e-09, "epoch": 4.86600053746366, "percentage": 97.32, "elapsed_time": "4:17:54", "remaining_time": "0:07:06", "throughput": 8673.85, "total_tokens": 134225640} +{"current_steps": 199185, "total_steps": 204665, "loss": 0.0, "lr": 4.366196166862179e-09, "epoch": 4.8661226882955075, "percentage": 97.32, "elapsed_time": "4:17:55", "remaining_time": "0:07:05", "throughput": 8673.88, "total_tokens": 134229096} +{"current_steps": 199190, "total_steps": 204665, "loss": 0.0, "lr": 4.358239531500385e-09, "epoch": 4.866244839127354, "percentage": 97.32, "elapsed_time": "4:17:55", "remaining_time": "0:07:05", "throughput": 8673.89, "total_tokens": 134232360} +{"current_steps": 199195, "total_steps": 204665, "loss": 0.0, "lr": 4.3502901367132864e-09, "epoch": 4.866366989959202, "percentage": 97.33, "elapsed_time": "4:17:55", "remaining_time": "0:07:04", "throughput": 8673.9, "total_tokens": 134235432} +{"current_steps": 199200, "total_steps": 204665, "loss": 0.0, "lr": 4.342347982558614e-09, "epoch": 4.866489140791049, "percentage": 97.33, "elapsed_time": "4:17:56", "remaining_time": "0:07:04", "throughput": 8673.96, "total_tokens": 134239400} +{"current_steps": 199205, "total_steps": 204665, "loss": 0.0, "lr": 4.334413069094322e-09, "epoch": 4.866611291622896, "percentage": 97.33, "elapsed_time": "4:17:56", "remaining_time": "0:07:04", "throughput": 8673.99, "total_tokens": 134242984} +{"current_steps": 199210, "total_steps": 204665, "loss": 0.0, "lr": 4.326485396377921e-09, "epoch": 4.866733442454743, "percentage": 97.33, "elapsed_time": "4:17:56", "remaining_time": "0:07:03", "throughput": 8674.02, "total_tokens": 134246440} +{"current_steps": 199215, "total_steps": 204665, "loss": 0.0, "lr": 4.318564964467031e-09, "epoch": 4.866855593286591, "percentage": 97.34, "elapsed_time": "4:17:57", "remaining_time": "0:07:03", "throughput": 8674.05, "total_tokens": 134249896} +{"current_steps": 199220, "total_steps": 204665, "loss": 0.0, "lr": 4.3106517734194935e-09, "epoch": 4.866977744118437, "percentage": 97.34, "elapsed_time": "4:17:57", "remaining_time": "0:07:03", "throughput": 8674.05, "total_tokens": 134252968} +{"current_steps": 199225, "total_steps": 204665, "loss": 0.0, "lr": 4.302745823292598e-09, "epoch": 4.867099894950284, "percentage": 97.34, "elapsed_time": "4:17:57", "remaining_time": "0:07:02", "throughput": 8674.06, "total_tokens": 134255976} +{"current_steps": 199230, "total_steps": 204665, "loss": 0.0, "lr": 4.294847114143963e-09, "epoch": 4.867222045782132, "percentage": 97.34, "elapsed_time": "4:17:58", "remaining_time": "0:07:02", "throughput": 8674.11, "total_tokens": 134259944} +{"current_steps": 199235, "total_steps": 204665, "loss": 0.0, "lr": 4.286955646030988e-09, "epoch": 4.867344196613979, "percentage": 97.35, "elapsed_time": "4:17:58", "remaining_time": "0:07:01", "throughput": 8674.12, "total_tokens": 134263080} +{"current_steps": 199240, "total_steps": 204665, "loss": 0.0, "lr": 4.279071419011182e-09, "epoch": 4.867466347445826, "percentage": 97.35, "elapsed_time": "4:17:58", "remaining_time": "0:07:01", "throughput": 8674.14, "total_tokens": 134266280} +{"current_steps": 199245, "total_steps": 204665, "loss": 0.0, "lr": 4.271194433141723e-09, "epoch": 4.867588498277673, "percentage": 97.35, "elapsed_time": "4:17:59", "remaining_time": "0:07:01", "throughput": 8674.17, "total_tokens": 134269864} +{"current_steps": 199250, "total_steps": 204665, "loss": 0.0, "lr": 4.263324688480008e-09, "epoch": 4.867710649109521, "percentage": 97.35, "elapsed_time": "4:17:59", "remaining_time": "0:07:00", "throughput": 8674.19, "total_tokens": 134273128} +{"current_steps": 199255, "total_steps": 204665, "loss": 0.0, "lr": 4.255462185083103e-09, "epoch": 4.867832799941367, "percentage": 97.36, "elapsed_time": "4:17:59", "remaining_time": "0:07:00", "throughput": 8674.21, "total_tokens": 134276456} +{"current_steps": 199260, "total_steps": 204665, "loss": 0.0, "lr": 4.2476069230084066e-09, "epoch": 4.867954950773215, "percentage": 97.36, "elapsed_time": "4:18:00", "remaining_time": "0:06:59", "throughput": 8674.22, "total_tokens": 134279592} +{"current_steps": 199265, "total_steps": 204665, "loss": 0.0001, "lr": 4.239758902312873e-09, "epoch": 4.868077101605062, "percentage": 97.36, "elapsed_time": "4:18:00", "remaining_time": "0:06:59", "throughput": 8674.34, "total_tokens": 134284840} +{"current_steps": 199270, "total_steps": 204665, "loss": 0.0, "lr": 4.231918123053679e-09, "epoch": 4.868199252436909, "percentage": 97.36, "elapsed_time": "4:18:01", "remaining_time": "0:06:59", "throughput": 8674.37, "total_tokens": 134288424} +{"current_steps": 199275, "total_steps": 204665, "loss": 0.0, "lr": 4.22408458528778e-09, "epoch": 4.868321403268756, "percentage": 97.37, "elapsed_time": "4:18:01", "remaining_time": "0:06:58", "throughput": 8674.38, "total_tokens": 134291560} +{"current_steps": 199280, "total_steps": 204665, "loss": 0.0, "lr": 4.216258289072128e-09, "epoch": 4.868443554100604, "percentage": 97.37, "elapsed_time": "4:18:01", "remaining_time": "0:06:58", "throughput": 8674.4, "total_tokens": 134294760} +{"current_steps": 199285, "total_steps": 204665, "loss": 0.0, "lr": 4.20843923446379e-09, "epoch": 4.8685657049324504, "percentage": 97.37, "elapsed_time": "4:18:02", "remaining_time": "0:06:57", "throughput": 8674.45, "total_tokens": 134298600} +{"current_steps": 199290, "total_steps": 204665, "loss": 0.0, "lr": 4.200627421519498e-09, "epoch": 4.868687855764298, "percentage": 97.37, "elapsed_time": "4:18:02", "remaining_time": "0:06:57", "throughput": 8674.47, "total_tokens": 134301864} +{"current_steps": 199295, "total_steps": 204665, "loss": 0.0, "lr": 4.192822850295985e-09, "epoch": 4.868810006596145, "percentage": 97.38, "elapsed_time": "4:18:02", "remaining_time": "0:06:57", "throughput": 8674.48, "total_tokens": 134305064} +{"current_steps": 199300, "total_steps": 204665, "loss": 0.0, "lr": 4.185025520850205e-09, "epoch": 4.8689321574279925, "percentage": 97.38, "elapsed_time": "4:18:03", "remaining_time": "0:06:56", "throughput": 8674.54, "total_tokens": 134309224} +{"current_steps": 199305, "total_steps": 204665, "loss": 0.0, "lr": 4.1772354332386686e-09, "epoch": 4.869054308259839, "percentage": 97.38, "elapsed_time": "4:18:03", "remaining_time": "0:06:56", "throughput": 8674.56, "total_tokens": 134312488} +{"current_steps": 199310, "total_steps": 204665, "loss": 0.0, "lr": 4.169452587518219e-09, "epoch": 4.869176459091687, "percentage": 97.38, "elapsed_time": "4:18:03", "remaining_time": "0:06:56", "throughput": 8674.61, "total_tokens": 134316328} +{"current_steps": 199315, "total_steps": 204665, "loss": 0.0, "lr": 4.161676983745255e-09, "epoch": 4.869298609923534, "percentage": 97.39, "elapsed_time": "4:18:04", "remaining_time": "0:06:55", "throughput": 8674.63, "total_tokens": 134319592} +{"current_steps": 199320, "total_steps": 204665, "loss": 0.0, "lr": 4.15390862197651e-09, "epoch": 4.86942076075538, "percentage": 97.39, "elapsed_time": "4:18:04", "remaining_time": "0:06:55", "throughput": 8674.64, "total_tokens": 134322856} +{"current_steps": 199325, "total_steps": 204665, "loss": 0.0, "lr": 4.146147502268383e-09, "epoch": 4.869542911587228, "percentage": 97.39, "elapsed_time": "4:18:04", "remaining_time": "0:06:54", "throughput": 8674.66, "total_tokens": 134326056} +{"current_steps": 199330, "total_steps": 204665, "loss": 0.0, "lr": 4.138393624677272e-09, "epoch": 4.869665062419075, "percentage": 97.39, "elapsed_time": "4:18:05", "remaining_time": "0:06:54", "throughput": 8674.68, "total_tokens": 134329384} +{"current_steps": 199335, "total_steps": 204665, "loss": 0.0, "lr": 4.13064698925969e-09, "epoch": 4.869787213250922, "percentage": 97.4, "elapsed_time": "4:18:05", "remaining_time": "0:06:54", "throughput": 8674.68, "total_tokens": 134332328} +{"current_steps": 199340, "total_steps": 204665, "loss": 0.0, "lr": 4.122907596071812e-09, "epoch": 4.869909364082769, "percentage": 97.4, "elapsed_time": "4:18:05", "remaining_time": "0:06:53", "throughput": 8674.7, "total_tokens": 134335720} +{"current_steps": 199345, "total_steps": 204665, "loss": 0.0, "lr": 4.115175445170038e-09, "epoch": 4.870031514914617, "percentage": 97.4, "elapsed_time": "4:18:06", "remaining_time": "0:06:53", "throughput": 8674.75, "total_tokens": 134339560} +{"current_steps": 199350, "total_steps": 204665, "loss": 0.0, "lr": 4.107450536610657e-09, "epoch": 4.8701536657464635, "percentage": 97.4, "elapsed_time": "4:18:06", "remaining_time": "0:06:52", "throughput": 8674.76, "total_tokens": 134342632} +{"current_steps": 199355, "total_steps": 204665, "loss": 0.0, "lr": 4.099732870449624e-09, "epoch": 4.870275816578311, "percentage": 97.41, "elapsed_time": "4:18:06", "remaining_time": "0:06:52", "throughput": 8674.79, "total_tokens": 134346088} +{"current_steps": 199360, "total_steps": 204665, "loss": 0.0, "lr": 4.092022446743337e-09, "epoch": 4.870397967410158, "percentage": 97.41, "elapsed_time": "4:18:07", "remaining_time": "0:06:52", "throughput": 8674.84, "total_tokens": 134350056} +{"current_steps": 199365, "total_steps": 204665, "loss": 0.0, "lr": 4.084319265547531e-09, "epoch": 4.8705201182420055, "percentage": 97.41, "elapsed_time": "4:18:07", "remaining_time": "0:06:51", "throughput": 8674.86, "total_tokens": 134353256} +{"current_steps": 199370, "total_steps": 204665, "loss": 0.0, "lr": 4.076623326918604e-09, "epoch": 4.870642269073852, "percentage": 97.41, "elapsed_time": "4:18:08", "remaining_time": "0:06:51", "throughput": 8674.87, "total_tokens": 134356456} +{"current_steps": 199375, "total_steps": 204665, "loss": 0.0, "lr": 4.068934630912291e-09, "epoch": 4.8707644199057, "percentage": 97.42, "elapsed_time": "4:18:08", "remaining_time": "0:06:50", "throughput": 8674.87, "total_tokens": 134359464} +{"current_steps": 199380, "total_steps": 204665, "loss": 0.0, "lr": 4.061253177584545e-09, "epoch": 4.870886570737547, "percentage": 97.42, "elapsed_time": "4:18:08", "remaining_time": "0:06:50", "throughput": 8674.89, "total_tokens": 134362664} +{"current_steps": 199385, "total_steps": 204665, "loss": 0.0, "lr": 4.053578966991211e-09, "epoch": 4.871008721569394, "percentage": 97.42, "elapsed_time": "4:18:09", "remaining_time": "0:06:50", "throughput": 8674.91, "total_tokens": 134365928} +{"current_steps": 199390, "total_steps": 204665, "loss": 0.0, "lr": 4.045911999188245e-09, "epoch": 4.871130872401241, "percentage": 97.42, "elapsed_time": "4:18:09", "remaining_time": "0:06:49", "throughput": 8674.93, "total_tokens": 134369256} +{"current_steps": 199395, "total_steps": 204665, "loss": 0.0, "lr": 4.038252274231157e-09, "epoch": 4.871253023233088, "percentage": 97.43, "elapsed_time": "4:18:09", "remaining_time": "0:06:49", "throughput": 8674.97, "total_tokens": 134372968} +{"current_steps": 199400, "total_steps": 204665, "loss": 0.0, "lr": 4.030599792175904e-09, "epoch": 4.871375174064935, "percentage": 97.43, "elapsed_time": "4:18:10", "remaining_time": "0:06:49", "throughput": 8675.0, "total_tokens": 134376488} +{"current_steps": 199405, "total_steps": 204665, "loss": 0.0, "lr": 4.022954553077884e-09, "epoch": 4.871497324896783, "percentage": 97.43, "elapsed_time": "4:18:10", "remaining_time": "0:06:48", "throughput": 8675.0, "total_tokens": 134379560} +{"current_steps": 199410, "total_steps": 204665, "loss": 0.0, "lr": 4.015316556992943e-09, "epoch": 4.87161947572863, "percentage": 97.43, "elapsed_time": "4:18:10", "remaining_time": "0:06:48", "throughput": 8675.03, "total_tokens": 134383016} +{"current_steps": 199415, "total_steps": 204665, "loss": 0.079, "lr": 4.007685803976479e-09, "epoch": 4.8717416265604765, "percentage": 97.43, "elapsed_time": "4:18:11", "remaining_time": "0:06:47", "throughput": 8675.05, "total_tokens": 134386344} +{"current_steps": 199420, "total_steps": 204665, "loss": 0.0, "lr": 4.0000622940838945e-09, "epoch": 4.871863777392324, "percentage": 97.44, "elapsed_time": "4:18:11", "remaining_time": "0:06:47", "throughput": 8675.09, "total_tokens": 134390056} +{"current_steps": 199425, "total_steps": 204665, "loss": 0.0, "lr": 3.99244602737081e-09, "epoch": 4.871985928224171, "percentage": 97.44, "elapsed_time": "4:18:11", "remaining_time": "0:06:47", "throughput": 8675.12, "total_tokens": 134393448} +{"current_steps": 199430, "total_steps": 204665, "loss": 0.0002, "lr": 3.9848370038926275e-09, "epoch": 4.8721080790560185, "percentage": 97.44, "elapsed_time": "4:18:12", "remaining_time": "0:06:46", "throughput": 8675.13, "total_tokens": 134396712} +{"current_steps": 199435, "total_steps": 204665, "loss": 0.0, "lr": 3.977235223704523e-09, "epoch": 4.872230229887865, "percentage": 97.44, "elapsed_time": "4:18:12", "remaining_time": "0:06:46", "throughput": 8675.13, "total_tokens": 134399656} +{"current_steps": 199440, "total_steps": 204665, "loss": 0.0, "lr": 3.969640686861897e-09, "epoch": 4.872352380719713, "percentage": 97.45, "elapsed_time": "4:18:12", "remaining_time": "0:06:45", "throughput": 8675.17, "total_tokens": 134403304} +{"current_steps": 199445, "total_steps": 204665, "loss": 0.0, "lr": 3.962053393419929e-09, "epoch": 4.87247453155156, "percentage": 97.45, "elapsed_time": "4:18:13", "remaining_time": "0:06:45", "throughput": 8675.17, "total_tokens": 134406184} +{"current_steps": 199450, "total_steps": 204665, "loss": 0.0, "lr": 3.954473343433795e-09, "epoch": 4.872596682383407, "percentage": 97.45, "elapsed_time": "4:18:13", "remaining_time": "0:06:45", "throughput": 8675.21, "total_tokens": 134409960} +{"current_steps": 199455, "total_steps": 204665, "loss": 0.0, "lr": 3.946900536958675e-09, "epoch": 4.872718833215254, "percentage": 97.45, "elapsed_time": "4:18:13", "remaining_time": "0:06:44", "throughput": 8675.24, "total_tokens": 134413480} +{"current_steps": 199460, "total_steps": 204665, "loss": 0.0, "lr": 3.939334974049635e-09, "epoch": 4.872840984047102, "percentage": 97.46, "elapsed_time": "4:18:14", "remaining_time": "0:06:44", "throughput": 8675.27, "total_tokens": 134416872} +{"current_steps": 199465, "total_steps": 204665, "loss": 0.0, "lr": 3.931776654761631e-09, "epoch": 4.872963134878948, "percentage": 97.46, "elapsed_time": "4:18:14", "remaining_time": "0:06:43", "throughput": 8675.28, "total_tokens": 134420008} +{"current_steps": 199470, "total_steps": 204665, "loss": 0.0, "lr": 3.924225579149621e-09, "epoch": 4.873085285710796, "percentage": 97.46, "elapsed_time": "4:18:14", "remaining_time": "0:06:43", "throughput": 8675.29, "total_tokens": 134423272} +{"current_steps": 199475, "total_steps": 204665, "loss": 0.0, "lr": 3.916681747268558e-09, "epoch": 4.873207436542643, "percentage": 97.46, "elapsed_time": "4:18:15", "remaining_time": "0:06:43", "throughput": 8675.31, "total_tokens": 134426536} +{"current_steps": 199480, "total_steps": 204665, "loss": 0.0, "lr": 3.909145159173289e-09, "epoch": 4.87332958737449, "percentage": 97.47, "elapsed_time": "4:18:15", "remaining_time": "0:06:42", "throughput": 8675.37, "total_tokens": 134430568} +{"current_steps": 199485, "total_steps": 204665, "loss": 0.0, "lr": 3.901615814918657e-09, "epoch": 4.873451738206337, "percentage": 97.47, "elapsed_time": "4:18:16", "remaining_time": "0:06:42", "throughput": 8675.37, "total_tokens": 134433640} +{"current_steps": 199490, "total_steps": 204665, "loss": 0.0, "lr": 3.894093714559399e-09, "epoch": 4.873573889038184, "percentage": 97.47, "elapsed_time": "4:18:16", "remaining_time": "0:06:41", "throughput": 8675.39, "total_tokens": 134436904} +{"current_steps": 199495, "total_steps": 204665, "loss": 0.0, "lr": 3.886578858150247e-09, "epoch": 4.8736960398700315, "percentage": 97.47, "elapsed_time": "4:18:16", "remaining_time": "0:06:41", "throughput": 8675.41, "total_tokens": 134440168} +{"current_steps": 199500, "total_steps": 204665, "loss": 0.0, "lr": 3.879071245745713e-09, "epoch": 4.873818190701879, "percentage": 97.48, "elapsed_time": "4:18:17", "remaining_time": "0:06:41", "throughput": 8675.43, "total_tokens": 134443496} +{"current_steps": 199505, "total_steps": 204665, "loss": 0.0, "lr": 3.871570877400643e-09, "epoch": 4.873940341533726, "percentage": 97.48, "elapsed_time": "4:18:17", "remaining_time": "0:06:40", "throughput": 8675.47, "total_tokens": 134447208} +{"current_steps": 199510, "total_steps": 204665, "loss": 0.0, "lr": 3.864077753169326e-09, "epoch": 4.874062492365573, "percentage": 97.48, "elapsed_time": "4:18:17", "remaining_time": "0:06:40", "throughput": 8675.5, "total_tokens": 134450792} +{"current_steps": 199515, "total_steps": 204665, "loss": 0.0, "lr": 3.8565918731063855e-09, "epoch": 4.87418464319742, "percentage": 97.48, "elapsed_time": "4:18:18", "remaining_time": "0:06:40", "throughput": 8675.57, "total_tokens": 134454952} +{"current_steps": 199520, "total_steps": 204665, "loss": 0.0, "lr": 3.849113237266222e-09, "epoch": 4.874306794029267, "percentage": 97.49, "elapsed_time": "4:18:18", "remaining_time": "0:06:39", "throughput": 8675.57, "total_tokens": 134457960} +{"current_steps": 199525, "total_steps": 204665, "loss": 0.0, "lr": 3.8416418457032365e-09, "epoch": 4.874428944861115, "percentage": 97.49, "elapsed_time": "4:18:18", "remaining_time": "0:06:39", "throughput": 8675.61, "total_tokens": 134461608} +{"current_steps": 199530, "total_steps": 204665, "loss": 0.0, "lr": 3.83417769847183e-09, "epoch": 4.874551095692961, "percentage": 97.49, "elapsed_time": "4:18:19", "remaining_time": "0:06:38", "throughput": 8675.61, "total_tokens": 134464360} +{"current_steps": 199535, "total_steps": 204665, "loss": 0.0, "lr": 3.826720795626181e-09, "epoch": 4.874673246524809, "percentage": 97.49, "elapsed_time": "4:18:19", "remaining_time": "0:06:38", "throughput": 8675.64, "total_tokens": 134467944} +{"current_steps": 199540, "total_steps": 204665, "loss": 0.0, "lr": 3.819271137220581e-09, "epoch": 4.874795397356656, "percentage": 97.5, "elapsed_time": "4:18:19", "remaining_time": "0:06:38", "throughput": 8675.64, "total_tokens": 134470952} +{"current_steps": 199545, "total_steps": 204665, "loss": 0.0, "lr": 3.8118287233090965e-09, "epoch": 4.874917548188503, "percentage": 97.5, "elapsed_time": "4:18:20", "remaining_time": "0:06:37", "throughput": 8675.68, "total_tokens": 134474536} +{"current_steps": 199550, "total_steps": 204665, "loss": 0.0, "lr": 3.804393553946017e-09, "epoch": 4.87503969902035, "percentage": 97.5, "elapsed_time": "4:18:20", "remaining_time": "0:06:37", "throughput": 8675.7, "total_tokens": 134477800} +{"current_steps": 199555, "total_steps": 204665, "loss": 0.0, "lr": 3.7969656291853e-09, "epoch": 4.875161849852198, "percentage": 97.5, "elapsed_time": "4:18:20", "remaining_time": "0:06:36", "throughput": 8675.71, "total_tokens": 134481000} +{"current_steps": 199560, "total_steps": 204665, "loss": 0.0, "lr": 3.789544949081014e-09, "epoch": 4.8752840006840445, "percentage": 97.51, "elapsed_time": "4:18:21", "remaining_time": "0:06:36", "throughput": 8675.74, "total_tokens": 134484584} +{"current_steps": 199565, "total_steps": 204665, "loss": 0.0, "lr": 3.7821315136871145e-09, "epoch": 4.875406151515892, "percentage": 97.51, "elapsed_time": "4:18:21", "remaining_time": "0:06:36", "throughput": 8675.78, "total_tokens": 134488168} +{"current_steps": 199570, "total_steps": 204665, "loss": 0.0, "lr": 3.774725323057449e-09, "epoch": 4.875528302347739, "percentage": 97.51, "elapsed_time": "4:18:21", "remaining_time": "0:06:35", "throughput": 8675.8, "total_tokens": 134491432} +{"current_steps": 199575, "total_steps": 204665, "loss": 0.0, "lr": 3.767326377245972e-09, "epoch": 4.875650453179587, "percentage": 97.51, "elapsed_time": "4:18:22", "remaining_time": "0:06:35", "throughput": 8675.82, "total_tokens": 134494760} +{"current_steps": 199580, "total_steps": 204665, "loss": 0.0, "lr": 3.75993467630642e-09, "epoch": 4.875772604011433, "percentage": 97.52, "elapsed_time": "4:18:22", "remaining_time": "0:06:34", "throughput": 8675.83, "total_tokens": 134497896} +{"current_steps": 199585, "total_steps": 204665, "loss": 0.0, "lr": 3.752550220292638e-09, "epoch": 4.87589475484328, "percentage": 97.52, "elapsed_time": "4:18:22", "remaining_time": "0:06:34", "throughput": 8675.88, "total_tokens": 134501736} +{"current_steps": 199590, "total_steps": 204665, "loss": 0.0, "lr": 3.745173009258252e-09, "epoch": 4.876016905675128, "percentage": 97.52, "elapsed_time": "4:18:23", "remaining_time": "0:06:34", "throughput": 8675.9, "total_tokens": 134505128} +{"current_steps": 199595, "total_steps": 204665, "loss": 0.0, "lr": 3.737803043256993e-09, "epoch": 4.876139056506974, "percentage": 97.52, "elapsed_time": "4:18:23", "remaining_time": "0:06:33", "throughput": 8675.93, "total_tokens": 134508712} +{"current_steps": 199600, "total_steps": 204665, "loss": 0.0, "lr": 3.730440322342266e-09, "epoch": 4.876261207338822, "percentage": 97.53, "elapsed_time": "4:18:24", "remaining_time": "0:06:33", "throughput": 8675.97, "total_tokens": 134512424} +{"current_steps": 199605, "total_steps": 204665, "loss": 0.0, "lr": 3.7230848465678033e-09, "epoch": 4.876383358170669, "percentage": 97.53, "elapsed_time": "4:18:24", "remaining_time": "0:06:33", "throughput": 8676.0, "total_tokens": 134515816} +{"current_steps": 199610, "total_steps": 204665, "loss": 0.0, "lr": 3.7157366159870086e-09, "epoch": 4.8765055090025164, "percentage": 97.53, "elapsed_time": "4:18:24", "remaining_time": "0:06:32", "throughput": 8676.02, "total_tokens": 134519144} +{"current_steps": 199615, "total_steps": 204665, "loss": 0.0, "lr": 3.7083956306533936e-09, "epoch": 4.876627659834363, "percentage": 97.53, "elapsed_time": "4:18:25", "remaining_time": "0:06:32", "throughput": 8676.04, "total_tokens": 134522536} +{"current_steps": 199620, "total_steps": 204665, "loss": 0.0, "lr": 3.7010618906202494e-09, "epoch": 4.876749810666211, "percentage": 97.53, "elapsed_time": "4:18:25", "remaining_time": "0:06:31", "throughput": 8676.05, "total_tokens": 134525544} +{"current_steps": 199625, "total_steps": 204665, "loss": 0.0, "lr": 3.693735395940978e-09, "epoch": 4.876871961498058, "percentage": 97.54, "elapsed_time": "4:18:25", "remaining_time": "0:06:31", "throughput": 8676.06, "total_tokens": 134528808} +{"current_steps": 199630, "total_steps": 204665, "loss": 0.0, "lr": 3.6864161466688694e-09, "epoch": 4.876994112329905, "percentage": 97.54, "elapsed_time": "4:18:26", "remaining_time": "0:06:31", "throughput": 8676.09, "total_tokens": 134532264} +{"current_steps": 199635, "total_steps": 204665, "loss": 0.0, "lr": 3.6791041428569926e-09, "epoch": 4.877116263161752, "percentage": 97.54, "elapsed_time": "4:18:26", "remaining_time": "0:06:30", "throughput": 8676.1, "total_tokens": 134535400} +{"current_steps": 199640, "total_steps": 204665, "loss": 0.0, "lr": 3.6717993845587493e-09, "epoch": 4.8772384139936, "percentage": 97.54, "elapsed_time": "4:18:26", "remaining_time": "0:06:30", "throughput": 8676.16, "total_tokens": 134539432} +{"current_steps": 199645, "total_steps": 204665, "loss": 0.0, "lr": 3.6645018718272082e-09, "epoch": 4.877360564825446, "percentage": 97.55, "elapsed_time": "4:18:27", "remaining_time": "0:06:29", "throughput": 8676.18, "total_tokens": 134542632} +{"current_steps": 199650, "total_steps": 204665, "loss": 0.0, "lr": 3.6572116047153267e-09, "epoch": 4.877482715657294, "percentage": 97.55, "elapsed_time": "4:18:27", "remaining_time": "0:06:29", "throughput": 8676.2, "total_tokens": 134546024} +{"current_steps": 199655, "total_steps": 204665, "loss": 0.0, "lr": 3.649928583276174e-09, "epoch": 4.877604866489141, "percentage": 97.55, "elapsed_time": "4:18:27", "remaining_time": "0:06:29", "throughput": 8676.21, "total_tokens": 134549160} +{"current_steps": 199660, "total_steps": 204665, "loss": 0.0, "lr": 3.6426528075627073e-09, "epoch": 4.8777270173209875, "percentage": 97.55, "elapsed_time": "4:18:28", "remaining_time": "0:06:28", "throughput": 8676.2, "total_tokens": 134551848} +{"current_steps": 199665, "total_steps": 204665, "loss": 0.0, "lr": 3.635384277627884e-09, "epoch": 4.877849168152835, "percentage": 97.56, "elapsed_time": "4:18:28", "remaining_time": "0:06:28", "throughput": 8676.24, "total_tokens": 134555624} +{"current_steps": 199670, "total_steps": 204665, "loss": 0.0, "lr": 3.6281229935245516e-09, "epoch": 4.877971318984683, "percentage": 97.56, "elapsed_time": "4:18:28", "remaining_time": "0:06:27", "throughput": 8676.27, "total_tokens": 134559080} +{"current_steps": 199675, "total_steps": 204665, "loss": 0.0, "lr": 3.620868955305445e-09, "epoch": 4.8780934698165295, "percentage": 97.56, "elapsed_time": "4:18:29", "remaining_time": "0:06:27", "throughput": 8676.28, "total_tokens": 134562280} +{"current_steps": 199680, "total_steps": 204665, "loss": 0.0, "lr": 3.613622163023522e-09, "epoch": 4.878215620648376, "percentage": 97.56, "elapsed_time": "4:18:29", "remaining_time": "0:06:27", "throughput": 8676.31, "total_tokens": 134565800} +{"current_steps": 199685, "total_steps": 204665, "loss": 0.0, "lr": 3.606382616731185e-09, "epoch": 4.878337771480224, "percentage": 97.57, "elapsed_time": "4:18:29", "remaining_time": "0:06:26", "throughput": 8676.34, "total_tokens": 134569320} +{"current_steps": 199690, "total_steps": 204665, "loss": 0.0464, "lr": 3.599150316481281e-09, "epoch": 4.878459922312071, "percentage": 97.57, "elapsed_time": "4:18:30", "remaining_time": "0:06:26", "throughput": 8676.37, "total_tokens": 134572712} +{"current_steps": 199695, "total_steps": 204665, "loss": 0.0, "lr": 3.591925262326323e-09, "epoch": 4.878582073143918, "percentage": 97.57, "elapsed_time": "4:18:30", "remaining_time": "0:06:26", "throughput": 8676.39, "total_tokens": 134576104} +{"current_steps": 199700, "total_steps": 204665, "loss": 0.0, "lr": 3.584707454318936e-09, "epoch": 4.878704223975765, "percentage": 97.57, "elapsed_time": "4:18:30", "remaining_time": "0:06:25", "throughput": 8676.41, "total_tokens": 134579368} +{"current_steps": 199705, "total_steps": 204665, "loss": 0.0, "lr": 3.5774968925115223e-09, "epoch": 4.878826374807613, "percentage": 97.58, "elapsed_time": "4:18:31", "remaining_time": "0:06:25", "throughput": 8676.41, "total_tokens": 134582376} +{"current_steps": 199710, "total_steps": 204665, "loss": 0.0, "lr": 3.5702935769565956e-09, "epoch": 4.878948525639459, "percentage": 97.58, "elapsed_time": "4:18:31", "remaining_time": "0:06:24", "throughput": 8676.45, "total_tokens": 134586024} +{"current_steps": 199715, "total_steps": 204665, "loss": 0.0, "lr": 3.5630975077065583e-09, "epoch": 4.879070676471307, "percentage": 97.58, "elapsed_time": "4:18:32", "remaining_time": "0:06:24", "throughput": 8676.49, "total_tokens": 134589736} +{"current_steps": 199720, "total_steps": 204665, "loss": 0.0, "lr": 3.555908684813591e-09, "epoch": 4.879192827303154, "percentage": 97.58, "elapsed_time": "4:18:32", "remaining_time": "0:06:24", "throughput": 8676.53, "total_tokens": 134593384} +{"current_steps": 199725, "total_steps": 204665, "loss": 0.0, "lr": 3.5487271083300962e-09, "epoch": 4.879314978135001, "percentage": 97.59, "elapsed_time": "4:18:32", "remaining_time": "0:06:23", "throughput": 8676.56, "total_tokens": 134596968} +{"current_steps": 199730, "total_steps": 204665, "loss": 0.0, "lr": 3.5415527783082544e-09, "epoch": 4.879437128966848, "percentage": 97.59, "elapsed_time": "4:18:33", "remaining_time": "0:06:23", "throughput": 8676.59, "total_tokens": 134600424} +{"current_steps": 199735, "total_steps": 204665, "loss": 0.0, "lr": 3.534385694800246e-09, "epoch": 4.879559279798696, "percentage": 97.59, "elapsed_time": "4:18:33", "remaining_time": "0:06:22", "throughput": 8676.48, "total_tokens": 134604072} +{"current_steps": 199740, "total_steps": 204665, "loss": 0.0, "lr": 3.5272258578581405e-09, "epoch": 4.8796814306305425, "percentage": 97.59, "elapsed_time": "4:18:34", "remaining_time": "0:06:22", "throughput": 8676.49, "total_tokens": 134607272} +{"current_steps": 199745, "total_steps": 204665, "loss": 0.0, "lr": 3.5200732675341185e-09, "epoch": 4.87980358146239, "percentage": 97.6, "elapsed_time": "4:18:34", "remaining_time": "0:06:22", "throughput": 8676.5, "total_tokens": 134610408} +{"current_steps": 199750, "total_steps": 204665, "loss": 0.0, "lr": 3.512927923880249e-09, "epoch": 4.879925732294237, "percentage": 97.6, "elapsed_time": "4:18:34", "remaining_time": "0:06:21", "throughput": 8676.52, "total_tokens": 134613672} +{"current_steps": 199755, "total_steps": 204665, "loss": 0.0, "lr": 3.505789826948269e-09, "epoch": 4.880047883126084, "percentage": 97.6, "elapsed_time": "4:18:35", "remaining_time": "0:06:21", "throughput": 8676.53, "total_tokens": 134616872} +{"current_steps": 199760, "total_steps": 204665, "loss": 0.0, "lr": 3.4986589767902476e-09, "epoch": 4.880170033957931, "percentage": 97.6, "elapsed_time": "4:18:35", "remaining_time": "0:06:20", "throughput": 8676.56, "total_tokens": 134620392} +{"current_steps": 199765, "total_steps": 204665, "loss": 0.0, "lr": 3.4915353734580322e-09, "epoch": 4.880292184789779, "percentage": 97.61, "elapsed_time": "4:18:35", "remaining_time": "0:06:20", "throughput": 8676.57, "total_tokens": 134623528} +{"current_steps": 199770, "total_steps": 204665, "loss": 0.0, "lr": 3.4844190170033596e-09, "epoch": 4.880414335621626, "percentage": 97.61, "elapsed_time": "4:18:36", "remaining_time": "0:06:20", "throughput": 8676.58, "total_tokens": 134626536} +{"current_steps": 199775, "total_steps": 204665, "loss": 0.0, "lr": 3.4773099074780765e-09, "epoch": 4.880536486453472, "percentage": 97.61, "elapsed_time": "4:18:36", "remaining_time": "0:06:19", "throughput": 8676.6, "total_tokens": 134629928} +{"current_steps": 199780, "total_steps": 204665, "loss": 0.031, "lr": 3.470208044933809e-09, "epoch": 4.88065863728532, "percentage": 97.61, "elapsed_time": "4:18:36", "remaining_time": "0:06:19", "throughput": 8676.62, "total_tokens": 134633256} +{"current_steps": 199785, "total_steps": 204665, "loss": 0.0, "lr": 3.463113429422182e-09, "epoch": 4.880780788117167, "percentage": 97.62, "elapsed_time": "4:18:37", "remaining_time": "0:06:19", "throughput": 8676.63, "total_tokens": 134636392} +{"current_steps": 199790, "total_steps": 204665, "loss": 0.0005, "lr": 3.456026060994821e-09, "epoch": 4.880902938949014, "percentage": 97.62, "elapsed_time": "4:18:37", "remaining_time": "0:06:18", "throughput": 8676.65, "total_tokens": 134639720} +{"current_steps": 199795, "total_steps": 204665, "loss": 0.0, "lr": 3.4489459397033514e-09, "epoch": 4.881025089780861, "percentage": 97.62, "elapsed_time": "4:18:37", "remaining_time": "0:06:18", "throughput": 8676.68, "total_tokens": 134643112} +{"current_steps": 199800, "total_steps": 204665, "loss": 0.0, "lr": 3.441873065599066e-09, "epoch": 4.881147240612709, "percentage": 97.62, "elapsed_time": "4:18:38", "remaining_time": "0:06:17", "throughput": 8676.7, "total_tokens": 134646504} +{"current_steps": 199805, "total_steps": 204665, "loss": 0.0, "lr": 3.4348074387337e-09, "epoch": 4.8812693914445555, "percentage": 97.63, "elapsed_time": "4:18:38", "remaining_time": "0:06:17", "throughput": 8676.73, "total_tokens": 134650024} +{"current_steps": 199810, "total_steps": 204665, "loss": 0.0, "lr": 3.4277490591583245e-09, "epoch": 4.881391542276403, "percentage": 97.63, "elapsed_time": "4:18:38", "remaining_time": "0:06:17", "throughput": 8676.76, "total_tokens": 134653544} +{"current_steps": 199815, "total_steps": 204665, "loss": 0.0, "lr": 3.420697926924454e-09, "epoch": 4.88151369310825, "percentage": 97.63, "elapsed_time": "4:18:39", "remaining_time": "0:06:16", "throughput": 8676.79, "total_tokens": 134657064} +{"current_steps": 199820, "total_steps": 204665, "loss": 0.0, "lr": 3.413654042083269e-09, "epoch": 4.8816358439400975, "percentage": 97.63, "elapsed_time": "4:18:39", "remaining_time": "0:06:16", "throughput": 8676.82, "total_tokens": 134660584} +{"current_steps": 199825, "total_steps": 204665, "loss": 0.0, "lr": 3.406617404686063e-09, "epoch": 4.881757994771944, "percentage": 97.64, "elapsed_time": "4:18:39", "remaining_time": "0:06:15", "throughput": 8676.87, "total_tokens": 134664424} +{"current_steps": 199830, "total_steps": 204665, "loss": 0.0, "lr": 3.3995880147840163e-09, "epoch": 4.881880145603792, "percentage": 97.64, "elapsed_time": "4:18:40", "remaining_time": "0:06:15", "throughput": 8676.89, "total_tokens": 134667816} +{"current_steps": 199835, "total_steps": 204665, "loss": 0.0, "lr": 3.392565872428199e-09, "epoch": 4.882002296435639, "percentage": 97.64, "elapsed_time": "4:18:40", "remaining_time": "0:06:15", "throughput": 8676.91, "total_tokens": 134671144} +{"current_steps": 199840, "total_steps": 204665, "loss": 0.0, "lr": 3.385550977669682e-09, "epoch": 4.882124447267486, "percentage": 97.64, "elapsed_time": "4:18:40", "remaining_time": "0:06:14", "throughput": 8676.92, "total_tokens": 134674152} +{"current_steps": 199845, "total_steps": 204665, "loss": 0.0, "lr": 3.3785433305595355e-09, "epoch": 4.882246598099333, "percentage": 97.64, "elapsed_time": "4:18:41", "remaining_time": "0:06:14", "throughput": 8676.94, "total_tokens": 134677480} +{"current_steps": 199850, "total_steps": 204665, "loss": 0.058, "lr": 3.371542931148608e-09, "epoch": 4.88236874893118, "percentage": 97.65, "elapsed_time": "4:18:41", "remaining_time": "0:06:13", "throughput": 8676.96, "total_tokens": 134680936} +{"current_steps": 199855, "total_steps": 204665, "loss": 0.0002, "lr": 3.3645497794879684e-09, "epoch": 4.882490899763027, "percentage": 97.65, "elapsed_time": "4:18:42", "remaining_time": "0:06:13", "throughput": 8676.98, "total_tokens": 134684264} +{"current_steps": 199860, "total_steps": 204665, "loss": 0.0, "lr": 3.3575638756283555e-09, "epoch": 4.882613050594875, "percentage": 97.65, "elapsed_time": "4:18:42", "remaining_time": "0:06:13", "throughput": 8676.98, "total_tokens": 134687208} +{"current_steps": 199865, "total_steps": 204665, "loss": 0.0, "lr": 3.350585219620505e-09, "epoch": 4.882735201426722, "percentage": 97.65, "elapsed_time": "4:18:42", "remaining_time": "0:06:12", "throughput": 8677.02, "total_tokens": 134690728} +{"current_steps": 199870, "total_steps": 204665, "loss": 0.0, "lr": 3.343613811515378e-09, "epoch": 4.8828573522585685, "percentage": 97.66, "elapsed_time": "4:18:43", "remaining_time": "0:06:12", "throughput": 8677.05, "total_tokens": 134694248} +{"current_steps": 199875, "total_steps": 204665, "loss": 0.0318, "lr": 3.336649651363599e-09, "epoch": 4.882979503090416, "percentage": 97.66, "elapsed_time": "4:18:43", "remaining_time": "0:06:12", "throughput": 8677.09, "total_tokens": 134698024} +{"current_steps": 199880, "total_steps": 204665, "loss": 0.0, "lr": 3.3296927392156836e-09, "epoch": 4.883101653922263, "percentage": 97.66, "elapsed_time": "4:18:43", "remaining_time": "0:06:11", "throughput": 8677.13, "total_tokens": 134701608} +{"current_steps": 199885, "total_steps": 204665, "loss": 0.0, "lr": 3.3227430751223696e-09, "epoch": 4.8832238047541106, "percentage": 97.66, "elapsed_time": "4:18:44", "remaining_time": "0:06:11", "throughput": 8677.13, "total_tokens": 134704616} +{"current_steps": 199890, "total_steps": 204665, "loss": 0.0, "lr": 3.3158006591340603e-09, "epoch": 4.883345955585957, "percentage": 97.67, "elapsed_time": "4:18:44", "remaining_time": "0:06:10", "throughput": 8677.14, "total_tokens": 134707752} +{"current_steps": 199895, "total_steps": 204665, "loss": 0.0, "lr": 3.3088654913013825e-09, "epoch": 4.883468106417805, "percentage": 97.67, "elapsed_time": "4:18:44", "remaining_time": "0:06:10", "throughput": 8677.16, "total_tokens": 134711016} +{"current_steps": 199900, "total_steps": 204665, "loss": 0.0, "lr": 3.30193757167474e-09, "epoch": 4.883590257249652, "percentage": 97.67, "elapsed_time": "4:18:45", "remaining_time": "0:06:10", "throughput": 8677.19, "total_tokens": 134714536} +{"current_steps": 199905, "total_steps": 204665, "loss": 0.0, "lr": 3.295016900304426e-09, "epoch": 4.883712408081499, "percentage": 97.67, "elapsed_time": "4:18:45", "remaining_time": "0:06:09", "throughput": 8677.19, "total_tokens": 134717544} +{"current_steps": 199910, "total_steps": 204665, "loss": 0.0, "lr": 3.2881034772408444e-09, "epoch": 4.883834558913346, "percentage": 97.68, "elapsed_time": "4:18:45", "remaining_time": "0:06:09", "throughput": 8677.22, "total_tokens": 134720936} +{"current_steps": 199915, "total_steps": 204665, "loss": 0.0, "lr": 3.281197302534289e-09, "epoch": 4.883956709745194, "percentage": 97.68, "elapsed_time": "4:18:46", "remaining_time": "0:06:08", "throughput": 8677.25, "total_tokens": 134724584} +{"current_steps": 199920, "total_steps": 204665, "loss": 0.0, "lr": 3.2742983762349406e-09, "epoch": 4.88407886057704, "percentage": 97.68, "elapsed_time": "4:18:46", "remaining_time": "0:06:08", "throughput": 8677.26, "total_tokens": 134727720} +{"current_steps": 199925, "total_steps": 204665, "loss": 0.0, "lr": 3.2674066983929826e-09, "epoch": 4.884201011408888, "percentage": 97.68, "elapsed_time": "4:18:46", "remaining_time": "0:06:08", "throughput": 8677.29, "total_tokens": 134731112} +{"current_steps": 199930, "total_steps": 204665, "loss": 0.0, "lr": 3.2605222690585967e-09, "epoch": 4.884323162240735, "percentage": 97.69, "elapsed_time": "4:18:47", "remaining_time": "0:06:07", "throughput": 8677.3, "total_tokens": 134734248} +{"current_steps": 199935, "total_steps": 204665, "loss": 0.0, "lr": 3.253645088281631e-09, "epoch": 4.8844453130725825, "percentage": 97.69, "elapsed_time": "4:18:47", "remaining_time": "0:06:07", "throughput": 8677.31, "total_tokens": 134737320} +{"current_steps": 199940, "total_steps": 204665, "loss": 0.0, "lr": 3.2467751561123803e-09, "epoch": 4.884567463904429, "percentage": 97.69, "elapsed_time": "4:18:47", "remaining_time": "0:06:06", "throughput": 8677.33, "total_tokens": 134740648} +{"current_steps": 199945, "total_steps": 204665, "loss": 0.0, "lr": 3.2399124726005813e-09, "epoch": 4.884689614736276, "percentage": 97.69, "elapsed_time": "4:18:48", "remaining_time": "0:06:06", "throughput": 8677.35, "total_tokens": 134743976} +{"current_steps": 199950, "total_steps": 204665, "loss": 0.0, "lr": 3.2330570377963053e-09, "epoch": 4.884811765568124, "percentage": 97.7, "elapsed_time": "4:18:48", "remaining_time": "0:06:06", "throughput": 8677.36, "total_tokens": 134747112} +{"current_steps": 199955, "total_steps": 204665, "loss": 0.0, "lr": 3.2262088517492903e-09, "epoch": 4.88493391639997, "percentage": 97.7, "elapsed_time": "4:18:48", "remaining_time": "0:06:05", "throughput": 8677.39, "total_tokens": 134750696} +{"current_steps": 199960, "total_steps": 204665, "loss": 0.0, "lr": 3.2193679145093857e-09, "epoch": 4.885056067231818, "percentage": 97.7, "elapsed_time": "4:18:49", "remaining_time": "0:06:05", "throughput": 8677.43, "total_tokens": 134754344} +{"current_steps": 199965, "total_steps": 204665, "loss": 0.0, "lr": 3.21253422612644e-09, "epoch": 4.885178218063665, "percentage": 97.7, "elapsed_time": "4:18:49", "remaining_time": "0:06:05", "throughput": 8677.49, "total_tokens": 134758440} +{"current_steps": 199970, "total_steps": 204665, "loss": 0.0, "lr": 3.205707786649858e-09, "epoch": 4.885300368895512, "percentage": 97.71, "elapsed_time": "4:18:50", "remaining_time": "0:06:04", "throughput": 8677.52, "total_tokens": 134761896} +{"current_steps": 199975, "total_steps": 204665, "loss": 0.0, "lr": 3.198888596129712e-09, "epoch": 4.885422519727359, "percentage": 97.71, "elapsed_time": "4:18:50", "remaining_time": "0:06:04", "throughput": 8677.56, "total_tokens": 134765608} +{"current_steps": 199980, "total_steps": 204665, "loss": 0.0, "lr": 3.1920766546151833e-09, "epoch": 4.885544670559207, "percentage": 97.71, "elapsed_time": "4:18:50", "remaining_time": "0:06:03", "throughput": 8677.59, "total_tokens": 134769064} +{"current_steps": 199985, "total_steps": 204665, "loss": 0.0, "lr": 3.185271962156011e-09, "epoch": 4.8856668213910535, "percentage": 97.71, "elapsed_time": "4:18:51", "remaining_time": "0:06:03", "throughput": 8677.6, "total_tokens": 134772200} +{"current_steps": 199990, "total_steps": 204665, "loss": 0.0, "lr": 3.1784745188017106e-09, "epoch": 4.885788972222901, "percentage": 97.72, "elapsed_time": "4:18:51", "remaining_time": "0:06:03", "throughput": 8677.61, "total_tokens": 134775400} +{"current_steps": 199995, "total_steps": 204665, "loss": 0.0004, "lr": 3.1716843246015757e-09, "epoch": 4.885911123054748, "percentage": 97.72, "elapsed_time": "4:18:51", "remaining_time": "0:06:02", "throughput": 8677.62, "total_tokens": 134778600} +{"current_steps": 200000, "total_steps": 204665, "loss": 0.0, "lr": 3.1649013796051226e-09, "epoch": 4.8860332738865955, "percentage": 97.72, "elapsed_time": "4:18:52", "remaining_time": "0:06:02", "throughput": 8677.65, "total_tokens": 134782056} +{"current_steps": 200005, "total_steps": 204665, "loss": 0.0001, "lr": 3.1581256838615346e-09, "epoch": 4.886155424718442, "percentage": 97.72, "elapsed_time": "4:18:52", "remaining_time": "0:06:01", "throughput": 8677.68, "total_tokens": 134785448} +{"current_steps": 200010, "total_steps": 204665, "loss": 0.0, "lr": 3.1513572374203267e-09, "epoch": 4.88627757555029, "percentage": 97.73, "elapsed_time": "4:18:52", "remaining_time": "0:06:01", "throughput": 8677.71, "total_tokens": 134788968} +{"current_steps": 200015, "total_steps": 204665, "loss": 0.0, "lr": 3.1445960403304605e-09, "epoch": 4.886399726382137, "percentage": 97.73, "elapsed_time": "4:18:53", "remaining_time": "0:06:01", "throughput": 8677.74, "total_tokens": 134792488} +{"current_steps": 200020, "total_steps": 204665, "loss": 0.0, "lr": 3.13784209264123e-09, "epoch": 4.886521877213983, "percentage": 97.73, "elapsed_time": "4:18:53", "remaining_time": "0:06:00", "throughput": 8677.76, "total_tokens": 134795752} +{"current_steps": 200025, "total_steps": 204665, "loss": 0.0, "lr": 3.131095394401817e-09, "epoch": 4.886644028045831, "percentage": 97.73, "elapsed_time": "4:18:53", "remaining_time": "0:06:00", "throughput": 8677.79, "total_tokens": 134799400} +{"current_steps": 200030, "total_steps": 204665, "loss": 0.0, "lr": 3.1243559456610726e-09, "epoch": 4.886766178877679, "percentage": 97.74, "elapsed_time": "4:18:54", "remaining_time": "0:05:59", "throughput": 8677.81, "total_tokens": 134802600} +{"current_steps": 200035, "total_steps": 204665, "loss": 0.0, "lr": 3.117623746468179e-09, "epoch": 4.886888329709525, "percentage": 97.74, "elapsed_time": "4:18:54", "remaining_time": "0:05:59", "throughput": 8677.83, "total_tokens": 134806056} +{"current_steps": 200040, "total_steps": 204665, "loss": 0.0, "lr": 3.110898796872097e-09, "epoch": 4.887010480541372, "percentage": 97.74, "elapsed_time": "4:18:54", "remaining_time": "0:05:59", "throughput": 8677.9, "total_tokens": 134810216} +{"current_steps": 200045, "total_steps": 204665, "loss": 0.0, "lr": 3.1041810969216766e-09, "epoch": 4.88713263137322, "percentage": 97.74, "elapsed_time": "4:18:55", "remaining_time": "0:05:58", "throughput": 8677.9, "total_tokens": 134813288} +{"current_steps": 200050, "total_steps": 204665, "loss": 0.0001, "lr": 3.0974706466657676e-09, "epoch": 4.8872547822050665, "percentage": 97.75, "elapsed_time": "4:18:55", "remaining_time": "0:05:58", "throughput": 8677.91, "total_tokens": 134816360} +{"current_steps": 200055, "total_steps": 204665, "loss": 0.0001, "lr": 3.09076744615322e-09, "epoch": 4.887376933036914, "percentage": 97.75, "elapsed_time": "4:18:55", "remaining_time": "0:05:58", "throughput": 8677.94, "total_tokens": 134819816} +{"current_steps": 200060, "total_steps": 204665, "loss": 0.0, "lr": 3.0840714954326608e-09, "epoch": 4.887499083868761, "percentage": 97.75, "elapsed_time": "4:18:56", "remaining_time": "0:05:57", "throughput": 8677.95, "total_tokens": 134823016} +{"current_steps": 200065, "total_steps": 204665, "loss": 0.0, "lr": 3.077382794552941e-09, "epoch": 4.8876212347006085, "percentage": 97.75, "elapsed_time": "4:18:56", "remaining_time": "0:05:57", "throughput": 8677.98, "total_tokens": 134826472} +{"current_steps": 200070, "total_steps": 204665, "loss": 0.0, "lr": 3.070701343562687e-09, "epoch": 4.887743385532455, "percentage": 97.75, "elapsed_time": "4:18:56", "remaining_time": "0:05:56", "throughput": 8678.0, "total_tokens": 134829800} +{"current_steps": 200075, "total_steps": 204665, "loss": 0.0, "lr": 3.064027142510306e-09, "epoch": 4.887865536364303, "percentage": 97.76, "elapsed_time": "4:18:57", "remaining_time": "0:05:56", "throughput": 8678.02, "total_tokens": 134833128} +{"current_steps": 200080, "total_steps": 204665, "loss": 0.0, "lr": 3.057360191444536e-09, "epoch": 4.88798768719615, "percentage": 97.76, "elapsed_time": "4:18:57", "remaining_time": "0:05:56", "throughput": 8678.03, "total_tokens": 134836264} +{"current_steps": 200085, "total_steps": 204665, "loss": 0.0, "lr": 3.0507004904137823e-09, "epoch": 4.888109838027997, "percentage": 97.76, "elapsed_time": "4:18:57", "remaining_time": "0:05:55", "throughput": 8678.04, "total_tokens": 134839272} +{"current_steps": 200090, "total_steps": 204665, "loss": 0.0, "lr": 3.0440480394664516e-09, "epoch": 4.888231988859844, "percentage": 97.76, "elapsed_time": "4:18:58", "remaining_time": "0:05:55", "throughput": 8678.05, "total_tokens": 134842472} +{"current_steps": 200095, "total_steps": 204665, "loss": 0.0, "lr": 3.0374028386510596e-09, "epoch": 4.888354139691692, "percentage": 97.77, "elapsed_time": "4:18:58", "remaining_time": "0:05:54", "throughput": 8678.07, "total_tokens": 134845800} +{"current_steps": 200100, "total_steps": 204665, "loss": 0.0, "lr": 3.0307648880156798e-09, "epoch": 4.888476290523538, "percentage": 97.77, "elapsed_time": "4:18:59", "remaining_time": "0:05:54", "throughput": 8678.09, "total_tokens": 134849064} +{"current_steps": 200105, "total_steps": 204665, "loss": 0.0, "lr": 3.0241341876088287e-09, "epoch": 4.888598441355386, "percentage": 97.77, "elapsed_time": "4:18:59", "remaining_time": "0:05:54", "throughput": 8678.13, "total_tokens": 134852712} +{"current_steps": 200110, "total_steps": 204665, "loss": 0.0, "lr": 3.0175107374785792e-09, "epoch": 4.888720592187233, "percentage": 97.77, "elapsed_time": "4:18:59", "remaining_time": "0:05:53", "throughput": 8678.14, "total_tokens": 134856040} +{"current_steps": 200115, "total_steps": 204665, "loss": 0.0, "lr": 3.0108945376732254e-09, "epoch": 4.8888427430190795, "percentage": 97.78, "elapsed_time": "4:19:00", "remaining_time": "0:05:53", "throughput": 8678.17, "total_tokens": 134859560} +{"current_steps": 200120, "total_steps": 204665, "loss": 0.0, "lr": 3.0042855882407293e-09, "epoch": 4.888964893850927, "percentage": 97.78, "elapsed_time": "4:19:00", "remaining_time": "0:05:52", "throughput": 8678.18, "total_tokens": 134862568} +{"current_steps": 200125, "total_steps": 204665, "loss": 0.0, "lr": 2.9976838892292746e-09, "epoch": 4.889087044682775, "percentage": 97.78, "elapsed_time": "4:19:00", "remaining_time": "0:05:52", "throughput": 8678.23, "total_tokens": 134866408} +{"current_steps": 200130, "total_steps": 204665, "loss": 0.0, "lr": 2.9910894406868224e-09, "epoch": 4.8892091955146215, "percentage": 97.78, "elapsed_time": "4:19:01", "remaining_time": "0:05:52", "throughput": 8678.3, "total_tokens": 134870696} +{"current_steps": 200135, "total_steps": 204665, "loss": 0.0, "lr": 2.9845022426612243e-09, "epoch": 4.889331346346468, "percentage": 97.79, "elapsed_time": "4:19:01", "remaining_time": "0:05:51", "throughput": 8678.32, "total_tokens": 134874088} +{"current_steps": 200140, "total_steps": 204665, "loss": 0.0, "lr": 2.9779222952005524e-09, "epoch": 4.889453497178316, "percentage": 97.79, "elapsed_time": "4:19:01", "remaining_time": "0:05:51", "throughput": 8678.33, "total_tokens": 134877096} +{"current_steps": 200145, "total_steps": 204665, "loss": 0.0, "lr": 2.971349598352657e-09, "epoch": 4.889575648010163, "percentage": 97.79, "elapsed_time": "4:19:02", "remaining_time": "0:05:50", "throughput": 8678.34, "total_tokens": 134880232} +{"current_steps": 200150, "total_steps": 204665, "loss": 0.0, "lr": 2.9647841521652783e-09, "epoch": 4.88969779884201, "percentage": 97.79, "elapsed_time": "4:19:02", "remaining_time": "0:05:50", "throughput": 8678.36, "total_tokens": 134883560} +{"current_steps": 200155, "total_steps": 204665, "loss": 0.0, "lr": 2.9582259566860446e-09, "epoch": 4.889819949673857, "percentage": 97.8, "elapsed_time": "4:19:02", "remaining_time": "0:05:50", "throughput": 8678.39, "total_tokens": 134887208} +{"current_steps": 200160, "total_steps": 204665, "loss": 0.0224, "lr": 2.9516750119629176e-09, "epoch": 4.889942100505705, "percentage": 97.8, "elapsed_time": "4:19:03", "remaining_time": "0:05:49", "throughput": 8678.42, "total_tokens": 134890600} +{"current_steps": 200165, "total_steps": 204665, "loss": 0.0, "lr": 2.9451313180431924e-09, "epoch": 4.890064251337551, "percentage": 97.8, "elapsed_time": "4:19:03", "remaining_time": "0:05:49", "throughput": 8678.45, "total_tokens": 134894120} +{"current_steps": 200170, "total_steps": 204665, "loss": 0.0, "lr": 2.938594874974831e-09, "epoch": 4.890186402169399, "percentage": 97.8, "elapsed_time": "4:19:03", "remaining_time": "0:05:49", "throughput": 8678.47, "total_tokens": 134897384} +{"current_steps": 200175, "total_steps": 204665, "loss": 0.0, "lr": 2.9320656828050182e-09, "epoch": 4.890308553001246, "percentage": 97.81, "elapsed_time": "4:19:04", "remaining_time": "0:05:48", "throughput": 8678.5, "total_tokens": 134900904} +{"current_steps": 200180, "total_steps": 204665, "loss": 0.0, "lr": 2.9255437415816044e-09, "epoch": 4.890430703833093, "percentage": 97.81, "elapsed_time": "4:19:04", "remaining_time": "0:05:48", "throughput": 8678.5, "total_tokens": 134903912} +{"current_steps": 200185, "total_steps": 204665, "loss": 0.0, "lr": 2.9190290513516624e-09, "epoch": 4.89055285466494, "percentage": 97.81, "elapsed_time": "4:19:04", "remaining_time": "0:05:47", "throughput": 8678.51, "total_tokens": 134907048} +{"current_steps": 200190, "total_steps": 204665, "loss": 0.0, "lr": 2.9125216121628214e-09, "epoch": 4.890675005496788, "percentage": 97.81, "elapsed_time": "4:19:05", "remaining_time": "0:05:47", "throughput": 8678.52, "total_tokens": 134910184} +{"current_steps": 200195, "total_steps": 204665, "loss": 0.0, "lr": 2.906021424062155e-09, "epoch": 4.8907971563286345, "percentage": 97.82, "elapsed_time": "4:19:05", "remaining_time": "0:05:47", "throughput": 8678.52, "total_tokens": 134913192} +{"current_steps": 200200, "total_steps": 204665, "loss": 0.0, "lr": 2.8995284870971804e-09, "epoch": 4.890919307160482, "percentage": 97.82, "elapsed_time": "4:19:05", "remaining_time": "0:05:46", "throughput": 8678.54, "total_tokens": 134916456} +{"current_steps": 200205, "total_steps": 204665, "loss": 0.0, "lr": 2.893042801315082e-09, "epoch": 4.891041457992329, "percentage": 97.82, "elapsed_time": "4:19:06", "remaining_time": "0:05:46", "throughput": 8678.55, "total_tokens": 134919528} +{"current_steps": 200210, "total_steps": 204665, "loss": 0.0, "lr": 2.8865643667629336e-09, "epoch": 4.891163608824176, "percentage": 97.82, "elapsed_time": "4:19:06", "remaining_time": "0:05:45", "throughput": 8678.57, "total_tokens": 134922920} +{"current_steps": 200215, "total_steps": 204665, "loss": 0.0, "lr": 2.8800931834878085e-09, "epoch": 4.891285759656023, "percentage": 97.83, "elapsed_time": "4:19:07", "remaining_time": "0:05:45", "throughput": 8678.58, "total_tokens": 134925992} +{"current_steps": 200220, "total_steps": 204665, "loss": 0.0, "lr": 2.873629251536891e-09, "epoch": 4.89140791048787, "percentage": 97.83, "elapsed_time": "4:19:07", "remaining_time": "0:05:45", "throughput": 8678.58, "total_tokens": 134928936} +{"current_steps": 200225, "total_steps": 204665, "loss": 0.0, "lr": 2.8671725709571437e-09, "epoch": 4.891530061319718, "percentage": 97.83, "elapsed_time": "4:19:07", "remaining_time": "0:05:44", "throughput": 8678.61, "total_tokens": 134932328} +{"current_steps": 200230, "total_steps": 204665, "loss": 0.0, "lr": 2.860723141795529e-09, "epoch": 4.891652212151564, "percentage": 97.83, "elapsed_time": "4:19:08", "remaining_time": "0:05:44", "throughput": 8678.62, "total_tokens": 134935592} +{"current_steps": 200235, "total_steps": 204665, "loss": 0.0, "lr": 2.8542809640988986e-09, "epoch": 4.891774362983412, "percentage": 97.84, "elapsed_time": "4:19:08", "remaining_time": "0:05:43", "throughput": 8678.63, "total_tokens": 134938664} +{"current_steps": 200240, "total_steps": 204665, "loss": 0.0, "lr": 2.847846037914103e-09, "epoch": 4.891896513815259, "percentage": 97.84, "elapsed_time": "4:19:08", "remaining_time": "0:05:43", "throughput": 8678.63, "total_tokens": 134941608} +{"current_steps": 200245, "total_steps": 204665, "loss": 0.0001, "lr": 2.841418363287995e-09, "epoch": 4.892018664647106, "percentage": 97.84, "elapsed_time": "4:19:09", "remaining_time": "0:05:43", "throughput": 8678.67, "total_tokens": 134945192} +{"current_steps": 200250, "total_steps": 204665, "loss": 0.0, "lr": 2.834997940267425e-09, "epoch": 4.892140815478953, "percentage": 97.84, "elapsed_time": "4:19:09", "remaining_time": "0:05:42", "throughput": 8678.67, "total_tokens": 134948200} +{"current_steps": 200255, "total_steps": 204665, "loss": 0.0, "lr": 2.8285847688988006e-09, "epoch": 4.892262966310801, "percentage": 97.85, "elapsed_time": "4:19:09", "remaining_time": "0:05:42", "throughput": 8678.69, "total_tokens": 134951528} +{"current_steps": 200260, "total_steps": 204665, "loss": 0.0, "lr": 2.8221788492289733e-09, "epoch": 4.892385117142648, "percentage": 97.85, "elapsed_time": "4:19:10", "remaining_time": "0:05:42", "throughput": 8678.71, "total_tokens": 134954792} +{"current_steps": 200265, "total_steps": 204665, "loss": 0.0, "lr": 2.8157801813044613e-09, "epoch": 4.892507267974495, "percentage": 97.85, "elapsed_time": "4:19:10", "remaining_time": "0:05:41", "throughput": 8678.73, "total_tokens": 134958120} +{"current_steps": 200270, "total_steps": 204665, "loss": 0.0, "lr": 2.809388765171783e-09, "epoch": 4.892629418806342, "percentage": 97.85, "elapsed_time": "4:19:10", "remaining_time": "0:05:41", "throughput": 8678.74, "total_tokens": 134961128} +{"current_steps": 200275, "total_steps": 204665, "loss": 0.0, "lr": 2.8030046008774564e-09, "epoch": 4.89275156963819, "percentage": 97.86, "elapsed_time": "4:19:11", "remaining_time": "0:05:40", "throughput": 8678.75, "total_tokens": 134964264} +{"current_steps": 200280, "total_steps": 204665, "loss": 0.0, "lr": 2.79662768846789e-09, "epoch": 4.892873720470036, "percentage": 97.86, "elapsed_time": "4:19:11", "remaining_time": "0:05:40", "throughput": 8678.77, "total_tokens": 134967720} +{"current_steps": 200285, "total_steps": 204665, "loss": 0.0, "lr": 2.7902580279894895e-09, "epoch": 4.892995871301883, "percentage": 97.86, "elapsed_time": "4:19:11", "remaining_time": "0:05:40", "throughput": 8678.81, "total_tokens": 134971368} +{"current_steps": 200290, "total_steps": 204665, "loss": 0.0, "lr": 2.783895619488552e-09, "epoch": 4.893118022133731, "percentage": 97.86, "elapsed_time": "4:19:12", "remaining_time": "0:05:39", "throughput": 8678.85, "total_tokens": 134975080} +{"current_steps": 200295, "total_steps": 204665, "loss": 0.0, "lr": 2.7775404630112632e-09, "epoch": 4.893240172965578, "percentage": 97.86, "elapsed_time": "4:19:12", "remaining_time": "0:05:39", "throughput": 8678.87, "total_tokens": 134978472} +{"current_steps": 200300, "total_steps": 204665, "loss": 0.0, "lr": 2.7711925586040298e-09, "epoch": 4.893362323797425, "percentage": 97.87, "elapsed_time": "4:19:12", "remaining_time": "0:05:38", "throughput": 8678.91, "total_tokens": 134982248} +{"current_steps": 200305, "total_steps": 204665, "loss": 0.0, "lr": 2.764851906312815e-09, "epoch": 4.893484474629272, "percentage": 97.87, "elapsed_time": "4:19:13", "remaining_time": "0:05:38", "throughput": 8678.93, "total_tokens": 134985512} +{"current_steps": 200310, "total_steps": 204665, "loss": 0.0, "lr": 2.7585185061839154e-09, "epoch": 4.8936066254611195, "percentage": 97.87, "elapsed_time": "4:19:13", "remaining_time": "0:05:38", "throughput": 8678.94, "total_tokens": 134988520} +{"current_steps": 200315, "total_steps": 204665, "loss": 0.0, "lr": 2.7521923582631833e-09, "epoch": 4.893728776292966, "percentage": 97.87, "elapsed_time": "4:19:13", "remaining_time": "0:05:37", "throughput": 8679.0, "total_tokens": 134992616} +{"current_steps": 200320, "total_steps": 204665, "loss": 0.0, "lr": 2.745873462596804e-09, "epoch": 4.893850927124814, "percentage": 97.88, "elapsed_time": "4:19:14", "remaining_time": "0:05:37", "throughput": 8679.02, "total_tokens": 134996008} +{"current_steps": 200325, "total_steps": 204665, "loss": 0.0, "lr": 2.7395618192306292e-09, "epoch": 4.893973077956661, "percentage": 97.88, "elapsed_time": "4:19:14", "remaining_time": "0:05:36", "throughput": 8679.05, "total_tokens": 134999400} +{"current_steps": 200330, "total_steps": 204665, "loss": 0.0, "lr": 2.7332574282107335e-09, "epoch": 4.894095228788508, "percentage": 97.88, "elapsed_time": "4:19:14", "remaining_time": "0:05:36", "throughput": 8679.06, "total_tokens": 135002600} +{"current_steps": 200335, "total_steps": 204665, "loss": 0.0, "lr": 2.7269602895826362e-09, "epoch": 4.894217379620355, "percentage": 97.88, "elapsed_time": "4:19:15", "remaining_time": "0:05:36", "throughput": 8679.08, "total_tokens": 135005992} +{"current_steps": 200340, "total_steps": 204665, "loss": 0.0, "lr": 2.720670403392411e-09, "epoch": 4.894339530452203, "percentage": 97.89, "elapsed_time": "4:19:15", "remaining_time": "0:05:35", "throughput": 8679.09, "total_tokens": 135009128} +{"current_steps": 200345, "total_steps": 204665, "loss": 0.0, "lr": 2.7143877696856887e-09, "epoch": 4.894461681284049, "percentage": 97.89, "elapsed_time": "4:19:16", "remaining_time": "0:05:35", "throughput": 8679.12, "total_tokens": 135012648} +{"current_steps": 200350, "total_steps": 204665, "loss": 0.0, "lr": 2.70811238850821e-09, "epoch": 4.894583832115897, "percentage": 97.89, "elapsed_time": "4:19:16", "remaining_time": "0:05:35", "throughput": 8679.13, "total_tokens": 135015720} +{"current_steps": 200355, "total_steps": 204665, "loss": 0.0, "lr": 2.701844259905495e-09, "epoch": 4.894705982947744, "percentage": 97.89, "elapsed_time": "4:19:16", "remaining_time": "0:05:34", "throughput": 8679.17, "total_tokens": 135019560} +{"current_steps": 200360, "total_steps": 204665, "loss": 0.0, "lr": 2.6955833839232834e-09, "epoch": 4.894828133779591, "percentage": 97.9, "elapsed_time": "4:19:17", "remaining_time": "0:05:34", "throughput": 8679.22, "total_tokens": 135023400} +{"current_steps": 200365, "total_steps": 204665, "loss": 0.0, "lr": 2.6893297606069843e-09, "epoch": 4.894950284611438, "percentage": 97.9, "elapsed_time": "4:19:17", "remaining_time": "0:05:33", "throughput": 8679.28, "total_tokens": 135027432} +{"current_steps": 200370, "total_steps": 204665, "loss": 0.0, "lr": 2.6830833900021166e-09, "epoch": 4.895072435443286, "percentage": 97.9, "elapsed_time": "4:19:17", "remaining_time": "0:05:33", "throughput": 8679.3, "total_tokens": 135030760} +{"current_steps": 200375, "total_steps": 204665, "loss": 0.0, "lr": 2.6768442721541994e-09, "epoch": 4.8951945862751325, "percentage": 97.9, "elapsed_time": "4:19:18", "remaining_time": "0:05:33", "throughput": 8679.34, "total_tokens": 135034600} +{"current_steps": 200380, "total_steps": 204665, "loss": 0.0, "lr": 2.670612407108419e-09, "epoch": 4.895316737106979, "percentage": 97.91, "elapsed_time": "4:19:18", "remaining_time": "0:05:32", "throughput": 8679.38, "total_tokens": 135038248} +{"current_steps": 200385, "total_steps": 204665, "loss": 0.0, "lr": 2.6643877949101834e-09, "epoch": 4.895438887938827, "percentage": 97.91, "elapsed_time": "4:19:18", "remaining_time": "0:05:32", "throughput": 8679.4, "total_tokens": 135041576} +{"current_steps": 200390, "total_steps": 204665, "loss": 0.0, "lr": 2.6581704356047895e-09, "epoch": 4.8955610387706745, "percentage": 97.91, "elapsed_time": "4:19:19", "remaining_time": "0:05:31", "throughput": 8679.44, "total_tokens": 135045288} +{"current_steps": 200395, "total_steps": 204665, "loss": 0.0, "lr": 2.6519603292375347e-09, "epoch": 4.895683189602521, "percentage": 97.91, "elapsed_time": "4:19:19", "remaining_time": "0:05:31", "throughput": 8679.47, "total_tokens": 135048872} +{"current_steps": 200400, "total_steps": 204665, "loss": 0.0, "lr": 2.645757475853383e-09, "epoch": 4.895805340434368, "percentage": 97.92, "elapsed_time": "4:19:19", "remaining_time": "0:05:31", "throughput": 8679.49, "total_tokens": 135052072} +{"current_steps": 200405, "total_steps": 204665, "loss": 0.0, "lr": 2.639561875497631e-09, "epoch": 4.895927491266216, "percentage": 97.92, "elapsed_time": "4:19:20", "remaining_time": "0:05:30", "throughput": 8679.5, "total_tokens": 135055336} +{"current_steps": 200410, "total_steps": 204665, "loss": 0.0, "lr": 2.6333735282151326e-09, "epoch": 4.896049642098062, "percentage": 97.92, "elapsed_time": "4:19:20", "remaining_time": "0:05:30", "throughput": 8679.54, "total_tokens": 135058984} +{"current_steps": 200415, "total_steps": 204665, "loss": 0.0, "lr": 2.627192434050962e-09, "epoch": 4.89617179292991, "percentage": 97.92, "elapsed_time": "4:19:20", "remaining_time": "0:05:29", "throughput": 8679.56, "total_tokens": 135062248} +{"current_steps": 200420, "total_steps": 204665, "loss": 0.0, "lr": 2.621018593050195e-09, "epoch": 4.896293943761757, "percentage": 97.93, "elapsed_time": "4:19:21", "remaining_time": "0:05:29", "throughput": 8679.59, "total_tokens": 135065832} +{"current_steps": 200425, "total_steps": 204665, "loss": 0.0, "lr": 2.6148520052576838e-09, "epoch": 4.896416094593604, "percentage": 97.93, "elapsed_time": "4:19:21", "remaining_time": "0:05:29", "throughput": 8679.61, "total_tokens": 135069160} +{"current_steps": 200430, "total_steps": 204665, "loss": 0.0, "lr": 2.608692670718171e-09, "epoch": 4.896538245425451, "percentage": 97.93, "elapsed_time": "4:19:22", "remaining_time": "0:05:28", "throughput": 8679.63, "total_tokens": 135072552} +{"current_steps": 200435, "total_steps": 204665, "loss": 0.0, "lr": 2.6025405894766204e-09, "epoch": 4.896660396257299, "percentage": 97.93, "elapsed_time": "4:19:22", "remaining_time": "0:05:28", "throughput": 8679.64, "total_tokens": 135075688} +{"current_steps": 200440, "total_steps": 204665, "loss": 0.0, "lr": 2.596395761577552e-09, "epoch": 4.8967825470891455, "percentage": 97.94, "elapsed_time": "4:19:22", "remaining_time": "0:05:28", "throughput": 8679.65, "total_tokens": 135078824} +{"current_steps": 200445, "total_steps": 204665, "loss": 0.0, "lr": 2.590258187065708e-09, "epoch": 4.896904697920993, "percentage": 97.94, "elapsed_time": "4:19:23", "remaining_time": "0:05:27", "throughput": 8679.68, "total_tokens": 135082280} +{"current_steps": 200450, "total_steps": 204665, "loss": 0.0, "lr": 2.5841278659858303e-09, "epoch": 4.89702684875284, "percentage": 97.94, "elapsed_time": "4:19:23", "remaining_time": "0:05:27", "throughput": 8679.72, "total_tokens": 135085928} +{"current_steps": 200455, "total_steps": 204665, "loss": 0.0538, "lr": 2.578004798382549e-09, "epoch": 4.8971489995846875, "percentage": 97.94, "elapsed_time": "4:19:23", "remaining_time": "0:05:26", "throughput": 8679.76, "total_tokens": 135089768} +{"current_steps": 200460, "total_steps": 204665, "loss": 0.0, "lr": 2.5718889843001632e-09, "epoch": 4.897271150416534, "percentage": 97.95, "elapsed_time": "4:19:24", "remaining_time": "0:05:26", "throughput": 8679.78, "total_tokens": 135093096} +{"current_steps": 200465, "total_steps": 204665, "loss": 0.0, "lr": 2.5657804237833037e-09, "epoch": 4.897393301248382, "percentage": 97.95, "elapsed_time": "4:19:24", "remaining_time": "0:05:26", "throughput": 8679.81, "total_tokens": 135096552} +{"current_steps": 200470, "total_steps": 204665, "loss": 0.0, "lr": 2.559679116876379e-09, "epoch": 4.897515452080229, "percentage": 97.95, "elapsed_time": "4:19:24", "remaining_time": "0:05:25", "throughput": 8679.84, "total_tokens": 135100072} +{"current_steps": 200475, "total_steps": 204665, "loss": 0.0, "lr": 2.5535850636237976e-09, "epoch": 4.897637602912075, "percentage": 97.95, "elapsed_time": "4:19:25", "remaining_time": "0:05:25", "throughput": 8679.87, "total_tokens": 135103528} +{"current_steps": 200480, "total_steps": 204665, "loss": 0.0, "lr": 2.5474982640697475e-09, "epoch": 4.897759753743923, "percentage": 97.96, "elapsed_time": "4:19:25", "remaining_time": "0:05:24", "throughput": 8679.91, "total_tokens": 135107240} +{"current_steps": 200485, "total_steps": 204665, "loss": 0.0, "lr": 2.5414187182586365e-09, "epoch": 4.89788190457577, "percentage": 97.96, "elapsed_time": "4:19:25", "remaining_time": "0:05:24", "throughput": 8679.95, "total_tokens": 135110952} +{"current_steps": 200490, "total_steps": 204665, "loss": 0.0, "lr": 2.5353464262345415e-09, "epoch": 4.898004055407617, "percentage": 97.96, "elapsed_time": "4:19:26", "remaining_time": "0:05:24", "throughput": 8680.0, "total_tokens": 135114728} +{"current_steps": 200495, "total_steps": 204665, "loss": 0.0, "lr": 2.5292813880417595e-09, "epoch": 4.898126206239464, "percentage": 97.96, "elapsed_time": "4:19:26", "remaining_time": "0:05:23", "throughput": 8680.02, "total_tokens": 135118120} +{"current_steps": 200500, "total_steps": 204665, "loss": 0.0, "lr": 2.523223603724367e-09, "epoch": 4.898248357071312, "percentage": 97.96, "elapsed_time": "4:19:26", "remaining_time": "0:05:23", "throughput": 8680.05, "total_tokens": 135121640} +{"current_steps": 200505, "total_steps": 204665, "loss": 0.0, "lr": 2.5171730733262175e-09, "epoch": 4.8983705079031585, "percentage": 97.97, "elapsed_time": "4:19:27", "remaining_time": "0:05:22", "throughput": 8680.11, "total_tokens": 135125672} +{"current_steps": 200510, "total_steps": 204665, "loss": 0.0, "lr": 2.511129796891609e-09, "epoch": 4.898492658735006, "percentage": 97.97, "elapsed_time": "4:19:27", "remaining_time": "0:05:22", "throughput": 8680.13, "total_tokens": 135129000} +{"current_steps": 200515, "total_steps": 204665, "loss": 0.0318, "lr": 2.5050937744643952e-09, "epoch": 4.898614809566853, "percentage": 97.97, "elapsed_time": "4:19:27", "remaining_time": "0:05:22", "throughput": 8680.15, "total_tokens": 135132392} +{"current_steps": 200520, "total_steps": 204665, "loss": 0.0, "lr": 2.4990650060883188e-09, "epoch": 4.8987369603987005, "percentage": 97.97, "elapsed_time": "4:19:28", "remaining_time": "0:05:21", "throughput": 8680.17, "total_tokens": 135135656} +{"current_steps": 200525, "total_steps": 204665, "loss": 0.0, "lr": 2.493043491807345e-09, "epoch": 4.898859111230547, "percentage": 97.98, "elapsed_time": "4:19:28", "remaining_time": "0:05:21", "throughput": 8680.21, "total_tokens": 135139496} +{"current_steps": 200530, "total_steps": 204665, "loss": 0.0, "lr": 2.4870292316653275e-09, "epoch": 4.898981262062395, "percentage": 97.98, "elapsed_time": "4:19:29", "remaining_time": "0:05:21", "throughput": 8680.23, "total_tokens": 135142696} +{"current_steps": 200535, "total_steps": 204665, "loss": 0.0, "lr": 2.481022225705898e-09, "epoch": 4.899103412894242, "percentage": 97.98, "elapsed_time": "4:19:29", "remaining_time": "0:05:20", "throughput": 8680.24, "total_tokens": 135145832} +{"current_steps": 200540, "total_steps": 204665, "loss": 0.0, "lr": 2.475022473972799e-09, "epoch": 4.899225563726089, "percentage": 97.98, "elapsed_time": "4:19:29", "remaining_time": "0:05:20", "throughput": 8680.28, "total_tokens": 135149608} +{"current_steps": 200545, "total_steps": 204665, "loss": 0.0, "lr": 2.469029976509662e-09, "epoch": 4.899347714557936, "percentage": 97.99, "elapsed_time": "4:19:30", "remaining_time": "0:05:19", "throughput": 8680.31, "total_tokens": 135153192} +{"current_steps": 200550, "total_steps": 204665, "loss": 0.0, "lr": 2.463044733360009e-09, "epoch": 4.899469865389784, "percentage": 97.99, "elapsed_time": "4:19:30", "remaining_time": "0:05:19", "throughput": 8680.34, "total_tokens": 135156712} +{"current_steps": 200555, "total_steps": 204665, "loss": 0.0, "lr": 2.4570667445673597e-09, "epoch": 4.89959201622163, "percentage": 97.99, "elapsed_time": "4:19:30", "remaining_time": "0:05:19", "throughput": 8680.36, "total_tokens": 135159912} +{"current_steps": 200560, "total_steps": 204665, "loss": 0.0339, "lr": 2.4510960101752355e-09, "epoch": 4.899714167053478, "percentage": 97.99, "elapsed_time": "4:19:31", "remaining_time": "0:05:18", "throughput": 8680.39, "total_tokens": 135163496} +{"current_steps": 200565, "total_steps": 204665, "loss": 0.0, "lr": 2.4451325302270455e-09, "epoch": 4.899836317885325, "percentage": 98.0, "elapsed_time": "4:19:31", "remaining_time": "0:05:18", "throughput": 8680.41, "total_tokens": 135166888} +{"current_steps": 200570, "total_steps": 204665, "loss": 0.0002, "lr": 2.4391763047661997e-09, "epoch": 4.8999584687171716, "percentage": 98.0, "elapsed_time": "4:19:31", "remaining_time": "0:05:17", "throughput": 8680.43, "total_tokens": 135170216} +{"current_steps": 200575, "total_steps": 204665, "loss": 0.0, "lr": 2.4332273338359965e-09, "epoch": 4.900080619549019, "percentage": 98.0, "elapsed_time": "4:19:32", "remaining_time": "0:05:17", "throughput": 8680.46, "total_tokens": 135173608} +{"current_steps": 200580, "total_steps": 204665, "loss": 0.0, "lr": 2.4272856174796242e-09, "epoch": 4.900202770380866, "percentage": 98.0, "elapsed_time": "4:19:32", "remaining_time": "0:05:17", "throughput": 8680.46, "total_tokens": 135176552} +{"current_steps": 200585, "total_steps": 204665, "loss": 0.0, "lr": 2.421351155740381e-09, "epoch": 4.900324921212714, "percentage": 98.01, "elapsed_time": "4:19:32", "remaining_time": "0:05:16", "throughput": 8680.5, "total_tokens": 135180328} +{"current_steps": 200590, "total_steps": 204665, "loss": 0.0, "lr": 2.4154239486613438e-09, "epoch": 4.90044707204456, "percentage": 98.01, "elapsed_time": "4:19:33", "remaining_time": "0:05:16", "throughput": 8680.54, "total_tokens": 135184040} +{"current_steps": 200595, "total_steps": 204665, "loss": 0.0, "lr": 2.4095039962857e-09, "epoch": 4.900569222876408, "percentage": 98.01, "elapsed_time": "4:19:33", "remaining_time": "0:05:15", "throughput": 8680.57, "total_tokens": 135187624} +{"current_steps": 200600, "total_steps": 204665, "loss": 0.0, "lr": 2.4035912986564155e-09, "epoch": 4.900691373708255, "percentage": 98.01, "elapsed_time": "4:19:33", "remaining_time": "0:05:15", "throughput": 8680.59, "total_tokens": 135190760} +{"current_steps": 200605, "total_steps": 204665, "loss": 0.0002, "lr": 2.3976858558165667e-09, "epoch": 4.900813524540102, "percentage": 98.02, "elapsed_time": "4:19:34", "remaining_time": "0:05:15", "throughput": 8680.61, "total_tokens": 135194216} +{"current_steps": 200610, "total_steps": 204665, "loss": 0.0, "lr": 2.3917876678091197e-09, "epoch": 4.900935675371949, "percentage": 98.02, "elapsed_time": "4:19:34", "remaining_time": "0:05:14", "throughput": 8680.62, "total_tokens": 135197288} +{"current_steps": 200615, "total_steps": 204665, "loss": 0.0, "lr": 2.385896734676818e-09, "epoch": 4.901057826203797, "percentage": 98.02, "elapsed_time": "4:19:34", "remaining_time": "0:05:14", "throughput": 8680.65, "total_tokens": 135200872} +{"current_steps": 200620, "total_steps": 204665, "loss": 0.0, "lr": 2.3800130564627374e-09, "epoch": 4.9011799770356435, "percentage": 98.02, "elapsed_time": "4:19:35", "remaining_time": "0:05:14", "throughput": 8680.67, "total_tokens": 135204008} +{"current_steps": 200625, "total_steps": 204665, "loss": 0.0, "lr": 2.3741366332094003e-09, "epoch": 4.901302127867491, "percentage": 98.03, "elapsed_time": "4:19:35", "remaining_time": "0:05:13", "throughput": 8680.68, "total_tokens": 135207208} +{"current_steps": 200630, "total_steps": 204665, "loss": 0.0, "lr": 2.3682674649597725e-09, "epoch": 4.901424278699338, "percentage": 98.03, "elapsed_time": "4:19:35", "remaining_time": "0:05:13", "throughput": 8680.71, "total_tokens": 135210664} +{"current_steps": 200635, "total_steps": 204665, "loss": 0.0, "lr": 2.3624055517562634e-09, "epoch": 4.9015464295311855, "percentage": 98.03, "elapsed_time": "4:19:36", "remaining_time": "0:05:12", "throughput": 8680.72, "total_tokens": 135213800} +{"current_steps": 200640, "total_steps": 204665, "loss": 0.0, "lr": 2.35655089364184e-09, "epoch": 4.901668580363032, "percentage": 98.03, "elapsed_time": "4:19:36", "remaining_time": "0:05:12", "throughput": 8680.72, "total_tokens": 135216744} +{"current_steps": 200645, "total_steps": 204665, "loss": 0.0, "lr": 2.350703490658912e-09, "epoch": 4.901790731194879, "percentage": 98.04, "elapsed_time": "4:19:37", "remaining_time": "0:05:12", "throughput": 8680.76, "total_tokens": 135220392} +{"current_steps": 200650, "total_steps": 204665, "loss": 0.0, "lr": 2.34486334284989e-09, "epoch": 4.901912882026727, "percentage": 98.04, "elapsed_time": "4:19:37", "remaining_time": "0:05:11", "throughput": 8680.77, "total_tokens": 135223592} +{"current_steps": 200655, "total_steps": 204665, "loss": 0.0, "lr": 2.3390304502575175e-09, "epoch": 4.902035032858574, "percentage": 98.04, "elapsed_time": "4:19:37", "remaining_time": "0:05:11", "throughput": 8680.79, "total_tokens": 135226856} +{"current_steps": 200660, "total_steps": 204665, "loss": 0.0, "lr": 2.3332048129238724e-09, "epoch": 4.902157183690421, "percentage": 98.04, "elapsed_time": "4:19:38", "remaining_time": "0:05:10", "throughput": 8680.83, "total_tokens": 135230568} +{"current_steps": 200665, "total_steps": 204665, "loss": 0.0, "lr": 2.3273864308915867e-09, "epoch": 4.902279334522268, "percentage": 98.05, "elapsed_time": "4:19:38", "remaining_time": "0:05:10", "throughput": 8680.84, "total_tokens": 135233704} +{"current_steps": 200670, "total_steps": 204665, "loss": 0.0, "lr": 2.321575304202961e-09, "epoch": 4.902401485354115, "percentage": 98.05, "elapsed_time": "4:19:38", "remaining_time": "0:05:10", "throughput": 8680.87, "total_tokens": 135237288} +{"current_steps": 200675, "total_steps": 204665, "loss": 0.0, "lr": 2.315771432900071e-09, "epoch": 4.902523636185962, "percentage": 98.05, "elapsed_time": "4:19:39", "remaining_time": "0:05:09", "throughput": 8680.91, "total_tokens": 135241000} +{"current_steps": 200680, "total_steps": 204665, "loss": 0.0, "lr": 2.3099748170253287e-09, "epoch": 4.90264578701781, "percentage": 98.05, "elapsed_time": "4:19:39", "remaining_time": "0:05:09", "throughput": 8680.94, "total_tokens": 135244520} +{"current_steps": 200685, "total_steps": 204665, "loss": 0.0, "lr": 2.3041854566206997e-09, "epoch": 4.9027679378496565, "percentage": 98.06, "elapsed_time": "4:19:39", "remaining_time": "0:05:08", "throughput": 8680.97, "total_tokens": 135247976} +{"current_steps": 200690, "total_steps": 204665, "loss": 0.0, "lr": 2.298403351728484e-09, "epoch": 4.902890088681504, "percentage": 98.06, "elapsed_time": "4:19:40", "remaining_time": "0:05:08", "throughput": 8681.01, "total_tokens": 135251560} +{"current_steps": 200695, "total_steps": 204665, "loss": 0.0, "lr": 2.2926285023905368e-09, "epoch": 4.903012239513351, "percentage": 98.06, "elapsed_time": "4:19:40", "remaining_time": "0:05:08", "throughput": 8681.03, "total_tokens": 135254824} +{"current_steps": 200700, "total_steps": 204665, "loss": 0.0, "lr": 2.2868609086489355e-09, "epoch": 4.9031343903451985, "percentage": 98.06, "elapsed_time": "4:19:40", "remaining_time": "0:05:07", "throughput": 8681.02, "total_tokens": 135257704} +{"current_steps": 200705, "total_steps": 204665, "loss": 0.0, "lr": 2.2811005705456466e-09, "epoch": 4.903256541177045, "percentage": 98.07, "elapsed_time": "4:19:41", "remaining_time": "0:05:07", "throughput": 8681.07, "total_tokens": 135261416} +{"current_steps": 200710, "total_steps": 204665, "loss": 0.0, "lr": 2.2753474881226365e-09, "epoch": 4.903378692008893, "percentage": 98.07, "elapsed_time": "4:19:41", "remaining_time": "0:05:07", "throughput": 8681.07, "total_tokens": 135264424} +{"current_steps": 200715, "total_steps": 204665, "loss": 0.0, "lr": 2.2696016614216497e-09, "epoch": 4.90350084284074, "percentage": 98.07, "elapsed_time": "4:19:41", "remaining_time": "0:05:06", "throughput": 8681.09, "total_tokens": 135267816} +{"current_steps": 200720, "total_steps": 204665, "loss": 0.0, "lr": 2.263863090484319e-09, "epoch": 4.903622993672587, "percentage": 98.07, "elapsed_time": "4:19:42", "remaining_time": "0:05:06", "throughput": 8681.12, "total_tokens": 135271400} +{"current_steps": 200725, "total_steps": 204665, "loss": 0.0438, "lr": 2.2581317753527227e-09, "epoch": 4.903745144504434, "percentage": 98.07, "elapsed_time": "4:19:42", "remaining_time": "0:05:05", "throughput": 8681.14, "total_tokens": 135274664} +{"current_steps": 200730, "total_steps": 204665, "loss": 0.0, "lr": 2.252407716068272e-09, "epoch": 4.903867295336282, "percentage": 98.08, "elapsed_time": "4:19:42", "remaining_time": "0:05:05", "throughput": 8681.17, "total_tokens": 135278248} +{"current_steps": 200735, "total_steps": 204665, "loss": 0.0, "lr": 2.2466909126726e-09, "epoch": 4.903989446168128, "percentage": 98.08, "elapsed_time": "4:19:43", "remaining_time": "0:05:05", "throughput": 8681.19, "total_tokens": 135281576} +{"current_steps": 200740, "total_steps": 204665, "loss": 0.0, "lr": 2.2409813652074503e-09, "epoch": 4.904111596999975, "percentage": 98.08, "elapsed_time": "4:19:43", "remaining_time": "0:05:04", "throughput": 8681.22, "total_tokens": 135285160} +{"current_steps": 200745, "total_steps": 204665, "loss": 0.0, "lr": 2.2352790737142357e-09, "epoch": 4.904233747831823, "percentage": 98.08, "elapsed_time": "4:19:43", "remaining_time": "0:05:04", "throughput": 8681.25, "total_tokens": 135288552} +{"current_steps": 200750, "total_steps": 204665, "loss": 0.0, "lr": 2.2295840382344776e-09, "epoch": 4.90435589866367, "percentage": 98.09, "elapsed_time": "4:19:44", "remaining_time": "0:05:03", "throughput": 8681.27, "total_tokens": 135291944} +{"current_steps": 200755, "total_steps": 204665, "loss": 0.0, "lr": 2.2238962588094766e-09, "epoch": 4.904478049495517, "percentage": 98.09, "elapsed_time": "4:19:44", "remaining_time": "0:05:03", "throughput": 8681.29, "total_tokens": 135295208} +{"current_steps": 200760, "total_steps": 204665, "loss": 0.0383, "lr": 2.2182157354807552e-09, "epoch": 4.904600200327364, "percentage": 98.09, "elapsed_time": "4:19:45", "remaining_time": "0:05:03", "throughput": 8681.29, "total_tokens": 135298088} +{"current_steps": 200765, "total_steps": 204665, "loss": 0.0001, "lr": 2.212542468289502e-09, "epoch": 4.9047223511592115, "percentage": 98.09, "elapsed_time": "4:19:45", "remaining_time": "0:05:02", "throughput": 8681.33, "total_tokens": 135301864} +{"current_steps": 200770, "total_steps": 204665, "loss": 0.0, "lr": 2.206876457276907e-09, "epoch": 4.904844501991058, "percentage": 98.1, "elapsed_time": "4:19:45", "remaining_time": "0:05:02", "throughput": 8681.34, "total_tokens": 135304936} +{"current_steps": 200775, "total_steps": 204665, "loss": 0.0, "lr": 2.2012177024843816e-09, "epoch": 4.904966652822906, "percentage": 98.1, "elapsed_time": "4:19:46", "remaining_time": "0:05:01", "throughput": 8681.35, "total_tokens": 135308200} +{"current_steps": 200780, "total_steps": 204665, "loss": 0.0021, "lr": 2.1955662039530032e-09, "epoch": 4.905088803654753, "percentage": 98.1, "elapsed_time": "4:19:46", "remaining_time": "0:05:01", "throughput": 8681.37, "total_tokens": 135311464} +{"current_steps": 200785, "total_steps": 204665, "loss": 0.0, "lr": 2.189921961723851e-09, "epoch": 4.9052109544866, "percentage": 98.1, "elapsed_time": "4:19:46", "remaining_time": "0:05:01", "throughput": 8681.4, "total_tokens": 135314984} +{"current_steps": 200790, "total_steps": 204665, "loss": 0.0, "lr": 2.184284975837891e-09, "epoch": 4.905333105318447, "percentage": 98.11, "elapsed_time": "4:19:47", "remaining_time": "0:05:00", "throughput": 8681.4, "total_tokens": 135317928} +{"current_steps": 200795, "total_steps": 204665, "loss": 0.0, "lr": 2.178655246336203e-09, "epoch": 4.905455256150295, "percentage": 98.11, "elapsed_time": "4:19:47", "remaining_time": "0:05:00", "throughput": 8681.43, "total_tokens": 135321448} +{"current_steps": 200800, "total_steps": 204665, "loss": 0.0, "lr": 2.173032773259753e-09, "epoch": 4.905577406982141, "percentage": 98.11, "elapsed_time": "4:19:47", "remaining_time": "0:05:00", "throughput": 8681.44, "total_tokens": 135324648} +{"current_steps": 200805, "total_steps": 204665, "loss": 0.0, "lr": 2.167417556649287e-09, "epoch": 4.905699557813989, "percentage": 98.11, "elapsed_time": "4:19:48", "remaining_time": "0:04:59", "throughput": 8681.47, "total_tokens": 135328104} +{"current_steps": 200810, "total_steps": 204665, "loss": 0.0, "lr": 2.1618095965458826e-09, "epoch": 4.905821708645836, "percentage": 98.12, "elapsed_time": "4:19:48", "remaining_time": "0:04:59", "throughput": 8681.49, "total_tokens": 135331496} +{"current_steps": 200815, "total_steps": 204665, "loss": 0.0402, "lr": 2.1562088929901745e-09, "epoch": 4.905943859477683, "percentage": 98.12, "elapsed_time": "4:19:48", "remaining_time": "0:04:58", "throughput": 8681.5, "total_tokens": 135334504} +{"current_steps": 200820, "total_steps": 204665, "loss": 0.0, "lr": 2.1506154460227965e-09, "epoch": 4.90606601030953, "percentage": 98.12, "elapsed_time": "4:19:49", "remaining_time": "0:04:58", "throughput": 8681.53, "total_tokens": 135338024} +{"current_steps": 200825, "total_steps": 204665, "loss": 0.0, "lr": 2.145029255684605e-09, "epoch": 4.906188161141378, "percentage": 98.12, "elapsed_time": "4:19:49", "remaining_time": "0:04:58", "throughput": 8681.54, "total_tokens": 135341160} +{"current_steps": 200830, "total_steps": 204665, "loss": 0.0, "lr": 2.139450322016123e-09, "epoch": 4.9063103119732245, "percentage": 98.13, "elapsed_time": "4:19:49", "remaining_time": "0:04:57", "throughput": 8681.55, "total_tokens": 135344360} +{"current_steps": 200835, "total_steps": 204665, "loss": 0.0, "lr": 2.1338786450579847e-09, "epoch": 4.906432462805071, "percentage": 98.13, "elapsed_time": "4:19:50", "remaining_time": "0:04:57", "throughput": 8681.57, "total_tokens": 135347752} +{"current_steps": 200840, "total_steps": 204665, "loss": 0.0, "lr": 2.1283142248507135e-09, "epoch": 4.906554613636919, "percentage": 98.13, "elapsed_time": "4:19:50", "remaining_time": "0:04:56", "throughput": 8681.59, "total_tokens": 135351080} +{"current_steps": 200845, "total_steps": 204665, "loss": 0.0, "lr": 2.1227570614346103e-09, "epoch": 4.906676764468766, "percentage": 98.13, "elapsed_time": "4:19:50", "remaining_time": "0:04:56", "throughput": 8681.62, "total_tokens": 135354536} +{"current_steps": 200850, "total_steps": 204665, "loss": 0.0, "lr": 2.117207154850309e-09, "epoch": 4.906798915300613, "percentage": 98.14, "elapsed_time": "4:19:51", "remaining_time": "0:04:56", "throughput": 8681.64, "total_tokens": 135357992} +{"current_steps": 200855, "total_steps": 204665, "loss": 0.0, "lr": 2.111664505138111e-09, "epoch": 4.90692106613246, "percentage": 98.14, "elapsed_time": "4:19:51", "remaining_time": "0:04:55", "throughput": 8681.65, "total_tokens": 135361128} +{"current_steps": 200860, "total_steps": 204665, "loss": 0.0, "lr": 2.1061291123382063e-09, "epoch": 4.907043216964308, "percentage": 98.14, "elapsed_time": "4:19:51", "remaining_time": "0:04:55", "throughput": 8681.69, "total_tokens": 135364776} +{"current_steps": 200865, "total_steps": 204665, "loss": 0.0, "lr": 2.100600976491007e-09, "epoch": 4.907165367796154, "percentage": 98.14, "elapsed_time": "4:19:52", "remaining_time": "0:04:54", "throughput": 8681.71, "total_tokens": 135368104} +{"current_steps": 200870, "total_steps": 204665, "loss": 0.0, "lr": 2.095080097636592e-09, "epoch": 4.907287518628002, "percentage": 98.15, "elapsed_time": "4:19:52", "remaining_time": "0:04:54", "throughput": 8681.71, "total_tokens": 135370984} +{"current_steps": 200875, "total_steps": 204665, "loss": 0.0, "lr": 2.089566475815152e-09, "epoch": 4.907409669459849, "percentage": 98.15, "elapsed_time": "4:19:53", "remaining_time": "0:04:54", "throughput": 8681.73, "total_tokens": 135374248} +{"current_steps": 200880, "total_steps": 204665, "loss": 0.0, "lr": 2.0840601110667654e-09, "epoch": 4.907531820291696, "percentage": 98.15, "elapsed_time": "4:19:53", "remaining_time": "0:04:53", "throughput": 8681.74, "total_tokens": 135377512} +{"current_steps": 200885, "total_steps": 204665, "loss": 0.0, "lr": 2.0785610034315114e-09, "epoch": 4.907653971123543, "percentage": 98.15, "elapsed_time": "4:19:53", "remaining_time": "0:04:53", "throughput": 8681.76, "total_tokens": 135380840} +{"current_steps": 200890, "total_steps": 204665, "loss": 0.0, "lr": 2.0730691529493583e-09, "epoch": 4.907776121955391, "percentage": 98.16, "elapsed_time": "4:19:54", "remaining_time": "0:04:53", "throughput": 8681.79, "total_tokens": 135384360} +{"current_steps": 200895, "total_steps": 204665, "loss": 0.0, "lr": 2.0675845596602737e-09, "epoch": 4.907898272787238, "percentage": 98.16, "elapsed_time": "4:19:54", "remaining_time": "0:04:52", "throughput": 8681.82, "total_tokens": 135387816} +{"current_steps": 200900, "total_steps": 204665, "loss": 0.0, "lr": 2.0621072236042257e-09, "epoch": 4.908020423619085, "percentage": 98.16, "elapsed_time": "4:19:54", "remaining_time": "0:04:52", "throughput": 8681.82, "total_tokens": 135390888} +{"current_steps": 200905, "total_steps": 204665, "loss": 0.0, "lr": 2.0566371448208497e-09, "epoch": 4.908142574450932, "percentage": 98.16, "elapsed_time": "4:19:55", "remaining_time": "0:04:51", "throughput": 8681.84, "total_tokens": 135394216} +{"current_steps": 200910, "total_steps": 204665, "loss": 0.0, "lr": 2.0511743233500024e-09, "epoch": 4.908264725282779, "percentage": 98.17, "elapsed_time": "4:19:55", "remaining_time": "0:04:51", "throughput": 8681.87, "total_tokens": 135397608} +{"current_steps": 200915, "total_steps": 204665, "loss": 0.0, "lr": 2.0457187592314294e-09, "epoch": 4.908386876114626, "percentage": 98.17, "elapsed_time": "4:19:55", "remaining_time": "0:04:51", "throughput": 8681.9, "total_tokens": 135401256} +{"current_steps": 200920, "total_steps": 204665, "loss": 0.0, "lr": 2.0402704525048776e-09, "epoch": 4.908509026946474, "percentage": 98.17, "elapsed_time": "4:19:56", "remaining_time": "0:04:50", "throughput": 8681.91, "total_tokens": 135404328} +{"current_steps": 200925, "total_steps": 204665, "loss": 0.0, "lr": 2.03482940320987e-09, "epoch": 4.908631177778321, "percentage": 98.17, "elapsed_time": "4:19:56", "remaining_time": "0:04:50", "throughput": 8681.93, "total_tokens": 135407528} +{"current_steps": 200930, "total_steps": 204665, "loss": 0.0, "lr": 2.029395611386042e-09, "epoch": 4.908753328610167, "percentage": 98.18, "elapsed_time": "4:19:56", "remaining_time": "0:04:49", "throughput": 8681.94, "total_tokens": 135410728} +{"current_steps": 200935, "total_steps": 204665, "loss": 0.0, "lr": 2.0239690770728068e-09, "epoch": 4.908875479442015, "percentage": 98.18, "elapsed_time": "4:19:57", "remaining_time": "0:04:49", "throughput": 8681.99, "total_tokens": 135414696} +{"current_steps": 200940, "total_steps": 204665, "loss": 0.0, "lr": 2.018549800309688e-09, "epoch": 4.908997630273862, "percentage": 98.18, "elapsed_time": "4:19:57", "remaining_time": "0:04:49", "throughput": 8682.01, "total_tokens": 135418088} +{"current_steps": 200945, "total_steps": 204665, "loss": 0.0, "lr": 2.0131377811360982e-09, "epoch": 4.9091197811057095, "percentage": 98.18, "elapsed_time": "4:19:57", "remaining_time": "0:04:48", "throughput": 8682.04, "total_tokens": 135421544} +{"current_steps": 200950, "total_steps": 204665, "loss": 0.0, "lr": 2.0077330195914512e-09, "epoch": 4.909241931937556, "percentage": 98.18, "elapsed_time": "4:19:58", "remaining_time": "0:04:48", "throughput": 8682.07, "total_tokens": 135425064} +{"current_steps": 200955, "total_steps": 204665, "loss": 0.0, "lr": 2.0023355157149367e-09, "epoch": 4.909364082769404, "percentage": 98.19, "elapsed_time": "4:19:58", "remaining_time": "0:04:47", "throughput": 8682.1, "total_tokens": 135428712} +{"current_steps": 200960, "total_steps": 204665, "loss": 0.0, "lr": 1.9969452695458576e-09, "epoch": 4.909486233601251, "percentage": 98.19, "elapsed_time": "4:19:58", "remaining_time": "0:04:47", "throughput": 8682.12, "total_tokens": 135432040} +{"current_steps": 200965, "total_steps": 204665, "loss": 0.0, "lr": 1.9915622811235155e-09, "epoch": 4.909608384433098, "percentage": 98.19, "elapsed_time": "4:19:59", "remaining_time": "0:04:47", "throughput": 8682.14, "total_tokens": 135435304} +{"current_steps": 200970, "total_steps": 204665, "loss": 0.0, "lr": 1.9861865504868792e-09, "epoch": 4.909730535264945, "percentage": 98.19, "elapsed_time": "4:19:59", "remaining_time": "0:04:46", "throughput": 8682.15, "total_tokens": 135438504} +{"current_steps": 200975, "total_steps": 204665, "loss": 0.0, "lr": 1.9808180776751393e-09, "epoch": 4.909852686096793, "percentage": 98.2, "elapsed_time": "4:19:59", "remaining_time": "0:04:46", "throughput": 8682.16, "total_tokens": 135441576} +{"current_steps": 200980, "total_steps": 204665, "loss": 0.0, "lr": 1.975456862727376e-09, "epoch": 4.909974836928639, "percentage": 98.2, "elapsed_time": "4:20:00", "remaining_time": "0:04:46", "throughput": 8682.19, "total_tokens": 135445160} +{"current_steps": 200985, "total_steps": 204665, "loss": 0.0, "lr": 1.970102905682447e-09, "epoch": 4.910096987760487, "percentage": 98.2, "elapsed_time": "4:20:00", "remaining_time": "0:04:45", "throughput": 8682.23, "total_tokens": 135448872} +{"current_steps": 200990, "total_steps": 204665, "loss": 0.0001, "lr": 1.964756206579432e-09, "epoch": 4.910219138592334, "percentage": 98.2, "elapsed_time": "4:20:01", "remaining_time": "0:04:45", "throughput": 8682.26, "total_tokens": 135452328} +{"current_steps": 200995, "total_steps": 204665, "loss": 0.0001, "lr": 1.959416765457189e-09, "epoch": 4.910341289424181, "percentage": 98.21, "elapsed_time": "4:20:01", "remaining_time": "0:04:44", "throughput": 8682.29, "total_tokens": 135455976} +{"current_steps": 201000, "total_steps": 204665, "loss": 0.0, "lr": 1.954084582354465e-09, "epoch": 4.910463440256028, "percentage": 98.21, "elapsed_time": "4:20:01", "remaining_time": "0:04:44", "throughput": 8682.32, "total_tokens": 135459368} +{"current_steps": 201005, "total_steps": 204665, "loss": 0.0, "lr": 1.948759657310006e-09, "epoch": 4.910585591087875, "percentage": 98.21, "elapsed_time": "4:20:02", "remaining_time": "0:04:44", "throughput": 8682.35, "total_tokens": 135462952} +{"current_steps": 201010, "total_steps": 204665, "loss": 0.0, "lr": 1.9434419903626708e-09, "epoch": 4.9107077419197225, "percentage": 98.21, "elapsed_time": "4:20:02", "remaining_time": "0:04:43", "throughput": 8682.4, "total_tokens": 135466856} +{"current_steps": 201015, "total_steps": 204665, "loss": 0.0, "lr": 1.9381315815510946e-09, "epoch": 4.91082989275157, "percentage": 98.22, "elapsed_time": "4:20:02", "remaining_time": "0:04:43", "throughput": 8682.41, "total_tokens": 135469992} +{"current_steps": 201020, "total_steps": 204665, "loss": 0.0, "lr": 1.9328284309138022e-09, "epoch": 4.910952043583417, "percentage": 98.22, "elapsed_time": "4:20:03", "remaining_time": "0:04:42", "throughput": 8682.45, "total_tokens": 135473576} +{"current_steps": 201025, "total_steps": 204665, "loss": 0.0, "lr": 1.92753253848954e-09, "epoch": 4.911074194415264, "percentage": 98.22, "elapsed_time": "4:20:03", "remaining_time": "0:04:42", "throughput": 8682.48, "total_tokens": 135477160} +{"current_steps": 201030, "total_steps": 204665, "loss": 0.0, "lr": 1.9222439043166116e-09, "epoch": 4.911196345247111, "percentage": 98.22, "elapsed_time": "4:20:03", "remaining_time": "0:04:42", "throughput": 8682.51, "total_tokens": 135480680} +{"current_steps": 201035, "total_steps": 204665, "loss": 0.0, "lr": 1.9169625284336523e-09, "epoch": 4.911318496078958, "percentage": 98.23, "elapsed_time": "4:20:04", "remaining_time": "0:04:41", "throughput": 8682.61, "total_tokens": 135485544} +{"current_steps": 201040, "total_steps": 204665, "loss": 0.0, "lr": 1.9116884108789644e-09, "epoch": 4.911440646910806, "percentage": 98.23, "elapsed_time": "4:20:04", "remaining_time": "0:04:41", "throughput": 8682.62, "total_tokens": 135488808} +{"current_steps": 201045, "total_steps": 204665, "loss": 0.0, "lr": 1.9064215516908513e-09, "epoch": 4.911562797742652, "percentage": 98.23, "elapsed_time": "4:20:04", "remaining_time": "0:04:40", "throughput": 8682.65, "total_tokens": 135492200} +{"current_steps": 201050, "total_steps": 204665, "loss": 0.0, "lr": 1.901161950907837e-09, "epoch": 4.9116849485745, "percentage": 98.23, "elapsed_time": "4:20:05", "remaining_time": "0:04:40", "throughput": 8682.69, "total_tokens": 135495912} +{"current_steps": 201055, "total_steps": 204665, "loss": 0.0, "lr": 1.8959096085678915e-09, "epoch": 4.911807099406347, "percentage": 98.24, "elapsed_time": "4:20:05", "remaining_time": "0:04:40", "throughput": 8682.69, "total_tokens": 135498984} +{"current_steps": 201060, "total_steps": 204665, "loss": 0.0, "lr": 1.8906645247094288e-09, "epoch": 4.911929250238194, "percentage": 98.24, "elapsed_time": "4:20:05", "remaining_time": "0:04:39", "throughput": 8682.71, "total_tokens": 135502184} +{"current_steps": 201065, "total_steps": 204665, "loss": 0.0, "lr": 1.885426699370529e-09, "epoch": 4.912051401070041, "percentage": 98.24, "elapsed_time": "4:20:06", "remaining_time": "0:04:39", "throughput": 8682.76, "total_tokens": 135506088} +{"current_steps": 201070, "total_steps": 204665, "loss": 0.0, "lr": 1.8801961325892735e-09, "epoch": 4.912173551901889, "percentage": 98.24, "elapsed_time": "4:20:06", "remaining_time": "0:04:39", "throughput": 8682.78, "total_tokens": 135509544} +{"current_steps": 201075, "total_steps": 204665, "loss": 0.0, "lr": 1.874972824403631e-09, "epoch": 4.9122957027337355, "percentage": 98.25, "elapsed_time": "4:20:07", "remaining_time": "0:04:38", "throughput": 8682.81, "total_tokens": 135512936} +{"current_steps": 201080, "total_steps": 204665, "loss": 0.0, "lr": 1.869756774851683e-09, "epoch": 4.912417853565583, "percentage": 98.25, "elapsed_time": "4:20:07", "remaining_time": "0:04:38", "throughput": 8682.84, "total_tokens": 135516392} +{"current_steps": 201085, "total_steps": 204665, "loss": 0.0, "lr": 1.8645479839712873e-09, "epoch": 4.91254000439743, "percentage": 98.25, "elapsed_time": "4:20:07", "remaining_time": "0:04:37", "throughput": 8682.87, "total_tokens": 135519912} +{"current_steps": 201090, "total_steps": 204665, "loss": 0.0, "lr": 1.8593464518004143e-09, "epoch": 4.9126621552292775, "percentage": 98.25, "elapsed_time": "4:20:08", "remaining_time": "0:04:37", "throughput": 8682.89, "total_tokens": 135523240} +{"current_steps": 201095, "total_steps": 204665, "loss": 0.0422, "lr": 1.8541521783768111e-09, "epoch": 4.912784306061124, "percentage": 98.26, "elapsed_time": "4:20:08", "remaining_time": "0:04:37", "throughput": 8682.91, "total_tokens": 135526568} +{"current_steps": 201100, "total_steps": 204665, "loss": 0.0, "lr": 1.8489651637383363e-09, "epoch": 4.912906456892971, "percentage": 98.26, "elapsed_time": "4:20:08", "remaining_time": "0:04:36", "throughput": 8682.94, "total_tokens": 135530216} +{"current_steps": 201105, "total_steps": 204665, "loss": 0.0, "lr": 1.8437854079225158e-09, "epoch": 4.913028607724819, "percentage": 98.26, "elapsed_time": "4:20:09", "remaining_time": "0:04:36", "throughput": 8682.96, "total_tokens": 135533416} +{"current_steps": 201110, "total_steps": 204665, "loss": 0.0, "lr": 1.8386129109673187e-09, "epoch": 4.913150758556665, "percentage": 98.26, "elapsed_time": "4:20:09", "remaining_time": "0:04:35", "throughput": 8682.96, "total_tokens": 135536360} +{"current_steps": 201115, "total_steps": 204665, "loss": 0.041, "lr": 1.83344767291016e-09, "epoch": 4.913272909388513, "percentage": 98.27, "elapsed_time": "4:20:09", "remaining_time": "0:04:35", "throughput": 8682.97, "total_tokens": 135539624} +{"current_steps": 201120, "total_steps": 204665, "loss": 0.0, "lr": 1.828289693788565e-09, "epoch": 4.91339506022036, "percentage": 98.27, "elapsed_time": "4:20:10", "remaining_time": "0:04:35", "throughput": 8683.0, "total_tokens": 135543016} +{"current_steps": 201125, "total_steps": 204665, "loss": 0.0, "lr": 1.8231389736401703e-09, "epoch": 4.913517211052207, "percentage": 98.27, "elapsed_time": "4:20:10", "remaining_time": "0:04:34", "throughput": 8683.01, "total_tokens": 135546280} +{"current_steps": 201130, "total_steps": 204665, "loss": 0.0, "lr": 1.8179955125023905e-09, "epoch": 4.913639361884054, "percentage": 98.27, "elapsed_time": "4:20:10", "remaining_time": "0:04:34", "throughput": 8683.02, "total_tokens": 135549416} +{"current_steps": 201135, "total_steps": 204665, "loss": 0.0, "lr": 1.81285931041264e-09, "epoch": 4.913761512715902, "percentage": 98.28, "elapsed_time": "4:20:11", "remaining_time": "0:04:33", "throughput": 8683.05, "total_tokens": 135552872} +{"current_steps": 201140, "total_steps": 204665, "loss": 0.0, "lr": 1.8077303674083332e-09, "epoch": 4.9138836635477485, "percentage": 98.28, "elapsed_time": "4:20:11", "remaining_time": "0:04:33", "throughput": 8683.07, "total_tokens": 135556136} +{"current_steps": 201145, "total_steps": 204665, "loss": 0.0, "lr": 1.802608683526552e-09, "epoch": 4.914005814379596, "percentage": 98.28, "elapsed_time": "4:20:11", "remaining_time": "0:04:33", "throughput": 8683.1, "total_tokens": 135559784} +{"current_steps": 201150, "total_steps": 204665, "loss": 0.0, "lr": 1.7974942588048213e-09, "epoch": 4.914127965211443, "percentage": 98.28, "elapsed_time": "4:20:12", "remaining_time": "0:04:32", "throughput": 8683.11, "total_tokens": 135562984} +{"current_steps": 201155, "total_steps": 204665, "loss": 0.0, "lr": 1.7923870932801123e-09, "epoch": 4.9142501160432905, "percentage": 98.29, "elapsed_time": "4:20:12", "remaining_time": "0:04:32", "throughput": 8683.12, "total_tokens": 135566056} +{"current_steps": 201160, "total_steps": 204665, "loss": 0.0, "lr": 1.7872871869896166e-09, "epoch": 4.914372266875137, "percentage": 98.29, "elapsed_time": "4:20:12", "remaining_time": "0:04:32", "throughput": 8683.16, "total_tokens": 135569704} +{"current_steps": 201165, "total_steps": 204665, "loss": 0.0558, "lr": 1.7821945399705273e-09, "epoch": 4.914494417706985, "percentage": 98.29, "elapsed_time": "4:20:13", "remaining_time": "0:04:31", "throughput": 8683.19, "total_tokens": 135573224} +{"current_steps": 201170, "total_steps": 204665, "loss": 0.0, "lr": 1.7771091522598146e-09, "epoch": 4.914616568538832, "percentage": 98.29, "elapsed_time": "4:20:13", "remaining_time": "0:04:31", "throughput": 8683.19, "total_tokens": 135576296} +{"current_steps": 201175, "total_steps": 204665, "loss": 0.0, "lr": 1.7720310238943381e-09, "epoch": 4.914738719370679, "percentage": 98.29, "elapsed_time": "4:20:14", "remaining_time": "0:04:30", "throughput": 8683.27, "total_tokens": 135580648} +{"current_steps": 201180, "total_steps": 204665, "loss": 0.0, "lr": 1.76696015491129e-09, "epoch": 4.914860870202526, "percentage": 98.3, "elapsed_time": "4:20:14", "remaining_time": "0:04:30", "throughput": 8683.29, "total_tokens": 135583912} +{"current_steps": 201185, "total_steps": 204665, "loss": 0.0, "lr": 1.7618965453473078e-09, "epoch": 4.914983021034374, "percentage": 98.3, "elapsed_time": "4:20:14", "remaining_time": "0:04:30", "throughput": 8683.29, "total_tokens": 135586856} +{"current_steps": 201190, "total_steps": 204665, "loss": 0.0, "lr": 1.7568401952392509e-09, "epoch": 4.91510517186622, "percentage": 98.3, "elapsed_time": "4:20:15", "remaining_time": "0:04:29", "throughput": 8683.3, "total_tokens": 135590120} +{"current_steps": 201195, "total_steps": 204665, "loss": 0.0, "lr": 1.7517911046240897e-09, "epoch": 4.915227322698067, "percentage": 98.3, "elapsed_time": "4:20:15", "remaining_time": "0:04:29", "throughput": 8683.3, "total_tokens": 135593000} +{"current_steps": 201200, "total_steps": 204665, "loss": 0.0, "lr": 1.7467492735383505e-09, "epoch": 4.915349473529915, "percentage": 98.31, "elapsed_time": "4:20:15", "remaining_time": "0:04:28", "throughput": 8683.33, "total_tokens": 135596456} +{"current_steps": 201205, "total_steps": 204665, "loss": 0.0, "lr": 1.7417147020186706e-09, "epoch": 4.9154716243617615, "percentage": 98.31, "elapsed_time": "4:20:16", "remaining_time": "0:04:28", "throughput": 8683.38, "total_tokens": 135600360} +{"current_steps": 201210, "total_steps": 204665, "loss": 0.0, "lr": 1.7366873901017987e-09, "epoch": 4.915593775193609, "percentage": 98.31, "elapsed_time": "4:20:16", "remaining_time": "0:04:28", "throughput": 8683.41, "total_tokens": 135603880} +{"current_steps": 201215, "total_steps": 204665, "loss": 0.0, "lr": 1.7316673378242609e-09, "epoch": 4.915715926025456, "percentage": 98.31, "elapsed_time": "4:20:16", "remaining_time": "0:04:27", "throughput": 8683.43, "total_tokens": 135607272} +{"current_steps": 201220, "total_steps": 204665, "loss": 0.0, "lr": 1.7266545452225835e-09, "epoch": 4.915838076857304, "percentage": 98.32, "elapsed_time": "4:20:17", "remaining_time": "0:04:27", "throughput": 8683.45, "total_tokens": 135610728} +{"current_steps": 201225, "total_steps": 204665, "loss": 0.0, "lr": 1.7216490123330707e-09, "epoch": 4.91596022768915, "percentage": 98.32, "elapsed_time": "4:20:17", "remaining_time": "0:04:26", "throughput": 8683.46, "total_tokens": 135613672} +{"current_steps": 201230, "total_steps": 204665, "loss": 0.0, "lr": 1.716650739192249e-09, "epoch": 4.916082378520998, "percentage": 98.32, "elapsed_time": "4:20:17", "remaining_time": "0:04:26", "throughput": 8683.49, "total_tokens": 135617256} +{"current_steps": 201235, "total_steps": 204665, "loss": 0.0, "lr": 1.711659725836534e-09, "epoch": 4.916204529352845, "percentage": 98.32, "elapsed_time": "4:20:18", "remaining_time": "0:04:26", "throughput": 8683.51, "total_tokens": 135620584} +{"current_steps": 201240, "total_steps": 204665, "loss": 0.0, "lr": 1.7066759723021185e-09, "epoch": 4.916326680184692, "percentage": 98.33, "elapsed_time": "4:20:18", "remaining_time": "0:04:25", "throughput": 8683.53, "total_tokens": 135623976} +{"current_steps": 201245, "total_steps": 204665, "loss": 0.0, "lr": 1.7016994786251958e-09, "epoch": 4.916448831016539, "percentage": 98.33, "elapsed_time": "4:20:18", "remaining_time": "0:04:25", "throughput": 8683.54, "total_tokens": 135627112} +{"current_steps": 201250, "total_steps": 204665, "loss": 0.0001, "lr": 1.6967302448420707e-09, "epoch": 4.916570981848387, "percentage": 98.33, "elapsed_time": "4:20:19", "remaining_time": "0:04:25", "throughput": 8683.54, "total_tokens": 135630056} +{"current_steps": 201255, "total_steps": 204665, "loss": 0.0, "lr": 1.6917682709887139e-09, "epoch": 4.9166931326802334, "percentage": 98.33, "elapsed_time": "4:20:19", "remaining_time": "0:04:24", "throughput": 8683.55, "total_tokens": 135633256} +{"current_steps": 201260, "total_steps": 204665, "loss": 0.0, "lr": 1.6868135571015408e-09, "epoch": 4.916815283512081, "percentage": 98.34, "elapsed_time": "4:20:19", "remaining_time": "0:04:24", "throughput": 8683.58, "total_tokens": 135636648} +{"current_steps": 201265, "total_steps": 204665, "loss": 0.0, "lr": 1.6818661032161896e-09, "epoch": 4.916937434343928, "percentage": 98.34, "elapsed_time": "4:20:20", "remaining_time": "0:04:23", "throughput": 8683.61, "total_tokens": 135640360} +{"current_steps": 201270, "total_steps": 204665, "loss": 0.0, "lr": 1.6769259093689647e-09, "epoch": 4.917059585175775, "percentage": 98.34, "elapsed_time": "4:20:20", "remaining_time": "0:04:23", "throughput": 8683.64, "total_tokens": 135643880} +{"current_steps": 201275, "total_steps": 204665, "loss": 0.0018, "lr": 1.6719929755956152e-09, "epoch": 4.917181736007622, "percentage": 98.34, "elapsed_time": "4:20:20", "remaining_time": "0:04:23", "throughput": 8683.66, "total_tokens": 135647208} +{"current_steps": 201280, "total_steps": 204665, "loss": 0.0, "lr": 1.6670673019320014e-09, "epoch": 4.91730388683947, "percentage": 98.35, "elapsed_time": "4:20:21", "remaining_time": "0:04:22", "throughput": 8683.67, "total_tokens": 135650280} +{"current_steps": 201285, "total_steps": 204665, "loss": 0.0, "lr": 1.6621488884139834e-09, "epoch": 4.917426037671317, "percentage": 98.35, "elapsed_time": "4:20:21", "remaining_time": "0:04:22", "throughput": 8683.69, "total_tokens": 135653672} +{"current_steps": 201290, "total_steps": 204665, "loss": 0.0, "lr": 1.6572377350774213e-09, "epoch": 4.917548188503163, "percentage": 98.35, "elapsed_time": "4:20:22", "remaining_time": "0:04:21", "throughput": 8683.72, "total_tokens": 135657064} +{"current_steps": 201295, "total_steps": 204665, "loss": 0.0, "lr": 1.6523338419578426e-09, "epoch": 4.917670339335011, "percentage": 98.35, "elapsed_time": "4:20:22", "remaining_time": "0:04:21", "throughput": 8683.73, "total_tokens": 135660328} +{"current_steps": 201300, "total_steps": 204665, "loss": 0.0, "lr": 1.647437209091107e-09, "epoch": 4.917792490166858, "percentage": 98.36, "elapsed_time": "4:20:22", "remaining_time": "0:04:21", "throughput": 8683.74, "total_tokens": 135663464} +{"current_steps": 201305, "total_steps": 204665, "loss": 0.0, "lr": 1.6425478365126311e-09, "epoch": 4.917914640998705, "percentage": 98.36, "elapsed_time": "4:20:23", "remaining_time": "0:04:20", "throughput": 8683.79, "total_tokens": 135667432} +{"current_steps": 201310, "total_steps": 204665, "loss": 0.0001, "lr": 1.6376657242581638e-09, "epoch": 4.918036791830552, "percentage": 98.36, "elapsed_time": "4:20:23", "remaining_time": "0:04:20", "throughput": 8683.8, "total_tokens": 135670504} +{"current_steps": 201315, "total_steps": 204665, "loss": 0.0, "lr": 1.6327908723631213e-09, "epoch": 4.9181589426624, "percentage": 98.36, "elapsed_time": "4:20:23", "remaining_time": "0:04:19", "throughput": 8683.82, "total_tokens": 135673768} +{"current_steps": 201320, "total_steps": 204665, "loss": 0.0, "lr": 1.6279232808629196e-09, "epoch": 4.9182810934942465, "percentage": 98.37, "elapsed_time": "4:20:24", "remaining_time": "0:04:19", "throughput": 8683.82, "total_tokens": 135676840} +{"current_steps": 201325, "total_steps": 204665, "loss": 0.0, "lr": 1.6230629497929748e-09, "epoch": 4.918403244326094, "percentage": 98.37, "elapsed_time": "4:20:24", "remaining_time": "0:04:19", "throughput": 8683.84, "total_tokens": 135680104} +{"current_steps": 201330, "total_steps": 204665, "loss": 0.0, "lr": 1.6182098791887033e-09, "epoch": 4.918525395157941, "percentage": 98.37, "elapsed_time": "4:20:24", "remaining_time": "0:04:18", "throughput": 8683.88, "total_tokens": 135683752} +{"current_steps": 201335, "total_steps": 204665, "loss": 0.0, "lr": 1.613364069085299e-09, "epoch": 4.9186475459897885, "percentage": 98.37, "elapsed_time": "4:20:25", "remaining_time": "0:04:18", "throughput": 8683.91, "total_tokens": 135687272} +{"current_steps": 201340, "total_steps": 204665, "loss": 0.0, "lr": 1.608525519518067e-09, "epoch": 4.918769696821635, "percentage": 98.38, "elapsed_time": "4:20:25", "remaining_time": "0:04:18", "throughput": 8683.95, "total_tokens": 135691048} +{"current_steps": 201345, "total_steps": 204665, "loss": 0.0, "lr": 1.6036942305220902e-09, "epoch": 4.918891847653483, "percentage": 98.38, "elapsed_time": "4:20:25", "remaining_time": "0:04:17", "throughput": 8683.96, "total_tokens": 135694248} +{"current_steps": 201350, "total_steps": 204665, "loss": 0.0, "lr": 1.5988702021326738e-09, "epoch": 4.91901399848533, "percentage": 98.38, "elapsed_time": "4:20:26", "remaining_time": "0:04:17", "throughput": 8684.01, "total_tokens": 135698216} +{"current_steps": 201355, "total_steps": 204665, "loss": 0.0, "lr": 1.59405343438479e-09, "epoch": 4.919136149317177, "percentage": 98.38, "elapsed_time": "4:20:26", "remaining_time": "0:04:16", "throughput": 8684.03, "total_tokens": 135701544} +{"current_steps": 201360, "total_steps": 204665, "loss": 0.0, "lr": 1.5892439273135216e-09, "epoch": 4.919258300149024, "percentage": 98.39, "elapsed_time": "4:20:26", "remaining_time": "0:04:16", "throughput": 8684.06, "total_tokens": 135705064} +{"current_steps": 201365, "total_steps": 204665, "loss": 0.0, "lr": 1.5844416809537297e-09, "epoch": 4.919380450980871, "percentage": 98.39, "elapsed_time": "4:20:27", "remaining_time": "0:04:16", "throughput": 8684.07, "total_tokens": 135708264} +{"current_steps": 201370, "total_steps": 204665, "loss": 0.0, "lr": 1.5796466953404974e-09, "epoch": 4.919502601812718, "percentage": 98.39, "elapsed_time": "4:20:27", "remaining_time": "0:04:15", "throughput": 8684.08, "total_tokens": 135711336} +{"current_steps": 201375, "total_steps": 204665, "loss": 0.0, "lr": 1.5748589705085747e-09, "epoch": 4.919624752644566, "percentage": 98.39, "elapsed_time": "4:20:27", "remaining_time": "0:04:15", "throughput": 8684.12, "total_tokens": 135714984} +{"current_steps": 201380, "total_steps": 204665, "loss": 0.0, "lr": 1.5700785064928224e-09, "epoch": 4.919746903476413, "percentage": 98.39, "elapsed_time": "4:20:28", "remaining_time": "0:04:14", "throughput": 8684.14, "total_tokens": 135718440} +{"current_steps": 201385, "total_steps": 204665, "loss": 0.0, "lr": 1.5653053033279906e-09, "epoch": 4.9198690543082595, "percentage": 98.4, "elapsed_time": "4:20:28", "remaining_time": "0:04:14", "throughput": 8684.2, "total_tokens": 135722536} +{"current_steps": 201390, "total_steps": 204665, "loss": 0.0, "lr": 1.5605393610488294e-09, "epoch": 4.919991205140107, "percentage": 98.4, "elapsed_time": "4:20:29", "remaining_time": "0:04:14", "throughput": 8684.23, "total_tokens": 135725992} +{"current_steps": 201395, "total_steps": 204665, "loss": 0.0, "lr": 1.5557806796899776e-09, "epoch": 4.920113355971954, "percentage": 98.4, "elapsed_time": "4:20:29", "remaining_time": "0:04:13", "throughput": 8684.24, "total_tokens": 135729256} +{"current_steps": 201400, "total_steps": 204665, "loss": 0.0, "lr": 1.551029259286074e-09, "epoch": 4.9202355068038015, "percentage": 98.4, "elapsed_time": "4:20:29", "remaining_time": "0:04:13", "throughput": 8684.25, "total_tokens": 135732392} +{"current_steps": 201405, "total_steps": 204665, "loss": 0.0, "lr": 1.546285099871647e-09, "epoch": 4.920357657635648, "percentage": 98.41, "elapsed_time": "4:20:30", "remaining_time": "0:04:12", "throughput": 8684.28, "total_tokens": 135735848} +{"current_steps": 201410, "total_steps": 204665, "loss": 0.0, "lr": 1.5415482014811132e-09, "epoch": 4.920479808467496, "percentage": 98.41, "elapsed_time": "4:20:30", "remaining_time": "0:04:12", "throughput": 8684.3, "total_tokens": 135739240} +{"current_steps": 201415, "total_steps": 204665, "loss": 0.0, "lr": 1.5368185641490005e-09, "epoch": 4.920601959299343, "percentage": 98.41, "elapsed_time": "4:20:30", "remaining_time": "0:04:12", "throughput": 8684.32, "total_tokens": 135742440} +{"current_steps": 201420, "total_steps": 204665, "loss": 0.0, "lr": 1.532096187909726e-09, "epoch": 4.92072411013119, "percentage": 98.41, "elapsed_time": "4:20:31", "remaining_time": "0:04:11", "throughput": 8684.32, "total_tokens": 135745384} +{"current_steps": 201425, "total_steps": 204665, "loss": 0.0, "lr": 1.5273810727975955e-09, "epoch": 4.920846260963037, "percentage": 98.42, "elapsed_time": "4:20:31", "remaining_time": "0:04:11", "throughput": 8684.33, "total_tokens": 135748584} +{"current_steps": 201430, "total_steps": 204665, "loss": 0.0, "lr": 1.522673218846915e-09, "epoch": 4.920968411794885, "percentage": 98.42, "elapsed_time": "4:20:31", "remaining_time": "0:04:11", "throughput": 8684.36, "total_tokens": 135752104} +{"current_steps": 201435, "total_steps": 204665, "loss": 0.0002, "lr": 1.5179726260918791e-09, "epoch": 4.921090562626731, "percentage": 98.42, "elapsed_time": "4:20:32", "remaining_time": "0:04:10", "throughput": 8684.36, "total_tokens": 135755112} +{"current_steps": 201440, "total_steps": 204665, "loss": 0.0002, "lr": 1.5132792945666827e-09, "epoch": 4.921212713458579, "percentage": 98.42, "elapsed_time": "4:20:32", "remaining_time": "0:04:10", "throughput": 8684.38, "total_tokens": 135758376} +{"current_steps": 201445, "total_steps": 204665, "loss": 0.0, "lr": 1.5085932243055211e-09, "epoch": 4.921334864290426, "percentage": 98.43, "elapsed_time": "4:20:32", "remaining_time": "0:04:09", "throughput": 8684.38, "total_tokens": 135761320} +{"current_steps": 201450, "total_steps": 204665, "loss": 0.0436, "lr": 1.5039144153424777e-09, "epoch": 4.921457015122273, "percentage": 98.43, "elapsed_time": "4:20:33", "remaining_time": "0:04:09", "throughput": 8684.4, "total_tokens": 135764712} +{"current_steps": 201455, "total_steps": 204665, "loss": 0.0, "lr": 1.4992428677115255e-09, "epoch": 4.92157916595412, "percentage": 98.43, "elapsed_time": "4:20:33", "remaining_time": "0:04:09", "throughput": 8684.42, "total_tokens": 135767912} +{"current_steps": 201460, "total_steps": 204665, "loss": 0.0, "lr": 1.4945785814465262e-09, "epoch": 4.921701316785967, "percentage": 98.43, "elapsed_time": "4:20:33", "remaining_time": "0:04:08", "throughput": 8684.45, "total_tokens": 135771432} +{"current_steps": 201465, "total_steps": 204665, "loss": 0.0, "lr": 1.4899215565816748e-09, "epoch": 4.9218234676178145, "percentage": 98.44, "elapsed_time": "4:20:34", "remaining_time": "0:04:08", "throughput": 8684.45, "total_tokens": 135774504} +{"current_steps": 201470, "total_steps": 204665, "loss": 0.0, "lr": 1.485271793150611e-09, "epoch": 4.921945618449661, "percentage": 98.44, "elapsed_time": "4:20:34", "remaining_time": "0:04:07", "throughput": 8684.47, "total_tokens": 135777832} +{"current_steps": 201475, "total_steps": 204665, "loss": 0.0, "lr": 1.4806292911871965e-09, "epoch": 4.922067769281509, "percentage": 98.44, "elapsed_time": "4:20:34", "remaining_time": "0:04:07", "throughput": 8684.51, "total_tokens": 135781480} +{"current_steps": 201480, "total_steps": 204665, "loss": 0.0, "lr": 1.4759940507251822e-09, "epoch": 4.922189920113356, "percentage": 98.44, "elapsed_time": "4:20:35", "remaining_time": "0:04:07", "throughput": 8684.55, "total_tokens": 135785256} +{"current_steps": 201485, "total_steps": 204665, "loss": 0.0002, "lr": 1.47136607179843e-09, "epoch": 4.922312070945203, "percentage": 98.45, "elapsed_time": "4:20:35", "remaining_time": "0:04:06", "throughput": 8684.59, "total_tokens": 135788968} +{"current_steps": 201490, "total_steps": 204665, "loss": 0.0, "lr": 1.4667453544403573e-09, "epoch": 4.92243422177705, "percentage": 98.45, "elapsed_time": "4:20:35", "remaining_time": "0:04:06", "throughput": 8684.58, "total_tokens": 135791784} +{"current_steps": 201495, "total_steps": 204665, "loss": 0.0, "lr": 1.4621318986847154e-09, "epoch": 4.922556372608898, "percentage": 98.45, "elapsed_time": "4:20:36", "remaining_time": "0:04:05", "throughput": 8684.6, "total_tokens": 135794984} +{"current_steps": 201500, "total_steps": 204665, "loss": 0.0, "lr": 1.4575257045650325e-09, "epoch": 4.922678523440744, "percentage": 98.45, "elapsed_time": "4:20:36", "remaining_time": "0:04:05", "throughput": 8684.63, "total_tokens": 135798568} +{"current_steps": 201505, "total_steps": 204665, "loss": 0.0037, "lr": 1.4529267721148375e-09, "epoch": 4.922800674272592, "percentage": 98.46, "elapsed_time": "4:20:37", "remaining_time": "0:04:05", "throughput": 8684.64, "total_tokens": 135801704} +{"current_steps": 201510, "total_steps": 204665, "loss": 0.0, "lr": 1.4483351013675482e-09, "epoch": 4.922922825104439, "percentage": 98.46, "elapsed_time": "4:20:37", "remaining_time": "0:04:04", "throughput": 8684.67, "total_tokens": 135805288} +{"current_steps": 201515, "total_steps": 204665, "loss": 0.0, "lr": 1.4437506923564714e-09, "epoch": 4.923044975936286, "percentage": 98.46, "elapsed_time": "4:20:37", "remaining_time": "0:04:04", "throughput": 8684.72, "total_tokens": 135809128} +{"current_steps": 201520, "total_steps": 204665, "loss": 0.0, "lr": 1.4391735451150245e-09, "epoch": 4.923167126768133, "percentage": 98.46, "elapsed_time": "4:20:38", "remaining_time": "0:04:04", "throughput": 8684.72, "total_tokens": 135812200} +{"current_steps": 201525, "total_steps": 204665, "loss": 0.0, "lr": 1.4346036596765142e-09, "epoch": 4.923289277599981, "percentage": 98.47, "elapsed_time": "4:20:38", "remaining_time": "0:04:03", "throughput": 8684.76, "total_tokens": 135815912} +{"current_steps": 201530, "total_steps": 204665, "loss": 0.0, "lr": 1.4300410360741365e-09, "epoch": 4.9234114284318276, "percentage": 98.47, "elapsed_time": "4:20:38", "remaining_time": "0:04:03", "throughput": 8684.82, "total_tokens": 135819880} +{"current_steps": 201535, "total_steps": 204665, "loss": 0.0, "lr": 1.425485674341087e-09, "epoch": 4.923533579263674, "percentage": 98.47, "elapsed_time": "4:20:39", "remaining_time": "0:04:02", "throughput": 8684.85, "total_tokens": 135823400} +{"current_steps": 201540, "total_steps": 204665, "loss": 0.0, "lr": 1.4209375745105613e-09, "epoch": 4.923655730095522, "percentage": 98.47, "elapsed_time": "4:20:39", "remaining_time": "0:04:02", "throughput": 8684.85, "total_tokens": 135826408} +{"current_steps": 201545, "total_steps": 204665, "loss": 0.0, "lr": 1.4163967366154217e-09, "epoch": 4.92377788092737, "percentage": 98.48, "elapsed_time": "4:20:39", "remaining_time": "0:04:02", "throughput": 8684.88, "total_tokens": 135829928} +{"current_steps": 201550, "total_steps": 204665, "loss": 0.0, "lr": 1.4118631606889752e-09, "epoch": 4.923900031759216, "percentage": 98.48, "elapsed_time": "4:20:40", "remaining_time": "0:04:01", "throughput": 8684.88, "total_tokens": 135832808} +{"current_steps": 201555, "total_steps": 204665, "loss": 0.0, "lr": 1.4073368467639735e-09, "epoch": 4.924022182591063, "percentage": 98.48, "elapsed_time": "4:20:40", "remaining_time": "0:04:01", "throughput": 8684.89, "total_tokens": 135836072} +{"current_steps": 201560, "total_steps": 204665, "loss": 0.0, "lr": 1.40281779487339e-09, "epoch": 4.924144333422911, "percentage": 98.48, "elapsed_time": "4:20:40", "remaining_time": "0:04:00", "throughput": 8684.93, "total_tokens": 135839720} +{"current_steps": 201565, "total_steps": 204665, "loss": 0.0, "lr": 1.3983060050500872e-09, "epoch": 4.924266484254757, "percentage": 98.49, "elapsed_time": "4:20:41", "remaining_time": "0:04:00", "throughput": 8684.98, "total_tokens": 135843688} +{"current_steps": 201570, "total_steps": 204665, "loss": 0.0003, "lr": 1.393801477327039e-09, "epoch": 4.924388635086605, "percentage": 98.49, "elapsed_time": "4:20:41", "remaining_time": "0:04:00", "throughput": 8685.03, "total_tokens": 135847528} +{"current_steps": 201575, "total_steps": 204665, "loss": 0.0, "lr": 1.3893042117367748e-09, "epoch": 4.924510785918452, "percentage": 98.49, "elapsed_time": "4:20:41", "remaining_time": "0:03:59", "throughput": 8685.06, "total_tokens": 135851048} +{"current_steps": 201580, "total_steps": 204665, "loss": 0.0, "lr": 1.3848142083120462e-09, "epoch": 4.9246329367502995, "percentage": 98.49, "elapsed_time": "4:20:42", "remaining_time": "0:03:59", "throughput": 8685.07, "total_tokens": 135854120} +{"current_steps": 201585, "total_steps": 204665, "loss": 0.0, "lr": 1.3803314670856047e-09, "epoch": 4.924755087582146, "percentage": 98.5, "elapsed_time": "4:20:42", "remaining_time": "0:03:59", "throughput": 8685.07, "total_tokens": 135857192} +{"current_steps": 201590, "total_steps": 204665, "loss": 0.0, "lr": 1.3758559880898691e-09, "epoch": 4.924877238413994, "percentage": 98.5, "elapsed_time": "4:20:42", "remaining_time": "0:03:58", "throughput": 8685.12, "total_tokens": 135861160} +{"current_steps": 201595, "total_steps": 204665, "loss": 0.0, "lr": 1.3713877713575905e-09, "epoch": 4.924999389245841, "percentage": 98.5, "elapsed_time": "4:20:43", "remaining_time": "0:03:58", "throughput": 8685.13, "total_tokens": 135864232} +{"current_steps": 201600, "total_steps": 204665, "loss": 0.0, "lr": 1.366926816921188e-09, "epoch": 4.925121540077688, "percentage": 98.5, "elapsed_time": "4:20:43", "remaining_time": "0:03:57", "throughput": 8685.16, "total_tokens": 135867752} +{"current_steps": 201605, "total_steps": 204665, "loss": 0.0, "lr": 1.3624731248130794e-09, "epoch": 4.925243690909535, "percentage": 98.5, "elapsed_time": "4:20:44", "remaining_time": "0:03:57", "throughput": 8685.18, "total_tokens": 135871080} +{"current_steps": 201610, "total_steps": 204665, "loss": 0.0, "lr": 1.3580266950656837e-09, "epoch": 4.925365841741383, "percentage": 98.51, "elapsed_time": "4:20:44", "remaining_time": "0:03:57", "throughput": 8685.19, "total_tokens": 135874152} +{"current_steps": 201615, "total_steps": 204665, "loss": 0.0288, "lr": 1.3535875277113085e-09, "epoch": 4.925487992573229, "percentage": 98.51, "elapsed_time": "4:20:44", "remaining_time": "0:03:56", "throughput": 8685.22, "total_tokens": 135877672} +{"current_steps": 201620, "total_steps": 204665, "loss": 0.0, "lr": 1.349155622782261e-09, "epoch": 4.925610143405077, "percentage": 98.51, "elapsed_time": "4:20:45", "remaining_time": "0:03:56", "throughput": 8685.24, "total_tokens": 135881000} +{"current_steps": 201625, "total_steps": 204665, "loss": 0.0, "lr": 1.3447309803107376e-09, "epoch": 4.925732294236924, "percentage": 98.51, "elapsed_time": "4:20:45", "remaining_time": "0:03:55", "throughput": 8685.27, "total_tokens": 135884520} +{"current_steps": 201630, "total_steps": 204665, "loss": 0.0, "lr": 1.340313600328935e-09, "epoch": 4.9258544450687705, "percentage": 98.52, "elapsed_time": "4:20:45", "remaining_time": "0:03:55", "throughput": 8685.29, "total_tokens": 135887912} +{"current_steps": 201635, "total_steps": 204665, "loss": 0.0, "lr": 1.3359034828689385e-09, "epoch": 4.925976595900618, "percentage": 98.52, "elapsed_time": "4:20:46", "remaining_time": "0:03:55", "throughput": 8685.29, "total_tokens": 135890920} +{"current_steps": 201640, "total_steps": 204665, "loss": 0.0, "lr": 1.3315006279629448e-09, "epoch": 4.926098746732466, "percentage": 98.52, "elapsed_time": "4:20:46", "remaining_time": "0:03:54", "throughput": 8685.31, "total_tokens": 135894120} +{"current_steps": 201645, "total_steps": 204665, "loss": 0.0, "lr": 1.327105035642817e-09, "epoch": 4.9262208975643125, "percentage": 98.52, "elapsed_time": "4:20:46", "remaining_time": "0:03:54", "throughput": 8685.33, "total_tokens": 135897576} +{"current_steps": 201650, "total_steps": 204665, "loss": 0.0, "lr": 1.3227167059406407e-09, "epoch": 4.926343048396159, "percentage": 98.53, "elapsed_time": "4:20:47", "remaining_time": "0:03:53", "throughput": 8685.35, "total_tokens": 135900776} +{"current_steps": 201655, "total_steps": 204665, "loss": 0.0, "lr": 1.3183356388882794e-09, "epoch": 4.926465199228007, "percentage": 98.53, "elapsed_time": "4:20:47", "remaining_time": "0:03:53", "throughput": 8685.38, "total_tokens": 135904360} +{"current_steps": 201660, "total_steps": 204665, "loss": 0.0, "lr": 1.3139618345175962e-09, "epoch": 4.926587350059854, "percentage": 98.53, "elapsed_time": "4:20:47", "remaining_time": "0:03:53", "throughput": 8685.39, "total_tokens": 135907496} +{"current_steps": 201665, "total_steps": 204665, "loss": 0.0, "lr": 1.3095952928603438e-09, "epoch": 4.926709500891701, "percentage": 98.53, "elapsed_time": "4:20:48", "remaining_time": "0:03:52", "throughput": 8685.42, "total_tokens": 135911016} +{"current_steps": 201670, "total_steps": 204665, "loss": 0.0, "lr": 1.3052360139483853e-09, "epoch": 4.926831651723548, "percentage": 98.54, "elapsed_time": "4:20:48", "remaining_time": "0:03:52", "throughput": 8685.44, "total_tokens": 135914344} +{"current_steps": 201675, "total_steps": 204665, "loss": 0.0, "lr": 1.3008839978133623e-09, "epoch": 4.926953802555396, "percentage": 98.54, "elapsed_time": "4:20:48", "remaining_time": "0:03:52", "throughput": 8685.45, "total_tokens": 135917480} +{"current_steps": 201680, "total_steps": 204665, "loss": 0.0, "lr": 1.296539244486916e-09, "epoch": 4.927075953387242, "percentage": 98.54, "elapsed_time": "4:20:49", "remaining_time": "0:03:51", "throughput": 8685.49, "total_tokens": 135921192} +{"current_steps": 201685, "total_steps": 204665, "loss": 0.0, "lr": 1.292201754000688e-09, "epoch": 4.92719810421909, "percentage": 98.54, "elapsed_time": "4:20:49", "remaining_time": "0:03:51", "throughput": 8685.55, "total_tokens": 135925416} +{"current_steps": 201690, "total_steps": 204665, "loss": 0.0, "lr": 1.2878715263860973e-09, "epoch": 4.927320255050937, "percentage": 98.55, "elapsed_time": "4:20:49", "remaining_time": "0:03:50", "throughput": 8685.56, "total_tokens": 135928488} +{"current_steps": 201695, "total_steps": 204665, "loss": 0.0, "lr": 1.2835485616748964e-09, "epoch": 4.927442405882784, "percentage": 98.55, "elapsed_time": "4:20:50", "remaining_time": "0:03:50", "throughput": 8685.57, "total_tokens": 135931624} +{"current_steps": 201700, "total_steps": 204665, "loss": 0.0, "lr": 1.2792328598981716e-09, "epoch": 4.927564556714631, "percentage": 98.55, "elapsed_time": "4:20:50", "remaining_time": "0:03:50", "throughput": 8685.62, "total_tokens": 135935400} +{"current_steps": 201705, "total_steps": 204665, "loss": 0.0, "lr": 1.2749244210875643e-09, "epoch": 4.927686707546479, "percentage": 98.55, "elapsed_time": "4:20:50", "remaining_time": "0:03:49", "throughput": 8685.64, "total_tokens": 135938728} +{"current_steps": 201710, "total_steps": 204665, "loss": 0.0, "lr": 1.2706232452743826e-09, "epoch": 4.9278088583783255, "percentage": 98.56, "elapsed_time": "4:20:51", "remaining_time": "0:03:49", "throughput": 8685.65, "total_tokens": 135941992} +{"current_steps": 201715, "total_steps": 204665, "loss": 0.0, "lr": 1.2663293324897128e-09, "epoch": 4.927931009210173, "percentage": 98.56, "elapsed_time": "4:20:51", "remaining_time": "0:03:48", "throughput": 8685.69, "total_tokens": 135945640} +{"current_steps": 201720, "total_steps": 204665, "loss": 0.0, "lr": 1.2620426827650854e-09, "epoch": 4.92805316004202, "percentage": 98.56, "elapsed_time": "4:20:52", "remaining_time": "0:03:48", "throughput": 8685.71, "total_tokens": 135948904} +{"current_steps": 201725, "total_steps": 204665, "loss": 0.0002, "lr": 1.2577632961313644e-09, "epoch": 4.928175310873867, "percentage": 98.56, "elapsed_time": "4:20:52", "remaining_time": "0:03:48", "throughput": 8685.73, "total_tokens": 135952360} +{"current_steps": 201730, "total_steps": 204665, "loss": 0.0, "lr": 1.2534911726199693e-09, "epoch": 4.928297461705714, "percentage": 98.57, "elapsed_time": "4:20:52", "remaining_time": "0:03:47", "throughput": 8685.75, "total_tokens": 135955688} +{"current_steps": 201735, "total_steps": 204665, "loss": 0.0, "lr": 1.2492263122616532e-09, "epoch": 4.928419612537561, "percentage": 98.57, "elapsed_time": "4:20:53", "remaining_time": "0:03:47", "throughput": 8685.78, "total_tokens": 135959080} +{"current_steps": 201740, "total_steps": 204665, "loss": 0.0, "lr": 1.2449687150877242e-09, "epoch": 4.928541763369409, "percentage": 98.57, "elapsed_time": "4:20:53", "remaining_time": "0:03:46", "throughput": 8685.83, "total_tokens": 135963048} +{"current_steps": 201745, "total_steps": 204665, "loss": 0.0006, "lr": 1.2407183811289357e-09, "epoch": 4.928663914201255, "percentage": 98.57, "elapsed_time": "4:20:53", "remaining_time": "0:03:46", "throughput": 8685.84, "total_tokens": 135966120} +{"current_steps": 201750, "total_steps": 204665, "loss": 0.0, "lr": 1.2364753104163738e-09, "epoch": 4.928786065033103, "percentage": 98.58, "elapsed_time": "4:20:54", "remaining_time": "0:03:46", "throughput": 8685.85, "total_tokens": 135969448} +{"current_steps": 201755, "total_steps": 204665, "loss": 0.0146, "lr": 1.232239502980681e-09, "epoch": 4.92890821586495, "percentage": 98.58, "elapsed_time": "4:20:54", "remaining_time": "0:03:45", "throughput": 8685.86, "total_tokens": 135972520} +{"current_steps": 201760, "total_steps": 204665, "loss": 0.0, "lr": 1.228010958852832e-09, "epoch": 4.929030366696797, "percentage": 98.58, "elapsed_time": "4:20:54", "remaining_time": "0:03:45", "throughput": 8685.88, "total_tokens": 135975784} +{"current_steps": 201765, "total_steps": 204665, "loss": 0.0, "lr": 1.2237896780635803e-09, "epoch": 4.929152517528644, "percentage": 98.58, "elapsed_time": "4:20:55", "remaining_time": "0:03:45", "throughput": 8685.89, "total_tokens": 135978984} +{"current_steps": 201770, "total_steps": 204665, "loss": 0.0, "lr": 1.2195756606434571e-09, "epoch": 4.929274668360492, "percentage": 98.59, "elapsed_time": "4:20:55", "remaining_time": "0:03:44", "throughput": 8685.91, "total_tokens": 135982248} +{"current_steps": 201775, "total_steps": 204665, "loss": 0.0, "lr": 1.2153689066233263e-09, "epoch": 4.9293968191923385, "percentage": 98.59, "elapsed_time": "4:20:55", "remaining_time": "0:03:44", "throughput": 8685.95, "total_tokens": 135985960} +{"current_steps": 201780, "total_steps": 204665, "loss": 0.0, "lr": 1.2111694160336083e-09, "epoch": 4.929518970024186, "percentage": 98.59, "elapsed_time": "4:20:56", "remaining_time": "0:03:43", "throughput": 8685.99, "total_tokens": 135989800} +{"current_steps": 201785, "total_steps": 204665, "loss": 0.0, "lr": 1.2069771889049452e-09, "epoch": 4.929641120856033, "percentage": 98.59, "elapsed_time": "4:20:56", "remaining_time": "0:03:43", "throughput": 8686.03, "total_tokens": 135993384} +{"current_steps": 201790, "total_steps": 204665, "loss": 0.0, "lr": 1.202792225267757e-09, "epoch": 4.9297632716878805, "percentage": 98.6, "elapsed_time": "4:20:56", "remaining_time": "0:03:43", "throughput": 8686.06, "total_tokens": 135997032} +{"current_steps": 201795, "total_steps": 204665, "loss": 0.0, "lr": 1.1986145251524637e-09, "epoch": 4.929885422519727, "percentage": 98.6, "elapsed_time": "4:20:57", "remaining_time": "0:03:42", "throughput": 8686.09, "total_tokens": 136000360} +{"current_steps": 201800, "total_steps": 204665, "loss": 0.0, "lr": 1.1944440885895968e-09, "epoch": 4.930007573351575, "percentage": 98.6, "elapsed_time": "4:20:57", "remaining_time": "0:03:42", "throughput": 8686.1, "total_tokens": 136003560} +{"current_steps": 201805, "total_steps": 204665, "loss": 0.0, "lr": 1.190280915609354e-09, "epoch": 4.930129724183422, "percentage": 98.6, "elapsed_time": "4:20:57", "remaining_time": "0:03:41", "throughput": 8686.12, "total_tokens": 136006824} +{"current_steps": 201810, "total_steps": 204665, "loss": 0.0, "lr": 1.1861250062419336e-09, "epoch": 4.930251875015269, "percentage": 98.61, "elapsed_time": "4:20:58", "remaining_time": "0:03:41", "throughput": 8686.14, "total_tokens": 136010280} +{"current_steps": 201815, "total_steps": 204665, "loss": 0.0, "lr": 1.1819763605177557e-09, "epoch": 4.930374025847116, "percentage": 98.61, "elapsed_time": "4:20:58", "remaining_time": "0:03:41", "throughput": 8686.15, "total_tokens": 136013352} +{"current_steps": 201820, "total_steps": 204665, "loss": 0.0, "lr": 1.1778349784669073e-09, "epoch": 4.930496176678963, "percentage": 98.61, "elapsed_time": "4:20:59", "remaining_time": "0:03:40", "throughput": 8686.19, "total_tokens": 136017064} +{"current_steps": 201825, "total_steps": 204665, "loss": 0.0, "lr": 1.1737008601194754e-09, "epoch": 4.93061832751081, "percentage": 98.61, "elapsed_time": "4:20:59", "remaining_time": "0:03:40", "throughput": 8686.22, "total_tokens": 136020648} +{"current_steps": 201830, "total_steps": 204665, "loss": 0.0, "lr": 1.169574005505547e-09, "epoch": 4.930740478342657, "percentage": 98.61, "elapsed_time": "4:20:59", "remaining_time": "0:03:39", "throughput": 8686.25, "total_tokens": 136024168} +{"current_steps": 201835, "total_steps": 204665, "loss": 0.0, "lr": 1.1654544146550982e-09, "epoch": 4.930862629174505, "percentage": 98.62, "elapsed_time": "4:21:00", "remaining_time": "0:03:39", "throughput": 8686.27, "total_tokens": 136027432} +{"current_steps": 201840, "total_steps": 204665, "loss": 0.0, "lr": 1.161342087598105e-09, "epoch": 4.9309847800063515, "percentage": 98.62, "elapsed_time": "4:21:00", "remaining_time": "0:03:39", "throughput": 8686.3, "total_tokens": 136030952} +{"current_steps": 201845, "total_steps": 204665, "loss": 0.0, "lr": 1.1572370243645434e-09, "epoch": 4.931106930838199, "percentage": 98.62, "elapsed_time": "4:21:00", "remaining_time": "0:03:38", "throughput": 8686.36, "total_tokens": 136035048} +{"current_steps": 201850, "total_steps": 204665, "loss": 0.0, "lr": 1.1531392249841675e-09, "epoch": 4.931229081670046, "percentage": 98.62, "elapsed_time": "4:21:01", "remaining_time": "0:03:38", "throughput": 8686.39, "total_tokens": 136038632} +{"current_steps": 201855, "total_steps": 204665, "loss": 0.0, "lr": 1.1490486894868422e-09, "epoch": 4.9313512325018936, "percentage": 98.63, "elapsed_time": "4:21:01", "remaining_time": "0:03:38", "throughput": 8686.42, "total_tokens": 136042088} +{"current_steps": 201860, "total_steps": 204665, "loss": 0.0, "lr": 1.1449654179022105e-09, "epoch": 4.93147338333374, "percentage": 98.63, "elapsed_time": "4:21:01", "remaining_time": "0:03:37", "throughput": 8686.44, "total_tokens": 136045416} +{"current_steps": 201865, "total_steps": 204665, "loss": 0.0, "lr": 1.1408894102601374e-09, "epoch": 4.931595534165588, "percentage": 98.63, "elapsed_time": "4:21:02", "remaining_time": "0:03:37", "throughput": 8686.45, "total_tokens": 136048552} +{"current_steps": 201870, "total_steps": 204665, "loss": 0.0, "lr": 1.1368206665901548e-09, "epoch": 4.931717684997435, "percentage": 98.63, "elapsed_time": "4:21:02", "remaining_time": "0:03:36", "throughput": 8686.48, "total_tokens": 136052200} +{"current_steps": 201875, "total_steps": 204665, "loss": 0.0, "lr": 1.1327591869219055e-09, "epoch": 4.931839835829282, "percentage": 98.64, "elapsed_time": "4:21:02", "remaining_time": "0:03:36", "throughput": 8686.51, "total_tokens": 136055720} +{"current_steps": 201880, "total_steps": 204665, "loss": 0.0, "lr": 1.1287049712849217e-09, "epoch": 4.931961986661129, "percentage": 98.64, "elapsed_time": "4:21:03", "remaining_time": "0:03:36", "throughput": 8686.51, "total_tokens": 136058728} +{"current_steps": 201885, "total_steps": 204665, "loss": 0.0, "lr": 1.1246580197086242e-09, "epoch": 4.932084137492977, "percentage": 98.64, "elapsed_time": "4:21:03", "remaining_time": "0:03:35", "throughput": 8686.53, "total_tokens": 136061992} +{"current_steps": 201890, "total_steps": 204665, "loss": 0.0, "lr": 1.120618332222434e-09, "epoch": 4.932206288324823, "percentage": 98.64, "elapsed_time": "4:21:03", "remaining_time": "0:03:35", "throughput": 8686.54, "total_tokens": 136065192} +{"current_steps": 201895, "total_steps": 204665, "loss": 0.0, "lr": 1.1165859088558826e-09, "epoch": 4.93232843915667, "percentage": 98.65, "elapsed_time": "4:21:04", "remaining_time": "0:03:34", "throughput": 8686.56, "total_tokens": 136068584} +{"current_steps": 201900, "total_steps": 204665, "loss": 0.0, "lr": 1.1125607496380584e-09, "epoch": 4.932450589988518, "percentage": 98.65, "elapsed_time": "4:21:04", "remaining_time": "0:03:34", "throughput": 8686.58, "total_tokens": 136071848} +{"current_steps": 201905, "total_steps": 204665, "loss": 0.0, "lr": 1.108542854598382e-09, "epoch": 4.9325727408203655, "percentage": 98.65, "elapsed_time": "4:21:04", "remaining_time": "0:03:34", "throughput": 8686.61, "total_tokens": 136075304} +{"current_steps": 201910, "total_steps": 204665, "loss": 0.0, "lr": 1.1045322237660527e-09, "epoch": 4.932694891652212, "percentage": 98.65, "elapsed_time": "4:21:05", "remaining_time": "0:03:33", "throughput": 8686.61, "total_tokens": 136078312} +{"current_steps": 201915, "total_steps": 204665, "loss": 0.0, "lr": 1.1005288571702687e-09, "epoch": 4.932817042484059, "percentage": 98.66, "elapsed_time": "4:21:05", "remaining_time": "0:03:33", "throughput": 8686.64, "total_tokens": 136081896} +{"current_steps": 201920, "total_steps": 204665, "loss": 0.0, "lr": 1.0965327548401183e-09, "epoch": 4.932939193315907, "percentage": 98.66, "elapsed_time": "4:21:06", "remaining_time": "0:03:32", "throughput": 8686.68, "total_tokens": 136085480} +{"current_steps": 201925, "total_steps": 204665, "loss": 0.0, "lr": 1.092543916804689e-09, "epoch": 4.933061344147753, "percentage": 98.66, "elapsed_time": "4:21:06", "remaining_time": "0:03:32", "throughput": 8686.71, "total_tokens": 136089128} +{"current_steps": 201930, "total_steps": 204665, "loss": 0.0, "lr": 1.088562343092847e-09, "epoch": 4.933183494979601, "percentage": 98.66, "elapsed_time": "4:21:06", "remaining_time": "0:03:32", "throughput": 8686.74, "total_tokens": 136092584} +{"current_steps": 201935, "total_steps": 204665, "loss": 0.0, "lr": 1.084588033733791e-09, "epoch": 4.933305645811448, "percentage": 98.67, "elapsed_time": "4:21:07", "remaining_time": "0:03:31", "throughput": 8686.76, "total_tokens": 136095976} +{"current_steps": 201940, "total_steps": 204665, "loss": 0.0, "lr": 1.0806209887561646e-09, "epoch": 4.933427796643295, "percentage": 98.67, "elapsed_time": "4:21:07", "remaining_time": "0:03:31", "throughput": 8686.8, "total_tokens": 136099624} +{"current_steps": 201945, "total_steps": 204665, "loss": 0.0, "lr": 1.0766612081889448e-09, "epoch": 4.933549947475142, "percentage": 98.67, "elapsed_time": "4:21:07", "remaining_time": "0:03:31", "throughput": 8686.83, "total_tokens": 136103272} +{"current_steps": 201950, "total_steps": 204665, "loss": 0.0, "lr": 1.0727086920609973e-09, "epoch": 4.93367209830699, "percentage": 98.67, "elapsed_time": "4:21:08", "remaining_time": "0:03:30", "throughput": 8686.85, "total_tokens": 136106472} +{"current_steps": 201955, "total_steps": 204665, "loss": 0.0, "lr": 1.068763440400966e-09, "epoch": 4.9337942491388365, "percentage": 98.68, "elapsed_time": "4:21:08", "remaining_time": "0:03:30", "throughput": 8686.87, "total_tokens": 136109864} +{"current_steps": 201960, "total_steps": 204665, "loss": 0.0, "lr": 1.0648254532376055e-09, "epoch": 4.933916399970684, "percentage": 98.68, "elapsed_time": "4:21:08", "remaining_time": "0:03:29", "throughput": 8686.89, "total_tokens": 136113192} +{"current_steps": 201965, "total_steps": 204665, "loss": 0.0, "lr": 1.0608947305994487e-09, "epoch": 4.934038550802531, "percentage": 98.68, "elapsed_time": "4:21:09", "remaining_time": "0:03:29", "throughput": 8686.9, "total_tokens": 136116456} +{"current_steps": 201970, "total_steps": 204665, "loss": 0.0, "lr": 1.0569712725151392e-09, "epoch": 4.9341607016343785, "percentage": 98.68, "elapsed_time": "4:21:09", "remaining_time": "0:03:29", "throughput": 8686.93, "total_tokens": 136119848} +{"current_steps": 201975, "total_steps": 204665, "loss": 0.0, "lr": 1.0530550790132098e-09, "epoch": 4.934282852466225, "percentage": 98.69, "elapsed_time": "4:21:09", "remaining_time": "0:03:28", "throughput": 8686.92, "total_tokens": 136122664} +{"current_steps": 201980, "total_steps": 204665, "loss": 0.0279, "lr": 1.0491461501221932e-09, "epoch": 4.934405003298073, "percentage": 98.69, "elapsed_time": "4:21:10", "remaining_time": "0:03:28", "throughput": 8686.94, "total_tokens": 136126056} +{"current_steps": 201985, "total_steps": 204665, "loss": 0.0, "lr": 1.0452444858705113e-09, "epoch": 4.93452715412992, "percentage": 98.69, "elapsed_time": "4:21:10", "remaining_time": "0:03:27", "throughput": 8686.99, "total_tokens": 136129832} +{"current_steps": 201990, "total_steps": 204665, "loss": 0.0, "lr": 1.0413500862864743e-09, "epoch": 4.934649304961766, "percentage": 98.69, "elapsed_time": "4:21:10", "remaining_time": "0:03:27", "throughput": 8686.99, "total_tokens": 136132968} +{"current_steps": 201995, "total_steps": 204665, "loss": 0.0, "lr": 1.0374629513983935e-09, "epoch": 4.934771455793614, "percentage": 98.7, "elapsed_time": "4:21:11", "remaining_time": "0:03:27", "throughput": 8687.02, "total_tokens": 136136488} +{"current_steps": 202000, "total_steps": 204665, "loss": 0.0, "lr": 1.0335830812345792e-09, "epoch": 4.934893606625462, "percentage": 98.7, "elapsed_time": "4:21:11", "remaining_time": "0:03:26", "throughput": 8687.06, "total_tokens": 136140136} +{"current_steps": 202005, "total_steps": 204665, "loss": 0.0, "lr": 1.0297104758232311e-09, "epoch": 4.935015757457308, "percentage": 98.7, "elapsed_time": "4:21:11", "remaining_time": "0:03:26", "throughput": 8687.07, "total_tokens": 136143272} +{"current_steps": 202010, "total_steps": 204665, "loss": 0.0, "lr": 1.0258451351925491e-09, "epoch": 4.935137908289155, "percentage": 98.7, "elapsed_time": "4:21:12", "remaining_time": "0:03:25", "throughput": 8687.08, "total_tokens": 136146408} +{"current_steps": 202015, "total_steps": 204665, "loss": 0.0, "lr": 1.0219870593706215e-09, "epoch": 4.935260059121003, "percentage": 98.71, "elapsed_time": "4:21:12", "remaining_time": "0:03:25", "throughput": 8687.09, "total_tokens": 136149480} +{"current_steps": 202020, "total_steps": 204665, "loss": 0.0, "lr": 1.0181362483854262e-09, "epoch": 4.9353822099528495, "percentage": 98.71, "elapsed_time": "4:21:12", "remaining_time": "0:03:25", "throughput": 8687.1, "total_tokens": 136152616} +{"current_steps": 202025, "total_steps": 204665, "loss": 0.0, "lr": 1.0142927022650516e-09, "epoch": 4.935504360784697, "percentage": 98.71, "elapsed_time": "4:21:13", "remaining_time": "0:03:24", "throughput": 8687.1, "total_tokens": 136155688} +{"current_steps": 202030, "total_steps": 204665, "loss": 0.0, "lr": 1.0104564210374756e-09, "epoch": 4.935626511616544, "percentage": 98.71, "elapsed_time": "4:21:13", "remaining_time": "0:03:24", "throughput": 8687.12, "total_tokens": 136159016} +{"current_steps": 202035, "total_steps": 204665, "loss": 0.0, "lr": 1.0066274047305645e-09, "epoch": 4.9357486624483915, "percentage": 98.71, "elapsed_time": "4:21:14", "remaining_time": "0:03:24", "throughput": 8687.15, "total_tokens": 136162600} +{"current_steps": 202040, "total_steps": 204665, "loss": 0.0, "lr": 1.0028056533720742e-09, "epoch": 4.935870813280238, "percentage": 98.72, "elapsed_time": "4:21:14", "remaining_time": "0:03:23", "throughput": 8687.17, "total_tokens": 136165864} +{"current_steps": 202045, "total_steps": 204665, "loss": 0.0, "lr": 9.98991166989982e-10, "epoch": 4.935992964112086, "percentage": 98.72, "elapsed_time": "4:21:14", "remaining_time": "0:03:23", "throughput": 8687.19, "total_tokens": 136169128} +{"current_steps": 202050, "total_steps": 204665, "loss": 0.0, "lr": 9.951839456119327e-10, "epoch": 4.936115114943933, "percentage": 98.72, "elapsed_time": "4:21:15", "remaining_time": "0:03:22", "throughput": 8687.25, "total_tokens": 136173224} +{"current_steps": 202055, "total_steps": 204665, "loss": 0.0, "lr": 9.913839892654596e-10, "epoch": 4.93623726577578, "percentage": 98.72, "elapsed_time": "4:21:15", "remaining_time": "0:03:22", "throughput": 8687.29, "total_tokens": 136176936} +{"current_steps": 202060, "total_steps": 204665, "loss": 0.0, "lr": 9.875912979784296e-10, "epoch": 4.936359416607627, "percentage": 98.73, "elapsed_time": "4:21:15", "remaining_time": "0:03:22", "throughput": 8687.31, "total_tokens": 136180392} +{"current_steps": 202065, "total_steps": 204665, "loss": 0.0, "lr": 9.83805871778376e-10, "epoch": 4.936481567439475, "percentage": 98.73, "elapsed_time": "4:21:16", "remaining_time": "0:03:21", "throughput": 8687.32, "total_tokens": 136183400} +{"current_steps": 202070, "total_steps": 204665, "loss": 0.0, "lr": 9.800277106927213e-10, "epoch": 4.936603718271321, "percentage": 98.73, "elapsed_time": "4:21:16", "remaining_time": "0:03:21", "throughput": 8687.34, "total_tokens": 136186792} +{"current_steps": 202075, "total_steps": 204665, "loss": 0.0, "lr": 9.762568147491102e-10, "epoch": 4.936725869103169, "percentage": 98.73, "elapsed_time": "4:21:16", "remaining_time": "0:03:20", "throughput": 8687.37, "total_tokens": 136190376} +{"current_steps": 202080, "total_steps": 204665, "loss": 0.0, "lr": 9.72493183974743e-10, "epoch": 4.936848019935016, "percentage": 98.74, "elapsed_time": "4:21:17", "remaining_time": "0:03:20", "throughput": 8687.37, "total_tokens": 136193384} +{"current_steps": 202085, "total_steps": 204665, "loss": 0.0, "lr": 9.687368183972644e-10, "epoch": 4.9369701707668625, "percentage": 98.74, "elapsed_time": "4:21:17", "remaining_time": "0:03:20", "throughput": 8687.4, "total_tokens": 136196840} +{"current_steps": 202090, "total_steps": 204665, "loss": 0.0, "lr": 9.649877180437637e-10, "epoch": 4.93709232159871, "percentage": 98.74, "elapsed_time": "4:21:17", "remaining_time": "0:03:19", "throughput": 8687.43, "total_tokens": 136200360} +{"current_steps": 202095, "total_steps": 204665, "loss": 0.0, "lr": 9.612458829415527e-10, "epoch": 4.937214472430557, "percentage": 98.74, "elapsed_time": "4:21:18", "remaining_time": "0:03:19", "throughput": 8687.46, "total_tokens": 136203944} +{"current_steps": 202100, "total_steps": 204665, "loss": 0.0, "lr": 9.575113131178315e-10, "epoch": 4.9373366232624045, "percentage": 98.75, "elapsed_time": "4:21:18", "remaining_time": "0:03:18", "throughput": 8687.49, "total_tokens": 136207528} +{"current_steps": 202105, "total_steps": 204665, "loss": 0.041, "lr": 9.537840085998006e-10, "epoch": 4.937458774094251, "percentage": 98.75, "elapsed_time": "4:21:18", "remaining_time": "0:03:18", "throughput": 8687.49, "total_tokens": 136210472} +{"current_steps": 202110, "total_steps": 204665, "loss": 0.0, "lr": 9.500639694146606e-10, "epoch": 4.937580924926099, "percentage": 98.75, "elapsed_time": "4:21:19", "remaining_time": "0:03:18", "throughput": 8687.5, "total_tokens": 136213544} +{"current_steps": 202115, "total_steps": 204665, "loss": 0.0, "lr": 9.46351195589279e-10, "epoch": 4.937703075757946, "percentage": 98.75, "elapsed_time": "4:21:19", "remaining_time": "0:03:17", "throughput": 8687.51, "total_tokens": 136216616} +{"current_steps": 202120, "total_steps": 204665, "loss": 0.0, "lr": 9.426456871508559e-10, "epoch": 4.937825226589793, "percentage": 98.76, "elapsed_time": "4:21:19", "remaining_time": "0:03:17", "throughput": 8687.53, "total_tokens": 136220008} +{"current_steps": 202125, "total_steps": 204665, "loss": 0.0, "lr": 9.38947444126148e-10, "epoch": 4.93794737742164, "percentage": 98.76, "elapsed_time": "4:21:20", "remaining_time": "0:03:17", "throughput": 8687.54, "total_tokens": 136223080} +{"current_steps": 202130, "total_steps": 204665, "loss": 0.0, "lr": 9.352564665421337e-10, "epoch": 4.938069528253488, "percentage": 98.76, "elapsed_time": "4:21:20", "remaining_time": "0:03:16", "throughput": 8687.57, "total_tokens": 136226664} +{"current_steps": 202135, "total_steps": 204665, "loss": 0.0, "lr": 9.315727544256801e-10, "epoch": 4.938191679085334, "percentage": 98.76, "elapsed_time": "4:21:20", "remaining_time": "0:03:16", "throughput": 8687.59, "total_tokens": 136229928} +{"current_steps": 202140, "total_steps": 204665, "loss": 0.0, "lr": 9.27896307803433e-10, "epoch": 4.938313829917182, "percentage": 98.77, "elapsed_time": "4:21:21", "remaining_time": "0:03:15", "throughput": 8687.59, "total_tokens": 136232936} +{"current_steps": 202145, "total_steps": 204665, "loss": 0.0, "lr": 9.242271267023705e-10, "epoch": 4.938435980749029, "percentage": 98.77, "elapsed_time": "4:21:21", "remaining_time": "0:03:15", "throughput": 8687.61, "total_tokens": 136236264} +{"current_steps": 202150, "total_steps": 204665, "loss": 0.0002, "lr": 9.20565211149027e-10, "epoch": 4.938558131580876, "percentage": 98.77, "elapsed_time": "4:21:22", "remaining_time": "0:03:15", "throughput": 8687.62, "total_tokens": 136239400} +{"current_steps": 202155, "total_steps": 204665, "loss": 0.0, "lr": 9.169105611699369e-10, "epoch": 4.938680282412723, "percentage": 98.77, "elapsed_time": "4:21:22", "remaining_time": "0:03:14", "throughput": 8687.63, "total_tokens": 136242536} +{"current_steps": 202160, "total_steps": 204665, "loss": 0.0, "lr": 9.132631767919674e-10, "epoch": 4.93880243324457, "percentage": 98.78, "elapsed_time": "4:21:22", "remaining_time": "0:03:14", "throughput": 8687.64, "total_tokens": 136245736} +{"current_steps": 202165, "total_steps": 204665, "loss": 0.0, "lr": 9.096230580413201e-10, "epoch": 4.9389245840764175, "percentage": 98.78, "elapsed_time": "4:21:23", "remaining_time": "0:03:13", "throughput": 8687.66, "total_tokens": 136249064} +{"current_steps": 202170, "total_steps": 204665, "loss": 0.0, "lr": 9.05990204944751e-10, "epoch": 4.939046734908265, "percentage": 98.78, "elapsed_time": "4:21:23", "remaining_time": "0:03:13", "throughput": 8687.69, "total_tokens": 136252456} +{"current_steps": 202175, "total_steps": 204665, "loss": 0.0, "lr": 9.023646175284616e-10, "epoch": 4.939168885740112, "percentage": 98.78, "elapsed_time": "4:21:23", "remaining_time": "0:03:13", "throughput": 8687.72, "total_tokens": 136256104} +{"current_steps": 202180, "total_steps": 204665, "loss": 0.0, "lr": 8.987462958189862e-10, "epoch": 4.939291036571959, "percentage": 98.79, "elapsed_time": "4:21:24", "remaining_time": "0:03:12", "throughput": 8687.74, "total_tokens": 136259432} +{"current_steps": 202185, "total_steps": 204665, "loss": 0.0, "lr": 8.95135239842415e-10, "epoch": 4.939413187403806, "percentage": 98.79, "elapsed_time": "4:21:24", "remaining_time": "0:03:12", "throughput": 8687.77, "total_tokens": 136262888} +{"current_steps": 202190, "total_steps": 204665, "loss": 0.0, "lr": 8.915314496252824e-10, "epoch": 4.939535338235653, "percentage": 98.79, "elapsed_time": "4:21:24", "remaining_time": "0:03:11", "throughput": 8687.77, "total_tokens": 136265896} +{"current_steps": 202195, "total_steps": 204665, "loss": 0.0, "lr": 8.879349251935675e-10, "epoch": 4.939657489067501, "percentage": 98.79, "elapsed_time": "4:21:25", "remaining_time": "0:03:11", "throughput": 8687.79, "total_tokens": 136269288} +{"current_steps": 202200, "total_steps": 204665, "loss": 0.0, "lr": 8.843456665735827e-10, "epoch": 4.939779639899347, "percentage": 98.8, "elapsed_time": "4:21:25", "remaining_time": "0:03:11", "throughput": 8687.81, "total_tokens": 136272552} +{"current_steps": 202205, "total_steps": 204665, "loss": 0.0, "lr": 8.807636737913071e-10, "epoch": 4.939901790731195, "percentage": 98.8, "elapsed_time": "4:21:25", "remaining_time": "0:03:10", "throughput": 8687.82, "total_tokens": 136275688} +{"current_steps": 202210, "total_steps": 204665, "loss": 0.0, "lr": 8.771889468728311e-10, "epoch": 4.940023941563042, "percentage": 98.8, "elapsed_time": "4:21:26", "remaining_time": "0:03:10", "throughput": 8687.82, "total_tokens": 136278632} +{"current_steps": 202215, "total_steps": 204665, "loss": 0.0, "lr": 8.736214858442448e-10, "epoch": 4.940146092394889, "percentage": 98.8, "elapsed_time": "4:21:26", "remaining_time": "0:03:10", "throughput": 8687.85, "total_tokens": 136282152} +{"current_steps": 202220, "total_steps": 204665, "loss": 0.0, "lr": 8.700612907314164e-10, "epoch": 4.940268243226736, "percentage": 98.81, "elapsed_time": "4:21:26", "remaining_time": "0:03:09", "throughput": 8687.85, "total_tokens": 136285160} +{"current_steps": 202225, "total_steps": 204665, "loss": 0.0576, "lr": 8.665083615602142e-10, "epoch": 4.940390394058584, "percentage": 98.81, "elapsed_time": "4:21:27", "remaining_time": "0:03:09", "throughput": 8687.87, "total_tokens": 136288424} +{"current_steps": 202230, "total_steps": 204665, "loss": 0.1, "lr": 8.629626983565064e-10, "epoch": 4.940512544890431, "percentage": 98.81, "elapsed_time": "4:21:27", "remaining_time": "0:03:08", "throughput": 8687.88, "total_tokens": 136291560} +{"current_steps": 202235, "total_steps": 204665, "loss": 0.0, "lr": 8.5942430114605e-10, "epoch": 4.940634695722278, "percentage": 98.81, "elapsed_time": "4:21:27", "remaining_time": "0:03:08", "throughput": 8687.87, "total_tokens": 136294376} +{"current_steps": 202240, "total_steps": 204665, "loss": 0.0, "lr": 8.558931699546023e-10, "epoch": 4.940756846554125, "percentage": 98.82, "elapsed_time": "4:21:28", "remaining_time": "0:03:08", "throughput": 8687.87, "total_tokens": 136297384} +{"current_steps": 202245, "total_steps": 204665, "loss": 0.0, "lr": 8.523693048078096e-10, "epoch": 4.940878997385973, "percentage": 98.82, "elapsed_time": "4:21:28", "remaining_time": "0:03:07", "throughput": 8687.9, "total_tokens": 136300968} +{"current_steps": 202250, "total_steps": 204665, "loss": 0.0, "lr": 8.488527057313177e-10, "epoch": 4.941001148217819, "percentage": 98.82, "elapsed_time": "4:21:28", "remaining_time": "0:03:07", "throughput": 8687.94, "total_tokens": 136304552} +{"current_steps": 202255, "total_steps": 204665, "loss": 0.0, "lr": 8.45343372750773e-10, "epoch": 4.941123299049666, "percentage": 98.82, "elapsed_time": "4:21:29", "remaining_time": "0:03:06", "throughput": 8687.99, "total_tokens": 136308520} +{"current_steps": 202260, "total_steps": 204665, "loss": 0.0, "lr": 8.418413058915997e-10, "epoch": 4.941245449881514, "percentage": 98.82, "elapsed_time": "4:21:29", "remaining_time": "0:03:06", "throughput": 8688.02, "total_tokens": 136312104} +{"current_steps": 202265, "total_steps": 204665, "loss": 0.0002, "lr": 8.383465051792216e-10, "epoch": 4.941367600713361, "percentage": 98.83, "elapsed_time": "4:21:30", "remaining_time": "0:03:06", "throughput": 8688.04, "total_tokens": 136315304} +{"current_steps": 202270, "total_steps": 204665, "loss": 0.0, "lr": 8.34858970639285e-10, "epoch": 4.941489751545208, "percentage": 98.83, "elapsed_time": "4:21:30", "remaining_time": "0:03:05", "throughput": 8688.04, "total_tokens": 136318312} +{"current_steps": 202275, "total_steps": 204665, "loss": 0.0376, "lr": 8.31378702296881e-10, "epoch": 4.941611902377055, "percentage": 98.83, "elapsed_time": "4:21:30", "remaining_time": "0:03:05", "throughput": 8688.08, "total_tokens": 136322024} +{"current_steps": 202280, "total_steps": 204665, "loss": 0.0, "lr": 8.279057001774336e-10, "epoch": 4.9417340532089025, "percentage": 98.83, "elapsed_time": "4:21:31", "remaining_time": "0:03:05", "throughput": 8688.08, "total_tokens": 136325160} +{"current_steps": 202285, "total_steps": 204665, "loss": 0.0001, "lr": 8.244399643062561e-10, "epoch": 4.941856204040749, "percentage": 98.84, "elapsed_time": "4:21:31", "remaining_time": "0:03:04", "throughput": 8688.09, "total_tokens": 136328296} +{"current_steps": 202290, "total_steps": 204665, "loss": 0.0, "lr": 8.209814947084392e-10, "epoch": 4.941978354872597, "percentage": 98.84, "elapsed_time": "4:21:31", "remaining_time": "0:03:04", "throughput": 8688.16, "total_tokens": 136332584} +{"current_steps": 202295, "total_steps": 204665, "loss": 0.0, "lr": 8.175302914092963e-10, "epoch": 4.942100505704444, "percentage": 98.84, "elapsed_time": "4:21:32", "remaining_time": "0:03:03", "throughput": 8688.18, "total_tokens": 136335912} +{"current_steps": 202300, "total_steps": 204665, "loss": 0.0, "lr": 8.140863544336963e-10, "epoch": 4.942222656536291, "percentage": 98.84, "elapsed_time": "4:21:32", "remaining_time": "0:03:03", "throughput": 8688.18, "total_tokens": 136338856} +{"current_steps": 202305, "total_steps": 204665, "loss": 0.0, "lr": 8.106496838069521e-10, "epoch": 4.942344807368138, "percentage": 98.85, "elapsed_time": "4:21:32", "remaining_time": "0:03:03", "throughput": 8688.19, "total_tokens": 136341928} +{"current_steps": 202310, "total_steps": 204665, "loss": 0.0, "lr": 8.072202795538219e-10, "epoch": 4.942466958199986, "percentage": 98.85, "elapsed_time": "4:21:33", "remaining_time": "0:03:02", "throughput": 8688.21, "total_tokens": 136345256} +{"current_steps": 202315, "total_steps": 204665, "loss": 0.0, "lr": 8.037981416992857e-10, "epoch": 4.942589109031832, "percentage": 98.85, "elapsed_time": "4:21:33", "remaining_time": "0:03:02", "throughput": 8688.22, "total_tokens": 136348456} +{"current_steps": 202320, "total_steps": 204665, "loss": 0.0, "lr": 8.003832702683233e-10, "epoch": 4.94271125986368, "percentage": 98.85, "elapsed_time": "4:21:33", "remaining_time": "0:03:01", "throughput": 8688.24, "total_tokens": 136351720} +{"current_steps": 202325, "total_steps": 204665, "loss": 0.0, "lr": 7.969756652858039e-10, "epoch": 4.942833410695527, "percentage": 98.86, "elapsed_time": "4:21:34", "remaining_time": "0:03:01", "throughput": 8688.29, "total_tokens": 136355752} +{"current_steps": 202330, "total_steps": 204665, "loss": 0.0, "lr": 7.935753267763745e-10, "epoch": 4.942955561527374, "percentage": 98.86, "elapsed_time": "4:21:34", "remaining_time": "0:03:01", "throughput": 8688.31, "total_tokens": 136359016} +{"current_steps": 202335, "total_steps": 204665, "loss": 0.0855, "lr": 7.901822547647929e-10, "epoch": 4.943077712359221, "percentage": 98.86, "elapsed_time": "4:21:34", "remaining_time": "0:03:00", "throughput": 8688.3, "total_tokens": 136361704} +{"current_steps": 202340, "total_steps": 204665, "loss": 0.0, "lr": 7.867964492758172e-10, "epoch": 4.943199863191069, "percentage": 98.86, "elapsed_time": "4:21:35", "remaining_time": "0:03:00", "throughput": 8688.32, "total_tokens": 136365096} +{"current_steps": 202345, "total_steps": 204665, "loss": 0.0, "lr": 7.834179103339833e-10, "epoch": 4.9433220140229155, "percentage": 98.87, "elapsed_time": "4:21:35", "remaining_time": "0:02:59", "throughput": 8688.36, "total_tokens": 136368744} +{"current_steps": 202350, "total_steps": 204665, "loss": 0.0501, "lr": 7.800466379638271e-10, "epoch": 4.943444164854762, "percentage": 98.87, "elapsed_time": "4:21:35", "remaining_time": "0:02:59", "throughput": 8688.39, "total_tokens": 136372392} +{"current_steps": 202355, "total_steps": 204665, "loss": 0.0, "lr": 7.766826321899955e-10, "epoch": 4.94356631568661, "percentage": 98.87, "elapsed_time": "4:21:36", "remaining_time": "0:02:59", "throughput": 8688.43, "total_tokens": 136376168} +{"current_steps": 202360, "total_steps": 204665, "loss": 0.0, "lr": 7.733258930369135e-10, "epoch": 4.943688466518457, "percentage": 98.87, "elapsed_time": "4:21:36", "remaining_time": "0:02:58", "throughput": 8688.45, "total_tokens": 136379368} +{"current_steps": 202365, "total_steps": 204665, "loss": 0.0, "lr": 7.69976420528895e-10, "epoch": 4.943810617350304, "percentage": 98.88, "elapsed_time": "4:21:36", "remaining_time": "0:02:58", "throughput": 8688.47, "total_tokens": 136382888} +{"current_steps": 202370, "total_steps": 204665, "loss": 0.0, "lr": 7.666342146904759e-10, "epoch": 4.943932768182151, "percentage": 98.88, "elapsed_time": "4:21:37", "remaining_time": "0:02:58", "throughput": 8688.51, "total_tokens": 136386472} +{"current_steps": 202375, "total_steps": 204665, "loss": 0.0, "lr": 7.632992755457479e-10, "epoch": 4.944054919013999, "percentage": 98.88, "elapsed_time": "4:21:37", "remaining_time": "0:02:57", "throughput": 8688.53, "total_tokens": 136389864} +{"current_steps": 202380, "total_steps": 204665, "loss": 0.0, "lr": 7.599716031191361e-10, "epoch": 4.944177069845845, "percentage": 98.88, "elapsed_time": "4:21:38", "remaining_time": "0:02:57", "throughput": 8688.56, "total_tokens": 136393512} +{"current_steps": 202385, "total_steps": 204665, "loss": 0.0, "lr": 7.566511974347322e-10, "epoch": 4.944299220677693, "percentage": 98.89, "elapsed_time": "4:21:38", "remaining_time": "0:02:56", "throughput": 8688.58, "total_tokens": 136396712} +{"current_steps": 202390, "total_steps": 204665, "loss": 0.0001, "lr": 7.533380585167393e-10, "epoch": 4.94442137150954, "percentage": 98.89, "elapsed_time": "4:21:38", "remaining_time": "0:02:56", "throughput": 8688.6, "total_tokens": 136400104} +{"current_steps": 202395, "total_steps": 204665, "loss": 0.0, "lr": 7.50032186389249e-10, "epoch": 4.944543522341387, "percentage": 98.89, "elapsed_time": "4:21:39", "remaining_time": "0:02:56", "throughput": 8688.62, "total_tokens": 136403432} +{"current_steps": 202400, "total_steps": 204665, "loss": 0.0, "lr": 7.467335810762421e-10, "epoch": 4.944665673173234, "percentage": 98.89, "elapsed_time": "4:21:39", "remaining_time": "0:02:55", "throughput": 8688.65, "total_tokens": 136406888} +{"current_steps": 202405, "total_steps": 204665, "loss": 0.0, "lr": 7.434422426018105e-10, "epoch": 4.944787824005082, "percentage": 98.9, "elapsed_time": "4:21:39", "remaining_time": "0:02:55", "throughput": 8688.67, "total_tokens": 136410280} +{"current_steps": 202410, "total_steps": 204665, "loss": 0.0, "lr": 7.401581709898241e-10, "epoch": 4.9449099748369285, "percentage": 98.9, "elapsed_time": "4:21:40", "remaining_time": "0:02:54", "throughput": 8688.69, "total_tokens": 136413544} +{"current_steps": 202415, "total_steps": 204665, "loss": 0.0, "lr": 7.368813662641527e-10, "epoch": 4.945032125668776, "percentage": 98.9, "elapsed_time": "4:21:40", "remaining_time": "0:02:54", "throughput": 8688.7, "total_tokens": 136416872} +{"current_steps": 202420, "total_steps": 204665, "loss": 0.0, "lr": 7.336118284486659e-10, "epoch": 4.945154276500623, "percentage": 98.9, "elapsed_time": "4:21:40", "remaining_time": "0:02:54", "throughput": 8688.72, "total_tokens": 136420008} +{"current_steps": 202425, "total_steps": 204665, "loss": 0.0, "lr": 7.303495575671226e-10, "epoch": 4.94527642733247, "percentage": 98.91, "elapsed_time": "4:21:41", "remaining_time": "0:02:53", "throughput": 8688.75, "total_tokens": 136423656} +{"current_steps": 202430, "total_steps": 204665, "loss": 0.0, "lr": 7.270945536431705e-10, "epoch": 4.945398578164317, "percentage": 98.91, "elapsed_time": "4:21:41", "remaining_time": "0:02:53", "throughput": 8688.77, "total_tokens": 136427048} +{"current_steps": 202435, "total_steps": 204665, "loss": 0.0, "lr": 7.238468167006795e-10, "epoch": 4.945520728996165, "percentage": 98.91, "elapsed_time": "4:21:41", "remaining_time": "0:02:52", "throughput": 8688.9, "total_tokens": 136432552} +{"current_steps": 202440, "total_steps": 204665, "loss": 0.0, "lr": 7.206063467630752e-10, "epoch": 4.945642879828012, "percentage": 98.91, "elapsed_time": "4:21:42", "remaining_time": "0:02:52", "throughput": 8688.91, "total_tokens": 136435816} +{"current_steps": 202445, "total_steps": 204665, "loss": 0.0, "lr": 7.173731438540054e-10, "epoch": 4.945765030659858, "percentage": 98.92, "elapsed_time": "4:21:42", "remaining_time": "0:02:52", "throughput": 8688.93, "total_tokens": 136439080} +{"current_steps": 202450, "total_steps": 204665, "loss": 0.0, "lr": 7.141472079970068e-10, "epoch": 4.945887181491706, "percentage": 98.92, "elapsed_time": "4:21:42", "remaining_time": "0:02:51", "throughput": 8688.93, "total_tokens": 136442024} +{"current_steps": 202455, "total_steps": 204665, "loss": 0.0, "lr": 7.109285392155051e-10, "epoch": 4.946009332323553, "percentage": 98.92, "elapsed_time": "4:21:43", "remaining_time": "0:02:51", "throughput": 8688.95, "total_tokens": 136445352} +{"current_steps": 202460, "total_steps": 204665, "loss": 0.0, "lr": 7.077171375329261e-10, "epoch": 4.9461314831554, "percentage": 98.92, "elapsed_time": "4:21:43", "remaining_time": "0:02:51", "throughput": 8688.97, "total_tokens": 136448616} +{"current_steps": 202465, "total_steps": 204665, "loss": 0.0, "lr": 7.045130029725843e-10, "epoch": 4.946253633987247, "percentage": 98.93, "elapsed_time": "4:21:44", "remaining_time": "0:02:50", "throughput": 8688.98, "total_tokens": 136451816} +{"current_steps": 202470, "total_steps": 204665, "loss": 0.0, "lr": 7.013161355577945e-10, "epoch": 4.946375784819095, "percentage": 98.93, "elapsed_time": "4:21:44", "remaining_time": "0:02:50", "throughput": 8689.03, "total_tokens": 136455720} +{"current_steps": 202475, "total_steps": 204665, "loss": 0.0, "lr": 6.981265353117605e-10, "epoch": 4.9464979356509415, "percentage": 98.93, "elapsed_time": "4:21:44", "remaining_time": "0:02:49", "throughput": 8689.06, "total_tokens": 136459304} +{"current_steps": 202480, "total_steps": 204665, "loss": 0.0, "lr": 6.949442022577967e-10, "epoch": 4.946620086482789, "percentage": 98.93, "elapsed_time": "4:21:45", "remaining_time": "0:02:49", "throughput": 8689.08, "total_tokens": 136462632} +{"current_steps": 202485, "total_steps": 204665, "loss": 0.0, "lr": 6.917691364188849e-10, "epoch": 4.946742237314636, "percentage": 98.93, "elapsed_time": "4:21:45", "remaining_time": "0:02:49", "throughput": 8689.11, "total_tokens": 136466152} +{"current_steps": 202490, "total_steps": 204665, "loss": 0.0001, "lr": 6.886013378183397e-10, "epoch": 4.9468643881464835, "percentage": 98.94, "elapsed_time": "4:21:45", "remaining_time": "0:02:48", "throughput": 8689.12, "total_tokens": 136469352} +{"current_steps": 202495, "total_steps": 204665, "loss": 0.0, "lr": 6.854408064790318e-10, "epoch": 4.94698653897833, "percentage": 98.94, "elapsed_time": "4:21:46", "remaining_time": "0:02:48", "throughput": 8689.18, "total_tokens": 136473512} +{"current_steps": 202500, "total_steps": 204665, "loss": 0.0, "lr": 6.822875424239427e-10, "epoch": 4.947108689810178, "percentage": 98.94, "elapsed_time": "4:21:46", "remaining_time": "0:02:47", "throughput": 8689.23, "total_tokens": 136477416} +{"current_steps": 202505, "total_steps": 204665, "loss": 0.0, "lr": 6.79141545676054e-10, "epoch": 4.947230840642025, "percentage": 98.94, "elapsed_time": "4:21:46", "remaining_time": "0:02:47", "throughput": 8689.25, "total_tokens": 136480616} +{"current_steps": 202510, "total_steps": 204665, "loss": 0.0, "lr": 6.760028162582365e-10, "epoch": 4.947352991473872, "percentage": 98.95, "elapsed_time": "4:21:47", "remaining_time": "0:02:47", "throughput": 8689.26, "total_tokens": 136483752} +{"current_steps": 202515, "total_steps": 204665, "loss": 0.0, "lr": 6.728713541933606e-10, "epoch": 4.947475142305719, "percentage": 98.95, "elapsed_time": "4:21:47", "remaining_time": "0:02:46", "throughput": 8689.26, "total_tokens": 136486760} +{"current_steps": 202520, "total_steps": 204665, "loss": 0.0, "lr": 6.697471595040749e-10, "epoch": 4.947597293137566, "percentage": 98.95, "elapsed_time": "4:21:47", "remaining_time": "0:02:46", "throughput": 8689.28, "total_tokens": 136490088} +{"current_steps": 202525, "total_steps": 204665, "loss": 0.0, "lr": 6.6663023221325e-10, "epoch": 4.947719443969413, "percentage": 98.95, "elapsed_time": "4:21:48", "remaining_time": "0:02:45", "throughput": 8689.32, "total_tokens": 136493928} +{"current_steps": 202530, "total_steps": 204665, "loss": 0.0, "lr": 6.635205723434234e-10, "epoch": 4.947841594801261, "percentage": 98.96, "elapsed_time": "4:21:48", "remaining_time": "0:02:45", "throughput": 8689.37, "total_tokens": 136497832} +{"current_steps": 202535, "total_steps": 204665, "loss": 0.0, "lr": 6.604181799172437e-10, "epoch": 4.947963745633108, "percentage": 98.96, "elapsed_time": "4:21:48", "remaining_time": "0:02:45", "throughput": 8689.38, "total_tokens": 136500968} +{"current_steps": 202540, "total_steps": 204665, "loss": 0.0, "lr": 6.573230549573594e-10, "epoch": 4.948085896464955, "percentage": 98.96, "elapsed_time": "4:21:49", "remaining_time": "0:02:44", "throughput": 8689.4, "total_tokens": 136504296} +{"current_steps": 202545, "total_steps": 204665, "loss": 0.0, "lr": 6.54235197486197e-10, "epoch": 4.948208047296802, "percentage": 98.96, "elapsed_time": "4:21:49", "remaining_time": "0:02:44", "throughput": 8689.44, "total_tokens": 136508008} +{"current_steps": 202550, "total_steps": 204665, "loss": 0.0, "lr": 6.511546075261831e-10, "epoch": 4.948330198128649, "percentage": 98.97, "elapsed_time": "4:21:50", "remaining_time": "0:02:44", "throughput": 8689.48, "total_tokens": 136511656} +{"current_steps": 202555, "total_steps": 204665, "loss": 0.0, "lr": 6.480812850997442e-10, "epoch": 4.948452348960497, "percentage": 98.97, "elapsed_time": "4:21:50", "remaining_time": "0:02:43", "throughput": 8689.49, "total_tokens": 136514856} +{"current_steps": 202560, "total_steps": 204665, "loss": 0.0, "lr": 6.450152302293066e-10, "epoch": 4.948574499792343, "percentage": 98.97, "elapsed_time": "4:21:50", "remaining_time": "0:02:43", "throughput": 8689.54, "total_tokens": 136518888} +{"current_steps": 202565, "total_steps": 204665, "loss": 0.0, "lr": 6.41956442937186e-10, "epoch": 4.948696650624191, "percentage": 98.97, "elapsed_time": "4:21:51", "remaining_time": "0:02:42", "throughput": 8689.57, "total_tokens": 136522344} +{"current_steps": 202570, "total_steps": 204665, "loss": 0.0, "lr": 6.389049232454757e-10, "epoch": 4.948818801456038, "percentage": 98.98, "elapsed_time": "4:21:51", "remaining_time": "0:02:42", "throughput": 8689.58, "total_tokens": 136525544} +{"current_steps": 202575, "total_steps": 204665, "loss": 0.0, "lr": 6.358606711763803e-10, "epoch": 4.948940952287885, "percentage": 98.98, "elapsed_time": "4:21:51", "remaining_time": "0:02:42", "throughput": 8689.6, "total_tokens": 136528872} +{"current_steps": 202580, "total_steps": 204665, "loss": 0.0, "lr": 6.328236867522152e-10, "epoch": 4.949063103119732, "percentage": 98.98, "elapsed_time": "4:21:52", "remaining_time": "0:02:41", "throughput": 8689.62, "total_tokens": 136532264} +{"current_steps": 202585, "total_steps": 204665, "loss": 0.0615, "lr": 6.297939699948518e-10, "epoch": 4.94918525395158, "percentage": 98.98, "elapsed_time": "4:21:52", "remaining_time": "0:02:41", "throughput": 8689.63, "total_tokens": 136535272} +{"current_steps": 202590, "total_steps": 204665, "loss": 0.0, "lr": 6.267715209264945e-10, "epoch": 4.9493074047834265, "percentage": 98.99, "elapsed_time": "4:21:52", "remaining_time": "0:02:40", "throughput": 8689.65, "total_tokens": 136538664} +{"current_steps": 202595, "total_steps": 204665, "loss": 0.0, "lr": 6.237563395690149e-10, "epoch": 4.949429555615274, "percentage": 98.99, "elapsed_time": "4:21:53", "remaining_time": "0:02:40", "throughput": 8689.68, "total_tokens": 136542120} +{"current_steps": 202600, "total_steps": 204665, "loss": 0.0, "lr": 6.207484259443952e-10, "epoch": 4.949551706447121, "percentage": 98.99, "elapsed_time": "4:21:53", "remaining_time": "0:02:40", "throughput": 8689.71, "total_tokens": 136545768} +{"current_steps": 202605, "total_steps": 204665, "loss": 0.0, "lr": 6.177477800745067e-10, "epoch": 4.9496738572789685, "percentage": 98.99, "elapsed_time": "4:21:53", "remaining_time": "0:02:39", "throughput": 8689.73, "total_tokens": 136548968} +{"current_steps": 202610, "total_steps": 204665, "loss": 0.0, "lr": 6.147544019812212e-10, "epoch": 4.949796008110815, "percentage": 99.0, "elapsed_time": "4:21:54", "remaining_time": "0:02:39", "throughput": 8689.74, "total_tokens": 136552168} +{"current_steps": 202615, "total_steps": 204665, "loss": 0.0, "lr": 6.117682916861877e-10, "epoch": 4.949918158942662, "percentage": 99.0, "elapsed_time": "4:21:54", "remaining_time": "0:02:38", "throughput": 8689.75, "total_tokens": 136555432} +{"current_steps": 202620, "total_steps": 204665, "loss": 0.0, "lr": 6.087894492111667e-10, "epoch": 4.95004030977451, "percentage": 99.0, "elapsed_time": "4:21:54", "remaining_time": "0:02:38", "throughput": 8689.78, "total_tokens": 136558952} +{"current_steps": 202625, "total_steps": 204665, "loss": 0.0, "lr": 6.058178745778076e-10, "epoch": 4.950162460606357, "percentage": 99.0, "elapsed_time": "4:21:55", "remaining_time": "0:02:38", "throughput": 8689.8, "total_tokens": 136562216} +{"current_steps": 202630, "total_steps": 204665, "loss": 0.0, "lr": 6.028535678077595e-10, "epoch": 4.950284611438204, "percentage": 99.01, "elapsed_time": "4:21:55", "remaining_time": "0:02:37", "throughput": 8689.84, "total_tokens": 136565864} +{"current_steps": 202635, "total_steps": 204665, "loss": 0.0, "lr": 5.998965289225611e-10, "epoch": 4.950406762270051, "percentage": 99.01, "elapsed_time": "4:21:55", "remaining_time": "0:02:37", "throughput": 8689.85, "total_tokens": 136569128} +{"current_steps": 202640, "total_steps": 204665, "loss": 0.0, "lr": 5.969467579437504e-10, "epoch": 4.950528913101898, "percentage": 99.01, "elapsed_time": "4:21:56", "remaining_time": "0:02:37", "throughput": 8689.86, "total_tokens": 136572200} +{"current_steps": 202645, "total_steps": 204665, "loss": 0.0, "lr": 5.940042548927548e-10, "epoch": 4.950651063933745, "percentage": 99.01, "elapsed_time": "4:21:56", "remaining_time": "0:02:36", "throughput": 8689.88, "total_tokens": 136575592} +{"current_steps": 202650, "total_steps": 204665, "loss": 0.0, "lr": 5.910690197908908e-10, "epoch": 4.950773214765593, "percentage": 99.02, "elapsed_time": "4:21:56", "remaining_time": "0:02:36", "throughput": 8689.9, "total_tokens": 136578920} +{"current_steps": 202655, "total_steps": 204665, "loss": 0.0, "lr": 5.881410526595854e-10, "epoch": 4.9508953655974395, "percentage": 99.02, "elapsed_time": "4:21:57", "remaining_time": "0:02:35", "throughput": 8689.91, "total_tokens": 136581992} +{"current_steps": 202660, "total_steps": 204665, "loss": 0.0, "lr": 5.85220353520266e-10, "epoch": 4.951017516429287, "percentage": 99.02, "elapsed_time": "4:21:57", "remaining_time": "0:02:35", "throughput": 8689.94, "total_tokens": 136585576} +{"current_steps": 202665, "total_steps": 204665, "loss": 0.0, "lr": 5.823069223939159e-10, "epoch": 4.951139667261134, "percentage": 99.02, "elapsed_time": "4:21:58", "remaining_time": "0:02:35", "throughput": 8689.99, "total_tokens": 136589480} +{"current_steps": 202670, "total_steps": 204665, "loss": 0.0, "lr": 5.794007593018512e-10, "epoch": 4.9512618180929815, "percentage": 99.03, "elapsed_time": "4:21:58", "remaining_time": "0:02:34", "throughput": 8690.01, "total_tokens": 136592808} +{"current_steps": 202675, "total_steps": 204665, "loss": 0.0, "lr": 5.765018642652775e-10, "epoch": 4.951383968924828, "percentage": 99.03, "elapsed_time": "4:21:58", "remaining_time": "0:02:34", "throughput": 8690.03, "total_tokens": 136596200} +{"current_steps": 202680, "total_steps": 204665, "loss": 0.0, "lr": 5.736102373050666e-10, "epoch": 4.951506119756676, "percentage": 99.03, "elapsed_time": "4:21:59", "remaining_time": "0:02:33", "throughput": 8690.07, "total_tokens": 136599848} +{"current_steps": 202685, "total_steps": 204665, "loss": 0.0, "lr": 5.707258784424241e-10, "epoch": 4.951628270588523, "percentage": 99.03, "elapsed_time": "4:21:59", "remaining_time": "0:02:33", "throughput": 8690.08, "total_tokens": 136603048} +{"current_steps": 202690, "total_steps": 204665, "loss": 0.0, "lr": 5.678487876983329e-10, "epoch": 4.95175042142037, "percentage": 99.04, "elapsed_time": "4:21:59", "remaining_time": "0:02:33", "throughput": 8690.14, "total_tokens": 136607080} +{"current_steps": 202695, "total_steps": 204665, "loss": 0.0, "lr": 5.649789650936654e-10, "epoch": 4.951872572252217, "percentage": 99.04, "elapsed_time": "4:22:00", "remaining_time": "0:02:32", "throughput": 8690.15, "total_tokens": 136610344} +{"current_steps": 202700, "total_steps": 204665, "loss": 0.0, "lr": 5.621164106491827e-10, "epoch": 4.951994723084065, "percentage": 99.04, "elapsed_time": "4:22:00", "remaining_time": "0:02:32", "throughput": 8690.18, "total_tokens": 136613800} +{"current_steps": 202705, "total_steps": 204665, "loss": 0.0, "lr": 5.592611243858681e-10, "epoch": 4.952116873915911, "percentage": 99.04, "elapsed_time": "4:22:00", "remaining_time": "0:02:32", "throughput": 8690.19, "total_tokens": 136617064} +{"current_steps": 202710, "total_steps": 204665, "loss": 0.0, "lr": 5.564131063244826e-10, "epoch": 4.952239024747758, "percentage": 99.04, "elapsed_time": "4:22:01", "remaining_time": "0:02:31", "throughput": 8690.21, "total_tokens": 136620392} +{"current_steps": 202715, "total_steps": 204665, "loss": 0.0346, "lr": 5.535723564855654e-10, "epoch": 4.952361175579606, "percentage": 99.05, "elapsed_time": "4:22:01", "remaining_time": "0:02:31", "throughput": 8690.22, "total_tokens": 136623528} +{"current_steps": 202720, "total_steps": 204665, "loss": 0.0, "lr": 5.507388748899889e-10, "epoch": 4.9524833264114525, "percentage": 99.05, "elapsed_time": "4:22:01", "remaining_time": "0:02:30", "throughput": 8690.24, "total_tokens": 136626728} +{"current_steps": 202725, "total_steps": 204665, "loss": 0.0, "lr": 5.479126615581808e-10, "epoch": 4.9526054772433, "percentage": 99.05, "elapsed_time": "4:22:02", "remaining_time": "0:02:30", "throughput": 8690.26, "total_tokens": 136630120} +{"current_steps": 202730, "total_steps": 204665, "loss": 0.0437, "lr": 5.450937165109026e-10, "epoch": 4.952727628075147, "percentage": 99.05, "elapsed_time": "4:22:02", "remaining_time": "0:02:30", "throughput": 8690.28, "total_tokens": 136633448} +{"current_steps": 202735, "total_steps": 204665, "loss": 0.0, "lr": 5.422820397683603e-10, "epoch": 4.9528497789069945, "percentage": 99.06, "elapsed_time": "4:22:02", "remaining_time": "0:02:29", "throughput": 8690.29, "total_tokens": 136636648} +{"current_steps": 202740, "total_steps": 204665, "loss": 0.0, "lr": 5.394776313512039e-10, "epoch": 4.952971929738841, "percentage": 99.06, "elapsed_time": "4:22:03", "remaining_time": "0:02:29", "throughput": 8690.33, "total_tokens": 136640424} +{"current_steps": 202745, "total_steps": 204665, "loss": 0.0, "lr": 5.366804912798617e-10, "epoch": 4.953094080570689, "percentage": 99.06, "elapsed_time": "4:22:03", "remaining_time": "0:02:28", "throughput": 8690.37, "total_tokens": 136644008} +{"current_steps": 202750, "total_steps": 204665, "loss": 0.0, "lr": 5.338906195745396e-10, "epoch": 4.953216231402536, "percentage": 99.06, "elapsed_time": "4:22:03", "remaining_time": "0:02:28", "throughput": 8690.4, "total_tokens": 136647528} +{"current_steps": 202755, "total_steps": 204665, "loss": 0.0, "lr": 5.311080162556658e-10, "epoch": 4.953338382234383, "percentage": 99.07, "elapsed_time": "4:22:04", "remaining_time": "0:02:28", "throughput": 8690.42, "total_tokens": 136651048} +{"current_steps": 202760, "total_steps": 204665, "loss": 0.0, "lr": 5.283326813433353e-10, "epoch": 4.95346053306623, "percentage": 99.07, "elapsed_time": "4:22:04", "remaining_time": "0:02:27", "throughput": 8690.45, "total_tokens": 136654568} +{"current_steps": 202765, "total_steps": 204665, "loss": 0.0, "lr": 5.255646148577542e-10, "epoch": 4.953582683898078, "percentage": 99.07, "elapsed_time": "4:22:05", "remaining_time": "0:02:27", "throughput": 8690.48, "total_tokens": 136658152} +{"current_steps": 202770, "total_steps": 204665, "loss": 0.0, "lr": 5.228038168191284e-10, "epoch": 4.953704834729924, "percentage": 99.07, "elapsed_time": "4:22:05", "remaining_time": "0:02:26", "throughput": 8690.51, "total_tokens": 136661608} +{"current_steps": 202775, "total_steps": 204665, "loss": 0.0, "lr": 5.200502872475531e-10, "epoch": 4.953826985561772, "percentage": 99.08, "elapsed_time": "4:22:05", "remaining_time": "0:02:26", "throughput": 8690.52, "total_tokens": 136664744} +{"current_steps": 202780, "total_steps": 204665, "loss": 0.0, "lr": 5.173040261629014e-10, "epoch": 4.953949136393619, "percentage": 99.08, "elapsed_time": "4:22:06", "remaining_time": "0:02:26", "throughput": 8690.57, "total_tokens": 136668584} +{"current_steps": 202785, "total_steps": 204665, "loss": 0.0, "lr": 5.145650335853791e-10, "epoch": 4.9540712872254655, "percentage": 99.08, "elapsed_time": "4:22:06", "remaining_time": "0:02:25", "throughput": 8690.59, "total_tokens": 136671912} +{"current_steps": 202790, "total_steps": 204665, "loss": 0.0, "lr": 5.118333095346372e-10, "epoch": 4.954193438057313, "percentage": 99.08, "elapsed_time": "4:22:06", "remaining_time": "0:02:25", "throughput": 8690.61, "total_tokens": 136675368} +{"current_steps": 202795, "total_steps": 204665, "loss": 0.0, "lr": 5.091088540307708e-10, "epoch": 4.954315588889161, "percentage": 99.09, "elapsed_time": "4:22:07", "remaining_time": "0:02:25", "throughput": 8690.64, "total_tokens": 136678888} +{"current_steps": 202800, "total_steps": 204665, "loss": 0.0, "lr": 5.06391667093431e-10, "epoch": 4.9544377397210075, "percentage": 99.09, "elapsed_time": "4:22:07", "remaining_time": "0:02:24", "throughput": 8690.68, "total_tokens": 136682600} +{"current_steps": 202805, "total_steps": 204665, "loss": 0.0, "lr": 5.036817487424905e-10, "epoch": 4.954559890552854, "percentage": 99.09, "elapsed_time": "4:22:07", "remaining_time": "0:02:24", "throughput": 8690.71, "total_tokens": 136686184} +{"current_steps": 202810, "total_steps": 204665, "loss": 0.0, "lr": 5.009790989974893e-10, "epoch": 4.954682041384702, "percentage": 99.09, "elapsed_time": "4:22:08", "remaining_time": "0:02:23", "throughput": 8690.72, "total_tokens": 136689320} +{"current_steps": 202815, "total_steps": 204665, "loss": 0.0, "lr": 4.982837178783006e-10, "epoch": 4.954804192216549, "percentage": 99.1, "elapsed_time": "4:22:08", "remaining_time": "0:02:23", "throughput": 8690.74, "total_tokens": 136692584} +{"current_steps": 202820, "total_steps": 204665, "loss": 0.0001, "lr": 4.955956054044641e-10, "epoch": 4.954926343048396, "percentage": 99.1, "elapsed_time": "4:22:08", "remaining_time": "0:02:23", "throughput": 8690.77, "total_tokens": 136696104} +{"current_steps": 202825, "total_steps": 204665, "loss": 0.0, "lr": 4.929147615954088e-10, "epoch": 4.955048493880243, "percentage": 99.1, "elapsed_time": "4:22:09", "remaining_time": "0:02:22", "throughput": 8690.79, "total_tokens": 136699496} +{"current_steps": 202830, "total_steps": 204665, "loss": 0.0, "lr": 4.902411864707856e-10, "epoch": 4.955170644712091, "percentage": 99.1, "elapsed_time": "4:22:09", "remaining_time": "0:02:22", "throughput": 8690.82, "total_tokens": 136703080} +{"current_steps": 202835, "total_steps": 204665, "loss": 0.0, "lr": 4.875748800499124e-10, "epoch": 4.955292795543937, "percentage": 99.11, "elapsed_time": "4:22:09", "remaining_time": "0:02:21", "throughput": 8690.85, "total_tokens": 136706536} +{"current_steps": 202840, "total_steps": 204665, "loss": 0.0, "lr": 4.849158423522181e-10, "epoch": 4.955414946375785, "percentage": 99.11, "elapsed_time": "4:22:10", "remaining_time": "0:02:21", "throughput": 8690.86, "total_tokens": 136709608} +{"current_steps": 202845, "total_steps": 204665, "loss": 0.0, "lr": 4.822640733971317e-10, "epoch": 4.955537097207632, "percentage": 99.11, "elapsed_time": "4:22:10", "remaining_time": "0:02:21", "throughput": 8690.87, "total_tokens": 136712872} +{"current_steps": 202850, "total_steps": 204665, "loss": 0.0, "lr": 4.796195732038599e-10, "epoch": 4.955659248039479, "percentage": 99.11, "elapsed_time": "4:22:10", "remaining_time": "0:02:20", "throughput": 8690.87, "total_tokens": 136715752} +{"current_steps": 202855, "total_steps": 204665, "loss": 0.0, "lr": 4.769823417914987e-10, "epoch": 4.955781398871326, "percentage": 99.12, "elapsed_time": "4:22:11", "remaining_time": "0:02:20", "throughput": 8690.88, "total_tokens": 136718952} +{"current_steps": 202860, "total_steps": 204665, "loss": 0.0, "lr": 4.743523791794768e-10, "epoch": 4.955903549703174, "percentage": 99.12, "elapsed_time": "4:22:11", "remaining_time": "0:02:19", "throughput": 8690.9, "total_tokens": 136722344} +{"current_steps": 202865, "total_steps": 204665, "loss": 0.0, "lr": 4.717296853867791e-10, "epoch": 4.956025700535021, "percentage": 99.12, "elapsed_time": "4:22:12", "remaining_time": "0:02:19", "throughput": 8690.92, "total_tokens": 136725608} +{"current_steps": 202870, "total_steps": 204665, "loss": 0.0, "lr": 4.691142604325016e-10, "epoch": 4.956147851366868, "percentage": 99.12, "elapsed_time": "4:22:12", "remaining_time": "0:02:19", "throughput": 8690.96, "total_tokens": 136729384} +{"current_steps": 202875, "total_steps": 204665, "loss": 0.0, "lr": 4.665061043356289e-10, "epoch": 4.956270002198715, "percentage": 99.13, "elapsed_time": "4:22:12", "remaining_time": "0:02:18", "throughput": 8690.98, "total_tokens": 136732584} +{"current_steps": 202880, "total_steps": 204665, "loss": 0.0, "lr": 4.639052171152569e-10, "epoch": 4.956392153030562, "percentage": 99.13, "elapsed_time": "4:22:13", "remaining_time": "0:02:18", "throughput": 8691.0, "total_tokens": 136735976} +{"current_steps": 202885, "total_steps": 204665, "loss": 0.0, "lr": 4.6131159879014834e-10, "epoch": 4.956514303862409, "percentage": 99.13, "elapsed_time": "4:22:13", "remaining_time": "0:02:18", "throughput": 8691.03, "total_tokens": 136739432} +{"current_steps": 202890, "total_steps": 204665, "loss": 0.0, "lr": 4.5872524937917713e-10, "epoch": 4.956636454694257, "percentage": 99.13, "elapsed_time": "4:22:13", "remaining_time": "0:02:17", "throughput": 8691.05, "total_tokens": 136742760} +{"current_steps": 202895, "total_steps": 204665, "loss": 0.0, "lr": 4.5614616890121693e-10, "epoch": 4.956758605526104, "percentage": 99.14, "elapsed_time": "4:22:14", "remaining_time": "0:02:17", "throughput": 8691.07, "total_tokens": 136746088} +{"current_steps": 202900, "total_steps": 204665, "loss": 0.0, "lr": 4.535743573750306e-10, "epoch": 4.9568807563579504, "percentage": 99.14, "elapsed_time": "4:22:14", "remaining_time": "0:02:16", "throughput": 8691.09, "total_tokens": 136749544} +{"current_steps": 202905, "total_steps": 204665, "loss": 0.0, "lr": 4.5100981481938085e-10, "epoch": 4.957002907189798, "percentage": 99.14, "elapsed_time": "4:22:14", "remaining_time": "0:02:16", "throughput": 8691.12, "total_tokens": 136753064} +{"current_steps": 202910, "total_steps": 204665, "loss": 0.0717, "lr": 4.484525412526974e-10, "epoch": 4.957125058021645, "percentage": 99.14, "elapsed_time": "4:22:15", "remaining_time": "0:02:16", "throughput": 8691.13, "total_tokens": 136756136} +{"current_steps": 202915, "total_steps": 204665, "loss": 0.0043, "lr": 4.4590253669385404e-10, "epoch": 4.9572472088534925, "percentage": 99.14, "elapsed_time": "4:22:15", "remaining_time": "0:02:15", "throughput": 8691.14, "total_tokens": 136759336} +{"current_steps": 202920, "total_steps": 204665, "loss": 0.0, "lr": 4.4335980116116946e-10, "epoch": 4.957369359685339, "percentage": 99.15, "elapsed_time": "4:22:15", "remaining_time": "0:02:15", "throughput": 8691.16, "total_tokens": 136762728} +{"current_steps": 202925, "total_steps": 204665, "loss": 0.0, "lr": 4.4082433467318436e-10, "epoch": 4.957491510517187, "percentage": 99.15, "elapsed_time": "4:22:16", "remaining_time": "0:02:14", "throughput": 8691.18, "total_tokens": 136765992} +{"current_steps": 202930, "total_steps": 204665, "loss": 0.0, "lr": 4.382961372484395e-10, "epoch": 4.957613661349034, "percentage": 99.15, "elapsed_time": "4:22:16", "remaining_time": "0:02:14", "throughput": 8691.23, "total_tokens": 136770024} +{"current_steps": 202935, "total_steps": 204665, "loss": 0.0001, "lr": 4.3577520890525353e-10, "epoch": 4.957735812180881, "percentage": 99.15, "elapsed_time": "4:22:16", "remaining_time": "0:02:14", "throughput": 8691.24, "total_tokens": 136773160} +{"current_steps": 202940, "total_steps": 204665, "loss": 0.0, "lr": 4.332615496619452e-10, "epoch": 4.957857963012728, "percentage": 99.16, "elapsed_time": "4:22:17", "remaining_time": "0:02:13", "throughput": 8691.28, "total_tokens": 136776872} +{"current_steps": 202945, "total_steps": 204665, "loss": 0.0, "lr": 4.3075515953683306e-10, "epoch": 4.957980113844576, "percentage": 99.16, "elapsed_time": "4:22:17", "remaining_time": "0:02:13", "throughput": 8691.32, "total_tokens": 136780648} +{"current_steps": 202950, "total_steps": 204665, "loss": 0.0, "lr": 4.2825603854801385e-10, "epoch": 4.958102264676422, "percentage": 99.16, "elapsed_time": "4:22:17", "remaining_time": "0:02:12", "throughput": 8691.34, "total_tokens": 136783912} +{"current_steps": 202955, "total_steps": 204665, "loss": 0.0001, "lr": 4.257641867139172e-10, "epoch": 4.95822441550827, "percentage": 99.16, "elapsed_time": "4:22:18", "remaining_time": "0:02:12", "throughput": 8691.34, "total_tokens": 136787048} +{"current_steps": 202960, "total_steps": 204665, "loss": 0.0, "lr": 4.2327960405241783e-10, "epoch": 4.958346566340117, "percentage": 99.17, "elapsed_time": "4:22:18", "remaining_time": "0:02:12", "throughput": 8691.39, "total_tokens": 136790952} +{"current_steps": 202965, "total_steps": 204665, "loss": 0.0, "lr": 4.2080229058172325e-10, "epoch": 4.958468717171964, "percentage": 99.17, "elapsed_time": "4:22:19", "remaining_time": "0:02:11", "throughput": 8691.43, "total_tokens": 136794600} +{"current_steps": 202970, "total_steps": 204665, "loss": 0.0, "lr": 4.183322463198191e-10, "epoch": 4.958590868003811, "percentage": 99.17, "elapsed_time": "4:22:19", "remaining_time": "0:02:11", "throughput": 8691.45, "total_tokens": 136797992} +{"current_steps": 202975, "total_steps": 204665, "loss": 0.0, "lr": 4.1586947128458006e-10, "epoch": 4.958713018835658, "percentage": 99.17, "elapsed_time": "4:22:19", "remaining_time": "0:02:11", "throughput": 8691.49, "total_tokens": 136801640} +{"current_steps": 202980, "total_steps": 204665, "loss": 0.0, "lr": 4.134139654941027e-10, "epoch": 4.9588351696675055, "percentage": 99.18, "elapsed_time": "4:22:20", "remaining_time": "0:02:10", "throughput": 8691.51, "total_tokens": 136804968} +{"current_steps": 202985, "total_steps": 204665, "loss": 0.0, "lr": 4.109657289660395e-10, "epoch": 4.958957320499352, "percentage": 99.18, "elapsed_time": "4:22:20", "remaining_time": "0:02:10", "throughput": 8691.5, "total_tokens": 136807848} +{"current_steps": 202990, "total_steps": 204665, "loss": 0.0, "lr": 4.085247617183762e-10, "epoch": 4.9590794713312, "percentage": 99.18, "elapsed_time": "4:22:20", "remaining_time": "0:02:09", "throughput": 8691.53, "total_tokens": 136811368} +{"current_steps": 202995, "total_steps": 204665, "loss": 0.0, "lr": 4.0609106376876537e-10, "epoch": 4.959201622163047, "percentage": 99.18, "elapsed_time": "4:22:21", "remaining_time": "0:02:09", "throughput": 8691.54, "total_tokens": 136814376} +{"current_steps": 203000, "total_steps": 204665, "loss": 0.0, "lr": 4.036646351348594e-10, "epoch": 4.959323772994894, "percentage": 99.19, "elapsed_time": "4:22:21", "remaining_time": "0:02:09", "throughput": 8691.56, "total_tokens": 136817704} +{"current_steps": 203005, "total_steps": 204665, "loss": 0.0, "lr": 4.012454758344219e-10, "epoch": 4.959445923826741, "percentage": 99.19, "elapsed_time": "4:22:21", "remaining_time": "0:02:08", "throughput": 8691.58, "total_tokens": 136821096} +{"current_steps": 203010, "total_steps": 204665, "loss": 0.0, "lr": 3.988335858849945e-10, "epoch": 4.959568074658589, "percentage": 99.19, "elapsed_time": "4:22:22", "remaining_time": "0:02:08", "throughput": 8691.64, "total_tokens": 136825320} +{"current_steps": 203015, "total_steps": 204665, "loss": 0.0, "lr": 3.964289653040076e-10, "epoch": 4.959690225490435, "percentage": 99.19, "elapsed_time": "4:22:22", "remaining_time": "0:02:07", "throughput": 8691.66, "total_tokens": 136828520} +{"current_steps": 203020, "total_steps": 204665, "loss": 0.0, "lr": 3.940316141091138e-10, "epoch": 4.959812376322283, "percentage": 99.2, "elapsed_time": "4:22:22", "remaining_time": "0:02:07", "throughput": 8691.67, "total_tokens": 136831784} +{"current_steps": 203025, "total_steps": 204665, "loss": 0.0, "lr": 3.9164153231774353e-10, "epoch": 4.95993452715413, "percentage": 99.2, "elapsed_time": "4:22:23", "remaining_time": "0:02:07", "throughput": 8691.72, "total_tokens": 136835688} +{"current_steps": 203030, "total_steps": 204665, "loss": 0.0738, "lr": 3.8925871994710536e-10, "epoch": 4.960056677985977, "percentage": 99.2, "elapsed_time": "4:22:23", "remaining_time": "0:02:06", "throughput": 8691.73, "total_tokens": 136838760} +{"current_steps": 203035, "total_steps": 204665, "loss": 0.0, "lr": 3.868831770147407e-10, "epoch": 4.960178828817824, "percentage": 99.2, "elapsed_time": "4:22:23", "remaining_time": "0:02:06", "throughput": 8691.75, "total_tokens": 136842024} +{"current_steps": 203040, "total_steps": 204665, "loss": 0.0, "lr": 3.8451490353774706e-10, "epoch": 4.960300979649672, "percentage": 99.21, "elapsed_time": "4:22:24", "remaining_time": "0:02:06", "throughput": 8691.78, "total_tokens": 136845736} +{"current_steps": 203045, "total_steps": 204665, "loss": 0.0, "lr": 3.8215389953355494e-10, "epoch": 4.9604231304815185, "percentage": 99.21, "elapsed_time": "4:22:24", "remaining_time": "0:02:05", "throughput": 8691.81, "total_tokens": 136849128} +{"current_steps": 203050, "total_steps": 204665, "loss": 0.0, "lr": 3.7980016501903966e-10, "epoch": 4.960545281313365, "percentage": 99.21, "elapsed_time": "4:22:24", "remaining_time": "0:02:05", "throughput": 8691.83, "total_tokens": 136852520} +{"current_steps": 203055, "total_steps": 204665, "loss": 0.0001, "lr": 3.774537000116318e-10, "epoch": 4.960667432145213, "percentage": 99.21, "elapsed_time": "4:22:25", "remaining_time": "0:02:04", "throughput": 8691.87, "total_tokens": 136856360} +{"current_steps": 203060, "total_steps": 204665, "loss": 0.0, "lr": 3.7511450452809565e-10, "epoch": 4.9607895829770605, "percentage": 99.22, "elapsed_time": "4:22:25", "remaining_time": "0:02:04", "throughput": 8691.88, "total_tokens": 136859432} +{"current_steps": 203065, "total_steps": 204665, "loss": 0.0, "lr": 3.727825785857508e-10, "epoch": 4.960911733808907, "percentage": 99.22, "elapsed_time": "4:22:26", "remaining_time": "0:02:04", "throughput": 8691.89, "total_tokens": 136862504} +{"current_steps": 203070, "total_steps": 204665, "loss": 0.0, "lr": 3.704579222012505e-10, "epoch": 4.961033884640754, "percentage": 99.22, "elapsed_time": "4:22:26", "remaining_time": "0:02:03", "throughput": 8691.88, "total_tokens": 136865320} +{"current_steps": 203075, "total_steps": 204665, "loss": 0.0, "lr": 3.681405353916922e-10, "epoch": 4.961156035472602, "percentage": 99.22, "elapsed_time": "4:22:26", "remaining_time": "0:02:03", "throughput": 8691.89, "total_tokens": 136868520} +{"current_steps": 203080, "total_steps": 204665, "loss": 0.0397, "lr": 3.658304181739513e-10, "epoch": 4.961278186304448, "percentage": 99.23, "elapsed_time": "4:22:27", "remaining_time": "0:02:02", "throughput": 8691.9, "total_tokens": 136871720} +{"current_steps": 203085, "total_steps": 204665, "loss": 0.0, "lr": 3.635275705646812e-10, "epoch": 4.961400337136296, "percentage": 99.23, "elapsed_time": "4:22:27", "remaining_time": "0:02:02", "throughput": 8691.93, "total_tokens": 136875176} +{"current_steps": 203090, "total_steps": 204665, "loss": 0.0, "lr": 3.612319925807572e-10, "epoch": 4.961522487968143, "percentage": 99.23, "elapsed_time": "4:22:27", "remaining_time": "0:02:02", "throughput": 8691.95, "total_tokens": 136878440} +{"current_steps": 203095, "total_steps": 204665, "loss": 0.0, "lr": 3.589436842388327e-10, "epoch": 4.96164463879999, "percentage": 99.23, "elapsed_time": "4:22:28", "remaining_time": "0:02:01", "throughput": 8691.95, "total_tokens": 136881512} +{"current_steps": 203100, "total_steps": 204665, "loss": 0.0003, "lr": 3.56662645555561e-10, "epoch": 4.961766789631837, "percentage": 99.24, "elapsed_time": "4:22:28", "remaining_time": "0:02:01", "throughput": 8691.95, "total_tokens": 136884392} +{"current_steps": 203105, "total_steps": 204665, "loss": 0.0, "lr": 3.543888765473735e-10, "epoch": 4.961888940463685, "percentage": 99.24, "elapsed_time": "4:22:28", "remaining_time": "0:02:00", "throughput": 8691.96, "total_tokens": 136887592} +{"current_steps": 203110, "total_steps": 204665, "loss": 0.0, "lr": 3.521223772311455e-10, "epoch": 4.9620110912955315, "percentage": 99.24, "elapsed_time": "4:22:29", "remaining_time": "0:02:00", "throughput": 8691.97, "total_tokens": 136890600} +{"current_steps": 203115, "total_steps": 204665, "loss": 0.0, "lr": 3.498631476229752e-10, "epoch": 4.962133242127379, "percentage": 99.24, "elapsed_time": "4:22:29", "remaining_time": "0:02:00", "throughput": 8691.99, "total_tokens": 136894056} +{"current_steps": 203120, "total_steps": 204665, "loss": 0.0, "lr": 3.47611187739516e-10, "epoch": 4.962255392959226, "percentage": 99.25, "elapsed_time": "4:22:29", "remaining_time": "0:01:59", "throughput": 8692.01, "total_tokens": 136897448} +{"current_steps": 203125, "total_steps": 204665, "loss": 0.0, "lr": 3.453664975971993e-10, "epoch": 4.9623775437910735, "percentage": 99.25, "elapsed_time": "4:22:30", "remaining_time": "0:01:59", "throughput": 8692.03, "total_tokens": 136900840} +{"current_steps": 203130, "total_steps": 204665, "loss": 0.0, "lr": 3.4312907721212316e-10, "epoch": 4.96249969462292, "percentage": 99.25, "elapsed_time": "4:22:30", "remaining_time": "0:01:59", "throughput": 8692.05, "total_tokens": 136904104} +{"current_steps": 203135, "total_steps": 204665, "loss": 0.0513, "lr": 3.4089892660082997e-10, "epoch": 4.962621845454768, "percentage": 99.25, "elapsed_time": "4:22:30", "remaining_time": "0:01:58", "throughput": 8692.08, "total_tokens": 136907752} +{"current_steps": 203140, "total_steps": 204665, "loss": 0.0, "lr": 3.38676045779307e-10, "epoch": 4.962743996286615, "percentage": 99.25, "elapsed_time": "4:22:31", "remaining_time": "0:01:58", "throughput": 8692.12, "total_tokens": 136911400} +{"current_steps": 203145, "total_steps": 204665, "loss": 0.0, "lr": 3.364604347637634e-10, "epoch": 4.962866147118461, "percentage": 99.26, "elapsed_time": "4:22:31", "remaining_time": "0:01:57", "throughput": 8692.13, "total_tokens": 136914472} +{"current_steps": 203150, "total_steps": 204665, "loss": 0.0, "lr": 3.342520935704085e-10, "epoch": 4.962988297950309, "percentage": 99.26, "elapsed_time": "4:22:31", "remaining_time": "0:01:57", "throughput": 8692.16, "total_tokens": 136918056} +{"current_steps": 203155, "total_steps": 204665, "loss": 0.0, "lr": 3.3205102221534054e-10, "epoch": 4.963110448782157, "percentage": 99.26, "elapsed_time": "4:22:32", "remaining_time": "0:01:57", "throughput": 8692.19, "total_tokens": 136921576} +{"current_steps": 203160, "total_steps": 204665, "loss": 0.0, "lr": 3.2985722071432465e-10, "epoch": 4.963232599614003, "percentage": 99.26, "elapsed_time": "4:22:32", "remaining_time": "0:01:56", "throughput": 8692.21, "total_tokens": 136925032} +{"current_steps": 203165, "total_steps": 204665, "loss": 0.0, "lr": 3.276706890835701e-10, "epoch": 4.96335475044585, "percentage": 99.27, "elapsed_time": "4:22:32", "remaining_time": "0:01:56", "throughput": 8692.23, "total_tokens": 136928232} +{"current_steps": 203170, "total_steps": 204665, "loss": 0.0, "lr": 3.2549142733884203e-10, "epoch": 4.963476901277698, "percentage": 99.27, "elapsed_time": "4:22:33", "remaining_time": "0:01:55", "throughput": 8692.25, "total_tokens": 136931688} +{"current_steps": 203175, "total_steps": 204665, "loss": 0.0, "lr": 3.2331943549601673e-10, "epoch": 4.9635990521095446, "percentage": 99.27, "elapsed_time": "4:22:33", "remaining_time": "0:01:55", "throughput": 8692.27, "total_tokens": 136935016} +{"current_steps": 203180, "total_steps": 204665, "loss": 0.0001, "lr": 3.211547135708592e-10, "epoch": 4.963721202941392, "percentage": 99.27, "elapsed_time": "4:22:34", "remaining_time": "0:01:55", "throughput": 8692.29, "total_tokens": 136938408} +{"current_steps": 203185, "total_steps": 204665, "loss": 0.0, "lr": 3.1899726157913476e-10, "epoch": 4.963843353773239, "percentage": 99.28, "elapsed_time": "4:22:34", "remaining_time": "0:01:54", "throughput": 8692.31, "total_tokens": 136941736} +{"current_steps": 203190, "total_steps": 204665, "loss": 0.0, "lr": 3.168470795366085e-10, "epoch": 4.963965504605087, "percentage": 99.28, "elapsed_time": "4:22:34", "remaining_time": "0:01:54", "throughput": 8692.34, "total_tokens": 136945128} +{"current_steps": 203195, "total_steps": 204665, "loss": 0.0, "lr": 3.1470416745882353e-10, "epoch": 4.964087655436933, "percentage": 99.28, "elapsed_time": "4:22:35", "remaining_time": "0:01:53", "throughput": 8692.35, "total_tokens": 136948456} +{"current_steps": 203200, "total_steps": 204665, "loss": 0.0, "lr": 3.1256852536143407e-10, "epoch": 4.964209806268781, "percentage": 99.28, "elapsed_time": "4:22:35", "remaining_time": "0:01:53", "throughput": 8692.36, "total_tokens": 136951464} +{"current_steps": 203205, "total_steps": 204665, "loss": 0.0, "lr": 3.1044015325987217e-10, "epoch": 4.964331957100628, "percentage": 99.29, "elapsed_time": "4:22:35", "remaining_time": "0:01:53", "throughput": 8692.37, "total_tokens": 136954664} +{"current_steps": 203210, "total_steps": 204665, "loss": 0.0, "lr": 3.0831905116968093e-10, "epoch": 4.964454107932475, "percentage": 99.29, "elapsed_time": "4:22:36", "remaining_time": "0:01:52", "throughput": 8692.39, "total_tokens": 136957928} +{"current_steps": 203215, "total_steps": 204665, "loss": 0.0, "lr": 3.062052191062925e-10, "epoch": 4.964576258764322, "percentage": 99.29, "elapsed_time": "4:22:36", "remaining_time": "0:01:52", "throughput": 8692.43, "total_tokens": 136961640} +{"current_steps": 203220, "total_steps": 204665, "loss": 0.0, "lr": 3.040986570851389e-10, "epoch": 4.96469840959617, "percentage": 99.29, "elapsed_time": "4:22:36", "remaining_time": "0:01:52", "throughput": 8692.45, "total_tokens": 136965032} +{"current_steps": 203225, "total_steps": 204665, "loss": 0.0, "lr": 3.019993651213193e-10, "epoch": 4.9648205604280164, "percentage": 99.3, "elapsed_time": "4:22:37", "remaining_time": "0:01:51", "throughput": 8692.5, "total_tokens": 136968872} +{"current_steps": 203230, "total_steps": 204665, "loss": 0.0, "lr": 2.999073432303767e-10, "epoch": 4.964942711259864, "percentage": 99.3, "elapsed_time": "4:22:37", "remaining_time": "0:01:51", "throughput": 8692.53, "total_tokens": 136972584} +{"current_steps": 203235, "total_steps": 204665, "loss": 0.0001, "lr": 2.9782259142729913e-10, "epoch": 4.965064862091711, "percentage": 99.3, "elapsed_time": "4:22:37", "remaining_time": "0:01:50", "throughput": 8692.57, "total_tokens": 136976232} +{"current_steps": 203240, "total_steps": 204665, "loss": 0.0, "lr": 2.957451097274077e-10, "epoch": 4.965187012923558, "percentage": 99.3, "elapsed_time": "4:22:38", "remaining_time": "0:01:50", "throughput": 8692.62, "total_tokens": 136980136} +{"current_steps": 203245, "total_steps": 204665, "loss": 0.0, "lr": 2.9367489814569044e-10, "epoch": 4.965309163755405, "percentage": 99.31, "elapsed_time": "4:22:38", "remaining_time": "0:01:50", "throughput": 8692.62, "total_tokens": 136983208} +{"current_steps": 203250, "total_steps": 204665, "loss": 0.0, "lr": 2.9161195669735736e-10, "epoch": 4.965431314587252, "percentage": 99.31, "elapsed_time": "4:22:38", "remaining_time": "0:01:49", "throughput": 8692.66, "total_tokens": 136986920} +{"current_steps": 203255, "total_steps": 204665, "loss": 0.0, "lr": 2.8955628539717447e-10, "epoch": 4.9655534654191, "percentage": 99.31, "elapsed_time": "4:22:39", "remaining_time": "0:01:49", "throughput": 8692.66, "total_tokens": 136989864} +{"current_steps": 203260, "total_steps": 204665, "loss": 0.0, "lr": 2.8750788426035175e-10, "epoch": 4.965675616250946, "percentage": 99.31, "elapsed_time": "4:22:39", "remaining_time": "0:01:48", "throughput": 8692.68, "total_tokens": 136993192} +{"current_steps": 203265, "total_steps": 204665, "loss": 0.0, "lr": 2.854667533015442e-10, "epoch": 4.965797767082794, "percentage": 99.32, "elapsed_time": "4:22:39", "remaining_time": "0:01:48", "throughput": 8692.69, "total_tokens": 136996264} +{"current_steps": 203270, "total_steps": 204665, "loss": 0.0, "lr": 2.834328925358509e-10, "epoch": 4.965919917914641, "percentage": 99.32, "elapsed_time": "4:22:40", "remaining_time": "0:01:48", "throughput": 8692.7, "total_tokens": 136999528} +{"current_steps": 203275, "total_steps": 204665, "loss": 0.0, "lr": 2.814063019778157e-10, "epoch": 4.966042068746488, "percentage": 99.32, "elapsed_time": "4:22:40", "remaining_time": "0:01:47", "throughput": 8692.71, "total_tokens": 137002536} +{"current_steps": 203280, "total_steps": 204665, "loss": 0.0, "lr": 2.7938698164231556e-10, "epoch": 4.966164219578335, "percentage": 99.32, "elapsed_time": "4:22:40", "remaining_time": "0:01:47", "throughput": 8692.71, "total_tokens": 137005544} +{"current_steps": 203285, "total_steps": 204665, "loss": 0.0234, "lr": 2.773749315440055e-10, "epoch": 4.966286370410183, "percentage": 99.33, "elapsed_time": "4:22:41", "remaining_time": "0:01:46", "throughput": 8692.73, "total_tokens": 137008808} +{"current_steps": 203290, "total_steps": 204665, "loss": 0.0, "lr": 2.753701516975404e-10, "epoch": 4.9664085212420295, "percentage": 99.33, "elapsed_time": "4:22:41", "remaining_time": "0:01:46", "throughput": 8692.74, "total_tokens": 137012008} +{"current_steps": 203295, "total_steps": 204665, "loss": 0.0, "lr": 2.7337264211746427e-10, "epoch": 4.966530672073877, "percentage": 99.33, "elapsed_time": "4:22:42", "remaining_time": "0:01:46", "throughput": 8692.75, "total_tokens": 137015272} +{"current_steps": 203300, "total_steps": 204665, "loss": 0.0, "lr": 2.713824028183209e-10, "epoch": 4.966652822905724, "percentage": 99.33, "elapsed_time": "4:22:42", "remaining_time": "0:01:45", "throughput": 8692.76, "total_tokens": 137018280} +{"current_steps": 203305, "total_steps": 204665, "loss": 0.0, "lr": 2.693994338145433e-10, "epoch": 4.9667749737375715, "percentage": 99.34, "elapsed_time": "4:22:42", "remaining_time": "0:01:45", "throughput": 8692.79, "total_tokens": 137021864} +{"current_steps": 203310, "total_steps": 204665, "loss": 0.0, "lr": 2.6742373512056435e-10, "epoch": 4.966897124569418, "percentage": 99.34, "elapsed_time": "4:22:43", "remaining_time": "0:01:45", "throughput": 8692.83, "total_tokens": 137025640} +{"current_steps": 203315, "total_steps": 204665, "loss": 0.0, "lr": 2.6545530675081695e-10, "epoch": 4.967019275401266, "percentage": 99.34, "elapsed_time": "4:22:43", "remaining_time": "0:01:44", "throughput": 8692.85, "total_tokens": 137028968} +{"current_steps": 203320, "total_steps": 204665, "loss": 0.0008, "lr": 2.6349414871962297e-10, "epoch": 4.967141426233113, "percentage": 99.34, "elapsed_time": "4:22:43", "remaining_time": "0:01:44", "throughput": 8692.86, "total_tokens": 137032104} +{"current_steps": 203325, "total_steps": 204665, "loss": 0.0, "lr": 2.615402610411932e-10, "epoch": 4.96726357706496, "percentage": 99.35, "elapsed_time": "4:22:44", "remaining_time": "0:01:43", "throughput": 8692.89, "total_tokens": 137035624} +{"current_steps": 203330, "total_steps": 204665, "loss": 0.0, "lr": 2.595936437296276e-10, "epoch": 4.967385727896807, "percentage": 99.35, "elapsed_time": "4:22:44", "remaining_time": "0:01:43", "throughput": 8692.91, "total_tokens": 137039016} +{"current_steps": 203335, "total_steps": 204665, "loss": 0.0, "lr": 2.576542967993589e-10, "epoch": 4.967507878728654, "percentage": 99.35, "elapsed_time": "4:22:44", "remaining_time": "0:01:43", "throughput": 8692.93, "total_tokens": 137042344} +{"current_steps": 203340, "total_steps": 204665, "loss": 0.0, "lr": 2.55722220264154e-10, "epoch": 4.967630029560501, "percentage": 99.35, "elapsed_time": "4:22:45", "remaining_time": "0:01:42", "throughput": 8692.95, "total_tokens": 137045672} +{"current_steps": 203345, "total_steps": 204665, "loss": 0.0, "lr": 2.5379741413833475e-10, "epoch": 4.967752180392348, "percentage": 99.36, "elapsed_time": "4:22:45", "remaining_time": "0:01:42", "throughput": 8692.98, "total_tokens": 137049128} +{"current_steps": 203350, "total_steps": 204665, "loss": 0.0, "lr": 2.5187987843577897e-10, "epoch": 4.967874331224196, "percentage": 99.36, "elapsed_time": "4:22:45", "remaining_time": "0:01:41", "throughput": 8693.02, "total_tokens": 137052904} +{"current_steps": 203355, "total_steps": 204665, "loss": 0.0, "lr": 2.499696131704754e-10, "epoch": 4.9679964820560425, "percentage": 99.36, "elapsed_time": "4:22:46", "remaining_time": "0:01:41", "throughput": 8693.06, "total_tokens": 137056616} +{"current_steps": 203360, "total_steps": 204665, "loss": 0.0, "lr": 2.4806661835630185e-10, "epoch": 4.96811863288789, "percentage": 99.36, "elapsed_time": "4:22:46", "remaining_time": "0:01:41", "throughput": 8693.08, "total_tokens": 137060008} +{"current_steps": 203365, "total_steps": 204665, "loss": 0.0, "lr": 2.461708940070251e-10, "epoch": 4.968240783719737, "percentage": 99.36, "elapsed_time": "4:22:46", "remaining_time": "0:01:40", "throughput": 8693.09, "total_tokens": 137063208} +{"current_steps": 203370, "total_steps": 204665, "loss": 0.0, "lr": 2.4428244013652287e-10, "epoch": 4.9683629345515845, "percentage": 99.37, "elapsed_time": "4:22:47", "remaining_time": "0:01:40", "throughput": 8693.12, "total_tokens": 137066728} +{"current_steps": 203375, "total_steps": 204665, "loss": 0.0, "lr": 2.4240125675856206e-10, "epoch": 4.968485085383431, "percentage": 99.37, "elapsed_time": "4:22:47", "remaining_time": "0:01:40", "throughput": 8693.14, "total_tokens": 137070056} +{"current_steps": 203380, "total_steps": 204665, "loss": 0.0, "lr": 2.405273438866873e-10, "epoch": 4.968607236215279, "percentage": 99.37, "elapsed_time": "4:22:47", "remaining_time": "0:01:39", "throughput": 8693.15, "total_tokens": 137073192} +{"current_steps": 203385, "total_steps": 204665, "loss": 0.0008, "lr": 2.3866070153466534e-10, "epoch": 4.968729387047126, "percentage": 99.37, "elapsed_time": "4:22:48", "remaining_time": "0:01:39", "throughput": 8693.16, "total_tokens": 137076264} +{"current_steps": 203390, "total_steps": 204665, "loss": 0.0, "lr": 2.368013297159299e-10, "epoch": 4.968851537878973, "percentage": 99.38, "elapsed_time": "4:22:48", "remaining_time": "0:01:38", "throughput": 8693.17, "total_tokens": 137079528} +{"current_steps": 203395, "total_steps": 204665, "loss": 0.0, "lr": 2.349492284441368e-10, "epoch": 4.96897368871082, "percentage": 99.38, "elapsed_time": "4:22:48", "remaining_time": "0:01:38", "throughput": 8693.19, "total_tokens": 137082728} +{"current_steps": 203400, "total_steps": 204665, "loss": 0.0, "lr": 2.331043977327196e-10, "epoch": 4.969095839542668, "percentage": 99.38, "elapsed_time": "4:22:49", "remaining_time": "0:01:38", "throughput": 8693.22, "total_tokens": 137086376} +{"current_steps": 203405, "total_steps": 204665, "loss": 0.0, "lr": 2.312668375950011e-10, "epoch": 4.969217990374514, "percentage": 99.38, "elapsed_time": "4:22:49", "remaining_time": "0:01:37", "throughput": 8693.24, "total_tokens": 137089704} +{"current_steps": 203410, "total_steps": 204665, "loss": 0.0, "lr": 2.2943654804441493e-10, "epoch": 4.969340141206361, "percentage": 99.39, "elapsed_time": "4:22:50", "remaining_time": "0:01:37", "throughput": 8693.27, "total_tokens": 137093160} +{"current_steps": 203415, "total_steps": 204665, "loss": 0.0, "lr": 2.2761352909428377e-10, "epoch": 4.969462292038209, "percentage": 99.39, "elapsed_time": "4:22:50", "remaining_time": "0:01:36", "throughput": 8693.29, "total_tokens": 137096552} +{"current_steps": 203420, "total_steps": 204665, "loss": 0.0, "lr": 2.2579778075793031e-10, "epoch": 4.969584442870056, "percentage": 99.39, "elapsed_time": "4:22:50", "remaining_time": "0:01:36", "throughput": 8693.31, "total_tokens": 137099816} +{"current_steps": 203425, "total_steps": 204665, "loss": 0.0, "lr": 2.2398930304834417e-10, "epoch": 4.969706593701903, "percentage": 99.39, "elapsed_time": "4:22:51", "remaining_time": "0:01:36", "throughput": 8693.33, "total_tokens": 137103208} +{"current_steps": 203430, "total_steps": 204665, "loss": 0.0, "lr": 2.2218809597895906e-10, "epoch": 4.96982874453375, "percentage": 99.4, "elapsed_time": "4:22:51", "remaining_time": "0:01:35", "throughput": 8693.35, "total_tokens": 137106600} +{"current_steps": 203435, "total_steps": 204665, "loss": 0.0, "lr": 2.203941595626535e-10, "epoch": 4.9699508953655975, "percentage": 99.4, "elapsed_time": "4:22:51", "remaining_time": "0:01:35", "throughput": 8693.38, "total_tokens": 137110056} +{"current_steps": 203440, "total_steps": 204665, "loss": 0.0, "lr": 2.186074938125282e-10, "epoch": 4.970073046197444, "percentage": 99.4, "elapsed_time": "4:22:52", "remaining_time": "0:01:34", "throughput": 8693.4, "total_tokens": 137113320} +{"current_steps": 203445, "total_steps": 204665, "loss": 0.0, "lr": 2.1682809874168373e-10, "epoch": 4.970195197029292, "percentage": 99.4, "elapsed_time": "4:22:52", "remaining_time": "0:01:34", "throughput": 8693.41, "total_tokens": 137116456} +{"current_steps": 203450, "total_steps": 204665, "loss": 0.0, "lr": 2.150559743628877e-10, "epoch": 4.970317347861139, "percentage": 99.41, "elapsed_time": "4:22:52", "remaining_time": "0:01:34", "throughput": 8693.43, "total_tokens": 137119784} +{"current_steps": 203455, "total_steps": 204665, "loss": 0.0, "lr": 2.132911206891297e-10, "epoch": 4.970439498692986, "percentage": 99.41, "elapsed_time": "4:22:53", "remaining_time": "0:01:33", "throughput": 8693.46, "total_tokens": 137123368} +{"current_steps": 203460, "total_steps": 204665, "loss": 0.0, "lr": 2.115335377332883e-10, "epoch": 4.970561649524833, "percentage": 99.41, "elapsed_time": "4:22:53", "remaining_time": "0:01:33", "throughput": 8693.49, "total_tokens": 137126824} +{"current_steps": 203465, "total_steps": 204665, "loss": 0.0, "lr": 2.0978322550802007e-10, "epoch": 4.970683800356681, "percentage": 99.41, "elapsed_time": "4:22:53", "remaining_time": "0:01:33", "throughput": 8693.51, "total_tokens": 137130280} +{"current_steps": 203470, "total_steps": 204665, "loss": 0.0, "lr": 2.080401840262036e-10, "epoch": 4.970805951188527, "percentage": 99.42, "elapsed_time": "4:22:54", "remaining_time": "0:01:32", "throughput": 8693.54, "total_tokens": 137133736} +{"current_steps": 203475, "total_steps": 204665, "loss": 0.0, "lr": 2.063044133003844e-10, "epoch": 4.970928102020375, "percentage": 99.42, "elapsed_time": "4:22:54", "remaining_time": "0:01:32", "throughput": 8693.56, "total_tokens": 137137128} +{"current_steps": 203480, "total_steps": 204665, "loss": 0.0, "lr": 2.04575913343219e-10, "epoch": 4.971050252852222, "percentage": 99.42, "elapsed_time": "4:22:54", "remaining_time": "0:01:31", "throughput": 8693.58, "total_tokens": 137140456} +{"current_steps": 203485, "total_steps": 204665, "loss": 0.0, "lr": 2.0285468416725294e-10, "epoch": 4.971172403684069, "percentage": 99.42, "elapsed_time": "4:22:55", "remaining_time": "0:01:31", "throughput": 8693.59, "total_tokens": 137143656} +{"current_steps": 203490, "total_steps": 204665, "loss": 0.0001, "lr": 2.0114072578503172e-10, "epoch": 4.971294554515916, "percentage": 99.43, "elapsed_time": "4:22:55", "remaining_time": "0:01:31", "throughput": 8693.6, "total_tokens": 137146792} +{"current_steps": 203495, "total_steps": 204665, "loss": 0.0011, "lr": 1.9943403820910086e-10, "epoch": 4.971416705347764, "percentage": 99.43, "elapsed_time": "4:22:55", "remaining_time": "0:01:30", "throughput": 8693.61, "total_tokens": 137149800} +{"current_steps": 203500, "total_steps": 204665, "loss": 0.0, "lr": 1.9773462145178387e-10, "epoch": 4.9715388561796106, "percentage": 99.43, "elapsed_time": "4:22:56", "remaining_time": "0:01:30", "throughput": 8693.62, "total_tokens": 137153064} +{"current_steps": 203505, "total_steps": 204665, "loss": 0.0, "lr": 1.960424755254042e-10, "epoch": 4.971661007011457, "percentage": 99.43, "elapsed_time": "4:22:56", "remaining_time": "0:01:29", "throughput": 8693.64, "total_tokens": 137156328} +{"current_steps": 203510, "total_steps": 204665, "loss": 0.0, "lr": 1.9435760044239635e-10, "epoch": 4.971783157843305, "percentage": 99.44, "elapsed_time": "4:22:56", "remaining_time": "0:01:29", "throughput": 8693.65, "total_tokens": 137159400} +{"current_steps": 203515, "total_steps": 204665, "loss": 0.0, "lr": 1.9267999621486174e-10, "epoch": 4.971905308675153, "percentage": 99.44, "elapsed_time": "4:22:57", "remaining_time": "0:01:29", "throughput": 8693.66, "total_tokens": 137162600} +{"current_steps": 203520, "total_steps": 204665, "loss": 0.0, "lr": 1.9100966285512388e-10, "epoch": 4.972027459506999, "percentage": 99.44, "elapsed_time": "4:22:57", "remaining_time": "0:01:28", "throughput": 8693.71, "total_tokens": 137166568} +{"current_steps": 203525, "total_steps": 204665, "loss": 0.0, "lr": 1.8934660037528417e-10, "epoch": 4.972149610338846, "percentage": 99.44, "elapsed_time": "4:22:58", "remaining_time": "0:01:28", "throughput": 8693.74, "total_tokens": 137170088} +{"current_steps": 203530, "total_steps": 204665, "loss": 0.0, "lr": 1.8769080878744402e-10, "epoch": 4.972271761170694, "percentage": 99.45, "elapsed_time": "4:22:58", "remaining_time": "0:01:27", "throughput": 8693.77, "total_tokens": 137173608} +{"current_steps": 203535, "total_steps": 204665, "loss": 0.0, "lr": 1.860422881035939e-10, "epoch": 4.97239391200254, "percentage": 99.45, "elapsed_time": "4:22:58", "remaining_time": "0:01:27", "throughput": 8693.89, "total_tokens": 137178984} +{"current_steps": 203540, "total_steps": 204665, "loss": 0.0, "lr": 1.8440103833572417e-10, "epoch": 4.972516062834388, "percentage": 99.45, "elapsed_time": "4:22:59", "remaining_time": "0:01:27", "throughput": 8693.89, "total_tokens": 137182056} +{"current_steps": 203545, "total_steps": 204665, "loss": 0.0, "lr": 1.8276705949593629e-10, "epoch": 4.972638213666235, "percentage": 99.45, "elapsed_time": "4:22:59", "remaining_time": "0:01:26", "throughput": 8693.9, "total_tokens": 137185064} +{"current_steps": 203550, "total_steps": 204665, "loss": 0.0224, "lr": 1.8114035159588758e-10, "epoch": 4.9727603644980825, "percentage": 99.46, "elapsed_time": "4:22:59", "remaining_time": "0:01:26", "throughput": 8693.89, "total_tokens": 137187944} +{"current_steps": 203555, "total_steps": 204665, "loss": 0.0, "lr": 1.7952091464756846e-10, "epoch": 4.972882515329929, "percentage": 99.46, "elapsed_time": "4:23:00", "remaining_time": "0:01:26", "throughput": 8693.9, "total_tokens": 137190952} +{"current_steps": 203560, "total_steps": 204665, "loss": 0.0, "lr": 1.7790874866263628e-10, "epoch": 4.973004666161777, "percentage": 99.46, "elapsed_time": "4:23:00", "remaining_time": "0:01:25", "throughput": 8693.92, "total_tokens": 137194408} +{"current_steps": 203565, "total_steps": 204665, "loss": 0.0, "lr": 1.7630385365285938e-10, "epoch": 4.973126816993624, "percentage": 99.46, "elapsed_time": "4:23:00", "remaining_time": "0:01:25", "throughput": 8693.94, "total_tokens": 137197736} +{"current_steps": 203570, "total_steps": 204665, "loss": 0.0, "lr": 1.7470622962989511e-10, "epoch": 4.973248967825471, "percentage": 99.46, "elapsed_time": "4:23:01", "remaining_time": "0:01:24", "throughput": 8693.96, "total_tokens": 137201064} +{"current_steps": 203575, "total_steps": 204665, "loss": 0.0, "lr": 1.7311587660551186e-10, "epoch": 4.973371118657318, "percentage": 99.47, "elapsed_time": "4:23:01", "remaining_time": "0:01:24", "throughput": 8693.98, "total_tokens": 137204456} +{"current_steps": 203580, "total_steps": 204665, "loss": 0.0, "lr": 1.7153279459103386e-10, "epoch": 4.973493269489166, "percentage": 99.47, "elapsed_time": "4:23:01", "remaining_time": "0:01:24", "throughput": 8694.02, "total_tokens": 137208168} +{"current_steps": 203585, "total_steps": 204665, "loss": 0.0, "lr": 1.699569835981185e-10, "epoch": 4.973615420321012, "percentage": 99.47, "elapsed_time": "4:23:02", "remaining_time": "0:01:23", "throughput": 8694.03, "total_tokens": 137211304} +{"current_steps": 203590, "total_steps": 204665, "loss": 0.0, "lr": 1.6838844363820103e-10, "epoch": 4.97373757115286, "percentage": 99.47, "elapsed_time": "4:23:02", "remaining_time": "0:01:23", "throughput": 8694.05, "total_tokens": 137214696} +{"current_steps": 203595, "total_steps": 204665, "loss": 0.0, "lr": 1.668271747227168e-10, "epoch": 4.973859721984707, "percentage": 99.48, "elapsed_time": "4:23:02", "remaining_time": "0:01:22", "throughput": 8694.09, "total_tokens": 137218408} +{"current_steps": 203600, "total_steps": 204665, "loss": 0.0, "lr": 1.6527317686299002e-10, "epoch": 4.9739818728165535, "percentage": 99.48, "elapsed_time": "4:23:03", "remaining_time": "0:01:22", "throughput": 8694.12, "total_tokens": 137221864} +{"current_steps": 203605, "total_steps": 204665, "loss": 0.0, "lr": 1.63726450070234e-10, "epoch": 4.974104023648401, "percentage": 99.48, "elapsed_time": "4:23:03", "remaining_time": "0:01:22", "throughput": 8694.14, "total_tokens": 137225192} +{"current_steps": 203610, "total_steps": 204665, "loss": 0.0, "lr": 1.62186994355884e-10, "epoch": 4.974226174480248, "percentage": 99.48, "elapsed_time": "4:23:03", "remaining_time": "0:01:21", "throughput": 8694.15, "total_tokens": 137228392} +{"current_steps": 203615, "total_steps": 204665, "loss": 0.0, "lr": 1.6065480973104228e-10, "epoch": 4.9743483253120955, "percentage": 99.49, "elapsed_time": "4:23:04", "remaining_time": "0:01:21", "throughput": 8694.2, "total_tokens": 137232296} +{"current_steps": 203620, "total_steps": 204665, "loss": 0.0, "lr": 1.5912989620681107e-10, "epoch": 4.974470476143942, "percentage": 99.49, "elapsed_time": "4:23:04", "remaining_time": "0:01:21", "throughput": 8694.2, "total_tokens": 137235304} +{"current_steps": 203625, "total_steps": 204665, "loss": 0.0, "lr": 1.5761225379429255e-10, "epoch": 4.97459262697579, "percentage": 99.49, "elapsed_time": "4:23:05", "remaining_time": "0:01:20", "throughput": 8694.19, "total_tokens": 137238120} +{"current_steps": 203630, "total_steps": 204665, "loss": 0.0, "lr": 1.56101882504478e-10, "epoch": 4.974714777807637, "percentage": 99.49, "elapsed_time": "4:23:05", "remaining_time": "0:01:20", "throughput": 8694.2, "total_tokens": 137241320} +{"current_steps": 203635, "total_steps": 204665, "loss": 0.0, "lr": 1.5459878234846958e-10, "epoch": 4.974836928639484, "percentage": 99.5, "elapsed_time": "4:23:05", "remaining_time": "0:01:19", "throughput": 8694.25, "total_tokens": 137245160} +{"current_steps": 203640, "total_steps": 204665, "loss": 0.0, "lr": 1.5310295333725853e-10, "epoch": 4.974959079471331, "percentage": 99.5, "elapsed_time": "4:23:06", "remaining_time": "0:01:19", "throughput": 8694.27, "total_tokens": 137248616} +{"current_steps": 203645, "total_steps": 204665, "loss": 0.0001, "lr": 1.5161439548150301e-10, "epoch": 4.975081230303179, "percentage": 99.5, "elapsed_time": "4:23:06", "remaining_time": "0:01:19", "throughput": 8694.28, "total_tokens": 137251688} +{"current_steps": 203650, "total_steps": 204665, "loss": 0.0, "lr": 1.501331087920832e-10, "epoch": 4.975203381135025, "percentage": 99.5, "elapsed_time": "4:23:06", "remaining_time": "0:01:18", "throughput": 8694.3, "total_tokens": 137255016} +{"current_steps": 203655, "total_steps": 204665, "loss": 0.0, "lr": 1.4865909327987924e-10, "epoch": 4.975325531966873, "percentage": 99.51, "elapsed_time": "4:23:07", "remaining_time": "0:01:18", "throughput": 8694.32, "total_tokens": 137258408} +{"current_steps": 203660, "total_steps": 204665, "loss": 0.0, "lr": 1.4719234895566034e-10, "epoch": 4.97544768279872, "percentage": 99.51, "elapsed_time": "4:23:07", "remaining_time": "0:01:17", "throughput": 8694.33, "total_tokens": 137261480} +{"current_steps": 203665, "total_steps": 204665, "loss": 0.0, "lr": 1.457328758298626e-10, "epoch": 4.975569833630567, "percentage": 99.51, "elapsed_time": "4:23:07", "remaining_time": "0:01:17", "throughput": 8694.35, "total_tokens": 137264808} +{"current_steps": 203670, "total_steps": 204665, "loss": 0.0, "lr": 1.4428067391325515e-10, "epoch": 4.975691984462414, "percentage": 99.51, "elapsed_time": "4:23:08", "remaining_time": "0:01:17", "throughput": 8694.37, "total_tokens": 137268328} +{"current_steps": 203675, "total_steps": 204665, "loss": 0.0001, "lr": 1.4283574321627413e-10, "epoch": 4.975814135294261, "percentage": 99.52, "elapsed_time": "4:23:08", "remaining_time": "0:01:16", "throughput": 8694.38, "total_tokens": 137271464} +{"current_steps": 203680, "total_steps": 204665, "loss": 0.0, "lr": 1.4139808374968864e-10, "epoch": 4.9759362861261085, "percentage": 99.52, "elapsed_time": "4:23:08", "remaining_time": "0:01:16", "throughput": 8694.39, "total_tokens": 137274472} +{"current_steps": 203685, "total_steps": 204665, "loss": 0.0, "lr": 1.3996769552371279e-10, "epoch": 4.976058436957956, "percentage": 99.52, "elapsed_time": "4:23:09", "remaining_time": "0:01:15", "throughput": 8694.41, "total_tokens": 137277928} +{"current_steps": 203690, "total_steps": 204665, "loss": 0.0, "lr": 1.3854457854878265e-10, "epoch": 4.976180587789803, "percentage": 99.52, "elapsed_time": "4:23:09", "remaining_time": "0:01:15", "throughput": 8694.43, "total_tokens": 137281320} +{"current_steps": 203695, "total_steps": 204665, "loss": 0.0, "lr": 1.3712873283533433e-10, "epoch": 4.97630273862165, "percentage": 99.53, "elapsed_time": "4:23:09", "remaining_time": "0:01:15", "throughput": 8694.45, "total_tokens": 137284648} +{"current_steps": 203700, "total_steps": 204665, "loss": 0.0, "lr": 1.3572015839358187e-10, "epoch": 4.976424889453497, "percentage": 99.53, "elapsed_time": "4:23:10", "remaining_time": "0:01:14", "throughput": 8694.49, "total_tokens": 137288232} +{"current_steps": 203705, "total_steps": 204665, "loss": 0.0, "lr": 1.3431885523385034e-10, "epoch": 4.976547040285344, "percentage": 99.53, "elapsed_time": "4:23:10", "remaining_time": "0:01:14", "throughput": 8694.5, "total_tokens": 137291432} +{"current_steps": 203710, "total_steps": 204665, "loss": 0.0, "lr": 1.329248233662428e-10, "epoch": 4.976669191117192, "percentage": 99.53, "elapsed_time": "4:23:10", "remaining_time": "0:01:14", "throughput": 8694.52, "total_tokens": 137294760} +{"current_steps": 203715, "total_steps": 204665, "loss": 0.0, "lr": 1.3153806280097323e-10, "epoch": 4.976791341949038, "percentage": 99.54, "elapsed_time": "4:23:11", "remaining_time": "0:01:13", "throughput": 8694.54, "total_tokens": 137298280} +{"current_steps": 203720, "total_steps": 204665, "loss": 0.0, "lr": 1.3015857354803372e-10, "epoch": 4.976913492780886, "percentage": 99.54, "elapsed_time": "4:23:11", "remaining_time": "0:01:13", "throughput": 8694.56, "total_tokens": 137301608} +{"current_steps": 203725, "total_steps": 204665, "loss": 0.0, "lr": 1.2878635561752726e-10, "epoch": 4.977035643612733, "percentage": 99.54, "elapsed_time": "4:23:12", "remaining_time": "0:01:12", "throughput": 8694.56, "total_tokens": 137304488} +{"current_steps": 203730, "total_steps": 204665, "loss": 0.0, "lr": 1.2742140901944587e-10, "epoch": 4.97715779444458, "percentage": 99.54, "elapsed_time": "4:23:12", "remaining_time": "0:01:12", "throughput": 8694.62, "total_tokens": 137308520} +{"current_steps": 203735, "total_steps": 204665, "loss": 0.0, "lr": 1.2606373376367052e-10, "epoch": 4.977279945276427, "percentage": 99.55, "elapsed_time": "4:23:12", "remaining_time": "0:01:12", "throughput": 8694.64, "total_tokens": 137312040} +{"current_steps": 203740, "total_steps": 204665, "loss": 0.0, "lr": 1.2471332986008222e-10, "epoch": 4.977402096108275, "percentage": 99.55, "elapsed_time": "4:23:13", "remaining_time": "0:01:11", "throughput": 8694.67, "total_tokens": 137315560} +{"current_steps": 203745, "total_steps": 204665, "loss": 0.0, "lr": 1.233701973185619e-10, "epoch": 4.9775242469401215, "percentage": 99.55, "elapsed_time": "4:23:13", "remaining_time": "0:01:11", "throughput": 8694.69, "total_tokens": 137318952} +{"current_steps": 203750, "total_steps": 204665, "loss": 0.0, "lr": 1.2203433614876858e-10, "epoch": 4.977646397771969, "percentage": 99.55, "elapsed_time": "4:23:13", "remaining_time": "0:01:10", "throughput": 8694.72, "total_tokens": 137322408} +{"current_steps": 203755, "total_steps": 204665, "loss": 0.0, "lr": 1.2070574636058318e-10, "epoch": 4.977768548603816, "percentage": 99.56, "elapsed_time": "4:23:14", "remaining_time": "0:01:10", "throughput": 8694.74, "total_tokens": 137325800} +{"current_steps": 203760, "total_steps": 204665, "loss": 0.0, "lr": 1.1938442796344263e-10, "epoch": 4.9778906994356635, "percentage": 99.56, "elapsed_time": "4:23:14", "remaining_time": "0:01:10", "throughput": 8694.78, "total_tokens": 137329448} +{"current_steps": 203765, "total_steps": 204665, "loss": 0.0, "lr": 1.1807038096711685e-10, "epoch": 4.97801285026751, "percentage": 99.56, "elapsed_time": "4:23:14", "remaining_time": "0:01:09", "throughput": 8694.8, "total_tokens": 137332776} +{"current_steps": 203770, "total_steps": 204665, "loss": 0.0, "lr": 1.1676360538115381e-10, "epoch": 4.978135001099357, "percentage": 99.56, "elapsed_time": "4:23:15", "remaining_time": "0:01:09", "throughput": 8694.85, "total_tokens": 137336808} +{"current_steps": 203775, "total_steps": 204665, "loss": 0.0, "lr": 1.154641012149904e-10, "epoch": 4.978257151931205, "percentage": 99.57, "elapsed_time": "4:23:15", "remaining_time": "0:01:08", "throughput": 8694.88, "total_tokens": 137340392} +{"current_steps": 203780, "total_steps": 204665, "loss": 0.0, "lr": 1.1417186847806349e-10, "epoch": 4.978379302763052, "percentage": 99.57, "elapsed_time": "4:23:15", "remaining_time": "0:01:08", "throughput": 8694.89, "total_tokens": 137343592} +{"current_steps": 203785, "total_steps": 204665, "loss": 0.0, "lr": 1.12886907179921e-10, "epoch": 4.978501453594899, "percentage": 99.57, "elapsed_time": "4:23:16", "remaining_time": "0:01:08", "throughput": 8694.94, "total_tokens": 137347496} +{"current_steps": 203790, "total_steps": 204665, "loss": 0.0, "lr": 1.1160921732977779e-10, "epoch": 4.978623604426746, "percentage": 99.57, "elapsed_time": "4:23:16", "remaining_time": "0:01:07", "throughput": 8694.96, "total_tokens": 137350760} +{"current_steps": 203795, "total_steps": 204665, "loss": 0.0, "lr": 1.1033879893684872e-10, "epoch": 4.978745755258593, "percentage": 99.57, "elapsed_time": "4:23:16", "remaining_time": "0:01:07", "throughput": 8694.98, "total_tokens": 137354216} +{"current_steps": 203800, "total_steps": 204665, "loss": 0.0, "lr": 1.0907565201057067e-10, "epoch": 4.97886790609044, "percentage": 99.58, "elapsed_time": "4:23:17", "remaining_time": "0:01:07", "throughput": 8695.0, "total_tokens": 137357416} +{"current_steps": 203805, "total_steps": 204665, "loss": 0.0, "lr": 1.0781977655993645e-10, "epoch": 4.978990056922288, "percentage": 99.58, "elapsed_time": "4:23:17", "remaining_time": "0:01:06", "throughput": 8695.0, "total_tokens": 137360424} +{"current_steps": 203810, "total_steps": 204665, "loss": 0.0, "lr": 1.0657117259427195e-10, "epoch": 4.9791122077541345, "percentage": 99.58, "elapsed_time": "4:23:17", "remaining_time": "0:01:06", "throughput": 8695.0, "total_tokens": 137363368} +{"current_steps": 203815, "total_steps": 204665, "loss": 0.0, "lr": 1.0532984012256995e-10, "epoch": 4.979234358585982, "percentage": 99.58, "elapsed_time": "4:23:18", "remaining_time": "0:01:05", "throughput": 8695.05, "total_tokens": 137367208} +{"current_steps": 203820, "total_steps": 204665, "loss": 0.0, "lr": 1.0409577915382328e-10, "epoch": 4.979356509417829, "percentage": 99.59, "elapsed_time": "4:23:18", "remaining_time": "0:01:05", "throughput": 8695.06, "total_tokens": 137370344} +{"current_steps": 203825, "total_steps": 204665, "loss": 0.1607, "lr": 1.0286898969702473e-10, "epoch": 4.979478660249677, "percentage": 99.59, "elapsed_time": "4:23:19", "remaining_time": "0:01:05", "throughput": 8695.07, "total_tokens": 137373480} +{"current_steps": 203830, "total_steps": 204665, "loss": 0.0, "lr": 1.016494717610561e-10, "epoch": 4.979600811081523, "percentage": 99.59, "elapsed_time": "4:23:19", "remaining_time": "0:01:04", "throughput": 8695.09, "total_tokens": 137377000} +{"current_steps": 203835, "total_steps": 204665, "loss": 0.0, "lr": 1.0043722535491018e-10, "epoch": 4.979722961913371, "percentage": 99.59, "elapsed_time": "4:23:19", "remaining_time": "0:01:04", "throughput": 8695.13, "total_tokens": 137380712} +{"current_steps": 203840, "total_steps": 204665, "loss": 0.0, "lr": 9.92322504872467e-11, "epoch": 4.979845112745218, "percentage": 99.6, "elapsed_time": "4:23:20", "remaining_time": "0:01:03", "throughput": 8695.17, "total_tokens": 137384360} +{"current_steps": 203845, "total_steps": 204665, "loss": 0.0, "lr": 9.803454716694748e-11, "epoch": 4.979967263577065, "percentage": 99.6, "elapsed_time": "4:23:20", "remaining_time": "0:01:03", "throughput": 8695.18, "total_tokens": 137387560} +{"current_steps": 203850, "total_steps": 204665, "loss": 0.0, "lr": 9.684411540267224e-11, "epoch": 4.980089414408912, "percentage": 99.6, "elapsed_time": "4:23:20", "remaining_time": "0:01:03", "throughput": 8695.21, "total_tokens": 137391016} +{"current_steps": 203855, "total_steps": 204665, "loss": 0.0, "lr": 9.566095520308071e-11, "epoch": 4.98021156524076, "percentage": 99.6, "elapsed_time": "4:23:21", "remaining_time": "0:01:02", "throughput": 8695.23, "total_tokens": 137394472} +{"current_steps": 203860, "total_steps": 204665, "loss": 0.0, "lr": 9.448506657683264e-11, "epoch": 4.980333716072606, "percentage": 99.61, "elapsed_time": "4:23:21", "remaining_time": "0:01:02", "throughput": 8695.23, "total_tokens": 137397352} +{"current_steps": 203865, "total_steps": 204665, "loss": 0.0001, "lr": 9.331644953236573e-11, "epoch": 4.980455866904453, "percentage": 99.61, "elapsed_time": "4:23:21", "remaining_time": "0:01:02", "throughput": 8695.25, "total_tokens": 137400808} +{"current_steps": 203870, "total_steps": 204665, "loss": 0.0, "lr": 9.21551040783397e-11, "epoch": 4.980578017736301, "percentage": 99.61, "elapsed_time": "4:23:22", "remaining_time": "0:01:01", "throughput": 8695.26, "total_tokens": 137403880} +{"current_steps": 203875, "total_steps": 204665, "loss": 0.0, "lr": 9.100103022297023e-11, "epoch": 4.980700168568148, "percentage": 99.61, "elapsed_time": "4:23:22", "remaining_time": "0:01:01", "throughput": 8695.26, "total_tokens": 137406824} +{"current_steps": 203880, "total_steps": 204665, "loss": 0.0002, "lr": 8.985422797491704e-11, "epoch": 4.980822319399995, "percentage": 99.62, "elapsed_time": "4:23:22", "remaining_time": "0:01:00", "throughput": 8695.28, "total_tokens": 137410216} +{"current_steps": 203885, "total_steps": 204665, "loss": 0.0, "lr": 8.871469734228477e-11, "epoch": 4.980944470231842, "percentage": 99.62, "elapsed_time": "4:23:23", "remaining_time": "0:01:00", "throughput": 8695.33, "total_tokens": 137414056} +{"current_steps": 203890, "total_steps": 204665, "loss": 0.0, "lr": 8.758243833351109e-11, "epoch": 4.98106662106369, "percentage": 99.62, "elapsed_time": "4:23:23", "remaining_time": "0:01:00", "throughput": 8695.34, "total_tokens": 137417192} +{"current_steps": 203895, "total_steps": 204665, "loss": 0.0, "lr": 8.645745095681167e-11, "epoch": 4.981188771895536, "percentage": 99.62, "elapsed_time": "4:23:23", "remaining_time": "0:00:59", "throughput": 8695.34, "total_tokens": 137420264} +{"current_steps": 203900, "total_steps": 204665, "loss": 0.0, "lr": 8.533973522029114e-11, "epoch": 4.981310922727384, "percentage": 99.63, "elapsed_time": "4:23:24", "remaining_time": "0:00:59", "throughput": 8695.35, "total_tokens": 137423400} +{"current_steps": 203905, "total_steps": 204665, "loss": 0.0, "lr": 8.422929113216515e-11, "epoch": 4.981433073559231, "percentage": 99.63, "elapsed_time": "4:23:24", "remaining_time": "0:00:58", "throughput": 8695.36, "total_tokens": 137426408} +{"current_steps": 203910, "total_steps": 204665, "loss": 0.0, "lr": 8.312611870042729e-11, "epoch": 4.981555224391078, "percentage": 99.63, "elapsed_time": "4:23:24", "remaining_time": "0:00:58", "throughput": 8695.4, "total_tokens": 137430184} +{"current_steps": 203915, "total_steps": 204665, "loss": 0.0002, "lr": 8.203021793318221e-11, "epoch": 4.981677375222925, "percentage": 99.63, "elapsed_time": "4:23:25", "remaining_time": "0:00:58", "throughput": 8695.41, "total_tokens": 137433448} +{"current_steps": 203920, "total_steps": 204665, "loss": 0.0, "lr": 8.094158883831248e-11, "epoch": 4.981799526054773, "percentage": 99.64, "elapsed_time": "4:23:25", "remaining_time": "0:00:57", "throughput": 8695.45, "total_tokens": 137437032} +{"current_steps": 203925, "total_steps": 204665, "loss": 0.0, "lr": 7.98602314238117e-11, "epoch": 4.9819216768866195, "percentage": 99.64, "elapsed_time": "4:23:25", "remaining_time": "0:00:57", "throughput": 8695.48, "total_tokens": 137440680} +{"current_steps": 203930, "total_steps": 204665, "loss": 0.0, "lr": 7.878614569745146e-11, "epoch": 4.982043827718467, "percentage": 99.64, "elapsed_time": "4:23:26", "remaining_time": "0:00:56", "throughput": 8695.48, "total_tokens": 137443688} +{"current_steps": 203935, "total_steps": 204665, "loss": 0.0001, "lr": 7.771933166722532e-11, "epoch": 4.982165978550314, "percentage": 99.64, "elapsed_time": "4:23:26", "remaining_time": "0:00:56", "throughput": 8695.51, "total_tokens": 137447144} +{"current_steps": 203940, "total_steps": 204665, "loss": 0.0, "lr": 7.665978934068285e-11, "epoch": 4.9822881293821615, "percentage": 99.65, "elapsed_time": "4:23:27", "remaining_time": "0:00:56", "throughput": 8695.52, "total_tokens": 137450344} +{"current_steps": 203945, "total_steps": 204665, "loss": 0.0, "lr": 7.560751872559557e-11, "epoch": 4.982410280214008, "percentage": 99.65, "elapsed_time": "4:23:27", "remaining_time": "0:00:55", "throughput": 8695.54, "total_tokens": 137453800} +{"current_steps": 203950, "total_steps": 204665, "loss": 0.0, "lr": 7.456251982973505e-11, "epoch": 4.982532431045856, "percentage": 99.65, "elapsed_time": "4:23:27", "remaining_time": "0:00:55", "throughput": 8695.55, "total_tokens": 137456808} +{"current_steps": 203955, "total_steps": 204665, "loss": 0.0, "lr": 7.352479266053979e-11, "epoch": 4.982654581877703, "percentage": 99.65, "elapsed_time": "4:23:28", "remaining_time": "0:00:55", "throughput": 8695.56, "total_tokens": 137460072} +{"current_steps": 203960, "total_steps": 204665, "loss": 0.0, "lr": 7.249433722567033e-11, "epoch": 4.982776732709549, "percentage": 99.66, "elapsed_time": "4:23:28", "remaining_time": "0:00:54", "throughput": 8695.6, "total_tokens": 137463720} +{"current_steps": 203965, "total_steps": 204665, "loss": 0.0, "lr": 7.147115353245414e-11, "epoch": 4.982898883541397, "percentage": 99.66, "elapsed_time": "4:23:28", "remaining_time": "0:00:54", "throughput": 8695.64, "total_tokens": 137467560} +{"current_steps": 203970, "total_steps": 204665, "loss": 0.0, "lr": 7.045524158855176e-11, "epoch": 4.983021034373244, "percentage": 99.66, "elapsed_time": "4:23:29", "remaining_time": "0:00:53", "throughput": 8695.66, "total_tokens": 137470888} +{"current_steps": 203975, "total_steps": 204665, "loss": 0.0, "lr": 6.944660140117964e-11, "epoch": 4.983143185205091, "percentage": 99.66, "elapsed_time": "4:23:29", "remaining_time": "0:00:53", "throughput": 8695.67, "total_tokens": 137474088} +{"current_steps": 203980, "total_steps": 204665, "loss": 0.0, "lr": 6.844523297777627e-11, "epoch": 4.983265336036938, "percentage": 99.67, "elapsed_time": "4:23:29", "remaining_time": "0:00:53", "throughput": 8695.68, "total_tokens": 137477160} +{"current_steps": 203985, "total_steps": 204665, "loss": 0.0, "lr": 6.745113632566912e-11, "epoch": 4.983387486868786, "percentage": 99.67, "elapsed_time": "4:23:30", "remaining_time": "0:00:52", "throughput": 8695.7, "total_tokens": 137480552} +{"current_steps": 203990, "total_steps": 204665, "loss": 0.0, "lr": 6.64643114518526e-11, "epoch": 4.9835096377006325, "percentage": 99.67, "elapsed_time": "4:23:30", "remaining_time": "0:00:52", "throughput": 8695.73, "total_tokens": 137484072} +{"current_steps": 203995, "total_steps": 204665, "loss": 0.0, "lr": 6.548475836376521e-11, "epoch": 4.98363178853248, "percentage": 99.67, "elapsed_time": "4:23:30", "remaining_time": "0:00:51", "throughput": 8695.75, "total_tokens": 137487464} +{"current_steps": 204000, "total_steps": 204665, "loss": 0.0, "lr": 6.451247706840136e-11, "epoch": 4.983753939364327, "percentage": 99.68, "elapsed_time": "4:23:31", "remaining_time": "0:00:51", "throughput": 8695.8, "total_tokens": 137491560} +{"current_steps": 204005, "total_steps": 204665, "loss": 0.0, "lr": 6.354746757286645e-11, "epoch": 4.9838760901961745, "percentage": 99.68, "elapsed_time": "4:23:31", "remaining_time": "0:00:51", "throughput": 8695.81, "total_tokens": 137494632} +{"current_steps": 204010, "total_steps": 204665, "loss": 0.0, "lr": 6.258972988415489e-11, "epoch": 4.983998241028021, "percentage": 99.68, "elapsed_time": "4:23:31", "remaining_time": "0:00:50", "throughput": 8695.83, "total_tokens": 137497896} +{"current_steps": 204015, "total_steps": 204665, "loss": 0.0, "lr": 6.16392640091501e-11, "epoch": 4.984120391859869, "percentage": 99.68, "elapsed_time": "4:23:32", "remaining_time": "0:00:50", "throughput": 8695.85, "total_tokens": 137501288} +{"current_steps": 204020, "total_steps": 204665, "loss": 0.0, "lr": 6.069606995495746e-11, "epoch": 4.984242542691716, "percentage": 99.68, "elapsed_time": "4:23:32", "remaining_time": "0:00:49", "throughput": 8695.87, "total_tokens": 137504680} +{"current_steps": 204025, "total_steps": 204665, "loss": 0.0, "lr": 5.976014772834937e-11, "epoch": 4.984364693523563, "percentage": 99.69, "elapsed_time": "4:23:32", "remaining_time": "0:00:49", "throughput": 8695.87, "total_tokens": 137507688} +{"current_steps": 204030, "total_steps": 204665, "loss": 0.0, "lr": 5.883149733609816e-11, "epoch": 4.98448684435541, "percentage": 99.69, "elapsed_time": "4:23:33", "remaining_time": "0:00:49", "throughput": 8695.88, "total_tokens": 137510824} +{"current_steps": 204035, "total_steps": 204665, "loss": 0.0, "lr": 5.7910118784976204e-11, "epoch": 4.984608995187257, "percentage": 99.69, "elapsed_time": "4:23:33", "remaining_time": "0:00:48", "throughput": 8695.89, "total_tokens": 137514088} +{"current_steps": 204040, "total_steps": 204665, "loss": 0.0, "lr": 5.699601208164484e-11, "epoch": 4.984731146019104, "percentage": 99.69, "elapsed_time": "4:23:34", "remaining_time": "0:00:48", "throughput": 8695.94, "total_tokens": 137517928} +{"current_steps": 204045, "total_steps": 204665, "loss": 0.0, "lr": 5.6089177232765406e-11, "epoch": 4.984853296850952, "percentage": 99.7, "elapsed_time": "4:23:34", "remaining_time": "0:00:48", "throughput": 8695.95, "total_tokens": 137521128} +{"current_steps": 204050, "total_steps": 204665, "loss": 0.0492, "lr": 5.518961424499924e-11, "epoch": 4.984975447682799, "percentage": 99.7, "elapsed_time": "4:23:34", "remaining_time": "0:00:47", "throughput": 8695.98, "total_tokens": 137524584} +{"current_steps": 204055, "total_steps": 204665, "loss": 0.0, "lr": 5.4297323124896655e-11, "epoch": 4.9850975985146455, "percentage": 99.7, "elapsed_time": "4:23:35", "remaining_time": "0:00:47", "throughput": 8696.02, "total_tokens": 137528296} +{"current_steps": 204060, "total_steps": 204665, "loss": 0.0, "lr": 5.341230387878593e-11, "epoch": 4.985219749346493, "percentage": 99.7, "elapsed_time": "4:23:35", "remaining_time": "0:00:46", "throughput": 8696.08, "total_tokens": 137532456} +{"current_steps": 204065, "total_steps": 204665, "loss": 0.0, "lr": 5.253455651332839e-11, "epoch": 4.98534190017834, "percentage": 99.71, "elapsed_time": "4:23:35", "remaining_time": "0:00:46", "throughput": 8696.1, "total_tokens": 137535784} +{"current_steps": 204070, "total_steps": 204665, "loss": 0.0, "lr": 5.166408103474129e-11, "epoch": 4.9854640510101875, "percentage": 99.71, "elapsed_time": "4:23:36", "remaining_time": "0:00:46", "throughput": 8696.11, "total_tokens": 137538920} +{"current_steps": 204075, "total_steps": 204665, "loss": 0.0, "lr": 5.080087744946393e-11, "epoch": 4.985586201842034, "percentage": 99.71, "elapsed_time": "4:23:36", "remaining_time": "0:00:45", "throughput": 8696.12, "total_tokens": 137541992} +{"current_steps": 204080, "total_steps": 204665, "loss": 0.0, "lr": 4.994494576360253e-11, "epoch": 4.985708352673882, "percentage": 99.71, "elapsed_time": "4:23:36", "remaining_time": "0:00:45", "throughput": 8696.17, "total_tokens": 137545960} +{"current_steps": 204085, "total_steps": 204665, "loss": 0.0, "lr": 4.909628598359639e-11, "epoch": 4.985830503505729, "percentage": 99.72, "elapsed_time": "4:23:37", "remaining_time": "0:00:44", "throughput": 8696.19, "total_tokens": 137549352} +{"current_steps": 204090, "total_steps": 204665, "loss": 0.0, "lr": 4.82548981154407e-11, "epoch": 4.985952654337576, "percentage": 99.72, "elapsed_time": "4:23:37", "remaining_time": "0:00:44", "throughput": 8696.2, "total_tokens": 137552488} +{"current_steps": 204095, "total_steps": 204665, "loss": 0.0, "lr": 4.742078216535272e-11, "epoch": 4.986074805169423, "percentage": 99.72, "elapsed_time": "4:23:37", "remaining_time": "0:00:44", "throughput": 8696.22, "total_tokens": 137555816} +{"current_steps": 204100, "total_steps": 204665, "loss": 0.0, "lr": 4.6593938139438685e-11, "epoch": 4.986196956001271, "percentage": 99.72, "elapsed_time": "4:23:38", "remaining_time": "0:00:43", "throughput": 8696.28, "total_tokens": 137559976} +{"current_steps": 204105, "total_steps": 204665, "loss": 0.0, "lr": 4.577436604358276e-11, "epoch": 4.986319106833117, "percentage": 99.73, "elapsed_time": "4:23:38", "remaining_time": "0:00:43", "throughput": 8696.3, "total_tokens": 137563368} +{"current_steps": 204110, "total_steps": 204665, "loss": 0.0, "lr": 4.496206588378015e-11, "epoch": 4.986441257664965, "percentage": 99.73, "elapsed_time": "4:23:38", "remaining_time": "0:00:43", "throughput": 8696.32, "total_tokens": 137566760} +{"current_steps": 204115, "total_steps": 204665, "loss": 0.0, "lr": 4.4157037666026075e-11, "epoch": 4.986563408496812, "percentage": 99.73, "elapsed_time": "4:23:39", "remaining_time": "0:00:42", "throughput": 8696.34, "total_tokens": 137570088} +{"current_steps": 204120, "total_steps": 204665, "loss": 0.0, "lr": 4.335928139609368e-11, "epoch": 4.986685559328659, "percentage": 99.73, "elapsed_time": "4:23:39", "remaining_time": "0:00:42", "throughput": 8696.39, "total_tokens": 137573928} +{"current_steps": 204125, "total_steps": 204665, "loss": 0.0003, "lr": 4.2568797079867156e-11, "epoch": 4.986807710160506, "percentage": 99.74, "elapsed_time": "4:23:40", "remaining_time": "0:00:41", "throughput": 8696.4, "total_tokens": 137577128} +{"current_steps": 204130, "total_steps": 204665, "loss": 0.0, "lr": 4.1785584723008635e-11, "epoch": 4.986929860992353, "percentage": 99.74, "elapsed_time": "4:23:40", "remaining_time": "0:00:41", "throughput": 8696.41, "total_tokens": 137580264} +{"current_steps": 204135, "total_steps": 204665, "loss": 0.0, "lr": 4.100964433118026e-11, "epoch": 4.9870520118242005, "percentage": 99.74, "elapsed_time": "4:23:40", "remaining_time": "0:00:41", "throughput": 8696.42, "total_tokens": 137583464} +{"current_steps": 204140, "total_steps": 204665, "loss": 0.0, "lr": 4.0240975910155186e-11, "epoch": 4.987174162656048, "percentage": 99.74, "elapsed_time": "4:23:41", "remaining_time": "0:00:40", "throughput": 8696.44, "total_tokens": 137586728} +{"current_steps": 204145, "total_steps": 204665, "loss": 0.0, "lr": 3.9479579465373504e-11, "epoch": 4.987296313487895, "percentage": 99.75, "elapsed_time": "4:23:41", "remaining_time": "0:00:40", "throughput": 8696.44, "total_tokens": 137589864} +{"current_steps": 204150, "total_steps": 204665, "loss": 0.0, "lr": 3.8725455002608377e-11, "epoch": 4.987418464319742, "percentage": 99.75, "elapsed_time": "4:23:41", "remaining_time": "0:00:39", "throughput": 8696.46, "total_tokens": 137593064} +{"current_steps": 204155, "total_steps": 204665, "loss": 0.0, "lr": 3.797860252707785e-11, "epoch": 4.987540615151589, "percentage": 99.75, "elapsed_time": "4:23:42", "remaining_time": "0:00:39", "throughput": 8696.48, "total_tokens": 137596520} +{"current_steps": 204160, "total_steps": 204665, "loss": 0.0, "lr": 3.7239022044333045e-11, "epoch": 4.987662765983436, "percentage": 99.75, "elapsed_time": "4:23:42", "remaining_time": "0:00:39", "throughput": 8696.49, "total_tokens": 137599720} +{"current_steps": 204165, "total_steps": 204665, "loss": 0.0, "lr": 3.6506713559703025e-11, "epoch": 4.987784916815284, "percentage": 99.76, "elapsed_time": "4:23:42", "remaining_time": "0:00:38", "throughput": 8696.63, "total_tokens": 137605352} +{"current_steps": 204170, "total_steps": 204665, "loss": 0.0, "lr": 3.578167707862789e-11, "epoch": 4.98790706764713, "percentage": 99.76, "elapsed_time": "4:23:43", "remaining_time": "0:00:38", "throughput": 8696.63, "total_tokens": 137608360} +{"current_steps": 204175, "total_steps": 204665, "loss": 0.0, "lr": 3.506391260621466e-11, "epoch": 4.988029218478978, "percentage": 99.76, "elapsed_time": "4:23:43", "remaining_time": "0:00:37", "throughput": 8696.65, "total_tokens": 137611816} +{"current_steps": 204180, "total_steps": 204665, "loss": 0.0, "lr": 3.4353420147903435e-11, "epoch": 4.988151369310825, "percentage": 99.76, "elapsed_time": "4:23:43", "remaining_time": "0:00:37", "throughput": 8696.67, "total_tokens": 137615144} +{"current_steps": 204185, "total_steps": 204665, "loss": 0.0, "lr": 3.365019970869021e-11, "epoch": 4.9882735201426724, "percentage": 99.77, "elapsed_time": "4:23:44", "remaining_time": "0:00:37", "throughput": 8696.69, "total_tokens": 137618536} +{"current_steps": 204190, "total_steps": 204665, "loss": 0.0, "lr": 3.295425129368201e-11, "epoch": 4.988395670974519, "percentage": 99.77, "elapsed_time": "4:23:44", "remaining_time": "0:00:36", "throughput": 8696.71, "total_tokens": 137621800} +{"current_steps": 204195, "total_steps": 204665, "loss": 0.0, "lr": 3.226557490798587e-11, "epoch": 4.988517821806367, "percentage": 99.77, "elapsed_time": "4:23:44", "remaining_time": "0:00:36", "throughput": 8696.71, "total_tokens": 137624872} +{"current_steps": 204200, "total_steps": 204665, "loss": 0.0579, "lr": 3.158417055670881e-11, "epoch": 4.988639972638214, "percentage": 99.77, "elapsed_time": "4:23:45", "remaining_time": "0:00:36", "throughput": 8696.72, "total_tokens": 137627944} +{"current_steps": 204205, "total_steps": 204665, "loss": 0.0, "lr": 3.0910038244624794e-11, "epoch": 4.988762123470061, "percentage": 99.78, "elapsed_time": "4:23:45", "remaining_time": "0:00:35", "throughput": 8696.76, "total_tokens": 137631592} +{"current_steps": 204210, "total_steps": 204665, "loss": 0.0, "lr": 3.0243177976729816e-11, "epoch": 4.988884274301908, "percentage": 99.78, "elapsed_time": "4:23:45", "remaining_time": "0:00:35", "throughput": 8696.76, "total_tokens": 137634664} +{"current_steps": 204215, "total_steps": 204665, "loss": 0.0, "lr": 2.9583589757908863e-11, "epoch": 4.989006425133756, "percentage": 99.78, "elapsed_time": "4:23:46", "remaining_time": "0:00:34", "throughput": 8696.77, "total_tokens": 137637800} +{"current_steps": 204220, "total_steps": 204665, "loss": 0.0, "lr": 2.893127359282488e-11, "epoch": 4.989128575965602, "percentage": 99.78, "elapsed_time": "4:23:46", "remaining_time": "0:00:34", "throughput": 8696.81, "total_tokens": 137641512} +{"current_steps": 204225, "total_steps": 204665, "loss": 0.0, "lr": 2.8286229486362833e-11, "epoch": 4.989250726797449, "percentage": 99.79, "elapsed_time": "4:23:47", "remaining_time": "0:00:34", "throughput": 8696.82, "total_tokens": 137644648} +{"current_steps": 204230, "total_steps": 204665, "loss": 0.0763, "lr": 2.764845744318567e-11, "epoch": 4.989372877629297, "percentage": 99.79, "elapsed_time": "4:23:47", "remaining_time": "0:00:33", "throughput": 8696.85, "total_tokens": 137648232} +{"current_steps": 204235, "total_steps": 204665, "loss": 0.0, "lr": 2.701795746795632e-11, "epoch": 4.9894950284611435, "percentage": 99.79, "elapsed_time": "4:23:47", "remaining_time": "0:00:33", "throughput": 8696.87, "total_tokens": 137651752} +{"current_steps": 204240, "total_steps": 204665, "loss": 0.0, "lr": 2.6394729565115682e-11, "epoch": 4.989617179292991, "percentage": 99.79, "elapsed_time": "4:23:48", "remaining_time": "0:00:32", "throughput": 8696.89, "total_tokens": 137655016} +{"current_steps": 204245, "total_steps": 204665, "loss": 0.0, "lr": 2.5778773739326687e-11, "epoch": 4.989739330124838, "percentage": 99.79, "elapsed_time": "4:23:48", "remaining_time": "0:00:32", "throughput": 8696.92, "total_tokens": 137658664} +{"current_steps": 204250, "total_steps": 204665, "loss": 0.0, "lr": 2.517008999503023e-11, "epoch": 4.9898614809566855, "percentage": 99.8, "elapsed_time": "4:23:48", "remaining_time": "0:00:32", "throughput": 8696.97, "total_tokens": 137662440} +{"current_steps": 204255, "total_steps": 204665, "loss": 0.0, "lr": 2.4568678336667203e-11, "epoch": 4.989983631788532, "percentage": 99.8, "elapsed_time": "4:23:49", "remaining_time": "0:00:31", "throughput": 8697.02, "total_tokens": 137666344} +{"current_steps": 204260, "total_steps": 204665, "loss": 0.0, "lr": 2.3974538768567475e-11, "epoch": 4.99010578262038, "percentage": 99.8, "elapsed_time": "4:23:49", "remaining_time": "0:00:31", "throughput": 8697.03, "total_tokens": 137669544} +{"current_steps": 204265, "total_steps": 204665, "loss": 0.0, "lr": 2.338767129517194e-11, "epoch": 4.990227933452227, "percentage": 99.8, "elapsed_time": "4:23:49", "remaining_time": "0:00:30", "throughput": 8697.04, "total_tokens": 137672744} +{"current_steps": 204270, "total_steps": 204665, "loss": 0.112, "lr": 2.280807592058842e-11, "epoch": 4.990350084284074, "percentage": 99.81, "elapsed_time": "4:23:50", "remaining_time": "0:00:30", "throughput": 8697.07, "total_tokens": 137676200} +{"current_steps": 204275, "total_steps": 204665, "loss": 0.0, "lr": 2.2235752649146787e-11, "epoch": 4.990472235115921, "percentage": 99.81, "elapsed_time": "4:23:50", "remaining_time": "0:00:30", "throughput": 8697.09, "total_tokens": 137679592} +{"current_steps": 204280, "total_steps": 204665, "loss": 0.0, "lr": 2.1670701484954866e-11, "epoch": 4.990594385947769, "percentage": 99.81, "elapsed_time": "4:23:50", "remaining_time": "0:00:29", "throughput": 8697.11, "total_tokens": 137682984} +{"current_steps": 204285, "total_steps": 204665, "loss": 0.0, "lr": 2.1112922432120482e-11, "epoch": 4.990716536779615, "percentage": 99.81, "elapsed_time": "4:23:51", "remaining_time": "0:00:29", "throughput": 8697.12, "total_tokens": 137686056} +{"current_steps": 204290, "total_steps": 204665, "loss": 0.0, "lr": 2.056241549475146e-11, "epoch": 4.990838687611463, "percentage": 99.82, "elapsed_time": "4:23:51", "remaining_time": "0:00:29", "throughput": 8697.13, "total_tokens": 137689192} +{"current_steps": 204295, "total_steps": 204665, "loss": 0.0, "lr": 2.0019180676733582e-11, "epoch": 4.99096083844331, "percentage": 99.82, "elapsed_time": "4:23:51", "remaining_time": "0:00:28", "throughput": 8697.15, "total_tokens": 137692648} +{"current_steps": 204300, "total_steps": 204665, "loss": 0.0, "lr": 1.948321798217467e-11, "epoch": 4.9910829892751565, "percentage": 99.82, "elapsed_time": "4:23:52", "remaining_time": "0:00:28", "throughput": 8697.16, "total_tokens": 137695848} +{"current_steps": 204305, "total_steps": 204665, "loss": 0.0, "lr": 1.8954527414849488e-11, "epoch": 4.991205140107004, "percentage": 99.82, "elapsed_time": "4:23:52", "remaining_time": "0:00:27", "throughput": 8697.17, "total_tokens": 137698856} +{"current_steps": 204310, "total_steps": 204665, "loss": 0.0, "lr": 1.843310897875483e-11, "epoch": 4.991327290938852, "percentage": 99.83, "elapsed_time": "4:23:52", "remaining_time": "0:00:27", "throughput": 8697.19, "total_tokens": 137702120} +{"current_steps": 204315, "total_steps": 204665, "loss": 0.0, "lr": 1.7918962677443418e-11, "epoch": 4.9914494417706985, "percentage": 99.83, "elapsed_time": "4:23:53", "remaining_time": "0:00:27", "throughput": 8697.22, "total_tokens": 137705704} +{"current_steps": 204320, "total_steps": 204665, "loss": 0.0, "lr": 1.741208851491205e-11, "epoch": 4.991571592602545, "percentage": 99.83, "elapsed_time": "4:23:53", "remaining_time": "0:00:26", "throughput": 8697.24, "total_tokens": 137709096} +{"current_steps": 204325, "total_steps": 204665, "loss": 0.0, "lr": 1.6912486494602417e-11, "epoch": 4.991693743434393, "percentage": 99.83, "elapsed_time": "4:23:54", "remaining_time": "0:00:26", "throughput": 8697.28, "total_tokens": 137712744} +{"current_steps": 204330, "total_steps": 204665, "loss": 0.0, "lr": 1.6420156620289283e-11, "epoch": 4.99181589426624, "percentage": 99.84, "elapsed_time": "4:23:54", "remaining_time": "0:00:25", "throughput": 8697.29, "total_tokens": 137715880} +{"current_steps": 204335, "total_steps": 204665, "loss": 0.0, "lr": 1.593509889563638e-11, "epoch": 4.991938045098087, "percentage": 99.84, "elapsed_time": "4:23:54", "remaining_time": "0:00:25", "throughput": 8697.32, "total_tokens": 137719528} +{"current_steps": 204340, "total_steps": 204665, "loss": 0.0, "lr": 1.5457313323974374e-11, "epoch": 4.992060195929934, "percentage": 99.84, "elapsed_time": "4:23:55", "remaining_time": "0:00:25", "throughput": 8697.34, "total_tokens": 137722856} +{"current_steps": 204345, "total_steps": 204665, "loss": 0.0, "lr": 1.4986799908855985e-11, "epoch": 4.992182346761782, "percentage": 99.84, "elapsed_time": "4:23:55", "remaining_time": "0:00:24", "throughput": 8697.37, "total_tokens": 137726312} +{"current_steps": 204350, "total_steps": 204665, "loss": 0.0, "lr": 1.45235586537229e-11, "epoch": 4.992304497593628, "percentage": 99.85, "elapsed_time": "4:23:55", "remaining_time": "0:00:24", "throughput": 8697.38, "total_tokens": 137729576} +{"current_steps": 204355, "total_steps": 204665, "loss": 0.0302, "lr": 1.4067589562016812e-11, "epoch": 4.992426648425476, "percentage": 99.85, "elapsed_time": "4:23:56", "remaining_time": "0:00:24", "throughput": 8697.38, "total_tokens": 137732584} +{"current_steps": 204360, "total_steps": 204665, "loss": 0.0, "lr": 1.361889263695737e-11, "epoch": 4.992548799257323, "percentage": 99.85, "elapsed_time": "4:23:56", "remaining_time": "0:00:23", "throughput": 8697.4, "total_tokens": 137735976} +{"current_steps": 204365, "total_steps": 204665, "loss": 0.0, "lr": 1.3177467881764214e-11, "epoch": 4.99267095008917, "percentage": 99.85, "elapsed_time": "4:23:56", "remaining_time": "0:00:23", "throughput": 8697.46, "total_tokens": 137739944} +{"current_steps": 204370, "total_steps": 204665, "loss": 0.0, "lr": 1.274331529976802e-11, "epoch": 4.992793100921017, "percentage": 99.86, "elapsed_time": "4:23:57", "remaining_time": "0:00:22", "throughput": 8697.48, "total_tokens": 137743336} +{"current_steps": 204375, "total_steps": 204665, "loss": 0.0, "lr": 1.2316434893966387e-11, "epoch": 4.992915251752865, "percentage": 99.86, "elapsed_time": "4:23:57", "remaining_time": "0:00:22", "throughput": 8697.5, "total_tokens": 137746728} +{"current_steps": 204380, "total_steps": 204665, "loss": 0.0, "lr": 1.1896826667689985e-11, "epoch": 4.9930374025847115, "percentage": 99.86, "elapsed_time": "4:23:57", "remaining_time": "0:00:22", "throughput": 8697.53, "total_tokens": 137750312} +{"current_steps": 204385, "total_steps": 204665, "loss": 0.0, "lr": 1.1484490623825394e-11, "epoch": 4.993159553416559, "percentage": 99.86, "elapsed_time": "4:23:58", "remaining_time": "0:00:21", "throughput": 8697.53, "total_tokens": 137753192} +{"current_steps": 204390, "total_steps": 204665, "loss": 0.0, "lr": 1.1079426765370215e-11, "epoch": 4.993281704248406, "percentage": 99.87, "elapsed_time": "4:23:58", "remaining_time": "0:00:21", "throughput": 8697.54, "total_tokens": 137756328} +{"current_steps": 204395, "total_steps": 204665, "loss": 0.0, "lr": 1.068163509532205e-11, "epoch": 4.993403855080253, "percentage": 99.87, "elapsed_time": "4:23:58", "remaining_time": "0:00:20", "throughput": 8697.56, "total_tokens": 137759656} +{"current_steps": 204400, "total_steps": 204665, "loss": 0.0, "lr": 1.0291115616567481e-11, "epoch": 4.9935260059121, "percentage": 99.87, "elapsed_time": "4:23:59", "remaining_time": "0:00:20", "throughput": 8697.57, "total_tokens": 137762792} +{"current_steps": 204405, "total_steps": 204665, "loss": 0.0, "lr": 9.907868331882063e-12, "epoch": 4.993648156743948, "percentage": 99.87, "elapsed_time": "4:23:59", "remaining_time": "0:00:20", "throughput": 8697.59, "total_tokens": 137766056} +{"current_steps": 204410, "total_steps": 204665, "loss": 0.0, "lr": 9.5318932442634e-12, "epoch": 4.993770307575795, "percentage": 99.88, "elapsed_time": "4:23:59", "remaining_time": "0:00:19", "throughput": 8697.62, "total_tokens": 137769512} +{"current_steps": 204415, "total_steps": 204665, "loss": 0.0, "lr": 9.163190356153982e-12, "epoch": 4.993892458407641, "percentage": 99.88, "elapsed_time": "4:24:00", "remaining_time": "0:00:19", "throughput": 8697.64, "total_tokens": 137772904} +{"current_steps": 204420, "total_steps": 204665, "loss": 0.0, "lr": 8.80175967044039e-12, "epoch": 4.994014609239489, "percentage": 99.88, "elapsed_time": "4:24:00", "remaining_time": "0:00:18", "throughput": 8697.64, "total_tokens": 137775912} +{"current_steps": 204425, "total_steps": 204665, "loss": 0.0, "lr": 8.447601189676135e-12, "epoch": 4.994136760071336, "percentage": 99.88, "elapsed_time": "4:24:00", "remaining_time": "0:00:18", "throughput": 8697.68, "total_tokens": 137779496} +{"current_steps": 204430, "total_steps": 204665, "loss": 0.0, "lr": 8.100714916414731e-12, "epoch": 4.994258910903183, "percentage": 99.89, "elapsed_time": "4:24:01", "remaining_time": "0:00:18", "throughput": 8697.68, "total_tokens": 137782504} +{"current_steps": 204435, "total_steps": 204665, "loss": 0.0, "lr": 7.76110085320969e-12, "epoch": 4.99438106173503, "percentage": 99.89, "elapsed_time": "4:24:01", "remaining_time": "0:00:17", "throughput": 8697.7, "total_tokens": 137785896} +{"current_steps": 204440, "total_steps": 204665, "loss": 0.0, "lr": 7.428759002614527e-12, "epoch": 4.994503212566878, "percentage": 99.89, "elapsed_time": "4:24:02", "remaining_time": "0:00:17", "throughput": 8697.76, "total_tokens": 137789928} +{"current_steps": 204445, "total_steps": 204665, "loss": 0.0, "lr": 7.103689366849686e-12, "epoch": 4.9946253633987245, "percentage": 99.89, "elapsed_time": "4:24:02", "remaining_time": "0:00:17", "throughput": 8697.79, "total_tokens": 137793512} +{"current_steps": 204450, "total_steps": 204665, "loss": 0.0, "lr": 6.7858919484686805e-12, "epoch": 4.994747514230572, "percentage": 99.89, "elapsed_time": "4:24:02", "remaining_time": "0:00:16", "throughput": 8697.8, "total_tokens": 137796776} +{"current_steps": 204455, "total_steps": 204665, "loss": 0.0, "lr": 6.4753667496919575e-12, "epoch": 4.994869665062419, "percentage": 99.9, "elapsed_time": "4:24:03", "remaining_time": "0:00:16", "throughput": 8697.83, "total_tokens": 137800168} +{"current_steps": 204460, "total_steps": 204665, "loss": 0.0, "lr": 6.172113772850984e-12, "epoch": 4.9949918158942666, "percentage": 99.9, "elapsed_time": "4:24:03", "remaining_time": "0:00:15", "throughput": 8697.86, "total_tokens": 137803816} +{"current_steps": 204465, "total_steps": 204665, "loss": 0.0, "lr": 5.876133019944163e-12, "epoch": 4.995113966726113, "percentage": 99.9, "elapsed_time": "4:24:03", "remaining_time": "0:00:15", "throughput": 8697.87, "total_tokens": 137806888} +{"current_steps": 204470, "total_steps": 204665, "loss": 0.0, "lr": 5.587424493413984e-12, "epoch": 4.995236117557961, "percentage": 99.9, "elapsed_time": "4:24:04", "remaining_time": "0:00:15", "throughput": 8697.88, "total_tokens": 137810088} +{"current_steps": 204475, "total_steps": 204665, "loss": 0.0, "lr": 5.3059881951478254e-12, "epoch": 4.995358268389808, "percentage": 99.91, "elapsed_time": "4:24:04", "remaining_time": "0:00:14", "throughput": 8697.91, "total_tokens": 137813608} +{"current_steps": 204480, "total_steps": 204665, "loss": 0.0, "lr": 5.031824127255113e-12, "epoch": 4.995480419221655, "percentage": 99.91, "elapsed_time": "4:24:04", "remaining_time": "0:00:14", "throughput": 8697.93, "total_tokens": 137817064} +{"current_steps": 204485, "total_steps": 204665, "loss": 0.0, "lr": 4.764932291734247e-12, "epoch": 4.995602570053502, "percentage": 99.91, "elapsed_time": "4:24:05", "remaining_time": "0:00:13", "throughput": 8697.95, "total_tokens": 137820264} +{"current_steps": 204490, "total_steps": 204665, "loss": 0.0, "lr": 4.505312690583629e-12, "epoch": 4.995724720885349, "percentage": 99.91, "elapsed_time": "4:24:05", "remaining_time": "0:00:13", "throughput": 8697.99, "total_tokens": 137824040} +{"current_steps": 204495, "total_steps": 204665, "loss": 0.0, "lr": 4.252965325579616e-12, "epoch": 4.995846871717196, "percentage": 99.92, "elapsed_time": "4:24:05", "remaining_time": "0:00:13", "throughput": 8697.98, "total_tokens": 137826920} +{"current_steps": 204500, "total_steps": 204665, "loss": 0.0, "lr": 4.0078901987206096e-12, "epoch": 4.995969022549043, "percentage": 99.92, "elapsed_time": "4:24:06", "remaining_time": "0:00:12", "throughput": 8698.02, "total_tokens": 137830568} +{"current_steps": 204505, "total_steps": 204665, "loss": 0.0, "lr": 3.770087311560921e-12, "epoch": 4.996091173380891, "percentage": 99.92, "elapsed_time": "4:24:06", "remaining_time": "0:00:12", "throughput": 8698.05, "total_tokens": 137834088} +{"current_steps": 204510, "total_steps": 204665, "loss": 0.0, "lr": 3.5395566659879307e-12, "epoch": 4.996213324212738, "percentage": 99.92, "elapsed_time": "4:24:06", "remaining_time": "0:00:12", "throughput": 8698.05, "total_tokens": 137837160} +{"current_steps": 204515, "total_steps": 204665, "loss": 0.0, "lr": 3.3162982636669722e-12, "epoch": 4.996335475044585, "percentage": 99.93, "elapsed_time": "4:24:07", "remaining_time": "0:00:11", "throughput": 8698.07, "total_tokens": 137840424} +{"current_steps": 204520, "total_steps": 204665, "loss": 0.0, "lr": 3.1003121061523583e-12, "epoch": 4.996457625876432, "percentage": 99.93, "elapsed_time": "4:24:07", "remaining_time": "0:00:11", "throughput": 8698.1, "total_tokens": 137843944} +{"current_steps": 204525, "total_steps": 204665, "loss": 0.0, "lr": 2.8915981951094236e-12, "epoch": 4.99657977670828, "percentage": 99.93, "elapsed_time": "4:24:07", "remaining_time": "0:00:10", "throughput": 8698.11, "total_tokens": 137847080} +{"current_steps": 204530, "total_steps": 204665, "loss": 0.0, "lr": 2.690156531981458e-12, "epoch": 4.996701927540126, "percentage": 99.93, "elapsed_time": "4:24:08", "remaining_time": "0:00:10", "throughput": 8698.14, "total_tokens": 137850792} +{"current_steps": 204535, "total_steps": 204665, "loss": 0.0, "lr": 2.4959871183227732e-12, "epoch": 4.996824078371974, "percentage": 99.94, "elapsed_time": "4:24:08", "remaining_time": "0:00:10", "throughput": 8698.15, "total_tokens": 137853928} +{"current_steps": 204540, "total_steps": 204665, "loss": 0.0, "lr": 2.309089955354615e-12, "epoch": 4.996946229203821, "percentage": 99.94, "elapsed_time": "4:24:08", "remaining_time": "0:00:09", "throughput": 8698.16, "total_tokens": 137856936} +{"current_steps": 204545, "total_steps": 204665, "loss": 0.0, "lr": 2.1294650446312955e-12, "epoch": 4.997068380035668, "percentage": 99.94, "elapsed_time": "4:24:09", "remaining_time": "0:00:09", "throughput": 8698.19, "total_tokens": 137860584} +{"current_steps": 204550, "total_steps": 204665, "loss": 0.0, "lr": 1.9571123873740602e-12, "epoch": 4.997190530867515, "percentage": 99.94, "elapsed_time": "4:24:09", "remaining_time": "0:00:08", "throughput": 8698.21, "total_tokens": 137863976} +{"current_steps": 204555, "total_steps": 204665, "loss": 0.0, "lr": 1.7920319849151766e-12, "epoch": 4.997312681699363, "percentage": 99.95, "elapsed_time": "4:24:10", "remaining_time": "0:00:08", "throughput": 8698.24, "total_tokens": 137867496} +{"current_steps": 204560, "total_steps": 204665, "loss": 0.0, "lr": 1.6342238382538453e-12, "epoch": 4.9974348325312095, "percentage": 99.95, "elapsed_time": "4:24:10", "remaining_time": "0:00:08", "throughput": 8698.28, "total_tokens": 137871272} +{"current_steps": 204565, "total_steps": 204665, "loss": 0.0, "lr": 1.4836879488333564e-12, "epoch": 4.997556983363056, "percentage": 99.95, "elapsed_time": "4:24:10", "remaining_time": "0:00:07", "throughput": 8698.29, "total_tokens": 137874280} +{"current_steps": 204570, "total_steps": 204665, "loss": 0.0, "lr": 1.340424317430866e-12, "epoch": 4.997679134194904, "percentage": 99.95, "elapsed_time": "4:24:11", "remaining_time": "0:00:07", "throughput": 8698.31, "total_tokens": 137877672} +{"current_steps": 204575, "total_steps": 204665, "loss": 0.0, "lr": 1.2044329453786417e-12, "epoch": 4.9978012850267515, "percentage": 99.96, "elapsed_time": "4:24:11", "remaining_time": "0:00:06", "throughput": 8698.34, "total_tokens": 137881192} +{"current_steps": 204580, "total_steps": 204665, "loss": 0.0, "lr": 1.0757138334538396e-12, "epoch": 4.997923435858598, "percentage": 99.96, "elapsed_time": "4:24:11", "remaining_time": "0:00:06", "throughput": 8698.37, "total_tokens": 137884712} +{"current_steps": 204585, "total_steps": 204665, "loss": 0.0, "lr": 9.542669827666827e-13, "epoch": 4.998045586690445, "percentage": 99.96, "elapsed_time": "4:24:12", "remaining_time": "0:00:06", "throughput": 8698.37, "total_tokens": 137887720} +{"current_steps": 204590, "total_steps": 204665, "loss": 0.0, "lr": 8.400923939833049e-13, "epoch": 4.998167737522293, "percentage": 99.96, "elapsed_time": "4:24:12", "remaining_time": "0:00:05", "throughput": 8698.4, "total_tokens": 137891240} +{"current_steps": 204595, "total_steps": 204665, "loss": 0.0, "lr": 7.331900682139292e-13, "epoch": 4.998289888354139, "percentage": 99.97, "elapsed_time": "4:24:12", "remaining_time": "0:00:05", "throughput": 8698.42, "total_tokens": 137894696} +{"current_steps": 204600, "total_steps": 204665, "loss": 0.0, "lr": 6.335600059026447e-13, "epoch": 4.998412039185987, "percentage": 99.97, "elapsed_time": "4:24:13", "remaining_time": "0:00:05", "throughput": 8698.45, "total_tokens": 137898152} +{"current_steps": 204605, "total_steps": 204665, "loss": 0.0, "lr": 5.412022080486523e-13, "epoch": 4.998534190017834, "percentage": 99.97, "elapsed_time": "4:24:13", "remaining_time": "0:00:04", "throughput": 8698.46, "total_tokens": 137901288} +{"current_steps": 204610, "total_steps": 204665, "loss": 0.0, "lr": 4.561166752070633e-13, "epoch": 4.998656340849681, "percentage": 99.97, "elapsed_time": "4:24:13", "remaining_time": "0:00:04", "throughput": 8698.5, "total_tokens": 137905000} +{"current_steps": 204615, "total_steps": 204665, "loss": 0.0, "lr": 3.7830340804401173e-13, "epoch": 4.998778491681528, "percentage": 99.98, "elapsed_time": "4:24:14", "remaining_time": "0:00:03", "throughput": 8698.55, "total_tokens": 137909032} +{"current_steps": 204620, "total_steps": 204665, "loss": 0.0, "lr": 3.07762407114609e-13, "epoch": 4.998900642513376, "percentage": 99.98, "elapsed_time": "4:24:14", "remaining_time": "0:00:03", "throughput": 8698.56, "total_tokens": 137912168} +{"current_steps": 204625, "total_steps": 204665, "loss": 0.0, "lr": 2.444936728629443e-13, "epoch": 4.9990227933452225, "percentage": 99.98, "elapsed_time": "4:24:14", "remaining_time": "0:00:03", "throughput": 8698.58, "total_tokens": 137915560} +{"current_steps": 204630, "total_steps": 204665, "loss": 0.0, "lr": 1.884972058441292e-13, "epoch": 4.99914494417707, "percentage": 99.98, "elapsed_time": "4:24:15", "remaining_time": "0:00:02", "throughput": 8698.6, "total_tokens": 137918888} +{"current_steps": 204635, "total_steps": 204665, "loss": 0.0, "lr": 1.3977300639123058e-13, "epoch": 4.999267095008917, "percentage": 99.99, "elapsed_time": "4:24:15", "remaining_time": "0:00:02", "throughput": 8698.62, "total_tokens": 137922344} +{"current_steps": 204640, "total_steps": 204665, "loss": 0.0, "lr": 9.832107483731533e-14, "epoch": 4.9993892458407645, "percentage": 99.99, "elapsed_time": "4:24:16", "remaining_time": "0:00:01", "throughput": 8698.65, "total_tokens": 137925800} +{"current_steps": 204645, "total_steps": 204665, "loss": 0.0, "lr": 6.414141162647269e-14, "epoch": 4.999511396672611, "percentage": 99.99, "elapsed_time": "4:24:16", "remaining_time": "0:00:01", "throughput": 8698.68, "total_tokens": 137929256} +{"current_steps": 204650, "total_steps": 204665, "loss": 0.0, "lr": 3.723401686972494e-14, "epoch": 4.999633547504459, "percentage": 99.99, "elapsed_time": "4:24:16", "remaining_time": "0:00:01", "throughput": 8698.68, "total_tokens": 137932264} +{"current_steps": 204655, "total_steps": 204665, "loss": 0.0, "lr": 1.7598890900138997e-14, "epoch": 4.999755698336306, "percentage": 100.0, "elapsed_time": "4:24:17", "remaining_time": "0:00:00", "throughput": 8698.68, "total_tokens": 137935272} +{"current_steps": 204660, "total_steps": 204665, "loss": 0.0013, "lr": 5.236033606692558e-15, "epoch": 4.999877849168152, "percentage": 100.0, "elapsed_time": "4:24:17", "remaining_time": "0:00:00", "throughput": 8698.69, "total_tokens": 137938536} +{"current_steps": 204665, "total_steps": 204665, "loss": 0.0, "lr": 1.454454334748334e-16, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "4:24:17", "remaining_time": "0:00:00", "throughput": 8698.66, "total_tokens": 137941664} +{"current_steps": 204665, "total_steps": 204665, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "4:24:53", "remaining_time": "0:00:00", "throughput": 8679.19, "total_tokens": 137941664} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..fe0abf3 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,327679 @@ +{ + "best_global_step": 20468, + "best_metric": 0.11281616985797882, + "best_model_checkpoint": "saves_bts_preliminary/base/llama-3.2-1b-instruct/train_qqp_42_1779207273/checkpoint-20468", + "epoch": 5.0, + "eval_steps": 10234, + "global_step": 204665, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00012215083184716487, + "grad_norm": 436.6144714355469, + "learning_rate": 3.908731128157522e-10, + "loss": 1.5874, + "num_input_tokens_seen": 3200, + "step": 5 + }, + { + "epoch": 0.00024430166369432974, + "grad_norm": 601.8504028320312, + "learning_rate": 8.794645038354424e-10, + "loss": 1.5585, + "num_input_tokens_seen": 6528, + "step": 10 + }, + { + "epoch": 0.00036645249554149463, + "grad_norm": 531.4441528320312, + "learning_rate": 1.3680558948551327e-09, + "loss": 1.5632, + "num_input_tokens_seen": 9856, + "step": 15 + }, + { + "epoch": 0.0004886033273886595, + "grad_norm": 527.0687255859375, + "learning_rate": 1.8566472858748227e-09, + "loss": 1.6307, + "num_input_tokens_seen": 12864, + "step": 20 + }, + { + "epoch": 0.0006107541592358244, + "grad_norm": 550.139892578125, + "learning_rate": 2.345238676894513e-09, + "loss": 1.559, + "num_input_tokens_seen": 16768, + "step": 25 + }, + { + "epoch": 0.0007329049910829893, + "grad_norm": 502.5528259277344, + "learning_rate": 2.833830067914203e-09, + "loss": 1.604, + "num_input_tokens_seen": 19840, + "step": 30 + }, + { + "epoch": 0.0008550558229301541, + "grad_norm": 617.6123046875, + "learning_rate": 3.3224214589338933e-09, + "loss": 1.4891, + "num_input_tokens_seen": 23488, + "step": 35 + }, + { + "epoch": 0.000977206654777319, + "grad_norm": 457.3122863769531, + "learning_rate": 3.811012849953584e-09, + "loss": 1.63, + "num_input_tokens_seen": 26816, + "step": 40 + }, + { + "epoch": 0.001099357486624484, + "grad_norm": 443.7096252441406, + "learning_rate": 4.299604240973273e-09, + "loss": 1.6983, + "num_input_tokens_seen": 30464, + "step": 45 + }, + { + "epoch": 0.0012215083184716488, + "grad_norm": 616.4371948242188, + "learning_rate": 4.788195631992964e-09, + "loss": 1.6001, + "num_input_tokens_seen": 33856, + "step": 50 + }, + { + "epoch": 0.0013436591503188137, + "grad_norm": 567.6763305664062, + "learning_rate": 5.276787023012655e-09, + "loss": 1.6222, + "num_input_tokens_seen": 37248, + "step": 55 + }, + { + "epoch": 0.0014658099821659785, + "grad_norm": 493.2958984375, + "learning_rate": 5.7653784140323445e-09, + "loss": 1.3772, + "num_input_tokens_seen": 40832, + "step": 60 + }, + { + "epoch": 0.0015879608140131434, + "grad_norm": 565.9519653320312, + "learning_rate": 6.253969805052035e-09, + "loss": 1.7392, + "num_input_tokens_seen": 44032, + "step": 65 + }, + { + "epoch": 0.0017101116458603082, + "grad_norm": 430.8811340332031, + "learning_rate": 6.7425611960717245e-09, + "loss": 1.5064, + "num_input_tokens_seen": 46912, + "step": 70 + }, + { + "epoch": 0.0018322624777074733, + "grad_norm": 476.9154052734375, + "learning_rate": 7.231152587091415e-09, + "loss": 1.5169, + "num_input_tokens_seen": 49792, + "step": 75 + }, + { + "epoch": 0.001954413309554638, + "grad_norm": 484.8668518066406, + "learning_rate": 7.719743978111105e-09, + "loss": 1.6833, + "num_input_tokens_seen": 52800, + "step": 80 + }, + { + "epoch": 0.002076564141401803, + "grad_norm": 539.1660766601562, + "learning_rate": 8.208335369130795e-09, + "loss": 1.7706, + "num_input_tokens_seen": 56704, + "step": 85 + }, + { + "epoch": 0.002198714973248968, + "grad_norm": 551.7511596679688, + "learning_rate": 8.696926760150486e-09, + "loss": 1.5604, + "num_input_tokens_seen": 60096, + "step": 90 + }, + { + "epoch": 0.0023208658050961326, + "grad_norm": 461.3867492675781, + "learning_rate": 9.185518151170177e-09, + "loss": 1.5772, + "num_input_tokens_seen": 63296, + "step": 95 + }, + { + "epoch": 0.0024430166369432977, + "grad_norm": 624.7465209960938, + "learning_rate": 9.674109542189865e-09, + "loss": 1.4907, + "num_input_tokens_seen": 66816, + "step": 100 + }, + { + "epoch": 0.0025651674687904623, + "grad_norm": 368.129638671875, + "learning_rate": 1.0162700933209557e-08, + "loss": 1.5834, + "num_input_tokens_seen": 70272, + "step": 105 + }, + { + "epoch": 0.0026873183006376274, + "grad_norm": 483.0669860839844, + "learning_rate": 1.0651292324229246e-08, + "loss": 1.6402, + "num_input_tokens_seen": 73344, + "step": 110 + }, + { + "epoch": 0.0028094691324847924, + "grad_norm": 391.2715148925781, + "learning_rate": 1.1139883715248937e-08, + "loss": 1.3031, + "num_input_tokens_seen": 76480, + "step": 115 + }, + { + "epoch": 0.002931619964331957, + "grad_norm": 569.3486328125, + "learning_rate": 1.1628475106268627e-08, + "loss": 1.8053, + "num_input_tokens_seen": 79936, + "step": 120 + }, + { + "epoch": 0.003053770796179122, + "grad_norm": 522.3450317382812, + "learning_rate": 1.2117066497288317e-08, + "loss": 1.5657, + "num_input_tokens_seen": 83200, + "step": 125 + }, + { + "epoch": 0.0031759216280262867, + "grad_norm": 409.33636474609375, + "learning_rate": 1.2605657888308008e-08, + "loss": 1.4574, + "num_input_tokens_seen": 86720, + "step": 130 + }, + { + "epoch": 0.003298072459873452, + "grad_norm": 452.3154296875, + "learning_rate": 1.3094249279327697e-08, + "loss": 1.4738, + "num_input_tokens_seen": 89984, + "step": 135 + }, + { + "epoch": 0.0034202232917206164, + "grad_norm": 526.6522827148438, + "learning_rate": 1.3582840670347389e-08, + "loss": 1.5492, + "num_input_tokens_seen": 93120, + "step": 140 + }, + { + "epoch": 0.0035423741235677815, + "grad_norm": 398.2308654785156, + "learning_rate": 1.4071432061367078e-08, + "loss": 1.5314, + "num_input_tokens_seen": 97088, + "step": 145 + }, + { + "epoch": 0.0036645249554149465, + "grad_norm": 432.50579833984375, + "learning_rate": 1.4560023452386768e-08, + "loss": 1.3488, + "num_input_tokens_seen": 100160, + "step": 150 + }, + { + "epoch": 0.003786675787262111, + "grad_norm": 392.37359619140625, + "learning_rate": 1.504861484340646e-08, + "loss": 1.3233, + "num_input_tokens_seen": 103488, + "step": 155 + }, + { + "epoch": 0.003908826619109276, + "grad_norm": 406.7005920410156, + "learning_rate": 1.553720623442615e-08, + "loss": 1.4491, + "num_input_tokens_seen": 106816, + "step": 160 + }, + { + "epoch": 0.004030977450956441, + "grad_norm": 440.5775451660156, + "learning_rate": 1.6025797625445838e-08, + "loss": 1.327, + "num_input_tokens_seen": 110208, + "step": 165 + }, + { + "epoch": 0.004153128282803606, + "grad_norm": 404.6108093261719, + "learning_rate": 1.651438901646553e-08, + "loss": 1.3947, + "num_input_tokens_seen": 113408, + "step": 170 + }, + { + "epoch": 0.0042752791146507705, + "grad_norm": 452.1753234863281, + "learning_rate": 1.700298040748522e-08, + "loss": 1.2108, + "num_input_tokens_seen": 116288, + "step": 175 + }, + { + "epoch": 0.004397429946497936, + "grad_norm": 450.4519348144531, + "learning_rate": 1.749157179850491e-08, + "loss": 1.3352, + "num_input_tokens_seen": 119296, + "step": 180 + }, + { + "epoch": 0.004519580778345101, + "grad_norm": 426.2240295410156, + "learning_rate": 1.79801631895246e-08, + "loss": 1.3764, + "num_input_tokens_seen": 122624, + "step": 185 + }, + { + "epoch": 0.004641731610192265, + "grad_norm": 432.450439453125, + "learning_rate": 1.8468754580544288e-08, + "loss": 1.4257, + "num_input_tokens_seen": 126144, + "step": 190 + }, + { + "epoch": 0.00476388244203943, + "grad_norm": 438.7889709472656, + "learning_rate": 1.8957345971563982e-08, + "loss": 1.3153, + "num_input_tokens_seen": 131648, + "step": 195 + }, + { + "epoch": 0.004886033273886595, + "grad_norm": 564.0687866210938, + "learning_rate": 1.944593736258367e-08, + "loss": 1.3291, + "num_input_tokens_seen": 134912, + "step": 200 + }, + { + "epoch": 0.00500818410573376, + "grad_norm": 329.5206604003906, + "learning_rate": 1.9934528753603358e-08, + "loss": 1.0956, + "num_input_tokens_seen": 138304, + "step": 205 + }, + { + "epoch": 0.005130334937580925, + "grad_norm": 308.6362609863281, + "learning_rate": 2.0423120144623053e-08, + "loss": 0.9825, + "num_input_tokens_seen": 141888, + "step": 210 + }, + { + "epoch": 0.00525248576942809, + "grad_norm": 405.0592956542969, + "learning_rate": 2.091171153564274e-08, + "loss": 0.9918, + "num_input_tokens_seen": 145088, + "step": 215 + }, + { + "epoch": 0.005374636601275255, + "grad_norm": 299.959228515625, + "learning_rate": 2.1400302926662432e-08, + "loss": 0.9288, + "num_input_tokens_seen": 148480, + "step": 220 + }, + { + "epoch": 0.005496787433122419, + "grad_norm": 297.4017333984375, + "learning_rate": 2.1888894317682123e-08, + "loss": 0.8789, + "num_input_tokens_seen": 152256, + "step": 225 + }, + { + "epoch": 0.005618938264969585, + "grad_norm": 289.1470947265625, + "learning_rate": 2.237748570870181e-08, + "loss": 0.9644, + "num_input_tokens_seen": 155584, + "step": 230 + }, + { + "epoch": 0.0057410890968167495, + "grad_norm": 353.2586364746094, + "learning_rate": 2.2866077099721502e-08, + "loss": 0.9388, + "num_input_tokens_seen": 159104, + "step": 235 + }, + { + "epoch": 0.005863239928663914, + "grad_norm": 282.7951965332031, + "learning_rate": 2.335466849074119e-08, + "loss": 1.0616, + "num_input_tokens_seen": 162176, + "step": 240 + }, + { + "epoch": 0.005985390760511079, + "grad_norm": 192.14768981933594, + "learning_rate": 2.384325988176088e-08, + "loss": 0.7593, + "num_input_tokens_seen": 165440, + "step": 245 + }, + { + "epoch": 0.006107541592358244, + "grad_norm": 278.6020812988281, + "learning_rate": 2.4331851272780573e-08, + "loss": 0.8342, + "num_input_tokens_seen": 169088, + "step": 250 + }, + { + "epoch": 0.006229692424205409, + "grad_norm": 374.0990905761719, + "learning_rate": 2.482044266380026e-08, + "loss": 0.8074, + "num_input_tokens_seen": 172480, + "step": 255 + }, + { + "epoch": 0.0063518432560525735, + "grad_norm": 201.84979248046875, + "learning_rate": 2.5309034054819955e-08, + "loss": 0.7764, + "num_input_tokens_seen": 175680, + "step": 260 + }, + { + "epoch": 0.006473994087899739, + "grad_norm": 327.6642150878906, + "learning_rate": 2.5797625445839643e-08, + "loss": 0.7722, + "num_input_tokens_seen": 179008, + "step": 265 + }, + { + "epoch": 0.006596144919746904, + "grad_norm": 248.3802490234375, + "learning_rate": 2.628621683685933e-08, + "loss": 0.7188, + "num_input_tokens_seen": 182528, + "step": 270 + }, + { + "epoch": 0.006718295751594068, + "grad_norm": 121.84048461914062, + "learning_rate": 2.6774808227879026e-08, + "loss": 0.4984, + "num_input_tokens_seen": 186112, + "step": 275 + }, + { + "epoch": 0.006840446583441233, + "grad_norm": 130.44570922851562, + "learning_rate": 2.7263399618898714e-08, + "loss": 0.4016, + "num_input_tokens_seen": 189760, + "step": 280 + }, + { + "epoch": 0.006962597415288398, + "grad_norm": 161.82168579101562, + "learning_rate": 2.7751991009918405e-08, + "loss": 0.4292, + "num_input_tokens_seen": 193216, + "step": 285 + }, + { + "epoch": 0.007084748247135563, + "grad_norm": 110.50860595703125, + "learning_rate": 2.8240582400938093e-08, + "loss": 0.3719, + "num_input_tokens_seen": 196800, + "step": 290 + }, + { + "epoch": 0.007206899078982728, + "grad_norm": 31.40703010559082, + "learning_rate": 2.8729173791957784e-08, + "loss": 0.266, + "num_input_tokens_seen": 200000, + "step": 295 + }, + { + "epoch": 0.007329049910829893, + "grad_norm": 96.29593658447266, + "learning_rate": 2.9217765182977475e-08, + "loss": 0.3052, + "num_input_tokens_seen": 203392, + "step": 300 + }, + { + "epoch": 0.007451200742677058, + "grad_norm": 96.10907745361328, + "learning_rate": 2.9706356573997163e-08, + "loss": 0.3312, + "num_input_tokens_seen": 206656, + "step": 305 + }, + { + "epoch": 0.007573351574524222, + "grad_norm": 32.14572525024414, + "learning_rate": 3.0194947965016854e-08, + "loss": 0.2604, + "num_input_tokens_seen": 210368, + "step": 310 + }, + { + "epoch": 0.007695502406371387, + "grad_norm": 61.403629302978516, + "learning_rate": 3.068353935603654e-08, + "loss": 0.3638, + "num_input_tokens_seen": 213760, + "step": 315 + }, + { + "epoch": 0.007817653238218552, + "grad_norm": 41.64677047729492, + "learning_rate": 3.117213074705624e-08, + "loss": 0.2793, + "num_input_tokens_seen": 217152, + "step": 320 + }, + { + "epoch": 0.007939804070065717, + "grad_norm": 59.9798583984375, + "learning_rate": 3.1660722138075925e-08, + "loss": 0.2819, + "num_input_tokens_seen": 220928, + "step": 325 + }, + { + "epoch": 0.008061954901912883, + "grad_norm": 45.30350875854492, + "learning_rate": 3.214931352909562e-08, + "loss": 0.2614, + "num_input_tokens_seen": 225024, + "step": 330 + }, + { + "epoch": 0.008184105733760046, + "grad_norm": 82.58984375, + "learning_rate": 3.263790492011531e-08, + "loss": 0.2997, + "num_input_tokens_seen": 228416, + "step": 335 + }, + { + "epoch": 0.008306256565607212, + "grad_norm": 63.688331604003906, + "learning_rate": 3.3126496311134995e-08, + "loss": 0.2678, + "num_input_tokens_seen": 231552, + "step": 340 + }, + { + "epoch": 0.008428407397454377, + "grad_norm": 56.17790222167969, + "learning_rate": 3.361508770215469e-08, + "loss": 0.2656, + "num_input_tokens_seen": 234752, + "step": 345 + }, + { + "epoch": 0.008550558229301541, + "grad_norm": 21.489974975585938, + "learning_rate": 3.410367909317437e-08, + "loss": 0.2684, + "num_input_tokens_seen": 238016, + "step": 350 + }, + { + "epoch": 0.008672709061148707, + "grad_norm": 24.420082092285156, + "learning_rate": 3.4592270484194066e-08, + "loss": 0.3036, + "num_input_tokens_seen": 241536, + "step": 355 + }, + { + "epoch": 0.008794859892995872, + "grad_norm": 40.783447265625, + "learning_rate": 3.508086187521376e-08, + "loss": 0.2641, + "num_input_tokens_seen": 244928, + "step": 360 + }, + { + "epoch": 0.008917010724843036, + "grad_norm": 28.45875358581543, + "learning_rate": 3.556945326623344e-08, + "loss": 0.2976, + "num_input_tokens_seen": 248576, + "step": 365 + }, + { + "epoch": 0.009039161556690201, + "grad_norm": 139.9940185546875, + "learning_rate": 3.6058044657253136e-08, + "loss": 0.3249, + "num_input_tokens_seen": 251712, + "step": 370 + }, + { + "epoch": 0.009161312388537367, + "grad_norm": 37.76359939575195, + "learning_rate": 3.654663604827283e-08, + "loss": 0.255, + "num_input_tokens_seen": 255232, + "step": 375 + }, + { + "epoch": 0.00928346322038453, + "grad_norm": 60.50453567504883, + "learning_rate": 3.703522743929252e-08, + "loss": 0.3003, + "num_input_tokens_seen": 258880, + "step": 380 + }, + { + "epoch": 0.009405614052231696, + "grad_norm": 55.451576232910156, + "learning_rate": 3.7523818830312206e-08, + "loss": 0.2777, + "num_input_tokens_seen": 262336, + "step": 385 + }, + { + "epoch": 0.00952776488407886, + "grad_norm": 74.9093246459961, + "learning_rate": 3.80124102213319e-08, + "loss": 0.2748, + "num_input_tokens_seen": 265664, + "step": 390 + }, + { + "epoch": 0.009649915715926025, + "grad_norm": 64.74765014648438, + "learning_rate": 3.850100161235159e-08, + "loss": 0.2474, + "num_input_tokens_seen": 269056, + "step": 395 + }, + { + "epoch": 0.00977206654777319, + "grad_norm": 52.95093536376953, + "learning_rate": 3.898959300337128e-08, + "loss": 0.2575, + "num_input_tokens_seen": 272640, + "step": 400 + }, + { + "epoch": 0.009894217379620355, + "grad_norm": 86.30628204345703, + "learning_rate": 3.947818439439097e-08, + "loss": 0.2952, + "num_input_tokens_seen": 276032, + "step": 405 + }, + { + "epoch": 0.01001636821146752, + "grad_norm": 41.927181243896484, + "learning_rate": 3.996677578541066e-08, + "loss": 0.2455, + "num_input_tokens_seen": 279488, + "step": 410 + }, + { + "epoch": 0.010138519043314686, + "grad_norm": 36.422889709472656, + "learning_rate": 4.045536717643035e-08, + "loss": 0.2665, + "num_input_tokens_seen": 282752, + "step": 415 + }, + { + "epoch": 0.01026066987516185, + "grad_norm": 105.75531768798828, + "learning_rate": 4.094395856745004e-08, + "loss": 0.244, + "num_input_tokens_seen": 285952, + "step": 420 + }, + { + "epoch": 0.010382820707009015, + "grad_norm": 115.46588897705078, + "learning_rate": 4.143254995846973e-08, + "loss": 0.2549, + "num_input_tokens_seen": 289024, + "step": 425 + }, + { + "epoch": 0.01050497153885618, + "grad_norm": 55.69449234008789, + "learning_rate": 4.192114134948942e-08, + "loss": 0.2537, + "num_input_tokens_seen": 292352, + "step": 430 + }, + { + "epoch": 0.010627122370703344, + "grad_norm": 37.521759033203125, + "learning_rate": 4.240973274050911e-08, + "loss": 0.2425, + "num_input_tokens_seen": 295616, + "step": 435 + }, + { + "epoch": 0.01074927320255051, + "grad_norm": 53.086669921875, + "learning_rate": 4.28983241315288e-08, + "loss": 0.2613, + "num_input_tokens_seen": 299200, + "step": 440 + }, + { + "epoch": 0.010871424034397675, + "grad_norm": 27.426273345947266, + "learning_rate": 4.3386915522548495e-08, + "loss": 0.2972, + "num_input_tokens_seen": 302272, + "step": 445 + }, + { + "epoch": 0.010993574866244839, + "grad_norm": 28.601957321166992, + "learning_rate": 4.3875506913568176e-08, + "loss": 0.2497, + "num_input_tokens_seen": 305600, + "step": 450 + }, + { + "epoch": 0.011115725698092004, + "grad_norm": 83.53900146484375, + "learning_rate": 4.436409830458787e-08, + "loss": 0.2463, + "num_input_tokens_seen": 309760, + "step": 455 + }, + { + "epoch": 0.01123787652993917, + "grad_norm": 36.838340759277344, + "learning_rate": 4.4852689695607565e-08, + "loss": 0.2568, + "num_input_tokens_seen": 313856, + "step": 460 + }, + { + "epoch": 0.011360027361786334, + "grad_norm": 37.25075149536133, + "learning_rate": 4.5341281086627246e-08, + "loss": 0.2602, + "num_input_tokens_seen": 317696, + "step": 465 + }, + { + "epoch": 0.011482178193633499, + "grad_norm": 53.051753997802734, + "learning_rate": 4.582987247764694e-08, + "loss": 0.2595, + "num_input_tokens_seen": 320704, + "step": 470 + }, + { + "epoch": 0.011604329025480663, + "grad_norm": 37.922607421875, + "learning_rate": 4.6318463868666636e-08, + "loss": 0.2515, + "num_input_tokens_seen": 324160, + "step": 475 + }, + { + "epoch": 0.011726479857327828, + "grad_norm": 40.48796081542969, + "learning_rate": 4.680705525968632e-08, + "loss": 0.2352, + "num_input_tokens_seen": 327936, + "step": 480 + }, + { + "epoch": 0.011848630689174994, + "grad_norm": 31.205713272094727, + "learning_rate": 4.729564665070601e-08, + "loss": 0.2296, + "num_input_tokens_seen": 331264, + "step": 485 + }, + { + "epoch": 0.011970781521022157, + "grad_norm": 108.14125061035156, + "learning_rate": 4.7784238041725706e-08, + "loss": 0.2708, + "num_input_tokens_seen": 334592, + "step": 490 + }, + { + "epoch": 0.012092932352869323, + "grad_norm": 19.461109161376953, + "learning_rate": 4.8272829432745394e-08, + "loss": 0.2384, + "num_input_tokens_seen": 338688, + "step": 495 + }, + { + "epoch": 0.012215083184716488, + "grad_norm": 97.08906555175781, + "learning_rate": 4.876142082376508e-08, + "loss": 0.2568, + "num_input_tokens_seen": 341888, + "step": 500 + }, + { + "epoch": 0.012337234016563652, + "grad_norm": 49.344146728515625, + "learning_rate": 4.9250012214784776e-08, + "loss": 0.2555, + "num_input_tokens_seen": 345088, + "step": 505 + }, + { + "epoch": 0.012459384848410818, + "grad_norm": 32.22880172729492, + "learning_rate": 4.9738603605804464e-08, + "loss": 0.2275, + "num_input_tokens_seen": 348480, + "step": 510 + }, + { + "epoch": 0.012581535680257983, + "grad_norm": 68.86043548583984, + "learning_rate": 5.022719499682415e-08, + "loss": 0.2045, + "num_input_tokens_seen": 351936, + "step": 515 + }, + { + "epoch": 0.012703686512105147, + "grad_norm": 18.139911651611328, + "learning_rate": 5.071578638784385e-08, + "loss": 0.2334, + "num_input_tokens_seen": 355712, + "step": 520 + }, + { + "epoch": 0.012825837343952312, + "grad_norm": 17.971744537353516, + "learning_rate": 5.1204377778863535e-08, + "loss": 0.2546, + "num_input_tokens_seen": 359232, + "step": 525 + }, + { + "epoch": 0.012947988175799478, + "grad_norm": 152.04522705078125, + "learning_rate": 5.169296916988322e-08, + "loss": 0.2338, + "num_input_tokens_seen": 362624, + "step": 530 + }, + { + "epoch": 0.013070139007646642, + "grad_norm": 22.91851234436035, + "learning_rate": 5.218156056090292e-08, + "loss": 0.2105, + "num_input_tokens_seen": 366336, + "step": 535 + }, + { + "epoch": 0.013192289839493807, + "grad_norm": 29.61043357849121, + "learning_rate": 5.2670151951922605e-08, + "loss": 0.1841, + "num_input_tokens_seen": 370112, + "step": 540 + }, + { + "epoch": 0.013314440671340971, + "grad_norm": 27.556501388549805, + "learning_rate": 5.315874334294229e-08, + "loss": 0.1867, + "num_input_tokens_seen": 373120, + "step": 545 + }, + { + "epoch": 0.013436591503188136, + "grad_norm": 48.11063766479492, + "learning_rate": 5.364733473396198e-08, + "loss": 0.2397, + "num_input_tokens_seen": 376768, + "step": 550 + }, + { + "epoch": 0.013558742335035302, + "grad_norm": 16.25624656677246, + "learning_rate": 5.4135926124981675e-08, + "loss": 0.1875, + "num_input_tokens_seen": 380544, + "step": 555 + }, + { + "epoch": 0.013680893166882466, + "grad_norm": 26.067886352539062, + "learning_rate": 5.462451751600137e-08, + "loss": 0.224, + "num_input_tokens_seen": 383552, + "step": 560 + }, + { + "epoch": 0.013803043998729631, + "grad_norm": 61.1496467590332, + "learning_rate": 5.511310890702105e-08, + "loss": 0.2137, + "num_input_tokens_seen": 387456, + "step": 565 + }, + { + "epoch": 0.013925194830576797, + "grad_norm": 23.894100189208984, + "learning_rate": 5.5601700298040746e-08, + "loss": 0.1913, + "num_input_tokens_seen": 391616, + "step": 570 + }, + { + "epoch": 0.01404734566242396, + "grad_norm": 26.000904083251953, + "learning_rate": 5.609029168906044e-08, + "loss": 0.1981, + "num_input_tokens_seen": 395392, + "step": 575 + }, + { + "epoch": 0.014169496494271126, + "grad_norm": 56.60203170776367, + "learning_rate": 5.657888308008012e-08, + "loss": 0.1901, + "num_input_tokens_seen": 400384, + "step": 580 + }, + { + "epoch": 0.014291647326118291, + "grad_norm": 71.11356353759766, + "learning_rate": 5.7067474471099816e-08, + "loss": 0.2619, + "num_input_tokens_seen": 403392, + "step": 585 + }, + { + "epoch": 0.014413798157965455, + "grad_norm": 23.953935623168945, + "learning_rate": 5.755606586211951e-08, + "loss": 0.1605, + "num_input_tokens_seen": 406592, + "step": 590 + }, + { + "epoch": 0.01453594898981262, + "grad_norm": 69.51998138427734, + "learning_rate": 5.804465725313919e-08, + "loss": 0.1769, + "num_input_tokens_seen": 409920, + "step": 595 + }, + { + "epoch": 0.014658099821659786, + "grad_norm": 25.43825912475586, + "learning_rate": 5.853324864415889e-08, + "loss": 0.2082, + "num_input_tokens_seen": 413440, + "step": 600 + }, + { + "epoch": 0.01478025065350695, + "grad_norm": 39.317501068115234, + "learning_rate": 5.902184003517858e-08, + "loss": 0.1786, + "num_input_tokens_seen": 416896, + "step": 605 + }, + { + "epoch": 0.014902401485354115, + "grad_norm": 34.33527374267578, + "learning_rate": 5.951043142619827e-08, + "loss": 0.182, + "num_input_tokens_seen": 420416, + "step": 610 + }, + { + "epoch": 0.015024552317201281, + "grad_norm": 21.858205795288086, + "learning_rate": 5.999902281721795e-08, + "loss": 0.1692, + "num_input_tokens_seen": 423936, + "step": 615 + }, + { + "epoch": 0.015146703149048445, + "grad_norm": 32.28944396972656, + "learning_rate": 6.048761420823765e-08, + "loss": 0.2483, + "num_input_tokens_seen": 427776, + "step": 620 + }, + { + "epoch": 0.01526885398089561, + "grad_norm": 52.35483932495117, + "learning_rate": 6.097620559925734e-08, + "loss": 0.1979, + "num_input_tokens_seen": 431616, + "step": 625 + }, + { + "epoch": 0.015391004812742774, + "grad_norm": 16.09345245361328, + "learning_rate": 6.146479699027702e-08, + "loss": 0.2048, + "num_input_tokens_seen": 434944, + "step": 630 + }, + { + "epoch": 0.01551315564458994, + "grad_norm": 19.902141571044922, + "learning_rate": 6.195338838129672e-08, + "loss": 0.2058, + "num_input_tokens_seen": 438272, + "step": 635 + }, + { + "epoch": 0.015635306476437103, + "grad_norm": 20.941940307617188, + "learning_rate": 6.244197977231641e-08, + "loss": 0.2192, + "num_input_tokens_seen": 441792, + "step": 640 + }, + { + "epoch": 0.01575745730828427, + "grad_norm": 26.100826263427734, + "learning_rate": 6.293057116333609e-08, + "loss": 0.2148, + "num_input_tokens_seen": 444864, + "step": 645 + }, + { + "epoch": 0.015879608140131434, + "grad_norm": 78.05301666259766, + "learning_rate": 6.34191625543558e-08, + "loss": 0.153, + "num_input_tokens_seen": 448512, + "step": 650 + }, + { + "epoch": 0.0160017589719786, + "grad_norm": 32.38904571533203, + "learning_rate": 6.390775394537548e-08, + "loss": 0.1999, + "num_input_tokens_seen": 451456, + "step": 655 + }, + { + "epoch": 0.016123909803825765, + "grad_norm": 88.80257415771484, + "learning_rate": 6.439634533639516e-08, + "loss": 0.205, + "num_input_tokens_seen": 454912, + "step": 660 + }, + { + "epoch": 0.01624606063567293, + "grad_norm": 20.400436401367188, + "learning_rate": 6.488493672741487e-08, + "loss": 0.1934, + "num_input_tokens_seen": 458688, + "step": 665 + }, + { + "epoch": 0.016368211467520093, + "grad_norm": 59.74924850463867, + "learning_rate": 6.537352811843455e-08, + "loss": 0.1651, + "num_input_tokens_seen": 461504, + "step": 670 + }, + { + "epoch": 0.016490362299367258, + "grad_norm": 88.08880615234375, + "learning_rate": 6.586211950945423e-08, + "loss": 0.2466, + "num_input_tokens_seen": 464512, + "step": 675 + }, + { + "epoch": 0.016612513131214424, + "grad_norm": 25.69615936279297, + "learning_rate": 6.635071090047394e-08, + "loss": 0.2258, + "num_input_tokens_seen": 467584, + "step": 680 + }, + { + "epoch": 0.01673466396306159, + "grad_norm": 54.38239288330078, + "learning_rate": 6.683930229149362e-08, + "loss": 0.2461, + "num_input_tokens_seen": 470848, + "step": 685 + }, + { + "epoch": 0.016856814794908755, + "grad_norm": 136.9866943359375, + "learning_rate": 6.73278936825133e-08, + "loss": 0.216, + "num_input_tokens_seen": 473920, + "step": 690 + }, + { + "epoch": 0.016978965626755917, + "grad_norm": 44.14219665527344, + "learning_rate": 6.781648507353301e-08, + "loss": 0.2186, + "num_input_tokens_seen": 477184, + "step": 695 + }, + { + "epoch": 0.017101116458603082, + "grad_norm": 53.0874137878418, + "learning_rate": 6.830507646455269e-08, + "loss": 0.2767, + "num_input_tokens_seen": 482688, + "step": 700 + }, + { + "epoch": 0.017223267290450248, + "grad_norm": 41.03855514526367, + "learning_rate": 6.879366785557237e-08, + "loss": 0.1857, + "num_input_tokens_seen": 485888, + "step": 705 + }, + { + "epoch": 0.017345418122297413, + "grad_norm": 35.35697555541992, + "learning_rate": 6.928225924659208e-08, + "loss": 0.206, + "num_input_tokens_seen": 489536, + "step": 710 + }, + { + "epoch": 0.01746756895414458, + "grad_norm": 41.01392364501953, + "learning_rate": 6.977085063761176e-08, + "loss": 0.1713, + "num_input_tokens_seen": 493056, + "step": 715 + }, + { + "epoch": 0.017589719785991744, + "grad_norm": 80.70348358154297, + "learning_rate": 7.025944202863144e-08, + "loss": 0.2117, + "num_input_tokens_seen": 496256, + "step": 720 + }, + { + "epoch": 0.017711870617838906, + "grad_norm": 56.13608169555664, + "learning_rate": 7.074803341965115e-08, + "loss": 0.1537, + "num_input_tokens_seen": 500160, + "step": 725 + }, + { + "epoch": 0.01783402144968607, + "grad_norm": 78.34611511230469, + "learning_rate": 7.123662481067083e-08, + "loss": 0.2369, + "num_input_tokens_seen": 503488, + "step": 730 + }, + { + "epoch": 0.017956172281533237, + "grad_norm": 35.684146881103516, + "learning_rate": 7.172521620169051e-08, + "loss": 0.2371, + "num_input_tokens_seen": 506560, + "step": 735 + }, + { + "epoch": 0.018078323113380403, + "grad_norm": 27.17791748046875, + "learning_rate": 7.221380759271022e-08, + "loss": 0.1702, + "num_input_tokens_seen": 509888, + "step": 740 + }, + { + "epoch": 0.018200473945227568, + "grad_norm": 47.520076751708984, + "learning_rate": 7.27023989837299e-08, + "loss": 0.2225, + "num_input_tokens_seen": 512960, + "step": 745 + }, + { + "epoch": 0.018322624777074734, + "grad_norm": 33.98930358886719, + "learning_rate": 7.31909903747496e-08, + "loss": 0.2422, + "num_input_tokens_seen": 516416, + "step": 750 + }, + { + "epoch": 0.018444775608921896, + "grad_norm": 82.38633728027344, + "learning_rate": 7.367958176576929e-08, + "loss": 0.2441, + "num_input_tokens_seen": 519680, + "step": 755 + }, + { + "epoch": 0.01856692644076906, + "grad_norm": 45.27608108520508, + "learning_rate": 7.416817315678897e-08, + "loss": 0.1828, + "num_input_tokens_seen": 522944, + "step": 760 + }, + { + "epoch": 0.018689077272616227, + "grad_norm": 24.22097396850586, + "learning_rate": 7.465676454780867e-08, + "loss": 0.1292, + "num_input_tokens_seen": 526144, + "step": 765 + }, + { + "epoch": 0.018811228104463392, + "grad_norm": 79.6414566040039, + "learning_rate": 7.514535593882836e-08, + "loss": 0.1908, + "num_input_tokens_seen": 529152, + "step": 770 + }, + { + "epoch": 0.018933378936310558, + "grad_norm": 120.36722564697266, + "learning_rate": 7.563394732984804e-08, + "loss": 0.2619, + "num_input_tokens_seen": 532608, + "step": 775 + }, + { + "epoch": 0.01905552976815772, + "grad_norm": 141.484130859375, + "learning_rate": 7.612253872086774e-08, + "loss": 0.1843, + "num_input_tokens_seen": 536320, + "step": 780 + }, + { + "epoch": 0.019177680600004885, + "grad_norm": 50.55957794189453, + "learning_rate": 7.661113011188742e-08, + "loss": 0.1081, + "num_input_tokens_seen": 539520, + "step": 785 + }, + { + "epoch": 0.01929983143185205, + "grad_norm": 95.52667236328125, + "learning_rate": 7.709972150290711e-08, + "loss": 0.2077, + "num_input_tokens_seen": 542848, + "step": 790 + }, + { + "epoch": 0.019421982263699216, + "grad_norm": 58.965370178222656, + "learning_rate": 7.758831289392681e-08, + "loss": 0.1838, + "num_input_tokens_seen": 546304, + "step": 795 + }, + { + "epoch": 0.01954413309554638, + "grad_norm": 50.59172058105469, + "learning_rate": 7.807690428494649e-08, + "loss": 0.2594, + "num_input_tokens_seen": 549120, + "step": 800 + }, + { + "epoch": 0.019666283927393547, + "grad_norm": 71.519775390625, + "learning_rate": 7.856549567596618e-08, + "loss": 0.1731, + "num_input_tokens_seen": 552832, + "step": 805 + }, + { + "epoch": 0.01978843475924071, + "grad_norm": 29.882675170898438, + "learning_rate": 7.905408706698588e-08, + "loss": 0.1918, + "num_input_tokens_seen": 556224, + "step": 810 + }, + { + "epoch": 0.019910585591087875, + "grad_norm": 46.32589340209961, + "learning_rate": 7.954267845800556e-08, + "loss": 0.1981, + "num_input_tokens_seen": 559552, + "step": 815 + }, + { + "epoch": 0.02003273642293504, + "grad_norm": 21.45256233215332, + "learning_rate": 8.003126984902525e-08, + "loss": 0.1791, + "num_input_tokens_seen": 562752, + "step": 820 + }, + { + "epoch": 0.020154887254782206, + "grad_norm": 80.42591094970703, + "learning_rate": 8.051986124004495e-08, + "loss": 0.1723, + "num_input_tokens_seen": 566080, + "step": 825 + }, + { + "epoch": 0.02027703808662937, + "grad_norm": 31.974721908569336, + "learning_rate": 8.100845263106463e-08, + "loss": 0.2432, + "num_input_tokens_seen": 569408, + "step": 830 + }, + { + "epoch": 0.020399188918476537, + "grad_norm": 23.089448928833008, + "learning_rate": 8.149704402208433e-08, + "loss": 0.1827, + "num_input_tokens_seen": 572928, + "step": 835 + }, + { + "epoch": 0.0205213397503237, + "grad_norm": 53.952659606933594, + "learning_rate": 8.198563541310402e-08, + "loss": 0.256, + "num_input_tokens_seen": 576448, + "step": 840 + }, + { + "epoch": 0.020643490582170864, + "grad_norm": 49.31153106689453, + "learning_rate": 8.24742268041237e-08, + "loss": 0.2329, + "num_input_tokens_seen": 579712, + "step": 845 + }, + { + "epoch": 0.02076564141401803, + "grad_norm": 26.863073348999023, + "learning_rate": 8.29628181951434e-08, + "loss": 0.1903, + "num_input_tokens_seen": 583104, + "step": 850 + }, + { + "epoch": 0.020887792245865195, + "grad_norm": 176.17437744140625, + "learning_rate": 8.345140958616309e-08, + "loss": 0.2145, + "num_input_tokens_seen": 586560, + "step": 855 + }, + { + "epoch": 0.02100994307771236, + "grad_norm": 75.82144927978516, + "learning_rate": 8.394000097718277e-08, + "loss": 0.1631, + "num_input_tokens_seen": 590336, + "step": 860 + }, + { + "epoch": 0.021132093909559523, + "grad_norm": 50.85761260986328, + "learning_rate": 8.442859236820248e-08, + "loss": 0.2079, + "num_input_tokens_seen": 593728, + "step": 865 + }, + { + "epoch": 0.021254244741406688, + "grad_norm": 33.126644134521484, + "learning_rate": 8.491718375922216e-08, + "loss": 0.1904, + "num_input_tokens_seen": 597056, + "step": 870 + }, + { + "epoch": 0.021376395573253854, + "grad_norm": 38.85331344604492, + "learning_rate": 8.540577515024184e-08, + "loss": 0.1724, + "num_input_tokens_seen": 600384, + "step": 875 + }, + { + "epoch": 0.02149854640510102, + "grad_norm": 31.606834411621094, + "learning_rate": 8.589436654126155e-08, + "loss": 0.137, + "num_input_tokens_seen": 603968, + "step": 880 + }, + { + "epoch": 0.021620697236948185, + "grad_norm": 72.67549896240234, + "learning_rate": 8.638295793228123e-08, + "loss": 0.2714, + "num_input_tokens_seen": 607360, + "step": 885 + }, + { + "epoch": 0.02174284806879535, + "grad_norm": 35.970947265625, + "learning_rate": 8.687154932330091e-08, + "loss": 0.2029, + "num_input_tokens_seen": 611008, + "step": 890 + }, + { + "epoch": 0.021864998900642512, + "grad_norm": 43.83979034423828, + "learning_rate": 8.736014071432062e-08, + "loss": 0.267, + "num_input_tokens_seen": 614080, + "step": 895 + }, + { + "epoch": 0.021987149732489678, + "grad_norm": 43.2574577331543, + "learning_rate": 8.78487321053403e-08, + "loss": 0.2438, + "num_input_tokens_seen": 617408, + "step": 900 + }, + { + "epoch": 0.022109300564336843, + "grad_norm": 25.0458984375, + "learning_rate": 8.833732349635998e-08, + "loss": 0.2295, + "num_input_tokens_seen": 621056, + "step": 905 + }, + { + "epoch": 0.02223145139618401, + "grad_norm": 79.44489288330078, + "learning_rate": 8.882591488737969e-08, + "loss": 0.1759, + "num_input_tokens_seen": 624064, + "step": 910 + }, + { + "epoch": 0.022353602228031174, + "grad_norm": 126.1421127319336, + "learning_rate": 8.931450627839937e-08, + "loss": 0.1532, + "num_input_tokens_seen": 627520, + "step": 915 + }, + { + "epoch": 0.02247575305987834, + "grad_norm": 57.74498748779297, + "learning_rate": 8.980309766941905e-08, + "loss": 0.1624, + "num_input_tokens_seen": 631168, + "step": 920 + }, + { + "epoch": 0.0225979038917255, + "grad_norm": 91.72770690917969, + "learning_rate": 9.029168906043876e-08, + "loss": 0.1618, + "num_input_tokens_seen": 634240, + "step": 925 + }, + { + "epoch": 0.022720054723572667, + "grad_norm": 32.56623458862305, + "learning_rate": 9.078028045145844e-08, + "loss": 0.1677, + "num_input_tokens_seen": 637696, + "step": 930 + }, + { + "epoch": 0.022842205555419832, + "grad_norm": 51.39784622192383, + "learning_rate": 9.126887184247812e-08, + "loss": 0.1632, + "num_input_tokens_seen": 641216, + "step": 935 + }, + { + "epoch": 0.022964356387266998, + "grad_norm": 81.02871704101562, + "learning_rate": 9.175746323349783e-08, + "loss": 0.1576, + "num_input_tokens_seen": 644544, + "step": 940 + }, + { + "epoch": 0.023086507219114163, + "grad_norm": 54.13098907470703, + "learning_rate": 9.224605462451751e-08, + "loss": 0.2266, + "num_input_tokens_seen": 647552, + "step": 945 + }, + { + "epoch": 0.023208658050961326, + "grad_norm": 20.77579689025879, + "learning_rate": 9.27346460155372e-08, + "loss": 0.2041, + "num_input_tokens_seen": 651264, + "step": 950 + }, + { + "epoch": 0.02333080888280849, + "grad_norm": 88.34909057617188, + "learning_rate": 9.32232374065569e-08, + "loss": 0.1553, + "num_input_tokens_seen": 654528, + "step": 955 + }, + { + "epoch": 0.023452959714655656, + "grad_norm": 65.97145080566406, + "learning_rate": 9.371182879757658e-08, + "loss": 0.2058, + "num_input_tokens_seen": 658048, + "step": 960 + }, + { + "epoch": 0.023575110546502822, + "grad_norm": 106.45439147949219, + "learning_rate": 9.420042018859626e-08, + "loss": 0.1971, + "num_input_tokens_seen": 661312, + "step": 965 + }, + { + "epoch": 0.023697261378349987, + "grad_norm": 55.05768585205078, + "learning_rate": 9.468901157961596e-08, + "loss": 0.1805, + "num_input_tokens_seen": 664128, + "step": 970 + }, + { + "epoch": 0.023819412210197153, + "grad_norm": 78.96724700927734, + "learning_rate": 9.517760297063565e-08, + "loss": 0.2021, + "num_input_tokens_seen": 667328, + "step": 975 + }, + { + "epoch": 0.023941563042044315, + "grad_norm": 164.31275939941406, + "learning_rate": 9.566619436165535e-08, + "loss": 0.1853, + "num_input_tokens_seen": 670464, + "step": 980 + }, + { + "epoch": 0.02406371387389148, + "grad_norm": 54.7586555480957, + "learning_rate": 9.615478575267503e-08, + "loss": 0.1505, + "num_input_tokens_seen": 673984, + "step": 985 + }, + { + "epoch": 0.024185864705738646, + "grad_norm": 41.857547760009766, + "learning_rate": 9.664337714369472e-08, + "loss": 0.1624, + "num_input_tokens_seen": 677056, + "step": 990 + }, + { + "epoch": 0.02430801553758581, + "grad_norm": 81.85855865478516, + "learning_rate": 9.713196853471442e-08, + "loss": 0.1981, + "num_input_tokens_seen": 680704, + "step": 995 + }, + { + "epoch": 0.024430166369432977, + "grad_norm": 40.62771987915039, + "learning_rate": 9.76205599257341e-08, + "loss": 0.2078, + "num_input_tokens_seen": 683968, + "step": 1000 + }, + { + "epoch": 0.024552317201280142, + "grad_norm": 82.70243835449219, + "learning_rate": 9.81091513167538e-08, + "loss": 0.1137, + "num_input_tokens_seen": 687552, + "step": 1005 + }, + { + "epoch": 0.024674468033127304, + "grad_norm": 57.07538604736328, + "learning_rate": 9.859774270777349e-08, + "loss": 0.1055, + "num_input_tokens_seen": 690624, + "step": 1010 + }, + { + "epoch": 0.02479661886497447, + "grad_norm": 108.51544952392578, + "learning_rate": 9.908633409879317e-08, + "loss": 0.1542, + "num_input_tokens_seen": 694016, + "step": 1015 + }, + { + "epoch": 0.024918769696821635, + "grad_norm": 94.2655029296875, + "learning_rate": 9.957492548981286e-08, + "loss": 0.1864, + "num_input_tokens_seen": 697152, + "step": 1020 + }, + { + "epoch": 0.0250409205286688, + "grad_norm": 49.298973083496094, + "learning_rate": 1.0006351688083256e-07, + "loss": 0.2495, + "num_input_tokens_seen": 700672, + "step": 1025 + }, + { + "epoch": 0.025163071360515966, + "grad_norm": 151.5254669189453, + "learning_rate": 1.0055210827185224e-07, + "loss": 0.1592, + "num_input_tokens_seen": 704256, + "step": 1030 + }, + { + "epoch": 0.02528522219236313, + "grad_norm": 86.0088882446289, + "learning_rate": 1.0104069966287194e-07, + "loss": 0.1961, + "num_input_tokens_seen": 707712, + "step": 1035 + }, + { + "epoch": 0.025407373024210294, + "grad_norm": 58.221675872802734, + "learning_rate": 1.0152929105389163e-07, + "loss": 0.1893, + "num_input_tokens_seen": 711232, + "step": 1040 + }, + { + "epoch": 0.02552952385605746, + "grad_norm": 94.9822998046875, + "learning_rate": 1.0201788244491131e-07, + "loss": 0.1576, + "num_input_tokens_seen": 714560, + "step": 1045 + }, + { + "epoch": 0.025651674687904625, + "grad_norm": 23.00892448425293, + "learning_rate": 1.02506473835931e-07, + "loss": 0.1167, + "num_input_tokens_seen": 718144, + "step": 1050 + }, + { + "epoch": 0.02577382551975179, + "grad_norm": 64.81526184082031, + "learning_rate": 1.029950652269507e-07, + "loss": 0.1414, + "num_input_tokens_seen": 721600, + "step": 1055 + }, + { + "epoch": 0.025895976351598956, + "grad_norm": 143.27706909179688, + "learning_rate": 1.0348365661797038e-07, + "loss": 0.3008, + "num_input_tokens_seen": 724928, + "step": 1060 + }, + { + "epoch": 0.026018127183446118, + "grad_norm": 167.41702270507812, + "learning_rate": 1.0397224800899008e-07, + "loss": 0.218, + "num_input_tokens_seen": 728704, + "step": 1065 + }, + { + "epoch": 0.026140278015293283, + "grad_norm": 19.40850830078125, + "learning_rate": 1.0446083940000977e-07, + "loss": 0.1445, + "num_input_tokens_seen": 731968, + "step": 1070 + }, + { + "epoch": 0.02626242884714045, + "grad_norm": 110.09046936035156, + "learning_rate": 1.0494943079102945e-07, + "loss": 0.1742, + "num_input_tokens_seen": 734976, + "step": 1075 + }, + { + "epoch": 0.026384579678987614, + "grad_norm": 169.50914001464844, + "learning_rate": 1.0543802218204915e-07, + "loss": 0.2347, + "num_input_tokens_seen": 738176, + "step": 1080 + }, + { + "epoch": 0.02650673051083478, + "grad_norm": 37.02994155883789, + "learning_rate": 1.0592661357306884e-07, + "loss": 0.1286, + "num_input_tokens_seen": 742080, + "step": 1085 + }, + { + "epoch": 0.026628881342681942, + "grad_norm": 42.34803009033203, + "learning_rate": 1.0641520496408852e-07, + "loss": 0.1454, + "num_input_tokens_seen": 745152, + "step": 1090 + }, + { + "epoch": 0.026751032174529107, + "grad_norm": 60.0620231628418, + "learning_rate": 1.0690379635510823e-07, + "loss": 0.2185, + "num_input_tokens_seen": 748608, + "step": 1095 + }, + { + "epoch": 0.026873183006376273, + "grad_norm": 115.29000091552734, + "learning_rate": 1.0739238774612791e-07, + "loss": 0.2498, + "num_input_tokens_seen": 752192, + "step": 1100 + }, + { + "epoch": 0.02699533383822344, + "grad_norm": 37.201820373535156, + "learning_rate": 1.0788097913714759e-07, + "loss": 0.2033, + "num_input_tokens_seen": 756096, + "step": 1105 + }, + { + "epoch": 0.027117484670070604, + "grad_norm": 39.57706069946289, + "learning_rate": 1.083695705281673e-07, + "loss": 0.1271, + "num_input_tokens_seen": 759296, + "step": 1110 + }, + { + "epoch": 0.02723963550191777, + "grad_norm": 32.63840103149414, + "learning_rate": 1.0885816191918698e-07, + "loss": 0.1702, + "num_input_tokens_seen": 762368, + "step": 1115 + }, + { + "epoch": 0.02736178633376493, + "grad_norm": 236.14743041992188, + "learning_rate": 1.0934675331020666e-07, + "loss": 0.1896, + "num_input_tokens_seen": 765568, + "step": 1120 + }, + { + "epoch": 0.027483937165612097, + "grad_norm": 117.73133850097656, + "learning_rate": 1.0983534470122637e-07, + "loss": 0.1672, + "num_input_tokens_seen": 768960, + "step": 1125 + }, + { + "epoch": 0.027606087997459262, + "grad_norm": 47.201210021972656, + "learning_rate": 1.1032393609224605e-07, + "loss": 0.1767, + "num_input_tokens_seen": 772352, + "step": 1130 + }, + { + "epoch": 0.027728238829306428, + "grad_norm": 95.38451385498047, + "learning_rate": 1.1081252748326573e-07, + "loss": 0.2014, + "num_input_tokens_seen": 775616, + "step": 1135 + }, + { + "epoch": 0.027850389661153593, + "grad_norm": 70.23417663574219, + "learning_rate": 1.1130111887428544e-07, + "loss": 0.2096, + "num_input_tokens_seen": 778880, + "step": 1140 + }, + { + "epoch": 0.02797254049300076, + "grad_norm": 115.88958740234375, + "learning_rate": 1.1178971026530512e-07, + "loss": 0.161, + "num_input_tokens_seen": 781824, + "step": 1145 + }, + { + "epoch": 0.02809469132484792, + "grad_norm": 71.50474548339844, + "learning_rate": 1.122783016563248e-07, + "loss": 0.1846, + "num_input_tokens_seen": 785152, + "step": 1150 + }, + { + "epoch": 0.028216842156695086, + "grad_norm": 93.5502700805664, + "learning_rate": 1.1276689304734451e-07, + "loss": 0.2438, + "num_input_tokens_seen": 788160, + "step": 1155 + }, + { + "epoch": 0.028338992988542252, + "grad_norm": 137.8145294189453, + "learning_rate": 1.1325548443836419e-07, + "loss": 0.1536, + "num_input_tokens_seen": 791808, + "step": 1160 + }, + { + "epoch": 0.028461143820389417, + "grad_norm": 121.41155242919922, + "learning_rate": 1.1374407582938387e-07, + "loss": 0.1637, + "num_input_tokens_seen": 795392, + "step": 1165 + }, + { + "epoch": 0.028583294652236583, + "grad_norm": 95.23970031738281, + "learning_rate": 1.1423266722040357e-07, + "loss": 0.2223, + "num_input_tokens_seen": 799168, + "step": 1170 + }, + { + "epoch": 0.028705445484083745, + "grad_norm": 89.20221710205078, + "learning_rate": 1.1472125861142326e-07, + "loss": 0.2553, + "num_input_tokens_seen": 802624, + "step": 1175 + }, + { + "epoch": 0.02882759631593091, + "grad_norm": 78.99765014648438, + "learning_rate": 1.1520985000244294e-07, + "loss": 0.1225, + "num_input_tokens_seen": 806400, + "step": 1180 + }, + { + "epoch": 0.028949747147778076, + "grad_norm": 81.3134994506836, + "learning_rate": 1.1569844139346264e-07, + "loss": 0.1684, + "num_input_tokens_seen": 810048, + "step": 1185 + }, + { + "epoch": 0.02907189797962524, + "grad_norm": 69.7149429321289, + "learning_rate": 1.1618703278448233e-07, + "loss": 0.1959, + "num_input_tokens_seen": 813248, + "step": 1190 + }, + { + "epoch": 0.029194048811472407, + "grad_norm": 25.824722290039062, + "learning_rate": 1.1667562417550202e-07, + "loss": 0.1737, + "num_input_tokens_seen": 816320, + "step": 1195 + }, + { + "epoch": 0.029316199643319572, + "grad_norm": 26.50715446472168, + "learning_rate": 1.1716421556652171e-07, + "loss": 0.1948, + "num_input_tokens_seen": 819584, + "step": 1200 + }, + { + "epoch": 0.029438350475166734, + "grad_norm": 107.89574432373047, + "learning_rate": 1.176528069575414e-07, + "loss": 0.1995, + "num_input_tokens_seen": 822400, + "step": 1205 + }, + { + "epoch": 0.0295605013070139, + "grad_norm": 28.458755493164062, + "learning_rate": 1.181413983485611e-07, + "loss": 0.1063, + "num_input_tokens_seen": 825728, + "step": 1210 + }, + { + "epoch": 0.029682652138861065, + "grad_norm": 87.51367950439453, + "learning_rate": 1.1862998973958078e-07, + "loss": 0.2259, + "num_input_tokens_seen": 828992, + "step": 1215 + }, + { + "epoch": 0.02980480297070823, + "grad_norm": 91.14592742919922, + "learning_rate": 1.1911858113060047e-07, + "loss": 0.2222, + "num_input_tokens_seen": 831936, + "step": 1220 + }, + { + "epoch": 0.029926953802555396, + "grad_norm": 154.0288848876953, + "learning_rate": 1.1960717252162018e-07, + "loss": 0.2204, + "num_input_tokens_seen": 835520, + "step": 1225 + }, + { + "epoch": 0.030049104634402562, + "grad_norm": 110.25627136230469, + "learning_rate": 1.2009576391263986e-07, + "loss": 0.147, + "num_input_tokens_seen": 838976, + "step": 1230 + }, + { + "epoch": 0.030171255466249724, + "grad_norm": 39.7508659362793, + "learning_rate": 1.2058435530365955e-07, + "loss": 0.2507, + "num_input_tokens_seen": 842368, + "step": 1235 + }, + { + "epoch": 0.03029340629809689, + "grad_norm": 153.55630493164062, + "learning_rate": 1.2107294669467925e-07, + "loss": 0.1361, + "num_input_tokens_seen": 845632, + "step": 1240 + }, + { + "epoch": 0.030415557129944055, + "grad_norm": 138.62791442871094, + "learning_rate": 1.2156153808569893e-07, + "loss": 0.1966, + "num_input_tokens_seen": 848832, + "step": 1245 + }, + { + "epoch": 0.03053770796179122, + "grad_norm": 40.55439758300781, + "learning_rate": 1.2205012947671862e-07, + "loss": 0.1246, + "num_input_tokens_seen": 852416, + "step": 1250 + }, + { + "epoch": 0.030659858793638386, + "grad_norm": 140.769775390625, + "learning_rate": 1.225387208677383e-07, + "loss": 0.1815, + "num_input_tokens_seen": 855808, + "step": 1255 + }, + { + "epoch": 0.030782009625485548, + "grad_norm": 133.49143981933594, + "learning_rate": 1.23027312258758e-07, + "loss": 0.1261, + "num_input_tokens_seen": 859008, + "step": 1260 + }, + { + "epoch": 0.030904160457332713, + "grad_norm": 30.480289459228516, + "learning_rate": 1.2351590364977769e-07, + "loss": 0.0881, + "num_input_tokens_seen": 862976, + "step": 1265 + }, + { + "epoch": 0.03102631128917988, + "grad_norm": 44.4013671875, + "learning_rate": 1.2400449504079737e-07, + "loss": 0.2887, + "num_input_tokens_seen": 866240, + "step": 1270 + }, + { + "epoch": 0.031148462121027044, + "grad_norm": 83.47310638427734, + "learning_rate": 1.2449308643181707e-07, + "loss": 0.202, + "num_input_tokens_seen": 869376, + "step": 1275 + }, + { + "epoch": 0.031270612952874206, + "grad_norm": 83.847900390625, + "learning_rate": 1.2498167782283676e-07, + "loss": 0.1258, + "num_input_tokens_seen": 872896, + "step": 1280 + }, + { + "epoch": 0.031392763784721375, + "grad_norm": 147.70680236816406, + "learning_rate": 1.2547026921385644e-07, + "loss": 0.1506, + "num_input_tokens_seen": 876416, + "step": 1285 + }, + { + "epoch": 0.03151491461656854, + "grad_norm": 115.61612701416016, + "learning_rate": 1.2595886060487615e-07, + "loss": 0.2235, + "num_input_tokens_seen": 879680, + "step": 1290 + }, + { + "epoch": 0.031637065448415706, + "grad_norm": 119.4736328125, + "learning_rate": 1.2644745199589583e-07, + "loss": 0.1283, + "num_input_tokens_seen": 883008, + "step": 1295 + }, + { + "epoch": 0.03175921628026287, + "grad_norm": 43.5504035949707, + "learning_rate": 1.269360433869155e-07, + "loss": 0.2082, + "num_input_tokens_seen": 886848, + "step": 1300 + }, + { + "epoch": 0.03188136711211003, + "grad_norm": 124.5676040649414, + "learning_rate": 1.2742463477793522e-07, + "loss": 0.1701, + "num_input_tokens_seen": 890176, + "step": 1305 + }, + { + "epoch": 0.0320035179439572, + "grad_norm": 39.65419006347656, + "learning_rate": 1.279132261689549e-07, + "loss": 0.1525, + "num_input_tokens_seen": 893184, + "step": 1310 + }, + { + "epoch": 0.03212566877580436, + "grad_norm": 131.31710815429688, + "learning_rate": 1.2840181755997458e-07, + "loss": 0.1911, + "num_input_tokens_seen": 896448, + "step": 1315 + }, + { + "epoch": 0.03224781960765153, + "grad_norm": 267.3889465332031, + "learning_rate": 1.2889040895099429e-07, + "loss": 0.2272, + "num_input_tokens_seen": 899840, + "step": 1320 + }, + { + "epoch": 0.03236997043949869, + "grad_norm": 104.76681518554688, + "learning_rate": 1.2937900034201397e-07, + "loss": 0.1963, + "num_input_tokens_seen": 902912, + "step": 1325 + }, + { + "epoch": 0.03249212127134586, + "grad_norm": 31.133392333984375, + "learning_rate": 1.2986759173303365e-07, + "loss": 0.1968, + "num_input_tokens_seen": 906688, + "step": 1330 + }, + { + "epoch": 0.03261427210319302, + "grad_norm": 158.30206298828125, + "learning_rate": 1.3035618312405336e-07, + "loss": 0.3204, + "num_input_tokens_seen": 910272, + "step": 1335 + }, + { + "epoch": 0.032736422935040185, + "grad_norm": 117.29009246826172, + "learning_rate": 1.3084477451507304e-07, + "loss": 0.1802, + "num_input_tokens_seen": 913472, + "step": 1340 + }, + { + "epoch": 0.032858573766887354, + "grad_norm": 35.10454559326172, + "learning_rate": 1.3133336590609272e-07, + "loss": 0.1534, + "num_input_tokens_seen": 916800, + "step": 1345 + }, + { + "epoch": 0.032980724598734516, + "grad_norm": 66.56099700927734, + "learning_rate": 1.3182195729711243e-07, + "loss": 0.1086, + "num_input_tokens_seen": 920384, + "step": 1350 + }, + { + "epoch": 0.033102875430581685, + "grad_norm": 26.81028175354004, + "learning_rate": 1.323105486881321e-07, + "loss": 0.1473, + "num_input_tokens_seen": 923456, + "step": 1355 + }, + { + "epoch": 0.03322502626242885, + "grad_norm": 123.34690856933594, + "learning_rate": 1.327991400791518e-07, + "loss": 0.1892, + "num_input_tokens_seen": 926848, + "step": 1360 + }, + { + "epoch": 0.03334717709427601, + "grad_norm": 27.120784759521484, + "learning_rate": 1.332877314701715e-07, + "loss": 0.1372, + "num_input_tokens_seen": 930560, + "step": 1365 + }, + { + "epoch": 0.03346932792612318, + "grad_norm": 70.49962615966797, + "learning_rate": 1.3377632286119118e-07, + "loss": 0.2135, + "num_input_tokens_seen": 934144, + "step": 1370 + }, + { + "epoch": 0.03359147875797034, + "grad_norm": 45.35801696777344, + "learning_rate": 1.3426491425221086e-07, + "loss": 0.1484, + "num_input_tokens_seen": 937088, + "step": 1375 + }, + { + "epoch": 0.03371362958981751, + "grad_norm": 63.658634185791016, + "learning_rate": 1.3475350564323057e-07, + "loss": 0.2285, + "num_input_tokens_seen": 940544, + "step": 1380 + }, + { + "epoch": 0.03383578042166467, + "grad_norm": 62.41775131225586, + "learning_rate": 1.3524209703425025e-07, + "loss": 0.1582, + "num_input_tokens_seen": 944192, + "step": 1385 + }, + { + "epoch": 0.03395793125351183, + "grad_norm": 59.16685104370117, + "learning_rate": 1.3573068842526993e-07, + "loss": 0.1455, + "num_input_tokens_seen": 947712, + "step": 1390 + }, + { + "epoch": 0.034080082085359, + "grad_norm": 36.50937271118164, + "learning_rate": 1.3621927981628964e-07, + "loss": 0.193, + "num_input_tokens_seen": 950784, + "step": 1395 + }, + { + "epoch": 0.034202232917206164, + "grad_norm": 17.540971755981445, + "learning_rate": 1.3670787120730932e-07, + "loss": 0.1433, + "num_input_tokens_seen": 954432, + "step": 1400 + }, + { + "epoch": 0.03432438374905333, + "grad_norm": 82.31527709960938, + "learning_rate": 1.37196462598329e-07, + "loss": 0.1772, + "num_input_tokens_seen": 957504, + "step": 1405 + }, + { + "epoch": 0.034446534580900495, + "grad_norm": 201.3554229736328, + "learning_rate": 1.376850539893487e-07, + "loss": 0.1693, + "num_input_tokens_seen": 960448, + "step": 1410 + }, + { + "epoch": 0.034568685412747664, + "grad_norm": 87.75347137451172, + "learning_rate": 1.381736453803684e-07, + "loss": 0.1895, + "num_input_tokens_seen": 963840, + "step": 1415 + }, + { + "epoch": 0.034690836244594826, + "grad_norm": 111.88566589355469, + "learning_rate": 1.3866223677138807e-07, + "loss": 0.174, + "num_input_tokens_seen": 967232, + "step": 1420 + }, + { + "epoch": 0.03481298707644199, + "grad_norm": 114.49220275878906, + "learning_rate": 1.3915082816240778e-07, + "loss": 0.2, + "num_input_tokens_seen": 971008, + "step": 1425 + }, + { + "epoch": 0.03493513790828916, + "grad_norm": 30.848787307739258, + "learning_rate": 1.3963941955342746e-07, + "loss": 0.1766, + "num_input_tokens_seen": 974784, + "step": 1430 + }, + { + "epoch": 0.03505728874013632, + "grad_norm": 77.15776062011719, + "learning_rate": 1.4012801094444714e-07, + "loss": 0.243, + "num_input_tokens_seen": 977792, + "step": 1435 + }, + { + "epoch": 0.03517943957198349, + "grad_norm": 58.27136993408203, + "learning_rate": 1.4061660233546685e-07, + "loss": 0.14, + "num_input_tokens_seen": 981056, + "step": 1440 + }, + { + "epoch": 0.03530159040383065, + "grad_norm": 48.294612884521484, + "learning_rate": 1.4110519372648653e-07, + "loss": 0.1969, + "num_input_tokens_seen": 984192, + "step": 1445 + }, + { + "epoch": 0.03542374123567781, + "grad_norm": 29.22694969177246, + "learning_rate": 1.415937851175062e-07, + "loss": 0.1798, + "num_input_tokens_seen": 987520, + "step": 1450 + }, + { + "epoch": 0.03554589206752498, + "grad_norm": 66.20771789550781, + "learning_rate": 1.4208237650852592e-07, + "loss": 0.1784, + "num_input_tokens_seen": 991168, + "step": 1455 + }, + { + "epoch": 0.03566804289937214, + "grad_norm": 78.16349029541016, + "learning_rate": 1.425709678995456e-07, + "loss": 0.1598, + "num_input_tokens_seen": 994496, + "step": 1460 + }, + { + "epoch": 0.03579019373121931, + "grad_norm": 176.6148223876953, + "learning_rate": 1.4305955929056528e-07, + "loss": 0.1563, + "num_input_tokens_seen": 998208, + "step": 1465 + }, + { + "epoch": 0.035912344563066474, + "grad_norm": 46.208099365234375, + "learning_rate": 1.43548150681585e-07, + "loss": 0.159, + "num_input_tokens_seen": 1001984, + "step": 1470 + }, + { + "epoch": 0.036034495394913636, + "grad_norm": 50.82776641845703, + "learning_rate": 1.4403674207260467e-07, + "loss": 0.229, + "num_input_tokens_seen": 1004992, + "step": 1475 + }, + { + "epoch": 0.036156646226760805, + "grad_norm": 77.68282318115234, + "learning_rate": 1.4452533346362435e-07, + "loss": 0.1666, + "num_input_tokens_seen": 1008320, + "step": 1480 + }, + { + "epoch": 0.03627879705860797, + "grad_norm": 63.0606575012207, + "learning_rate": 1.4501392485464406e-07, + "loss": 0.1998, + "num_input_tokens_seen": 1011776, + "step": 1485 + }, + { + "epoch": 0.036400947890455136, + "grad_norm": 21.415904998779297, + "learning_rate": 1.4550251624566374e-07, + "loss": 0.1194, + "num_input_tokens_seen": 1014976, + "step": 1490 + }, + { + "epoch": 0.0365230987223023, + "grad_norm": 95.43403625488281, + "learning_rate": 1.4599110763668342e-07, + "loss": 0.1989, + "num_input_tokens_seen": 1018496, + "step": 1495 + }, + { + "epoch": 0.03664524955414947, + "grad_norm": 77.62108612060547, + "learning_rate": 1.4647969902770313e-07, + "loss": 0.1353, + "num_input_tokens_seen": 1021696, + "step": 1500 + }, + { + "epoch": 0.03676740038599663, + "grad_norm": 140.04039001464844, + "learning_rate": 1.4696829041872284e-07, + "loss": 0.2075, + "num_input_tokens_seen": 1025344, + "step": 1505 + }, + { + "epoch": 0.03688955121784379, + "grad_norm": 73.93856048583984, + "learning_rate": 1.474568818097425e-07, + "loss": 0.1738, + "num_input_tokens_seen": 1028672, + "step": 1510 + }, + { + "epoch": 0.03701170204969096, + "grad_norm": 157.8907928466797, + "learning_rate": 1.479454732007622e-07, + "loss": 0.1116, + "num_input_tokens_seen": 1031936, + "step": 1515 + }, + { + "epoch": 0.03713385288153812, + "grad_norm": 80.8382568359375, + "learning_rate": 1.484340645917819e-07, + "loss": 0.1733, + "num_input_tokens_seen": 1035200, + "step": 1520 + }, + { + "epoch": 0.03725600371338529, + "grad_norm": 90.218505859375, + "learning_rate": 1.4892265598280156e-07, + "loss": 0.1415, + "num_input_tokens_seen": 1038656, + "step": 1525 + }, + { + "epoch": 0.03737815454523245, + "grad_norm": 52.294525146484375, + "learning_rate": 1.4941124737382127e-07, + "loss": 0.138, + "num_input_tokens_seen": 1041984, + "step": 1530 + }, + { + "epoch": 0.037500305377079615, + "grad_norm": 103.6115951538086, + "learning_rate": 1.4989983876484098e-07, + "loss": 0.1636, + "num_input_tokens_seen": 1045184, + "step": 1535 + }, + { + "epoch": 0.037622456208926784, + "grad_norm": 96.00193786621094, + "learning_rate": 1.5038843015586063e-07, + "loss": 0.1178, + "num_input_tokens_seen": 1048640, + "step": 1540 + }, + { + "epoch": 0.037744607040773946, + "grad_norm": 29.498638153076172, + "learning_rate": 1.5087702154688034e-07, + "loss": 0.1089, + "num_input_tokens_seen": 1052352, + "step": 1545 + }, + { + "epoch": 0.037866757872621115, + "grad_norm": 93.26831817626953, + "learning_rate": 1.5136561293790005e-07, + "loss": 0.1198, + "num_input_tokens_seen": 1056192, + "step": 1550 + }, + { + "epoch": 0.03798890870446828, + "grad_norm": 136.19781494140625, + "learning_rate": 1.518542043289197e-07, + "loss": 0.2486, + "num_input_tokens_seen": 1059392, + "step": 1555 + }, + { + "epoch": 0.03811105953631544, + "grad_norm": 156.92518615722656, + "learning_rate": 1.523427957199394e-07, + "loss": 0.1491, + "num_input_tokens_seen": 1062400, + "step": 1560 + }, + { + "epoch": 0.03823321036816261, + "grad_norm": 77.09222412109375, + "learning_rate": 1.5283138711095912e-07, + "loss": 0.1352, + "num_input_tokens_seen": 1065792, + "step": 1565 + }, + { + "epoch": 0.03835536120000977, + "grad_norm": 208.2629852294922, + "learning_rate": 1.5331997850197878e-07, + "loss": 0.2562, + "num_input_tokens_seen": 1068864, + "step": 1570 + }, + { + "epoch": 0.03847751203185694, + "grad_norm": 46.75325012207031, + "learning_rate": 1.5380856989299848e-07, + "loss": 0.1579, + "num_input_tokens_seen": 1071872, + "step": 1575 + }, + { + "epoch": 0.0385996628637041, + "grad_norm": 71.7177734375, + "learning_rate": 1.542971612840182e-07, + "loss": 0.1577, + "num_input_tokens_seen": 1074880, + "step": 1580 + }, + { + "epoch": 0.03872181369555127, + "grad_norm": 79.63215637207031, + "learning_rate": 1.5478575267503785e-07, + "loss": 0.1595, + "num_input_tokens_seen": 1079168, + "step": 1585 + }, + { + "epoch": 0.03884396452739843, + "grad_norm": 233.81239318847656, + "learning_rate": 1.5527434406605755e-07, + "loss": 0.2213, + "num_input_tokens_seen": 1082496, + "step": 1590 + }, + { + "epoch": 0.038966115359245594, + "grad_norm": 154.5967559814453, + "learning_rate": 1.5576293545707726e-07, + "loss": 0.1883, + "num_input_tokens_seen": 1085824, + "step": 1595 + }, + { + "epoch": 0.03908826619109276, + "grad_norm": 81.72132873535156, + "learning_rate": 1.5625152684809692e-07, + "loss": 0.1329, + "num_input_tokens_seen": 1089920, + "step": 1600 + }, + { + "epoch": 0.039210417022939925, + "grad_norm": 64.67450714111328, + "learning_rate": 1.5674011823911662e-07, + "loss": 0.1146, + "num_input_tokens_seen": 1093760, + "step": 1605 + }, + { + "epoch": 0.039332567854787094, + "grad_norm": 65.7629165649414, + "learning_rate": 1.5722870963013633e-07, + "loss": 0.1238, + "num_input_tokens_seen": 1097216, + "step": 1610 + }, + { + "epoch": 0.039454718686634256, + "grad_norm": 99.66740417480469, + "learning_rate": 1.5771730102115599e-07, + "loss": 0.1502, + "num_input_tokens_seen": 1100608, + "step": 1615 + }, + { + "epoch": 0.03957686951848142, + "grad_norm": 82.75312805175781, + "learning_rate": 1.582058924121757e-07, + "loss": 0.1395, + "num_input_tokens_seen": 1104064, + "step": 1620 + }, + { + "epoch": 0.03969902035032859, + "grad_norm": 49.08173370361328, + "learning_rate": 1.586944838031954e-07, + "loss": 0.1604, + "num_input_tokens_seen": 1107584, + "step": 1625 + }, + { + "epoch": 0.03982117118217575, + "grad_norm": 43.87813949584961, + "learning_rate": 1.5918307519421506e-07, + "loss": 0.0803, + "num_input_tokens_seen": 1110656, + "step": 1630 + }, + { + "epoch": 0.03994332201402292, + "grad_norm": 122.28240203857422, + "learning_rate": 1.5967166658523476e-07, + "loss": 0.158, + "num_input_tokens_seen": 1113792, + "step": 1635 + }, + { + "epoch": 0.04006547284587008, + "grad_norm": 71.87992858886719, + "learning_rate": 1.6016025797625445e-07, + "loss": 0.1535, + "num_input_tokens_seen": 1117440, + "step": 1640 + }, + { + "epoch": 0.04018762367771724, + "grad_norm": 49.055503845214844, + "learning_rate": 1.6064884936727413e-07, + "loss": 0.1075, + "num_input_tokens_seen": 1120960, + "step": 1645 + }, + { + "epoch": 0.04030977450956441, + "grad_norm": 38.739166259765625, + "learning_rate": 1.6113744075829384e-07, + "loss": 0.063, + "num_input_tokens_seen": 1124032, + "step": 1650 + }, + { + "epoch": 0.04043192534141157, + "grad_norm": 29.723430633544922, + "learning_rate": 1.6162603214931352e-07, + "loss": 0.0726, + "num_input_tokens_seen": 1127424, + "step": 1655 + }, + { + "epoch": 0.04055407617325874, + "grad_norm": 115.54558563232422, + "learning_rate": 1.621146235403332e-07, + "loss": 0.2601, + "num_input_tokens_seen": 1130624, + "step": 1660 + }, + { + "epoch": 0.040676227005105904, + "grad_norm": 70.30089569091797, + "learning_rate": 1.626032149313529e-07, + "loss": 0.2492, + "num_input_tokens_seen": 1134272, + "step": 1665 + }, + { + "epoch": 0.04079837783695307, + "grad_norm": 72.49864196777344, + "learning_rate": 1.630918063223726e-07, + "loss": 0.1489, + "num_input_tokens_seen": 1137792, + "step": 1670 + }, + { + "epoch": 0.040920528668800235, + "grad_norm": 59.99082565307617, + "learning_rate": 1.6358039771339227e-07, + "loss": 0.1915, + "num_input_tokens_seen": 1141056, + "step": 1675 + }, + { + "epoch": 0.0410426795006474, + "grad_norm": 129.36558532714844, + "learning_rate": 1.6406898910441198e-07, + "loss": 0.2433, + "num_input_tokens_seen": 1144576, + "step": 1680 + }, + { + "epoch": 0.041164830332494566, + "grad_norm": 44.43220901489258, + "learning_rate": 1.6455758049543166e-07, + "loss": 0.1577, + "num_input_tokens_seen": 1147840, + "step": 1685 + }, + { + "epoch": 0.04128698116434173, + "grad_norm": 104.24834442138672, + "learning_rate": 1.6504617188645134e-07, + "loss": 0.105, + "num_input_tokens_seen": 1151552, + "step": 1690 + }, + { + "epoch": 0.0414091319961889, + "grad_norm": 87.4388198852539, + "learning_rate": 1.6553476327747105e-07, + "loss": 0.1276, + "num_input_tokens_seen": 1154560, + "step": 1695 + }, + { + "epoch": 0.04153128282803606, + "grad_norm": 81.07981872558594, + "learning_rate": 1.6602335466849073e-07, + "loss": 0.1723, + "num_input_tokens_seen": 1157760, + "step": 1700 + }, + { + "epoch": 0.04165343365988322, + "grad_norm": 237.32518005371094, + "learning_rate": 1.665119460595104e-07, + "loss": 0.2233, + "num_input_tokens_seen": 1161472, + "step": 1705 + }, + { + "epoch": 0.04177558449173039, + "grad_norm": 151.68650817871094, + "learning_rate": 1.6700053745053012e-07, + "loss": 0.1753, + "num_input_tokens_seen": 1164480, + "step": 1710 + }, + { + "epoch": 0.04189773532357755, + "grad_norm": 131.32754516601562, + "learning_rate": 1.674891288415498e-07, + "loss": 0.1315, + "num_input_tokens_seen": 1168384, + "step": 1715 + }, + { + "epoch": 0.04201988615542472, + "grad_norm": 108.83721160888672, + "learning_rate": 1.6797772023256948e-07, + "loss": 0.158, + "num_input_tokens_seen": 1171648, + "step": 1720 + }, + { + "epoch": 0.04214203698727188, + "grad_norm": 85.09517669677734, + "learning_rate": 1.684663116235892e-07, + "loss": 0.1922, + "num_input_tokens_seen": 1174976, + "step": 1725 + }, + { + "epoch": 0.042264187819119045, + "grad_norm": 47.35398483276367, + "learning_rate": 1.6895490301460887e-07, + "loss": 0.2365, + "num_input_tokens_seen": 1178176, + "step": 1730 + }, + { + "epoch": 0.042386338650966214, + "grad_norm": 70.96749114990234, + "learning_rate": 1.6944349440562858e-07, + "loss": 0.114, + "num_input_tokens_seen": 1181696, + "step": 1735 + }, + { + "epoch": 0.042508489482813376, + "grad_norm": 106.02008056640625, + "learning_rate": 1.6993208579664826e-07, + "loss": 0.16, + "num_input_tokens_seen": 1185216, + "step": 1740 + }, + { + "epoch": 0.042630640314660545, + "grad_norm": 127.25152587890625, + "learning_rate": 1.7042067718766794e-07, + "loss": 0.1776, + "num_input_tokens_seen": 1188672, + "step": 1745 + }, + { + "epoch": 0.04275279114650771, + "grad_norm": 33.11648178100586, + "learning_rate": 1.7090926857868765e-07, + "loss": 0.1947, + "num_input_tokens_seen": 1192512, + "step": 1750 + }, + { + "epoch": 0.042874941978354876, + "grad_norm": 75.8089599609375, + "learning_rate": 1.7139785996970733e-07, + "loss": 0.1722, + "num_input_tokens_seen": 1195520, + "step": 1755 + }, + { + "epoch": 0.04299709281020204, + "grad_norm": 75.60196685791016, + "learning_rate": 1.71886451360727e-07, + "loss": 0.1517, + "num_input_tokens_seen": 1199040, + "step": 1760 + }, + { + "epoch": 0.0431192436420492, + "grad_norm": 40.18544387817383, + "learning_rate": 1.7237504275174672e-07, + "loss": 0.1429, + "num_input_tokens_seen": 1202240, + "step": 1765 + }, + { + "epoch": 0.04324139447389637, + "grad_norm": 61.27647018432617, + "learning_rate": 1.728636341427664e-07, + "loss": 0.1743, + "num_input_tokens_seen": 1205504, + "step": 1770 + }, + { + "epoch": 0.04336354530574353, + "grad_norm": 145.07838439941406, + "learning_rate": 1.7335222553378608e-07, + "loss": 0.1751, + "num_input_tokens_seen": 1208896, + "step": 1775 + }, + { + "epoch": 0.0434856961375907, + "grad_norm": 23.42119789123535, + "learning_rate": 1.738408169248058e-07, + "loss": 0.1709, + "num_input_tokens_seen": 1212864, + "step": 1780 + }, + { + "epoch": 0.04360784696943786, + "grad_norm": 24.543663024902344, + "learning_rate": 1.7432940831582547e-07, + "loss": 0.0921, + "num_input_tokens_seen": 1216192, + "step": 1785 + }, + { + "epoch": 0.043729997801285024, + "grad_norm": 79.07312774658203, + "learning_rate": 1.7481799970684515e-07, + "loss": 0.2697, + "num_input_tokens_seen": 1219264, + "step": 1790 + }, + { + "epoch": 0.04385214863313219, + "grad_norm": 27.745731353759766, + "learning_rate": 1.7530659109786486e-07, + "loss": 0.1171, + "num_input_tokens_seen": 1222400, + "step": 1795 + }, + { + "epoch": 0.043974299464979355, + "grad_norm": 96.10833740234375, + "learning_rate": 1.7579518248888454e-07, + "loss": 0.1516, + "num_input_tokens_seen": 1225472, + "step": 1800 + }, + { + "epoch": 0.044096450296826524, + "grad_norm": 31.986623764038086, + "learning_rate": 1.7628377387990422e-07, + "loss": 0.2495, + "num_input_tokens_seen": 1228736, + "step": 1805 + }, + { + "epoch": 0.044218601128673686, + "grad_norm": 23.220766067504883, + "learning_rate": 1.7677236527092393e-07, + "loss": 0.098, + "num_input_tokens_seen": 1232960, + "step": 1810 + }, + { + "epoch": 0.04434075196052085, + "grad_norm": 26.95801544189453, + "learning_rate": 1.772609566619436e-07, + "loss": 0.1844, + "num_input_tokens_seen": 1236480, + "step": 1815 + }, + { + "epoch": 0.04446290279236802, + "grad_norm": 69.52427673339844, + "learning_rate": 1.777495480529633e-07, + "loss": 0.0747, + "num_input_tokens_seen": 1240448, + "step": 1820 + }, + { + "epoch": 0.04458505362421518, + "grad_norm": 56.518741607666016, + "learning_rate": 1.78238139443983e-07, + "loss": 0.1022, + "num_input_tokens_seen": 1243648, + "step": 1825 + }, + { + "epoch": 0.04470720445606235, + "grad_norm": 31.176511764526367, + "learning_rate": 1.7872673083500268e-07, + "loss": 0.1376, + "num_input_tokens_seen": 1247040, + "step": 1830 + }, + { + "epoch": 0.04482935528790951, + "grad_norm": 76.87928771972656, + "learning_rate": 1.7921532222602236e-07, + "loss": 0.2258, + "num_input_tokens_seen": 1250816, + "step": 1835 + }, + { + "epoch": 0.04495150611975668, + "grad_norm": 54.71757888793945, + "learning_rate": 1.7970391361704207e-07, + "loss": 0.1251, + "num_input_tokens_seen": 1254656, + "step": 1840 + }, + { + "epoch": 0.04507365695160384, + "grad_norm": 24.595806121826172, + "learning_rate": 1.8019250500806175e-07, + "loss": 0.1035, + "num_input_tokens_seen": 1258176, + "step": 1845 + }, + { + "epoch": 0.045195807783451, + "grad_norm": 98.47230529785156, + "learning_rate": 1.8068109639908143e-07, + "loss": 0.1309, + "num_input_tokens_seen": 1261248, + "step": 1850 + }, + { + "epoch": 0.04531795861529817, + "grad_norm": 139.2314453125, + "learning_rate": 1.8116968779010114e-07, + "loss": 0.2031, + "num_input_tokens_seen": 1264576, + "step": 1855 + }, + { + "epoch": 0.045440109447145334, + "grad_norm": 67.46702575683594, + "learning_rate": 1.8165827918112082e-07, + "loss": 0.1752, + "num_input_tokens_seen": 1267776, + "step": 1860 + }, + { + "epoch": 0.0455622602789925, + "grad_norm": 78.09505462646484, + "learning_rate": 1.821468705721405e-07, + "loss": 0.1825, + "num_input_tokens_seen": 1271040, + "step": 1865 + }, + { + "epoch": 0.045684411110839665, + "grad_norm": 28.8896427154541, + "learning_rate": 1.826354619631602e-07, + "loss": 0.099, + "num_input_tokens_seen": 1274496, + "step": 1870 + }, + { + "epoch": 0.04580656194268683, + "grad_norm": 71.47722625732422, + "learning_rate": 1.831240533541799e-07, + "loss": 0.2014, + "num_input_tokens_seen": 1277824, + "step": 1875 + }, + { + "epoch": 0.045928712774533996, + "grad_norm": 23.266725540161133, + "learning_rate": 1.8361264474519957e-07, + "loss": 0.1235, + "num_input_tokens_seen": 1280832, + "step": 1880 + }, + { + "epoch": 0.04605086360638116, + "grad_norm": 111.74454498291016, + "learning_rate": 1.8410123613621928e-07, + "loss": 0.122, + "num_input_tokens_seen": 1284160, + "step": 1885 + }, + { + "epoch": 0.04617301443822833, + "grad_norm": 34.12566375732422, + "learning_rate": 1.8458982752723896e-07, + "loss": 0.1444, + "num_input_tokens_seen": 1287168, + "step": 1890 + }, + { + "epoch": 0.04629516527007549, + "grad_norm": 48.650390625, + "learning_rate": 1.8507841891825864e-07, + "loss": 0.093, + "num_input_tokens_seen": 1290880, + "step": 1895 + }, + { + "epoch": 0.04641731610192265, + "grad_norm": 55.02333450317383, + "learning_rate": 1.8556701030927835e-07, + "loss": 0.1654, + "num_input_tokens_seen": 1294656, + "step": 1900 + }, + { + "epoch": 0.04653946693376982, + "grad_norm": 94.02783203125, + "learning_rate": 1.8605560170029803e-07, + "loss": 0.1705, + "num_input_tokens_seen": 1297728, + "step": 1905 + }, + { + "epoch": 0.04666161776561698, + "grad_norm": 78.1871566772461, + "learning_rate": 1.8654419309131771e-07, + "loss": 0.1407, + "num_input_tokens_seen": 1300928, + "step": 1910 + }, + { + "epoch": 0.04678376859746415, + "grad_norm": 117.9102554321289, + "learning_rate": 1.8703278448233742e-07, + "loss": 0.2134, + "num_input_tokens_seen": 1303808, + "step": 1915 + }, + { + "epoch": 0.04690591942931131, + "grad_norm": 41.647972106933594, + "learning_rate": 1.875213758733571e-07, + "loss": 0.1497, + "num_input_tokens_seen": 1307648, + "step": 1920 + }, + { + "epoch": 0.04702807026115848, + "grad_norm": 55.67569351196289, + "learning_rate": 1.8800996726437678e-07, + "loss": 0.1378, + "num_input_tokens_seen": 1310848, + "step": 1925 + }, + { + "epoch": 0.047150221093005644, + "grad_norm": 48.958621978759766, + "learning_rate": 1.884985586553965e-07, + "loss": 0.2215, + "num_input_tokens_seen": 1314432, + "step": 1930 + }, + { + "epoch": 0.047272371924852806, + "grad_norm": 41.2342643737793, + "learning_rate": 1.8898715004641617e-07, + "loss": 0.2139, + "num_input_tokens_seen": 1318080, + "step": 1935 + }, + { + "epoch": 0.047394522756699975, + "grad_norm": 92.6572036743164, + "learning_rate": 1.8947574143743585e-07, + "loss": 0.2444, + "num_input_tokens_seen": 1321024, + "step": 1940 + }, + { + "epoch": 0.04751667358854714, + "grad_norm": 26.353044509887695, + "learning_rate": 1.8996433282845556e-07, + "loss": 0.1379, + "num_input_tokens_seen": 1324736, + "step": 1945 + }, + { + "epoch": 0.047638824420394306, + "grad_norm": 75.55083465576172, + "learning_rate": 1.9045292421947524e-07, + "loss": 0.1321, + "num_input_tokens_seen": 1328000, + "step": 1950 + }, + { + "epoch": 0.04776097525224147, + "grad_norm": 33.58150100708008, + "learning_rate": 1.9094151561049492e-07, + "loss": 0.124, + "num_input_tokens_seen": 1331008, + "step": 1955 + }, + { + "epoch": 0.04788312608408863, + "grad_norm": 79.53211212158203, + "learning_rate": 1.9143010700151463e-07, + "loss": 0.1557, + "num_input_tokens_seen": 1334208, + "step": 1960 + }, + { + "epoch": 0.0480052769159358, + "grad_norm": 56.7999153137207, + "learning_rate": 1.9191869839253434e-07, + "loss": 0.1513, + "num_input_tokens_seen": 1337472, + "step": 1965 + }, + { + "epoch": 0.04812742774778296, + "grad_norm": 48.73048400878906, + "learning_rate": 1.92407289783554e-07, + "loss": 0.2155, + "num_input_tokens_seen": 1340928, + "step": 1970 + }, + { + "epoch": 0.04824957857963013, + "grad_norm": 34.07157516479492, + "learning_rate": 1.928958811745737e-07, + "loss": 0.1602, + "num_input_tokens_seen": 1344128, + "step": 1975 + }, + { + "epoch": 0.04837172941147729, + "grad_norm": 69.41503143310547, + "learning_rate": 1.933844725655934e-07, + "loss": 0.0793, + "num_input_tokens_seen": 1347904, + "step": 1980 + }, + { + "epoch": 0.048493880243324454, + "grad_norm": 82.7708740234375, + "learning_rate": 1.9387306395661307e-07, + "loss": 0.1941, + "num_input_tokens_seen": 1351552, + "step": 1985 + }, + { + "epoch": 0.04861603107517162, + "grad_norm": 70.6302719116211, + "learning_rate": 1.9436165534763277e-07, + "loss": 0.1792, + "num_input_tokens_seen": 1354816, + "step": 1990 + }, + { + "epoch": 0.048738181907018785, + "grad_norm": 80.82976531982422, + "learning_rate": 1.9485024673865248e-07, + "loss": 0.2255, + "num_input_tokens_seen": 1357824, + "step": 1995 + }, + { + "epoch": 0.048860332738865954, + "grad_norm": 113.82199096679688, + "learning_rate": 1.9533883812967214e-07, + "loss": 0.1439, + "num_input_tokens_seen": 1360896, + "step": 2000 + }, + { + "epoch": 0.048982483570713116, + "grad_norm": 55.600555419921875, + "learning_rate": 1.9582742952069184e-07, + "loss": 0.1135, + "num_input_tokens_seen": 1364288, + "step": 2005 + }, + { + "epoch": 0.049104634402560285, + "grad_norm": 101.28015899658203, + "learning_rate": 1.9631602091171155e-07, + "loss": 0.1285, + "num_input_tokens_seen": 1367424, + "step": 2010 + }, + { + "epoch": 0.04922678523440745, + "grad_norm": 68.34232330322266, + "learning_rate": 1.968046123027312e-07, + "loss": 0.149, + "num_input_tokens_seen": 1370880, + "step": 2015 + }, + { + "epoch": 0.04934893606625461, + "grad_norm": 45.278873443603516, + "learning_rate": 1.9729320369375091e-07, + "loss": 0.0844, + "num_input_tokens_seen": 1374272, + "step": 2020 + }, + { + "epoch": 0.04947108689810178, + "grad_norm": 49.61530303955078, + "learning_rate": 1.977817950847706e-07, + "loss": 0.1647, + "num_input_tokens_seen": 1377600, + "step": 2025 + }, + { + "epoch": 0.04959323772994894, + "grad_norm": 28.334949493408203, + "learning_rate": 1.9827038647579028e-07, + "loss": 0.1265, + "num_input_tokens_seen": 1380544, + "step": 2030 + }, + { + "epoch": 0.04971538856179611, + "grad_norm": 98.55791473388672, + "learning_rate": 1.9875897786680998e-07, + "loss": 0.1444, + "num_input_tokens_seen": 1384064, + "step": 2035 + }, + { + "epoch": 0.04983753939364327, + "grad_norm": 121.94667053222656, + "learning_rate": 1.9924756925782967e-07, + "loss": 0.1977, + "num_input_tokens_seen": 1387264, + "step": 2040 + }, + { + "epoch": 0.04995969022549043, + "grad_norm": 129.62332153320312, + "learning_rate": 1.9973616064884935e-07, + "loss": 0.2471, + "num_input_tokens_seen": 1390464, + "step": 2045 + }, + { + "epoch": 0.0500818410573376, + "grad_norm": 50.89700698852539, + "learning_rate": 2.0022475203986905e-07, + "loss": 0.1957, + "num_input_tokens_seen": 1394368, + "step": 2050 + }, + { + "epoch": 0.050203991889184764, + "grad_norm": 37.54369354248047, + "learning_rate": 2.0071334343088874e-07, + "loss": 0.2043, + "num_input_tokens_seen": 1397824, + "step": 2055 + }, + { + "epoch": 0.05032614272103193, + "grad_norm": 22.28923988342285, + "learning_rate": 2.0120193482190842e-07, + "loss": 0.0892, + "num_input_tokens_seen": 1401664, + "step": 2060 + }, + { + "epoch": 0.050448293552879095, + "grad_norm": 44.674705505371094, + "learning_rate": 2.0169052621292813e-07, + "loss": 0.1588, + "num_input_tokens_seen": 1405504, + "step": 2065 + }, + { + "epoch": 0.05057044438472626, + "grad_norm": 61.58700180053711, + "learning_rate": 2.021791176039478e-07, + "loss": 0.131, + "num_input_tokens_seen": 1408768, + "step": 2070 + }, + { + "epoch": 0.050692595216573426, + "grad_norm": 20.806472778320312, + "learning_rate": 2.026677089949675e-07, + "loss": 0.1556, + "num_input_tokens_seen": 1411968, + "step": 2075 + }, + { + "epoch": 0.05081474604842059, + "grad_norm": 24.93768310546875, + "learning_rate": 2.031563003859872e-07, + "loss": 0.1472, + "num_input_tokens_seen": 1415168, + "step": 2080 + }, + { + "epoch": 0.05093689688026776, + "grad_norm": 45.42394256591797, + "learning_rate": 2.0364489177700688e-07, + "loss": 0.149, + "num_input_tokens_seen": 1418688, + "step": 2085 + }, + { + "epoch": 0.05105904771211492, + "grad_norm": 54.84282302856445, + "learning_rate": 2.0413348316802656e-07, + "loss": 0.1877, + "num_input_tokens_seen": 1422144, + "step": 2090 + }, + { + "epoch": 0.05118119854396209, + "grad_norm": 49.84492492675781, + "learning_rate": 2.0462207455904627e-07, + "loss": 0.1552, + "num_input_tokens_seen": 1425792, + "step": 2095 + }, + { + "epoch": 0.05130334937580925, + "grad_norm": 79.23111724853516, + "learning_rate": 2.0511066595006595e-07, + "loss": 0.181, + "num_input_tokens_seen": 1429184, + "step": 2100 + }, + { + "epoch": 0.05142550020765641, + "grad_norm": 77.6365737915039, + "learning_rate": 2.0559925734108563e-07, + "loss": 0.2218, + "num_input_tokens_seen": 1432192, + "step": 2105 + }, + { + "epoch": 0.05154765103950358, + "grad_norm": 34.6860237121582, + "learning_rate": 2.0608784873210534e-07, + "loss": 0.0809, + "num_input_tokens_seen": 1435520, + "step": 2110 + }, + { + "epoch": 0.05166980187135074, + "grad_norm": 57.39856719970703, + "learning_rate": 2.0657644012312502e-07, + "loss": 0.1314, + "num_input_tokens_seen": 1438848, + "step": 2115 + }, + { + "epoch": 0.05179195270319791, + "grad_norm": 76.45271301269531, + "learning_rate": 2.070650315141447e-07, + "loss": 0.1002, + "num_input_tokens_seen": 1442432, + "step": 2120 + }, + { + "epoch": 0.051914103535045074, + "grad_norm": 64.7785873413086, + "learning_rate": 2.075536229051644e-07, + "loss": 0.1973, + "num_input_tokens_seen": 1445568, + "step": 2125 + }, + { + "epoch": 0.052036254366892236, + "grad_norm": 129.55709838867188, + "learning_rate": 2.080422142961841e-07, + "loss": 0.223, + "num_input_tokens_seen": 1449152, + "step": 2130 + }, + { + "epoch": 0.052158405198739405, + "grad_norm": 67.76712799072266, + "learning_rate": 2.0853080568720377e-07, + "loss": 0.1381, + "num_input_tokens_seen": 1452224, + "step": 2135 + }, + { + "epoch": 0.05228055603058657, + "grad_norm": 65.36962890625, + "learning_rate": 2.0901939707822348e-07, + "loss": 0.1132, + "num_input_tokens_seen": 1456064, + "step": 2140 + }, + { + "epoch": 0.052402706862433736, + "grad_norm": 21.638248443603516, + "learning_rate": 2.0950798846924316e-07, + "loss": 0.1221, + "num_input_tokens_seen": 1459648, + "step": 2145 + }, + { + "epoch": 0.0525248576942809, + "grad_norm": 41.81351852416992, + "learning_rate": 2.0999657986026284e-07, + "loss": 0.1435, + "num_input_tokens_seen": 1463296, + "step": 2150 + }, + { + "epoch": 0.05264700852612806, + "grad_norm": 49.325618743896484, + "learning_rate": 2.1048517125128255e-07, + "loss": 0.0786, + "num_input_tokens_seen": 1466752, + "step": 2155 + }, + { + "epoch": 0.05276915935797523, + "grad_norm": 70.45987701416016, + "learning_rate": 2.1097376264230223e-07, + "loss": 0.2243, + "num_input_tokens_seen": 1469760, + "step": 2160 + }, + { + "epoch": 0.05289131018982239, + "grad_norm": 16.85369300842285, + "learning_rate": 2.114623540333219e-07, + "loss": 0.2312, + "num_input_tokens_seen": 1473408, + "step": 2165 + }, + { + "epoch": 0.05301346102166956, + "grad_norm": 48.095367431640625, + "learning_rate": 2.1195094542434162e-07, + "loss": 0.1557, + "num_input_tokens_seen": 1476736, + "step": 2170 + }, + { + "epoch": 0.05313561185351672, + "grad_norm": 122.31871032714844, + "learning_rate": 2.124395368153613e-07, + "loss": 0.1821, + "num_input_tokens_seen": 1479936, + "step": 2175 + }, + { + "epoch": 0.053257762685363884, + "grad_norm": 94.89913177490234, + "learning_rate": 2.1292812820638098e-07, + "loss": 0.1161, + "num_input_tokens_seen": 1483328, + "step": 2180 + }, + { + "epoch": 0.05337991351721105, + "grad_norm": 68.65199279785156, + "learning_rate": 2.134167195974007e-07, + "loss": 0.1854, + "num_input_tokens_seen": 1486592, + "step": 2185 + }, + { + "epoch": 0.053502064349058215, + "grad_norm": 26.472394943237305, + "learning_rate": 2.1390531098842037e-07, + "loss": 0.0968, + "num_input_tokens_seen": 1489792, + "step": 2190 + }, + { + "epoch": 0.053624215180905384, + "grad_norm": 25.325607299804688, + "learning_rate": 2.1439390237944008e-07, + "loss": 0.1555, + "num_input_tokens_seen": 1492928, + "step": 2195 + }, + { + "epoch": 0.053746366012752546, + "grad_norm": 97.38853454589844, + "learning_rate": 2.1488249377045976e-07, + "loss": 0.1967, + "num_input_tokens_seen": 1496512, + "step": 2200 + }, + { + "epoch": 0.053868516844599715, + "grad_norm": 23.16677474975586, + "learning_rate": 2.1537108516147944e-07, + "loss": 0.1324, + "num_input_tokens_seen": 1499840, + "step": 2205 + }, + { + "epoch": 0.05399066767644688, + "grad_norm": 21.841686248779297, + "learning_rate": 2.1585967655249915e-07, + "loss": 0.1308, + "num_input_tokens_seen": 1502976, + "step": 2210 + }, + { + "epoch": 0.05411281850829404, + "grad_norm": 23.00255584716797, + "learning_rate": 2.1634826794351883e-07, + "loss": 0.24, + "num_input_tokens_seen": 1506432, + "step": 2215 + }, + { + "epoch": 0.05423496934014121, + "grad_norm": 48.22821044921875, + "learning_rate": 2.168368593345385e-07, + "loss": 0.2346, + "num_input_tokens_seen": 1509888, + "step": 2220 + }, + { + "epoch": 0.05435712017198837, + "grad_norm": 38.02397155761719, + "learning_rate": 2.1732545072555822e-07, + "loss": 0.2164, + "num_input_tokens_seen": 1513024, + "step": 2225 + }, + { + "epoch": 0.05447927100383554, + "grad_norm": 52.66263961791992, + "learning_rate": 2.178140421165779e-07, + "loss": 0.2196, + "num_input_tokens_seen": 1516544, + "step": 2230 + }, + { + "epoch": 0.0546014218356827, + "grad_norm": 28.82169532775879, + "learning_rate": 2.1830263350759758e-07, + "loss": 0.1195, + "num_input_tokens_seen": 1520576, + "step": 2235 + }, + { + "epoch": 0.05472357266752986, + "grad_norm": 52.429527282714844, + "learning_rate": 2.187912248986173e-07, + "loss": 0.1513, + "num_input_tokens_seen": 1523712, + "step": 2240 + }, + { + "epoch": 0.05484572349937703, + "grad_norm": 13.38976764678955, + "learning_rate": 2.1927981628963697e-07, + "loss": 0.0863, + "num_input_tokens_seen": 1527808, + "step": 2245 + }, + { + "epoch": 0.054967874331224194, + "grad_norm": 44.932308197021484, + "learning_rate": 2.1976840768065665e-07, + "loss": 0.232, + "num_input_tokens_seen": 1531264, + "step": 2250 + }, + { + "epoch": 0.05509002516307136, + "grad_norm": 30.4183292388916, + "learning_rate": 2.2025699907167636e-07, + "loss": 0.1553, + "num_input_tokens_seen": 1534912, + "step": 2255 + }, + { + "epoch": 0.055212175994918525, + "grad_norm": 44.02510452270508, + "learning_rate": 2.2074559046269604e-07, + "loss": 0.14, + "num_input_tokens_seen": 1538176, + "step": 2260 + }, + { + "epoch": 0.05533432682676569, + "grad_norm": 75.7412109375, + "learning_rate": 2.2123418185371572e-07, + "loss": 0.1312, + "num_input_tokens_seen": 1541760, + "step": 2265 + }, + { + "epoch": 0.055456477658612856, + "grad_norm": 100.07758331298828, + "learning_rate": 2.2172277324473543e-07, + "loss": 0.2186, + "num_input_tokens_seen": 1545152, + "step": 2270 + }, + { + "epoch": 0.05557862849046002, + "grad_norm": 50.55472183227539, + "learning_rate": 2.222113646357551e-07, + "loss": 0.1079, + "num_input_tokens_seen": 1548096, + "step": 2275 + }, + { + "epoch": 0.05570077932230719, + "grad_norm": 33.335689544677734, + "learning_rate": 2.226999560267748e-07, + "loss": 0.0889, + "num_input_tokens_seen": 1551168, + "step": 2280 + }, + { + "epoch": 0.05582293015415435, + "grad_norm": 134.88308715820312, + "learning_rate": 2.231885474177945e-07, + "loss": 0.2402, + "num_input_tokens_seen": 1554432, + "step": 2285 + }, + { + "epoch": 0.05594508098600152, + "grad_norm": 53.62267303466797, + "learning_rate": 2.2367713880881418e-07, + "loss": 0.1479, + "num_input_tokens_seen": 1557568, + "step": 2290 + }, + { + "epoch": 0.05606723181784868, + "grad_norm": 10.846335411071777, + "learning_rate": 2.2416573019983386e-07, + "loss": 0.1292, + "num_input_tokens_seen": 1561152, + "step": 2295 + }, + { + "epoch": 0.05618938264969584, + "grad_norm": 56.55923843383789, + "learning_rate": 2.2465432159085357e-07, + "loss": 0.1956, + "num_input_tokens_seen": 1564160, + "step": 2300 + }, + { + "epoch": 0.05631153348154301, + "grad_norm": 41.92458724975586, + "learning_rate": 2.2514291298187325e-07, + "loss": 0.101, + "num_input_tokens_seen": 1567616, + "step": 2305 + }, + { + "epoch": 0.05643368431339017, + "grad_norm": 21.67095375061035, + "learning_rate": 2.2563150437289293e-07, + "loss": 0.1142, + "num_input_tokens_seen": 1570944, + "step": 2310 + }, + { + "epoch": 0.05655583514523734, + "grad_norm": 56.14408493041992, + "learning_rate": 2.2612009576391264e-07, + "loss": 0.1082, + "num_input_tokens_seen": 1574208, + "step": 2315 + }, + { + "epoch": 0.056677985977084504, + "grad_norm": 61.64763259887695, + "learning_rate": 2.2660868715493232e-07, + "loss": 0.1785, + "num_input_tokens_seen": 1577664, + "step": 2320 + }, + { + "epoch": 0.056800136808931666, + "grad_norm": 33.018280029296875, + "learning_rate": 2.27097278545952e-07, + "loss": 0.1554, + "num_input_tokens_seen": 1580992, + "step": 2325 + }, + { + "epoch": 0.056922287640778835, + "grad_norm": 33.67935562133789, + "learning_rate": 2.275858699369717e-07, + "loss": 0.1593, + "num_input_tokens_seen": 1583808, + "step": 2330 + }, + { + "epoch": 0.057044438472626, + "grad_norm": 111.91667175292969, + "learning_rate": 2.280744613279914e-07, + "loss": 0.1516, + "num_input_tokens_seen": 1587136, + "step": 2335 + }, + { + "epoch": 0.057166589304473166, + "grad_norm": 33.9388542175293, + "learning_rate": 2.2856305271901107e-07, + "loss": 0.0974, + "num_input_tokens_seen": 1590592, + "step": 2340 + }, + { + "epoch": 0.05728874013632033, + "grad_norm": 31.756982803344727, + "learning_rate": 2.2905164411003078e-07, + "loss": 0.1008, + "num_input_tokens_seen": 1593984, + "step": 2345 + }, + { + "epoch": 0.05741089096816749, + "grad_norm": 16.234106063842773, + "learning_rate": 2.2954023550105044e-07, + "loss": 0.1619, + "num_input_tokens_seen": 1597248, + "step": 2350 + }, + { + "epoch": 0.05753304180001466, + "grad_norm": 57.85862731933594, + "learning_rate": 2.3002882689207014e-07, + "loss": 0.0828, + "num_input_tokens_seen": 1600512, + "step": 2355 + }, + { + "epoch": 0.05765519263186182, + "grad_norm": 96.33238220214844, + "learning_rate": 2.3051741828308985e-07, + "loss": 0.229, + "num_input_tokens_seen": 1603520, + "step": 2360 + }, + { + "epoch": 0.05777734346370899, + "grad_norm": 133.5282440185547, + "learning_rate": 2.310060096741095e-07, + "loss": 0.2023, + "num_input_tokens_seen": 1606848, + "step": 2365 + }, + { + "epoch": 0.05789949429555615, + "grad_norm": 25.754037857055664, + "learning_rate": 2.3149460106512921e-07, + "loss": 0.1168, + "num_input_tokens_seen": 1610688, + "step": 2370 + }, + { + "epoch": 0.05802164512740332, + "grad_norm": 64.81108856201172, + "learning_rate": 2.3198319245614892e-07, + "loss": 0.0904, + "num_input_tokens_seen": 1614336, + "step": 2375 + }, + { + "epoch": 0.05814379595925048, + "grad_norm": 21.53687858581543, + "learning_rate": 2.3247178384716858e-07, + "loss": 0.1832, + "num_input_tokens_seen": 1617216, + "step": 2380 + }, + { + "epoch": 0.058265946791097645, + "grad_norm": 33.07185363769531, + "learning_rate": 2.3296037523818829e-07, + "loss": 0.157, + "num_input_tokens_seen": 1620544, + "step": 2385 + }, + { + "epoch": 0.058388097622944814, + "grad_norm": 31.172517776489258, + "learning_rate": 2.33448966629208e-07, + "loss": 0.1079, + "num_input_tokens_seen": 1623680, + "step": 2390 + }, + { + "epoch": 0.058510248454791976, + "grad_norm": 30.034866333007812, + "learning_rate": 2.3393755802022765e-07, + "loss": 0.1297, + "num_input_tokens_seen": 1626816, + "step": 2395 + }, + { + "epoch": 0.058632399286639145, + "grad_norm": 41.092018127441406, + "learning_rate": 2.3442614941124736e-07, + "loss": 0.2271, + "num_input_tokens_seen": 1630336, + "step": 2400 + }, + { + "epoch": 0.05875455011848631, + "grad_norm": 17.320775985717773, + "learning_rate": 2.3491474080226706e-07, + "loss": 0.1235, + "num_input_tokens_seen": 1634304, + "step": 2405 + }, + { + "epoch": 0.05887670095033347, + "grad_norm": 34.5215950012207, + "learning_rate": 2.3540333219328672e-07, + "loss": 0.1444, + "num_input_tokens_seen": 1637760, + "step": 2410 + }, + { + "epoch": 0.05899885178218064, + "grad_norm": 60.64455795288086, + "learning_rate": 2.3589192358430643e-07, + "loss": 0.1087, + "num_input_tokens_seen": 1640896, + "step": 2415 + }, + { + "epoch": 0.0591210026140278, + "grad_norm": 44.74037551879883, + "learning_rate": 2.3638051497532613e-07, + "loss": 0.1224, + "num_input_tokens_seen": 1644352, + "step": 2420 + }, + { + "epoch": 0.05924315344587497, + "grad_norm": 59.06151580810547, + "learning_rate": 2.3686910636634582e-07, + "loss": 0.2265, + "num_input_tokens_seen": 1648320, + "step": 2425 + }, + { + "epoch": 0.05936530427772213, + "grad_norm": 37.72708511352539, + "learning_rate": 2.373576977573655e-07, + "loss": 0.2262, + "num_input_tokens_seen": 1651712, + "step": 2430 + }, + { + "epoch": 0.05948745510956929, + "grad_norm": 79.2681884765625, + "learning_rate": 2.378462891483852e-07, + "loss": 0.1233, + "num_input_tokens_seen": 1654848, + "step": 2435 + }, + { + "epoch": 0.05960960594141646, + "grad_norm": 35.262062072753906, + "learning_rate": 2.3833488053940489e-07, + "loss": 0.187, + "num_input_tokens_seen": 1658240, + "step": 2440 + }, + { + "epoch": 0.059731756773263624, + "grad_norm": 23.657838821411133, + "learning_rate": 2.3882347193042457e-07, + "loss": 0.1262, + "num_input_tokens_seen": 1661440, + "step": 2445 + }, + { + "epoch": 0.05985390760511079, + "grad_norm": 30.22248649597168, + "learning_rate": 2.3931206332144425e-07, + "loss": 0.0881, + "num_input_tokens_seen": 1664704, + "step": 2450 + }, + { + "epoch": 0.059976058436957955, + "grad_norm": 30.023712158203125, + "learning_rate": 2.39800654712464e-07, + "loss": 0.1146, + "num_input_tokens_seen": 1668352, + "step": 2455 + }, + { + "epoch": 0.060098209268805124, + "grad_norm": 55.32008361816406, + "learning_rate": 2.4028924610348366e-07, + "loss": 0.198, + "num_input_tokens_seen": 1672256, + "step": 2460 + }, + { + "epoch": 0.060220360100652286, + "grad_norm": 56.359771728515625, + "learning_rate": 2.4077783749450335e-07, + "loss": 0.2135, + "num_input_tokens_seen": 1675008, + "step": 2465 + }, + { + "epoch": 0.06034251093249945, + "grad_norm": 79.16679382324219, + "learning_rate": 2.41266428885523e-07, + "loss": 0.1464, + "num_input_tokens_seen": 1678528, + "step": 2470 + }, + { + "epoch": 0.06046466176434662, + "grad_norm": 58.615352630615234, + "learning_rate": 2.417550202765427e-07, + "loss": 0.1469, + "num_input_tokens_seen": 1681536, + "step": 2475 + }, + { + "epoch": 0.06058681259619378, + "grad_norm": 77.35822296142578, + "learning_rate": 2.422436116675624e-07, + "loss": 0.2015, + "num_input_tokens_seen": 1684544, + "step": 2480 + }, + { + "epoch": 0.06070896342804095, + "grad_norm": 78.36398315429688, + "learning_rate": 2.427322030585821e-07, + "loss": 0.2508, + "num_input_tokens_seen": 1688000, + "step": 2485 + }, + { + "epoch": 0.06083111425988811, + "grad_norm": 46.09014129638672, + "learning_rate": 2.432207944496018e-07, + "loss": 0.1095, + "num_input_tokens_seen": 1691264, + "step": 2490 + }, + { + "epoch": 0.06095326509173527, + "grad_norm": 34.414459228515625, + "learning_rate": 2.437093858406215e-07, + "loss": 0.0887, + "num_input_tokens_seen": 1694656, + "step": 2495 + }, + { + "epoch": 0.06107541592358244, + "grad_norm": 33.107975006103516, + "learning_rate": 2.4419797723164117e-07, + "loss": 0.1181, + "num_input_tokens_seen": 1698304, + "step": 2500 + }, + { + "epoch": 0.0611975667554296, + "grad_norm": 75.07171630859375, + "learning_rate": 2.4468656862266085e-07, + "loss": 0.1827, + "num_input_tokens_seen": 1701376, + "step": 2505 + }, + { + "epoch": 0.06131971758727677, + "grad_norm": 54.65150451660156, + "learning_rate": 2.4517516001368053e-07, + "loss": 0.1661, + "num_input_tokens_seen": 1704320, + "step": 2510 + }, + { + "epoch": 0.061441868419123934, + "grad_norm": 21.785341262817383, + "learning_rate": 2.4566375140470026e-07, + "loss": 0.0947, + "num_input_tokens_seen": 1707840, + "step": 2515 + }, + { + "epoch": 0.061564019250971096, + "grad_norm": 25.026840209960938, + "learning_rate": 2.4615234279571995e-07, + "loss": 0.0512, + "num_input_tokens_seen": 1711232, + "step": 2520 + }, + { + "epoch": 0.061686170082818265, + "grad_norm": 22.339088439941406, + "learning_rate": 2.4664093418673963e-07, + "loss": 0.1004, + "num_input_tokens_seen": 1714944, + "step": 2525 + }, + { + "epoch": 0.06180832091466543, + "grad_norm": 24.17763328552246, + "learning_rate": 2.471295255777593e-07, + "loss": 0.0867, + "num_input_tokens_seen": 1718336, + "step": 2530 + }, + { + "epoch": 0.061930471746512596, + "grad_norm": 52.764808654785156, + "learning_rate": 2.47618116968779e-07, + "loss": 0.1462, + "num_input_tokens_seen": 1721536, + "step": 2535 + }, + { + "epoch": 0.06205262257835976, + "grad_norm": 50.03315353393555, + "learning_rate": 2.4810670835979867e-07, + "loss": 0.1976, + "num_input_tokens_seen": 1724928, + "step": 2540 + }, + { + "epoch": 0.06217477341020693, + "grad_norm": 42.78987121582031, + "learning_rate": 2.485952997508184e-07, + "loss": 0.1417, + "num_input_tokens_seen": 1728064, + "step": 2545 + }, + { + "epoch": 0.06229692424205409, + "grad_norm": 85.56526947021484, + "learning_rate": 2.4908389114183803e-07, + "loss": 0.142, + "num_input_tokens_seen": 1731392, + "step": 2550 + }, + { + "epoch": 0.06241907507390125, + "grad_norm": 34.91035461425781, + "learning_rate": 2.4957248253285777e-07, + "loss": 0.1403, + "num_input_tokens_seen": 1734912, + "step": 2555 + }, + { + "epoch": 0.06254122590574841, + "grad_norm": 102.80146026611328, + "learning_rate": 2.5006107392387745e-07, + "loss": 0.138, + "num_input_tokens_seen": 1737984, + "step": 2560 + }, + { + "epoch": 0.06266337673759559, + "grad_norm": 36.7209587097168, + "learning_rate": 2.5054966531489713e-07, + "loss": 0.2083, + "num_input_tokens_seen": 1741504, + "step": 2565 + }, + { + "epoch": 0.06278552756944275, + "grad_norm": 51.84968185424805, + "learning_rate": 2.510382567059168e-07, + "loss": 0.1017, + "num_input_tokens_seen": 1745152, + "step": 2570 + }, + { + "epoch": 0.06290767840128991, + "grad_norm": 54.773658752441406, + "learning_rate": 2.5152684809693655e-07, + "loss": 0.1951, + "num_input_tokens_seen": 1748480, + "step": 2575 + }, + { + "epoch": 0.06302982923313707, + "grad_norm": 35.93278884887695, + "learning_rate": 2.5201543948795623e-07, + "loss": 0.1033, + "num_input_tokens_seen": 1751936, + "step": 2580 + }, + { + "epoch": 0.06315198006498424, + "grad_norm": 39.635047912597656, + "learning_rate": 2.525040308789759e-07, + "loss": 0.257, + "num_input_tokens_seen": 1755200, + "step": 2585 + }, + { + "epoch": 0.06327413089683141, + "grad_norm": 27.207603454589844, + "learning_rate": 2.529926222699956e-07, + "loss": 0.1453, + "num_input_tokens_seen": 1758656, + "step": 2590 + }, + { + "epoch": 0.06339628172867857, + "grad_norm": 33.18776321411133, + "learning_rate": 2.5348121366101527e-07, + "loss": 0.1116, + "num_input_tokens_seen": 1761664, + "step": 2595 + }, + { + "epoch": 0.06351843256052574, + "grad_norm": 19.57836151123047, + "learning_rate": 2.5396980505203495e-07, + "loss": 0.0599, + "num_input_tokens_seen": 1764992, + "step": 2600 + }, + { + "epoch": 0.0636405833923729, + "grad_norm": 19.68585205078125, + "learning_rate": 2.544583964430547e-07, + "loss": 0.0763, + "num_input_tokens_seen": 1768512, + "step": 2605 + }, + { + "epoch": 0.06376273422422006, + "grad_norm": 60.64818572998047, + "learning_rate": 2.5494698783407437e-07, + "loss": 0.1781, + "num_input_tokens_seen": 1771776, + "step": 2610 + }, + { + "epoch": 0.06388488505606724, + "grad_norm": 51.9759635925293, + "learning_rate": 2.5543557922509405e-07, + "loss": 0.1103, + "num_input_tokens_seen": 1775488, + "step": 2615 + }, + { + "epoch": 0.0640070358879144, + "grad_norm": 61.86332321166992, + "learning_rate": 2.5592417061611373e-07, + "loss": 0.1475, + "num_input_tokens_seen": 1778880, + "step": 2620 + }, + { + "epoch": 0.06412918671976156, + "grad_norm": 45.70049285888672, + "learning_rate": 2.564127620071334e-07, + "loss": 0.1251, + "num_input_tokens_seen": 1782848, + "step": 2625 + }, + { + "epoch": 0.06425133755160872, + "grad_norm": 9.266349792480469, + "learning_rate": 2.569013533981531e-07, + "loss": 0.1095, + "num_input_tokens_seen": 1786240, + "step": 2630 + }, + { + "epoch": 0.06437348838345588, + "grad_norm": 47.50636672973633, + "learning_rate": 2.5738994478917283e-07, + "loss": 0.1151, + "num_input_tokens_seen": 1789760, + "step": 2635 + }, + { + "epoch": 0.06449563921530306, + "grad_norm": 36.173431396484375, + "learning_rate": 2.578785361801925e-07, + "loss": 0.0562, + "num_input_tokens_seen": 1793536, + "step": 2640 + }, + { + "epoch": 0.06461779004715022, + "grad_norm": 48.79774856567383, + "learning_rate": 2.583671275712122e-07, + "loss": 0.1524, + "num_input_tokens_seen": 1796928, + "step": 2645 + }, + { + "epoch": 0.06473994087899738, + "grad_norm": 75.12857055664062, + "learning_rate": 2.5885571896223187e-07, + "loss": 0.2607, + "num_input_tokens_seen": 1800384, + "step": 2650 + }, + { + "epoch": 0.06486209171084455, + "grad_norm": 75.68853759765625, + "learning_rate": 2.5934431035325155e-07, + "loss": 0.1124, + "num_input_tokens_seen": 1803648, + "step": 2655 + }, + { + "epoch": 0.06498424254269172, + "grad_norm": 42.324928283691406, + "learning_rate": 2.5983290174427123e-07, + "loss": 0.1881, + "num_input_tokens_seen": 1806784, + "step": 2660 + }, + { + "epoch": 0.06510639337453888, + "grad_norm": 92.44508361816406, + "learning_rate": 2.6032149313529097e-07, + "loss": 0.3077, + "num_input_tokens_seen": 1809728, + "step": 2665 + }, + { + "epoch": 0.06522854420638605, + "grad_norm": 103.27250671386719, + "learning_rate": 2.6081008452631065e-07, + "loss": 0.1816, + "num_input_tokens_seen": 1813056, + "step": 2670 + }, + { + "epoch": 0.06535069503823321, + "grad_norm": 15.108059883117676, + "learning_rate": 2.6129867591733033e-07, + "loss": 0.1059, + "num_input_tokens_seen": 1816448, + "step": 2675 + }, + { + "epoch": 0.06547284587008037, + "grad_norm": 14.266429901123047, + "learning_rate": 2.6178726730835e-07, + "loss": 0.08, + "num_input_tokens_seen": 1819712, + "step": 2680 + }, + { + "epoch": 0.06559499670192755, + "grad_norm": 79.78052520751953, + "learning_rate": 2.622758586993697e-07, + "loss": 0.2463, + "num_input_tokens_seen": 1822784, + "step": 2685 + }, + { + "epoch": 0.06571714753377471, + "grad_norm": 69.0220718383789, + "learning_rate": 2.627644500903894e-07, + "loss": 0.1522, + "num_input_tokens_seen": 1826048, + "step": 2690 + }, + { + "epoch": 0.06583929836562187, + "grad_norm": 2.8440940380096436, + "learning_rate": 2.632530414814091e-07, + "loss": 0.1671, + "num_input_tokens_seen": 1829056, + "step": 2695 + }, + { + "epoch": 0.06596144919746903, + "grad_norm": 19.573835372924805, + "learning_rate": 2.637416328724288e-07, + "loss": 0.1142, + "num_input_tokens_seen": 1832576, + "step": 2700 + }, + { + "epoch": 0.0660836000293162, + "grad_norm": 50.86659622192383, + "learning_rate": 2.6423022426344847e-07, + "loss": 0.2161, + "num_input_tokens_seen": 1836224, + "step": 2705 + }, + { + "epoch": 0.06620575086116337, + "grad_norm": 48.93684005737305, + "learning_rate": 2.6471881565446815e-07, + "loss": 0.0834, + "num_input_tokens_seen": 1839424, + "step": 2710 + }, + { + "epoch": 0.06632790169301053, + "grad_norm": 112.5682144165039, + "learning_rate": 2.6520740704548783e-07, + "loss": 0.1834, + "num_input_tokens_seen": 1842432, + "step": 2715 + }, + { + "epoch": 0.0664500525248577, + "grad_norm": 100.8270034790039, + "learning_rate": 2.656959984365075e-07, + "loss": 0.1845, + "num_input_tokens_seen": 1845952, + "step": 2720 + }, + { + "epoch": 0.06657220335670486, + "grad_norm": 3.2667179107666016, + "learning_rate": 2.6618458982752725e-07, + "loss": 0.1038, + "num_input_tokens_seen": 1849600, + "step": 2725 + }, + { + "epoch": 0.06669435418855202, + "grad_norm": 65.88026428222656, + "learning_rate": 2.6667318121854693e-07, + "loss": 0.139, + "num_input_tokens_seen": 1852992, + "step": 2730 + }, + { + "epoch": 0.0668165050203992, + "grad_norm": 21.890911102294922, + "learning_rate": 2.671617726095666e-07, + "loss": 0.1293, + "num_input_tokens_seen": 1855872, + "step": 2735 + }, + { + "epoch": 0.06693865585224636, + "grad_norm": 41.880271911621094, + "learning_rate": 2.676503640005863e-07, + "loss": 0.1518, + "num_input_tokens_seen": 1859200, + "step": 2740 + }, + { + "epoch": 0.06706080668409352, + "grad_norm": 22.694137573242188, + "learning_rate": 2.68138955391606e-07, + "loss": 0.1194, + "num_input_tokens_seen": 1863168, + "step": 2745 + }, + { + "epoch": 0.06718295751594068, + "grad_norm": 36.17405319213867, + "learning_rate": 2.6862754678262566e-07, + "loss": 0.173, + "num_input_tokens_seen": 1866112, + "step": 2750 + }, + { + "epoch": 0.06730510834778784, + "grad_norm": 41.965919494628906, + "learning_rate": 2.691161381736454e-07, + "loss": 0.1099, + "num_input_tokens_seen": 1869376, + "step": 2755 + }, + { + "epoch": 0.06742725917963502, + "grad_norm": 96.62606811523438, + "learning_rate": 2.6960472956466507e-07, + "loss": 0.1761, + "num_input_tokens_seen": 1873152, + "step": 2760 + }, + { + "epoch": 0.06754941001148218, + "grad_norm": 42.52037048339844, + "learning_rate": 2.7009332095568475e-07, + "loss": 0.1735, + "num_input_tokens_seen": 1876992, + "step": 2765 + }, + { + "epoch": 0.06767156084332934, + "grad_norm": 42.74496841430664, + "learning_rate": 2.705819123467045e-07, + "loss": 0.158, + "num_input_tokens_seen": 1880320, + "step": 2770 + }, + { + "epoch": 0.0677937116751765, + "grad_norm": 24.743160247802734, + "learning_rate": 2.710705037377241e-07, + "loss": 0.0991, + "num_input_tokens_seen": 1883328, + "step": 2775 + }, + { + "epoch": 0.06791586250702367, + "grad_norm": 37.85517120361328, + "learning_rate": 2.715590951287438e-07, + "loss": 0.2085, + "num_input_tokens_seen": 1886784, + "step": 2780 + }, + { + "epoch": 0.06803801333887084, + "grad_norm": 17.8824462890625, + "learning_rate": 2.7204768651976353e-07, + "loss": 0.1438, + "num_input_tokens_seen": 1890176, + "step": 2785 + }, + { + "epoch": 0.068160164170718, + "grad_norm": 13.706899642944336, + "learning_rate": 2.725362779107832e-07, + "loss": 0.0999, + "num_input_tokens_seen": 1893376, + "step": 2790 + }, + { + "epoch": 0.06828231500256517, + "grad_norm": 13.36403751373291, + "learning_rate": 2.730248693018029e-07, + "loss": 0.1893, + "num_input_tokens_seen": 1896768, + "step": 2795 + }, + { + "epoch": 0.06840446583441233, + "grad_norm": 39.36397171020508, + "learning_rate": 2.7351346069282263e-07, + "loss": 0.189, + "num_input_tokens_seen": 1899904, + "step": 2800 + }, + { + "epoch": 0.06852661666625949, + "grad_norm": 28.411413192749023, + "learning_rate": 2.7400205208384226e-07, + "loss": 0.1138, + "num_input_tokens_seen": 1903488, + "step": 2805 + }, + { + "epoch": 0.06864876749810667, + "grad_norm": 43.3648681640625, + "learning_rate": 2.7449064347486194e-07, + "loss": 0.1319, + "num_input_tokens_seen": 1907392, + "step": 2810 + }, + { + "epoch": 0.06877091832995383, + "grad_norm": 72.1719741821289, + "learning_rate": 2.7497923486588167e-07, + "loss": 0.1733, + "num_input_tokens_seen": 1911040, + "step": 2815 + }, + { + "epoch": 0.06889306916180099, + "grad_norm": 25.68684959411621, + "learning_rate": 2.7546782625690135e-07, + "loss": 0.1078, + "num_input_tokens_seen": 1914048, + "step": 2820 + }, + { + "epoch": 0.06901521999364815, + "grad_norm": 30.87330436706543, + "learning_rate": 2.7595641764792103e-07, + "loss": 0.1456, + "num_input_tokens_seen": 1917120, + "step": 2825 + }, + { + "epoch": 0.06913737082549533, + "grad_norm": 38.771766662597656, + "learning_rate": 2.7644500903894077e-07, + "loss": 0.238, + "num_input_tokens_seen": 1920448, + "step": 2830 + }, + { + "epoch": 0.06925952165734249, + "grad_norm": 7.507987976074219, + "learning_rate": 2.769336004299604e-07, + "loss": 0.2612, + "num_input_tokens_seen": 1923968, + "step": 2835 + }, + { + "epoch": 0.06938167248918965, + "grad_norm": 36.60184860229492, + "learning_rate": 2.774221918209801e-07, + "loss": 0.1823, + "num_input_tokens_seen": 1926912, + "step": 2840 + }, + { + "epoch": 0.06950382332103681, + "grad_norm": 30.51137351989746, + "learning_rate": 2.779107832119998e-07, + "loss": 0.0955, + "num_input_tokens_seen": 1930304, + "step": 2845 + }, + { + "epoch": 0.06962597415288398, + "grad_norm": 25.373884201049805, + "learning_rate": 2.783993746030195e-07, + "loss": 0.0827, + "num_input_tokens_seen": 1933696, + "step": 2850 + }, + { + "epoch": 0.06974812498473115, + "grad_norm": 45.243255615234375, + "learning_rate": 2.788879659940392e-07, + "loss": 0.1063, + "num_input_tokens_seen": 1937472, + "step": 2855 + }, + { + "epoch": 0.06987027581657831, + "grad_norm": 87.1949691772461, + "learning_rate": 2.793765573850589e-07, + "loss": 0.1743, + "num_input_tokens_seen": 1940992, + "step": 2860 + }, + { + "epoch": 0.06999242664842548, + "grad_norm": 17.872364044189453, + "learning_rate": 2.7986514877607854e-07, + "loss": 0.1996, + "num_input_tokens_seen": 1944704, + "step": 2865 + }, + { + "epoch": 0.07011457748027264, + "grad_norm": 33.64302444458008, + "learning_rate": 2.803537401670982e-07, + "loss": 0.2116, + "num_input_tokens_seen": 1947840, + "step": 2870 + }, + { + "epoch": 0.0702367283121198, + "grad_norm": 8.390338897705078, + "learning_rate": 2.8084233155811795e-07, + "loss": 0.1368, + "num_input_tokens_seen": 1951168, + "step": 2875 + }, + { + "epoch": 0.07035887914396698, + "grad_norm": 12.758993148803711, + "learning_rate": 2.8133092294913764e-07, + "loss": 0.087, + "num_input_tokens_seen": 1954624, + "step": 2880 + }, + { + "epoch": 0.07048102997581414, + "grad_norm": 52.06711959838867, + "learning_rate": 2.818195143401573e-07, + "loss": 0.1982, + "num_input_tokens_seen": 1958016, + "step": 2885 + }, + { + "epoch": 0.0706031808076613, + "grad_norm": 28.96224594116211, + "learning_rate": 2.8230810573117705e-07, + "loss": 0.2, + "num_input_tokens_seen": 1961152, + "step": 2890 + }, + { + "epoch": 0.07072533163950846, + "grad_norm": 34.215213775634766, + "learning_rate": 2.827966971221967e-07, + "loss": 0.1631, + "num_input_tokens_seen": 1964224, + "step": 2895 + }, + { + "epoch": 0.07084748247135562, + "grad_norm": 64.00146484375, + "learning_rate": 2.8328528851321636e-07, + "loss": 0.1541, + "num_input_tokens_seen": 1967104, + "step": 2900 + }, + { + "epoch": 0.0709696333032028, + "grad_norm": 12.239304542541504, + "learning_rate": 2.837738799042361e-07, + "loss": 0.1006, + "num_input_tokens_seen": 1970944, + "step": 2905 + }, + { + "epoch": 0.07109178413504996, + "grad_norm": 49.136531829833984, + "learning_rate": 2.842624712952558e-07, + "loss": 0.1888, + "num_input_tokens_seen": 1974016, + "step": 2910 + }, + { + "epoch": 0.07121393496689712, + "grad_norm": 33.941627502441406, + "learning_rate": 2.8475106268627546e-07, + "loss": 0.0826, + "num_input_tokens_seen": 1977664, + "step": 2915 + }, + { + "epoch": 0.07133608579874429, + "grad_norm": 100.75859069824219, + "learning_rate": 2.852396540772952e-07, + "loss": 0.2103, + "num_input_tokens_seen": 1980928, + "step": 2920 + }, + { + "epoch": 0.07145823663059145, + "grad_norm": 53.549861907958984, + "learning_rate": 2.857282454683148e-07, + "loss": 0.1023, + "num_input_tokens_seen": 1984256, + "step": 2925 + }, + { + "epoch": 0.07158038746243862, + "grad_norm": 23.475570678710938, + "learning_rate": 2.862168368593345e-07, + "loss": 0.0925, + "num_input_tokens_seen": 1987904, + "step": 2930 + }, + { + "epoch": 0.07170253829428579, + "grad_norm": 9.245582580566406, + "learning_rate": 2.867054282503542e-07, + "loss": 0.0685, + "num_input_tokens_seen": 1991104, + "step": 2935 + }, + { + "epoch": 0.07182468912613295, + "grad_norm": 38.58662033081055, + "learning_rate": 2.871940196413739e-07, + "loss": 0.1111, + "num_input_tokens_seen": 1994624, + "step": 2940 + }, + { + "epoch": 0.07194683995798011, + "grad_norm": 9.25555419921875, + "learning_rate": 2.876826110323936e-07, + "loss": 0.1573, + "num_input_tokens_seen": 1997696, + "step": 2945 + }, + { + "epoch": 0.07206899078982727, + "grad_norm": 62.213035583496094, + "learning_rate": 2.8817120242341333e-07, + "loss": 0.125, + "num_input_tokens_seen": 2000960, + "step": 2950 + }, + { + "epoch": 0.07219114162167445, + "grad_norm": 47.498817443847656, + "learning_rate": 2.8865979381443296e-07, + "loss": 0.2306, + "num_input_tokens_seen": 2004224, + "step": 2955 + }, + { + "epoch": 0.07231329245352161, + "grad_norm": 34.238121032714844, + "learning_rate": 2.8914838520545264e-07, + "loss": 0.2147, + "num_input_tokens_seen": 2007680, + "step": 2960 + }, + { + "epoch": 0.07243544328536877, + "grad_norm": 53.50193786621094, + "learning_rate": 2.896369765964723e-07, + "loss": 0.249, + "num_input_tokens_seen": 2011008, + "step": 2965 + }, + { + "epoch": 0.07255759411721593, + "grad_norm": 27.394147872924805, + "learning_rate": 2.9012556798749206e-07, + "loss": 0.183, + "num_input_tokens_seen": 2014464, + "step": 2970 + }, + { + "epoch": 0.0726797449490631, + "grad_norm": 35.457420349121094, + "learning_rate": 2.9061415937851174e-07, + "loss": 0.1893, + "num_input_tokens_seen": 2018240, + "step": 2975 + }, + { + "epoch": 0.07280189578091027, + "grad_norm": 34.14503860473633, + "learning_rate": 2.911027507695314e-07, + "loss": 0.1779, + "num_input_tokens_seen": 2021440, + "step": 2980 + }, + { + "epoch": 0.07292404661275743, + "grad_norm": 82.81512451171875, + "learning_rate": 2.915913421605511e-07, + "loss": 0.2133, + "num_input_tokens_seen": 2024832, + "step": 2985 + }, + { + "epoch": 0.0730461974446046, + "grad_norm": 16.721773147583008, + "learning_rate": 2.920799335515708e-07, + "loss": 0.1293, + "num_input_tokens_seen": 2028480, + "step": 2990 + }, + { + "epoch": 0.07316834827645176, + "grad_norm": 32.263912200927734, + "learning_rate": 2.9256852494259046e-07, + "loss": 0.1734, + "num_input_tokens_seen": 2031808, + "step": 2995 + }, + { + "epoch": 0.07329049910829893, + "grad_norm": 23.13930892944336, + "learning_rate": 2.930571163336102e-07, + "loss": 0.1972, + "num_input_tokens_seen": 2034496, + "step": 3000 + }, + { + "epoch": 0.0734126499401461, + "grad_norm": 34.838260650634766, + "learning_rate": 2.935457077246299e-07, + "loss": 0.1524, + "num_input_tokens_seen": 2037504, + "step": 3005 + }, + { + "epoch": 0.07353480077199326, + "grad_norm": 56.00746154785156, + "learning_rate": 2.9403429911564956e-07, + "loss": 0.1649, + "num_input_tokens_seen": 2040512, + "step": 3010 + }, + { + "epoch": 0.07365695160384042, + "grad_norm": 29.56755256652832, + "learning_rate": 2.945228905066693e-07, + "loss": 0.1462, + "num_input_tokens_seen": 2043712, + "step": 3015 + }, + { + "epoch": 0.07377910243568758, + "grad_norm": 28.033449172973633, + "learning_rate": 2.950114818976889e-07, + "loss": 0.1338, + "num_input_tokens_seen": 2047104, + "step": 3020 + }, + { + "epoch": 0.07390125326753476, + "grad_norm": 20.67574119567871, + "learning_rate": 2.955000732887086e-07, + "loss": 0.1718, + "num_input_tokens_seen": 2050944, + "step": 3025 + }, + { + "epoch": 0.07402340409938192, + "grad_norm": 30.444156646728516, + "learning_rate": 2.9598866467972834e-07, + "loss": 0.0823, + "num_input_tokens_seen": 2054016, + "step": 3030 + }, + { + "epoch": 0.07414555493122908, + "grad_norm": 9.620954513549805, + "learning_rate": 2.96477256070748e-07, + "loss": 0.091, + "num_input_tokens_seen": 2056896, + "step": 3035 + }, + { + "epoch": 0.07426770576307624, + "grad_norm": 25.14250373840332, + "learning_rate": 2.969658474617677e-07, + "loss": 0.1407, + "num_input_tokens_seen": 2060224, + "step": 3040 + }, + { + "epoch": 0.0743898565949234, + "grad_norm": 69.50040435791016, + "learning_rate": 2.9745443885278744e-07, + "loss": 0.1161, + "num_input_tokens_seen": 2063808, + "step": 3045 + }, + { + "epoch": 0.07451200742677058, + "grad_norm": 58.84195327758789, + "learning_rate": 2.9794303024380706e-07, + "loss": 0.2451, + "num_input_tokens_seen": 2067072, + "step": 3050 + }, + { + "epoch": 0.07463415825861774, + "grad_norm": 15.835522651672363, + "learning_rate": 2.9843162163482675e-07, + "loss": 0.1461, + "num_input_tokens_seen": 2070720, + "step": 3055 + }, + { + "epoch": 0.0747563090904649, + "grad_norm": 52.69871139526367, + "learning_rate": 2.989202130258465e-07, + "loss": 0.0868, + "num_input_tokens_seen": 2074176, + "step": 3060 + }, + { + "epoch": 0.07487845992231207, + "grad_norm": 75.31199645996094, + "learning_rate": 2.9940880441686616e-07, + "loss": 0.2104, + "num_input_tokens_seen": 2077376, + "step": 3065 + }, + { + "epoch": 0.07500061075415923, + "grad_norm": 25.50840950012207, + "learning_rate": 2.9989739580788584e-07, + "loss": 0.1225, + "num_input_tokens_seen": 2080896, + "step": 3070 + }, + { + "epoch": 0.0751227615860064, + "grad_norm": 38.57832717895508, + "learning_rate": 3.003859871989056e-07, + "loss": 0.167, + "num_input_tokens_seen": 2084096, + "step": 3075 + }, + { + "epoch": 0.07524491241785357, + "grad_norm": 53.26701736450195, + "learning_rate": 3.008745785899252e-07, + "loss": 0.2051, + "num_input_tokens_seen": 2087296, + "step": 3080 + }, + { + "epoch": 0.07536706324970073, + "grad_norm": 6.57878303527832, + "learning_rate": 3.013631699809449e-07, + "loss": 0.1622, + "num_input_tokens_seen": 2090624, + "step": 3085 + }, + { + "epoch": 0.07548921408154789, + "grad_norm": 44.716312408447266, + "learning_rate": 3.018517613719646e-07, + "loss": 0.0648, + "num_input_tokens_seen": 2094848, + "step": 3090 + }, + { + "epoch": 0.07561136491339505, + "grad_norm": 29.319324493408203, + "learning_rate": 3.023403527629843e-07, + "loss": 0.1455, + "num_input_tokens_seen": 2098048, + "step": 3095 + }, + { + "epoch": 0.07573351574524223, + "grad_norm": 15.153791427612305, + "learning_rate": 3.02828944154004e-07, + "loss": 0.076, + "num_input_tokens_seen": 2101888, + "step": 3100 + }, + { + "epoch": 0.07585566657708939, + "grad_norm": 30.897048950195312, + "learning_rate": 3.033175355450237e-07, + "loss": 0.2214, + "num_input_tokens_seen": 2104960, + "step": 3105 + }, + { + "epoch": 0.07597781740893655, + "grad_norm": 23.50116539001465, + "learning_rate": 3.0380612693604335e-07, + "loss": 0.1427, + "num_input_tokens_seen": 2107968, + "step": 3110 + }, + { + "epoch": 0.07609996824078372, + "grad_norm": 33.305538177490234, + "learning_rate": 3.0429471832706303e-07, + "loss": 0.1003, + "num_input_tokens_seen": 2111168, + "step": 3115 + }, + { + "epoch": 0.07622211907263088, + "grad_norm": 31.173358917236328, + "learning_rate": 3.0478330971808276e-07, + "loss": 0.1005, + "num_input_tokens_seen": 2114496, + "step": 3120 + }, + { + "epoch": 0.07634426990447805, + "grad_norm": 4.587650775909424, + "learning_rate": 3.0527190110910244e-07, + "loss": 0.0685, + "num_input_tokens_seen": 2117952, + "step": 3125 + }, + { + "epoch": 0.07646642073632522, + "grad_norm": 59.983375549316406, + "learning_rate": 3.057604925001221e-07, + "loss": 0.2478, + "num_input_tokens_seen": 2121088, + "step": 3130 + }, + { + "epoch": 0.07658857156817238, + "grad_norm": 26.86677360534668, + "learning_rate": 3.0624908389114186e-07, + "loss": 0.1306, + "num_input_tokens_seen": 2124224, + "step": 3135 + }, + { + "epoch": 0.07671072240001954, + "grad_norm": 34.66873550415039, + "learning_rate": 3.067376752821615e-07, + "loss": 0.1123, + "num_input_tokens_seen": 2127296, + "step": 3140 + }, + { + "epoch": 0.0768328732318667, + "grad_norm": 18.21170425415039, + "learning_rate": 3.0722626667318117e-07, + "loss": 0.1153, + "num_input_tokens_seen": 2131008, + "step": 3145 + }, + { + "epoch": 0.07695502406371388, + "grad_norm": 94.64302062988281, + "learning_rate": 3.077148580642009e-07, + "loss": 0.2439, + "num_input_tokens_seen": 2134016, + "step": 3150 + }, + { + "epoch": 0.07707717489556104, + "grad_norm": 46.07650375366211, + "learning_rate": 3.082034494552206e-07, + "loss": 0.1683, + "num_input_tokens_seen": 2138048, + "step": 3155 + }, + { + "epoch": 0.0771993257274082, + "grad_norm": 32.91551208496094, + "learning_rate": 3.0869204084624027e-07, + "loss": 0.099, + "num_input_tokens_seen": 2141504, + "step": 3160 + }, + { + "epoch": 0.07732147655925536, + "grad_norm": 55.034549713134766, + "learning_rate": 3.0918063223726e-07, + "loss": 0.1558, + "num_input_tokens_seen": 2144576, + "step": 3165 + }, + { + "epoch": 0.07744362739110254, + "grad_norm": 20.479995727539062, + "learning_rate": 3.0966922362827963e-07, + "loss": 0.1402, + "num_input_tokens_seen": 2147904, + "step": 3170 + }, + { + "epoch": 0.0775657782229497, + "grad_norm": 20.71714210510254, + "learning_rate": 3.101578150192993e-07, + "loss": 0.1425, + "num_input_tokens_seen": 2151360, + "step": 3175 + }, + { + "epoch": 0.07768792905479686, + "grad_norm": 21.133060455322266, + "learning_rate": 3.1064640641031904e-07, + "loss": 0.1607, + "num_input_tokens_seen": 2154816, + "step": 3180 + }, + { + "epoch": 0.07781007988664403, + "grad_norm": 18.6674747467041, + "learning_rate": 3.111349978013387e-07, + "loss": 0.1372, + "num_input_tokens_seen": 2157952, + "step": 3185 + }, + { + "epoch": 0.07793223071849119, + "grad_norm": 49.647891998291016, + "learning_rate": 3.116235891923584e-07, + "loss": 0.139, + "num_input_tokens_seen": 2161216, + "step": 3190 + }, + { + "epoch": 0.07805438155033836, + "grad_norm": 25.552492141723633, + "learning_rate": 3.1211218058337814e-07, + "loss": 0.1316, + "num_input_tokens_seen": 2164416, + "step": 3195 + }, + { + "epoch": 0.07817653238218553, + "grad_norm": 60.401676177978516, + "learning_rate": 3.1260077197439777e-07, + "loss": 0.1019, + "num_input_tokens_seen": 2167680, + "step": 3200 + }, + { + "epoch": 0.07829868321403269, + "grad_norm": 83.02261352539062, + "learning_rate": 3.1308936336541745e-07, + "loss": 0.177, + "num_input_tokens_seen": 2170688, + "step": 3205 + }, + { + "epoch": 0.07842083404587985, + "grad_norm": 31.337995529174805, + "learning_rate": 3.135779547564372e-07, + "loss": 0.0725, + "num_input_tokens_seen": 2173696, + "step": 3210 + }, + { + "epoch": 0.07854298487772701, + "grad_norm": 27.961589813232422, + "learning_rate": 3.1406654614745687e-07, + "loss": 0.1236, + "num_input_tokens_seen": 2177344, + "step": 3215 + }, + { + "epoch": 0.07866513570957419, + "grad_norm": 31.159038543701172, + "learning_rate": 3.1455513753847655e-07, + "loss": 0.13, + "num_input_tokens_seen": 2180672, + "step": 3220 + }, + { + "epoch": 0.07878728654142135, + "grad_norm": 57.42909622192383, + "learning_rate": 3.150437289294963e-07, + "loss": 0.1509, + "num_input_tokens_seen": 2183808, + "step": 3225 + }, + { + "epoch": 0.07890943737326851, + "grad_norm": 51.559627532958984, + "learning_rate": 3.1553232032051596e-07, + "loss": 0.1114, + "num_input_tokens_seen": 2187584, + "step": 3230 + }, + { + "epoch": 0.07903158820511567, + "grad_norm": 55.43442916870117, + "learning_rate": 3.160209117115356e-07, + "loss": 0.1227, + "num_input_tokens_seen": 2190784, + "step": 3235 + }, + { + "epoch": 0.07915373903696284, + "grad_norm": 42.4616813659668, + "learning_rate": 3.165095031025553e-07, + "loss": 0.1845, + "num_input_tokens_seen": 2194112, + "step": 3240 + }, + { + "epoch": 0.07927588986881001, + "grad_norm": 55.628013610839844, + "learning_rate": 3.16998094493575e-07, + "loss": 0.1198, + "num_input_tokens_seen": 2197504, + "step": 3245 + }, + { + "epoch": 0.07939804070065717, + "grad_norm": 34.316688537597656, + "learning_rate": 3.174866858845947e-07, + "loss": 0.1323, + "num_input_tokens_seen": 2200512, + "step": 3250 + }, + { + "epoch": 0.07952019153250434, + "grad_norm": 164.48052978515625, + "learning_rate": 3.179752772756144e-07, + "loss": 0.1792, + "num_input_tokens_seen": 2204224, + "step": 3255 + }, + { + "epoch": 0.0796423423643515, + "grad_norm": 91.04350280761719, + "learning_rate": 3.184638686666341e-07, + "loss": 0.1659, + "num_input_tokens_seen": 2207488, + "step": 3260 + }, + { + "epoch": 0.07976449319619866, + "grad_norm": 8.240289688110352, + "learning_rate": 3.1895246005765373e-07, + "loss": 0.1968, + "num_input_tokens_seen": 2210496, + "step": 3265 + }, + { + "epoch": 0.07988664402804584, + "grad_norm": 21.83321762084961, + "learning_rate": 3.1944105144867347e-07, + "loss": 0.0935, + "num_input_tokens_seen": 2213824, + "step": 3270 + }, + { + "epoch": 0.080008794859893, + "grad_norm": 38.04369354248047, + "learning_rate": 3.1992964283969315e-07, + "loss": 0.1406, + "num_input_tokens_seen": 2217600, + "step": 3275 + }, + { + "epoch": 0.08013094569174016, + "grad_norm": 54.49981689453125, + "learning_rate": 3.2041823423071283e-07, + "loss": 0.1532, + "num_input_tokens_seen": 2221056, + "step": 3280 + }, + { + "epoch": 0.08025309652358732, + "grad_norm": 38.160335540771484, + "learning_rate": 3.2090682562173256e-07, + "loss": 0.094, + "num_input_tokens_seen": 2224448, + "step": 3285 + }, + { + "epoch": 0.08037524735543448, + "grad_norm": 33.39901351928711, + "learning_rate": 3.2139541701275224e-07, + "loss": 0.1089, + "num_input_tokens_seen": 2228160, + "step": 3290 + }, + { + "epoch": 0.08049739818728166, + "grad_norm": 32.578369140625, + "learning_rate": 3.2188400840377187e-07, + "loss": 0.127, + "num_input_tokens_seen": 2231488, + "step": 3295 + }, + { + "epoch": 0.08061954901912882, + "grad_norm": 57.24115753173828, + "learning_rate": 3.223725997947916e-07, + "loss": 0.0842, + "num_input_tokens_seen": 2235008, + "step": 3300 + }, + { + "epoch": 0.08074169985097598, + "grad_norm": 71.96141052246094, + "learning_rate": 3.228611911858113e-07, + "loss": 0.2433, + "num_input_tokens_seen": 2238016, + "step": 3305 + }, + { + "epoch": 0.08086385068282315, + "grad_norm": 5.054819583892822, + "learning_rate": 3.2334978257683097e-07, + "loss": 0.1516, + "num_input_tokens_seen": 2241664, + "step": 3310 + }, + { + "epoch": 0.08098600151467031, + "grad_norm": 73.97016906738281, + "learning_rate": 3.238383739678507e-07, + "loss": 0.2021, + "num_input_tokens_seen": 2244864, + "step": 3315 + }, + { + "epoch": 0.08110815234651748, + "grad_norm": 13.356393814086914, + "learning_rate": 3.243269653588704e-07, + "loss": 0.0696, + "num_input_tokens_seen": 2248320, + "step": 3320 + }, + { + "epoch": 0.08123030317836465, + "grad_norm": 36.295074462890625, + "learning_rate": 3.2481555674989e-07, + "loss": 0.088, + "num_input_tokens_seen": 2251712, + "step": 3325 + }, + { + "epoch": 0.08135245401021181, + "grad_norm": 89.90282440185547, + "learning_rate": 3.2530414814090975e-07, + "loss": 0.1939, + "num_input_tokens_seen": 2254976, + "step": 3330 + }, + { + "epoch": 0.08147460484205897, + "grad_norm": 22.770984649658203, + "learning_rate": 3.2579273953192943e-07, + "loss": 0.2149, + "num_input_tokens_seen": 2258304, + "step": 3335 + }, + { + "epoch": 0.08159675567390615, + "grad_norm": 9.757627487182617, + "learning_rate": 3.262813309229491e-07, + "loss": 0.1501, + "num_input_tokens_seen": 2261632, + "step": 3340 + }, + { + "epoch": 0.08171890650575331, + "grad_norm": 39.16585922241211, + "learning_rate": 3.2676992231396884e-07, + "loss": 0.1428, + "num_input_tokens_seen": 2264576, + "step": 3345 + }, + { + "epoch": 0.08184105733760047, + "grad_norm": 54.00513458251953, + "learning_rate": 3.272585137049885e-07, + "loss": 0.2068, + "num_input_tokens_seen": 2267520, + "step": 3350 + }, + { + "epoch": 0.08196320816944763, + "grad_norm": 17.601160049438477, + "learning_rate": 3.2774710509600815e-07, + "loss": 0.0769, + "num_input_tokens_seen": 2271552, + "step": 3355 + }, + { + "epoch": 0.0820853590012948, + "grad_norm": 43.5909538269043, + "learning_rate": 3.282356964870279e-07, + "loss": 0.0671, + "num_input_tokens_seen": 2275072, + "step": 3360 + }, + { + "epoch": 0.08220750983314197, + "grad_norm": 36.28582763671875, + "learning_rate": 3.2872428787804757e-07, + "loss": 0.1913, + "num_input_tokens_seen": 2278912, + "step": 3365 + }, + { + "epoch": 0.08232966066498913, + "grad_norm": 48.7681999206543, + "learning_rate": 3.2921287926906725e-07, + "loss": 0.1205, + "num_input_tokens_seen": 2282624, + "step": 3370 + }, + { + "epoch": 0.0824518114968363, + "grad_norm": 6.605459690093994, + "learning_rate": 3.29701470660087e-07, + "loss": 0.192, + "num_input_tokens_seen": 2286336, + "step": 3375 + }, + { + "epoch": 0.08257396232868346, + "grad_norm": 38.756752014160156, + "learning_rate": 3.3019006205110667e-07, + "loss": 0.1847, + "num_input_tokens_seen": 2289792, + "step": 3380 + }, + { + "epoch": 0.08269611316053062, + "grad_norm": 37.65235900878906, + "learning_rate": 3.306786534421263e-07, + "loss": 0.1688, + "num_input_tokens_seen": 2293248, + "step": 3385 + }, + { + "epoch": 0.0828182639923778, + "grad_norm": 46.75678253173828, + "learning_rate": 3.3116724483314603e-07, + "loss": 0.1734, + "num_input_tokens_seen": 2296512, + "step": 3390 + }, + { + "epoch": 0.08294041482422496, + "grad_norm": 21.31465721130371, + "learning_rate": 3.316558362241657e-07, + "loss": 0.08, + "num_input_tokens_seen": 2299904, + "step": 3395 + }, + { + "epoch": 0.08306256565607212, + "grad_norm": 45.592308044433594, + "learning_rate": 3.321444276151854e-07, + "loss": 0.0663, + "num_input_tokens_seen": 2303296, + "step": 3400 + }, + { + "epoch": 0.08318471648791928, + "grad_norm": 42.98056411743164, + "learning_rate": 3.326330190062051e-07, + "loss": 0.1287, + "num_input_tokens_seen": 2307072, + "step": 3405 + }, + { + "epoch": 0.08330686731976644, + "grad_norm": 50.305381774902344, + "learning_rate": 3.331216103972248e-07, + "loss": 0.1319, + "num_input_tokens_seen": 2310144, + "step": 3410 + }, + { + "epoch": 0.08342901815161362, + "grad_norm": 91.99374389648438, + "learning_rate": 3.3361020178824444e-07, + "loss": 0.1975, + "num_input_tokens_seen": 2313472, + "step": 3415 + }, + { + "epoch": 0.08355116898346078, + "grad_norm": 27.918872833251953, + "learning_rate": 3.3409879317926417e-07, + "loss": 0.077, + "num_input_tokens_seen": 2316672, + "step": 3420 + }, + { + "epoch": 0.08367331981530794, + "grad_norm": 24.6630859375, + "learning_rate": 3.3458738457028385e-07, + "loss": 0.0846, + "num_input_tokens_seen": 2319872, + "step": 3425 + }, + { + "epoch": 0.0837954706471551, + "grad_norm": 12.345520973205566, + "learning_rate": 3.3507597596130353e-07, + "loss": 0.0846, + "num_input_tokens_seen": 2323328, + "step": 3430 + }, + { + "epoch": 0.08391762147900227, + "grad_norm": 29.239362716674805, + "learning_rate": 3.3556456735232327e-07, + "loss": 0.1385, + "num_input_tokens_seen": 2326208, + "step": 3435 + }, + { + "epoch": 0.08403977231084944, + "grad_norm": 18.62710189819336, + "learning_rate": 3.3605315874334295e-07, + "loss": 0.0969, + "num_input_tokens_seen": 2329536, + "step": 3440 + }, + { + "epoch": 0.0841619231426966, + "grad_norm": 87.40646362304688, + "learning_rate": 3.365417501343626e-07, + "loss": 0.1586, + "num_input_tokens_seen": 2333440, + "step": 3445 + }, + { + "epoch": 0.08428407397454377, + "grad_norm": 77.6432113647461, + "learning_rate": 3.370303415253823e-07, + "loss": 0.1598, + "num_input_tokens_seen": 2336640, + "step": 3450 + }, + { + "epoch": 0.08440622480639093, + "grad_norm": 41.03227615356445, + "learning_rate": 3.37518932916402e-07, + "loss": 0.1757, + "num_input_tokens_seen": 2339904, + "step": 3455 + }, + { + "epoch": 0.08452837563823809, + "grad_norm": 23.494503021240234, + "learning_rate": 3.3800752430742167e-07, + "loss": 0.0961, + "num_input_tokens_seen": 2343808, + "step": 3460 + }, + { + "epoch": 0.08465052647008527, + "grad_norm": 11.61443042755127, + "learning_rate": 3.384961156984414e-07, + "loss": 0.2321, + "num_input_tokens_seen": 2347264, + "step": 3465 + }, + { + "epoch": 0.08477267730193243, + "grad_norm": 26.035037994384766, + "learning_rate": 3.389847070894611e-07, + "loss": 0.187, + "num_input_tokens_seen": 2350464, + "step": 3470 + }, + { + "epoch": 0.08489482813377959, + "grad_norm": 52.27019500732422, + "learning_rate": 3.3947329848048077e-07, + "loss": 0.2035, + "num_input_tokens_seen": 2353536, + "step": 3475 + }, + { + "epoch": 0.08501697896562675, + "grad_norm": 22.691511154174805, + "learning_rate": 3.3996188987150045e-07, + "loss": 0.1203, + "num_input_tokens_seen": 2356864, + "step": 3480 + }, + { + "epoch": 0.08513912979747391, + "grad_norm": 37.64372253417969, + "learning_rate": 3.4045048126252013e-07, + "loss": 0.0895, + "num_input_tokens_seen": 2360192, + "step": 3485 + }, + { + "epoch": 0.08526128062932109, + "grad_norm": 36.15673065185547, + "learning_rate": 3.409390726535398e-07, + "loss": 0.0905, + "num_input_tokens_seen": 2363520, + "step": 3490 + }, + { + "epoch": 0.08538343146116825, + "grad_norm": 42.125545501708984, + "learning_rate": 3.4142766404455955e-07, + "loss": 0.0922, + "num_input_tokens_seen": 2366656, + "step": 3495 + }, + { + "epoch": 0.08550558229301541, + "grad_norm": 49.36576843261719, + "learning_rate": 3.4191625543557923e-07, + "loss": 0.1001, + "num_input_tokens_seen": 2369920, + "step": 3500 + }, + { + "epoch": 0.08562773312486258, + "grad_norm": 7.662629127502441, + "learning_rate": 3.424048468265989e-07, + "loss": 0.1379, + "num_input_tokens_seen": 2373312, + "step": 3505 + }, + { + "epoch": 0.08574988395670975, + "grad_norm": 79.47669982910156, + "learning_rate": 3.428934382176186e-07, + "loss": 0.1846, + "num_input_tokens_seen": 2376448, + "step": 3510 + }, + { + "epoch": 0.08587203478855691, + "grad_norm": 57.955997467041016, + "learning_rate": 3.433820296086383e-07, + "loss": 0.1408, + "num_input_tokens_seen": 2380160, + "step": 3515 + }, + { + "epoch": 0.08599418562040408, + "grad_norm": 23.892112731933594, + "learning_rate": 3.4387062099965796e-07, + "loss": 0.156, + "num_input_tokens_seen": 2383616, + "step": 3520 + }, + { + "epoch": 0.08611633645225124, + "grad_norm": 4.1693830490112305, + "learning_rate": 3.443592123906777e-07, + "loss": 0.0705, + "num_input_tokens_seen": 2386944, + "step": 3525 + }, + { + "epoch": 0.0862384872840984, + "grad_norm": 38.16275405883789, + "learning_rate": 3.4484780378169737e-07, + "loss": 0.0863, + "num_input_tokens_seen": 2390336, + "step": 3530 + }, + { + "epoch": 0.08636063811594558, + "grad_norm": 35.907779693603516, + "learning_rate": 3.4533639517271705e-07, + "loss": 0.1225, + "num_input_tokens_seen": 2393600, + "step": 3535 + }, + { + "epoch": 0.08648278894779274, + "grad_norm": 37.99102783203125, + "learning_rate": 3.4582498656373673e-07, + "loss": 0.1515, + "num_input_tokens_seen": 2396608, + "step": 3540 + }, + { + "epoch": 0.0866049397796399, + "grad_norm": 53.43939208984375, + "learning_rate": 3.463135779547564e-07, + "loss": 0.1262, + "num_input_tokens_seen": 2399680, + "step": 3545 + }, + { + "epoch": 0.08672709061148706, + "grad_norm": 21.493928909301758, + "learning_rate": 3.468021693457761e-07, + "loss": 0.0761, + "num_input_tokens_seen": 2402432, + "step": 3550 + }, + { + "epoch": 0.08684924144333422, + "grad_norm": 21.230327606201172, + "learning_rate": 3.4729076073679583e-07, + "loss": 0.2028, + "num_input_tokens_seen": 2405504, + "step": 3555 + }, + { + "epoch": 0.0869713922751814, + "grad_norm": 55.68876647949219, + "learning_rate": 3.477793521278155e-07, + "loss": 0.1644, + "num_input_tokens_seen": 2409024, + "step": 3560 + }, + { + "epoch": 0.08709354310702856, + "grad_norm": 90.71858215332031, + "learning_rate": 3.482679435188352e-07, + "loss": 0.1153, + "num_input_tokens_seen": 2412416, + "step": 3565 + }, + { + "epoch": 0.08721569393887572, + "grad_norm": 106.14388275146484, + "learning_rate": 3.487565349098549e-07, + "loss": 0.1801, + "num_input_tokens_seen": 2416192, + "step": 3570 + }, + { + "epoch": 0.08733784477072289, + "grad_norm": 27.10405158996582, + "learning_rate": 3.4924512630087456e-07, + "loss": 0.1723, + "num_input_tokens_seen": 2419840, + "step": 3575 + }, + { + "epoch": 0.08745999560257005, + "grad_norm": 26.839338302612305, + "learning_rate": 3.4973371769189424e-07, + "loss": 0.0996, + "num_input_tokens_seen": 2422976, + "step": 3580 + }, + { + "epoch": 0.08758214643441722, + "grad_norm": 64.43603515625, + "learning_rate": 3.5022230908291397e-07, + "loss": 0.1036, + "num_input_tokens_seen": 2426496, + "step": 3585 + }, + { + "epoch": 0.08770429726626439, + "grad_norm": 39.594139099121094, + "learning_rate": 3.5071090047393365e-07, + "loss": 0.0971, + "num_input_tokens_seen": 2429376, + "step": 3590 + }, + { + "epoch": 0.08782644809811155, + "grad_norm": 41.60728073120117, + "learning_rate": 3.5119949186495333e-07, + "loss": 0.1917, + "num_input_tokens_seen": 2432576, + "step": 3595 + }, + { + "epoch": 0.08794859892995871, + "grad_norm": 47.88832473754883, + "learning_rate": 3.51688083255973e-07, + "loss": 0.1268, + "num_input_tokens_seen": 2436096, + "step": 3600 + }, + { + "epoch": 0.08807074976180587, + "grad_norm": 66.10047149658203, + "learning_rate": 3.521766746469927e-07, + "loss": 0.1253, + "num_input_tokens_seen": 2440064, + "step": 3605 + }, + { + "epoch": 0.08819290059365305, + "grad_norm": 36.456153869628906, + "learning_rate": 3.526652660380124e-07, + "loss": 0.1452, + "num_input_tokens_seen": 2443200, + "step": 3610 + }, + { + "epoch": 0.08831505142550021, + "grad_norm": 14.999835968017578, + "learning_rate": 3.531538574290321e-07, + "loss": 0.2102, + "num_input_tokens_seen": 2446848, + "step": 3615 + }, + { + "epoch": 0.08843720225734737, + "grad_norm": 33.82028579711914, + "learning_rate": 3.536424488200518e-07, + "loss": 0.131, + "num_input_tokens_seen": 2450368, + "step": 3620 + }, + { + "epoch": 0.08855935308919453, + "grad_norm": 27.221302032470703, + "learning_rate": 3.541310402110715e-07, + "loss": 0.109, + "num_input_tokens_seen": 2454016, + "step": 3625 + }, + { + "epoch": 0.0886815039210417, + "grad_norm": 57.56504440307617, + "learning_rate": 3.5461963160209116e-07, + "loss": 0.1683, + "num_input_tokens_seen": 2457472, + "step": 3630 + }, + { + "epoch": 0.08880365475288887, + "grad_norm": 26.43320083618164, + "learning_rate": 3.5510822299311084e-07, + "loss": 0.0988, + "num_input_tokens_seen": 2461312, + "step": 3635 + }, + { + "epoch": 0.08892580558473603, + "grad_norm": 23.146326065063477, + "learning_rate": 3.555968143841305e-07, + "loss": 0.1124, + "num_input_tokens_seen": 2464512, + "step": 3640 + }, + { + "epoch": 0.0890479564165832, + "grad_norm": 42.45225524902344, + "learning_rate": 3.5608540577515025e-07, + "loss": 0.1564, + "num_input_tokens_seen": 2468288, + "step": 3645 + }, + { + "epoch": 0.08917010724843036, + "grad_norm": 38.197105407714844, + "learning_rate": 3.5657399716616993e-07, + "loss": 0.1885, + "num_input_tokens_seen": 2471424, + "step": 3650 + }, + { + "epoch": 0.08929225808027752, + "grad_norm": 13.764596939086914, + "learning_rate": 3.570625885571896e-07, + "loss": 0.0834, + "num_input_tokens_seen": 2475008, + "step": 3655 + }, + { + "epoch": 0.0894144089121247, + "grad_norm": 43.19955825805664, + "learning_rate": 3.5755117994820924e-07, + "loss": 0.1104, + "num_input_tokens_seen": 2478080, + "step": 3660 + }, + { + "epoch": 0.08953655974397186, + "grad_norm": 17.45258140563965, + "learning_rate": 3.58039771339229e-07, + "loss": 0.136, + "num_input_tokens_seen": 2481472, + "step": 3665 + }, + { + "epoch": 0.08965871057581902, + "grad_norm": 36.92076110839844, + "learning_rate": 3.5852836273024866e-07, + "loss": 0.2465, + "num_input_tokens_seen": 2484736, + "step": 3670 + }, + { + "epoch": 0.08978086140766618, + "grad_norm": 42.25662612915039, + "learning_rate": 3.590169541212684e-07, + "loss": 0.0973, + "num_input_tokens_seen": 2487872, + "step": 3675 + }, + { + "epoch": 0.08990301223951336, + "grad_norm": 43.27426528930664, + "learning_rate": 3.595055455122881e-07, + "loss": 0.1254, + "num_input_tokens_seen": 2491392, + "step": 3680 + }, + { + "epoch": 0.09002516307136052, + "grad_norm": 56.761863708496094, + "learning_rate": 3.5999413690330776e-07, + "loss": 0.1701, + "num_input_tokens_seen": 2494784, + "step": 3685 + }, + { + "epoch": 0.09014731390320768, + "grad_norm": 16.943342208862305, + "learning_rate": 3.604827282943275e-07, + "loss": 0.0829, + "num_input_tokens_seen": 2497984, + "step": 3690 + }, + { + "epoch": 0.09026946473505484, + "grad_norm": 36.46076965332031, + "learning_rate": 3.609713196853471e-07, + "loss": 0.2115, + "num_input_tokens_seen": 2500992, + "step": 3695 + }, + { + "epoch": 0.090391615566902, + "grad_norm": 30.30020523071289, + "learning_rate": 3.614599110763668e-07, + "loss": 0.1073, + "num_input_tokens_seen": 2504384, + "step": 3700 + }, + { + "epoch": 0.09051376639874918, + "grad_norm": 25.540746688842773, + "learning_rate": 3.619485024673865e-07, + "loss": 0.1459, + "num_input_tokens_seen": 2508096, + "step": 3705 + }, + { + "epoch": 0.09063591723059634, + "grad_norm": 83.55743408203125, + "learning_rate": 3.624370938584062e-07, + "loss": 0.2189, + "num_input_tokens_seen": 2511232, + "step": 3710 + }, + { + "epoch": 0.0907580680624435, + "grad_norm": 20.501020431518555, + "learning_rate": 3.629256852494259e-07, + "loss": 0.1667, + "num_input_tokens_seen": 2514240, + "step": 3715 + }, + { + "epoch": 0.09088021889429067, + "grad_norm": 64.41683959960938, + "learning_rate": 3.6341427664044563e-07, + "loss": 0.1413, + "num_input_tokens_seen": 2518144, + "step": 3720 + }, + { + "epoch": 0.09100236972613783, + "grad_norm": 57.561256408691406, + "learning_rate": 3.6390286803146526e-07, + "loss": 0.1179, + "num_input_tokens_seen": 2521344, + "step": 3725 + }, + { + "epoch": 0.091124520557985, + "grad_norm": 44.93265914916992, + "learning_rate": 3.6439145942248494e-07, + "loss": 0.2325, + "num_input_tokens_seen": 2524288, + "step": 3730 + }, + { + "epoch": 0.09124667138983217, + "grad_norm": 54.41051483154297, + "learning_rate": 3.648800508135046e-07, + "loss": 0.1381, + "num_input_tokens_seen": 2528064, + "step": 3735 + }, + { + "epoch": 0.09136882222167933, + "grad_norm": 65.68891906738281, + "learning_rate": 3.6536864220452436e-07, + "loss": 0.1633, + "num_input_tokens_seen": 2531392, + "step": 3740 + }, + { + "epoch": 0.09149097305352649, + "grad_norm": 39.20538330078125, + "learning_rate": 3.6585723359554404e-07, + "loss": 0.0835, + "num_input_tokens_seen": 2534720, + "step": 3745 + }, + { + "epoch": 0.09161312388537365, + "grad_norm": 43.629066467285156, + "learning_rate": 3.663458249865637e-07, + "loss": 0.2312, + "num_input_tokens_seen": 2538304, + "step": 3750 + }, + { + "epoch": 0.09173527471722083, + "grad_norm": 28.365129470825195, + "learning_rate": 3.668344163775834e-07, + "loss": 0.1795, + "num_input_tokens_seen": 2541696, + "step": 3755 + }, + { + "epoch": 0.09185742554906799, + "grad_norm": 4.946837425231934, + "learning_rate": 3.673230077686031e-07, + "loss": 0.1341, + "num_input_tokens_seen": 2545216, + "step": 3760 + }, + { + "epoch": 0.09197957638091515, + "grad_norm": 51.57637405395508, + "learning_rate": 3.6781159915962276e-07, + "loss": 0.126, + "num_input_tokens_seen": 2548480, + "step": 3765 + }, + { + "epoch": 0.09210172721276232, + "grad_norm": 20.10322380065918, + "learning_rate": 3.683001905506425e-07, + "loss": 0.0626, + "num_input_tokens_seen": 2551808, + "step": 3770 + }, + { + "epoch": 0.09222387804460948, + "grad_norm": 51.054447174072266, + "learning_rate": 3.687887819416622e-07, + "loss": 0.1606, + "num_input_tokens_seen": 2555200, + "step": 3775 + }, + { + "epoch": 0.09234602887645665, + "grad_norm": 37.11040115356445, + "learning_rate": 3.6927737333268186e-07, + "loss": 0.1056, + "num_input_tokens_seen": 2558336, + "step": 3780 + }, + { + "epoch": 0.09246817970830382, + "grad_norm": 35.62503433227539, + "learning_rate": 3.6976596472370154e-07, + "loss": 0.148, + "num_input_tokens_seen": 2561856, + "step": 3785 + }, + { + "epoch": 0.09259033054015098, + "grad_norm": 27.626007080078125, + "learning_rate": 3.702545561147212e-07, + "loss": 0.2257, + "num_input_tokens_seen": 2565120, + "step": 3790 + }, + { + "epoch": 0.09271248137199814, + "grad_norm": 81.18321990966797, + "learning_rate": 3.707431475057409e-07, + "loss": 0.1475, + "num_input_tokens_seen": 2568704, + "step": 3795 + }, + { + "epoch": 0.0928346322038453, + "grad_norm": 25.010517120361328, + "learning_rate": 3.7123173889676064e-07, + "loss": 0.1184, + "num_input_tokens_seen": 2572288, + "step": 3800 + }, + { + "epoch": 0.09295678303569248, + "grad_norm": 67.2930679321289, + "learning_rate": 3.717203302877803e-07, + "loss": 0.1522, + "num_input_tokens_seen": 2575872, + "step": 3805 + }, + { + "epoch": 0.09307893386753964, + "grad_norm": 20.680904388427734, + "learning_rate": 3.722089216788e-07, + "loss": 0.1005, + "num_input_tokens_seen": 2578624, + "step": 3810 + }, + { + "epoch": 0.0932010846993868, + "grad_norm": 48.87485885620117, + "learning_rate": 3.726975130698197e-07, + "loss": 0.075, + "num_input_tokens_seen": 2581888, + "step": 3815 + }, + { + "epoch": 0.09332323553123396, + "grad_norm": 29.187978744506836, + "learning_rate": 3.7318610446083936e-07, + "loss": 0.1201, + "num_input_tokens_seen": 2585216, + "step": 3820 + }, + { + "epoch": 0.09344538636308113, + "grad_norm": 25.896148681640625, + "learning_rate": 3.7367469585185904e-07, + "loss": 0.1435, + "num_input_tokens_seen": 2588480, + "step": 3825 + }, + { + "epoch": 0.0935675371949283, + "grad_norm": 32.884918212890625, + "learning_rate": 3.741632872428788e-07, + "loss": 0.1068, + "num_input_tokens_seen": 2591488, + "step": 3830 + }, + { + "epoch": 0.09368968802677546, + "grad_norm": 4.826634407043457, + "learning_rate": 3.7465187863389846e-07, + "loss": 0.0608, + "num_input_tokens_seen": 2594560, + "step": 3835 + }, + { + "epoch": 0.09381183885862263, + "grad_norm": 45.7083625793457, + "learning_rate": 3.7514047002491814e-07, + "loss": 0.166, + "num_input_tokens_seen": 2597952, + "step": 3840 + }, + { + "epoch": 0.09393398969046979, + "grad_norm": 76.23258972167969, + "learning_rate": 3.756290614159378e-07, + "loss": 0.2698, + "num_input_tokens_seen": 2602112, + "step": 3845 + }, + { + "epoch": 0.09405614052231696, + "grad_norm": 49.7977294921875, + "learning_rate": 3.761176528069575e-07, + "loss": 0.1339, + "num_input_tokens_seen": 2605184, + "step": 3850 + }, + { + "epoch": 0.09417829135416413, + "grad_norm": 29.901966094970703, + "learning_rate": 3.766062441979772e-07, + "loss": 0.1611, + "num_input_tokens_seen": 2608384, + "step": 3855 + }, + { + "epoch": 0.09430044218601129, + "grad_norm": 99.41666412353516, + "learning_rate": 3.770948355889969e-07, + "loss": 0.0959, + "num_input_tokens_seen": 2611968, + "step": 3860 + }, + { + "epoch": 0.09442259301785845, + "grad_norm": 55.669315338134766, + "learning_rate": 3.775834269800166e-07, + "loss": 0.3012, + "num_input_tokens_seen": 2615168, + "step": 3865 + }, + { + "epoch": 0.09454474384970561, + "grad_norm": 40.56696701049805, + "learning_rate": 3.780720183710363e-07, + "loss": 0.1067, + "num_input_tokens_seen": 2618688, + "step": 3870 + }, + { + "epoch": 0.09466689468155279, + "grad_norm": 35.744956970214844, + "learning_rate": 3.7856060976205596e-07, + "loss": 0.1673, + "num_input_tokens_seen": 2622080, + "step": 3875 + }, + { + "epoch": 0.09478904551339995, + "grad_norm": 21.277301788330078, + "learning_rate": 3.7904920115307564e-07, + "loss": 0.1458, + "num_input_tokens_seen": 2625216, + "step": 3880 + }, + { + "epoch": 0.09491119634524711, + "grad_norm": 23.259803771972656, + "learning_rate": 3.795377925440953e-07, + "loss": 0.0596, + "num_input_tokens_seen": 2628736, + "step": 3885 + }, + { + "epoch": 0.09503334717709427, + "grad_norm": 48.48746871948242, + "learning_rate": 3.8002638393511506e-07, + "loss": 0.0805, + "num_input_tokens_seen": 2631744, + "step": 3890 + }, + { + "epoch": 0.09515549800894144, + "grad_norm": 98.93216705322266, + "learning_rate": 3.8051497532613474e-07, + "loss": 0.1671, + "num_input_tokens_seen": 2634688, + "step": 3895 + }, + { + "epoch": 0.09527764884078861, + "grad_norm": 37.49694061279297, + "learning_rate": 3.810035667171544e-07, + "loss": 0.1827, + "num_input_tokens_seen": 2637824, + "step": 3900 + }, + { + "epoch": 0.09539979967263577, + "grad_norm": 84.37271881103516, + "learning_rate": 3.814921581081741e-07, + "loss": 0.1409, + "num_input_tokens_seen": 2640768, + "step": 3905 + }, + { + "epoch": 0.09552195050448294, + "grad_norm": 53.07040786743164, + "learning_rate": 3.819807494991938e-07, + "loss": 0.256, + "num_input_tokens_seen": 2643648, + "step": 3910 + }, + { + "epoch": 0.0956441013363301, + "grad_norm": 3.5294644832611084, + "learning_rate": 3.8246934089021347e-07, + "loss": 0.155, + "num_input_tokens_seen": 2647296, + "step": 3915 + }, + { + "epoch": 0.09576625216817726, + "grad_norm": 43.76532745361328, + "learning_rate": 3.829579322812332e-07, + "loss": 0.1863, + "num_input_tokens_seen": 2650368, + "step": 3920 + }, + { + "epoch": 0.09588840300002444, + "grad_norm": 19.401376724243164, + "learning_rate": 3.834465236722529e-07, + "loss": 0.1506, + "num_input_tokens_seen": 2653696, + "step": 3925 + }, + { + "epoch": 0.0960105538318716, + "grad_norm": 53.53800964355469, + "learning_rate": 3.8393511506327256e-07, + "loss": 0.1153, + "num_input_tokens_seen": 2656896, + "step": 3930 + }, + { + "epoch": 0.09613270466371876, + "grad_norm": 19.80342674255371, + "learning_rate": 3.844237064542923e-07, + "loss": 0.0772, + "num_input_tokens_seen": 2660160, + "step": 3935 + }, + { + "epoch": 0.09625485549556592, + "grad_norm": 58.60802459716797, + "learning_rate": 3.8491229784531193e-07, + "loss": 0.1788, + "num_input_tokens_seen": 2663232, + "step": 3940 + }, + { + "epoch": 0.09637700632741308, + "grad_norm": 31.286151885986328, + "learning_rate": 3.854008892363316e-07, + "loss": 0.0492, + "num_input_tokens_seen": 2666432, + "step": 3945 + }, + { + "epoch": 0.09649915715926026, + "grad_norm": 32.254730224609375, + "learning_rate": 3.8588948062735134e-07, + "loss": 0.2631, + "num_input_tokens_seen": 2669504, + "step": 3950 + }, + { + "epoch": 0.09662130799110742, + "grad_norm": 24.599687576293945, + "learning_rate": 3.86378072018371e-07, + "loss": 0.0781, + "num_input_tokens_seen": 2672896, + "step": 3955 + }, + { + "epoch": 0.09674345882295458, + "grad_norm": 19.06269645690918, + "learning_rate": 3.868666634093907e-07, + "loss": 0.0871, + "num_input_tokens_seen": 2676096, + "step": 3960 + }, + { + "epoch": 0.09686560965480175, + "grad_norm": 24.946197509765625, + "learning_rate": 3.8735525480041044e-07, + "loss": 0.0841, + "num_input_tokens_seen": 2679168, + "step": 3965 + }, + { + "epoch": 0.09698776048664891, + "grad_norm": 118.59291076660156, + "learning_rate": 3.8784384619143007e-07, + "loss": 0.18, + "num_input_tokens_seen": 2682240, + "step": 3970 + }, + { + "epoch": 0.09710991131849608, + "grad_norm": 52.05529022216797, + "learning_rate": 3.8833243758244975e-07, + "loss": 0.2067, + "num_input_tokens_seen": 2686016, + "step": 3975 + }, + { + "epoch": 0.09723206215034325, + "grad_norm": 41.86702346801758, + "learning_rate": 3.888210289734695e-07, + "loss": 0.0556, + "num_input_tokens_seen": 2689088, + "step": 3980 + }, + { + "epoch": 0.09735421298219041, + "grad_norm": 66.6626205444336, + "learning_rate": 3.8930962036448916e-07, + "loss": 0.215, + "num_input_tokens_seen": 2692416, + "step": 3985 + }, + { + "epoch": 0.09747636381403757, + "grad_norm": 14.627568244934082, + "learning_rate": 3.8979821175550885e-07, + "loss": 0.1439, + "num_input_tokens_seen": 2695744, + "step": 3990 + }, + { + "epoch": 0.09759851464588473, + "grad_norm": 1.7436994314193726, + "learning_rate": 3.902868031465286e-07, + "loss": 0.1125, + "num_input_tokens_seen": 2698880, + "step": 3995 + }, + { + "epoch": 0.09772066547773191, + "grad_norm": 17.24464988708496, + "learning_rate": 3.907753945375482e-07, + "loss": 0.1243, + "num_input_tokens_seen": 2703040, + "step": 4000 + }, + { + "epoch": 0.09784281630957907, + "grad_norm": 4.9105682373046875, + "learning_rate": 3.912639859285679e-07, + "loss": 0.137, + "num_input_tokens_seen": 2706816, + "step": 4005 + }, + { + "epoch": 0.09796496714142623, + "grad_norm": 44.11586380004883, + "learning_rate": 3.917525773195876e-07, + "loss": 0.1429, + "num_input_tokens_seen": 2710336, + "step": 4010 + }, + { + "epoch": 0.0980871179732734, + "grad_norm": 1.7405118942260742, + "learning_rate": 3.922411687106073e-07, + "loss": 0.0883, + "num_input_tokens_seen": 2713344, + "step": 4015 + }, + { + "epoch": 0.09820926880512057, + "grad_norm": 2.1863999366760254, + "learning_rate": 3.92729760101627e-07, + "loss": 0.1977, + "num_input_tokens_seen": 2716736, + "step": 4020 + }, + { + "epoch": 0.09833141963696773, + "grad_norm": 65.2334976196289, + "learning_rate": 3.932183514926467e-07, + "loss": 0.2043, + "num_input_tokens_seen": 2720640, + "step": 4025 + }, + { + "epoch": 0.0984535704688149, + "grad_norm": 88.74542236328125, + "learning_rate": 3.9370694288366635e-07, + "loss": 0.257, + "num_input_tokens_seen": 2723840, + "step": 4030 + }, + { + "epoch": 0.09857572130066206, + "grad_norm": 9.849947929382324, + "learning_rate": 3.9419553427468603e-07, + "loss": 0.094, + "num_input_tokens_seen": 2726976, + "step": 4035 + }, + { + "epoch": 0.09869787213250922, + "grad_norm": 69.36286163330078, + "learning_rate": 3.9468412566570576e-07, + "loss": 0.2761, + "num_input_tokens_seen": 2730368, + "step": 4040 + }, + { + "epoch": 0.0988200229643564, + "grad_norm": 28.743057250976562, + "learning_rate": 3.9517271705672545e-07, + "loss": 0.1444, + "num_input_tokens_seen": 2733504, + "step": 4045 + }, + { + "epoch": 0.09894217379620356, + "grad_norm": 25.775449752807617, + "learning_rate": 3.9566130844774513e-07, + "loss": 0.1586, + "num_input_tokens_seen": 2736704, + "step": 4050 + }, + { + "epoch": 0.09906432462805072, + "grad_norm": 16.260456085205078, + "learning_rate": 3.9614989983876486e-07, + "loss": 0.0806, + "num_input_tokens_seen": 2739904, + "step": 4055 + }, + { + "epoch": 0.09918647545989788, + "grad_norm": 29.549999237060547, + "learning_rate": 3.966384912297845e-07, + "loss": 0.0966, + "num_input_tokens_seen": 2743424, + "step": 4060 + }, + { + "epoch": 0.09930862629174504, + "grad_norm": 43.79568099975586, + "learning_rate": 3.9712708262080417e-07, + "loss": 0.0883, + "num_input_tokens_seen": 2746560, + "step": 4065 + }, + { + "epoch": 0.09943077712359222, + "grad_norm": 31.825456619262695, + "learning_rate": 3.976156740118239e-07, + "loss": 0.2489, + "num_input_tokens_seen": 2750528, + "step": 4070 + }, + { + "epoch": 0.09955292795543938, + "grad_norm": 29.87146759033203, + "learning_rate": 3.981042654028436e-07, + "loss": 0.1553, + "num_input_tokens_seen": 2754048, + "step": 4075 + }, + { + "epoch": 0.09967507878728654, + "grad_norm": 104.86454010009766, + "learning_rate": 3.9859285679386327e-07, + "loss": 0.2288, + "num_input_tokens_seen": 2757632, + "step": 4080 + }, + { + "epoch": 0.0997972296191337, + "grad_norm": 39.14055252075195, + "learning_rate": 3.99081448184883e-07, + "loss": 0.1119, + "num_input_tokens_seen": 2761408, + "step": 4085 + }, + { + "epoch": 0.09991938045098087, + "grad_norm": 58.73264694213867, + "learning_rate": 3.9957003957590263e-07, + "loss": 0.1223, + "num_input_tokens_seen": 2764608, + "step": 4090 + }, + { + "epoch": 0.10004153128282804, + "grad_norm": 29.07638931274414, + "learning_rate": 4.000586309669223e-07, + "loss": 0.1659, + "num_input_tokens_seen": 2767616, + "step": 4095 + }, + { + "epoch": 0.1001636821146752, + "grad_norm": 4.408840179443359, + "learning_rate": 4.0054722235794205e-07, + "loss": 0.0863, + "num_input_tokens_seen": 2770688, + "step": 4100 + }, + { + "epoch": 0.10028583294652237, + "grad_norm": 36.848358154296875, + "learning_rate": 4.0103581374896173e-07, + "loss": 0.1221, + "num_input_tokens_seen": 2773824, + "step": 4105 + }, + { + "epoch": 0.10040798377836953, + "grad_norm": 5.810712814331055, + "learning_rate": 4.015244051399814e-07, + "loss": 0.0746, + "num_input_tokens_seen": 2777344, + "step": 4110 + }, + { + "epoch": 0.10053013461021669, + "grad_norm": 1.0392895936965942, + "learning_rate": 4.0201299653100114e-07, + "loss": 0.0951, + "num_input_tokens_seen": 2781632, + "step": 4115 + }, + { + "epoch": 0.10065228544206387, + "grad_norm": 50.974159240722656, + "learning_rate": 4.0250158792202077e-07, + "loss": 0.2181, + "num_input_tokens_seen": 2785024, + "step": 4120 + }, + { + "epoch": 0.10077443627391103, + "grad_norm": 22.851579666137695, + "learning_rate": 4.0299017931304045e-07, + "loss": 0.2985, + "num_input_tokens_seen": 2788416, + "step": 4125 + }, + { + "epoch": 0.10089658710575819, + "grad_norm": 43.48515319824219, + "learning_rate": 4.034787707040602e-07, + "loss": 0.1828, + "num_input_tokens_seen": 2791872, + "step": 4130 + }, + { + "epoch": 0.10101873793760535, + "grad_norm": 15.764205932617188, + "learning_rate": 4.0396736209507987e-07, + "loss": 0.0847, + "num_input_tokens_seen": 2795200, + "step": 4135 + }, + { + "epoch": 0.10114088876945251, + "grad_norm": 52.164432525634766, + "learning_rate": 4.0445595348609955e-07, + "loss": 0.1232, + "num_input_tokens_seen": 2798784, + "step": 4140 + }, + { + "epoch": 0.10126303960129969, + "grad_norm": 5.731689929962158, + "learning_rate": 4.049445448771193e-07, + "loss": 0.1623, + "num_input_tokens_seen": 2802432, + "step": 4145 + }, + { + "epoch": 0.10138519043314685, + "grad_norm": 60.42582702636719, + "learning_rate": 4.0543313626813897e-07, + "loss": 0.1074, + "num_input_tokens_seen": 2806144, + "step": 4150 + }, + { + "epoch": 0.10150734126499401, + "grad_norm": 58.094215393066406, + "learning_rate": 4.059217276591586e-07, + "loss": 0.116, + "num_input_tokens_seen": 2809472, + "step": 4155 + }, + { + "epoch": 0.10162949209684118, + "grad_norm": 41.62969970703125, + "learning_rate": 4.0641031905017833e-07, + "loss": 0.1349, + "num_input_tokens_seen": 2813632, + "step": 4160 + }, + { + "epoch": 0.10175164292868834, + "grad_norm": 21.467771530151367, + "learning_rate": 4.06898910441198e-07, + "loss": 0.1785, + "num_input_tokens_seen": 2816576, + "step": 4165 + }, + { + "epoch": 0.10187379376053551, + "grad_norm": 38.030250549316406, + "learning_rate": 4.073875018322177e-07, + "loss": 0.1915, + "num_input_tokens_seen": 2819904, + "step": 4170 + }, + { + "epoch": 0.10199594459238268, + "grad_norm": 46.13201904296875, + "learning_rate": 4.078760932232374e-07, + "loss": 0.09, + "num_input_tokens_seen": 2823360, + "step": 4175 + }, + { + "epoch": 0.10211809542422984, + "grad_norm": 36.75332260131836, + "learning_rate": 4.083646846142571e-07, + "loss": 0.1443, + "num_input_tokens_seen": 2826752, + "step": 4180 + }, + { + "epoch": 0.102240246256077, + "grad_norm": 54.063507080078125, + "learning_rate": 4.0885327600527673e-07, + "loss": 0.1226, + "num_input_tokens_seen": 2830016, + "step": 4185 + }, + { + "epoch": 0.10236239708792418, + "grad_norm": 31.851612091064453, + "learning_rate": 4.0934186739629647e-07, + "loss": 0.1663, + "num_input_tokens_seen": 2833408, + "step": 4190 + }, + { + "epoch": 0.10248454791977134, + "grad_norm": 50.87443923950195, + "learning_rate": 4.0983045878731615e-07, + "loss": 0.1124, + "num_input_tokens_seen": 2836480, + "step": 4195 + }, + { + "epoch": 0.1026066987516185, + "grad_norm": 26.74880599975586, + "learning_rate": 4.1031905017833583e-07, + "loss": 0.1346, + "num_input_tokens_seen": 2839744, + "step": 4200 + }, + { + "epoch": 0.10272884958346566, + "grad_norm": 35.637325286865234, + "learning_rate": 4.1080764156935557e-07, + "loss": 0.1257, + "num_input_tokens_seen": 2842880, + "step": 4205 + }, + { + "epoch": 0.10285100041531282, + "grad_norm": 8.508408546447754, + "learning_rate": 4.1129623296037525e-07, + "loss": 0.0673, + "num_input_tokens_seen": 2846272, + "step": 4210 + }, + { + "epoch": 0.10297315124716, + "grad_norm": 10.577549934387207, + "learning_rate": 4.117848243513949e-07, + "loss": 0.0861, + "num_input_tokens_seen": 2849536, + "step": 4215 + }, + { + "epoch": 0.10309530207900716, + "grad_norm": 5.944873809814453, + "learning_rate": 4.122734157424146e-07, + "loss": 0.1275, + "num_input_tokens_seen": 2852800, + "step": 4220 + }, + { + "epoch": 0.10321745291085432, + "grad_norm": 25.301698684692383, + "learning_rate": 4.127620071334343e-07, + "loss": 0.1408, + "num_input_tokens_seen": 2856000, + "step": 4225 + }, + { + "epoch": 0.10333960374270149, + "grad_norm": 56.8901481628418, + "learning_rate": 4.1325059852445397e-07, + "loss": 0.1893, + "num_input_tokens_seen": 2858944, + "step": 4230 + }, + { + "epoch": 0.10346175457454865, + "grad_norm": 38.725032806396484, + "learning_rate": 4.137391899154737e-07, + "loss": 0.1007, + "num_input_tokens_seen": 2862208, + "step": 4235 + }, + { + "epoch": 0.10358390540639582, + "grad_norm": 27.819990158081055, + "learning_rate": 4.142277813064934e-07, + "loss": 0.0926, + "num_input_tokens_seen": 2865408, + "step": 4240 + }, + { + "epoch": 0.10370605623824299, + "grad_norm": 18.120283126831055, + "learning_rate": 4.14716372697513e-07, + "loss": 0.1382, + "num_input_tokens_seen": 2869120, + "step": 4245 + }, + { + "epoch": 0.10382820707009015, + "grad_norm": 13.521397590637207, + "learning_rate": 4.1520496408853275e-07, + "loss": 0.0831, + "num_input_tokens_seen": 2872448, + "step": 4250 + }, + { + "epoch": 0.10395035790193731, + "grad_norm": 33.140411376953125, + "learning_rate": 4.1569355547955243e-07, + "loss": 0.2276, + "num_input_tokens_seen": 2875776, + "step": 4255 + }, + { + "epoch": 0.10407250873378447, + "grad_norm": 39.06216049194336, + "learning_rate": 4.161821468705721e-07, + "loss": 0.1392, + "num_input_tokens_seen": 2878720, + "step": 4260 + }, + { + "epoch": 0.10419465956563165, + "grad_norm": 39.4327507019043, + "learning_rate": 4.1667073826159185e-07, + "loss": 0.154, + "num_input_tokens_seen": 2882432, + "step": 4265 + }, + { + "epoch": 0.10431681039747881, + "grad_norm": 18.142154693603516, + "learning_rate": 4.1715932965261153e-07, + "loss": 0.1137, + "num_input_tokens_seen": 2885824, + "step": 4270 + }, + { + "epoch": 0.10443896122932597, + "grad_norm": 33.220359802246094, + "learning_rate": 4.1764792104363116e-07, + "loss": 0.15, + "num_input_tokens_seen": 2889408, + "step": 4275 + }, + { + "epoch": 0.10456111206117313, + "grad_norm": 48.25017166137695, + "learning_rate": 4.181365124346509e-07, + "loss": 0.221, + "num_input_tokens_seen": 2892416, + "step": 4280 + }, + { + "epoch": 0.1046832628930203, + "grad_norm": 38.912113189697266, + "learning_rate": 4.1862510382567057e-07, + "loss": 0.1015, + "num_input_tokens_seen": 2895552, + "step": 4285 + }, + { + "epoch": 0.10480541372486747, + "grad_norm": 24.962831497192383, + "learning_rate": 4.1911369521669025e-07, + "loss": 0.1397, + "num_input_tokens_seen": 2899264, + "step": 4290 + }, + { + "epoch": 0.10492756455671463, + "grad_norm": 14.322883605957031, + "learning_rate": 4.1960228660771e-07, + "loss": 0.0841, + "num_input_tokens_seen": 2902464, + "step": 4295 + }, + { + "epoch": 0.1050497153885618, + "grad_norm": 26.260435104370117, + "learning_rate": 4.2009087799872967e-07, + "loss": 0.1638, + "num_input_tokens_seen": 2906304, + "step": 4300 + }, + { + "epoch": 0.10517186622040896, + "grad_norm": 11.948546409606934, + "learning_rate": 4.205794693897493e-07, + "loss": 0.1394, + "num_input_tokens_seen": 2909632, + "step": 4305 + }, + { + "epoch": 0.10529401705225612, + "grad_norm": 34.74366760253906, + "learning_rate": 4.2106806078076903e-07, + "loss": 0.0915, + "num_input_tokens_seen": 2913280, + "step": 4310 + }, + { + "epoch": 0.1054161678841033, + "grad_norm": 24.318824768066406, + "learning_rate": 4.215566521717887e-07, + "loss": 0.1507, + "num_input_tokens_seen": 2916672, + "step": 4315 + }, + { + "epoch": 0.10553831871595046, + "grad_norm": 20.615177154541016, + "learning_rate": 4.220452435628084e-07, + "loss": 0.1375, + "num_input_tokens_seen": 2921472, + "step": 4320 + }, + { + "epoch": 0.10566046954779762, + "grad_norm": 55.968509674072266, + "learning_rate": 4.2253383495382813e-07, + "loss": 0.1117, + "num_input_tokens_seen": 2924736, + "step": 4325 + }, + { + "epoch": 0.10578262037964478, + "grad_norm": 63.45806884765625, + "learning_rate": 4.230224263448478e-07, + "loss": 0.1468, + "num_input_tokens_seen": 2927936, + "step": 4330 + }, + { + "epoch": 0.10590477121149194, + "grad_norm": 32.341922760009766, + "learning_rate": 4.2351101773586744e-07, + "loss": 0.1953, + "num_input_tokens_seen": 2931456, + "step": 4335 + }, + { + "epoch": 0.10602692204333912, + "grad_norm": 38.54914093017578, + "learning_rate": 4.2399960912688717e-07, + "loss": 0.1076, + "num_input_tokens_seen": 2934656, + "step": 4340 + }, + { + "epoch": 0.10614907287518628, + "grad_norm": 31.62356948852539, + "learning_rate": 4.2448820051790685e-07, + "loss": 0.145, + "num_input_tokens_seen": 2937728, + "step": 4345 + }, + { + "epoch": 0.10627122370703344, + "grad_norm": 69.52967834472656, + "learning_rate": 4.2497679190892654e-07, + "loss": 0.1833, + "num_input_tokens_seen": 2940992, + "step": 4350 + }, + { + "epoch": 0.1063933745388806, + "grad_norm": 35.780120849609375, + "learning_rate": 4.2546538329994627e-07, + "loss": 0.0839, + "num_input_tokens_seen": 2944640, + "step": 4355 + }, + { + "epoch": 0.10651552537072777, + "grad_norm": 19.068248748779297, + "learning_rate": 4.2595397469096595e-07, + "loss": 0.0561, + "num_input_tokens_seen": 2948224, + "step": 4360 + }, + { + "epoch": 0.10663767620257494, + "grad_norm": 53.031036376953125, + "learning_rate": 4.264425660819856e-07, + "loss": 0.1876, + "num_input_tokens_seen": 2952320, + "step": 4365 + }, + { + "epoch": 0.1067598270344221, + "grad_norm": 15.228533744812012, + "learning_rate": 4.269311574730053e-07, + "loss": 0.0542, + "num_input_tokens_seen": 2955456, + "step": 4370 + }, + { + "epoch": 0.10688197786626927, + "grad_norm": 28.26727294921875, + "learning_rate": 4.27419748864025e-07, + "loss": 0.1754, + "num_input_tokens_seen": 2958848, + "step": 4375 + }, + { + "epoch": 0.10700412869811643, + "grad_norm": 6.667966842651367, + "learning_rate": 4.279083402550447e-07, + "loss": 0.0646, + "num_input_tokens_seen": 2962816, + "step": 4380 + }, + { + "epoch": 0.1071262795299636, + "grad_norm": 25.915861129760742, + "learning_rate": 4.283969316460644e-07, + "loss": 0.133, + "num_input_tokens_seen": 2965888, + "step": 4385 + }, + { + "epoch": 0.10724843036181077, + "grad_norm": 37.18895721435547, + "learning_rate": 4.288855230370841e-07, + "loss": 0.1174, + "num_input_tokens_seen": 2969600, + "step": 4390 + }, + { + "epoch": 0.10737058119365793, + "grad_norm": 47.06808090209961, + "learning_rate": 4.2937411442810377e-07, + "loss": 0.051, + "num_input_tokens_seen": 2972864, + "step": 4395 + }, + { + "epoch": 0.10749273202550509, + "grad_norm": 5.6044921875, + "learning_rate": 4.2986270581912345e-07, + "loss": 0.198, + "num_input_tokens_seen": 2976192, + "step": 4400 + }, + { + "epoch": 0.10761488285735225, + "grad_norm": 43.25118637084961, + "learning_rate": 4.3035129721014314e-07, + "loss": 0.2707, + "num_input_tokens_seen": 2979840, + "step": 4405 + }, + { + "epoch": 0.10773703368919943, + "grad_norm": 47.26470947265625, + "learning_rate": 4.308398886011628e-07, + "loss": 0.149, + "num_input_tokens_seen": 2983168, + "step": 4410 + }, + { + "epoch": 0.10785918452104659, + "grad_norm": 19.98095703125, + "learning_rate": 4.3132847999218255e-07, + "loss": 0.0545, + "num_input_tokens_seen": 2987072, + "step": 4415 + }, + { + "epoch": 0.10798133535289375, + "grad_norm": 32.365257263183594, + "learning_rate": 4.3181707138320223e-07, + "loss": 0.129, + "num_input_tokens_seen": 2990144, + "step": 4420 + }, + { + "epoch": 0.10810348618474092, + "grad_norm": 47.49005889892578, + "learning_rate": 4.323056627742219e-07, + "loss": 0.2385, + "num_input_tokens_seen": 2994176, + "step": 4425 + }, + { + "epoch": 0.10822563701658808, + "grad_norm": 47.387611389160156, + "learning_rate": 4.3279425416524154e-07, + "loss": 0.125, + "num_input_tokens_seen": 2997248, + "step": 4430 + }, + { + "epoch": 0.10834778784843525, + "grad_norm": 42.15059280395508, + "learning_rate": 4.332828455562613e-07, + "loss": 0.2284, + "num_input_tokens_seen": 3000832, + "step": 4435 + }, + { + "epoch": 0.10846993868028242, + "grad_norm": 39.20221710205078, + "learning_rate": 4.3377143694728096e-07, + "loss": 0.2434, + "num_input_tokens_seen": 3004096, + "step": 4440 + }, + { + "epoch": 0.10859208951212958, + "grad_norm": 21.04318618774414, + "learning_rate": 4.342600283383007e-07, + "loss": 0.2088, + "num_input_tokens_seen": 3007296, + "step": 4445 + }, + { + "epoch": 0.10871424034397674, + "grad_norm": 26.36186408996582, + "learning_rate": 4.3474861972932037e-07, + "loss": 0.1412, + "num_input_tokens_seen": 3010304, + "step": 4450 + }, + { + "epoch": 0.1088363911758239, + "grad_norm": 52.20693588256836, + "learning_rate": 4.3523721112034005e-07, + "loss": 0.2006, + "num_input_tokens_seen": 3013696, + "step": 4455 + }, + { + "epoch": 0.10895854200767108, + "grad_norm": 17.572002410888672, + "learning_rate": 4.357258025113597e-07, + "loss": 0.1489, + "num_input_tokens_seen": 3016704, + "step": 4460 + }, + { + "epoch": 0.10908069283951824, + "grad_norm": 40.26177215576172, + "learning_rate": 4.362143939023794e-07, + "loss": 0.1612, + "num_input_tokens_seen": 3020032, + "step": 4465 + }, + { + "epoch": 0.1092028436713654, + "grad_norm": 51.259647369384766, + "learning_rate": 4.367029852933991e-07, + "loss": 0.1526, + "num_input_tokens_seen": 3023232, + "step": 4470 + }, + { + "epoch": 0.10932499450321256, + "grad_norm": 32.641841888427734, + "learning_rate": 4.371915766844188e-07, + "loss": 0.1318, + "num_input_tokens_seen": 3026432, + "step": 4475 + }, + { + "epoch": 0.10944714533505973, + "grad_norm": 6.420051574707031, + "learning_rate": 4.376801680754385e-07, + "loss": 0.0994, + "num_input_tokens_seen": 3029568, + "step": 4480 + }, + { + "epoch": 0.1095692961669069, + "grad_norm": 24.219253540039062, + "learning_rate": 4.381687594664582e-07, + "loss": 0.166, + "num_input_tokens_seen": 3032576, + "step": 4485 + }, + { + "epoch": 0.10969144699875406, + "grad_norm": 13.019012451171875, + "learning_rate": 4.386573508574778e-07, + "loss": 0.0961, + "num_input_tokens_seen": 3035904, + "step": 4490 + }, + { + "epoch": 0.10981359783060123, + "grad_norm": 88.06275939941406, + "learning_rate": 4.3914594224849756e-07, + "loss": 0.1549, + "num_input_tokens_seen": 3040064, + "step": 4495 + }, + { + "epoch": 0.10993574866244839, + "grad_norm": 52.31646728515625, + "learning_rate": 4.3963453363951724e-07, + "loss": 0.1122, + "num_input_tokens_seen": 3043648, + "step": 4500 + }, + { + "epoch": 0.11005789949429555, + "grad_norm": 15.511794090270996, + "learning_rate": 4.401231250305369e-07, + "loss": 0.1506, + "num_input_tokens_seen": 3046656, + "step": 4505 + }, + { + "epoch": 0.11018005032614273, + "grad_norm": 9.491758346557617, + "learning_rate": 4.4061171642155665e-07, + "loss": 0.1529, + "num_input_tokens_seen": 3050304, + "step": 4510 + }, + { + "epoch": 0.11030220115798989, + "grad_norm": 28.71082878112793, + "learning_rate": 4.4110030781257634e-07, + "loss": 0.1095, + "num_input_tokens_seen": 3053952, + "step": 4515 + }, + { + "epoch": 0.11042435198983705, + "grad_norm": 61.650177001953125, + "learning_rate": 4.4158889920359596e-07, + "loss": 0.2287, + "num_input_tokens_seen": 3057152, + "step": 4520 + }, + { + "epoch": 0.11054650282168421, + "grad_norm": 18.630380630493164, + "learning_rate": 4.420774905946157e-07, + "loss": 0.1766, + "num_input_tokens_seen": 3060416, + "step": 4525 + }, + { + "epoch": 0.11066865365353137, + "grad_norm": 23.90188980102539, + "learning_rate": 4.425660819856354e-07, + "loss": 0.145, + "num_input_tokens_seen": 3063616, + "step": 4530 + }, + { + "epoch": 0.11079080448537855, + "grad_norm": 56.79856491088867, + "learning_rate": 4.4305467337665506e-07, + "loss": 0.107, + "num_input_tokens_seen": 3067072, + "step": 4535 + }, + { + "epoch": 0.11091295531722571, + "grad_norm": 50.20392608642578, + "learning_rate": 4.435432647676748e-07, + "loss": 0.1432, + "num_input_tokens_seen": 3070592, + "step": 4540 + }, + { + "epoch": 0.11103510614907287, + "grad_norm": 5.514303684234619, + "learning_rate": 4.440318561586945e-07, + "loss": 0.1287, + "num_input_tokens_seen": 3074176, + "step": 4545 + }, + { + "epoch": 0.11115725698092004, + "grad_norm": 47.270328521728516, + "learning_rate": 4.445204475497141e-07, + "loss": 0.1766, + "num_input_tokens_seen": 3077632, + "step": 4550 + }, + { + "epoch": 0.11127940781276721, + "grad_norm": 9.4541015625, + "learning_rate": 4.4500903894073384e-07, + "loss": 0.1604, + "num_input_tokens_seen": 3081088, + "step": 4555 + }, + { + "epoch": 0.11140155864461437, + "grad_norm": 11.66859245300293, + "learning_rate": 4.454976303317535e-07, + "loss": 0.0403, + "num_input_tokens_seen": 3084224, + "step": 4560 + }, + { + "epoch": 0.11152370947646154, + "grad_norm": 55.1706428527832, + "learning_rate": 4.459862217227732e-07, + "loss": 0.1672, + "num_input_tokens_seen": 3087424, + "step": 4565 + }, + { + "epoch": 0.1116458603083087, + "grad_norm": 26.333547592163086, + "learning_rate": 4.4647481311379294e-07, + "loss": 0.2273, + "num_input_tokens_seen": 3091008, + "step": 4570 + }, + { + "epoch": 0.11176801114015586, + "grad_norm": 53.830989837646484, + "learning_rate": 4.469634045048126e-07, + "loss": 0.1293, + "num_input_tokens_seen": 3094016, + "step": 4575 + }, + { + "epoch": 0.11189016197200304, + "grad_norm": 48.61365509033203, + "learning_rate": 4.4745199589583225e-07, + "loss": 0.13, + "num_input_tokens_seen": 3097536, + "step": 4580 + }, + { + "epoch": 0.1120123128038502, + "grad_norm": 13.673266410827637, + "learning_rate": 4.47940587286852e-07, + "loss": 0.0926, + "num_input_tokens_seen": 3100800, + "step": 4585 + }, + { + "epoch": 0.11213446363569736, + "grad_norm": 11.077170372009277, + "learning_rate": 4.4842917867787166e-07, + "loss": 0.1141, + "num_input_tokens_seen": 3104128, + "step": 4590 + }, + { + "epoch": 0.11225661446754452, + "grad_norm": 23.086936950683594, + "learning_rate": 4.4891777006889134e-07, + "loss": 0.1559, + "num_input_tokens_seen": 3107456, + "step": 4595 + }, + { + "epoch": 0.11237876529939168, + "grad_norm": 12.579133033752441, + "learning_rate": 4.494063614599111e-07, + "loss": 0.1458, + "num_input_tokens_seen": 3111296, + "step": 4600 + }, + { + "epoch": 0.11250091613123886, + "grad_norm": 31.168546676635742, + "learning_rate": 4.4989495285093076e-07, + "loss": 0.1157, + "num_input_tokens_seen": 3114752, + "step": 4605 + }, + { + "epoch": 0.11262306696308602, + "grad_norm": 34.17367172241211, + "learning_rate": 4.5038354424195044e-07, + "loss": 0.1587, + "num_input_tokens_seen": 3117824, + "step": 4610 + }, + { + "epoch": 0.11274521779493318, + "grad_norm": 6.410637378692627, + "learning_rate": 4.508721356329701e-07, + "loss": 0.0831, + "num_input_tokens_seen": 3121792, + "step": 4615 + }, + { + "epoch": 0.11286736862678035, + "grad_norm": 40.198307037353516, + "learning_rate": 4.513607270239898e-07, + "loss": 0.0569, + "num_input_tokens_seen": 3125248, + "step": 4620 + }, + { + "epoch": 0.11298951945862751, + "grad_norm": 4.963204383850098, + "learning_rate": 4.518493184150095e-07, + "loss": 0.1616, + "num_input_tokens_seen": 3128640, + "step": 4625 + }, + { + "epoch": 0.11311167029047468, + "grad_norm": 42.212196350097656, + "learning_rate": 4.523379098060292e-07, + "loss": 0.1618, + "num_input_tokens_seen": 3131776, + "step": 4630 + }, + { + "epoch": 0.11323382112232185, + "grad_norm": 31.58185386657715, + "learning_rate": 4.528265011970489e-07, + "loss": 0.1112, + "num_input_tokens_seen": 3135168, + "step": 4635 + }, + { + "epoch": 0.11335597195416901, + "grad_norm": 57.309425354003906, + "learning_rate": 4.533150925880686e-07, + "loss": 0.247, + "num_input_tokens_seen": 3138688, + "step": 4640 + }, + { + "epoch": 0.11347812278601617, + "grad_norm": 41.127506256103516, + "learning_rate": 4.5380368397908826e-07, + "loss": 0.1139, + "num_input_tokens_seen": 3141952, + "step": 4645 + }, + { + "epoch": 0.11360027361786333, + "grad_norm": 61.593666076660156, + "learning_rate": 4.5429227537010794e-07, + "loss": 0.1915, + "num_input_tokens_seen": 3145280, + "step": 4650 + }, + { + "epoch": 0.11372242444971051, + "grad_norm": 26.210472106933594, + "learning_rate": 4.547808667611276e-07, + "loss": 0.1277, + "num_input_tokens_seen": 3148736, + "step": 4655 + }, + { + "epoch": 0.11384457528155767, + "grad_norm": 40.950233459472656, + "learning_rate": 4.5526945815214736e-07, + "loss": 0.121, + "num_input_tokens_seen": 3152320, + "step": 4660 + }, + { + "epoch": 0.11396672611340483, + "grad_norm": 29.210453033447266, + "learning_rate": 4.5575804954316704e-07, + "loss": 0.11, + "num_input_tokens_seen": 3155904, + "step": 4665 + }, + { + "epoch": 0.114088876945252, + "grad_norm": 19.651756286621094, + "learning_rate": 4.562466409341867e-07, + "loss": 0.1673, + "num_input_tokens_seen": 3159488, + "step": 4670 + }, + { + "epoch": 0.11421102777709916, + "grad_norm": 30.511878967285156, + "learning_rate": 4.567352323252064e-07, + "loss": 0.2023, + "num_input_tokens_seen": 3162752, + "step": 4675 + }, + { + "epoch": 0.11433317860894633, + "grad_norm": 27.229795455932617, + "learning_rate": 4.572238237162261e-07, + "loss": 0.134, + "num_input_tokens_seen": 3165888, + "step": 4680 + }, + { + "epoch": 0.1144553294407935, + "grad_norm": 20.034862518310547, + "learning_rate": 4.5771241510724577e-07, + "loss": 0.1225, + "num_input_tokens_seen": 3169472, + "step": 4685 + }, + { + "epoch": 0.11457748027264066, + "grad_norm": 21.58957862854004, + "learning_rate": 4.582010064982655e-07, + "loss": 0.1116, + "num_input_tokens_seen": 3172608, + "step": 4690 + }, + { + "epoch": 0.11469963110448782, + "grad_norm": 30.950048446655273, + "learning_rate": 4.586895978892852e-07, + "loss": 0.0563, + "num_input_tokens_seen": 3175680, + "step": 4695 + }, + { + "epoch": 0.11482178193633498, + "grad_norm": 37.59648895263672, + "learning_rate": 4.5917818928030486e-07, + "loss": 0.1314, + "num_input_tokens_seen": 3178944, + "step": 4700 + }, + { + "epoch": 0.11494393276818216, + "grad_norm": 54.260047912597656, + "learning_rate": 4.5966678067132454e-07, + "loss": 0.1484, + "num_input_tokens_seen": 3182272, + "step": 4705 + }, + { + "epoch": 0.11506608360002932, + "grad_norm": 35.50208282470703, + "learning_rate": 4.601553720623442e-07, + "loss": 0.2703, + "num_input_tokens_seen": 3185536, + "step": 4710 + }, + { + "epoch": 0.11518823443187648, + "grad_norm": 10.326123237609863, + "learning_rate": 4.606439634533639e-07, + "loss": 0.0611, + "num_input_tokens_seen": 3188928, + "step": 4715 + }, + { + "epoch": 0.11531038526372364, + "grad_norm": 22.59487533569336, + "learning_rate": 4.6113255484438364e-07, + "loss": 0.1684, + "num_input_tokens_seen": 3192832, + "step": 4720 + }, + { + "epoch": 0.11543253609557082, + "grad_norm": 33.71693801879883, + "learning_rate": 4.616211462354033e-07, + "loss": 0.1242, + "num_input_tokens_seen": 3196416, + "step": 4725 + }, + { + "epoch": 0.11555468692741798, + "grad_norm": 16.002641677856445, + "learning_rate": 4.62109737626423e-07, + "loss": 0.0917, + "num_input_tokens_seen": 3200000, + "step": 4730 + }, + { + "epoch": 0.11567683775926514, + "grad_norm": 7.192635536193848, + "learning_rate": 4.625983290174427e-07, + "loss": 0.0778, + "num_input_tokens_seen": 3203456, + "step": 4735 + }, + { + "epoch": 0.1157989885911123, + "grad_norm": 10.36010456085205, + "learning_rate": 4.6308692040846237e-07, + "loss": 0.1563, + "num_input_tokens_seen": 3206720, + "step": 4740 + }, + { + "epoch": 0.11592113942295947, + "grad_norm": 26.770761489868164, + "learning_rate": 4.6357551179948205e-07, + "loss": 0.1827, + "num_input_tokens_seen": 3210176, + "step": 4745 + }, + { + "epoch": 0.11604329025480664, + "grad_norm": 45.02693176269531, + "learning_rate": 4.640641031905018e-07, + "loss": 0.1546, + "num_input_tokens_seen": 3213056, + "step": 4750 + }, + { + "epoch": 0.1161654410866538, + "grad_norm": 23.50362205505371, + "learning_rate": 4.6455269458152146e-07, + "loss": 0.206, + "num_input_tokens_seen": 3216064, + "step": 4755 + }, + { + "epoch": 0.11628759191850097, + "grad_norm": 5.375946521759033, + "learning_rate": 4.6504128597254114e-07, + "loss": 0.075, + "num_input_tokens_seen": 3219264, + "step": 4760 + }, + { + "epoch": 0.11640974275034813, + "grad_norm": 6.029793739318848, + "learning_rate": 4.655298773635608e-07, + "loss": 0.0856, + "num_input_tokens_seen": 3222784, + "step": 4765 + }, + { + "epoch": 0.11653189358219529, + "grad_norm": 43.54521179199219, + "learning_rate": 4.660184687545805e-07, + "loss": 0.2882, + "num_input_tokens_seen": 3225856, + "step": 4770 + }, + { + "epoch": 0.11665404441404247, + "grad_norm": 49.9635124206543, + "learning_rate": 4.665070601456002e-07, + "loss": 0.1822, + "num_input_tokens_seen": 3229120, + "step": 4775 + }, + { + "epoch": 0.11677619524588963, + "grad_norm": 31.54857063293457, + "learning_rate": 4.669956515366199e-07, + "loss": 0.0688, + "num_input_tokens_seen": 3233024, + "step": 4780 + }, + { + "epoch": 0.11689834607773679, + "grad_norm": 39.51899337768555, + "learning_rate": 4.674842429276396e-07, + "loss": 0.0929, + "num_input_tokens_seen": 3236608, + "step": 4785 + }, + { + "epoch": 0.11702049690958395, + "grad_norm": 32.86260223388672, + "learning_rate": 4.679728343186593e-07, + "loss": 0.0559, + "num_input_tokens_seen": 3239808, + "step": 4790 + }, + { + "epoch": 0.11714264774143111, + "grad_norm": 18.63022804260254, + "learning_rate": 4.6846142570967897e-07, + "loss": 0.1595, + "num_input_tokens_seen": 3243072, + "step": 4795 + }, + { + "epoch": 0.11726479857327829, + "grad_norm": 34.65690612792969, + "learning_rate": 4.6895001710069865e-07, + "loss": 0.1409, + "num_input_tokens_seen": 3246208, + "step": 4800 + }, + { + "epoch": 0.11738694940512545, + "grad_norm": 50.13557052612305, + "learning_rate": 4.6943860849171833e-07, + "loss": 0.1298, + "num_input_tokens_seen": 3250496, + "step": 4805 + }, + { + "epoch": 0.11750910023697261, + "grad_norm": 5.652262210845947, + "learning_rate": 4.6992719988273806e-07, + "loss": 0.1475, + "num_input_tokens_seen": 3254208, + "step": 4810 + }, + { + "epoch": 0.11763125106881978, + "grad_norm": 26.71537208557129, + "learning_rate": 4.7041579127375774e-07, + "loss": 0.0992, + "num_input_tokens_seen": 3257600, + "step": 4815 + }, + { + "epoch": 0.11775340190066694, + "grad_norm": 30.07550048828125, + "learning_rate": 4.709043826647774e-07, + "loss": 0.2301, + "num_input_tokens_seen": 3260928, + "step": 4820 + }, + { + "epoch": 0.11787555273251411, + "grad_norm": 20.74995994567871, + "learning_rate": 4.713929740557971e-07, + "loss": 0.0816, + "num_input_tokens_seen": 3264192, + "step": 4825 + }, + { + "epoch": 0.11799770356436128, + "grad_norm": 47.43042755126953, + "learning_rate": 4.718815654468168e-07, + "loss": 0.0875, + "num_input_tokens_seen": 3267392, + "step": 4830 + }, + { + "epoch": 0.11811985439620844, + "grad_norm": 1.182123064994812, + "learning_rate": 4.7237015683783647e-07, + "loss": 0.1291, + "num_input_tokens_seen": 3270272, + "step": 4835 + }, + { + "epoch": 0.1182420052280556, + "grad_norm": 59.24190902709961, + "learning_rate": 4.728587482288562e-07, + "loss": 0.1015, + "num_input_tokens_seen": 3273408, + "step": 4840 + }, + { + "epoch": 0.11836415605990276, + "grad_norm": 52.56169509887695, + "learning_rate": 4.733473396198759e-07, + "loss": 0.1972, + "num_input_tokens_seen": 3276800, + "step": 4845 + }, + { + "epoch": 0.11848630689174994, + "grad_norm": 27.176464080810547, + "learning_rate": 4.7383593101089557e-07, + "loss": 0.0876, + "num_input_tokens_seen": 3279936, + "step": 4850 + }, + { + "epoch": 0.1186084577235971, + "grad_norm": 32.131317138671875, + "learning_rate": 4.743245224019153e-07, + "loss": 0.1071, + "num_input_tokens_seen": 3283456, + "step": 4855 + }, + { + "epoch": 0.11873060855544426, + "grad_norm": 43.500160217285156, + "learning_rate": 4.7481311379293493e-07, + "loss": 0.16, + "num_input_tokens_seen": 3286848, + "step": 4860 + }, + { + "epoch": 0.11885275938729142, + "grad_norm": 58.2629280090332, + "learning_rate": 4.753017051839546e-07, + "loss": 0.1569, + "num_input_tokens_seen": 3290432, + "step": 4865 + }, + { + "epoch": 0.11897491021913859, + "grad_norm": 0.8482736945152283, + "learning_rate": 4.7579029657497434e-07, + "loss": 0.1321, + "num_input_tokens_seen": 3293760, + "step": 4870 + }, + { + "epoch": 0.11909706105098576, + "grad_norm": 28.063852310180664, + "learning_rate": 4.76278887965994e-07, + "loss": 0.1658, + "num_input_tokens_seen": 3296768, + "step": 4875 + }, + { + "epoch": 0.11921921188283292, + "grad_norm": 42.519065856933594, + "learning_rate": 4.767674793570137e-07, + "loss": 0.2226, + "num_input_tokens_seen": 3299968, + "step": 4880 + }, + { + "epoch": 0.11934136271468009, + "grad_norm": 17.598634719848633, + "learning_rate": 4.772560707480334e-07, + "loss": 0.1144, + "num_input_tokens_seen": 3303488, + "step": 4885 + }, + { + "epoch": 0.11946351354652725, + "grad_norm": 35.20301818847656, + "learning_rate": 4.777446621390531e-07, + "loss": 0.1006, + "num_input_tokens_seen": 3306880, + "step": 4890 + }, + { + "epoch": 0.11958566437837442, + "grad_norm": 4.097479343414307, + "learning_rate": 4.782332535300728e-07, + "loss": 0.1341, + "num_input_tokens_seen": 3310336, + "step": 4895 + }, + { + "epoch": 0.11970781521022159, + "grad_norm": 19.13021469116211, + "learning_rate": 4.787218449210924e-07, + "loss": 0.1649, + "num_input_tokens_seen": 3313856, + "step": 4900 + }, + { + "epoch": 0.11982996604206875, + "grad_norm": 16.483863830566406, + "learning_rate": 4.792104363121121e-07, + "loss": 0.1185, + "num_input_tokens_seen": 3317120, + "step": 4905 + }, + { + "epoch": 0.11995211687391591, + "grad_norm": 6.986123561859131, + "learning_rate": 4.796990277031319e-07, + "loss": 0.1056, + "num_input_tokens_seen": 3320768, + "step": 4910 + }, + { + "epoch": 0.12007426770576307, + "grad_norm": 28.12428092956543, + "learning_rate": 4.801876190941516e-07, + "loss": 0.0633, + "num_input_tokens_seen": 3324288, + "step": 4915 + }, + { + "epoch": 0.12019641853761025, + "grad_norm": 14.742039680480957, + "learning_rate": 4.806762104851712e-07, + "loss": 0.1208, + "num_input_tokens_seen": 3327680, + "step": 4920 + }, + { + "epoch": 0.12031856936945741, + "grad_norm": 8.952460289001465, + "learning_rate": 4.811648018761909e-07, + "loss": 0.1176, + "num_input_tokens_seen": 3331008, + "step": 4925 + }, + { + "epoch": 0.12044072020130457, + "grad_norm": 39.11952590942383, + "learning_rate": 4.816533932672106e-07, + "loss": 0.151, + "num_input_tokens_seen": 3334272, + "step": 4930 + }, + { + "epoch": 0.12056287103315173, + "grad_norm": 27.464441299438477, + "learning_rate": 4.821419846582303e-07, + "loss": 0.0791, + "num_input_tokens_seen": 3337728, + "step": 4935 + }, + { + "epoch": 0.1206850218649989, + "grad_norm": 37.72605514526367, + "learning_rate": 4.8263057604925e-07, + "loss": 0.1625, + "num_input_tokens_seen": 3341056, + "step": 4940 + }, + { + "epoch": 0.12080717269684607, + "grad_norm": 31.29315185546875, + "learning_rate": 4.831191674402697e-07, + "loss": 0.14, + "num_input_tokens_seen": 3344704, + "step": 4945 + }, + { + "epoch": 0.12092932352869323, + "grad_norm": 37.559452056884766, + "learning_rate": 4.836077588312894e-07, + "loss": 0.2221, + "num_input_tokens_seen": 3347648, + "step": 4950 + }, + { + "epoch": 0.1210514743605404, + "grad_norm": 31.414438247680664, + "learning_rate": 4.84096350222309e-07, + "loss": 0.0901, + "num_input_tokens_seen": 3350976, + "step": 4955 + }, + { + "epoch": 0.12117362519238756, + "grad_norm": 27.78361701965332, + "learning_rate": 4.845849416133287e-07, + "loss": 0.2851, + "num_input_tokens_seen": 3354112, + "step": 4960 + }, + { + "epoch": 0.12129577602423472, + "grad_norm": 24.971927642822266, + "learning_rate": 4.850735330043484e-07, + "loss": 0.1281, + "num_input_tokens_seen": 3357696, + "step": 4965 + }, + { + "epoch": 0.1214179268560819, + "grad_norm": 26.934574127197266, + "learning_rate": 4.855621243953682e-07, + "loss": 0.1526, + "num_input_tokens_seen": 3361024, + "step": 4970 + }, + { + "epoch": 0.12154007768792906, + "grad_norm": 56.78594207763672, + "learning_rate": 4.860507157863879e-07, + "loss": 0.2137, + "num_input_tokens_seen": 3364032, + "step": 4975 + }, + { + "epoch": 0.12166222851977622, + "grad_norm": 7.1227521896362305, + "learning_rate": 4.865393071774074e-07, + "loss": 0.1128, + "num_input_tokens_seen": 3367744, + "step": 4980 + }, + { + "epoch": 0.12178437935162338, + "grad_norm": 13.746490478515625, + "learning_rate": 4.870278985684272e-07, + "loss": 0.1315, + "num_input_tokens_seen": 3370880, + "step": 4985 + }, + { + "epoch": 0.12190653018347054, + "grad_norm": 19.91473388671875, + "learning_rate": 4.875164899594469e-07, + "loss": 0.0926, + "num_input_tokens_seen": 3374720, + "step": 4990 + }, + { + "epoch": 0.12202868101531772, + "grad_norm": 17.86402130126953, + "learning_rate": 4.880050813504666e-07, + "loss": 0.0964, + "num_input_tokens_seen": 3377984, + "step": 4995 + }, + { + "epoch": 0.12215083184716488, + "grad_norm": 17.522184371948242, + "learning_rate": 4.884936727414863e-07, + "loss": 0.0963, + "num_input_tokens_seen": 3381376, + "step": 5000 + }, + { + "epoch": 0.12227298267901204, + "grad_norm": 12.18857192993164, + "learning_rate": 4.88982264132506e-07, + "loss": 0.1253, + "num_input_tokens_seen": 3384832, + "step": 5005 + }, + { + "epoch": 0.1223951335108592, + "grad_norm": 54.867347717285156, + "learning_rate": 4.894708555235256e-07, + "loss": 0.1456, + "num_input_tokens_seen": 3388480, + "step": 5010 + }, + { + "epoch": 0.12251728434270637, + "grad_norm": 67.53438568115234, + "learning_rate": 4.899594469145453e-07, + "loss": 0.1403, + "num_input_tokens_seen": 3391744, + "step": 5015 + }, + { + "epoch": 0.12263943517455354, + "grad_norm": 3.713974952697754, + "learning_rate": 4.90448038305565e-07, + "loss": 0.1419, + "num_input_tokens_seen": 3395136, + "step": 5020 + }, + { + "epoch": 0.1227615860064007, + "grad_norm": 12.415149688720703, + "learning_rate": 4.909366296965847e-07, + "loss": 0.1163, + "num_input_tokens_seen": 3398272, + "step": 5025 + }, + { + "epoch": 0.12288373683824787, + "grad_norm": 14.186080932617188, + "learning_rate": 4.914252210876045e-07, + "loss": 0.1212, + "num_input_tokens_seen": 3402112, + "step": 5030 + }, + { + "epoch": 0.12300588767009503, + "grad_norm": 18.723878860473633, + "learning_rate": 4.919138124786241e-07, + "loss": 0.1536, + "num_input_tokens_seen": 3405440, + "step": 5035 + }, + { + "epoch": 0.12312803850194219, + "grad_norm": 7.663374423980713, + "learning_rate": 4.924024038696437e-07, + "loss": 0.0942, + "num_input_tokens_seen": 3408384, + "step": 5040 + }, + { + "epoch": 0.12325018933378937, + "grad_norm": 36.07602310180664, + "learning_rate": 4.928909952606635e-07, + "loss": 0.157, + "num_input_tokens_seen": 3411968, + "step": 5045 + }, + { + "epoch": 0.12337234016563653, + "grad_norm": 33.824764251708984, + "learning_rate": 4.933795866516832e-07, + "loss": 0.1502, + "num_input_tokens_seen": 3415360, + "step": 5050 + }, + { + "epoch": 0.12349449099748369, + "grad_norm": 36.74732971191406, + "learning_rate": 4.938681780427029e-07, + "loss": 0.1455, + "num_input_tokens_seen": 3418560, + "step": 5055 + }, + { + "epoch": 0.12361664182933085, + "grad_norm": 29.577823638916016, + "learning_rate": 4.943567694337226e-07, + "loss": 0.1418, + "num_input_tokens_seen": 3421888, + "step": 5060 + }, + { + "epoch": 0.12373879266117803, + "grad_norm": 17.719083786010742, + "learning_rate": 4.948453608247422e-07, + "loss": 0.1016, + "num_input_tokens_seen": 3425536, + "step": 5065 + }, + { + "epoch": 0.12386094349302519, + "grad_norm": 20.10811424255371, + "learning_rate": 4.953339522157619e-07, + "loss": 0.0608, + "num_input_tokens_seen": 3429248, + "step": 5070 + }, + { + "epoch": 0.12398309432487235, + "grad_norm": 17.528579711914062, + "learning_rate": 4.958225436067816e-07, + "loss": 0.1167, + "num_input_tokens_seen": 3432512, + "step": 5075 + }, + { + "epoch": 0.12410524515671952, + "grad_norm": 17.629924774169922, + "learning_rate": 4.963111349978013e-07, + "loss": 0.1865, + "num_input_tokens_seen": 3435712, + "step": 5080 + }, + { + "epoch": 0.12422739598856668, + "grad_norm": 23.14451789855957, + "learning_rate": 4.96799726388821e-07, + "loss": 0.0807, + "num_input_tokens_seen": 3438848, + "step": 5085 + }, + { + "epoch": 0.12434954682041385, + "grad_norm": 57.59390640258789, + "learning_rate": 4.972883177798407e-07, + "loss": 0.0983, + "num_input_tokens_seen": 3442368, + "step": 5090 + }, + { + "epoch": 0.12447169765226102, + "grad_norm": 6.212153911590576, + "learning_rate": 4.977769091708604e-07, + "loss": 0.0881, + "num_input_tokens_seen": 3445760, + "step": 5095 + }, + { + "epoch": 0.12459384848410818, + "grad_norm": 32.92991256713867, + "learning_rate": 4.982655005618801e-07, + "loss": 0.1125, + "num_input_tokens_seen": 3448768, + "step": 5100 + }, + { + "epoch": 0.12471599931595534, + "grad_norm": 43.08573532104492, + "learning_rate": 4.987540919528998e-07, + "loss": 0.1208, + "num_input_tokens_seen": 3451968, + "step": 5105 + }, + { + "epoch": 0.1248381501478025, + "grad_norm": 62.34801483154297, + "learning_rate": 4.992426833439195e-07, + "loss": 0.1621, + "num_input_tokens_seen": 3455680, + "step": 5110 + }, + { + "epoch": 0.12496030097964968, + "grad_norm": 2.1716251373291016, + "learning_rate": 4.997312747349392e-07, + "loss": 0.0785, + "num_input_tokens_seen": 3458816, + "step": 5115 + }, + { + "epoch": 0.12508245181149683, + "grad_norm": 41.48759841918945, + "learning_rate": 5.002198661259588e-07, + "loss": 0.2852, + "num_input_tokens_seen": 3461824, + "step": 5120 + }, + { + "epoch": 0.125204602643344, + "grad_norm": 48.85375213623047, + "learning_rate": 5.007084575169785e-07, + "loss": 0.1053, + "num_input_tokens_seen": 3466304, + "step": 5125 + }, + { + "epoch": 0.12532675347519118, + "grad_norm": 5.803524017333984, + "learning_rate": 5.011970489079982e-07, + "loss": 0.1095, + "num_input_tokens_seen": 3469696, + "step": 5130 + }, + { + "epoch": 0.12544890430703834, + "grad_norm": 31.555641174316406, + "learning_rate": 5.01685640299018e-07, + "loss": 0.1306, + "num_input_tokens_seen": 3473216, + "step": 5135 + }, + { + "epoch": 0.1255710551388855, + "grad_norm": 49.93914031982422, + "learning_rate": 5.021742316900376e-07, + "loss": 0.113, + "num_input_tokens_seen": 3476352, + "step": 5140 + }, + { + "epoch": 0.12569320597073266, + "grad_norm": 3.528350353240967, + "learning_rate": 5.026628230810573e-07, + "loss": 0.0987, + "num_input_tokens_seen": 3479680, + "step": 5145 + }, + { + "epoch": 0.12581535680257983, + "grad_norm": 18.18247413635254, + "learning_rate": 5.03151414472077e-07, + "loss": 0.0808, + "num_input_tokens_seen": 3483392, + "step": 5150 + }, + { + "epoch": 0.125937507634427, + "grad_norm": 23.490751266479492, + "learning_rate": 5.036400058630966e-07, + "loss": 0.0885, + "num_input_tokens_seen": 3486720, + "step": 5155 + }, + { + "epoch": 0.12605965846627415, + "grad_norm": 34.18044662475586, + "learning_rate": 5.041285972541164e-07, + "loss": 0.0894, + "num_input_tokens_seen": 3489920, + "step": 5160 + }, + { + "epoch": 0.1261818092981213, + "grad_norm": 62.92462158203125, + "learning_rate": 5.046171886451361e-07, + "loss": 0.1762, + "num_input_tokens_seen": 3493248, + "step": 5165 + }, + { + "epoch": 0.12630396012996847, + "grad_norm": 44.83457946777344, + "learning_rate": 5.051057800361558e-07, + "loss": 0.0735, + "num_input_tokens_seen": 3496640, + "step": 5170 + }, + { + "epoch": 0.12642611096181566, + "grad_norm": 22.707056045532227, + "learning_rate": 5.055943714271754e-07, + "loss": 0.1073, + "num_input_tokens_seen": 3499648, + "step": 5175 + }, + { + "epoch": 0.12654826179366283, + "grad_norm": 33.12401580810547, + "learning_rate": 5.060829628181951e-07, + "loss": 0.0827, + "num_input_tokens_seen": 3502592, + "step": 5180 + }, + { + "epoch": 0.12667041262551, + "grad_norm": 33.55727005004883, + "learning_rate": 5.065715542092148e-07, + "loss": 0.1814, + "num_input_tokens_seen": 3505984, + "step": 5185 + }, + { + "epoch": 0.12679256345735715, + "grad_norm": 57.06978225708008, + "learning_rate": 5.070601456002345e-07, + "loss": 0.2021, + "num_input_tokens_seen": 3509120, + "step": 5190 + }, + { + "epoch": 0.1269147142892043, + "grad_norm": 3.5166196823120117, + "learning_rate": 5.075487369912543e-07, + "loss": 0.1381, + "num_input_tokens_seen": 3512192, + "step": 5195 + }, + { + "epoch": 0.12703686512105147, + "grad_norm": 19.803998947143555, + "learning_rate": 5.080373283822738e-07, + "loss": 0.1622, + "num_input_tokens_seen": 3515200, + "step": 5200 + }, + { + "epoch": 0.12715901595289864, + "grad_norm": 25.440887451171875, + "learning_rate": 5.085259197732936e-07, + "loss": 0.1644, + "num_input_tokens_seen": 3518272, + "step": 5205 + }, + { + "epoch": 0.1272811667847458, + "grad_norm": 24.9045352935791, + "learning_rate": 5.090145111643133e-07, + "loss": 0.0878, + "num_input_tokens_seen": 3521600, + "step": 5210 + }, + { + "epoch": 0.12740331761659296, + "grad_norm": 49.746337890625, + "learning_rate": 5.095031025553329e-07, + "loss": 0.1429, + "num_input_tokens_seen": 3524544, + "step": 5215 + }, + { + "epoch": 0.12752546844844012, + "grad_norm": 12.738018035888672, + "learning_rate": 5.099916939463527e-07, + "loss": 0.1227, + "num_input_tokens_seen": 3528000, + "step": 5220 + }, + { + "epoch": 0.1276476192802873, + "grad_norm": 19.108055114746094, + "learning_rate": 5.104802853373724e-07, + "loss": 0.1693, + "num_input_tokens_seen": 3531072, + "step": 5225 + }, + { + "epoch": 0.12776977011213447, + "grad_norm": 28.770009994506836, + "learning_rate": 5.10968876728392e-07, + "loss": 0.1392, + "num_input_tokens_seen": 3534912, + "step": 5230 + }, + { + "epoch": 0.12789192094398164, + "grad_norm": 61.38738250732422, + "learning_rate": 5.114574681194117e-07, + "loss": 0.1636, + "num_input_tokens_seen": 3538048, + "step": 5235 + }, + { + "epoch": 0.1280140717758288, + "grad_norm": 13.256145477294922, + "learning_rate": 5.119460595104314e-07, + "loss": 0.1147, + "num_input_tokens_seen": 3541248, + "step": 5240 + }, + { + "epoch": 0.12813622260767596, + "grad_norm": 10.175660133361816, + "learning_rate": 5.124346509014511e-07, + "loss": 0.1623, + "num_input_tokens_seen": 3544640, + "step": 5245 + }, + { + "epoch": 0.12825837343952312, + "grad_norm": 52.19990158081055, + "learning_rate": 5.129232422924708e-07, + "loss": 0.1917, + "num_input_tokens_seen": 3548160, + "step": 5250 + }, + { + "epoch": 0.12838052427137028, + "grad_norm": 67.88219451904297, + "learning_rate": 5.134118336834905e-07, + "loss": 0.3176, + "num_input_tokens_seen": 3551488, + "step": 5255 + }, + { + "epoch": 0.12850267510321745, + "grad_norm": 55.14279556274414, + "learning_rate": 5.139004250745101e-07, + "loss": 0.1757, + "num_input_tokens_seen": 3555456, + "step": 5260 + }, + { + "epoch": 0.1286248259350646, + "grad_norm": 31.440595626831055, + "learning_rate": 5.143890164655299e-07, + "loss": 0.0561, + "num_input_tokens_seen": 3558848, + "step": 5265 + }, + { + "epoch": 0.12874697676691177, + "grad_norm": 27.637584686279297, + "learning_rate": 5.148776078565496e-07, + "loss": 0.1261, + "num_input_tokens_seen": 3562624, + "step": 5270 + }, + { + "epoch": 0.12886912759875896, + "grad_norm": 22.20782470703125, + "learning_rate": 5.153661992475692e-07, + "loss": 0.0671, + "num_input_tokens_seen": 3566336, + "step": 5275 + }, + { + "epoch": 0.12899127843060612, + "grad_norm": 19.314735412597656, + "learning_rate": 5.15854790638589e-07, + "loss": 0.0612, + "num_input_tokens_seen": 3569280, + "step": 5280 + }, + { + "epoch": 0.12911342926245328, + "grad_norm": 56.35293197631836, + "learning_rate": 5.163433820296086e-07, + "loss": 0.1168, + "num_input_tokens_seen": 3572160, + "step": 5285 + }, + { + "epoch": 0.12923558009430045, + "grad_norm": 47.87625503540039, + "learning_rate": 5.168319734206283e-07, + "loss": 0.1233, + "num_input_tokens_seen": 3575360, + "step": 5290 + }, + { + "epoch": 0.1293577309261476, + "grad_norm": 34.83906936645508, + "learning_rate": 5.17320564811648e-07, + "loss": 0.2233, + "num_input_tokens_seen": 3578816, + "step": 5295 + }, + { + "epoch": 0.12947988175799477, + "grad_norm": 25.64273452758789, + "learning_rate": 5.178091562026678e-07, + "loss": 0.1171, + "num_input_tokens_seen": 3582208, + "step": 5300 + }, + { + "epoch": 0.12960203258984193, + "grad_norm": 65.27552795410156, + "learning_rate": 5.182977475936874e-07, + "loss": 0.1628, + "num_input_tokens_seen": 3585472, + "step": 5305 + }, + { + "epoch": 0.1297241834216891, + "grad_norm": 25.95465660095215, + "learning_rate": 5.18786338984707e-07, + "loss": 0.2128, + "num_input_tokens_seen": 3588480, + "step": 5310 + }, + { + "epoch": 0.12984633425353626, + "grad_norm": 19.13545036315918, + "learning_rate": 5.192749303757268e-07, + "loss": 0.2443, + "num_input_tokens_seen": 3591872, + "step": 5315 + }, + { + "epoch": 0.12996848508538345, + "grad_norm": 11.983221054077148, + "learning_rate": 5.197635217667464e-07, + "loss": 0.106, + "num_input_tokens_seen": 3595328, + "step": 5320 + }, + { + "epoch": 0.1300906359172306, + "grad_norm": 17.041215896606445, + "learning_rate": 5.202521131577662e-07, + "loss": 0.0865, + "num_input_tokens_seen": 3599168, + "step": 5325 + }, + { + "epoch": 0.13021278674907777, + "grad_norm": 6.825228691101074, + "learning_rate": 5.207407045487859e-07, + "loss": 0.1366, + "num_input_tokens_seen": 3602176, + "step": 5330 + }, + { + "epoch": 0.13033493758092493, + "grad_norm": 24.280309677124023, + "learning_rate": 5.212292959398054e-07, + "loss": 0.0841, + "num_input_tokens_seen": 3606144, + "step": 5335 + }, + { + "epoch": 0.1304570884127721, + "grad_norm": 58.35236358642578, + "learning_rate": 5.217178873308252e-07, + "loss": 0.1475, + "num_input_tokens_seen": 3609344, + "step": 5340 + }, + { + "epoch": 0.13057923924461926, + "grad_norm": 34.24288558959961, + "learning_rate": 5.222064787218449e-07, + "loss": 0.0872, + "num_input_tokens_seen": 3612608, + "step": 5345 + }, + { + "epoch": 0.13070139007646642, + "grad_norm": 40.71889114379883, + "learning_rate": 5.226950701128646e-07, + "loss": 0.2108, + "num_input_tokens_seen": 3615680, + "step": 5350 + }, + { + "epoch": 0.13082354090831358, + "grad_norm": 27.078983306884766, + "learning_rate": 5.231836615038843e-07, + "loss": 0.1861, + "num_input_tokens_seen": 3618944, + "step": 5355 + }, + { + "epoch": 0.13094569174016074, + "grad_norm": 19.30623435974121, + "learning_rate": 5.236722528949041e-07, + "loss": 0.0905, + "num_input_tokens_seen": 3622080, + "step": 5360 + }, + { + "epoch": 0.1310678425720079, + "grad_norm": 17.23054313659668, + "learning_rate": 5.241608442859236e-07, + "loss": 0.1503, + "num_input_tokens_seen": 3625280, + "step": 5365 + }, + { + "epoch": 0.1311899934038551, + "grad_norm": 34.495113372802734, + "learning_rate": 5.246494356769433e-07, + "loss": 0.2217, + "num_input_tokens_seen": 3628672, + "step": 5370 + }, + { + "epoch": 0.13131214423570226, + "grad_norm": 26.085792541503906, + "learning_rate": 5.251380270679631e-07, + "loss": 0.1845, + "num_input_tokens_seen": 3631744, + "step": 5375 + }, + { + "epoch": 0.13143429506754942, + "grad_norm": 11.248368263244629, + "learning_rate": 5.256266184589827e-07, + "loss": 0.0909, + "num_input_tokens_seen": 3635008, + "step": 5380 + }, + { + "epoch": 0.13155644589939658, + "grad_norm": 22.01517105102539, + "learning_rate": 5.261152098500025e-07, + "loss": 0.1601, + "num_input_tokens_seen": 3639040, + "step": 5385 + }, + { + "epoch": 0.13167859673124374, + "grad_norm": 16.220895767211914, + "learning_rate": 5.266038012410222e-07, + "loss": 0.1047, + "num_input_tokens_seen": 3642048, + "step": 5390 + }, + { + "epoch": 0.1318007475630909, + "grad_norm": 35.09331512451172, + "learning_rate": 5.270923926320417e-07, + "loss": 0.1907, + "num_input_tokens_seen": 3645504, + "step": 5395 + }, + { + "epoch": 0.13192289839493807, + "grad_norm": 18.162155151367188, + "learning_rate": 5.275809840230615e-07, + "loss": 0.0751, + "num_input_tokens_seen": 3648640, + "step": 5400 + }, + { + "epoch": 0.13204504922678523, + "grad_norm": 35.38753128051758, + "learning_rate": 5.280695754140812e-07, + "loss": 0.1491, + "num_input_tokens_seen": 3651904, + "step": 5405 + }, + { + "epoch": 0.1321672000586324, + "grad_norm": 25.72626304626465, + "learning_rate": 5.285581668051009e-07, + "loss": 0.1306, + "num_input_tokens_seen": 3654912, + "step": 5410 + }, + { + "epoch": 0.13228935089047955, + "grad_norm": 39.30805587768555, + "learning_rate": 5.290467581961206e-07, + "loss": 0.1601, + "num_input_tokens_seen": 3658112, + "step": 5415 + }, + { + "epoch": 0.13241150172232674, + "grad_norm": 18.81882095336914, + "learning_rate": 5.295353495871403e-07, + "loss": 0.1715, + "num_input_tokens_seen": 3661440, + "step": 5420 + }, + { + "epoch": 0.1325336525541739, + "grad_norm": 31.62189483642578, + "learning_rate": 5.300239409781599e-07, + "loss": 0.1183, + "num_input_tokens_seen": 3664768, + "step": 5425 + }, + { + "epoch": 0.13265580338602107, + "grad_norm": 24.074796676635742, + "learning_rate": 5.305125323691796e-07, + "loss": 0.1052, + "num_input_tokens_seen": 3668800, + "step": 5430 + }, + { + "epoch": 0.13277795421786823, + "grad_norm": 1.8361209630966187, + "learning_rate": 5.310011237601994e-07, + "loss": 0.0333, + "num_input_tokens_seen": 3671680, + "step": 5435 + }, + { + "epoch": 0.1329001050497154, + "grad_norm": 76.28353118896484, + "learning_rate": 5.31489715151219e-07, + "loss": 0.123, + "num_input_tokens_seen": 3675456, + "step": 5440 + }, + { + "epoch": 0.13302225588156255, + "grad_norm": 10.621566772460938, + "learning_rate": 5.319783065422388e-07, + "loss": 0.0356, + "num_input_tokens_seen": 3679040, + "step": 5445 + }, + { + "epoch": 0.1331444067134097, + "grad_norm": 8.068056106567383, + "learning_rate": 5.324668979332584e-07, + "loss": 0.0859, + "num_input_tokens_seen": 3682816, + "step": 5450 + }, + { + "epoch": 0.13326655754525688, + "grad_norm": 26.7534122467041, + "learning_rate": 5.32955489324278e-07, + "loss": 0.2202, + "num_input_tokens_seen": 3685952, + "step": 5455 + }, + { + "epoch": 0.13338870837710404, + "grad_norm": 39.93231964111328, + "learning_rate": 5.334440807152978e-07, + "loss": 0.1361, + "num_input_tokens_seen": 3689024, + "step": 5460 + }, + { + "epoch": 0.1335108592089512, + "grad_norm": 4.293823719024658, + "learning_rate": 5.339326721063175e-07, + "loss": 0.1323, + "num_input_tokens_seen": 3692544, + "step": 5465 + }, + { + "epoch": 0.1336330100407984, + "grad_norm": 1.931091070175171, + "learning_rate": 5.344212634973372e-07, + "loss": 0.1305, + "num_input_tokens_seen": 3696000, + "step": 5470 + }, + { + "epoch": 0.13375516087264555, + "grad_norm": 73.22601318359375, + "learning_rate": 5.349098548883568e-07, + "loss": 0.2171, + "num_input_tokens_seen": 3699136, + "step": 5475 + }, + { + "epoch": 0.1338773117044927, + "grad_norm": 29.357030868530273, + "learning_rate": 5.353984462793766e-07, + "loss": 0.3066, + "num_input_tokens_seen": 3702528, + "step": 5480 + }, + { + "epoch": 0.13399946253633988, + "grad_norm": 27.00177764892578, + "learning_rate": 5.358870376703962e-07, + "loss": 0.2215, + "num_input_tokens_seen": 3705984, + "step": 5485 + }, + { + "epoch": 0.13412161336818704, + "grad_norm": 15.332404136657715, + "learning_rate": 5.363756290614159e-07, + "loss": 0.238, + "num_input_tokens_seen": 3709184, + "step": 5490 + }, + { + "epoch": 0.1342437642000342, + "grad_norm": 31.646596908569336, + "learning_rate": 5.368642204524357e-07, + "loss": 0.1463, + "num_input_tokens_seen": 3712320, + "step": 5495 + }, + { + "epoch": 0.13436591503188136, + "grad_norm": 25.798009872436523, + "learning_rate": 5.373528118434552e-07, + "loss": 0.0943, + "num_input_tokens_seen": 3715392, + "step": 5500 + }, + { + "epoch": 0.13448806586372852, + "grad_norm": 29.131210327148438, + "learning_rate": 5.37841403234475e-07, + "loss": 0.143, + "num_input_tokens_seen": 3718528, + "step": 5505 + }, + { + "epoch": 0.13461021669557569, + "grad_norm": 23.474040985107422, + "learning_rate": 5.383299946254947e-07, + "loss": 0.1029, + "num_input_tokens_seen": 3721600, + "step": 5510 + }, + { + "epoch": 0.13473236752742287, + "grad_norm": 19.45317268371582, + "learning_rate": 5.388185860165143e-07, + "loss": 0.1197, + "num_input_tokens_seen": 3725312, + "step": 5515 + }, + { + "epoch": 0.13485451835927004, + "grad_norm": 16.48733901977539, + "learning_rate": 5.393071774075341e-07, + "loss": 0.1296, + "num_input_tokens_seen": 3728448, + "step": 5520 + }, + { + "epoch": 0.1349766691911172, + "grad_norm": 48.141746520996094, + "learning_rate": 5.397957687985537e-07, + "loss": 0.1721, + "num_input_tokens_seen": 3732160, + "step": 5525 + }, + { + "epoch": 0.13509882002296436, + "grad_norm": 37.35277557373047, + "learning_rate": 5.402843601895734e-07, + "loss": 0.1383, + "num_input_tokens_seen": 3735552, + "step": 5530 + }, + { + "epoch": 0.13522097085481152, + "grad_norm": 14.689126968383789, + "learning_rate": 5.407729515805931e-07, + "loss": 0.1532, + "num_input_tokens_seen": 3738944, + "step": 5535 + }, + { + "epoch": 0.13534312168665869, + "grad_norm": 18.417491912841797, + "learning_rate": 5.412615429716129e-07, + "loss": 0.177, + "num_input_tokens_seen": 3742720, + "step": 5540 + }, + { + "epoch": 0.13546527251850585, + "grad_norm": 20.64642333984375, + "learning_rate": 5.417501343626325e-07, + "loss": 0.1112, + "num_input_tokens_seen": 3745984, + "step": 5545 + }, + { + "epoch": 0.135587423350353, + "grad_norm": 35.43182373046875, + "learning_rate": 5.422387257536522e-07, + "loss": 0.0857, + "num_input_tokens_seen": 3749696, + "step": 5550 + }, + { + "epoch": 0.13570957418220017, + "grad_norm": 17.774368286132812, + "learning_rate": 5.42727317144672e-07, + "loss": 0.0957, + "num_input_tokens_seen": 3753152, + "step": 5555 + }, + { + "epoch": 0.13583172501404733, + "grad_norm": 3.5985748767852783, + "learning_rate": 5.432159085356915e-07, + "loss": 0.0524, + "num_input_tokens_seen": 3756800, + "step": 5560 + }, + { + "epoch": 0.13595387584589452, + "grad_norm": 34.646461486816406, + "learning_rate": 5.437044999267113e-07, + "loss": 0.1252, + "num_input_tokens_seen": 3760896, + "step": 5565 + }, + { + "epoch": 0.13607602667774168, + "grad_norm": 2.2413535118103027, + "learning_rate": 5.441930913177309e-07, + "loss": 0.0735, + "num_input_tokens_seen": 3764032, + "step": 5570 + }, + { + "epoch": 0.13619817750958885, + "grad_norm": 20.598520278930664, + "learning_rate": 5.446816827087507e-07, + "loss": 0.0557, + "num_input_tokens_seen": 3767488, + "step": 5575 + }, + { + "epoch": 0.136320328341436, + "grad_norm": 30.10392189025879, + "learning_rate": 5.451702740997704e-07, + "loss": 0.2006, + "num_input_tokens_seen": 3770944, + "step": 5580 + }, + { + "epoch": 0.13644247917328317, + "grad_norm": 32.69186782836914, + "learning_rate": 5.456588654907899e-07, + "loss": 0.1705, + "num_input_tokens_seen": 3774208, + "step": 5585 + }, + { + "epoch": 0.13656463000513033, + "grad_norm": 38.73693084716797, + "learning_rate": 5.461474568818097e-07, + "loss": 0.1656, + "num_input_tokens_seen": 3777920, + "step": 5590 + }, + { + "epoch": 0.1366867808369775, + "grad_norm": 6.445084571838379, + "learning_rate": 5.466360482728294e-07, + "loss": 0.0259, + "num_input_tokens_seen": 3781504, + "step": 5595 + }, + { + "epoch": 0.13680893166882466, + "grad_norm": 60.09385681152344, + "learning_rate": 5.471246396638492e-07, + "loss": 0.1219, + "num_input_tokens_seen": 3785280, + "step": 5600 + }, + { + "epoch": 0.13693108250067182, + "grad_norm": 29.345659255981445, + "learning_rate": 5.476132310548688e-07, + "loss": 0.1452, + "num_input_tokens_seen": 3791168, + "step": 5605 + }, + { + "epoch": 0.13705323333251898, + "grad_norm": 14.892522811889648, + "learning_rate": 5.481018224458884e-07, + "loss": 0.0706, + "num_input_tokens_seen": 3794432, + "step": 5610 + }, + { + "epoch": 0.13717538416436617, + "grad_norm": 1.7643539905548096, + "learning_rate": 5.485904138369081e-07, + "loss": 0.0769, + "num_input_tokens_seen": 3797504, + "step": 5615 + }, + { + "epoch": 0.13729753499621333, + "grad_norm": 27.91608238220215, + "learning_rate": 5.490790052279278e-07, + "loss": 0.1494, + "num_input_tokens_seen": 3801280, + "step": 5620 + }, + { + "epoch": 0.1374196858280605, + "grad_norm": 32.55119705200195, + "learning_rate": 5.495675966189476e-07, + "loss": 0.1295, + "num_input_tokens_seen": 3804736, + "step": 5625 + }, + { + "epoch": 0.13754183665990766, + "grad_norm": 43.087921142578125, + "learning_rate": 5.500561880099672e-07, + "loss": 0.1242, + "num_input_tokens_seen": 3808512, + "step": 5630 + }, + { + "epoch": 0.13766398749175482, + "grad_norm": 55.47489929199219, + "learning_rate": 5.50544779400987e-07, + "loss": 0.161, + "num_input_tokens_seen": 3811776, + "step": 5635 + }, + { + "epoch": 0.13778613832360198, + "grad_norm": 23.946348190307617, + "learning_rate": 5.510333707920066e-07, + "loss": 0.1648, + "num_input_tokens_seen": 3815232, + "step": 5640 + }, + { + "epoch": 0.13790828915544914, + "grad_norm": 37.26478958129883, + "learning_rate": 5.515219621830262e-07, + "loss": 0.0756, + "num_input_tokens_seen": 3818816, + "step": 5645 + }, + { + "epoch": 0.1380304399872963, + "grad_norm": 46.365379333496094, + "learning_rate": 5.52010553574046e-07, + "loss": 0.147, + "num_input_tokens_seen": 3821952, + "step": 5650 + }, + { + "epoch": 0.13815259081914347, + "grad_norm": 12.609604835510254, + "learning_rate": 5.524991449650657e-07, + "loss": 0.0421, + "num_input_tokens_seen": 3825408, + "step": 5655 + }, + { + "epoch": 0.13827474165099066, + "grad_norm": 25.285354614257812, + "learning_rate": 5.529877363560854e-07, + "loss": 0.0934, + "num_input_tokens_seen": 3828864, + "step": 5660 + }, + { + "epoch": 0.13839689248283782, + "grad_norm": 16.68872833251953, + "learning_rate": 5.53476327747105e-07, + "loss": 0.0789, + "num_input_tokens_seen": 3832064, + "step": 5665 + }, + { + "epoch": 0.13851904331468498, + "grad_norm": 66.28137969970703, + "learning_rate": 5.539649191381247e-07, + "loss": 0.0692, + "num_input_tokens_seen": 3835264, + "step": 5670 + }, + { + "epoch": 0.13864119414653214, + "grad_norm": 0.4072583317756653, + "learning_rate": 5.544535105291444e-07, + "loss": 0.1247, + "num_input_tokens_seen": 3838528, + "step": 5675 + }, + { + "epoch": 0.1387633449783793, + "grad_norm": 72.48626708984375, + "learning_rate": 5.549421019201641e-07, + "loss": 0.1948, + "num_input_tokens_seen": 3841984, + "step": 5680 + }, + { + "epoch": 0.13888549581022647, + "grad_norm": 70.16142272949219, + "learning_rate": 5.554306933111839e-07, + "loss": 0.1382, + "num_input_tokens_seen": 3845440, + "step": 5685 + }, + { + "epoch": 0.13900764664207363, + "grad_norm": 42.96235656738281, + "learning_rate": 5.559192847022035e-07, + "loss": 0.1565, + "num_input_tokens_seen": 3848704, + "step": 5690 + }, + { + "epoch": 0.1391297974739208, + "grad_norm": 86.99781036376953, + "learning_rate": 5.564078760932232e-07, + "loss": 0.1449, + "num_input_tokens_seen": 3852480, + "step": 5695 + }, + { + "epoch": 0.13925194830576795, + "grad_norm": 43.10320281982422, + "learning_rate": 5.568964674842429e-07, + "loss": 0.2077, + "num_input_tokens_seen": 3855616, + "step": 5700 + }, + { + "epoch": 0.13937409913761512, + "grad_norm": 1.7821024656295776, + "learning_rate": 5.573850588752625e-07, + "loss": 0.0602, + "num_input_tokens_seen": 3859200, + "step": 5705 + }, + { + "epoch": 0.1394962499694623, + "grad_norm": 38.42781066894531, + "learning_rate": 5.578736502662823e-07, + "loss": 0.2262, + "num_input_tokens_seen": 3862784, + "step": 5710 + }, + { + "epoch": 0.13961840080130947, + "grad_norm": 48.475135803222656, + "learning_rate": 5.58362241657302e-07, + "loss": 0.0682, + "num_input_tokens_seen": 3865920, + "step": 5715 + }, + { + "epoch": 0.13974055163315663, + "grad_norm": 9.301112174987793, + "learning_rate": 5.588508330483217e-07, + "loss": 0.036, + "num_input_tokens_seen": 3869632, + "step": 5720 + }, + { + "epoch": 0.1398627024650038, + "grad_norm": 8.468901634216309, + "learning_rate": 5.593394244393413e-07, + "loss": 0.1062, + "num_input_tokens_seen": 3872576, + "step": 5725 + }, + { + "epoch": 0.13998485329685095, + "grad_norm": 52.738311767578125, + "learning_rate": 5.59828015830361e-07, + "loss": 0.2479, + "num_input_tokens_seen": 3876288, + "step": 5730 + }, + { + "epoch": 0.14010700412869811, + "grad_norm": 40.101619720458984, + "learning_rate": 5.603166072213807e-07, + "loss": 0.138, + "num_input_tokens_seen": 3879360, + "step": 5735 + }, + { + "epoch": 0.14022915496054528, + "grad_norm": 0.8611541390419006, + "learning_rate": 5.608051986124004e-07, + "loss": 0.1795, + "num_input_tokens_seen": 3883200, + "step": 5740 + }, + { + "epoch": 0.14035130579239244, + "grad_norm": 24.446453094482422, + "learning_rate": 5.612937900034202e-07, + "loss": 0.1743, + "num_input_tokens_seen": 3887296, + "step": 5745 + }, + { + "epoch": 0.1404734566242396, + "grad_norm": 17.659931182861328, + "learning_rate": 5.617823813944397e-07, + "loss": 0.1256, + "num_input_tokens_seen": 3890880, + "step": 5750 + }, + { + "epoch": 0.14059560745608676, + "grad_norm": 32.55177307128906, + "learning_rate": 5.622709727854595e-07, + "loss": 0.1736, + "num_input_tokens_seen": 3894208, + "step": 5755 + }, + { + "epoch": 0.14071775828793395, + "grad_norm": 14.735184669494629, + "learning_rate": 5.627595641764792e-07, + "loss": 0.1214, + "num_input_tokens_seen": 3897536, + "step": 5760 + }, + { + "epoch": 0.14083990911978111, + "grad_norm": 21.811237335205078, + "learning_rate": 5.632481555674988e-07, + "loss": 0.1589, + "num_input_tokens_seen": 3900480, + "step": 5765 + }, + { + "epoch": 0.14096205995162828, + "grad_norm": 18.260709762573242, + "learning_rate": 5.637367469585186e-07, + "loss": 0.1105, + "num_input_tokens_seen": 3903552, + "step": 5770 + }, + { + "epoch": 0.14108421078347544, + "grad_norm": 13.663732528686523, + "learning_rate": 5.642253383495383e-07, + "loss": 0.0396, + "num_input_tokens_seen": 3906688, + "step": 5775 + }, + { + "epoch": 0.1412063616153226, + "grad_norm": 4.043575763702393, + "learning_rate": 5.647139297405579e-07, + "loss": 0.1107, + "num_input_tokens_seen": 3910080, + "step": 5780 + }, + { + "epoch": 0.14132851244716976, + "grad_norm": 14.13771915435791, + "learning_rate": 5.652025211315776e-07, + "loss": 0.1385, + "num_input_tokens_seen": 3912960, + "step": 5785 + }, + { + "epoch": 0.14145066327901692, + "grad_norm": 59.14686584472656, + "learning_rate": 5.656911125225974e-07, + "loss": 0.1114, + "num_input_tokens_seen": 3915904, + "step": 5790 + }, + { + "epoch": 0.1415728141108641, + "grad_norm": 1.1978075504302979, + "learning_rate": 5.66179703913617e-07, + "loss": 0.1246, + "num_input_tokens_seen": 3919104, + "step": 5795 + }, + { + "epoch": 0.14169496494271125, + "grad_norm": 26.907318115234375, + "learning_rate": 5.666682953046367e-07, + "loss": 0.2857, + "num_input_tokens_seen": 3922496, + "step": 5800 + }, + { + "epoch": 0.1418171157745584, + "grad_norm": 70.06200408935547, + "learning_rate": 5.671568866956564e-07, + "loss": 0.0916, + "num_input_tokens_seen": 3925824, + "step": 5805 + }, + { + "epoch": 0.1419392666064056, + "grad_norm": 5.980767250061035, + "learning_rate": 5.67645478086676e-07, + "loss": 0.1194, + "num_input_tokens_seen": 3929344, + "step": 5810 + }, + { + "epoch": 0.14206141743825276, + "grad_norm": 39.06313705444336, + "learning_rate": 5.681340694776958e-07, + "loss": 0.1381, + "num_input_tokens_seen": 3932608, + "step": 5815 + }, + { + "epoch": 0.14218356827009992, + "grad_norm": 40.51398849487305, + "learning_rate": 5.686226608687155e-07, + "loss": 0.1014, + "num_input_tokens_seen": 3935936, + "step": 5820 + }, + { + "epoch": 0.1423057191019471, + "grad_norm": 18.47413444519043, + "learning_rate": 5.691112522597351e-07, + "loss": 0.129, + "num_input_tokens_seen": 3939648, + "step": 5825 + }, + { + "epoch": 0.14242786993379425, + "grad_norm": 18.596654891967773, + "learning_rate": 5.695998436507549e-07, + "loss": 0.0574, + "num_input_tokens_seen": 3942976, + "step": 5830 + }, + { + "epoch": 0.1425500207656414, + "grad_norm": 33.14046096801758, + "learning_rate": 5.700884350417745e-07, + "loss": 0.1438, + "num_input_tokens_seen": 3946048, + "step": 5835 + }, + { + "epoch": 0.14267217159748857, + "grad_norm": 20.379133224487305, + "learning_rate": 5.705770264327942e-07, + "loss": 0.1404, + "num_input_tokens_seen": 3949568, + "step": 5840 + }, + { + "epoch": 0.14279432242933573, + "grad_norm": 10.639100074768066, + "learning_rate": 5.710656178238139e-07, + "loss": 0.1302, + "num_input_tokens_seen": 3953088, + "step": 5845 + }, + { + "epoch": 0.1429164732611829, + "grad_norm": 42.40608215332031, + "learning_rate": 5.715542092148337e-07, + "loss": 0.063, + "num_input_tokens_seen": 3956416, + "step": 5850 + }, + { + "epoch": 0.1430386240930301, + "grad_norm": 35.883018493652344, + "learning_rate": 5.720428006058533e-07, + "loss": 0.1239, + "num_input_tokens_seen": 3959616, + "step": 5855 + }, + { + "epoch": 0.14316077492487725, + "grad_norm": 32.85486602783203, + "learning_rate": 5.725313919968729e-07, + "loss": 0.0998, + "num_input_tokens_seen": 3962944, + "step": 5860 + }, + { + "epoch": 0.1432829257567244, + "grad_norm": 27.143404006958008, + "learning_rate": 5.730199833878927e-07, + "loss": 0.1661, + "num_input_tokens_seen": 3966528, + "step": 5865 + }, + { + "epoch": 0.14340507658857157, + "grad_norm": 25.951753616333008, + "learning_rate": 5.735085747789123e-07, + "loss": 0.0673, + "num_input_tokens_seen": 3969728, + "step": 5870 + }, + { + "epoch": 0.14352722742041873, + "grad_norm": 36.098236083984375, + "learning_rate": 5.739971661699321e-07, + "loss": 0.1448, + "num_input_tokens_seen": 3973056, + "step": 5875 + }, + { + "epoch": 0.1436493782522659, + "grad_norm": 1.0197221040725708, + "learning_rate": 5.744857575609518e-07, + "loss": 0.063, + "num_input_tokens_seen": 3976192, + "step": 5880 + }, + { + "epoch": 0.14377152908411306, + "grad_norm": 60.88951873779297, + "learning_rate": 5.749743489519713e-07, + "loss": 0.2091, + "num_input_tokens_seen": 3980480, + "step": 5885 + }, + { + "epoch": 0.14389367991596022, + "grad_norm": 72.92743682861328, + "learning_rate": 5.754629403429911e-07, + "loss": 0.15, + "num_input_tokens_seen": 3984000, + "step": 5890 + }, + { + "epoch": 0.14401583074780738, + "grad_norm": 2.2103939056396484, + "learning_rate": 5.759515317340108e-07, + "loss": 0.1052, + "num_input_tokens_seen": 3987264, + "step": 5895 + }, + { + "epoch": 0.14413798157965454, + "grad_norm": 27.705286026000977, + "learning_rate": 5.764401231250305e-07, + "loss": 0.0707, + "num_input_tokens_seen": 3990784, + "step": 5900 + }, + { + "epoch": 0.14426013241150173, + "grad_norm": 31.684846878051758, + "learning_rate": 5.769287145160502e-07, + "loss": 0.1064, + "num_input_tokens_seen": 3994304, + "step": 5905 + }, + { + "epoch": 0.1443822832433489, + "grad_norm": 13.381579399108887, + "learning_rate": 5.7741730590707e-07, + "loss": 0.0527, + "num_input_tokens_seen": 3997504, + "step": 5910 + }, + { + "epoch": 0.14450443407519606, + "grad_norm": 38.11252975463867, + "learning_rate": 5.779058972980895e-07, + "loss": 0.1981, + "num_input_tokens_seen": 4000832, + "step": 5915 + }, + { + "epoch": 0.14462658490704322, + "grad_norm": 65.77743530273438, + "learning_rate": 5.783944886891092e-07, + "loss": 0.0958, + "num_input_tokens_seen": 4004352, + "step": 5920 + }, + { + "epoch": 0.14474873573889038, + "grad_norm": 28.584915161132812, + "learning_rate": 5.78883080080129e-07, + "loss": 0.1188, + "num_input_tokens_seen": 4007552, + "step": 5925 + }, + { + "epoch": 0.14487088657073754, + "grad_norm": 58.487369537353516, + "learning_rate": 5.793716714711486e-07, + "loss": 0.1757, + "num_input_tokens_seen": 4010944, + "step": 5930 + }, + { + "epoch": 0.1449930374025847, + "grad_norm": 52.59897232055664, + "learning_rate": 5.798602628621684e-07, + "loss": 0.0767, + "num_input_tokens_seen": 4014528, + "step": 5935 + }, + { + "epoch": 0.14511518823443187, + "grad_norm": 15.854714393615723, + "learning_rate": 5.80348854253188e-07, + "loss": 0.1921, + "num_input_tokens_seen": 4018368, + "step": 5940 + }, + { + "epoch": 0.14523733906627903, + "grad_norm": 4.226526260375977, + "learning_rate": 5.808374456442076e-07, + "loss": 0.0764, + "num_input_tokens_seen": 4021888, + "step": 5945 + }, + { + "epoch": 0.1453594898981262, + "grad_norm": 40.54411315917969, + "learning_rate": 5.813260370352274e-07, + "loss": 0.0979, + "num_input_tokens_seen": 4025024, + "step": 5950 + }, + { + "epoch": 0.14548164072997338, + "grad_norm": 2.4570257663726807, + "learning_rate": 5.818146284262471e-07, + "loss": 0.1028, + "num_input_tokens_seen": 4028416, + "step": 5955 + }, + { + "epoch": 0.14560379156182054, + "grad_norm": 29.194568634033203, + "learning_rate": 5.823032198172668e-07, + "loss": 0.1325, + "num_input_tokens_seen": 4031488, + "step": 5960 + }, + { + "epoch": 0.1457259423936677, + "grad_norm": 87.35152435302734, + "learning_rate": 5.827918112082865e-07, + "loss": 0.305, + "num_input_tokens_seen": 4035008, + "step": 5965 + }, + { + "epoch": 0.14584809322551487, + "grad_norm": 76.78803253173828, + "learning_rate": 5.832804025993062e-07, + "loss": 0.1991, + "num_input_tokens_seen": 4038272, + "step": 5970 + }, + { + "epoch": 0.14597024405736203, + "grad_norm": 54.32789611816406, + "learning_rate": 5.837689939903258e-07, + "loss": 0.1803, + "num_input_tokens_seen": 4041536, + "step": 5975 + }, + { + "epoch": 0.1460923948892092, + "grad_norm": 45.33341598510742, + "learning_rate": 5.842575853813455e-07, + "loss": 0.2937, + "num_input_tokens_seen": 4044608, + "step": 5980 + }, + { + "epoch": 0.14621454572105635, + "grad_norm": 4.984194755554199, + "learning_rate": 5.847461767723653e-07, + "loss": 0.0937, + "num_input_tokens_seen": 4048064, + "step": 5985 + }, + { + "epoch": 0.14633669655290352, + "grad_norm": 22.13462257385254, + "learning_rate": 5.852347681633849e-07, + "loss": 0.098, + "num_input_tokens_seen": 4051392, + "step": 5990 + }, + { + "epoch": 0.14645884738475068, + "grad_norm": 66.0450439453125, + "learning_rate": 5.857233595544047e-07, + "loss": 0.1357, + "num_input_tokens_seen": 4055232, + "step": 5995 + }, + { + "epoch": 0.14658099821659787, + "grad_norm": 8.29734992980957, + "learning_rate": 5.862119509454243e-07, + "loss": 0.1316, + "num_input_tokens_seen": 4058112, + "step": 6000 + }, + { + "epoch": 0.14670314904844503, + "grad_norm": 41.1361083984375, + "learning_rate": 5.86700542336444e-07, + "loss": 0.1374, + "num_input_tokens_seen": 4061056, + "step": 6005 + }, + { + "epoch": 0.1468252998802922, + "grad_norm": 7.894331932067871, + "learning_rate": 5.871891337274637e-07, + "loss": 0.112, + "num_input_tokens_seen": 4064640, + "step": 6010 + }, + { + "epoch": 0.14694745071213935, + "grad_norm": 4.169567108154297, + "learning_rate": 5.876777251184834e-07, + "loss": 0.1227, + "num_input_tokens_seen": 4067904, + "step": 6015 + }, + { + "epoch": 0.14706960154398652, + "grad_norm": 46.6483154296875, + "learning_rate": 5.881663165095031e-07, + "loss": 0.213, + "num_input_tokens_seen": 4071232, + "step": 6020 + }, + { + "epoch": 0.14719175237583368, + "grad_norm": 41.06544494628906, + "learning_rate": 5.886549079005227e-07, + "loss": 0.0935, + "num_input_tokens_seen": 4074496, + "step": 6025 + }, + { + "epoch": 0.14731390320768084, + "grad_norm": 31.227313995361328, + "learning_rate": 5.891434992915425e-07, + "loss": 0.1533, + "num_input_tokens_seen": 4077888, + "step": 6030 + }, + { + "epoch": 0.147436054039528, + "grad_norm": 20.721681594848633, + "learning_rate": 5.896320906825621e-07, + "loss": 0.1824, + "num_input_tokens_seen": 4080960, + "step": 6035 + }, + { + "epoch": 0.14755820487137516, + "grad_norm": 46.61140060424805, + "learning_rate": 5.901206820735818e-07, + "loss": 0.1432, + "num_input_tokens_seen": 4084416, + "step": 6040 + }, + { + "epoch": 0.14768035570322233, + "grad_norm": 19.30379295349121, + "learning_rate": 5.906092734646016e-07, + "loss": 0.0937, + "num_input_tokens_seen": 4087744, + "step": 6045 + }, + { + "epoch": 0.14780250653506952, + "grad_norm": 21.93967056274414, + "learning_rate": 5.910978648556211e-07, + "loss": 0.1235, + "num_input_tokens_seen": 4091200, + "step": 6050 + }, + { + "epoch": 0.14792465736691668, + "grad_norm": 20.221229553222656, + "learning_rate": 5.915864562466409e-07, + "loss": 0.0512, + "num_input_tokens_seen": 4094336, + "step": 6055 + }, + { + "epoch": 0.14804680819876384, + "grad_norm": 28.617212295532227, + "learning_rate": 5.920750476376606e-07, + "loss": 0.1712, + "num_input_tokens_seen": 4098048, + "step": 6060 + }, + { + "epoch": 0.148168959030611, + "grad_norm": 41.802513122558594, + "learning_rate": 5.925636390286803e-07, + "loss": 0.1851, + "num_input_tokens_seen": 4101760, + "step": 6065 + }, + { + "epoch": 0.14829110986245816, + "grad_norm": 14.1126708984375, + "learning_rate": 5.930522304197e-07, + "loss": 0.1798, + "num_input_tokens_seen": 4105408, + "step": 6070 + }, + { + "epoch": 0.14841326069430533, + "grad_norm": 62.553184509277344, + "learning_rate": 5.935408218107197e-07, + "loss": 0.1347, + "num_input_tokens_seen": 4109312, + "step": 6075 + }, + { + "epoch": 0.1485354115261525, + "grad_norm": 20.548730850219727, + "learning_rate": 5.940294132017393e-07, + "loss": 0.1295, + "num_input_tokens_seen": 4112640, + "step": 6080 + }, + { + "epoch": 0.14865756235799965, + "grad_norm": 14.097305297851562, + "learning_rate": 5.94518004592759e-07, + "loss": 0.1796, + "num_input_tokens_seen": 4115904, + "step": 6085 + }, + { + "epoch": 0.1487797131898468, + "grad_norm": 3.2791802883148193, + "learning_rate": 5.950065959837788e-07, + "loss": 0.0824, + "num_input_tokens_seen": 4119360, + "step": 6090 + }, + { + "epoch": 0.14890186402169397, + "grad_norm": 2.169283390045166, + "learning_rate": 5.954951873747984e-07, + "loss": 0.1154, + "num_input_tokens_seen": 4122816, + "step": 6095 + }, + { + "epoch": 0.14902401485354116, + "grad_norm": 23.00556755065918, + "learning_rate": 5.959837787658181e-07, + "loss": 0.1101, + "num_input_tokens_seen": 4126144, + "step": 6100 + }, + { + "epoch": 0.14914616568538833, + "grad_norm": 38.71464920043945, + "learning_rate": 5.964723701568379e-07, + "loss": 0.0952, + "num_input_tokens_seen": 4129216, + "step": 6105 + }, + { + "epoch": 0.1492683165172355, + "grad_norm": 31.493146896362305, + "learning_rate": 5.969609615478574e-07, + "loss": 0.1513, + "num_input_tokens_seen": 4132032, + "step": 6110 + }, + { + "epoch": 0.14939046734908265, + "grad_norm": 43.34003448486328, + "learning_rate": 5.974495529388772e-07, + "loss": 0.1582, + "num_input_tokens_seen": 4135616, + "step": 6115 + }, + { + "epoch": 0.1495126181809298, + "grad_norm": 52.86286926269531, + "learning_rate": 5.979381443298969e-07, + "loss": 0.1166, + "num_input_tokens_seen": 4139136, + "step": 6120 + }, + { + "epoch": 0.14963476901277697, + "grad_norm": 17.101299285888672, + "learning_rate": 5.984267357209166e-07, + "loss": 0.223, + "num_input_tokens_seen": 4142464, + "step": 6125 + }, + { + "epoch": 0.14975691984462414, + "grad_norm": 23.224618911743164, + "learning_rate": 5.989153271119363e-07, + "loss": 0.063, + "num_input_tokens_seen": 4145920, + "step": 6130 + }, + { + "epoch": 0.1498790706764713, + "grad_norm": 6.051283836364746, + "learning_rate": 5.994039185029559e-07, + "loss": 0.0648, + "num_input_tokens_seen": 4149568, + "step": 6135 + }, + { + "epoch": 0.15000122150831846, + "grad_norm": 22.5601806640625, + "learning_rate": 5.998925098939756e-07, + "loss": 0.2033, + "num_input_tokens_seen": 4153088, + "step": 6140 + }, + { + "epoch": 0.15012337234016562, + "grad_norm": 42.55513000488281, + "learning_rate": 6.003811012849953e-07, + "loss": 0.1186, + "num_input_tokens_seen": 4156544, + "step": 6145 + }, + { + "epoch": 0.1502455231720128, + "grad_norm": 50.73028564453125, + "learning_rate": 6.008696926760151e-07, + "loss": 0.086, + "num_input_tokens_seen": 4159872, + "step": 6150 + }, + { + "epoch": 0.15036767400385997, + "grad_norm": 19.807815551757812, + "learning_rate": 6.013582840670347e-07, + "loss": 0.1547, + "num_input_tokens_seen": 4162880, + "step": 6155 + }, + { + "epoch": 0.15048982483570714, + "grad_norm": 31.513826370239258, + "learning_rate": 6.018468754580543e-07, + "loss": 0.1011, + "num_input_tokens_seen": 4166208, + "step": 6160 + }, + { + "epoch": 0.1506119756675543, + "grad_norm": 3.1334080696105957, + "learning_rate": 6.023354668490741e-07, + "loss": 0.0551, + "num_input_tokens_seen": 4169472, + "step": 6165 + }, + { + "epoch": 0.15073412649940146, + "grad_norm": 43.65282440185547, + "learning_rate": 6.028240582400937e-07, + "loss": 0.1453, + "num_input_tokens_seen": 4172544, + "step": 6170 + }, + { + "epoch": 0.15085627733124862, + "grad_norm": 25.209524154663086, + "learning_rate": 6.033126496311135e-07, + "loss": 0.2232, + "num_input_tokens_seen": 4176384, + "step": 6175 + }, + { + "epoch": 0.15097842816309578, + "grad_norm": 25.307361602783203, + "learning_rate": 6.038012410221332e-07, + "loss": 0.0965, + "num_input_tokens_seen": 4179904, + "step": 6180 + }, + { + "epoch": 0.15110057899494295, + "grad_norm": 13.137029647827148, + "learning_rate": 6.042898324131529e-07, + "loss": 0.0665, + "num_input_tokens_seen": 4183104, + "step": 6185 + }, + { + "epoch": 0.1512227298267901, + "grad_norm": 2.0337600708007812, + "learning_rate": 6.047784238041725e-07, + "loss": 0.1853, + "num_input_tokens_seen": 4186624, + "step": 6190 + }, + { + "epoch": 0.1513448806586373, + "grad_norm": 28.856618881225586, + "learning_rate": 6.052670151951922e-07, + "loss": 0.1416, + "num_input_tokens_seen": 4189568, + "step": 6195 + }, + { + "epoch": 0.15146703149048446, + "grad_norm": 7.899556636810303, + "learning_rate": 6.057556065862119e-07, + "loss": 0.0886, + "num_input_tokens_seen": 4192960, + "step": 6200 + }, + { + "epoch": 0.15158918232233162, + "grad_norm": 30.406021118164062, + "learning_rate": 6.062441979772316e-07, + "loss": 0.1363, + "num_input_tokens_seen": 4196032, + "step": 6205 + }, + { + "epoch": 0.15171133315417878, + "grad_norm": 42.989532470703125, + "learning_rate": 6.067327893682514e-07, + "loss": 0.1335, + "num_input_tokens_seen": 4199488, + "step": 6210 + }, + { + "epoch": 0.15183348398602595, + "grad_norm": 5.426946640014648, + "learning_rate": 6.07221380759271e-07, + "loss": 0.1709, + "num_input_tokens_seen": 4202816, + "step": 6215 + }, + { + "epoch": 0.1519556348178731, + "grad_norm": 3.0106301307678223, + "learning_rate": 6.077099721502907e-07, + "loss": 0.1961, + "num_input_tokens_seen": 4205760, + "step": 6220 + }, + { + "epoch": 0.15207778564972027, + "grad_norm": 56.07905578613281, + "learning_rate": 6.081985635413104e-07, + "loss": 0.1633, + "num_input_tokens_seen": 4209152, + "step": 6225 + }, + { + "epoch": 0.15219993648156743, + "grad_norm": 35.35799026489258, + "learning_rate": 6.0868715493233e-07, + "loss": 0.2286, + "num_input_tokens_seen": 4212288, + "step": 6230 + }, + { + "epoch": 0.1523220873134146, + "grad_norm": 20.754413604736328, + "learning_rate": 6.091757463233498e-07, + "loss": 0.1305, + "num_input_tokens_seen": 4215744, + "step": 6235 + }, + { + "epoch": 0.15244423814526176, + "grad_norm": 15.790669441223145, + "learning_rate": 6.096643377143695e-07, + "loss": 0.1388, + "num_input_tokens_seen": 4219008, + "step": 6240 + }, + { + "epoch": 0.15256638897710895, + "grad_norm": 18.692581176757812, + "learning_rate": 6.101529291053891e-07, + "loss": 0.0912, + "num_input_tokens_seen": 4222272, + "step": 6245 + }, + { + "epoch": 0.1526885398089561, + "grad_norm": 33.882667541503906, + "learning_rate": 6.106415204964088e-07, + "loss": 0.1363, + "num_input_tokens_seen": 4225792, + "step": 6250 + }, + { + "epoch": 0.15281069064080327, + "grad_norm": 14.197614669799805, + "learning_rate": 6.111301118874285e-07, + "loss": 0.1123, + "num_input_tokens_seen": 4228800, + "step": 6255 + }, + { + "epoch": 0.15293284147265043, + "grad_norm": 42.19538116455078, + "learning_rate": 6.116187032784482e-07, + "loss": 0.1594, + "num_input_tokens_seen": 4232000, + "step": 6260 + }, + { + "epoch": 0.1530549923044976, + "grad_norm": 10.284985542297363, + "learning_rate": 6.121072946694679e-07, + "loss": 0.1191, + "num_input_tokens_seen": 4235328, + "step": 6265 + }, + { + "epoch": 0.15317714313634476, + "grad_norm": 31.913787841796875, + "learning_rate": 6.125958860604877e-07, + "loss": 0.1284, + "num_input_tokens_seen": 4238784, + "step": 6270 + }, + { + "epoch": 0.15329929396819192, + "grad_norm": 46.03422927856445, + "learning_rate": 6.130844774515072e-07, + "loss": 0.161, + "num_input_tokens_seen": 4242112, + "step": 6275 + }, + { + "epoch": 0.15342144480003908, + "grad_norm": 47.99618911743164, + "learning_rate": 6.13573068842527e-07, + "loss": 0.1836, + "num_input_tokens_seen": 4245568, + "step": 6280 + }, + { + "epoch": 0.15354359563188624, + "grad_norm": 10.648454666137695, + "learning_rate": 6.140616602335467e-07, + "loss": 0.1363, + "num_input_tokens_seen": 4248896, + "step": 6285 + }, + { + "epoch": 0.1536657464637334, + "grad_norm": 16.757213592529297, + "learning_rate": 6.145502516245663e-07, + "loss": 0.1812, + "num_input_tokens_seen": 4252096, + "step": 6290 + }, + { + "epoch": 0.1537878972955806, + "grad_norm": 27.234424591064453, + "learning_rate": 6.150388430155861e-07, + "loss": 0.1504, + "num_input_tokens_seen": 4255424, + "step": 6295 + }, + { + "epoch": 0.15391004812742776, + "grad_norm": 17.769634246826172, + "learning_rate": 6.155274344066057e-07, + "loss": 0.1123, + "num_input_tokens_seen": 4258816, + "step": 6300 + }, + { + "epoch": 0.15403219895927492, + "grad_norm": 18.891216278076172, + "learning_rate": 6.160160257976254e-07, + "loss": 0.0778, + "num_input_tokens_seen": 4262272, + "step": 6305 + }, + { + "epoch": 0.15415434979112208, + "grad_norm": 5.673632621765137, + "learning_rate": 6.165046171886451e-07, + "loss": 0.1027, + "num_input_tokens_seen": 4265472, + "step": 6310 + }, + { + "epoch": 0.15427650062296924, + "grad_norm": 21.022520065307617, + "learning_rate": 6.169932085796648e-07, + "loss": 0.0871, + "num_input_tokens_seen": 4268416, + "step": 6315 + }, + { + "epoch": 0.1543986514548164, + "grad_norm": 8.996367454528809, + "learning_rate": 6.174817999706845e-07, + "loss": 0.1307, + "num_input_tokens_seen": 4271232, + "step": 6320 + }, + { + "epoch": 0.15452080228666357, + "grad_norm": 35.42950439453125, + "learning_rate": 6.179703913617041e-07, + "loss": 0.0743, + "num_input_tokens_seen": 4274624, + "step": 6325 + }, + { + "epoch": 0.15464295311851073, + "grad_norm": 14.378525733947754, + "learning_rate": 6.184589827527239e-07, + "loss": 0.0905, + "num_input_tokens_seen": 4277824, + "step": 6330 + }, + { + "epoch": 0.1547651039503579, + "grad_norm": 25.608861923217773, + "learning_rate": 6.189475741437435e-07, + "loss": 0.1555, + "num_input_tokens_seen": 4280768, + "step": 6335 + }, + { + "epoch": 0.15488725478220508, + "grad_norm": 11.071989059448242, + "learning_rate": 6.194361655347633e-07, + "loss": 0.1338, + "num_input_tokens_seen": 4283776, + "step": 6340 + }, + { + "epoch": 0.15500940561405224, + "grad_norm": 30.60003662109375, + "learning_rate": 6.19924756925783e-07, + "loss": 0.0846, + "num_input_tokens_seen": 4287168, + "step": 6345 + }, + { + "epoch": 0.1551315564458994, + "grad_norm": 54.509544372558594, + "learning_rate": 6.204133483168026e-07, + "loss": 0.0945, + "num_input_tokens_seen": 4290496, + "step": 6350 + }, + { + "epoch": 0.15525370727774657, + "grad_norm": 45.7187385559082, + "learning_rate": 6.209019397078223e-07, + "loss": 0.1313, + "num_input_tokens_seen": 4294464, + "step": 6355 + }, + { + "epoch": 0.15537585810959373, + "grad_norm": 20.216039657592773, + "learning_rate": 6.21390531098842e-07, + "loss": 0.113, + "num_input_tokens_seen": 4298048, + "step": 6360 + }, + { + "epoch": 0.1554980089414409, + "grad_norm": 25.05139923095703, + "learning_rate": 6.218791224898617e-07, + "loss": 0.0559, + "num_input_tokens_seen": 4300928, + "step": 6365 + }, + { + "epoch": 0.15562015977328805, + "grad_norm": 52.927974700927734, + "learning_rate": 6.223677138808814e-07, + "loss": 0.1924, + "num_input_tokens_seen": 4304320, + "step": 6370 + }, + { + "epoch": 0.15574231060513521, + "grad_norm": 3.7601611614227295, + "learning_rate": 6.228563052719011e-07, + "loss": 0.0631, + "num_input_tokens_seen": 4307584, + "step": 6375 + }, + { + "epoch": 0.15586446143698238, + "grad_norm": 96.52560424804688, + "learning_rate": 6.233448966629207e-07, + "loss": 0.2685, + "num_input_tokens_seen": 4310528, + "step": 6380 + }, + { + "epoch": 0.15598661226882954, + "grad_norm": 10.017736434936523, + "learning_rate": 6.238334880539404e-07, + "loss": 0.1225, + "num_input_tokens_seen": 4313664, + "step": 6385 + }, + { + "epoch": 0.15610876310067673, + "grad_norm": 38.85869598388672, + "learning_rate": 6.243220794449602e-07, + "loss": 0.0834, + "num_input_tokens_seen": 4316992, + "step": 6390 + }, + { + "epoch": 0.1562309139325239, + "grad_norm": 20.78007698059082, + "learning_rate": 6.248106708359798e-07, + "loss": 0.1642, + "num_input_tokens_seen": 4320320, + "step": 6395 + }, + { + "epoch": 0.15635306476437105, + "grad_norm": 3.2366719245910645, + "learning_rate": 6.252992622269996e-07, + "loss": 0.1726, + "num_input_tokens_seen": 4323840, + "step": 6400 + }, + { + "epoch": 0.15647521559621821, + "grad_norm": 15.642763137817383, + "learning_rate": 6.257878536180193e-07, + "loss": 0.0551, + "num_input_tokens_seen": 4327104, + "step": 6405 + }, + { + "epoch": 0.15659736642806538, + "grad_norm": 22.45438575744629, + "learning_rate": 6.262764450090388e-07, + "loss": 0.1901, + "num_input_tokens_seen": 4330560, + "step": 6410 + }, + { + "epoch": 0.15671951725991254, + "grad_norm": 53.00764465332031, + "learning_rate": 6.267650364000586e-07, + "loss": 0.1802, + "num_input_tokens_seen": 4333952, + "step": 6415 + }, + { + "epoch": 0.1568416680917597, + "grad_norm": 15.467325210571289, + "learning_rate": 6.272536277910783e-07, + "loss": 0.0586, + "num_input_tokens_seen": 4337280, + "step": 6420 + }, + { + "epoch": 0.15696381892360686, + "grad_norm": 1.292763352394104, + "learning_rate": 6.27742219182098e-07, + "loss": 0.0911, + "num_input_tokens_seen": 4340736, + "step": 6425 + }, + { + "epoch": 0.15708596975545402, + "grad_norm": 24.337791442871094, + "learning_rate": 6.282308105731177e-07, + "loss": 0.1349, + "num_input_tokens_seen": 4343808, + "step": 6430 + }, + { + "epoch": 0.1572081205873012, + "grad_norm": 54.08406066894531, + "learning_rate": 6.287194019641373e-07, + "loss": 0.1372, + "num_input_tokens_seen": 4347136, + "step": 6435 + }, + { + "epoch": 0.15733027141914838, + "grad_norm": 18.846115112304688, + "learning_rate": 6.29207993355157e-07, + "loss": 0.0889, + "num_input_tokens_seen": 4350336, + "step": 6440 + }, + { + "epoch": 0.15745242225099554, + "grad_norm": 24.113834381103516, + "learning_rate": 6.296965847461767e-07, + "loss": 0.099, + "num_input_tokens_seen": 4353600, + "step": 6445 + }, + { + "epoch": 0.1575745730828427, + "grad_norm": 33.02431106567383, + "learning_rate": 6.301851761371965e-07, + "loss": 0.2071, + "num_input_tokens_seen": 4357056, + "step": 6450 + }, + { + "epoch": 0.15769672391468986, + "grad_norm": 27.968963623046875, + "learning_rate": 6.306737675282161e-07, + "loss": 0.2063, + "num_input_tokens_seen": 4360704, + "step": 6455 + }, + { + "epoch": 0.15781887474653702, + "grad_norm": 16.813751220703125, + "learning_rate": 6.311623589192359e-07, + "loss": 0.1524, + "num_input_tokens_seen": 4364352, + "step": 6460 + }, + { + "epoch": 0.1579410255783842, + "grad_norm": 25.05374526977539, + "learning_rate": 6.316509503102555e-07, + "loss": 0.1161, + "num_input_tokens_seen": 4368192, + "step": 6465 + }, + { + "epoch": 0.15806317641023135, + "grad_norm": 23.216142654418945, + "learning_rate": 6.321395417012751e-07, + "loss": 0.1139, + "num_input_tokens_seen": 4371456, + "step": 6470 + }, + { + "epoch": 0.1581853272420785, + "grad_norm": 33.85103225708008, + "learning_rate": 6.326281330922949e-07, + "loss": 0.1646, + "num_input_tokens_seen": 4374400, + "step": 6475 + }, + { + "epoch": 0.15830747807392567, + "grad_norm": 21.796924591064453, + "learning_rate": 6.331167244833146e-07, + "loss": 0.1281, + "num_input_tokens_seen": 4377792, + "step": 6480 + }, + { + "epoch": 0.15842962890577283, + "grad_norm": 37.538352966308594, + "learning_rate": 6.336053158743343e-07, + "loss": 0.1209, + "num_input_tokens_seen": 4381120, + "step": 6485 + }, + { + "epoch": 0.15855177973762002, + "grad_norm": 17.142486572265625, + "learning_rate": 6.34093907265354e-07, + "loss": 0.2482, + "num_input_tokens_seen": 4384448, + "step": 6490 + }, + { + "epoch": 0.1586739305694672, + "grad_norm": 6.992273807525635, + "learning_rate": 6.345824986563737e-07, + "loss": 0.113, + "num_input_tokens_seen": 4387520, + "step": 6495 + }, + { + "epoch": 0.15879608140131435, + "grad_norm": 12.753617286682129, + "learning_rate": 6.350710900473933e-07, + "loss": 0.0973, + "num_input_tokens_seen": 4391040, + "step": 6500 + }, + { + "epoch": 0.1589182322331615, + "grad_norm": 6.668143272399902, + "learning_rate": 6.35559681438413e-07, + "loss": 0.151, + "num_input_tokens_seen": 4394880, + "step": 6505 + }, + { + "epoch": 0.15904038306500867, + "grad_norm": 21.62032699584961, + "learning_rate": 6.360482728294328e-07, + "loss": 0.0759, + "num_input_tokens_seen": 4397824, + "step": 6510 + }, + { + "epoch": 0.15916253389685583, + "grad_norm": 5.089215278625488, + "learning_rate": 6.365368642204524e-07, + "loss": 0.0951, + "num_input_tokens_seen": 4401152, + "step": 6515 + }, + { + "epoch": 0.159284684728703, + "grad_norm": 45.12503433227539, + "learning_rate": 6.370254556114721e-07, + "loss": 0.0996, + "num_input_tokens_seen": 4404352, + "step": 6520 + }, + { + "epoch": 0.15940683556055016, + "grad_norm": 37.46071243286133, + "learning_rate": 6.375140470024918e-07, + "loss": 0.1367, + "num_input_tokens_seen": 4407488, + "step": 6525 + }, + { + "epoch": 0.15952898639239732, + "grad_norm": 31.072433471679688, + "learning_rate": 6.380026383935114e-07, + "loss": 0.0958, + "num_input_tokens_seen": 4410816, + "step": 6530 + }, + { + "epoch": 0.1596511372242445, + "grad_norm": 12.748991012573242, + "learning_rate": 6.384912297845312e-07, + "loss": 0.015, + "num_input_tokens_seen": 4414016, + "step": 6535 + }, + { + "epoch": 0.15977328805609167, + "grad_norm": 47.43623733520508, + "learning_rate": 6.389798211755509e-07, + "loss": 0.271, + "num_input_tokens_seen": 4417472, + "step": 6540 + }, + { + "epoch": 0.15989543888793883, + "grad_norm": 43.680030822753906, + "learning_rate": 6.394684125665705e-07, + "loss": 0.1196, + "num_input_tokens_seen": 4420544, + "step": 6545 + }, + { + "epoch": 0.160017589719786, + "grad_norm": 1.3316268920898438, + "learning_rate": 6.399570039575902e-07, + "loss": 0.0459, + "num_input_tokens_seen": 4423936, + "step": 6550 + }, + { + "epoch": 0.16013974055163316, + "grad_norm": 41.480247497558594, + "learning_rate": 6.4044559534861e-07, + "loss": 0.1126, + "num_input_tokens_seen": 4427520, + "step": 6555 + }, + { + "epoch": 0.16026189138348032, + "grad_norm": 75.16388702392578, + "learning_rate": 6.409341867396296e-07, + "loss": 0.1337, + "num_input_tokens_seen": 4431296, + "step": 6560 + }, + { + "epoch": 0.16038404221532748, + "grad_norm": 42.94929885864258, + "learning_rate": 6.414227781306493e-07, + "loss": 0.2227, + "num_input_tokens_seen": 4434176, + "step": 6565 + }, + { + "epoch": 0.16050619304717464, + "grad_norm": 96.06494903564453, + "learning_rate": 6.419113695216691e-07, + "loss": 0.0667, + "num_input_tokens_seen": 4437568, + "step": 6570 + }, + { + "epoch": 0.1606283438790218, + "grad_norm": 25.993467330932617, + "learning_rate": 6.423999609126886e-07, + "loss": 0.0748, + "num_input_tokens_seen": 4440960, + "step": 6575 + }, + { + "epoch": 0.16075049471086897, + "grad_norm": 18.6013240814209, + "learning_rate": 6.428885523037084e-07, + "loss": 0.1785, + "num_input_tokens_seen": 4444224, + "step": 6580 + }, + { + "epoch": 0.16087264554271616, + "grad_norm": 16.228498458862305, + "learning_rate": 6.433771436947281e-07, + "loss": 0.1116, + "num_input_tokens_seen": 4447424, + "step": 6585 + }, + { + "epoch": 0.16099479637456332, + "grad_norm": 56.65265655517578, + "learning_rate": 6.438657350857477e-07, + "loss": 0.178, + "num_input_tokens_seen": 4450688, + "step": 6590 + }, + { + "epoch": 0.16111694720641048, + "grad_norm": 89.38908386230469, + "learning_rate": 6.443543264767675e-07, + "loss": 0.2095, + "num_input_tokens_seen": 4454336, + "step": 6595 + }, + { + "epoch": 0.16123909803825764, + "grad_norm": 23.842357635498047, + "learning_rate": 6.448429178677871e-07, + "loss": 0.0842, + "num_input_tokens_seen": 4457792, + "step": 6600 + }, + { + "epoch": 0.1613612488701048, + "grad_norm": 14.655643463134766, + "learning_rate": 6.453315092588068e-07, + "loss": 0.1051, + "num_input_tokens_seen": 4460992, + "step": 6605 + }, + { + "epoch": 0.16148339970195197, + "grad_norm": 38.159698486328125, + "learning_rate": 6.458201006498265e-07, + "loss": 0.1712, + "num_input_tokens_seen": 4464576, + "step": 6610 + }, + { + "epoch": 0.16160555053379913, + "grad_norm": 29.461435317993164, + "learning_rate": 6.463086920408463e-07, + "loss": 0.1776, + "num_input_tokens_seen": 4467584, + "step": 6615 + }, + { + "epoch": 0.1617277013656463, + "grad_norm": 38.899383544921875, + "learning_rate": 6.467972834318659e-07, + "loss": 0.2165, + "num_input_tokens_seen": 4470848, + "step": 6620 + }, + { + "epoch": 0.16184985219749345, + "grad_norm": 15.065881729125977, + "learning_rate": 6.472858748228856e-07, + "loss": 0.2397, + "num_input_tokens_seen": 4474368, + "step": 6625 + }, + { + "epoch": 0.16197200302934062, + "grad_norm": 47.70626449584961, + "learning_rate": 6.477744662139053e-07, + "loss": 0.1456, + "num_input_tokens_seen": 4478208, + "step": 6630 + }, + { + "epoch": 0.1620941538611878, + "grad_norm": 10.539874076843262, + "learning_rate": 6.482630576049249e-07, + "loss": 0.1784, + "num_input_tokens_seen": 4481664, + "step": 6635 + }, + { + "epoch": 0.16221630469303497, + "grad_norm": 13.360639572143555, + "learning_rate": 6.487516489959447e-07, + "loss": 0.1122, + "num_input_tokens_seen": 4484736, + "step": 6640 + }, + { + "epoch": 0.16233845552488213, + "grad_norm": 4.426427364349365, + "learning_rate": 6.492402403869644e-07, + "loss": 0.0607, + "num_input_tokens_seen": 4489024, + "step": 6645 + }, + { + "epoch": 0.1624606063567293, + "grad_norm": 3.5760512351989746, + "learning_rate": 6.49728831777984e-07, + "loss": 0.0822, + "num_input_tokens_seen": 4492352, + "step": 6650 + }, + { + "epoch": 0.16258275718857645, + "grad_norm": 29.747814178466797, + "learning_rate": 6.502174231690037e-07, + "loss": 0.1032, + "num_input_tokens_seen": 4495360, + "step": 6655 + }, + { + "epoch": 0.16270490802042362, + "grad_norm": 19.472917556762695, + "learning_rate": 6.507060145600234e-07, + "loss": 0.0636, + "num_input_tokens_seen": 4498752, + "step": 6660 + }, + { + "epoch": 0.16282705885227078, + "grad_norm": 37.16254425048828, + "learning_rate": 6.511946059510431e-07, + "loss": 0.2362, + "num_input_tokens_seen": 4501760, + "step": 6665 + }, + { + "epoch": 0.16294920968411794, + "grad_norm": 71.30066680908203, + "learning_rate": 6.516831973420628e-07, + "loss": 0.243, + "num_input_tokens_seen": 4505344, + "step": 6670 + }, + { + "epoch": 0.1630713605159651, + "grad_norm": 2.2536447048187256, + "learning_rate": 6.521717887330826e-07, + "loss": 0.093, + "num_input_tokens_seen": 4508672, + "step": 6675 + }, + { + "epoch": 0.1631935113478123, + "grad_norm": 3.9860222339630127, + "learning_rate": 6.526603801241022e-07, + "loss": 0.1341, + "num_input_tokens_seen": 4512768, + "step": 6680 + }, + { + "epoch": 0.16331566217965945, + "grad_norm": 21.329500198364258, + "learning_rate": 6.531489715151218e-07, + "loss": 0.1077, + "num_input_tokens_seen": 4515840, + "step": 6685 + }, + { + "epoch": 0.16343781301150662, + "grad_norm": 15.191680908203125, + "learning_rate": 6.536375629061416e-07, + "loss": 0.1545, + "num_input_tokens_seen": 4519232, + "step": 6690 + }, + { + "epoch": 0.16355996384335378, + "grad_norm": 52.87079620361328, + "learning_rate": 6.541261542971612e-07, + "loss": 0.1031, + "num_input_tokens_seen": 4522816, + "step": 6695 + }, + { + "epoch": 0.16368211467520094, + "grad_norm": 20.58293914794922, + "learning_rate": 6.54614745688181e-07, + "loss": 0.1403, + "num_input_tokens_seen": 4525888, + "step": 6700 + }, + { + "epoch": 0.1638042655070481, + "grad_norm": 10.931684494018555, + "learning_rate": 6.551033370792007e-07, + "loss": 0.1446, + "num_input_tokens_seen": 4529280, + "step": 6705 + }, + { + "epoch": 0.16392641633889526, + "grad_norm": 68.2391357421875, + "learning_rate": 6.555919284702204e-07, + "loss": 0.186, + "num_input_tokens_seen": 4532544, + "step": 6710 + }, + { + "epoch": 0.16404856717074243, + "grad_norm": 28.83963394165039, + "learning_rate": 6.5608051986124e-07, + "loss": 0.1599, + "num_input_tokens_seen": 4535360, + "step": 6715 + }, + { + "epoch": 0.1641707180025896, + "grad_norm": 10.062387466430664, + "learning_rate": 6.565691112522597e-07, + "loss": 0.1133, + "num_input_tokens_seen": 4538368, + "step": 6720 + }, + { + "epoch": 0.16429286883443675, + "grad_norm": 35.1144905090332, + "learning_rate": 6.570577026432794e-07, + "loss": 0.0491, + "num_input_tokens_seen": 4541632, + "step": 6725 + }, + { + "epoch": 0.16441501966628394, + "grad_norm": 49.85148620605469, + "learning_rate": 6.575462940342991e-07, + "loss": 0.1565, + "num_input_tokens_seen": 4544512, + "step": 6730 + }, + { + "epoch": 0.1645371704981311, + "grad_norm": 16.927494049072266, + "learning_rate": 6.580348854253189e-07, + "loss": 0.102, + "num_input_tokens_seen": 4547776, + "step": 6735 + }, + { + "epoch": 0.16465932132997826, + "grad_norm": 61.242252349853516, + "learning_rate": 6.585234768163384e-07, + "loss": 0.1985, + "num_input_tokens_seen": 4550912, + "step": 6740 + }, + { + "epoch": 0.16478147216182543, + "grad_norm": 14.769601821899414, + "learning_rate": 6.590120682073581e-07, + "loss": 0.0903, + "num_input_tokens_seen": 4554112, + "step": 6745 + }, + { + "epoch": 0.1649036229936726, + "grad_norm": 14.11997127532959, + "learning_rate": 6.595006595983779e-07, + "loss": 0.18, + "num_input_tokens_seen": 4557120, + "step": 6750 + }, + { + "epoch": 0.16502577382551975, + "grad_norm": 17.178272247314453, + "learning_rate": 6.599892509893975e-07, + "loss": 0.0676, + "num_input_tokens_seen": 4560576, + "step": 6755 + }, + { + "epoch": 0.1651479246573669, + "grad_norm": 16.599720001220703, + "learning_rate": 6.604778423804173e-07, + "loss": 0.1992, + "num_input_tokens_seen": 4564096, + "step": 6760 + }, + { + "epoch": 0.16527007548921407, + "grad_norm": 14.66077995300293, + "learning_rate": 6.60966433771437e-07, + "loss": 0.1163, + "num_input_tokens_seen": 4567104, + "step": 6765 + }, + { + "epoch": 0.16539222632106124, + "grad_norm": 14.158099174499512, + "learning_rate": 6.614550251624566e-07, + "loss": 0.1756, + "num_input_tokens_seen": 4570496, + "step": 6770 + }, + { + "epoch": 0.1655143771529084, + "grad_norm": 7.5408034324646, + "learning_rate": 6.619436165534763e-07, + "loss": 0.0301, + "num_input_tokens_seen": 4576000, + "step": 6775 + }, + { + "epoch": 0.1656365279847556, + "grad_norm": 2.0234808921813965, + "learning_rate": 6.62432207944496e-07, + "loss": 0.1214, + "num_input_tokens_seen": 4579392, + "step": 6780 + }, + { + "epoch": 0.16575867881660275, + "grad_norm": 36.175838470458984, + "learning_rate": 6.629207993355157e-07, + "loss": 0.0789, + "num_input_tokens_seen": 4582656, + "step": 6785 + }, + { + "epoch": 0.1658808296484499, + "grad_norm": 48.25929641723633, + "learning_rate": 6.634093907265354e-07, + "loss": 0.1426, + "num_input_tokens_seen": 4586304, + "step": 6790 + }, + { + "epoch": 0.16600298048029707, + "grad_norm": 20.59220314025879, + "learning_rate": 6.638979821175551e-07, + "loss": 0.0402, + "num_input_tokens_seen": 4589376, + "step": 6795 + }, + { + "epoch": 0.16612513131214424, + "grad_norm": 50.40332794189453, + "learning_rate": 6.643865735085747e-07, + "loss": 0.1095, + "num_input_tokens_seen": 4592704, + "step": 6800 + }, + { + "epoch": 0.1662472821439914, + "grad_norm": 22.707887649536133, + "learning_rate": 6.648751648995944e-07, + "loss": 0.1261, + "num_input_tokens_seen": 4596672, + "step": 6805 + }, + { + "epoch": 0.16636943297583856, + "grad_norm": 27.34088897705078, + "learning_rate": 6.653637562906142e-07, + "loss": 0.1199, + "num_input_tokens_seen": 4600128, + "step": 6810 + }, + { + "epoch": 0.16649158380768572, + "grad_norm": 15.953385353088379, + "learning_rate": 6.658523476816338e-07, + "loss": 0.1429, + "num_input_tokens_seen": 4603200, + "step": 6815 + }, + { + "epoch": 0.16661373463953288, + "grad_norm": 12.182960510253906, + "learning_rate": 6.663409390726536e-07, + "loss": 0.1214, + "num_input_tokens_seen": 4606208, + "step": 6820 + }, + { + "epoch": 0.16673588547138005, + "grad_norm": 9.563040733337402, + "learning_rate": 6.668295304636732e-07, + "loss": 0.0518, + "num_input_tokens_seen": 4609280, + "step": 6825 + }, + { + "epoch": 0.16685803630322724, + "grad_norm": 44.647499084472656, + "learning_rate": 6.673181218546929e-07, + "loss": 0.1066, + "num_input_tokens_seen": 4612352, + "step": 6830 + }, + { + "epoch": 0.1669801871350744, + "grad_norm": 9.651607513427734, + "learning_rate": 6.678067132457126e-07, + "loss": 0.1821, + "num_input_tokens_seen": 4615744, + "step": 6835 + }, + { + "epoch": 0.16710233796692156, + "grad_norm": 103.38037872314453, + "learning_rate": 6.682953046367323e-07, + "loss": 0.3838, + "num_input_tokens_seen": 4619392, + "step": 6840 + }, + { + "epoch": 0.16722448879876872, + "grad_norm": 56.43678665161133, + "learning_rate": 6.68783896027752e-07, + "loss": 0.0645, + "num_input_tokens_seen": 4622336, + "step": 6845 + }, + { + "epoch": 0.16734663963061588, + "grad_norm": 27.00754165649414, + "learning_rate": 6.692724874187716e-07, + "loss": 0.1702, + "num_input_tokens_seen": 4625280, + "step": 6850 + }, + { + "epoch": 0.16746879046246305, + "grad_norm": 12.663601875305176, + "learning_rate": 6.697610788097914e-07, + "loss": 0.2138, + "num_input_tokens_seen": 4628544, + "step": 6855 + }, + { + "epoch": 0.1675909412943102, + "grad_norm": 43.01512908935547, + "learning_rate": 6.70249670200811e-07, + "loss": 0.1501, + "num_input_tokens_seen": 4632384, + "step": 6860 + }, + { + "epoch": 0.16771309212615737, + "grad_norm": 4.006659984588623, + "learning_rate": 6.707382615918307e-07, + "loss": 0.0843, + "num_input_tokens_seen": 4635712, + "step": 6865 + }, + { + "epoch": 0.16783524295800453, + "grad_norm": 5.144809722900391, + "learning_rate": 6.712268529828505e-07, + "loss": 0.0591, + "num_input_tokens_seen": 4638848, + "step": 6870 + }, + { + "epoch": 0.16795739378985172, + "grad_norm": 0.25560063123703003, + "learning_rate": 6.7171544437387e-07, + "loss": 0.0569, + "num_input_tokens_seen": 4641728, + "step": 6875 + }, + { + "epoch": 0.16807954462169888, + "grad_norm": 85.87944030761719, + "learning_rate": 6.722040357648898e-07, + "loss": 0.2355, + "num_input_tokens_seen": 4644992, + "step": 6880 + }, + { + "epoch": 0.16820169545354605, + "grad_norm": 36.92861557006836, + "learning_rate": 6.726926271559095e-07, + "loss": 0.1886, + "num_input_tokens_seen": 4647872, + "step": 6885 + }, + { + "epoch": 0.1683238462853932, + "grad_norm": 6.552468776702881, + "learning_rate": 6.731812185469292e-07, + "loss": 0.1627, + "num_input_tokens_seen": 4651520, + "step": 6890 + }, + { + "epoch": 0.16844599711724037, + "grad_norm": 20.896957397460938, + "learning_rate": 6.736698099379489e-07, + "loss": 0.1422, + "num_input_tokens_seen": 4655488, + "step": 6895 + }, + { + "epoch": 0.16856814794908753, + "grad_norm": 61.85845947265625, + "learning_rate": 6.741584013289686e-07, + "loss": 0.2005, + "num_input_tokens_seen": 4658944, + "step": 6900 + }, + { + "epoch": 0.1686902987809347, + "grad_norm": 4.090170383453369, + "learning_rate": 6.746469927199882e-07, + "loss": 0.2195, + "num_input_tokens_seen": 4661952, + "step": 6905 + }, + { + "epoch": 0.16881244961278186, + "grad_norm": 52.27777862548828, + "learning_rate": 6.751355841110079e-07, + "loss": 0.2332, + "num_input_tokens_seen": 4665472, + "step": 6910 + }, + { + "epoch": 0.16893460044462902, + "grad_norm": 24.961549758911133, + "learning_rate": 6.756241755020277e-07, + "loss": 0.1836, + "num_input_tokens_seen": 4668800, + "step": 6915 + }, + { + "epoch": 0.16905675127647618, + "grad_norm": 31.262836456298828, + "learning_rate": 6.761127668930473e-07, + "loss": 0.132, + "num_input_tokens_seen": 4672512, + "step": 6920 + }, + { + "epoch": 0.16917890210832337, + "grad_norm": 25.470243453979492, + "learning_rate": 6.766013582840671e-07, + "loss": 0.1959, + "num_input_tokens_seen": 4675904, + "step": 6925 + }, + { + "epoch": 0.16930105294017053, + "grad_norm": 16.708919525146484, + "learning_rate": 6.770899496750868e-07, + "loss": 0.0929, + "num_input_tokens_seen": 4679424, + "step": 6930 + }, + { + "epoch": 0.1694232037720177, + "grad_norm": 6.275186061859131, + "learning_rate": 6.775785410661063e-07, + "loss": 0.1339, + "num_input_tokens_seen": 4683520, + "step": 6935 + }, + { + "epoch": 0.16954535460386486, + "grad_norm": 23.944786071777344, + "learning_rate": 6.780671324571261e-07, + "loss": 0.138, + "num_input_tokens_seen": 4686848, + "step": 6940 + }, + { + "epoch": 0.16966750543571202, + "grad_norm": 8.790188789367676, + "learning_rate": 6.785557238481458e-07, + "loss": 0.068, + "num_input_tokens_seen": 4690304, + "step": 6945 + }, + { + "epoch": 0.16978965626755918, + "grad_norm": 23.236326217651367, + "learning_rate": 6.790443152391655e-07, + "loss": 0.0549, + "num_input_tokens_seen": 4693632, + "step": 6950 + }, + { + "epoch": 0.16991180709940634, + "grad_norm": 24.558181762695312, + "learning_rate": 6.795329066301852e-07, + "loss": 0.104, + "num_input_tokens_seen": 4696832, + "step": 6955 + }, + { + "epoch": 0.1700339579312535, + "grad_norm": 2.767685890197754, + "learning_rate": 6.800214980212048e-07, + "loss": 0.1351, + "num_input_tokens_seen": 4700672, + "step": 6960 + }, + { + "epoch": 0.17015610876310067, + "grad_norm": 9.79178524017334, + "learning_rate": 6.805100894122245e-07, + "loss": 0.1338, + "num_input_tokens_seen": 4704384, + "step": 6965 + }, + { + "epoch": 0.17027825959494783, + "grad_norm": 14.672940254211426, + "learning_rate": 6.809986808032442e-07, + "loss": 0.0839, + "num_input_tokens_seen": 4707712, + "step": 6970 + }, + { + "epoch": 0.17040041042679502, + "grad_norm": 10.246041297912598, + "learning_rate": 6.81487272194264e-07, + "loss": 0.0875, + "num_input_tokens_seen": 4710720, + "step": 6975 + }, + { + "epoch": 0.17052256125864218, + "grad_norm": 1.6319217681884766, + "learning_rate": 6.819758635852836e-07, + "loss": 0.0822, + "num_input_tokens_seen": 4714048, + "step": 6980 + }, + { + "epoch": 0.17064471209048934, + "grad_norm": 23.715749740600586, + "learning_rate": 6.824644549763034e-07, + "loss": 0.1104, + "num_input_tokens_seen": 4717312, + "step": 6985 + }, + { + "epoch": 0.1707668629223365, + "grad_norm": 29.73052406311035, + "learning_rate": 6.82953046367323e-07, + "loss": 0.0742, + "num_input_tokens_seen": 4721152, + "step": 6990 + }, + { + "epoch": 0.17088901375418367, + "grad_norm": 20.4119873046875, + "learning_rate": 6.834416377583426e-07, + "loss": 0.1043, + "num_input_tokens_seen": 4724416, + "step": 6995 + }, + { + "epoch": 0.17101116458603083, + "grad_norm": 37.61855697631836, + "learning_rate": 6.839302291493624e-07, + "loss": 0.1324, + "num_input_tokens_seen": 4727872, + "step": 7000 + }, + { + "epoch": 0.171133315417878, + "grad_norm": 44.900047302246094, + "learning_rate": 6.844188205403821e-07, + "loss": 0.2628, + "num_input_tokens_seen": 4731328, + "step": 7005 + }, + { + "epoch": 0.17125546624972515, + "grad_norm": 23.994895935058594, + "learning_rate": 6.849074119314018e-07, + "loss": 0.1661, + "num_input_tokens_seen": 4734912, + "step": 7010 + }, + { + "epoch": 0.17137761708157231, + "grad_norm": 36.63328170776367, + "learning_rate": 6.853960033224214e-07, + "loss": 0.0935, + "num_input_tokens_seen": 4738816, + "step": 7015 + }, + { + "epoch": 0.1714997679134195, + "grad_norm": 31.2292423248291, + "learning_rate": 6.85884594713441e-07, + "loss": 0.1215, + "num_input_tokens_seen": 4741888, + "step": 7020 + }, + { + "epoch": 0.17162191874526667, + "grad_norm": 27.03329086303711, + "learning_rate": 6.863731861044608e-07, + "loss": 0.1481, + "num_input_tokens_seen": 4745152, + "step": 7025 + }, + { + "epoch": 0.17174406957711383, + "grad_norm": 10.102874755859375, + "learning_rate": 6.868617774954805e-07, + "loss": 0.1287, + "num_input_tokens_seen": 4748608, + "step": 7030 + }, + { + "epoch": 0.171866220408961, + "grad_norm": 57.4080696105957, + "learning_rate": 6.873503688865003e-07, + "loss": 0.1947, + "num_input_tokens_seen": 4751552, + "step": 7035 + }, + { + "epoch": 0.17198837124080815, + "grad_norm": 19.714679718017578, + "learning_rate": 6.878389602775198e-07, + "loss": 0.0742, + "num_input_tokens_seen": 4754688, + "step": 7040 + }, + { + "epoch": 0.17211052207265531, + "grad_norm": 26.733041763305664, + "learning_rate": 6.883275516685396e-07, + "loss": 0.1217, + "num_input_tokens_seen": 4757760, + "step": 7045 + }, + { + "epoch": 0.17223267290450248, + "grad_norm": 16.901596069335938, + "learning_rate": 6.888161430595593e-07, + "loss": 0.2113, + "num_input_tokens_seen": 4760896, + "step": 7050 + }, + { + "epoch": 0.17235482373634964, + "grad_norm": 26.491451263427734, + "learning_rate": 6.893047344505789e-07, + "loss": 0.1273, + "num_input_tokens_seen": 4763904, + "step": 7055 + }, + { + "epoch": 0.1724769745681968, + "grad_norm": 34.51689529418945, + "learning_rate": 6.897933258415987e-07, + "loss": 0.1319, + "num_input_tokens_seen": 4767680, + "step": 7060 + }, + { + "epoch": 0.17259912540004396, + "grad_norm": 22.6583251953125, + "learning_rate": 6.902819172326183e-07, + "loss": 0.1141, + "num_input_tokens_seen": 4771200, + "step": 7065 + }, + { + "epoch": 0.17272127623189115, + "grad_norm": 16.140857696533203, + "learning_rate": 6.90770508623638e-07, + "loss": 0.0704, + "num_input_tokens_seen": 4774592, + "step": 7070 + }, + { + "epoch": 0.17284342706373831, + "grad_norm": 28.333457946777344, + "learning_rate": 6.912591000146577e-07, + "loss": 0.1583, + "num_input_tokens_seen": 4777600, + "step": 7075 + }, + { + "epoch": 0.17296557789558548, + "grad_norm": 22.110576629638672, + "learning_rate": 6.917476914056773e-07, + "loss": 0.0724, + "num_input_tokens_seen": 4780736, + "step": 7080 + }, + { + "epoch": 0.17308772872743264, + "grad_norm": 22.722089767456055, + "learning_rate": 6.922362827966971e-07, + "loss": 0.196, + "num_input_tokens_seen": 4783936, + "step": 7085 + }, + { + "epoch": 0.1732098795592798, + "grad_norm": 5.605364799499512, + "learning_rate": 6.927248741877168e-07, + "loss": 0.088, + "num_input_tokens_seen": 4787328, + "step": 7090 + }, + { + "epoch": 0.17333203039112696, + "grad_norm": 36.73191833496094, + "learning_rate": 6.932134655787366e-07, + "loss": 0.0997, + "num_input_tokens_seen": 4790528, + "step": 7095 + }, + { + "epoch": 0.17345418122297412, + "grad_norm": 4.203964710235596, + "learning_rate": 6.937020569697561e-07, + "loss": 0.0858, + "num_input_tokens_seen": 4793728, + "step": 7100 + }, + { + "epoch": 0.1735763320548213, + "grad_norm": 21.231687545776367, + "learning_rate": 6.941906483607759e-07, + "loss": 0.1983, + "num_input_tokens_seen": 4797312, + "step": 7105 + }, + { + "epoch": 0.17369848288666845, + "grad_norm": 20.893413543701172, + "learning_rate": 6.946792397517955e-07, + "loss": 0.1578, + "num_input_tokens_seen": 4801344, + "step": 7110 + }, + { + "epoch": 0.1738206337185156, + "grad_norm": 17.35948944091797, + "learning_rate": 6.951678311428152e-07, + "loss": 0.0992, + "num_input_tokens_seen": 4804928, + "step": 7115 + }, + { + "epoch": 0.1739427845503628, + "grad_norm": 12.587772369384766, + "learning_rate": 6.95656422533835e-07, + "loss": 0.101, + "num_input_tokens_seen": 4808832, + "step": 7120 + }, + { + "epoch": 0.17406493538220996, + "grad_norm": 66.40949249267578, + "learning_rate": 6.961450139248545e-07, + "loss": 0.3265, + "num_input_tokens_seen": 4811648, + "step": 7125 + }, + { + "epoch": 0.17418708621405712, + "grad_norm": 19.095821380615234, + "learning_rate": 6.966336053158743e-07, + "loss": 0.0718, + "num_input_tokens_seen": 4815360, + "step": 7130 + }, + { + "epoch": 0.1743092370459043, + "grad_norm": 21.798635482788086, + "learning_rate": 6.97122196706894e-07, + "loss": 0.0902, + "num_input_tokens_seen": 4818816, + "step": 7135 + }, + { + "epoch": 0.17443138787775145, + "grad_norm": 30.94866943359375, + "learning_rate": 6.976107880979138e-07, + "loss": 0.1007, + "num_input_tokens_seen": 4822464, + "step": 7140 + }, + { + "epoch": 0.1745535387095986, + "grad_norm": 6.085841655731201, + "learning_rate": 6.980993794889334e-07, + "loss": 0.177, + "num_input_tokens_seen": 4825664, + "step": 7145 + }, + { + "epoch": 0.17467568954144577, + "grad_norm": 25.90575408935547, + "learning_rate": 6.98587970879953e-07, + "loss": 0.1748, + "num_input_tokens_seen": 4828480, + "step": 7150 + }, + { + "epoch": 0.17479784037329293, + "grad_norm": 40.3851203918457, + "learning_rate": 6.990765622709727e-07, + "loss": 0.1292, + "num_input_tokens_seen": 4832000, + "step": 7155 + }, + { + "epoch": 0.1749199912051401, + "grad_norm": 29.815210342407227, + "learning_rate": 6.995651536619924e-07, + "loss": 0.1327, + "num_input_tokens_seen": 4835584, + "step": 7160 + }, + { + "epoch": 0.17504214203698726, + "grad_norm": 9.371803283691406, + "learning_rate": 7.000537450530122e-07, + "loss": 0.1325, + "num_input_tokens_seen": 4838848, + "step": 7165 + }, + { + "epoch": 0.17516429286883445, + "grad_norm": 23.05524444580078, + "learning_rate": 7.005423364440318e-07, + "loss": 0.0945, + "num_input_tokens_seen": 4842048, + "step": 7170 + }, + { + "epoch": 0.1752864437006816, + "grad_norm": 13.318825721740723, + "learning_rate": 7.010309278350515e-07, + "loss": 0.0969, + "num_input_tokens_seen": 4845632, + "step": 7175 + }, + { + "epoch": 0.17540859453252877, + "grad_norm": 50.86394119262695, + "learning_rate": 7.015195192260712e-07, + "loss": 0.1881, + "num_input_tokens_seen": 4848832, + "step": 7180 + }, + { + "epoch": 0.17553074536437593, + "grad_norm": 6.3671040534973145, + "learning_rate": 7.020081106170908e-07, + "loss": 0.1016, + "num_input_tokens_seen": 4851968, + "step": 7185 + }, + { + "epoch": 0.1756528961962231, + "grad_norm": 28.17583656311035, + "learning_rate": 7.024967020081106e-07, + "loss": 0.0988, + "num_input_tokens_seen": 4855360, + "step": 7190 + }, + { + "epoch": 0.17577504702807026, + "grad_norm": 30.14576530456543, + "learning_rate": 7.029852933991303e-07, + "loss": 0.1393, + "num_input_tokens_seen": 4858496, + "step": 7195 + }, + { + "epoch": 0.17589719785991742, + "grad_norm": 21.089799880981445, + "learning_rate": 7.0347388479015e-07, + "loss": 0.0923, + "num_input_tokens_seen": 4862016, + "step": 7200 + }, + { + "epoch": 0.17601934869176458, + "grad_norm": 32.99907302856445, + "learning_rate": 7.039624761811696e-07, + "loss": 0.1801, + "num_input_tokens_seen": 4865216, + "step": 7205 + }, + { + "epoch": 0.17614149952361174, + "grad_norm": 6.302981376647949, + "learning_rate": 7.044510675721893e-07, + "loss": 0.0319, + "num_input_tokens_seen": 4868352, + "step": 7210 + }, + { + "epoch": 0.17626365035545893, + "grad_norm": 9.754927635192871, + "learning_rate": 7.04939658963209e-07, + "loss": 0.082, + "num_input_tokens_seen": 4871744, + "step": 7215 + }, + { + "epoch": 0.1763858011873061, + "grad_norm": 1.676681637763977, + "learning_rate": 7.054282503542287e-07, + "loss": 0.0897, + "num_input_tokens_seen": 4875520, + "step": 7220 + }, + { + "epoch": 0.17650795201915326, + "grad_norm": 23.12895393371582, + "learning_rate": 7.059168417452485e-07, + "loss": 0.0919, + "num_input_tokens_seen": 4878912, + "step": 7225 + }, + { + "epoch": 0.17663010285100042, + "grad_norm": 32.92357635498047, + "learning_rate": 7.064054331362681e-07, + "loss": 0.0748, + "num_input_tokens_seen": 4882368, + "step": 7230 + }, + { + "epoch": 0.17675225368284758, + "grad_norm": 37.07939910888672, + "learning_rate": 7.068940245272877e-07, + "loss": 0.0947, + "num_input_tokens_seen": 4885504, + "step": 7235 + }, + { + "epoch": 0.17687440451469474, + "grad_norm": 0.42080605030059814, + "learning_rate": 7.073826159183075e-07, + "loss": 0.0184, + "num_input_tokens_seen": 4889344, + "step": 7240 + }, + { + "epoch": 0.1769965553465419, + "grad_norm": 34.457584381103516, + "learning_rate": 7.078712073093271e-07, + "loss": 0.0587, + "num_input_tokens_seen": 4892480, + "step": 7245 + }, + { + "epoch": 0.17711870617838907, + "grad_norm": 83.84002685546875, + "learning_rate": 7.083597987003469e-07, + "loss": 0.1063, + "num_input_tokens_seen": 4895936, + "step": 7250 + }, + { + "epoch": 0.17724085701023623, + "grad_norm": 30.910654067993164, + "learning_rate": 7.088483900913666e-07, + "loss": 0.2293, + "num_input_tokens_seen": 4899200, + "step": 7255 + }, + { + "epoch": 0.1773630078420834, + "grad_norm": 24.910533905029297, + "learning_rate": 7.093369814823862e-07, + "loss": 0.1442, + "num_input_tokens_seen": 4902464, + "step": 7260 + }, + { + "epoch": 0.17748515867393058, + "grad_norm": 16.055313110351562, + "learning_rate": 7.098255728734059e-07, + "loss": 0.35, + "num_input_tokens_seen": 4906240, + "step": 7265 + }, + { + "epoch": 0.17760730950577774, + "grad_norm": 19.099559783935547, + "learning_rate": 7.103141642644256e-07, + "loss": 0.0915, + "num_input_tokens_seen": 4909760, + "step": 7270 + }, + { + "epoch": 0.1777294603376249, + "grad_norm": 34.7686882019043, + "learning_rate": 7.108027556554453e-07, + "loss": 0.1682, + "num_input_tokens_seen": 4912768, + "step": 7275 + }, + { + "epoch": 0.17785161116947207, + "grad_norm": 12.762410163879395, + "learning_rate": 7.11291347046465e-07, + "loss": 0.0714, + "num_input_tokens_seen": 4915968, + "step": 7280 + }, + { + "epoch": 0.17797376200131923, + "grad_norm": 1.6703814268112183, + "learning_rate": 7.117799384374848e-07, + "loss": 0.211, + "num_input_tokens_seen": 4918976, + "step": 7285 + }, + { + "epoch": 0.1780959128331664, + "grad_norm": 40.02713394165039, + "learning_rate": 7.122685298285043e-07, + "loss": 0.1074, + "num_input_tokens_seen": 4922816, + "step": 7290 + }, + { + "epoch": 0.17821806366501355, + "grad_norm": 34.362022399902344, + "learning_rate": 7.12757121219524e-07, + "loss": 0.1798, + "num_input_tokens_seen": 4925824, + "step": 7295 + }, + { + "epoch": 0.17834021449686072, + "grad_norm": 26.875831604003906, + "learning_rate": 7.132457126105438e-07, + "loss": 0.1859, + "num_input_tokens_seen": 4929664, + "step": 7300 + }, + { + "epoch": 0.17846236532870788, + "grad_norm": 1.096500039100647, + "learning_rate": 7.137343040015634e-07, + "loss": 0.0412, + "num_input_tokens_seen": 4933184, + "step": 7305 + }, + { + "epoch": 0.17858451616055504, + "grad_norm": 4.698089122772217, + "learning_rate": 7.142228953925832e-07, + "loss": 0.0465, + "num_input_tokens_seen": 4936320, + "step": 7310 + }, + { + "epoch": 0.17870666699240223, + "grad_norm": 1.2416880130767822, + "learning_rate": 7.147114867836028e-07, + "loss": 0.0908, + "num_input_tokens_seen": 4940160, + "step": 7315 + }, + { + "epoch": 0.1788288178242494, + "grad_norm": 24.97760772705078, + "learning_rate": 7.152000781746225e-07, + "loss": 0.1059, + "num_input_tokens_seen": 4943680, + "step": 7320 + }, + { + "epoch": 0.17895096865609655, + "grad_norm": 31.947988510131836, + "learning_rate": 7.156886695656422e-07, + "loss": 0.0785, + "num_input_tokens_seen": 4946560, + "step": 7325 + }, + { + "epoch": 0.17907311948794372, + "grad_norm": 35.131752014160156, + "learning_rate": 7.161772609566619e-07, + "loss": 0.1813, + "num_input_tokens_seen": 4949760, + "step": 7330 + }, + { + "epoch": 0.17919527031979088, + "grad_norm": 58.12864685058594, + "learning_rate": 7.166658523476816e-07, + "loss": 0.0774, + "num_input_tokens_seen": 4953024, + "step": 7335 + }, + { + "epoch": 0.17931742115163804, + "grad_norm": 18.54363441467285, + "learning_rate": 7.171544437387013e-07, + "loss": 0.095, + "num_input_tokens_seen": 4956672, + "step": 7340 + }, + { + "epoch": 0.1794395719834852, + "grad_norm": 24.689943313598633, + "learning_rate": 7.17643035129721e-07, + "loss": 0.2202, + "num_input_tokens_seen": 4959936, + "step": 7345 + }, + { + "epoch": 0.17956172281533236, + "grad_norm": 6.816656589508057, + "learning_rate": 7.181316265207406e-07, + "loss": 0.0811, + "num_input_tokens_seen": 4963392, + "step": 7350 + }, + { + "epoch": 0.17968387364717953, + "grad_norm": 19.98248863220215, + "learning_rate": 7.186202179117603e-07, + "loss": 0.2623, + "num_input_tokens_seen": 4966720, + "step": 7355 + }, + { + "epoch": 0.17980602447902672, + "grad_norm": 33.617252349853516, + "learning_rate": 7.191088093027801e-07, + "loss": 0.1531, + "num_input_tokens_seen": 4969792, + "step": 7360 + }, + { + "epoch": 0.17992817531087388, + "grad_norm": 56.086578369140625, + "learning_rate": 7.195974006937997e-07, + "loss": 0.1272, + "num_input_tokens_seen": 4973184, + "step": 7365 + }, + { + "epoch": 0.18005032614272104, + "grad_norm": 20.100421905517578, + "learning_rate": 7.200859920848194e-07, + "loss": 0.1201, + "num_input_tokens_seen": 4976192, + "step": 7370 + }, + { + "epoch": 0.1801724769745682, + "grad_norm": 47.291358947753906, + "learning_rate": 7.205745834758391e-07, + "loss": 0.1259, + "num_input_tokens_seen": 4979584, + "step": 7375 + }, + { + "epoch": 0.18029462780641536, + "grad_norm": 32.62343978881836, + "learning_rate": 7.210631748668588e-07, + "loss": 0.1405, + "num_input_tokens_seen": 4982784, + "step": 7380 + }, + { + "epoch": 0.18041677863826253, + "grad_norm": 2.495743751525879, + "learning_rate": 7.215517662578785e-07, + "loss": 0.1383, + "num_input_tokens_seen": 4986240, + "step": 7385 + }, + { + "epoch": 0.1805389294701097, + "grad_norm": 27.57878875732422, + "learning_rate": 7.220403576488982e-07, + "loss": 0.133, + "num_input_tokens_seen": 4989760, + "step": 7390 + }, + { + "epoch": 0.18066108030195685, + "grad_norm": 31.568477630615234, + "learning_rate": 7.225289490399179e-07, + "loss": 0.1669, + "num_input_tokens_seen": 4993216, + "step": 7395 + }, + { + "epoch": 0.180783231133804, + "grad_norm": 13.473135948181152, + "learning_rate": 7.230175404309375e-07, + "loss": 0.2285, + "num_input_tokens_seen": 4996672, + "step": 7400 + }, + { + "epoch": 0.18090538196565117, + "grad_norm": 23.307954788208008, + "learning_rate": 7.235061318219573e-07, + "loss": 0.1672, + "num_input_tokens_seen": 5000320, + "step": 7405 + }, + { + "epoch": 0.18102753279749836, + "grad_norm": 23.983013153076172, + "learning_rate": 7.239947232129769e-07, + "loss": 0.0913, + "num_input_tokens_seen": 5003392, + "step": 7410 + }, + { + "epoch": 0.18114968362934553, + "grad_norm": 13.432177543640137, + "learning_rate": 7.244833146039967e-07, + "loss": 0.1249, + "num_input_tokens_seen": 5006528, + "step": 7415 + }, + { + "epoch": 0.1812718344611927, + "grad_norm": 22.402496337890625, + "learning_rate": 7.249719059950164e-07, + "loss": 0.1705, + "num_input_tokens_seen": 5009536, + "step": 7420 + }, + { + "epoch": 0.18139398529303985, + "grad_norm": 9.332347869873047, + "learning_rate": 7.254604973860359e-07, + "loss": 0.1101, + "num_input_tokens_seen": 5013184, + "step": 7425 + }, + { + "epoch": 0.181516136124887, + "grad_norm": 2.7800843715667725, + "learning_rate": 7.259490887770557e-07, + "loss": 0.0673, + "num_input_tokens_seen": 5016320, + "step": 7430 + }, + { + "epoch": 0.18163828695673417, + "grad_norm": 5.731550216674805, + "learning_rate": 7.264376801680754e-07, + "loss": 0.085, + "num_input_tokens_seen": 5019456, + "step": 7435 + }, + { + "epoch": 0.18176043778858134, + "grad_norm": 32.029327392578125, + "learning_rate": 7.269262715590951e-07, + "loss": 0.1711, + "num_input_tokens_seen": 5022720, + "step": 7440 + }, + { + "epoch": 0.1818825886204285, + "grad_norm": 22.998117446899414, + "learning_rate": 7.274148629501148e-07, + "loss": 0.0828, + "num_input_tokens_seen": 5025664, + "step": 7445 + }, + { + "epoch": 0.18200473945227566, + "grad_norm": 44.311195373535156, + "learning_rate": 7.279034543411345e-07, + "loss": 0.1175, + "num_input_tokens_seen": 5029120, + "step": 7450 + }, + { + "epoch": 0.18212689028412282, + "grad_norm": 2.4467546939849854, + "learning_rate": 7.283920457321541e-07, + "loss": 0.0589, + "num_input_tokens_seen": 5032448, + "step": 7455 + }, + { + "epoch": 0.18224904111597, + "grad_norm": 17.926509857177734, + "learning_rate": 7.288806371231738e-07, + "loss": 0.1039, + "num_input_tokens_seen": 5035456, + "step": 7460 + }, + { + "epoch": 0.18237119194781717, + "grad_norm": 19.704965591430664, + "learning_rate": 7.293692285141936e-07, + "loss": 0.2372, + "num_input_tokens_seen": 5038656, + "step": 7465 + }, + { + "epoch": 0.18249334277966434, + "grad_norm": 6.128961086273193, + "learning_rate": 7.298578199052132e-07, + "loss": 0.1839, + "num_input_tokens_seen": 5042304, + "step": 7470 + }, + { + "epoch": 0.1826154936115115, + "grad_norm": 25.417865753173828, + "learning_rate": 7.30346411296233e-07, + "loss": 0.1081, + "num_input_tokens_seen": 5045440, + "step": 7475 + }, + { + "epoch": 0.18273764444335866, + "grad_norm": 18.716413497924805, + "learning_rate": 7.308350026872526e-07, + "loss": 0.2091, + "num_input_tokens_seen": 5048576, + "step": 7480 + }, + { + "epoch": 0.18285979527520582, + "grad_norm": 13.868701934814453, + "learning_rate": 7.313235940782722e-07, + "loss": 0.0761, + "num_input_tokens_seen": 5051840, + "step": 7485 + }, + { + "epoch": 0.18298194610705298, + "grad_norm": 10.58272647857666, + "learning_rate": 7.31812185469292e-07, + "loss": 0.2031, + "num_input_tokens_seen": 5054976, + "step": 7490 + }, + { + "epoch": 0.18310409693890015, + "grad_norm": 54.09087371826172, + "learning_rate": 7.323007768603117e-07, + "loss": 0.1034, + "num_input_tokens_seen": 5058560, + "step": 7495 + }, + { + "epoch": 0.1832262477707473, + "grad_norm": 26.250953674316406, + "learning_rate": 7.327893682513314e-07, + "loss": 0.0831, + "num_input_tokens_seen": 5061568, + "step": 7500 + }, + { + "epoch": 0.18334839860259447, + "grad_norm": 13.708479881286621, + "learning_rate": 7.332779596423511e-07, + "loss": 0.0543, + "num_input_tokens_seen": 5064768, + "step": 7505 + }, + { + "epoch": 0.18347054943444166, + "grad_norm": 16.8136043548584, + "learning_rate": 7.337665510333707e-07, + "loss": 0.0832, + "num_input_tokens_seen": 5070336, + "step": 7510 + }, + { + "epoch": 0.18359270026628882, + "grad_norm": 10.423450469970703, + "learning_rate": 7.342551424243904e-07, + "loss": 0.0929, + "num_input_tokens_seen": 5073472, + "step": 7515 + }, + { + "epoch": 0.18371485109813598, + "grad_norm": 22.869081497192383, + "learning_rate": 7.347437338154101e-07, + "loss": 0.0853, + "num_input_tokens_seen": 5076992, + "step": 7520 + }, + { + "epoch": 0.18383700192998315, + "grad_norm": 16.256441116333008, + "learning_rate": 7.352323252064299e-07, + "loss": 0.1682, + "num_input_tokens_seen": 5080320, + "step": 7525 + }, + { + "epoch": 0.1839591527618303, + "grad_norm": 29.337858200073242, + "learning_rate": 7.357209165974495e-07, + "loss": 0.1089, + "num_input_tokens_seen": 5083584, + "step": 7530 + }, + { + "epoch": 0.18408130359367747, + "grad_norm": 25.76587677001953, + "learning_rate": 7.362095079884692e-07, + "loss": 0.2105, + "num_input_tokens_seen": 5086912, + "step": 7535 + }, + { + "epoch": 0.18420345442552463, + "grad_norm": 4.57034969329834, + "learning_rate": 7.366980993794889e-07, + "loss": 0.1018, + "num_input_tokens_seen": 5090240, + "step": 7540 + }, + { + "epoch": 0.1843256052573718, + "grad_norm": 26.5098819732666, + "learning_rate": 7.371866907705085e-07, + "loss": 0.1192, + "num_input_tokens_seen": 5093632, + "step": 7545 + }, + { + "epoch": 0.18444775608921896, + "grad_norm": 34.24437713623047, + "learning_rate": 7.376752821615283e-07, + "loss": 0.1163, + "num_input_tokens_seen": 5097792, + "step": 7550 + }, + { + "epoch": 0.18456990692106615, + "grad_norm": 28.638696670532227, + "learning_rate": 7.38163873552548e-07, + "loss": 0.098, + "num_input_tokens_seen": 5100992, + "step": 7555 + }, + { + "epoch": 0.1846920577529133, + "grad_norm": 23.46084213256836, + "learning_rate": 7.386524649435677e-07, + "loss": 0.1937, + "num_input_tokens_seen": 5104128, + "step": 7560 + }, + { + "epoch": 0.18481420858476047, + "grad_norm": 30.672029495239258, + "learning_rate": 7.391410563345873e-07, + "loss": 0.1587, + "num_input_tokens_seen": 5107328, + "step": 7565 + }, + { + "epoch": 0.18493635941660763, + "grad_norm": 42.915653228759766, + "learning_rate": 7.39629647725607e-07, + "loss": 0.117, + "num_input_tokens_seen": 5110272, + "step": 7570 + }, + { + "epoch": 0.1850585102484548, + "grad_norm": 18.438732147216797, + "learning_rate": 7.401182391166267e-07, + "loss": 0.1317, + "num_input_tokens_seen": 5113792, + "step": 7575 + }, + { + "epoch": 0.18518066108030196, + "grad_norm": 16.258872985839844, + "learning_rate": 7.406068305076464e-07, + "loss": 0.2001, + "num_input_tokens_seen": 5116992, + "step": 7580 + }, + { + "epoch": 0.18530281191214912, + "grad_norm": 33.11271286010742, + "learning_rate": 7.410954218986662e-07, + "loss": 0.1577, + "num_input_tokens_seen": 5120192, + "step": 7585 + }, + { + "epoch": 0.18542496274399628, + "grad_norm": 23.325136184692383, + "learning_rate": 7.415840132896857e-07, + "loss": 0.0709, + "num_input_tokens_seen": 5123776, + "step": 7590 + }, + { + "epoch": 0.18554711357584344, + "grad_norm": 12.562117576599121, + "learning_rate": 7.420726046807055e-07, + "loss": 0.1228, + "num_input_tokens_seen": 5127360, + "step": 7595 + }, + { + "epoch": 0.1856692644076906, + "grad_norm": 7.058622360229492, + "learning_rate": 7.425611960717252e-07, + "loss": 0.0538, + "num_input_tokens_seen": 5130432, + "step": 7600 + }, + { + "epoch": 0.1857914152395378, + "grad_norm": 46.326969146728516, + "learning_rate": 7.430497874627448e-07, + "loss": 0.0803, + "num_input_tokens_seen": 5133632, + "step": 7605 + }, + { + "epoch": 0.18591356607138496, + "grad_norm": 26.77501106262207, + "learning_rate": 7.435383788537646e-07, + "loss": 0.1216, + "num_input_tokens_seen": 5136576, + "step": 7610 + }, + { + "epoch": 0.18603571690323212, + "grad_norm": 12.72427749633789, + "learning_rate": 7.440269702447843e-07, + "loss": 0.1015, + "num_input_tokens_seen": 5139840, + "step": 7615 + }, + { + "epoch": 0.18615786773507928, + "grad_norm": 3.829493522644043, + "learning_rate": 7.445155616358039e-07, + "loss": 0.0851, + "num_input_tokens_seen": 5142976, + "step": 7620 + }, + { + "epoch": 0.18628001856692644, + "grad_norm": 7.829679489135742, + "learning_rate": 7.450041530268236e-07, + "loss": 0.0948, + "num_input_tokens_seen": 5146368, + "step": 7625 + }, + { + "epoch": 0.1864021693987736, + "grad_norm": 56.75333023071289, + "learning_rate": 7.454927444178434e-07, + "loss": 0.1405, + "num_input_tokens_seen": 5150080, + "step": 7630 + }, + { + "epoch": 0.18652432023062077, + "grad_norm": 32.06376647949219, + "learning_rate": 7.45981335808863e-07, + "loss": 0.1601, + "num_input_tokens_seen": 5153280, + "step": 7635 + }, + { + "epoch": 0.18664647106246793, + "grad_norm": 19.36145782470703, + "learning_rate": 7.464699271998827e-07, + "loss": 0.1544, + "num_input_tokens_seen": 5156032, + "step": 7640 + }, + { + "epoch": 0.1867686218943151, + "grad_norm": 17.868488311767578, + "learning_rate": 7.469585185909024e-07, + "loss": 0.0794, + "num_input_tokens_seen": 5159296, + "step": 7645 + }, + { + "epoch": 0.18689077272616225, + "grad_norm": 43.9721794128418, + "learning_rate": 7.47447109981922e-07, + "loss": 0.172, + "num_input_tokens_seen": 5162752, + "step": 7650 + }, + { + "epoch": 0.18701292355800944, + "grad_norm": 4.353318691253662, + "learning_rate": 7.479357013729418e-07, + "loss": 0.3807, + "num_input_tokens_seen": 5166080, + "step": 7655 + }, + { + "epoch": 0.1871350743898566, + "grad_norm": 7.278509616851807, + "learning_rate": 7.484242927639615e-07, + "loss": 0.1094, + "num_input_tokens_seen": 5169536, + "step": 7660 + }, + { + "epoch": 0.18725722522170377, + "grad_norm": 7.651449203491211, + "learning_rate": 7.489128841549811e-07, + "loss": 0.0831, + "num_input_tokens_seen": 5172928, + "step": 7665 + }, + { + "epoch": 0.18737937605355093, + "grad_norm": 22.87514877319336, + "learning_rate": 7.494014755460009e-07, + "loss": 0.1211, + "num_input_tokens_seen": 5176448, + "step": 7670 + }, + { + "epoch": 0.1875015268853981, + "grad_norm": 21.99827766418457, + "learning_rate": 7.498900669370205e-07, + "loss": 0.2272, + "num_input_tokens_seen": 5180160, + "step": 7675 + }, + { + "epoch": 0.18762367771724525, + "grad_norm": 59.72435760498047, + "learning_rate": 7.503786583280402e-07, + "loss": 0.1483, + "num_input_tokens_seen": 5183360, + "step": 7680 + }, + { + "epoch": 0.18774582854909241, + "grad_norm": 13.781630516052246, + "learning_rate": 7.508672497190599e-07, + "loss": 0.1039, + "num_input_tokens_seen": 5187264, + "step": 7685 + }, + { + "epoch": 0.18786797938093958, + "grad_norm": 10.817276954650879, + "learning_rate": 7.513558411100797e-07, + "loss": 0.0943, + "num_input_tokens_seen": 5190528, + "step": 7690 + }, + { + "epoch": 0.18799013021278674, + "grad_norm": 44.46306610107422, + "learning_rate": 7.518444325010993e-07, + "loss": 0.1337, + "num_input_tokens_seen": 5193536, + "step": 7695 + }, + { + "epoch": 0.18811228104463393, + "grad_norm": 20.650449752807617, + "learning_rate": 7.523330238921189e-07, + "loss": 0.0767, + "num_input_tokens_seen": 5197056, + "step": 7700 + }, + { + "epoch": 0.1882344318764811, + "grad_norm": 19.215862274169922, + "learning_rate": 7.528216152831387e-07, + "loss": 0.1273, + "num_input_tokens_seen": 5200832, + "step": 7705 + }, + { + "epoch": 0.18835658270832825, + "grad_norm": 41.045352935791016, + "learning_rate": 7.533102066741583e-07, + "loss": 0.1268, + "num_input_tokens_seen": 5204352, + "step": 7710 + }, + { + "epoch": 0.1884787335401754, + "grad_norm": 38.370384216308594, + "learning_rate": 7.537987980651781e-07, + "loss": 0.1765, + "num_input_tokens_seen": 5207488, + "step": 7715 + }, + { + "epoch": 0.18860088437202258, + "grad_norm": 6.754038333892822, + "learning_rate": 7.542873894561978e-07, + "loss": 0.0459, + "num_input_tokens_seen": 5211008, + "step": 7720 + }, + { + "epoch": 0.18872303520386974, + "grad_norm": 3.6415226459503174, + "learning_rate": 7.547759808472174e-07, + "loss": 0.1811, + "num_input_tokens_seen": 5214400, + "step": 7725 + }, + { + "epoch": 0.1888451860357169, + "grad_norm": 53.11391830444336, + "learning_rate": 7.552645722382371e-07, + "loss": 0.1476, + "num_input_tokens_seen": 5217536, + "step": 7730 + }, + { + "epoch": 0.18896733686756406, + "grad_norm": 7.384988784790039, + "learning_rate": 7.557531636292568e-07, + "loss": 0.0754, + "num_input_tokens_seen": 5220800, + "step": 7735 + }, + { + "epoch": 0.18908948769941122, + "grad_norm": 43.585693359375, + "learning_rate": 7.562417550202765e-07, + "loss": 0.1234, + "num_input_tokens_seen": 5224064, + "step": 7740 + }, + { + "epoch": 0.18921163853125839, + "grad_norm": 5.436645984649658, + "learning_rate": 7.567303464112962e-07, + "loss": 0.124, + "num_input_tokens_seen": 5227712, + "step": 7745 + }, + { + "epoch": 0.18933378936310558, + "grad_norm": 9.57922649383545, + "learning_rate": 7.57218937802316e-07, + "loss": 0.0938, + "num_input_tokens_seen": 5231168, + "step": 7750 + }, + { + "epoch": 0.18945594019495274, + "grad_norm": 41.084922790527344, + "learning_rate": 7.577075291933355e-07, + "loss": 0.0911, + "num_input_tokens_seen": 5234432, + "step": 7755 + }, + { + "epoch": 0.1895780910267999, + "grad_norm": 32.1443977355957, + "learning_rate": 7.581961205843552e-07, + "loss": 0.1401, + "num_input_tokens_seen": 5237696, + "step": 7760 + }, + { + "epoch": 0.18970024185864706, + "grad_norm": 42.15285110473633, + "learning_rate": 7.58684711975375e-07, + "loss": 0.0962, + "num_input_tokens_seen": 5243264, + "step": 7765 + }, + { + "epoch": 0.18982239269049422, + "grad_norm": 3.755997657775879, + "learning_rate": 7.591733033663946e-07, + "loss": 0.1051, + "num_input_tokens_seen": 5246272, + "step": 7770 + }, + { + "epoch": 0.18994454352234139, + "grad_norm": 33.203025817871094, + "learning_rate": 7.596618947574144e-07, + "loss": 0.1633, + "num_input_tokens_seen": 5249664, + "step": 7775 + }, + { + "epoch": 0.19006669435418855, + "grad_norm": 22.799898147583008, + "learning_rate": 7.601504861484341e-07, + "loss": 0.1516, + "num_input_tokens_seen": 5253056, + "step": 7780 + }, + { + "epoch": 0.1901888451860357, + "grad_norm": 8.654549598693848, + "learning_rate": 7.606390775394536e-07, + "loss": 0.0891, + "num_input_tokens_seen": 5256448, + "step": 7785 + }, + { + "epoch": 0.19031099601788287, + "grad_norm": 58.40709686279297, + "learning_rate": 7.611276689304734e-07, + "loss": 0.1017, + "num_input_tokens_seen": 5259712, + "step": 7790 + }, + { + "epoch": 0.19043314684973003, + "grad_norm": 1.5954786539077759, + "learning_rate": 7.616162603214931e-07, + "loss": 0.039, + "num_input_tokens_seen": 5263040, + "step": 7795 + }, + { + "epoch": 0.19055529768157722, + "grad_norm": 2.5210037231445312, + "learning_rate": 7.621048517125128e-07, + "loss": 0.1871, + "num_input_tokens_seen": 5266432, + "step": 7800 + }, + { + "epoch": 0.19067744851342439, + "grad_norm": 43.504371643066406, + "learning_rate": 7.625934431035325e-07, + "loss": 0.2188, + "num_input_tokens_seen": 5269952, + "step": 7805 + }, + { + "epoch": 0.19079959934527155, + "grad_norm": 25.404308319091797, + "learning_rate": 7.630820344945523e-07, + "loss": 0.052, + "num_input_tokens_seen": 5273088, + "step": 7810 + }, + { + "epoch": 0.1909217501771187, + "grad_norm": 55.287723541259766, + "learning_rate": 7.635706258855718e-07, + "loss": 0.2614, + "num_input_tokens_seen": 5276800, + "step": 7815 + }, + { + "epoch": 0.19104390100896587, + "grad_norm": 1.3680720329284668, + "learning_rate": 7.640592172765915e-07, + "loss": 0.1203, + "num_input_tokens_seen": 5280384, + "step": 7820 + }, + { + "epoch": 0.19116605184081303, + "grad_norm": 66.30671691894531, + "learning_rate": 7.645478086676113e-07, + "loss": 0.1966, + "num_input_tokens_seen": 5283584, + "step": 7825 + }, + { + "epoch": 0.1912882026726602, + "grad_norm": 0.8676133155822754, + "learning_rate": 7.650364000586309e-07, + "loss": 0.0947, + "num_input_tokens_seen": 5286976, + "step": 7830 + }, + { + "epoch": 0.19141035350450736, + "grad_norm": 34.26639175415039, + "learning_rate": 7.655249914496507e-07, + "loss": 0.1253, + "num_input_tokens_seen": 5290432, + "step": 7835 + }, + { + "epoch": 0.19153250433635452, + "grad_norm": 48.03748321533203, + "learning_rate": 7.660135828406703e-07, + "loss": 0.1757, + "num_input_tokens_seen": 5294144, + "step": 7840 + }, + { + "epoch": 0.19165465516820168, + "grad_norm": 33.33087158203125, + "learning_rate": 7.6650217423169e-07, + "loss": 0.1266, + "num_input_tokens_seen": 5297344, + "step": 7845 + }, + { + "epoch": 0.19177680600004887, + "grad_norm": 46.95682907104492, + "learning_rate": 7.669907656227097e-07, + "loss": 0.0449, + "num_input_tokens_seen": 5300992, + "step": 7850 + }, + { + "epoch": 0.19189895683189603, + "grad_norm": 1.6643356084823608, + "learning_rate": 7.674793570137294e-07, + "loss": 0.1091, + "num_input_tokens_seen": 5304384, + "step": 7855 + }, + { + "epoch": 0.1920211076637432, + "grad_norm": 60.34025192260742, + "learning_rate": 7.679679484047491e-07, + "loss": 0.3198, + "num_input_tokens_seen": 5307840, + "step": 7860 + }, + { + "epoch": 0.19214325849559036, + "grad_norm": 35.65785598754883, + "learning_rate": 7.684565397957687e-07, + "loss": 0.2131, + "num_input_tokens_seen": 5310720, + "step": 7865 + }, + { + "epoch": 0.19226540932743752, + "grad_norm": 35.110530853271484, + "learning_rate": 7.689451311867885e-07, + "loss": 0.2205, + "num_input_tokens_seen": 5314304, + "step": 7870 + }, + { + "epoch": 0.19238756015928468, + "grad_norm": 13.194342613220215, + "learning_rate": 7.694337225778081e-07, + "loss": 0.1237, + "num_input_tokens_seen": 5317376, + "step": 7875 + }, + { + "epoch": 0.19250971099113184, + "grad_norm": 26.445087432861328, + "learning_rate": 7.699223139688278e-07, + "loss": 0.0954, + "num_input_tokens_seen": 5320704, + "step": 7880 + }, + { + "epoch": 0.192631861822979, + "grad_norm": 2.860102891921997, + "learning_rate": 7.704109053598476e-07, + "loss": 0.085, + "num_input_tokens_seen": 5323776, + "step": 7885 + }, + { + "epoch": 0.19275401265482617, + "grad_norm": 9.533358573913574, + "learning_rate": 7.708994967508672e-07, + "loss": 0.1251, + "num_input_tokens_seen": 5326912, + "step": 7890 + }, + { + "epoch": 0.19287616348667336, + "grad_norm": 22.648160934448242, + "learning_rate": 7.713880881418869e-07, + "loss": 0.1403, + "num_input_tokens_seen": 5330176, + "step": 7895 + }, + { + "epoch": 0.19299831431852052, + "grad_norm": 25.47882652282715, + "learning_rate": 7.718766795329066e-07, + "loss": 0.2507, + "num_input_tokens_seen": 5333376, + "step": 7900 + }, + { + "epoch": 0.19312046515036768, + "grad_norm": 14.389740943908691, + "learning_rate": 7.723652709239263e-07, + "loss": 0.1507, + "num_input_tokens_seen": 5336896, + "step": 7905 + }, + { + "epoch": 0.19324261598221484, + "grad_norm": 27.832984924316406, + "learning_rate": 7.72853862314946e-07, + "loss": 0.1085, + "num_input_tokens_seen": 5340032, + "step": 7910 + }, + { + "epoch": 0.193364766814062, + "grad_norm": 37.951759338378906, + "learning_rate": 7.733424537059657e-07, + "loss": 0.1381, + "num_input_tokens_seen": 5343104, + "step": 7915 + }, + { + "epoch": 0.19348691764590917, + "grad_norm": 20.33961296081543, + "learning_rate": 7.738310450969853e-07, + "loss": 0.0414, + "num_input_tokens_seen": 5346752, + "step": 7920 + }, + { + "epoch": 0.19360906847775633, + "grad_norm": 3.8267455101013184, + "learning_rate": 7.74319636488005e-07, + "loss": 0.1919, + "num_input_tokens_seen": 5350016, + "step": 7925 + }, + { + "epoch": 0.1937312193096035, + "grad_norm": 40.605690002441406, + "learning_rate": 7.748082278790248e-07, + "loss": 0.1732, + "num_input_tokens_seen": 5353216, + "step": 7930 + }, + { + "epoch": 0.19385337014145065, + "grad_norm": 41.19668197631836, + "learning_rate": 7.752968192700444e-07, + "loss": 0.3179, + "num_input_tokens_seen": 5356864, + "step": 7935 + }, + { + "epoch": 0.19397552097329782, + "grad_norm": 25.50882911682129, + "learning_rate": 7.757854106610641e-07, + "loss": 0.0838, + "num_input_tokens_seen": 5360192, + "step": 7940 + }, + { + "epoch": 0.194097671805145, + "grad_norm": 9.193754196166992, + "learning_rate": 7.762740020520839e-07, + "loss": 0.1119, + "num_input_tokens_seen": 5363648, + "step": 7945 + }, + { + "epoch": 0.19421982263699217, + "grad_norm": 44.46554946899414, + "learning_rate": 7.767625934431034e-07, + "loss": 0.1827, + "num_input_tokens_seen": 5366528, + "step": 7950 + }, + { + "epoch": 0.19434197346883933, + "grad_norm": 22.7675838470459, + "learning_rate": 7.772511848341232e-07, + "loss": 0.1905, + "num_input_tokens_seen": 5370112, + "step": 7955 + }, + { + "epoch": 0.1944641243006865, + "grad_norm": 22.128459930419922, + "learning_rate": 7.777397762251429e-07, + "loss": 0.0704, + "num_input_tokens_seen": 5373760, + "step": 7960 + }, + { + "epoch": 0.19458627513253365, + "grad_norm": 28.93324089050293, + "learning_rate": 7.782283676161626e-07, + "loss": 0.153, + "num_input_tokens_seen": 5379520, + "step": 7965 + }, + { + "epoch": 0.19470842596438082, + "grad_norm": 16.455291748046875, + "learning_rate": 7.787169590071823e-07, + "loss": 0.0725, + "num_input_tokens_seen": 5382464, + "step": 7970 + }, + { + "epoch": 0.19483057679622798, + "grad_norm": 40.39324951171875, + "learning_rate": 7.792055503982019e-07, + "loss": 0.1389, + "num_input_tokens_seen": 5385792, + "step": 7975 + }, + { + "epoch": 0.19495272762807514, + "grad_norm": 30.548707962036133, + "learning_rate": 7.796941417892216e-07, + "loss": 0.1474, + "num_input_tokens_seen": 5389120, + "step": 7980 + }, + { + "epoch": 0.1950748784599223, + "grad_norm": 21.405261993408203, + "learning_rate": 7.801827331802413e-07, + "loss": 0.0684, + "num_input_tokens_seen": 5392576, + "step": 7985 + }, + { + "epoch": 0.19519702929176946, + "grad_norm": 27.85494613647461, + "learning_rate": 7.806713245712611e-07, + "loss": 0.1549, + "num_input_tokens_seen": 5396160, + "step": 7990 + }, + { + "epoch": 0.19531918012361665, + "grad_norm": 14.183422088623047, + "learning_rate": 7.811599159622807e-07, + "loss": 0.0496, + "num_input_tokens_seen": 5399424, + "step": 7995 + }, + { + "epoch": 0.19544133095546382, + "grad_norm": 17.311738967895508, + "learning_rate": 7.816485073533004e-07, + "loss": 0.1259, + "num_input_tokens_seen": 5402752, + "step": 8000 + }, + { + "epoch": 0.19556348178731098, + "grad_norm": 29.035099029541016, + "learning_rate": 7.821370987443201e-07, + "loss": 0.0967, + "num_input_tokens_seen": 5405952, + "step": 8005 + }, + { + "epoch": 0.19568563261915814, + "grad_norm": 26.67888069152832, + "learning_rate": 7.826256901353397e-07, + "loss": 0.1424, + "num_input_tokens_seen": 5409472, + "step": 8010 + }, + { + "epoch": 0.1958077834510053, + "grad_norm": 42.170406341552734, + "learning_rate": 7.831142815263595e-07, + "loss": 0.0976, + "num_input_tokens_seen": 5412800, + "step": 8015 + }, + { + "epoch": 0.19592993428285246, + "grad_norm": 73.1128158569336, + "learning_rate": 7.836028729173792e-07, + "loss": 0.133, + "num_input_tokens_seen": 5416256, + "step": 8020 + }, + { + "epoch": 0.19605208511469963, + "grad_norm": 34.78676223754883, + "learning_rate": 7.840914643083989e-07, + "loss": 0.3602, + "num_input_tokens_seen": 5419520, + "step": 8025 + }, + { + "epoch": 0.1961742359465468, + "grad_norm": 8.561169624328613, + "learning_rate": 7.845800556994185e-07, + "loss": 0.1214, + "num_input_tokens_seen": 5422976, + "step": 8030 + }, + { + "epoch": 0.19629638677839395, + "grad_norm": 32.9570198059082, + "learning_rate": 7.850686470904382e-07, + "loss": 0.0961, + "num_input_tokens_seen": 5426176, + "step": 8035 + }, + { + "epoch": 0.19641853761024114, + "grad_norm": 28.533235549926758, + "learning_rate": 7.855572384814579e-07, + "loss": 0.282, + "num_input_tokens_seen": 5429696, + "step": 8040 + }, + { + "epoch": 0.1965406884420883, + "grad_norm": 22.516508102416992, + "learning_rate": 7.860458298724776e-07, + "loss": 0.0883, + "num_input_tokens_seen": 5432832, + "step": 8045 + }, + { + "epoch": 0.19666283927393546, + "grad_norm": 3.684361696243286, + "learning_rate": 7.865344212634974e-07, + "loss": 0.0787, + "num_input_tokens_seen": 5436160, + "step": 8050 + }, + { + "epoch": 0.19678499010578263, + "grad_norm": 19.151451110839844, + "learning_rate": 7.87023012654517e-07, + "loss": 0.137, + "num_input_tokens_seen": 5439168, + "step": 8055 + }, + { + "epoch": 0.1969071409376298, + "grad_norm": 2.493832588195801, + "learning_rate": 7.875116040455367e-07, + "loss": 0.0778, + "num_input_tokens_seen": 5442496, + "step": 8060 + }, + { + "epoch": 0.19702929176947695, + "grad_norm": 58.97475051879883, + "learning_rate": 7.880001954365564e-07, + "loss": 0.1975, + "num_input_tokens_seen": 5446144, + "step": 8065 + }, + { + "epoch": 0.1971514426013241, + "grad_norm": 29.96076011657715, + "learning_rate": 7.88488786827576e-07, + "loss": 0.2025, + "num_input_tokens_seen": 5449408, + "step": 8070 + }, + { + "epoch": 0.19727359343317127, + "grad_norm": 4.311563014984131, + "learning_rate": 7.889773782185958e-07, + "loss": 0.1698, + "num_input_tokens_seen": 5453056, + "step": 8075 + }, + { + "epoch": 0.19739574426501844, + "grad_norm": 56.86328887939453, + "learning_rate": 7.894659696096155e-07, + "loss": 0.3359, + "num_input_tokens_seen": 5456064, + "step": 8080 + }, + { + "epoch": 0.1975178950968656, + "grad_norm": 14.863036155700684, + "learning_rate": 7.899545610006351e-07, + "loss": 0.1443, + "num_input_tokens_seen": 5459264, + "step": 8085 + }, + { + "epoch": 0.1976400459287128, + "grad_norm": 10.164009094238281, + "learning_rate": 7.904431523916548e-07, + "loss": 0.0892, + "num_input_tokens_seen": 5462976, + "step": 8090 + }, + { + "epoch": 0.19776219676055995, + "grad_norm": 29.270055770874023, + "learning_rate": 7.909317437826745e-07, + "loss": 0.2065, + "num_input_tokens_seen": 5466304, + "step": 8095 + }, + { + "epoch": 0.1978843475924071, + "grad_norm": 28.83013343811035, + "learning_rate": 7.914203351736942e-07, + "loss": 0.0773, + "num_input_tokens_seen": 5469632, + "step": 8100 + }, + { + "epoch": 0.19800649842425427, + "grad_norm": 17.059755325317383, + "learning_rate": 7.919089265647139e-07, + "loss": 0.0448, + "num_input_tokens_seen": 5472768, + "step": 8105 + }, + { + "epoch": 0.19812864925610144, + "grad_norm": 13.976848602294922, + "learning_rate": 7.923975179557337e-07, + "loss": 0.1155, + "num_input_tokens_seen": 5476032, + "step": 8110 + }, + { + "epoch": 0.1982508000879486, + "grad_norm": 19.26152229309082, + "learning_rate": 7.928861093467532e-07, + "loss": 0.2373, + "num_input_tokens_seen": 5479296, + "step": 8115 + }, + { + "epoch": 0.19837295091979576, + "grad_norm": 13.917434692382812, + "learning_rate": 7.93374700737773e-07, + "loss": 0.217, + "num_input_tokens_seen": 5483072, + "step": 8120 + }, + { + "epoch": 0.19849510175164292, + "grad_norm": 2.5597445964813232, + "learning_rate": 7.938632921287927e-07, + "loss": 0.084, + "num_input_tokens_seen": 5486208, + "step": 8125 + }, + { + "epoch": 0.19861725258349008, + "grad_norm": 13.927988052368164, + "learning_rate": 7.943518835198123e-07, + "loss": 0.0706, + "num_input_tokens_seen": 5489280, + "step": 8130 + }, + { + "epoch": 0.19873940341533725, + "grad_norm": 14.691224098205566, + "learning_rate": 7.948404749108321e-07, + "loss": 0.1443, + "num_input_tokens_seen": 5492608, + "step": 8135 + }, + { + "epoch": 0.19886155424718444, + "grad_norm": 16.61770248413086, + "learning_rate": 7.953290663018517e-07, + "loss": 0.1016, + "num_input_tokens_seen": 5495744, + "step": 8140 + }, + { + "epoch": 0.1989837050790316, + "grad_norm": 13.769593238830566, + "learning_rate": 7.958176576928714e-07, + "loss": 0.0974, + "num_input_tokens_seen": 5499520, + "step": 8145 + }, + { + "epoch": 0.19910585591087876, + "grad_norm": 47.58414077758789, + "learning_rate": 7.963062490838911e-07, + "loss": 0.096, + "num_input_tokens_seen": 5502976, + "step": 8150 + }, + { + "epoch": 0.19922800674272592, + "grad_norm": 0.6372967958450317, + "learning_rate": 7.967948404749108e-07, + "loss": 0.1639, + "num_input_tokens_seen": 5505984, + "step": 8155 + }, + { + "epoch": 0.19935015757457308, + "grad_norm": 17.84566879272461, + "learning_rate": 7.972834318659305e-07, + "loss": 0.0544, + "num_input_tokens_seen": 5509760, + "step": 8160 + }, + { + "epoch": 0.19947230840642025, + "grad_norm": 43.51220703125, + "learning_rate": 7.977720232569502e-07, + "loss": 0.2632, + "num_input_tokens_seen": 5512704, + "step": 8165 + }, + { + "epoch": 0.1995944592382674, + "grad_norm": 0.9529390335083008, + "learning_rate": 7.982606146479699e-07, + "loss": 0.0972, + "num_input_tokens_seen": 5516096, + "step": 8170 + }, + { + "epoch": 0.19971661007011457, + "grad_norm": 18.115739822387695, + "learning_rate": 7.987492060389895e-07, + "loss": 0.2251, + "num_input_tokens_seen": 5519872, + "step": 8175 + }, + { + "epoch": 0.19983876090196173, + "grad_norm": 41.57394027709961, + "learning_rate": 7.992377974300093e-07, + "loss": 0.1235, + "num_input_tokens_seen": 5523264, + "step": 8180 + }, + { + "epoch": 0.1999609117338089, + "grad_norm": 18.623090744018555, + "learning_rate": 7.99726388821029e-07, + "loss": 0.083, + "num_input_tokens_seen": 5526272, + "step": 8185 + }, + { + "epoch": 0.20008306256565608, + "grad_norm": 1.6735551357269287, + "learning_rate": 8.002149802120486e-07, + "loss": 0.1533, + "num_input_tokens_seen": 5529792, + "step": 8190 + }, + { + "epoch": 0.20020521339750325, + "grad_norm": 70.54412078857422, + "learning_rate": 8.007035716030683e-07, + "loss": 0.092, + "num_input_tokens_seen": 5532992, + "step": 8195 + }, + { + "epoch": 0.2003273642293504, + "grad_norm": 29.043832778930664, + "learning_rate": 8.01192162994088e-07, + "loss": 0.0971, + "num_input_tokens_seen": 5536640, + "step": 8200 + }, + { + "epoch": 0.20044951506119757, + "grad_norm": 6.162371635437012, + "learning_rate": 8.016807543851077e-07, + "loss": 0.0859, + "num_input_tokens_seen": 5539968, + "step": 8205 + }, + { + "epoch": 0.20057166589304473, + "grad_norm": 37.265079498291016, + "learning_rate": 8.021693457761274e-07, + "loss": 0.1228, + "num_input_tokens_seen": 5542912, + "step": 8210 + }, + { + "epoch": 0.2006938167248919, + "grad_norm": 34.13664245605469, + "learning_rate": 8.026579371671471e-07, + "loss": 0.1139, + "num_input_tokens_seen": 5545728, + "step": 8215 + }, + { + "epoch": 0.20081596755673906, + "grad_norm": 19.852344512939453, + "learning_rate": 8.031465285581668e-07, + "loss": 0.1316, + "num_input_tokens_seen": 5548864, + "step": 8220 + }, + { + "epoch": 0.20093811838858622, + "grad_norm": 9.395726203918457, + "learning_rate": 8.036351199491864e-07, + "loss": 0.1198, + "num_input_tokens_seen": 5551744, + "step": 8225 + }, + { + "epoch": 0.20106026922043338, + "grad_norm": 10.440095901489258, + "learning_rate": 8.041237113402062e-07, + "loss": 0.1258, + "num_input_tokens_seen": 5555968, + "step": 8230 + }, + { + "epoch": 0.20118242005228057, + "grad_norm": 50.22675704956055, + "learning_rate": 8.046123027312258e-07, + "loss": 0.1186, + "num_input_tokens_seen": 5559360, + "step": 8235 + }, + { + "epoch": 0.20130457088412773, + "grad_norm": 38.31979751586914, + "learning_rate": 8.051008941222456e-07, + "loss": 0.0886, + "num_input_tokens_seen": 5563008, + "step": 8240 + }, + { + "epoch": 0.2014267217159749, + "grad_norm": 8.49152946472168, + "learning_rate": 8.055894855132653e-07, + "loss": 0.113, + "num_input_tokens_seen": 5566464, + "step": 8245 + }, + { + "epoch": 0.20154887254782206, + "grad_norm": 7.730495929718018, + "learning_rate": 8.060780769042848e-07, + "loss": 0.0882, + "num_input_tokens_seen": 5569408, + "step": 8250 + }, + { + "epoch": 0.20167102337966922, + "grad_norm": 35.468379974365234, + "learning_rate": 8.065666682953046e-07, + "loss": 0.1449, + "num_input_tokens_seen": 5573056, + "step": 8255 + }, + { + "epoch": 0.20179317421151638, + "grad_norm": 20.88719367980957, + "learning_rate": 8.070552596863243e-07, + "loss": 0.1259, + "num_input_tokens_seen": 5576512, + "step": 8260 + }, + { + "epoch": 0.20191532504336354, + "grad_norm": 21.56291389465332, + "learning_rate": 8.07543851077344e-07, + "loss": 0.2285, + "num_input_tokens_seen": 5579584, + "step": 8265 + }, + { + "epoch": 0.2020374758752107, + "grad_norm": 21.869901657104492, + "learning_rate": 8.080324424683637e-07, + "loss": 0.1879, + "num_input_tokens_seen": 5582784, + "step": 8270 + }, + { + "epoch": 0.20215962670705787, + "grad_norm": 5.118806838989258, + "learning_rate": 8.085210338593834e-07, + "loss": 0.1162, + "num_input_tokens_seen": 5585920, + "step": 8275 + }, + { + "epoch": 0.20228177753890503, + "grad_norm": 27.39159393310547, + "learning_rate": 8.09009625250403e-07, + "loss": 0.1593, + "num_input_tokens_seen": 5589504, + "step": 8280 + }, + { + "epoch": 0.20240392837075222, + "grad_norm": 5.3895158767700195, + "learning_rate": 8.094982166414227e-07, + "loss": 0.082, + "num_input_tokens_seen": 5592576, + "step": 8285 + }, + { + "epoch": 0.20252607920259938, + "grad_norm": 16.00442123413086, + "learning_rate": 8.099868080324425e-07, + "loss": 0.1413, + "num_input_tokens_seen": 5595648, + "step": 8290 + }, + { + "epoch": 0.20264823003444654, + "grad_norm": 14.546874046325684, + "learning_rate": 8.104753994234621e-07, + "loss": 0.079, + "num_input_tokens_seen": 5599104, + "step": 8295 + }, + { + "epoch": 0.2027703808662937, + "grad_norm": 0.4530252516269684, + "learning_rate": 8.109639908144819e-07, + "loss": 0.0501, + "num_input_tokens_seen": 5602432, + "step": 8300 + }, + { + "epoch": 0.20289253169814087, + "grad_norm": 21.29576301574707, + "learning_rate": 8.114525822055015e-07, + "loss": 0.1506, + "num_input_tokens_seen": 5605504, + "step": 8305 + }, + { + "epoch": 0.20301468252998803, + "grad_norm": 0.7760035991668701, + "learning_rate": 8.119411735965211e-07, + "loss": 0.1131, + "num_input_tokens_seen": 5609216, + "step": 8310 + }, + { + "epoch": 0.2031368333618352, + "grad_norm": 23.73055648803711, + "learning_rate": 8.124297649875409e-07, + "loss": 0.1505, + "num_input_tokens_seen": 5613248, + "step": 8315 + }, + { + "epoch": 0.20325898419368235, + "grad_norm": 26.038301467895508, + "learning_rate": 8.129183563785606e-07, + "loss": 0.0719, + "num_input_tokens_seen": 5616832, + "step": 8320 + }, + { + "epoch": 0.2033811350255295, + "grad_norm": 73.4818115234375, + "learning_rate": 8.134069477695803e-07, + "loss": 0.1667, + "num_input_tokens_seen": 5620800, + "step": 8325 + }, + { + "epoch": 0.20350328585737668, + "grad_norm": 30.03678321838379, + "learning_rate": 8.138955391606e-07, + "loss": 0.1361, + "num_input_tokens_seen": 5624448, + "step": 8330 + }, + { + "epoch": 0.20362543668922387, + "grad_norm": 28.63085174560547, + "learning_rate": 8.143841305516197e-07, + "loss": 0.1627, + "num_input_tokens_seen": 5627712, + "step": 8335 + }, + { + "epoch": 0.20374758752107103, + "grad_norm": 0.3213491141796112, + "learning_rate": 8.148727219426393e-07, + "loss": 0.1031, + "num_input_tokens_seen": 5631040, + "step": 8340 + }, + { + "epoch": 0.2038697383529182, + "grad_norm": 21.672334671020508, + "learning_rate": 8.15361313333659e-07, + "loss": 0.1729, + "num_input_tokens_seen": 5634624, + "step": 8345 + }, + { + "epoch": 0.20399188918476535, + "grad_norm": 3.710036277770996, + "learning_rate": 8.158499047246788e-07, + "loss": 0.1207, + "num_input_tokens_seen": 5638080, + "step": 8350 + }, + { + "epoch": 0.2041140400166125, + "grad_norm": 7.503905296325684, + "learning_rate": 8.163384961156984e-07, + "loss": 0.1336, + "num_input_tokens_seen": 5641536, + "step": 8355 + }, + { + "epoch": 0.20423619084845968, + "grad_norm": 3.1992719173431396, + "learning_rate": 8.168270875067181e-07, + "loss": 0.0766, + "num_input_tokens_seen": 5644544, + "step": 8360 + }, + { + "epoch": 0.20435834168030684, + "grad_norm": 21.4776554107666, + "learning_rate": 8.173156788977378e-07, + "loss": 0.1265, + "num_input_tokens_seen": 5647936, + "step": 8365 + }, + { + "epoch": 0.204480492512154, + "grad_norm": 23.008224487304688, + "learning_rate": 8.178042702887574e-07, + "loss": 0.1076, + "num_input_tokens_seen": 5651584, + "step": 8370 + }, + { + "epoch": 0.20460264334400116, + "grad_norm": 12.869095802307129, + "learning_rate": 8.182928616797772e-07, + "loss": 0.0794, + "num_input_tokens_seen": 5654784, + "step": 8375 + }, + { + "epoch": 0.20472479417584835, + "grad_norm": 51.182090759277344, + "learning_rate": 8.187814530707969e-07, + "loss": 0.2906, + "num_input_tokens_seen": 5657984, + "step": 8380 + }, + { + "epoch": 0.2048469450076955, + "grad_norm": 38.773929595947266, + "learning_rate": 8.192700444618166e-07, + "loss": 0.1992, + "num_input_tokens_seen": 5661248, + "step": 8385 + }, + { + "epoch": 0.20496909583954268, + "grad_norm": 45.63922882080078, + "learning_rate": 8.197586358528362e-07, + "loss": 0.1088, + "num_input_tokens_seen": 5664384, + "step": 8390 + }, + { + "epoch": 0.20509124667138984, + "grad_norm": 16.322813034057617, + "learning_rate": 8.20247227243856e-07, + "loss": 0.1534, + "num_input_tokens_seen": 5667840, + "step": 8395 + }, + { + "epoch": 0.205213397503237, + "grad_norm": 20.92977523803711, + "learning_rate": 8.207358186348756e-07, + "loss": 0.082, + "num_input_tokens_seen": 5670848, + "step": 8400 + }, + { + "epoch": 0.20533554833508416, + "grad_norm": 22.75577735900879, + "learning_rate": 8.212244100258953e-07, + "loss": 0.1136, + "num_input_tokens_seen": 5674176, + "step": 8405 + }, + { + "epoch": 0.20545769916693132, + "grad_norm": 18.192934036254883, + "learning_rate": 8.217130014169151e-07, + "loss": 0.1473, + "num_input_tokens_seen": 5677376, + "step": 8410 + }, + { + "epoch": 0.20557984999877849, + "grad_norm": 11.31889820098877, + "learning_rate": 8.222015928079346e-07, + "loss": 0.083, + "num_input_tokens_seen": 5680640, + "step": 8415 + }, + { + "epoch": 0.20570200083062565, + "grad_norm": 13.893218040466309, + "learning_rate": 8.226901841989544e-07, + "loss": 0.1466, + "num_input_tokens_seen": 5684288, + "step": 8420 + }, + { + "epoch": 0.2058241516624728, + "grad_norm": 16.788461685180664, + "learning_rate": 8.231787755899741e-07, + "loss": 0.1573, + "num_input_tokens_seen": 5687680, + "step": 8425 + }, + { + "epoch": 0.20594630249432, + "grad_norm": 25.608034133911133, + "learning_rate": 8.236673669809937e-07, + "loss": 0.0939, + "num_input_tokens_seen": 5691328, + "step": 8430 + }, + { + "epoch": 0.20606845332616716, + "grad_norm": 59.523643493652344, + "learning_rate": 8.241559583720135e-07, + "loss": 0.1198, + "num_input_tokens_seen": 5694336, + "step": 8435 + }, + { + "epoch": 0.20619060415801432, + "grad_norm": 38.94904327392578, + "learning_rate": 8.246445497630332e-07, + "loss": 0.2146, + "num_input_tokens_seen": 5697344, + "step": 8440 + }, + { + "epoch": 0.20631275498986149, + "grad_norm": 10.258160591125488, + "learning_rate": 8.251331411540528e-07, + "loss": 0.1408, + "num_input_tokens_seen": 5700800, + "step": 8445 + }, + { + "epoch": 0.20643490582170865, + "grad_norm": 13.627341270446777, + "learning_rate": 8.256217325450725e-07, + "loss": 0.1243, + "num_input_tokens_seen": 5704640, + "step": 8450 + }, + { + "epoch": 0.2065570566535558, + "grad_norm": 15.296594619750977, + "learning_rate": 8.261103239360923e-07, + "loss": 0.1146, + "num_input_tokens_seen": 5708352, + "step": 8455 + }, + { + "epoch": 0.20667920748540297, + "grad_norm": 19.404726028442383, + "learning_rate": 8.265989153271119e-07, + "loss": 0.1354, + "num_input_tokens_seen": 5711744, + "step": 8460 + }, + { + "epoch": 0.20680135831725013, + "grad_norm": 28.34406280517578, + "learning_rate": 8.270875067181316e-07, + "loss": 0.1455, + "num_input_tokens_seen": 5714880, + "step": 8465 + }, + { + "epoch": 0.2069235091490973, + "grad_norm": 4.979307651519775, + "learning_rate": 8.275760981091513e-07, + "loss": 0.0521, + "num_input_tokens_seen": 5718208, + "step": 8470 + }, + { + "epoch": 0.20704565998094446, + "grad_norm": 20.692550659179688, + "learning_rate": 8.280646895001709e-07, + "loss": 0.0445, + "num_input_tokens_seen": 5721408, + "step": 8475 + }, + { + "epoch": 0.20716781081279165, + "grad_norm": 1.2701551914215088, + "learning_rate": 8.285532808911907e-07, + "loss": 0.1252, + "num_input_tokens_seen": 5724800, + "step": 8480 + }, + { + "epoch": 0.2072899616446388, + "grad_norm": 67.5052719116211, + "learning_rate": 8.290418722822104e-07, + "loss": 0.2579, + "num_input_tokens_seen": 5728000, + "step": 8485 + }, + { + "epoch": 0.20741211247648597, + "grad_norm": 1.723806619644165, + "learning_rate": 8.2953046367323e-07, + "loss": 0.0754, + "num_input_tokens_seen": 5731456, + "step": 8490 + }, + { + "epoch": 0.20753426330833313, + "grad_norm": 11.588358879089355, + "learning_rate": 8.300190550642498e-07, + "loss": 0.1841, + "num_input_tokens_seen": 5734400, + "step": 8495 + }, + { + "epoch": 0.2076564141401803, + "grad_norm": 41.95431900024414, + "learning_rate": 8.305076464552694e-07, + "loss": 0.1131, + "num_input_tokens_seen": 5737920, + "step": 8500 + }, + { + "epoch": 0.20777856497202746, + "grad_norm": 33.22675704956055, + "learning_rate": 8.309962378462891e-07, + "loss": 0.1254, + "num_input_tokens_seen": 5741056, + "step": 8505 + }, + { + "epoch": 0.20790071580387462, + "grad_norm": 40.123233795166016, + "learning_rate": 8.314848292373088e-07, + "loss": 0.1324, + "num_input_tokens_seen": 5744576, + "step": 8510 + }, + { + "epoch": 0.20802286663572178, + "grad_norm": 12.709420204162598, + "learning_rate": 8.319734206283286e-07, + "loss": 0.197, + "num_input_tokens_seen": 5748480, + "step": 8515 + }, + { + "epoch": 0.20814501746756894, + "grad_norm": 0.15196305513381958, + "learning_rate": 8.324620120193482e-07, + "loss": 0.1648, + "num_input_tokens_seen": 5752000, + "step": 8520 + }, + { + "epoch": 0.2082671682994161, + "grad_norm": 0.7214540243148804, + "learning_rate": 8.329506034103678e-07, + "loss": 0.2212, + "num_input_tokens_seen": 5755648, + "step": 8525 + }, + { + "epoch": 0.2083893191312633, + "grad_norm": 31.200590133666992, + "learning_rate": 8.334391948013876e-07, + "loss": 0.2119, + "num_input_tokens_seen": 5759040, + "step": 8530 + }, + { + "epoch": 0.20851146996311046, + "grad_norm": 51.559303283691406, + "learning_rate": 8.339277861924072e-07, + "loss": 0.0682, + "num_input_tokens_seen": 5762496, + "step": 8535 + }, + { + "epoch": 0.20863362079495762, + "grad_norm": 9.00422191619873, + "learning_rate": 8.34416377583427e-07, + "loss": 0.0486, + "num_input_tokens_seen": 5765312, + "step": 8540 + }, + { + "epoch": 0.20875577162680478, + "grad_norm": 5.614874362945557, + "learning_rate": 8.349049689744467e-07, + "loss": 0.1615, + "num_input_tokens_seen": 5768768, + "step": 8545 + }, + { + "epoch": 0.20887792245865194, + "grad_norm": 13.906744003295898, + "learning_rate": 8.353935603654664e-07, + "loss": 0.0643, + "num_input_tokens_seen": 5771968, + "step": 8550 + }, + { + "epoch": 0.2090000732904991, + "grad_norm": 0.11603455245494843, + "learning_rate": 8.35882151756486e-07, + "loss": 0.2121, + "num_input_tokens_seen": 5775296, + "step": 8555 + }, + { + "epoch": 0.20912222412234627, + "grad_norm": 29.86203956604004, + "learning_rate": 8.363707431475056e-07, + "loss": 0.0623, + "num_input_tokens_seen": 5778048, + "step": 8560 + }, + { + "epoch": 0.20924437495419343, + "grad_norm": 14.665985107421875, + "learning_rate": 8.368593345385254e-07, + "loss": 0.1852, + "num_input_tokens_seen": 5781312, + "step": 8565 + }, + { + "epoch": 0.2093665257860406, + "grad_norm": 11.448308944702148, + "learning_rate": 8.373479259295451e-07, + "loss": 0.0873, + "num_input_tokens_seen": 5784704, + "step": 8570 + }, + { + "epoch": 0.20948867661788778, + "grad_norm": 13.049193382263184, + "learning_rate": 8.378365173205649e-07, + "loss": 0.0896, + "num_input_tokens_seen": 5788224, + "step": 8575 + }, + { + "epoch": 0.20961082744973494, + "grad_norm": 22.919639587402344, + "learning_rate": 8.383251087115844e-07, + "loss": 0.1468, + "num_input_tokens_seen": 5791488, + "step": 8580 + }, + { + "epoch": 0.2097329782815821, + "grad_norm": 15.393905639648438, + "learning_rate": 8.388137001026041e-07, + "loss": 0.1301, + "num_input_tokens_seen": 5794752, + "step": 8585 + }, + { + "epoch": 0.20985512911342927, + "grad_norm": 7.1114583015441895, + "learning_rate": 8.393022914936239e-07, + "loss": 0.077, + "num_input_tokens_seen": 5798336, + "step": 8590 + }, + { + "epoch": 0.20997727994527643, + "grad_norm": 32.48029327392578, + "learning_rate": 8.397908828846435e-07, + "loss": 0.1719, + "num_input_tokens_seen": 5802048, + "step": 8595 + }, + { + "epoch": 0.2100994307771236, + "grad_norm": 10.279617309570312, + "learning_rate": 8.402794742756633e-07, + "loss": 0.1771, + "num_input_tokens_seen": 5805312, + "step": 8600 + }, + { + "epoch": 0.21022158160897075, + "grad_norm": 6.890974521636963, + "learning_rate": 8.407680656666829e-07, + "loss": 0.102, + "num_input_tokens_seen": 5808448, + "step": 8605 + }, + { + "epoch": 0.21034373244081792, + "grad_norm": 29.157899856567383, + "learning_rate": 8.412566570577026e-07, + "loss": 0.1211, + "num_input_tokens_seen": 5811776, + "step": 8610 + }, + { + "epoch": 0.21046588327266508, + "grad_norm": 11.824888229370117, + "learning_rate": 8.417452484487223e-07, + "loss": 0.0809, + "num_input_tokens_seen": 5815232, + "step": 8615 + }, + { + "epoch": 0.21058803410451224, + "grad_norm": 16.32293128967285, + "learning_rate": 8.422338398397419e-07, + "loss": 0.0449, + "num_input_tokens_seen": 5818560, + "step": 8620 + }, + { + "epoch": 0.21071018493635943, + "grad_norm": 36.444828033447266, + "learning_rate": 8.427224312307617e-07, + "loss": 0.1412, + "num_input_tokens_seen": 5822272, + "step": 8625 + }, + { + "epoch": 0.2108323357682066, + "grad_norm": 23.423480987548828, + "learning_rate": 8.432110226217814e-07, + "loss": 0.1022, + "num_input_tokens_seen": 5825600, + "step": 8630 + }, + { + "epoch": 0.21095448660005375, + "grad_norm": 36.47949981689453, + "learning_rate": 8.436996140128011e-07, + "loss": 0.1595, + "num_input_tokens_seen": 5828672, + "step": 8635 + }, + { + "epoch": 0.21107663743190092, + "grad_norm": 20.2095890045166, + "learning_rate": 8.441882054038207e-07, + "loss": 0.0977, + "num_input_tokens_seen": 5831552, + "step": 8640 + }, + { + "epoch": 0.21119878826374808, + "grad_norm": 9.84154224395752, + "learning_rate": 8.446767967948404e-07, + "loss": 0.1556, + "num_input_tokens_seen": 5834368, + "step": 8645 + }, + { + "epoch": 0.21132093909559524, + "grad_norm": 1.6117609739303589, + "learning_rate": 8.451653881858601e-07, + "loss": 0.2125, + "num_input_tokens_seen": 5837440, + "step": 8650 + }, + { + "epoch": 0.2114430899274424, + "grad_norm": 2.4990859031677246, + "learning_rate": 8.456539795768798e-07, + "loss": 0.1083, + "num_input_tokens_seen": 5840640, + "step": 8655 + }, + { + "epoch": 0.21156524075928956, + "grad_norm": 28.441207885742188, + "learning_rate": 8.461425709678996e-07, + "loss": 0.1269, + "num_input_tokens_seen": 5843712, + "step": 8660 + }, + { + "epoch": 0.21168739159113673, + "grad_norm": 1.2140380144119263, + "learning_rate": 8.466311623589191e-07, + "loss": 0.1657, + "num_input_tokens_seen": 5847104, + "step": 8665 + }, + { + "epoch": 0.2118095424229839, + "grad_norm": 21.97617530822754, + "learning_rate": 8.471197537499389e-07, + "loss": 0.0945, + "num_input_tokens_seen": 5850432, + "step": 8670 + }, + { + "epoch": 0.21193169325483108, + "grad_norm": 46.13754653930664, + "learning_rate": 8.476083451409586e-07, + "loss": 0.0999, + "num_input_tokens_seen": 5853824, + "step": 8675 + }, + { + "epoch": 0.21205384408667824, + "grad_norm": 11.853911399841309, + "learning_rate": 8.480969365319782e-07, + "loss": 0.0518, + "num_input_tokens_seen": 5857472, + "step": 8680 + }, + { + "epoch": 0.2121759949185254, + "grad_norm": 21.628971099853516, + "learning_rate": 8.48585527922998e-07, + "loss": 0.1515, + "num_input_tokens_seen": 5861312, + "step": 8685 + }, + { + "epoch": 0.21229814575037256, + "grad_norm": 21.702733993530273, + "learning_rate": 8.490741193140176e-07, + "loss": 0.1207, + "num_input_tokens_seen": 5864832, + "step": 8690 + }, + { + "epoch": 0.21242029658221973, + "grad_norm": 22.620609283447266, + "learning_rate": 8.495627107050373e-07, + "loss": 0.0963, + "num_input_tokens_seen": 5867776, + "step": 8695 + }, + { + "epoch": 0.2125424474140669, + "grad_norm": 21.320842742919922, + "learning_rate": 8.50051302096057e-07, + "loss": 0.1344, + "num_input_tokens_seen": 5870848, + "step": 8700 + }, + { + "epoch": 0.21266459824591405, + "grad_norm": 10.63657283782959, + "learning_rate": 8.505398934870767e-07, + "loss": 0.1278, + "num_input_tokens_seen": 5874560, + "step": 8705 + }, + { + "epoch": 0.2127867490777612, + "grad_norm": 21.64299964904785, + "learning_rate": 8.510284848780964e-07, + "loss": 0.098, + "num_input_tokens_seen": 5877824, + "step": 8710 + }, + { + "epoch": 0.21290889990960837, + "grad_norm": 32.38628387451172, + "learning_rate": 8.51517076269116e-07, + "loss": 0.1258, + "num_input_tokens_seen": 5880704, + "step": 8715 + }, + { + "epoch": 0.21303105074145554, + "grad_norm": 31.72425651550293, + "learning_rate": 8.520056676601358e-07, + "loss": 0.1202, + "num_input_tokens_seen": 5883904, + "step": 8720 + }, + { + "epoch": 0.21315320157330273, + "grad_norm": 10.176417350769043, + "learning_rate": 8.524942590511554e-07, + "loss": 0.0826, + "num_input_tokens_seen": 5887296, + "step": 8725 + }, + { + "epoch": 0.2132753524051499, + "grad_norm": 32.1301383972168, + "learning_rate": 8.529828504421752e-07, + "loss": 0.0917, + "num_input_tokens_seen": 5890304, + "step": 8730 + }, + { + "epoch": 0.21339750323699705, + "grad_norm": 22.305946350097656, + "learning_rate": 8.534714418331949e-07, + "loss": 0.2655, + "num_input_tokens_seen": 5893632, + "step": 8735 + }, + { + "epoch": 0.2135196540688442, + "grad_norm": 1.0712743997573853, + "learning_rate": 8.539600332242145e-07, + "loss": 0.0348, + "num_input_tokens_seen": 5896960, + "step": 8740 + }, + { + "epoch": 0.21364180490069137, + "grad_norm": 21.419750213623047, + "learning_rate": 8.544486246152342e-07, + "loss": 0.0938, + "num_input_tokens_seen": 5900224, + "step": 8745 + }, + { + "epoch": 0.21376395573253854, + "grad_norm": 31.946155548095703, + "learning_rate": 8.549372160062539e-07, + "loss": 0.1702, + "num_input_tokens_seen": 5903360, + "step": 8750 + }, + { + "epoch": 0.2138861065643857, + "grad_norm": 27.752010345458984, + "learning_rate": 8.554258073972736e-07, + "loss": 0.1216, + "num_input_tokens_seen": 5906432, + "step": 8755 + }, + { + "epoch": 0.21400825739623286, + "grad_norm": 24.327363967895508, + "learning_rate": 8.559143987882933e-07, + "loss": 0.0419, + "num_input_tokens_seen": 5909568, + "step": 8760 + }, + { + "epoch": 0.21413040822808002, + "grad_norm": 60.757179260253906, + "learning_rate": 8.564029901793131e-07, + "loss": 0.1031, + "num_input_tokens_seen": 5913280, + "step": 8765 + }, + { + "epoch": 0.2142525590599272, + "grad_norm": 25.384445190429688, + "learning_rate": 8.568915815703327e-07, + "loss": 0.0859, + "num_input_tokens_seen": 5916352, + "step": 8770 + }, + { + "epoch": 0.21437470989177437, + "grad_norm": 25.03833770751953, + "learning_rate": 8.573801729613523e-07, + "loss": 0.1947, + "num_input_tokens_seen": 5919232, + "step": 8775 + }, + { + "epoch": 0.21449686072362154, + "grad_norm": 25.255443572998047, + "learning_rate": 8.578687643523721e-07, + "loss": 0.2758, + "num_input_tokens_seen": 5921984, + "step": 8780 + }, + { + "epoch": 0.2146190115554687, + "grad_norm": 22.09063720703125, + "learning_rate": 8.583573557433917e-07, + "loss": 0.1436, + "num_input_tokens_seen": 5925504, + "step": 8785 + }, + { + "epoch": 0.21474116238731586, + "grad_norm": 22.207136154174805, + "learning_rate": 8.588459471344115e-07, + "loss": 0.0936, + "num_input_tokens_seen": 5928384, + "step": 8790 + }, + { + "epoch": 0.21486331321916302, + "grad_norm": 3.0565848350524902, + "learning_rate": 8.593345385254312e-07, + "loss": 0.0995, + "num_input_tokens_seen": 5931776, + "step": 8795 + }, + { + "epoch": 0.21498546405101018, + "grad_norm": 28.339794158935547, + "learning_rate": 8.598231299164507e-07, + "loss": 0.1071, + "num_input_tokens_seen": 5934784, + "step": 8800 + }, + { + "epoch": 0.21510761488285735, + "grad_norm": 34.90781784057617, + "learning_rate": 8.603117213074705e-07, + "loss": 0.1719, + "num_input_tokens_seen": 5938560, + "step": 8805 + }, + { + "epoch": 0.2152297657147045, + "grad_norm": 7.775395393371582, + "learning_rate": 8.608003126984902e-07, + "loss": 0.169, + "num_input_tokens_seen": 5941824, + "step": 8810 + }, + { + "epoch": 0.21535191654655167, + "grad_norm": 19.345985412597656, + "learning_rate": 8.612889040895099e-07, + "loss": 0.2145, + "num_input_tokens_seen": 5945152, + "step": 8815 + }, + { + "epoch": 0.21547406737839886, + "grad_norm": 25.269824981689453, + "learning_rate": 8.617774954805296e-07, + "loss": 0.1402, + "num_input_tokens_seen": 5948992, + "step": 8820 + }, + { + "epoch": 0.21559621821024602, + "grad_norm": 23.3660888671875, + "learning_rate": 8.622660868715494e-07, + "loss": 0.1242, + "num_input_tokens_seen": 5952256, + "step": 8825 + }, + { + "epoch": 0.21571836904209318, + "grad_norm": 20.00462532043457, + "learning_rate": 8.627546782625689e-07, + "loss": 0.1165, + "num_input_tokens_seen": 5955648, + "step": 8830 + }, + { + "epoch": 0.21584051987394035, + "grad_norm": 4.807917594909668, + "learning_rate": 8.632432696535886e-07, + "loss": 0.0812, + "num_input_tokens_seen": 5959040, + "step": 8835 + }, + { + "epoch": 0.2159626707057875, + "grad_norm": 5.229181289672852, + "learning_rate": 8.637318610446084e-07, + "loss": 0.1652, + "num_input_tokens_seen": 5962560, + "step": 8840 + }, + { + "epoch": 0.21608482153763467, + "grad_norm": 18.924203872680664, + "learning_rate": 8.64220452435628e-07, + "loss": 0.1325, + "num_input_tokens_seen": 5965952, + "step": 8845 + }, + { + "epoch": 0.21620697236948183, + "grad_norm": 2.562269926071167, + "learning_rate": 8.647090438266478e-07, + "loss": 0.0561, + "num_input_tokens_seen": 5969280, + "step": 8850 + }, + { + "epoch": 0.216329123201329, + "grad_norm": 37.25053787231445, + "learning_rate": 8.651976352176674e-07, + "loss": 0.1161, + "num_input_tokens_seen": 5972480, + "step": 8855 + }, + { + "epoch": 0.21645127403317616, + "grad_norm": 9.465447425842285, + "learning_rate": 8.65686226608687e-07, + "loss": 0.1002, + "num_input_tokens_seen": 5975872, + "step": 8860 + }, + { + "epoch": 0.21657342486502332, + "grad_norm": 13.03969669342041, + "learning_rate": 8.661748179997068e-07, + "loss": 0.1086, + "num_input_tokens_seen": 5978944, + "step": 8865 + }, + { + "epoch": 0.2166955756968705, + "grad_norm": 39.545013427734375, + "learning_rate": 8.666634093907265e-07, + "loss": 0.135, + "num_input_tokens_seen": 5982656, + "step": 8870 + }, + { + "epoch": 0.21681772652871767, + "grad_norm": 22.047517776489258, + "learning_rate": 8.671520007817462e-07, + "loss": 0.0856, + "num_input_tokens_seen": 5985664, + "step": 8875 + }, + { + "epoch": 0.21693987736056483, + "grad_norm": 0.7058843374252319, + "learning_rate": 8.676405921727659e-07, + "loss": 0.1775, + "num_input_tokens_seen": 5989120, + "step": 8880 + }, + { + "epoch": 0.217062028192412, + "grad_norm": 29.685516357421875, + "learning_rate": 8.681291835637856e-07, + "loss": 0.0496, + "num_input_tokens_seen": 5992768, + "step": 8885 + }, + { + "epoch": 0.21718417902425916, + "grad_norm": 30.59242820739746, + "learning_rate": 8.686177749548052e-07, + "loss": 0.2056, + "num_input_tokens_seen": 5996288, + "step": 8890 + }, + { + "epoch": 0.21730632985610632, + "grad_norm": 7.976591110229492, + "learning_rate": 8.691063663458249e-07, + "loss": 0.1601, + "num_input_tokens_seen": 5999808, + "step": 8895 + }, + { + "epoch": 0.21742848068795348, + "grad_norm": 39.25209426879883, + "learning_rate": 8.695949577368447e-07, + "loss": 0.1511, + "num_input_tokens_seen": 6002944, + "step": 8900 + }, + { + "epoch": 0.21755063151980064, + "grad_norm": 27.88641357421875, + "learning_rate": 8.700835491278643e-07, + "loss": 0.1434, + "num_input_tokens_seen": 6006080, + "step": 8905 + }, + { + "epoch": 0.2176727823516478, + "grad_norm": 1.7723349332809448, + "learning_rate": 8.70572140518884e-07, + "loss": 0.0495, + "num_input_tokens_seen": 6009600, + "step": 8910 + }, + { + "epoch": 0.217794933183495, + "grad_norm": 25.451271057128906, + "learning_rate": 8.710607319099037e-07, + "loss": 0.0918, + "num_input_tokens_seen": 6012736, + "step": 8915 + }, + { + "epoch": 0.21791708401534216, + "grad_norm": 6.012026309967041, + "learning_rate": 8.715493233009233e-07, + "loss": 0.0888, + "num_input_tokens_seen": 6016064, + "step": 8920 + }, + { + "epoch": 0.21803923484718932, + "grad_norm": 3.7859063148498535, + "learning_rate": 8.720379146919431e-07, + "loss": 0.1266, + "num_input_tokens_seen": 6019328, + "step": 8925 + }, + { + "epoch": 0.21816138567903648, + "grad_norm": 15.164278984069824, + "learning_rate": 8.725265060829628e-07, + "loss": 0.1356, + "num_input_tokens_seen": 6023040, + "step": 8930 + }, + { + "epoch": 0.21828353651088364, + "grad_norm": 17.575706481933594, + "learning_rate": 8.730150974739825e-07, + "loss": 0.1061, + "num_input_tokens_seen": 6026496, + "step": 8935 + }, + { + "epoch": 0.2184056873427308, + "grad_norm": 1.0441234111785889, + "learning_rate": 8.735036888650021e-07, + "loss": 0.0705, + "num_input_tokens_seen": 6030080, + "step": 8940 + }, + { + "epoch": 0.21852783817457797, + "grad_norm": 10.35993766784668, + "learning_rate": 8.739922802560219e-07, + "loss": 0.105, + "num_input_tokens_seen": 6033408, + "step": 8945 + }, + { + "epoch": 0.21864998900642513, + "grad_norm": 38.489383697509766, + "learning_rate": 8.744808716470415e-07, + "loss": 0.156, + "num_input_tokens_seen": 6037120, + "step": 8950 + }, + { + "epoch": 0.2187721398382723, + "grad_norm": 4.143792629241943, + "learning_rate": 8.749694630380612e-07, + "loss": 0.0852, + "num_input_tokens_seen": 6040640, + "step": 8955 + }, + { + "epoch": 0.21889429067011945, + "grad_norm": 48.676483154296875, + "learning_rate": 8.75458054429081e-07, + "loss": 0.1522, + "num_input_tokens_seen": 6044096, + "step": 8960 + }, + { + "epoch": 0.21901644150196664, + "grad_norm": 34.4107780456543, + "learning_rate": 8.759466458201005e-07, + "loss": 0.1482, + "num_input_tokens_seen": 6047488, + "step": 8965 + }, + { + "epoch": 0.2191385923338138, + "grad_norm": 29.14084815979004, + "learning_rate": 8.764352372111203e-07, + "loss": 0.1361, + "num_input_tokens_seen": 6051072, + "step": 8970 + }, + { + "epoch": 0.21926074316566097, + "grad_norm": 26.460193634033203, + "learning_rate": 8.7692382860214e-07, + "loss": 0.1871, + "num_input_tokens_seen": 6054080, + "step": 8975 + }, + { + "epoch": 0.21938289399750813, + "grad_norm": 5.312205791473389, + "learning_rate": 8.774124199931597e-07, + "loss": 0.0459, + "num_input_tokens_seen": 6057856, + "step": 8980 + }, + { + "epoch": 0.2195050448293553, + "grad_norm": 21.948001861572266, + "learning_rate": 8.779010113841794e-07, + "loss": 0.1141, + "num_input_tokens_seen": 6061120, + "step": 8985 + }, + { + "epoch": 0.21962719566120245, + "grad_norm": 34.10026168823242, + "learning_rate": 8.78389602775199e-07, + "loss": 0.142, + "num_input_tokens_seen": 6064512, + "step": 8990 + }, + { + "epoch": 0.2197493464930496, + "grad_norm": 12.825250625610352, + "learning_rate": 8.788781941662187e-07, + "loss": 0.0917, + "num_input_tokens_seen": 6067904, + "step": 8995 + }, + { + "epoch": 0.21987149732489678, + "grad_norm": 17.931827545166016, + "learning_rate": 8.793667855572384e-07, + "loss": 0.0879, + "num_input_tokens_seen": 6071040, + "step": 9000 + }, + { + "epoch": 0.21999364815674394, + "grad_norm": 4.5756731033325195, + "learning_rate": 8.798553769482582e-07, + "loss": 0.0989, + "num_input_tokens_seen": 6074240, + "step": 9005 + }, + { + "epoch": 0.2201157989885911, + "grad_norm": 31.641714096069336, + "learning_rate": 8.803439683392778e-07, + "loss": 0.2064, + "num_input_tokens_seen": 6077696, + "step": 9010 + }, + { + "epoch": 0.2202379498204383, + "grad_norm": 20.369218826293945, + "learning_rate": 8.808325597302975e-07, + "loss": 0.0578, + "num_input_tokens_seen": 6080960, + "step": 9015 + }, + { + "epoch": 0.22036010065228545, + "grad_norm": 21.847148895263672, + "learning_rate": 8.813211511213172e-07, + "loss": 0.0985, + "num_input_tokens_seen": 6084736, + "step": 9020 + }, + { + "epoch": 0.2204822514841326, + "grad_norm": 8.247864723205566, + "learning_rate": 8.818097425123368e-07, + "loss": 0.2164, + "num_input_tokens_seen": 6087872, + "step": 9025 + }, + { + "epoch": 0.22060440231597978, + "grad_norm": 17.761096954345703, + "learning_rate": 8.822983339033566e-07, + "loss": 0.0508, + "num_input_tokens_seen": 6091264, + "step": 9030 + }, + { + "epoch": 0.22072655314782694, + "grad_norm": 25.695043563842773, + "learning_rate": 8.827869252943763e-07, + "loss": 0.0883, + "num_input_tokens_seen": 6094784, + "step": 9035 + }, + { + "epoch": 0.2208487039796741, + "grad_norm": 25.67939567565918, + "learning_rate": 8.83275516685396e-07, + "loss": 0.132, + "num_input_tokens_seen": 6098560, + "step": 9040 + }, + { + "epoch": 0.22097085481152126, + "grad_norm": 38.35128402709961, + "learning_rate": 8.837641080764157e-07, + "loss": 0.1693, + "num_input_tokens_seen": 6101952, + "step": 9045 + }, + { + "epoch": 0.22109300564336842, + "grad_norm": 41.83271789550781, + "learning_rate": 8.842526994674353e-07, + "loss": 0.1167, + "num_input_tokens_seen": 6105280, + "step": 9050 + }, + { + "epoch": 0.22121515647521559, + "grad_norm": 31.90433120727539, + "learning_rate": 8.84741290858455e-07, + "loss": 0.1305, + "num_input_tokens_seen": 6108224, + "step": 9055 + }, + { + "epoch": 0.22133730730706275, + "grad_norm": 23.579755783081055, + "learning_rate": 8.852298822494747e-07, + "loss": 0.1659, + "num_input_tokens_seen": 6111488, + "step": 9060 + }, + { + "epoch": 0.22145945813890994, + "grad_norm": 32.75372314453125, + "learning_rate": 8.857184736404945e-07, + "loss": 0.1377, + "num_input_tokens_seen": 6114752, + "step": 9065 + }, + { + "epoch": 0.2215816089707571, + "grad_norm": 30.386550903320312, + "learning_rate": 8.862070650315141e-07, + "loss": 0.13, + "num_input_tokens_seen": 6117824, + "step": 9070 + }, + { + "epoch": 0.22170375980260426, + "grad_norm": 30.29648208618164, + "learning_rate": 8.866956564225337e-07, + "loss": 0.1427, + "num_input_tokens_seen": 6121216, + "step": 9075 + }, + { + "epoch": 0.22182591063445142, + "grad_norm": 44.06056213378906, + "learning_rate": 8.871842478135535e-07, + "loss": 0.2194, + "num_input_tokens_seen": 6124736, + "step": 9080 + }, + { + "epoch": 0.22194806146629859, + "grad_norm": 42.379302978515625, + "learning_rate": 8.876728392045731e-07, + "loss": 0.1909, + "num_input_tokens_seen": 6128192, + "step": 9085 + }, + { + "epoch": 0.22207021229814575, + "grad_norm": 2.4536609649658203, + "learning_rate": 8.881614305955929e-07, + "loss": 0.0626, + "num_input_tokens_seen": 6131456, + "step": 9090 + }, + { + "epoch": 0.2221923631299929, + "grad_norm": 23.942848205566406, + "learning_rate": 8.886500219866126e-07, + "loss": 0.1528, + "num_input_tokens_seen": 6135104, + "step": 9095 + }, + { + "epoch": 0.22231451396184007, + "grad_norm": 10.383399963378906, + "learning_rate": 8.891386133776323e-07, + "loss": 0.0779, + "num_input_tokens_seen": 6138560, + "step": 9100 + }, + { + "epoch": 0.22243666479368723, + "grad_norm": 17.084259033203125, + "learning_rate": 8.896272047686519e-07, + "loss": 0.168, + "num_input_tokens_seen": 6141760, + "step": 9105 + }, + { + "epoch": 0.22255881562553442, + "grad_norm": 34.821205139160156, + "learning_rate": 8.901157961596716e-07, + "loss": 0.1326, + "num_input_tokens_seen": 6145280, + "step": 9110 + }, + { + "epoch": 0.22268096645738159, + "grad_norm": 16.09421157836914, + "learning_rate": 8.906043875506913e-07, + "loss": 0.2119, + "num_input_tokens_seen": 6148608, + "step": 9115 + }, + { + "epoch": 0.22280311728922875, + "grad_norm": 11.02747631072998, + "learning_rate": 8.91092978941711e-07, + "loss": 0.1082, + "num_input_tokens_seen": 6152128, + "step": 9120 + }, + { + "epoch": 0.2229252681210759, + "grad_norm": 39.81501007080078, + "learning_rate": 8.915815703327308e-07, + "loss": 0.1069, + "num_input_tokens_seen": 6155456, + "step": 9125 + }, + { + "epoch": 0.22304741895292307, + "grad_norm": 18.303213119506836, + "learning_rate": 8.920701617237503e-07, + "loss": 0.0835, + "num_input_tokens_seen": 6159360, + "step": 9130 + }, + { + "epoch": 0.22316956978477023, + "grad_norm": 12.419539451599121, + "learning_rate": 8.9255875311477e-07, + "loss": 0.101, + "num_input_tokens_seen": 6162944, + "step": 9135 + }, + { + "epoch": 0.2232917206166174, + "grad_norm": 4.48032283782959, + "learning_rate": 8.930473445057898e-07, + "loss": 0.12, + "num_input_tokens_seen": 6165952, + "step": 9140 + }, + { + "epoch": 0.22341387144846456, + "grad_norm": 15.183457374572754, + "learning_rate": 8.935359358968094e-07, + "loss": 0.1024, + "num_input_tokens_seen": 6169600, + "step": 9145 + }, + { + "epoch": 0.22353602228031172, + "grad_norm": 16.72601318359375, + "learning_rate": 8.940245272878292e-07, + "loss": 0.0809, + "num_input_tokens_seen": 6172992, + "step": 9150 + }, + { + "epoch": 0.22365817311215888, + "grad_norm": 2.3916523456573486, + "learning_rate": 8.945131186788489e-07, + "loss": 0.0282, + "num_input_tokens_seen": 6176064, + "step": 9155 + }, + { + "epoch": 0.22378032394400607, + "grad_norm": 25.425764083862305, + "learning_rate": 8.950017100698685e-07, + "loss": 0.0747, + "num_input_tokens_seen": 6179648, + "step": 9160 + }, + { + "epoch": 0.22390247477585323, + "grad_norm": 0.3232182264328003, + "learning_rate": 8.954903014608882e-07, + "loss": 0.0868, + "num_input_tokens_seen": 6183360, + "step": 9165 + }, + { + "epoch": 0.2240246256077004, + "grad_norm": 21.200733184814453, + "learning_rate": 8.959788928519079e-07, + "loss": 0.1656, + "num_input_tokens_seen": 6186368, + "step": 9170 + }, + { + "epoch": 0.22414677643954756, + "grad_norm": 26.374645233154297, + "learning_rate": 8.964674842429276e-07, + "loss": 0.1054, + "num_input_tokens_seen": 6190208, + "step": 9175 + }, + { + "epoch": 0.22426892727139472, + "grad_norm": 2.9865565299987793, + "learning_rate": 8.969560756339473e-07, + "loss": 0.1436, + "num_input_tokens_seen": 6193728, + "step": 9180 + }, + { + "epoch": 0.22439107810324188, + "grad_norm": 33.91632080078125, + "learning_rate": 8.97444667024967e-07, + "loss": 0.1397, + "num_input_tokens_seen": 6196864, + "step": 9185 + }, + { + "epoch": 0.22451322893508904, + "grad_norm": 9.978410720825195, + "learning_rate": 8.979332584159866e-07, + "loss": 0.0629, + "num_input_tokens_seen": 6200384, + "step": 9190 + }, + { + "epoch": 0.2246353797669362, + "grad_norm": 21.483123779296875, + "learning_rate": 8.984218498070063e-07, + "loss": 0.1048, + "num_input_tokens_seen": 6204032, + "step": 9195 + }, + { + "epoch": 0.22475753059878337, + "grad_norm": 20.44054412841797, + "learning_rate": 8.989104411980261e-07, + "loss": 0.0767, + "num_input_tokens_seen": 6206848, + "step": 9200 + }, + { + "epoch": 0.22487968143063053, + "grad_norm": 1.512670874595642, + "learning_rate": 8.993990325890457e-07, + "loss": 0.083, + "num_input_tokens_seen": 6210496, + "step": 9205 + }, + { + "epoch": 0.22500183226247772, + "grad_norm": 14.777823448181152, + "learning_rate": 8.998876239800655e-07, + "loss": 0.1987, + "num_input_tokens_seen": 6213632, + "step": 9210 + }, + { + "epoch": 0.22512398309432488, + "grad_norm": 27.269006729125977, + "learning_rate": 9.003762153710851e-07, + "loss": 0.1313, + "num_input_tokens_seen": 6217088, + "step": 9215 + }, + { + "epoch": 0.22524613392617204, + "grad_norm": 9.171672821044922, + "learning_rate": 9.008648067621048e-07, + "loss": 0.0453, + "num_input_tokens_seen": 6220224, + "step": 9220 + }, + { + "epoch": 0.2253682847580192, + "grad_norm": 39.435176849365234, + "learning_rate": 9.013533981531245e-07, + "loss": 0.1343, + "num_input_tokens_seen": 6223808, + "step": 9225 + }, + { + "epoch": 0.22549043558986637, + "grad_norm": 32.36396789550781, + "learning_rate": 9.018419895441442e-07, + "loss": 0.1499, + "num_input_tokens_seen": 6227904, + "step": 9230 + }, + { + "epoch": 0.22561258642171353, + "grad_norm": 32.112342834472656, + "learning_rate": 9.023305809351639e-07, + "loss": 0.1896, + "num_input_tokens_seen": 6231232, + "step": 9235 + }, + { + "epoch": 0.2257347372535607, + "grad_norm": 15.149031639099121, + "learning_rate": 9.028191723261835e-07, + "loss": 0.1545, + "num_input_tokens_seen": 6234944, + "step": 9240 + }, + { + "epoch": 0.22585688808540785, + "grad_norm": 14.708928108215332, + "learning_rate": 9.033077637172033e-07, + "loss": 0.0667, + "num_input_tokens_seen": 6237696, + "step": 9245 + }, + { + "epoch": 0.22597903891725502, + "grad_norm": 17.376279830932617, + "learning_rate": 9.037963551082229e-07, + "loss": 0.0615, + "num_input_tokens_seen": 6240896, + "step": 9250 + }, + { + "epoch": 0.2261011897491022, + "grad_norm": 20.528581619262695, + "learning_rate": 9.042849464992427e-07, + "loss": 0.0874, + "num_input_tokens_seen": 6244160, + "step": 9255 + }, + { + "epoch": 0.22622334058094937, + "grad_norm": 35.168731689453125, + "learning_rate": 9.047735378902624e-07, + "loss": 0.1483, + "num_input_tokens_seen": 6247040, + "step": 9260 + }, + { + "epoch": 0.22634549141279653, + "grad_norm": 29.01551055908203, + "learning_rate": 9.05262129281282e-07, + "loss": 0.1543, + "num_input_tokens_seen": 6250752, + "step": 9265 + }, + { + "epoch": 0.2264676422446437, + "grad_norm": 19.882156372070312, + "learning_rate": 9.057507206723017e-07, + "loss": 0.1016, + "num_input_tokens_seen": 6253824, + "step": 9270 + }, + { + "epoch": 0.22658979307649085, + "grad_norm": 8.662846565246582, + "learning_rate": 9.062393120633214e-07, + "loss": 0.1943, + "num_input_tokens_seen": 6257344, + "step": 9275 + }, + { + "epoch": 0.22671194390833802, + "grad_norm": 53.7777099609375, + "learning_rate": 9.067279034543411e-07, + "loss": 0.0592, + "num_input_tokens_seen": 6260672, + "step": 9280 + }, + { + "epoch": 0.22683409474018518, + "grad_norm": 19.83289337158203, + "learning_rate": 9.072164948453608e-07, + "loss": 0.1493, + "num_input_tokens_seen": 6263872, + "step": 9285 + }, + { + "epoch": 0.22695624557203234, + "grad_norm": 8.226678848266602, + "learning_rate": 9.077050862363805e-07, + "loss": 0.1311, + "num_input_tokens_seen": 6266944, + "step": 9290 + }, + { + "epoch": 0.2270783964038795, + "grad_norm": 36.27229309082031, + "learning_rate": 9.081936776274001e-07, + "loss": 0.1872, + "num_input_tokens_seen": 6270144, + "step": 9295 + }, + { + "epoch": 0.22720054723572666, + "grad_norm": 39.83232498168945, + "learning_rate": 9.086822690184198e-07, + "loss": 0.1492, + "num_input_tokens_seen": 6273536, + "step": 9300 + }, + { + "epoch": 0.22732269806757385, + "grad_norm": 17.2782039642334, + "learning_rate": 9.091708604094396e-07, + "loss": 0.0562, + "num_input_tokens_seen": 6276608, + "step": 9305 + }, + { + "epoch": 0.22744484889942101, + "grad_norm": 4.376680850982666, + "learning_rate": 9.096594518004592e-07, + "loss": 0.2263, + "num_input_tokens_seen": 6280128, + "step": 9310 + }, + { + "epoch": 0.22756699973126818, + "grad_norm": 28.051551818847656, + "learning_rate": 9.10148043191479e-07, + "loss": 0.1597, + "num_input_tokens_seen": 6284032, + "step": 9315 + }, + { + "epoch": 0.22768915056311534, + "grad_norm": 19.270139694213867, + "learning_rate": 9.106366345824987e-07, + "loss": 0.1385, + "num_input_tokens_seen": 6287552, + "step": 9320 + }, + { + "epoch": 0.2278113013949625, + "grad_norm": 14.105006217956543, + "learning_rate": 9.111252259735182e-07, + "loss": 0.1459, + "num_input_tokens_seen": 6291200, + "step": 9325 + }, + { + "epoch": 0.22793345222680966, + "grad_norm": 49.273494720458984, + "learning_rate": 9.11613817364538e-07, + "loss": 0.1791, + "num_input_tokens_seen": 6294336, + "step": 9330 + }, + { + "epoch": 0.22805560305865683, + "grad_norm": 1.47030770778656, + "learning_rate": 9.121024087555577e-07, + "loss": 0.1285, + "num_input_tokens_seen": 6297856, + "step": 9335 + }, + { + "epoch": 0.228177753890504, + "grad_norm": 25.866708755493164, + "learning_rate": 9.125910001465774e-07, + "loss": 0.1282, + "num_input_tokens_seen": 6301440, + "step": 9340 + }, + { + "epoch": 0.22829990472235115, + "grad_norm": 13.393543243408203, + "learning_rate": 9.130795915375971e-07, + "loss": 0.119, + "num_input_tokens_seen": 6305024, + "step": 9345 + }, + { + "epoch": 0.2284220555541983, + "grad_norm": 19.900217056274414, + "learning_rate": 9.135681829286167e-07, + "loss": 0.0722, + "num_input_tokens_seen": 6308224, + "step": 9350 + }, + { + "epoch": 0.2285442063860455, + "grad_norm": 24.861238479614258, + "learning_rate": 9.140567743196364e-07, + "loss": 0.1011, + "num_input_tokens_seen": 6311808, + "step": 9355 + }, + { + "epoch": 0.22866635721789266, + "grad_norm": 30.723133087158203, + "learning_rate": 9.145453657106561e-07, + "loss": 0.1862, + "num_input_tokens_seen": 6315200, + "step": 9360 + }, + { + "epoch": 0.22878850804973982, + "grad_norm": 27.590089797973633, + "learning_rate": 9.150339571016759e-07, + "loss": 0.0647, + "num_input_tokens_seen": 6318592, + "step": 9365 + }, + { + "epoch": 0.228910658881587, + "grad_norm": 4.14084529876709, + "learning_rate": 9.155225484926955e-07, + "loss": 0.0721, + "num_input_tokens_seen": 6321600, + "step": 9370 + }, + { + "epoch": 0.22903280971343415, + "grad_norm": 11.999881744384766, + "learning_rate": 9.160111398837153e-07, + "loss": 0.064, + "num_input_tokens_seen": 6324608, + "step": 9375 + }, + { + "epoch": 0.2291549605452813, + "grad_norm": 20.66971206665039, + "learning_rate": 9.164997312747349e-07, + "loss": 0.1505, + "num_input_tokens_seen": 6327936, + "step": 9380 + }, + { + "epoch": 0.22927711137712847, + "grad_norm": 42.416725158691406, + "learning_rate": 9.169883226657545e-07, + "loss": 0.3507, + "num_input_tokens_seen": 6330752, + "step": 9385 + }, + { + "epoch": 0.22939926220897564, + "grad_norm": 27.1995849609375, + "learning_rate": 9.174769140567743e-07, + "loss": 0.1794, + "num_input_tokens_seen": 6334336, + "step": 9390 + }, + { + "epoch": 0.2295214130408228, + "grad_norm": 10.624309539794922, + "learning_rate": 9.17965505447794e-07, + "loss": 0.1112, + "num_input_tokens_seen": 6337728, + "step": 9395 + }, + { + "epoch": 0.22964356387266996, + "grad_norm": 19.080080032348633, + "learning_rate": 9.184540968388137e-07, + "loss": 0.0702, + "num_input_tokens_seen": 6340992, + "step": 9400 + }, + { + "epoch": 0.22976571470451715, + "grad_norm": 5.388887882232666, + "learning_rate": 9.189426882298333e-07, + "loss": 0.068, + "num_input_tokens_seen": 6344000, + "step": 9405 + }, + { + "epoch": 0.2298878655363643, + "grad_norm": 23.71079444885254, + "learning_rate": 9.19431279620853e-07, + "loss": 0.1371, + "num_input_tokens_seen": 6347200, + "step": 9410 + }, + { + "epoch": 0.23001001636821147, + "grad_norm": 7.927711009979248, + "learning_rate": 9.199198710118727e-07, + "loss": 0.1161, + "num_input_tokens_seen": 6350848, + "step": 9415 + }, + { + "epoch": 0.23013216720005863, + "grad_norm": 15.976344108581543, + "learning_rate": 9.204084624028924e-07, + "loss": 0.1124, + "num_input_tokens_seen": 6354496, + "step": 9420 + }, + { + "epoch": 0.2302543180319058, + "grad_norm": 9.561179161071777, + "learning_rate": 9.208970537939122e-07, + "loss": 0.054, + "num_input_tokens_seen": 6358016, + "step": 9425 + }, + { + "epoch": 0.23037646886375296, + "grad_norm": 48.035709381103516, + "learning_rate": 9.213856451849317e-07, + "loss": 0.1934, + "num_input_tokens_seen": 6361536, + "step": 9430 + }, + { + "epoch": 0.23049861969560012, + "grad_norm": 2.9448719024658203, + "learning_rate": 9.218742365759515e-07, + "loss": 0.1495, + "num_input_tokens_seen": 6364544, + "step": 9435 + }, + { + "epoch": 0.23062077052744728, + "grad_norm": 1.32425856590271, + "learning_rate": 9.223628279669712e-07, + "loss": 0.1165, + "num_input_tokens_seen": 6368832, + "step": 9440 + }, + { + "epoch": 0.23074292135929445, + "grad_norm": 20.775575637817383, + "learning_rate": 9.228514193579908e-07, + "loss": 0.0733, + "num_input_tokens_seen": 6372160, + "step": 9445 + }, + { + "epoch": 0.23086507219114163, + "grad_norm": 9.844050407409668, + "learning_rate": 9.233400107490106e-07, + "loss": 0.136, + "num_input_tokens_seen": 6375552, + "step": 9450 + }, + { + "epoch": 0.2309872230229888, + "grad_norm": 11.708232879638672, + "learning_rate": 9.238286021400303e-07, + "loss": 0.0873, + "num_input_tokens_seen": 6378944, + "step": 9455 + }, + { + "epoch": 0.23110937385483596, + "grad_norm": 27.45308494567871, + "learning_rate": 9.243171935310499e-07, + "loss": 0.1288, + "num_input_tokens_seen": 6381888, + "step": 9460 + }, + { + "epoch": 0.23123152468668312, + "grad_norm": 17.817472457885742, + "learning_rate": 9.248057849220696e-07, + "loss": 0.1454, + "num_input_tokens_seen": 6385088, + "step": 9465 + }, + { + "epoch": 0.23135367551853028, + "grad_norm": 57.0069580078125, + "learning_rate": 9.252943763130894e-07, + "loss": 0.0999, + "num_input_tokens_seen": 6388224, + "step": 9470 + }, + { + "epoch": 0.23147582635037744, + "grad_norm": 36.93852615356445, + "learning_rate": 9.25782967704109e-07, + "loss": 0.1227, + "num_input_tokens_seen": 6391552, + "step": 9475 + }, + { + "epoch": 0.2315979771822246, + "grad_norm": 0.25835201144218445, + "learning_rate": 9.262715590951287e-07, + "loss": 0.0858, + "num_input_tokens_seen": 6395200, + "step": 9480 + }, + { + "epoch": 0.23172012801407177, + "grad_norm": 43.729427337646484, + "learning_rate": 9.267601504861485e-07, + "loss": 0.0834, + "num_input_tokens_seen": 6398976, + "step": 9485 + }, + { + "epoch": 0.23184227884591893, + "grad_norm": 10.132993698120117, + "learning_rate": 9.27248741877168e-07, + "loss": 0.1258, + "num_input_tokens_seen": 6402304, + "step": 9490 + }, + { + "epoch": 0.2319644296777661, + "grad_norm": 17.87611198425293, + "learning_rate": 9.277373332681878e-07, + "loss": 0.1547, + "num_input_tokens_seen": 6405696, + "step": 9495 + }, + { + "epoch": 0.23208658050961328, + "grad_norm": 61.92757034301758, + "learning_rate": 9.282259246592075e-07, + "loss": 0.269, + "num_input_tokens_seen": 6409280, + "step": 9500 + }, + { + "epoch": 0.23220873134146044, + "grad_norm": 19.06719398498535, + "learning_rate": 9.287145160502271e-07, + "loss": 0.1537, + "num_input_tokens_seen": 6413504, + "step": 9505 + }, + { + "epoch": 0.2323308821733076, + "grad_norm": 75.03028869628906, + "learning_rate": 9.292031074412469e-07, + "loss": 0.0875, + "num_input_tokens_seen": 6417088, + "step": 9510 + }, + { + "epoch": 0.23245303300515477, + "grad_norm": 81.23187255859375, + "learning_rate": 9.296916988322665e-07, + "loss": 0.123, + "num_input_tokens_seen": 6420352, + "step": 9515 + }, + { + "epoch": 0.23257518383700193, + "grad_norm": 17.87434959411621, + "learning_rate": 9.301802902232862e-07, + "loss": 0.1393, + "num_input_tokens_seen": 6424192, + "step": 9520 + }, + { + "epoch": 0.2326973346688491, + "grad_norm": 40.12156677246094, + "learning_rate": 9.306688816143059e-07, + "loss": 0.1615, + "num_input_tokens_seen": 6427520, + "step": 9525 + }, + { + "epoch": 0.23281948550069625, + "grad_norm": 2.44599986076355, + "learning_rate": 9.311574730053257e-07, + "loss": 0.0897, + "num_input_tokens_seen": 6431104, + "step": 9530 + }, + { + "epoch": 0.23294163633254342, + "grad_norm": 11.04211711883545, + "learning_rate": 9.316460643963453e-07, + "loss": 0.1335, + "num_input_tokens_seen": 6434688, + "step": 9535 + }, + { + "epoch": 0.23306378716439058, + "grad_norm": 46.168495178222656, + "learning_rate": 9.32134655787365e-07, + "loss": 0.1245, + "num_input_tokens_seen": 6438080, + "step": 9540 + }, + { + "epoch": 0.23318593799623774, + "grad_norm": 14.355307579040527, + "learning_rate": 9.326232471783847e-07, + "loss": 0.0828, + "num_input_tokens_seen": 6441408, + "step": 9545 + }, + { + "epoch": 0.23330808882808493, + "grad_norm": 24.860210418701172, + "learning_rate": 9.331118385694043e-07, + "loss": 0.1374, + "num_input_tokens_seen": 6444992, + "step": 9550 + }, + { + "epoch": 0.2334302396599321, + "grad_norm": 32.60927200317383, + "learning_rate": 9.336004299604241e-07, + "loss": 0.1874, + "num_input_tokens_seen": 6448768, + "step": 9555 + }, + { + "epoch": 0.23355239049177925, + "grad_norm": 6.585495948791504, + "learning_rate": 9.340890213514438e-07, + "loss": 0.125, + "num_input_tokens_seen": 6451840, + "step": 9560 + }, + { + "epoch": 0.23367454132362642, + "grad_norm": 7.540535926818848, + "learning_rate": 9.345776127424634e-07, + "loss": 0.1218, + "num_input_tokens_seen": 6455296, + "step": 9565 + }, + { + "epoch": 0.23379669215547358, + "grad_norm": 26.24741554260254, + "learning_rate": 9.350662041334831e-07, + "loss": 0.1065, + "num_input_tokens_seen": 6458496, + "step": 9570 + }, + { + "epoch": 0.23391884298732074, + "grad_norm": 22.11855697631836, + "learning_rate": 9.355547955245028e-07, + "loss": 0.1746, + "num_input_tokens_seen": 6461760, + "step": 9575 + }, + { + "epoch": 0.2340409938191679, + "grad_norm": 6.021768093109131, + "learning_rate": 9.360433869155225e-07, + "loss": 0.0387, + "num_input_tokens_seen": 6465472, + "step": 9580 + }, + { + "epoch": 0.23416314465101506, + "grad_norm": 13.273813247680664, + "learning_rate": 9.365319783065422e-07, + "loss": 0.1766, + "num_input_tokens_seen": 6468544, + "step": 9585 + }, + { + "epoch": 0.23428529548286223, + "grad_norm": 1.5607789754867554, + "learning_rate": 9.37020569697562e-07, + "loss": 0.0384, + "num_input_tokens_seen": 6472128, + "step": 9590 + }, + { + "epoch": 0.23440744631470942, + "grad_norm": 28.771175384521484, + "learning_rate": 9.375091610885816e-07, + "loss": 0.307, + "num_input_tokens_seen": 6475392, + "step": 9595 + }, + { + "epoch": 0.23452959714655658, + "grad_norm": 31.59319496154785, + "learning_rate": 9.379977524796012e-07, + "loss": 0.159, + "num_input_tokens_seen": 6478720, + "step": 9600 + }, + { + "epoch": 0.23465174797840374, + "grad_norm": 37.83382034301758, + "learning_rate": 9.38486343870621e-07, + "loss": 0.2908, + "num_input_tokens_seen": 6482688, + "step": 9605 + }, + { + "epoch": 0.2347738988102509, + "grad_norm": 30.60443878173828, + "learning_rate": 9.389749352616406e-07, + "loss": 0.1376, + "num_input_tokens_seen": 6486016, + "step": 9610 + }, + { + "epoch": 0.23489604964209806, + "grad_norm": 11.49390983581543, + "learning_rate": 9.394635266526604e-07, + "loss": 0.0965, + "num_input_tokens_seen": 6489280, + "step": 9615 + }, + { + "epoch": 0.23501820047394523, + "grad_norm": 24.10028648376465, + "learning_rate": 9.399521180436801e-07, + "loss": 0.0924, + "num_input_tokens_seen": 6492416, + "step": 9620 + }, + { + "epoch": 0.2351403513057924, + "grad_norm": 39.35089874267578, + "learning_rate": 9.404407094346996e-07, + "loss": 0.1259, + "num_input_tokens_seen": 6495744, + "step": 9625 + }, + { + "epoch": 0.23526250213763955, + "grad_norm": 23.8758544921875, + "learning_rate": 9.409293008257194e-07, + "loss": 0.1663, + "num_input_tokens_seen": 6499136, + "step": 9630 + }, + { + "epoch": 0.2353846529694867, + "grad_norm": 6.008744716644287, + "learning_rate": 9.414178922167391e-07, + "loss": 0.0544, + "num_input_tokens_seen": 6502528, + "step": 9635 + }, + { + "epoch": 0.23550680380133387, + "grad_norm": 30.769689559936523, + "learning_rate": 9.419064836077588e-07, + "loss": 0.2107, + "num_input_tokens_seen": 6506112, + "step": 9640 + }, + { + "epoch": 0.23562895463318106, + "grad_norm": 21.367860794067383, + "learning_rate": 9.423950749987785e-07, + "loss": 0.0687, + "num_input_tokens_seen": 6509696, + "step": 9645 + }, + { + "epoch": 0.23575110546502823, + "grad_norm": 1.8322582244873047, + "learning_rate": 9.428836663897983e-07, + "loss": 0.0455, + "num_input_tokens_seen": 6513408, + "step": 9650 + }, + { + "epoch": 0.2358732562968754, + "grad_norm": 0.16293790936470032, + "learning_rate": 9.433722577808178e-07, + "loss": 0.0809, + "num_input_tokens_seen": 6516864, + "step": 9655 + }, + { + "epoch": 0.23599540712872255, + "grad_norm": 19.307575225830078, + "learning_rate": 9.438608491718375e-07, + "loss": 0.2114, + "num_input_tokens_seen": 6519872, + "step": 9660 + }, + { + "epoch": 0.2361175579605697, + "grad_norm": 1.3225882053375244, + "learning_rate": 9.443494405628573e-07, + "loss": 0.1159, + "num_input_tokens_seen": 6522944, + "step": 9665 + }, + { + "epoch": 0.23623970879241687, + "grad_norm": 67.67269897460938, + "learning_rate": 9.448380319538769e-07, + "loss": 0.1237, + "num_input_tokens_seen": 6526080, + "step": 9670 + }, + { + "epoch": 0.23636185962426404, + "grad_norm": 0.4603472948074341, + "learning_rate": 9.453266233448967e-07, + "loss": 0.0633, + "num_input_tokens_seen": 6529536, + "step": 9675 + }, + { + "epoch": 0.2364840104561112, + "grad_norm": 7.607626914978027, + "learning_rate": 9.458152147359163e-07, + "loss": 0.1407, + "num_input_tokens_seen": 6533056, + "step": 9680 + }, + { + "epoch": 0.23660616128795836, + "grad_norm": 46.98657989501953, + "learning_rate": 9.46303806126936e-07, + "loss": 0.1077, + "num_input_tokens_seen": 6536384, + "step": 9685 + }, + { + "epoch": 0.23672831211980552, + "grad_norm": 2.524136781692505, + "learning_rate": 9.467923975179557e-07, + "loss": 0.0448, + "num_input_tokens_seen": 6540160, + "step": 9690 + }, + { + "epoch": 0.2368504629516527, + "grad_norm": 37.658634185791016, + "learning_rate": 9.472809889089754e-07, + "loss": 0.239, + "num_input_tokens_seen": 6544000, + "step": 9695 + }, + { + "epoch": 0.23697261378349987, + "grad_norm": 2.243898630142212, + "learning_rate": 9.477695802999951e-07, + "loss": 0.2482, + "num_input_tokens_seen": 6547584, + "step": 9700 + }, + { + "epoch": 0.23709476461534704, + "grad_norm": 5.683414459228516, + "learning_rate": 9.482581716910148e-07, + "loss": 0.1092, + "num_input_tokens_seen": 6550656, + "step": 9705 + }, + { + "epoch": 0.2372169154471942, + "grad_norm": 4.624309539794922, + "learning_rate": 9.487467630820345e-07, + "loss": 0.0269, + "num_input_tokens_seen": 6553920, + "step": 9710 + }, + { + "epoch": 0.23733906627904136, + "grad_norm": 0.224063441157341, + "learning_rate": 9.492353544730541e-07, + "loss": 0.2107, + "num_input_tokens_seen": 6557312, + "step": 9715 + }, + { + "epoch": 0.23746121711088852, + "grad_norm": 1.4567283391952515, + "learning_rate": 9.497239458640738e-07, + "loss": 0.1803, + "num_input_tokens_seen": 6560512, + "step": 9720 + }, + { + "epoch": 0.23758336794273568, + "grad_norm": 12.508830070495605, + "learning_rate": 9.502125372550936e-07, + "loss": 0.0741, + "num_input_tokens_seen": 6564480, + "step": 9725 + }, + { + "epoch": 0.23770551877458285, + "grad_norm": 27.259662628173828, + "learning_rate": 9.507011286461132e-07, + "loss": 0.1461, + "num_input_tokens_seen": 6567808, + "step": 9730 + }, + { + "epoch": 0.23782766960643, + "grad_norm": 2.4258787631988525, + "learning_rate": 9.511897200371329e-07, + "loss": 0.0947, + "num_input_tokens_seen": 6571200, + "step": 9735 + }, + { + "epoch": 0.23794982043827717, + "grad_norm": 14.623574256896973, + "learning_rate": 9.516783114281526e-07, + "loss": 0.0608, + "num_input_tokens_seen": 6574912, + "step": 9740 + }, + { + "epoch": 0.23807197127012436, + "grad_norm": 32.38935852050781, + "learning_rate": 9.521669028191723e-07, + "loss": 0.1259, + "num_input_tokens_seen": 6578432, + "step": 9745 + }, + { + "epoch": 0.23819412210197152, + "grad_norm": 18.282604217529297, + "learning_rate": 9.52655494210192e-07, + "loss": 0.1251, + "num_input_tokens_seen": 6581760, + "step": 9750 + }, + { + "epoch": 0.23831627293381868, + "grad_norm": 52.684356689453125, + "learning_rate": 9.531440856012117e-07, + "loss": 0.0943, + "num_input_tokens_seen": 6585280, + "step": 9755 + }, + { + "epoch": 0.23843842376566585, + "grad_norm": 25.119375228881836, + "learning_rate": 9.536326769922314e-07, + "loss": 0.0738, + "num_input_tokens_seen": 6588416, + "step": 9760 + }, + { + "epoch": 0.238560574597513, + "grad_norm": 44.266971588134766, + "learning_rate": 9.54121268383251e-07, + "loss": 0.1411, + "num_input_tokens_seen": 6591808, + "step": 9765 + }, + { + "epoch": 0.23868272542936017, + "grad_norm": 61.14725112915039, + "learning_rate": 9.546098597742707e-07, + "loss": 0.2692, + "num_input_tokens_seen": 6595328, + "step": 9770 + }, + { + "epoch": 0.23880487626120733, + "grad_norm": 73.36861419677734, + "learning_rate": 9.550984511652904e-07, + "loss": 0.1692, + "num_input_tokens_seen": 6598464, + "step": 9775 + }, + { + "epoch": 0.2389270270930545, + "grad_norm": 50.22648239135742, + "learning_rate": 9.5558704255631e-07, + "loss": 0.1712, + "num_input_tokens_seen": 6601984, + "step": 9780 + }, + { + "epoch": 0.23904917792490166, + "grad_norm": 22.295808792114258, + "learning_rate": 9.560756339473298e-07, + "loss": 0.0982, + "num_input_tokens_seen": 6605184, + "step": 9785 + }, + { + "epoch": 0.23917132875674885, + "grad_norm": 16.143505096435547, + "learning_rate": 9.565642253383494e-07, + "loss": 0.3098, + "num_input_tokens_seen": 6608384, + "step": 9790 + }, + { + "epoch": 0.239293479588596, + "grad_norm": 2.035536766052246, + "learning_rate": 9.570528167293691e-07, + "loss": 0.0284, + "num_input_tokens_seen": 6611776, + "step": 9795 + }, + { + "epoch": 0.23941563042044317, + "grad_norm": 17.494781494140625, + "learning_rate": 9.575414081203888e-07, + "loss": 0.0463, + "num_input_tokens_seen": 6615168, + "step": 9800 + }, + { + "epoch": 0.23953778125229033, + "grad_norm": 23.606449127197266, + "learning_rate": 9.580299995114087e-07, + "loss": 0.2122, + "num_input_tokens_seen": 6618496, + "step": 9805 + }, + { + "epoch": 0.2396599320841375, + "grad_norm": 13.581524848937988, + "learning_rate": 9.585185909024282e-07, + "loss": 0.0728, + "num_input_tokens_seen": 6621888, + "step": 9810 + }, + { + "epoch": 0.23978208291598466, + "grad_norm": 25.172103881835938, + "learning_rate": 9.590071822934478e-07, + "loss": 0.1752, + "num_input_tokens_seen": 6625152, + "step": 9815 + }, + { + "epoch": 0.23990423374783182, + "grad_norm": 19.240901947021484, + "learning_rate": 9.594957736844677e-07, + "loss": 0.0904, + "num_input_tokens_seen": 6628736, + "step": 9820 + }, + { + "epoch": 0.24002638457967898, + "grad_norm": 14.154614448547363, + "learning_rate": 9.599843650754872e-07, + "loss": 0.1229, + "num_input_tokens_seen": 6632192, + "step": 9825 + }, + { + "epoch": 0.24014853541152614, + "grad_norm": 8.691808700561523, + "learning_rate": 9.60472956466507e-07, + "loss": 0.0957, + "num_input_tokens_seen": 6635648, + "step": 9830 + }, + { + "epoch": 0.2402706862433733, + "grad_norm": 70.14839172363281, + "learning_rate": 9.609615478575268e-07, + "loss": 0.178, + "num_input_tokens_seen": 6639104, + "step": 9835 + }, + { + "epoch": 0.2403928370752205, + "grad_norm": 13.37100887298584, + "learning_rate": 9.614501392485463e-07, + "loss": 0.2036, + "num_input_tokens_seen": 6642752, + "step": 9840 + }, + { + "epoch": 0.24051498790706766, + "grad_norm": 9.792305946350098, + "learning_rate": 9.619387306395661e-07, + "loss": 0.0657, + "num_input_tokens_seen": 6646208, + "step": 9845 + }, + { + "epoch": 0.24063713873891482, + "grad_norm": 34.969730377197266, + "learning_rate": 9.624273220305858e-07, + "loss": 0.122, + "num_input_tokens_seen": 6649600, + "step": 9850 + }, + { + "epoch": 0.24075928957076198, + "grad_norm": 23.51738929748535, + "learning_rate": 9.629159134216055e-07, + "loss": 0.1153, + "num_input_tokens_seen": 6653248, + "step": 9855 + }, + { + "epoch": 0.24088144040260914, + "grad_norm": 51.84558868408203, + "learning_rate": 9.634045048126252e-07, + "loss": 0.0749, + "num_input_tokens_seen": 6656512, + "step": 9860 + }, + { + "epoch": 0.2410035912344563, + "grad_norm": 25.279094696044922, + "learning_rate": 9.638930962036449e-07, + "loss": 0.1768, + "num_input_tokens_seen": 6660160, + "step": 9865 + }, + { + "epoch": 0.24112574206630347, + "grad_norm": 10.345446586608887, + "learning_rate": 9.643816875946646e-07, + "loss": 0.185, + "num_input_tokens_seen": 6663168, + "step": 9870 + }, + { + "epoch": 0.24124789289815063, + "grad_norm": 51.44864273071289, + "learning_rate": 9.648702789856842e-07, + "loss": 0.1417, + "num_input_tokens_seen": 6666496, + "step": 9875 + }, + { + "epoch": 0.2413700437299978, + "grad_norm": 39.88957595825195, + "learning_rate": 9.65358870376704e-07, + "loss": 0.1139, + "num_input_tokens_seen": 6669504, + "step": 9880 + }, + { + "epoch": 0.24149219456184495, + "grad_norm": 2.5328776836395264, + "learning_rate": 9.658474617677236e-07, + "loss": 0.1812, + "num_input_tokens_seen": 6673152, + "step": 9885 + }, + { + "epoch": 0.24161434539369214, + "grad_norm": 22.786235809326172, + "learning_rate": 9.663360531587433e-07, + "loss": 0.062, + "num_input_tokens_seen": 6676608, + "step": 9890 + }, + { + "epoch": 0.2417364962255393, + "grad_norm": 24.119340896606445, + "learning_rate": 9.66824644549763e-07, + "loss": 0.0548, + "num_input_tokens_seen": 6679488, + "step": 9895 + }, + { + "epoch": 0.24185864705738647, + "grad_norm": 40.44359588623047, + "learning_rate": 9.673132359407826e-07, + "loss": 0.2652, + "num_input_tokens_seen": 6682624, + "step": 9900 + }, + { + "epoch": 0.24198079788923363, + "grad_norm": 2.0219240188598633, + "learning_rate": 9.678018273318023e-07, + "loss": 0.0769, + "num_input_tokens_seen": 6685952, + "step": 9905 + }, + { + "epoch": 0.2421029487210808, + "grad_norm": 21.13841438293457, + "learning_rate": 9.68290418722822e-07, + "loss": 0.1698, + "num_input_tokens_seen": 6689920, + "step": 9910 + }, + { + "epoch": 0.24222509955292795, + "grad_norm": 14.74864387512207, + "learning_rate": 9.687790101138417e-07, + "loss": 0.146, + "num_input_tokens_seen": 6693184, + "step": 9915 + }, + { + "epoch": 0.24234725038477511, + "grad_norm": 2.7721073627471924, + "learning_rate": 9.692676015048614e-07, + "loss": 0.0271, + "num_input_tokens_seen": 6696320, + "step": 9920 + }, + { + "epoch": 0.24246940121662228, + "grad_norm": 33.85337448120117, + "learning_rate": 9.697561928958813e-07, + "loss": 0.0984, + "num_input_tokens_seen": 6699136, + "step": 9925 + }, + { + "epoch": 0.24259155204846944, + "grad_norm": 49.046958923339844, + "learning_rate": 9.702447842869007e-07, + "loss": 0.113, + "num_input_tokens_seen": 6703360, + "step": 9930 + }, + { + "epoch": 0.24271370288031663, + "grad_norm": 9.85345458984375, + "learning_rate": 9.707333756779204e-07, + "loss": 0.1492, + "num_input_tokens_seen": 6706752, + "step": 9935 + }, + { + "epoch": 0.2428358537121638, + "grad_norm": 17.741451263427734, + "learning_rate": 9.712219670689403e-07, + "loss": 0.1364, + "num_input_tokens_seen": 6710144, + "step": 9940 + }, + { + "epoch": 0.24295800454401095, + "grad_norm": 5.377196788787842, + "learning_rate": 9.717105584599598e-07, + "loss": 0.2191, + "num_input_tokens_seen": 6713088, + "step": 9945 + }, + { + "epoch": 0.24308015537585811, + "grad_norm": 43.747596740722656, + "learning_rate": 9.721991498509797e-07, + "loss": 0.1625, + "num_input_tokens_seen": 6716352, + "step": 9950 + }, + { + "epoch": 0.24320230620770528, + "grad_norm": 29.562347412109375, + "learning_rate": 9.726877412419993e-07, + "loss": 0.1616, + "num_input_tokens_seen": 6719488, + "step": 9955 + }, + { + "epoch": 0.24332445703955244, + "grad_norm": 11.152497291564941, + "learning_rate": 9.73176332633019e-07, + "loss": 0.0587, + "num_input_tokens_seen": 6722880, + "step": 9960 + }, + { + "epoch": 0.2434466078713996, + "grad_norm": 11.652654647827148, + "learning_rate": 9.736649240240387e-07, + "loss": 0.1104, + "num_input_tokens_seen": 6726784, + "step": 9965 + }, + { + "epoch": 0.24356875870324676, + "grad_norm": 15.605438232421875, + "learning_rate": 9.741535154150584e-07, + "loss": 0.076, + "num_input_tokens_seen": 6730560, + "step": 9970 + }, + { + "epoch": 0.24369090953509392, + "grad_norm": 47.092041015625, + "learning_rate": 9.74642106806078e-07, + "loss": 0.0844, + "num_input_tokens_seen": 6734144, + "step": 9975 + }, + { + "epoch": 0.2438130603669411, + "grad_norm": 2.1623594760894775, + "learning_rate": 9.751306981970978e-07, + "loss": 0.0711, + "num_input_tokens_seen": 6737536, + "step": 9980 + }, + { + "epoch": 0.24393521119878828, + "grad_norm": 0.6756454706192017, + "learning_rate": 9.756192895881174e-07, + "loss": 0.0827, + "num_input_tokens_seen": 6740864, + "step": 9985 + }, + { + "epoch": 0.24405736203063544, + "grad_norm": 36.02912139892578, + "learning_rate": 9.761078809791371e-07, + "loss": 0.119, + "num_input_tokens_seen": 6744256, + "step": 9990 + }, + { + "epoch": 0.2441795128624826, + "grad_norm": 62.33737564086914, + "learning_rate": 9.765964723701568e-07, + "loss": 0.1792, + "num_input_tokens_seen": 6747904, + "step": 9995 + }, + { + "epoch": 0.24430166369432976, + "grad_norm": 39.74384307861328, + "learning_rate": 9.770850637611765e-07, + "loss": 0.1775, + "num_input_tokens_seen": 6751232, + "step": 10000 + }, + { + "epoch": 0.24442381452617692, + "grad_norm": 37.42814636230469, + "learning_rate": 9.775736551521962e-07, + "loss": 0.2521, + "num_input_tokens_seen": 6754368, + "step": 10005 + }, + { + "epoch": 0.2445459653580241, + "grad_norm": 30.444894790649414, + "learning_rate": 9.780622465432158e-07, + "loss": 0.0733, + "num_input_tokens_seen": 6757632, + "step": 10010 + }, + { + "epoch": 0.24466811618987125, + "grad_norm": 25.95069122314453, + "learning_rate": 9.785508379342355e-07, + "loss": 0.2123, + "num_input_tokens_seen": 6761024, + "step": 10015 + }, + { + "epoch": 0.2447902670217184, + "grad_norm": 33.53807067871094, + "learning_rate": 9.790394293252552e-07, + "loss": 0.1199, + "num_input_tokens_seen": 6764480, + "step": 10020 + }, + { + "epoch": 0.24491241785356557, + "grad_norm": 16.67969512939453, + "learning_rate": 9.795280207162749e-07, + "loss": 0.0975, + "num_input_tokens_seen": 6767680, + "step": 10025 + }, + { + "epoch": 0.24503456868541273, + "grad_norm": 29.962873458862305, + "learning_rate": 9.800166121072946e-07, + "loss": 0.1569, + "num_input_tokens_seen": 6771008, + "step": 10030 + }, + { + "epoch": 0.24515671951725992, + "grad_norm": 1.9181692600250244, + "learning_rate": 9.805052034983142e-07, + "loss": 0.1284, + "num_input_tokens_seen": 6774080, + "step": 10035 + }, + { + "epoch": 0.2452788703491071, + "grad_norm": 6.126092433929443, + "learning_rate": 9.80993794889334e-07, + "loss": 0.0622, + "num_input_tokens_seen": 6777472, + "step": 10040 + }, + { + "epoch": 0.24540102118095425, + "grad_norm": 40.68521499633789, + "learning_rate": 9.814823862803538e-07, + "loss": 0.087, + "num_input_tokens_seen": 6780736, + "step": 10045 + }, + { + "epoch": 0.2455231720128014, + "grad_norm": 6.570443153381348, + "learning_rate": 9.819709776713733e-07, + "loss": 0.192, + "num_input_tokens_seen": 6783872, + "step": 10050 + }, + { + "epoch": 0.24564532284464857, + "grad_norm": 30.137697219848633, + "learning_rate": 9.82459569062393e-07, + "loss": 0.1749, + "num_input_tokens_seen": 6787328, + "step": 10055 + }, + { + "epoch": 0.24576747367649573, + "grad_norm": 33.942325592041016, + "learning_rate": 9.829481604534129e-07, + "loss": 0.1122, + "num_input_tokens_seen": 6791296, + "step": 10060 + }, + { + "epoch": 0.2458896245083429, + "grad_norm": 16.92814064025879, + "learning_rate": 9.834367518444323e-07, + "loss": 0.0593, + "num_input_tokens_seen": 6794624, + "step": 10065 + }, + { + "epoch": 0.24601177534019006, + "grad_norm": 22.280366897583008, + "learning_rate": 9.839253432354522e-07, + "loss": 0.092, + "num_input_tokens_seen": 6797504, + "step": 10070 + }, + { + "epoch": 0.24613392617203722, + "grad_norm": 29.49980926513672, + "learning_rate": 9.84413934626472e-07, + "loss": 0.0173, + "num_input_tokens_seen": 6800960, + "step": 10075 + }, + { + "epoch": 0.24625607700388438, + "grad_norm": 6.354789733886719, + "learning_rate": 9.849025260174916e-07, + "loss": 0.0718, + "num_input_tokens_seen": 6804416, + "step": 10080 + }, + { + "epoch": 0.24637822783573157, + "grad_norm": 14.301334381103516, + "learning_rate": 9.853911174085113e-07, + "loss": 0.0776, + "num_input_tokens_seen": 6808128, + "step": 10085 + }, + { + "epoch": 0.24650037866757873, + "grad_norm": 2.1689467430114746, + "learning_rate": 9.85879708799531e-07, + "loss": 0.0546, + "num_input_tokens_seen": 6811584, + "step": 10090 + }, + { + "epoch": 0.2466225294994259, + "grad_norm": 33.23162078857422, + "learning_rate": 9.863683001905506e-07, + "loss": 0.1041, + "num_input_tokens_seen": 6815232, + "step": 10095 + }, + { + "epoch": 0.24674468033127306, + "grad_norm": 25.61594581604004, + "learning_rate": 9.868568915815703e-07, + "loss": 0.1665, + "num_input_tokens_seen": 6818816, + "step": 10100 + }, + { + "epoch": 0.24686683116312022, + "grad_norm": 13.704983711242676, + "learning_rate": 9.8734548297259e-07, + "loss": 0.2181, + "num_input_tokens_seen": 6822080, + "step": 10105 + }, + { + "epoch": 0.24698898199496738, + "grad_norm": 28.8029727935791, + "learning_rate": 9.878340743636097e-07, + "loss": 0.1648, + "num_input_tokens_seen": 6825216, + "step": 10110 + }, + { + "epoch": 0.24711113282681454, + "grad_norm": 6.627451419830322, + "learning_rate": 9.883226657546294e-07, + "loss": 0.2491, + "num_input_tokens_seen": 6828608, + "step": 10115 + }, + { + "epoch": 0.2472332836586617, + "grad_norm": 32.42625427246094, + "learning_rate": 9.88811257145649e-07, + "loss": 0.281, + "num_input_tokens_seen": 6831872, + "step": 10120 + }, + { + "epoch": 0.24735543449050887, + "grad_norm": 5.586592674255371, + "learning_rate": 9.892998485366687e-07, + "loss": 0.0527, + "num_input_tokens_seen": 6835456, + "step": 10125 + }, + { + "epoch": 0.24747758532235606, + "grad_norm": 15.273069381713867, + "learning_rate": 9.897884399276884e-07, + "loss": 0.1532, + "num_input_tokens_seen": 6839616, + "step": 10130 + }, + { + "epoch": 0.24759973615420322, + "grad_norm": 29.001066207885742, + "learning_rate": 9.90277031318708e-07, + "loss": 0.1079, + "num_input_tokens_seen": 6842880, + "step": 10135 + }, + { + "epoch": 0.24772188698605038, + "grad_norm": 0.7507160902023315, + "learning_rate": 9.907656227097278e-07, + "loss": 0.075, + "num_input_tokens_seen": 6846400, + "step": 10140 + }, + { + "epoch": 0.24784403781789754, + "grad_norm": 19.052486419677734, + "learning_rate": 9.912542141007474e-07, + "loss": 0.1472, + "num_input_tokens_seen": 6849728, + "step": 10145 + }, + { + "epoch": 0.2479661886497447, + "grad_norm": 42.05897903442383, + "learning_rate": 9.917428054917671e-07, + "loss": 0.1916, + "num_input_tokens_seen": 6852544, + "step": 10150 + }, + { + "epoch": 0.24808833948159187, + "grad_norm": 11.443634986877441, + "learning_rate": 9.922313968827868e-07, + "loss": 0.1914, + "num_input_tokens_seen": 6855936, + "step": 10155 + }, + { + "epoch": 0.24821049031343903, + "grad_norm": 24.85001564025879, + "learning_rate": 9.927199882738065e-07, + "loss": 0.1312, + "num_input_tokens_seen": 6859584, + "step": 10160 + }, + { + "epoch": 0.2483326411452862, + "grad_norm": 15.051764488220215, + "learning_rate": 9.932085796648264e-07, + "loss": 0.2342, + "num_input_tokens_seen": 6862720, + "step": 10165 + }, + { + "epoch": 0.24845479197713335, + "grad_norm": 15.476161003112793, + "learning_rate": 9.936971710558459e-07, + "loss": 0.0421, + "num_input_tokens_seen": 6866368, + "step": 10170 + }, + { + "epoch": 0.24857694280898052, + "grad_norm": 19.41805648803711, + "learning_rate": 9.941857624468657e-07, + "loss": 0.0989, + "num_input_tokens_seen": 6869568, + "step": 10175 + }, + { + "epoch": 0.2486990936408277, + "grad_norm": 37.16714096069336, + "learning_rate": 9.946743538378854e-07, + "loss": 0.0829, + "num_input_tokens_seen": 6872832, + "step": 10180 + }, + { + "epoch": 0.24882124447267487, + "grad_norm": 23.69383430480957, + "learning_rate": 9.95162945228905e-07, + "loss": 0.1695, + "num_input_tokens_seen": 6876672, + "step": 10185 + }, + { + "epoch": 0.24894339530452203, + "grad_norm": 32.78408432006836, + "learning_rate": 9.956515366199248e-07, + "loss": 0.1031, + "num_input_tokens_seen": 6880000, + "step": 10190 + }, + { + "epoch": 0.2490655461363692, + "grad_norm": 28.071138381958008, + "learning_rate": 9.961401280109445e-07, + "loss": 0.0678, + "num_input_tokens_seen": 6883520, + "step": 10195 + }, + { + "epoch": 0.24918769696821635, + "grad_norm": 14.612953186035156, + "learning_rate": 9.966287194019642e-07, + "loss": 0.107, + "num_input_tokens_seen": 6887168, + "step": 10200 + }, + { + "epoch": 0.24930984780006352, + "grad_norm": 29.333614349365234, + "learning_rate": 9.971173107929838e-07, + "loss": 0.0546, + "num_input_tokens_seen": 6890944, + "step": 10205 + }, + { + "epoch": 0.24943199863191068, + "grad_norm": 11.473103523254395, + "learning_rate": 9.976059021840035e-07, + "loss": 0.1747, + "num_input_tokens_seen": 6894080, + "step": 10210 + }, + { + "epoch": 0.24955414946375784, + "grad_norm": 33.84881591796875, + "learning_rate": 9.980944935750232e-07, + "loss": 0.1655, + "num_input_tokens_seen": 6897408, + "step": 10215 + }, + { + "epoch": 0.249676300295605, + "grad_norm": 2.882136821746826, + "learning_rate": 9.985830849660429e-07, + "loss": 0.1251, + "num_input_tokens_seen": 6900608, + "step": 10220 + }, + { + "epoch": 0.24979845112745216, + "grad_norm": 18.558082580566406, + "learning_rate": 9.990716763570626e-07, + "loss": 0.1599, + "num_input_tokens_seen": 6904192, + "step": 10225 + }, + { + "epoch": 0.24992060195929935, + "grad_norm": 20.312400817871094, + "learning_rate": 9.995602677480822e-07, + "loss": 0.1388, + "num_input_tokens_seen": 6908032, + "step": 10230 + }, + { + "epoch": 0.2500183226247771, + "eval_loss": 0.150357186794281, + "eval_runtime": 47.7693, + "eval_samples_per_second": 761.681, + "eval_steps_per_second": 95.228, + "num_input_tokens_seen": 6910656, + "step": 10234 + }, + { + "epoch": 0.2500427527911465, + "grad_norm": 30.07940101623535, + "learning_rate": 1.000048859139102e-06, + "loss": 0.2217, + "num_input_tokens_seen": 6911360, + "step": 10235 + }, + { + "epoch": 0.25016490362299365, + "grad_norm": 11.564278602600098, + "learning_rate": 1.0005374505301216e-06, + "loss": 0.1544, + "num_input_tokens_seen": 6914624, + "step": 10240 + }, + { + "epoch": 0.25028705445484084, + "grad_norm": 30.50282859802246, + "learning_rate": 1.0010260419211413e-06, + "loss": 0.1194, + "num_input_tokens_seen": 6918144, + "step": 10245 + }, + { + "epoch": 0.250409205286688, + "grad_norm": 29.134689331054688, + "learning_rate": 1.001514633312161e-06, + "loss": 0.0953, + "num_input_tokens_seen": 6921408, + "step": 10250 + }, + { + "epoch": 0.25053135611853516, + "grad_norm": 7.919709205627441, + "learning_rate": 1.0020032247031806e-06, + "loss": 0.0691, + "num_input_tokens_seen": 6925248, + "step": 10255 + }, + { + "epoch": 0.25065350695038235, + "grad_norm": 59.279876708984375, + "learning_rate": 1.0024918160942003e-06, + "loss": 0.1579, + "num_input_tokens_seen": 6928512, + "step": 10260 + }, + { + "epoch": 0.2507756577822295, + "grad_norm": 32.2935676574707, + "learning_rate": 1.0029804074852202e-06, + "loss": 0.1694, + "num_input_tokens_seen": 6931904, + "step": 10265 + }, + { + "epoch": 0.2508978086140767, + "grad_norm": 21.012094497680664, + "learning_rate": 1.0034689988762397e-06, + "loss": 0.1749, + "num_input_tokens_seen": 6935552, + "step": 10270 + }, + { + "epoch": 0.2510199594459238, + "grad_norm": 37.00103759765625, + "learning_rate": 1.0039575902672594e-06, + "loss": 0.1575, + "num_input_tokens_seen": 6939200, + "step": 10275 + }, + { + "epoch": 0.251142110277771, + "grad_norm": 34.77095413208008, + "learning_rate": 1.0044461816582793e-06, + "loss": 0.1271, + "num_input_tokens_seen": 6942400, + "step": 10280 + }, + { + "epoch": 0.25126426110961814, + "grad_norm": 4.355811595916748, + "learning_rate": 1.0049347730492987e-06, + "loss": 0.1603, + "num_input_tokens_seen": 6945344, + "step": 10285 + }, + { + "epoch": 0.2513864119414653, + "grad_norm": 24.556846618652344, + "learning_rate": 1.0054233644403184e-06, + "loss": 0.1221, + "num_input_tokens_seen": 6948672, + "step": 10290 + }, + { + "epoch": 0.25150856277331246, + "grad_norm": 12.190361976623535, + "learning_rate": 1.0059119558313383e-06, + "loss": 0.1968, + "num_input_tokens_seen": 6952192, + "step": 10295 + }, + { + "epoch": 0.25163071360515965, + "grad_norm": 5.837921142578125, + "learning_rate": 1.0064005472223578e-06, + "loss": 0.0807, + "num_input_tokens_seen": 6955328, + "step": 10300 + }, + { + "epoch": 0.25175286443700684, + "grad_norm": 2.7097465991973877, + "learning_rate": 1.0068891386133775e-06, + "loss": 0.166, + "num_input_tokens_seen": 6958336, + "step": 10305 + }, + { + "epoch": 0.251875015268854, + "grad_norm": 19.389720916748047, + "learning_rate": 1.0073777300043974e-06, + "loss": 0.1161, + "num_input_tokens_seen": 6961920, + "step": 10310 + }, + { + "epoch": 0.25199716610070116, + "grad_norm": 7.943970680236816, + "learning_rate": 1.007866321395417e-06, + "loss": 0.1002, + "num_input_tokens_seen": 6965184, + "step": 10315 + }, + { + "epoch": 0.2521193169325483, + "grad_norm": 10.147026062011719, + "learning_rate": 1.0083549127864365e-06, + "loss": 0.1075, + "num_input_tokens_seen": 6968896, + "step": 10320 + }, + { + "epoch": 0.2522414677643955, + "grad_norm": 19.060894012451172, + "learning_rate": 1.0088435041774564e-06, + "loss": 0.1228, + "num_input_tokens_seen": 6972352, + "step": 10325 + }, + { + "epoch": 0.2523636185962426, + "grad_norm": 24.950483322143555, + "learning_rate": 1.009332095568476e-06, + "loss": 0.0779, + "num_input_tokens_seen": 6975680, + "step": 10330 + }, + { + "epoch": 0.2524857694280898, + "grad_norm": 74.65290069580078, + "learning_rate": 1.0098206869594955e-06, + "loss": 0.1664, + "num_input_tokens_seen": 6978816, + "step": 10335 + }, + { + "epoch": 0.25260792025993695, + "grad_norm": 23.90727996826172, + "learning_rate": 1.0103092783505154e-06, + "loss": 0.1542, + "num_input_tokens_seen": 6982464, + "step": 10340 + }, + { + "epoch": 0.25273007109178414, + "grad_norm": 36.87062454223633, + "learning_rate": 1.0107978697415351e-06, + "loss": 0.1632, + "num_input_tokens_seen": 6985664, + "step": 10345 + }, + { + "epoch": 0.2528522219236313, + "grad_norm": 23.86003303527832, + "learning_rate": 1.0112864611325548e-06, + "loss": 0.0819, + "num_input_tokens_seen": 6988608, + "step": 10350 + }, + { + "epoch": 0.25297437275547846, + "grad_norm": 4.453189373016357, + "learning_rate": 1.0117750525235745e-06, + "loss": 0.0703, + "num_input_tokens_seen": 6992064, + "step": 10355 + }, + { + "epoch": 0.25309652358732565, + "grad_norm": 23.15815544128418, + "learning_rate": 1.0122636439145942e-06, + "loss": 0.1059, + "num_input_tokens_seen": 6995456, + "step": 10360 + }, + { + "epoch": 0.2532186744191728, + "grad_norm": 29.278385162353516, + "learning_rate": 1.0127522353056138e-06, + "loss": 0.0914, + "num_input_tokens_seen": 6999488, + "step": 10365 + }, + { + "epoch": 0.25334082525102, + "grad_norm": 23.133424758911133, + "learning_rate": 1.0132408266966335e-06, + "loss": 0.1598, + "num_input_tokens_seen": 7002496, + "step": 10370 + }, + { + "epoch": 0.2534629760828671, + "grad_norm": 16.84296989440918, + "learning_rate": 1.0137294180876532e-06, + "loss": 0.2147, + "num_input_tokens_seen": 7005696, + "step": 10375 + }, + { + "epoch": 0.2535851269147143, + "grad_norm": 26.553749084472656, + "learning_rate": 1.0142180094786729e-06, + "loss": 0.2037, + "num_input_tokens_seen": 7009216, + "step": 10380 + }, + { + "epoch": 0.25370727774656143, + "grad_norm": 0.6738439798355103, + "learning_rate": 1.0147066008696928e-06, + "loss": 0.0145, + "num_input_tokens_seen": 7012544, + "step": 10385 + }, + { + "epoch": 0.2538294285784086, + "grad_norm": 1.1998552083969116, + "learning_rate": 1.0151951922607123e-06, + "loss": 0.1001, + "num_input_tokens_seen": 7015936, + "step": 10390 + }, + { + "epoch": 0.25395157941025576, + "grad_norm": 3.0031771659851074, + "learning_rate": 1.015683783651732e-06, + "loss": 0.104, + "num_input_tokens_seen": 7019328, + "step": 10395 + }, + { + "epoch": 0.25407373024210295, + "grad_norm": 7.232554912567139, + "learning_rate": 1.0161723750427518e-06, + "loss": 0.1893, + "num_input_tokens_seen": 7022784, + "step": 10400 + }, + { + "epoch": 0.25419588107395014, + "grad_norm": 2.9085607528686523, + "learning_rate": 1.0166609664337713e-06, + "loss": 0.0968, + "num_input_tokens_seen": 7025856, + "step": 10405 + }, + { + "epoch": 0.25431803190579727, + "grad_norm": 38.883914947509766, + "learning_rate": 1.017149557824791e-06, + "loss": 0.1591, + "num_input_tokens_seen": 7029376, + "step": 10410 + }, + { + "epoch": 0.25444018273764446, + "grad_norm": 2.6737284660339355, + "learning_rate": 1.0176381492158109e-06, + "loss": 0.1463, + "num_input_tokens_seen": 7032832, + "step": 10415 + }, + { + "epoch": 0.2545623335694916, + "grad_norm": 2.1448378562927246, + "learning_rate": 1.0181267406068306e-06, + "loss": 0.0194, + "num_input_tokens_seen": 7036096, + "step": 10420 + }, + { + "epoch": 0.2546844844013388, + "grad_norm": 18.835430145263672, + "learning_rate": 1.01861533199785e-06, + "loss": 0.1483, + "num_input_tokens_seen": 7039360, + "step": 10425 + }, + { + "epoch": 0.2548066352331859, + "grad_norm": 27.005678176879883, + "learning_rate": 1.01910392338887e-06, + "loss": 0.1019, + "num_input_tokens_seen": 7042496, + "step": 10430 + }, + { + "epoch": 0.2549287860650331, + "grad_norm": 41.107025146484375, + "learning_rate": 1.0195925147798896e-06, + "loss": 0.3398, + "num_input_tokens_seen": 7045632, + "step": 10435 + }, + { + "epoch": 0.25505093689688024, + "grad_norm": 10.689356803894043, + "learning_rate": 1.020081106170909e-06, + "loss": 0.1183, + "num_input_tokens_seen": 7049024, + "step": 10440 + }, + { + "epoch": 0.25517308772872743, + "grad_norm": 36.1721305847168, + "learning_rate": 1.020569697561929e-06, + "loss": 0.0926, + "num_input_tokens_seen": 7053056, + "step": 10445 + }, + { + "epoch": 0.2552952385605746, + "grad_norm": 27.044384002685547, + "learning_rate": 1.0210582889529486e-06, + "loss": 0.1251, + "num_input_tokens_seen": 7056128, + "step": 10450 + }, + { + "epoch": 0.25541738939242176, + "grad_norm": 1.3893952369689941, + "learning_rate": 1.0215468803439681e-06, + "loss": 0.0557, + "num_input_tokens_seen": 7059584, + "step": 10455 + }, + { + "epoch": 0.25553954022426895, + "grad_norm": 1.2650136947631836, + "learning_rate": 1.022035471734988e-06, + "loss": 0.0973, + "num_input_tokens_seen": 7063104, + "step": 10460 + }, + { + "epoch": 0.2556616910561161, + "grad_norm": 8.588451385498047, + "learning_rate": 1.0225240631260077e-06, + "loss": 0.1217, + "num_input_tokens_seen": 7066368, + "step": 10465 + }, + { + "epoch": 0.25578384188796327, + "grad_norm": 60.455047607421875, + "learning_rate": 1.0230126545170274e-06, + "loss": 0.1888, + "num_input_tokens_seen": 7069504, + "step": 10470 + }, + { + "epoch": 0.2559059927198104, + "grad_norm": 22.16314125061035, + "learning_rate": 1.023501245908047e-06, + "loss": 0.1105, + "num_input_tokens_seen": 7073152, + "step": 10475 + }, + { + "epoch": 0.2560281435516576, + "grad_norm": 36.67524337768555, + "learning_rate": 1.0239898372990667e-06, + "loss": 0.1404, + "num_input_tokens_seen": 7076608, + "step": 10480 + }, + { + "epoch": 0.25615029438350473, + "grad_norm": 0.3097395896911621, + "learning_rate": 1.0244784286900864e-06, + "loss": 0.1086, + "num_input_tokens_seen": 7080000, + "step": 10485 + }, + { + "epoch": 0.2562724452153519, + "grad_norm": 49.033294677734375, + "learning_rate": 1.024967020081106e-06, + "loss": 0.1311, + "num_input_tokens_seen": 7083264, + "step": 10490 + }, + { + "epoch": 0.2563945960471991, + "grad_norm": 21.20745086669922, + "learning_rate": 1.0254556114721258e-06, + "loss": 0.1045, + "num_input_tokens_seen": 7086528, + "step": 10495 + }, + { + "epoch": 0.25651674687904624, + "grad_norm": 8.799860954284668, + "learning_rate": 1.0259442028631455e-06, + "loss": 0.2151, + "num_input_tokens_seen": 7089600, + "step": 10500 + }, + { + "epoch": 0.25663889771089343, + "grad_norm": 21.061410903930664, + "learning_rate": 1.0264327942541653e-06, + "loss": 0.19, + "num_input_tokens_seen": 7092672, + "step": 10505 + }, + { + "epoch": 0.25676104854274057, + "grad_norm": 0.9606673717498779, + "learning_rate": 1.0269213856451848e-06, + "loss": 0.1237, + "num_input_tokens_seen": 7096128, + "step": 10510 + }, + { + "epoch": 0.25688319937458776, + "grad_norm": 5.484951972961426, + "learning_rate": 1.0274099770362045e-06, + "loss": 0.0726, + "num_input_tokens_seen": 7099712, + "step": 10515 + }, + { + "epoch": 0.2570053502064349, + "grad_norm": 0.6776230335235596, + "learning_rate": 1.0278985684272244e-06, + "loss": 0.0353, + "num_input_tokens_seen": 7103040, + "step": 10520 + }, + { + "epoch": 0.2571275010382821, + "grad_norm": 27.392627716064453, + "learning_rate": 1.0283871598182439e-06, + "loss": 0.14, + "num_input_tokens_seen": 7106304, + "step": 10525 + }, + { + "epoch": 0.2572496518701292, + "grad_norm": 0.6827471852302551, + "learning_rate": 1.0288757512092635e-06, + "loss": 0.1152, + "num_input_tokens_seen": 7109824, + "step": 10530 + }, + { + "epoch": 0.2573718027019764, + "grad_norm": 0.37764617800712585, + "learning_rate": 1.0293643426002834e-06, + "loss": 0.1283, + "num_input_tokens_seen": 7112896, + "step": 10535 + }, + { + "epoch": 0.25749395353382354, + "grad_norm": 37.246009826660156, + "learning_rate": 1.0298529339913031e-06, + "loss": 0.1707, + "num_input_tokens_seen": 7116096, + "step": 10540 + }, + { + "epoch": 0.25761610436567073, + "grad_norm": 24.440879821777344, + "learning_rate": 1.0303415253823226e-06, + "loss": 0.1114, + "num_input_tokens_seen": 7119488, + "step": 10545 + }, + { + "epoch": 0.2577382551975179, + "grad_norm": 32.95541763305664, + "learning_rate": 1.0308301167733425e-06, + "loss": 0.3013, + "num_input_tokens_seen": 7122688, + "step": 10550 + }, + { + "epoch": 0.25786040602936505, + "grad_norm": 24.378509521484375, + "learning_rate": 1.0313187081643622e-06, + "loss": 0.1704, + "num_input_tokens_seen": 7126080, + "step": 10555 + }, + { + "epoch": 0.25798255686121224, + "grad_norm": 13.029778480529785, + "learning_rate": 1.0318072995553816e-06, + "loss": 0.1579, + "num_input_tokens_seen": 7129280, + "step": 10560 + }, + { + "epoch": 0.2581047076930594, + "grad_norm": 20.938175201416016, + "learning_rate": 1.0322958909464015e-06, + "loss": 0.0861, + "num_input_tokens_seen": 7132672, + "step": 10565 + }, + { + "epoch": 0.25822685852490657, + "grad_norm": 9.321990013122559, + "learning_rate": 1.0327844823374212e-06, + "loss": 0.1046, + "num_input_tokens_seen": 7136320, + "step": 10570 + }, + { + "epoch": 0.2583490093567537, + "grad_norm": 15.518908500671387, + "learning_rate": 1.0332730737284407e-06, + "loss": 0.1541, + "num_input_tokens_seen": 7139712, + "step": 10575 + }, + { + "epoch": 0.2584711601886009, + "grad_norm": 15.851611137390137, + "learning_rate": 1.0337616651194606e-06, + "loss": 0.1296, + "num_input_tokens_seen": 7142784, + "step": 10580 + }, + { + "epoch": 0.258593311020448, + "grad_norm": 6.366760730743408, + "learning_rate": 1.0342502565104803e-06, + "loss": 0.0574, + "num_input_tokens_seen": 7146112, + "step": 10585 + }, + { + "epoch": 0.2587154618522952, + "grad_norm": 16.666038513183594, + "learning_rate": 1.0347388479015e-06, + "loss": 0.0815, + "num_input_tokens_seen": 7149248, + "step": 10590 + }, + { + "epoch": 0.2588376126841424, + "grad_norm": 10.800519943237305, + "learning_rate": 1.0352274392925196e-06, + "loss": 0.1847, + "num_input_tokens_seen": 7152320, + "step": 10595 + }, + { + "epoch": 0.25895976351598954, + "grad_norm": 17.76580238342285, + "learning_rate": 1.0357160306835393e-06, + "loss": 0.0792, + "num_input_tokens_seen": 7155392, + "step": 10600 + }, + { + "epoch": 0.25908191434783673, + "grad_norm": 13.790797233581543, + "learning_rate": 1.036204622074559e-06, + "loss": 0.1473, + "num_input_tokens_seen": 7159424, + "step": 10605 + }, + { + "epoch": 0.25920406517968386, + "grad_norm": 19.42371368408203, + "learning_rate": 1.0366932134655787e-06, + "loss": 0.1038, + "num_input_tokens_seen": 7162752, + "step": 10610 + }, + { + "epoch": 0.25932621601153105, + "grad_norm": 26.431636810302734, + "learning_rate": 1.0371818048565983e-06, + "loss": 0.0652, + "num_input_tokens_seen": 7166080, + "step": 10615 + }, + { + "epoch": 0.2594483668433782, + "grad_norm": 16.103227615356445, + "learning_rate": 1.037670396247618e-06, + "loss": 0.0808, + "num_input_tokens_seen": 7169280, + "step": 10620 + }, + { + "epoch": 0.2595705176752254, + "grad_norm": 27.495723724365234, + "learning_rate": 1.038158987638638e-06, + "loss": 0.1462, + "num_input_tokens_seen": 7172480, + "step": 10625 + }, + { + "epoch": 0.2596926685070725, + "grad_norm": 1.79203200340271, + "learning_rate": 1.0386475790296574e-06, + "loss": 0.0809, + "num_input_tokens_seen": 7176128, + "step": 10630 + }, + { + "epoch": 0.2598148193389197, + "grad_norm": 28.41242218017578, + "learning_rate": 1.039136170420677e-06, + "loss": 0.1069, + "num_input_tokens_seen": 7179136, + "step": 10635 + }, + { + "epoch": 0.2599369701707669, + "grad_norm": 82.02660369873047, + "learning_rate": 1.039624761811697e-06, + "loss": 0.2446, + "num_input_tokens_seen": 7182272, + "step": 10640 + }, + { + "epoch": 0.260059121002614, + "grad_norm": 15.813164710998535, + "learning_rate": 1.0401133532027164e-06, + "loss": 0.0538, + "num_input_tokens_seen": 7185088, + "step": 10645 + }, + { + "epoch": 0.2601812718344612, + "grad_norm": 3.777578592300415, + "learning_rate": 1.0406019445937361e-06, + "loss": 0.1061, + "num_input_tokens_seen": 7188480, + "step": 10650 + }, + { + "epoch": 0.26030342266630835, + "grad_norm": 0.10652507096529007, + "learning_rate": 1.041090535984756e-06, + "loss": 0.0527, + "num_input_tokens_seen": 7191552, + "step": 10655 + }, + { + "epoch": 0.26042557349815554, + "grad_norm": 44.32902526855469, + "learning_rate": 1.0415791273757757e-06, + "loss": 0.1326, + "num_input_tokens_seen": 7194752, + "step": 10660 + }, + { + "epoch": 0.2605477243300027, + "grad_norm": 3.918788433074951, + "learning_rate": 1.0420677187667952e-06, + "loss": 0.3496, + "num_input_tokens_seen": 7198528, + "step": 10665 + }, + { + "epoch": 0.26066987516184986, + "grad_norm": 42.468475341796875, + "learning_rate": 1.042556310157815e-06, + "loss": 0.3142, + "num_input_tokens_seen": 7201728, + "step": 10670 + }, + { + "epoch": 0.260792025993697, + "grad_norm": 2.682720899581909, + "learning_rate": 1.0430449015488347e-06, + "loss": 0.1681, + "num_input_tokens_seen": 7205120, + "step": 10675 + }, + { + "epoch": 0.2609141768255442, + "grad_norm": 12.731380462646484, + "learning_rate": 1.0435334929398542e-06, + "loss": 0.0965, + "num_input_tokens_seen": 7208576, + "step": 10680 + }, + { + "epoch": 0.2610363276573913, + "grad_norm": 37.0477409362793, + "learning_rate": 1.044022084330874e-06, + "loss": 0.3016, + "num_input_tokens_seen": 7211584, + "step": 10685 + }, + { + "epoch": 0.2611584784892385, + "grad_norm": 3.5659420490264893, + "learning_rate": 1.0445106757218938e-06, + "loss": 0.1004, + "num_input_tokens_seen": 7214976, + "step": 10690 + }, + { + "epoch": 0.2612806293210857, + "grad_norm": 6.397414207458496, + "learning_rate": 1.0449992671129135e-06, + "loss": 0.0837, + "num_input_tokens_seen": 7218240, + "step": 10695 + }, + { + "epoch": 0.26140278015293283, + "grad_norm": 17.796171188354492, + "learning_rate": 1.0454878585039331e-06, + "loss": 0.1402, + "num_input_tokens_seen": 7221952, + "step": 10700 + }, + { + "epoch": 0.26152493098478, + "grad_norm": 34.56057357788086, + "learning_rate": 1.0459764498949528e-06, + "loss": 0.0783, + "num_input_tokens_seen": 7225344, + "step": 10705 + }, + { + "epoch": 0.26164708181662716, + "grad_norm": 22.077072143554688, + "learning_rate": 1.0464650412859725e-06, + "loss": 0.0884, + "num_input_tokens_seen": 7228800, + "step": 10710 + }, + { + "epoch": 0.26176923264847435, + "grad_norm": 9.046226501464844, + "learning_rate": 1.0469536326769922e-06, + "loss": 0.1519, + "num_input_tokens_seen": 7232192, + "step": 10715 + }, + { + "epoch": 0.2618913834803215, + "grad_norm": 10.881168365478516, + "learning_rate": 1.0474422240680119e-06, + "loss": 0.0741, + "num_input_tokens_seen": 7235648, + "step": 10720 + }, + { + "epoch": 0.26201353431216867, + "grad_norm": 2.611872673034668, + "learning_rate": 1.0479308154590315e-06, + "loss": 0.104, + "num_input_tokens_seen": 7238976, + "step": 10725 + }, + { + "epoch": 0.2621356851440158, + "grad_norm": 28.027048110961914, + "learning_rate": 1.0484194068500512e-06, + "loss": 0.1013, + "num_input_tokens_seen": 7242304, + "step": 10730 + }, + { + "epoch": 0.262257835975863, + "grad_norm": 0.523321807384491, + "learning_rate": 1.048907998241071e-06, + "loss": 0.1474, + "num_input_tokens_seen": 7245632, + "step": 10735 + }, + { + "epoch": 0.2623799868077102, + "grad_norm": 28.843177795410156, + "learning_rate": 1.0493965896320906e-06, + "loss": 0.2086, + "num_input_tokens_seen": 7248896, + "step": 10740 + }, + { + "epoch": 0.2625021376395573, + "grad_norm": 8.959317207336426, + "learning_rate": 1.0498851810231105e-06, + "loss": 0.1583, + "num_input_tokens_seen": 7252352, + "step": 10745 + }, + { + "epoch": 0.2626242884714045, + "grad_norm": 23.193830490112305, + "learning_rate": 1.05037377241413e-06, + "loss": 0.1325, + "num_input_tokens_seen": 7255552, + "step": 10750 + }, + { + "epoch": 0.26274643930325164, + "grad_norm": 0.8629907965660095, + "learning_rate": 1.0508623638051496e-06, + "loss": 0.106, + "num_input_tokens_seen": 7259008, + "step": 10755 + }, + { + "epoch": 0.26286859013509883, + "grad_norm": 68.59699249267578, + "learning_rate": 1.0513509551961695e-06, + "loss": 0.1296, + "num_input_tokens_seen": 7262464, + "step": 10760 + }, + { + "epoch": 0.26299074096694597, + "grad_norm": 18.427112579345703, + "learning_rate": 1.051839546587189e-06, + "loss": 0.0801, + "num_input_tokens_seen": 7265792, + "step": 10765 + }, + { + "epoch": 0.26311289179879316, + "grad_norm": 14.622143745422363, + "learning_rate": 1.0523281379782087e-06, + "loss": 0.086, + "num_input_tokens_seen": 7269376, + "step": 10770 + }, + { + "epoch": 0.2632350426306403, + "grad_norm": 28.404579162597656, + "learning_rate": 1.0528167293692286e-06, + "loss": 0.1174, + "num_input_tokens_seen": 7272832, + "step": 10775 + }, + { + "epoch": 0.2633571934624875, + "grad_norm": 29.52048683166504, + "learning_rate": 1.0533053207602482e-06, + "loss": 0.0803, + "num_input_tokens_seen": 7276096, + "step": 10780 + }, + { + "epoch": 0.26347934429433467, + "grad_norm": 5.9442548751831055, + "learning_rate": 1.0537939121512677e-06, + "loss": 0.0768, + "num_input_tokens_seen": 7279488, + "step": 10785 + }, + { + "epoch": 0.2636014951261818, + "grad_norm": 12.992175102233887, + "learning_rate": 1.0542825035422876e-06, + "loss": 0.1289, + "num_input_tokens_seen": 7282944, + "step": 10790 + }, + { + "epoch": 0.263723645958029, + "grad_norm": 4.273730278015137, + "learning_rate": 1.0547710949333073e-06, + "loss": 0.0317, + "num_input_tokens_seen": 7286464, + "step": 10795 + }, + { + "epoch": 0.26384579678987613, + "grad_norm": 32.03951644897461, + "learning_rate": 1.0552596863243268e-06, + "loss": 0.1985, + "num_input_tokens_seen": 7289792, + "step": 10800 + }, + { + "epoch": 0.2639679476217233, + "grad_norm": 27.004234313964844, + "learning_rate": 1.0557482777153467e-06, + "loss": 0.2123, + "num_input_tokens_seen": 7293120, + "step": 10805 + }, + { + "epoch": 0.26409009845357045, + "grad_norm": 22.78232765197754, + "learning_rate": 1.0562368691063663e-06, + "loss": 0.0863, + "num_input_tokens_seen": 7296640, + "step": 10810 + }, + { + "epoch": 0.26421224928541764, + "grad_norm": 12.611019134521484, + "learning_rate": 1.056725460497386e-06, + "loss": 0.1017, + "num_input_tokens_seen": 7299584, + "step": 10815 + }, + { + "epoch": 0.2643344001172648, + "grad_norm": 28.07274627685547, + "learning_rate": 1.0572140518884057e-06, + "loss": 0.0895, + "num_input_tokens_seen": 7302528, + "step": 10820 + }, + { + "epoch": 0.26445655094911197, + "grad_norm": 11.911782264709473, + "learning_rate": 1.0577026432794254e-06, + "loss": 0.2, + "num_input_tokens_seen": 7305728, + "step": 10825 + }, + { + "epoch": 0.2645787017809591, + "grad_norm": 27.716493606567383, + "learning_rate": 1.058191234670445e-06, + "loss": 0.1894, + "num_input_tokens_seen": 7309312, + "step": 10830 + }, + { + "epoch": 0.2647008526128063, + "grad_norm": 53.70561218261719, + "learning_rate": 1.0586798260614647e-06, + "loss": 0.1635, + "num_input_tokens_seen": 7312448, + "step": 10835 + }, + { + "epoch": 0.2648230034446535, + "grad_norm": 20.634546279907227, + "learning_rate": 1.0591684174524844e-06, + "loss": 0.1332, + "num_input_tokens_seen": 7315904, + "step": 10840 + }, + { + "epoch": 0.2649451542765006, + "grad_norm": 20.69364356994629, + "learning_rate": 1.059657008843504e-06, + "loss": 0.0642, + "num_input_tokens_seen": 7319360, + "step": 10845 + }, + { + "epoch": 0.2650673051083478, + "grad_norm": 29.68886375427246, + "learning_rate": 1.060145600234524e-06, + "loss": 0.1647, + "num_input_tokens_seen": 7322816, + "step": 10850 + }, + { + "epoch": 0.26518945594019494, + "grad_norm": 18.294721603393555, + "learning_rate": 1.0606341916255435e-06, + "loss": 0.1161, + "num_input_tokens_seen": 7326784, + "step": 10855 + }, + { + "epoch": 0.26531160677204213, + "grad_norm": 12.947708129882812, + "learning_rate": 1.0611227830165631e-06, + "loss": 0.1226, + "num_input_tokens_seen": 7330304, + "step": 10860 + }, + { + "epoch": 0.26543375760388926, + "grad_norm": 9.835976600646973, + "learning_rate": 1.061611374407583e-06, + "loss": 0.1123, + "num_input_tokens_seen": 7333632, + "step": 10865 + }, + { + "epoch": 0.26555590843573645, + "grad_norm": 24.71727180480957, + "learning_rate": 1.0620999657986025e-06, + "loss": 0.1671, + "num_input_tokens_seen": 7337280, + "step": 10870 + }, + { + "epoch": 0.2656780592675836, + "grad_norm": 29.23613166809082, + "learning_rate": 1.0625885571896222e-06, + "loss": 0.1121, + "num_input_tokens_seen": 7340480, + "step": 10875 + }, + { + "epoch": 0.2658002100994308, + "grad_norm": 27.944616317749023, + "learning_rate": 1.063077148580642e-06, + "loss": 0.1282, + "num_input_tokens_seen": 7343424, + "step": 10880 + }, + { + "epoch": 0.26592236093127797, + "grad_norm": 24.89887046813965, + "learning_rate": 1.0635657399716616e-06, + "loss": 0.1091, + "num_input_tokens_seen": 7346752, + "step": 10885 + }, + { + "epoch": 0.2660445117631251, + "grad_norm": 21.505664825439453, + "learning_rate": 1.0640543313626812e-06, + "loss": 0.1666, + "num_input_tokens_seen": 7350144, + "step": 10890 + }, + { + "epoch": 0.2661666625949723, + "grad_norm": 1.815690040588379, + "learning_rate": 1.0645429227537011e-06, + "loss": 0.0835, + "num_input_tokens_seen": 7353344, + "step": 10895 + }, + { + "epoch": 0.2662888134268194, + "grad_norm": 2.3935718536376953, + "learning_rate": 1.0650315141447208e-06, + "loss": 0.1354, + "num_input_tokens_seen": 7356544, + "step": 10900 + }, + { + "epoch": 0.2664109642586666, + "grad_norm": 0.4179496169090271, + "learning_rate": 1.0655201055357403e-06, + "loss": 0.147, + "num_input_tokens_seen": 7359872, + "step": 10905 + }, + { + "epoch": 0.26653311509051375, + "grad_norm": 11.624155044555664, + "learning_rate": 1.0660086969267602e-06, + "loss": 0.1455, + "num_input_tokens_seen": 7363392, + "step": 10910 + }, + { + "epoch": 0.26665526592236094, + "grad_norm": 17.070810317993164, + "learning_rate": 1.0664972883177799e-06, + "loss": 0.066, + "num_input_tokens_seen": 7366912, + "step": 10915 + }, + { + "epoch": 0.2667774167542081, + "grad_norm": 19.962379455566406, + "learning_rate": 1.0669858797087993e-06, + "loss": 0.0645, + "num_input_tokens_seen": 7370176, + "step": 10920 + }, + { + "epoch": 0.26689956758605526, + "grad_norm": 31.91449546813965, + "learning_rate": 1.0674744710998192e-06, + "loss": 0.1761, + "num_input_tokens_seen": 7373440, + "step": 10925 + }, + { + "epoch": 0.2670217184179024, + "grad_norm": 20.309640884399414, + "learning_rate": 1.067963062490839e-06, + "loss": 0.0981, + "num_input_tokens_seen": 7377216, + "step": 10930 + }, + { + "epoch": 0.2671438692497496, + "grad_norm": 17.986515045166016, + "learning_rate": 1.0684516538818586e-06, + "loss": 0.0754, + "num_input_tokens_seen": 7380480, + "step": 10935 + }, + { + "epoch": 0.2672660200815968, + "grad_norm": 27.000978469848633, + "learning_rate": 1.0689402452728783e-06, + "loss": 0.0664, + "num_input_tokens_seen": 7383872, + "step": 10940 + }, + { + "epoch": 0.2673881709134439, + "grad_norm": 10.778860092163086, + "learning_rate": 1.069428836663898e-06, + "loss": 0.1116, + "num_input_tokens_seen": 7387008, + "step": 10945 + }, + { + "epoch": 0.2675103217452911, + "grad_norm": 26.65261459350586, + "learning_rate": 1.0699174280549176e-06, + "loss": 0.2517, + "num_input_tokens_seen": 7390272, + "step": 10950 + }, + { + "epoch": 0.26763247257713824, + "grad_norm": 43.30821990966797, + "learning_rate": 1.0704060194459373e-06, + "loss": 0.1716, + "num_input_tokens_seen": 7393856, + "step": 10955 + }, + { + "epoch": 0.2677546234089854, + "grad_norm": 11.451996803283691, + "learning_rate": 1.070894610836957e-06, + "loss": 0.1175, + "num_input_tokens_seen": 7396992, + "step": 10960 + }, + { + "epoch": 0.26787677424083256, + "grad_norm": 40.32914352416992, + "learning_rate": 1.0713832022279767e-06, + "loss": 0.2216, + "num_input_tokens_seen": 7400448, + "step": 10965 + }, + { + "epoch": 0.26799892507267975, + "grad_norm": 38.25727081298828, + "learning_rate": 1.0718717936189966e-06, + "loss": 0.1282, + "num_input_tokens_seen": 7403904, + "step": 10970 + }, + { + "epoch": 0.2681210759045269, + "grad_norm": 7.2183756828308105, + "learning_rate": 1.072360385010016e-06, + "loss": 0.0799, + "num_input_tokens_seen": 7406976, + "step": 10975 + }, + { + "epoch": 0.2682432267363741, + "grad_norm": 3.240387201309204, + "learning_rate": 1.0728489764010357e-06, + "loss": 0.0994, + "num_input_tokens_seen": 7410176, + "step": 10980 + }, + { + "epoch": 0.26836537756822126, + "grad_norm": 6.279014587402344, + "learning_rate": 1.0733375677920556e-06, + "loss": 0.069, + "num_input_tokens_seen": 7413760, + "step": 10985 + }, + { + "epoch": 0.2684875284000684, + "grad_norm": 18.114967346191406, + "learning_rate": 1.073826159183075e-06, + "loss": 0.2193, + "num_input_tokens_seen": 7417216, + "step": 10990 + }, + { + "epoch": 0.2686096792319156, + "grad_norm": 21.088268280029297, + "learning_rate": 1.0743147505740948e-06, + "loss": 0.1294, + "num_input_tokens_seen": 7420672, + "step": 10995 + }, + { + "epoch": 0.2687318300637627, + "grad_norm": 30.939210891723633, + "learning_rate": 1.0748033419651146e-06, + "loss": 0.1481, + "num_input_tokens_seen": 7423808, + "step": 11000 + }, + { + "epoch": 0.2688539808956099, + "grad_norm": 13.835090637207031, + "learning_rate": 1.0752919333561341e-06, + "loss": 0.1, + "num_input_tokens_seen": 7427328, + "step": 11005 + }, + { + "epoch": 0.26897613172745705, + "grad_norm": 34.25198745727539, + "learning_rate": 1.0757805247471538e-06, + "loss": 0.1661, + "num_input_tokens_seen": 7430592, + "step": 11010 + }, + { + "epoch": 0.26909828255930424, + "grad_norm": 35.57109451293945, + "learning_rate": 1.0762691161381737e-06, + "loss": 0.1138, + "num_input_tokens_seen": 7433664, + "step": 11015 + }, + { + "epoch": 0.26922043339115137, + "grad_norm": 2.9439427852630615, + "learning_rate": 1.0767577075291934e-06, + "loss": 0.1723, + "num_input_tokens_seen": 7436416, + "step": 11020 + }, + { + "epoch": 0.26934258422299856, + "grad_norm": 27.516061782836914, + "learning_rate": 1.0772462989202128e-06, + "loss": 0.0646, + "num_input_tokens_seen": 7439616, + "step": 11025 + }, + { + "epoch": 0.26946473505484575, + "grad_norm": 31.73204231262207, + "learning_rate": 1.0777348903112327e-06, + "loss": 0.1593, + "num_input_tokens_seen": 7442624, + "step": 11030 + }, + { + "epoch": 0.2695868858866929, + "grad_norm": 25.48242950439453, + "learning_rate": 1.0782234817022524e-06, + "loss": 0.1045, + "num_input_tokens_seen": 7445888, + "step": 11035 + }, + { + "epoch": 0.2697090367185401, + "grad_norm": 3.6421027183532715, + "learning_rate": 1.0787120730932719e-06, + "loss": 0.0894, + "num_input_tokens_seen": 7448960, + "step": 11040 + }, + { + "epoch": 0.2698311875503872, + "grad_norm": 2.8793671131134033, + "learning_rate": 1.0792006644842918e-06, + "loss": 0.0651, + "num_input_tokens_seen": 7452544, + "step": 11045 + }, + { + "epoch": 0.2699533383822344, + "grad_norm": 75.24100494384766, + "learning_rate": 1.0796892558753115e-06, + "loss": 0.1117, + "num_input_tokens_seen": 7455680, + "step": 11050 + }, + { + "epoch": 0.27007548921408153, + "grad_norm": 26.78316879272461, + "learning_rate": 1.0801778472663311e-06, + "loss": 0.12, + "num_input_tokens_seen": 7459008, + "step": 11055 + }, + { + "epoch": 0.2701976400459287, + "grad_norm": 17.241432189941406, + "learning_rate": 1.0806664386573508e-06, + "loss": 0.0946, + "num_input_tokens_seen": 7462464, + "step": 11060 + }, + { + "epoch": 0.27031979087777586, + "grad_norm": 30.588302612304688, + "learning_rate": 1.0811550300483705e-06, + "loss": 0.179, + "num_input_tokens_seen": 7465664, + "step": 11065 + }, + { + "epoch": 0.27044194170962305, + "grad_norm": 11.447126388549805, + "learning_rate": 1.0816436214393902e-06, + "loss": 0.0426, + "num_input_tokens_seen": 7469056, + "step": 11070 + }, + { + "epoch": 0.2705640925414702, + "grad_norm": 26.24823760986328, + "learning_rate": 1.0821322128304099e-06, + "loss": 0.1197, + "num_input_tokens_seen": 7472512, + "step": 11075 + }, + { + "epoch": 0.27068624337331737, + "grad_norm": 12.120738983154297, + "learning_rate": 1.0826208042214295e-06, + "loss": 0.0911, + "num_input_tokens_seen": 7475392, + "step": 11080 + }, + { + "epoch": 0.27080839420516456, + "grad_norm": 23.091278076171875, + "learning_rate": 1.0831093956124492e-06, + "loss": 0.1618, + "num_input_tokens_seen": 7478912, + "step": 11085 + }, + { + "epoch": 0.2709305450370117, + "grad_norm": 23.355270385742188, + "learning_rate": 1.0835979870034691e-06, + "loss": 0.1194, + "num_input_tokens_seen": 7482048, + "step": 11090 + }, + { + "epoch": 0.2710526958688589, + "grad_norm": 19.180255889892578, + "learning_rate": 1.0840865783944886e-06, + "loss": 0.1323, + "num_input_tokens_seen": 7485248, + "step": 11095 + }, + { + "epoch": 0.271174846700706, + "grad_norm": 35.145591735839844, + "learning_rate": 1.0845751697855083e-06, + "loss": 0.2519, + "num_input_tokens_seen": 7488576, + "step": 11100 + }, + { + "epoch": 0.2712969975325532, + "grad_norm": 4.28073787689209, + "learning_rate": 1.0850637611765282e-06, + "loss": 0.0914, + "num_input_tokens_seen": 7491904, + "step": 11105 + }, + { + "epoch": 0.27141914836440034, + "grad_norm": 1.7749961614608765, + "learning_rate": 1.0855523525675476e-06, + "loss": 0.1573, + "num_input_tokens_seen": 7496064, + "step": 11110 + }, + { + "epoch": 0.27154129919624753, + "grad_norm": 18.251876831054688, + "learning_rate": 1.0860409439585673e-06, + "loss": 0.0401, + "num_input_tokens_seen": 7499328, + "step": 11115 + }, + { + "epoch": 0.27166345002809467, + "grad_norm": 29.7154598236084, + "learning_rate": 1.0865295353495872e-06, + "loss": 0.1413, + "num_input_tokens_seen": 7503040, + "step": 11120 + }, + { + "epoch": 0.27178560085994186, + "grad_norm": 22.521013259887695, + "learning_rate": 1.0870181267406069e-06, + "loss": 0.2132, + "num_input_tokens_seen": 7506368, + "step": 11125 + }, + { + "epoch": 0.27190775169178905, + "grad_norm": 21.76904296875, + "learning_rate": 1.0875067181316264e-06, + "loss": 0.1462, + "num_input_tokens_seen": 7509888, + "step": 11130 + }, + { + "epoch": 0.2720299025236362, + "grad_norm": 26.12929916381836, + "learning_rate": 1.0879953095226463e-06, + "loss": 0.118, + "num_input_tokens_seen": 7513728, + "step": 11135 + }, + { + "epoch": 0.27215205335548337, + "grad_norm": 49.16400909423828, + "learning_rate": 1.088483900913666e-06, + "loss": 0.1055, + "num_input_tokens_seen": 7517376, + "step": 11140 + }, + { + "epoch": 0.2722742041873305, + "grad_norm": 23.947162628173828, + "learning_rate": 1.0889724923046854e-06, + "loss": 0.2541, + "num_input_tokens_seen": 7520576, + "step": 11145 + }, + { + "epoch": 0.2723963550191777, + "grad_norm": 28.64226531982422, + "learning_rate": 1.0894610836957053e-06, + "loss": 0.1983, + "num_input_tokens_seen": 7523584, + "step": 11150 + }, + { + "epoch": 0.27251850585102483, + "grad_norm": 30.37683868408203, + "learning_rate": 1.089949675086725e-06, + "loss": 0.067, + "num_input_tokens_seen": 7526976, + "step": 11155 + }, + { + "epoch": 0.272640656682872, + "grad_norm": 32.52040100097656, + "learning_rate": 1.0904382664777444e-06, + "loss": 0.0961, + "num_input_tokens_seen": 7529920, + "step": 11160 + }, + { + "epoch": 0.27276280751471915, + "grad_norm": 3.7241928577423096, + "learning_rate": 1.0909268578687643e-06, + "loss": 0.0953, + "num_input_tokens_seen": 7533312, + "step": 11165 + }, + { + "epoch": 0.27288495834656634, + "grad_norm": 11.024883270263672, + "learning_rate": 1.091415449259784e-06, + "loss": 0.0257, + "num_input_tokens_seen": 7537536, + "step": 11170 + }, + { + "epoch": 0.27300710917841353, + "grad_norm": 29.855224609375, + "learning_rate": 1.0919040406508037e-06, + "loss": 0.1563, + "num_input_tokens_seen": 7540736, + "step": 11175 + }, + { + "epoch": 0.27312926001026067, + "grad_norm": 25.81363296508789, + "learning_rate": 1.0923926320418234e-06, + "loss": 0.1178, + "num_input_tokens_seen": 7544256, + "step": 11180 + }, + { + "epoch": 0.27325141084210786, + "grad_norm": 16.829345703125, + "learning_rate": 1.092881223432843e-06, + "loss": 0.2243, + "num_input_tokens_seen": 7547648, + "step": 11185 + }, + { + "epoch": 0.273373561673955, + "grad_norm": 45.78081130981445, + "learning_rate": 1.0933698148238627e-06, + "loss": 0.192, + "num_input_tokens_seen": 7551488, + "step": 11190 + }, + { + "epoch": 0.2734957125058022, + "grad_norm": 34.01533508300781, + "learning_rate": 1.0938584062148824e-06, + "loss": 0.1384, + "num_input_tokens_seen": 7554752, + "step": 11195 + }, + { + "epoch": 0.2736178633376493, + "grad_norm": 19.07633399963379, + "learning_rate": 1.0943469976059021e-06, + "loss": 0.0948, + "num_input_tokens_seen": 7558080, + "step": 11200 + }, + { + "epoch": 0.2737400141694965, + "grad_norm": 0.7051904201507568, + "learning_rate": 1.0948355889969218e-06, + "loss": 0.1033, + "num_input_tokens_seen": 7561472, + "step": 11205 + }, + { + "epoch": 0.27386216500134364, + "grad_norm": 1.2588499784469604, + "learning_rate": 1.0953241803879417e-06, + "loss": 0.1728, + "num_input_tokens_seen": 7565248, + "step": 11210 + }, + { + "epoch": 0.27398431583319083, + "grad_norm": 4.5054097175598145, + "learning_rate": 1.0958127717789612e-06, + "loss": 0.0578, + "num_input_tokens_seen": 7568384, + "step": 11215 + }, + { + "epoch": 0.27410646666503796, + "grad_norm": 20.64672088623047, + "learning_rate": 1.0963013631699808e-06, + "loss": 0.1279, + "num_input_tokens_seen": 7571776, + "step": 11220 + }, + { + "epoch": 0.27422861749688515, + "grad_norm": 4.561092376708984, + "learning_rate": 1.0967899545610007e-06, + "loss": 0.0493, + "num_input_tokens_seen": 7574784, + "step": 11225 + }, + { + "epoch": 0.27435076832873234, + "grad_norm": 46.156089782714844, + "learning_rate": 1.0972785459520202e-06, + "loss": 0.1948, + "num_input_tokens_seen": 7577984, + "step": 11230 + }, + { + "epoch": 0.2744729191605795, + "grad_norm": 29.57512664794922, + "learning_rate": 1.0977671373430399e-06, + "loss": 0.0757, + "num_input_tokens_seen": 7580992, + "step": 11235 + }, + { + "epoch": 0.27459506999242667, + "grad_norm": 32.9466667175293, + "learning_rate": 1.0982557287340598e-06, + "loss": 0.1289, + "num_input_tokens_seen": 7584256, + "step": 11240 + }, + { + "epoch": 0.2747172208242738, + "grad_norm": 18.003232955932617, + "learning_rate": 1.0987443201250795e-06, + "loss": 0.2638, + "num_input_tokens_seen": 7587392, + "step": 11245 + }, + { + "epoch": 0.274839371656121, + "grad_norm": 35.77971649169922, + "learning_rate": 1.099232911516099e-06, + "loss": 0.1724, + "num_input_tokens_seen": 7590528, + "step": 11250 + }, + { + "epoch": 0.2749615224879681, + "grad_norm": 5.708560466766357, + "learning_rate": 1.0997215029071188e-06, + "loss": 0.0775, + "num_input_tokens_seen": 7593792, + "step": 11255 + }, + { + "epoch": 0.2750836733198153, + "grad_norm": 0.5288552641868591, + "learning_rate": 1.1002100942981385e-06, + "loss": 0.1246, + "num_input_tokens_seen": 7597120, + "step": 11260 + }, + { + "epoch": 0.27520582415166245, + "grad_norm": 56.88216018676758, + "learning_rate": 1.100698685689158e-06, + "loss": 0.1392, + "num_input_tokens_seen": 7600256, + "step": 11265 + }, + { + "epoch": 0.27532797498350964, + "grad_norm": 45.19654083251953, + "learning_rate": 1.1011872770801779e-06, + "loss": 0.1305, + "num_input_tokens_seen": 7603200, + "step": 11270 + }, + { + "epoch": 0.2754501258153568, + "grad_norm": 27.574857711791992, + "learning_rate": 1.1016758684711975e-06, + "loss": 0.1257, + "num_input_tokens_seen": 7606400, + "step": 11275 + }, + { + "epoch": 0.27557227664720396, + "grad_norm": 21.560970306396484, + "learning_rate": 1.1021644598622172e-06, + "loss": 0.2147, + "num_input_tokens_seen": 7609600, + "step": 11280 + }, + { + "epoch": 0.27569442747905115, + "grad_norm": 22.572582244873047, + "learning_rate": 1.102653051253237e-06, + "loss": 0.1249, + "num_input_tokens_seen": 7612736, + "step": 11285 + }, + { + "epoch": 0.2758165783108983, + "grad_norm": 37.17354965209961, + "learning_rate": 1.1031416426442566e-06, + "loss": 0.0402, + "num_input_tokens_seen": 7616128, + "step": 11290 + }, + { + "epoch": 0.2759387291427455, + "grad_norm": 46.42210388183594, + "learning_rate": 1.1036302340352763e-06, + "loss": 0.1116, + "num_input_tokens_seen": 7619264, + "step": 11295 + }, + { + "epoch": 0.2760608799745926, + "grad_norm": 30.43973731994629, + "learning_rate": 1.104118825426296e-06, + "loss": 0.1853, + "num_input_tokens_seen": 7622400, + "step": 11300 + }, + { + "epoch": 0.2761830308064398, + "grad_norm": 0.16094528138637543, + "learning_rate": 1.1046074168173156e-06, + "loss": 0.134, + "num_input_tokens_seen": 7625408, + "step": 11305 + }, + { + "epoch": 0.27630518163828693, + "grad_norm": 16.4888858795166, + "learning_rate": 1.1050960082083353e-06, + "loss": 0.1378, + "num_input_tokens_seen": 7628992, + "step": 11310 + }, + { + "epoch": 0.2764273324701341, + "grad_norm": 15.717243194580078, + "learning_rate": 1.105584599599355e-06, + "loss": 0.1405, + "num_input_tokens_seen": 7632384, + "step": 11315 + }, + { + "epoch": 0.2765494833019813, + "grad_norm": 36.28303527832031, + "learning_rate": 1.1060731909903747e-06, + "loss": 0.0711, + "num_input_tokens_seen": 7635264, + "step": 11320 + }, + { + "epoch": 0.27667163413382845, + "grad_norm": 43.312347412109375, + "learning_rate": 1.1065617823813944e-06, + "loss": 0.133, + "num_input_tokens_seen": 7638272, + "step": 11325 + }, + { + "epoch": 0.27679378496567564, + "grad_norm": 5.552413463592529, + "learning_rate": 1.1070503737724142e-06, + "loss": 0.1541, + "num_input_tokens_seen": 7641536, + "step": 11330 + }, + { + "epoch": 0.27691593579752277, + "grad_norm": 34.17829513549805, + "learning_rate": 1.1075389651634337e-06, + "loss": 0.1017, + "num_input_tokens_seen": 7645120, + "step": 11335 + }, + { + "epoch": 0.27703808662936996, + "grad_norm": 23.244646072387695, + "learning_rate": 1.1080275565544534e-06, + "loss": 0.0997, + "num_input_tokens_seen": 7648768, + "step": 11340 + }, + { + "epoch": 0.2771602374612171, + "grad_norm": 26.523557662963867, + "learning_rate": 1.1085161479454733e-06, + "loss": 0.2157, + "num_input_tokens_seen": 7651776, + "step": 11345 + }, + { + "epoch": 0.2772823882930643, + "grad_norm": 1.2325987815856934, + "learning_rate": 1.1090047393364928e-06, + "loss": 0.0374, + "num_input_tokens_seen": 7655168, + "step": 11350 + }, + { + "epoch": 0.2774045391249114, + "grad_norm": 13.653995513916016, + "learning_rate": 1.1094933307275124e-06, + "loss": 0.0361, + "num_input_tokens_seen": 7658752, + "step": 11355 + }, + { + "epoch": 0.2775266899567586, + "grad_norm": 30.004316329956055, + "learning_rate": 1.1099819221185323e-06, + "loss": 0.1332, + "num_input_tokens_seen": 7662080, + "step": 11360 + }, + { + "epoch": 0.27764884078860574, + "grad_norm": 17.038942337036133, + "learning_rate": 1.110470513509552e-06, + "loss": 0.2056, + "num_input_tokens_seen": 7665280, + "step": 11365 + }, + { + "epoch": 0.27777099162045293, + "grad_norm": 29.278806686401367, + "learning_rate": 1.1109591049005715e-06, + "loss": 0.09, + "num_input_tokens_seen": 7668672, + "step": 11370 + }, + { + "epoch": 0.2778931424523001, + "grad_norm": 30.497360229492188, + "learning_rate": 1.1114476962915914e-06, + "loss": 0.1548, + "num_input_tokens_seen": 7671872, + "step": 11375 + }, + { + "epoch": 0.27801529328414726, + "grad_norm": 0.15975019335746765, + "learning_rate": 1.111936287682611e-06, + "loss": 0.08, + "num_input_tokens_seen": 7675072, + "step": 11380 + }, + { + "epoch": 0.27813744411599445, + "grad_norm": 46.03263854980469, + "learning_rate": 1.1124248790736305e-06, + "loss": 0.089, + "num_input_tokens_seen": 7677952, + "step": 11385 + }, + { + "epoch": 0.2782595949478416, + "grad_norm": 26.398767471313477, + "learning_rate": 1.1129134704646504e-06, + "loss": 0.0878, + "num_input_tokens_seen": 7681088, + "step": 11390 + }, + { + "epoch": 0.27838174577968877, + "grad_norm": 46.81501770019531, + "learning_rate": 1.11340206185567e-06, + "loss": 0.0657, + "num_input_tokens_seen": 7684288, + "step": 11395 + }, + { + "epoch": 0.2785038966115359, + "grad_norm": 49.09954071044922, + "learning_rate": 1.1138906532466898e-06, + "loss": 0.0987, + "num_input_tokens_seen": 7687360, + "step": 11400 + }, + { + "epoch": 0.2786260474433831, + "grad_norm": 25.029376983642578, + "learning_rate": 1.1143792446377095e-06, + "loss": 0.1251, + "num_input_tokens_seen": 7690432, + "step": 11405 + }, + { + "epoch": 0.27874819827523023, + "grad_norm": 40.152469635009766, + "learning_rate": 1.1148678360287291e-06, + "loss": 0.1636, + "num_input_tokens_seen": 7693888, + "step": 11410 + }, + { + "epoch": 0.2788703491070774, + "grad_norm": 42.61248779296875, + "learning_rate": 1.1153564274197488e-06, + "loss": 0.1634, + "num_input_tokens_seen": 7697664, + "step": 11415 + }, + { + "epoch": 0.2789924999389246, + "grad_norm": 27.95641326904297, + "learning_rate": 1.1158450188107685e-06, + "loss": 0.136, + "num_input_tokens_seen": 7700800, + "step": 11420 + }, + { + "epoch": 0.27911465077077174, + "grad_norm": 23.90423583984375, + "learning_rate": 1.1163336102017882e-06, + "loss": 0.0537, + "num_input_tokens_seen": 7703680, + "step": 11425 + }, + { + "epoch": 0.27923680160261893, + "grad_norm": 17.14470100402832, + "learning_rate": 1.1168222015928079e-06, + "loss": 0.2367, + "num_input_tokens_seen": 7707456, + "step": 11430 + }, + { + "epoch": 0.27935895243446607, + "grad_norm": 24.315845489501953, + "learning_rate": 1.1173107929838276e-06, + "loss": 0.1418, + "num_input_tokens_seen": 7710720, + "step": 11435 + }, + { + "epoch": 0.27948110326631326, + "grad_norm": 28.46575355529785, + "learning_rate": 1.1177993843748472e-06, + "loss": 0.2804, + "num_input_tokens_seen": 7713792, + "step": 11440 + }, + { + "epoch": 0.2796032540981604, + "grad_norm": 23.07647705078125, + "learning_rate": 1.118287975765867e-06, + "loss": 0.1272, + "num_input_tokens_seen": 7716736, + "step": 11445 + }, + { + "epoch": 0.2797254049300076, + "grad_norm": 17.141408920288086, + "learning_rate": 1.1187765671568868e-06, + "loss": 0.1038, + "num_input_tokens_seen": 7720000, + "step": 11450 + }, + { + "epoch": 0.2798475557618547, + "grad_norm": 13.484521865844727, + "learning_rate": 1.1192651585479063e-06, + "loss": 0.1069, + "num_input_tokens_seen": 7723456, + "step": 11455 + }, + { + "epoch": 0.2799697065937019, + "grad_norm": 14.077258110046387, + "learning_rate": 1.119753749938926e-06, + "loss": 0.0691, + "num_input_tokens_seen": 7726464, + "step": 11460 + }, + { + "epoch": 0.2800918574255491, + "grad_norm": 11.436298370361328, + "learning_rate": 1.1202423413299459e-06, + "loss": 0.1096, + "num_input_tokens_seen": 7729920, + "step": 11465 + }, + { + "epoch": 0.28021400825739623, + "grad_norm": 25.3713436126709, + "learning_rate": 1.1207309327209653e-06, + "loss": 0.0664, + "num_input_tokens_seen": 7733184, + "step": 11470 + }, + { + "epoch": 0.2803361590892434, + "grad_norm": 14.680299758911133, + "learning_rate": 1.121219524111985e-06, + "loss": 0.11, + "num_input_tokens_seen": 7736384, + "step": 11475 + }, + { + "epoch": 0.28045830992109055, + "grad_norm": 0.4156033396720886, + "learning_rate": 1.121708115503005e-06, + "loss": 0.1157, + "num_input_tokens_seen": 7740096, + "step": 11480 + }, + { + "epoch": 0.28058046075293774, + "grad_norm": 15.260326385498047, + "learning_rate": 1.1221967068940246e-06, + "loss": 0.0868, + "num_input_tokens_seen": 7743744, + "step": 11485 + }, + { + "epoch": 0.2807026115847849, + "grad_norm": 30.606847763061523, + "learning_rate": 1.122685298285044e-06, + "loss": 0.1729, + "num_input_tokens_seen": 7747264, + "step": 11490 + }, + { + "epoch": 0.28082476241663207, + "grad_norm": 0.62032550573349, + "learning_rate": 1.123173889676064e-06, + "loss": 0.1499, + "num_input_tokens_seen": 7750400, + "step": 11495 + }, + { + "epoch": 0.2809469132484792, + "grad_norm": 0.2940658926963806, + "learning_rate": 1.1236624810670836e-06, + "loss": 0.1297, + "num_input_tokens_seen": 7753600, + "step": 11500 + }, + { + "epoch": 0.2810690640803264, + "grad_norm": 11.697123527526855, + "learning_rate": 1.124151072458103e-06, + "loss": 0.1523, + "num_input_tokens_seen": 7756928, + "step": 11505 + }, + { + "epoch": 0.2811912149121735, + "grad_norm": 21.05707359313965, + "learning_rate": 1.124639663849123e-06, + "loss": 0.1703, + "num_input_tokens_seen": 7759872, + "step": 11510 + }, + { + "epoch": 0.2813133657440207, + "grad_norm": 3.735363721847534, + "learning_rate": 1.1251282552401427e-06, + "loss": 0.0938, + "num_input_tokens_seen": 7763584, + "step": 11515 + }, + { + "epoch": 0.2814355165758679, + "grad_norm": 3.861677646636963, + "learning_rate": 1.1256168466311623e-06, + "loss": 0.1084, + "num_input_tokens_seen": 7766720, + "step": 11520 + }, + { + "epoch": 0.28155766740771504, + "grad_norm": 27.23649024963379, + "learning_rate": 1.126105438022182e-06, + "loss": 0.2459, + "num_input_tokens_seen": 7769856, + "step": 11525 + }, + { + "epoch": 0.28167981823956223, + "grad_norm": 3.6670424938201904, + "learning_rate": 1.1265940294132017e-06, + "loss": 0.1084, + "num_input_tokens_seen": 7772992, + "step": 11530 + }, + { + "epoch": 0.28180196907140936, + "grad_norm": 3.219539165496826, + "learning_rate": 1.1270826208042214e-06, + "loss": 0.1102, + "num_input_tokens_seen": 7776320, + "step": 11535 + }, + { + "epoch": 0.28192411990325655, + "grad_norm": 18.450578689575195, + "learning_rate": 1.127571212195241e-06, + "loss": 0.1183, + "num_input_tokens_seen": 7779520, + "step": 11540 + }, + { + "epoch": 0.2820462707351037, + "grad_norm": 4.513743877410889, + "learning_rate": 1.1280598035862608e-06, + "loss": 0.0264, + "num_input_tokens_seen": 7783296, + "step": 11545 + }, + { + "epoch": 0.2821684215669509, + "grad_norm": 18.675949096679688, + "learning_rate": 1.1285483949772804e-06, + "loss": 0.0725, + "num_input_tokens_seen": 7786304, + "step": 11550 + }, + { + "epoch": 0.282290572398798, + "grad_norm": 18.002805709838867, + "learning_rate": 1.1290369863683003e-06, + "loss": 0.0659, + "num_input_tokens_seen": 7790592, + "step": 11555 + }, + { + "epoch": 0.2824127232306452, + "grad_norm": 3.769627094268799, + "learning_rate": 1.1295255777593198e-06, + "loss": 0.2091, + "num_input_tokens_seen": 7794432, + "step": 11560 + }, + { + "epoch": 0.2825348740624924, + "grad_norm": 33.11570739746094, + "learning_rate": 1.1300141691503395e-06, + "loss": 0.056, + "num_input_tokens_seen": 7797888, + "step": 11565 + }, + { + "epoch": 0.2826570248943395, + "grad_norm": 0.7942283749580383, + "learning_rate": 1.1305027605413594e-06, + "loss": 0.1011, + "num_input_tokens_seen": 7800896, + "step": 11570 + }, + { + "epoch": 0.2827791757261867, + "grad_norm": 25.094234466552734, + "learning_rate": 1.1309913519323788e-06, + "loss": 0.144, + "num_input_tokens_seen": 7803904, + "step": 11575 + }, + { + "epoch": 0.28290132655803385, + "grad_norm": 0.5091480016708374, + "learning_rate": 1.1314799433233985e-06, + "loss": 0.0422, + "num_input_tokens_seen": 7807360, + "step": 11580 + }, + { + "epoch": 0.28302347738988104, + "grad_norm": 34.18690490722656, + "learning_rate": 1.1319685347144184e-06, + "loss": 0.1381, + "num_input_tokens_seen": 7810560, + "step": 11585 + }, + { + "epoch": 0.2831456282217282, + "grad_norm": 3.1453356742858887, + "learning_rate": 1.1324571261054379e-06, + "loss": 0.0882, + "num_input_tokens_seen": 7814400, + "step": 11590 + }, + { + "epoch": 0.28326777905357536, + "grad_norm": 62.70692443847656, + "learning_rate": 1.1329457174964576e-06, + "loss": 0.1955, + "num_input_tokens_seen": 7817728, + "step": 11595 + }, + { + "epoch": 0.2833899298854225, + "grad_norm": 1.02499520778656, + "learning_rate": 1.1334343088874775e-06, + "loss": 0.0308, + "num_input_tokens_seen": 7820928, + "step": 11600 + }, + { + "epoch": 0.2835120807172697, + "grad_norm": 11.048792839050293, + "learning_rate": 1.1339229002784971e-06, + "loss": 0.1583, + "num_input_tokens_seen": 7824128, + "step": 11605 + }, + { + "epoch": 0.2836342315491168, + "grad_norm": 36.67851257324219, + "learning_rate": 1.1344114916695166e-06, + "loss": 0.2019, + "num_input_tokens_seen": 7827712, + "step": 11610 + }, + { + "epoch": 0.283756382380964, + "grad_norm": 96.36241149902344, + "learning_rate": 1.1349000830605365e-06, + "loss": 0.097, + "num_input_tokens_seen": 7830720, + "step": 11615 + }, + { + "epoch": 0.2838785332128112, + "grad_norm": 14.921711921691895, + "learning_rate": 1.1353886744515562e-06, + "loss": 0.065, + "num_input_tokens_seen": 7834816, + "step": 11620 + }, + { + "epoch": 0.28400068404465834, + "grad_norm": 1.4795444011688232, + "learning_rate": 1.1358772658425757e-06, + "loss": 0.1565, + "num_input_tokens_seen": 7838080, + "step": 11625 + }, + { + "epoch": 0.2841228348765055, + "grad_norm": 12.521466255187988, + "learning_rate": 1.1363658572335956e-06, + "loss": 0.0938, + "num_input_tokens_seen": 7841280, + "step": 11630 + }, + { + "epoch": 0.28424498570835266, + "grad_norm": 15.332459449768066, + "learning_rate": 1.1368544486246152e-06, + "loss": 0.1394, + "num_input_tokens_seen": 7844224, + "step": 11635 + }, + { + "epoch": 0.28436713654019985, + "grad_norm": 10.226259231567383, + "learning_rate": 1.137343040015635e-06, + "loss": 0.1112, + "num_input_tokens_seen": 7847488, + "step": 11640 + }, + { + "epoch": 0.284489287372047, + "grad_norm": 33.42863082885742, + "learning_rate": 1.1378316314066546e-06, + "loss": 0.146, + "num_input_tokens_seen": 7850816, + "step": 11645 + }, + { + "epoch": 0.2846114382038942, + "grad_norm": 29.166101455688477, + "learning_rate": 1.1383202227976743e-06, + "loss": 0.1581, + "num_input_tokens_seen": 7853952, + "step": 11650 + }, + { + "epoch": 0.2847335890357413, + "grad_norm": 11.100362777709961, + "learning_rate": 1.138808814188694e-06, + "loss": 0.1123, + "num_input_tokens_seen": 7857344, + "step": 11655 + }, + { + "epoch": 0.2848557398675885, + "grad_norm": 27.956087112426758, + "learning_rate": 1.1392974055797136e-06, + "loss": 0.0412, + "num_input_tokens_seen": 7860928, + "step": 11660 + }, + { + "epoch": 0.2849778906994357, + "grad_norm": 45.29127502441406, + "learning_rate": 1.1397859969707333e-06, + "loss": 0.126, + "num_input_tokens_seen": 7865024, + "step": 11665 + }, + { + "epoch": 0.2851000415312828, + "grad_norm": 3.9593372344970703, + "learning_rate": 1.140274588361753e-06, + "loss": 0.0255, + "num_input_tokens_seen": 7868672, + "step": 11670 + }, + { + "epoch": 0.28522219236313, + "grad_norm": 12.407175064086914, + "learning_rate": 1.1407631797527729e-06, + "loss": 0.1446, + "num_input_tokens_seen": 7871680, + "step": 11675 + }, + { + "epoch": 0.28534434319497715, + "grad_norm": 23.719539642333984, + "learning_rate": 1.1412517711437924e-06, + "loss": 0.1934, + "num_input_tokens_seen": 7875136, + "step": 11680 + }, + { + "epoch": 0.28546649402682434, + "grad_norm": 25.424650192260742, + "learning_rate": 1.141740362534812e-06, + "loss": 0.1407, + "num_input_tokens_seen": 7878528, + "step": 11685 + }, + { + "epoch": 0.28558864485867147, + "grad_norm": 30.113014221191406, + "learning_rate": 1.142228953925832e-06, + "loss": 0.0432, + "num_input_tokens_seen": 7881984, + "step": 11690 + }, + { + "epoch": 0.28571079569051866, + "grad_norm": 62.609764099121094, + "learning_rate": 1.1427175453168514e-06, + "loss": 0.0923, + "num_input_tokens_seen": 7885184, + "step": 11695 + }, + { + "epoch": 0.2858329465223658, + "grad_norm": 19.35966682434082, + "learning_rate": 1.143206136707871e-06, + "loss": 0.2471, + "num_input_tokens_seen": 7889088, + "step": 11700 + }, + { + "epoch": 0.285955097354213, + "grad_norm": 1.5210208892822266, + "learning_rate": 1.143694728098891e-06, + "loss": 0.0819, + "num_input_tokens_seen": 7892928, + "step": 11705 + }, + { + "epoch": 0.2860772481860602, + "grad_norm": 25.11351776123047, + "learning_rate": 1.1441833194899105e-06, + "loss": 0.0932, + "num_input_tokens_seen": 7896320, + "step": 11710 + }, + { + "epoch": 0.2861993990179073, + "grad_norm": 22.904691696166992, + "learning_rate": 1.1446719108809301e-06, + "loss": 0.2197, + "num_input_tokens_seen": 7899776, + "step": 11715 + }, + { + "epoch": 0.2863215498497545, + "grad_norm": 3.6096725463867188, + "learning_rate": 1.14516050227195e-06, + "loss": 0.0535, + "num_input_tokens_seen": 7903424, + "step": 11720 + }, + { + "epoch": 0.28644370068160163, + "grad_norm": 11.384994506835938, + "learning_rate": 1.1456490936629697e-06, + "loss": 0.1282, + "num_input_tokens_seen": 7906432, + "step": 11725 + }, + { + "epoch": 0.2865658515134488, + "grad_norm": 11.53609561920166, + "learning_rate": 1.1461376850539892e-06, + "loss": 0.0956, + "num_input_tokens_seen": 7909760, + "step": 11730 + }, + { + "epoch": 0.28668800234529596, + "grad_norm": 20.164472579956055, + "learning_rate": 1.146626276445009e-06, + "loss": 0.1069, + "num_input_tokens_seen": 7912960, + "step": 11735 + }, + { + "epoch": 0.28681015317714315, + "grad_norm": 32.91118621826172, + "learning_rate": 1.1471148678360288e-06, + "loss": 0.1146, + "num_input_tokens_seen": 7916800, + "step": 11740 + }, + { + "epoch": 0.2869323040089903, + "grad_norm": 17.6117000579834, + "learning_rate": 1.1476034592270482e-06, + "loss": 0.1474, + "num_input_tokens_seen": 7919808, + "step": 11745 + }, + { + "epoch": 0.28705445484083747, + "grad_norm": 23.570358276367188, + "learning_rate": 1.1480920506180681e-06, + "loss": 0.0573, + "num_input_tokens_seen": 7923200, + "step": 11750 + }, + { + "epoch": 0.2871766056726846, + "grad_norm": 6.977721691131592, + "learning_rate": 1.1485806420090878e-06, + "loss": 0.214, + "num_input_tokens_seen": 7926208, + "step": 11755 + }, + { + "epoch": 0.2872987565045318, + "grad_norm": 5.850998401641846, + "learning_rate": 1.1490692334001075e-06, + "loss": 0.0956, + "num_input_tokens_seen": 7929664, + "step": 11760 + }, + { + "epoch": 0.287420907336379, + "grad_norm": 26.8560791015625, + "learning_rate": 1.1495578247911272e-06, + "loss": 0.0706, + "num_input_tokens_seen": 7933312, + "step": 11765 + }, + { + "epoch": 0.2875430581682261, + "grad_norm": 8.291579246520996, + "learning_rate": 1.1500464161821468e-06, + "loss": 0.2371, + "num_input_tokens_seen": 7936896, + "step": 11770 + }, + { + "epoch": 0.2876652090000733, + "grad_norm": 23.933063507080078, + "learning_rate": 1.1505350075731665e-06, + "loss": 0.1282, + "num_input_tokens_seen": 7940672, + "step": 11775 + }, + { + "epoch": 0.28778735983192044, + "grad_norm": 3.920012950897217, + "learning_rate": 1.1510235989641862e-06, + "loss": 0.1103, + "num_input_tokens_seen": 7944064, + "step": 11780 + }, + { + "epoch": 0.28790951066376763, + "grad_norm": 31.8831844329834, + "learning_rate": 1.1515121903552059e-06, + "loss": 0.0834, + "num_input_tokens_seen": 7947456, + "step": 11785 + }, + { + "epoch": 0.28803166149561477, + "grad_norm": 20.2723445892334, + "learning_rate": 1.1520007817462256e-06, + "loss": 0.1646, + "num_input_tokens_seen": 7950528, + "step": 11790 + }, + { + "epoch": 0.28815381232746196, + "grad_norm": 24.096601486206055, + "learning_rate": 1.1524893731372455e-06, + "loss": 0.1808, + "num_input_tokens_seen": 7953728, + "step": 11795 + }, + { + "epoch": 0.2882759631593091, + "grad_norm": 25.648883819580078, + "learning_rate": 1.152977964528265e-06, + "loss": 0.0937, + "num_input_tokens_seen": 7957248, + "step": 11800 + }, + { + "epoch": 0.2883981139911563, + "grad_norm": 1.3611674308776855, + "learning_rate": 1.1534665559192846e-06, + "loss": 0.1189, + "num_input_tokens_seen": 7960320, + "step": 11805 + }, + { + "epoch": 0.28852026482300347, + "grad_norm": 14.197624206542969, + "learning_rate": 1.1539551473103045e-06, + "loss": 0.1614, + "num_input_tokens_seen": 7963264, + "step": 11810 + }, + { + "epoch": 0.2886424156548506, + "grad_norm": 27.47894859313965, + "learning_rate": 1.154443738701324e-06, + "loss": 0.1142, + "num_input_tokens_seen": 7966464, + "step": 11815 + }, + { + "epoch": 0.2887645664866978, + "grad_norm": 24.667394638061523, + "learning_rate": 1.1549323300923437e-06, + "loss": 0.1608, + "num_input_tokens_seen": 7970240, + "step": 11820 + }, + { + "epoch": 0.2888867173185449, + "grad_norm": 36.73628234863281, + "learning_rate": 1.1554209214833635e-06, + "loss": 0.2324, + "num_input_tokens_seen": 7973632, + "step": 11825 + }, + { + "epoch": 0.2890088681503921, + "grad_norm": 18.555866241455078, + "learning_rate": 1.1559095128743832e-06, + "loss": 0.1112, + "num_input_tokens_seen": 7976832, + "step": 11830 + }, + { + "epoch": 0.28913101898223925, + "grad_norm": 49.42720031738281, + "learning_rate": 1.1563981042654027e-06, + "loss": 0.0498, + "num_input_tokens_seen": 7981120, + "step": 11835 + }, + { + "epoch": 0.28925316981408644, + "grad_norm": 54.10476303100586, + "learning_rate": 1.1568866956564226e-06, + "loss": 0.127, + "num_input_tokens_seen": 7984320, + "step": 11840 + }, + { + "epoch": 0.2893753206459336, + "grad_norm": 1.2627156972885132, + "learning_rate": 1.1573752870474423e-06, + "loss": 0.0945, + "num_input_tokens_seen": 7987712, + "step": 11845 + }, + { + "epoch": 0.28949747147778077, + "grad_norm": 23.782079696655273, + "learning_rate": 1.1578638784384617e-06, + "loss": 0.2029, + "num_input_tokens_seen": 7991104, + "step": 11850 + }, + { + "epoch": 0.28961962230962796, + "grad_norm": 56.127593994140625, + "learning_rate": 1.1583524698294816e-06, + "loss": 0.1268, + "num_input_tokens_seen": 7994112, + "step": 11855 + }, + { + "epoch": 0.2897417731414751, + "grad_norm": 1.2978109121322632, + "learning_rate": 1.1588410612205013e-06, + "loss": 0.1884, + "num_input_tokens_seen": 7997568, + "step": 11860 + }, + { + "epoch": 0.2898639239733223, + "grad_norm": 27.923282623291016, + "learning_rate": 1.1593296526115208e-06, + "loss": 0.1154, + "num_input_tokens_seen": 8000832, + "step": 11865 + }, + { + "epoch": 0.2899860748051694, + "grad_norm": 33.25914001464844, + "learning_rate": 1.1598182440025407e-06, + "loss": 0.0996, + "num_input_tokens_seen": 8004096, + "step": 11870 + }, + { + "epoch": 0.2901082256370166, + "grad_norm": 14.677995681762695, + "learning_rate": 1.1603068353935604e-06, + "loss": 0.0903, + "num_input_tokens_seen": 8007040, + "step": 11875 + }, + { + "epoch": 0.29023037646886374, + "grad_norm": 47.54405212402344, + "learning_rate": 1.16079542678458e-06, + "loss": 0.1717, + "num_input_tokens_seen": 8009984, + "step": 11880 + }, + { + "epoch": 0.2903525273007109, + "grad_norm": 93.65504455566406, + "learning_rate": 1.1612840181755997e-06, + "loss": 0.1056, + "num_input_tokens_seen": 8013312, + "step": 11885 + }, + { + "epoch": 0.29047467813255806, + "grad_norm": 0.011830669827759266, + "learning_rate": 1.1617726095666194e-06, + "loss": 0.0957, + "num_input_tokens_seen": 8016640, + "step": 11890 + }, + { + "epoch": 0.29059682896440525, + "grad_norm": 2.5089898109436035, + "learning_rate": 1.162261200957639e-06, + "loss": 0.107, + "num_input_tokens_seen": 8020096, + "step": 11895 + }, + { + "epoch": 0.2907189797962524, + "grad_norm": 28.145421981811523, + "learning_rate": 1.1627497923486588e-06, + "loss": 0.0793, + "num_input_tokens_seen": 8024000, + "step": 11900 + }, + { + "epoch": 0.2908411306280996, + "grad_norm": 60.22999572753906, + "learning_rate": 1.1632383837396784e-06, + "loss": 0.0996, + "num_input_tokens_seen": 8027520, + "step": 11905 + }, + { + "epoch": 0.29096328145994677, + "grad_norm": 11.291279792785645, + "learning_rate": 1.1637269751306981e-06, + "loss": 0.0543, + "num_input_tokens_seen": 8031232, + "step": 11910 + }, + { + "epoch": 0.2910854322917939, + "grad_norm": 49.44854736328125, + "learning_rate": 1.164215566521718e-06, + "loss": 0.1778, + "num_input_tokens_seen": 8034368, + "step": 11915 + }, + { + "epoch": 0.2912075831236411, + "grad_norm": 34.739707946777344, + "learning_rate": 1.1647041579127375e-06, + "loss": 0.1668, + "num_input_tokens_seen": 8037440, + "step": 11920 + }, + { + "epoch": 0.2913297339554882, + "grad_norm": 10.186627388000488, + "learning_rate": 1.1651927493037572e-06, + "loss": 0.2086, + "num_input_tokens_seen": 8041152, + "step": 11925 + }, + { + "epoch": 0.2914518847873354, + "grad_norm": 14.452888488769531, + "learning_rate": 1.165681340694777e-06, + "loss": 0.1015, + "num_input_tokens_seen": 8044608, + "step": 11930 + }, + { + "epoch": 0.29157403561918255, + "grad_norm": 1.4567221403121948, + "learning_rate": 1.1661699320857965e-06, + "loss": 0.0934, + "num_input_tokens_seen": 8048064, + "step": 11935 + }, + { + "epoch": 0.29169618645102974, + "grad_norm": 22.50691032409668, + "learning_rate": 1.1666585234768162e-06, + "loss": 0.1407, + "num_input_tokens_seen": 8051392, + "step": 11940 + }, + { + "epoch": 0.29181833728287687, + "grad_norm": 32.28067398071289, + "learning_rate": 1.1671471148678361e-06, + "loss": 0.1593, + "num_input_tokens_seen": 8054656, + "step": 11945 + }, + { + "epoch": 0.29194048811472406, + "grad_norm": 20.15083122253418, + "learning_rate": 1.1676357062588558e-06, + "loss": 0.0877, + "num_input_tokens_seen": 8058176, + "step": 11950 + }, + { + "epoch": 0.29206263894657125, + "grad_norm": 20.036026000976562, + "learning_rate": 1.1681242976498753e-06, + "loss": 0.0686, + "num_input_tokens_seen": 8062208, + "step": 11955 + }, + { + "epoch": 0.2921847897784184, + "grad_norm": 11.90791130065918, + "learning_rate": 1.1686128890408952e-06, + "loss": 0.1233, + "num_input_tokens_seen": 8066560, + "step": 11960 + }, + { + "epoch": 0.2923069406102656, + "grad_norm": 10.932024002075195, + "learning_rate": 1.1691014804319148e-06, + "loss": 0.1607, + "num_input_tokens_seen": 8069824, + "step": 11965 + }, + { + "epoch": 0.2924290914421127, + "grad_norm": 26.350589752197266, + "learning_rate": 1.1695900718229343e-06, + "loss": 0.2533, + "num_input_tokens_seen": 8073216, + "step": 11970 + }, + { + "epoch": 0.2925512422739599, + "grad_norm": 2.1203091144561768, + "learning_rate": 1.1700786632139542e-06, + "loss": 0.104, + "num_input_tokens_seen": 8076416, + "step": 11975 + }, + { + "epoch": 0.29267339310580703, + "grad_norm": 9.464871406555176, + "learning_rate": 1.1705672546049739e-06, + "loss": 0.0888, + "num_input_tokens_seen": 8079680, + "step": 11980 + }, + { + "epoch": 0.2927955439376542, + "grad_norm": 27.66861915588379, + "learning_rate": 1.1710558459959936e-06, + "loss": 0.1051, + "num_input_tokens_seen": 8083136, + "step": 11985 + }, + { + "epoch": 0.29291769476950136, + "grad_norm": 1.5247504711151123, + "learning_rate": 1.1715444373870132e-06, + "loss": 0.1162, + "num_input_tokens_seen": 8086336, + "step": 11990 + }, + { + "epoch": 0.29303984560134855, + "grad_norm": 11.192178726196289, + "learning_rate": 1.172033028778033e-06, + "loss": 0.1732, + "num_input_tokens_seen": 8089536, + "step": 11995 + }, + { + "epoch": 0.29316199643319574, + "grad_norm": 8.231268882751465, + "learning_rate": 1.1725216201690526e-06, + "loss": 0.1816, + "num_input_tokens_seen": 8092928, + "step": 12000 + }, + { + "epoch": 0.29328414726504287, + "grad_norm": 13.495226860046387, + "learning_rate": 1.1730102115600723e-06, + "loss": 0.0593, + "num_input_tokens_seen": 8095872, + "step": 12005 + }, + { + "epoch": 0.29340629809689006, + "grad_norm": 11.1940336227417, + "learning_rate": 1.173498802951092e-06, + "loss": 0.1083, + "num_input_tokens_seen": 8099008, + "step": 12010 + }, + { + "epoch": 0.2935284489287372, + "grad_norm": 10.936391830444336, + "learning_rate": 1.1739873943421116e-06, + "loss": 0.07, + "num_input_tokens_seen": 8102208, + "step": 12015 + }, + { + "epoch": 0.2936505997605844, + "grad_norm": 26.182369232177734, + "learning_rate": 1.1744759857331313e-06, + "loss": 0.094, + "num_input_tokens_seen": 8105472, + "step": 12020 + }, + { + "epoch": 0.2937727505924315, + "grad_norm": 21.47533416748047, + "learning_rate": 1.174964577124151e-06, + "loss": 0.1558, + "num_input_tokens_seen": 8108928, + "step": 12025 + }, + { + "epoch": 0.2938949014242787, + "grad_norm": 9.534170150756836, + "learning_rate": 1.1754531685151707e-06, + "loss": 0.0828, + "num_input_tokens_seen": 8112320, + "step": 12030 + }, + { + "epoch": 0.29401705225612584, + "grad_norm": 44.23080825805664, + "learning_rate": 1.1759417599061906e-06, + "loss": 0.201, + "num_input_tokens_seen": 8115776, + "step": 12035 + }, + { + "epoch": 0.29413920308797303, + "grad_norm": 16.109403610229492, + "learning_rate": 1.17643035129721e-06, + "loss": 0.1382, + "num_input_tokens_seen": 8118784, + "step": 12040 + }, + { + "epoch": 0.29426135391982017, + "grad_norm": 9.764286994934082, + "learning_rate": 1.1769189426882297e-06, + "loss": 0.1141, + "num_input_tokens_seen": 8122368, + "step": 12045 + }, + { + "epoch": 0.29438350475166736, + "grad_norm": 21.418296813964844, + "learning_rate": 1.1774075340792496e-06, + "loss": 0.1614, + "num_input_tokens_seen": 8125568, + "step": 12050 + }, + { + "epoch": 0.29450565558351455, + "grad_norm": 4.4351677894592285, + "learning_rate": 1.177896125470269e-06, + "loss": 0.1131, + "num_input_tokens_seen": 8128768, + "step": 12055 + }, + { + "epoch": 0.2946278064153617, + "grad_norm": 19.241079330444336, + "learning_rate": 1.1783847168612888e-06, + "loss": 0.112, + "num_input_tokens_seen": 8132096, + "step": 12060 + }, + { + "epoch": 0.29474995724720887, + "grad_norm": 35.76866912841797, + "learning_rate": 1.1788733082523087e-06, + "loss": 0.0945, + "num_input_tokens_seen": 8135360, + "step": 12065 + }, + { + "epoch": 0.294872108079056, + "grad_norm": 23.178844451904297, + "learning_rate": 1.1793618996433284e-06, + "loss": 0.1098, + "num_input_tokens_seen": 8138560, + "step": 12070 + }, + { + "epoch": 0.2949942589109032, + "grad_norm": 4.689840316772461, + "learning_rate": 1.1798504910343478e-06, + "loss": 0.0424, + "num_input_tokens_seen": 8141952, + "step": 12075 + }, + { + "epoch": 0.29511640974275033, + "grad_norm": 23.47108268737793, + "learning_rate": 1.1803390824253677e-06, + "loss": 0.0183, + "num_input_tokens_seen": 8145472, + "step": 12080 + }, + { + "epoch": 0.2952385605745975, + "grad_norm": 45.8759651184082, + "learning_rate": 1.1808276738163874e-06, + "loss": 0.1725, + "num_input_tokens_seen": 8148544, + "step": 12085 + }, + { + "epoch": 0.29536071140644465, + "grad_norm": 66.73482513427734, + "learning_rate": 1.1813162652074069e-06, + "loss": 0.1385, + "num_input_tokens_seen": 8152192, + "step": 12090 + }, + { + "epoch": 0.29548286223829184, + "grad_norm": 86.15653991699219, + "learning_rate": 1.1818048565984268e-06, + "loss": 0.2717, + "num_input_tokens_seen": 8155008, + "step": 12095 + }, + { + "epoch": 0.29560501307013903, + "grad_norm": 72.50959777832031, + "learning_rate": 1.1822934479894464e-06, + "loss": 0.2778, + "num_input_tokens_seen": 8158592, + "step": 12100 + }, + { + "epoch": 0.29572716390198617, + "grad_norm": 25.257177352905273, + "learning_rate": 1.1827820393804661e-06, + "loss": 0.109, + "num_input_tokens_seen": 8162048, + "step": 12105 + }, + { + "epoch": 0.29584931473383336, + "grad_norm": 18.258644104003906, + "learning_rate": 1.1832706307714858e-06, + "loss": 0.2265, + "num_input_tokens_seen": 8165312, + "step": 12110 + }, + { + "epoch": 0.2959714655656805, + "grad_norm": 35.079010009765625, + "learning_rate": 1.1837592221625055e-06, + "loss": 0.0558, + "num_input_tokens_seen": 8168768, + "step": 12115 + }, + { + "epoch": 0.2960936163975277, + "grad_norm": 23.954132080078125, + "learning_rate": 1.1842478135535252e-06, + "loss": 0.2307, + "num_input_tokens_seen": 8172288, + "step": 12120 + }, + { + "epoch": 0.2962157672293748, + "grad_norm": 24.541086196899414, + "learning_rate": 1.1847364049445448e-06, + "loss": 0.13, + "num_input_tokens_seen": 8175808, + "step": 12125 + }, + { + "epoch": 0.296337918061222, + "grad_norm": 54.54975128173828, + "learning_rate": 1.1852249963355645e-06, + "loss": 0.1947, + "num_input_tokens_seen": 8179136, + "step": 12130 + }, + { + "epoch": 0.29646006889306914, + "grad_norm": 21.549692153930664, + "learning_rate": 1.1857135877265842e-06, + "loss": 0.2153, + "num_input_tokens_seen": 8182528, + "step": 12135 + }, + { + "epoch": 0.29658221972491633, + "grad_norm": 15.662209510803223, + "learning_rate": 1.1862021791176037e-06, + "loss": 0.1133, + "num_input_tokens_seen": 8185536, + "step": 12140 + }, + { + "epoch": 0.2967043705567635, + "grad_norm": 14.91734790802002, + "learning_rate": 1.1866907705086236e-06, + "loss": 0.0854, + "num_input_tokens_seen": 8188864, + "step": 12145 + }, + { + "epoch": 0.29682652138861065, + "grad_norm": 3.287745475769043, + "learning_rate": 1.1871793618996433e-06, + "loss": 0.086, + "num_input_tokens_seen": 8192192, + "step": 12150 + }, + { + "epoch": 0.29694867222045784, + "grad_norm": 0.6978838443756104, + "learning_rate": 1.1876679532906631e-06, + "loss": 0.1689, + "num_input_tokens_seen": 8195136, + "step": 12155 + }, + { + "epoch": 0.297070823052305, + "grad_norm": 1.291856288909912, + "learning_rate": 1.1881565446816826e-06, + "loss": 0.1316, + "num_input_tokens_seen": 8198592, + "step": 12160 + }, + { + "epoch": 0.29719297388415217, + "grad_norm": 15.981237411499023, + "learning_rate": 1.1886451360727023e-06, + "loss": 0.1468, + "num_input_tokens_seen": 8201920, + "step": 12165 + }, + { + "epoch": 0.2973151247159993, + "grad_norm": 8.645312309265137, + "learning_rate": 1.1891337274637222e-06, + "loss": 0.1071, + "num_input_tokens_seen": 8205824, + "step": 12170 + }, + { + "epoch": 0.2974372755478465, + "grad_norm": 0.7097776532173157, + "learning_rate": 1.1896223188547417e-06, + "loss": 0.1096, + "num_input_tokens_seen": 8209216, + "step": 12175 + }, + { + "epoch": 0.2975594263796936, + "grad_norm": 10.166160583496094, + "learning_rate": 1.1901109102457613e-06, + "loss": 0.1757, + "num_input_tokens_seen": 8212672, + "step": 12180 + }, + { + "epoch": 0.2976815772115408, + "grad_norm": 38.063968658447266, + "learning_rate": 1.1905995016367812e-06, + "loss": 0.1019, + "num_input_tokens_seen": 8215744, + "step": 12185 + }, + { + "epoch": 0.29780372804338795, + "grad_norm": 16.18952178955078, + "learning_rate": 1.191088093027801e-06, + "loss": 0.1574, + "num_input_tokens_seen": 8219200, + "step": 12190 + }, + { + "epoch": 0.29792587887523514, + "grad_norm": 30.04621124267578, + "learning_rate": 1.1915766844188204e-06, + "loss": 0.0352, + "num_input_tokens_seen": 8222784, + "step": 12195 + }, + { + "epoch": 0.29804802970708233, + "grad_norm": 29.685110092163086, + "learning_rate": 1.1920652758098403e-06, + "loss": 0.1086, + "num_input_tokens_seen": 8226368, + "step": 12200 + }, + { + "epoch": 0.29817018053892946, + "grad_norm": 13.526518821716309, + "learning_rate": 1.19255386720086e-06, + "loss": 0.194, + "num_input_tokens_seen": 8229440, + "step": 12205 + }, + { + "epoch": 0.29829233137077665, + "grad_norm": 32.48648452758789, + "learning_rate": 1.1930424585918794e-06, + "loss": 0.0659, + "num_input_tokens_seen": 8232768, + "step": 12210 + }, + { + "epoch": 0.2984144822026238, + "grad_norm": 30.82636833190918, + "learning_rate": 1.1935310499828993e-06, + "loss": 0.3557, + "num_input_tokens_seen": 8235968, + "step": 12215 + }, + { + "epoch": 0.298536633034471, + "grad_norm": 25.68510627746582, + "learning_rate": 1.194019641373919e-06, + "loss": 0.1073, + "num_input_tokens_seen": 8239040, + "step": 12220 + }, + { + "epoch": 0.2986587838663181, + "grad_norm": 27.209468841552734, + "learning_rate": 1.1945082327649387e-06, + "loss": 0.0733, + "num_input_tokens_seen": 8242112, + "step": 12225 + }, + { + "epoch": 0.2987809346981653, + "grad_norm": 18.996612548828125, + "learning_rate": 1.1949968241559582e-06, + "loss": 0.0708, + "num_input_tokens_seen": 8245888, + "step": 12230 + }, + { + "epoch": 0.29890308553001244, + "grad_norm": 34.09287643432617, + "learning_rate": 1.195485415546978e-06, + "loss": 0.0598, + "num_input_tokens_seen": 8249344, + "step": 12235 + }, + { + "epoch": 0.2990252363618596, + "grad_norm": 34.00294494628906, + "learning_rate": 1.1959740069379977e-06, + "loss": 0.1704, + "num_input_tokens_seen": 8252992, + "step": 12240 + }, + { + "epoch": 0.2991473871937068, + "grad_norm": 4.011249542236328, + "learning_rate": 1.1964625983290172e-06, + "loss": 0.0935, + "num_input_tokens_seen": 8256832, + "step": 12245 + }, + { + "epoch": 0.29926953802555395, + "grad_norm": 50.23273849487305, + "learning_rate": 1.196951189720037e-06, + "loss": 0.0363, + "num_input_tokens_seen": 8260352, + "step": 12250 + }, + { + "epoch": 0.29939168885740114, + "grad_norm": 55.2866096496582, + "learning_rate": 1.1974397811110568e-06, + "loss": 0.0674, + "num_input_tokens_seen": 8263680, + "step": 12255 + }, + { + "epoch": 0.2995138396892483, + "grad_norm": 126.1909408569336, + "learning_rate": 1.1979283725020767e-06, + "loss": 0.1637, + "num_input_tokens_seen": 8266944, + "step": 12260 + }, + { + "epoch": 0.29963599052109546, + "grad_norm": 0.3145304024219513, + "learning_rate": 1.1984169638930961e-06, + "loss": 0.2747, + "num_input_tokens_seen": 8269952, + "step": 12265 + }, + { + "epoch": 0.2997581413529426, + "grad_norm": 51.68240737915039, + "learning_rate": 1.1989055552841158e-06, + "loss": 0.2082, + "num_input_tokens_seen": 8273472, + "step": 12270 + }, + { + "epoch": 0.2998802921847898, + "grad_norm": 3.867333173751831, + "learning_rate": 1.1993941466751357e-06, + "loss": 0.2137, + "num_input_tokens_seen": 8276416, + "step": 12275 + }, + { + "epoch": 0.3000024430166369, + "grad_norm": 64.91502380371094, + "learning_rate": 1.1998827380661552e-06, + "loss": 0.0824, + "num_input_tokens_seen": 8279488, + "step": 12280 + }, + { + "epoch": 0.3001245938484841, + "grad_norm": 12.365072250366211, + "learning_rate": 1.2003713294571749e-06, + "loss": 0.2047, + "num_input_tokens_seen": 8283072, + "step": 12285 + }, + { + "epoch": 0.30024674468033125, + "grad_norm": 17.204029083251953, + "learning_rate": 1.2008599208481948e-06, + "loss": 0.1809, + "num_input_tokens_seen": 8286208, + "step": 12290 + }, + { + "epoch": 0.30036889551217844, + "grad_norm": 44.201969146728516, + "learning_rate": 1.2013485122392142e-06, + "loss": 0.2269, + "num_input_tokens_seen": 8289216, + "step": 12295 + }, + { + "epoch": 0.3004910463440256, + "grad_norm": 26.522666931152344, + "learning_rate": 1.201837103630234e-06, + "loss": 0.1067, + "num_input_tokens_seen": 8292672, + "step": 12300 + }, + { + "epoch": 0.30061319717587276, + "grad_norm": 15.294692993164062, + "learning_rate": 1.2023256950212538e-06, + "loss": 0.1393, + "num_input_tokens_seen": 8296576, + "step": 12305 + }, + { + "epoch": 0.30073534800771995, + "grad_norm": 2.377216339111328, + "learning_rate": 1.2028142864122735e-06, + "loss": 0.1101, + "num_input_tokens_seen": 8299776, + "step": 12310 + }, + { + "epoch": 0.3008574988395671, + "grad_norm": 3.2018935680389404, + "learning_rate": 1.203302877803293e-06, + "loss": 0.1248, + "num_input_tokens_seen": 8303616, + "step": 12315 + }, + { + "epoch": 0.3009796496714143, + "grad_norm": 1.4899169206619263, + "learning_rate": 1.2037914691943128e-06, + "loss": 0.044, + "num_input_tokens_seen": 8306816, + "step": 12320 + }, + { + "epoch": 0.3011018005032614, + "grad_norm": 15.029468536376953, + "learning_rate": 1.2042800605853325e-06, + "loss": 0.0833, + "num_input_tokens_seen": 8310144, + "step": 12325 + }, + { + "epoch": 0.3012239513351086, + "grad_norm": 18.200010299682617, + "learning_rate": 1.204768651976352e-06, + "loss": 0.0851, + "num_input_tokens_seen": 8313408, + "step": 12330 + }, + { + "epoch": 0.30134610216695573, + "grad_norm": 17.035367965698242, + "learning_rate": 1.2052572433673717e-06, + "loss": 0.0985, + "num_input_tokens_seen": 8316608, + "step": 12335 + }, + { + "epoch": 0.3014682529988029, + "grad_norm": 32.94978713989258, + "learning_rate": 1.2057458347583916e-06, + "loss": 0.1569, + "num_input_tokens_seen": 8320576, + "step": 12340 + }, + { + "epoch": 0.3015904038306501, + "grad_norm": 18.8249568939209, + "learning_rate": 1.2062344261494112e-06, + "loss": 0.1164, + "num_input_tokens_seen": 8323520, + "step": 12345 + }, + { + "epoch": 0.30171255466249725, + "grad_norm": 0.9230943322181702, + "learning_rate": 1.2067230175404307e-06, + "loss": 0.0494, + "num_input_tokens_seen": 8327040, + "step": 12350 + }, + { + "epoch": 0.30183470549434444, + "grad_norm": 18.419689178466797, + "learning_rate": 1.2072116089314506e-06, + "loss": 0.2087, + "num_input_tokens_seen": 8330240, + "step": 12355 + }, + { + "epoch": 0.30195685632619157, + "grad_norm": 5.805023670196533, + "learning_rate": 1.2077002003224703e-06, + "loss": 0.0641, + "num_input_tokens_seen": 8333312, + "step": 12360 + }, + { + "epoch": 0.30207900715803876, + "grad_norm": 10.554583549499512, + "learning_rate": 1.2081887917134898e-06, + "loss": 0.1007, + "num_input_tokens_seen": 8336576, + "step": 12365 + }, + { + "epoch": 0.3022011579898859, + "grad_norm": 25.36183738708496, + "learning_rate": 1.2086773831045097e-06, + "loss": 0.2068, + "num_input_tokens_seen": 8340288, + "step": 12370 + }, + { + "epoch": 0.3023233088217331, + "grad_norm": 39.68854904174805, + "learning_rate": 1.2091659744955293e-06, + "loss": 0.1953, + "num_input_tokens_seen": 8343552, + "step": 12375 + }, + { + "epoch": 0.3024454596535802, + "grad_norm": 1.862717628479004, + "learning_rate": 1.2096545658865492e-06, + "loss": 0.1275, + "num_input_tokens_seen": 8347136, + "step": 12380 + }, + { + "epoch": 0.3025676104854274, + "grad_norm": 34.478878021240234, + "learning_rate": 1.2101431572775687e-06, + "loss": 0.2043, + "num_input_tokens_seen": 8351104, + "step": 12385 + }, + { + "epoch": 0.3026897613172746, + "grad_norm": 5.264378070831299, + "learning_rate": 1.2106317486685884e-06, + "loss": 0.1308, + "num_input_tokens_seen": 8354176, + "step": 12390 + }, + { + "epoch": 0.30281191214912173, + "grad_norm": 2.541132688522339, + "learning_rate": 1.2111203400596083e-06, + "loss": 0.0783, + "num_input_tokens_seen": 8357888, + "step": 12395 + }, + { + "epoch": 0.3029340629809689, + "grad_norm": 18.312211990356445, + "learning_rate": 1.2116089314506277e-06, + "loss": 0.0698, + "num_input_tokens_seen": 8361024, + "step": 12400 + }, + { + "epoch": 0.30305621381281606, + "grad_norm": 18.090362548828125, + "learning_rate": 1.2120975228416474e-06, + "loss": 0.0714, + "num_input_tokens_seen": 8364224, + "step": 12405 + }, + { + "epoch": 0.30317836464466325, + "grad_norm": 23.060293197631836, + "learning_rate": 1.2125861142326673e-06, + "loss": 0.184, + "num_input_tokens_seen": 8367296, + "step": 12410 + }, + { + "epoch": 0.3033005154765104, + "grad_norm": 29.778749465942383, + "learning_rate": 1.2130747056236868e-06, + "loss": 0.1278, + "num_input_tokens_seen": 8370304, + "step": 12415 + }, + { + "epoch": 0.30342266630835757, + "grad_norm": 20.16764259338379, + "learning_rate": 1.2135632970147065e-06, + "loss": 0.1006, + "num_input_tokens_seen": 8373504, + "step": 12420 + }, + { + "epoch": 0.3035448171402047, + "grad_norm": 14.994250297546387, + "learning_rate": 1.2140518884057261e-06, + "loss": 0.1913, + "num_input_tokens_seen": 8377024, + "step": 12425 + }, + { + "epoch": 0.3036669679720519, + "grad_norm": 39.417724609375, + "learning_rate": 1.214540479796746e-06, + "loss": 0.1267, + "num_input_tokens_seen": 8380160, + "step": 12430 + }, + { + "epoch": 0.303789118803899, + "grad_norm": 20.88626480102539, + "learning_rate": 1.2150290711877655e-06, + "loss": 0.1016, + "num_input_tokens_seen": 8383424, + "step": 12435 + }, + { + "epoch": 0.3039112696357462, + "grad_norm": 4.2171149253845215, + "learning_rate": 1.2155176625787852e-06, + "loss": 0.0801, + "num_input_tokens_seen": 8387072, + "step": 12440 + }, + { + "epoch": 0.3040334204675934, + "grad_norm": 4.536027908325195, + "learning_rate": 1.216006253969805e-06, + "loss": 0.0782, + "num_input_tokens_seen": 8390656, + "step": 12445 + }, + { + "epoch": 0.30415557129944054, + "grad_norm": 1.3891936540603638, + "learning_rate": 1.2164948453608246e-06, + "loss": 0.0775, + "num_input_tokens_seen": 8394304, + "step": 12450 + }, + { + "epoch": 0.30427772213128773, + "grad_norm": 13.561484336853027, + "learning_rate": 1.2169834367518442e-06, + "loss": 0.243, + "num_input_tokens_seen": 8397632, + "step": 12455 + }, + { + "epoch": 0.30439987296313487, + "grad_norm": 21.17917823791504, + "learning_rate": 1.2174720281428641e-06, + "loss": 0.2416, + "num_input_tokens_seen": 8401024, + "step": 12460 + }, + { + "epoch": 0.30452202379498206, + "grad_norm": 0.6886555552482605, + "learning_rate": 1.2179606195338838e-06, + "loss": 0.0535, + "num_input_tokens_seen": 8404416, + "step": 12465 + }, + { + "epoch": 0.3046441746268292, + "grad_norm": 32.76136779785156, + "learning_rate": 1.2184492109249033e-06, + "loss": 0.151, + "num_input_tokens_seen": 8408448, + "step": 12470 + }, + { + "epoch": 0.3047663254586764, + "grad_norm": 1.4499430656433105, + "learning_rate": 1.2189378023159232e-06, + "loss": 0.1923, + "num_input_tokens_seen": 8412416, + "step": 12475 + }, + { + "epoch": 0.3048884762905235, + "grad_norm": 22.090356826782227, + "learning_rate": 1.2194263937069429e-06, + "loss": 0.1284, + "num_input_tokens_seen": 8415296, + "step": 12480 + }, + { + "epoch": 0.3050106271223707, + "grad_norm": 8.059381484985352, + "learning_rate": 1.2199149850979623e-06, + "loss": 0.0634, + "num_input_tokens_seen": 8418240, + "step": 12485 + }, + { + "epoch": 0.3051327779542179, + "grad_norm": 33.66912841796875, + "learning_rate": 1.2204035764889822e-06, + "loss": 0.108, + "num_input_tokens_seen": 8421952, + "step": 12490 + }, + { + "epoch": 0.305254928786065, + "grad_norm": 18.86580467224121, + "learning_rate": 1.220892167880002e-06, + "loss": 0.0815, + "num_input_tokens_seen": 8425024, + "step": 12495 + }, + { + "epoch": 0.3053770796179122, + "grad_norm": 32.913883209228516, + "learning_rate": 1.2213807592710218e-06, + "loss": 0.1205, + "num_input_tokens_seen": 8428352, + "step": 12500 + }, + { + "epoch": 0.30549923044975935, + "grad_norm": 0.14717574417591095, + "learning_rate": 1.2218693506620413e-06, + "loss": 0.0864, + "num_input_tokens_seen": 8431488, + "step": 12505 + }, + { + "epoch": 0.30562138128160654, + "grad_norm": 0.622601330280304, + "learning_rate": 1.222357942053061e-06, + "loss": 0.0892, + "num_input_tokens_seen": 8434944, + "step": 12510 + }, + { + "epoch": 0.3057435321134537, + "grad_norm": 0.17498765885829926, + "learning_rate": 1.2228465334440806e-06, + "loss": 0.0486, + "num_input_tokens_seen": 8438272, + "step": 12515 + }, + { + "epoch": 0.30586568294530087, + "grad_norm": 14.947955131530762, + "learning_rate": 1.2233351248351003e-06, + "loss": 0.1224, + "num_input_tokens_seen": 8441792, + "step": 12520 + }, + { + "epoch": 0.305987833777148, + "grad_norm": 8.9529447555542, + "learning_rate": 1.22382371622612e-06, + "loss": 0.2268, + "num_input_tokens_seen": 8444864, + "step": 12525 + }, + { + "epoch": 0.3061099846089952, + "grad_norm": 14.22401237487793, + "learning_rate": 1.2243123076171397e-06, + "loss": 0.107, + "num_input_tokens_seen": 8448320, + "step": 12530 + }, + { + "epoch": 0.3062321354408424, + "grad_norm": 32.58435821533203, + "learning_rate": 1.2248008990081596e-06, + "loss": 0.1659, + "num_input_tokens_seen": 8451904, + "step": 12535 + }, + { + "epoch": 0.3063542862726895, + "grad_norm": 42.00128173828125, + "learning_rate": 1.225289490399179e-06, + "loss": 0.1476, + "num_input_tokens_seen": 8455552, + "step": 12540 + }, + { + "epoch": 0.3064764371045367, + "grad_norm": 0.3023090064525604, + "learning_rate": 1.2257780817901987e-06, + "loss": 0.041, + "num_input_tokens_seen": 8458752, + "step": 12545 + }, + { + "epoch": 0.30659858793638384, + "grad_norm": 30.7508487701416, + "learning_rate": 1.2262666731812186e-06, + "loss": 0.1281, + "num_input_tokens_seen": 8461888, + "step": 12550 + }, + { + "epoch": 0.306720738768231, + "grad_norm": 16.92136001586914, + "learning_rate": 1.226755264572238e-06, + "loss": 0.0974, + "num_input_tokens_seen": 8465536, + "step": 12555 + }, + { + "epoch": 0.30684288960007816, + "grad_norm": 1.042176604270935, + "learning_rate": 1.2272438559632578e-06, + "loss": 0.1158, + "num_input_tokens_seen": 8468608, + "step": 12560 + }, + { + "epoch": 0.30696504043192535, + "grad_norm": 0.04367939010262489, + "learning_rate": 1.2277324473542776e-06, + "loss": 0.058, + "num_input_tokens_seen": 8472192, + "step": 12565 + }, + { + "epoch": 0.3070871912637725, + "grad_norm": 18.037769317626953, + "learning_rate": 1.2282210387452971e-06, + "loss": 0.067, + "num_input_tokens_seen": 8476224, + "step": 12570 + }, + { + "epoch": 0.3072093420956197, + "grad_norm": 1.1944955587387085, + "learning_rate": 1.2287096301363168e-06, + "loss": 0.2403, + "num_input_tokens_seen": 8479680, + "step": 12575 + }, + { + "epoch": 0.3073314929274668, + "grad_norm": 28.309049606323242, + "learning_rate": 1.2291982215273367e-06, + "loss": 0.1631, + "num_input_tokens_seen": 8483392, + "step": 12580 + }, + { + "epoch": 0.307453643759314, + "grad_norm": 32.31857681274414, + "learning_rate": 1.2296868129183564e-06, + "loss": 0.1993, + "num_input_tokens_seen": 8487232, + "step": 12585 + }, + { + "epoch": 0.3075757945911612, + "grad_norm": 26.554603576660156, + "learning_rate": 1.2301754043093758e-06, + "loss": 0.1225, + "num_input_tokens_seen": 8490944, + "step": 12590 + }, + { + "epoch": 0.3076979454230083, + "grad_norm": 28.783294677734375, + "learning_rate": 1.2306639957003957e-06, + "loss": 0.0538, + "num_input_tokens_seen": 8494336, + "step": 12595 + }, + { + "epoch": 0.3078200962548555, + "grad_norm": 34.531803131103516, + "learning_rate": 1.2311525870914154e-06, + "loss": 0.1477, + "num_input_tokens_seen": 8497472, + "step": 12600 + }, + { + "epoch": 0.30794224708670265, + "grad_norm": 20.531259536743164, + "learning_rate": 1.2316411784824349e-06, + "loss": 0.0984, + "num_input_tokens_seen": 8501056, + "step": 12605 + }, + { + "epoch": 0.30806439791854984, + "grad_norm": 1.207240104675293, + "learning_rate": 1.2321297698734548e-06, + "loss": 0.0836, + "num_input_tokens_seen": 8504320, + "step": 12610 + }, + { + "epoch": 0.30818654875039697, + "grad_norm": 24.03017807006836, + "learning_rate": 1.2326183612644745e-06, + "loss": 0.249, + "num_input_tokens_seen": 8507648, + "step": 12615 + }, + { + "epoch": 0.30830869958224416, + "grad_norm": 39.64318084716797, + "learning_rate": 1.2331069526554941e-06, + "loss": 0.1641, + "num_input_tokens_seen": 8510720, + "step": 12620 + }, + { + "epoch": 0.3084308504140913, + "grad_norm": 35.29051971435547, + "learning_rate": 1.2335955440465138e-06, + "loss": 0.1154, + "num_input_tokens_seen": 8513984, + "step": 12625 + }, + { + "epoch": 0.3085530012459385, + "grad_norm": 1.6317697763442993, + "learning_rate": 1.2340841354375335e-06, + "loss": 0.1706, + "num_input_tokens_seen": 8517312, + "step": 12630 + }, + { + "epoch": 0.3086751520777857, + "grad_norm": 11.825055122375488, + "learning_rate": 1.2345727268285532e-06, + "loss": 0.025, + "num_input_tokens_seen": 8520768, + "step": 12635 + }, + { + "epoch": 0.3087973029096328, + "grad_norm": 11.842734336853027, + "learning_rate": 1.2350613182195729e-06, + "loss": 0.1898, + "num_input_tokens_seen": 8523712, + "step": 12640 + }, + { + "epoch": 0.30891945374148, + "grad_norm": 66.35395050048828, + "learning_rate": 1.2355499096105926e-06, + "loss": 0.2702, + "num_input_tokens_seen": 8526784, + "step": 12645 + }, + { + "epoch": 0.30904160457332713, + "grad_norm": 21.204254150390625, + "learning_rate": 1.2360385010016122e-06, + "loss": 0.0935, + "num_input_tokens_seen": 8530048, + "step": 12650 + }, + { + "epoch": 0.3091637554051743, + "grad_norm": 41.15862274169922, + "learning_rate": 1.2365270923926321e-06, + "loss": 0.1544, + "num_input_tokens_seen": 8533056, + "step": 12655 + }, + { + "epoch": 0.30928590623702146, + "grad_norm": 34.149261474609375, + "learning_rate": 1.2370156837836516e-06, + "loss": 0.1554, + "num_input_tokens_seen": 8536320, + "step": 12660 + }, + { + "epoch": 0.30940805706886865, + "grad_norm": 7.996495723724365, + "learning_rate": 1.2375042751746713e-06, + "loss": 0.1172, + "num_input_tokens_seen": 8539392, + "step": 12665 + }, + { + "epoch": 0.3095302079007158, + "grad_norm": 27.449047088623047, + "learning_rate": 1.2379928665656912e-06, + "loss": 0.1463, + "num_input_tokens_seen": 8543168, + "step": 12670 + }, + { + "epoch": 0.30965235873256297, + "grad_norm": 32.094703674316406, + "learning_rate": 1.2384814579567106e-06, + "loss": 0.1172, + "num_input_tokens_seen": 8546496, + "step": 12675 + }, + { + "epoch": 0.30977450956441016, + "grad_norm": 10.903212547302246, + "learning_rate": 1.2389700493477303e-06, + "loss": 0.0659, + "num_input_tokens_seen": 8550336, + "step": 12680 + }, + { + "epoch": 0.3098966603962573, + "grad_norm": 3.5534703731536865, + "learning_rate": 1.2394586407387502e-06, + "loss": 0.1353, + "num_input_tokens_seen": 8553600, + "step": 12685 + }, + { + "epoch": 0.3100188112281045, + "grad_norm": 6.912077903747559, + "learning_rate": 1.23994723212977e-06, + "loss": 0.1039, + "num_input_tokens_seen": 8557184, + "step": 12690 + }, + { + "epoch": 0.3101409620599516, + "grad_norm": 32.508079528808594, + "learning_rate": 1.2404358235207894e-06, + "loss": 0.1146, + "num_input_tokens_seen": 8561152, + "step": 12695 + }, + { + "epoch": 0.3102631128917988, + "grad_norm": 22.530750274658203, + "learning_rate": 1.2409244149118093e-06, + "loss": 0.1317, + "num_input_tokens_seen": 8564992, + "step": 12700 + }, + { + "epoch": 0.31038526372364594, + "grad_norm": 23.627466201782227, + "learning_rate": 1.241413006302829e-06, + "loss": 0.1771, + "num_input_tokens_seen": 8568448, + "step": 12705 + }, + { + "epoch": 0.31050741455549313, + "grad_norm": 3.3501780033111572, + "learning_rate": 1.2419015976938484e-06, + "loss": 0.0726, + "num_input_tokens_seen": 8572224, + "step": 12710 + }, + { + "epoch": 0.31062956538734027, + "grad_norm": 20.607057571411133, + "learning_rate": 1.2423901890848683e-06, + "loss": 0.1894, + "num_input_tokens_seen": 8576064, + "step": 12715 + }, + { + "epoch": 0.31075171621918746, + "grad_norm": 23.418180465698242, + "learning_rate": 1.242878780475888e-06, + "loss": 0.1499, + "num_input_tokens_seen": 8579840, + "step": 12720 + }, + { + "epoch": 0.3108738670510346, + "grad_norm": 0.8048353791236877, + "learning_rate": 1.2433673718669075e-06, + "loss": 0.021, + "num_input_tokens_seen": 8583488, + "step": 12725 + }, + { + "epoch": 0.3109960178828818, + "grad_norm": 36.71179962158203, + "learning_rate": 1.2438559632579273e-06, + "loss": 0.2072, + "num_input_tokens_seen": 8586688, + "step": 12730 + }, + { + "epoch": 0.31111816871472897, + "grad_norm": 5.470469951629639, + "learning_rate": 1.244344554648947e-06, + "loss": 0.0856, + "num_input_tokens_seen": 8589888, + "step": 12735 + }, + { + "epoch": 0.3112403195465761, + "grad_norm": 20.87204360961914, + "learning_rate": 1.2448331460399667e-06, + "loss": 0.1108, + "num_input_tokens_seen": 8593088, + "step": 12740 + }, + { + "epoch": 0.3113624703784233, + "grad_norm": 32.569461822509766, + "learning_rate": 1.2453217374309864e-06, + "loss": 0.0851, + "num_input_tokens_seen": 8596288, + "step": 12745 + }, + { + "epoch": 0.31148462121027043, + "grad_norm": 2.8659555912017822, + "learning_rate": 1.245810328822006e-06, + "loss": 0.1089, + "num_input_tokens_seen": 8600000, + "step": 12750 + }, + { + "epoch": 0.3116067720421176, + "grad_norm": 39.90515899658203, + "learning_rate": 1.2462989202130258e-06, + "loss": 0.0856, + "num_input_tokens_seen": 8602944, + "step": 12755 + }, + { + "epoch": 0.31172892287396475, + "grad_norm": 1.3492740392684937, + "learning_rate": 1.2467875116040454e-06, + "loss": 0.1233, + "num_input_tokens_seen": 8606336, + "step": 12760 + }, + { + "epoch": 0.31185107370581194, + "grad_norm": 21.49714469909668, + "learning_rate": 1.2472761029950651e-06, + "loss": 0.185, + "num_input_tokens_seen": 8609536, + "step": 12765 + }, + { + "epoch": 0.3119732245376591, + "grad_norm": 1.6193816661834717, + "learning_rate": 1.2477646943860848e-06, + "loss": 0.1804, + "num_input_tokens_seen": 8612672, + "step": 12770 + }, + { + "epoch": 0.31209537536950627, + "grad_norm": 19.069721221923828, + "learning_rate": 1.2482532857771047e-06, + "loss": 0.1443, + "num_input_tokens_seen": 8616128, + "step": 12775 + }, + { + "epoch": 0.31221752620135346, + "grad_norm": 21.02128791809082, + "learning_rate": 1.2487418771681242e-06, + "loss": 0.1616, + "num_input_tokens_seen": 8619584, + "step": 12780 + }, + { + "epoch": 0.3123396770332006, + "grad_norm": 21.080299377441406, + "learning_rate": 1.2492304685591438e-06, + "loss": 0.1156, + "num_input_tokens_seen": 8623040, + "step": 12785 + }, + { + "epoch": 0.3124618278650478, + "grad_norm": 15.542115211486816, + "learning_rate": 1.2497190599501637e-06, + "loss": 0.1364, + "num_input_tokens_seen": 8626432, + "step": 12790 + }, + { + "epoch": 0.3125839786968949, + "grad_norm": 26.10782814025879, + "learning_rate": 1.2502076513411832e-06, + "loss": 0.1408, + "num_input_tokens_seen": 8630400, + "step": 12795 + }, + { + "epoch": 0.3127061295287421, + "grad_norm": 17.829648971557617, + "learning_rate": 1.2506962427322029e-06, + "loss": 0.0587, + "num_input_tokens_seen": 8633536, + "step": 12800 + }, + { + "epoch": 0.31282828036058924, + "grad_norm": 25.496232986450195, + "learning_rate": 1.2511848341232228e-06, + "loss": 0.1218, + "num_input_tokens_seen": 8636992, + "step": 12805 + }, + { + "epoch": 0.31295043119243643, + "grad_norm": 11.133720397949219, + "learning_rate": 1.2516734255142425e-06, + "loss": 0.0968, + "num_input_tokens_seen": 8640448, + "step": 12810 + }, + { + "epoch": 0.31307258202428356, + "grad_norm": 11.136938095092773, + "learning_rate": 1.252162016905262e-06, + "loss": 0.1877, + "num_input_tokens_seen": 8643456, + "step": 12815 + }, + { + "epoch": 0.31319473285613075, + "grad_norm": 24.134342193603516, + "learning_rate": 1.2526506082962818e-06, + "loss": 0.1171, + "num_input_tokens_seen": 8647104, + "step": 12820 + }, + { + "epoch": 0.31331688368797794, + "grad_norm": 30.42660903930664, + "learning_rate": 1.2531391996873015e-06, + "loss": 0.1403, + "num_input_tokens_seen": 8650624, + "step": 12825 + }, + { + "epoch": 0.3134390345198251, + "grad_norm": 26.292850494384766, + "learning_rate": 1.253627791078321e-06, + "loss": 0.0747, + "num_input_tokens_seen": 8654144, + "step": 12830 + }, + { + "epoch": 0.31356118535167227, + "grad_norm": 36.40958786010742, + "learning_rate": 1.2541163824693409e-06, + "loss": 0.0731, + "num_input_tokens_seen": 8657792, + "step": 12835 + }, + { + "epoch": 0.3136833361835194, + "grad_norm": 55.101200103759766, + "learning_rate": 1.2546049738603605e-06, + "loss": 0.1482, + "num_input_tokens_seen": 8661056, + "step": 12840 + }, + { + "epoch": 0.3138054870153666, + "grad_norm": 3.0964555740356445, + "learning_rate": 1.25509356525138e-06, + "loss": 0.0934, + "num_input_tokens_seen": 8664448, + "step": 12845 + }, + { + "epoch": 0.3139276378472137, + "grad_norm": 33.852081298828125, + "learning_rate": 1.2555821566424e-06, + "loss": 0.2028, + "num_input_tokens_seen": 8667840, + "step": 12850 + }, + { + "epoch": 0.3140497886790609, + "grad_norm": 3.9148716926574707, + "learning_rate": 1.2560707480334196e-06, + "loss": 0.1502, + "num_input_tokens_seen": 8671744, + "step": 12855 + }, + { + "epoch": 0.31417193951090805, + "grad_norm": 0.09759137779474258, + "learning_rate": 1.2565593394244393e-06, + "loss": 0.0424, + "num_input_tokens_seen": 8675200, + "step": 12860 + }, + { + "epoch": 0.31429409034275524, + "grad_norm": 7.122541904449463, + "learning_rate": 1.257047930815459e-06, + "loss": 0.0822, + "num_input_tokens_seen": 8678464, + "step": 12865 + }, + { + "epoch": 0.3144162411746024, + "grad_norm": 0.599284827709198, + "learning_rate": 1.2575365222064786e-06, + "loss": 0.128, + "num_input_tokens_seen": 8681600, + "step": 12870 + }, + { + "epoch": 0.31453839200644956, + "grad_norm": 19.219327926635742, + "learning_rate": 1.2580251135974983e-06, + "loss": 0.1034, + "num_input_tokens_seen": 8685120, + "step": 12875 + }, + { + "epoch": 0.31466054283829675, + "grad_norm": 26.379104614257812, + "learning_rate": 1.258513704988518e-06, + "loss": 0.2057, + "num_input_tokens_seen": 8688448, + "step": 12880 + }, + { + "epoch": 0.3147826936701439, + "grad_norm": 34.90458297729492, + "learning_rate": 1.2590022963795377e-06, + "loss": 0.1319, + "num_input_tokens_seen": 8691584, + "step": 12885 + }, + { + "epoch": 0.3149048445019911, + "grad_norm": 3.304243564605713, + "learning_rate": 1.2594908877705574e-06, + "loss": 0.0822, + "num_input_tokens_seen": 8695104, + "step": 12890 + }, + { + "epoch": 0.3150269953338382, + "grad_norm": 0.8440029621124268, + "learning_rate": 1.2599794791615773e-06, + "loss": 0.1366, + "num_input_tokens_seen": 8698944, + "step": 12895 + }, + { + "epoch": 0.3151491461656854, + "grad_norm": 10.932198524475098, + "learning_rate": 1.2604680705525967e-06, + "loss": 0.1846, + "num_input_tokens_seen": 8702784, + "step": 12900 + }, + { + "epoch": 0.31527129699753254, + "grad_norm": 11.124720573425293, + "learning_rate": 1.2609566619436164e-06, + "loss": 0.0956, + "num_input_tokens_seen": 8706560, + "step": 12905 + }, + { + "epoch": 0.3153934478293797, + "grad_norm": 25.000829696655273, + "learning_rate": 1.2614452533346363e-06, + "loss": 0.1179, + "num_input_tokens_seen": 8709632, + "step": 12910 + }, + { + "epoch": 0.31551559866122686, + "grad_norm": 31.354000091552734, + "learning_rate": 1.2619338447256558e-06, + "loss": 0.1559, + "num_input_tokens_seen": 8712960, + "step": 12915 + }, + { + "epoch": 0.31563774949307405, + "grad_norm": 3.164998769760132, + "learning_rate": 1.2624224361166754e-06, + "loss": 0.1198, + "num_input_tokens_seen": 8716928, + "step": 12920 + }, + { + "epoch": 0.31575990032492124, + "grad_norm": 8.413860321044922, + "learning_rate": 1.2629110275076953e-06, + "loss": 0.1578, + "num_input_tokens_seen": 8720320, + "step": 12925 + }, + { + "epoch": 0.3158820511567684, + "grad_norm": 2.5737593173980713, + "learning_rate": 1.263399618898715e-06, + "loss": 0.0998, + "num_input_tokens_seen": 8723776, + "step": 12930 + }, + { + "epoch": 0.31600420198861556, + "grad_norm": 56.24729537963867, + "learning_rate": 1.2638882102897345e-06, + "loss": 0.1375, + "num_input_tokens_seen": 8727424, + "step": 12935 + }, + { + "epoch": 0.3161263528204627, + "grad_norm": 22.61892318725586, + "learning_rate": 1.2643768016807544e-06, + "loss": 0.1309, + "num_input_tokens_seen": 8730432, + "step": 12940 + }, + { + "epoch": 0.3162485036523099, + "grad_norm": 31.44400405883789, + "learning_rate": 1.264865393071774e-06, + "loss": 0.1737, + "num_input_tokens_seen": 8733952, + "step": 12945 + }, + { + "epoch": 0.316370654484157, + "grad_norm": 16.67905616760254, + "learning_rate": 1.2653539844627935e-06, + "loss": 0.0793, + "num_input_tokens_seen": 8736960, + "step": 12950 + }, + { + "epoch": 0.3164928053160042, + "grad_norm": 10.576042175292969, + "learning_rate": 1.2658425758538134e-06, + "loss": 0.075, + "num_input_tokens_seen": 8740160, + "step": 12955 + }, + { + "epoch": 0.31661495614785135, + "grad_norm": 9.4652681350708, + "learning_rate": 1.2663311672448331e-06, + "loss": 0.1446, + "num_input_tokens_seen": 8743744, + "step": 12960 + }, + { + "epoch": 0.31673710697969854, + "grad_norm": 10.283336639404297, + "learning_rate": 1.2668197586358528e-06, + "loss": 0.0679, + "num_input_tokens_seen": 8746880, + "step": 12965 + }, + { + "epoch": 0.31685925781154567, + "grad_norm": 7.207754611968994, + "learning_rate": 1.2673083500268725e-06, + "loss": 0.117, + "num_input_tokens_seen": 8750272, + "step": 12970 + }, + { + "epoch": 0.31698140864339286, + "grad_norm": 1.7486845254898071, + "learning_rate": 1.2677969414178922e-06, + "loss": 0.1294, + "num_input_tokens_seen": 8753728, + "step": 12975 + }, + { + "epoch": 0.31710355947524005, + "grad_norm": 7.140497207641602, + "learning_rate": 1.2682855328089118e-06, + "loss": 0.1858, + "num_input_tokens_seen": 8757248, + "step": 12980 + }, + { + "epoch": 0.3172257103070872, + "grad_norm": 15.590055465698242, + "learning_rate": 1.2687741241999315e-06, + "loss": 0.1265, + "num_input_tokens_seen": 8760512, + "step": 12985 + }, + { + "epoch": 0.3173478611389344, + "grad_norm": 12.499231338500977, + "learning_rate": 1.2692627155909512e-06, + "loss": 0.1477, + "num_input_tokens_seen": 8763584, + "step": 12990 + }, + { + "epoch": 0.3174700119707815, + "grad_norm": 1.9817485809326172, + "learning_rate": 1.2697513069819709e-06, + "loss": 0.045, + "num_input_tokens_seen": 8766912, + "step": 12995 + }, + { + "epoch": 0.3175921628026287, + "grad_norm": 34.18552017211914, + "learning_rate": 1.2702398983729906e-06, + "loss": 0.0796, + "num_input_tokens_seen": 8770624, + "step": 13000 + }, + { + "epoch": 0.31771431363447583, + "grad_norm": 5.509322643280029, + "learning_rate": 1.2707284897640102e-06, + "loss": 0.067, + "num_input_tokens_seen": 8773696, + "step": 13005 + }, + { + "epoch": 0.317836464466323, + "grad_norm": 11.841221809387207, + "learning_rate": 1.27121708115503e-06, + "loss": 0.254, + "num_input_tokens_seen": 8777280, + "step": 13010 + }, + { + "epoch": 0.31795861529817016, + "grad_norm": 20.776254653930664, + "learning_rate": 1.2717056725460498e-06, + "loss": 0.1108, + "num_input_tokens_seen": 8780224, + "step": 13015 + }, + { + "epoch": 0.31808076613001735, + "grad_norm": 0.46001580357551575, + "learning_rate": 1.2721942639370693e-06, + "loss": 0.138, + "num_input_tokens_seen": 8783552, + "step": 13020 + }, + { + "epoch": 0.31820291696186453, + "grad_norm": 5.034506797790527, + "learning_rate": 1.272682855328089e-06, + "loss": 0.1391, + "num_input_tokens_seen": 8786816, + "step": 13025 + }, + { + "epoch": 0.31832506779371167, + "grad_norm": 8.84393310546875, + "learning_rate": 1.2731714467191089e-06, + "loss": 0.0396, + "num_input_tokens_seen": 8790016, + "step": 13030 + }, + { + "epoch": 0.31844721862555886, + "grad_norm": 23.799724578857422, + "learning_rate": 1.2736600381101283e-06, + "loss": 0.1898, + "num_input_tokens_seen": 8793216, + "step": 13035 + }, + { + "epoch": 0.318569369457406, + "grad_norm": 33.30390930175781, + "learning_rate": 1.274148629501148e-06, + "loss": 0.0986, + "num_input_tokens_seen": 8796352, + "step": 13040 + }, + { + "epoch": 0.3186915202892532, + "grad_norm": 19.92257308959961, + "learning_rate": 1.274637220892168e-06, + "loss": 0.0983, + "num_input_tokens_seen": 8799360, + "step": 13045 + }, + { + "epoch": 0.3188136711211003, + "grad_norm": 0.6676555871963501, + "learning_rate": 1.2751258122831876e-06, + "loss": 0.0892, + "num_input_tokens_seen": 8802304, + "step": 13050 + }, + { + "epoch": 0.3189358219529475, + "grad_norm": 25.848312377929688, + "learning_rate": 1.275614403674207e-06, + "loss": 0.1344, + "num_input_tokens_seen": 8805312, + "step": 13055 + }, + { + "epoch": 0.31905797278479464, + "grad_norm": 13.977712631225586, + "learning_rate": 1.276102995065227e-06, + "loss": 0.1088, + "num_input_tokens_seen": 8808256, + "step": 13060 + }, + { + "epoch": 0.31918012361664183, + "grad_norm": 7.164159774780273, + "learning_rate": 1.2765915864562466e-06, + "loss": 0.0799, + "num_input_tokens_seen": 8811648, + "step": 13065 + }, + { + "epoch": 0.319302274448489, + "grad_norm": 34.81647491455078, + "learning_rate": 1.277080177847266e-06, + "loss": 0.079, + "num_input_tokens_seen": 8814784, + "step": 13070 + }, + { + "epoch": 0.31942442528033616, + "grad_norm": 38.616493225097656, + "learning_rate": 1.277568769238286e-06, + "loss": 0.0918, + "num_input_tokens_seen": 8817920, + "step": 13075 + }, + { + "epoch": 0.31954657611218334, + "grad_norm": 13.898828506469727, + "learning_rate": 1.2780573606293057e-06, + "loss": 0.0483, + "num_input_tokens_seen": 8821248, + "step": 13080 + }, + { + "epoch": 0.3196687269440305, + "grad_norm": 35.597225189208984, + "learning_rate": 1.2785459520203254e-06, + "loss": 0.0595, + "num_input_tokens_seen": 8824704, + "step": 13085 + }, + { + "epoch": 0.31979087777587767, + "grad_norm": 39.44450759887695, + "learning_rate": 1.279034543411345e-06, + "loss": 0.1064, + "num_input_tokens_seen": 8828032, + "step": 13090 + }, + { + "epoch": 0.3199130286077248, + "grad_norm": 3.0465939044952393, + "learning_rate": 1.2795231348023647e-06, + "loss": 0.1114, + "num_input_tokens_seen": 8831040, + "step": 13095 + }, + { + "epoch": 0.320035179439572, + "grad_norm": 10.89338207244873, + "learning_rate": 1.2800117261933844e-06, + "loss": 0.3178, + "num_input_tokens_seen": 8834240, + "step": 13100 + }, + { + "epoch": 0.3201573302714191, + "grad_norm": 40.36931610107422, + "learning_rate": 1.280500317584404e-06, + "loss": 0.1605, + "num_input_tokens_seen": 8837696, + "step": 13105 + }, + { + "epoch": 0.3202794811032663, + "grad_norm": 32.20518493652344, + "learning_rate": 1.2809889089754238e-06, + "loss": 0.1192, + "num_input_tokens_seen": 8840960, + "step": 13110 + }, + { + "epoch": 0.32040163193511345, + "grad_norm": 17.135501861572266, + "learning_rate": 1.2814775003664434e-06, + "loss": 0.0765, + "num_input_tokens_seen": 8844032, + "step": 13115 + }, + { + "epoch": 0.32052378276696064, + "grad_norm": 3.2122209072113037, + "learning_rate": 1.2819660917574633e-06, + "loss": 0.0768, + "num_input_tokens_seen": 8847424, + "step": 13120 + }, + { + "epoch": 0.32064593359880783, + "grad_norm": 41.46558380126953, + "learning_rate": 1.2824546831484828e-06, + "loss": 0.142, + "num_input_tokens_seen": 8850560, + "step": 13125 + }, + { + "epoch": 0.32076808443065497, + "grad_norm": 20.672643661499023, + "learning_rate": 1.2829432745395025e-06, + "loss": 0.0831, + "num_input_tokens_seen": 8854016, + "step": 13130 + }, + { + "epoch": 0.32089023526250215, + "grad_norm": 0.6543686389923096, + "learning_rate": 1.2834318659305224e-06, + "loss": 0.1633, + "num_input_tokens_seen": 8857280, + "step": 13135 + }, + { + "epoch": 0.3210123860943493, + "grad_norm": 23.34600067138672, + "learning_rate": 1.2839204573215418e-06, + "loss": 0.0974, + "num_input_tokens_seen": 8860672, + "step": 13140 + }, + { + "epoch": 0.3211345369261965, + "grad_norm": 24.48014259338379, + "learning_rate": 1.2844090487125615e-06, + "loss": 0.0883, + "num_input_tokens_seen": 8863808, + "step": 13145 + }, + { + "epoch": 0.3212566877580436, + "grad_norm": 37.018714904785156, + "learning_rate": 1.2848976401035814e-06, + "loss": 0.2072, + "num_input_tokens_seen": 8867392, + "step": 13150 + }, + { + "epoch": 0.3213788385898908, + "grad_norm": 28.14137077331543, + "learning_rate": 1.2853862314946009e-06, + "loss": 0.1008, + "num_input_tokens_seen": 8870592, + "step": 13155 + }, + { + "epoch": 0.32150098942173794, + "grad_norm": 19.779029846191406, + "learning_rate": 1.2858748228856206e-06, + "loss": 0.087, + "num_input_tokens_seen": 8873856, + "step": 13160 + }, + { + "epoch": 0.3216231402535851, + "grad_norm": 0.5094308257102966, + "learning_rate": 1.2863634142766405e-06, + "loss": 0.1631, + "num_input_tokens_seen": 8877056, + "step": 13165 + }, + { + "epoch": 0.3217452910854323, + "grad_norm": 21.85708236694336, + "learning_rate": 1.2868520056676601e-06, + "loss": 0.1666, + "num_input_tokens_seen": 8880320, + "step": 13170 + }, + { + "epoch": 0.32186744191727945, + "grad_norm": 11.574304580688477, + "learning_rate": 1.2873405970586796e-06, + "loss": 0.1232, + "num_input_tokens_seen": 8883520, + "step": 13175 + }, + { + "epoch": 0.32198959274912664, + "grad_norm": 0.3177202045917511, + "learning_rate": 1.2878291884496995e-06, + "loss": 0.0721, + "num_input_tokens_seen": 8886464, + "step": 13180 + }, + { + "epoch": 0.3221117435809738, + "grad_norm": 0.4103322923183441, + "learning_rate": 1.2883177798407192e-06, + "loss": 0.1252, + "num_input_tokens_seen": 8889664, + "step": 13185 + }, + { + "epoch": 0.32223389441282096, + "grad_norm": 40.97935485839844, + "learning_rate": 1.2888063712317387e-06, + "loss": 0.0993, + "num_input_tokens_seen": 8892736, + "step": 13190 + }, + { + "epoch": 0.3223560452446681, + "grad_norm": 24.75423240661621, + "learning_rate": 1.2892949626227586e-06, + "loss": 0.1207, + "num_input_tokens_seen": 8895936, + "step": 13195 + }, + { + "epoch": 0.3224781960765153, + "grad_norm": 31.197431564331055, + "learning_rate": 1.2897835540137782e-06, + "loss": 0.1758, + "num_input_tokens_seen": 8899264, + "step": 13200 + }, + { + "epoch": 0.3226003469083624, + "grad_norm": 6.995538234710693, + "learning_rate": 1.290272145404798e-06, + "loss": 0.1376, + "num_input_tokens_seen": 8902528, + "step": 13205 + }, + { + "epoch": 0.3227224977402096, + "grad_norm": 4.579649448394775, + "learning_rate": 1.2907607367958176e-06, + "loss": 0.0735, + "num_input_tokens_seen": 8905792, + "step": 13210 + }, + { + "epoch": 0.3228446485720568, + "grad_norm": 26.562294006347656, + "learning_rate": 1.2912493281868373e-06, + "loss": 0.1756, + "num_input_tokens_seen": 8908928, + "step": 13215 + }, + { + "epoch": 0.32296679940390394, + "grad_norm": 46.12147903442383, + "learning_rate": 1.291737919577857e-06, + "loss": 0.2033, + "num_input_tokens_seen": 8912128, + "step": 13220 + }, + { + "epoch": 0.3230889502357511, + "grad_norm": 7.420219898223877, + "learning_rate": 1.2922265109688766e-06, + "loss": 0.0812, + "num_input_tokens_seen": 8915136, + "step": 13225 + }, + { + "epoch": 0.32321110106759826, + "grad_norm": 29.414127349853516, + "learning_rate": 1.2927151023598963e-06, + "loss": 0.1386, + "num_input_tokens_seen": 8918400, + "step": 13230 + }, + { + "epoch": 0.32333325189944545, + "grad_norm": 38.283241271972656, + "learning_rate": 1.293203693750916e-06, + "loss": 0.1314, + "num_input_tokens_seen": 8921280, + "step": 13235 + }, + { + "epoch": 0.3234554027312926, + "grad_norm": 46.68159103393555, + "learning_rate": 1.293692285141936e-06, + "loss": 0.1797, + "num_input_tokens_seen": 8924288, + "step": 13240 + }, + { + "epoch": 0.3235775535631398, + "grad_norm": 12.005806922912598, + "learning_rate": 1.2941808765329554e-06, + "loss": 0.1386, + "num_input_tokens_seen": 8928192, + "step": 13245 + }, + { + "epoch": 0.3236997043949869, + "grad_norm": 28.0427188873291, + "learning_rate": 1.294669467923975e-06, + "loss": 0.068, + "num_input_tokens_seen": 8931968, + "step": 13250 + }, + { + "epoch": 0.3238218552268341, + "grad_norm": 33.40077209472656, + "learning_rate": 1.295158059314995e-06, + "loss": 0.2441, + "num_input_tokens_seen": 8935168, + "step": 13255 + }, + { + "epoch": 0.32394400605868123, + "grad_norm": 29.742095947265625, + "learning_rate": 1.2956466507060144e-06, + "loss": 0.1352, + "num_input_tokens_seen": 8938240, + "step": 13260 + }, + { + "epoch": 0.3240661568905284, + "grad_norm": 23.64820098876953, + "learning_rate": 1.296135242097034e-06, + "loss": 0.1099, + "num_input_tokens_seen": 8941696, + "step": 13265 + }, + { + "epoch": 0.3241883077223756, + "grad_norm": 17.8576717376709, + "learning_rate": 1.296623833488054e-06, + "loss": 0.0451, + "num_input_tokens_seen": 8945408, + "step": 13270 + }, + { + "epoch": 0.32431045855422275, + "grad_norm": 11.507369995117188, + "learning_rate": 1.2971124248790735e-06, + "loss": 0.1093, + "num_input_tokens_seen": 8948288, + "step": 13275 + }, + { + "epoch": 0.32443260938606994, + "grad_norm": 7.504124164581299, + "learning_rate": 1.2976010162700931e-06, + "loss": 0.0524, + "num_input_tokens_seen": 8951808, + "step": 13280 + }, + { + "epoch": 0.32455476021791707, + "grad_norm": 17.95069122314453, + "learning_rate": 1.298089607661113e-06, + "loss": 0.0399, + "num_input_tokens_seen": 8955456, + "step": 13285 + }, + { + "epoch": 0.32467691104976426, + "grad_norm": 31.09222984313965, + "learning_rate": 1.2985781990521327e-06, + "loss": 0.1968, + "num_input_tokens_seen": 8958720, + "step": 13290 + }, + { + "epoch": 0.3247990618816114, + "grad_norm": 13.580461502075195, + "learning_rate": 1.2990667904431522e-06, + "loss": 0.1475, + "num_input_tokens_seen": 8961984, + "step": 13295 + }, + { + "epoch": 0.3249212127134586, + "grad_norm": 36.72895050048828, + "learning_rate": 1.299555381834172e-06, + "loss": 0.1347, + "num_input_tokens_seen": 8965888, + "step": 13300 + }, + { + "epoch": 0.3250433635453057, + "grad_norm": 29.59313201904297, + "learning_rate": 1.3000439732251918e-06, + "loss": 0.2356, + "num_input_tokens_seen": 8969152, + "step": 13305 + }, + { + "epoch": 0.3251655143771529, + "grad_norm": 28.670122146606445, + "learning_rate": 1.3005325646162112e-06, + "loss": 0.2769, + "num_input_tokens_seen": 8972224, + "step": 13310 + }, + { + "epoch": 0.3252876652090001, + "grad_norm": 9.596991539001465, + "learning_rate": 1.3010211560072311e-06, + "loss": 0.1549, + "num_input_tokens_seen": 8975360, + "step": 13315 + }, + { + "epoch": 0.32540981604084723, + "grad_norm": 4.092257976531982, + "learning_rate": 1.3015097473982508e-06, + "loss": 0.0598, + "num_input_tokens_seen": 8978560, + "step": 13320 + }, + { + "epoch": 0.3255319668726944, + "grad_norm": 22.807069778442383, + "learning_rate": 1.3019983387892705e-06, + "loss": 0.1031, + "num_input_tokens_seen": 8981504, + "step": 13325 + }, + { + "epoch": 0.32565411770454156, + "grad_norm": 14.356558799743652, + "learning_rate": 1.3024869301802902e-06, + "loss": 0.1894, + "num_input_tokens_seen": 8984640, + "step": 13330 + }, + { + "epoch": 0.32577626853638875, + "grad_norm": 3.0045337677001953, + "learning_rate": 1.3029755215713098e-06, + "loss": 0.0675, + "num_input_tokens_seen": 8987648, + "step": 13335 + }, + { + "epoch": 0.3258984193682359, + "grad_norm": 1.4146621227264404, + "learning_rate": 1.3034641129623295e-06, + "loss": 0.0484, + "num_input_tokens_seen": 8991232, + "step": 13340 + }, + { + "epoch": 0.32602057020008307, + "grad_norm": 8.540959358215332, + "learning_rate": 1.3039527043533492e-06, + "loss": 0.0963, + "num_input_tokens_seen": 8994432, + "step": 13345 + }, + { + "epoch": 0.3261427210319302, + "grad_norm": 49.685516357421875, + "learning_rate": 1.3044412957443689e-06, + "loss": 0.173, + "num_input_tokens_seen": 8998080, + "step": 13350 + }, + { + "epoch": 0.3262648718637774, + "grad_norm": 42.36631774902344, + "learning_rate": 1.3049298871353886e-06, + "loss": 0.1508, + "num_input_tokens_seen": 9001344, + "step": 13355 + }, + { + "epoch": 0.3263870226956246, + "grad_norm": 12.289406776428223, + "learning_rate": 1.3054184785264085e-06, + "loss": 0.1075, + "num_input_tokens_seen": 9004928, + "step": 13360 + }, + { + "epoch": 0.3265091735274717, + "grad_norm": 55.62197494506836, + "learning_rate": 1.305907069917428e-06, + "loss": 0.1445, + "num_input_tokens_seen": 9007936, + "step": 13365 + }, + { + "epoch": 0.3266313243593189, + "grad_norm": 25.463848114013672, + "learning_rate": 1.3063956613084476e-06, + "loss": 0.0846, + "num_input_tokens_seen": 9011264, + "step": 13370 + }, + { + "epoch": 0.32675347519116604, + "grad_norm": 44.88855743408203, + "learning_rate": 1.3068842526994675e-06, + "loss": 0.1817, + "num_input_tokens_seen": 9014528, + "step": 13375 + }, + { + "epoch": 0.32687562602301323, + "grad_norm": 25.74319839477539, + "learning_rate": 1.307372844090487e-06, + "loss": 0.1482, + "num_input_tokens_seen": 9017536, + "step": 13380 + }, + { + "epoch": 0.32699777685486037, + "grad_norm": 25.14618682861328, + "learning_rate": 1.3078614354815067e-06, + "loss": 0.1093, + "num_input_tokens_seen": 9020480, + "step": 13385 + }, + { + "epoch": 0.32711992768670756, + "grad_norm": 14.147246360778809, + "learning_rate": 1.3083500268725265e-06, + "loss": 0.0675, + "num_input_tokens_seen": 9023808, + "step": 13390 + }, + { + "epoch": 0.3272420785185547, + "grad_norm": 3.24316668510437, + "learning_rate": 1.3088386182635462e-06, + "loss": 0.2227, + "num_input_tokens_seen": 9027200, + "step": 13395 + }, + { + "epoch": 0.3273642293504019, + "grad_norm": 36.026851654052734, + "learning_rate": 1.3093272096545657e-06, + "loss": 0.132, + "num_input_tokens_seen": 9030400, + "step": 13400 + }, + { + "epoch": 0.327486380182249, + "grad_norm": 0.8746715188026428, + "learning_rate": 1.3098158010455856e-06, + "loss": 0.0713, + "num_input_tokens_seen": 9033920, + "step": 13405 + }, + { + "epoch": 0.3276085310140962, + "grad_norm": 11.483881950378418, + "learning_rate": 1.3103043924366053e-06, + "loss": 0.1003, + "num_input_tokens_seen": 9037504, + "step": 13410 + }, + { + "epoch": 0.3277306818459434, + "grad_norm": 31.39374542236328, + "learning_rate": 1.3107929838276247e-06, + "loss": 0.1388, + "num_input_tokens_seen": 9040384, + "step": 13415 + }, + { + "epoch": 0.32785283267779053, + "grad_norm": 0.4166433811187744, + "learning_rate": 1.3112815752186446e-06, + "loss": 0.0525, + "num_input_tokens_seen": 9043712, + "step": 13420 + }, + { + "epoch": 0.3279749835096377, + "grad_norm": 24.85698890686035, + "learning_rate": 1.3117701666096643e-06, + "loss": 0.2262, + "num_input_tokens_seen": 9046336, + "step": 13425 + }, + { + "epoch": 0.32809713434148485, + "grad_norm": 1.9511191844940186, + "learning_rate": 1.3122587580006838e-06, + "loss": 0.1109, + "num_input_tokens_seen": 9049536, + "step": 13430 + }, + { + "epoch": 0.32821928517333204, + "grad_norm": 2.2473626136779785, + "learning_rate": 1.3127473493917037e-06, + "loss": 0.0949, + "num_input_tokens_seen": 9052928, + "step": 13435 + }, + { + "epoch": 0.3283414360051792, + "grad_norm": 17.47495460510254, + "learning_rate": 1.3132359407827234e-06, + "loss": 0.1478, + "num_input_tokens_seen": 9056384, + "step": 13440 + }, + { + "epoch": 0.32846358683702637, + "grad_norm": 8.121326446533203, + "learning_rate": 1.313724532173743e-06, + "loss": 0.0471, + "num_input_tokens_seen": 9059840, + "step": 13445 + }, + { + "epoch": 0.3285857376688735, + "grad_norm": 13.05697250366211, + "learning_rate": 1.3142131235647627e-06, + "loss": 0.1651, + "num_input_tokens_seen": 9063680, + "step": 13450 + }, + { + "epoch": 0.3287078885007207, + "grad_norm": 16.482269287109375, + "learning_rate": 1.3147017149557824e-06, + "loss": 0.0945, + "num_input_tokens_seen": 9067520, + "step": 13455 + }, + { + "epoch": 0.3288300393325679, + "grad_norm": 0.4536917805671692, + "learning_rate": 1.315190306346802e-06, + "loss": 0.072, + "num_input_tokens_seen": 9070656, + "step": 13460 + }, + { + "epoch": 0.328952190164415, + "grad_norm": 27.94501304626465, + "learning_rate": 1.3156788977378218e-06, + "loss": 0.1405, + "num_input_tokens_seen": 9074240, + "step": 13465 + }, + { + "epoch": 0.3290743409962622, + "grad_norm": 25.520456314086914, + "learning_rate": 1.3161674891288414e-06, + "loss": 0.0691, + "num_input_tokens_seen": 9077568, + "step": 13470 + }, + { + "epoch": 0.32919649182810934, + "grad_norm": 0.49751996994018555, + "learning_rate": 1.3166560805198611e-06, + "loss": 0.0812, + "num_input_tokens_seen": 9081024, + "step": 13475 + }, + { + "epoch": 0.32931864265995653, + "grad_norm": 18.828784942626953, + "learning_rate": 1.317144671910881e-06, + "loss": 0.0681, + "num_input_tokens_seen": 9084288, + "step": 13480 + }, + { + "epoch": 0.32944079349180366, + "grad_norm": 3.951866388320923, + "learning_rate": 1.3176332633019005e-06, + "loss": 0.0192, + "num_input_tokens_seen": 9087424, + "step": 13485 + }, + { + "epoch": 0.32956294432365085, + "grad_norm": 2.443051338195801, + "learning_rate": 1.3181218546929202e-06, + "loss": 0.1937, + "num_input_tokens_seen": 9090688, + "step": 13490 + }, + { + "epoch": 0.329685095155498, + "grad_norm": 48.771732330322266, + "learning_rate": 1.31861044608394e-06, + "loss": 0.1272, + "num_input_tokens_seen": 9093824, + "step": 13495 + }, + { + "epoch": 0.3298072459873452, + "grad_norm": 39.788150787353516, + "learning_rate": 1.3190990374749595e-06, + "loss": 0.1441, + "num_input_tokens_seen": 9097088, + "step": 13500 + }, + { + "epoch": 0.3299293968191923, + "grad_norm": 22.174091339111328, + "learning_rate": 1.3195876288659792e-06, + "loss": 0.163, + "num_input_tokens_seen": 9100736, + "step": 13505 + }, + { + "epoch": 0.3300515476510395, + "grad_norm": 29.268009185791016, + "learning_rate": 1.3200762202569991e-06, + "loss": 0.1131, + "num_input_tokens_seen": 9104192, + "step": 13510 + }, + { + "epoch": 0.3301736984828867, + "grad_norm": 0.8035185933113098, + "learning_rate": 1.3205648116480188e-06, + "loss": 0.0136, + "num_input_tokens_seen": 9107584, + "step": 13515 + }, + { + "epoch": 0.3302958493147338, + "grad_norm": 11.216259002685547, + "learning_rate": 1.3210534030390383e-06, + "loss": 0.0706, + "num_input_tokens_seen": 9110784, + "step": 13520 + }, + { + "epoch": 0.330418000146581, + "grad_norm": 1.113582968711853, + "learning_rate": 1.3215419944300582e-06, + "loss": 0.1026, + "num_input_tokens_seen": 9114240, + "step": 13525 + }, + { + "epoch": 0.33054015097842815, + "grad_norm": 15.959999084472656, + "learning_rate": 1.3220305858210778e-06, + "loss": 0.1298, + "num_input_tokens_seen": 9117568, + "step": 13530 + }, + { + "epoch": 0.33066230181027534, + "grad_norm": 18.487342834472656, + "learning_rate": 1.3225191772120973e-06, + "loss": 0.0658, + "num_input_tokens_seen": 9121152, + "step": 13535 + }, + { + "epoch": 0.3307844526421225, + "grad_norm": 34.5030632019043, + "learning_rate": 1.3230077686031172e-06, + "loss": 0.1518, + "num_input_tokens_seen": 9124352, + "step": 13540 + }, + { + "epoch": 0.33090660347396966, + "grad_norm": 80.67737579345703, + "learning_rate": 1.3234963599941369e-06, + "loss": 0.1158, + "num_input_tokens_seen": 9127488, + "step": 13545 + }, + { + "epoch": 0.3310287543058168, + "grad_norm": 35.42460250854492, + "learning_rate": 1.3239849513851564e-06, + "loss": 0.0693, + "num_input_tokens_seen": 9131584, + "step": 13550 + }, + { + "epoch": 0.331150905137664, + "grad_norm": 12.657451629638672, + "learning_rate": 1.3244735427761762e-06, + "loss": 0.1466, + "num_input_tokens_seen": 9135104, + "step": 13555 + }, + { + "epoch": 0.3312730559695112, + "grad_norm": 36.274234771728516, + "learning_rate": 1.324962134167196e-06, + "loss": 0.2083, + "num_input_tokens_seen": 9138880, + "step": 13560 + }, + { + "epoch": 0.3313952068013583, + "grad_norm": 69.3318862915039, + "learning_rate": 1.3254507255582156e-06, + "loss": 0.087, + "num_input_tokens_seen": 9141952, + "step": 13565 + }, + { + "epoch": 0.3315173576332055, + "grad_norm": 37.832054138183594, + "learning_rate": 1.3259393169492353e-06, + "loss": 0.2453, + "num_input_tokens_seen": 9145024, + "step": 13570 + }, + { + "epoch": 0.33163950846505263, + "grad_norm": 0.09780533611774445, + "learning_rate": 1.326427908340255e-06, + "loss": 0.2447, + "num_input_tokens_seen": 9148096, + "step": 13575 + }, + { + "epoch": 0.3317616592968998, + "grad_norm": 0.24856971204280853, + "learning_rate": 1.3269164997312747e-06, + "loss": 0.1698, + "num_input_tokens_seen": 9151296, + "step": 13580 + }, + { + "epoch": 0.33188381012874696, + "grad_norm": 18.92795181274414, + "learning_rate": 1.3274050911222943e-06, + "loss": 0.1941, + "num_input_tokens_seen": 9154560, + "step": 13585 + }, + { + "epoch": 0.33200596096059415, + "grad_norm": 43.682830810546875, + "learning_rate": 1.327893682513314e-06, + "loss": 0.1002, + "num_input_tokens_seen": 9157888, + "step": 13590 + }, + { + "epoch": 0.3321281117924413, + "grad_norm": 1.1924934387207031, + "learning_rate": 1.3283822739043337e-06, + "loss": 0.1172, + "num_input_tokens_seen": 9161472, + "step": 13595 + }, + { + "epoch": 0.3322502626242885, + "grad_norm": 0.16776405274868011, + "learning_rate": 1.3288708652953536e-06, + "loss": 0.0366, + "num_input_tokens_seen": 9164864, + "step": 13600 + }, + { + "epoch": 0.33237241345613566, + "grad_norm": 16.99744415283203, + "learning_rate": 1.329359456686373e-06, + "loss": 0.073, + "num_input_tokens_seen": 9168576, + "step": 13605 + }, + { + "epoch": 0.3324945642879828, + "grad_norm": 0.9078207015991211, + "learning_rate": 1.3298480480773927e-06, + "loss": 0.2594, + "num_input_tokens_seen": 9171648, + "step": 13610 + }, + { + "epoch": 0.33261671511983, + "grad_norm": 45.5037841796875, + "learning_rate": 1.3303366394684126e-06, + "loss": 0.2359, + "num_input_tokens_seen": 9174848, + "step": 13615 + }, + { + "epoch": 0.3327388659516771, + "grad_norm": 43.5709342956543, + "learning_rate": 1.330825230859432e-06, + "loss": 0.187, + "num_input_tokens_seen": 9177984, + "step": 13620 + }, + { + "epoch": 0.3328610167835243, + "grad_norm": 19.13081932067871, + "learning_rate": 1.3313138222504518e-06, + "loss": 0.1991, + "num_input_tokens_seen": 9181440, + "step": 13625 + }, + { + "epoch": 0.33298316761537144, + "grad_norm": 59.84111404418945, + "learning_rate": 1.3318024136414717e-06, + "loss": 0.1258, + "num_input_tokens_seen": 9184704, + "step": 13630 + }, + { + "epoch": 0.33310531844721863, + "grad_norm": 25.472261428833008, + "learning_rate": 1.3322910050324914e-06, + "loss": 0.1824, + "num_input_tokens_seen": 9187904, + "step": 13635 + }, + { + "epoch": 0.33322746927906577, + "grad_norm": 10.703448295593262, + "learning_rate": 1.3327795964235108e-06, + "loss": 0.1674, + "num_input_tokens_seen": 9191168, + "step": 13640 + }, + { + "epoch": 0.33334962011091296, + "grad_norm": 15.315190315246582, + "learning_rate": 1.3332681878145307e-06, + "loss": 0.164, + "num_input_tokens_seen": 9194752, + "step": 13645 + }, + { + "epoch": 0.3334717709427601, + "grad_norm": 15.319101333618164, + "learning_rate": 1.3337567792055504e-06, + "loss": 0.1096, + "num_input_tokens_seen": 9198528, + "step": 13650 + }, + { + "epoch": 0.3335939217746073, + "grad_norm": 13.643003463745117, + "learning_rate": 1.3342453705965699e-06, + "loss": 0.0589, + "num_input_tokens_seen": 9201728, + "step": 13655 + }, + { + "epoch": 0.3337160726064545, + "grad_norm": 15.755820274353027, + "learning_rate": 1.3347339619875898e-06, + "loss": 0.1301, + "num_input_tokens_seen": 9204800, + "step": 13660 + }, + { + "epoch": 0.3338382234383016, + "grad_norm": 20.200284957885742, + "learning_rate": 1.3352225533786094e-06, + "loss": 0.129, + "num_input_tokens_seen": 9208192, + "step": 13665 + }, + { + "epoch": 0.3339603742701488, + "grad_norm": 21.0355281829834, + "learning_rate": 1.3357111447696291e-06, + "loss": 0.1446, + "num_input_tokens_seen": 9211328, + "step": 13670 + }, + { + "epoch": 0.33408252510199593, + "grad_norm": 10.959577560424805, + "learning_rate": 1.3361997361606488e-06, + "loss": 0.1586, + "num_input_tokens_seen": 9214720, + "step": 13675 + }, + { + "epoch": 0.3342046759338431, + "grad_norm": 45.59690475463867, + "learning_rate": 1.3366883275516685e-06, + "loss": 0.1188, + "num_input_tokens_seen": 9217856, + "step": 13680 + }, + { + "epoch": 0.33432682676569025, + "grad_norm": 16.36484146118164, + "learning_rate": 1.3371769189426882e-06, + "loss": 0.0507, + "num_input_tokens_seen": 9221312, + "step": 13685 + }, + { + "epoch": 0.33444897759753744, + "grad_norm": 0.2937169075012207, + "learning_rate": 1.3376655103337079e-06, + "loss": 0.0902, + "num_input_tokens_seen": 9224896, + "step": 13690 + }, + { + "epoch": 0.3345711284293846, + "grad_norm": 13.3306884765625, + "learning_rate": 1.3381541017247275e-06, + "loss": 0.0699, + "num_input_tokens_seen": 9227904, + "step": 13695 + }, + { + "epoch": 0.33469327926123177, + "grad_norm": 1.268502950668335, + "learning_rate": 1.3386426931157472e-06, + "loss": 0.054, + "num_input_tokens_seen": 9231360, + "step": 13700 + }, + { + "epoch": 0.33481543009307896, + "grad_norm": 40.71323776245117, + "learning_rate": 1.339131284506767e-06, + "loss": 0.1826, + "num_input_tokens_seen": 9235072, + "step": 13705 + }, + { + "epoch": 0.3349375809249261, + "grad_norm": 0.662493109703064, + "learning_rate": 1.3396198758977866e-06, + "loss": 0.1498, + "num_input_tokens_seen": 9238464, + "step": 13710 + }, + { + "epoch": 0.3350597317567733, + "grad_norm": 8.303804397583008, + "learning_rate": 1.3401084672888063e-06, + "loss": 0.1819, + "num_input_tokens_seen": 9241920, + "step": 13715 + }, + { + "epoch": 0.3351818825886204, + "grad_norm": 8.60287094116211, + "learning_rate": 1.3405970586798262e-06, + "loss": 0.1203, + "num_input_tokens_seen": 9245056, + "step": 13720 + }, + { + "epoch": 0.3353040334204676, + "grad_norm": 13.480615615844727, + "learning_rate": 1.3410856500708456e-06, + "loss": 0.1672, + "num_input_tokens_seen": 9248832, + "step": 13725 + }, + { + "epoch": 0.33542618425231474, + "grad_norm": 8.956427574157715, + "learning_rate": 1.3415742414618653e-06, + "loss": 0.201, + "num_input_tokens_seen": 9251968, + "step": 13730 + }, + { + "epoch": 0.33554833508416193, + "grad_norm": 9.379366874694824, + "learning_rate": 1.3420628328528852e-06, + "loss": 0.1651, + "num_input_tokens_seen": 9255360, + "step": 13735 + }, + { + "epoch": 0.33567048591600906, + "grad_norm": 12.51583194732666, + "learning_rate": 1.3425514242439047e-06, + "loss": 0.1308, + "num_input_tokens_seen": 9258560, + "step": 13740 + }, + { + "epoch": 0.33579263674785625, + "grad_norm": 28.122478485107422, + "learning_rate": 1.3430400156349243e-06, + "loss": 0.2076, + "num_input_tokens_seen": 9261824, + "step": 13745 + }, + { + "epoch": 0.33591478757970344, + "grad_norm": 7.3910746574401855, + "learning_rate": 1.3435286070259442e-06, + "loss": 0.111, + "num_input_tokens_seen": 9265088, + "step": 13750 + }, + { + "epoch": 0.3360369384115506, + "grad_norm": 14.954057693481445, + "learning_rate": 1.344017198416964e-06, + "loss": 0.0729, + "num_input_tokens_seen": 9268416, + "step": 13755 + }, + { + "epoch": 0.33615908924339777, + "grad_norm": 21.911819458007812, + "learning_rate": 1.3445057898079834e-06, + "loss": 0.1038, + "num_input_tokens_seen": 9271936, + "step": 13760 + }, + { + "epoch": 0.3362812400752449, + "grad_norm": 19.510435104370117, + "learning_rate": 1.3449943811990033e-06, + "loss": 0.18, + "num_input_tokens_seen": 9275392, + "step": 13765 + }, + { + "epoch": 0.3364033909070921, + "grad_norm": 13.656414985656738, + "learning_rate": 1.345482972590023e-06, + "loss": 0.1319, + "num_input_tokens_seen": 9278720, + "step": 13770 + }, + { + "epoch": 0.3365255417389392, + "grad_norm": 4.69479513168335, + "learning_rate": 1.3459715639810424e-06, + "loss": 0.1157, + "num_input_tokens_seen": 9281792, + "step": 13775 + }, + { + "epoch": 0.3366476925707864, + "grad_norm": 21.458940505981445, + "learning_rate": 1.3464601553720623e-06, + "loss": 0.0984, + "num_input_tokens_seen": 9285056, + "step": 13780 + }, + { + "epoch": 0.33676984340263355, + "grad_norm": 1.911492943763733, + "learning_rate": 1.346948746763082e-06, + "loss": 0.0676, + "num_input_tokens_seen": 9288512, + "step": 13785 + }, + { + "epoch": 0.33689199423448074, + "grad_norm": 10.871212005615234, + "learning_rate": 1.3474373381541017e-06, + "loss": 0.1568, + "num_input_tokens_seen": 9291456, + "step": 13790 + }, + { + "epoch": 0.3370141450663279, + "grad_norm": 1.0117473602294922, + "learning_rate": 1.3479259295451214e-06, + "loss": 0.0388, + "num_input_tokens_seen": 9294912, + "step": 13795 + }, + { + "epoch": 0.33713629589817506, + "grad_norm": 0.2364530861377716, + "learning_rate": 1.348414520936141e-06, + "loss": 0.1024, + "num_input_tokens_seen": 9298304, + "step": 13800 + }, + { + "epoch": 0.33725844673002225, + "grad_norm": 20.05010414123535, + "learning_rate": 1.3489031123271607e-06, + "loss": 0.0944, + "num_input_tokens_seen": 9301632, + "step": 13805 + }, + { + "epoch": 0.3373805975618694, + "grad_norm": 20.302865982055664, + "learning_rate": 1.3493917037181804e-06, + "loss": 0.1152, + "num_input_tokens_seen": 9304896, + "step": 13810 + }, + { + "epoch": 0.3375027483937166, + "grad_norm": 0.9043674468994141, + "learning_rate": 1.3498802951092e-06, + "loss": 0.19, + "num_input_tokens_seen": 9308096, + "step": 13815 + }, + { + "epoch": 0.3376248992255637, + "grad_norm": 0.870741069316864, + "learning_rate": 1.3503688865002198e-06, + "loss": 0.0909, + "num_input_tokens_seen": 9311616, + "step": 13820 + }, + { + "epoch": 0.3377470500574109, + "grad_norm": 29.548307418823242, + "learning_rate": 1.3508574778912397e-06, + "loss": 0.2014, + "num_input_tokens_seen": 9314880, + "step": 13825 + }, + { + "epoch": 0.33786920088925804, + "grad_norm": 23.69499397277832, + "learning_rate": 1.3513460692822591e-06, + "loss": 0.1298, + "num_input_tokens_seen": 9319424, + "step": 13830 + }, + { + "epoch": 0.3379913517211052, + "grad_norm": 8.086859703063965, + "learning_rate": 1.3518346606732788e-06, + "loss": 0.1577, + "num_input_tokens_seen": 9322560, + "step": 13835 + }, + { + "epoch": 0.33811350255295236, + "grad_norm": 46.39087677001953, + "learning_rate": 1.3523232520642987e-06, + "loss": 0.1624, + "num_input_tokens_seen": 9326080, + "step": 13840 + }, + { + "epoch": 0.33823565338479955, + "grad_norm": 35.82646179199219, + "learning_rate": 1.3528118434553182e-06, + "loss": 0.2945, + "num_input_tokens_seen": 9328960, + "step": 13845 + }, + { + "epoch": 0.33835780421664674, + "grad_norm": 7.0568766593933105, + "learning_rate": 1.3533004348463379e-06, + "loss": 0.0515, + "num_input_tokens_seen": 9332288, + "step": 13850 + }, + { + "epoch": 0.3384799550484939, + "grad_norm": 1.6458518505096436, + "learning_rate": 1.3537890262373578e-06, + "loss": 0.0948, + "num_input_tokens_seen": 9335424, + "step": 13855 + }, + { + "epoch": 0.33860210588034106, + "grad_norm": 14.4976806640625, + "learning_rate": 1.3542776176283772e-06, + "loss": 0.1205, + "num_input_tokens_seen": 9338624, + "step": 13860 + }, + { + "epoch": 0.3387242567121882, + "grad_norm": 0.4415428340435028, + "learning_rate": 1.354766209019397e-06, + "loss": 0.0804, + "num_input_tokens_seen": 9342016, + "step": 13865 + }, + { + "epoch": 0.3388464075440354, + "grad_norm": 6.9508161544799805, + "learning_rate": 1.3552548004104168e-06, + "loss": 0.0663, + "num_input_tokens_seen": 9345408, + "step": 13870 + }, + { + "epoch": 0.3389685583758825, + "grad_norm": 10.175514221191406, + "learning_rate": 1.3557433918014365e-06, + "loss": 0.2014, + "num_input_tokens_seen": 9348736, + "step": 13875 + }, + { + "epoch": 0.3390907092077297, + "grad_norm": 2.260913848876953, + "learning_rate": 1.356231983192456e-06, + "loss": 0.1201, + "num_input_tokens_seen": 9352064, + "step": 13880 + }, + { + "epoch": 0.33921286003957685, + "grad_norm": 33.67397689819336, + "learning_rate": 1.3567205745834758e-06, + "loss": 0.1617, + "num_input_tokens_seen": 9356032, + "step": 13885 + }, + { + "epoch": 0.33933501087142404, + "grad_norm": 13.100915908813477, + "learning_rate": 1.3572091659744955e-06, + "loss": 0.0969, + "num_input_tokens_seen": 9359808, + "step": 13890 + }, + { + "epoch": 0.3394571617032712, + "grad_norm": 20.006126403808594, + "learning_rate": 1.357697757365515e-06, + "loss": 0.1939, + "num_input_tokens_seen": 9362880, + "step": 13895 + }, + { + "epoch": 0.33957931253511836, + "grad_norm": 24.357524871826172, + "learning_rate": 1.3581863487565349e-06, + "loss": 0.1736, + "num_input_tokens_seen": 9366336, + "step": 13900 + }, + { + "epoch": 0.33970146336696555, + "grad_norm": 2.007045030593872, + "learning_rate": 1.3586749401475546e-06, + "loss": 0.0832, + "num_input_tokens_seen": 9370240, + "step": 13905 + }, + { + "epoch": 0.3398236141988127, + "grad_norm": 38.24091339111328, + "learning_rate": 1.3591635315385743e-06, + "loss": 0.1163, + "num_input_tokens_seen": 9373504, + "step": 13910 + }, + { + "epoch": 0.3399457650306599, + "grad_norm": 17.034334182739258, + "learning_rate": 1.359652122929594e-06, + "loss": 0.1155, + "num_input_tokens_seen": 9376704, + "step": 13915 + }, + { + "epoch": 0.340067915862507, + "grad_norm": 49.082942962646484, + "learning_rate": 1.3601407143206136e-06, + "loss": 0.1426, + "num_input_tokens_seen": 9380096, + "step": 13920 + }, + { + "epoch": 0.3401900666943542, + "grad_norm": 18.470176696777344, + "learning_rate": 1.3606293057116333e-06, + "loss": 0.0888, + "num_input_tokens_seen": 9383936, + "step": 13925 + }, + { + "epoch": 0.34031221752620133, + "grad_norm": 46.603797912597656, + "learning_rate": 1.361117897102653e-06, + "loss": 0.2428, + "num_input_tokens_seen": 9387072, + "step": 13930 + }, + { + "epoch": 0.3404343683580485, + "grad_norm": 36.19292449951172, + "learning_rate": 1.3616064884936727e-06, + "loss": 0.0733, + "num_input_tokens_seen": 9390208, + "step": 13935 + }, + { + "epoch": 0.34055651918989566, + "grad_norm": 33.56175231933594, + "learning_rate": 1.3620950798846923e-06, + "loss": 0.2286, + "num_input_tokens_seen": 9393280, + "step": 13940 + }, + { + "epoch": 0.34067867002174285, + "grad_norm": 11.620241165161133, + "learning_rate": 1.3625836712757122e-06, + "loss": 0.195, + "num_input_tokens_seen": 9396928, + "step": 13945 + }, + { + "epoch": 0.34080082085359004, + "grad_norm": 5.474987030029297, + "learning_rate": 1.3630722626667317e-06, + "loss": 0.1635, + "num_input_tokens_seen": 9400512, + "step": 13950 + }, + { + "epoch": 0.34092297168543717, + "grad_norm": 31.326126098632812, + "learning_rate": 1.3635608540577514e-06, + "loss": 0.0758, + "num_input_tokens_seen": 9403968, + "step": 13955 + }, + { + "epoch": 0.34104512251728436, + "grad_norm": 4.288376808166504, + "learning_rate": 1.3640494454487713e-06, + "loss": 0.0502, + "num_input_tokens_seen": 9406848, + "step": 13960 + }, + { + "epoch": 0.3411672733491315, + "grad_norm": 42.29257583618164, + "learning_rate": 1.3645380368397907e-06, + "loss": 0.0588, + "num_input_tokens_seen": 9410304, + "step": 13965 + }, + { + "epoch": 0.3412894241809787, + "grad_norm": 26.356138229370117, + "learning_rate": 1.3650266282308104e-06, + "loss": 0.1204, + "num_input_tokens_seen": 9413504, + "step": 13970 + }, + { + "epoch": 0.3414115750128258, + "grad_norm": 17.461332321166992, + "learning_rate": 1.3655152196218303e-06, + "loss": 0.0598, + "num_input_tokens_seen": 9416512, + "step": 13975 + }, + { + "epoch": 0.341533725844673, + "grad_norm": 25.22222328186035, + "learning_rate": 1.3660038110128498e-06, + "loss": 0.1496, + "num_input_tokens_seen": 9419840, + "step": 13980 + }, + { + "epoch": 0.34165587667652014, + "grad_norm": 2.4524993896484375, + "learning_rate": 1.3664924024038695e-06, + "loss": 0.1019, + "num_input_tokens_seen": 9423168, + "step": 13985 + }, + { + "epoch": 0.34177802750836733, + "grad_norm": 55.20090103149414, + "learning_rate": 1.3669809937948894e-06, + "loss": 0.1098, + "num_input_tokens_seen": 9426752, + "step": 13990 + }, + { + "epoch": 0.3419001783402145, + "grad_norm": 38.13460922241211, + "learning_rate": 1.367469585185909e-06, + "loss": 0.1414, + "num_input_tokens_seen": 9430464, + "step": 13995 + }, + { + "epoch": 0.34202232917206166, + "grad_norm": 10.772130966186523, + "learning_rate": 1.3679581765769285e-06, + "loss": 0.1338, + "num_input_tokens_seen": 9434752, + "step": 14000 + }, + { + "epoch": 0.34214448000390885, + "grad_norm": 7.737995624542236, + "learning_rate": 1.3684467679679484e-06, + "loss": 0.1397, + "num_input_tokens_seen": 9438144, + "step": 14005 + }, + { + "epoch": 0.342266630835756, + "grad_norm": 21.433128356933594, + "learning_rate": 1.368935359358968e-06, + "loss": 0.186, + "num_input_tokens_seen": 9441472, + "step": 14010 + }, + { + "epoch": 0.34238878166760317, + "grad_norm": 14.350296020507812, + "learning_rate": 1.3694239507499876e-06, + "loss": 0.1268, + "num_input_tokens_seen": 9444544, + "step": 14015 + }, + { + "epoch": 0.3425109324994503, + "grad_norm": 10.877737998962402, + "learning_rate": 1.3699125421410075e-06, + "loss": 0.1247, + "num_input_tokens_seen": 9447808, + "step": 14020 + }, + { + "epoch": 0.3426330833312975, + "grad_norm": 32.232540130615234, + "learning_rate": 1.3704011335320271e-06, + "loss": 0.2446, + "num_input_tokens_seen": 9450688, + "step": 14025 + }, + { + "epoch": 0.34275523416314463, + "grad_norm": 11.100104331970215, + "learning_rate": 1.3708897249230468e-06, + "loss": 0.1617, + "num_input_tokens_seen": 9453824, + "step": 14030 + }, + { + "epoch": 0.3428773849949918, + "grad_norm": 17.309906005859375, + "learning_rate": 1.3713783163140665e-06, + "loss": 0.1629, + "num_input_tokens_seen": 9457408, + "step": 14035 + }, + { + "epoch": 0.342999535826839, + "grad_norm": 15.123571395874023, + "learning_rate": 1.3718669077050862e-06, + "loss": 0.1217, + "num_input_tokens_seen": 9461376, + "step": 14040 + }, + { + "epoch": 0.34312168665868614, + "grad_norm": 1.2097609043121338, + "learning_rate": 1.3723554990961059e-06, + "loss": 0.0887, + "num_input_tokens_seen": 9464576, + "step": 14045 + }, + { + "epoch": 0.34324383749053333, + "grad_norm": 2.189056158065796, + "learning_rate": 1.3728440904871255e-06, + "loss": 0.0384, + "num_input_tokens_seen": 9467776, + "step": 14050 + }, + { + "epoch": 0.34336598832238047, + "grad_norm": 24.694095611572266, + "learning_rate": 1.3733326818781452e-06, + "loss": 0.1598, + "num_input_tokens_seen": 9471040, + "step": 14055 + }, + { + "epoch": 0.34348813915422766, + "grad_norm": 29.479421615600586, + "learning_rate": 1.373821273269165e-06, + "loss": 0.0941, + "num_input_tokens_seen": 9474368, + "step": 14060 + }, + { + "epoch": 0.3436102899860748, + "grad_norm": 11.847206115722656, + "learning_rate": 1.3743098646601848e-06, + "loss": 0.133, + "num_input_tokens_seen": 9477632, + "step": 14065 + }, + { + "epoch": 0.343732440817922, + "grad_norm": 20.486820220947266, + "learning_rate": 1.3747984560512043e-06, + "loss": 0.0703, + "num_input_tokens_seen": 9481280, + "step": 14070 + }, + { + "epoch": 0.3438545916497691, + "grad_norm": 3.3354222774505615, + "learning_rate": 1.375287047442224e-06, + "loss": 0.1171, + "num_input_tokens_seen": 9483968, + "step": 14075 + }, + { + "epoch": 0.3439767424816163, + "grad_norm": 16.6711368560791, + "learning_rate": 1.3757756388332438e-06, + "loss": 0.1291, + "num_input_tokens_seen": 9487040, + "step": 14080 + }, + { + "epoch": 0.34409889331346344, + "grad_norm": 32.15079879760742, + "learning_rate": 1.3762642302242633e-06, + "loss": 0.2305, + "num_input_tokens_seen": 9490752, + "step": 14085 + }, + { + "epoch": 0.34422104414531063, + "grad_norm": 16.164844512939453, + "learning_rate": 1.376752821615283e-06, + "loss": 0.1599, + "num_input_tokens_seen": 9494016, + "step": 14090 + }, + { + "epoch": 0.3443431949771578, + "grad_norm": 21.418575286865234, + "learning_rate": 1.3772414130063029e-06, + "loss": 0.0997, + "num_input_tokens_seen": 9497408, + "step": 14095 + }, + { + "epoch": 0.34446534580900495, + "grad_norm": 4.597272872924805, + "learning_rate": 1.3777300043973226e-06, + "loss": 0.0827, + "num_input_tokens_seen": 9500928, + "step": 14100 + }, + { + "epoch": 0.34458749664085214, + "grad_norm": 36.62041473388672, + "learning_rate": 1.378218595788342e-06, + "loss": 0.2393, + "num_input_tokens_seen": 9504128, + "step": 14105 + }, + { + "epoch": 0.3447096474726993, + "grad_norm": 21.164810180664062, + "learning_rate": 1.378707187179362e-06, + "loss": 0.1463, + "num_input_tokens_seen": 9507456, + "step": 14110 + }, + { + "epoch": 0.34483179830454647, + "grad_norm": 12.85429573059082, + "learning_rate": 1.3791957785703816e-06, + "loss": 0.0823, + "num_input_tokens_seen": 9511104, + "step": 14115 + }, + { + "epoch": 0.3449539491363936, + "grad_norm": 12.703222274780273, + "learning_rate": 1.379684369961401e-06, + "loss": 0.0464, + "num_input_tokens_seen": 9514112, + "step": 14120 + }, + { + "epoch": 0.3450760999682408, + "grad_norm": 1.6505541801452637, + "learning_rate": 1.380172961352421e-06, + "loss": 0.1753, + "num_input_tokens_seen": 9517184, + "step": 14125 + }, + { + "epoch": 0.3451982508000879, + "grad_norm": 1.279025912284851, + "learning_rate": 1.3806615527434407e-06, + "loss": 0.0295, + "num_input_tokens_seen": 9520448, + "step": 14130 + }, + { + "epoch": 0.3453204016319351, + "grad_norm": 37.95659637451172, + "learning_rate": 1.3811501441344601e-06, + "loss": 0.3037, + "num_input_tokens_seen": 9523776, + "step": 14135 + }, + { + "epoch": 0.3454425524637823, + "grad_norm": 10.611307144165039, + "learning_rate": 1.38163873552548e-06, + "loss": 0.1079, + "num_input_tokens_seen": 9527360, + "step": 14140 + }, + { + "epoch": 0.34556470329562944, + "grad_norm": 3.302593946456909, + "learning_rate": 1.3821273269164997e-06, + "loss": 0.0986, + "num_input_tokens_seen": 9530560, + "step": 14145 + }, + { + "epoch": 0.34568685412747663, + "grad_norm": 36.47740173339844, + "learning_rate": 1.3826159183075194e-06, + "loss": 0.075, + "num_input_tokens_seen": 9533888, + "step": 14150 + }, + { + "epoch": 0.34580900495932376, + "grad_norm": 0.7400091886520386, + "learning_rate": 1.383104509698539e-06, + "loss": 0.1583, + "num_input_tokens_seen": 9538176, + "step": 14155 + }, + { + "epoch": 0.34593115579117095, + "grad_norm": 1.301757574081421, + "learning_rate": 1.3835931010895587e-06, + "loss": 0.0705, + "num_input_tokens_seen": 9541760, + "step": 14160 + }, + { + "epoch": 0.3460533066230181, + "grad_norm": 27.92353630065918, + "learning_rate": 1.3840816924805784e-06, + "loss": 0.0922, + "num_input_tokens_seen": 9544960, + "step": 14165 + }, + { + "epoch": 0.3461754574548653, + "grad_norm": 47.610252380371094, + "learning_rate": 1.384570283871598e-06, + "loss": 0.251, + "num_input_tokens_seen": 9548224, + "step": 14170 + }, + { + "epoch": 0.3462976082867124, + "grad_norm": 33.565673828125, + "learning_rate": 1.3850588752626178e-06, + "loss": 0.0849, + "num_input_tokens_seen": 9552000, + "step": 14175 + }, + { + "epoch": 0.3464197591185596, + "grad_norm": 45.37564468383789, + "learning_rate": 1.3855474666536375e-06, + "loss": 0.2377, + "num_input_tokens_seen": 9555072, + "step": 14180 + }, + { + "epoch": 0.34654190995040673, + "grad_norm": 10.524803161621094, + "learning_rate": 1.3860360580446574e-06, + "loss": 0.0824, + "num_input_tokens_seen": 9557952, + "step": 14185 + }, + { + "epoch": 0.3466640607822539, + "grad_norm": 6.826573848724365, + "learning_rate": 1.3865246494356768e-06, + "loss": 0.1271, + "num_input_tokens_seen": 9561024, + "step": 14190 + }, + { + "epoch": 0.3467862116141011, + "grad_norm": 42.382469177246094, + "learning_rate": 1.3870132408266965e-06, + "loss": 0.1956, + "num_input_tokens_seen": 9564736, + "step": 14195 + }, + { + "epoch": 0.34690836244594825, + "grad_norm": 9.132492065429688, + "learning_rate": 1.3875018322177164e-06, + "loss": 0.1167, + "num_input_tokens_seen": 9567808, + "step": 14200 + }, + { + "epoch": 0.34703051327779544, + "grad_norm": 5.075997829437256, + "learning_rate": 1.3879904236087359e-06, + "loss": 0.0684, + "num_input_tokens_seen": 9570880, + "step": 14205 + }, + { + "epoch": 0.3471526641096426, + "grad_norm": 29.276092529296875, + "learning_rate": 1.3884790149997556e-06, + "loss": 0.1585, + "num_input_tokens_seen": 9574720, + "step": 14210 + }, + { + "epoch": 0.34727481494148976, + "grad_norm": 24.11640167236328, + "learning_rate": 1.3889676063907754e-06, + "loss": 0.1083, + "num_input_tokens_seen": 9578560, + "step": 14215 + }, + { + "epoch": 0.3473969657733369, + "grad_norm": 0.9906240701675415, + "learning_rate": 1.3894561977817951e-06, + "loss": 0.047, + "num_input_tokens_seen": 9581568, + "step": 14220 + }, + { + "epoch": 0.3475191166051841, + "grad_norm": 1.8317592144012451, + "learning_rate": 1.3899447891728146e-06, + "loss": 0.0997, + "num_input_tokens_seen": 9585408, + "step": 14225 + }, + { + "epoch": 0.3476412674370312, + "grad_norm": 23.83988380432129, + "learning_rate": 1.3904333805638345e-06, + "loss": 0.0707, + "num_input_tokens_seen": 9588864, + "step": 14230 + }, + { + "epoch": 0.3477634182688784, + "grad_norm": 2.099985122680664, + "learning_rate": 1.3909219719548542e-06, + "loss": 0.1108, + "num_input_tokens_seen": 9592064, + "step": 14235 + }, + { + "epoch": 0.3478855691007256, + "grad_norm": 15.784367561340332, + "learning_rate": 1.3914105633458736e-06, + "loss": 0.1077, + "num_input_tokens_seen": 9595328, + "step": 14240 + }, + { + "epoch": 0.34800771993257273, + "grad_norm": 103.47981262207031, + "learning_rate": 1.3918991547368935e-06, + "loss": 0.2657, + "num_input_tokens_seen": 9598592, + "step": 14245 + }, + { + "epoch": 0.3481298707644199, + "grad_norm": 58.79197692871094, + "learning_rate": 1.3923877461279132e-06, + "loss": 0.1663, + "num_input_tokens_seen": 9601856, + "step": 14250 + }, + { + "epoch": 0.34825202159626706, + "grad_norm": 0.24049213528633118, + "learning_rate": 1.3928763375189327e-06, + "loss": 0.0336, + "num_input_tokens_seen": 9605184, + "step": 14255 + }, + { + "epoch": 0.34837417242811425, + "grad_norm": 51.701271057128906, + "learning_rate": 1.3933649289099526e-06, + "loss": 0.1935, + "num_input_tokens_seen": 9608256, + "step": 14260 + }, + { + "epoch": 0.3484963232599614, + "grad_norm": 16.26445960998535, + "learning_rate": 1.3938535203009723e-06, + "loss": 0.1625, + "num_input_tokens_seen": 9611328, + "step": 14265 + }, + { + "epoch": 0.3486184740918086, + "grad_norm": 13.481324195861816, + "learning_rate": 1.394342111691992e-06, + "loss": 0.1454, + "num_input_tokens_seen": 9614400, + "step": 14270 + }, + { + "epoch": 0.3487406249236557, + "grad_norm": 23.933629989624023, + "learning_rate": 1.3948307030830116e-06, + "loss": 0.1765, + "num_input_tokens_seen": 9618304, + "step": 14275 + }, + { + "epoch": 0.3488627757555029, + "grad_norm": 11.629081726074219, + "learning_rate": 1.3953192944740313e-06, + "loss": 0.097, + "num_input_tokens_seen": 9622016, + "step": 14280 + }, + { + "epoch": 0.3489849265873501, + "grad_norm": 14.450347900390625, + "learning_rate": 1.395807885865051e-06, + "loss": 0.1955, + "num_input_tokens_seen": 9625344, + "step": 14285 + }, + { + "epoch": 0.3491070774191972, + "grad_norm": 28.15384864807129, + "learning_rate": 1.3962964772560707e-06, + "loss": 0.1924, + "num_input_tokens_seen": 9629056, + "step": 14290 + }, + { + "epoch": 0.3492292282510444, + "grad_norm": 13.48028564453125, + "learning_rate": 1.3967850686470903e-06, + "loss": 0.2156, + "num_input_tokens_seen": 9632384, + "step": 14295 + }, + { + "epoch": 0.34935137908289154, + "grad_norm": 2.4608681201934814, + "learning_rate": 1.39727366003811e-06, + "loss": 0.0258, + "num_input_tokens_seen": 9636224, + "step": 14300 + }, + { + "epoch": 0.34947352991473873, + "grad_norm": 16.832611083984375, + "learning_rate": 1.39776225142913e-06, + "loss": 0.0459, + "num_input_tokens_seen": 9639680, + "step": 14305 + }, + { + "epoch": 0.34959568074658587, + "grad_norm": 25.56661605834961, + "learning_rate": 1.3982508428201494e-06, + "loss": 0.0851, + "num_input_tokens_seen": 9642816, + "step": 14310 + }, + { + "epoch": 0.34971783157843306, + "grad_norm": 2.605492353439331, + "learning_rate": 1.398739434211169e-06, + "loss": 0.1033, + "num_input_tokens_seen": 9645952, + "step": 14315 + }, + { + "epoch": 0.3498399824102802, + "grad_norm": 0.7130058407783508, + "learning_rate": 1.399228025602189e-06, + "loss": 0.1258, + "num_input_tokens_seen": 9649152, + "step": 14320 + }, + { + "epoch": 0.3499621332421274, + "grad_norm": 28.27107810974121, + "learning_rate": 1.3997166169932084e-06, + "loss": 0.0797, + "num_input_tokens_seen": 9652544, + "step": 14325 + }, + { + "epoch": 0.3500842840739745, + "grad_norm": 40.19394302368164, + "learning_rate": 1.4002052083842281e-06, + "loss": 0.145, + "num_input_tokens_seen": 9656320, + "step": 14330 + }, + { + "epoch": 0.3502064349058217, + "grad_norm": 26.140825271606445, + "learning_rate": 1.400693799775248e-06, + "loss": 0.1833, + "num_input_tokens_seen": 9659264, + "step": 14335 + }, + { + "epoch": 0.3503285857376689, + "grad_norm": 36.82887649536133, + "learning_rate": 1.4011823911662677e-06, + "loss": 0.2014, + "num_input_tokens_seen": 9662528, + "step": 14340 + }, + { + "epoch": 0.35045073656951603, + "grad_norm": 6.582906246185303, + "learning_rate": 1.4016709825572872e-06, + "loss": 0.0795, + "num_input_tokens_seen": 9666240, + "step": 14345 + }, + { + "epoch": 0.3505728874013632, + "grad_norm": 51.33736801147461, + "learning_rate": 1.402159573948307e-06, + "loss": 0.0894, + "num_input_tokens_seen": 9669376, + "step": 14350 + }, + { + "epoch": 0.35069503823321035, + "grad_norm": 2.4924840927124023, + "learning_rate": 1.4026481653393267e-06, + "loss": 0.0369, + "num_input_tokens_seen": 9672768, + "step": 14355 + }, + { + "epoch": 0.35081718906505754, + "grad_norm": 13.934281349182129, + "learning_rate": 1.4031367567303462e-06, + "loss": 0.1528, + "num_input_tokens_seen": 9675840, + "step": 14360 + }, + { + "epoch": 0.3509393398969047, + "grad_norm": 16.30111312866211, + "learning_rate": 1.403625348121366e-06, + "loss": 0.1661, + "num_input_tokens_seen": 9678976, + "step": 14365 + }, + { + "epoch": 0.35106149072875187, + "grad_norm": 1.643057942390442, + "learning_rate": 1.4041139395123858e-06, + "loss": 0.0757, + "num_input_tokens_seen": 9682496, + "step": 14370 + }, + { + "epoch": 0.351183641560599, + "grad_norm": 26.52783203125, + "learning_rate": 1.4046025309034055e-06, + "loss": 0.1252, + "num_input_tokens_seen": 9685568, + "step": 14375 + }, + { + "epoch": 0.3513057923924462, + "grad_norm": 31.023881912231445, + "learning_rate": 1.4050911222944251e-06, + "loss": 0.183, + "num_input_tokens_seen": 9689344, + "step": 14380 + }, + { + "epoch": 0.3514279432242934, + "grad_norm": 53.98402786254883, + "learning_rate": 1.4055797136854448e-06, + "loss": 0.0644, + "num_input_tokens_seen": 9692288, + "step": 14385 + }, + { + "epoch": 0.3515500940561405, + "grad_norm": 32.249046325683594, + "learning_rate": 1.4060683050764645e-06, + "loss": 0.1095, + "num_input_tokens_seen": 9695744, + "step": 14390 + }, + { + "epoch": 0.3516722448879877, + "grad_norm": 0.3559386432170868, + "learning_rate": 1.4065568964674842e-06, + "loss": 0.1922, + "num_input_tokens_seen": 9698880, + "step": 14395 + }, + { + "epoch": 0.35179439571983484, + "grad_norm": 1.8468283414840698, + "learning_rate": 1.4070454878585039e-06, + "loss": 0.1522, + "num_input_tokens_seen": 9702400, + "step": 14400 + }, + { + "epoch": 0.35191654655168203, + "grad_norm": 1.7591185569763184, + "learning_rate": 1.4075340792495235e-06, + "loss": 0.059, + "num_input_tokens_seen": 9705792, + "step": 14405 + }, + { + "epoch": 0.35203869738352916, + "grad_norm": 15.38747787475586, + "learning_rate": 1.4080226706405432e-06, + "loss": 0.0717, + "num_input_tokens_seen": 9709056, + "step": 14410 + }, + { + "epoch": 0.35216084821537635, + "grad_norm": 2.455747127532959, + "learning_rate": 1.408511262031563e-06, + "loss": 0.113, + "num_input_tokens_seen": 9712512, + "step": 14415 + }, + { + "epoch": 0.3522829990472235, + "grad_norm": 12.720258712768555, + "learning_rate": 1.4089998534225826e-06, + "loss": 0.2413, + "num_input_tokens_seen": 9716544, + "step": 14420 + }, + { + "epoch": 0.3524051498790707, + "grad_norm": 20.368738174438477, + "learning_rate": 1.4094884448136025e-06, + "loss": 0.126, + "num_input_tokens_seen": 9720000, + "step": 14425 + }, + { + "epoch": 0.35252730071091787, + "grad_norm": 30.44447898864746, + "learning_rate": 1.409977036204622e-06, + "loss": 0.186, + "num_input_tokens_seen": 9723520, + "step": 14430 + }, + { + "epoch": 0.352649451542765, + "grad_norm": 8.672028541564941, + "learning_rate": 1.4104656275956416e-06, + "loss": 0.1164, + "num_input_tokens_seen": 9726400, + "step": 14435 + }, + { + "epoch": 0.3527716023746122, + "grad_norm": 23.961200714111328, + "learning_rate": 1.4109542189866615e-06, + "loss": 0.1667, + "num_input_tokens_seen": 9729472, + "step": 14440 + }, + { + "epoch": 0.3528937532064593, + "grad_norm": 17.22600746154785, + "learning_rate": 1.411442810377681e-06, + "loss": 0.0485, + "num_input_tokens_seen": 9733056, + "step": 14445 + }, + { + "epoch": 0.3530159040383065, + "grad_norm": 1.832526445388794, + "learning_rate": 1.4119314017687007e-06, + "loss": 0.0319, + "num_input_tokens_seen": 9737088, + "step": 14450 + }, + { + "epoch": 0.35313805487015365, + "grad_norm": 21.00478172302246, + "learning_rate": 1.4124199931597206e-06, + "loss": 0.0899, + "num_input_tokens_seen": 9740480, + "step": 14455 + }, + { + "epoch": 0.35326020570200084, + "grad_norm": 11.341621398925781, + "learning_rate": 1.4129085845507403e-06, + "loss": 0.1556, + "num_input_tokens_seen": 9743744, + "step": 14460 + }, + { + "epoch": 0.353382356533848, + "grad_norm": 22.92499542236328, + "learning_rate": 1.4133971759417597e-06, + "loss": 0.0558, + "num_input_tokens_seen": 9746944, + "step": 14465 + }, + { + "epoch": 0.35350450736569516, + "grad_norm": 21.124168395996094, + "learning_rate": 1.4138857673327796e-06, + "loss": 0.0949, + "num_input_tokens_seen": 9750144, + "step": 14470 + }, + { + "epoch": 0.3536266581975423, + "grad_norm": 6.369211673736572, + "learning_rate": 1.4143743587237993e-06, + "loss": 0.1063, + "num_input_tokens_seen": 9753984, + "step": 14475 + }, + { + "epoch": 0.3537488090293895, + "grad_norm": 10.5155611038208, + "learning_rate": 1.4148629501148188e-06, + "loss": 0.1582, + "num_input_tokens_seen": 9757376, + "step": 14480 + }, + { + "epoch": 0.3538709598612367, + "grad_norm": 12.186725616455078, + "learning_rate": 1.4153515415058387e-06, + "loss": 0.047, + "num_input_tokens_seen": 9760576, + "step": 14485 + }, + { + "epoch": 0.3539931106930838, + "grad_norm": 3.1093106269836426, + "learning_rate": 1.4158401328968583e-06, + "loss": 0.0452, + "num_input_tokens_seen": 9763520, + "step": 14490 + }, + { + "epoch": 0.354115261524931, + "grad_norm": 24.648107528686523, + "learning_rate": 1.416328724287878e-06, + "loss": 0.1651, + "num_input_tokens_seen": 9767040, + "step": 14495 + }, + { + "epoch": 0.35423741235677814, + "grad_norm": 1.211809515953064, + "learning_rate": 1.4168173156788977e-06, + "loss": 0.1228, + "num_input_tokens_seen": 9770368, + "step": 14500 + }, + { + "epoch": 0.3543595631886253, + "grad_norm": 0.7195934057235718, + "learning_rate": 1.4173059070699174e-06, + "loss": 0.0829, + "num_input_tokens_seen": 9773888, + "step": 14505 + }, + { + "epoch": 0.35448171402047246, + "grad_norm": 19.63247299194336, + "learning_rate": 1.417794498460937e-06, + "loss": 0.1262, + "num_input_tokens_seen": 9777216, + "step": 14510 + }, + { + "epoch": 0.35460386485231965, + "grad_norm": 36.69753646850586, + "learning_rate": 1.4182830898519568e-06, + "loss": 0.0925, + "num_input_tokens_seen": 9780544, + "step": 14515 + }, + { + "epoch": 0.3547260156841668, + "grad_norm": 0.07466413825750351, + "learning_rate": 1.4187716812429764e-06, + "loss": 0.1258, + "num_input_tokens_seen": 9783680, + "step": 14520 + }, + { + "epoch": 0.354848166516014, + "grad_norm": 24.7801570892334, + "learning_rate": 1.4192602726339961e-06, + "loss": 0.2356, + "num_input_tokens_seen": 9786880, + "step": 14525 + }, + { + "epoch": 0.35497031734786116, + "grad_norm": 23.782604217529297, + "learning_rate": 1.419748864025016e-06, + "loss": 0.1121, + "num_input_tokens_seen": 9790144, + "step": 14530 + }, + { + "epoch": 0.3550924681797083, + "grad_norm": 16.412891387939453, + "learning_rate": 1.4202374554160355e-06, + "loss": 0.1484, + "num_input_tokens_seen": 9794240, + "step": 14535 + }, + { + "epoch": 0.3552146190115555, + "grad_norm": 21.980588912963867, + "learning_rate": 1.4207260468070552e-06, + "loss": 0.1654, + "num_input_tokens_seen": 9797632, + "step": 14540 + }, + { + "epoch": 0.3553367698434026, + "grad_norm": 22.10216522216797, + "learning_rate": 1.421214638198075e-06, + "loss": 0.0966, + "num_input_tokens_seen": 9801152, + "step": 14545 + }, + { + "epoch": 0.3554589206752498, + "grad_norm": 28.08307456970215, + "learning_rate": 1.4217032295890945e-06, + "loss": 0.1549, + "num_input_tokens_seen": 9804352, + "step": 14550 + }, + { + "epoch": 0.35558107150709695, + "grad_norm": 17.134769439697266, + "learning_rate": 1.4221918209801142e-06, + "loss": 0.0237, + "num_input_tokens_seen": 9808512, + "step": 14555 + }, + { + "epoch": 0.35570322233894414, + "grad_norm": 26.15998649597168, + "learning_rate": 1.422680412371134e-06, + "loss": 0.1175, + "num_input_tokens_seen": 9811328, + "step": 14560 + }, + { + "epoch": 0.35582537317079127, + "grad_norm": 16.268836975097656, + "learning_rate": 1.4231690037621536e-06, + "loss": 0.1035, + "num_input_tokens_seen": 9815104, + "step": 14565 + }, + { + "epoch": 0.35594752400263846, + "grad_norm": 6.109137535095215, + "learning_rate": 1.4236575951531732e-06, + "loss": 0.1708, + "num_input_tokens_seen": 9818688, + "step": 14570 + }, + { + "epoch": 0.35606967483448565, + "grad_norm": 1.4849107265472412, + "learning_rate": 1.4241461865441931e-06, + "loss": 0.0521, + "num_input_tokens_seen": 9822464, + "step": 14575 + }, + { + "epoch": 0.3561918256663328, + "grad_norm": 23.204818725585938, + "learning_rate": 1.4246347779352128e-06, + "loss": 0.182, + "num_input_tokens_seen": 9825728, + "step": 14580 + }, + { + "epoch": 0.35631397649818, + "grad_norm": 15.597477912902832, + "learning_rate": 1.4251233693262323e-06, + "loss": 0.1155, + "num_input_tokens_seen": 9829184, + "step": 14585 + }, + { + "epoch": 0.3564361273300271, + "grad_norm": 2.52561616897583, + "learning_rate": 1.4256119607172522e-06, + "loss": 0.1183, + "num_input_tokens_seen": 9832704, + "step": 14590 + }, + { + "epoch": 0.3565582781618743, + "grad_norm": 15.523015022277832, + "learning_rate": 1.4261005521082719e-06, + "loss": 0.2253, + "num_input_tokens_seen": 9835776, + "step": 14595 + }, + { + "epoch": 0.35668042899372143, + "grad_norm": 26.568798065185547, + "learning_rate": 1.4265891434992913e-06, + "loss": 0.1039, + "num_input_tokens_seen": 9839168, + "step": 14600 + }, + { + "epoch": 0.3568025798255686, + "grad_norm": 12.2405366897583, + "learning_rate": 1.4270777348903112e-06, + "loss": 0.1266, + "num_input_tokens_seen": 9842112, + "step": 14605 + }, + { + "epoch": 0.35692473065741576, + "grad_norm": 10.383499145507812, + "learning_rate": 1.427566326281331e-06, + "loss": 0.0948, + "num_input_tokens_seen": 9846016, + "step": 14610 + }, + { + "epoch": 0.35704688148926295, + "grad_norm": 6.05488395690918, + "learning_rate": 1.4280549176723506e-06, + "loss": 0.1952, + "num_input_tokens_seen": 9849280, + "step": 14615 + }, + { + "epoch": 0.3571690323211101, + "grad_norm": 12.122830390930176, + "learning_rate": 1.4285435090633703e-06, + "loss": 0.0812, + "num_input_tokens_seen": 9852224, + "step": 14620 + }, + { + "epoch": 0.35729118315295727, + "grad_norm": 10.15803050994873, + "learning_rate": 1.42903210045439e-06, + "loss": 0.1073, + "num_input_tokens_seen": 9855552, + "step": 14625 + }, + { + "epoch": 0.35741333398480446, + "grad_norm": 21.924358367919922, + "learning_rate": 1.4295206918454096e-06, + "loss": 0.1347, + "num_input_tokens_seen": 9858560, + "step": 14630 + }, + { + "epoch": 0.3575354848166516, + "grad_norm": 22.783618927001953, + "learning_rate": 1.4300092832364293e-06, + "loss": 0.1511, + "num_input_tokens_seen": 9862528, + "step": 14635 + }, + { + "epoch": 0.3576576356484988, + "grad_norm": 52.09235763549805, + "learning_rate": 1.430497874627449e-06, + "loss": 0.1841, + "num_input_tokens_seen": 9865920, + "step": 14640 + }, + { + "epoch": 0.3577797864803459, + "grad_norm": 24.848285675048828, + "learning_rate": 1.4309864660184687e-06, + "loss": 0.0648, + "num_input_tokens_seen": 9869440, + "step": 14645 + }, + { + "epoch": 0.3579019373121931, + "grad_norm": 14.198959350585938, + "learning_rate": 1.4314750574094886e-06, + "loss": 0.1556, + "num_input_tokens_seen": 9872320, + "step": 14650 + }, + { + "epoch": 0.35802408814404024, + "grad_norm": 5.674281120300293, + "learning_rate": 1.431963648800508e-06, + "loss": 0.1211, + "num_input_tokens_seen": 9875968, + "step": 14655 + }, + { + "epoch": 0.35814623897588743, + "grad_norm": 25.904781341552734, + "learning_rate": 1.4324522401915277e-06, + "loss": 0.1172, + "num_input_tokens_seen": 9879040, + "step": 14660 + }, + { + "epoch": 0.35826838980773457, + "grad_norm": 7.980208396911621, + "learning_rate": 1.4329408315825476e-06, + "loss": 0.091, + "num_input_tokens_seen": 9882368, + "step": 14665 + }, + { + "epoch": 0.35839054063958176, + "grad_norm": 10.887474060058594, + "learning_rate": 1.433429422973567e-06, + "loss": 0.1043, + "num_input_tokens_seen": 9885504, + "step": 14670 + }, + { + "epoch": 0.35851269147142895, + "grad_norm": 4.5630316734313965, + "learning_rate": 1.4339180143645868e-06, + "loss": 0.0735, + "num_input_tokens_seen": 9889088, + "step": 14675 + }, + { + "epoch": 0.3586348423032761, + "grad_norm": 21.410566329956055, + "learning_rate": 1.4344066057556067e-06, + "loss": 0.0663, + "num_input_tokens_seen": 9892352, + "step": 14680 + }, + { + "epoch": 0.35875699313512327, + "grad_norm": 6.099719047546387, + "learning_rate": 1.4348951971466261e-06, + "loss": 0.0196, + "num_input_tokens_seen": 9895872, + "step": 14685 + }, + { + "epoch": 0.3588791439669704, + "grad_norm": 17.751115798950195, + "learning_rate": 1.4353837885376458e-06, + "loss": 0.0654, + "num_input_tokens_seen": 9899200, + "step": 14690 + }, + { + "epoch": 0.3590012947988176, + "grad_norm": 0.37945330142974854, + "learning_rate": 1.4358723799286657e-06, + "loss": 0.143, + "num_input_tokens_seen": 9903104, + "step": 14695 + }, + { + "epoch": 0.35912344563066473, + "grad_norm": 13.141345977783203, + "learning_rate": 1.4363609713196854e-06, + "loss": 0.1784, + "num_input_tokens_seen": 9906176, + "step": 14700 + }, + { + "epoch": 0.3592455964625119, + "grad_norm": 22.139732360839844, + "learning_rate": 1.4368495627107049e-06, + "loss": 0.0943, + "num_input_tokens_seen": 9909504, + "step": 14705 + }, + { + "epoch": 0.35936774729435905, + "grad_norm": 14.856293678283691, + "learning_rate": 1.4373381541017247e-06, + "loss": 0.1623, + "num_input_tokens_seen": 9913088, + "step": 14710 + }, + { + "epoch": 0.35948989812620624, + "grad_norm": 20.756912231445312, + "learning_rate": 1.4378267454927444e-06, + "loss": 0.216, + "num_input_tokens_seen": 9916608, + "step": 14715 + }, + { + "epoch": 0.35961204895805343, + "grad_norm": 7.858725070953369, + "learning_rate": 1.438315336883764e-06, + "loss": 0.2218, + "num_input_tokens_seen": 9919936, + "step": 14720 + }, + { + "epoch": 0.35973419978990057, + "grad_norm": 22.180316925048828, + "learning_rate": 1.4388039282747838e-06, + "loss": 0.1147, + "num_input_tokens_seen": 9922752, + "step": 14725 + }, + { + "epoch": 0.35985635062174776, + "grad_norm": 14.053325653076172, + "learning_rate": 1.4392925196658035e-06, + "loss": 0.1799, + "num_input_tokens_seen": 9926144, + "step": 14730 + }, + { + "epoch": 0.3599785014535949, + "grad_norm": 13.91140365600586, + "learning_rate": 1.4397811110568232e-06, + "loss": 0.0715, + "num_input_tokens_seen": 9929344, + "step": 14735 + }, + { + "epoch": 0.3601006522854421, + "grad_norm": 13.437630653381348, + "learning_rate": 1.4402697024478428e-06, + "loss": 0.101, + "num_input_tokens_seen": 9932416, + "step": 14740 + }, + { + "epoch": 0.3602228031172892, + "grad_norm": 2.004776954650879, + "learning_rate": 1.4407582938388625e-06, + "loss": 0.0896, + "num_input_tokens_seen": 9936000, + "step": 14745 + }, + { + "epoch": 0.3603449539491364, + "grad_norm": 24.528547286987305, + "learning_rate": 1.4412468852298822e-06, + "loss": 0.0944, + "num_input_tokens_seen": 9938816, + "step": 14750 + }, + { + "epoch": 0.36046710478098354, + "grad_norm": 24.631078720092773, + "learning_rate": 1.4417354766209019e-06, + "loss": 0.1834, + "num_input_tokens_seen": 9942080, + "step": 14755 + }, + { + "epoch": 0.36058925561283073, + "grad_norm": 25.52674102783203, + "learning_rate": 1.4422240680119216e-06, + "loss": 0.0603, + "num_input_tokens_seen": 9945216, + "step": 14760 + }, + { + "epoch": 0.36071140644467786, + "grad_norm": 7.100739479064941, + "learning_rate": 1.4427126594029412e-06, + "loss": 0.0395, + "num_input_tokens_seen": 9948416, + "step": 14765 + }, + { + "epoch": 0.36083355727652505, + "grad_norm": 41.38105010986328, + "learning_rate": 1.4432012507939611e-06, + "loss": 0.174, + "num_input_tokens_seen": 9952128, + "step": 14770 + }, + { + "epoch": 0.36095570810837224, + "grad_norm": 10.956008911132812, + "learning_rate": 1.4436898421849806e-06, + "loss": 0.201, + "num_input_tokens_seen": 9955584, + "step": 14775 + }, + { + "epoch": 0.3610778589402194, + "grad_norm": 19.014673233032227, + "learning_rate": 1.4441784335760003e-06, + "loss": 0.1133, + "num_input_tokens_seen": 9959296, + "step": 14780 + }, + { + "epoch": 0.36120000977206657, + "grad_norm": 16.54399871826172, + "learning_rate": 1.4446670249670202e-06, + "loss": 0.0877, + "num_input_tokens_seen": 9962816, + "step": 14785 + }, + { + "epoch": 0.3613221606039137, + "grad_norm": 0.5795342922210693, + "learning_rate": 1.4451556163580396e-06, + "loss": 0.0743, + "num_input_tokens_seen": 9965952, + "step": 14790 + }, + { + "epoch": 0.3614443114357609, + "grad_norm": 23.187488555908203, + "learning_rate": 1.4456442077490593e-06, + "loss": 0.1787, + "num_input_tokens_seen": 9969344, + "step": 14795 + }, + { + "epoch": 0.361566462267608, + "grad_norm": 22.39498519897461, + "learning_rate": 1.4461327991400792e-06, + "loss": 0.1723, + "num_input_tokens_seen": 9972672, + "step": 14800 + }, + { + "epoch": 0.3616886130994552, + "grad_norm": 7.977668762207031, + "learning_rate": 1.446621390531099e-06, + "loss": 0.0897, + "num_input_tokens_seen": 9975616, + "step": 14805 + }, + { + "epoch": 0.36181076393130235, + "grad_norm": 40.34514617919922, + "learning_rate": 1.4471099819221184e-06, + "loss": 0.1675, + "num_input_tokens_seen": 9979648, + "step": 14810 + }, + { + "epoch": 0.36193291476314954, + "grad_norm": 38.52743911743164, + "learning_rate": 1.4475985733131383e-06, + "loss": 0.1314, + "num_input_tokens_seen": 9983168, + "step": 14815 + }, + { + "epoch": 0.36205506559499673, + "grad_norm": 14.206141471862793, + "learning_rate": 1.448087164704158e-06, + "loss": 0.1952, + "num_input_tokens_seen": 9986624, + "step": 14820 + }, + { + "epoch": 0.36217721642684386, + "grad_norm": 36.0733757019043, + "learning_rate": 1.4485757560951774e-06, + "loss": 0.0967, + "num_input_tokens_seen": 9990080, + "step": 14825 + }, + { + "epoch": 0.36229936725869105, + "grad_norm": 3.471787691116333, + "learning_rate": 1.4490643474861973e-06, + "loss": 0.1674, + "num_input_tokens_seen": 9993088, + "step": 14830 + }, + { + "epoch": 0.3624215180905382, + "grad_norm": 1.9856698513031006, + "learning_rate": 1.449552938877217e-06, + "loss": 0.1011, + "num_input_tokens_seen": 9996096, + "step": 14835 + }, + { + "epoch": 0.3625436689223854, + "grad_norm": 22.805315017700195, + "learning_rate": 1.4500415302682365e-06, + "loss": 0.065, + "num_input_tokens_seen": 9999744, + "step": 14840 + }, + { + "epoch": 0.3626658197542325, + "grad_norm": 4.531700611114502, + "learning_rate": 1.4505301216592564e-06, + "loss": 0.0536, + "num_input_tokens_seen": 10002944, + "step": 14845 + }, + { + "epoch": 0.3627879705860797, + "grad_norm": 4.974030494689941, + "learning_rate": 1.451018713050276e-06, + "loss": 0.1733, + "num_input_tokens_seen": 10006336, + "step": 14850 + }, + { + "epoch": 0.36291012141792683, + "grad_norm": 15.88058853149414, + "learning_rate": 1.4515073044412957e-06, + "loss": 0.1593, + "num_input_tokens_seen": 10009344, + "step": 14855 + }, + { + "epoch": 0.363032272249774, + "grad_norm": 61.39813995361328, + "learning_rate": 1.4519958958323154e-06, + "loss": 0.1427, + "num_input_tokens_seen": 10012352, + "step": 14860 + }, + { + "epoch": 0.36315442308162116, + "grad_norm": 1.7952611446380615, + "learning_rate": 1.452484487223335e-06, + "loss": 0.1268, + "num_input_tokens_seen": 10015360, + "step": 14865 + }, + { + "epoch": 0.36327657391346835, + "grad_norm": 12.197927474975586, + "learning_rate": 1.4529730786143548e-06, + "loss": 0.084, + "num_input_tokens_seen": 10019392, + "step": 14870 + }, + { + "epoch": 0.36339872474531554, + "grad_norm": 19.5502872467041, + "learning_rate": 1.4534616700053744e-06, + "loss": 0.156, + "num_input_tokens_seen": 10022912, + "step": 14875 + }, + { + "epoch": 0.36352087557716267, + "grad_norm": 26.997758865356445, + "learning_rate": 1.4539502613963941e-06, + "loss": 0.059, + "num_input_tokens_seen": 10026304, + "step": 14880 + }, + { + "epoch": 0.36364302640900986, + "grad_norm": 1.9117192029953003, + "learning_rate": 1.4544388527874138e-06, + "loss": 0.1032, + "num_input_tokens_seen": 10029568, + "step": 14885 + }, + { + "epoch": 0.363765177240857, + "grad_norm": 37.157466888427734, + "learning_rate": 1.4549274441784337e-06, + "loss": 0.1649, + "num_input_tokens_seen": 10033088, + "step": 14890 + }, + { + "epoch": 0.3638873280727042, + "grad_norm": 30.732894897460938, + "learning_rate": 1.4554160355694532e-06, + "loss": 0.0947, + "num_input_tokens_seen": 10036672, + "step": 14895 + }, + { + "epoch": 0.3640094789045513, + "grad_norm": 4.4297404289245605, + "learning_rate": 1.4559046269604728e-06, + "loss": 0.2256, + "num_input_tokens_seen": 10040000, + "step": 14900 + }, + { + "epoch": 0.3641316297363985, + "grad_norm": 0.5586517453193665, + "learning_rate": 1.4563932183514927e-06, + "loss": 0.0815, + "num_input_tokens_seen": 10043072, + "step": 14905 + }, + { + "epoch": 0.36425378056824564, + "grad_norm": 0.9251121878623962, + "learning_rate": 1.4568818097425122e-06, + "loss": 0.2063, + "num_input_tokens_seen": 10046336, + "step": 14910 + }, + { + "epoch": 0.36437593140009283, + "grad_norm": 31.80733299255371, + "learning_rate": 1.4573704011335319e-06, + "loss": 0.1413, + "num_input_tokens_seen": 10049152, + "step": 14915 + }, + { + "epoch": 0.36449808223194, + "grad_norm": 23.533954620361328, + "learning_rate": 1.4578589925245518e-06, + "loss": 0.0804, + "num_input_tokens_seen": 10052224, + "step": 14920 + }, + { + "epoch": 0.36462023306378716, + "grad_norm": 6.359363079071045, + "learning_rate": 1.4583475839155715e-06, + "loss": 0.0736, + "num_input_tokens_seen": 10055488, + "step": 14925 + }, + { + "epoch": 0.36474238389563435, + "grad_norm": 2.5986242294311523, + "learning_rate": 1.458836175306591e-06, + "loss": 0.0469, + "num_input_tokens_seen": 10058752, + "step": 14930 + }, + { + "epoch": 0.3648645347274815, + "grad_norm": 30.787466049194336, + "learning_rate": 1.4593247666976108e-06, + "loss": 0.2228, + "num_input_tokens_seen": 10062080, + "step": 14935 + }, + { + "epoch": 0.36498668555932867, + "grad_norm": 10.304224014282227, + "learning_rate": 1.4598133580886305e-06, + "loss": 0.1705, + "num_input_tokens_seen": 10065408, + "step": 14940 + }, + { + "epoch": 0.3651088363911758, + "grad_norm": 3.115684747695923, + "learning_rate": 1.46030194947965e-06, + "loss": 0.0597, + "num_input_tokens_seen": 10068480, + "step": 14945 + }, + { + "epoch": 0.365230987223023, + "grad_norm": 28.490812301635742, + "learning_rate": 1.4607905408706699e-06, + "loss": 0.2122, + "num_input_tokens_seen": 10071488, + "step": 14950 + }, + { + "epoch": 0.36535313805487013, + "grad_norm": 3.1588618755340576, + "learning_rate": 1.4612791322616896e-06, + "loss": 0.1656, + "num_input_tokens_seen": 10075072, + "step": 14955 + }, + { + "epoch": 0.3654752888867173, + "grad_norm": 12.030863761901855, + "learning_rate": 1.4617677236527092e-06, + "loss": 0.157, + "num_input_tokens_seen": 10078400, + "step": 14960 + }, + { + "epoch": 0.3655974397185645, + "grad_norm": 9.774025917053223, + "learning_rate": 1.462256315043729e-06, + "loss": 0.0806, + "num_input_tokens_seen": 10081472, + "step": 14965 + }, + { + "epoch": 0.36571959055041164, + "grad_norm": 24.714754104614258, + "learning_rate": 1.4627449064347486e-06, + "loss": 0.1563, + "num_input_tokens_seen": 10084608, + "step": 14970 + }, + { + "epoch": 0.36584174138225883, + "grad_norm": 4.036285877227783, + "learning_rate": 1.4632334978257683e-06, + "loss": 0.1024, + "num_input_tokens_seen": 10088000, + "step": 14975 + }, + { + "epoch": 0.36596389221410597, + "grad_norm": 11.198826789855957, + "learning_rate": 1.463722089216788e-06, + "loss": 0.185, + "num_input_tokens_seen": 10091520, + "step": 14980 + }, + { + "epoch": 0.36608604304595316, + "grad_norm": 16.43745994567871, + "learning_rate": 1.4642106806078076e-06, + "loss": 0.1085, + "num_input_tokens_seen": 10094656, + "step": 14985 + }, + { + "epoch": 0.3662081938778003, + "grad_norm": 33.093833923339844, + "learning_rate": 1.4646992719988273e-06, + "loss": 0.154, + "num_input_tokens_seen": 10097792, + "step": 14990 + }, + { + "epoch": 0.3663303447096475, + "grad_norm": 6.07112979888916, + "learning_rate": 1.465187863389847e-06, + "loss": 0.0851, + "num_input_tokens_seen": 10101184, + "step": 14995 + }, + { + "epoch": 0.3664524955414946, + "grad_norm": 18.167476654052734, + "learning_rate": 1.4656764547808667e-06, + "loss": 0.1695, + "num_input_tokens_seen": 10104256, + "step": 15000 + }, + { + "epoch": 0.3665746463733418, + "grad_norm": 4.230319499969482, + "learning_rate": 1.4661650461718864e-06, + "loss": 0.0803, + "num_input_tokens_seen": 10107328, + "step": 15005 + }, + { + "epoch": 0.36669679720518894, + "grad_norm": 1.558373212814331, + "learning_rate": 1.4666536375629063e-06, + "loss": 0.0945, + "num_input_tokens_seen": 10110912, + "step": 15010 + }, + { + "epoch": 0.36681894803703613, + "grad_norm": 3.3440184593200684, + "learning_rate": 1.4671422289539257e-06, + "loss": 0.108, + "num_input_tokens_seen": 10114176, + "step": 15015 + }, + { + "epoch": 0.3669410988688833, + "grad_norm": 2.4016432762145996, + "learning_rate": 1.4676308203449454e-06, + "loss": 0.0921, + "num_input_tokens_seen": 10117248, + "step": 15020 + }, + { + "epoch": 0.36706324970073045, + "grad_norm": 11.902193069458008, + "learning_rate": 1.4681194117359653e-06, + "loss": 0.0643, + "num_input_tokens_seen": 10120768, + "step": 15025 + }, + { + "epoch": 0.36718540053257764, + "grad_norm": 14.054060935974121, + "learning_rate": 1.4686080031269848e-06, + "loss": 0.0781, + "num_input_tokens_seen": 10123840, + "step": 15030 + }, + { + "epoch": 0.3673075513644248, + "grad_norm": 2.9538633823394775, + "learning_rate": 1.4690965945180045e-06, + "loss": 0.0874, + "num_input_tokens_seen": 10127104, + "step": 15035 + }, + { + "epoch": 0.36742970219627197, + "grad_norm": 10.337444305419922, + "learning_rate": 1.4695851859090243e-06, + "loss": 0.098, + "num_input_tokens_seen": 10130688, + "step": 15040 + }, + { + "epoch": 0.3675518530281191, + "grad_norm": 19.6004695892334, + "learning_rate": 1.470073777300044e-06, + "loss": 0.1618, + "num_input_tokens_seen": 10134144, + "step": 15045 + }, + { + "epoch": 0.3676740038599663, + "grad_norm": 11.789777755737305, + "learning_rate": 1.4705623686910635e-06, + "loss": 0.0887, + "num_input_tokens_seen": 10137344, + "step": 15050 + }, + { + "epoch": 0.3677961546918134, + "grad_norm": 29.683469772338867, + "learning_rate": 1.4710509600820834e-06, + "loss": 0.2011, + "num_input_tokens_seen": 10140800, + "step": 15055 + }, + { + "epoch": 0.3679183055236606, + "grad_norm": 30.46916961669922, + "learning_rate": 1.471539551473103e-06, + "loss": 0.1338, + "num_input_tokens_seen": 10143936, + "step": 15060 + }, + { + "epoch": 0.3680404563555078, + "grad_norm": 10.194910049438477, + "learning_rate": 1.4720281428641225e-06, + "loss": 0.0552, + "num_input_tokens_seen": 10147392, + "step": 15065 + }, + { + "epoch": 0.36816260718735494, + "grad_norm": 23.93616485595703, + "learning_rate": 1.4725167342551424e-06, + "loss": 0.1437, + "num_input_tokens_seen": 10150400, + "step": 15070 + }, + { + "epoch": 0.36828475801920213, + "grad_norm": 7.265649795532227, + "learning_rate": 1.4730053256461621e-06, + "loss": 0.0665, + "num_input_tokens_seen": 10153728, + "step": 15075 + }, + { + "epoch": 0.36840690885104926, + "grad_norm": 23.494064331054688, + "learning_rate": 1.4734939170371818e-06, + "loss": 0.064, + "num_input_tokens_seen": 10157056, + "step": 15080 + }, + { + "epoch": 0.36852905968289645, + "grad_norm": 17.327404022216797, + "learning_rate": 1.4739825084282015e-06, + "loss": 0.1049, + "num_input_tokens_seen": 10161152, + "step": 15085 + }, + { + "epoch": 0.3686512105147436, + "grad_norm": 15.256969451904297, + "learning_rate": 1.4744710998192212e-06, + "loss": 0.2415, + "num_input_tokens_seen": 10164480, + "step": 15090 + }, + { + "epoch": 0.3687733613465908, + "grad_norm": 17.404699325561523, + "learning_rate": 1.4749596912102408e-06, + "loss": 0.154, + "num_input_tokens_seen": 10167360, + "step": 15095 + }, + { + "epoch": 0.3688955121784379, + "grad_norm": 0.9550981521606445, + "learning_rate": 1.4754482826012605e-06, + "loss": 0.0262, + "num_input_tokens_seen": 10170752, + "step": 15100 + }, + { + "epoch": 0.3690176630102851, + "grad_norm": 16.822263717651367, + "learning_rate": 1.4759368739922802e-06, + "loss": 0.1336, + "num_input_tokens_seen": 10174336, + "step": 15105 + }, + { + "epoch": 0.3691398138421323, + "grad_norm": 2.805422782897949, + "learning_rate": 1.4764254653832999e-06, + "loss": 0.0717, + "num_input_tokens_seen": 10177408, + "step": 15110 + }, + { + "epoch": 0.3692619646739794, + "grad_norm": 1.0176135301589966, + "learning_rate": 1.4769140567743196e-06, + "loss": 0.2487, + "num_input_tokens_seen": 10181440, + "step": 15115 + }, + { + "epoch": 0.3693841155058266, + "grad_norm": 1.3316823244094849, + "learning_rate": 1.4774026481653392e-06, + "loss": 0.0709, + "num_input_tokens_seen": 10184640, + "step": 15120 + }, + { + "epoch": 0.36950626633767375, + "grad_norm": 20.177623748779297, + "learning_rate": 1.477891239556359e-06, + "loss": 0.049, + "num_input_tokens_seen": 10187584, + "step": 15125 + }, + { + "epoch": 0.36962841716952094, + "grad_norm": 19.99055290222168, + "learning_rate": 1.4783798309473788e-06, + "loss": 0.1035, + "num_input_tokens_seen": 10190848, + "step": 15130 + }, + { + "epoch": 0.3697505680013681, + "grad_norm": 19.489837646484375, + "learning_rate": 1.4788684223383983e-06, + "loss": 0.0623, + "num_input_tokens_seen": 10194304, + "step": 15135 + }, + { + "epoch": 0.36987271883321526, + "grad_norm": 20.77276039123535, + "learning_rate": 1.479357013729418e-06, + "loss": 0.1515, + "num_input_tokens_seen": 10197696, + "step": 15140 + }, + { + "epoch": 0.3699948696650624, + "grad_norm": 4.104226589202881, + "learning_rate": 1.4798456051204379e-06, + "loss": 0.2384, + "num_input_tokens_seen": 10200576, + "step": 15145 + }, + { + "epoch": 0.3701170204969096, + "grad_norm": 45.69715881347656, + "learning_rate": 1.4803341965114573e-06, + "loss": 0.0572, + "num_input_tokens_seen": 10203840, + "step": 15150 + }, + { + "epoch": 0.3702391713287567, + "grad_norm": 29.123130798339844, + "learning_rate": 1.480822787902477e-06, + "loss": 0.0828, + "num_input_tokens_seen": 10207488, + "step": 15155 + }, + { + "epoch": 0.3703613221606039, + "grad_norm": 35.56546401977539, + "learning_rate": 1.481311379293497e-06, + "loss": 0.1452, + "num_input_tokens_seen": 10211008, + "step": 15160 + }, + { + "epoch": 0.3704834729924511, + "grad_norm": 27.84103012084961, + "learning_rate": 1.4817999706845166e-06, + "loss": 0.17, + "num_input_tokens_seen": 10214336, + "step": 15165 + }, + { + "epoch": 0.37060562382429824, + "grad_norm": 0.9257592558860779, + "learning_rate": 1.482288562075536e-06, + "loss": 0.1446, + "num_input_tokens_seen": 10217856, + "step": 15170 + }, + { + "epoch": 0.3707277746561454, + "grad_norm": 18.228384017944336, + "learning_rate": 1.482777153466556e-06, + "loss": 0.0167, + "num_input_tokens_seen": 10221696, + "step": 15175 + }, + { + "epoch": 0.37084992548799256, + "grad_norm": 31.668554306030273, + "learning_rate": 1.4832657448575756e-06, + "loss": 0.2496, + "num_input_tokens_seen": 10225088, + "step": 15180 + }, + { + "epoch": 0.37097207631983975, + "grad_norm": 16.257932662963867, + "learning_rate": 1.483754336248595e-06, + "loss": 0.0886, + "num_input_tokens_seen": 10228160, + "step": 15185 + }, + { + "epoch": 0.3710942271516869, + "grad_norm": 20.400663375854492, + "learning_rate": 1.484242927639615e-06, + "loss": 0.0719, + "num_input_tokens_seen": 10231744, + "step": 15190 + }, + { + "epoch": 0.3712163779835341, + "grad_norm": 48.53895950317383, + "learning_rate": 1.4847315190306347e-06, + "loss": 0.2309, + "num_input_tokens_seen": 10235520, + "step": 15195 + }, + { + "epoch": 0.3713385288153812, + "grad_norm": 25.890565872192383, + "learning_rate": 1.4852201104216544e-06, + "loss": 0.1409, + "num_input_tokens_seen": 10238848, + "step": 15200 + }, + { + "epoch": 0.3714606796472284, + "grad_norm": 0.1722467839717865, + "learning_rate": 1.485708701812674e-06, + "loss": 0.0193, + "num_input_tokens_seen": 10242304, + "step": 15205 + }, + { + "epoch": 0.3715828304790756, + "grad_norm": 1.651286244392395, + "learning_rate": 1.4861972932036937e-06, + "loss": 0.1968, + "num_input_tokens_seen": 10245440, + "step": 15210 + }, + { + "epoch": 0.3717049813109227, + "grad_norm": 33.69007110595703, + "learning_rate": 1.4866858845947134e-06, + "loss": 0.1411, + "num_input_tokens_seen": 10248768, + "step": 15215 + }, + { + "epoch": 0.3718271321427699, + "grad_norm": 4.165786266326904, + "learning_rate": 1.4871744759857329e-06, + "loss": 0.0839, + "num_input_tokens_seen": 10252160, + "step": 15220 + }, + { + "epoch": 0.37194928297461705, + "grad_norm": 3.8775460720062256, + "learning_rate": 1.4876630673767528e-06, + "loss": 0.2086, + "num_input_tokens_seen": 10255168, + "step": 15225 + }, + { + "epoch": 0.37207143380646424, + "grad_norm": 19.101089477539062, + "learning_rate": 1.4881516587677724e-06, + "loss": 0.2026, + "num_input_tokens_seen": 10259648, + "step": 15230 + }, + { + "epoch": 0.37219358463831137, + "grad_norm": 6.220980167388916, + "learning_rate": 1.4886402501587923e-06, + "loss": 0.0053, + "num_input_tokens_seen": 10263424, + "step": 15235 + }, + { + "epoch": 0.37231573547015856, + "grad_norm": 3.341536283493042, + "learning_rate": 1.4891288415498118e-06, + "loss": 0.1144, + "num_input_tokens_seen": 10266496, + "step": 15240 + }, + { + "epoch": 0.3724378863020057, + "grad_norm": 0.19938251376152039, + "learning_rate": 1.4896174329408315e-06, + "loss": 0.0223, + "num_input_tokens_seen": 10270144, + "step": 15245 + }, + { + "epoch": 0.3725600371338529, + "grad_norm": 18.8046932220459, + "learning_rate": 1.4901060243318514e-06, + "loss": 0.2106, + "num_input_tokens_seen": 10273984, + "step": 15250 + }, + { + "epoch": 0.3726821879657001, + "grad_norm": 5.193131923675537, + "learning_rate": 1.4905946157228709e-06, + "loss": 0.1617, + "num_input_tokens_seen": 10277056, + "step": 15255 + }, + { + "epoch": 0.3728043387975472, + "grad_norm": 32.93628692626953, + "learning_rate": 1.4910832071138905e-06, + "loss": 0.0294, + "num_input_tokens_seen": 10280384, + "step": 15260 + }, + { + "epoch": 0.3729264896293944, + "grad_norm": 6.409894943237305, + "learning_rate": 1.4915717985049104e-06, + "loss": 0.118, + "num_input_tokens_seen": 10283520, + "step": 15265 + }, + { + "epoch": 0.37304864046124153, + "grad_norm": 0.4226660132408142, + "learning_rate": 1.49206038989593e-06, + "loss": 0.1248, + "num_input_tokens_seen": 10286784, + "step": 15270 + }, + { + "epoch": 0.3731707912930887, + "grad_norm": 31.093536376953125, + "learning_rate": 1.4925489812869496e-06, + "loss": 0.2098, + "num_input_tokens_seen": 10290048, + "step": 15275 + }, + { + "epoch": 0.37329294212493586, + "grad_norm": 7.129814147949219, + "learning_rate": 1.4930375726779695e-06, + "loss": 0.2542, + "num_input_tokens_seen": 10293312, + "step": 15280 + }, + { + "epoch": 0.37341509295678305, + "grad_norm": 24.767244338989258, + "learning_rate": 1.4935261640689892e-06, + "loss": 0.1726, + "num_input_tokens_seen": 10296448, + "step": 15285 + }, + { + "epoch": 0.3735372437886302, + "grad_norm": 20.03944206237793, + "learning_rate": 1.4940147554600086e-06, + "loss": 0.257, + "num_input_tokens_seen": 10299520, + "step": 15290 + }, + { + "epoch": 0.37365939462047737, + "grad_norm": 3.744540214538574, + "learning_rate": 1.4945033468510285e-06, + "loss": 0.0406, + "num_input_tokens_seen": 10302976, + "step": 15295 + }, + { + "epoch": 0.3737815454523245, + "grad_norm": 28.81475830078125, + "learning_rate": 1.4949919382420482e-06, + "loss": 0.1918, + "num_input_tokens_seen": 10306432, + "step": 15300 + }, + { + "epoch": 0.3739036962841717, + "grad_norm": 1.5936713218688965, + "learning_rate": 1.4954805296330677e-06, + "loss": 0.1485, + "num_input_tokens_seen": 10310016, + "step": 15305 + }, + { + "epoch": 0.3740258471160189, + "grad_norm": 15.979466438293457, + "learning_rate": 1.4959691210240873e-06, + "loss": 0.1358, + "num_input_tokens_seen": 10313408, + "step": 15310 + }, + { + "epoch": 0.374147997947866, + "grad_norm": 0.897377073764801, + "learning_rate": 1.4964577124151072e-06, + "loss": 0.0169, + "num_input_tokens_seen": 10316992, + "step": 15315 + }, + { + "epoch": 0.3742701487797132, + "grad_norm": 14.27642822265625, + "learning_rate": 1.496946303806127e-06, + "loss": 0.1957, + "num_input_tokens_seen": 10320384, + "step": 15320 + }, + { + "epoch": 0.37439229961156034, + "grad_norm": 21.05185890197754, + "learning_rate": 1.4974348951971464e-06, + "loss": 0.1696, + "num_input_tokens_seen": 10323712, + "step": 15325 + }, + { + "epoch": 0.37451445044340753, + "grad_norm": 11.088553428649902, + "learning_rate": 1.4979234865881663e-06, + "loss": 0.1251, + "num_input_tokens_seen": 10326848, + "step": 15330 + }, + { + "epoch": 0.37463660127525467, + "grad_norm": 61.10281753540039, + "learning_rate": 1.498412077979186e-06, + "loss": 0.1813, + "num_input_tokens_seen": 10330112, + "step": 15335 + }, + { + "epoch": 0.37475875210710186, + "grad_norm": 14.84995174407959, + "learning_rate": 1.4989006693702054e-06, + "loss": 0.0824, + "num_input_tokens_seen": 10333696, + "step": 15340 + }, + { + "epoch": 0.374880902938949, + "grad_norm": 0.3659310042858124, + "learning_rate": 1.4993892607612253e-06, + "loss": 0.0672, + "num_input_tokens_seen": 10337024, + "step": 15345 + }, + { + "epoch": 0.3750030537707962, + "grad_norm": 20.780288696289062, + "learning_rate": 1.499877852152245e-06, + "loss": 0.066, + "num_input_tokens_seen": 10340480, + "step": 15350 + }, + { + "epoch": 0.37512520460264337, + "grad_norm": 27.659393310546875, + "learning_rate": 1.500366443543265e-06, + "loss": 0.2135, + "num_input_tokens_seen": 10343872, + "step": 15355 + }, + { + "epoch": 0.3752473554344905, + "grad_norm": 0.8734622597694397, + "learning_rate": 1.5008550349342844e-06, + "loss": 0.0905, + "num_input_tokens_seen": 10347200, + "step": 15360 + }, + { + "epoch": 0.3753695062663377, + "grad_norm": 14.381644248962402, + "learning_rate": 1.501343626325304e-06, + "loss": 0.1545, + "num_input_tokens_seen": 10350464, + "step": 15365 + }, + { + "epoch": 0.37549165709818483, + "grad_norm": 7.951765060424805, + "learning_rate": 1.501832217716324e-06, + "loss": 0.1257, + "num_input_tokens_seen": 10353728, + "step": 15370 + }, + { + "epoch": 0.375613807930032, + "grad_norm": 18.489259719848633, + "learning_rate": 1.5023208091073434e-06, + "loss": 0.0401, + "num_input_tokens_seen": 10356672, + "step": 15375 + }, + { + "epoch": 0.37573595876187915, + "grad_norm": 20.716266632080078, + "learning_rate": 1.502809400498363e-06, + "loss": 0.1308, + "num_input_tokens_seen": 10359808, + "step": 15380 + }, + { + "epoch": 0.37585810959372634, + "grad_norm": 2.1580662727355957, + "learning_rate": 1.503297991889383e-06, + "loss": 0.116, + "num_input_tokens_seen": 10363136, + "step": 15385 + }, + { + "epoch": 0.3759802604255735, + "grad_norm": 50.68510055541992, + "learning_rate": 1.5037865832804025e-06, + "loss": 0.1752, + "num_input_tokens_seen": 10366528, + "step": 15390 + }, + { + "epoch": 0.37610241125742067, + "grad_norm": 52.328433990478516, + "learning_rate": 1.5042751746714221e-06, + "loss": 0.287, + "num_input_tokens_seen": 10370240, + "step": 15395 + }, + { + "epoch": 0.37622456208926786, + "grad_norm": 12.517891883850098, + "learning_rate": 1.5047637660624418e-06, + "loss": 0.1475, + "num_input_tokens_seen": 10373888, + "step": 15400 + }, + { + "epoch": 0.376346712921115, + "grad_norm": 15.019147872924805, + "learning_rate": 1.5052523574534617e-06, + "loss": 0.1297, + "num_input_tokens_seen": 10377472, + "step": 15405 + }, + { + "epoch": 0.3764688637529622, + "grad_norm": 23.52758026123047, + "learning_rate": 1.5057409488444812e-06, + "loss": 0.1229, + "num_input_tokens_seen": 10380672, + "step": 15410 + }, + { + "epoch": 0.3765910145848093, + "grad_norm": 9.015829086303711, + "learning_rate": 1.5062295402355009e-06, + "loss": 0.1197, + "num_input_tokens_seen": 10384128, + "step": 15415 + }, + { + "epoch": 0.3767131654166565, + "grad_norm": 11.400869369506836, + "learning_rate": 1.5067181316265208e-06, + "loss": 0.142, + "num_input_tokens_seen": 10386816, + "step": 15420 + }, + { + "epoch": 0.37683531624850364, + "grad_norm": 3.6362061500549316, + "learning_rate": 1.5072067230175402e-06, + "loss": 0.0599, + "num_input_tokens_seen": 10390336, + "step": 15425 + }, + { + "epoch": 0.3769574670803508, + "grad_norm": 20.962465286254883, + "learning_rate": 1.50769531440856e-06, + "loss": 0.1482, + "num_input_tokens_seen": 10394112, + "step": 15430 + }, + { + "epoch": 0.37707961791219796, + "grad_norm": 15.88277530670166, + "learning_rate": 1.5081839057995798e-06, + "loss": 0.1473, + "num_input_tokens_seen": 10398016, + "step": 15435 + }, + { + "epoch": 0.37720176874404515, + "grad_norm": 0.9228224158287048, + "learning_rate": 1.5086724971905995e-06, + "loss": 0.099, + "num_input_tokens_seen": 10401280, + "step": 15440 + }, + { + "epoch": 0.3773239195758923, + "grad_norm": 1.427764654159546, + "learning_rate": 1.509161088581619e-06, + "loss": 0.1114, + "num_input_tokens_seen": 10404736, + "step": 15445 + }, + { + "epoch": 0.3774460704077395, + "grad_norm": 8.329426765441895, + "learning_rate": 1.5096496799726388e-06, + "loss": 0.1868, + "num_input_tokens_seen": 10408064, + "step": 15450 + }, + { + "epoch": 0.37756822123958667, + "grad_norm": 2.8574776649475098, + "learning_rate": 1.5101382713636585e-06, + "loss": 0.171, + "num_input_tokens_seen": 10411456, + "step": 15455 + }, + { + "epoch": 0.3776903720714338, + "grad_norm": 0.10268853604793549, + "learning_rate": 1.510626862754678e-06, + "loss": 0.048, + "num_input_tokens_seen": 10414592, + "step": 15460 + }, + { + "epoch": 0.377812522903281, + "grad_norm": 33.580810546875, + "learning_rate": 1.5111154541456979e-06, + "loss": 0.1467, + "num_input_tokens_seen": 10417984, + "step": 15465 + }, + { + "epoch": 0.3779346737351281, + "grad_norm": 6.88870906829834, + "learning_rate": 1.5116040455367176e-06, + "loss": 0.0913, + "num_input_tokens_seen": 10421568, + "step": 15470 + }, + { + "epoch": 0.3780568245669753, + "grad_norm": 2.1941027641296387, + "learning_rate": 1.5120926369277375e-06, + "loss": 0.0812, + "num_input_tokens_seen": 10424896, + "step": 15475 + }, + { + "epoch": 0.37817897539882245, + "grad_norm": 54.10744857788086, + "learning_rate": 1.512581228318757e-06, + "loss": 0.2021, + "num_input_tokens_seen": 10428544, + "step": 15480 + }, + { + "epoch": 0.37830112623066964, + "grad_norm": 28.76828384399414, + "learning_rate": 1.5130698197097766e-06, + "loss": 0.1401, + "num_input_tokens_seen": 10431744, + "step": 15485 + }, + { + "epoch": 0.37842327706251677, + "grad_norm": 0.7208065986633301, + "learning_rate": 1.5135584111007965e-06, + "loss": 0.0534, + "num_input_tokens_seen": 10435712, + "step": 15490 + }, + { + "epoch": 0.37854542789436396, + "grad_norm": 1.000946283340454, + "learning_rate": 1.514047002491816e-06, + "loss": 0.1069, + "num_input_tokens_seen": 10439040, + "step": 15495 + }, + { + "epoch": 0.37866757872621115, + "grad_norm": 0.2939513325691223, + "learning_rate": 1.5145355938828357e-06, + "loss": 0.0527, + "num_input_tokens_seen": 10442304, + "step": 15500 + }, + { + "epoch": 0.3787897295580583, + "grad_norm": 0.05420077219605446, + "learning_rate": 1.5150241852738553e-06, + "loss": 0.0106, + "num_input_tokens_seen": 10445696, + "step": 15505 + }, + { + "epoch": 0.3789118803899055, + "grad_norm": 51.432655334472656, + "learning_rate": 1.5155127766648752e-06, + "loss": 0.2094, + "num_input_tokens_seen": 10448896, + "step": 15510 + }, + { + "epoch": 0.3790340312217526, + "grad_norm": 32.62167739868164, + "learning_rate": 1.5160013680558947e-06, + "loss": 0.0971, + "num_input_tokens_seen": 10452352, + "step": 15515 + }, + { + "epoch": 0.3791561820535998, + "grad_norm": 22.451175689697266, + "learning_rate": 1.5164899594469144e-06, + "loss": 0.2383, + "num_input_tokens_seen": 10456000, + "step": 15520 + }, + { + "epoch": 0.37927833288544693, + "grad_norm": 0.42879417538642883, + "learning_rate": 1.5169785508379343e-06, + "loss": 0.1404, + "num_input_tokens_seen": 10459968, + "step": 15525 + }, + { + "epoch": 0.3794004837172941, + "grad_norm": 25.073436737060547, + "learning_rate": 1.5174671422289538e-06, + "loss": 0.1199, + "num_input_tokens_seen": 10463488, + "step": 15530 + }, + { + "epoch": 0.37952263454914126, + "grad_norm": 24.64297103881836, + "learning_rate": 1.5179557336199734e-06, + "loss": 0.0706, + "num_input_tokens_seen": 10467072, + "step": 15535 + }, + { + "epoch": 0.37964478538098845, + "grad_norm": 15.014025688171387, + "learning_rate": 1.5184443250109933e-06, + "loss": 0.1237, + "num_input_tokens_seen": 10470272, + "step": 15540 + }, + { + "epoch": 0.3797669362128356, + "grad_norm": 1.430694341659546, + "learning_rate": 1.5189329164020128e-06, + "loss": 0.1375, + "num_input_tokens_seen": 10473920, + "step": 15545 + }, + { + "epoch": 0.37988908704468277, + "grad_norm": 33.992801666259766, + "learning_rate": 1.5194215077930325e-06, + "loss": 0.0761, + "num_input_tokens_seen": 10477312, + "step": 15550 + }, + { + "epoch": 0.38001123787652996, + "grad_norm": 48.1543083190918, + "learning_rate": 1.5199100991840524e-06, + "loss": 0.0856, + "num_input_tokens_seen": 10480576, + "step": 15555 + }, + { + "epoch": 0.3801333887083771, + "grad_norm": 2.4673454761505127, + "learning_rate": 1.520398690575072e-06, + "loss": 0.0892, + "num_input_tokens_seen": 10483776, + "step": 15560 + }, + { + "epoch": 0.3802555395402243, + "grad_norm": 77.2197036743164, + "learning_rate": 1.5208872819660915e-06, + "loss": 0.1898, + "num_input_tokens_seen": 10487040, + "step": 15565 + }, + { + "epoch": 0.3803776903720714, + "grad_norm": 11.173587799072266, + "learning_rate": 1.5213758733571114e-06, + "loss": 0.1709, + "num_input_tokens_seen": 10490752, + "step": 15570 + }, + { + "epoch": 0.3804998412039186, + "grad_norm": 25.323055267333984, + "learning_rate": 1.521864464748131e-06, + "loss": 0.1493, + "num_input_tokens_seen": 10493888, + "step": 15575 + }, + { + "epoch": 0.38062199203576574, + "grad_norm": 13.963493347167969, + "learning_rate": 1.5223530561391506e-06, + "loss": 0.1155, + "num_input_tokens_seen": 10497728, + "step": 15580 + }, + { + "epoch": 0.38074414286761293, + "grad_norm": 47.22227478027344, + "learning_rate": 1.5228416475301705e-06, + "loss": 0.1956, + "num_input_tokens_seen": 10501248, + "step": 15585 + }, + { + "epoch": 0.38086629369946007, + "grad_norm": 1.2307871580123901, + "learning_rate": 1.5233302389211901e-06, + "loss": 0.1305, + "num_input_tokens_seen": 10504576, + "step": 15590 + }, + { + "epoch": 0.38098844453130726, + "grad_norm": 14.952744483947754, + "learning_rate": 1.5238188303122098e-06, + "loss": 0.0638, + "num_input_tokens_seen": 10507712, + "step": 15595 + }, + { + "epoch": 0.38111059536315445, + "grad_norm": 23.147411346435547, + "learning_rate": 1.5243074217032295e-06, + "loss": 0.2178, + "num_input_tokens_seen": 10511552, + "step": 15600 + }, + { + "epoch": 0.3812327461950016, + "grad_norm": 23.532026290893555, + "learning_rate": 1.5247960130942492e-06, + "loss": 0.1734, + "num_input_tokens_seen": 10514432, + "step": 15605 + }, + { + "epoch": 0.38135489702684877, + "grad_norm": 8.446444511413574, + "learning_rate": 1.5252846044852689e-06, + "loss": 0.1081, + "num_input_tokens_seen": 10517632, + "step": 15610 + }, + { + "epoch": 0.3814770478586959, + "grad_norm": 57.32693862915039, + "learning_rate": 1.5257731958762885e-06, + "loss": 0.2098, + "num_input_tokens_seen": 10521024, + "step": 15615 + }, + { + "epoch": 0.3815991986905431, + "grad_norm": 21.209300994873047, + "learning_rate": 1.5262617872673082e-06, + "loss": 0.1375, + "num_input_tokens_seen": 10524608, + "step": 15620 + }, + { + "epoch": 0.38172134952239023, + "grad_norm": 27.126405715942383, + "learning_rate": 1.526750378658328e-06, + "loss": 0.0546, + "num_input_tokens_seen": 10528256, + "step": 15625 + }, + { + "epoch": 0.3818435003542374, + "grad_norm": 26.83810806274414, + "learning_rate": 1.5272389700493478e-06, + "loss": 0.1676, + "num_input_tokens_seen": 10531200, + "step": 15630 + }, + { + "epoch": 0.38196565118608455, + "grad_norm": 9.054481506347656, + "learning_rate": 1.5277275614403673e-06, + "loss": 0.159, + "num_input_tokens_seen": 10534656, + "step": 15635 + }, + { + "epoch": 0.38208780201793174, + "grad_norm": 27.62286949157715, + "learning_rate": 1.528216152831387e-06, + "loss": 0.079, + "num_input_tokens_seen": 10537920, + "step": 15640 + }, + { + "epoch": 0.38220995284977893, + "grad_norm": 18.78369140625, + "learning_rate": 1.5287047442224068e-06, + "loss": 0.1409, + "num_input_tokens_seen": 10541504, + "step": 15645 + }, + { + "epoch": 0.38233210368162607, + "grad_norm": 3.5283448696136475, + "learning_rate": 1.5291933356134263e-06, + "loss": 0.1738, + "num_input_tokens_seen": 10544384, + "step": 15650 + }, + { + "epoch": 0.38245425451347326, + "grad_norm": 21.821565628051758, + "learning_rate": 1.529681927004446e-06, + "loss": 0.1478, + "num_input_tokens_seen": 10547712, + "step": 15655 + }, + { + "epoch": 0.3825764053453204, + "grad_norm": 33.52692794799805, + "learning_rate": 1.5301705183954659e-06, + "loss": 0.2437, + "num_input_tokens_seen": 10550784, + "step": 15660 + }, + { + "epoch": 0.3826985561771676, + "grad_norm": 5.589592456817627, + "learning_rate": 1.5306591097864856e-06, + "loss": 0.0959, + "num_input_tokens_seen": 10554304, + "step": 15665 + }, + { + "epoch": 0.3828207070090147, + "grad_norm": 27.160764694213867, + "learning_rate": 1.531147701177505e-06, + "loss": 0.1287, + "num_input_tokens_seen": 10557760, + "step": 15670 + }, + { + "epoch": 0.3829428578408619, + "grad_norm": 15.26628589630127, + "learning_rate": 1.531636292568525e-06, + "loss": 0.1185, + "num_input_tokens_seen": 10561152, + "step": 15675 + }, + { + "epoch": 0.38306500867270904, + "grad_norm": 12.412528038024902, + "learning_rate": 1.5321248839595446e-06, + "loss": 0.1117, + "num_input_tokens_seen": 10564544, + "step": 15680 + }, + { + "epoch": 0.38318715950455623, + "grad_norm": 1.3267569541931152, + "learning_rate": 1.532613475350564e-06, + "loss": 0.1692, + "num_input_tokens_seen": 10567552, + "step": 15685 + }, + { + "epoch": 0.38330931033640336, + "grad_norm": 3.8919661045074463, + "learning_rate": 1.533102066741584e-06, + "loss": 0.1682, + "num_input_tokens_seen": 10570624, + "step": 15690 + }, + { + "epoch": 0.38343146116825055, + "grad_norm": 0.884100615978241, + "learning_rate": 1.5335906581326037e-06, + "loss": 0.0318, + "num_input_tokens_seen": 10573952, + "step": 15695 + }, + { + "epoch": 0.38355361200009774, + "grad_norm": 18.8651065826416, + "learning_rate": 1.5340792495236231e-06, + "loss": 0.0905, + "num_input_tokens_seen": 10577344, + "step": 15700 + }, + { + "epoch": 0.3836757628319449, + "grad_norm": 22.760498046875, + "learning_rate": 1.534567840914643e-06, + "loss": 0.0864, + "num_input_tokens_seen": 10580480, + "step": 15705 + }, + { + "epoch": 0.38379791366379207, + "grad_norm": 30.443721771240234, + "learning_rate": 1.5350564323056627e-06, + "loss": 0.2148, + "num_input_tokens_seen": 10584064, + "step": 15710 + }, + { + "epoch": 0.3839200644956392, + "grad_norm": 5.712250709533691, + "learning_rate": 1.5355450236966824e-06, + "loss": 0.1423, + "num_input_tokens_seen": 10587776, + "step": 15715 + }, + { + "epoch": 0.3840422153274864, + "grad_norm": 24.226604461669922, + "learning_rate": 1.536033615087702e-06, + "loss": 0.1419, + "num_input_tokens_seen": 10591296, + "step": 15720 + }, + { + "epoch": 0.3841643661593335, + "grad_norm": 26.179553985595703, + "learning_rate": 1.5365222064787217e-06, + "loss": 0.1796, + "num_input_tokens_seen": 10594368, + "step": 15725 + }, + { + "epoch": 0.3842865169911807, + "grad_norm": 11.114547729492188, + "learning_rate": 1.5370107978697414e-06, + "loss": 0.1257, + "num_input_tokens_seen": 10597376, + "step": 15730 + }, + { + "epoch": 0.38440866782302785, + "grad_norm": 2.732433795928955, + "learning_rate": 1.5374993892607611e-06, + "loss": 0.1822, + "num_input_tokens_seen": 10600704, + "step": 15735 + }, + { + "epoch": 0.38453081865487504, + "grad_norm": 30.16254425048828, + "learning_rate": 1.5379879806517808e-06, + "loss": 0.2058, + "num_input_tokens_seen": 10604352, + "step": 15740 + }, + { + "epoch": 0.38465296948672223, + "grad_norm": 12.766703605651855, + "learning_rate": 1.5384765720428005e-06, + "loss": 0.0936, + "num_input_tokens_seen": 10607680, + "step": 15745 + }, + { + "epoch": 0.38477512031856936, + "grad_norm": 18.774023056030273, + "learning_rate": 1.5389651634338204e-06, + "loss": 0.1499, + "num_input_tokens_seen": 10611072, + "step": 15750 + }, + { + "epoch": 0.38489727115041655, + "grad_norm": 16.16313934326172, + "learning_rate": 1.5394537548248398e-06, + "loss": 0.1205, + "num_input_tokens_seen": 10614272, + "step": 15755 + }, + { + "epoch": 0.3850194219822637, + "grad_norm": 11.570549964904785, + "learning_rate": 1.5399423462158595e-06, + "loss": 0.0851, + "num_input_tokens_seen": 10617088, + "step": 15760 + }, + { + "epoch": 0.3851415728141109, + "grad_norm": 18.607933044433594, + "learning_rate": 1.5404309376068794e-06, + "loss": 0.1307, + "num_input_tokens_seen": 10619904, + "step": 15765 + }, + { + "epoch": 0.385263723645958, + "grad_norm": 18.242626190185547, + "learning_rate": 1.5409195289978989e-06, + "loss": 0.1781, + "num_input_tokens_seen": 10623488, + "step": 15770 + }, + { + "epoch": 0.3853858744778052, + "grad_norm": 7.264750003814697, + "learning_rate": 1.5414081203889186e-06, + "loss": 0.1018, + "num_input_tokens_seen": 10626880, + "step": 15775 + }, + { + "epoch": 0.38550802530965234, + "grad_norm": 35.27031326293945, + "learning_rate": 1.5418967117799385e-06, + "loss": 0.1506, + "num_input_tokens_seen": 10630208, + "step": 15780 + }, + { + "epoch": 0.3856301761414995, + "grad_norm": 2.565133571624756, + "learning_rate": 1.5423853031709581e-06, + "loss": 0.043, + "num_input_tokens_seen": 10633216, + "step": 15785 + }, + { + "epoch": 0.3857523269733467, + "grad_norm": 14.965524673461914, + "learning_rate": 1.5428738945619776e-06, + "loss": 0.1396, + "num_input_tokens_seen": 10636672, + "step": 15790 + }, + { + "epoch": 0.38587447780519385, + "grad_norm": 14.533952713012695, + "learning_rate": 1.5433624859529975e-06, + "loss": 0.1323, + "num_input_tokens_seen": 10639936, + "step": 15795 + }, + { + "epoch": 0.38599662863704104, + "grad_norm": 11.508635520935059, + "learning_rate": 1.5438510773440172e-06, + "loss": 0.1178, + "num_input_tokens_seen": 10643136, + "step": 15800 + }, + { + "epoch": 0.3861187794688882, + "grad_norm": 19.610515594482422, + "learning_rate": 1.5443396687350366e-06, + "loss": 0.086, + "num_input_tokens_seen": 10646528, + "step": 15805 + }, + { + "epoch": 0.38624093030073536, + "grad_norm": 1.9880129098892212, + "learning_rate": 1.5448282601260565e-06, + "loss": 0.0228, + "num_input_tokens_seen": 10649984, + "step": 15810 + }, + { + "epoch": 0.3863630811325825, + "grad_norm": 17.143823623657227, + "learning_rate": 1.5453168515170762e-06, + "loss": 0.0876, + "num_input_tokens_seen": 10653120, + "step": 15815 + }, + { + "epoch": 0.3864852319644297, + "grad_norm": 21.041353225708008, + "learning_rate": 1.5458054429080957e-06, + "loss": 0.1813, + "num_input_tokens_seen": 10656256, + "step": 15820 + }, + { + "epoch": 0.3866073827962768, + "grad_norm": 8.247550010681152, + "learning_rate": 1.5462940342991156e-06, + "loss": 0.1512, + "num_input_tokens_seen": 10659328, + "step": 15825 + }, + { + "epoch": 0.386729533628124, + "grad_norm": 4.186298370361328, + "learning_rate": 1.5467826256901353e-06, + "loss": 0.0881, + "num_input_tokens_seen": 10662848, + "step": 15830 + }, + { + "epoch": 0.38685168445997115, + "grad_norm": 7.553508281707764, + "learning_rate": 1.547271217081155e-06, + "loss": 0.3, + "num_input_tokens_seen": 10666112, + "step": 15835 + }, + { + "epoch": 0.38697383529181834, + "grad_norm": 2.3813862800598145, + "learning_rate": 1.5477598084721746e-06, + "loss": 0.1846, + "num_input_tokens_seen": 10669184, + "step": 15840 + }, + { + "epoch": 0.3870959861236655, + "grad_norm": 5.976868152618408, + "learning_rate": 1.5482483998631943e-06, + "loss": 0.0803, + "num_input_tokens_seen": 10672512, + "step": 15845 + }, + { + "epoch": 0.38721813695551266, + "grad_norm": 9.132421493530273, + "learning_rate": 1.548736991254214e-06, + "loss": 0.1346, + "num_input_tokens_seen": 10675392, + "step": 15850 + }, + { + "epoch": 0.38734028778735985, + "grad_norm": 16.281137466430664, + "learning_rate": 1.5492255826452337e-06, + "loss": 0.1351, + "num_input_tokens_seen": 10678720, + "step": 15855 + }, + { + "epoch": 0.387462438619207, + "grad_norm": 14.683212280273438, + "learning_rate": 1.5497141740362534e-06, + "loss": 0.0553, + "num_input_tokens_seen": 10682176, + "step": 15860 + }, + { + "epoch": 0.3875845894510542, + "grad_norm": 20.908401489257812, + "learning_rate": 1.550202765427273e-06, + "loss": 0.0425, + "num_input_tokens_seen": 10685696, + "step": 15865 + }, + { + "epoch": 0.3877067402829013, + "grad_norm": 4.737698554992676, + "learning_rate": 1.550691356818293e-06, + "loss": 0.0721, + "num_input_tokens_seen": 10689216, + "step": 15870 + }, + { + "epoch": 0.3878288911147485, + "grad_norm": 10.016496658325195, + "learning_rate": 1.5511799482093124e-06, + "loss": 0.1571, + "num_input_tokens_seen": 10692288, + "step": 15875 + }, + { + "epoch": 0.38795104194659563, + "grad_norm": 10.282059669494629, + "learning_rate": 1.551668539600332e-06, + "loss": 0.0994, + "num_input_tokens_seen": 10695680, + "step": 15880 + }, + { + "epoch": 0.3880731927784428, + "grad_norm": 21.099702835083008, + "learning_rate": 1.552157130991352e-06, + "loss": 0.2294, + "num_input_tokens_seen": 10698624, + "step": 15885 + }, + { + "epoch": 0.38819534361029, + "grad_norm": 11.00361442565918, + "learning_rate": 1.5526457223823714e-06, + "loss": 0.2457, + "num_input_tokens_seen": 10702208, + "step": 15890 + }, + { + "epoch": 0.38831749444213715, + "grad_norm": 1.1076817512512207, + "learning_rate": 1.5531343137733911e-06, + "loss": 0.0488, + "num_input_tokens_seen": 10705216, + "step": 15895 + }, + { + "epoch": 0.38843964527398434, + "grad_norm": 10.160321235656738, + "learning_rate": 1.553622905164411e-06, + "loss": 0.081, + "num_input_tokens_seen": 10708160, + "step": 15900 + }, + { + "epoch": 0.38856179610583147, + "grad_norm": 0.8632951378822327, + "learning_rate": 1.5541114965554307e-06, + "loss": 0.1093, + "num_input_tokens_seen": 10712384, + "step": 15905 + }, + { + "epoch": 0.38868394693767866, + "grad_norm": 34.629905700683594, + "learning_rate": 1.5546000879464502e-06, + "loss": 0.1033, + "num_input_tokens_seen": 10716096, + "step": 15910 + }, + { + "epoch": 0.3888060977695258, + "grad_norm": 17.712677001953125, + "learning_rate": 1.55508867933747e-06, + "loss": 0.1871, + "num_input_tokens_seen": 10719232, + "step": 15915 + }, + { + "epoch": 0.388928248601373, + "grad_norm": 0.24668991565704346, + "learning_rate": 1.5555772707284897e-06, + "loss": 0.0706, + "num_input_tokens_seen": 10722560, + "step": 15920 + }, + { + "epoch": 0.3890503994332201, + "grad_norm": 17.20937156677246, + "learning_rate": 1.5560658621195092e-06, + "loss": 0.0899, + "num_input_tokens_seen": 10725504, + "step": 15925 + }, + { + "epoch": 0.3891725502650673, + "grad_norm": 6.1287994384765625, + "learning_rate": 1.556554453510529e-06, + "loss": 0.2236, + "num_input_tokens_seen": 10728704, + "step": 15930 + }, + { + "epoch": 0.3892947010969145, + "grad_norm": 29.251150131225586, + "learning_rate": 1.5570430449015488e-06, + "loss": 0.148, + "num_input_tokens_seen": 10732096, + "step": 15935 + }, + { + "epoch": 0.38941685192876163, + "grad_norm": 0.6747702956199646, + "learning_rate": 1.5575316362925685e-06, + "loss": 0.0168, + "num_input_tokens_seen": 10735360, + "step": 15940 + }, + { + "epoch": 0.3895390027606088, + "grad_norm": 44.67377853393555, + "learning_rate": 1.5580202276835881e-06, + "loss": 0.2019, + "num_input_tokens_seen": 10738688, + "step": 15945 + }, + { + "epoch": 0.38966115359245596, + "grad_norm": 47.97969436645508, + "learning_rate": 1.5585088190746078e-06, + "loss": 0.0239, + "num_input_tokens_seen": 10741632, + "step": 15950 + }, + { + "epoch": 0.38978330442430315, + "grad_norm": 29.029560089111328, + "learning_rate": 1.5589974104656275e-06, + "loss": 0.1504, + "num_input_tokens_seen": 10745472, + "step": 15955 + }, + { + "epoch": 0.3899054552561503, + "grad_norm": 11.867027282714844, + "learning_rate": 1.5594860018566472e-06, + "loss": 0.1547, + "num_input_tokens_seen": 10748864, + "step": 15960 + }, + { + "epoch": 0.39002760608799747, + "grad_norm": 16.851787567138672, + "learning_rate": 1.5599745932476669e-06, + "loss": 0.1754, + "num_input_tokens_seen": 10752128, + "step": 15965 + }, + { + "epoch": 0.3901497569198446, + "grad_norm": 2.712797164916992, + "learning_rate": 1.5604631846386866e-06, + "loss": 0.1719, + "num_input_tokens_seen": 10755840, + "step": 15970 + }, + { + "epoch": 0.3902719077516918, + "grad_norm": 20.49784278869629, + "learning_rate": 1.5609517760297062e-06, + "loss": 0.1273, + "num_input_tokens_seen": 10759040, + "step": 15975 + }, + { + "epoch": 0.3903940585835389, + "grad_norm": 3.896900177001953, + "learning_rate": 1.561440367420726e-06, + "loss": 0.1223, + "num_input_tokens_seen": 10762176, + "step": 15980 + }, + { + "epoch": 0.3905162094153861, + "grad_norm": 17.352317810058594, + "learning_rate": 1.5619289588117456e-06, + "loss": 0.1119, + "num_input_tokens_seen": 10765696, + "step": 15985 + }, + { + "epoch": 0.3906383602472333, + "grad_norm": 1.630355954170227, + "learning_rate": 1.5624175502027655e-06, + "loss": 0.1843, + "num_input_tokens_seen": 10769024, + "step": 15990 + }, + { + "epoch": 0.39076051107908044, + "grad_norm": 4.977283000946045, + "learning_rate": 1.562906141593785e-06, + "loss": 0.1047, + "num_input_tokens_seen": 10772800, + "step": 15995 + }, + { + "epoch": 0.39088266191092763, + "grad_norm": 5.900393009185791, + "learning_rate": 1.5633947329848046e-06, + "loss": 0.0575, + "num_input_tokens_seen": 10776704, + "step": 16000 + }, + { + "epoch": 0.39100481274277477, + "grad_norm": 21.47952651977539, + "learning_rate": 1.5638833243758245e-06, + "loss": 0.071, + "num_input_tokens_seen": 10780416, + "step": 16005 + }, + { + "epoch": 0.39112696357462196, + "grad_norm": 16.857471466064453, + "learning_rate": 1.564371915766844e-06, + "loss": 0.1896, + "num_input_tokens_seen": 10783552, + "step": 16010 + }, + { + "epoch": 0.3912491144064691, + "grad_norm": 12.78052806854248, + "learning_rate": 1.5648605071578637e-06, + "loss": 0.1082, + "num_input_tokens_seen": 10786432, + "step": 16015 + }, + { + "epoch": 0.3913712652383163, + "grad_norm": 18.435972213745117, + "learning_rate": 1.5653490985488836e-06, + "loss": 0.1499, + "num_input_tokens_seen": 10789504, + "step": 16020 + }, + { + "epoch": 0.3914934160701634, + "grad_norm": 14.367976188659668, + "learning_rate": 1.5658376899399033e-06, + "loss": 0.1097, + "num_input_tokens_seen": 10792768, + "step": 16025 + }, + { + "epoch": 0.3916155669020106, + "grad_norm": 35.80486297607422, + "learning_rate": 1.5663262813309227e-06, + "loss": 0.0895, + "num_input_tokens_seen": 10796288, + "step": 16030 + }, + { + "epoch": 0.3917377177338578, + "grad_norm": 52.145294189453125, + "learning_rate": 1.5668148727219426e-06, + "loss": 0.2037, + "num_input_tokens_seen": 10800064, + "step": 16035 + }, + { + "epoch": 0.3918598685657049, + "grad_norm": 50.63142395019531, + "learning_rate": 1.5673034641129623e-06, + "loss": 0.155, + "num_input_tokens_seen": 10803456, + "step": 16040 + }, + { + "epoch": 0.3919820193975521, + "grad_norm": 3.279127836227417, + "learning_rate": 1.5677920555039818e-06, + "loss": 0.1418, + "num_input_tokens_seen": 10806784, + "step": 16045 + }, + { + "epoch": 0.39210417022939925, + "grad_norm": 39.02543258666992, + "learning_rate": 1.5682806468950017e-06, + "loss": 0.1995, + "num_input_tokens_seen": 10810496, + "step": 16050 + }, + { + "epoch": 0.39222632106124644, + "grad_norm": 13.758199691772461, + "learning_rate": 1.5687692382860213e-06, + "loss": 0.0676, + "num_input_tokens_seen": 10813760, + "step": 16055 + }, + { + "epoch": 0.3923484718930936, + "grad_norm": 6.683971881866455, + "learning_rate": 1.569257829677041e-06, + "loss": 0.111, + "num_input_tokens_seen": 10817088, + "step": 16060 + }, + { + "epoch": 0.39247062272494077, + "grad_norm": 15.666305541992188, + "learning_rate": 1.5697464210680607e-06, + "loss": 0.1503, + "num_input_tokens_seen": 10820480, + "step": 16065 + }, + { + "epoch": 0.3925927735567879, + "grad_norm": 10.361749649047852, + "learning_rate": 1.5702350124590804e-06, + "loss": 0.2075, + "num_input_tokens_seen": 10823680, + "step": 16070 + }, + { + "epoch": 0.3927149243886351, + "grad_norm": 42.955177307128906, + "learning_rate": 1.5707236038501e-06, + "loss": 0.1805, + "num_input_tokens_seen": 10827456, + "step": 16075 + }, + { + "epoch": 0.3928370752204823, + "grad_norm": 16.565195083618164, + "learning_rate": 1.5712121952411198e-06, + "loss": 0.1784, + "num_input_tokens_seen": 10830784, + "step": 16080 + }, + { + "epoch": 0.3929592260523294, + "grad_norm": 15.590194702148438, + "learning_rate": 1.5717007866321394e-06, + "loss": 0.1074, + "num_input_tokens_seen": 10834112, + "step": 16085 + }, + { + "epoch": 0.3930813768841766, + "grad_norm": 1.7364860773086548, + "learning_rate": 1.5721893780231591e-06, + "loss": 0.1116, + "num_input_tokens_seen": 10837312, + "step": 16090 + }, + { + "epoch": 0.39320352771602374, + "grad_norm": 13.02885627746582, + "learning_rate": 1.5726779694141788e-06, + "loss": 0.1407, + "num_input_tokens_seen": 10840448, + "step": 16095 + }, + { + "epoch": 0.3933256785478709, + "grad_norm": 12.833983421325684, + "learning_rate": 1.5731665608051985e-06, + "loss": 0.0713, + "num_input_tokens_seen": 10843648, + "step": 16100 + }, + { + "epoch": 0.39344782937971806, + "grad_norm": 14.190690040588379, + "learning_rate": 1.5736551521962182e-06, + "loss": 0.0996, + "num_input_tokens_seen": 10847168, + "step": 16105 + }, + { + "epoch": 0.39356998021156525, + "grad_norm": 36.03334045410156, + "learning_rate": 1.574143743587238e-06, + "loss": 0.1066, + "num_input_tokens_seen": 10850240, + "step": 16110 + }, + { + "epoch": 0.3936921310434124, + "grad_norm": 17.66339111328125, + "learning_rate": 1.5746323349782575e-06, + "loss": 0.07, + "num_input_tokens_seen": 10853632, + "step": 16115 + }, + { + "epoch": 0.3938142818752596, + "grad_norm": 16.764387130737305, + "learning_rate": 1.5751209263692772e-06, + "loss": 0.2055, + "num_input_tokens_seen": 10857472, + "step": 16120 + }, + { + "epoch": 0.3939364327071067, + "grad_norm": 24.138320922851562, + "learning_rate": 1.575609517760297e-06, + "loss": 0.2077, + "num_input_tokens_seen": 10860864, + "step": 16125 + }, + { + "epoch": 0.3940585835389539, + "grad_norm": 1.7676178216934204, + "learning_rate": 1.5760981091513166e-06, + "loss": 0.0593, + "num_input_tokens_seen": 10864256, + "step": 16130 + }, + { + "epoch": 0.3941807343708011, + "grad_norm": 15.465826034545898, + "learning_rate": 1.5765867005423362e-06, + "loss": 0.075, + "num_input_tokens_seen": 10867328, + "step": 16135 + }, + { + "epoch": 0.3943028852026482, + "grad_norm": 31.692584991455078, + "learning_rate": 1.5770752919333561e-06, + "loss": 0.1361, + "num_input_tokens_seen": 10870720, + "step": 16140 + }, + { + "epoch": 0.3944250360344954, + "grad_norm": 5.40956449508667, + "learning_rate": 1.5775638833243758e-06, + "loss": 0.1391, + "num_input_tokens_seen": 10874176, + "step": 16145 + }, + { + "epoch": 0.39454718686634255, + "grad_norm": 12.23112678527832, + "learning_rate": 1.5780524747153953e-06, + "loss": 0.1064, + "num_input_tokens_seen": 10877312, + "step": 16150 + }, + { + "epoch": 0.39466933769818974, + "grad_norm": 11.061859130859375, + "learning_rate": 1.5785410661064152e-06, + "loss": 0.0347, + "num_input_tokens_seen": 10880384, + "step": 16155 + }, + { + "epoch": 0.39479148853003687, + "grad_norm": 29.293039321899414, + "learning_rate": 1.5790296574974349e-06, + "loss": 0.1328, + "num_input_tokens_seen": 10883648, + "step": 16160 + }, + { + "epoch": 0.39491363936188406, + "grad_norm": 16.09516143798828, + "learning_rate": 1.5795182488884543e-06, + "loss": 0.043, + "num_input_tokens_seen": 10887104, + "step": 16165 + }, + { + "epoch": 0.3950357901937312, + "grad_norm": 17.577499389648438, + "learning_rate": 1.5800068402794742e-06, + "loss": 0.135, + "num_input_tokens_seen": 10890560, + "step": 16170 + }, + { + "epoch": 0.3951579410255784, + "grad_norm": 45.8845100402832, + "learning_rate": 1.580495431670494e-06, + "loss": 0.1755, + "num_input_tokens_seen": 10893696, + "step": 16175 + }, + { + "epoch": 0.3952800918574256, + "grad_norm": 26.468963623046875, + "learning_rate": 1.5809840230615136e-06, + "loss": 0.0819, + "num_input_tokens_seen": 10896768, + "step": 16180 + }, + { + "epoch": 0.3954022426892727, + "grad_norm": 1.9360413551330566, + "learning_rate": 1.5814726144525333e-06, + "loss": 0.0533, + "num_input_tokens_seen": 10899840, + "step": 16185 + }, + { + "epoch": 0.3955243935211199, + "grad_norm": 1.5298985242843628, + "learning_rate": 1.581961205843553e-06, + "loss": 0.1159, + "num_input_tokens_seen": 10902656, + "step": 16190 + }, + { + "epoch": 0.39564654435296703, + "grad_norm": 15.531476974487305, + "learning_rate": 1.5824497972345726e-06, + "loss": 0.1456, + "num_input_tokens_seen": 10905856, + "step": 16195 + }, + { + "epoch": 0.3957686951848142, + "grad_norm": 13.77304458618164, + "learning_rate": 1.5829383886255923e-06, + "loss": 0.2502, + "num_input_tokens_seen": 10909120, + "step": 16200 + }, + { + "epoch": 0.39589084601666136, + "grad_norm": 19.4348201751709, + "learning_rate": 1.583426980016612e-06, + "loss": 0.1608, + "num_input_tokens_seen": 10912384, + "step": 16205 + }, + { + "epoch": 0.39601299684850855, + "grad_norm": 0.1257823258638382, + "learning_rate": 1.5839155714076317e-06, + "loss": 0.0518, + "num_input_tokens_seen": 10915840, + "step": 16210 + }, + { + "epoch": 0.3961351476803557, + "grad_norm": 31.687843322753906, + "learning_rate": 1.5844041627986516e-06, + "loss": 0.1392, + "num_input_tokens_seen": 10919168, + "step": 16215 + }, + { + "epoch": 0.39625729851220287, + "grad_norm": 9.020833015441895, + "learning_rate": 1.584892754189671e-06, + "loss": 0.1297, + "num_input_tokens_seen": 10922176, + "step": 16220 + }, + { + "epoch": 0.39637944934405, + "grad_norm": 17.56090545654297, + "learning_rate": 1.5853813455806907e-06, + "loss": 0.0987, + "num_input_tokens_seen": 10925504, + "step": 16225 + }, + { + "epoch": 0.3965016001758972, + "grad_norm": 25.40732192993164, + "learning_rate": 1.5858699369717106e-06, + "loss": 0.0924, + "num_input_tokens_seen": 10928384, + "step": 16230 + }, + { + "epoch": 0.3966237510077444, + "grad_norm": 21.291419982910156, + "learning_rate": 1.58635852836273e-06, + "loss": 0.1234, + "num_input_tokens_seen": 10931392, + "step": 16235 + }, + { + "epoch": 0.3967459018395915, + "grad_norm": 20.114309310913086, + "learning_rate": 1.5868471197537498e-06, + "loss": 0.0636, + "num_input_tokens_seen": 10934912, + "step": 16240 + }, + { + "epoch": 0.3968680526714387, + "grad_norm": 23.1328067779541, + "learning_rate": 1.5873357111447697e-06, + "loss": 0.1387, + "num_input_tokens_seen": 10938112, + "step": 16245 + }, + { + "epoch": 0.39699020350328584, + "grad_norm": 13.599346160888672, + "learning_rate": 1.5878243025357891e-06, + "loss": 0.1428, + "num_input_tokens_seen": 10942144, + "step": 16250 + }, + { + "epoch": 0.39711235433513303, + "grad_norm": 23.25737190246582, + "learning_rate": 1.5883128939268088e-06, + "loss": 0.1288, + "num_input_tokens_seen": 10945280, + "step": 16255 + }, + { + "epoch": 0.39723450516698017, + "grad_norm": 10.759117126464844, + "learning_rate": 1.5888014853178287e-06, + "loss": 0.1731, + "num_input_tokens_seen": 10949056, + "step": 16260 + }, + { + "epoch": 0.39735665599882736, + "grad_norm": 24.098670959472656, + "learning_rate": 1.5892900767088484e-06, + "loss": 0.0676, + "num_input_tokens_seen": 10952384, + "step": 16265 + }, + { + "epoch": 0.3974788068306745, + "grad_norm": 37.097408294677734, + "learning_rate": 1.5897786680998679e-06, + "loss": 0.0302, + "num_input_tokens_seen": 10955712, + "step": 16270 + }, + { + "epoch": 0.3976009576625217, + "grad_norm": 41.81599044799805, + "learning_rate": 1.5902672594908877e-06, + "loss": 0.0313, + "num_input_tokens_seen": 10959040, + "step": 16275 + }, + { + "epoch": 0.39772310849436887, + "grad_norm": 14.4953031539917, + "learning_rate": 1.5907558508819074e-06, + "loss": 0.1402, + "num_input_tokens_seen": 10962240, + "step": 16280 + }, + { + "epoch": 0.397845259326216, + "grad_norm": 1.8347476720809937, + "learning_rate": 1.591244442272927e-06, + "loss": 0.0433, + "num_input_tokens_seen": 10965504, + "step": 16285 + }, + { + "epoch": 0.3979674101580632, + "grad_norm": 50.44518280029297, + "learning_rate": 1.5917330336639468e-06, + "loss": 0.1955, + "num_input_tokens_seen": 10968960, + "step": 16290 + }, + { + "epoch": 0.39808956098991033, + "grad_norm": 47.06920623779297, + "learning_rate": 1.5922216250549665e-06, + "loss": 0.1945, + "num_input_tokens_seen": 10972672, + "step": 16295 + }, + { + "epoch": 0.3982117118217575, + "grad_norm": 32.896270751953125, + "learning_rate": 1.5927102164459862e-06, + "loss": 0.2091, + "num_input_tokens_seen": 10975680, + "step": 16300 + }, + { + "epoch": 0.39833386265360465, + "grad_norm": 31.728307723999023, + "learning_rate": 1.5931988078370058e-06, + "loss": 0.135, + "num_input_tokens_seen": 10978688, + "step": 16305 + }, + { + "epoch": 0.39845601348545184, + "grad_norm": 1.8147532939910889, + "learning_rate": 1.5936873992280255e-06, + "loss": 0.1638, + "num_input_tokens_seen": 10981696, + "step": 16310 + }, + { + "epoch": 0.398578164317299, + "grad_norm": 47.015018463134766, + "learning_rate": 1.5941759906190452e-06, + "loss": 0.1767, + "num_input_tokens_seen": 10985024, + "step": 16315 + }, + { + "epoch": 0.39870031514914617, + "grad_norm": 20.603796005249023, + "learning_rate": 1.5946645820100649e-06, + "loss": 0.128, + "num_input_tokens_seen": 10988416, + "step": 16320 + }, + { + "epoch": 0.39882246598099336, + "grad_norm": 0.8213732838630676, + "learning_rate": 1.5951531734010846e-06, + "loss": 0.1098, + "num_input_tokens_seen": 10992000, + "step": 16325 + }, + { + "epoch": 0.3989446168128405, + "grad_norm": 25.38956069946289, + "learning_rate": 1.5956417647921042e-06, + "loss": 0.1098, + "num_input_tokens_seen": 10995264, + "step": 16330 + }, + { + "epoch": 0.3990667676446877, + "grad_norm": 9.087261199951172, + "learning_rate": 1.5961303561831241e-06, + "loss": 0.0463, + "num_input_tokens_seen": 10998464, + "step": 16335 + }, + { + "epoch": 0.3991889184765348, + "grad_norm": 23.549148559570312, + "learning_rate": 1.5966189475741436e-06, + "loss": 0.1157, + "num_input_tokens_seen": 11001600, + "step": 16340 + }, + { + "epoch": 0.399311069308382, + "grad_norm": 44.26289749145508, + "learning_rate": 1.5971075389651633e-06, + "loss": 0.053, + "num_input_tokens_seen": 11004928, + "step": 16345 + }, + { + "epoch": 0.39943322014022914, + "grad_norm": 30.471595764160156, + "learning_rate": 1.5975961303561832e-06, + "loss": 0.1173, + "num_input_tokens_seen": 11008064, + "step": 16350 + }, + { + "epoch": 0.39955537097207633, + "grad_norm": 14.239789962768555, + "learning_rate": 1.5980847217472026e-06, + "loss": 0.2029, + "num_input_tokens_seen": 11011392, + "step": 16355 + }, + { + "epoch": 0.39967752180392346, + "grad_norm": 14.216521263122559, + "learning_rate": 1.5985733131382223e-06, + "loss": 0.2419, + "num_input_tokens_seen": 11014912, + "step": 16360 + }, + { + "epoch": 0.39979967263577065, + "grad_norm": 34.19306945800781, + "learning_rate": 1.5990619045292422e-06, + "loss": 0.1646, + "num_input_tokens_seen": 11018496, + "step": 16365 + }, + { + "epoch": 0.3999218234676178, + "grad_norm": 0.6496175527572632, + "learning_rate": 1.599550495920262e-06, + "loss": 0.0541, + "num_input_tokens_seen": 11022016, + "step": 16370 + }, + { + "epoch": 0.400043974299465, + "grad_norm": 24.54413604736328, + "learning_rate": 1.6000390873112814e-06, + "loss": 0.0528, + "num_input_tokens_seen": 11025088, + "step": 16375 + }, + { + "epoch": 0.40016612513131217, + "grad_norm": 6.894566059112549, + "learning_rate": 1.6005276787023013e-06, + "loss": 0.0828, + "num_input_tokens_seen": 11027968, + "step": 16380 + }, + { + "epoch": 0.4002882759631593, + "grad_norm": 0.47866374254226685, + "learning_rate": 1.601016270093321e-06, + "loss": 0.1717, + "num_input_tokens_seen": 11031296, + "step": 16385 + }, + { + "epoch": 0.4004104267950065, + "grad_norm": 6.847443580627441, + "learning_rate": 1.6015048614843404e-06, + "loss": 0.0743, + "num_input_tokens_seen": 11034688, + "step": 16390 + }, + { + "epoch": 0.4005325776268536, + "grad_norm": 21.000062942504883, + "learning_rate": 1.6019934528753603e-06, + "loss": 0.134, + "num_input_tokens_seen": 11038016, + "step": 16395 + }, + { + "epoch": 0.4006547284587008, + "grad_norm": 33.84720230102539, + "learning_rate": 1.60248204426638e-06, + "loss": 0.1923, + "num_input_tokens_seen": 11041472, + "step": 16400 + }, + { + "epoch": 0.40077687929054795, + "grad_norm": 34.48461151123047, + "learning_rate": 1.6029706356573995e-06, + "loss": 0.1039, + "num_input_tokens_seen": 11044864, + "step": 16405 + }, + { + "epoch": 0.40089903012239514, + "grad_norm": 0.1740226000547409, + "learning_rate": 1.6034592270484194e-06, + "loss": 0.1081, + "num_input_tokens_seen": 11048384, + "step": 16410 + }, + { + "epoch": 0.4010211809542423, + "grad_norm": 33.16176223754883, + "learning_rate": 1.603947818439439e-06, + "loss": 0.0626, + "num_input_tokens_seen": 11051712, + "step": 16415 + }, + { + "epoch": 0.40114333178608946, + "grad_norm": 10.99461555480957, + "learning_rate": 1.6044364098304587e-06, + "loss": 0.0369, + "num_input_tokens_seen": 11055552, + "step": 16420 + }, + { + "epoch": 0.40126548261793665, + "grad_norm": 11.003421783447266, + "learning_rate": 1.6049250012214784e-06, + "loss": 0.1517, + "num_input_tokens_seen": 11058880, + "step": 16425 + }, + { + "epoch": 0.4013876334497838, + "grad_norm": 34.61371994018555, + "learning_rate": 1.605413592612498e-06, + "loss": 0.1362, + "num_input_tokens_seen": 11062208, + "step": 16430 + }, + { + "epoch": 0.401509784281631, + "grad_norm": 12.106389999389648, + "learning_rate": 1.6059021840035178e-06, + "loss": 0.2636, + "num_input_tokens_seen": 11066048, + "step": 16435 + }, + { + "epoch": 0.4016319351134781, + "grad_norm": 33.2092170715332, + "learning_rate": 1.6063907753945374e-06, + "loss": 0.3135, + "num_input_tokens_seen": 11069824, + "step": 16440 + }, + { + "epoch": 0.4017540859453253, + "grad_norm": 31.4134521484375, + "learning_rate": 1.6068793667855571e-06, + "loss": 0.1552, + "num_input_tokens_seen": 11073088, + "step": 16445 + }, + { + "epoch": 0.40187623677717244, + "grad_norm": 38.61223220825195, + "learning_rate": 1.6073679581765768e-06, + "loss": 0.1472, + "num_input_tokens_seen": 11076864, + "step": 16450 + }, + { + "epoch": 0.4019983876090196, + "grad_norm": 0.4963672161102295, + "learning_rate": 1.6078565495675967e-06, + "loss": 0.0921, + "num_input_tokens_seen": 11080256, + "step": 16455 + }, + { + "epoch": 0.40212053844086676, + "grad_norm": 36.525779724121094, + "learning_rate": 1.6083451409586162e-06, + "loss": 0.1323, + "num_input_tokens_seen": 11083712, + "step": 16460 + }, + { + "epoch": 0.40224268927271395, + "grad_norm": 14.989439964294434, + "learning_rate": 1.6088337323496359e-06, + "loss": 0.1764, + "num_input_tokens_seen": 11087040, + "step": 16465 + }, + { + "epoch": 0.40236484010456114, + "grad_norm": 15.095987319946289, + "learning_rate": 1.6093223237406557e-06, + "loss": 0.1396, + "num_input_tokens_seen": 11090240, + "step": 16470 + }, + { + "epoch": 0.4024869909364083, + "grad_norm": 15.008681297302246, + "learning_rate": 1.6098109151316752e-06, + "loss": 0.0541, + "num_input_tokens_seen": 11093632, + "step": 16475 + }, + { + "epoch": 0.40260914176825546, + "grad_norm": 28.64449691772461, + "learning_rate": 1.610299506522695e-06, + "loss": 0.1336, + "num_input_tokens_seen": 11096768, + "step": 16480 + }, + { + "epoch": 0.4027312926001026, + "grad_norm": 20.235105514526367, + "learning_rate": 1.6107880979137148e-06, + "loss": 0.0992, + "num_input_tokens_seen": 11099968, + "step": 16485 + }, + { + "epoch": 0.4028534434319498, + "grad_norm": 29.153833389282227, + "learning_rate": 1.6112766893047345e-06, + "loss": 0.1079, + "num_input_tokens_seen": 11103232, + "step": 16490 + }, + { + "epoch": 0.4029755942637969, + "grad_norm": 13.984383583068848, + "learning_rate": 1.611765280695754e-06, + "loss": 0.1288, + "num_input_tokens_seen": 11106048, + "step": 16495 + }, + { + "epoch": 0.4030977450956441, + "grad_norm": 22.53131866455078, + "learning_rate": 1.6122538720867738e-06, + "loss": 0.0708, + "num_input_tokens_seen": 11109248, + "step": 16500 + }, + { + "epoch": 0.40321989592749125, + "grad_norm": 21.125389099121094, + "learning_rate": 1.6127424634777935e-06, + "loss": 0.1343, + "num_input_tokens_seen": 11112384, + "step": 16505 + }, + { + "epoch": 0.40334204675933844, + "grad_norm": 0.2616809606552124, + "learning_rate": 1.613231054868813e-06, + "loss": 0.075, + "num_input_tokens_seen": 11116096, + "step": 16510 + }, + { + "epoch": 0.40346419759118557, + "grad_norm": 23.93106460571289, + "learning_rate": 1.6137196462598329e-06, + "loss": 0.1278, + "num_input_tokens_seen": 11119232, + "step": 16515 + }, + { + "epoch": 0.40358634842303276, + "grad_norm": 19.08568572998047, + "learning_rate": 1.6142082376508526e-06, + "loss": 0.1306, + "num_input_tokens_seen": 11123200, + "step": 16520 + }, + { + "epoch": 0.40370849925487995, + "grad_norm": 11.759346008300781, + "learning_rate": 1.614696829041872e-06, + "loss": 0.0965, + "num_input_tokens_seen": 11126528, + "step": 16525 + }, + { + "epoch": 0.4038306500867271, + "grad_norm": 1.0693756341934204, + "learning_rate": 1.615185420432892e-06, + "loss": 0.1082, + "num_input_tokens_seen": 11130048, + "step": 16530 + }, + { + "epoch": 0.4039528009185743, + "grad_norm": 29.432117462158203, + "learning_rate": 1.6156740118239116e-06, + "loss": 0.1653, + "num_input_tokens_seen": 11133248, + "step": 16535 + }, + { + "epoch": 0.4040749517504214, + "grad_norm": 15.797622680664062, + "learning_rate": 1.6161626032149313e-06, + "loss": 0.198, + "num_input_tokens_seen": 11136576, + "step": 16540 + }, + { + "epoch": 0.4041971025822686, + "grad_norm": 2.2607970237731934, + "learning_rate": 1.616651194605951e-06, + "loss": 0.0332, + "num_input_tokens_seen": 11139968, + "step": 16545 + }, + { + "epoch": 0.40431925341411573, + "grad_norm": 26.23784637451172, + "learning_rate": 1.6171397859969706e-06, + "loss": 0.1053, + "num_input_tokens_seen": 11143104, + "step": 16550 + }, + { + "epoch": 0.4044414042459629, + "grad_norm": 19.77354621887207, + "learning_rate": 1.6176283773879903e-06, + "loss": 0.1145, + "num_input_tokens_seen": 11146816, + "step": 16555 + }, + { + "epoch": 0.40456355507781006, + "grad_norm": 20.735017776489258, + "learning_rate": 1.61811696877901e-06, + "loss": 0.058, + "num_input_tokens_seen": 11150016, + "step": 16560 + }, + { + "epoch": 0.40468570590965725, + "grad_norm": 40.133853912353516, + "learning_rate": 1.6186055601700297e-06, + "loss": 0.3602, + "num_input_tokens_seen": 11153280, + "step": 16565 + }, + { + "epoch": 0.40480785674150443, + "grad_norm": 17.214582443237305, + "learning_rate": 1.6190941515610494e-06, + "loss": 0.2336, + "num_input_tokens_seen": 11156352, + "step": 16570 + }, + { + "epoch": 0.40493000757335157, + "grad_norm": 29.597932815551758, + "learning_rate": 1.6195827429520693e-06, + "loss": 0.1717, + "num_input_tokens_seen": 11159872, + "step": 16575 + }, + { + "epoch": 0.40505215840519876, + "grad_norm": 13.730437278747559, + "learning_rate": 1.6200713343430887e-06, + "loss": 0.0624, + "num_input_tokens_seen": 11163072, + "step": 16580 + }, + { + "epoch": 0.4051743092370459, + "grad_norm": 25.89980697631836, + "learning_rate": 1.6205599257341084e-06, + "loss": 0.1306, + "num_input_tokens_seen": 11166336, + "step": 16585 + }, + { + "epoch": 0.4052964600688931, + "grad_norm": 14.32689094543457, + "learning_rate": 1.6210485171251283e-06, + "loss": 0.1343, + "num_input_tokens_seen": 11169856, + "step": 16590 + }, + { + "epoch": 0.4054186109007402, + "grad_norm": 16.669872283935547, + "learning_rate": 1.6215371085161478e-06, + "loss": 0.1087, + "num_input_tokens_seen": 11172992, + "step": 16595 + }, + { + "epoch": 0.4055407617325874, + "grad_norm": 26.87580680847168, + "learning_rate": 1.6220256999071675e-06, + "loss": 0.0759, + "num_input_tokens_seen": 11176320, + "step": 16600 + }, + { + "epoch": 0.40566291256443454, + "grad_norm": 2.764390468597412, + "learning_rate": 1.6225142912981874e-06, + "loss": 0.0217, + "num_input_tokens_seen": 11179648, + "step": 16605 + }, + { + "epoch": 0.40578506339628173, + "grad_norm": 23.70087432861328, + "learning_rate": 1.623002882689207e-06, + "loss": 0.1103, + "num_input_tokens_seen": 11182656, + "step": 16610 + }, + { + "epoch": 0.4059072142281289, + "grad_norm": 44.54327392578125, + "learning_rate": 1.6234914740802265e-06, + "loss": 0.225, + "num_input_tokens_seen": 11186240, + "step": 16615 + }, + { + "epoch": 0.40602936505997606, + "grad_norm": 0.4073219895362854, + "learning_rate": 1.6239800654712464e-06, + "loss": 0.1125, + "num_input_tokens_seen": 11190016, + "step": 16620 + }, + { + "epoch": 0.40615151589182324, + "grad_norm": 13.439949035644531, + "learning_rate": 1.624468656862266e-06, + "loss": 0.0681, + "num_input_tokens_seen": 11193408, + "step": 16625 + }, + { + "epoch": 0.4062736667236704, + "grad_norm": 28.58769416809082, + "learning_rate": 1.6249572482532855e-06, + "loss": 0.1086, + "num_input_tokens_seen": 11196416, + "step": 16630 + }, + { + "epoch": 0.40639581755551757, + "grad_norm": 0.441162645816803, + "learning_rate": 1.6254458396443054e-06, + "loss": 0.163, + "num_input_tokens_seen": 11199808, + "step": 16635 + }, + { + "epoch": 0.4065179683873647, + "grad_norm": 43.27127456665039, + "learning_rate": 1.6259344310353251e-06, + "loss": 0.2066, + "num_input_tokens_seen": 11203072, + "step": 16640 + }, + { + "epoch": 0.4066401192192119, + "grad_norm": 25.73714828491211, + "learning_rate": 1.6264230224263448e-06, + "loss": 0.0939, + "num_input_tokens_seen": 11206400, + "step": 16645 + }, + { + "epoch": 0.406762270051059, + "grad_norm": 11.029995918273926, + "learning_rate": 1.6269116138173645e-06, + "loss": 0.1402, + "num_input_tokens_seen": 11209600, + "step": 16650 + }, + { + "epoch": 0.4068844208829062, + "grad_norm": 24.101545333862305, + "learning_rate": 1.6274002052083842e-06, + "loss": 0.1372, + "num_input_tokens_seen": 11212736, + "step": 16655 + }, + { + "epoch": 0.40700657171475335, + "grad_norm": 25.160419464111328, + "learning_rate": 1.6278887965994038e-06, + "loss": 0.1845, + "num_input_tokens_seen": 11216064, + "step": 16660 + }, + { + "epoch": 0.40712872254660054, + "grad_norm": 25.641063690185547, + "learning_rate": 1.6283773879904235e-06, + "loss": 0.1796, + "num_input_tokens_seen": 11219264, + "step": 16665 + }, + { + "epoch": 0.40725087337844773, + "grad_norm": 14.866890907287598, + "learning_rate": 1.6288659793814432e-06, + "loss": 0.0825, + "num_input_tokens_seen": 11222784, + "step": 16670 + }, + { + "epoch": 0.40737302421029487, + "grad_norm": 24.974517822265625, + "learning_rate": 1.6293545707724629e-06, + "loss": 0.1176, + "num_input_tokens_seen": 11226048, + "step": 16675 + }, + { + "epoch": 0.40749517504214205, + "grad_norm": 0.3464401662349701, + "learning_rate": 1.6298431621634826e-06, + "loss": 0.0873, + "num_input_tokens_seen": 11229568, + "step": 16680 + }, + { + "epoch": 0.4076173258739892, + "grad_norm": 2.9293575286865234, + "learning_rate": 1.6303317535545023e-06, + "loss": 0.0537, + "num_input_tokens_seen": 11233024, + "step": 16685 + }, + { + "epoch": 0.4077394767058364, + "grad_norm": 19.832813262939453, + "learning_rate": 1.630820344945522e-06, + "loss": 0.0696, + "num_input_tokens_seen": 11236032, + "step": 16690 + }, + { + "epoch": 0.4078616275376835, + "grad_norm": 25.053936004638672, + "learning_rate": 1.6313089363365418e-06, + "loss": 0.1806, + "num_input_tokens_seen": 11239104, + "step": 16695 + }, + { + "epoch": 0.4079837783695307, + "grad_norm": 28.535404205322266, + "learning_rate": 1.6317975277275613e-06, + "loss": 0.149, + "num_input_tokens_seen": 11242368, + "step": 16700 + }, + { + "epoch": 0.40810592920137784, + "grad_norm": 3.1667778491973877, + "learning_rate": 1.632286119118581e-06, + "loss": 0.049, + "num_input_tokens_seen": 11245632, + "step": 16705 + }, + { + "epoch": 0.408228080033225, + "grad_norm": 19.77776336669922, + "learning_rate": 1.6327747105096009e-06, + "loss": 0.088, + "num_input_tokens_seen": 11249344, + "step": 16710 + }, + { + "epoch": 0.4083502308650722, + "grad_norm": 3.902125597000122, + "learning_rate": 1.6332633019006203e-06, + "loss": 0.0908, + "num_input_tokens_seen": 11253056, + "step": 16715 + }, + { + "epoch": 0.40847238169691935, + "grad_norm": 6.418919563293457, + "learning_rate": 1.63375189329164e-06, + "loss": 0.1675, + "num_input_tokens_seen": 11256256, + "step": 16720 + }, + { + "epoch": 0.40859453252876654, + "grad_norm": 5.540237903594971, + "learning_rate": 1.63424048468266e-06, + "loss": 0.0575, + "num_input_tokens_seen": 11259904, + "step": 16725 + }, + { + "epoch": 0.4087166833606137, + "grad_norm": 4.5995330810546875, + "learning_rate": 1.6347290760736796e-06, + "loss": 0.0636, + "num_input_tokens_seen": 11263296, + "step": 16730 + }, + { + "epoch": 0.40883883419246086, + "grad_norm": 6.6088643074035645, + "learning_rate": 1.635217667464699e-06, + "loss": 0.1156, + "num_input_tokens_seen": 11266560, + "step": 16735 + }, + { + "epoch": 0.408960985024308, + "grad_norm": 14.79891586303711, + "learning_rate": 1.635706258855719e-06, + "loss": 0.171, + "num_input_tokens_seen": 11270592, + "step": 16740 + }, + { + "epoch": 0.4090831358561552, + "grad_norm": 19.40209197998047, + "learning_rate": 1.6361948502467386e-06, + "loss": 0.1687, + "num_input_tokens_seen": 11274048, + "step": 16745 + }, + { + "epoch": 0.4092052866880023, + "grad_norm": 36.3360595703125, + "learning_rate": 1.6366834416377581e-06, + "loss": 0.1968, + "num_input_tokens_seen": 11277760, + "step": 16750 + }, + { + "epoch": 0.4093274375198495, + "grad_norm": 2.9016504287719727, + "learning_rate": 1.637172033028778e-06, + "loss": 0.1212, + "num_input_tokens_seen": 11281344, + "step": 16755 + }, + { + "epoch": 0.4094495883516967, + "grad_norm": 0.9625802636146545, + "learning_rate": 1.6376606244197977e-06, + "loss": 0.1246, + "num_input_tokens_seen": 11285184, + "step": 16760 + }, + { + "epoch": 0.40957173918354384, + "grad_norm": 0.9809098839759827, + "learning_rate": 1.6381492158108174e-06, + "loss": 0.1465, + "num_input_tokens_seen": 11288576, + "step": 16765 + }, + { + "epoch": 0.409693890015391, + "grad_norm": 16.860475540161133, + "learning_rate": 1.638637807201837e-06, + "loss": 0.1732, + "num_input_tokens_seen": 11291968, + "step": 16770 + }, + { + "epoch": 0.40981604084723816, + "grad_norm": 11.833168983459473, + "learning_rate": 1.6391263985928567e-06, + "loss": 0.1714, + "num_input_tokens_seen": 11295680, + "step": 16775 + }, + { + "epoch": 0.40993819167908535, + "grad_norm": 34.927284240722656, + "learning_rate": 1.6396149899838764e-06, + "loss": 0.1803, + "num_input_tokens_seen": 11299264, + "step": 16780 + }, + { + "epoch": 0.4100603425109325, + "grad_norm": 21.84634017944336, + "learning_rate": 1.640103581374896e-06, + "loss": 0.1094, + "num_input_tokens_seen": 11302720, + "step": 16785 + }, + { + "epoch": 0.4101824933427797, + "grad_norm": 14.141610145568848, + "learning_rate": 1.6405921727659158e-06, + "loss": 0.1998, + "num_input_tokens_seen": 11305664, + "step": 16790 + }, + { + "epoch": 0.4103046441746268, + "grad_norm": 10.894978523254395, + "learning_rate": 1.6410807641569355e-06, + "loss": 0.067, + "num_input_tokens_seen": 11309056, + "step": 16795 + }, + { + "epoch": 0.410426795006474, + "grad_norm": 10.09036922454834, + "learning_rate": 1.6415693555479553e-06, + "loss": 0.1192, + "num_input_tokens_seen": 11312576, + "step": 16800 + }, + { + "epoch": 0.41054894583832113, + "grad_norm": 19.356460571289062, + "learning_rate": 1.6420579469389748e-06, + "loss": 0.1092, + "num_input_tokens_seen": 11315648, + "step": 16805 + }, + { + "epoch": 0.4106710966701683, + "grad_norm": 3.952444076538086, + "learning_rate": 1.6425465383299945e-06, + "loss": 0.1387, + "num_input_tokens_seen": 11318848, + "step": 16810 + }, + { + "epoch": 0.4107932475020155, + "grad_norm": 26.717870712280273, + "learning_rate": 1.6430351297210144e-06, + "loss": 0.1777, + "num_input_tokens_seen": 11322048, + "step": 16815 + }, + { + "epoch": 0.41091539833386265, + "grad_norm": 1.2356115579605103, + "learning_rate": 1.6435237211120339e-06, + "loss": 0.125, + "num_input_tokens_seen": 11325376, + "step": 16820 + }, + { + "epoch": 0.41103754916570984, + "grad_norm": 28.821317672729492, + "learning_rate": 1.6440123125030535e-06, + "loss": 0.151, + "num_input_tokens_seen": 11328704, + "step": 16825 + }, + { + "epoch": 0.41115969999755697, + "grad_norm": 0.7280601263046265, + "learning_rate": 1.6445009038940734e-06, + "loss": 0.0885, + "num_input_tokens_seen": 11331776, + "step": 16830 + }, + { + "epoch": 0.41128185082940416, + "grad_norm": 6.10056209564209, + "learning_rate": 1.644989495285093e-06, + "loss": 0.0768, + "num_input_tokens_seen": 11335680, + "step": 16835 + }, + { + "epoch": 0.4114040016612513, + "grad_norm": 6.15144157409668, + "learning_rate": 1.6454780866761126e-06, + "loss": 0.159, + "num_input_tokens_seen": 11338624, + "step": 16840 + }, + { + "epoch": 0.4115261524930985, + "grad_norm": 0.3368801176548004, + "learning_rate": 1.6459666780671325e-06, + "loss": 0.0856, + "num_input_tokens_seen": 11341888, + "step": 16845 + }, + { + "epoch": 0.4116483033249456, + "grad_norm": 12.542458534240723, + "learning_rate": 1.6464552694581522e-06, + "loss": 0.139, + "num_input_tokens_seen": 11345728, + "step": 16850 + }, + { + "epoch": 0.4117704541567928, + "grad_norm": 2.5582902431488037, + "learning_rate": 1.6469438608491716e-06, + "loss": 0.1357, + "num_input_tokens_seen": 11349184, + "step": 16855 + }, + { + "epoch": 0.41189260498864, + "grad_norm": 18.847349166870117, + "learning_rate": 1.6474324522401915e-06, + "loss": 0.1509, + "num_input_tokens_seen": 11352384, + "step": 16860 + }, + { + "epoch": 0.41201475582048713, + "grad_norm": 30.65785026550293, + "learning_rate": 1.6479210436312112e-06, + "loss": 0.1452, + "num_input_tokens_seen": 11355904, + "step": 16865 + }, + { + "epoch": 0.4121369066523343, + "grad_norm": 7.287036418914795, + "learning_rate": 1.6484096350222307e-06, + "loss": 0.0671, + "num_input_tokens_seen": 11359040, + "step": 16870 + }, + { + "epoch": 0.41225905748418146, + "grad_norm": 9.512429237365723, + "learning_rate": 1.6488982264132506e-06, + "loss": 0.1257, + "num_input_tokens_seen": 11361984, + "step": 16875 + }, + { + "epoch": 0.41238120831602865, + "grad_norm": 3.455568790435791, + "learning_rate": 1.6493868178042702e-06, + "loss": 0.066, + "num_input_tokens_seen": 11366016, + "step": 16880 + }, + { + "epoch": 0.4125033591478758, + "grad_norm": 0.9491122364997864, + "learning_rate": 1.64987540919529e-06, + "loss": 0.1641, + "num_input_tokens_seen": 11369984, + "step": 16885 + }, + { + "epoch": 0.41262550997972297, + "grad_norm": 4.618907928466797, + "learning_rate": 1.6503640005863096e-06, + "loss": 0.1858, + "num_input_tokens_seen": 11373312, + "step": 16890 + }, + { + "epoch": 0.4127476608115701, + "grad_norm": 8.991425514221191, + "learning_rate": 1.6508525919773293e-06, + "loss": 0.1818, + "num_input_tokens_seen": 11376448, + "step": 16895 + }, + { + "epoch": 0.4128698116434173, + "grad_norm": 5.761434555053711, + "learning_rate": 1.651341183368349e-06, + "loss": 0.0363, + "num_input_tokens_seen": 11379904, + "step": 16900 + }, + { + "epoch": 0.41299196247526443, + "grad_norm": 37.52250671386719, + "learning_rate": 1.6518297747593687e-06, + "loss": 0.1037, + "num_input_tokens_seen": 11383360, + "step": 16905 + }, + { + "epoch": 0.4131141133071116, + "grad_norm": 26.177513122558594, + "learning_rate": 1.6523183661503883e-06, + "loss": 0.2688, + "num_input_tokens_seen": 11386624, + "step": 16910 + }, + { + "epoch": 0.4132362641389588, + "grad_norm": 10.464012145996094, + "learning_rate": 1.652806957541408e-06, + "loss": 0.0447, + "num_input_tokens_seen": 11390208, + "step": 16915 + }, + { + "epoch": 0.41335841497080594, + "grad_norm": 15.334251403808594, + "learning_rate": 1.653295548932428e-06, + "loss": 0.1803, + "num_input_tokens_seen": 11394048, + "step": 16920 + }, + { + "epoch": 0.41348056580265313, + "grad_norm": 13.855216026306152, + "learning_rate": 1.6537841403234474e-06, + "loss": 0.0391, + "num_input_tokens_seen": 11397376, + "step": 16925 + }, + { + "epoch": 0.41360271663450027, + "grad_norm": 26.607481002807617, + "learning_rate": 1.654272731714467e-06, + "loss": 0.126, + "num_input_tokens_seen": 11400960, + "step": 16930 + }, + { + "epoch": 0.41372486746634746, + "grad_norm": 23.822050094604492, + "learning_rate": 1.654761323105487e-06, + "loss": 0.0747, + "num_input_tokens_seen": 11404160, + "step": 16935 + }, + { + "epoch": 0.4138470182981946, + "grad_norm": 28.980283737182617, + "learning_rate": 1.6552499144965064e-06, + "loss": 0.1278, + "num_input_tokens_seen": 11407488, + "step": 16940 + }, + { + "epoch": 0.4139691691300418, + "grad_norm": 14.651640892028809, + "learning_rate": 1.655738505887526e-06, + "loss": 0.0899, + "num_input_tokens_seen": 11410944, + "step": 16945 + }, + { + "epoch": 0.4140913199618889, + "grad_norm": 24.085874557495117, + "learning_rate": 1.656227097278546e-06, + "loss": 0.1208, + "num_input_tokens_seen": 11414848, + "step": 16950 + }, + { + "epoch": 0.4142134707937361, + "grad_norm": 1.1301546096801758, + "learning_rate": 1.6567156886695655e-06, + "loss": 0.0931, + "num_input_tokens_seen": 11418176, + "step": 16955 + }, + { + "epoch": 0.4143356216255833, + "grad_norm": 0.6874004006385803, + "learning_rate": 1.6572042800605851e-06, + "loss": 0.3264, + "num_input_tokens_seen": 11421504, + "step": 16960 + }, + { + "epoch": 0.41445777245743043, + "grad_norm": 4.807636260986328, + "learning_rate": 1.657692871451605e-06, + "loss": 0.0632, + "num_input_tokens_seen": 11424896, + "step": 16965 + }, + { + "epoch": 0.4145799232892776, + "grad_norm": 24.025726318359375, + "learning_rate": 1.6581814628426247e-06, + "loss": 0.2217, + "num_input_tokens_seen": 11428096, + "step": 16970 + }, + { + "epoch": 0.41470207412112475, + "grad_norm": 17.033933639526367, + "learning_rate": 1.6586700542336442e-06, + "loss": 0.1892, + "num_input_tokens_seen": 11431872, + "step": 16975 + }, + { + "epoch": 0.41482422495297194, + "grad_norm": 0.6783763766288757, + "learning_rate": 1.659158645624664e-06, + "loss": 0.118, + "num_input_tokens_seen": 11434944, + "step": 16980 + }, + { + "epoch": 0.4149463757848191, + "grad_norm": 20.161361694335938, + "learning_rate": 1.6596472370156838e-06, + "loss": 0.1374, + "num_input_tokens_seen": 11438528, + "step": 16985 + }, + { + "epoch": 0.41506852661666627, + "grad_norm": 17.77075958251953, + "learning_rate": 1.6601358284067032e-06, + "loss": 0.1233, + "num_input_tokens_seen": 11441472, + "step": 16990 + }, + { + "epoch": 0.4151906774485134, + "grad_norm": 23.082717895507812, + "learning_rate": 1.6606244197977231e-06, + "loss": 0.0969, + "num_input_tokens_seen": 11444800, + "step": 16995 + }, + { + "epoch": 0.4153128282803606, + "grad_norm": 37.304412841796875, + "learning_rate": 1.6611130111887428e-06, + "loss": 0.1357, + "num_input_tokens_seen": 11448576, + "step": 17000 + }, + { + "epoch": 0.4154349791122078, + "grad_norm": 2.0742123126983643, + "learning_rate": 1.6616016025797625e-06, + "loss": 0.1851, + "num_input_tokens_seen": 11451968, + "step": 17005 + }, + { + "epoch": 0.4155571299440549, + "grad_norm": 10.271985054016113, + "learning_rate": 1.6620901939707822e-06, + "loss": 0.0463, + "num_input_tokens_seen": 11455232, + "step": 17010 + }, + { + "epoch": 0.4156792807759021, + "grad_norm": 1.929952621459961, + "learning_rate": 1.6625787853618019e-06, + "loss": 0.0914, + "num_input_tokens_seen": 11458240, + "step": 17015 + }, + { + "epoch": 0.41580143160774924, + "grad_norm": 0.3577294945716858, + "learning_rate": 1.6630673767528215e-06, + "loss": 0.1541, + "num_input_tokens_seen": 11461376, + "step": 17020 + }, + { + "epoch": 0.41592358243959643, + "grad_norm": 0.5475679039955139, + "learning_rate": 1.6635559681438412e-06, + "loss": 0.1685, + "num_input_tokens_seen": 11464576, + "step": 17025 + }, + { + "epoch": 0.41604573327144356, + "grad_norm": 2.176693916320801, + "learning_rate": 1.664044559534861e-06, + "loss": 0.1494, + "num_input_tokens_seen": 11468032, + "step": 17030 + }, + { + "epoch": 0.41616788410329075, + "grad_norm": 26.510526657104492, + "learning_rate": 1.6645331509258806e-06, + "loss": 0.0575, + "num_input_tokens_seen": 11472192, + "step": 17035 + }, + { + "epoch": 0.4162900349351379, + "grad_norm": 11.13955307006836, + "learning_rate": 1.6650217423169005e-06, + "loss": 0.1208, + "num_input_tokens_seen": 11475328, + "step": 17040 + }, + { + "epoch": 0.4164121857669851, + "grad_norm": 4.4048590660095215, + "learning_rate": 1.66551033370792e-06, + "loss": 0.1505, + "num_input_tokens_seen": 11478336, + "step": 17045 + }, + { + "epoch": 0.4165343365988322, + "grad_norm": 15.270955085754395, + "learning_rate": 1.6659989250989396e-06, + "loss": 0.1621, + "num_input_tokens_seen": 11481920, + "step": 17050 + }, + { + "epoch": 0.4166564874306794, + "grad_norm": 20.76490020751953, + "learning_rate": 1.6664875164899595e-06, + "loss": 0.187, + "num_input_tokens_seen": 11484992, + "step": 17055 + }, + { + "epoch": 0.4167786382625266, + "grad_norm": 9.951630592346191, + "learning_rate": 1.666976107880979e-06, + "loss": 0.0291, + "num_input_tokens_seen": 11488384, + "step": 17060 + }, + { + "epoch": 0.4169007890943737, + "grad_norm": 22.358646392822266, + "learning_rate": 1.6674646992719987e-06, + "loss": 0.1093, + "num_input_tokens_seen": 11491712, + "step": 17065 + }, + { + "epoch": 0.4170229399262209, + "grad_norm": 7.269237518310547, + "learning_rate": 1.6679532906630186e-06, + "loss": 0.0532, + "num_input_tokens_seen": 11495232, + "step": 17070 + }, + { + "epoch": 0.41714509075806805, + "grad_norm": 50.06626892089844, + "learning_rate": 1.6684418820540382e-06, + "loss": 0.2184, + "num_input_tokens_seen": 11498304, + "step": 17075 + }, + { + "epoch": 0.41726724158991524, + "grad_norm": 23.30377960205078, + "learning_rate": 1.6689304734450577e-06, + "loss": 0.1244, + "num_input_tokens_seen": 11501632, + "step": 17080 + }, + { + "epoch": 0.4173893924217624, + "grad_norm": 28.480815887451172, + "learning_rate": 1.6694190648360776e-06, + "loss": 0.0564, + "num_input_tokens_seen": 11505152, + "step": 17085 + }, + { + "epoch": 0.41751154325360956, + "grad_norm": 14.612655639648438, + "learning_rate": 1.6699076562270973e-06, + "loss": 0.0455, + "num_input_tokens_seen": 11508352, + "step": 17090 + }, + { + "epoch": 0.4176336940854567, + "grad_norm": 52.15671920776367, + "learning_rate": 1.6703962476181168e-06, + "loss": 0.138, + "num_input_tokens_seen": 11512192, + "step": 17095 + }, + { + "epoch": 0.4177558449173039, + "grad_norm": 41.2117919921875, + "learning_rate": 1.6708848390091366e-06, + "loss": 0.1228, + "num_input_tokens_seen": 11515456, + "step": 17100 + }, + { + "epoch": 0.4178779957491511, + "grad_norm": 29.704301834106445, + "learning_rate": 1.6713734304001563e-06, + "loss": 0.1136, + "num_input_tokens_seen": 11518336, + "step": 17105 + }, + { + "epoch": 0.4180001465809982, + "grad_norm": 15.26375675201416, + "learning_rate": 1.6718620217911758e-06, + "loss": 0.0804, + "num_input_tokens_seen": 11521920, + "step": 17110 + }, + { + "epoch": 0.4181222974128454, + "grad_norm": 13.994719505310059, + "learning_rate": 1.6723506131821957e-06, + "loss": 0.0496, + "num_input_tokens_seen": 11525248, + "step": 17115 + }, + { + "epoch": 0.41824444824469253, + "grad_norm": 30.99201202392578, + "learning_rate": 1.6728392045732154e-06, + "loss": 0.055, + "num_input_tokens_seen": 11528256, + "step": 17120 + }, + { + "epoch": 0.4183665990765397, + "grad_norm": 24.87019920349121, + "learning_rate": 1.673327795964235e-06, + "loss": 0.1625, + "num_input_tokens_seen": 11531264, + "step": 17125 + }, + { + "epoch": 0.41848874990838686, + "grad_norm": 38.52754211425781, + "learning_rate": 1.6738163873552547e-06, + "loss": 0.1851, + "num_input_tokens_seen": 11534784, + "step": 17130 + }, + { + "epoch": 0.41861090074023405, + "grad_norm": 3.06817889213562, + "learning_rate": 1.6743049787462744e-06, + "loss": 0.2029, + "num_input_tokens_seen": 11538880, + "step": 17135 + }, + { + "epoch": 0.4187330515720812, + "grad_norm": 11.713780403137207, + "learning_rate": 1.674793570137294e-06, + "loss": 0.2042, + "num_input_tokens_seen": 11542080, + "step": 17140 + }, + { + "epoch": 0.4188552024039284, + "grad_norm": 14.065862655639648, + "learning_rate": 1.6752821615283138e-06, + "loss": 0.0884, + "num_input_tokens_seen": 11545664, + "step": 17145 + }, + { + "epoch": 0.41897735323577556, + "grad_norm": 14.169081687927246, + "learning_rate": 1.6757707529193335e-06, + "loss": 0.0796, + "num_input_tokens_seen": 11549056, + "step": 17150 + }, + { + "epoch": 0.4190995040676227, + "grad_norm": 25.049545288085938, + "learning_rate": 1.6762593443103531e-06, + "loss": 0.166, + "num_input_tokens_seen": 11552000, + "step": 17155 + }, + { + "epoch": 0.4192216548994699, + "grad_norm": 2.066195487976074, + "learning_rate": 1.676747935701373e-06, + "loss": 0.2232, + "num_input_tokens_seen": 11555008, + "step": 17160 + }, + { + "epoch": 0.419343805731317, + "grad_norm": 25.07562828063965, + "learning_rate": 1.6772365270923925e-06, + "loss": 0.1723, + "num_input_tokens_seen": 11558720, + "step": 17165 + }, + { + "epoch": 0.4194659565631642, + "grad_norm": 21.18793296813965, + "learning_rate": 1.6777251184834122e-06, + "loss": 0.0894, + "num_input_tokens_seen": 11562048, + "step": 17170 + }, + { + "epoch": 0.41958810739501134, + "grad_norm": 3.6823861598968506, + "learning_rate": 1.678213709874432e-06, + "loss": 0.0682, + "num_input_tokens_seen": 11565632, + "step": 17175 + }, + { + "epoch": 0.41971025822685853, + "grad_norm": 12.41185188293457, + "learning_rate": 1.6787023012654515e-06, + "loss": 0.1125, + "num_input_tokens_seen": 11569152, + "step": 17180 + }, + { + "epoch": 0.41983240905870567, + "grad_norm": 31.988386154174805, + "learning_rate": 1.6791908926564712e-06, + "loss": 0.18, + "num_input_tokens_seen": 11572416, + "step": 17185 + }, + { + "epoch": 0.41995455989055286, + "grad_norm": 22.454463958740234, + "learning_rate": 1.6796794840474911e-06, + "loss": 0.0782, + "num_input_tokens_seen": 11575552, + "step": 17190 + }, + { + "epoch": 0.4200767107224, + "grad_norm": 10.569482803344727, + "learning_rate": 1.6801680754385108e-06, + "loss": 0.0783, + "num_input_tokens_seen": 11578688, + "step": 17195 + }, + { + "epoch": 0.4201988615542472, + "grad_norm": 10.280340194702148, + "learning_rate": 1.6806566668295303e-06, + "loss": 0.2211, + "num_input_tokens_seen": 11581632, + "step": 17200 + }, + { + "epoch": 0.4203210123860944, + "grad_norm": 23.099761962890625, + "learning_rate": 1.6811452582205502e-06, + "loss": 0.2273, + "num_input_tokens_seen": 11584896, + "step": 17205 + }, + { + "epoch": 0.4204431632179415, + "grad_norm": 18.303823471069336, + "learning_rate": 1.6816338496115698e-06, + "loss": 0.0593, + "num_input_tokens_seen": 11588416, + "step": 17210 + }, + { + "epoch": 0.4205653140497887, + "grad_norm": 1.9946376085281372, + "learning_rate": 1.6821224410025893e-06, + "loss": 0.0771, + "num_input_tokens_seen": 11591488, + "step": 17215 + }, + { + "epoch": 0.42068746488163583, + "grad_norm": 19.712543487548828, + "learning_rate": 1.6826110323936092e-06, + "loss": 0.094, + "num_input_tokens_seen": 11594944, + "step": 17220 + }, + { + "epoch": 0.420809615713483, + "grad_norm": 41.17828369140625, + "learning_rate": 1.6830996237846289e-06, + "loss": 0.1358, + "num_input_tokens_seen": 11597952, + "step": 17225 + }, + { + "epoch": 0.42093176654533015, + "grad_norm": 22.785945892333984, + "learning_rate": 1.6835882151756484e-06, + "loss": 0.1798, + "num_input_tokens_seen": 11601344, + "step": 17230 + }, + { + "epoch": 0.42105391737717734, + "grad_norm": 15.529670715332031, + "learning_rate": 1.6840768065666683e-06, + "loss": 0.1284, + "num_input_tokens_seen": 11604864, + "step": 17235 + }, + { + "epoch": 0.4211760682090245, + "grad_norm": 1.1814509630203247, + "learning_rate": 1.684565397957688e-06, + "loss": 0.0769, + "num_input_tokens_seen": 11608320, + "step": 17240 + }, + { + "epoch": 0.42129821904087167, + "grad_norm": 28.040863037109375, + "learning_rate": 1.6850539893487076e-06, + "loss": 0.1262, + "num_input_tokens_seen": 11611968, + "step": 17245 + }, + { + "epoch": 0.42142036987271886, + "grad_norm": 27.960603713989258, + "learning_rate": 1.6855425807397273e-06, + "loss": 0.1179, + "num_input_tokens_seen": 11615872, + "step": 17250 + }, + { + "epoch": 0.421542520704566, + "grad_norm": 0.27788135409355164, + "learning_rate": 1.686031172130747e-06, + "loss": 0.0897, + "num_input_tokens_seen": 11619136, + "step": 17255 + }, + { + "epoch": 0.4216646715364132, + "grad_norm": 9.456171989440918, + "learning_rate": 1.6865197635217667e-06, + "loss": 0.1211, + "num_input_tokens_seen": 11622400, + "step": 17260 + }, + { + "epoch": 0.4217868223682603, + "grad_norm": 15.458897590637207, + "learning_rate": 1.6870083549127863e-06, + "loss": 0.117, + "num_input_tokens_seen": 11625600, + "step": 17265 + }, + { + "epoch": 0.4219089732001075, + "grad_norm": 63.20888137817383, + "learning_rate": 1.687496946303806e-06, + "loss": 0.2705, + "num_input_tokens_seen": 11628928, + "step": 17270 + }, + { + "epoch": 0.42203112403195464, + "grad_norm": 23.697669982910156, + "learning_rate": 1.6879855376948257e-06, + "loss": 0.1376, + "num_input_tokens_seen": 11632192, + "step": 17275 + }, + { + "epoch": 0.42215327486380183, + "grad_norm": 23.548437118530273, + "learning_rate": 1.6884741290858456e-06, + "loss": 0.109, + "num_input_tokens_seen": 11635200, + "step": 17280 + }, + { + "epoch": 0.42227542569564896, + "grad_norm": 21.84796714782715, + "learning_rate": 1.688962720476865e-06, + "loss": 0.0928, + "num_input_tokens_seen": 11638336, + "step": 17285 + }, + { + "epoch": 0.42239757652749615, + "grad_norm": 18.382299423217773, + "learning_rate": 1.6894513118678847e-06, + "loss": 0.0988, + "num_input_tokens_seen": 11642048, + "step": 17290 + }, + { + "epoch": 0.42251972735934334, + "grad_norm": 25.732805252075195, + "learning_rate": 1.6899399032589046e-06, + "loss": 0.182, + "num_input_tokens_seen": 11645248, + "step": 17295 + }, + { + "epoch": 0.4226418781911905, + "grad_norm": 21.091764450073242, + "learning_rate": 1.6904284946499241e-06, + "loss": 0.1811, + "num_input_tokens_seen": 11648768, + "step": 17300 + }, + { + "epoch": 0.42276402902303767, + "grad_norm": 0.5553004145622253, + "learning_rate": 1.6909170860409438e-06, + "loss": 0.0477, + "num_input_tokens_seen": 11652032, + "step": 17305 + }, + { + "epoch": 0.4228861798548848, + "grad_norm": 1.810340166091919, + "learning_rate": 1.6914056774319637e-06, + "loss": 0.082, + "num_input_tokens_seen": 11655104, + "step": 17310 + }, + { + "epoch": 0.423008330686732, + "grad_norm": 15.941764831542969, + "learning_rate": 1.6918942688229834e-06, + "loss": 0.2205, + "num_input_tokens_seen": 11658624, + "step": 17315 + }, + { + "epoch": 0.4231304815185791, + "grad_norm": 21.933530807495117, + "learning_rate": 1.6923828602140028e-06, + "loss": 0.1187, + "num_input_tokens_seen": 11661440, + "step": 17320 + }, + { + "epoch": 0.4232526323504263, + "grad_norm": 43.517852783203125, + "learning_rate": 1.6928714516050227e-06, + "loss": 0.0832, + "num_input_tokens_seen": 11664512, + "step": 17325 + }, + { + "epoch": 0.42337478318227345, + "grad_norm": 3.793125867843628, + "learning_rate": 1.6933600429960424e-06, + "loss": 0.2175, + "num_input_tokens_seen": 11667648, + "step": 17330 + }, + { + "epoch": 0.42349693401412064, + "grad_norm": 8.742578506469727, + "learning_rate": 1.6938486343870619e-06, + "loss": 0.1996, + "num_input_tokens_seen": 11670784, + "step": 17335 + }, + { + "epoch": 0.4236190848459678, + "grad_norm": 3.419423818588257, + "learning_rate": 1.6943372257780818e-06, + "loss": 0.0694, + "num_input_tokens_seen": 11673792, + "step": 17340 + }, + { + "epoch": 0.42374123567781496, + "grad_norm": 14.705204963684082, + "learning_rate": 1.6948258171691015e-06, + "loss": 0.0343, + "num_input_tokens_seen": 11677568, + "step": 17345 + }, + { + "epoch": 0.42386338650966215, + "grad_norm": 32.34300994873047, + "learning_rate": 1.6953144085601211e-06, + "loss": 0.1287, + "num_input_tokens_seen": 11681216, + "step": 17350 + }, + { + "epoch": 0.4239855373415093, + "grad_norm": 25.323711395263672, + "learning_rate": 1.6958029999511408e-06, + "loss": 0.2124, + "num_input_tokens_seen": 11684544, + "step": 17355 + }, + { + "epoch": 0.4241076881733565, + "grad_norm": 7.455917835235596, + "learning_rate": 1.6962915913421605e-06, + "loss": 0.1128, + "num_input_tokens_seen": 11687744, + "step": 17360 + }, + { + "epoch": 0.4242298390052036, + "grad_norm": 4.69118070602417, + "learning_rate": 1.6967801827331802e-06, + "loss": 0.0695, + "num_input_tokens_seen": 11691136, + "step": 17365 + }, + { + "epoch": 0.4243519898370508, + "grad_norm": 2.9468350410461426, + "learning_rate": 1.6972687741241999e-06, + "loss": 0.0546, + "num_input_tokens_seen": 11694592, + "step": 17370 + }, + { + "epoch": 0.42447414066889794, + "grad_norm": 8.724267959594727, + "learning_rate": 1.6977573655152195e-06, + "loss": 0.1033, + "num_input_tokens_seen": 11697856, + "step": 17375 + }, + { + "epoch": 0.4245962915007451, + "grad_norm": 17.574451446533203, + "learning_rate": 1.6982459569062392e-06, + "loss": 0.0761, + "num_input_tokens_seen": 11701696, + "step": 17380 + }, + { + "epoch": 0.42471844233259226, + "grad_norm": 30.870471954345703, + "learning_rate": 1.698734548297259e-06, + "loss": 0.1001, + "num_input_tokens_seen": 11704832, + "step": 17385 + }, + { + "epoch": 0.42484059316443945, + "grad_norm": 1.6396796703338623, + "learning_rate": 1.6992231396882786e-06, + "loss": 0.0872, + "num_input_tokens_seen": 11708096, + "step": 17390 + }, + { + "epoch": 0.42496274399628664, + "grad_norm": 10.13924789428711, + "learning_rate": 1.6997117310792983e-06, + "loss": 0.1583, + "num_input_tokens_seen": 11710976, + "step": 17395 + }, + { + "epoch": 0.4250848948281338, + "grad_norm": 10.56521987915039, + "learning_rate": 1.7002003224703182e-06, + "loss": 0.0655, + "num_input_tokens_seen": 11714560, + "step": 17400 + }, + { + "epoch": 0.42520704565998096, + "grad_norm": 13.599597930908203, + "learning_rate": 1.7006889138613376e-06, + "loss": 0.2183, + "num_input_tokens_seen": 11717760, + "step": 17405 + }, + { + "epoch": 0.4253291964918281, + "grad_norm": 29.13743019104004, + "learning_rate": 1.7011775052523573e-06, + "loss": 0.1628, + "num_input_tokens_seen": 11721152, + "step": 17410 + }, + { + "epoch": 0.4254513473236753, + "grad_norm": 15.540716171264648, + "learning_rate": 1.7016660966433772e-06, + "loss": 0.0634, + "num_input_tokens_seen": 11724224, + "step": 17415 + }, + { + "epoch": 0.4255734981555224, + "grad_norm": 36.84275436401367, + "learning_rate": 1.7021546880343967e-06, + "loss": 0.0711, + "num_input_tokens_seen": 11727296, + "step": 17420 + }, + { + "epoch": 0.4256956489873696, + "grad_norm": 27.168283462524414, + "learning_rate": 1.7026432794254164e-06, + "loss": 0.0998, + "num_input_tokens_seen": 11730624, + "step": 17425 + }, + { + "epoch": 0.42581779981921675, + "grad_norm": 1.0942851305007935, + "learning_rate": 1.7031318708164362e-06, + "loss": 0.1372, + "num_input_tokens_seen": 11733888, + "step": 17430 + }, + { + "epoch": 0.42593995065106394, + "grad_norm": 29.49009132385254, + "learning_rate": 1.703620462207456e-06, + "loss": 0.1501, + "num_input_tokens_seen": 11737152, + "step": 17435 + }, + { + "epoch": 0.42606210148291107, + "grad_norm": 37.950321197509766, + "learning_rate": 1.7041090535984754e-06, + "loss": 0.2104, + "num_input_tokens_seen": 11740352, + "step": 17440 + }, + { + "epoch": 0.42618425231475826, + "grad_norm": 5.7830915451049805, + "learning_rate": 1.7045976449894953e-06, + "loss": 0.1267, + "num_input_tokens_seen": 11744320, + "step": 17445 + }, + { + "epoch": 0.42630640314660545, + "grad_norm": 27.485319137573242, + "learning_rate": 1.705086236380515e-06, + "loss": 0.0531, + "num_input_tokens_seen": 11747328, + "step": 17450 + }, + { + "epoch": 0.4264285539784526, + "grad_norm": 10.673401832580566, + "learning_rate": 1.7055748277715344e-06, + "loss": 0.1527, + "num_input_tokens_seen": 11750272, + "step": 17455 + }, + { + "epoch": 0.4265507048102998, + "grad_norm": 47.06698226928711, + "learning_rate": 1.7060634191625543e-06, + "loss": 0.1504, + "num_input_tokens_seen": 11753856, + "step": 17460 + }, + { + "epoch": 0.4266728556421469, + "grad_norm": 5.912046909332275, + "learning_rate": 1.706552010553574e-06, + "loss": 0.0409, + "num_input_tokens_seen": 11756928, + "step": 17465 + }, + { + "epoch": 0.4267950064739941, + "grad_norm": 1.2672039270401, + "learning_rate": 1.7070406019445937e-06, + "loss": 0.2459, + "num_input_tokens_seen": 11760128, + "step": 17470 + }, + { + "epoch": 0.42691715730584123, + "grad_norm": 15.835184097290039, + "learning_rate": 1.7075291933356134e-06, + "loss": 0.0789, + "num_input_tokens_seen": 11763584, + "step": 17475 + }, + { + "epoch": 0.4270393081376884, + "grad_norm": 10.859064102172852, + "learning_rate": 1.708017784726633e-06, + "loss": 0.1138, + "num_input_tokens_seen": 11766912, + "step": 17480 + }, + { + "epoch": 0.42716145896953556, + "grad_norm": 22.323833465576172, + "learning_rate": 1.7085063761176527e-06, + "loss": 0.0573, + "num_input_tokens_seen": 11771008, + "step": 17485 + }, + { + "epoch": 0.42728360980138275, + "grad_norm": 8.541205406188965, + "learning_rate": 1.7089949675086724e-06, + "loss": 0.1585, + "num_input_tokens_seen": 11773952, + "step": 17490 + }, + { + "epoch": 0.42740576063322994, + "grad_norm": 0.24411046504974365, + "learning_rate": 1.7094835588996921e-06, + "loss": 0.0502, + "num_input_tokens_seen": 11777344, + "step": 17495 + }, + { + "epoch": 0.42752791146507707, + "grad_norm": 1.567487120628357, + "learning_rate": 1.7099721502907118e-06, + "loss": 0.1001, + "num_input_tokens_seen": 11780928, + "step": 17500 + }, + { + "epoch": 0.42765006229692426, + "grad_norm": 18.680326461791992, + "learning_rate": 1.7104607416817317e-06, + "loss": 0.1082, + "num_input_tokens_seen": 11784512, + "step": 17505 + }, + { + "epoch": 0.4277722131287714, + "grad_norm": 33.931095123291016, + "learning_rate": 1.7109493330727512e-06, + "loss": 0.1218, + "num_input_tokens_seen": 11787776, + "step": 17510 + }, + { + "epoch": 0.4278943639606186, + "grad_norm": 24.828832626342773, + "learning_rate": 1.7114379244637708e-06, + "loss": 0.1005, + "num_input_tokens_seen": 11791360, + "step": 17515 + }, + { + "epoch": 0.4280165147924657, + "grad_norm": 26.003156661987305, + "learning_rate": 1.7119265158547907e-06, + "loss": 0.0767, + "num_input_tokens_seen": 11794560, + "step": 17520 + }, + { + "epoch": 0.4281386656243129, + "grad_norm": 30.900161743164062, + "learning_rate": 1.7124151072458102e-06, + "loss": 0.1079, + "num_input_tokens_seen": 11797632, + "step": 17525 + }, + { + "epoch": 0.42826081645616004, + "grad_norm": 14.616107940673828, + "learning_rate": 1.7129036986368299e-06, + "loss": 0.1193, + "num_input_tokens_seen": 11801024, + "step": 17530 + }, + { + "epoch": 0.42838296728800723, + "grad_norm": 31.8143367767334, + "learning_rate": 1.7133922900278498e-06, + "loss": 0.157, + "num_input_tokens_seen": 11804736, + "step": 17535 + }, + { + "epoch": 0.4285051181198544, + "grad_norm": 26.70197105407715, + "learning_rate": 1.7138808814188692e-06, + "loss": 0.235, + "num_input_tokens_seen": 11808512, + "step": 17540 + }, + { + "epoch": 0.42862726895170156, + "grad_norm": 15.07053279876709, + "learning_rate": 1.714369472809889e-06, + "loss": 0.1438, + "num_input_tokens_seen": 11811584, + "step": 17545 + }, + { + "epoch": 0.42874941978354875, + "grad_norm": 0.40839216113090515, + "learning_rate": 1.7148580642009088e-06, + "loss": 0.1372, + "num_input_tokens_seen": 11814848, + "step": 17550 + }, + { + "epoch": 0.4288715706153959, + "grad_norm": 19.45197105407715, + "learning_rate": 1.7153466555919285e-06, + "loss": 0.0802, + "num_input_tokens_seen": 11818368, + "step": 17555 + }, + { + "epoch": 0.42899372144724307, + "grad_norm": 0.8354930877685547, + "learning_rate": 1.715835246982948e-06, + "loss": 0.133, + "num_input_tokens_seen": 11821696, + "step": 17560 + }, + { + "epoch": 0.4291158722790902, + "grad_norm": 40.36201095581055, + "learning_rate": 1.7163238383739679e-06, + "loss": 0.1478, + "num_input_tokens_seen": 11825088, + "step": 17565 + }, + { + "epoch": 0.4292380231109374, + "grad_norm": 0.6440602540969849, + "learning_rate": 1.7168124297649875e-06, + "loss": 0.0598, + "num_input_tokens_seen": 11828480, + "step": 17570 + }, + { + "epoch": 0.42936017394278453, + "grad_norm": 36.08130645751953, + "learning_rate": 1.717301021156007e-06, + "loss": 0.1228, + "num_input_tokens_seen": 11831936, + "step": 17575 + }, + { + "epoch": 0.4294823247746317, + "grad_norm": 23.151386260986328, + "learning_rate": 1.717789612547027e-06, + "loss": 0.0906, + "num_input_tokens_seen": 11835328, + "step": 17580 + }, + { + "epoch": 0.42960447560647885, + "grad_norm": 0.3527524173259735, + "learning_rate": 1.7182782039380466e-06, + "loss": 0.078, + "num_input_tokens_seen": 11838720, + "step": 17585 + }, + { + "epoch": 0.42972662643832604, + "grad_norm": 1.2807598114013672, + "learning_rate": 1.7187667953290663e-06, + "loss": 0.0597, + "num_input_tokens_seen": 11842112, + "step": 17590 + }, + { + "epoch": 0.42984877727017323, + "grad_norm": 36.731380462646484, + "learning_rate": 1.719255386720086e-06, + "loss": 0.2555, + "num_input_tokens_seen": 11845760, + "step": 17595 + }, + { + "epoch": 0.42997092810202037, + "grad_norm": 1.584189534187317, + "learning_rate": 1.7197439781111056e-06, + "loss": 0.2048, + "num_input_tokens_seen": 11849088, + "step": 17600 + }, + { + "epoch": 0.43009307893386756, + "grad_norm": 35.07202911376953, + "learning_rate": 1.7202325695021253e-06, + "loss": 0.1001, + "num_input_tokens_seen": 11852160, + "step": 17605 + }, + { + "epoch": 0.4302152297657147, + "grad_norm": 3.500735282897949, + "learning_rate": 1.720721160893145e-06, + "loss": 0.0159, + "num_input_tokens_seen": 11855232, + "step": 17610 + }, + { + "epoch": 0.4303373805975619, + "grad_norm": 33.144378662109375, + "learning_rate": 1.7212097522841647e-06, + "loss": 0.149, + "num_input_tokens_seen": 11858432, + "step": 17615 + }, + { + "epoch": 0.430459531429409, + "grad_norm": 2.410709857940674, + "learning_rate": 1.7216983436751844e-06, + "loss": 0.1561, + "num_input_tokens_seen": 11862336, + "step": 17620 + }, + { + "epoch": 0.4305816822612562, + "grad_norm": 45.39206314086914, + "learning_rate": 1.7221869350662042e-06, + "loss": 0.1548, + "num_input_tokens_seen": 11865344, + "step": 17625 + }, + { + "epoch": 0.43070383309310334, + "grad_norm": 10.103440284729004, + "learning_rate": 1.7226755264572237e-06, + "loss": 0.1538, + "num_input_tokens_seen": 11868672, + "step": 17630 + }, + { + "epoch": 0.43082598392495053, + "grad_norm": 66.29302978515625, + "learning_rate": 1.7231641178482434e-06, + "loss": 0.1825, + "num_input_tokens_seen": 11871872, + "step": 17635 + }, + { + "epoch": 0.4309481347567977, + "grad_norm": 16.076980590820312, + "learning_rate": 1.7236527092392633e-06, + "loss": 0.169, + "num_input_tokens_seen": 11875328, + "step": 17640 + }, + { + "epoch": 0.43107028558864485, + "grad_norm": 20.99383544921875, + "learning_rate": 1.7241413006302828e-06, + "loss": 0.1005, + "num_input_tokens_seen": 11878592, + "step": 17645 + }, + { + "epoch": 0.43119243642049204, + "grad_norm": 2.398129940032959, + "learning_rate": 1.7246298920213024e-06, + "loss": 0.0513, + "num_input_tokens_seen": 11881984, + "step": 17650 + }, + { + "epoch": 0.4313145872523392, + "grad_norm": 38.867679595947266, + "learning_rate": 1.7251184834123223e-06, + "loss": 0.1343, + "num_input_tokens_seen": 11884992, + "step": 17655 + }, + { + "epoch": 0.43143673808418637, + "grad_norm": 13.77950668334961, + "learning_rate": 1.7256070748033418e-06, + "loss": 0.1457, + "num_input_tokens_seen": 11888384, + "step": 17660 + }, + { + "epoch": 0.4315588889160335, + "grad_norm": 16.17148208618164, + "learning_rate": 1.7260956661943615e-06, + "loss": 0.1399, + "num_input_tokens_seen": 11891584, + "step": 17665 + }, + { + "epoch": 0.4316810397478807, + "grad_norm": 22.251768112182617, + "learning_rate": 1.7265842575853814e-06, + "loss": 0.1125, + "num_input_tokens_seen": 11894656, + "step": 17670 + }, + { + "epoch": 0.4318031905797278, + "grad_norm": 23.052682876586914, + "learning_rate": 1.727072848976401e-06, + "loss": 0.1155, + "num_input_tokens_seen": 11897600, + "step": 17675 + }, + { + "epoch": 0.431925341411575, + "grad_norm": 1.7083724737167358, + "learning_rate": 1.7275614403674205e-06, + "loss": 0.0801, + "num_input_tokens_seen": 11900864, + "step": 17680 + }, + { + "epoch": 0.4320474922434222, + "grad_norm": 19.45560646057129, + "learning_rate": 1.7280500317584404e-06, + "loss": 0.2049, + "num_input_tokens_seen": 11903744, + "step": 17685 + }, + { + "epoch": 0.43216964307526934, + "grad_norm": 30.353389739990234, + "learning_rate": 1.72853862314946e-06, + "loss": 0.0588, + "num_input_tokens_seen": 11907456, + "step": 17690 + }, + { + "epoch": 0.43229179390711653, + "grad_norm": 1.1827892065048218, + "learning_rate": 1.7290272145404796e-06, + "loss": 0.1682, + "num_input_tokens_seen": 11910912, + "step": 17695 + }, + { + "epoch": 0.43241394473896366, + "grad_norm": 36.37336349487305, + "learning_rate": 1.7295158059314995e-06, + "loss": 0.1615, + "num_input_tokens_seen": 11914176, + "step": 17700 + }, + { + "epoch": 0.43253609557081085, + "grad_norm": 22.18280601501465, + "learning_rate": 1.7300043973225191e-06, + "loss": 0.1076, + "num_input_tokens_seen": 11917184, + "step": 17705 + }, + { + "epoch": 0.432658246402658, + "grad_norm": 0.602412223815918, + "learning_rate": 1.7304929887135388e-06, + "loss": 0.1363, + "num_input_tokens_seen": 11920192, + "step": 17710 + }, + { + "epoch": 0.4327803972345052, + "grad_norm": 33.112857818603516, + "learning_rate": 1.7309815801045585e-06, + "loss": 0.134, + "num_input_tokens_seen": 11923392, + "step": 17715 + }, + { + "epoch": 0.4329025480663523, + "grad_norm": 14.708128929138184, + "learning_rate": 1.7314701714955782e-06, + "loss": 0.1268, + "num_input_tokens_seen": 11926656, + "step": 17720 + }, + { + "epoch": 0.4330246988981995, + "grad_norm": 0.573269784450531, + "learning_rate": 1.7319587628865979e-06, + "loss": 0.121, + "num_input_tokens_seen": 11930432, + "step": 17725 + }, + { + "epoch": 0.43314684973004663, + "grad_norm": 18.703655242919922, + "learning_rate": 1.7324473542776176e-06, + "loss": 0.0776, + "num_input_tokens_seen": 11933952, + "step": 17730 + }, + { + "epoch": 0.4332690005618938, + "grad_norm": 11.47458267211914, + "learning_rate": 1.7329359456686372e-06, + "loss": 0.1244, + "num_input_tokens_seen": 11937088, + "step": 17735 + }, + { + "epoch": 0.433391151393741, + "grad_norm": 18.38603973388672, + "learning_rate": 1.733424537059657e-06, + "loss": 0.1057, + "num_input_tokens_seen": 11940032, + "step": 17740 + }, + { + "epoch": 0.43351330222558815, + "grad_norm": 17.476255416870117, + "learning_rate": 1.7339131284506768e-06, + "loss": 0.0861, + "num_input_tokens_seen": 11943680, + "step": 17745 + }, + { + "epoch": 0.43363545305743534, + "grad_norm": 0.9486721754074097, + "learning_rate": 1.7344017198416963e-06, + "loss": 0.0573, + "num_input_tokens_seen": 11947520, + "step": 17750 + }, + { + "epoch": 0.4337576038892825, + "grad_norm": 0.8889973163604736, + "learning_rate": 1.734890311232716e-06, + "loss": 0.0298, + "num_input_tokens_seen": 11950720, + "step": 17755 + }, + { + "epoch": 0.43387975472112966, + "grad_norm": 1.1656432151794434, + "learning_rate": 1.7353789026237359e-06, + "loss": 0.1699, + "num_input_tokens_seen": 11954048, + "step": 17760 + }, + { + "epoch": 0.4340019055529768, + "grad_norm": 4.165920257568359, + "learning_rate": 1.7358674940147553e-06, + "loss": 0.0999, + "num_input_tokens_seen": 11957248, + "step": 17765 + }, + { + "epoch": 0.434124056384824, + "grad_norm": 46.445533752441406, + "learning_rate": 1.736356085405775e-06, + "loss": 0.1424, + "num_input_tokens_seen": 11960640, + "step": 17770 + }, + { + "epoch": 0.4342462072166711, + "grad_norm": 55.10757064819336, + "learning_rate": 1.736844676796795e-06, + "loss": 0.212, + "num_input_tokens_seen": 11963968, + "step": 17775 + }, + { + "epoch": 0.4343683580485183, + "grad_norm": 37.553836822509766, + "learning_rate": 1.7373332681878146e-06, + "loss": 0.2234, + "num_input_tokens_seen": 11967104, + "step": 17780 + }, + { + "epoch": 0.4344905088803655, + "grad_norm": 16.70302391052246, + "learning_rate": 1.737821859578834e-06, + "loss": 0.055, + "num_input_tokens_seen": 11970304, + "step": 17785 + }, + { + "epoch": 0.43461265971221263, + "grad_norm": 0.09657532721757889, + "learning_rate": 1.738310450969854e-06, + "loss": 0.1663, + "num_input_tokens_seen": 11974144, + "step": 17790 + }, + { + "epoch": 0.4347348105440598, + "grad_norm": 1.6909934282302856, + "learning_rate": 1.7387990423608736e-06, + "loss": 0.1038, + "num_input_tokens_seen": 11977280, + "step": 17795 + }, + { + "epoch": 0.43485696137590696, + "grad_norm": 16.23759651184082, + "learning_rate": 1.739287633751893e-06, + "loss": 0.0562, + "num_input_tokens_seen": 11981184, + "step": 17800 + }, + { + "epoch": 0.43497911220775415, + "grad_norm": 0.7810158133506775, + "learning_rate": 1.739776225142913e-06, + "loss": 0.028, + "num_input_tokens_seen": 11984320, + "step": 17805 + }, + { + "epoch": 0.4351012630396013, + "grad_norm": 17.658899307250977, + "learning_rate": 1.7402648165339327e-06, + "loss": 0.054, + "num_input_tokens_seen": 11987712, + "step": 17810 + }, + { + "epoch": 0.4352234138714485, + "grad_norm": 15.886202812194824, + "learning_rate": 1.7407534079249521e-06, + "loss": 0.1537, + "num_input_tokens_seen": 11991104, + "step": 17815 + }, + { + "epoch": 0.4353455647032956, + "grad_norm": 32.44907760620117, + "learning_rate": 1.741241999315972e-06, + "loss": 0.08, + "num_input_tokens_seen": 11994688, + "step": 17820 + }, + { + "epoch": 0.4354677155351428, + "grad_norm": 7.900591850280762, + "learning_rate": 1.7417305907069917e-06, + "loss": 0.0853, + "num_input_tokens_seen": 11998400, + "step": 17825 + }, + { + "epoch": 0.43558986636699, + "grad_norm": 20.13414192199707, + "learning_rate": 1.7422191820980114e-06, + "loss": 0.2011, + "num_input_tokens_seen": 12001280, + "step": 17830 + }, + { + "epoch": 0.4357120171988371, + "grad_norm": 16.002933502197266, + "learning_rate": 1.742707773489031e-06, + "loss": 0.0902, + "num_input_tokens_seen": 12004480, + "step": 17835 + }, + { + "epoch": 0.4358341680306843, + "grad_norm": 12.876802444458008, + "learning_rate": 1.7431963648800508e-06, + "loss": 0.2976, + "num_input_tokens_seen": 12008128, + "step": 17840 + }, + { + "epoch": 0.43595631886253144, + "grad_norm": 24.299243927001953, + "learning_rate": 1.7436849562710704e-06, + "loss": 0.0274, + "num_input_tokens_seen": 12011456, + "step": 17845 + }, + { + "epoch": 0.43607846969437863, + "grad_norm": 40.15287399291992, + "learning_rate": 1.7441735476620901e-06, + "loss": 0.1043, + "num_input_tokens_seen": 12015040, + "step": 17850 + }, + { + "epoch": 0.43620062052622577, + "grad_norm": 5.354029655456543, + "learning_rate": 1.7446621390531098e-06, + "loss": 0.0902, + "num_input_tokens_seen": 12018112, + "step": 17855 + }, + { + "epoch": 0.43632277135807296, + "grad_norm": 16.343725204467773, + "learning_rate": 1.7451507304441295e-06, + "loss": 0.0827, + "num_input_tokens_seen": 12021504, + "step": 17860 + }, + { + "epoch": 0.4364449221899201, + "grad_norm": 19.908218383789062, + "learning_rate": 1.7456393218351494e-06, + "loss": 0.2358, + "num_input_tokens_seen": 12024704, + "step": 17865 + }, + { + "epoch": 0.4365670730217673, + "grad_norm": 20.10467529296875, + "learning_rate": 1.7461279132261688e-06, + "loss": 0.1437, + "num_input_tokens_seen": 12028544, + "step": 17870 + }, + { + "epoch": 0.4366892238536144, + "grad_norm": 0.4849933981895447, + "learning_rate": 1.7466165046171885e-06, + "loss": 0.1021, + "num_input_tokens_seen": 12032192, + "step": 17875 + }, + { + "epoch": 0.4368113746854616, + "grad_norm": 24.99449920654297, + "learning_rate": 1.7471050960082084e-06, + "loss": 0.1488, + "num_input_tokens_seen": 12035520, + "step": 17880 + }, + { + "epoch": 0.4369335255173088, + "grad_norm": 46.49135208129883, + "learning_rate": 1.7475936873992279e-06, + "loss": 0.2402, + "num_input_tokens_seen": 12038848, + "step": 17885 + }, + { + "epoch": 0.43705567634915593, + "grad_norm": 29.723981857299805, + "learning_rate": 1.7480822787902476e-06, + "loss": 0.084, + "num_input_tokens_seen": 12042240, + "step": 17890 + }, + { + "epoch": 0.4371778271810031, + "grad_norm": 2.592606544494629, + "learning_rate": 1.7485708701812675e-06, + "loss": 0.0383, + "num_input_tokens_seen": 12046400, + "step": 17895 + }, + { + "epoch": 0.43729997801285025, + "grad_norm": 33.512306213378906, + "learning_rate": 1.7490594615722871e-06, + "loss": 0.0683, + "num_input_tokens_seen": 12050048, + "step": 17900 + }, + { + "epoch": 0.43742212884469744, + "grad_norm": 25.43195915222168, + "learning_rate": 1.7495480529633066e-06, + "loss": 0.1424, + "num_input_tokens_seen": 12053120, + "step": 17905 + }, + { + "epoch": 0.4375442796765446, + "grad_norm": 0.5998150110244751, + "learning_rate": 1.7500366443543265e-06, + "loss": 0.0343, + "num_input_tokens_seen": 12056704, + "step": 17910 + }, + { + "epoch": 0.43766643050839177, + "grad_norm": 2.1911861896514893, + "learning_rate": 1.7505252357453462e-06, + "loss": 0.0722, + "num_input_tokens_seen": 12059712, + "step": 17915 + }, + { + "epoch": 0.4377885813402389, + "grad_norm": 28.80990219116211, + "learning_rate": 1.7510138271363657e-06, + "loss": 0.0684, + "num_input_tokens_seen": 12063168, + "step": 17920 + }, + { + "epoch": 0.4379107321720861, + "grad_norm": 8.22693157196045, + "learning_rate": 1.7515024185273855e-06, + "loss": 0.0728, + "num_input_tokens_seen": 12066560, + "step": 17925 + }, + { + "epoch": 0.4380328830039333, + "grad_norm": 3.5628767013549805, + "learning_rate": 1.7519910099184052e-06, + "loss": 0.1294, + "num_input_tokens_seen": 12069760, + "step": 17930 + }, + { + "epoch": 0.4381550338357804, + "grad_norm": 9.691713333129883, + "learning_rate": 1.7524796013094247e-06, + "loss": 0.1249, + "num_input_tokens_seen": 12073088, + "step": 17935 + }, + { + "epoch": 0.4382771846676276, + "grad_norm": 17.05838394165039, + "learning_rate": 1.7529681927004446e-06, + "loss": 0.1269, + "num_input_tokens_seen": 12076096, + "step": 17940 + }, + { + "epoch": 0.43839933549947474, + "grad_norm": 3.224658966064453, + "learning_rate": 1.7534567840914643e-06, + "loss": 0.0412, + "num_input_tokens_seen": 12079680, + "step": 17945 + }, + { + "epoch": 0.43852148633132193, + "grad_norm": 15.042247772216797, + "learning_rate": 1.753945375482484e-06, + "loss": 0.0797, + "num_input_tokens_seen": 12082880, + "step": 17950 + }, + { + "epoch": 0.43864363716316906, + "grad_norm": 21.121103286743164, + "learning_rate": 1.7544339668735036e-06, + "loss": 0.0568, + "num_input_tokens_seen": 12085888, + "step": 17955 + }, + { + "epoch": 0.43876578799501625, + "grad_norm": 12.655285835266113, + "learning_rate": 1.7549225582645233e-06, + "loss": 0.1215, + "num_input_tokens_seen": 12089408, + "step": 17960 + }, + { + "epoch": 0.4388879388268634, + "grad_norm": 2.449303150177002, + "learning_rate": 1.755411149655543e-06, + "loss": 0.0904, + "num_input_tokens_seen": 12092160, + "step": 17965 + }, + { + "epoch": 0.4390100896587106, + "grad_norm": 18.23929786682129, + "learning_rate": 1.7558997410465627e-06, + "loss": 0.1017, + "num_input_tokens_seen": 12095616, + "step": 17970 + }, + { + "epoch": 0.43913224049055777, + "grad_norm": 6.55383825302124, + "learning_rate": 1.7563883324375824e-06, + "loss": 0.1135, + "num_input_tokens_seen": 12098496, + "step": 17975 + }, + { + "epoch": 0.4392543913224049, + "grad_norm": 20.092472076416016, + "learning_rate": 1.756876923828602e-06, + "loss": 0.1535, + "num_input_tokens_seen": 12101824, + "step": 17980 + }, + { + "epoch": 0.4393765421542521, + "grad_norm": 15.990132331848145, + "learning_rate": 1.757365515219622e-06, + "loss": 0.0734, + "num_input_tokens_seen": 12105152, + "step": 17985 + }, + { + "epoch": 0.4394986929860992, + "grad_norm": 0.2111085206270218, + "learning_rate": 1.7578541066106414e-06, + "loss": 0.1458, + "num_input_tokens_seen": 12108992, + "step": 17990 + }, + { + "epoch": 0.4396208438179464, + "grad_norm": 12.769996643066406, + "learning_rate": 1.758342698001661e-06, + "loss": 0.1572, + "num_input_tokens_seen": 12112192, + "step": 17995 + }, + { + "epoch": 0.43974299464979355, + "grad_norm": 25.63375473022461, + "learning_rate": 1.758831289392681e-06, + "loss": 0.1239, + "num_input_tokens_seen": 12115904, + "step": 18000 + }, + { + "epoch": 0.43986514548164074, + "grad_norm": 36.48350524902344, + "learning_rate": 1.7593198807837004e-06, + "loss": 0.1095, + "num_input_tokens_seen": 12119744, + "step": 18005 + }, + { + "epoch": 0.4399872963134879, + "grad_norm": 11.921119689941406, + "learning_rate": 1.7598084721747201e-06, + "loss": 0.0698, + "num_input_tokens_seen": 12122880, + "step": 18010 + }, + { + "epoch": 0.44010944714533506, + "grad_norm": 27.85514259338379, + "learning_rate": 1.76029706356574e-06, + "loss": 0.1874, + "num_input_tokens_seen": 12126080, + "step": 18015 + }, + { + "epoch": 0.4402315979771822, + "grad_norm": 13.67387866973877, + "learning_rate": 1.7607856549567597e-06, + "loss": 0.0898, + "num_input_tokens_seen": 12129536, + "step": 18020 + }, + { + "epoch": 0.4403537488090294, + "grad_norm": 9.061943054199219, + "learning_rate": 1.7612742463477792e-06, + "loss": 0.0822, + "num_input_tokens_seen": 12132992, + "step": 18025 + }, + { + "epoch": 0.4404758996408766, + "grad_norm": 40.795631408691406, + "learning_rate": 1.761762837738799e-06, + "loss": 0.1154, + "num_input_tokens_seen": 12136320, + "step": 18030 + }, + { + "epoch": 0.4405980504727237, + "grad_norm": 17.49411964416504, + "learning_rate": 1.7622514291298187e-06, + "loss": 0.1134, + "num_input_tokens_seen": 12139712, + "step": 18035 + }, + { + "epoch": 0.4407202013045709, + "grad_norm": 4.147327423095703, + "learning_rate": 1.7627400205208382e-06, + "loss": 0.1963, + "num_input_tokens_seen": 12142656, + "step": 18040 + }, + { + "epoch": 0.44084235213641804, + "grad_norm": 57.400169372558594, + "learning_rate": 1.7632286119118581e-06, + "loss": 0.2346, + "num_input_tokens_seen": 12145984, + "step": 18045 + }, + { + "epoch": 0.4409645029682652, + "grad_norm": 15.839056968688965, + "learning_rate": 1.7637172033028778e-06, + "loss": 0.128, + "num_input_tokens_seen": 12149568, + "step": 18050 + }, + { + "epoch": 0.44108665380011236, + "grad_norm": 14.90942668914795, + "learning_rate": 1.7642057946938975e-06, + "loss": 0.054, + "num_input_tokens_seen": 12153216, + "step": 18055 + }, + { + "epoch": 0.44120880463195955, + "grad_norm": 10.704864501953125, + "learning_rate": 1.7646943860849172e-06, + "loss": 0.1968, + "num_input_tokens_seen": 12156992, + "step": 18060 + }, + { + "epoch": 0.4413309554638067, + "grad_norm": 15.071078300476074, + "learning_rate": 1.7651829774759368e-06, + "loss": 0.0838, + "num_input_tokens_seen": 12160448, + "step": 18065 + }, + { + "epoch": 0.4414531062956539, + "grad_norm": 15.569796562194824, + "learning_rate": 1.7656715688669565e-06, + "loss": 0.1253, + "num_input_tokens_seen": 12163520, + "step": 18070 + }, + { + "epoch": 0.44157525712750106, + "grad_norm": 9.158796310424805, + "learning_rate": 1.7661601602579762e-06, + "loss": 0.0315, + "num_input_tokens_seen": 12167488, + "step": 18075 + }, + { + "epoch": 0.4416974079593482, + "grad_norm": 16.890132904052734, + "learning_rate": 1.7666487516489959e-06, + "loss": 0.0407, + "num_input_tokens_seen": 12170944, + "step": 18080 + }, + { + "epoch": 0.4418195587911954, + "grad_norm": 9.779267311096191, + "learning_rate": 1.7671373430400156e-06, + "loss": 0.1787, + "num_input_tokens_seen": 12174336, + "step": 18085 + }, + { + "epoch": 0.4419417096230425, + "grad_norm": 5.361752033233643, + "learning_rate": 1.7676259344310352e-06, + "loss": 0.0386, + "num_input_tokens_seen": 12177472, + "step": 18090 + }, + { + "epoch": 0.4420638604548897, + "grad_norm": 21.10449981689453, + "learning_rate": 1.768114525822055e-06, + "loss": 0.1516, + "num_input_tokens_seen": 12180480, + "step": 18095 + }, + { + "epoch": 0.44218601128673685, + "grad_norm": 2.0351269245147705, + "learning_rate": 1.7686031172130746e-06, + "loss": 0.1046, + "num_input_tokens_seen": 12183808, + "step": 18100 + }, + { + "epoch": 0.44230816211858404, + "grad_norm": 44.6690673828125, + "learning_rate": 1.7690917086040945e-06, + "loss": 0.2344, + "num_input_tokens_seen": 12187008, + "step": 18105 + }, + { + "epoch": 0.44243031295043117, + "grad_norm": 1.5464696884155273, + "learning_rate": 1.769580299995114e-06, + "loss": 0.0702, + "num_input_tokens_seen": 12190464, + "step": 18110 + }, + { + "epoch": 0.44255246378227836, + "grad_norm": 0.9766054749488831, + "learning_rate": 1.7700688913861336e-06, + "loss": 0.2018, + "num_input_tokens_seen": 12194624, + "step": 18115 + }, + { + "epoch": 0.4426746146141255, + "grad_norm": 22.986719131469727, + "learning_rate": 1.7705574827771535e-06, + "loss": 0.2166, + "num_input_tokens_seen": 12198016, + "step": 18120 + }, + { + "epoch": 0.4427967654459727, + "grad_norm": 1.6592448949813843, + "learning_rate": 1.771046074168173e-06, + "loss": 0.0513, + "num_input_tokens_seen": 12201088, + "step": 18125 + }, + { + "epoch": 0.4429189162778199, + "grad_norm": 10.565286636352539, + "learning_rate": 1.7715346655591927e-06, + "loss": 0.1119, + "num_input_tokens_seen": 12204288, + "step": 18130 + }, + { + "epoch": 0.443041067109667, + "grad_norm": 37.32499313354492, + "learning_rate": 1.7720232569502126e-06, + "loss": 0.2426, + "num_input_tokens_seen": 12207488, + "step": 18135 + }, + { + "epoch": 0.4431632179415142, + "grad_norm": 27.269149780273438, + "learning_rate": 1.7725118483412323e-06, + "loss": 0.2005, + "num_input_tokens_seen": 12210560, + "step": 18140 + }, + { + "epoch": 0.44328536877336133, + "grad_norm": 0.24170862138271332, + "learning_rate": 1.7730004397322517e-06, + "loss": 0.0823, + "num_input_tokens_seen": 12213824, + "step": 18145 + }, + { + "epoch": 0.4434075196052085, + "grad_norm": 24.47719383239746, + "learning_rate": 1.7734890311232716e-06, + "loss": 0.0994, + "num_input_tokens_seen": 12217280, + "step": 18150 + }, + { + "epoch": 0.44352967043705566, + "grad_norm": 10.670074462890625, + "learning_rate": 1.7739776225142913e-06, + "loss": 0.1956, + "num_input_tokens_seen": 12220544, + "step": 18155 + }, + { + "epoch": 0.44365182126890285, + "grad_norm": 19.86406135559082, + "learning_rate": 1.7744662139053108e-06, + "loss": 0.109, + "num_input_tokens_seen": 12223616, + "step": 18160 + }, + { + "epoch": 0.44377397210075, + "grad_norm": 13.4468412399292, + "learning_rate": 1.7749548052963307e-06, + "loss": 0.168, + "num_input_tokens_seen": 12227136, + "step": 18165 + }, + { + "epoch": 0.44389612293259717, + "grad_norm": 15.348857879638672, + "learning_rate": 1.7754433966873504e-06, + "loss": 0.1427, + "num_input_tokens_seen": 12229952, + "step": 18170 + }, + { + "epoch": 0.44401827376444436, + "grad_norm": 27.562042236328125, + "learning_rate": 1.77593198807837e-06, + "loss": 0.0636, + "num_input_tokens_seen": 12233472, + "step": 18175 + }, + { + "epoch": 0.4441404245962915, + "grad_norm": 3.60552716255188, + "learning_rate": 1.7764205794693897e-06, + "loss": 0.1436, + "num_input_tokens_seen": 12236864, + "step": 18180 + }, + { + "epoch": 0.4442625754281387, + "grad_norm": 43.09302520751953, + "learning_rate": 1.7769091708604094e-06, + "loss": 0.1426, + "num_input_tokens_seen": 12240192, + "step": 18185 + }, + { + "epoch": 0.4443847262599858, + "grad_norm": 9.167168617248535, + "learning_rate": 1.777397762251429e-06, + "loss": 0.1146, + "num_input_tokens_seen": 12243328, + "step": 18190 + }, + { + "epoch": 0.444506877091833, + "grad_norm": 11.480422019958496, + "learning_rate": 1.7778863536424485e-06, + "loss": 0.0868, + "num_input_tokens_seen": 12246656, + "step": 18195 + }, + { + "epoch": 0.44462902792368014, + "grad_norm": 24.931589126586914, + "learning_rate": 1.7783749450334684e-06, + "loss": 0.089, + "num_input_tokens_seen": 12250176, + "step": 18200 + }, + { + "epoch": 0.44475117875552733, + "grad_norm": 0.5531277060508728, + "learning_rate": 1.7788635364244881e-06, + "loss": 0.2103, + "num_input_tokens_seen": 12253376, + "step": 18205 + }, + { + "epoch": 0.44487332958737447, + "grad_norm": 4.852066516876221, + "learning_rate": 1.779352127815508e-06, + "loss": 0.1752, + "num_input_tokens_seen": 12256512, + "step": 18210 + }, + { + "epoch": 0.44499548041922166, + "grad_norm": 17.739526748657227, + "learning_rate": 1.7798407192065275e-06, + "loss": 0.0764, + "num_input_tokens_seen": 12260032, + "step": 18215 + }, + { + "epoch": 0.44511763125106885, + "grad_norm": 0.7192504405975342, + "learning_rate": 1.7803293105975472e-06, + "loss": 0.0446, + "num_input_tokens_seen": 12263360, + "step": 18220 + }, + { + "epoch": 0.445239782082916, + "grad_norm": 21.309751510620117, + "learning_rate": 1.780817901988567e-06, + "loss": 0.238, + "num_input_tokens_seen": 12267008, + "step": 18225 + }, + { + "epoch": 0.44536193291476317, + "grad_norm": 29.178531646728516, + "learning_rate": 1.7813064933795865e-06, + "loss": 0.1511, + "num_input_tokens_seen": 12270400, + "step": 18230 + }, + { + "epoch": 0.4454840837466103, + "grad_norm": 16.01999282836914, + "learning_rate": 1.7817950847706062e-06, + "loss": 0.0897, + "num_input_tokens_seen": 12273792, + "step": 18235 + }, + { + "epoch": 0.4456062345784575, + "grad_norm": 13.02960205078125, + "learning_rate": 1.782283676161626e-06, + "loss": 0.1574, + "num_input_tokens_seen": 12276992, + "step": 18240 + }, + { + "epoch": 0.44572838541030463, + "grad_norm": 5.595557689666748, + "learning_rate": 1.7827722675526456e-06, + "loss": 0.1676, + "num_input_tokens_seen": 12280192, + "step": 18245 + }, + { + "epoch": 0.4458505362421518, + "grad_norm": 17.469158172607422, + "learning_rate": 1.7832608589436653e-06, + "loss": 0.1209, + "num_input_tokens_seen": 12283264, + "step": 18250 + }, + { + "epoch": 0.44597268707399895, + "grad_norm": 22.485258102416992, + "learning_rate": 1.7837494503346851e-06, + "loss": 0.0612, + "num_input_tokens_seen": 12286528, + "step": 18255 + }, + { + "epoch": 0.44609483790584614, + "grad_norm": 18.569324493408203, + "learning_rate": 1.7842380417257048e-06, + "loss": 0.106, + "num_input_tokens_seen": 12289856, + "step": 18260 + }, + { + "epoch": 0.4462169887376933, + "grad_norm": 16.557361602783203, + "learning_rate": 1.7847266331167243e-06, + "loss": 0.08, + "num_input_tokens_seen": 12292928, + "step": 18265 + }, + { + "epoch": 0.44633913956954047, + "grad_norm": 13.699399948120117, + "learning_rate": 1.7852152245077442e-06, + "loss": 0.1225, + "num_input_tokens_seen": 12296000, + "step": 18270 + }, + { + "epoch": 0.44646129040138766, + "grad_norm": 20.304798126220703, + "learning_rate": 1.7857038158987639e-06, + "loss": 0.0658, + "num_input_tokens_seen": 12299200, + "step": 18275 + }, + { + "epoch": 0.4465834412332348, + "grad_norm": 8.509125709533691, + "learning_rate": 1.7861924072897833e-06, + "loss": 0.1892, + "num_input_tokens_seen": 12302528, + "step": 18280 + }, + { + "epoch": 0.446705592065082, + "grad_norm": 1.5570862293243408, + "learning_rate": 1.7866809986808032e-06, + "loss": 0.1002, + "num_input_tokens_seen": 12305408, + "step": 18285 + }, + { + "epoch": 0.4468277428969291, + "grad_norm": 4.416943073272705, + "learning_rate": 1.787169590071823e-06, + "loss": 0.0617, + "num_input_tokens_seen": 12308608, + "step": 18290 + }, + { + "epoch": 0.4469498937287763, + "grad_norm": 8.536765098571777, + "learning_rate": 1.7876581814628426e-06, + "loss": 0.071, + "num_input_tokens_seen": 12312000, + "step": 18295 + }, + { + "epoch": 0.44707204456062344, + "grad_norm": 20.15012550354004, + "learning_rate": 1.788146772853862e-06, + "loss": 0.1079, + "num_input_tokens_seen": 12314944, + "step": 18300 + }, + { + "epoch": 0.44719419539247063, + "grad_norm": 1.5687205791473389, + "learning_rate": 1.788635364244882e-06, + "loss": 0.095, + "num_input_tokens_seen": 12318144, + "step": 18305 + }, + { + "epoch": 0.44731634622431776, + "grad_norm": 12.047454833984375, + "learning_rate": 1.7891239556359016e-06, + "loss": 0.095, + "num_input_tokens_seen": 12321344, + "step": 18310 + }, + { + "epoch": 0.44743849705616495, + "grad_norm": 23.377124786376953, + "learning_rate": 1.7896125470269211e-06, + "loss": 0.1015, + "num_input_tokens_seen": 12324672, + "step": 18315 + }, + { + "epoch": 0.44756064788801214, + "grad_norm": 29.494647979736328, + "learning_rate": 1.790101138417941e-06, + "loss": 0.2122, + "num_input_tokens_seen": 12327744, + "step": 18320 + }, + { + "epoch": 0.4476827987198593, + "grad_norm": 0.5062751770019531, + "learning_rate": 1.7905897298089607e-06, + "loss": 0.1254, + "num_input_tokens_seen": 12331136, + "step": 18325 + }, + { + "epoch": 0.44780494955170647, + "grad_norm": 28.779891967773438, + "learning_rate": 1.7910783211999806e-06, + "loss": 0.1622, + "num_input_tokens_seen": 12334528, + "step": 18330 + }, + { + "epoch": 0.4479271003835536, + "grad_norm": 19.23775863647461, + "learning_rate": 1.791566912591e-06, + "loss": 0.2092, + "num_input_tokens_seen": 12338112, + "step": 18335 + }, + { + "epoch": 0.4480492512154008, + "grad_norm": 24.164121627807617, + "learning_rate": 1.7920555039820197e-06, + "loss": 0.0867, + "num_input_tokens_seen": 12341632, + "step": 18340 + }, + { + "epoch": 0.4481714020472479, + "grad_norm": 5.992556095123291, + "learning_rate": 1.7925440953730396e-06, + "loss": 0.0842, + "num_input_tokens_seen": 12344576, + "step": 18345 + }, + { + "epoch": 0.4482935528790951, + "grad_norm": 3.7169806957244873, + "learning_rate": 1.793032686764059e-06, + "loss": 0.0792, + "num_input_tokens_seen": 12347776, + "step": 18350 + }, + { + "epoch": 0.44841570371094225, + "grad_norm": 15.159074783325195, + "learning_rate": 1.7935212781550788e-06, + "loss": 0.1075, + "num_input_tokens_seen": 12351296, + "step": 18355 + }, + { + "epoch": 0.44853785454278944, + "grad_norm": 28.605384826660156, + "learning_rate": 1.7940098695460987e-06, + "loss": 0.1293, + "num_input_tokens_seen": 12355008, + "step": 18360 + }, + { + "epoch": 0.44866000537463663, + "grad_norm": 35.69192123413086, + "learning_rate": 1.7944984609371181e-06, + "loss": 0.0894, + "num_input_tokens_seen": 12358272, + "step": 18365 + }, + { + "epoch": 0.44878215620648376, + "grad_norm": 39.226112365722656, + "learning_rate": 1.7949870523281378e-06, + "loss": 0.1345, + "num_input_tokens_seen": 12361408, + "step": 18370 + }, + { + "epoch": 0.44890430703833095, + "grad_norm": 20.16849708557129, + "learning_rate": 1.7954756437191577e-06, + "loss": 0.1673, + "num_input_tokens_seen": 12364800, + "step": 18375 + }, + { + "epoch": 0.4490264578701781, + "grad_norm": 27.406200408935547, + "learning_rate": 1.7959642351101774e-06, + "loss": 0.0761, + "num_input_tokens_seen": 12368064, + "step": 18380 + }, + { + "epoch": 0.4491486087020253, + "grad_norm": 22.962316513061523, + "learning_rate": 1.7964528265011969e-06, + "loss": 0.2545, + "num_input_tokens_seen": 12371584, + "step": 18385 + }, + { + "epoch": 0.4492707595338724, + "grad_norm": 3.987178087234497, + "learning_rate": 1.7969414178922165e-06, + "loss": 0.0337, + "num_input_tokens_seen": 12374592, + "step": 18390 + }, + { + "epoch": 0.4493929103657196, + "grad_norm": 1.4741876125335693, + "learning_rate": 1.7974300092832364e-06, + "loss": 0.1143, + "num_input_tokens_seen": 12377920, + "step": 18395 + }, + { + "epoch": 0.44951506119756673, + "grad_norm": 0.08817076683044434, + "learning_rate": 1.797918600674256e-06, + "loss": 0.1046, + "num_input_tokens_seen": 12380992, + "step": 18400 + }, + { + "epoch": 0.4496372120294139, + "grad_norm": 12.917234420776367, + "learning_rate": 1.7984071920652756e-06, + "loss": 0.2225, + "num_input_tokens_seen": 12384448, + "step": 18405 + }, + { + "epoch": 0.44975936286126106, + "grad_norm": 7.490536689758301, + "learning_rate": 1.7988957834562955e-06, + "loss": 0.1052, + "num_input_tokens_seen": 12387584, + "step": 18410 + }, + { + "epoch": 0.44988151369310825, + "grad_norm": 34.98502731323242, + "learning_rate": 1.7993843748473152e-06, + "loss": 0.1601, + "num_input_tokens_seen": 12391040, + "step": 18415 + }, + { + "epoch": 0.45000366452495544, + "grad_norm": 45.478694915771484, + "learning_rate": 1.7998729662383346e-06, + "loss": 0.0576, + "num_input_tokens_seen": 12394048, + "step": 18420 + }, + { + "epoch": 0.45012581535680257, + "grad_norm": 0.9002415537834167, + "learning_rate": 1.8003615576293545e-06, + "loss": 0.1271, + "num_input_tokens_seen": 12396928, + "step": 18425 + }, + { + "epoch": 0.45024796618864976, + "grad_norm": 14.245929718017578, + "learning_rate": 1.8008501490203742e-06, + "loss": 0.042, + "num_input_tokens_seen": 12400128, + "step": 18430 + }, + { + "epoch": 0.4503701170204969, + "grad_norm": 21.265151977539062, + "learning_rate": 1.8013387404113937e-06, + "loss": 0.1996, + "num_input_tokens_seen": 12403520, + "step": 18435 + }, + { + "epoch": 0.4504922678523441, + "grad_norm": 27.13143539428711, + "learning_rate": 1.8018273318024136e-06, + "loss": 0.1522, + "num_input_tokens_seen": 12406912, + "step": 18440 + }, + { + "epoch": 0.4506144186841912, + "grad_norm": 25.90831184387207, + "learning_rate": 1.8023159231934333e-06, + "loss": 0.1372, + "num_input_tokens_seen": 12410240, + "step": 18445 + }, + { + "epoch": 0.4507365695160384, + "grad_norm": 27.729671478271484, + "learning_rate": 1.8028045145844531e-06, + "loss": 0.023, + "num_input_tokens_seen": 12413120, + "step": 18450 + }, + { + "epoch": 0.45085872034788554, + "grad_norm": 20.380538940429688, + "learning_rate": 1.8032931059754726e-06, + "loss": 0.1611, + "num_input_tokens_seen": 12416256, + "step": 18455 + }, + { + "epoch": 0.45098087117973273, + "grad_norm": 7.213114261627197, + "learning_rate": 1.8037816973664923e-06, + "loss": 0.0895, + "num_input_tokens_seen": 12419584, + "step": 18460 + }, + { + "epoch": 0.4511030220115799, + "grad_norm": 66.10938262939453, + "learning_rate": 1.8042702887575122e-06, + "loss": 0.0713, + "num_input_tokens_seen": 12422976, + "step": 18465 + }, + { + "epoch": 0.45122517284342706, + "grad_norm": 26.906949996948242, + "learning_rate": 1.8047588801485317e-06, + "loss": 0.1842, + "num_input_tokens_seen": 12426432, + "step": 18470 + }, + { + "epoch": 0.45134732367527425, + "grad_norm": 37.6614875793457, + "learning_rate": 1.8052474715395513e-06, + "loss": 0.122, + "num_input_tokens_seen": 12429504, + "step": 18475 + }, + { + "epoch": 0.4514694745071214, + "grad_norm": 5.725609302520752, + "learning_rate": 1.805736062930571e-06, + "loss": 0.0757, + "num_input_tokens_seen": 12432576, + "step": 18480 + }, + { + "epoch": 0.45159162533896857, + "grad_norm": 42.64928436279297, + "learning_rate": 1.806224654321591e-06, + "loss": 0.0932, + "num_input_tokens_seen": 12435904, + "step": 18485 + }, + { + "epoch": 0.4517137761708157, + "grad_norm": 1.5770378112792969, + "learning_rate": 1.8067132457126104e-06, + "loss": 0.0791, + "num_input_tokens_seen": 12439872, + "step": 18490 + }, + { + "epoch": 0.4518359270026629, + "grad_norm": 19.790578842163086, + "learning_rate": 1.80720183710363e-06, + "loss": 0.2214, + "num_input_tokens_seen": 12443328, + "step": 18495 + }, + { + "epoch": 0.45195807783451003, + "grad_norm": 5.598482608795166, + "learning_rate": 1.80769042849465e-06, + "loss": 0.1355, + "num_input_tokens_seen": 12446656, + "step": 18500 + }, + { + "epoch": 0.4520802286663572, + "grad_norm": 36.39820098876953, + "learning_rate": 1.8081790198856694e-06, + "loss": 0.1599, + "num_input_tokens_seen": 12450112, + "step": 18505 + }, + { + "epoch": 0.4522023794982044, + "grad_norm": 7.050886631011963, + "learning_rate": 1.8086676112766891e-06, + "loss": 0.1091, + "num_input_tokens_seen": 12453568, + "step": 18510 + }, + { + "epoch": 0.45232453033005154, + "grad_norm": 5.478260517120361, + "learning_rate": 1.809156202667709e-06, + "loss": 0.1155, + "num_input_tokens_seen": 12457088, + "step": 18515 + }, + { + "epoch": 0.45244668116189873, + "grad_norm": 4.716951370239258, + "learning_rate": 1.8096447940587285e-06, + "loss": 0.1302, + "num_input_tokens_seen": 12460352, + "step": 18520 + }, + { + "epoch": 0.45256883199374587, + "grad_norm": 10.715855598449707, + "learning_rate": 1.8101333854497482e-06, + "loss": 0.152, + "num_input_tokens_seen": 12464512, + "step": 18525 + }, + { + "epoch": 0.45269098282559306, + "grad_norm": 2.5258378982543945, + "learning_rate": 1.810621976840768e-06, + "loss": 0.0808, + "num_input_tokens_seen": 12468160, + "step": 18530 + }, + { + "epoch": 0.4528131336574402, + "grad_norm": 6.279117584228516, + "learning_rate": 1.8111105682317877e-06, + "loss": 0.1997, + "num_input_tokens_seen": 12471744, + "step": 18535 + }, + { + "epoch": 0.4529352844892874, + "grad_norm": 10.042743682861328, + "learning_rate": 1.8115991596228072e-06, + "loss": 0.0638, + "num_input_tokens_seen": 12475584, + "step": 18540 + }, + { + "epoch": 0.4530574353211345, + "grad_norm": 9.592670440673828, + "learning_rate": 1.812087751013827e-06, + "loss": 0.1659, + "num_input_tokens_seen": 12479488, + "step": 18545 + }, + { + "epoch": 0.4531795861529817, + "grad_norm": 17.198471069335938, + "learning_rate": 1.8125763424048468e-06, + "loss": 0.1573, + "num_input_tokens_seen": 12482496, + "step": 18550 + }, + { + "epoch": 0.45330173698482884, + "grad_norm": 6.23707914352417, + "learning_rate": 1.8130649337958662e-06, + "loss": 0.0731, + "num_input_tokens_seen": 12486272, + "step": 18555 + }, + { + "epoch": 0.45342388781667603, + "grad_norm": 29.733585357666016, + "learning_rate": 1.8135535251868861e-06, + "loss": 0.2805, + "num_input_tokens_seen": 12489472, + "step": 18560 + }, + { + "epoch": 0.4535460386485232, + "grad_norm": 21.276653289794922, + "learning_rate": 1.8140421165779058e-06, + "loss": 0.1171, + "num_input_tokens_seen": 12492608, + "step": 18565 + }, + { + "epoch": 0.45366818948037035, + "grad_norm": 2.3936610221862793, + "learning_rate": 1.8145307079689255e-06, + "loss": 0.1096, + "num_input_tokens_seen": 12496256, + "step": 18570 + }, + { + "epoch": 0.45379034031221754, + "grad_norm": 3.278949737548828, + "learning_rate": 1.8150192993599452e-06, + "loss": 0.1323, + "num_input_tokens_seen": 12499712, + "step": 18575 + }, + { + "epoch": 0.4539124911440647, + "grad_norm": 3.4298598766326904, + "learning_rate": 1.8155078907509649e-06, + "loss": 0.1229, + "num_input_tokens_seen": 12503296, + "step": 18580 + }, + { + "epoch": 0.45403464197591187, + "grad_norm": 15.00065803527832, + "learning_rate": 1.8159964821419845e-06, + "loss": 0.1623, + "num_input_tokens_seen": 12507264, + "step": 18585 + }, + { + "epoch": 0.454156792807759, + "grad_norm": 8.928850173950195, + "learning_rate": 1.8164850735330042e-06, + "loss": 0.0724, + "num_input_tokens_seen": 12510528, + "step": 18590 + }, + { + "epoch": 0.4542789436396062, + "grad_norm": 14.59353256225586, + "learning_rate": 1.816973664924024e-06, + "loss": 0.094, + "num_input_tokens_seen": 12514304, + "step": 18595 + }, + { + "epoch": 0.4544010944714533, + "grad_norm": 18.910980224609375, + "learning_rate": 1.8174622563150436e-06, + "loss": 0.1304, + "num_input_tokens_seen": 12518528, + "step": 18600 + }, + { + "epoch": 0.4545232453033005, + "grad_norm": 42.9864616394043, + "learning_rate": 1.8179508477060635e-06, + "loss": 0.1311, + "num_input_tokens_seen": 12522240, + "step": 18605 + }, + { + "epoch": 0.4546453961351477, + "grad_norm": 15.668207168579102, + "learning_rate": 1.818439439097083e-06, + "loss": 0.1302, + "num_input_tokens_seen": 12525568, + "step": 18610 + }, + { + "epoch": 0.45476754696699484, + "grad_norm": 0.8417151570320129, + "learning_rate": 1.8189280304881026e-06, + "loss": 0.0112, + "num_input_tokens_seen": 12528768, + "step": 18615 + }, + { + "epoch": 0.45488969779884203, + "grad_norm": 18.940181732177734, + "learning_rate": 1.8194166218791225e-06, + "loss": 0.0795, + "num_input_tokens_seen": 12532224, + "step": 18620 + }, + { + "epoch": 0.45501184863068916, + "grad_norm": 40.989173889160156, + "learning_rate": 1.819905213270142e-06, + "loss": 0.1319, + "num_input_tokens_seen": 12535360, + "step": 18625 + }, + { + "epoch": 0.45513399946253635, + "grad_norm": 4.111881256103516, + "learning_rate": 1.8203938046611617e-06, + "loss": 0.1793, + "num_input_tokens_seen": 12538624, + "step": 18630 + }, + { + "epoch": 0.4552561502943835, + "grad_norm": 6.590780735015869, + "learning_rate": 1.8208823960521816e-06, + "loss": 0.1235, + "num_input_tokens_seen": 12542080, + "step": 18635 + }, + { + "epoch": 0.4553783011262307, + "grad_norm": 41.6611213684082, + "learning_rate": 1.8213709874432012e-06, + "loss": 0.1727, + "num_input_tokens_seen": 12546048, + "step": 18640 + }, + { + "epoch": 0.4555004519580778, + "grad_norm": 3.607111692428589, + "learning_rate": 1.8218595788342207e-06, + "loss": 0.2543, + "num_input_tokens_seen": 12549248, + "step": 18645 + }, + { + "epoch": 0.455622602789925, + "grad_norm": 37.54118347167969, + "learning_rate": 1.8223481702252406e-06, + "loss": 0.2284, + "num_input_tokens_seen": 12552640, + "step": 18650 + }, + { + "epoch": 0.4557447536217722, + "grad_norm": 18.619918823242188, + "learning_rate": 1.8228367616162603e-06, + "loss": 0.0855, + "num_input_tokens_seen": 12555968, + "step": 18655 + }, + { + "epoch": 0.4558669044536193, + "grad_norm": 3.5834906101226807, + "learning_rate": 1.8233253530072798e-06, + "loss": 0.0782, + "num_input_tokens_seen": 12559296, + "step": 18660 + }, + { + "epoch": 0.4559890552854665, + "grad_norm": 16.49788475036621, + "learning_rate": 1.8238139443982997e-06, + "loss": 0.1598, + "num_input_tokens_seen": 12562368, + "step": 18665 + }, + { + "epoch": 0.45611120611731365, + "grad_norm": 1.2870237827301025, + "learning_rate": 1.8243025357893193e-06, + "loss": 0.0274, + "num_input_tokens_seen": 12565696, + "step": 18670 + }, + { + "epoch": 0.45623335694916084, + "grad_norm": 1.1009882688522339, + "learning_rate": 1.8247911271803388e-06, + "loss": 0.0718, + "num_input_tokens_seen": 12569408, + "step": 18675 + }, + { + "epoch": 0.456355507781008, + "grad_norm": 0.1918848603963852, + "learning_rate": 1.8252797185713587e-06, + "loss": 0.1147, + "num_input_tokens_seen": 12572864, + "step": 18680 + }, + { + "epoch": 0.45647765861285516, + "grad_norm": 47.69165802001953, + "learning_rate": 1.8257683099623784e-06, + "loss": 0.1923, + "num_input_tokens_seen": 12576448, + "step": 18685 + }, + { + "epoch": 0.4565998094447023, + "grad_norm": 0.2405889481306076, + "learning_rate": 1.826256901353398e-06, + "loss": 0.3076, + "num_input_tokens_seen": 12579584, + "step": 18690 + }, + { + "epoch": 0.4567219602765495, + "grad_norm": 39.91948699951172, + "learning_rate": 1.8267454927444177e-06, + "loss": 0.1864, + "num_input_tokens_seen": 12582976, + "step": 18695 + }, + { + "epoch": 0.4568441111083966, + "grad_norm": 18.106645584106445, + "learning_rate": 1.8272340841354374e-06, + "loss": 0.1133, + "num_input_tokens_seen": 12586432, + "step": 18700 + }, + { + "epoch": 0.4569662619402438, + "grad_norm": 14.690360069274902, + "learning_rate": 1.827722675526457e-06, + "loss": 0.1829, + "num_input_tokens_seen": 12589184, + "step": 18705 + }, + { + "epoch": 0.457088412772091, + "grad_norm": 0.1765415072441101, + "learning_rate": 1.8282112669174768e-06, + "loss": 0.1619, + "num_input_tokens_seen": 12592768, + "step": 18710 + }, + { + "epoch": 0.45721056360393814, + "grad_norm": 17.927274703979492, + "learning_rate": 1.8286998583084965e-06, + "loss": 0.1941, + "num_input_tokens_seen": 12595840, + "step": 18715 + }, + { + "epoch": 0.4573327144357853, + "grad_norm": 20.598888397216797, + "learning_rate": 1.8291884496995161e-06, + "loss": 0.1251, + "num_input_tokens_seen": 12599168, + "step": 18720 + }, + { + "epoch": 0.45745486526763246, + "grad_norm": 5.029316425323486, + "learning_rate": 1.829677041090536e-06, + "loss": 0.1294, + "num_input_tokens_seen": 12602560, + "step": 18725 + }, + { + "epoch": 0.45757701609947965, + "grad_norm": 32.626529693603516, + "learning_rate": 1.8301656324815555e-06, + "loss": 0.087, + "num_input_tokens_seen": 12606208, + "step": 18730 + }, + { + "epoch": 0.4576991669313268, + "grad_norm": 20.20982551574707, + "learning_rate": 1.8306542238725752e-06, + "loss": 0.0713, + "num_input_tokens_seen": 12609600, + "step": 18735 + }, + { + "epoch": 0.457821317763174, + "grad_norm": 10.495606422424316, + "learning_rate": 1.831142815263595e-06, + "loss": 0.1419, + "num_input_tokens_seen": 12612864, + "step": 18740 + }, + { + "epoch": 0.4579434685950211, + "grad_norm": 28.25389289855957, + "learning_rate": 1.8316314066546146e-06, + "loss": 0.0825, + "num_input_tokens_seen": 12616192, + "step": 18745 + }, + { + "epoch": 0.4580656194268683, + "grad_norm": 17.525806427001953, + "learning_rate": 1.8321199980456342e-06, + "loss": 0.0839, + "num_input_tokens_seen": 12619584, + "step": 18750 + }, + { + "epoch": 0.4581877702587155, + "grad_norm": 0.5782161951065063, + "learning_rate": 1.8326085894366541e-06, + "loss": 0.0678, + "num_input_tokens_seen": 12623232, + "step": 18755 + }, + { + "epoch": 0.4583099210905626, + "grad_norm": 21.926904678344727, + "learning_rate": 1.8330971808276738e-06, + "loss": 0.1289, + "num_input_tokens_seen": 12627200, + "step": 18760 + }, + { + "epoch": 0.4584320719224098, + "grad_norm": 0.8156311511993408, + "learning_rate": 1.8335857722186933e-06, + "loss": 0.2559, + "num_input_tokens_seen": 12630528, + "step": 18765 + }, + { + "epoch": 0.45855422275425695, + "grad_norm": 10.35279369354248, + "learning_rate": 1.8340743636097132e-06, + "loss": 0.0332, + "num_input_tokens_seen": 12633536, + "step": 18770 + }, + { + "epoch": 0.45867637358610414, + "grad_norm": 20.163043975830078, + "learning_rate": 1.8345629550007329e-06, + "loss": 0.1104, + "num_input_tokens_seen": 12636864, + "step": 18775 + }, + { + "epoch": 0.45879852441795127, + "grad_norm": 20.536880493164062, + "learning_rate": 1.8350515463917523e-06, + "loss": 0.1016, + "num_input_tokens_seen": 12640576, + "step": 18780 + }, + { + "epoch": 0.45892067524979846, + "grad_norm": 9.669657707214355, + "learning_rate": 1.8355401377827722e-06, + "loss": 0.0848, + "num_input_tokens_seen": 12643968, + "step": 18785 + }, + { + "epoch": 0.4590428260816456, + "grad_norm": 7.986501216888428, + "learning_rate": 1.836028729173792e-06, + "loss": 0.1632, + "num_input_tokens_seen": 12647040, + "step": 18790 + }, + { + "epoch": 0.4591649769134928, + "grad_norm": 11.392936706542969, + "learning_rate": 1.8365173205648114e-06, + "loss": 0.2297, + "num_input_tokens_seen": 12650240, + "step": 18795 + }, + { + "epoch": 0.4592871277453399, + "grad_norm": 36.556251525878906, + "learning_rate": 1.8370059119558313e-06, + "loss": 0.1519, + "num_input_tokens_seen": 12653312, + "step": 18800 + }, + { + "epoch": 0.4594092785771871, + "grad_norm": 47.813777923583984, + "learning_rate": 1.837494503346851e-06, + "loss": 0.0764, + "num_input_tokens_seen": 12656832, + "step": 18805 + }, + { + "epoch": 0.4595314294090343, + "grad_norm": 39.372642517089844, + "learning_rate": 1.8379830947378706e-06, + "loss": 0.2496, + "num_input_tokens_seen": 12660288, + "step": 18810 + }, + { + "epoch": 0.45965358024088143, + "grad_norm": 9.365509986877441, + "learning_rate": 1.8384716861288903e-06, + "loss": 0.1129, + "num_input_tokens_seen": 12663680, + "step": 18815 + }, + { + "epoch": 0.4597757310727286, + "grad_norm": 16.465686798095703, + "learning_rate": 1.83896027751991e-06, + "loss": 0.1077, + "num_input_tokens_seen": 12667200, + "step": 18820 + }, + { + "epoch": 0.45989788190457576, + "grad_norm": 15.674369812011719, + "learning_rate": 1.8394488689109297e-06, + "loss": 0.0589, + "num_input_tokens_seen": 12671040, + "step": 18825 + }, + { + "epoch": 0.46002003273642295, + "grad_norm": 18.266897201538086, + "learning_rate": 1.8399374603019493e-06, + "loss": 0.129, + "num_input_tokens_seen": 12674496, + "step": 18830 + }, + { + "epoch": 0.4601421835682701, + "grad_norm": 25.660734176635742, + "learning_rate": 1.840426051692969e-06, + "loss": 0.098, + "num_input_tokens_seen": 12677504, + "step": 18835 + }, + { + "epoch": 0.46026433440011727, + "grad_norm": 1.7656751871109009, + "learning_rate": 1.8409146430839887e-06, + "loss": 0.1474, + "num_input_tokens_seen": 12680640, + "step": 18840 + }, + { + "epoch": 0.4603864852319644, + "grad_norm": 20.569786071777344, + "learning_rate": 1.8414032344750086e-06, + "loss": 0.0447, + "num_input_tokens_seen": 12684160, + "step": 18845 + }, + { + "epoch": 0.4605086360638116, + "grad_norm": 4.835124969482422, + "learning_rate": 1.841891825866028e-06, + "loss": 0.0752, + "num_input_tokens_seen": 12687424, + "step": 18850 + }, + { + "epoch": 0.4606307868956588, + "grad_norm": 13.825311660766602, + "learning_rate": 1.8423804172570478e-06, + "loss": 0.0254, + "num_input_tokens_seen": 12690816, + "step": 18855 + }, + { + "epoch": 0.4607529377275059, + "grad_norm": 32.11545944213867, + "learning_rate": 1.8428690086480676e-06, + "loss": 0.1209, + "num_input_tokens_seen": 12694016, + "step": 18860 + }, + { + "epoch": 0.4608750885593531, + "grad_norm": 44.318389892578125, + "learning_rate": 1.8433576000390871e-06, + "loss": 0.0971, + "num_input_tokens_seen": 12697728, + "step": 18865 + }, + { + "epoch": 0.46099723939120024, + "grad_norm": 21.114696502685547, + "learning_rate": 1.8438461914301068e-06, + "loss": 0.1475, + "num_input_tokens_seen": 12701632, + "step": 18870 + }, + { + "epoch": 0.46111939022304743, + "grad_norm": 26.01463508605957, + "learning_rate": 1.8443347828211267e-06, + "loss": 0.1265, + "num_input_tokens_seen": 12705024, + "step": 18875 + }, + { + "epoch": 0.46124154105489457, + "grad_norm": 44.7984733581543, + "learning_rate": 1.8448233742121464e-06, + "loss": 0.2305, + "num_input_tokens_seen": 12708480, + "step": 18880 + }, + { + "epoch": 0.46136369188674176, + "grad_norm": 18.585708618164062, + "learning_rate": 1.8453119656031658e-06, + "loss": 0.3209, + "num_input_tokens_seen": 12711936, + "step": 18885 + }, + { + "epoch": 0.4614858427185889, + "grad_norm": 7.216206073760986, + "learning_rate": 1.8458005569941857e-06, + "loss": 0.1201, + "num_input_tokens_seen": 12715904, + "step": 18890 + }, + { + "epoch": 0.4616079935504361, + "grad_norm": 24.87909698486328, + "learning_rate": 1.8462891483852054e-06, + "loss": 0.0603, + "num_input_tokens_seen": 12719744, + "step": 18895 + }, + { + "epoch": 0.46173014438228327, + "grad_norm": 1.087585687637329, + "learning_rate": 1.8467777397762249e-06, + "loss": 0.1219, + "num_input_tokens_seen": 12723200, + "step": 18900 + }, + { + "epoch": 0.4618522952141304, + "grad_norm": 14.558207511901855, + "learning_rate": 1.8472663311672448e-06, + "loss": 0.1796, + "num_input_tokens_seen": 12726208, + "step": 18905 + }, + { + "epoch": 0.4619744460459776, + "grad_norm": 14.949580192565918, + "learning_rate": 1.8477549225582645e-06, + "loss": 0.1212, + "num_input_tokens_seen": 12729088, + "step": 18910 + }, + { + "epoch": 0.46209659687782473, + "grad_norm": 3.4832208156585693, + "learning_rate": 1.8482435139492841e-06, + "loss": 0.0825, + "num_input_tokens_seen": 12732864, + "step": 18915 + }, + { + "epoch": 0.4622187477096719, + "grad_norm": 23.411306381225586, + "learning_rate": 1.8487321053403038e-06, + "loss": 0.1234, + "num_input_tokens_seen": 12736256, + "step": 18920 + }, + { + "epoch": 0.46234089854151905, + "grad_norm": 3.059123992919922, + "learning_rate": 1.8492206967313235e-06, + "loss": 0.0573, + "num_input_tokens_seen": 12739520, + "step": 18925 + }, + { + "epoch": 0.46246304937336624, + "grad_norm": 28.218448638916016, + "learning_rate": 1.8497092881223432e-06, + "loss": 0.1303, + "num_input_tokens_seen": 12743104, + "step": 18930 + }, + { + "epoch": 0.4625852002052134, + "grad_norm": 22.900333404541016, + "learning_rate": 1.8501978795133629e-06, + "loss": 0.1818, + "num_input_tokens_seen": 12746432, + "step": 18935 + }, + { + "epoch": 0.46270735103706057, + "grad_norm": 15.471698760986328, + "learning_rate": 1.8506864709043825e-06, + "loss": 0.0869, + "num_input_tokens_seen": 12749568, + "step": 18940 + }, + { + "epoch": 0.4628295018689077, + "grad_norm": 19.799257278442383, + "learning_rate": 1.8511750622954022e-06, + "loss": 0.109, + "num_input_tokens_seen": 12752768, + "step": 18945 + }, + { + "epoch": 0.4629516527007549, + "grad_norm": 15.0982666015625, + "learning_rate": 1.851663653686422e-06, + "loss": 0.1561, + "num_input_tokens_seen": 12756480, + "step": 18950 + }, + { + "epoch": 0.4630738035326021, + "grad_norm": 0.41449591517448425, + "learning_rate": 1.8521522450774416e-06, + "loss": 0.1497, + "num_input_tokens_seen": 12759872, + "step": 18955 + }, + { + "epoch": 0.4631959543644492, + "grad_norm": 31.08782386779785, + "learning_rate": 1.8526408364684613e-06, + "loss": 0.1316, + "num_input_tokens_seen": 12763264, + "step": 18960 + }, + { + "epoch": 0.4633181051962964, + "grad_norm": 2.4979774951934814, + "learning_rate": 1.8531294278594812e-06, + "loss": 0.0591, + "num_input_tokens_seen": 12766720, + "step": 18965 + }, + { + "epoch": 0.46344025602814354, + "grad_norm": 30.141979217529297, + "learning_rate": 1.8536180192505006e-06, + "loss": 0.1837, + "num_input_tokens_seen": 12770240, + "step": 18970 + }, + { + "epoch": 0.4635624068599907, + "grad_norm": 8.070691108703613, + "learning_rate": 1.8541066106415203e-06, + "loss": 0.0643, + "num_input_tokens_seen": 12774080, + "step": 18975 + }, + { + "epoch": 0.46368455769183786, + "grad_norm": 44.15768051147461, + "learning_rate": 1.8545952020325402e-06, + "loss": 0.1817, + "num_input_tokens_seen": 12777280, + "step": 18980 + }, + { + "epoch": 0.46380670852368505, + "grad_norm": 24.315988540649414, + "learning_rate": 1.8550837934235597e-06, + "loss": 0.1293, + "num_input_tokens_seen": 12781056, + "step": 18985 + }, + { + "epoch": 0.4639288593555322, + "grad_norm": 18.086763381958008, + "learning_rate": 1.8555723848145794e-06, + "loss": 0.139, + "num_input_tokens_seen": 12784384, + "step": 18990 + }, + { + "epoch": 0.4640510101873794, + "grad_norm": 23.693946838378906, + "learning_rate": 1.8560609762055993e-06, + "loss": 0.1052, + "num_input_tokens_seen": 12787520, + "step": 18995 + }, + { + "epoch": 0.46417316101922657, + "grad_norm": 2.2032880783081055, + "learning_rate": 1.856549567596619e-06, + "loss": 0.1275, + "num_input_tokens_seen": 12791040, + "step": 19000 + }, + { + "epoch": 0.4642953118510737, + "grad_norm": 21.85564613342285, + "learning_rate": 1.8570381589876384e-06, + "loss": 0.0531, + "num_input_tokens_seen": 12794432, + "step": 19005 + }, + { + "epoch": 0.4644174626829209, + "grad_norm": 33.41852951049805, + "learning_rate": 1.8575267503786583e-06, + "loss": 0.1065, + "num_input_tokens_seen": 12797632, + "step": 19010 + }, + { + "epoch": 0.464539613514768, + "grad_norm": 67.6316909790039, + "learning_rate": 1.858015341769678e-06, + "loss": 0.1797, + "num_input_tokens_seen": 12801152, + "step": 19015 + }, + { + "epoch": 0.4646617643466152, + "grad_norm": 2.0205495357513428, + "learning_rate": 1.8585039331606974e-06, + "loss": 0.0999, + "num_input_tokens_seen": 12804864, + "step": 19020 + }, + { + "epoch": 0.46478391517846235, + "grad_norm": 1.9264830350875854, + "learning_rate": 1.8589925245517173e-06, + "loss": 0.0713, + "num_input_tokens_seen": 12808256, + "step": 19025 + }, + { + "epoch": 0.46490606601030954, + "grad_norm": 17.218624114990234, + "learning_rate": 1.859481115942737e-06, + "loss": 0.1033, + "num_input_tokens_seen": 12812224, + "step": 19030 + }, + { + "epoch": 0.46502821684215667, + "grad_norm": 37.7265510559082, + "learning_rate": 1.8599697073337567e-06, + "loss": 0.2061, + "num_input_tokens_seen": 12815616, + "step": 19035 + }, + { + "epoch": 0.46515036767400386, + "grad_norm": 2.984351873397827, + "learning_rate": 1.8604582987247764e-06, + "loss": 0.1223, + "num_input_tokens_seen": 12819392, + "step": 19040 + }, + { + "epoch": 0.46527251850585105, + "grad_norm": 10.050679206848145, + "learning_rate": 1.860946890115796e-06, + "loss": 0.0222, + "num_input_tokens_seen": 12822528, + "step": 19045 + }, + { + "epoch": 0.4653946693376982, + "grad_norm": 35.5335578918457, + "learning_rate": 1.8614354815068157e-06, + "loss": 0.1632, + "num_input_tokens_seen": 12825664, + "step": 19050 + }, + { + "epoch": 0.4655168201695454, + "grad_norm": 3.787121534347534, + "learning_rate": 1.8619240728978354e-06, + "loss": 0.0751, + "num_input_tokens_seen": 12829184, + "step": 19055 + }, + { + "epoch": 0.4656389710013925, + "grad_norm": 23.502910614013672, + "learning_rate": 1.8624126642888551e-06, + "loss": 0.1655, + "num_input_tokens_seen": 12832256, + "step": 19060 + }, + { + "epoch": 0.4657611218332397, + "grad_norm": 45.55996322631836, + "learning_rate": 1.8629012556798748e-06, + "loss": 0.1237, + "num_input_tokens_seen": 12835520, + "step": 19065 + }, + { + "epoch": 0.46588327266508683, + "grad_norm": 3.102910280227661, + "learning_rate": 1.8633898470708945e-06, + "loss": 0.1247, + "num_input_tokens_seen": 12838656, + "step": 19070 + }, + { + "epoch": 0.466005423496934, + "grad_norm": 17.618194580078125, + "learning_rate": 1.8638784384619142e-06, + "loss": 0.0818, + "num_input_tokens_seen": 12841984, + "step": 19075 + }, + { + "epoch": 0.46612757432878116, + "grad_norm": 8.12735652923584, + "learning_rate": 1.8643670298529338e-06, + "loss": 0.1951, + "num_input_tokens_seen": 12845952, + "step": 19080 + }, + { + "epoch": 0.46624972516062835, + "grad_norm": 49.927486419677734, + "learning_rate": 1.8648556212439537e-06, + "loss": 0.1492, + "num_input_tokens_seen": 12849216, + "step": 19085 + }, + { + "epoch": 0.4663718759924755, + "grad_norm": 25.571504592895508, + "learning_rate": 1.8653442126349732e-06, + "loss": 0.0904, + "num_input_tokens_seen": 12852352, + "step": 19090 + }, + { + "epoch": 0.46649402682432267, + "grad_norm": 15.531042098999023, + "learning_rate": 1.8658328040259929e-06, + "loss": 0.1296, + "num_input_tokens_seen": 12856000, + "step": 19095 + }, + { + "epoch": 0.46661617765616986, + "grad_norm": 11.947113037109375, + "learning_rate": 1.8663213954170128e-06, + "loss": 0.0455, + "num_input_tokens_seen": 12859008, + "step": 19100 + }, + { + "epoch": 0.466738328488017, + "grad_norm": 12.411914825439453, + "learning_rate": 1.8668099868080322e-06, + "loss": 0.1772, + "num_input_tokens_seen": 12862464, + "step": 19105 + }, + { + "epoch": 0.4668604793198642, + "grad_norm": 11.80659294128418, + "learning_rate": 1.867298578199052e-06, + "loss": 0.0789, + "num_input_tokens_seen": 12865792, + "step": 19110 + }, + { + "epoch": 0.4669826301517113, + "grad_norm": 0.7526969313621521, + "learning_rate": 1.8677871695900718e-06, + "loss": 0.0677, + "num_input_tokens_seen": 12869376, + "step": 19115 + }, + { + "epoch": 0.4671047809835585, + "grad_norm": 8.360123634338379, + "learning_rate": 1.8682757609810915e-06, + "loss": 0.1072, + "num_input_tokens_seen": 12872896, + "step": 19120 + }, + { + "epoch": 0.46722693181540564, + "grad_norm": 8.367980003356934, + "learning_rate": 1.868764352372111e-06, + "loss": 0.1762, + "num_input_tokens_seen": 12876608, + "step": 19125 + }, + { + "epoch": 0.46734908264725283, + "grad_norm": 3.0096700191497803, + "learning_rate": 1.8692529437631309e-06, + "loss": 0.1542, + "num_input_tokens_seen": 12879936, + "step": 19130 + }, + { + "epoch": 0.46747123347909997, + "grad_norm": 73.32782745361328, + "learning_rate": 1.8697415351541505e-06, + "loss": 0.1688, + "num_input_tokens_seen": 12883392, + "step": 19135 + }, + { + "epoch": 0.46759338431094716, + "grad_norm": 1.9654453992843628, + "learning_rate": 1.87023012654517e-06, + "loss": 0.1618, + "num_input_tokens_seen": 12886784, + "step": 19140 + }, + { + "epoch": 0.46771553514279435, + "grad_norm": 11.835115432739258, + "learning_rate": 1.87071871793619e-06, + "loss": 0.1077, + "num_input_tokens_seen": 12890048, + "step": 19145 + }, + { + "epoch": 0.4678376859746415, + "grad_norm": 0.22136236727237701, + "learning_rate": 1.8712073093272096e-06, + "loss": 0.0752, + "num_input_tokens_seen": 12893312, + "step": 19150 + }, + { + "epoch": 0.46795983680648867, + "grad_norm": 32.926368713378906, + "learning_rate": 1.8716959007182293e-06, + "loss": 0.1155, + "num_input_tokens_seen": 12896704, + "step": 19155 + }, + { + "epoch": 0.4680819876383358, + "grad_norm": 1.585870623588562, + "learning_rate": 1.872184492109249e-06, + "loss": 0.0286, + "num_input_tokens_seen": 12900096, + "step": 19160 + }, + { + "epoch": 0.468204138470183, + "grad_norm": 21.62451171875, + "learning_rate": 1.8726730835002686e-06, + "loss": 0.1852, + "num_input_tokens_seen": 12903040, + "step": 19165 + }, + { + "epoch": 0.46832628930203013, + "grad_norm": 10.86733627319336, + "learning_rate": 1.8731616748912883e-06, + "loss": 0.1419, + "num_input_tokens_seen": 12906432, + "step": 19170 + }, + { + "epoch": 0.4684484401338773, + "grad_norm": 51.00568771362305, + "learning_rate": 1.873650266282308e-06, + "loss": 0.106, + "num_input_tokens_seen": 12909376, + "step": 19175 + }, + { + "epoch": 0.46857059096572445, + "grad_norm": 41.37013626098633, + "learning_rate": 1.8741388576733277e-06, + "loss": 0.1177, + "num_input_tokens_seen": 12912896, + "step": 19180 + }, + { + "epoch": 0.46869274179757164, + "grad_norm": 14.004033088684082, + "learning_rate": 1.8746274490643474e-06, + "loss": 0.1659, + "num_input_tokens_seen": 12916096, + "step": 19185 + }, + { + "epoch": 0.46881489262941883, + "grad_norm": 51.04507827758789, + "learning_rate": 1.8751160404553672e-06, + "loss": 0.126, + "num_input_tokens_seen": 12919168, + "step": 19190 + }, + { + "epoch": 0.46893704346126597, + "grad_norm": 36.705265045166016, + "learning_rate": 1.8756046318463867e-06, + "loss": 0.233, + "num_input_tokens_seen": 12922496, + "step": 19195 + }, + { + "epoch": 0.46905919429311316, + "grad_norm": 9.139296531677246, + "learning_rate": 1.8760932232374064e-06, + "loss": 0.2077, + "num_input_tokens_seen": 12926016, + "step": 19200 + }, + { + "epoch": 0.4691813451249603, + "grad_norm": 11.94605541229248, + "learning_rate": 1.8765818146284263e-06, + "loss": 0.1411, + "num_input_tokens_seen": 12929472, + "step": 19205 + }, + { + "epoch": 0.4693034959568075, + "grad_norm": 4.031689643859863, + "learning_rate": 1.8770704060194458e-06, + "loss": 0.0664, + "num_input_tokens_seen": 12933056, + "step": 19210 + }, + { + "epoch": 0.4694256467886546, + "grad_norm": 11.11829948425293, + "learning_rate": 1.8775589974104654e-06, + "loss": 0.0971, + "num_input_tokens_seen": 12936384, + "step": 19215 + }, + { + "epoch": 0.4695477976205018, + "grad_norm": 3.863867998123169, + "learning_rate": 1.8780475888014853e-06, + "loss": 0.0367, + "num_input_tokens_seen": 12939648, + "step": 19220 + }, + { + "epoch": 0.46966994845234894, + "grad_norm": 20.8947696685791, + "learning_rate": 1.8785361801925048e-06, + "loss": 0.0736, + "num_input_tokens_seen": 12943104, + "step": 19225 + }, + { + "epoch": 0.46979209928419613, + "grad_norm": 17.901206970214844, + "learning_rate": 1.8790247715835245e-06, + "loss": 0.0748, + "num_input_tokens_seen": 12946304, + "step": 19230 + }, + { + "epoch": 0.46991425011604326, + "grad_norm": 37.201446533203125, + "learning_rate": 1.8795133629745444e-06, + "loss": 0.044, + "num_input_tokens_seen": 12949248, + "step": 19235 + }, + { + "epoch": 0.47003640094789045, + "grad_norm": 34.428733825683594, + "learning_rate": 1.880001954365564e-06, + "loss": 0.2714, + "num_input_tokens_seen": 12952512, + "step": 19240 + }, + { + "epoch": 0.47015855177973764, + "grad_norm": 60.48667526245117, + "learning_rate": 1.8804905457565835e-06, + "loss": 0.1009, + "num_input_tokens_seen": 12955712, + "step": 19245 + }, + { + "epoch": 0.4702807026115848, + "grad_norm": 6.710975646972656, + "learning_rate": 1.8809791371476034e-06, + "loss": 0.0945, + "num_input_tokens_seen": 12959424, + "step": 19250 + }, + { + "epoch": 0.47040285344343197, + "grad_norm": 14.131996154785156, + "learning_rate": 1.881467728538623e-06, + "loss": 0.1379, + "num_input_tokens_seen": 12962880, + "step": 19255 + }, + { + "epoch": 0.4705250042752791, + "grad_norm": 15.032042503356934, + "learning_rate": 1.8819563199296426e-06, + "loss": 0.1535, + "num_input_tokens_seen": 12966464, + "step": 19260 + }, + { + "epoch": 0.4706471551071263, + "grad_norm": 5.918193817138672, + "learning_rate": 1.8824449113206625e-06, + "loss": 0.0838, + "num_input_tokens_seen": 12970048, + "step": 19265 + }, + { + "epoch": 0.4707693059389734, + "grad_norm": 32.9735221862793, + "learning_rate": 1.8829335027116821e-06, + "loss": 0.1308, + "num_input_tokens_seen": 12973760, + "step": 19270 + }, + { + "epoch": 0.4708914567708206, + "grad_norm": 0.23837028443813324, + "learning_rate": 1.8834220941027018e-06, + "loss": 0.0659, + "num_input_tokens_seen": 12977600, + "step": 19275 + }, + { + "epoch": 0.47101360760266775, + "grad_norm": 32.31455993652344, + "learning_rate": 1.8839106854937215e-06, + "loss": 0.2964, + "num_input_tokens_seen": 12980864, + "step": 19280 + }, + { + "epoch": 0.47113575843451494, + "grad_norm": 26.42384910583496, + "learning_rate": 1.8843992768847412e-06, + "loss": 0.0543, + "num_input_tokens_seen": 12984256, + "step": 19285 + }, + { + "epoch": 0.47125790926636213, + "grad_norm": 0.42388415336608887, + "learning_rate": 1.8848878682757609e-06, + "loss": 0.0297, + "num_input_tokens_seen": 12987456, + "step": 19290 + }, + { + "epoch": 0.47138006009820926, + "grad_norm": 1.073360562324524, + "learning_rate": 1.8853764596667806e-06, + "loss": 0.1323, + "num_input_tokens_seen": 12990976, + "step": 19295 + }, + { + "epoch": 0.47150221093005645, + "grad_norm": 36.52716064453125, + "learning_rate": 1.8858650510578002e-06, + "loss": 0.0875, + "num_input_tokens_seen": 12994368, + "step": 19300 + }, + { + "epoch": 0.4716243617619036, + "grad_norm": 18.84002113342285, + "learning_rate": 1.88635364244882e-06, + "loss": 0.2024, + "num_input_tokens_seen": 12997376, + "step": 19305 + }, + { + "epoch": 0.4717465125937508, + "grad_norm": 9.292922019958496, + "learning_rate": 1.8868422338398398e-06, + "loss": 0.259, + "num_input_tokens_seen": 13000448, + "step": 19310 + }, + { + "epoch": 0.4718686634255979, + "grad_norm": 57.36844253540039, + "learning_rate": 1.8873308252308593e-06, + "loss": 0.1971, + "num_input_tokens_seen": 13003840, + "step": 19315 + }, + { + "epoch": 0.4719908142574451, + "grad_norm": 22.608705520629883, + "learning_rate": 1.887819416621879e-06, + "loss": 0.2406, + "num_input_tokens_seen": 13007424, + "step": 19320 + }, + { + "epoch": 0.47211296508929224, + "grad_norm": 33.60593032836914, + "learning_rate": 1.8883080080128989e-06, + "loss": 0.1605, + "num_input_tokens_seen": 13010816, + "step": 19325 + }, + { + "epoch": 0.4722351159211394, + "grad_norm": 0.326453298330307, + "learning_rate": 1.8887965994039183e-06, + "loss": 0.1467, + "num_input_tokens_seen": 13014080, + "step": 19330 + }, + { + "epoch": 0.4723572667529866, + "grad_norm": 30.46360206604004, + "learning_rate": 1.889285190794938e-06, + "loss": 0.0574, + "num_input_tokens_seen": 13017344, + "step": 19335 + }, + { + "epoch": 0.47247941758483375, + "grad_norm": 31.23234748840332, + "learning_rate": 1.889773782185958e-06, + "loss": 0.1405, + "num_input_tokens_seen": 13020608, + "step": 19340 + }, + { + "epoch": 0.47260156841668094, + "grad_norm": 30.33367919921875, + "learning_rate": 1.8902623735769776e-06, + "loss": 0.1287, + "num_input_tokens_seen": 13024128, + "step": 19345 + }, + { + "epoch": 0.4727237192485281, + "grad_norm": 0.36472418904304504, + "learning_rate": 1.890750964967997e-06, + "loss": 0.1658, + "num_input_tokens_seen": 13027584, + "step": 19350 + }, + { + "epoch": 0.47284587008037526, + "grad_norm": 9.631101608276367, + "learning_rate": 1.891239556359017e-06, + "loss": 0.2057, + "num_input_tokens_seen": 13030592, + "step": 19355 + }, + { + "epoch": 0.4729680209122224, + "grad_norm": 19.1376895904541, + "learning_rate": 1.8917281477500366e-06, + "loss": 0.1622, + "num_input_tokens_seen": 13033792, + "step": 19360 + }, + { + "epoch": 0.4730901717440696, + "grad_norm": 5.452675819396973, + "learning_rate": 1.892216739141056e-06, + "loss": 0.086, + "num_input_tokens_seen": 13036800, + "step": 19365 + }, + { + "epoch": 0.4732123225759167, + "grad_norm": 16.26456642150879, + "learning_rate": 1.892705330532076e-06, + "loss": 0.1403, + "num_input_tokens_seen": 13040128, + "step": 19370 + }, + { + "epoch": 0.4733344734077639, + "grad_norm": 13.444612503051758, + "learning_rate": 1.8931939219230957e-06, + "loss": 0.1043, + "num_input_tokens_seen": 13043584, + "step": 19375 + }, + { + "epoch": 0.47345662423961105, + "grad_norm": 13.632548332214355, + "learning_rate": 1.8936825133141151e-06, + "loss": 0.1329, + "num_input_tokens_seen": 13046656, + "step": 19380 + }, + { + "epoch": 0.47357877507145824, + "grad_norm": 1.3274794816970825, + "learning_rate": 1.894171104705135e-06, + "loss": 0.0519, + "num_input_tokens_seen": 13049792, + "step": 19385 + }, + { + "epoch": 0.4737009259033054, + "grad_norm": 1.07449209690094, + "learning_rate": 1.8946596960961547e-06, + "loss": 0.1164, + "num_input_tokens_seen": 13052736, + "step": 19390 + }, + { + "epoch": 0.47382307673515256, + "grad_norm": 26.944843292236328, + "learning_rate": 1.8951482874871744e-06, + "loss": 0.1841, + "num_input_tokens_seen": 13055616, + "step": 19395 + }, + { + "epoch": 0.47394522756699975, + "grad_norm": 12.593786239624023, + "learning_rate": 1.895636878878194e-06, + "loss": 0.1359, + "num_input_tokens_seen": 13058880, + "step": 19400 + }, + { + "epoch": 0.4740673783988469, + "grad_norm": 10.367599487304688, + "learning_rate": 1.8961254702692138e-06, + "loss": 0.1169, + "num_input_tokens_seen": 13062080, + "step": 19405 + }, + { + "epoch": 0.4741895292306941, + "grad_norm": 16.240982055664062, + "learning_rate": 1.8966140616602334e-06, + "loss": 0.0242, + "num_input_tokens_seen": 13065728, + "step": 19410 + }, + { + "epoch": 0.4743116800625412, + "grad_norm": 2.3575432300567627, + "learning_rate": 1.8971026530512531e-06, + "loss": 0.0707, + "num_input_tokens_seen": 13068992, + "step": 19415 + }, + { + "epoch": 0.4744338308943884, + "grad_norm": 46.091365814208984, + "learning_rate": 1.8975912444422728e-06, + "loss": 0.2041, + "num_input_tokens_seen": 13072384, + "step": 19420 + }, + { + "epoch": 0.47455598172623553, + "grad_norm": 4.177423477172852, + "learning_rate": 1.8980798358332925e-06, + "loss": 0.0601, + "num_input_tokens_seen": 13075648, + "step": 19425 + }, + { + "epoch": 0.4746781325580827, + "grad_norm": 32.177249908447266, + "learning_rate": 1.8985684272243124e-06, + "loss": 0.1482, + "num_input_tokens_seen": 13079040, + "step": 19430 + }, + { + "epoch": 0.4748002833899299, + "grad_norm": 23.470182418823242, + "learning_rate": 1.8990570186153318e-06, + "loss": 0.0946, + "num_input_tokens_seen": 13082304, + "step": 19435 + }, + { + "epoch": 0.47492243422177705, + "grad_norm": 11.740708351135254, + "learning_rate": 1.8995456100063515e-06, + "loss": 0.1373, + "num_input_tokens_seen": 13085632, + "step": 19440 + }, + { + "epoch": 0.47504458505362424, + "grad_norm": 1.594720721244812, + "learning_rate": 1.9000342013973714e-06, + "loss": 0.1384, + "num_input_tokens_seen": 13089024, + "step": 19445 + }, + { + "epoch": 0.47516673588547137, + "grad_norm": 2.3788228034973145, + "learning_rate": 1.9005227927883909e-06, + "loss": 0.0781, + "num_input_tokens_seen": 13092480, + "step": 19450 + }, + { + "epoch": 0.47528888671731856, + "grad_norm": 13.307036399841309, + "learning_rate": 1.9010113841794106e-06, + "loss": 0.1162, + "num_input_tokens_seen": 13096064, + "step": 19455 + }, + { + "epoch": 0.4754110375491657, + "grad_norm": 19.755977630615234, + "learning_rate": 1.9014999755704305e-06, + "loss": 0.1173, + "num_input_tokens_seen": 13099648, + "step": 19460 + }, + { + "epoch": 0.4755331883810129, + "grad_norm": 9.011610984802246, + "learning_rate": 1.9019885669614501e-06, + "loss": 0.1658, + "num_input_tokens_seen": 13102848, + "step": 19465 + }, + { + "epoch": 0.47565533921286, + "grad_norm": 8.883974075317383, + "learning_rate": 1.9024771583524696e-06, + "loss": 0.0847, + "num_input_tokens_seen": 13106688, + "step": 19470 + }, + { + "epoch": 0.4757774900447072, + "grad_norm": 26.36639976501465, + "learning_rate": 1.9029657497434895e-06, + "loss": 0.1892, + "num_input_tokens_seen": 13110272, + "step": 19475 + }, + { + "epoch": 0.47589964087655434, + "grad_norm": 6.351770401000977, + "learning_rate": 1.9034543411345092e-06, + "loss": 0.1094, + "num_input_tokens_seen": 13113280, + "step": 19480 + }, + { + "epoch": 0.47602179170840153, + "grad_norm": 3.0226128101348877, + "learning_rate": 1.9039429325255287e-06, + "loss": 0.0507, + "num_input_tokens_seen": 13116480, + "step": 19485 + }, + { + "epoch": 0.4761439425402487, + "grad_norm": 49.442291259765625, + "learning_rate": 1.9044315239165486e-06, + "loss": 0.1353, + "num_input_tokens_seen": 13119936, + "step": 19490 + }, + { + "epoch": 0.47626609337209586, + "grad_norm": 27.841060638427734, + "learning_rate": 1.9049201153075682e-06, + "loss": 0.0437, + "num_input_tokens_seen": 13123392, + "step": 19495 + }, + { + "epoch": 0.47638824420394305, + "grad_norm": 31.20875358581543, + "learning_rate": 1.9054087066985877e-06, + "loss": 0.1317, + "num_input_tokens_seen": 13126464, + "step": 19500 + }, + { + "epoch": 0.4765103950357902, + "grad_norm": 10.526351928710938, + "learning_rate": 1.9058972980896076e-06, + "loss": 0.2263, + "num_input_tokens_seen": 13130048, + "step": 19505 + }, + { + "epoch": 0.47663254586763737, + "grad_norm": 0.17358553409576416, + "learning_rate": 1.9063858894806273e-06, + "loss": 0.1022, + "num_input_tokens_seen": 13133440, + "step": 19510 + }, + { + "epoch": 0.4767546966994845, + "grad_norm": 15.227017402648926, + "learning_rate": 1.906874480871647e-06, + "loss": 0.1827, + "num_input_tokens_seen": 13136448, + "step": 19515 + }, + { + "epoch": 0.4768768475313317, + "grad_norm": 35.180110931396484, + "learning_rate": 1.9073630722626666e-06, + "loss": 0.1598, + "num_input_tokens_seen": 13139584, + "step": 19520 + }, + { + "epoch": 0.4769989983631788, + "grad_norm": 46.55698776245117, + "learning_rate": 1.9078516636536863e-06, + "loss": 0.1595, + "num_input_tokens_seen": 13142784, + "step": 19525 + }, + { + "epoch": 0.477121149195026, + "grad_norm": 2.008786201477051, + "learning_rate": 1.908340255044706e-06, + "loss": 0.2117, + "num_input_tokens_seen": 13146112, + "step": 19530 + }, + { + "epoch": 0.4772433000268732, + "grad_norm": 30.464975357055664, + "learning_rate": 1.9088288464357257e-06, + "loss": 0.2164, + "num_input_tokens_seen": 13149504, + "step": 19535 + }, + { + "epoch": 0.47736545085872034, + "grad_norm": 6.6905364990234375, + "learning_rate": 1.9093174378267454e-06, + "loss": 0.0371, + "num_input_tokens_seen": 13152704, + "step": 19540 + }, + { + "epoch": 0.47748760169056753, + "grad_norm": 14.021597862243652, + "learning_rate": 1.909806029217765e-06, + "loss": 0.1501, + "num_input_tokens_seen": 13156160, + "step": 19545 + }, + { + "epoch": 0.47760975252241467, + "grad_norm": 12.774880409240723, + "learning_rate": 1.9102946206087847e-06, + "loss": 0.0894, + "num_input_tokens_seen": 13159616, + "step": 19550 + }, + { + "epoch": 0.47773190335426186, + "grad_norm": 23.132492065429688, + "learning_rate": 1.9107832119998044e-06, + "loss": 0.1337, + "num_input_tokens_seen": 13162624, + "step": 19555 + }, + { + "epoch": 0.477854054186109, + "grad_norm": 20.382837295532227, + "learning_rate": 1.911271803390824e-06, + "loss": 0.0509, + "num_input_tokens_seen": 13165760, + "step": 19560 + }, + { + "epoch": 0.4779762050179562, + "grad_norm": 0.5650962591171265, + "learning_rate": 1.9117603947818438e-06, + "loss": 0.115, + "num_input_tokens_seen": 13169024, + "step": 19565 + }, + { + "epoch": 0.4780983558498033, + "grad_norm": 39.08525085449219, + "learning_rate": 1.9122489861728635e-06, + "loss": 0.2129, + "num_input_tokens_seen": 13171968, + "step": 19570 + }, + { + "epoch": 0.4782205066816505, + "grad_norm": 5.242583751678467, + "learning_rate": 1.912737577563883e-06, + "loss": 0.0831, + "num_input_tokens_seen": 13175424, + "step": 19575 + }, + { + "epoch": 0.4783426575134977, + "grad_norm": 18.628211975097656, + "learning_rate": 1.913226168954903e-06, + "loss": 0.1144, + "num_input_tokens_seen": 13178880, + "step": 19580 + }, + { + "epoch": 0.4784648083453448, + "grad_norm": 4.374256610870361, + "learning_rate": 1.913714760345923e-06, + "loss": 0.0943, + "num_input_tokens_seen": 13182208, + "step": 19585 + }, + { + "epoch": 0.478586959177192, + "grad_norm": 1.0530842542648315, + "learning_rate": 1.914203351736942e-06, + "loss": 0.0779, + "num_input_tokens_seen": 13185600, + "step": 19590 + }, + { + "epoch": 0.47870911000903915, + "grad_norm": 31.666772842407227, + "learning_rate": 1.914691943127962e-06, + "loss": 0.0657, + "num_input_tokens_seen": 13188928, + "step": 19595 + }, + { + "epoch": 0.47883126084088634, + "grad_norm": 9.453741073608398, + "learning_rate": 1.915180534518982e-06, + "loss": 0.2389, + "num_input_tokens_seen": 13192064, + "step": 19600 + }, + { + "epoch": 0.4789534116727335, + "grad_norm": 28.946199417114258, + "learning_rate": 1.9156691259100012e-06, + "loss": 0.1893, + "num_input_tokens_seen": 13195712, + "step": 19605 + }, + { + "epoch": 0.47907556250458067, + "grad_norm": 17.196870803833008, + "learning_rate": 1.916157717301021e-06, + "loss": 0.262, + "num_input_tokens_seen": 13198656, + "step": 19610 + }, + { + "epoch": 0.4791977133364278, + "grad_norm": 14.507476806640625, + "learning_rate": 1.916646308692041e-06, + "loss": 0.1524, + "num_input_tokens_seen": 13201984, + "step": 19615 + }, + { + "epoch": 0.479319864168275, + "grad_norm": 41.175994873046875, + "learning_rate": 1.9171349000830607e-06, + "loss": 0.0971, + "num_input_tokens_seen": 13205312, + "step": 19620 + }, + { + "epoch": 0.4794420150001221, + "grad_norm": 15.390592575073242, + "learning_rate": 1.91762349147408e-06, + "loss": 0.1953, + "num_input_tokens_seen": 13208192, + "step": 19625 + }, + { + "epoch": 0.4795641658319693, + "grad_norm": 13.96489143371582, + "learning_rate": 1.9181120828651e-06, + "loss": 0.1565, + "num_input_tokens_seen": 13211520, + "step": 19630 + }, + { + "epoch": 0.4796863166638165, + "grad_norm": 10.897221565246582, + "learning_rate": 1.9186006742561197e-06, + "loss": 0.1034, + "num_input_tokens_seen": 13214848, + "step": 19635 + }, + { + "epoch": 0.47980846749566364, + "grad_norm": 12.923073768615723, + "learning_rate": 1.919089265647139e-06, + "loss": 0.1566, + "num_input_tokens_seen": 13218304, + "step": 19640 + }, + { + "epoch": 0.4799306183275108, + "grad_norm": 15.097844123840332, + "learning_rate": 1.919577857038159e-06, + "loss": 0.139, + "num_input_tokens_seen": 13221696, + "step": 19645 + }, + { + "epoch": 0.48005276915935796, + "grad_norm": 3.387833595275879, + "learning_rate": 1.9200664484291788e-06, + "loss": 0.0572, + "num_input_tokens_seen": 13224896, + "step": 19650 + }, + { + "epoch": 0.48017491999120515, + "grad_norm": 9.004018783569336, + "learning_rate": 1.920555039820198e-06, + "loss": 0.1403, + "num_input_tokens_seen": 13227968, + "step": 19655 + }, + { + "epoch": 0.4802970708230523, + "grad_norm": 3.3921761512756348, + "learning_rate": 1.921043631211218e-06, + "loss": 0.0733, + "num_input_tokens_seen": 13231232, + "step": 19660 + }, + { + "epoch": 0.4804192216548995, + "grad_norm": 1.3995615243911743, + "learning_rate": 1.921532222602238e-06, + "loss": 0.088, + "num_input_tokens_seen": 13234560, + "step": 19665 + }, + { + "epoch": 0.4805413724867466, + "grad_norm": 16.81403923034668, + "learning_rate": 1.9220208139932575e-06, + "loss": 0.0543, + "num_input_tokens_seen": 13237696, + "step": 19670 + }, + { + "epoch": 0.4806635233185938, + "grad_norm": 0.8026001453399658, + "learning_rate": 1.922509405384277e-06, + "loss": 0.1292, + "num_input_tokens_seen": 13241216, + "step": 19675 + }, + { + "epoch": 0.480785674150441, + "grad_norm": 80.34431457519531, + "learning_rate": 1.922997996775297e-06, + "loss": 0.1134, + "num_input_tokens_seen": 13244736, + "step": 19680 + }, + { + "epoch": 0.4809078249822881, + "grad_norm": 87.73758697509766, + "learning_rate": 1.9234865881663165e-06, + "loss": 0.2556, + "num_input_tokens_seen": 13248000, + "step": 19685 + }, + { + "epoch": 0.4810299758141353, + "grad_norm": 25.267589569091797, + "learning_rate": 1.923975179557336e-06, + "loss": 0.2345, + "num_input_tokens_seen": 13251008, + "step": 19690 + }, + { + "epoch": 0.48115212664598245, + "grad_norm": 35.30669021606445, + "learning_rate": 1.924463770948356e-06, + "loss": 0.1163, + "num_input_tokens_seen": 13254336, + "step": 19695 + }, + { + "epoch": 0.48127427747782964, + "grad_norm": 15.753335952758789, + "learning_rate": 1.9249523623393756e-06, + "loss": 0.1854, + "num_input_tokens_seen": 13258048, + "step": 19700 + }, + { + "epoch": 0.48139642830967677, + "grad_norm": 21.301998138427734, + "learning_rate": 1.9254409537303953e-06, + "loss": 0.3226, + "num_input_tokens_seen": 13261632, + "step": 19705 + }, + { + "epoch": 0.48151857914152396, + "grad_norm": 25.74140739440918, + "learning_rate": 1.925929545121415e-06, + "loss": 0.1299, + "num_input_tokens_seen": 13265024, + "step": 19710 + }, + { + "epoch": 0.4816407299733711, + "grad_norm": 23.157045364379883, + "learning_rate": 1.9264181365124346e-06, + "loss": 0.1003, + "num_input_tokens_seen": 13268416, + "step": 19715 + }, + { + "epoch": 0.4817628808052183, + "grad_norm": 44.55127716064453, + "learning_rate": 1.9269067279034543e-06, + "loss": 0.16, + "num_input_tokens_seen": 13272064, + "step": 19720 + }, + { + "epoch": 0.4818850316370655, + "grad_norm": 24.022293090820312, + "learning_rate": 1.927395319294474e-06, + "loss": 0.1596, + "num_input_tokens_seen": 13275712, + "step": 19725 + }, + { + "epoch": 0.4820071824689126, + "grad_norm": 18.870023727416992, + "learning_rate": 1.9278839106854937e-06, + "loss": 0.1389, + "num_input_tokens_seen": 13278976, + "step": 19730 + }, + { + "epoch": 0.4821293333007598, + "grad_norm": 1.568988561630249, + "learning_rate": 1.9283725020765134e-06, + "loss": 0.0965, + "num_input_tokens_seen": 13282496, + "step": 19735 + }, + { + "epoch": 0.48225148413260693, + "grad_norm": 33.636409759521484, + "learning_rate": 1.928861093467533e-06, + "loss": 0.1027, + "num_input_tokens_seen": 13285568, + "step": 19740 + }, + { + "epoch": 0.4823736349644541, + "grad_norm": 22.591812133789062, + "learning_rate": 1.9293496848585527e-06, + "loss": 0.0695, + "num_input_tokens_seen": 13288960, + "step": 19745 + }, + { + "epoch": 0.48249578579630126, + "grad_norm": 2.1983540058135986, + "learning_rate": 1.9298382762495724e-06, + "loss": 0.109, + "num_input_tokens_seen": 13292224, + "step": 19750 + }, + { + "epoch": 0.48261793662814845, + "grad_norm": 0.34230145812034607, + "learning_rate": 1.930326867640592e-06, + "loss": 0.1414, + "num_input_tokens_seen": 13296064, + "step": 19755 + }, + { + "epoch": 0.4827400874599956, + "grad_norm": 0.6198939681053162, + "learning_rate": 1.9308154590316118e-06, + "loss": 0.0575, + "num_input_tokens_seen": 13299648, + "step": 19760 + }, + { + "epoch": 0.48286223829184277, + "grad_norm": 0.22586674988269806, + "learning_rate": 1.9313040504226314e-06, + "loss": 0.1743, + "num_input_tokens_seen": 13302912, + "step": 19765 + }, + { + "epoch": 0.4829843891236899, + "grad_norm": 13.703788757324219, + "learning_rate": 1.931792641813651e-06, + "loss": 0.0443, + "num_input_tokens_seen": 13306368, + "step": 19770 + }, + { + "epoch": 0.4831065399555371, + "grad_norm": 9.638721466064453, + "learning_rate": 1.932281233204671e-06, + "loss": 0.0543, + "num_input_tokens_seen": 13309376, + "step": 19775 + }, + { + "epoch": 0.4832286907873843, + "grad_norm": 34.85390090942383, + "learning_rate": 1.9327698245956905e-06, + "loss": 0.1372, + "num_input_tokens_seen": 13312640, + "step": 19780 + }, + { + "epoch": 0.4833508416192314, + "grad_norm": 40.97196960449219, + "learning_rate": 1.93325841598671e-06, + "loss": 0.0512, + "num_input_tokens_seen": 13315776, + "step": 19785 + }, + { + "epoch": 0.4834729924510786, + "grad_norm": 46.25328063964844, + "learning_rate": 1.93374700737773e-06, + "loss": 0.0811, + "num_input_tokens_seen": 13319616, + "step": 19790 + }, + { + "epoch": 0.48359514328292574, + "grad_norm": 4.664642333984375, + "learning_rate": 1.9342355987687495e-06, + "loss": 0.1762, + "num_input_tokens_seen": 13323008, + "step": 19795 + }, + { + "epoch": 0.48371729411477293, + "grad_norm": 10.510961532592773, + "learning_rate": 1.9347241901597692e-06, + "loss": 0.0668, + "num_input_tokens_seen": 13326400, + "step": 19800 + }, + { + "epoch": 0.48383944494662007, + "grad_norm": 57.567710876464844, + "learning_rate": 1.935212781550789e-06, + "loss": 0.2493, + "num_input_tokens_seen": 13329728, + "step": 19805 + }, + { + "epoch": 0.48396159577846726, + "grad_norm": 63.7676887512207, + "learning_rate": 1.9357013729418086e-06, + "loss": 0.1694, + "num_input_tokens_seen": 13333120, + "step": 19810 + }, + { + "epoch": 0.4840837466103144, + "grad_norm": 16.2789306640625, + "learning_rate": 1.9361899643328283e-06, + "loss": 0.1478, + "num_input_tokens_seen": 13336576, + "step": 19815 + }, + { + "epoch": 0.4842058974421616, + "grad_norm": 2.470158100128174, + "learning_rate": 1.936678555723848e-06, + "loss": 0.0925, + "num_input_tokens_seen": 13339712, + "step": 19820 + }, + { + "epoch": 0.48432804827400877, + "grad_norm": 10.53795337677002, + "learning_rate": 1.937167147114868e-06, + "loss": 0.089, + "num_input_tokens_seen": 13343168, + "step": 19825 + }, + { + "epoch": 0.4844501991058559, + "grad_norm": 2.1699910163879395, + "learning_rate": 1.9376557385058873e-06, + "loss": 0.1069, + "num_input_tokens_seen": 13346240, + "step": 19830 + }, + { + "epoch": 0.4845723499377031, + "grad_norm": 9.327561378479004, + "learning_rate": 1.938144329896907e-06, + "loss": 0.0559, + "num_input_tokens_seen": 13349888, + "step": 19835 + }, + { + "epoch": 0.48469450076955023, + "grad_norm": 8.914467811584473, + "learning_rate": 1.938632921287927e-06, + "loss": 0.0851, + "num_input_tokens_seen": 13353472, + "step": 19840 + }, + { + "epoch": 0.4848166516013974, + "grad_norm": 10.970634460449219, + "learning_rate": 1.9391215126789463e-06, + "loss": 0.1351, + "num_input_tokens_seen": 13356672, + "step": 19845 + }, + { + "epoch": 0.48493880243324455, + "grad_norm": 0.31943902373313904, + "learning_rate": 1.939610104069966e-06, + "loss": 0.2086, + "num_input_tokens_seen": 13360064, + "step": 19850 + }, + { + "epoch": 0.48506095326509174, + "grad_norm": 30.93590545654297, + "learning_rate": 1.940098695460986e-06, + "loss": 0.0905, + "num_input_tokens_seen": 13363392, + "step": 19855 + }, + { + "epoch": 0.4851831040969389, + "grad_norm": 12.492888450622559, + "learning_rate": 1.940587286852006e-06, + "loss": 0.1664, + "num_input_tokens_seen": 13366656, + "step": 19860 + }, + { + "epoch": 0.48530525492878607, + "grad_norm": 12.074633598327637, + "learning_rate": 1.941075878243025e-06, + "loss": 0.0714, + "num_input_tokens_seen": 13370240, + "step": 19865 + }, + { + "epoch": 0.48542740576063326, + "grad_norm": 13.443814277648926, + "learning_rate": 1.941564469634045e-06, + "loss": 0.1149, + "num_input_tokens_seen": 13373568, + "step": 19870 + }, + { + "epoch": 0.4855495565924804, + "grad_norm": 13.719369888305664, + "learning_rate": 1.942053061025065e-06, + "loss": 0.1532, + "num_input_tokens_seen": 13377024, + "step": 19875 + }, + { + "epoch": 0.4856717074243276, + "grad_norm": 27.99519157409668, + "learning_rate": 1.942541652416084e-06, + "loss": 0.133, + "num_input_tokens_seen": 13380800, + "step": 19880 + }, + { + "epoch": 0.4857938582561747, + "grad_norm": 0.44706669449806213, + "learning_rate": 1.943030243807104e-06, + "loss": 0.1333, + "num_input_tokens_seen": 13384320, + "step": 19885 + }, + { + "epoch": 0.4859160090880219, + "grad_norm": 0.12808431684970856, + "learning_rate": 1.943518835198124e-06, + "loss": 0.1726, + "num_input_tokens_seen": 13387648, + "step": 19890 + }, + { + "epoch": 0.48603815991986904, + "grad_norm": 0.382138192653656, + "learning_rate": 1.9440074265891436e-06, + "loss": 0.1649, + "num_input_tokens_seen": 13391232, + "step": 19895 + }, + { + "epoch": 0.48616031075171623, + "grad_norm": 20.595565795898438, + "learning_rate": 1.944496017980163e-06, + "loss": 0.0477, + "num_input_tokens_seen": 13395136, + "step": 19900 + }, + { + "epoch": 0.48628246158356336, + "grad_norm": 26.79218292236328, + "learning_rate": 1.944984609371183e-06, + "loss": 0.1885, + "num_input_tokens_seen": 13398400, + "step": 19905 + }, + { + "epoch": 0.48640461241541055, + "grad_norm": 15.850521087646484, + "learning_rate": 1.9454732007622026e-06, + "loss": 0.22, + "num_input_tokens_seen": 13401792, + "step": 19910 + }, + { + "epoch": 0.4865267632472577, + "grad_norm": 0.2584015130996704, + "learning_rate": 1.945961792153222e-06, + "loss": 0.1027, + "num_input_tokens_seen": 13405120, + "step": 19915 + }, + { + "epoch": 0.4866489140791049, + "grad_norm": 20.799280166625977, + "learning_rate": 1.946450383544242e-06, + "loss": 0.064, + "num_input_tokens_seen": 13409088, + "step": 19920 + }, + { + "epoch": 0.48677106491095207, + "grad_norm": 0.3154126703739166, + "learning_rate": 1.9469389749352617e-06, + "loss": 0.0144, + "num_input_tokens_seen": 13412736, + "step": 19925 + }, + { + "epoch": 0.4868932157427992, + "grad_norm": 14.14961051940918, + "learning_rate": 1.947427566326281e-06, + "loss": 0.0679, + "num_input_tokens_seen": 13415872, + "step": 19930 + }, + { + "epoch": 0.4870153665746464, + "grad_norm": 24.828821182250977, + "learning_rate": 1.947916157717301e-06, + "loss": 0.1872, + "num_input_tokens_seen": 13419136, + "step": 19935 + }, + { + "epoch": 0.4871375174064935, + "grad_norm": 14.158141136169434, + "learning_rate": 1.9484047491083207e-06, + "loss": 0.3057, + "num_input_tokens_seen": 13422848, + "step": 19940 + }, + { + "epoch": 0.4872596682383407, + "grad_norm": 14.543496131896973, + "learning_rate": 1.9488933404993404e-06, + "loss": 0.1605, + "num_input_tokens_seen": 13426816, + "step": 19945 + }, + { + "epoch": 0.48738181907018785, + "grad_norm": 8.337520599365234, + "learning_rate": 1.94938193189036e-06, + "loss": 0.0222, + "num_input_tokens_seen": 13429824, + "step": 19950 + }, + { + "epoch": 0.48750396990203504, + "grad_norm": 35.78062057495117, + "learning_rate": 1.9498705232813798e-06, + "loss": 0.1204, + "num_input_tokens_seen": 13433088, + "step": 19955 + }, + { + "epoch": 0.4876261207338822, + "grad_norm": 11.560881614685059, + "learning_rate": 1.9503591146723994e-06, + "loss": 0.1232, + "num_input_tokens_seen": 13436416, + "step": 19960 + }, + { + "epoch": 0.48774827156572936, + "grad_norm": 9.376676559448242, + "learning_rate": 1.950847706063419e-06, + "loss": 0.0839, + "num_input_tokens_seen": 13439936, + "step": 19965 + }, + { + "epoch": 0.48787042239757655, + "grad_norm": 0.3136023283004761, + "learning_rate": 1.951336297454439e-06, + "loss": 0.0839, + "num_input_tokens_seen": 13443200, + "step": 19970 + }, + { + "epoch": 0.4879925732294237, + "grad_norm": 11.248197555541992, + "learning_rate": 1.9518248888454585e-06, + "loss": 0.0941, + "num_input_tokens_seen": 13446400, + "step": 19975 + }, + { + "epoch": 0.4881147240612709, + "grad_norm": 2.7925679683685303, + "learning_rate": 1.952313480236478e-06, + "loss": 0.0274, + "num_input_tokens_seen": 13450240, + "step": 19980 + }, + { + "epoch": 0.488236874893118, + "grad_norm": 1.5199958086013794, + "learning_rate": 1.952802071627498e-06, + "loss": 0.0952, + "num_input_tokens_seen": 13453376, + "step": 19985 + }, + { + "epoch": 0.4883590257249652, + "grad_norm": 0.21349725127220154, + "learning_rate": 1.9532906630185175e-06, + "loss": 0.1549, + "num_input_tokens_seen": 13456512, + "step": 19990 + }, + { + "epoch": 0.48848117655681234, + "grad_norm": 28.426145553588867, + "learning_rate": 1.953779254409537e-06, + "loss": 0.1634, + "num_input_tokens_seen": 13459712, + "step": 19995 + }, + { + "epoch": 0.4886033273886595, + "grad_norm": 18.666841506958008, + "learning_rate": 1.954267845800557e-06, + "loss": 0.1312, + "num_input_tokens_seen": 13462976, + "step": 20000 + }, + { + "epoch": 0.48872547822050666, + "grad_norm": 1.9791568517684937, + "learning_rate": 1.9547564371915766e-06, + "loss": 0.159, + "num_input_tokens_seen": 13466432, + "step": 20005 + }, + { + "epoch": 0.48884762905235385, + "grad_norm": 1.8186808824539185, + "learning_rate": 1.9552450285825963e-06, + "loss": 0.1475, + "num_input_tokens_seen": 13470016, + "step": 20010 + }, + { + "epoch": 0.48896977988420104, + "grad_norm": 5.565760135650635, + "learning_rate": 1.955733619973616e-06, + "loss": 0.1175, + "num_input_tokens_seen": 13474368, + "step": 20015 + }, + { + "epoch": 0.4890919307160482, + "grad_norm": 40.79534149169922, + "learning_rate": 1.9562222113646356e-06, + "loss": 0.1288, + "num_input_tokens_seen": 13477696, + "step": 20020 + }, + { + "epoch": 0.48921408154789536, + "grad_norm": 17.945369720458984, + "learning_rate": 1.9567108027556553e-06, + "loss": 0.1885, + "num_input_tokens_seen": 13481152, + "step": 20025 + }, + { + "epoch": 0.4893362323797425, + "grad_norm": 11.218626022338867, + "learning_rate": 1.957199394146675e-06, + "loss": 0.0811, + "num_input_tokens_seen": 13484608, + "step": 20030 + }, + { + "epoch": 0.4894583832115897, + "grad_norm": 13.835955619812012, + "learning_rate": 1.9576879855376947e-06, + "loss": 0.1319, + "num_input_tokens_seen": 13487808, + "step": 20035 + }, + { + "epoch": 0.4895805340434368, + "grad_norm": 18.503908157348633, + "learning_rate": 1.9581765769287143e-06, + "loss": 0.136, + "num_input_tokens_seen": 13491008, + "step": 20040 + }, + { + "epoch": 0.489702684875284, + "grad_norm": 4.769625663757324, + "learning_rate": 1.958665168319734e-06, + "loss": 0.0896, + "num_input_tokens_seen": 13494016, + "step": 20045 + }, + { + "epoch": 0.48982483570713115, + "grad_norm": 6.78912353515625, + "learning_rate": 1.959153759710754e-06, + "loss": 0.1986, + "num_input_tokens_seen": 13497600, + "step": 20050 + }, + { + "epoch": 0.48994698653897834, + "grad_norm": 13.250788688659668, + "learning_rate": 1.9596423511017734e-06, + "loss": 0.1285, + "num_input_tokens_seen": 13500672, + "step": 20055 + }, + { + "epoch": 0.49006913737082547, + "grad_norm": 2.766676664352417, + "learning_rate": 1.960130942492793e-06, + "loss": 0.0348, + "num_input_tokens_seen": 13503808, + "step": 20060 + }, + { + "epoch": 0.49019128820267266, + "grad_norm": 21.56344985961914, + "learning_rate": 1.9606195338838127e-06, + "loss": 0.0635, + "num_input_tokens_seen": 13507200, + "step": 20065 + }, + { + "epoch": 0.49031343903451985, + "grad_norm": 1.6514256000518799, + "learning_rate": 1.9611081252748324e-06, + "loss": 0.1066, + "num_input_tokens_seen": 13510464, + "step": 20070 + }, + { + "epoch": 0.490435589866367, + "grad_norm": 2.146864652633667, + "learning_rate": 1.961596716665852e-06, + "loss": 0.0995, + "num_input_tokens_seen": 13513472, + "step": 20075 + }, + { + "epoch": 0.4905577406982142, + "grad_norm": 13.342808723449707, + "learning_rate": 1.962085308056872e-06, + "loss": 0.2648, + "num_input_tokens_seen": 13516672, + "step": 20080 + }, + { + "epoch": 0.4906798915300613, + "grad_norm": 17.412988662719727, + "learning_rate": 1.9625738994478915e-06, + "loss": 0.1242, + "num_input_tokens_seen": 13519808, + "step": 20085 + }, + { + "epoch": 0.4908020423619085, + "grad_norm": 57.60816955566406, + "learning_rate": 1.963062490838911e-06, + "loss": 0.0692, + "num_input_tokens_seen": 13522816, + "step": 20090 + }, + { + "epoch": 0.49092419319375563, + "grad_norm": 24.054847717285156, + "learning_rate": 1.963551082229931e-06, + "loss": 0.3476, + "num_input_tokens_seen": 13526208, + "step": 20095 + }, + { + "epoch": 0.4910463440256028, + "grad_norm": 24.362337112426758, + "learning_rate": 1.964039673620951e-06, + "loss": 0.3069, + "num_input_tokens_seen": 13529664, + "step": 20100 + }, + { + "epoch": 0.49116849485744996, + "grad_norm": 11.493230819702148, + "learning_rate": 1.96452826501197e-06, + "loss": 0.0861, + "num_input_tokens_seen": 13533184, + "step": 20105 + }, + { + "epoch": 0.49129064568929715, + "grad_norm": 29.886674880981445, + "learning_rate": 1.96501685640299e-06, + "loss": 0.1571, + "num_input_tokens_seen": 13536704, + "step": 20110 + }, + { + "epoch": 0.49141279652114433, + "grad_norm": 14.273351669311523, + "learning_rate": 1.96550544779401e-06, + "loss": 0.0438, + "num_input_tokens_seen": 13540224, + "step": 20115 + }, + { + "epoch": 0.49153494735299147, + "grad_norm": 19.529502868652344, + "learning_rate": 1.9659940391850292e-06, + "loss": 0.1241, + "num_input_tokens_seen": 13543808, + "step": 20120 + }, + { + "epoch": 0.49165709818483866, + "grad_norm": 8.674605369567871, + "learning_rate": 1.966482630576049e-06, + "loss": 0.0822, + "num_input_tokens_seen": 13547264, + "step": 20125 + }, + { + "epoch": 0.4917792490166858, + "grad_norm": 34.01549530029297, + "learning_rate": 1.966971221967069e-06, + "loss": 0.1256, + "num_input_tokens_seen": 13550400, + "step": 20130 + }, + { + "epoch": 0.491901399848533, + "grad_norm": 23.52373695373535, + "learning_rate": 1.9674598133580887e-06, + "loss": 0.0989, + "num_input_tokens_seen": 13554048, + "step": 20135 + }, + { + "epoch": 0.4920235506803801, + "grad_norm": 17.888687133789062, + "learning_rate": 1.967948404749108e-06, + "loss": 0.0538, + "num_input_tokens_seen": 13557248, + "step": 20140 + }, + { + "epoch": 0.4921457015122273, + "grad_norm": 31.919679641723633, + "learning_rate": 1.968436996140128e-06, + "loss": 0.1511, + "num_input_tokens_seen": 13560960, + "step": 20145 + }, + { + "epoch": 0.49226785234407444, + "grad_norm": 16.944351196289062, + "learning_rate": 1.9689255875311478e-06, + "loss": 0.1369, + "num_input_tokens_seen": 13563776, + "step": 20150 + }, + { + "epoch": 0.49239000317592163, + "grad_norm": 40.24680709838867, + "learning_rate": 1.969414178922167e-06, + "loss": 0.1426, + "num_input_tokens_seen": 13566976, + "step": 20155 + }, + { + "epoch": 0.49251215400776877, + "grad_norm": 22.800159454345703, + "learning_rate": 1.969902770313187e-06, + "loss": 0.0422, + "num_input_tokens_seen": 13570304, + "step": 20160 + }, + { + "epoch": 0.49263430483961596, + "grad_norm": 14.297897338867188, + "learning_rate": 1.970391361704207e-06, + "loss": 0.2077, + "num_input_tokens_seen": 13574080, + "step": 20165 + }, + { + "epoch": 0.49275645567146314, + "grad_norm": 28.413267135620117, + "learning_rate": 1.9708799530952265e-06, + "loss": 0.1064, + "num_input_tokens_seen": 13577344, + "step": 20170 + }, + { + "epoch": 0.4928786065033103, + "grad_norm": 20.27289581298828, + "learning_rate": 1.971368544486246e-06, + "loss": 0.0783, + "num_input_tokens_seen": 13580480, + "step": 20175 + }, + { + "epoch": 0.49300075733515747, + "grad_norm": 3.2449045181274414, + "learning_rate": 1.971857135877266e-06, + "loss": 0.1319, + "num_input_tokens_seen": 13583424, + "step": 20180 + }, + { + "epoch": 0.4931229081670046, + "grad_norm": 23.341785430908203, + "learning_rate": 1.9723457272682855e-06, + "loss": 0.1384, + "num_input_tokens_seen": 13586624, + "step": 20185 + }, + { + "epoch": 0.4932450589988518, + "grad_norm": 17.50263214111328, + "learning_rate": 1.972834318659305e-06, + "loss": 0.0931, + "num_input_tokens_seen": 13589888, + "step": 20190 + }, + { + "epoch": 0.4933672098306989, + "grad_norm": 11.289210319519043, + "learning_rate": 1.973322910050325e-06, + "loss": 0.1851, + "num_input_tokens_seen": 13593280, + "step": 20195 + }, + { + "epoch": 0.4934893606625461, + "grad_norm": 12.591771125793457, + "learning_rate": 1.9738115014413446e-06, + "loss": 0.0802, + "num_input_tokens_seen": 13596608, + "step": 20200 + }, + { + "epoch": 0.49361151149439325, + "grad_norm": 16.865806579589844, + "learning_rate": 1.9743000928323642e-06, + "loss": 0.0851, + "num_input_tokens_seen": 13599936, + "step": 20205 + }, + { + "epoch": 0.49373366232624044, + "grad_norm": 17.790006637573242, + "learning_rate": 1.974788684223384e-06, + "loss": 0.1579, + "num_input_tokens_seen": 13603072, + "step": 20210 + }, + { + "epoch": 0.49385581315808763, + "grad_norm": 31.209308624267578, + "learning_rate": 1.9752772756144036e-06, + "loss": 0.1716, + "num_input_tokens_seen": 13606656, + "step": 20215 + }, + { + "epoch": 0.49397796398993477, + "grad_norm": 25.8765926361084, + "learning_rate": 1.9757658670054233e-06, + "loss": 0.1867, + "num_input_tokens_seen": 13610048, + "step": 20220 + }, + { + "epoch": 0.49410011482178195, + "grad_norm": 17.896934509277344, + "learning_rate": 1.976254458396443e-06, + "loss": 0.095, + "num_input_tokens_seen": 13613504, + "step": 20225 + }, + { + "epoch": 0.4942222656536291, + "grad_norm": 2.6187055110931396, + "learning_rate": 1.9767430497874627e-06, + "loss": 0.1053, + "num_input_tokens_seen": 13617472, + "step": 20230 + }, + { + "epoch": 0.4943444164854763, + "grad_norm": 14.588163375854492, + "learning_rate": 1.9772316411784823e-06, + "loss": 0.0849, + "num_input_tokens_seen": 13621312, + "step": 20235 + }, + { + "epoch": 0.4944665673173234, + "grad_norm": 4.078947067260742, + "learning_rate": 1.977720232569502e-06, + "loss": 0.0733, + "num_input_tokens_seen": 13624640, + "step": 20240 + }, + { + "epoch": 0.4945887181491706, + "grad_norm": 20.883548736572266, + "learning_rate": 1.9782088239605217e-06, + "loss": 0.1103, + "num_input_tokens_seen": 13628032, + "step": 20245 + }, + { + "epoch": 0.49471086898101774, + "grad_norm": 6.015990257263184, + "learning_rate": 1.9786974153515414e-06, + "loss": 0.153, + "num_input_tokens_seen": 13631296, + "step": 20250 + }, + { + "epoch": 0.4948330198128649, + "grad_norm": 2.3922202587127686, + "learning_rate": 1.979186006742561e-06, + "loss": 0.0764, + "num_input_tokens_seen": 13634816, + "step": 20255 + }, + { + "epoch": 0.4949551706447121, + "grad_norm": 27.54640007019043, + "learning_rate": 1.9796745981335807e-06, + "loss": 0.075, + "num_input_tokens_seen": 13637952, + "step": 20260 + }, + { + "epoch": 0.49507732147655925, + "grad_norm": 8.645411491394043, + "learning_rate": 1.9801631895246004e-06, + "loss": 0.1036, + "num_input_tokens_seen": 13641344, + "step": 20265 + }, + { + "epoch": 0.49519947230840644, + "grad_norm": 15.165426254272461, + "learning_rate": 1.98065178091562e-06, + "loss": 0.0937, + "num_input_tokens_seen": 13644992, + "step": 20270 + }, + { + "epoch": 0.4953216231402536, + "grad_norm": 23.38542938232422, + "learning_rate": 1.9811403723066398e-06, + "loss": 0.0303, + "num_input_tokens_seen": 13648256, + "step": 20275 + }, + { + "epoch": 0.49544377397210076, + "grad_norm": 2.478635311126709, + "learning_rate": 1.9816289636976595e-06, + "loss": 0.0959, + "num_input_tokens_seen": 13651776, + "step": 20280 + }, + { + "epoch": 0.4955659248039479, + "grad_norm": 27.348520278930664, + "learning_rate": 1.982117555088679e-06, + "loss": 0.1028, + "num_input_tokens_seen": 13655232, + "step": 20285 + }, + { + "epoch": 0.4956880756357951, + "grad_norm": 33.220062255859375, + "learning_rate": 1.982606146479699e-06, + "loss": 0.1079, + "num_input_tokens_seen": 13659264, + "step": 20290 + }, + { + "epoch": 0.4958102264676422, + "grad_norm": 12.448121070861816, + "learning_rate": 1.9830947378707185e-06, + "loss": 0.11, + "num_input_tokens_seen": 13663232, + "step": 20295 + }, + { + "epoch": 0.4959323772994894, + "grad_norm": 14.38774299621582, + "learning_rate": 1.983583329261738e-06, + "loss": 0.1488, + "num_input_tokens_seen": 13666496, + "step": 20300 + }, + { + "epoch": 0.49605452813133655, + "grad_norm": 16.634559631347656, + "learning_rate": 1.984071920652758e-06, + "loss": 0.1363, + "num_input_tokens_seen": 13669952, + "step": 20305 + }, + { + "epoch": 0.49617667896318374, + "grad_norm": 0.1701052337884903, + "learning_rate": 1.9845605120437776e-06, + "loss": 0.0476, + "num_input_tokens_seen": 13673984, + "step": 20310 + }, + { + "epoch": 0.4962988297950309, + "grad_norm": 14.704794883728027, + "learning_rate": 1.9850491034347972e-06, + "loss": 0.0905, + "num_input_tokens_seen": 13677696, + "step": 20315 + }, + { + "epoch": 0.49642098062687806, + "grad_norm": 4.579434871673584, + "learning_rate": 1.985537694825817e-06, + "loss": 0.1117, + "num_input_tokens_seen": 13680960, + "step": 20320 + }, + { + "epoch": 0.49654313145872525, + "grad_norm": 3.7386457920074463, + "learning_rate": 1.986026286216837e-06, + "loss": 0.0668, + "num_input_tokens_seen": 13684480, + "step": 20325 + }, + { + "epoch": 0.4966652822905724, + "grad_norm": 5.1689276695251465, + "learning_rate": 1.9865148776078563e-06, + "loss": 0.0343, + "num_input_tokens_seen": 13687872, + "step": 20330 + }, + { + "epoch": 0.4967874331224196, + "grad_norm": 12.612316131591797, + "learning_rate": 1.987003468998876e-06, + "loss": 0.0929, + "num_input_tokens_seen": 13691072, + "step": 20335 + }, + { + "epoch": 0.4969095839542667, + "grad_norm": 0.49819138646125793, + "learning_rate": 1.987492060389896e-06, + "loss": 0.0431, + "num_input_tokens_seen": 13694720, + "step": 20340 + }, + { + "epoch": 0.4970317347861139, + "grad_norm": 46.28751754760742, + "learning_rate": 1.9879806517809153e-06, + "loss": 0.1056, + "num_input_tokens_seen": 13697984, + "step": 20345 + }, + { + "epoch": 0.49715388561796103, + "grad_norm": 33.69471740722656, + "learning_rate": 1.988469243171935e-06, + "loss": 0.1068, + "num_input_tokens_seen": 13701184, + "step": 20350 + }, + { + "epoch": 0.4972760364498082, + "grad_norm": 30.221221923828125, + "learning_rate": 1.988957834562955e-06, + "loss": 0.1515, + "num_input_tokens_seen": 13704704, + "step": 20355 + }, + { + "epoch": 0.4973981872816554, + "grad_norm": 3.9779465198516846, + "learning_rate": 1.9894464259539744e-06, + "loss": 0.1188, + "num_input_tokens_seen": 13708096, + "step": 20360 + }, + { + "epoch": 0.49752033811350255, + "grad_norm": 14.227984428405762, + "learning_rate": 1.989935017344994e-06, + "loss": 0.1672, + "num_input_tokens_seen": 13711488, + "step": 20365 + }, + { + "epoch": 0.49764248894534974, + "grad_norm": 18.283992767333984, + "learning_rate": 1.990423608736014e-06, + "loss": 0.087, + "num_input_tokens_seen": 13714944, + "step": 20370 + }, + { + "epoch": 0.49776463977719687, + "grad_norm": 13.039130210876465, + "learning_rate": 1.990912200127034e-06, + "loss": 0.1452, + "num_input_tokens_seen": 13718656, + "step": 20375 + }, + { + "epoch": 0.49788679060904406, + "grad_norm": 14.41788387298584, + "learning_rate": 1.991400791518053e-06, + "loss": 0.2331, + "num_input_tokens_seen": 13721472, + "step": 20380 + }, + { + "epoch": 0.4980089414408912, + "grad_norm": 16.680484771728516, + "learning_rate": 1.991889382909073e-06, + "loss": 0.0584, + "num_input_tokens_seen": 13725248, + "step": 20385 + }, + { + "epoch": 0.4981310922727384, + "grad_norm": 33.19993209838867, + "learning_rate": 1.992377974300093e-06, + "loss": 0.1784, + "num_input_tokens_seen": 13729088, + "step": 20390 + }, + { + "epoch": 0.4982532431045855, + "grad_norm": 3.5821926593780518, + "learning_rate": 1.992866565691112e-06, + "loss": 0.1174, + "num_input_tokens_seen": 13732608, + "step": 20395 + }, + { + "epoch": 0.4983753939364327, + "grad_norm": 22.986421585083008, + "learning_rate": 1.9933551570821322e-06, + "loss": 0.2099, + "num_input_tokens_seen": 13736128, + "step": 20400 + }, + { + "epoch": 0.4984975447682799, + "grad_norm": 15.068368911743164, + "learning_rate": 1.993843748473152e-06, + "loss": 0.0965, + "num_input_tokens_seen": 13739456, + "step": 20405 + }, + { + "epoch": 0.49861969560012703, + "grad_norm": 19.621456146240234, + "learning_rate": 1.9943323398641716e-06, + "loss": 0.1392, + "num_input_tokens_seen": 13742528, + "step": 20410 + }, + { + "epoch": 0.4987418464319742, + "grad_norm": 4.565086841583252, + "learning_rate": 1.9948209312551913e-06, + "loss": 0.0692, + "num_input_tokens_seen": 13745600, + "step": 20415 + }, + { + "epoch": 0.49886399726382136, + "grad_norm": 7.886087417602539, + "learning_rate": 1.995309522646211e-06, + "loss": 0.0727, + "num_input_tokens_seen": 13748864, + "step": 20420 + }, + { + "epoch": 0.49898614809566855, + "grad_norm": 26.047780990600586, + "learning_rate": 1.9957981140372307e-06, + "loss": 0.0934, + "num_input_tokens_seen": 13753472, + "step": 20425 + }, + { + "epoch": 0.4991082989275157, + "grad_norm": 27.23785972595215, + "learning_rate": 1.9962867054282503e-06, + "loss": 0.0655, + "num_input_tokens_seen": 13756352, + "step": 20430 + }, + { + "epoch": 0.49923044975936287, + "grad_norm": 23.27521324157715, + "learning_rate": 1.99677529681927e-06, + "loss": 0.1874, + "num_input_tokens_seen": 13759872, + "step": 20435 + }, + { + "epoch": 0.49935260059121, + "grad_norm": 24.3862361907959, + "learning_rate": 1.9972638882102897e-06, + "loss": 0.1828, + "num_input_tokens_seen": 13763008, + "step": 20440 + }, + { + "epoch": 0.4994747514230572, + "grad_norm": 0.43370214104652405, + "learning_rate": 1.9977524796013094e-06, + "loss": 0.2337, + "num_input_tokens_seen": 13766144, + "step": 20445 + }, + { + "epoch": 0.49959690225490433, + "grad_norm": 23.112834930419922, + "learning_rate": 1.998241070992329e-06, + "loss": 0.0943, + "num_input_tokens_seen": 13769408, + "step": 20450 + }, + { + "epoch": 0.4997190530867515, + "grad_norm": 16.897188186645508, + "learning_rate": 1.9987296623833487e-06, + "loss": 0.1177, + "num_input_tokens_seen": 13772608, + "step": 20455 + }, + { + "epoch": 0.4998412039185987, + "grad_norm": 18.243366241455078, + "learning_rate": 1.9992182537743684e-06, + "loss": 0.1733, + "num_input_tokens_seen": 13775680, + "step": 20460 + }, + { + "epoch": 0.49996335475044584, + "grad_norm": 18.259639739990234, + "learning_rate": 1.999706845165388e-06, + "loss": 0.1021, + "num_input_tokens_seen": 13778880, + "step": 20465 + }, + { + "epoch": 0.5000366452495542, + "eval_loss": 0.11281616985797882, + "eval_runtime": 47.6807, + "eval_samples_per_second": 763.096, + "eval_steps_per_second": 95.405, + "num_input_tokens_seen": 13780928, + "step": 20468 + }, + { + "epoch": 0.500085505582293, + "grad_norm": 19.301481246948242, + "learning_rate": 1.9999999994182183e-06, + "loss": 0.1982, + "num_input_tokens_seen": 13782144, + "step": 20470 + }, + { + "epoch": 0.5002076564141402, + "grad_norm": 4.169570446014404, + "learning_rate": 1.9999999928731765e-06, + "loss": 0.0798, + "num_input_tokens_seen": 13785600, + "step": 20475 + }, + { + "epoch": 0.5003298072459873, + "grad_norm": 5.7112860679626465, + "learning_rate": 1.9999999790558656e-06, + "loss": 0.1128, + "num_input_tokens_seen": 13788800, + "step": 20480 + }, + { + "epoch": 0.5004519580778345, + "grad_norm": 21.82765769958496, + "learning_rate": 1.9999999579662855e-06, + "loss": 0.1589, + "num_input_tokens_seen": 13792256, + "step": 20485 + }, + { + "epoch": 0.5005741089096817, + "grad_norm": 17.94799041748047, + "learning_rate": 1.999999929604437e-06, + "loss": 0.1436, + "num_input_tokens_seen": 13795264, + "step": 20490 + }, + { + "epoch": 0.5006962597415289, + "grad_norm": 3.4309074878692627, + "learning_rate": 1.99999989397032e-06, + "loss": 0.0407, + "num_input_tokens_seen": 13799040, + "step": 20495 + }, + { + "epoch": 0.500818410573376, + "grad_norm": 11.802199363708496, + "learning_rate": 1.9999998510639352e-06, + "loss": 0.0523, + "num_input_tokens_seen": 13801984, + "step": 20500 + }, + { + "epoch": 0.5009405614052231, + "grad_norm": 2.4828591346740723, + "learning_rate": 1.999999800885282e-06, + "loss": 0.0662, + "num_input_tokens_seen": 13805440, + "step": 20505 + }, + { + "epoch": 0.5010627122370703, + "grad_norm": 15.110939979553223, + "learning_rate": 1.9999997434343614e-06, + "loss": 0.1458, + "num_input_tokens_seen": 13808384, + "step": 20510 + }, + { + "epoch": 0.5011848630689175, + "grad_norm": 4.889379501342773, + "learning_rate": 1.9999996787111737e-06, + "loss": 0.122, + "num_input_tokens_seen": 13811456, + "step": 20515 + }, + { + "epoch": 0.5013070139007647, + "grad_norm": 13.039811134338379, + "learning_rate": 1.9999996067157195e-06, + "loss": 0.115, + "num_input_tokens_seen": 13814464, + "step": 20520 + }, + { + "epoch": 0.5014291647326118, + "grad_norm": 18.891918182373047, + "learning_rate": 1.999999527447999e-06, + "loss": 0.0875, + "num_input_tokens_seen": 13817920, + "step": 20525 + }, + { + "epoch": 0.501551315564459, + "grad_norm": 19.57078742980957, + "learning_rate": 1.9999994409080134e-06, + "loss": 0.0648, + "num_input_tokens_seen": 13820992, + "step": 20530 + }, + { + "epoch": 0.5016734663963062, + "grad_norm": 25.669971466064453, + "learning_rate": 1.9999993470957628e-06, + "loss": 0.1201, + "num_input_tokens_seen": 13824576, + "step": 20535 + }, + { + "epoch": 0.5017956172281534, + "grad_norm": 0.14215537905693054, + "learning_rate": 1.9999992460112477e-06, + "loss": 0.0275, + "num_input_tokens_seen": 13827520, + "step": 20540 + }, + { + "epoch": 0.5019177680600004, + "grad_norm": 6.8776092529296875, + "learning_rate": 1.999999137654469e-06, + "loss": 0.0616, + "num_input_tokens_seen": 13830528, + "step": 20545 + }, + { + "epoch": 0.5020399188918476, + "grad_norm": 16.818981170654297, + "learning_rate": 1.999999022025428e-06, + "loss": 0.0757, + "num_input_tokens_seen": 13833984, + "step": 20550 + }, + { + "epoch": 0.5021620697236948, + "grad_norm": 6.901256084442139, + "learning_rate": 1.999998899124125e-06, + "loss": 0.2078, + "num_input_tokens_seen": 13837120, + "step": 20555 + }, + { + "epoch": 0.502284220555542, + "grad_norm": 1.4620168209075928, + "learning_rate": 1.999998768950561e-06, + "loss": 0.0793, + "num_input_tokens_seen": 13840512, + "step": 20560 + }, + { + "epoch": 0.5024063713873892, + "grad_norm": 9.964655876159668, + "learning_rate": 1.999998631504737e-06, + "loss": 0.1634, + "num_input_tokens_seen": 13844096, + "step": 20565 + }, + { + "epoch": 0.5025285222192363, + "grad_norm": 1.1515722274780273, + "learning_rate": 1.9999984867866536e-06, + "loss": 0.1221, + "num_input_tokens_seen": 13847168, + "step": 20570 + }, + { + "epoch": 0.5026506730510835, + "grad_norm": 25.116619110107422, + "learning_rate": 1.999998334796313e-06, + "loss": 0.2725, + "num_input_tokens_seen": 13850496, + "step": 20575 + }, + { + "epoch": 0.5027728238829307, + "grad_norm": 5.306918144226074, + "learning_rate": 1.9999981755337148e-06, + "loss": 0.2294, + "num_input_tokens_seen": 13853568, + "step": 20580 + }, + { + "epoch": 0.5028949747147778, + "grad_norm": 7.240964889526367, + "learning_rate": 1.9999980089988614e-06, + "loss": 0.1153, + "num_input_tokens_seen": 13857216, + "step": 20585 + }, + { + "epoch": 0.5030171255466249, + "grad_norm": 21.508577346801758, + "learning_rate": 1.9999978351917536e-06, + "loss": 0.0272, + "num_input_tokens_seen": 13860608, + "step": 20590 + }, + { + "epoch": 0.5031392763784721, + "grad_norm": 0.7625007033348083, + "learning_rate": 1.999997654112392e-06, + "loss": 0.0345, + "num_input_tokens_seen": 13864448, + "step": 20595 + }, + { + "epoch": 0.5032614272103193, + "grad_norm": 24.227930068969727, + "learning_rate": 1.9999974657607787e-06, + "loss": 0.0895, + "num_input_tokens_seen": 13867904, + "step": 20600 + }, + { + "epoch": 0.5033835780421665, + "grad_norm": 46.96719741821289, + "learning_rate": 1.999997270136915e-06, + "loss": 0.2186, + "num_input_tokens_seen": 13871744, + "step": 20605 + }, + { + "epoch": 0.5035057288740137, + "grad_norm": 24.50628662109375, + "learning_rate": 1.9999970672408025e-06, + "loss": 0.0855, + "num_input_tokens_seen": 13875136, + "step": 20610 + }, + { + "epoch": 0.5036278797058608, + "grad_norm": 8.925004005432129, + "learning_rate": 1.999996857072442e-06, + "loss": 0.1191, + "num_input_tokens_seen": 13878464, + "step": 20615 + }, + { + "epoch": 0.503750030537708, + "grad_norm": 24.1169490814209, + "learning_rate": 1.9999966396318354e-06, + "loss": 0.1111, + "num_input_tokens_seen": 13881856, + "step": 20620 + }, + { + "epoch": 0.5038721813695551, + "grad_norm": 8.494999885559082, + "learning_rate": 1.9999964149189844e-06, + "loss": 0.0529, + "num_input_tokens_seen": 13885248, + "step": 20625 + }, + { + "epoch": 0.5039943322014023, + "grad_norm": 12.20933723449707, + "learning_rate": 1.99999618293389e-06, + "loss": 0.0501, + "num_input_tokens_seen": 13888640, + "step": 20630 + }, + { + "epoch": 0.5041164830332494, + "grad_norm": 22.99600601196289, + "learning_rate": 1.999995943676555e-06, + "loss": 0.2339, + "num_input_tokens_seen": 13891712, + "step": 20635 + }, + { + "epoch": 0.5042386338650966, + "grad_norm": 0.8674524426460266, + "learning_rate": 1.9999956971469804e-06, + "loss": 0.0618, + "num_input_tokens_seen": 13895360, + "step": 20640 + }, + { + "epoch": 0.5043607846969438, + "grad_norm": 3.513089656829834, + "learning_rate": 1.9999954433451676e-06, + "loss": 0.1164, + "num_input_tokens_seen": 13898240, + "step": 20645 + }, + { + "epoch": 0.504482935528791, + "grad_norm": 0.6792322993278503, + "learning_rate": 1.9999951822711196e-06, + "loss": 0.1748, + "num_input_tokens_seen": 13901888, + "step": 20650 + }, + { + "epoch": 0.5046050863606382, + "grad_norm": 10.531157493591309, + "learning_rate": 1.9999949139248376e-06, + "loss": 0.1279, + "num_input_tokens_seen": 13904896, + "step": 20655 + }, + { + "epoch": 0.5047272371924852, + "grad_norm": 16.625782012939453, + "learning_rate": 1.9999946383063233e-06, + "loss": 0.0989, + "num_input_tokens_seen": 13907584, + "step": 20660 + }, + { + "epoch": 0.5048493880243324, + "grad_norm": 39.52769088745117, + "learning_rate": 1.9999943554155792e-06, + "loss": 0.0806, + "num_input_tokens_seen": 13911104, + "step": 20665 + }, + { + "epoch": 0.5049715388561796, + "grad_norm": 11.926505088806152, + "learning_rate": 1.999994065252607e-06, + "loss": 0.1024, + "num_input_tokens_seen": 13914496, + "step": 20670 + }, + { + "epoch": 0.5050936896880268, + "grad_norm": 18.042837142944336, + "learning_rate": 1.9999937678174095e-06, + "loss": 0.0824, + "num_input_tokens_seen": 13917632, + "step": 20675 + }, + { + "epoch": 0.5052158405198739, + "grad_norm": 8.337514877319336, + "learning_rate": 1.999993463109988e-06, + "loss": 0.0504, + "num_input_tokens_seen": 13921280, + "step": 20680 + }, + { + "epoch": 0.5053379913517211, + "grad_norm": 25.05613136291504, + "learning_rate": 1.9999931511303454e-06, + "loss": 0.089, + "num_input_tokens_seen": 13924928, + "step": 20685 + }, + { + "epoch": 0.5054601421835683, + "grad_norm": 28.33561897277832, + "learning_rate": 1.999992831878483e-06, + "loss": 0.0856, + "num_input_tokens_seen": 13927744, + "step": 20690 + }, + { + "epoch": 0.5055822930154155, + "grad_norm": 24.654722213745117, + "learning_rate": 1.9999925053544042e-06, + "loss": 0.2061, + "num_input_tokens_seen": 13930944, + "step": 20695 + }, + { + "epoch": 0.5057044438472627, + "grad_norm": 10.103041648864746, + "learning_rate": 1.999992171558111e-06, + "loss": 0.1686, + "num_input_tokens_seen": 13933952, + "step": 20700 + }, + { + "epoch": 0.5058265946791097, + "grad_norm": 18.388208389282227, + "learning_rate": 1.9999918304896055e-06, + "loss": 0.1257, + "num_input_tokens_seen": 13937088, + "step": 20705 + }, + { + "epoch": 0.5059487455109569, + "grad_norm": 0.4605598747730255, + "learning_rate": 1.9999914821488907e-06, + "loss": 0.0984, + "num_input_tokens_seen": 13940096, + "step": 20710 + }, + { + "epoch": 0.5060708963428041, + "grad_norm": 27.575864791870117, + "learning_rate": 1.9999911265359687e-06, + "loss": 0.153, + "num_input_tokens_seen": 13943360, + "step": 20715 + }, + { + "epoch": 0.5061930471746513, + "grad_norm": 5.479243755340576, + "learning_rate": 1.9999907636508423e-06, + "loss": 0.0386, + "num_input_tokens_seen": 13947136, + "step": 20720 + }, + { + "epoch": 0.5063151980064984, + "grad_norm": 15.30483341217041, + "learning_rate": 1.999990393493514e-06, + "loss": 0.0095, + "num_input_tokens_seen": 13950784, + "step": 20725 + }, + { + "epoch": 0.5064373488383456, + "grad_norm": 18.604610443115234, + "learning_rate": 1.9999900160639867e-06, + "loss": 0.0739, + "num_input_tokens_seen": 13954048, + "step": 20730 + }, + { + "epoch": 0.5065594996701928, + "grad_norm": 0.8294208645820618, + "learning_rate": 1.999989631362263e-06, + "loss": 0.0863, + "num_input_tokens_seen": 13957568, + "step": 20735 + }, + { + "epoch": 0.50668165050204, + "grad_norm": 6.405564785003662, + "learning_rate": 1.999989239388346e-06, + "loss": 0.1229, + "num_input_tokens_seen": 13960896, + "step": 20740 + }, + { + "epoch": 0.5068038013338871, + "grad_norm": 141.67640686035156, + "learning_rate": 1.999988840142238e-06, + "loss": 0.1095, + "num_input_tokens_seen": 13964096, + "step": 20745 + }, + { + "epoch": 0.5069259521657342, + "grad_norm": 18.958993911743164, + "learning_rate": 1.999988433623942e-06, + "loss": 0.18, + "num_input_tokens_seen": 13967488, + "step": 20750 + }, + { + "epoch": 0.5070481029975814, + "grad_norm": 36.95416259765625, + "learning_rate": 1.9999880198334615e-06, + "loss": 0.22, + "num_input_tokens_seen": 13970496, + "step": 20755 + }, + { + "epoch": 0.5071702538294286, + "grad_norm": 0.5629600286483765, + "learning_rate": 1.999987598770799e-06, + "loss": 0.1957, + "num_input_tokens_seen": 13973824, + "step": 20760 + }, + { + "epoch": 0.5072924046612758, + "grad_norm": 40.47126007080078, + "learning_rate": 1.999987170435958e-06, + "loss": 0.2626, + "num_input_tokens_seen": 13976768, + "step": 20765 + }, + { + "epoch": 0.5074145554931229, + "grad_norm": 15.845697402954102, + "learning_rate": 1.999986734828941e-06, + "loss": 0.1409, + "num_input_tokens_seen": 13979712, + "step": 20770 + }, + { + "epoch": 0.50753670632497, + "grad_norm": 19.21963119506836, + "learning_rate": 1.9999862919497516e-06, + "loss": 0.2365, + "num_input_tokens_seen": 13982848, + "step": 20775 + }, + { + "epoch": 0.5076588571568172, + "grad_norm": 24.437908172607422, + "learning_rate": 1.9999858417983926e-06, + "loss": 0.1471, + "num_input_tokens_seen": 13985984, + "step": 20780 + }, + { + "epoch": 0.5077810079886644, + "grad_norm": 23.66278839111328, + "learning_rate": 1.999985384374868e-06, + "loss": 0.1118, + "num_input_tokens_seen": 13989632, + "step": 20785 + }, + { + "epoch": 0.5079031588205115, + "grad_norm": 28.34226417541504, + "learning_rate": 1.9999849196791806e-06, + "loss": 0.0515, + "num_input_tokens_seen": 13993280, + "step": 20790 + }, + { + "epoch": 0.5080253096523587, + "grad_norm": 2.3390464782714844, + "learning_rate": 1.999984447711334e-06, + "loss": 0.0598, + "num_input_tokens_seen": 13996544, + "step": 20795 + }, + { + "epoch": 0.5081474604842059, + "grad_norm": 5.47605037689209, + "learning_rate": 1.9999839684713317e-06, + "loss": 0.1112, + "num_input_tokens_seen": 13999680, + "step": 20800 + }, + { + "epoch": 0.5082696113160531, + "grad_norm": 0.12189412862062454, + "learning_rate": 1.9999834819591767e-06, + "loss": 0.1447, + "num_input_tokens_seen": 14002752, + "step": 20805 + }, + { + "epoch": 0.5083917621479003, + "grad_norm": 29.23406982421875, + "learning_rate": 1.9999829881748725e-06, + "loss": 0.164, + "num_input_tokens_seen": 14005760, + "step": 20810 + }, + { + "epoch": 0.5085139129797474, + "grad_norm": 21.18992805480957, + "learning_rate": 1.9999824871184237e-06, + "loss": 0.1718, + "num_input_tokens_seen": 14009088, + "step": 20815 + }, + { + "epoch": 0.5086360638115945, + "grad_norm": 20.333215713500977, + "learning_rate": 1.999981978789833e-06, + "loss": 0.085, + "num_input_tokens_seen": 14012544, + "step": 20820 + }, + { + "epoch": 0.5087582146434417, + "grad_norm": 11.833674430847168, + "learning_rate": 1.999981463189105e-06, + "loss": 0.1174, + "num_input_tokens_seen": 14016064, + "step": 20825 + }, + { + "epoch": 0.5088803654752889, + "grad_norm": 33.08906555175781, + "learning_rate": 1.999980940316242e-06, + "loss": 0.0498, + "num_input_tokens_seen": 14019520, + "step": 20830 + }, + { + "epoch": 0.509002516307136, + "grad_norm": 24.263809204101562, + "learning_rate": 1.9999804101712487e-06, + "loss": 0.1354, + "num_input_tokens_seen": 14022528, + "step": 20835 + }, + { + "epoch": 0.5091246671389832, + "grad_norm": 0.9982167482376099, + "learning_rate": 1.9999798727541293e-06, + "loss": 0.1094, + "num_input_tokens_seen": 14025856, + "step": 20840 + }, + { + "epoch": 0.5092468179708304, + "grad_norm": 15.391318321228027, + "learning_rate": 1.9999793280648873e-06, + "loss": 0.1613, + "num_input_tokens_seen": 14029056, + "step": 20845 + }, + { + "epoch": 0.5093689688026776, + "grad_norm": 4.251640319824219, + "learning_rate": 1.9999787761035265e-06, + "loss": 0.0166, + "num_input_tokens_seen": 14032448, + "step": 20850 + }, + { + "epoch": 0.5094911196345248, + "grad_norm": 17.91326904296875, + "learning_rate": 1.9999782168700507e-06, + "loss": 0.1246, + "num_input_tokens_seen": 14035712, + "step": 20855 + }, + { + "epoch": 0.5096132704663718, + "grad_norm": 8.680523872375488, + "learning_rate": 1.999977650364465e-06, + "loss": 0.1157, + "num_input_tokens_seen": 14039168, + "step": 20860 + }, + { + "epoch": 0.509735421298219, + "grad_norm": 3.187202215194702, + "learning_rate": 1.9999770765867725e-06, + "loss": 0.0137, + "num_input_tokens_seen": 14042624, + "step": 20865 + }, + { + "epoch": 0.5098575721300662, + "grad_norm": 10.735001564025879, + "learning_rate": 1.9999764955369777e-06, + "loss": 0.2449, + "num_input_tokens_seen": 14046016, + "step": 20870 + }, + { + "epoch": 0.5099797229619134, + "grad_norm": 6.723996639251709, + "learning_rate": 1.9999759072150852e-06, + "loss": 0.1225, + "num_input_tokens_seen": 14049472, + "step": 20875 + }, + { + "epoch": 0.5101018737937605, + "grad_norm": 18.004169464111328, + "learning_rate": 1.9999753116210986e-06, + "loss": 0.117, + "num_input_tokens_seen": 14053056, + "step": 20880 + }, + { + "epoch": 0.5102240246256077, + "grad_norm": 21.24515151977539, + "learning_rate": 1.999974708755023e-06, + "loss": 0.148, + "num_input_tokens_seen": 14056512, + "step": 20885 + }, + { + "epoch": 0.5103461754574549, + "grad_norm": 21.89750099182129, + "learning_rate": 1.9999740986168617e-06, + "loss": 0.1261, + "num_input_tokens_seen": 14059904, + "step": 20890 + }, + { + "epoch": 0.510468326289302, + "grad_norm": 16.346757888793945, + "learning_rate": 1.9999734812066203e-06, + "loss": 0.0516, + "num_input_tokens_seen": 14063744, + "step": 20895 + }, + { + "epoch": 0.5105904771211492, + "grad_norm": 19.123580932617188, + "learning_rate": 1.9999728565243025e-06, + "loss": 0.1038, + "num_input_tokens_seen": 14067264, + "step": 20900 + }, + { + "epoch": 0.5107126279529963, + "grad_norm": 43.41123580932617, + "learning_rate": 1.9999722245699134e-06, + "loss": 0.1236, + "num_input_tokens_seen": 14070656, + "step": 20905 + }, + { + "epoch": 0.5108347787848435, + "grad_norm": 16.726806640625, + "learning_rate": 1.999971585343457e-06, + "loss": 0.1776, + "num_input_tokens_seen": 14074048, + "step": 20910 + }, + { + "epoch": 0.5109569296166907, + "grad_norm": 0.432910293340683, + "learning_rate": 1.999970938844939e-06, + "loss": 0.0515, + "num_input_tokens_seen": 14077312, + "step": 20915 + }, + { + "epoch": 0.5110790804485379, + "grad_norm": 28.468435287475586, + "learning_rate": 1.9999702850743623e-06, + "loss": 0.1006, + "num_input_tokens_seen": 14080192, + "step": 20920 + }, + { + "epoch": 0.511201231280385, + "grad_norm": 24.04837417602539, + "learning_rate": 1.9999696240317335e-06, + "loss": 0.085, + "num_input_tokens_seen": 14083392, + "step": 20925 + }, + { + "epoch": 0.5113233821122322, + "grad_norm": 43.350826263427734, + "learning_rate": 1.9999689557170562e-06, + "loss": 0.1012, + "num_input_tokens_seen": 14087296, + "step": 20930 + }, + { + "epoch": 0.5114455329440794, + "grad_norm": 43.21065139770508, + "learning_rate": 1.999968280130336e-06, + "loss": 0.0761, + "num_input_tokens_seen": 14090624, + "step": 20935 + }, + { + "epoch": 0.5115676837759265, + "grad_norm": 8.68442440032959, + "learning_rate": 1.9999675972715774e-06, + "loss": 0.0917, + "num_input_tokens_seen": 14094272, + "step": 20940 + }, + { + "epoch": 0.5116898346077737, + "grad_norm": 10.342585563659668, + "learning_rate": 1.9999669071407856e-06, + "loss": 0.2236, + "num_input_tokens_seen": 14097280, + "step": 20945 + }, + { + "epoch": 0.5118119854396208, + "grad_norm": 0.07209543883800507, + "learning_rate": 1.9999662097379652e-06, + "loss": 0.0352, + "num_input_tokens_seen": 14100480, + "step": 20950 + }, + { + "epoch": 0.511934136271468, + "grad_norm": 49.62693405151367, + "learning_rate": 1.9999655050631218e-06, + "loss": 0.1376, + "num_input_tokens_seen": 14103680, + "step": 20955 + }, + { + "epoch": 0.5120562871033152, + "grad_norm": 18.054792404174805, + "learning_rate": 1.99996479311626e-06, + "loss": 0.3091, + "num_input_tokens_seen": 14106944, + "step": 20960 + }, + { + "epoch": 0.5121784379351624, + "grad_norm": 4.0170722007751465, + "learning_rate": 1.9999640738973856e-06, + "loss": 0.2194, + "num_input_tokens_seen": 14110336, + "step": 20965 + }, + { + "epoch": 0.5123005887670095, + "grad_norm": 8.806989669799805, + "learning_rate": 1.9999633474065034e-06, + "loss": 0.1982, + "num_input_tokens_seen": 14113728, + "step": 20970 + }, + { + "epoch": 0.5124227395988566, + "grad_norm": 2.0078670978546143, + "learning_rate": 1.9999626136436185e-06, + "loss": 0.0321, + "num_input_tokens_seen": 14116864, + "step": 20975 + }, + { + "epoch": 0.5125448904307038, + "grad_norm": 13.33098316192627, + "learning_rate": 1.9999618726087373e-06, + "loss": 0.1371, + "num_input_tokens_seen": 14120000, + "step": 20980 + }, + { + "epoch": 0.512667041262551, + "grad_norm": 2.074002742767334, + "learning_rate": 1.999961124301864e-06, + "loss": 0.0286, + "num_input_tokens_seen": 14123200, + "step": 20985 + }, + { + "epoch": 0.5127891920943982, + "grad_norm": 3.3571574687957764, + "learning_rate": 1.9999603687230037e-06, + "loss": 0.1198, + "num_input_tokens_seen": 14126720, + "step": 20990 + }, + { + "epoch": 0.5129113429262453, + "grad_norm": 28.249244689941406, + "learning_rate": 1.9999596058721634e-06, + "loss": 0.1523, + "num_input_tokens_seen": 14130048, + "step": 20995 + }, + { + "epoch": 0.5130334937580925, + "grad_norm": 23.86388397216797, + "learning_rate": 1.999958835749348e-06, + "loss": 0.1031, + "num_input_tokens_seen": 14133184, + "step": 21000 + }, + { + "epoch": 0.5131556445899397, + "grad_norm": 23.31868553161621, + "learning_rate": 1.9999580583545625e-06, + "loss": 0.1792, + "num_input_tokens_seen": 14136768, + "step": 21005 + }, + { + "epoch": 0.5132777954217869, + "grad_norm": 0.21132534742355347, + "learning_rate": 1.9999572736878134e-06, + "loss": 0.0683, + "num_input_tokens_seen": 14140160, + "step": 21010 + }, + { + "epoch": 0.5133999462536339, + "grad_norm": 23.453798294067383, + "learning_rate": 1.999956481749106e-06, + "loss": 0.1488, + "num_input_tokens_seen": 14143744, + "step": 21015 + }, + { + "epoch": 0.5135220970854811, + "grad_norm": 14.550957679748535, + "learning_rate": 1.999955682538446e-06, + "loss": 0.0436, + "num_input_tokens_seen": 14147072, + "step": 21020 + }, + { + "epoch": 0.5136442479173283, + "grad_norm": 4.155126571655273, + "learning_rate": 1.9999548760558395e-06, + "loss": 0.0647, + "num_input_tokens_seen": 14149952, + "step": 21025 + }, + { + "epoch": 0.5137663987491755, + "grad_norm": 10.049470901489258, + "learning_rate": 1.9999540623012917e-06, + "loss": 0.1996, + "num_input_tokens_seen": 14153216, + "step": 21030 + }, + { + "epoch": 0.5138885495810227, + "grad_norm": 0.2116897702217102, + "learning_rate": 1.9999532412748093e-06, + "loss": 0.0563, + "num_input_tokens_seen": 14156352, + "step": 21035 + }, + { + "epoch": 0.5140107004128698, + "grad_norm": 52.188636779785156, + "learning_rate": 1.9999524129763983e-06, + "loss": 0.1079, + "num_input_tokens_seen": 14159808, + "step": 21040 + }, + { + "epoch": 0.514132851244717, + "grad_norm": 27.014354705810547, + "learning_rate": 1.999951577406064e-06, + "loss": 0.1015, + "num_input_tokens_seen": 14163328, + "step": 21045 + }, + { + "epoch": 0.5142550020765642, + "grad_norm": 11.840407371520996, + "learning_rate": 1.9999507345638132e-06, + "loss": 0.1002, + "num_input_tokens_seen": 14166976, + "step": 21050 + }, + { + "epoch": 0.5143771529084114, + "grad_norm": 40.124752044677734, + "learning_rate": 1.9999498844496515e-06, + "loss": 0.1809, + "num_input_tokens_seen": 14170048, + "step": 21055 + }, + { + "epoch": 0.5144993037402584, + "grad_norm": 60.805362701416016, + "learning_rate": 1.999949027063585e-06, + "loss": 0.2079, + "num_input_tokens_seen": 14173312, + "step": 21060 + }, + { + "epoch": 0.5146214545721056, + "grad_norm": 73.4646224975586, + "learning_rate": 1.999948162405621e-06, + "loss": 0.2025, + "num_input_tokens_seen": 14176832, + "step": 21065 + }, + { + "epoch": 0.5147436054039528, + "grad_norm": 39.85783386230469, + "learning_rate": 1.9999472904757644e-06, + "loss": 0.1187, + "num_input_tokens_seen": 14179712, + "step": 21070 + }, + { + "epoch": 0.5148657562358, + "grad_norm": 0.24146099388599396, + "learning_rate": 1.999946411274022e-06, + "loss": 0.0518, + "num_input_tokens_seen": 14182912, + "step": 21075 + }, + { + "epoch": 0.5149879070676471, + "grad_norm": 27.32053565979004, + "learning_rate": 1.9999455248004005e-06, + "loss": 0.057, + "num_input_tokens_seen": 14185984, + "step": 21080 + }, + { + "epoch": 0.5151100578994943, + "grad_norm": 31.766202926635742, + "learning_rate": 1.9999446310549067e-06, + "loss": 0.2056, + "num_input_tokens_seen": 14189504, + "step": 21085 + }, + { + "epoch": 0.5152322087313415, + "grad_norm": 3.2112789154052734, + "learning_rate": 1.9999437300375457e-06, + "loss": 0.0752, + "num_input_tokens_seen": 14192640, + "step": 21090 + }, + { + "epoch": 0.5153543595631886, + "grad_norm": 31.221282958984375, + "learning_rate": 1.9999428217483256e-06, + "loss": 0.1698, + "num_input_tokens_seen": 14196160, + "step": 21095 + }, + { + "epoch": 0.5154765103950358, + "grad_norm": 4.7101359367370605, + "learning_rate": 1.9999419061872526e-06, + "loss": 0.1077, + "num_input_tokens_seen": 14200000, + "step": 21100 + }, + { + "epoch": 0.5155986612268829, + "grad_norm": 26.364850997924805, + "learning_rate": 1.9999409833543327e-06, + "loss": 0.1557, + "num_input_tokens_seen": 14203200, + "step": 21105 + }, + { + "epoch": 0.5157208120587301, + "grad_norm": 0.626477062702179, + "learning_rate": 1.9999400532495728e-06, + "loss": 0.0347, + "num_input_tokens_seen": 14206720, + "step": 21110 + }, + { + "epoch": 0.5158429628905773, + "grad_norm": 10.539756774902344, + "learning_rate": 1.99993911587298e-06, + "loss": 0.2105, + "num_input_tokens_seen": 14210048, + "step": 21115 + }, + { + "epoch": 0.5159651137224245, + "grad_norm": 3.132664203643799, + "learning_rate": 1.9999381712245613e-06, + "loss": 0.2227, + "num_input_tokens_seen": 14213504, + "step": 21120 + }, + { + "epoch": 0.5160872645542716, + "grad_norm": 31.730087280273438, + "learning_rate": 1.999937219304323e-06, + "loss": 0.1199, + "num_input_tokens_seen": 14217088, + "step": 21125 + }, + { + "epoch": 0.5162094153861188, + "grad_norm": 0.2630053162574768, + "learning_rate": 1.999936260112272e-06, + "loss": 0.0566, + "num_input_tokens_seen": 14220608, + "step": 21130 + }, + { + "epoch": 0.5163315662179659, + "grad_norm": 33.741310119628906, + "learning_rate": 1.999935293648416e-06, + "loss": 0.2041, + "num_input_tokens_seen": 14224128, + "step": 21135 + }, + { + "epoch": 0.5164537170498131, + "grad_norm": 39.81195831298828, + "learning_rate": 1.9999343199127616e-06, + "loss": 0.126, + "num_input_tokens_seen": 14227136, + "step": 21140 + }, + { + "epoch": 0.5165758678816603, + "grad_norm": 37.612003326416016, + "learning_rate": 1.9999333389053157e-06, + "loss": 0.1738, + "num_input_tokens_seen": 14230528, + "step": 21145 + }, + { + "epoch": 0.5166980187135074, + "grad_norm": 35.23280715942383, + "learning_rate": 1.999932350626086e-06, + "loss": 0.1324, + "num_input_tokens_seen": 14233920, + "step": 21150 + }, + { + "epoch": 0.5168201695453546, + "grad_norm": 0.28483980894088745, + "learning_rate": 1.999931355075079e-06, + "loss": 0.0723, + "num_input_tokens_seen": 14236928, + "step": 21155 + }, + { + "epoch": 0.5169423203772018, + "grad_norm": 27.012868881225586, + "learning_rate": 1.999930352252302e-06, + "loss": 0.1253, + "num_input_tokens_seen": 14240512, + "step": 21160 + }, + { + "epoch": 0.517064471209049, + "grad_norm": 3.816581964492798, + "learning_rate": 1.999929342157763e-06, + "loss": 0.0818, + "num_input_tokens_seen": 14243968, + "step": 21165 + }, + { + "epoch": 0.517186622040896, + "grad_norm": 18.7530574798584, + "learning_rate": 1.9999283247914684e-06, + "loss": 0.1234, + "num_input_tokens_seen": 14247040, + "step": 21170 + }, + { + "epoch": 0.5173087728727432, + "grad_norm": 0.8007626533508301, + "learning_rate": 1.9999273001534265e-06, + "loss": 0.058, + "num_input_tokens_seen": 14250112, + "step": 21175 + }, + { + "epoch": 0.5174309237045904, + "grad_norm": 2.104343891143799, + "learning_rate": 1.999926268243644e-06, + "loss": 0.0567, + "num_input_tokens_seen": 14254144, + "step": 21180 + }, + { + "epoch": 0.5175530745364376, + "grad_norm": 31.472410202026367, + "learning_rate": 1.999925229062129e-06, + "loss": 0.0362, + "num_input_tokens_seen": 14257344, + "step": 21185 + }, + { + "epoch": 0.5176752253682848, + "grad_norm": 0.5018709301948547, + "learning_rate": 1.9999241826088883e-06, + "loss": 0.0792, + "num_input_tokens_seen": 14260672, + "step": 21190 + }, + { + "epoch": 0.5177973762001319, + "grad_norm": 13.426669120788574, + "learning_rate": 1.9999231288839303e-06, + "loss": 0.0565, + "num_input_tokens_seen": 14263936, + "step": 21195 + }, + { + "epoch": 0.5179195270319791, + "grad_norm": 4.62431526184082, + "learning_rate": 1.9999220678872626e-06, + "loss": 0.1347, + "num_input_tokens_seen": 14267264, + "step": 21200 + }, + { + "epoch": 0.5180416778638263, + "grad_norm": 30.063844680786133, + "learning_rate": 1.9999209996188924e-06, + "loss": 0.2543, + "num_input_tokens_seen": 14270720, + "step": 21205 + }, + { + "epoch": 0.5181638286956735, + "grad_norm": 4.680274486541748, + "learning_rate": 1.9999199240788282e-06, + "loss": 0.22, + "num_input_tokens_seen": 14274112, + "step": 21210 + }, + { + "epoch": 0.5182859795275205, + "grad_norm": 67.60924530029297, + "learning_rate": 1.999918841267077e-06, + "loss": 0.199, + "num_input_tokens_seen": 14277376, + "step": 21215 + }, + { + "epoch": 0.5184081303593677, + "grad_norm": 3.24830961227417, + "learning_rate": 1.9999177511836467e-06, + "loss": 0.134, + "num_input_tokens_seen": 14280896, + "step": 21220 + }, + { + "epoch": 0.5185302811912149, + "grad_norm": 24.1802921295166, + "learning_rate": 1.9999166538285463e-06, + "loss": 0.1493, + "num_input_tokens_seen": 14284224, + "step": 21225 + }, + { + "epoch": 0.5186524320230621, + "grad_norm": 7.4385504722595215, + "learning_rate": 1.9999155492017824e-06, + "loss": 0.0893, + "num_input_tokens_seen": 14287488, + "step": 21230 + }, + { + "epoch": 0.5187745828549093, + "grad_norm": 0.4136931002140045, + "learning_rate": 1.999914437303364e-06, + "loss": 0.0955, + "num_input_tokens_seen": 14290560, + "step": 21235 + }, + { + "epoch": 0.5188967336867564, + "grad_norm": 13.242329597473145, + "learning_rate": 1.9999133181332984e-06, + "loss": 0.0706, + "num_input_tokens_seen": 14294272, + "step": 21240 + }, + { + "epoch": 0.5190188845186036, + "grad_norm": 13.405590057373047, + "learning_rate": 1.9999121916915948e-06, + "loss": 0.1165, + "num_input_tokens_seen": 14298048, + "step": 21245 + }, + { + "epoch": 0.5191410353504508, + "grad_norm": 23.771085739135742, + "learning_rate": 1.9999110579782607e-06, + "loss": 0.0853, + "num_input_tokens_seen": 14301184, + "step": 21250 + }, + { + "epoch": 0.5192631861822979, + "grad_norm": 10.134591102600098, + "learning_rate": 1.9999099169933046e-06, + "loss": 0.0565, + "num_input_tokens_seen": 14304128, + "step": 21255 + }, + { + "epoch": 0.519385337014145, + "grad_norm": 25.851348876953125, + "learning_rate": 1.999908768736734e-06, + "loss": 0.0888, + "num_input_tokens_seen": 14307648, + "step": 21260 + }, + { + "epoch": 0.5195074878459922, + "grad_norm": 23.614892959594727, + "learning_rate": 1.9999076132085582e-06, + "loss": 0.1576, + "num_input_tokens_seen": 14310592, + "step": 21265 + }, + { + "epoch": 0.5196296386778394, + "grad_norm": 4.225952625274658, + "learning_rate": 1.9999064504087853e-06, + "loss": 0.1492, + "num_input_tokens_seen": 14313728, + "step": 21270 + }, + { + "epoch": 0.5197517895096866, + "grad_norm": 29.918014526367188, + "learning_rate": 1.9999052803374243e-06, + "loss": 0.2074, + "num_input_tokens_seen": 14317056, + "step": 21275 + }, + { + "epoch": 0.5198739403415338, + "grad_norm": 23.858491897583008, + "learning_rate": 1.9999041029944827e-06, + "loss": 0.1367, + "num_input_tokens_seen": 14319936, + "step": 21280 + }, + { + "epoch": 0.5199960911733809, + "grad_norm": 8.586871147155762, + "learning_rate": 1.9999029183799696e-06, + "loss": 0.1315, + "num_input_tokens_seen": 14323072, + "step": 21285 + }, + { + "epoch": 0.520118242005228, + "grad_norm": 6.292385101318359, + "learning_rate": 1.9999017264938933e-06, + "loss": 0.1303, + "num_input_tokens_seen": 14326336, + "step": 21290 + }, + { + "epoch": 0.5202403928370752, + "grad_norm": 5.245234489440918, + "learning_rate": 1.9999005273362628e-06, + "loss": 0.0619, + "num_input_tokens_seen": 14329792, + "step": 21295 + }, + { + "epoch": 0.5203625436689224, + "grad_norm": 3.494668960571289, + "learning_rate": 1.9998993209070865e-06, + "loss": 0.1505, + "num_input_tokens_seen": 14332864, + "step": 21300 + }, + { + "epoch": 0.5204846945007695, + "grad_norm": 5.071745872497559, + "learning_rate": 1.9998981072063738e-06, + "loss": 0.1236, + "num_input_tokens_seen": 14336256, + "step": 21305 + }, + { + "epoch": 0.5206068453326167, + "grad_norm": 10.048419952392578, + "learning_rate": 1.999896886234133e-06, + "loss": 0.1244, + "num_input_tokens_seen": 14339904, + "step": 21310 + }, + { + "epoch": 0.5207289961644639, + "grad_norm": 15.470492362976074, + "learning_rate": 1.9998956579903733e-06, + "loss": 0.2658, + "num_input_tokens_seen": 14343424, + "step": 21315 + }, + { + "epoch": 0.5208511469963111, + "grad_norm": 18.155609130859375, + "learning_rate": 1.999894422475103e-06, + "loss": 0.0877, + "num_input_tokens_seen": 14346880, + "step": 21320 + }, + { + "epoch": 0.5209732978281582, + "grad_norm": 0.9843332171440125, + "learning_rate": 1.9998931796883316e-06, + "loss": 0.0885, + "num_input_tokens_seen": 14350336, + "step": 21325 + }, + { + "epoch": 0.5210954486600053, + "grad_norm": 25.281551361083984, + "learning_rate": 1.9998919296300684e-06, + "loss": 0.1958, + "num_input_tokens_seen": 14353600, + "step": 21330 + }, + { + "epoch": 0.5212175994918525, + "grad_norm": 21.323984146118164, + "learning_rate": 1.9998906723003216e-06, + "loss": 0.194, + "num_input_tokens_seen": 14356928, + "step": 21335 + }, + { + "epoch": 0.5213397503236997, + "grad_norm": 3.6292245388031006, + "learning_rate": 1.9998894076991015e-06, + "loss": 0.1148, + "num_input_tokens_seen": 14361024, + "step": 21340 + }, + { + "epoch": 0.5214619011555469, + "grad_norm": 1.0466417074203491, + "learning_rate": 1.9998881358264165e-06, + "loss": 0.1752, + "num_input_tokens_seen": 14364416, + "step": 21345 + }, + { + "epoch": 0.521584051987394, + "grad_norm": 15.274713516235352, + "learning_rate": 1.9998868566822756e-06, + "loss": 0.1184, + "num_input_tokens_seen": 14367936, + "step": 21350 + }, + { + "epoch": 0.5217062028192412, + "grad_norm": 14.416821479797363, + "learning_rate": 1.9998855702666892e-06, + "loss": 0.0963, + "num_input_tokens_seen": 14371392, + "step": 21355 + }, + { + "epoch": 0.5218283536510884, + "grad_norm": 5.276510238647461, + "learning_rate": 1.9998842765796656e-06, + "loss": 0.0451, + "num_input_tokens_seen": 14374976, + "step": 21360 + }, + { + "epoch": 0.5219505044829356, + "grad_norm": 3.4116029739379883, + "learning_rate": 1.9998829756212147e-06, + "loss": 0.0914, + "num_input_tokens_seen": 14378752, + "step": 21365 + }, + { + "epoch": 0.5220726553147826, + "grad_norm": 5.307886600494385, + "learning_rate": 1.999881667391346e-06, + "loss": 0.1199, + "num_input_tokens_seen": 14381760, + "step": 21370 + }, + { + "epoch": 0.5221948061466298, + "grad_norm": 9.030430793762207, + "learning_rate": 1.9998803518900687e-06, + "loss": 0.1328, + "num_input_tokens_seen": 14385344, + "step": 21375 + }, + { + "epoch": 0.522316956978477, + "grad_norm": 67.31908416748047, + "learning_rate": 1.999879029117393e-06, + "loss": 0.2202, + "num_input_tokens_seen": 14389120, + "step": 21380 + }, + { + "epoch": 0.5224391078103242, + "grad_norm": 1.338660478591919, + "learning_rate": 1.9998776990733274e-06, + "loss": 0.0601, + "num_input_tokens_seen": 14392832, + "step": 21385 + }, + { + "epoch": 0.5225612586421714, + "grad_norm": 13.9439115524292, + "learning_rate": 1.9998763617578825e-06, + "loss": 0.2013, + "num_input_tokens_seen": 14396544, + "step": 21390 + }, + { + "epoch": 0.5226834094740185, + "grad_norm": 6.826702117919922, + "learning_rate": 1.999875017171068e-06, + "loss": 0.0907, + "num_input_tokens_seen": 14399552, + "step": 21395 + }, + { + "epoch": 0.5228055603058657, + "grad_norm": 36.58500671386719, + "learning_rate": 1.999873665312893e-06, + "loss": 0.1445, + "num_input_tokens_seen": 14402752, + "step": 21400 + }, + { + "epoch": 0.5229277111377129, + "grad_norm": 3.132948160171509, + "learning_rate": 1.9998723061833676e-06, + "loss": 0.174, + "num_input_tokens_seen": 14406144, + "step": 21405 + }, + { + "epoch": 0.52304986196956, + "grad_norm": 17.92841148376465, + "learning_rate": 1.9998709397825024e-06, + "loss": 0.0878, + "num_input_tokens_seen": 14409344, + "step": 21410 + }, + { + "epoch": 0.5231720128014071, + "grad_norm": 9.672657012939453, + "learning_rate": 1.999869566110307e-06, + "loss": 0.067, + "num_input_tokens_seen": 14412608, + "step": 21415 + }, + { + "epoch": 0.5232941636332543, + "grad_norm": 15.635049819946289, + "learning_rate": 1.9998681851667903e-06, + "loss": 0.1183, + "num_input_tokens_seen": 14415744, + "step": 21420 + }, + { + "epoch": 0.5234163144651015, + "grad_norm": 9.510732650756836, + "learning_rate": 1.999866796951964e-06, + "loss": 0.1525, + "num_input_tokens_seen": 14418752, + "step": 21425 + }, + { + "epoch": 0.5235384652969487, + "grad_norm": 16.59809684753418, + "learning_rate": 1.999865401465837e-06, + "loss": 0.2345, + "num_input_tokens_seen": 14421696, + "step": 21430 + }, + { + "epoch": 0.5236606161287959, + "grad_norm": 17.911949157714844, + "learning_rate": 1.9998639987084204e-06, + "loss": 0.1961, + "num_input_tokens_seen": 14424960, + "step": 21435 + }, + { + "epoch": 0.523782766960643, + "grad_norm": 2.501218557357788, + "learning_rate": 1.9998625886797235e-06, + "loss": 0.0777, + "num_input_tokens_seen": 14428608, + "step": 21440 + }, + { + "epoch": 0.5239049177924902, + "grad_norm": 1.355796456336975, + "learning_rate": 1.9998611713797566e-06, + "loss": 0.1527, + "num_input_tokens_seen": 14431680, + "step": 21445 + }, + { + "epoch": 0.5240270686243373, + "grad_norm": 19.79297637939453, + "learning_rate": 1.9998597468085306e-06, + "loss": 0.148, + "num_input_tokens_seen": 14434944, + "step": 21450 + }, + { + "epoch": 0.5241492194561845, + "grad_norm": 18.022729873657227, + "learning_rate": 1.9998583149660558e-06, + "loss": 0.1219, + "num_input_tokens_seen": 14437952, + "step": 21455 + }, + { + "epoch": 0.5242713702880316, + "grad_norm": 18.11164093017578, + "learning_rate": 1.999856875852342e-06, + "loss": 0.1344, + "num_input_tokens_seen": 14440896, + "step": 21460 + }, + { + "epoch": 0.5243935211198788, + "grad_norm": 2.50659441947937, + "learning_rate": 1.9998554294674005e-06, + "loss": 0.0374, + "num_input_tokens_seen": 14444096, + "step": 21465 + }, + { + "epoch": 0.524515671951726, + "grad_norm": 6.1895365715026855, + "learning_rate": 1.9998539758112413e-06, + "loss": 0.2057, + "num_input_tokens_seen": 14447232, + "step": 21470 + }, + { + "epoch": 0.5246378227835732, + "grad_norm": 28.5371036529541, + "learning_rate": 1.999852514883875e-06, + "loss": 0.1155, + "num_input_tokens_seen": 14450624, + "step": 21475 + }, + { + "epoch": 0.5247599736154204, + "grad_norm": 19.53989601135254, + "learning_rate": 1.999851046685312e-06, + "loss": 0.142, + "num_input_tokens_seen": 14453824, + "step": 21480 + }, + { + "epoch": 0.5248821244472675, + "grad_norm": 5.390257835388184, + "learning_rate": 1.9998495712155638e-06, + "loss": 0.0319, + "num_input_tokens_seen": 14457088, + "step": 21485 + }, + { + "epoch": 0.5250042752791146, + "grad_norm": 0.5065277218818665, + "learning_rate": 1.9998480884746403e-06, + "loss": 0.0508, + "num_input_tokens_seen": 14460032, + "step": 21490 + }, + { + "epoch": 0.5251264261109618, + "grad_norm": 0.2909560203552246, + "learning_rate": 1.9998465984625526e-06, + "loss": 0.1255, + "num_input_tokens_seen": 14463744, + "step": 21495 + }, + { + "epoch": 0.525248576942809, + "grad_norm": 31.88382911682129, + "learning_rate": 1.9998451011793113e-06, + "loss": 0.1094, + "num_input_tokens_seen": 14467072, + "step": 21500 + }, + { + "epoch": 0.5253707277746561, + "grad_norm": 7.6989898681640625, + "learning_rate": 1.999843596624928e-06, + "loss": 0.0689, + "num_input_tokens_seen": 14470272, + "step": 21505 + }, + { + "epoch": 0.5254928786065033, + "grad_norm": 2.5370209217071533, + "learning_rate": 1.999842084799413e-06, + "loss": 0.012, + "num_input_tokens_seen": 14474048, + "step": 21510 + }, + { + "epoch": 0.5256150294383505, + "grad_norm": 31.16155433654785, + "learning_rate": 1.999840565702777e-06, + "loss": 0.0521, + "num_input_tokens_seen": 14477760, + "step": 21515 + }, + { + "epoch": 0.5257371802701977, + "grad_norm": 0.8267857432365417, + "learning_rate": 1.9998390393350318e-06, + "loss": 0.3199, + "num_input_tokens_seen": 14481216, + "step": 21520 + }, + { + "epoch": 0.5258593311020449, + "grad_norm": 14.096746444702148, + "learning_rate": 1.9998375056961877e-06, + "loss": 0.0071, + "num_input_tokens_seen": 14484416, + "step": 21525 + }, + { + "epoch": 0.5259814819338919, + "grad_norm": 10.87549114227295, + "learning_rate": 1.999835964786257e-06, + "loss": 0.1347, + "num_input_tokens_seen": 14488000, + "step": 21530 + }, + { + "epoch": 0.5261036327657391, + "grad_norm": 48.07831954956055, + "learning_rate": 1.99983441660525e-06, + "loss": 0.2018, + "num_input_tokens_seen": 14491264, + "step": 21535 + }, + { + "epoch": 0.5262257835975863, + "grad_norm": 0.5932395458221436, + "learning_rate": 1.9998328611531783e-06, + "loss": 0.1217, + "num_input_tokens_seen": 14494784, + "step": 21540 + }, + { + "epoch": 0.5263479344294335, + "grad_norm": 28.88714599609375, + "learning_rate": 1.9998312984300527e-06, + "loss": 0.2129, + "num_input_tokens_seen": 14497856, + "step": 21545 + }, + { + "epoch": 0.5264700852612806, + "grad_norm": 1.3318819999694824, + "learning_rate": 1.9998297284358854e-06, + "loss": 0.1674, + "num_input_tokens_seen": 14501440, + "step": 21550 + }, + { + "epoch": 0.5265922360931278, + "grad_norm": 0.5254182815551758, + "learning_rate": 1.9998281511706874e-06, + "loss": 0.1212, + "num_input_tokens_seen": 14504832, + "step": 21555 + }, + { + "epoch": 0.526714386924975, + "grad_norm": 1.647344708442688, + "learning_rate": 1.99982656663447e-06, + "loss": 0.0934, + "num_input_tokens_seen": 14508352, + "step": 21560 + }, + { + "epoch": 0.5268365377568222, + "grad_norm": 1.297066569328308, + "learning_rate": 1.999824974827245e-06, + "loss": 0.1684, + "num_input_tokens_seen": 14511552, + "step": 21565 + }, + { + "epoch": 0.5269586885886693, + "grad_norm": 41.3592643737793, + "learning_rate": 1.9998233757490237e-06, + "loss": 0.1697, + "num_input_tokens_seen": 14514752, + "step": 21570 + }, + { + "epoch": 0.5270808394205164, + "grad_norm": 8.706243515014648, + "learning_rate": 1.9998217693998177e-06, + "loss": 0.0283, + "num_input_tokens_seen": 14517824, + "step": 21575 + }, + { + "epoch": 0.5272029902523636, + "grad_norm": 9.767743110656738, + "learning_rate": 1.9998201557796395e-06, + "loss": 0.0999, + "num_input_tokens_seen": 14521216, + "step": 21580 + }, + { + "epoch": 0.5273251410842108, + "grad_norm": 32.12539291381836, + "learning_rate": 1.9998185348885e-06, + "loss": 0.0718, + "num_input_tokens_seen": 14524800, + "step": 21585 + }, + { + "epoch": 0.527447291916058, + "grad_norm": 1.750207543373108, + "learning_rate": 1.999816906726411e-06, + "loss": 0.0716, + "num_input_tokens_seen": 14528640, + "step": 21590 + }, + { + "epoch": 0.5275694427479051, + "grad_norm": 13.995022773742676, + "learning_rate": 1.9998152712933846e-06, + "loss": 0.0889, + "num_input_tokens_seen": 14531968, + "step": 21595 + }, + { + "epoch": 0.5276915935797523, + "grad_norm": 36.821590423583984, + "learning_rate": 1.9998136285894326e-06, + "loss": 0.0779, + "num_input_tokens_seen": 14535296, + "step": 21600 + }, + { + "epoch": 0.5278137444115995, + "grad_norm": 2.830824613571167, + "learning_rate": 1.999811978614567e-06, + "loss": 0.1281, + "num_input_tokens_seen": 14538944, + "step": 21605 + }, + { + "epoch": 0.5279358952434466, + "grad_norm": 17.418807983398438, + "learning_rate": 1.9998103213687994e-06, + "loss": 0.1101, + "num_input_tokens_seen": 14542464, + "step": 21610 + }, + { + "epoch": 0.5280580460752937, + "grad_norm": 25.319671630859375, + "learning_rate": 1.9998086568521426e-06, + "loss": 0.1037, + "num_input_tokens_seen": 14545664, + "step": 21615 + }, + { + "epoch": 0.5281801969071409, + "grad_norm": 0.47470173239707947, + "learning_rate": 1.9998069850646084e-06, + "loss": 0.2195, + "num_input_tokens_seen": 14549120, + "step": 21620 + }, + { + "epoch": 0.5283023477389881, + "grad_norm": 52.587093353271484, + "learning_rate": 1.999805306006209e-06, + "loss": 0.1886, + "num_input_tokens_seen": 14552320, + "step": 21625 + }, + { + "epoch": 0.5284244985708353, + "grad_norm": 0.40930500626564026, + "learning_rate": 1.9998036196769564e-06, + "loss": 0.1231, + "num_input_tokens_seen": 14555520, + "step": 21630 + }, + { + "epoch": 0.5285466494026825, + "grad_norm": 10.084890365600586, + "learning_rate": 1.9998019260768626e-06, + "loss": 0.0935, + "num_input_tokens_seen": 14558848, + "step": 21635 + }, + { + "epoch": 0.5286688002345296, + "grad_norm": 15.713746070861816, + "learning_rate": 1.9998002252059406e-06, + "loss": 0.247, + "num_input_tokens_seen": 14562176, + "step": 21640 + }, + { + "epoch": 0.5287909510663767, + "grad_norm": 12.560420036315918, + "learning_rate": 1.9997985170642025e-06, + "loss": 0.1526, + "num_input_tokens_seen": 14565760, + "step": 21645 + }, + { + "epoch": 0.5289131018982239, + "grad_norm": 27.058290481567383, + "learning_rate": 1.9997968016516606e-06, + "loss": 0.067, + "num_input_tokens_seen": 14569408, + "step": 21650 + }, + { + "epoch": 0.5290352527300711, + "grad_norm": 7.252396106719971, + "learning_rate": 1.9997950789683277e-06, + "loss": 0.0626, + "num_input_tokens_seen": 14573504, + "step": 21655 + }, + { + "epoch": 0.5291574035619182, + "grad_norm": 35.32759475708008, + "learning_rate": 1.9997933490142156e-06, + "loss": 0.0374, + "num_input_tokens_seen": 14576704, + "step": 21660 + }, + { + "epoch": 0.5292795543937654, + "grad_norm": 17.713476181030273, + "learning_rate": 1.9997916117893374e-06, + "loss": 0.084, + "num_input_tokens_seen": 14580864, + "step": 21665 + }, + { + "epoch": 0.5294017052256126, + "grad_norm": 22.07648468017578, + "learning_rate": 1.999789867293706e-06, + "loss": 0.0969, + "num_input_tokens_seen": 14584320, + "step": 21670 + }, + { + "epoch": 0.5295238560574598, + "grad_norm": 0.4838829040527344, + "learning_rate": 1.9997881155273336e-06, + "loss": 0.1203, + "num_input_tokens_seen": 14587328, + "step": 21675 + }, + { + "epoch": 0.529646006889307, + "grad_norm": 31.40498924255371, + "learning_rate": 1.999786356490233e-06, + "loss": 0.188, + "num_input_tokens_seen": 14590720, + "step": 21680 + }, + { + "epoch": 0.529768157721154, + "grad_norm": 13.535494804382324, + "learning_rate": 1.999784590182417e-06, + "loss": 0.1405, + "num_input_tokens_seen": 14593728, + "step": 21685 + }, + { + "epoch": 0.5298903085530012, + "grad_norm": 1.923133373260498, + "learning_rate": 1.999782816603899e-06, + "loss": 0.1511, + "num_input_tokens_seen": 14597440, + "step": 21690 + }, + { + "epoch": 0.5300124593848484, + "grad_norm": 18.139060974121094, + "learning_rate": 1.9997810357546913e-06, + "loss": 0.0933, + "num_input_tokens_seen": 14600704, + "step": 21695 + }, + { + "epoch": 0.5301346102166956, + "grad_norm": 1.0402435064315796, + "learning_rate": 1.999779247634807e-06, + "loss": 0.0149, + "num_input_tokens_seen": 14603968, + "step": 21700 + }, + { + "epoch": 0.5302567610485427, + "grad_norm": 1.9129348993301392, + "learning_rate": 1.9997774522442587e-06, + "loss": 0.0334, + "num_input_tokens_seen": 14607488, + "step": 21705 + }, + { + "epoch": 0.5303789118803899, + "grad_norm": 23.60872459411621, + "learning_rate": 1.9997756495830606e-06, + "loss": 0.1878, + "num_input_tokens_seen": 14610880, + "step": 21710 + }, + { + "epoch": 0.5305010627122371, + "grad_norm": 10.163957595825195, + "learning_rate": 1.9997738396512243e-06, + "loss": 0.2545, + "num_input_tokens_seen": 14614336, + "step": 21715 + }, + { + "epoch": 0.5306232135440843, + "grad_norm": 57.82450485229492, + "learning_rate": 1.9997720224487642e-06, + "loss": 0.1535, + "num_input_tokens_seen": 14617600, + "step": 21720 + }, + { + "epoch": 0.5307453643759314, + "grad_norm": 17.80607795715332, + "learning_rate": 1.999770197975693e-06, + "loss": 0.1522, + "num_input_tokens_seen": 14620480, + "step": 21725 + }, + { + "epoch": 0.5308675152077785, + "grad_norm": 8.344657897949219, + "learning_rate": 1.999768366232024e-06, + "loss": 0.0956, + "num_input_tokens_seen": 14623808, + "step": 21730 + }, + { + "epoch": 0.5309896660396257, + "grad_norm": 29.846019744873047, + "learning_rate": 1.9997665272177706e-06, + "loss": 0.1534, + "num_input_tokens_seen": 14626944, + "step": 21735 + }, + { + "epoch": 0.5311118168714729, + "grad_norm": 10.817802429199219, + "learning_rate": 1.999764680932946e-06, + "loss": 0.1231, + "num_input_tokens_seen": 14629824, + "step": 21740 + }, + { + "epoch": 0.5312339677033201, + "grad_norm": 35.45216369628906, + "learning_rate": 1.9997628273775635e-06, + "loss": 0.1844, + "num_input_tokens_seen": 14633536, + "step": 21745 + }, + { + "epoch": 0.5313561185351672, + "grad_norm": 17.77213478088379, + "learning_rate": 1.999760966551637e-06, + "loss": 0.1802, + "num_input_tokens_seen": 14636480, + "step": 21750 + }, + { + "epoch": 0.5314782693670144, + "grad_norm": 8.96764087677002, + "learning_rate": 1.99975909845518e-06, + "loss": 0.1074, + "num_input_tokens_seen": 14640000, + "step": 21755 + }, + { + "epoch": 0.5316004201988616, + "grad_norm": 0.3799511790275574, + "learning_rate": 1.999757223088206e-06, + "loss": 0.0523, + "num_input_tokens_seen": 14643520, + "step": 21760 + }, + { + "epoch": 0.5317225710307087, + "grad_norm": 6.152425765991211, + "learning_rate": 1.9997553404507284e-06, + "loss": 0.1024, + "num_input_tokens_seen": 14646720, + "step": 21765 + }, + { + "epoch": 0.5318447218625559, + "grad_norm": 1.7916256189346313, + "learning_rate": 1.9997534505427607e-06, + "loss": 0.0662, + "num_input_tokens_seen": 14649920, + "step": 21770 + }, + { + "epoch": 0.531966872694403, + "grad_norm": 26.20082664489746, + "learning_rate": 1.9997515533643176e-06, + "loss": 0.1042, + "num_input_tokens_seen": 14653120, + "step": 21775 + }, + { + "epoch": 0.5320890235262502, + "grad_norm": 3.543252468109131, + "learning_rate": 1.999749648915412e-06, + "loss": 0.1285, + "num_input_tokens_seen": 14656704, + "step": 21780 + }, + { + "epoch": 0.5322111743580974, + "grad_norm": 11.144600868225098, + "learning_rate": 1.999747737196058e-06, + "loss": 0.1138, + "num_input_tokens_seen": 14659968, + "step": 21785 + }, + { + "epoch": 0.5323333251899446, + "grad_norm": 5.704688549041748, + "learning_rate": 1.9997458182062695e-06, + "loss": 0.1466, + "num_input_tokens_seen": 14663168, + "step": 21790 + }, + { + "epoch": 0.5324554760217917, + "grad_norm": 13.300514221191406, + "learning_rate": 1.999743891946061e-06, + "loss": 0.1678, + "num_input_tokens_seen": 14666880, + "step": 21795 + }, + { + "epoch": 0.5325776268536389, + "grad_norm": 27.650346755981445, + "learning_rate": 1.999741958415446e-06, + "loss": 0.0876, + "num_input_tokens_seen": 14670016, + "step": 21800 + }, + { + "epoch": 0.532699777685486, + "grad_norm": 1.8453861474990845, + "learning_rate": 1.999740017614438e-06, + "loss": 0.1508, + "num_input_tokens_seen": 14673152, + "step": 21805 + }, + { + "epoch": 0.5328219285173332, + "grad_norm": 28.67298126220703, + "learning_rate": 1.999738069543052e-06, + "loss": 0.0931, + "num_input_tokens_seen": 14676800, + "step": 21810 + }, + { + "epoch": 0.5329440793491804, + "grad_norm": 14.552690505981445, + "learning_rate": 1.9997361142013016e-06, + "loss": 0.0399, + "num_input_tokens_seen": 14680320, + "step": 21815 + }, + { + "epoch": 0.5330662301810275, + "grad_norm": 23.46329116821289, + "learning_rate": 1.9997341515892016e-06, + "loss": 0.1401, + "num_input_tokens_seen": 14683264, + "step": 21820 + }, + { + "epoch": 0.5331883810128747, + "grad_norm": 12.535654067993164, + "learning_rate": 1.9997321817067662e-06, + "loss": 0.135, + "num_input_tokens_seen": 14686656, + "step": 21825 + }, + { + "epoch": 0.5333105318447219, + "grad_norm": 25.697513580322266, + "learning_rate": 1.999730204554009e-06, + "loss": 0.1525, + "num_input_tokens_seen": 14690304, + "step": 21830 + }, + { + "epoch": 0.5334326826765691, + "grad_norm": 2.353025436401367, + "learning_rate": 1.999728220130945e-06, + "loss": 0.1182, + "num_input_tokens_seen": 14693632, + "step": 21835 + }, + { + "epoch": 0.5335548335084161, + "grad_norm": 7.448736667633057, + "learning_rate": 1.9997262284375886e-06, + "loss": 0.0625, + "num_input_tokens_seen": 14696768, + "step": 21840 + }, + { + "epoch": 0.5336769843402633, + "grad_norm": 17.941051483154297, + "learning_rate": 1.999724229473954e-06, + "loss": 0.0942, + "num_input_tokens_seen": 14699904, + "step": 21845 + }, + { + "epoch": 0.5337991351721105, + "grad_norm": 11.695417404174805, + "learning_rate": 1.999722223240056e-06, + "loss": 0.1101, + "num_input_tokens_seen": 14703936, + "step": 21850 + }, + { + "epoch": 0.5339212860039577, + "grad_norm": 0.5295389294624329, + "learning_rate": 1.999720209735909e-06, + "loss": 0.0913, + "num_input_tokens_seen": 14707264, + "step": 21855 + }, + { + "epoch": 0.5340434368358048, + "grad_norm": 19.66712188720703, + "learning_rate": 1.9997181889615277e-06, + "loss": 0.1847, + "num_input_tokens_seen": 14710720, + "step": 21860 + }, + { + "epoch": 0.534165587667652, + "grad_norm": 8.764230728149414, + "learning_rate": 1.999716160916927e-06, + "loss": 0.0553, + "num_input_tokens_seen": 14713920, + "step": 21865 + }, + { + "epoch": 0.5342877384994992, + "grad_norm": 20.96441078186035, + "learning_rate": 1.9997141256021214e-06, + "loss": 0.1577, + "num_input_tokens_seen": 14717184, + "step": 21870 + }, + { + "epoch": 0.5344098893313464, + "grad_norm": 21.725391387939453, + "learning_rate": 1.999712083017126e-06, + "loss": 0.1194, + "num_input_tokens_seen": 14720512, + "step": 21875 + }, + { + "epoch": 0.5345320401631936, + "grad_norm": 0.7325649857521057, + "learning_rate": 1.999710033161955e-06, + "loss": 0.0821, + "num_input_tokens_seen": 14723776, + "step": 21880 + }, + { + "epoch": 0.5346541909950406, + "grad_norm": 4.8892083168029785, + "learning_rate": 1.9997079760366242e-06, + "loss": 0.0919, + "num_input_tokens_seen": 14726784, + "step": 21885 + }, + { + "epoch": 0.5347763418268878, + "grad_norm": 4.73694372177124, + "learning_rate": 1.999705911641148e-06, + "loss": 0.0458, + "num_input_tokens_seen": 14730368, + "step": 21890 + }, + { + "epoch": 0.534898492658735, + "grad_norm": 0.25473085045814514, + "learning_rate": 1.9997038399755416e-06, + "loss": 0.1431, + "num_input_tokens_seen": 14733888, + "step": 21895 + }, + { + "epoch": 0.5350206434905822, + "grad_norm": 12.120611190795898, + "learning_rate": 1.99970176103982e-06, + "loss": 0.1114, + "num_input_tokens_seen": 14736960, + "step": 21900 + }, + { + "epoch": 0.5351427943224293, + "grad_norm": 17.077253341674805, + "learning_rate": 1.999699674833998e-06, + "loss": 0.2132, + "num_input_tokens_seen": 14740160, + "step": 21905 + }, + { + "epoch": 0.5352649451542765, + "grad_norm": 7.404626369476318, + "learning_rate": 1.9996975813580913e-06, + "loss": 0.1527, + "num_input_tokens_seen": 14743744, + "step": 21910 + }, + { + "epoch": 0.5353870959861237, + "grad_norm": 2.2422728538513184, + "learning_rate": 1.9996954806121145e-06, + "loss": 0.0924, + "num_input_tokens_seen": 14747648, + "step": 21915 + }, + { + "epoch": 0.5355092468179709, + "grad_norm": 12.412498474121094, + "learning_rate": 1.999693372596084e-06, + "loss": 0.2639, + "num_input_tokens_seen": 14750912, + "step": 21920 + }, + { + "epoch": 0.535631397649818, + "grad_norm": 12.730358123779297, + "learning_rate": 1.999691257310014e-06, + "loss": 0.1055, + "num_input_tokens_seen": 14754880, + "step": 21925 + }, + { + "epoch": 0.5357535484816651, + "grad_norm": 16.045204162597656, + "learning_rate": 1.99968913475392e-06, + "loss": 0.1137, + "num_input_tokens_seen": 14758336, + "step": 21930 + }, + { + "epoch": 0.5358756993135123, + "grad_norm": 3.7376608848571777, + "learning_rate": 1.9996870049278183e-06, + "loss": 0.1668, + "num_input_tokens_seen": 14762048, + "step": 21935 + }, + { + "epoch": 0.5359978501453595, + "grad_norm": 21.60993194580078, + "learning_rate": 1.9996848678317236e-06, + "loss": 0.1627, + "num_input_tokens_seen": 14765248, + "step": 21940 + }, + { + "epoch": 0.5361200009772067, + "grad_norm": 18.804241180419922, + "learning_rate": 1.9996827234656515e-06, + "loss": 0.2253, + "num_input_tokens_seen": 14768320, + "step": 21945 + }, + { + "epoch": 0.5362421518090538, + "grad_norm": 17.075807571411133, + "learning_rate": 1.999680571829618e-06, + "loss": 0.1673, + "num_input_tokens_seen": 14771840, + "step": 21950 + }, + { + "epoch": 0.536364302640901, + "grad_norm": 17.029102325439453, + "learning_rate": 1.9996784129236383e-06, + "loss": 0.0802, + "num_input_tokens_seen": 14775744, + "step": 21955 + }, + { + "epoch": 0.5364864534727481, + "grad_norm": 9.187379837036133, + "learning_rate": 1.999676246747728e-06, + "loss": 0.0289, + "num_input_tokens_seen": 14779776, + "step": 21960 + }, + { + "epoch": 0.5366086043045953, + "grad_norm": 31.00434684753418, + "learning_rate": 1.9996740733019037e-06, + "loss": 0.2489, + "num_input_tokens_seen": 14783168, + "step": 21965 + }, + { + "epoch": 0.5367307551364425, + "grad_norm": 20.447832107543945, + "learning_rate": 1.9996718925861805e-06, + "loss": 0.0224, + "num_input_tokens_seen": 14786240, + "step": 21970 + }, + { + "epoch": 0.5368529059682896, + "grad_norm": 27.77326202392578, + "learning_rate": 1.9996697046005746e-06, + "loss": 0.1144, + "num_input_tokens_seen": 14789248, + "step": 21975 + }, + { + "epoch": 0.5369750568001368, + "grad_norm": 0.8397197127342224, + "learning_rate": 1.9996675093451014e-06, + "loss": 0.0216, + "num_input_tokens_seen": 14792128, + "step": 21980 + }, + { + "epoch": 0.537097207631984, + "grad_norm": 30.160160064697266, + "learning_rate": 1.9996653068197774e-06, + "loss": 0.0429, + "num_input_tokens_seen": 14795200, + "step": 21985 + }, + { + "epoch": 0.5372193584638312, + "grad_norm": 20.93111228942871, + "learning_rate": 1.999663097024618e-06, + "loss": 0.1353, + "num_input_tokens_seen": 14798656, + "step": 21990 + }, + { + "epoch": 0.5373415092956783, + "grad_norm": 15.375563621520996, + "learning_rate": 1.9996608799596402e-06, + "loss": 0.0957, + "num_input_tokens_seen": 14802112, + "step": 21995 + }, + { + "epoch": 0.5374636601275254, + "grad_norm": 31.491281509399414, + "learning_rate": 1.9996586556248593e-06, + "loss": 0.1855, + "num_input_tokens_seen": 14805568, + "step": 22000 + }, + { + "epoch": 0.5375858109593726, + "grad_norm": 16.63956642150879, + "learning_rate": 1.999656424020292e-06, + "loss": 0.1993, + "num_input_tokens_seen": 14809280, + "step": 22005 + }, + { + "epoch": 0.5377079617912198, + "grad_norm": 56.18572998046875, + "learning_rate": 1.999654185145954e-06, + "loss": 0.3984, + "num_input_tokens_seen": 14812288, + "step": 22010 + }, + { + "epoch": 0.537830112623067, + "grad_norm": 14.070927619934082, + "learning_rate": 1.9996519390018626e-06, + "loss": 0.1776, + "num_input_tokens_seen": 14815296, + "step": 22015 + }, + { + "epoch": 0.5379522634549141, + "grad_norm": 7.646341323852539, + "learning_rate": 1.9996496855880327e-06, + "loss": 0.1282, + "num_input_tokens_seen": 14818368, + "step": 22020 + }, + { + "epoch": 0.5380744142867613, + "grad_norm": 3.7417798042297363, + "learning_rate": 1.9996474249044816e-06, + "loss": 0.0895, + "num_input_tokens_seen": 14821888, + "step": 22025 + }, + { + "epoch": 0.5381965651186085, + "grad_norm": 19.079700469970703, + "learning_rate": 1.999645156951226e-06, + "loss": 0.0929, + "num_input_tokens_seen": 14824960, + "step": 22030 + }, + { + "epoch": 0.5383187159504557, + "grad_norm": 4.660904407501221, + "learning_rate": 1.999642881728281e-06, + "loss": 0.0822, + "num_input_tokens_seen": 14828672, + "step": 22035 + }, + { + "epoch": 0.5384408667823027, + "grad_norm": 17.99866485595703, + "learning_rate": 1.9996405992356648e-06, + "loss": 0.129, + "num_input_tokens_seen": 14832192, + "step": 22040 + }, + { + "epoch": 0.5385630176141499, + "grad_norm": 2.2056124210357666, + "learning_rate": 1.999638309473393e-06, + "loss": 0.1256, + "num_input_tokens_seen": 14835776, + "step": 22045 + }, + { + "epoch": 0.5386851684459971, + "grad_norm": 14.415079116821289, + "learning_rate": 1.999636012441483e-06, + "loss": 0.0424, + "num_input_tokens_seen": 14839296, + "step": 22050 + }, + { + "epoch": 0.5388073192778443, + "grad_norm": 2.106815814971924, + "learning_rate": 1.9996337081399508e-06, + "loss": 0.0114, + "num_input_tokens_seen": 14842496, + "step": 22055 + }, + { + "epoch": 0.5389294701096915, + "grad_norm": 0.8564073443412781, + "learning_rate": 1.9996313965688134e-06, + "loss": 0.2205, + "num_input_tokens_seen": 14845760, + "step": 22060 + }, + { + "epoch": 0.5390516209415386, + "grad_norm": 5.316920757293701, + "learning_rate": 1.9996290777280873e-06, + "loss": 0.0879, + "num_input_tokens_seen": 14849088, + "step": 22065 + }, + { + "epoch": 0.5391737717733858, + "grad_norm": 5.526222229003906, + "learning_rate": 1.99962675161779e-06, + "loss": 0.0739, + "num_input_tokens_seen": 14852416, + "step": 22070 + }, + { + "epoch": 0.539295922605233, + "grad_norm": 33.81382369995117, + "learning_rate": 1.9996244182379376e-06, + "loss": 0.2628, + "num_input_tokens_seen": 14856000, + "step": 22075 + }, + { + "epoch": 0.5394180734370801, + "grad_norm": 0.3070218861103058, + "learning_rate": 1.9996220775885484e-06, + "loss": 0.1335, + "num_input_tokens_seen": 14859200, + "step": 22080 + }, + { + "epoch": 0.5395402242689272, + "grad_norm": 20.04486656188965, + "learning_rate": 1.999619729669638e-06, + "loss": 0.0093, + "num_input_tokens_seen": 14862464, + "step": 22085 + }, + { + "epoch": 0.5396623751007744, + "grad_norm": 19.408672332763672, + "learning_rate": 1.999617374481224e-06, + "loss": 0.0814, + "num_input_tokens_seen": 14865728, + "step": 22090 + }, + { + "epoch": 0.5397845259326216, + "grad_norm": 10.519676208496094, + "learning_rate": 1.999615012023324e-06, + "loss": 0.1876, + "num_input_tokens_seen": 14869056, + "step": 22095 + }, + { + "epoch": 0.5399066767644688, + "grad_norm": 35.646114349365234, + "learning_rate": 1.9996126422959544e-06, + "loss": 0.238, + "num_input_tokens_seen": 14872704, + "step": 22100 + }, + { + "epoch": 0.540028827596316, + "grad_norm": 17.06508445739746, + "learning_rate": 1.9996102652991332e-06, + "loss": 0.1577, + "num_input_tokens_seen": 14876032, + "step": 22105 + }, + { + "epoch": 0.5401509784281631, + "grad_norm": 2.0623927116394043, + "learning_rate": 1.9996078810328767e-06, + "loss": 0.1087, + "num_input_tokens_seen": 14880448, + "step": 22110 + }, + { + "epoch": 0.5402731292600103, + "grad_norm": 19.45257568359375, + "learning_rate": 1.9996054894972035e-06, + "loss": 0.156, + "num_input_tokens_seen": 14883584, + "step": 22115 + }, + { + "epoch": 0.5403952800918574, + "grad_norm": 26.047109603881836, + "learning_rate": 1.99960309069213e-06, + "loss": 0.1056, + "num_input_tokens_seen": 14888960, + "step": 22120 + }, + { + "epoch": 0.5405174309237046, + "grad_norm": 28.765560150146484, + "learning_rate": 1.999600684617674e-06, + "loss": 0.1507, + "num_input_tokens_seen": 14892544, + "step": 22125 + }, + { + "epoch": 0.5406395817555517, + "grad_norm": 31.686187744140625, + "learning_rate": 1.999598271273853e-06, + "loss": 0.1527, + "num_input_tokens_seen": 14896064, + "step": 22130 + }, + { + "epoch": 0.5407617325873989, + "grad_norm": 40.38404083251953, + "learning_rate": 1.9995958506606843e-06, + "loss": 0.1882, + "num_input_tokens_seen": 14899072, + "step": 22135 + }, + { + "epoch": 0.5408838834192461, + "grad_norm": 0.6900488138198853, + "learning_rate": 1.999593422778186e-06, + "loss": 0.0974, + "num_input_tokens_seen": 14902528, + "step": 22140 + }, + { + "epoch": 0.5410060342510933, + "grad_norm": 2.6025142669677734, + "learning_rate": 1.9995909876263753e-06, + "loss": 0.1271, + "num_input_tokens_seen": 14905792, + "step": 22145 + }, + { + "epoch": 0.5411281850829404, + "grad_norm": 9.224105834960938, + "learning_rate": 1.99958854520527e-06, + "loss": 0.1134, + "num_input_tokens_seen": 14909184, + "step": 22150 + }, + { + "epoch": 0.5412503359147876, + "grad_norm": 3.023387908935547, + "learning_rate": 1.9995860955148884e-06, + "loss": 0.097, + "num_input_tokens_seen": 14912512, + "step": 22155 + }, + { + "epoch": 0.5413724867466347, + "grad_norm": 1.4665299654006958, + "learning_rate": 1.999583638555247e-06, + "loss": 0.0944, + "num_input_tokens_seen": 14915712, + "step": 22160 + }, + { + "epoch": 0.5414946375784819, + "grad_norm": 21.681215286254883, + "learning_rate": 1.999581174326365e-06, + "loss": 0.0748, + "num_input_tokens_seen": 14919168, + "step": 22165 + }, + { + "epoch": 0.5416167884103291, + "grad_norm": 35.79258346557617, + "learning_rate": 1.99957870282826e-06, + "loss": 0.1841, + "num_input_tokens_seen": 14923072, + "step": 22170 + }, + { + "epoch": 0.5417389392421762, + "grad_norm": 26.38593864440918, + "learning_rate": 1.99957622406095e-06, + "loss": 0.1218, + "num_input_tokens_seen": 14926720, + "step": 22175 + }, + { + "epoch": 0.5418610900740234, + "grad_norm": 1.901475191116333, + "learning_rate": 1.9995737380244523e-06, + "loss": 0.0952, + "num_input_tokens_seen": 14929856, + "step": 22180 + }, + { + "epoch": 0.5419832409058706, + "grad_norm": 14.192605972290039, + "learning_rate": 1.999571244718786e-06, + "loss": 0.1337, + "num_input_tokens_seen": 14933056, + "step": 22185 + }, + { + "epoch": 0.5421053917377178, + "grad_norm": 12.209223747253418, + "learning_rate": 1.9995687441439685e-06, + "loss": 0.104, + "num_input_tokens_seen": 14936320, + "step": 22190 + }, + { + "epoch": 0.5422275425695648, + "grad_norm": 0.9199336767196655, + "learning_rate": 1.9995662363000184e-06, + "loss": 0.0855, + "num_input_tokens_seen": 14939264, + "step": 22195 + }, + { + "epoch": 0.542349693401412, + "grad_norm": 23.20319938659668, + "learning_rate": 1.999563721186953e-06, + "loss": 0.1127, + "num_input_tokens_seen": 14942976, + "step": 22200 + }, + { + "epoch": 0.5424718442332592, + "grad_norm": 2.204730272293091, + "learning_rate": 1.9995611988047926e-06, + "loss": 0.0067, + "num_input_tokens_seen": 14946304, + "step": 22205 + }, + { + "epoch": 0.5425939950651064, + "grad_norm": 15.761679649353027, + "learning_rate": 1.9995586691535537e-06, + "loss": 0.1645, + "num_input_tokens_seen": 14950336, + "step": 22210 + }, + { + "epoch": 0.5427161458969536, + "grad_norm": 9.355384826660156, + "learning_rate": 1.999556132233255e-06, + "loss": 0.0654, + "num_input_tokens_seen": 14953600, + "step": 22215 + }, + { + "epoch": 0.5428382967288007, + "grad_norm": 4.341427326202393, + "learning_rate": 1.9995535880439158e-06, + "loss": 0.1185, + "num_input_tokens_seen": 14956736, + "step": 22220 + }, + { + "epoch": 0.5429604475606479, + "grad_norm": 7.728878498077393, + "learning_rate": 1.999551036585554e-06, + "loss": 0.0389, + "num_input_tokens_seen": 14959808, + "step": 22225 + }, + { + "epoch": 0.5430825983924951, + "grad_norm": 3.8340866565704346, + "learning_rate": 1.999548477858188e-06, + "loss": 0.1265, + "num_input_tokens_seen": 14963008, + "step": 22230 + }, + { + "epoch": 0.5432047492243423, + "grad_norm": 23.842802047729492, + "learning_rate": 1.9995459118618364e-06, + "loss": 0.08, + "num_input_tokens_seen": 14965952, + "step": 22235 + }, + { + "epoch": 0.5433269000561893, + "grad_norm": 44.32612228393555, + "learning_rate": 1.9995433385965187e-06, + "loss": 0.2909, + "num_input_tokens_seen": 14969216, + "step": 22240 + }, + { + "epoch": 0.5434490508880365, + "grad_norm": 11.29781723022461, + "learning_rate": 1.9995407580622526e-06, + "loss": 0.2292, + "num_input_tokens_seen": 14972864, + "step": 22245 + }, + { + "epoch": 0.5435712017198837, + "grad_norm": 31.859079360961914, + "learning_rate": 1.9995381702590572e-06, + "loss": 0.0804, + "num_input_tokens_seen": 14976192, + "step": 22250 + }, + { + "epoch": 0.5436933525517309, + "grad_norm": 26.400442123413086, + "learning_rate": 1.9995355751869517e-06, + "loss": 0.2389, + "num_input_tokens_seen": 14979520, + "step": 22255 + }, + { + "epoch": 0.5438155033835781, + "grad_norm": 15.490806579589844, + "learning_rate": 1.9995329728459545e-06, + "loss": 0.1369, + "num_input_tokens_seen": 14982592, + "step": 22260 + }, + { + "epoch": 0.5439376542154252, + "grad_norm": 32.64035415649414, + "learning_rate": 1.999530363236085e-06, + "loss": 0.113, + "num_input_tokens_seen": 14986240, + "step": 22265 + }, + { + "epoch": 0.5440598050472724, + "grad_norm": 15.278639793395996, + "learning_rate": 1.9995277463573612e-06, + "loss": 0.0672, + "num_input_tokens_seen": 14989568, + "step": 22270 + }, + { + "epoch": 0.5441819558791195, + "grad_norm": 17.714778900146484, + "learning_rate": 1.999525122209803e-06, + "loss": 0.0729, + "num_input_tokens_seen": 14993280, + "step": 22275 + }, + { + "epoch": 0.5443041067109667, + "grad_norm": 15.31547737121582, + "learning_rate": 1.9995224907934295e-06, + "loss": 0.07, + "num_input_tokens_seen": 14997440, + "step": 22280 + }, + { + "epoch": 0.5444262575428138, + "grad_norm": 22.41041374206543, + "learning_rate": 1.9995198521082594e-06, + "loss": 0.0847, + "num_input_tokens_seen": 15000960, + "step": 22285 + }, + { + "epoch": 0.544548408374661, + "grad_norm": 10.246545791625977, + "learning_rate": 1.999517206154312e-06, + "loss": 0.0509, + "num_input_tokens_seen": 15004096, + "step": 22290 + }, + { + "epoch": 0.5446705592065082, + "grad_norm": 22.861297607421875, + "learning_rate": 1.999514552931607e-06, + "loss": 0.1495, + "num_input_tokens_seen": 15007680, + "step": 22295 + }, + { + "epoch": 0.5447927100383554, + "grad_norm": 35.24309158325195, + "learning_rate": 1.9995118924401632e-06, + "loss": 0.151, + "num_input_tokens_seen": 15011072, + "step": 22300 + }, + { + "epoch": 0.5449148608702026, + "grad_norm": 0.9900000095367432, + "learning_rate": 1.99950922468e-06, + "loss": 0.1137, + "num_input_tokens_seen": 15014208, + "step": 22305 + }, + { + "epoch": 0.5450370117020497, + "grad_norm": 13.241475105285645, + "learning_rate": 1.9995065496511367e-06, + "loss": 0.0952, + "num_input_tokens_seen": 15017344, + "step": 22310 + }, + { + "epoch": 0.5451591625338968, + "grad_norm": 35.15651321411133, + "learning_rate": 1.9995038673535933e-06, + "loss": 0.1833, + "num_input_tokens_seen": 15020992, + "step": 22315 + }, + { + "epoch": 0.545281313365744, + "grad_norm": 11.369232177734375, + "learning_rate": 1.9995011777873887e-06, + "loss": 0.0792, + "num_input_tokens_seen": 15024000, + "step": 22320 + }, + { + "epoch": 0.5454034641975912, + "grad_norm": 8.909126281738281, + "learning_rate": 1.999498480952543e-06, + "loss": 0.1104, + "num_input_tokens_seen": 15027840, + "step": 22325 + }, + { + "epoch": 0.5455256150294383, + "grad_norm": 37.6539306640625, + "learning_rate": 1.999495776849075e-06, + "loss": 0.2498, + "num_input_tokens_seen": 15031104, + "step": 22330 + }, + { + "epoch": 0.5456477658612855, + "grad_norm": 9.263086318969727, + "learning_rate": 1.999493065477005e-06, + "loss": 0.1117, + "num_input_tokens_seen": 15034432, + "step": 22335 + }, + { + "epoch": 0.5457699166931327, + "grad_norm": 8.902159690856934, + "learning_rate": 1.999490346836353e-06, + "loss": 0.0532, + "num_input_tokens_seen": 15037824, + "step": 22340 + }, + { + "epoch": 0.5458920675249799, + "grad_norm": 32.49031448364258, + "learning_rate": 1.999487620927138e-06, + "loss": 0.0565, + "num_input_tokens_seen": 15040960, + "step": 22345 + }, + { + "epoch": 0.5460142183568271, + "grad_norm": 11.853031158447266, + "learning_rate": 1.9994848877493806e-06, + "loss": 0.0731, + "num_input_tokens_seen": 15043904, + "step": 22350 + }, + { + "epoch": 0.5461363691886741, + "grad_norm": 1.538690209388733, + "learning_rate": 1.9994821473031e-06, + "loss": 0.1369, + "num_input_tokens_seen": 15046848, + "step": 22355 + }, + { + "epoch": 0.5462585200205213, + "grad_norm": 3.006669521331787, + "learning_rate": 1.9994793995883165e-06, + "loss": 0.1618, + "num_input_tokens_seen": 15050176, + "step": 22360 + }, + { + "epoch": 0.5463806708523685, + "grad_norm": 2.1008987426757812, + "learning_rate": 1.9994766446050497e-06, + "loss": 0.1655, + "num_input_tokens_seen": 15053632, + "step": 22365 + }, + { + "epoch": 0.5465028216842157, + "grad_norm": 26.6789608001709, + "learning_rate": 1.9994738823533203e-06, + "loss": 0.0954, + "num_input_tokens_seen": 15057088, + "step": 22370 + }, + { + "epoch": 0.5466249725160628, + "grad_norm": 15.680182456970215, + "learning_rate": 1.9994711128331474e-06, + "loss": 0.0837, + "num_input_tokens_seen": 15060224, + "step": 22375 + }, + { + "epoch": 0.54674712334791, + "grad_norm": 4.9395318031311035, + "learning_rate": 1.9994683360445522e-06, + "loss": 0.1012, + "num_input_tokens_seen": 15063424, + "step": 22380 + }, + { + "epoch": 0.5468692741797572, + "grad_norm": 10.766144752502441, + "learning_rate": 1.9994655519875546e-06, + "loss": 0.0936, + "num_input_tokens_seen": 15066368, + "step": 22385 + }, + { + "epoch": 0.5469914250116044, + "grad_norm": 11.765340805053711, + "learning_rate": 1.9994627606621745e-06, + "loss": 0.0797, + "num_input_tokens_seen": 15070016, + "step": 22390 + }, + { + "epoch": 0.5471135758434514, + "grad_norm": 12.449207305908203, + "learning_rate": 1.999459962068432e-06, + "loss": 0.2556, + "num_input_tokens_seen": 15073152, + "step": 22395 + }, + { + "epoch": 0.5472357266752986, + "grad_norm": 19.93573760986328, + "learning_rate": 1.9994571562063483e-06, + "loss": 0.2031, + "num_input_tokens_seen": 15076480, + "step": 22400 + }, + { + "epoch": 0.5473578775071458, + "grad_norm": 3.5067667961120605, + "learning_rate": 1.999454343075943e-06, + "loss": 0.1312, + "num_input_tokens_seen": 15079680, + "step": 22405 + }, + { + "epoch": 0.547480028338993, + "grad_norm": 9.037195205688477, + "learning_rate": 1.9994515226772373e-06, + "loss": 0.089, + "num_input_tokens_seen": 15082752, + "step": 22410 + }, + { + "epoch": 0.5476021791708402, + "grad_norm": 18.904560089111328, + "learning_rate": 1.9994486950102512e-06, + "loss": 0.0578, + "num_input_tokens_seen": 15086016, + "step": 22415 + }, + { + "epoch": 0.5477243300026873, + "grad_norm": 0.3442266285419464, + "learning_rate": 1.9994458600750054e-06, + "loss": 0.0682, + "num_input_tokens_seen": 15089408, + "step": 22420 + }, + { + "epoch": 0.5478464808345345, + "grad_norm": 11.249054908752441, + "learning_rate": 1.99944301787152e-06, + "loss": 0.1, + "num_input_tokens_seen": 15092544, + "step": 22425 + }, + { + "epoch": 0.5479686316663817, + "grad_norm": 27.72504234313965, + "learning_rate": 1.999440168399817e-06, + "loss": 0.1168, + "num_input_tokens_seen": 15096000, + "step": 22430 + }, + { + "epoch": 0.5480907824982288, + "grad_norm": 22.7575626373291, + "learning_rate": 1.9994373116599155e-06, + "loss": 0.0899, + "num_input_tokens_seen": 15099136, + "step": 22435 + }, + { + "epoch": 0.5482129333300759, + "grad_norm": 24.837921142578125, + "learning_rate": 1.9994344476518376e-06, + "loss": 0.1968, + "num_input_tokens_seen": 15102720, + "step": 22440 + }, + { + "epoch": 0.5483350841619231, + "grad_norm": 44.41413879394531, + "learning_rate": 1.9994315763756033e-06, + "loss": 0.1988, + "num_input_tokens_seen": 15106432, + "step": 22445 + }, + { + "epoch": 0.5484572349937703, + "grad_norm": 15.867010116577148, + "learning_rate": 1.9994286978312338e-06, + "loss": 0.0623, + "num_input_tokens_seen": 15109888, + "step": 22450 + }, + { + "epoch": 0.5485793858256175, + "grad_norm": 2.3468613624572754, + "learning_rate": 1.99942581201875e-06, + "loss": 0.121, + "num_input_tokens_seen": 15113472, + "step": 22455 + }, + { + "epoch": 0.5487015366574647, + "grad_norm": 28.272499084472656, + "learning_rate": 1.9994229189381726e-06, + "loss": 0.0837, + "num_input_tokens_seen": 15116544, + "step": 22460 + }, + { + "epoch": 0.5488236874893118, + "grad_norm": 64.57221984863281, + "learning_rate": 1.9994200185895233e-06, + "loss": 0.1636, + "num_input_tokens_seen": 15120000, + "step": 22465 + }, + { + "epoch": 0.548945838321159, + "grad_norm": 29.440067291259766, + "learning_rate": 1.9994171109728227e-06, + "loss": 0.1474, + "num_input_tokens_seen": 15123136, + "step": 22470 + }, + { + "epoch": 0.5490679891530061, + "grad_norm": 31.00202178955078, + "learning_rate": 1.999414196088092e-06, + "loss": 0.1026, + "num_input_tokens_seen": 15126592, + "step": 22475 + }, + { + "epoch": 0.5491901399848533, + "grad_norm": 9.43714427947998, + "learning_rate": 1.9994112739353526e-06, + "loss": 0.0944, + "num_input_tokens_seen": 15129984, + "step": 22480 + }, + { + "epoch": 0.5493122908167004, + "grad_norm": 30.70720863342285, + "learning_rate": 1.9994083445146255e-06, + "loss": 0.1571, + "num_input_tokens_seen": 15133504, + "step": 22485 + }, + { + "epoch": 0.5494344416485476, + "grad_norm": 30.060976028442383, + "learning_rate": 1.999405407825932e-06, + "loss": 0.0858, + "num_input_tokens_seen": 15136576, + "step": 22490 + }, + { + "epoch": 0.5495565924803948, + "grad_norm": 22.569185256958008, + "learning_rate": 1.999402463869294e-06, + "loss": 0.1534, + "num_input_tokens_seen": 15140224, + "step": 22495 + }, + { + "epoch": 0.549678743312242, + "grad_norm": 4.827227592468262, + "learning_rate": 1.9993995126447325e-06, + "loss": 0.1497, + "num_input_tokens_seen": 15143488, + "step": 22500 + }, + { + "epoch": 0.5498008941440892, + "grad_norm": 35.69637680053711, + "learning_rate": 1.9993965541522684e-06, + "loss": 0.1645, + "num_input_tokens_seen": 15146816, + "step": 22505 + }, + { + "epoch": 0.5499230449759362, + "grad_norm": 1.8859728574752808, + "learning_rate": 1.999393588391924e-06, + "loss": 0.1014, + "num_input_tokens_seen": 15150208, + "step": 22510 + }, + { + "epoch": 0.5500451958077834, + "grad_norm": 12.349617004394531, + "learning_rate": 1.9993906153637204e-06, + "loss": 0.0376, + "num_input_tokens_seen": 15153664, + "step": 22515 + }, + { + "epoch": 0.5501673466396306, + "grad_norm": 49.3721809387207, + "learning_rate": 1.9993876350676796e-06, + "loss": 0.0523, + "num_input_tokens_seen": 15157248, + "step": 22520 + }, + { + "epoch": 0.5502894974714778, + "grad_norm": 20.546186447143555, + "learning_rate": 1.999384647503823e-06, + "loss": 0.1262, + "num_input_tokens_seen": 15160256, + "step": 22525 + }, + { + "epoch": 0.5504116483033249, + "grad_norm": 17.570690155029297, + "learning_rate": 1.9993816526721725e-06, + "loss": 0.2036, + "num_input_tokens_seen": 15163712, + "step": 22530 + }, + { + "epoch": 0.5505337991351721, + "grad_norm": 33.04792785644531, + "learning_rate": 1.9993786505727503e-06, + "loss": 0.0762, + "num_input_tokens_seen": 15166912, + "step": 22535 + }, + { + "epoch": 0.5506559499670193, + "grad_norm": 1.1662955284118652, + "learning_rate": 1.9993756412055773e-06, + "loss": 0.1405, + "num_input_tokens_seen": 15170432, + "step": 22540 + }, + { + "epoch": 0.5507781007988665, + "grad_norm": 15.58870792388916, + "learning_rate": 1.999372624570676e-06, + "loss": 0.2726, + "num_input_tokens_seen": 15173760, + "step": 22545 + }, + { + "epoch": 0.5509002516307137, + "grad_norm": 24.114355087280273, + "learning_rate": 1.999369600668068e-06, + "loss": 0.0718, + "num_input_tokens_seen": 15176704, + "step": 22550 + }, + { + "epoch": 0.5510224024625607, + "grad_norm": 9.267765998840332, + "learning_rate": 1.9993665694977755e-06, + "loss": 0.122, + "num_input_tokens_seen": 15180096, + "step": 22555 + }, + { + "epoch": 0.5511445532944079, + "grad_norm": 0.6705265045166016, + "learning_rate": 1.9993635310598207e-06, + "loss": 0.0551, + "num_input_tokens_seen": 15183168, + "step": 22560 + }, + { + "epoch": 0.5512667041262551, + "grad_norm": 1.3118199110031128, + "learning_rate": 1.9993604853542254e-06, + "loss": 0.0613, + "num_input_tokens_seen": 15186560, + "step": 22565 + }, + { + "epoch": 0.5513888549581023, + "grad_norm": 0.7696697115898132, + "learning_rate": 1.9993574323810115e-06, + "loss": 0.0921, + "num_input_tokens_seen": 15189824, + "step": 22570 + }, + { + "epoch": 0.5515110057899494, + "grad_norm": 15.944690704345703, + "learning_rate": 1.999354372140202e-06, + "loss": 0.1304, + "num_input_tokens_seen": 15192960, + "step": 22575 + }, + { + "epoch": 0.5516331566217966, + "grad_norm": 28.77052879333496, + "learning_rate": 1.9993513046318186e-06, + "loss": 0.1192, + "num_input_tokens_seen": 15196544, + "step": 22580 + }, + { + "epoch": 0.5517553074536438, + "grad_norm": 34.70576858520508, + "learning_rate": 1.9993482298558836e-06, + "loss": 0.1632, + "num_input_tokens_seen": 15199552, + "step": 22585 + }, + { + "epoch": 0.551877458285491, + "grad_norm": 15.37161922454834, + "learning_rate": 1.99934514781242e-06, + "loss": 0.0365, + "num_input_tokens_seen": 15202880, + "step": 22590 + }, + { + "epoch": 0.5519996091173381, + "grad_norm": 1.150341272354126, + "learning_rate": 1.999342058501449e-06, + "loss": 0.1904, + "num_input_tokens_seen": 15205952, + "step": 22595 + }, + { + "epoch": 0.5521217599491852, + "grad_norm": 14.19490909576416, + "learning_rate": 1.999338961922994e-06, + "loss": 0.0517, + "num_input_tokens_seen": 15208896, + "step": 22600 + }, + { + "epoch": 0.5522439107810324, + "grad_norm": 10.007599830627441, + "learning_rate": 1.9993358580770774e-06, + "loss": 0.2134, + "num_input_tokens_seen": 15212224, + "step": 22605 + }, + { + "epoch": 0.5523660616128796, + "grad_norm": 13.70540714263916, + "learning_rate": 1.9993327469637215e-06, + "loss": 0.1293, + "num_input_tokens_seen": 15215360, + "step": 22610 + }, + { + "epoch": 0.5524882124447268, + "grad_norm": 0.5830227136611938, + "learning_rate": 1.9993296285829492e-06, + "loss": 0.0241, + "num_input_tokens_seen": 15219136, + "step": 22615 + }, + { + "epoch": 0.5526103632765739, + "grad_norm": 0.17814908921718597, + "learning_rate": 1.999326502934783e-06, + "loss": 0.1111, + "num_input_tokens_seen": 15222720, + "step": 22620 + }, + { + "epoch": 0.5527325141084211, + "grad_norm": 31.84588623046875, + "learning_rate": 1.9993233700192454e-06, + "loss": 0.1049, + "num_input_tokens_seen": 15225856, + "step": 22625 + }, + { + "epoch": 0.5528546649402682, + "grad_norm": 8.72294807434082, + "learning_rate": 1.99932022983636e-06, + "loss": 0.1601, + "num_input_tokens_seen": 15229120, + "step": 22630 + }, + { + "epoch": 0.5529768157721154, + "grad_norm": 30.907611846923828, + "learning_rate": 1.9993170823861488e-06, + "loss": 0.2518, + "num_input_tokens_seen": 15232384, + "step": 22635 + }, + { + "epoch": 0.5530989666039626, + "grad_norm": 0.607671856880188, + "learning_rate": 1.999313927668635e-06, + "loss": 0.0603, + "num_input_tokens_seen": 15235904, + "step": 22640 + }, + { + "epoch": 0.5532211174358097, + "grad_norm": 3.4030096530914307, + "learning_rate": 1.9993107656838415e-06, + "loss": 0.0919, + "num_input_tokens_seen": 15239296, + "step": 22645 + }, + { + "epoch": 0.5533432682676569, + "grad_norm": 7.613509178161621, + "learning_rate": 1.9993075964317912e-06, + "loss": 0.092, + "num_input_tokens_seen": 15242624, + "step": 22650 + }, + { + "epoch": 0.5534654190995041, + "grad_norm": 24.67089080810547, + "learning_rate": 1.999304419912508e-06, + "loss": 0.0941, + "num_input_tokens_seen": 15246464, + "step": 22655 + }, + { + "epoch": 0.5535875699313513, + "grad_norm": 83.90634155273438, + "learning_rate": 1.9993012361260134e-06, + "loss": 0.1296, + "num_input_tokens_seen": 15249984, + "step": 22660 + }, + { + "epoch": 0.5537097207631984, + "grad_norm": 23.177980422973633, + "learning_rate": 1.999298045072332e-06, + "loss": 0.1609, + "num_input_tokens_seen": 15253184, + "step": 22665 + }, + { + "epoch": 0.5538318715950455, + "grad_norm": 19.674943923950195, + "learning_rate": 1.999294846751486e-06, + "loss": 0.1717, + "num_input_tokens_seen": 15257280, + "step": 22670 + }, + { + "epoch": 0.5539540224268927, + "grad_norm": 61.09177780151367, + "learning_rate": 1.9992916411634995e-06, + "loss": 0.1045, + "num_input_tokens_seen": 15260288, + "step": 22675 + }, + { + "epoch": 0.5540761732587399, + "grad_norm": 0.37195536494255066, + "learning_rate": 1.9992884283083954e-06, + "loss": 0.0683, + "num_input_tokens_seen": 15263424, + "step": 22680 + }, + { + "epoch": 0.554198324090587, + "grad_norm": 21.757293701171875, + "learning_rate": 1.9992852081861967e-06, + "loss": 0.345, + "num_input_tokens_seen": 15267008, + "step": 22685 + }, + { + "epoch": 0.5543204749224342, + "grad_norm": 0.7218241095542908, + "learning_rate": 1.9992819807969275e-06, + "loss": 0.1971, + "num_input_tokens_seen": 15270208, + "step": 22690 + }, + { + "epoch": 0.5544426257542814, + "grad_norm": 17.101781845092773, + "learning_rate": 1.9992787461406107e-06, + "loss": 0.0953, + "num_input_tokens_seen": 15274048, + "step": 22695 + }, + { + "epoch": 0.5545647765861286, + "grad_norm": 36.492496490478516, + "learning_rate": 1.9992755042172705e-06, + "loss": 0.1193, + "num_input_tokens_seen": 15277760, + "step": 22700 + }, + { + "epoch": 0.5546869274179758, + "grad_norm": 1.468518614768982, + "learning_rate": 1.9992722550269296e-06, + "loss": 0.0082, + "num_input_tokens_seen": 15281664, + "step": 22705 + }, + { + "epoch": 0.5548090782498228, + "grad_norm": 33.85728454589844, + "learning_rate": 1.9992689985696123e-06, + "loss": 0.1653, + "num_input_tokens_seen": 15284928, + "step": 22710 + }, + { + "epoch": 0.55493122908167, + "grad_norm": 0.2482694536447525, + "learning_rate": 1.999265734845342e-06, + "loss": 0.0878, + "num_input_tokens_seen": 15288448, + "step": 22715 + }, + { + "epoch": 0.5550533799135172, + "grad_norm": 17.697750091552734, + "learning_rate": 1.9992624638541425e-06, + "loss": 0.2041, + "num_input_tokens_seen": 15291200, + "step": 22720 + }, + { + "epoch": 0.5551755307453644, + "grad_norm": 6.032927989959717, + "learning_rate": 1.9992591855960377e-06, + "loss": 0.1671, + "num_input_tokens_seen": 15294592, + "step": 22725 + }, + { + "epoch": 0.5552976815772115, + "grad_norm": 37.14913558959961, + "learning_rate": 1.9992559000710514e-06, + "loss": 0.1456, + "num_input_tokens_seen": 15297600, + "step": 22730 + }, + { + "epoch": 0.5554198324090587, + "grad_norm": 1.839289903640747, + "learning_rate": 1.9992526072792077e-06, + "loss": 0.1581, + "num_input_tokens_seen": 15300992, + "step": 22735 + }, + { + "epoch": 0.5555419832409059, + "grad_norm": 16.924457550048828, + "learning_rate": 1.9992493072205298e-06, + "loss": 0.2398, + "num_input_tokens_seen": 15304448, + "step": 22740 + }, + { + "epoch": 0.5556641340727531, + "grad_norm": 0.8010534048080444, + "learning_rate": 1.999245999895042e-06, + "loss": 0.1129, + "num_input_tokens_seen": 15307584, + "step": 22745 + }, + { + "epoch": 0.5557862849046002, + "grad_norm": 10.311168670654297, + "learning_rate": 1.999242685302769e-06, + "loss": 0.1458, + "num_input_tokens_seen": 15310848, + "step": 22750 + }, + { + "epoch": 0.5559084357364473, + "grad_norm": 18.564916610717773, + "learning_rate": 1.9992393634437343e-06, + "loss": 0.0751, + "num_input_tokens_seen": 15314624, + "step": 22755 + }, + { + "epoch": 0.5560305865682945, + "grad_norm": 0.27975115180015564, + "learning_rate": 1.999236034317962e-06, + "loss": 0.0323, + "num_input_tokens_seen": 15317888, + "step": 22760 + }, + { + "epoch": 0.5561527374001417, + "grad_norm": 1.1480165719985962, + "learning_rate": 1.9992326979254764e-06, + "loss": 0.1697, + "num_input_tokens_seen": 15321216, + "step": 22765 + }, + { + "epoch": 0.5562748882319889, + "grad_norm": 11.348422050476074, + "learning_rate": 1.9992293542663023e-06, + "loss": 0.097, + "num_input_tokens_seen": 15324416, + "step": 22770 + }, + { + "epoch": 0.556397039063836, + "grad_norm": 28.212505340576172, + "learning_rate": 1.999226003340463e-06, + "loss": 0.1083, + "num_input_tokens_seen": 15327936, + "step": 22775 + }, + { + "epoch": 0.5565191898956832, + "grad_norm": 16.56534194946289, + "learning_rate": 1.999222645147984e-06, + "loss": 0.1821, + "num_input_tokens_seen": 15331264, + "step": 22780 + }, + { + "epoch": 0.5566413407275304, + "grad_norm": 8.404296875, + "learning_rate": 1.999219279688889e-06, + "loss": 0.08, + "num_input_tokens_seen": 15334400, + "step": 22785 + }, + { + "epoch": 0.5567634915593775, + "grad_norm": 8.955206871032715, + "learning_rate": 1.999215906963203e-06, + "loss": 0.112, + "num_input_tokens_seen": 15337792, + "step": 22790 + }, + { + "epoch": 0.5568856423912247, + "grad_norm": 1.1889548301696777, + "learning_rate": 1.9992125269709494e-06, + "loss": 0.0274, + "num_input_tokens_seen": 15341504, + "step": 22795 + }, + { + "epoch": 0.5570077932230718, + "grad_norm": 12.320734977722168, + "learning_rate": 1.9992091397121536e-06, + "loss": 0.0542, + "num_input_tokens_seen": 15344704, + "step": 22800 + }, + { + "epoch": 0.557129944054919, + "grad_norm": 0.4388432204723358, + "learning_rate": 1.999205745186841e-06, + "loss": 0.1122, + "num_input_tokens_seen": 15347648, + "step": 22805 + }, + { + "epoch": 0.5572520948867662, + "grad_norm": 1.2065421342849731, + "learning_rate": 1.9992023433950346e-06, + "loss": 0.0918, + "num_input_tokens_seen": 15351360, + "step": 22810 + }, + { + "epoch": 0.5573742457186134, + "grad_norm": 0.21142229437828064, + "learning_rate": 1.9991989343367604e-06, + "loss": 0.2024, + "num_input_tokens_seen": 15354624, + "step": 22815 + }, + { + "epoch": 0.5574963965504605, + "grad_norm": 1.033294916152954, + "learning_rate": 1.9991955180120426e-06, + "loss": 0.1199, + "num_input_tokens_seen": 15357952, + "step": 22820 + }, + { + "epoch": 0.5576185473823076, + "grad_norm": 59.05493927001953, + "learning_rate": 1.9991920944209065e-06, + "loss": 0.1254, + "num_input_tokens_seen": 15361408, + "step": 22825 + }, + { + "epoch": 0.5577406982141548, + "grad_norm": 21.44487190246582, + "learning_rate": 1.9991886635633768e-06, + "loss": 0.1779, + "num_input_tokens_seen": 15364672, + "step": 22830 + }, + { + "epoch": 0.557862849046002, + "grad_norm": 2.3592638969421387, + "learning_rate": 1.9991852254394783e-06, + "loss": 0.146, + "num_input_tokens_seen": 15367936, + "step": 22835 + }, + { + "epoch": 0.5579849998778492, + "grad_norm": 30.120269775390625, + "learning_rate": 1.9991817800492357e-06, + "loss": 0.1583, + "num_input_tokens_seen": 15370880, + "step": 22840 + }, + { + "epoch": 0.5581071507096963, + "grad_norm": 21.18474578857422, + "learning_rate": 1.999178327392675e-06, + "loss": 0.0518, + "num_input_tokens_seen": 15374144, + "step": 22845 + }, + { + "epoch": 0.5582293015415435, + "grad_norm": 0.8629884719848633, + "learning_rate": 1.9991748674698202e-06, + "loss": 0.0418, + "num_input_tokens_seen": 15376960, + "step": 22850 + }, + { + "epoch": 0.5583514523733907, + "grad_norm": 9.767329216003418, + "learning_rate": 1.9991714002806977e-06, + "loss": 0.1065, + "num_input_tokens_seen": 15380608, + "step": 22855 + }, + { + "epoch": 0.5584736032052379, + "grad_norm": 11.950826644897461, + "learning_rate": 1.9991679258253314e-06, + "loss": 0.1124, + "num_input_tokens_seen": 15383872, + "step": 22860 + }, + { + "epoch": 0.558595754037085, + "grad_norm": 22.139690399169922, + "learning_rate": 1.9991644441037476e-06, + "loss": 0.1763, + "num_input_tokens_seen": 15387072, + "step": 22865 + }, + { + "epoch": 0.5587179048689321, + "grad_norm": 7.289351940155029, + "learning_rate": 1.9991609551159713e-06, + "loss": 0.0812, + "num_input_tokens_seen": 15390528, + "step": 22870 + }, + { + "epoch": 0.5588400557007793, + "grad_norm": 17.15496826171875, + "learning_rate": 1.9991574588620274e-06, + "loss": 0.3103, + "num_input_tokens_seen": 15393856, + "step": 22875 + }, + { + "epoch": 0.5589622065326265, + "grad_norm": 56.3012580871582, + "learning_rate": 1.999153955341942e-06, + "loss": 0.2112, + "num_input_tokens_seen": 15397440, + "step": 22880 + }, + { + "epoch": 0.5590843573644737, + "grad_norm": 2.765378475189209, + "learning_rate": 1.99915044455574e-06, + "loss": 0.0452, + "num_input_tokens_seen": 15400512, + "step": 22885 + }, + { + "epoch": 0.5592065081963208, + "grad_norm": 1.1748318672180176, + "learning_rate": 1.999146926503448e-06, + "loss": 0.1835, + "num_input_tokens_seen": 15403712, + "step": 22890 + }, + { + "epoch": 0.559328659028168, + "grad_norm": 29.10700225830078, + "learning_rate": 1.9991434011850897e-06, + "loss": 0.1538, + "num_input_tokens_seen": 15406976, + "step": 22895 + }, + { + "epoch": 0.5594508098600152, + "grad_norm": 43.47568893432617, + "learning_rate": 1.9991398686006927e-06, + "loss": 0.1319, + "num_input_tokens_seen": 15410432, + "step": 22900 + }, + { + "epoch": 0.5595729606918624, + "grad_norm": 20.069847106933594, + "learning_rate": 1.9991363287502816e-06, + "loss": 0.1377, + "num_input_tokens_seen": 15414016, + "step": 22905 + }, + { + "epoch": 0.5596951115237094, + "grad_norm": 9.078425407409668, + "learning_rate": 1.999132781633882e-06, + "loss": 0.2197, + "num_input_tokens_seen": 15417344, + "step": 22910 + }, + { + "epoch": 0.5598172623555566, + "grad_norm": 31.116849899291992, + "learning_rate": 1.9991292272515204e-06, + "loss": 0.1525, + "num_input_tokens_seen": 15421248, + "step": 22915 + }, + { + "epoch": 0.5599394131874038, + "grad_norm": 3.8848142623901367, + "learning_rate": 1.9991256656032224e-06, + "loss": 0.1281, + "num_input_tokens_seen": 15424320, + "step": 22920 + }, + { + "epoch": 0.560061564019251, + "grad_norm": 13.48864459991455, + "learning_rate": 1.999122096689014e-06, + "loss": 0.1147, + "num_input_tokens_seen": 15427264, + "step": 22925 + }, + { + "epoch": 0.5601837148510982, + "grad_norm": 20.86384391784668, + "learning_rate": 1.9991185205089206e-06, + "loss": 0.0943, + "num_input_tokens_seen": 15430912, + "step": 22930 + }, + { + "epoch": 0.5603058656829453, + "grad_norm": 23.013959884643555, + "learning_rate": 1.9991149370629684e-06, + "loss": 0.0882, + "num_input_tokens_seen": 15434496, + "step": 22935 + }, + { + "epoch": 0.5604280165147925, + "grad_norm": 2.0098519325256348, + "learning_rate": 1.999111346351184e-06, + "loss": 0.1102, + "num_input_tokens_seen": 15438016, + "step": 22940 + }, + { + "epoch": 0.5605501673466396, + "grad_norm": 13.733756065368652, + "learning_rate": 1.9991077483735934e-06, + "loss": 0.2042, + "num_input_tokens_seen": 15441216, + "step": 22945 + }, + { + "epoch": 0.5606723181784868, + "grad_norm": 8.212313652038574, + "learning_rate": 1.9991041431302224e-06, + "loss": 0.0349, + "num_input_tokens_seen": 15444544, + "step": 22950 + }, + { + "epoch": 0.5607944690103339, + "grad_norm": 33.92264175415039, + "learning_rate": 1.9991005306210967e-06, + "loss": 0.2182, + "num_input_tokens_seen": 15448256, + "step": 22955 + }, + { + "epoch": 0.5609166198421811, + "grad_norm": 9.712660789489746, + "learning_rate": 1.999096910846244e-06, + "loss": 0.1111, + "num_input_tokens_seen": 15451584, + "step": 22960 + }, + { + "epoch": 0.5610387706740283, + "grad_norm": 16.679542541503906, + "learning_rate": 1.999093283805689e-06, + "loss": 0.093, + "num_input_tokens_seen": 15455232, + "step": 22965 + }, + { + "epoch": 0.5611609215058755, + "grad_norm": 12.44924545288086, + "learning_rate": 1.99908964949946e-06, + "loss": 0.1077, + "num_input_tokens_seen": 15458432, + "step": 22970 + }, + { + "epoch": 0.5612830723377226, + "grad_norm": 1.9566651582717896, + "learning_rate": 1.9990860079275818e-06, + "loss": 0.2061, + "num_input_tokens_seen": 15461312, + "step": 22975 + }, + { + "epoch": 0.5614052231695698, + "grad_norm": 13.28664493560791, + "learning_rate": 1.9990823590900812e-06, + "loss": 0.0531, + "num_input_tokens_seen": 15465088, + "step": 22980 + }, + { + "epoch": 0.561527374001417, + "grad_norm": 1.9221934080123901, + "learning_rate": 1.9990787029869853e-06, + "loss": 0.1349, + "num_input_tokens_seen": 15468928, + "step": 22985 + }, + { + "epoch": 0.5616495248332641, + "grad_norm": 15.852862358093262, + "learning_rate": 1.9990750396183203e-06, + "loss": 0.113, + "num_input_tokens_seen": 15472512, + "step": 22990 + }, + { + "epoch": 0.5617716756651113, + "grad_norm": 29.440183639526367, + "learning_rate": 1.999071368984113e-06, + "loss": 0.0624, + "num_input_tokens_seen": 15476096, + "step": 22995 + }, + { + "epoch": 0.5618938264969584, + "grad_norm": 30.857269287109375, + "learning_rate": 1.9990676910843897e-06, + "loss": 0.1023, + "num_input_tokens_seen": 15479168, + "step": 23000 + }, + { + "epoch": 0.5620159773288056, + "grad_norm": 4.187375068664551, + "learning_rate": 1.9990640059191775e-06, + "loss": 0.0564, + "num_input_tokens_seen": 15483072, + "step": 23005 + }, + { + "epoch": 0.5621381281606528, + "grad_norm": 0.8417240381240845, + "learning_rate": 1.999060313488503e-06, + "loss": 0.0212, + "num_input_tokens_seen": 15487360, + "step": 23010 + }, + { + "epoch": 0.5622602789925, + "grad_norm": 36.2202033996582, + "learning_rate": 1.9990566137923935e-06, + "loss": 0.1005, + "num_input_tokens_seen": 15491072, + "step": 23015 + }, + { + "epoch": 0.562382429824347, + "grad_norm": 1.2734776735305786, + "learning_rate": 1.9990529068308755e-06, + "loss": 0.1663, + "num_input_tokens_seen": 15494336, + "step": 23020 + }, + { + "epoch": 0.5625045806561942, + "grad_norm": 23.2691593170166, + "learning_rate": 1.999049192603976e-06, + "loss": 0.0967, + "num_input_tokens_seen": 15497920, + "step": 23025 + }, + { + "epoch": 0.5626267314880414, + "grad_norm": 40.35521697998047, + "learning_rate": 1.999045471111722e-06, + "loss": 0.0999, + "num_input_tokens_seen": 15501184, + "step": 23030 + }, + { + "epoch": 0.5627488823198886, + "grad_norm": 2.3825042247772217, + "learning_rate": 1.999041742354141e-06, + "loss": 0.128, + "num_input_tokens_seen": 15505280, + "step": 23035 + }, + { + "epoch": 0.5628710331517358, + "grad_norm": 11.271585464477539, + "learning_rate": 1.9990380063312596e-06, + "loss": 0.1172, + "num_input_tokens_seen": 15508864, + "step": 23040 + }, + { + "epoch": 0.5629931839835829, + "grad_norm": 29.12808609008789, + "learning_rate": 1.999034263043105e-06, + "loss": 0.2176, + "num_input_tokens_seen": 15511808, + "step": 23045 + }, + { + "epoch": 0.5631153348154301, + "grad_norm": 37.746665954589844, + "learning_rate": 1.999030512489704e-06, + "loss": 0.0788, + "num_input_tokens_seen": 15515072, + "step": 23050 + }, + { + "epoch": 0.5632374856472773, + "grad_norm": 13.444511413574219, + "learning_rate": 1.9990267546710853e-06, + "loss": 0.0788, + "num_input_tokens_seen": 15518336, + "step": 23055 + }, + { + "epoch": 0.5633596364791245, + "grad_norm": 28.247379302978516, + "learning_rate": 1.9990229895872747e-06, + "loss": 0.0714, + "num_input_tokens_seen": 15521344, + "step": 23060 + }, + { + "epoch": 0.5634817873109715, + "grad_norm": 34.90865707397461, + "learning_rate": 1.9990192172383004e-06, + "loss": 0.1301, + "num_input_tokens_seen": 15524672, + "step": 23065 + }, + { + "epoch": 0.5636039381428187, + "grad_norm": 1.4494937658309937, + "learning_rate": 1.99901543762419e-06, + "loss": 0.1546, + "num_input_tokens_seen": 15528384, + "step": 23070 + }, + { + "epoch": 0.5637260889746659, + "grad_norm": 0.2677226960659027, + "learning_rate": 1.99901165074497e-06, + "loss": 0.0498, + "num_input_tokens_seen": 15531456, + "step": 23075 + }, + { + "epoch": 0.5638482398065131, + "grad_norm": 57.566551208496094, + "learning_rate": 1.999007856600669e-06, + "loss": 0.0804, + "num_input_tokens_seen": 15534656, + "step": 23080 + }, + { + "epoch": 0.5639703906383603, + "grad_norm": 0.6210819482803345, + "learning_rate": 1.999004055191314e-06, + "loss": 0.0787, + "num_input_tokens_seen": 15537664, + "step": 23085 + }, + { + "epoch": 0.5640925414702074, + "grad_norm": 60.58045196533203, + "learning_rate": 1.9990002465169333e-06, + "loss": 0.1669, + "num_input_tokens_seen": 15541056, + "step": 23090 + }, + { + "epoch": 0.5642146923020546, + "grad_norm": 25.531545639038086, + "learning_rate": 1.9989964305775535e-06, + "loss": 0.116, + "num_input_tokens_seen": 15545024, + "step": 23095 + }, + { + "epoch": 0.5643368431339018, + "grad_norm": 0.1737961322069168, + "learning_rate": 1.998992607373203e-06, + "loss": 0.1408, + "num_input_tokens_seen": 15548608, + "step": 23100 + }, + { + "epoch": 0.564458993965749, + "grad_norm": 5.435920715332031, + "learning_rate": 1.9989887769039097e-06, + "loss": 0.0136, + "num_input_tokens_seen": 15551680, + "step": 23105 + }, + { + "epoch": 0.564581144797596, + "grad_norm": 21.51247787475586, + "learning_rate": 1.9989849391697013e-06, + "loss": 0.1633, + "num_input_tokens_seen": 15555008, + "step": 23110 + }, + { + "epoch": 0.5647032956294432, + "grad_norm": 40.0219612121582, + "learning_rate": 1.9989810941706056e-06, + "loss": 0.2074, + "num_input_tokens_seen": 15558720, + "step": 23115 + }, + { + "epoch": 0.5648254464612904, + "grad_norm": 0.7361969947814941, + "learning_rate": 1.998977241906651e-06, + "loss": 0.0749, + "num_input_tokens_seen": 15562048, + "step": 23120 + }, + { + "epoch": 0.5649475972931376, + "grad_norm": 21.490753173828125, + "learning_rate": 1.9989733823778653e-06, + "loss": 0.2976, + "num_input_tokens_seen": 15564992, + "step": 23125 + }, + { + "epoch": 0.5650697481249848, + "grad_norm": 7.7838454246521, + "learning_rate": 1.998969515584276e-06, + "loss": 0.1892, + "num_input_tokens_seen": 15569152, + "step": 23130 + }, + { + "epoch": 0.5651918989568319, + "grad_norm": 25.361003875732422, + "learning_rate": 1.9989656415259118e-06, + "loss": 0.2072, + "num_input_tokens_seen": 15572928, + "step": 23135 + }, + { + "epoch": 0.565314049788679, + "grad_norm": 4.220102310180664, + "learning_rate": 1.998961760202801e-06, + "loss": 0.1046, + "num_input_tokens_seen": 15576256, + "step": 23140 + }, + { + "epoch": 0.5654362006205262, + "grad_norm": 14.987222671508789, + "learning_rate": 1.9989578716149713e-06, + "loss": 0.213, + "num_input_tokens_seen": 15579072, + "step": 23145 + }, + { + "epoch": 0.5655583514523734, + "grad_norm": 1.6472913026809692, + "learning_rate": 1.9989539757624515e-06, + "loss": 0.0259, + "num_input_tokens_seen": 15582912, + "step": 23150 + }, + { + "epoch": 0.5656805022842205, + "grad_norm": 29.221179962158203, + "learning_rate": 1.9989500726452697e-06, + "loss": 0.1362, + "num_input_tokens_seen": 15586496, + "step": 23155 + }, + { + "epoch": 0.5658026531160677, + "grad_norm": 0.8034424185752869, + "learning_rate": 1.9989461622634543e-06, + "loss": 0.0701, + "num_input_tokens_seen": 15589632, + "step": 23160 + }, + { + "epoch": 0.5659248039479149, + "grad_norm": 6.2514238357543945, + "learning_rate": 1.998942244617034e-06, + "loss": 0.0172, + "num_input_tokens_seen": 15593088, + "step": 23165 + }, + { + "epoch": 0.5660469547797621, + "grad_norm": 12.599471092224121, + "learning_rate": 1.998938319706036e-06, + "loss": 0.2101, + "num_input_tokens_seen": 15596352, + "step": 23170 + }, + { + "epoch": 0.5661691056116093, + "grad_norm": 0.26789960265159607, + "learning_rate": 1.9989343875304908e-06, + "loss": 0.2097, + "num_input_tokens_seen": 15600064, + "step": 23175 + }, + { + "epoch": 0.5662912564434563, + "grad_norm": 0.5412006378173828, + "learning_rate": 1.998930448090426e-06, + "loss": 0.0918, + "num_input_tokens_seen": 15603520, + "step": 23180 + }, + { + "epoch": 0.5664134072753035, + "grad_norm": 0.15992608666419983, + "learning_rate": 1.99892650138587e-06, + "loss": 0.0316, + "num_input_tokens_seen": 15607040, + "step": 23185 + }, + { + "epoch": 0.5665355581071507, + "grad_norm": 2.5661678314208984, + "learning_rate": 1.998922547416852e-06, + "loss": 0.082, + "num_input_tokens_seen": 15610368, + "step": 23190 + }, + { + "epoch": 0.5666577089389979, + "grad_norm": 33.32538604736328, + "learning_rate": 1.9989185861834003e-06, + "loss": 0.1905, + "num_input_tokens_seen": 15613696, + "step": 23195 + }, + { + "epoch": 0.566779859770845, + "grad_norm": 2.2595510482788086, + "learning_rate": 1.998914617685544e-06, + "loss": 0.0609, + "num_input_tokens_seen": 15617408, + "step": 23200 + }, + { + "epoch": 0.5669020106026922, + "grad_norm": 55.58548355102539, + "learning_rate": 1.998910641923312e-06, + "loss": 0.1334, + "num_input_tokens_seen": 15620480, + "step": 23205 + }, + { + "epoch": 0.5670241614345394, + "grad_norm": 21.146512985229492, + "learning_rate": 1.9989066588967333e-06, + "loss": 0.1426, + "num_input_tokens_seen": 15623424, + "step": 23210 + }, + { + "epoch": 0.5671463122663866, + "grad_norm": 41.37771987915039, + "learning_rate": 1.9989026686058365e-06, + "loss": 0.1113, + "num_input_tokens_seen": 15626624, + "step": 23215 + }, + { + "epoch": 0.5672684630982336, + "grad_norm": 31.741256713867188, + "learning_rate": 1.998898671050651e-06, + "loss": 0.0721, + "num_input_tokens_seen": 15630144, + "step": 23220 + }, + { + "epoch": 0.5673906139300808, + "grad_norm": 0.1878584921360016, + "learning_rate": 1.9988946662312052e-06, + "loss": 0.1699, + "num_input_tokens_seen": 15633472, + "step": 23225 + }, + { + "epoch": 0.567512764761928, + "grad_norm": 33.57866287231445, + "learning_rate": 1.9988906541475292e-06, + "loss": 0.2271, + "num_input_tokens_seen": 15636928, + "step": 23230 + }, + { + "epoch": 0.5676349155937752, + "grad_norm": 10.572066307067871, + "learning_rate": 1.9988866347996517e-06, + "loss": 0.144, + "num_input_tokens_seen": 15640256, + "step": 23235 + }, + { + "epoch": 0.5677570664256224, + "grad_norm": 1.2487159967422485, + "learning_rate": 1.9988826081876018e-06, + "loss": 0.0924, + "num_input_tokens_seen": 15643456, + "step": 23240 + }, + { + "epoch": 0.5678792172574695, + "grad_norm": 22.059751510620117, + "learning_rate": 1.9988785743114087e-06, + "loss": 0.1721, + "num_input_tokens_seen": 15646400, + "step": 23245 + }, + { + "epoch": 0.5680013680893167, + "grad_norm": 35.92079162597656, + "learning_rate": 1.9988745331711022e-06, + "loss": 0.0163, + "num_input_tokens_seen": 15649472, + "step": 23250 + }, + { + "epoch": 0.5681235189211639, + "grad_norm": 0.7913476228713989, + "learning_rate": 1.9988704847667115e-06, + "loss": 0.062, + "num_input_tokens_seen": 15653120, + "step": 23255 + }, + { + "epoch": 0.568245669753011, + "grad_norm": 1.6168322563171387, + "learning_rate": 1.9988664290982657e-06, + "loss": 0.108, + "num_input_tokens_seen": 15657088, + "step": 23260 + }, + { + "epoch": 0.5683678205848581, + "grad_norm": 46.63363265991211, + "learning_rate": 1.998862366165795e-06, + "loss": 0.2226, + "num_input_tokens_seen": 15660288, + "step": 23265 + }, + { + "epoch": 0.5684899714167053, + "grad_norm": 33.23601150512695, + "learning_rate": 1.998858295969328e-06, + "loss": 0.1452, + "num_input_tokens_seen": 15663424, + "step": 23270 + }, + { + "epoch": 0.5686121222485525, + "grad_norm": 2.1423802375793457, + "learning_rate": 1.998854218508895e-06, + "loss": 0.0867, + "num_input_tokens_seen": 15666496, + "step": 23275 + }, + { + "epoch": 0.5687342730803997, + "grad_norm": 0.09010536223649979, + "learning_rate": 1.9988501337845256e-06, + "loss": 0.0866, + "num_input_tokens_seen": 15670272, + "step": 23280 + }, + { + "epoch": 0.5688564239122469, + "grad_norm": 2.580996036529541, + "learning_rate": 1.9988460417962494e-06, + "loss": 0.0921, + "num_input_tokens_seen": 15673600, + "step": 23285 + }, + { + "epoch": 0.568978574744094, + "grad_norm": 26.938867568969727, + "learning_rate": 1.998841942544096e-06, + "loss": 0.0575, + "num_input_tokens_seen": 15676864, + "step": 23290 + }, + { + "epoch": 0.5691007255759412, + "grad_norm": 66.43077850341797, + "learning_rate": 1.9988378360280955e-06, + "loss": 0.1449, + "num_input_tokens_seen": 15680320, + "step": 23295 + }, + { + "epoch": 0.5692228764077883, + "grad_norm": 7.964977264404297, + "learning_rate": 1.9988337222482776e-06, + "loss": 0.198, + "num_input_tokens_seen": 15683328, + "step": 23300 + }, + { + "epoch": 0.5693450272396355, + "grad_norm": 37.287174224853516, + "learning_rate": 1.998829601204672e-06, + "loss": 0.1606, + "num_input_tokens_seen": 15686656, + "step": 23305 + }, + { + "epoch": 0.5694671780714826, + "grad_norm": 1.1268587112426758, + "learning_rate": 1.998825472897309e-06, + "loss": 0.0729, + "num_input_tokens_seen": 15690368, + "step": 23310 + }, + { + "epoch": 0.5695893289033298, + "grad_norm": 2.2257397174835205, + "learning_rate": 1.9988213373262183e-06, + "loss": 0.1514, + "num_input_tokens_seen": 15693504, + "step": 23315 + }, + { + "epoch": 0.569711479735177, + "grad_norm": 1.2565116882324219, + "learning_rate": 1.9988171944914305e-06, + "loss": 0.0697, + "num_input_tokens_seen": 15696640, + "step": 23320 + }, + { + "epoch": 0.5698336305670242, + "grad_norm": 70.08368682861328, + "learning_rate": 1.998813044392975e-06, + "loss": 0.2161, + "num_input_tokens_seen": 15700096, + "step": 23325 + }, + { + "epoch": 0.5699557813988714, + "grad_norm": 8.830229759216309, + "learning_rate": 1.9988088870308824e-06, + "loss": 0.0429, + "num_input_tokens_seen": 15703424, + "step": 23330 + }, + { + "epoch": 0.5700779322307185, + "grad_norm": 76.5599136352539, + "learning_rate": 1.9988047224051835e-06, + "loss": 0.208, + "num_input_tokens_seen": 15707008, + "step": 23335 + }, + { + "epoch": 0.5702000830625656, + "grad_norm": 9.068888664245605, + "learning_rate": 1.9988005505159078e-06, + "loss": 0.1379, + "num_input_tokens_seen": 15710592, + "step": 23340 + }, + { + "epoch": 0.5703222338944128, + "grad_norm": 45.0673942565918, + "learning_rate": 1.9987963713630856e-06, + "loss": 0.4155, + "num_input_tokens_seen": 15714304, + "step": 23345 + }, + { + "epoch": 0.57044438472626, + "grad_norm": 36.7464714050293, + "learning_rate": 1.9987921849467476e-06, + "loss": 0.2139, + "num_input_tokens_seen": 15717824, + "step": 23350 + }, + { + "epoch": 0.5705665355581071, + "grad_norm": 0.4737611711025238, + "learning_rate": 1.998787991266924e-06, + "loss": 0.0858, + "num_input_tokens_seen": 15721216, + "step": 23355 + }, + { + "epoch": 0.5706886863899543, + "grad_norm": 21.547607421875, + "learning_rate": 1.998783790323646e-06, + "loss": 0.1141, + "num_input_tokens_seen": 15724352, + "step": 23360 + }, + { + "epoch": 0.5708108372218015, + "grad_norm": 12.531356811523438, + "learning_rate": 1.998779582116943e-06, + "loss": 0.107, + "num_input_tokens_seen": 15727808, + "step": 23365 + }, + { + "epoch": 0.5709329880536487, + "grad_norm": 5.0288872718811035, + "learning_rate": 1.9987753666468473e-06, + "loss": 0.1105, + "num_input_tokens_seen": 15730752, + "step": 23370 + }, + { + "epoch": 0.5710551388854959, + "grad_norm": 0.6317335963249207, + "learning_rate": 1.9987711439133877e-06, + "loss": 0.0375, + "num_input_tokens_seen": 15734016, + "step": 23375 + }, + { + "epoch": 0.5711772897173429, + "grad_norm": 29.76620864868164, + "learning_rate": 1.9987669139165955e-06, + "loss": 0.1535, + "num_input_tokens_seen": 15737664, + "step": 23380 + }, + { + "epoch": 0.5712994405491901, + "grad_norm": 0.38784685730934143, + "learning_rate": 1.998762676656502e-06, + "loss": 0.07, + "num_input_tokens_seen": 15741120, + "step": 23385 + }, + { + "epoch": 0.5714215913810373, + "grad_norm": 26.450305938720703, + "learning_rate": 1.9987584321331377e-06, + "loss": 0.1223, + "num_input_tokens_seen": 15744320, + "step": 23390 + }, + { + "epoch": 0.5715437422128845, + "grad_norm": 15.62353229522705, + "learning_rate": 1.9987541803465335e-06, + "loss": 0.0944, + "num_input_tokens_seen": 15747776, + "step": 23395 + }, + { + "epoch": 0.5716658930447316, + "grad_norm": 0.3525586426258087, + "learning_rate": 1.9987499212967205e-06, + "loss": 0.0861, + "num_input_tokens_seen": 15751552, + "step": 23400 + }, + { + "epoch": 0.5717880438765788, + "grad_norm": 0.8962100148200989, + "learning_rate": 1.998745654983729e-06, + "loss": 0.1516, + "num_input_tokens_seen": 15754752, + "step": 23405 + }, + { + "epoch": 0.571910194708426, + "grad_norm": 3.1474618911743164, + "learning_rate": 1.9987413814075907e-06, + "loss": 0.1224, + "num_input_tokens_seen": 15757888, + "step": 23410 + }, + { + "epoch": 0.5720323455402732, + "grad_norm": 0.26637038588523865, + "learning_rate": 1.998737100568336e-06, + "loss": 0.0904, + "num_input_tokens_seen": 15761152, + "step": 23415 + }, + { + "epoch": 0.5721544963721203, + "grad_norm": 16.29987144470215, + "learning_rate": 1.998732812465997e-06, + "loss": 0.1148, + "num_input_tokens_seen": 15764672, + "step": 23420 + }, + { + "epoch": 0.5722766472039674, + "grad_norm": 0.1825859397649765, + "learning_rate": 1.9987285171006042e-06, + "loss": 0.0455, + "num_input_tokens_seen": 15768064, + "step": 23425 + }, + { + "epoch": 0.5723987980358146, + "grad_norm": 12.438610076904297, + "learning_rate": 1.998724214472189e-06, + "loss": 0.1486, + "num_input_tokens_seen": 15771328, + "step": 23430 + }, + { + "epoch": 0.5725209488676618, + "grad_norm": 12.719995498657227, + "learning_rate": 1.9987199045807823e-06, + "loss": 0.2083, + "num_input_tokens_seen": 15774528, + "step": 23435 + }, + { + "epoch": 0.572643099699509, + "grad_norm": 34.43893814086914, + "learning_rate": 1.9987155874264166e-06, + "loss": 0.1026, + "num_input_tokens_seen": 15777728, + "step": 23440 + }, + { + "epoch": 0.5727652505313561, + "grad_norm": 8.031460762023926, + "learning_rate": 1.998711263009122e-06, + "loss": 0.0887, + "num_input_tokens_seen": 15781120, + "step": 23445 + }, + { + "epoch": 0.5728874013632033, + "grad_norm": 36.22572326660156, + "learning_rate": 1.9987069313289307e-06, + "loss": 0.2051, + "num_input_tokens_seen": 15784320, + "step": 23450 + }, + { + "epoch": 0.5730095521950505, + "grad_norm": 0.5932744741439819, + "learning_rate": 1.9987025923858736e-06, + "loss": 0.2077, + "num_input_tokens_seen": 15787584, + "step": 23455 + }, + { + "epoch": 0.5731317030268976, + "grad_norm": 26.168432235717773, + "learning_rate": 1.998698246179983e-06, + "loss": 0.3206, + "num_input_tokens_seen": 15790976, + "step": 23460 + }, + { + "epoch": 0.5732538538587448, + "grad_norm": 26.840835571289062, + "learning_rate": 1.9986938927112903e-06, + "loss": 0.1039, + "num_input_tokens_seen": 15794368, + "step": 23465 + }, + { + "epoch": 0.5733760046905919, + "grad_norm": 6.414973735809326, + "learning_rate": 1.998689531979827e-06, + "loss": 0.0355, + "num_input_tokens_seen": 15797952, + "step": 23470 + }, + { + "epoch": 0.5734981555224391, + "grad_norm": 12.151657104492188, + "learning_rate": 1.998685163985624e-06, + "loss": 0.1528, + "num_input_tokens_seen": 15800960, + "step": 23475 + }, + { + "epoch": 0.5736203063542863, + "grad_norm": 2.4973652362823486, + "learning_rate": 1.9986807887287145e-06, + "loss": 0.0738, + "num_input_tokens_seen": 15805248, + "step": 23480 + }, + { + "epoch": 0.5737424571861335, + "grad_norm": 8.367063522338867, + "learning_rate": 1.99867640620913e-06, + "loss": 0.0482, + "num_input_tokens_seen": 15808384, + "step": 23485 + }, + { + "epoch": 0.5738646080179806, + "grad_norm": 10.651350975036621, + "learning_rate": 1.9986720164269014e-06, + "loss": 0.1362, + "num_input_tokens_seen": 15811392, + "step": 23490 + }, + { + "epoch": 0.5739867588498277, + "grad_norm": 21.571401596069336, + "learning_rate": 1.998667619382062e-06, + "loss": 0.195, + "num_input_tokens_seen": 15814784, + "step": 23495 + }, + { + "epoch": 0.5741089096816749, + "grad_norm": 32.89027404785156, + "learning_rate": 1.998663215074642e-06, + "loss": 0.1152, + "num_input_tokens_seen": 15818304, + "step": 23500 + }, + { + "epoch": 0.5742310605135221, + "grad_norm": 18.197383880615234, + "learning_rate": 1.9986588035046755e-06, + "loss": 0.1916, + "num_input_tokens_seen": 15821632, + "step": 23505 + }, + { + "epoch": 0.5743532113453692, + "grad_norm": 16.271366119384766, + "learning_rate": 1.998654384672193e-06, + "loss": 0.1102, + "num_input_tokens_seen": 15825280, + "step": 23510 + }, + { + "epoch": 0.5744753621772164, + "grad_norm": 20.66217041015625, + "learning_rate": 1.9986499585772275e-06, + "loss": 0.2022, + "num_input_tokens_seen": 15828672, + "step": 23515 + }, + { + "epoch": 0.5745975130090636, + "grad_norm": 12.838661193847656, + "learning_rate": 1.998645525219811e-06, + "loss": 0.1016, + "num_input_tokens_seen": 15831808, + "step": 23520 + }, + { + "epoch": 0.5747196638409108, + "grad_norm": 27.447620391845703, + "learning_rate": 1.9986410845999752e-06, + "loss": 0.2264, + "num_input_tokens_seen": 15835072, + "step": 23525 + }, + { + "epoch": 0.574841814672758, + "grad_norm": 20.3715763092041, + "learning_rate": 1.998636636717753e-06, + "loss": 0.0441, + "num_input_tokens_seen": 15838144, + "step": 23530 + }, + { + "epoch": 0.574963965504605, + "grad_norm": 16.542808532714844, + "learning_rate": 1.9986321815731766e-06, + "loss": 0.1378, + "num_input_tokens_seen": 15841728, + "step": 23535 + }, + { + "epoch": 0.5750861163364522, + "grad_norm": 23.26079559326172, + "learning_rate": 1.998627719166278e-06, + "loss": 0.0454, + "num_input_tokens_seen": 15845504, + "step": 23540 + }, + { + "epoch": 0.5752082671682994, + "grad_norm": 16.223146438598633, + "learning_rate": 1.9986232494970908e-06, + "loss": 0.0466, + "num_input_tokens_seen": 15849600, + "step": 23545 + }, + { + "epoch": 0.5753304180001466, + "grad_norm": 28.47535514831543, + "learning_rate": 1.9986187725656466e-06, + "loss": 0.0798, + "num_input_tokens_seen": 15852736, + "step": 23550 + }, + { + "epoch": 0.5754525688319937, + "grad_norm": 21.106651306152344, + "learning_rate": 1.9986142883719774e-06, + "loss": 0.1278, + "num_input_tokens_seen": 15856064, + "step": 23555 + }, + { + "epoch": 0.5755747196638409, + "grad_norm": 0.2558057904243469, + "learning_rate": 1.998609796916117e-06, + "loss": 0.063, + "num_input_tokens_seen": 15859648, + "step": 23560 + }, + { + "epoch": 0.5756968704956881, + "grad_norm": 24.275699615478516, + "learning_rate": 1.998605298198098e-06, + "loss": 0.0897, + "num_input_tokens_seen": 15862976, + "step": 23565 + }, + { + "epoch": 0.5758190213275353, + "grad_norm": 14.609818458557129, + "learning_rate": 1.9986007922179523e-06, + "loss": 0.0718, + "num_input_tokens_seen": 15866496, + "step": 23570 + }, + { + "epoch": 0.5759411721593825, + "grad_norm": 0.0939621701836586, + "learning_rate": 1.9985962789757126e-06, + "loss": 0.101, + "num_input_tokens_seen": 15869888, + "step": 23575 + }, + { + "epoch": 0.5760633229912295, + "grad_norm": 3.2896945476531982, + "learning_rate": 1.9985917584714126e-06, + "loss": 0.0232, + "num_input_tokens_seen": 15872960, + "step": 23580 + }, + { + "epoch": 0.5761854738230767, + "grad_norm": 4.536641597747803, + "learning_rate": 1.998587230705085e-06, + "loss": 0.1324, + "num_input_tokens_seen": 15876160, + "step": 23585 + }, + { + "epoch": 0.5763076246549239, + "grad_norm": 0.8103224635124207, + "learning_rate": 1.9985826956767618e-06, + "loss": 0.0985, + "num_input_tokens_seen": 15879360, + "step": 23590 + }, + { + "epoch": 0.5764297754867711, + "grad_norm": 24.116247177124023, + "learning_rate": 1.998578153386477e-06, + "loss": 0.1614, + "num_input_tokens_seen": 15883008, + "step": 23595 + }, + { + "epoch": 0.5765519263186182, + "grad_norm": 0.26677149534225464, + "learning_rate": 1.9985736038342634e-06, + "loss": 0.0492, + "num_input_tokens_seen": 15886400, + "step": 23600 + }, + { + "epoch": 0.5766740771504654, + "grad_norm": 16.559232711791992, + "learning_rate": 1.9985690470201537e-06, + "loss": 0.0798, + "num_input_tokens_seen": 15889920, + "step": 23605 + }, + { + "epoch": 0.5767962279823126, + "grad_norm": 45.85171127319336, + "learning_rate": 1.9985644829441816e-06, + "loss": 0.1079, + "num_input_tokens_seen": 15893248, + "step": 23610 + }, + { + "epoch": 0.5769183788141597, + "grad_norm": 20.431299209594727, + "learning_rate": 1.9985599116063796e-06, + "loss": 0.1202, + "num_input_tokens_seen": 15896000, + "step": 23615 + }, + { + "epoch": 0.5770405296460069, + "grad_norm": 14.595911979675293, + "learning_rate": 1.9985553330067816e-06, + "loss": 0.1365, + "num_input_tokens_seen": 15901504, + "step": 23620 + }, + { + "epoch": 0.577162680477854, + "grad_norm": 49.17565155029297, + "learning_rate": 1.9985507471454207e-06, + "loss": 0.1305, + "num_input_tokens_seen": 15904832, + "step": 23625 + }, + { + "epoch": 0.5772848313097012, + "grad_norm": 14.491154670715332, + "learning_rate": 1.9985461540223303e-06, + "loss": 0.212, + "num_input_tokens_seen": 15908224, + "step": 23630 + }, + { + "epoch": 0.5774069821415484, + "grad_norm": 25.94092559814453, + "learning_rate": 1.9985415536375434e-06, + "loss": 0.1236, + "num_input_tokens_seen": 15911744, + "step": 23635 + }, + { + "epoch": 0.5775291329733956, + "grad_norm": 12.70712947845459, + "learning_rate": 1.998536945991094e-06, + "loss": 0.3236, + "num_input_tokens_seen": 15914816, + "step": 23640 + }, + { + "epoch": 0.5776512838052427, + "grad_norm": 35.25370407104492, + "learning_rate": 1.9985323310830152e-06, + "loss": 0.0493, + "num_input_tokens_seen": 15917888, + "step": 23645 + }, + { + "epoch": 0.5777734346370899, + "grad_norm": 43.0831413269043, + "learning_rate": 1.9985277089133405e-06, + "loss": 0.094, + "num_input_tokens_seen": 15921536, + "step": 23650 + }, + { + "epoch": 0.577895585468937, + "grad_norm": 21.820783615112305, + "learning_rate": 1.998523079482104e-06, + "loss": 0.0982, + "num_input_tokens_seen": 15924800, + "step": 23655 + }, + { + "epoch": 0.5780177363007842, + "grad_norm": 10.404958724975586, + "learning_rate": 1.998518442789339e-06, + "loss": 0.2915, + "num_input_tokens_seen": 15928000, + "step": 23660 + }, + { + "epoch": 0.5781398871326314, + "grad_norm": 1.6005030870437622, + "learning_rate": 1.9985137988350795e-06, + "loss": 0.2105, + "num_input_tokens_seen": 15931328, + "step": 23665 + }, + { + "epoch": 0.5782620379644785, + "grad_norm": 2.767606258392334, + "learning_rate": 1.998509147619359e-06, + "loss": 0.0554, + "num_input_tokens_seen": 15934592, + "step": 23670 + }, + { + "epoch": 0.5783841887963257, + "grad_norm": 34.56588363647461, + "learning_rate": 1.998504489142211e-06, + "loss": 0.1021, + "num_input_tokens_seen": 15937984, + "step": 23675 + }, + { + "epoch": 0.5785063396281729, + "grad_norm": 11.574126243591309, + "learning_rate": 1.9984998234036704e-06, + "loss": 0.08, + "num_input_tokens_seen": 15941568, + "step": 23680 + }, + { + "epoch": 0.5786284904600201, + "grad_norm": 64.17758178710938, + "learning_rate": 1.9984951504037704e-06, + "loss": 0.1458, + "num_input_tokens_seen": 15945280, + "step": 23685 + }, + { + "epoch": 0.5787506412918672, + "grad_norm": 10.47195053100586, + "learning_rate": 1.998490470142545e-06, + "loss": 0.0967, + "num_input_tokens_seen": 15948288, + "step": 23690 + }, + { + "epoch": 0.5788727921237143, + "grad_norm": 33.58171844482422, + "learning_rate": 1.9984857826200284e-06, + "loss": 0.1066, + "num_input_tokens_seen": 15952064, + "step": 23695 + }, + { + "epoch": 0.5789949429555615, + "grad_norm": 25.143238067626953, + "learning_rate": 1.998481087836254e-06, + "loss": 0.2611, + "num_input_tokens_seen": 15956032, + "step": 23700 + }, + { + "epoch": 0.5791170937874087, + "grad_norm": 14.816621780395508, + "learning_rate": 1.9984763857912573e-06, + "loss": 0.1465, + "num_input_tokens_seen": 15959360, + "step": 23705 + }, + { + "epoch": 0.5792392446192559, + "grad_norm": 9.596741676330566, + "learning_rate": 1.998471676485072e-06, + "loss": 0.0293, + "num_input_tokens_seen": 15962752, + "step": 23710 + }, + { + "epoch": 0.579361395451103, + "grad_norm": 24.233652114868164, + "learning_rate": 1.9984669599177315e-06, + "loss": 0.1096, + "num_input_tokens_seen": 15965888, + "step": 23715 + }, + { + "epoch": 0.5794835462829502, + "grad_norm": 26.97178077697754, + "learning_rate": 1.9984622360892707e-06, + "loss": 0.2157, + "num_input_tokens_seen": 15969216, + "step": 23720 + }, + { + "epoch": 0.5796056971147974, + "grad_norm": 27.27730941772461, + "learning_rate": 1.998457504999724e-06, + "loss": 0.1427, + "num_input_tokens_seen": 15972672, + "step": 23725 + }, + { + "epoch": 0.5797278479466446, + "grad_norm": 24.768224716186523, + "learning_rate": 1.9984527666491262e-06, + "loss": 0.0804, + "num_input_tokens_seen": 15976320, + "step": 23730 + }, + { + "epoch": 0.5798499987784916, + "grad_norm": 32.22639465332031, + "learning_rate": 1.998448021037511e-06, + "loss": 0.1042, + "num_input_tokens_seen": 15979392, + "step": 23735 + }, + { + "epoch": 0.5799721496103388, + "grad_norm": 33.8211669921875, + "learning_rate": 1.998443268164913e-06, + "loss": 0.141, + "num_input_tokens_seen": 15982784, + "step": 23740 + }, + { + "epoch": 0.580094300442186, + "grad_norm": 2.913667678833008, + "learning_rate": 1.998438508031368e-06, + "loss": 0.0586, + "num_input_tokens_seen": 15986624, + "step": 23745 + }, + { + "epoch": 0.5802164512740332, + "grad_norm": 3.4265143871307373, + "learning_rate": 1.9984337406369084e-06, + "loss": 0.0323, + "num_input_tokens_seen": 15989696, + "step": 23750 + }, + { + "epoch": 0.5803386021058803, + "grad_norm": 29.47190284729004, + "learning_rate": 1.9984289659815707e-06, + "loss": 0.061, + "num_input_tokens_seen": 15993280, + "step": 23755 + }, + { + "epoch": 0.5804607529377275, + "grad_norm": 15.11453628540039, + "learning_rate": 1.998424184065389e-06, + "loss": 0.0845, + "num_input_tokens_seen": 15996096, + "step": 23760 + }, + { + "epoch": 0.5805829037695747, + "grad_norm": 23.776002883911133, + "learning_rate": 1.998419394888398e-06, + "loss": 0.1935, + "num_input_tokens_seen": 15999680, + "step": 23765 + }, + { + "epoch": 0.5807050546014219, + "grad_norm": 0.5569315552711487, + "learning_rate": 1.998414598450633e-06, + "loss": 0.0199, + "num_input_tokens_seen": 16003072, + "step": 23770 + }, + { + "epoch": 0.580827205433269, + "grad_norm": 1.0250940322875977, + "learning_rate": 1.998409794752128e-06, + "loss": 0.0787, + "num_input_tokens_seen": 16006400, + "step": 23775 + }, + { + "epoch": 0.5809493562651161, + "grad_norm": 2.4078798294067383, + "learning_rate": 1.9984049837929183e-06, + "loss": 0.0284, + "num_input_tokens_seen": 16009600, + "step": 23780 + }, + { + "epoch": 0.5810715070969633, + "grad_norm": 9.696256637573242, + "learning_rate": 1.9984001655730397e-06, + "loss": 0.1318, + "num_input_tokens_seen": 16013248, + "step": 23785 + }, + { + "epoch": 0.5811936579288105, + "grad_norm": 44.90831756591797, + "learning_rate": 1.998395340092526e-06, + "loss": 0.126, + "num_input_tokens_seen": 16016320, + "step": 23790 + }, + { + "epoch": 0.5813158087606577, + "grad_norm": 20.846860885620117, + "learning_rate": 1.998390507351413e-06, + "loss": 0.0767, + "num_input_tokens_seen": 16019968, + "step": 23795 + }, + { + "epoch": 0.5814379595925048, + "grad_norm": 38.1915397644043, + "learning_rate": 1.9983856673497357e-06, + "loss": 0.0845, + "num_input_tokens_seen": 16023232, + "step": 23800 + }, + { + "epoch": 0.581560110424352, + "grad_norm": 32.8565559387207, + "learning_rate": 1.9983808200875295e-06, + "loss": 0.0591, + "num_input_tokens_seen": 16026304, + "step": 23805 + }, + { + "epoch": 0.5816822612561992, + "grad_norm": 0.4736187756061554, + "learning_rate": 1.9983759655648293e-06, + "loss": 0.1458, + "num_input_tokens_seen": 16029824, + "step": 23810 + }, + { + "epoch": 0.5818044120880463, + "grad_norm": 0.13305804133415222, + "learning_rate": 1.9983711037816705e-06, + "loss": 0.0809, + "num_input_tokens_seen": 16034176, + "step": 23815 + }, + { + "epoch": 0.5819265629198935, + "grad_norm": 45.56284713745117, + "learning_rate": 1.9983662347380883e-06, + "loss": 0.095, + "num_input_tokens_seen": 16037824, + "step": 23820 + }, + { + "epoch": 0.5820487137517406, + "grad_norm": 8.108329772949219, + "learning_rate": 1.9983613584341184e-06, + "loss": 0.1983, + "num_input_tokens_seen": 16040960, + "step": 23825 + }, + { + "epoch": 0.5821708645835878, + "grad_norm": 8.468049049377441, + "learning_rate": 1.998356474869796e-06, + "loss": 0.0863, + "num_input_tokens_seen": 16044608, + "step": 23830 + }, + { + "epoch": 0.582293015415435, + "grad_norm": 1.143520474433899, + "learning_rate": 1.9983515840451574e-06, + "loss": 0.1203, + "num_input_tokens_seen": 16048192, + "step": 23835 + }, + { + "epoch": 0.5824151662472822, + "grad_norm": 23.47208595275879, + "learning_rate": 1.998346685960237e-06, + "loss": 0.121, + "num_input_tokens_seen": 16051456, + "step": 23840 + }, + { + "epoch": 0.5825373170791293, + "grad_norm": 7.825652122497559, + "learning_rate": 1.9983417806150716e-06, + "loss": 0.0554, + "num_input_tokens_seen": 16054976, + "step": 23845 + }, + { + "epoch": 0.5826594679109764, + "grad_norm": 16.5784854888916, + "learning_rate": 1.998336868009696e-06, + "loss": 0.0686, + "num_input_tokens_seen": 16058240, + "step": 23850 + }, + { + "epoch": 0.5827816187428236, + "grad_norm": 40.77692413330078, + "learning_rate": 1.998331948144146e-06, + "loss": 0.1335, + "num_input_tokens_seen": 16061312, + "step": 23855 + }, + { + "epoch": 0.5829037695746708, + "grad_norm": 36.636295318603516, + "learning_rate": 1.9983270210184573e-06, + "loss": 0.0715, + "num_input_tokens_seen": 16064768, + "step": 23860 + }, + { + "epoch": 0.583025920406518, + "grad_norm": 0.427692711353302, + "learning_rate": 1.998322086632666e-06, + "loss": 0.082, + "num_input_tokens_seen": 16068416, + "step": 23865 + }, + { + "epoch": 0.5831480712383651, + "grad_norm": 71.27447509765625, + "learning_rate": 1.9983171449868086e-06, + "loss": 0.2176, + "num_input_tokens_seen": 16071488, + "step": 23870 + }, + { + "epoch": 0.5832702220702123, + "grad_norm": 0.7104368805885315, + "learning_rate": 1.9983121960809198e-06, + "loss": 0.1019, + "num_input_tokens_seen": 16075200, + "step": 23875 + }, + { + "epoch": 0.5833923729020595, + "grad_norm": 2.0664944648742676, + "learning_rate": 1.9983072399150367e-06, + "loss": 0.0461, + "num_input_tokens_seen": 16078656, + "step": 23880 + }, + { + "epoch": 0.5835145237339067, + "grad_norm": 17.349943161010742, + "learning_rate": 1.9983022764891943e-06, + "loss": 0.119, + "num_input_tokens_seen": 16082112, + "step": 23885 + }, + { + "epoch": 0.5836366745657537, + "grad_norm": 9.098965644836426, + "learning_rate": 1.9982973058034297e-06, + "loss": 0.1487, + "num_input_tokens_seen": 16085376, + "step": 23890 + }, + { + "epoch": 0.5837588253976009, + "grad_norm": 13.251080513000488, + "learning_rate": 1.998292327857778e-06, + "loss": 0.1018, + "num_input_tokens_seen": 16088576, + "step": 23895 + }, + { + "epoch": 0.5838809762294481, + "grad_norm": 23.803606033325195, + "learning_rate": 1.998287342652277e-06, + "loss": 0.0956, + "num_input_tokens_seen": 16091904, + "step": 23900 + }, + { + "epoch": 0.5840031270612953, + "grad_norm": 9.364514350891113, + "learning_rate": 1.998282350186961e-06, + "loss": 0.1887, + "num_input_tokens_seen": 16094912, + "step": 23905 + }, + { + "epoch": 0.5841252778931425, + "grad_norm": 2.0720276832580566, + "learning_rate": 1.998277350461868e-06, + "loss": 0.0914, + "num_input_tokens_seen": 16097856, + "step": 23910 + }, + { + "epoch": 0.5842474287249896, + "grad_norm": 19.379125595092773, + "learning_rate": 1.998272343477033e-06, + "loss": 0.1373, + "num_input_tokens_seen": 16101632, + "step": 23915 + }, + { + "epoch": 0.5843695795568368, + "grad_norm": 9.671746253967285, + "learning_rate": 1.998267329232493e-06, + "loss": 0.193, + "num_input_tokens_seen": 16104704, + "step": 23920 + }, + { + "epoch": 0.584491730388684, + "grad_norm": 4.722978591918945, + "learning_rate": 1.9982623077282846e-06, + "loss": 0.1243, + "num_input_tokens_seen": 16107968, + "step": 23925 + }, + { + "epoch": 0.5846138812205312, + "grad_norm": 6.806799411773682, + "learning_rate": 1.9982572789644442e-06, + "loss": 0.1061, + "num_input_tokens_seen": 16111488, + "step": 23930 + }, + { + "epoch": 0.5847360320523782, + "grad_norm": 0.32206377387046814, + "learning_rate": 1.9982522429410085e-06, + "loss": 0.0316, + "num_input_tokens_seen": 16115136, + "step": 23935 + }, + { + "epoch": 0.5848581828842254, + "grad_norm": 2.084754228591919, + "learning_rate": 1.998247199658014e-06, + "loss": 0.0877, + "num_input_tokens_seen": 16118848, + "step": 23940 + }, + { + "epoch": 0.5849803337160726, + "grad_norm": 10.631635665893555, + "learning_rate": 1.9982421491154973e-06, + "loss": 0.0641, + "num_input_tokens_seen": 16122432, + "step": 23945 + }, + { + "epoch": 0.5851024845479198, + "grad_norm": 43.512943267822266, + "learning_rate": 1.998237091313495e-06, + "loss": 0.1244, + "num_input_tokens_seen": 16125888, + "step": 23950 + }, + { + "epoch": 0.585224635379767, + "grad_norm": 0.6749215126037598, + "learning_rate": 1.9982320262520445e-06, + "loss": 0.1109, + "num_input_tokens_seen": 16129408, + "step": 23955 + }, + { + "epoch": 0.5853467862116141, + "grad_norm": 36.5847053527832, + "learning_rate": 1.998226953931182e-06, + "loss": 0.0638, + "num_input_tokens_seen": 16133376, + "step": 23960 + }, + { + "epoch": 0.5854689370434613, + "grad_norm": 0.6342559456825256, + "learning_rate": 1.9982218743509445e-06, + "loss": 0.0532, + "num_input_tokens_seen": 16136640, + "step": 23965 + }, + { + "epoch": 0.5855910878753084, + "grad_norm": 41.035850524902344, + "learning_rate": 1.9982167875113692e-06, + "loss": 0.2682, + "num_input_tokens_seen": 16140096, + "step": 23970 + }, + { + "epoch": 0.5857132387071556, + "grad_norm": 5.5960235595703125, + "learning_rate": 1.9982116934124925e-06, + "loss": 0.0938, + "num_input_tokens_seen": 16143488, + "step": 23975 + }, + { + "epoch": 0.5858353895390027, + "grad_norm": 27.44890022277832, + "learning_rate": 1.9982065920543524e-06, + "loss": 0.2205, + "num_input_tokens_seen": 16147264, + "step": 23980 + }, + { + "epoch": 0.5859575403708499, + "grad_norm": 6.895400047302246, + "learning_rate": 1.9982014834369853e-06, + "loss": 0.0564, + "num_input_tokens_seen": 16150592, + "step": 23985 + }, + { + "epoch": 0.5860796912026971, + "grad_norm": 35.16048812866211, + "learning_rate": 1.9981963675604286e-06, + "loss": 0.1804, + "num_input_tokens_seen": 16153664, + "step": 23990 + }, + { + "epoch": 0.5862018420345443, + "grad_norm": 15.177404403686523, + "learning_rate": 1.9981912444247195e-06, + "loss": 0.0559, + "num_input_tokens_seen": 16156800, + "step": 23995 + }, + { + "epoch": 0.5863239928663915, + "grad_norm": 39.688987731933594, + "learning_rate": 1.9981861140298948e-06, + "loss": 0.1858, + "num_input_tokens_seen": 16160064, + "step": 24000 + }, + { + "epoch": 0.5864461436982386, + "grad_norm": 11.082685470581055, + "learning_rate": 1.9981809763759926e-06, + "loss": 0.1304, + "num_input_tokens_seen": 16163648, + "step": 24005 + }, + { + "epoch": 0.5865682945300857, + "grad_norm": 17.348304748535156, + "learning_rate": 1.9981758314630495e-06, + "loss": 0.2359, + "num_input_tokens_seen": 16166720, + "step": 24010 + }, + { + "epoch": 0.5866904453619329, + "grad_norm": 19.414400100708008, + "learning_rate": 1.998170679291104e-06, + "loss": 0.116, + "num_input_tokens_seen": 16170112, + "step": 24015 + }, + { + "epoch": 0.5868125961937801, + "grad_norm": 37.96230697631836, + "learning_rate": 1.9981655198601918e-06, + "loss": 0.1495, + "num_input_tokens_seen": 16173696, + "step": 24020 + }, + { + "epoch": 0.5869347470256272, + "grad_norm": 9.247244834899902, + "learning_rate": 1.9981603531703526e-06, + "loss": 0.0887, + "num_input_tokens_seen": 16177024, + "step": 24025 + }, + { + "epoch": 0.5870568978574744, + "grad_norm": 15.933195114135742, + "learning_rate": 1.998155179221622e-06, + "loss": 0.0788, + "num_input_tokens_seen": 16180608, + "step": 24030 + }, + { + "epoch": 0.5871790486893216, + "grad_norm": 19.613866806030273, + "learning_rate": 1.9981499980140386e-06, + "loss": 0.1616, + "num_input_tokens_seen": 16184128, + "step": 24035 + }, + { + "epoch": 0.5873011995211688, + "grad_norm": 22.159255981445312, + "learning_rate": 1.99814480954764e-06, + "loss": 0.1168, + "num_input_tokens_seen": 16187392, + "step": 24040 + }, + { + "epoch": 0.5874233503530158, + "grad_norm": 17.167551040649414, + "learning_rate": 1.998139613822464e-06, + "loss": 0.1582, + "num_input_tokens_seen": 16190912, + "step": 24045 + }, + { + "epoch": 0.587545501184863, + "grad_norm": 15.00357437133789, + "learning_rate": 1.998134410838548e-06, + "loss": 0.0451, + "num_input_tokens_seen": 16194176, + "step": 24050 + }, + { + "epoch": 0.5876676520167102, + "grad_norm": 10.094125747680664, + "learning_rate": 1.9981292005959305e-06, + "loss": 0.1327, + "num_input_tokens_seen": 16197952, + "step": 24055 + }, + { + "epoch": 0.5877898028485574, + "grad_norm": 0.6391386985778809, + "learning_rate": 1.998123983094649e-06, + "loss": 0.0967, + "num_input_tokens_seen": 16201600, + "step": 24060 + }, + { + "epoch": 0.5879119536804046, + "grad_norm": 26.472667694091797, + "learning_rate": 1.998118758334741e-06, + "loss": 0.0567, + "num_input_tokens_seen": 16205440, + "step": 24065 + }, + { + "epoch": 0.5880341045122517, + "grad_norm": 16.453083038330078, + "learning_rate": 1.998113526316245e-06, + "loss": 0.0901, + "num_input_tokens_seen": 16208640, + "step": 24070 + }, + { + "epoch": 0.5881562553440989, + "grad_norm": 21.841779708862305, + "learning_rate": 1.998108287039199e-06, + "loss": 0.1703, + "num_input_tokens_seen": 16211968, + "step": 24075 + }, + { + "epoch": 0.5882784061759461, + "grad_norm": 17.799901962280273, + "learning_rate": 1.998103040503641e-06, + "loss": 0.0712, + "num_input_tokens_seen": 16215296, + "step": 24080 + }, + { + "epoch": 0.5884005570077933, + "grad_norm": 1.8213036060333252, + "learning_rate": 1.9980977867096097e-06, + "loss": 0.0626, + "num_input_tokens_seen": 16218560, + "step": 24085 + }, + { + "epoch": 0.5885227078396403, + "grad_norm": 9.8761625289917, + "learning_rate": 1.9980925256571424e-06, + "loss": 0.1004, + "num_input_tokens_seen": 16221504, + "step": 24090 + }, + { + "epoch": 0.5886448586714875, + "grad_norm": 58.34889602661133, + "learning_rate": 1.9980872573462783e-06, + "loss": 0.1884, + "num_input_tokens_seen": 16225088, + "step": 24095 + }, + { + "epoch": 0.5887670095033347, + "grad_norm": 23.790300369262695, + "learning_rate": 1.9980819817770546e-06, + "loss": 0.0542, + "num_input_tokens_seen": 16228928, + "step": 24100 + }, + { + "epoch": 0.5888891603351819, + "grad_norm": 22.176443099975586, + "learning_rate": 1.9980766989495107e-06, + "loss": 0.0811, + "num_input_tokens_seen": 16232064, + "step": 24105 + }, + { + "epoch": 0.5890113111670291, + "grad_norm": 12.136371612548828, + "learning_rate": 1.9980714088636844e-06, + "loss": 0.1497, + "num_input_tokens_seen": 16235136, + "step": 24110 + }, + { + "epoch": 0.5891334619988762, + "grad_norm": 16.363262176513672, + "learning_rate": 1.9980661115196145e-06, + "loss": 0.0565, + "num_input_tokens_seen": 16239168, + "step": 24115 + }, + { + "epoch": 0.5892556128307234, + "grad_norm": 1.3040426969528198, + "learning_rate": 1.998060806917339e-06, + "loss": 0.14, + "num_input_tokens_seen": 16242368, + "step": 24120 + }, + { + "epoch": 0.5893777636625706, + "grad_norm": 0.18406778573989868, + "learning_rate": 1.9980554950568973e-06, + "loss": 0.0388, + "num_input_tokens_seen": 16245888, + "step": 24125 + }, + { + "epoch": 0.5894999144944177, + "grad_norm": 2.5366475582122803, + "learning_rate": 1.9980501759383276e-06, + "loss": 0.1424, + "num_input_tokens_seen": 16249152, + "step": 24130 + }, + { + "epoch": 0.5896220653262648, + "grad_norm": 4.652827739715576, + "learning_rate": 1.9980448495616685e-06, + "loss": 0.0769, + "num_input_tokens_seen": 16252416, + "step": 24135 + }, + { + "epoch": 0.589744216158112, + "grad_norm": 24.142471313476562, + "learning_rate": 1.9980395159269586e-06, + "loss": 0.206, + "num_input_tokens_seen": 16255872, + "step": 24140 + }, + { + "epoch": 0.5898663669899592, + "grad_norm": 0.5410691499710083, + "learning_rate": 1.9980341750342372e-06, + "loss": 0.121, + "num_input_tokens_seen": 16259968, + "step": 24145 + }, + { + "epoch": 0.5899885178218064, + "grad_norm": 14.989289283752441, + "learning_rate": 1.9980288268835425e-06, + "loss": 0.2076, + "num_input_tokens_seen": 16263488, + "step": 24150 + }, + { + "epoch": 0.5901106686536536, + "grad_norm": 10.395974159240723, + "learning_rate": 1.998023471474914e-06, + "loss": 0.0236, + "num_input_tokens_seen": 16266368, + "step": 24155 + }, + { + "epoch": 0.5902328194855007, + "grad_norm": 26.658100128173828, + "learning_rate": 1.9980181088083903e-06, + "loss": 0.1343, + "num_input_tokens_seen": 16269760, + "step": 24160 + }, + { + "epoch": 0.5903549703173478, + "grad_norm": 1.5090116262435913, + "learning_rate": 1.9980127388840106e-06, + "loss": 0.1414, + "num_input_tokens_seen": 16273216, + "step": 24165 + }, + { + "epoch": 0.590477121149195, + "grad_norm": 12.554591178894043, + "learning_rate": 1.9980073617018135e-06, + "loss": 0.0678, + "num_input_tokens_seen": 16276288, + "step": 24170 + }, + { + "epoch": 0.5905992719810422, + "grad_norm": 12.531062126159668, + "learning_rate": 1.9980019772618387e-06, + "loss": 0.0568, + "num_input_tokens_seen": 16279872, + "step": 24175 + }, + { + "epoch": 0.5907214228128893, + "grad_norm": 25.284109115600586, + "learning_rate": 1.997996585564125e-06, + "loss": 0.1065, + "num_input_tokens_seen": 16283136, + "step": 24180 + }, + { + "epoch": 0.5908435736447365, + "grad_norm": 9.407654762268066, + "learning_rate": 1.997991186608712e-06, + "loss": 0.1299, + "num_input_tokens_seen": 16286720, + "step": 24185 + }, + { + "epoch": 0.5909657244765837, + "grad_norm": 1.669158697128296, + "learning_rate": 1.9979857803956383e-06, + "loss": 0.0945, + "num_input_tokens_seen": 16289792, + "step": 24190 + }, + { + "epoch": 0.5910878753084309, + "grad_norm": 16.71095848083496, + "learning_rate": 1.9979803669249434e-06, + "loss": 0.2258, + "num_input_tokens_seen": 16293184, + "step": 24195 + }, + { + "epoch": 0.5912100261402781, + "grad_norm": 21.24453353881836, + "learning_rate": 1.9979749461966672e-06, + "loss": 0.1589, + "num_input_tokens_seen": 16296448, + "step": 24200 + }, + { + "epoch": 0.5913321769721251, + "grad_norm": 23.289432525634766, + "learning_rate": 1.997969518210849e-06, + "loss": 0.2786, + "num_input_tokens_seen": 16299904, + "step": 24205 + }, + { + "epoch": 0.5914543278039723, + "grad_norm": 18.00309181213379, + "learning_rate": 1.9979640829675273e-06, + "loss": 0.0697, + "num_input_tokens_seen": 16303424, + "step": 24210 + }, + { + "epoch": 0.5915764786358195, + "grad_norm": 17.946182250976562, + "learning_rate": 1.997958640466743e-06, + "loss": 0.0782, + "num_input_tokens_seen": 16306560, + "step": 24215 + }, + { + "epoch": 0.5916986294676667, + "grad_norm": 7.101922512054443, + "learning_rate": 1.997953190708535e-06, + "loss": 0.2103, + "num_input_tokens_seen": 16309632, + "step": 24220 + }, + { + "epoch": 0.5918207802995138, + "grad_norm": 4.2682013511657715, + "learning_rate": 1.9979477336929426e-06, + "loss": 0.0855, + "num_input_tokens_seen": 16312704, + "step": 24225 + }, + { + "epoch": 0.591942931131361, + "grad_norm": 23.60270118713379, + "learning_rate": 1.9979422694200062e-06, + "loss": 0.0863, + "num_input_tokens_seen": 16316160, + "step": 24230 + }, + { + "epoch": 0.5920650819632082, + "grad_norm": 0.4715448319911957, + "learning_rate": 1.997936797889765e-06, + "loss": 0.0656, + "num_input_tokens_seen": 16319872, + "step": 24235 + }, + { + "epoch": 0.5921872327950554, + "grad_norm": 6.69528341293335, + "learning_rate": 1.997931319102259e-06, + "loss": 0.0183, + "num_input_tokens_seen": 16322752, + "step": 24240 + }, + { + "epoch": 0.5923093836269026, + "grad_norm": 8.694469451904297, + "learning_rate": 1.9979258330575283e-06, + "loss": 0.1014, + "num_input_tokens_seen": 16326208, + "step": 24245 + }, + { + "epoch": 0.5924315344587496, + "grad_norm": 17.657978057861328, + "learning_rate": 1.9979203397556124e-06, + "loss": 0.0631, + "num_input_tokens_seen": 16329664, + "step": 24250 + }, + { + "epoch": 0.5925536852905968, + "grad_norm": 16.77488899230957, + "learning_rate": 1.997914839196551e-06, + "loss": 0.1876, + "num_input_tokens_seen": 16332800, + "step": 24255 + }, + { + "epoch": 0.592675836122444, + "grad_norm": 1.8182240724563599, + "learning_rate": 1.997909331380385e-06, + "loss": 0.1368, + "num_input_tokens_seen": 16336512, + "step": 24260 + }, + { + "epoch": 0.5927979869542912, + "grad_norm": 26.796363830566406, + "learning_rate": 1.997903816307154e-06, + "loss": 0.0687, + "num_input_tokens_seen": 16340160, + "step": 24265 + }, + { + "epoch": 0.5929201377861383, + "grad_norm": 26.58648109436035, + "learning_rate": 1.9978982939768975e-06, + "loss": 0.2328, + "num_input_tokens_seen": 16343680, + "step": 24270 + }, + { + "epoch": 0.5930422886179855, + "grad_norm": 6.620156288146973, + "learning_rate": 1.9978927643896567e-06, + "loss": 0.0459, + "num_input_tokens_seen": 16347200, + "step": 24275 + }, + { + "epoch": 0.5931644394498327, + "grad_norm": 15.268500328063965, + "learning_rate": 1.9978872275454713e-06, + "loss": 0.1105, + "num_input_tokens_seen": 16350272, + "step": 24280 + }, + { + "epoch": 0.5932865902816798, + "grad_norm": 2.916973352432251, + "learning_rate": 1.997881683444381e-06, + "loss": 0.1195, + "num_input_tokens_seen": 16353728, + "step": 24285 + }, + { + "epoch": 0.593408741113527, + "grad_norm": 24.336292266845703, + "learning_rate": 1.997876132086427e-06, + "loss": 0.1208, + "num_input_tokens_seen": 16357888, + "step": 24290 + }, + { + "epoch": 0.5935308919453741, + "grad_norm": 7.969274997711182, + "learning_rate": 1.99787057347165e-06, + "loss": 0.1052, + "num_input_tokens_seen": 16361280, + "step": 24295 + }, + { + "epoch": 0.5936530427772213, + "grad_norm": 18.69225311279297, + "learning_rate": 1.9978650076000887e-06, + "loss": 0.1275, + "num_input_tokens_seen": 16365312, + "step": 24300 + }, + { + "epoch": 0.5937751936090685, + "grad_norm": 18.079143524169922, + "learning_rate": 1.9978594344717855e-06, + "loss": 0.0785, + "num_input_tokens_seen": 16368832, + "step": 24305 + }, + { + "epoch": 0.5938973444409157, + "grad_norm": 38.38662338256836, + "learning_rate": 1.99785385408678e-06, + "loss": 0.0691, + "num_input_tokens_seen": 16372480, + "step": 24310 + }, + { + "epoch": 0.5940194952727628, + "grad_norm": 0.4318760633468628, + "learning_rate": 1.9978482664451126e-06, + "loss": 0.1628, + "num_input_tokens_seen": 16375424, + "step": 24315 + }, + { + "epoch": 0.59414164610461, + "grad_norm": 3.622248411178589, + "learning_rate": 1.997842671546824e-06, + "loss": 0.1075, + "num_input_tokens_seen": 16378624, + "step": 24320 + }, + { + "epoch": 0.5942637969364571, + "grad_norm": 0.4291881024837494, + "learning_rate": 1.997837069391956e-06, + "loss": 0.0945, + "num_input_tokens_seen": 16382592, + "step": 24325 + }, + { + "epoch": 0.5943859477683043, + "grad_norm": 20.735027313232422, + "learning_rate": 1.997831459980548e-06, + "loss": 0.1419, + "num_input_tokens_seen": 16386176, + "step": 24330 + }, + { + "epoch": 0.5945080986001514, + "grad_norm": 23.61785125732422, + "learning_rate": 1.997825843312641e-06, + "loss": 0.1901, + "num_input_tokens_seen": 16389376, + "step": 24335 + }, + { + "epoch": 0.5946302494319986, + "grad_norm": 9.151907920837402, + "learning_rate": 1.997820219388276e-06, + "loss": 0.2088, + "num_input_tokens_seen": 16392832, + "step": 24340 + }, + { + "epoch": 0.5947524002638458, + "grad_norm": 11.61373233795166, + "learning_rate": 1.997814588207494e-06, + "loss": 0.0537, + "num_input_tokens_seen": 16396544, + "step": 24345 + }, + { + "epoch": 0.594874551095693, + "grad_norm": 10.756101608276367, + "learning_rate": 1.9978089497703366e-06, + "loss": 0.1235, + "num_input_tokens_seen": 16399680, + "step": 24350 + }, + { + "epoch": 0.5949967019275402, + "grad_norm": 19.338497161865234, + "learning_rate": 1.9978033040768435e-06, + "loss": 0.1369, + "num_input_tokens_seen": 16403264, + "step": 24355 + }, + { + "epoch": 0.5951188527593873, + "grad_norm": 8.648137092590332, + "learning_rate": 1.9977976511270564e-06, + "loss": 0.1138, + "num_input_tokens_seen": 16406464, + "step": 24360 + }, + { + "epoch": 0.5952410035912344, + "grad_norm": 22.793720245361328, + "learning_rate": 1.9977919909210167e-06, + "loss": 0.1247, + "num_input_tokens_seen": 16409856, + "step": 24365 + }, + { + "epoch": 0.5953631544230816, + "grad_norm": 12.735052108764648, + "learning_rate": 1.997786323458765e-06, + "loss": 0.0726, + "num_input_tokens_seen": 16413120, + "step": 24370 + }, + { + "epoch": 0.5954853052549288, + "grad_norm": 11.214115142822266, + "learning_rate": 1.997780648740343e-06, + "loss": 0.1003, + "num_input_tokens_seen": 16416512, + "step": 24375 + }, + { + "epoch": 0.5956074560867759, + "grad_norm": 14.181450843811035, + "learning_rate": 1.997774966765792e-06, + "loss": 0.1277, + "num_input_tokens_seen": 16420736, + "step": 24380 + }, + { + "epoch": 0.5957296069186231, + "grad_norm": 12.658021926879883, + "learning_rate": 1.9977692775351525e-06, + "loss": 0.0907, + "num_input_tokens_seen": 16424384, + "step": 24385 + }, + { + "epoch": 0.5958517577504703, + "grad_norm": 1.3953849077224731, + "learning_rate": 1.997763581048467e-06, + "loss": 0.0685, + "num_input_tokens_seen": 16427520, + "step": 24390 + }, + { + "epoch": 0.5959739085823175, + "grad_norm": 8.562386512756348, + "learning_rate": 1.997757877305776e-06, + "loss": 0.0535, + "num_input_tokens_seen": 16430912, + "step": 24395 + }, + { + "epoch": 0.5960960594141647, + "grad_norm": 31.926748275756836, + "learning_rate": 1.997752166307121e-06, + "loss": 0.1696, + "num_input_tokens_seen": 16434048, + "step": 24400 + }, + { + "epoch": 0.5962182102460117, + "grad_norm": 24.208181381225586, + "learning_rate": 1.9977464480525447e-06, + "loss": 0.1728, + "num_input_tokens_seen": 16437120, + "step": 24405 + }, + { + "epoch": 0.5963403610778589, + "grad_norm": 0.18723651766777039, + "learning_rate": 1.997740722542087e-06, + "loss": 0.0879, + "num_input_tokens_seen": 16440256, + "step": 24410 + }, + { + "epoch": 0.5964625119097061, + "grad_norm": 20.13685417175293, + "learning_rate": 1.9977349897757913e-06, + "loss": 0.1947, + "num_input_tokens_seen": 16443520, + "step": 24415 + }, + { + "epoch": 0.5965846627415533, + "grad_norm": 23.72542953491211, + "learning_rate": 1.9977292497536976e-06, + "loss": 0.0704, + "num_input_tokens_seen": 16446720, + "step": 24420 + }, + { + "epoch": 0.5967068135734004, + "grad_norm": 3.0584630966186523, + "learning_rate": 1.997723502475849e-06, + "loss": 0.1163, + "num_input_tokens_seen": 16450112, + "step": 24425 + }, + { + "epoch": 0.5968289644052476, + "grad_norm": 0.9413558840751648, + "learning_rate": 1.9977177479422865e-06, + "loss": 0.0953, + "num_input_tokens_seen": 16453760, + "step": 24430 + }, + { + "epoch": 0.5969511152370948, + "grad_norm": 18.52378273010254, + "learning_rate": 1.997711986153052e-06, + "loss": 0.1416, + "num_input_tokens_seen": 16457280, + "step": 24435 + }, + { + "epoch": 0.597073266068942, + "grad_norm": 7.341268539428711, + "learning_rate": 1.997706217108188e-06, + "loss": 0.073, + "num_input_tokens_seen": 16460544, + "step": 24440 + }, + { + "epoch": 0.5971954169007891, + "grad_norm": 44.4236946105957, + "learning_rate": 1.997700440807736e-06, + "loss": 0.1074, + "num_input_tokens_seen": 16463552, + "step": 24445 + }, + { + "epoch": 0.5973175677326362, + "grad_norm": 16.1765193939209, + "learning_rate": 1.9976946572517377e-06, + "loss": 0.2375, + "num_input_tokens_seen": 16467008, + "step": 24450 + }, + { + "epoch": 0.5974397185644834, + "grad_norm": 0.35032007098197937, + "learning_rate": 1.997688866440236e-06, + "loss": 0.0157, + "num_input_tokens_seen": 16470784, + "step": 24455 + }, + { + "epoch": 0.5975618693963306, + "grad_norm": 36.04292297363281, + "learning_rate": 1.997683068373272e-06, + "loss": 0.0783, + "num_input_tokens_seen": 16473856, + "step": 24460 + }, + { + "epoch": 0.5976840202281778, + "grad_norm": 11.974666595458984, + "learning_rate": 1.997677263050889e-06, + "loss": 0.0781, + "num_input_tokens_seen": 16476992, + "step": 24465 + }, + { + "epoch": 0.5978061710600249, + "grad_norm": 12.146720886230469, + "learning_rate": 1.997671450473128e-06, + "loss": 0.1661, + "num_input_tokens_seen": 16480064, + "step": 24470 + }, + { + "epoch": 0.5979283218918721, + "grad_norm": 32.76498794555664, + "learning_rate": 1.997665630640032e-06, + "loss": 0.1997, + "num_input_tokens_seen": 16483200, + "step": 24475 + }, + { + "epoch": 0.5980504727237193, + "grad_norm": 0.38879939913749695, + "learning_rate": 1.9976598035516433e-06, + "loss": 0.1039, + "num_input_tokens_seen": 16486208, + "step": 24480 + }, + { + "epoch": 0.5981726235555664, + "grad_norm": 11.602495193481445, + "learning_rate": 1.997653969208004e-06, + "loss": 0.1477, + "num_input_tokens_seen": 16489664, + "step": 24485 + }, + { + "epoch": 0.5982947743874136, + "grad_norm": 10.761492729187012, + "learning_rate": 1.9976481276091572e-06, + "loss": 0.1914, + "num_input_tokens_seen": 16493184, + "step": 24490 + }, + { + "epoch": 0.5984169252192607, + "grad_norm": 15.484816551208496, + "learning_rate": 1.9976422787551443e-06, + "loss": 0.0836, + "num_input_tokens_seen": 16496448, + "step": 24495 + }, + { + "epoch": 0.5985390760511079, + "grad_norm": 15.162385940551758, + "learning_rate": 1.9976364226460087e-06, + "loss": 0.1352, + "num_input_tokens_seen": 16499648, + "step": 24500 + }, + { + "epoch": 0.5986612268829551, + "grad_norm": 1.365733027458191, + "learning_rate": 1.9976305592817928e-06, + "loss": 0.152, + "num_input_tokens_seen": 16502656, + "step": 24505 + }, + { + "epoch": 0.5987833777148023, + "grad_norm": 12.582515716552734, + "learning_rate": 1.997624688662539e-06, + "loss": 0.1363, + "num_input_tokens_seen": 16505728, + "step": 24510 + }, + { + "epoch": 0.5989055285466494, + "grad_norm": 10.660904884338379, + "learning_rate": 1.99761881078829e-06, + "loss": 0.1303, + "num_input_tokens_seen": 16509440, + "step": 24515 + }, + { + "epoch": 0.5990276793784965, + "grad_norm": 22.079450607299805, + "learning_rate": 1.9976129256590885e-06, + "loss": 0.1228, + "num_input_tokens_seen": 16512320, + "step": 24520 + }, + { + "epoch": 0.5991498302103437, + "grad_norm": 4.311355113983154, + "learning_rate": 1.997607033274978e-06, + "loss": 0.0758, + "num_input_tokens_seen": 16515456, + "step": 24525 + }, + { + "epoch": 0.5992719810421909, + "grad_norm": 2.3310272693634033, + "learning_rate": 1.9976011336360005e-06, + "loss": 0.0758, + "num_input_tokens_seen": 16518656, + "step": 24530 + }, + { + "epoch": 0.5993941318740381, + "grad_norm": 18.549488067626953, + "learning_rate": 1.9975952267421995e-06, + "loss": 0.0999, + "num_input_tokens_seen": 16521984, + "step": 24535 + }, + { + "epoch": 0.5995162827058852, + "grad_norm": 19.15254020690918, + "learning_rate": 1.9975893125936176e-06, + "loss": 0.1037, + "num_input_tokens_seen": 16525376, + "step": 24540 + }, + { + "epoch": 0.5996384335377324, + "grad_norm": 3.7639474868774414, + "learning_rate": 1.9975833911902975e-06, + "loss": 0.2703, + "num_input_tokens_seen": 16528512, + "step": 24545 + }, + { + "epoch": 0.5997605843695796, + "grad_norm": 1.803524136543274, + "learning_rate": 1.997577462532283e-06, + "loss": 0.058, + "num_input_tokens_seen": 16531840, + "step": 24550 + }, + { + "epoch": 0.5998827352014268, + "grad_norm": 18.983144760131836, + "learning_rate": 1.997571526619617e-06, + "loss": 0.1103, + "num_input_tokens_seen": 16535680, + "step": 24555 + }, + { + "epoch": 0.6000048860332738, + "grad_norm": 13.398836135864258, + "learning_rate": 1.9975655834523426e-06, + "loss": 0.0944, + "num_input_tokens_seen": 16539072, + "step": 24560 + }, + { + "epoch": 0.600127036865121, + "grad_norm": 18.70598602294922, + "learning_rate": 1.9975596330305027e-06, + "loss": 0.1462, + "num_input_tokens_seen": 16542464, + "step": 24565 + }, + { + "epoch": 0.6002491876969682, + "grad_norm": 18.957441329956055, + "learning_rate": 1.997553675354141e-06, + "loss": 0.0323, + "num_input_tokens_seen": 16545920, + "step": 24570 + }, + { + "epoch": 0.6003713385288154, + "grad_norm": 14.466696739196777, + "learning_rate": 1.9975477104233005e-06, + "loss": 0.0757, + "num_input_tokens_seen": 16549184, + "step": 24575 + }, + { + "epoch": 0.6004934893606625, + "grad_norm": 16.620651245117188, + "learning_rate": 1.9975417382380247e-06, + "loss": 0.1313, + "num_input_tokens_seen": 16552128, + "step": 24580 + }, + { + "epoch": 0.6006156401925097, + "grad_norm": 26.417072296142578, + "learning_rate": 1.997535758798357e-06, + "loss": 0.1199, + "num_input_tokens_seen": 16555712, + "step": 24585 + }, + { + "epoch": 0.6007377910243569, + "grad_norm": 9.686941146850586, + "learning_rate": 1.9975297721043413e-06, + "loss": 0.1158, + "num_input_tokens_seen": 16559104, + "step": 24590 + }, + { + "epoch": 0.6008599418562041, + "grad_norm": 28.315837860107422, + "learning_rate": 1.9975237781560205e-06, + "loss": 0.0862, + "num_input_tokens_seen": 16562880, + "step": 24595 + }, + { + "epoch": 0.6009820926880513, + "grad_norm": 22.558536529541016, + "learning_rate": 1.997517776953439e-06, + "loss": 0.1336, + "num_input_tokens_seen": 16566080, + "step": 24600 + }, + { + "epoch": 0.6011042435198983, + "grad_norm": 24.751571655273438, + "learning_rate": 1.9975117684966394e-06, + "loss": 0.1872, + "num_input_tokens_seen": 16569792, + "step": 24605 + }, + { + "epoch": 0.6012263943517455, + "grad_norm": 0.8444401621818542, + "learning_rate": 1.997505752785666e-06, + "loss": 0.0663, + "num_input_tokens_seen": 16572736, + "step": 24610 + }, + { + "epoch": 0.6013485451835927, + "grad_norm": 2.5160603523254395, + "learning_rate": 1.9974997298205624e-06, + "loss": 0.1085, + "num_input_tokens_seen": 16576192, + "step": 24615 + }, + { + "epoch": 0.6014706960154399, + "grad_norm": 10.282331466674805, + "learning_rate": 1.9974936996013727e-06, + "loss": 0.2155, + "num_input_tokens_seen": 16579328, + "step": 24620 + }, + { + "epoch": 0.601592846847287, + "grad_norm": 2.0695960521698, + "learning_rate": 1.9974876621281407e-06, + "loss": 0.0528, + "num_input_tokens_seen": 16582464, + "step": 24625 + }, + { + "epoch": 0.6017149976791342, + "grad_norm": 42.36409378051758, + "learning_rate": 1.9974816174009096e-06, + "loss": 0.169, + "num_input_tokens_seen": 16585856, + "step": 24630 + }, + { + "epoch": 0.6018371485109814, + "grad_norm": 0.8332366943359375, + "learning_rate": 1.9974755654197244e-06, + "loss": 0.126, + "num_input_tokens_seen": 16589568, + "step": 24635 + }, + { + "epoch": 0.6019592993428285, + "grad_norm": 0.6155003309249878, + "learning_rate": 1.9974695061846283e-06, + "loss": 0.1085, + "num_input_tokens_seen": 16593088, + "step": 24640 + }, + { + "epoch": 0.6020814501746757, + "grad_norm": 1.1654919385910034, + "learning_rate": 1.9974634396956656e-06, + "loss": 0.1587, + "num_input_tokens_seen": 16597376, + "step": 24645 + }, + { + "epoch": 0.6022036010065228, + "grad_norm": 0.6619818210601807, + "learning_rate": 1.9974573659528805e-06, + "loss": 0.1954, + "num_input_tokens_seen": 16600704, + "step": 24650 + }, + { + "epoch": 0.60232575183837, + "grad_norm": 22.623855590820312, + "learning_rate": 1.9974512849563174e-06, + "loss": 0.117, + "num_input_tokens_seen": 16604416, + "step": 24655 + }, + { + "epoch": 0.6024479026702172, + "grad_norm": 2.9463284015655518, + "learning_rate": 1.9974451967060204e-06, + "loss": 0.1157, + "num_input_tokens_seen": 16607680, + "step": 24660 + }, + { + "epoch": 0.6025700535020644, + "grad_norm": 3.2619848251342773, + "learning_rate": 1.997439101202033e-06, + "loss": 0.0785, + "num_input_tokens_seen": 16610752, + "step": 24665 + }, + { + "epoch": 0.6026922043339115, + "grad_norm": 28.431598663330078, + "learning_rate": 1.9974329984444007e-06, + "loss": 0.1308, + "num_input_tokens_seen": 16614336, + "step": 24670 + }, + { + "epoch": 0.6028143551657587, + "grad_norm": 1.6486842632293701, + "learning_rate": 1.997426888433167e-06, + "loss": 0.0228, + "num_input_tokens_seen": 16617856, + "step": 24675 + }, + { + "epoch": 0.6029365059976058, + "grad_norm": 0.8875168561935425, + "learning_rate": 1.9974207711683772e-06, + "loss": 0.1213, + "num_input_tokens_seen": 16621120, + "step": 24680 + }, + { + "epoch": 0.603058656829453, + "grad_norm": 11.766212463378906, + "learning_rate": 1.9974146466500746e-06, + "loss": 0.1865, + "num_input_tokens_seen": 16624320, + "step": 24685 + }, + { + "epoch": 0.6031808076613002, + "grad_norm": 19.604454040527344, + "learning_rate": 1.997408514878305e-06, + "loss": 0.202, + "num_input_tokens_seen": 16627456, + "step": 24690 + }, + { + "epoch": 0.6033029584931473, + "grad_norm": 24.02836036682129, + "learning_rate": 1.997402375853112e-06, + "loss": 0.2102, + "num_input_tokens_seen": 16630848, + "step": 24695 + }, + { + "epoch": 0.6034251093249945, + "grad_norm": 1.0167436599731445, + "learning_rate": 1.997396229574541e-06, + "loss": 0.0908, + "num_input_tokens_seen": 16634176, + "step": 24700 + }, + { + "epoch": 0.6035472601568417, + "grad_norm": 23.059194564819336, + "learning_rate": 1.9973900760426364e-06, + "loss": 0.1419, + "num_input_tokens_seen": 16637824, + "step": 24705 + }, + { + "epoch": 0.6036694109886889, + "grad_norm": 4.2288498878479, + "learning_rate": 1.9973839152574425e-06, + "loss": 0.104, + "num_input_tokens_seen": 16641344, + "step": 24710 + }, + { + "epoch": 0.603791561820536, + "grad_norm": 0.7364501953125, + "learning_rate": 1.9973777472190046e-06, + "loss": 0.0662, + "num_input_tokens_seen": 16644480, + "step": 24715 + }, + { + "epoch": 0.6039137126523831, + "grad_norm": 12.857014656066895, + "learning_rate": 1.9973715719273677e-06, + "loss": 0.0551, + "num_input_tokens_seen": 16647808, + "step": 24720 + }, + { + "epoch": 0.6040358634842303, + "grad_norm": 5.512856960296631, + "learning_rate": 1.9973653893825762e-06, + "loss": 0.0722, + "num_input_tokens_seen": 16651264, + "step": 24725 + }, + { + "epoch": 0.6041580143160775, + "grad_norm": 41.143104553222656, + "learning_rate": 1.9973591995846755e-06, + "loss": 0.1888, + "num_input_tokens_seen": 16654400, + "step": 24730 + }, + { + "epoch": 0.6042801651479247, + "grad_norm": 5.552034854888916, + "learning_rate": 1.9973530025337105e-06, + "loss": 0.066, + "num_input_tokens_seen": 16657856, + "step": 24735 + }, + { + "epoch": 0.6044023159797718, + "grad_norm": 2.2578725814819336, + "learning_rate": 1.997346798229726e-06, + "loss": 0.0497, + "num_input_tokens_seen": 16660864, + "step": 24740 + }, + { + "epoch": 0.604524466811619, + "grad_norm": 10.679595947265625, + "learning_rate": 1.9973405866727673e-06, + "loss": 0.1068, + "num_input_tokens_seen": 16664320, + "step": 24745 + }, + { + "epoch": 0.6046466176434662, + "grad_norm": 8.024046897888184, + "learning_rate": 1.99733436786288e-06, + "loss": 0.1451, + "num_input_tokens_seen": 16667648, + "step": 24750 + }, + { + "epoch": 0.6047687684753134, + "grad_norm": 0.018951889127492905, + "learning_rate": 1.997328141800109e-06, + "loss": 0.0484, + "num_input_tokens_seen": 16671424, + "step": 24755 + }, + { + "epoch": 0.6048909193071604, + "grad_norm": 0.1066962331533432, + "learning_rate": 1.997321908484499e-06, + "loss": 0.2128, + "num_input_tokens_seen": 16675264, + "step": 24760 + }, + { + "epoch": 0.6050130701390076, + "grad_norm": 23.60710334777832, + "learning_rate": 1.997315667916096e-06, + "loss": 0.2069, + "num_input_tokens_seen": 16678976, + "step": 24765 + }, + { + "epoch": 0.6051352209708548, + "grad_norm": 12.42275333404541, + "learning_rate": 1.997309420094945e-06, + "loss": 0.0918, + "num_input_tokens_seen": 16682624, + "step": 24770 + }, + { + "epoch": 0.605257371802702, + "grad_norm": 3.2556633949279785, + "learning_rate": 1.9973031650210922e-06, + "loss": 0.1285, + "num_input_tokens_seen": 16685952, + "step": 24775 + }, + { + "epoch": 0.6053795226345492, + "grad_norm": 16.80837631225586, + "learning_rate": 1.997296902694582e-06, + "loss": 0.081, + "num_input_tokens_seen": 16690112, + "step": 24780 + }, + { + "epoch": 0.6055016734663963, + "grad_norm": 17.06452178955078, + "learning_rate": 1.997290633115461e-06, + "loss": 0.1219, + "num_input_tokens_seen": 16693696, + "step": 24785 + }, + { + "epoch": 0.6056238242982435, + "grad_norm": 7.428202152252197, + "learning_rate": 1.9972843562837737e-06, + "loss": 0.1666, + "num_input_tokens_seen": 16696768, + "step": 24790 + }, + { + "epoch": 0.6057459751300907, + "grad_norm": 19.68970489501953, + "learning_rate": 1.997278072199567e-06, + "loss": 0.0921, + "num_input_tokens_seen": 16700032, + "step": 24795 + }, + { + "epoch": 0.6058681259619378, + "grad_norm": 7.9083027839660645, + "learning_rate": 1.997271780862885e-06, + "loss": 0.0925, + "num_input_tokens_seen": 16703744, + "step": 24800 + }, + { + "epoch": 0.6059902767937849, + "grad_norm": 7.435184001922607, + "learning_rate": 1.9972654822737753e-06, + "loss": 0.085, + "num_input_tokens_seen": 16707264, + "step": 24805 + }, + { + "epoch": 0.6061124276256321, + "grad_norm": 16.882427215576172, + "learning_rate": 1.997259176432282e-06, + "loss": 0.0473, + "num_input_tokens_seen": 16710976, + "step": 24810 + }, + { + "epoch": 0.6062345784574793, + "grad_norm": 22.054662704467773, + "learning_rate": 1.997252863338452e-06, + "loss": 0.0786, + "num_input_tokens_seen": 16714560, + "step": 24815 + }, + { + "epoch": 0.6063567292893265, + "grad_norm": 4.894041061401367, + "learning_rate": 1.9972465429923315e-06, + "loss": 0.0628, + "num_input_tokens_seen": 16718144, + "step": 24820 + }, + { + "epoch": 0.6064788801211737, + "grad_norm": 7.330321788787842, + "learning_rate": 1.997240215393965e-06, + "loss": 0.1397, + "num_input_tokens_seen": 16721344, + "step": 24825 + }, + { + "epoch": 0.6066010309530208, + "grad_norm": 11.391115188598633, + "learning_rate": 1.9972338805434002e-06, + "loss": 0.1264, + "num_input_tokens_seen": 16724480, + "step": 24830 + }, + { + "epoch": 0.606723181784868, + "grad_norm": 10.962759971618652, + "learning_rate": 1.9972275384406823e-06, + "loss": 0.0505, + "num_input_tokens_seen": 16727808, + "step": 24835 + }, + { + "epoch": 0.6068453326167151, + "grad_norm": 14.033049583435059, + "learning_rate": 1.997221189085857e-06, + "loss": 0.0473, + "num_input_tokens_seen": 16730752, + "step": 24840 + }, + { + "epoch": 0.6069674834485623, + "grad_norm": 8.855101585388184, + "learning_rate": 1.9972148324789714e-06, + "loss": 0.1355, + "num_input_tokens_seen": 16734144, + "step": 24845 + }, + { + "epoch": 0.6070896342804094, + "grad_norm": 19.79100799560547, + "learning_rate": 1.9972084686200712e-06, + "loss": 0.2389, + "num_input_tokens_seen": 16737792, + "step": 24850 + }, + { + "epoch": 0.6072117851122566, + "grad_norm": 12.284224510192871, + "learning_rate": 1.997202097509203e-06, + "loss": 0.2192, + "num_input_tokens_seen": 16740928, + "step": 24855 + }, + { + "epoch": 0.6073339359441038, + "grad_norm": 17.570894241333008, + "learning_rate": 1.997195719146413e-06, + "loss": 0.1178, + "num_input_tokens_seen": 16744256, + "step": 24860 + }, + { + "epoch": 0.607456086775951, + "grad_norm": 12.302034378051758, + "learning_rate": 1.9971893335317472e-06, + "loss": 0.1032, + "num_input_tokens_seen": 16747776, + "step": 24865 + }, + { + "epoch": 0.607578237607798, + "grad_norm": 17.891658782958984, + "learning_rate": 1.997182940665252e-06, + "loss": 0.1418, + "num_input_tokens_seen": 16751168, + "step": 24870 + }, + { + "epoch": 0.6077003884396452, + "grad_norm": 7.210624694824219, + "learning_rate": 1.997176540546975e-06, + "loss": 0.1081, + "num_input_tokens_seen": 16754624, + "step": 24875 + }, + { + "epoch": 0.6078225392714924, + "grad_norm": 16.911026000976562, + "learning_rate": 1.997170133176962e-06, + "loss": 0.0947, + "num_input_tokens_seen": 16758016, + "step": 24880 + }, + { + "epoch": 0.6079446901033396, + "grad_norm": 8.62895679473877, + "learning_rate": 1.9971637185552593e-06, + "loss": 0.0705, + "num_input_tokens_seen": 16761344, + "step": 24885 + }, + { + "epoch": 0.6080668409351868, + "grad_norm": 18.918378829956055, + "learning_rate": 1.997157296681914e-06, + "loss": 0.1168, + "num_input_tokens_seen": 16764480, + "step": 24890 + }, + { + "epoch": 0.6081889917670339, + "grad_norm": 8.970353126525879, + "learning_rate": 1.997150867556972e-06, + "loss": 0.1466, + "num_input_tokens_seen": 16767936, + "step": 24895 + }, + { + "epoch": 0.6083111425988811, + "grad_norm": 17.729246139526367, + "learning_rate": 1.997144431180481e-06, + "loss": 0.0522, + "num_input_tokens_seen": 16771328, + "step": 24900 + }, + { + "epoch": 0.6084332934307283, + "grad_norm": 25.908403396606445, + "learning_rate": 1.9971379875524876e-06, + "loss": 0.1292, + "num_input_tokens_seen": 16774400, + "step": 24905 + }, + { + "epoch": 0.6085554442625755, + "grad_norm": 22.828269958496094, + "learning_rate": 1.9971315366730388e-06, + "loss": 0.13, + "num_input_tokens_seen": 16777536, + "step": 24910 + }, + { + "epoch": 0.6086775950944225, + "grad_norm": 13.071663856506348, + "learning_rate": 1.997125078542181e-06, + "loss": 0.0675, + "num_input_tokens_seen": 16780416, + "step": 24915 + }, + { + "epoch": 0.6087997459262697, + "grad_norm": 17.404088973999023, + "learning_rate": 1.9971186131599617e-06, + "loss": 0.0505, + "num_input_tokens_seen": 16783360, + "step": 24920 + }, + { + "epoch": 0.6089218967581169, + "grad_norm": 6.976062297821045, + "learning_rate": 1.9971121405264275e-06, + "loss": 0.1188, + "num_input_tokens_seen": 16786496, + "step": 24925 + }, + { + "epoch": 0.6090440475899641, + "grad_norm": 2.1634817123413086, + "learning_rate": 1.997105660641625e-06, + "loss": 0.0607, + "num_input_tokens_seen": 16789504, + "step": 24930 + }, + { + "epoch": 0.6091661984218113, + "grad_norm": 12.604896545410156, + "learning_rate": 1.997099173505603e-06, + "loss": 0.0866, + "num_input_tokens_seen": 16793088, + "step": 24935 + }, + { + "epoch": 0.6092883492536584, + "grad_norm": 1.5911040306091309, + "learning_rate": 1.997092679118407e-06, + "loss": 0.0902, + "num_input_tokens_seen": 16796864, + "step": 24940 + }, + { + "epoch": 0.6094105000855056, + "grad_norm": 11.825122833251953, + "learning_rate": 1.9970861774800848e-06, + "loss": 0.1309, + "num_input_tokens_seen": 16800256, + "step": 24945 + }, + { + "epoch": 0.6095326509173528, + "grad_norm": 20.49022102355957, + "learning_rate": 1.9970796685906838e-06, + "loss": 0.066, + "num_input_tokens_seen": 16803648, + "step": 24950 + }, + { + "epoch": 0.6096548017492, + "grad_norm": 12.80346393585205, + "learning_rate": 1.9970731524502517e-06, + "loss": 0.0865, + "num_input_tokens_seen": 16807232, + "step": 24955 + }, + { + "epoch": 0.609776952581047, + "grad_norm": 0.6074082851409912, + "learning_rate": 1.9970666290588348e-06, + "loss": 0.1032, + "num_input_tokens_seen": 16810624, + "step": 24960 + }, + { + "epoch": 0.6098991034128942, + "grad_norm": 37.6062126159668, + "learning_rate": 1.9970600984164817e-06, + "loss": 0.2635, + "num_input_tokens_seen": 16813696, + "step": 24965 + }, + { + "epoch": 0.6100212542447414, + "grad_norm": 25.862201690673828, + "learning_rate": 1.9970535605232394e-06, + "loss": 0.137, + "num_input_tokens_seen": 16816960, + "step": 24970 + }, + { + "epoch": 0.6101434050765886, + "grad_norm": 15.700074195861816, + "learning_rate": 1.9970470153791553e-06, + "loss": 0.0676, + "num_input_tokens_seen": 16820096, + "step": 24975 + }, + { + "epoch": 0.6102655559084358, + "grad_norm": 22.41241455078125, + "learning_rate": 1.997040462984277e-06, + "loss": 0.1488, + "num_input_tokens_seen": 16823232, + "step": 24980 + }, + { + "epoch": 0.6103877067402829, + "grad_norm": 7.360961437225342, + "learning_rate": 1.997033903338652e-06, + "loss": 0.0996, + "num_input_tokens_seen": 16826304, + "step": 24985 + }, + { + "epoch": 0.61050985757213, + "grad_norm": 19.96744728088379, + "learning_rate": 1.9970273364423292e-06, + "loss": 0.198, + "num_input_tokens_seen": 16829760, + "step": 24990 + }, + { + "epoch": 0.6106320084039772, + "grad_norm": 21.676393508911133, + "learning_rate": 1.9970207622953547e-06, + "loss": 0.1193, + "num_input_tokens_seen": 16832832, + "step": 24995 + }, + { + "epoch": 0.6107541592358244, + "grad_norm": 5.05864143371582, + "learning_rate": 1.9970141808977773e-06, + "loss": 0.0615, + "num_input_tokens_seen": 16836416, + "step": 25000 + }, + { + "epoch": 0.6108763100676715, + "grad_norm": 31.107868194580078, + "learning_rate": 1.9970075922496444e-06, + "loss": 0.1696, + "num_input_tokens_seen": 16839616, + "step": 25005 + }, + { + "epoch": 0.6109984608995187, + "grad_norm": 20.692991256713867, + "learning_rate": 1.9970009963510044e-06, + "loss": 0.0412, + "num_input_tokens_seen": 16843008, + "step": 25010 + }, + { + "epoch": 0.6111206117313659, + "grad_norm": 31.31917381286621, + "learning_rate": 1.9969943932019047e-06, + "loss": 0.1899, + "num_input_tokens_seen": 16846464, + "step": 25015 + }, + { + "epoch": 0.6112427625632131, + "grad_norm": 18.618642807006836, + "learning_rate": 1.996987782802394e-06, + "loss": 0.0507, + "num_input_tokens_seen": 16849984, + "step": 25020 + }, + { + "epoch": 0.6113649133950603, + "grad_norm": 0.8102744221687317, + "learning_rate": 1.9969811651525196e-06, + "loss": 0.0448, + "num_input_tokens_seen": 16853184, + "step": 25025 + }, + { + "epoch": 0.6114870642269074, + "grad_norm": 0.0666152685880661, + "learning_rate": 1.9969745402523303e-06, + "loss": 0.1035, + "num_input_tokens_seen": 16856640, + "step": 25030 + }, + { + "epoch": 0.6116092150587545, + "grad_norm": 41.11848831176758, + "learning_rate": 1.9969679081018737e-06, + "loss": 0.0872, + "num_input_tokens_seen": 16859712, + "step": 25035 + }, + { + "epoch": 0.6117313658906017, + "grad_norm": 9.146672248840332, + "learning_rate": 1.9969612687011987e-06, + "loss": 0.1359, + "num_input_tokens_seen": 16863040, + "step": 25040 + }, + { + "epoch": 0.6118535167224489, + "grad_norm": 22.66708755493164, + "learning_rate": 1.996954622050353e-06, + "loss": 0.205, + "num_input_tokens_seen": 16866176, + "step": 25045 + }, + { + "epoch": 0.611975667554296, + "grad_norm": 11.185577392578125, + "learning_rate": 1.996947968149385e-06, + "loss": 0.0858, + "num_input_tokens_seen": 16869632, + "step": 25050 + }, + { + "epoch": 0.6120978183861432, + "grad_norm": 4.112269878387451, + "learning_rate": 1.9969413069983435e-06, + "loss": 0.0653, + "num_input_tokens_seen": 16873024, + "step": 25055 + }, + { + "epoch": 0.6122199692179904, + "grad_norm": 10.49512004852295, + "learning_rate": 1.9969346385972764e-06, + "loss": 0.14, + "num_input_tokens_seen": 16876736, + "step": 25060 + }, + { + "epoch": 0.6123421200498376, + "grad_norm": 24.078441619873047, + "learning_rate": 1.9969279629462327e-06, + "loss": 0.2184, + "num_input_tokens_seen": 16880128, + "step": 25065 + }, + { + "epoch": 0.6124642708816848, + "grad_norm": 10.833511352539062, + "learning_rate": 1.9969212800452608e-06, + "loss": 0.066, + "num_input_tokens_seen": 16883072, + "step": 25070 + }, + { + "epoch": 0.6125864217135318, + "grad_norm": 18.679733276367188, + "learning_rate": 1.996914589894409e-06, + "loss": 0.0957, + "num_input_tokens_seen": 16886464, + "step": 25075 + }, + { + "epoch": 0.612708572545379, + "grad_norm": 4.080750942230225, + "learning_rate": 1.9969078924937263e-06, + "loss": 0.0604, + "num_input_tokens_seen": 16890112, + "step": 25080 + }, + { + "epoch": 0.6128307233772262, + "grad_norm": 33.040748596191406, + "learning_rate": 1.9969011878432608e-06, + "loss": 0.1052, + "num_input_tokens_seen": 16893632, + "step": 25085 + }, + { + "epoch": 0.6129528742090734, + "grad_norm": 27.59091567993164, + "learning_rate": 1.996894475943062e-06, + "loss": 0.1961, + "num_input_tokens_seen": 16896832, + "step": 25090 + }, + { + "epoch": 0.6130750250409205, + "grad_norm": 8.494720458984375, + "learning_rate": 1.996887756793179e-06, + "loss": 0.237, + "num_input_tokens_seen": 16900224, + "step": 25095 + }, + { + "epoch": 0.6131971758727677, + "grad_norm": 9.366389274597168, + "learning_rate": 1.9968810303936593e-06, + "loss": 0.0974, + "num_input_tokens_seen": 16903552, + "step": 25100 + }, + { + "epoch": 0.6133193267046149, + "grad_norm": 27.643888473510742, + "learning_rate": 1.996874296744553e-06, + "loss": 0.1542, + "num_input_tokens_seen": 16906688, + "step": 25105 + }, + { + "epoch": 0.613441477536462, + "grad_norm": 24.457929611206055, + "learning_rate": 1.9968675558459085e-06, + "loss": 0.0714, + "num_input_tokens_seen": 16909760, + "step": 25110 + }, + { + "epoch": 0.6135636283683091, + "grad_norm": 2.291285753250122, + "learning_rate": 1.9968608076977753e-06, + "loss": 0.0904, + "num_input_tokens_seen": 16913088, + "step": 25115 + }, + { + "epoch": 0.6136857792001563, + "grad_norm": 9.119710922241211, + "learning_rate": 1.996854052300202e-06, + "loss": 0.1161, + "num_input_tokens_seen": 16916160, + "step": 25120 + }, + { + "epoch": 0.6138079300320035, + "grad_norm": 16.056711196899414, + "learning_rate": 1.996847289653238e-06, + "loss": 0.034, + "num_input_tokens_seen": 16919744, + "step": 25125 + }, + { + "epoch": 0.6139300808638507, + "grad_norm": 14.756852149963379, + "learning_rate": 1.996840519756932e-06, + "loss": 0.1062, + "num_input_tokens_seen": 16923264, + "step": 25130 + }, + { + "epoch": 0.6140522316956979, + "grad_norm": 13.74815845489502, + "learning_rate": 1.996833742611334e-06, + "loss": 0.153, + "num_input_tokens_seen": 16926272, + "step": 25135 + }, + { + "epoch": 0.614174382527545, + "grad_norm": 12.079375267028809, + "learning_rate": 1.996826958216493e-06, + "loss": 0.1932, + "num_input_tokens_seen": 16929728, + "step": 25140 + }, + { + "epoch": 0.6142965333593922, + "grad_norm": 14.67190933227539, + "learning_rate": 1.996820166572458e-06, + "loss": 0.0886, + "num_input_tokens_seen": 16933376, + "step": 25145 + }, + { + "epoch": 0.6144186841912394, + "grad_norm": 4.266879081726074, + "learning_rate": 1.996813367679279e-06, + "loss": 0.0943, + "num_input_tokens_seen": 16936448, + "step": 25150 + }, + { + "epoch": 0.6145408350230865, + "grad_norm": 14.214287757873535, + "learning_rate": 1.9968065615370046e-06, + "loss": 0.1077, + "num_input_tokens_seen": 16939968, + "step": 25155 + }, + { + "epoch": 0.6146629858549336, + "grad_norm": 1.067999243736267, + "learning_rate": 1.996799748145685e-06, + "loss": 0.1043, + "num_input_tokens_seen": 16943232, + "step": 25160 + }, + { + "epoch": 0.6147851366867808, + "grad_norm": 9.434431076049805, + "learning_rate": 1.9967929275053695e-06, + "loss": 0.1222, + "num_input_tokens_seen": 16945984, + "step": 25165 + }, + { + "epoch": 0.614907287518628, + "grad_norm": 3.1949663162231445, + "learning_rate": 1.996786099616108e-06, + "loss": 0.2028, + "num_input_tokens_seen": 16949376, + "step": 25170 + }, + { + "epoch": 0.6150294383504752, + "grad_norm": 15.2947359085083, + "learning_rate": 1.9967792644779496e-06, + "loss": 0.1709, + "num_input_tokens_seen": 16952512, + "step": 25175 + }, + { + "epoch": 0.6151515891823224, + "grad_norm": 18.07305908203125, + "learning_rate": 1.9967724220909444e-06, + "loss": 0.154, + "num_input_tokens_seen": 16955840, + "step": 25180 + }, + { + "epoch": 0.6152737400141695, + "grad_norm": 16.07686424255371, + "learning_rate": 1.996765572455142e-06, + "loss": 0.0944, + "num_input_tokens_seen": 16959616, + "step": 25185 + }, + { + "epoch": 0.6153958908460166, + "grad_norm": 36.54639434814453, + "learning_rate": 1.996758715570592e-06, + "loss": 0.147, + "num_input_tokens_seen": 16962880, + "step": 25190 + }, + { + "epoch": 0.6155180416778638, + "grad_norm": 9.950146675109863, + "learning_rate": 1.9967518514373447e-06, + "loss": 0.1115, + "num_input_tokens_seen": 16965952, + "step": 25195 + }, + { + "epoch": 0.615640192509711, + "grad_norm": 22.124109268188477, + "learning_rate": 1.9967449800554497e-06, + "loss": 0.0778, + "num_input_tokens_seen": 16969280, + "step": 25200 + }, + { + "epoch": 0.6157623433415581, + "grad_norm": 17.615293502807617, + "learning_rate": 1.996738101424957e-06, + "loss": 0.1822, + "num_input_tokens_seen": 16972736, + "step": 25205 + }, + { + "epoch": 0.6158844941734053, + "grad_norm": 15.409132957458496, + "learning_rate": 1.9967312155459175e-06, + "loss": 0.1052, + "num_input_tokens_seen": 16976064, + "step": 25210 + }, + { + "epoch": 0.6160066450052525, + "grad_norm": 23.066360473632812, + "learning_rate": 1.99672432241838e-06, + "loss": 0.1949, + "num_input_tokens_seen": 16979328, + "step": 25215 + }, + { + "epoch": 0.6161287958370997, + "grad_norm": 13.9695463180542, + "learning_rate": 1.9967174220423954e-06, + "loss": 0.1043, + "num_input_tokens_seen": 16982464, + "step": 25220 + }, + { + "epoch": 0.6162509466689469, + "grad_norm": 0.6445543169975281, + "learning_rate": 1.996710514418013e-06, + "loss": 0.1102, + "num_input_tokens_seen": 16986048, + "step": 25225 + }, + { + "epoch": 0.6163730975007939, + "grad_norm": 15.676551818847656, + "learning_rate": 1.996703599545284e-06, + "loss": 0.134, + "num_input_tokens_seen": 16989312, + "step": 25230 + }, + { + "epoch": 0.6164952483326411, + "grad_norm": 1.9470303058624268, + "learning_rate": 1.996696677424259e-06, + "loss": 0.0338, + "num_input_tokens_seen": 16993088, + "step": 25235 + }, + { + "epoch": 0.6166173991644883, + "grad_norm": 13.360188484191895, + "learning_rate": 1.996689748054987e-06, + "loss": 0.1011, + "num_input_tokens_seen": 16996672, + "step": 25240 + }, + { + "epoch": 0.6167395499963355, + "grad_norm": 11.234295845031738, + "learning_rate": 1.996682811437519e-06, + "loss": 0.1098, + "num_input_tokens_seen": 17000128, + "step": 25245 + }, + { + "epoch": 0.6168617008281826, + "grad_norm": 6.157711505889893, + "learning_rate": 1.9966758675719057e-06, + "loss": 0.0585, + "num_input_tokens_seen": 17003712, + "step": 25250 + }, + { + "epoch": 0.6169838516600298, + "grad_norm": 10.527791023254395, + "learning_rate": 1.996668916458197e-06, + "loss": 0.0604, + "num_input_tokens_seen": 17007872, + "step": 25255 + }, + { + "epoch": 0.617106002491877, + "grad_norm": 1.8814789056777954, + "learning_rate": 1.9966619580964446e-06, + "loss": 0.0614, + "num_input_tokens_seen": 17011392, + "step": 25260 + }, + { + "epoch": 0.6172281533237242, + "grad_norm": 22.949785232543945, + "learning_rate": 1.996654992486698e-06, + "loss": 0.1322, + "num_input_tokens_seen": 17014720, + "step": 25265 + }, + { + "epoch": 0.6173503041555713, + "grad_norm": 15.85322093963623, + "learning_rate": 1.9966480196290087e-06, + "loss": 0.0883, + "num_input_tokens_seen": 17018304, + "step": 25270 + }, + { + "epoch": 0.6174724549874184, + "grad_norm": 11.826990127563477, + "learning_rate": 1.996641039523426e-06, + "loss": 0.1705, + "num_input_tokens_seen": 17021888, + "step": 25275 + }, + { + "epoch": 0.6175946058192656, + "grad_norm": 0.42177316546440125, + "learning_rate": 1.9966340521700024e-06, + "loss": 0.1127, + "num_input_tokens_seen": 17025280, + "step": 25280 + }, + { + "epoch": 0.6177167566511128, + "grad_norm": 0.6035380959510803, + "learning_rate": 1.9966270575687876e-06, + "loss": 0.021, + "num_input_tokens_seen": 17029056, + "step": 25285 + }, + { + "epoch": 0.61783890748296, + "grad_norm": 6.330459117889404, + "learning_rate": 1.996620055719833e-06, + "loss": 0.0255, + "num_input_tokens_seen": 17032384, + "step": 25290 + }, + { + "epoch": 0.6179610583148071, + "grad_norm": 0.0786670371890068, + "learning_rate": 1.9966130466231886e-06, + "loss": 0.081, + "num_input_tokens_seen": 17035392, + "step": 25295 + }, + { + "epoch": 0.6180832091466543, + "grad_norm": 4.98344087600708, + "learning_rate": 1.996606030278907e-06, + "loss": 0.1271, + "num_input_tokens_seen": 17038848, + "step": 25300 + }, + { + "epoch": 0.6182053599785015, + "grad_norm": 27.002077102661133, + "learning_rate": 1.9965990066870374e-06, + "loss": 0.1147, + "num_input_tokens_seen": 17042816, + "step": 25305 + }, + { + "epoch": 0.6183275108103486, + "grad_norm": 14.993169784545898, + "learning_rate": 1.9965919758476325e-06, + "loss": 0.0699, + "num_input_tokens_seen": 17045824, + "step": 25310 + }, + { + "epoch": 0.6184496616421958, + "grad_norm": 20.037349700927734, + "learning_rate": 1.9965849377607423e-06, + "loss": 0.1055, + "num_input_tokens_seen": 17049152, + "step": 25315 + }, + { + "epoch": 0.6185718124740429, + "grad_norm": 15.71719741821289, + "learning_rate": 1.9965778924264183e-06, + "loss": 0.1039, + "num_input_tokens_seen": 17052416, + "step": 25320 + }, + { + "epoch": 0.6186939633058901, + "grad_norm": 9.317479133605957, + "learning_rate": 1.996570839844712e-06, + "loss": 0.0592, + "num_input_tokens_seen": 17055872, + "step": 25325 + }, + { + "epoch": 0.6188161141377373, + "grad_norm": 2.1688971519470215, + "learning_rate": 1.9965637800156747e-06, + "loss": 0.1786, + "num_input_tokens_seen": 17058880, + "step": 25330 + }, + { + "epoch": 0.6189382649695845, + "grad_norm": 23.790361404418945, + "learning_rate": 1.9965567129393576e-06, + "loss": 0.0409, + "num_input_tokens_seen": 17062208, + "step": 25335 + }, + { + "epoch": 0.6190604158014316, + "grad_norm": 0.34484490752220154, + "learning_rate": 1.9965496386158117e-06, + "loss": 0.1413, + "num_input_tokens_seen": 17065920, + "step": 25340 + }, + { + "epoch": 0.6191825666332788, + "grad_norm": 1.4263343811035156, + "learning_rate": 1.996542557045089e-06, + "loss": 0.1079, + "num_input_tokens_seen": 17069568, + "step": 25345 + }, + { + "epoch": 0.6193047174651259, + "grad_norm": 34.41661071777344, + "learning_rate": 1.9965354682272405e-06, + "loss": 0.2241, + "num_input_tokens_seen": 17073216, + "step": 25350 + }, + { + "epoch": 0.6194268682969731, + "grad_norm": 0.44979265332221985, + "learning_rate": 1.9965283721623185e-06, + "loss": 0.1075, + "num_input_tokens_seen": 17076928, + "step": 25355 + }, + { + "epoch": 0.6195490191288203, + "grad_norm": 3.4772098064422607, + "learning_rate": 1.9965212688503736e-06, + "loss": 0.0357, + "num_input_tokens_seen": 17080000, + "step": 25360 + }, + { + "epoch": 0.6196711699606674, + "grad_norm": 20.216279983520508, + "learning_rate": 1.9965141582914583e-06, + "loss": 0.0264, + "num_input_tokens_seen": 17083456, + "step": 25365 + }, + { + "epoch": 0.6197933207925146, + "grad_norm": 41.14569854736328, + "learning_rate": 1.996507040485624e-06, + "loss": 0.1875, + "num_input_tokens_seen": 17087616, + "step": 25370 + }, + { + "epoch": 0.6199154716243618, + "grad_norm": 13.675374984741211, + "learning_rate": 1.9964999154329224e-06, + "loss": 0.1642, + "num_input_tokens_seen": 17090688, + "step": 25375 + }, + { + "epoch": 0.620037622456209, + "grad_norm": 0.7034504413604736, + "learning_rate": 1.9964927831334056e-06, + "loss": 0.1116, + "num_input_tokens_seen": 17094272, + "step": 25380 + }, + { + "epoch": 0.620159773288056, + "grad_norm": 33.59286880493164, + "learning_rate": 1.996485643587125e-06, + "loss": 0.1427, + "num_input_tokens_seen": 17097344, + "step": 25385 + }, + { + "epoch": 0.6202819241199032, + "grad_norm": 9.86114501953125, + "learning_rate": 1.996478496794133e-06, + "loss": 0.1185, + "num_input_tokens_seen": 17101120, + "step": 25390 + }, + { + "epoch": 0.6204040749517504, + "grad_norm": 11.22215461730957, + "learning_rate": 1.9964713427544813e-06, + "loss": 0.0828, + "num_input_tokens_seen": 17104192, + "step": 25395 + }, + { + "epoch": 0.6205262257835976, + "grad_norm": 22.48546028137207, + "learning_rate": 1.996464181468222e-06, + "loss": 0.1144, + "num_input_tokens_seen": 17108096, + "step": 25400 + }, + { + "epoch": 0.6206483766154447, + "grad_norm": 2.969916582107544, + "learning_rate": 1.9964570129354066e-06, + "loss": 0.2225, + "num_input_tokens_seen": 17111104, + "step": 25405 + }, + { + "epoch": 0.6207705274472919, + "grad_norm": 3.541419506072998, + "learning_rate": 1.9964498371560886e-06, + "loss": 0.0343, + "num_input_tokens_seen": 17114432, + "step": 25410 + }, + { + "epoch": 0.6208926782791391, + "grad_norm": 29.178781509399414, + "learning_rate": 1.9964426541303186e-06, + "loss": 0.1387, + "num_input_tokens_seen": 17117440, + "step": 25415 + }, + { + "epoch": 0.6210148291109863, + "grad_norm": 2.5262792110443115, + "learning_rate": 1.9964354638581503e-06, + "loss": 0.1161, + "num_input_tokens_seen": 17120896, + "step": 25420 + }, + { + "epoch": 0.6211369799428335, + "grad_norm": 0.37980780005455017, + "learning_rate": 1.996428266339635e-06, + "loss": 0.0561, + "num_input_tokens_seen": 17124416, + "step": 25425 + }, + { + "epoch": 0.6212591307746805, + "grad_norm": 26.190187454223633, + "learning_rate": 1.9964210615748255e-06, + "loss": 0.1095, + "num_input_tokens_seen": 17127808, + "step": 25430 + }, + { + "epoch": 0.6213812816065277, + "grad_norm": 0.6196547150611877, + "learning_rate": 1.996413849563774e-06, + "loss": 0.0779, + "num_input_tokens_seen": 17131008, + "step": 25435 + }, + { + "epoch": 0.6215034324383749, + "grad_norm": 0.7363035678863525, + "learning_rate": 1.9964066303065325e-06, + "loss": 0.0774, + "num_input_tokens_seen": 17133952, + "step": 25440 + }, + { + "epoch": 0.6216255832702221, + "grad_norm": 1.5437747240066528, + "learning_rate": 1.9963994038031546e-06, + "loss": 0.0625, + "num_input_tokens_seen": 17137600, + "step": 25445 + }, + { + "epoch": 0.6217477341020692, + "grad_norm": 83.51165008544922, + "learning_rate": 1.996392170053692e-06, + "loss": 0.2014, + "num_input_tokens_seen": 17140864, + "step": 25450 + }, + { + "epoch": 0.6218698849339164, + "grad_norm": 0.4975201189517975, + "learning_rate": 1.9963849290581974e-06, + "loss": 0.0841, + "num_input_tokens_seen": 17144256, + "step": 25455 + }, + { + "epoch": 0.6219920357657636, + "grad_norm": 2.202531576156616, + "learning_rate": 1.996377680816724e-06, + "loss": 0.1522, + "num_input_tokens_seen": 17147776, + "step": 25460 + }, + { + "epoch": 0.6221141865976108, + "grad_norm": 17.93608283996582, + "learning_rate": 1.9963704253293237e-06, + "loss": 0.1654, + "num_input_tokens_seen": 17151232, + "step": 25465 + }, + { + "epoch": 0.6222363374294579, + "grad_norm": 0.37878966331481934, + "learning_rate": 1.99636316259605e-06, + "loss": 0.1012, + "num_input_tokens_seen": 17154624, + "step": 25470 + }, + { + "epoch": 0.622358488261305, + "grad_norm": 15.742181777954102, + "learning_rate": 1.9963558926169552e-06, + "loss": 0.1666, + "num_input_tokens_seen": 17158080, + "step": 25475 + }, + { + "epoch": 0.6224806390931522, + "grad_norm": 0.2844081521034241, + "learning_rate": 1.9963486153920925e-06, + "loss": 0.0974, + "num_input_tokens_seen": 17161920, + "step": 25480 + }, + { + "epoch": 0.6226027899249994, + "grad_norm": 0.17397256195545197, + "learning_rate": 1.9963413309215143e-06, + "loss": 0.1068, + "num_input_tokens_seen": 17165248, + "step": 25485 + }, + { + "epoch": 0.6227249407568466, + "grad_norm": 9.662365913391113, + "learning_rate": 1.9963340392052744e-06, + "loss": 0.1319, + "num_input_tokens_seen": 17168576, + "step": 25490 + }, + { + "epoch": 0.6228470915886937, + "grad_norm": 43.83585739135742, + "learning_rate": 1.9963267402434253e-06, + "loss": 0.1814, + "num_input_tokens_seen": 17172032, + "step": 25495 + }, + { + "epoch": 0.6229692424205409, + "grad_norm": 22.395421981811523, + "learning_rate": 1.99631943403602e-06, + "loss": 0.0794, + "num_input_tokens_seen": 17175552, + "step": 25500 + }, + { + "epoch": 0.623091393252388, + "grad_norm": 13.20292854309082, + "learning_rate": 1.996312120583112e-06, + "loss": 0.0511, + "num_input_tokens_seen": 17178880, + "step": 25505 + }, + { + "epoch": 0.6232135440842352, + "grad_norm": 1.7054976224899292, + "learning_rate": 1.996304799884754e-06, + "loss": 0.1083, + "num_input_tokens_seen": 17182208, + "step": 25510 + }, + { + "epoch": 0.6233356949160824, + "grad_norm": 1.7298126220703125, + "learning_rate": 1.996297471941e-06, + "loss": 0.0684, + "num_input_tokens_seen": 17185600, + "step": 25515 + }, + { + "epoch": 0.6234578457479295, + "grad_norm": 23.883642196655273, + "learning_rate": 1.9962901367519023e-06, + "loss": 0.071, + "num_input_tokens_seen": 17189184, + "step": 25520 + }, + { + "epoch": 0.6235799965797767, + "grad_norm": 20.736419677734375, + "learning_rate": 1.996282794317515e-06, + "loss": 0.1737, + "num_input_tokens_seen": 17193152, + "step": 25525 + }, + { + "epoch": 0.6237021474116239, + "grad_norm": 9.693829536437988, + "learning_rate": 1.996275444637891e-06, + "loss": 0.1166, + "num_input_tokens_seen": 17196672, + "step": 25530 + }, + { + "epoch": 0.6238242982434711, + "grad_norm": 22.503318786621094, + "learning_rate": 1.9962680877130842e-06, + "loss": 0.1957, + "num_input_tokens_seen": 17200000, + "step": 25535 + }, + { + "epoch": 0.6239464490753182, + "grad_norm": 15.77971363067627, + "learning_rate": 1.996260723543148e-06, + "loss": 0.1698, + "num_input_tokens_seen": 17203392, + "step": 25540 + }, + { + "epoch": 0.6240685999071653, + "grad_norm": 52.5566291809082, + "learning_rate": 1.996253352128136e-06, + "loss": 0.1698, + "num_input_tokens_seen": 17206592, + "step": 25545 + }, + { + "epoch": 0.6241907507390125, + "grad_norm": 4.498231410980225, + "learning_rate": 1.996245973468101e-06, + "loss": 0.1594, + "num_input_tokens_seen": 17210048, + "step": 25550 + }, + { + "epoch": 0.6243129015708597, + "grad_norm": 6.834176063537598, + "learning_rate": 1.9962385875630977e-06, + "loss": 0.1584, + "num_input_tokens_seen": 17214848, + "step": 25555 + }, + { + "epoch": 0.6244350524027069, + "grad_norm": 20.276023864746094, + "learning_rate": 1.9962311944131796e-06, + "loss": 0.0985, + "num_input_tokens_seen": 17218304, + "step": 25560 + }, + { + "epoch": 0.624557203234554, + "grad_norm": 9.306912422180176, + "learning_rate": 1.9962237940184003e-06, + "loss": 0.0956, + "num_input_tokens_seen": 17221632, + "step": 25565 + }, + { + "epoch": 0.6246793540664012, + "grad_norm": 14.916698455810547, + "learning_rate": 1.9962163863788134e-06, + "loss": 0.1397, + "num_input_tokens_seen": 17225152, + "step": 25570 + }, + { + "epoch": 0.6248015048982484, + "grad_norm": 21.793596267700195, + "learning_rate": 1.996208971494473e-06, + "loss": 0.0717, + "num_input_tokens_seen": 17229056, + "step": 25575 + }, + { + "epoch": 0.6249236557300956, + "grad_norm": 1.2358403205871582, + "learning_rate": 1.9962015493654334e-06, + "loss": 0.0514, + "num_input_tokens_seen": 17232320, + "step": 25580 + }, + { + "epoch": 0.6250458065619426, + "grad_norm": 9.152735710144043, + "learning_rate": 1.9961941199917477e-06, + "loss": 0.1642, + "num_input_tokens_seen": 17235328, + "step": 25585 + }, + { + "epoch": 0.6251679573937898, + "grad_norm": 5.852708339691162, + "learning_rate": 1.9961866833734705e-06, + "loss": 0.0463, + "num_input_tokens_seen": 17238336, + "step": 25590 + }, + { + "epoch": 0.625290108225637, + "grad_norm": 16.526809692382812, + "learning_rate": 1.996179239510656e-06, + "loss": 0.1545, + "num_input_tokens_seen": 17241728, + "step": 25595 + }, + { + "epoch": 0.6254122590574842, + "grad_norm": 24.526718139648438, + "learning_rate": 1.996171788403358e-06, + "loss": 0.1015, + "num_input_tokens_seen": 17245248, + "step": 25600 + }, + { + "epoch": 0.6255344098893314, + "grad_norm": 3.7110772132873535, + "learning_rate": 1.996164330051631e-06, + "loss": 0.1408, + "num_input_tokens_seen": 17248960, + "step": 25605 + }, + { + "epoch": 0.6256565607211785, + "grad_norm": 28.51833152770996, + "learning_rate": 1.996156864455529e-06, + "loss": 0.0738, + "num_input_tokens_seen": 17252352, + "step": 25610 + }, + { + "epoch": 0.6257787115530257, + "grad_norm": 14.709949493408203, + "learning_rate": 1.996149391615106e-06, + "loss": 0.1187, + "num_input_tokens_seen": 17255488, + "step": 25615 + }, + { + "epoch": 0.6259008623848729, + "grad_norm": 17.817319869995117, + "learning_rate": 1.996141911530417e-06, + "loss": 0.0899, + "num_input_tokens_seen": 17258752, + "step": 25620 + }, + { + "epoch": 0.62602301321672, + "grad_norm": 17.193923950195312, + "learning_rate": 1.996134424201516e-06, + "loss": 0.1487, + "num_input_tokens_seen": 17262272, + "step": 25625 + }, + { + "epoch": 0.6261451640485671, + "grad_norm": 22.279172897338867, + "learning_rate": 1.9961269296284574e-06, + "loss": 0.1187, + "num_input_tokens_seen": 17265600, + "step": 25630 + }, + { + "epoch": 0.6262673148804143, + "grad_norm": 23.591035842895508, + "learning_rate": 1.9961194278112963e-06, + "loss": 0.072, + "num_input_tokens_seen": 17268992, + "step": 25635 + }, + { + "epoch": 0.6263894657122615, + "grad_norm": 10.646063804626465, + "learning_rate": 1.9961119187500867e-06, + "loss": 0.1307, + "num_input_tokens_seen": 17273152, + "step": 25640 + }, + { + "epoch": 0.6265116165441087, + "grad_norm": 13.584372520446777, + "learning_rate": 1.996104402444883e-06, + "loss": 0.0832, + "num_input_tokens_seen": 17276480, + "step": 25645 + }, + { + "epoch": 0.6266337673759559, + "grad_norm": 8.617551803588867, + "learning_rate": 1.99609687889574e-06, + "loss": 0.1212, + "num_input_tokens_seen": 17279488, + "step": 25650 + }, + { + "epoch": 0.626755918207803, + "grad_norm": 30.092082977294922, + "learning_rate": 1.996089348102713e-06, + "loss": 0.1022, + "num_input_tokens_seen": 17283328, + "step": 25655 + }, + { + "epoch": 0.6268780690396502, + "grad_norm": 26.2705135345459, + "learning_rate": 1.996081810065856e-06, + "loss": 0.0568, + "num_input_tokens_seen": 17286848, + "step": 25660 + }, + { + "epoch": 0.6270002198714973, + "grad_norm": 11.056796073913574, + "learning_rate": 1.9960742647852246e-06, + "loss": 0.1768, + "num_input_tokens_seen": 17291072, + "step": 25665 + }, + { + "epoch": 0.6271223707033445, + "grad_norm": 22.28460121154785, + "learning_rate": 1.9960667122608732e-06, + "loss": 0.1485, + "num_input_tokens_seen": 17294208, + "step": 25670 + }, + { + "epoch": 0.6272445215351916, + "grad_norm": 10.146092414855957, + "learning_rate": 1.996059152492856e-06, + "loss": 0.1853, + "num_input_tokens_seen": 17298112, + "step": 25675 + }, + { + "epoch": 0.6273666723670388, + "grad_norm": 12.486412048339844, + "learning_rate": 1.9960515854812298e-06, + "loss": 0.1721, + "num_input_tokens_seen": 17301184, + "step": 25680 + }, + { + "epoch": 0.627488823198886, + "grad_norm": 12.366875648498535, + "learning_rate": 1.996044011226048e-06, + "loss": 0.1373, + "num_input_tokens_seen": 17304512, + "step": 25685 + }, + { + "epoch": 0.6276109740307332, + "grad_norm": 12.222225189208984, + "learning_rate": 1.996036429727366e-06, + "loss": 0.0416, + "num_input_tokens_seen": 17307520, + "step": 25690 + }, + { + "epoch": 0.6277331248625803, + "grad_norm": 11.332962036132812, + "learning_rate": 1.99602884098524e-06, + "loss": 0.152, + "num_input_tokens_seen": 17311104, + "step": 25695 + }, + { + "epoch": 0.6278552756944275, + "grad_norm": 13.932801246643066, + "learning_rate": 1.9960212449997238e-06, + "loss": 0.0961, + "num_input_tokens_seen": 17314560, + "step": 25700 + }, + { + "epoch": 0.6279774265262746, + "grad_norm": 12.53373908996582, + "learning_rate": 1.996013641770873e-06, + "loss": 0.1072, + "num_input_tokens_seen": 17317888, + "step": 25705 + }, + { + "epoch": 0.6280995773581218, + "grad_norm": 7.917189598083496, + "learning_rate": 1.9960060312987434e-06, + "loss": 0.0653, + "num_input_tokens_seen": 17320896, + "step": 25710 + }, + { + "epoch": 0.628221728189969, + "grad_norm": 25.79001808166504, + "learning_rate": 1.9959984135833902e-06, + "loss": 0.1914, + "num_input_tokens_seen": 17324160, + "step": 25715 + }, + { + "epoch": 0.6283438790218161, + "grad_norm": 16.6901912689209, + "learning_rate": 1.9959907886248686e-06, + "loss": 0.1041, + "num_input_tokens_seen": 17327168, + "step": 25720 + }, + { + "epoch": 0.6284660298536633, + "grad_norm": 11.882837295532227, + "learning_rate": 1.9959831564232335e-06, + "loss": 0.1079, + "num_input_tokens_seen": 17330624, + "step": 25725 + }, + { + "epoch": 0.6285881806855105, + "grad_norm": 15.25439453125, + "learning_rate": 1.9959755169785417e-06, + "loss": 0.0786, + "num_input_tokens_seen": 17333632, + "step": 25730 + }, + { + "epoch": 0.6287103315173577, + "grad_norm": 9.499648094177246, + "learning_rate": 1.995967870290848e-06, + "loss": 0.0607, + "num_input_tokens_seen": 17337024, + "step": 25735 + }, + { + "epoch": 0.6288324823492047, + "grad_norm": 12.742144584655762, + "learning_rate": 1.9959602163602077e-06, + "loss": 0.0855, + "num_input_tokens_seen": 17340352, + "step": 25740 + }, + { + "epoch": 0.6289546331810519, + "grad_norm": 10.112947463989258, + "learning_rate": 1.9959525551866767e-06, + "loss": 0.2015, + "num_input_tokens_seen": 17343616, + "step": 25745 + }, + { + "epoch": 0.6290767840128991, + "grad_norm": 0.5093333721160889, + "learning_rate": 1.9959448867703115e-06, + "loss": 0.1225, + "num_input_tokens_seen": 17346880, + "step": 25750 + }, + { + "epoch": 0.6291989348447463, + "grad_norm": 11.308860778808594, + "learning_rate": 1.995937211111167e-06, + "loss": 0.1385, + "num_input_tokens_seen": 17350080, + "step": 25755 + }, + { + "epoch": 0.6293210856765935, + "grad_norm": 15.600391387939453, + "learning_rate": 1.9959295282092987e-06, + "loss": 0.1036, + "num_input_tokens_seen": 17353152, + "step": 25760 + }, + { + "epoch": 0.6294432365084406, + "grad_norm": 10.270207405090332, + "learning_rate": 1.9959218380647638e-06, + "loss": 0.1537, + "num_input_tokens_seen": 17356736, + "step": 25765 + }, + { + "epoch": 0.6295653873402878, + "grad_norm": 29.24824333190918, + "learning_rate": 1.995914140677617e-06, + "loss": 0.1194, + "num_input_tokens_seen": 17360192, + "step": 25770 + }, + { + "epoch": 0.629687538172135, + "grad_norm": 2.259371519088745, + "learning_rate": 1.9959064360479144e-06, + "loss": 0.0897, + "num_input_tokens_seen": 17366144, + "step": 25775 + }, + { + "epoch": 0.6298096890039822, + "grad_norm": 0.8481370806694031, + "learning_rate": 1.9958987241757126e-06, + "loss": 0.1683, + "num_input_tokens_seen": 17369408, + "step": 25780 + }, + { + "epoch": 0.6299318398358292, + "grad_norm": 12.3348388671875, + "learning_rate": 1.9958910050610674e-06, + "loss": 0.0886, + "num_input_tokens_seen": 17372928, + "step": 25785 + }, + { + "epoch": 0.6300539906676764, + "grad_norm": 22.768659591674805, + "learning_rate": 1.995883278704035e-06, + "loss": 0.0823, + "num_input_tokens_seen": 17376448, + "step": 25790 + }, + { + "epoch": 0.6301761414995236, + "grad_norm": 1.0633013248443604, + "learning_rate": 1.9958755451046716e-06, + "loss": 0.1744, + "num_input_tokens_seen": 17379776, + "step": 25795 + }, + { + "epoch": 0.6302982923313708, + "grad_norm": 47.173545837402344, + "learning_rate": 1.9958678042630333e-06, + "loss": 0.1318, + "num_input_tokens_seen": 17382976, + "step": 25800 + }, + { + "epoch": 0.630420443163218, + "grad_norm": 28.43817138671875, + "learning_rate": 1.9958600561791765e-06, + "loss": 0.0919, + "num_input_tokens_seen": 17386368, + "step": 25805 + }, + { + "epoch": 0.6305425939950651, + "grad_norm": 11.135772705078125, + "learning_rate": 1.9958523008531574e-06, + "loss": 0.1327, + "num_input_tokens_seen": 17389376, + "step": 25810 + }, + { + "epoch": 0.6306647448269123, + "grad_norm": 13.410459518432617, + "learning_rate": 1.9958445382850325e-06, + "loss": 0.1494, + "num_input_tokens_seen": 17392768, + "step": 25815 + }, + { + "epoch": 0.6307868956587595, + "grad_norm": 15.97334098815918, + "learning_rate": 1.9958367684748585e-06, + "loss": 0.1626, + "num_input_tokens_seen": 17395648, + "step": 25820 + }, + { + "epoch": 0.6309090464906066, + "grad_norm": 25.007976531982422, + "learning_rate": 1.9958289914226917e-06, + "loss": 0.097, + "num_input_tokens_seen": 17398784, + "step": 25825 + }, + { + "epoch": 0.6310311973224537, + "grad_norm": 22.77874755859375, + "learning_rate": 1.9958212071285885e-06, + "loss": 0.1565, + "num_input_tokens_seen": 17402112, + "step": 25830 + }, + { + "epoch": 0.6311533481543009, + "grad_norm": 22.907556533813477, + "learning_rate": 1.9958134155926055e-06, + "loss": 0.1212, + "num_input_tokens_seen": 17405568, + "step": 25835 + }, + { + "epoch": 0.6312754989861481, + "grad_norm": 13.266105651855469, + "learning_rate": 1.9958056168147996e-06, + "loss": 0.0662, + "num_input_tokens_seen": 17408640, + "step": 25840 + }, + { + "epoch": 0.6313976498179953, + "grad_norm": 14.850981712341309, + "learning_rate": 1.9957978107952275e-06, + "loss": 0.1154, + "num_input_tokens_seen": 17412096, + "step": 25845 + }, + { + "epoch": 0.6315198006498425, + "grad_norm": 6.523622989654541, + "learning_rate": 1.995789997533946e-06, + "loss": 0.124, + "num_input_tokens_seen": 17415168, + "step": 25850 + }, + { + "epoch": 0.6316419514816896, + "grad_norm": 28.513198852539062, + "learning_rate": 1.995782177031011e-06, + "loss": 0.1397, + "num_input_tokens_seen": 17418368, + "step": 25855 + }, + { + "epoch": 0.6317641023135367, + "grad_norm": 5.756598472595215, + "learning_rate": 1.995774349286481e-06, + "loss": 0.1161, + "num_input_tokens_seen": 17421824, + "step": 25860 + }, + { + "epoch": 0.6318862531453839, + "grad_norm": 9.733460426330566, + "learning_rate": 1.995766514300412e-06, + "loss": 0.0884, + "num_input_tokens_seen": 17425024, + "step": 25865 + }, + { + "epoch": 0.6320084039772311, + "grad_norm": 1.50833261013031, + "learning_rate": 1.995758672072861e-06, + "loss": 0.1705, + "num_input_tokens_seen": 17428096, + "step": 25870 + }, + { + "epoch": 0.6321305548090782, + "grad_norm": 3.84963059425354, + "learning_rate": 1.995750822603885e-06, + "loss": 0.0775, + "num_input_tokens_seen": 17431360, + "step": 25875 + }, + { + "epoch": 0.6322527056409254, + "grad_norm": 11.950239181518555, + "learning_rate": 1.9957429658935415e-06, + "loss": 0.1135, + "num_input_tokens_seen": 17434816, + "step": 25880 + }, + { + "epoch": 0.6323748564727726, + "grad_norm": 8.948583602905273, + "learning_rate": 1.995735101941887e-06, + "loss": 0.1094, + "num_input_tokens_seen": 17438336, + "step": 25885 + }, + { + "epoch": 0.6324970073046198, + "grad_norm": 27.25109100341797, + "learning_rate": 1.995727230748979e-06, + "loss": 0.0563, + "num_input_tokens_seen": 17441536, + "step": 25890 + }, + { + "epoch": 0.632619158136467, + "grad_norm": 1.836618423461914, + "learning_rate": 1.995719352314875e-06, + "loss": 0.1082, + "num_input_tokens_seen": 17444992, + "step": 25895 + }, + { + "epoch": 0.632741308968314, + "grad_norm": 12.529705047607422, + "learning_rate": 1.995711466639632e-06, + "loss": 0.1079, + "num_input_tokens_seen": 17448192, + "step": 25900 + }, + { + "epoch": 0.6328634598001612, + "grad_norm": 7.985225200653076, + "learning_rate": 1.9957035737233072e-06, + "loss": 0.0583, + "num_input_tokens_seen": 17451456, + "step": 25905 + }, + { + "epoch": 0.6329856106320084, + "grad_norm": 13.829870223999023, + "learning_rate": 1.9956956735659583e-06, + "loss": 0.0693, + "num_input_tokens_seen": 17455168, + "step": 25910 + }, + { + "epoch": 0.6331077614638556, + "grad_norm": 31.966915130615234, + "learning_rate": 1.9956877661676427e-06, + "loss": 0.0966, + "num_input_tokens_seen": 17458688, + "step": 25915 + }, + { + "epoch": 0.6332299122957027, + "grad_norm": 30.319499969482422, + "learning_rate": 1.9956798515284178e-06, + "loss": 0.171, + "num_input_tokens_seen": 17462080, + "step": 25920 + }, + { + "epoch": 0.6333520631275499, + "grad_norm": 35.57968521118164, + "learning_rate": 1.9956719296483414e-06, + "loss": 0.0922, + "num_input_tokens_seen": 17465472, + "step": 25925 + }, + { + "epoch": 0.6334742139593971, + "grad_norm": 7.720409870147705, + "learning_rate": 1.9956640005274708e-06, + "loss": 0.1561, + "num_input_tokens_seen": 17469184, + "step": 25930 + }, + { + "epoch": 0.6335963647912443, + "grad_norm": 27.032007217407227, + "learning_rate": 1.9956560641658635e-06, + "loss": 0.154, + "num_input_tokens_seen": 17473344, + "step": 25935 + }, + { + "epoch": 0.6337185156230913, + "grad_norm": 8.09611988067627, + "learning_rate": 1.995648120563578e-06, + "loss": 0.0313, + "num_input_tokens_seen": 17476992, + "step": 25940 + }, + { + "epoch": 0.6338406664549385, + "grad_norm": 17.054302215576172, + "learning_rate": 1.9956401697206712e-06, + "loss": 0.1835, + "num_input_tokens_seen": 17480192, + "step": 25945 + }, + { + "epoch": 0.6339628172867857, + "grad_norm": 27.591604232788086, + "learning_rate": 1.9956322116372013e-06, + "loss": 0.1739, + "num_input_tokens_seen": 17483712, + "step": 25950 + }, + { + "epoch": 0.6340849681186329, + "grad_norm": 31.46297264099121, + "learning_rate": 1.9956242463132265e-06, + "loss": 0.1891, + "num_input_tokens_seen": 17486464, + "step": 25955 + }, + { + "epoch": 0.6342071189504801, + "grad_norm": 20.843233108520508, + "learning_rate": 1.9956162737488043e-06, + "loss": 0.1866, + "num_input_tokens_seen": 17490240, + "step": 25960 + }, + { + "epoch": 0.6343292697823272, + "grad_norm": 6.045319557189941, + "learning_rate": 1.9956082939439923e-06, + "loss": 0.0477, + "num_input_tokens_seen": 17493504, + "step": 25965 + }, + { + "epoch": 0.6344514206141744, + "grad_norm": 1.8652979135513306, + "learning_rate": 1.99560030689885e-06, + "loss": 0.1017, + "num_input_tokens_seen": 17496704, + "step": 25970 + }, + { + "epoch": 0.6345735714460216, + "grad_norm": 17.12334632873535, + "learning_rate": 1.9955923126134336e-06, + "loss": 0.0958, + "num_input_tokens_seen": 17499712, + "step": 25975 + }, + { + "epoch": 0.6346957222778687, + "grad_norm": 1.6485508680343628, + "learning_rate": 1.995584311087802e-06, + "loss": 0.0124, + "num_input_tokens_seen": 17503360, + "step": 25980 + }, + { + "epoch": 0.6348178731097158, + "grad_norm": 24.18747329711914, + "learning_rate": 1.995576302322014e-06, + "loss": 0.0913, + "num_input_tokens_seen": 17506944, + "step": 25985 + }, + { + "epoch": 0.634940023941563, + "grad_norm": 0.19784998893737793, + "learning_rate": 1.995568286316127e-06, + "loss": 0.0773, + "num_input_tokens_seen": 17510144, + "step": 25990 + }, + { + "epoch": 0.6350621747734102, + "grad_norm": 1.5027730464935303, + "learning_rate": 1.9955602630702004e-06, + "loss": 0.1737, + "num_input_tokens_seen": 17513216, + "step": 25995 + }, + { + "epoch": 0.6351843256052574, + "grad_norm": 22.669113159179688, + "learning_rate": 1.995552232584291e-06, + "loss": 0.1516, + "num_input_tokens_seen": 17516608, + "step": 26000 + }, + { + "epoch": 0.6353064764371046, + "grad_norm": 0.15162105858325958, + "learning_rate": 1.9955441948584584e-06, + "loss": 0.1055, + "num_input_tokens_seen": 17519872, + "step": 26005 + }, + { + "epoch": 0.6354286272689517, + "grad_norm": 2.181324005126953, + "learning_rate": 1.9955361498927604e-06, + "loss": 0.1741, + "num_input_tokens_seen": 17524032, + "step": 26010 + }, + { + "epoch": 0.6355507781007989, + "grad_norm": 13.539152145385742, + "learning_rate": 1.995528097687256e-06, + "loss": 0.081, + "num_input_tokens_seen": 17527168, + "step": 26015 + }, + { + "epoch": 0.635672928932646, + "grad_norm": 5.645986557006836, + "learning_rate": 1.995520038242003e-06, + "loss": 0.1234, + "num_input_tokens_seen": 17530240, + "step": 26020 + }, + { + "epoch": 0.6357950797644932, + "grad_norm": 11.694540977478027, + "learning_rate": 1.995511971557061e-06, + "loss": 0.1828, + "num_input_tokens_seen": 17533440, + "step": 26025 + }, + { + "epoch": 0.6359172305963403, + "grad_norm": 23.707487106323242, + "learning_rate": 1.9955038976324882e-06, + "loss": 0.1446, + "num_input_tokens_seen": 17536768, + "step": 26030 + }, + { + "epoch": 0.6360393814281875, + "grad_norm": 0.40436723828315735, + "learning_rate": 1.995495816468343e-06, + "loss": 0.022, + "num_input_tokens_seen": 17540352, + "step": 26035 + }, + { + "epoch": 0.6361615322600347, + "grad_norm": 0.8619459867477417, + "learning_rate": 1.9954877280646847e-06, + "loss": 0.0648, + "num_input_tokens_seen": 17543616, + "step": 26040 + }, + { + "epoch": 0.6362836830918819, + "grad_norm": 12.597610473632812, + "learning_rate": 1.995479632421572e-06, + "loss": 0.0706, + "num_input_tokens_seen": 17547520, + "step": 26045 + }, + { + "epoch": 0.6364058339237291, + "grad_norm": 2.8348464965820312, + "learning_rate": 1.9954715295390634e-06, + "loss": 0.1602, + "num_input_tokens_seen": 17550592, + "step": 26050 + }, + { + "epoch": 0.6365279847555761, + "grad_norm": 0.8372215032577515, + "learning_rate": 1.995463419417218e-06, + "loss": 0.0806, + "num_input_tokens_seen": 17553472, + "step": 26055 + }, + { + "epoch": 0.6366501355874233, + "grad_norm": 0.1670871526002884, + "learning_rate": 1.9954553020560952e-06, + "loss": 0.1826, + "num_input_tokens_seen": 17556928, + "step": 26060 + }, + { + "epoch": 0.6367722864192705, + "grad_norm": 2.472623348236084, + "learning_rate": 1.9954471774557536e-06, + "loss": 0.0413, + "num_input_tokens_seen": 17560384, + "step": 26065 + }, + { + "epoch": 0.6368944372511177, + "grad_norm": 28.401121139526367, + "learning_rate": 1.995439045616252e-06, + "loss": 0.1716, + "num_input_tokens_seen": 17563584, + "step": 26070 + }, + { + "epoch": 0.6370165880829648, + "grad_norm": 36.34634780883789, + "learning_rate": 1.9954309065376504e-06, + "loss": 0.1879, + "num_input_tokens_seen": 17566784, + "step": 26075 + }, + { + "epoch": 0.637138738914812, + "grad_norm": 19.85298728942871, + "learning_rate": 1.9954227602200075e-06, + "loss": 0.1128, + "num_input_tokens_seen": 17569984, + "step": 26080 + }, + { + "epoch": 0.6372608897466592, + "grad_norm": 13.330507278442383, + "learning_rate": 1.995414606663382e-06, + "loss": 0.099, + "num_input_tokens_seen": 17573504, + "step": 26085 + }, + { + "epoch": 0.6373830405785064, + "grad_norm": 38.39118576049805, + "learning_rate": 1.995406445867834e-06, + "loss": 0.1222, + "num_input_tokens_seen": 17577472, + "step": 26090 + }, + { + "epoch": 0.6375051914103536, + "grad_norm": 17.758310317993164, + "learning_rate": 1.9953982778334232e-06, + "loss": 0.047, + "num_input_tokens_seen": 17581504, + "step": 26095 + }, + { + "epoch": 0.6376273422422006, + "grad_norm": 7.126058101654053, + "learning_rate": 1.995390102560208e-06, + "loss": 0.1131, + "num_input_tokens_seen": 17584512, + "step": 26100 + }, + { + "epoch": 0.6377494930740478, + "grad_norm": 34.98472595214844, + "learning_rate": 1.995381920048248e-06, + "loss": 0.3489, + "num_input_tokens_seen": 17587648, + "step": 26105 + }, + { + "epoch": 0.637871643905895, + "grad_norm": 64.70793151855469, + "learning_rate": 1.995373730297603e-06, + "loss": 0.12, + "num_input_tokens_seen": 17591296, + "step": 26110 + }, + { + "epoch": 0.6379937947377422, + "grad_norm": 21.996980667114258, + "learning_rate": 1.9953655333083325e-06, + "loss": 0.2038, + "num_input_tokens_seen": 17594368, + "step": 26115 + }, + { + "epoch": 0.6381159455695893, + "grad_norm": 18.887956619262695, + "learning_rate": 1.995357329080496e-06, + "loss": 0.0196, + "num_input_tokens_seen": 17597440, + "step": 26120 + }, + { + "epoch": 0.6382380964014365, + "grad_norm": 20.388181686401367, + "learning_rate": 1.995349117614154e-06, + "loss": 0.1449, + "num_input_tokens_seen": 17600512, + "step": 26125 + }, + { + "epoch": 0.6383602472332837, + "grad_norm": 11.004735946655273, + "learning_rate": 1.995340898909365e-06, + "loss": 0.0482, + "num_input_tokens_seen": 17603584, + "step": 26130 + }, + { + "epoch": 0.6384823980651309, + "grad_norm": 1.7154580354690552, + "learning_rate": 1.9953326729661894e-06, + "loss": 0.132, + "num_input_tokens_seen": 17606720, + "step": 26135 + }, + { + "epoch": 0.638604548896978, + "grad_norm": 7.153088569641113, + "learning_rate": 1.9953244397846867e-06, + "loss": 0.0638, + "num_input_tokens_seen": 17610176, + "step": 26140 + }, + { + "epoch": 0.6387266997288251, + "grad_norm": 0.09430580586194992, + "learning_rate": 1.995316199364917e-06, + "loss": 0.1026, + "num_input_tokens_seen": 17613440, + "step": 26145 + }, + { + "epoch": 0.6388488505606723, + "grad_norm": 32.89512634277344, + "learning_rate": 1.9953079517069404e-06, + "loss": 0.1488, + "num_input_tokens_seen": 17616448, + "step": 26150 + }, + { + "epoch": 0.6389710013925195, + "grad_norm": 27.483251571655273, + "learning_rate": 1.9952996968108163e-06, + "loss": 0.1919, + "num_input_tokens_seen": 17619968, + "step": 26155 + }, + { + "epoch": 0.6390931522243667, + "grad_norm": 4.14311408996582, + "learning_rate": 1.9952914346766055e-06, + "loss": 0.0421, + "num_input_tokens_seen": 17623040, + "step": 26160 + }, + { + "epoch": 0.6392153030562138, + "grad_norm": 26.047630310058594, + "learning_rate": 1.9952831653043673e-06, + "loss": 0.1205, + "num_input_tokens_seen": 17626560, + "step": 26165 + }, + { + "epoch": 0.639337453888061, + "grad_norm": 8.898675918579102, + "learning_rate": 1.9952748886941623e-06, + "loss": 0.1684, + "num_input_tokens_seen": 17629888, + "step": 26170 + }, + { + "epoch": 0.6394596047199081, + "grad_norm": 6.229605197906494, + "learning_rate": 1.995266604846051e-06, + "loss": 0.12, + "num_input_tokens_seen": 17633600, + "step": 26175 + }, + { + "epoch": 0.6395817555517553, + "grad_norm": 44.44392395019531, + "learning_rate": 1.9952583137600927e-06, + "loss": 0.0854, + "num_input_tokens_seen": 17637184, + "step": 26180 + }, + { + "epoch": 0.6397039063836025, + "grad_norm": 13.342550277709961, + "learning_rate": 1.995250015436349e-06, + "loss": 0.1761, + "num_input_tokens_seen": 17640384, + "step": 26185 + }, + { + "epoch": 0.6398260572154496, + "grad_norm": 21.162038803100586, + "learning_rate": 1.9952417098748787e-06, + "loss": 0.1808, + "num_input_tokens_seen": 17643456, + "step": 26190 + }, + { + "epoch": 0.6399482080472968, + "grad_norm": 10.54422664642334, + "learning_rate": 1.9952333970757437e-06, + "loss": 0.1093, + "num_input_tokens_seen": 17646912, + "step": 26195 + }, + { + "epoch": 0.640070358879144, + "grad_norm": 7.449054718017578, + "learning_rate": 1.995225077039003e-06, + "loss": 0.0242, + "num_input_tokens_seen": 17650304, + "step": 26200 + }, + { + "epoch": 0.6401925097109912, + "grad_norm": 19.278846740722656, + "learning_rate": 1.9952167497647183e-06, + "loss": 0.1729, + "num_input_tokens_seen": 17653504, + "step": 26205 + }, + { + "epoch": 0.6403146605428383, + "grad_norm": 7.0912628173828125, + "learning_rate": 1.9952084152529496e-06, + "loss": 0.0961, + "num_input_tokens_seen": 17656768, + "step": 26210 + }, + { + "epoch": 0.6404368113746854, + "grad_norm": 12.94286060333252, + "learning_rate": 1.9952000735037577e-06, + "loss": 0.0957, + "num_input_tokens_seen": 17660032, + "step": 26215 + }, + { + "epoch": 0.6405589622065326, + "grad_norm": 0.9649350643157959, + "learning_rate": 1.995191724517203e-06, + "loss": 0.1386, + "num_input_tokens_seen": 17663424, + "step": 26220 + }, + { + "epoch": 0.6406811130383798, + "grad_norm": 0.6071916222572327, + "learning_rate": 1.9951833682933468e-06, + "loss": 0.0311, + "num_input_tokens_seen": 17666496, + "step": 26225 + }, + { + "epoch": 0.6408032638702269, + "grad_norm": 17.107208251953125, + "learning_rate": 1.995175004832249e-06, + "loss": 0.0999, + "num_input_tokens_seen": 17669632, + "step": 26230 + }, + { + "epoch": 0.6409254147020741, + "grad_norm": 32.13701629638672, + "learning_rate": 1.9951666341339717e-06, + "loss": 0.1355, + "num_input_tokens_seen": 17673408, + "step": 26235 + }, + { + "epoch": 0.6410475655339213, + "grad_norm": 37.73835372924805, + "learning_rate": 1.9951582561985743e-06, + "loss": 0.1531, + "num_input_tokens_seen": 17676416, + "step": 26240 + }, + { + "epoch": 0.6411697163657685, + "grad_norm": 22.128427505493164, + "learning_rate": 1.995149871026118e-06, + "loss": 0.1753, + "num_input_tokens_seen": 17679744, + "step": 26245 + }, + { + "epoch": 0.6412918671976157, + "grad_norm": 10.967851638793945, + "learning_rate": 1.995141478616665e-06, + "loss": 0.1188, + "num_input_tokens_seen": 17682944, + "step": 26250 + }, + { + "epoch": 0.6414140180294627, + "grad_norm": 13.98348331451416, + "learning_rate": 1.995133078970275e-06, + "loss": 0.1189, + "num_input_tokens_seen": 17686336, + "step": 26255 + }, + { + "epoch": 0.6415361688613099, + "grad_norm": 5.867333889007568, + "learning_rate": 1.99512467208701e-06, + "loss": 0.099, + "num_input_tokens_seen": 17689792, + "step": 26260 + }, + { + "epoch": 0.6416583196931571, + "grad_norm": 12.464974403381348, + "learning_rate": 1.9951162579669306e-06, + "loss": 0.0971, + "num_input_tokens_seen": 17693888, + "step": 26265 + }, + { + "epoch": 0.6417804705250043, + "grad_norm": 26.993873596191406, + "learning_rate": 1.995107836610098e-06, + "loss": 0.1654, + "num_input_tokens_seen": 17697152, + "step": 26270 + }, + { + "epoch": 0.6419026213568514, + "grad_norm": 2.5690789222717285, + "learning_rate": 1.9950994080165736e-06, + "loss": 0.0956, + "num_input_tokens_seen": 17700224, + "step": 26275 + }, + { + "epoch": 0.6420247721886986, + "grad_norm": 11.626635551452637, + "learning_rate": 1.9950909721864184e-06, + "loss": 0.116, + "num_input_tokens_seen": 17703488, + "step": 26280 + }, + { + "epoch": 0.6421469230205458, + "grad_norm": 0.5176971554756165, + "learning_rate": 1.9950825291196944e-06, + "loss": 0.0592, + "num_input_tokens_seen": 17706688, + "step": 26285 + }, + { + "epoch": 0.642269073852393, + "grad_norm": 32.07765579223633, + "learning_rate": 1.995074078816462e-06, + "loss": 0.0388, + "num_input_tokens_seen": 17709888, + "step": 26290 + }, + { + "epoch": 0.6423912246842401, + "grad_norm": 0.510884702205658, + "learning_rate": 1.9950656212767844e-06, + "loss": 0.0883, + "num_input_tokens_seen": 17713536, + "step": 26295 + }, + { + "epoch": 0.6425133755160872, + "grad_norm": 13.728007316589355, + "learning_rate": 1.995057156500721e-06, + "loss": 0.1991, + "num_input_tokens_seen": 17716928, + "step": 26300 + }, + { + "epoch": 0.6426355263479344, + "grad_norm": 9.4790678024292, + "learning_rate": 1.9950486844883348e-06, + "loss": 0.134, + "num_input_tokens_seen": 17720128, + "step": 26305 + }, + { + "epoch": 0.6427576771797816, + "grad_norm": 29.495006561279297, + "learning_rate": 1.9950402052396866e-06, + "loss": 0.1259, + "num_input_tokens_seen": 17723520, + "step": 26310 + }, + { + "epoch": 0.6428798280116288, + "grad_norm": 33.22211837768555, + "learning_rate": 1.9950317187548385e-06, + "loss": 0.202, + "num_input_tokens_seen": 17727232, + "step": 26315 + }, + { + "epoch": 0.6430019788434759, + "grad_norm": 13.970088005065918, + "learning_rate": 1.995023225033852e-06, + "loss": 0.1676, + "num_input_tokens_seen": 17730496, + "step": 26320 + }, + { + "epoch": 0.6431241296753231, + "grad_norm": 12.428041458129883, + "learning_rate": 1.9950147240767895e-06, + "loss": 0.3496, + "num_input_tokens_seen": 17734336, + "step": 26325 + }, + { + "epoch": 0.6432462805071703, + "grad_norm": 13.682167053222656, + "learning_rate": 1.9950062158837118e-06, + "loss": 0.0899, + "num_input_tokens_seen": 17737792, + "step": 26330 + }, + { + "epoch": 0.6433684313390174, + "grad_norm": 16.482370376586914, + "learning_rate": 1.9949977004546814e-06, + "loss": 0.1277, + "num_input_tokens_seen": 17741312, + "step": 26335 + }, + { + "epoch": 0.6434905821708646, + "grad_norm": 3.444857358932495, + "learning_rate": 1.99498917778976e-06, + "loss": 0.1018, + "num_input_tokens_seen": 17744640, + "step": 26340 + }, + { + "epoch": 0.6436127330027117, + "grad_norm": 20.923669815063477, + "learning_rate": 1.9949806478890095e-06, + "loss": 0.143, + "num_input_tokens_seen": 17748096, + "step": 26345 + }, + { + "epoch": 0.6437348838345589, + "grad_norm": 3.6736643314361572, + "learning_rate": 1.9949721107524924e-06, + "loss": 0.0545, + "num_input_tokens_seen": 17751424, + "step": 26350 + }, + { + "epoch": 0.6438570346664061, + "grad_norm": 0.2368733286857605, + "learning_rate": 1.9949635663802705e-06, + "loss": 0.1333, + "num_input_tokens_seen": 17754560, + "step": 26355 + }, + { + "epoch": 0.6439791854982533, + "grad_norm": 2.8474624156951904, + "learning_rate": 1.994955014772406e-06, + "loss": 0.0863, + "num_input_tokens_seen": 17757952, + "step": 26360 + }, + { + "epoch": 0.6441013363301004, + "grad_norm": 31.334247589111328, + "learning_rate": 1.9949464559289607e-06, + "loss": 0.2483, + "num_input_tokens_seen": 17761472, + "step": 26365 + }, + { + "epoch": 0.6442234871619476, + "grad_norm": 8.788104057312012, + "learning_rate": 1.9949378898499974e-06, + "loss": 0.0981, + "num_input_tokens_seen": 17764608, + "step": 26370 + }, + { + "epoch": 0.6443456379937947, + "grad_norm": 0.9639668464660645, + "learning_rate": 1.9949293165355783e-06, + "loss": 0.1006, + "num_input_tokens_seen": 17767872, + "step": 26375 + }, + { + "epoch": 0.6444677888256419, + "grad_norm": 13.94326114654541, + "learning_rate": 1.9949207359857656e-06, + "loss": 0.0816, + "num_input_tokens_seen": 17771136, + "step": 26380 + }, + { + "epoch": 0.6445899396574891, + "grad_norm": 8.497142791748047, + "learning_rate": 1.9949121482006216e-06, + "loss": 0.1563, + "num_input_tokens_seen": 17774464, + "step": 26385 + }, + { + "epoch": 0.6447120904893362, + "grad_norm": 5.612656116485596, + "learning_rate": 1.9949035531802086e-06, + "loss": 0.0626, + "num_input_tokens_seen": 17777664, + "step": 26390 + }, + { + "epoch": 0.6448342413211834, + "grad_norm": 24.19043731689453, + "learning_rate": 1.9948949509245897e-06, + "loss": 0.1521, + "num_input_tokens_seen": 17780672, + "step": 26395 + }, + { + "epoch": 0.6449563921530306, + "grad_norm": 4.007930278778076, + "learning_rate": 1.994886341433827e-06, + "loss": 0.1073, + "num_input_tokens_seen": 17783744, + "step": 26400 + }, + { + "epoch": 0.6450785429848778, + "grad_norm": 24.8145809173584, + "learning_rate": 1.994877724707983e-06, + "loss": 0.225, + "num_input_tokens_seen": 17787904, + "step": 26405 + }, + { + "epoch": 0.6452006938167248, + "grad_norm": 12.867918014526367, + "learning_rate": 1.994869100747121e-06, + "loss": 0.044, + "num_input_tokens_seen": 17791424, + "step": 26410 + }, + { + "epoch": 0.645322844648572, + "grad_norm": 1.9785236120224, + "learning_rate": 1.994860469551303e-06, + "loss": 0.0557, + "num_input_tokens_seen": 17794624, + "step": 26415 + }, + { + "epoch": 0.6454449954804192, + "grad_norm": 11.472222328186035, + "learning_rate": 1.9948518311205925e-06, + "loss": 0.1954, + "num_input_tokens_seen": 17797888, + "step": 26420 + }, + { + "epoch": 0.6455671463122664, + "grad_norm": 2.1420435905456543, + "learning_rate": 1.9948431854550517e-06, + "loss": 0.084, + "num_input_tokens_seen": 17801216, + "step": 26425 + }, + { + "epoch": 0.6456892971441136, + "grad_norm": 9.826559066772461, + "learning_rate": 1.9948345325547433e-06, + "loss": 0.0945, + "num_input_tokens_seen": 17804352, + "step": 26430 + }, + { + "epoch": 0.6458114479759607, + "grad_norm": 2.30534291267395, + "learning_rate": 1.994825872419731e-06, + "loss": 0.0385, + "num_input_tokens_seen": 17807616, + "step": 26435 + }, + { + "epoch": 0.6459335988078079, + "grad_norm": 11.928797721862793, + "learning_rate": 1.994817205050077e-06, + "loss": 0.1779, + "num_input_tokens_seen": 17810752, + "step": 26440 + }, + { + "epoch": 0.6460557496396551, + "grad_norm": 0.038654062896966934, + "learning_rate": 1.9948085304458453e-06, + "loss": 0.2423, + "num_input_tokens_seen": 17813824, + "step": 26445 + }, + { + "epoch": 0.6461779004715023, + "grad_norm": 2.4233062267303467, + "learning_rate": 1.994799848607098e-06, + "loss": 0.0702, + "num_input_tokens_seen": 17816960, + "step": 26450 + }, + { + "epoch": 0.6463000513033493, + "grad_norm": 0.9603357315063477, + "learning_rate": 1.9947911595338986e-06, + "loss": 0.1297, + "num_input_tokens_seen": 17820032, + "step": 26455 + }, + { + "epoch": 0.6464222021351965, + "grad_norm": 0.8634178042411804, + "learning_rate": 1.9947824632263102e-06, + "loss": 0.0541, + "num_input_tokens_seen": 17823680, + "step": 26460 + }, + { + "epoch": 0.6465443529670437, + "grad_norm": 36.10398864746094, + "learning_rate": 1.9947737596843964e-06, + "loss": 0.1016, + "num_input_tokens_seen": 17826880, + "step": 26465 + }, + { + "epoch": 0.6466665037988909, + "grad_norm": 0.6066563725471497, + "learning_rate": 1.9947650489082207e-06, + "loss": 0.1124, + "num_input_tokens_seen": 17830656, + "step": 26470 + }, + { + "epoch": 0.646788654630738, + "grad_norm": 0.11005598306655884, + "learning_rate": 1.9947563308978453e-06, + "loss": 0.1057, + "num_input_tokens_seen": 17834048, + "step": 26475 + }, + { + "epoch": 0.6469108054625852, + "grad_norm": 0.32724529504776, + "learning_rate": 1.9947476056533347e-06, + "loss": 0.1586, + "num_input_tokens_seen": 17837504, + "step": 26480 + }, + { + "epoch": 0.6470329562944324, + "grad_norm": 28.255603790283203, + "learning_rate": 1.994738873174752e-06, + "loss": 0.1599, + "num_input_tokens_seen": 17840704, + "step": 26485 + }, + { + "epoch": 0.6471551071262795, + "grad_norm": 8.742977142333984, + "learning_rate": 1.9947301334621603e-06, + "loss": 0.1022, + "num_input_tokens_seen": 17844736, + "step": 26490 + }, + { + "epoch": 0.6472772579581267, + "grad_norm": 28.56509017944336, + "learning_rate": 1.9947213865156237e-06, + "loss": 0.1502, + "num_input_tokens_seen": 17848384, + "step": 26495 + }, + { + "epoch": 0.6473994087899738, + "grad_norm": 6.090130805969238, + "learning_rate": 1.994712632335206e-06, + "loss": 0.0923, + "num_input_tokens_seen": 17851584, + "step": 26500 + }, + { + "epoch": 0.647521559621821, + "grad_norm": 0.3652131259441376, + "learning_rate": 1.9947038709209696e-06, + "loss": 0.1213, + "num_input_tokens_seen": 17855040, + "step": 26505 + }, + { + "epoch": 0.6476437104536682, + "grad_norm": 14.173577308654785, + "learning_rate": 1.99469510227298e-06, + "loss": 0.1322, + "num_input_tokens_seen": 17858880, + "step": 26510 + }, + { + "epoch": 0.6477658612855154, + "grad_norm": 10.040029525756836, + "learning_rate": 1.9946863263912995e-06, + "loss": 0.0871, + "num_input_tokens_seen": 17861952, + "step": 26515 + }, + { + "epoch": 0.6478880121173625, + "grad_norm": 18.37601661682129, + "learning_rate": 1.9946775432759927e-06, + "loss": 0.0217, + "num_input_tokens_seen": 17865024, + "step": 26520 + }, + { + "epoch": 0.6480101629492097, + "grad_norm": 0.8798476457595825, + "learning_rate": 1.994668752927123e-06, + "loss": 0.0965, + "num_input_tokens_seen": 17867968, + "step": 26525 + }, + { + "epoch": 0.6481323137810568, + "grad_norm": 1.0580397844314575, + "learning_rate": 1.9946599553447554e-06, + "loss": 0.0301, + "num_input_tokens_seen": 17871744, + "step": 26530 + }, + { + "epoch": 0.648254464612904, + "grad_norm": 27.158893585205078, + "learning_rate": 1.9946511505289524e-06, + "loss": 0.0995, + "num_input_tokens_seen": 17876992, + "step": 26535 + }, + { + "epoch": 0.6483766154447512, + "grad_norm": 4.429322719573975, + "learning_rate": 1.9946423384797785e-06, + "loss": 0.0387, + "num_input_tokens_seen": 17880064, + "step": 26540 + }, + { + "epoch": 0.6484987662765983, + "grad_norm": 12.550520896911621, + "learning_rate": 1.9946335191972986e-06, + "loss": 0.0889, + "num_input_tokens_seen": 17883840, + "step": 26545 + }, + { + "epoch": 0.6486209171084455, + "grad_norm": 59.04765701293945, + "learning_rate": 1.9946246926815758e-06, + "loss": 0.1818, + "num_input_tokens_seen": 17887424, + "step": 26550 + }, + { + "epoch": 0.6487430679402927, + "grad_norm": 56.20634841918945, + "learning_rate": 1.994615858932675e-06, + "loss": 0.2036, + "num_input_tokens_seen": 17890752, + "step": 26555 + }, + { + "epoch": 0.6488652187721399, + "grad_norm": 29.603675842285156, + "learning_rate": 1.99460701795066e-06, + "loss": 0.1904, + "num_input_tokens_seen": 17894528, + "step": 26560 + }, + { + "epoch": 0.648987369603987, + "grad_norm": 30.748910903930664, + "learning_rate": 1.994598169735595e-06, + "loss": 0.1425, + "num_input_tokens_seen": 17897600, + "step": 26565 + }, + { + "epoch": 0.6491095204358341, + "grad_norm": 46.07027053833008, + "learning_rate": 1.994589314287545e-06, + "loss": 0.2702, + "num_input_tokens_seen": 17901248, + "step": 26570 + }, + { + "epoch": 0.6492316712676813, + "grad_norm": 1.5052845478057861, + "learning_rate": 1.9945804516065737e-06, + "loss": 0.1467, + "num_input_tokens_seen": 17904448, + "step": 26575 + }, + { + "epoch": 0.6493538220995285, + "grad_norm": 26.738313674926758, + "learning_rate": 1.9945715816927464e-06, + "loss": 0.1725, + "num_input_tokens_seen": 17907968, + "step": 26580 + }, + { + "epoch": 0.6494759729313757, + "grad_norm": 6.558767795562744, + "learning_rate": 1.9945627045461263e-06, + "loss": 0.027, + "num_input_tokens_seen": 17911232, + "step": 26585 + }, + { + "epoch": 0.6495981237632228, + "grad_norm": 11.350381851196289, + "learning_rate": 1.9945538201667792e-06, + "loss": 0.1729, + "num_input_tokens_seen": 17914496, + "step": 26590 + }, + { + "epoch": 0.64972027459507, + "grad_norm": 4.032224178314209, + "learning_rate": 1.9945449285547694e-06, + "loss": 0.0971, + "num_input_tokens_seen": 17917760, + "step": 26595 + }, + { + "epoch": 0.6498424254269172, + "grad_norm": 22.45256996154785, + "learning_rate": 1.9945360297101607e-06, + "loss": 0.1367, + "num_input_tokens_seen": 17921280, + "step": 26600 + }, + { + "epoch": 0.6499645762587644, + "grad_norm": 9.463356971740723, + "learning_rate": 1.994527123633019e-06, + "loss": 0.1497, + "num_input_tokens_seen": 17924288, + "step": 26605 + }, + { + "epoch": 0.6500867270906114, + "grad_norm": 0.3891908824443817, + "learning_rate": 1.9945182103234085e-06, + "loss": 0.0734, + "num_input_tokens_seen": 17928192, + "step": 26610 + }, + { + "epoch": 0.6502088779224586, + "grad_norm": 1.3331596851348877, + "learning_rate": 1.9945092897813937e-06, + "loss": 0.0735, + "num_input_tokens_seen": 17931584, + "step": 26615 + }, + { + "epoch": 0.6503310287543058, + "grad_norm": 8.364001274108887, + "learning_rate": 1.99450036200704e-06, + "loss": 0.0445, + "num_input_tokens_seen": 17935296, + "step": 26620 + }, + { + "epoch": 0.650453179586153, + "grad_norm": 3.21260929107666, + "learning_rate": 1.9944914270004126e-06, + "loss": 0.132, + "num_input_tokens_seen": 17938496, + "step": 26625 + }, + { + "epoch": 0.6505753304180002, + "grad_norm": 19.593875885009766, + "learning_rate": 1.994482484761576e-06, + "loss": 0.1255, + "num_input_tokens_seen": 17941440, + "step": 26630 + }, + { + "epoch": 0.6506974812498473, + "grad_norm": 11.708880424499512, + "learning_rate": 1.994473535290595e-06, + "loss": 0.1735, + "num_input_tokens_seen": 17944576, + "step": 26635 + }, + { + "epoch": 0.6508196320816945, + "grad_norm": 8.608492851257324, + "learning_rate": 1.994464578587535e-06, + "loss": 0.0996, + "num_input_tokens_seen": 17948288, + "step": 26640 + }, + { + "epoch": 0.6509417829135417, + "grad_norm": 3.7675461769104004, + "learning_rate": 1.9944556146524613e-06, + "loss": 0.1026, + "num_input_tokens_seen": 17951360, + "step": 26645 + }, + { + "epoch": 0.6510639337453888, + "grad_norm": 0.9686684608459473, + "learning_rate": 1.9944466434854386e-06, + "loss": 0.0945, + "num_input_tokens_seen": 17954944, + "step": 26650 + }, + { + "epoch": 0.6511860845772359, + "grad_norm": 1.992281198501587, + "learning_rate": 1.9944376650865325e-06, + "loss": 0.117, + "num_input_tokens_seen": 17958080, + "step": 26655 + }, + { + "epoch": 0.6513082354090831, + "grad_norm": 23.731996536254883, + "learning_rate": 1.994428679455808e-06, + "loss": 0.1298, + "num_input_tokens_seen": 17961664, + "step": 26660 + }, + { + "epoch": 0.6514303862409303, + "grad_norm": 0.7598839998245239, + "learning_rate": 1.9944196865933313e-06, + "loss": 0.0816, + "num_input_tokens_seen": 17964672, + "step": 26665 + }, + { + "epoch": 0.6515525370727775, + "grad_norm": 23.543251037597656, + "learning_rate": 1.994410686499167e-06, + "loss": 0.2047, + "num_input_tokens_seen": 17967808, + "step": 26670 + }, + { + "epoch": 0.6516746879046247, + "grad_norm": 14.285406112670898, + "learning_rate": 1.9944016791733806e-06, + "loss": 0.1808, + "num_input_tokens_seen": 17971200, + "step": 26675 + }, + { + "epoch": 0.6517968387364718, + "grad_norm": 9.57889461517334, + "learning_rate": 1.9943926646160377e-06, + "loss": 0.077, + "num_input_tokens_seen": 17974528, + "step": 26680 + }, + { + "epoch": 0.651918989568319, + "grad_norm": 9.059340476989746, + "learning_rate": 1.994383642827204e-06, + "loss": 0.0653, + "num_input_tokens_seen": 17977984, + "step": 26685 + }, + { + "epoch": 0.6520411404001661, + "grad_norm": 15.571893692016602, + "learning_rate": 1.9943746138069446e-06, + "loss": 0.1219, + "num_input_tokens_seen": 17980800, + "step": 26690 + }, + { + "epoch": 0.6521632912320133, + "grad_norm": 7.93010139465332, + "learning_rate": 1.9943655775553257e-06, + "loss": 0.1261, + "num_input_tokens_seen": 17984128, + "step": 26695 + }, + { + "epoch": 0.6522854420638604, + "grad_norm": 9.046224594116211, + "learning_rate": 1.9943565340724133e-06, + "loss": 0.0388, + "num_input_tokens_seen": 17987776, + "step": 26700 + }, + { + "epoch": 0.6524075928957076, + "grad_norm": 19.525766372680664, + "learning_rate": 1.994347483358272e-06, + "loss": 0.1485, + "num_input_tokens_seen": 17991168, + "step": 26705 + }, + { + "epoch": 0.6525297437275548, + "grad_norm": 15.36963176727295, + "learning_rate": 1.9943384254129693e-06, + "loss": 0.1009, + "num_input_tokens_seen": 17994432, + "step": 26710 + }, + { + "epoch": 0.652651894559402, + "grad_norm": 19.087261199951172, + "learning_rate": 1.9943293602365694e-06, + "loss": 0.3058, + "num_input_tokens_seen": 17997696, + "step": 26715 + }, + { + "epoch": 0.6527740453912492, + "grad_norm": 36.39265823364258, + "learning_rate": 1.994320287829139e-06, + "loss": 0.1395, + "num_input_tokens_seen": 18001408, + "step": 26720 + }, + { + "epoch": 0.6528961962230962, + "grad_norm": 17.34744644165039, + "learning_rate": 1.9943112081907443e-06, + "loss": 0.0725, + "num_input_tokens_seen": 18004480, + "step": 26725 + }, + { + "epoch": 0.6530183470549434, + "grad_norm": 8.52084732055664, + "learning_rate": 1.9943021213214508e-06, + "loss": 0.0555, + "num_input_tokens_seen": 18007616, + "step": 26730 + }, + { + "epoch": 0.6531404978867906, + "grad_norm": 8.265021324157715, + "learning_rate": 1.994293027221325e-06, + "loss": 0.0594, + "num_input_tokens_seen": 18011072, + "step": 26735 + }, + { + "epoch": 0.6532626487186378, + "grad_norm": 20.10539436340332, + "learning_rate": 1.994283925890433e-06, + "loss": 0.1316, + "num_input_tokens_seen": 18013824, + "step": 26740 + }, + { + "epoch": 0.6533847995504849, + "grad_norm": 16.809226989746094, + "learning_rate": 1.9942748173288408e-06, + "loss": 0.0942, + "num_input_tokens_seen": 18017024, + "step": 26745 + }, + { + "epoch": 0.6535069503823321, + "grad_norm": 11.143219947814941, + "learning_rate": 1.9942657015366145e-06, + "loss": 0.1295, + "num_input_tokens_seen": 18020736, + "step": 26750 + }, + { + "epoch": 0.6536291012141793, + "grad_norm": 19.93425178527832, + "learning_rate": 1.9942565785138207e-06, + "loss": 0.0743, + "num_input_tokens_seen": 18024000, + "step": 26755 + }, + { + "epoch": 0.6537512520460265, + "grad_norm": 2.00927996635437, + "learning_rate": 1.994247448260526e-06, + "loss": 0.1127, + "num_input_tokens_seen": 18027072, + "step": 26760 + }, + { + "epoch": 0.6538734028778735, + "grad_norm": 34.23299026489258, + "learning_rate": 1.994238310776796e-06, + "loss": 0.1408, + "num_input_tokens_seen": 18030464, + "step": 26765 + }, + { + "epoch": 0.6539955537097207, + "grad_norm": 9.078926086425781, + "learning_rate": 1.9942291660626974e-06, + "loss": 0.1588, + "num_input_tokens_seen": 18033856, + "step": 26770 + }, + { + "epoch": 0.6541177045415679, + "grad_norm": 13.535733222961426, + "learning_rate": 1.9942200141182973e-06, + "loss": 0.0567, + "num_input_tokens_seen": 18037056, + "step": 26775 + }, + { + "epoch": 0.6542398553734151, + "grad_norm": 1.5711990594863892, + "learning_rate": 1.9942108549436617e-06, + "loss": 0.0857, + "num_input_tokens_seen": 18040064, + "step": 26780 + }, + { + "epoch": 0.6543620062052623, + "grad_norm": 0.7724623084068298, + "learning_rate": 1.9942016885388575e-06, + "loss": 0.0756, + "num_input_tokens_seen": 18043456, + "step": 26785 + }, + { + "epoch": 0.6544841570371094, + "grad_norm": 25.43880844116211, + "learning_rate": 1.994192514903951e-06, + "loss": 0.0706, + "num_input_tokens_seen": 18046656, + "step": 26790 + }, + { + "epoch": 0.6546063078689566, + "grad_norm": 18.88958740234375, + "learning_rate": 1.994183334039009e-06, + "loss": 0.0439, + "num_input_tokens_seen": 18050624, + "step": 26795 + }, + { + "epoch": 0.6547284587008038, + "grad_norm": 25.788715362548828, + "learning_rate": 1.9941741459440987e-06, + "loss": 0.0505, + "num_input_tokens_seen": 18054720, + "step": 26800 + }, + { + "epoch": 0.654850609532651, + "grad_norm": 0.5160311460494995, + "learning_rate": 1.9941649506192866e-06, + "loss": 0.1256, + "num_input_tokens_seen": 18057856, + "step": 26805 + }, + { + "epoch": 0.654972760364498, + "grad_norm": 37.93132781982422, + "learning_rate": 1.994155748064639e-06, + "loss": 0.1938, + "num_input_tokens_seen": 18061568, + "step": 26810 + }, + { + "epoch": 0.6550949111963452, + "grad_norm": 9.682503700256348, + "learning_rate": 1.994146538280224e-06, + "loss": 0.1578, + "num_input_tokens_seen": 18065408, + "step": 26815 + }, + { + "epoch": 0.6552170620281924, + "grad_norm": 32.54736328125, + "learning_rate": 1.994137321266108e-06, + "loss": 0.0594, + "num_input_tokens_seen": 18069184, + "step": 26820 + }, + { + "epoch": 0.6553392128600396, + "grad_norm": 3.4581313133239746, + "learning_rate": 1.994128097022358e-06, + "loss": 0.0316, + "num_input_tokens_seen": 18073344, + "step": 26825 + }, + { + "epoch": 0.6554613636918868, + "grad_norm": 29.386049270629883, + "learning_rate": 1.9941188655490406e-06, + "loss": 0.2249, + "num_input_tokens_seen": 18077056, + "step": 26830 + }, + { + "epoch": 0.6555835145237339, + "grad_norm": 0.32932719588279724, + "learning_rate": 1.994109626846224e-06, + "loss": 0.1366, + "num_input_tokens_seen": 18080576, + "step": 26835 + }, + { + "epoch": 0.6557056653555811, + "grad_norm": 4.738411903381348, + "learning_rate": 1.9941003809139746e-06, + "loss": 0.0704, + "num_input_tokens_seen": 18084032, + "step": 26840 + }, + { + "epoch": 0.6558278161874282, + "grad_norm": 38.715606689453125, + "learning_rate": 1.99409112775236e-06, + "loss": 0.0993, + "num_input_tokens_seen": 18087104, + "step": 26845 + }, + { + "epoch": 0.6559499670192754, + "grad_norm": 24.37164878845215, + "learning_rate": 1.994081867361447e-06, + "loss": 0.1199, + "num_input_tokens_seen": 18090432, + "step": 26850 + }, + { + "epoch": 0.6560721178511225, + "grad_norm": 4.3538899421691895, + "learning_rate": 1.9940725997413037e-06, + "loss": 0.2395, + "num_input_tokens_seen": 18093632, + "step": 26855 + }, + { + "epoch": 0.6561942686829697, + "grad_norm": 0.20880497992038727, + "learning_rate": 1.994063324891997e-06, + "loss": 0.1031, + "num_input_tokens_seen": 18096768, + "step": 26860 + }, + { + "epoch": 0.6563164195148169, + "grad_norm": 17.8417911529541, + "learning_rate": 1.9940540428135942e-06, + "loss": 0.107, + "num_input_tokens_seen": 18100160, + "step": 26865 + }, + { + "epoch": 0.6564385703466641, + "grad_norm": 2.653338670730591, + "learning_rate": 1.9940447535061627e-06, + "loss": 0.126, + "num_input_tokens_seen": 18103616, + "step": 26870 + }, + { + "epoch": 0.6565607211785113, + "grad_norm": 10.069886207580566, + "learning_rate": 1.994035456969771e-06, + "loss": 0.1211, + "num_input_tokens_seen": 18107392, + "step": 26875 + }, + { + "epoch": 0.6566828720103584, + "grad_norm": 0.5909165143966675, + "learning_rate": 1.994026153204486e-06, + "loss": 0.0588, + "num_input_tokens_seen": 18111552, + "step": 26880 + }, + { + "epoch": 0.6568050228422055, + "grad_norm": 12.051314353942871, + "learning_rate": 1.9940168422103752e-06, + "loss": 0.052, + "num_input_tokens_seen": 18114944, + "step": 26885 + }, + { + "epoch": 0.6569271736740527, + "grad_norm": 16.518218994140625, + "learning_rate": 1.9940075239875068e-06, + "loss": 0.1785, + "num_input_tokens_seen": 18118080, + "step": 26890 + }, + { + "epoch": 0.6570493245058999, + "grad_norm": 0.5696353316307068, + "learning_rate": 1.993998198535948e-06, + "loss": 0.0843, + "num_input_tokens_seen": 18121664, + "step": 26895 + }, + { + "epoch": 0.657171475337747, + "grad_norm": 12.237276077270508, + "learning_rate": 1.993988865855767e-06, + "loss": 0.0735, + "num_input_tokens_seen": 18124928, + "step": 26900 + }, + { + "epoch": 0.6572936261695942, + "grad_norm": 18.05430793762207, + "learning_rate": 1.9939795259470324e-06, + "loss": 0.2564, + "num_input_tokens_seen": 18128128, + "step": 26905 + }, + { + "epoch": 0.6574157770014414, + "grad_norm": 2.649308919906616, + "learning_rate": 1.9939701788098104e-06, + "loss": 0.0907, + "num_input_tokens_seen": 18131328, + "step": 26910 + }, + { + "epoch": 0.6575379278332886, + "grad_norm": 17.577932357788086, + "learning_rate": 1.99396082444417e-06, + "loss": 0.098, + "num_input_tokens_seen": 18134976, + "step": 26915 + }, + { + "epoch": 0.6576600786651358, + "grad_norm": 1.0797964334487915, + "learning_rate": 1.99395146285018e-06, + "loss": 0.0583, + "num_input_tokens_seen": 18138432, + "step": 26920 + }, + { + "epoch": 0.6577822294969828, + "grad_norm": 21.48768424987793, + "learning_rate": 1.993942094027907e-06, + "loss": 0.151, + "num_input_tokens_seen": 18141952, + "step": 26925 + }, + { + "epoch": 0.65790438032883, + "grad_norm": 54.25938415527344, + "learning_rate": 1.9939327179774198e-06, + "loss": 0.1185, + "num_input_tokens_seen": 18145920, + "step": 26930 + }, + { + "epoch": 0.6580265311606772, + "grad_norm": 11.435982704162598, + "learning_rate": 1.9939233346987863e-06, + "loss": 0.1919, + "num_input_tokens_seen": 18149440, + "step": 26935 + }, + { + "epoch": 0.6581486819925244, + "grad_norm": 10.653462409973145, + "learning_rate": 1.993913944192075e-06, + "loss": 0.1845, + "num_input_tokens_seen": 18152704, + "step": 26940 + }, + { + "epoch": 0.6582708328243715, + "grad_norm": 16.521217346191406, + "learning_rate": 1.9939045464573544e-06, + "loss": 0.2145, + "num_input_tokens_seen": 18155776, + "step": 26945 + }, + { + "epoch": 0.6583929836562187, + "grad_norm": 21.310832977294922, + "learning_rate": 1.993895141494693e-06, + "loss": 0.1141, + "num_input_tokens_seen": 18158912, + "step": 26950 + }, + { + "epoch": 0.6585151344880659, + "grad_norm": 32.2432975769043, + "learning_rate": 1.993885729304158e-06, + "loss": 0.1262, + "num_input_tokens_seen": 18162752, + "step": 26955 + }, + { + "epoch": 0.6586372853199131, + "grad_norm": 0.8311634659767151, + "learning_rate": 1.993876309885819e-06, + "loss": 0.1028, + "num_input_tokens_seen": 18166272, + "step": 26960 + }, + { + "epoch": 0.6587594361517602, + "grad_norm": 15.096733093261719, + "learning_rate": 1.993866883239744e-06, + "loss": 0.1589, + "num_input_tokens_seen": 18169600, + "step": 26965 + }, + { + "epoch": 0.6588815869836073, + "grad_norm": 10.199955940246582, + "learning_rate": 1.9938574493660023e-06, + "loss": 0.0782, + "num_input_tokens_seen": 18173568, + "step": 26970 + }, + { + "epoch": 0.6590037378154545, + "grad_norm": 17.658369064331055, + "learning_rate": 1.9938480082646613e-06, + "loss": 0.1405, + "num_input_tokens_seen": 18176576, + "step": 26975 + }, + { + "epoch": 0.6591258886473017, + "grad_norm": 27.87171173095703, + "learning_rate": 1.9938385599357907e-06, + "loss": 0.0897, + "num_input_tokens_seen": 18179840, + "step": 26980 + }, + { + "epoch": 0.6592480394791489, + "grad_norm": 2.524864912033081, + "learning_rate": 1.9938291043794585e-06, + "loss": 0.1164, + "num_input_tokens_seen": 18182784, + "step": 26985 + }, + { + "epoch": 0.659370190310996, + "grad_norm": 17.257301330566406, + "learning_rate": 1.9938196415957334e-06, + "loss": 0.1907, + "num_input_tokens_seen": 18185984, + "step": 26990 + }, + { + "epoch": 0.6594923411428432, + "grad_norm": 0.8043541312217712, + "learning_rate": 1.9938101715846853e-06, + "loss": 0.0442, + "num_input_tokens_seen": 18189056, + "step": 26995 + }, + { + "epoch": 0.6596144919746904, + "grad_norm": 18.458202362060547, + "learning_rate": 1.993800694346382e-06, + "loss": 0.1052, + "num_input_tokens_seen": 18192384, + "step": 27000 + }, + { + "epoch": 0.6597366428065375, + "grad_norm": 12.032055854797363, + "learning_rate": 1.9937912098808927e-06, + "loss": 0.0961, + "num_input_tokens_seen": 18195712, + "step": 27005 + }, + { + "epoch": 0.6598587936383846, + "grad_norm": 8.875457763671875, + "learning_rate": 1.9937817181882864e-06, + "loss": 0.1263, + "num_input_tokens_seen": 18198848, + "step": 27010 + }, + { + "epoch": 0.6599809444702318, + "grad_norm": 1.453246831893921, + "learning_rate": 1.9937722192686323e-06, + "loss": 0.0561, + "num_input_tokens_seen": 18202112, + "step": 27015 + }, + { + "epoch": 0.660103095302079, + "grad_norm": 3.466822385787964, + "learning_rate": 1.9937627131219995e-06, + "loss": 0.071, + "num_input_tokens_seen": 18205696, + "step": 27020 + }, + { + "epoch": 0.6602252461339262, + "grad_norm": 40.0861930847168, + "learning_rate": 1.9937531997484566e-06, + "loss": 0.2277, + "num_input_tokens_seen": 18209152, + "step": 27025 + }, + { + "epoch": 0.6603473969657734, + "grad_norm": 1.530218243598938, + "learning_rate": 1.993743679148073e-06, + "loss": 0.1101, + "num_input_tokens_seen": 18212352, + "step": 27030 + }, + { + "epoch": 0.6604695477976205, + "grad_norm": 16.85233497619629, + "learning_rate": 1.9937341513209183e-06, + "loss": 0.1003, + "num_input_tokens_seen": 18215680, + "step": 27035 + }, + { + "epoch": 0.6605916986294676, + "grad_norm": 1.5461503267288208, + "learning_rate": 1.9937246162670614e-06, + "loss": 0.0563, + "num_input_tokens_seen": 18219072, + "step": 27040 + }, + { + "epoch": 0.6607138494613148, + "grad_norm": 11.609980583190918, + "learning_rate": 1.9937150739865723e-06, + "loss": 0.105, + "num_input_tokens_seen": 18222464, + "step": 27045 + }, + { + "epoch": 0.660836000293162, + "grad_norm": 17.705228805541992, + "learning_rate": 1.9937055244795195e-06, + "loss": 0.0846, + "num_input_tokens_seen": 18226112, + "step": 27050 + }, + { + "epoch": 0.6609581511250091, + "grad_norm": 10.436869621276855, + "learning_rate": 1.993695967745973e-06, + "loss": 0.1617, + "num_input_tokens_seen": 18229184, + "step": 27055 + }, + { + "epoch": 0.6610803019568563, + "grad_norm": 30.66120147705078, + "learning_rate": 1.993686403786002e-06, + "loss": 0.0614, + "num_input_tokens_seen": 18232320, + "step": 27060 + }, + { + "epoch": 0.6612024527887035, + "grad_norm": 5.639974594116211, + "learning_rate": 1.9936768325996763e-06, + "loss": 0.0612, + "num_input_tokens_seen": 18235584, + "step": 27065 + }, + { + "epoch": 0.6613246036205507, + "grad_norm": 0.6442601084709167, + "learning_rate": 1.9936672541870656e-06, + "loss": 0.0812, + "num_input_tokens_seen": 18238656, + "step": 27070 + }, + { + "epoch": 0.6614467544523979, + "grad_norm": 2.0177724361419678, + "learning_rate": 1.993657668548239e-06, + "loss": 0.0975, + "num_input_tokens_seen": 18242240, + "step": 27075 + }, + { + "epoch": 0.661568905284245, + "grad_norm": 0.5144067406654358, + "learning_rate": 1.993648075683267e-06, + "loss": 0.1443, + "num_input_tokens_seen": 18245952, + "step": 27080 + }, + { + "epoch": 0.6616910561160921, + "grad_norm": 3.1164534091949463, + "learning_rate": 1.9936384755922185e-06, + "loss": 0.1316, + "num_input_tokens_seen": 18249088, + "step": 27085 + }, + { + "epoch": 0.6618132069479393, + "grad_norm": 0.15728719532489777, + "learning_rate": 1.993628868275164e-06, + "loss": 0.0605, + "num_input_tokens_seen": 18252672, + "step": 27090 + }, + { + "epoch": 0.6619353577797865, + "grad_norm": 22.022380828857422, + "learning_rate": 1.9936192537321733e-06, + "loss": 0.2245, + "num_input_tokens_seen": 18255744, + "step": 27095 + }, + { + "epoch": 0.6620575086116336, + "grad_norm": 5.147634029388428, + "learning_rate": 1.993609631963316e-06, + "loss": 0.0751, + "num_input_tokens_seen": 18259584, + "step": 27100 + }, + { + "epoch": 0.6621796594434808, + "grad_norm": 2.5567846298217773, + "learning_rate": 1.993600002968662e-06, + "loss": 0.1795, + "num_input_tokens_seen": 18262656, + "step": 27105 + }, + { + "epoch": 0.662301810275328, + "grad_norm": 0.2086157351732254, + "learning_rate": 1.9935903667482815e-06, + "loss": 0.1977, + "num_input_tokens_seen": 18266112, + "step": 27110 + }, + { + "epoch": 0.6624239611071752, + "grad_norm": 25.842384338378906, + "learning_rate": 1.993580723302245e-06, + "loss": 0.1104, + "num_input_tokens_seen": 18269248, + "step": 27115 + }, + { + "epoch": 0.6625461119390224, + "grad_norm": 24.809724807739258, + "learning_rate": 1.9935710726306215e-06, + "loss": 0.1705, + "num_input_tokens_seen": 18272640, + "step": 27120 + }, + { + "epoch": 0.6626682627708694, + "grad_norm": 26.71112060546875, + "learning_rate": 1.9935614147334825e-06, + "loss": 0.116, + "num_input_tokens_seen": 18276032, + "step": 27125 + }, + { + "epoch": 0.6627904136027166, + "grad_norm": 6.851133346557617, + "learning_rate": 1.9935517496108976e-06, + "loss": 0.1621, + "num_input_tokens_seen": 18279744, + "step": 27130 + }, + { + "epoch": 0.6629125644345638, + "grad_norm": 19.518213272094727, + "learning_rate": 1.9935420772629374e-06, + "loss": 0.0469, + "num_input_tokens_seen": 18282688, + "step": 27135 + }, + { + "epoch": 0.663034715266411, + "grad_norm": 10.472986221313477, + "learning_rate": 1.9935323976896713e-06, + "loss": 0.1928, + "num_input_tokens_seen": 18286080, + "step": 27140 + }, + { + "epoch": 0.6631568660982581, + "grad_norm": 2.578057050704956, + "learning_rate": 1.993522710891171e-06, + "loss": 0.1215, + "num_input_tokens_seen": 18289664, + "step": 27145 + }, + { + "epoch": 0.6632790169301053, + "grad_norm": 5.96986722946167, + "learning_rate": 1.993513016867506e-06, + "loss": 0.0822, + "num_input_tokens_seen": 18292608, + "step": 27150 + }, + { + "epoch": 0.6634011677619525, + "grad_norm": 21.24750518798828, + "learning_rate": 1.993503315618747e-06, + "loss": 0.0923, + "num_input_tokens_seen": 18295808, + "step": 27155 + }, + { + "epoch": 0.6635233185937996, + "grad_norm": 15.410693168640137, + "learning_rate": 1.993493607144965e-06, + "loss": 0.1647, + "num_input_tokens_seen": 18299264, + "step": 27160 + }, + { + "epoch": 0.6636454694256468, + "grad_norm": 10.475720405578613, + "learning_rate": 1.9934838914462303e-06, + "loss": 0.1891, + "num_input_tokens_seen": 18302336, + "step": 27165 + }, + { + "epoch": 0.6637676202574939, + "grad_norm": 16.02503776550293, + "learning_rate": 1.9934741685226133e-06, + "loss": 0.0913, + "num_input_tokens_seen": 18305792, + "step": 27170 + }, + { + "epoch": 0.6638897710893411, + "grad_norm": 14.892638206481934, + "learning_rate": 1.993464438374185e-06, + "loss": 0.0621, + "num_input_tokens_seen": 18308928, + "step": 27175 + }, + { + "epoch": 0.6640119219211883, + "grad_norm": 33.46926498413086, + "learning_rate": 1.993454701001016e-06, + "loss": 0.1885, + "num_input_tokens_seen": 18312064, + "step": 27180 + }, + { + "epoch": 0.6641340727530355, + "grad_norm": 18.051313400268555, + "learning_rate": 1.9934449564031774e-06, + "loss": 0.1044, + "num_input_tokens_seen": 18315712, + "step": 27185 + }, + { + "epoch": 0.6642562235848826, + "grad_norm": 30.73536491394043, + "learning_rate": 1.99343520458074e-06, + "loss": 0.1608, + "num_input_tokens_seen": 18319296, + "step": 27190 + }, + { + "epoch": 0.6643783744167298, + "grad_norm": 14.0807466506958, + "learning_rate": 1.993425445533774e-06, + "loss": 0.0892, + "num_input_tokens_seen": 18322688, + "step": 27195 + }, + { + "epoch": 0.664500525248577, + "grad_norm": 17.66388702392578, + "learning_rate": 1.993415679262351e-06, + "loss": 0.031, + "num_input_tokens_seen": 18326272, + "step": 27200 + }, + { + "epoch": 0.6646226760804241, + "grad_norm": 2.5478765964508057, + "learning_rate": 1.9934059057665428e-06, + "loss": 0.1511, + "num_input_tokens_seen": 18329216, + "step": 27205 + }, + { + "epoch": 0.6647448269122713, + "grad_norm": 0.9261932373046875, + "learning_rate": 1.993396125046419e-06, + "loss": 0.0901, + "num_input_tokens_seen": 18332864, + "step": 27210 + }, + { + "epoch": 0.6648669777441184, + "grad_norm": 15.885643005371094, + "learning_rate": 1.9933863371020515e-06, + "loss": 0.1088, + "num_input_tokens_seen": 18336064, + "step": 27215 + }, + { + "epoch": 0.6649891285759656, + "grad_norm": 14.773920059204102, + "learning_rate": 1.9933765419335114e-06, + "loss": 0.0837, + "num_input_tokens_seen": 18339584, + "step": 27220 + }, + { + "epoch": 0.6651112794078128, + "grad_norm": 16.5922794342041, + "learning_rate": 1.9933667395408703e-06, + "loss": 0.1089, + "num_input_tokens_seen": 18342656, + "step": 27225 + }, + { + "epoch": 0.66523343023966, + "grad_norm": 19.870290756225586, + "learning_rate": 1.9933569299241987e-06, + "loss": 0.1059, + "num_input_tokens_seen": 18346112, + "step": 27230 + }, + { + "epoch": 0.665355581071507, + "grad_norm": 19.615880966186523, + "learning_rate": 1.993347113083568e-06, + "loss": 0.0226, + "num_input_tokens_seen": 18349568, + "step": 27235 + }, + { + "epoch": 0.6654777319033542, + "grad_norm": 0.1664038896560669, + "learning_rate": 1.9933372890190503e-06, + "loss": 0.024, + "num_input_tokens_seen": 18352832, + "step": 27240 + }, + { + "epoch": 0.6655998827352014, + "grad_norm": 26.959386825561523, + "learning_rate": 1.9933274577307167e-06, + "loss": 0.1599, + "num_input_tokens_seen": 18355840, + "step": 27245 + }, + { + "epoch": 0.6657220335670486, + "grad_norm": 40.0178108215332, + "learning_rate": 1.993317619218639e-06, + "loss": 0.176, + "num_input_tokens_seen": 18359424, + "step": 27250 + }, + { + "epoch": 0.6658441843988958, + "grad_norm": 35.85573959350586, + "learning_rate": 1.9933077734828877e-06, + "loss": 0.2333, + "num_input_tokens_seen": 18362816, + "step": 27255 + }, + { + "epoch": 0.6659663352307429, + "grad_norm": 33.02347946166992, + "learning_rate": 1.993297920523535e-06, + "loss": 0.088, + "num_input_tokens_seen": 18366528, + "step": 27260 + }, + { + "epoch": 0.6660884860625901, + "grad_norm": 16.668325424194336, + "learning_rate": 1.9932880603406533e-06, + "loss": 0.1244, + "num_input_tokens_seen": 18370048, + "step": 27265 + }, + { + "epoch": 0.6662106368944373, + "grad_norm": 0.7546607255935669, + "learning_rate": 1.9932781929343135e-06, + "loss": 0.0695, + "num_input_tokens_seen": 18373120, + "step": 27270 + }, + { + "epoch": 0.6663327877262845, + "grad_norm": 8.710541725158691, + "learning_rate": 1.9932683183045872e-06, + "loss": 0.182, + "num_input_tokens_seen": 18376512, + "step": 27275 + }, + { + "epoch": 0.6664549385581315, + "grad_norm": 12.80578899383545, + "learning_rate": 1.993258436451547e-06, + "loss": 0.1501, + "num_input_tokens_seen": 18379776, + "step": 27280 + }, + { + "epoch": 0.6665770893899787, + "grad_norm": 9.051602363586426, + "learning_rate": 1.993248547375264e-06, + "loss": 0.1145, + "num_input_tokens_seen": 18383168, + "step": 27285 + }, + { + "epoch": 0.6666992402218259, + "grad_norm": 14.54477596282959, + "learning_rate": 1.9932386510758102e-06, + "loss": 0.0978, + "num_input_tokens_seen": 18386560, + "step": 27290 + }, + { + "epoch": 0.6668213910536731, + "grad_norm": 29.884191513061523, + "learning_rate": 1.9932287475532582e-06, + "loss": 0.1649, + "num_input_tokens_seen": 18390208, + "step": 27295 + }, + { + "epoch": 0.6669435418855202, + "grad_norm": 42.955299377441406, + "learning_rate": 1.9932188368076793e-06, + "loss": 0.3528, + "num_input_tokens_seen": 18393600, + "step": 27300 + }, + { + "epoch": 0.6670656927173674, + "grad_norm": 11.649964332580566, + "learning_rate": 1.993208918839146e-06, + "loss": 0.1349, + "num_input_tokens_seen": 18397184, + "step": 27305 + }, + { + "epoch": 0.6671878435492146, + "grad_norm": 19.09230613708496, + "learning_rate": 1.99319899364773e-06, + "loss": 0.1671, + "num_input_tokens_seen": 18401728, + "step": 27310 + }, + { + "epoch": 0.6673099943810618, + "grad_norm": 16.074893951416016, + "learning_rate": 1.9931890612335044e-06, + "loss": 0.2605, + "num_input_tokens_seen": 18405184, + "step": 27315 + }, + { + "epoch": 0.667432145212909, + "grad_norm": 20.0411319732666, + "learning_rate": 1.9931791215965405e-06, + "loss": 0.0884, + "num_input_tokens_seen": 18408256, + "step": 27320 + }, + { + "epoch": 0.667554296044756, + "grad_norm": 5.844738006591797, + "learning_rate": 1.993169174736911e-06, + "loss": 0.1059, + "num_input_tokens_seen": 18411264, + "step": 27325 + }, + { + "epoch": 0.6676764468766032, + "grad_norm": 10.027684211730957, + "learning_rate": 1.993159220654688e-06, + "loss": 0.0379, + "num_input_tokens_seen": 18414848, + "step": 27330 + }, + { + "epoch": 0.6677985977084504, + "grad_norm": 1.0045827627182007, + "learning_rate": 1.993149259349944e-06, + "loss": 0.1058, + "num_input_tokens_seen": 18418304, + "step": 27335 + }, + { + "epoch": 0.6679207485402976, + "grad_norm": 6.239989757537842, + "learning_rate": 1.9931392908227515e-06, + "loss": 0.061, + "num_input_tokens_seen": 18421696, + "step": 27340 + }, + { + "epoch": 0.6680428993721447, + "grad_norm": 21.569900512695312, + "learning_rate": 1.993129315073183e-06, + "loss": 0.1927, + "num_input_tokens_seen": 18424960, + "step": 27345 + }, + { + "epoch": 0.6681650502039919, + "grad_norm": 4.123659610748291, + "learning_rate": 1.993119332101311e-06, + "loss": 0.0656, + "num_input_tokens_seen": 18428032, + "step": 27350 + }, + { + "epoch": 0.668287201035839, + "grad_norm": 3.6173691749572754, + "learning_rate": 1.993109341907208e-06, + "loss": 0.0642, + "num_input_tokens_seen": 18431616, + "step": 27355 + }, + { + "epoch": 0.6684093518676862, + "grad_norm": 30.157012939453125, + "learning_rate": 1.993099344490947e-06, + "loss": 0.1862, + "num_input_tokens_seen": 18434560, + "step": 27360 + }, + { + "epoch": 0.6685315026995334, + "grad_norm": 11.95051383972168, + "learning_rate": 1.9930893398526e-06, + "loss": 0.1303, + "num_input_tokens_seen": 18437696, + "step": 27365 + }, + { + "epoch": 0.6686536535313805, + "grad_norm": 24.992183685302734, + "learning_rate": 1.9930793279922408e-06, + "loss": 0.102, + "num_input_tokens_seen": 18440896, + "step": 27370 + }, + { + "epoch": 0.6687758043632277, + "grad_norm": 31.214475631713867, + "learning_rate": 1.993069308909941e-06, + "loss": 0.0309, + "num_input_tokens_seen": 18443968, + "step": 27375 + }, + { + "epoch": 0.6688979551950749, + "grad_norm": 19.314767837524414, + "learning_rate": 1.9930592826057746e-06, + "loss": 0.1691, + "num_input_tokens_seen": 18447424, + "step": 27380 + }, + { + "epoch": 0.6690201060269221, + "grad_norm": 1.9460688829421997, + "learning_rate": 1.993049249079814e-06, + "loss": 0.1198, + "num_input_tokens_seen": 18450944, + "step": 27385 + }, + { + "epoch": 0.6691422568587692, + "grad_norm": 36.64439392089844, + "learning_rate": 1.9930392083321315e-06, + "loss": 0.2652, + "num_input_tokens_seen": 18453952, + "step": 27390 + }, + { + "epoch": 0.6692644076906163, + "grad_norm": 0.25118666887283325, + "learning_rate": 1.9930291603628013e-06, + "loss": 0.0694, + "num_input_tokens_seen": 18457088, + "step": 27395 + }, + { + "epoch": 0.6693865585224635, + "grad_norm": 13.27421760559082, + "learning_rate": 1.993019105171896e-06, + "loss": 0.0814, + "num_input_tokens_seen": 18459904, + "step": 27400 + }, + { + "epoch": 0.6695087093543107, + "grad_norm": 23.91324234008789, + "learning_rate": 1.9930090427594885e-06, + "loss": 0.0772, + "num_input_tokens_seen": 18463232, + "step": 27405 + }, + { + "epoch": 0.6696308601861579, + "grad_norm": 2.134026050567627, + "learning_rate": 1.992998973125652e-06, + "loss": 0.0064, + "num_input_tokens_seen": 18466816, + "step": 27410 + }, + { + "epoch": 0.669753011018005, + "grad_norm": 0.7445598840713501, + "learning_rate": 1.9929888962704603e-06, + "loss": 0.0142, + "num_input_tokens_seen": 18470080, + "step": 27415 + }, + { + "epoch": 0.6698751618498522, + "grad_norm": 34.15476608276367, + "learning_rate": 1.992978812193986e-06, + "loss": 0.0758, + "num_input_tokens_seen": 18473664, + "step": 27420 + }, + { + "epoch": 0.6699973126816994, + "grad_norm": 0.30861836671829224, + "learning_rate": 1.9929687208963026e-06, + "loss": 0.0811, + "num_input_tokens_seen": 18476928, + "step": 27425 + }, + { + "epoch": 0.6701194635135466, + "grad_norm": 0.0861566960811615, + "learning_rate": 1.992958622377484e-06, + "loss": 0.1545, + "num_input_tokens_seen": 18480320, + "step": 27430 + }, + { + "epoch": 0.6702416143453936, + "grad_norm": 26.23560905456543, + "learning_rate": 1.9929485166376026e-06, + "loss": 0.1974, + "num_input_tokens_seen": 18483840, + "step": 27435 + }, + { + "epoch": 0.6703637651772408, + "grad_norm": 0.39746278524398804, + "learning_rate": 1.992938403676733e-06, + "loss": 0.07, + "num_input_tokens_seen": 18486912, + "step": 27440 + }, + { + "epoch": 0.670485916009088, + "grad_norm": 12.6868257522583, + "learning_rate": 1.9929282834949483e-06, + "loss": 0.1578, + "num_input_tokens_seen": 18489984, + "step": 27445 + }, + { + "epoch": 0.6706080668409352, + "grad_norm": 26.02521324157715, + "learning_rate": 1.9929181560923217e-06, + "loss": 0.2339, + "num_input_tokens_seen": 18493312, + "step": 27450 + }, + { + "epoch": 0.6707302176727824, + "grad_norm": 0.4023156464099884, + "learning_rate": 1.9929080214689274e-06, + "loss": 0.0862, + "num_input_tokens_seen": 18496832, + "step": 27455 + }, + { + "epoch": 0.6708523685046295, + "grad_norm": 18.912397384643555, + "learning_rate": 1.992897879624839e-06, + "loss": 0.1234, + "num_input_tokens_seen": 18500160, + "step": 27460 + }, + { + "epoch": 0.6709745193364767, + "grad_norm": 47.363975524902344, + "learning_rate": 1.99288773056013e-06, + "loss": 0.1291, + "num_input_tokens_seen": 18503936, + "step": 27465 + }, + { + "epoch": 0.6710966701683239, + "grad_norm": 9.934727668762207, + "learning_rate": 1.9928775742748747e-06, + "loss": 0.1425, + "num_input_tokens_seen": 18507776, + "step": 27470 + }, + { + "epoch": 0.671218821000171, + "grad_norm": 10.783501625061035, + "learning_rate": 1.9928674107691463e-06, + "loss": 0.0539, + "num_input_tokens_seen": 18511104, + "step": 27475 + }, + { + "epoch": 0.6713409718320181, + "grad_norm": 12.993548393249512, + "learning_rate": 1.992857240043019e-06, + "loss": 0.1931, + "num_input_tokens_seen": 18514304, + "step": 27480 + }, + { + "epoch": 0.6714631226638653, + "grad_norm": 20.122446060180664, + "learning_rate": 1.992847062096567e-06, + "loss": 0.0949, + "num_input_tokens_seen": 18517696, + "step": 27485 + }, + { + "epoch": 0.6715852734957125, + "grad_norm": 13.75194263458252, + "learning_rate": 1.9928368769298636e-06, + "loss": 0.1095, + "num_input_tokens_seen": 18520896, + "step": 27490 + }, + { + "epoch": 0.6717074243275597, + "grad_norm": 6.688014507293701, + "learning_rate": 1.992826684542984e-06, + "loss": 0.0394, + "num_input_tokens_seen": 18524288, + "step": 27495 + }, + { + "epoch": 0.6718295751594069, + "grad_norm": 4.681891918182373, + "learning_rate": 1.9928164849360018e-06, + "loss": 0.2707, + "num_input_tokens_seen": 18527680, + "step": 27500 + }, + { + "epoch": 0.671951725991254, + "grad_norm": 9.065070152282715, + "learning_rate": 1.9928062781089906e-06, + "loss": 0.0718, + "num_input_tokens_seen": 18531328, + "step": 27505 + }, + { + "epoch": 0.6720738768231012, + "grad_norm": 25.745248794555664, + "learning_rate": 1.9927960640620256e-06, + "loss": 0.2083, + "num_input_tokens_seen": 18535040, + "step": 27510 + }, + { + "epoch": 0.6721960276549483, + "grad_norm": 13.078929901123047, + "learning_rate": 1.9927858427951804e-06, + "loss": 0.0871, + "num_input_tokens_seen": 18538304, + "step": 27515 + }, + { + "epoch": 0.6723181784867955, + "grad_norm": 11.144567489624023, + "learning_rate": 1.9927756143085293e-06, + "loss": 0.099, + "num_input_tokens_seen": 18541568, + "step": 27520 + }, + { + "epoch": 0.6724403293186426, + "grad_norm": 11.036469459533691, + "learning_rate": 1.9927653786021466e-06, + "loss": 0.0368, + "num_input_tokens_seen": 18545536, + "step": 27525 + }, + { + "epoch": 0.6725624801504898, + "grad_norm": 8.302674293518066, + "learning_rate": 1.992755135676108e-06, + "loss": 0.1327, + "num_input_tokens_seen": 18548864, + "step": 27530 + }, + { + "epoch": 0.672684630982337, + "grad_norm": 14.096845626831055, + "learning_rate": 1.9927448855304862e-06, + "loss": 0.0681, + "num_input_tokens_seen": 18552192, + "step": 27535 + }, + { + "epoch": 0.6728067818141842, + "grad_norm": 9.845850944519043, + "learning_rate": 1.992734628165357e-06, + "loss": 0.06, + "num_input_tokens_seen": 18555264, + "step": 27540 + }, + { + "epoch": 0.6729289326460314, + "grad_norm": 14.49575138092041, + "learning_rate": 1.9927243635807948e-06, + "loss": 0.1047, + "num_input_tokens_seen": 18558976, + "step": 27545 + }, + { + "epoch": 0.6730510834778785, + "grad_norm": 6.2803730964660645, + "learning_rate": 1.9927140917768736e-06, + "loss": 0.0696, + "num_input_tokens_seen": 18562304, + "step": 27550 + }, + { + "epoch": 0.6731732343097256, + "grad_norm": 7.356665134429932, + "learning_rate": 1.9927038127536686e-06, + "loss": 0.0841, + "num_input_tokens_seen": 18565888, + "step": 27555 + }, + { + "epoch": 0.6732953851415728, + "grad_norm": 9.611376762390137, + "learning_rate": 1.9926935265112543e-06, + "loss": 0.0578, + "num_input_tokens_seen": 18569536, + "step": 27560 + }, + { + "epoch": 0.67341753597342, + "grad_norm": 24.40125846862793, + "learning_rate": 1.992683233049706e-06, + "loss": 0.0541, + "num_input_tokens_seen": 18572608, + "step": 27565 + }, + { + "epoch": 0.6735396868052671, + "grad_norm": 39.64024353027344, + "learning_rate": 1.992672932369098e-06, + "loss": 0.2431, + "num_input_tokens_seen": 18576448, + "step": 27570 + }, + { + "epoch": 0.6736618376371143, + "grad_norm": 8.382988929748535, + "learning_rate": 1.9926626244695056e-06, + "loss": 0.1644, + "num_input_tokens_seen": 18579776, + "step": 27575 + }, + { + "epoch": 0.6737839884689615, + "grad_norm": 4.696765422821045, + "learning_rate": 1.9926523093510034e-06, + "loss": 0.1107, + "num_input_tokens_seen": 18582784, + "step": 27580 + }, + { + "epoch": 0.6739061393008087, + "grad_norm": 21.366455078125, + "learning_rate": 1.992641987013667e-06, + "loss": 0.1118, + "num_input_tokens_seen": 18585728, + "step": 27585 + }, + { + "epoch": 0.6740282901326557, + "grad_norm": 42.7935791015625, + "learning_rate": 1.9926316574575707e-06, + "loss": 0.064, + "num_input_tokens_seen": 18588800, + "step": 27590 + }, + { + "epoch": 0.6741504409645029, + "grad_norm": 26.108415603637695, + "learning_rate": 1.99262132068279e-06, + "loss": 0.2027, + "num_input_tokens_seen": 18592000, + "step": 27595 + }, + { + "epoch": 0.6742725917963501, + "grad_norm": 10.856925010681152, + "learning_rate": 1.9926109766894003e-06, + "loss": 0.1205, + "num_input_tokens_seen": 18595584, + "step": 27600 + }, + { + "epoch": 0.6743947426281973, + "grad_norm": 16.994104385375977, + "learning_rate": 1.992600625477476e-06, + "loss": 0.1196, + "num_input_tokens_seen": 18599296, + "step": 27605 + }, + { + "epoch": 0.6745168934600445, + "grad_norm": 28.180084228515625, + "learning_rate": 1.992590267047094e-06, + "loss": 0.1573, + "num_input_tokens_seen": 18602304, + "step": 27610 + }, + { + "epoch": 0.6746390442918916, + "grad_norm": 1.7627689838409424, + "learning_rate": 1.992579901398328e-06, + "loss": 0.0645, + "num_input_tokens_seen": 18605632, + "step": 27615 + }, + { + "epoch": 0.6747611951237388, + "grad_norm": 15.111403465270996, + "learning_rate": 1.992569528531254e-06, + "loss": 0.1136, + "num_input_tokens_seen": 18609152, + "step": 27620 + }, + { + "epoch": 0.674883345955586, + "grad_norm": 16.666126251220703, + "learning_rate": 1.9925591484459474e-06, + "loss": 0.1165, + "num_input_tokens_seen": 18612928, + "step": 27625 + }, + { + "epoch": 0.6750054967874332, + "grad_norm": 10.067097663879395, + "learning_rate": 1.992548761142484e-06, + "loss": 0.1876, + "num_input_tokens_seen": 18616768, + "step": 27630 + }, + { + "epoch": 0.6751276476192802, + "grad_norm": 8.604232788085938, + "learning_rate": 1.9925383666209387e-06, + "loss": 0.0641, + "num_input_tokens_seen": 18619840, + "step": 27635 + }, + { + "epoch": 0.6752497984511274, + "grad_norm": 2.173659324645996, + "learning_rate": 1.9925279648813875e-06, + "loss": 0.0321, + "num_input_tokens_seen": 18623104, + "step": 27640 + }, + { + "epoch": 0.6753719492829746, + "grad_norm": 24.729249954223633, + "learning_rate": 1.992517555923906e-06, + "loss": 0.1091, + "num_input_tokens_seen": 18626496, + "step": 27645 + }, + { + "epoch": 0.6754941001148218, + "grad_norm": 3.241520643234253, + "learning_rate": 1.99250713974857e-06, + "loss": 0.075, + "num_input_tokens_seen": 18629632, + "step": 27650 + }, + { + "epoch": 0.675616250946669, + "grad_norm": 39.44907760620117, + "learning_rate": 1.992496716355455e-06, + "loss": 0.0667, + "num_input_tokens_seen": 18632896, + "step": 27655 + }, + { + "epoch": 0.6757384017785161, + "grad_norm": 15.261924743652344, + "learning_rate": 1.9924862857446374e-06, + "loss": 0.1238, + "num_input_tokens_seen": 18636416, + "step": 27660 + }, + { + "epoch": 0.6758605526103633, + "grad_norm": 32.404754638671875, + "learning_rate": 1.9924758479161916e-06, + "loss": 0.1344, + "num_input_tokens_seen": 18639296, + "step": 27665 + }, + { + "epoch": 0.6759827034422105, + "grad_norm": 5.737441539764404, + "learning_rate": 1.992465402870195e-06, + "loss": 0.1151, + "num_input_tokens_seen": 18643072, + "step": 27670 + }, + { + "epoch": 0.6761048542740576, + "grad_norm": 0.8999488353729248, + "learning_rate": 1.9924549506067236e-06, + "loss": 0.0603, + "num_input_tokens_seen": 18646336, + "step": 27675 + }, + { + "epoch": 0.6762270051059047, + "grad_norm": 11.820429801940918, + "learning_rate": 1.992444491125852e-06, + "loss": 0.0828, + "num_input_tokens_seen": 18650112, + "step": 27680 + }, + { + "epoch": 0.6763491559377519, + "grad_norm": 29.754968643188477, + "learning_rate": 1.9924340244276576e-06, + "loss": 0.1562, + "num_input_tokens_seen": 18653632, + "step": 27685 + }, + { + "epoch": 0.6764713067695991, + "grad_norm": 9.74490737915039, + "learning_rate": 1.992423550512216e-06, + "loss": 0.1278, + "num_input_tokens_seen": 18656704, + "step": 27690 + }, + { + "epoch": 0.6765934576014463, + "grad_norm": 10.454187393188477, + "learning_rate": 1.9924130693796034e-06, + "loss": 0.1062, + "num_input_tokens_seen": 18660224, + "step": 27695 + }, + { + "epoch": 0.6767156084332935, + "grad_norm": 3.680006742477417, + "learning_rate": 1.9924025810298957e-06, + "loss": 0.0639, + "num_input_tokens_seen": 18663680, + "step": 27700 + }, + { + "epoch": 0.6768377592651406, + "grad_norm": 18.555456161499023, + "learning_rate": 1.99239208546317e-06, + "loss": 0.1237, + "num_input_tokens_seen": 18666880, + "step": 27705 + }, + { + "epoch": 0.6769599100969877, + "grad_norm": 26.601221084594727, + "learning_rate": 1.9923815826795018e-06, + "loss": 0.0874, + "num_input_tokens_seen": 18670464, + "step": 27710 + }, + { + "epoch": 0.6770820609288349, + "grad_norm": 12.380290031433105, + "learning_rate": 1.992371072678968e-06, + "loss": 0.0819, + "num_input_tokens_seen": 18673920, + "step": 27715 + }, + { + "epoch": 0.6772042117606821, + "grad_norm": 9.469911575317383, + "learning_rate": 1.9923605554616447e-06, + "loss": 0.1129, + "num_input_tokens_seen": 18677632, + "step": 27720 + }, + { + "epoch": 0.6773263625925292, + "grad_norm": 26.473451614379883, + "learning_rate": 1.9923500310276085e-06, + "loss": 0.0451, + "num_input_tokens_seen": 18681088, + "step": 27725 + }, + { + "epoch": 0.6774485134243764, + "grad_norm": 23.648157119750977, + "learning_rate": 1.9923394993769362e-06, + "loss": 0.0925, + "num_input_tokens_seen": 18684160, + "step": 27730 + }, + { + "epoch": 0.6775706642562236, + "grad_norm": 28.7203426361084, + "learning_rate": 1.992328960509704e-06, + "loss": 0.143, + "num_input_tokens_seen": 18687296, + "step": 27735 + }, + { + "epoch": 0.6776928150880708, + "grad_norm": 24.867673873901367, + "learning_rate": 1.9923184144259886e-06, + "loss": 0.3171, + "num_input_tokens_seen": 18690496, + "step": 27740 + }, + { + "epoch": 0.677814965919918, + "grad_norm": 3.4889917373657227, + "learning_rate": 1.992307861125867e-06, + "loss": 0.1289, + "num_input_tokens_seen": 18693824, + "step": 27745 + }, + { + "epoch": 0.677937116751765, + "grad_norm": 5.78082799911499, + "learning_rate": 1.9922973006094156e-06, + "loss": 0.1006, + "num_input_tokens_seen": 18697024, + "step": 27750 + }, + { + "epoch": 0.6780592675836122, + "grad_norm": 20.06686019897461, + "learning_rate": 1.9922867328767114e-06, + "loss": 0.1345, + "num_input_tokens_seen": 18699968, + "step": 27755 + }, + { + "epoch": 0.6781814184154594, + "grad_norm": 7.188560485839844, + "learning_rate": 1.992276157927831e-06, + "loss": 0.0691, + "num_input_tokens_seen": 18703296, + "step": 27760 + }, + { + "epoch": 0.6783035692473066, + "grad_norm": 0.8174273371696472, + "learning_rate": 1.9922655757628516e-06, + "loss": 0.067, + "num_input_tokens_seen": 18706496, + "step": 27765 + }, + { + "epoch": 0.6784257200791537, + "grad_norm": 3.802631378173828, + "learning_rate": 1.9922549863818504e-06, + "loss": 0.0486, + "num_input_tokens_seen": 18709696, + "step": 27770 + }, + { + "epoch": 0.6785478709110009, + "grad_norm": 26.586246490478516, + "learning_rate": 1.9922443897849037e-06, + "loss": 0.0436, + "num_input_tokens_seen": 18712704, + "step": 27775 + }, + { + "epoch": 0.6786700217428481, + "grad_norm": 27.751142501831055, + "learning_rate": 1.9922337859720887e-06, + "loss": 0.0707, + "num_input_tokens_seen": 18715648, + "step": 27780 + }, + { + "epoch": 0.6787921725746953, + "grad_norm": 1.3806562423706055, + "learning_rate": 1.992223174943483e-06, + "loss": 0.1024, + "num_input_tokens_seen": 18719040, + "step": 27785 + }, + { + "epoch": 0.6789143234065425, + "grad_norm": 16.745941162109375, + "learning_rate": 1.992212556699164e-06, + "loss": 0.0903, + "num_input_tokens_seen": 18722176, + "step": 27790 + }, + { + "epoch": 0.6790364742383895, + "grad_norm": 35.78384780883789, + "learning_rate": 1.9922019312392077e-06, + "loss": 0.0914, + "num_input_tokens_seen": 18725952, + "step": 27795 + }, + { + "epoch": 0.6791586250702367, + "grad_norm": 16.95638656616211, + "learning_rate": 1.992191298563692e-06, + "loss": 0.0705, + "num_input_tokens_seen": 18729152, + "step": 27800 + }, + { + "epoch": 0.6792807759020839, + "grad_norm": 19.122446060180664, + "learning_rate": 1.9921806586726946e-06, + "loss": 0.0881, + "num_input_tokens_seen": 18732480, + "step": 27805 + }, + { + "epoch": 0.6794029267339311, + "grad_norm": 33.02607345581055, + "learning_rate": 1.9921700115662927e-06, + "loss": 0.1584, + "num_input_tokens_seen": 18735488, + "step": 27810 + }, + { + "epoch": 0.6795250775657782, + "grad_norm": 0.3400420546531677, + "learning_rate": 1.992159357244564e-06, + "loss": 0.1024, + "num_input_tokens_seen": 18738688, + "step": 27815 + }, + { + "epoch": 0.6796472283976254, + "grad_norm": 19.92205238342285, + "learning_rate": 1.9921486957075847e-06, + "loss": 0.1267, + "num_input_tokens_seen": 18742336, + "step": 27820 + }, + { + "epoch": 0.6797693792294726, + "grad_norm": 14.314276695251465, + "learning_rate": 1.9921380269554337e-06, + "loss": 0.1554, + "num_input_tokens_seen": 18745856, + "step": 27825 + }, + { + "epoch": 0.6798915300613197, + "grad_norm": 6.991714000701904, + "learning_rate": 1.992127350988188e-06, + "loss": 0.0212, + "num_input_tokens_seen": 18748928, + "step": 27830 + }, + { + "epoch": 0.6800136808931668, + "grad_norm": 2.587913751602173, + "learning_rate": 1.9921166678059255e-06, + "loss": 0.1102, + "num_input_tokens_seen": 18752704, + "step": 27835 + }, + { + "epoch": 0.680135831725014, + "grad_norm": 14.374567985534668, + "learning_rate": 1.9921059774087234e-06, + "loss": 0.0976, + "num_input_tokens_seen": 18756288, + "step": 27840 + }, + { + "epoch": 0.6802579825568612, + "grad_norm": 30.722314834594727, + "learning_rate": 1.9920952797966598e-06, + "loss": 0.1522, + "num_input_tokens_seen": 18759360, + "step": 27845 + }, + { + "epoch": 0.6803801333887084, + "grad_norm": 19.905860900878906, + "learning_rate": 1.992084574969813e-06, + "loss": 0.1346, + "num_input_tokens_seen": 18762304, + "step": 27850 + }, + { + "epoch": 0.6805022842205556, + "grad_norm": 1.7042980194091797, + "learning_rate": 1.99207386292826e-06, + "loss": 0.0575, + "num_input_tokens_seen": 18765952, + "step": 27855 + }, + { + "epoch": 0.6806244350524027, + "grad_norm": 26.2788143157959, + "learning_rate": 1.992063143672079e-06, + "loss": 0.0994, + "num_input_tokens_seen": 18769344, + "step": 27860 + }, + { + "epoch": 0.6807465858842499, + "grad_norm": 6.333975791931152, + "learning_rate": 1.9920524172013482e-06, + "loss": 0.0941, + "num_input_tokens_seen": 18772480, + "step": 27865 + }, + { + "epoch": 0.680868736716097, + "grad_norm": 20.061012268066406, + "learning_rate": 1.9920416835161453e-06, + "loss": 0.0403, + "num_input_tokens_seen": 18775872, + "step": 27870 + }, + { + "epoch": 0.6809908875479442, + "grad_norm": 17.780420303344727, + "learning_rate": 1.9920309426165485e-06, + "loss": 0.2086, + "num_input_tokens_seen": 18779584, + "step": 27875 + }, + { + "epoch": 0.6811130383797913, + "grad_norm": 23.597476959228516, + "learning_rate": 1.992020194502635e-06, + "loss": 0.1813, + "num_input_tokens_seen": 18783168, + "step": 27880 + }, + { + "epoch": 0.6812351892116385, + "grad_norm": 1.0889683961868286, + "learning_rate": 1.992009439174485e-06, + "loss": 0.0425, + "num_input_tokens_seen": 18787072, + "step": 27885 + }, + { + "epoch": 0.6813573400434857, + "grad_norm": 33.030147552490234, + "learning_rate": 1.9919986766321754e-06, + "loss": 0.0671, + "num_input_tokens_seen": 18790272, + "step": 27890 + }, + { + "epoch": 0.6814794908753329, + "grad_norm": 15.32840347290039, + "learning_rate": 1.991987906875784e-06, + "loss": 0.0871, + "num_input_tokens_seen": 18793472, + "step": 27895 + }, + { + "epoch": 0.6816016417071801, + "grad_norm": 41.0756721496582, + "learning_rate": 1.9919771299053902e-06, + "loss": 0.2074, + "num_input_tokens_seen": 18797056, + "step": 27900 + }, + { + "epoch": 0.6817237925390272, + "grad_norm": 17.466093063354492, + "learning_rate": 1.991966345721072e-06, + "loss": 0.1312, + "num_input_tokens_seen": 18801088, + "step": 27905 + }, + { + "epoch": 0.6818459433708743, + "grad_norm": 9.908241271972656, + "learning_rate": 1.9919555543229072e-06, + "loss": 0.0392, + "num_input_tokens_seen": 18804224, + "step": 27910 + }, + { + "epoch": 0.6819680942027215, + "grad_norm": 27.994863510131836, + "learning_rate": 1.991944755710975e-06, + "loss": 0.1424, + "num_input_tokens_seen": 18807360, + "step": 27915 + }, + { + "epoch": 0.6820902450345687, + "grad_norm": 0.7694000005722046, + "learning_rate": 1.9919339498853537e-06, + "loss": 0.169, + "num_input_tokens_seen": 18810304, + "step": 27920 + }, + { + "epoch": 0.6822123958664158, + "grad_norm": 2.132155418395996, + "learning_rate": 1.9919231368461224e-06, + "loss": 0.0517, + "num_input_tokens_seen": 18813568, + "step": 27925 + }, + { + "epoch": 0.682334546698263, + "grad_norm": 39.752418518066406, + "learning_rate": 1.9919123165933586e-06, + "loss": 0.1451, + "num_input_tokens_seen": 18816640, + "step": 27930 + }, + { + "epoch": 0.6824566975301102, + "grad_norm": 10.385972023010254, + "learning_rate": 1.9919014891271423e-06, + "loss": 0.0729, + "num_input_tokens_seen": 18820032, + "step": 27935 + }, + { + "epoch": 0.6825788483619574, + "grad_norm": 23.70648193359375, + "learning_rate": 1.9918906544475507e-06, + "loss": 0.082, + "num_input_tokens_seen": 18823040, + "step": 27940 + }, + { + "epoch": 0.6827009991938046, + "grad_norm": 25.676509857177734, + "learning_rate": 1.9918798125546643e-06, + "loss": 0.1848, + "num_input_tokens_seen": 18826048, + "step": 27945 + }, + { + "epoch": 0.6828231500256516, + "grad_norm": 34.59342575073242, + "learning_rate": 1.991868963448561e-06, + "loss": 0.1456, + "num_input_tokens_seen": 18829504, + "step": 27950 + }, + { + "epoch": 0.6829453008574988, + "grad_norm": 6.966836452484131, + "learning_rate": 1.9918581071293196e-06, + "loss": 0.0392, + "num_input_tokens_seen": 18832768, + "step": 27955 + }, + { + "epoch": 0.683067451689346, + "grad_norm": 7.659114360809326, + "learning_rate": 1.9918472435970194e-06, + "loss": 0.0952, + "num_input_tokens_seen": 18836352, + "step": 27960 + }, + { + "epoch": 0.6831896025211932, + "grad_norm": 21.104238510131836, + "learning_rate": 1.991836372851739e-06, + "loss": 0.0636, + "num_input_tokens_seen": 18839488, + "step": 27965 + }, + { + "epoch": 0.6833117533530403, + "grad_norm": 26.50173568725586, + "learning_rate": 1.9918254948935576e-06, + "loss": 0.1515, + "num_input_tokens_seen": 18842432, + "step": 27970 + }, + { + "epoch": 0.6834339041848875, + "grad_norm": 34.945533752441406, + "learning_rate": 1.991814609722555e-06, + "loss": 0.1336, + "num_input_tokens_seen": 18845760, + "step": 27975 + }, + { + "epoch": 0.6835560550167347, + "grad_norm": 0.18488149344921112, + "learning_rate": 1.9918037173388098e-06, + "loss": 0.0967, + "num_input_tokens_seen": 18849152, + "step": 27980 + }, + { + "epoch": 0.6836782058485819, + "grad_norm": 40.403011322021484, + "learning_rate": 1.9917928177424005e-06, + "loss": 0.0754, + "num_input_tokens_seen": 18853184, + "step": 27985 + }, + { + "epoch": 0.683800356680429, + "grad_norm": 4.494559288024902, + "learning_rate": 1.9917819109334074e-06, + "loss": 0.057, + "num_input_tokens_seen": 18856448, + "step": 27990 + }, + { + "epoch": 0.6839225075122761, + "grad_norm": 6.149030685424805, + "learning_rate": 1.9917709969119097e-06, + "loss": 0.0696, + "num_input_tokens_seen": 18859392, + "step": 27995 + }, + { + "epoch": 0.6840446583441233, + "grad_norm": 30.208892822265625, + "learning_rate": 1.9917600756779866e-06, + "loss": 0.1231, + "num_input_tokens_seen": 18862848, + "step": 28000 + }, + { + "epoch": 0.6841668091759705, + "grad_norm": 15.175532341003418, + "learning_rate": 1.9917491472317173e-06, + "loss": 0.1157, + "num_input_tokens_seen": 18866048, + "step": 28005 + }, + { + "epoch": 0.6842889600078177, + "grad_norm": 22.627941131591797, + "learning_rate": 1.9917382115731814e-06, + "loss": 0.0812, + "num_input_tokens_seen": 18869696, + "step": 28010 + }, + { + "epoch": 0.6844111108396648, + "grad_norm": 46.66557693481445, + "learning_rate": 1.9917272687024586e-06, + "loss": 0.171, + "num_input_tokens_seen": 18872960, + "step": 28015 + }, + { + "epoch": 0.684533261671512, + "grad_norm": 15.672872543334961, + "learning_rate": 1.9917163186196284e-06, + "loss": 0.1007, + "num_input_tokens_seen": 18876480, + "step": 28020 + }, + { + "epoch": 0.6846554125033592, + "grad_norm": 26.201805114746094, + "learning_rate": 1.99170536132477e-06, + "loss": 0.1806, + "num_input_tokens_seen": 18880000, + "step": 28025 + }, + { + "epoch": 0.6847775633352063, + "grad_norm": 1.5974482297897339, + "learning_rate": 1.991694396817964e-06, + "loss": 0.1536, + "num_input_tokens_seen": 18883328, + "step": 28030 + }, + { + "epoch": 0.6848997141670535, + "grad_norm": 30.57724952697754, + "learning_rate": 1.991683425099289e-06, + "loss": 0.0765, + "num_input_tokens_seen": 18886464, + "step": 28035 + }, + { + "epoch": 0.6850218649989006, + "grad_norm": 37.85762405395508, + "learning_rate": 1.991672446168826e-06, + "loss": 0.2011, + "num_input_tokens_seen": 18889408, + "step": 28040 + }, + { + "epoch": 0.6851440158307478, + "grad_norm": 23.697330474853516, + "learning_rate": 1.9916614600266543e-06, + "loss": 0.2166, + "num_input_tokens_seen": 18892416, + "step": 28045 + }, + { + "epoch": 0.685266166662595, + "grad_norm": 14.929670333862305, + "learning_rate": 1.991650466672853e-06, + "loss": 0.1019, + "num_input_tokens_seen": 18895680, + "step": 28050 + }, + { + "epoch": 0.6853883174944422, + "grad_norm": 15.712745666503906, + "learning_rate": 1.9916394661075037e-06, + "loss": 0.234, + "num_input_tokens_seen": 18899072, + "step": 28055 + }, + { + "epoch": 0.6855104683262893, + "grad_norm": 4.213320255279541, + "learning_rate": 1.991628458330685e-06, + "loss": 0.0497, + "num_input_tokens_seen": 18902784, + "step": 28060 + }, + { + "epoch": 0.6856326191581364, + "grad_norm": 11.769521713256836, + "learning_rate": 1.9916174433424774e-06, + "loss": 0.0878, + "num_input_tokens_seen": 18905984, + "step": 28065 + }, + { + "epoch": 0.6857547699899836, + "grad_norm": 8.652548789978027, + "learning_rate": 1.991606421142961e-06, + "loss": 0.0997, + "num_input_tokens_seen": 18908992, + "step": 28070 + }, + { + "epoch": 0.6858769208218308, + "grad_norm": 0.8606699705123901, + "learning_rate": 1.991595391732216e-06, + "loss": 0.0843, + "num_input_tokens_seen": 18912256, + "step": 28075 + }, + { + "epoch": 0.685999071653678, + "grad_norm": 5.10831356048584, + "learning_rate": 1.991584355110323e-06, + "loss": 0.0455, + "num_input_tokens_seen": 18915584, + "step": 28080 + }, + { + "epoch": 0.6861212224855251, + "grad_norm": 21.28264617919922, + "learning_rate": 1.9915733112773613e-06, + "loss": 0.1036, + "num_input_tokens_seen": 18918784, + "step": 28085 + }, + { + "epoch": 0.6862433733173723, + "grad_norm": 0.4145818054676056, + "learning_rate": 1.9915622602334122e-06, + "loss": 0.0086, + "num_input_tokens_seen": 18921984, + "step": 28090 + }, + { + "epoch": 0.6863655241492195, + "grad_norm": 45.218753814697266, + "learning_rate": 1.9915512019785556e-06, + "loss": 0.2377, + "num_input_tokens_seen": 18925312, + "step": 28095 + }, + { + "epoch": 0.6864876749810667, + "grad_norm": 24.27002716064453, + "learning_rate": 1.9915401365128715e-06, + "loss": 0.1546, + "num_input_tokens_seen": 18928576, + "step": 28100 + }, + { + "epoch": 0.6866098258129137, + "grad_norm": 0.8130379319190979, + "learning_rate": 1.991529063836441e-06, + "loss": 0.1028, + "num_input_tokens_seen": 18932096, + "step": 28105 + }, + { + "epoch": 0.6867319766447609, + "grad_norm": 12.115750312805176, + "learning_rate": 1.991517983949345e-06, + "loss": 0.1427, + "num_input_tokens_seen": 18935360, + "step": 28110 + }, + { + "epoch": 0.6868541274766081, + "grad_norm": 0.17904898524284363, + "learning_rate": 1.991506896851663e-06, + "loss": 0.0468, + "num_input_tokens_seen": 18938688, + "step": 28115 + }, + { + "epoch": 0.6869762783084553, + "grad_norm": 10.688509941101074, + "learning_rate": 1.9914958025434764e-06, + "loss": 0.1421, + "num_input_tokens_seen": 18942080, + "step": 28120 + }, + { + "epoch": 0.6870984291403024, + "grad_norm": 33.10630416870117, + "learning_rate": 1.9914847010248657e-06, + "loss": 0.0983, + "num_input_tokens_seen": 18945152, + "step": 28125 + }, + { + "epoch": 0.6872205799721496, + "grad_norm": 7.811311721801758, + "learning_rate": 1.9914735922959116e-06, + "loss": 0.0689, + "num_input_tokens_seen": 18948224, + "step": 28130 + }, + { + "epoch": 0.6873427308039968, + "grad_norm": 5.051802635192871, + "learning_rate": 1.9914624763566946e-06, + "loss": 0.0645, + "num_input_tokens_seen": 18951680, + "step": 28135 + }, + { + "epoch": 0.687464881635844, + "grad_norm": 12.035042762756348, + "learning_rate": 1.991451353207296e-06, + "loss": 0.0881, + "num_input_tokens_seen": 18957248, + "step": 28140 + }, + { + "epoch": 0.6875870324676912, + "grad_norm": 8.856138229370117, + "learning_rate": 1.9914402228477962e-06, + "loss": 0.1386, + "num_input_tokens_seen": 18960640, + "step": 28145 + }, + { + "epoch": 0.6877091832995382, + "grad_norm": 0.4886510670185089, + "learning_rate": 1.991429085278277e-06, + "loss": 0.1298, + "num_input_tokens_seen": 18963840, + "step": 28150 + }, + { + "epoch": 0.6878313341313854, + "grad_norm": 1.587204098701477, + "learning_rate": 1.9914179404988185e-06, + "loss": 0.0415, + "num_input_tokens_seen": 18967616, + "step": 28155 + }, + { + "epoch": 0.6879534849632326, + "grad_norm": 1.0043456554412842, + "learning_rate": 1.991406788509502e-06, + "loss": 0.0961, + "num_input_tokens_seen": 18972928, + "step": 28160 + }, + { + "epoch": 0.6880756357950798, + "grad_norm": 15.976723670959473, + "learning_rate": 1.991395629310409e-06, + "loss": 0.157, + "num_input_tokens_seen": 18976192, + "step": 28165 + }, + { + "epoch": 0.6881977866269269, + "grad_norm": 14.203861236572266, + "learning_rate": 1.99138446290162e-06, + "loss": 0.0917, + "num_input_tokens_seen": 18979392, + "step": 28170 + }, + { + "epoch": 0.6883199374587741, + "grad_norm": 4.559242248535156, + "learning_rate": 1.9913732892832166e-06, + "loss": 0.0725, + "num_input_tokens_seen": 18982464, + "step": 28175 + }, + { + "epoch": 0.6884420882906213, + "grad_norm": 32.41259765625, + "learning_rate": 1.9913621084552797e-06, + "loss": 0.156, + "num_input_tokens_seen": 18985856, + "step": 28180 + }, + { + "epoch": 0.6885642391224684, + "grad_norm": 9.131508827209473, + "learning_rate": 1.9913509204178913e-06, + "loss": 0.1084, + "num_input_tokens_seen": 18989248, + "step": 28185 + }, + { + "epoch": 0.6886863899543156, + "grad_norm": 11.379778861999512, + "learning_rate": 1.9913397251711323e-06, + "loss": 0.0997, + "num_input_tokens_seen": 18992960, + "step": 28190 + }, + { + "epoch": 0.6888085407861627, + "grad_norm": 8.573840141296387, + "learning_rate": 1.991328522715084e-06, + "loss": 0.0419, + "num_input_tokens_seen": 18996352, + "step": 28195 + }, + { + "epoch": 0.6889306916180099, + "grad_norm": 14.748952865600586, + "learning_rate": 1.9913173130498283e-06, + "loss": 0.2366, + "num_input_tokens_seen": 19000064, + "step": 28200 + }, + { + "epoch": 0.6890528424498571, + "grad_norm": 20.12869644165039, + "learning_rate": 1.9913060961754463e-06, + "loss": 0.069, + "num_input_tokens_seen": 19003648, + "step": 28205 + }, + { + "epoch": 0.6891749932817043, + "grad_norm": 1.0335286855697632, + "learning_rate": 1.99129487209202e-06, + "loss": 0.1106, + "num_input_tokens_seen": 19007360, + "step": 28210 + }, + { + "epoch": 0.6892971441135514, + "grad_norm": 32.707645416259766, + "learning_rate": 1.9912836407996307e-06, + "loss": 0.209, + "num_input_tokens_seen": 19010304, + "step": 28215 + }, + { + "epoch": 0.6894192949453986, + "grad_norm": 27.200157165527344, + "learning_rate": 1.9912724022983597e-06, + "loss": 0.1874, + "num_input_tokens_seen": 19013696, + "step": 28220 + }, + { + "epoch": 0.6895414457772457, + "grad_norm": 0.2330540120601654, + "learning_rate": 1.9912611565882894e-06, + "loss": 0.0766, + "num_input_tokens_seen": 19016960, + "step": 28225 + }, + { + "epoch": 0.6896635966090929, + "grad_norm": 0.19438402354717255, + "learning_rate": 1.9912499036695016e-06, + "loss": 0.0774, + "num_input_tokens_seen": 19020736, + "step": 28230 + }, + { + "epoch": 0.6897857474409401, + "grad_norm": 13.885992050170898, + "learning_rate": 1.991238643542078e-06, + "loss": 0.064, + "num_input_tokens_seen": 19024256, + "step": 28235 + }, + { + "epoch": 0.6899078982727872, + "grad_norm": 10.976266860961914, + "learning_rate": 1.9912273762061e-06, + "loss": 0.1147, + "num_input_tokens_seen": 19027392, + "step": 28240 + }, + { + "epoch": 0.6900300491046344, + "grad_norm": 32.6606330871582, + "learning_rate": 1.9912161016616496e-06, + "loss": 0.105, + "num_input_tokens_seen": 19031168, + "step": 28245 + }, + { + "epoch": 0.6901521999364816, + "grad_norm": 13.941425323486328, + "learning_rate": 1.99120481990881e-06, + "loss": 0.2081, + "num_input_tokens_seen": 19034112, + "step": 28250 + }, + { + "epoch": 0.6902743507683288, + "grad_norm": 8.773990631103516, + "learning_rate": 1.991193530947662e-06, + "loss": 0.1315, + "num_input_tokens_seen": 19037248, + "step": 28255 + }, + { + "epoch": 0.6903965016001758, + "grad_norm": 23.475357055664062, + "learning_rate": 1.9911822347782876e-06, + "loss": 0.1358, + "num_input_tokens_seen": 19040320, + "step": 28260 + }, + { + "epoch": 0.690518652432023, + "grad_norm": 19.392465591430664, + "learning_rate": 1.9911709314007696e-06, + "loss": 0.073, + "num_input_tokens_seen": 19043648, + "step": 28265 + }, + { + "epoch": 0.6906408032638702, + "grad_norm": 27.202695846557617, + "learning_rate": 1.99115962081519e-06, + "loss": 0.0217, + "num_input_tokens_seen": 19047296, + "step": 28270 + }, + { + "epoch": 0.6907629540957174, + "grad_norm": 0.36758825182914734, + "learning_rate": 1.991148303021631e-06, + "loss": 0.1819, + "num_input_tokens_seen": 19050816, + "step": 28275 + }, + { + "epoch": 0.6908851049275646, + "grad_norm": 15.720108985900879, + "learning_rate": 1.9911369780201754e-06, + "loss": 0.1465, + "num_input_tokens_seen": 19054016, + "step": 28280 + }, + { + "epoch": 0.6910072557594117, + "grad_norm": 3.562192440032959, + "learning_rate": 1.991125645810905e-06, + "loss": 0.0116, + "num_input_tokens_seen": 19057088, + "step": 28285 + }, + { + "epoch": 0.6911294065912589, + "grad_norm": 14.69986343383789, + "learning_rate": 1.991114306393902e-06, + "loss": 0.1489, + "num_input_tokens_seen": 19060544, + "step": 28290 + }, + { + "epoch": 0.6912515574231061, + "grad_norm": 1.1109415292739868, + "learning_rate": 1.991102959769249e-06, + "loss": 0.0615, + "num_input_tokens_seen": 19063936, + "step": 28295 + }, + { + "epoch": 0.6913737082549533, + "grad_norm": 13.292527198791504, + "learning_rate": 1.991091605937029e-06, + "loss": 0.3578, + "num_input_tokens_seen": 19067264, + "step": 28300 + }, + { + "epoch": 0.6914958590868003, + "grad_norm": 16.39297866821289, + "learning_rate": 1.9910802448973245e-06, + "loss": 0.1859, + "num_input_tokens_seen": 19070272, + "step": 28305 + }, + { + "epoch": 0.6916180099186475, + "grad_norm": 25.160232543945312, + "learning_rate": 1.9910688766502177e-06, + "loss": 0.1424, + "num_input_tokens_seen": 19073408, + "step": 28310 + }, + { + "epoch": 0.6917401607504947, + "grad_norm": 21.078258514404297, + "learning_rate": 1.9910575011957914e-06, + "loss": 0.0961, + "num_input_tokens_seen": 19076544, + "step": 28315 + }, + { + "epoch": 0.6918623115823419, + "grad_norm": 75.08121490478516, + "learning_rate": 1.9910461185341287e-06, + "loss": 0.0969, + "num_input_tokens_seen": 19080256, + "step": 28320 + }, + { + "epoch": 0.6919844624141891, + "grad_norm": 1.2669124603271484, + "learning_rate": 1.9910347286653116e-06, + "loss": 0.0501, + "num_input_tokens_seen": 19083776, + "step": 28325 + }, + { + "epoch": 0.6921066132460362, + "grad_norm": 5.270265102386475, + "learning_rate": 1.9910233315894237e-06, + "loss": 0.0835, + "num_input_tokens_seen": 19087552, + "step": 28330 + }, + { + "epoch": 0.6922287640778834, + "grad_norm": 0.8477705121040344, + "learning_rate": 1.9910119273065474e-06, + "loss": 0.1434, + "num_input_tokens_seen": 19090688, + "step": 28335 + }, + { + "epoch": 0.6923509149097306, + "grad_norm": 26.06892967224121, + "learning_rate": 1.991000515816766e-06, + "loss": 0.1814, + "num_input_tokens_seen": 19093888, + "step": 28340 + }, + { + "epoch": 0.6924730657415777, + "grad_norm": 14.113260269165039, + "learning_rate": 1.990989097120162e-06, + "loss": 0.2087, + "num_input_tokens_seen": 19097152, + "step": 28345 + }, + { + "epoch": 0.6925952165734248, + "grad_norm": 0.316220223903656, + "learning_rate": 1.990977671216819e-06, + "loss": 0.0672, + "num_input_tokens_seen": 19100608, + "step": 28350 + }, + { + "epoch": 0.692717367405272, + "grad_norm": 13.8766450881958, + "learning_rate": 1.9909662381068195e-06, + "loss": 0.3032, + "num_input_tokens_seen": 19103936, + "step": 28355 + }, + { + "epoch": 0.6928395182371192, + "grad_norm": 24.313175201416016, + "learning_rate": 1.9909547977902473e-06, + "loss": 0.1386, + "num_input_tokens_seen": 19107136, + "step": 28360 + }, + { + "epoch": 0.6929616690689664, + "grad_norm": 23.778879165649414, + "learning_rate": 1.9909433502671853e-06, + "loss": 0.0419, + "num_input_tokens_seen": 19110400, + "step": 28365 + }, + { + "epoch": 0.6930838199008135, + "grad_norm": 26.069019317626953, + "learning_rate": 1.9909318955377165e-06, + "loss": 0.1421, + "num_input_tokens_seen": 19113920, + "step": 28370 + }, + { + "epoch": 0.6932059707326607, + "grad_norm": 18.443458557128906, + "learning_rate": 1.9909204336019247e-06, + "loss": 0.1806, + "num_input_tokens_seen": 19117696, + "step": 28375 + }, + { + "epoch": 0.6933281215645078, + "grad_norm": 15.086554527282715, + "learning_rate": 1.990908964459893e-06, + "loss": 0.127, + "num_input_tokens_seen": 19120832, + "step": 28380 + }, + { + "epoch": 0.693450272396355, + "grad_norm": 35.456085205078125, + "learning_rate": 1.9908974881117042e-06, + "loss": 0.0871, + "num_input_tokens_seen": 19124224, + "step": 28385 + }, + { + "epoch": 0.6935724232282022, + "grad_norm": 26.71317481994629, + "learning_rate": 1.990886004557443e-06, + "loss": 0.1087, + "num_input_tokens_seen": 19127232, + "step": 28390 + }, + { + "epoch": 0.6936945740600493, + "grad_norm": 25.975605010986328, + "learning_rate": 1.990874513797192e-06, + "loss": 0.0849, + "num_input_tokens_seen": 19130816, + "step": 28395 + }, + { + "epoch": 0.6938167248918965, + "grad_norm": 11.141236305236816, + "learning_rate": 1.990863015831035e-06, + "loss": 0.1112, + "num_input_tokens_seen": 19133888, + "step": 28400 + }, + { + "epoch": 0.6939388757237437, + "grad_norm": 41.74148178100586, + "learning_rate": 1.990851510659056e-06, + "loss": 0.2546, + "num_input_tokens_seen": 19136832, + "step": 28405 + }, + { + "epoch": 0.6940610265555909, + "grad_norm": 26.85130500793457, + "learning_rate": 1.990839998281338e-06, + "loss": 0.1442, + "num_input_tokens_seen": 19140032, + "step": 28410 + }, + { + "epoch": 0.694183177387438, + "grad_norm": 18.30794906616211, + "learning_rate": 1.9908284786979647e-06, + "loss": 0.1615, + "num_input_tokens_seen": 19142912, + "step": 28415 + }, + { + "epoch": 0.6943053282192851, + "grad_norm": 14.795523643493652, + "learning_rate": 1.9908169519090208e-06, + "loss": 0.1455, + "num_input_tokens_seen": 19145920, + "step": 28420 + }, + { + "epoch": 0.6944274790511323, + "grad_norm": 2.048726797103882, + "learning_rate": 1.990805417914589e-06, + "loss": 0.1582, + "num_input_tokens_seen": 19148992, + "step": 28425 + }, + { + "epoch": 0.6945496298829795, + "grad_norm": 14.487932205200195, + "learning_rate": 1.9907938767147542e-06, + "loss": 0.1379, + "num_input_tokens_seen": 19152448, + "step": 28430 + }, + { + "epoch": 0.6946717807148267, + "grad_norm": 20.653125762939453, + "learning_rate": 1.9907823283095998e-06, + "loss": 0.0703, + "num_input_tokens_seen": 19155776, + "step": 28435 + }, + { + "epoch": 0.6947939315466738, + "grad_norm": 15.435851097106934, + "learning_rate": 1.9907707726992095e-06, + "loss": 0.1394, + "num_input_tokens_seen": 19158848, + "step": 28440 + }, + { + "epoch": 0.694916082378521, + "grad_norm": 15.184447288513184, + "learning_rate": 1.9907592098836678e-06, + "loss": 0.1629, + "num_input_tokens_seen": 19162240, + "step": 28445 + }, + { + "epoch": 0.6950382332103682, + "grad_norm": 14.811301231384277, + "learning_rate": 1.9907476398630584e-06, + "loss": 0.0741, + "num_input_tokens_seen": 19165696, + "step": 28450 + }, + { + "epoch": 0.6951603840422154, + "grad_norm": 17.973371505737305, + "learning_rate": 1.990736062637466e-06, + "loss": 0.128, + "num_input_tokens_seen": 19168768, + "step": 28455 + }, + { + "epoch": 0.6952825348740624, + "grad_norm": 19.12712860107422, + "learning_rate": 1.9907244782069745e-06, + "loss": 0.184, + "num_input_tokens_seen": 19172544, + "step": 28460 + }, + { + "epoch": 0.6954046857059096, + "grad_norm": 7.91143798828125, + "learning_rate": 1.990712886571668e-06, + "loss": 0.0412, + "num_input_tokens_seen": 19176128, + "step": 28465 + }, + { + "epoch": 0.6955268365377568, + "grad_norm": 7.214994430541992, + "learning_rate": 1.990701287731631e-06, + "loss": 0.1263, + "num_input_tokens_seen": 19179776, + "step": 28470 + }, + { + "epoch": 0.695648987369604, + "grad_norm": 28.023527145385742, + "learning_rate": 1.9906896816869475e-06, + "loss": 0.1573, + "num_input_tokens_seen": 19183360, + "step": 28475 + }, + { + "epoch": 0.6957711382014512, + "grad_norm": 2.206017255783081, + "learning_rate": 1.9906780684377025e-06, + "loss": 0.0671, + "num_input_tokens_seen": 19186432, + "step": 28480 + }, + { + "epoch": 0.6958932890332983, + "grad_norm": 17.74364471435547, + "learning_rate": 1.99066644798398e-06, + "loss": 0.1002, + "num_input_tokens_seen": 19189760, + "step": 28485 + }, + { + "epoch": 0.6960154398651455, + "grad_norm": 0.5346532464027405, + "learning_rate": 1.9906548203258644e-06, + "loss": 0.0669, + "num_input_tokens_seen": 19192896, + "step": 28490 + }, + { + "epoch": 0.6961375906969927, + "grad_norm": 10.89641284942627, + "learning_rate": 1.990643185463441e-06, + "loss": 0.158, + "num_input_tokens_seen": 19196480, + "step": 28495 + }, + { + "epoch": 0.6962597415288398, + "grad_norm": 17.990570068359375, + "learning_rate": 1.9906315433967937e-06, + "loss": 0.1286, + "num_input_tokens_seen": 19200128, + "step": 28500 + }, + { + "epoch": 0.6963818923606869, + "grad_norm": 23.91288948059082, + "learning_rate": 1.990619894126007e-06, + "loss": 0.125, + "num_input_tokens_seen": 19203328, + "step": 28505 + }, + { + "epoch": 0.6965040431925341, + "grad_norm": 10.834264755249023, + "learning_rate": 1.9906082376511665e-06, + "loss": 0.1438, + "num_input_tokens_seen": 19206400, + "step": 28510 + }, + { + "epoch": 0.6966261940243813, + "grad_norm": 2.1280264854431152, + "learning_rate": 1.9905965739723563e-06, + "loss": 0.0225, + "num_input_tokens_seen": 19209920, + "step": 28515 + }, + { + "epoch": 0.6967483448562285, + "grad_norm": 0.8384555578231812, + "learning_rate": 1.9905849030896614e-06, + "loss": 0.0699, + "num_input_tokens_seen": 19213120, + "step": 28520 + }, + { + "epoch": 0.6968704956880757, + "grad_norm": 6.136308670043945, + "learning_rate": 1.9905732250031664e-06, + "loss": 0.1218, + "num_input_tokens_seen": 19216512, + "step": 28525 + }, + { + "epoch": 0.6969926465199228, + "grad_norm": 23.7427921295166, + "learning_rate": 1.9905615397129565e-06, + "loss": 0.0611, + "num_input_tokens_seen": 19219328, + "step": 28530 + }, + { + "epoch": 0.69711479735177, + "grad_norm": 8.574586868286133, + "learning_rate": 1.9905498472191168e-06, + "loss": 0.1589, + "num_input_tokens_seen": 19222528, + "step": 28535 + }, + { + "epoch": 0.6972369481836171, + "grad_norm": 2.952967643737793, + "learning_rate": 1.9905381475217323e-06, + "loss": 0.1138, + "num_input_tokens_seen": 19226048, + "step": 28540 + }, + { + "epoch": 0.6973590990154643, + "grad_norm": 25.55811882019043, + "learning_rate": 1.990526440620888e-06, + "loss": 0.0534, + "num_input_tokens_seen": 19229184, + "step": 28545 + }, + { + "epoch": 0.6974812498473114, + "grad_norm": 24.35697364807129, + "learning_rate": 1.9905147265166686e-06, + "loss": 0.2159, + "num_input_tokens_seen": 19232704, + "step": 28550 + }, + { + "epoch": 0.6976034006791586, + "grad_norm": 1.1305099725723267, + "learning_rate": 1.99050300520916e-06, + "loss": 0.1376, + "num_input_tokens_seen": 19236544, + "step": 28555 + }, + { + "epoch": 0.6977255515110058, + "grad_norm": 5.858948230743408, + "learning_rate": 1.9904912766984472e-06, + "loss": 0.062, + "num_input_tokens_seen": 19239616, + "step": 28560 + }, + { + "epoch": 0.697847702342853, + "grad_norm": 20.73585319519043, + "learning_rate": 1.990479540984615e-06, + "loss": 0.0789, + "num_input_tokens_seen": 19243136, + "step": 28565 + }, + { + "epoch": 0.6979698531747002, + "grad_norm": 0.351258784532547, + "learning_rate": 1.9904677980677496e-06, + "loss": 0.1022, + "num_input_tokens_seen": 19246400, + "step": 28570 + }, + { + "epoch": 0.6980920040065473, + "grad_norm": 0.48112377524375916, + "learning_rate": 1.990456047947936e-06, + "loss": 0.0678, + "num_input_tokens_seen": 19249472, + "step": 28575 + }, + { + "epoch": 0.6982141548383944, + "grad_norm": 35.932861328125, + "learning_rate": 1.9904442906252594e-06, + "loss": 0.1318, + "num_input_tokens_seen": 19252992, + "step": 28580 + }, + { + "epoch": 0.6983363056702416, + "grad_norm": 0.44541868567466736, + "learning_rate": 1.9904325260998055e-06, + "loss": 0.1667, + "num_input_tokens_seen": 19256064, + "step": 28585 + }, + { + "epoch": 0.6984584565020888, + "grad_norm": 1.5130938291549683, + "learning_rate": 1.99042075437166e-06, + "loss": 0.1057, + "num_input_tokens_seen": 19259456, + "step": 28590 + }, + { + "epoch": 0.6985806073339359, + "grad_norm": 32.22797393798828, + "learning_rate": 1.9904089754409083e-06, + "loss": 0.1113, + "num_input_tokens_seen": 19262272, + "step": 28595 + }, + { + "epoch": 0.6987027581657831, + "grad_norm": 0.38581711053848267, + "learning_rate": 1.990397189307636e-06, + "loss": 0.0824, + "num_input_tokens_seen": 19265664, + "step": 28600 + }, + { + "epoch": 0.6988249089976303, + "grad_norm": 0.6255455613136292, + "learning_rate": 1.9903853959719293e-06, + "loss": 0.0588, + "num_input_tokens_seen": 19269632, + "step": 28605 + }, + { + "epoch": 0.6989470598294775, + "grad_norm": 19.804824829101562, + "learning_rate": 1.9903735954338736e-06, + "loss": 0.156, + "num_input_tokens_seen": 19272768, + "step": 28610 + }, + { + "epoch": 0.6990692106613247, + "grad_norm": 15.568331718444824, + "learning_rate": 1.9903617876935544e-06, + "loss": 0.2028, + "num_input_tokens_seen": 19275904, + "step": 28615 + }, + { + "epoch": 0.6991913614931717, + "grad_norm": 30.59419059753418, + "learning_rate": 1.990349972751058e-06, + "loss": 0.2168, + "num_input_tokens_seen": 19279104, + "step": 28620 + }, + { + "epoch": 0.6993135123250189, + "grad_norm": 11.51990795135498, + "learning_rate": 1.9903381506064704e-06, + "loss": 0.1528, + "num_input_tokens_seen": 19282624, + "step": 28625 + }, + { + "epoch": 0.6994356631568661, + "grad_norm": 18.640762329101562, + "learning_rate": 1.9903263212598772e-06, + "loss": 0.0853, + "num_input_tokens_seen": 19285696, + "step": 28630 + }, + { + "epoch": 0.6995578139887133, + "grad_norm": 0.11530325561761856, + "learning_rate": 1.990314484711365e-06, + "loss": 0.0641, + "num_input_tokens_seen": 19288960, + "step": 28635 + }, + { + "epoch": 0.6996799648205604, + "grad_norm": 25.19300079345703, + "learning_rate": 1.990302640961019e-06, + "loss": 0.1388, + "num_input_tokens_seen": 19292032, + "step": 28640 + }, + { + "epoch": 0.6998021156524076, + "grad_norm": 57.09481430053711, + "learning_rate": 1.990290790008926e-06, + "loss": 0.1323, + "num_input_tokens_seen": 19294976, + "step": 28645 + }, + { + "epoch": 0.6999242664842548, + "grad_norm": 12.756439208984375, + "learning_rate": 1.9902789318551727e-06, + "loss": 0.0876, + "num_input_tokens_seen": 19298240, + "step": 28650 + }, + { + "epoch": 0.700046417316102, + "grad_norm": 59.20932388305664, + "learning_rate": 1.990267066499844e-06, + "loss": 0.1469, + "num_input_tokens_seen": 19301440, + "step": 28655 + }, + { + "epoch": 0.700168568147949, + "grad_norm": 1.153745174407959, + "learning_rate": 1.9902551939430266e-06, + "loss": 0.0732, + "num_input_tokens_seen": 19304640, + "step": 28660 + }, + { + "epoch": 0.7002907189797962, + "grad_norm": 22.583284378051758, + "learning_rate": 1.9902433141848076e-06, + "loss": 0.0645, + "num_input_tokens_seen": 19307968, + "step": 28665 + }, + { + "epoch": 0.7004128698116434, + "grad_norm": 13.325112342834473, + "learning_rate": 1.9902314272252724e-06, + "loss": 0.0677, + "num_input_tokens_seen": 19311104, + "step": 28670 + }, + { + "epoch": 0.7005350206434906, + "grad_norm": 0.29962795972824097, + "learning_rate": 1.9902195330645084e-06, + "loss": 0.0831, + "num_input_tokens_seen": 19314304, + "step": 28675 + }, + { + "epoch": 0.7006571714753378, + "grad_norm": 29.55235481262207, + "learning_rate": 1.9902076317026014e-06, + "loss": 0.1596, + "num_input_tokens_seen": 19317760, + "step": 28680 + }, + { + "epoch": 0.7007793223071849, + "grad_norm": 16.093425750732422, + "learning_rate": 1.990195723139638e-06, + "loss": 0.1439, + "num_input_tokens_seen": 19321344, + "step": 28685 + }, + { + "epoch": 0.7009014731390321, + "grad_norm": 38.62454605102539, + "learning_rate": 1.990183807375705e-06, + "loss": 0.2093, + "num_input_tokens_seen": 19325312, + "step": 28690 + }, + { + "epoch": 0.7010236239708793, + "grad_norm": 1.2527316808700562, + "learning_rate": 1.9901718844108894e-06, + "loss": 0.1465, + "num_input_tokens_seen": 19328896, + "step": 28695 + }, + { + "epoch": 0.7011457748027264, + "grad_norm": 31.273618698120117, + "learning_rate": 1.9901599542452773e-06, + "loss": 0.083, + "num_input_tokens_seen": 19332480, + "step": 28700 + }, + { + "epoch": 0.7012679256345735, + "grad_norm": 1.2295677661895752, + "learning_rate": 1.9901480168789554e-06, + "loss": 0.0821, + "num_input_tokens_seen": 19335680, + "step": 28705 + }, + { + "epoch": 0.7013900764664207, + "grad_norm": 0.519361138343811, + "learning_rate": 1.990136072312011e-06, + "loss": 0.0284, + "num_input_tokens_seen": 19339136, + "step": 28710 + }, + { + "epoch": 0.7015122272982679, + "grad_norm": 25.611343383789062, + "learning_rate": 1.9901241205445313e-06, + "loss": 0.197, + "num_input_tokens_seen": 19342592, + "step": 28715 + }, + { + "epoch": 0.7016343781301151, + "grad_norm": 1.822134256362915, + "learning_rate": 1.990112161576602e-06, + "loss": 0.0998, + "num_input_tokens_seen": 19346496, + "step": 28720 + }, + { + "epoch": 0.7017565289619623, + "grad_norm": 11.004918098449707, + "learning_rate": 1.990100195408311e-06, + "loss": 0.1877, + "num_input_tokens_seen": 19349952, + "step": 28725 + }, + { + "epoch": 0.7018786797938094, + "grad_norm": 8.324329376220703, + "learning_rate": 1.9900882220397454e-06, + "loss": 0.1481, + "num_input_tokens_seen": 19353408, + "step": 28730 + }, + { + "epoch": 0.7020008306256565, + "grad_norm": 21.26910400390625, + "learning_rate": 1.9900762414709913e-06, + "loss": 0.1623, + "num_input_tokens_seen": 19357120, + "step": 28735 + }, + { + "epoch": 0.7021229814575037, + "grad_norm": 24.641504287719727, + "learning_rate": 1.990064253702137e-06, + "loss": 0.1476, + "num_input_tokens_seen": 19360640, + "step": 28740 + }, + { + "epoch": 0.7022451322893509, + "grad_norm": 100.31071472167969, + "learning_rate": 1.990052258733269e-06, + "loss": 0.0906, + "num_input_tokens_seen": 19364672, + "step": 28745 + }, + { + "epoch": 0.702367283121198, + "grad_norm": 0.5302855372428894, + "learning_rate": 1.9900402565644745e-06, + "loss": 0.0672, + "num_input_tokens_seen": 19367680, + "step": 28750 + }, + { + "epoch": 0.7024894339530452, + "grad_norm": 34.36920928955078, + "learning_rate": 1.9900282471958413e-06, + "loss": 0.1317, + "num_input_tokens_seen": 19370624, + "step": 28755 + }, + { + "epoch": 0.7026115847848924, + "grad_norm": 36.110626220703125, + "learning_rate": 1.990016230627456e-06, + "loss": 0.1801, + "num_input_tokens_seen": 19373568, + "step": 28760 + }, + { + "epoch": 0.7027337356167396, + "grad_norm": 1.0250879526138306, + "learning_rate": 1.9900042068594066e-06, + "loss": 0.1105, + "num_input_tokens_seen": 19377024, + "step": 28765 + }, + { + "epoch": 0.7028558864485868, + "grad_norm": 18.993581771850586, + "learning_rate": 1.98999217589178e-06, + "loss": 0.1156, + "num_input_tokens_seen": 19380032, + "step": 28770 + }, + { + "epoch": 0.7029780372804338, + "grad_norm": 19.046401977539062, + "learning_rate": 1.9899801377246645e-06, + "loss": 0.1092, + "num_input_tokens_seen": 19383680, + "step": 28775 + }, + { + "epoch": 0.703100188112281, + "grad_norm": 15.642848014831543, + "learning_rate": 1.989968092358147e-06, + "loss": 0.2078, + "num_input_tokens_seen": 19386944, + "step": 28780 + }, + { + "epoch": 0.7032223389441282, + "grad_norm": 4.28993558883667, + "learning_rate": 1.9899560397923154e-06, + "loss": 0.1218, + "num_input_tokens_seen": 19390272, + "step": 28785 + }, + { + "epoch": 0.7033444897759754, + "grad_norm": 0.3806248903274536, + "learning_rate": 1.9899439800272568e-06, + "loss": 0.1744, + "num_input_tokens_seen": 19393344, + "step": 28790 + }, + { + "epoch": 0.7034666406078225, + "grad_norm": 21.923810958862305, + "learning_rate": 1.9899319130630597e-06, + "loss": 0.0851, + "num_input_tokens_seen": 19396992, + "step": 28795 + }, + { + "epoch": 0.7035887914396697, + "grad_norm": 17.54344367980957, + "learning_rate": 1.989919838899811e-06, + "loss": 0.1174, + "num_input_tokens_seen": 19400320, + "step": 28800 + }, + { + "epoch": 0.7037109422715169, + "grad_norm": 0.955232560634613, + "learning_rate": 1.9899077575376e-06, + "loss": 0.1332, + "num_input_tokens_seen": 19404032, + "step": 28805 + }, + { + "epoch": 0.7038330931033641, + "grad_norm": 23.174301147460938, + "learning_rate": 1.9898956689765127e-06, + "loss": 0.1948, + "num_input_tokens_seen": 19407360, + "step": 28810 + }, + { + "epoch": 0.7039552439352113, + "grad_norm": 7.8368072509765625, + "learning_rate": 1.989883573216638e-06, + "loss": 0.0506, + "num_input_tokens_seen": 19410560, + "step": 28815 + }, + { + "epoch": 0.7040773947670583, + "grad_norm": 26.670555114746094, + "learning_rate": 1.9898714702580637e-06, + "loss": 0.0926, + "num_input_tokens_seen": 19413760, + "step": 28820 + }, + { + "epoch": 0.7041995455989055, + "grad_norm": 21.16333770751953, + "learning_rate": 1.9898593601008776e-06, + "loss": 0.1177, + "num_input_tokens_seen": 19417216, + "step": 28825 + }, + { + "epoch": 0.7043216964307527, + "grad_norm": 30.32469940185547, + "learning_rate": 1.9898472427451684e-06, + "loss": 0.0938, + "num_input_tokens_seen": 19420480, + "step": 28830 + }, + { + "epoch": 0.7044438472625999, + "grad_norm": 25.04443359375, + "learning_rate": 1.989835118191024e-06, + "loss": 0.197, + "num_input_tokens_seen": 19424000, + "step": 28835 + }, + { + "epoch": 0.704565998094447, + "grad_norm": 1.6711410284042358, + "learning_rate": 1.989822986438532e-06, + "loss": 0.0714, + "num_input_tokens_seen": 19427648, + "step": 28840 + }, + { + "epoch": 0.7046881489262942, + "grad_norm": 31.52394676208496, + "learning_rate": 1.9898108474877805e-06, + "loss": 0.0972, + "num_input_tokens_seen": 19430912, + "step": 28845 + }, + { + "epoch": 0.7048102997581414, + "grad_norm": 20.945629119873047, + "learning_rate": 1.989798701338859e-06, + "loss": 0.1386, + "num_input_tokens_seen": 19434432, + "step": 28850 + }, + { + "epoch": 0.7049324505899885, + "grad_norm": 11.488555908203125, + "learning_rate": 1.989786547991855e-06, + "loss": 0.2156, + "num_input_tokens_seen": 19437824, + "step": 28855 + }, + { + "epoch": 0.7050546014218357, + "grad_norm": 7.017218112945557, + "learning_rate": 1.989774387446857e-06, + "loss": 0.1754, + "num_input_tokens_seen": 19441344, + "step": 28860 + }, + { + "epoch": 0.7051767522536828, + "grad_norm": 0.6078383326530457, + "learning_rate": 1.9897622197039533e-06, + "loss": 0.0371, + "num_input_tokens_seen": 19445056, + "step": 28865 + }, + { + "epoch": 0.70529890308553, + "grad_norm": 8.64908504486084, + "learning_rate": 1.9897500447632326e-06, + "loss": 0.1202, + "num_input_tokens_seen": 19448192, + "step": 28870 + }, + { + "epoch": 0.7054210539173772, + "grad_norm": 16.532529830932617, + "learning_rate": 1.9897378626247835e-06, + "loss": 0.1468, + "num_input_tokens_seen": 19451776, + "step": 28875 + }, + { + "epoch": 0.7055432047492244, + "grad_norm": 1.9767354726791382, + "learning_rate": 1.9897256732886943e-06, + "loss": 0.0516, + "num_input_tokens_seen": 19454848, + "step": 28880 + }, + { + "epoch": 0.7056653555810715, + "grad_norm": 18.877872467041016, + "learning_rate": 1.989713476755054e-06, + "loss": 0.1063, + "num_input_tokens_seen": 19458624, + "step": 28885 + }, + { + "epoch": 0.7057875064129187, + "grad_norm": 6.534497261047363, + "learning_rate": 1.9897012730239508e-06, + "loss": 0.0761, + "num_input_tokens_seen": 19461760, + "step": 28890 + }, + { + "epoch": 0.7059096572447658, + "grad_norm": 12.019935607910156, + "learning_rate": 1.989689062095474e-06, + "loss": 0.1534, + "num_input_tokens_seen": 19465600, + "step": 28895 + }, + { + "epoch": 0.706031808076613, + "grad_norm": 6.496730327606201, + "learning_rate": 1.989676843969712e-06, + "loss": 0.0526, + "num_input_tokens_seen": 19469504, + "step": 28900 + }, + { + "epoch": 0.7061539589084602, + "grad_norm": 0.14786797761917114, + "learning_rate": 1.9896646186467537e-06, + "loss": 0.1339, + "num_input_tokens_seen": 19472192, + "step": 28905 + }, + { + "epoch": 0.7062761097403073, + "grad_norm": 14.278701782226562, + "learning_rate": 1.9896523861266882e-06, + "loss": 0.1101, + "num_input_tokens_seen": 19476096, + "step": 28910 + }, + { + "epoch": 0.7063982605721545, + "grad_norm": 13.4442720413208, + "learning_rate": 1.9896401464096045e-06, + "loss": 0.0629, + "num_input_tokens_seen": 19479232, + "step": 28915 + }, + { + "epoch": 0.7065204114040017, + "grad_norm": 17.962377548217773, + "learning_rate": 1.9896278994955914e-06, + "loss": 0.1961, + "num_input_tokens_seen": 19482112, + "step": 28920 + }, + { + "epoch": 0.7066425622358489, + "grad_norm": 14.128437042236328, + "learning_rate": 1.9896156453847383e-06, + "loss": 0.0798, + "num_input_tokens_seen": 19485376, + "step": 28925 + }, + { + "epoch": 0.706764713067696, + "grad_norm": 4.156303882598877, + "learning_rate": 1.9896033840771333e-06, + "loss": 0.0248, + "num_input_tokens_seen": 19488768, + "step": 28930 + }, + { + "epoch": 0.7068868638995431, + "grad_norm": 9.4829740524292, + "learning_rate": 1.989591115572867e-06, + "loss": 0.1189, + "num_input_tokens_seen": 19492160, + "step": 28935 + }, + { + "epoch": 0.7070090147313903, + "grad_norm": 4.698817729949951, + "learning_rate": 1.9895788398720276e-06, + "loss": 0.0783, + "num_input_tokens_seen": 19495616, + "step": 28940 + }, + { + "epoch": 0.7071311655632375, + "grad_norm": 17.0122013092041, + "learning_rate": 1.9895665569747047e-06, + "loss": 0.0378, + "num_input_tokens_seen": 19498880, + "step": 28945 + }, + { + "epoch": 0.7072533163950846, + "grad_norm": 15.190747261047363, + "learning_rate": 1.989554266880988e-06, + "loss": 0.0813, + "num_input_tokens_seen": 19501952, + "step": 28950 + }, + { + "epoch": 0.7073754672269318, + "grad_norm": 28.33185386657715, + "learning_rate": 1.9895419695909663e-06, + "loss": 0.0586, + "num_input_tokens_seen": 19505792, + "step": 28955 + }, + { + "epoch": 0.707497618058779, + "grad_norm": 6.6051225662231445, + "learning_rate": 1.989529665104729e-06, + "loss": 0.0589, + "num_input_tokens_seen": 19509504, + "step": 28960 + }, + { + "epoch": 0.7076197688906262, + "grad_norm": 10.518942832946777, + "learning_rate": 1.989517353422366e-06, + "loss": 0.1812, + "num_input_tokens_seen": 19512640, + "step": 28965 + }, + { + "epoch": 0.7077419197224734, + "grad_norm": 24.557565689086914, + "learning_rate": 1.989505034543967e-06, + "loss": 0.1463, + "num_input_tokens_seen": 19516032, + "step": 28970 + }, + { + "epoch": 0.7078640705543204, + "grad_norm": 36.49713897705078, + "learning_rate": 1.989492708469621e-06, + "loss": 0.191, + "num_input_tokens_seen": 19519296, + "step": 28975 + }, + { + "epoch": 0.7079862213861676, + "grad_norm": 1.5474926233291626, + "learning_rate": 1.9894803751994176e-06, + "loss": 0.1693, + "num_input_tokens_seen": 19522688, + "step": 28980 + }, + { + "epoch": 0.7081083722180148, + "grad_norm": 0.5776304006576538, + "learning_rate": 1.989468034733447e-06, + "loss": 0.0771, + "num_input_tokens_seen": 19526720, + "step": 28985 + }, + { + "epoch": 0.708230523049862, + "grad_norm": 4.457348346710205, + "learning_rate": 1.989455687071799e-06, + "loss": 0.1575, + "num_input_tokens_seen": 19529856, + "step": 28990 + }, + { + "epoch": 0.7083526738817091, + "grad_norm": 0.3430345952510834, + "learning_rate": 1.9894433322145624e-06, + "loss": 0.0408, + "num_input_tokens_seen": 19533824, + "step": 28995 + }, + { + "epoch": 0.7084748247135563, + "grad_norm": 1.5313557386398315, + "learning_rate": 1.9894309701618285e-06, + "loss": 0.075, + "num_input_tokens_seen": 19537216, + "step": 29000 + }, + { + "epoch": 0.7085969755454035, + "grad_norm": 19.49930191040039, + "learning_rate": 1.989418600913686e-06, + "loss": 0.1021, + "num_input_tokens_seen": 19540288, + "step": 29005 + }, + { + "epoch": 0.7087191263772507, + "grad_norm": 23.722192764282227, + "learning_rate": 1.9894062244702258e-06, + "loss": 0.0327, + "num_input_tokens_seen": 19544064, + "step": 29010 + }, + { + "epoch": 0.7088412772090978, + "grad_norm": 3.8661394119262695, + "learning_rate": 1.989393840831537e-06, + "loss": 0.089, + "num_input_tokens_seen": 19547648, + "step": 29015 + }, + { + "epoch": 0.7089634280409449, + "grad_norm": 13.596760749816895, + "learning_rate": 1.98938144999771e-06, + "loss": 0.1718, + "num_input_tokens_seen": 19551488, + "step": 29020 + }, + { + "epoch": 0.7090855788727921, + "grad_norm": 33.69784927368164, + "learning_rate": 1.989369051968835e-06, + "loss": 0.1411, + "num_input_tokens_seen": 19554560, + "step": 29025 + }, + { + "epoch": 0.7092077297046393, + "grad_norm": 21.88158416748047, + "learning_rate": 1.9893566467450024e-06, + "loss": 0.1518, + "num_input_tokens_seen": 19557952, + "step": 29030 + }, + { + "epoch": 0.7093298805364865, + "grad_norm": 2.869716167449951, + "learning_rate": 1.989344234326302e-06, + "loss": 0.0834, + "num_input_tokens_seen": 19561088, + "step": 29035 + }, + { + "epoch": 0.7094520313683336, + "grad_norm": 10.745006561279297, + "learning_rate": 1.989331814712824e-06, + "loss": 0.2314, + "num_input_tokens_seen": 19564800, + "step": 29040 + }, + { + "epoch": 0.7095741822001808, + "grad_norm": 30.001081466674805, + "learning_rate": 1.9893193879046594e-06, + "loss": 0.1122, + "num_input_tokens_seen": 19568384, + "step": 29045 + }, + { + "epoch": 0.709696333032028, + "grad_norm": 30.0404109954834, + "learning_rate": 1.989306953901898e-06, + "loss": 0.1931, + "num_input_tokens_seen": 19571456, + "step": 29050 + }, + { + "epoch": 0.7098184838638751, + "grad_norm": 43.12938690185547, + "learning_rate": 1.9892945127046304e-06, + "loss": 0.1065, + "num_input_tokens_seen": 19575040, + "step": 29055 + }, + { + "epoch": 0.7099406346957223, + "grad_norm": 27.3835506439209, + "learning_rate": 1.989282064312947e-06, + "loss": 0.0938, + "num_input_tokens_seen": 19577920, + "step": 29060 + }, + { + "epoch": 0.7100627855275694, + "grad_norm": 19.477853775024414, + "learning_rate": 1.989269608726938e-06, + "loss": 0.0894, + "num_input_tokens_seen": 19581312, + "step": 29065 + }, + { + "epoch": 0.7101849363594166, + "grad_norm": 23.02320098876953, + "learning_rate": 1.9892571459466945e-06, + "loss": 0.1275, + "num_input_tokens_seen": 19584640, + "step": 29070 + }, + { + "epoch": 0.7103070871912638, + "grad_norm": 20.052583694458008, + "learning_rate": 1.9892446759723073e-06, + "loss": 0.1724, + "num_input_tokens_seen": 19588224, + "step": 29075 + }, + { + "epoch": 0.710429238023111, + "grad_norm": 3.1162588596343994, + "learning_rate": 1.989232198803866e-06, + "loss": 0.1144, + "num_input_tokens_seen": 19591744, + "step": 29080 + }, + { + "epoch": 0.710551388854958, + "grad_norm": 15.635176658630371, + "learning_rate": 1.9892197144414627e-06, + "loss": 0.0349, + "num_input_tokens_seen": 19595136, + "step": 29085 + }, + { + "epoch": 0.7106735396868052, + "grad_norm": 3.7856173515319824, + "learning_rate": 1.9892072228851876e-06, + "loss": 0.0683, + "num_input_tokens_seen": 19598464, + "step": 29090 + }, + { + "epoch": 0.7107956905186524, + "grad_norm": 15.923710823059082, + "learning_rate": 1.9891947241351313e-06, + "loss": 0.0898, + "num_input_tokens_seen": 19602048, + "step": 29095 + }, + { + "epoch": 0.7109178413504996, + "grad_norm": 31.128541946411133, + "learning_rate": 1.989182218191385e-06, + "loss": 0.1653, + "num_input_tokens_seen": 19605696, + "step": 29100 + }, + { + "epoch": 0.7110399921823468, + "grad_norm": 31.127042770385742, + "learning_rate": 1.9891697050540395e-06, + "loss": 0.0267, + "num_input_tokens_seen": 19608896, + "step": 29105 + }, + { + "epoch": 0.7111621430141939, + "grad_norm": 19.851909637451172, + "learning_rate": 1.9891571847231858e-06, + "loss": 0.1332, + "num_input_tokens_seen": 19612672, + "step": 29110 + }, + { + "epoch": 0.7112842938460411, + "grad_norm": 27.556503295898438, + "learning_rate": 1.989144657198915e-06, + "loss": 0.185, + "num_input_tokens_seen": 19616768, + "step": 29115 + }, + { + "epoch": 0.7114064446778883, + "grad_norm": 1.0737075805664062, + "learning_rate": 1.989132122481318e-06, + "loss": 0.0358, + "num_input_tokens_seen": 19619776, + "step": 29120 + }, + { + "epoch": 0.7115285955097355, + "grad_norm": 67.04922485351562, + "learning_rate": 1.9891195805704865e-06, + "loss": 0.1996, + "num_input_tokens_seen": 19623040, + "step": 29125 + }, + { + "epoch": 0.7116507463415825, + "grad_norm": 9.727737426757812, + "learning_rate": 1.9891070314665114e-06, + "loss": 0.1877, + "num_input_tokens_seen": 19625984, + "step": 29130 + }, + { + "epoch": 0.7117728971734297, + "grad_norm": 0.47230786085128784, + "learning_rate": 1.9890944751694838e-06, + "loss": 0.1285, + "num_input_tokens_seen": 19630272, + "step": 29135 + }, + { + "epoch": 0.7118950480052769, + "grad_norm": 12.917232513427734, + "learning_rate": 1.989081911679495e-06, + "loss": 0.1313, + "num_input_tokens_seen": 19633472, + "step": 29140 + }, + { + "epoch": 0.7120171988371241, + "grad_norm": 0.19916245341300964, + "learning_rate": 1.9890693409966366e-06, + "loss": 0.1579, + "num_input_tokens_seen": 19636416, + "step": 29145 + }, + { + "epoch": 0.7121393496689713, + "grad_norm": 12.989840507507324, + "learning_rate": 1.9890567631209996e-06, + "loss": 0.1654, + "num_input_tokens_seen": 19639616, + "step": 29150 + }, + { + "epoch": 0.7122615005008184, + "grad_norm": 15.532271385192871, + "learning_rate": 1.9890441780526764e-06, + "loss": 0.2201, + "num_input_tokens_seen": 19643456, + "step": 29155 + }, + { + "epoch": 0.7123836513326656, + "grad_norm": 31.146207809448242, + "learning_rate": 1.9890315857917577e-06, + "loss": 0.0386, + "num_input_tokens_seen": 19646784, + "step": 29160 + }, + { + "epoch": 0.7125058021645128, + "grad_norm": 21.65290069580078, + "learning_rate": 1.9890189863383354e-06, + "loss": 0.1972, + "num_input_tokens_seen": 19650112, + "step": 29165 + }, + { + "epoch": 0.71262795299636, + "grad_norm": 13.02177906036377, + "learning_rate": 1.9890063796925006e-06, + "loss": 0.1494, + "num_input_tokens_seen": 19653312, + "step": 29170 + }, + { + "epoch": 0.712750103828207, + "grad_norm": 22.124685287475586, + "learning_rate": 1.988993765854346e-06, + "loss": 0.2056, + "num_input_tokens_seen": 19656576, + "step": 29175 + }, + { + "epoch": 0.7128722546600542, + "grad_norm": 1.5387930870056152, + "learning_rate": 1.9889811448239625e-06, + "loss": 0.1209, + "num_input_tokens_seen": 19659520, + "step": 29180 + }, + { + "epoch": 0.7129944054919014, + "grad_norm": 15.83926773071289, + "learning_rate": 1.9889685166014417e-06, + "loss": 0.1168, + "num_input_tokens_seen": 19662912, + "step": 29185 + }, + { + "epoch": 0.7131165563237486, + "grad_norm": 4.195895195007324, + "learning_rate": 1.988955881186876e-06, + "loss": 0.102, + "num_input_tokens_seen": 19666176, + "step": 29190 + }, + { + "epoch": 0.7132387071555957, + "grad_norm": 11.080657005310059, + "learning_rate": 1.9889432385803574e-06, + "loss": 0.0322, + "num_input_tokens_seen": 19669888, + "step": 29195 + }, + { + "epoch": 0.7133608579874429, + "grad_norm": 16.482271194458008, + "learning_rate": 1.9889305887819776e-06, + "loss": 0.1683, + "num_input_tokens_seen": 19672960, + "step": 29200 + }, + { + "epoch": 0.71348300881929, + "grad_norm": 15.85693359375, + "learning_rate": 1.9889179317918285e-06, + "loss": 0.1291, + "num_input_tokens_seen": 19676224, + "step": 29205 + }, + { + "epoch": 0.7136051596511372, + "grad_norm": 1.0122528076171875, + "learning_rate": 1.988905267610002e-06, + "loss": 0.0831, + "num_input_tokens_seen": 19680000, + "step": 29210 + }, + { + "epoch": 0.7137273104829844, + "grad_norm": 28.881397247314453, + "learning_rate": 1.9888925962365907e-06, + "loss": 0.083, + "num_input_tokens_seen": 19683328, + "step": 29215 + }, + { + "epoch": 0.7138494613148315, + "grad_norm": 18.924196243286133, + "learning_rate": 1.9888799176716866e-06, + "loss": 0.1372, + "num_input_tokens_seen": 19686592, + "step": 29220 + }, + { + "epoch": 0.7139716121466787, + "grad_norm": 2.5882859230041504, + "learning_rate": 1.988867231915381e-06, + "loss": 0.1241, + "num_input_tokens_seen": 19689728, + "step": 29225 + }, + { + "epoch": 0.7140937629785259, + "grad_norm": 39.74570846557617, + "learning_rate": 1.9888545389677675e-06, + "loss": 0.1089, + "num_input_tokens_seen": 19692864, + "step": 29230 + }, + { + "epoch": 0.7142159138103731, + "grad_norm": 8.0762939453125, + "learning_rate": 1.9888418388289376e-06, + "loss": 0.1428, + "num_input_tokens_seen": 19695872, + "step": 29235 + }, + { + "epoch": 0.7143380646422202, + "grad_norm": 1.500272512435913, + "learning_rate": 1.988829131498984e-06, + "loss": 0.0693, + "num_input_tokens_seen": 19699328, + "step": 29240 + }, + { + "epoch": 0.7144602154740674, + "grad_norm": 3.4893500804901123, + "learning_rate": 1.9888164169779992e-06, + "loss": 0.0795, + "num_input_tokens_seen": 19702848, + "step": 29245 + }, + { + "epoch": 0.7145823663059145, + "grad_norm": 20.037160873413086, + "learning_rate": 1.9888036952660754e-06, + "loss": 0.0835, + "num_input_tokens_seen": 19706048, + "step": 29250 + }, + { + "epoch": 0.7147045171377617, + "grad_norm": 0.3974679112434387, + "learning_rate": 1.9887909663633047e-06, + "loss": 0.0817, + "num_input_tokens_seen": 19709440, + "step": 29255 + }, + { + "epoch": 0.7148266679696089, + "grad_norm": 15.502142906188965, + "learning_rate": 1.9887782302697803e-06, + "loss": 0.0933, + "num_input_tokens_seen": 19712704, + "step": 29260 + }, + { + "epoch": 0.714948818801456, + "grad_norm": 1.3267139196395874, + "learning_rate": 1.988765486985595e-06, + "loss": 0.0394, + "num_input_tokens_seen": 19715968, + "step": 29265 + }, + { + "epoch": 0.7150709696333032, + "grad_norm": 2.264620542526245, + "learning_rate": 1.988752736510841e-06, + "loss": 0.0985, + "num_input_tokens_seen": 19718784, + "step": 29270 + }, + { + "epoch": 0.7151931204651504, + "grad_norm": 33.954647064208984, + "learning_rate": 1.9887399788456113e-06, + "loss": 0.1016, + "num_input_tokens_seen": 19722368, + "step": 29275 + }, + { + "epoch": 0.7153152712969976, + "grad_norm": 10.680964469909668, + "learning_rate": 1.988727213989998e-06, + "loss": 0.1463, + "num_input_tokens_seen": 19726016, + "step": 29280 + }, + { + "epoch": 0.7154374221288446, + "grad_norm": 8.014860153198242, + "learning_rate": 1.9887144419440948e-06, + "loss": 0.2273, + "num_input_tokens_seen": 19729280, + "step": 29285 + }, + { + "epoch": 0.7155595729606918, + "grad_norm": 11.822001457214355, + "learning_rate": 1.9887016627079946e-06, + "loss": 0.0731, + "num_input_tokens_seen": 19732736, + "step": 29290 + }, + { + "epoch": 0.715681723792539, + "grad_norm": 23.019365310668945, + "learning_rate": 1.9886888762817897e-06, + "loss": 0.1675, + "num_input_tokens_seen": 19736832, + "step": 29295 + }, + { + "epoch": 0.7158038746243862, + "grad_norm": 1.2372710704803467, + "learning_rate": 1.988676082665573e-06, + "loss": 0.1408, + "num_input_tokens_seen": 19740224, + "step": 29300 + }, + { + "epoch": 0.7159260254562334, + "grad_norm": 9.24066162109375, + "learning_rate": 1.9886632818594384e-06, + "loss": 0.1125, + "num_input_tokens_seen": 19743872, + "step": 29305 + }, + { + "epoch": 0.7160481762880805, + "grad_norm": 14.921540260314941, + "learning_rate": 1.988650473863478e-06, + "loss": 0.0717, + "num_input_tokens_seen": 19747136, + "step": 29310 + }, + { + "epoch": 0.7161703271199277, + "grad_norm": 1.0777639150619507, + "learning_rate": 1.988637658677786e-06, + "loss": 0.0765, + "num_input_tokens_seen": 19750336, + "step": 29315 + }, + { + "epoch": 0.7162924779517749, + "grad_norm": 18.06236457824707, + "learning_rate": 1.9886248363024545e-06, + "loss": 0.1244, + "num_input_tokens_seen": 19754112, + "step": 29320 + }, + { + "epoch": 0.716414628783622, + "grad_norm": 0.44852542877197266, + "learning_rate": 1.9886120067375777e-06, + "loss": 0.0769, + "num_input_tokens_seen": 19757376, + "step": 29325 + }, + { + "epoch": 0.7165367796154691, + "grad_norm": 3.454195022583008, + "learning_rate": 1.9885991699832483e-06, + "loss": 0.0164, + "num_input_tokens_seen": 19760576, + "step": 29330 + }, + { + "epoch": 0.7166589304473163, + "grad_norm": 26.183717727661133, + "learning_rate": 1.98858632603956e-06, + "loss": 0.1532, + "num_input_tokens_seen": 19763776, + "step": 29335 + }, + { + "epoch": 0.7167810812791635, + "grad_norm": 10.307730674743652, + "learning_rate": 1.988573474906606e-06, + "loss": 0.1228, + "num_input_tokens_seen": 19767872, + "step": 29340 + }, + { + "epoch": 0.7169032321110107, + "grad_norm": 26.782163619995117, + "learning_rate": 1.9885606165844796e-06, + "loss": 0.2179, + "num_input_tokens_seen": 19771200, + "step": 29345 + }, + { + "epoch": 0.7170253829428579, + "grad_norm": 11.243006706237793, + "learning_rate": 1.9885477510732745e-06, + "loss": 0.0435, + "num_input_tokens_seen": 19774400, + "step": 29350 + }, + { + "epoch": 0.717147533774705, + "grad_norm": 17.485429763793945, + "learning_rate": 1.9885348783730843e-06, + "loss": 0.1557, + "num_input_tokens_seen": 19778368, + "step": 29355 + }, + { + "epoch": 0.7172696846065522, + "grad_norm": 30.79226303100586, + "learning_rate": 1.9885219984840027e-06, + "loss": 0.0719, + "num_input_tokens_seen": 19781824, + "step": 29360 + }, + { + "epoch": 0.7173918354383994, + "grad_norm": 14.088363647460938, + "learning_rate": 1.9885091114061233e-06, + "loss": 0.0791, + "num_input_tokens_seen": 19785152, + "step": 29365 + }, + { + "epoch": 0.7175139862702465, + "grad_norm": 0.36788198351860046, + "learning_rate": 1.9884962171395396e-06, + "loss": 0.0856, + "num_input_tokens_seen": 19788288, + "step": 29370 + }, + { + "epoch": 0.7176361371020936, + "grad_norm": 5.974476337432861, + "learning_rate": 1.9884833156843457e-06, + "loss": 0.0835, + "num_input_tokens_seen": 19792192, + "step": 29375 + }, + { + "epoch": 0.7177582879339408, + "grad_norm": 16.720901489257812, + "learning_rate": 1.988470407040635e-06, + "loss": 0.2071, + "num_input_tokens_seen": 19795520, + "step": 29380 + }, + { + "epoch": 0.717880438765788, + "grad_norm": 0.36717942357063293, + "learning_rate": 1.988457491208502e-06, + "loss": 0.0197, + "num_input_tokens_seen": 19798720, + "step": 29385 + }, + { + "epoch": 0.7180025895976352, + "grad_norm": 2.1908514499664307, + "learning_rate": 1.9884445681880402e-06, + "loss": 0.0873, + "num_input_tokens_seen": 19801792, + "step": 29390 + }, + { + "epoch": 0.7181247404294824, + "grad_norm": 26.066774368286133, + "learning_rate": 1.9884316379793435e-06, + "loss": 0.1053, + "num_input_tokens_seen": 19804928, + "step": 29395 + }, + { + "epoch": 0.7182468912613295, + "grad_norm": 0.9114089608192444, + "learning_rate": 1.9884187005825058e-06, + "loss": 0.0368, + "num_input_tokens_seen": 19808576, + "step": 29400 + }, + { + "epoch": 0.7183690420931766, + "grad_norm": 30.349382400512695, + "learning_rate": 1.988405755997622e-06, + "loss": 0.0519, + "num_input_tokens_seen": 19812288, + "step": 29405 + }, + { + "epoch": 0.7184911929250238, + "grad_norm": 1.4230334758758545, + "learning_rate": 1.9883928042247856e-06, + "loss": 0.1122, + "num_input_tokens_seen": 19815744, + "step": 29410 + }, + { + "epoch": 0.718613343756871, + "grad_norm": 11.513659477233887, + "learning_rate": 1.9883798452640904e-06, + "loss": 0.1189, + "num_input_tokens_seen": 19818880, + "step": 29415 + }, + { + "epoch": 0.7187354945887181, + "grad_norm": 31.712440490722656, + "learning_rate": 1.9883668791156316e-06, + "loss": 0.3849, + "num_input_tokens_seen": 19821952, + "step": 29420 + }, + { + "epoch": 0.7188576454205653, + "grad_norm": 37.41859817504883, + "learning_rate": 1.988353905779503e-06, + "loss": 0.1492, + "num_input_tokens_seen": 19825024, + "step": 29425 + }, + { + "epoch": 0.7189797962524125, + "grad_norm": 29.80327796936035, + "learning_rate": 1.9883409252557987e-06, + "loss": 0.1241, + "num_input_tokens_seen": 19828288, + "step": 29430 + }, + { + "epoch": 0.7191019470842597, + "grad_norm": 5.770595550537109, + "learning_rate": 1.9883279375446135e-06, + "loss": 0.2149, + "num_input_tokens_seen": 19831552, + "step": 29435 + }, + { + "epoch": 0.7192240979161069, + "grad_norm": 10.977890014648438, + "learning_rate": 1.9883149426460416e-06, + "loss": 0.125, + "num_input_tokens_seen": 19834688, + "step": 29440 + }, + { + "epoch": 0.7193462487479539, + "grad_norm": 6.461169719696045, + "learning_rate": 1.9883019405601775e-06, + "loss": 0.0197, + "num_input_tokens_seen": 19837952, + "step": 29445 + }, + { + "epoch": 0.7194683995798011, + "grad_norm": 28.704113006591797, + "learning_rate": 1.988288931287116e-06, + "loss": 0.1573, + "num_input_tokens_seen": 19841216, + "step": 29450 + }, + { + "epoch": 0.7195905504116483, + "grad_norm": 0.888616681098938, + "learning_rate": 1.9882759148269517e-06, + "loss": 0.1007, + "num_input_tokens_seen": 19844864, + "step": 29455 + }, + { + "epoch": 0.7197127012434955, + "grad_norm": 7.894357204437256, + "learning_rate": 1.988262891179779e-06, + "loss": 0.075, + "num_input_tokens_seen": 19848000, + "step": 29460 + }, + { + "epoch": 0.7198348520753426, + "grad_norm": 25.15194320678711, + "learning_rate": 1.988249860345693e-06, + "loss": 0.0291, + "num_input_tokens_seen": 19851456, + "step": 29465 + }, + { + "epoch": 0.7199570029071898, + "grad_norm": 16.579662322998047, + "learning_rate": 1.9882368223247883e-06, + "loss": 0.1826, + "num_input_tokens_seen": 19854656, + "step": 29470 + }, + { + "epoch": 0.720079153739037, + "grad_norm": 0.5325263142585754, + "learning_rate": 1.988223777117159e-06, + "loss": 0.1084, + "num_input_tokens_seen": 19857472, + "step": 29475 + }, + { + "epoch": 0.7202013045708842, + "grad_norm": 24.2345027923584, + "learning_rate": 1.988210724722901e-06, + "loss": 0.104, + "num_input_tokens_seen": 19861184, + "step": 29480 + }, + { + "epoch": 0.7203234554027312, + "grad_norm": 2.030306339263916, + "learning_rate": 1.988197665142109e-06, + "loss": 0.1386, + "num_input_tokens_seen": 19864640, + "step": 29485 + }, + { + "epoch": 0.7204456062345784, + "grad_norm": 21.660234451293945, + "learning_rate": 1.9881845983748774e-06, + "loss": 0.1442, + "num_input_tokens_seen": 19868160, + "step": 29490 + }, + { + "epoch": 0.7205677570664256, + "grad_norm": 15.106928825378418, + "learning_rate": 1.988171524421302e-06, + "loss": 0.16, + "num_input_tokens_seen": 19871552, + "step": 29495 + }, + { + "epoch": 0.7206899078982728, + "grad_norm": 0.21059578657150269, + "learning_rate": 1.9881584432814767e-06, + "loss": 0.1296, + "num_input_tokens_seen": 19874688, + "step": 29500 + }, + { + "epoch": 0.72081205873012, + "grad_norm": 12.154464721679688, + "learning_rate": 1.988145354955498e-06, + "loss": 0.103, + "num_input_tokens_seen": 19878592, + "step": 29505 + }, + { + "epoch": 0.7209342095619671, + "grad_norm": 18.481788635253906, + "learning_rate": 1.9881322594434606e-06, + "loss": 0.1291, + "num_input_tokens_seen": 19881792, + "step": 29510 + }, + { + "epoch": 0.7210563603938143, + "grad_norm": 0.16375590860843658, + "learning_rate": 1.9881191567454594e-06, + "loss": 0.096, + "num_input_tokens_seen": 19884800, + "step": 29515 + }, + { + "epoch": 0.7211785112256615, + "grad_norm": 22.23729133605957, + "learning_rate": 1.98810604686159e-06, + "loss": 0.195, + "num_input_tokens_seen": 19887808, + "step": 29520 + }, + { + "epoch": 0.7213006620575086, + "grad_norm": 14.942244529724121, + "learning_rate": 1.9880929297919476e-06, + "loss": 0.0749, + "num_input_tokens_seen": 19890816, + "step": 29525 + }, + { + "epoch": 0.7214228128893557, + "grad_norm": 3.9260544776916504, + "learning_rate": 1.988079805536628e-06, + "loss": 0.179, + "num_input_tokens_seen": 19894336, + "step": 29530 + }, + { + "epoch": 0.7215449637212029, + "grad_norm": 7.509902000427246, + "learning_rate": 1.988066674095726e-06, + "loss": 0.1218, + "num_input_tokens_seen": 19897728, + "step": 29535 + }, + { + "epoch": 0.7216671145530501, + "grad_norm": 10.838415145874023, + "learning_rate": 1.988053535469337e-06, + "loss": 0.0808, + "num_input_tokens_seen": 19901056, + "step": 29540 + }, + { + "epoch": 0.7217892653848973, + "grad_norm": 17.19542694091797, + "learning_rate": 1.9880403896575573e-06, + "loss": 0.1164, + "num_input_tokens_seen": 19904384, + "step": 29545 + }, + { + "epoch": 0.7219114162167445, + "grad_norm": 27.95970916748047, + "learning_rate": 1.9880272366604824e-06, + "loss": 0.141, + "num_input_tokens_seen": 19907904, + "step": 29550 + }, + { + "epoch": 0.7220335670485916, + "grad_norm": 13.673565864562988, + "learning_rate": 1.9880140764782074e-06, + "loss": 0.1567, + "num_input_tokens_seen": 19911296, + "step": 29555 + }, + { + "epoch": 0.7221557178804388, + "grad_norm": 42.8197135925293, + "learning_rate": 1.9880009091108284e-06, + "loss": 0.1494, + "num_input_tokens_seen": 19914496, + "step": 29560 + }, + { + "epoch": 0.7222778687122859, + "grad_norm": 12.1239652633667, + "learning_rate": 1.9879877345584412e-06, + "loss": 0.0898, + "num_input_tokens_seen": 19917760, + "step": 29565 + }, + { + "epoch": 0.7224000195441331, + "grad_norm": 29.40520668029785, + "learning_rate": 1.987974552821141e-06, + "loss": 0.0657, + "num_input_tokens_seen": 19921216, + "step": 29570 + }, + { + "epoch": 0.7225221703759802, + "grad_norm": 16.474172592163086, + "learning_rate": 1.9879613638990247e-06, + "loss": 0.0873, + "num_input_tokens_seen": 19924864, + "step": 29575 + }, + { + "epoch": 0.7226443212078274, + "grad_norm": 8.598320007324219, + "learning_rate": 1.987948167792187e-06, + "loss": 0.1493, + "num_input_tokens_seen": 19928064, + "step": 29580 + }, + { + "epoch": 0.7227664720396746, + "grad_norm": 9.523024559020996, + "learning_rate": 1.9879349645007246e-06, + "loss": 0.0707, + "num_input_tokens_seen": 19931584, + "step": 29585 + }, + { + "epoch": 0.7228886228715218, + "grad_norm": 22.509151458740234, + "learning_rate": 1.9879217540247338e-06, + "loss": 0.195, + "num_input_tokens_seen": 19935040, + "step": 29590 + }, + { + "epoch": 0.723010773703369, + "grad_norm": 19.896160125732422, + "learning_rate": 1.9879085363643102e-06, + "loss": 0.1682, + "num_input_tokens_seen": 19938560, + "step": 29595 + }, + { + "epoch": 0.723132924535216, + "grad_norm": 1.9905105829238892, + "learning_rate": 1.9878953115195498e-06, + "loss": 0.0726, + "num_input_tokens_seen": 19941696, + "step": 29600 + }, + { + "epoch": 0.7232550753670632, + "grad_norm": 13.280790328979492, + "learning_rate": 1.987882079490549e-06, + "loss": 0.1861, + "num_input_tokens_seen": 19945984, + "step": 29605 + }, + { + "epoch": 0.7233772261989104, + "grad_norm": 4.532742977142334, + "learning_rate": 1.9878688402774042e-06, + "loss": 0.0521, + "num_input_tokens_seen": 19949184, + "step": 29610 + }, + { + "epoch": 0.7234993770307576, + "grad_norm": 23.19292449951172, + "learning_rate": 1.9878555938802115e-06, + "loss": 0.0757, + "num_input_tokens_seen": 19952320, + "step": 29615 + }, + { + "epoch": 0.7236215278626047, + "grad_norm": 0.6940127015113831, + "learning_rate": 1.987842340299067e-06, + "loss": 0.0802, + "num_input_tokens_seen": 19955392, + "step": 29620 + }, + { + "epoch": 0.7237436786944519, + "grad_norm": 0.5754919648170471, + "learning_rate": 1.9878290795340674e-06, + "loss": 0.1038, + "num_input_tokens_seen": 19958912, + "step": 29625 + }, + { + "epoch": 0.7238658295262991, + "grad_norm": 6.234220027923584, + "learning_rate": 1.9878158115853088e-06, + "loss": 0.0661, + "num_input_tokens_seen": 19962368, + "step": 29630 + }, + { + "epoch": 0.7239879803581463, + "grad_norm": 26.875852584838867, + "learning_rate": 1.9878025364528883e-06, + "loss": 0.1026, + "num_input_tokens_seen": 19965760, + "step": 29635 + }, + { + "epoch": 0.7241101311899935, + "grad_norm": 23.62150764465332, + "learning_rate": 1.987789254136902e-06, + "loss": 0.1179, + "num_input_tokens_seen": 19969216, + "step": 29640 + }, + { + "epoch": 0.7242322820218405, + "grad_norm": 0.09608346223831177, + "learning_rate": 1.987775964637447e-06, + "loss": 0.1176, + "num_input_tokens_seen": 19972800, + "step": 29645 + }, + { + "epoch": 0.7243544328536877, + "grad_norm": 0.43758726119995117, + "learning_rate": 1.9877626679546185e-06, + "loss": 0.0449, + "num_input_tokens_seen": 19976192, + "step": 29650 + }, + { + "epoch": 0.7244765836855349, + "grad_norm": 18.002891540527344, + "learning_rate": 1.987749364088515e-06, + "loss": 0.073, + "num_input_tokens_seen": 19979648, + "step": 29655 + }, + { + "epoch": 0.7245987345173821, + "grad_norm": 19.831453323364258, + "learning_rate": 1.987736053039232e-06, + "loss": 0.1078, + "num_input_tokens_seen": 19983296, + "step": 29660 + }, + { + "epoch": 0.7247208853492292, + "grad_norm": 11.848029136657715, + "learning_rate": 1.987722734806867e-06, + "loss": 0.1702, + "num_input_tokens_seen": 19986944, + "step": 29665 + }, + { + "epoch": 0.7248430361810764, + "grad_norm": 0.2218346893787384, + "learning_rate": 1.9877094093915166e-06, + "loss": 0.1022, + "num_input_tokens_seen": 19990528, + "step": 29670 + }, + { + "epoch": 0.7249651870129236, + "grad_norm": 6.095734596252441, + "learning_rate": 1.9876960767932775e-06, + "loss": 0.0221, + "num_input_tokens_seen": 19993664, + "step": 29675 + }, + { + "epoch": 0.7250873378447708, + "grad_norm": 35.75461196899414, + "learning_rate": 1.9876827370122472e-06, + "loss": 0.1655, + "num_input_tokens_seen": 19997440, + "step": 29680 + }, + { + "epoch": 0.7252094886766179, + "grad_norm": 54.55160903930664, + "learning_rate": 1.987669390048522e-06, + "loss": 0.1892, + "num_input_tokens_seen": 20000704, + "step": 29685 + }, + { + "epoch": 0.725331639508465, + "grad_norm": 0.1276654452085495, + "learning_rate": 1.9876560359021997e-06, + "loss": 0.0763, + "num_input_tokens_seen": 20003904, + "step": 29690 + }, + { + "epoch": 0.7254537903403122, + "grad_norm": 53.620574951171875, + "learning_rate": 1.9876426745733768e-06, + "loss": 0.1608, + "num_input_tokens_seen": 20007040, + "step": 29695 + }, + { + "epoch": 0.7255759411721594, + "grad_norm": 5.176275730133057, + "learning_rate": 1.9876293060621507e-06, + "loss": 0.1339, + "num_input_tokens_seen": 20010496, + "step": 29700 + }, + { + "epoch": 0.7256980920040066, + "grad_norm": 0.024253157898783684, + "learning_rate": 1.987615930368619e-06, + "loss": 0.0026, + "num_input_tokens_seen": 20013696, + "step": 29705 + }, + { + "epoch": 0.7258202428358537, + "grad_norm": 70.7147445678711, + "learning_rate": 1.987602547492878e-06, + "loss": 0.0895, + "num_input_tokens_seen": 20016960, + "step": 29710 + }, + { + "epoch": 0.7259423936677009, + "grad_norm": 24.393190383911133, + "learning_rate": 1.987589157435026e-06, + "loss": 0.2477, + "num_input_tokens_seen": 20020288, + "step": 29715 + }, + { + "epoch": 0.726064544499548, + "grad_norm": 0.23399071395397186, + "learning_rate": 1.98757576019516e-06, + "loss": 0.1913, + "num_input_tokens_seen": 20023360, + "step": 29720 + }, + { + "epoch": 0.7261866953313952, + "grad_norm": 26.293479919433594, + "learning_rate": 1.9875623557733777e-06, + "loss": 0.0992, + "num_input_tokens_seen": 20026688, + "step": 29725 + }, + { + "epoch": 0.7263088461632423, + "grad_norm": 9.954926490783691, + "learning_rate": 1.9875489441697764e-06, + "loss": 0.1794, + "num_input_tokens_seen": 20029952, + "step": 29730 + }, + { + "epoch": 0.7264309969950895, + "grad_norm": 22.676538467407227, + "learning_rate": 1.987535525384453e-06, + "loss": 0.0604, + "num_input_tokens_seen": 20033536, + "step": 29735 + }, + { + "epoch": 0.7265531478269367, + "grad_norm": 18.85304832458496, + "learning_rate": 1.9875220994175058e-06, + "loss": 0.0791, + "num_input_tokens_seen": 20036928, + "step": 29740 + }, + { + "epoch": 0.7266752986587839, + "grad_norm": 3.8162927627563477, + "learning_rate": 1.987508666269033e-06, + "loss": 0.0879, + "num_input_tokens_seen": 20039936, + "step": 29745 + }, + { + "epoch": 0.7267974494906311, + "grad_norm": 12.169044494628906, + "learning_rate": 1.987495225939131e-06, + "loss": 0.1627, + "num_input_tokens_seen": 20043456, + "step": 29750 + }, + { + "epoch": 0.7269196003224782, + "grad_norm": 0.9503796100616455, + "learning_rate": 1.987481778427898e-06, + "loss": 0.0406, + "num_input_tokens_seen": 20046592, + "step": 29755 + }, + { + "epoch": 0.7270417511543253, + "grad_norm": 13.596330642700195, + "learning_rate": 1.9874683237354317e-06, + "loss": 0.0608, + "num_input_tokens_seen": 20050112, + "step": 29760 + }, + { + "epoch": 0.7271639019861725, + "grad_norm": 1.3646011352539062, + "learning_rate": 1.9874548618618303e-06, + "loss": 0.0841, + "num_input_tokens_seen": 20053312, + "step": 29765 + }, + { + "epoch": 0.7272860528180197, + "grad_norm": 45.84081268310547, + "learning_rate": 1.987441392807192e-06, + "loss": 0.1837, + "num_input_tokens_seen": 20056768, + "step": 29770 + }, + { + "epoch": 0.7274082036498668, + "grad_norm": 0.8793035745620728, + "learning_rate": 1.987427916571614e-06, + "loss": 0.1468, + "num_input_tokens_seen": 20059840, + "step": 29775 + }, + { + "epoch": 0.727530354481714, + "grad_norm": 7.483636856079102, + "learning_rate": 1.9874144331551946e-06, + "loss": 0.1255, + "num_input_tokens_seen": 20063424, + "step": 29780 + }, + { + "epoch": 0.7276525053135612, + "grad_norm": 18.731956481933594, + "learning_rate": 1.9874009425580317e-06, + "loss": 0.2606, + "num_input_tokens_seen": 20066752, + "step": 29785 + }, + { + "epoch": 0.7277746561454084, + "grad_norm": 0.3487199544906616, + "learning_rate": 1.9873874447802236e-06, + "loss": 0.0847, + "num_input_tokens_seen": 20069696, + "step": 29790 + }, + { + "epoch": 0.7278968069772556, + "grad_norm": 30.68777084350586, + "learning_rate": 1.9873739398218687e-06, + "loss": 0.1515, + "num_input_tokens_seen": 20072896, + "step": 29795 + }, + { + "epoch": 0.7280189578091026, + "grad_norm": 4.902219295501709, + "learning_rate": 1.9873604276830647e-06, + "loss": 0.1143, + "num_input_tokens_seen": 20076096, + "step": 29800 + }, + { + "epoch": 0.7281411086409498, + "grad_norm": 23.401994705200195, + "learning_rate": 1.9873469083639103e-06, + "loss": 0.1678, + "num_input_tokens_seen": 20079424, + "step": 29805 + }, + { + "epoch": 0.728263259472797, + "grad_norm": 17.54922103881836, + "learning_rate": 1.9873333818645033e-06, + "loss": 0.1527, + "num_input_tokens_seen": 20082752, + "step": 29810 + }, + { + "epoch": 0.7283854103046442, + "grad_norm": 13.365433692932129, + "learning_rate": 1.987319848184943e-06, + "loss": 0.0715, + "num_input_tokens_seen": 20086528, + "step": 29815 + }, + { + "epoch": 0.7285075611364913, + "grad_norm": 13.830577850341797, + "learning_rate": 1.987306307325327e-06, + "loss": 0.0958, + "num_input_tokens_seen": 20089984, + "step": 29820 + }, + { + "epoch": 0.7286297119683385, + "grad_norm": 14.710626602172852, + "learning_rate": 1.9872927592857535e-06, + "loss": 0.0715, + "num_input_tokens_seen": 20093504, + "step": 29825 + }, + { + "epoch": 0.7287518628001857, + "grad_norm": 0.9895759224891663, + "learning_rate": 1.987279204066322e-06, + "loss": 0.0669, + "num_input_tokens_seen": 20096640, + "step": 29830 + }, + { + "epoch": 0.7288740136320329, + "grad_norm": 16.83676528930664, + "learning_rate": 1.98726564166713e-06, + "loss": 0.05, + "num_input_tokens_seen": 20099776, + "step": 29835 + }, + { + "epoch": 0.72899616446388, + "grad_norm": 19.734445571899414, + "learning_rate": 1.987252072088277e-06, + "loss": 0.1161, + "num_input_tokens_seen": 20102784, + "step": 29840 + }, + { + "epoch": 0.7291183152957271, + "grad_norm": 27.47193145751953, + "learning_rate": 1.9872384953298615e-06, + "loss": 0.2201, + "num_input_tokens_seen": 20105920, + "step": 29845 + }, + { + "epoch": 0.7292404661275743, + "grad_norm": 0.11942492425441742, + "learning_rate": 1.987224911391982e-06, + "loss": 0.0443, + "num_input_tokens_seen": 20109248, + "step": 29850 + }, + { + "epoch": 0.7293626169594215, + "grad_norm": 0.13697798550128937, + "learning_rate": 1.987211320274738e-06, + "loss": 0.0409, + "num_input_tokens_seen": 20112448, + "step": 29855 + }, + { + "epoch": 0.7294847677912687, + "grad_norm": 0.028840813785791397, + "learning_rate": 1.987197721978227e-06, + "loss": 0.1766, + "num_input_tokens_seen": 20115520, + "step": 29860 + }, + { + "epoch": 0.7296069186231158, + "grad_norm": 18.40993881225586, + "learning_rate": 1.987184116502549e-06, + "loss": 0.2882, + "num_input_tokens_seen": 20118784, + "step": 29865 + }, + { + "epoch": 0.729729069454963, + "grad_norm": 23.006805419921875, + "learning_rate": 1.9871705038478025e-06, + "loss": 0.1965, + "num_input_tokens_seen": 20121600, + "step": 29870 + }, + { + "epoch": 0.7298512202868102, + "grad_norm": 0.6357484459877014, + "learning_rate": 1.9871568840140863e-06, + "loss": 0.1923, + "num_input_tokens_seen": 20124672, + "step": 29875 + }, + { + "epoch": 0.7299733711186573, + "grad_norm": 0.2545313537120819, + "learning_rate": 1.9871432570015e-06, + "loss": 0.0443, + "num_input_tokens_seen": 20128128, + "step": 29880 + }, + { + "epoch": 0.7300955219505045, + "grad_norm": 5.443319320678711, + "learning_rate": 1.9871296228101426e-06, + "loss": 0.1228, + "num_input_tokens_seen": 20131328, + "step": 29885 + }, + { + "epoch": 0.7302176727823516, + "grad_norm": 0.4481469988822937, + "learning_rate": 1.9871159814401127e-06, + "loss": 0.0921, + "num_input_tokens_seen": 20134784, + "step": 29890 + }, + { + "epoch": 0.7303398236141988, + "grad_norm": 17.447044372558594, + "learning_rate": 1.9871023328915102e-06, + "loss": 0.1063, + "num_input_tokens_seen": 20137856, + "step": 29895 + }, + { + "epoch": 0.730461974446046, + "grad_norm": 3.0904958248138428, + "learning_rate": 1.987088677164434e-06, + "loss": 0.0474, + "num_input_tokens_seen": 20141376, + "step": 29900 + }, + { + "epoch": 0.7305841252778932, + "grad_norm": 19.669401168823242, + "learning_rate": 1.9870750142589835e-06, + "loss": 0.076, + "num_input_tokens_seen": 20145408, + "step": 29905 + }, + { + "epoch": 0.7307062761097403, + "grad_norm": 0.0858718529343605, + "learning_rate": 1.987061344175258e-06, + "loss": 0.0828, + "num_input_tokens_seen": 20148416, + "step": 29910 + }, + { + "epoch": 0.7308284269415875, + "grad_norm": 15.151061058044434, + "learning_rate": 1.9870476669133566e-06, + "loss": 0.2602, + "num_input_tokens_seen": 20151552, + "step": 29915 + }, + { + "epoch": 0.7309505777734346, + "grad_norm": 0.39679262042045593, + "learning_rate": 1.987033982473379e-06, + "loss": 0.1584, + "num_input_tokens_seen": 20154752, + "step": 29920 + }, + { + "epoch": 0.7310727286052818, + "grad_norm": 24.496591567993164, + "learning_rate": 1.9870202908554253e-06, + "loss": 0.2126, + "num_input_tokens_seen": 20158080, + "step": 29925 + }, + { + "epoch": 0.731194879437129, + "grad_norm": 16.306676864624023, + "learning_rate": 1.9870065920595942e-06, + "loss": 0.069, + "num_input_tokens_seen": 20161792, + "step": 29930 + }, + { + "epoch": 0.7313170302689761, + "grad_norm": 51.136375427246094, + "learning_rate": 1.986992886085986e-06, + "loss": 0.1502, + "num_input_tokens_seen": 20164800, + "step": 29935 + }, + { + "epoch": 0.7314391811008233, + "grad_norm": 14.362354278564453, + "learning_rate": 1.9869791729347e-06, + "loss": 0.1002, + "num_input_tokens_seen": 20167744, + "step": 29940 + }, + { + "epoch": 0.7315613319326705, + "grad_norm": 24.119476318359375, + "learning_rate": 1.986965452605836e-06, + "loss": 0.1387, + "num_input_tokens_seen": 20171072, + "step": 29945 + }, + { + "epoch": 0.7316834827645177, + "grad_norm": 5.217164516448975, + "learning_rate": 1.9869517250994932e-06, + "loss": 0.0676, + "num_input_tokens_seen": 20174656, + "step": 29950 + }, + { + "epoch": 0.7318056335963647, + "grad_norm": 9.99504566192627, + "learning_rate": 1.9869379904157724e-06, + "loss": 0.1438, + "num_input_tokens_seen": 20177984, + "step": 29955 + }, + { + "epoch": 0.7319277844282119, + "grad_norm": 0.7991713285446167, + "learning_rate": 1.9869242485547734e-06, + "loss": 0.0504, + "num_input_tokens_seen": 20181504, + "step": 29960 + }, + { + "epoch": 0.7320499352600591, + "grad_norm": 16.180362701416016, + "learning_rate": 1.9869104995165957e-06, + "loss": 0.1732, + "num_input_tokens_seen": 20184704, + "step": 29965 + }, + { + "epoch": 0.7321720860919063, + "grad_norm": 10.701885223388672, + "learning_rate": 1.986896743301339e-06, + "loss": 0.0326, + "num_input_tokens_seen": 20188608, + "step": 29970 + }, + { + "epoch": 0.7322942369237535, + "grad_norm": 14.758902549743652, + "learning_rate": 1.986882979909104e-06, + "loss": 0.1004, + "num_input_tokens_seen": 20192384, + "step": 29975 + }, + { + "epoch": 0.7324163877556006, + "grad_norm": 36.978084564208984, + "learning_rate": 1.9868692093399905e-06, + "loss": 0.1361, + "num_input_tokens_seen": 20195264, + "step": 29980 + }, + { + "epoch": 0.7325385385874478, + "grad_norm": 4.0124993324279785, + "learning_rate": 1.986855431594099e-06, + "loss": 0.0881, + "num_input_tokens_seen": 20199232, + "step": 29985 + }, + { + "epoch": 0.732660689419295, + "grad_norm": 1.9948008060455322, + "learning_rate": 1.986841646671529e-06, + "loss": 0.0377, + "num_input_tokens_seen": 20203264, + "step": 29990 + }, + { + "epoch": 0.7327828402511422, + "grad_norm": 0.7837508916854858, + "learning_rate": 1.986827854572381e-06, + "loss": 0.1068, + "num_input_tokens_seen": 20206592, + "step": 29995 + }, + { + "epoch": 0.7329049910829892, + "grad_norm": 13.117432594299316, + "learning_rate": 1.9868140552967555e-06, + "loss": 0.1614, + "num_input_tokens_seen": 20209856, + "step": 30000 + }, + { + "epoch": 0.7330271419148364, + "grad_norm": 37.95238494873047, + "learning_rate": 1.986800248844753e-06, + "loss": 0.0859, + "num_input_tokens_seen": 20213376, + "step": 30005 + }, + { + "epoch": 0.7331492927466836, + "grad_norm": 17.042789459228516, + "learning_rate": 1.9867864352164735e-06, + "loss": 0.1404, + "num_input_tokens_seen": 20216704, + "step": 30010 + }, + { + "epoch": 0.7332714435785308, + "grad_norm": 16.459827423095703, + "learning_rate": 1.9867726144120173e-06, + "loss": 0.1487, + "num_input_tokens_seen": 20219968, + "step": 30015 + }, + { + "epoch": 0.7333935944103779, + "grad_norm": 2.686231851577759, + "learning_rate": 1.9867587864314858e-06, + "loss": 0.0727, + "num_input_tokens_seen": 20223424, + "step": 30020 + }, + { + "epoch": 0.7335157452422251, + "grad_norm": 7.266627788543701, + "learning_rate": 1.9867449512749787e-06, + "loss": 0.095, + "num_input_tokens_seen": 20226688, + "step": 30025 + }, + { + "epoch": 0.7336378960740723, + "grad_norm": 13.3108491897583, + "learning_rate": 1.986731108942597e-06, + "loss": 0.0888, + "num_input_tokens_seen": 20229888, + "step": 30030 + }, + { + "epoch": 0.7337600469059194, + "grad_norm": 0.9927554130554199, + "learning_rate": 1.9867172594344415e-06, + "loss": 0.0408, + "num_input_tokens_seen": 20233344, + "step": 30035 + }, + { + "epoch": 0.7338821977377666, + "grad_norm": 13.185794830322266, + "learning_rate": 1.986703402750612e-06, + "loss": 0.0662, + "num_input_tokens_seen": 20236736, + "step": 30040 + }, + { + "epoch": 0.7340043485696137, + "grad_norm": 0.6203752160072327, + "learning_rate": 1.9866895388912107e-06, + "loss": 0.1591, + "num_input_tokens_seen": 20240128, + "step": 30045 + }, + { + "epoch": 0.7341264994014609, + "grad_norm": 1.171931266784668, + "learning_rate": 1.9866756678563375e-06, + "loss": 0.1238, + "num_input_tokens_seen": 20243072, + "step": 30050 + }, + { + "epoch": 0.7342486502333081, + "grad_norm": 30.23444366455078, + "learning_rate": 1.9866617896460936e-06, + "loss": 0.1967, + "num_input_tokens_seen": 20246400, + "step": 30055 + }, + { + "epoch": 0.7343708010651553, + "grad_norm": 20.61321258544922, + "learning_rate": 1.9866479042605794e-06, + "loss": 0.1251, + "num_input_tokens_seen": 20249792, + "step": 30060 + }, + { + "epoch": 0.7344929518970024, + "grad_norm": 3.040947198867798, + "learning_rate": 1.9866340116998965e-06, + "loss": 0.0821, + "num_input_tokens_seen": 20253376, + "step": 30065 + }, + { + "epoch": 0.7346151027288496, + "grad_norm": 0.0874570682644844, + "learning_rate": 1.986620111964146e-06, + "loss": 0.0891, + "num_input_tokens_seen": 20256832, + "step": 30070 + }, + { + "epoch": 0.7347372535606967, + "grad_norm": 22.917316436767578, + "learning_rate": 1.986606205053428e-06, + "loss": 0.3907, + "num_input_tokens_seen": 20260352, + "step": 30075 + }, + { + "epoch": 0.7348594043925439, + "grad_norm": 0.2937084138393402, + "learning_rate": 1.9865922909678444e-06, + "loss": 0.0612, + "num_input_tokens_seen": 20263616, + "step": 30080 + }, + { + "epoch": 0.7349815552243911, + "grad_norm": 0.6326661705970764, + "learning_rate": 1.9865783697074965e-06, + "loss": 0.1698, + "num_input_tokens_seen": 20266880, + "step": 30085 + }, + { + "epoch": 0.7351037060562382, + "grad_norm": 15.97380256652832, + "learning_rate": 1.9865644412724857e-06, + "loss": 0.0698, + "num_input_tokens_seen": 20270336, + "step": 30090 + }, + { + "epoch": 0.7352258568880854, + "grad_norm": 14.575950622558594, + "learning_rate": 1.9865505056629122e-06, + "loss": 0.1139, + "num_input_tokens_seen": 20273408, + "step": 30095 + }, + { + "epoch": 0.7353480077199326, + "grad_norm": 12.12072467803955, + "learning_rate": 1.9865365628788787e-06, + "loss": 0.193, + "num_input_tokens_seen": 20276736, + "step": 30100 + }, + { + "epoch": 0.7354701585517798, + "grad_norm": 7.838393211364746, + "learning_rate": 1.9865226129204858e-06, + "loss": 0.0898, + "num_input_tokens_seen": 20280256, + "step": 30105 + }, + { + "epoch": 0.7355923093836269, + "grad_norm": 14.239495277404785, + "learning_rate": 1.9865086557878348e-06, + "loss": 0.0696, + "num_input_tokens_seen": 20283456, + "step": 30110 + }, + { + "epoch": 0.735714460215474, + "grad_norm": 13.856727600097656, + "learning_rate": 1.9864946914810278e-06, + "loss": 0.1215, + "num_input_tokens_seen": 20286720, + "step": 30115 + }, + { + "epoch": 0.7358366110473212, + "grad_norm": 0.6099316477775574, + "learning_rate": 1.986480720000166e-06, + "loss": 0.1099, + "num_input_tokens_seen": 20289920, + "step": 30120 + }, + { + "epoch": 0.7359587618791684, + "grad_norm": 25.46120834350586, + "learning_rate": 1.986466741345351e-06, + "loss": 0.1411, + "num_input_tokens_seen": 20293312, + "step": 30125 + }, + { + "epoch": 0.7360809127110156, + "grad_norm": 15.407801628112793, + "learning_rate": 1.9864527555166844e-06, + "loss": 0.196, + "num_input_tokens_seen": 20296576, + "step": 30130 + }, + { + "epoch": 0.7362030635428627, + "grad_norm": 14.528338432312012, + "learning_rate": 1.986438762514269e-06, + "loss": 0.0724, + "num_input_tokens_seen": 20300352, + "step": 30135 + }, + { + "epoch": 0.7363252143747099, + "grad_norm": 10.5165433883667, + "learning_rate": 1.9864247623382046e-06, + "loss": 0.1201, + "num_input_tokens_seen": 20303616, + "step": 30140 + }, + { + "epoch": 0.7364473652065571, + "grad_norm": 0.36507347226142883, + "learning_rate": 1.986410754988594e-06, + "loss": 0.039, + "num_input_tokens_seen": 20306880, + "step": 30145 + }, + { + "epoch": 0.7365695160384043, + "grad_norm": 0.10817314684391022, + "learning_rate": 1.9863967404655397e-06, + "loss": 0.1245, + "num_input_tokens_seen": 20310464, + "step": 30150 + }, + { + "epoch": 0.7366916668702513, + "grad_norm": 26.634794235229492, + "learning_rate": 1.9863827187691423e-06, + "loss": 0.2526, + "num_input_tokens_seen": 20313728, + "step": 30155 + }, + { + "epoch": 0.7368138177020985, + "grad_norm": 94.8357925415039, + "learning_rate": 1.986368689899505e-06, + "loss": 0.0782, + "num_input_tokens_seen": 20316800, + "step": 30160 + }, + { + "epoch": 0.7369359685339457, + "grad_norm": 0.20598088204860687, + "learning_rate": 1.9863546538567292e-06, + "loss": 0.0681, + "num_input_tokens_seen": 20320320, + "step": 30165 + }, + { + "epoch": 0.7370581193657929, + "grad_norm": 1.645045518875122, + "learning_rate": 1.9863406106409165e-06, + "loss": 0.0784, + "num_input_tokens_seen": 20323648, + "step": 30170 + }, + { + "epoch": 0.7371802701976401, + "grad_norm": 0.7895174622535706, + "learning_rate": 1.9863265602521703e-06, + "loss": 0.0968, + "num_input_tokens_seen": 20327104, + "step": 30175 + }, + { + "epoch": 0.7373024210294872, + "grad_norm": 3.127288818359375, + "learning_rate": 1.9863125026905917e-06, + "loss": 0.1154, + "num_input_tokens_seen": 20330432, + "step": 30180 + }, + { + "epoch": 0.7374245718613344, + "grad_norm": 0.4002511203289032, + "learning_rate": 1.9862984379562833e-06, + "loss": 0.218, + "num_input_tokens_seen": 20333696, + "step": 30185 + }, + { + "epoch": 0.7375467226931816, + "grad_norm": 24.047679901123047, + "learning_rate": 1.9862843660493475e-06, + "loss": 0.1431, + "num_input_tokens_seen": 20337088, + "step": 30190 + }, + { + "epoch": 0.7376688735250287, + "grad_norm": 0.9673386216163635, + "learning_rate": 1.9862702869698865e-06, + "loss": 0.1025, + "num_input_tokens_seen": 20340032, + "step": 30195 + }, + { + "epoch": 0.7377910243568758, + "grad_norm": 6.145616054534912, + "learning_rate": 1.986256200718003e-06, + "loss": 0.0184, + "num_input_tokens_seen": 20343296, + "step": 30200 + }, + { + "epoch": 0.737913175188723, + "grad_norm": 20.293169021606445, + "learning_rate": 1.9862421072937986e-06, + "loss": 0.2651, + "num_input_tokens_seen": 20346304, + "step": 30205 + }, + { + "epoch": 0.7380353260205702, + "grad_norm": 25.896528244018555, + "learning_rate": 1.9862280066973765e-06, + "loss": 0.0288, + "num_input_tokens_seen": 20349888, + "step": 30210 + }, + { + "epoch": 0.7381574768524174, + "grad_norm": 0.5437335968017578, + "learning_rate": 1.9862138989288393e-06, + "loss": 0.1911, + "num_input_tokens_seen": 20353408, + "step": 30215 + }, + { + "epoch": 0.7382796276842646, + "grad_norm": 56.40694808959961, + "learning_rate": 1.986199783988289e-06, + "loss": 0.1025, + "num_input_tokens_seen": 20356416, + "step": 30220 + }, + { + "epoch": 0.7384017785161117, + "grad_norm": 0.2323615700006485, + "learning_rate": 1.9861856618758292e-06, + "loss": 0.2797, + "num_input_tokens_seen": 20359360, + "step": 30225 + }, + { + "epoch": 0.7385239293479589, + "grad_norm": 54.13261413574219, + "learning_rate": 1.9861715325915612e-06, + "loss": 0.137, + "num_input_tokens_seen": 20362560, + "step": 30230 + }, + { + "epoch": 0.738646080179806, + "grad_norm": 35.675174713134766, + "learning_rate": 1.986157396135589e-06, + "loss": 0.1808, + "num_input_tokens_seen": 20365760, + "step": 30235 + }, + { + "epoch": 0.7387682310116532, + "grad_norm": 0.4263570308685303, + "learning_rate": 1.986143252508015e-06, + "loss": 0.111, + "num_input_tokens_seen": 20368832, + "step": 30240 + }, + { + "epoch": 0.7388903818435003, + "grad_norm": 0.35891321301460266, + "learning_rate": 1.986129101708942e-06, + "loss": 0.0594, + "num_input_tokens_seen": 20371904, + "step": 30245 + }, + { + "epoch": 0.7390125326753475, + "grad_norm": 17.240028381347656, + "learning_rate": 1.986114943738473e-06, + "loss": 0.1885, + "num_input_tokens_seen": 20375168, + "step": 30250 + }, + { + "epoch": 0.7391346835071947, + "grad_norm": 42.6297721862793, + "learning_rate": 1.986100778596711e-06, + "loss": 0.0938, + "num_input_tokens_seen": 20378496, + "step": 30255 + }, + { + "epoch": 0.7392568343390419, + "grad_norm": 19.804243087768555, + "learning_rate": 1.9860866062837584e-06, + "loss": 0.0634, + "num_input_tokens_seen": 20382080, + "step": 30260 + }, + { + "epoch": 0.7393789851708891, + "grad_norm": 11.977760314941406, + "learning_rate": 1.986072426799719e-06, + "loss": 0.0541, + "num_input_tokens_seen": 20385664, + "step": 30265 + }, + { + "epoch": 0.7395011360027361, + "grad_norm": 1.364763617515564, + "learning_rate": 1.9860582401446957e-06, + "loss": 0.191, + "num_input_tokens_seen": 20388992, + "step": 30270 + }, + { + "epoch": 0.7396232868345833, + "grad_norm": 0.07516565173864365, + "learning_rate": 1.986044046318792e-06, + "loss": 0.0786, + "num_input_tokens_seen": 20392448, + "step": 30275 + }, + { + "epoch": 0.7397454376664305, + "grad_norm": 9.209480285644531, + "learning_rate": 1.986029845322111e-06, + "loss": 0.113, + "num_input_tokens_seen": 20395584, + "step": 30280 + }, + { + "epoch": 0.7398675884982777, + "grad_norm": 19.559255599975586, + "learning_rate": 1.986015637154755e-06, + "loss": 0.142, + "num_input_tokens_seen": 20399040, + "step": 30285 + }, + { + "epoch": 0.7399897393301248, + "grad_norm": 9.277286529541016, + "learning_rate": 1.9860014218168283e-06, + "loss": 0.1325, + "num_input_tokens_seen": 20402496, + "step": 30290 + }, + { + "epoch": 0.740111890161972, + "grad_norm": 0.35132232308387756, + "learning_rate": 1.985987199308434e-06, + "loss": 0.1215, + "num_input_tokens_seen": 20405952, + "step": 30295 + }, + { + "epoch": 0.7402340409938192, + "grad_norm": 13.239752769470215, + "learning_rate": 1.985972969629676e-06, + "loss": 0.1274, + "num_input_tokens_seen": 20409600, + "step": 30300 + }, + { + "epoch": 0.7403561918256664, + "grad_norm": 18.34958839416504, + "learning_rate": 1.985958732780657e-06, + "loss": 0.207, + "num_input_tokens_seen": 20413184, + "step": 30305 + }, + { + "epoch": 0.7404783426575134, + "grad_norm": 25.13006019592285, + "learning_rate": 1.985944488761481e-06, + "loss": 0.1524, + "num_input_tokens_seen": 20416768, + "step": 30310 + }, + { + "epoch": 0.7406004934893606, + "grad_norm": 2.601231336593628, + "learning_rate": 1.9859302375722514e-06, + "loss": 0.1147, + "num_input_tokens_seen": 20420096, + "step": 30315 + }, + { + "epoch": 0.7407226443212078, + "grad_norm": 22.842487335205078, + "learning_rate": 1.985915979213072e-06, + "loss": 0.1366, + "num_input_tokens_seen": 20422912, + "step": 30320 + }, + { + "epoch": 0.740844795153055, + "grad_norm": 9.892926216125488, + "learning_rate": 1.9859017136840465e-06, + "loss": 0.1295, + "num_input_tokens_seen": 20426304, + "step": 30325 + }, + { + "epoch": 0.7409669459849022, + "grad_norm": 2.6345763206481934, + "learning_rate": 1.9858874409852786e-06, + "loss": 0.0794, + "num_input_tokens_seen": 20429696, + "step": 30330 + }, + { + "epoch": 0.7410890968167493, + "grad_norm": 4.601977825164795, + "learning_rate": 1.9858731611168713e-06, + "loss": 0.1226, + "num_input_tokens_seen": 20433024, + "step": 30335 + }, + { + "epoch": 0.7412112476485965, + "grad_norm": 1.0514150857925415, + "learning_rate": 1.9858588740789304e-06, + "loss": 0.0475, + "num_input_tokens_seen": 20436416, + "step": 30340 + }, + { + "epoch": 0.7413333984804437, + "grad_norm": 1.2894636392593384, + "learning_rate": 1.985844579871558e-06, + "loss": 0.0868, + "num_input_tokens_seen": 20439872, + "step": 30345 + }, + { + "epoch": 0.7414555493122909, + "grad_norm": 2.993844747543335, + "learning_rate": 1.9858302784948587e-06, + "loss": 0.0766, + "num_input_tokens_seen": 20443200, + "step": 30350 + }, + { + "epoch": 0.7415777001441379, + "grad_norm": 1.2760772705078125, + "learning_rate": 1.9858159699489364e-06, + "loss": 0.0788, + "num_input_tokens_seen": 20446592, + "step": 30355 + }, + { + "epoch": 0.7416998509759851, + "grad_norm": 16.633935928344727, + "learning_rate": 1.9858016542338954e-06, + "loss": 0.116, + "num_input_tokens_seen": 20449920, + "step": 30360 + }, + { + "epoch": 0.7418220018078323, + "grad_norm": 0.8385209441184998, + "learning_rate": 1.9857873313498394e-06, + "loss": 0.0391, + "num_input_tokens_seen": 20453376, + "step": 30365 + }, + { + "epoch": 0.7419441526396795, + "grad_norm": 20.808696746826172, + "learning_rate": 1.9857730012968727e-06, + "loss": 0.1101, + "num_input_tokens_seen": 20456832, + "step": 30370 + }, + { + "epoch": 0.7420663034715267, + "grad_norm": 3.597788095474243, + "learning_rate": 1.9857586640750997e-06, + "loss": 0.1149, + "num_input_tokens_seen": 20460096, + "step": 30375 + }, + { + "epoch": 0.7421884543033738, + "grad_norm": 7.343143463134766, + "learning_rate": 1.985744319684625e-06, + "loss": 0.0846, + "num_input_tokens_seen": 20463744, + "step": 30380 + }, + { + "epoch": 0.742310605135221, + "grad_norm": 42.95801544189453, + "learning_rate": 1.985729968125552e-06, + "loss": 0.1214, + "num_input_tokens_seen": 20467264, + "step": 30385 + }, + { + "epoch": 0.7424327559670681, + "grad_norm": 15.052309036254883, + "learning_rate": 1.9857156093979857e-06, + "loss": 0.1327, + "num_input_tokens_seen": 20470720, + "step": 30390 + }, + { + "epoch": 0.7425549067989153, + "grad_norm": 45.49873733520508, + "learning_rate": 1.9857012435020303e-06, + "loss": 0.0937, + "num_input_tokens_seen": 20473728, + "step": 30395 + }, + { + "epoch": 0.7426770576307624, + "grad_norm": 11.375425338745117, + "learning_rate": 1.98568687043779e-06, + "loss": 0.0842, + "num_input_tokens_seen": 20476992, + "step": 30400 + }, + { + "epoch": 0.7427992084626096, + "grad_norm": 2.5061888694763184, + "learning_rate": 1.98567249020537e-06, + "loss": 0.1288, + "num_input_tokens_seen": 20480512, + "step": 30405 + }, + { + "epoch": 0.7429213592944568, + "grad_norm": 20.130294799804688, + "learning_rate": 1.9856581028048746e-06, + "loss": 0.1023, + "num_input_tokens_seen": 20483776, + "step": 30410 + }, + { + "epoch": 0.743043510126304, + "grad_norm": 0.7941417694091797, + "learning_rate": 1.9856437082364084e-06, + "loss": 0.01, + "num_input_tokens_seen": 20487168, + "step": 30415 + }, + { + "epoch": 0.7431656609581512, + "grad_norm": 20.287015914916992, + "learning_rate": 1.9856293065000763e-06, + "loss": 0.1419, + "num_input_tokens_seen": 20490432, + "step": 30420 + }, + { + "epoch": 0.7432878117899983, + "grad_norm": 34.13698196411133, + "learning_rate": 1.9856148975959824e-06, + "loss": 0.233, + "num_input_tokens_seen": 20493888, + "step": 30425 + }, + { + "epoch": 0.7434099626218454, + "grad_norm": 17.93484878540039, + "learning_rate": 1.9856004815242317e-06, + "loss": 0.1279, + "num_input_tokens_seen": 20496960, + "step": 30430 + }, + { + "epoch": 0.7435321134536926, + "grad_norm": 8.161019325256348, + "learning_rate": 1.9855860582849293e-06, + "loss": 0.1186, + "num_input_tokens_seen": 20500480, + "step": 30435 + }, + { + "epoch": 0.7436542642855398, + "grad_norm": 1.821930170059204, + "learning_rate": 1.9855716278781802e-06, + "loss": 0.0902, + "num_input_tokens_seen": 20503808, + "step": 30440 + }, + { + "epoch": 0.7437764151173869, + "grad_norm": 1.025917410850525, + "learning_rate": 1.985557190304089e-06, + "loss": 0.0876, + "num_input_tokens_seen": 20507072, + "step": 30445 + }, + { + "epoch": 0.7438985659492341, + "grad_norm": 9.403602600097656, + "learning_rate": 1.985542745562761e-06, + "loss": 0.1953, + "num_input_tokens_seen": 20510336, + "step": 30450 + }, + { + "epoch": 0.7440207167810813, + "grad_norm": 8.631169319152832, + "learning_rate": 1.9855282936543007e-06, + "loss": 0.1771, + "num_input_tokens_seen": 20513472, + "step": 30455 + }, + { + "epoch": 0.7441428676129285, + "grad_norm": 16.620887756347656, + "learning_rate": 1.985513834578814e-06, + "loss": 0.1286, + "num_input_tokens_seen": 20517312, + "step": 30460 + }, + { + "epoch": 0.7442650184447757, + "grad_norm": 0.690407395362854, + "learning_rate": 1.9854993683364056e-06, + "loss": 0.0696, + "num_input_tokens_seen": 20520832, + "step": 30465 + }, + { + "epoch": 0.7443871692766227, + "grad_norm": 8.413174629211426, + "learning_rate": 1.9854848949271804e-06, + "loss": 0.0721, + "num_input_tokens_seen": 20523904, + "step": 30470 + }, + { + "epoch": 0.7445093201084699, + "grad_norm": 19.46962547302246, + "learning_rate": 1.985470414351244e-06, + "loss": 0.0882, + "num_input_tokens_seen": 20527424, + "step": 30475 + }, + { + "epoch": 0.7446314709403171, + "grad_norm": 3.141707420349121, + "learning_rate": 1.985455926608702e-06, + "loss": 0.1156, + "num_input_tokens_seen": 20530816, + "step": 30480 + }, + { + "epoch": 0.7447536217721643, + "grad_norm": 46.364749908447266, + "learning_rate": 1.985441431699659e-06, + "loss": 0.1396, + "num_input_tokens_seen": 20534208, + "step": 30485 + }, + { + "epoch": 0.7448757726040114, + "grad_norm": 5.581932544708252, + "learning_rate": 1.9854269296242216e-06, + "loss": 0.0677, + "num_input_tokens_seen": 20537664, + "step": 30490 + }, + { + "epoch": 0.7449979234358586, + "grad_norm": 21.660884857177734, + "learning_rate": 1.9854124203824936e-06, + "loss": 0.084, + "num_input_tokens_seen": 20540672, + "step": 30495 + }, + { + "epoch": 0.7451200742677058, + "grad_norm": 8.933382034301758, + "learning_rate": 1.985397903974582e-06, + "loss": 0.0468, + "num_input_tokens_seen": 20543808, + "step": 30500 + }, + { + "epoch": 0.745242225099553, + "grad_norm": 0.280394971370697, + "learning_rate": 1.985383380400592e-06, + "loss": 0.196, + "num_input_tokens_seen": 20547200, + "step": 30505 + }, + { + "epoch": 0.7453643759314001, + "grad_norm": 28.417062759399414, + "learning_rate": 1.9853688496606286e-06, + "loss": 0.2718, + "num_input_tokens_seen": 20550720, + "step": 30510 + }, + { + "epoch": 0.7454865267632472, + "grad_norm": 36.462669372558594, + "learning_rate": 1.985354311754798e-06, + "loss": 0.0283, + "num_input_tokens_seen": 20553792, + "step": 30515 + }, + { + "epoch": 0.7456086775950944, + "grad_norm": 29.662124633789062, + "learning_rate": 1.985339766683206e-06, + "loss": 0.0974, + "num_input_tokens_seen": 20557120, + "step": 30520 + }, + { + "epoch": 0.7457308284269416, + "grad_norm": 18.273874282836914, + "learning_rate": 1.985325214445958e-06, + "loss": 0.0844, + "num_input_tokens_seen": 20560256, + "step": 30525 + }, + { + "epoch": 0.7458529792587888, + "grad_norm": 6.227776527404785, + "learning_rate": 1.98531065504316e-06, + "loss": 0.0386, + "num_input_tokens_seen": 20563584, + "step": 30530 + }, + { + "epoch": 0.7459751300906359, + "grad_norm": 13.516763687133789, + "learning_rate": 1.985296088474918e-06, + "loss": 0.2243, + "num_input_tokens_seen": 20566912, + "step": 30535 + }, + { + "epoch": 0.7460972809224831, + "grad_norm": 13.371734619140625, + "learning_rate": 1.9852815147413376e-06, + "loss": 0.2049, + "num_input_tokens_seen": 20570176, + "step": 30540 + }, + { + "epoch": 0.7462194317543303, + "grad_norm": 1.8927969932556152, + "learning_rate": 1.985266933842525e-06, + "loss": 0.0925, + "num_input_tokens_seen": 20573440, + "step": 30545 + }, + { + "epoch": 0.7463415825861774, + "grad_norm": 21.35140037536621, + "learning_rate": 1.9852523457785864e-06, + "loss": 0.1478, + "num_input_tokens_seen": 20576768, + "step": 30550 + }, + { + "epoch": 0.7464637334180245, + "grad_norm": 1.4200108051300049, + "learning_rate": 1.985237750549628e-06, + "loss": 0.077, + "num_input_tokens_seen": 20580416, + "step": 30555 + }, + { + "epoch": 0.7465858842498717, + "grad_norm": 15.096092224121094, + "learning_rate": 1.9852231481557556e-06, + "loss": 0.1403, + "num_input_tokens_seen": 20583488, + "step": 30560 + }, + { + "epoch": 0.7467080350817189, + "grad_norm": 12.31279182434082, + "learning_rate": 1.985208538597075e-06, + "loss": 0.1793, + "num_input_tokens_seen": 20587520, + "step": 30565 + }, + { + "epoch": 0.7468301859135661, + "grad_norm": 1.6888927221298218, + "learning_rate": 1.9851939218736937e-06, + "loss": 0.0606, + "num_input_tokens_seen": 20591232, + "step": 30570 + }, + { + "epoch": 0.7469523367454133, + "grad_norm": 14.612831115722656, + "learning_rate": 1.9851792979857166e-06, + "loss": 0.0999, + "num_input_tokens_seen": 20594816, + "step": 30575 + }, + { + "epoch": 0.7470744875772604, + "grad_norm": 15.661672592163086, + "learning_rate": 1.9851646669332507e-06, + "loss": 0.1943, + "num_input_tokens_seen": 20597888, + "step": 30580 + }, + { + "epoch": 0.7471966384091075, + "grad_norm": 50.38776397705078, + "learning_rate": 1.9851500287164023e-06, + "loss": 0.1239, + "num_input_tokens_seen": 20601472, + "step": 30585 + }, + { + "epoch": 0.7473187892409547, + "grad_norm": 5.186892032623291, + "learning_rate": 1.985135383335278e-06, + "loss": 0.1193, + "num_input_tokens_seen": 20604864, + "step": 30590 + }, + { + "epoch": 0.7474409400728019, + "grad_norm": 9.449992179870605, + "learning_rate": 1.9851207307899847e-06, + "loss": 0.0523, + "num_input_tokens_seen": 20608384, + "step": 30595 + }, + { + "epoch": 0.747563090904649, + "grad_norm": 16.765907287597656, + "learning_rate": 1.985106071080628e-06, + "loss": 0.0525, + "num_input_tokens_seen": 20611712, + "step": 30600 + }, + { + "epoch": 0.7476852417364962, + "grad_norm": 4.915185451507568, + "learning_rate": 1.985091404207315e-06, + "loss": 0.0778, + "num_input_tokens_seen": 20615104, + "step": 30605 + }, + { + "epoch": 0.7478073925683434, + "grad_norm": 10.675619125366211, + "learning_rate": 1.9850767301701523e-06, + "loss": 0.0828, + "num_input_tokens_seen": 20618432, + "step": 30610 + }, + { + "epoch": 0.7479295434001906, + "grad_norm": 9.624067306518555, + "learning_rate": 1.985062048969247e-06, + "loss": 0.1881, + "num_input_tokens_seen": 20621760, + "step": 30615 + }, + { + "epoch": 0.7480516942320378, + "grad_norm": 20.003002166748047, + "learning_rate": 1.985047360604705e-06, + "loss": 0.1091, + "num_input_tokens_seen": 20625280, + "step": 30620 + }, + { + "epoch": 0.7481738450638848, + "grad_norm": 17.2797794342041, + "learning_rate": 1.9850326650766343e-06, + "loss": 0.0786, + "num_input_tokens_seen": 20628608, + "step": 30625 + }, + { + "epoch": 0.748295995895732, + "grad_norm": 2.4404211044311523, + "learning_rate": 1.985017962385141e-06, + "loss": 0.0865, + "num_input_tokens_seen": 20631744, + "step": 30630 + }, + { + "epoch": 0.7484181467275792, + "grad_norm": 4.35370397567749, + "learning_rate": 1.985003252530332e-06, + "loss": 0.0841, + "num_input_tokens_seen": 20635200, + "step": 30635 + }, + { + "epoch": 0.7485402975594264, + "grad_norm": 19.579072952270508, + "learning_rate": 1.984988535512314e-06, + "loss": 0.0835, + "num_input_tokens_seen": 20638912, + "step": 30640 + }, + { + "epoch": 0.7486624483912735, + "grad_norm": 12.48481273651123, + "learning_rate": 1.984973811331195e-06, + "loss": 0.098, + "num_input_tokens_seen": 20642368, + "step": 30645 + }, + { + "epoch": 0.7487845992231207, + "grad_norm": 2.2552871704101562, + "learning_rate": 1.9849590799870813e-06, + "loss": 0.1273, + "num_input_tokens_seen": 20646336, + "step": 30650 + }, + { + "epoch": 0.7489067500549679, + "grad_norm": 17.537343978881836, + "learning_rate": 1.98494434148008e-06, + "loss": 0.0843, + "num_input_tokens_seen": 20649600, + "step": 30655 + }, + { + "epoch": 0.7490289008868151, + "grad_norm": 13.28173542022705, + "learning_rate": 1.984929595810299e-06, + "loss": 0.0638, + "num_input_tokens_seen": 20653184, + "step": 30660 + }, + { + "epoch": 0.7491510517186623, + "grad_norm": 18.33385467529297, + "learning_rate": 1.984914842977845e-06, + "loss": 0.1266, + "num_input_tokens_seen": 20656192, + "step": 30665 + }, + { + "epoch": 0.7492732025505093, + "grad_norm": 29.58997917175293, + "learning_rate": 1.984900082982825e-06, + "loss": 0.2753, + "num_input_tokens_seen": 20659136, + "step": 30670 + }, + { + "epoch": 0.7493953533823565, + "grad_norm": 17.214855194091797, + "learning_rate": 1.9848853158253472e-06, + "loss": 0.061, + "num_input_tokens_seen": 20662400, + "step": 30675 + }, + { + "epoch": 0.7495175042142037, + "grad_norm": 33.4477653503418, + "learning_rate": 1.984870541505518e-06, + "loss": 0.2228, + "num_input_tokens_seen": 20665856, + "step": 30680 + }, + { + "epoch": 0.7496396550460509, + "grad_norm": 38.7508430480957, + "learning_rate": 1.9848557600234453e-06, + "loss": 0.0521, + "num_input_tokens_seen": 20669440, + "step": 30685 + }, + { + "epoch": 0.749761805877898, + "grad_norm": 28.749181747436523, + "learning_rate": 1.984840971379237e-06, + "loss": 0.1166, + "num_input_tokens_seen": 20672640, + "step": 30690 + }, + { + "epoch": 0.7498839567097452, + "grad_norm": 3.055140733718872, + "learning_rate": 1.9848261755730002e-06, + "loss": 0.0697, + "num_input_tokens_seen": 20676160, + "step": 30695 + }, + { + "epoch": 0.7500061075415924, + "grad_norm": 10.008118629455566, + "learning_rate": 1.9848113726048427e-06, + "loss": 0.0459, + "num_input_tokens_seen": 20679424, + "step": 30700 + }, + { + "epoch": 0.7500549678743312, + "eval_loss": 0.1214575320482254, + "eval_runtime": 48.0351, + "eval_samples_per_second": 757.467, + "eval_steps_per_second": 94.702, + "num_input_tokens_seen": 20680640, + "step": 30702 + }, + { + "epoch": 0.7501282583734395, + "grad_norm": 32.10566329956055, + "learning_rate": 1.9847965624748717e-06, + "loss": 0.0963, + "num_input_tokens_seen": 20683008, + "step": 30705 + }, + { + "epoch": 0.7502504092052867, + "grad_norm": 0.3510077893733978, + "learning_rate": 1.9847817451831952e-06, + "loss": 0.0733, + "num_input_tokens_seen": 20686400, + "step": 30710 + }, + { + "epoch": 0.7503725600371338, + "grad_norm": 12.46418285369873, + "learning_rate": 1.9847669207299212e-06, + "loss": 0.1472, + "num_input_tokens_seen": 20690048, + "step": 30715 + }, + { + "epoch": 0.750494710868981, + "grad_norm": 0.30937764048576355, + "learning_rate": 1.984752089115157e-06, + "loss": 0.1717, + "num_input_tokens_seen": 20693824, + "step": 30720 + }, + { + "epoch": 0.7506168617008282, + "grad_norm": 1.3294833898544312, + "learning_rate": 1.9847372503390106e-06, + "loss": 0.0115, + "num_input_tokens_seen": 20697344, + "step": 30725 + }, + { + "epoch": 0.7507390125326754, + "grad_norm": 2.612206220626831, + "learning_rate": 1.984722404401591e-06, + "loss": 0.0988, + "num_input_tokens_seen": 20700992, + "step": 30730 + }, + { + "epoch": 0.7508611633645225, + "grad_norm": 18.258258819580078, + "learning_rate": 1.9847075513030042e-06, + "loss": 0.1538, + "num_input_tokens_seen": 20704384, + "step": 30735 + }, + { + "epoch": 0.7509833141963697, + "grad_norm": 0.13408470153808594, + "learning_rate": 1.9846926910433597e-06, + "loss": 0.0825, + "num_input_tokens_seen": 20707968, + "step": 30740 + }, + { + "epoch": 0.7511054650282168, + "grad_norm": 76.22888946533203, + "learning_rate": 1.984677823622765e-06, + "loss": 0.2069, + "num_input_tokens_seen": 20711680, + "step": 30745 + }, + { + "epoch": 0.751227615860064, + "grad_norm": 17.194808959960938, + "learning_rate": 1.9846629490413284e-06, + "loss": 0.1362, + "num_input_tokens_seen": 20714880, + "step": 30750 + }, + { + "epoch": 0.7513497666919112, + "grad_norm": 12.739709854125977, + "learning_rate": 1.9846480672991576e-06, + "loss": 0.1142, + "num_input_tokens_seen": 20718720, + "step": 30755 + }, + { + "epoch": 0.7514719175237583, + "grad_norm": 0.08701960742473602, + "learning_rate": 1.9846331783963618e-06, + "loss": 0.1044, + "num_input_tokens_seen": 20722112, + "step": 30760 + }, + { + "epoch": 0.7515940683556055, + "grad_norm": 28.54833984375, + "learning_rate": 1.9846182823330483e-06, + "loss": 0.1727, + "num_input_tokens_seen": 20725312, + "step": 30765 + }, + { + "epoch": 0.7517162191874527, + "grad_norm": 32.205657958984375, + "learning_rate": 1.984603379109326e-06, + "loss": 0.0193, + "num_input_tokens_seen": 20729664, + "step": 30770 + }, + { + "epoch": 0.7518383700192999, + "grad_norm": 24.44585418701172, + "learning_rate": 1.984588468725303e-06, + "loss": 0.2256, + "num_input_tokens_seen": 20732992, + "step": 30775 + }, + { + "epoch": 0.751960520851147, + "grad_norm": 18.783945083618164, + "learning_rate": 1.984573551181088e-06, + "loss": 0.1115, + "num_input_tokens_seen": 20736128, + "step": 30780 + }, + { + "epoch": 0.7520826716829941, + "grad_norm": 36.28510665893555, + "learning_rate": 1.984558626476789e-06, + "loss": 0.0376, + "num_input_tokens_seen": 20739456, + "step": 30785 + }, + { + "epoch": 0.7522048225148413, + "grad_norm": 9.365072250366211, + "learning_rate": 1.984543694612515e-06, + "loss": 0.071, + "num_input_tokens_seen": 20742976, + "step": 30790 + }, + { + "epoch": 0.7523269733466885, + "grad_norm": 33.965946197509766, + "learning_rate": 1.9845287555883745e-06, + "loss": 0.0298, + "num_input_tokens_seen": 20746304, + "step": 30795 + }, + { + "epoch": 0.7524491241785357, + "grad_norm": 41.173213958740234, + "learning_rate": 1.984513809404476e-06, + "loss": 0.0712, + "num_input_tokens_seen": 20749568, + "step": 30800 + }, + { + "epoch": 0.7525712750103828, + "grad_norm": 23.996231079101562, + "learning_rate": 1.9844988560609287e-06, + "loss": 0.1266, + "num_input_tokens_seen": 20753024, + "step": 30805 + }, + { + "epoch": 0.75269342584223, + "grad_norm": 19.44346046447754, + "learning_rate": 1.98448389555784e-06, + "loss": 0.0992, + "num_input_tokens_seen": 20756224, + "step": 30810 + }, + { + "epoch": 0.7528155766740772, + "grad_norm": 23.903226852416992, + "learning_rate": 1.9844689278953204e-06, + "loss": 0.1103, + "num_input_tokens_seen": 20759424, + "step": 30815 + }, + { + "epoch": 0.7529377275059244, + "grad_norm": 14.277491569519043, + "learning_rate": 1.984453953073478e-06, + "loss": 0.123, + "num_input_tokens_seen": 20763072, + "step": 30820 + }, + { + "epoch": 0.7530598783377714, + "grad_norm": 17.374441146850586, + "learning_rate": 1.984438971092421e-06, + "loss": 0.0925, + "num_input_tokens_seen": 20766208, + "step": 30825 + }, + { + "epoch": 0.7531820291696186, + "grad_norm": 9.210493087768555, + "learning_rate": 1.9844239819522595e-06, + "loss": 0.1563, + "num_input_tokens_seen": 20770496, + "step": 30830 + }, + { + "epoch": 0.7533041800014658, + "grad_norm": 38.678043365478516, + "learning_rate": 1.984408985653102e-06, + "loss": 0.155, + "num_input_tokens_seen": 20773696, + "step": 30835 + }, + { + "epoch": 0.753426330833313, + "grad_norm": 6.5732622146606445, + "learning_rate": 1.9843939821950577e-06, + "loss": 0.0861, + "num_input_tokens_seen": 20776896, + "step": 30840 + }, + { + "epoch": 0.7535484816651601, + "grad_norm": 26.16443634033203, + "learning_rate": 1.9843789715782356e-06, + "loss": 0.1297, + "num_input_tokens_seen": 20780224, + "step": 30845 + }, + { + "epoch": 0.7536706324970073, + "grad_norm": 10.182201385498047, + "learning_rate": 1.984363953802744e-06, + "loss": 0.0626, + "num_input_tokens_seen": 20783488, + "step": 30850 + }, + { + "epoch": 0.7537927833288545, + "grad_norm": 27.792400360107422, + "learning_rate": 1.984348928868694e-06, + "loss": 0.2264, + "num_input_tokens_seen": 20787136, + "step": 30855 + }, + { + "epoch": 0.7539149341607017, + "grad_norm": 16.452760696411133, + "learning_rate": 1.9843338967761934e-06, + "loss": 0.1147, + "num_input_tokens_seen": 20790912, + "step": 30860 + }, + { + "epoch": 0.7540370849925488, + "grad_norm": 2.219255208969116, + "learning_rate": 1.984318857525352e-06, + "loss": 0.1371, + "num_input_tokens_seen": 20794112, + "step": 30865 + }, + { + "epoch": 0.7541592358243959, + "grad_norm": 0.26177316904067993, + "learning_rate": 1.9843038111162796e-06, + "loss": 0.3025, + "num_input_tokens_seen": 20797184, + "step": 30870 + }, + { + "epoch": 0.7542813866562431, + "grad_norm": 0.38541722297668457, + "learning_rate": 1.9842887575490844e-06, + "loss": 0.0441, + "num_input_tokens_seen": 20800576, + "step": 30875 + }, + { + "epoch": 0.7544035374880903, + "grad_norm": 10.842700004577637, + "learning_rate": 1.9842736968238773e-06, + "loss": 0.1573, + "num_input_tokens_seen": 20803968, + "step": 30880 + }, + { + "epoch": 0.7545256883199375, + "grad_norm": 14.55325698852539, + "learning_rate": 1.9842586289407665e-06, + "loss": 0.1318, + "num_input_tokens_seen": 20807488, + "step": 30885 + }, + { + "epoch": 0.7546478391517846, + "grad_norm": 28.751131057739258, + "learning_rate": 1.9842435538998627e-06, + "loss": 0.1698, + "num_input_tokens_seen": 20811008, + "step": 30890 + }, + { + "epoch": 0.7547699899836318, + "grad_norm": 17.204044342041016, + "learning_rate": 1.9842284717012743e-06, + "loss": 0.1536, + "num_input_tokens_seen": 20814464, + "step": 30895 + }, + { + "epoch": 0.754892140815479, + "grad_norm": 33.994510650634766, + "learning_rate": 1.984213382345112e-06, + "loss": 0.1537, + "num_input_tokens_seen": 20817664, + "step": 30900 + }, + { + "epoch": 0.7550142916473261, + "grad_norm": 16.428762435913086, + "learning_rate": 1.984198285831486e-06, + "loss": 0.1269, + "num_input_tokens_seen": 20821184, + "step": 30905 + }, + { + "epoch": 0.7551364424791733, + "grad_norm": 17.435991287231445, + "learning_rate": 1.9841831821605045e-06, + "loss": 0.1899, + "num_input_tokens_seen": 20825216, + "step": 30910 + }, + { + "epoch": 0.7552585933110204, + "grad_norm": 1.3063730001449585, + "learning_rate": 1.9841680713322786e-06, + "loss": 0.1374, + "num_input_tokens_seen": 20829056, + "step": 30915 + }, + { + "epoch": 0.7553807441428676, + "grad_norm": 2.680938959121704, + "learning_rate": 1.984152953346918e-06, + "loss": 0.1459, + "num_input_tokens_seen": 20832512, + "step": 30920 + }, + { + "epoch": 0.7555028949747148, + "grad_norm": 0.2480306476354599, + "learning_rate": 1.984137828204532e-06, + "loss": 0.1336, + "num_input_tokens_seen": 20835968, + "step": 30925 + }, + { + "epoch": 0.755625045806562, + "grad_norm": 5.193900108337402, + "learning_rate": 1.9841226959052314e-06, + "loss": 0.0356, + "num_input_tokens_seen": 20839680, + "step": 30930 + }, + { + "epoch": 0.7557471966384091, + "grad_norm": 1.7502617835998535, + "learning_rate": 1.9841075564491253e-06, + "loss": 0.1557, + "num_input_tokens_seen": 20843456, + "step": 30935 + }, + { + "epoch": 0.7558693474702562, + "grad_norm": 11.336082458496094, + "learning_rate": 1.984092409836325e-06, + "loss": 0.0466, + "num_input_tokens_seen": 20846976, + "step": 30940 + }, + { + "epoch": 0.7559914983021034, + "grad_norm": 9.426225662231445, + "learning_rate": 1.984077256066939e-06, + "loss": 0.0731, + "num_input_tokens_seen": 20850048, + "step": 30945 + }, + { + "epoch": 0.7561136491339506, + "grad_norm": 24.966283798217773, + "learning_rate": 1.9840620951410797e-06, + "loss": 0.1131, + "num_input_tokens_seen": 20853632, + "step": 30950 + }, + { + "epoch": 0.7562357999657978, + "grad_norm": 0.18268482387065887, + "learning_rate": 1.9840469270588557e-06, + "loss": 0.0763, + "num_input_tokens_seen": 20857024, + "step": 30955 + }, + { + "epoch": 0.7563579507976449, + "grad_norm": 10.175745964050293, + "learning_rate": 1.9840317518203773e-06, + "loss": 0.0524, + "num_input_tokens_seen": 20860416, + "step": 30960 + }, + { + "epoch": 0.7564801016294921, + "grad_norm": 30.9974365234375, + "learning_rate": 1.984016569425756e-06, + "loss": 0.1901, + "num_input_tokens_seen": 20864064, + "step": 30965 + }, + { + "epoch": 0.7566022524613393, + "grad_norm": 22.473278045654297, + "learning_rate": 1.984001379875101e-06, + "loss": 0.1206, + "num_input_tokens_seen": 20867264, + "step": 30970 + }, + { + "epoch": 0.7567244032931865, + "grad_norm": 21.42226791381836, + "learning_rate": 1.9839861831685235e-06, + "loss": 0.0689, + "num_input_tokens_seen": 20870784, + "step": 30975 + }, + { + "epoch": 0.7568465541250335, + "grad_norm": 12.539742469787598, + "learning_rate": 1.983970979306134e-06, + "loss": 0.0657, + "num_input_tokens_seen": 20873856, + "step": 30980 + }, + { + "epoch": 0.7569687049568807, + "grad_norm": 3.040371894836426, + "learning_rate": 1.983955768288043e-06, + "loss": 0.1117, + "num_input_tokens_seen": 20876864, + "step": 30985 + }, + { + "epoch": 0.7570908557887279, + "grad_norm": 20.58167266845703, + "learning_rate": 1.9839405501143606e-06, + "loss": 0.1582, + "num_input_tokens_seen": 20880192, + "step": 30990 + }, + { + "epoch": 0.7572130066205751, + "grad_norm": 21.035499572753906, + "learning_rate": 1.983925324785198e-06, + "loss": 0.202, + "num_input_tokens_seen": 20883584, + "step": 30995 + }, + { + "epoch": 0.7573351574524223, + "grad_norm": 20.17402458190918, + "learning_rate": 1.983910092300666e-06, + "loss": 0.166, + "num_input_tokens_seen": 20886336, + "step": 31000 + }, + { + "epoch": 0.7574573082842694, + "grad_norm": 22.944866180419922, + "learning_rate": 1.983894852660875e-06, + "loss": 0.0651, + "num_input_tokens_seen": 20889728, + "step": 31005 + }, + { + "epoch": 0.7575794591161166, + "grad_norm": 2.4674253463745117, + "learning_rate": 1.983879605865936e-06, + "loss": 0.0693, + "num_input_tokens_seen": 20893184, + "step": 31010 + }, + { + "epoch": 0.7577016099479638, + "grad_norm": 1.2100327014923096, + "learning_rate": 1.9838643519159596e-06, + "loss": 0.1108, + "num_input_tokens_seen": 20896384, + "step": 31015 + }, + { + "epoch": 0.757823760779811, + "grad_norm": 16.186931610107422, + "learning_rate": 1.9838490908110573e-06, + "loss": 0.1034, + "num_input_tokens_seen": 20899776, + "step": 31020 + }, + { + "epoch": 0.757945911611658, + "grad_norm": 29.93137550354004, + "learning_rate": 1.9838338225513397e-06, + "loss": 0.2739, + "num_input_tokens_seen": 20902848, + "step": 31025 + }, + { + "epoch": 0.7580680624435052, + "grad_norm": 12.030484199523926, + "learning_rate": 1.9838185471369182e-06, + "loss": 0.1084, + "num_input_tokens_seen": 20906368, + "step": 31030 + }, + { + "epoch": 0.7581902132753524, + "grad_norm": 0.8099299669265747, + "learning_rate": 1.9838032645679033e-06, + "loss": 0.1732, + "num_input_tokens_seen": 20909952, + "step": 31035 + }, + { + "epoch": 0.7583123641071996, + "grad_norm": 0.6834255456924438, + "learning_rate": 1.9837879748444065e-06, + "loss": 0.0867, + "num_input_tokens_seen": 20913152, + "step": 31040 + }, + { + "epoch": 0.7584345149390468, + "grad_norm": 18.76382064819336, + "learning_rate": 1.983772677966539e-06, + "loss": 0.1665, + "num_input_tokens_seen": 20916608, + "step": 31045 + }, + { + "epoch": 0.7585566657708939, + "grad_norm": 25.39693832397461, + "learning_rate": 1.983757373934412e-06, + "loss": 0.1466, + "num_input_tokens_seen": 20920512, + "step": 31050 + }, + { + "epoch": 0.7586788166027411, + "grad_norm": 1.737460970878601, + "learning_rate": 1.983742062748137e-06, + "loss": 0.0726, + "num_input_tokens_seen": 20924032, + "step": 31055 + }, + { + "epoch": 0.7588009674345882, + "grad_norm": 13.280280113220215, + "learning_rate": 1.9837267444078245e-06, + "loss": 0.0878, + "num_input_tokens_seen": 20927424, + "step": 31060 + }, + { + "epoch": 0.7589231182664354, + "grad_norm": 10.46796989440918, + "learning_rate": 1.9837114189135867e-06, + "loss": 0.0989, + "num_input_tokens_seen": 20931136, + "step": 31065 + }, + { + "epoch": 0.7590452690982825, + "grad_norm": 26.009592056274414, + "learning_rate": 1.9836960862655352e-06, + "loss": 0.1624, + "num_input_tokens_seen": 20935040, + "step": 31070 + }, + { + "epoch": 0.7591674199301297, + "grad_norm": 17.237714767456055, + "learning_rate": 1.9836807464637814e-06, + "loss": 0.0539, + "num_input_tokens_seen": 20938304, + "step": 31075 + }, + { + "epoch": 0.7592895707619769, + "grad_norm": 33.32792282104492, + "learning_rate": 1.983665399508436e-06, + "loss": 0.1256, + "num_input_tokens_seen": 20941568, + "step": 31080 + }, + { + "epoch": 0.7594117215938241, + "grad_norm": 44.93278121948242, + "learning_rate": 1.9836500453996116e-06, + "loss": 0.1151, + "num_input_tokens_seen": 20944896, + "step": 31085 + }, + { + "epoch": 0.7595338724256712, + "grad_norm": 10.59104061126709, + "learning_rate": 1.9836346841374192e-06, + "loss": 0.0971, + "num_input_tokens_seen": 20948160, + "step": 31090 + }, + { + "epoch": 0.7596560232575184, + "grad_norm": 38.38275146484375, + "learning_rate": 1.9836193157219713e-06, + "loss": 0.2196, + "num_input_tokens_seen": 20951360, + "step": 31095 + }, + { + "epoch": 0.7597781740893655, + "grad_norm": 17.22246742248535, + "learning_rate": 1.983603940153379e-06, + "loss": 0.1416, + "num_input_tokens_seen": 20954752, + "step": 31100 + }, + { + "epoch": 0.7599003249212127, + "grad_norm": 8.937365531921387, + "learning_rate": 1.983588557431754e-06, + "loss": 0.0766, + "num_input_tokens_seen": 20957952, + "step": 31105 + }, + { + "epoch": 0.7600224757530599, + "grad_norm": 2.2201955318450928, + "learning_rate": 1.983573167557209e-06, + "loss": 0.1191, + "num_input_tokens_seen": 20961152, + "step": 31110 + }, + { + "epoch": 0.760144626584907, + "grad_norm": 2.811094045639038, + "learning_rate": 1.9835577705298545e-06, + "loss": 0.1919, + "num_input_tokens_seen": 20964224, + "step": 31115 + }, + { + "epoch": 0.7602667774167542, + "grad_norm": 0.6476378440856934, + "learning_rate": 1.983542366349804e-06, + "loss": 0.0693, + "num_input_tokens_seen": 20967552, + "step": 31120 + }, + { + "epoch": 0.7603889282486014, + "grad_norm": 1.8725968599319458, + "learning_rate": 1.9835269550171687e-06, + "loss": 0.1099, + "num_input_tokens_seen": 20970496, + "step": 31125 + }, + { + "epoch": 0.7605110790804486, + "grad_norm": 5.410181999206543, + "learning_rate": 1.983511536532061e-06, + "loss": 0.1083, + "num_input_tokens_seen": 20973632, + "step": 31130 + }, + { + "epoch": 0.7606332299122956, + "grad_norm": 16.716602325439453, + "learning_rate": 1.983496110894593e-06, + "loss": 0.1891, + "num_input_tokens_seen": 20977152, + "step": 31135 + }, + { + "epoch": 0.7607553807441428, + "grad_norm": 0.11880694329738617, + "learning_rate": 1.9834806781048764e-06, + "loss": 0.0849, + "num_input_tokens_seen": 20980736, + "step": 31140 + }, + { + "epoch": 0.76087753157599, + "grad_norm": 2.018104314804077, + "learning_rate": 1.983465238163024e-06, + "loss": 0.0771, + "num_input_tokens_seen": 20983936, + "step": 31145 + }, + { + "epoch": 0.7609996824078372, + "grad_norm": 0.5362045168876648, + "learning_rate": 1.9834497910691478e-06, + "loss": 0.044, + "num_input_tokens_seen": 20988224, + "step": 31150 + }, + { + "epoch": 0.7611218332396844, + "grad_norm": 32.71379852294922, + "learning_rate": 1.98343433682336e-06, + "loss": 0.0602, + "num_input_tokens_seen": 20991552, + "step": 31155 + }, + { + "epoch": 0.7612439840715315, + "grad_norm": 21.509185791015625, + "learning_rate": 1.9834188754257733e-06, + "loss": 0.2165, + "num_input_tokens_seen": 20994688, + "step": 31160 + }, + { + "epoch": 0.7613661349033787, + "grad_norm": 37.06200408935547, + "learning_rate": 1.9834034068765e-06, + "loss": 0.0566, + "num_input_tokens_seen": 20997952, + "step": 31165 + }, + { + "epoch": 0.7614882857352259, + "grad_norm": 5.213249683380127, + "learning_rate": 1.983387931175653e-06, + "loss": 0.1226, + "num_input_tokens_seen": 21001216, + "step": 31170 + }, + { + "epoch": 0.7616104365670731, + "grad_norm": 10.156071662902832, + "learning_rate": 1.983372448323344e-06, + "loss": 0.0843, + "num_input_tokens_seen": 21004736, + "step": 31175 + }, + { + "epoch": 0.7617325873989201, + "grad_norm": 18.279315948486328, + "learning_rate": 1.983356958319686e-06, + "loss": 0.0738, + "num_input_tokens_seen": 21008064, + "step": 31180 + }, + { + "epoch": 0.7618547382307673, + "grad_norm": 3.7977609634399414, + "learning_rate": 1.9833414611647925e-06, + "loss": 0.044, + "num_input_tokens_seen": 21011136, + "step": 31185 + }, + { + "epoch": 0.7619768890626145, + "grad_norm": 21.37438201904297, + "learning_rate": 1.9833259568587744e-06, + "loss": 0.1582, + "num_input_tokens_seen": 21014400, + "step": 31190 + }, + { + "epoch": 0.7620990398944617, + "grad_norm": 14.26490306854248, + "learning_rate": 1.983310445401746e-06, + "loss": 0.0659, + "num_input_tokens_seen": 21018112, + "step": 31195 + }, + { + "epoch": 0.7622211907263089, + "grad_norm": 36.653236389160156, + "learning_rate": 1.9832949267938195e-06, + "loss": 0.0602, + "num_input_tokens_seen": 21021120, + "step": 31200 + }, + { + "epoch": 0.762343341558156, + "grad_norm": 1.8205872774124146, + "learning_rate": 1.9832794010351077e-06, + "loss": 0.1516, + "num_input_tokens_seen": 21024384, + "step": 31205 + }, + { + "epoch": 0.7624654923900032, + "grad_norm": 7.122799873352051, + "learning_rate": 1.9832638681257238e-06, + "loss": 0.1687, + "num_input_tokens_seen": 21027328, + "step": 31210 + }, + { + "epoch": 0.7625876432218504, + "grad_norm": 62.234737396240234, + "learning_rate": 1.9832483280657805e-06, + "loss": 0.1582, + "num_input_tokens_seen": 21030912, + "step": 31215 + }, + { + "epoch": 0.7627097940536975, + "grad_norm": 0.24601157009601593, + "learning_rate": 1.983232780855391e-06, + "loss": 0.0989, + "num_input_tokens_seen": 21034688, + "step": 31220 + }, + { + "epoch": 0.7628319448855446, + "grad_norm": 28.595394134521484, + "learning_rate": 1.983217226494668e-06, + "loss": 0.1562, + "num_input_tokens_seen": 21037760, + "step": 31225 + }, + { + "epoch": 0.7629540957173918, + "grad_norm": 10.716483116149902, + "learning_rate": 1.983201664983725e-06, + "loss": 0.1243, + "num_input_tokens_seen": 21040896, + "step": 31230 + }, + { + "epoch": 0.763076246549239, + "grad_norm": 0.8800622820854187, + "learning_rate": 1.9831860963226754e-06, + "loss": 0.1733, + "num_input_tokens_seen": 21044864, + "step": 31235 + }, + { + "epoch": 0.7631983973810862, + "grad_norm": 13.254878044128418, + "learning_rate": 1.9831705205116317e-06, + "loss": 0.1372, + "num_input_tokens_seen": 21048128, + "step": 31240 + }, + { + "epoch": 0.7633205482129334, + "grad_norm": 30.35858917236328, + "learning_rate": 1.9831549375507076e-06, + "loss": 0.1415, + "num_input_tokens_seen": 21051968, + "step": 31245 + }, + { + "epoch": 0.7634426990447805, + "grad_norm": 1.8185697793960571, + "learning_rate": 1.983139347440016e-06, + "loss": 0.0339, + "num_input_tokens_seen": 21055296, + "step": 31250 + }, + { + "epoch": 0.7635648498766276, + "grad_norm": 0.16570863127708435, + "learning_rate": 1.983123750179671e-06, + "loss": 0.04, + "num_input_tokens_seen": 21058432, + "step": 31255 + }, + { + "epoch": 0.7636870007084748, + "grad_norm": 52.01047897338867, + "learning_rate": 1.9831081457697856e-06, + "loss": 0.0793, + "num_input_tokens_seen": 21061760, + "step": 31260 + }, + { + "epoch": 0.763809151540322, + "grad_norm": 22.145647048950195, + "learning_rate": 1.9830925342104736e-06, + "loss": 0.1415, + "num_input_tokens_seen": 21065088, + "step": 31265 + }, + { + "epoch": 0.7639313023721691, + "grad_norm": 11.057469367980957, + "learning_rate": 1.983076915501848e-06, + "loss": 0.1292, + "num_input_tokens_seen": 21068864, + "step": 31270 + }, + { + "epoch": 0.7640534532040163, + "grad_norm": 51.785011291503906, + "learning_rate": 1.9830612896440226e-06, + "loss": 0.1785, + "num_input_tokens_seen": 21072192, + "step": 31275 + }, + { + "epoch": 0.7641756040358635, + "grad_norm": 43.112247467041016, + "learning_rate": 1.983045656637111e-06, + "loss": 0.158, + "num_input_tokens_seen": 21075072, + "step": 31280 + }, + { + "epoch": 0.7642977548677107, + "grad_norm": 32.858177185058594, + "learning_rate": 1.9830300164812273e-06, + "loss": 0.1041, + "num_input_tokens_seen": 21079232, + "step": 31285 + }, + { + "epoch": 0.7644199056995579, + "grad_norm": 0.2882313132286072, + "learning_rate": 1.9830143691764846e-06, + "loss": 0.0476, + "num_input_tokens_seen": 21082560, + "step": 31290 + }, + { + "epoch": 0.764542056531405, + "grad_norm": 1.6282767057418823, + "learning_rate": 1.9829987147229974e-06, + "loss": 0.0529, + "num_input_tokens_seen": 21086144, + "step": 31295 + }, + { + "epoch": 0.7646642073632521, + "grad_norm": 21.46419906616211, + "learning_rate": 1.982983053120879e-06, + "loss": 0.2246, + "num_input_tokens_seen": 21089600, + "step": 31300 + }, + { + "epoch": 0.7647863581950993, + "grad_norm": 31.539697647094727, + "learning_rate": 1.9829673843702434e-06, + "loss": 0.0652, + "num_input_tokens_seen": 21093056, + "step": 31305 + }, + { + "epoch": 0.7649085090269465, + "grad_norm": 12.277565956115723, + "learning_rate": 1.9829517084712045e-06, + "loss": 0.172, + "num_input_tokens_seen": 21096256, + "step": 31310 + }, + { + "epoch": 0.7650306598587936, + "grad_norm": 21.35080337524414, + "learning_rate": 1.9829360254238767e-06, + "loss": 0.1213, + "num_input_tokens_seen": 21099520, + "step": 31315 + }, + { + "epoch": 0.7651528106906408, + "grad_norm": 19.8055419921875, + "learning_rate": 1.9829203352283735e-06, + "loss": 0.1974, + "num_input_tokens_seen": 21102592, + "step": 31320 + }, + { + "epoch": 0.765274961522488, + "grad_norm": 9.7948637008667, + "learning_rate": 1.982904637884809e-06, + "loss": 0.1138, + "num_input_tokens_seen": 21105920, + "step": 31325 + }, + { + "epoch": 0.7653971123543352, + "grad_norm": 28.4554443359375, + "learning_rate": 1.982888933393298e-06, + "loss": 0.1358, + "num_input_tokens_seen": 21109504, + "step": 31330 + }, + { + "epoch": 0.7655192631861824, + "grad_norm": 0.3278570771217346, + "learning_rate": 1.982873221753954e-06, + "loss": 0.1178, + "num_input_tokens_seen": 21112576, + "step": 31335 + }, + { + "epoch": 0.7656414140180294, + "grad_norm": 14.416967391967773, + "learning_rate": 1.982857502966892e-06, + "loss": 0.1292, + "num_input_tokens_seen": 21115840, + "step": 31340 + }, + { + "epoch": 0.7657635648498766, + "grad_norm": 13.378105163574219, + "learning_rate": 1.9828417770322255e-06, + "loss": 0.1554, + "num_input_tokens_seen": 21119296, + "step": 31345 + }, + { + "epoch": 0.7658857156817238, + "grad_norm": 1.7292650938034058, + "learning_rate": 1.9828260439500694e-06, + "loss": 0.0441, + "num_input_tokens_seen": 21122944, + "step": 31350 + }, + { + "epoch": 0.766007866513571, + "grad_norm": 10.598871231079102, + "learning_rate": 1.9828103037205376e-06, + "loss": 0.1804, + "num_input_tokens_seen": 21126016, + "step": 31355 + }, + { + "epoch": 0.7661300173454181, + "grad_norm": 15.65273666381836, + "learning_rate": 1.9827945563437455e-06, + "loss": 0.1402, + "num_input_tokens_seen": 21129344, + "step": 31360 + }, + { + "epoch": 0.7662521681772653, + "grad_norm": 0.42412981390953064, + "learning_rate": 1.9827788018198067e-06, + "loss": 0.0889, + "num_input_tokens_seen": 21132288, + "step": 31365 + }, + { + "epoch": 0.7663743190091125, + "grad_norm": 10.7468843460083, + "learning_rate": 1.9827630401488365e-06, + "loss": 0.093, + "num_input_tokens_seen": 21135616, + "step": 31370 + }, + { + "epoch": 0.7664964698409596, + "grad_norm": 23.37337303161621, + "learning_rate": 1.9827472713309486e-06, + "loss": 0.1495, + "num_input_tokens_seen": 21138816, + "step": 31375 + }, + { + "epoch": 0.7666186206728067, + "grad_norm": 7.535536766052246, + "learning_rate": 1.9827314953662584e-06, + "loss": 0.1836, + "num_input_tokens_seen": 21141952, + "step": 31380 + }, + { + "epoch": 0.7667407715046539, + "grad_norm": 0.7598505616188049, + "learning_rate": 1.9827157122548806e-06, + "loss": 0.0087, + "num_input_tokens_seen": 21145920, + "step": 31385 + }, + { + "epoch": 0.7668629223365011, + "grad_norm": 22.626522064208984, + "learning_rate": 1.98269992199693e-06, + "loss": 0.1052, + "num_input_tokens_seen": 21149120, + "step": 31390 + }, + { + "epoch": 0.7669850731683483, + "grad_norm": 37.745269775390625, + "learning_rate": 1.982684124592521e-06, + "loss": 0.119, + "num_input_tokens_seen": 21152128, + "step": 31395 + }, + { + "epoch": 0.7671072240001955, + "grad_norm": 3.7507152557373047, + "learning_rate": 1.9826683200417684e-06, + "loss": 0.0412, + "num_input_tokens_seen": 21155520, + "step": 31400 + }, + { + "epoch": 0.7672293748320426, + "grad_norm": 29.963966369628906, + "learning_rate": 1.982652508344788e-06, + "loss": 0.2221, + "num_input_tokens_seen": 21159040, + "step": 31405 + }, + { + "epoch": 0.7673515256638898, + "grad_norm": 8.20248794555664, + "learning_rate": 1.982636689501694e-06, + "loss": 0.1157, + "num_input_tokens_seen": 21162432, + "step": 31410 + }, + { + "epoch": 0.767473676495737, + "grad_norm": 12.889708518981934, + "learning_rate": 1.9826208635126017e-06, + "loss": 0.0612, + "num_input_tokens_seen": 21166208, + "step": 31415 + }, + { + "epoch": 0.7675958273275841, + "grad_norm": 0.2748701870441437, + "learning_rate": 1.9826050303776265e-06, + "loss": 0.0213, + "num_input_tokens_seen": 21169856, + "step": 31420 + }, + { + "epoch": 0.7677179781594312, + "grad_norm": 10.752982139587402, + "learning_rate": 1.982589190096883e-06, + "loss": 0.1161, + "num_input_tokens_seen": 21173504, + "step": 31425 + }, + { + "epoch": 0.7678401289912784, + "grad_norm": 4.767438888549805, + "learning_rate": 1.9825733426704867e-06, + "loss": 0.0755, + "num_input_tokens_seen": 21177088, + "step": 31430 + }, + { + "epoch": 0.7679622798231256, + "grad_norm": 18.476245880126953, + "learning_rate": 1.9825574880985525e-06, + "loss": 0.1222, + "num_input_tokens_seen": 21180544, + "step": 31435 + }, + { + "epoch": 0.7680844306549728, + "grad_norm": 28.4121036529541, + "learning_rate": 1.982541626381196e-06, + "loss": 0.08, + "num_input_tokens_seen": 21183616, + "step": 31440 + }, + { + "epoch": 0.76820658148682, + "grad_norm": 0.08580884337425232, + "learning_rate": 1.9825257575185326e-06, + "loss": 0.0965, + "num_input_tokens_seen": 21187456, + "step": 31445 + }, + { + "epoch": 0.768328732318667, + "grad_norm": 12.523801803588867, + "learning_rate": 1.9825098815106777e-06, + "loss": 0.0524, + "num_input_tokens_seen": 21190656, + "step": 31450 + }, + { + "epoch": 0.7684508831505142, + "grad_norm": 21.91022300720215, + "learning_rate": 1.982493998357747e-06, + "loss": 0.1822, + "num_input_tokens_seen": 21193920, + "step": 31455 + }, + { + "epoch": 0.7685730339823614, + "grad_norm": 14.95090103149414, + "learning_rate": 1.982478108059855e-06, + "loss": 0.0697, + "num_input_tokens_seen": 21196864, + "step": 31460 + }, + { + "epoch": 0.7686951848142086, + "grad_norm": 17.197738647460938, + "learning_rate": 1.982462210617118e-06, + "loss": 0.1942, + "num_input_tokens_seen": 21200064, + "step": 31465 + }, + { + "epoch": 0.7688173356460557, + "grad_norm": 18.8331241607666, + "learning_rate": 1.982446306029652e-06, + "loss": 0.0911, + "num_input_tokens_seen": 21203456, + "step": 31470 + }, + { + "epoch": 0.7689394864779029, + "grad_norm": 54.55223846435547, + "learning_rate": 1.982430394297572e-06, + "loss": 0.1893, + "num_input_tokens_seen": 21206464, + "step": 31475 + }, + { + "epoch": 0.7690616373097501, + "grad_norm": 0.18907591700553894, + "learning_rate": 1.9824144754209944e-06, + "loss": 0.0927, + "num_input_tokens_seen": 21209856, + "step": 31480 + }, + { + "epoch": 0.7691837881415973, + "grad_norm": 21.11206817626953, + "learning_rate": 1.982398549400034e-06, + "loss": 0.1205, + "num_input_tokens_seen": 21212672, + "step": 31485 + }, + { + "epoch": 0.7693059389734445, + "grad_norm": 60.83143997192383, + "learning_rate": 1.982382616234807e-06, + "loss": 0.0765, + "num_input_tokens_seen": 21216448, + "step": 31490 + }, + { + "epoch": 0.7694280898052915, + "grad_norm": 14.315446853637695, + "learning_rate": 1.98236667592543e-06, + "loss": 0.1077, + "num_input_tokens_seen": 21219392, + "step": 31495 + }, + { + "epoch": 0.7695502406371387, + "grad_norm": 6.813923358917236, + "learning_rate": 1.9823507284720174e-06, + "loss": 0.0534, + "num_input_tokens_seen": 21222464, + "step": 31500 + }, + { + "epoch": 0.7696723914689859, + "grad_norm": 30.06726837158203, + "learning_rate": 1.9823347738746868e-06, + "loss": 0.2408, + "num_input_tokens_seen": 21225600, + "step": 31505 + }, + { + "epoch": 0.7697945423008331, + "grad_norm": 0.7989888787269592, + "learning_rate": 1.9823188121335535e-06, + "loss": 0.1162, + "num_input_tokens_seen": 21228928, + "step": 31510 + }, + { + "epoch": 0.7699166931326802, + "grad_norm": 6.25483512878418, + "learning_rate": 1.9823028432487332e-06, + "loss": 0.1069, + "num_input_tokens_seen": 21232576, + "step": 31515 + }, + { + "epoch": 0.7700388439645274, + "grad_norm": 0.3138057589530945, + "learning_rate": 1.982286867220343e-06, + "loss": 0.1632, + "num_input_tokens_seen": 21236032, + "step": 31520 + }, + { + "epoch": 0.7701609947963746, + "grad_norm": 27.970476150512695, + "learning_rate": 1.9822708840484976e-06, + "loss": 0.1045, + "num_input_tokens_seen": 21239744, + "step": 31525 + }, + { + "epoch": 0.7702831456282218, + "grad_norm": 46.2231559753418, + "learning_rate": 1.9822548937333148e-06, + "loss": 0.1838, + "num_input_tokens_seen": 21243008, + "step": 31530 + }, + { + "epoch": 0.770405296460069, + "grad_norm": 29.226469039916992, + "learning_rate": 1.98223889627491e-06, + "loss": 0.0838, + "num_input_tokens_seen": 21246272, + "step": 31535 + }, + { + "epoch": 0.770527447291916, + "grad_norm": 15.603208541870117, + "learning_rate": 1.9822228916733996e-06, + "loss": 0.1898, + "num_input_tokens_seen": 21249600, + "step": 31540 + }, + { + "epoch": 0.7706495981237632, + "grad_norm": 12.144451141357422, + "learning_rate": 1.9822068799289003e-06, + "loss": 0.1083, + "num_input_tokens_seen": 21252992, + "step": 31545 + }, + { + "epoch": 0.7707717489556104, + "grad_norm": 3.6689765453338623, + "learning_rate": 1.982190861041529e-06, + "loss": 0.0789, + "num_input_tokens_seen": 21256448, + "step": 31550 + }, + { + "epoch": 0.7708938997874576, + "grad_norm": 12.056143760681152, + "learning_rate": 1.9821748350114004e-06, + "loss": 0.0737, + "num_input_tokens_seen": 21259392, + "step": 31555 + }, + { + "epoch": 0.7710160506193047, + "grad_norm": 25.346725463867188, + "learning_rate": 1.982158801838633e-06, + "loss": 0.0581, + "num_input_tokens_seen": 21262848, + "step": 31560 + }, + { + "epoch": 0.7711382014511519, + "grad_norm": 7.405025005340576, + "learning_rate": 1.9821427615233427e-06, + "loss": 0.1367, + "num_input_tokens_seen": 21266048, + "step": 31565 + }, + { + "epoch": 0.771260352282999, + "grad_norm": 0.4394875168800354, + "learning_rate": 1.9821267140656457e-06, + "loss": 0.1666, + "num_input_tokens_seen": 21269120, + "step": 31570 + }, + { + "epoch": 0.7713825031148462, + "grad_norm": 30.82460594177246, + "learning_rate": 1.982110659465659e-06, + "loss": 0.1126, + "num_input_tokens_seen": 21272320, + "step": 31575 + }, + { + "epoch": 0.7715046539466934, + "grad_norm": 8.263945579528809, + "learning_rate": 1.9820945977235e-06, + "loss": 0.164, + "num_input_tokens_seen": 21275328, + "step": 31580 + }, + { + "epoch": 0.7716268047785405, + "grad_norm": 17.32110595703125, + "learning_rate": 1.9820785288392844e-06, + "loss": 0.0813, + "num_input_tokens_seen": 21278720, + "step": 31585 + }, + { + "epoch": 0.7717489556103877, + "grad_norm": 4.869380474090576, + "learning_rate": 1.98206245281313e-06, + "loss": 0.0579, + "num_input_tokens_seen": 21282048, + "step": 31590 + }, + { + "epoch": 0.7718711064422349, + "grad_norm": 0.19078025221824646, + "learning_rate": 1.982046369645153e-06, + "loss": 0.1305, + "num_input_tokens_seen": 21285440, + "step": 31595 + }, + { + "epoch": 0.7719932572740821, + "grad_norm": 24.013418197631836, + "learning_rate": 1.9820302793354704e-06, + "loss": 0.1274, + "num_input_tokens_seen": 21288448, + "step": 31600 + }, + { + "epoch": 0.7721154081059292, + "grad_norm": 0.5197308659553528, + "learning_rate": 1.9820141818842e-06, + "loss": 0.1899, + "num_input_tokens_seen": 21291776, + "step": 31605 + }, + { + "epoch": 0.7722375589377763, + "grad_norm": 12.339673042297363, + "learning_rate": 1.981998077291458e-06, + "loss": 0.1039, + "num_input_tokens_seen": 21295360, + "step": 31610 + }, + { + "epoch": 0.7723597097696235, + "grad_norm": 17.625242233276367, + "learning_rate": 1.981981965557362e-06, + "loss": 0.1433, + "num_input_tokens_seen": 21298240, + "step": 31615 + }, + { + "epoch": 0.7724818606014707, + "grad_norm": 0.9017745852470398, + "learning_rate": 1.981965846682029e-06, + "loss": 0.0809, + "num_input_tokens_seen": 21301568, + "step": 31620 + }, + { + "epoch": 0.7726040114333178, + "grad_norm": 1.5347234010696411, + "learning_rate": 1.981949720665576e-06, + "loss": 0.1297, + "num_input_tokens_seen": 21305024, + "step": 31625 + }, + { + "epoch": 0.772726162265165, + "grad_norm": 1.6608084440231323, + "learning_rate": 1.981933587508121e-06, + "loss": 0.0577, + "num_input_tokens_seen": 21308224, + "step": 31630 + }, + { + "epoch": 0.7728483130970122, + "grad_norm": 12.739276885986328, + "learning_rate": 1.9819174472097807e-06, + "loss": 0.171, + "num_input_tokens_seen": 21311552, + "step": 31635 + }, + { + "epoch": 0.7729704639288594, + "grad_norm": 19.08176612854004, + "learning_rate": 1.9819012997706727e-06, + "loss": 0.0992, + "num_input_tokens_seen": 21314688, + "step": 31640 + }, + { + "epoch": 0.7730926147607066, + "grad_norm": 2.2028987407684326, + "learning_rate": 1.981885145190914e-06, + "loss": 0.1189, + "num_input_tokens_seen": 21317952, + "step": 31645 + }, + { + "epoch": 0.7732147655925536, + "grad_norm": 0.6936241388320923, + "learning_rate": 1.981868983470623e-06, + "loss": 0.0643, + "num_input_tokens_seen": 21321024, + "step": 31650 + }, + { + "epoch": 0.7733369164244008, + "grad_norm": 0.8882758021354675, + "learning_rate": 1.981852814609916e-06, + "loss": 0.1486, + "num_input_tokens_seen": 21324288, + "step": 31655 + }, + { + "epoch": 0.773459067256248, + "grad_norm": 27.76462745666504, + "learning_rate": 1.981836638608911e-06, + "loss": 0.1112, + "num_input_tokens_seen": 21327808, + "step": 31660 + }, + { + "epoch": 0.7735812180880952, + "grad_norm": 20.353557586669922, + "learning_rate": 1.981820455467727e-06, + "loss": 0.1939, + "num_input_tokens_seen": 21330944, + "step": 31665 + }, + { + "epoch": 0.7737033689199423, + "grad_norm": 1.4390501976013184, + "learning_rate": 1.9818042651864797e-06, + "loss": 0.1189, + "num_input_tokens_seen": 21334208, + "step": 31670 + }, + { + "epoch": 0.7738255197517895, + "grad_norm": 6.415053844451904, + "learning_rate": 1.981788067765288e-06, + "loss": 0.0309, + "num_input_tokens_seen": 21337088, + "step": 31675 + }, + { + "epoch": 0.7739476705836367, + "grad_norm": 1.232407808303833, + "learning_rate": 1.9817718632042695e-06, + "loss": 0.1, + "num_input_tokens_seen": 21340608, + "step": 31680 + }, + { + "epoch": 0.7740698214154839, + "grad_norm": 0.8504520654678345, + "learning_rate": 1.981755651503542e-06, + "loss": 0.0482, + "num_input_tokens_seen": 21343808, + "step": 31685 + }, + { + "epoch": 0.774191972247331, + "grad_norm": 3.254563093185425, + "learning_rate": 1.981739432663223e-06, + "loss": 0.0957, + "num_input_tokens_seen": 21347264, + "step": 31690 + }, + { + "epoch": 0.7743141230791781, + "grad_norm": 7.742297649383545, + "learning_rate": 1.981723206683431e-06, + "loss": 0.0278, + "num_input_tokens_seen": 21350912, + "step": 31695 + }, + { + "epoch": 0.7744362739110253, + "grad_norm": 27.937456130981445, + "learning_rate": 1.981706973564284e-06, + "loss": 0.2443, + "num_input_tokens_seen": 21354368, + "step": 31700 + }, + { + "epoch": 0.7745584247428725, + "grad_norm": 6.21821403503418, + "learning_rate": 1.9816907333058993e-06, + "loss": 0.0087, + "num_input_tokens_seen": 21357888, + "step": 31705 + }, + { + "epoch": 0.7746805755747197, + "grad_norm": 34.79883575439453, + "learning_rate": 1.981674485908396e-06, + "loss": 0.1654, + "num_input_tokens_seen": 21361344, + "step": 31710 + }, + { + "epoch": 0.7748027264065668, + "grad_norm": 22.730897903442383, + "learning_rate": 1.9816582313718917e-06, + "loss": 0.0697, + "num_input_tokens_seen": 21364224, + "step": 31715 + }, + { + "epoch": 0.774924877238414, + "grad_norm": 14.0791654586792, + "learning_rate": 1.9816419696965045e-06, + "loss": 0.1227, + "num_input_tokens_seen": 21367424, + "step": 31720 + }, + { + "epoch": 0.7750470280702612, + "grad_norm": 19.73372459411621, + "learning_rate": 1.9816257008823532e-06, + "loss": 0.0791, + "num_input_tokens_seen": 21370560, + "step": 31725 + }, + { + "epoch": 0.7751691789021083, + "grad_norm": 0.3301796317100525, + "learning_rate": 1.9816094249295557e-06, + "loss": 0.0378, + "num_input_tokens_seen": 21374720, + "step": 31730 + }, + { + "epoch": 0.7752913297339555, + "grad_norm": 38.05630874633789, + "learning_rate": 1.98159314183823e-06, + "loss": 0.1237, + "num_input_tokens_seen": 21378112, + "step": 31735 + }, + { + "epoch": 0.7754134805658026, + "grad_norm": 2.712581157684326, + "learning_rate": 1.9815768516084956e-06, + "loss": 0.063, + "num_input_tokens_seen": 21381504, + "step": 31740 + }, + { + "epoch": 0.7755356313976498, + "grad_norm": 36.26502227783203, + "learning_rate": 1.9815605542404698e-06, + "loss": 0.2259, + "num_input_tokens_seen": 21385344, + "step": 31745 + }, + { + "epoch": 0.775657782229497, + "grad_norm": 10.401970863342285, + "learning_rate": 1.981544249734272e-06, + "loss": 0.0612, + "num_input_tokens_seen": 21388736, + "step": 31750 + }, + { + "epoch": 0.7757799330613442, + "grad_norm": 35.05522537231445, + "learning_rate": 1.98152793809002e-06, + "loss": 0.1576, + "num_input_tokens_seen": 21391872, + "step": 31755 + }, + { + "epoch": 0.7759020838931913, + "grad_norm": 6.847743988037109, + "learning_rate": 1.9815116193078333e-06, + "loss": 0.0493, + "num_input_tokens_seen": 21395520, + "step": 31760 + }, + { + "epoch": 0.7760242347250385, + "grad_norm": 34.985145568847656, + "learning_rate": 1.98149529338783e-06, + "loss": 0.0779, + "num_input_tokens_seen": 21399296, + "step": 31765 + }, + { + "epoch": 0.7761463855568856, + "grad_norm": 8.233406066894531, + "learning_rate": 1.981478960330129e-06, + "loss": 0.1058, + "num_input_tokens_seen": 21402880, + "step": 31770 + }, + { + "epoch": 0.7762685363887328, + "grad_norm": 14.839694023132324, + "learning_rate": 1.9814626201348484e-06, + "loss": 0.0803, + "num_input_tokens_seen": 21406336, + "step": 31775 + }, + { + "epoch": 0.77639068722058, + "grad_norm": 28.401588439941406, + "learning_rate": 1.9814462728021084e-06, + "loss": 0.1543, + "num_input_tokens_seen": 21409536, + "step": 31780 + }, + { + "epoch": 0.7765128380524271, + "grad_norm": 77.49806213378906, + "learning_rate": 1.981429918332027e-06, + "loss": 0.133, + "num_input_tokens_seen": 21413056, + "step": 31785 + }, + { + "epoch": 0.7766349888842743, + "grad_norm": 7.4428815841674805, + "learning_rate": 1.981413556724723e-06, + "loss": 0.0619, + "num_input_tokens_seen": 21416896, + "step": 31790 + }, + { + "epoch": 0.7767571397161215, + "grad_norm": 23.340656280517578, + "learning_rate": 1.9813971879803155e-06, + "loss": 0.1494, + "num_input_tokens_seen": 21420096, + "step": 31795 + }, + { + "epoch": 0.7768792905479687, + "grad_norm": 0.6986697316169739, + "learning_rate": 1.9813808120989238e-06, + "loss": 0.0836, + "num_input_tokens_seen": 21423424, + "step": 31800 + }, + { + "epoch": 0.7770014413798157, + "grad_norm": 49.14704132080078, + "learning_rate": 1.981364429080667e-06, + "loss": 0.0432, + "num_input_tokens_seen": 21426688, + "step": 31805 + }, + { + "epoch": 0.7771235922116629, + "grad_norm": 0.13445626199245453, + "learning_rate": 1.9813480389256643e-06, + "loss": 0.1327, + "num_input_tokens_seen": 21430016, + "step": 31810 + }, + { + "epoch": 0.7772457430435101, + "grad_norm": 15.11719799041748, + "learning_rate": 1.9813316416340345e-06, + "loss": 0.1589, + "num_input_tokens_seen": 21433856, + "step": 31815 + }, + { + "epoch": 0.7773678938753573, + "grad_norm": 49.894168853759766, + "learning_rate": 1.981315237205897e-06, + "loss": 0.1383, + "num_input_tokens_seen": 21437120, + "step": 31820 + }, + { + "epoch": 0.7774900447072045, + "grad_norm": 0.596743106842041, + "learning_rate": 1.9812988256413715e-06, + "loss": 0.1419, + "num_input_tokens_seen": 21440704, + "step": 31825 + }, + { + "epoch": 0.7776121955390516, + "grad_norm": 8.4089937210083, + "learning_rate": 1.9812824069405766e-06, + "loss": 0.2094, + "num_input_tokens_seen": 21444416, + "step": 31830 + }, + { + "epoch": 0.7777343463708988, + "grad_norm": 20.740663528442383, + "learning_rate": 1.981265981103632e-06, + "loss": 0.1335, + "num_input_tokens_seen": 21447616, + "step": 31835 + }, + { + "epoch": 0.777856497202746, + "grad_norm": 13.120567321777344, + "learning_rate": 1.9812495481306577e-06, + "loss": 0.0973, + "num_input_tokens_seen": 21450752, + "step": 31840 + }, + { + "epoch": 0.7779786480345932, + "grad_norm": 26.166528701782227, + "learning_rate": 1.9812331080217726e-06, + "loss": 0.1563, + "num_input_tokens_seen": 21453888, + "step": 31845 + }, + { + "epoch": 0.7781007988664402, + "grad_norm": 19.302303314208984, + "learning_rate": 1.9812166607770965e-06, + "loss": 0.0927, + "num_input_tokens_seen": 21457216, + "step": 31850 + }, + { + "epoch": 0.7782229496982874, + "grad_norm": 3.334254264831543, + "learning_rate": 1.981200206396749e-06, + "loss": 0.155, + "num_input_tokens_seen": 21461248, + "step": 31855 + }, + { + "epoch": 0.7783451005301346, + "grad_norm": 36.11018753051758, + "learning_rate": 1.981183744880849e-06, + "loss": 0.0909, + "num_input_tokens_seen": 21464960, + "step": 31860 + }, + { + "epoch": 0.7784672513619818, + "grad_norm": 1.2233189344406128, + "learning_rate": 1.9811672762295176e-06, + "loss": 0.0728, + "num_input_tokens_seen": 21468032, + "step": 31865 + }, + { + "epoch": 0.778589402193829, + "grad_norm": 12.21750259399414, + "learning_rate": 1.9811508004428737e-06, + "loss": 0.0325, + "num_input_tokens_seen": 21471744, + "step": 31870 + }, + { + "epoch": 0.7787115530256761, + "grad_norm": 7.178306579589844, + "learning_rate": 1.981134317521037e-06, + "loss": 0.129, + "num_input_tokens_seen": 21475584, + "step": 31875 + }, + { + "epoch": 0.7788337038575233, + "grad_norm": 20.816633224487305, + "learning_rate": 1.981117827464128e-06, + "loss": 0.1179, + "num_input_tokens_seen": 21478720, + "step": 31880 + }, + { + "epoch": 0.7789558546893705, + "grad_norm": 0.3289129436016083, + "learning_rate": 1.981101330272266e-06, + "loss": 0.0984, + "num_input_tokens_seen": 21482112, + "step": 31885 + }, + { + "epoch": 0.7790780055212176, + "grad_norm": 2.1822543144226074, + "learning_rate": 1.9810848259455716e-06, + "loss": 0.0813, + "num_input_tokens_seen": 21485696, + "step": 31890 + }, + { + "epoch": 0.7792001563530647, + "grad_norm": 12.473344802856445, + "learning_rate": 1.981068314484164e-06, + "loss": 0.0913, + "num_input_tokens_seen": 21489216, + "step": 31895 + }, + { + "epoch": 0.7793223071849119, + "grad_norm": 30.03060531616211, + "learning_rate": 1.981051795888164e-06, + "loss": 0.1576, + "num_input_tokens_seen": 21492800, + "step": 31900 + }, + { + "epoch": 0.7794444580167591, + "grad_norm": 0.5922881960868835, + "learning_rate": 1.9810352701576917e-06, + "loss": 0.1572, + "num_input_tokens_seen": 21495744, + "step": 31905 + }, + { + "epoch": 0.7795666088486063, + "grad_norm": 0.5877641439437866, + "learning_rate": 1.981018737292867e-06, + "loss": 0.133, + "num_input_tokens_seen": 21498944, + "step": 31910 + }, + { + "epoch": 0.7796887596804534, + "grad_norm": 14.157615661621094, + "learning_rate": 1.98100219729381e-06, + "loss": 0.0848, + "num_input_tokens_seen": 21502400, + "step": 31915 + }, + { + "epoch": 0.7798109105123006, + "grad_norm": 8.548436164855957, + "learning_rate": 1.980985650160641e-06, + "loss": 0.2272, + "num_input_tokens_seen": 21505856, + "step": 31920 + }, + { + "epoch": 0.7799330613441477, + "grad_norm": 19.105016708374023, + "learning_rate": 1.9809690958934804e-06, + "loss": 0.0443, + "num_input_tokens_seen": 21509056, + "step": 31925 + }, + { + "epoch": 0.7800552121759949, + "grad_norm": 0.6176168322563171, + "learning_rate": 1.980952534492449e-06, + "loss": 0.1034, + "num_input_tokens_seen": 21512640, + "step": 31930 + }, + { + "epoch": 0.7801773630078421, + "grad_norm": 0.15723645687103271, + "learning_rate": 1.980935965957667e-06, + "loss": 0.1021, + "num_input_tokens_seen": 21515904, + "step": 31935 + }, + { + "epoch": 0.7802995138396892, + "grad_norm": 16.714370727539062, + "learning_rate": 1.9809193902892548e-06, + "loss": 0.1275, + "num_input_tokens_seen": 21519040, + "step": 31940 + }, + { + "epoch": 0.7804216646715364, + "grad_norm": 2.1100945472717285, + "learning_rate": 1.980902807487333e-06, + "loss": 0.054, + "num_input_tokens_seen": 21522368, + "step": 31945 + }, + { + "epoch": 0.7805438155033836, + "grad_norm": 0.33257073163986206, + "learning_rate": 1.980886217552022e-06, + "loss": 0.0054, + "num_input_tokens_seen": 21525696, + "step": 31950 + }, + { + "epoch": 0.7806659663352308, + "grad_norm": 4.166648864746094, + "learning_rate": 1.9808696204834427e-06, + "loss": 0.0705, + "num_input_tokens_seen": 21529344, + "step": 31955 + }, + { + "epoch": 0.7807881171670779, + "grad_norm": 21.16596221923828, + "learning_rate": 1.9808530162817153e-06, + "loss": 0.1702, + "num_input_tokens_seen": 21532992, + "step": 31960 + }, + { + "epoch": 0.780910267998925, + "grad_norm": 7.611130237579346, + "learning_rate": 1.9808364049469613e-06, + "loss": 0.0475, + "num_input_tokens_seen": 21536512, + "step": 31965 + }, + { + "epoch": 0.7810324188307722, + "grad_norm": 16.877761840820312, + "learning_rate": 1.980819786479301e-06, + "loss": 0.2233, + "num_input_tokens_seen": 21540096, + "step": 31970 + }, + { + "epoch": 0.7811545696626194, + "grad_norm": 31.75934600830078, + "learning_rate": 1.9808031608788557e-06, + "loss": 0.116, + "num_input_tokens_seen": 21543936, + "step": 31975 + }, + { + "epoch": 0.7812767204944666, + "grad_norm": 2.284510612487793, + "learning_rate": 1.980786528145746e-06, + "loss": 0.1117, + "num_input_tokens_seen": 21547328, + "step": 31980 + }, + { + "epoch": 0.7813988713263137, + "grad_norm": 121.72061157226562, + "learning_rate": 1.9807698882800924e-06, + "loss": 0.1172, + "num_input_tokens_seen": 21550592, + "step": 31985 + }, + { + "epoch": 0.7815210221581609, + "grad_norm": 18.22707748413086, + "learning_rate": 1.9807532412820165e-06, + "loss": 0.1161, + "num_input_tokens_seen": 21554048, + "step": 31990 + }, + { + "epoch": 0.7816431729900081, + "grad_norm": 27.85599708557129, + "learning_rate": 1.9807365871516394e-06, + "loss": 0.1212, + "num_input_tokens_seen": 21557440, + "step": 31995 + }, + { + "epoch": 0.7817653238218553, + "grad_norm": 0.6961256861686707, + "learning_rate": 1.9807199258890823e-06, + "loss": 0.0333, + "num_input_tokens_seen": 21561280, + "step": 32000 + }, + { + "epoch": 0.7818874746537023, + "grad_norm": 12.567249298095703, + "learning_rate": 1.980703257494466e-06, + "loss": 0.0678, + "num_input_tokens_seen": 21564672, + "step": 32005 + }, + { + "epoch": 0.7820096254855495, + "grad_norm": 2.788113594055176, + "learning_rate": 1.9806865819679116e-06, + "loss": 0.0862, + "num_input_tokens_seen": 21568704, + "step": 32010 + }, + { + "epoch": 0.7821317763173967, + "grad_norm": 10.924982070922852, + "learning_rate": 1.9806698993095407e-06, + "loss": 0.026, + "num_input_tokens_seen": 21572160, + "step": 32015 + }, + { + "epoch": 0.7822539271492439, + "grad_norm": 25.12057876586914, + "learning_rate": 1.9806532095194742e-06, + "loss": 0.247, + "num_input_tokens_seen": 21575680, + "step": 32020 + }, + { + "epoch": 0.7823760779810911, + "grad_norm": 18.78164291381836, + "learning_rate": 1.980636512597834e-06, + "loss": 0.1509, + "num_input_tokens_seen": 21579072, + "step": 32025 + }, + { + "epoch": 0.7824982288129382, + "grad_norm": 59.385494232177734, + "learning_rate": 1.9806198085447417e-06, + "loss": 0.1559, + "num_input_tokens_seen": 21582656, + "step": 32030 + }, + { + "epoch": 0.7826203796447854, + "grad_norm": 0.6709699034690857, + "learning_rate": 1.9806030973603183e-06, + "loss": 0.1118, + "num_input_tokens_seen": 21585600, + "step": 32035 + }, + { + "epoch": 0.7827425304766326, + "grad_norm": 40.532989501953125, + "learning_rate": 1.980586379044685e-06, + "loss": 0.1132, + "num_input_tokens_seen": 21588928, + "step": 32040 + }, + { + "epoch": 0.7828646813084797, + "grad_norm": 16.257848739624023, + "learning_rate": 1.9805696535979643e-06, + "loss": 0.1005, + "num_input_tokens_seen": 21591872, + "step": 32045 + }, + { + "epoch": 0.7829868321403268, + "grad_norm": 18.62839698791504, + "learning_rate": 1.980552921020277e-06, + "loss": 0.0973, + "num_input_tokens_seen": 21595456, + "step": 32050 + }, + { + "epoch": 0.783108982972174, + "grad_norm": 13.70004940032959, + "learning_rate": 1.980536181311745e-06, + "loss": 0.0618, + "num_input_tokens_seen": 21598208, + "step": 32055 + }, + { + "epoch": 0.7832311338040212, + "grad_norm": 1.1185194253921509, + "learning_rate": 1.9805194344724906e-06, + "loss": 0.148, + "num_input_tokens_seen": 21601152, + "step": 32060 + }, + { + "epoch": 0.7833532846358684, + "grad_norm": 9.538461685180664, + "learning_rate": 1.980502680502635e-06, + "loss": 0.0887, + "num_input_tokens_seen": 21604224, + "step": 32065 + }, + { + "epoch": 0.7834754354677156, + "grad_norm": 17.74492645263672, + "learning_rate": 1.9804859194023e-06, + "loss": 0.1157, + "num_input_tokens_seen": 21607168, + "step": 32070 + }, + { + "epoch": 0.7835975862995627, + "grad_norm": 17.56431770324707, + "learning_rate": 1.980469151171608e-06, + "loss": 0.1083, + "num_input_tokens_seen": 21610176, + "step": 32075 + }, + { + "epoch": 0.7837197371314099, + "grad_norm": 30.301050186157227, + "learning_rate": 1.9804523758106805e-06, + "loss": 0.1411, + "num_input_tokens_seen": 21613184, + "step": 32080 + }, + { + "epoch": 0.783841887963257, + "grad_norm": 34.66545104980469, + "learning_rate": 1.9804355933196397e-06, + "loss": 0.1508, + "num_input_tokens_seen": 21616384, + "step": 32085 + }, + { + "epoch": 0.7839640387951042, + "grad_norm": 0.4128032922744751, + "learning_rate": 1.9804188036986068e-06, + "loss": 0.0559, + "num_input_tokens_seen": 21619840, + "step": 32090 + }, + { + "epoch": 0.7840861896269513, + "grad_norm": 18.219940185546875, + "learning_rate": 1.9804020069477058e-06, + "loss": 0.0879, + "num_input_tokens_seen": 21622976, + "step": 32095 + }, + { + "epoch": 0.7842083404587985, + "grad_norm": 45.373016357421875, + "learning_rate": 1.980385203067057e-06, + "loss": 0.0968, + "num_input_tokens_seen": 21626432, + "step": 32100 + }, + { + "epoch": 0.7843304912906457, + "grad_norm": 0.6460981369018555, + "learning_rate": 1.9803683920567832e-06, + "loss": 0.0054, + "num_input_tokens_seen": 21629632, + "step": 32105 + }, + { + "epoch": 0.7844526421224929, + "grad_norm": 24.669296264648438, + "learning_rate": 1.9803515739170073e-06, + "loss": 0.1798, + "num_input_tokens_seen": 21632768, + "step": 32110 + }, + { + "epoch": 0.7845747929543401, + "grad_norm": 40.41068649291992, + "learning_rate": 1.9803347486478508e-06, + "loss": 0.0212, + "num_input_tokens_seen": 21635904, + "step": 32115 + }, + { + "epoch": 0.7846969437861872, + "grad_norm": 49.69608688354492, + "learning_rate": 1.980317916249436e-06, + "loss": 0.1661, + "num_input_tokens_seen": 21639168, + "step": 32120 + }, + { + "epoch": 0.7848190946180343, + "grad_norm": 32.63628005981445, + "learning_rate": 1.9803010767218864e-06, + "loss": 0.1302, + "num_input_tokens_seen": 21642432, + "step": 32125 + }, + { + "epoch": 0.7849412454498815, + "grad_norm": 0.2649306356906891, + "learning_rate": 1.980284230065323e-06, + "loss": 0.0673, + "num_input_tokens_seen": 21645888, + "step": 32130 + }, + { + "epoch": 0.7850633962817287, + "grad_norm": 21.959501266479492, + "learning_rate": 1.9802673762798696e-06, + "loss": 0.3088, + "num_input_tokens_seen": 21649216, + "step": 32135 + }, + { + "epoch": 0.7851855471135758, + "grad_norm": 1.1638773679733276, + "learning_rate": 1.9802505153656477e-06, + "loss": 0.1599, + "num_input_tokens_seen": 21652416, + "step": 32140 + }, + { + "epoch": 0.785307697945423, + "grad_norm": 1.0158441066741943, + "learning_rate": 1.9802336473227804e-06, + "loss": 0.0481, + "num_input_tokens_seen": 21655808, + "step": 32145 + }, + { + "epoch": 0.7854298487772702, + "grad_norm": 4.161768436431885, + "learning_rate": 1.9802167721513906e-06, + "loss": 0.2222, + "num_input_tokens_seen": 21658880, + "step": 32150 + }, + { + "epoch": 0.7855519996091174, + "grad_norm": 7.720676898956299, + "learning_rate": 1.9801998898516006e-06, + "loss": 0.1291, + "num_input_tokens_seen": 21662464, + "step": 32155 + }, + { + "epoch": 0.7856741504409646, + "grad_norm": 0.2764137387275696, + "learning_rate": 1.9801830004235333e-06, + "loss": 0.089, + "num_input_tokens_seen": 21665664, + "step": 32160 + }, + { + "epoch": 0.7857963012728116, + "grad_norm": 37.62343978881836, + "learning_rate": 1.9801661038673123e-06, + "loss": 0.11, + "num_input_tokens_seen": 21668928, + "step": 32165 + }, + { + "epoch": 0.7859184521046588, + "grad_norm": 13.400060653686523, + "learning_rate": 1.9801492001830587e-06, + "loss": 0.0739, + "num_input_tokens_seen": 21672832, + "step": 32170 + }, + { + "epoch": 0.786040602936506, + "grad_norm": 7.543498992919922, + "learning_rate": 1.980132289370897e-06, + "loss": 0.0536, + "num_input_tokens_seen": 21676608, + "step": 32175 + }, + { + "epoch": 0.7861627537683532, + "grad_norm": 3.021576404571533, + "learning_rate": 1.98011537143095e-06, + "loss": 0.1529, + "num_input_tokens_seen": 21679680, + "step": 32180 + }, + { + "epoch": 0.7862849046002003, + "grad_norm": 17.60646629333496, + "learning_rate": 1.98009844636334e-06, + "loss": 0.1392, + "num_input_tokens_seen": 21682624, + "step": 32185 + }, + { + "epoch": 0.7864070554320475, + "grad_norm": 18.22474479675293, + "learning_rate": 1.9800815141681902e-06, + "loss": 0.1319, + "num_input_tokens_seen": 21685632, + "step": 32190 + }, + { + "epoch": 0.7865292062638947, + "grad_norm": 27.5200138092041, + "learning_rate": 1.9800645748456247e-06, + "loss": 0.2551, + "num_input_tokens_seen": 21689024, + "step": 32195 + }, + { + "epoch": 0.7866513570957419, + "grad_norm": 18.015851974487305, + "learning_rate": 1.9800476283957656e-06, + "loss": 0.101, + "num_input_tokens_seen": 21692416, + "step": 32200 + }, + { + "epoch": 0.7867735079275889, + "grad_norm": 33.747230529785156, + "learning_rate": 1.9800306748187367e-06, + "loss": 0.105, + "num_input_tokens_seen": 21695552, + "step": 32205 + }, + { + "epoch": 0.7868956587594361, + "grad_norm": 25.726058959960938, + "learning_rate": 1.9800137141146612e-06, + "loss": 0.0228, + "num_input_tokens_seen": 21698752, + "step": 32210 + }, + { + "epoch": 0.7870178095912833, + "grad_norm": 31.176944732666016, + "learning_rate": 1.979996746283662e-06, + "loss": 0.131, + "num_input_tokens_seen": 21702336, + "step": 32215 + }, + { + "epoch": 0.7871399604231305, + "grad_norm": 0.5230793952941895, + "learning_rate": 1.9799797713258634e-06, + "loss": 0.0525, + "num_input_tokens_seen": 21705856, + "step": 32220 + }, + { + "epoch": 0.7872621112549777, + "grad_norm": 27.23691177368164, + "learning_rate": 1.9799627892413876e-06, + "loss": 0.11, + "num_input_tokens_seen": 21709056, + "step": 32225 + }, + { + "epoch": 0.7873842620868248, + "grad_norm": 34.13618087768555, + "learning_rate": 1.9799458000303594e-06, + "loss": 0.1699, + "num_input_tokens_seen": 21712832, + "step": 32230 + }, + { + "epoch": 0.787506412918672, + "grad_norm": 14.356066703796387, + "learning_rate": 1.9799288036929016e-06, + "loss": 0.1034, + "num_input_tokens_seen": 21716288, + "step": 32235 + }, + { + "epoch": 0.7876285637505192, + "grad_norm": 16.639842987060547, + "learning_rate": 1.979911800229138e-06, + "loss": 0.0651, + "num_input_tokens_seen": 21719872, + "step": 32240 + }, + { + "epoch": 0.7877507145823663, + "grad_norm": 25.77016830444336, + "learning_rate": 1.979894789639192e-06, + "loss": 0.1812, + "num_input_tokens_seen": 21723264, + "step": 32245 + }, + { + "epoch": 0.7878728654142134, + "grad_norm": 15.297296524047852, + "learning_rate": 1.9798777719231882e-06, + "loss": 0.0791, + "num_input_tokens_seen": 21726912, + "step": 32250 + }, + { + "epoch": 0.7879950162460606, + "grad_norm": 1.2768000364303589, + "learning_rate": 1.979860747081249e-06, + "loss": 0.0353, + "num_input_tokens_seen": 21730880, + "step": 32255 + }, + { + "epoch": 0.7881171670779078, + "grad_norm": 0.267890602350235, + "learning_rate": 1.979843715113499e-06, + "loss": 0.1154, + "num_input_tokens_seen": 21734080, + "step": 32260 + }, + { + "epoch": 0.788239317909755, + "grad_norm": 22.803361892700195, + "learning_rate": 1.9798266760200623e-06, + "loss": 0.2999, + "num_input_tokens_seen": 21737088, + "step": 32265 + }, + { + "epoch": 0.7883614687416022, + "grad_norm": 1.6341105699539185, + "learning_rate": 1.979809629801062e-06, + "loss": 0.0086, + "num_input_tokens_seen": 21740288, + "step": 32270 + }, + { + "epoch": 0.7884836195734493, + "grad_norm": 36.74137878417969, + "learning_rate": 1.979792576456623e-06, + "loss": 0.1003, + "num_input_tokens_seen": 21743424, + "step": 32275 + }, + { + "epoch": 0.7886057704052964, + "grad_norm": 28.10059356689453, + "learning_rate": 1.9797755159868687e-06, + "loss": 0.2502, + "num_input_tokens_seen": 21746368, + "step": 32280 + }, + { + "epoch": 0.7887279212371436, + "grad_norm": 18.822345733642578, + "learning_rate": 1.979758448391923e-06, + "loss": 0.0359, + "num_input_tokens_seen": 21749952, + "step": 32285 + }, + { + "epoch": 0.7888500720689908, + "grad_norm": 16.408477783203125, + "learning_rate": 1.9797413736719105e-06, + "loss": 0.1076, + "num_input_tokens_seen": 21753408, + "step": 32290 + }, + { + "epoch": 0.7889722229008379, + "grad_norm": 0.689196765422821, + "learning_rate": 1.9797242918269553e-06, + "loss": 0.1068, + "num_input_tokens_seen": 21756992, + "step": 32295 + }, + { + "epoch": 0.7890943737326851, + "grad_norm": 1.9564344882965088, + "learning_rate": 1.9797072028571816e-06, + "loss": 0.0652, + "num_input_tokens_seen": 21760704, + "step": 32300 + }, + { + "epoch": 0.7892165245645323, + "grad_norm": 17.53862190246582, + "learning_rate": 1.9796901067627135e-06, + "loss": 0.1639, + "num_input_tokens_seen": 21763904, + "step": 32305 + }, + { + "epoch": 0.7893386753963795, + "grad_norm": 53.596839904785156, + "learning_rate": 1.9796730035436756e-06, + "loss": 0.1575, + "num_input_tokens_seen": 21767168, + "step": 32310 + }, + { + "epoch": 0.7894608262282267, + "grad_norm": 0.7122951149940491, + "learning_rate": 1.9796558932001923e-06, + "loss": 0.09, + "num_input_tokens_seen": 21770688, + "step": 32315 + }, + { + "epoch": 0.7895829770600737, + "grad_norm": 30.160568237304688, + "learning_rate": 1.9796387757323873e-06, + "loss": 0.1841, + "num_input_tokens_seen": 21774144, + "step": 32320 + }, + { + "epoch": 0.7897051278919209, + "grad_norm": 3.537285089492798, + "learning_rate": 1.979621651140386e-06, + "loss": 0.0075, + "num_input_tokens_seen": 21777344, + "step": 32325 + }, + { + "epoch": 0.7898272787237681, + "grad_norm": 14.99126148223877, + "learning_rate": 1.9796045194243123e-06, + "loss": 0.166, + "num_input_tokens_seen": 21780544, + "step": 32330 + }, + { + "epoch": 0.7899494295556153, + "grad_norm": 17.25547981262207, + "learning_rate": 1.9795873805842914e-06, + "loss": 0.121, + "num_input_tokens_seen": 21783872, + "step": 32335 + }, + { + "epoch": 0.7900715803874624, + "grad_norm": 39.5103759765625, + "learning_rate": 1.9795702346204473e-06, + "loss": 0.1063, + "num_input_tokens_seen": 21787264, + "step": 32340 + }, + { + "epoch": 0.7901937312193096, + "grad_norm": 34.86042404174805, + "learning_rate": 1.9795530815329053e-06, + "loss": 0.1067, + "num_input_tokens_seen": 21790336, + "step": 32345 + }, + { + "epoch": 0.7903158820511568, + "grad_norm": 39.395835876464844, + "learning_rate": 1.979535921321789e-06, + "loss": 0.0771, + "num_input_tokens_seen": 21793536, + "step": 32350 + }, + { + "epoch": 0.790438032883004, + "grad_norm": 0.45142436027526855, + "learning_rate": 1.979518753987225e-06, + "loss": 0.0313, + "num_input_tokens_seen": 21796352, + "step": 32355 + }, + { + "epoch": 0.7905601837148512, + "grad_norm": 0.10581724345684052, + "learning_rate": 1.979501579529337e-06, + "loss": 0.0862, + "num_input_tokens_seen": 21800064, + "step": 32360 + }, + { + "epoch": 0.7906823345466982, + "grad_norm": 28.593807220458984, + "learning_rate": 1.9794843979482495e-06, + "loss": 0.1273, + "num_input_tokens_seen": 21803136, + "step": 32365 + }, + { + "epoch": 0.7908044853785454, + "grad_norm": 19.38428497314453, + "learning_rate": 1.9794672092440884e-06, + "loss": 0.2033, + "num_input_tokens_seen": 21806464, + "step": 32370 + }, + { + "epoch": 0.7909266362103926, + "grad_norm": 21.668018341064453, + "learning_rate": 1.9794500134169783e-06, + "loss": 0.0742, + "num_input_tokens_seen": 21809664, + "step": 32375 + }, + { + "epoch": 0.7910487870422398, + "grad_norm": 20.23079490661621, + "learning_rate": 1.979432810467044e-06, + "loss": 0.1761, + "num_input_tokens_seen": 21812800, + "step": 32380 + }, + { + "epoch": 0.7911709378740869, + "grad_norm": 29.678831100463867, + "learning_rate": 1.9794156003944115e-06, + "loss": 0.0591, + "num_input_tokens_seen": 21816576, + "step": 32385 + }, + { + "epoch": 0.7912930887059341, + "grad_norm": 21.465499877929688, + "learning_rate": 1.979398383199205e-06, + "loss": 0.1364, + "num_input_tokens_seen": 21819968, + "step": 32390 + }, + { + "epoch": 0.7914152395377813, + "grad_norm": 22.944368362426758, + "learning_rate": 1.9793811588815496e-06, + "loss": 0.1386, + "num_input_tokens_seen": 21823488, + "step": 32395 + }, + { + "epoch": 0.7915373903696284, + "grad_norm": 22.3177547454834, + "learning_rate": 1.9793639274415716e-06, + "loss": 0.1915, + "num_input_tokens_seen": 21826816, + "step": 32400 + }, + { + "epoch": 0.7916595412014756, + "grad_norm": 1.5493758916854858, + "learning_rate": 1.9793466888793953e-06, + "loss": 0.109, + "num_input_tokens_seen": 21830208, + "step": 32405 + }, + { + "epoch": 0.7917816920333227, + "grad_norm": 0.8863944411277771, + "learning_rate": 1.9793294431951467e-06, + "loss": 0.0359, + "num_input_tokens_seen": 21833344, + "step": 32410 + }, + { + "epoch": 0.7919038428651699, + "grad_norm": 13.146297454833984, + "learning_rate": 1.979312190388951e-06, + "loss": 0.1417, + "num_input_tokens_seen": 21836352, + "step": 32415 + }, + { + "epoch": 0.7920259936970171, + "grad_norm": 2.506059408187866, + "learning_rate": 1.9792949304609336e-06, + "loss": 0.1282, + "num_input_tokens_seen": 21840000, + "step": 32420 + }, + { + "epoch": 0.7921481445288643, + "grad_norm": 24.569217681884766, + "learning_rate": 1.97927766341122e-06, + "loss": 0.1626, + "num_input_tokens_seen": 21843264, + "step": 32425 + }, + { + "epoch": 0.7922702953607114, + "grad_norm": 9.783016204833984, + "learning_rate": 1.9792603892399357e-06, + "loss": 0.1898, + "num_input_tokens_seen": 21847168, + "step": 32430 + }, + { + "epoch": 0.7923924461925586, + "grad_norm": 19.442590713500977, + "learning_rate": 1.9792431079472067e-06, + "loss": 0.1549, + "num_input_tokens_seen": 21851136, + "step": 32435 + }, + { + "epoch": 0.7925145970244057, + "grad_norm": 2.4780938625335693, + "learning_rate": 1.9792258195331583e-06, + "loss": 0.1009, + "num_input_tokens_seen": 21854720, + "step": 32440 + }, + { + "epoch": 0.7926367478562529, + "grad_norm": 8.407721519470215, + "learning_rate": 1.9792085239979163e-06, + "loss": 0.0791, + "num_input_tokens_seen": 21858176, + "step": 32445 + }, + { + "epoch": 0.7927588986881, + "grad_norm": 7.173499584197998, + "learning_rate": 1.9791912213416065e-06, + "loss": 0.2024, + "num_input_tokens_seen": 21861376, + "step": 32450 + }, + { + "epoch": 0.7928810495199472, + "grad_norm": 1.6724401712417603, + "learning_rate": 1.9791739115643547e-06, + "loss": 0.0583, + "num_input_tokens_seen": 21864448, + "step": 32455 + }, + { + "epoch": 0.7930032003517944, + "grad_norm": 15.159879684448242, + "learning_rate": 1.9791565946662875e-06, + "loss": 0.1266, + "num_input_tokens_seen": 21868096, + "step": 32460 + }, + { + "epoch": 0.7931253511836416, + "grad_norm": 22.336624145507812, + "learning_rate": 1.9791392706475298e-06, + "loss": 0.1452, + "num_input_tokens_seen": 21871296, + "step": 32465 + }, + { + "epoch": 0.7932475020154888, + "grad_norm": 0.22137446701526642, + "learning_rate": 1.979121939508208e-06, + "loss": 0.0596, + "num_input_tokens_seen": 21875072, + "step": 32470 + }, + { + "epoch": 0.7933696528473358, + "grad_norm": 2.6097412109375, + "learning_rate": 1.9791046012484478e-06, + "loss": 0.1029, + "num_input_tokens_seen": 21878528, + "step": 32475 + }, + { + "epoch": 0.793491803679183, + "grad_norm": 10.868502616882324, + "learning_rate": 1.9790872558683755e-06, + "loss": 0.0529, + "num_input_tokens_seen": 21882176, + "step": 32480 + }, + { + "epoch": 0.7936139545110302, + "grad_norm": 0.6876401305198669, + "learning_rate": 1.9790699033681176e-06, + "loss": 0.1334, + "num_input_tokens_seen": 21885760, + "step": 32485 + }, + { + "epoch": 0.7937361053428774, + "grad_norm": 31.104290008544922, + "learning_rate": 1.9790525437478002e-06, + "loss": 0.3129, + "num_input_tokens_seen": 21889152, + "step": 32490 + }, + { + "epoch": 0.7938582561747245, + "grad_norm": 2.89729380607605, + "learning_rate": 1.9790351770075492e-06, + "loss": 0.0127, + "num_input_tokens_seen": 21892352, + "step": 32495 + }, + { + "epoch": 0.7939804070065717, + "grad_norm": 8.085244178771973, + "learning_rate": 1.9790178031474912e-06, + "loss": 0.1781, + "num_input_tokens_seen": 21895744, + "step": 32500 + }, + { + "epoch": 0.7941025578384189, + "grad_norm": 25.23969078063965, + "learning_rate": 1.9790004221677524e-06, + "loss": 0.0983, + "num_input_tokens_seen": 21898944, + "step": 32505 + }, + { + "epoch": 0.7942247086702661, + "grad_norm": 12.462812423706055, + "learning_rate": 1.978983034068459e-06, + "loss": 0.1732, + "num_input_tokens_seen": 21902400, + "step": 32510 + }, + { + "epoch": 0.7943468595021133, + "grad_norm": 1.5091313123703003, + "learning_rate": 1.9789656388497376e-06, + "loss": 0.1392, + "num_input_tokens_seen": 21905728, + "step": 32515 + }, + { + "epoch": 0.7944690103339603, + "grad_norm": 6.18993616104126, + "learning_rate": 1.978948236511715e-06, + "loss": 0.1258, + "num_input_tokens_seen": 21909184, + "step": 32520 + }, + { + "epoch": 0.7945911611658075, + "grad_norm": 11.886087417602539, + "learning_rate": 1.9789308270545175e-06, + "loss": 0.0554, + "num_input_tokens_seen": 21912256, + "step": 32525 + }, + { + "epoch": 0.7947133119976547, + "grad_norm": 30.678869247436523, + "learning_rate": 1.9789134104782716e-06, + "loss": 0.0334, + "num_input_tokens_seen": 21915904, + "step": 32530 + }, + { + "epoch": 0.7948354628295019, + "grad_norm": 9.77381420135498, + "learning_rate": 1.9788959867831043e-06, + "loss": 0.2204, + "num_input_tokens_seen": 21919232, + "step": 32535 + }, + { + "epoch": 0.794957613661349, + "grad_norm": 0.5201031565666199, + "learning_rate": 1.9788785559691417e-06, + "loss": 0.1402, + "num_input_tokens_seen": 21922048, + "step": 32540 + }, + { + "epoch": 0.7950797644931962, + "grad_norm": 0.22782576084136963, + "learning_rate": 1.9788611180365114e-06, + "loss": 0.0586, + "num_input_tokens_seen": 21925568, + "step": 32545 + }, + { + "epoch": 0.7952019153250434, + "grad_norm": 30.4063663482666, + "learning_rate": 1.9788436729853395e-06, + "loss": 0.2114, + "num_input_tokens_seen": 21928704, + "step": 32550 + }, + { + "epoch": 0.7953240661568906, + "grad_norm": 39.70443344116211, + "learning_rate": 1.978826220815753e-06, + "loss": 0.1629, + "num_input_tokens_seen": 21932032, + "step": 32555 + }, + { + "epoch": 0.7954462169887377, + "grad_norm": 14.86407470703125, + "learning_rate": 1.9788087615278793e-06, + "loss": 0.1327, + "num_input_tokens_seen": 21935808, + "step": 32560 + }, + { + "epoch": 0.7955683678205848, + "grad_norm": 2.5342135429382324, + "learning_rate": 1.9787912951218447e-06, + "loss": 0.0649, + "num_input_tokens_seen": 21939264, + "step": 32565 + }, + { + "epoch": 0.795690518652432, + "grad_norm": 13.98967170715332, + "learning_rate": 1.978773821597777e-06, + "loss": 0.1148, + "num_input_tokens_seen": 21942848, + "step": 32570 + }, + { + "epoch": 0.7958126694842792, + "grad_norm": 0.22576695680618286, + "learning_rate": 1.978756340955802e-06, + "loss": 0.0996, + "num_input_tokens_seen": 21946048, + "step": 32575 + }, + { + "epoch": 0.7959348203161264, + "grad_norm": 13.705849647521973, + "learning_rate": 1.9787388531960488e-06, + "loss": 0.0786, + "num_input_tokens_seen": 21949696, + "step": 32580 + }, + { + "epoch": 0.7960569711479735, + "grad_norm": 0.5072705149650574, + "learning_rate": 1.9787213583186423e-06, + "loss": 0.0497, + "num_input_tokens_seen": 21953088, + "step": 32585 + }, + { + "epoch": 0.7961791219798207, + "grad_norm": 9.198322296142578, + "learning_rate": 1.9787038563237117e-06, + "loss": 0.1569, + "num_input_tokens_seen": 21956224, + "step": 32590 + }, + { + "epoch": 0.7963012728116678, + "grad_norm": 0.8958269357681274, + "learning_rate": 1.978686347211383e-06, + "loss": 0.0616, + "num_input_tokens_seen": 21959360, + "step": 32595 + }, + { + "epoch": 0.796423423643515, + "grad_norm": 38.93531036376953, + "learning_rate": 1.9786688309817836e-06, + "loss": 0.1849, + "num_input_tokens_seen": 21962496, + "step": 32600 + }, + { + "epoch": 0.7965455744753622, + "grad_norm": 17.935117721557617, + "learning_rate": 1.978651307635042e-06, + "loss": 0.1986, + "num_input_tokens_seen": 21966016, + "step": 32605 + }, + { + "epoch": 0.7966677253072093, + "grad_norm": 8.136306762695312, + "learning_rate": 1.9786337771712845e-06, + "loss": 0.114, + "num_input_tokens_seen": 21969792, + "step": 32610 + }, + { + "epoch": 0.7967898761390565, + "grad_norm": 0.9560222029685974, + "learning_rate": 1.9786162395906388e-06, + "loss": 0.1428, + "num_input_tokens_seen": 21973184, + "step": 32615 + }, + { + "epoch": 0.7969120269709037, + "grad_norm": 10.984770774841309, + "learning_rate": 1.9785986948932324e-06, + "loss": 0.1996, + "num_input_tokens_seen": 21976768, + "step": 32620 + }, + { + "epoch": 0.7970341778027509, + "grad_norm": 37.20198440551758, + "learning_rate": 1.9785811430791932e-06, + "loss": 0.0833, + "num_input_tokens_seen": 21980480, + "step": 32625 + }, + { + "epoch": 0.797156328634598, + "grad_norm": 0.6844213604927063, + "learning_rate": 1.9785635841486492e-06, + "loss": 0.0975, + "num_input_tokens_seen": 21984320, + "step": 32630 + }, + { + "epoch": 0.7972784794664451, + "grad_norm": 15.415057182312012, + "learning_rate": 1.9785460181017274e-06, + "loss": 0.0554, + "num_input_tokens_seen": 21987776, + "step": 32635 + }, + { + "epoch": 0.7974006302982923, + "grad_norm": 17.4122257232666, + "learning_rate": 1.9785284449385557e-06, + "loss": 0.1462, + "num_input_tokens_seen": 21991360, + "step": 32640 + }, + { + "epoch": 0.7975227811301395, + "grad_norm": 0.6733130812644958, + "learning_rate": 1.978510864659262e-06, + "loss": 0.1373, + "num_input_tokens_seen": 21994816, + "step": 32645 + }, + { + "epoch": 0.7976449319619867, + "grad_norm": 28.956544876098633, + "learning_rate": 1.978493277263974e-06, + "loss": 0.1573, + "num_input_tokens_seen": 21998208, + "step": 32650 + }, + { + "epoch": 0.7977670827938338, + "grad_norm": 26.73524284362793, + "learning_rate": 1.97847568275282e-06, + "loss": 0.081, + "num_input_tokens_seen": 22002048, + "step": 32655 + }, + { + "epoch": 0.797889233625681, + "grad_norm": 0.4550372064113617, + "learning_rate": 1.978458081125927e-06, + "loss": 0.0815, + "num_input_tokens_seen": 22005056, + "step": 32660 + }, + { + "epoch": 0.7980113844575282, + "grad_norm": 33.69190979003906, + "learning_rate": 1.978440472383424e-06, + "loss": 0.1989, + "num_input_tokens_seen": 22008320, + "step": 32665 + }, + { + "epoch": 0.7981335352893754, + "grad_norm": 16.381532669067383, + "learning_rate": 1.978422856525439e-06, + "loss": 0.1015, + "num_input_tokens_seen": 22011008, + "step": 32670 + }, + { + "epoch": 0.7982556861212224, + "grad_norm": 26.662216186523438, + "learning_rate": 1.9784052335520997e-06, + "loss": 0.1529, + "num_input_tokens_seen": 22014016, + "step": 32675 + }, + { + "epoch": 0.7983778369530696, + "grad_norm": 8.46114730834961, + "learning_rate": 1.978387603463534e-06, + "loss": 0.0387, + "num_input_tokens_seen": 22017472, + "step": 32680 + }, + { + "epoch": 0.7984999877849168, + "grad_norm": 0.5248132348060608, + "learning_rate": 1.978369966259871e-06, + "loss": 0.0815, + "num_input_tokens_seen": 22020544, + "step": 32685 + }, + { + "epoch": 0.798622138616764, + "grad_norm": 10.598689079284668, + "learning_rate": 1.978352321941238e-06, + "loss": 0.0838, + "num_input_tokens_seen": 22023744, + "step": 32690 + }, + { + "epoch": 0.7987442894486112, + "grad_norm": 0.6789451241493225, + "learning_rate": 1.978334670507764e-06, + "loss": 0.0937, + "num_input_tokens_seen": 22026944, + "step": 32695 + }, + { + "epoch": 0.7988664402804583, + "grad_norm": 25.466279983520508, + "learning_rate": 1.9783170119595775e-06, + "loss": 0.1423, + "num_input_tokens_seen": 22030144, + "step": 32700 + }, + { + "epoch": 0.7989885911123055, + "grad_norm": 18.387006759643555, + "learning_rate": 1.978299346296806e-06, + "loss": 0.1669, + "num_input_tokens_seen": 22033408, + "step": 32705 + }, + { + "epoch": 0.7991107419441527, + "grad_norm": 13.358413696289062, + "learning_rate": 1.9782816735195786e-06, + "loss": 0.1384, + "num_input_tokens_seen": 22036928, + "step": 32710 + }, + { + "epoch": 0.7992328927759998, + "grad_norm": 10.378283500671387, + "learning_rate": 1.978263993628024e-06, + "loss": 0.201, + "num_input_tokens_seen": 22040512, + "step": 32715 + }, + { + "epoch": 0.7993550436078469, + "grad_norm": 0.48704561591148376, + "learning_rate": 1.9782463066222702e-06, + "loss": 0.0564, + "num_input_tokens_seen": 22043776, + "step": 32720 + }, + { + "epoch": 0.7994771944396941, + "grad_norm": 35.04000473022461, + "learning_rate": 1.9782286125024464e-06, + "loss": 0.1006, + "num_input_tokens_seen": 22046784, + "step": 32725 + }, + { + "epoch": 0.7995993452715413, + "grad_norm": 23.447757720947266, + "learning_rate": 1.9782109112686812e-06, + "loss": 0.125, + "num_input_tokens_seen": 22049856, + "step": 32730 + }, + { + "epoch": 0.7997214961033885, + "grad_norm": 27.612590789794922, + "learning_rate": 1.9781932029211027e-06, + "loss": 0.1607, + "num_input_tokens_seen": 22053056, + "step": 32735 + }, + { + "epoch": 0.7998436469352356, + "grad_norm": 24.697265625, + "learning_rate": 1.9781754874598403e-06, + "loss": 0.1071, + "num_input_tokens_seen": 22056128, + "step": 32740 + }, + { + "epoch": 0.7999657977670828, + "grad_norm": 43.93508529663086, + "learning_rate": 1.978157764885023e-06, + "loss": 0.1883, + "num_input_tokens_seen": 22059520, + "step": 32745 + }, + { + "epoch": 0.80008794859893, + "grad_norm": 2.7973132133483887, + "learning_rate": 1.9781400351967787e-06, + "loss": 0.0534, + "num_input_tokens_seen": 22063168, + "step": 32750 + }, + { + "epoch": 0.8002100994307771, + "grad_norm": 21.7699031829834, + "learning_rate": 1.9781222983952374e-06, + "loss": 0.199, + "num_input_tokens_seen": 22066496, + "step": 32755 + }, + { + "epoch": 0.8003322502626243, + "grad_norm": 0.8064271807670593, + "learning_rate": 1.9781045544805273e-06, + "loss": 0.0776, + "num_input_tokens_seen": 22069696, + "step": 32760 + }, + { + "epoch": 0.8004544010944714, + "grad_norm": 2.368868589401245, + "learning_rate": 1.9780868034527783e-06, + "loss": 0.0598, + "num_input_tokens_seen": 22073472, + "step": 32765 + }, + { + "epoch": 0.8005765519263186, + "grad_norm": 22.38524627685547, + "learning_rate": 1.9780690453121185e-06, + "loss": 0.1173, + "num_input_tokens_seen": 22076352, + "step": 32770 + }, + { + "epoch": 0.8006987027581658, + "grad_norm": 3.6442208290100098, + "learning_rate": 1.978051280058678e-06, + "loss": 0.0787, + "num_input_tokens_seen": 22079808, + "step": 32775 + }, + { + "epoch": 0.800820853590013, + "grad_norm": 17.011329650878906, + "learning_rate": 1.978033507692585e-06, + "loss": 0.1477, + "num_input_tokens_seen": 22082944, + "step": 32780 + }, + { + "epoch": 0.8009430044218601, + "grad_norm": 21.807342529296875, + "learning_rate": 1.9780157282139697e-06, + "loss": 0.1523, + "num_input_tokens_seen": 22086336, + "step": 32785 + }, + { + "epoch": 0.8010651552537073, + "grad_norm": 2.756991386413574, + "learning_rate": 1.977997941622961e-06, + "loss": 0.0947, + "num_input_tokens_seen": 22089280, + "step": 32790 + }, + { + "epoch": 0.8011873060855544, + "grad_norm": 34.63652420043945, + "learning_rate": 1.9779801479196877e-06, + "loss": 0.1376, + "num_input_tokens_seen": 22093120, + "step": 32795 + }, + { + "epoch": 0.8013094569174016, + "grad_norm": 0.5275918245315552, + "learning_rate": 1.97796234710428e-06, + "loss": 0.1301, + "num_input_tokens_seen": 22096640, + "step": 32800 + }, + { + "epoch": 0.8014316077492488, + "grad_norm": 29.089365005493164, + "learning_rate": 1.9779445391768673e-06, + "loss": 0.1151, + "num_input_tokens_seen": 22099776, + "step": 32805 + }, + { + "epoch": 0.8015537585810959, + "grad_norm": 0.1555165946483612, + "learning_rate": 1.9779267241375786e-06, + "loss": 0.0475, + "num_input_tokens_seen": 22102912, + "step": 32810 + }, + { + "epoch": 0.8016759094129431, + "grad_norm": 7.828727722167969, + "learning_rate": 1.977908901986544e-06, + "loss": 0.0806, + "num_input_tokens_seen": 22106304, + "step": 32815 + }, + { + "epoch": 0.8017980602447903, + "grad_norm": 14.934768676757812, + "learning_rate": 1.9778910727238925e-06, + "loss": 0.0678, + "num_input_tokens_seen": 22109568, + "step": 32820 + }, + { + "epoch": 0.8019202110766375, + "grad_norm": 20.33085823059082, + "learning_rate": 1.9778732363497544e-06, + "loss": 0.1423, + "num_input_tokens_seen": 22112960, + "step": 32825 + }, + { + "epoch": 0.8020423619084845, + "grad_norm": 18.72711181640625, + "learning_rate": 1.977855392864259e-06, + "loss": 0.0479, + "num_input_tokens_seen": 22116096, + "step": 32830 + }, + { + "epoch": 0.8021645127403317, + "grad_norm": 0.35270586609840393, + "learning_rate": 1.977837542267536e-06, + "loss": 0.0336, + "num_input_tokens_seen": 22119552, + "step": 32835 + }, + { + "epoch": 0.8022866635721789, + "grad_norm": 6.33324670791626, + "learning_rate": 1.9778196845597157e-06, + "loss": 0.0892, + "num_input_tokens_seen": 22122752, + "step": 32840 + }, + { + "epoch": 0.8024088144040261, + "grad_norm": 16.176767349243164, + "learning_rate": 1.977801819740928e-06, + "loss": 0.0598, + "num_input_tokens_seen": 22125952, + "step": 32845 + }, + { + "epoch": 0.8025309652358733, + "grad_norm": 25.649085998535156, + "learning_rate": 1.9777839478113015e-06, + "loss": 0.134, + "num_input_tokens_seen": 22129152, + "step": 32850 + }, + { + "epoch": 0.8026531160677204, + "grad_norm": 0.7280669808387756, + "learning_rate": 1.9777660687709678e-06, + "loss": 0.0702, + "num_input_tokens_seen": 22132416, + "step": 32855 + }, + { + "epoch": 0.8027752668995676, + "grad_norm": 10.644025802612305, + "learning_rate": 1.977748182620056e-06, + "loss": 0.1017, + "num_input_tokens_seen": 22135808, + "step": 32860 + }, + { + "epoch": 0.8028974177314148, + "grad_norm": 0.5144332647323608, + "learning_rate": 1.9777302893586966e-06, + "loss": 0.1627, + "num_input_tokens_seen": 22138944, + "step": 32865 + }, + { + "epoch": 0.803019568563262, + "grad_norm": 2.8789901733398438, + "learning_rate": 1.9777123889870197e-06, + "loss": 0.0645, + "num_input_tokens_seen": 22142272, + "step": 32870 + }, + { + "epoch": 0.803141719395109, + "grad_norm": 17.88456916809082, + "learning_rate": 1.9776944815051547e-06, + "loss": 0.1231, + "num_input_tokens_seen": 22145472, + "step": 32875 + }, + { + "epoch": 0.8032638702269562, + "grad_norm": 21.952150344848633, + "learning_rate": 1.977676566913233e-06, + "loss": 0.0995, + "num_input_tokens_seen": 22148608, + "step": 32880 + }, + { + "epoch": 0.8033860210588034, + "grad_norm": 0.8212730884552002, + "learning_rate": 1.9776586452113842e-06, + "loss": 0.1121, + "num_input_tokens_seen": 22152064, + "step": 32885 + }, + { + "epoch": 0.8035081718906506, + "grad_norm": 21.854928970336914, + "learning_rate": 1.977640716399739e-06, + "loss": 0.098, + "num_input_tokens_seen": 22155328, + "step": 32890 + }, + { + "epoch": 0.8036303227224978, + "grad_norm": 7.417600631713867, + "learning_rate": 1.9776227804784275e-06, + "loss": 0.0745, + "num_input_tokens_seen": 22158912, + "step": 32895 + }, + { + "epoch": 0.8037524735543449, + "grad_norm": 0.8115556240081787, + "learning_rate": 1.9776048374475794e-06, + "loss": 0.0898, + "num_input_tokens_seen": 22162112, + "step": 32900 + }, + { + "epoch": 0.8038746243861921, + "grad_norm": 40.99019241333008, + "learning_rate": 1.9775868873073267e-06, + "loss": 0.1985, + "num_input_tokens_seen": 22165504, + "step": 32905 + }, + { + "epoch": 0.8039967752180393, + "grad_norm": 1.1838963031768799, + "learning_rate": 1.9775689300577995e-06, + "loss": 0.038, + "num_input_tokens_seen": 22168832, + "step": 32910 + }, + { + "epoch": 0.8041189260498864, + "grad_norm": 16.456296920776367, + "learning_rate": 1.9775509656991277e-06, + "loss": 0.1404, + "num_input_tokens_seen": 22172096, + "step": 32915 + }, + { + "epoch": 0.8042410768817335, + "grad_norm": 18.349414825439453, + "learning_rate": 1.977532994231442e-06, + "loss": 0.0826, + "num_input_tokens_seen": 22175360, + "step": 32920 + }, + { + "epoch": 0.8043632277135807, + "grad_norm": 6.988818168640137, + "learning_rate": 1.9775150156548743e-06, + "loss": 0.0411, + "num_input_tokens_seen": 22179008, + "step": 32925 + }, + { + "epoch": 0.8044853785454279, + "grad_norm": 106.58795166015625, + "learning_rate": 1.977497029969554e-06, + "loss": 0.1473, + "num_input_tokens_seen": 22182336, + "step": 32930 + }, + { + "epoch": 0.8046075293772751, + "grad_norm": 22.680753707885742, + "learning_rate": 1.977479037175612e-06, + "loss": 0.0851, + "num_input_tokens_seen": 22185472, + "step": 32935 + }, + { + "epoch": 0.8047296802091223, + "grad_norm": 8.83067512512207, + "learning_rate": 1.97746103727318e-06, + "loss": 0.2425, + "num_input_tokens_seen": 22189120, + "step": 32940 + }, + { + "epoch": 0.8048518310409694, + "grad_norm": 27.481521606445312, + "learning_rate": 1.9774430302623887e-06, + "loss": 0.1671, + "num_input_tokens_seen": 22192768, + "step": 32945 + }, + { + "epoch": 0.8049739818728165, + "grad_norm": 41.317142486572266, + "learning_rate": 1.977425016143368e-06, + "loss": 0.2252, + "num_input_tokens_seen": 22196288, + "step": 32950 + }, + { + "epoch": 0.8050961327046637, + "grad_norm": 45.449501037597656, + "learning_rate": 1.9774069949162504e-06, + "loss": 0.1459, + "num_input_tokens_seen": 22199744, + "step": 32955 + }, + { + "epoch": 0.8052182835365109, + "grad_norm": 0.5199189186096191, + "learning_rate": 1.9773889665811657e-06, + "loss": 0.132, + "num_input_tokens_seen": 22203520, + "step": 32960 + }, + { + "epoch": 0.805340434368358, + "grad_norm": 18.819425582885742, + "learning_rate": 1.977370931138246e-06, + "loss": 0.0947, + "num_input_tokens_seen": 22206976, + "step": 32965 + }, + { + "epoch": 0.8054625852002052, + "grad_norm": 36.9178352355957, + "learning_rate": 1.977352888587622e-06, + "loss": 0.2265, + "num_input_tokens_seen": 22210496, + "step": 32970 + }, + { + "epoch": 0.8055847360320524, + "grad_norm": 18.44637680053711, + "learning_rate": 1.9773348389294243e-06, + "loss": 0.09, + "num_input_tokens_seen": 22213760, + "step": 32975 + }, + { + "epoch": 0.8057068868638996, + "grad_norm": 1.2746877670288086, + "learning_rate": 1.9773167821637855e-06, + "loss": 0.1072, + "num_input_tokens_seen": 22217152, + "step": 32980 + }, + { + "epoch": 0.8058290376957467, + "grad_norm": 2.1266651153564453, + "learning_rate": 1.977298718290836e-06, + "loss": 0.1145, + "num_input_tokens_seen": 22221056, + "step": 32985 + }, + { + "epoch": 0.8059511885275938, + "grad_norm": 0.1620056927204132, + "learning_rate": 1.9772806473107072e-06, + "loss": 0.0897, + "num_input_tokens_seen": 22224320, + "step": 32990 + }, + { + "epoch": 0.806073339359441, + "grad_norm": 40.33462905883789, + "learning_rate": 1.977262569223531e-06, + "loss": 0.1855, + "num_input_tokens_seen": 22227840, + "step": 32995 + }, + { + "epoch": 0.8061954901912882, + "grad_norm": 0.31348946690559387, + "learning_rate": 1.977244484029438e-06, + "loss": 0.0983, + "num_input_tokens_seen": 22230976, + "step": 33000 + }, + { + "epoch": 0.8063176410231354, + "grad_norm": 28.071033477783203, + "learning_rate": 1.9772263917285606e-06, + "loss": 0.1356, + "num_input_tokens_seen": 22234432, + "step": 33005 + }, + { + "epoch": 0.8064397918549825, + "grad_norm": 10.321966171264648, + "learning_rate": 1.97720829232103e-06, + "loss": 0.1449, + "num_input_tokens_seen": 22237824, + "step": 33010 + }, + { + "epoch": 0.8065619426868297, + "grad_norm": 0.7909249067306519, + "learning_rate": 1.9771901858069778e-06, + "loss": 0.1319, + "num_input_tokens_seen": 22241088, + "step": 33015 + }, + { + "epoch": 0.8066840935186769, + "grad_norm": 20.836950302124023, + "learning_rate": 1.9771720721865355e-06, + "loss": 0.1683, + "num_input_tokens_seen": 22244352, + "step": 33020 + }, + { + "epoch": 0.8068062443505241, + "grad_norm": 3.560326099395752, + "learning_rate": 1.977153951459836e-06, + "loss": 0.1275, + "num_input_tokens_seen": 22247552, + "step": 33025 + }, + { + "epoch": 0.8069283951823711, + "grad_norm": 0.5384361147880554, + "learning_rate": 1.977135823627009e-06, + "loss": 0.083, + "num_input_tokens_seen": 22251456, + "step": 33030 + }, + { + "epoch": 0.8070505460142183, + "grad_norm": 0.37456443905830383, + "learning_rate": 1.9771176886881883e-06, + "loss": 0.1143, + "num_input_tokens_seen": 22254656, + "step": 33035 + }, + { + "epoch": 0.8071726968460655, + "grad_norm": 6.5614237785339355, + "learning_rate": 1.9770995466435044e-06, + "loss": 0.0217, + "num_input_tokens_seen": 22257728, + "step": 33040 + }, + { + "epoch": 0.8072948476779127, + "grad_norm": 42.75928497314453, + "learning_rate": 1.97708139749309e-06, + "loss": 0.1372, + "num_input_tokens_seen": 22260992, + "step": 33045 + }, + { + "epoch": 0.8074169985097599, + "grad_norm": 0.1671907603740692, + "learning_rate": 1.977063241237077e-06, + "loss": 0.1286, + "num_input_tokens_seen": 22264128, + "step": 33050 + }, + { + "epoch": 0.807539149341607, + "grad_norm": 10.650352478027344, + "learning_rate": 1.9770450778755972e-06, + "loss": 0.103, + "num_input_tokens_seen": 22267392, + "step": 33055 + }, + { + "epoch": 0.8076613001734542, + "grad_norm": 16.05209732055664, + "learning_rate": 1.9770269074087825e-06, + "loss": 0.1014, + "num_input_tokens_seen": 22270720, + "step": 33060 + }, + { + "epoch": 0.8077834510053014, + "grad_norm": 0.3560777008533478, + "learning_rate": 1.9770087298367657e-06, + "loss": 0.0469, + "num_input_tokens_seen": 22273856, + "step": 33065 + }, + { + "epoch": 0.8079056018371485, + "grad_norm": 0.26360440254211426, + "learning_rate": 1.976990545159679e-06, + "loss": 0.088, + "num_input_tokens_seen": 22277248, + "step": 33070 + }, + { + "epoch": 0.8080277526689956, + "grad_norm": 7.372500419616699, + "learning_rate": 1.9769723533776537e-06, + "loss": 0.1035, + "num_input_tokens_seen": 22280512, + "step": 33075 + }, + { + "epoch": 0.8081499035008428, + "grad_norm": 12.81210708618164, + "learning_rate": 1.9769541544908228e-06, + "loss": 0.1061, + "num_input_tokens_seen": 22283776, + "step": 33080 + }, + { + "epoch": 0.80827205433269, + "grad_norm": 16.666837692260742, + "learning_rate": 1.9769359484993183e-06, + "loss": 0.1643, + "num_input_tokens_seen": 22288128, + "step": 33085 + }, + { + "epoch": 0.8083942051645372, + "grad_norm": 0.25864091515541077, + "learning_rate": 1.976917735403273e-06, + "loss": 0.0042, + "num_input_tokens_seen": 22291456, + "step": 33090 + }, + { + "epoch": 0.8085163559963844, + "grad_norm": 6.298408508300781, + "learning_rate": 1.976899515202819e-06, + "loss": 0.2322, + "num_input_tokens_seen": 22294528, + "step": 33095 + }, + { + "epoch": 0.8086385068282315, + "grad_norm": 0.8649539351463318, + "learning_rate": 1.9768812878980896e-06, + "loss": 0.0393, + "num_input_tokens_seen": 22297472, + "step": 33100 + }, + { + "epoch": 0.8087606576600787, + "grad_norm": 0.1666417419910431, + "learning_rate": 1.9768630534892164e-06, + "loss": 0.1869, + "num_input_tokens_seen": 22300672, + "step": 33105 + }, + { + "epoch": 0.8088828084919258, + "grad_norm": 20.9919376373291, + "learning_rate": 1.976844811976332e-06, + "loss": 0.0332, + "num_input_tokens_seen": 22304320, + "step": 33110 + }, + { + "epoch": 0.809004959323773, + "grad_norm": 17.010021209716797, + "learning_rate": 1.97682656335957e-06, + "loss": 0.1254, + "num_input_tokens_seen": 22307200, + "step": 33115 + }, + { + "epoch": 0.8091271101556201, + "grad_norm": 17.23734474182129, + "learning_rate": 1.976808307639062e-06, + "loss": 0.1566, + "num_input_tokens_seen": 22310464, + "step": 33120 + }, + { + "epoch": 0.8092492609874673, + "grad_norm": 19.25299644470215, + "learning_rate": 1.976790044814941e-06, + "loss": 0.1316, + "num_input_tokens_seen": 22313792, + "step": 33125 + }, + { + "epoch": 0.8093714118193145, + "grad_norm": 1.0612061023712158, + "learning_rate": 1.976771774887341e-06, + "loss": 0.0509, + "num_input_tokens_seen": 22317376, + "step": 33130 + }, + { + "epoch": 0.8094935626511617, + "grad_norm": 30.042892456054688, + "learning_rate": 1.976753497856393e-06, + "loss": 0.1537, + "num_input_tokens_seen": 22321024, + "step": 33135 + }, + { + "epoch": 0.8096157134830089, + "grad_norm": 0.48264002799987793, + "learning_rate": 1.9767352137222313e-06, + "loss": 0.0427, + "num_input_tokens_seen": 22324288, + "step": 33140 + }, + { + "epoch": 0.809737864314856, + "grad_norm": 14.422656059265137, + "learning_rate": 1.9767169224849884e-06, + "loss": 0.1251, + "num_input_tokens_seen": 22327360, + "step": 33145 + }, + { + "epoch": 0.8098600151467031, + "grad_norm": 19.12939453125, + "learning_rate": 1.9766986241447975e-06, + "loss": 0.1213, + "num_input_tokens_seen": 22330688, + "step": 33150 + }, + { + "epoch": 0.8099821659785503, + "grad_norm": 27.57765007019043, + "learning_rate": 1.9766803187017914e-06, + "loss": 0.1751, + "num_input_tokens_seen": 22334272, + "step": 33155 + }, + { + "epoch": 0.8101043168103975, + "grad_norm": 0.18605254590511322, + "learning_rate": 1.976662006156103e-06, + "loss": 0.0847, + "num_input_tokens_seen": 22337600, + "step": 33160 + }, + { + "epoch": 0.8102264676422446, + "grad_norm": 23.411142349243164, + "learning_rate": 1.9766436865078663e-06, + "loss": 0.1183, + "num_input_tokens_seen": 22340672, + "step": 33165 + }, + { + "epoch": 0.8103486184740918, + "grad_norm": 47.72703170776367, + "learning_rate": 1.9766253597572136e-06, + "loss": 0.2254, + "num_input_tokens_seen": 22343936, + "step": 33170 + }, + { + "epoch": 0.810470769305939, + "grad_norm": 12.8462553024292, + "learning_rate": 1.9766070259042786e-06, + "loss": 0.1445, + "num_input_tokens_seen": 22347008, + "step": 33175 + }, + { + "epoch": 0.8105929201377862, + "grad_norm": 19.043668746948242, + "learning_rate": 1.976588684949195e-06, + "loss": 0.1202, + "num_input_tokens_seen": 22350016, + "step": 33180 + }, + { + "epoch": 0.8107150709696334, + "grad_norm": 8.385092735290527, + "learning_rate": 1.9765703368920958e-06, + "loss": 0.1223, + "num_input_tokens_seen": 22353408, + "step": 33185 + }, + { + "epoch": 0.8108372218014804, + "grad_norm": 10.078439712524414, + "learning_rate": 1.976551981733114e-06, + "loss": 0.0386, + "num_input_tokens_seen": 22356736, + "step": 33190 + }, + { + "epoch": 0.8109593726333276, + "grad_norm": 12.328940391540527, + "learning_rate": 1.9765336194723836e-06, + "loss": 0.1587, + "num_input_tokens_seen": 22360384, + "step": 33195 + }, + { + "epoch": 0.8110815234651748, + "grad_norm": 0.8422468900680542, + "learning_rate": 1.9765152501100386e-06, + "loss": 0.1238, + "num_input_tokens_seen": 22363904, + "step": 33200 + }, + { + "epoch": 0.811203674297022, + "grad_norm": 2.3422234058380127, + "learning_rate": 1.9764968736462116e-06, + "loss": 0.0939, + "num_input_tokens_seen": 22367488, + "step": 33205 + }, + { + "epoch": 0.8113258251288691, + "grad_norm": 0.6595805287361145, + "learning_rate": 1.9764784900810367e-06, + "loss": 0.0102, + "num_input_tokens_seen": 22370560, + "step": 33210 + }, + { + "epoch": 0.8114479759607163, + "grad_norm": 2.1569666862487793, + "learning_rate": 1.9764600994146474e-06, + "loss": 0.0797, + "num_input_tokens_seen": 22374336, + "step": 33215 + }, + { + "epoch": 0.8115701267925635, + "grad_norm": 15.887921333312988, + "learning_rate": 1.976441701647178e-06, + "loss": 0.1435, + "num_input_tokens_seen": 22377792, + "step": 33220 + }, + { + "epoch": 0.8116922776244107, + "grad_norm": 0.7121223211288452, + "learning_rate": 1.976423296778762e-06, + "loss": 0.082, + "num_input_tokens_seen": 22380800, + "step": 33225 + }, + { + "epoch": 0.8118144284562578, + "grad_norm": 33.41606903076172, + "learning_rate": 1.976404884809533e-06, + "loss": 0.1396, + "num_input_tokens_seen": 22384128, + "step": 33230 + }, + { + "epoch": 0.8119365792881049, + "grad_norm": 31.53432846069336, + "learning_rate": 1.9763864657396247e-06, + "loss": 0.0456, + "num_input_tokens_seen": 22388032, + "step": 33235 + }, + { + "epoch": 0.8120587301199521, + "grad_norm": 0.27628836035728455, + "learning_rate": 1.976368039569172e-06, + "loss": 0.1648, + "num_input_tokens_seen": 22391424, + "step": 33240 + }, + { + "epoch": 0.8121808809517993, + "grad_norm": 42.41301345825195, + "learning_rate": 1.976349606298308e-06, + "loss": 0.1858, + "num_input_tokens_seen": 22395136, + "step": 33245 + }, + { + "epoch": 0.8123030317836465, + "grad_norm": 5.459909439086914, + "learning_rate": 1.9763311659271672e-06, + "loss": 0.0211, + "num_input_tokens_seen": 22398592, + "step": 33250 + }, + { + "epoch": 0.8124251826154936, + "grad_norm": 14.837047576904297, + "learning_rate": 1.976312718455883e-06, + "loss": 0.1305, + "num_input_tokens_seen": 22402368, + "step": 33255 + }, + { + "epoch": 0.8125473334473408, + "grad_norm": 33.13792037963867, + "learning_rate": 1.976294263884591e-06, + "loss": 0.0898, + "num_input_tokens_seen": 22406144, + "step": 33260 + }, + { + "epoch": 0.812669484279188, + "grad_norm": 1.2484996318817139, + "learning_rate": 1.9762758022134236e-06, + "loss": 0.0859, + "num_input_tokens_seen": 22409408, + "step": 33265 + }, + { + "epoch": 0.8127916351110351, + "grad_norm": 5.0173258781433105, + "learning_rate": 1.9762573334425164e-06, + "loss": 0.1138, + "num_input_tokens_seen": 22412736, + "step": 33270 + }, + { + "epoch": 0.8129137859428822, + "grad_norm": 25.858051300048828, + "learning_rate": 1.9762388575720035e-06, + "loss": 0.1897, + "num_input_tokens_seen": 22415936, + "step": 33275 + }, + { + "epoch": 0.8130359367747294, + "grad_norm": 13.351430892944336, + "learning_rate": 1.976220374602019e-06, + "loss": 0.2099, + "num_input_tokens_seen": 22419456, + "step": 33280 + }, + { + "epoch": 0.8131580876065766, + "grad_norm": 21.063621520996094, + "learning_rate": 1.976201884532697e-06, + "loss": 0.2129, + "num_input_tokens_seen": 22422720, + "step": 33285 + }, + { + "epoch": 0.8132802384384238, + "grad_norm": 13.48155689239502, + "learning_rate": 1.9761833873641727e-06, + "loss": 0.0426, + "num_input_tokens_seen": 22427072, + "step": 33290 + }, + { + "epoch": 0.813402389270271, + "grad_norm": 0.40031149983406067, + "learning_rate": 1.9761648830965804e-06, + "loss": 0.0406, + "num_input_tokens_seen": 22430656, + "step": 33295 + }, + { + "epoch": 0.813524540102118, + "grad_norm": 40.440528869628906, + "learning_rate": 1.976146371730054e-06, + "loss": 0.1687, + "num_input_tokens_seen": 22433920, + "step": 33300 + }, + { + "epoch": 0.8136466909339652, + "grad_norm": 0.33487313985824585, + "learning_rate": 1.976127853264729e-06, + "loss": 0.0645, + "num_input_tokens_seen": 22437632, + "step": 33305 + }, + { + "epoch": 0.8137688417658124, + "grad_norm": 41.765663146972656, + "learning_rate": 1.9761093277007394e-06, + "loss": 0.1122, + "num_input_tokens_seen": 22440704, + "step": 33310 + }, + { + "epoch": 0.8138909925976596, + "grad_norm": 11.581916809082031, + "learning_rate": 1.9760907950382204e-06, + "loss": 0.1018, + "num_input_tokens_seen": 22444032, + "step": 33315 + }, + { + "epoch": 0.8140131434295067, + "grad_norm": 1.0770961046218872, + "learning_rate": 1.9760722552773066e-06, + "loss": 0.0939, + "num_input_tokens_seen": 22447488, + "step": 33320 + }, + { + "epoch": 0.8141352942613539, + "grad_norm": 13.341182708740234, + "learning_rate": 1.976053708418133e-06, + "loss": 0.1864, + "num_input_tokens_seen": 22450880, + "step": 33325 + }, + { + "epoch": 0.8142574450932011, + "grad_norm": 2.477572441101074, + "learning_rate": 1.9760351544608343e-06, + "loss": 0.0875, + "num_input_tokens_seen": 22453888, + "step": 33330 + }, + { + "epoch": 0.8143795959250483, + "grad_norm": 0.025869879871606827, + "learning_rate": 1.976016593405545e-06, + "loss": 0.1386, + "num_input_tokens_seen": 22457152, + "step": 33335 + }, + { + "epoch": 0.8145017467568955, + "grad_norm": 38.82683563232422, + "learning_rate": 1.9759980252524007e-06, + "loss": 0.1697, + "num_input_tokens_seen": 22460288, + "step": 33340 + }, + { + "epoch": 0.8146238975887425, + "grad_norm": 11.620903015136719, + "learning_rate": 1.9759794500015365e-06, + "loss": 0.0614, + "num_input_tokens_seen": 22463424, + "step": 33345 + }, + { + "epoch": 0.8147460484205897, + "grad_norm": 1.6850146055221558, + "learning_rate": 1.9759608676530872e-06, + "loss": 0.1574, + "num_input_tokens_seen": 22466432, + "step": 33350 + }, + { + "epoch": 0.8148681992524369, + "grad_norm": 9.8765869140625, + "learning_rate": 1.975942278207188e-06, + "loss": 0.0647, + "num_input_tokens_seen": 22469440, + "step": 33355 + }, + { + "epoch": 0.8149903500842841, + "grad_norm": 33.7421760559082, + "learning_rate": 1.9759236816639733e-06, + "loss": 0.0556, + "num_input_tokens_seen": 22472256, + "step": 33360 + }, + { + "epoch": 0.8151125009161312, + "grad_norm": 17.823688507080078, + "learning_rate": 1.97590507802358e-06, + "loss": 0.0245, + "num_input_tokens_seen": 22475904, + "step": 33365 + }, + { + "epoch": 0.8152346517479784, + "grad_norm": 3.1829872131347656, + "learning_rate": 1.9758864672861423e-06, + "loss": 0.0503, + "num_input_tokens_seen": 22479040, + "step": 33370 + }, + { + "epoch": 0.8153568025798256, + "grad_norm": 31.175722122192383, + "learning_rate": 1.9758678494517957e-06, + "loss": 0.2484, + "num_input_tokens_seen": 22482816, + "step": 33375 + }, + { + "epoch": 0.8154789534116728, + "grad_norm": 8.305288314819336, + "learning_rate": 1.9758492245206754e-06, + "loss": 0.1218, + "num_input_tokens_seen": 22485824, + "step": 33380 + }, + { + "epoch": 0.81560110424352, + "grad_norm": 28.994346618652344, + "learning_rate": 1.975830592492917e-06, + "loss": 0.1422, + "num_input_tokens_seen": 22489024, + "step": 33385 + }, + { + "epoch": 0.815723255075367, + "grad_norm": 36.34081268310547, + "learning_rate": 1.9758119533686565e-06, + "loss": 0.0978, + "num_input_tokens_seen": 22492480, + "step": 33390 + }, + { + "epoch": 0.8158454059072142, + "grad_norm": 0.08920477330684662, + "learning_rate": 1.9757933071480285e-06, + "loss": 0.0414, + "num_input_tokens_seen": 22495616, + "step": 33395 + }, + { + "epoch": 0.8159675567390614, + "grad_norm": 21.8082275390625, + "learning_rate": 1.9757746538311694e-06, + "loss": 0.0998, + "num_input_tokens_seen": 22499072, + "step": 33400 + }, + { + "epoch": 0.8160897075709086, + "grad_norm": 0.20586279034614563, + "learning_rate": 1.9757559934182146e-06, + "loss": 0.0641, + "num_input_tokens_seen": 22502656, + "step": 33405 + }, + { + "epoch": 0.8162118584027557, + "grad_norm": 25.77524757385254, + "learning_rate": 1.9757373259092998e-06, + "loss": 0.1152, + "num_input_tokens_seen": 22506112, + "step": 33410 + }, + { + "epoch": 0.8163340092346029, + "grad_norm": 0.29158711433410645, + "learning_rate": 1.9757186513045604e-06, + "loss": 0.1796, + "num_input_tokens_seen": 22509376, + "step": 33415 + }, + { + "epoch": 0.81645616006645, + "grad_norm": 2.5156776905059814, + "learning_rate": 1.975699969604133e-06, + "loss": 0.1909, + "num_input_tokens_seen": 22513024, + "step": 33420 + }, + { + "epoch": 0.8165783108982972, + "grad_norm": 4.3752121925354, + "learning_rate": 1.9756812808081527e-06, + "loss": 0.1162, + "num_input_tokens_seen": 22516544, + "step": 33425 + }, + { + "epoch": 0.8167004617301444, + "grad_norm": 38.32486343383789, + "learning_rate": 1.975662584916756e-06, + "loss": 0.0691, + "num_input_tokens_seen": 22519808, + "step": 33430 + }, + { + "epoch": 0.8168226125619915, + "grad_norm": 43.266361236572266, + "learning_rate": 1.975643881930078e-06, + "loss": 0.1068, + "num_input_tokens_seen": 22523264, + "step": 33435 + }, + { + "epoch": 0.8169447633938387, + "grad_norm": 22.88056755065918, + "learning_rate": 1.9756251718482558e-06, + "loss": 0.0976, + "num_input_tokens_seen": 22526592, + "step": 33440 + }, + { + "epoch": 0.8170669142256859, + "grad_norm": 0.9959394335746765, + "learning_rate": 1.975606454671425e-06, + "loss": 0.0408, + "num_input_tokens_seen": 22530304, + "step": 33445 + }, + { + "epoch": 0.8171890650575331, + "grad_norm": 2.919494152069092, + "learning_rate": 1.975587730399721e-06, + "loss": 0.1699, + "num_input_tokens_seen": 22533568, + "step": 33450 + }, + { + "epoch": 0.8173112158893802, + "grad_norm": 50.999454498291016, + "learning_rate": 1.9755689990332813e-06, + "loss": 0.1904, + "num_input_tokens_seen": 22536704, + "step": 33455 + }, + { + "epoch": 0.8174333667212274, + "grad_norm": 0.2796314060688019, + "learning_rate": 1.975550260572241e-06, + "loss": 0.0707, + "num_input_tokens_seen": 22540416, + "step": 33460 + }, + { + "epoch": 0.8175555175530745, + "grad_norm": 40.962093353271484, + "learning_rate": 1.975531515016737e-06, + "loss": 0.0283, + "num_input_tokens_seen": 22543680, + "step": 33465 + }, + { + "epoch": 0.8176776683849217, + "grad_norm": 3.3434290885925293, + "learning_rate": 1.9755127623669053e-06, + "loss": 0.1277, + "num_input_tokens_seen": 22547264, + "step": 33470 + }, + { + "epoch": 0.8177998192167689, + "grad_norm": 4.9675703048706055, + "learning_rate": 1.9754940026228826e-06, + "loss": 0.1602, + "num_input_tokens_seen": 22550784, + "step": 33475 + }, + { + "epoch": 0.817921970048616, + "grad_norm": 17.3238468170166, + "learning_rate": 1.975475235784805e-06, + "loss": 0.2102, + "num_input_tokens_seen": 22554048, + "step": 33480 + }, + { + "epoch": 0.8180441208804632, + "grad_norm": 34.665199279785156, + "learning_rate": 1.975456461852809e-06, + "loss": 0.0947, + "num_input_tokens_seen": 22556864, + "step": 33485 + }, + { + "epoch": 0.8181662717123104, + "grad_norm": 19.935836791992188, + "learning_rate": 1.9754376808270316e-06, + "loss": 0.0851, + "num_input_tokens_seen": 22560192, + "step": 33490 + }, + { + "epoch": 0.8182884225441576, + "grad_norm": 4.1569647789001465, + "learning_rate": 1.975418892707609e-06, + "loss": 0.1027, + "num_input_tokens_seen": 22563584, + "step": 33495 + }, + { + "epoch": 0.8184105733760046, + "grad_norm": 0.48780179023742676, + "learning_rate": 1.975400097494678e-06, + "loss": 0.0578, + "num_input_tokens_seen": 22566720, + "step": 33500 + }, + { + "epoch": 0.8185327242078518, + "grad_norm": 2.4946086406707764, + "learning_rate": 1.9753812951883744e-06, + "loss": 0.1196, + "num_input_tokens_seen": 22570048, + "step": 33505 + }, + { + "epoch": 0.818654875039699, + "grad_norm": 16.733564376831055, + "learning_rate": 1.9753624857888362e-06, + "loss": 0.0979, + "num_input_tokens_seen": 22573376, + "step": 33510 + }, + { + "epoch": 0.8187770258715462, + "grad_norm": 4.550495624542236, + "learning_rate": 1.9753436692961992e-06, + "loss": 0.0704, + "num_input_tokens_seen": 22576704, + "step": 33515 + }, + { + "epoch": 0.8188991767033934, + "grad_norm": 0.7165770530700684, + "learning_rate": 1.975324845710601e-06, + "loss": 0.1453, + "num_input_tokens_seen": 22579840, + "step": 33520 + }, + { + "epoch": 0.8190213275352405, + "grad_norm": 16.152631759643555, + "learning_rate": 1.9753060150321786e-06, + "loss": 0.051, + "num_input_tokens_seen": 22583104, + "step": 33525 + }, + { + "epoch": 0.8191434783670877, + "grad_norm": 7.786255359649658, + "learning_rate": 1.975287177261068e-06, + "loss": 0.1462, + "num_input_tokens_seen": 22587008, + "step": 33530 + }, + { + "epoch": 0.8192656291989349, + "grad_norm": 19.497220993041992, + "learning_rate": 1.975268332397407e-06, + "loss": 0.1047, + "num_input_tokens_seen": 22590528, + "step": 33535 + }, + { + "epoch": 0.819387780030782, + "grad_norm": 0.2000948041677475, + "learning_rate": 1.975249480441332e-06, + "loss": 0.0521, + "num_input_tokens_seen": 22593600, + "step": 33540 + }, + { + "epoch": 0.8195099308626291, + "grad_norm": 45.75498580932617, + "learning_rate": 1.975230621392981e-06, + "loss": 0.1804, + "num_input_tokens_seen": 22596672, + "step": 33545 + }, + { + "epoch": 0.8196320816944763, + "grad_norm": 7.454740047454834, + "learning_rate": 1.9752117552524905e-06, + "loss": 0.0828, + "num_input_tokens_seen": 22600000, + "step": 33550 + }, + { + "epoch": 0.8197542325263235, + "grad_norm": 15.156696319580078, + "learning_rate": 1.9751928820199976e-06, + "loss": 0.1668, + "num_input_tokens_seen": 22603392, + "step": 33555 + }, + { + "epoch": 0.8198763833581707, + "grad_norm": 0.2669239938259125, + "learning_rate": 1.97517400169564e-06, + "loss": 0.1005, + "num_input_tokens_seen": 22606784, + "step": 33560 + }, + { + "epoch": 0.8199985341900178, + "grad_norm": 3.1211867332458496, + "learning_rate": 1.9751551142795545e-06, + "loss": 0.1006, + "num_input_tokens_seen": 22610112, + "step": 33565 + }, + { + "epoch": 0.820120685021865, + "grad_norm": 21.486875534057617, + "learning_rate": 1.975136219771879e-06, + "loss": 0.1059, + "num_input_tokens_seen": 22613312, + "step": 33570 + }, + { + "epoch": 0.8202428358537122, + "grad_norm": 0.183716282248497, + "learning_rate": 1.97511731817275e-06, + "loss": 0.1553, + "num_input_tokens_seen": 22616448, + "step": 33575 + }, + { + "epoch": 0.8203649866855593, + "grad_norm": 41.991580963134766, + "learning_rate": 1.9750984094823065e-06, + "loss": 0.1131, + "num_input_tokens_seen": 22620032, + "step": 33580 + }, + { + "epoch": 0.8204871375174065, + "grad_norm": 1.0276497602462769, + "learning_rate": 1.9750794937006847e-06, + "loss": 0.0494, + "num_input_tokens_seen": 22623296, + "step": 33585 + }, + { + "epoch": 0.8206092883492536, + "grad_norm": 13.941420555114746, + "learning_rate": 1.9750605708280224e-06, + "loss": 0.1359, + "num_input_tokens_seen": 22626560, + "step": 33590 + }, + { + "epoch": 0.8207314391811008, + "grad_norm": 27.367237091064453, + "learning_rate": 1.9750416408644573e-06, + "loss": 0.0691, + "num_input_tokens_seen": 22629696, + "step": 33595 + }, + { + "epoch": 0.820853590012948, + "grad_norm": 8.477364540100098, + "learning_rate": 1.9750227038101273e-06, + "loss": 0.0392, + "num_input_tokens_seen": 22632832, + "step": 33600 + }, + { + "epoch": 0.8209757408447952, + "grad_norm": 21.650747299194336, + "learning_rate": 1.9750037596651702e-06, + "loss": 0.0865, + "num_input_tokens_seen": 22636224, + "step": 33605 + }, + { + "epoch": 0.8210978916766423, + "grad_norm": 9.361983299255371, + "learning_rate": 1.974984808429723e-06, + "loss": 0.2274, + "num_input_tokens_seen": 22639744, + "step": 33610 + }, + { + "epoch": 0.8212200425084895, + "grad_norm": 23.482181549072266, + "learning_rate": 1.9749658501039247e-06, + "loss": 0.1166, + "num_input_tokens_seen": 22643456, + "step": 33615 + }, + { + "epoch": 0.8213421933403366, + "grad_norm": 16.769548416137695, + "learning_rate": 1.974946884687912e-06, + "loss": 0.1583, + "num_input_tokens_seen": 22647104, + "step": 33620 + }, + { + "epoch": 0.8214643441721838, + "grad_norm": 1.3182966709136963, + "learning_rate": 1.9749279121818236e-06, + "loss": 0.1557, + "num_input_tokens_seen": 22650240, + "step": 33625 + }, + { + "epoch": 0.821586495004031, + "grad_norm": 31.42665672302246, + "learning_rate": 1.974908932585797e-06, + "loss": 0.1548, + "num_input_tokens_seen": 22653440, + "step": 33630 + }, + { + "epoch": 0.8217086458358781, + "grad_norm": 5.566858768463135, + "learning_rate": 1.9748899458999706e-06, + "loss": 0.0919, + "num_input_tokens_seen": 22656960, + "step": 33635 + }, + { + "epoch": 0.8218307966677253, + "grad_norm": 0.2816479802131653, + "learning_rate": 1.974870952124482e-06, + "loss": 0.0835, + "num_input_tokens_seen": 22660160, + "step": 33640 + }, + { + "epoch": 0.8219529474995725, + "grad_norm": 4.983752250671387, + "learning_rate": 1.9748519512594697e-06, + "loss": 0.1284, + "num_input_tokens_seen": 22663360, + "step": 33645 + }, + { + "epoch": 0.8220750983314197, + "grad_norm": 2.6677727699279785, + "learning_rate": 1.974832943305072e-06, + "loss": 0.0585, + "num_input_tokens_seen": 22666624, + "step": 33650 + }, + { + "epoch": 0.8221972491632668, + "grad_norm": 6.633481025695801, + "learning_rate": 1.974813928261427e-06, + "loss": 0.0665, + "num_input_tokens_seen": 22670080, + "step": 33655 + }, + { + "epoch": 0.8223193999951139, + "grad_norm": 1.056289792060852, + "learning_rate": 1.9747949061286724e-06, + "loss": 0.0723, + "num_input_tokens_seen": 22673088, + "step": 33660 + }, + { + "epoch": 0.8224415508269611, + "grad_norm": 20.022216796875, + "learning_rate": 1.9747758769069477e-06, + "loss": 0.1446, + "num_input_tokens_seen": 22676288, + "step": 33665 + }, + { + "epoch": 0.8225637016588083, + "grad_norm": 7.566336154937744, + "learning_rate": 1.9747568405963902e-06, + "loss": 0.3086, + "num_input_tokens_seen": 22679552, + "step": 33670 + }, + { + "epoch": 0.8226858524906555, + "grad_norm": 23.478660583496094, + "learning_rate": 1.974737797197139e-06, + "loss": 0.1643, + "num_input_tokens_seen": 22682752, + "step": 33675 + }, + { + "epoch": 0.8228080033225026, + "grad_norm": 4.6182355880737305, + "learning_rate": 1.9747187467093324e-06, + "loss": 0.0487, + "num_input_tokens_seen": 22685696, + "step": 33680 + }, + { + "epoch": 0.8229301541543498, + "grad_norm": 1.7821851968765259, + "learning_rate": 1.9746996891331086e-06, + "loss": 0.1025, + "num_input_tokens_seen": 22689536, + "step": 33685 + }, + { + "epoch": 0.823052304986197, + "grad_norm": 28.503374099731445, + "learning_rate": 1.974680624468607e-06, + "loss": 0.1342, + "num_input_tokens_seen": 22692928, + "step": 33690 + }, + { + "epoch": 0.8231744558180442, + "grad_norm": 0.768397867679596, + "learning_rate": 1.974661552715965e-06, + "loss": 0.1626, + "num_input_tokens_seen": 22695936, + "step": 33695 + }, + { + "epoch": 0.8232966066498912, + "grad_norm": 0.2706652879714966, + "learning_rate": 1.9746424738753225e-06, + "loss": 0.0827, + "num_input_tokens_seen": 22698816, + "step": 33700 + }, + { + "epoch": 0.8234187574817384, + "grad_norm": 6.46179723739624, + "learning_rate": 1.974623387946818e-06, + "loss": 0.0661, + "num_input_tokens_seen": 22702528, + "step": 33705 + }, + { + "epoch": 0.8235409083135856, + "grad_norm": 17.515541076660156, + "learning_rate": 1.97460429493059e-06, + "loss": 0.0431, + "num_input_tokens_seen": 22705856, + "step": 33710 + }, + { + "epoch": 0.8236630591454328, + "grad_norm": 21.961942672729492, + "learning_rate": 1.974585194826777e-06, + "loss": 0.1104, + "num_input_tokens_seen": 22708992, + "step": 33715 + }, + { + "epoch": 0.82378520997728, + "grad_norm": 15.293503761291504, + "learning_rate": 1.9745660876355187e-06, + "loss": 0.1426, + "num_input_tokens_seen": 22712128, + "step": 33720 + }, + { + "epoch": 0.8239073608091271, + "grad_norm": 6.357548236846924, + "learning_rate": 1.9745469733569536e-06, + "loss": 0.0338, + "num_input_tokens_seen": 22715392, + "step": 33725 + }, + { + "epoch": 0.8240295116409743, + "grad_norm": 29.685779571533203, + "learning_rate": 1.9745278519912206e-06, + "loss": 0.1035, + "num_input_tokens_seen": 22719232, + "step": 33730 + }, + { + "epoch": 0.8241516624728215, + "grad_norm": 46.714149475097656, + "learning_rate": 1.9745087235384596e-06, + "loss": 0.1042, + "num_input_tokens_seen": 22722240, + "step": 33735 + }, + { + "epoch": 0.8242738133046686, + "grad_norm": 15.977401733398438, + "learning_rate": 1.9744895879988085e-06, + "loss": 0.1429, + "num_input_tokens_seen": 22725632, + "step": 33740 + }, + { + "epoch": 0.8243959641365157, + "grad_norm": 51.249656677246094, + "learning_rate": 1.974470445372407e-06, + "loss": 0.194, + "num_input_tokens_seen": 22728640, + "step": 33745 + }, + { + "epoch": 0.8245181149683629, + "grad_norm": 18.208505630493164, + "learning_rate": 1.9744512956593943e-06, + "loss": 0.0791, + "num_input_tokens_seen": 22731904, + "step": 33750 + }, + { + "epoch": 0.8246402658002101, + "grad_norm": 11.103679656982422, + "learning_rate": 1.97443213885991e-06, + "loss": 0.1784, + "num_input_tokens_seen": 22735040, + "step": 33755 + }, + { + "epoch": 0.8247624166320573, + "grad_norm": 16.442033767700195, + "learning_rate": 1.9744129749740925e-06, + "loss": 0.196, + "num_input_tokens_seen": 22738688, + "step": 33760 + }, + { + "epoch": 0.8248845674639045, + "grad_norm": 4.221897602081299, + "learning_rate": 1.974393804002082e-06, + "loss": 0.0673, + "num_input_tokens_seen": 22741696, + "step": 33765 + }, + { + "epoch": 0.8250067182957516, + "grad_norm": 28.284793853759766, + "learning_rate": 1.974374625944018e-06, + "loss": 0.1446, + "num_input_tokens_seen": 22745216, + "step": 33770 + }, + { + "epoch": 0.8251288691275988, + "grad_norm": 13.259180068969727, + "learning_rate": 1.9743554408000394e-06, + "loss": 0.1043, + "num_input_tokens_seen": 22748352, + "step": 33775 + }, + { + "epoch": 0.8252510199594459, + "grad_norm": 11.632582664489746, + "learning_rate": 1.974336248570286e-06, + "loss": 0.156, + "num_input_tokens_seen": 22751744, + "step": 33780 + }, + { + "epoch": 0.8253731707912931, + "grad_norm": 0.9505820870399475, + "learning_rate": 1.9743170492548974e-06, + "loss": 0.1163, + "num_input_tokens_seen": 22755712, + "step": 33785 + }, + { + "epoch": 0.8254953216231402, + "grad_norm": 7.451425552368164, + "learning_rate": 1.9742978428540132e-06, + "loss": 0.126, + "num_input_tokens_seen": 22759232, + "step": 33790 + }, + { + "epoch": 0.8256174724549874, + "grad_norm": 15.902039527893066, + "learning_rate": 1.974278629367773e-06, + "loss": 0.0943, + "num_input_tokens_seen": 22762688, + "step": 33795 + }, + { + "epoch": 0.8257396232868346, + "grad_norm": 17.254871368408203, + "learning_rate": 1.974259408796317e-06, + "loss": 0.0327, + "num_input_tokens_seen": 22766144, + "step": 33800 + }, + { + "epoch": 0.8258617741186818, + "grad_norm": 12.781633377075195, + "learning_rate": 1.9742401811397834e-06, + "loss": 0.0922, + "num_input_tokens_seen": 22769216, + "step": 33805 + }, + { + "epoch": 0.8259839249505289, + "grad_norm": 4.0394182205200195, + "learning_rate": 1.9742209463983143e-06, + "loss": 0.0987, + "num_input_tokens_seen": 22772864, + "step": 33810 + }, + { + "epoch": 0.826106075782376, + "grad_norm": 23.54717445373535, + "learning_rate": 1.9742017045720474e-06, + "loss": 0.0945, + "num_input_tokens_seen": 22775872, + "step": 33815 + }, + { + "epoch": 0.8262282266142232, + "grad_norm": 41.05208969116211, + "learning_rate": 1.9741824556611245e-06, + "loss": 0.049, + "num_input_tokens_seen": 22779456, + "step": 33820 + }, + { + "epoch": 0.8263503774460704, + "grad_norm": 7.894565582275391, + "learning_rate": 1.9741631996656846e-06, + "loss": 0.0323, + "num_input_tokens_seen": 22782528, + "step": 33825 + }, + { + "epoch": 0.8264725282779176, + "grad_norm": 13.551430702209473, + "learning_rate": 1.9741439365858677e-06, + "loss": 0.1273, + "num_input_tokens_seen": 22785664, + "step": 33830 + }, + { + "epoch": 0.8265946791097647, + "grad_norm": 51.72720718383789, + "learning_rate": 1.974124666421814e-06, + "loss": 0.1618, + "num_input_tokens_seen": 22788992, + "step": 33835 + }, + { + "epoch": 0.8267168299416119, + "grad_norm": 0.09919314086437225, + "learning_rate": 1.974105389173664e-06, + "loss": 0.1561, + "num_input_tokens_seen": 22792064, + "step": 33840 + }, + { + "epoch": 0.8268389807734591, + "grad_norm": 0.31595703959465027, + "learning_rate": 1.974086104841557e-06, + "loss": 0.0267, + "num_input_tokens_seen": 22795584, + "step": 33845 + }, + { + "epoch": 0.8269611316053063, + "grad_norm": 14.558001518249512, + "learning_rate": 1.974066813425635e-06, + "loss": 0.1838, + "num_input_tokens_seen": 22799104, + "step": 33850 + }, + { + "epoch": 0.8270832824371533, + "grad_norm": 4.938451766967773, + "learning_rate": 1.9740475149260364e-06, + "loss": 0.0809, + "num_input_tokens_seen": 22802112, + "step": 33855 + }, + { + "epoch": 0.8272054332690005, + "grad_norm": 1.022783637046814, + "learning_rate": 1.974028209342902e-06, + "loss": 0.0087, + "num_input_tokens_seen": 22805824, + "step": 33860 + }, + { + "epoch": 0.8273275841008477, + "grad_norm": 18.273351669311523, + "learning_rate": 1.974008896676373e-06, + "loss": 0.0718, + "num_input_tokens_seen": 22808768, + "step": 33865 + }, + { + "epoch": 0.8274497349326949, + "grad_norm": 6.545739650726318, + "learning_rate": 1.973989576926589e-06, + "loss": 0.1391, + "num_input_tokens_seen": 22812096, + "step": 33870 + }, + { + "epoch": 0.8275718857645421, + "grad_norm": 9.688802719116211, + "learning_rate": 1.973970250093691e-06, + "loss": 0.1762, + "num_input_tokens_seen": 22815296, + "step": 33875 + }, + { + "epoch": 0.8276940365963892, + "grad_norm": 40.52885818481445, + "learning_rate": 1.9739509161778196e-06, + "loss": 0.1399, + "num_input_tokens_seen": 22818944, + "step": 33880 + }, + { + "epoch": 0.8278161874282364, + "grad_norm": 16.736942291259766, + "learning_rate": 1.9739315751791146e-06, + "loss": 0.084, + "num_input_tokens_seen": 22822272, + "step": 33885 + }, + { + "epoch": 0.8279383382600836, + "grad_norm": 0.25654950737953186, + "learning_rate": 1.973912227097718e-06, + "loss": 0.0412, + "num_input_tokens_seen": 22825792, + "step": 33890 + }, + { + "epoch": 0.8280604890919308, + "grad_norm": 5.899317741394043, + "learning_rate": 1.9738928719337695e-06, + "loss": 0.1633, + "num_input_tokens_seen": 22829632, + "step": 33895 + }, + { + "epoch": 0.8281826399237778, + "grad_norm": 36.036251068115234, + "learning_rate": 1.97387350968741e-06, + "loss": 0.1873, + "num_input_tokens_seen": 22833152, + "step": 33900 + }, + { + "epoch": 0.828304790755625, + "grad_norm": 0.2765234410762787, + "learning_rate": 1.97385414035878e-06, + "loss": 0.1141, + "num_input_tokens_seen": 22836736, + "step": 33905 + }, + { + "epoch": 0.8284269415874722, + "grad_norm": 9.578060150146484, + "learning_rate": 1.973834763948021e-06, + "loss": 0.0695, + "num_input_tokens_seen": 22840064, + "step": 33910 + }, + { + "epoch": 0.8285490924193194, + "grad_norm": 1.5241056680679321, + "learning_rate": 1.9738153804552734e-06, + "loss": 0.0851, + "num_input_tokens_seen": 22843520, + "step": 33915 + }, + { + "epoch": 0.8286712432511666, + "grad_norm": 5.904448986053467, + "learning_rate": 1.973795989880679e-06, + "loss": 0.1975, + "num_input_tokens_seen": 22846720, + "step": 33920 + }, + { + "epoch": 0.8287933940830137, + "grad_norm": 36.777313232421875, + "learning_rate": 1.973776592224378e-06, + "loss": 0.0485, + "num_input_tokens_seen": 22850048, + "step": 33925 + }, + { + "epoch": 0.8289155449148609, + "grad_norm": 2.7984845638275146, + "learning_rate": 1.9737571874865115e-06, + "loss": 0.0265, + "num_input_tokens_seen": 22853504, + "step": 33930 + }, + { + "epoch": 0.829037695746708, + "grad_norm": 0.4170108437538147, + "learning_rate": 1.973737775667221e-06, + "loss": 0.0968, + "num_input_tokens_seen": 22856512, + "step": 33935 + }, + { + "epoch": 0.8291598465785552, + "grad_norm": 15.714227676391602, + "learning_rate": 1.9737183567666478e-06, + "loss": 0.1644, + "num_input_tokens_seen": 22860032, + "step": 33940 + }, + { + "epoch": 0.8292819974104023, + "grad_norm": 7.653704643249512, + "learning_rate": 1.9736989307849323e-06, + "loss": 0.183, + "num_input_tokens_seen": 22863424, + "step": 33945 + }, + { + "epoch": 0.8294041482422495, + "grad_norm": 16.792009353637695, + "learning_rate": 1.9736794977222166e-06, + "loss": 0.155, + "num_input_tokens_seen": 22866496, + "step": 33950 + }, + { + "epoch": 0.8295262990740967, + "grad_norm": 0.18280255794525146, + "learning_rate": 1.9736600575786413e-06, + "loss": 0.0301, + "num_input_tokens_seen": 22869632, + "step": 33955 + }, + { + "epoch": 0.8296484499059439, + "grad_norm": 1.9841983318328857, + "learning_rate": 1.973640610354349e-06, + "loss": 0.1194, + "num_input_tokens_seen": 22873216, + "step": 33960 + }, + { + "epoch": 0.8297706007377911, + "grad_norm": 1.050477385520935, + "learning_rate": 1.9736211560494796e-06, + "loss": 0.1718, + "num_input_tokens_seen": 22876480, + "step": 33965 + }, + { + "epoch": 0.8298927515696382, + "grad_norm": 15.280632972717285, + "learning_rate": 1.9736016946641755e-06, + "loss": 0.1535, + "num_input_tokens_seen": 22879552, + "step": 33970 + }, + { + "epoch": 0.8300149024014853, + "grad_norm": 1.5406540632247925, + "learning_rate": 1.973582226198578e-06, + "loss": 0.2542, + "num_input_tokens_seen": 22882816, + "step": 33975 + }, + { + "epoch": 0.8301370532333325, + "grad_norm": 4.964468955993652, + "learning_rate": 1.9735627506528284e-06, + "loss": 0.1118, + "num_input_tokens_seen": 22886208, + "step": 33980 + }, + { + "epoch": 0.8302592040651797, + "grad_norm": 10.970595359802246, + "learning_rate": 1.973543268027069e-06, + "loss": 0.2168, + "num_input_tokens_seen": 22889600, + "step": 33985 + }, + { + "epoch": 0.8303813548970268, + "grad_norm": 12.386752128601074, + "learning_rate": 1.9735237783214413e-06, + "loss": 0.0727, + "num_input_tokens_seen": 22893312, + "step": 33990 + }, + { + "epoch": 0.830503505728874, + "grad_norm": 20.162940979003906, + "learning_rate": 1.973504281536086e-06, + "loss": 0.1407, + "num_input_tokens_seen": 22896512, + "step": 33995 + }, + { + "epoch": 0.8306256565607212, + "grad_norm": 15.259532928466797, + "learning_rate": 1.9734847776711465e-06, + "loss": 0.1526, + "num_input_tokens_seen": 22900608, + "step": 34000 + }, + { + "epoch": 0.8307478073925684, + "grad_norm": 0.4917971193790436, + "learning_rate": 1.973465266726764e-06, + "loss": 0.0175, + "num_input_tokens_seen": 22904000, + "step": 34005 + }, + { + "epoch": 0.8308699582244156, + "grad_norm": 9.38034725189209, + "learning_rate": 1.9734457487030792e-06, + "loss": 0.0464, + "num_input_tokens_seen": 22907712, + "step": 34010 + }, + { + "epoch": 0.8309921090562626, + "grad_norm": 5.182671546936035, + "learning_rate": 1.973426223600236e-06, + "loss": 0.0883, + "num_input_tokens_seen": 22911232, + "step": 34015 + }, + { + "epoch": 0.8311142598881098, + "grad_norm": 13.00207233428955, + "learning_rate": 1.9734066914183752e-06, + "loss": 0.0874, + "num_input_tokens_seen": 22914624, + "step": 34020 + }, + { + "epoch": 0.831236410719957, + "grad_norm": 2.0715930461883545, + "learning_rate": 1.973387152157639e-06, + "loss": 0.0753, + "num_input_tokens_seen": 22918208, + "step": 34025 + }, + { + "epoch": 0.8313585615518042, + "grad_norm": 26.91462516784668, + "learning_rate": 1.97336760581817e-06, + "loss": 0.2246, + "num_input_tokens_seen": 22921664, + "step": 34030 + }, + { + "epoch": 0.8314807123836513, + "grad_norm": 20.108482360839844, + "learning_rate": 1.9733480524001096e-06, + "loss": 0.1399, + "num_input_tokens_seen": 22925184, + "step": 34035 + }, + { + "epoch": 0.8316028632154985, + "grad_norm": 14.097265243530273, + "learning_rate": 1.9733284919036007e-06, + "loss": 0.1742, + "num_input_tokens_seen": 22928640, + "step": 34040 + }, + { + "epoch": 0.8317250140473457, + "grad_norm": 29.254487991333008, + "learning_rate": 1.9733089243287847e-06, + "loss": 0.1754, + "num_input_tokens_seen": 22931968, + "step": 34045 + }, + { + "epoch": 0.8318471648791929, + "grad_norm": 12.961664199829102, + "learning_rate": 1.973289349675805e-06, + "loss": 0.0714, + "num_input_tokens_seen": 22935168, + "step": 34050 + }, + { + "epoch": 0.83196931571104, + "grad_norm": 1.3785406351089478, + "learning_rate": 1.973269767944803e-06, + "loss": 0.2318, + "num_input_tokens_seen": 22938944, + "step": 34055 + }, + { + "epoch": 0.8320914665428871, + "grad_norm": 22.194021224975586, + "learning_rate": 1.9732501791359217e-06, + "loss": 0.1794, + "num_input_tokens_seen": 22942016, + "step": 34060 + }, + { + "epoch": 0.8322136173747343, + "grad_norm": 5.744136810302734, + "learning_rate": 1.973230583249303e-06, + "loss": 0.0536, + "num_input_tokens_seen": 22945152, + "step": 34065 + }, + { + "epoch": 0.8323357682065815, + "grad_norm": 16.040979385375977, + "learning_rate": 1.97321098028509e-06, + "loss": 0.081, + "num_input_tokens_seen": 22948544, + "step": 34070 + }, + { + "epoch": 0.8324579190384287, + "grad_norm": 5.4098100662231445, + "learning_rate": 1.973191370243425e-06, + "loss": 0.0337, + "num_input_tokens_seen": 22951936, + "step": 34075 + }, + { + "epoch": 0.8325800698702758, + "grad_norm": 0.48113128542900085, + "learning_rate": 1.9731717531244507e-06, + "loss": 0.0878, + "num_input_tokens_seen": 22955520, + "step": 34080 + }, + { + "epoch": 0.832702220702123, + "grad_norm": 11.145101547241211, + "learning_rate": 1.973152128928309e-06, + "loss": 0.0361, + "num_input_tokens_seen": 22958656, + "step": 34085 + }, + { + "epoch": 0.8328243715339702, + "grad_norm": 10.848712921142578, + "learning_rate": 1.973132497655144e-06, + "loss": 0.0429, + "num_input_tokens_seen": 22962240, + "step": 34090 + }, + { + "epoch": 0.8329465223658173, + "grad_norm": 0.10347910970449448, + "learning_rate": 1.9731128593050974e-06, + "loss": 0.0689, + "num_input_tokens_seen": 22965568, + "step": 34095 + }, + { + "epoch": 0.8330686731976644, + "grad_norm": 26.27857780456543, + "learning_rate": 1.9730932138783122e-06, + "loss": 0.0238, + "num_input_tokens_seen": 22969152, + "step": 34100 + }, + { + "epoch": 0.8331908240295116, + "grad_norm": 15.342934608459473, + "learning_rate": 1.973073561374932e-06, + "loss": 0.2501, + "num_input_tokens_seen": 22972928, + "step": 34105 + }, + { + "epoch": 0.8333129748613588, + "grad_norm": 13.802165985107422, + "learning_rate": 1.9730539017950986e-06, + "loss": 0.2352, + "num_input_tokens_seen": 22976256, + "step": 34110 + }, + { + "epoch": 0.833435125693206, + "grad_norm": 28.344444274902344, + "learning_rate": 1.9730342351389555e-06, + "loss": 0.1129, + "num_input_tokens_seen": 22979648, + "step": 34115 + }, + { + "epoch": 0.8335572765250532, + "grad_norm": 4.112672805786133, + "learning_rate": 1.973014561406646e-06, + "loss": 0.1751, + "num_input_tokens_seen": 22982784, + "step": 34120 + }, + { + "epoch": 0.8336794273569003, + "grad_norm": 44.83839416503906, + "learning_rate": 1.972994880598313e-06, + "loss": 0.1128, + "num_input_tokens_seen": 22985728, + "step": 34125 + }, + { + "epoch": 0.8338015781887474, + "grad_norm": 1.7575098276138306, + "learning_rate": 1.9729751927140994e-06, + "loss": 0.0816, + "num_input_tokens_seen": 22989056, + "step": 34130 + }, + { + "epoch": 0.8339237290205946, + "grad_norm": 18.21506690979004, + "learning_rate": 1.9729554977541484e-06, + "loss": 0.1597, + "num_input_tokens_seen": 22992832, + "step": 34135 + }, + { + "epoch": 0.8340458798524418, + "grad_norm": 1.0433268547058105, + "learning_rate": 1.9729357957186034e-06, + "loss": 0.0803, + "num_input_tokens_seen": 22996096, + "step": 34140 + }, + { + "epoch": 0.8341680306842889, + "grad_norm": 2.801239013671875, + "learning_rate": 1.972916086607607e-06, + "loss": 0.1056, + "num_input_tokens_seen": 23000128, + "step": 34145 + }, + { + "epoch": 0.8342901815161361, + "grad_norm": 32.306697845458984, + "learning_rate": 1.9728963704213044e-06, + "loss": 0.059, + "num_input_tokens_seen": 23003136, + "step": 34150 + }, + { + "epoch": 0.8344123323479833, + "grad_norm": 0.4662260413169861, + "learning_rate": 1.9728766471598367e-06, + "loss": 0.0162, + "num_input_tokens_seen": 23006656, + "step": 34155 + }, + { + "epoch": 0.8345344831798305, + "grad_norm": 42.929996490478516, + "learning_rate": 1.972856916823349e-06, + "loss": 0.0996, + "num_input_tokens_seen": 23010304, + "step": 34160 + }, + { + "epoch": 0.8346566340116777, + "grad_norm": 42.26788330078125, + "learning_rate": 1.9728371794119836e-06, + "loss": 0.2583, + "num_input_tokens_seen": 23013440, + "step": 34165 + }, + { + "epoch": 0.8347787848435247, + "grad_norm": 26.08919334411621, + "learning_rate": 1.9728174349258844e-06, + "loss": 0.2494, + "num_input_tokens_seen": 23016576, + "step": 34170 + }, + { + "epoch": 0.8349009356753719, + "grad_norm": 26.989700317382812, + "learning_rate": 1.972797683365196e-06, + "loss": 0.1302, + "num_input_tokens_seen": 23019648, + "step": 34175 + }, + { + "epoch": 0.8350230865072191, + "grad_norm": 6.1088972091674805, + "learning_rate": 1.9727779247300606e-06, + "loss": 0.0458, + "num_input_tokens_seen": 23022912, + "step": 34180 + }, + { + "epoch": 0.8351452373390663, + "grad_norm": 40.35777282714844, + "learning_rate": 1.9727581590206226e-06, + "loss": 0.0924, + "num_input_tokens_seen": 23026624, + "step": 34185 + }, + { + "epoch": 0.8352673881709134, + "grad_norm": 0.571607768535614, + "learning_rate": 1.9727383862370255e-06, + "loss": 0.1005, + "num_input_tokens_seen": 23029824, + "step": 34190 + }, + { + "epoch": 0.8353895390027606, + "grad_norm": 22.8275146484375, + "learning_rate": 1.9727186063794133e-06, + "loss": 0.1247, + "num_input_tokens_seen": 23033408, + "step": 34195 + }, + { + "epoch": 0.8355116898346078, + "grad_norm": 0.1954413503408432, + "learning_rate": 1.9726988194479303e-06, + "loss": 0.097, + "num_input_tokens_seen": 23036928, + "step": 34200 + }, + { + "epoch": 0.835633840666455, + "grad_norm": 20.129854202270508, + "learning_rate": 1.9726790254427194e-06, + "loss": 0.1309, + "num_input_tokens_seen": 23040704, + "step": 34205 + }, + { + "epoch": 0.8357559914983022, + "grad_norm": 0.10747195780277252, + "learning_rate": 1.972659224363925e-06, + "loss": 0.0704, + "num_input_tokens_seen": 23044288, + "step": 34210 + }, + { + "epoch": 0.8358781423301492, + "grad_norm": 0.35761135816574097, + "learning_rate": 1.9726394162116907e-06, + "loss": 0.1503, + "num_input_tokens_seen": 23047552, + "step": 34215 + }, + { + "epoch": 0.8360002931619964, + "grad_norm": 16.95950698852539, + "learning_rate": 1.9726196009861614e-06, + "loss": 0.1158, + "num_input_tokens_seen": 23050944, + "step": 34220 + }, + { + "epoch": 0.8361224439938436, + "grad_norm": 9.001870155334473, + "learning_rate": 1.972599778687481e-06, + "loss": 0.1044, + "num_input_tokens_seen": 23054208, + "step": 34225 + }, + { + "epoch": 0.8362445948256908, + "grad_norm": 16.461078643798828, + "learning_rate": 1.972579949315793e-06, + "loss": 0.0879, + "num_input_tokens_seen": 23057472, + "step": 34230 + }, + { + "epoch": 0.8363667456575379, + "grad_norm": 16.833782196044922, + "learning_rate": 1.972560112871242e-06, + "loss": 0.1846, + "num_input_tokens_seen": 23061184, + "step": 34235 + }, + { + "epoch": 0.8364888964893851, + "grad_norm": 45.61178970336914, + "learning_rate": 1.972540269353972e-06, + "loss": 0.1902, + "num_input_tokens_seen": 23064256, + "step": 34240 + }, + { + "epoch": 0.8366110473212323, + "grad_norm": 12.094864845275879, + "learning_rate": 1.9725204187641282e-06, + "loss": 0.0897, + "num_input_tokens_seen": 23067584, + "step": 34245 + }, + { + "epoch": 0.8367331981530794, + "grad_norm": 12.797513961791992, + "learning_rate": 1.9725005611018544e-06, + "loss": 0.1588, + "num_input_tokens_seen": 23070912, + "step": 34250 + }, + { + "epoch": 0.8368553489849266, + "grad_norm": 20.513973236083984, + "learning_rate": 1.9724806963672947e-06, + "loss": 0.0433, + "num_input_tokens_seen": 23074560, + "step": 34255 + }, + { + "epoch": 0.8369774998167737, + "grad_norm": 14.594740867614746, + "learning_rate": 1.972460824560594e-06, + "loss": 0.0958, + "num_input_tokens_seen": 23078592, + "step": 34260 + }, + { + "epoch": 0.8370996506486209, + "grad_norm": 4.112400531768799, + "learning_rate": 1.972440945681896e-06, + "loss": 0.0886, + "num_input_tokens_seen": 23081920, + "step": 34265 + }, + { + "epoch": 0.8372218014804681, + "grad_norm": 2.1494662761688232, + "learning_rate": 1.9724210597313463e-06, + "loss": 0.1852, + "num_input_tokens_seen": 23084928, + "step": 34270 + }, + { + "epoch": 0.8373439523123153, + "grad_norm": 3.337203025817871, + "learning_rate": 1.972401166709089e-06, + "loss": 0.0785, + "num_input_tokens_seen": 23087936, + "step": 34275 + }, + { + "epoch": 0.8374661031441624, + "grad_norm": 10.968202590942383, + "learning_rate": 1.9723812666152695e-06, + "loss": 0.1037, + "num_input_tokens_seen": 23091072, + "step": 34280 + }, + { + "epoch": 0.8375882539760096, + "grad_norm": 9.930312156677246, + "learning_rate": 1.9723613594500312e-06, + "loss": 0.0931, + "num_input_tokens_seen": 23094528, + "step": 34285 + }, + { + "epoch": 0.8377104048078567, + "grad_norm": 22.673933029174805, + "learning_rate": 1.9723414452135197e-06, + "loss": 0.1358, + "num_input_tokens_seen": 23098560, + "step": 34290 + }, + { + "epoch": 0.8378325556397039, + "grad_norm": 18.80763053894043, + "learning_rate": 1.9723215239058797e-06, + "loss": 0.0928, + "num_input_tokens_seen": 23102080, + "step": 34295 + }, + { + "epoch": 0.8379547064715511, + "grad_norm": 1.3128330707550049, + "learning_rate": 1.972301595527256e-06, + "loss": 0.0881, + "num_input_tokens_seen": 23105728, + "step": 34300 + }, + { + "epoch": 0.8380768573033982, + "grad_norm": 4.349679946899414, + "learning_rate": 1.9722816600777937e-06, + "loss": 0.1869, + "num_input_tokens_seen": 23109184, + "step": 34305 + }, + { + "epoch": 0.8381990081352454, + "grad_norm": 1.3009531497955322, + "learning_rate": 1.972261717557638e-06, + "loss": 0.0709, + "num_input_tokens_seen": 23112576, + "step": 34310 + }, + { + "epoch": 0.8383211589670926, + "grad_norm": 39.514305114746094, + "learning_rate": 1.972241767966933e-06, + "loss": 0.0985, + "num_input_tokens_seen": 23116096, + "step": 34315 + }, + { + "epoch": 0.8384433097989398, + "grad_norm": 25.719438552856445, + "learning_rate": 1.9722218113058246e-06, + "loss": 0.1932, + "num_input_tokens_seen": 23119488, + "step": 34320 + }, + { + "epoch": 0.8385654606307869, + "grad_norm": 32.30820083618164, + "learning_rate": 1.9722018475744573e-06, + "loss": 0.1424, + "num_input_tokens_seen": 23122816, + "step": 34325 + }, + { + "epoch": 0.838687611462634, + "grad_norm": 9.513768196105957, + "learning_rate": 1.972181876772977e-06, + "loss": 0.0381, + "num_input_tokens_seen": 23125888, + "step": 34330 + }, + { + "epoch": 0.8388097622944812, + "grad_norm": 20.49899673461914, + "learning_rate": 1.9721618989015285e-06, + "loss": 0.0856, + "num_input_tokens_seen": 23129280, + "step": 34335 + }, + { + "epoch": 0.8389319131263284, + "grad_norm": 48.79369354248047, + "learning_rate": 1.972141913960257e-06, + "loss": 0.1908, + "num_input_tokens_seen": 23132608, + "step": 34340 + }, + { + "epoch": 0.8390540639581755, + "grad_norm": 17.959671020507812, + "learning_rate": 1.9721219219493087e-06, + "loss": 0.0528, + "num_input_tokens_seen": 23136000, + "step": 34345 + }, + { + "epoch": 0.8391762147900227, + "grad_norm": 5.5302815437316895, + "learning_rate": 1.972101922868828e-06, + "loss": 0.1189, + "num_input_tokens_seen": 23139520, + "step": 34350 + }, + { + "epoch": 0.8392983656218699, + "grad_norm": 13.573472023010254, + "learning_rate": 1.9720819167189605e-06, + "loss": 0.0453, + "num_input_tokens_seen": 23143168, + "step": 34355 + }, + { + "epoch": 0.8394205164537171, + "grad_norm": 36.57072448730469, + "learning_rate": 1.972061903499852e-06, + "loss": 0.0662, + "num_input_tokens_seen": 23146688, + "step": 34360 + }, + { + "epoch": 0.8395426672855643, + "grad_norm": 0.20541389286518097, + "learning_rate": 1.972041883211648e-06, + "loss": 0.1032, + "num_input_tokens_seen": 23149888, + "step": 34365 + }, + { + "epoch": 0.8396648181174113, + "grad_norm": 0.509107232093811, + "learning_rate": 1.9720218558544937e-06, + "loss": 0.1527, + "num_input_tokens_seen": 23153344, + "step": 34370 + }, + { + "epoch": 0.8397869689492585, + "grad_norm": 72.95315551757812, + "learning_rate": 1.972001821428535e-06, + "loss": 0.1562, + "num_input_tokens_seen": 23156608, + "step": 34375 + }, + { + "epoch": 0.8399091197811057, + "grad_norm": 31.27646255493164, + "learning_rate": 1.9719817799339178e-06, + "loss": 0.0158, + "num_input_tokens_seen": 23159936, + "step": 34380 + }, + { + "epoch": 0.8400312706129529, + "grad_norm": 1.4220023155212402, + "learning_rate": 1.9719617313707875e-06, + "loss": 0.1794, + "num_input_tokens_seen": 23163136, + "step": 34385 + }, + { + "epoch": 0.8401534214448, + "grad_norm": 40.343875885009766, + "learning_rate": 1.9719416757392906e-06, + "loss": 0.1996, + "num_input_tokens_seen": 23166336, + "step": 34390 + }, + { + "epoch": 0.8402755722766472, + "grad_norm": 27.2926025390625, + "learning_rate": 1.9719216130395718e-06, + "loss": 0.1569, + "num_input_tokens_seen": 23169536, + "step": 34395 + }, + { + "epoch": 0.8403977231084944, + "grad_norm": 5.333216190338135, + "learning_rate": 1.9719015432717776e-06, + "loss": 0.1087, + "num_input_tokens_seen": 23172544, + "step": 34400 + }, + { + "epoch": 0.8405198739403416, + "grad_norm": 15.621480941772461, + "learning_rate": 1.9718814664360543e-06, + "loss": 0.0696, + "num_input_tokens_seen": 23175936, + "step": 34405 + }, + { + "epoch": 0.8406420247721887, + "grad_norm": 0.1253618448972702, + "learning_rate": 1.9718613825325474e-06, + "loss": 0.1507, + "num_input_tokens_seen": 23179136, + "step": 34410 + }, + { + "epoch": 0.8407641756040358, + "grad_norm": 35.372894287109375, + "learning_rate": 1.971841291561403e-06, + "loss": 0.1766, + "num_input_tokens_seen": 23182464, + "step": 34415 + }, + { + "epoch": 0.840886326435883, + "grad_norm": 21.857149124145508, + "learning_rate": 1.9718211935227676e-06, + "loss": 0.0337, + "num_input_tokens_seen": 23186624, + "step": 34420 + }, + { + "epoch": 0.8410084772677302, + "grad_norm": 5.667567729949951, + "learning_rate": 1.971801088416787e-06, + "loss": 0.0837, + "num_input_tokens_seen": 23190016, + "step": 34425 + }, + { + "epoch": 0.8411306280995774, + "grad_norm": 32.595516204833984, + "learning_rate": 1.9717809762436075e-06, + "loss": 0.065, + "num_input_tokens_seen": 23193344, + "step": 34430 + }, + { + "epoch": 0.8412527789314245, + "grad_norm": 11.109789848327637, + "learning_rate": 1.9717608570033755e-06, + "loss": 0.0964, + "num_input_tokens_seen": 23196928, + "step": 34435 + }, + { + "epoch": 0.8413749297632717, + "grad_norm": 0.47146910429000854, + "learning_rate": 1.971740730696237e-06, + "loss": 0.0474, + "num_input_tokens_seen": 23200192, + "step": 34440 + }, + { + "epoch": 0.8414970805951189, + "grad_norm": 11.289263725280762, + "learning_rate": 1.9717205973223386e-06, + "loss": 0.1256, + "num_input_tokens_seen": 23203200, + "step": 34445 + }, + { + "epoch": 0.841619231426966, + "grad_norm": 1.9000232219696045, + "learning_rate": 1.9717004568818266e-06, + "loss": 0.0673, + "num_input_tokens_seen": 23206464, + "step": 34450 + }, + { + "epoch": 0.8417413822588132, + "grad_norm": 47.20060729980469, + "learning_rate": 1.9716803093748474e-06, + "loss": 0.074, + "num_input_tokens_seen": 23209600, + "step": 34455 + }, + { + "epoch": 0.8418635330906603, + "grad_norm": 5.45139217376709, + "learning_rate": 1.971660154801548e-06, + "loss": 0.1398, + "num_input_tokens_seen": 23212800, + "step": 34460 + }, + { + "epoch": 0.8419856839225075, + "grad_norm": 1.3095818758010864, + "learning_rate": 1.9716399931620743e-06, + "loss": 0.0769, + "num_input_tokens_seen": 23215872, + "step": 34465 + }, + { + "epoch": 0.8421078347543547, + "grad_norm": 0.6733562350273132, + "learning_rate": 1.9716198244565734e-06, + "loss": 0.1463, + "num_input_tokens_seen": 23219456, + "step": 34470 + }, + { + "epoch": 0.8422299855862019, + "grad_norm": 0.19409097731113434, + "learning_rate": 1.9715996486851915e-06, + "loss": 0.0977, + "num_input_tokens_seen": 23222912, + "step": 34475 + }, + { + "epoch": 0.842352136418049, + "grad_norm": 1.5221052169799805, + "learning_rate": 1.971579465848076e-06, + "loss": 0.0233, + "num_input_tokens_seen": 23225920, + "step": 34480 + }, + { + "epoch": 0.8424742872498961, + "grad_norm": 44.901756286621094, + "learning_rate": 1.971559275945373e-06, + "loss": 0.0681, + "num_input_tokens_seen": 23228992, + "step": 34485 + }, + { + "epoch": 0.8425964380817433, + "grad_norm": 79.00640106201172, + "learning_rate": 1.9715390789772297e-06, + "loss": 0.2153, + "num_input_tokens_seen": 23232064, + "step": 34490 + }, + { + "epoch": 0.8427185889135905, + "grad_norm": 20.789770126342773, + "learning_rate": 1.971518874943793e-06, + "loss": 0.1866, + "num_input_tokens_seen": 23235392, + "step": 34495 + }, + { + "epoch": 0.8428407397454377, + "grad_norm": 2.384317398071289, + "learning_rate": 1.97149866384521e-06, + "loss": 0.0846, + "num_input_tokens_seen": 23239040, + "step": 34500 + }, + { + "epoch": 0.8429628905772848, + "grad_norm": 0.427656352519989, + "learning_rate": 1.971478445681627e-06, + "loss": 0.1035, + "num_input_tokens_seen": 23242496, + "step": 34505 + }, + { + "epoch": 0.843085041409132, + "grad_norm": 1.450196385383606, + "learning_rate": 1.9714582204531916e-06, + "loss": 0.0301, + "num_input_tokens_seen": 23245696, + "step": 34510 + }, + { + "epoch": 0.8432071922409792, + "grad_norm": 7.800384998321533, + "learning_rate": 1.9714379881600507e-06, + "loss": 0.1464, + "num_input_tokens_seen": 23249152, + "step": 34515 + }, + { + "epoch": 0.8433293430728264, + "grad_norm": 19.53229522705078, + "learning_rate": 1.9714177488023514e-06, + "loss": 0.3484, + "num_input_tokens_seen": 23251968, + "step": 34520 + }, + { + "epoch": 0.8434514939046734, + "grad_norm": 0.45127061009407043, + "learning_rate": 1.971397502380241e-06, + "loss": 0.0453, + "num_input_tokens_seen": 23254976, + "step": 34525 + }, + { + "epoch": 0.8435736447365206, + "grad_norm": 0.17473919689655304, + "learning_rate": 1.971377248893867e-06, + "loss": 0.0724, + "num_input_tokens_seen": 23258048, + "step": 34530 + }, + { + "epoch": 0.8436957955683678, + "grad_norm": 9.839377403259277, + "learning_rate": 1.971356988343376e-06, + "loss": 0.1655, + "num_input_tokens_seen": 23262144, + "step": 34535 + }, + { + "epoch": 0.843817946400215, + "grad_norm": 33.3941764831543, + "learning_rate": 1.971336720728916e-06, + "loss": 0.0349, + "num_input_tokens_seen": 23266176, + "step": 34540 + }, + { + "epoch": 0.8439400972320622, + "grad_norm": 26.51119613647461, + "learning_rate": 1.9713164460506337e-06, + "loss": 0.122, + "num_input_tokens_seen": 23269440, + "step": 34545 + }, + { + "epoch": 0.8440622480639093, + "grad_norm": 11.167991638183594, + "learning_rate": 1.971296164308677e-06, + "loss": 0.1521, + "num_input_tokens_seen": 23272768, + "step": 34550 + }, + { + "epoch": 0.8441843988957565, + "grad_norm": 27.68284797668457, + "learning_rate": 1.971275875503194e-06, + "loss": 0.1201, + "num_input_tokens_seen": 23276288, + "step": 34555 + }, + { + "epoch": 0.8443065497276037, + "grad_norm": 5.643763542175293, + "learning_rate": 1.9712555796343307e-06, + "loss": 0.0251, + "num_input_tokens_seen": 23279424, + "step": 34560 + }, + { + "epoch": 0.8444287005594509, + "grad_norm": 5.355016708374023, + "learning_rate": 1.9712352767022364e-06, + "loss": 0.0856, + "num_input_tokens_seen": 23282816, + "step": 34565 + }, + { + "epoch": 0.8445508513912979, + "grad_norm": 19.52223014831543, + "learning_rate": 1.971214966707057e-06, + "loss": 0.0897, + "num_input_tokens_seen": 23286208, + "step": 34570 + }, + { + "epoch": 0.8446730022231451, + "grad_norm": 32.717464447021484, + "learning_rate": 1.971194649648942e-06, + "loss": 0.0634, + "num_input_tokens_seen": 23289536, + "step": 34575 + }, + { + "epoch": 0.8447951530549923, + "grad_norm": 31.83834457397461, + "learning_rate": 1.971174325528038e-06, + "loss": 0.0682, + "num_input_tokens_seen": 23293248, + "step": 34580 + }, + { + "epoch": 0.8449173038868395, + "grad_norm": 43.34263229370117, + "learning_rate": 1.971153994344493e-06, + "loss": 0.1036, + "num_input_tokens_seen": 23296128, + "step": 34585 + }, + { + "epoch": 0.8450394547186867, + "grad_norm": 24.69635581970215, + "learning_rate": 1.971133656098455e-06, + "loss": 0.1706, + "num_input_tokens_seen": 23299136, + "step": 34590 + }, + { + "epoch": 0.8451616055505338, + "grad_norm": 8.943610191345215, + "learning_rate": 1.9711133107900715e-06, + "loss": 0.2031, + "num_input_tokens_seen": 23302208, + "step": 34595 + }, + { + "epoch": 0.845283756382381, + "grad_norm": 22.528676986694336, + "learning_rate": 1.971092958419491e-06, + "loss": 0.0252, + "num_input_tokens_seen": 23305344, + "step": 34600 + }, + { + "epoch": 0.8454059072142281, + "grad_norm": 0.08624982088804245, + "learning_rate": 1.971072598986862e-06, + "loss": 0.151, + "num_input_tokens_seen": 23308480, + "step": 34605 + }, + { + "epoch": 0.8455280580460753, + "grad_norm": 16.910186767578125, + "learning_rate": 1.971052232492331e-06, + "loss": 0.1835, + "num_input_tokens_seen": 23311744, + "step": 34610 + }, + { + "epoch": 0.8456502088779224, + "grad_norm": 1.0254402160644531, + "learning_rate": 1.9710318589360476e-06, + "loss": 0.0506, + "num_input_tokens_seen": 23314880, + "step": 34615 + }, + { + "epoch": 0.8457723597097696, + "grad_norm": 58.980690002441406, + "learning_rate": 1.971011478318159e-06, + "loss": 0.1065, + "num_input_tokens_seen": 23318208, + "step": 34620 + }, + { + "epoch": 0.8458945105416168, + "grad_norm": 25.34699821472168, + "learning_rate": 1.970991090638814e-06, + "loss": 0.1044, + "num_input_tokens_seen": 23321600, + "step": 34625 + }, + { + "epoch": 0.846016661373464, + "grad_norm": 33.81626510620117, + "learning_rate": 1.9709706958981602e-06, + "loss": 0.1925, + "num_input_tokens_seen": 23325312, + "step": 34630 + }, + { + "epoch": 0.8461388122053111, + "grad_norm": 0.3212079405784607, + "learning_rate": 1.9709502940963468e-06, + "loss": 0.1667, + "num_input_tokens_seen": 23329216, + "step": 34635 + }, + { + "epoch": 0.8462609630371583, + "grad_norm": 23.59168815612793, + "learning_rate": 1.9709298852335214e-06, + "loss": 0.0684, + "num_input_tokens_seen": 23333120, + "step": 34640 + }, + { + "epoch": 0.8463831138690054, + "grad_norm": 1.1817677021026611, + "learning_rate": 1.9709094693098328e-06, + "loss": 0.1576, + "num_input_tokens_seen": 23336768, + "step": 34645 + }, + { + "epoch": 0.8465052647008526, + "grad_norm": 0.2409643828868866, + "learning_rate": 1.970889046325429e-06, + "loss": 0.1096, + "num_input_tokens_seen": 23340608, + "step": 34650 + }, + { + "epoch": 0.8466274155326998, + "grad_norm": 22.38766860961914, + "learning_rate": 1.97086861628046e-06, + "loss": 0.1202, + "num_input_tokens_seen": 23343808, + "step": 34655 + }, + { + "epoch": 0.8467495663645469, + "grad_norm": 0.2981724739074707, + "learning_rate": 1.9708481791750726e-06, + "loss": 0.0725, + "num_input_tokens_seen": 23347328, + "step": 34660 + }, + { + "epoch": 0.8468717171963941, + "grad_norm": 28.143102645874023, + "learning_rate": 1.970827735009416e-06, + "loss": 0.2058, + "num_input_tokens_seen": 23351232, + "step": 34665 + }, + { + "epoch": 0.8469938680282413, + "grad_norm": 0.41867315769195557, + "learning_rate": 1.970807283783639e-06, + "loss": 0.1002, + "num_input_tokens_seen": 23354624, + "step": 34670 + }, + { + "epoch": 0.8471160188600885, + "grad_norm": 0.2982100546360016, + "learning_rate": 1.9707868254978904e-06, + "loss": 0.1547, + "num_input_tokens_seen": 23358016, + "step": 34675 + }, + { + "epoch": 0.8472381696919355, + "grad_norm": 2.644249677658081, + "learning_rate": 1.970766360152319e-06, + "loss": 0.0944, + "num_input_tokens_seen": 23361600, + "step": 34680 + }, + { + "epoch": 0.8473603205237827, + "grad_norm": 0.44487500190734863, + "learning_rate": 1.9707458877470735e-06, + "loss": 0.1293, + "num_input_tokens_seen": 23364928, + "step": 34685 + }, + { + "epoch": 0.8474824713556299, + "grad_norm": 30.027263641357422, + "learning_rate": 1.970725408282303e-06, + "loss": 0.0862, + "num_input_tokens_seen": 23368256, + "step": 34690 + }, + { + "epoch": 0.8476046221874771, + "grad_norm": 22.388761520385742, + "learning_rate": 1.970704921758156e-06, + "loss": 0.1469, + "num_input_tokens_seen": 23372160, + "step": 34695 + }, + { + "epoch": 0.8477267730193243, + "grad_norm": 0.25965288281440735, + "learning_rate": 1.9706844281747817e-06, + "loss": 0.1028, + "num_input_tokens_seen": 23375424, + "step": 34700 + }, + { + "epoch": 0.8478489238511714, + "grad_norm": 0.06832029670476913, + "learning_rate": 1.970663927532329e-06, + "loss": 0.0444, + "num_input_tokens_seen": 23379072, + "step": 34705 + }, + { + "epoch": 0.8479710746830186, + "grad_norm": 0.2789788544178009, + "learning_rate": 1.9706434198309472e-06, + "loss": 0.0764, + "num_input_tokens_seen": 23382912, + "step": 34710 + }, + { + "epoch": 0.8480932255148658, + "grad_norm": 4.7952446937561035, + "learning_rate": 1.9706229050707855e-06, + "loss": 0.1532, + "num_input_tokens_seen": 23385920, + "step": 34715 + }, + { + "epoch": 0.848215376346713, + "grad_norm": 0.19278299808502197, + "learning_rate": 1.9706023832519932e-06, + "loss": 0.0107, + "num_input_tokens_seen": 23389248, + "step": 34720 + }, + { + "epoch": 0.84833752717856, + "grad_norm": 29.569690704345703, + "learning_rate": 1.970581854374719e-06, + "loss": 0.2432, + "num_input_tokens_seen": 23392320, + "step": 34725 + }, + { + "epoch": 0.8484596780104072, + "grad_norm": 55.5435791015625, + "learning_rate": 1.9705613184391124e-06, + "loss": 0.0913, + "num_input_tokens_seen": 23395392, + "step": 34730 + }, + { + "epoch": 0.8485818288422544, + "grad_norm": 27.48069953918457, + "learning_rate": 1.970540775445323e-06, + "loss": 0.0944, + "num_input_tokens_seen": 23398592, + "step": 34735 + }, + { + "epoch": 0.8487039796741016, + "grad_norm": 15.223067283630371, + "learning_rate": 1.9705202253935e-06, + "loss": 0.1853, + "num_input_tokens_seen": 23402112, + "step": 34740 + }, + { + "epoch": 0.8488261305059488, + "grad_norm": 55.700496673583984, + "learning_rate": 1.970499668283793e-06, + "loss": 0.1728, + "num_input_tokens_seen": 23405376, + "step": 34745 + }, + { + "epoch": 0.8489482813377959, + "grad_norm": 0.5155206322669983, + "learning_rate": 1.9704791041163514e-06, + "loss": 0.0532, + "num_input_tokens_seen": 23409024, + "step": 34750 + }, + { + "epoch": 0.8490704321696431, + "grad_norm": 29.440126419067383, + "learning_rate": 1.9704585328913247e-06, + "loss": 0.1421, + "num_input_tokens_seen": 23412224, + "step": 34755 + }, + { + "epoch": 0.8491925830014903, + "grad_norm": 7.78788948059082, + "learning_rate": 1.9704379546088626e-06, + "loss": 0.1233, + "num_input_tokens_seen": 23415552, + "step": 34760 + }, + { + "epoch": 0.8493147338333374, + "grad_norm": 1.2759686708450317, + "learning_rate": 1.9704173692691142e-06, + "loss": 0.077, + "num_input_tokens_seen": 23419328, + "step": 34765 + }, + { + "epoch": 0.8494368846651845, + "grad_norm": 13.394909858703613, + "learning_rate": 1.9703967768722305e-06, + "loss": 0.1388, + "num_input_tokens_seen": 23422656, + "step": 34770 + }, + { + "epoch": 0.8495590354970317, + "grad_norm": 39.994712829589844, + "learning_rate": 1.97037617741836e-06, + "loss": 0.073, + "num_input_tokens_seen": 23425920, + "step": 34775 + }, + { + "epoch": 0.8496811863288789, + "grad_norm": 14.517159461975098, + "learning_rate": 1.9703555709076528e-06, + "loss": 0.1236, + "num_input_tokens_seen": 23429696, + "step": 34780 + }, + { + "epoch": 0.8498033371607261, + "grad_norm": 1.9515538215637207, + "learning_rate": 1.9703349573402587e-06, + "loss": 0.045, + "num_input_tokens_seen": 23433600, + "step": 34785 + }, + { + "epoch": 0.8499254879925733, + "grad_norm": 15.69411563873291, + "learning_rate": 1.970314336716328e-06, + "loss": 0.0993, + "num_input_tokens_seen": 23437184, + "step": 34790 + }, + { + "epoch": 0.8500476388244204, + "grad_norm": 69.99845886230469, + "learning_rate": 1.9702937090360107e-06, + "loss": 0.0301, + "num_input_tokens_seen": 23440768, + "step": 34795 + }, + { + "epoch": 0.8501697896562675, + "grad_norm": 6.935617446899414, + "learning_rate": 1.9702730742994566e-06, + "loss": 0.113, + "num_input_tokens_seen": 23443776, + "step": 34800 + }, + { + "epoch": 0.8502919404881147, + "grad_norm": 24.21588134765625, + "learning_rate": 1.9702524325068156e-06, + "loss": 0.1212, + "num_input_tokens_seen": 23447360, + "step": 34805 + }, + { + "epoch": 0.8504140913199619, + "grad_norm": 14.074463844299316, + "learning_rate": 1.9702317836582378e-06, + "loss": 0.1604, + "num_input_tokens_seen": 23450880, + "step": 34810 + }, + { + "epoch": 0.850536242151809, + "grad_norm": 47.931419372558594, + "learning_rate": 1.9702111277538737e-06, + "loss": 0.113, + "num_input_tokens_seen": 23454720, + "step": 34815 + }, + { + "epoch": 0.8506583929836562, + "grad_norm": 10.696855545043945, + "learning_rate": 1.970190464793873e-06, + "loss": 0.1991, + "num_input_tokens_seen": 23457920, + "step": 34820 + }, + { + "epoch": 0.8507805438155034, + "grad_norm": 7.281139373779297, + "learning_rate": 1.9701697947783866e-06, + "loss": 0.1783, + "num_input_tokens_seen": 23461504, + "step": 34825 + }, + { + "epoch": 0.8509026946473506, + "grad_norm": 7.84771203994751, + "learning_rate": 1.9701491177075645e-06, + "loss": 0.1332, + "num_input_tokens_seen": 23464960, + "step": 34830 + }, + { + "epoch": 0.8510248454791978, + "grad_norm": 19.61056137084961, + "learning_rate": 1.9701284335815573e-06, + "loss": 0.0566, + "num_input_tokens_seen": 23468480, + "step": 34835 + }, + { + "epoch": 0.8511469963110448, + "grad_norm": 4.750822067260742, + "learning_rate": 1.9701077424005148e-06, + "loss": 0.0771, + "num_input_tokens_seen": 23472192, + "step": 34840 + }, + { + "epoch": 0.851269147142892, + "grad_norm": 2.461702823638916, + "learning_rate": 1.970087044164588e-06, + "loss": 0.0344, + "num_input_tokens_seen": 23475520, + "step": 34845 + }, + { + "epoch": 0.8513912979747392, + "grad_norm": 2.9755985736846924, + "learning_rate": 1.970066338873927e-06, + "loss": 0.1812, + "num_input_tokens_seen": 23478528, + "step": 34850 + }, + { + "epoch": 0.8515134488065864, + "grad_norm": 15.43201732635498, + "learning_rate": 1.9700456265286827e-06, + "loss": 0.0944, + "num_input_tokens_seen": 23482240, + "step": 34855 + }, + { + "epoch": 0.8516355996384335, + "grad_norm": 0.9599284529685974, + "learning_rate": 1.970024907129006e-06, + "loss": 0.0797, + "num_input_tokens_seen": 23485568, + "step": 34860 + }, + { + "epoch": 0.8517577504702807, + "grad_norm": 7.2150492668151855, + "learning_rate": 1.9700041806750477e-06, + "loss": 0.0867, + "num_input_tokens_seen": 23488832, + "step": 34865 + }, + { + "epoch": 0.8518799013021279, + "grad_norm": 22.719148635864258, + "learning_rate": 1.9699834471669574e-06, + "loss": 0.1091, + "num_input_tokens_seen": 23492672, + "step": 34870 + }, + { + "epoch": 0.8520020521339751, + "grad_norm": 46.67292785644531, + "learning_rate": 1.9699627066048868e-06, + "loss": 0.0721, + "num_input_tokens_seen": 23496000, + "step": 34875 + }, + { + "epoch": 0.8521242029658221, + "grad_norm": 0.11148344725370407, + "learning_rate": 1.9699419589889863e-06, + "loss": 0.0762, + "num_input_tokens_seen": 23499968, + "step": 34880 + }, + { + "epoch": 0.8522463537976693, + "grad_norm": 20.346290588378906, + "learning_rate": 1.9699212043194075e-06, + "loss": 0.1507, + "num_input_tokens_seen": 23503168, + "step": 34885 + }, + { + "epoch": 0.8523685046295165, + "grad_norm": 52.53269577026367, + "learning_rate": 1.9699004425963003e-06, + "loss": 0.1667, + "num_input_tokens_seen": 23506368, + "step": 34890 + }, + { + "epoch": 0.8524906554613637, + "grad_norm": 1.005509376525879, + "learning_rate": 1.9698796738198163e-06, + "loss": 0.1714, + "num_input_tokens_seen": 23509440, + "step": 34895 + }, + { + "epoch": 0.8526128062932109, + "grad_norm": 6.515542030334473, + "learning_rate": 1.9698588979901064e-06, + "loss": 0.0708, + "num_input_tokens_seen": 23512640, + "step": 34900 + }, + { + "epoch": 0.852734957125058, + "grad_norm": 10.081113815307617, + "learning_rate": 1.969838115107322e-06, + "loss": 0.0841, + "num_input_tokens_seen": 23515968, + "step": 34905 + }, + { + "epoch": 0.8528571079569052, + "grad_norm": 42.108604431152344, + "learning_rate": 1.9698173251716138e-06, + "loss": 0.0816, + "num_input_tokens_seen": 23519360, + "step": 34910 + }, + { + "epoch": 0.8529792587887524, + "grad_norm": 38.693603515625, + "learning_rate": 1.9697965281831335e-06, + "loss": 0.1015, + "num_input_tokens_seen": 23522496, + "step": 34915 + }, + { + "epoch": 0.8531014096205995, + "grad_norm": 0.6154043078422546, + "learning_rate": 1.9697757241420315e-06, + "loss": 0.0559, + "num_input_tokens_seen": 23526272, + "step": 34920 + }, + { + "epoch": 0.8532235604524466, + "grad_norm": 0.0829826146364212, + "learning_rate": 1.96975491304846e-06, + "loss": 0.0682, + "num_input_tokens_seen": 23529536, + "step": 34925 + }, + { + "epoch": 0.8533457112842938, + "grad_norm": 13.311882972717285, + "learning_rate": 1.9697340949025697e-06, + "loss": 0.0585, + "num_input_tokens_seen": 23532672, + "step": 34930 + }, + { + "epoch": 0.853467862116141, + "grad_norm": 27.622638702392578, + "learning_rate": 1.9697132697045126e-06, + "loss": 0.0794, + "num_input_tokens_seen": 23536384, + "step": 34935 + }, + { + "epoch": 0.8535900129479882, + "grad_norm": 12.490110397338867, + "learning_rate": 1.969692437454439e-06, + "loss": 0.1116, + "num_input_tokens_seen": 23540096, + "step": 34940 + }, + { + "epoch": 0.8537121637798354, + "grad_norm": 24.080808639526367, + "learning_rate": 1.969671598152502e-06, + "loss": 0.1717, + "num_input_tokens_seen": 23544256, + "step": 34945 + }, + { + "epoch": 0.8538343146116825, + "grad_norm": 25.397422790527344, + "learning_rate": 1.9696507517988517e-06, + "loss": 0.1252, + "num_input_tokens_seen": 23547392, + "step": 34950 + }, + { + "epoch": 0.8539564654435297, + "grad_norm": 4.807941436767578, + "learning_rate": 1.9696298983936408e-06, + "loss": 0.1007, + "num_input_tokens_seen": 23550592, + "step": 34955 + }, + { + "epoch": 0.8540786162753768, + "grad_norm": 6.345379829406738, + "learning_rate": 1.9696090379370203e-06, + "loss": 0.0744, + "num_input_tokens_seen": 23554688, + "step": 34960 + }, + { + "epoch": 0.854200767107224, + "grad_norm": 0.38732364773750305, + "learning_rate": 1.9695881704291423e-06, + "loss": 0.145, + "num_input_tokens_seen": 23558464, + "step": 34965 + }, + { + "epoch": 0.8543229179390711, + "grad_norm": 1.831109881401062, + "learning_rate": 1.969567295870158e-06, + "loss": 0.0465, + "num_input_tokens_seen": 23561216, + "step": 34970 + }, + { + "epoch": 0.8544450687709183, + "grad_norm": 0.9281952977180481, + "learning_rate": 1.9695464142602195e-06, + "loss": 0.0931, + "num_input_tokens_seen": 23565056, + "step": 34975 + }, + { + "epoch": 0.8545672196027655, + "grad_norm": 0.18389178812503815, + "learning_rate": 1.9695255255994788e-06, + "loss": 0.0676, + "num_input_tokens_seen": 23568320, + "step": 34980 + }, + { + "epoch": 0.8546893704346127, + "grad_norm": 46.50515365600586, + "learning_rate": 1.969504629888088e-06, + "loss": 0.0429, + "num_input_tokens_seen": 23571840, + "step": 34985 + }, + { + "epoch": 0.8548115212664599, + "grad_norm": 29.618240356445312, + "learning_rate": 1.9694837271261985e-06, + "loss": 0.1533, + "num_input_tokens_seen": 23575232, + "step": 34990 + }, + { + "epoch": 0.854933672098307, + "grad_norm": 29.626087188720703, + "learning_rate": 1.9694628173139626e-06, + "loss": 0.1684, + "num_input_tokens_seen": 23579072, + "step": 34995 + }, + { + "epoch": 0.8550558229301541, + "grad_norm": 21.039466857910156, + "learning_rate": 1.969441900451532e-06, + "loss": 0.1663, + "num_input_tokens_seen": 23582464, + "step": 35000 + }, + { + "epoch": 0.8551779737620013, + "grad_norm": 9.503751754760742, + "learning_rate": 1.96942097653906e-06, + "loss": 0.2365, + "num_input_tokens_seen": 23585600, + "step": 35005 + }, + { + "epoch": 0.8553001245938485, + "grad_norm": 18.334392547607422, + "learning_rate": 1.969400045576697e-06, + "loss": 0.1384, + "num_input_tokens_seen": 23589376, + "step": 35010 + }, + { + "epoch": 0.8554222754256956, + "grad_norm": 18.294281005859375, + "learning_rate": 1.969379107564597e-06, + "loss": 0.0798, + "num_input_tokens_seen": 23592384, + "step": 35015 + }, + { + "epoch": 0.8555444262575428, + "grad_norm": 14.820401191711426, + "learning_rate": 1.9693581625029108e-06, + "loss": 0.0671, + "num_input_tokens_seen": 23595904, + "step": 35020 + }, + { + "epoch": 0.85566657708939, + "grad_norm": 13.708227157592773, + "learning_rate": 1.9693372103917913e-06, + "loss": 0.0827, + "num_input_tokens_seen": 23598976, + "step": 35025 + }, + { + "epoch": 0.8557887279212372, + "grad_norm": 15.605507850646973, + "learning_rate": 1.969316251231391e-06, + "loss": 0.1113, + "num_input_tokens_seen": 23602368, + "step": 35030 + }, + { + "epoch": 0.8559108787530844, + "grad_norm": 31.886110305786133, + "learning_rate": 1.9692952850218624e-06, + "loss": 0.1937, + "num_input_tokens_seen": 23606272, + "step": 35035 + }, + { + "epoch": 0.8560330295849314, + "grad_norm": 32.31554412841797, + "learning_rate": 1.9692743117633576e-06, + "loss": 0.1756, + "num_input_tokens_seen": 23609856, + "step": 35040 + }, + { + "epoch": 0.8561551804167786, + "grad_norm": 2.543952703475952, + "learning_rate": 1.969253331456029e-06, + "loss": 0.0429, + "num_input_tokens_seen": 23613248, + "step": 35045 + }, + { + "epoch": 0.8562773312486258, + "grad_norm": 15.292976379394531, + "learning_rate": 1.96923234410003e-06, + "loss": 0.0925, + "num_input_tokens_seen": 23616256, + "step": 35050 + }, + { + "epoch": 0.856399482080473, + "grad_norm": 33.541866302490234, + "learning_rate": 1.9692113496955124e-06, + "loss": 0.2194, + "num_input_tokens_seen": 23619392, + "step": 35055 + }, + { + "epoch": 0.8565216329123201, + "grad_norm": 13.459238052368164, + "learning_rate": 1.9691903482426295e-06, + "loss": 0.0587, + "num_input_tokens_seen": 23623552, + "step": 35060 + }, + { + "epoch": 0.8566437837441673, + "grad_norm": 1.0485910177230835, + "learning_rate": 1.9691693397415333e-06, + "loss": 0.0937, + "num_input_tokens_seen": 23627200, + "step": 35065 + }, + { + "epoch": 0.8567659345760145, + "grad_norm": 18.415218353271484, + "learning_rate": 1.9691483241923776e-06, + "loss": 0.154, + "num_input_tokens_seen": 23630272, + "step": 35070 + }, + { + "epoch": 0.8568880854078617, + "grad_norm": 23.374282836914062, + "learning_rate": 1.969127301595314e-06, + "loss": 0.0931, + "num_input_tokens_seen": 23633408, + "step": 35075 + }, + { + "epoch": 0.8570102362397088, + "grad_norm": 13.124170303344727, + "learning_rate": 1.9691062719504962e-06, + "loss": 0.2184, + "num_input_tokens_seen": 23636928, + "step": 35080 + }, + { + "epoch": 0.8571323870715559, + "grad_norm": 0.5865902900695801, + "learning_rate": 1.969085235258077e-06, + "loss": 0.0106, + "num_input_tokens_seen": 23640384, + "step": 35085 + }, + { + "epoch": 0.8572545379034031, + "grad_norm": 1.6755985021591187, + "learning_rate": 1.969064191518209e-06, + "loss": 0.0371, + "num_input_tokens_seen": 23644416, + "step": 35090 + }, + { + "epoch": 0.8573766887352503, + "grad_norm": 15.779130935668945, + "learning_rate": 1.969043140731046e-06, + "loss": 0.0719, + "num_input_tokens_seen": 23647488, + "step": 35095 + }, + { + "epoch": 0.8574988395670975, + "grad_norm": 18.528223037719727, + "learning_rate": 1.96902208289674e-06, + "loss": 0.105, + "num_input_tokens_seen": 23651264, + "step": 35100 + }, + { + "epoch": 0.8576209903989446, + "grad_norm": 9.245097160339355, + "learning_rate": 1.9690010180154454e-06, + "loss": 0.0889, + "num_input_tokens_seen": 23654784, + "step": 35105 + }, + { + "epoch": 0.8577431412307918, + "grad_norm": 7.738526344299316, + "learning_rate": 1.9689799460873147e-06, + "loss": 0.16, + "num_input_tokens_seen": 23658368, + "step": 35110 + }, + { + "epoch": 0.857865292062639, + "grad_norm": 1.2937458753585815, + "learning_rate": 1.968958867112501e-06, + "loss": 0.02, + "num_input_tokens_seen": 23662144, + "step": 35115 + }, + { + "epoch": 0.8579874428944861, + "grad_norm": 1.212963581085205, + "learning_rate": 1.9689377810911577e-06, + "loss": 0.1565, + "num_input_tokens_seen": 23665472, + "step": 35120 + }, + { + "epoch": 0.8581095937263333, + "grad_norm": 24.34621238708496, + "learning_rate": 1.9689166880234385e-06, + "loss": 0.046, + "num_input_tokens_seen": 23668864, + "step": 35125 + }, + { + "epoch": 0.8582317445581804, + "grad_norm": 12.43953800201416, + "learning_rate": 1.9688955879094966e-06, + "loss": 0.1591, + "num_input_tokens_seen": 23672128, + "step": 35130 + }, + { + "epoch": 0.8583538953900276, + "grad_norm": 21.296579360961914, + "learning_rate": 1.9688744807494853e-06, + "loss": 0.071, + "num_input_tokens_seen": 23675648, + "step": 35135 + }, + { + "epoch": 0.8584760462218748, + "grad_norm": 1.4470196962356567, + "learning_rate": 1.968853366543558e-06, + "loss": 0.0692, + "num_input_tokens_seen": 23679232, + "step": 35140 + }, + { + "epoch": 0.858598197053722, + "grad_norm": 23.92950439453125, + "learning_rate": 1.9688322452918686e-06, + "loss": 0.148, + "num_input_tokens_seen": 23682176, + "step": 35145 + }, + { + "epoch": 0.8587203478855691, + "grad_norm": 0.055989764630794525, + "learning_rate": 1.9688111169945706e-06, + "loss": 0.0971, + "num_input_tokens_seen": 23685440, + "step": 35150 + }, + { + "epoch": 0.8588424987174162, + "grad_norm": 34.370025634765625, + "learning_rate": 1.9687899816518173e-06, + "loss": 0.1121, + "num_input_tokens_seen": 23688512, + "step": 35155 + }, + { + "epoch": 0.8589646495492634, + "grad_norm": 23.321041107177734, + "learning_rate": 1.968768839263763e-06, + "loss": 0.0545, + "num_input_tokens_seen": 23691840, + "step": 35160 + }, + { + "epoch": 0.8590868003811106, + "grad_norm": 11.707565307617188, + "learning_rate": 1.968747689830561e-06, + "loss": 0.182, + "num_input_tokens_seen": 23695488, + "step": 35165 + }, + { + "epoch": 0.8592089512129577, + "grad_norm": 0.9634722471237183, + "learning_rate": 1.968726533352365e-06, + "loss": 0.1623, + "num_input_tokens_seen": 23699136, + "step": 35170 + }, + { + "epoch": 0.8593311020448049, + "grad_norm": 33.24665832519531, + "learning_rate": 1.9687053698293293e-06, + "loss": 0.315, + "num_input_tokens_seen": 23702400, + "step": 35175 + }, + { + "epoch": 0.8594532528766521, + "grad_norm": 20.123992919921875, + "learning_rate": 1.9686841992616077e-06, + "loss": 0.0951, + "num_input_tokens_seen": 23705728, + "step": 35180 + }, + { + "epoch": 0.8595754037084993, + "grad_norm": 48.02340316772461, + "learning_rate": 1.968663021649354e-06, + "loss": 0.2311, + "num_input_tokens_seen": 23709120, + "step": 35185 + }, + { + "epoch": 0.8596975545403465, + "grad_norm": 2.7457668781280518, + "learning_rate": 1.9686418369927224e-06, + "loss": 0.1489, + "num_input_tokens_seen": 23712512, + "step": 35190 + }, + { + "epoch": 0.8598197053721935, + "grad_norm": 19.584701538085938, + "learning_rate": 1.9686206452918667e-06, + "loss": 0.0941, + "num_input_tokens_seen": 23715904, + "step": 35195 + }, + { + "epoch": 0.8599418562040407, + "grad_norm": 14.326571464538574, + "learning_rate": 1.968599446546941e-06, + "loss": 0.0694, + "num_input_tokens_seen": 23720000, + "step": 35200 + }, + { + "epoch": 0.8600640070358879, + "grad_norm": 27.21695327758789, + "learning_rate": 1.9685782407580995e-06, + "loss": 0.1236, + "num_input_tokens_seen": 23723200, + "step": 35205 + }, + { + "epoch": 0.8601861578677351, + "grad_norm": 26.761911392211914, + "learning_rate": 1.9685570279254967e-06, + "loss": 0.0757, + "num_input_tokens_seen": 23726656, + "step": 35210 + }, + { + "epoch": 0.8603083086995822, + "grad_norm": 20.203405380249023, + "learning_rate": 1.9685358080492865e-06, + "loss": 0.0956, + "num_input_tokens_seen": 23730048, + "step": 35215 + }, + { + "epoch": 0.8604304595314294, + "grad_norm": 36.64809799194336, + "learning_rate": 1.9685145811296237e-06, + "loss": 0.074, + "num_input_tokens_seen": 23733312, + "step": 35220 + }, + { + "epoch": 0.8605526103632766, + "grad_norm": 6.595607757568359, + "learning_rate": 1.9684933471666626e-06, + "loss": 0.1863, + "num_input_tokens_seen": 23736640, + "step": 35225 + }, + { + "epoch": 0.8606747611951238, + "grad_norm": 60.852046966552734, + "learning_rate": 1.968472106160557e-06, + "loss": 0.0825, + "num_input_tokens_seen": 23740416, + "step": 35230 + }, + { + "epoch": 0.860796912026971, + "grad_norm": 36.590965270996094, + "learning_rate": 1.9684508581114616e-06, + "loss": 0.2678, + "num_input_tokens_seen": 23743680, + "step": 35235 + }, + { + "epoch": 0.860919062858818, + "grad_norm": 1.6028282642364502, + "learning_rate": 1.9684296030195317e-06, + "loss": 0.1087, + "num_input_tokens_seen": 23747072, + "step": 35240 + }, + { + "epoch": 0.8610412136906652, + "grad_norm": 1.05069899559021, + "learning_rate": 1.9684083408849206e-06, + "loss": 0.0148, + "num_input_tokens_seen": 23750592, + "step": 35245 + }, + { + "epoch": 0.8611633645225124, + "grad_norm": 51.769962310791016, + "learning_rate": 1.968387071707784e-06, + "loss": 0.2289, + "num_input_tokens_seen": 23753408, + "step": 35250 + }, + { + "epoch": 0.8612855153543596, + "grad_norm": 19.687625885009766, + "learning_rate": 1.9683657954882757e-06, + "loss": 0.1386, + "num_input_tokens_seen": 23756480, + "step": 35255 + }, + { + "epoch": 0.8614076661862067, + "grad_norm": 1.0411657094955444, + "learning_rate": 1.968344512226551e-06, + "loss": 0.1047, + "num_input_tokens_seen": 23759808, + "step": 35260 + }, + { + "epoch": 0.8615298170180539, + "grad_norm": 0.37326228618621826, + "learning_rate": 1.9683232219227646e-06, + "loss": 0.1218, + "num_input_tokens_seen": 23763200, + "step": 35265 + }, + { + "epoch": 0.8616519678499011, + "grad_norm": 4.8047027587890625, + "learning_rate": 1.9683019245770717e-06, + "loss": 0.1086, + "num_input_tokens_seen": 23766464, + "step": 35270 + }, + { + "epoch": 0.8617741186817482, + "grad_norm": 21.94441032409668, + "learning_rate": 1.9682806201896264e-06, + "loss": 0.234, + "num_input_tokens_seen": 23770240, + "step": 35275 + }, + { + "epoch": 0.8618962695135954, + "grad_norm": 36.986934661865234, + "learning_rate": 1.968259308760584e-06, + "loss": 0.0527, + "num_input_tokens_seen": 23773632, + "step": 35280 + }, + { + "epoch": 0.8620184203454425, + "grad_norm": 0.33001336455345154, + "learning_rate": 1.9682379902900995e-06, + "loss": 0.0753, + "num_input_tokens_seen": 23777408, + "step": 35285 + }, + { + "epoch": 0.8621405711772897, + "grad_norm": 41.58378219604492, + "learning_rate": 1.968216664778328e-06, + "loss": 0.1124, + "num_input_tokens_seen": 23780864, + "step": 35290 + }, + { + "epoch": 0.8622627220091369, + "grad_norm": 7.427133083343506, + "learning_rate": 1.9681953322254243e-06, + "loss": 0.184, + "num_input_tokens_seen": 23784320, + "step": 35295 + }, + { + "epoch": 0.8623848728409841, + "grad_norm": 38.231021881103516, + "learning_rate": 1.968173992631544e-06, + "loss": 0.0935, + "num_input_tokens_seen": 23788608, + "step": 35300 + }, + { + "epoch": 0.8625070236728312, + "grad_norm": 23.69070816040039, + "learning_rate": 1.968152645996842e-06, + "loss": 0.2388, + "num_input_tokens_seen": 23792000, + "step": 35305 + }, + { + "epoch": 0.8626291745046784, + "grad_norm": 13.36784839630127, + "learning_rate": 1.9681312923214734e-06, + "loss": 0.1465, + "num_input_tokens_seen": 23795584, + "step": 35310 + }, + { + "epoch": 0.8627513253365255, + "grad_norm": 1.5608197450637817, + "learning_rate": 1.9681099316055934e-06, + "loss": 0.1328, + "num_input_tokens_seen": 23798720, + "step": 35315 + }, + { + "epoch": 0.8628734761683727, + "grad_norm": 0.44920867681503296, + "learning_rate": 1.968088563849358e-06, + "loss": 0.2706, + "num_input_tokens_seen": 23801728, + "step": 35320 + }, + { + "epoch": 0.8629956270002199, + "grad_norm": 0.6280757188796997, + "learning_rate": 1.968067189052922e-06, + "loss": 0.078, + "num_input_tokens_seen": 23805504, + "step": 35325 + }, + { + "epoch": 0.863117777832067, + "grad_norm": 40.889556884765625, + "learning_rate": 1.968045807216441e-06, + "loss": 0.17, + "num_input_tokens_seen": 23809344, + "step": 35330 + }, + { + "epoch": 0.8632399286639142, + "grad_norm": 9.817305564880371, + "learning_rate": 1.968024418340071e-06, + "loss": 0.2026, + "num_input_tokens_seen": 23812416, + "step": 35335 + }, + { + "epoch": 0.8633620794957614, + "grad_norm": 10.511480331420898, + "learning_rate": 1.968003022423966e-06, + "loss": 0.1147, + "num_input_tokens_seen": 23815744, + "step": 35340 + }, + { + "epoch": 0.8634842303276086, + "grad_norm": 8.885690689086914, + "learning_rate": 1.9679816194682834e-06, + "loss": 0.1263, + "num_input_tokens_seen": 23819200, + "step": 35345 + }, + { + "epoch": 0.8636063811594556, + "grad_norm": 0.8410807847976685, + "learning_rate": 1.967960209473178e-06, + "loss": 0.0971, + "num_input_tokens_seen": 23822528, + "step": 35350 + }, + { + "epoch": 0.8637285319913028, + "grad_norm": 0.631669819355011, + "learning_rate": 1.9679387924388058e-06, + "loss": 0.1648, + "num_input_tokens_seen": 23826240, + "step": 35355 + }, + { + "epoch": 0.86385068282315, + "grad_norm": 22.954504013061523, + "learning_rate": 1.967917368365322e-06, + "loss": 0.0552, + "num_input_tokens_seen": 23829184, + "step": 35360 + }, + { + "epoch": 0.8639728336549972, + "grad_norm": 10.804740905761719, + "learning_rate": 1.9678959372528828e-06, + "loss": 0.1812, + "num_input_tokens_seen": 23833088, + "step": 35365 + }, + { + "epoch": 0.8640949844868444, + "grad_norm": 26.449541091918945, + "learning_rate": 1.967874499101644e-06, + "loss": 0.1272, + "num_input_tokens_seen": 23836160, + "step": 35370 + }, + { + "epoch": 0.8642171353186915, + "grad_norm": 0.31604552268981934, + "learning_rate": 1.967853053911762e-06, + "loss": 0.0314, + "num_input_tokens_seen": 23839808, + "step": 35375 + }, + { + "epoch": 0.8643392861505387, + "grad_norm": 11.573125839233398, + "learning_rate": 1.967831601683392e-06, + "loss": 0.1877, + "num_input_tokens_seen": 23842816, + "step": 35380 + }, + { + "epoch": 0.8644614369823859, + "grad_norm": 8.36064338684082, + "learning_rate": 1.96781014241669e-06, + "loss": 0.0687, + "num_input_tokens_seen": 23846144, + "step": 35385 + }, + { + "epoch": 0.8645835878142331, + "grad_norm": 16.87027359008789, + "learning_rate": 1.9677886761118126e-06, + "loss": 0.1174, + "num_input_tokens_seen": 23849536, + "step": 35390 + }, + { + "epoch": 0.8647057386460801, + "grad_norm": 3.9122729301452637, + "learning_rate": 1.9677672027689156e-06, + "loss": 0.177, + "num_input_tokens_seen": 23853120, + "step": 35395 + }, + { + "epoch": 0.8648278894779273, + "grad_norm": 7.3564066886901855, + "learning_rate": 1.9677457223881553e-06, + "loss": 0.2031, + "num_input_tokens_seen": 23856768, + "step": 35400 + }, + { + "epoch": 0.8649500403097745, + "grad_norm": 6.73646354675293, + "learning_rate": 1.967724234969688e-06, + "loss": 0.1071, + "num_input_tokens_seen": 23860096, + "step": 35405 + }, + { + "epoch": 0.8650721911416217, + "grad_norm": 9.172672271728516, + "learning_rate": 1.967702740513669e-06, + "loss": 0.1915, + "num_input_tokens_seen": 23863296, + "step": 35410 + }, + { + "epoch": 0.8651943419734689, + "grad_norm": 2.225639820098877, + "learning_rate": 1.967681239020256e-06, + "loss": 0.0757, + "num_input_tokens_seen": 23866368, + "step": 35415 + }, + { + "epoch": 0.865316492805316, + "grad_norm": 52.951171875, + "learning_rate": 1.9676597304896046e-06, + "loss": 0.1594, + "num_input_tokens_seen": 23869568, + "step": 35420 + }, + { + "epoch": 0.8654386436371632, + "grad_norm": 45.615421295166016, + "learning_rate": 1.967638214921871e-06, + "loss": 0.1046, + "num_input_tokens_seen": 23873024, + "step": 35425 + }, + { + "epoch": 0.8655607944690104, + "grad_norm": 0.6920754909515381, + "learning_rate": 1.967616692317213e-06, + "loss": 0.0774, + "num_input_tokens_seen": 23876544, + "step": 35430 + }, + { + "epoch": 0.8656829453008575, + "grad_norm": 0.914369523525238, + "learning_rate": 1.9675951626757854e-06, + "loss": 0.1239, + "num_input_tokens_seen": 23880192, + "step": 35435 + }, + { + "epoch": 0.8658050961327046, + "grad_norm": 8.883461952209473, + "learning_rate": 1.9675736259977455e-06, + "loss": 0.0905, + "num_input_tokens_seen": 23883904, + "step": 35440 + }, + { + "epoch": 0.8659272469645518, + "grad_norm": 0.3117360472679138, + "learning_rate": 1.9675520822832504e-06, + "loss": 0.0623, + "num_input_tokens_seen": 23887296, + "step": 35445 + }, + { + "epoch": 0.866049397796399, + "grad_norm": 28.35218048095703, + "learning_rate": 1.967530531532456e-06, + "loss": 0.0776, + "num_input_tokens_seen": 23890560, + "step": 35450 + }, + { + "epoch": 0.8661715486282462, + "grad_norm": 17.423656463623047, + "learning_rate": 1.967508973745519e-06, + "loss": 0.0951, + "num_input_tokens_seen": 23893632, + "step": 35455 + }, + { + "epoch": 0.8662936994600933, + "grad_norm": 5.609574794769287, + "learning_rate": 1.967487408922597e-06, + "loss": 0.0601, + "num_input_tokens_seen": 23896640, + "step": 35460 + }, + { + "epoch": 0.8664158502919405, + "grad_norm": 0.16245517134666443, + "learning_rate": 1.9674658370638462e-06, + "loss": 0.1787, + "num_input_tokens_seen": 23900416, + "step": 35465 + }, + { + "epoch": 0.8665380011237876, + "grad_norm": 12.920045852661133, + "learning_rate": 1.9674442581694238e-06, + "loss": 0.2974, + "num_input_tokens_seen": 23903808, + "step": 35470 + }, + { + "epoch": 0.8666601519556348, + "grad_norm": 0.45196205377578735, + "learning_rate": 1.967422672239487e-06, + "loss": 0.0575, + "num_input_tokens_seen": 23907392, + "step": 35475 + }, + { + "epoch": 0.866782302787482, + "grad_norm": 0.5795376896858215, + "learning_rate": 1.967401079274191e-06, + "loss": 0.1018, + "num_input_tokens_seen": 23911168, + "step": 35480 + }, + { + "epoch": 0.8669044536193291, + "grad_norm": 8.955952644348145, + "learning_rate": 1.967379479273695e-06, + "loss": 0.138, + "num_input_tokens_seen": 23914304, + "step": 35485 + }, + { + "epoch": 0.8670266044511763, + "grad_norm": 0.7784607410430908, + "learning_rate": 1.9673578722381552e-06, + "loss": 0.1295, + "num_input_tokens_seen": 23917888, + "step": 35490 + }, + { + "epoch": 0.8671487552830235, + "grad_norm": 3.1278843879699707, + "learning_rate": 1.967336258167729e-06, + "loss": 0.065, + "num_input_tokens_seen": 23921472, + "step": 35495 + }, + { + "epoch": 0.8672709061148707, + "grad_norm": 0.8848108053207397, + "learning_rate": 1.9673146370625727e-06, + "loss": 0.0754, + "num_input_tokens_seen": 23924736, + "step": 35500 + }, + { + "epoch": 0.8673930569467178, + "grad_norm": 41.079925537109375, + "learning_rate": 1.9672930089228448e-06, + "loss": 0.1791, + "num_input_tokens_seen": 23927744, + "step": 35505 + }, + { + "epoch": 0.867515207778565, + "grad_norm": 14.143448829650879, + "learning_rate": 1.9672713737487018e-06, + "loss": 0.1933, + "num_input_tokens_seen": 23930944, + "step": 35510 + }, + { + "epoch": 0.8676373586104121, + "grad_norm": 56.091392517089844, + "learning_rate": 1.967249731540301e-06, + "loss": 0.0915, + "num_input_tokens_seen": 23934336, + "step": 35515 + }, + { + "epoch": 0.8677595094422593, + "grad_norm": 28.087440490722656, + "learning_rate": 1.9672280822978e-06, + "loss": 0.093, + "num_input_tokens_seen": 23937856, + "step": 35520 + }, + { + "epoch": 0.8678816602741065, + "grad_norm": 1.3667480945587158, + "learning_rate": 1.9672064260213565e-06, + "loss": 0.1593, + "num_input_tokens_seen": 23941312, + "step": 35525 + }, + { + "epoch": 0.8680038111059536, + "grad_norm": 24.725561141967773, + "learning_rate": 1.9671847627111273e-06, + "loss": 0.1556, + "num_input_tokens_seen": 23944256, + "step": 35530 + }, + { + "epoch": 0.8681259619378008, + "grad_norm": 2.158342123031616, + "learning_rate": 1.967163092367271e-06, + "loss": 0.0886, + "num_input_tokens_seen": 23947840, + "step": 35535 + }, + { + "epoch": 0.868248112769648, + "grad_norm": 29.596519470214844, + "learning_rate": 1.9671414149899438e-06, + "loss": 0.0558, + "num_input_tokens_seen": 23951040, + "step": 35540 + }, + { + "epoch": 0.8683702636014952, + "grad_norm": 50.23384094238281, + "learning_rate": 1.9671197305793044e-06, + "loss": 0.1845, + "num_input_tokens_seen": 23953984, + "step": 35545 + }, + { + "epoch": 0.8684924144333422, + "grad_norm": 0.8262698650360107, + "learning_rate": 1.9670980391355104e-06, + "loss": 0.0353, + "num_input_tokens_seen": 23957184, + "step": 35550 + }, + { + "epoch": 0.8686145652651894, + "grad_norm": 24.694656372070312, + "learning_rate": 1.9670763406587192e-06, + "loss": 0.1873, + "num_input_tokens_seen": 23960640, + "step": 35555 + }, + { + "epoch": 0.8687367160970366, + "grad_norm": 0.3127105236053467, + "learning_rate": 1.967054635149089e-06, + "loss": 0.0577, + "num_input_tokens_seen": 23963904, + "step": 35560 + }, + { + "epoch": 0.8688588669288838, + "grad_norm": 12.424739837646484, + "learning_rate": 1.967032922606777e-06, + "loss": 0.121, + "num_input_tokens_seen": 23967104, + "step": 35565 + }, + { + "epoch": 0.868981017760731, + "grad_norm": 9.423223495483398, + "learning_rate": 1.9670112030319412e-06, + "loss": 0.0403, + "num_input_tokens_seen": 23970240, + "step": 35570 + }, + { + "epoch": 0.8691031685925781, + "grad_norm": 15.91766357421875, + "learning_rate": 1.9669894764247403e-06, + "loss": 0.187, + "num_input_tokens_seen": 23973760, + "step": 35575 + }, + { + "epoch": 0.8692253194244253, + "grad_norm": 13.034796714782715, + "learning_rate": 1.966967742785332e-06, + "loss": 0.1906, + "num_input_tokens_seen": 23976960, + "step": 35580 + }, + { + "epoch": 0.8693474702562725, + "grad_norm": 0.9588425755500793, + "learning_rate": 1.966946002113874e-06, + "loss": 0.0527, + "num_input_tokens_seen": 23980352, + "step": 35585 + }, + { + "epoch": 0.8694696210881196, + "grad_norm": 8.451891899108887, + "learning_rate": 1.9669242544105245e-06, + "loss": 0.0576, + "num_input_tokens_seen": 23983872, + "step": 35590 + }, + { + "epoch": 0.8695917719199667, + "grad_norm": 2.5281105041503906, + "learning_rate": 1.966902499675442e-06, + "loss": 0.0556, + "num_input_tokens_seen": 23987520, + "step": 35595 + }, + { + "epoch": 0.8697139227518139, + "grad_norm": 8.671502113342285, + "learning_rate": 1.9668807379087843e-06, + "loss": 0.1358, + "num_input_tokens_seen": 23991232, + "step": 35600 + }, + { + "epoch": 0.8698360735836611, + "grad_norm": 3.297170639038086, + "learning_rate": 1.9668589691107096e-06, + "loss": 0.1904, + "num_input_tokens_seen": 23994816, + "step": 35605 + }, + { + "epoch": 0.8699582244155083, + "grad_norm": 37.647254943847656, + "learning_rate": 1.966837193281377e-06, + "loss": 0.0965, + "num_input_tokens_seen": 23997952, + "step": 35610 + }, + { + "epoch": 0.8700803752473555, + "grad_norm": 20.210281372070312, + "learning_rate": 1.9668154104209438e-06, + "loss": 0.0625, + "num_input_tokens_seen": 24001088, + "step": 35615 + }, + { + "epoch": 0.8702025260792026, + "grad_norm": 1.265897512435913, + "learning_rate": 1.966793620529569e-06, + "loss": 0.1092, + "num_input_tokens_seen": 24004992, + "step": 35620 + }, + { + "epoch": 0.8703246769110498, + "grad_norm": 0.6645774841308594, + "learning_rate": 1.9667718236074106e-06, + "loss": 0.1368, + "num_input_tokens_seen": 24008960, + "step": 35625 + }, + { + "epoch": 0.870446827742897, + "grad_norm": 1.741341233253479, + "learning_rate": 1.966750019654628e-06, + "loss": 0.0791, + "num_input_tokens_seen": 24012352, + "step": 35630 + }, + { + "epoch": 0.8705689785747441, + "grad_norm": 0.28083935379981995, + "learning_rate": 1.966728208671379e-06, + "loss": 0.0905, + "num_input_tokens_seen": 24015936, + "step": 35635 + }, + { + "epoch": 0.8706911294065912, + "grad_norm": 22.628009796142578, + "learning_rate": 1.9667063906578226e-06, + "loss": 0.1447, + "num_input_tokens_seen": 24019200, + "step": 35640 + }, + { + "epoch": 0.8708132802384384, + "grad_norm": 0.0826408714056015, + "learning_rate": 1.966684565614117e-06, + "loss": 0.219, + "num_input_tokens_seen": 24022592, + "step": 35645 + }, + { + "epoch": 0.8709354310702856, + "grad_norm": 18.106290817260742, + "learning_rate": 1.9666627335404214e-06, + "loss": 0.1551, + "num_input_tokens_seen": 24025984, + "step": 35650 + }, + { + "epoch": 0.8710575819021328, + "grad_norm": 51.1240119934082, + "learning_rate": 1.9666408944368948e-06, + "loss": 0.1494, + "num_input_tokens_seen": 24030080, + "step": 35655 + }, + { + "epoch": 0.87117973273398, + "grad_norm": 1.2830450534820557, + "learning_rate": 1.966619048303695e-06, + "loss": 0.0704, + "num_input_tokens_seen": 24033344, + "step": 35660 + }, + { + "epoch": 0.871301883565827, + "grad_norm": 15.251612663269043, + "learning_rate": 1.966597195140982e-06, + "loss": 0.1243, + "num_input_tokens_seen": 24036480, + "step": 35665 + }, + { + "epoch": 0.8714240343976742, + "grad_norm": 10.089798927307129, + "learning_rate": 1.966575334948914e-06, + "loss": 0.267, + "num_input_tokens_seen": 24039552, + "step": 35670 + }, + { + "epoch": 0.8715461852295214, + "grad_norm": 2.0029706954956055, + "learning_rate": 1.9665534677276502e-06, + "loss": 0.0807, + "num_input_tokens_seen": 24042816, + "step": 35675 + }, + { + "epoch": 0.8716683360613686, + "grad_norm": 0.9593586921691895, + "learning_rate": 1.9665315934773495e-06, + "loss": 0.1276, + "num_input_tokens_seen": 24046016, + "step": 35680 + }, + { + "epoch": 0.8717904868932157, + "grad_norm": 16.183738708496094, + "learning_rate": 1.966509712198171e-06, + "loss": 0.083, + "num_input_tokens_seen": 24049664, + "step": 35685 + }, + { + "epoch": 0.8719126377250629, + "grad_norm": 10.109683990478516, + "learning_rate": 1.966487823890274e-06, + "loss": 0.1309, + "num_input_tokens_seen": 24053248, + "step": 35690 + }, + { + "epoch": 0.8720347885569101, + "grad_norm": 25.217132568359375, + "learning_rate": 1.966465928553818e-06, + "loss": 0.0964, + "num_input_tokens_seen": 24056896, + "step": 35695 + }, + { + "epoch": 0.8721569393887573, + "grad_norm": 11.477954864501953, + "learning_rate": 1.9664440261889614e-06, + "loss": 0.0599, + "num_input_tokens_seen": 24060480, + "step": 35700 + }, + { + "epoch": 0.8722790902206043, + "grad_norm": 19.288284301757812, + "learning_rate": 1.966422116795864e-06, + "loss": 0.076, + "num_input_tokens_seen": 24063616, + "step": 35705 + }, + { + "epoch": 0.8724012410524515, + "grad_norm": 2.0639655590057373, + "learning_rate": 1.966400200374685e-06, + "loss": 0.0638, + "num_input_tokens_seen": 24067136, + "step": 35710 + }, + { + "epoch": 0.8725233918842987, + "grad_norm": 9.67798137664795, + "learning_rate": 1.9663782769255837e-06, + "loss": 0.0867, + "num_input_tokens_seen": 24070784, + "step": 35715 + }, + { + "epoch": 0.8726455427161459, + "grad_norm": 30.0969295501709, + "learning_rate": 1.9663563464487197e-06, + "loss": 0.0867, + "num_input_tokens_seen": 24074240, + "step": 35720 + }, + { + "epoch": 0.8727676935479931, + "grad_norm": 42.898590087890625, + "learning_rate": 1.9663344089442524e-06, + "loss": 0.0729, + "num_input_tokens_seen": 24077952, + "step": 35725 + }, + { + "epoch": 0.8728898443798402, + "grad_norm": 2.0448718070983887, + "learning_rate": 1.9663124644123416e-06, + "loss": 0.2075, + "num_input_tokens_seen": 24081344, + "step": 35730 + }, + { + "epoch": 0.8730119952116874, + "grad_norm": 0.13667216897010803, + "learning_rate": 1.9662905128531464e-06, + "loss": 0.0498, + "num_input_tokens_seen": 24084608, + "step": 35735 + }, + { + "epoch": 0.8731341460435346, + "grad_norm": 3.772686243057251, + "learning_rate": 1.966268554266827e-06, + "loss": 0.0882, + "num_input_tokens_seen": 24088064, + "step": 35740 + }, + { + "epoch": 0.8732562968753818, + "grad_norm": 28.097793579101562, + "learning_rate": 1.9662465886535426e-06, + "loss": 0.1407, + "num_input_tokens_seen": 24091776, + "step": 35745 + }, + { + "epoch": 0.8733784477072288, + "grad_norm": 20.13945198059082, + "learning_rate": 1.966224616013453e-06, + "loss": 0.2092, + "num_input_tokens_seen": 24095104, + "step": 35750 + }, + { + "epoch": 0.873500598539076, + "grad_norm": 2.7947781085968018, + "learning_rate": 1.9662026363467183e-06, + "loss": 0.1278, + "num_input_tokens_seen": 24098112, + "step": 35755 + }, + { + "epoch": 0.8736227493709232, + "grad_norm": 11.682032585144043, + "learning_rate": 1.966180649653498e-06, + "loss": 0.2807, + "num_input_tokens_seen": 24101568, + "step": 35760 + }, + { + "epoch": 0.8737449002027704, + "grad_norm": 26.217021942138672, + "learning_rate": 1.966158655933952e-06, + "loss": 0.0905, + "num_input_tokens_seen": 24104960, + "step": 35765 + }, + { + "epoch": 0.8738670510346176, + "grad_norm": 8.426963806152344, + "learning_rate": 1.966136655188241e-06, + "loss": 0.1174, + "num_input_tokens_seen": 24108480, + "step": 35770 + }, + { + "epoch": 0.8739892018664647, + "grad_norm": 10.167957305908203, + "learning_rate": 1.966114647416524e-06, + "loss": 0.0826, + "num_input_tokens_seen": 24111552, + "step": 35775 + }, + { + "epoch": 0.8741113526983119, + "grad_norm": 13.67619800567627, + "learning_rate": 1.9660926326189613e-06, + "loss": 0.0744, + "num_input_tokens_seen": 24114752, + "step": 35780 + }, + { + "epoch": 0.874233503530159, + "grad_norm": 37.29083251953125, + "learning_rate": 1.9660706107957134e-06, + "loss": 0.1183, + "num_input_tokens_seen": 24118016, + "step": 35785 + }, + { + "epoch": 0.8743556543620062, + "grad_norm": 22.16767120361328, + "learning_rate": 1.9660485819469395e-06, + "loss": 0.174, + "num_input_tokens_seen": 24121280, + "step": 35790 + }, + { + "epoch": 0.8744778051938533, + "grad_norm": 0.2441328465938568, + "learning_rate": 1.9660265460728015e-06, + "loss": 0.0505, + "num_input_tokens_seen": 24125312, + "step": 35795 + }, + { + "epoch": 0.8745999560257005, + "grad_norm": 47.24000930786133, + "learning_rate": 1.9660045031734578e-06, + "loss": 0.0931, + "num_input_tokens_seen": 24128256, + "step": 35800 + }, + { + "epoch": 0.8747221068575477, + "grad_norm": 2.6885123252868652, + "learning_rate": 1.96598245324907e-06, + "loss": 0.1435, + "num_input_tokens_seen": 24131584, + "step": 35805 + }, + { + "epoch": 0.8748442576893949, + "grad_norm": 5.6425089836120605, + "learning_rate": 1.965960396299798e-06, + "loss": 0.0891, + "num_input_tokens_seen": 24134912, + "step": 35810 + }, + { + "epoch": 0.8749664085212421, + "grad_norm": 13.333474159240723, + "learning_rate": 1.9659383323258017e-06, + "loss": 0.1344, + "num_input_tokens_seen": 24138304, + "step": 35815 + }, + { + "epoch": 0.8750885593530892, + "grad_norm": 0.8057913184165955, + "learning_rate": 1.9659162613272424e-06, + "loss": 0.0714, + "num_input_tokens_seen": 24141760, + "step": 35820 + }, + { + "epoch": 0.8752107101849363, + "grad_norm": 30.415504455566406, + "learning_rate": 1.9658941833042804e-06, + "loss": 0.1576, + "num_input_tokens_seen": 24144960, + "step": 35825 + }, + { + "epoch": 0.8753328610167835, + "grad_norm": 5.3278326988220215, + "learning_rate": 1.9658720982570757e-06, + "loss": 0.0207, + "num_input_tokens_seen": 24147904, + "step": 35830 + }, + { + "epoch": 0.8754550118486307, + "grad_norm": 4.785195350646973, + "learning_rate": 1.9658500061857897e-06, + "loss": 0.1306, + "num_input_tokens_seen": 24151104, + "step": 35835 + }, + { + "epoch": 0.8755771626804778, + "grad_norm": 49.18978500366211, + "learning_rate": 1.9658279070905825e-06, + "loss": 0.1516, + "num_input_tokens_seen": 24154432, + "step": 35840 + }, + { + "epoch": 0.875699313512325, + "grad_norm": 11.545149803161621, + "learning_rate": 1.9658058009716147e-06, + "loss": 0.0642, + "num_input_tokens_seen": 24157696, + "step": 35845 + }, + { + "epoch": 0.8758214643441722, + "grad_norm": 10.62991714477539, + "learning_rate": 1.965783687829048e-06, + "loss": 0.0856, + "num_input_tokens_seen": 24161152, + "step": 35850 + }, + { + "epoch": 0.8759436151760194, + "grad_norm": 0.5424090623855591, + "learning_rate": 1.965761567663042e-06, + "loss": 0.2556, + "num_input_tokens_seen": 24164416, + "step": 35855 + }, + { + "epoch": 0.8760657660078666, + "grad_norm": 9.843696594238281, + "learning_rate": 1.9657394404737582e-06, + "loss": 0.0845, + "num_input_tokens_seen": 24168000, + "step": 35860 + }, + { + "epoch": 0.8761879168397136, + "grad_norm": 0.7926238179206848, + "learning_rate": 1.9657173062613575e-06, + "loss": 0.2504, + "num_input_tokens_seen": 24171584, + "step": 35865 + }, + { + "epoch": 0.8763100676715608, + "grad_norm": 18.188486099243164, + "learning_rate": 1.965695165026001e-06, + "loss": 0.1544, + "num_input_tokens_seen": 24175168, + "step": 35870 + }, + { + "epoch": 0.876432218503408, + "grad_norm": 1.0848253965377808, + "learning_rate": 1.9656730167678494e-06, + "loss": 0.0603, + "num_input_tokens_seen": 24178432, + "step": 35875 + }, + { + "epoch": 0.8765543693352552, + "grad_norm": 8.33212661743164, + "learning_rate": 1.965650861487064e-06, + "loss": 0.0844, + "num_input_tokens_seen": 24181696, + "step": 35880 + }, + { + "epoch": 0.8766765201671023, + "grad_norm": 8.785030364990234, + "learning_rate": 1.9656286991838056e-06, + "loss": 0.2487, + "num_input_tokens_seen": 24184832, + "step": 35885 + }, + { + "epoch": 0.8767986709989495, + "grad_norm": 34.26165771484375, + "learning_rate": 1.9656065298582355e-06, + "loss": 0.0238, + "num_input_tokens_seen": 24188224, + "step": 35890 + }, + { + "epoch": 0.8769208218307967, + "grad_norm": 27.247255325317383, + "learning_rate": 1.9655843535105154e-06, + "loss": 0.0667, + "num_input_tokens_seen": 24191872, + "step": 35895 + }, + { + "epoch": 0.8770429726626439, + "grad_norm": 4.9799957275390625, + "learning_rate": 1.9655621701408057e-06, + "loss": 0.063, + "num_input_tokens_seen": 24195648, + "step": 35900 + }, + { + "epoch": 0.877165123494491, + "grad_norm": 56.0616569519043, + "learning_rate": 1.9655399797492683e-06, + "loss": 0.1216, + "num_input_tokens_seen": 24198976, + "step": 35905 + }, + { + "epoch": 0.8772872743263381, + "grad_norm": 13.826311111450195, + "learning_rate": 1.9655177823360645e-06, + "loss": 0.0618, + "num_input_tokens_seen": 24202304, + "step": 35910 + }, + { + "epoch": 0.8774094251581853, + "grad_norm": 10.57601261138916, + "learning_rate": 1.9654955779013557e-06, + "loss": 0.1367, + "num_input_tokens_seen": 24205504, + "step": 35915 + }, + { + "epoch": 0.8775315759900325, + "grad_norm": 1.1815544366836548, + "learning_rate": 1.9654733664453037e-06, + "loss": 0.1328, + "num_input_tokens_seen": 24209344, + "step": 35920 + }, + { + "epoch": 0.8776537268218797, + "grad_norm": 12.932337760925293, + "learning_rate": 1.9654511479680693e-06, + "loss": 0.0406, + "num_input_tokens_seen": 24212736, + "step": 35925 + }, + { + "epoch": 0.8777758776537268, + "grad_norm": 17.29469871520996, + "learning_rate": 1.9654289224698144e-06, + "loss": 0.1151, + "num_input_tokens_seen": 24216000, + "step": 35930 + }, + { + "epoch": 0.877898028485574, + "grad_norm": 1.935131549835205, + "learning_rate": 1.965406689950701e-06, + "loss": 0.1295, + "num_input_tokens_seen": 24219392, + "step": 35935 + }, + { + "epoch": 0.8780201793174212, + "grad_norm": 0.7120591402053833, + "learning_rate": 1.9653844504108906e-06, + "loss": 0.1675, + "num_input_tokens_seen": 24222784, + "step": 35940 + }, + { + "epoch": 0.8781423301492683, + "grad_norm": 21.83226776123047, + "learning_rate": 1.965362203850545e-06, + "loss": 0.1002, + "num_input_tokens_seen": 24225664, + "step": 35945 + }, + { + "epoch": 0.8782644809811155, + "grad_norm": 0.8217087388038635, + "learning_rate": 1.965339950269825e-06, + "loss": 0.0102, + "num_input_tokens_seen": 24229312, + "step": 35950 + }, + { + "epoch": 0.8783866318129626, + "grad_norm": 17.43366050720215, + "learning_rate": 1.9653176896688936e-06, + "loss": 0.0219, + "num_input_tokens_seen": 24232576, + "step": 35955 + }, + { + "epoch": 0.8785087826448098, + "grad_norm": 15.183777809143066, + "learning_rate": 1.965295422047912e-06, + "loss": 0.0591, + "num_input_tokens_seen": 24236096, + "step": 35960 + }, + { + "epoch": 0.878630933476657, + "grad_norm": 10.468469619750977, + "learning_rate": 1.965273147407043e-06, + "loss": 0.1248, + "num_input_tokens_seen": 24239680, + "step": 35965 + }, + { + "epoch": 0.8787530843085042, + "grad_norm": 0.05369218438863754, + "learning_rate": 1.965250865746448e-06, + "loss": 0.0906, + "num_input_tokens_seen": 24242816, + "step": 35970 + }, + { + "epoch": 0.8788752351403513, + "grad_norm": 0.46325352787971497, + "learning_rate": 1.9652285770662893e-06, + "loss": 0.0539, + "num_input_tokens_seen": 24246208, + "step": 35975 + }, + { + "epoch": 0.8789973859721985, + "grad_norm": 47.057804107666016, + "learning_rate": 1.965206281366728e-06, + "loss": 0.088, + "num_input_tokens_seen": 24249152, + "step": 35980 + }, + { + "epoch": 0.8791195368040456, + "grad_norm": 24.42066192626953, + "learning_rate": 1.9651839786479276e-06, + "loss": 0.1986, + "num_input_tokens_seen": 24252608, + "step": 35985 + }, + { + "epoch": 0.8792416876358928, + "grad_norm": 1.8329250812530518, + "learning_rate": 1.9651616689100498e-06, + "loss": 0.1155, + "num_input_tokens_seen": 24255680, + "step": 35990 + }, + { + "epoch": 0.8793638384677399, + "grad_norm": 2.121000051498413, + "learning_rate": 1.9651393521532563e-06, + "loss": 0.1162, + "num_input_tokens_seen": 24258880, + "step": 35995 + }, + { + "epoch": 0.8794859892995871, + "grad_norm": 9.572766304016113, + "learning_rate": 1.9651170283777098e-06, + "loss": 0.19, + "num_input_tokens_seen": 24262464, + "step": 36000 + }, + { + "epoch": 0.8796081401314343, + "grad_norm": 23.756149291992188, + "learning_rate": 1.9650946975835733e-06, + "loss": 0.1447, + "num_input_tokens_seen": 24265792, + "step": 36005 + }, + { + "epoch": 0.8797302909632815, + "grad_norm": 12.380399703979492, + "learning_rate": 1.9650723597710078e-06, + "loss": 0.2048, + "num_input_tokens_seen": 24268800, + "step": 36010 + }, + { + "epoch": 0.8798524417951287, + "grad_norm": 18.560951232910156, + "learning_rate": 1.965050014940177e-06, + "loss": 0.1781, + "num_input_tokens_seen": 24272192, + "step": 36015 + }, + { + "epoch": 0.8799745926269757, + "grad_norm": 28.321802139282227, + "learning_rate": 1.9650276630912427e-06, + "loss": 0.0397, + "num_input_tokens_seen": 24275840, + "step": 36020 + }, + { + "epoch": 0.8800967434588229, + "grad_norm": 1.0200899839401245, + "learning_rate": 1.965005304224368e-06, + "loss": 0.1337, + "num_input_tokens_seen": 24279296, + "step": 36025 + }, + { + "epoch": 0.8802188942906701, + "grad_norm": 7.544238090515137, + "learning_rate": 1.964982938339715e-06, + "loss": 0.3578, + "num_input_tokens_seen": 24282240, + "step": 36030 + }, + { + "epoch": 0.8803410451225173, + "grad_norm": 23.50800323486328, + "learning_rate": 1.9649605654374466e-06, + "loss": 0.0886, + "num_input_tokens_seen": 24285696, + "step": 36035 + }, + { + "epoch": 0.8804631959543644, + "grad_norm": 0.366159588098526, + "learning_rate": 1.964938185517725e-06, + "loss": 0.0541, + "num_input_tokens_seen": 24288704, + "step": 36040 + }, + { + "epoch": 0.8805853467862116, + "grad_norm": 34.502220153808594, + "learning_rate": 1.964915798580714e-06, + "loss": 0.0778, + "num_input_tokens_seen": 24291520, + "step": 36045 + }, + { + "epoch": 0.8807074976180588, + "grad_norm": 32.538902282714844, + "learning_rate": 1.9648934046265755e-06, + "loss": 0.1126, + "num_input_tokens_seen": 24294720, + "step": 36050 + }, + { + "epoch": 0.880829648449906, + "grad_norm": 50.61782455444336, + "learning_rate": 1.9648710036554726e-06, + "loss": 0.1484, + "num_input_tokens_seen": 24297984, + "step": 36055 + }, + { + "epoch": 0.8809517992817532, + "grad_norm": 1.2384830713272095, + "learning_rate": 1.9648485956675683e-06, + "loss": 0.1794, + "num_input_tokens_seen": 24301312, + "step": 36060 + }, + { + "epoch": 0.8810739501136002, + "grad_norm": 14.007159233093262, + "learning_rate": 1.9648261806630255e-06, + "loss": 0.1554, + "num_input_tokens_seen": 24304640, + "step": 36065 + }, + { + "epoch": 0.8811961009454474, + "grad_norm": 24.65819549560547, + "learning_rate": 1.964803758642007e-06, + "loss": 0.0985, + "num_input_tokens_seen": 24308480, + "step": 36070 + }, + { + "epoch": 0.8813182517772946, + "grad_norm": 14.141555786132812, + "learning_rate": 1.9647813296046766e-06, + "loss": 0.1324, + "num_input_tokens_seen": 24311680, + "step": 36075 + }, + { + "epoch": 0.8814404026091418, + "grad_norm": 11.87048053741455, + "learning_rate": 1.964758893551196e-06, + "loss": 0.1911, + "num_input_tokens_seen": 24314880, + "step": 36080 + }, + { + "epoch": 0.8815625534409889, + "grad_norm": 19.78986358642578, + "learning_rate": 1.96473645048173e-06, + "loss": 0.0895, + "num_input_tokens_seen": 24318400, + "step": 36085 + }, + { + "epoch": 0.8816847042728361, + "grad_norm": 11.281794548034668, + "learning_rate": 1.964714000396441e-06, + "loss": 0.105, + "num_input_tokens_seen": 24321536, + "step": 36090 + }, + { + "epoch": 0.8818068551046833, + "grad_norm": 4.544544696807861, + "learning_rate": 1.964691543295492e-06, + "loss": 0.0495, + "num_input_tokens_seen": 24324864, + "step": 36095 + }, + { + "epoch": 0.8819290059365305, + "grad_norm": 13.858755111694336, + "learning_rate": 1.964669079179047e-06, + "loss": 0.0606, + "num_input_tokens_seen": 24328000, + "step": 36100 + }, + { + "epoch": 0.8820511567683776, + "grad_norm": 18.83074378967285, + "learning_rate": 1.9646466080472686e-06, + "loss": 0.0605, + "num_input_tokens_seen": 24331008, + "step": 36105 + }, + { + "epoch": 0.8821733076002247, + "grad_norm": 8.569042205810547, + "learning_rate": 1.9646241299003206e-06, + "loss": 0.1002, + "num_input_tokens_seen": 24334080, + "step": 36110 + }, + { + "epoch": 0.8822954584320719, + "grad_norm": 26.459529876708984, + "learning_rate": 1.9646016447383665e-06, + "loss": 0.1031, + "num_input_tokens_seen": 24337472, + "step": 36115 + }, + { + "epoch": 0.8824176092639191, + "grad_norm": 38.5592155456543, + "learning_rate": 1.96457915256157e-06, + "loss": 0.1362, + "num_input_tokens_seen": 24340928, + "step": 36120 + }, + { + "epoch": 0.8825397600957663, + "grad_norm": 17.598421096801758, + "learning_rate": 1.9645566533700945e-06, + "loss": 0.1261, + "num_input_tokens_seen": 24344512, + "step": 36125 + }, + { + "epoch": 0.8826619109276134, + "grad_norm": 5.009764671325684, + "learning_rate": 1.9645341471641036e-06, + "loss": 0.0493, + "num_input_tokens_seen": 24347776, + "step": 36130 + }, + { + "epoch": 0.8827840617594606, + "grad_norm": 8.929144859313965, + "learning_rate": 1.964511633943761e-06, + "loss": 0.0642, + "num_input_tokens_seen": 24350976, + "step": 36135 + }, + { + "epoch": 0.8829062125913077, + "grad_norm": 0.8995026350021362, + "learning_rate": 1.9644891137092298e-06, + "loss": 0.1568, + "num_input_tokens_seen": 24354240, + "step": 36140 + }, + { + "epoch": 0.8830283634231549, + "grad_norm": 0.6001954078674316, + "learning_rate": 1.9644665864606747e-06, + "loss": 0.0523, + "num_input_tokens_seen": 24357312, + "step": 36145 + }, + { + "epoch": 0.8831505142550021, + "grad_norm": 9.831707000732422, + "learning_rate": 1.9644440521982594e-06, + "loss": 0.1478, + "num_input_tokens_seen": 24361024, + "step": 36150 + }, + { + "epoch": 0.8832726650868492, + "grad_norm": 0.6541392207145691, + "learning_rate": 1.9644215109221475e-06, + "loss": 0.1884, + "num_input_tokens_seen": 24364480, + "step": 36155 + }, + { + "epoch": 0.8833948159186964, + "grad_norm": 7.485659122467041, + "learning_rate": 1.9643989626325024e-06, + "loss": 0.1435, + "num_input_tokens_seen": 24367616, + "step": 36160 + }, + { + "epoch": 0.8835169667505436, + "grad_norm": 15.767600059509277, + "learning_rate": 1.9643764073294893e-06, + "loss": 0.2141, + "num_input_tokens_seen": 24370880, + "step": 36165 + }, + { + "epoch": 0.8836391175823908, + "grad_norm": 21.12200164794922, + "learning_rate": 1.9643538450132713e-06, + "loss": 0.1681, + "num_input_tokens_seen": 24374144, + "step": 36170 + }, + { + "epoch": 0.8837612684142379, + "grad_norm": 31.78057289123535, + "learning_rate": 1.964331275684013e-06, + "loss": 0.0899, + "num_input_tokens_seen": 24377344, + "step": 36175 + }, + { + "epoch": 0.883883419246085, + "grad_norm": 0.7445668578147888, + "learning_rate": 1.964308699341878e-06, + "loss": 0.0782, + "num_input_tokens_seen": 24380480, + "step": 36180 + }, + { + "epoch": 0.8840055700779322, + "grad_norm": 4.764310836791992, + "learning_rate": 1.964286115987031e-06, + "loss": 0.0687, + "num_input_tokens_seen": 24384000, + "step": 36185 + }, + { + "epoch": 0.8841277209097794, + "grad_norm": 0.4389335513114929, + "learning_rate": 1.9642635256196356e-06, + "loss": 0.0591, + "num_input_tokens_seen": 24387520, + "step": 36190 + }, + { + "epoch": 0.8842498717416266, + "grad_norm": 16.75498390197754, + "learning_rate": 1.9642409282398573e-06, + "loss": 0.1161, + "num_input_tokens_seen": 24391424, + "step": 36195 + }, + { + "epoch": 0.8843720225734737, + "grad_norm": 35.38811111450195, + "learning_rate": 1.964218323847859e-06, + "loss": 0.1667, + "num_input_tokens_seen": 24394560, + "step": 36200 + }, + { + "epoch": 0.8844941734053209, + "grad_norm": 6.369503498077393, + "learning_rate": 1.9641957124438058e-06, + "loss": 0.0912, + "num_input_tokens_seen": 24398080, + "step": 36205 + }, + { + "epoch": 0.8846163242371681, + "grad_norm": 27.021238327026367, + "learning_rate": 1.9641730940278617e-06, + "loss": 0.097, + "num_input_tokens_seen": 24401408, + "step": 36210 + }, + { + "epoch": 0.8847384750690153, + "grad_norm": 17.28885269165039, + "learning_rate": 1.964150468600192e-06, + "loss": 0.1731, + "num_input_tokens_seen": 24404736, + "step": 36215 + }, + { + "epoch": 0.8848606259008623, + "grad_norm": 1.7445902824401855, + "learning_rate": 1.964127836160961e-06, + "loss": 0.0174, + "num_input_tokens_seen": 24408000, + "step": 36220 + }, + { + "epoch": 0.8849827767327095, + "grad_norm": 10.038021087646484, + "learning_rate": 1.964105196710332e-06, + "loss": 0.1235, + "num_input_tokens_seen": 24411136, + "step": 36225 + }, + { + "epoch": 0.8851049275645567, + "grad_norm": 25.007415771484375, + "learning_rate": 1.9640825502484716e-06, + "loss": 0.0469, + "num_input_tokens_seen": 24414720, + "step": 36230 + }, + { + "epoch": 0.8852270783964039, + "grad_norm": 16.24190330505371, + "learning_rate": 1.9640598967755435e-06, + "loss": 0.1697, + "num_input_tokens_seen": 24418112, + "step": 36235 + }, + { + "epoch": 0.885349229228251, + "grad_norm": 22.841869354248047, + "learning_rate": 1.9640372362917128e-06, + "loss": 0.1409, + "num_input_tokens_seen": 24421184, + "step": 36240 + }, + { + "epoch": 0.8854713800600982, + "grad_norm": 0.6201678514480591, + "learning_rate": 1.964014568797143e-06, + "loss": 0.0835, + "num_input_tokens_seen": 24424448, + "step": 36245 + }, + { + "epoch": 0.8855935308919454, + "grad_norm": 17.243867874145508, + "learning_rate": 1.963991894292001e-06, + "loss": 0.1017, + "num_input_tokens_seen": 24427968, + "step": 36250 + }, + { + "epoch": 0.8857156817237926, + "grad_norm": 40.84526443481445, + "learning_rate": 1.9639692127764504e-06, + "loss": 0.1615, + "num_input_tokens_seen": 24431808, + "step": 36255 + }, + { + "epoch": 0.8858378325556397, + "grad_norm": 12.356513023376465, + "learning_rate": 1.9639465242506563e-06, + "loss": 0.1469, + "num_input_tokens_seen": 24435648, + "step": 36260 + }, + { + "epoch": 0.8859599833874868, + "grad_norm": 11.762965202331543, + "learning_rate": 1.9639238287147836e-06, + "loss": 0.084, + "num_input_tokens_seen": 24438720, + "step": 36265 + }, + { + "epoch": 0.886082134219334, + "grad_norm": 0.16327081620693207, + "learning_rate": 1.963901126168998e-06, + "loss": 0.1053, + "num_input_tokens_seen": 24442240, + "step": 36270 + }, + { + "epoch": 0.8862042850511812, + "grad_norm": 7.906080722808838, + "learning_rate": 1.9638784166134636e-06, + "loss": 0.0492, + "num_input_tokens_seen": 24445184, + "step": 36275 + }, + { + "epoch": 0.8863264358830284, + "grad_norm": 3.6426432132720947, + "learning_rate": 1.9638557000483463e-06, + "loss": 0.0825, + "num_input_tokens_seen": 24449280, + "step": 36280 + }, + { + "epoch": 0.8864485867148755, + "grad_norm": 1.1091848611831665, + "learning_rate": 1.963832976473811e-06, + "loss": 0.0363, + "num_input_tokens_seen": 24452608, + "step": 36285 + }, + { + "epoch": 0.8865707375467227, + "grad_norm": 26.937637329101562, + "learning_rate": 1.9638102458900236e-06, + "loss": 0.0744, + "num_input_tokens_seen": 24455616, + "step": 36290 + }, + { + "epoch": 0.8866928883785699, + "grad_norm": 5.537595272064209, + "learning_rate": 1.963787508297148e-06, + "loss": 0.0468, + "num_input_tokens_seen": 24459072, + "step": 36295 + }, + { + "epoch": 0.886815039210417, + "grad_norm": 1.8377037048339844, + "learning_rate": 1.9637647636953513e-06, + "loss": 0.1084, + "num_input_tokens_seen": 24462400, + "step": 36300 + }, + { + "epoch": 0.8869371900422642, + "grad_norm": 2.740536689758301, + "learning_rate": 1.9637420120847976e-06, + "loss": 0.1197, + "num_input_tokens_seen": 24465152, + "step": 36305 + }, + { + "epoch": 0.8870593408741113, + "grad_norm": 1.6049529314041138, + "learning_rate": 1.9637192534656528e-06, + "loss": 0.0815, + "num_input_tokens_seen": 24468160, + "step": 36310 + }, + { + "epoch": 0.8871814917059585, + "grad_norm": 0.09656000882387161, + "learning_rate": 1.9636964878380824e-06, + "loss": 0.1247, + "num_input_tokens_seen": 24470976, + "step": 36315 + }, + { + "epoch": 0.8873036425378057, + "grad_norm": 0.3467960059642792, + "learning_rate": 1.963673715202252e-06, + "loss": 0.063, + "num_input_tokens_seen": 24474752, + "step": 36320 + }, + { + "epoch": 0.8874257933696529, + "grad_norm": 10.848044395446777, + "learning_rate": 1.9636509355583268e-06, + "loss": 0.1894, + "num_input_tokens_seen": 24478464, + "step": 36325 + }, + { + "epoch": 0.8875479442015, + "grad_norm": 19.020444869995117, + "learning_rate": 1.9636281489064732e-06, + "loss": 0.0863, + "num_input_tokens_seen": 24481856, + "step": 36330 + }, + { + "epoch": 0.8876700950333472, + "grad_norm": 30.788227081298828, + "learning_rate": 1.9636053552468565e-06, + "loss": 0.122, + "num_input_tokens_seen": 24485120, + "step": 36335 + }, + { + "epoch": 0.8877922458651943, + "grad_norm": 15.392607688903809, + "learning_rate": 1.963582554579642e-06, + "loss": 0.0674, + "num_input_tokens_seen": 24488576, + "step": 36340 + }, + { + "epoch": 0.8879143966970415, + "grad_norm": 11.759366035461426, + "learning_rate": 1.9635597469049963e-06, + "loss": 0.1177, + "num_input_tokens_seen": 24492096, + "step": 36345 + }, + { + "epoch": 0.8880365475288887, + "grad_norm": 11.81397533416748, + "learning_rate": 1.9635369322230852e-06, + "loss": 0.2048, + "num_input_tokens_seen": 24495424, + "step": 36350 + }, + { + "epoch": 0.8881586983607358, + "grad_norm": 11.798083305358887, + "learning_rate": 1.9635141105340742e-06, + "loss": 0.1205, + "num_input_tokens_seen": 24498880, + "step": 36355 + }, + { + "epoch": 0.888280849192583, + "grad_norm": 14.193384170532227, + "learning_rate": 1.9634912818381293e-06, + "loss": 0.1286, + "num_input_tokens_seen": 24502336, + "step": 36360 + }, + { + "epoch": 0.8884030000244302, + "grad_norm": 1.707037091255188, + "learning_rate": 1.963468446135417e-06, + "loss": 0.2572, + "num_input_tokens_seen": 24505408, + "step": 36365 + }, + { + "epoch": 0.8885251508562774, + "grad_norm": 11.981134414672852, + "learning_rate": 1.9634456034261025e-06, + "loss": 0.1699, + "num_input_tokens_seen": 24509056, + "step": 36370 + }, + { + "epoch": 0.8886473016881244, + "grad_norm": 0.7846937775611877, + "learning_rate": 1.963422753710353e-06, + "loss": 0.0129, + "num_input_tokens_seen": 24512128, + "step": 36375 + }, + { + "epoch": 0.8887694525199716, + "grad_norm": 10.940505027770996, + "learning_rate": 1.9633998969883335e-06, + "loss": 0.1042, + "num_input_tokens_seen": 24515648, + "step": 36380 + }, + { + "epoch": 0.8888916033518188, + "grad_norm": 13.387989044189453, + "learning_rate": 1.963377033260211e-06, + "loss": 0.0286, + "num_input_tokens_seen": 24518848, + "step": 36385 + }, + { + "epoch": 0.889013754183666, + "grad_norm": 1.929477572441101, + "learning_rate": 1.963354162526152e-06, + "loss": 0.0204, + "num_input_tokens_seen": 24523072, + "step": 36390 + }, + { + "epoch": 0.8891359050155132, + "grad_norm": 1.1878111362457275, + "learning_rate": 1.963331284786322e-06, + "loss": 0.1326, + "num_input_tokens_seen": 24526976, + "step": 36395 + }, + { + "epoch": 0.8892580558473603, + "grad_norm": 11.675740242004395, + "learning_rate": 1.9633084000408883e-06, + "loss": 0.1465, + "num_input_tokens_seen": 24529984, + "step": 36400 + }, + { + "epoch": 0.8893802066792075, + "grad_norm": 12.716568946838379, + "learning_rate": 1.9632855082900163e-06, + "loss": 0.0548, + "num_input_tokens_seen": 24533120, + "step": 36405 + }, + { + "epoch": 0.8895023575110547, + "grad_norm": 0.6486952900886536, + "learning_rate": 1.9632626095338735e-06, + "loss": 0.0965, + "num_input_tokens_seen": 24536576, + "step": 36410 + }, + { + "epoch": 0.8896245083429019, + "grad_norm": 0.16108357906341553, + "learning_rate": 1.963239703772625e-06, + "loss": 0.0375, + "num_input_tokens_seen": 24539968, + "step": 36415 + }, + { + "epoch": 0.8897466591747489, + "grad_norm": 0.5907543301582336, + "learning_rate": 1.963216791006439e-06, + "loss": 0.1732, + "num_input_tokens_seen": 24543296, + "step": 36420 + }, + { + "epoch": 0.8898688100065961, + "grad_norm": 34.28215026855469, + "learning_rate": 1.9631938712354815e-06, + "loss": 0.1472, + "num_input_tokens_seen": 24546368, + "step": 36425 + }, + { + "epoch": 0.8899909608384433, + "grad_norm": 17.69078254699707, + "learning_rate": 1.9631709444599187e-06, + "loss": 0.2331, + "num_input_tokens_seen": 24549952, + "step": 36430 + }, + { + "epoch": 0.8901131116702905, + "grad_norm": 0.20918723940849304, + "learning_rate": 1.963148010679918e-06, + "loss": 0.1199, + "num_input_tokens_seen": 24553088, + "step": 36435 + }, + { + "epoch": 0.8902352625021377, + "grad_norm": 17.097530364990234, + "learning_rate": 1.963125069895646e-06, + "loss": 0.1703, + "num_input_tokens_seen": 24556288, + "step": 36440 + }, + { + "epoch": 0.8903574133339848, + "grad_norm": 1.6573656797409058, + "learning_rate": 1.9631021221072693e-06, + "loss": 0.0402, + "num_input_tokens_seen": 24559360, + "step": 36445 + }, + { + "epoch": 0.890479564165832, + "grad_norm": 8.112147331237793, + "learning_rate": 1.9630791673149546e-06, + "loss": 0.0887, + "num_input_tokens_seen": 24562624, + "step": 36450 + }, + { + "epoch": 0.8906017149976792, + "grad_norm": 2.700888156890869, + "learning_rate": 1.9630562055188697e-06, + "loss": 0.0766, + "num_input_tokens_seen": 24566336, + "step": 36455 + }, + { + "epoch": 0.8907238658295263, + "grad_norm": 25.177814483642578, + "learning_rate": 1.963033236719181e-06, + "loss": 0.1509, + "num_input_tokens_seen": 24569856, + "step": 36460 + }, + { + "epoch": 0.8908460166613734, + "grad_norm": 2.9992785453796387, + "learning_rate": 1.963010260916055e-06, + "loss": 0.0133, + "num_input_tokens_seen": 24573440, + "step": 36465 + }, + { + "epoch": 0.8909681674932206, + "grad_norm": 36.274112701416016, + "learning_rate": 1.9629872781096597e-06, + "loss": 0.214, + "num_input_tokens_seen": 24576768, + "step": 36470 + }, + { + "epoch": 0.8910903183250678, + "grad_norm": 1.0101815462112427, + "learning_rate": 1.9629642883001624e-06, + "loss": 0.2347, + "num_input_tokens_seen": 24579904, + "step": 36475 + }, + { + "epoch": 0.891212469156915, + "grad_norm": 6.083399295806885, + "learning_rate": 1.962941291487729e-06, + "loss": 0.0076, + "num_input_tokens_seen": 24583040, + "step": 36480 + }, + { + "epoch": 0.8913346199887622, + "grad_norm": 27.649301528930664, + "learning_rate": 1.962918287672528e-06, + "loss": 0.1516, + "num_input_tokens_seen": 24586432, + "step": 36485 + }, + { + "epoch": 0.8914567708206093, + "grad_norm": 0.9985415935516357, + "learning_rate": 1.962895276854726e-06, + "loss": 0.1095, + "num_input_tokens_seen": 24590080, + "step": 36490 + }, + { + "epoch": 0.8915789216524564, + "grad_norm": 34.66645812988281, + "learning_rate": 1.9628722590344905e-06, + "loss": 0.1351, + "num_input_tokens_seen": 24593920, + "step": 36495 + }, + { + "epoch": 0.8917010724843036, + "grad_norm": 24.54486656188965, + "learning_rate": 1.9628492342119892e-06, + "loss": 0.1019, + "num_input_tokens_seen": 24597568, + "step": 36500 + }, + { + "epoch": 0.8918232233161508, + "grad_norm": 10.25721263885498, + "learning_rate": 1.9628262023873893e-06, + "loss": 0.1333, + "num_input_tokens_seen": 24600896, + "step": 36505 + }, + { + "epoch": 0.8919453741479979, + "grad_norm": 8.492855072021484, + "learning_rate": 1.962803163560858e-06, + "loss": 0.0895, + "num_input_tokens_seen": 24604288, + "step": 36510 + }, + { + "epoch": 0.8920675249798451, + "grad_norm": 18.10352325439453, + "learning_rate": 1.9627801177325635e-06, + "loss": 0.14, + "num_input_tokens_seen": 24607552, + "step": 36515 + }, + { + "epoch": 0.8921896758116923, + "grad_norm": 23.538808822631836, + "learning_rate": 1.9627570649026726e-06, + "loss": 0.1, + "num_input_tokens_seen": 24610944, + "step": 36520 + }, + { + "epoch": 0.8923118266435395, + "grad_norm": 5.911246299743652, + "learning_rate": 1.9627340050713535e-06, + "loss": 0.0558, + "num_input_tokens_seen": 24614016, + "step": 36525 + }, + { + "epoch": 0.8924339774753866, + "grad_norm": 15.545068740844727, + "learning_rate": 1.9627109382387743e-06, + "loss": 0.1489, + "num_input_tokens_seen": 24617344, + "step": 36530 + }, + { + "epoch": 0.8925561283072337, + "grad_norm": 25.84769058227539, + "learning_rate": 1.9626878644051014e-06, + "loss": 0.1632, + "num_input_tokens_seen": 24621376, + "step": 36535 + }, + { + "epoch": 0.8926782791390809, + "grad_norm": 20.257658004760742, + "learning_rate": 1.962664783570504e-06, + "loss": 0.1333, + "num_input_tokens_seen": 24624896, + "step": 36540 + }, + { + "epoch": 0.8928004299709281, + "grad_norm": 15.873553276062012, + "learning_rate": 1.962641695735149e-06, + "loss": 0.0889, + "num_input_tokens_seen": 24628096, + "step": 36545 + }, + { + "epoch": 0.8929225808027753, + "grad_norm": 1.569334864616394, + "learning_rate": 1.962618600899205e-06, + "loss": 0.0207, + "num_input_tokens_seen": 24631296, + "step": 36550 + }, + { + "epoch": 0.8930447316346224, + "grad_norm": 11.255741119384766, + "learning_rate": 1.9625954990628396e-06, + "loss": 0.0303, + "num_input_tokens_seen": 24634560, + "step": 36555 + }, + { + "epoch": 0.8931668824664696, + "grad_norm": 19.29964256286621, + "learning_rate": 1.9625723902262205e-06, + "loss": 0.1629, + "num_input_tokens_seen": 24638016, + "step": 36560 + }, + { + "epoch": 0.8932890332983168, + "grad_norm": 28.87752342224121, + "learning_rate": 1.9625492743895166e-06, + "loss": 0.0578, + "num_input_tokens_seen": 24641536, + "step": 36565 + }, + { + "epoch": 0.893411184130164, + "grad_norm": 10.156953811645508, + "learning_rate": 1.962526151552895e-06, + "loss": 0.0397, + "num_input_tokens_seen": 24644608, + "step": 36570 + }, + { + "epoch": 0.893533334962011, + "grad_norm": 28.341548919677734, + "learning_rate": 1.9625030217165243e-06, + "loss": 0.1283, + "num_input_tokens_seen": 24648000, + "step": 36575 + }, + { + "epoch": 0.8936554857938582, + "grad_norm": 14.793594360351562, + "learning_rate": 1.962479884880573e-06, + "loss": 0.0591, + "num_input_tokens_seen": 24651712, + "step": 36580 + }, + { + "epoch": 0.8937776366257054, + "grad_norm": 41.583290100097656, + "learning_rate": 1.962456741045209e-06, + "loss": 0.1834, + "num_input_tokens_seen": 24654528, + "step": 36585 + }, + { + "epoch": 0.8938997874575526, + "grad_norm": 0.33410540223121643, + "learning_rate": 1.9624335902106005e-06, + "loss": 0.0691, + "num_input_tokens_seen": 24657920, + "step": 36590 + }, + { + "epoch": 0.8940219382893998, + "grad_norm": 11.096932411193848, + "learning_rate": 1.9624104323769167e-06, + "loss": 0.096, + "num_input_tokens_seen": 24661632, + "step": 36595 + }, + { + "epoch": 0.8941440891212469, + "grad_norm": 17.332895278930664, + "learning_rate": 1.962387267544325e-06, + "loss": 0.1534, + "num_input_tokens_seen": 24664960, + "step": 36600 + }, + { + "epoch": 0.8942662399530941, + "grad_norm": 62.588287353515625, + "learning_rate": 1.962364095712994e-06, + "loss": 0.1002, + "num_input_tokens_seen": 24668480, + "step": 36605 + }, + { + "epoch": 0.8943883907849413, + "grad_norm": 47.255455017089844, + "learning_rate": 1.962340916883093e-06, + "loss": 0.1503, + "num_input_tokens_seen": 24672064, + "step": 36610 + }, + { + "epoch": 0.8945105416167884, + "grad_norm": 15.527400970458984, + "learning_rate": 1.9623177310547898e-06, + "loss": 0.0608, + "num_input_tokens_seen": 24675392, + "step": 36615 + }, + { + "epoch": 0.8946326924486355, + "grad_norm": 1.301206111907959, + "learning_rate": 1.9622945382282526e-06, + "loss": 0.2064, + "num_input_tokens_seen": 24678528, + "step": 36620 + }, + { + "epoch": 0.8947548432804827, + "grad_norm": 1.0320180654525757, + "learning_rate": 1.9622713384036517e-06, + "loss": 0.1047, + "num_input_tokens_seen": 24681984, + "step": 36625 + }, + { + "epoch": 0.8948769941123299, + "grad_norm": 1.01441490650177, + "learning_rate": 1.962248131581154e-06, + "loss": 0.0891, + "num_input_tokens_seen": 24685376, + "step": 36630 + }, + { + "epoch": 0.8949991449441771, + "grad_norm": 0.6056206822395325, + "learning_rate": 1.962224917760929e-06, + "loss": 0.0213, + "num_input_tokens_seen": 24688576, + "step": 36635 + }, + { + "epoch": 0.8951212957760243, + "grad_norm": 33.90119552612305, + "learning_rate": 1.9622016969431465e-06, + "loss": 0.1229, + "num_input_tokens_seen": 24692160, + "step": 36640 + }, + { + "epoch": 0.8952434466078714, + "grad_norm": 22.38607406616211, + "learning_rate": 1.9621784691279737e-06, + "loss": 0.0524, + "num_input_tokens_seen": 24695424, + "step": 36645 + }, + { + "epoch": 0.8953655974397186, + "grad_norm": 30.155961990356445, + "learning_rate": 1.962155234315581e-06, + "loss": 0.0897, + "num_input_tokens_seen": 24698496, + "step": 36650 + }, + { + "epoch": 0.8954877482715657, + "grad_norm": 13.02365779876709, + "learning_rate": 1.962131992506136e-06, + "loss": 0.1008, + "num_input_tokens_seen": 24701440, + "step": 36655 + }, + { + "epoch": 0.8956098991034129, + "grad_norm": 0.4950132966041565, + "learning_rate": 1.9621087436998083e-06, + "loss": 0.1414, + "num_input_tokens_seen": 24704576, + "step": 36660 + }, + { + "epoch": 0.89573204993526, + "grad_norm": 51.34440994262695, + "learning_rate": 1.9620854878967675e-06, + "loss": 0.0464, + "num_input_tokens_seen": 24707392, + "step": 36665 + }, + { + "epoch": 0.8958542007671072, + "grad_norm": 26.741159439086914, + "learning_rate": 1.9620622250971817e-06, + "loss": 0.1001, + "num_input_tokens_seen": 24710976, + "step": 36670 + }, + { + "epoch": 0.8959763515989544, + "grad_norm": 55.84865951538086, + "learning_rate": 1.9620389553012214e-06, + "loss": 0.1594, + "num_input_tokens_seen": 24713984, + "step": 36675 + }, + { + "epoch": 0.8960985024308016, + "grad_norm": 12.856290817260742, + "learning_rate": 1.9620156785090544e-06, + "loss": 0.0624, + "num_input_tokens_seen": 24717504, + "step": 36680 + }, + { + "epoch": 0.8962206532626488, + "grad_norm": 29.340457916259766, + "learning_rate": 1.961992394720851e-06, + "loss": 0.1965, + "num_input_tokens_seen": 24721024, + "step": 36685 + }, + { + "epoch": 0.8963428040944958, + "grad_norm": 1.2017858028411865, + "learning_rate": 1.9619691039367798e-06, + "loss": 0.0845, + "num_input_tokens_seen": 24724928, + "step": 36690 + }, + { + "epoch": 0.896464954926343, + "grad_norm": 0.448015421628952, + "learning_rate": 1.961945806157011e-06, + "loss": 0.0655, + "num_input_tokens_seen": 24728448, + "step": 36695 + }, + { + "epoch": 0.8965871057581902, + "grad_norm": 0.06794452667236328, + "learning_rate": 1.9619225013817133e-06, + "loss": 0.1331, + "num_input_tokens_seen": 24732032, + "step": 36700 + }, + { + "epoch": 0.8967092565900374, + "grad_norm": 8.08639144897461, + "learning_rate": 1.9618991896110565e-06, + "loss": 0.0997, + "num_input_tokens_seen": 24735488, + "step": 36705 + }, + { + "epoch": 0.8968314074218845, + "grad_norm": 0.6464542746543884, + "learning_rate": 1.96187587084521e-06, + "loss": 0.0924, + "num_input_tokens_seen": 24739008, + "step": 36710 + }, + { + "epoch": 0.8969535582537317, + "grad_norm": 22.804977416992188, + "learning_rate": 1.9618525450843432e-06, + "loss": 0.0832, + "num_input_tokens_seen": 24742080, + "step": 36715 + }, + { + "epoch": 0.8970757090855789, + "grad_norm": 12.513947486877441, + "learning_rate": 1.9618292123286264e-06, + "loss": 0.2628, + "num_input_tokens_seen": 24745792, + "step": 36720 + }, + { + "epoch": 0.8971978599174261, + "grad_norm": 7.804723262786865, + "learning_rate": 1.961805872578229e-06, + "loss": 0.0718, + "num_input_tokens_seen": 24748928, + "step": 36725 + }, + { + "epoch": 0.8973200107492733, + "grad_norm": 19.41393280029297, + "learning_rate": 1.9617825258333204e-06, + "loss": 0.0482, + "num_input_tokens_seen": 24752256, + "step": 36730 + }, + { + "epoch": 0.8974421615811203, + "grad_norm": 62.86684036254883, + "learning_rate": 1.9617591720940703e-06, + "loss": 0.049, + "num_input_tokens_seen": 24755776, + "step": 36735 + }, + { + "epoch": 0.8975643124129675, + "grad_norm": 38.602210998535156, + "learning_rate": 1.961735811360649e-06, + "loss": 0.0901, + "num_input_tokens_seen": 24759296, + "step": 36740 + }, + { + "epoch": 0.8976864632448147, + "grad_norm": 56.08420944213867, + "learning_rate": 1.9617124436332263e-06, + "loss": 0.1186, + "num_input_tokens_seen": 24762688, + "step": 36745 + }, + { + "epoch": 0.8978086140766619, + "grad_norm": 6.372490406036377, + "learning_rate": 1.961689068911972e-06, + "loss": 0.1298, + "num_input_tokens_seen": 24765952, + "step": 36750 + }, + { + "epoch": 0.897930764908509, + "grad_norm": 23.063125610351562, + "learning_rate": 1.9616656871970562e-06, + "loss": 0.0264, + "num_input_tokens_seen": 24769472, + "step": 36755 + }, + { + "epoch": 0.8980529157403562, + "grad_norm": 50.10791015625, + "learning_rate": 1.9616422984886485e-06, + "loss": 0.1623, + "num_input_tokens_seen": 24773184, + "step": 36760 + }, + { + "epoch": 0.8981750665722034, + "grad_norm": 29.54539680480957, + "learning_rate": 1.96161890278692e-06, + "loss": 0.2083, + "num_input_tokens_seen": 24776320, + "step": 36765 + }, + { + "epoch": 0.8982972174040506, + "grad_norm": 19.126075744628906, + "learning_rate": 1.9615955000920396e-06, + "loss": 0.0645, + "num_input_tokens_seen": 24779776, + "step": 36770 + }, + { + "epoch": 0.8984193682358977, + "grad_norm": 14.978646278381348, + "learning_rate": 1.9615720904041785e-06, + "loss": 0.1415, + "num_input_tokens_seen": 24782976, + "step": 36775 + }, + { + "epoch": 0.8985415190677448, + "grad_norm": 2.023380756378174, + "learning_rate": 1.9615486737235064e-06, + "loss": 0.1298, + "num_input_tokens_seen": 24786176, + "step": 36780 + }, + { + "epoch": 0.898663669899592, + "grad_norm": 2.8862316608428955, + "learning_rate": 1.9615252500501936e-06, + "loss": 0.0636, + "num_input_tokens_seen": 24789824, + "step": 36785 + }, + { + "epoch": 0.8987858207314392, + "grad_norm": 21.79472541809082, + "learning_rate": 1.9615018193844108e-06, + "loss": 0.0881, + "num_input_tokens_seen": 24793280, + "step": 36790 + }, + { + "epoch": 0.8989079715632864, + "grad_norm": 24.159772872924805, + "learning_rate": 1.961478381726328e-06, + "loss": 0.1375, + "num_input_tokens_seen": 24796800, + "step": 36795 + }, + { + "epoch": 0.8990301223951335, + "grad_norm": 20.305831909179688, + "learning_rate": 1.9614549370761153e-06, + "loss": 0.1272, + "num_input_tokens_seen": 24800064, + "step": 36800 + }, + { + "epoch": 0.8991522732269807, + "grad_norm": 23.235565185546875, + "learning_rate": 1.9614314854339445e-06, + "loss": 0.1704, + "num_input_tokens_seen": 24803456, + "step": 36805 + }, + { + "epoch": 0.8992744240588278, + "grad_norm": 0.05056135356426239, + "learning_rate": 1.961408026799985e-06, + "loss": 0.0022, + "num_input_tokens_seen": 24806656, + "step": 36810 + }, + { + "epoch": 0.899396574890675, + "grad_norm": 13.105710983276367, + "learning_rate": 1.961384561174408e-06, + "loss": 0.1131, + "num_input_tokens_seen": 24809536, + "step": 36815 + }, + { + "epoch": 0.8995187257225221, + "grad_norm": 0.12696394324302673, + "learning_rate": 1.9613610885573835e-06, + "loss": 0.0057, + "num_input_tokens_seen": 24812800, + "step": 36820 + }, + { + "epoch": 0.8996408765543693, + "grad_norm": 70.6884536743164, + "learning_rate": 1.961337608949083e-06, + "loss": 0.1304, + "num_input_tokens_seen": 24816320, + "step": 36825 + }, + { + "epoch": 0.8997630273862165, + "grad_norm": 12.785313606262207, + "learning_rate": 1.9613141223496763e-06, + "loss": 0.1914, + "num_input_tokens_seen": 24819392, + "step": 36830 + }, + { + "epoch": 0.8998851782180637, + "grad_norm": 21.849977493286133, + "learning_rate": 1.961290628759335e-06, + "loss": 0.0568, + "num_input_tokens_seen": 24822592, + "step": 36835 + }, + { + "epoch": 0.9000073290499109, + "grad_norm": 2.245121479034424, + "learning_rate": 1.9612671281782297e-06, + "loss": 0.0487, + "num_input_tokens_seen": 24825664, + "step": 36840 + }, + { + "epoch": 0.900129479881758, + "grad_norm": 0.4319484829902649, + "learning_rate": 1.9612436206065313e-06, + "loss": 0.1083, + "num_input_tokens_seen": 24828864, + "step": 36845 + }, + { + "epoch": 0.9002516307136051, + "grad_norm": 24.330896377563477, + "learning_rate": 1.961220106044411e-06, + "loss": 0.1028, + "num_input_tokens_seen": 24832256, + "step": 36850 + }, + { + "epoch": 0.9003737815454523, + "grad_norm": 1.2126699686050415, + "learning_rate": 1.961196584492039e-06, + "loss": 0.076, + "num_input_tokens_seen": 24835520, + "step": 36855 + }, + { + "epoch": 0.9004959323772995, + "grad_norm": 0.764924168586731, + "learning_rate": 1.9611730559495876e-06, + "loss": 0.071, + "num_input_tokens_seen": 24838784, + "step": 36860 + }, + { + "epoch": 0.9006180832091466, + "grad_norm": 26.342575073242188, + "learning_rate": 1.9611495204172266e-06, + "loss": 0.2169, + "num_input_tokens_seen": 24842368, + "step": 36865 + }, + { + "epoch": 0.9007402340409938, + "grad_norm": 0.21320796012878418, + "learning_rate": 1.961125977895128e-06, + "loss": 0.0943, + "num_input_tokens_seen": 24845952, + "step": 36870 + }, + { + "epoch": 0.900862384872841, + "grad_norm": 15.532743453979492, + "learning_rate": 1.961102428383463e-06, + "loss": 0.147, + "num_input_tokens_seen": 24849216, + "step": 36875 + }, + { + "epoch": 0.9009845357046882, + "grad_norm": 11.373522758483887, + "learning_rate": 1.9610788718824024e-06, + "loss": 0.0963, + "num_input_tokens_seen": 24853056, + "step": 36880 + }, + { + "epoch": 0.9011066865365354, + "grad_norm": 9.247547149658203, + "learning_rate": 1.9610553083921176e-06, + "loss": 0.1742, + "num_input_tokens_seen": 24856448, + "step": 36885 + }, + { + "epoch": 0.9012288373683824, + "grad_norm": 3.3656091690063477, + "learning_rate": 1.9610317379127803e-06, + "loss": 0.0552, + "num_input_tokens_seen": 24859520, + "step": 36890 + }, + { + "epoch": 0.9013509882002296, + "grad_norm": 0.7792104482650757, + "learning_rate": 1.9610081604445618e-06, + "loss": 0.1553, + "num_input_tokens_seen": 24862656, + "step": 36895 + }, + { + "epoch": 0.9014731390320768, + "grad_norm": 0.8532900214195251, + "learning_rate": 1.9609845759876332e-06, + "loss": 0.04, + "num_input_tokens_seen": 24866112, + "step": 36900 + }, + { + "epoch": 0.901595289863924, + "grad_norm": 1.0743188858032227, + "learning_rate": 1.9609609845421666e-06, + "loss": 0.1122, + "num_input_tokens_seen": 24869312, + "step": 36905 + }, + { + "epoch": 0.9017174406957711, + "grad_norm": 0.17355743050575256, + "learning_rate": 1.960937386108333e-06, + "loss": 0.1537, + "num_input_tokens_seen": 24872576, + "step": 36910 + }, + { + "epoch": 0.9018395915276183, + "grad_norm": 0.4766935706138611, + "learning_rate": 1.9609137806863044e-06, + "loss": 0.0725, + "num_input_tokens_seen": 24876032, + "step": 36915 + }, + { + "epoch": 0.9019617423594655, + "grad_norm": 0.37082409858703613, + "learning_rate": 1.9608901682762522e-06, + "loss": 0.2087, + "num_input_tokens_seen": 24879168, + "step": 36920 + }, + { + "epoch": 0.9020838931913127, + "grad_norm": 44.21205520629883, + "learning_rate": 1.9608665488783485e-06, + "loss": 0.1464, + "num_input_tokens_seen": 24883008, + "step": 36925 + }, + { + "epoch": 0.9022060440231598, + "grad_norm": 17.51647186279297, + "learning_rate": 1.960842922492765e-06, + "loss": 0.1137, + "num_input_tokens_seen": 24886400, + "step": 36930 + }, + { + "epoch": 0.9023281948550069, + "grad_norm": 4.397872447967529, + "learning_rate": 1.9608192891196725e-06, + "loss": 0.0908, + "num_input_tokens_seen": 24890240, + "step": 36935 + }, + { + "epoch": 0.9024503456868541, + "grad_norm": 29.296829223632812, + "learning_rate": 1.9607956487592446e-06, + "loss": 0.113, + "num_input_tokens_seen": 24893696, + "step": 36940 + }, + { + "epoch": 0.9025724965187013, + "grad_norm": 0.3690081536769867, + "learning_rate": 1.960772001411652e-06, + "loss": 0.0707, + "num_input_tokens_seen": 24897088, + "step": 36945 + }, + { + "epoch": 0.9026946473505485, + "grad_norm": 28.767560958862305, + "learning_rate": 1.9607483470770667e-06, + "loss": 0.1639, + "num_input_tokens_seen": 24900288, + "step": 36950 + }, + { + "epoch": 0.9028167981823956, + "grad_norm": 62.34882736206055, + "learning_rate": 1.960724685755661e-06, + "loss": 0.1239, + "num_input_tokens_seen": 24904640, + "step": 36955 + }, + { + "epoch": 0.9029389490142428, + "grad_norm": 18.672874450683594, + "learning_rate": 1.9607010174476073e-06, + "loss": 0.2087, + "num_input_tokens_seen": 24907968, + "step": 36960 + }, + { + "epoch": 0.90306109984609, + "grad_norm": 22.128129959106445, + "learning_rate": 1.9606773421530774e-06, + "loss": 0.0568, + "num_input_tokens_seen": 24911744, + "step": 36965 + }, + { + "epoch": 0.9031832506779371, + "grad_norm": 29.346067428588867, + "learning_rate": 1.960653659872243e-06, + "loss": 0.156, + "num_input_tokens_seen": 24915328, + "step": 36970 + }, + { + "epoch": 0.9033054015097843, + "grad_norm": 13.030354499816895, + "learning_rate": 1.9606299706052775e-06, + "loss": 0.2079, + "num_input_tokens_seen": 24918464, + "step": 36975 + }, + { + "epoch": 0.9034275523416314, + "grad_norm": 0.45906662940979004, + "learning_rate": 1.960606274352352e-06, + "loss": 0.0784, + "num_input_tokens_seen": 24921664, + "step": 36980 + }, + { + "epoch": 0.9035497031734786, + "grad_norm": 3.491370916366577, + "learning_rate": 1.9605825711136395e-06, + "loss": 0.0685, + "num_input_tokens_seen": 24925248, + "step": 36985 + }, + { + "epoch": 0.9036718540053258, + "grad_norm": 9.43264389038086, + "learning_rate": 1.9605588608893124e-06, + "loss": 0.1453, + "num_input_tokens_seen": 24928448, + "step": 36990 + }, + { + "epoch": 0.903794004837173, + "grad_norm": 11.920999526977539, + "learning_rate": 1.9605351436795426e-06, + "loss": 0.1158, + "num_input_tokens_seen": 24932160, + "step": 36995 + }, + { + "epoch": 0.9039161556690201, + "grad_norm": 0.8874409198760986, + "learning_rate": 1.960511419484503e-06, + "loss": 0.0426, + "num_input_tokens_seen": 24935424, + "step": 37000 + }, + { + "epoch": 0.9040383065008673, + "grad_norm": 22.666275024414062, + "learning_rate": 1.9604876883043655e-06, + "loss": 0.0423, + "num_input_tokens_seen": 24938752, + "step": 37005 + }, + { + "epoch": 0.9041604573327144, + "grad_norm": 18.00115203857422, + "learning_rate": 1.960463950139304e-06, + "loss": 0.1684, + "num_input_tokens_seen": 24942272, + "step": 37010 + }, + { + "epoch": 0.9042826081645616, + "grad_norm": 0.41937220096588135, + "learning_rate": 1.96044020498949e-06, + "loss": 0.1104, + "num_input_tokens_seen": 24945728, + "step": 37015 + }, + { + "epoch": 0.9044047589964088, + "grad_norm": 0.23672303557395935, + "learning_rate": 1.9604164528550966e-06, + "loss": 0.0333, + "num_input_tokens_seen": 24949248, + "step": 37020 + }, + { + "epoch": 0.9045269098282559, + "grad_norm": 20.977340698242188, + "learning_rate": 1.960392693736296e-06, + "loss": 0.1655, + "num_input_tokens_seen": 24952896, + "step": 37025 + }, + { + "epoch": 0.9046490606601031, + "grad_norm": 41.60050964355469, + "learning_rate": 1.9603689276332617e-06, + "loss": 0.1639, + "num_input_tokens_seen": 24956096, + "step": 37030 + }, + { + "epoch": 0.9047712114919503, + "grad_norm": 37.09358215332031, + "learning_rate": 1.960345154546166e-06, + "loss": 0.0335, + "num_input_tokens_seen": 24959104, + "step": 37035 + }, + { + "epoch": 0.9048933623237975, + "grad_norm": 9.280909538269043, + "learning_rate": 1.9603213744751824e-06, + "loss": 0.1253, + "num_input_tokens_seen": 24962816, + "step": 37040 + }, + { + "epoch": 0.9050155131556445, + "grad_norm": 0.6137846112251282, + "learning_rate": 1.9602975874204836e-06, + "loss": 0.2109, + "num_input_tokens_seen": 24966528, + "step": 37045 + }, + { + "epoch": 0.9051376639874917, + "grad_norm": 14.2232666015625, + "learning_rate": 1.960273793382242e-06, + "loss": 0.1559, + "num_input_tokens_seen": 24970624, + "step": 37050 + }, + { + "epoch": 0.9052598148193389, + "grad_norm": 8.7384672164917, + "learning_rate": 1.9602499923606314e-06, + "loss": 0.1607, + "num_input_tokens_seen": 24974272, + "step": 37055 + }, + { + "epoch": 0.9053819656511861, + "grad_norm": 4.077075004577637, + "learning_rate": 1.960226184355824e-06, + "loss": 0.1161, + "num_input_tokens_seen": 24978048, + "step": 37060 + }, + { + "epoch": 0.9055041164830332, + "grad_norm": 8.053204536437988, + "learning_rate": 1.9602023693679942e-06, + "loss": 0.1868, + "num_input_tokens_seen": 24981184, + "step": 37065 + }, + { + "epoch": 0.9056262673148804, + "grad_norm": 11.40694522857666, + "learning_rate": 1.9601785473973145e-06, + "loss": 0.0782, + "num_input_tokens_seen": 24984832, + "step": 37070 + }, + { + "epoch": 0.9057484181467276, + "grad_norm": 18.37397575378418, + "learning_rate": 1.9601547184439577e-06, + "loss": 0.1352, + "num_input_tokens_seen": 24988288, + "step": 37075 + }, + { + "epoch": 0.9058705689785748, + "grad_norm": 13.701996803283691, + "learning_rate": 1.960130882508098e-06, + "loss": 0.1083, + "num_input_tokens_seen": 24992192, + "step": 37080 + }, + { + "epoch": 0.905992719810422, + "grad_norm": 0.5049362778663635, + "learning_rate": 1.960107039589908e-06, + "loss": 0.1329, + "num_input_tokens_seen": 24995584, + "step": 37085 + }, + { + "epoch": 0.906114870642269, + "grad_norm": 15.812434196472168, + "learning_rate": 1.9600831896895615e-06, + "loss": 0.1082, + "num_input_tokens_seen": 24998976, + "step": 37090 + }, + { + "epoch": 0.9062370214741162, + "grad_norm": 11.897115707397461, + "learning_rate": 1.9600593328072317e-06, + "loss": 0.068, + "num_input_tokens_seen": 25002496, + "step": 37095 + }, + { + "epoch": 0.9063591723059634, + "grad_norm": 12.684399604797363, + "learning_rate": 1.960035468943092e-06, + "loss": 0.1702, + "num_input_tokens_seen": 25005952, + "step": 37100 + }, + { + "epoch": 0.9064813231378106, + "grad_norm": 34.3539924621582, + "learning_rate": 1.9600115980973167e-06, + "loss": 0.1441, + "num_input_tokens_seen": 25009408, + "step": 37105 + }, + { + "epoch": 0.9066034739696577, + "grad_norm": 1.0309791564941406, + "learning_rate": 1.9599877202700784e-06, + "loss": 0.0769, + "num_input_tokens_seen": 25012416, + "step": 37110 + }, + { + "epoch": 0.9067256248015049, + "grad_norm": 0.7294164896011353, + "learning_rate": 1.9599638354615517e-06, + "loss": 0.0935, + "num_input_tokens_seen": 25015744, + "step": 37115 + }, + { + "epoch": 0.9068477756333521, + "grad_norm": 9.473251342773438, + "learning_rate": 1.959939943671909e-06, + "loss": 0.131, + "num_input_tokens_seen": 25019904, + "step": 37120 + }, + { + "epoch": 0.9069699264651992, + "grad_norm": 15.669844627380371, + "learning_rate": 1.9599160449013255e-06, + "loss": 0.2101, + "num_input_tokens_seen": 25023104, + "step": 37125 + }, + { + "epoch": 0.9070920772970464, + "grad_norm": 18.923765182495117, + "learning_rate": 1.959892139149974e-06, + "loss": 0.0628, + "num_input_tokens_seen": 25026112, + "step": 37130 + }, + { + "epoch": 0.9072142281288935, + "grad_norm": 18.780458450317383, + "learning_rate": 1.9598682264180288e-06, + "loss": 0.0905, + "num_input_tokens_seen": 25029632, + "step": 37135 + }, + { + "epoch": 0.9073363789607407, + "grad_norm": 53.54373550415039, + "learning_rate": 1.959844306705664e-06, + "loss": 0.156, + "num_input_tokens_seen": 25032896, + "step": 37140 + }, + { + "epoch": 0.9074585297925879, + "grad_norm": 32.853431701660156, + "learning_rate": 1.9598203800130524e-06, + "loss": 0.052, + "num_input_tokens_seen": 25036352, + "step": 37145 + }, + { + "epoch": 0.9075806806244351, + "grad_norm": 31.716951370239258, + "learning_rate": 1.9597964463403695e-06, + "loss": 0.0664, + "num_input_tokens_seen": 25039488, + "step": 37150 + }, + { + "epoch": 0.9077028314562822, + "grad_norm": 31.609790802001953, + "learning_rate": 1.9597725056877886e-06, + "loss": 0.1988, + "num_input_tokens_seen": 25042816, + "step": 37155 + }, + { + "epoch": 0.9078249822881294, + "grad_norm": 0.3431050479412079, + "learning_rate": 1.959748558055484e-06, + "loss": 0.0328, + "num_input_tokens_seen": 25045888, + "step": 37160 + }, + { + "epoch": 0.9079471331199765, + "grad_norm": 26.98533821105957, + "learning_rate": 1.9597246034436293e-06, + "loss": 0.1926, + "num_input_tokens_seen": 25049536, + "step": 37165 + }, + { + "epoch": 0.9080692839518237, + "grad_norm": 14.834943771362305, + "learning_rate": 1.9597006418523995e-06, + "loss": 0.1487, + "num_input_tokens_seen": 25053248, + "step": 37170 + }, + { + "epoch": 0.9081914347836709, + "grad_norm": 16.529705047607422, + "learning_rate": 1.9596766732819684e-06, + "loss": 0.2229, + "num_input_tokens_seen": 25056576, + "step": 37175 + }, + { + "epoch": 0.908313585615518, + "grad_norm": 0.9060618877410889, + "learning_rate": 1.9596526977325106e-06, + "loss": 0.0712, + "num_input_tokens_seen": 25059968, + "step": 37180 + }, + { + "epoch": 0.9084357364473652, + "grad_norm": 14.722132682800293, + "learning_rate": 1.9596287152042e-06, + "loss": 0.1904, + "num_input_tokens_seen": 25063296, + "step": 37185 + }, + { + "epoch": 0.9085578872792124, + "grad_norm": 17.283632278442383, + "learning_rate": 1.9596047256972114e-06, + "loss": 0.0991, + "num_input_tokens_seen": 25066240, + "step": 37190 + }, + { + "epoch": 0.9086800381110596, + "grad_norm": 7.278754711151123, + "learning_rate": 1.959580729211719e-06, + "loss": 0.0686, + "num_input_tokens_seen": 25069824, + "step": 37195 + }, + { + "epoch": 0.9088021889429067, + "grad_norm": 2.6354358196258545, + "learning_rate": 1.9595567257478974e-06, + "loss": 0.022, + "num_input_tokens_seen": 25073664, + "step": 37200 + }, + { + "epoch": 0.9089243397747538, + "grad_norm": 21.20435905456543, + "learning_rate": 1.9595327153059214e-06, + "loss": 0.1277, + "num_input_tokens_seen": 25076672, + "step": 37205 + }, + { + "epoch": 0.909046490606601, + "grad_norm": 2.3268589973449707, + "learning_rate": 1.9595086978859653e-06, + "loss": 0.1181, + "num_input_tokens_seen": 25080064, + "step": 37210 + }, + { + "epoch": 0.9091686414384482, + "grad_norm": 8.44967269897461, + "learning_rate": 1.959484673488204e-06, + "loss": 0.1047, + "num_input_tokens_seen": 25083392, + "step": 37215 + }, + { + "epoch": 0.9092907922702954, + "grad_norm": 3.236764907836914, + "learning_rate": 1.9594606421128123e-06, + "loss": 0.0515, + "num_input_tokens_seen": 25086912, + "step": 37220 + }, + { + "epoch": 0.9094129431021425, + "grad_norm": 30.90077018737793, + "learning_rate": 1.9594366037599645e-06, + "loss": 0.1087, + "num_input_tokens_seen": 25090560, + "step": 37225 + }, + { + "epoch": 0.9095350939339897, + "grad_norm": 19.804662704467773, + "learning_rate": 1.959412558429835e-06, + "loss": 0.1335, + "num_input_tokens_seen": 25093760, + "step": 37230 + }, + { + "epoch": 0.9096572447658369, + "grad_norm": 22.87166976928711, + "learning_rate": 1.9593885061226002e-06, + "loss": 0.041, + "num_input_tokens_seen": 25097216, + "step": 37235 + }, + { + "epoch": 0.9097793955976841, + "grad_norm": 0.5402935147285461, + "learning_rate": 1.959364446838434e-06, + "loss": 0.1368, + "num_input_tokens_seen": 25100160, + "step": 37240 + }, + { + "epoch": 0.9099015464295311, + "grad_norm": 0.44300487637519836, + "learning_rate": 1.9593403805775113e-06, + "loss": 0.0885, + "num_input_tokens_seen": 25104320, + "step": 37245 + }, + { + "epoch": 0.9100236972613783, + "grad_norm": 26.597187042236328, + "learning_rate": 1.9593163073400075e-06, + "loss": 0.1744, + "num_input_tokens_seen": 25107584, + "step": 37250 + }, + { + "epoch": 0.9101458480932255, + "grad_norm": 35.40641403198242, + "learning_rate": 1.9592922271260973e-06, + "loss": 0.098, + "num_input_tokens_seen": 25111232, + "step": 37255 + }, + { + "epoch": 0.9102679989250727, + "grad_norm": 0.5295836329460144, + "learning_rate": 1.959268139935956e-06, + "loss": 0.1152, + "num_input_tokens_seen": 25114560, + "step": 37260 + }, + { + "epoch": 0.9103901497569199, + "grad_norm": 14.122322082519531, + "learning_rate": 1.959244045769759e-06, + "loss": 0.0152, + "num_input_tokens_seen": 25118208, + "step": 37265 + }, + { + "epoch": 0.910512300588767, + "grad_norm": 14.80065631866455, + "learning_rate": 1.9592199446276812e-06, + "loss": 0.0609, + "num_input_tokens_seen": 25121280, + "step": 37270 + }, + { + "epoch": 0.9106344514206142, + "grad_norm": 31.571918487548828, + "learning_rate": 1.959195836509898e-06, + "loss": 0.0292, + "num_input_tokens_seen": 25124928, + "step": 37275 + }, + { + "epoch": 0.9107566022524614, + "grad_norm": 23.620386123657227, + "learning_rate": 1.9591717214165844e-06, + "loss": 0.2271, + "num_input_tokens_seen": 25128320, + "step": 37280 + }, + { + "epoch": 0.9108787530843085, + "grad_norm": 34.001434326171875, + "learning_rate": 1.9591475993479162e-06, + "loss": 0.1586, + "num_input_tokens_seen": 25131776, + "step": 37285 + }, + { + "epoch": 0.9110009039161556, + "grad_norm": 11.455076217651367, + "learning_rate": 1.959123470304069e-06, + "loss": 0.1356, + "num_input_tokens_seen": 25135296, + "step": 37290 + }, + { + "epoch": 0.9111230547480028, + "grad_norm": 0.1452919840812683, + "learning_rate": 1.9590993342852175e-06, + "loss": 0.0101, + "num_input_tokens_seen": 25138624, + "step": 37295 + }, + { + "epoch": 0.91124520557985, + "grad_norm": 4.947422504425049, + "learning_rate": 1.959075191291538e-06, + "loss": 0.1217, + "num_input_tokens_seen": 25142016, + "step": 37300 + }, + { + "epoch": 0.9113673564116972, + "grad_norm": 1.445472240447998, + "learning_rate": 1.959051041323206e-06, + "loss": 0.0603, + "num_input_tokens_seen": 25145792, + "step": 37305 + }, + { + "epoch": 0.9114895072435444, + "grad_norm": 1.9578611850738525, + "learning_rate": 1.959026884380396e-06, + "loss": 0.1354, + "num_input_tokens_seen": 25149568, + "step": 37310 + }, + { + "epoch": 0.9116116580753915, + "grad_norm": 31.493968963623047, + "learning_rate": 1.959002720463285e-06, + "loss": 0.0892, + "num_input_tokens_seen": 25152768, + "step": 37315 + }, + { + "epoch": 0.9117338089072387, + "grad_norm": 0.8048754930496216, + "learning_rate": 1.958978549572048e-06, + "loss": 0.1312, + "num_input_tokens_seen": 25155776, + "step": 37320 + }, + { + "epoch": 0.9118559597390858, + "grad_norm": 16.5799617767334, + "learning_rate": 1.958954371706862e-06, + "loss": 0.106, + "num_input_tokens_seen": 25159360, + "step": 37325 + }, + { + "epoch": 0.911978110570933, + "grad_norm": 3.2302496433258057, + "learning_rate": 1.9589301868679013e-06, + "loss": 0.0733, + "num_input_tokens_seen": 25162752, + "step": 37330 + }, + { + "epoch": 0.9121002614027801, + "grad_norm": 0.7612953186035156, + "learning_rate": 1.958905995055342e-06, + "loss": 0.1728, + "num_input_tokens_seen": 25165824, + "step": 37335 + }, + { + "epoch": 0.9122224122346273, + "grad_norm": 18.163131713867188, + "learning_rate": 1.9588817962693607e-06, + "loss": 0.2231, + "num_input_tokens_seen": 25169152, + "step": 37340 + }, + { + "epoch": 0.9123445630664745, + "grad_norm": 6.804000377655029, + "learning_rate": 1.9588575905101333e-06, + "loss": 0.0484, + "num_input_tokens_seen": 25173120, + "step": 37345 + }, + { + "epoch": 0.9124667138983217, + "grad_norm": 16.38357925415039, + "learning_rate": 1.958833377777835e-06, + "loss": 0.1025, + "num_input_tokens_seen": 25176320, + "step": 37350 + }, + { + "epoch": 0.9125888647301688, + "grad_norm": 0.268574595451355, + "learning_rate": 1.958809158072643e-06, + "loss": 0.094, + "num_input_tokens_seen": 25179968, + "step": 37355 + }, + { + "epoch": 0.912711015562016, + "grad_norm": 18.270381927490234, + "learning_rate": 1.958784931394733e-06, + "loss": 0.0401, + "num_input_tokens_seen": 25183296, + "step": 37360 + }, + { + "epoch": 0.9128331663938631, + "grad_norm": 40.83668518066406, + "learning_rate": 1.958760697744281e-06, + "loss": 0.2674, + "num_input_tokens_seen": 25187008, + "step": 37365 + }, + { + "epoch": 0.9129553172257103, + "grad_norm": 59.549259185791016, + "learning_rate": 1.958736457121463e-06, + "loss": 0.0754, + "num_input_tokens_seen": 25190656, + "step": 37370 + }, + { + "epoch": 0.9130774680575575, + "grad_norm": 0.25304514169692993, + "learning_rate": 1.958712209526456e-06, + "loss": 0.1197, + "num_input_tokens_seen": 25194112, + "step": 37375 + }, + { + "epoch": 0.9131996188894046, + "grad_norm": 12.690157890319824, + "learning_rate": 1.9586879549594356e-06, + "loss": 0.0632, + "num_input_tokens_seen": 25197120, + "step": 37380 + }, + { + "epoch": 0.9133217697212518, + "grad_norm": 37.6353645324707, + "learning_rate": 1.958663693420579e-06, + "loss": 0.1644, + "num_input_tokens_seen": 25200448, + "step": 37385 + }, + { + "epoch": 0.913443920553099, + "grad_norm": 27.67104721069336, + "learning_rate": 1.9586394249100616e-06, + "loss": 0.0693, + "num_input_tokens_seen": 25203648, + "step": 37390 + }, + { + "epoch": 0.9135660713849462, + "grad_norm": 15.691805839538574, + "learning_rate": 1.9586151494280604e-06, + "loss": 0.0771, + "num_input_tokens_seen": 25207104, + "step": 37395 + }, + { + "epoch": 0.9136882222167932, + "grad_norm": 31.615686416625977, + "learning_rate": 1.9585908669747523e-06, + "loss": 0.2961, + "num_input_tokens_seen": 25210560, + "step": 37400 + }, + { + "epoch": 0.9138103730486404, + "grad_norm": 5.969202995300293, + "learning_rate": 1.958566577550314e-06, + "loss": 0.0501, + "num_input_tokens_seen": 25213888, + "step": 37405 + }, + { + "epoch": 0.9139325238804876, + "grad_norm": 3.6108410358428955, + "learning_rate": 1.958542281154921e-06, + "loss": 0.1711, + "num_input_tokens_seen": 25217344, + "step": 37410 + }, + { + "epoch": 0.9140546747123348, + "grad_norm": 0.9648376107215881, + "learning_rate": 1.9585179777887514e-06, + "loss": 0.1616, + "num_input_tokens_seen": 25220416, + "step": 37415 + }, + { + "epoch": 0.914176825544182, + "grad_norm": 0.8474432229995728, + "learning_rate": 1.9584936674519806e-06, + "loss": 0.0618, + "num_input_tokens_seen": 25224064, + "step": 37420 + }, + { + "epoch": 0.9142989763760291, + "grad_norm": 11.26650333404541, + "learning_rate": 1.9584693501447863e-06, + "loss": 0.0479, + "num_input_tokens_seen": 25227072, + "step": 37425 + }, + { + "epoch": 0.9144211272078763, + "grad_norm": 6.293632507324219, + "learning_rate": 1.958445025867345e-06, + "loss": 0.0213, + "num_input_tokens_seen": 25230528, + "step": 37430 + }, + { + "epoch": 0.9145432780397235, + "grad_norm": 11.405076026916504, + "learning_rate": 1.9584206946198342e-06, + "loss": 0.1662, + "num_input_tokens_seen": 25233984, + "step": 37435 + }, + { + "epoch": 0.9146654288715707, + "grad_norm": 0.46462714672088623, + "learning_rate": 1.9583963564024297e-06, + "loss": 0.0253, + "num_input_tokens_seen": 25237184, + "step": 37440 + }, + { + "epoch": 0.9147875797034177, + "grad_norm": 21.904184341430664, + "learning_rate": 1.9583720112153094e-06, + "loss": 0.1111, + "num_input_tokens_seen": 25240576, + "step": 37445 + }, + { + "epoch": 0.9149097305352649, + "grad_norm": 9.3959321975708, + "learning_rate": 1.95834765905865e-06, + "loss": 0.1968, + "num_input_tokens_seen": 25243712, + "step": 37450 + }, + { + "epoch": 0.9150318813671121, + "grad_norm": 24.887296676635742, + "learning_rate": 1.958323299932629e-06, + "loss": 0.1636, + "num_input_tokens_seen": 25247232, + "step": 37455 + }, + { + "epoch": 0.9151540321989593, + "grad_norm": 0.8720492720603943, + "learning_rate": 1.9582989338374227e-06, + "loss": 0.0255, + "num_input_tokens_seen": 25250880, + "step": 37460 + }, + { + "epoch": 0.9152761830308065, + "grad_norm": 34.30413055419922, + "learning_rate": 1.958274560773209e-06, + "loss": 0.081, + "num_input_tokens_seen": 25254592, + "step": 37465 + }, + { + "epoch": 0.9153983338626536, + "grad_norm": 13.46987533569336, + "learning_rate": 1.958250180740165e-06, + "loss": 0.0502, + "num_input_tokens_seen": 25258048, + "step": 37470 + }, + { + "epoch": 0.9155204846945008, + "grad_norm": 9.968213081359863, + "learning_rate": 1.958225793738468e-06, + "loss": 0.1089, + "num_input_tokens_seen": 25261248, + "step": 37475 + }, + { + "epoch": 0.915642635526348, + "grad_norm": 14.56130313873291, + "learning_rate": 1.958201399768295e-06, + "loss": 0.1609, + "num_input_tokens_seen": 25265024, + "step": 37480 + }, + { + "epoch": 0.9157647863581951, + "grad_norm": 0.8725386261940002, + "learning_rate": 1.958176998829824e-06, + "loss": 0.2316, + "num_input_tokens_seen": 25268032, + "step": 37485 + }, + { + "epoch": 0.9158869371900422, + "grad_norm": 2.6830427646636963, + "learning_rate": 1.958152590923232e-06, + "loss": 0.0842, + "num_input_tokens_seen": 25271360, + "step": 37490 + }, + { + "epoch": 0.9160090880218894, + "grad_norm": 20.3107967376709, + "learning_rate": 1.958128176048697e-06, + "loss": 0.0642, + "num_input_tokens_seen": 25274816, + "step": 37495 + }, + { + "epoch": 0.9161312388537366, + "grad_norm": 0.6933755874633789, + "learning_rate": 1.9581037542063955e-06, + "loss": 0.0984, + "num_input_tokens_seen": 25278208, + "step": 37500 + }, + { + "epoch": 0.9162533896855838, + "grad_norm": 8.6947021484375, + "learning_rate": 1.958079325396506e-06, + "loss": 0.1362, + "num_input_tokens_seen": 25281600, + "step": 37505 + }, + { + "epoch": 0.916375540517431, + "grad_norm": 2.2591030597686768, + "learning_rate": 1.9580548896192066e-06, + "loss": 0.0776, + "num_input_tokens_seen": 25284864, + "step": 37510 + }, + { + "epoch": 0.916497691349278, + "grad_norm": 0.5930300354957581, + "learning_rate": 1.9580304468746736e-06, + "loss": 0.1038, + "num_input_tokens_seen": 25288192, + "step": 37515 + }, + { + "epoch": 0.9166198421811252, + "grad_norm": 41.463714599609375, + "learning_rate": 1.958005997163086e-06, + "loss": 0.1667, + "num_input_tokens_seen": 25291584, + "step": 37520 + }, + { + "epoch": 0.9167419930129724, + "grad_norm": 15.766650199890137, + "learning_rate": 1.9579815404846207e-06, + "loss": 0.0361, + "num_input_tokens_seen": 25295040, + "step": 37525 + }, + { + "epoch": 0.9168641438448196, + "grad_norm": 21.16053581237793, + "learning_rate": 1.957957076839456e-06, + "loss": 0.1552, + "num_input_tokens_seen": 25298432, + "step": 37530 + }, + { + "epoch": 0.9169862946766667, + "grad_norm": 0.31387093663215637, + "learning_rate": 1.95793260622777e-06, + "loss": 0.0896, + "num_input_tokens_seen": 25301632, + "step": 37535 + }, + { + "epoch": 0.9171084455085139, + "grad_norm": 0.2981453537940979, + "learning_rate": 1.95790812864974e-06, + "loss": 0.134, + "num_input_tokens_seen": 25305536, + "step": 37540 + }, + { + "epoch": 0.9172305963403611, + "grad_norm": 15.161022186279297, + "learning_rate": 1.9578836441055453e-06, + "loss": 0.1619, + "num_input_tokens_seen": 25309504, + "step": 37545 + }, + { + "epoch": 0.9173527471722083, + "grad_norm": 34.08779525756836, + "learning_rate": 1.9578591525953625e-06, + "loss": 0.0792, + "num_input_tokens_seen": 25313152, + "step": 37550 + }, + { + "epoch": 0.9174748980040555, + "grad_norm": 18.717666625976562, + "learning_rate": 1.9578346541193705e-06, + "loss": 0.1003, + "num_input_tokens_seen": 25316352, + "step": 37555 + }, + { + "epoch": 0.9175970488359025, + "grad_norm": 23.372556686401367, + "learning_rate": 1.957810148677747e-06, + "loss": 0.2018, + "num_input_tokens_seen": 25319680, + "step": 37560 + }, + { + "epoch": 0.9177191996677497, + "grad_norm": 1.9030033349990845, + "learning_rate": 1.957785636270671e-06, + "loss": 0.0991, + "num_input_tokens_seen": 25323584, + "step": 37565 + }, + { + "epoch": 0.9178413504995969, + "grad_norm": 19.270458221435547, + "learning_rate": 1.95776111689832e-06, + "loss": 0.1829, + "num_input_tokens_seen": 25326912, + "step": 37570 + }, + { + "epoch": 0.9179635013314441, + "grad_norm": 26.126998901367188, + "learning_rate": 1.957736590560872e-06, + "loss": 0.1812, + "num_input_tokens_seen": 25330112, + "step": 37575 + }, + { + "epoch": 0.9180856521632912, + "grad_norm": 10.202178001403809, + "learning_rate": 1.9577120572585067e-06, + "loss": 0.1119, + "num_input_tokens_seen": 25333184, + "step": 37580 + }, + { + "epoch": 0.9182078029951384, + "grad_norm": 7.249847412109375, + "learning_rate": 1.9576875169914016e-06, + "loss": 0.0318, + "num_input_tokens_seen": 25336640, + "step": 37585 + }, + { + "epoch": 0.9183299538269856, + "grad_norm": 1.9853745698928833, + "learning_rate": 1.957662969759735e-06, + "loss": 0.0357, + "num_input_tokens_seen": 25340096, + "step": 37590 + }, + { + "epoch": 0.9184521046588328, + "grad_norm": 18.176387786865234, + "learning_rate": 1.957638415563686e-06, + "loss": 0.1304, + "num_input_tokens_seen": 25343360, + "step": 37595 + }, + { + "epoch": 0.9185742554906798, + "grad_norm": 17.54328727722168, + "learning_rate": 1.9576138544034327e-06, + "loss": 0.1276, + "num_input_tokens_seen": 25347200, + "step": 37600 + }, + { + "epoch": 0.918696406322527, + "grad_norm": 0.22984708845615387, + "learning_rate": 1.9575892862791537e-06, + "loss": 0.086, + "num_input_tokens_seen": 25350656, + "step": 37605 + }, + { + "epoch": 0.9188185571543742, + "grad_norm": 23.025104522705078, + "learning_rate": 1.9575647111910276e-06, + "loss": 0.1472, + "num_input_tokens_seen": 25353536, + "step": 37610 + }, + { + "epoch": 0.9189407079862214, + "grad_norm": 1.0077747106552124, + "learning_rate": 1.957540129139234e-06, + "loss": 0.2079, + "num_input_tokens_seen": 25356928, + "step": 37615 + }, + { + "epoch": 0.9190628588180686, + "grad_norm": 26.906963348388672, + "learning_rate": 1.957515540123951e-06, + "loss": 0.224, + "num_input_tokens_seen": 25360064, + "step": 37620 + }, + { + "epoch": 0.9191850096499157, + "grad_norm": 0.5911832451820374, + "learning_rate": 1.9574909441453573e-06, + "loss": 0.0315, + "num_input_tokens_seen": 25363136, + "step": 37625 + }, + { + "epoch": 0.9193071604817629, + "grad_norm": 30.879526138305664, + "learning_rate": 1.957466341203632e-06, + "loss": 0.0691, + "num_input_tokens_seen": 25366592, + "step": 37630 + }, + { + "epoch": 0.91942931131361, + "grad_norm": 23.551034927368164, + "learning_rate": 1.9574417312989535e-06, + "loss": 0.1469, + "num_input_tokens_seen": 25370304, + "step": 37635 + }, + { + "epoch": 0.9195514621454572, + "grad_norm": 9.431242942810059, + "learning_rate": 1.9574171144315016e-06, + "loss": 0.1158, + "num_input_tokens_seen": 25373248, + "step": 37640 + }, + { + "epoch": 0.9196736129773043, + "grad_norm": 1.5377452373504639, + "learning_rate": 1.957392490601455e-06, + "loss": 0.0728, + "num_input_tokens_seen": 25376832, + "step": 37645 + }, + { + "epoch": 0.9197957638091515, + "grad_norm": 7.705243110656738, + "learning_rate": 1.9573678598089924e-06, + "loss": 0.0135, + "num_input_tokens_seen": 25380288, + "step": 37650 + }, + { + "epoch": 0.9199179146409987, + "grad_norm": 6.997256278991699, + "learning_rate": 1.9573432220542933e-06, + "loss": 0.0112, + "num_input_tokens_seen": 25383808, + "step": 37655 + }, + { + "epoch": 0.9200400654728459, + "grad_norm": 0.8887893557548523, + "learning_rate": 1.957318577337537e-06, + "loss": 0.1431, + "num_input_tokens_seen": 25386880, + "step": 37660 + }, + { + "epoch": 0.9201622163046931, + "grad_norm": 1.3595986366271973, + "learning_rate": 1.9572939256589025e-06, + "loss": 0.138, + "num_input_tokens_seen": 25390720, + "step": 37665 + }, + { + "epoch": 0.9202843671365402, + "grad_norm": 0.3079362213611603, + "learning_rate": 1.957269267018569e-06, + "loss": 0.0168, + "num_input_tokens_seen": 25394496, + "step": 37670 + }, + { + "epoch": 0.9204065179683873, + "grad_norm": 0.5935457348823547, + "learning_rate": 1.957244601416716e-06, + "loss": 0.0043, + "num_input_tokens_seen": 25398080, + "step": 37675 + }, + { + "epoch": 0.9205286688002345, + "grad_norm": 10.200531959533691, + "learning_rate": 1.957219928853523e-06, + "loss": 0.2152, + "num_input_tokens_seen": 25401664, + "step": 37680 + }, + { + "epoch": 0.9206508196320817, + "grad_norm": 37.06196212768555, + "learning_rate": 1.9571952493291685e-06, + "loss": 0.1617, + "num_input_tokens_seen": 25405568, + "step": 37685 + }, + { + "epoch": 0.9207729704639288, + "grad_norm": 9.282423973083496, + "learning_rate": 1.957170562843833e-06, + "loss": 0.1938, + "num_input_tokens_seen": 25409344, + "step": 37690 + }, + { + "epoch": 0.920895121295776, + "grad_norm": 9.238424301147461, + "learning_rate": 1.957145869397696e-06, + "loss": 0.0743, + "num_input_tokens_seen": 25412928, + "step": 37695 + }, + { + "epoch": 0.9210172721276232, + "grad_norm": 24.76751708984375, + "learning_rate": 1.9571211689909366e-06, + "loss": 0.1165, + "num_input_tokens_seen": 25416256, + "step": 37700 + }, + { + "epoch": 0.9211394229594704, + "grad_norm": 2.6283040046691895, + "learning_rate": 1.9570964616237348e-06, + "loss": 0.0228, + "num_input_tokens_seen": 25419904, + "step": 37705 + }, + { + "epoch": 0.9212615737913176, + "grad_norm": 0.2652167081832886, + "learning_rate": 1.9570717472962697e-06, + "loss": 0.1253, + "num_input_tokens_seen": 25423040, + "step": 37710 + }, + { + "epoch": 0.9213837246231646, + "grad_norm": 16.254348754882812, + "learning_rate": 1.9570470260087217e-06, + "loss": 0.2298, + "num_input_tokens_seen": 25426560, + "step": 37715 + }, + { + "epoch": 0.9215058754550118, + "grad_norm": 0.7373325228691101, + "learning_rate": 1.9570222977612704e-06, + "loss": 0.059, + "num_input_tokens_seen": 25430016, + "step": 37720 + }, + { + "epoch": 0.921628026286859, + "grad_norm": 20.672805786132812, + "learning_rate": 1.9569975625540954e-06, + "loss": 0.1113, + "num_input_tokens_seen": 25433536, + "step": 37725 + }, + { + "epoch": 0.9217501771187062, + "grad_norm": 11.524206161499023, + "learning_rate": 1.9569728203873767e-06, + "loss": 0.0467, + "num_input_tokens_seen": 25436992, + "step": 37730 + }, + { + "epoch": 0.9218723279505533, + "grad_norm": 32.22142028808594, + "learning_rate": 1.9569480712612943e-06, + "loss": 0.0898, + "num_input_tokens_seen": 25440128, + "step": 37735 + }, + { + "epoch": 0.9219944787824005, + "grad_norm": 72.72855377197266, + "learning_rate": 1.956923315176028e-06, + "loss": 0.127, + "num_input_tokens_seen": 25443648, + "step": 37740 + }, + { + "epoch": 0.9221166296142477, + "grad_norm": 17.22328758239746, + "learning_rate": 1.956898552131758e-06, + "loss": 0.0535, + "num_input_tokens_seen": 25446784, + "step": 37745 + }, + { + "epoch": 0.9222387804460949, + "grad_norm": 16.77092933654785, + "learning_rate": 1.9568737821286645e-06, + "loss": 0.1066, + "num_input_tokens_seen": 25450240, + "step": 37750 + }, + { + "epoch": 0.922360931277942, + "grad_norm": 0.3280671536922455, + "learning_rate": 1.9568490051669276e-06, + "loss": 0.0087, + "num_input_tokens_seen": 25453824, + "step": 37755 + }, + { + "epoch": 0.9224830821097891, + "grad_norm": 22.226842880249023, + "learning_rate": 1.9568242212467273e-06, + "loss": 0.0612, + "num_input_tokens_seen": 25457984, + "step": 37760 + }, + { + "epoch": 0.9226052329416363, + "grad_norm": 0.11983904242515564, + "learning_rate": 1.9567994303682437e-06, + "loss": 0.1227, + "num_input_tokens_seen": 25461376, + "step": 37765 + }, + { + "epoch": 0.9227273837734835, + "grad_norm": 1.0264025926589966, + "learning_rate": 1.9567746325316575e-06, + "loss": 0.096, + "num_input_tokens_seen": 25464704, + "step": 37770 + }, + { + "epoch": 0.9228495346053307, + "grad_norm": 30.15180778503418, + "learning_rate": 1.956749827737149e-06, + "loss": 0.2156, + "num_input_tokens_seen": 25468032, + "step": 37775 + }, + { + "epoch": 0.9229716854371778, + "grad_norm": 56.08074188232422, + "learning_rate": 1.956725015984898e-06, + "loss": 0.1741, + "num_input_tokens_seen": 25471744, + "step": 37780 + }, + { + "epoch": 0.923093836269025, + "grad_norm": 8.849523544311523, + "learning_rate": 1.956700197275086e-06, + "loss": 0.165, + "num_input_tokens_seen": 25475200, + "step": 37785 + }, + { + "epoch": 0.9232159871008722, + "grad_norm": 10.861885070800781, + "learning_rate": 1.9566753716078922e-06, + "loss": 0.1686, + "num_input_tokens_seen": 25478720, + "step": 37790 + }, + { + "epoch": 0.9233381379327193, + "grad_norm": 8.872599601745605, + "learning_rate": 1.9566505389834978e-06, + "loss": 0.094, + "num_input_tokens_seen": 25482368, + "step": 37795 + }, + { + "epoch": 0.9234602887645665, + "grad_norm": 0.5979471802711487, + "learning_rate": 1.9566256994020833e-06, + "loss": 0.0531, + "num_input_tokens_seen": 25485568, + "step": 37800 + }, + { + "epoch": 0.9235824395964136, + "grad_norm": 0.13665713369846344, + "learning_rate": 1.95660085286383e-06, + "loss": 0.1456, + "num_input_tokens_seen": 25488704, + "step": 37805 + }, + { + "epoch": 0.9237045904282608, + "grad_norm": 0.41918784379959106, + "learning_rate": 1.956575999368918e-06, + "loss": 0.1239, + "num_input_tokens_seen": 25491968, + "step": 37810 + }, + { + "epoch": 0.923826741260108, + "grad_norm": 16.839773178100586, + "learning_rate": 1.9565511389175273e-06, + "loss": 0.1556, + "num_input_tokens_seen": 25496576, + "step": 37815 + }, + { + "epoch": 0.9239488920919552, + "grad_norm": 1.8773021697998047, + "learning_rate": 1.9565262715098396e-06, + "loss": 0.1001, + "num_input_tokens_seen": 25499968, + "step": 37820 + }, + { + "epoch": 0.9240710429238023, + "grad_norm": 0.22760513424873352, + "learning_rate": 1.9565013971460362e-06, + "loss": 0.069, + "num_input_tokens_seen": 25503232, + "step": 37825 + }, + { + "epoch": 0.9241931937556495, + "grad_norm": 28.925373077392578, + "learning_rate": 1.956476515826297e-06, + "loss": 0.1173, + "num_input_tokens_seen": 25506560, + "step": 37830 + }, + { + "epoch": 0.9243153445874966, + "grad_norm": 21.988372802734375, + "learning_rate": 1.9564516275508033e-06, + "loss": 0.15, + "num_input_tokens_seen": 25509952, + "step": 37835 + }, + { + "epoch": 0.9244374954193438, + "grad_norm": 5.837953090667725, + "learning_rate": 1.956426732319736e-06, + "loss": 0.1084, + "num_input_tokens_seen": 25513408, + "step": 37840 + }, + { + "epoch": 0.924559646251191, + "grad_norm": 6.572620868682861, + "learning_rate": 1.9564018301332765e-06, + "loss": 0.1143, + "num_input_tokens_seen": 25516864, + "step": 37845 + }, + { + "epoch": 0.9246817970830381, + "grad_norm": 0.9621378183364868, + "learning_rate": 1.9563769209916055e-06, + "loss": 0.206, + "num_input_tokens_seen": 25520128, + "step": 37850 + }, + { + "epoch": 0.9248039479148853, + "grad_norm": 16.34810447692871, + "learning_rate": 1.9563520048949043e-06, + "loss": 0.0725, + "num_input_tokens_seen": 25523776, + "step": 37855 + }, + { + "epoch": 0.9249260987467325, + "grad_norm": 18.789794921875, + "learning_rate": 1.956327081843354e-06, + "loss": 0.0724, + "num_input_tokens_seen": 25526784, + "step": 37860 + }, + { + "epoch": 0.9250482495785797, + "grad_norm": 0.49031341075897217, + "learning_rate": 1.9563021518371363e-06, + "loss": 0.0253, + "num_input_tokens_seen": 25530240, + "step": 37865 + }, + { + "epoch": 0.9251704004104268, + "grad_norm": 8.614137649536133, + "learning_rate": 1.9562772148764317e-06, + "loss": 0.1545, + "num_input_tokens_seen": 25534208, + "step": 37870 + }, + { + "epoch": 0.9252925512422739, + "grad_norm": 2.4033591747283936, + "learning_rate": 1.9562522709614223e-06, + "loss": 0.0044, + "num_input_tokens_seen": 25537344, + "step": 37875 + }, + { + "epoch": 0.9254147020741211, + "grad_norm": 0.6801878213882446, + "learning_rate": 1.956227320092289e-06, + "loss": 0.0535, + "num_input_tokens_seen": 25541376, + "step": 37880 + }, + { + "epoch": 0.9255368529059683, + "grad_norm": 17.762008666992188, + "learning_rate": 1.9562023622692132e-06, + "loss": 0.0822, + "num_input_tokens_seen": 25544576, + "step": 37885 + }, + { + "epoch": 0.9256590037378154, + "grad_norm": 0.22601798176765442, + "learning_rate": 1.9561773974923774e-06, + "loss": 0.0883, + "num_input_tokens_seen": 25548288, + "step": 37890 + }, + { + "epoch": 0.9257811545696626, + "grad_norm": 20.17107582092285, + "learning_rate": 1.9561524257619617e-06, + "loss": 0.0122, + "num_input_tokens_seen": 25552128, + "step": 37895 + }, + { + "epoch": 0.9259033054015098, + "grad_norm": 43.49441146850586, + "learning_rate": 1.9561274470781485e-06, + "loss": 0.1289, + "num_input_tokens_seen": 25555712, + "step": 37900 + }, + { + "epoch": 0.926025456233357, + "grad_norm": 20.65340232849121, + "learning_rate": 1.9561024614411197e-06, + "loss": 0.3542, + "num_input_tokens_seen": 25558848, + "step": 37905 + }, + { + "epoch": 0.9261476070652042, + "grad_norm": 24.24153709411621, + "learning_rate": 1.956077468851056e-06, + "loss": 0.1869, + "num_input_tokens_seen": 25562176, + "step": 37910 + }, + { + "epoch": 0.9262697578970512, + "grad_norm": 27.763818740844727, + "learning_rate": 1.9560524693081405e-06, + "loss": 0.1002, + "num_input_tokens_seen": 25565248, + "step": 37915 + }, + { + "epoch": 0.9263919087288984, + "grad_norm": 27.984113693237305, + "learning_rate": 1.956027462812554e-06, + "loss": 0.2334, + "num_input_tokens_seen": 25568128, + "step": 37920 + }, + { + "epoch": 0.9265140595607456, + "grad_norm": 24.396575927734375, + "learning_rate": 1.9560024493644786e-06, + "loss": 0.0868, + "num_input_tokens_seen": 25571072, + "step": 37925 + }, + { + "epoch": 0.9266362103925928, + "grad_norm": 19.87697982788086, + "learning_rate": 1.955977428964096e-06, + "loss": 0.1437, + "num_input_tokens_seen": 25574528, + "step": 37930 + }, + { + "epoch": 0.9267583612244399, + "grad_norm": 15.917418479919434, + "learning_rate": 1.9559524016115887e-06, + "loss": 0.159, + "num_input_tokens_seen": 25577920, + "step": 37935 + }, + { + "epoch": 0.9268805120562871, + "grad_norm": 6.159919261932373, + "learning_rate": 1.9559273673071384e-06, + "loss": 0.1351, + "num_input_tokens_seen": 25581312, + "step": 37940 + }, + { + "epoch": 0.9270026628881343, + "grad_norm": 9.192456245422363, + "learning_rate": 1.955902326050927e-06, + "loss": 0.0348, + "num_input_tokens_seen": 25584768, + "step": 37945 + }, + { + "epoch": 0.9271248137199815, + "grad_norm": 0.9583147764205933, + "learning_rate": 1.9558772778431373e-06, + "loss": 0.0667, + "num_input_tokens_seen": 25588096, + "step": 37950 + }, + { + "epoch": 0.9272469645518286, + "grad_norm": 0.444980263710022, + "learning_rate": 1.9558522226839506e-06, + "loss": 0.0897, + "num_input_tokens_seen": 25591744, + "step": 37955 + }, + { + "epoch": 0.9273691153836757, + "grad_norm": 77.95771789550781, + "learning_rate": 1.955827160573549e-06, + "loss": 0.1594, + "num_input_tokens_seen": 25595264, + "step": 37960 + }, + { + "epoch": 0.9274912662155229, + "grad_norm": 11.438605308532715, + "learning_rate": 1.9558020915121157e-06, + "loss": 0.1122, + "num_input_tokens_seen": 25598464, + "step": 37965 + }, + { + "epoch": 0.9276134170473701, + "grad_norm": 2.1093642711639404, + "learning_rate": 1.9557770154998326e-06, + "loss": 0.1329, + "num_input_tokens_seen": 25601856, + "step": 37970 + }, + { + "epoch": 0.9277355678792173, + "grad_norm": 14.986445426940918, + "learning_rate": 1.9557519325368818e-06, + "loss": 0.2071, + "num_input_tokens_seen": 25605440, + "step": 37975 + }, + { + "epoch": 0.9278577187110644, + "grad_norm": 72.62251281738281, + "learning_rate": 1.955726842623446e-06, + "loss": 0.121, + "num_input_tokens_seen": 25608704, + "step": 37980 + }, + { + "epoch": 0.9279798695429116, + "grad_norm": 28.621503829956055, + "learning_rate": 1.9557017457597073e-06, + "loss": 0.1347, + "num_input_tokens_seen": 25611904, + "step": 37985 + }, + { + "epoch": 0.9281020203747588, + "grad_norm": 38.730899810791016, + "learning_rate": 1.9556766419458487e-06, + "loss": 0.1338, + "num_input_tokens_seen": 25614976, + "step": 37990 + }, + { + "epoch": 0.9282241712066059, + "grad_norm": 14.130492210388184, + "learning_rate": 1.955651531182052e-06, + "loss": 0.0874, + "num_input_tokens_seen": 25618496, + "step": 37995 + }, + { + "epoch": 0.9283463220384531, + "grad_norm": 13.125405311584473, + "learning_rate": 1.955626413468501e-06, + "loss": 0.1874, + "num_input_tokens_seen": 25621504, + "step": 38000 + }, + { + "epoch": 0.9284684728703002, + "grad_norm": 24.891145706176758, + "learning_rate": 1.9556012888053775e-06, + "loss": 0.1164, + "num_input_tokens_seen": 25625088, + "step": 38005 + }, + { + "epoch": 0.9285906237021474, + "grad_norm": 17.112619400024414, + "learning_rate": 1.955576157192864e-06, + "loss": 0.2076, + "num_input_tokens_seen": 25628480, + "step": 38010 + }, + { + "epoch": 0.9287127745339946, + "grad_norm": 14.229789733886719, + "learning_rate": 1.9555510186311445e-06, + "loss": 0.135, + "num_input_tokens_seen": 25632000, + "step": 38015 + }, + { + "epoch": 0.9288349253658418, + "grad_norm": 7.512895584106445, + "learning_rate": 1.9555258731204e-06, + "loss": 0.1191, + "num_input_tokens_seen": 25635264, + "step": 38020 + }, + { + "epoch": 0.9289570761976889, + "grad_norm": 17.83555030822754, + "learning_rate": 1.955500720660815e-06, + "loss": 0.2239, + "num_input_tokens_seen": 25638656, + "step": 38025 + }, + { + "epoch": 0.929079227029536, + "grad_norm": 10.23780632019043, + "learning_rate": 1.9554755612525716e-06, + "loss": 0.0959, + "num_input_tokens_seen": 25641920, + "step": 38030 + }, + { + "epoch": 0.9292013778613832, + "grad_norm": 21.389467239379883, + "learning_rate": 1.9554503948958525e-06, + "loss": 0.074, + "num_input_tokens_seen": 25645760, + "step": 38035 + }, + { + "epoch": 0.9293235286932304, + "grad_norm": 12.27301025390625, + "learning_rate": 1.955425221590842e-06, + "loss": 0.149, + "num_input_tokens_seen": 25649216, + "step": 38040 + }, + { + "epoch": 0.9294456795250776, + "grad_norm": 1.4360170364379883, + "learning_rate": 1.9554000413377218e-06, + "loss": 0.0588, + "num_input_tokens_seen": 25652160, + "step": 38045 + }, + { + "epoch": 0.9295678303569247, + "grad_norm": 23.02863883972168, + "learning_rate": 1.9553748541366755e-06, + "loss": 0.1236, + "num_input_tokens_seen": 25655424, + "step": 38050 + }, + { + "epoch": 0.9296899811887719, + "grad_norm": 13.383890151977539, + "learning_rate": 1.9553496599878865e-06, + "loss": 0.0575, + "num_input_tokens_seen": 25659072, + "step": 38055 + }, + { + "epoch": 0.9298121320206191, + "grad_norm": 11.99791431427002, + "learning_rate": 1.9553244588915375e-06, + "loss": 0.0297, + "num_input_tokens_seen": 25662080, + "step": 38060 + }, + { + "epoch": 0.9299342828524663, + "grad_norm": 0.2585084140300751, + "learning_rate": 1.9552992508478124e-06, + "loss": 0.0378, + "num_input_tokens_seen": 25665152, + "step": 38065 + }, + { + "epoch": 0.9300564336843133, + "grad_norm": 19.530658721923828, + "learning_rate": 1.955274035856894e-06, + "loss": 0.1424, + "num_input_tokens_seen": 25668800, + "step": 38070 + }, + { + "epoch": 0.9301785845161605, + "grad_norm": 14.212098121643066, + "learning_rate": 1.955248813918966e-06, + "loss": 0.1317, + "num_input_tokens_seen": 25672064, + "step": 38075 + }, + { + "epoch": 0.9303007353480077, + "grad_norm": 3.2175631523132324, + "learning_rate": 1.9552235850342115e-06, + "loss": 0.073, + "num_input_tokens_seen": 25675520, + "step": 38080 + }, + { + "epoch": 0.9304228861798549, + "grad_norm": 83.01322174072266, + "learning_rate": 1.955198349202814e-06, + "loss": 0.1656, + "num_input_tokens_seen": 25678912, + "step": 38085 + }, + { + "epoch": 0.9305450370117021, + "grad_norm": 1.3272134065628052, + "learning_rate": 1.9551731064249577e-06, + "loss": 0.0914, + "num_input_tokens_seen": 25682112, + "step": 38090 + }, + { + "epoch": 0.9306671878435492, + "grad_norm": 22.99603271484375, + "learning_rate": 1.9551478567008254e-06, + "loss": 0.2259, + "num_input_tokens_seen": 25685120, + "step": 38095 + }, + { + "epoch": 0.9307893386753964, + "grad_norm": 1.4854393005371094, + "learning_rate": 1.955122600030601e-06, + "loss": 0.1399, + "num_input_tokens_seen": 25687936, + "step": 38100 + }, + { + "epoch": 0.9309114895072436, + "grad_norm": 9.913447380065918, + "learning_rate": 1.9550973364144683e-06, + "loss": 0.2016, + "num_input_tokens_seen": 25691264, + "step": 38105 + }, + { + "epoch": 0.9310336403390908, + "grad_norm": 12.232608795166016, + "learning_rate": 1.9550720658526106e-06, + "loss": 0.2067, + "num_input_tokens_seen": 25694528, + "step": 38110 + }, + { + "epoch": 0.9311557911709378, + "grad_norm": 4.132025241851807, + "learning_rate": 1.9550467883452123e-06, + "loss": 0.0582, + "num_input_tokens_seen": 25698112, + "step": 38115 + }, + { + "epoch": 0.931277942002785, + "grad_norm": 8.121847152709961, + "learning_rate": 1.955021503892457e-06, + "loss": 0.1722, + "num_input_tokens_seen": 25701440, + "step": 38120 + }, + { + "epoch": 0.9314000928346322, + "grad_norm": 8.623042106628418, + "learning_rate": 1.9549962124945276e-06, + "loss": 0.1415, + "num_input_tokens_seen": 25704640, + "step": 38125 + }, + { + "epoch": 0.9315222436664794, + "grad_norm": 0.715641438961029, + "learning_rate": 1.9549709141516097e-06, + "loss": 0.1866, + "num_input_tokens_seen": 25707904, + "step": 38130 + }, + { + "epoch": 0.9316443944983266, + "grad_norm": 1.868943452835083, + "learning_rate": 1.9549456088638863e-06, + "loss": 0.1155, + "num_input_tokens_seen": 25710912, + "step": 38135 + }, + { + "epoch": 0.9317665453301737, + "grad_norm": 9.373357772827148, + "learning_rate": 1.954920296631541e-06, + "loss": 0.0837, + "num_input_tokens_seen": 25714560, + "step": 38140 + }, + { + "epoch": 0.9318886961620209, + "grad_norm": 10.353510856628418, + "learning_rate": 1.9548949774547593e-06, + "loss": 0.0481, + "num_input_tokens_seen": 25717824, + "step": 38145 + }, + { + "epoch": 0.932010846993868, + "grad_norm": 30.065528869628906, + "learning_rate": 1.954869651333724e-06, + "loss": 0.1506, + "num_input_tokens_seen": 25720896, + "step": 38150 + }, + { + "epoch": 0.9321329978257152, + "grad_norm": 10.553356170654297, + "learning_rate": 1.95484431826862e-06, + "loss": 0.0912, + "num_input_tokens_seen": 25724160, + "step": 38155 + }, + { + "epoch": 0.9322551486575623, + "grad_norm": 1.0528043508529663, + "learning_rate": 1.9548189782596308e-06, + "loss": 0.0612, + "num_input_tokens_seen": 25727360, + "step": 38160 + }, + { + "epoch": 0.9323772994894095, + "grad_norm": 1.273256540298462, + "learning_rate": 1.9547936313069416e-06, + "loss": 0.134, + "num_input_tokens_seen": 25730752, + "step": 38165 + }, + { + "epoch": 0.9324994503212567, + "grad_norm": 22.34047508239746, + "learning_rate": 1.9547682774107368e-06, + "loss": 0.1049, + "num_input_tokens_seen": 25733888, + "step": 38170 + }, + { + "epoch": 0.9326216011531039, + "grad_norm": 0.28361839056015015, + "learning_rate": 1.954742916571199e-06, + "loss": 0.0668, + "num_input_tokens_seen": 25737216, + "step": 38175 + }, + { + "epoch": 0.932743751984951, + "grad_norm": 21.001340866088867, + "learning_rate": 1.954717548788515e-06, + "loss": 0.045, + "num_input_tokens_seen": 25740096, + "step": 38180 + }, + { + "epoch": 0.9328659028167982, + "grad_norm": 44.88008499145508, + "learning_rate": 1.954692174062868e-06, + "loss": 0.141, + "num_input_tokens_seen": 25743040, + "step": 38185 + }, + { + "epoch": 0.9329880536486453, + "grad_norm": 27.96893310546875, + "learning_rate": 1.9546667923944424e-06, + "loss": 0.2554, + "num_input_tokens_seen": 25746368, + "step": 38190 + }, + { + "epoch": 0.9331102044804925, + "grad_norm": 14.065231323242188, + "learning_rate": 1.954641403783423e-06, + "loss": 0.115, + "num_input_tokens_seen": 25749568, + "step": 38195 + }, + { + "epoch": 0.9332323553123397, + "grad_norm": 0.27887821197509766, + "learning_rate": 1.9546160082299952e-06, + "loss": 0.1343, + "num_input_tokens_seen": 25753216, + "step": 38200 + }, + { + "epoch": 0.9333545061441868, + "grad_norm": 0.0033274723682552576, + "learning_rate": 1.954590605734343e-06, + "loss": 0.1828, + "num_input_tokens_seen": 25756800, + "step": 38205 + }, + { + "epoch": 0.933476656976034, + "grad_norm": 7.273612976074219, + "learning_rate": 1.9545651962966507e-06, + "loss": 0.0638, + "num_input_tokens_seen": 25759872, + "step": 38210 + }, + { + "epoch": 0.9335988078078812, + "grad_norm": 6.034097671508789, + "learning_rate": 1.9545397799171034e-06, + "loss": 0.0519, + "num_input_tokens_seen": 25763456, + "step": 38215 + }, + { + "epoch": 0.9337209586397284, + "grad_norm": 8.82055950164795, + "learning_rate": 1.9545143565958865e-06, + "loss": 0.076, + "num_input_tokens_seen": 25766720, + "step": 38220 + }, + { + "epoch": 0.9338431094715754, + "grad_norm": 21.622636795043945, + "learning_rate": 1.954488926333184e-06, + "loss": 0.0628, + "num_input_tokens_seen": 25769728, + "step": 38225 + }, + { + "epoch": 0.9339652603034226, + "grad_norm": 6.863642692565918, + "learning_rate": 1.954463489129182e-06, + "loss": 0.0699, + "num_input_tokens_seen": 25773056, + "step": 38230 + }, + { + "epoch": 0.9340874111352698, + "grad_norm": 14.245474815368652, + "learning_rate": 1.9544380449840645e-06, + "loss": 0.1508, + "num_input_tokens_seen": 25776640, + "step": 38235 + }, + { + "epoch": 0.934209561967117, + "grad_norm": 11.309356689453125, + "learning_rate": 1.9544125938980164e-06, + "loss": 0.0959, + "num_input_tokens_seen": 25779776, + "step": 38240 + }, + { + "epoch": 0.9343317127989642, + "grad_norm": 0.8810875415802002, + "learning_rate": 1.9543871358712237e-06, + "loss": 0.0699, + "num_input_tokens_seen": 25782848, + "step": 38245 + }, + { + "epoch": 0.9344538636308113, + "grad_norm": 20.196170806884766, + "learning_rate": 1.954361670903871e-06, + "loss": 0.0802, + "num_input_tokens_seen": 25786048, + "step": 38250 + }, + { + "epoch": 0.9345760144626585, + "grad_norm": 2.611332416534424, + "learning_rate": 1.9543361989961432e-06, + "loss": 0.1007, + "num_input_tokens_seen": 25789184, + "step": 38255 + }, + { + "epoch": 0.9346981652945057, + "grad_norm": 14.635353088378906, + "learning_rate": 1.954310720148226e-06, + "loss": 0.1763, + "num_input_tokens_seen": 25792704, + "step": 38260 + }, + { + "epoch": 0.9348203161263529, + "grad_norm": 0.8673586845397949, + "learning_rate": 1.954285234360305e-06, + "loss": 0.1349, + "num_input_tokens_seen": 25796224, + "step": 38265 + }, + { + "epoch": 0.9349424669581999, + "grad_norm": 19.911422729492188, + "learning_rate": 1.9542597416325647e-06, + "loss": 0.262, + "num_input_tokens_seen": 25799424, + "step": 38270 + }, + { + "epoch": 0.9350646177900471, + "grad_norm": 11.518757820129395, + "learning_rate": 1.954234241965191e-06, + "loss": 0.1736, + "num_input_tokens_seen": 25802880, + "step": 38275 + }, + { + "epoch": 0.9351867686218943, + "grad_norm": 39.08585739135742, + "learning_rate": 1.9542087353583694e-06, + "loss": 0.1193, + "num_input_tokens_seen": 25806400, + "step": 38280 + }, + { + "epoch": 0.9353089194537415, + "grad_norm": 17.186824798583984, + "learning_rate": 1.9541832218122846e-06, + "loss": 0.0837, + "num_input_tokens_seen": 25809856, + "step": 38285 + }, + { + "epoch": 0.9354310702855887, + "grad_norm": 12.267500877380371, + "learning_rate": 1.9541577013271233e-06, + "loss": 0.0656, + "num_input_tokens_seen": 25812992, + "step": 38290 + }, + { + "epoch": 0.9355532211174358, + "grad_norm": 13.140397071838379, + "learning_rate": 1.9541321739030703e-06, + "loss": 0.0843, + "num_input_tokens_seen": 25816128, + "step": 38295 + }, + { + "epoch": 0.935675371949283, + "grad_norm": 26.44617462158203, + "learning_rate": 1.954106639540312e-06, + "loss": 0.1442, + "num_input_tokens_seen": 25819392, + "step": 38300 + }, + { + "epoch": 0.9357975227811302, + "grad_norm": 1.3397661447525024, + "learning_rate": 1.954081098239033e-06, + "loss": 0.055, + "num_input_tokens_seen": 25822912, + "step": 38305 + }, + { + "epoch": 0.9359196736129773, + "grad_norm": 12.671271324157715, + "learning_rate": 1.9540555499994197e-06, + "loss": 0.2042, + "num_input_tokens_seen": 25826048, + "step": 38310 + }, + { + "epoch": 0.9360418244448244, + "grad_norm": 22.01853370666504, + "learning_rate": 1.954029994821658e-06, + "loss": 0.2489, + "num_input_tokens_seen": 25829120, + "step": 38315 + }, + { + "epoch": 0.9361639752766716, + "grad_norm": 12.638045310974121, + "learning_rate": 1.9540044327059336e-06, + "loss": 0.1075, + "num_input_tokens_seen": 25832320, + "step": 38320 + }, + { + "epoch": 0.9362861261085188, + "grad_norm": 13.656826972961426, + "learning_rate": 1.9539788636524326e-06, + "loss": 0.1052, + "num_input_tokens_seen": 25835584, + "step": 38325 + }, + { + "epoch": 0.936408276940366, + "grad_norm": 0.3497765064239502, + "learning_rate": 1.9539532876613404e-06, + "loss": 0.0676, + "num_input_tokens_seen": 25838720, + "step": 38330 + }, + { + "epoch": 0.9365304277722132, + "grad_norm": 18.81248664855957, + "learning_rate": 1.9539277047328433e-06, + "loss": 0.0935, + "num_input_tokens_seen": 25842304, + "step": 38335 + }, + { + "epoch": 0.9366525786040603, + "grad_norm": 0.9873834848403931, + "learning_rate": 1.9539021148671274e-06, + "loss": 0.0629, + "num_input_tokens_seen": 25845504, + "step": 38340 + }, + { + "epoch": 0.9367747294359074, + "grad_norm": 25.43363380432129, + "learning_rate": 1.953876518064379e-06, + "loss": 0.2716, + "num_input_tokens_seen": 25848512, + "step": 38345 + }, + { + "epoch": 0.9368968802677546, + "grad_norm": 2.0168118476867676, + "learning_rate": 1.9538509143247834e-06, + "loss": 0.0158, + "num_input_tokens_seen": 25851968, + "step": 38350 + }, + { + "epoch": 0.9370190310996018, + "grad_norm": 8.315200805664062, + "learning_rate": 1.953825303648528e-06, + "loss": 0.0655, + "num_input_tokens_seen": 25854976, + "step": 38355 + }, + { + "epoch": 0.9371411819314489, + "grad_norm": 2.901071786880493, + "learning_rate": 1.9537996860357983e-06, + "loss": 0.0877, + "num_input_tokens_seen": 25858176, + "step": 38360 + }, + { + "epoch": 0.9372633327632961, + "grad_norm": 19.516382217407227, + "learning_rate": 1.9537740614867806e-06, + "loss": 0.077, + "num_input_tokens_seen": 25862080, + "step": 38365 + }, + { + "epoch": 0.9373854835951433, + "grad_norm": 10.567694664001465, + "learning_rate": 1.953748430001661e-06, + "loss": 0.1235, + "num_input_tokens_seen": 25865472, + "step": 38370 + }, + { + "epoch": 0.9375076344269905, + "grad_norm": 28.031982421875, + "learning_rate": 1.9537227915806273e-06, + "loss": 0.2081, + "num_input_tokens_seen": 25869376, + "step": 38375 + }, + { + "epoch": 0.9376297852588377, + "grad_norm": 27.6629638671875, + "learning_rate": 1.953697146223864e-06, + "loss": 0.0825, + "num_input_tokens_seen": 25872576, + "step": 38380 + }, + { + "epoch": 0.9377519360906847, + "grad_norm": 21.2484130859375, + "learning_rate": 1.953671493931559e-06, + "loss": 0.1068, + "num_input_tokens_seen": 25875776, + "step": 38385 + }, + { + "epoch": 0.9378740869225319, + "grad_norm": 5.103590488433838, + "learning_rate": 1.9536458347038986e-06, + "loss": 0.127, + "num_input_tokens_seen": 25879232, + "step": 38390 + }, + { + "epoch": 0.9379962377543791, + "grad_norm": 17.484661102294922, + "learning_rate": 1.9536201685410687e-06, + "loss": 0.1061, + "num_input_tokens_seen": 25882112, + "step": 38395 + }, + { + "epoch": 0.9381183885862263, + "grad_norm": 5.376446723937988, + "learning_rate": 1.9535944954432564e-06, + "loss": 0.0392, + "num_input_tokens_seen": 25885568, + "step": 38400 + }, + { + "epoch": 0.9382405394180734, + "grad_norm": 10.969557762145996, + "learning_rate": 1.953568815410649e-06, + "loss": 0.0759, + "num_input_tokens_seen": 25889088, + "step": 38405 + }, + { + "epoch": 0.9383626902499206, + "grad_norm": 39.0699577331543, + "learning_rate": 1.953543128443432e-06, + "loss": 0.1701, + "num_input_tokens_seen": 25892480, + "step": 38410 + }, + { + "epoch": 0.9384848410817678, + "grad_norm": 19.995424270629883, + "learning_rate": 1.9535174345417936e-06, + "loss": 0.106, + "num_input_tokens_seen": 25895296, + "step": 38415 + }, + { + "epoch": 0.938606991913615, + "grad_norm": 10.260697364807129, + "learning_rate": 1.9534917337059194e-06, + "loss": 0.152, + "num_input_tokens_seen": 25898944, + "step": 38420 + }, + { + "epoch": 0.938729142745462, + "grad_norm": 14.371946334838867, + "learning_rate": 1.9534660259359976e-06, + "loss": 0.2075, + "num_input_tokens_seen": 25902208, + "step": 38425 + }, + { + "epoch": 0.9388512935773092, + "grad_norm": 1.466731071472168, + "learning_rate": 1.9534403112322137e-06, + "loss": 0.0861, + "num_input_tokens_seen": 25905216, + "step": 38430 + }, + { + "epoch": 0.9389734444091564, + "grad_norm": 17.70195770263672, + "learning_rate": 1.9534145895947557e-06, + "loss": 0.1235, + "num_input_tokens_seen": 25908352, + "step": 38435 + }, + { + "epoch": 0.9390955952410036, + "grad_norm": 8.004560470581055, + "learning_rate": 1.95338886102381e-06, + "loss": 0.1441, + "num_input_tokens_seen": 25912128, + "step": 38440 + }, + { + "epoch": 0.9392177460728508, + "grad_norm": 12.48849105834961, + "learning_rate": 1.9533631255195643e-06, + "loss": 0.0406, + "num_input_tokens_seen": 25915456, + "step": 38445 + }, + { + "epoch": 0.9393398969046979, + "grad_norm": 19.446035385131836, + "learning_rate": 1.9533373830822056e-06, + "loss": 0.0632, + "num_input_tokens_seen": 25918784, + "step": 38450 + }, + { + "epoch": 0.9394620477365451, + "grad_norm": 17.027902603149414, + "learning_rate": 1.953311633711921e-06, + "loss": 0.1228, + "num_input_tokens_seen": 25922048, + "step": 38455 + }, + { + "epoch": 0.9395841985683923, + "grad_norm": 14.439995765686035, + "learning_rate": 1.953285877408898e-06, + "loss": 0.0834, + "num_input_tokens_seen": 25925120, + "step": 38460 + }, + { + "epoch": 0.9397063494002394, + "grad_norm": 1.492466926574707, + "learning_rate": 1.9532601141733232e-06, + "loss": 0.0435, + "num_input_tokens_seen": 25928832, + "step": 38465 + }, + { + "epoch": 0.9398285002320865, + "grad_norm": 1.7578264474868774, + "learning_rate": 1.953234344005385e-06, + "loss": 0.0766, + "num_input_tokens_seen": 25932224, + "step": 38470 + }, + { + "epoch": 0.9399506510639337, + "grad_norm": 17.704456329345703, + "learning_rate": 1.95320856690527e-06, + "loss": 0.1421, + "num_input_tokens_seen": 25935616, + "step": 38475 + }, + { + "epoch": 0.9400728018957809, + "grad_norm": 20.333786010742188, + "learning_rate": 1.953182782873166e-06, + "loss": 0.147, + "num_input_tokens_seen": 25938688, + "step": 38480 + }, + { + "epoch": 0.9401949527276281, + "grad_norm": 17.079853057861328, + "learning_rate": 1.95315699190926e-06, + "loss": 0.0807, + "num_input_tokens_seen": 25942080, + "step": 38485 + }, + { + "epoch": 0.9403171035594753, + "grad_norm": 10.847369194030762, + "learning_rate": 1.9531311940137404e-06, + "loss": 0.1887, + "num_input_tokens_seen": 25945344, + "step": 38490 + }, + { + "epoch": 0.9404392543913224, + "grad_norm": 8.297080993652344, + "learning_rate": 1.9531053891867944e-06, + "loss": 0.1246, + "num_input_tokens_seen": 25949056, + "step": 38495 + }, + { + "epoch": 0.9405614052231696, + "grad_norm": 13.134936332702637, + "learning_rate": 1.9530795774286096e-06, + "loss": 0.072, + "num_input_tokens_seen": 25952640, + "step": 38500 + }, + { + "epoch": 0.9406835560550167, + "grad_norm": 5.523425579071045, + "learning_rate": 1.9530537587393735e-06, + "loss": 0.1588, + "num_input_tokens_seen": 25955776, + "step": 38505 + }, + { + "epoch": 0.9408057068868639, + "grad_norm": 17.084697723388672, + "learning_rate": 1.9530279331192747e-06, + "loss": 0.0621, + "num_input_tokens_seen": 25958976, + "step": 38510 + }, + { + "epoch": 0.940927857718711, + "grad_norm": 23.85431480407715, + "learning_rate": 1.9530021005685e-06, + "loss": 0.0771, + "num_input_tokens_seen": 25962624, + "step": 38515 + }, + { + "epoch": 0.9410500085505582, + "grad_norm": 20.88892364501953, + "learning_rate": 1.952976261087238e-06, + "loss": 0.0954, + "num_input_tokens_seen": 25965824, + "step": 38520 + }, + { + "epoch": 0.9411721593824054, + "grad_norm": 15.951557159423828, + "learning_rate": 1.9529504146756757e-06, + "loss": 0.0415, + "num_input_tokens_seen": 25969536, + "step": 38525 + }, + { + "epoch": 0.9412943102142526, + "grad_norm": 2.573011636734009, + "learning_rate": 1.952924561334002e-06, + "loss": 0.0443, + "num_input_tokens_seen": 25972672, + "step": 38530 + }, + { + "epoch": 0.9414164610460998, + "grad_norm": 0.32983043789863586, + "learning_rate": 1.952898701062405e-06, + "loss": 0.1361, + "num_input_tokens_seen": 25976000, + "step": 38535 + }, + { + "epoch": 0.9415386118779469, + "grad_norm": 12.805581092834473, + "learning_rate": 1.952872833861072e-06, + "loss": 0.128, + "num_input_tokens_seen": 25979840, + "step": 38540 + }, + { + "epoch": 0.941660762709794, + "grad_norm": 9.775003433227539, + "learning_rate": 1.9528469597301915e-06, + "loss": 0.0814, + "num_input_tokens_seen": 25982720, + "step": 38545 + }, + { + "epoch": 0.9417829135416412, + "grad_norm": 5.329129219055176, + "learning_rate": 1.9528210786699516e-06, + "loss": 0.0383, + "num_input_tokens_seen": 25986112, + "step": 38550 + }, + { + "epoch": 0.9419050643734884, + "grad_norm": 1.2658426761627197, + "learning_rate": 1.9527951906805405e-06, + "loss": 0.1121, + "num_input_tokens_seen": 25989376, + "step": 38555 + }, + { + "epoch": 0.9420272152053355, + "grad_norm": 1.926620364189148, + "learning_rate": 1.9527692957621466e-06, + "loss": 0.051, + "num_input_tokens_seen": 25992832, + "step": 38560 + }, + { + "epoch": 0.9421493660371827, + "grad_norm": 0.40974071621894836, + "learning_rate": 1.952743393914958e-06, + "loss": 0.1363, + "num_input_tokens_seen": 25996160, + "step": 38565 + }, + { + "epoch": 0.9422715168690299, + "grad_norm": 18.52965545654297, + "learning_rate": 1.952717485139163e-06, + "loss": 0.0882, + "num_input_tokens_seen": 25999424, + "step": 38570 + }, + { + "epoch": 0.9423936677008771, + "grad_norm": 25.709993362426758, + "learning_rate": 1.9526915694349508e-06, + "loss": 0.2171, + "num_input_tokens_seen": 26003200, + "step": 38575 + }, + { + "epoch": 0.9425158185327243, + "grad_norm": 25.383703231811523, + "learning_rate": 1.9526656468025087e-06, + "loss": 0.1181, + "num_input_tokens_seen": 26006400, + "step": 38580 + }, + { + "epoch": 0.9426379693645713, + "grad_norm": 46.224788665771484, + "learning_rate": 1.9526397172420262e-06, + "loss": 0.1448, + "num_input_tokens_seen": 26010560, + "step": 38585 + }, + { + "epoch": 0.9427601201964185, + "grad_norm": 11.900120735168457, + "learning_rate": 1.9526137807536914e-06, + "loss": 0.1124, + "num_input_tokens_seen": 26013952, + "step": 38590 + }, + { + "epoch": 0.9428822710282657, + "grad_norm": 3.8370819091796875, + "learning_rate": 1.9525878373376925e-06, + "loss": 0.1023, + "num_input_tokens_seen": 26017536, + "step": 38595 + }, + { + "epoch": 0.9430044218601129, + "grad_norm": 3.7985072135925293, + "learning_rate": 1.952561886994219e-06, + "loss": 0.0568, + "num_input_tokens_seen": 26020992, + "step": 38600 + }, + { + "epoch": 0.94312657269196, + "grad_norm": 14.372045516967773, + "learning_rate": 1.952535929723459e-06, + "loss": 0.1432, + "num_input_tokens_seen": 26024192, + "step": 38605 + }, + { + "epoch": 0.9432487235238072, + "grad_norm": 13.311012268066406, + "learning_rate": 1.9525099655256017e-06, + "loss": 0.1304, + "num_input_tokens_seen": 26027264, + "step": 38610 + }, + { + "epoch": 0.9433708743556544, + "grad_norm": 37.545902252197266, + "learning_rate": 1.9524839944008356e-06, + "loss": 0.2119, + "num_input_tokens_seen": 26030784, + "step": 38615 + }, + { + "epoch": 0.9434930251875016, + "grad_norm": 2.7573108673095703, + "learning_rate": 1.9524580163493504e-06, + "loss": 0.1403, + "num_input_tokens_seen": 26034880, + "step": 38620 + }, + { + "epoch": 0.9436151760193487, + "grad_norm": 6.662299156188965, + "learning_rate": 1.9524320313713333e-06, + "loss": 0.1103, + "num_input_tokens_seen": 26038464, + "step": 38625 + }, + { + "epoch": 0.9437373268511958, + "grad_norm": 0.49036359786987305, + "learning_rate": 1.952406039466975e-06, + "loss": 0.1217, + "num_input_tokens_seen": 26041920, + "step": 38630 + }, + { + "epoch": 0.943859477683043, + "grad_norm": 1.0396976470947266, + "learning_rate": 1.9523800406364637e-06, + "loss": 0.0789, + "num_input_tokens_seen": 26045376, + "step": 38635 + }, + { + "epoch": 0.9439816285148902, + "grad_norm": 3.0205793380737305, + "learning_rate": 1.952354034879988e-06, + "loss": 0.1065, + "num_input_tokens_seen": 26048576, + "step": 38640 + }, + { + "epoch": 0.9441037793467374, + "grad_norm": 33.249732971191406, + "learning_rate": 1.9523280221977383e-06, + "loss": 0.0972, + "num_input_tokens_seen": 26051712, + "step": 38645 + }, + { + "epoch": 0.9442259301785845, + "grad_norm": 9.237648010253906, + "learning_rate": 1.9523020025899027e-06, + "loss": 0.1515, + "num_input_tokens_seen": 26054784, + "step": 38650 + }, + { + "epoch": 0.9443480810104317, + "grad_norm": 12.187664031982422, + "learning_rate": 1.952275976056671e-06, + "loss": 0.0833, + "num_input_tokens_seen": 26057792, + "step": 38655 + }, + { + "epoch": 0.9444702318422789, + "grad_norm": 26.437292098999023, + "learning_rate": 1.9522499425982325e-06, + "loss": 0.1098, + "num_input_tokens_seen": 26061120, + "step": 38660 + }, + { + "epoch": 0.944592382674126, + "grad_norm": 9.192244529724121, + "learning_rate": 1.9522239022147756e-06, + "loss": 0.0668, + "num_input_tokens_seen": 26064128, + "step": 38665 + }, + { + "epoch": 0.9447145335059732, + "grad_norm": 23.44373893737793, + "learning_rate": 1.952197854906491e-06, + "loss": 0.1106, + "num_input_tokens_seen": 26067840, + "step": 38670 + }, + { + "epoch": 0.9448366843378203, + "grad_norm": 10.469454765319824, + "learning_rate": 1.9521718006735673e-06, + "loss": 0.1118, + "num_input_tokens_seen": 26071040, + "step": 38675 + }, + { + "epoch": 0.9449588351696675, + "grad_norm": 1.4989031553268433, + "learning_rate": 1.952145739516194e-06, + "loss": 0.0134, + "num_input_tokens_seen": 26074752, + "step": 38680 + }, + { + "epoch": 0.9450809860015147, + "grad_norm": 1.131640076637268, + "learning_rate": 1.9521196714345607e-06, + "loss": 0.1698, + "num_input_tokens_seen": 26078080, + "step": 38685 + }, + { + "epoch": 0.9452031368333619, + "grad_norm": 2.65283465385437, + "learning_rate": 1.9520935964288574e-06, + "loss": 0.0761, + "num_input_tokens_seen": 26082496, + "step": 38690 + }, + { + "epoch": 0.945325287665209, + "grad_norm": 15.337343215942383, + "learning_rate": 1.9520675144992734e-06, + "loss": 0.0998, + "num_input_tokens_seen": 26085952, + "step": 38695 + }, + { + "epoch": 0.9454474384970561, + "grad_norm": 16.278797149658203, + "learning_rate": 1.952041425645998e-06, + "loss": 0.0951, + "num_input_tokens_seen": 26089280, + "step": 38700 + }, + { + "epoch": 0.9455695893289033, + "grad_norm": 0.27807071805000305, + "learning_rate": 1.9520153298692215e-06, + "loss": 0.1408, + "num_input_tokens_seen": 26092352, + "step": 38705 + }, + { + "epoch": 0.9456917401607505, + "grad_norm": 5.690235137939453, + "learning_rate": 1.9519892271691335e-06, + "loss": 0.1871, + "num_input_tokens_seen": 26095872, + "step": 38710 + }, + { + "epoch": 0.9458138909925976, + "grad_norm": 31.965085983276367, + "learning_rate": 1.951963117545924e-06, + "loss": 0.1672, + "num_input_tokens_seen": 26099200, + "step": 38715 + }, + { + "epoch": 0.9459360418244448, + "grad_norm": 12.31370735168457, + "learning_rate": 1.9519370009997825e-06, + "loss": 0.1707, + "num_input_tokens_seen": 26102272, + "step": 38720 + }, + { + "epoch": 0.946058192656292, + "grad_norm": 2.0016095638275146, + "learning_rate": 1.951910877530899e-06, + "loss": 0.0986, + "num_input_tokens_seen": 26105792, + "step": 38725 + }, + { + "epoch": 0.9461803434881392, + "grad_norm": 3.6056859493255615, + "learning_rate": 1.9518847471394633e-06, + "loss": 0.0627, + "num_input_tokens_seen": 26109760, + "step": 38730 + }, + { + "epoch": 0.9463024943199864, + "grad_norm": 7.464121341705322, + "learning_rate": 1.951858609825666e-06, + "loss": 0.0619, + "num_input_tokens_seen": 26112960, + "step": 38735 + }, + { + "epoch": 0.9464246451518334, + "grad_norm": 14.840576171875, + "learning_rate": 1.9518324655896967e-06, + "loss": 0.0992, + "num_input_tokens_seen": 26115968, + "step": 38740 + }, + { + "epoch": 0.9465467959836806, + "grad_norm": 1.0094752311706543, + "learning_rate": 1.9518063144317457e-06, + "loss": 0.1284, + "num_input_tokens_seen": 26119360, + "step": 38745 + }, + { + "epoch": 0.9466689468155278, + "grad_norm": 14.64438247680664, + "learning_rate": 1.9517801563520037e-06, + "loss": 0.0379, + "num_input_tokens_seen": 26122816, + "step": 38750 + }, + { + "epoch": 0.946791097647375, + "grad_norm": 0.7392638325691223, + "learning_rate": 1.95175399135066e-06, + "loss": 0.0435, + "num_input_tokens_seen": 26126400, + "step": 38755 + }, + { + "epoch": 0.9469132484792221, + "grad_norm": 39.79733657836914, + "learning_rate": 1.951727819427905e-06, + "loss": 0.1659, + "num_input_tokens_seen": 26129472, + "step": 38760 + }, + { + "epoch": 0.9470353993110693, + "grad_norm": 43.59321212768555, + "learning_rate": 1.9517016405839296e-06, + "loss": 0.2924, + "num_input_tokens_seen": 26132672, + "step": 38765 + }, + { + "epoch": 0.9471575501429165, + "grad_norm": 23.428409576416016, + "learning_rate": 1.951675454818924e-06, + "loss": 0.1363, + "num_input_tokens_seen": 26136064, + "step": 38770 + }, + { + "epoch": 0.9472797009747637, + "grad_norm": 17.838272094726562, + "learning_rate": 1.9516492621330785e-06, + "loss": 0.193, + "num_input_tokens_seen": 26139520, + "step": 38775 + }, + { + "epoch": 0.9474018518066109, + "grad_norm": 13.88142204284668, + "learning_rate": 1.9516230625265835e-06, + "loss": 0.1092, + "num_input_tokens_seen": 26143360, + "step": 38780 + }, + { + "epoch": 0.9475240026384579, + "grad_norm": 3.652280569076538, + "learning_rate": 1.9515968559996295e-06, + "loss": 0.1376, + "num_input_tokens_seen": 26146624, + "step": 38785 + }, + { + "epoch": 0.9476461534703051, + "grad_norm": 16.653409957885742, + "learning_rate": 1.9515706425524075e-06, + "loss": 0.1381, + "num_input_tokens_seen": 26149952, + "step": 38790 + }, + { + "epoch": 0.9477683043021523, + "grad_norm": 11.144438743591309, + "learning_rate": 1.9515444221851075e-06, + "loss": 0.1833, + "num_input_tokens_seen": 26153088, + "step": 38795 + }, + { + "epoch": 0.9478904551339995, + "grad_norm": 1.472706913948059, + "learning_rate": 1.951518194897921e-06, + "loss": 0.0472, + "num_input_tokens_seen": 26157312, + "step": 38800 + }, + { + "epoch": 0.9480126059658466, + "grad_norm": 2.764831304550171, + "learning_rate": 1.9514919606910378e-06, + "loss": 0.062, + "num_input_tokens_seen": 26160320, + "step": 38805 + }, + { + "epoch": 0.9481347567976938, + "grad_norm": 7.44785737991333, + "learning_rate": 1.9514657195646492e-06, + "loss": 0.0837, + "num_input_tokens_seen": 26163968, + "step": 38810 + }, + { + "epoch": 0.948256907629541, + "grad_norm": 5.181417465209961, + "learning_rate": 1.9514394715189464e-06, + "loss": 0.0822, + "num_input_tokens_seen": 26167296, + "step": 38815 + }, + { + "epoch": 0.9483790584613881, + "grad_norm": 0.2639814615249634, + "learning_rate": 1.9514132165541194e-06, + "loss": 0.0713, + "num_input_tokens_seen": 26170816, + "step": 38820 + }, + { + "epoch": 0.9485012092932353, + "grad_norm": 1.6539617776870728, + "learning_rate": 1.95138695467036e-06, + "loss": 0.109, + "num_input_tokens_seen": 26174720, + "step": 38825 + }, + { + "epoch": 0.9486233601250824, + "grad_norm": 16.51839828491211, + "learning_rate": 1.951360685867858e-06, + "loss": 0.073, + "num_input_tokens_seen": 26177984, + "step": 38830 + }, + { + "epoch": 0.9487455109569296, + "grad_norm": 27.56429100036621, + "learning_rate": 1.951334410146806e-06, + "loss": 0.0984, + "num_input_tokens_seen": 26181568, + "step": 38835 + }, + { + "epoch": 0.9488676617887768, + "grad_norm": 15.006582260131836, + "learning_rate": 1.951308127507394e-06, + "loss": 0.1195, + "num_input_tokens_seen": 26185024, + "step": 38840 + }, + { + "epoch": 0.948989812620624, + "grad_norm": 11.61131477355957, + "learning_rate": 1.9512818379498135e-06, + "loss": 0.1248, + "num_input_tokens_seen": 26188352, + "step": 38845 + }, + { + "epoch": 0.9491119634524711, + "grad_norm": 18.227680206298828, + "learning_rate": 1.9512555414742557e-06, + "loss": 0.0851, + "num_input_tokens_seen": 26191616, + "step": 38850 + }, + { + "epoch": 0.9492341142843183, + "grad_norm": 0.6630993485450745, + "learning_rate": 1.9512292380809116e-06, + "loss": 0.1412, + "num_input_tokens_seen": 26195840, + "step": 38855 + }, + { + "epoch": 0.9493562651161654, + "grad_norm": 0.3163129687309265, + "learning_rate": 1.9512029277699726e-06, + "loss": 0.1485, + "num_input_tokens_seen": 26199296, + "step": 38860 + }, + { + "epoch": 0.9494784159480126, + "grad_norm": 43.869964599609375, + "learning_rate": 1.95117661054163e-06, + "loss": 0.2412, + "num_input_tokens_seen": 26202304, + "step": 38865 + }, + { + "epoch": 0.9496005667798598, + "grad_norm": 1.4705532789230347, + "learning_rate": 1.9511502863960755e-06, + "loss": 0.1503, + "num_input_tokens_seen": 26205568, + "step": 38870 + }, + { + "epoch": 0.9497227176117069, + "grad_norm": 29.34637451171875, + "learning_rate": 1.9511239553334998e-06, + "loss": 0.0839, + "num_input_tokens_seen": 26208960, + "step": 38875 + }, + { + "epoch": 0.9498448684435541, + "grad_norm": 7.019904613494873, + "learning_rate": 1.9510976173540953e-06, + "loss": 0.0943, + "num_input_tokens_seen": 26212032, + "step": 38880 + }, + { + "epoch": 0.9499670192754013, + "grad_norm": 18.73554229736328, + "learning_rate": 1.951071272458053e-06, + "loss": 0.1021, + "num_input_tokens_seen": 26216064, + "step": 38885 + }, + { + "epoch": 0.9500891701072485, + "grad_norm": 4.497707843780518, + "learning_rate": 1.9510449206455644e-06, + "loss": 0.0941, + "num_input_tokens_seen": 26219136, + "step": 38890 + }, + { + "epoch": 0.9502113209390955, + "grad_norm": 0.6487525105476379, + "learning_rate": 1.9510185619168216e-06, + "loss": 0.0121, + "num_input_tokens_seen": 26222336, + "step": 38895 + }, + { + "epoch": 0.9503334717709427, + "grad_norm": 23.14124298095703, + "learning_rate": 1.9509921962720163e-06, + "loss": 0.1273, + "num_input_tokens_seen": 26225408, + "step": 38900 + }, + { + "epoch": 0.9504556226027899, + "grad_norm": 1.1773689985275269, + "learning_rate": 1.9509658237113394e-06, + "loss": 0.0294, + "num_input_tokens_seen": 26228480, + "step": 38905 + }, + { + "epoch": 0.9505777734346371, + "grad_norm": 2.263134717941284, + "learning_rate": 1.9509394442349836e-06, + "loss": 0.1051, + "num_input_tokens_seen": 26231488, + "step": 38910 + }, + { + "epoch": 0.9506999242664843, + "grad_norm": 1.4543462991714478, + "learning_rate": 1.9509130578431405e-06, + "loss": 0.1074, + "num_input_tokens_seen": 26234944, + "step": 38915 + }, + { + "epoch": 0.9508220750983314, + "grad_norm": 25.77239990234375, + "learning_rate": 1.9508866645360018e-06, + "loss": 0.1077, + "num_input_tokens_seen": 26238208, + "step": 38920 + }, + { + "epoch": 0.9509442259301786, + "grad_norm": 44.907447814941406, + "learning_rate": 1.9508602643137593e-06, + "loss": 0.1287, + "num_input_tokens_seen": 26241344, + "step": 38925 + }, + { + "epoch": 0.9510663767620258, + "grad_norm": 25.440441131591797, + "learning_rate": 1.950833857176605e-06, + "loss": 0.0629, + "num_input_tokens_seen": 26244544, + "step": 38930 + }, + { + "epoch": 0.951188527593873, + "grad_norm": 2.791712999343872, + "learning_rate": 1.9508074431247316e-06, + "loss": 0.0859, + "num_input_tokens_seen": 26247744, + "step": 38935 + }, + { + "epoch": 0.95131067842572, + "grad_norm": 26.376630783081055, + "learning_rate": 1.950781022158331e-06, + "loss": 0.1654, + "num_input_tokens_seen": 26251392, + "step": 38940 + }, + { + "epoch": 0.9514328292575672, + "grad_norm": 8.187178611755371, + "learning_rate": 1.950754594277594e-06, + "loss": 0.0961, + "num_input_tokens_seen": 26254592, + "step": 38945 + }, + { + "epoch": 0.9515549800894144, + "grad_norm": 0.9078636765480042, + "learning_rate": 1.9507281594827147e-06, + "loss": 0.0185, + "num_input_tokens_seen": 26258176, + "step": 38950 + }, + { + "epoch": 0.9516771309212616, + "grad_norm": 1.331112027168274, + "learning_rate": 1.9507017177738845e-06, + "loss": 0.0591, + "num_input_tokens_seen": 26261888, + "step": 38955 + }, + { + "epoch": 0.9517992817531087, + "grad_norm": 0.7054332494735718, + "learning_rate": 1.9506752691512955e-06, + "loss": 0.0263, + "num_input_tokens_seen": 26264768, + "step": 38960 + }, + { + "epoch": 0.9519214325849559, + "grad_norm": 1.5211477279663086, + "learning_rate": 1.9506488136151403e-06, + "loss": 0.0116, + "num_input_tokens_seen": 26268096, + "step": 38965 + }, + { + "epoch": 0.9520435834168031, + "grad_norm": 39.79738235473633, + "learning_rate": 1.9506223511656113e-06, + "loss": 0.3299, + "num_input_tokens_seen": 26271488, + "step": 38970 + }, + { + "epoch": 0.9521657342486503, + "grad_norm": 0.21544885635375977, + "learning_rate": 1.9505958818029006e-06, + "loss": 0.2123, + "num_input_tokens_seen": 26274624, + "step": 38975 + }, + { + "epoch": 0.9522878850804974, + "grad_norm": 2.099090337753296, + "learning_rate": 1.9505694055272012e-06, + "loss": 0.0553, + "num_input_tokens_seen": 26277824, + "step": 38980 + }, + { + "epoch": 0.9524100359123445, + "grad_norm": 31.99783706665039, + "learning_rate": 1.9505429223387055e-06, + "loss": 0.0606, + "num_input_tokens_seen": 26280960, + "step": 38985 + }, + { + "epoch": 0.9525321867441917, + "grad_norm": 1.8545265197753906, + "learning_rate": 1.9505164322376056e-06, + "loss": 0.1311, + "num_input_tokens_seen": 26284416, + "step": 38990 + }, + { + "epoch": 0.9526543375760389, + "grad_norm": 24.32158660888672, + "learning_rate": 1.950489935224095e-06, + "loss": 0.2482, + "num_input_tokens_seen": 26287872, + "step": 38995 + }, + { + "epoch": 0.9527764884078861, + "grad_norm": 15.769022941589355, + "learning_rate": 1.9504634312983655e-06, + "loss": 0.1956, + "num_input_tokens_seen": 26291136, + "step": 39000 + }, + { + "epoch": 0.9528986392397332, + "grad_norm": 15.915133476257324, + "learning_rate": 1.9504369204606107e-06, + "loss": 0.1214, + "num_input_tokens_seen": 26294208, + "step": 39005 + }, + { + "epoch": 0.9530207900715804, + "grad_norm": 21.899938583374023, + "learning_rate": 1.950410402711023e-06, + "loss": 0.1255, + "num_input_tokens_seen": 26297024, + "step": 39010 + }, + { + "epoch": 0.9531429409034275, + "grad_norm": 10.060094833374023, + "learning_rate": 1.950383878049795e-06, + "loss": 0.0935, + "num_input_tokens_seen": 26300736, + "step": 39015 + }, + { + "epoch": 0.9532650917352747, + "grad_norm": 24.96803855895996, + "learning_rate": 1.9503573464771197e-06, + "loss": 0.0614, + "num_input_tokens_seen": 26304128, + "step": 39020 + }, + { + "epoch": 0.9533872425671219, + "grad_norm": 0.47129759192466736, + "learning_rate": 1.9503308079931904e-06, + "loss": 0.081, + "num_input_tokens_seen": 26307264, + "step": 39025 + }, + { + "epoch": 0.953509393398969, + "grad_norm": 2.5152175426483154, + "learning_rate": 1.9503042625981994e-06, + "loss": 0.1635, + "num_input_tokens_seen": 26310784, + "step": 39030 + }, + { + "epoch": 0.9536315442308162, + "grad_norm": 19.16029930114746, + "learning_rate": 1.9502777102923407e-06, + "loss": 0.1404, + "num_input_tokens_seen": 26313856, + "step": 39035 + }, + { + "epoch": 0.9537536950626634, + "grad_norm": 1.4574499130249023, + "learning_rate": 1.950251151075807e-06, + "loss": 0.038, + "num_input_tokens_seen": 26317312, + "step": 39040 + }, + { + "epoch": 0.9538758458945106, + "grad_norm": 0.34809938073158264, + "learning_rate": 1.950224584948791e-06, + "loss": 0.2091, + "num_input_tokens_seen": 26320704, + "step": 39045 + }, + { + "epoch": 0.9539979967263577, + "grad_norm": 0.9987199902534485, + "learning_rate": 1.9501980119114863e-06, + "loss": 0.0838, + "num_input_tokens_seen": 26323968, + "step": 39050 + }, + { + "epoch": 0.9541201475582048, + "grad_norm": 0.7526427507400513, + "learning_rate": 1.9501714319640863e-06, + "loss": 0.0687, + "num_input_tokens_seen": 26327232, + "step": 39055 + }, + { + "epoch": 0.954242298390052, + "grad_norm": 8.890820503234863, + "learning_rate": 1.950144845106784e-06, + "loss": 0.1605, + "num_input_tokens_seen": 26330432, + "step": 39060 + }, + { + "epoch": 0.9543644492218992, + "grad_norm": 3.9952099323272705, + "learning_rate": 1.950118251339773e-06, + "loss": 0.1054, + "num_input_tokens_seen": 26333760, + "step": 39065 + }, + { + "epoch": 0.9544866000537464, + "grad_norm": 3.4970591068267822, + "learning_rate": 1.950091650663246e-06, + "loss": 0.0807, + "num_input_tokens_seen": 26337280, + "step": 39070 + }, + { + "epoch": 0.9546087508855935, + "grad_norm": 0.29328471422195435, + "learning_rate": 1.950065043077397e-06, + "loss": 0.0553, + "num_input_tokens_seen": 26340800, + "step": 39075 + }, + { + "epoch": 0.9547309017174407, + "grad_norm": 0.6970962882041931, + "learning_rate": 1.95003842858242e-06, + "loss": 0.0795, + "num_input_tokens_seen": 26344320, + "step": 39080 + }, + { + "epoch": 0.9548530525492879, + "grad_norm": 4.4035444259643555, + "learning_rate": 1.9500118071785072e-06, + "loss": 0.1429, + "num_input_tokens_seen": 26348864, + "step": 39085 + }, + { + "epoch": 0.9549752033811351, + "grad_norm": 39.284542083740234, + "learning_rate": 1.949985178865854e-06, + "loss": 0.0758, + "num_input_tokens_seen": 26352192, + "step": 39090 + }, + { + "epoch": 0.9550973542129821, + "grad_norm": 0.41842687129974365, + "learning_rate": 1.9499585436446522e-06, + "loss": 0.0745, + "num_input_tokens_seen": 26355136, + "step": 39095 + }, + { + "epoch": 0.9552195050448293, + "grad_norm": 39.18877410888672, + "learning_rate": 1.949931901515097e-06, + "loss": 0.1496, + "num_input_tokens_seen": 26358464, + "step": 39100 + }, + { + "epoch": 0.9553416558766765, + "grad_norm": 3.0865681171417236, + "learning_rate": 1.949905252477381e-06, + "loss": 0.0301, + "num_input_tokens_seen": 26362112, + "step": 39105 + }, + { + "epoch": 0.9554638067085237, + "grad_norm": 43.33049774169922, + "learning_rate": 1.949878596531699e-06, + "loss": 0.2, + "num_input_tokens_seen": 26365824, + "step": 39110 + }, + { + "epoch": 0.9555859575403709, + "grad_norm": 26.941396713256836, + "learning_rate": 1.9498519336782445e-06, + "loss": 0.0912, + "num_input_tokens_seen": 26369280, + "step": 39115 + }, + { + "epoch": 0.955708108372218, + "grad_norm": 56.721160888671875, + "learning_rate": 1.9498252639172107e-06, + "loss": 0.0706, + "num_input_tokens_seen": 26372672, + "step": 39120 + }, + { + "epoch": 0.9558302592040652, + "grad_norm": 8.66711139678955, + "learning_rate": 1.9497985872487926e-06, + "loss": 0.1052, + "num_input_tokens_seen": 26376000, + "step": 39125 + }, + { + "epoch": 0.9559524100359124, + "grad_norm": 0.9714411497116089, + "learning_rate": 1.949771903673183e-06, + "loss": 0.1322, + "num_input_tokens_seen": 26379264, + "step": 39130 + }, + { + "epoch": 0.9560745608677595, + "grad_norm": 0.6735852360725403, + "learning_rate": 1.949745213190577e-06, + "loss": 0.1274, + "num_input_tokens_seen": 26382592, + "step": 39135 + }, + { + "epoch": 0.9561967116996066, + "grad_norm": 1.7462108135223389, + "learning_rate": 1.9497185158011687e-06, + "loss": 0.0558, + "num_input_tokens_seen": 26385536, + "step": 39140 + }, + { + "epoch": 0.9563188625314538, + "grad_norm": 42.575992584228516, + "learning_rate": 1.9496918115051516e-06, + "loss": 0.0973, + "num_input_tokens_seen": 26389248, + "step": 39145 + }, + { + "epoch": 0.956441013363301, + "grad_norm": 22.976924896240234, + "learning_rate": 1.9496651003027204e-06, + "loss": 0.1796, + "num_input_tokens_seen": 26392384, + "step": 39150 + }, + { + "epoch": 0.9565631641951482, + "grad_norm": 12.775447845458984, + "learning_rate": 1.949638382194069e-06, + "loss": 0.1227, + "num_input_tokens_seen": 26395520, + "step": 39155 + }, + { + "epoch": 0.9566853150269954, + "grad_norm": 7.335485458374023, + "learning_rate": 1.949611657179392e-06, + "loss": 0.0913, + "num_input_tokens_seen": 26399104, + "step": 39160 + }, + { + "epoch": 0.9568074658588425, + "grad_norm": 0.8135385513305664, + "learning_rate": 1.9495849252588835e-06, + "loss": 0.0726, + "num_input_tokens_seen": 26402560, + "step": 39165 + }, + { + "epoch": 0.9569296166906897, + "grad_norm": 0.200018972158432, + "learning_rate": 1.9495581864327378e-06, + "loss": 0.2176, + "num_input_tokens_seen": 26406272, + "step": 39170 + }, + { + "epoch": 0.9570517675225368, + "grad_norm": 5.996309280395508, + "learning_rate": 1.94953144070115e-06, + "loss": 0.1195, + "num_input_tokens_seen": 26409536, + "step": 39175 + }, + { + "epoch": 0.957173918354384, + "grad_norm": 33.560302734375, + "learning_rate": 1.949504688064314e-06, + "loss": 0.0522, + "num_input_tokens_seen": 26412736, + "step": 39180 + }, + { + "epoch": 0.9572960691862311, + "grad_norm": 7.365058898925781, + "learning_rate": 1.949477928522424e-06, + "loss": 0.1914, + "num_input_tokens_seen": 26416320, + "step": 39185 + }, + { + "epoch": 0.9574182200180783, + "grad_norm": 4.897334575653076, + "learning_rate": 1.949451162075676e-06, + "loss": 0.0892, + "num_input_tokens_seen": 26419456, + "step": 39190 + }, + { + "epoch": 0.9575403708499255, + "grad_norm": 0.699462354183197, + "learning_rate": 1.9494243887242634e-06, + "loss": 0.0619, + "num_input_tokens_seen": 26423488, + "step": 39195 + }, + { + "epoch": 0.9576625216817727, + "grad_norm": 0.3113349378108978, + "learning_rate": 1.9493976084683814e-06, + "loss": 0.136, + "num_input_tokens_seen": 26427264, + "step": 39200 + }, + { + "epoch": 0.9577846725136199, + "grad_norm": 0.5005393028259277, + "learning_rate": 1.949370821308224e-06, + "loss": 0.0504, + "num_input_tokens_seen": 26430592, + "step": 39205 + }, + { + "epoch": 0.957906823345467, + "grad_norm": 24.41374969482422, + "learning_rate": 1.9493440272439873e-06, + "loss": 0.0703, + "num_input_tokens_seen": 26433856, + "step": 39210 + }, + { + "epoch": 0.9580289741773141, + "grad_norm": 20.471982955932617, + "learning_rate": 1.9493172262758656e-06, + "loss": 0.1319, + "num_input_tokens_seen": 26436992, + "step": 39215 + }, + { + "epoch": 0.9581511250091613, + "grad_norm": 34.05270767211914, + "learning_rate": 1.9492904184040532e-06, + "loss": 0.1383, + "num_input_tokens_seen": 26440128, + "step": 39220 + }, + { + "epoch": 0.9582732758410085, + "grad_norm": 6.32810640335083, + "learning_rate": 1.9492636036287457e-06, + "loss": 0.0417, + "num_input_tokens_seen": 26443584, + "step": 39225 + }, + { + "epoch": 0.9583954266728556, + "grad_norm": 22.411970138549805, + "learning_rate": 1.9492367819501383e-06, + "loss": 0.0836, + "num_input_tokens_seen": 26446912, + "step": 39230 + }, + { + "epoch": 0.9585175775047028, + "grad_norm": 18.24030876159668, + "learning_rate": 1.9492099533684254e-06, + "loss": 0.0688, + "num_input_tokens_seen": 26449792, + "step": 39235 + }, + { + "epoch": 0.95863972833655, + "grad_norm": 31.872873306274414, + "learning_rate": 1.949183117883802e-06, + "loss": 0.1123, + "num_input_tokens_seen": 26453120, + "step": 39240 + }, + { + "epoch": 0.9587618791683972, + "grad_norm": 22.10487174987793, + "learning_rate": 1.9491562754964644e-06, + "loss": 0.1026, + "num_input_tokens_seen": 26456384, + "step": 39245 + }, + { + "epoch": 0.9588840300002442, + "grad_norm": 0.6037797331809998, + "learning_rate": 1.949129426206607e-06, + "loss": 0.1303, + "num_input_tokens_seen": 26460352, + "step": 39250 + }, + { + "epoch": 0.9590061808320914, + "grad_norm": 67.08470916748047, + "learning_rate": 1.949102570014425e-06, + "loss": 0.1797, + "num_input_tokens_seen": 26463680, + "step": 39255 + }, + { + "epoch": 0.9591283316639386, + "grad_norm": 35.059085845947266, + "learning_rate": 1.9490757069201135e-06, + "loss": 0.0993, + "num_input_tokens_seen": 26466816, + "step": 39260 + }, + { + "epoch": 0.9592504824957858, + "grad_norm": 19.97450065612793, + "learning_rate": 1.9490488369238686e-06, + "loss": 0.1357, + "num_input_tokens_seen": 26470016, + "step": 39265 + }, + { + "epoch": 0.959372633327633, + "grad_norm": 0.4946680963039398, + "learning_rate": 1.949021960025885e-06, + "loss": 0.057, + "num_input_tokens_seen": 26473344, + "step": 39270 + }, + { + "epoch": 0.9594947841594801, + "grad_norm": 17.229921340942383, + "learning_rate": 1.9489950762263584e-06, + "loss": 0.0984, + "num_input_tokens_seen": 26476800, + "step": 39275 + }, + { + "epoch": 0.9596169349913273, + "grad_norm": 14.1148681640625, + "learning_rate": 1.948968185525485e-06, + "loss": 0.114, + "num_input_tokens_seen": 26480384, + "step": 39280 + }, + { + "epoch": 0.9597390858231745, + "grad_norm": 0.3873974680900574, + "learning_rate": 1.9489412879234587e-06, + "loss": 0.0964, + "num_input_tokens_seen": 26483584, + "step": 39285 + }, + { + "epoch": 0.9598612366550217, + "grad_norm": 11.539669036865234, + "learning_rate": 1.9489143834204768e-06, + "loss": 0.0955, + "num_input_tokens_seen": 26487168, + "step": 39290 + }, + { + "epoch": 0.9599833874868687, + "grad_norm": 0.8252934813499451, + "learning_rate": 1.948887472016734e-06, + "loss": 0.2023, + "num_input_tokens_seen": 26490624, + "step": 39295 + }, + { + "epoch": 0.9601055383187159, + "grad_norm": 0.29018840193748474, + "learning_rate": 1.9488605537124267e-06, + "loss": 0.0522, + "num_input_tokens_seen": 26493952, + "step": 39300 + }, + { + "epoch": 0.9602276891505631, + "grad_norm": 2.5185937881469727, + "learning_rate": 1.94883362850775e-06, + "loss": 0.0684, + "num_input_tokens_seen": 26497216, + "step": 39305 + }, + { + "epoch": 0.9603498399824103, + "grad_norm": 3.989112377166748, + "learning_rate": 1.9488066964029e-06, + "loss": 0.1094, + "num_input_tokens_seen": 26500288, + "step": 39310 + }, + { + "epoch": 0.9604719908142575, + "grad_norm": 11.158829689025879, + "learning_rate": 1.948779757398072e-06, + "loss": 0.0557, + "num_input_tokens_seen": 26503680, + "step": 39315 + }, + { + "epoch": 0.9605941416461046, + "grad_norm": 33.47761917114258, + "learning_rate": 1.948752811493463e-06, + "loss": 0.1563, + "num_input_tokens_seen": 26506944, + "step": 39320 + }, + { + "epoch": 0.9607162924779518, + "grad_norm": 21.618824005126953, + "learning_rate": 1.9487258586892685e-06, + "loss": 0.1214, + "num_input_tokens_seen": 26510272, + "step": 39325 + }, + { + "epoch": 0.960838443309799, + "grad_norm": 0.47546103596687317, + "learning_rate": 1.948698898985684e-06, + "loss": 0.1588, + "num_input_tokens_seen": 26513408, + "step": 39330 + }, + { + "epoch": 0.9609605941416461, + "grad_norm": 3.9736697673797607, + "learning_rate": 1.948671932382906e-06, + "loss": 0.1465, + "num_input_tokens_seen": 26516736, + "step": 39335 + }, + { + "epoch": 0.9610827449734932, + "grad_norm": 11.857345581054688, + "learning_rate": 1.9486449588811304e-06, + "loss": 0.0844, + "num_input_tokens_seen": 26520384, + "step": 39340 + }, + { + "epoch": 0.9612048958053404, + "grad_norm": 18.82761001586914, + "learning_rate": 1.948617978480554e-06, + "loss": 0.0408, + "num_input_tokens_seen": 26523776, + "step": 39345 + }, + { + "epoch": 0.9613270466371876, + "grad_norm": 38.90220642089844, + "learning_rate": 1.9485909911813717e-06, + "loss": 0.1468, + "num_input_tokens_seen": 26527616, + "step": 39350 + }, + { + "epoch": 0.9614491974690348, + "grad_norm": 5.618608474731445, + "learning_rate": 1.9485639969837815e-06, + "loss": 0.0336, + "num_input_tokens_seen": 26530880, + "step": 39355 + }, + { + "epoch": 0.961571348300882, + "grad_norm": 1.9378278255462646, + "learning_rate": 1.948536995887978e-06, + "loss": 0.1567, + "num_input_tokens_seen": 26533952, + "step": 39360 + }, + { + "epoch": 0.9616934991327291, + "grad_norm": 0.18136677145957947, + "learning_rate": 1.948509987894159e-06, + "loss": 0.0393, + "num_input_tokens_seen": 26537216, + "step": 39365 + }, + { + "epoch": 0.9618156499645762, + "grad_norm": 16.893386840820312, + "learning_rate": 1.9484829730025195e-06, + "loss": 0.1268, + "num_input_tokens_seen": 26540672, + "step": 39370 + }, + { + "epoch": 0.9619378007964234, + "grad_norm": 11.266032218933105, + "learning_rate": 1.9484559512132575e-06, + "loss": 0.2364, + "num_input_tokens_seen": 26544000, + "step": 39375 + }, + { + "epoch": 0.9620599516282706, + "grad_norm": 0.9606216549873352, + "learning_rate": 1.948428922526568e-06, + "loss": 0.1005, + "num_input_tokens_seen": 26547264, + "step": 39380 + }, + { + "epoch": 0.9621821024601177, + "grad_norm": 1.0269383192062378, + "learning_rate": 1.9484018869426487e-06, + "loss": 0.0725, + "num_input_tokens_seen": 26550528, + "step": 39385 + }, + { + "epoch": 0.9623042532919649, + "grad_norm": 13.481785774230957, + "learning_rate": 1.9483748444616957e-06, + "loss": 0.1236, + "num_input_tokens_seen": 26553856, + "step": 39390 + }, + { + "epoch": 0.9624264041238121, + "grad_norm": 1.561326265335083, + "learning_rate": 1.9483477950839057e-06, + "loss": 0.1777, + "num_input_tokens_seen": 26557248, + "step": 39395 + }, + { + "epoch": 0.9625485549556593, + "grad_norm": 28.832462310791016, + "learning_rate": 1.9483207388094756e-06, + "loss": 0.1267, + "num_input_tokens_seen": 26560960, + "step": 39400 + }, + { + "epoch": 0.9626707057875065, + "grad_norm": 15.7662992477417, + "learning_rate": 1.948293675638602e-06, + "loss": 0.1065, + "num_input_tokens_seen": 26564288, + "step": 39405 + }, + { + "epoch": 0.9627928566193535, + "grad_norm": 14.944591522216797, + "learning_rate": 1.9482666055714816e-06, + "loss": 0.0593, + "num_input_tokens_seen": 26567616, + "step": 39410 + }, + { + "epoch": 0.9629150074512007, + "grad_norm": 7.434094429016113, + "learning_rate": 1.9482395286083116e-06, + "loss": 0.091, + "num_input_tokens_seen": 26570752, + "step": 39415 + }, + { + "epoch": 0.9630371582830479, + "grad_norm": 11.297898292541504, + "learning_rate": 1.948212444749289e-06, + "loss": 0.1468, + "num_input_tokens_seen": 26573760, + "step": 39420 + }, + { + "epoch": 0.9631593091148951, + "grad_norm": 48.35212326049805, + "learning_rate": 1.9481853539946098e-06, + "loss": 0.192, + "num_input_tokens_seen": 26576832, + "step": 39425 + }, + { + "epoch": 0.9632814599467422, + "grad_norm": 19.07402801513672, + "learning_rate": 1.948158256344472e-06, + "loss": 0.109, + "num_input_tokens_seen": 26580608, + "step": 39430 + }, + { + "epoch": 0.9634036107785894, + "grad_norm": 24.01344108581543, + "learning_rate": 1.948131151799072e-06, + "loss": 0.085, + "num_input_tokens_seen": 26583488, + "step": 39435 + }, + { + "epoch": 0.9635257616104366, + "grad_norm": 16.847387313842773, + "learning_rate": 1.9481040403586074e-06, + "loss": 0.1276, + "num_input_tokens_seen": 26587200, + "step": 39440 + }, + { + "epoch": 0.9636479124422838, + "grad_norm": 12.600052833557129, + "learning_rate": 1.948076922023275e-06, + "loss": 0.0837, + "num_input_tokens_seen": 26590656, + "step": 39445 + }, + { + "epoch": 0.963770063274131, + "grad_norm": 4.261289596557617, + "learning_rate": 1.948049796793273e-06, + "loss": 0.1127, + "num_input_tokens_seen": 26593984, + "step": 39450 + }, + { + "epoch": 0.963892214105978, + "grad_norm": 28.10150909423828, + "learning_rate": 1.9480226646687976e-06, + "loss": 0.0991, + "num_input_tokens_seen": 26597376, + "step": 39455 + }, + { + "epoch": 0.9640143649378252, + "grad_norm": 29.681474685668945, + "learning_rate": 1.947995525650046e-06, + "loss": 0.0466, + "num_input_tokens_seen": 26600256, + "step": 39460 + }, + { + "epoch": 0.9641365157696724, + "grad_norm": 24.66652488708496, + "learning_rate": 1.947968379737216e-06, + "loss": 0.1007, + "num_input_tokens_seen": 26603904, + "step": 39465 + }, + { + "epoch": 0.9642586666015196, + "grad_norm": 0.7605220079421997, + "learning_rate": 1.947941226930505e-06, + "loss": 0.1234, + "num_input_tokens_seen": 26607552, + "step": 39470 + }, + { + "epoch": 0.9643808174333667, + "grad_norm": 0.7828469276428223, + "learning_rate": 1.947914067230111e-06, + "loss": 0.1156, + "num_input_tokens_seen": 26610688, + "step": 39475 + }, + { + "epoch": 0.9645029682652139, + "grad_norm": 1.205389142036438, + "learning_rate": 1.9478869006362305e-06, + "loss": 0.0695, + "num_input_tokens_seen": 26614016, + "step": 39480 + }, + { + "epoch": 0.9646251190970611, + "grad_norm": 1.7988033294677734, + "learning_rate": 1.9478597271490614e-06, + "loss": 0.0485, + "num_input_tokens_seen": 26617408, + "step": 39485 + }, + { + "epoch": 0.9647472699289082, + "grad_norm": 7.042235851287842, + "learning_rate": 1.9478325467688013e-06, + "loss": 0.0856, + "num_input_tokens_seen": 26620736, + "step": 39490 + }, + { + "epoch": 0.9648694207607553, + "grad_norm": 0.9602611660957336, + "learning_rate": 1.9478053594956484e-06, + "loss": 0.1242, + "num_input_tokens_seen": 26624512, + "step": 39495 + }, + { + "epoch": 0.9649915715926025, + "grad_norm": 2.938580274581909, + "learning_rate": 1.9477781653297996e-06, + "loss": 0.1077, + "num_input_tokens_seen": 26627776, + "step": 39500 + }, + { + "epoch": 0.9651137224244497, + "grad_norm": 43.11700439453125, + "learning_rate": 1.9477509642714535e-06, + "loss": 0.1278, + "num_input_tokens_seen": 26631552, + "step": 39505 + }, + { + "epoch": 0.9652358732562969, + "grad_norm": 20.765621185302734, + "learning_rate": 1.947723756320807e-06, + "loss": 0.1753, + "num_input_tokens_seen": 26634560, + "step": 39510 + }, + { + "epoch": 0.9653580240881441, + "grad_norm": 20.449045181274414, + "learning_rate": 1.9476965414780587e-06, + "loss": 0.1075, + "num_input_tokens_seen": 26637632, + "step": 39515 + }, + { + "epoch": 0.9654801749199912, + "grad_norm": 16.410585403442383, + "learning_rate": 1.9476693197434063e-06, + "loss": 0.2478, + "num_input_tokens_seen": 26640832, + "step": 39520 + }, + { + "epoch": 0.9656023257518384, + "grad_norm": 5.675082206726074, + "learning_rate": 1.9476420911170478e-06, + "loss": 0.0597, + "num_input_tokens_seen": 26644096, + "step": 39525 + }, + { + "epoch": 0.9657244765836855, + "grad_norm": 29.463457107543945, + "learning_rate": 1.947614855599181e-06, + "loss": 0.0845, + "num_input_tokens_seen": 26647104, + "step": 39530 + }, + { + "epoch": 0.9658466274155327, + "grad_norm": 11.038426399230957, + "learning_rate": 1.947587613190004e-06, + "loss": 0.0957, + "num_input_tokens_seen": 26650112, + "step": 39535 + }, + { + "epoch": 0.9659687782473798, + "grad_norm": 2.4716062545776367, + "learning_rate": 1.947560363889715e-06, + "loss": 0.0864, + "num_input_tokens_seen": 26653056, + "step": 39540 + }, + { + "epoch": 0.966090929079227, + "grad_norm": 4.964685440063477, + "learning_rate": 1.9475331076985124e-06, + "loss": 0.0976, + "num_input_tokens_seen": 26656576, + "step": 39545 + }, + { + "epoch": 0.9662130799110742, + "grad_norm": 19.059255599975586, + "learning_rate": 1.947505844616594e-06, + "loss": 0.115, + "num_input_tokens_seen": 26659776, + "step": 39550 + }, + { + "epoch": 0.9663352307429214, + "grad_norm": 39.80873107910156, + "learning_rate": 1.9474785746441584e-06, + "loss": 0.1513, + "num_input_tokens_seen": 26663296, + "step": 39555 + }, + { + "epoch": 0.9664573815747686, + "grad_norm": 0.6026878356933594, + "learning_rate": 1.9474512977814034e-06, + "loss": 0.1737, + "num_input_tokens_seen": 26667008, + "step": 39560 + }, + { + "epoch": 0.9665795324066156, + "grad_norm": 11.629830360412598, + "learning_rate": 1.947424014028528e-06, + "loss": 0.116, + "num_input_tokens_seen": 26670464, + "step": 39565 + }, + { + "epoch": 0.9667016832384628, + "grad_norm": 0.3201778829097748, + "learning_rate": 1.9473967233857306e-06, + "loss": 0.0084, + "num_input_tokens_seen": 26673792, + "step": 39570 + }, + { + "epoch": 0.96682383407031, + "grad_norm": 27.04475212097168, + "learning_rate": 1.947369425853209e-06, + "loss": 0.2801, + "num_input_tokens_seen": 26676928, + "step": 39575 + }, + { + "epoch": 0.9669459849021572, + "grad_norm": 0.3024914264678955, + "learning_rate": 1.9473421214311624e-06, + "loss": 0.0528, + "num_input_tokens_seen": 26680256, + "step": 39580 + }, + { + "epoch": 0.9670681357340043, + "grad_norm": 12.081028938293457, + "learning_rate": 1.947314810119789e-06, + "loss": 0.1016, + "num_input_tokens_seen": 26683456, + "step": 39585 + }, + { + "epoch": 0.9671902865658515, + "grad_norm": 9.354941368103027, + "learning_rate": 1.947287491919287e-06, + "loss": 0.1592, + "num_input_tokens_seen": 26687040, + "step": 39590 + }, + { + "epoch": 0.9673124373976987, + "grad_norm": 5.755664348602295, + "learning_rate": 1.947260166829856e-06, + "loss": 0.037, + "num_input_tokens_seen": 26690880, + "step": 39595 + }, + { + "epoch": 0.9674345882295459, + "grad_norm": 0.8709821105003357, + "learning_rate": 1.9472328348516942e-06, + "loss": 0.1352, + "num_input_tokens_seen": 26694400, + "step": 39600 + }, + { + "epoch": 0.9675567390613931, + "grad_norm": 0.0869455561041832, + "learning_rate": 1.947205495985001e-06, + "loss": 0.1238, + "num_input_tokens_seen": 26697664, + "step": 39605 + }, + { + "epoch": 0.9676788898932401, + "grad_norm": 18.37799072265625, + "learning_rate": 1.947178150229974e-06, + "loss": 0.0828, + "num_input_tokens_seen": 26700672, + "step": 39610 + }, + { + "epoch": 0.9678010407250873, + "grad_norm": 15.37241268157959, + "learning_rate": 1.9471507975868133e-06, + "loss": 0.0915, + "num_input_tokens_seen": 26703424, + "step": 39615 + }, + { + "epoch": 0.9679231915569345, + "grad_norm": 29.29631996154785, + "learning_rate": 1.9471234380557166e-06, + "loss": 0.0577, + "num_input_tokens_seen": 26706496, + "step": 39620 + }, + { + "epoch": 0.9680453423887817, + "grad_norm": 16.976232528686523, + "learning_rate": 1.947096071636884e-06, + "loss": 0.0826, + "num_input_tokens_seen": 26710272, + "step": 39625 + }, + { + "epoch": 0.9681674932206288, + "grad_norm": 0.2124803066253662, + "learning_rate": 1.9470686983305137e-06, + "loss": 0.2906, + "num_input_tokens_seen": 26713344, + "step": 39630 + }, + { + "epoch": 0.968289644052476, + "grad_norm": 14.788725852966309, + "learning_rate": 1.9470413181368055e-06, + "loss": 0.0887, + "num_input_tokens_seen": 26717056, + "step": 39635 + }, + { + "epoch": 0.9684117948843232, + "grad_norm": 2.2631194591522217, + "learning_rate": 1.9470139310559575e-06, + "loss": 0.1129, + "num_input_tokens_seen": 26720448, + "step": 39640 + }, + { + "epoch": 0.9685339457161704, + "grad_norm": 0.33006513118743896, + "learning_rate": 1.9469865370881697e-06, + "loss": 0.1292, + "num_input_tokens_seen": 26723584, + "step": 39645 + }, + { + "epoch": 0.9686560965480175, + "grad_norm": 37.54838180541992, + "learning_rate": 1.946959136233641e-06, + "loss": 0.1745, + "num_input_tokens_seen": 26726656, + "step": 39650 + }, + { + "epoch": 0.9687782473798646, + "grad_norm": 2.1035213470458984, + "learning_rate": 1.946931728492571e-06, + "loss": 0.0521, + "num_input_tokens_seen": 26729728, + "step": 39655 + }, + { + "epoch": 0.9689003982117118, + "grad_norm": 15.783363342285156, + "learning_rate": 1.9469043138651593e-06, + "loss": 0.1553, + "num_input_tokens_seen": 26733312, + "step": 39660 + }, + { + "epoch": 0.969022549043559, + "grad_norm": 10.670489311218262, + "learning_rate": 1.9468768923516038e-06, + "loss": 0.1097, + "num_input_tokens_seen": 26736832, + "step": 39665 + }, + { + "epoch": 0.9691446998754062, + "grad_norm": 27.422231674194336, + "learning_rate": 1.9468494639521054e-06, + "loss": 0.1189, + "num_input_tokens_seen": 26740352, + "step": 39670 + }, + { + "epoch": 0.9692668507072533, + "grad_norm": 0.7498286962509155, + "learning_rate": 1.9468220286668627e-06, + "loss": 0.0273, + "num_input_tokens_seen": 26743680, + "step": 39675 + }, + { + "epoch": 0.9693890015391005, + "grad_norm": 12.412466049194336, + "learning_rate": 1.9467945864960756e-06, + "loss": 0.1094, + "num_input_tokens_seen": 26747072, + "step": 39680 + }, + { + "epoch": 0.9695111523709476, + "grad_norm": 15.174398422241211, + "learning_rate": 1.946767137439944e-06, + "loss": 0.075, + "num_input_tokens_seen": 26749824, + "step": 39685 + }, + { + "epoch": 0.9696333032027948, + "grad_norm": 5.013666152954102, + "learning_rate": 1.9467396814986667e-06, + "loss": 0.0608, + "num_input_tokens_seen": 26753792, + "step": 39690 + }, + { + "epoch": 0.969755454034642, + "grad_norm": 0.363828182220459, + "learning_rate": 1.946712218672444e-06, + "loss": 0.0149, + "num_input_tokens_seen": 26757056, + "step": 39695 + }, + { + "epoch": 0.9698776048664891, + "grad_norm": 17.466541290283203, + "learning_rate": 1.9466847489614752e-06, + "loss": 0.1228, + "num_input_tokens_seen": 26760512, + "step": 39700 + }, + { + "epoch": 0.9699997556983363, + "grad_norm": 0.7071266174316406, + "learning_rate": 1.9466572723659605e-06, + "loss": 0.0055, + "num_input_tokens_seen": 26763712, + "step": 39705 + }, + { + "epoch": 0.9701219065301835, + "grad_norm": 16.308212280273438, + "learning_rate": 1.9466297888860996e-06, + "loss": 0.0845, + "num_input_tokens_seen": 26766848, + "step": 39710 + }, + { + "epoch": 0.9702440573620307, + "grad_norm": 8.815410614013672, + "learning_rate": 1.9466022985220923e-06, + "loss": 0.2151, + "num_input_tokens_seen": 26770240, + "step": 39715 + }, + { + "epoch": 0.9703662081938778, + "grad_norm": 22.193788528442383, + "learning_rate": 1.946574801274138e-06, + "loss": 0.1373, + "num_input_tokens_seen": 26773504, + "step": 39720 + }, + { + "epoch": 0.970488359025725, + "grad_norm": 0.9276455044746399, + "learning_rate": 1.9465472971424373e-06, + "loss": 0.0279, + "num_input_tokens_seen": 26776960, + "step": 39725 + }, + { + "epoch": 0.9706105098575721, + "grad_norm": 0.254749059677124, + "learning_rate": 1.9465197861271904e-06, + "loss": 0.1373, + "num_input_tokens_seen": 26780224, + "step": 39730 + }, + { + "epoch": 0.9707326606894193, + "grad_norm": 4.772306442260742, + "learning_rate": 1.9464922682285966e-06, + "loss": 0.0407, + "num_input_tokens_seen": 26783936, + "step": 39735 + }, + { + "epoch": 0.9708548115212665, + "grad_norm": 21.679712295532227, + "learning_rate": 1.946464743446857e-06, + "loss": 0.1103, + "num_input_tokens_seen": 26787136, + "step": 39740 + }, + { + "epoch": 0.9709769623531136, + "grad_norm": 0.8357428312301636, + "learning_rate": 1.9464372117821707e-06, + "loss": 0.0649, + "num_input_tokens_seen": 26790656, + "step": 39745 + }, + { + "epoch": 0.9710991131849608, + "grad_norm": 9.33323860168457, + "learning_rate": 1.9464096732347386e-06, + "loss": 0.1237, + "num_input_tokens_seen": 26793856, + "step": 39750 + }, + { + "epoch": 0.971221264016808, + "grad_norm": 0.6812987923622131, + "learning_rate": 1.9463821278047607e-06, + "loss": 0.0979, + "num_input_tokens_seen": 26797312, + "step": 39755 + }, + { + "epoch": 0.9713434148486552, + "grad_norm": 0.21187125146389008, + "learning_rate": 1.9463545754924376e-06, + "loss": 0.0288, + "num_input_tokens_seen": 26800896, + "step": 39760 + }, + { + "epoch": 0.9714655656805022, + "grad_norm": 28.24985694885254, + "learning_rate": 1.9463270162979697e-06, + "loss": 0.2784, + "num_input_tokens_seen": 26804032, + "step": 39765 + }, + { + "epoch": 0.9715877165123494, + "grad_norm": 6.990946292877197, + "learning_rate": 1.9462994502215565e-06, + "loss": 0.0278, + "num_input_tokens_seen": 26807424, + "step": 39770 + }, + { + "epoch": 0.9717098673441966, + "grad_norm": 13.497817039489746, + "learning_rate": 1.9462718772634e-06, + "loss": 0.1526, + "num_input_tokens_seen": 26810560, + "step": 39775 + }, + { + "epoch": 0.9718320181760438, + "grad_norm": 19.58252716064453, + "learning_rate": 1.9462442974236996e-06, + "loss": 0.0776, + "num_input_tokens_seen": 26813824, + "step": 39780 + }, + { + "epoch": 0.9719541690078909, + "grad_norm": 14.153072357177734, + "learning_rate": 1.946216710702656e-06, + "loss": 0.1508, + "num_input_tokens_seen": 26817088, + "step": 39785 + }, + { + "epoch": 0.9720763198397381, + "grad_norm": 6.661437034606934, + "learning_rate": 1.94618911710047e-06, + "loss": 0.1146, + "num_input_tokens_seen": 26820224, + "step": 39790 + }, + { + "epoch": 0.9721984706715853, + "grad_norm": 24.22982406616211, + "learning_rate": 1.946161516617342e-06, + "loss": 0.0732, + "num_input_tokens_seen": 26823488, + "step": 39795 + }, + { + "epoch": 0.9723206215034325, + "grad_norm": 38.69376754760742, + "learning_rate": 1.9461339092534733e-06, + "loss": 0.0899, + "num_input_tokens_seen": 26826752, + "step": 39800 + }, + { + "epoch": 0.9724427723352796, + "grad_norm": 35.627620697021484, + "learning_rate": 1.9461062950090645e-06, + "loss": 0.1212, + "num_input_tokens_seen": 26829952, + "step": 39805 + }, + { + "epoch": 0.9725649231671267, + "grad_norm": 12.235479354858398, + "learning_rate": 1.946078673884316e-06, + "loss": 0.0809, + "num_input_tokens_seen": 26832640, + "step": 39810 + }, + { + "epoch": 0.9726870739989739, + "grad_norm": 24.54392433166504, + "learning_rate": 1.9460510458794286e-06, + "loss": 0.0563, + "num_input_tokens_seen": 26835776, + "step": 39815 + }, + { + "epoch": 0.9728092248308211, + "grad_norm": 5.012967109680176, + "learning_rate": 1.9460234109946044e-06, + "loss": 0.1205, + "num_input_tokens_seen": 26838912, + "step": 39820 + }, + { + "epoch": 0.9729313756626683, + "grad_norm": 23.825641632080078, + "learning_rate": 1.9459957692300426e-06, + "loss": 0.0838, + "num_input_tokens_seen": 26842112, + "step": 39825 + }, + { + "epoch": 0.9730535264945154, + "grad_norm": 1.0864224433898926, + "learning_rate": 1.9459681205859457e-06, + "loss": 0.135, + "num_input_tokens_seen": 26845184, + "step": 39830 + }, + { + "epoch": 0.9731756773263626, + "grad_norm": 16.24464988708496, + "learning_rate": 1.945940465062514e-06, + "loss": 0.0454, + "num_input_tokens_seen": 26848704, + "step": 39835 + }, + { + "epoch": 0.9732978281582098, + "grad_norm": 31.012704849243164, + "learning_rate": 1.945912802659949e-06, + "loss": 0.0868, + "num_input_tokens_seen": 26852032, + "step": 39840 + }, + { + "epoch": 0.9734199789900569, + "grad_norm": 37.21706008911133, + "learning_rate": 1.9458851333784514e-06, + "loss": 0.0862, + "num_input_tokens_seen": 26855424, + "step": 39845 + }, + { + "epoch": 0.9735421298219041, + "grad_norm": 30.55225944519043, + "learning_rate": 1.945857457218223e-06, + "loss": 0.1357, + "num_input_tokens_seen": 26858944, + "step": 39850 + }, + { + "epoch": 0.9736642806537512, + "grad_norm": 15.448716163635254, + "learning_rate": 1.945829774179464e-06, + "loss": 0.2237, + "num_input_tokens_seen": 26862208, + "step": 39855 + }, + { + "epoch": 0.9737864314855984, + "grad_norm": 0.22018280625343323, + "learning_rate": 1.9458020842623774e-06, + "loss": 0.0235, + "num_input_tokens_seen": 26865728, + "step": 39860 + }, + { + "epoch": 0.9739085823174456, + "grad_norm": 12.636298179626465, + "learning_rate": 1.9457743874671633e-06, + "loss": 0.0422, + "num_input_tokens_seen": 26869312, + "step": 39865 + }, + { + "epoch": 0.9740307331492928, + "grad_norm": 28.28765869140625, + "learning_rate": 1.9457466837940234e-06, + "loss": 0.1107, + "num_input_tokens_seen": 26872704, + "step": 39870 + }, + { + "epoch": 0.9741528839811399, + "grad_norm": 18.39106559753418, + "learning_rate": 1.9457189732431594e-06, + "loss": 0.1688, + "num_input_tokens_seen": 26876032, + "step": 39875 + }, + { + "epoch": 0.974275034812987, + "grad_norm": 11.407864570617676, + "learning_rate": 1.9456912558147724e-06, + "loss": 0.1856, + "num_input_tokens_seen": 26879360, + "step": 39880 + }, + { + "epoch": 0.9743971856448342, + "grad_norm": 3.375286102294922, + "learning_rate": 1.9456635315090645e-06, + "loss": 0.1668, + "num_input_tokens_seen": 26882752, + "step": 39885 + }, + { + "epoch": 0.9745193364766814, + "grad_norm": 7.9621100425720215, + "learning_rate": 1.945635800326237e-06, + "loss": 0.1114, + "num_input_tokens_seen": 26886272, + "step": 39890 + }, + { + "epoch": 0.9746414873085286, + "grad_norm": 10.258283615112305, + "learning_rate": 1.9456080622664913e-06, + "loss": 0.1352, + "num_input_tokens_seen": 26889344, + "step": 39895 + }, + { + "epoch": 0.9747636381403757, + "grad_norm": 0.11732304841279984, + "learning_rate": 1.94558031733003e-06, + "loss": 0.1157, + "num_input_tokens_seen": 26892544, + "step": 39900 + }, + { + "epoch": 0.9748857889722229, + "grad_norm": 1.7938843965530396, + "learning_rate": 1.9455525655170537e-06, + "loss": 0.0424, + "num_input_tokens_seen": 26895936, + "step": 39905 + }, + { + "epoch": 0.9750079398040701, + "grad_norm": 0.14183376729488373, + "learning_rate": 1.9455248068277653e-06, + "loss": 0.0701, + "num_input_tokens_seen": 26899008, + "step": 39910 + }, + { + "epoch": 0.9751300906359173, + "grad_norm": 19.030942916870117, + "learning_rate": 1.945497041262366e-06, + "loss": 0.1555, + "num_input_tokens_seen": 26902208, + "step": 39915 + }, + { + "epoch": 0.9752522414677643, + "grad_norm": 9.254307746887207, + "learning_rate": 1.945469268821058e-06, + "loss": 0.1173, + "num_input_tokens_seen": 26905408, + "step": 39920 + }, + { + "epoch": 0.9753743922996115, + "grad_norm": 11.228315353393555, + "learning_rate": 1.945441489504043e-06, + "loss": 0.1268, + "num_input_tokens_seen": 26908608, + "step": 39925 + }, + { + "epoch": 0.9754965431314587, + "grad_norm": 16.239818572998047, + "learning_rate": 1.9454137033115234e-06, + "loss": 0.0952, + "num_input_tokens_seen": 26911680, + "step": 39930 + }, + { + "epoch": 0.9756186939633059, + "grad_norm": 12.501836776733398, + "learning_rate": 1.9453859102437007e-06, + "loss": 0.1024, + "num_input_tokens_seen": 26915264, + "step": 39935 + }, + { + "epoch": 0.9757408447951531, + "grad_norm": 25.5905818939209, + "learning_rate": 1.945358110300778e-06, + "loss": 0.1325, + "num_input_tokens_seen": 26918848, + "step": 39940 + }, + { + "epoch": 0.9758629956270002, + "grad_norm": 15.425862312316895, + "learning_rate": 1.9453303034829563e-06, + "loss": 0.1706, + "num_input_tokens_seen": 26922304, + "step": 39945 + }, + { + "epoch": 0.9759851464588474, + "grad_norm": 1.7378052473068237, + "learning_rate": 1.9453024897904387e-06, + "loss": 0.0871, + "num_input_tokens_seen": 26925952, + "step": 39950 + }, + { + "epoch": 0.9761072972906946, + "grad_norm": 25.161956787109375, + "learning_rate": 1.9452746692234267e-06, + "loss": 0.2264, + "num_input_tokens_seen": 26929280, + "step": 39955 + }, + { + "epoch": 0.9762294481225418, + "grad_norm": 15.880363464355469, + "learning_rate": 1.9452468417821235e-06, + "loss": 0.0949, + "num_input_tokens_seen": 26932224, + "step": 39960 + }, + { + "epoch": 0.9763515989543888, + "grad_norm": 1.2579472064971924, + "learning_rate": 1.945219007466731e-06, + "loss": 0.1502, + "num_input_tokens_seen": 26935360, + "step": 39965 + }, + { + "epoch": 0.976473749786236, + "grad_norm": 8.38668155670166, + "learning_rate": 1.9451911662774515e-06, + "loss": 0.0737, + "num_input_tokens_seen": 26938944, + "step": 39970 + }, + { + "epoch": 0.9765959006180832, + "grad_norm": 8.972485542297363, + "learning_rate": 1.9451633182144875e-06, + "loss": 0.18, + "num_input_tokens_seen": 26942336, + "step": 39975 + }, + { + "epoch": 0.9767180514499304, + "grad_norm": 10.496006965637207, + "learning_rate": 1.9451354632780418e-06, + "loss": 0.1195, + "num_input_tokens_seen": 26945664, + "step": 39980 + }, + { + "epoch": 0.9768402022817776, + "grad_norm": 0.6256728768348694, + "learning_rate": 1.9451076014683166e-06, + "loss": 0.0777, + "num_input_tokens_seen": 26949568, + "step": 39985 + }, + { + "epoch": 0.9769623531136247, + "grad_norm": 13.370022773742676, + "learning_rate": 1.945079732785515e-06, + "loss": 0.0916, + "num_input_tokens_seen": 26952960, + "step": 39990 + }, + { + "epoch": 0.9770845039454719, + "grad_norm": 22.760894775390625, + "learning_rate": 1.9450518572298394e-06, + "loss": 0.09, + "num_input_tokens_seen": 26955904, + "step": 39995 + }, + { + "epoch": 0.977206654777319, + "grad_norm": 1.959449291229248, + "learning_rate": 1.945023974801492e-06, + "loss": 0.0635, + "num_input_tokens_seen": 26959872, + "step": 40000 + }, + { + "epoch": 0.9773288056091662, + "grad_norm": 0.4411034882068634, + "learning_rate": 1.9449960855006766e-06, + "loss": 0.1043, + "num_input_tokens_seen": 26963264, + "step": 40005 + }, + { + "epoch": 0.9774509564410133, + "grad_norm": 33.63774108886719, + "learning_rate": 1.9449681893275956e-06, + "loss": 0.0797, + "num_input_tokens_seen": 26966528, + "step": 40010 + }, + { + "epoch": 0.9775731072728605, + "grad_norm": 0.423960417509079, + "learning_rate": 1.9449402862824512e-06, + "loss": 0.0364, + "num_input_tokens_seen": 26970048, + "step": 40015 + }, + { + "epoch": 0.9776952581047077, + "grad_norm": 33.858604431152344, + "learning_rate": 1.944912376365447e-06, + "loss": 0.2936, + "num_input_tokens_seen": 26972928, + "step": 40020 + }, + { + "epoch": 0.9778174089365549, + "grad_norm": 0.678591251373291, + "learning_rate": 1.9448844595767865e-06, + "loss": 0.0958, + "num_input_tokens_seen": 26976256, + "step": 40025 + }, + { + "epoch": 0.9779395597684021, + "grad_norm": 13.827244758605957, + "learning_rate": 1.9448565359166715e-06, + "loss": 0.2362, + "num_input_tokens_seen": 26979648, + "step": 40030 + }, + { + "epoch": 0.9780617106002492, + "grad_norm": 17.79885482788086, + "learning_rate": 1.9448286053853054e-06, + "loss": 0.0897, + "num_input_tokens_seen": 26982848, + "step": 40035 + }, + { + "epoch": 0.9781838614320963, + "grad_norm": 9.928762435913086, + "learning_rate": 1.944800667982892e-06, + "loss": 0.1079, + "num_input_tokens_seen": 26986176, + "step": 40040 + }, + { + "epoch": 0.9783060122639435, + "grad_norm": 13.501751899719238, + "learning_rate": 1.944772723709634e-06, + "loss": 0.1416, + "num_input_tokens_seen": 26989312, + "step": 40045 + }, + { + "epoch": 0.9784281630957907, + "grad_norm": 37.86274719238281, + "learning_rate": 1.9447447725657346e-06, + "loss": 0.0931, + "num_input_tokens_seen": 26992640, + "step": 40050 + }, + { + "epoch": 0.9785503139276378, + "grad_norm": 0.30410051345825195, + "learning_rate": 1.944716814551397e-06, + "loss": 0.081, + "num_input_tokens_seen": 26996032, + "step": 40055 + }, + { + "epoch": 0.978672464759485, + "grad_norm": 19.66057014465332, + "learning_rate": 1.944688849666825e-06, + "loss": 0.052, + "num_input_tokens_seen": 26999360, + "step": 40060 + }, + { + "epoch": 0.9787946155913322, + "grad_norm": 42.28346252441406, + "learning_rate": 1.944660877912221e-06, + "loss": 0.1148, + "num_input_tokens_seen": 27002496, + "step": 40065 + }, + { + "epoch": 0.9789167664231794, + "grad_norm": 4.766364574432373, + "learning_rate": 1.9446328992877896e-06, + "loss": 0.0271, + "num_input_tokens_seen": 27005760, + "step": 40070 + }, + { + "epoch": 0.9790389172550265, + "grad_norm": 7.8795013427734375, + "learning_rate": 1.944604913793733e-06, + "loss": 0.224, + "num_input_tokens_seen": 27009344, + "step": 40075 + }, + { + "epoch": 0.9791610680868736, + "grad_norm": 15.261124610900879, + "learning_rate": 1.944576921430256e-06, + "loss": 0.0777, + "num_input_tokens_seen": 27012480, + "step": 40080 + }, + { + "epoch": 0.9792832189187208, + "grad_norm": 0.24406935274600983, + "learning_rate": 1.944548922197561e-06, + "loss": 0.0154, + "num_input_tokens_seen": 27015552, + "step": 40085 + }, + { + "epoch": 0.979405369750568, + "grad_norm": 17.68901252746582, + "learning_rate": 1.9445209160958526e-06, + "loss": 0.0631, + "num_input_tokens_seen": 27018560, + "step": 40090 + }, + { + "epoch": 0.9795275205824152, + "grad_norm": 47.638511657714844, + "learning_rate": 1.9444929031253337e-06, + "loss": 0.3232, + "num_input_tokens_seen": 27021888, + "step": 40095 + }, + { + "epoch": 0.9796496714142623, + "grad_norm": 11.55770206451416, + "learning_rate": 1.944464883286209e-06, + "loss": 0.0867, + "num_input_tokens_seen": 27025152, + "step": 40100 + }, + { + "epoch": 0.9797718222461095, + "grad_norm": 11.932427406311035, + "learning_rate": 1.9444368565786813e-06, + "loss": 0.0918, + "num_input_tokens_seen": 27028224, + "step": 40105 + }, + { + "epoch": 0.9798939730779567, + "grad_norm": 6.641244888305664, + "learning_rate": 1.9444088230029548e-06, + "loss": 0.0568, + "num_input_tokens_seen": 27031552, + "step": 40110 + }, + { + "epoch": 0.9800161239098039, + "grad_norm": 23.15635871887207, + "learning_rate": 1.944380782559233e-06, + "loss": 0.0588, + "num_input_tokens_seen": 27035072, + "step": 40115 + }, + { + "epoch": 0.9801382747416509, + "grad_norm": 10.944313049316406, + "learning_rate": 1.944352735247721e-06, + "loss": 0.1429, + "num_input_tokens_seen": 27038784, + "step": 40120 + }, + { + "epoch": 0.9802604255734981, + "grad_norm": 8.97407341003418, + "learning_rate": 1.944324681068621e-06, + "loss": 0.144, + "num_input_tokens_seen": 27041920, + "step": 40125 + }, + { + "epoch": 0.9803825764053453, + "grad_norm": 24.198251724243164, + "learning_rate": 1.944296620022138e-06, + "loss": 0.1506, + "num_input_tokens_seen": 27045184, + "step": 40130 + }, + { + "epoch": 0.9805047272371925, + "grad_norm": 3.9922945499420166, + "learning_rate": 1.944268552108476e-06, + "loss": 0.0484, + "num_input_tokens_seen": 27048448, + "step": 40135 + }, + { + "epoch": 0.9806268780690397, + "grad_norm": 24.19867706298828, + "learning_rate": 1.9442404773278396e-06, + "loss": 0.1924, + "num_input_tokens_seen": 27051968, + "step": 40140 + }, + { + "epoch": 0.9807490289008868, + "grad_norm": 0.7200508713722229, + "learning_rate": 1.9442123956804323e-06, + "loss": 0.1174, + "num_input_tokens_seen": 27055168, + "step": 40145 + }, + { + "epoch": 0.980871179732734, + "grad_norm": 12.604074478149414, + "learning_rate": 1.9441843071664584e-06, + "loss": 0.0951, + "num_input_tokens_seen": 27058688, + "step": 40150 + }, + { + "epoch": 0.9809933305645812, + "grad_norm": 14.674346923828125, + "learning_rate": 1.9441562117861224e-06, + "loss": 0.1467, + "num_input_tokens_seen": 27061760, + "step": 40155 + }, + { + "epoch": 0.9811154813964283, + "grad_norm": 6.677186489105225, + "learning_rate": 1.944128109539628e-06, + "loss": 0.0662, + "num_input_tokens_seen": 27065344, + "step": 40160 + }, + { + "epoch": 0.9812376322282754, + "grad_norm": 6.980493068695068, + "learning_rate": 1.9441000004271805e-06, + "loss": 0.0604, + "num_input_tokens_seen": 27068352, + "step": 40165 + }, + { + "epoch": 0.9813597830601226, + "grad_norm": 14.983774185180664, + "learning_rate": 1.944071884448984e-06, + "loss": 0.0954, + "num_input_tokens_seen": 27071744, + "step": 40170 + }, + { + "epoch": 0.9814819338919698, + "grad_norm": 15.543222427368164, + "learning_rate": 1.9440437616052425e-06, + "loss": 0.1245, + "num_input_tokens_seen": 27074944, + "step": 40175 + }, + { + "epoch": 0.981604084723817, + "grad_norm": 1.5099848508834839, + "learning_rate": 1.944015631896161e-06, + "loss": 0.0687, + "num_input_tokens_seen": 27078272, + "step": 40180 + }, + { + "epoch": 0.9817262355556642, + "grad_norm": 20.4335994720459, + "learning_rate": 1.9439874953219437e-06, + "loss": 0.1081, + "num_input_tokens_seen": 27081600, + "step": 40185 + }, + { + "epoch": 0.9818483863875113, + "grad_norm": 1.7662246227264404, + "learning_rate": 1.9439593518827955e-06, + "loss": 0.0673, + "num_input_tokens_seen": 27084480, + "step": 40190 + }, + { + "epoch": 0.9819705372193585, + "grad_norm": 8.809418678283691, + "learning_rate": 1.9439312015789213e-06, + "loss": 0.0639, + "num_input_tokens_seen": 27087552, + "step": 40195 + }, + { + "epoch": 0.9820926880512056, + "grad_norm": 5.3898749351501465, + "learning_rate": 1.9439030444105253e-06, + "loss": 0.0221, + "num_input_tokens_seen": 27090496, + "step": 40200 + }, + { + "epoch": 0.9822148388830528, + "grad_norm": 20.789827346801758, + "learning_rate": 1.9438748803778123e-06, + "loss": 0.1202, + "num_input_tokens_seen": 27093888, + "step": 40205 + }, + { + "epoch": 0.9823369897148999, + "grad_norm": 15.2289400100708, + "learning_rate": 1.943846709480988e-06, + "loss": 0.0858, + "num_input_tokens_seen": 27097024, + "step": 40210 + }, + { + "epoch": 0.9824591405467471, + "grad_norm": 0.4235302209854126, + "learning_rate": 1.9438185317202557e-06, + "loss": 0.0748, + "num_input_tokens_seen": 27100544, + "step": 40215 + }, + { + "epoch": 0.9825812913785943, + "grad_norm": 22.20836067199707, + "learning_rate": 1.9437903470958216e-06, + "loss": 0.1275, + "num_input_tokens_seen": 27104000, + "step": 40220 + }, + { + "epoch": 0.9827034422104415, + "grad_norm": 28.422733306884766, + "learning_rate": 1.94376215560789e-06, + "loss": 0.0914, + "num_input_tokens_seen": 27106944, + "step": 40225 + }, + { + "epoch": 0.9828255930422887, + "grad_norm": 12.55518627166748, + "learning_rate": 1.9437339572566666e-06, + "loss": 0.1515, + "num_input_tokens_seen": 27109952, + "step": 40230 + }, + { + "epoch": 0.9829477438741357, + "grad_norm": 34.5494384765625, + "learning_rate": 1.9437057520423557e-06, + "loss": 0.2121, + "num_input_tokens_seen": 27113024, + "step": 40235 + }, + { + "epoch": 0.9830698947059829, + "grad_norm": 0.9313546419143677, + "learning_rate": 1.9436775399651628e-06, + "loss": 0.1038, + "num_input_tokens_seen": 27116160, + "step": 40240 + }, + { + "epoch": 0.9831920455378301, + "grad_norm": 0.2228740155696869, + "learning_rate": 1.9436493210252932e-06, + "loss": 0.115, + "num_input_tokens_seen": 27119424, + "step": 40245 + }, + { + "epoch": 0.9833141963696773, + "grad_norm": 18.93325424194336, + "learning_rate": 1.9436210952229517e-06, + "loss": 0.0432, + "num_input_tokens_seen": 27122368, + "step": 40250 + }, + { + "epoch": 0.9834363472015244, + "grad_norm": 9.483077049255371, + "learning_rate": 1.943592862558344e-06, + "loss": 0.1868, + "num_input_tokens_seen": 27125952, + "step": 40255 + }, + { + "epoch": 0.9835584980333716, + "grad_norm": 59.740150451660156, + "learning_rate": 1.943564623031675e-06, + "loss": 0.1231, + "num_input_tokens_seen": 27129280, + "step": 40260 + }, + { + "epoch": 0.9836806488652188, + "grad_norm": 3.750798463821411, + "learning_rate": 1.9435363766431504e-06, + "loss": 0.1181, + "num_input_tokens_seen": 27132736, + "step": 40265 + }, + { + "epoch": 0.983802799697066, + "grad_norm": 0.2580016255378723, + "learning_rate": 1.9435081233929755e-06, + "loss": 0.016, + "num_input_tokens_seen": 27136384, + "step": 40270 + }, + { + "epoch": 0.9839249505289132, + "grad_norm": 15.4879150390625, + "learning_rate": 1.9434798632813556e-06, + "loss": 0.0667, + "num_input_tokens_seen": 27139904, + "step": 40275 + }, + { + "epoch": 0.9840471013607602, + "grad_norm": 1.0932239294052124, + "learning_rate": 1.9434515963084965e-06, + "loss": 0.078, + "num_input_tokens_seen": 27143488, + "step": 40280 + }, + { + "epoch": 0.9841692521926074, + "grad_norm": 28.132898330688477, + "learning_rate": 1.943423322474603e-06, + "loss": 0.1724, + "num_input_tokens_seen": 27146688, + "step": 40285 + }, + { + "epoch": 0.9842914030244546, + "grad_norm": 34.23074722290039, + "learning_rate": 1.9433950417798823e-06, + "loss": 0.1108, + "num_input_tokens_seen": 27149824, + "step": 40290 + }, + { + "epoch": 0.9844135538563018, + "grad_norm": 16.52518653869629, + "learning_rate": 1.9433667542245385e-06, + "loss": 0.1305, + "num_input_tokens_seen": 27153280, + "step": 40295 + }, + { + "epoch": 0.9845357046881489, + "grad_norm": 19.801593780517578, + "learning_rate": 1.9433384598087784e-06, + "loss": 0.1398, + "num_input_tokens_seen": 27156416, + "step": 40300 + }, + { + "epoch": 0.9846578555199961, + "grad_norm": 1.1172616481781006, + "learning_rate": 1.943310158532807e-06, + "loss": 0.0858, + "num_input_tokens_seen": 27159616, + "step": 40305 + }, + { + "epoch": 0.9847800063518433, + "grad_norm": 29.34340476989746, + "learning_rate": 1.9432818503968304e-06, + "loss": 0.1594, + "num_input_tokens_seen": 27163072, + "step": 40310 + }, + { + "epoch": 0.9849021571836905, + "grad_norm": 0.5346962809562683, + "learning_rate": 1.9432535354010542e-06, + "loss": 0.062, + "num_input_tokens_seen": 27166400, + "step": 40315 + }, + { + "epoch": 0.9850243080155375, + "grad_norm": 17.92679214477539, + "learning_rate": 1.943225213545685e-06, + "loss": 0.1924, + "num_input_tokens_seen": 27169600, + "step": 40320 + }, + { + "epoch": 0.9851464588473847, + "grad_norm": 10.966693878173828, + "learning_rate": 1.9431968848309287e-06, + "loss": 0.1132, + "num_input_tokens_seen": 27173376, + "step": 40325 + }, + { + "epoch": 0.9852686096792319, + "grad_norm": 26.890884399414062, + "learning_rate": 1.9431685492569907e-06, + "loss": 0.1546, + "num_input_tokens_seen": 27176640, + "step": 40330 + }, + { + "epoch": 0.9853907605110791, + "grad_norm": 0.18494495749473572, + "learning_rate": 1.943140206824077e-06, + "loss": 0.0869, + "num_input_tokens_seen": 27179840, + "step": 40335 + }, + { + "epoch": 0.9855129113429263, + "grad_norm": 0.3660151958465576, + "learning_rate": 1.943111857532394e-06, + "loss": 0.0055, + "num_input_tokens_seen": 27183040, + "step": 40340 + }, + { + "epoch": 0.9856350621747734, + "grad_norm": 43.12258529663086, + "learning_rate": 1.943083501382148e-06, + "loss": 0.1628, + "num_input_tokens_seen": 27186240, + "step": 40345 + }, + { + "epoch": 0.9857572130066206, + "grad_norm": 1.3598332405090332, + "learning_rate": 1.9430551383735455e-06, + "loss": 0.0698, + "num_input_tokens_seen": 27189632, + "step": 40350 + }, + { + "epoch": 0.9858793638384677, + "grad_norm": 56.717376708984375, + "learning_rate": 1.943026768506792e-06, + "loss": 0.1483, + "num_input_tokens_seen": 27192960, + "step": 40355 + }, + { + "epoch": 0.9860015146703149, + "grad_norm": 32.48219299316406, + "learning_rate": 1.9429983917820944e-06, + "loss": 0.1704, + "num_input_tokens_seen": 27196352, + "step": 40360 + }, + { + "epoch": 0.986123665502162, + "grad_norm": 9.722297668457031, + "learning_rate": 1.9429700081996587e-06, + "loss": 0.1303, + "num_input_tokens_seen": 27199680, + "step": 40365 + }, + { + "epoch": 0.9862458163340092, + "grad_norm": 9.689001083374023, + "learning_rate": 1.9429416177596917e-06, + "loss": 0.0881, + "num_input_tokens_seen": 27203392, + "step": 40370 + }, + { + "epoch": 0.9863679671658564, + "grad_norm": 11.218616485595703, + "learning_rate": 1.9429132204623993e-06, + "loss": 0.1059, + "num_input_tokens_seen": 27206848, + "step": 40375 + }, + { + "epoch": 0.9864901179977036, + "grad_norm": 4.2435688972473145, + "learning_rate": 1.9428848163079884e-06, + "loss": 0.1589, + "num_input_tokens_seen": 27210688, + "step": 40380 + }, + { + "epoch": 0.9866122688295508, + "grad_norm": 0.222267284989357, + "learning_rate": 1.942856405296666e-06, + "loss": 0.08, + "num_input_tokens_seen": 27214016, + "step": 40385 + }, + { + "epoch": 0.9867344196613979, + "grad_norm": 12.685956001281738, + "learning_rate": 1.942827987428638e-06, + "loss": 0.1141, + "num_input_tokens_seen": 27217408, + "step": 40390 + }, + { + "epoch": 0.986856570493245, + "grad_norm": 22.542917251586914, + "learning_rate": 1.9427995627041107e-06, + "loss": 0.2218, + "num_input_tokens_seen": 27220672, + "step": 40395 + }, + { + "epoch": 0.9869787213250922, + "grad_norm": 10.268771171569824, + "learning_rate": 1.942771131123292e-06, + "loss": 0.1277, + "num_input_tokens_seen": 27224640, + "step": 40400 + }, + { + "epoch": 0.9871008721569394, + "grad_norm": 1.3927884101867676, + "learning_rate": 1.9427426926863876e-06, + "loss": 0.0716, + "num_input_tokens_seen": 27228608, + "step": 40405 + }, + { + "epoch": 0.9872230229887865, + "grad_norm": 10.72795295715332, + "learning_rate": 1.942714247393605e-06, + "loss": 0.1121, + "num_input_tokens_seen": 27231744, + "step": 40410 + }, + { + "epoch": 0.9873451738206337, + "grad_norm": 5.103229522705078, + "learning_rate": 1.942685795245151e-06, + "loss": 0.0523, + "num_input_tokens_seen": 27235712, + "step": 40415 + }, + { + "epoch": 0.9874673246524809, + "grad_norm": 10.616697311401367, + "learning_rate": 1.9426573362412323e-06, + "loss": 0.086, + "num_input_tokens_seen": 27238976, + "step": 40420 + }, + { + "epoch": 0.9875894754843281, + "grad_norm": 14.616541862487793, + "learning_rate": 1.942628870382056e-06, + "loss": 0.2343, + "num_input_tokens_seen": 27241856, + "step": 40425 + }, + { + "epoch": 0.9877116263161753, + "grad_norm": 1.1679258346557617, + "learning_rate": 1.942600397667829e-06, + "loss": 0.1004, + "num_input_tokens_seen": 27245440, + "step": 40430 + }, + { + "epoch": 0.9878337771480223, + "grad_norm": 24.777894973754883, + "learning_rate": 1.942571918098758e-06, + "loss": 0.0552, + "num_input_tokens_seen": 27248896, + "step": 40435 + }, + { + "epoch": 0.9879559279798695, + "grad_norm": 25.75714683532715, + "learning_rate": 1.9425434316750507e-06, + "loss": 0.2256, + "num_input_tokens_seen": 27252416, + "step": 40440 + }, + { + "epoch": 0.9880780788117167, + "grad_norm": 11.695103645324707, + "learning_rate": 1.9425149383969144e-06, + "loss": 0.039, + "num_input_tokens_seen": 27255808, + "step": 40445 + }, + { + "epoch": 0.9882002296435639, + "grad_norm": 34.632381439208984, + "learning_rate": 1.9424864382645553e-06, + "loss": 0.0572, + "num_input_tokens_seen": 27259072, + "step": 40450 + }, + { + "epoch": 0.988322380475411, + "grad_norm": 1.0183019638061523, + "learning_rate": 1.9424579312781817e-06, + "loss": 0.0936, + "num_input_tokens_seen": 27262528, + "step": 40455 + }, + { + "epoch": 0.9884445313072582, + "grad_norm": 25.458181381225586, + "learning_rate": 1.942429417438001e-06, + "loss": 0.0895, + "num_input_tokens_seen": 27265984, + "step": 40460 + }, + { + "epoch": 0.9885666821391054, + "grad_norm": 0.9326586127281189, + "learning_rate": 1.9424008967442193e-06, + "loss": 0.0736, + "num_input_tokens_seen": 27269632, + "step": 40465 + }, + { + "epoch": 0.9886888329709526, + "grad_norm": 26.359806060791016, + "learning_rate": 1.942372369197045e-06, + "loss": 0.073, + "num_input_tokens_seen": 27272640, + "step": 40470 + }, + { + "epoch": 0.9888109838027997, + "grad_norm": 5.261368274688721, + "learning_rate": 1.9423438347966857e-06, + "loss": 0.0215, + "num_input_tokens_seen": 27275776, + "step": 40475 + }, + { + "epoch": 0.9889331346346468, + "grad_norm": 16.494407653808594, + "learning_rate": 1.942315293543348e-06, + "loss": 0.0869, + "num_input_tokens_seen": 27278912, + "step": 40480 + }, + { + "epoch": 0.989055285466494, + "grad_norm": 28.753313064575195, + "learning_rate": 1.9422867454372406e-06, + "loss": 0.0808, + "num_input_tokens_seen": 27282624, + "step": 40485 + }, + { + "epoch": 0.9891774362983412, + "grad_norm": 12.6876859664917, + "learning_rate": 1.9422581904785704e-06, + "loss": 0.1726, + "num_input_tokens_seen": 27286016, + "step": 40490 + }, + { + "epoch": 0.9892995871301884, + "grad_norm": 0.036324337124824524, + "learning_rate": 1.9422296286675447e-06, + "loss": 0.1635, + "num_input_tokens_seen": 27289472, + "step": 40495 + }, + { + "epoch": 0.9894217379620355, + "grad_norm": 0.38467368483543396, + "learning_rate": 1.9422010600043722e-06, + "loss": 0.0267, + "num_input_tokens_seen": 27292800, + "step": 40500 + }, + { + "epoch": 0.9895438887938827, + "grad_norm": 22.768394470214844, + "learning_rate": 1.9421724844892606e-06, + "loss": 0.222, + "num_input_tokens_seen": 27295936, + "step": 40505 + }, + { + "epoch": 0.9896660396257299, + "grad_norm": 27.51454734802246, + "learning_rate": 1.9421439021224164e-06, + "loss": 0.0899, + "num_input_tokens_seen": 27299712, + "step": 40510 + }, + { + "epoch": 0.989788190457577, + "grad_norm": 0.10687658190727234, + "learning_rate": 1.942115312904049e-06, + "loss": 0.0432, + "num_input_tokens_seen": 27302912, + "step": 40515 + }, + { + "epoch": 0.9899103412894242, + "grad_norm": 47.35722351074219, + "learning_rate": 1.9420867168343652e-06, + "loss": 0.0669, + "num_input_tokens_seen": 27306688, + "step": 40520 + }, + { + "epoch": 0.9900324921212713, + "grad_norm": 1.9231311082839966, + "learning_rate": 1.9420581139135733e-06, + "loss": 0.0034, + "num_input_tokens_seen": 27310144, + "step": 40525 + }, + { + "epoch": 0.9901546429531185, + "grad_norm": 2.8595128059387207, + "learning_rate": 1.942029504141882e-06, + "loss": 0.0698, + "num_input_tokens_seen": 27313536, + "step": 40530 + }, + { + "epoch": 0.9902767937849657, + "grad_norm": 8.048189163208008, + "learning_rate": 1.9420008875194986e-06, + "loss": 0.1137, + "num_input_tokens_seen": 27317184, + "step": 40535 + }, + { + "epoch": 0.9903989446168129, + "grad_norm": 20.83180046081543, + "learning_rate": 1.941972264046631e-06, + "loss": 0.0686, + "num_input_tokens_seen": 27320704, + "step": 40540 + }, + { + "epoch": 0.99052109544866, + "grad_norm": 56.8033332824707, + "learning_rate": 1.941943633723488e-06, + "loss": 0.3562, + "num_input_tokens_seen": 27323968, + "step": 40545 + }, + { + "epoch": 0.9906432462805072, + "grad_norm": 0.15781305730342865, + "learning_rate": 1.9419149965502773e-06, + "loss": 0.0403, + "num_input_tokens_seen": 27327232, + "step": 40550 + }, + { + "epoch": 0.9907653971123543, + "grad_norm": 8.692741394042969, + "learning_rate": 1.9418863525272077e-06, + "loss": 0.1088, + "num_input_tokens_seen": 27330176, + "step": 40555 + }, + { + "epoch": 0.9908875479442015, + "grad_norm": 18.1630916595459, + "learning_rate": 1.941857701654487e-06, + "loss": 0.0694, + "num_input_tokens_seen": 27335936, + "step": 40560 + }, + { + "epoch": 0.9910096987760487, + "grad_norm": 0.15101860463619232, + "learning_rate": 1.9418290439323243e-06, + "loss": 0.0502, + "num_input_tokens_seen": 27339392, + "step": 40565 + }, + { + "epoch": 0.9911318496078958, + "grad_norm": 0.8477771878242493, + "learning_rate": 1.9418003793609267e-06, + "loss": 0.1334, + "num_input_tokens_seen": 27342976, + "step": 40570 + }, + { + "epoch": 0.991254000439743, + "grad_norm": 11.633031845092773, + "learning_rate": 1.941771707940504e-06, + "loss": 0.0908, + "num_input_tokens_seen": 27346112, + "step": 40575 + }, + { + "epoch": 0.9913761512715902, + "grad_norm": 8.685921669006348, + "learning_rate": 1.941743029671264e-06, + "loss": 0.0786, + "num_input_tokens_seen": 27349312, + "step": 40580 + }, + { + "epoch": 0.9914983021034374, + "grad_norm": 13.447248458862305, + "learning_rate": 1.9417143445534152e-06, + "loss": 0.1647, + "num_input_tokens_seen": 27352192, + "step": 40585 + }, + { + "epoch": 0.9916204529352844, + "grad_norm": 33.43043899536133, + "learning_rate": 1.9416856525871666e-06, + "loss": 0.1494, + "num_input_tokens_seen": 27355520, + "step": 40590 + }, + { + "epoch": 0.9917426037671316, + "grad_norm": 0.32424071431159973, + "learning_rate": 1.941656953772726e-06, + "loss": 0.1283, + "num_input_tokens_seen": 27359040, + "step": 40595 + }, + { + "epoch": 0.9918647545989788, + "grad_norm": 22.274261474609375, + "learning_rate": 1.9416282481103038e-06, + "loss": 0.1341, + "num_input_tokens_seen": 27361920, + "step": 40600 + }, + { + "epoch": 0.991986905430826, + "grad_norm": 4.439818859100342, + "learning_rate": 1.941599535600107e-06, + "loss": 0.0558, + "num_input_tokens_seen": 27365248, + "step": 40605 + }, + { + "epoch": 0.9921090562626731, + "grad_norm": 37.596778869628906, + "learning_rate": 1.9415708162423452e-06, + "loss": 0.1306, + "num_input_tokens_seen": 27368512, + "step": 40610 + }, + { + "epoch": 0.9922312070945203, + "grad_norm": 20.270496368408203, + "learning_rate": 1.9415420900372275e-06, + "loss": 0.1231, + "num_input_tokens_seen": 27371840, + "step": 40615 + }, + { + "epoch": 0.9923533579263675, + "grad_norm": 9.090349197387695, + "learning_rate": 1.9415133569849622e-06, + "loss": 0.205, + "num_input_tokens_seen": 27375040, + "step": 40620 + }, + { + "epoch": 0.9924755087582147, + "grad_norm": 0.16481897234916687, + "learning_rate": 1.9414846170857587e-06, + "loss": 0.0851, + "num_input_tokens_seen": 27378688, + "step": 40625 + }, + { + "epoch": 0.9925976595900619, + "grad_norm": 0.8626278638839722, + "learning_rate": 1.941455870339826e-06, + "loss": 0.0645, + "num_input_tokens_seen": 27381952, + "step": 40630 + }, + { + "epoch": 0.9927198104219089, + "grad_norm": 25.085214614868164, + "learning_rate": 1.9414271167473726e-06, + "loss": 0.1897, + "num_input_tokens_seen": 27385152, + "step": 40635 + }, + { + "epoch": 0.9928419612537561, + "grad_norm": 0.8803386092185974, + "learning_rate": 1.941398356308608e-06, + "loss": 0.0993, + "num_input_tokens_seen": 27388800, + "step": 40640 + }, + { + "epoch": 0.9929641120856033, + "grad_norm": 1.2641054391860962, + "learning_rate": 1.9413695890237418e-06, + "loss": 0.0957, + "num_input_tokens_seen": 27392512, + "step": 40645 + }, + { + "epoch": 0.9930862629174505, + "grad_norm": 1.7712982892990112, + "learning_rate": 1.9413408148929823e-06, + "loss": 0.1266, + "num_input_tokens_seen": 27397824, + "step": 40650 + }, + { + "epoch": 0.9932084137492976, + "grad_norm": 30.253278732299805, + "learning_rate": 1.941312033916539e-06, + "loss": 0.17, + "num_input_tokens_seen": 27400896, + "step": 40655 + }, + { + "epoch": 0.9933305645811448, + "grad_norm": 8.815448760986328, + "learning_rate": 1.941283246094622e-06, + "loss": 0.1171, + "num_input_tokens_seen": 27404352, + "step": 40660 + }, + { + "epoch": 0.993452715412992, + "grad_norm": 7.9292683601379395, + "learning_rate": 1.9412544514274395e-06, + "loss": 0.0449, + "num_input_tokens_seen": 27407616, + "step": 40665 + }, + { + "epoch": 0.9935748662448391, + "grad_norm": 0.6572582125663757, + "learning_rate": 1.941225649915202e-06, + "loss": 0.1234, + "num_input_tokens_seen": 27410880, + "step": 40670 + }, + { + "epoch": 0.9936970170766863, + "grad_norm": 13.578548431396484, + "learning_rate": 1.941196841558118e-06, + "loss": 0.1513, + "num_input_tokens_seen": 27414272, + "step": 40675 + }, + { + "epoch": 0.9938191679085334, + "grad_norm": 1.2818429470062256, + "learning_rate": 1.9411680263563976e-06, + "loss": 0.154, + "num_input_tokens_seen": 27417536, + "step": 40680 + }, + { + "epoch": 0.9939413187403806, + "grad_norm": 2.6503143310546875, + "learning_rate": 1.9411392043102502e-06, + "loss": 0.1063, + "num_input_tokens_seen": 27421120, + "step": 40685 + }, + { + "epoch": 0.9940634695722278, + "grad_norm": 21.41456413269043, + "learning_rate": 1.9411103754198852e-06, + "loss": 0.0305, + "num_input_tokens_seen": 27424640, + "step": 40690 + }, + { + "epoch": 0.994185620404075, + "grad_norm": 11.669831275939941, + "learning_rate": 1.9410815396855126e-06, + "loss": 0.1007, + "num_input_tokens_seen": 27427776, + "step": 40695 + }, + { + "epoch": 0.9943077712359221, + "grad_norm": 10.491043090820312, + "learning_rate": 1.941052697107342e-06, + "loss": 0.1402, + "num_input_tokens_seen": 27430976, + "step": 40700 + }, + { + "epoch": 0.9944299220677693, + "grad_norm": 0.19844773411750793, + "learning_rate": 1.941023847685583e-06, + "loss": 0.0659, + "num_input_tokens_seen": 27434368, + "step": 40705 + }, + { + "epoch": 0.9945520728996164, + "grad_norm": 6.634429931640625, + "learning_rate": 1.9409949914204454e-06, + "loss": 0.1137, + "num_input_tokens_seen": 27437696, + "step": 40710 + }, + { + "epoch": 0.9946742237314636, + "grad_norm": 4.638437271118164, + "learning_rate": 1.9409661283121393e-06, + "loss": 0.1326, + "num_input_tokens_seen": 27441088, + "step": 40715 + }, + { + "epoch": 0.9947963745633108, + "grad_norm": 8.209115028381348, + "learning_rate": 1.9409372583608743e-06, + "loss": 0.1082, + "num_input_tokens_seen": 27444672, + "step": 40720 + }, + { + "epoch": 0.9949185253951579, + "grad_norm": 20.867942810058594, + "learning_rate": 1.9409083815668604e-06, + "loss": 0.0943, + "num_input_tokens_seen": 27448256, + "step": 40725 + }, + { + "epoch": 0.9950406762270051, + "grad_norm": 41.91875076293945, + "learning_rate": 1.9408794979303077e-06, + "loss": 0.1296, + "num_input_tokens_seen": 27451904, + "step": 40730 + }, + { + "epoch": 0.9951628270588523, + "grad_norm": 20.121931076049805, + "learning_rate": 1.940850607451426e-06, + "loss": 0.0584, + "num_input_tokens_seen": 27455424, + "step": 40735 + }, + { + "epoch": 0.9952849778906995, + "grad_norm": 2.706618070602417, + "learning_rate": 1.940821710130426e-06, + "loss": 0.2389, + "num_input_tokens_seen": 27458944, + "step": 40740 + }, + { + "epoch": 0.9954071287225466, + "grad_norm": 17.6815128326416, + "learning_rate": 1.9407928059675176e-06, + "loss": 0.1535, + "num_input_tokens_seen": 27462272, + "step": 40745 + }, + { + "epoch": 0.9955292795543937, + "grad_norm": 7.666195869445801, + "learning_rate": 1.9407638949629102e-06, + "loss": 0.1028, + "num_input_tokens_seen": 27465792, + "step": 40750 + }, + { + "epoch": 0.9956514303862409, + "grad_norm": 10.247875213623047, + "learning_rate": 1.940734977116815e-06, + "loss": 0.0626, + "num_input_tokens_seen": 27469504, + "step": 40755 + }, + { + "epoch": 0.9957735812180881, + "grad_norm": 5.036080360412598, + "learning_rate": 1.9407060524294426e-06, + "loss": 0.1577, + "num_input_tokens_seen": 27472896, + "step": 40760 + }, + { + "epoch": 0.9958957320499353, + "grad_norm": 14.078655242919922, + "learning_rate": 1.9406771209010024e-06, + "loss": 0.1161, + "num_input_tokens_seen": 27476608, + "step": 40765 + }, + { + "epoch": 0.9960178828817824, + "grad_norm": 20.622859954833984, + "learning_rate": 1.9406481825317052e-06, + "loss": 0.1274, + "num_input_tokens_seen": 27480128, + "step": 40770 + }, + { + "epoch": 0.9961400337136296, + "grad_norm": 0.6095002889633179, + "learning_rate": 1.940619237321761e-06, + "loss": 0.0476, + "num_input_tokens_seen": 27483264, + "step": 40775 + }, + { + "epoch": 0.9962621845454768, + "grad_norm": 15.534045219421387, + "learning_rate": 1.9405902852713812e-06, + "loss": 0.0756, + "num_input_tokens_seen": 27486912, + "step": 40780 + }, + { + "epoch": 0.996384335377324, + "grad_norm": 26.785686492919922, + "learning_rate": 1.940561326380776e-06, + "loss": 0.1296, + "num_input_tokens_seen": 27490176, + "step": 40785 + }, + { + "epoch": 0.996506486209171, + "grad_norm": 10.619406700134277, + "learning_rate": 1.940532360650155e-06, + "loss": 0.0765, + "num_input_tokens_seen": 27493248, + "step": 40790 + }, + { + "epoch": 0.9966286370410182, + "grad_norm": 10.111905097961426, + "learning_rate": 1.9405033880797303e-06, + "loss": 0.0651, + "num_input_tokens_seen": 27496512, + "step": 40795 + }, + { + "epoch": 0.9967507878728654, + "grad_norm": 1.098646640777588, + "learning_rate": 1.940474408669712e-06, + "loss": 0.0417, + "num_input_tokens_seen": 27499776, + "step": 40800 + }, + { + "epoch": 0.9968729387047126, + "grad_norm": 0.4705561697483063, + "learning_rate": 1.9404454224203108e-06, + "loss": 0.016, + "num_input_tokens_seen": 27503168, + "step": 40805 + }, + { + "epoch": 0.9969950895365598, + "grad_norm": 22.709070205688477, + "learning_rate": 1.9404164293317374e-06, + "loss": 0.1012, + "num_input_tokens_seen": 27506304, + "step": 40810 + }, + { + "epoch": 0.9971172403684069, + "grad_norm": 8.215293884277344, + "learning_rate": 1.940387429404203e-06, + "loss": 0.2259, + "num_input_tokens_seen": 27509632, + "step": 40815 + }, + { + "epoch": 0.9972393912002541, + "grad_norm": 18.94011116027832, + "learning_rate": 1.940358422637918e-06, + "loss": 0.0741, + "num_input_tokens_seen": 27513152, + "step": 40820 + }, + { + "epoch": 0.9973615420321013, + "grad_norm": 29.055315017700195, + "learning_rate": 1.940329409033094e-06, + "loss": 0.1664, + "num_input_tokens_seen": 27516352, + "step": 40825 + }, + { + "epoch": 0.9974836928639484, + "grad_norm": 22.35759735107422, + "learning_rate": 1.9403003885899415e-06, + "loss": 0.1112, + "num_input_tokens_seen": 27519616, + "step": 40830 + }, + { + "epoch": 0.9976058436957955, + "grad_norm": 1.5217119455337524, + "learning_rate": 1.9402713613086716e-06, + "loss": 0.0942, + "num_input_tokens_seen": 27522880, + "step": 40835 + }, + { + "epoch": 0.9977279945276427, + "grad_norm": 19.438976287841797, + "learning_rate": 1.9402423271894952e-06, + "loss": 0.2131, + "num_input_tokens_seen": 27525824, + "step": 40840 + }, + { + "epoch": 0.9978501453594899, + "grad_norm": 2.120493173599243, + "learning_rate": 1.9402132862326242e-06, + "loss": 0.0197, + "num_input_tokens_seen": 27529216, + "step": 40845 + }, + { + "epoch": 0.9979722961913371, + "grad_norm": 0.46346285939216614, + "learning_rate": 1.940184238438269e-06, + "loss": 0.0347, + "num_input_tokens_seen": 27532800, + "step": 40850 + }, + { + "epoch": 0.9980944470231842, + "grad_norm": 11.514608383178711, + "learning_rate": 1.940155183806641e-06, + "loss": 0.0903, + "num_input_tokens_seen": 27536064, + "step": 40855 + }, + { + "epoch": 0.9982165978550314, + "grad_norm": 22.47312355041504, + "learning_rate": 1.940126122337952e-06, + "loss": 0.1097, + "num_input_tokens_seen": 27538880, + "step": 40860 + }, + { + "epoch": 0.9983387486868786, + "grad_norm": 0.6225314140319824, + "learning_rate": 1.9400970540324125e-06, + "loss": 0.032, + "num_input_tokens_seen": 27542144, + "step": 40865 + }, + { + "epoch": 0.9984608995187257, + "grad_norm": 9.540884971618652, + "learning_rate": 1.940067978890235e-06, + "loss": 0.09, + "num_input_tokens_seen": 27545472, + "step": 40870 + }, + { + "epoch": 0.9985830503505729, + "grad_norm": 1.1357994079589844, + "learning_rate": 1.9400388969116295e-06, + "loss": 0.1085, + "num_input_tokens_seen": 27548800, + "step": 40875 + }, + { + "epoch": 0.99870520118242, + "grad_norm": 27.24406623840332, + "learning_rate": 1.9400098080968087e-06, + "loss": 0.1362, + "num_input_tokens_seen": 27552448, + "step": 40880 + }, + { + "epoch": 0.9988273520142672, + "grad_norm": 14.14842414855957, + "learning_rate": 1.939980712445984e-06, + "loss": 0.0632, + "num_input_tokens_seen": 27556096, + "step": 40885 + }, + { + "epoch": 0.9989495028461144, + "grad_norm": 10.4360933303833, + "learning_rate": 1.9399516099593666e-06, + "loss": 0.0939, + "num_input_tokens_seen": 27559616, + "step": 40890 + }, + { + "epoch": 0.9990716536779616, + "grad_norm": 21.335147857666016, + "learning_rate": 1.9399225006371684e-06, + "loss": 0.1567, + "num_input_tokens_seen": 27562880, + "step": 40895 + }, + { + "epoch": 0.9991938045098087, + "grad_norm": 2.0332093238830566, + "learning_rate": 1.9398933844796006e-06, + "loss": 0.2327, + "num_input_tokens_seen": 27566336, + "step": 40900 + }, + { + "epoch": 0.9993159553416558, + "grad_norm": 1.2199807167053223, + "learning_rate": 1.9398642614868755e-06, + "loss": 0.091, + "num_input_tokens_seen": 27569920, + "step": 40905 + }, + { + "epoch": 0.999438106173503, + "grad_norm": 7.498171806335449, + "learning_rate": 1.9398351316592048e-06, + "loss": 0.0299, + "num_input_tokens_seen": 27573312, + "step": 40910 + }, + { + "epoch": 0.9995602570053502, + "grad_norm": 16.187875747680664, + "learning_rate": 1.9398059949967998e-06, + "loss": 0.0406, + "num_input_tokens_seen": 27577536, + "step": 40915 + }, + { + "epoch": 0.9996824078371974, + "grad_norm": 0.1693330556154251, + "learning_rate": 1.9397768514998736e-06, + "loss": 0.0677, + "num_input_tokens_seen": 27581056, + "step": 40920 + }, + { + "epoch": 0.9998045586690445, + "grad_norm": 9.364876747131348, + "learning_rate": 1.9397477011686366e-06, + "loss": 0.1449, + "num_input_tokens_seen": 27584320, + "step": 40925 + }, + { + "epoch": 0.9999267095008917, + "grad_norm": 1.984630823135376, + "learning_rate": 1.939718544003302e-06, + "loss": 0.0949, + "num_input_tokens_seen": 27587584, + "step": 40930 + }, + { + "epoch": 1.0000488603327389, + "grad_norm": 0.3797398507595062, + "learning_rate": 1.9396893800040813e-06, + "loss": 0.1214, + "num_input_tokens_seen": 27591136, + "step": 40935 + }, + { + "epoch": 1.0000732904991083, + "eval_loss": 0.12240181118249893, + "eval_runtime": 47.7677, + "eval_samples_per_second": 761.707, + "eval_steps_per_second": 95.232, + "num_input_tokens_seen": 27591776, + "step": 40936 + }, + { + "epoch": 1.000171011164586, + "grad_norm": 49.70656204223633, + "learning_rate": 1.9396602091711864e-06, + "loss": 0.0258, + "num_input_tokens_seen": 27594592, + "step": 40940 + }, + { + "epoch": 1.0002931619964333, + "grad_norm": 33.254581451416016, + "learning_rate": 1.93963103150483e-06, + "loss": 0.0086, + "num_input_tokens_seen": 27598112, + "step": 40945 + }, + { + "epoch": 1.0004153128282804, + "grad_norm": 15.725682258605957, + "learning_rate": 1.939601847005224e-06, + "loss": 0.0999, + "num_input_tokens_seen": 27601696, + "step": 40950 + }, + { + "epoch": 1.0005374636601276, + "grad_norm": 26.894102096557617, + "learning_rate": 1.9395726556725806e-06, + "loss": 0.0866, + "num_input_tokens_seen": 27605536, + "step": 40955 + }, + { + "epoch": 1.0006596144919746, + "grad_norm": 4.941879749298096, + "learning_rate": 1.939543457507112e-06, + "loss": 0.0141, + "num_input_tokens_seen": 27608608, + "step": 40960 + }, + { + "epoch": 1.0007817653238218, + "grad_norm": 0.27386680245399475, + "learning_rate": 1.939514252509031e-06, + "loss": 0.0677, + "num_input_tokens_seen": 27611872, + "step": 40965 + }, + { + "epoch": 1.000903916155669, + "grad_norm": 23.498291015625, + "learning_rate": 1.93948504067855e-06, + "loss": 0.0574, + "num_input_tokens_seen": 27615264, + "step": 40970 + }, + { + "epoch": 1.0010260669875162, + "grad_norm": 0.18982845544815063, + "learning_rate": 1.93945582201588e-06, + "loss": 0.0023, + "num_input_tokens_seen": 27618464, + "step": 40975 + }, + { + "epoch": 1.0011482178193634, + "grad_norm": 0.3378293514251709, + "learning_rate": 1.939426596521235e-06, + "loss": 0.0609, + "num_input_tokens_seen": 27622112, + "step": 40980 + }, + { + "epoch": 1.0012703686512106, + "grad_norm": 0.12434734404087067, + "learning_rate": 1.9393973641948275e-06, + "loss": 0.0448, + "num_input_tokens_seen": 27625312, + "step": 40985 + }, + { + "epoch": 1.0013925194830577, + "grad_norm": 28.14246368408203, + "learning_rate": 1.9393681250368696e-06, + "loss": 0.1127, + "num_input_tokens_seen": 27628320, + "step": 40990 + }, + { + "epoch": 1.001514670314905, + "grad_norm": 0.08595714718103409, + "learning_rate": 1.939338879047574e-06, + "loss": 0.0845, + "num_input_tokens_seen": 27631712, + "step": 40995 + }, + { + "epoch": 1.001636821146752, + "grad_norm": 60.2764892578125, + "learning_rate": 1.9393096262271533e-06, + "loss": 0.118, + "num_input_tokens_seen": 27634848, + "step": 41000 + }, + { + "epoch": 1.001758971978599, + "grad_norm": 0.04198668897151947, + "learning_rate": 1.9392803665758206e-06, + "loss": 0.0737, + "num_input_tokens_seen": 27638048, + "step": 41005 + }, + { + "epoch": 1.0018811228104463, + "grad_norm": 9.843551635742188, + "learning_rate": 1.939251100093788e-06, + "loss": 0.1221, + "num_input_tokens_seen": 27641184, + "step": 41010 + }, + { + "epoch": 1.0020032736422935, + "grad_norm": 0.172799751162529, + "learning_rate": 1.9392218267812687e-06, + "loss": 0.0264, + "num_input_tokens_seen": 27644704, + "step": 41015 + }, + { + "epoch": 1.0021254244741407, + "grad_norm": 1.0534111261367798, + "learning_rate": 1.939192546638476e-06, + "loss": 0.0347, + "num_input_tokens_seen": 27648160, + "step": 41020 + }, + { + "epoch": 1.0022475753059878, + "grad_norm": 0.2743847370147705, + "learning_rate": 1.9391632596656224e-06, + "loss": 0.0593, + "num_input_tokens_seen": 27651296, + "step": 41025 + }, + { + "epoch": 1.002369726137835, + "grad_norm": 0.5838879942893982, + "learning_rate": 1.9391339658629212e-06, + "loss": 0.0681, + "num_input_tokens_seen": 27654304, + "step": 41030 + }, + { + "epoch": 1.0024918769696822, + "grad_norm": 10.347956657409668, + "learning_rate": 1.939104665230585e-06, + "loss": 0.083, + "num_input_tokens_seen": 27657248, + "step": 41035 + }, + { + "epoch": 1.0026140278015294, + "grad_norm": 11.926654815673828, + "learning_rate": 1.939075357768827e-06, + "loss": 0.1933, + "num_input_tokens_seen": 27660576, + "step": 41040 + }, + { + "epoch": 1.0027361786333764, + "grad_norm": 0.05128807947039604, + "learning_rate": 1.9390460434778607e-06, + "loss": 0.0077, + "num_input_tokens_seen": 27664032, + "step": 41045 + }, + { + "epoch": 1.0028583294652236, + "grad_norm": 1.0889049768447876, + "learning_rate": 1.9390167223578984e-06, + "loss": 0.0751, + "num_input_tokens_seen": 27667360, + "step": 41050 + }, + { + "epoch": 1.0029804802970708, + "grad_norm": 9.116876602172852, + "learning_rate": 1.9389873944091544e-06, + "loss": 0.0029, + "num_input_tokens_seen": 27671136, + "step": 41055 + }, + { + "epoch": 1.003102631128918, + "grad_norm": 21.027864456176758, + "learning_rate": 1.9389580596318417e-06, + "loss": 0.0609, + "num_input_tokens_seen": 27674976, + "step": 41060 + }, + { + "epoch": 1.0032247819607651, + "grad_norm": 0.7300817370414734, + "learning_rate": 1.9389287180261733e-06, + "loss": 0.0066, + "num_input_tokens_seen": 27678624, + "step": 41065 + }, + { + "epoch": 1.0033469327926123, + "grad_norm": 23.967458724975586, + "learning_rate": 1.9388993695923627e-06, + "loss": 0.0896, + "num_input_tokens_seen": 27681888, + "step": 41070 + }, + { + "epoch": 1.0034690836244595, + "grad_norm": 4.293575763702393, + "learning_rate": 1.938870014330623e-06, + "loss": 0.0598, + "num_input_tokens_seen": 27685728, + "step": 41075 + }, + { + "epoch": 1.0035912344563067, + "grad_norm": 0.03171401470899582, + "learning_rate": 1.938840652241168e-06, + "loss": 0.1375, + "num_input_tokens_seen": 27689376, + "step": 41080 + }, + { + "epoch": 1.003713385288154, + "grad_norm": 22.17302703857422, + "learning_rate": 1.938811283324212e-06, + "loss": 0.1238, + "num_input_tokens_seen": 27692704, + "step": 41085 + }, + { + "epoch": 1.0038355361200009, + "grad_norm": 0.015287423506379128, + "learning_rate": 1.9387819075799674e-06, + "loss": 0.0372, + "num_input_tokens_seen": 27696224, + "step": 41090 + }, + { + "epoch": 1.003957686951848, + "grad_norm": 0.6027705669403076, + "learning_rate": 1.9387525250086482e-06, + "loss": 0.0507, + "num_input_tokens_seen": 27699424, + "step": 41095 + }, + { + "epoch": 1.0040798377836953, + "grad_norm": 15.068196296691895, + "learning_rate": 1.938723135610468e-06, + "loss": 0.0971, + "num_input_tokens_seen": 27702560, + "step": 41100 + }, + { + "epoch": 1.0042019886155424, + "grad_norm": 18.538557052612305, + "learning_rate": 1.938693739385641e-06, + "loss": 0.1414, + "num_input_tokens_seen": 27706400, + "step": 41105 + }, + { + "epoch": 1.0043241394473896, + "grad_norm": 0.23932264745235443, + "learning_rate": 1.9386643363343806e-06, + "loss": 0.0009, + "num_input_tokens_seen": 27709792, + "step": 41110 + }, + { + "epoch": 1.0044462902792368, + "grad_norm": 20.438920974731445, + "learning_rate": 1.9386349264569004e-06, + "loss": 0.0653, + "num_input_tokens_seen": 27712992, + "step": 41115 + }, + { + "epoch": 1.004568441111084, + "grad_norm": 0.7962263226509094, + "learning_rate": 1.938605509753415e-06, + "loss": 0.0738, + "num_input_tokens_seen": 27716448, + "step": 41120 + }, + { + "epoch": 1.0046905919429312, + "grad_norm": 17.484432220458984, + "learning_rate": 1.9385760862241374e-06, + "loss": 0.0532, + "num_input_tokens_seen": 27719584, + "step": 41125 + }, + { + "epoch": 1.0048127427747784, + "grad_norm": 0.027080107480287552, + "learning_rate": 1.9385466558692825e-06, + "loss": 0.0434, + "num_input_tokens_seen": 27723552, + "step": 41130 + }, + { + "epoch": 1.0049348936066254, + "grad_norm": 2.654306173324585, + "learning_rate": 1.9385172186890636e-06, + "loss": 0.002, + "num_input_tokens_seen": 27726752, + "step": 41135 + }, + { + "epoch": 1.0050570444384725, + "grad_norm": 48.38421630859375, + "learning_rate": 1.938487774683695e-06, + "loss": 0.1055, + "num_input_tokens_seen": 27730336, + "step": 41140 + }, + { + "epoch": 1.0051791952703197, + "grad_norm": 19.56807518005371, + "learning_rate": 1.938458323853391e-06, + "loss": 0.0844, + "num_input_tokens_seen": 27733472, + "step": 41145 + }, + { + "epoch": 1.005301346102167, + "grad_norm": 0.3958321213722229, + "learning_rate": 1.9384288661983656e-06, + "loss": 0.0021, + "num_input_tokens_seen": 27736928, + "step": 41150 + }, + { + "epoch": 1.0054234969340141, + "grad_norm": 38.7618408203125, + "learning_rate": 1.938399401718833e-06, + "loss": 0.1801, + "num_input_tokens_seen": 27740000, + "step": 41155 + }, + { + "epoch": 1.0055456477658613, + "grad_norm": 0.25790756940841675, + "learning_rate": 1.938369930415008e-06, + "loss": 0.0933, + "num_input_tokens_seen": 27743008, + "step": 41160 + }, + { + "epoch": 1.0056677985977085, + "grad_norm": 88.54906463623047, + "learning_rate": 1.938340452287104e-06, + "loss": 0.1244, + "num_input_tokens_seen": 27746464, + "step": 41165 + }, + { + "epoch": 1.0057899494295557, + "grad_norm": 0.13079579174518585, + "learning_rate": 1.938310967335336e-06, + "loss": 0.0529, + "num_input_tokens_seen": 27749920, + "step": 41170 + }, + { + "epoch": 1.0059121002614029, + "grad_norm": 1.1786655187606812, + "learning_rate": 1.9382814755599184e-06, + "loss": 0.04, + "num_input_tokens_seen": 27753248, + "step": 41175 + }, + { + "epoch": 1.0060342510932498, + "grad_norm": 1.638745903968811, + "learning_rate": 1.938251976961065e-06, + "loss": 0.0037, + "num_input_tokens_seen": 27756512, + "step": 41180 + }, + { + "epoch": 1.006156401925097, + "grad_norm": 23.87853240966797, + "learning_rate": 1.9382224715389914e-06, + "loss": 0.1446, + "num_input_tokens_seen": 27759712, + "step": 41185 + }, + { + "epoch": 1.0062785527569442, + "grad_norm": 3.2461514472961426, + "learning_rate": 1.938192959293912e-06, + "loss": 0.0467, + "num_input_tokens_seen": 27763360, + "step": 41190 + }, + { + "epoch": 1.0064007035887914, + "grad_norm": 30.97145652770996, + "learning_rate": 1.9381634402260403e-06, + "loss": 0.0773, + "num_input_tokens_seen": 27767008, + "step": 41195 + }, + { + "epoch": 1.0065228544206386, + "grad_norm": 0.04162455350160599, + "learning_rate": 1.938133914335592e-06, + "loss": 0.0522, + "num_input_tokens_seen": 27770528, + "step": 41200 + }, + { + "epoch": 1.0066450052524858, + "grad_norm": 31.05025291442871, + "learning_rate": 1.9381043816227812e-06, + "loss": 0.081, + "num_input_tokens_seen": 27773600, + "step": 41205 + }, + { + "epoch": 1.006767156084333, + "grad_norm": 2.75881290435791, + "learning_rate": 1.9380748420878235e-06, + "loss": 0.0333, + "num_input_tokens_seen": 27776800, + "step": 41210 + }, + { + "epoch": 1.0068893069161802, + "grad_norm": 0.2996499240398407, + "learning_rate": 1.938045295730933e-06, + "loss": 0.003, + "num_input_tokens_seen": 27779872, + "step": 41215 + }, + { + "epoch": 1.0070114577480274, + "grad_norm": 0.05380595102906227, + "learning_rate": 1.9380157425523252e-06, + "loss": 0.07, + "num_input_tokens_seen": 27783264, + "step": 41220 + }, + { + "epoch": 1.0071336085798743, + "grad_norm": 11.7682523727417, + "learning_rate": 1.937986182552214e-06, + "loss": 0.1728, + "num_input_tokens_seen": 27785952, + "step": 41225 + }, + { + "epoch": 1.0072557594117215, + "grad_norm": 0.028988046571612358, + "learning_rate": 1.9379566157308156e-06, + "loss": 0.0718, + "num_input_tokens_seen": 27788832, + "step": 41230 + }, + { + "epoch": 1.0073779102435687, + "grad_norm": 7.975637435913086, + "learning_rate": 1.937927042088344e-06, + "loss": 0.0436, + "num_input_tokens_seen": 27792672, + "step": 41235 + }, + { + "epoch": 1.007500061075416, + "grad_norm": 0.07105622440576553, + "learning_rate": 1.937897461625015e-06, + "loss": 0.0749, + "num_input_tokens_seen": 27796000, + "step": 41240 + }, + { + "epoch": 1.007622211907263, + "grad_norm": 0.10940805077552795, + "learning_rate": 1.9378678743410432e-06, + "loss": 0.0926, + "num_input_tokens_seen": 27799520, + "step": 41245 + }, + { + "epoch": 1.0077443627391103, + "grad_norm": 8.517210960388184, + "learning_rate": 1.937838280236644e-06, + "loss": 0.1212, + "num_input_tokens_seen": 27802720, + "step": 41250 + }, + { + "epoch": 1.0078665135709575, + "grad_norm": 0.07815233618021011, + "learning_rate": 1.9378086793120323e-06, + "loss": 0.0305, + "num_input_tokens_seen": 27806304, + "step": 41255 + }, + { + "epoch": 1.0079886644028047, + "grad_norm": 0.14662419259548187, + "learning_rate": 1.937779071567424e-06, + "loss": 0.0022, + "num_input_tokens_seen": 27810272, + "step": 41260 + }, + { + "epoch": 1.0081108152346518, + "grad_norm": 81.27674102783203, + "learning_rate": 1.937749457003034e-06, + "loss": 0.0504, + "num_input_tokens_seen": 27813792, + "step": 41265 + }, + { + "epoch": 1.0082329660664988, + "grad_norm": 0.04992228373885155, + "learning_rate": 1.9377198356190775e-06, + "loss": 0.0024, + "num_input_tokens_seen": 27817056, + "step": 41270 + }, + { + "epoch": 1.008355116898346, + "grad_norm": 0.1316540688276291, + "learning_rate": 1.93769020741577e-06, + "loss": 0.1432, + "num_input_tokens_seen": 27820384, + "step": 41275 + }, + { + "epoch": 1.0084772677301932, + "grad_norm": 26.181007385253906, + "learning_rate": 1.937660572393328e-06, + "loss": 0.2132, + "num_input_tokens_seen": 27823904, + "step": 41280 + }, + { + "epoch": 1.0085994185620404, + "grad_norm": 16.70596694946289, + "learning_rate": 1.9376309305519653e-06, + "loss": 0.1263, + "num_input_tokens_seen": 27827104, + "step": 41285 + }, + { + "epoch": 1.0087215693938876, + "grad_norm": 0.11371473222970963, + "learning_rate": 1.9376012818918984e-06, + "loss": 0.0712, + "num_input_tokens_seen": 27830624, + "step": 41290 + }, + { + "epoch": 1.0088437202257348, + "grad_norm": 0.513312816619873, + "learning_rate": 1.937571626413343e-06, + "loss": 0.037, + "num_input_tokens_seen": 27833696, + "step": 41295 + }, + { + "epoch": 1.008965871057582, + "grad_norm": 2.094923257827759, + "learning_rate": 1.9375419641165143e-06, + "loss": 0.0138, + "num_input_tokens_seen": 27837536, + "step": 41300 + }, + { + "epoch": 1.0090880218894291, + "grad_norm": 0.1514255851507187, + "learning_rate": 1.9375122950016287e-06, + "loss": 0.1085, + "num_input_tokens_seen": 27841056, + "step": 41305 + }, + { + "epoch": 1.0092101727212763, + "grad_norm": 0.4462115466594696, + "learning_rate": 1.9374826190689013e-06, + "loss": 0.0351, + "num_input_tokens_seen": 27844448, + "step": 41310 + }, + { + "epoch": 1.0093323235531233, + "grad_norm": 1.3606981039047241, + "learning_rate": 1.937452936318548e-06, + "loss": 0.0918, + "num_input_tokens_seen": 27847520, + "step": 41315 + }, + { + "epoch": 1.0094544743849705, + "grad_norm": 26.182024002075195, + "learning_rate": 1.937423246750785e-06, + "loss": 0.0846, + "num_input_tokens_seen": 27850976, + "step": 41320 + }, + { + "epoch": 1.0095766252168177, + "grad_norm": 11.671547889709473, + "learning_rate": 1.937393550365828e-06, + "loss": 0.0609, + "num_input_tokens_seen": 27853856, + "step": 41325 + }, + { + "epoch": 1.0096987760486649, + "grad_norm": 14.877876281738281, + "learning_rate": 1.9373638471638925e-06, + "loss": 0.0724, + "num_input_tokens_seen": 27857376, + "step": 41330 + }, + { + "epoch": 1.009820926880512, + "grad_norm": 4.672638893127441, + "learning_rate": 1.9373341371451956e-06, + "loss": 0.0478, + "num_input_tokens_seen": 27860704, + "step": 41335 + }, + { + "epoch": 1.0099430777123592, + "grad_norm": 31.55280876159668, + "learning_rate": 1.9373044203099527e-06, + "loss": 0.279, + "num_input_tokens_seen": 27864096, + "step": 41340 + }, + { + "epoch": 1.0100652285442064, + "grad_norm": 0.4305602014064789, + "learning_rate": 1.93727469665838e-06, + "loss": 0.0546, + "num_input_tokens_seen": 27867616, + "step": 41345 + }, + { + "epoch": 1.0101873793760536, + "grad_norm": 0.4437588155269623, + "learning_rate": 1.937244966190693e-06, + "loss": 0.2221, + "num_input_tokens_seen": 27871520, + "step": 41350 + }, + { + "epoch": 1.0103095302079008, + "grad_norm": 0.3621104955673218, + "learning_rate": 1.937215228907109e-06, + "loss": 0.0076, + "num_input_tokens_seen": 27874720, + "step": 41355 + }, + { + "epoch": 1.0104316810397478, + "grad_norm": 1.216752290725708, + "learning_rate": 1.9371854848078434e-06, + "loss": 0.0873, + "num_input_tokens_seen": 27877600, + "step": 41360 + }, + { + "epoch": 1.010553831871595, + "grad_norm": 7.180300235748291, + "learning_rate": 1.9371557338931133e-06, + "loss": 0.1743, + "num_input_tokens_seen": 27881120, + "step": 41365 + }, + { + "epoch": 1.0106759827034422, + "grad_norm": 0.08157268166542053, + "learning_rate": 1.9371259761631346e-06, + "loss": 0.1012, + "num_input_tokens_seen": 27884832, + "step": 41370 + }, + { + "epoch": 1.0107981335352894, + "grad_norm": 141.767333984375, + "learning_rate": 1.9370962116181235e-06, + "loss": 0.0264, + "num_input_tokens_seen": 27888096, + "step": 41375 + }, + { + "epoch": 1.0109202843671365, + "grad_norm": 36.21950149536133, + "learning_rate": 1.9370664402582966e-06, + "loss": 0.2076, + "num_input_tokens_seen": 27890912, + "step": 41380 + }, + { + "epoch": 1.0110424351989837, + "grad_norm": 0.8839470148086548, + "learning_rate": 1.937036662083871e-06, + "loss": 0.0556, + "num_input_tokens_seen": 27894176, + "step": 41385 + }, + { + "epoch": 1.011164586030831, + "grad_norm": 2.825273275375366, + "learning_rate": 1.9370068770950626e-06, + "loss": 0.1091, + "num_input_tokens_seen": 27897376, + "step": 41390 + }, + { + "epoch": 1.0112867368626781, + "grad_norm": 0.04474136605858803, + "learning_rate": 1.936977085292088e-06, + "loss": 0.0532, + "num_input_tokens_seen": 27900832, + "step": 41395 + }, + { + "epoch": 1.0114088876945253, + "grad_norm": 0.22467297315597534, + "learning_rate": 1.936947286675164e-06, + "loss": 0.0966, + "num_input_tokens_seen": 27904288, + "step": 41400 + }, + { + "epoch": 1.0115310385263723, + "grad_norm": 0.5134358406066895, + "learning_rate": 1.9369174812445073e-06, + "loss": 0.0164, + "num_input_tokens_seen": 27907744, + "step": 41405 + }, + { + "epoch": 1.0116531893582195, + "grad_norm": 0.31668102741241455, + "learning_rate": 1.9368876690003347e-06, + "loss": 0.0632, + "num_input_tokens_seen": 27911456, + "step": 41410 + }, + { + "epoch": 1.0117753401900667, + "grad_norm": 0.5316899418830872, + "learning_rate": 1.936857849942863e-06, + "loss": 0.044, + "num_input_tokens_seen": 27914912, + "step": 41415 + }, + { + "epoch": 1.0118974910219138, + "grad_norm": 0.7631782293319702, + "learning_rate": 1.9368280240723093e-06, + "loss": 0.1014, + "num_input_tokens_seen": 27917856, + "step": 41420 + }, + { + "epoch": 1.012019641853761, + "grad_norm": 0.3482151925563812, + "learning_rate": 1.93679819138889e-06, + "loss": 0.0542, + "num_input_tokens_seen": 27921760, + "step": 41425 + }, + { + "epoch": 1.0121417926856082, + "grad_norm": 65.02391052246094, + "learning_rate": 1.9367683518928226e-06, + "loss": 0.0742, + "num_input_tokens_seen": 27925344, + "step": 41430 + }, + { + "epoch": 1.0122639435174554, + "grad_norm": 0.10657556354999542, + "learning_rate": 1.9367385055843234e-06, + "loss": 0.1166, + "num_input_tokens_seen": 27928672, + "step": 41435 + }, + { + "epoch": 1.0123860943493026, + "grad_norm": 102.85113525390625, + "learning_rate": 1.93670865246361e-06, + "loss": 0.0719, + "num_input_tokens_seen": 27931872, + "step": 41440 + }, + { + "epoch": 1.0125082451811498, + "grad_norm": 17.976972579956055, + "learning_rate": 1.9366787925308992e-06, + "loss": 0.0874, + "num_input_tokens_seen": 27935264, + "step": 41445 + }, + { + "epoch": 1.0126303960129968, + "grad_norm": 0.10465455800294876, + "learning_rate": 1.9366489257864084e-06, + "loss": 0.0223, + "num_input_tokens_seen": 27938528, + "step": 41450 + }, + { + "epoch": 1.012752546844844, + "grad_norm": 0.28243395686149597, + "learning_rate": 1.9366190522303543e-06, + "loss": 0.0771, + "num_input_tokens_seen": 27942112, + "step": 41455 + }, + { + "epoch": 1.0128746976766911, + "grad_norm": 28.175445556640625, + "learning_rate": 1.936589171862955e-06, + "loss": 0.1288, + "num_input_tokens_seen": 27945376, + "step": 41460 + }, + { + "epoch": 1.0129968485085383, + "grad_norm": 0.10519753396511078, + "learning_rate": 1.936559284684427e-06, + "loss": 0.0402, + "num_input_tokens_seen": 27949152, + "step": 41465 + }, + { + "epoch": 1.0131189993403855, + "grad_norm": 0.23309500515460968, + "learning_rate": 1.9365293906949885e-06, + "loss": 0.0106, + "num_input_tokens_seen": 27952416, + "step": 41470 + }, + { + "epoch": 1.0132411501722327, + "grad_norm": 0.46023571491241455, + "learning_rate": 1.9364994898948557e-06, + "loss": 0.1945, + "num_input_tokens_seen": 27955936, + "step": 41475 + }, + { + "epoch": 1.01336330100408, + "grad_norm": 0.044198326766490936, + "learning_rate": 1.9364695822842473e-06, + "loss": 0.1181, + "num_input_tokens_seen": 27959328, + "step": 41480 + }, + { + "epoch": 1.013485451835927, + "grad_norm": 0.14169353246688843, + "learning_rate": 1.93643966786338e-06, + "loss": 0.0289, + "num_input_tokens_seen": 27962912, + "step": 41485 + }, + { + "epoch": 1.0136076026677743, + "grad_norm": 14.789438247680664, + "learning_rate": 1.9364097466324717e-06, + "loss": 0.1157, + "num_input_tokens_seen": 27966176, + "step": 41490 + }, + { + "epoch": 1.0137297534996212, + "grad_norm": 42.44548797607422, + "learning_rate": 1.9363798185917394e-06, + "loss": 0.0885, + "num_input_tokens_seen": 27969440, + "step": 41495 + }, + { + "epoch": 1.0138519043314684, + "grad_norm": 0.3688395321369171, + "learning_rate": 1.936349883741402e-06, + "loss": 0.1333, + "num_input_tokens_seen": 27972704, + "step": 41500 + }, + { + "epoch": 1.0139740551633156, + "grad_norm": 0.2943406403064728, + "learning_rate": 1.9363199420816753e-06, + "loss": 0.0017, + "num_input_tokens_seen": 27975904, + "step": 41505 + }, + { + "epoch": 1.0140962059951628, + "grad_norm": 0.2473595291376114, + "learning_rate": 1.936289993612779e-06, + "loss": 0.0493, + "num_input_tokens_seen": 27979424, + "step": 41510 + }, + { + "epoch": 1.01421835682701, + "grad_norm": 47.705326080322266, + "learning_rate": 1.9362600383349296e-06, + "loss": 0.0614, + "num_input_tokens_seen": 27982752, + "step": 41515 + }, + { + "epoch": 1.0143405076588572, + "grad_norm": 0.3024010956287384, + "learning_rate": 1.936230076248346e-06, + "loss": 0.0367, + "num_input_tokens_seen": 27986080, + "step": 41520 + }, + { + "epoch": 1.0144626584907044, + "grad_norm": 0.04337543249130249, + "learning_rate": 1.9362001073532448e-06, + "loss": 0.1619, + "num_input_tokens_seen": 27989344, + "step": 41525 + }, + { + "epoch": 1.0145848093225516, + "grad_norm": 28.89052963256836, + "learning_rate": 1.936170131649845e-06, + "loss": 0.1091, + "num_input_tokens_seen": 27993184, + "step": 41530 + }, + { + "epoch": 1.0147069601543985, + "grad_norm": 12.181092262268066, + "learning_rate": 1.936140149138364e-06, + "loss": 0.009, + "num_input_tokens_seen": 27996512, + "step": 41535 + }, + { + "epoch": 1.0148291109862457, + "grad_norm": 0.18677693605422974, + "learning_rate": 1.93611015981902e-06, + "loss": 0.0661, + "num_input_tokens_seen": 27999840, + "step": 41540 + }, + { + "epoch": 1.014951261818093, + "grad_norm": 1.2122114896774292, + "learning_rate": 1.936080163692031e-06, + "loss": 0.0219, + "num_input_tokens_seen": 28003296, + "step": 41545 + }, + { + "epoch": 1.01507341264994, + "grad_norm": 0.7211357355117798, + "learning_rate": 1.9360501607576155e-06, + "loss": 0.0188, + "num_input_tokens_seen": 28006816, + "step": 41550 + }, + { + "epoch": 1.0151955634817873, + "grad_norm": 0.09014793485403061, + "learning_rate": 1.9360201510159917e-06, + "loss": 0.0162, + "num_input_tokens_seen": 28010144, + "step": 41555 + }, + { + "epoch": 1.0153177143136345, + "grad_norm": 23.518695831298828, + "learning_rate": 1.9359901344673773e-06, + "loss": 0.0835, + "num_input_tokens_seen": 28013344, + "step": 41560 + }, + { + "epoch": 1.0154398651454817, + "grad_norm": 0.16022305190563202, + "learning_rate": 1.935960111111991e-06, + "loss": 0.08, + "num_input_tokens_seen": 28016864, + "step": 41565 + }, + { + "epoch": 1.0155620159773289, + "grad_norm": 87.44234466552734, + "learning_rate": 1.935930080950051e-06, + "loss": 0.0109, + "num_input_tokens_seen": 28020128, + "step": 41570 + }, + { + "epoch": 1.015684166809176, + "grad_norm": 1.3785067796707153, + "learning_rate": 1.9359000439817758e-06, + "loss": 0.0855, + "num_input_tokens_seen": 28023520, + "step": 41575 + }, + { + "epoch": 1.015806317641023, + "grad_norm": 9.503249168395996, + "learning_rate": 1.9358700002073833e-06, + "loss": 0.2021, + "num_input_tokens_seen": 28026592, + "step": 41580 + }, + { + "epoch": 1.0159284684728702, + "grad_norm": 0.3659454882144928, + "learning_rate": 1.935839949627093e-06, + "loss": 0.0022, + "num_input_tokens_seen": 28029920, + "step": 41585 + }, + { + "epoch": 1.0160506193047174, + "grad_norm": 3.5858285427093506, + "learning_rate": 1.9358098922411224e-06, + "loss": 0.0185, + "num_input_tokens_seen": 28033312, + "step": 41590 + }, + { + "epoch": 1.0161727701365646, + "grad_norm": 29.0914363861084, + "learning_rate": 1.935779828049691e-06, + "loss": 0.1054, + "num_input_tokens_seen": 28036704, + "step": 41595 + }, + { + "epoch": 1.0162949209684118, + "grad_norm": 0.0005874556954950094, + "learning_rate": 1.935749757053017e-06, + "loss": 0.1035, + "num_input_tokens_seen": 28040544, + "step": 41600 + }, + { + "epoch": 1.016417071800259, + "grad_norm": 25.869298934936523, + "learning_rate": 1.9357196792513188e-06, + "loss": 0.1913, + "num_input_tokens_seen": 28044064, + "step": 41605 + }, + { + "epoch": 1.0165392226321062, + "grad_norm": 4.090465068817139, + "learning_rate": 1.9356895946448154e-06, + "loss": 0.0951, + "num_input_tokens_seen": 28047776, + "step": 41610 + }, + { + "epoch": 1.0166613734639534, + "grad_norm": 0.31763580441474915, + "learning_rate": 1.9356595032337257e-06, + "loss": 0.1271, + "num_input_tokens_seen": 28050912, + "step": 41615 + }, + { + "epoch": 1.0167835242958005, + "grad_norm": 18.087068557739258, + "learning_rate": 1.935629405018269e-06, + "loss": 0.1427, + "num_input_tokens_seen": 28054304, + "step": 41620 + }, + { + "epoch": 1.0169056751276475, + "grad_norm": 45.21240997314453, + "learning_rate": 1.9355992999986627e-06, + "loss": 0.1011, + "num_input_tokens_seen": 28058144, + "step": 41625 + }, + { + "epoch": 1.0170278259594947, + "grad_norm": 81.54298400878906, + "learning_rate": 1.9355691881751272e-06, + "loss": 0.1637, + "num_input_tokens_seen": 28061728, + "step": 41630 + }, + { + "epoch": 1.017149976791342, + "grad_norm": 0.40756484866142273, + "learning_rate": 1.9355390695478805e-06, + "loss": 0.0612, + "num_input_tokens_seen": 28065376, + "step": 41635 + }, + { + "epoch": 1.017272127623189, + "grad_norm": 11.844685554504395, + "learning_rate": 1.9355089441171423e-06, + "loss": 0.1635, + "num_input_tokens_seen": 28069216, + "step": 41640 + }, + { + "epoch": 1.0173942784550363, + "grad_norm": 0.27210983633995056, + "learning_rate": 1.935478811883131e-06, + "loss": 0.0512, + "num_input_tokens_seen": 28072416, + "step": 41645 + }, + { + "epoch": 1.0175164292868835, + "grad_norm": 36.61519241333008, + "learning_rate": 1.935448672846067e-06, + "loss": 0.1672, + "num_input_tokens_seen": 28075488, + "step": 41650 + }, + { + "epoch": 1.0176385801187307, + "grad_norm": 13.378524780273438, + "learning_rate": 1.935418527006168e-06, + "loss": 0.1745, + "num_input_tokens_seen": 28078688, + "step": 41655 + }, + { + "epoch": 1.0177607309505778, + "grad_norm": 4.558134078979492, + "learning_rate": 1.9353883743636542e-06, + "loss": 0.0607, + "num_input_tokens_seen": 28081824, + "step": 41660 + }, + { + "epoch": 1.017882881782425, + "grad_norm": 31.944599151611328, + "learning_rate": 1.9353582149187444e-06, + "loss": 0.2085, + "num_input_tokens_seen": 28085408, + "step": 41665 + }, + { + "epoch": 1.018005032614272, + "grad_norm": 0.09376510232686996, + "learning_rate": 1.935328048671658e-06, + "loss": 0.0043, + "num_input_tokens_seen": 28089184, + "step": 41670 + }, + { + "epoch": 1.0181271834461192, + "grad_norm": 1.3944560289382935, + "learning_rate": 1.935297875622615e-06, + "loss": 0.0094, + "num_input_tokens_seen": 28092384, + "step": 41675 + }, + { + "epoch": 1.0182493342779664, + "grad_norm": 10.583409309387207, + "learning_rate": 1.9352676957718335e-06, + "loss": 0.201, + "num_input_tokens_seen": 28095520, + "step": 41680 + }, + { + "epoch": 1.0183714851098136, + "grad_norm": 0.2167307585477829, + "learning_rate": 1.9352375091195343e-06, + "loss": 0.0463, + "num_input_tokens_seen": 28098656, + "step": 41685 + }, + { + "epoch": 1.0184936359416608, + "grad_norm": 7.407865524291992, + "learning_rate": 1.935207315665936e-06, + "loss": 0.0568, + "num_input_tokens_seen": 28102752, + "step": 41690 + }, + { + "epoch": 1.018615786773508, + "grad_norm": 0.38910236954689026, + "learning_rate": 1.935177115411259e-06, + "loss": 0.048, + "num_input_tokens_seen": 28106592, + "step": 41695 + }, + { + "epoch": 1.0187379376053551, + "grad_norm": 10.388458251953125, + "learning_rate": 1.9351469083557223e-06, + "loss": 0.0385, + "num_input_tokens_seen": 28109856, + "step": 41700 + }, + { + "epoch": 1.0188600884372023, + "grad_norm": 73.03234100341797, + "learning_rate": 1.935116694499546e-06, + "loss": 0.0921, + "num_input_tokens_seen": 28113120, + "step": 41705 + }, + { + "epoch": 1.0189822392690495, + "grad_norm": 13.084054946899414, + "learning_rate": 1.9350864738429493e-06, + "loss": 0.0033, + "num_input_tokens_seen": 28116448, + "step": 41710 + }, + { + "epoch": 1.0191043901008965, + "grad_norm": 0.3512897491455078, + "learning_rate": 1.9350562463861524e-06, + "loss": 0.0259, + "num_input_tokens_seen": 28120096, + "step": 41715 + }, + { + "epoch": 1.0192265409327437, + "grad_norm": 0.3952520489692688, + "learning_rate": 1.9350260121293746e-06, + "loss": 0.0799, + "num_input_tokens_seen": 28123616, + "step": 41720 + }, + { + "epoch": 1.0193486917645909, + "grad_norm": 17.376245498657227, + "learning_rate": 1.9349957710728365e-06, + "loss": 0.0821, + "num_input_tokens_seen": 28126880, + "step": 41725 + }, + { + "epoch": 1.019470842596438, + "grad_norm": 52.214195251464844, + "learning_rate": 1.9349655232167575e-06, + "loss": 0.0694, + "num_input_tokens_seen": 28130528, + "step": 41730 + }, + { + "epoch": 1.0195929934282852, + "grad_norm": 2.797252893447876, + "learning_rate": 1.934935268561358e-06, + "loss": 0.1287, + "num_input_tokens_seen": 28133984, + "step": 41735 + }, + { + "epoch": 1.0197151442601324, + "grad_norm": 4.020275592803955, + "learning_rate": 1.9349050071068574e-06, + "loss": 0.0028, + "num_input_tokens_seen": 28137632, + "step": 41740 + }, + { + "epoch": 1.0198372950919796, + "grad_norm": 27.068878173828125, + "learning_rate": 1.9348747388534763e-06, + "loss": 0.1427, + "num_input_tokens_seen": 28141408, + "step": 41745 + }, + { + "epoch": 1.0199594459238268, + "grad_norm": 0.11553628742694855, + "learning_rate": 1.9348444638014343e-06, + "loss": 0.0793, + "num_input_tokens_seen": 28145184, + "step": 41750 + }, + { + "epoch": 1.020081596755674, + "grad_norm": 2.73980712890625, + "learning_rate": 1.9348141819509522e-06, + "loss": 0.0018, + "num_input_tokens_seen": 28148384, + "step": 41755 + }, + { + "epoch": 1.020203747587521, + "grad_norm": 41.98025131225586, + "learning_rate": 1.93478389330225e-06, + "loss": 0.2052, + "num_input_tokens_seen": 28151840, + "step": 41760 + }, + { + "epoch": 1.0203258984193682, + "grad_norm": 0.1199553906917572, + "learning_rate": 1.934753597855548e-06, + "loss": 0.0889, + "num_input_tokens_seen": 28155104, + "step": 41765 + }, + { + "epoch": 1.0204480492512153, + "grad_norm": 0.15397368371486664, + "learning_rate": 1.9347232956110663e-06, + "loss": 0.0625, + "num_input_tokens_seen": 28158048, + "step": 41770 + }, + { + "epoch": 1.0205702000830625, + "grad_norm": 25.988941192626953, + "learning_rate": 1.9346929865690258e-06, + "loss": 0.1216, + "num_input_tokens_seen": 28161440, + "step": 41775 + }, + { + "epoch": 1.0206923509149097, + "grad_norm": 0.09182299673557281, + "learning_rate": 1.934662670729646e-06, + "loss": 0.0483, + "num_input_tokens_seen": 28165216, + "step": 41780 + }, + { + "epoch": 1.020814501746757, + "grad_norm": 56.44713592529297, + "learning_rate": 1.9346323480931475e-06, + "loss": 0.0602, + "num_input_tokens_seen": 28168672, + "step": 41785 + }, + { + "epoch": 1.020936652578604, + "grad_norm": 0.13070183992385864, + "learning_rate": 1.934602018659752e-06, + "loss": 0.1541, + "num_input_tokens_seen": 28172128, + "step": 41790 + }, + { + "epoch": 1.0210588034104513, + "grad_norm": 2.3567676544189453, + "learning_rate": 1.9345716824296787e-06, + "loss": 0.1585, + "num_input_tokens_seen": 28177696, + "step": 41795 + }, + { + "epoch": 1.0211809542422985, + "grad_norm": 0.1473759561777115, + "learning_rate": 1.9345413394031487e-06, + "loss": 0.0411, + "num_input_tokens_seen": 28181216, + "step": 41800 + }, + { + "epoch": 1.0213031050741455, + "grad_norm": 0.11678832769393921, + "learning_rate": 1.9345109895803834e-06, + "loss": 0.0372, + "num_input_tokens_seen": 28184352, + "step": 41805 + }, + { + "epoch": 1.0214252559059926, + "grad_norm": 89.31144714355469, + "learning_rate": 1.934480632961602e-06, + "loss": 0.0806, + "num_input_tokens_seen": 28187616, + "step": 41810 + }, + { + "epoch": 1.0215474067378398, + "grad_norm": 0.1610347330570221, + "learning_rate": 1.9344502695470268e-06, + "loss": 0.1387, + "num_input_tokens_seen": 28191136, + "step": 41815 + }, + { + "epoch": 1.021669557569687, + "grad_norm": 9.442001342773438, + "learning_rate": 1.9344198993368776e-06, + "loss": 0.0032, + "num_input_tokens_seen": 28194336, + "step": 41820 + }, + { + "epoch": 1.0217917084015342, + "grad_norm": 0.17783841490745544, + "learning_rate": 1.9343895223313753e-06, + "loss": 0.094, + "num_input_tokens_seen": 28198240, + "step": 41825 + }, + { + "epoch": 1.0219138592333814, + "grad_norm": 0.03595606982707977, + "learning_rate": 1.9343591385307414e-06, + "loss": 0.105, + "num_input_tokens_seen": 28201248, + "step": 41830 + }, + { + "epoch": 1.0220360100652286, + "grad_norm": 11.369623184204102, + "learning_rate": 1.9343287479351964e-06, + "loss": 0.1136, + "num_input_tokens_seen": 28204448, + "step": 41835 + }, + { + "epoch": 1.0221581608970758, + "grad_norm": 44.781982421875, + "learning_rate": 1.9342983505449615e-06, + "loss": 0.1168, + "num_input_tokens_seen": 28207520, + "step": 41840 + }, + { + "epoch": 1.022280311728923, + "grad_norm": 0.11527753621339798, + "learning_rate": 1.9342679463602578e-06, + "loss": 0.1676, + "num_input_tokens_seen": 28210848, + "step": 41845 + }, + { + "epoch": 1.02240246256077, + "grad_norm": 8.457847595214844, + "learning_rate": 1.9342375353813062e-06, + "loss": 0.1082, + "num_input_tokens_seen": 28214496, + "step": 41850 + }, + { + "epoch": 1.0225246133926171, + "grad_norm": 3.7740113735198975, + "learning_rate": 1.9342071176083276e-06, + "loss": 0.0846, + "num_input_tokens_seen": 28217824, + "step": 41855 + }, + { + "epoch": 1.0226467642244643, + "grad_norm": 9.33716869354248, + "learning_rate": 1.934176693041544e-06, + "loss": 0.0428, + "num_input_tokens_seen": 28220896, + "step": 41860 + }, + { + "epoch": 1.0227689150563115, + "grad_norm": 8.659668922424316, + "learning_rate": 1.9341462616811765e-06, + "loss": 0.1289, + "num_input_tokens_seen": 28224544, + "step": 41865 + }, + { + "epoch": 1.0228910658881587, + "grad_norm": 36.60452651977539, + "learning_rate": 1.9341158235274455e-06, + "loss": 0.0307, + "num_input_tokens_seen": 28227360, + "step": 41870 + }, + { + "epoch": 1.023013216720006, + "grad_norm": 2.6350722312927246, + "learning_rate": 1.9340853785805733e-06, + "loss": 0.0926, + "num_input_tokens_seen": 28230752, + "step": 41875 + }, + { + "epoch": 1.023135367551853, + "grad_norm": 33.65208053588867, + "learning_rate": 1.934054926840781e-06, + "loss": 0.0647, + "num_input_tokens_seen": 28233888, + "step": 41880 + }, + { + "epoch": 1.0232575183837003, + "grad_norm": 5.014811992645264, + "learning_rate": 1.9340244683082898e-06, + "loss": 0.0762, + "num_input_tokens_seen": 28236960, + "step": 41885 + }, + { + "epoch": 1.0233796692155475, + "grad_norm": 0.15645256638526917, + "learning_rate": 1.933994002983322e-06, + "loss": 0.0394, + "num_input_tokens_seen": 28240480, + "step": 41890 + }, + { + "epoch": 1.0235018200473944, + "grad_norm": 41.017433166503906, + "learning_rate": 1.933963530866098e-06, + "loss": 0.0261, + "num_input_tokens_seen": 28243680, + "step": 41895 + }, + { + "epoch": 1.0236239708792416, + "grad_norm": 0.6035808324813843, + "learning_rate": 1.93393305195684e-06, + "loss": 0.095, + "num_input_tokens_seen": 28247456, + "step": 41900 + }, + { + "epoch": 1.0237461217110888, + "grad_norm": 0.4977245628833771, + "learning_rate": 1.93390256625577e-06, + "loss": 0.0239, + "num_input_tokens_seen": 28250720, + "step": 41905 + }, + { + "epoch": 1.023868272542936, + "grad_norm": 12.085318565368652, + "learning_rate": 1.9338720737631094e-06, + "loss": 0.0914, + "num_input_tokens_seen": 28253792, + "step": 41910 + }, + { + "epoch": 1.0239904233747832, + "grad_norm": 52.246482849121094, + "learning_rate": 1.9338415744790796e-06, + "loss": 0.0252, + "num_input_tokens_seen": 28257248, + "step": 41915 + }, + { + "epoch": 1.0241125742066304, + "grad_norm": 0.08823520690202713, + "learning_rate": 1.933811068403903e-06, + "loss": 0.1487, + "num_input_tokens_seen": 28260832, + "step": 41920 + }, + { + "epoch": 1.0242347250384776, + "grad_norm": 2.159844160079956, + "learning_rate": 1.933780555537801e-06, + "loss": 0.033, + "num_input_tokens_seen": 28264160, + "step": 41925 + }, + { + "epoch": 1.0243568758703248, + "grad_norm": 67.18186950683594, + "learning_rate": 1.9337500358809953e-06, + "loss": 0.1593, + "num_input_tokens_seen": 28267616, + "step": 41930 + }, + { + "epoch": 1.024479026702172, + "grad_norm": 0.1259065419435501, + "learning_rate": 1.9337195094337086e-06, + "loss": 0.0448, + "num_input_tokens_seen": 28270624, + "step": 41935 + }, + { + "epoch": 1.024601177534019, + "grad_norm": 0.07436679303646088, + "learning_rate": 1.9336889761961627e-06, + "loss": 0.0556, + "num_input_tokens_seen": 28273568, + "step": 41940 + }, + { + "epoch": 1.024723328365866, + "grad_norm": 11.188752174377441, + "learning_rate": 1.933658436168579e-06, + "loss": 0.1861, + "num_input_tokens_seen": 28276704, + "step": 41945 + }, + { + "epoch": 1.0248454791977133, + "grad_norm": 69.03654479980469, + "learning_rate": 1.93362788935118e-06, + "loss": 0.057, + "num_input_tokens_seen": 28279904, + "step": 41950 + }, + { + "epoch": 1.0249676300295605, + "grad_norm": 2.1229348182678223, + "learning_rate": 1.933597335744188e-06, + "loss": 0.1571, + "num_input_tokens_seen": 28283744, + "step": 41955 + }, + { + "epoch": 1.0250897808614077, + "grad_norm": 37.386741638183594, + "learning_rate": 1.933566775347825e-06, + "loss": 0.0467, + "num_input_tokens_seen": 28286944, + "step": 41960 + }, + { + "epoch": 1.0252119316932549, + "grad_norm": 0.11623575538396835, + "learning_rate": 1.9335362081623134e-06, + "loss": 0.0018, + "num_input_tokens_seen": 28290464, + "step": 41965 + }, + { + "epoch": 1.025334082525102, + "grad_norm": 67.46884155273438, + "learning_rate": 1.9335056341878754e-06, + "loss": 0.1227, + "num_input_tokens_seen": 28294816, + "step": 41970 + }, + { + "epoch": 1.0254562333569492, + "grad_norm": 0.10298215597867966, + "learning_rate": 1.9334750534247335e-06, + "loss": 0.0017, + "num_input_tokens_seen": 28299808, + "step": 41975 + }, + { + "epoch": 1.0255783841887964, + "grad_norm": 0.2926267981529236, + "learning_rate": 1.9334444658731095e-06, + "loss": 0.1451, + "num_input_tokens_seen": 28303136, + "step": 41980 + }, + { + "epoch": 1.0257005350206434, + "grad_norm": 0.856563150882721, + "learning_rate": 1.9334138715332267e-06, + "loss": 0.1345, + "num_input_tokens_seen": 28306272, + "step": 41985 + }, + { + "epoch": 1.0258226858524906, + "grad_norm": 0.43083885312080383, + "learning_rate": 1.933383270405307e-06, + "loss": 0.0926, + "num_input_tokens_seen": 28309472, + "step": 41990 + }, + { + "epoch": 1.0259448366843378, + "grad_norm": 2.0180540084838867, + "learning_rate": 1.933352662489573e-06, + "loss": 0.0502, + "num_input_tokens_seen": 28312480, + "step": 41995 + }, + { + "epoch": 1.026066987516185, + "grad_norm": 1.1164207458496094, + "learning_rate": 1.9333220477862476e-06, + "loss": 0.0655, + "num_input_tokens_seen": 28316192, + "step": 42000 + }, + { + "epoch": 1.0261891383480322, + "grad_norm": 0.37522655725479126, + "learning_rate": 1.9332914262955533e-06, + "loss": 0.0018, + "num_input_tokens_seen": 28320032, + "step": 42005 + }, + { + "epoch": 1.0263112891798793, + "grad_norm": 0.19359171390533447, + "learning_rate": 1.9332607980177124e-06, + "loss": 0.0015, + "num_input_tokens_seen": 28323936, + "step": 42010 + }, + { + "epoch": 1.0264334400117265, + "grad_norm": 47.79861068725586, + "learning_rate": 1.9332301629529484e-06, + "loss": 0.1486, + "num_input_tokens_seen": 28327072, + "step": 42015 + }, + { + "epoch": 1.0265555908435737, + "grad_norm": 9.178513526916504, + "learning_rate": 1.9331995211014833e-06, + "loss": 0.1867, + "num_input_tokens_seen": 28330272, + "step": 42020 + }, + { + "epoch": 1.026677741675421, + "grad_norm": 61.96234893798828, + "learning_rate": 1.9331688724635406e-06, + "loss": 0.0235, + "num_input_tokens_seen": 28333792, + "step": 42025 + }, + { + "epoch": 1.0267998925072679, + "grad_norm": 32.31514358520508, + "learning_rate": 1.9331382170393424e-06, + "loss": 0.0892, + "num_input_tokens_seen": 28337056, + "step": 42030 + }, + { + "epoch": 1.026922043339115, + "grad_norm": 7.391241073608398, + "learning_rate": 1.9331075548291125e-06, + "loss": 0.0346, + "num_input_tokens_seen": 28340576, + "step": 42035 + }, + { + "epoch": 1.0270441941709623, + "grad_norm": 23.004222869873047, + "learning_rate": 1.933076885833073e-06, + "loss": 0.0051, + "num_input_tokens_seen": 28343776, + "step": 42040 + }, + { + "epoch": 1.0271663450028095, + "grad_norm": 30.365352630615234, + "learning_rate": 1.933046210051448e-06, + "loss": 0.0485, + "num_input_tokens_seen": 28347232, + "step": 42045 + }, + { + "epoch": 1.0272884958346566, + "grad_norm": 0.05469789728522301, + "learning_rate": 1.9330155274844597e-06, + "loss": 0.1192, + "num_input_tokens_seen": 28350752, + "step": 42050 + }, + { + "epoch": 1.0274106466665038, + "grad_norm": 0.6367778182029724, + "learning_rate": 1.9329848381323318e-06, + "loss": 0.1301, + "num_input_tokens_seen": 28353952, + "step": 42055 + }, + { + "epoch": 1.027532797498351, + "grad_norm": 0.13280487060546875, + "learning_rate": 1.932954141995287e-06, + "loss": 0.0456, + "num_input_tokens_seen": 28358304, + "step": 42060 + }, + { + "epoch": 1.0276549483301982, + "grad_norm": 7.7528486251831055, + "learning_rate": 1.932923439073549e-06, + "loss": 0.1302, + "num_input_tokens_seen": 28361056, + "step": 42065 + }, + { + "epoch": 1.0277770991620452, + "grad_norm": 0.5485358834266663, + "learning_rate": 1.93289272936734e-06, + "loss": 0.091, + "num_input_tokens_seen": 28364256, + "step": 42070 + }, + { + "epoch": 1.0278992499938924, + "grad_norm": 27.97885513305664, + "learning_rate": 1.932862012876885e-06, + "loss": 0.1344, + "num_input_tokens_seen": 28367584, + "step": 42075 + }, + { + "epoch": 1.0280214008257396, + "grad_norm": 1.5125770568847656, + "learning_rate": 1.9328312896024063e-06, + "loss": 0.0863, + "num_input_tokens_seen": 28370976, + "step": 42080 + }, + { + "epoch": 1.0281435516575868, + "grad_norm": 77.2655029296875, + "learning_rate": 1.932800559544127e-06, + "loss": 0.0371, + "num_input_tokens_seen": 28374496, + "step": 42085 + }, + { + "epoch": 1.028265702489434, + "grad_norm": 1.292657494544983, + "learning_rate": 1.932769822702272e-06, + "loss": 0.0506, + "num_input_tokens_seen": 28377696, + "step": 42090 + }, + { + "epoch": 1.0283878533212811, + "grad_norm": 0.18110032379627228, + "learning_rate": 1.9327390790770636e-06, + "loss": 0.1733, + "num_input_tokens_seen": 28381216, + "step": 42095 + }, + { + "epoch": 1.0285100041531283, + "grad_norm": 19.233768463134766, + "learning_rate": 1.9327083286687256e-06, + "loss": 0.0751, + "num_input_tokens_seen": 28384544, + "step": 42100 + }, + { + "epoch": 1.0286321549849755, + "grad_norm": 23.182018280029297, + "learning_rate": 1.932677571477482e-06, + "loss": 0.2169, + "num_input_tokens_seen": 28388064, + "step": 42105 + }, + { + "epoch": 1.0287543058168227, + "grad_norm": 50.74353790283203, + "learning_rate": 1.9326468075035564e-06, + "loss": 0.1519, + "num_input_tokens_seen": 28391200, + "step": 42110 + }, + { + "epoch": 1.0288764566486697, + "grad_norm": 0.3187883794307709, + "learning_rate": 1.932616036747172e-06, + "loss": 0.0666, + "num_input_tokens_seen": 28394528, + "step": 42115 + }, + { + "epoch": 1.0289986074805169, + "grad_norm": 9.614219665527344, + "learning_rate": 1.932585259208553e-06, + "loss": 0.0394, + "num_input_tokens_seen": 28398048, + "step": 42120 + }, + { + "epoch": 1.029120758312364, + "grad_norm": 0.8373148441314697, + "learning_rate": 1.932554474887923e-06, + "loss": 0.0299, + "num_input_tokens_seen": 28401440, + "step": 42125 + }, + { + "epoch": 1.0292429091442112, + "grad_norm": 51.213287353515625, + "learning_rate": 1.9325236837855068e-06, + "loss": 0.0323, + "num_input_tokens_seen": 28404576, + "step": 42130 + }, + { + "epoch": 1.0293650599760584, + "grad_norm": 0.054223284125328064, + "learning_rate": 1.932492885901527e-06, + "loss": 0.1606, + "num_input_tokens_seen": 28407904, + "step": 42135 + }, + { + "epoch": 1.0294872108079056, + "grad_norm": 0.13917988538742065, + "learning_rate": 1.932462081236208e-06, + "loss": 0.0915, + "num_input_tokens_seen": 28410912, + "step": 42140 + }, + { + "epoch": 1.0296093616397528, + "grad_norm": 91.04314422607422, + "learning_rate": 1.932431269789774e-06, + "loss": 0.0902, + "num_input_tokens_seen": 28414560, + "step": 42145 + }, + { + "epoch": 1.0297315124716, + "grad_norm": 0.33241578936576843, + "learning_rate": 1.932400451562449e-06, + "loss": 0.0204, + "num_input_tokens_seen": 28418080, + "step": 42150 + }, + { + "epoch": 1.0298536633034472, + "grad_norm": 0.13492560386657715, + "learning_rate": 1.9323696265544572e-06, + "loss": 0.1347, + "num_input_tokens_seen": 28421152, + "step": 42155 + }, + { + "epoch": 1.0299758141352942, + "grad_norm": 14.48997688293457, + "learning_rate": 1.9323387947660227e-06, + "loss": 0.0876, + "num_input_tokens_seen": 28424480, + "step": 42160 + }, + { + "epoch": 1.0300979649671413, + "grad_norm": 0.041152097284793854, + "learning_rate": 1.93230795619737e-06, + "loss": 0.069, + "num_input_tokens_seen": 28428000, + "step": 42165 + }, + { + "epoch": 1.0302201157989885, + "grad_norm": 15.964864730834961, + "learning_rate": 1.9322771108487227e-06, + "loss": 0.0107, + "num_input_tokens_seen": 28431776, + "step": 42170 + }, + { + "epoch": 1.0303422666308357, + "grad_norm": 28.510271072387695, + "learning_rate": 1.9322462587203056e-06, + "loss": 0.1202, + "num_input_tokens_seen": 28434912, + "step": 42175 + }, + { + "epoch": 1.030464417462683, + "grad_norm": 0.6475540995597839, + "learning_rate": 1.932215399812343e-06, + "loss": 0.1102, + "num_input_tokens_seen": 28438432, + "step": 42180 + }, + { + "epoch": 1.03058656829453, + "grad_norm": 0.31698504090309143, + "learning_rate": 1.9321845341250592e-06, + "loss": 0.0436, + "num_input_tokens_seen": 28441568, + "step": 42185 + }, + { + "epoch": 1.0307087191263773, + "grad_norm": 11.586368560791016, + "learning_rate": 1.932153661658679e-06, + "loss": 0.1289, + "num_input_tokens_seen": 28444704, + "step": 42190 + }, + { + "epoch": 1.0308308699582245, + "grad_norm": 0.12284345924854279, + "learning_rate": 1.932122782413426e-06, + "loss": 0.0261, + "num_input_tokens_seen": 28447840, + "step": 42195 + }, + { + "epoch": 1.0309530207900717, + "grad_norm": 0.2812725007534027, + "learning_rate": 1.9320918963895262e-06, + "loss": 0.0126, + "num_input_tokens_seen": 28450784, + "step": 42200 + }, + { + "epoch": 1.0310751716219186, + "grad_norm": 26.277828216552734, + "learning_rate": 1.932061003587203e-06, + "loss": 0.0412, + "num_input_tokens_seen": 28454432, + "step": 42205 + }, + { + "epoch": 1.0311973224537658, + "grad_norm": 0.1749674528837204, + "learning_rate": 1.9320301040066816e-06, + "loss": 0.0308, + "num_input_tokens_seen": 28457568, + "step": 42210 + }, + { + "epoch": 1.031319473285613, + "grad_norm": 0.3987172544002533, + "learning_rate": 1.9319991976481863e-06, + "loss": 0.1595, + "num_input_tokens_seen": 28460896, + "step": 42215 + }, + { + "epoch": 1.0314416241174602, + "grad_norm": 29.454450607299805, + "learning_rate": 1.9319682845119425e-06, + "loss": 0.1558, + "num_input_tokens_seen": 28464032, + "step": 42220 + }, + { + "epoch": 1.0315637749493074, + "grad_norm": 73.37692260742188, + "learning_rate": 1.9319373645981748e-06, + "loss": 0.0608, + "num_input_tokens_seen": 28467104, + "step": 42225 + }, + { + "epoch": 1.0316859257811546, + "grad_norm": 4.168240547180176, + "learning_rate": 1.9319064379071075e-06, + "loss": 0.0129, + "num_input_tokens_seen": 28470432, + "step": 42230 + }, + { + "epoch": 1.0318080766130018, + "grad_norm": 5.503375053405762, + "learning_rate": 1.931875504438966e-06, + "loss": 0.0663, + "num_input_tokens_seen": 28476000, + "step": 42235 + }, + { + "epoch": 1.031930227444849, + "grad_norm": 0.5394633412361145, + "learning_rate": 1.931844564193976e-06, + "loss": 0.1376, + "num_input_tokens_seen": 28481120, + "step": 42240 + }, + { + "epoch": 1.0320523782766962, + "grad_norm": 141.67755126953125, + "learning_rate": 1.9318136171723606e-06, + "loss": 0.2042, + "num_input_tokens_seen": 28484896, + "step": 42245 + }, + { + "epoch": 1.0321745291085431, + "grad_norm": 0.05775103718042374, + "learning_rate": 1.9317826633743464e-06, + "loss": 0.0014, + "num_input_tokens_seen": 28488416, + "step": 42250 + }, + { + "epoch": 1.0322966799403903, + "grad_norm": 0.0917782261967659, + "learning_rate": 1.9317517028001584e-06, + "loss": 0.0013, + "num_input_tokens_seen": 28491936, + "step": 42255 + }, + { + "epoch": 1.0324188307722375, + "grad_norm": 8.065529823303223, + "learning_rate": 1.9317207354500206e-06, + "loss": 0.2312, + "num_input_tokens_seen": 28495392, + "step": 42260 + }, + { + "epoch": 1.0325409816040847, + "grad_norm": 25.31867027282715, + "learning_rate": 1.9316897613241596e-06, + "loss": 0.1211, + "num_input_tokens_seen": 28498592, + "step": 42265 + }, + { + "epoch": 1.0326631324359319, + "grad_norm": 0.08368490636348724, + "learning_rate": 1.9316587804228e-06, + "loss": 0.1664, + "num_input_tokens_seen": 28501728, + "step": 42270 + }, + { + "epoch": 1.032785283267779, + "grad_norm": 0.0791000947356224, + "learning_rate": 1.931627792746167e-06, + "loss": 0.0567, + "num_input_tokens_seen": 28504864, + "step": 42275 + }, + { + "epoch": 1.0329074340996263, + "grad_norm": 1.1830270290374756, + "learning_rate": 1.931596798294486e-06, + "loss": 0.0706, + "num_input_tokens_seen": 28508000, + "step": 42280 + }, + { + "epoch": 1.0330295849314735, + "grad_norm": 25.891298294067383, + "learning_rate": 1.9315657970679826e-06, + "loss": 0.1044, + "num_input_tokens_seen": 28511072, + "step": 42285 + }, + { + "epoch": 1.0331517357633206, + "grad_norm": 37.095394134521484, + "learning_rate": 1.9315347890668825e-06, + "loss": 0.2329, + "num_input_tokens_seen": 28514016, + "step": 42290 + }, + { + "epoch": 1.0332738865951676, + "grad_norm": 16.82368278503418, + "learning_rate": 1.9315037742914107e-06, + "loss": 0.1215, + "num_input_tokens_seen": 28517664, + "step": 42295 + }, + { + "epoch": 1.0333960374270148, + "grad_norm": 0.26243355870246887, + "learning_rate": 1.931472752741793e-06, + "loss": 0.0044, + "num_input_tokens_seen": 28520992, + "step": 42300 + }, + { + "epoch": 1.033518188258862, + "grad_norm": 40.032291412353516, + "learning_rate": 1.9314417244182547e-06, + "loss": 0.1998, + "num_input_tokens_seen": 28524512, + "step": 42305 + }, + { + "epoch": 1.0336403390907092, + "grad_norm": 0.18673402070999146, + "learning_rate": 1.9314106893210216e-06, + "loss": 0.0614, + "num_input_tokens_seen": 28528032, + "step": 42310 + }, + { + "epoch": 1.0337624899225564, + "grad_norm": 0.09959449619054794, + "learning_rate": 1.9313796474503194e-06, + "loss": 0.0382, + "num_input_tokens_seen": 28530848, + "step": 42315 + }, + { + "epoch": 1.0338846407544036, + "grad_norm": 0.6416058540344238, + "learning_rate": 1.931348598806374e-06, + "loss": 0.0943, + "num_input_tokens_seen": 28534304, + "step": 42320 + }, + { + "epoch": 1.0340067915862508, + "grad_norm": 27.448389053344727, + "learning_rate": 1.931317543389411e-06, + "loss": 0.1328, + "num_input_tokens_seen": 28537376, + "step": 42325 + }, + { + "epoch": 1.034128942418098, + "grad_norm": 29.5507755279541, + "learning_rate": 1.9312864811996567e-06, + "loss": 0.1759, + "num_input_tokens_seen": 28540128, + "step": 42330 + }, + { + "epoch": 1.0342510932499451, + "grad_norm": 3.888038396835327, + "learning_rate": 1.931255412237336e-06, + "loss": 0.0172, + "num_input_tokens_seen": 28543456, + "step": 42335 + }, + { + "epoch": 1.034373244081792, + "grad_norm": 0.9766087532043457, + "learning_rate": 1.931224336502676e-06, + "loss": 0.1055, + "num_input_tokens_seen": 28546912, + "step": 42340 + }, + { + "epoch": 1.0344953949136393, + "grad_norm": 23.447490692138672, + "learning_rate": 1.931193253995902e-06, + "loss": 0.0127, + "num_input_tokens_seen": 28550240, + "step": 42345 + }, + { + "epoch": 1.0346175457454865, + "grad_norm": 1.529118299484253, + "learning_rate": 1.93116216471724e-06, + "loss": 0.0732, + "num_input_tokens_seen": 28553696, + "step": 42350 + }, + { + "epoch": 1.0347396965773337, + "grad_norm": 0.10727009922266006, + "learning_rate": 1.9311310686669165e-06, + "loss": 0.07, + "num_input_tokens_seen": 28557408, + "step": 42355 + }, + { + "epoch": 1.0348618474091809, + "grad_norm": 40.918243408203125, + "learning_rate": 1.931099965845158e-06, + "loss": 0.136, + "num_input_tokens_seen": 28560672, + "step": 42360 + }, + { + "epoch": 1.034983998241028, + "grad_norm": 0.0714857280254364, + "learning_rate": 1.9310688562521894e-06, + "loss": 0.1912, + "num_input_tokens_seen": 28563744, + "step": 42365 + }, + { + "epoch": 1.0351061490728752, + "grad_norm": 0.17165929079055786, + "learning_rate": 1.9310377398882377e-06, + "loss": 0.0607, + "num_input_tokens_seen": 28567072, + "step": 42370 + }, + { + "epoch": 1.0352282999047224, + "grad_norm": 0.09585746377706528, + "learning_rate": 1.931006616753529e-06, + "loss": 0.1144, + "num_input_tokens_seen": 28570720, + "step": 42375 + }, + { + "epoch": 1.0353504507365696, + "grad_norm": 35.07247543334961, + "learning_rate": 1.93097548684829e-06, + "loss": 0.2082, + "num_input_tokens_seen": 28574176, + "step": 42380 + }, + { + "epoch": 1.0354726015684166, + "grad_norm": 1.625064730644226, + "learning_rate": 1.930944350172747e-06, + "loss": 0.0518, + "num_input_tokens_seen": 28577248, + "step": 42385 + }, + { + "epoch": 1.0355947524002638, + "grad_norm": 0.36407986283302307, + "learning_rate": 1.930913206727126e-06, + "loss": 0.0541, + "num_input_tokens_seen": 28580576, + "step": 42390 + }, + { + "epoch": 1.035716903232111, + "grad_norm": 0.07209677994251251, + "learning_rate": 1.9308820565116538e-06, + "loss": 0.002, + "num_input_tokens_seen": 28584032, + "step": 42395 + }, + { + "epoch": 1.0358390540639582, + "grad_norm": 0.2674114406108856, + "learning_rate": 1.930850899526557e-06, + "loss": 0.0064, + "num_input_tokens_seen": 28587552, + "step": 42400 + }, + { + "epoch": 1.0359612048958053, + "grad_norm": 0.15182150900363922, + "learning_rate": 1.930819735772062e-06, + "loss": 0.065, + "num_input_tokens_seen": 28591136, + "step": 42405 + }, + { + "epoch": 1.0360833557276525, + "grad_norm": 0.5361425876617432, + "learning_rate": 1.930788565248396e-06, + "loss": 0.1083, + "num_input_tokens_seen": 28594464, + "step": 42410 + }, + { + "epoch": 1.0362055065594997, + "grad_norm": 14.758516311645508, + "learning_rate": 1.9307573879557847e-06, + "loss": 0.095, + "num_input_tokens_seen": 28597856, + "step": 42415 + }, + { + "epoch": 1.036327657391347, + "grad_norm": 0.43900468945503235, + "learning_rate": 1.9307262038944552e-06, + "loss": 0.1319, + "num_input_tokens_seen": 28600928, + "step": 42420 + }, + { + "epoch": 1.036449808223194, + "grad_norm": 59.04403305053711, + "learning_rate": 1.9306950130646346e-06, + "loss": 0.2187, + "num_input_tokens_seen": 28603936, + "step": 42425 + }, + { + "epoch": 1.036571959055041, + "grad_norm": 0.13893799483776093, + "learning_rate": 1.9306638154665497e-06, + "loss": 0.0097, + "num_input_tokens_seen": 28606944, + "step": 42430 + }, + { + "epoch": 1.0366941098868883, + "grad_norm": 37.18202209472656, + "learning_rate": 1.930632611100427e-06, + "loss": 0.0701, + "num_input_tokens_seen": 28610208, + "step": 42435 + }, + { + "epoch": 1.0368162607187354, + "grad_norm": 0.16810859739780426, + "learning_rate": 1.9306013999664937e-06, + "loss": 0.0021, + "num_input_tokens_seen": 28613536, + "step": 42440 + }, + { + "epoch": 1.0369384115505826, + "grad_norm": 43.39802932739258, + "learning_rate": 1.930570182064977e-06, + "loss": 0.0961, + "num_input_tokens_seen": 28616864, + "step": 42445 + }, + { + "epoch": 1.0370605623824298, + "grad_norm": 0.3364641070365906, + "learning_rate": 1.9305389573961033e-06, + "loss": 0.132, + "num_input_tokens_seen": 28620128, + "step": 42450 + }, + { + "epoch": 1.037182713214277, + "grad_norm": 33.43479537963867, + "learning_rate": 1.9305077259601e-06, + "loss": 0.0375, + "num_input_tokens_seen": 28623520, + "step": 42455 + }, + { + "epoch": 1.0373048640461242, + "grad_norm": 56.155067443847656, + "learning_rate": 1.9304764877571944e-06, + "loss": 0.2168, + "num_input_tokens_seen": 28627040, + "step": 42460 + }, + { + "epoch": 1.0374270148779714, + "grad_norm": 21.936141967773438, + "learning_rate": 1.9304452427876138e-06, + "loss": 0.0926, + "num_input_tokens_seen": 28630368, + "step": 42465 + }, + { + "epoch": 1.0375491657098186, + "grad_norm": 0.8858507871627808, + "learning_rate": 1.9304139910515845e-06, + "loss": 0.0037, + "num_input_tokens_seen": 28633632, + "step": 42470 + }, + { + "epoch": 1.0376713165416656, + "grad_norm": 0.4281214773654938, + "learning_rate": 1.9303827325493346e-06, + "loss": 0.0369, + "num_input_tokens_seen": 28636704, + "step": 42475 + }, + { + "epoch": 1.0377934673735127, + "grad_norm": 0.19972863793373108, + "learning_rate": 1.9303514672810913e-06, + "loss": 0.0419, + "num_input_tokens_seen": 28640480, + "step": 42480 + }, + { + "epoch": 1.03791561820536, + "grad_norm": 0.09961410611867905, + "learning_rate": 1.930320195247082e-06, + "loss": 0.119, + "num_input_tokens_seen": 28643808, + "step": 42485 + }, + { + "epoch": 1.0380377690372071, + "grad_norm": 0.15113762021064758, + "learning_rate": 1.930288916447534e-06, + "loss": 0.0031, + "num_input_tokens_seen": 28647456, + "step": 42490 + }, + { + "epoch": 1.0381599198690543, + "grad_norm": 102.24231719970703, + "learning_rate": 1.930257630882675e-06, + "loss": 0.2149, + "num_input_tokens_seen": 28650976, + "step": 42495 + }, + { + "epoch": 1.0382820707009015, + "grad_norm": 0.0898851677775383, + "learning_rate": 1.930226338552732e-06, + "loss": 0.0508, + "num_input_tokens_seen": 28654304, + "step": 42500 + }, + { + "epoch": 1.0384042215327487, + "grad_norm": 73.28435516357422, + "learning_rate": 1.9301950394579328e-06, + "loss": 0.1642, + "num_input_tokens_seen": 28657440, + "step": 42505 + }, + { + "epoch": 1.0385263723645959, + "grad_norm": 12.236141204833984, + "learning_rate": 1.9301637335985052e-06, + "loss": 0.09, + "num_input_tokens_seen": 28661024, + "step": 42510 + }, + { + "epoch": 1.038648523196443, + "grad_norm": 14.757637023925781, + "learning_rate": 1.930132420974677e-06, + "loss": 0.0374, + "num_input_tokens_seen": 28664160, + "step": 42515 + }, + { + "epoch": 1.03877067402829, + "grad_norm": 137.90829467773438, + "learning_rate": 1.930101101586675e-06, + "loss": 0.1329, + "num_input_tokens_seen": 28667552, + "step": 42520 + }, + { + "epoch": 1.0388928248601372, + "grad_norm": 101.94804382324219, + "learning_rate": 1.9300697754347283e-06, + "loss": 0.0922, + "num_input_tokens_seen": 28670944, + "step": 42525 + }, + { + "epoch": 1.0390149756919844, + "grad_norm": 175.0195770263672, + "learning_rate": 1.9300384425190635e-06, + "loss": 0.1599, + "num_input_tokens_seen": 28673888, + "step": 42530 + }, + { + "epoch": 1.0391371265238316, + "grad_norm": 9.110740661621094, + "learning_rate": 1.9300071028399093e-06, + "loss": 0.114, + "num_input_tokens_seen": 28677088, + "step": 42535 + }, + { + "epoch": 1.0392592773556788, + "grad_norm": 8.481295585632324, + "learning_rate": 1.9299757563974934e-06, + "loss": 0.2603, + "num_input_tokens_seen": 28680224, + "step": 42540 + }, + { + "epoch": 1.039381428187526, + "grad_norm": 0.07923160493373871, + "learning_rate": 1.9299444031920437e-06, + "loss": 0.0781, + "num_input_tokens_seen": 28683680, + "step": 42545 + }, + { + "epoch": 1.0395035790193732, + "grad_norm": 0.047697048634290695, + "learning_rate": 1.9299130432237877e-06, + "loss": 0.0054, + "num_input_tokens_seen": 28687008, + "step": 42550 + }, + { + "epoch": 1.0396257298512204, + "grad_norm": 70.10228729248047, + "learning_rate": 1.929881676492954e-06, + "loss": 0.044, + "num_input_tokens_seen": 28690080, + "step": 42555 + }, + { + "epoch": 1.0397478806830676, + "grad_norm": 0.24712218344211578, + "learning_rate": 1.929850302999771e-06, + "loss": 0.1201, + "num_input_tokens_seen": 28693600, + "step": 42560 + }, + { + "epoch": 1.0398700315149145, + "grad_norm": 7.952376842498779, + "learning_rate": 1.9298189227444665e-06, + "loss": 0.1339, + "num_input_tokens_seen": 28697056, + "step": 42565 + }, + { + "epoch": 1.0399921823467617, + "grad_norm": 19.63153648376465, + "learning_rate": 1.9297875357272683e-06, + "loss": 0.0307, + "num_input_tokens_seen": 28700512, + "step": 42570 + }, + { + "epoch": 1.040114333178609, + "grad_norm": 7.9623894691467285, + "learning_rate": 1.9297561419484056e-06, + "loss": 0.0128, + "num_input_tokens_seen": 28703200, + "step": 42575 + }, + { + "epoch": 1.040236484010456, + "grad_norm": 1.8297635316848755, + "learning_rate": 1.9297247414081058e-06, + "loss": 0.0397, + "num_input_tokens_seen": 28706592, + "step": 42580 + }, + { + "epoch": 1.0403586348423033, + "grad_norm": 0.15404774248600006, + "learning_rate": 1.929693334106598e-06, + "loss": 0.0027, + "num_input_tokens_seen": 28709728, + "step": 42585 + }, + { + "epoch": 1.0404807856741505, + "grad_norm": 0.034117963165044785, + "learning_rate": 1.9296619200441095e-06, + "loss": 0.0827, + "num_input_tokens_seen": 28713376, + "step": 42590 + }, + { + "epoch": 1.0406029365059977, + "grad_norm": 0.0477776899933815, + "learning_rate": 1.9296304992208697e-06, + "loss": 0.1676, + "num_input_tokens_seen": 28716384, + "step": 42595 + }, + { + "epoch": 1.0407250873378449, + "grad_norm": 33.39387130737305, + "learning_rate": 1.9295990716371075e-06, + "loss": 0.0847, + "num_input_tokens_seen": 28719456, + "step": 42600 + }, + { + "epoch": 1.0408472381696918, + "grad_norm": 0.19198709726333618, + "learning_rate": 1.9295676372930505e-06, + "loss": 0.05, + "num_input_tokens_seen": 28723104, + "step": 42605 + }, + { + "epoch": 1.040969389001539, + "grad_norm": 7.402000427246094, + "learning_rate": 1.9295361961889272e-06, + "loss": 0.1016, + "num_input_tokens_seen": 28726368, + "step": 42610 + }, + { + "epoch": 1.0410915398333862, + "grad_norm": 2.166149139404297, + "learning_rate": 1.929504748324967e-06, + "loss": 0.1046, + "num_input_tokens_seen": 28729760, + "step": 42615 + }, + { + "epoch": 1.0412136906652334, + "grad_norm": 29.45623016357422, + "learning_rate": 1.929473293701398e-06, + "loss": 0.1763, + "num_input_tokens_seen": 28733024, + "step": 42620 + }, + { + "epoch": 1.0413358414970806, + "grad_norm": 15.659850120544434, + "learning_rate": 1.9294418323184495e-06, + "loss": 0.101, + "num_input_tokens_seen": 28736416, + "step": 42625 + }, + { + "epoch": 1.0414579923289278, + "grad_norm": 19.953767776489258, + "learning_rate": 1.92941036417635e-06, + "loss": 0.1033, + "num_input_tokens_seen": 28739744, + "step": 42630 + }, + { + "epoch": 1.041580143160775, + "grad_norm": 14.26479434967041, + "learning_rate": 1.929378889275328e-06, + "loss": 0.0464, + "num_input_tokens_seen": 28742560, + "step": 42635 + }, + { + "epoch": 1.0417022939926222, + "grad_norm": 0.19203825294971466, + "learning_rate": 1.929347407615613e-06, + "loss": 0.005, + "num_input_tokens_seen": 28745952, + "step": 42640 + }, + { + "epoch": 1.0418244448244693, + "grad_norm": 75.64021301269531, + "learning_rate": 1.9293159191974338e-06, + "loss": 0.0939, + "num_input_tokens_seen": 28749280, + "step": 42645 + }, + { + "epoch": 1.0419465956563165, + "grad_norm": 0.12982279062271118, + "learning_rate": 1.9292844240210193e-06, + "loss": 0.0669, + "num_input_tokens_seen": 28752416, + "step": 42650 + }, + { + "epoch": 1.0420687464881635, + "grad_norm": 0.053554948419332504, + "learning_rate": 1.9292529220865985e-06, + "loss": 0.0011, + "num_input_tokens_seen": 28756192, + "step": 42655 + }, + { + "epoch": 1.0421908973200107, + "grad_norm": 0.06555648893117905, + "learning_rate": 1.9292214133944003e-06, + "loss": 0.0752, + "num_input_tokens_seen": 28759328, + "step": 42660 + }, + { + "epoch": 1.0423130481518579, + "grad_norm": 0.14871549606323242, + "learning_rate": 1.929189897944654e-06, + "loss": 0.0056, + "num_input_tokens_seen": 28762656, + "step": 42665 + }, + { + "epoch": 1.042435198983705, + "grad_norm": 0.1613975614309311, + "learning_rate": 1.929158375737589e-06, + "loss": 0.0822, + "num_input_tokens_seen": 28766112, + "step": 42670 + }, + { + "epoch": 1.0425573498155523, + "grad_norm": 13.389020919799805, + "learning_rate": 1.9291268467734343e-06, + "loss": 0.1787, + "num_input_tokens_seen": 28769568, + "step": 42675 + }, + { + "epoch": 1.0426795006473994, + "grad_norm": 12.727326393127441, + "learning_rate": 1.92909531105242e-06, + "loss": 0.0615, + "num_input_tokens_seen": 28772896, + "step": 42680 + }, + { + "epoch": 1.0428016514792466, + "grad_norm": 0.05295789614319801, + "learning_rate": 1.929063768574774e-06, + "loss": 0.0421, + "num_input_tokens_seen": 28776224, + "step": 42685 + }, + { + "epoch": 1.0429238023110938, + "grad_norm": 22.07121467590332, + "learning_rate": 1.9290322193407264e-06, + "loss": 0.0861, + "num_input_tokens_seen": 28779488, + "step": 42690 + }, + { + "epoch": 1.0430459531429408, + "grad_norm": 98.79450988769531, + "learning_rate": 1.9290006633505065e-06, + "loss": 0.101, + "num_input_tokens_seen": 28782752, + "step": 42695 + }, + { + "epoch": 1.043168103974788, + "grad_norm": 0.07113608717918396, + "learning_rate": 1.928969100604344e-06, + "loss": 0.0381, + "num_input_tokens_seen": 28786080, + "step": 42700 + }, + { + "epoch": 1.0432902548066352, + "grad_norm": 8.405142784118652, + "learning_rate": 1.9289375311024683e-06, + "loss": 0.0541, + "num_input_tokens_seen": 28789664, + "step": 42705 + }, + { + "epoch": 1.0434124056384824, + "grad_norm": 0.5017864108085632, + "learning_rate": 1.9289059548451094e-06, + "loss": 0.0356, + "num_input_tokens_seen": 28792992, + "step": 42710 + }, + { + "epoch": 1.0435345564703296, + "grad_norm": 4.686799049377441, + "learning_rate": 1.9288743718324963e-06, + "loss": 0.1014, + "num_input_tokens_seen": 28796768, + "step": 42715 + }, + { + "epoch": 1.0436567073021767, + "grad_norm": 0.3890798091888428, + "learning_rate": 1.9288427820648586e-06, + "loss": 0.093, + "num_input_tokens_seen": 28800224, + "step": 42720 + }, + { + "epoch": 1.043778858134024, + "grad_norm": 0.38483116030693054, + "learning_rate": 1.9288111855424266e-06, + "loss": 0.0736, + "num_input_tokens_seen": 28803936, + "step": 42725 + }, + { + "epoch": 1.0439010089658711, + "grad_norm": 0.13553577661514282, + "learning_rate": 1.92877958226543e-06, + "loss": 0.0022, + "num_input_tokens_seen": 28807264, + "step": 42730 + }, + { + "epoch": 1.0440231597977183, + "grad_norm": 0.2688484489917755, + "learning_rate": 1.9287479722340985e-06, + "loss": 0.1002, + "num_input_tokens_seen": 28810592, + "step": 42735 + }, + { + "epoch": 1.0441453106295653, + "grad_norm": 89.76862335205078, + "learning_rate": 1.928716355448662e-06, + "loss": 0.1476, + "num_input_tokens_seen": 28813792, + "step": 42740 + }, + { + "epoch": 1.0442674614614125, + "grad_norm": 44.90713119506836, + "learning_rate": 1.92868473190935e-06, + "loss": 0.0419, + "num_input_tokens_seen": 28817568, + "step": 42745 + }, + { + "epoch": 1.0443896122932597, + "grad_norm": 50.53390121459961, + "learning_rate": 1.9286531016163934e-06, + "loss": 0.0304, + "num_input_tokens_seen": 28821216, + "step": 42750 + }, + { + "epoch": 1.0445117631251069, + "grad_norm": 1.4952304363250732, + "learning_rate": 1.928621464570021e-06, + "loss": 0.1421, + "num_input_tokens_seen": 28824352, + "step": 42755 + }, + { + "epoch": 1.044633913956954, + "grad_norm": 0.21666060388088226, + "learning_rate": 1.9285898207704637e-06, + "loss": 0.0645, + "num_input_tokens_seen": 28827616, + "step": 42760 + }, + { + "epoch": 1.0447560647888012, + "grad_norm": 83.15283203125, + "learning_rate": 1.928558170217952e-06, + "loss": 0.113, + "num_input_tokens_seen": 28830944, + "step": 42765 + }, + { + "epoch": 1.0448782156206484, + "grad_norm": 0.0197481419891119, + "learning_rate": 1.9285265129127147e-06, + "loss": 0.1068, + "num_input_tokens_seen": 28834336, + "step": 42770 + }, + { + "epoch": 1.0450003664524956, + "grad_norm": 1.7862377166748047, + "learning_rate": 1.9284948488549834e-06, + "loss": 0.2002, + "num_input_tokens_seen": 28837856, + "step": 42775 + }, + { + "epoch": 1.0451225172843428, + "grad_norm": 0.22275428473949432, + "learning_rate": 1.928463178044988e-06, + "loss": 0.0059, + "num_input_tokens_seen": 28840672, + "step": 42780 + }, + { + "epoch": 1.0452446681161898, + "grad_norm": 1.0340585708618164, + "learning_rate": 1.9284315004829582e-06, + "loss": 0.1131, + "num_input_tokens_seen": 28844128, + "step": 42785 + }, + { + "epoch": 1.045366818948037, + "grad_norm": 0.07115308195352554, + "learning_rate": 1.9283998161691247e-06, + "loss": 0.1306, + "num_input_tokens_seen": 28848032, + "step": 42790 + }, + { + "epoch": 1.0454889697798841, + "grad_norm": 0.12588505446910858, + "learning_rate": 1.9283681251037187e-06, + "loss": 0.1494, + "num_input_tokens_seen": 28851424, + "step": 42795 + }, + { + "epoch": 1.0456111206117313, + "grad_norm": 0.15897129476070404, + "learning_rate": 1.928336427286969e-06, + "loss": 0.0593, + "num_input_tokens_seen": 28854496, + "step": 42800 + }, + { + "epoch": 1.0457332714435785, + "grad_norm": 15.23776626586914, + "learning_rate": 1.928304722719108e-06, + "loss": 0.1006, + "num_input_tokens_seen": 28857888, + "step": 42805 + }, + { + "epoch": 1.0458554222754257, + "grad_norm": 29.71601676940918, + "learning_rate": 1.9282730114003652e-06, + "loss": 0.0862, + "num_input_tokens_seen": 28861280, + "step": 42810 + }, + { + "epoch": 1.045977573107273, + "grad_norm": 0.10771415382623672, + "learning_rate": 1.928241293330971e-06, + "loss": 0.0299, + "num_input_tokens_seen": 28864608, + "step": 42815 + }, + { + "epoch": 1.04609972393912, + "grad_norm": 0.10723359137773514, + "learning_rate": 1.928209568511157e-06, + "loss": 0.0559, + "num_input_tokens_seen": 28868320, + "step": 42820 + }, + { + "epoch": 1.0462218747709673, + "grad_norm": 0.06635193526744843, + "learning_rate": 1.928177836941153e-06, + "loss": 0.0955, + "num_input_tokens_seen": 28871456, + "step": 42825 + }, + { + "epoch": 1.0463440256028143, + "grad_norm": 0.2644246220588684, + "learning_rate": 1.92814609862119e-06, + "loss": 0.0599, + "num_input_tokens_seen": 28874592, + "step": 42830 + }, + { + "epoch": 1.0464661764346614, + "grad_norm": 44.2717170715332, + "learning_rate": 1.928114353551499e-06, + "loss": 0.0608, + "num_input_tokens_seen": 28878176, + "step": 42835 + }, + { + "epoch": 1.0465883272665086, + "grad_norm": 0.18394502997398376, + "learning_rate": 1.928082601732311e-06, + "loss": 0.0408, + "num_input_tokens_seen": 28881504, + "step": 42840 + }, + { + "epoch": 1.0467104780983558, + "grad_norm": 0.11878114193677902, + "learning_rate": 1.9280508431638567e-06, + "loss": 0.0309, + "num_input_tokens_seen": 28885216, + "step": 42845 + }, + { + "epoch": 1.046832628930203, + "grad_norm": 30.702194213867188, + "learning_rate": 1.928019077846367e-06, + "loss": 0.1235, + "num_input_tokens_seen": 28888544, + "step": 42850 + }, + { + "epoch": 1.0469547797620502, + "grad_norm": 0.15721218287944794, + "learning_rate": 1.927987305780073e-06, + "loss": 0.1238, + "num_input_tokens_seen": 28891744, + "step": 42855 + }, + { + "epoch": 1.0470769305938974, + "grad_norm": 0.7722503542900085, + "learning_rate": 1.9279555269652053e-06, + "loss": 0.1013, + "num_input_tokens_seen": 28894944, + "step": 42860 + }, + { + "epoch": 1.0471990814257446, + "grad_norm": 1.5233492851257324, + "learning_rate": 1.927923741401996e-06, + "loss": 0.0614, + "num_input_tokens_seen": 28897952, + "step": 42865 + }, + { + "epoch": 1.0473212322575918, + "grad_norm": 6.74429178237915, + "learning_rate": 1.9278919490906754e-06, + "loss": 0.0939, + "num_input_tokens_seen": 28901344, + "step": 42870 + }, + { + "epoch": 1.0474433830894387, + "grad_norm": 44.9547119140625, + "learning_rate": 1.9278601500314748e-06, + "loss": 0.0972, + "num_input_tokens_seen": 28904416, + "step": 42875 + }, + { + "epoch": 1.047565533921286, + "grad_norm": 3.7963287830352783, + "learning_rate": 1.927828344224626e-06, + "loss": 0.0583, + "num_input_tokens_seen": 28907872, + "step": 42880 + }, + { + "epoch": 1.0476876847531331, + "grad_norm": 7.7191009521484375, + "learning_rate": 1.9277965316703595e-06, + "loss": 0.0871, + "num_input_tokens_seen": 28910880, + "step": 42885 + }, + { + "epoch": 1.0478098355849803, + "grad_norm": 165.5267791748047, + "learning_rate": 1.9277647123689077e-06, + "loss": 0.0423, + "num_input_tokens_seen": 28914592, + "step": 42890 + }, + { + "epoch": 1.0479319864168275, + "grad_norm": 30.80902862548828, + "learning_rate": 1.9277328863205006e-06, + "loss": 0.0861, + "num_input_tokens_seen": 28918368, + "step": 42895 + }, + { + "epoch": 1.0480541372486747, + "grad_norm": 0.3250162899494171, + "learning_rate": 1.927701053525371e-06, + "loss": 0.0989, + "num_input_tokens_seen": 28922592, + "step": 42900 + }, + { + "epoch": 1.0481762880805219, + "grad_norm": 0.34292522072792053, + "learning_rate": 1.92766921398375e-06, + "loss": 0.0402, + "num_input_tokens_seen": 28926048, + "step": 42905 + }, + { + "epoch": 1.048298438912369, + "grad_norm": 34.1561164855957, + "learning_rate": 1.927637367695868e-06, + "loss": 0.0911, + "num_input_tokens_seen": 28929632, + "step": 42910 + }, + { + "epoch": 1.0484205897442163, + "grad_norm": 0.0897122249007225, + "learning_rate": 1.9276055146619582e-06, + "loss": 0.0014, + "num_input_tokens_seen": 28933088, + "step": 42915 + }, + { + "epoch": 1.0485427405760632, + "grad_norm": 0.08465170115232468, + "learning_rate": 1.9275736548822516e-06, + "loss": 0.0398, + "num_input_tokens_seen": 28936352, + "step": 42920 + }, + { + "epoch": 1.0486648914079104, + "grad_norm": 12.695818901062012, + "learning_rate": 1.9275417883569796e-06, + "loss": 0.1345, + "num_input_tokens_seen": 28939680, + "step": 42925 + }, + { + "epoch": 1.0487870422397576, + "grad_norm": 0.030476637184619904, + "learning_rate": 1.9275099150863747e-06, + "loss": 0.1114, + "num_input_tokens_seen": 28943648, + "step": 42930 + }, + { + "epoch": 1.0489091930716048, + "grad_norm": 0.08667637407779694, + "learning_rate": 1.9274780350706678e-06, + "loss": 0.0751, + "num_input_tokens_seen": 28946912, + "step": 42935 + }, + { + "epoch": 1.049031343903452, + "grad_norm": 0.3473820090293884, + "learning_rate": 1.9274461483100916e-06, + "loss": 0.0015, + "num_input_tokens_seen": 28950048, + "step": 42940 + }, + { + "epoch": 1.0491534947352992, + "grad_norm": 78.53522491455078, + "learning_rate": 1.927414254804877e-06, + "loss": 0.0286, + "num_input_tokens_seen": 28953504, + "step": 42945 + }, + { + "epoch": 1.0492756455671464, + "grad_norm": 80.54452514648438, + "learning_rate": 1.9273823545552573e-06, + "loss": 0.1131, + "num_input_tokens_seen": 28956704, + "step": 42950 + }, + { + "epoch": 1.0493977963989936, + "grad_norm": 0.013884730637073517, + "learning_rate": 1.927350447561463e-06, + "loss": 0.0012, + "num_input_tokens_seen": 28959968, + "step": 42955 + }, + { + "epoch": 1.0495199472308407, + "grad_norm": 0.05496250092983246, + "learning_rate": 1.927318533823727e-06, + "loss": 0.0587, + "num_input_tokens_seen": 28963744, + "step": 42960 + }, + { + "epoch": 1.0496420980626877, + "grad_norm": 41.326560974121094, + "learning_rate": 1.927286613342281e-06, + "loss": 0.0692, + "num_input_tokens_seen": 28967200, + "step": 42965 + }, + { + "epoch": 1.049764248894535, + "grad_norm": 0.02888176217675209, + "learning_rate": 1.9272546861173576e-06, + "loss": 0.0739, + "num_input_tokens_seen": 28971040, + "step": 42970 + }, + { + "epoch": 1.049886399726382, + "grad_norm": 12.679499626159668, + "learning_rate": 1.9272227521491887e-06, + "loss": 0.1976, + "num_input_tokens_seen": 28974240, + "step": 42975 + }, + { + "epoch": 1.0500085505582293, + "grad_norm": 15.679131507873535, + "learning_rate": 1.927190811438007e-06, + "loss": 0.1454, + "num_input_tokens_seen": 28977568, + "step": 42980 + }, + { + "epoch": 1.0501307013900765, + "grad_norm": 0.0508650504052639, + "learning_rate": 1.9271588639840434e-06, + "loss": 0.061, + "num_input_tokens_seen": 28981216, + "step": 42985 + }, + { + "epoch": 1.0502528522219237, + "grad_norm": 1.0628223419189453, + "learning_rate": 1.9271269097875317e-06, + "loss": 0.0465, + "num_input_tokens_seen": 28984672, + "step": 42990 + }, + { + "epoch": 1.0503750030537709, + "grad_norm": 0.9339186549186707, + "learning_rate": 1.9270949488487038e-06, + "loss": 0.0031, + "num_input_tokens_seen": 28988384, + "step": 42995 + }, + { + "epoch": 1.050497153885618, + "grad_norm": 59.791831970214844, + "learning_rate": 1.9270629811677917e-06, + "loss": 0.0765, + "num_input_tokens_seen": 28991648, + "step": 43000 + }, + { + "epoch": 1.0506193047174652, + "grad_norm": 0.22662672400474548, + "learning_rate": 1.927031006745029e-06, + "loss": 0.0022, + "num_input_tokens_seen": 28995104, + "step": 43005 + }, + { + "epoch": 1.0507414555493122, + "grad_norm": 0.057953156530857086, + "learning_rate": 1.9269990255806467e-06, + "loss": 0.0963, + "num_input_tokens_seen": 28998496, + "step": 43010 + }, + { + "epoch": 1.0508636063811594, + "grad_norm": 0.03372470661997795, + "learning_rate": 1.9269670376748783e-06, + "loss": 0.0566, + "num_input_tokens_seen": 29002272, + "step": 43015 + }, + { + "epoch": 1.0509857572130066, + "grad_norm": 0.13802820444107056, + "learning_rate": 1.9269350430279566e-06, + "loss": 0.0036, + "num_input_tokens_seen": 29005792, + "step": 43020 + }, + { + "epoch": 1.0511079080448538, + "grad_norm": 21.07268524169922, + "learning_rate": 1.926903041640114e-06, + "loss": 0.0948, + "num_input_tokens_seen": 29009056, + "step": 43025 + }, + { + "epoch": 1.051230058876701, + "grad_norm": 65.3128890991211, + "learning_rate": 1.9268710335115825e-06, + "loss": 0.0552, + "num_input_tokens_seen": 29012640, + "step": 43030 + }, + { + "epoch": 1.0513522097085481, + "grad_norm": 14.12056827545166, + "learning_rate": 1.926839018642596e-06, + "loss": 0.1364, + "num_input_tokens_seen": 29016352, + "step": 43035 + }, + { + "epoch": 1.0514743605403953, + "grad_norm": 0.07477191835641861, + "learning_rate": 1.926806997033387e-06, + "loss": 0.1925, + "num_input_tokens_seen": 29020512, + "step": 43040 + }, + { + "epoch": 1.0515965113722425, + "grad_norm": 0.4883725047111511, + "learning_rate": 1.926774968684188e-06, + "loss": 0.0892, + "num_input_tokens_seen": 29023904, + "step": 43045 + }, + { + "epoch": 1.0517186622040897, + "grad_norm": 0.05889091640710831, + "learning_rate": 1.926742933595232e-06, + "loss": 0.1294, + "num_input_tokens_seen": 29027040, + "step": 43050 + }, + { + "epoch": 1.0518408130359367, + "grad_norm": 16.98215675354004, + "learning_rate": 1.9267108917667528e-06, + "loss": 0.0738, + "num_input_tokens_seen": 29030048, + "step": 43055 + }, + { + "epoch": 1.0519629638677839, + "grad_norm": 0.4212515950202942, + "learning_rate": 1.926678843198982e-06, + "loss": 0.0333, + "num_input_tokens_seen": 29033568, + "step": 43060 + }, + { + "epoch": 1.052085114699631, + "grad_norm": 23.378690719604492, + "learning_rate": 1.926646787892154e-06, + "loss": 0.1935, + "num_input_tokens_seen": 29036832, + "step": 43065 + }, + { + "epoch": 1.0522072655314783, + "grad_norm": 0.46372488141059875, + "learning_rate": 1.926614725846501e-06, + "loss": 0.0139, + "num_input_tokens_seen": 29040608, + "step": 43070 + }, + { + "epoch": 1.0523294163633254, + "grad_norm": 60.715450286865234, + "learning_rate": 1.9265826570622565e-06, + "loss": 0.074, + "num_input_tokens_seen": 29043872, + "step": 43075 + }, + { + "epoch": 1.0524515671951726, + "grad_norm": 17.99675941467285, + "learning_rate": 1.9265505815396533e-06, + "loss": 0.1008, + "num_input_tokens_seen": 29047008, + "step": 43080 + }, + { + "epoch": 1.0525737180270198, + "grad_norm": 1.093004822731018, + "learning_rate": 1.926518499278926e-06, + "loss": 0.1009, + "num_input_tokens_seen": 29049888, + "step": 43085 + }, + { + "epoch": 1.052695868858867, + "grad_norm": 49.34836196899414, + "learning_rate": 1.9264864102803062e-06, + "loss": 0.0497, + "num_input_tokens_seen": 29053088, + "step": 43090 + }, + { + "epoch": 1.0528180196907142, + "grad_norm": 35.51371383666992, + "learning_rate": 1.9264543145440283e-06, + "loss": 0.0636, + "num_input_tokens_seen": 29056608, + "step": 43095 + }, + { + "epoch": 1.0529401705225612, + "grad_norm": 0.07104449719190598, + "learning_rate": 1.9264222120703253e-06, + "loss": 0.1812, + "num_input_tokens_seen": 29059744, + "step": 43100 + }, + { + "epoch": 1.0530623213544084, + "grad_norm": 28.080835342407227, + "learning_rate": 1.9263901028594307e-06, + "loss": 0.1036, + "num_input_tokens_seen": 29063648, + "step": 43105 + }, + { + "epoch": 1.0531844721862555, + "grad_norm": 0.15192584693431854, + "learning_rate": 1.9263579869115783e-06, + "loss": 0.1083, + "num_input_tokens_seen": 29066720, + "step": 43110 + }, + { + "epoch": 1.0533066230181027, + "grad_norm": 32.0471305847168, + "learning_rate": 1.9263258642270018e-06, + "loss": 0.0401, + "num_input_tokens_seen": 29070048, + "step": 43115 + }, + { + "epoch": 1.05342877384995, + "grad_norm": 39.45811080932617, + "learning_rate": 1.926293734805934e-06, + "loss": 0.1869, + "num_input_tokens_seen": 29072992, + "step": 43120 + }, + { + "epoch": 1.0535509246817971, + "grad_norm": 0.7179755568504333, + "learning_rate": 1.926261598648609e-06, + "loss": 0.0173, + "num_input_tokens_seen": 29076704, + "step": 43125 + }, + { + "epoch": 1.0536730755136443, + "grad_norm": 0.21337614953517914, + "learning_rate": 1.926229455755261e-06, + "loss": 0.047, + "num_input_tokens_seen": 29080288, + "step": 43130 + }, + { + "epoch": 1.0537952263454915, + "grad_norm": 1.0191816091537476, + "learning_rate": 1.926197306126123e-06, + "loss": 0.1349, + "num_input_tokens_seen": 29083168, + "step": 43135 + }, + { + "epoch": 1.0539173771773385, + "grad_norm": 87.6432876586914, + "learning_rate": 1.926165149761429e-06, + "loss": 0.0556, + "num_input_tokens_seen": 29086496, + "step": 43140 + }, + { + "epoch": 1.0540395280091857, + "grad_norm": 25.76075553894043, + "learning_rate": 1.9261329866614125e-06, + "loss": 0.1828, + "num_input_tokens_seen": 29089568, + "step": 43145 + }, + { + "epoch": 1.0541616788410328, + "grad_norm": 47.1466178894043, + "learning_rate": 1.9261008168263082e-06, + "loss": 0.0622, + "num_input_tokens_seen": 29093152, + "step": 43150 + }, + { + "epoch": 1.05428382967288, + "grad_norm": 0.02713550254702568, + "learning_rate": 1.92606864025635e-06, + "loss": 0.0211, + "num_input_tokens_seen": 29097312, + "step": 43155 + }, + { + "epoch": 1.0544059805047272, + "grad_norm": 11.254847526550293, + "learning_rate": 1.9260364569517715e-06, + "loss": 0.1547, + "num_input_tokens_seen": 29100576, + "step": 43160 + }, + { + "epoch": 1.0545281313365744, + "grad_norm": 0.06463029235601425, + "learning_rate": 1.926004266912806e-06, + "loss": 0.1126, + "num_input_tokens_seen": 29104032, + "step": 43165 + }, + { + "epoch": 1.0546502821684216, + "grad_norm": 0.1764514297246933, + "learning_rate": 1.9259720701396893e-06, + "loss": 0.0023, + "num_input_tokens_seen": 29107680, + "step": 43170 + }, + { + "epoch": 1.0547724330002688, + "grad_norm": 7.495051860809326, + "learning_rate": 1.9259398666326545e-06, + "loss": 0.0037, + "num_input_tokens_seen": 29110944, + "step": 43175 + }, + { + "epoch": 1.054894583832116, + "grad_norm": 0.2713419198989868, + "learning_rate": 1.9259076563919356e-06, + "loss": 0.0061, + "num_input_tokens_seen": 29113952, + "step": 43180 + }, + { + "epoch": 1.0550167346639632, + "grad_norm": 23.387500762939453, + "learning_rate": 1.9258754394177672e-06, + "loss": 0.0589, + "num_input_tokens_seen": 29117216, + "step": 43185 + }, + { + "epoch": 1.0551388854958101, + "grad_norm": 0.5097941160202026, + "learning_rate": 1.925843215710384e-06, + "loss": 0.0329, + "num_input_tokens_seen": 29120864, + "step": 43190 + }, + { + "epoch": 1.0552610363276573, + "grad_norm": 5.5870280265808105, + "learning_rate": 1.92581098527002e-06, + "loss": 0.0941, + "num_input_tokens_seen": 29124192, + "step": 43195 + }, + { + "epoch": 1.0553831871595045, + "grad_norm": 0.08644071966409683, + "learning_rate": 1.925778748096909e-06, + "loss": 0.0453, + "num_input_tokens_seen": 29127136, + "step": 43200 + }, + { + "epoch": 1.0555053379913517, + "grad_norm": 90.56088256835938, + "learning_rate": 1.925746504191286e-06, + "loss": 0.0303, + "num_input_tokens_seen": 29130272, + "step": 43205 + }, + { + "epoch": 1.055627488823199, + "grad_norm": 0.05298614501953125, + "learning_rate": 1.9257142535533857e-06, + "loss": 0.0251, + "num_input_tokens_seen": 29133728, + "step": 43210 + }, + { + "epoch": 1.055749639655046, + "grad_norm": 75.3792953491211, + "learning_rate": 1.925681996183442e-06, + "loss": 0.1662, + "num_input_tokens_seen": 29136992, + "step": 43215 + }, + { + "epoch": 1.0558717904868933, + "grad_norm": 11.935559272766113, + "learning_rate": 1.92564973208169e-06, + "loss": 0.2896, + "num_input_tokens_seen": 29140000, + "step": 43220 + }, + { + "epoch": 1.0559939413187405, + "grad_norm": 0.2589423954486847, + "learning_rate": 1.9256174612483644e-06, + "loss": 0.0322, + "num_input_tokens_seen": 29143328, + "step": 43225 + }, + { + "epoch": 1.0561160921505874, + "grad_norm": 110.4568099975586, + "learning_rate": 1.925585183683699e-06, + "loss": 0.1653, + "num_input_tokens_seen": 29146336, + "step": 43230 + }, + { + "epoch": 1.0562382429824346, + "grad_norm": 11.954909324645996, + "learning_rate": 1.92555289938793e-06, + "loss": 0.093, + "num_input_tokens_seen": 29149664, + "step": 43235 + }, + { + "epoch": 1.0563603938142818, + "grad_norm": 0.0636618435382843, + "learning_rate": 1.925520608361291e-06, + "loss": 0.0861, + "num_input_tokens_seen": 29152864, + "step": 43240 + }, + { + "epoch": 1.056482544646129, + "grad_norm": 27.24610710144043, + "learning_rate": 1.9254883106040173e-06, + "loss": 0.1102, + "num_input_tokens_seen": 29156128, + "step": 43245 + }, + { + "epoch": 1.0566046954779762, + "grad_norm": 43.97340774536133, + "learning_rate": 1.9254560061163437e-06, + "loss": 0.1114, + "num_input_tokens_seen": 29159200, + "step": 43250 + }, + { + "epoch": 1.0567268463098234, + "grad_norm": 0.12428581714630127, + "learning_rate": 1.9254236948985046e-06, + "loss": 0.0591, + "num_input_tokens_seen": 29162336, + "step": 43255 + }, + { + "epoch": 1.0568489971416706, + "grad_norm": 31.620731353759766, + "learning_rate": 1.9253913769507363e-06, + "loss": 0.1124, + "num_input_tokens_seen": 29165216, + "step": 43260 + }, + { + "epoch": 1.0569711479735178, + "grad_norm": 0.5041882395744324, + "learning_rate": 1.9253590522732727e-06, + "loss": 0.1745, + "num_input_tokens_seen": 29168352, + "step": 43265 + }, + { + "epoch": 1.057093298805365, + "grad_norm": 17.660442352294922, + "learning_rate": 1.9253267208663486e-06, + "loss": 0.1335, + "num_input_tokens_seen": 29171744, + "step": 43270 + }, + { + "epoch": 1.057215449637212, + "grad_norm": 0.2939334809780121, + "learning_rate": 1.9252943827302006e-06, + "loss": 0.0037, + "num_input_tokens_seen": 29174752, + "step": 43275 + }, + { + "epoch": 1.0573376004690591, + "grad_norm": 0.38700637221336365, + "learning_rate": 1.9252620378650627e-06, + "loss": 0.1023, + "num_input_tokens_seen": 29177952, + "step": 43280 + }, + { + "epoch": 1.0574597513009063, + "grad_norm": 0.7174057960510254, + "learning_rate": 1.92522968627117e-06, + "loss": 0.0288, + "num_input_tokens_seen": 29181024, + "step": 43285 + }, + { + "epoch": 1.0575819021327535, + "grad_norm": 0.19030173122882843, + "learning_rate": 1.9251973279487586e-06, + "loss": 0.1486, + "num_input_tokens_seen": 29184096, + "step": 43290 + }, + { + "epoch": 1.0577040529646007, + "grad_norm": 0.8026861548423767, + "learning_rate": 1.9251649628980633e-06, + "loss": 0.094, + "num_input_tokens_seen": 29187808, + "step": 43295 + }, + { + "epoch": 1.0578262037964479, + "grad_norm": 2.309544086456299, + "learning_rate": 1.925132591119319e-06, + "loss": 0.0648, + "num_input_tokens_seen": 29191072, + "step": 43300 + }, + { + "epoch": 1.057948354628295, + "grad_norm": 2.089252471923828, + "learning_rate": 1.9251002126127626e-06, + "loss": 0.1138, + "num_input_tokens_seen": 29194912, + "step": 43305 + }, + { + "epoch": 1.0580705054601423, + "grad_norm": 10.03940200805664, + "learning_rate": 1.9250678273786283e-06, + "loss": 0.0849, + "num_input_tokens_seen": 29198048, + "step": 43310 + }, + { + "epoch": 1.0581926562919894, + "grad_norm": 19.415321350097656, + "learning_rate": 1.9250354354171515e-06, + "loss": 0.0362, + "num_input_tokens_seen": 29201120, + "step": 43315 + }, + { + "epoch": 1.0583148071238364, + "grad_norm": 134.2286834716797, + "learning_rate": 1.9250030367285684e-06, + "loss": 0.018, + "num_input_tokens_seen": 29204192, + "step": 43320 + }, + { + "epoch": 1.0584369579556836, + "grad_norm": 0.22014103829860687, + "learning_rate": 1.9249706313131147e-06, + "loss": 0.0911, + "num_input_tokens_seen": 29207392, + "step": 43325 + }, + { + "epoch": 1.0585591087875308, + "grad_norm": 0.7637476325035095, + "learning_rate": 1.924938219171026e-06, + "loss": 0.0367, + "num_input_tokens_seen": 29210912, + "step": 43330 + }, + { + "epoch": 1.058681259619378, + "grad_norm": 0.233280748128891, + "learning_rate": 1.9249058003025367e-06, + "loss": 0.0325, + "num_input_tokens_seen": 29213536, + "step": 43335 + }, + { + "epoch": 1.0588034104512252, + "grad_norm": 27.464370727539062, + "learning_rate": 1.9248733747078847e-06, + "loss": 0.0867, + "num_input_tokens_seen": 29217056, + "step": 43340 + }, + { + "epoch": 1.0589255612830724, + "grad_norm": 85.70044708251953, + "learning_rate": 1.9248409423873044e-06, + "loss": 0.1057, + "num_input_tokens_seen": 29220384, + "step": 43345 + }, + { + "epoch": 1.0590477121149195, + "grad_norm": 7.4150004386901855, + "learning_rate": 1.9248085033410318e-06, + "loss": 0.0897, + "num_input_tokens_seen": 29223648, + "step": 43350 + }, + { + "epoch": 1.0591698629467667, + "grad_norm": 0.10273587703704834, + "learning_rate": 1.9247760575693036e-06, + "loss": 0.0721, + "num_input_tokens_seen": 29227360, + "step": 43355 + }, + { + "epoch": 1.059292013778614, + "grad_norm": 0.9129404425621033, + "learning_rate": 1.9247436050723545e-06, + "loss": 0.0285, + "num_input_tokens_seen": 29230944, + "step": 43360 + }, + { + "epoch": 1.059414164610461, + "grad_norm": 21.414705276489258, + "learning_rate": 1.9247111458504216e-06, + "loss": 0.1744, + "num_input_tokens_seen": 29234080, + "step": 43365 + }, + { + "epoch": 1.059536315442308, + "grad_norm": 0.11530768871307373, + "learning_rate": 1.9246786799037403e-06, + "loss": 0.0446, + "num_input_tokens_seen": 29237024, + "step": 43370 + }, + { + "epoch": 1.0596584662741553, + "grad_norm": 0.08778601139783859, + "learning_rate": 1.924646207232547e-06, + "loss": 0.0152, + "num_input_tokens_seen": 29240352, + "step": 43375 + }, + { + "epoch": 1.0597806171060025, + "grad_norm": 2.5082666873931885, + "learning_rate": 1.9246137278370783e-06, + "loss": 0.0404, + "num_input_tokens_seen": 29243616, + "step": 43380 + }, + { + "epoch": 1.0599027679378497, + "grad_norm": 7.4902167320251465, + "learning_rate": 1.9245812417175692e-06, + "loss": 0.1136, + "num_input_tokens_seen": 29247328, + "step": 43385 + }, + { + "epoch": 1.0600249187696968, + "grad_norm": 0.378722608089447, + "learning_rate": 1.9245487488742568e-06, + "loss": 0.0794, + "num_input_tokens_seen": 29250208, + "step": 43390 + }, + { + "epoch": 1.060147069601544, + "grad_norm": 0.16203396022319794, + "learning_rate": 1.9245162493073776e-06, + "loss": 0.0574, + "num_input_tokens_seen": 29253344, + "step": 43395 + }, + { + "epoch": 1.0602692204333912, + "grad_norm": 54.568172454833984, + "learning_rate": 1.924483743017167e-06, + "loss": 0.0906, + "num_input_tokens_seen": 29256864, + "step": 43400 + }, + { + "epoch": 1.0603913712652384, + "grad_norm": 0.26273974776268005, + "learning_rate": 1.9244512300038623e-06, + "loss": 0.0496, + "num_input_tokens_seen": 29260128, + "step": 43405 + }, + { + "epoch": 1.0605135220970854, + "grad_norm": 0.2209581881761551, + "learning_rate": 1.9244187102676993e-06, + "loss": 0.052, + "num_input_tokens_seen": 29263968, + "step": 43410 + }, + { + "epoch": 1.0606356729289326, + "grad_norm": 0.10516363382339478, + "learning_rate": 1.9243861838089153e-06, + "loss": 0.0027, + "num_input_tokens_seen": 29267232, + "step": 43415 + }, + { + "epoch": 1.0607578237607798, + "grad_norm": 1.000648021697998, + "learning_rate": 1.924353650627746e-06, + "loss": 0.0472, + "num_input_tokens_seen": 29270496, + "step": 43420 + }, + { + "epoch": 1.060879974592627, + "grad_norm": 0.04793846979737282, + "learning_rate": 1.9243211107244284e-06, + "loss": 0.0737, + "num_input_tokens_seen": 29273952, + "step": 43425 + }, + { + "epoch": 1.0610021254244741, + "grad_norm": 20.53110122680664, + "learning_rate": 1.924288564099199e-06, + "loss": 0.2243, + "num_input_tokens_seen": 29277280, + "step": 43430 + }, + { + "epoch": 1.0611242762563213, + "grad_norm": 2.2913577556610107, + "learning_rate": 1.9242560107522947e-06, + "loss": 0.1931, + "num_input_tokens_seen": 29280288, + "step": 43435 + }, + { + "epoch": 1.0612464270881685, + "grad_norm": 1.403280258178711, + "learning_rate": 1.9242234506839523e-06, + "loss": 0.0778, + "num_input_tokens_seen": 29283488, + "step": 43440 + }, + { + "epoch": 1.0613685779200157, + "grad_norm": 54.15962600708008, + "learning_rate": 1.9241908838944077e-06, + "loss": 0.0773, + "num_input_tokens_seen": 29287200, + "step": 43445 + }, + { + "epoch": 1.061490728751863, + "grad_norm": 34.25768280029297, + "learning_rate": 1.924158310383899e-06, + "loss": 0.0145, + "num_input_tokens_seen": 29290336, + "step": 43450 + }, + { + "epoch": 1.0616128795837099, + "grad_norm": 37.82624816894531, + "learning_rate": 1.9241257301526623e-06, + "loss": 0.0795, + "num_input_tokens_seen": 29293408, + "step": 43455 + }, + { + "epoch": 1.061735030415557, + "grad_norm": 0.4427298605442047, + "learning_rate": 1.9240931432009346e-06, + "loss": 0.065, + "num_input_tokens_seen": 29296992, + "step": 43460 + }, + { + "epoch": 1.0618571812474042, + "grad_norm": 0.4335486590862274, + "learning_rate": 1.9240605495289533e-06, + "loss": 0.1063, + "num_input_tokens_seen": 29300320, + "step": 43465 + }, + { + "epoch": 1.0619793320792514, + "grad_norm": 21.76557731628418, + "learning_rate": 1.924027949136955e-06, + "loss": 0.105, + "num_input_tokens_seen": 29303520, + "step": 43470 + }, + { + "epoch": 1.0621014829110986, + "grad_norm": 6.954076766967773, + "learning_rate": 1.9239953420251767e-06, + "loss": 0.024, + "num_input_tokens_seen": 29306592, + "step": 43475 + }, + { + "epoch": 1.0622236337429458, + "grad_norm": 2.9126462936401367, + "learning_rate": 1.9239627281938562e-06, + "loss": 0.1404, + "num_input_tokens_seen": 29310048, + "step": 43480 + }, + { + "epoch": 1.062345784574793, + "grad_norm": 0.0804481953382492, + "learning_rate": 1.92393010764323e-06, + "loss": 0.0554, + "num_input_tokens_seen": 29313440, + "step": 43485 + }, + { + "epoch": 1.0624679354066402, + "grad_norm": 57.70439910888672, + "learning_rate": 1.9238974803735357e-06, + "loss": 0.0256, + "num_input_tokens_seen": 29316896, + "step": 43490 + }, + { + "epoch": 1.0625900862384874, + "grad_norm": 20.333383560180664, + "learning_rate": 1.92386484638501e-06, + "loss": 0.102, + "num_input_tokens_seen": 29320288, + "step": 43495 + }, + { + "epoch": 1.0627122370703344, + "grad_norm": 10.79039192199707, + "learning_rate": 1.923832205677891e-06, + "loss": 0.0439, + "num_input_tokens_seen": 29323296, + "step": 43500 + }, + { + "epoch": 1.0628343879021815, + "grad_norm": 0.3620006740093231, + "learning_rate": 1.9237995582524154e-06, + "loss": 0.0404, + "num_input_tokens_seen": 29326304, + "step": 43505 + }, + { + "epoch": 1.0629565387340287, + "grad_norm": 18.919797897338867, + "learning_rate": 1.923766904108821e-06, + "loss": 0.2442, + "num_input_tokens_seen": 29330016, + "step": 43510 + }, + { + "epoch": 1.063078689565876, + "grad_norm": 0.08378148823976517, + "learning_rate": 1.9237342432473456e-06, + "loss": 0.1451, + "num_input_tokens_seen": 29333536, + "step": 43515 + }, + { + "epoch": 1.0632008403977231, + "grad_norm": 26.00210952758789, + "learning_rate": 1.923701575668226e-06, + "loss": 0.1617, + "num_input_tokens_seen": 29337120, + "step": 43520 + }, + { + "epoch": 1.0633229912295703, + "grad_norm": 0.38692906498908997, + "learning_rate": 1.9236689013717006e-06, + "loss": 0.0353, + "num_input_tokens_seen": 29340512, + "step": 43525 + }, + { + "epoch": 1.0634451420614175, + "grad_norm": 30.114299774169922, + "learning_rate": 1.9236362203580063e-06, + "loss": 0.0301, + "num_input_tokens_seen": 29343584, + "step": 43530 + }, + { + "epoch": 1.0635672928932647, + "grad_norm": 0.2355722337961197, + "learning_rate": 1.9236035326273806e-06, + "loss": 0.0267, + "num_input_tokens_seen": 29346848, + "step": 43535 + }, + { + "epoch": 1.0636894437251119, + "grad_norm": 0.0823436751961708, + "learning_rate": 1.923570838180062e-06, + "loss": 0.1429, + "num_input_tokens_seen": 29350432, + "step": 43540 + }, + { + "epoch": 1.0638115945569588, + "grad_norm": 0.10285349190235138, + "learning_rate": 1.9235381370162872e-06, + "loss": 0.1278, + "num_input_tokens_seen": 29353696, + "step": 43545 + }, + { + "epoch": 1.063933745388806, + "grad_norm": 11.621041297912598, + "learning_rate": 1.923505429136295e-06, + "loss": 0.3489, + "num_input_tokens_seen": 29357024, + "step": 43550 + }, + { + "epoch": 1.0640558962206532, + "grad_norm": 0.04566968232393265, + "learning_rate": 1.923472714540323e-06, + "loss": 0.1869, + "num_input_tokens_seen": 29360480, + "step": 43555 + }, + { + "epoch": 1.0641780470525004, + "grad_norm": 0.7290300130844116, + "learning_rate": 1.9234399932286093e-06, + "loss": 0.1381, + "num_input_tokens_seen": 29364384, + "step": 43560 + }, + { + "epoch": 1.0643001978843476, + "grad_norm": 0.6765745878219604, + "learning_rate": 1.9234072652013915e-06, + "loss": 0.1341, + "num_input_tokens_seen": 29368544, + "step": 43565 + }, + { + "epoch": 1.0644223487161948, + "grad_norm": 0.37754663825035095, + "learning_rate": 1.9233745304589074e-06, + "loss": 0.1317, + "num_input_tokens_seen": 29371552, + "step": 43570 + }, + { + "epoch": 1.064544499548042, + "grad_norm": 0.2278277426958084, + "learning_rate": 1.9233417890013956e-06, + "loss": 0.0815, + "num_input_tokens_seen": 29375008, + "step": 43575 + }, + { + "epoch": 1.0646666503798892, + "grad_norm": 58.44916534423828, + "learning_rate": 1.923309040829094e-06, + "loss": 0.0384, + "num_input_tokens_seen": 29378400, + "step": 43580 + }, + { + "epoch": 1.0647888012117361, + "grad_norm": 0.45989304780960083, + "learning_rate": 1.9232762859422404e-06, + "loss": 0.0058, + "num_input_tokens_seen": 29381600, + "step": 43585 + }, + { + "epoch": 1.0649109520435833, + "grad_norm": 13.566542625427246, + "learning_rate": 1.9232435243410735e-06, + "loss": 0.0437, + "num_input_tokens_seen": 29385184, + "step": 43590 + }, + { + "epoch": 1.0650331028754305, + "grad_norm": 114.31439971923828, + "learning_rate": 1.9232107560258317e-06, + "loss": 0.0138, + "num_input_tokens_seen": 29388128, + "step": 43595 + }, + { + "epoch": 1.0651552537072777, + "grad_norm": 1.0016684532165527, + "learning_rate": 1.9231779809967526e-06, + "loss": 0.1534, + "num_input_tokens_seen": 29391136, + "step": 43600 + }, + { + "epoch": 1.065277404539125, + "grad_norm": 4.255090236663818, + "learning_rate": 1.9231451992540747e-06, + "loss": 0.0552, + "num_input_tokens_seen": 29394272, + "step": 43605 + }, + { + "epoch": 1.065399555370972, + "grad_norm": 34.4926872253418, + "learning_rate": 1.923112410798037e-06, + "loss": 0.1975, + "num_input_tokens_seen": 29398176, + "step": 43610 + }, + { + "epoch": 1.0655217062028193, + "grad_norm": 10.679594993591309, + "learning_rate": 1.923079615628877e-06, + "loss": 0.0785, + "num_input_tokens_seen": 29401504, + "step": 43615 + }, + { + "epoch": 1.0656438570346665, + "grad_norm": 0.48413512110710144, + "learning_rate": 1.9230468137468344e-06, + "loss": 0.0434, + "num_input_tokens_seen": 29404512, + "step": 43620 + }, + { + "epoch": 1.0657660078665137, + "grad_norm": 21.062744140625, + "learning_rate": 1.923014005152147e-06, + "loss": 0.1786, + "num_input_tokens_seen": 29407584, + "step": 43625 + }, + { + "epoch": 1.0658881586983608, + "grad_norm": 0.29427990317344666, + "learning_rate": 1.9229811898450533e-06, + "loss": 0.0271, + "num_input_tokens_seen": 29410976, + "step": 43630 + }, + { + "epoch": 1.0660103095302078, + "grad_norm": 19.07900047302246, + "learning_rate": 1.9229483678257924e-06, + "loss": 0.1013, + "num_input_tokens_seen": 29414432, + "step": 43635 + }, + { + "epoch": 1.066132460362055, + "grad_norm": 0.23669935762882233, + "learning_rate": 1.9229155390946025e-06, + "loss": 0.0404, + "num_input_tokens_seen": 29418400, + "step": 43640 + }, + { + "epoch": 1.0662546111939022, + "grad_norm": 0.9076651930809021, + "learning_rate": 1.9228827036517227e-06, + "loss": 0.1124, + "num_input_tokens_seen": 29421600, + "step": 43645 + }, + { + "epoch": 1.0663767620257494, + "grad_norm": 41.32793426513672, + "learning_rate": 1.9228498614973917e-06, + "loss": 0.1655, + "num_input_tokens_seen": 29425120, + "step": 43650 + }, + { + "epoch": 1.0664989128575966, + "grad_norm": 0.12387000024318695, + "learning_rate": 1.922817012631848e-06, + "loss": 0.0103, + "num_input_tokens_seen": 29428640, + "step": 43655 + }, + { + "epoch": 1.0666210636894438, + "grad_norm": 62.622108459472656, + "learning_rate": 1.922784157055331e-06, + "loss": 0.102, + "num_input_tokens_seen": 29431968, + "step": 43660 + }, + { + "epoch": 1.066743214521291, + "grad_norm": 0.23542404174804688, + "learning_rate": 1.9227512947680795e-06, + "loss": 0.0826, + "num_input_tokens_seen": 29435040, + "step": 43665 + }, + { + "epoch": 1.0668653653531381, + "grad_norm": 18.37114715576172, + "learning_rate": 1.922718425770332e-06, + "loss": 0.147, + "num_input_tokens_seen": 29438368, + "step": 43670 + }, + { + "epoch": 1.066987516184985, + "grad_norm": 0.10454131662845612, + "learning_rate": 1.922685550062328e-06, + "loss": 0.019, + "num_input_tokens_seen": 29441632, + "step": 43675 + }, + { + "epoch": 1.0671096670168323, + "grad_norm": 25.471887588500977, + "learning_rate": 1.922652667644307e-06, + "loss": 0.0625, + "num_input_tokens_seen": 29444576, + "step": 43680 + }, + { + "epoch": 1.0672318178486795, + "grad_norm": 2.033235788345337, + "learning_rate": 1.922619778516507e-06, + "loss": 0.0015, + "num_input_tokens_seen": 29448032, + "step": 43685 + }, + { + "epoch": 1.0673539686805267, + "grad_norm": 0.3267309367656708, + "learning_rate": 1.922586882679168e-06, + "loss": 0.0739, + "num_input_tokens_seen": 29451424, + "step": 43690 + }, + { + "epoch": 1.0674761195123739, + "grad_norm": 133.7386016845703, + "learning_rate": 1.9225539801325293e-06, + "loss": 0.0986, + "num_input_tokens_seen": 29454624, + "step": 43695 + }, + { + "epoch": 1.067598270344221, + "grad_norm": 5.09596586227417, + "learning_rate": 1.92252107087683e-06, + "loss": 0.0972, + "num_input_tokens_seen": 29458016, + "step": 43700 + }, + { + "epoch": 1.0677204211760682, + "grad_norm": 0.07073884457349777, + "learning_rate": 1.922488154912309e-06, + "loss": 0.0258, + "num_input_tokens_seen": 29461664, + "step": 43705 + }, + { + "epoch": 1.0678425720079154, + "grad_norm": 0.4683375656604767, + "learning_rate": 1.9224552322392064e-06, + "loss": 0.0009, + "num_input_tokens_seen": 29464992, + "step": 43710 + }, + { + "epoch": 1.0679647228397626, + "grad_norm": 0.09089743345975876, + "learning_rate": 1.9224223028577613e-06, + "loss": 0.0303, + "num_input_tokens_seen": 29468192, + "step": 43715 + }, + { + "epoch": 1.0680868736716098, + "grad_norm": 45.598567962646484, + "learning_rate": 1.9223893667682125e-06, + "loss": 0.2321, + "num_input_tokens_seen": 29472160, + "step": 43720 + }, + { + "epoch": 1.0682090245034568, + "grad_norm": 0.31310445070266724, + "learning_rate": 1.9223564239708e-06, + "loss": 0.1665, + "num_input_tokens_seen": 29475808, + "step": 43725 + }, + { + "epoch": 1.068331175335304, + "grad_norm": 0.5182093977928162, + "learning_rate": 1.9223234744657644e-06, + "loss": 0.0035, + "num_input_tokens_seen": 29479904, + "step": 43730 + }, + { + "epoch": 1.0684533261671512, + "grad_norm": 0.7424346804618835, + "learning_rate": 1.922290518253344e-06, + "loss": 0.0922, + "num_input_tokens_seen": 29483104, + "step": 43735 + }, + { + "epoch": 1.0685754769989984, + "grad_norm": 17.64950180053711, + "learning_rate": 1.922257555333779e-06, + "loss": 0.0933, + "num_input_tokens_seen": 29486240, + "step": 43740 + }, + { + "epoch": 1.0686976278308455, + "grad_norm": 1.901727318763733, + "learning_rate": 1.9222245857073086e-06, + "loss": 0.0801, + "num_input_tokens_seen": 29489632, + "step": 43745 + }, + { + "epoch": 1.0688197786626927, + "grad_norm": 0.5220657587051392, + "learning_rate": 1.922191609374173e-06, + "loss": 0.033, + "num_input_tokens_seen": 29493600, + "step": 43750 + }, + { + "epoch": 1.06894192949454, + "grad_norm": 12.645177841186523, + "learning_rate": 1.9221586263346124e-06, + "loss": 0.0382, + "num_input_tokens_seen": 29496928, + "step": 43755 + }, + { + "epoch": 1.0690640803263871, + "grad_norm": 67.977783203125, + "learning_rate": 1.922125636588866e-06, + "loss": 0.2569, + "num_input_tokens_seen": 29500384, + "step": 43760 + }, + { + "epoch": 1.069186231158234, + "grad_norm": 0.14421771466732025, + "learning_rate": 1.9220926401371738e-06, + "loss": 0.001, + "num_input_tokens_seen": 29503712, + "step": 43765 + }, + { + "epoch": 1.0693083819900813, + "grad_norm": 21.54201316833496, + "learning_rate": 1.9220596369797765e-06, + "loss": 0.2355, + "num_input_tokens_seen": 29507488, + "step": 43770 + }, + { + "epoch": 1.0694305328219285, + "grad_norm": 11.607332229614258, + "learning_rate": 1.9220266271169127e-06, + "loss": 0.0427, + "num_input_tokens_seen": 29510816, + "step": 43775 + }, + { + "epoch": 1.0695526836537756, + "grad_norm": 2.3675055503845215, + "learning_rate": 1.921993610548824e-06, + "loss": 0.0012, + "num_input_tokens_seen": 29514016, + "step": 43780 + }, + { + "epoch": 1.0696748344856228, + "grad_norm": 0.29298821091651917, + "learning_rate": 1.9219605872757493e-06, + "loss": 0.0944, + "num_input_tokens_seen": 29517216, + "step": 43785 + }, + { + "epoch": 1.06979698531747, + "grad_norm": 21.916318893432617, + "learning_rate": 1.921927557297929e-06, + "loss": 0.0709, + "num_input_tokens_seen": 29520480, + "step": 43790 + }, + { + "epoch": 1.0699191361493172, + "grad_norm": 0.1172017902135849, + "learning_rate": 1.9218945206156043e-06, + "loss": 0.0458, + "num_input_tokens_seen": 29523552, + "step": 43795 + }, + { + "epoch": 1.0700412869811644, + "grad_norm": 0.7703777551651001, + "learning_rate": 1.921861477229014e-06, + "loss": 0.0711, + "num_input_tokens_seen": 29526752, + "step": 43800 + }, + { + "epoch": 1.0701634378130116, + "grad_norm": 0.26279208064079285, + "learning_rate": 1.9218284271384e-06, + "loss": 0.0032, + "num_input_tokens_seen": 29529760, + "step": 43805 + }, + { + "epoch": 1.0702855886448588, + "grad_norm": 11.817235946655273, + "learning_rate": 1.9217953703440007e-06, + "loss": 0.0755, + "num_input_tokens_seen": 29533088, + "step": 43810 + }, + { + "epoch": 1.0704077394767058, + "grad_norm": 17.30320167541504, + "learning_rate": 1.921762306846058e-06, + "loss": 0.1385, + "num_input_tokens_seen": 29536544, + "step": 43815 + }, + { + "epoch": 1.070529890308553, + "grad_norm": 0.43149492144584656, + "learning_rate": 1.921729236644812e-06, + "loss": 0.0231, + "num_input_tokens_seen": 29539680, + "step": 43820 + }, + { + "epoch": 1.0706520411404001, + "grad_norm": 0.49309828877449036, + "learning_rate": 1.9216961597405028e-06, + "loss": 0.0352, + "num_input_tokens_seen": 29543328, + "step": 43825 + }, + { + "epoch": 1.0707741919722473, + "grad_norm": 33.81893539428711, + "learning_rate": 1.9216630761333713e-06, + "loss": 0.1591, + "num_input_tokens_seen": 29546848, + "step": 43830 + }, + { + "epoch": 1.0708963428040945, + "grad_norm": 0.11472368985414505, + "learning_rate": 1.921629985823658e-06, + "loss": 0.0081, + "num_input_tokens_seen": 29550048, + "step": 43835 + }, + { + "epoch": 1.0710184936359417, + "grad_norm": 1.5263179540634155, + "learning_rate": 1.9215968888116038e-06, + "loss": 0.0762, + "num_input_tokens_seen": 29553312, + "step": 43840 + }, + { + "epoch": 1.071140644467789, + "grad_norm": 12.363460540771484, + "learning_rate": 1.9215637850974488e-06, + "loss": 0.1954, + "num_input_tokens_seen": 29556256, + "step": 43845 + }, + { + "epoch": 1.071262795299636, + "grad_norm": 23.935073852539062, + "learning_rate": 1.921530674681434e-06, + "loss": 0.1681, + "num_input_tokens_seen": 29559712, + "step": 43850 + }, + { + "epoch": 1.071384946131483, + "grad_norm": 2.8645405769348145, + "learning_rate": 1.921497557563801e-06, + "loss": 0.0359, + "num_input_tokens_seen": 29562976, + "step": 43855 + }, + { + "epoch": 1.0715070969633302, + "grad_norm": 0.6907225251197815, + "learning_rate": 1.921464433744789e-06, + "loss": 0.0823, + "num_input_tokens_seen": 29566496, + "step": 43860 + }, + { + "epoch": 1.0716292477951774, + "grad_norm": 0.04008014500141144, + "learning_rate": 1.9214313032246404e-06, + "loss": 0.0272, + "num_input_tokens_seen": 29569440, + "step": 43865 + }, + { + "epoch": 1.0717513986270246, + "grad_norm": 0.18733011186122894, + "learning_rate": 1.921398166003595e-06, + "loss": 0.0815, + "num_input_tokens_seen": 29572960, + "step": 43870 + }, + { + "epoch": 1.0718735494588718, + "grad_norm": 12.819915771484375, + "learning_rate": 1.921365022081895e-06, + "loss": 0.0707, + "num_input_tokens_seen": 29576096, + "step": 43875 + }, + { + "epoch": 1.071995700290719, + "grad_norm": 22.413814544677734, + "learning_rate": 1.9213318714597803e-06, + "loss": 0.1535, + "num_input_tokens_seen": 29579488, + "step": 43880 + }, + { + "epoch": 1.0721178511225662, + "grad_norm": 0.17186643183231354, + "learning_rate": 1.9212987141374924e-06, + "loss": 0.0779, + "num_input_tokens_seen": 29582880, + "step": 43885 + }, + { + "epoch": 1.0722400019544134, + "grad_norm": 0.30322572588920593, + "learning_rate": 1.9212655501152726e-06, + "loss": 0.1816, + "num_input_tokens_seen": 29586272, + "step": 43890 + }, + { + "epoch": 1.0723621527862606, + "grad_norm": 0.1031859815120697, + "learning_rate": 1.921232379393362e-06, + "loss": 0.0386, + "num_input_tokens_seen": 29589856, + "step": 43895 + }, + { + "epoch": 1.0724843036181075, + "grad_norm": 1.8539291620254517, + "learning_rate": 1.9211992019720015e-06, + "loss": 0.0355, + "num_input_tokens_seen": 29593824, + "step": 43900 + }, + { + "epoch": 1.0726064544499547, + "grad_norm": 71.8545150756836, + "learning_rate": 1.9211660178514326e-06, + "loss": 0.0126, + "num_input_tokens_seen": 29596960, + "step": 43905 + }, + { + "epoch": 1.072728605281802, + "grad_norm": 20.38343620300293, + "learning_rate": 1.921132827031897e-06, + "loss": 0.1277, + "num_input_tokens_seen": 29600416, + "step": 43910 + }, + { + "epoch": 1.072850756113649, + "grad_norm": 0.04561031609773636, + "learning_rate": 1.9210996295136356e-06, + "loss": 0.0773, + "num_input_tokens_seen": 29603616, + "step": 43915 + }, + { + "epoch": 1.0729729069454963, + "grad_norm": 0.04822149500250816, + "learning_rate": 1.92106642529689e-06, + "loss": 0.0019, + "num_input_tokens_seen": 29607328, + "step": 43920 + }, + { + "epoch": 1.0730950577773435, + "grad_norm": 0.28394651412963867, + "learning_rate": 1.9210332143819016e-06, + "loss": 0.0476, + "num_input_tokens_seen": 29610848, + "step": 43925 + }, + { + "epoch": 1.0732172086091907, + "grad_norm": 1.6123360395431519, + "learning_rate": 1.920999996768912e-06, + "loss": 0.0321, + "num_input_tokens_seen": 29613984, + "step": 43930 + }, + { + "epoch": 1.0733393594410379, + "grad_norm": 0.0532742440700531, + "learning_rate": 1.9209667724581623e-06, + "loss": 0.047, + "num_input_tokens_seen": 29617184, + "step": 43935 + }, + { + "epoch": 1.073461510272885, + "grad_norm": 1.2695121765136719, + "learning_rate": 1.9209335414498945e-06, + "loss": 0.1575, + "num_input_tokens_seen": 29620640, + "step": 43940 + }, + { + "epoch": 1.073583661104732, + "grad_norm": 0.005534649360924959, + "learning_rate": 1.9209003037443506e-06, + "loss": 0.0857, + "num_input_tokens_seen": 29623648, + "step": 43945 + }, + { + "epoch": 1.0737058119365792, + "grad_norm": 16.748472213745117, + "learning_rate": 1.920867059341772e-06, + "loss": 0.1343, + "num_input_tokens_seen": 29626784, + "step": 43950 + }, + { + "epoch": 1.0738279627684264, + "grad_norm": 8.2855224609375, + "learning_rate": 1.9208338082424006e-06, + "loss": 0.0476, + "num_input_tokens_seen": 29630176, + "step": 43955 + }, + { + "epoch": 1.0739501136002736, + "grad_norm": 0.3709075152873993, + "learning_rate": 1.920800550446478e-06, + "loss": 0.0022, + "num_input_tokens_seen": 29633312, + "step": 43960 + }, + { + "epoch": 1.0740722644321208, + "grad_norm": 72.69107055664062, + "learning_rate": 1.920767285954246e-06, + "loss": 0.0819, + "num_input_tokens_seen": 29636832, + "step": 43965 + }, + { + "epoch": 1.074194415263968, + "grad_norm": 10.143033981323242, + "learning_rate": 1.9207340147659465e-06, + "loss": 0.0907, + "num_input_tokens_seen": 29640416, + "step": 43970 + }, + { + "epoch": 1.0743165660958152, + "grad_norm": 0.12058486044406891, + "learning_rate": 1.9207007368818217e-06, + "loss": 0.08, + "num_input_tokens_seen": 29643680, + "step": 43975 + }, + { + "epoch": 1.0744387169276624, + "grad_norm": 56.001800537109375, + "learning_rate": 1.9206674523021135e-06, + "loss": 0.1525, + "num_input_tokens_seen": 29647328, + "step": 43980 + }, + { + "epoch": 1.0745608677595095, + "grad_norm": 0.35678938031196594, + "learning_rate": 1.9206341610270644e-06, + "loss": 0.0012, + "num_input_tokens_seen": 29651104, + "step": 43985 + }, + { + "epoch": 1.0746830185913565, + "grad_norm": 0.42853277921676636, + "learning_rate": 1.9206008630569157e-06, + "loss": 0.059, + "num_input_tokens_seen": 29654496, + "step": 43990 + }, + { + "epoch": 1.0748051694232037, + "grad_norm": 0.038223832845687866, + "learning_rate": 1.9205675583919096e-06, + "loss": 0.1075, + "num_input_tokens_seen": 29657760, + "step": 43995 + }, + { + "epoch": 1.0749273202550509, + "grad_norm": 12.201236724853516, + "learning_rate": 1.920534247032289e-06, + "loss": 0.284, + "num_input_tokens_seen": 29661024, + "step": 44000 + }, + { + "epoch": 1.075049471086898, + "grad_norm": 17.112329483032227, + "learning_rate": 1.9205009289782956e-06, + "loss": 0.1003, + "num_input_tokens_seen": 29663840, + "step": 44005 + }, + { + "epoch": 1.0751716219187453, + "grad_norm": 8.583478927612305, + "learning_rate": 1.9204676042301718e-06, + "loss": 0.112, + "num_input_tokens_seen": 29667488, + "step": 44010 + }, + { + "epoch": 1.0752937727505925, + "grad_norm": 0.381719708442688, + "learning_rate": 1.92043427278816e-06, + "loss": 0.2006, + "num_input_tokens_seen": 29671072, + "step": 44015 + }, + { + "epoch": 1.0754159235824396, + "grad_norm": 0.16106979548931122, + "learning_rate": 1.920400934652503e-06, + "loss": 0.0107, + "num_input_tokens_seen": 29674656, + "step": 44020 + }, + { + "epoch": 1.0755380744142868, + "grad_norm": 102.35822296142578, + "learning_rate": 1.9203675898234426e-06, + "loss": 0.1733, + "num_input_tokens_seen": 29678176, + "step": 44025 + }, + { + "epoch": 1.075660225246134, + "grad_norm": 0.7088550925254822, + "learning_rate": 1.9203342383012214e-06, + "loss": 0.0602, + "num_input_tokens_seen": 29681696, + "step": 44030 + }, + { + "epoch": 1.075782376077981, + "grad_norm": 16.169422149658203, + "learning_rate": 1.920300880086082e-06, + "loss": 0.0779, + "num_input_tokens_seen": 29684960, + "step": 44035 + }, + { + "epoch": 1.0759045269098282, + "grad_norm": 51.24101638793945, + "learning_rate": 1.9202675151782675e-06, + "loss": 0.1817, + "num_input_tokens_seen": 29688224, + "step": 44040 + }, + { + "epoch": 1.0760266777416754, + "grad_norm": 0.2482772320508957, + "learning_rate": 1.9202341435780197e-06, + "loss": 0.0289, + "num_input_tokens_seen": 29691616, + "step": 44045 + }, + { + "epoch": 1.0761488285735226, + "grad_norm": 10.518967628479004, + "learning_rate": 1.9202007652855822e-06, + "loss": 0.0737, + "num_input_tokens_seen": 29694688, + "step": 44050 + }, + { + "epoch": 1.0762709794053698, + "grad_norm": 12.700284957885742, + "learning_rate": 1.920167380301197e-06, + "loss": 0.0577, + "num_input_tokens_seen": 29697952, + "step": 44055 + }, + { + "epoch": 1.076393130237217, + "grad_norm": 0.1491347998380661, + "learning_rate": 1.920133988625107e-06, + "loss": 0.0635, + "num_input_tokens_seen": 29701152, + "step": 44060 + }, + { + "epoch": 1.0765152810690641, + "grad_norm": 80.09822082519531, + "learning_rate": 1.920100590257555e-06, + "loss": 0.0122, + "num_input_tokens_seen": 29704672, + "step": 44065 + }, + { + "epoch": 1.0766374319009113, + "grad_norm": 0.10389073938131332, + "learning_rate": 1.920067185198784e-06, + "loss": 0.065, + "num_input_tokens_seen": 29708192, + "step": 44070 + }, + { + "epoch": 1.0767595827327585, + "grad_norm": 28.060272216796875, + "learning_rate": 1.9200337734490374e-06, + "loss": 0.0481, + "num_input_tokens_seen": 29711264, + "step": 44075 + }, + { + "epoch": 1.0768817335646055, + "grad_norm": 19.16193962097168, + "learning_rate": 1.9200003550085575e-06, + "loss": 0.1977, + "num_input_tokens_seen": 29714400, + "step": 44080 + }, + { + "epoch": 1.0770038843964527, + "grad_norm": 0.10936179757118225, + "learning_rate": 1.919966929877587e-06, + "loss": 0.0458, + "num_input_tokens_seen": 29717920, + "step": 44085 + }, + { + "epoch": 1.0771260352282999, + "grad_norm": 0.5304973125457764, + "learning_rate": 1.9199334980563707e-06, + "loss": 0.0403, + "num_input_tokens_seen": 29721184, + "step": 44090 + }, + { + "epoch": 1.077248186060147, + "grad_norm": 0.2360510379076004, + "learning_rate": 1.91990005954515e-06, + "loss": 0.0826, + "num_input_tokens_seen": 29724832, + "step": 44095 + }, + { + "epoch": 1.0773703368919942, + "grad_norm": 0.24685044586658478, + "learning_rate": 1.919866614344169e-06, + "loss": 0.0805, + "num_input_tokens_seen": 29728416, + "step": 44100 + }, + { + "epoch": 1.0774924877238414, + "grad_norm": 0.09603308141231537, + "learning_rate": 1.9198331624536696e-06, + "loss": 0.146, + "num_input_tokens_seen": 29732064, + "step": 44105 + }, + { + "epoch": 1.0776146385556886, + "grad_norm": 18.899932861328125, + "learning_rate": 1.9197997038738967e-06, + "loss": 0.1583, + "num_input_tokens_seen": 29735840, + "step": 44110 + }, + { + "epoch": 1.0777367893875358, + "grad_norm": 1.469382405281067, + "learning_rate": 1.919766238605093e-06, + "loss": 0.0657, + "num_input_tokens_seen": 29739104, + "step": 44115 + }, + { + "epoch": 1.0778589402193828, + "grad_norm": 73.46088409423828, + "learning_rate": 1.9197327666475017e-06, + "loss": 0.0568, + "num_input_tokens_seen": 29742432, + "step": 44120 + }, + { + "epoch": 1.07798109105123, + "grad_norm": 0.0979587659239769, + "learning_rate": 1.9196992880013662e-06, + "loss": 0.1157, + "num_input_tokens_seen": 29745440, + "step": 44125 + }, + { + "epoch": 1.0781032418830772, + "grad_norm": 0.204317107796669, + "learning_rate": 1.9196658026669303e-06, + "loss": 0.0797, + "num_input_tokens_seen": 29748512, + "step": 44130 + }, + { + "epoch": 1.0782253927149243, + "grad_norm": 29.698213577270508, + "learning_rate": 1.9196323106444374e-06, + "loss": 0.2125, + "num_input_tokens_seen": 29751648, + "step": 44135 + }, + { + "epoch": 1.0783475435467715, + "grad_norm": 0.14883090555667877, + "learning_rate": 1.9195988119341306e-06, + "loss": 0.0615, + "num_input_tokens_seen": 29754592, + "step": 44140 + }, + { + "epoch": 1.0784696943786187, + "grad_norm": 1.3855115175247192, + "learning_rate": 1.9195653065362544e-06, + "loss": 0.0466, + "num_input_tokens_seen": 29757600, + "step": 44145 + }, + { + "epoch": 1.078591845210466, + "grad_norm": 24.51190185546875, + "learning_rate": 1.9195317944510517e-06, + "loss": 0.036, + "num_input_tokens_seen": 29761184, + "step": 44150 + }, + { + "epoch": 1.078713996042313, + "grad_norm": 22.161216735839844, + "learning_rate": 1.9194982756787662e-06, + "loss": 0.1806, + "num_input_tokens_seen": 29764768, + "step": 44155 + }, + { + "epoch": 1.0788361468741603, + "grad_norm": 0.10817687213420868, + "learning_rate": 1.9194647502196422e-06, + "loss": 0.0684, + "num_input_tokens_seen": 29768416, + "step": 44160 + }, + { + "epoch": 1.0789582977060075, + "grad_norm": 29.709800720214844, + "learning_rate": 1.9194312180739237e-06, + "loss": 0.0755, + "num_input_tokens_seen": 29771552, + "step": 44165 + }, + { + "epoch": 1.0790804485378545, + "grad_norm": 11.85411262512207, + "learning_rate": 1.9193976792418533e-06, + "loss": 0.0485, + "num_input_tokens_seen": 29775584, + "step": 44170 + }, + { + "epoch": 1.0792025993697016, + "grad_norm": 0.3583597242832184, + "learning_rate": 1.919364133723676e-06, + "loss": 0.1085, + "num_input_tokens_seen": 29778400, + "step": 44175 + }, + { + "epoch": 1.0793247502015488, + "grad_norm": 0.35112762451171875, + "learning_rate": 1.9193305815196355e-06, + "loss": 0.0418, + "num_input_tokens_seen": 29781728, + "step": 44180 + }, + { + "epoch": 1.079446901033396, + "grad_norm": 14.021635055541992, + "learning_rate": 1.9192970226299757e-06, + "loss": 0.1246, + "num_input_tokens_seen": 29785056, + "step": 44185 + }, + { + "epoch": 1.0795690518652432, + "grad_norm": 0.19013722240924835, + "learning_rate": 1.919263457054941e-06, + "loss": 0.0421, + "num_input_tokens_seen": 29788256, + "step": 44190 + }, + { + "epoch": 1.0796912026970904, + "grad_norm": 2.3619987964630127, + "learning_rate": 1.9192298847947746e-06, + "loss": 0.0286, + "num_input_tokens_seen": 29791392, + "step": 44195 + }, + { + "epoch": 1.0798133535289376, + "grad_norm": 2.7994532585144043, + "learning_rate": 1.9191963058497212e-06, + "loss": 0.004, + "num_input_tokens_seen": 29794336, + "step": 44200 + }, + { + "epoch": 1.0799355043607848, + "grad_norm": 0.09319982677698135, + "learning_rate": 1.9191627202200258e-06, + "loss": 0.0518, + "num_input_tokens_seen": 29797728, + "step": 44205 + }, + { + "epoch": 1.0800576551926317, + "grad_norm": 1.3856775760650635, + "learning_rate": 1.9191291279059312e-06, + "loss": 0.0466, + "num_input_tokens_seen": 29801056, + "step": 44210 + }, + { + "epoch": 1.080179806024479, + "grad_norm": 0.10315916687250137, + "learning_rate": 1.9190955289076825e-06, + "loss": 0.1837, + "num_input_tokens_seen": 29804512, + "step": 44215 + }, + { + "epoch": 1.0803019568563261, + "grad_norm": 0.18177196383476257, + "learning_rate": 1.9190619232255242e-06, + "loss": 0.1528, + "num_input_tokens_seen": 29808032, + "step": 44220 + }, + { + "epoch": 1.0804241076881733, + "grad_norm": 16.280742645263672, + "learning_rate": 1.9190283108597e-06, + "loss": 0.1278, + "num_input_tokens_seen": 29811296, + "step": 44225 + }, + { + "epoch": 1.0805462585200205, + "grad_norm": 0.3395654559135437, + "learning_rate": 1.918994691810455e-06, + "loss": 0.0481, + "num_input_tokens_seen": 29815904, + "step": 44230 + }, + { + "epoch": 1.0806684093518677, + "grad_norm": 28.75006103515625, + "learning_rate": 1.9189610660780335e-06, + "loss": 0.0786, + "num_input_tokens_seen": 29819040, + "step": 44235 + }, + { + "epoch": 1.0807905601837149, + "grad_norm": 0.04071475565433502, + "learning_rate": 1.9189274336626795e-06, + "loss": 0.0959, + "num_input_tokens_seen": 29822496, + "step": 44240 + }, + { + "epoch": 1.080912711015562, + "grad_norm": 37.24461364746094, + "learning_rate": 1.9188937945646386e-06, + "loss": 0.095, + "num_input_tokens_seen": 29825696, + "step": 44245 + }, + { + "epoch": 1.0810348618474093, + "grad_norm": 13.556794166564941, + "learning_rate": 1.9188601487841545e-06, + "loss": 0.076, + "num_input_tokens_seen": 29829024, + "step": 44250 + }, + { + "epoch": 1.0811570126792565, + "grad_norm": 0.16886696219444275, + "learning_rate": 1.9188264963214724e-06, + "loss": 0.0274, + "num_input_tokens_seen": 29832480, + "step": 44255 + }, + { + "epoch": 1.0812791635111034, + "grad_norm": 1.2870328426361084, + "learning_rate": 1.918792837176837e-06, + "loss": 0.0585, + "num_input_tokens_seen": 29835744, + "step": 44260 + }, + { + "epoch": 1.0814013143429506, + "grad_norm": 1.8540476560592651, + "learning_rate": 1.9187591713504925e-06, + "loss": 0.0317, + "num_input_tokens_seen": 29838752, + "step": 44265 + }, + { + "epoch": 1.0815234651747978, + "grad_norm": 20.007299423217773, + "learning_rate": 1.9187254988426846e-06, + "loss": 0.0282, + "num_input_tokens_seen": 29842080, + "step": 44270 + }, + { + "epoch": 1.081645616006645, + "grad_norm": 1.7691351175308228, + "learning_rate": 1.918691819653658e-06, + "loss": 0.102, + "num_input_tokens_seen": 29844832, + "step": 44275 + }, + { + "epoch": 1.0817677668384922, + "grad_norm": 27.982473373413086, + "learning_rate": 1.9186581337836567e-06, + "loss": 0.1044, + "num_input_tokens_seen": 29848096, + "step": 44280 + }, + { + "epoch": 1.0818899176703394, + "grad_norm": 0.024598833173513412, + "learning_rate": 1.918624441232927e-06, + "loss": 0.0015, + "num_input_tokens_seen": 29851552, + "step": 44285 + }, + { + "epoch": 1.0820120685021866, + "grad_norm": 89.78707885742188, + "learning_rate": 1.918590742001713e-06, + "loss": 0.1048, + "num_input_tokens_seen": 29855264, + "step": 44290 + }, + { + "epoch": 1.0821342193340338, + "grad_norm": 0.07963894307613373, + "learning_rate": 1.91855703609026e-06, + "loss": 0.1001, + "num_input_tokens_seen": 29858720, + "step": 44295 + }, + { + "epoch": 1.0822563701658807, + "grad_norm": 0.34227511286735535, + "learning_rate": 1.918523323498813e-06, + "loss": 0.0587, + "num_input_tokens_seen": 29862112, + "step": 44300 + }, + { + "epoch": 1.082378520997728, + "grad_norm": 0.024144131690263748, + "learning_rate": 1.9184896042276176e-06, + "loss": 0.0833, + "num_input_tokens_seen": 29865376, + "step": 44305 + }, + { + "epoch": 1.082500671829575, + "grad_norm": 0.03241460397839546, + "learning_rate": 1.9184558782769185e-06, + "loss": 0.0984, + "num_input_tokens_seen": 29868832, + "step": 44310 + }, + { + "epoch": 1.0826228226614223, + "grad_norm": 0.9405149221420288, + "learning_rate": 1.9184221456469615e-06, + "loss": 0.0488, + "num_input_tokens_seen": 29872096, + "step": 44315 + }, + { + "epoch": 1.0827449734932695, + "grad_norm": 0.9719510078430176, + "learning_rate": 1.9183884063379918e-06, + "loss": 0.0499, + "num_input_tokens_seen": 29875232, + "step": 44320 + }, + { + "epoch": 1.0828671243251167, + "grad_norm": 25.23493766784668, + "learning_rate": 1.9183546603502545e-06, + "loss": 0.0383, + "num_input_tokens_seen": 29878880, + "step": 44325 + }, + { + "epoch": 1.0829892751569639, + "grad_norm": 0.5960846543312073, + "learning_rate": 1.9183209076839944e-06, + "loss": 0.0354, + "num_input_tokens_seen": 29882400, + "step": 44330 + }, + { + "epoch": 1.083111425988811, + "grad_norm": 0.018635839223861694, + "learning_rate": 1.9182871483394585e-06, + "loss": 0.0176, + "num_input_tokens_seen": 29885920, + "step": 44335 + }, + { + "epoch": 1.0832335768206582, + "grad_norm": 24.572978973388672, + "learning_rate": 1.918253382316891e-06, + "loss": 0.0967, + "num_input_tokens_seen": 29889504, + "step": 44340 + }, + { + "epoch": 1.0833557276525054, + "grad_norm": 0.4108743965625763, + "learning_rate": 1.9182196096165383e-06, + "loss": 0.0563, + "num_input_tokens_seen": 29892896, + "step": 44345 + }, + { + "epoch": 1.0834778784843524, + "grad_norm": 0.16322478652000427, + "learning_rate": 1.9181858302386454e-06, + "loss": 0.1665, + "num_input_tokens_seen": 29896224, + "step": 44350 + }, + { + "epoch": 1.0836000293161996, + "grad_norm": 28.877195358276367, + "learning_rate": 1.9181520441834582e-06, + "loss": 0.1475, + "num_input_tokens_seen": 29899296, + "step": 44355 + }, + { + "epoch": 1.0837221801480468, + "grad_norm": 13.3076171875, + "learning_rate": 1.9181182514512222e-06, + "loss": 0.1069, + "num_input_tokens_seen": 29902368, + "step": 44360 + }, + { + "epoch": 1.083844330979894, + "grad_norm": 0.08094339072704315, + "learning_rate": 1.9180844520421838e-06, + "loss": 0.0788, + "num_input_tokens_seen": 29905888, + "step": 44365 + }, + { + "epoch": 1.0839664818117412, + "grad_norm": 82.42140197753906, + "learning_rate": 1.918050645956588e-06, + "loss": 0.0217, + "num_input_tokens_seen": 29909344, + "step": 44370 + }, + { + "epoch": 1.0840886326435883, + "grad_norm": 8.881879806518555, + "learning_rate": 1.918016833194681e-06, + "loss": 0.0903, + "num_input_tokens_seen": 29912480, + "step": 44375 + }, + { + "epoch": 1.0842107834754355, + "grad_norm": 40.43014144897461, + "learning_rate": 1.917983013756709e-06, + "loss": 0.2413, + "num_input_tokens_seen": 29916256, + "step": 44380 + }, + { + "epoch": 1.0843329343072827, + "grad_norm": 1.9137089252471924, + "learning_rate": 1.917949187642917e-06, + "loss": 0.0461, + "num_input_tokens_seen": 29919456, + "step": 44385 + }, + { + "epoch": 1.0844550851391297, + "grad_norm": 21.525697708129883, + "learning_rate": 1.917915354853552e-06, + "loss": 0.2388, + "num_input_tokens_seen": 29923232, + "step": 44390 + }, + { + "epoch": 1.0845772359709769, + "grad_norm": 0.4614849090576172, + "learning_rate": 1.9178815153888595e-06, + "loss": 0.1065, + "num_input_tokens_seen": 29926240, + "step": 44395 + }, + { + "epoch": 1.084699386802824, + "grad_norm": 0.8693162798881531, + "learning_rate": 1.917847669249086e-06, + "loss": 0.0584, + "num_input_tokens_seen": 29929696, + "step": 44400 + }, + { + "epoch": 1.0848215376346713, + "grad_norm": 0.8169465661048889, + "learning_rate": 1.917813816434477e-06, + "loss": 0.088, + "num_input_tokens_seen": 29933280, + "step": 44405 + }, + { + "epoch": 1.0849436884665185, + "grad_norm": 0.28742265701293945, + "learning_rate": 1.9177799569452793e-06, + "loss": 0.1019, + "num_input_tokens_seen": 29936608, + "step": 44410 + }, + { + "epoch": 1.0850658392983656, + "grad_norm": 0.9252832531929016, + "learning_rate": 1.917746090781739e-06, + "loss": 0.068, + "num_input_tokens_seen": 29940384, + "step": 44415 + }, + { + "epoch": 1.0851879901302128, + "grad_norm": 25.838499069213867, + "learning_rate": 1.917712217944102e-06, + "loss": 0.0874, + "num_input_tokens_seen": 29943840, + "step": 44420 + }, + { + "epoch": 1.08531014096206, + "grad_norm": 0.21891480684280396, + "learning_rate": 1.917678338432615e-06, + "loss": 0.0362, + "num_input_tokens_seen": 29947168, + "step": 44425 + }, + { + "epoch": 1.0854322917939072, + "grad_norm": 39.00025177001953, + "learning_rate": 1.917644452247524e-06, + "loss": 0.1176, + "num_input_tokens_seen": 29950432, + "step": 44430 + }, + { + "epoch": 1.0855544426257542, + "grad_norm": 0.486000120639801, + "learning_rate": 1.9176105593890765e-06, + "loss": 0.0287, + "num_input_tokens_seen": 29954208, + "step": 44435 + }, + { + "epoch": 1.0856765934576014, + "grad_norm": 25.918472290039062, + "learning_rate": 1.9175766598575177e-06, + "loss": 0.1624, + "num_input_tokens_seen": 29957280, + "step": 44440 + }, + { + "epoch": 1.0857987442894486, + "grad_norm": 16.659223556518555, + "learning_rate": 1.917542753653095e-06, + "loss": 0.128, + "num_input_tokens_seen": 29960416, + "step": 44445 + }, + { + "epoch": 1.0859208951212957, + "grad_norm": 18.979354858398438, + "learning_rate": 1.9175088407760543e-06, + "loss": 0.0584, + "num_input_tokens_seen": 29963808, + "step": 44450 + }, + { + "epoch": 1.086043045953143, + "grad_norm": 51.75289535522461, + "learning_rate": 1.917474921226642e-06, + "loss": 0.1099, + "num_input_tokens_seen": 29967072, + "step": 44455 + }, + { + "epoch": 1.0861651967849901, + "grad_norm": 1.5100349187850952, + "learning_rate": 1.917440995005106e-06, + "loss": 0.0261, + "num_input_tokens_seen": 29970464, + "step": 44460 + }, + { + "epoch": 1.0862873476168373, + "grad_norm": 4.650681018829346, + "learning_rate": 1.9174070621116924e-06, + "loss": 0.1018, + "num_input_tokens_seen": 29974560, + "step": 44465 + }, + { + "epoch": 1.0864094984486845, + "grad_norm": 0.549470067024231, + "learning_rate": 1.9173731225466477e-06, + "loss": 0.0687, + "num_input_tokens_seen": 29977504, + "step": 44470 + }, + { + "epoch": 1.0865316492805317, + "grad_norm": 15.055164337158203, + "learning_rate": 1.917339176310219e-06, + "loss": 0.1373, + "num_input_tokens_seen": 29980896, + "step": 44475 + }, + { + "epoch": 1.0866538001123787, + "grad_norm": 0.01459528598934412, + "learning_rate": 1.917305223402653e-06, + "loss": 0.0765, + "num_input_tokens_seen": 29984160, + "step": 44480 + }, + { + "epoch": 1.0867759509442259, + "grad_norm": 0.21965134143829346, + "learning_rate": 1.9172712638241964e-06, + "loss": 0.0376, + "num_input_tokens_seen": 29987424, + "step": 44485 + }, + { + "epoch": 1.086898101776073, + "grad_norm": 0.47681552171707153, + "learning_rate": 1.917237297575097e-06, + "loss": 0.0685, + "num_input_tokens_seen": 29990496, + "step": 44490 + }, + { + "epoch": 1.0870202526079202, + "grad_norm": 0.1720905601978302, + "learning_rate": 1.9172033246556008e-06, + "loss": 0.0267, + "num_input_tokens_seen": 29993760, + "step": 44495 + }, + { + "epoch": 1.0871424034397674, + "grad_norm": 8.774606704711914, + "learning_rate": 1.9171693450659556e-06, + "loss": 0.0543, + "num_input_tokens_seen": 29997280, + "step": 44500 + }, + { + "epoch": 1.0872645542716146, + "grad_norm": 0.053868431597948074, + "learning_rate": 1.917135358806408e-06, + "loss": 0.0768, + "num_input_tokens_seen": 30000480, + "step": 44505 + }, + { + "epoch": 1.0873867051034618, + "grad_norm": 16.72242546081543, + "learning_rate": 1.9171013658772055e-06, + "loss": 0.1496, + "num_input_tokens_seen": 30003872, + "step": 44510 + }, + { + "epoch": 1.087508855935309, + "grad_norm": 0.11925757676362991, + "learning_rate": 1.9170673662785953e-06, + "loss": 0.0012, + "num_input_tokens_seen": 30007264, + "step": 44515 + }, + { + "epoch": 1.0876310067671562, + "grad_norm": 0.10318820923566818, + "learning_rate": 1.9170333600108246e-06, + "loss": 0.1859, + "num_input_tokens_seen": 30011104, + "step": 44520 + }, + { + "epoch": 1.0877531575990032, + "grad_norm": 0.32234182953834534, + "learning_rate": 1.9169993470741407e-06, + "loss": 0.0786, + "num_input_tokens_seen": 30014368, + "step": 44525 + }, + { + "epoch": 1.0878753084308503, + "grad_norm": 37.324275970458984, + "learning_rate": 1.9169653274687905e-06, + "loss": 0.1248, + "num_input_tokens_seen": 30017952, + "step": 44530 + }, + { + "epoch": 1.0879974592626975, + "grad_norm": 5.065712928771973, + "learning_rate": 1.9169313011950223e-06, + "loss": 0.0366, + "num_input_tokens_seen": 30021408, + "step": 44535 + }, + { + "epoch": 1.0881196100945447, + "grad_norm": 0.061776500195264816, + "learning_rate": 1.9168972682530825e-06, + "loss": 0.0697, + "num_input_tokens_seen": 30025056, + "step": 44540 + }, + { + "epoch": 1.088241760926392, + "grad_norm": 0.0976782888174057, + "learning_rate": 1.9168632286432193e-06, + "loss": 0.0544, + "num_input_tokens_seen": 30028192, + "step": 44545 + }, + { + "epoch": 1.088363911758239, + "grad_norm": 27.349529266357422, + "learning_rate": 1.9168291823656804e-06, + "loss": 0.1594, + "num_input_tokens_seen": 30031200, + "step": 44550 + }, + { + "epoch": 1.0884860625900863, + "grad_norm": 1.1122671365737915, + "learning_rate": 1.916795129420713e-06, + "loss": 0.0597, + "num_input_tokens_seen": 30034208, + "step": 44555 + }, + { + "epoch": 1.0886082134219335, + "grad_norm": 0.43841981887817383, + "learning_rate": 1.9167610698085647e-06, + "loss": 0.0384, + "num_input_tokens_seen": 30037792, + "step": 44560 + }, + { + "epoch": 1.0887303642537807, + "grad_norm": 38.47297286987305, + "learning_rate": 1.9167270035294833e-06, + "loss": 0.1703, + "num_input_tokens_seen": 30041184, + "step": 44565 + }, + { + "epoch": 1.0888525150856276, + "grad_norm": 107.69474029541016, + "learning_rate": 1.9166929305837164e-06, + "loss": 0.0229, + "num_input_tokens_seen": 30044448, + "step": 44570 + }, + { + "epoch": 1.0889746659174748, + "grad_norm": 60.87364959716797, + "learning_rate": 1.9166588509715123e-06, + "loss": 0.1048, + "num_input_tokens_seen": 30047904, + "step": 44575 + }, + { + "epoch": 1.089096816749322, + "grad_norm": 1.1863831281661987, + "learning_rate": 1.916624764693118e-06, + "loss": 0.062, + "num_input_tokens_seen": 30051040, + "step": 44580 + }, + { + "epoch": 1.0892189675811692, + "grad_norm": 0.0778331458568573, + "learning_rate": 1.9165906717487824e-06, + "loss": 0.001, + "num_input_tokens_seen": 30054176, + "step": 44585 + }, + { + "epoch": 1.0893411184130164, + "grad_norm": 14.185705184936523, + "learning_rate": 1.916556572138753e-06, + "loss": 0.1293, + "num_input_tokens_seen": 30057888, + "step": 44590 + }, + { + "epoch": 1.0894632692448636, + "grad_norm": 38.385772705078125, + "learning_rate": 1.916522465863277e-06, + "loss": 0.1436, + "num_input_tokens_seen": 30061088, + "step": 44595 + }, + { + "epoch": 1.0895854200767108, + "grad_norm": 19.040313720703125, + "learning_rate": 1.916488352922604e-06, + "loss": 0.1403, + "num_input_tokens_seen": 30064480, + "step": 44600 + }, + { + "epoch": 1.089707570908558, + "grad_norm": 44.07444763183594, + "learning_rate": 1.9164542333169806e-06, + "loss": 0.1873, + "num_input_tokens_seen": 30067552, + "step": 44605 + }, + { + "epoch": 1.0898297217404052, + "grad_norm": 0.06985550373792648, + "learning_rate": 1.9164201070466556e-06, + "loss": 0.0671, + "num_input_tokens_seen": 30070816, + "step": 44610 + }, + { + "epoch": 1.0899518725722521, + "grad_norm": 0.9508460760116577, + "learning_rate": 1.916385974111877e-06, + "loss": 0.0633, + "num_input_tokens_seen": 30074400, + "step": 44615 + }, + { + "epoch": 1.0900740234040993, + "grad_norm": 0.3236066699028015, + "learning_rate": 1.9163518345128937e-06, + "loss": 0.0559, + "num_input_tokens_seen": 30077728, + "step": 44620 + }, + { + "epoch": 1.0901961742359465, + "grad_norm": 0.3575074076652527, + "learning_rate": 1.9163176882499526e-06, + "loss": 0.0022, + "num_input_tokens_seen": 30081376, + "step": 44625 + }, + { + "epoch": 1.0903183250677937, + "grad_norm": 4.681931972503662, + "learning_rate": 1.9162835353233034e-06, + "loss": 0.0444, + "num_input_tokens_seen": 30084448, + "step": 44630 + }, + { + "epoch": 1.0904404758996409, + "grad_norm": 100.06119537353516, + "learning_rate": 1.9162493757331934e-06, + "loss": 0.1226, + "num_input_tokens_seen": 30088096, + "step": 44635 + }, + { + "epoch": 1.090562626731488, + "grad_norm": 18.056100845336914, + "learning_rate": 1.9162152094798717e-06, + "loss": 0.0867, + "num_input_tokens_seen": 30091616, + "step": 44640 + }, + { + "epoch": 1.0906847775633353, + "grad_norm": 42.22257614135742, + "learning_rate": 1.9161810365635867e-06, + "loss": 0.1927, + "num_input_tokens_seen": 30094816, + "step": 44645 + }, + { + "epoch": 1.0908069283951825, + "grad_norm": 0.09566375613212585, + "learning_rate": 1.9161468569845867e-06, + "loss": 0.1116, + "num_input_tokens_seen": 30097952, + "step": 44650 + }, + { + "epoch": 1.0909290792270294, + "grad_norm": 0.2680375576019287, + "learning_rate": 1.91611267074312e-06, + "loss": 0.0607, + "num_input_tokens_seen": 30101728, + "step": 44655 + }, + { + "epoch": 1.0910512300588766, + "grad_norm": 8.587791442871094, + "learning_rate": 1.9160784778394362e-06, + "loss": 0.1164, + "num_input_tokens_seen": 30105248, + "step": 44660 + }, + { + "epoch": 1.0911733808907238, + "grad_norm": 48.21713638305664, + "learning_rate": 1.916044278273783e-06, + "loss": 0.012, + "num_input_tokens_seen": 30108640, + "step": 44665 + }, + { + "epoch": 1.091295531722571, + "grad_norm": 10.47347354888916, + "learning_rate": 1.916010072046409e-06, + "loss": 0.0913, + "num_input_tokens_seen": 30112160, + "step": 44670 + }, + { + "epoch": 1.0914176825544182, + "grad_norm": 7.425508499145508, + "learning_rate": 1.915975859157564e-06, + "loss": 0.1595, + "num_input_tokens_seen": 30115552, + "step": 44675 + }, + { + "epoch": 1.0915398333862654, + "grad_norm": 22.443424224853516, + "learning_rate": 1.915941639607496e-06, + "loss": 0.0373, + "num_input_tokens_seen": 30118688, + "step": 44680 + }, + { + "epoch": 1.0916619842181126, + "grad_norm": 0.346167653799057, + "learning_rate": 1.915907413396454e-06, + "loss": 0.0661, + "num_input_tokens_seen": 30121888, + "step": 44685 + }, + { + "epoch": 1.0917841350499597, + "grad_norm": 2.4050328731536865, + "learning_rate": 1.915873180524687e-06, + "loss": 0.0781, + "num_input_tokens_seen": 30125280, + "step": 44690 + }, + { + "epoch": 1.091906285881807, + "grad_norm": 0.14322443306446075, + "learning_rate": 1.9158389409924437e-06, + "loss": 0.0918, + "num_input_tokens_seen": 30128992, + "step": 44695 + }, + { + "epoch": 1.0920284367136541, + "grad_norm": 0.1610415279865265, + "learning_rate": 1.9158046947999737e-06, + "loss": 0.0144, + "num_input_tokens_seen": 30132448, + "step": 44700 + }, + { + "epoch": 1.092150587545501, + "grad_norm": 0.2604481279850006, + "learning_rate": 1.9157704419475255e-06, + "loss": 0.0917, + "num_input_tokens_seen": 30135712, + "step": 44705 + }, + { + "epoch": 1.0922727383773483, + "grad_norm": 1.2483242750167847, + "learning_rate": 1.915736182435348e-06, + "loss": 0.0669, + "num_input_tokens_seen": 30139296, + "step": 44710 + }, + { + "epoch": 1.0923948892091955, + "grad_norm": 24.37545394897461, + "learning_rate": 1.9157019162636906e-06, + "loss": 0.0049, + "num_input_tokens_seen": 30142688, + "step": 44715 + }, + { + "epoch": 1.0925170400410427, + "grad_norm": 1.0107009410858154, + "learning_rate": 1.915667643432803e-06, + "loss": 0.1349, + "num_input_tokens_seen": 30146592, + "step": 44720 + }, + { + "epoch": 1.0926391908728899, + "grad_norm": 12.480439186096191, + "learning_rate": 1.915633363942934e-06, + "loss": 0.2515, + "num_input_tokens_seen": 30149984, + "step": 44725 + }, + { + "epoch": 1.092761341704737, + "grad_norm": 2.2978947162628174, + "learning_rate": 1.9155990777943325e-06, + "loss": 0.0047, + "num_input_tokens_seen": 30154144, + "step": 44730 + }, + { + "epoch": 1.0928834925365842, + "grad_norm": 0.31146731972694397, + "learning_rate": 1.9155647849872487e-06, + "loss": 0.1377, + "num_input_tokens_seen": 30157984, + "step": 44735 + }, + { + "epoch": 1.0930056433684314, + "grad_norm": 66.38398742675781, + "learning_rate": 1.9155304855219316e-06, + "loss": 0.2204, + "num_input_tokens_seen": 30161056, + "step": 44740 + }, + { + "epoch": 1.0931277942002784, + "grad_norm": 1.1195906400680542, + "learning_rate": 1.91549617939863e-06, + "loss": 0.0376, + "num_input_tokens_seen": 30164704, + "step": 44745 + }, + { + "epoch": 1.0932499450321256, + "grad_norm": 2.009263753890991, + "learning_rate": 1.9154618666175942e-06, + "loss": 0.0082, + "num_input_tokens_seen": 30168352, + "step": 44750 + }, + { + "epoch": 1.0933720958639728, + "grad_norm": 0.39526981115341187, + "learning_rate": 1.9154275471790733e-06, + "loss": 0.0475, + "num_input_tokens_seen": 30171808, + "step": 44755 + }, + { + "epoch": 1.09349424669582, + "grad_norm": 0.5147704482078552, + "learning_rate": 1.9153932210833173e-06, + "loss": 0.1045, + "num_input_tokens_seen": 30175392, + "step": 44760 + }, + { + "epoch": 1.0936163975276672, + "grad_norm": 2.1972246170043945, + "learning_rate": 1.9153588883305756e-06, + "loss": 0.1195, + "num_input_tokens_seen": 30178720, + "step": 44765 + }, + { + "epoch": 1.0937385483595143, + "grad_norm": 0.09867444634437561, + "learning_rate": 1.9153245489210977e-06, + "loss": 0.0416, + "num_input_tokens_seen": 30181856, + "step": 44770 + }, + { + "epoch": 1.0938606991913615, + "grad_norm": 0.3307472765445709, + "learning_rate": 1.9152902028551335e-06, + "loss": 0.0724, + "num_input_tokens_seen": 30184992, + "step": 44775 + }, + { + "epoch": 1.0939828500232087, + "grad_norm": 51.22042465209961, + "learning_rate": 1.915255850132933e-06, + "loss": 0.0851, + "num_input_tokens_seen": 30188512, + "step": 44780 + }, + { + "epoch": 1.094105000855056, + "grad_norm": 41.89115524291992, + "learning_rate": 1.915221490754746e-06, + "loss": 0.059, + "num_input_tokens_seen": 30191584, + "step": 44785 + }, + { + "epoch": 1.094227151686903, + "grad_norm": 7.868800640106201, + "learning_rate": 1.9151871247208214e-06, + "loss": 0.1301, + "num_input_tokens_seen": 30194592, + "step": 44790 + }, + { + "epoch": 1.09434930251875, + "grad_norm": 9.530349731445312, + "learning_rate": 1.9151527520314105e-06, + "loss": 0.2542, + "num_input_tokens_seen": 30198560, + "step": 44795 + }, + { + "epoch": 1.0944714533505973, + "grad_norm": 1.1136250495910645, + "learning_rate": 1.9151183726867623e-06, + "loss": 0.0945, + "num_input_tokens_seen": 30201824, + "step": 44800 + }, + { + "epoch": 1.0945936041824444, + "grad_norm": 10.920148849487305, + "learning_rate": 1.9150839866871273e-06, + "loss": 0.0295, + "num_input_tokens_seen": 30205408, + "step": 44805 + }, + { + "epoch": 1.0947157550142916, + "grad_norm": 2.137712240219116, + "learning_rate": 1.9150495940327556e-06, + "loss": 0.0894, + "num_input_tokens_seen": 30208736, + "step": 44810 + }, + { + "epoch": 1.0948379058461388, + "grad_norm": 41.97331619262695, + "learning_rate": 1.915015194723897e-06, + "loss": 0.1374, + "num_input_tokens_seen": 30211872, + "step": 44815 + }, + { + "epoch": 1.094960056677986, + "grad_norm": 17.44566535949707, + "learning_rate": 1.9149807887608012e-06, + "loss": 0.0371, + "num_input_tokens_seen": 30215200, + "step": 44820 + }, + { + "epoch": 1.0950822075098332, + "grad_norm": 0.21711544692516327, + "learning_rate": 1.9149463761437196e-06, + "loss": 0.1393, + "num_input_tokens_seen": 30218272, + "step": 44825 + }, + { + "epoch": 1.0952043583416804, + "grad_norm": 0.7705076336860657, + "learning_rate": 1.914911956872902e-06, + "loss": 0.0718, + "num_input_tokens_seen": 30221600, + "step": 44830 + }, + { + "epoch": 1.0953265091735274, + "grad_norm": 0.15149593353271484, + "learning_rate": 1.9148775309485982e-06, + "loss": 0.0227, + "num_input_tokens_seen": 30225120, + "step": 44835 + }, + { + "epoch": 1.0954486600053746, + "grad_norm": 0.34287703037261963, + "learning_rate": 1.914843098371059e-06, + "loss": 0.0023, + "num_input_tokens_seen": 30228576, + "step": 44840 + }, + { + "epoch": 1.0955708108372217, + "grad_norm": 0.2694163918495178, + "learning_rate": 1.914808659140535e-06, + "loss": 0.0347, + "num_input_tokens_seen": 30232224, + "step": 44845 + }, + { + "epoch": 1.095692961669069, + "grad_norm": 25.289270401000977, + "learning_rate": 1.9147742132572763e-06, + "loss": 0.0849, + "num_input_tokens_seen": 30235296, + "step": 44850 + }, + { + "epoch": 1.0958151125009161, + "grad_norm": 38.71376037597656, + "learning_rate": 1.914739760721533e-06, + "loss": 0.0941, + "num_input_tokens_seen": 30238432, + "step": 44855 + }, + { + "epoch": 1.0959372633327633, + "grad_norm": 19.535490036010742, + "learning_rate": 1.9147053015335568e-06, + "loss": 0.031, + "num_input_tokens_seen": 30242592, + "step": 44860 + }, + { + "epoch": 1.0960594141646105, + "grad_norm": 29.093135833740234, + "learning_rate": 1.9146708356935974e-06, + "loss": 0.1462, + "num_input_tokens_seen": 30245728, + "step": 44865 + }, + { + "epoch": 1.0961815649964577, + "grad_norm": 0.1613461971282959, + "learning_rate": 1.9146363632019053e-06, + "loss": 0.1299, + "num_input_tokens_seen": 30249376, + "step": 44870 + }, + { + "epoch": 1.0963037158283049, + "grad_norm": 0.3770635426044464, + "learning_rate": 1.914601884058732e-06, + "loss": 0.1636, + "num_input_tokens_seen": 30252640, + "step": 44875 + }, + { + "epoch": 1.096425866660152, + "grad_norm": 0.2593759298324585, + "learning_rate": 1.9145673982643276e-06, + "loss": 0.003, + "num_input_tokens_seen": 30256288, + "step": 44880 + }, + { + "epoch": 1.096548017491999, + "grad_norm": 0.16447781026363373, + "learning_rate": 1.914532905818943e-06, + "loss": 0.0457, + "num_input_tokens_seen": 30259296, + "step": 44885 + }, + { + "epoch": 1.0966701683238462, + "grad_norm": 0.06459282338619232, + "learning_rate": 1.914498406722829e-06, + "loss": 0.1412, + "num_input_tokens_seen": 30262624, + "step": 44890 + }, + { + "epoch": 1.0967923191556934, + "grad_norm": 35.29350280761719, + "learning_rate": 1.914463900976237e-06, + "loss": 0.1249, + "num_input_tokens_seen": 30265760, + "step": 44895 + }, + { + "epoch": 1.0969144699875406, + "grad_norm": 52.1586799621582, + "learning_rate": 1.9144293885794177e-06, + "loss": 0.164, + "num_input_tokens_seen": 30268960, + "step": 44900 + }, + { + "epoch": 1.0970366208193878, + "grad_norm": 0.08863640576601028, + "learning_rate": 1.9143948695326217e-06, + "loss": 0.0011, + "num_input_tokens_seen": 30272480, + "step": 44905 + }, + { + "epoch": 1.097158771651235, + "grad_norm": 1.0056798458099365, + "learning_rate": 1.9143603438361e-06, + "loss": 0.0457, + "num_input_tokens_seen": 30275488, + "step": 44910 + }, + { + "epoch": 1.0972809224830822, + "grad_norm": 0.2724807560443878, + "learning_rate": 1.914325811490104e-06, + "loss": 0.0966, + "num_input_tokens_seen": 30278816, + "step": 44915 + }, + { + "epoch": 1.0974030733149294, + "grad_norm": 13.868060111999512, + "learning_rate": 1.914291272494885e-06, + "loss": 0.1837, + "num_input_tokens_seen": 30282016, + "step": 44920 + }, + { + "epoch": 1.0975252241467763, + "grad_norm": 0.04919269308447838, + "learning_rate": 1.914256726850694e-06, + "loss": 0.0419, + "num_input_tokens_seen": 30285344, + "step": 44925 + }, + { + "epoch": 1.0976473749786235, + "grad_norm": 0.14486579596996307, + "learning_rate": 1.914222174557782e-06, + "loss": 0.1165, + "num_input_tokens_seen": 30288608, + "step": 44930 + }, + { + "epoch": 1.0977695258104707, + "grad_norm": 16.598464965820312, + "learning_rate": 1.9141876156164006e-06, + "loss": 0.1182, + "num_input_tokens_seen": 30292000, + "step": 44935 + }, + { + "epoch": 1.097891676642318, + "grad_norm": 0.25447985529899597, + "learning_rate": 1.914153050026801e-06, + "loss": 0.0429, + "num_input_tokens_seen": 30295712, + "step": 44940 + }, + { + "epoch": 1.098013827474165, + "grad_norm": 1.241197109222412, + "learning_rate": 1.914118477789234e-06, + "loss": 0.085, + "num_input_tokens_seen": 30299296, + "step": 44945 + }, + { + "epoch": 1.0981359783060123, + "grad_norm": 15.943071365356445, + "learning_rate": 1.914083898903952e-06, + "loss": 0.0929, + "num_input_tokens_seen": 30303136, + "step": 44950 + }, + { + "epoch": 1.0982581291378595, + "grad_norm": 0.248790442943573, + "learning_rate": 1.914049313371206e-06, + "loss": 0.0196, + "num_input_tokens_seen": 30306464, + "step": 44955 + }, + { + "epoch": 1.0983802799697067, + "grad_norm": 0.08668647706508636, + "learning_rate": 1.914014721191248e-06, + "loss": 0.0349, + "num_input_tokens_seen": 30309728, + "step": 44960 + }, + { + "epoch": 1.0985024308015539, + "grad_norm": 10.536722183227539, + "learning_rate": 1.9139801223643283e-06, + "loss": 0.1206, + "num_input_tokens_seen": 30313056, + "step": 44965 + }, + { + "epoch": 1.0986245816334008, + "grad_norm": 0.3854491114616394, + "learning_rate": 1.9139455168907e-06, + "loss": 0.0054, + "num_input_tokens_seen": 30316384, + "step": 44970 + }, + { + "epoch": 1.098746732465248, + "grad_norm": 101.83487701416016, + "learning_rate": 1.9139109047706134e-06, + "loss": 0.1733, + "num_input_tokens_seen": 30319776, + "step": 44975 + }, + { + "epoch": 1.0988688832970952, + "grad_norm": 0.9956525564193726, + "learning_rate": 1.9138762860043213e-06, + "loss": 0.1076, + "num_input_tokens_seen": 30322912, + "step": 44980 + }, + { + "epoch": 1.0989910341289424, + "grad_norm": 19.65517234802246, + "learning_rate": 1.913841660592075e-06, + "loss": 0.0892, + "num_input_tokens_seen": 30325984, + "step": 44985 + }, + { + "epoch": 1.0991131849607896, + "grad_norm": 9.605228424072266, + "learning_rate": 1.913807028534126e-06, + "loss": 0.2093, + "num_input_tokens_seen": 30329120, + "step": 44990 + }, + { + "epoch": 1.0992353357926368, + "grad_norm": 0.535914957523346, + "learning_rate": 1.9137723898307275e-06, + "loss": 0.0831, + "num_input_tokens_seen": 30332256, + "step": 44995 + }, + { + "epoch": 1.099357486624484, + "grad_norm": 8.649102210998535, + "learning_rate": 1.9137377444821296e-06, + "loss": 0.0393, + "num_input_tokens_seen": 30335200, + "step": 45000 + }, + { + "epoch": 1.0994796374563311, + "grad_norm": 0.646960437297821, + "learning_rate": 1.913703092488585e-06, + "loss": 0.0218, + "num_input_tokens_seen": 30338656, + "step": 45005 + }, + { + "epoch": 1.0996017882881783, + "grad_norm": 0.20934827625751495, + "learning_rate": 1.9136684338503463e-06, + "loss": 0.0289, + "num_input_tokens_seen": 30341920, + "step": 45010 + }, + { + "epoch": 1.0997239391200253, + "grad_norm": 0.4389401972293854, + "learning_rate": 1.9136337685676644e-06, + "loss": 0.0406, + "num_input_tokens_seen": 30345248, + "step": 45015 + }, + { + "epoch": 1.0998460899518725, + "grad_norm": 0.13102486729621887, + "learning_rate": 1.9135990966407926e-06, + "loss": 0.0512, + "num_input_tokens_seen": 30350624, + "step": 45020 + }, + { + "epoch": 1.0999682407837197, + "grad_norm": 27.58184814453125, + "learning_rate": 1.913564418069982e-06, + "loss": 0.0966, + "num_input_tokens_seen": 30354336, + "step": 45025 + }, + { + "epoch": 1.1000903916155669, + "grad_norm": 0.514782726764679, + "learning_rate": 1.9135297328554853e-06, + "loss": 0.0896, + "num_input_tokens_seen": 30357344, + "step": 45030 + }, + { + "epoch": 1.100212542447414, + "grad_norm": 16.277013778686523, + "learning_rate": 1.9134950409975547e-06, + "loss": 0.0729, + "num_input_tokens_seen": 30360608, + "step": 45035 + }, + { + "epoch": 1.1003346932792613, + "grad_norm": 0.12448076903820038, + "learning_rate": 1.9134603424964425e-06, + "loss": 0.0882, + "num_input_tokens_seen": 30364000, + "step": 45040 + }, + { + "epoch": 1.1004568441111084, + "grad_norm": 15.496138572692871, + "learning_rate": 1.9134256373524008e-06, + "loss": 0.2351, + "num_input_tokens_seen": 30367648, + "step": 45045 + }, + { + "epoch": 1.1005789949429556, + "grad_norm": 0.2786150872707367, + "learning_rate": 1.9133909255656822e-06, + "loss": 0.03, + "num_input_tokens_seen": 30371040, + "step": 45050 + }, + { + "epoch": 1.1007011457748028, + "grad_norm": 0.04457584023475647, + "learning_rate": 1.91335620713654e-06, + "loss": 0.0026, + "num_input_tokens_seen": 30375072, + "step": 45055 + }, + { + "epoch": 1.1008232966066498, + "grad_norm": 12.544975280761719, + "learning_rate": 1.9133214820652247e-06, + "loss": 0.1178, + "num_input_tokens_seen": 30378400, + "step": 45060 + }, + { + "epoch": 1.100945447438497, + "grad_norm": 0.055558640509843826, + "learning_rate": 1.91328675035199e-06, + "loss": 0.101, + "num_input_tokens_seen": 30382496, + "step": 45065 + }, + { + "epoch": 1.1010675982703442, + "grad_norm": 28.11234474182129, + "learning_rate": 1.913252011997089e-06, + "loss": 0.1579, + "num_input_tokens_seen": 30385952, + "step": 45070 + }, + { + "epoch": 1.1011897491021914, + "grad_norm": 0.06535232812166214, + "learning_rate": 1.913217267000773e-06, + "loss": 0.0704, + "num_input_tokens_seen": 30389024, + "step": 45075 + }, + { + "epoch": 1.1013118999340386, + "grad_norm": 51.46257400512695, + "learning_rate": 1.913182515363296e-06, + "loss": 0.0606, + "num_input_tokens_seen": 30392288, + "step": 45080 + }, + { + "epoch": 1.1014340507658857, + "grad_norm": 0.3011631965637207, + "learning_rate": 1.9131477570849103e-06, + "loss": 0.1839, + "num_input_tokens_seen": 30395616, + "step": 45085 + }, + { + "epoch": 1.101556201597733, + "grad_norm": 15.269543647766113, + "learning_rate": 1.913112992165868e-06, + "loss": 0.0551, + "num_input_tokens_seen": 30398944, + "step": 45090 + }, + { + "epoch": 1.1016783524295801, + "grad_norm": 0.13697107136249542, + "learning_rate": 1.9130782206064228e-06, + "loss": 0.1362, + "num_input_tokens_seen": 30402464, + "step": 45095 + }, + { + "epoch": 1.1018005032614273, + "grad_norm": 54.185874938964844, + "learning_rate": 1.9130434424068265e-06, + "loss": 0.0087, + "num_input_tokens_seen": 30405984, + "step": 45100 + }, + { + "epoch": 1.1019226540932743, + "grad_norm": 9.86892318725586, + "learning_rate": 1.9130086575673335e-06, + "loss": 0.1299, + "num_input_tokens_seen": 30409568, + "step": 45105 + }, + { + "epoch": 1.1020448049251215, + "grad_norm": 5.100939750671387, + "learning_rate": 1.9129738660881956e-06, + "loss": 0.0292, + "num_input_tokens_seen": 30413152, + "step": 45110 + }, + { + "epoch": 1.1021669557569687, + "grad_norm": 1.465461254119873, + "learning_rate": 1.9129390679696663e-06, + "loss": 0.0517, + "num_input_tokens_seen": 30416800, + "step": 45115 + }, + { + "epoch": 1.1022891065888158, + "grad_norm": 0.06135892868041992, + "learning_rate": 1.9129042632119986e-06, + "loss": 0.0245, + "num_input_tokens_seen": 30420384, + "step": 45120 + }, + { + "epoch": 1.102411257420663, + "grad_norm": 0.11938874423503876, + "learning_rate": 1.9128694518154456e-06, + "loss": 0.0089, + "num_input_tokens_seen": 30423776, + "step": 45125 + }, + { + "epoch": 1.1025334082525102, + "grad_norm": 4.974461555480957, + "learning_rate": 1.91283463378026e-06, + "loss": 0.084, + "num_input_tokens_seen": 30427168, + "step": 45130 + }, + { + "epoch": 1.1026555590843574, + "grad_norm": 0.2367285192012787, + "learning_rate": 1.912799809106696e-06, + "loss": 0.0005, + "num_input_tokens_seen": 30430048, + "step": 45135 + }, + { + "epoch": 1.1027777099162046, + "grad_norm": 135.47918701171875, + "learning_rate": 1.912764977795006e-06, + "loss": 0.0656, + "num_input_tokens_seen": 30433056, + "step": 45140 + }, + { + "epoch": 1.1028998607480518, + "grad_norm": 36.52012634277344, + "learning_rate": 1.9127301398454436e-06, + "loss": 0.1529, + "num_input_tokens_seen": 30436128, + "step": 45145 + }, + { + "epoch": 1.1030220115798988, + "grad_norm": 0.22205692529678345, + "learning_rate": 1.912695295258262e-06, + "loss": 0.1235, + "num_input_tokens_seen": 30439328, + "step": 45150 + }, + { + "epoch": 1.103144162411746, + "grad_norm": 0.08349934220314026, + "learning_rate": 1.9126604440337145e-06, + "loss": 0.0019, + "num_input_tokens_seen": 30443040, + "step": 45155 + }, + { + "epoch": 1.1032663132435931, + "grad_norm": 58.70408630371094, + "learning_rate": 1.9126255861720552e-06, + "loss": 0.1519, + "num_input_tokens_seen": 30445920, + "step": 45160 + }, + { + "epoch": 1.1033884640754403, + "grad_norm": 0.7588253617286682, + "learning_rate": 1.912590721673537e-06, + "loss": 0.0032, + "num_input_tokens_seen": 30449120, + "step": 45165 + }, + { + "epoch": 1.1035106149072875, + "grad_norm": 14.000785827636719, + "learning_rate": 1.912555850538414e-06, + "loss": 0.0689, + "num_input_tokens_seen": 30452512, + "step": 45170 + }, + { + "epoch": 1.1036327657391347, + "grad_norm": 0.2592606544494629, + "learning_rate": 1.9125209727669385e-06, + "loss": 0.0416, + "num_input_tokens_seen": 30456224, + "step": 45175 + }, + { + "epoch": 1.103754916570982, + "grad_norm": 66.11405944824219, + "learning_rate": 1.912486088359366e-06, + "loss": 0.0878, + "num_input_tokens_seen": 30459424, + "step": 45180 + }, + { + "epoch": 1.103877067402829, + "grad_norm": 6.067470073699951, + "learning_rate": 1.9124511973159486e-06, + "loss": 0.0887, + "num_input_tokens_seen": 30462688, + "step": 45185 + }, + { + "epoch": 1.103999218234676, + "grad_norm": 0.31076791882514954, + "learning_rate": 1.912416299636941e-06, + "loss": 0.0715, + "num_input_tokens_seen": 30466272, + "step": 45190 + }, + { + "epoch": 1.1041213690665233, + "grad_norm": 0.39867663383483887, + "learning_rate": 1.912381395322597e-06, + "loss": 0.1073, + "num_input_tokens_seen": 30469536, + "step": 45195 + }, + { + "epoch": 1.1042435198983704, + "grad_norm": 0.05573923513293266, + "learning_rate": 1.912346484373169e-06, + "loss": 0.0988, + "num_input_tokens_seen": 30472608, + "step": 45200 + }, + { + "epoch": 1.1043656707302176, + "grad_norm": 14.342602729797363, + "learning_rate": 1.912311566788913e-06, + "loss": 0.0773, + "num_input_tokens_seen": 30475808, + "step": 45205 + }, + { + "epoch": 1.1044878215620648, + "grad_norm": 58.24125289916992, + "learning_rate": 1.9122766425700816e-06, + "loss": 0.0827, + "num_input_tokens_seen": 30479200, + "step": 45210 + }, + { + "epoch": 1.104609972393912, + "grad_norm": 0.0803033709526062, + "learning_rate": 1.912241711716929e-06, + "loss": 0.1808, + "num_input_tokens_seen": 30482912, + "step": 45215 + }, + { + "epoch": 1.1047321232257592, + "grad_norm": 0.3461464047431946, + "learning_rate": 1.9122067742297093e-06, + "loss": 0.0685, + "num_input_tokens_seen": 30486560, + "step": 45220 + }, + { + "epoch": 1.1048542740576064, + "grad_norm": 1.3804208040237427, + "learning_rate": 1.9121718301086766e-06, + "loss": 0.1057, + "num_input_tokens_seen": 30490016, + "step": 45225 + }, + { + "epoch": 1.1049764248894536, + "grad_norm": 32.63319778442383, + "learning_rate": 1.912136879354085e-06, + "loss": 0.1027, + "num_input_tokens_seen": 30493408, + "step": 45230 + }, + { + "epoch": 1.1050985757213008, + "grad_norm": 25.16427993774414, + "learning_rate": 1.912101921966189e-06, + "loss": 0.1223, + "num_input_tokens_seen": 30497312, + "step": 45235 + }, + { + "epoch": 1.1052207265531477, + "grad_norm": 6.807790756225586, + "learning_rate": 1.912066957945242e-06, + "loss": 0.0408, + "num_input_tokens_seen": 30500704, + "step": 45240 + }, + { + "epoch": 1.105342877384995, + "grad_norm": 47.124122619628906, + "learning_rate": 1.912031987291499e-06, + "loss": 0.1212, + "num_input_tokens_seen": 30503904, + "step": 45245 + }, + { + "epoch": 1.1054650282168421, + "grad_norm": 16.168676376342773, + "learning_rate": 1.911997010005214e-06, + "loss": 0.1472, + "num_input_tokens_seen": 30507296, + "step": 45250 + }, + { + "epoch": 1.1055871790486893, + "grad_norm": 10.315686225891113, + "learning_rate": 1.9119620260866415e-06, + "loss": 0.0507, + "num_input_tokens_seen": 30511264, + "step": 45255 + }, + { + "epoch": 1.1057093298805365, + "grad_norm": 15.87485408782959, + "learning_rate": 1.911927035536036e-06, + "loss": 0.132, + "num_input_tokens_seen": 30514272, + "step": 45260 + }, + { + "epoch": 1.1058314807123837, + "grad_norm": 0.41578197479248047, + "learning_rate": 1.9118920383536515e-06, + "loss": 0.0812, + "num_input_tokens_seen": 30517472, + "step": 45265 + }, + { + "epoch": 1.1059536315442309, + "grad_norm": 27.88473129272461, + "learning_rate": 1.911857034539743e-06, + "loss": 0.056, + "num_input_tokens_seen": 30520800, + "step": 45270 + }, + { + "epoch": 1.106075782376078, + "grad_norm": 14.108925819396973, + "learning_rate": 1.911822024094565e-06, + "loss": 0.0698, + "num_input_tokens_seen": 30524832, + "step": 45275 + }, + { + "epoch": 1.106197933207925, + "grad_norm": 44.32224655151367, + "learning_rate": 1.9117870070183718e-06, + "loss": 0.0387, + "num_input_tokens_seen": 30528160, + "step": 45280 + }, + { + "epoch": 1.1063200840397722, + "grad_norm": 5.657730579376221, + "learning_rate": 1.9117519833114185e-06, + "loss": 0.0378, + "num_input_tokens_seen": 30531040, + "step": 45285 + }, + { + "epoch": 1.1064422348716194, + "grad_norm": 83.99029541015625, + "learning_rate": 1.9117169529739595e-06, + "loss": 0.1398, + "num_input_tokens_seen": 30534304, + "step": 45290 + }, + { + "epoch": 1.1065643857034666, + "grad_norm": 0.23789186775684357, + "learning_rate": 1.9116819160062493e-06, + "loss": 0.0015, + "num_input_tokens_seen": 30537440, + "step": 45295 + }, + { + "epoch": 1.1066865365353138, + "grad_norm": 8.420495986938477, + "learning_rate": 1.9116468724085433e-06, + "loss": 0.0918, + "num_input_tokens_seen": 30541536, + "step": 45300 + }, + { + "epoch": 1.106808687367161, + "grad_norm": 107.93529510498047, + "learning_rate": 1.9116118221810956e-06, + "loss": 0.1732, + "num_input_tokens_seen": 30544992, + "step": 45305 + }, + { + "epoch": 1.1069308381990082, + "grad_norm": 0.3019404411315918, + "learning_rate": 1.911576765324162e-06, + "loss": 0.2395, + "num_input_tokens_seen": 30548128, + "step": 45310 + }, + { + "epoch": 1.1070529890308554, + "grad_norm": 1.3898652791976929, + "learning_rate": 1.911541701837997e-06, + "loss": 0.1195, + "num_input_tokens_seen": 30551008, + "step": 45315 + }, + { + "epoch": 1.1071751398627026, + "grad_norm": 2.8064727783203125, + "learning_rate": 1.9115066317228552e-06, + "loss": 0.0957, + "num_input_tokens_seen": 30554592, + "step": 45320 + }, + { + "epoch": 1.1072972906945497, + "grad_norm": 18.04161834716797, + "learning_rate": 1.911471554978992e-06, + "loss": 0.1277, + "num_input_tokens_seen": 30558304, + "step": 45325 + }, + { + "epoch": 1.1074194415263967, + "grad_norm": 0.3619232773780823, + "learning_rate": 1.911436471606663e-06, + "loss": 0.0031, + "num_input_tokens_seen": 30562080, + "step": 45330 + }, + { + "epoch": 1.107541592358244, + "grad_norm": 27.725862503051758, + "learning_rate": 1.9114013816061222e-06, + "loss": 0.1244, + "num_input_tokens_seen": 30565344, + "step": 45335 + }, + { + "epoch": 1.107663743190091, + "grad_norm": 48.7161979675293, + "learning_rate": 1.911366284977626e-06, + "loss": 0.0315, + "num_input_tokens_seen": 30569184, + "step": 45340 + }, + { + "epoch": 1.1077858940219383, + "grad_norm": 0.17331643402576447, + "learning_rate": 1.9113311817214287e-06, + "loss": 0.0021, + "num_input_tokens_seen": 30572256, + "step": 45345 + }, + { + "epoch": 1.1079080448537855, + "grad_norm": 10.249677658081055, + "learning_rate": 1.911296071837786e-06, + "loss": 0.1518, + "num_input_tokens_seen": 30575712, + "step": 45350 + }, + { + "epoch": 1.1080301956856327, + "grad_norm": 0.9612787961959839, + "learning_rate": 1.911260955326953e-06, + "loss": 0.0362, + "num_input_tokens_seen": 30579232, + "step": 45355 + }, + { + "epoch": 1.1081523465174798, + "grad_norm": 28.470977783203125, + "learning_rate": 1.9112258321891858e-06, + "loss": 0.1136, + "num_input_tokens_seen": 30583072, + "step": 45360 + }, + { + "epoch": 1.108274497349327, + "grad_norm": 0.3659687042236328, + "learning_rate": 1.9111907024247387e-06, + "loss": 0.0033, + "num_input_tokens_seen": 30586336, + "step": 45365 + }, + { + "epoch": 1.108396648181174, + "grad_norm": 0.03629086911678314, + "learning_rate": 1.9111555660338677e-06, + "loss": 0.0824, + "num_input_tokens_seen": 30589600, + "step": 45370 + }, + { + "epoch": 1.1085187990130212, + "grad_norm": 5.431779384613037, + "learning_rate": 1.9111204230168287e-06, + "loss": 0.1192, + "num_input_tokens_seen": 30593248, + "step": 45375 + }, + { + "epoch": 1.1086409498448684, + "grad_norm": 0.3708115816116333, + "learning_rate": 1.9110852733738766e-06, + "loss": 0.0516, + "num_input_tokens_seen": 30596384, + "step": 45380 + }, + { + "epoch": 1.1087631006767156, + "grad_norm": 4.54123592376709, + "learning_rate": 1.9110501171052676e-06, + "loss": 0.0061, + "num_input_tokens_seen": 30599840, + "step": 45385 + }, + { + "epoch": 1.1088852515085628, + "grad_norm": 11.170312881469727, + "learning_rate": 1.911014954211257e-06, + "loss": 0.0958, + "num_input_tokens_seen": 30603104, + "step": 45390 + }, + { + "epoch": 1.10900740234041, + "grad_norm": 0.07969980686903, + "learning_rate": 1.910979784692101e-06, + "loss": 0.0008, + "num_input_tokens_seen": 30606624, + "step": 45395 + }, + { + "epoch": 1.1091295531722571, + "grad_norm": 22.515426635742188, + "learning_rate": 1.9109446085480543e-06, + "loss": 0.1607, + "num_input_tokens_seen": 30609952, + "step": 45400 + }, + { + "epoch": 1.1092517040041043, + "grad_norm": 38.219364166259766, + "learning_rate": 1.9109094257793736e-06, + "loss": 0.0909, + "num_input_tokens_seen": 30613024, + "step": 45405 + }, + { + "epoch": 1.1093738548359515, + "grad_norm": 0.10983511060476303, + "learning_rate": 1.9108742363863147e-06, + "loss": 0.0454, + "num_input_tokens_seen": 30616736, + "step": 45410 + }, + { + "epoch": 1.1094960056677987, + "grad_norm": 24.83835220336914, + "learning_rate": 1.9108390403691333e-06, + "loss": 0.0845, + "num_input_tokens_seen": 30619808, + "step": 45415 + }, + { + "epoch": 1.1096181564996457, + "grad_norm": 51.3784065246582, + "learning_rate": 1.9108038377280856e-06, + "loss": 0.0218, + "num_input_tokens_seen": 30623200, + "step": 45420 + }, + { + "epoch": 1.1097403073314929, + "grad_norm": 34.15353775024414, + "learning_rate": 1.910768628463427e-06, + "loss": 0.1623, + "num_input_tokens_seen": 30626400, + "step": 45425 + }, + { + "epoch": 1.10986245816334, + "grad_norm": 0.046371277421712875, + "learning_rate": 1.9107334125754143e-06, + "loss": 0.0092, + "num_input_tokens_seen": 30629920, + "step": 45430 + }, + { + "epoch": 1.1099846089951872, + "grad_norm": 59.40081787109375, + "learning_rate": 1.910698190064303e-06, + "loss": 0.2519, + "num_input_tokens_seen": 30632928, + "step": 45435 + }, + { + "epoch": 1.1101067598270344, + "grad_norm": 37.21415328979492, + "learning_rate": 1.91066296093035e-06, + "loss": 0.3251, + "num_input_tokens_seen": 30636576, + "step": 45440 + }, + { + "epoch": 1.1102289106588816, + "grad_norm": 20.704212188720703, + "learning_rate": 1.9106277251738104e-06, + "loss": 0.1401, + "num_input_tokens_seen": 30639712, + "step": 45445 + }, + { + "epoch": 1.1103510614907288, + "grad_norm": 3.910080671310425, + "learning_rate": 1.9105924827949417e-06, + "loss": 0.0518, + "num_input_tokens_seen": 30643104, + "step": 45450 + }, + { + "epoch": 1.110473212322576, + "grad_norm": 58.14051055908203, + "learning_rate": 1.910557233793999e-06, + "loss": 0.134, + "num_input_tokens_seen": 30646304, + "step": 45455 + }, + { + "epoch": 1.110595363154423, + "grad_norm": 0.6852989196777344, + "learning_rate": 1.9105219781712396e-06, + "loss": 0.002, + "num_input_tokens_seen": 30649376, + "step": 45460 + }, + { + "epoch": 1.1107175139862702, + "grad_norm": 0.48834651708602905, + "learning_rate": 1.910486715926919e-06, + "loss": 0.077, + "num_input_tokens_seen": 30652512, + "step": 45465 + }, + { + "epoch": 1.1108396648181174, + "grad_norm": 0.4228566288948059, + "learning_rate": 1.9104514470612946e-06, + "loss": 0.1008, + "num_input_tokens_seen": 30656096, + "step": 45470 + }, + { + "epoch": 1.1109618156499645, + "grad_norm": 1.1528277397155762, + "learning_rate": 1.910416171574622e-06, + "loss": 0.0885, + "num_input_tokens_seen": 30659744, + "step": 45475 + }, + { + "epoch": 1.1110839664818117, + "grad_norm": 20.612672805786133, + "learning_rate": 1.9103808894671586e-06, + "loss": 0.062, + "num_input_tokens_seen": 30663008, + "step": 45480 + }, + { + "epoch": 1.111206117313659, + "grad_norm": 21.08039665222168, + "learning_rate": 1.91034560073916e-06, + "loss": 0.039, + "num_input_tokens_seen": 30666528, + "step": 45485 + }, + { + "epoch": 1.1113282681455061, + "grad_norm": 58.673824310302734, + "learning_rate": 1.9103103053908834e-06, + "loss": 0.0721, + "num_input_tokens_seen": 30669920, + "step": 45490 + }, + { + "epoch": 1.1114504189773533, + "grad_norm": 0.6854089498519897, + "learning_rate": 1.910275003422586e-06, + "loss": 0.0012, + "num_input_tokens_seen": 30673184, + "step": 45495 + }, + { + "epoch": 1.1115725698092005, + "grad_norm": 71.15158081054688, + "learning_rate": 1.910239694834523e-06, + "loss": 0.0413, + "num_input_tokens_seen": 30676384, + "step": 45500 + }, + { + "epoch": 1.1116947206410475, + "grad_norm": 0.11159415543079376, + "learning_rate": 1.910204379626953e-06, + "loss": 0.0017, + "num_input_tokens_seen": 30679840, + "step": 45505 + }, + { + "epoch": 1.1118168714728947, + "grad_norm": 0.036455605179071426, + "learning_rate": 1.9101690578001313e-06, + "loss": 0.0628, + "num_input_tokens_seen": 30683232, + "step": 45510 + }, + { + "epoch": 1.1119390223047418, + "grad_norm": 0.4710502028465271, + "learning_rate": 1.9101337293543156e-06, + "loss": 0.0996, + "num_input_tokens_seen": 30686560, + "step": 45515 + }, + { + "epoch": 1.112061173136589, + "grad_norm": 30.097923278808594, + "learning_rate": 1.910098394289763e-06, + "loss": 0.0881, + "num_input_tokens_seen": 30689632, + "step": 45520 + }, + { + "epoch": 1.1121833239684362, + "grad_norm": 93.26819610595703, + "learning_rate": 1.9100630526067292e-06, + "loss": 0.0683, + "num_input_tokens_seen": 30693024, + "step": 45525 + }, + { + "epoch": 1.1123054748002834, + "grad_norm": 67.23088836669922, + "learning_rate": 1.9100277043054727e-06, + "loss": 0.0671, + "num_input_tokens_seen": 30696160, + "step": 45530 + }, + { + "epoch": 1.1124276256321306, + "grad_norm": 97.0174560546875, + "learning_rate": 1.90999234938625e-06, + "loss": 0.0694, + "num_input_tokens_seen": 30699040, + "step": 45535 + }, + { + "epoch": 1.1125497764639778, + "grad_norm": 0.051360975950956345, + "learning_rate": 1.909956987849318e-06, + "loss": 0.0007, + "num_input_tokens_seen": 30702304, + "step": 45540 + }, + { + "epoch": 1.112671927295825, + "grad_norm": 73.1390151977539, + "learning_rate": 1.909921619694934e-06, + "loss": 0.0649, + "num_input_tokens_seen": 30705632, + "step": 45545 + }, + { + "epoch": 1.112794078127672, + "grad_norm": 0.31845179200172424, + "learning_rate": 1.909886244923356e-06, + "loss": 0.0619, + "num_input_tokens_seen": 30709088, + "step": 45550 + }, + { + "epoch": 1.1129162289595191, + "grad_norm": 10.11508846282959, + "learning_rate": 1.9098508635348398e-06, + "loss": 0.1361, + "num_input_tokens_seen": 30712480, + "step": 45555 + }, + { + "epoch": 1.1130383797913663, + "grad_norm": 0.17602814733982086, + "learning_rate": 1.909815475529643e-06, + "loss": 0.0475, + "num_input_tokens_seen": 30715680, + "step": 45560 + }, + { + "epoch": 1.1131605306232135, + "grad_norm": 1.3473693132400513, + "learning_rate": 1.909780080908024e-06, + "loss": 0.0958, + "num_input_tokens_seen": 30718880, + "step": 45565 + }, + { + "epoch": 1.1132826814550607, + "grad_norm": 73.33531188964844, + "learning_rate": 1.9097446796702395e-06, + "loss": 0.0781, + "num_input_tokens_seen": 30722144, + "step": 45570 + }, + { + "epoch": 1.113404832286908, + "grad_norm": 12.397170066833496, + "learning_rate": 1.909709271816547e-06, + "loss": 0.1832, + "num_input_tokens_seen": 30725280, + "step": 45575 + }, + { + "epoch": 1.113526983118755, + "grad_norm": 0.47446826100349426, + "learning_rate": 1.9096738573472035e-06, + "loss": 0.0437, + "num_input_tokens_seen": 30728928, + "step": 45580 + }, + { + "epoch": 1.1136491339506023, + "grad_norm": 0.17872124910354614, + "learning_rate": 1.9096384362624675e-06, + "loss": 0.2124, + "num_input_tokens_seen": 30732192, + "step": 45585 + }, + { + "epoch": 1.1137712847824495, + "grad_norm": 51.15095138549805, + "learning_rate": 1.909603008562596e-06, + "loss": 0.1358, + "num_input_tokens_seen": 30735264, + "step": 45590 + }, + { + "epoch": 1.1138934356142964, + "grad_norm": 63.00979232788086, + "learning_rate": 1.909567574247847e-06, + "loss": 0.0688, + "num_input_tokens_seen": 30738720, + "step": 45595 + }, + { + "epoch": 1.1140155864461436, + "grad_norm": 0.1376773715019226, + "learning_rate": 1.9095321333184777e-06, + "loss": 0.0368, + "num_input_tokens_seen": 30741984, + "step": 45600 + }, + { + "epoch": 1.1141377372779908, + "grad_norm": 8.297344207763672, + "learning_rate": 1.909496685774746e-06, + "loss": 0.06, + "num_input_tokens_seen": 30745248, + "step": 45605 + }, + { + "epoch": 1.114259888109838, + "grad_norm": 1.1917724609375, + "learning_rate": 1.90946123161691e-06, + "loss": 0.0493, + "num_input_tokens_seen": 30748832, + "step": 45610 + }, + { + "epoch": 1.1143820389416852, + "grad_norm": 45.727996826171875, + "learning_rate": 1.9094257708452275e-06, + "loss": 0.1171, + "num_input_tokens_seen": 30752352, + "step": 45615 + }, + { + "epoch": 1.1145041897735324, + "grad_norm": 0.2739153206348419, + "learning_rate": 1.909390303459956e-06, + "loss": 0.1067, + "num_input_tokens_seen": 30755296, + "step": 45620 + }, + { + "epoch": 1.1146263406053796, + "grad_norm": 106.9163818359375, + "learning_rate": 1.9093548294613533e-06, + "loss": 0.2377, + "num_input_tokens_seen": 30758432, + "step": 45625 + }, + { + "epoch": 1.1147484914372268, + "grad_norm": 1.4975440502166748, + "learning_rate": 1.9093193488496778e-06, + "loss": 0.0355, + "num_input_tokens_seen": 30761824, + "step": 45630 + }, + { + "epoch": 1.114870642269074, + "grad_norm": 1.395308494567871, + "learning_rate": 1.9092838616251877e-06, + "loss": 0.0493, + "num_input_tokens_seen": 30765728, + "step": 45635 + }, + { + "epoch": 1.114992793100921, + "grad_norm": 21.039077758789062, + "learning_rate": 1.9092483677881405e-06, + "loss": 0.1208, + "num_input_tokens_seen": 30769120, + "step": 45640 + }, + { + "epoch": 1.115114943932768, + "grad_norm": 0.11878981441259384, + "learning_rate": 1.909212867338795e-06, + "loss": 0.001, + "num_input_tokens_seen": 30772384, + "step": 45645 + }, + { + "epoch": 1.1152370947646153, + "grad_norm": 8.132143020629883, + "learning_rate": 1.9091773602774087e-06, + "loss": 0.0624, + "num_input_tokens_seen": 30775584, + "step": 45650 + }, + { + "epoch": 1.1153592455964625, + "grad_norm": 27.349586486816406, + "learning_rate": 1.90914184660424e-06, + "loss": 0.1317, + "num_input_tokens_seen": 30778464, + "step": 45655 + }, + { + "epoch": 1.1154813964283097, + "grad_norm": 44.20734786987305, + "learning_rate": 1.9091063263195473e-06, + "loss": 0.1104, + "num_input_tokens_seen": 30781984, + "step": 45660 + }, + { + "epoch": 1.1156035472601569, + "grad_norm": 10.095579147338867, + "learning_rate": 1.909070799423589e-06, + "loss": 0.1635, + "num_input_tokens_seen": 30785248, + "step": 45665 + }, + { + "epoch": 1.115725698092004, + "grad_norm": 8.791191101074219, + "learning_rate": 1.9090352659166232e-06, + "loss": 0.1662, + "num_input_tokens_seen": 30788896, + "step": 45670 + }, + { + "epoch": 1.1158478489238512, + "grad_norm": 11.370107650756836, + "learning_rate": 1.9089997257989084e-06, + "loss": 0.0861, + "num_input_tokens_seen": 30792416, + "step": 45675 + }, + { + "epoch": 1.1159699997556984, + "grad_norm": 0.14778165519237518, + "learning_rate": 1.9089641790707036e-06, + "loss": 0.0014, + "num_input_tokens_seen": 30796128, + "step": 45680 + }, + { + "epoch": 1.1160921505875454, + "grad_norm": 0.42808398604393005, + "learning_rate": 1.9089286257322664e-06, + "loss": 0.0489, + "num_input_tokens_seen": 30799328, + "step": 45685 + }, + { + "epoch": 1.1162143014193926, + "grad_norm": 13.010444641113281, + "learning_rate": 1.908893065783856e-06, + "loss": 0.2035, + "num_input_tokens_seen": 30803296, + "step": 45690 + }, + { + "epoch": 1.1163364522512398, + "grad_norm": 3.7101378440856934, + "learning_rate": 1.90885749922573e-06, + "loss": 0.0675, + "num_input_tokens_seen": 30806432, + "step": 45695 + }, + { + "epoch": 1.116458603083087, + "grad_norm": 12.728604316711426, + "learning_rate": 1.9088219260581488e-06, + "loss": 0.1025, + "num_input_tokens_seen": 30809824, + "step": 45700 + }, + { + "epoch": 1.1165807539149342, + "grad_norm": 25.37266731262207, + "learning_rate": 1.90878634628137e-06, + "loss": 0.1207, + "num_input_tokens_seen": 30813088, + "step": 45705 + }, + { + "epoch": 1.1167029047467814, + "grad_norm": 91.44532012939453, + "learning_rate": 1.908750759895652e-06, + "loss": 0.0226, + "num_input_tokens_seen": 30816800, + "step": 45710 + }, + { + "epoch": 1.1168250555786285, + "grad_norm": 0.25230729579925537, + "learning_rate": 1.908715166901254e-06, + "loss": 0.0618, + "num_input_tokens_seen": 30819680, + "step": 45715 + }, + { + "epoch": 1.1169472064104757, + "grad_norm": 0.22132541239261627, + "learning_rate": 1.908679567298435e-06, + "loss": 0.0726, + "num_input_tokens_seen": 30822816, + "step": 45720 + }, + { + "epoch": 1.1170693572423227, + "grad_norm": 0.8834930658340454, + "learning_rate": 1.908643961087454e-06, + "loss": 0.007, + "num_input_tokens_seen": 30825888, + "step": 45725 + }, + { + "epoch": 1.11719150807417, + "grad_norm": 23.24863052368164, + "learning_rate": 1.9086083482685696e-06, + "loss": 0.0397, + "num_input_tokens_seen": 30829408, + "step": 45730 + }, + { + "epoch": 1.117313658906017, + "grad_norm": 0.04711702838540077, + "learning_rate": 1.908572728842041e-06, + "loss": 0.0484, + "num_input_tokens_seen": 30832800, + "step": 45735 + }, + { + "epoch": 1.1174358097378643, + "grad_norm": 1.2767448425292969, + "learning_rate": 1.908537102808127e-06, + "loss": 0.0684, + "num_input_tokens_seen": 30836192, + "step": 45740 + }, + { + "epoch": 1.1175579605697115, + "grad_norm": 0.08316470682621002, + "learning_rate": 1.9085014701670866e-06, + "loss": 0.0011, + "num_input_tokens_seen": 30839904, + "step": 45745 + }, + { + "epoch": 1.1176801114015587, + "grad_norm": 7.2097015380859375, + "learning_rate": 1.9084658309191798e-06, + "loss": 0.0022, + "num_input_tokens_seen": 30842912, + "step": 45750 + }, + { + "epoch": 1.1178022622334058, + "grad_norm": 0.1650509536266327, + "learning_rate": 1.9084301850646645e-06, + "loss": 0.0953, + "num_input_tokens_seen": 30846560, + "step": 45755 + }, + { + "epoch": 1.117924413065253, + "grad_norm": 116.87432861328125, + "learning_rate": 1.908394532603801e-06, + "loss": 0.0214, + "num_input_tokens_seen": 30850016, + "step": 45760 + }, + { + "epoch": 1.1180465638971002, + "grad_norm": 0.04541773349046707, + "learning_rate": 1.908358873536848e-06, + "loss": 0.0006, + "num_input_tokens_seen": 30853152, + "step": 45765 + }, + { + "epoch": 1.1181687147289474, + "grad_norm": 0.08230971544981003, + "learning_rate": 1.9083232078640647e-06, + "loss": 0.0494, + "num_input_tokens_seen": 30856864, + "step": 45770 + }, + { + "epoch": 1.1182908655607944, + "grad_norm": 13.339716911315918, + "learning_rate": 1.908287535585711e-06, + "loss": 0.1459, + "num_input_tokens_seen": 30860128, + "step": 45775 + }, + { + "epoch": 1.1184130163926416, + "grad_norm": 0.05430752784013748, + "learning_rate": 1.9082518567020457e-06, + "loss": 0.1142, + "num_input_tokens_seen": 30863648, + "step": 45780 + }, + { + "epoch": 1.1185351672244888, + "grad_norm": 0.3133643567562103, + "learning_rate": 1.908216171213329e-06, + "loss": 0.0439, + "num_input_tokens_seen": 30866976, + "step": 45785 + }, + { + "epoch": 1.118657318056336, + "grad_norm": 5.240131378173828, + "learning_rate": 1.90818047911982e-06, + "loss": 0.1414, + "num_input_tokens_seen": 30870688, + "step": 45790 + }, + { + "epoch": 1.1187794688881831, + "grad_norm": 0.13645559549331665, + "learning_rate": 1.908144780421778e-06, + "loss": 0.0685, + "num_input_tokens_seen": 30873760, + "step": 45795 + }, + { + "epoch": 1.1189016197200303, + "grad_norm": 3.473702907562256, + "learning_rate": 1.908109075119463e-06, + "loss": 0.0432, + "num_input_tokens_seen": 30877152, + "step": 45800 + }, + { + "epoch": 1.1190237705518775, + "grad_norm": 0.1498257964849472, + "learning_rate": 1.9080733632131347e-06, + "loss": 0.0734, + "num_input_tokens_seen": 30881312, + "step": 45805 + }, + { + "epoch": 1.1191459213837247, + "grad_norm": 16.656753540039062, + "learning_rate": 1.9080376447030525e-06, + "loss": 0.1117, + "num_input_tokens_seen": 30884512, + "step": 45810 + }, + { + "epoch": 1.1192680722155717, + "grad_norm": 0.9916263818740845, + "learning_rate": 1.9080019195894766e-06, + "loss": 0.1565, + "num_input_tokens_seen": 30887648, + "step": 45815 + }, + { + "epoch": 1.1193902230474189, + "grad_norm": 0.260072261095047, + "learning_rate": 1.9079661878726663e-06, + "loss": 0.0634, + "num_input_tokens_seen": 30891296, + "step": 45820 + }, + { + "epoch": 1.119512373879266, + "grad_norm": 0.287136435508728, + "learning_rate": 1.9079304495528815e-06, + "loss": 0.0536, + "num_input_tokens_seen": 30894624, + "step": 45825 + }, + { + "epoch": 1.1196345247111132, + "grad_norm": 48.25025177001953, + "learning_rate": 1.9078947046303825e-06, + "loss": 0.0291, + "num_input_tokens_seen": 30897952, + "step": 45830 + }, + { + "epoch": 1.1197566755429604, + "grad_norm": 71.15177917480469, + "learning_rate": 1.907858953105429e-06, + "loss": 0.1195, + "num_input_tokens_seen": 30901152, + "step": 45835 + }, + { + "epoch": 1.1198788263748076, + "grad_norm": 42.36701202392578, + "learning_rate": 1.907823194978281e-06, + "loss": 0.2208, + "num_input_tokens_seen": 30904672, + "step": 45840 + }, + { + "epoch": 1.1200009772066548, + "grad_norm": 0.25921159982681274, + "learning_rate": 1.9077874302491985e-06, + "loss": 0.0397, + "num_input_tokens_seen": 30907808, + "step": 45845 + }, + { + "epoch": 1.120123128038502, + "grad_norm": 0.9302563071250916, + "learning_rate": 1.9077516589184416e-06, + "loss": 0.0989, + "num_input_tokens_seen": 30910880, + "step": 45850 + }, + { + "epoch": 1.1202452788703492, + "grad_norm": 29.44671058654785, + "learning_rate": 1.9077158809862707e-06, + "loss": 0.1054, + "num_input_tokens_seen": 30914336, + "step": 45855 + }, + { + "epoch": 1.1203674297021964, + "grad_norm": 0.4291466772556305, + "learning_rate": 1.9076800964529455e-06, + "loss": 0.0362, + "num_input_tokens_seen": 30917536, + "step": 45860 + }, + { + "epoch": 1.1204895805340434, + "grad_norm": 0.14217062294483185, + "learning_rate": 1.9076443053187265e-06, + "loss": 0.0847, + "num_input_tokens_seen": 30920736, + "step": 45865 + }, + { + "epoch": 1.1206117313658905, + "grad_norm": 0.11226175725460052, + "learning_rate": 1.907608507583874e-06, + "loss": 0.0048, + "num_input_tokens_seen": 30925216, + "step": 45870 + }, + { + "epoch": 1.1207338821977377, + "grad_norm": 0.032658837735652924, + "learning_rate": 1.9075727032486486e-06, + "loss": 0.0274, + "num_input_tokens_seen": 30928672, + "step": 45875 + }, + { + "epoch": 1.120856033029585, + "grad_norm": 11.843403816223145, + "learning_rate": 1.9075368923133102e-06, + "loss": 0.0966, + "num_input_tokens_seen": 30933024, + "step": 45880 + }, + { + "epoch": 1.120978183861432, + "grad_norm": 0.8221790790557861, + "learning_rate": 1.9075010747781194e-06, + "loss": 0.0922, + "num_input_tokens_seen": 30936032, + "step": 45885 + }, + { + "epoch": 1.1211003346932793, + "grad_norm": 18.793682098388672, + "learning_rate": 1.9074652506433367e-06, + "loss": 0.0999, + "num_input_tokens_seen": 30939424, + "step": 45890 + }, + { + "epoch": 1.1212224855251265, + "grad_norm": 0.1459447145462036, + "learning_rate": 1.9074294199092224e-06, + "loss": 0.1094, + "num_input_tokens_seen": 30942752, + "step": 45895 + }, + { + "epoch": 1.1213446363569737, + "grad_norm": 0.7048338651657104, + "learning_rate": 1.907393582576038e-06, + "loss": 0.132, + "num_input_tokens_seen": 30946208, + "step": 45900 + }, + { + "epoch": 1.1214667871888206, + "grad_norm": 0.9069101214408875, + "learning_rate": 1.9073577386440423e-06, + "loss": 0.0294, + "num_input_tokens_seen": 30949088, + "step": 45905 + }, + { + "epoch": 1.1215889380206678, + "grad_norm": 10.497543334960938, + "learning_rate": 1.9073218881134979e-06, + "loss": 0.1609, + "num_input_tokens_seen": 30952352, + "step": 45910 + }, + { + "epoch": 1.121711088852515, + "grad_norm": 63.032737731933594, + "learning_rate": 1.9072860309846647e-06, + "loss": 0.0919, + "num_input_tokens_seen": 30955296, + "step": 45915 + }, + { + "epoch": 1.1218332396843622, + "grad_norm": 43.05061340332031, + "learning_rate": 1.907250167257803e-06, + "loss": 0.0923, + "num_input_tokens_seen": 30958496, + "step": 45920 + }, + { + "epoch": 1.1219553905162094, + "grad_norm": 1.525328278541565, + "learning_rate": 1.9072142969331746e-06, + "loss": 0.1738, + "num_input_tokens_seen": 30961632, + "step": 45925 + }, + { + "epoch": 1.1220775413480566, + "grad_norm": 0.12426701933145523, + "learning_rate": 1.9071784200110392e-06, + "loss": 0.0542, + "num_input_tokens_seen": 30965024, + "step": 45930 + }, + { + "epoch": 1.1221996921799038, + "grad_norm": 27.84307098388672, + "learning_rate": 1.9071425364916588e-06, + "loss": 0.0523, + "num_input_tokens_seen": 30968352, + "step": 45935 + }, + { + "epoch": 1.122321843011751, + "grad_norm": 0.10840397328138351, + "learning_rate": 1.907106646375294e-06, + "loss": 0.133, + "num_input_tokens_seen": 30971808, + "step": 45940 + }, + { + "epoch": 1.1224439938435982, + "grad_norm": 30.482160568237305, + "learning_rate": 1.907070749662205e-06, + "loss": 0.0629, + "num_input_tokens_seen": 30975968, + "step": 45945 + }, + { + "epoch": 1.1225661446754454, + "grad_norm": 11.64674186706543, + "learning_rate": 1.907034846352654e-06, + "loss": 0.1863, + "num_input_tokens_seen": 30978976, + "step": 45950 + }, + { + "epoch": 1.1226882955072923, + "grad_norm": 16.049978256225586, + "learning_rate": 1.9069989364469016e-06, + "loss": 0.1616, + "num_input_tokens_seen": 30982368, + "step": 45955 + }, + { + "epoch": 1.1228104463391395, + "grad_norm": 30.9919376373291, + "learning_rate": 1.906963019945209e-06, + "loss": 0.1339, + "num_input_tokens_seen": 30985952, + "step": 45960 + }, + { + "epoch": 1.1229325971709867, + "grad_norm": 0.8775382041931152, + "learning_rate": 1.9069270968478376e-06, + "loss": 0.0615, + "num_input_tokens_seen": 30989536, + "step": 45965 + }, + { + "epoch": 1.123054748002834, + "grad_norm": 0.7894930839538574, + "learning_rate": 1.906891167155048e-06, + "loss": 0.0056, + "num_input_tokens_seen": 30992928, + "step": 45970 + }, + { + "epoch": 1.123176898834681, + "grad_norm": 0.265011727809906, + "learning_rate": 1.906855230867102e-06, + "loss": 0.0017, + "num_input_tokens_seen": 30996256, + "step": 45975 + }, + { + "epoch": 1.1232990496665283, + "grad_norm": 22.024242401123047, + "learning_rate": 1.906819287984261e-06, + "loss": 0.1181, + "num_input_tokens_seen": 30999648, + "step": 45980 + }, + { + "epoch": 1.1234212004983755, + "grad_norm": 101.3171157836914, + "learning_rate": 1.9067833385067862e-06, + "loss": 0.1356, + "num_input_tokens_seen": 31002528, + "step": 45985 + }, + { + "epoch": 1.1235433513302227, + "grad_norm": 10.967216491699219, + "learning_rate": 1.906747382434939e-06, + "loss": 0.1763, + "num_input_tokens_seen": 31005472, + "step": 45990 + }, + { + "epoch": 1.1236655021620696, + "grad_norm": 0.22470149397850037, + "learning_rate": 1.9067114197689809e-06, + "loss": 0.0701, + "num_input_tokens_seen": 31008864, + "step": 45995 + }, + { + "epoch": 1.1237876529939168, + "grad_norm": 37.859989166259766, + "learning_rate": 1.9066754505091735e-06, + "loss": 0.0851, + "num_input_tokens_seen": 31012192, + "step": 46000 + }, + { + "epoch": 1.123909803825764, + "grad_norm": 9.107433319091797, + "learning_rate": 1.9066394746557783e-06, + "loss": 0.0815, + "num_input_tokens_seen": 31015648, + "step": 46005 + }, + { + "epoch": 1.1240319546576112, + "grad_norm": 37.07771301269531, + "learning_rate": 1.9066034922090573e-06, + "loss": 0.1494, + "num_input_tokens_seen": 31018784, + "step": 46010 + }, + { + "epoch": 1.1241541054894584, + "grad_norm": 0.12652719020843506, + "learning_rate": 1.9065675031692718e-06, + "loss": 0.0123, + "num_input_tokens_seen": 31022240, + "step": 46015 + }, + { + "epoch": 1.1242762563213056, + "grad_norm": 18.91283416748047, + "learning_rate": 1.9065315075366834e-06, + "loss": 0.1601, + "num_input_tokens_seen": 31025504, + "step": 46020 + }, + { + "epoch": 1.1243984071531528, + "grad_norm": 11.62320327758789, + "learning_rate": 1.906495505311554e-06, + "loss": 0.0768, + "num_input_tokens_seen": 31028832, + "step": 46025 + }, + { + "epoch": 1.124520557985, + "grad_norm": 33.32771682739258, + "learning_rate": 1.9064594964941456e-06, + "loss": 0.1274, + "num_input_tokens_seen": 31032096, + "step": 46030 + }, + { + "epoch": 1.1246427088168471, + "grad_norm": 0.2060808539390564, + "learning_rate": 1.9064234810847198e-06, + "loss": 0.0304, + "num_input_tokens_seen": 31035488, + "step": 46035 + }, + { + "epoch": 1.124764859648694, + "grad_norm": 3.325789213180542, + "learning_rate": 1.9063874590835386e-06, + "loss": 0.0452, + "num_input_tokens_seen": 31038944, + "step": 46040 + }, + { + "epoch": 1.1248870104805413, + "grad_norm": 3.3198955059051514, + "learning_rate": 1.9063514304908641e-06, + "loss": 0.2326, + "num_input_tokens_seen": 31042528, + "step": 46045 + }, + { + "epoch": 1.1250091613123885, + "grad_norm": 20.069190979003906, + "learning_rate": 1.9063153953069583e-06, + "loss": 0.1244, + "num_input_tokens_seen": 31045664, + "step": 46050 + }, + { + "epoch": 1.1251313121442357, + "grad_norm": 0.09302736073732376, + "learning_rate": 1.906279353532083e-06, + "loss": 0.0525, + "num_input_tokens_seen": 31049248, + "step": 46055 + }, + { + "epoch": 1.1252534629760829, + "grad_norm": 0.030136937275528908, + "learning_rate": 1.9062433051665008e-06, + "loss": 0.0353, + "num_input_tokens_seen": 31052576, + "step": 46060 + }, + { + "epoch": 1.12537561380793, + "grad_norm": 0.379341185092926, + "learning_rate": 1.9062072502104734e-06, + "loss": 0.013, + "num_input_tokens_seen": 31055776, + "step": 46065 + }, + { + "epoch": 1.1254977646397772, + "grad_norm": 117.80951690673828, + "learning_rate": 1.906171188664263e-06, + "loss": 0.179, + "num_input_tokens_seen": 31059040, + "step": 46070 + }, + { + "epoch": 1.1256199154716244, + "grad_norm": 0.32162290811538696, + "learning_rate": 1.9061351205281322e-06, + "loss": 0.0592, + "num_input_tokens_seen": 31062432, + "step": 46075 + }, + { + "epoch": 1.1257420663034716, + "grad_norm": 6.650218486785889, + "learning_rate": 1.906099045802343e-06, + "loss": 0.0514, + "num_input_tokens_seen": 31065760, + "step": 46080 + }, + { + "epoch": 1.1258642171353186, + "grad_norm": 0.14563031494617462, + "learning_rate": 1.9060629644871576e-06, + "loss": 0.0684, + "num_input_tokens_seen": 31068896, + "step": 46085 + }, + { + "epoch": 1.1259863679671658, + "grad_norm": 7.294920444488525, + "learning_rate": 1.9060268765828388e-06, + "loss": 0.1949, + "num_input_tokens_seen": 31072288, + "step": 46090 + }, + { + "epoch": 1.126108518799013, + "grad_norm": 0.2821553945541382, + "learning_rate": 1.905990782089649e-06, + "loss": 0.0292, + "num_input_tokens_seen": 31075744, + "step": 46095 + }, + { + "epoch": 1.1262306696308602, + "grad_norm": 0.4164447486400604, + "learning_rate": 1.9059546810078504e-06, + "loss": 0.0117, + "num_input_tokens_seen": 31079456, + "step": 46100 + }, + { + "epoch": 1.1263528204627073, + "grad_norm": 0.48666372895240784, + "learning_rate": 1.9059185733377057e-06, + "loss": 0.0041, + "num_input_tokens_seen": 31082912, + "step": 46105 + }, + { + "epoch": 1.1264749712945545, + "grad_norm": 48.44977569580078, + "learning_rate": 1.9058824590794776e-06, + "loss": 0.0755, + "num_input_tokens_seen": 31086432, + "step": 46110 + }, + { + "epoch": 1.1265971221264017, + "grad_norm": 22.645042419433594, + "learning_rate": 1.9058463382334283e-06, + "loss": 0.0963, + "num_input_tokens_seen": 31090336, + "step": 46115 + }, + { + "epoch": 1.126719272958249, + "grad_norm": 0.06348301470279694, + "learning_rate": 1.9058102107998208e-06, + "loss": 0.0509, + "num_input_tokens_seen": 31093856, + "step": 46120 + }, + { + "epoch": 1.126841423790096, + "grad_norm": 96.59019470214844, + "learning_rate": 1.9057740767789182e-06, + "loss": 0.0786, + "num_input_tokens_seen": 31096928, + "step": 46125 + }, + { + "epoch": 1.1269635746219433, + "grad_norm": 0.15443776547908783, + "learning_rate": 1.9057379361709827e-06, + "loss": 0.1037, + "num_input_tokens_seen": 31100384, + "step": 46130 + }, + { + "epoch": 1.1270857254537903, + "grad_norm": 1.693228006362915, + "learning_rate": 1.9057017889762772e-06, + "loss": 0.0674, + "num_input_tokens_seen": 31103392, + "step": 46135 + }, + { + "epoch": 1.1272078762856375, + "grad_norm": 0.08480946719646454, + "learning_rate": 1.905665635195065e-06, + "loss": 0.0368, + "num_input_tokens_seen": 31107040, + "step": 46140 + }, + { + "epoch": 1.1273300271174846, + "grad_norm": 0.12390197813510895, + "learning_rate": 1.9056294748276081e-06, + "loss": 0.0386, + "num_input_tokens_seen": 31110112, + "step": 46145 + }, + { + "epoch": 1.1274521779493318, + "grad_norm": 0.7042220234870911, + "learning_rate": 1.9055933078741706e-06, + "loss": 0.0635, + "num_input_tokens_seen": 31113312, + "step": 46150 + }, + { + "epoch": 1.127574328781179, + "grad_norm": 48.234859466552734, + "learning_rate": 1.9055571343350148e-06, + "loss": 0.0946, + "num_input_tokens_seen": 31116768, + "step": 46155 + }, + { + "epoch": 1.1276964796130262, + "grad_norm": 0.1741168200969696, + "learning_rate": 1.905520954210404e-06, + "loss": 0.0064, + "num_input_tokens_seen": 31120544, + "step": 46160 + }, + { + "epoch": 1.1278186304448734, + "grad_norm": 0.08128256350755692, + "learning_rate": 1.9054847675006013e-06, + "loss": 0.0645, + "num_input_tokens_seen": 31123808, + "step": 46165 + }, + { + "epoch": 1.1279407812767204, + "grad_norm": 0.11791330575942993, + "learning_rate": 1.9054485742058697e-06, + "loss": 0.0966, + "num_input_tokens_seen": 31127264, + "step": 46170 + }, + { + "epoch": 1.1280629321085676, + "grad_norm": 26.097557067871094, + "learning_rate": 1.9054123743264725e-06, + "loss": 0.0776, + "num_input_tokens_seen": 31130976, + "step": 46175 + }, + { + "epoch": 1.1281850829404148, + "grad_norm": 0.1617422103881836, + "learning_rate": 1.9053761678626733e-06, + "loss": 0.0974, + "num_input_tokens_seen": 31133984, + "step": 46180 + }, + { + "epoch": 1.128307233772262, + "grad_norm": 0.15047034621238708, + "learning_rate": 1.9053399548147348e-06, + "loss": 0.0061, + "num_input_tokens_seen": 31137184, + "step": 46185 + }, + { + "epoch": 1.1284293846041091, + "grad_norm": 0.13080565631389618, + "learning_rate": 1.9053037351829207e-06, + "loss": 0.0486, + "num_input_tokens_seen": 31140448, + "step": 46190 + }, + { + "epoch": 1.1285515354359563, + "grad_norm": 4.27202844619751, + "learning_rate": 1.9052675089674942e-06, + "loss": 0.1009, + "num_input_tokens_seen": 31143712, + "step": 46195 + }, + { + "epoch": 1.1286736862678035, + "grad_norm": 0.387268602848053, + "learning_rate": 1.905231276168719e-06, + "loss": 0.0236, + "num_input_tokens_seen": 31146784, + "step": 46200 + }, + { + "epoch": 1.1287958370996507, + "grad_norm": 4.517419338226318, + "learning_rate": 1.9051950367868589e-06, + "loss": 0.1454, + "num_input_tokens_seen": 31150496, + "step": 46205 + }, + { + "epoch": 1.128917987931498, + "grad_norm": 7.835855960845947, + "learning_rate": 1.9051587908221766e-06, + "loss": 0.1363, + "num_input_tokens_seen": 31154208, + "step": 46210 + }, + { + "epoch": 1.129040138763345, + "grad_norm": 119.49268341064453, + "learning_rate": 1.905122538274936e-06, + "loss": 0.0925, + "num_input_tokens_seen": 31157600, + "step": 46215 + }, + { + "epoch": 1.129162289595192, + "grad_norm": 0.29845014214515686, + "learning_rate": 1.9050862791454011e-06, + "loss": 0.0013, + "num_input_tokens_seen": 31160800, + "step": 46220 + }, + { + "epoch": 1.1292844404270392, + "grad_norm": 116.61756896972656, + "learning_rate": 1.9050500134338353e-06, + "loss": 0.199, + "num_input_tokens_seen": 31164064, + "step": 46225 + }, + { + "epoch": 1.1294065912588864, + "grad_norm": 0.298088014125824, + "learning_rate": 1.9050137411405024e-06, + "loss": 0.0435, + "num_input_tokens_seen": 31167136, + "step": 46230 + }, + { + "epoch": 1.1295287420907336, + "grad_norm": 0.2157052755355835, + "learning_rate": 1.9049774622656661e-06, + "loss": 0.0609, + "num_input_tokens_seen": 31170144, + "step": 46235 + }, + { + "epoch": 1.1296508929225808, + "grad_norm": 0.09936691075563431, + "learning_rate": 1.90494117680959e-06, + "loss": 0.0446, + "num_input_tokens_seen": 31173280, + "step": 46240 + }, + { + "epoch": 1.129773043754428, + "grad_norm": 36.17173385620117, + "learning_rate": 1.9049048847725388e-06, + "loss": 0.0281, + "num_input_tokens_seen": 31177120, + "step": 46245 + }, + { + "epoch": 1.1298951945862752, + "grad_norm": 32.515846252441406, + "learning_rate": 1.9048685861547755e-06, + "loss": 0.1817, + "num_input_tokens_seen": 31180000, + "step": 46250 + }, + { + "epoch": 1.1300173454181224, + "grad_norm": 0.07410810887813568, + "learning_rate": 1.9048322809565644e-06, + "loss": 0.0721, + "num_input_tokens_seen": 31183200, + "step": 46255 + }, + { + "epoch": 1.1301394962499693, + "grad_norm": 0.28702202439308167, + "learning_rate": 1.90479596917817e-06, + "loss": 0.0749, + "num_input_tokens_seen": 31186720, + "step": 46260 + }, + { + "epoch": 1.1302616470818165, + "grad_norm": 0.07194984704256058, + "learning_rate": 1.9047596508198556e-06, + "loss": 0.0618, + "num_input_tokens_seen": 31189984, + "step": 46265 + }, + { + "epoch": 1.1303837979136637, + "grad_norm": 1.1287305355072021, + "learning_rate": 1.904723325881886e-06, + "loss": 0.0574, + "num_input_tokens_seen": 31193696, + "step": 46270 + }, + { + "epoch": 1.130505948745511, + "grad_norm": 0.10475568473339081, + "learning_rate": 1.9046869943645246e-06, + "loss": 0.0477, + "num_input_tokens_seen": 31197792, + "step": 46275 + }, + { + "epoch": 1.130628099577358, + "grad_norm": 10.711601257324219, + "learning_rate": 1.9046506562680365e-06, + "loss": 0.0344, + "num_input_tokens_seen": 31200992, + "step": 46280 + }, + { + "epoch": 1.1307502504092053, + "grad_norm": 42.35076904296875, + "learning_rate": 1.9046143115926851e-06, + "loss": 0.0564, + "num_input_tokens_seen": 31204128, + "step": 46285 + }, + { + "epoch": 1.1308724012410525, + "grad_norm": 0.016980910673737526, + "learning_rate": 1.9045779603387353e-06, + "loss": 0.0887, + "num_input_tokens_seen": 31207392, + "step": 46290 + }, + { + "epoch": 1.1309945520728997, + "grad_norm": 106.5681381225586, + "learning_rate": 1.9045416025064514e-06, + "loss": 0.0545, + "num_input_tokens_seen": 31210656, + "step": 46295 + }, + { + "epoch": 1.1311167029047469, + "grad_norm": 0.17704324424266815, + "learning_rate": 1.9045052380960972e-06, + "loss": 0.0667, + "num_input_tokens_seen": 31213792, + "step": 46300 + }, + { + "epoch": 1.131238853736594, + "grad_norm": 2.8407654762268066, + "learning_rate": 1.9044688671079382e-06, + "loss": 0.1294, + "num_input_tokens_seen": 31216672, + "step": 46305 + }, + { + "epoch": 1.131361004568441, + "grad_norm": 0.3381950557231903, + "learning_rate": 1.904432489542238e-06, + "loss": 0.0577, + "num_input_tokens_seen": 31220256, + "step": 46310 + }, + { + "epoch": 1.1314831554002882, + "grad_norm": 0.33855950832366943, + "learning_rate": 1.9043961053992616e-06, + "loss": 0.0775, + "num_input_tokens_seen": 31223776, + "step": 46315 + }, + { + "epoch": 1.1316053062321354, + "grad_norm": 1.119606614112854, + "learning_rate": 1.9043597146792733e-06, + "loss": 0.0025, + "num_input_tokens_seen": 31227168, + "step": 46320 + }, + { + "epoch": 1.1317274570639826, + "grad_norm": 48.65851974487305, + "learning_rate": 1.9043233173825382e-06, + "loss": 0.2185, + "num_input_tokens_seen": 31230368, + "step": 46325 + }, + { + "epoch": 1.1318496078958298, + "grad_norm": 0.20404629409313202, + "learning_rate": 1.9042869135093205e-06, + "loss": 0.0088, + "num_input_tokens_seen": 31233760, + "step": 46330 + }, + { + "epoch": 1.131971758727677, + "grad_norm": 4.307328224182129, + "learning_rate": 1.9042505030598853e-06, + "loss": 0.0758, + "num_input_tokens_seen": 31236896, + "step": 46335 + }, + { + "epoch": 1.1320939095595242, + "grad_norm": 0.06919888406991959, + "learning_rate": 1.904214086034497e-06, + "loss": 0.0013, + "num_input_tokens_seen": 31240928, + "step": 46340 + }, + { + "epoch": 1.1322160603913713, + "grad_norm": 62.974281311035156, + "learning_rate": 1.9041776624334206e-06, + "loss": 0.0048, + "num_input_tokens_seen": 31244448, + "step": 46345 + }, + { + "epoch": 1.1323382112232183, + "grad_norm": 0.3358139991760254, + "learning_rate": 1.9041412322569212e-06, + "loss": 0.1378, + "num_input_tokens_seen": 31247328, + "step": 46350 + }, + { + "epoch": 1.1324603620550655, + "grad_norm": 0.21907946467399597, + "learning_rate": 1.9041047955052639e-06, + "loss": 0.1222, + "num_input_tokens_seen": 31250464, + "step": 46355 + }, + { + "epoch": 1.1325825128869127, + "grad_norm": 0.13157333433628082, + "learning_rate": 1.9040683521787128e-06, + "loss": 0.1001, + "num_input_tokens_seen": 31253856, + "step": 46360 + }, + { + "epoch": 1.1327046637187599, + "grad_norm": 12.132460594177246, + "learning_rate": 1.9040319022775337e-06, + "loss": 0.2089, + "num_input_tokens_seen": 31256864, + "step": 46365 + }, + { + "epoch": 1.132826814550607, + "grad_norm": 0.45818689465522766, + "learning_rate": 1.9039954458019918e-06, + "loss": 0.156, + "num_input_tokens_seen": 31260064, + "step": 46370 + }, + { + "epoch": 1.1329489653824543, + "grad_norm": 10.195818901062012, + "learning_rate": 1.9039589827523512e-06, + "loss": 0.1985, + "num_input_tokens_seen": 31263328, + "step": 46375 + }, + { + "epoch": 1.1330711162143015, + "grad_norm": 0.5322380661964417, + "learning_rate": 1.903922513128878e-06, + "loss": 0.0926, + "num_input_tokens_seen": 31266592, + "step": 46380 + }, + { + "epoch": 1.1331932670461486, + "grad_norm": 0.13091324269771576, + "learning_rate": 1.9038860369318375e-06, + "loss": 0.035, + "num_input_tokens_seen": 31270560, + "step": 46385 + }, + { + "epoch": 1.1333154178779958, + "grad_norm": 15.070964813232422, + "learning_rate": 1.9038495541614945e-06, + "loss": 0.0376, + "num_input_tokens_seen": 31273760, + "step": 46390 + }, + { + "epoch": 1.133437568709843, + "grad_norm": 0.10301220417022705, + "learning_rate": 1.903813064818114e-06, + "loss": 0.0388, + "num_input_tokens_seen": 31277024, + "step": 46395 + }, + { + "epoch": 1.13355971954169, + "grad_norm": 0.348623126745224, + "learning_rate": 1.9037765689019622e-06, + "loss": 0.0996, + "num_input_tokens_seen": 31280160, + "step": 46400 + }, + { + "epoch": 1.1336818703735372, + "grad_norm": 0.0504717081785202, + "learning_rate": 1.9037400664133042e-06, + "loss": 0.1017, + "num_input_tokens_seen": 31283616, + "step": 46405 + }, + { + "epoch": 1.1338040212053844, + "grad_norm": 0.7688374519348145, + "learning_rate": 1.903703557352405e-06, + "loss": 0.0062, + "num_input_tokens_seen": 31287072, + "step": 46410 + }, + { + "epoch": 1.1339261720372316, + "grad_norm": 8.846529960632324, + "learning_rate": 1.9036670417195306e-06, + "loss": 0.1631, + "num_input_tokens_seen": 31290016, + "step": 46415 + }, + { + "epoch": 1.1340483228690788, + "grad_norm": 105.6480712890625, + "learning_rate": 1.9036305195149464e-06, + "loss": 0.1306, + "num_input_tokens_seen": 31293728, + "step": 46420 + }, + { + "epoch": 1.134170473700926, + "grad_norm": 0.15042132139205933, + "learning_rate": 1.9035939907389182e-06, + "loss": 0.061, + "num_input_tokens_seen": 31296672, + "step": 46425 + }, + { + "epoch": 1.1342926245327731, + "grad_norm": 0.11271711438894272, + "learning_rate": 1.9035574553917112e-06, + "loss": 0.1522, + "num_input_tokens_seen": 31300640, + "step": 46430 + }, + { + "epoch": 1.1344147753646203, + "grad_norm": 1.1958802938461304, + "learning_rate": 1.9035209134735916e-06, + "loss": 0.0435, + "num_input_tokens_seen": 31303968, + "step": 46435 + }, + { + "epoch": 1.1345369261964673, + "grad_norm": 45.54319381713867, + "learning_rate": 1.9034843649848248e-06, + "loss": 0.1184, + "num_input_tokens_seen": 31307040, + "step": 46440 + }, + { + "epoch": 1.1346590770283145, + "grad_norm": 1.4599428176879883, + "learning_rate": 1.9034478099256765e-06, + "loss": 0.1014, + "num_input_tokens_seen": 31310368, + "step": 46445 + }, + { + "epoch": 1.1347812278601617, + "grad_norm": 1.1676909923553467, + "learning_rate": 1.9034112482964128e-06, + "loss": 0.0286, + "num_input_tokens_seen": 31313376, + "step": 46450 + }, + { + "epoch": 1.1349033786920089, + "grad_norm": 0.18391495943069458, + "learning_rate": 1.9033746800972999e-06, + "loss": 0.0489, + "num_input_tokens_seen": 31316960, + "step": 46455 + }, + { + "epoch": 1.135025529523856, + "grad_norm": 0.20511262118816376, + "learning_rate": 1.903338105328603e-06, + "loss": 0.0743, + "num_input_tokens_seen": 31320096, + "step": 46460 + }, + { + "epoch": 1.1351476803557032, + "grad_norm": 0.14756199717521667, + "learning_rate": 1.9033015239905885e-06, + "loss": 0.0019, + "num_input_tokens_seen": 31323744, + "step": 46465 + }, + { + "epoch": 1.1352698311875504, + "grad_norm": 1.0843437910079956, + "learning_rate": 1.9032649360835222e-06, + "loss": 0.0281, + "num_input_tokens_seen": 31326944, + "step": 46470 + }, + { + "epoch": 1.1353919820193976, + "grad_norm": 0.18381133675575256, + "learning_rate": 1.9032283416076704e-06, + "loss": 0.0533, + "num_input_tokens_seen": 31330080, + "step": 46475 + }, + { + "epoch": 1.1355141328512448, + "grad_norm": 0.07421161234378815, + "learning_rate": 1.9031917405632993e-06, + "loss": 0.1661, + "num_input_tokens_seen": 31333856, + "step": 46480 + }, + { + "epoch": 1.135636283683092, + "grad_norm": 0.14352209866046906, + "learning_rate": 1.903155132950675e-06, + "loss": 0.0009, + "num_input_tokens_seen": 31336928, + "step": 46485 + }, + { + "epoch": 1.135758434514939, + "grad_norm": 124.78722381591797, + "learning_rate": 1.9031185187700634e-06, + "loss": 0.1065, + "num_input_tokens_seen": 31340320, + "step": 46490 + }, + { + "epoch": 1.1358805853467862, + "grad_norm": 1.1804207563400269, + "learning_rate": 1.9030818980217313e-06, + "loss": 0.0969, + "num_input_tokens_seen": 31343392, + "step": 46495 + }, + { + "epoch": 1.1360027361786333, + "grad_norm": 26.019861221313477, + "learning_rate": 1.9030452707059445e-06, + "loss": 0.1355, + "num_input_tokens_seen": 31346848, + "step": 46500 + }, + { + "epoch": 1.1361248870104805, + "grad_norm": 0.1707308143377304, + "learning_rate": 1.9030086368229696e-06, + "loss": 0.0748, + "num_input_tokens_seen": 31350432, + "step": 46505 + }, + { + "epoch": 1.1362470378423277, + "grad_norm": 8.230561256408691, + "learning_rate": 1.9029719963730732e-06, + "loss": 0.0875, + "num_input_tokens_seen": 31354656, + "step": 46510 + }, + { + "epoch": 1.136369188674175, + "grad_norm": 0.444359689950943, + "learning_rate": 1.9029353493565215e-06, + "loss": 0.0497, + "num_input_tokens_seen": 31358176, + "step": 46515 + }, + { + "epoch": 1.136491339506022, + "grad_norm": 0.6157476305961609, + "learning_rate": 1.9028986957735808e-06, + "loss": 0.0028, + "num_input_tokens_seen": 31361056, + "step": 46520 + }, + { + "epoch": 1.1366134903378693, + "grad_norm": 46.505859375, + "learning_rate": 1.9028620356245185e-06, + "loss": 0.0854, + "num_input_tokens_seen": 31363808, + "step": 46525 + }, + { + "epoch": 1.1367356411697163, + "grad_norm": 10.289785385131836, + "learning_rate": 1.9028253689096e-06, + "loss": 0.114, + "num_input_tokens_seen": 31367392, + "step": 46530 + }, + { + "epoch": 1.1368577920015634, + "grad_norm": 102.41519165039062, + "learning_rate": 1.902788695629093e-06, + "loss": 0.0493, + "num_input_tokens_seen": 31370912, + "step": 46535 + }, + { + "epoch": 1.1369799428334106, + "grad_norm": 0.1399715542793274, + "learning_rate": 1.902752015783264e-06, + "loss": 0.0007, + "num_input_tokens_seen": 31374240, + "step": 46540 + }, + { + "epoch": 1.1371020936652578, + "grad_norm": 0.10617326200008392, + "learning_rate": 1.902715329372379e-06, + "loss": 0.2546, + "num_input_tokens_seen": 31377632, + "step": 46545 + }, + { + "epoch": 1.137224244497105, + "grad_norm": 0.24258571863174438, + "learning_rate": 1.9026786363967056e-06, + "loss": 0.0117, + "num_input_tokens_seen": 31381088, + "step": 46550 + }, + { + "epoch": 1.1373463953289522, + "grad_norm": 6.267822265625, + "learning_rate": 1.9026419368565103e-06, + "loss": 0.0839, + "num_input_tokens_seen": 31384416, + "step": 46555 + }, + { + "epoch": 1.1374685461607994, + "grad_norm": 0.2888457179069519, + "learning_rate": 1.90260523075206e-06, + "loss": 0.1168, + "num_input_tokens_seen": 31387488, + "step": 46560 + }, + { + "epoch": 1.1375906969926466, + "grad_norm": 23.509075164794922, + "learning_rate": 1.9025685180836218e-06, + "loss": 0.2021, + "num_input_tokens_seen": 31391008, + "step": 46565 + }, + { + "epoch": 1.1377128478244938, + "grad_norm": 0.20501813292503357, + "learning_rate": 1.9025317988514624e-06, + "loss": 0.0107, + "num_input_tokens_seen": 31394208, + "step": 46570 + }, + { + "epoch": 1.137834998656341, + "grad_norm": 0.46571019291877747, + "learning_rate": 1.9024950730558493e-06, + "loss": 0.139, + "num_input_tokens_seen": 31397408, + "step": 46575 + }, + { + "epoch": 1.137957149488188, + "grad_norm": 0.25909340381622314, + "learning_rate": 1.902458340697049e-06, + "loss": 0.0533, + "num_input_tokens_seen": 31400992, + "step": 46580 + }, + { + "epoch": 1.1380793003200351, + "grad_norm": 0.20560549199581146, + "learning_rate": 1.902421601775329e-06, + "loss": 0.0471, + "num_input_tokens_seen": 31404448, + "step": 46585 + }, + { + "epoch": 1.1382014511518823, + "grad_norm": 0.46063530445098877, + "learning_rate": 1.9023848562909566e-06, + "loss": 0.1839, + "num_input_tokens_seen": 31407904, + "step": 46590 + }, + { + "epoch": 1.1383236019837295, + "grad_norm": 0.26652881503105164, + "learning_rate": 1.9023481042441985e-06, + "loss": 0.1041, + "num_input_tokens_seen": 31410976, + "step": 46595 + }, + { + "epoch": 1.1384457528155767, + "grad_norm": 0.34124669432640076, + "learning_rate": 1.9023113456353225e-06, + "loss": 0.0976, + "num_input_tokens_seen": 31414304, + "step": 46600 + }, + { + "epoch": 1.1385679036474239, + "grad_norm": 77.86670684814453, + "learning_rate": 1.9022745804645958e-06, + "loss": 0.1017, + "num_input_tokens_seen": 31417696, + "step": 46605 + }, + { + "epoch": 1.138690054479271, + "grad_norm": 0.6383596062660217, + "learning_rate": 1.9022378087322855e-06, + "loss": 0.0565, + "num_input_tokens_seen": 31420832, + "step": 46610 + }, + { + "epoch": 1.1388122053111183, + "grad_norm": 0.22451035678386688, + "learning_rate": 1.9022010304386588e-06, + "loss": 0.0414, + "num_input_tokens_seen": 31424288, + "step": 46615 + }, + { + "epoch": 1.1389343561429652, + "grad_norm": 0.03640579804778099, + "learning_rate": 1.902164245583984e-06, + "loss": 0.0571, + "num_input_tokens_seen": 31427680, + "step": 46620 + }, + { + "epoch": 1.1390565069748124, + "grad_norm": 0.8211766481399536, + "learning_rate": 1.9021274541685282e-06, + "loss": 0.0807, + "num_input_tokens_seen": 31430880, + "step": 46625 + }, + { + "epoch": 1.1391786578066596, + "grad_norm": 0.2223214954137802, + "learning_rate": 1.9020906561925587e-06, + "loss": 0.1928, + "num_input_tokens_seen": 31434144, + "step": 46630 + }, + { + "epoch": 1.1393008086385068, + "grad_norm": 0.1394394040107727, + "learning_rate": 1.9020538516563436e-06, + "loss": 0.0031, + "num_input_tokens_seen": 31437472, + "step": 46635 + }, + { + "epoch": 1.139422959470354, + "grad_norm": 8.12044906616211, + "learning_rate": 1.9020170405601498e-06, + "loss": 0.1425, + "num_input_tokens_seen": 31440672, + "step": 46640 + }, + { + "epoch": 1.1395451103022012, + "grad_norm": 0.6035500764846802, + "learning_rate": 1.9019802229042458e-06, + "loss": 0.0012, + "num_input_tokens_seen": 31443872, + "step": 46645 + }, + { + "epoch": 1.1396672611340484, + "grad_norm": 0.03213443607091904, + "learning_rate": 1.901943398688899e-06, + "loss": 0.0506, + "num_input_tokens_seen": 31447136, + "step": 46650 + }, + { + "epoch": 1.1397894119658956, + "grad_norm": 45.71347427368164, + "learning_rate": 1.901906567914377e-06, + "loss": 0.2119, + "num_input_tokens_seen": 31450656, + "step": 46655 + }, + { + "epoch": 1.1399115627977428, + "grad_norm": 0.019582994282245636, + "learning_rate": 1.9018697305809482e-06, + "loss": 0.0027, + "num_input_tokens_seen": 31453536, + "step": 46660 + }, + { + "epoch": 1.14003371362959, + "grad_norm": 73.76444244384766, + "learning_rate": 1.9018328866888798e-06, + "loss": 0.1224, + "num_input_tokens_seen": 31456672, + "step": 46665 + }, + { + "epoch": 1.140155864461437, + "grad_norm": 0.5211069583892822, + "learning_rate": 1.9017960362384402e-06, + "loss": 0.1004, + "num_input_tokens_seen": 31460192, + "step": 46670 + }, + { + "epoch": 1.140278015293284, + "grad_norm": 0.09799375385046005, + "learning_rate": 1.9017591792298974e-06, + "loss": 0.1509, + "num_input_tokens_seen": 31463328, + "step": 46675 + }, + { + "epoch": 1.1404001661251313, + "grad_norm": 0.06055364012718201, + "learning_rate": 1.9017223156635191e-06, + "loss": 0.0268, + "num_input_tokens_seen": 31466656, + "step": 46680 + }, + { + "epoch": 1.1405223169569785, + "grad_norm": 0.13429653644561768, + "learning_rate": 1.901685445539574e-06, + "loss": 0.1924, + "num_input_tokens_seen": 31469856, + "step": 46685 + }, + { + "epoch": 1.1406444677888257, + "grad_norm": 0.21500791609287262, + "learning_rate": 1.9016485688583295e-06, + "loss": 0.0013, + "num_input_tokens_seen": 31473184, + "step": 46690 + }, + { + "epoch": 1.1407666186206729, + "grad_norm": 151.6306610107422, + "learning_rate": 1.901611685620054e-06, + "loss": 0.0728, + "num_input_tokens_seen": 31476448, + "step": 46695 + }, + { + "epoch": 1.14088876945252, + "grad_norm": 116.54022216796875, + "learning_rate": 1.901574795825016e-06, + "loss": 0.0809, + "num_input_tokens_seen": 31479776, + "step": 46700 + }, + { + "epoch": 1.141010920284367, + "grad_norm": 0.4956272840499878, + "learning_rate": 1.9015378994734834e-06, + "loss": 0.2304, + "num_input_tokens_seen": 31483104, + "step": 46705 + }, + { + "epoch": 1.1411330711162142, + "grad_norm": 32.742679595947266, + "learning_rate": 1.901500996565725e-06, + "loss": 0.0266, + "num_input_tokens_seen": 31486432, + "step": 46710 + }, + { + "epoch": 1.1412552219480614, + "grad_norm": 10.730284690856934, + "learning_rate": 1.9014640871020084e-06, + "loss": 0.1238, + "num_input_tokens_seen": 31489952, + "step": 46715 + }, + { + "epoch": 1.1413773727799086, + "grad_norm": 169.406005859375, + "learning_rate": 1.9014271710826032e-06, + "loss": 0.0747, + "num_input_tokens_seen": 31493280, + "step": 46720 + }, + { + "epoch": 1.1414995236117558, + "grad_norm": 0.1027897372841835, + "learning_rate": 1.9013902485077767e-06, + "loss": 0.0029, + "num_input_tokens_seen": 31496608, + "step": 46725 + }, + { + "epoch": 1.141621674443603, + "grad_norm": 0.20650623738765717, + "learning_rate": 1.9013533193777977e-06, + "loss": 0.0033, + "num_input_tokens_seen": 31499616, + "step": 46730 + }, + { + "epoch": 1.1417438252754502, + "grad_norm": 0.04875501990318298, + "learning_rate": 1.9013163836929351e-06, + "loss": 0.1099, + "num_input_tokens_seen": 31502944, + "step": 46735 + }, + { + "epoch": 1.1418659761072973, + "grad_norm": 0.8934175968170166, + "learning_rate": 1.9012794414534574e-06, + "loss": 0.0803, + "num_input_tokens_seen": 31506592, + "step": 46740 + }, + { + "epoch": 1.1419881269391445, + "grad_norm": 0.6407132148742676, + "learning_rate": 1.9012424926596333e-06, + "loss": 0.0397, + "num_input_tokens_seen": 31509792, + "step": 46745 + }, + { + "epoch": 1.1421102777709917, + "grad_norm": 0.04105055704712868, + "learning_rate": 1.9012055373117312e-06, + "loss": 0.0027, + "num_input_tokens_seen": 31513056, + "step": 46750 + }, + { + "epoch": 1.1422324286028387, + "grad_norm": 35.02225875854492, + "learning_rate": 1.9011685754100202e-06, + "loss": 0.1939, + "num_input_tokens_seen": 31516896, + "step": 46755 + }, + { + "epoch": 1.1423545794346859, + "grad_norm": 31.574607849121094, + "learning_rate": 1.901131606954769e-06, + "loss": 0.1571, + "num_input_tokens_seen": 31520032, + "step": 46760 + }, + { + "epoch": 1.142476730266533, + "grad_norm": 0.2570955157279968, + "learning_rate": 1.901094631946246e-06, + "loss": 0.058, + "num_input_tokens_seen": 31523296, + "step": 46765 + }, + { + "epoch": 1.1425988810983803, + "grad_norm": 3.1613028049468994, + "learning_rate": 1.9010576503847207e-06, + "loss": 0.2385, + "num_input_tokens_seen": 31526368, + "step": 46770 + }, + { + "epoch": 1.1427210319302274, + "grad_norm": 0.1640394628047943, + "learning_rate": 1.901020662270462e-06, + "loss": 0.1712, + "num_input_tokens_seen": 31530080, + "step": 46775 + }, + { + "epoch": 1.1428431827620746, + "grad_norm": 1.545048713684082, + "learning_rate": 1.9009836676037382e-06, + "loss": 0.1039, + "num_input_tokens_seen": 31533856, + "step": 46780 + }, + { + "epoch": 1.1429653335939218, + "grad_norm": 21.144142150878906, + "learning_rate": 1.900946666384819e-06, + "loss": 0.0891, + "num_input_tokens_seen": 31537504, + "step": 46785 + }, + { + "epoch": 1.143087484425769, + "grad_norm": 0.20183810591697693, + "learning_rate": 1.9009096586139737e-06, + "loss": 0.1749, + "num_input_tokens_seen": 31540576, + "step": 46790 + }, + { + "epoch": 1.143209635257616, + "grad_norm": 9.08875846862793, + "learning_rate": 1.9008726442914708e-06, + "loss": 0.099, + "num_input_tokens_seen": 31543392, + "step": 46795 + }, + { + "epoch": 1.1433317860894632, + "grad_norm": 0.41724827885627747, + "learning_rate": 1.9008356234175794e-06, + "loss": 0.0932, + "num_input_tokens_seen": 31546848, + "step": 46800 + }, + { + "epoch": 1.1434539369213104, + "grad_norm": 0.13978880643844604, + "learning_rate": 1.9007985959925697e-06, + "loss": 0.0097, + "num_input_tokens_seen": 31550176, + "step": 46805 + }, + { + "epoch": 1.1435760877531576, + "grad_norm": 30.22905158996582, + "learning_rate": 1.9007615620167098e-06, + "loss": 0.073, + "num_input_tokens_seen": 31554016, + "step": 46810 + }, + { + "epoch": 1.1436982385850047, + "grad_norm": 0.1616167426109314, + "learning_rate": 1.9007245214902698e-06, + "loss": 0.0379, + "num_input_tokens_seen": 31557152, + "step": 46815 + }, + { + "epoch": 1.143820389416852, + "grad_norm": 2.9157519340515137, + "learning_rate": 1.900687474413519e-06, + "loss": 0.0507, + "num_input_tokens_seen": 31560480, + "step": 46820 + }, + { + "epoch": 1.1439425402486991, + "grad_norm": 0.09915733337402344, + "learning_rate": 1.9006504207867261e-06, + "loss": 0.0025, + "num_input_tokens_seen": 31563552, + "step": 46825 + }, + { + "epoch": 1.1440646910805463, + "grad_norm": 0.6402256488800049, + "learning_rate": 1.9006133606101615e-06, + "loss": 0.0561, + "num_input_tokens_seen": 31566752, + "step": 46830 + }, + { + "epoch": 1.1441868419123935, + "grad_norm": 10.9564790725708, + "learning_rate": 1.9005762938840942e-06, + "loss": 0.1262, + "num_input_tokens_seen": 31570080, + "step": 46835 + }, + { + "epoch": 1.1443089927442407, + "grad_norm": 1.3392218351364136, + "learning_rate": 1.9005392206087937e-06, + "loss": 0.0728, + "num_input_tokens_seen": 31573856, + "step": 46840 + }, + { + "epoch": 1.1444311435760877, + "grad_norm": 10.963790893554688, + "learning_rate": 1.9005021407845302e-06, + "loss": 0.082, + "num_input_tokens_seen": 31577376, + "step": 46845 + }, + { + "epoch": 1.1445532944079349, + "grad_norm": 86.67719268798828, + "learning_rate": 1.9004650544115726e-06, + "loss": 0.1102, + "num_input_tokens_seen": 31580704, + "step": 46850 + }, + { + "epoch": 1.144675445239782, + "grad_norm": 0.4809873104095459, + "learning_rate": 1.9004279614901908e-06, + "loss": 0.0784, + "num_input_tokens_seen": 31583648, + "step": 46855 + }, + { + "epoch": 1.1447975960716292, + "grad_norm": 17.987932205200195, + "learning_rate": 1.9003908620206548e-06, + "loss": 0.0909, + "num_input_tokens_seen": 31587296, + "step": 46860 + }, + { + "epoch": 1.1449197469034764, + "grad_norm": 44.341793060302734, + "learning_rate": 1.9003537560032344e-06, + "loss": 0.1251, + "num_input_tokens_seen": 31590496, + "step": 46865 + }, + { + "epoch": 1.1450418977353236, + "grad_norm": 0.029724106192588806, + "learning_rate": 1.9003166434381991e-06, + "loss": 0.0921, + "num_input_tokens_seen": 31594144, + "step": 46870 + }, + { + "epoch": 1.1451640485671708, + "grad_norm": 0.1720057874917984, + "learning_rate": 1.9002795243258194e-06, + "loss": 0.0178, + "num_input_tokens_seen": 31598240, + "step": 46875 + }, + { + "epoch": 1.145286199399018, + "grad_norm": 20.10298728942871, + "learning_rate": 1.9002423986663645e-06, + "loss": 0.0763, + "num_input_tokens_seen": 31601632, + "step": 46880 + }, + { + "epoch": 1.145408350230865, + "grad_norm": 0.6785484552383423, + "learning_rate": 1.9002052664601048e-06, + "loss": 0.0697, + "num_input_tokens_seen": 31605344, + "step": 46885 + }, + { + "epoch": 1.1455305010627121, + "grad_norm": 1.321736454963684, + "learning_rate": 1.9001681277073103e-06, + "loss": 0.0721, + "num_input_tokens_seen": 31608608, + "step": 46890 + }, + { + "epoch": 1.1456526518945593, + "grad_norm": 8.711379051208496, + "learning_rate": 1.9001309824082512e-06, + "loss": 0.1639, + "num_input_tokens_seen": 31612000, + "step": 46895 + }, + { + "epoch": 1.1457748027264065, + "grad_norm": 13.112317085266113, + "learning_rate": 1.9000938305631974e-06, + "loss": 0.2635, + "num_input_tokens_seen": 31615072, + "step": 46900 + }, + { + "epoch": 1.1458969535582537, + "grad_norm": 1.1117407083511353, + "learning_rate": 1.9000566721724193e-06, + "loss": 0.1127, + "num_input_tokens_seen": 31618272, + "step": 46905 + }, + { + "epoch": 1.146019104390101, + "grad_norm": 0.35874027013778687, + "learning_rate": 1.9000195072361866e-06, + "loss": 0.0044, + "num_input_tokens_seen": 31621536, + "step": 46910 + }, + { + "epoch": 1.146141255221948, + "grad_norm": 9.392590522766113, + "learning_rate": 1.89998233575477e-06, + "loss": 0.074, + "num_input_tokens_seen": 31624992, + "step": 46915 + }, + { + "epoch": 1.1462634060537953, + "grad_norm": 0.24224503338336945, + "learning_rate": 1.8999451577284403e-06, + "loss": 0.0017, + "num_input_tokens_seen": 31628064, + "step": 46920 + }, + { + "epoch": 1.1463855568856425, + "grad_norm": 85.96072387695312, + "learning_rate": 1.899907973157467e-06, + "loss": 0.1145, + "num_input_tokens_seen": 31631904, + "step": 46925 + }, + { + "epoch": 1.1465077077174897, + "grad_norm": 78.0488510131836, + "learning_rate": 1.899870782042121e-06, + "loss": 0.0367, + "num_input_tokens_seen": 31635680, + "step": 46930 + }, + { + "epoch": 1.1466298585493366, + "grad_norm": 37.96966552734375, + "learning_rate": 1.8998335843826724e-06, + "loss": 0.1674, + "num_input_tokens_seen": 31639008, + "step": 46935 + }, + { + "epoch": 1.1467520093811838, + "grad_norm": 0.22917182743549347, + "learning_rate": 1.899796380179392e-06, + "loss": 0.0481, + "num_input_tokens_seen": 31642208, + "step": 46940 + }, + { + "epoch": 1.146874160213031, + "grad_norm": 0.07978054881095886, + "learning_rate": 1.8997591694325505e-06, + "loss": 0.0481, + "num_input_tokens_seen": 31645856, + "step": 46945 + }, + { + "epoch": 1.1469963110448782, + "grad_norm": 0.15004077553749084, + "learning_rate": 1.8997219521424184e-06, + "loss": 0.0102, + "num_input_tokens_seen": 31649120, + "step": 46950 + }, + { + "epoch": 1.1471184618767254, + "grad_norm": 0.09087875485420227, + "learning_rate": 1.8996847283092658e-06, + "loss": 0.1831, + "num_input_tokens_seen": 31652128, + "step": 46955 + }, + { + "epoch": 1.1472406127085726, + "grad_norm": 113.01051330566406, + "learning_rate": 1.8996474979333645e-06, + "loss": 0.0683, + "num_input_tokens_seen": 31655392, + "step": 46960 + }, + { + "epoch": 1.1473627635404198, + "grad_norm": 43.2165412902832, + "learning_rate": 1.8996102610149843e-06, + "loss": 0.1242, + "num_input_tokens_seen": 31659552, + "step": 46965 + }, + { + "epoch": 1.147484914372267, + "grad_norm": 8.446593284606934, + "learning_rate": 1.8995730175543962e-06, + "loss": 0.0492, + "num_input_tokens_seen": 31662496, + "step": 46970 + }, + { + "epoch": 1.147607065204114, + "grad_norm": 0.12521806359291077, + "learning_rate": 1.899535767551871e-06, + "loss": 0.0566, + "num_input_tokens_seen": 31665696, + "step": 46975 + }, + { + "epoch": 1.1477292160359611, + "grad_norm": 49.258235931396484, + "learning_rate": 1.8994985110076802e-06, + "loss": 0.0329, + "num_input_tokens_seen": 31669856, + "step": 46980 + }, + { + "epoch": 1.1478513668678083, + "grad_norm": 0.5356413722038269, + "learning_rate": 1.8994612479220942e-06, + "loss": 0.0303, + "num_input_tokens_seen": 31672736, + "step": 46985 + }, + { + "epoch": 1.1479735176996555, + "grad_norm": 0.22916997969150543, + "learning_rate": 1.8994239782953838e-06, + "loss": 0.0517, + "num_input_tokens_seen": 31676128, + "step": 46990 + }, + { + "epoch": 1.1480956685315027, + "grad_norm": 2.0166921615600586, + "learning_rate": 1.8993867021278205e-06, + "loss": 0.0416, + "num_input_tokens_seen": 31679264, + "step": 46995 + }, + { + "epoch": 1.1482178193633499, + "grad_norm": 0.0886596217751503, + "learning_rate": 1.8993494194196754e-06, + "loss": 0.0559, + "num_input_tokens_seen": 31682144, + "step": 47000 + }, + { + "epoch": 1.148339970195197, + "grad_norm": 0.19293953478336334, + "learning_rate": 1.8993121301712192e-06, + "loss": 0.1114, + "num_input_tokens_seen": 31685024, + "step": 47005 + }, + { + "epoch": 1.1484621210270443, + "grad_norm": 0.08507690578699112, + "learning_rate": 1.8992748343827233e-06, + "loss": 0.0433, + "num_input_tokens_seen": 31688288, + "step": 47010 + }, + { + "epoch": 1.1485842718588914, + "grad_norm": 18.874502182006836, + "learning_rate": 1.8992375320544589e-06, + "loss": 0.2048, + "num_input_tokens_seen": 31691296, + "step": 47015 + }, + { + "epoch": 1.1487064226907386, + "grad_norm": 8.155969619750977, + "learning_rate": 1.8992002231866975e-06, + "loss": 0.134, + "num_input_tokens_seen": 31694688, + "step": 47020 + }, + { + "epoch": 1.1488285735225856, + "grad_norm": 5.913618564605713, + "learning_rate": 1.89916290777971e-06, + "loss": 0.0332, + "num_input_tokens_seen": 31698208, + "step": 47025 + }, + { + "epoch": 1.1489507243544328, + "grad_norm": 0.2968266010284424, + "learning_rate": 1.899125585833768e-06, + "loss": 0.124, + "num_input_tokens_seen": 31701600, + "step": 47030 + }, + { + "epoch": 1.14907287518628, + "grad_norm": 0.13521993160247803, + "learning_rate": 1.8990882573491432e-06, + "loss": 0.1013, + "num_input_tokens_seen": 31704928, + "step": 47035 + }, + { + "epoch": 1.1491950260181272, + "grad_norm": 17.11520767211914, + "learning_rate": 1.8990509223261064e-06, + "loss": 0.1103, + "num_input_tokens_seen": 31708640, + "step": 47040 + }, + { + "epoch": 1.1493171768499744, + "grad_norm": 69.05915069580078, + "learning_rate": 1.8990135807649295e-06, + "loss": 0.1072, + "num_input_tokens_seen": 31711968, + "step": 47045 + }, + { + "epoch": 1.1494393276818216, + "grad_norm": 0.7382350564002991, + "learning_rate": 1.898976232665884e-06, + "loss": 0.0083, + "num_input_tokens_seen": 31715552, + "step": 47050 + }, + { + "epoch": 1.1495614785136687, + "grad_norm": 0.20631414651870728, + "learning_rate": 1.8989388780292418e-06, + "loss": 0.0794, + "num_input_tokens_seen": 31718624, + "step": 47055 + }, + { + "epoch": 1.149683629345516, + "grad_norm": 0.3034517765045166, + "learning_rate": 1.8989015168552743e-06, + "loss": 0.0679, + "num_input_tokens_seen": 31722080, + "step": 47060 + }, + { + "epoch": 1.149805780177363, + "grad_norm": 0.11916504055261612, + "learning_rate": 1.898864149144253e-06, + "loss": 0.0021, + "num_input_tokens_seen": 31725536, + "step": 47065 + }, + { + "epoch": 1.14992793100921, + "grad_norm": 0.3561840355396271, + "learning_rate": 1.89882677489645e-06, + "loss": 0.126, + "num_input_tokens_seen": 31728480, + "step": 47070 + }, + { + "epoch": 1.1500500818410573, + "grad_norm": 0.2590155303478241, + "learning_rate": 1.898789394112137e-06, + "loss": 0.0025, + "num_input_tokens_seen": 31731552, + "step": 47075 + }, + { + "epoch": 1.1501722326729045, + "grad_norm": 0.07878569513559341, + "learning_rate": 1.8987520067915854e-06, + "loss": 0.0754, + "num_input_tokens_seen": 31735008, + "step": 47080 + }, + { + "epoch": 1.1502943835047517, + "grad_norm": 60.1794319152832, + "learning_rate": 1.8987146129350678e-06, + "loss": 0.0624, + "num_input_tokens_seen": 31737952, + "step": 47085 + }, + { + "epoch": 1.1504165343365989, + "grad_norm": 0.09747473150491714, + "learning_rate": 1.8986772125428558e-06, + "loss": 0.0826, + "num_input_tokens_seen": 31741408, + "step": 47090 + }, + { + "epoch": 1.150538685168446, + "grad_norm": 215.47508239746094, + "learning_rate": 1.8986398056152212e-06, + "loss": 0.1433, + "num_input_tokens_seen": 31744928, + "step": 47095 + }, + { + "epoch": 1.1506608360002932, + "grad_norm": 0.09226039052009583, + "learning_rate": 1.8986023921524364e-06, + "loss": 0.1718, + "num_input_tokens_seen": 31748640, + "step": 47100 + }, + { + "epoch": 1.1507829868321404, + "grad_norm": 0.1691044718027115, + "learning_rate": 1.8985649721547732e-06, + "loss": 0.1613, + "num_input_tokens_seen": 31752416, + "step": 47105 + }, + { + "epoch": 1.1509051376639876, + "grad_norm": 0.20411577820777893, + "learning_rate": 1.8985275456225038e-06, + "loss": 0.1354, + "num_input_tokens_seen": 31755744, + "step": 47110 + }, + { + "epoch": 1.1510272884958346, + "grad_norm": 0.38191160559654236, + "learning_rate": 1.8984901125559006e-06, + "loss": 0.0489, + "num_input_tokens_seen": 31759072, + "step": 47115 + }, + { + "epoch": 1.1511494393276818, + "grad_norm": 0.07923942804336548, + "learning_rate": 1.8984526729552354e-06, + "loss": 0.051, + "num_input_tokens_seen": 31762208, + "step": 47120 + }, + { + "epoch": 1.151271590159529, + "grad_norm": 0.2053736001253128, + "learning_rate": 1.898415226820781e-06, + "loss": 0.1468, + "num_input_tokens_seen": 31765472, + "step": 47125 + }, + { + "epoch": 1.1513937409913761, + "grad_norm": 84.87515258789062, + "learning_rate": 1.8983777741528094e-06, + "loss": 0.1513, + "num_input_tokens_seen": 31768352, + "step": 47130 + }, + { + "epoch": 1.1515158918232233, + "grad_norm": 0.20891521871089935, + "learning_rate": 1.8983403149515928e-06, + "loss": 0.0462, + "num_input_tokens_seen": 31772576, + "step": 47135 + }, + { + "epoch": 1.1516380426550705, + "grad_norm": 10.00893783569336, + "learning_rate": 1.8983028492174037e-06, + "loss": 0.0464, + "num_input_tokens_seen": 31775776, + "step": 47140 + }, + { + "epoch": 1.1517601934869177, + "grad_norm": 0.5158972144126892, + "learning_rate": 1.898265376950515e-06, + "loss": 0.0162, + "num_input_tokens_seen": 31779168, + "step": 47145 + }, + { + "epoch": 1.151882344318765, + "grad_norm": 10.742677688598633, + "learning_rate": 1.8982278981511986e-06, + "loss": 0.1282, + "num_input_tokens_seen": 31782304, + "step": 47150 + }, + { + "epoch": 1.1520044951506119, + "grad_norm": 0.17852243781089783, + "learning_rate": 1.8981904128197274e-06, + "loss": 0.1716, + "num_input_tokens_seen": 31785824, + "step": 47155 + }, + { + "epoch": 1.152126645982459, + "grad_norm": 0.6007935404777527, + "learning_rate": 1.898152920956374e-06, + "loss": 0.0282, + "num_input_tokens_seen": 31789088, + "step": 47160 + }, + { + "epoch": 1.1522487968143063, + "grad_norm": 1.4212846755981445, + "learning_rate": 1.8981154225614108e-06, + "loss": 0.0165, + "num_input_tokens_seen": 31792352, + "step": 47165 + }, + { + "epoch": 1.1523709476461534, + "grad_norm": 0.3089074194431305, + "learning_rate": 1.8980779176351112e-06, + "loss": 0.04, + "num_input_tokens_seen": 31795680, + "step": 47170 + }, + { + "epoch": 1.1524930984780006, + "grad_norm": 22.73480987548828, + "learning_rate": 1.8980404061777468e-06, + "loss": 0.1514, + "num_input_tokens_seen": 31798816, + "step": 47175 + }, + { + "epoch": 1.1526152493098478, + "grad_norm": 0.07772679626941681, + "learning_rate": 1.8980028881895916e-06, + "loss": 0.0017, + "num_input_tokens_seen": 31801888, + "step": 47180 + }, + { + "epoch": 1.152737400141695, + "grad_norm": 0.057387080043554306, + "learning_rate": 1.8979653636709173e-06, + "loss": 0.0878, + "num_input_tokens_seen": 31805408, + "step": 47185 + }, + { + "epoch": 1.1528595509735422, + "grad_norm": 0.061906807124614716, + "learning_rate": 1.8979278326219977e-06, + "loss": 0.0473, + "num_input_tokens_seen": 31808672, + "step": 47190 + }, + { + "epoch": 1.1529817018053894, + "grad_norm": 34.71351623535156, + "learning_rate": 1.8978902950431052e-06, + "loss": 0.0428, + "num_input_tokens_seen": 31812256, + "step": 47195 + }, + { + "epoch": 1.1531038526372366, + "grad_norm": 25.56401824951172, + "learning_rate": 1.897852750934513e-06, + "loss": 0.0926, + "num_input_tokens_seen": 31815840, + "step": 47200 + }, + { + "epoch": 1.1532260034690835, + "grad_norm": 0.13676181435585022, + "learning_rate": 1.8978152002964943e-06, + "loss": 0.0025, + "num_input_tokens_seen": 31819488, + "step": 47205 + }, + { + "epoch": 1.1533481543009307, + "grad_norm": 0.024842863902449608, + "learning_rate": 1.8977776431293218e-06, + "loss": 0.2126, + "num_input_tokens_seen": 31822880, + "step": 47210 + }, + { + "epoch": 1.153470305132778, + "grad_norm": 0.13829733431339264, + "learning_rate": 1.897740079433269e-06, + "loss": 0.2088, + "num_input_tokens_seen": 31825952, + "step": 47215 + }, + { + "epoch": 1.1535924559646251, + "grad_norm": 0.07277069985866547, + "learning_rate": 1.8977025092086087e-06, + "loss": 0.0432, + "num_input_tokens_seen": 31829024, + "step": 47220 + }, + { + "epoch": 1.1537146067964723, + "grad_norm": 8.050619125366211, + "learning_rate": 1.8976649324556143e-06, + "loss": 0.1844, + "num_input_tokens_seen": 31832672, + "step": 47225 + }, + { + "epoch": 1.1538367576283195, + "grad_norm": 0.4239726960659027, + "learning_rate": 1.897627349174559e-06, + "loss": 0.0456, + "num_input_tokens_seen": 31835680, + "step": 47230 + }, + { + "epoch": 1.1539589084601667, + "grad_norm": 5.3124918937683105, + "learning_rate": 1.8975897593657165e-06, + "loss": 0.0201, + "num_input_tokens_seen": 31839008, + "step": 47235 + }, + { + "epoch": 1.1540810592920137, + "grad_norm": 0.22926749289035797, + "learning_rate": 1.8975521630293595e-06, + "loss": 0.0469, + "num_input_tokens_seen": 31842976, + "step": 47240 + }, + { + "epoch": 1.1542032101238608, + "grad_norm": 0.3021707236766815, + "learning_rate": 1.897514560165762e-06, + "loss": 0.0565, + "num_input_tokens_seen": 31846944, + "step": 47245 + }, + { + "epoch": 1.154325360955708, + "grad_norm": 0.4421197175979614, + "learning_rate": 1.8974769507751968e-06, + "loss": 0.2177, + "num_input_tokens_seen": 31850080, + "step": 47250 + }, + { + "epoch": 1.1544475117875552, + "grad_norm": 50.06604766845703, + "learning_rate": 1.8974393348579383e-06, + "loss": 0.0344, + "num_input_tokens_seen": 31853408, + "step": 47255 + }, + { + "epoch": 1.1545696626194024, + "grad_norm": 0.12287864089012146, + "learning_rate": 1.8974017124142594e-06, + "loss": 0.0009, + "num_input_tokens_seen": 31857248, + "step": 47260 + }, + { + "epoch": 1.1546918134512496, + "grad_norm": 168.93858337402344, + "learning_rate": 1.897364083444434e-06, + "loss": 0.115, + "num_input_tokens_seen": 31860128, + "step": 47265 + }, + { + "epoch": 1.1548139642830968, + "grad_norm": 74.55368041992188, + "learning_rate": 1.8973264479487355e-06, + "loss": 0.01, + "num_input_tokens_seen": 31863712, + "step": 47270 + }, + { + "epoch": 1.154936115114944, + "grad_norm": 0.07216308265924454, + "learning_rate": 1.8972888059274377e-06, + "loss": 0.0607, + "num_input_tokens_seen": 31867232, + "step": 47275 + }, + { + "epoch": 1.1550582659467912, + "grad_norm": 90.53994750976562, + "learning_rate": 1.8972511573808144e-06, + "loss": 0.108, + "num_input_tokens_seen": 31871136, + "step": 47280 + }, + { + "epoch": 1.1551804167786384, + "grad_norm": 0.903574526309967, + "learning_rate": 1.8972135023091394e-06, + "loss": 0.0396, + "num_input_tokens_seen": 31873824, + "step": 47285 + }, + { + "epoch": 1.1553025676104853, + "grad_norm": 38.021968841552734, + "learning_rate": 1.8971758407126864e-06, + "loss": 0.2427, + "num_input_tokens_seen": 31877344, + "step": 47290 + }, + { + "epoch": 1.1554247184423325, + "grad_norm": 19.66106414794922, + "learning_rate": 1.8971381725917292e-06, + "loss": 0.1947, + "num_input_tokens_seen": 31880544, + "step": 47295 + }, + { + "epoch": 1.1555468692741797, + "grad_norm": 0.1368977427482605, + "learning_rate": 1.8971004979465422e-06, + "loss": 0.0869, + "num_input_tokens_seen": 31883872, + "step": 47300 + }, + { + "epoch": 1.155669020106027, + "grad_norm": 0.7672892808914185, + "learning_rate": 1.897062816777399e-06, + "loss": 0.0875, + "num_input_tokens_seen": 31887264, + "step": 47305 + }, + { + "epoch": 1.155791170937874, + "grad_norm": 0.3361578583717346, + "learning_rate": 1.8970251290845737e-06, + "loss": 0.0316, + "num_input_tokens_seen": 31890656, + "step": 47310 + }, + { + "epoch": 1.1559133217697213, + "grad_norm": 0.45389366149902344, + "learning_rate": 1.8969874348683404e-06, + "loss": 0.0475, + "num_input_tokens_seen": 31893984, + "step": 47315 + }, + { + "epoch": 1.1560354726015685, + "grad_norm": 0.08049551397562027, + "learning_rate": 1.8969497341289733e-06, + "loss": 0.0016, + "num_input_tokens_seen": 31897376, + "step": 47320 + }, + { + "epoch": 1.1561576234334157, + "grad_norm": 11.899868965148926, + "learning_rate": 1.8969120268667462e-06, + "loss": 0.1015, + "num_input_tokens_seen": 31900576, + "step": 47325 + }, + { + "epoch": 1.1562797742652626, + "grad_norm": 0.06697845458984375, + "learning_rate": 1.8968743130819338e-06, + "loss": 0.0299, + "num_input_tokens_seen": 31903904, + "step": 47330 + }, + { + "epoch": 1.1564019250971098, + "grad_norm": 18.530529022216797, + "learning_rate": 1.8968365927748102e-06, + "loss": 0.1661, + "num_input_tokens_seen": 31907040, + "step": 47335 + }, + { + "epoch": 1.156524075928957, + "grad_norm": 17.1173095703125, + "learning_rate": 1.8967988659456498e-06, + "loss": 0.0757, + "num_input_tokens_seen": 31910560, + "step": 47340 + }, + { + "epoch": 1.1566462267608042, + "grad_norm": 0.10376951843500137, + "learning_rate": 1.8967611325947266e-06, + "loss": 0.1961, + "num_input_tokens_seen": 31913696, + "step": 47345 + }, + { + "epoch": 1.1567683775926514, + "grad_norm": 0.13244301080703735, + "learning_rate": 1.896723392722315e-06, + "loss": 0.0376, + "num_input_tokens_seen": 31917152, + "step": 47350 + }, + { + "epoch": 1.1568905284244986, + "grad_norm": 0.53319251537323, + "learning_rate": 1.8966856463286903e-06, + "loss": 0.0034, + "num_input_tokens_seen": 31920480, + "step": 47355 + }, + { + "epoch": 1.1570126792563458, + "grad_norm": 26.181171417236328, + "learning_rate": 1.8966478934141262e-06, + "loss": 0.2035, + "num_input_tokens_seen": 31923872, + "step": 47360 + }, + { + "epoch": 1.157134830088193, + "grad_norm": 10.004261016845703, + "learning_rate": 1.8966101339788971e-06, + "loss": 0.0391, + "num_input_tokens_seen": 31927584, + "step": 47365 + }, + { + "epoch": 1.1572569809200401, + "grad_norm": 162.45281982421875, + "learning_rate": 1.8965723680232783e-06, + "loss": 0.0416, + "num_input_tokens_seen": 31931168, + "step": 47370 + }, + { + "epoch": 1.1573791317518873, + "grad_norm": 0.23322609066963196, + "learning_rate": 1.8965345955475441e-06, + "loss": 0.0387, + "num_input_tokens_seen": 31934304, + "step": 47375 + }, + { + "epoch": 1.1575012825837343, + "grad_norm": 0.39940202236175537, + "learning_rate": 1.896496816551969e-06, + "loss": 0.0539, + "num_input_tokens_seen": 31937760, + "step": 47380 + }, + { + "epoch": 1.1576234334155815, + "grad_norm": 0.027425643056631088, + "learning_rate": 1.8964590310368283e-06, + "loss": 0.0016, + "num_input_tokens_seen": 31941216, + "step": 47385 + }, + { + "epoch": 1.1577455842474287, + "grad_norm": 0.03762891888618469, + "learning_rate": 1.8964212390023959e-06, + "loss": 0.0559, + "num_input_tokens_seen": 31944736, + "step": 47390 + }, + { + "epoch": 1.1578677350792759, + "grad_norm": 12.185388565063477, + "learning_rate": 1.8963834404489474e-06, + "loss": 0.1978, + "num_input_tokens_seen": 31948128, + "step": 47395 + }, + { + "epoch": 1.157989885911123, + "grad_norm": 0.20006321370601654, + "learning_rate": 1.8963456353767575e-06, + "loss": 0.0536, + "num_input_tokens_seen": 31951392, + "step": 47400 + }, + { + "epoch": 1.1581120367429703, + "grad_norm": 0.40255168080329895, + "learning_rate": 1.8963078237861008e-06, + "loss": 0.0924, + "num_input_tokens_seen": 31955040, + "step": 47405 + }, + { + "epoch": 1.1582341875748174, + "grad_norm": 21.12218475341797, + "learning_rate": 1.8962700056772527e-06, + "loss": 0.0955, + "num_input_tokens_seen": 31957984, + "step": 47410 + }, + { + "epoch": 1.1583563384066646, + "grad_norm": 0.21121670305728912, + "learning_rate": 1.896232181050488e-06, + "loss": 0.1105, + "num_input_tokens_seen": 31961440, + "step": 47415 + }, + { + "epoch": 1.1584784892385116, + "grad_norm": 0.07470469921827316, + "learning_rate": 1.8961943499060818e-06, + "loss": 0.0438, + "num_input_tokens_seen": 31964832, + "step": 47420 + }, + { + "epoch": 1.1586006400703588, + "grad_norm": 0.14237113296985626, + "learning_rate": 1.8961565122443092e-06, + "loss": 0.1184, + "num_input_tokens_seen": 31968544, + "step": 47425 + }, + { + "epoch": 1.158722790902206, + "grad_norm": 0.10641762614250183, + "learning_rate": 1.8961186680654455e-06, + "loss": 0.0383, + "num_input_tokens_seen": 31971872, + "step": 47430 + }, + { + "epoch": 1.1588449417340532, + "grad_norm": 0.497207909822464, + "learning_rate": 1.896080817369766e-06, + "loss": 0.0883, + "num_input_tokens_seen": 31975072, + "step": 47435 + }, + { + "epoch": 1.1589670925659004, + "grad_norm": 0.15804211795330048, + "learning_rate": 1.8960429601575453e-06, + "loss": 0.1452, + "num_input_tokens_seen": 31978336, + "step": 47440 + }, + { + "epoch": 1.1590892433977475, + "grad_norm": 3.5672736167907715, + "learning_rate": 1.8960050964290595e-06, + "loss": 0.0778, + "num_input_tokens_seen": 31981664, + "step": 47445 + }, + { + "epoch": 1.1592113942295947, + "grad_norm": 62.97404479980469, + "learning_rate": 1.8959672261845836e-06, + "loss": 0.0991, + "num_input_tokens_seen": 31984928, + "step": 47450 + }, + { + "epoch": 1.159333545061442, + "grad_norm": 0.9200723171234131, + "learning_rate": 1.8959293494243931e-06, + "loss": 0.0803, + "num_input_tokens_seen": 31988448, + "step": 47455 + }, + { + "epoch": 1.1594556958932891, + "grad_norm": 4.727555274963379, + "learning_rate": 1.8958914661487632e-06, + "loss": 0.086, + "num_input_tokens_seen": 31991968, + "step": 47460 + }, + { + "epoch": 1.1595778467251363, + "grad_norm": 1.4091517925262451, + "learning_rate": 1.89585357635797e-06, + "loss": 0.0076, + "num_input_tokens_seen": 31995296, + "step": 47465 + }, + { + "epoch": 1.1596999975569833, + "grad_norm": 0.4299665093421936, + "learning_rate": 1.8958156800522884e-06, + "loss": 0.0947, + "num_input_tokens_seen": 31998688, + "step": 47470 + }, + { + "epoch": 1.1598221483888305, + "grad_norm": 0.6416081190109253, + "learning_rate": 1.8957777772319942e-06, + "loss": 0.0526, + "num_input_tokens_seen": 32001888, + "step": 47475 + }, + { + "epoch": 1.1599442992206777, + "grad_norm": 1.7206716537475586, + "learning_rate": 1.895739867897363e-06, + "loss": 0.0028, + "num_input_tokens_seen": 32005152, + "step": 47480 + }, + { + "epoch": 1.1600664500525248, + "grad_norm": 28.344221115112305, + "learning_rate": 1.8957019520486705e-06, + "loss": 0.243, + "num_input_tokens_seen": 32008480, + "step": 47485 + }, + { + "epoch": 1.160188600884372, + "grad_norm": 16.08212661743164, + "learning_rate": 1.8956640296861928e-06, + "loss": 0.0791, + "num_input_tokens_seen": 32011808, + "step": 47490 + }, + { + "epoch": 1.1603107517162192, + "grad_norm": 0.15828566253185272, + "learning_rate": 1.895626100810205e-06, + "loss": 0.0302, + "num_input_tokens_seen": 32014880, + "step": 47495 + }, + { + "epoch": 1.1604329025480664, + "grad_norm": 108.21931457519531, + "learning_rate": 1.8955881654209835e-06, + "loss": 0.1028, + "num_input_tokens_seen": 32018272, + "step": 47500 + }, + { + "epoch": 1.1605550533799136, + "grad_norm": 1.4304190874099731, + "learning_rate": 1.8955502235188042e-06, + "loss": 0.0414, + "num_input_tokens_seen": 32021728, + "step": 47505 + }, + { + "epoch": 1.1606772042117606, + "grad_norm": 0.02215048484504223, + "learning_rate": 1.8955122751039424e-06, + "loss": 0.0638, + "num_input_tokens_seen": 32024672, + "step": 47510 + }, + { + "epoch": 1.1607993550436078, + "grad_norm": 2.74535870552063, + "learning_rate": 1.8954743201766747e-06, + "loss": 0.0016, + "num_input_tokens_seen": 32027936, + "step": 47515 + }, + { + "epoch": 1.160921505875455, + "grad_norm": 2.150292158126831, + "learning_rate": 1.8954363587372768e-06, + "loss": 0.0406, + "num_input_tokens_seen": 32030816, + "step": 47520 + }, + { + "epoch": 1.1610436567073021, + "grad_norm": 143.7350616455078, + "learning_rate": 1.895398390786025e-06, + "loss": 0.0563, + "num_input_tokens_seen": 32034080, + "step": 47525 + }, + { + "epoch": 1.1611658075391493, + "grad_norm": 83.70098876953125, + "learning_rate": 1.895360416323195e-06, + "loss": 0.0666, + "num_input_tokens_seen": 32037152, + "step": 47530 + }, + { + "epoch": 1.1612879583709965, + "grad_norm": 35.97445297241211, + "learning_rate": 1.8953224353490636e-06, + "loss": 0.0549, + "num_input_tokens_seen": 32040608, + "step": 47535 + }, + { + "epoch": 1.1614101092028437, + "grad_norm": 11.100581169128418, + "learning_rate": 1.8952844478639064e-06, + "loss": 0.0822, + "num_input_tokens_seen": 32043808, + "step": 47540 + }, + { + "epoch": 1.161532260034691, + "grad_norm": 0.08772708475589752, + "learning_rate": 1.8952464538679997e-06, + "loss": 0.0543, + "num_input_tokens_seen": 32047072, + "step": 47545 + }, + { + "epoch": 1.161654410866538, + "grad_norm": 0.9782987833023071, + "learning_rate": 1.8952084533616203e-06, + "loss": 0.095, + "num_input_tokens_seen": 32050528, + "step": 47550 + }, + { + "epoch": 1.1617765616983853, + "grad_norm": 14.846198081970215, + "learning_rate": 1.8951704463450442e-06, + "loss": 0.1886, + "num_input_tokens_seen": 32054176, + "step": 47555 + }, + { + "epoch": 1.1618987125302322, + "grad_norm": 25.585895538330078, + "learning_rate": 1.8951324328185478e-06, + "loss": 0.0999, + "num_input_tokens_seen": 32057824, + "step": 47560 + }, + { + "epoch": 1.1620208633620794, + "grad_norm": 0.7053946256637573, + "learning_rate": 1.8950944127824076e-06, + "loss": 0.0235, + "num_input_tokens_seen": 32061664, + "step": 47565 + }, + { + "epoch": 1.1621430141939266, + "grad_norm": 0.5136181116104126, + "learning_rate": 1.8950563862369e-06, + "loss": 0.0019, + "num_input_tokens_seen": 32065376, + "step": 47570 + }, + { + "epoch": 1.1622651650257738, + "grad_norm": 0.23183301091194153, + "learning_rate": 1.8950183531823019e-06, + "loss": 0.1614, + "num_input_tokens_seen": 32068704, + "step": 47575 + }, + { + "epoch": 1.162387315857621, + "grad_norm": 0.27271634340286255, + "learning_rate": 1.8949803136188894e-06, + "loss": 0.0803, + "num_input_tokens_seen": 32072096, + "step": 47580 + }, + { + "epoch": 1.1625094666894682, + "grad_norm": 0.2536061406135559, + "learning_rate": 1.894942267546939e-06, + "loss": 0.0427, + "num_input_tokens_seen": 32076320, + "step": 47585 + }, + { + "epoch": 1.1626316175213154, + "grad_norm": 148.37466430664062, + "learning_rate": 1.8949042149667283e-06, + "loss": 0.1575, + "num_input_tokens_seen": 32079328, + "step": 47590 + }, + { + "epoch": 1.1627537683531626, + "grad_norm": 1.8467384576797485, + "learning_rate": 1.894866155878533e-06, + "loss": 0.0022, + "num_input_tokens_seen": 32083360, + "step": 47595 + }, + { + "epoch": 1.1628759191850095, + "grad_norm": 8.60280990600586, + "learning_rate": 1.8948280902826306e-06, + "loss": 0.061, + "num_input_tokens_seen": 32086752, + "step": 47600 + }, + { + "epoch": 1.1629980700168567, + "grad_norm": 0.04383387789130211, + "learning_rate": 1.8947900181792974e-06, + "loss": 0.006, + "num_input_tokens_seen": 32089568, + "step": 47605 + }, + { + "epoch": 1.163120220848704, + "grad_norm": 1.2437485456466675, + "learning_rate": 1.8947519395688109e-06, + "loss": 0.0014, + "num_input_tokens_seen": 32093280, + "step": 47610 + }, + { + "epoch": 1.1632423716805511, + "grad_norm": 12.266228675842285, + "learning_rate": 1.8947138544514473e-06, + "loss": 0.1059, + "num_input_tokens_seen": 32096480, + "step": 47615 + }, + { + "epoch": 1.1633645225123983, + "grad_norm": 26.271141052246094, + "learning_rate": 1.894675762827484e-06, + "loss": 0.0932, + "num_input_tokens_seen": 32099744, + "step": 47620 + }, + { + "epoch": 1.1634866733442455, + "grad_norm": 8.145895004272461, + "learning_rate": 1.894637664697198e-06, + "loss": 0.2224, + "num_input_tokens_seen": 32103200, + "step": 47625 + }, + { + "epoch": 1.1636088241760927, + "grad_norm": 26.563020706176758, + "learning_rate": 1.8945995600608662e-06, + "loss": 0.0968, + "num_input_tokens_seen": 32106592, + "step": 47630 + }, + { + "epoch": 1.1637309750079399, + "grad_norm": 58.50276184082031, + "learning_rate": 1.8945614489187658e-06, + "loss": 0.1202, + "num_input_tokens_seen": 32109920, + "step": 47635 + }, + { + "epoch": 1.163853125839787, + "grad_norm": 4.910950660705566, + "learning_rate": 1.8945233312711739e-06, + "loss": 0.079, + "num_input_tokens_seen": 32113376, + "step": 47640 + }, + { + "epoch": 1.1639752766716343, + "grad_norm": 75.29106140136719, + "learning_rate": 1.8944852071183676e-06, + "loss": 0.0071, + "num_input_tokens_seen": 32117024, + "step": 47645 + }, + { + "epoch": 1.1640974275034812, + "grad_norm": 0.8739251494407654, + "learning_rate": 1.8944470764606247e-06, + "loss": 0.0531, + "num_input_tokens_seen": 32120224, + "step": 47650 + }, + { + "epoch": 1.1642195783353284, + "grad_norm": 0.44417768716812134, + "learning_rate": 1.8944089392982216e-06, + "loss": 0.0014, + "num_input_tokens_seen": 32123744, + "step": 47655 + }, + { + "epoch": 1.1643417291671756, + "grad_norm": 8.347487449645996, + "learning_rate": 1.8943707956314364e-06, + "loss": 0.1023, + "num_input_tokens_seen": 32126880, + "step": 47660 + }, + { + "epoch": 1.1644638799990228, + "grad_norm": 0.23629111051559448, + "learning_rate": 1.8943326454605462e-06, + "loss": 0.0043, + "num_input_tokens_seen": 32130336, + "step": 47665 + }, + { + "epoch": 1.16458603083087, + "grad_norm": 0.08674504607915878, + "learning_rate": 1.8942944887858286e-06, + "loss": 0.0538, + "num_input_tokens_seen": 32133728, + "step": 47670 + }, + { + "epoch": 1.1647081816627172, + "grad_norm": 4.8898186683654785, + "learning_rate": 1.8942563256075607e-06, + "loss": 0.0024, + "num_input_tokens_seen": 32136864, + "step": 47675 + }, + { + "epoch": 1.1648303324945644, + "grad_norm": 1.8654694557189941, + "learning_rate": 1.8942181559260204e-06, + "loss": 0.0203, + "num_input_tokens_seen": 32140448, + "step": 47680 + }, + { + "epoch": 1.1649524833264115, + "grad_norm": 0.09039192646741867, + "learning_rate": 1.894179979741485e-06, + "loss": 0.1419, + "num_input_tokens_seen": 32143712, + "step": 47685 + }, + { + "epoch": 1.1650746341582585, + "grad_norm": 12.765069961547852, + "learning_rate": 1.8941417970542324e-06, + "loss": 0.1193, + "num_input_tokens_seen": 32147360, + "step": 47690 + }, + { + "epoch": 1.1651967849901057, + "grad_norm": 0.030316416174173355, + "learning_rate": 1.8941036078645403e-06, + "loss": 0.036, + "num_input_tokens_seen": 32151264, + "step": 47695 + }, + { + "epoch": 1.165318935821953, + "grad_norm": 0.04319749027490616, + "learning_rate": 1.894065412172686e-06, + "loss": 0.0627, + "num_input_tokens_seen": 32154784, + "step": 47700 + }, + { + "epoch": 1.1654410866538, + "grad_norm": 0.11999724805355072, + "learning_rate": 1.8940272099789476e-06, + "loss": 0.0013, + "num_input_tokens_seen": 32158048, + "step": 47705 + }, + { + "epoch": 1.1655632374856473, + "grad_norm": 8.9257173538208, + "learning_rate": 1.8939890012836032e-06, + "loss": 0.1186, + "num_input_tokens_seen": 32161376, + "step": 47710 + }, + { + "epoch": 1.1656853883174945, + "grad_norm": 0.18495075404644012, + "learning_rate": 1.89395078608693e-06, + "loss": 0.0024, + "num_input_tokens_seen": 32164448, + "step": 47715 + }, + { + "epoch": 1.1658075391493417, + "grad_norm": 8.209742546081543, + "learning_rate": 1.8939125643892062e-06, + "loss": 0.1112, + "num_input_tokens_seen": 32167648, + "step": 47720 + }, + { + "epoch": 1.1659296899811888, + "grad_norm": 22.166501998901367, + "learning_rate": 1.89387433619071e-06, + "loss": 0.0691, + "num_input_tokens_seen": 32170656, + "step": 47725 + }, + { + "epoch": 1.166051840813036, + "grad_norm": 0.02899726666510105, + "learning_rate": 1.893836101491719e-06, + "loss": 0.0222, + "num_input_tokens_seen": 32174048, + "step": 47730 + }, + { + "epoch": 1.1661739916448832, + "grad_norm": 32.36577224731445, + "learning_rate": 1.8937978602925114e-06, + "loss": 0.1264, + "num_input_tokens_seen": 32177632, + "step": 47735 + }, + { + "epoch": 1.1662961424767302, + "grad_norm": 13.967851638793945, + "learning_rate": 1.8937596125933654e-06, + "loss": 0.0959, + "num_input_tokens_seen": 32180704, + "step": 47740 + }, + { + "epoch": 1.1664182933085774, + "grad_norm": 0.299034982919693, + "learning_rate": 1.8937213583945595e-06, + "loss": 0.1216, + "num_input_tokens_seen": 32183904, + "step": 47745 + }, + { + "epoch": 1.1665404441404246, + "grad_norm": 0.8588279485702515, + "learning_rate": 1.8936830976963712e-06, + "loss": 0.1122, + "num_input_tokens_seen": 32186784, + "step": 47750 + }, + { + "epoch": 1.1666625949722718, + "grad_norm": 0.05717243626713753, + "learning_rate": 1.893644830499079e-06, + "loss": 0.0621, + "num_input_tokens_seen": 32190304, + "step": 47755 + }, + { + "epoch": 1.166784745804119, + "grad_norm": 2.23378849029541, + "learning_rate": 1.8936065568029614e-06, + "loss": 0.0988, + "num_input_tokens_seen": 32193504, + "step": 47760 + }, + { + "epoch": 1.1669068966359661, + "grad_norm": 11.925138473510742, + "learning_rate": 1.8935682766082964e-06, + "loss": 0.0803, + "num_input_tokens_seen": 32197216, + "step": 47765 + }, + { + "epoch": 1.1670290474678133, + "grad_norm": 382.5774841308594, + "learning_rate": 1.8935299899153625e-06, + "loss": 0.1822, + "num_input_tokens_seen": 32200672, + "step": 47770 + }, + { + "epoch": 1.1671511982996603, + "grad_norm": 0.1967589259147644, + "learning_rate": 1.8934916967244386e-06, + "loss": 0.0734, + "num_input_tokens_seen": 32203872, + "step": 47775 + }, + { + "epoch": 1.1672733491315075, + "grad_norm": 0.02376456931233406, + "learning_rate": 1.8934533970358022e-06, + "loss": 0.1033, + "num_input_tokens_seen": 32207776, + "step": 47780 + }, + { + "epoch": 1.1673954999633547, + "grad_norm": 14.840989112854004, + "learning_rate": 1.8934150908497327e-06, + "loss": 0.2013, + "num_input_tokens_seen": 32211168, + "step": 47785 + }, + { + "epoch": 1.1675176507952019, + "grad_norm": 0.14429239928722382, + "learning_rate": 1.8933767781665085e-06, + "loss": 0.0334, + "num_input_tokens_seen": 32214432, + "step": 47790 + }, + { + "epoch": 1.167639801627049, + "grad_norm": 8.509486198425293, + "learning_rate": 1.8933384589864077e-06, + "loss": 0.154, + "num_input_tokens_seen": 32217568, + "step": 47795 + }, + { + "epoch": 1.1677619524588962, + "grad_norm": 0.18389767408370972, + "learning_rate": 1.8933001333097094e-06, + "loss": 0.1517, + "num_input_tokens_seen": 32220832, + "step": 47800 + }, + { + "epoch": 1.1678841032907434, + "grad_norm": 178.11093139648438, + "learning_rate": 1.8932618011366922e-06, + "loss": 0.155, + "num_input_tokens_seen": 32224480, + "step": 47805 + }, + { + "epoch": 1.1680062541225906, + "grad_norm": 26.663612365722656, + "learning_rate": 1.8932234624676356e-06, + "loss": 0.167, + "num_input_tokens_seen": 32227680, + "step": 47810 + }, + { + "epoch": 1.1681284049544378, + "grad_norm": 0.439523309469223, + "learning_rate": 1.893185117302817e-06, + "loss": 0.0336, + "num_input_tokens_seen": 32231136, + "step": 47815 + }, + { + "epoch": 1.168250555786285, + "grad_norm": 10.76060962677002, + "learning_rate": 1.8931467656425163e-06, + "loss": 0.0647, + "num_input_tokens_seen": 32234336, + "step": 47820 + }, + { + "epoch": 1.168372706618132, + "grad_norm": 0.914696991443634, + "learning_rate": 1.8931084074870118e-06, + "loss": 0.0296, + "num_input_tokens_seen": 32237856, + "step": 47825 + }, + { + "epoch": 1.1684948574499792, + "grad_norm": 0.30933263897895813, + "learning_rate": 1.8930700428365832e-06, + "loss": 0.0017, + "num_input_tokens_seen": 32241440, + "step": 47830 + }, + { + "epoch": 1.1686170082818264, + "grad_norm": 14.791959762573242, + "learning_rate": 1.8930316716915087e-06, + "loss": 0.0681, + "num_input_tokens_seen": 32244512, + "step": 47835 + }, + { + "epoch": 1.1687391591136735, + "grad_norm": 8.420495986938477, + "learning_rate": 1.892993294052068e-06, + "loss": 0.0032, + "num_input_tokens_seen": 32247968, + "step": 47840 + }, + { + "epoch": 1.1688613099455207, + "grad_norm": 0.24798475205898285, + "learning_rate": 1.8929549099185396e-06, + "loss": 0.0858, + "num_input_tokens_seen": 32251104, + "step": 47845 + }, + { + "epoch": 1.168983460777368, + "grad_norm": 36.27674865722656, + "learning_rate": 1.892916519291203e-06, + "loss": 0.0914, + "num_input_tokens_seen": 32254432, + "step": 47850 + }, + { + "epoch": 1.1691056116092151, + "grad_norm": 0.011220994405448437, + "learning_rate": 1.8928781221703374e-06, + "loss": 0.0972, + "num_input_tokens_seen": 32258080, + "step": 47855 + }, + { + "epoch": 1.1692277624410623, + "grad_norm": 0.1429062783718109, + "learning_rate": 1.8928397185562217e-06, + "loss": 0.0008, + "num_input_tokens_seen": 32261152, + "step": 47860 + }, + { + "epoch": 1.1693499132729093, + "grad_norm": 25.986040115356445, + "learning_rate": 1.8928013084491354e-06, + "loss": 0.0328, + "num_input_tokens_seen": 32264288, + "step": 47865 + }, + { + "epoch": 1.1694720641047565, + "grad_norm": 0.3018045127391815, + "learning_rate": 1.8927628918493581e-06, + "loss": 0.228, + "num_input_tokens_seen": 32268256, + "step": 47870 + }, + { + "epoch": 1.1695942149366036, + "grad_norm": 0.09300398826599121, + "learning_rate": 1.8927244687571688e-06, + "loss": 0.0219, + "num_input_tokens_seen": 32271776, + "step": 47875 + }, + { + "epoch": 1.1697163657684508, + "grad_norm": 11.6989164352417, + "learning_rate": 1.8926860391728472e-06, + "loss": 0.1846, + "num_input_tokens_seen": 32275040, + "step": 47880 + }, + { + "epoch": 1.169838516600298, + "grad_norm": 134.87442016601562, + "learning_rate": 1.8926476030966724e-06, + "loss": 0.1796, + "num_input_tokens_seen": 32278368, + "step": 47885 + }, + { + "epoch": 1.1699606674321452, + "grad_norm": 1.2735921144485474, + "learning_rate": 1.8926091605289245e-06, + "loss": 0.0011, + "num_input_tokens_seen": 32281696, + "step": 47890 + }, + { + "epoch": 1.1700828182639924, + "grad_norm": 0.2746407389640808, + "learning_rate": 1.8925707114698823e-06, + "loss": 0.1402, + "num_input_tokens_seen": 32284960, + "step": 47895 + }, + { + "epoch": 1.1702049690958396, + "grad_norm": 0.2347487211227417, + "learning_rate": 1.892532255919826e-06, + "loss": 0.0995, + "num_input_tokens_seen": 32288352, + "step": 47900 + }, + { + "epoch": 1.1703271199276868, + "grad_norm": 242.01580810546875, + "learning_rate": 1.8924937938790348e-06, + "loss": 0.1636, + "num_input_tokens_seen": 32291744, + "step": 47905 + }, + { + "epoch": 1.170449270759534, + "grad_norm": 0.5199395418167114, + "learning_rate": 1.8924553253477891e-06, + "loss": 0.11, + "num_input_tokens_seen": 32295136, + "step": 47910 + }, + { + "epoch": 1.170571421591381, + "grad_norm": 36.90918731689453, + "learning_rate": 1.8924168503263682e-06, + "loss": 0.1014, + "num_input_tokens_seen": 32298528, + "step": 47915 + }, + { + "epoch": 1.1706935724232281, + "grad_norm": 9.800524711608887, + "learning_rate": 1.8923783688150517e-06, + "loss": 0.0434, + "num_input_tokens_seen": 32301600, + "step": 47920 + }, + { + "epoch": 1.1708157232550753, + "grad_norm": 61.35971450805664, + "learning_rate": 1.8923398808141195e-06, + "loss": 0.0966, + "num_input_tokens_seen": 32304544, + "step": 47925 + }, + { + "epoch": 1.1709378740869225, + "grad_norm": 1.0777499675750732, + "learning_rate": 1.8923013863238523e-06, + "loss": 0.1141, + "num_input_tokens_seen": 32308384, + "step": 47930 + }, + { + "epoch": 1.1710600249187697, + "grad_norm": 0.5199394822120667, + "learning_rate": 1.8922628853445288e-06, + "loss": 0.0021, + "num_input_tokens_seen": 32312160, + "step": 47935 + }, + { + "epoch": 1.171182175750617, + "grad_norm": 0.17201387882232666, + "learning_rate": 1.89222437787643e-06, + "loss": 0.2046, + "num_input_tokens_seen": 32315296, + "step": 47940 + }, + { + "epoch": 1.171304326582464, + "grad_norm": 9.953340530395508, + "learning_rate": 1.8921858639198354e-06, + "loss": 0.1025, + "num_input_tokens_seen": 32318304, + "step": 47945 + }, + { + "epoch": 1.1714264774143113, + "grad_norm": 6.807833194732666, + "learning_rate": 1.8921473434750254e-06, + "loss": 0.0589, + "num_input_tokens_seen": 32321632, + "step": 47950 + }, + { + "epoch": 1.1715486282461582, + "grad_norm": 0.25497400760650635, + "learning_rate": 1.8921088165422797e-06, + "loss": 0.1001, + "num_input_tokens_seen": 32325472, + "step": 47955 + }, + { + "epoch": 1.1716707790780054, + "grad_norm": 2.2793524265289307, + "learning_rate": 1.8920702831218787e-06, + "loss": 0.055, + "num_input_tokens_seen": 32328736, + "step": 47960 + }, + { + "epoch": 1.1717929299098526, + "grad_norm": 0.5788264274597168, + "learning_rate": 1.892031743214103e-06, + "loss": 0.1018, + "num_input_tokens_seen": 32331744, + "step": 47965 + }, + { + "epoch": 1.1719150807416998, + "grad_norm": 0.24290089309215546, + "learning_rate": 1.8919931968192322e-06, + "loss": 0.0292, + "num_input_tokens_seen": 32335264, + "step": 47970 + }, + { + "epoch": 1.172037231573547, + "grad_norm": 0.3527112305164337, + "learning_rate": 1.8919546439375468e-06, + "loss": 0.0015, + "num_input_tokens_seen": 32338784, + "step": 47975 + }, + { + "epoch": 1.1721593824053942, + "grad_norm": 0.06581027805805206, + "learning_rate": 1.8919160845693278e-06, + "loss": 0.0441, + "num_input_tokens_seen": 32341728, + "step": 47980 + }, + { + "epoch": 1.1722815332372414, + "grad_norm": 148.33258056640625, + "learning_rate": 1.891877518714855e-06, + "loss": 0.0108, + "num_input_tokens_seen": 32344992, + "step": 47985 + }, + { + "epoch": 1.1724036840690886, + "grad_norm": 0.21731750667095184, + "learning_rate": 1.891838946374409e-06, + "loss": 0.0336, + "num_input_tokens_seen": 32348192, + "step": 47990 + }, + { + "epoch": 1.1725258349009358, + "grad_norm": 122.77140808105469, + "learning_rate": 1.8918003675482702e-06, + "loss": 0.1033, + "num_input_tokens_seen": 32351136, + "step": 47995 + }, + { + "epoch": 1.172647985732783, + "grad_norm": 0.18697240948677063, + "learning_rate": 1.8917617822367193e-06, + "loss": 0.0012, + "num_input_tokens_seen": 32354464, + "step": 48000 + }, + { + "epoch": 1.17277013656463, + "grad_norm": 4.480820655822754, + "learning_rate": 1.8917231904400369e-06, + "loss": 0.1176, + "num_input_tokens_seen": 32357664, + "step": 48005 + }, + { + "epoch": 1.172892287396477, + "grad_norm": 0.833780825138092, + "learning_rate": 1.8916845921585036e-06, + "loss": 0.0013, + "num_input_tokens_seen": 32360992, + "step": 48010 + }, + { + "epoch": 1.1730144382283243, + "grad_norm": 24.22505760192871, + "learning_rate": 1.8916459873924e-06, + "loss": 0.1414, + "num_input_tokens_seen": 32364256, + "step": 48015 + }, + { + "epoch": 1.1731365890601715, + "grad_norm": 108.00692749023438, + "learning_rate": 1.8916073761420073e-06, + "loss": 0.0688, + "num_input_tokens_seen": 32367904, + "step": 48020 + }, + { + "epoch": 1.1732587398920187, + "grad_norm": 0.07697835564613342, + "learning_rate": 1.8915687584076054e-06, + "loss": 0.0011, + "num_input_tokens_seen": 32371232, + "step": 48025 + }, + { + "epoch": 1.1733808907238659, + "grad_norm": 0.14206215739250183, + "learning_rate": 1.8915301341894762e-06, + "loss": 0.0009, + "num_input_tokens_seen": 32374496, + "step": 48030 + }, + { + "epoch": 1.173503041555713, + "grad_norm": 8.598139762878418, + "learning_rate": 1.8914915034878997e-06, + "loss": 0.1076, + "num_input_tokens_seen": 32377696, + "step": 48035 + }, + { + "epoch": 1.1736251923875602, + "grad_norm": 0.032318364828825, + "learning_rate": 1.8914528663031575e-06, + "loss": 0.048, + "num_input_tokens_seen": 32381280, + "step": 48040 + }, + { + "epoch": 1.1737473432194072, + "grad_norm": 6.112025737762451, + "learning_rate": 1.89141422263553e-06, + "loss": 0.1592, + "num_input_tokens_seen": 32384992, + "step": 48045 + }, + { + "epoch": 1.1738694940512544, + "grad_norm": 13.649267196655273, + "learning_rate": 1.8913755724852988e-06, + "loss": 0.0489, + "num_input_tokens_seen": 32388576, + "step": 48050 + }, + { + "epoch": 1.1739916448831016, + "grad_norm": 0.0673687532544136, + "learning_rate": 1.8913369158527447e-06, + "loss": 0.0907, + "num_input_tokens_seen": 32391584, + "step": 48055 + }, + { + "epoch": 1.1741137957149488, + "grad_norm": 0.7464065551757812, + "learning_rate": 1.8912982527381486e-06, + "loss": 0.0203, + "num_input_tokens_seen": 32394848, + "step": 48060 + }, + { + "epoch": 1.174235946546796, + "grad_norm": 0.08196679502725601, + "learning_rate": 1.8912595831417919e-06, + "loss": 0.1048, + "num_input_tokens_seen": 32397920, + "step": 48065 + }, + { + "epoch": 1.1743580973786432, + "grad_norm": 9.292841911315918, + "learning_rate": 1.8912209070639558e-06, + "loss": 0.0444, + "num_input_tokens_seen": 32401184, + "step": 48070 + }, + { + "epoch": 1.1744802482104904, + "grad_norm": 0.3272796869277954, + "learning_rate": 1.8911822245049213e-06, + "loss": 0.1023, + "num_input_tokens_seen": 32404448, + "step": 48075 + }, + { + "epoch": 1.1746023990423375, + "grad_norm": 0.04792598634958267, + "learning_rate": 1.8911435354649705e-06, + "loss": 0.0377, + "num_input_tokens_seen": 32407584, + "step": 48080 + }, + { + "epoch": 1.1747245498741847, + "grad_norm": 0.18216323852539062, + "learning_rate": 1.8911048399443838e-06, + "loss": 0.0557, + "num_input_tokens_seen": 32411360, + "step": 48085 + }, + { + "epoch": 1.174846700706032, + "grad_norm": 37.21038055419922, + "learning_rate": 1.8910661379434432e-06, + "loss": 0.1424, + "num_input_tokens_seen": 32414816, + "step": 48090 + }, + { + "epoch": 1.1749688515378789, + "grad_norm": 0.2778770923614502, + "learning_rate": 1.89102742946243e-06, + "loss": 0.0771, + "num_input_tokens_seen": 32417888, + "step": 48095 + }, + { + "epoch": 1.175091002369726, + "grad_norm": 0.4837040603160858, + "learning_rate": 1.8909887145016257e-06, + "loss": 0.1183, + "num_input_tokens_seen": 32421472, + "step": 48100 + }, + { + "epoch": 1.1752131532015733, + "grad_norm": 0.7100688219070435, + "learning_rate": 1.8909499930613118e-06, + "loss": 0.1321, + "num_input_tokens_seen": 32424544, + "step": 48105 + }, + { + "epoch": 1.1753353040334205, + "grad_norm": 0.995244562625885, + "learning_rate": 1.8909112651417699e-06, + "loss": 0.0028, + "num_input_tokens_seen": 32427616, + "step": 48110 + }, + { + "epoch": 1.1754574548652676, + "grad_norm": 9.889370918273926, + "learning_rate": 1.8908725307432816e-06, + "loss": 0.1127, + "num_input_tokens_seen": 32430624, + "step": 48115 + }, + { + "epoch": 1.1755796056971148, + "grad_norm": 1.033982515335083, + "learning_rate": 1.8908337898661287e-06, + "loss": 0.0027, + "num_input_tokens_seen": 32434208, + "step": 48120 + }, + { + "epoch": 1.175701756528962, + "grad_norm": 0.1141548901796341, + "learning_rate": 1.8907950425105927e-06, + "loss": 0.0397, + "num_input_tokens_seen": 32437664, + "step": 48125 + }, + { + "epoch": 1.1758239073608092, + "grad_norm": 23.38401222229004, + "learning_rate": 1.8907562886769557e-06, + "loss": 0.1019, + "num_input_tokens_seen": 32441248, + "step": 48130 + }, + { + "epoch": 1.1759460581926562, + "grad_norm": 0.01714020036160946, + "learning_rate": 1.8907175283654992e-06, + "loss": 0.0376, + "num_input_tokens_seen": 32444320, + "step": 48135 + }, + { + "epoch": 1.1760682090245034, + "grad_norm": 0.18078669905662537, + "learning_rate": 1.8906787615765055e-06, + "loss": 0.0544, + "num_input_tokens_seen": 32447648, + "step": 48140 + }, + { + "epoch": 1.1761903598563506, + "grad_norm": 12.950847625732422, + "learning_rate": 1.8906399883102565e-06, + "loss": 0.205, + "num_input_tokens_seen": 32450848, + "step": 48145 + }, + { + "epoch": 1.1763125106881978, + "grad_norm": 14.540743827819824, + "learning_rate": 1.8906012085670336e-06, + "loss": 0.076, + "num_input_tokens_seen": 32454240, + "step": 48150 + }, + { + "epoch": 1.176434661520045, + "grad_norm": 0.9453197717666626, + "learning_rate": 1.890562422347119e-06, + "loss": 0.1783, + "num_input_tokens_seen": 32457440, + "step": 48155 + }, + { + "epoch": 1.1765568123518921, + "grad_norm": 7.787158489227295, + "learning_rate": 1.8905236296507953e-06, + "loss": 0.2149, + "num_input_tokens_seen": 32460704, + "step": 48160 + }, + { + "epoch": 1.1766789631837393, + "grad_norm": 0.22460059821605682, + "learning_rate": 1.890484830478344e-06, + "loss": 0.1459, + "num_input_tokens_seen": 32464800, + "step": 48165 + }, + { + "epoch": 1.1768011140155865, + "grad_norm": 64.1084976196289, + "learning_rate": 1.8904460248300478e-06, + "loss": 0.0858, + "num_input_tokens_seen": 32468192, + "step": 48170 + }, + { + "epoch": 1.1769232648474337, + "grad_norm": 0.4196472764015198, + "learning_rate": 1.8904072127061884e-06, + "loss": 0.0026, + "num_input_tokens_seen": 32471072, + "step": 48175 + }, + { + "epoch": 1.177045415679281, + "grad_norm": 0.14943639934062958, + "learning_rate": 1.8903683941070483e-06, + "loss": 0.0297, + "num_input_tokens_seen": 32473952, + "step": 48180 + }, + { + "epoch": 1.1771675665111279, + "grad_norm": 2.338526487350464, + "learning_rate": 1.8903295690329097e-06, + "loss": 0.0313, + "num_input_tokens_seen": 32476896, + "step": 48185 + }, + { + "epoch": 1.177289717342975, + "grad_norm": 0.20392240583896637, + "learning_rate": 1.890290737484055e-06, + "loss": 0.0741, + "num_input_tokens_seen": 32480224, + "step": 48190 + }, + { + "epoch": 1.1774118681748222, + "grad_norm": 9.235472679138184, + "learning_rate": 1.890251899460767e-06, + "loss": 0.0477, + "num_input_tokens_seen": 32483872, + "step": 48195 + }, + { + "epoch": 1.1775340190066694, + "grad_norm": 27.74861717224121, + "learning_rate": 1.8902130549633272e-06, + "loss": 0.1153, + "num_input_tokens_seen": 32487072, + "step": 48200 + }, + { + "epoch": 1.1776561698385166, + "grad_norm": 0.10655547678470612, + "learning_rate": 1.8901742039920188e-06, + "loss": 0.0391, + "num_input_tokens_seen": 32490272, + "step": 48205 + }, + { + "epoch": 1.1777783206703638, + "grad_norm": 104.34963989257812, + "learning_rate": 1.8901353465471242e-06, + "loss": 0.1538, + "num_input_tokens_seen": 32493536, + "step": 48210 + }, + { + "epoch": 1.177900471502211, + "grad_norm": 0.06965136528015137, + "learning_rate": 1.8900964826289258e-06, + "loss": 0.0322, + "num_input_tokens_seen": 32497248, + "step": 48215 + }, + { + "epoch": 1.1780226223340582, + "grad_norm": 0.0271898340433836, + "learning_rate": 1.8900576122377066e-06, + "loss": 0.1566, + "num_input_tokens_seen": 32500448, + "step": 48220 + }, + { + "epoch": 1.1781447731659052, + "grad_norm": 30.9471435546875, + "learning_rate": 1.8900187353737488e-06, + "loss": 0.0295, + "num_input_tokens_seen": 32503648, + "step": 48225 + }, + { + "epoch": 1.1782669239977523, + "grad_norm": 0.1662287563085556, + "learning_rate": 1.8899798520373356e-06, + "loss": 0.001, + "num_input_tokens_seen": 32507104, + "step": 48230 + }, + { + "epoch": 1.1783890748295995, + "grad_norm": 111.50906372070312, + "learning_rate": 1.8899409622287491e-06, + "loss": 0.2231, + "num_input_tokens_seen": 32510560, + "step": 48235 + }, + { + "epoch": 1.1785112256614467, + "grad_norm": 11.14577579498291, + "learning_rate": 1.8899020659482732e-06, + "loss": 0.1259, + "num_input_tokens_seen": 32514144, + "step": 48240 + }, + { + "epoch": 1.178633376493294, + "grad_norm": 17.156038284301758, + "learning_rate": 1.88986316319619e-06, + "loss": 0.1139, + "num_input_tokens_seen": 32517472, + "step": 48245 + }, + { + "epoch": 1.178755527325141, + "grad_norm": 0.5925399661064148, + "learning_rate": 1.8898242539727823e-06, + "loss": 0.0018, + "num_input_tokens_seen": 32520992, + "step": 48250 + }, + { + "epoch": 1.1788776781569883, + "grad_norm": 17.069896697998047, + "learning_rate": 1.8897853382783332e-06, + "loss": 0.1008, + "num_input_tokens_seen": 32523936, + "step": 48255 + }, + { + "epoch": 1.1789998289888355, + "grad_norm": 0.23743368685245514, + "learning_rate": 1.8897464161131258e-06, + "loss": 0.0019, + "num_input_tokens_seen": 32527328, + "step": 48260 + }, + { + "epoch": 1.1791219798206827, + "grad_norm": 0.3088131546974182, + "learning_rate": 1.8897074874774435e-06, + "loss": 0.0286, + "num_input_tokens_seen": 32530720, + "step": 48265 + }, + { + "epoch": 1.1792441306525299, + "grad_norm": 0.4604056775569916, + "learning_rate": 1.8896685523715687e-06, + "loss": 0.187, + "num_input_tokens_seen": 32534176, + "step": 48270 + }, + { + "epoch": 1.1793662814843768, + "grad_norm": 0.2785622179508209, + "learning_rate": 1.8896296107957853e-06, + "loss": 0.0253, + "num_input_tokens_seen": 32537696, + "step": 48275 + }, + { + "epoch": 1.179488432316224, + "grad_norm": 22.758668899536133, + "learning_rate": 1.8895906627503756e-06, + "loss": 0.1362, + "num_input_tokens_seen": 32541024, + "step": 48280 + }, + { + "epoch": 1.1796105831480712, + "grad_norm": 0.6522172093391418, + "learning_rate": 1.8895517082356236e-06, + "loss": 0.1035, + "num_input_tokens_seen": 32544416, + "step": 48285 + }, + { + "epoch": 1.1797327339799184, + "grad_norm": 23.65367889404297, + "learning_rate": 1.8895127472518121e-06, + "loss": 0.1123, + "num_input_tokens_seen": 32547936, + "step": 48290 + }, + { + "epoch": 1.1798548848117656, + "grad_norm": 4.1731672286987305, + "learning_rate": 1.8894737797992249e-06, + "loss": 0.038, + "num_input_tokens_seen": 32551072, + "step": 48295 + }, + { + "epoch": 1.1799770356436128, + "grad_norm": 0.1166360080242157, + "learning_rate": 1.8894348058781451e-06, + "loss": 0.0423, + "num_input_tokens_seen": 32554400, + "step": 48300 + }, + { + "epoch": 1.18009918647546, + "grad_norm": 177.94476318359375, + "learning_rate": 1.8893958254888562e-06, + "loss": 0.0501, + "num_input_tokens_seen": 32557536, + "step": 48305 + }, + { + "epoch": 1.180221337307307, + "grad_norm": 13.788195610046387, + "learning_rate": 1.8893568386316414e-06, + "loss": 0.0472, + "num_input_tokens_seen": 32560928, + "step": 48310 + }, + { + "epoch": 1.1803434881391541, + "grad_norm": 0.22638006508350372, + "learning_rate": 1.8893178453067846e-06, + "loss": 0.0604, + "num_input_tokens_seen": 32565280, + "step": 48315 + }, + { + "epoch": 1.1804656389710013, + "grad_norm": 57.36815643310547, + "learning_rate": 1.8892788455145694e-06, + "loss": 0.0701, + "num_input_tokens_seen": 32568736, + "step": 48320 + }, + { + "epoch": 1.1805877898028485, + "grad_norm": 0.09134076535701752, + "learning_rate": 1.8892398392552788e-06, + "loss": 0.0004, + "num_input_tokens_seen": 32572512, + "step": 48325 + }, + { + "epoch": 1.1807099406346957, + "grad_norm": 18.480314254760742, + "learning_rate": 1.8892008265291975e-06, + "loss": 0.1615, + "num_input_tokens_seen": 32575904, + "step": 48330 + }, + { + "epoch": 1.1808320914665429, + "grad_norm": 0.061544980853796005, + "learning_rate": 1.8891618073366082e-06, + "loss": 0.0992, + "num_input_tokens_seen": 32579104, + "step": 48335 + }, + { + "epoch": 1.18095424229839, + "grad_norm": 0.22377759218215942, + "learning_rate": 1.8891227816777953e-06, + "loss": 0.0066, + "num_input_tokens_seen": 32582688, + "step": 48340 + }, + { + "epoch": 1.1810763931302373, + "grad_norm": 51.224544525146484, + "learning_rate": 1.8890837495530423e-06, + "loss": 0.1201, + "num_input_tokens_seen": 32585952, + "step": 48345 + }, + { + "epoch": 1.1811985439620845, + "grad_norm": 14.230652809143066, + "learning_rate": 1.889044710962633e-06, + "loss": 0.1483, + "num_input_tokens_seen": 32588896, + "step": 48350 + }, + { + "epoch": 1.1813206947939316, + "grad_norm": 0.10260359942913055, + "learning_rate": 1.8890056659068516e-06, + "loss": 0.1178, + "num_input_tokens_seen": 32592096, + "step": 48355 + }, + { + "epoch": 1.1814428456257786, + "grad_norm": 222.6650848388672, + "learning_rate": 1.888966614385982e-06, + "loss": 0.1299, + "num_input_tokens_seen": 32595552, + "step": 48360 + }, + { + "epoch": 1.1815649964576258, + "grad_norm": 0.18788084387779236, + "learning_rate": 1.8889275564003078e-06, + "loss": 0.0912, + "num_input_tokens_seen": 32599520, + "step": 48365 + }, + { + "epoch": 1.181687147289473, + "grad_norm": 0.09443093091249466, + "learning_rate": 1.8888884919501136e-06, + "loss": 0.0017, + "num_input_tokens_seen": 32602656, + "step": 48370 + }, + { + "epoch": 1.1818092981213202, + "grad_norm": 0.46310916543006897, + "learning_rate": 1.888849421035683e-06, + "loss": 0.0739, + "num_input_tokens_seen": 32605984, + "step": 48375 + }, + { + "epoch": 1.1819314489531674, + "grad_norm": 35.772621154785156, + "learning_rate": 1.8888103436573003e-06, + "loss": 0.1791, + "num_input_tokens_seen": 32609056, + "step": 48380 + }, + { + "epoch": 1.1820535997850146, + "grad_norm": 0.2312948852777481, + "learning_rate": 1.8887712598152498e-06, + "loss": 0.1844, + "num_input_tokens_seen": 32612576, + "step": 48385 + }, + { + "epoch": 1.1821757506168618, + "grad_norm": 0.7724999189376831, + "learning_rate": 1.8887321695098157e-06, + "loss": 0.0289, + "num_input_tokens_seen": 32615776, + "step": 48390 + }, + { + "epoch": 1.182297901448709, + "grad_norm": 0.4551694989204407, + "learning_rate": 1.8886930727412822e-06, + "loss": 0.0107, + "num_input_tokens_seen": 32618720, + "step": 48395 + }, + { + "epoch": 1.182420052280556, + "grad_norm": 55.0472412109375, + "learning_rate": 1.8886539695099338e-06, + "loss": 0.2006, + "num_input_tokens_seen": 32622368, + "step": 48400 + }, + { + "epoch": 1.182542203112403, + "grad_norm": 0.09480097144842148, + "learning_rate": 1.8886148598160542e-06, + "loss": 0.0016, + "num_input_tokens_seen": 32625952, + "step": 48405 + }, + { + "epoch": 1.1826643539442503, + "grad_norm": 13.491443634033203, + "learning_rate": 1.888575743659929e-06, + "loss": 0.0473, + "num_input_tokens_seen": 32629600, + "step": 48410 + }, + { + "epoch": 1.1827865047760975, + "grad_norm": 0.2457229048013687, + "learning_rate": 1.8885366210418415e-06, + "loss": 0.0073, + "num_input_tokens_seen": 32632608, + "step": 48415 + }, + { + "epoch": 1.1829086556079447, + "grad_norm": 0.18898482620716095, + "learning_rate": 1.8884974919620769e-06, + "loss": 0.0715, + "num_input_tokens_seen": 32635936, + "step": 48420 + }, + { + "epoch": 1.1830308064397919, + "grad_norm": 19.41193389892578, + "learning_rate": 1.8884583564209196e-06, + "loss": 0.1009, + "num_input_tokens_seen": 32639456, + "step": 48425 + }, + { + "epoch": 1.183152957271639, + "grad_norm": 106.54630279541016, + "learning_rate": 1.8884192144186541e-06, + "loss": 0.2983, + "num_input_tokens_seen": 32642848, + "step": 48430 + }, + { + "epoch": 1.1832751081034862, + "grad_norm": 12.161974906921387, + "learning_rate": 1.8883800659555652e-06, + "loss": 0.0824, + "num_input_tokens_seen": 32646176, + "step": 48435 + }, + { + "epoch": 1.1833972589353334, + "grad_norm": 26.162527084350586, + "learning_rate": 1.8883409110319372e-06, + "loss": 0.0827, + "num_input_tokens_seen": 32649632, + "step": 48440 + }, + { + "epoch": 1.1835194097671806, + "grad_norm": 33.377681732177734, + "learning_rate": 1.8883017496480553e-06, + "loss": 0.1353, + "num_input_tokens_seen": 32652832, + "step": 48445 + }, + { + "epoch": 1.1836415605990276, + "grad_norm": 53.93454360961914, + "learning_rate": 1.8882625818042043e-06, + "loss": 0.0216, + "num_input_tokens_seen": 32656096, + "step": 48450 + }, + { + "epoch": 1.1837637114308748, + "grad_norm": 0.2480727583169937, + "learning_rate": 1.888223407500669e-06, + "loss": 0.0541, + "num_input_tokens_seen": 32659488, + "step": 48455 + }, + { + "epoch": 1.183885862262722, + "grad_norm": 29.658166885375977, + "learning_rate": 1.8881842267377339e-06, + "loss": 0.1097, + "num_input_tokens_seen": 32662688, + "step": 48460 + }, + { + "epoch": 1.1840080130945692, + "grad_norm": 54.706947326660156, + "learning_rate": 1.8881450395156844e-06, + "loss": 0.1229, + "num_input_tokens_seen": 32665760, + "step": 48465 + }, + { + "epoch": 1.1841301639264163, + "grad_norm": 60.36311721801758, + "learning_rate": 1.888105845834805e-06, + "loss": 0.0777, + "num_input_tokens_seen": 32669024, + "step": 48470 + }, + { + "epoch": 1.1842523147582635, + "grad_norm": 0.33893075585365295, + "learning_rate": 1.8880666456953812e-06, + "loss": 0.0822, + "num_input_tokens_seen": 32672224, + "step": 48475 + }, + { + "epoch": 1.1843744655901107, + "grad_norm": 16.1857967376709, + "learning_rate": 1.8880274390976983e-06, + "loss": 0.0383, + "num_input_tokens_seen": 32675488, + "step": 48480 + }, + { + "epoch": 1.184496616421958, + "grad_norm": 0.5863775014877319, + "learning_rate": 1.8879882260420406e-06, + "loss": 0.0351, + "num_input_tokens_seen": 32678688, + "step": 48485 + }, + { + "epoch": 1.1846187672538049, + "grad_norm": 50.410255432128906, + "learning_rate": 1.8879490065286937e-06, + "loss": 0.1213, + "num_input_tokens_seen": 32682784, + "step": 48490 + }, + { + "epoch": 1.184740918085652, + "grad_norm": 1.918444037437439, + "learning_rate": 1.8879097805579428e-06, + "loss": 0.1368, + "num_input_tokens_seen": 32685856, + "step": 48495 + }, + { + "epoch": 1.1848630689174993, + "grad_norm": 0.1359431892633438, + "learning_rate": 1.8878705481300732e-06, + "loss": 0.0819, + "num_input_tokens_seen": 32689056, + "step": 48500 + }, + { + "epoch": 1.1849852197493465, + "grad_norm": 13.425424575805664, + "learning_rate": 1.88783130924537e-06, + "loss": 0.1993, + "num_input_tokens_seen": 32692384, + "step": 48505 + }, + { + "epoch": 1.1851073705811936, + "grad_norm": 118.53060150146484, + "learning_rate": 1.887792063904119e-06, + "loss": 0.0644, + "num_input_tokens_seen": 32695712, + "step": 48510 + }, + { + "epoch": 1.1852295214130408, + "grad_norm": 2.1681196689605713, + "learning_rate": 1.887752812106605e-06, + "loss": 0.1064, + "num_input_tokens_seen": 32698976, + "step": 48515 + }, + { + "epoch": 1.185351672244888, + "grad_norm": 16.27510643005371, + "learning_rate": 1.8877135538531139e-06, + "loss": 0.0749, + "num_input_tokens_seen": 32702880, + "step": 48520 + }, + { + "epoch": 1.1854738230767352, + "grad_norm": 1.0383045673370361, + "learning_rate": 1.887674289143931e-06, + "loss": 0.0552, + "num_input_tokens_seen": 32706464, + "step": 48525 + }, + { + "epoch": 1.1855959739085824, + "grad_norm": 0.9187676906585693, + "learning_rate": 1.8876350179793423e-06, + "loss": 0.1513, + "num_input_tokens_seen": 32709664, + "step": 48530 + }, + { + "epoch": 1.1857181247404296, + "grad_norm": 1.7384788990020752, + "learning_rate": 1.8875957403596328e-06, + "loss": 0.0842, + "num_input_tokens_seen": 32713184, + "step": 48535 + }, + { + "epoch": 1.1858402755722766, + "grad_norm": 0.10159554332494736, + "learning_rate": 1.8875564562850882e-06, + "loss": 0.039, + "num_input_tokens_seen": 32716448, + "step": 48540 + }, + { + "epoch": 1.1859624264041237, + "grad_norm": 0.4575258791446686, + "learning_rate": 1.8875171657559943e-06, + "loss": 0.0392, + "num_input_tokens_seen": 32720096, + "step": 48545 + }, + { + "epoch": 1.186084577235971, + "grad_norm": 105.02020263671875, + "learning_rate": 1.8874778687726369e-06, + "loss": 0.0228, + "num_input_tokens_seen": 32723488, + "step": 48550 + }, + { + "epoch": 1.1862067280678181, + "grad_norm": 38.00514221191406, + "learning_rate": 1.8874385653353018e-06, + "loss": 0.0652, + "num_input_tokens_seen": 32726752, + "step": 48555 + }, + { + "epoch": 1.1863288788996653, + "grad_norm": 18.732927322387695, + "learning_rate": 1.8873992554442748e-06, + "loss": 0.0803, + "num_input_tokens_seen": 32730720, + "step": 48560 + }, + { + "epoch": 1.1864510297315125, + "grad_norm": 0.14288055896759033, + "learning_rate": 1.8873599390998419e-06, + "loss": 0.0438, + "num_input_tokens_seen": 32733984, + "step": 48565 + }, + { + "epoch": 1.1865731805633597, + "grad_norm": 1.4553184509277344, + "learning_rate": 1.8873206163022886e-06, + "loss": 0.0943, + "num_input_tokens_seen": 32737120, + "step": 48570 + }, + { + "epoch": 1.1866953313952069, + "grad_norm": 30.531328201293945, + "learning_rate": 1.887281287051901e-06, + "loss": 0.0423, + "num_input_tokens_seen": 32740512, + "step": 48575 + }, + { + "epoch": 1.1868174822270539, + "grad_norm": 0.10857908427715302, + "learning_rate": 1.8872419513489652e-06, + "loss": 0.0007, + "num_input_tokens_seen": 32743392, + "step": 48580 + }, + { + "epoch": 1.186939633058901, + "grad_norm": 4.534030437469482, + "learning_rate": 1.8872026091937676e-06, + "loss": 0.0018, + "num_input_tokens_seen": 32746400, + "step": 48585 + }, + { + "epoch": 1.1870617838907482, + "grad_norm": 0.03740508109331131, + "learning_rate": 1.8871632605865939e-06, + "loss": 0.0158, + "num_input_tokens_seen": 32749536, + "step": 48590 + }, + { + "epoch": 1.1871839347225954, + "grad_norm": 0.11498308926820755, + "learning_rate": 1.8871239055277304e-06, + "loss": 0.0004, + "num_input_tokens_seen": 32752992, + "step": 48595 + }, + { + "epoch": 1.1873060855544426, + "grad_norm": 0.03428078442811966, + "learning_rate": 1.8870845440174632e-06, + "loss": 0.1389, + "num_input_tokens_seen": 32756384, + "step": 48600 + }, + { + "epoch": 1.1874282363862898, + "grad_norm": 12.763643264770508, + "learning_rate": 1.8870451760560785e-06, + "loss": 0.0459, + "num_input_tokens_seen": 32759904, + "step": 48605 + }, + { + "epoch": 1.187550387218137, + "grad_norm": 166.57456970214844, + "learning_rate": 1.8870058016438629e-06, + "loss": 0.0503, + "num_input_tokens_seen": 32763232, + "step": 48610 + }, + { + "epoch": 1.1876725380499842, + "grad_norm": 28.10552978515625, + "learning_rate": 1.8869664207811025e-06, + "loss": 0.1476, + "num_input_tokens_seen": 32766752, + "step": 48615 + }, + { + "epoch": 1.1877946888818314, + "grad_norm": 0.1488976925611496, + "learning_rate": 1.8869270334680833e-06, + "loss": 0.05, + "num_input_tokens_seen": 32770336, + "step": 48620 + }, + { + "epoch": 1.1879168397136786, + "grad_norm": 0.13166551291942596, + "learning_rate": 1.8868876397050925e-06, + "loss": 0.1422, + "num_input_tokens_seen": 32773600, + "step": 48625 + }, + { + "epoch": 1.1880389905455255, + "grad_norm": 0.9259403347969055, + "learning_rate": 1.8868482394924163e-06, + "loss": 0.1041, + "num_input_tokens_seen": 32776800, + "step": 48630 + }, + { + "epoch": 1.1881611413773727, + "grad_norm": 0.08954665809869766, + "learning_rate": 1.886808832830341e-06, + "loss": 0.0052, + "num_input_tokens_seen": 32780064, + "step": 48635 + }, + { + "epoch": 1.18828329220922, + "grad_norm": 12.26470947265625, + "learning_rate": 1.8867694197191536e-06, + "loss": 0.0493, + "num_input_tokens_seen": 32783648, + "step": 48640 + }, + { + "epoch": 1.188405443041067, + "grad_norm": 24.66190528869629, + "learning_rate": 1.8867300001591402e-06, + "loss": 0.0694, + "num_input_tokens_seen": 32787296, + "step": 48645 + }, + { + "epoch": 1.1885275938729143, + "grad_norm": 0.8406387567520142, + "learning_rate": 1.8866905741505878e-06, + "loss": 0.0524, + "num_input_tokens_seen": 32790688, + "step": 48650 + }, + { + "epoch": 1.1886497447047615, + "grad_norm": 13.112702369689941, + "learning_rate": 1.8866511416937833e-06, + "loss": 0.1508, + "num_input_tokens_seen": 32793824, + "step": 48655 + }, + { + "epoch": 1.1887718955366087, + "grad_norm": 1.6888105869293213, + "learning_rate": 1.8866117027890128e-06, + "loss": 0.1519, + "num_input_tokens_seen": 32797664, + "step": 48660 + }, + { + "epoch": 1.1888940463684559, + "grad_norm": 9.892433166503906, + "learning_rate": 1.8865722574365639e-06, + "loss": 0.1448, + "num_input_tokens_seen": 32801376, + "step": 48665 + }, + { + "epoch": 1.1890161972003028, + "grad_norm": 90.39240264892578, + "learning_rate": 1.8865328056367229e-06, + "loss": 0.1891, + "num_input_tokens_seen": 32805280, + "step": 48670 + }, + { + "epoch": 1.18913834803215, + "grad_norm": 51.569114685058594, + "learning_rate": 1.886493347389777e-06, + "loss": 0.0652, + "num_input_tokens_seen": 32808608, + "step": 48675 + }, + { + "epoch": 1.1892604988639972, + "grad_norm": 22.322158813476562, + "learning_rate": 1.886453882696013e-06, + "loss": 0.098, + "num_input_tokens_seen": 32811808, + "step": 48680 + }, + { + "epoch": 1.1893826496958444, + "grad_norm": 0.790939450263977, + "learning_rate": 1.8864144115557177e-06, + "loss": 0.0018, + "num_input_tokens_seen": 32815456, + "step": 48685 + }, + { + "epoch": 1.1895048005276916, + "grad_norm": 0.37682557106018066, + "learning_rate": 1.8863749339691788e-06, + "loss": 0.0915, + "num_input_tokens_seen": 32818720, + "step": 48690 + }, + { + "epoch": 1.1896269513595388, + "grad_norm": 166.25892639160156, + "learning_rate": 1.8863354499366825e-06, + "loss": 0.0542, + "num_input_tokens_seen": 32821984, + "step": 48695 + }, + { + "epoch": 1.189749102191386, + "grad_norm": 0.7915534973144531, + "learning_rate": 1.8862959594585166e-06, + "loss": 0.0797, + "num_input_tokens_seen": 32825440, + "step": 48700 + }, + { + "epoch": 1.1898712530232332, + "grad_norm": 0.34774404764175415, + "learning_rate": 1.8862564625349683e-06, + "loss": 0.102, + "num_input_tokens_seen": 32828576, + "step": 48705 + }, + { + "epoch": 1.1899934038550803, + "grad_norm": 0.022061169147491455, + "learning_rate": 1.8862169591663247e-06, + "loss": 0.0545, + "num_input_tokens_seen": 32832608, + "step": 48710 + }, + { + "epoch": 1.1901155546869275, + "grad_norm": 0.333593487739563, + "learning_rate": 1.8861774493528725e-06, + "loss": 0.1036, + "num_input_tokens_seen": 32835936, + "step": 48715 + }, + { + "epoch": 1.1902377055187745, + "grad_norm": 13.503067016601562, + "learning_rate": 1.8861379330949002e-06, + "loss": 0.1614, + "num_input_tokens_seen": 32839264, + "step": 48720 + }, + { + "epoch": 1.1903598563506217, + "grad_norm": 0.42545977234840393, + "learning_rate": 1.886098410392694e-06, + "loss": 0.0346, + "num_input_tokens_seen": 32842656, + "step": 48725 + }, + { + "epoch": 1.1904820071824689, + "grad_norm": 10.114810943603516, + "learning_rate": 1.886058881246542e-06, + "loss": 0.0614, + "num_input_tokens_seen": 32846176, + "step": 48730 + }, + { + "epoch": 1.190604158014316, + "grad_norm": 0.23883959650993347, + "learning_rate": 1.8860193456567313e-06, + "loss": 0.0386, + "num_input_tokens_seen": 32849568, + "step": 48735 + }, + { + "epoch": 1.1907263088461633, + "grad_norm": 15.07218074798584, + "learning_rate": 1.8859798036235498e-06, + "loss": 0.1608, + "num_input_tokens_seen": 32853088, + "step": 48740 + }, + { + "epoch": 1.1908484596780105, + "grad_norm": 0.1844908893108368, + "learning_rate": 1.8859402551472847e-06, + "loss": 0.0523, + "num_input_tokens_seen": 32856544, + "step": 48745 + }, + { + "epoch": 1.1909706105098576, + "grad_norm": 14.992534637451172, + "learning_rate": 1.8859007002282242e-06, + "loss": 0.0688, + "num_input_tokens_seen": 32860064, + "step": 48750 + }, + { + "epoch": 1.1910927613417048, + "grad_norm": 16.308456420898438, + "learning_rate": 1.8858611388666552e-06, + "loss": 0.2705, + "num_input_tokens_seen": 32863136, + "step": 48755 + }, + { + "epoch": 1.1912149121735518, + "grad_norm": 0.43662139773368835, + "learning_rate": 1.8858215710628657e-06, + "loss": 0.0421, + "num_input_tokens_seen": 32866464, + "step": 48760 + }, + { + "epoch": 1.191337063005399, + "grad_norm": 76.70539855957031, + "learning_rate": 1.8857819968171436e-06, + "loss": 0.1451, + "num_input_tokens_seen": 32870176, + "step": 48765 + }, + { + "epoch": 1.1914592138372462, + "grad_norm": 8.730032920837402, + "learning_rate": 1.8857424161297764e-06, + "loss": 0.0495, + "num_input_tokens_seen": 32873760, + "step": 48770 + }, + { + "epoch": 1.1915813646690934, + "grad_norm": 20.521678924560547, + "learning_rate": 1.8857028290010524e-06, + "loss": 0.172, + "num_input_tokens_seen": 32876896, + "step": 48775 + }, + { + "epoch": 1.1917035155009406, + "grad_norm": 1.9542988538742065, + "learning_rate": 1.885663235431259e-06, + "loss": 0.0676, + "num_input_tokens_seen": 32880608, + "step": 48780 + }, + { + "epoch": 1.1918256663327877, + "grad_norm": 0.7331753373146057, + "learning_rate": 1.8856236354206843e-06, + "loss": 0.0525, + "num_input_tokens_seen": 32883680, + "step": 48785 + }, + { + "epoch": 1.191947817164635, + "grad_norm": 1.1589040756225586, + "learning_rate": 1.8855840289696165e-06, + "loss": 0.0794, + "num_input_tokens_seen": 32886816, + "step": 48790 + }, + { + "epoch": 1.1920699679964821, + "grad_norm": 12.341038703918457, + "learning_rate": 1.885544416078343e-06, + "loss": 0.0449, + "num_input_tokens_seen": 32890144, + "step": 48795 + }, + { + "epoch": 1.1921921188283293, + "grad_norm": 9.298717498779297, + "learning_rate": 1.885504796747153e-06, + "loss": 0.3148, + "num_input_tokens_seen": 32893024, + "step": 48800 + }, + { + "epoch": 1.1923142696601765, + "grad_norm": 31.702577590942383, + "learning_rate": 1.8854651709763334e-06, + "loss": 0.0766, + "num_input_tokens_seen": 32897056, + "step": 48805 + }, + { + "epoch": 1.1924364204920235, + "grad_norm": 17.743892669677734, + "learning_rate": 1.8854255387661734e-06, + "loss": 0.0641, + "num_input_tokens_seen": 32900064, + "step": 48810 + }, + { + "epoch": 1.1925585713238707, + "grad_norm": 0.3661781847476959, + "learning_rate": 1.8853859001169603e-06, + "loss": 0.1346, + "num_input_tokens_seen": 32903456, + "step": 48815 + }, + { + "epoch": 1.1926807221557179, + "grad_norm": 9.447279930114746, + "learning_rate": 1.8853462550289829e-06, + "loss": 0.1509, + "num_input_tokens_seen": 32906528, + "step": 48820 + }, + { + "epoch": 1.192802872987565, + "grad_norm": 0.5084399580955505, + "learning_rate": 1.8853066035025295e-06, + "loss": 0.1062, + "num_input_tokens_seen": 32909920, + "step": 48825 + }, + { + "epoch": 1.1929250238194122, + "grad_norm": 1.1484479904174805, + "learning_rate": 1.8852669455378884e-06, + "loss": 0.0804, + "num_input_tokens_seen": 32913824, + "step": 48830 + }, + { + "epoch": 1.1930471746512594, + "grad_norm": 26.59931182861328, + "learning_rate": 1.8852272811353477e-06, + "loss": 0.0714, + "num_input_tokens_seen": 32917216, + "step": 48835 + }, + { + "epoch": 1.1931693254831066, + "grad_norm": 21.390541076660156, + "learning_rate": 1.8851876102951964e-06, + "loss": 0.0757, + "num_input_tokens_seen": 32920544, + "step": 48840 + }, + { + "epoch": 1.1932914763149536, + "grad_norm": 0.13798239827156067, + "learning_rate": 1.8851479330177228e-06, + "loss": 0.0366, + "num_input_tokens_seen": 32924000, + "step": 48845 + }, + { + "epoch": 1.1934136271468008, + "grad_norm": 1.2368896007537842, + "learning_rate": 1.885108249303215e-06, + "loss": 0.0422, + "num_input_tokens_seen": 32927456, + "step": 48850 + }, + { + "epoch": 1.193535777978648, + "grad_norm": 0.20851171016693115, + "learning_rate": 1.885068559151962e-06, + "loss": 0.0846, + "num_input_tokens_seen": 32930912, + "step": 48855 + }, + { + "epoch": 1.1936579288104952, + "grad_norm": 0.3231092393398285, + "learning_rate": 1.8850288625642525e-06, + "loss": 0.0491, + "num_input_tokens_seen": 32934176, + "step": 48860 + }, + { + "epoch": 1.1937800796423423, + "grad_norm": 11.667762756347656, + "learning_rate": 1.8849891595403752e-06, + "loss": 0.0707, + "num_input_tokens_seen": 32937952, + "step": 48865 + }, + { + "epoch": 1.1939022304741895, + "grad_norm": 0.18369610607624054, + "learning_rate": 1.8849494500806187e-06, + "loss": 0.0488, + "num_input_tokens_seen": 32941088, + "step": 48870 + }, + { + "epoch": 1.1940243813060367, + "grad_norm": 9.249509811401367, + "learning_rate": 1.8849097341852716e-06, + "loss": 0.1003, + "num_input_tokens_seen": 32944032, + "step": 48875 + }, + { + "epoch": 1.194146532137884, + "grad_norm": 0.4781881868839264, + "learning_rate": 1.884870011854623e-06, + "loss": 0.0657, + "num_input_tokens_seen": 32947488, + "step": 48880 + }, + { + "epoch": 1.194268682969731, + "grad_norm": 0.17906363308429718, + "learning_rate": 1.8848302830889615e-06, + "loss": 0.0476, + "num_input_tokens_seen": 32950688, + "step": 48885 + }, + { + "epoch": 1.1943908338015783, + "grad_norm": 0.6338130235671997, + "learning_rate": 1.8847905478885764e-06, + "loss": 0.1174, + "num_input_tokens_seen": 32953952, + "step": 48890 + }, + { + "epoch": 1.1945129846334253, + "grad_norm": 18.3079891204834, + "learning_rate": 1.884750806253756e-06, + "loss": 0.0362, + "num_input_tokens_seen": 32957344, + "step": 48895 + }, + { + "epoch": 1.1946351354652724, + "grad_norm": 12.767355918884277, + "learning_rate": 1.8847110581847902e-06, + "loss": 0.0785, + "num_input_tokens_seen": 32960672, + "step": 48900 + }, + { + "epoch": 1.1947572862971196, + "grad_norm": 0.2379637509584427, + "learning_rate": 1.8846713036819677e-06, + "loss": 0.0789, + "num_input_tokens_seen": 32963872, + "step": 48905 + }, + { + "epoch": 1.1948794371289668, + "grad_norm": 0.39095956087112427, + "learning_rate": 1.8846315427455774e-06, + "loss": 0.099, + "num_input_tokens_seen": 32967328, + "step": 48910 + }, + { + "epoch": 1.195001587960814, + "grad_norm": 0.7817822694778442, + "learning_rate": 1.8845917753759086e-06, + "loss": 0.0887, + "num_input_tokens_seen": 32970848, + "step": 48915 + }, + { + "epoch": 1.1951237387926612, + "grad_norm": 8.41550350189209, + "learning_rate": 1.8845520015732503e-06, + "loss": 0.1821, + "num_input_tokens_seen": 32974176, + "step": 48920 + }, + { + "epoch": 1.1952458896245084, + "grad_norm": 0.13997584581375122, + "learning_rate": 1.8845122213378921e-06, + "loss": 0.0193, + "num_input_tokens_seen": 32977376, + "step": 48925 + }, + { + "epoch": 1.1953680404563556, + "grad_norm": 1.8125817775726318, + "learning_rate": 1.884472434670123e-06, + "loss": 0.1289, + "num_input_tokens_seen": 32980576, + "step": 48930 + }, + { + "epoch": 1.1954901912882026, + "grad_norm": 25.555908203125, + "learning_rate": 1.8844326415702328e-06, + "loss": 0.0802, + "num_input_tokens_seen": 32983584, + "step": 48935 + }, + { + "epoch": 1.1956123421200497, + "grad_norm": 12.59317684173584, + "learning_rate": 1.8843928420385101e-06, + "loss": 0.109, + "num_input_tokens_seen": 32986720, + "step": 48940 + }, + { + "epoch": 1.195734492951897, + "grad_norm": 18.369953155517578, + "learning_rate": 1.884353036075245e-06, + "loss": 0.0378, + "num_input_tokens_seen": 32990496, + "step": 48945 + }, + { + "epoch": 1.1958566437837441, + "grad_norm": 0.26828473806381226, + "learning_rate": 1.8843132236807268e-06, + "loss": 0.1408, + "num_input_tokens_seen": 32993760, + "step": 48950 + }, + { + "epoch": 1.1959787946155913, + "grad_norm": 0.13218581676483154, + "learning_rate": 1.8842734048552451e-06, + "loss": 0.0941, + "num_input_tokens_seen": 32997024, + "step": 48955 + }, + { + "epoch": 1.1961009454474385, + "grad_norm": 2.13213849067688, + "learning_rate": 1.884233579599089e-06, + "loss": 0.0022, + "num_input_tokens_seen": 33000992, + "step": 48960 + }, + { + "epoch": 1.1962230962792857, + "grad_norm": 0.08893398940563202, + "learning_rate": 1.8841937479125488e-06, + "loss": 0.048, + "num_input_tokens_seen": 33004256, + "step": 48965 + }, + { + "epoch": 1.1963452471111329, + "grad_norm": 33.54923629760742, + "learning_rate": 1.8841539097959135e-06, + "loss": 0.175, + "num_input_tokens_seen": 33008032, + "step": 48970 + }, + { + "epoch": 1.19646739794298, + "grad_norm": 11.464241981506348, + "learning_rate": 1.8841140652494736e-06, + "loss": 0.1101, + "num_input_tokens_seen": 33011680, + "step": 48975 + }, + { + "epoch": 1.1965895487748273, + "grad_norm": 18.33887481689453, + "learning_rate": 1.8840742142735179e-06, + "loss": 0.1273, + "num_input_tokens_seen": 33014880, + "step": 48980 + }, + { + "epoch": 1.1967116996066742, + "grad_norm": 21.554765701293945, + "learning_rate": 1.8840343568683373e-06, + "loss": 0.1129, + "num_input_tokens_seen": 33018272, + "step": 48985 + }, + { + "epoch": 1.1968338504385214, + "grad_norm": 21.3913631439209, + "learning_rate": 1.8839944930342207e-06, + "loss": 0.0782, + "num_input_tokens_seen": 33021664, + "step": 48990 + }, + { + "epoch": 1.1969560012703686, + "grad_norm": 14.357202529907227, + "learning_rate": 1.8839546227714584e-06, + "loss": 0.122, + "num_input_tokens_seen": 33024608, + "step": 48995 + }, + { + "epoch": 1.1970781521022158, + "grad_norm": 67.93797302246094, + "learning_rate": 1.8839147460803404e-06, + "loss": 0.0397, + "num_input_tokens_seen": 33027872, + "step": 49000 + }, + { + "epoch": 1.197200302934063, + "grad_norm": 22.977998733520508, + "learning_rate": 1.8838748629611568e-06, + "loss": 0.1236, + "num_input_tokens_seen": 33031136, + "step": 49005 + }, + { + "epoch": 1.1973224537659102, + "grad_norm": 0.3773641586303711, + "learning_rate": 1.8838349734141972e-06, + "loss": 0.0368, + "num_input_tokens_seen": 33034144, + "step": 49010 + }, + { + "epoch": 1.1974446045977574, + "grad_norm": 0.11255284398794174, + "learning_rate": 1.8837950774397519e-06, + "loss": 0.0434, + "num_input_tokens_seen": 33037344, + "step": 49015 + }, + { + "epoch": 1.1975667554296046, + "grad_norm": 0.18880581855773926, + "learning_rate": 1.8837551750381114e-06, + "loss": 0.0687, + "num_input_tokens_seen": 33040416, + "step": 49020 + }, + { + "epoch": 1.1976889062614515, + "grad_norm": 162.01095581054688, + "learning_rate": 1.8837152662095654e-06, + "loss": 0.0608, + "num_input_tokens_seen": 33043552, + "step": 49025 + }, + { + "epoch": 1.1978110570932987, + "grad_norm": 0.333495557308197, + "learning_rate": 1.8836753509544043e-06, + "loss": 0.0286, + "num_input_tokens_seen": 33046752, + "step": 49030 + }, + { + "epoch": 1.197933207925146, + "grad_norm": 10.324767112731934, + "learning_rate": 1.8836354292729184e-06, + "loss": 0.0565, + "num_input_tokens_seen": 33050080, + "step": 49035 + }, + { + "epoch": 1.198055358756993, + "grad_norm": 16.853994369506836, + "learning_rate": 1.8835955011653977e-06, + "loss": 0.2166, + "num_input_tokens_seen": 33053344, + "step": 49040 + }, + { + "epoch": 1.1981775095888403, + "grad_norm": 0.9257891774177551, + "learning_rate": 1.8835555666321333e-06, + "loss": 0.0579, + "num_input_tokens_seen": 33056544, + "step": 49045 + }, + { + "epoch": 1.1982996604206875, + "grad_norm": 9.326736450195312, + "learning_rate": 1.8835156256734148e-06, + "loss": 0.0819, + "num_input_tokens_seen": 33059616, + "step": 49050 + }, + { + "epoch": 1.1984218112525347, + "grad_norm": 17.777437210083008, + "learning_rate": 1.8834756782895331e-06, + "loss": 0.0732, + "num_input_tokens_seen": 33062944, + "step": 49055 + }, + { + "epoch": 1.1985439620843819, + "grad_norm": 1.6575427055358887, + "learning_rate": 1.883435724480779e-06, + "loss": 0.0728, + "num_input_tokens_seen": 33066464, + "step": 49060 + }, + { + "epoch": 1.198666112916229, + "grad_norm": 0.591008722782135, + "learning_rate": 1.8833957642474424e-06, + "loss": 0.0304, + "num_input_tokens_seen": 33070752, + "step": 49065 + }, + { + "epoch": 1.1987882637480762, + "grad_norm": 0.5434102416038513, + "learning_rate": 1.8833557975898141e-06, + "loss": 0.0928, + "num_input_tokens_seen": 33073952, + "step": 49070 + }, + { + "epoch": 1.1989104145799232, + "grad_norm": 0.11578580737113953, + "learning_rate": 1.883315824508185e-06, + "loss": 0.081, + "num_input_tokens_seen": 33077152, + "step": 49075 + }, + { + "epoch": 1.1990325654117704, + "grad_norm": 0.2963810861110687, + "learning_rate": 1.8832758450028456e-06, + "loss": 0.0835, + "num_input_tokens_seen": 33080544, + "step": 49080 + }, + { + "epoch": 1.1991547162436176, + "grad_norm": 0.2604496479034424, + "learning_rate": 1.883235859074087e-06, + "loss": 0.0907, + "num_input_tokens_seen": 33083872, + "step": 49085 + }, + { + "epoch": 1.1992768670754648, + "grad_norm": 8.561380386352539, + "learning_rate": 1.8831958667221992e-06, + "loss": 0.1061, + "num_input_tokens_seen": 33087520, + "step": 49090 + }, + { + "epoch": 1.199399017907312, + "grad_norm": 125.36334228515625, + "learning_rate": 1.8831558679474738e-06, + "loss": 0.1422, + "num_input_tokens_seen": 33090912, + "step": 49095 + }, + { + "epoch": 1.1995211687391591, + "grad_norm": 21.69564437866211, + "learning_rate": 1.8831158627502012e-06, + "loss": 0.1067, + "num_input_tokens_seen": 33094304, + "step": 49100 + }, + { + "epoch": 1.1996433195710063, + "grad_norm": 0.19535818696022034, + "learning_rate": 1.8830758511306726e-06, + "loss": 0.0296, + "num_input_tokens_seen": 33097504, + "step": 49105 + }, + { + "epoch": 1.1997654704028535, + "grad_norm": 0.19155330955982208, + "learning_rate": 1.8830358330891789e-06, + "loss": 0.1116, + "num_input_tokens_seen": 33100704, + "step": 49110 + }, + { + "epoch": 1.1998876212347005, + "grad_norm": 15.071712493896484, + "learning_rate": 1.882995808626011e-06, + "loss": 0.0305, + "num_input_tokens_seen": 33103968, + "step": 49115 + }, + { + "epoch": 1.2000097720665477, + "grad_norm": 0.22924207150936127, + "learning_rate": 1.8829557777414602e-06, + "loss": 0.0876, + "num_input_tokens_seen": 33107680, + "step": 49120 + }, + { + "epoch": 1.2001319228983949, + "grad_norm": 29.139514923095703, + "learning_rate": 1.8829157404358176e-06, + "loss": 0.2117, + "num_input_tokens_seen": 33110944, + "step": 49125 + }, + { + "epoch": 1.200254073730242, + "grad_norm": 1.1408504247665405, + "learning_rate": 1.882875696709374e-06, + "loss": 0.1363, + "num_input_tokens_seen": 33114208, + "step": 49130 + }, + { + "epoch": 1.2003762245620893, + "grad_norm": 1.3661071062088013, + "learning_rate": 1.882835646562421e-06, + "loss": 0.0076, + "num_input_tokens_seen": 33117600, + "step": 49135 + }, + { + "epoch": 1.2004983753939364, + "grad_norm": 34.19418716430664, + "learning_rate": 1.8827955899952497e-06, + "loss": 0.1247, + "num_input_tokens_seen": 33121248, + "step": 49140 + }, + { + "epoch": 1.2006205262257836, + "grad_norm": 0.4470491409301758, + "learning_rate": 1.8827555270081513e-06, + "loss": 0.0067, + "num_input_tokens_seen": 33124512, + "step": 49145 + }, + { + "epoch": 1.2007426770576308, + "grad_norm": 0.7858008146286011, + "learning_rate": 1.8827154576014178e-06, + "loss": 0.196, + "num_input_tokens_seen": 33127840, + "step": 49150 + }, + { + "epoch": 1.200864827889478, + "grad_norm": 54.617897033691406, + "learning_rate": 1.8826753817753396e-06, + "loss": 0.0318, + "num_input_tokens_seen": 33130784, + "step": 49155 + }, + { + "epoch": 1.2009869787213252, + "grad_norm": 50.12831115722656, + "learning_rate": 1.8826352995302086e-06, + "loss": 0.1143, + "num_input_tokens_seen": 33134176, + "step": 49160 + }, + { + "epoch": 1.2011091295531722, + "grad_norm": 1.1908376216888428, + "learning_rate": 1.8825952108663163e-06, + "loss": 0.0035, + "num_input_tokens_seen": 33137248, + "step": 49165 + }, + { + "epoch": 1.2012312803850194, + "grad_norm": 24.506649017333984, + "learning_rate": 1.8825551157839543e-06, + "loss": 0.1935, + "num_input_tokens_seen": 33140512, + "step": 49170 + }, + { + "epoch": 1.2013534312168666, + "grad_norm": 0.5442198514938354, + "learning_rate": 1.8825150142834143e-06, + "loss": 0.1047, + "num_input_tokens_seen": 33143968, + "step": 49175 + }, + { + "epoch": 1.2014755820487137, + "grad_norm": 0.2753124535083771, + "learning_rate": 1.8824749063649876e-06, + "loss": 0.0758, + "num_input_tokens_seen": 33147616, + "step": 49180 + }, + { + "epoch": 1.201597732880561, + "grad_norm": 27.034799575805664, + "learning_rate": 1.882434792028966e-06, + "loss": 0.2307, + "num_input_tokens_seen": 33151008, + "step": 49185 + }, + { + "epoch": 1.2017198837124081, + "grad_norm": 31.514392852783203, + "learning_rate": 1.8823946712756413e-06, + "loss": 0.0385, + "num_input_tokens_seen": 33154976, + "step": 49190 + }, + { + "epoch": 1.2018420345442553, + "grad_norm": 17.733539581298828, + "learning_rate": 1.8823545441053053e-06, + "loss": 0.1566, + "num_input_tokens_seen": 33158240, + "step": 49195 + }, + { + "epoch": 1.2019641853761025, + "grad_norm": 21.296964645385742, + "learning_rate": 1.8823144105182496e-06, + "loss": 0.1751, + "num_input_tokens_seen": 33162016, + "step": 49200 + }, + { + "epoch": 1.2020863362079495, + "grad_norm": 0.2633887827396393, + "learning_rate": 1.8822742705147663e-06, + "loss": 0.126, + "num_input_tokens_seen": 33165536, + "step": 49205 + }, + { + "epoch": 1.2022084870397967, + "grad_norm": 87.6427230834961, + "learning_rate": 1.8822341240951469e-06, + "loss": 0.1073, + "num_input_tokens_seen": 33168864, + "step": 49210 + }, + { + "epoch": 1.2023306378716438, + "grad_norm": 26.27444839477539, + "learning_rate": 1.882193971259684e-06, + "loss": 0.1445, + "num_input_tokens_seen": 33172192, + "step": 49215 + }, + { + "epoch": 1.202452788703491, + "grad_norm": 8.962058067321777, + "learning_rate": 1.8821538120086693e-06, + "loss": 0.0778, + "num_input_tokens_seen": 33175840, + "step": 49220 + }, + { + "epoch": 1.2025749395353382, + "grad_norm": 0.818984866142273, + "learning_rate": 1.8821136463423945e-06, + "loss": 0.1007, + "num_input_tokens_seen": 33179232, + "step": 49225 + }, + { + "epoch": 1.2026970903671854, + "grad_norm": 0.5337079763412476, + "learning_rate": 1.8820734742611522e-06, + "loss": 0.0891, + "num_input_tokens_seen": 33182496, + "step": 49230 + }, + { + "epoch": 1.2028192411990326, + "grad_norm": 1.8495513200759888, + "learning_rate": 1.8820332957652342e-06, + "loss": 0.0656, + "num_input_tokens_seen": 33185888, + "step": 49235 + }, + { + "epoch": 1.2029413920308798, + "grad_norm": 0.3298915922641754, + "learning_rate": 1.881993110854933e-06, + "loss": 0.0315, + "num_input_tokens_seen": 33189664, + "step": 49240 + }, + { + "epoch": 1.203063542862727, + "grad_norm": 81.36953735351562, + "learning_rate": 1.8819529195305405e-06, + "loss": 0.0093, + "num_input_tokens_seen": 33192736, + "step": 49245 + }, + { + "epoch": 1.2031856936945742, + "grad_norm": 0.44343122839927673, + "learning_rate": 1.8819127217923492e-06, + "loss": 0.0607, + "num_input_tokens_seen": 33196576, + "step": 49250 + }, + { + "epoch": 1.2033078445264211, + "grad_norm": 0.6930961012840271, + "learning_rate": 1.8818725176406515e-06, + "loss": 0.0382, + "num_input_tokens_seen": 33199776, + "step": 49255 + }, + { + "epoch": 1.2034299953582683, + "grad_norm": 3.0237956047058105, + "learning_rate": 1.8818323070757397e-06, + "loss": 0.106, + "num_input_tokens_seen": 33203040, + "step": 49260 + }, + { + "epoch": 1.2035521461901155, + "grad_norm": 0.3650870621204376, + "learning_rate": 1.881792090097906e-06, + "loss": 0.093, + "num_input_tokens_seen": 33206240, + "step": 49265 + }, + { + "epoch": 1.2036742970219627, + "grad_norm": 31.69427490234375, + "learning_rate": 1.881751866707443e-06, + "loss": 0.1329, + "num_input_tokens_seen": 33209248, + "step": 49270 + }, + { + "epoch": 1.20379644785381, + "grad_norm": 0.1792955845594406, + "learning_rate": 1.8817116369046435e-06, + "loss": 0.0011, + "num_input_tokens_seen": 33212384, + "step": 49275 + }, + { + "epoch": 1.203918598685657, + "grad_norm": 0.19762201607227325, + "learning_rate": 1.8816714006897998e-06, + "loss": 0.0017, + "num_input_tokens_seen": 33215840, + "step": 49280 + }, + { + "epoch": 1.2040407495175043, + "grad_norm": 154.009765625, + "learning_rate": 1.8816311580632042e-06, + "loss": 0.114, + "num_input_tokens_seen": 33219232, + "step": 49285 + }, + { + "epoch": 1.2041629003493515, + "grad_norm": 40.82319259643555, + "learning_rate": 1.88159090902515e-06, + "loss": 0.0705, + "num_input_tokens_seen": 33222752, + "step": 49290 + }, + { + "epoch": 1.2042850511811984, + "grad_norm": 0.04902821406722069, + "learning_rate": 1.8815506535759296e-06, + "loss": 0.1219, + "num_input_tokens_seen": 33225952, + "step": 49295 + }, + { + "epoch": 1.2044072020130456, + "grad_norm": 20.65225601196289, + "learning_rate": 1.8815103917158356e-06, + "loss": 0.1627, + "num_input_tokens_seen": 33229664, + "step": 49300 + }, + { + "epoch": 1.2045293528448928, + "grad_norm": 0.23126031458377838, + "learning_rate": 1.881470123445161e-06, + "loss": 0.0472, + "num_input_tokens_seen": 33233504, + "step": 49305 + }, + { + "epoch": 1.20465150367674, + "grad_norm": 0.09219006448984146, + "learning_rate": 1.8814298487641986e-06, + "loss": 0.0009, + "num_input_tokens_seen": 33236512, + "step": 49310 + }, + { + "epoch": 1.2047736545085872, + "grad_norm": 0.14880770444869995, + "learning_rate": 1.8813895676732411e-06, + "loss": 0.0599, + "num_input_tokens_seen": 33239968, + "step": 49315 + }, + { + "epoch": 1.2048958053404344, + "grad_norm": 110.7625503540039, + "learning_rate": 1.8813492801725818e-06, + "loss": 0.0182, + "num_input_tokens_seen": 33243104, + "step": 49320 + }, + { + "epoch": 1.2050179561722816, + "grad_norm": 0.23787957429885864, + "learning_rate": 1.8813089862625136e-06, + "loss": 0.1218, + "num_input_tokens_seen": 33246112, + "step": 49325 + }, + { + "epoch": 1.2051401070041288, + "grad_norm": 0.011504840105772018, + "learning_rate": 1.881268685943329e-06, + "loss": 0.0602, + "num_input_tokens_seen": 33249568, + "step": 49330 + }, + { + "epoch": 1.205262257835976, + "grad_norm": 0.07925833016633987, + "learning_rate": 1.881228379215322e-06, + "loss": 0.0861, + "num_input_tokens_seen": 33253472, + "step": 49335 + }, + { + "epoch": 1.2053844086678231, + "grad_norm": 0.21478639543056488, + "learning_rate": 1.8811880660787846e-06, + "loss": 0.1753, + "num_input_tokens_seen": 33257120, + "step": 49340 + }, + { + "epoch": 1.2055065594996701, + "grad_norm": 0.07961128652095795, + "learning_rate": 1.881147746534011e-06, + "loss": 0.0815, + "num_input_tokens_seen": 33260384, + "step": 49345 + }, + { + "epoch": 1.2056287103315173, + "grad_norm": 22.093706130981445, + "learning_rate": 1.8811074205812938e-06, + "loss": 0.0344, + "num_input_tokens_seen": 33263712, + "step": 49350 + }, + { + "epoch": 1.2057508611633645, + "grad_norm": 0.31056874990463257, + "learning_rate": 1.8810670882209264e-06, + "loss": 0.0385, + "num_input_tokens_seen": 33266656, + "step": 49355 + }, + { + "epoch": 1.2058730119952117, + "grad_norm": 21.597169876098633, + "learning_rate": 1.8810267494532025e-06, + "loss": 0.1348, + "num_input_tokens_seen": 33269792, + "step": 49360 + }, + { + "epoch": 1.2059951628270589, + "grad_norm": 0.34313809871673584, + "learning_rate": 1.8809864042784147e-06, + "loss": 0.059, + "num_input_tokens_seen": 33272992, + "step": 49365 + }, + { + "epoch": 1.206117313658906, + "grad_norm": 22.627899169921875, + "learning_rate": 1.880946052696857e-06, + "loss": 0.0039, + "num_input_tokens_seen": 33276320, + "step": 49370 + }, + { + "epoch": 1.2062394644907533, + "grad_norm": 0.4183606207370758, + "learning_rate": 1.8809056947088226e-06, + "loss": 0.1022, + "num_input_tokens_seen": 33279712, + "step": 49375 + }, + { + "epoch": 1.2063616153226002, + "grad_norm": 11.300501823425293, + "learning_rate": 1.880865330314605e-06, + "loss": 0.0726, + "num_input_tokens_seen": 33282912, + "step": 49380 + }, + { + "epoch": 1.2064837661544474, + "grad_norm": 0.0277280081063509, + "learning_rate": 1.880824959514498e-06, + "loss": 0.1336, + "num_input_tokens_seen": 33286048, + "step": 49385 + }, + { + "epoch": 1.2066059169862946, + "grad_norm": 0.16218620538711548, + "learning_rate": 1.8807845823087952e-06, + "loss": 0.0506, + "num_input_tokens_seen": 33289120, + "step": 49390 + }, + { + "epoch": 1.2067280678181418, + "grad_norm": 0.07739616930484772, + "learning_rate": 1.8807441986977894e-06, + "loss": 0.0213, + "num_input_tokens_seen": 33292640, + "step": 49395 + }, + { + "epoch": 1.206850218649989, + "grad_norm": 16.28183937072754, + "learning_rate": 1.8807038086817752e-06, + "loss": 0.1098, + "num_input_tokens_seen": 33296224, + "step": 49400 + }, + { + "epoch": 1.2069723694818362, + "grad_norm": 0.10570183396339417, + "learning_rate": 1.8806634122610461e-06, + "loss": 0.1692, + "num_input_tokens_seen": 33299488, + "step": 49405 + }, + { + "epoch": 1.2070945203136834, + "grad_norm": 0.07286083698272705, + "learning_rate": 1.8806230094358954e-06, + "loss": 0.1198, + "num_input_tokens_seen": 33302752, + "step": 49410 + }, + { + "epoch": 1.2072166711455306, + "grad_norm": 95.79071807861328, + "learning_rate": 1.8805826002066178e-06, + "loss": 0.0987, + "num_input_tokens_seen": 33305696, + "step": 49415 + }, + { + "epoch": 1.2073388219773777, + "grad_norm": 21.291419982910156, + "learning_rate": 1.8805421845735065e-06, + "loss": 0.1584, + "num_input_tokens_seen": 33308768, + "step": 49420 + }, + { + "epoch": 1.207460972809225, + "grad_norm": 0.056146129965782166, + "learning_rate": 1.8805017625368555e-06, + "loss": 0.0575, + "num_input_tokens_seen": 33312288, + "step": 49425 + }, + { + "epoch": 1.2075831236410721, + "grad_norm": 0.043821725994348526, + "learning_rate": 1.8804613340969592e-06, + "loss": 0.1136, + "num_input_tokens_seen": 33315616, + "step": 49430 + }, + { + "epoch": 1.207705274472919, + "grad_norm": 0.25115376710891724, + "learning_rate": 1.880420899254111e-06, + "loss": 0.0248, + "num_input_tokens_seen": 33318880, + "step": 49435 + }, + { + "epoch": 1.2078274253047663, + "grad_norm": 0.3757718503475189, + "learning_rate": 1.8803804580086053e-06, + "loss": 0.0634, + "num_input_tokens_seen": 33322336, + "step": 49440 + }, + { + "epoch": 1.2079495761366135, + "grad_norm": 34.809120178222656, + "learning_rate": 1.8803400103607362e-06, + "loss": 0.1465, + "num_input_tokens_seen": 33325536, + "step": 49445 + }, + { + "epoch": 1.2080717269684607, + "grad_norm": 0.46399661898612976, + "learning_rate": 1.8802995563107972e-06, + "loss": 0.0011, + "num_input_tokens_seen": 33329056, + "step": 49450 + }, + { + "epoch": 1.2081938778003078, + "grad_norm": 39.4713020324707, + "learning_rate": 1.8802590958590837e-06, + "loss": 0.1953, + "num_input_tokens_seen": 33332512, + "step": 49455 + }, + { + "epoch": 1.208316028632155, + "grad_norm": 126.51252746582031, + "learning_rate": 1.8802186290058887e-06, + "loss": 0.0797, + "num_input_tokens_seen": 33335840, + "step": 49460 + }, + { + "epoch": 1.2084381794640022, + "grad_norm": 6.298394680023193, + "learning_rate": 1.8801781557515078e-06, + "loss": 0.0014, + "num_input_tokens_seen": 33339168, + "step": 49465 + }, + { + "epoch": 1.2085603302958492, + "grad_norm": 0.023851916193962097, + "learning_rate": 1.8801376760962343e-06, + "loss": 0.0374, + "num_input_tokens_seen": 33342240, + "step": 49470 + }, + { + "epoch": 1.2086824811276964, + "grad_norm": 0.04978490248322487, + "learning_rate": 1.8800971900403626e-06, + "loss": 0.0638, + "num_input_tokens_seen": 33345888, + "step": 49475 + }, + { + "epoch": 1.2088046319595436, + "grad_norm": 0.058799147605895996, + "learning_rate": 1.8800566975841878e-06, + "loss": 0.023, + "num_input_tokens_seen": 33349536, + "step": 49480 + }, + { + "epoch": 1.2089267827913908, + "grad_norm": 0.6056734919548035, + "learning_rate": 1.8800161987280037e-06, + "loss": 0.0884, + "num_input_tokens_seen": 33352480, + "step": 49485 + }, + { + "epoch": 1.209048933623238, + "grad_norm": 0.07104521989822388, + "learning_rate": 1.8799756934721055e-06, + "loss": 0.174, + "num_input_tokens_seen": 33355808, + "step": 49490 + }, + { + "epoch": 1.2091710844550851, + "grad_norm": 0.1675105094909668, + "learning_rate": 1.879935181816787e-06, + "loss": 0.143, + "num_input_tokens_seen": 33359328, + "step": 49495 + }, + { + "epoch": 1.2092932352869323, + "grad_norm": 0.27164918184280396, + "learning_rate": 1.8798946637623434e-06, + "loss": 0.0526, + "num_input_tokens_seen": 33362592, + "step": 49500 + }, + { + "epoch": 1.2094153861187795, + "grad_norm": 15.787830352783203, + "learning_rate": 1.879854139309069e-06, + "loss": 0.2042, + "num_input_tokens_seen": 33365728, + "step": 49505 + }, + { + "epoch": 1.2095375369506267, + "grad_norm": 0.5783581733703613, + "learning_rate": 1.8798136084572587e-06, + "loss": 0.1629, + "num_input_tokens_seen": 33368672, + "step": 49510 + }, + { + "epoch": 1.209659687782474, + "grad_norm": 0.5477581024169922, + "learning_rate": 1.8797730712072072e-06, + "loss": 0.0338, + "num_input_tokens_seen": 33371744, + "step": 49515 + }, + { + "epoch": 1.2097818386143209, + "grad_norm": 30.166261672973633, + "learning_rate": 1.8797325275592094e-06, + "loss": 0.0987, + "num_input_tokens_seen": 33375072, + "step": 49520 + }, + { + "epoch": 1.209903989446168, + "grad_norm": 0.4482729732990265, + "learning_rate": 1.8796919775135597e-06, + "loss": 0.1201, + "num_input_tokens_seen": 33377952, + "step": 49525 + }, + { + "epoch": 1.2100261402780152, + "grad_norm": 0.22780552506446838, + "learning_rate": 1.8796514210705537e-06, + "loss": 0.0782, + "num_input_tokens_seen": 33381472, + "step": 49530 + }, + { + "epoch": 1.2101482911098624, + "grad_norm": 0.6763140559196472, + "learning_rate": 1.8796108582304857e-06, + "loss": 0.0364, + "num_input_tokens_seen": 33384800, + "step": 49535 + }, + { + "epoch": 1.2102704419417096, + "grad_norm": 0.12094619870185852, + "learning_rate": 1.8795702889936511e-06, + "loss": 0.1381, + "num_input_tokens_seen": 33388192, + "step": 49540 + }, + { + "epoch": 1.2103925927735568, + "grad_norm": 5.095489501953125, + "learning_rate": 1.8795297133603446e-06, + "loss": 0.1393, + "num_input_tokens_seen": 33391648, + "step": 49545 + }, + { + "epoch": 1.210514743605404, + "grad_norm": 0.1459597945213318, + "learning_rate": 1.8794891313308617e-06, + "loss": 0.1072, + "num_input_tokens_seen": 33394848, + "step": 49550 + }, + { + "epoch": 1.2106368944372512, + "grad_norm": 16.98642921447754, + "learning_rate": 1.8794485429054973e-06, + "loss": 0.0801, + "num_input_tokens_seen": 33397920, + "step": 49555 + }, + { + "epoch": 1.2107590452690982, + "grad_norm": 0.34635409712791443, + "learning_rate": 1.8794079480845464e-06, + "loss": 0.051, + "num_input_tokens_seen": 33401760, + "step": 49560 + }, + { + "epoch": 1.2108811961009454, + "grad_norm": 0.36914634704589844, + "learning_rate": 1.8793673468683044e-06, + "loss": 0.0797, + "num_input_tokens_seen": 33405216, + "step": 49565 + }, + { + "epoch": 1.2110033469327925, + "grad_norm": 0.21115657687187195, + "learning_rate": 1.8793267392570667e-06, + "loss": 0.0243, + "num_input_tokens_seen": 33408992, + "step": 49570 + }, + { + "epoch": 1.2111254977646397, + "grad_norm": 0.3436622619628906, + "learning_rate": 1.8792861252511282e-06, + "loss": 0.0737, + "num_input_tokens_seen": 33412512, + "step": 49575 + }, + { + "epoch": 1.211247648596487, + "grad_norm": 0.27743715047836304, + "learning_rate": 1.8792455048507847e-06, + "loss": 0.108, + "num_input_tokens_seen": 33415904, + "step": 49580 + }, + { + "epoch": 1.2113697994283341, + "grad_norm": 0.7445909380912781, + "learning_rate": 1.8792048780563311e-06, + "loss": 0.0952, + "num_input_tokens_seen": 33419744, + "step": 49585 + }, + { + "epoch": 1.2114919502601813, + "grad_norm": 0.9027830362319946, + "learning_rate": 1.8791642448680633e-06, + "loss": 0.0513, + "num_input_tokens_seen": 33422688, + "step": 49590 + }, + { + "epoch": 1.2116141010920285, + "grad_norm": 8.377222061157227, + "learning_rate": 1.879123605286277e-06, + "loss": 0.0624, + "num_input_tokens_seen": 33425888, + "step": 49595 + }, + { + "epoch": 1.2117362519238757, + "grad_norm": 174.65496826171875, + "learning_rate": 1.8790829593112669e-06, + "loss": 0.0268, + "num_input_tokens_seen": 33428832, + "step": 49600 + }, + { + "epoch": 1.2118584027557229, + "grad_norm": 1.2196311950683594, + "learning_rate": 1.8790423069433294e-06, + "loss": 0.0843, + "num_input_tokens_seen": 33432672, + "step": 49605 + }, + { + "epoch": 1.2119805535875698, + "grad_norm": 0.6116811633110046, + "learning_rate": 1.8790016481827596e-06, + "loss": 0.1263, + "num_input_tokens_seen": 33435744, + "step": 49610 + }, + { + "epoch": 1.212102704419417, + "grad_norm": 1.9640523195266724, + "learning_rate": 1.8789609830298534e-06, + "loss": 0.0523, + "num_input_tokens_seen": 33439328, + "step": 49615 + }, + { + "epoch": 1.2122248552512642, + "grad_norm": 1.1018770933151245, + "learning_rate": 1.8789203114849067e-06, + "loss": 0.0036, + "num_input_tokens_seen": 33442912, + "step": 49620 + }, + { + "epoch": 1.2123470060831114, + "grad_norm": 0.18859164416790009, + "learning_rate": 1.8788796335482148e-06, + "loss": 0.0131, + "num_input_tokens_seen": 33446112, + "step": 49625 + }, + { + "epoch": 1.2124691569149586, + "grad_norm": 23.194774627685547, + "learning_rate": 1.878838949220074e-06, + "loss": 0.0871, + "num_input_tokens_seen": 33449568, + "step": 49630 + }, + { + "epoch": 1.2125913077468058, + "grad_norm": 15.930150985717773, + "learning_rate": 1.87879825850078e-06, + "loss": 0.1442, + "num_input_tokens_seen": 33452768, + "step": 49635 + }, + { + "epoch": 1.212713458578653, + "grad_norm": 0.10830620676279068, + "learning_rate": 1.8787575613906287e-06, + "loss": 0.0307, + "num_input_tokens_seen": 33455904, + "step": 49640 + }, + { + "epoch": 1.2128356094105002, + "grad_norm": 0.17682579159736633, + "learning_rate": 1.878716857889916e-06, + "loss": 0.0752, + "num_input_tokens_seen": 33459296, + "step": 49645 + }, + { + "epoch": 1.2129577602423471, + "grad_norm": 86.1319351196289, + "learning_rate": 1.878676147998938e-06, + "loss": 0.1513, + "num_input_tokens_seen": 33462752, + "step": 49650 + }, + { + "epoch": 1.2130799110741943, + "grad_norm": 44.84431457519531, + "learning_rate": 1.8786354317179906e-06, + "loss": 0.1706, + "num_input_tokens_seen": 33466528, + "step": 49655 + }, + { + "epoch": 1.2132020619060415, + "grad_norm": 79.0781021118164, + "learning_rate": 1.8785947090473702e-06, + "loss": 0.0216, + "num_input_tokens_seen": 33469856, + "step": 49660 + }, + { + "epoch": 1.2133242127378887, + "grad_norm": 1.6315191984176636, + "learning_rate": 1.8785539799873727e-06, + "loss": 0.1069, + "num_input_tokens_seen": 33473120, + "step": 49665 + }, + { + "epoch": 1.213446363569736, + "grad_norm": 0.07979770004749298, + "learning_rate": 1.8785132445382944e-06, + "loss": 0.1485, + "num_input_tokens_seen": 33476384, + "step": 49670 + }, + { + "epoch": 1.213568514401583, + "grad_norm": 0.18451182544231415, + "learning_rate": 1.8784725027004313e-06, + "loss": 0.0381, + "num_input_tokens_seen": 33480096, + "step": 49675 + }, + { + "epoch": 1.2136906652334303, + "grad_norm": 23.493776321411133, + "learning_rate": 1.87843175447408e-06, + "loss": 0.0876, + "num_input_tokens_seen": 33483232, + "step": 49680 + }, + { + "epoch": 1.2138128160652775, + "grad_norm": 0.05314216390252113, + "learning_rate": 1.8783909998595368e-06, + "loss": 0.0839, + "num_input_tokens_seen": 33486560, + "step": 49685 + }, + { + "epoch": 1.2139349668971247, + "grad_norm": 30.70671272277832, + "learning_rate": 1.8783502388570978e-06, + "loss": 0.1646, + "num_input_tokens_seen": 33490208, + "step": 49690 + }, + { + "epoch": 1.2140571177289718, + "grad_norm": 0.7101901173591614, + "learning_rate": 1.8783094714670597e-06, + "loss": 0.0009, + "num_input_tokens_seen": 33493856, + "step": 49695 + }, + { + "epoch": 1.2141792685608188, + "grad_norm": 0.2500157356262207, + "learning_rate": 1.8782686976897192e-06, + "loss": 0.0665, + "num_input_tokens_seen": 33497056, + "step": 49700 + }, + { + "epoch": 1.214301419392666, + "grad_norm": 44.201995849609375, + "learning_rate": 1.878227917525372e-06, + "loss": 0.0379, + "num_input_tokens_seen": 33500896, + "step": 49705 + }, + { + "epoch": 1.2144235702245132, + "grad_norm": 15.220165252685547, + "learning_rate": 1.8781871309743153e-06, + "loss": 0.1151, + "num_input_tokens_seen": 33504800, + "step": 49710 + }, + { + "epoch": 1.2145457210563604, + "grad_norm": 154.23687744140625, + "learning_rate": 1.8781463380368455e-06, + "loss": 0.1571, + "num_input_tokens_seen": 33508000, + "step": 49715 + }, + { + "epoch": 1.2146678718882076, + "grad_norm": 3.0431413650512695, + "learning_rate": 1.8781055387132598e-06, + "loss": 0.1452, + "num_input_tokens_seen": 33511200, + "step": 49720 + }, + { + "epoch": 1.2147900227200548, + "grad_norm": 55.84829330444336, + "learning_rate": 1.8780647330038541e-06, + "loss": 0.2403, + "num_input_tokens_seen": 33514464, + "step": 49725 + }, + { + "epoch": 1.214912173551902, + "grad_norm": 45.058982849121094, + "learning_rate": 1.8780239209089254e-06, + "loss": 0.1515, + "num_input_tokens_seen": 33517472, + "step": 49730 + }, + { + "epoch": 1.2150343243837491, + "grad_norm": 0.10691691935062408, + "learning_rate": 1.8779831024287706e-06, + "loss": 0.0479, + "num_input_tokens_seen": 33521312, + "step": 49735 + }, + { + "epoch": 1.215156475215596, + "grad_norm": 0.10126947611570358, + "learning_rate": 1.8779422775636869e-06, + "loss": 0.0816, + "num_input_tokens_seen": 33524896, + "step": 49740 + }, + { + "epoch": 1.2152786260474433, + "grad_norm": 130.59033203125, + "learning_rate": 1.8779014463139706e-06, + "loss": 0.0805, + "num_input_tokens_seen": 33528096, + "step": 49745 + }, + { + "epoch": 1.2154007768792905, + "grad_norm": 9.010315895080566, + "learning_rate": 1.877860608679919e-06, + "loss": 0.094, + "num_input_tokens_seen": 33531104, + "step": 49750 + }, + { + "epoch": 1.2155229277111377, + "grad_norm": 0.2826998233795166, + "learning_rate": 1.8778197646618285e-06, + "loss": 0.0978, + "num_input_tokens_seen": 33534496, + "step": 49755 + }, + { + "epoch": 1.2156450785429849, + "grad_norm": 0.1441763937473297, + "learning_rate": 1.8777789142599968e-06, + "loss": 0.096, + "num_input_tokens_seen": 33537696, + "step": 49760 + }, + { + "epoch": 1.215767229374832, + "grad_norm": 28.714641571044922, + "learning_rate": 1.8777380574747208e-06, + "loss": 0.2209, + "num_input_tokens_seen": 33541024, + "step": 49765 + }, + { + "epoch": 1.2158893802066792, + "grad_norm": 119.21171569824219, + "learning_rate": 1.8776971943062975e-06, + "loss": 0.1062, + "num_input_tokens_seen": 33544288, + "step": 49770 + }, + { + "epoch": 1.2160115310385264, + "grad_norm": 0.9597406983375549, + "learning_rate": 1.8776563247550242e-06, + "loss": 0.0532, + "num_input_tokens_seen": 33547360, + "step": 49775 + }, + { + "epoch": 1.2161336818703736, + "grad_norm": 0.5183383822441101, + "learning_rate": 1.877615448821198e-06, + "loss": 0.0779, + "num_input_tokens_seen": 33551520, + "step": 49780 + }, + { + "epoch": 1.2162558327022208, + "grad_norm": 72.81488037109375, + "learning_rate": 1.8775745665051161e-06, + "loss": 0.0497, + "num_input_tokens_seen": 33554528, + "step": 49785 + }, + { + "epoch": 1.2163779835340678, + "grad_norm": 0.13180294632911682, + "learning_rate": 1.8775336778070762e-06, + "loss": 0.0029, + "num_input_tokens_seen": 33557984, + "step": 49790 + }, + { + "epoch": 1.216500134365915, + "grad_norm": 0.5422255992889404, + "learning_rate": 1.877492782727375e-06, + "loss": 0.0614, + "num_input_tokens_seen": 33560928, + "step": 49795 + }, + { + "epoch": 1.2166222851977622, + "grad_norm": 0.05892868712544441, + "learning_rate": 1.8774518812663104e-06, + "loss": 0.1512, + "num_input_tokens_seen": 33564256, + "step": 49800 + }, + { + "epoch": 1.2167444360296094, + "grad_norm": 0.9378458261489868, + "learning_rate": 1.8774109734241798e-06, + "loss": 0.0401, + "num_input_tokens_seen": 33567584, + "step": 49805 + }, + { + "epoch": 1.2168665868614565, + "grad_norm": 140.070068359375, + "learning_rate": 1.8773700592012806e-06, + "loss": 0.0408, + "num_input_tokens_seen": 33570976, + "step": 49810 + }, + { + "epoch": 1.2169887376933037, + "grad_norm": 0.2291070520877838, + "learning_rate": 1.8773291385979104e-06, + "loss": 0.024, + "num_input_tokens_seen": 33573984, + "step": 49815 + }, + { + "epoch": 1.217110888525151, + "grad_norm": 0.6731608510017395, + "learning_rate": 1.8772882116143667e-06, + "loss": 0.1288, + "num_input_tokens_seen": 33577504, + "step": 49820 + }, + { + "epoch": 1.2172330393569981, + "grad_norm": 13.080459594726562, + "learning_rate": 1.8772472782509473e-06, + "loss": 0.1204, + "num_input_tokens_seen": 33580960, + "step": 49825 + }, + { + "epoch": 1.217355190188845, + "grad_norm": 0.11996249854564667, + "learning_rate": 1.8772063385079493e-06, + "loss": 0.1673, + "num_input_tokens_seen": 33584032, + "step": 49830 + }, + { + "epoch": 1.2174773410206923, + "grad_norm": 0.5631680488586426, + "learning_rate": 1.877165392385671e-06, + "loss": 0.0721, + "num_input_tokens_seen": 33587296, + "step": 49835 + }, + { + "epoch": 1.2175994918525395, + "grad_norm": 9.505610466003418, + "learning_rate": 1.8771244398844104e-06, + "loss": 0.0028, + "num_input_tokens_seen": 33590368, + "step": 49840 + }, + { + "epoch": 1.2177216426843867, + "grad_norm": 0.05609140172600746, + "learning_rate": 1.8770834810044646e-06, + "loss": 0.0344, + "num_input_tokens_seen": 33593696, + "step": 49845 + }, + { + "epoch": 1.2178437935162338, + "grad_norm": 0.06473961472511292, + "learning_rate": 1.8770425157461318e-06, + "loss": 0.0828, + "num_input_tokens_seen": 33597024, + "step": 49850 + }, + { + "epoch": 1.217965944348081, + "grad_norm": 0.9672479629516602, + "learning_rate": 1.8770015441097103e-06, + "loss": 0.0105, + "num_input_tokens_seen": 33600288, + "step": 49855 + }, + { + "epoch": 1.2180880951799282, + "grad_norm": 109.47171783447266, + "learning_rate": 1.8769605660954975e-06, + "loss": 0.2466, + "num_input_tokens_seen": 33603232, + "step": 49860 + }, + { + "epoch": 1.2182102460117754, + "grad_norm": 0.003171207383275032, + "learning_rate": 1.8769195817037916e-06, + "loss": 0.0755, + "num_input_tokens_seen": 33606688, + "step": 49865 + }, + { + "epoch": 1.2183323968436226, + "grad_norm": 11.40949821472168, + "learning_rate": 1.8768785909348904e-06, + "loss": 0.0893, + "num_input_tokens_seen": 33610080, + "step": 49870 + }, + { + "epoch": 1.2184545476754698, + "grad_norm": 0.20446684956550598, + "learning_rate": 1.8768375937890926e-06, + "loss": 0.0612, + "num_input_tokens_seen": 33613280, + "step": 49875 + }, + { + "epoch": 1.2185766985073168, + "grad_norm": 0.7266244888305664, + "learning_rate": 1.8767965902666956e-06, + "loss": 0.0436, + "num_input_tokens_seen": 33616224, + "step": 49880 + }, + { + "epoch": 1.218698849339164, + "grad_norm": 0.1490468531847, + "learning_rate": 1.8767555803679981e-06, + "loss": 0.1213, + "num_input_tokens_seen": 33619680, + "step": 49885 + }, + { + "epoch": 1.2188210001710111, + "grad_norm": 14.057254791259766, + "learning_rate": 1.8767145640932984e-06, + "loss": 0.1133, + "num_input_tokens_seen": 33622880, + "step": 49890 + }, + { + "epoch": 1.2189431510028583, + "grad_norm": 56.923553466796875, + "learning_rate": 1.8766735414428943e-06, + "loss": 0.0274, + "num_input_tokens_seen": 33626080, + "step": 49895 + }, + { + "epoch": 1.2190653018347055, + "grad_norm": 2.8939082622528076, + "learning_rate": 1.8766325124170845e-06, + "loss": 0.1154, + "num_input_tokens_seen": 33630048, + "step": 49900 + }, + { + "epoch": 1.2191874526665527, + "grad_norm": 58.96417999267578, + "learning_rate": 1.8765914770161676e-06, + "loss": 0.1555, + "num_input_tokens_seen": 33633376, + "step": 49905 + }, + { + "epoch": 1.2193096034984, + "grad_norm": 0.5027133226394653, + "learning_rate": 1.8765504352404414e-06, + "loss": 0.1089, + "num_input_tokens_seen": 33638944, + "step": 49910 + }, + { + "epoch": 1.2194317543302469, + "grad_norm": 10.49686050415039, + "learning_rate": 1.8765093870902046e-06, + "loss": 0.0729, + "num_input_tokens_seen": 33642208, + "step": 49915 + }, + { + "epoch": 1.219553905162094, + "grad_norm": 12.41897964477539, + "learning_rate": 1.8764683325657558e-06, + "loss": 0.0803, + "num_input_tokens_seen": 33646240, + "step": 49920 + }, + { + "epoch": 1.2196760559939412, + "grad_norm": 1.074639081954956, + "learning_rate": 1.8764272716673936e-06, + "loss": 0.0596, + "num_input_tokens_seen": 33649888, + "step": 49925 + }, + { + "epoch": 1.2197982068257884, + "grad_norm": 0.3886198401451111, + "learning_rate": 1.8763862043954167e-06, + "loss": 0.1279, + "num_input_tokens_seen": 33652768, + "step": 49930 + }, + { + "epoch": 1.2199203576576356, + "grad_norm": 0.15774321556091309, + "learning_rate": 1.8763451307501234e-06, + "loss": 0.03, + "num_input_tokens_seen": 33656288, + "step": 49935 + }, + { + "epoch": 1.2200425084894828, + "grad_norm": 0.2555408477783203, + "learning_rate": 1.8763040507318126e-06, + "loss": 0.014, + "num_input_tokens_seen": 33659616, + "step": 49940 + }, + { + "epoch": 1.22016465932133, + "grad_norm": 2.39106822013855, + "learning_rate": 1.8762629643407832e-06, + "loss": 0.1982, + "num_input_tokens_seen": 33662688, + "step": 49945 + }, + { + "epoch": 1.2202868101531772, + "grad_norm": 0.30551204085350037, + "learning_rate": 1.876221871577334e-06, + "loss": 0.0014, + "num_input_tokens_seen": 33666016, + "step": 49950 + }, + { + "epoch": 1.2204089609850244, + "grad_norm": 0.18218585848808289, + "learning_rate": 1.8761807724417633e-06, + "loss": 0.073, + "num_input_tokens_seen": 33669536, + "step": 49955 + }, + { + "epoch": 1.2205311118168716, + "grad_norm": 16.104793548583984, + "learning_rate": 1.8761396669343705e-06, + "loss": 0.0847, + "num_input_tokens_seen": 33672672, + "step": 49960 + }, + { + "epoch": 1.2206532626487188, + "grad_norm": 32.72917938232422, + "learning_rate": 1.8760985550554545e-06, + "loss": 0.0568, + "num_input_tokens_seen": 33675808, + "step": 49965 + }, + { + "epoch": 1.2207754134805657, + "grad_norm": 0.12076198309659958, + "learning_rate": 1.876057436805314e-06, + "loss": 0.0013, + "num_input_tokens_seen": 33678816, + "step": 49970 + }, + { + "epoch": 1.220897564312413, + "grad_norm": 1.4278993606567383, + "learning_rate": 1.8760163121842483e-06, + "loss": 0.0405, + "num_input_tokens_seen": 33682272, + "step": 49975 + }, + { + "epoch": 1.22101971514426, + "grad_norm": 0.31052538752555847, + "learning_rate": 1.8759751811925564e-06, + "loss": 0.2167, + "num_input_tokens_seen": 33685792, + "step": 49980 + }, + { + "epoch": 1.2211418659761073, + "grad_norm": 0.14995524287223816, + "learning_rate": 1.875934043830537e-06, + "loss": 0.1503, + "num_input_tokens_seen": 33689056, + "step": 49985 + }, + { + "epoch": 1.2212640168079545, + "grad_norm": 0.036245085299015045, + "learning_rate": 1.87589290009849e-06, + "loss": 0.1675, + "num_input_tokens_seen": 33693280, + "step": 49990 + }, + { + "epoch": 1.2213861676398017, + "grad_norm": 0.24092210829257965, + "learning_rate": 1.8758517499967144e-06, + "loss": 0.0654, + "num_input_tokens_seen": 33696800, + "step": 49995 + }, + { + "epoch": 1.2215083184716489, + "grad_norm": 17.42069435119629, + "learning_rate": 1.8758105935255089e-06, + "loss": 0.1313, + "num_input_tokens_seen": 33700320, + "step": 50000 + }, + { + "epoch": 1.2216304693034958, + "grad_norm": 179.08494567871094, + "learning_rate": 1.8757694306851732e-06, + "loss": 0.1726, + "num_input_tokens_seen": 33703776, + "step": 50005 + }, + { + "epoch": 1.221752620135343, + "grad_norm": 9.46633243560791, + "learning_rate": 1.8757282614760071e-06, + "loss": 0.0512, + "num_input_tokens_seen": 33707488, + "step": 50010 + }, + { + "epoch": 1.2218747709671902, + "grad_norm": 1.2339563369750977, + "learning_rate": 1.8756870858983089e-06, + "loss": 0.0313, + "num_input_tokens_seen": 33710496, + "step": 50015 + }, + { + "epoch": 1.2219969217990374, + "grad_norm": 0.23303188383579254, + "learning_rate": 1.8756459039523791e-06, + "loss": 0.079, + "num_input_tokens_seen": 33714720, + "step": 50020 + }, + { + "epoch": 1.2221190726308846, + "grad_norm": 0.16474978625774384, + "learning_rate": 1.8756047156385169e-06, + "loss": 0.002, + "num_input_tokens_seen": 33717984, + "step": 50025 + }, + { + "epoch": 1.2222412234627318, + "grad_norm": 34.933135986328125, + "learning_rate": 1.8755635209570213e-06, + "loss": 0.1796, + "num_input_tokens_seen": 33720928, + "step": 50030 + }, + { + "epoch": 1.222363374294579, + "grad_norm": 0.29329726099967957, + "learning_rate": 1.8755223199081924e-06, + "loss": 0.1181, + "num_input_tokens_seen": 33724576, + "step": 50035 + }, + { + "epoch": 1.2224855251264262, + "grad_norm": 0.19066888093948364, + "learning_rate": 1.8754811124923298e-06, + "loss": 0.0019, + "num_input_tokens_seen": 33728480, + "step": 50040 + }, + { + "epoch": 1.2226076759582734, + "grad_norm": 0.22548232972621918, + "learning_rate": 1.8754398987097331e-06, + "loss": 0.0013, + "num_input_tokens_seen": 33731744, + "step": 50045 + }, + { + "epoch": 1.2227298267901205, + "grad_norm": 12.361126899719238, + "learning_rate": 1.8753986785607019e-06, + "loss": 0.0598, + "num_input_tokens_seen": 33735584, + "step": 50050 + }, + { + "epoch": 1.2228519776219675, + "grad_norm": 17.38907241821289, + "learning_rate": 1.8753574520455362e-06, + "loss": 0.1249, + "num_input_tokens_seen": 33739552, + "step": 50055 + }, + { + "epoch": 1.2229741284538147, + "grad_norm": 0.934760570526123, + "learning_rate": 1.8753162191645354e-06, + "loss": 0.09, + "num_input_tokens_seen": 33742688, + "step": 50060 + }, + { + "epoch": 1.223096279285662, + "grad_norm": 0.1813424676656723, + "learning_rate": 1.8752749799179997e-06, + "loss": 0.0686, + "num_input_tokens_seen": 33746080, + "step": 50065 + }, + { + "epoch": 1.223218430117509, + "grad_norm": 0.2869721055030823, + "learning_rate": 1.8752337343062291e-06, + "loss": 0.1781, + "num_input_tokens_seen": 33749216, + "step": 50070 + }, + { + "epoch": 1.2233405809493563, + "grad_norm": 1.4320014715194702, + "learning_rate": 1.8751924823295232e-06, + "loss": 0.0901, + "num_input_tokens_seen": 33752864, + "step": 50075 + }, + { + "epoch": 1.2234627317812035, + "grad_norm": 0.3322947323322296, + "learning_rate": 1.8751512239881824e-06, + "loss": 0.0491, + "num_input_tokens_seen": 33756192, + "step": 50080 + }, + { + "epoch": 1.2235848826130507, + "grad_norm": 0.4504927694797516, + "learning_rate": 1.8751099592825063e-06, + "loss": 0.0625, + "num_input_tokens_seen": 33759456, + "step": 50085 + }, + { + "epoch": 1.2237070334448978, + "grad_norm": 39.807037353515625, + "learning_rate": 1.8750686882127952e-06, + "loss": 0.0798, + "num_input_tokens_seen": 33762912, + "step": 50090 + }, + { + "epoch": 1.2238291842767448, + "grad_norm": 0.16581711173057556, + "learning_rate": 1.8750274107793492e-06, + "loss": 0.0012, + "num_input_tokens_seen": 33766304, + "step": 50095 + }, + { + "epoch": 1.223951335108592, + "grad_norm": 15.672653198242188, + "learning_rate": 1.8749861269824688e-06, + "loss": 0.1302, + "num_input_tokens_seen": 33769696, + "step": 50100 + }, + { + "epoch": 1.2240734859404392, + "grad_norm": 0.11208673566579819, + "learning_rate": 1.8749448368224536e-06, + "loss": 0.0476, + "num_input_tokens_seen": 33773088, + "step": 50105 + }, + { + "epoch": 1.2241956367722864, + "grad_norm": 17.239974975585938, + "learning_rate": 1.8749035402996042e-06, + "loss": 0.0521, + "num_input_tokens_seen": 33776864, + "step": 50110 + }, + { + "epoch": 1.2243177876041336, + "grad_norm": 23.186294555664062, + "learning_rate": 1.8748622374142213e-06, + "loss": 0.1457, + "num_input_tokens_seen": 33780192, + "step": 50115 + }, + { + "epoch": 1.2244399384359808, + "grad_norm": 12.526515007019043, + "learning_rate": 1.8748209281666047e-06, + "loss": 0.108, + "num_input_tokens_seen": 33783712, + "step": 50120 + }, + { + "epoch": 1.224562089267828, + "grad_norm": 0.32619011402130127, + "learning_rate": 1.874779612557055e-06, + "loss": 0.0042, + "num_input_tokens_seen": 33787296, + "step": 50125 + }, + { + "epoch": 1.2246842400996751, + "grad_norm": 50.9199104309082, + "learning_rate": 1.8747382905858728e-06, + "loss": 0.1485, + "num_input_tokens_seen": 33790752, + "step": 50130 + }, + { + "epoch": 1.2248063909315223, + "grad_norm": 0.3081582188606262, + "learning_rate": 1.8746969622533584e-06, + "loss": 0.0507, + "num_input_tokens_seen": 33794272, + "step": 50135 + }, + { + "epoch": 1.2249285417633695, + "grad_norm": 0.18558557331562042, + "learning_rate": 1.8746556275598122e-06, + "loss": 0.0425, + "num_input_tokens_seen": 33797664, + "step": 50140 + }, + { + "epoch": 1.2250506925952165, + "grad_norm": 0.13867150247097015, + "learning_rate": 1.8746142865055353e-06, + "loss": 0.1256, + "num_input_tokens_seen": 33801312, + "step": 50145 + }, + { + "epoch": 1.2251728434270637, + "grad_norm": 0.007238840684294701, + "learning_rate": 1.8745729390908278e-06, + "loss": 0.0746, + "num_input_tokens_seen": 33804896, + "step": 50150 + }, + { + "epoch": 1.2252949942589109, + "grad_norm": 0.06682219356298447, + "learning_rate": 1.8745315853159909e-06, + "loss": 0.1385, + "num_input_tokens_seen": 33808928, + "step": 50155 + }, + { + "epoch": 1.225417145090758, + "grad_norm": 0.026519853621721268, + "learning_rate": 1.874490225181325e-06, + "loss": 0.0433, + "num_input_tokens_seen": 33811808, + "step": 50160 + }, + { + "epoch": 1.2255392959226052, + "grad_norm": 22.701797485351562, + "learning_rate": 1.874448858687131e-06, + "loss": 0.0743, + "num_input_tokens_seen": 33815072, + "step": 50165 + }, + { + "epoch": 1.2256614467544524, + "grad_norm": 0.12054687738418579, + "learning_rate": 1.8744074858337097e-06, + "loss": 0.0852, + "num_input_tokens_seen": 33818656, + "step": 50170 + }, + { + "epoch": 1.2257835975862996, + "grad_norm": 0.3371358811855316, + "learning_rate": 1.874366106621362e-06, + "loss": 0.0143, + "num_input_tokens_seen": 33821920, + "step": 50175 + }, + { + "epoch": 1.2259057484181468, + "grad_norm": 0.2913208603858948, + "learning_rate": 1.8743247210503887e-06, + "loss": 0.0373, + "num_input_tokens_seen": 33825248, + "step": 50180 + }, + { + "epoch": 1.2260278992499938, + "grad_norm": 0.15739411115646362, + "learning_rate": 1.874283329121091e-06, + "loss": 0.1028, + "num_input_tokens_seen": 33828768, + "step": 50185 + }, + { + "epoch": 1.226150050081841, + "grad_norm": 0.07163192331790924, + "learning_rate": 1.8742419308337695e-06, + "loss": 0.1173, + "num_input_tokens_seen": 33832288, + "step": 50190 + }, + { + "epoch": 1.2262722009136882, + "grad_norm": 8.579046249389648, + "learning_rate": 1.874200526188726e-06, + "loss": 0.0366, + "num_input_tokens_seen": 33835616, + "step": 50195 + }, + { + "epoch": 1.2263943517455353, + "grad_norm": 1.7747700214385986, + "learning_rate": 1.8741591151862607e-06, + "loss": 0.027, + "num_input_tokens_seen": 33839520, + "step": 50200 + }, + { + "epoch": 1.2265165025773825, + "grad_norm": 64.0686264038086, + "learning_rate": 1.8741176978266755e-06, + "loss": 0.0038, + "num_input_tokens_seen": 33842720, + "step": 50205 + }, + { + "epoch": 1.2266386534092297, + "grad_norm": 0.3116965889930725, + "learning_rate": 1.8740762741102709e-06, + "loss": 0.0015, + "num_input_tokens_seen": 33846240, + "step": 50210 + }, + { + "epoch": 1.226760804241077, + "grad_norm": 18.133525848388672, + "learning_rate": 1.874034844037349e-06, + "loss": 0.1725, + "num_input_tokens_seen": 33849824, + "step": 50215 + }, + { + "epoch": 1.226882955072924, + "grad_norm": 0.09997110813856125, + "learning_rate": 1.8739934076082102e-06, + "loss": 0.1997, + "num_input_tokens_seen": 33853024, + "step": 50220 + }, + { + "epoch": 1.2270051059047713, + "grad_norm": 0.17489533126354218, + "learning_rate": 1.8739519648231568e-06, + "loss": 0.0583, + "num_input_tokens_seen": 33856672, + "step": 50225 + }, + { + "epoch": 1.2271272567366185, + "grad_norm": 10.11457347869873, + "learning_rate": 1.8739105156824893e-06, + "loss": 0.0894, + "num_input_tokens_seen": 33859744, + "step": 50230 + }, + { + "epoch": 1.2272494075684655, + "grad_norm": 0.14010979235172272, + "learning_rate": 1.8738690601865094e-06, + "loss": 0.0098, + "num_input_tokens_seen": 33863200, + "step": 50235 + }, + { + "epoch": 1.2273715584003126, + "grad_norm": 0.6131026148796082, + "learning_rate": 1.8738275983355188e-06, + "loss": 0.1171, + "num_input_tokens_seen": 33866336, + "step": 50240 + }, + { + "epoch": 1.2274937092321598, + "grad_norm": 0.23327378928661346, + "learning_rate": 1.8737861301298189e-06, + "loss": 0.1202, + "num_input_tokens_seen": 33869728, + "step": 50245 + }, + { + "epoch": 1.227615860064007, + "grad_norm": 14.709436416625977, + "learning_rate": 1.8737446555697112e-06, + "loss": 0.0908, + "num_input_tokens_seen": 33873568, + "step": 50250 + }, + { + "epoch": 1.2277380108958542, + "grad_norm": 18.70151138305664, + "learning_rate": 1.8737031746554972e-06, + "loss": 0.1399, + "num_input_tokens_seen": 33877344, + "step": 50255 + }, + { + "epoch": 1.2278601617277014, + "grad_norm": 0.35863348841667175, + "learning_rate": 1.8736616873874788e-06, + "loss": 0.0458, + "num_input_tokens_seen": 33880672, + "step": 50260 + }, + { + "epoch": 1.2279823125595486, + "grad_norm": 0.9880019426345825, + "learning_rate": 1.8736201937659577e-06, + "loss": 0.0731, + "num_input_tokens_seen": 33884512, + "step": 50265 + }, + { + "epoch": 1.2281044633913958, + "grad_norm": 24.686311721801758, + "learning_rate": 1.8735786937912358e-06, + "loss": 0.0707, + "num_input_tokens_seen": 33888544, + "step": 50270 + }, + { + "epoch": 1.2282266142232428, + "grad_norm": 45.313541412353516, + "learning_rate": 1.8735371874636142e-06, + "loss": 0.0657, + "num_input_tokens_seen": 33891680, + "step": 50275 + }, + { + "epoch": 1.22834876505509, + "grad_norm": 30.227420806884766, + "learning_rate": 1.8734956747833955e-06, + "loss": 0.0042, + "num_input_tokens_seen": 33895584, + "step": 50280 + }, + { + "epoch": 1.2284709158869371, + "grad_norm": 0.12649253010749817, + "learning_rate": 1.8734541557508811e-06, + "loss": 0.044, + "num_input_tokens_seen": 33898784, + "step": 50285 + }, + { + "epoch": 1.2285930667187843, + "grad_norm": 0.6267743706703186, + "learning_rate": 1.8734126303663733e-06, + "loss": 0.1212, + "num_input_tokens_seen": 33902112, + "step": 50290 + }, + { + "epoch": 1.2287152175506315, + "grad_norm": 17.582807540893555, + "learning_rate": 1.873371098630174e-06, + "loss": 0.0733, + "num_input_tokens_seen": 33905312, + "step": 50295 + }, + { + "epoch": 1.2288373683824787, + "grad_norm": 27.228029251098633, + "learning_rate": 1.8733295605425852e-06, + "loss": 0.1261, + "num_input_tokens_seen": 33908832, + "step": 50300 + }, + { + "epoch": 1.228959519214326, + "grad_norm": 145.7957763671875, + "learning_rate": 1.8732880161039088e-06, + "loss": 0.1365, + "num_input_tokens_seen": 33912352, + "step": 50305 + }, + { + "epoch": 1.229081670046173, + "grad_norm": 4.750298500061035, + "learning_rate": 1.873246465314447e-06, + "loss": 0.0527, + "num_input_tokens_seen": 33915360, + "step": 50310 + }, + { + "epoch": 1.2292038208780203, + "grad_norm": 0.14604468643665314, + "learning_rate": 1.873204908174502e-06, + "loss": 0.1051, + "num_input_tokens_seen": 33919456, + "step": 50315 + }, + { + "epoch": 1.2293259717098675, + "grad_norm": 41.39289855957031, + "learning_rate": 1.8731633446843765e-06, + "loss": 0.2591, + "num_input_tokens_seen": 33922464, + "step": 50320 + }, + { + "epoch": 1.2294481225417144, + "grad_norm": 21.35294532775879, + "learning_rate": 1.873121774844372e-06, + "loss": 0.1767, + "num_input_tokens_seen": 33925408, + "step": 50325 + }, + { + "epoch": 1.2295702733735616, + "grad_norm": 0.12231894582509995, + "learning_rate": 1.873080198654791e-06, + "loss": 0.0073, + "num_input_tokens_seen": 33928992, + "step": 50330 + }, + { + "epoch": 1.2296924242054088, + "grad_norm": 0.5598852038383484, + "learning_rate": 1.873038616115936e-06, + "loss": 0.0729, + "num_input_tokens_seen": 33932448, + "step": 50335 + }, + { + "epoch": 1.229814575037256, + "grad_norm": 103.16210174560547, + "learning_rate": 1.8729970272281092e-06, + "loss": 0.0924, + "num_input_tokens_seen": 33935776, + "step": 50340 + }, + { + "epoch": 1.2299367258691032, + "grad_norm": 0.37978076934814453, + "learning_rate": 1.8729554319916137e-06, + "loss": 0.0866, + "num_input_tokens_seen": 33939168, + "step": 50345 + }, + { + "epoch": 1.2300588767009504, + "grad_norm": 27.279321670532227, + "learning_rate": 1.872913830406751e-06, + "loss": 0.0694, + "num_input_tokens_seen": 33942432, + "step": 50350 + }, + { + "epoch": 1.2301810275327976, + "grad_norm": 0.4356905221939087, + "learning_rate": 1.8728722224738244e-06, + "loss": 0.1368, + "num_input_tokens_seen": 33945376, + "step": 50355 + }, + { + "epoch": 1.2303031783646448, + "grad_norm": 0.22323796153068542, + "learning_rate": 1.8728306081931362e-06, + "loss": 0.048, + "num_input_tokens_seen": 33948384, + "step": 50360 + }, + { + "epoch": 1.2304253291964917, + "grad_norm": 0.5146710276603699, + "learning_rate": 1.8727889875649892e-06, + "loss": 0.0403, + "num_input_tokens_seen": 33951520, + "step": 50365 + }, + { + "epoch": 1.230547480028339, + "grad_norm": 0.6455696821212769, + "learning_rate": 1.8727473605896856e-06, + "loss": 0.1404, + "num_input_tokens_seen": 33955040, + "step": 50370 + }, + { + "epoch": 1.230669630860186, + "grad_norm": 0.19283178448677063, + "learning_rate": 1.8727057272675286e-06, + "loss": 0.0039, + "num_input_tokens_seen": 33958304, + "step": 50375 + }, + { + "epoch": 1.2307917816920333, + "grad_norm": 14.63194465637207, + "learning_rate": 1.8726640875988209e-06, + "loss": 0.1388, + "num_input_tokens_seen": 33961888, + "step": 50380 + }, + { + "epoch": 1.2309139325238805, + "grad_norm": 0.23423582315444946, + "learning_rate": 1.8726224415838652e-06, + "loss": 0.1215, + "num_input_tokens_seen": 33965152, + "step": 50385 + }, + { + "epoch": 1.2310360833557277, + "grad_norm": 0.20280465483665466, + "learning_rate": 1.8725807892229644e-06, + "loss": 0.0899, + "num_input_tokens_seen": 33969184, + "step": 50390 + }, + { + "epoch": 1.2311582341875749, + "grad_norm": 0.10060182958841324, + "learning_rate": 1.8725391305164213e-06, + "loss": 0.0357, + "num_input_tokens_seen": 33972320, + "step": 50395 + }, + { + "epoch": 1.231280385019422, + "grad_norm": 0.036975398659706116, + "learning_rate": 1.8724974654645392e-06, + "loss": 0.0211, + "num_input_tokens_seen": 33975776, + "step": 50400 + }, + { + "epoch": 1.2314025358512692, + "grad_norm": 0.09926356375217438, + "learning_rate": 1.8724557940676206e-06, + "loss": 0.0521, + "num_input_tokens_seen": 33978720, + "step": 50405 + }, + { + "epoch": 1.2315246866831164, + "grad_norm": 0.05969943851232529, + "learning_rate": 1.872414116325969e-06, + "loss": 0.0016, + "num_input_tokens_seen": 33982048, + "step": 50410 + }, + { + "epoch": 1.2316468375149634, + "grad_norm": 57.690975189208984, + "learning_rate": 1.8723724322398874e-06, + "loss": 0.1, + "num_input_tokens_seen": 33984928, + "step": 50415 + }, + { + "epoch": 1.2317689883468106, + "grad_norm": 0.20247139036655426, + "learning_rate": 1.8723307418096782e-06, + "loss": 0.0651, + "num_input_tokens_seen": 33988128, + "step": 50420 + }, + { + "epoch": 1.2318911391786578, + "grad_norm": 0.06691229343414307, + "learning_rate": 1.8722890450356457e-06, + "loss": 0.1015, + "num_input_tokens_seen": 33991456, + "step": 50425 + }, + { + "epoch": 1.232013290010505, + "grad_norm": 0.37158066034317017, + "learning_rate": 1.8722473419180926e-06, + "loss": 0.0219, + "num_input_tokens_seen": 33994848, + "step": 50430 + }, + { + "epoch": 1.2321354408423522, + "grad_norm": 40.26687240600586, + "learning_rate": 1.8722056324573226e-06, + "loss": 0.11, + "num_input_tokens_seen": 33997984, + "step": 50435 + }, + { + "epoch": 1.2322575916741993, + "grad_norm": 22.51052474975586, + "learning_rate": 1.872163916653638e-06, + "loss": 0.0329, + "num_input_tokens_seen": 34001248, + "step": 50440 + }, + { + "epoch": 1.2323797425060465, + "grad_norm": 9.003214836120605, + "learning_rate": 1.8721221945073432e-06, + "loss": 0.0817, + "num_input_tokens_seen": 34005088, + "step": 50445 + }, + { + "epoch": 1.2325018933378937, + "grad_norm": 20.297090530395508, + "learning_rate": 1.872080466018741e-06, + "loss": 0.1316, + "num_input_tokens_seen": 34008544, + "step": 50450 + }, + { + "epoch": 1.2326240441697407, + "grad_norm": 38.89817810058594, + "learning_rate": 1.8720387311881352e-06, + "loss": 0.1039, + "num_input_tokens_seen": 34012192, + "step": 50455 + }, + { + "epoch": 1.2327461950015879, + "grad_norm": 1.270237684249878, + "learning_rate": 1.8719969900158293e-06, + "loss": 0.115, + "num_input_tokens_seen": 34015584, + "step": 50460 + }, + { + "epoch": 1.232868345833435, + "grad_norm": 0.2633684277534485, + "learning_rate": 1.8719552425021265e-06, + "loss": 0.0018, + "num_input_tokens_seen": 34018720, + "step": 50465 + }, + { + "epoch": 1.2329904966652823, + "grad_norm": 22.630613327026367, + "learning_rate": 1.8719134886473308e-06, + "loss": 0.0622, + "num_input_tokens_seen": 34022176, + "step": 50470 + }, + { + "epoch": 1.2331126474971295, + "grad_norm": 60.35623550415039, + "learning_rate": 1.8718717284517455e-06, + "loss": 0.029, + "num_input_tokens_seen": 34025248, + "step": 50475 + }, + { + "epoch": 1.2332347983289766, + "grad_norm": 0.048686183989048004, + "learning_rate": 1.871829961915675e-06, + "loss": 0.1482, + "num_input_tokens_seen": 34028704, + "step": 50480 + }, + { + "epoch": 1.2333569491608238, + "grad_norm": 0.037194494158029556, + "learning_rate": 1.871788189039422e-06, + "loss": 0.046, + "num_input_tokens_seen": 34032416, + "step": 50485 + }, + { + "epoch": 1.233479099992671, + "grad_norm": 60.3780632019043, + "learning_rate": 1.8717464098232912e-06, + "loss": 0.055, + "num_input_tokens_seen": 34036704, + "step": 50490 + }, + { + "epoch": 1.2336012508245182, + "grad_norm": 8.005125999450684, + "learning_rate": 1.8717046242675858e-06, + "loss": 0.1558, + "num_input_tokens_seen": 34039776, + "step": 50495 + }, + { + "epoch": 1.2337234016563654, + "grad_norm": 0.16520294547080994, + "learning_rate": 1.8716628323726099e-06, + "loss": 0.0559, + "num_input_tokens_seen": 34043040, + "step": 50500 + }, + { + "epoch": 1.2338455524882124, + "grad_norm": 22.122344970703125, + "learning_rate": 1.8716210341386676e-06, + "loss": 0.0497, + "num_input_tokens_seen": 34046048, + "step": 50505 + }, + { + "epoch": 1.2339677033200596, + "grad_norm": 0.7964180707931519, + "learning_rate": 1.8715792295660623e-06, + "loss": 0.1009, + "num_input_tokens_seen": 34049696, + "step": 50510 + }, + { + "epoch": 1.2340898541519068, + "grad_norm": 14.673161506652832, + "learning_rate": 1.8715374186550989e-06, + "loss": 0.1194, + "num_input_tokens_seen": 34052768, + "step": 50515 + }, + { + "epoch": 1.234212004983754, + "grad_norm": 0.02644650824368, + "learning_rate": 1.8714956014060808e-06, + "loss": 0.1266, + "num_input_tokens_seen": 34056096, + "step": 50520 + }, + { + "epoch": 1.2343341558156011, + "grad_norm": 0.41322630643844604, + "learning_rate": 1.8714537778193122e-06, + "loss": 0.1359, + "num_input_tokens_seen": 34059552, + "step": 50525 + }, + { + "epoch": 1.2344563066474483, + "grad_norm": 0.3122689723968506, + "learning_rate": 1.8714119478950974e-06, + "loss": 0.0233, + "num_input_tokens_seen": 34063136, + "step": 50530 + }, + { + "epoch": 1.2345784574792955, + "grad_norm": 0.4729823172092438, + "learning_rate": 1.8713701116337406e-06, + "loss": 0.1906, + "num_input_tokens_seen": 34066464, + "step": 50535 + }, + { + "epoch": 1.2347006083111425, + "grad_norm": 0.9967532753944397, + "learning_rate": 1.8713282690355459e-06, + "loss": 0.0603, + "num_input_tokens_seen": 34070112, + "step": 50540 + }, + { + "epoch": 1.2348227591429897, + "grad_norm": 0.2099456489086151, + "learning_rate": 1.8712864201008175e-06, + "loss": 0.0033, + "num_input_tokens_seen": 34073568, + "step": 50545 + }, + { + "epoch": 1.2349449099748369, + "grad_norm": 0.5129920840263367, + "learning_rate": 1.87124456482986e-06, + "loss": 0.1586, + "num_input_tokens_seen": 34076896, + "step": 50550 + }, + { + "epoch": 1.235067060806684, + "grad_norm": 17.669719696044922, + "learning_rate": 1.8712027032229778e-06, + "loss": 0.0601, + "num_input_tokens_seen": 34080160, + "step": 50555 + }, + { + "epoch": 1.2351892116385312, + "grad_norm": 0.3886941969394684, + "learning_rate": 1.8711608352804754e-06, + "loss": 0.1422, + "num_input_tokens_seen": 34083808, + "step": 50560 + }, + { + "epoch": 1.2353113624703784, + "grad_norm": 28.27507781982422, + "learning_rate": 1.8711189610026568e-06, + "loss": 0.1348, + "num_input_tokens_seen": 34087584, + "step": 50565 + }, + { + "epoch": 1.2354335133022256, + "grad_norm": 0.2292449027299881, + "learning_rate": 1.8710770803898268e-06, + "loss": 0.0303, + "num_input_tokens_seen": 34091040, + "step": 50570 + }, + { + "epoch": 1.2355556641340728, + "grad_norm": 0.25094738602638245, + "learning_rate": 1.8710351934422901e-06, + "loss": 0.0039, + "num_input_tokens_seen": 34095008, + "step": 50575 + }, + { + "epoch": 1.23567781496592, + "grad_norm": 124.43775939941406, + "learning_rate": 1.870993300160351e-06, + "loss": 0.0217, + "num_input_tokens_seen": 34098208, + "step": 50580 + }, + { + "epoch": 1.2357999657977672, + "grad_norm": 78.6186752319336, + "learning_rate": 1.8709514005443149e-06, + "loss": 0.0245, + "num_input_tokens_seen": 34101344, + "step": 50585 + }, + { + "epoch": 1.2359221166296142, + "grad_norm": 155.6714324951172, + "learning_rate": 1.8709094945944855e-06, + "loss": 0.0373, + "num_input_tokens_seen": 34104480, + "step": 50590 + }, + { + "epoch": 1.2360442674614613, + "grad_norm": 0.1335565149784088, + "learning_rate": 1.870867582311168e-06, + "loss": 0.0477, + "num_input_tokens_seen": 34107744, + "step": 50595 + }, + { + "epoch": 1.2361664182933085, + "grad_norm": 0.07351504266262054, + "learning_rate": 1.8708256636946671e-06, + "loss": 0.0901, + "num_input_tokens_seen": 34111072, + "step": 50600 + }, + { + "epoch": 1.2362885691251557, + "grad_norm": 0.7340015769004822, + "learning_rate": 1.870783738745288e-06, + "loss": 0.0008, + "num_input_tokens_seen": 34114912, + "step": 50605 + }, + { + "epoch": 1.236410719957003, + "grad_norm": 8.105185508728027, + "learning_rate": 1.8707418074633354e-06, + "loss": 0.1406, + "num_input_tokens_seen": 34118496, + "step": 50610 + }, + { + "epoch": 1.23653287078885, + "grad_norm": 0.1442503035068512, + "learning_rate": 1.870699869849114e-06, + "loss": 0.0007, + "num_input_tokens_seen": 34121696, + "step": 50615 + }, + { + "epoch": 1.2366550216206973, + "grad_norm": 0.697321355342865, + "learning_rate": 1.870657925902929e-06, + "loss": 0.0469, + "num_input_tokens_seen": 34124704, + "step": 50620 + }, + { + "epoch": 1.2367771724525445, + "grad_norm": 0.26186203956604004, + "learning_rate": 1.8706159756250855e-06, + "loss": 0.076, + "num_input_tokens_seen": 34128032, + "step": 50625 + }, + { + "epoch": 1.2368993232843914, + "grad_norm": 34.05559539794922, + "learning_rate": 1.8705740190158882e-06, + "loss": 0.1679, + "num_input_tokens_seen": 34131680, + "step": 50630 + }, + { + "epoch": 1.2370214741162386, + "grad_norm": 15.350863456726074, + "learning_rate": 1.8705320560756425e-06, + "loss": 0.1274, + "num_input_tokens_seen": 34134816, + "step": 50635 + }, + { + "epoch": 1.2371436249480858, + "grad_norm": 0.6870012283325195, + "learning_rate": 1.8704900868046537e-06, + "loss": 0.1526, + "num_input_tokens_seen": 34138272, + "step": 50640 + }, + { + "epoch": 1.237265775779933, + "grad_norm": 20.827638626098633, + "learning_rate": 1.8704481112032272e-06, + "loss": 0.2716, + "num_input_tokens_seen": 34141792, + "step": 50645 + }, + { + "epoch": 1.2373879266117802, + "grad_norm": 11.061613082885742, + "learning_rate": 1.8704061292716672e-06, + "loss": 0.0997, + "num_input_tokens_seen": 34145312, + "step": 50650 + }, + { + "epoch": 1.2375100774436274, + "grad_norm": 0.1646193563938141, + "learning_rate": 1.8703641410102802e-06, + "loss": 0.0765, + "num_input_tokens_seen": 34148704, + "step": 50655 + }, + { + "epoch": 1.2376322282754746, + "grad_norm": 0.5354453921318054, + "learning_rate": 1.8703221464193709e-06, + "loss": 0.0349, + "num_input_tokens_seen": 34151840, + "step": 50660 + }, + { + "epoch": 1.2377543791073218, + "grad_norm": 0.34802761673927307, + "learning_rate": 1.8702801454992448e-06, + "loss": 0.0264, + "num_input_tokens_seen": 34154848, + "step": 50665 + }, + { + "epoch": 1.237876529939169, + "grad_norm": 0.7380416393280029, + "learning_rate": 1.8702381382502076e-06, + "loss": 0.0021, + "num_input_tokens_seen": 34158752, + "step": 50670 + }, + { + "epoch": 1.2379986807710162, + "grad_norm": 0.05595047399401665, + "learning_rate": 1.8701961246725643e-06, + "loss": 0.0724, + "num_input_tokens_seen": 34161888, + "step": 50675 + }, + { + "epoch": 1.2381208316028631, + "grad_norm": 4.639143943786621, + "learning_rate": 1.870154104766621e-06, + "loss": 0.0506, + "num_input_tokens_seen": 34165280, + "step": 50680 + }, + { + "epoch": 1.2382429824347103, + "grad_norm": 14.277225494384766, + "learning_rate": 1.870112078532683e-06, + "loss": 0.1808, + "num_input_tokens_seen": 34168608, + "step": 50685 + }, + { + "epoch": 1.2383651332665575, + "grad_norm": 0.036859460175037384, + "learning_rate": 1.870070045971056e-06, + "loss": 0.1062, + "num_input_tokens_seen": 34171872, + "step": 50690 + }, + { + "epoch": 1.2384872840984047, + "grad_norm": 0.103855200111866, + "learning_rate": 1.870028007082045e-06, + "loss": 0.1096, + "num_input_tokens_seen": 34175520, + "step": 50695 + }, + { + "epoch": 1.2386094349302519, + "grad_norm": 0.5549172163009644, + "learning_rate": 1.869985961865957e-06, + "loss": 0.1077, + "num_input_tokens_seen": 34178592, + "step": 50700 + }, + { + "epoch": 1.238731585762099, + "grad_norm": 0.07171110808849335, + "learning_rate": 1.869943910323097e-06, + "loss": 0.1536, + "num_input_tokens_seen": 34182048, + "step": 50705 + }, + { + "epoch": 1.2388537365939463, + "grad_norm": 14.18834114074707, + "learning_rate": 1.8699018524537706e-06, + "loss": 0.201, + "num_input_tokens_seen": 34186016, + "step": 50710 + }, + { + "epoch": 1.2389758874257935, + "grad_norm": 0.6210215091705322, + "learning_rate": 1.8698597882582842e-06, + "loss": 0.079, + "num_input_tokens_seen": 34189792, + "step": 50715 + }, + { + "epoch": 1.2390980382576404, + "grad_norm": 0.7056021690368652, + "learning_rate": 1.8698177177369433e-06, + "loss": 0.1121, + "num_input_tokens_seen": 34193312, + "step": 50720 + }, + { + "epoch": 1.2392201890894876, + "grad_norm": 0.4129893183708191, + "learning_rate": 1.869775640890054e-06, + "loss": 0.0983, + "num_input_tokens_seen": 34196960, + "step": 50725 + }, + { + "epoch": 1.2393423399213348, + "grad_norm": 4.039682388305664, + "learning_rate": 1.8697335577179226e-06, + "loss": 0.0083, + "num_input_tokens_seen": 34200288, + "step": 50730 + }, + { + "epoch": 1.239464490753182, + "grad_norm": 28.408931732177734, + "learning_rate": 1.8696914682208544e-06, + "loss": 0.0796, + "num_input_tokens_seen": 34203296, + "step": 50735 + }, + { + "epoch": 1.2395866415850292, + "grad_norm": 0.11473660171031952, + "learning_rate": 1.8696493723991562e-06, + "loss": 0.0923, + "num_input_tokens_seen": 34206688, + "step": 50740 + }, + { + "epoch": 1.2397087924168764, + "grad_norm": 0.5018324851989746, + "learning_rate": 1.8696072702531339e-06, + "loss": 0.0365, + "num_input_tokens_seen": 34209760, + "step": 50745 + }, + { + "epoch": 1.2398309432487236, + "grad_norm": 30.176328659057617, + "learning_rate": 1.8695651617830934e-06, + "loss": 0.0933, + "num_input_tokens_seen": 34213152, + "step": 50750 + }, + { + "epoch": 1.2399530940805708, + "grad_norm": 0.46327489614486694, + "learning_rate": 1.8695230469893413e-06, + "loss": 0.1318, + "num_input_tokens_seen": 34216224, + "step": 50755 + }, + { + "epoch": 1.240075244912418, + "grad_norm": 0.4507075846195221, + "learning_rate": 1.8694809258721835e-06, + "loss": 0.0015, + "num_input_tokens_seen": 34219552, + "step": 50760 + }, + { + "epoch": 1.2401973957442651, + "grad_norm": 0.1781848669052124, + "learning_rate": 1.8694387984319268e-06, + "loss": 0.0057, + "num_input_tokens_seen": 34222624, + "step": 50765 + }, + { + "epoch": 1.240319546576112, + "grad_norm": 0.054551366716623306, + "learning_rate": 1.8693966646688774e-06, + "loss": 0.0006, + "num_input_tokens_seen": 34225504, + "step": 50770 + }, + { + "epoch": 1.2404416974079593, + "grad_norm": 0.09252890199422836, + "learning_rate": 1.8693545245833415e-06, + "loss": 0.0929, + "num_input_tokens_seen": 34228576, + "step": 50775 + }, + { + "epoch": 1.2405638482398065, + "grad_norm": 24.160945892333984, + "learning_rate": 1.8693123781756258e-06, + "loss": 0.1348, + "num_input_tokens_seen": 34231904, + "step": 50780 + }, + { + "epoch": 1.2406859990716537, + "grad_norm": 0.16220663487911224, + "learning_rate": 1.8692702254460363e-06, + "loss": 0.0009, + "num_input_tokens_seen": 34235040, + "step": 50785 + }, + { + "epoch": 1.2408081499035009, + "grad_norm": 0.23448047041893005, + "learning_rate": 1.8692280663948802e-06, + "loss": 0.0466, + "num_input_tokens_seen": 34238816, + "step": 50790 + }, + { + "epoch": 1.240930300735348, + "grad_norm": 46.69455337524414, + "learning_rate": 1.8691859010224636e-06, + "loss": 0.2587, + "num_input_tokens_seen": 34242208, + "step": 50795 + }, + { + "epoch": 1.2410524515671952, + "grad_norm": 0.009700641967356205, + "learning_rate": 1.8691437293290936e-06, + "loss": 0.0009, + "num_input_tokens_seen": 34245344, + "step": 50800 + }, + { + "epoch": 1.2411746023990424, + "grad_norm": 71.18877410888672, + "learning_rate": 1.8691015513150766e-06, + "loss": 0.0756, + "num_input_tokens_seen": 34248544, + "step": 50805 + }, + { + "epoch": 1.2412967532308894, + "grad_norm": 0.861115574836731, + "learning_rate": 1.8690593669807191e-06, + "loss": 0.1529, + "num_input_tokens_seen": 34251808, + "step": 50810 + }, + { + "epoch": 1.2414189040627366, + "grad_norm": 31.997013092041016, + "learning_rate": 1.8690171763263284e-06, + "loss": 0.093, + "num_input_tokens_seen": 34255328, + "step": 50815 + }, + { + "epoch": 1.2415410548945838, + "grad_norm": 43.22612762451172, + "learning_rate": 1.868974979352211e-06, + "loss": 0.1919, + "num_input_tokens_seen": 34258528, + "step": 50820 + }, + { + "epoch": 1.241663205726431, + "grad_norm": 26.389102935791016, + "learning_rate": 1.8689327760586737e-06, + "loss": 0.0842, + "num_input_tokens_seen": 34261920, + "step": 50825 + }, + { + "epoch": 1.2417853565582782, + "grad_norm": 5.921286106109619, + "learning_rate": 1.8688905664460237e-06, + "loss": 0.0521, + "num_input_tokens_seen": 34264992, + "step": 50830 + }, + { + "epoch": 1.2419075073901253, + "grad_norm": 0.04653450474143028, + "learning_rate": 1.8688483505145677e-06, + "loss": 0.0758, + "num_input_tokens_seen": 34268128, + "step": 50835 + }, + { + "epoch": 1.2420296582219725, + "grad_norm": 0.05636782944202423, + "learning_rate": 1.8688061282646129e-06, + "loss": 0.0313, + "num_input_tokens_seen": 34271200, + "step": 50840 + }, + { + "epoch": 1.2421518090538197, + "grad_norm": 0.5799795985221863, + "learning_rate": 1.868763899696466e-06, + "loss": 0.104, + "num_input_tokens_seen": 34274400, + "step": 50845 + }, + { + "epoch": 1.242273959885667, + "grad_norm": 14.093242645263672, + "learning_rate": 1.8687216648104344e-06, + "loss": 0.1647, + "num_input_tokens_seen": 34278240, + "step": 50850 + }, + { + "epoch": 1.242396110717514, + "grad_norm": 36.90916442871094, + "learning_rate": 1.8686794236068254e-06, + "loss": 0.1633, + "num_input_tokens_seen": 34281440, + "step": 50855 + }, + { + "epoch": 1.242518261549361, + "grad_norm": 15.721162796020508, + "learning_rate": 1.8686371760859458e-06, + "loss": 0.0966, + "num_input_tokens_seen": 34284576, + "step": 50860 + }, + { + "epoch": 1.2426404123812083, + "grad_norm": 87.52764129638672, + "learning_rate": 1.8685949222481034e-06, + "loss": 0.1385, + "num_input_tokens_seen": 34288032, + "step": 50865 + }, + { + "epoch": 1.2427625632130554, + "grad_norm": 0.8054718971252441, + "learning_rate": 1.8685526620936048e-06, + "loss": 0.0023, + "num_input_tokens_seen": 34291296, + "step": 50870 + }, + { + "epoch": 1.2428847140449026, + "grad_norm": 1.4423446655273438, + "learning_rate": 1.8685103956227578e-06, + "loss": 0.0586, + "num_input_tokens_seen": 34294496, + "step": 50875 + }, + { + "epoch": 1.2430068648767498, + "grad_norm": 49.275943756103516, + "learning_rate": 1.8684681228358694e-06, + "loss": 0.1438, + "num_input_tokens_seen": 34297632, + "step": 50880 + }, + { + "epoch": 1.243129015708597, + "grad_norm": 1.6575486660003662, + "learning_rate": 1.8684258437332472e-06, + "loss": 0.2076, + "num_input_tokens_seen": 34301280, + "step": 50885 + }, + { + "epoch": 1.2432511665404442, + "grad_norm": 0.05155455693602562, + "learning_rate": 1.8683835583151986e-06, + "loss": 0.0412, + "num_input_tokens_seen": 34304992, + "step": 50890 + }, + { + "epoch": 1.2433733173722914, + "grad_norm": 0.618511438369751, + "learning_rate": 1.8683412665820314e-06, + "loss": 0.0577, + "num_input_tokens_seen": 34308448, + "step": 50895 + }, + { + "epoch": 1.2434954682041384, + "grad_norm": 66.2889175415039, + "learning_rate": 1.868298968534053e-06, + "loss": 0.2242, + "num_input_tokens_seen": 34312160, + "step": 50900 + }, + { + "epoch": 1.2436176190359856, + "grad_norm": 54.50128936767578, + "learning_rate": 1.8682566641715709e-06, + "loss": 0.1221, + "num_input_tokens_seen": 34315680, + "step": 50905 + }, + { + "epoch": 1.2437397698678327, + "grad_norm": 1.51670503616333, + "learning_rate": 1.8682143534948928e-06, + "loss": 0.0311, + "num_input_tokens_seen": 34319008, + "step": 50910 + }, + { + "epoch": 1.24386192069968, + "grad_norm": 137.47225952148438, + "learning_rate": 1.8681720365043263e-06, + "loss": 0.0355, + "num_input_tokens_seen": 34322400, + "step": 50915 + }, + { + "epoch": 1.2439840715315271, + "grad_norm": 0.07625175267457962, + "learning_rate": 1.8681297132001794e-06, + "loss": 0.0827, + "num_input_tokens_seen": 34325984, + "step": 50920 + }, + { + "epoch": 1.2441062223633743, + "grad_norm": 0.4132697880268097, + "learning_rate": 1.8680873835827598e-06, + "loss": 0.0321, + "num_input_tokens_seen": 34329440, + "step": 50925 + }, + { + "epoch": 1.2442283731952215, + "grad_norm": 0.17457693815231323, + "learning_rate": 1.8680450476523748e-06, + "loss": 0.05, + "num_input_tokens_seen": 34332704, + "step": 50930 + }, + { + "epoch": 1.2443505240270687, + "grad_norm": 0.5495626330375671, + "learning_rate": 1.8680027054093332e-06, + "loss": 0.0462, + "num_input_tokens_seen": 34336032, + "step": 50935 + }, + { + "epoch": 1.2444726748589159, + "grad_norm": 8.839376449584961, + "learning_rate": 1.8679603568539423e-06, + "loss": 0.0424, + "num_input_tokens_seen": 34339040, + "step": 50940 + }, + { + "epoch": 1.244594825690763, + "grad_norm": 14.500417709350586, + "learning_rate": 1.8679180019865102e-06, + "loss": 0.2089, + "num_input_tokens_seen": 34342240, + "step": 50945 + }, + { + "epoch": 1.24471697652261, + "grad_norm": 0.2148173600435257, + "learning_rate": 1.867875640807345e-06, + "loss": 0.0468, + "num_input_tokens_seen": 34345696, + "step": 50950 + }, + { + "epoch": 1.2448391273544572, + "grad_norm": 0.01791485585272312, + "learning_rate": 1.8678332733167546e-06, + "loss": 0.0874, + "num_input_tokens_seen": 34349216, + "step": 50955 + }, + { + "epoch": 1.2449612781863044, + "grad_norm": 26.81791114807129, + "learning_rate": 1.8677908995150475e-06, + "loss": 0.1386, + "num_input_tokens_seen": 34352416, + "step": 50960 + }, + { + "epoch": 1.2450834290181516, + "grad_norm": 0.6094515323638916, + "learning_rate": 1.8677485194025313e-06, + "loss": 0.0869, + "num_input_tokens_seen": 34356000, + "step": 50965 + }, + { + "epoch": 1.2452055798499988, + "grad_norm": 168.68463134765625, + "learning_rate": 1.8677061329795145e-06, + "loss": 0.0388, + "num_input_tokens_seen": 34359712, + "step": 50970 + }, + { + "epoch": 1.245327730681846, + "grad_norm": 19.0019588470459, + "learning_rate": 1.8676637402463054e-06, + "loss": 0.1767, + "num_input_tokens_seen": 34363040, + "step": 50975 + }, + { + "epoch": 1.2454498815136932, + "grad_norm": 21.750730514526367, + "learning_rate": 1.867621341203212e-06, + "loss": 0.0821, + "num_input_tokens_seen": 34366176, + "step": 50980 + }, + { + "epoch": 1.2455720323455404, + "grad_norm": 21.11234474182129, + "learning_rate": 1.867578935850543e-06, + "loss": 0.0614, + "num_input_tokens_seen": 34369312, + "step": 50985 + }, + { + "epoch": 1.2456941831773873, + "grad_norm": 0.19964002072811127, + "learning_rate": 1.867536524188607e-06, + "loss": 0.003, + "num_input_tokens_seen": 34372576, + "step": 50990 + }, + { + "epoch": 1.2458163340092345, + "grad_norm": 0.3555033206939697, + "learning_rate": 1.8674941062177117e-06, + "loss": 0.1066, + "num_input_tokens_seen": 34376096, + "step": 50995 + }, + { + "epoch": 1.2459384848410817, + "grad_norm": 0.11165308207273483, + "learning_rate": 1.8674516819381657e-06, + "loss": 0.0379, + "num_input_tokens_seen": 34379616, + "step": 51000 + }, + { + "epoch": 1.246060635672929, + "grad_norm": 1.0125317573547363, + "learning_rate": 1.867409251350278e-06, + "loss": 0.0926, + "num_input_tokens_seen": 34382816, + "step": 51005 + }, + { + "epoch": 1.246182786504776, + "grad_norm": 36.29445266723633, + "learning_rate": 1.8673668144543567e-06, + "loss": 0.116, + "num_input_tokens_seen": 34386208, + "step": 51010 + }, + { + "epoch": 1.2463049373366233, + "grad_norm": 9.032966613769531, + "learning_rate": 1.867324371250711e-06, + "loss": 0.1146, + "num_input_tokens_seen": 34389344, + "step": 51015 + }, + { + "epoch": 1.2464270881684705, + "grad_norm": 3.523752450942993, + "learning_rate": 1.8672819217396491e-06, + "loss": 0.0864, + "num_input_tokens_seen": 34392800, + "step": 51020 + }, + { + "epoch": 1.2465492390003177, + "grad_norm": 0.07758628576993942, + "learning_rate": 1.8672394659214797e-06, + "loss": 0.056, + "num_input_tokens_seen": 34396064, + "step": 51025 + }, + { + "epoch": 1.2466713898321649, + "grad_norm": 17.795822143554688, + "learning_rate": 1.8671970037965116e-06, + "loss": 0.1655, + "num_input_tokens_seen": 34398880, + "step": 51030 + }, + { + "epoch": 1.246793540664012, + "grad_norm": 133.96359252929688, + "learning_rate": 1.8671545353650537e-06, + "loss": 0.1223, + "num_input_tokens_seen": 34402400, + "step": 51035 + }, + { + "epoch": 1.246915691495859, + "grad_norm": 0.14668966829776764, + "learning_rate": 1.8671120606274149e-06, + "loss": 0.0025, + "num_input_tokens_seen": 34405984, + "step": 51040 + }, + { + "epoch": 1.2470378423277062, + "grad_norm": 30.48691749572754, + "learning_rate": 1.8670695795839038e-06, + "loss": 0.0859, + "num_input_tokens_seen": 34409440, + "step": 51045 + }, + { + "epoch": 1.2471599931595534, + "grad_norm": 0.7731919288635254, + "learning_rate": 1.8670270922348296e-06, + "loss": 0.0481, + "num_input_tokens_seen": 34412576, + "step": 51050 + }, + { + "epoch": 1.2472821439914006, + "grad_norm": 0.43882834911346436, + "learning_rate": 1.866984598580501e-06, + "loss": 0.1475, + "num_input_tokens_seen": 34415776, + "step": 51055 + }, + { + "epoch": 1.2474042948232478, + "grad_norm": 0.40969663858413696, + "learning_rate": 1.8669420986212274e-06, + "loss": 0.0036, + "num_input_tokens_seen": 34419296, + "step": 51060 + }, + { + "epoch": 1.247526445655095, + "grad_norm": 3.498124361038208, + "learning_rate": 1.866899592357318e-06, + "loss": 0.1043, + "num_input_tokens_seen": 34422432, + "step": 51065 + }, + { + "epoch": 1.2476485964869422, + "grad_norm": 62.7524299621582, + "learning_rate": 1.866857079789081e-06, + "loss": 0.0421, + "num_input_tokens_seen": 34425568, + "step": 51070 + }, + { + "epoch": 1.2477707473187891, + "grad_norm": 38.86537170410156, + "learning_rate": 1.8668145609168265e-06, + "loss": 0.1972, + "num_input_tokens_seen": 34428960, + "step": 51075 + }, + { + "epoch": 1.2478928981506363, + "grad_norm": 42.30865478515625, + "learning_rate": 1.8667720357408632e-06, + "loss": 0.0566, + "num_input_tokens_seen": 34432352, + "step": 51080 + }, + { + "epoch": 1.2480150489824835, + "grad_norm": 18.48854637145996, + "learning_rate": 1.8667295042615006e-06, + "loss": 0.0437, + "num_input_tokens_seen": 34435680, + "step": 51085 + }, + { + "epoch": 1.2481371998143307, + "grad_norm": 0.07613479346036911, + "learning_rate": 1.866686966479048e-06, + "loss": 0.0038, + "num_input_tokens_seen": 34439904, + "step": 51090 + }, + { + "epoch": 1.2482593506461779, + "grad_norm": 199.28736877441406, + "learning_rate": 1.8666444223938145e-06, + "loss": 0.2072, + "num_input_tokens_seen": 34443104, + "step": 51095 + }, + { + "epoch": 1.248381501478025, + "grad_norm": 3.169609546661377, + "learning_rate": 1.8666018720061097e-06, + "loss": 0.2199, + "num_input_tokens_seen": 34446240, + "step": 51100 + }, + { + "epoch": 1.2485036523098723, + "grad_norm": 0.19156071543693542, + "learning_rate": 1.8665593153162429e-06, + "loss": 0.0821, + "num_input_tokens_seen": 34449888, + "step": 51105 + }, + { + "epoch": 1.2486258031417194, + "grad_norm": 0.11386224627494812, + "learning_rate": 1.8665167523245238e-06, + "loss": 0.0439, + "num_input_tokens_seen": 34453536, + "step": 51110 + }, + { + "epoch": 1.2487479539735666, + "grad_norm": 0.17927947640419006, + "learning_rate": 1.8664741830312618e-06, + "loss": 0.0081, + "num_input_tokens_seen": 34456928, + "step": 51115 + }, + { + "epoch": 1.2488701048054138, + "grad_norm": 0.2570532262325287, + "learning_rate": 1.8664316074367666e-06, + "loss": 0.0491, + "num_input_tokens_seen": 34459936, + "step": 51120 + }, + { + "epoch": 1.2489922556372608, + "grad_norm": 7.341938018798828, + "learning_rate": 1.8663890255413474e-06, + "loss": 0.0583, + "num_input_tokens_seen": 34463328, + "step": 51125 + }, + { + "epoch": 1.249114406469108, + "grad_norm": 0.4755401313304901, + "learning_rate": 1.8663464373453146e-06, + "loss": 0.1613, + "num_input_tokens_seen": 34466528, + "step": 51130 + }, + { + "epoch": 1.2492365573009552, + "grad_norm": 49.70253372192383, + "learning_rate": 1.8663038428489775e-06, + "loss": 0.1676, + "num_input_tokens_seen": 34470048, + "step": 51135 + }, + { + "epoch": 1.2493587081328024, + "grad_norm": 10.13922119140625, + "learning_rate": 1.8662612420526455e-06, + "loss": 0.0033, + "num_input_tokens_seen": 34473632, + "step": 51140 + }, + { + "epoch": 1.2494808589646496, + "grad_norm": 1.5358637571334839, + "learning_rate": 1.866218634956629e-06, + "loss": 0.0473, + "num_input_tokens_seen": 34476640, + "step": 51145 + }, + { + "epoch": 1.2496030097964967, + "grad_norm": 0.0326588898897171, + "learning_rate": 1.8661760215612374e-06, + "loss": 0.0546, + "num_input_tokens_seen": 34479904, + "step": 51150 + }, + { + "epoch": 1.249725160628344, + "grad_norm": 0.05521900951862335, + "learning_rate": 1.8661334018667806e-06, + "loss": 0.0025, + "num_input_tokens_seen": 34483360, + "step": 51155 + }, + { + "epoch": 1.2498473114601911, + "grad_norm": 188.50030517578125, + "learning_rate": 1.8660907758735693e-06, + "loss": 0.1366, + "num_input_tokens_seen": 34486240, + "step": 51160 + }, + { + "epoch": 1.249969462292038, + "grad_norm": 18.786170959472656, + "learning_rate": 1.8660481435819127e-06, + "loss": 0.1577, + "num_input_tokens_seen": 34489248, + "step": 51165 + }, + { + "epoch": 1.2500916131238853, + "grad_norm": 89.58645629882812, + "learning_rate": 1.8660055049921209e-06, + "loss": 0.1395, + "num_input_tokens_seen": 34492320, + "step": 51170 + }, + { + "epoch": 1.2500916131238853, + "eval_loss": 0.1528467833995819, + "eval_runtime": 47.3828, + "eval_samples_per_second": 767.894, + "eval_steps_per_second": 96.005, + "num_input_tokens_seen": 34492320, + "step": 51170 + }, + { + "epoch": 1.2502137639557325, + "grad_norm": 0.44812142848968506, + "learning_rate": 1.8659628601045043e-06, + "loss": 0.0012, + "num_input_tokens_seen": 34495200, + "step": 51175 + }, + { + "epoch": 1.2503359147875797, + "grad_norm": 0.7981209754943848, + "learning_rate": 1.8659202089193728e-06, + "loss": 0.1282, + "num_input_tokens_seen": 34498272, + "step": 51180 + }, + { + "epoch": 1.2504580656194269, + "grad_norm": 0.22657358646392822, + "learning_rate": 1.8658775514370366e-06, + "loss": 0.0379, + "num_input_tokens_seen": 34501344, + "step": 51185 + }, + { + "epoch": 1.250580216451274, + "grad_norm": 44.42075729370117, + "learning_rate": 1.865834887657806e-06, + "loss": 0.0774, + "num_input_tokens_seen": 34504608, + "step": 51190 + }, + { + "epoch": 1.2507023672831212, + "grad_norm": 31.185182571411133, + "learning_rate": 1.8657922175819913e-06, + "loss": 0.0997, + "num_input_tokens_seen": 34508256, + "step": 51195 + }, + { + "epoch": 1.2508245181149684, + "grad_norm": 0.07159898430109024, + "learning_rate": 1.8657495412099026e-06, + "loss": 0.0893, + "num_input_tokens_seen": 34511968, + "step": 51200 + }, + { + "epoch": 1.2509466689468156, + "grad_norm": 0.10588917136192322, + "learning_rate": 1.8657068585418502e-06, + "loss": 0.1052, + "num_input_tokens_seen": 34515232, + "step": 51205 + }, + { + "epoch": 1.2510688197786628, + "grad_norm": 81.37196350097656, + "learning_rate": 1.865664169578145e-06, + "loss": 0.1418, + "num_input_tokens_seen": 34518240, + "step": 51210 + }, + { + "epoch": 1.25119097061051, + "grad_norm": 0.0456620417535305, + "learning_rate": 1.8656214743190972e-06, + "loss": 0.0726, + "num_input_tokens_seen": 34521696, + "step": 51215 + }, + { + "epoch": 1.251313121442357, + "grad_norm": 74.22692108154297, + "learning_rate": 1.865578772765017e-06, + "loss": 0.2161, + "num_input_tokens_seen": 34524768, + "step": 51220 + }, + { + "epoch": 1.2514352722742041, + "grad_norm": 1.6251906156539917, + "learning_rate": 1.8655360649162151e-06, + "loss": 0.0474, + "num_input_tokens_seen": 34527776, + "step": 51225 + }, + { + "epoch": 1.2515574231060513, + "grad_norm": 0.29848966002464294, + "learning_rate": 1.8654933507730025e-06, + "loss": 0.065, + "num_input_tokens_seen": 34531040, + "step": 51230 + }, + { + "epoch": 1.2516795739378985, + "grad_norm": 9.70097827911377, + "learning_rate": 1.865450630335689e-06, + "loss": 0.0676, + "num_input_tokens_seen": 34534560, + "step": 51235 + }, + { + "epoch": 1.2518017247697457, + "grad_norm": 13.933403015136719, + "learning_rate": 1.865407903604586e-06, + "loss": 0.1243, + "num_input_tokens_seen": 34537568, + "step": 51240 + }, + { + "epoch": 1.251923875601593, + "grad_norm": 42.59235382080078, + "learning_rate": 1.865365170580004e-06, + "loss": 0.1417, + "num_input_tokens_seen": 34540960, + "step": 51245 + }, + { + "epoch": 1.2520460264334399, + "grad_norm": 2.9506120681762695, + "learning_rate": 1.8653224312622534e-06, + "loss": 0.1178, + "num_input_tokens_seen": 34544416, + "step": 51250 + }, + { + "epoch": 1.252168177265287, + "grad_norm": 0.5883849859237671, + "learning_rate": 1.8652796856516458e-06, + "loss": 0.0324, + "num_input_tokens_seen": 34547232, + "step": 51255 + }, + { + "epoch": 1.2522903280971343, + "grad_norm": 0.2781030833721161, + "learning_rate": 1.8652369337484912e-06, + "loss": 0.002, + "num_input_tokens_seen": 34550240, + "step": 51260 + }, + { + "epoch": 1.2524124789289814, + "grad_norm": 0.769974946975708, + "learning_rate": 1.8651941755531012e-06, + "loss": 0.1198, + "num_input_tokens_seen": 34553248, + "step": 51265 + }, + { + "epoch": 1.2525346297608286, + "grad_norm": 0.09712295234203339, + "learning_rate": 1.8651514110657863e-06, + "loss": 0.0548, + "num_input_tokens_seen": 34556384, + "step": 51270 + }, + { + "epoch": 1.2526567805926758, + "grad_norm": 0.042555950582027435, + "learning_rate": 1.8651086402868574e-06, + "loss": 0.1616, + "num_input_tokens_seen": 34560224, + "step": 51275 + }, + { + "epoch": 1.252778931424523, + "grad_norm": 0.05184526368975639, + "learning_rate": 1.865065863216626e-06, + "loss": 0.1452, + "num_input_tokens_seen": 34563296, + "step": 51280 + }, + { + "epoch": 1.2529010822563702, + "grad_norm": 7.814172267913818, + "learning_rate": 1.865023079855403e-06, + "loss": 0.0967, + "num_input_tokens_seen": 34566752, + "step": 51285 + }, + { + "epoch": 1.2530232330882174, + "grad_norm": 1.3137484788894653, + "learning_rate": 1.8649802902034995e-06, + "loss": 0.0183, + "num_input_tokens_seen": 34570528, + "step": 51290 + }, + { + "epoch": 1.2531453839200646, + "grad_norm": 1.5455907583236694, + "learning_rate": 1.8649374942612266e-06, + "loss": 0.0374, + "num_input_tokens_seen": 34573856, + "step": 51295 + }, + { + "epoch": 1.2532675347519118, + "grad_norm": 15.697102546691895, + "learning_rate": 1.8648946920288956e-06, + "loss": 0.1031, + "num_input_tokens_seen": 34577376, + "step": 51300 + }, + { + "epoch": 1.253389685583759, + "grad_norm": 0.39699557423591614, + "learning_rate": 1.864851883506818e-06, + "loss": 0.0986, + "num_input_tokens_seen": 34580640, + "step": 51305 + }, + { + "epoch": 1.253511836415606, + "grad_norm": 74.94749450683594, + "learning_rate": 1.8648090686953046e-06, + "loss": 0.0321, + "num_input_tokens_seen": 34584160, + "step": 51310 + }, + { + "epoch": 1.2536339872474531, + "grad_norm": 0.4404575228691101, + "learning_rate": 1.8647662475946673e-06, + "loss": 0.1095, + "num_input_tokens_seen": 34587488, + "step": 51315 + }, + { + "epoch": 1.2537561380793003, + "grad_norm": 1.1401361227035522, + "learning_rate": 1.864723420205217e-06, + "loss": 0.0014, + "num_input_tokens_seen": 34591264, + "step": 51320 + }, + { + "epoch": 1.2538782889111475, + "grad_norm": 9.906758308410645, + "learning_rate": 1.8646805865272655e-06, + "loss": 0.0824, + "num_input_tokens_seen": 34594848, + "step": 51325 + }, + { + "epoch": 1.2540004397429947, + "grad_norm": 10.644179344177246, + "learning_rate": 1.864637746561124e-06, + "loss": 0.0936, + "num_input_tokens_seen": 34598304, + "step": 51330 + }, + { + "epoch": 1.2541225905748419, + "grad_norm": 7.708846092224121, + "learning_rate": 1.8645949003071047e-06, + "loss": 0.1095, + "num_input_tokens_seen": 34602144, + "step": 51335 + }, + { + "epoch": 1.2542447414066888, + "grad_norm": 0.32949671149253845, + "learning_rate": 1.8645520477655184e-06, + "loss": 0.0402, + "num_input_tokens_seen": 34605280, + "step": 51340 + }, + { + "epoch": 1.254366892238536, + "grad_norm": 10.342193603515625, + "learning_rate": 1.8645091889366774e-06, + "loss": 0.198, + "num_input_tokens_seen": 34608224, + "step": 51345 + }, + { + "epoch": 1.2544890430703832, + "grad_norm": 1.9661191701889038, + "learning_rate": 1.8644663238208927e-06, + "loss": 0.093, + "num_input_tokens_seen": 34612000, + "step": 51350 + }, + { + "epoch": 1.2546111939022304, + "grad_norm": 5.437589168548584, + "learning_rate": 1.8644234524184762e-06, + "loss": 0.0053, + "num_input_tokens_seen": 34615072, + "step": 51355 + }, + { + "epoch": 1.2547333447340776, + "grad_norm": 5.895714282989502, + "learning_rate": 1.8643805747297402e-06, + "loss": 0.0437, + "num_input_tokens_seen": 34618272, + "step": 51360 + }, + { + "epoch": 1.2548554955659248, + "grad_norm": 9.616518020629883, + "learning_rate": 1.8643376907549963e-06, + "loss": 0.2466, + "num_input_tokens_seen": 34621792, + "step": 51365 + }, + { + "epoch": 1.254977646397772, + "grad_norm": 0.07669886201620102, + "learning_rate": 1.864294800494556e-06, + "loss": 0.0016, + "num_input_tokens_seen": 34626208, + "step": 51370 + }, + { + "epoch": 1.2550997972296192, + "grad_norm": 12.242938041687012, + "learning_rate": 1.8642519039487317e-06, + "loss": 0.0824, + "num_input_tokens_seen": 34629600, + "step": 51375 + }, + { + "epoch": 1.2552219480614664, + "grad_norm": 10.630461692810059, + "learning_rate": 1.8642090011178348e-06, + "loss": 0.1086, + "num_input_tokens_seen": 34633056, + "step": 51380 + }, + { + "epoch": 1.2553440988933136, + "grad_norm": 0.5824533700942993, + "learning_rate": 1.8641660920021778e-06, + "loss": 0.0059, + "num_input_tokens_seen": 34636896, + "step": 51385 + }, + { + "epoch": 1.2554662497251607, + "grad_norm": 19.69352149963379, + "learning_rate": 1.8641231766020724e-06, + "loss": 0.1896, + "num_input_tokens_seen": 34640160, + "step": 51390 + }, + { + "epoch": 1.2555884005570077, + "grad_norm": 4.431107521057129, + "learning_rate": 1.864080254917831e-06, + "loss": 0.1344, + "num_input_tokens_seen": 34643488, + "step": 51395 + }, + { + "epoch": 1.255710551388855, + "grad_norm": 8.024478912353516, + "learning_rate": 1.8640373269497653e-06, + "loss": 0.0929, + "num_input_tokens_seen": 34647072, + "step": 51400 + }, + { + "epoch": 1.255832702220702, + "grad_norm": 16.997468948364258, + "learning_rate": 1.8639943926981881e-06, + "loss": 0.105, + "num_input_tokens_seen": 34650656, + "step": 51405 + }, + { + "epoch": 1.2559548530525493, + "grad_norm": 7.817405700683594, + "learning_rate": 1.863951452163411e-06, + "loss": 0.0657, + "num_input_tokens_seen": 34653792, + "step": 51410 + }, + { + "epoch": 1.2560770038843965, + "grad_norm": 0.0734686404466629, + "learning_rate": 1.8639085053457469e-06, + "loss": 0.0307, + "num_input_tokens_seen": 34656864, + "step": 51415 + }, + { + "epoch": 1.2561991547162437, + "grad_norm": 0.5896303057670593, + "learning_rate": 1.8638655522455072e-06, + "loss": 0.0582, + "num_input_tokens_seen": 34660256, + "step": 51420 + }, + { + "epoch": 1.2563213055480908, + "grad_norm": 1.4299993515014648, + "learning_rate": 1.8638225928630053e-06, + "loss": 0.1335, + "num_input_tokens_seen": 34663328, + "step": 51425 + }, + { + "epoch": 1.2564434563799378, + "grad_norm": 0.17183057963848114, + "learning_rate": 1.8637796271985532e-06, + "loss": 0.0228, + "num_input_tokens_seen": 34666720, + "step": 51430 + }, + { + "epoch": 1.256565607211785, + "grad_norm": 44.34275817871094, + "learning_rate": 1.8637366552524632e-06, + "loss": 0.1212, + "num_input_tokens_seen": 34670176, + "step": 51435 + }, + { + "epoch": 1.2566877580436322, + "grad_norm": 0.44418561458587646, + "learning_rate": 1.863693677025048e-06, + "loss": 0.046, + "num_input_tokens_seen": 34673312, + "step": 51440 + }, + { + "epoch": 1.2568099088754794, + "grad_norm": 10.671781539916992, + "learning_rate": 1.86365069251662e-06, + "loss": 0.1023, + "num_input_tokens_seen": 34676448, + "step": 51445 + }, + { + "epoch": 1.2569320597073266, + "grad_norm": 0.2875211238861084, + "learning_rate": 1.8636077017274917e-06, + "loss": 0.0908, + "num_input_tokens_seen": 34679456, + "step": 51450 + }, + { + "epoch": 1.2570542105391738, + "grad_norm": 5.269903659820557, + "learning_rate": 1.8635647046579762e-06, + "loss": 0.0017, + "num_input_tokens_seen": 34682656, + "step": 51455 + }, + { + "epoch": 1.257176361371021, + "grad_norm": 0.19323447346687317, + "learning_rate": 1.863521701308386e-06, + "loss": 0.1742, + "num_input_tokens_seen": 34686240, + "step": 51460 + }, + { + "epoch": 1.2572985122028681, + "grad_norm": 0.09027817845344543, + "learning_rate": 1.8634786916790332e-06, + "loss": 0.1492, + "num_input_tokens_seen": 34689568, + "step": 51465 + }, + { + "epoch": 1.2574206630347153, + "grad_norm": 0.43901634216308594, + "learning_rate": 1.8634356757702316e-06, + "loss": 0.0579, + "num_input_tokens_seen": 34692640, + "step": 51470 + }, + { + "epoch": 1.2575428138665625, + "grad_norm": 0.5162341594696045, + "learning_rate": 1.8633926535822932e-06, + "loss": 0.0578, + "num_input_tokens_seen": 34695840, + "step": 51475 + }, + { + "epoch": 1.2576649646984097, + "grad_norm": 47.497135162353516, + "learning_rate": 1.8633496251155314e-06, + "loss": 0.0701, + "num_input_tokens_seen": 34701216, + "step": 51480 + }, + { + "epoch": 1.2577871155302567, + "grad_norm": 0.3074395954608917, + "learning_rate": 1.8633065903702588e-06, + "loss": 0.1401, + "num_input_tokens_seen": 34704480, + "step": 51485 + }, + { + "epoch": 1.2579092663621039, + "grad_norm": 0.1671035885810852, + "learning_rate": 1.8632635493467887e-06, + "loss": 0.0023, + "num_input_tokens_seen": 34708000, + "step": 51490 + }, + { + "epoch": 1.258031417193951, + "grad_norm": 0.5071985125541687, + "learning_rate": 1.8632205020454336e-06, + "loss": 0.0715, + "num_input_tokens_seen": 34711136, + "step": 51495 + }, + { + "epoch": 1.2581535680257983, + "grad_norm": 25.011734008789062, + "learning_rate": 1.8631774484665067e-06, + "loss": 0.0838, + "num_input_tokens_seen": 34714592, + "step": 51500 + }, + { + "epoch": 1.2582757188576454, + "grad_norm": 0.7971920967102051, + "learning_rate": 1.8631343886103218e-06, + "loss": 0.1563, + "num_input_tokens_seen": 34717984, + "step": 51505 + }, + { + "epoch": 1.2583978696894926, + "grad_norm": 19.045324325561523, + "learning_rate": 1.863091322477191e-06, + "loss": 0.1384, + "num_input_tokens_seen": 34721184, + "step": 51510 + }, + { + "epoch": 1.2585200205213398, + "grad_norm": 0.11327563226222992, + "learning_rate": 1.863048250067428e-06, + "loss": 0.1524, + "num_input_tokens_seen": 34724192, + "step": 51515 + }, + { + "epoch": 1.2586421713531868, + "grad_norm": 0.3514478802680969, + "learning_rate": 1.863005171381346e-06, + "loss": 0.0591, + "num_input_tokens_seen": 34727392, + "step": 51520 + }, + { + "epoch": 1.258764322185034, + "grad_norm": 14.835415840148926, + "learning_rate": 1.8629620864192588e-06, + "loss": 0.1017, + "num_input_tokens_seen": 34731808, + "step": 51525 + }, + { + "epoch": 1.2588864730168812, + "grad_norm": 5.161327838897705, + "learning_rate": 1.8629189951814785e-06, + "loss": 0.0119, + "num_input_tokens_seen": 34735072, + "step": 51530 + }, + { + "epoch": 1.2590086238487284, + "grad_norm": 2.658921003341675, + "learning_rate": 1.8628758976683195e-06, + "loss": 0.046, + "num_input_tokens_seen": 34738784, + "step": 51535 + }, + { + "epoch": 1.2591307746805755, + "grad_norm": 0.17709505558013916, + "learning_rate": 1.862832793880095e-06, + "loss": 0.0241, + "num_input_tokens_seen": 34742176, + "step": 51540 + }, + { + "epoch": 1.2592529255124227, + "grad_norm": 0.27550020813941956, + "learning_rate": 1.8627896838171182e-06, + "loss": 0.1454, + "num_input_tokens_seen": 34745696, + "step": 51545 + }, + { + "epoch": 1.25937507634427, + "grad_norm": 0.6853047013282776, + "learning_rate": 1.8627465674797027e-06, + "loss": 0.0427, + "num_input_tokens_seen": 34748704, + "step": 51550 + }, + { + "epoch": 1.2594972271761171, + "grad_norm": 0.29485732316970825, + "learning_rate": 1.862703444868162e-06, + "loss": 0.0989, + "num_input_tokens_seen": 34752352, + "step": 51555 + }, + { + "epoch": 1.2596193780079643, + "grad_norm": 2.1250858306884766, + "learning_rate": 1.8626603159828101e-06, + "loss": 0.0028, + "num_input_tokens_seen": 34755808, + "step": 51560 + }, + { + "epoch": 1.2597415288398115, + "grad_norm": 0.1356990784406662, + "learning_rate": 1.86261718082396e-06, + "loss": 0.1658, + "num_input_tokens_seen": 34759520, + "step": 51565 + }, + { + "epoch": 1.2598636796716587, + "grad_norm": 38.832767486572266, + "learning_rate": 1.862574039391926e-06, + "loss": 0.003, + "num_input_tokens_seen": 34762976, + "step": 51570 + }, + { + "epoch": 1.2599858305035057, + "grad_norm": 0.2098708599805832, + "learning_rate": 1.8625308916870215e-06, + "loss": 0.0021, + "num_input_tokens_seen": 34766176, + "step": 51575 + }, + { + "epoch": 1.2601079813353528, + "grad_norm": 34.0190544128418, + "learning_rate": 1.8624877377095604e-06, + "loss": 0.157, + "num_input_tokens_seen": 34769440, + "step": 51580 + }, + { + "epoch": 1.2602301321672, + "grad_norm": 103.00405883789062, + "learning_rate": 1.8624445774598565e-06, + "loss": 0.0354, + "num_input_tokens_seen": 34772576, + "step": 51585 + }, + { + "epoch": 1.2603522829990472, + "grad_norm": 196.07667541503906, + "learning_rate": 1.8624014109382236e-06, + "loss": 0.0539, + "num_input_tokens_seen": 34776352, + "step": 51590 + }, + { + "epoch": 1.2604744338308944, + "grad_norm": 3.750955820083618, + "learning_rate": 1.8623582381449757e-06, + "loss": 0.1369, + "num_input_tokens_seen": 34779616, + "step": 51595 + }, + { + "epoch": 1.2605965846627416, + "grad_norm": 123.82447814941406, + "learning_rate": 1.8623150590804269e-06, + "loss": 0.1751, + "num_input_tokens_seen": 34783136, + "step": 51600 + }, + { + "epoch": 1.2607187354945888, + "grad_norm": 0.05092499777674675, + "learning_rate": 1.8622718737448908e-06, + "loss": 0.0823, + "num_input_tokens_seen": 34786400, + "step": 51605 + }, + { + "epoch": 1.2608408863264358, + "grad_norm": 45.56595993041992, + "learning_rate": 1.862228682138682e-06, + "loss": 0.0585, + "num_input_tokens_seen": 34789408, + "step": 51610 + }, + { + "epoch": 1.260963037158283, + "grad_norm": 0.2559904456138611, + "learning_rate": 1.8621854842621141e-06, + "loss": 0.0389, + "num_input_tokens_seen": 34792864, + "step": 51615 + }, + { + "epoch": 1.2610851879901301, + "grad_norm": 12.372807502746582, + "learning_rate": 1.8621422801155014e-06, + "loss": 0.1365, + "num_input_tokens_seen": 34796000, + "step": 51620 + }, + { + "epoch": 1.2612073388219773, + "grad_norm": 0.3244743049144745, + "learning_rate": 1.8620990696991586e-06, + "loss": 0.1355, + "num_input_tokens_seen": 34799264, + "step": 51625 + }, + { + "epoch": 1.2613294896538245, + "grad_norm": 0.3361222445964813, + "learning_rate": 1.862055853013399e-06, + "loss": 0.1614, + "num_input_tokens_seen": 34802528, + "step": 51630 + }, + { + "epoch": 1.2614516404856717, + "grad_norm": 41.69643020629883, + "learning_rate": 1.8620126300585372e-06, + "loss": 0.1865, + "num_input_tokens_seen": 34805984, + "step": 51635 + }, + { + "epoch": 1.261573791317519, + "grad_norm": 22.042510986328125, + "learning_rate": 1.861969400834888e-06, + "loss": 0.0727, + "num_input_tokens_seen": 34809568, + "step": 51640 + }, + { + "epoch": 1.261695942149366, + "grad_norm": 0.43014460802078247, + "learning_rate": 1.8619261653427655e-06, + "loss": 0.0692, + "num_input_tokens_seen": 34813216, + "step": 51645 + }, + { + "epoch": 1.2618180929812133, + "grad_norm": 1.0715209245681763, + "learning_rate": 1.8618829235824841e-06, + "loss": 0.0024, + "num_input_tokens_seen": 34816480, + "step": 51650 + }, + { + "epoch": 1.2619402438130605, + "grad_norm": 0.30852288007736206, + "learning_rate": 1.8618396755543584e-06, + "loss": 0.0105, + "num_input_tokens_seen": 34819872, + "step": 51655 + }, + { + "epoch": 1.2620623946449077, + "grad_norm": 160.23773193359375, + "learning_rate": 1.8617964212587027e-06, + "loss": 0.0565, + "num_input_tokens_seen": 34823200, + "step": 51660 + }, + { + "epoch": 1.2621845454767546, + "grad_norm": 0.37034016847610474, + "learning_rate": 1.8617531606958315e-06, + "loss": 0.0384, + "num_input_tokens_seen": 34826528, + "step": 51665 + }, + { + "epoch": 1.2623066963086018, + "grad_norm": 0.008391081355512142, + "learning_rate": 1.8617098938660595e-06, + "loss": 0.0596, + "num_input_tokens_seen": 34829728, + "step": 51670 + }, + { + "epoch": 1.262428847140449, + "grad_norm": 13.188200950622559, + "learning_rate": 1.8616666207697015e-06, + "loss": 0.0799, + "num_input_tokens_seen": 34833184, + "step": 51675 + }, + { + "epoch": 1.2625509979722962, + "grad_norm": 3.760244846343994, + "learning_rate": 1.8616233414070721e-06, + "loss": 0.0877, + "num_input_tokens_seen": 34836576, + "step": 51680 + }, + { + "epoch": 1.2626731488041434, + "grad_norm": 0.5174466967582703, + "learning_rate": 1.861580055778486e-06, + "loss": 0.098, + "num_input_tokens_seen": 34840352, + "step": 51685 + }, + { + "epoch": 1.2627952996359906, + "grad_norm": 0.07368353754281998, + "learning_rate": 1.861536763884258e-06, + "loss": 0.0007, + "num_input_tokens_seen": 34843680, + "step": 51690 + }, + { + "epoch": 1.2629174504678378, + "grad_norm": 12.683340072631836, + "learning_rate": 1.8614934657247028e-06, + "loss": 0.1131, + "num_input_tokens_seen": 34847008, + "step": 51695 + }, + { + "epoch": 1.2630396012996847, + "grad_norm": 0.0876552015542984, + "learning_rate": 1.8614501613001354e-06, + "loss": 0.0387, + "num_input_tokens_seen": 34850720, + "step": 51700 + }, + { + "epoch": 1.263161752131532, + "grad_norm": 287.899169921875, + "learning_rate": 1.8614068506108708e-06, + "loss": 0.162, + "num_input_tokens_seen": 34854432, + "step": 51705 + }, + { + "epoch": 1.2632839029633791, + "grad_norm": 51.08266830444336, + "learning_rate": 1.861363533657224e-06, + "loss": 0.0343, + "num_input_tokens_seen": 34857632, + "step": 51710 + }, + { + "epoch": 1.2634060537952263, + "grad_norm": 63.332008361816406, + "learning_rate": 1.8613202104395098e-06, + "loss": 0.0383, + "num_input_tokens_seen": 34861024, + "step": 51715 + }, + { + "epoch": 1.2635282046270735, + "grad_norm": 1.2061738967895508, + "learning_rate": 1.8612768809580435e-06, + "loss": 0.0816, + "num_input_tokens_seen": 34864160, + "step": 51720 + }, + { + "epoch": 1.2636503554589207, + "grad_norm": 186.6913604736328, + "learning_rate": 1.8612335452131398e-06, + "loss": 0.1188, + "num_input_tokens_seen": 34867424, + "step": 51725 + }, + { + "epoch": 1.2637725062907679, + "grad_norm": 254.61875915527344, + "learning_rate": 1.8611902032051141e-06, + "loss": 0.1256, + "num_input_tokens_seen": 34870880, + "step": 51730 + }, + { + "epoch": 1.263894657122615, + "grad_norm": 2.1379923820495605, + "learning_rate": 1.861146854934282e-06, + "loss": 0.0437, + "num_input_tokens_seen": 34874464, + "step": 51735 + }, + { + "epoch": 1.2640168079544623, + "grad_norm": 0.7487949132919312, + "learning_rate": 1.861103500400958e-06, + "loss": 0.0037, + "num_input_tokens_seen": 34877664, + "step": 51740 + }, + { + "epoch": 1.2641389587863094, + "grad_norm": 0.02050768956542015, + "learning_rate": 1.8610601396054579e-06, + "loss": 0.1053, + "num_input_tokens_seen": 34881312, + "step": 51745 + }, + { + "epoch": 1.2642611096181566, + "grad_norm": 0.02770630270242691, + "learning_rate": 1.8610167725480967e-06, + "loss": 0.0473, + "num_input_tokens_seen": 34884384, + "step": 51750 + }, + { + "epoch": 1.2643832604500036, + "grad_norm": 1.7987079620361328, + "learning_rate": 1.86097339922919e-06, + "loss": 0.0283, + "num_input_tokens_seen": 34887712, + "step": 51755 + }, + { + "epoch": 1.2645054112818508, + "grad_norm": 1.5875380039215088, + "learning_rate": 1.8609300196490532e-06, + "loss": 0.0492, + "num_input_tokens_seen": 34891168, + "step": 51760 + }, + { + "epoch": 1.264627562113698, + "grad_norm": 2.108224868774414, + "learning_rate": 1.8608866338080018e-06, + "loss": 0.1076, + "num_input_tokens_seen": 34894688, + "step": 51765 + }, + { + "epoch": 1.2647497129455452, + "grad_norm": 0.34903234243392944, + "learning_rate": 1.8608432417063512e-06, + "loss": 0.1494, + "num_input_tokens_seen": 34897760, + "step": 51770 + }, + { + "epoch": 1.2648718637773924, + "grad_norm": 0.11632052809000015, + "learning_rate": 1.860799843344417e-06, + "loss": 0.0673, + "num_input_tokens_seen": 34901216, + "step": 51775 + }, + { + "epoch": 1.2649940146092395, + "grad_norm": 0.1959635615348816, + "learning_rate": 1.860756438722515e-06, + "loss": 0.0012, + "num_input_tokens_seen": 34904544, + "step": 51780 + }, + { + "epoch": 1.2651161654410865, + "grad_norm": 148.51904296875, + "learning_rate": 1.8607130278409603e-06, + "loss": 0.0723, + "num_input_tokens_seen": 34907552, + "step": 51785 + }, + { + "epoch": 1.2652383162729337, + "grad_norm": 0.23096473515033722, + "learning_rate": 1.8606696107000692e-06, + "loss": 0.1587, + "num_input_tokens_seen": 34911200, + "step": 51790 + }, + { + "epoch": 1.265360467104781, + "grad_norm": 0.1959235966205597, + "learning_rate": 1.860626187300157e-06, + "loss": 0.0537, + "num_input_tokens_seen": 34915232, + "step": 51795 + }, + { + "epoch": 1.265482617936628, + "grad_norm": 0.17887307703495026, + "learning_rate": 1.86058275764154e-06, + "loss": 0.1449, + "num_input_tokens_seen": 34918304, + "step": 51800 + }, + { + "epoch": 1.2656047687684753, + "grad_norm": 0.13273105025291443, + "learning_rate": 1.8605393217245336e-06, + "loss": 0.1132, + "num_input_tokens_seen": 34922144, + "step": 51805 + }, + { + "epoch": 1.2657269196003225, + "grad_norm": 0.9345943927764893, + "learning_rate": 1.8604958795494535e-06, + "loss": 0.0423, + "num_input_tokens_seen": 34925152, + "step": 51810 + }, + { + "epoch": 1.2658490704321697, + "grad_norm": 14.5985689163208, + "learning_rate": 1.8604524311166163e-06, + "loss": 0.1276, + "num_input_tokens_seen": 34928608, + "step": 51815 + }, + { + "epoch": 1.2659712212640168, + "grad_norm": 0.25379669666290283, + "learning_rate": 1.8604089764263375e-06, + "loss": 0.0426, + "num_input_tokens_seen": 34931744, + "step": 51820 + }, + { + "epoch": 1.266093372095864, + "grad_norm": 9.009862899780273, + "learning_rate": 1.8603655154789331e-06, + "loss": 0.0938, + "num_input_tokens_seen": 34935392, + "step": 51825 + }, + { + "epoch": 1.2662155229277112, + "grad_norm": 191.35870361328125, + "learning_rate": 1.8603220482747192e-06, + "loss": 0.2035, + "num_input_tokens_seen": 34938400, + "step": 51830 + }, + { + "epoch": 1.2663376737595584, + "grad_norm": 0.27894648909568787, + "learning_rate": 1.8602785748140122e-06, + "loss": 0.227, + "num_input_tokens_seen": 34942240, + "step": 51835 + }, + { + "epoch": 1.2664598245914056, + "grad_norm": 211.6515350341797, + "learning_rate": 1.8602350950971277e-06, + "loss": 0.0861, + "num_input_tokens_seen": 34945504, + "step": 51840 + }, + { + "epoch": 1.2665819754232526, + "grad_norm": 0.154354065656662, + "learning_rate": 1.8601916091243825e-06, + "loss": 0.1287, + "num_input_tokens_seen": 34948768, + "step": 51845 + }, + { + "epoch": 1.2667041262550998, + "grad_norm": 1.5715100765228271, + "learning_rate": 1.8601481168960925e-06, + "loss": 0.0453, + "num_input_tokens_seen": 34952224, + "step": 51850 + }, + { + "epoch": 1.266826277086947, + "grad_norm": 11.307893753051758, + "learning_rate": 1.860104618412574e-06, + "loss": 0.0872, + "num_input_tokens_seen": 34956128, + "step": 51855 + }, + { + "epoch": 1.2669484279187941, + "grad_norm": 0.08968228101730347, + "learning_rate": 1.8600611136741432e-06, + "loss": 0.0027, + "num_input_tokens_seen": 34959840, + "step": 51860 + }, + { + "epoch": 1.2670705787506413, + "grad_norm": 61.685367584228516, + "learning_rate": 1.8600176026811169e-06, + "loss": 0.1355, + "num_input_tokens_seen": 34962912, + "step": 51865 + }, + { + "epoch": 1.2671927295824885, + "grad_norm": 0.35035207867622375, + "learning_rate": 1.8599740854338112e-06, + "loss": 0.1379, + "num_input_tokens_seen": 34966560, + "step": 51870 + }, + { + "epoch": 1.2673148804143355, + "grad_norm": 0.2698616683483124, + "learning_rate": 1.8599305619325428e-06, + "loss": 0.009, + "num_input_tokens_seen": 34969952, + "step": 51875 + }, + { + "epoch": 1.2674370312461827, + "grad_norm": 9.4758882522583, + "learning_rate": 1.8598870321776278e-06, + "loss": 0.1323, + "num_input_tokens_seen": 34973344, + "step": 51880 + }, + { + "epoch": 1.2675591820780299, + "grad_norm": 18.896320343017578, + "learning_rate": 1.8598434961693833e-06, + "loss": 0.0956, + "num_input_tokens_seen": 34976800, + "step": 51885 + }, + { + "epoch": 1.267681332909877, + "grad_norm": 0.1476629227399826, + "learning_rate": 1.8597999539081255e-06, + "loss": 0.0017, + "num_input_tokens_seen": 34980384, + "step": 51890 + }, + { + "epoch": 1.2678034837417242, + "grad_norm": 8.873211860656738, + "learning_rate": 1.859756405394171e-06, + "loss": 0.0439, + "num_input_tokens_seen": 34983904, + "step": 51895 + }, + { + "epoch": 1.2679256345735714, + "grad_norm": 0.32919931411743164, + "learning_rate": 1.8597128506278365e-06, + "loss": 0.1104, + "num_input_tokens_seen": 34987232, + "step": 51900 + }, + { + "epoch": 1.2680477854054186, + "grad_norm": 9.547355651855469, + "learning_rate": 1.8596692896094394e-06, + "loss": 0.2373, + "num_input_tokens_seen": 34990624, + "step": 51905 + }, + { + "epoch": 1.2681699362372658, + "grad_norm": 0.020996596664190292, + "learning_rate": 1.8596257223392959e-06, + "loss": 0.0839, + "num_input_tokens_seen": 34993568, + "step": 51910 + }, + { + "epoch": 1.268292087069113, + "grad_norm": 0.9302226305007935, + "learning_rate": 1.8595821488177228e-06, + "loss": 0.0244, + "num_input_tokens_seen": 34996960, + "step": 51915 + }, + { + "epoch": 1.2684142379009602, + "grad_norm": 2.4010121822357178, + "learning_rate": 1.8595385690450374e-06, + "loss": 0.1089, + "num_input_tokens_seen": 34999840, + "step": 51920 + }, + { + "epoch": 1.2685363887328074, + "grad_norm": 0.9553894400596619, + "learning_rate": 1.8594949830215558e-06, + "loss": 0.0253, + "num_input_tokens_seen": 35003296, + "step": 51925 + }, + { + "epoch": 1.2686585395646544, + "grad_norm": 0.4104401767253876, + "learning_rate": 1.859451390747596e-06, + "loss": 0.0893, + "num_input_tokens_seen": 35006496, + "step": 51930 + }, + { + "epoch": 1.2687806903965015, + "grad_norm": 25.074024200439453, + "learning_rate": 1.8594077922234742e-06, + "loss": 0.1692, + "num_input_tokens_seen": 35009568, + "step": 51935 + }, + { + "epoch": 1.2689028412283487, + "grad_norm": 0.19158746302127838, + "learning_rate": 1.859364187449508e-06, + "loss": 0.1384, + "num_input_tokens_seen": 35012832, + "step": 51940 + }, + { + "epoch": 1.269024992060196, + "grad_norm": 34.13637924194336, + "learning_rate": 1.8593205764260142e-06, + "loss": 0.1384, + "num_input_tokens_seen": 35016224, + "step": 51945 + }, + { + "epoch": 1.2691471428920431, + "grad_norm": 0.3690553903579712, + "learning_rate": 1.8592769591533099e-06, + "loss": 0.0345, + "num_input_tokens_seen": 35020128, + "step": 51950 + }, + { + "epoch": 1.2692692937238903, + "grad_norm": 7.655487060546875, + "learning_rate": 1.8592333356317128e-06, + "loss": 0.1079, + "num_input_tokens_seen": 35023392, + "step": 51955 + }, + { + "epoch": 1.2693914445557375, + "grad_norm": 15.581256866455078, + "learning_rate": 1.8591897058615396e-06, + "loss": 0.1429, + "num_input_tokens_seen": 35026656, + "step": 51960 + }, + { + "epoch": 1.2695135953875845, + "grad_norm": 0.9874152541160583, + "learning_rate": 1.8591460698431076e-06, + "loss": 0.0967, + "num_input_tokens_seen": 35029728, + "step": 51965 + }, + { + "epoch": 1.2696357462194316, + "grad_norm": 0.16612599790096283, + "learning_rate": 1.8591024275767345e-06, + "loss": 0.0196, + "num_input_tokens_seen": 35033568, + "step": 51970 + }, + { + "epoch": 1.2697578970512788, + "grad_norm": 8.764939308166504, + "learning_rate": 1.8590587790627372e-06, + "loss": 0.1069, + "num_input_tokens_seen": 35037024, + "step": 51975 + }, + { + "epoch": 1.269880047883126, + "grad_norm": 0.25551822781562805, + "learning_rate": 1.8590151243014337e-06, + "loss": 0.0019, + "num_input_tokens_seen": 35040608, + "step": 51980 + }, + { + "epoch": 1.2700021987149732, + "grad_norm": 0.7786625623703003, + "learning_rate": 1.858971463293141e-06, + "loss": 0.0299, + "num_input_tokens_seen": 35044128, + "step": 51985 + }, + { + "epoch": 1.2701243495468204, + "grad_norm": 0.11077973991632462, + "learning_rate": 1.858927796038177e-06, + "loss": 0.0489, + "num_input_tokens_seen": 35047520, + "step": 51990 + }, + { + "epoch": 1.2702465003786676, + "grad_norm": 0.24186283349990845, + "learning_rate": 1.8588841225368587e-06, + "loss": 0.1065, + "num_input_tokens_seen": 35050528, + "step": 51995 + }, + { + "epoch": 1.2703686512105148, + "grad_norm": 0.4123366177082062, + "learning_rate": 1.8588404427895044e-06, + "loss": 0.032, + "num_input_tokens_seen": 35053728, + "step": 52000 + }, + { + "epoch": 1.270490802042362, + "grad_norm": 0.3522588312625885, + "learning_rate": 1.8587967567964312e-06, + "loss": 0.161, + "num_input_tokens_seen": 35056992, + "step": 52005 + }, + { + "epoch": 1.2706129528742092, + "grad_norm": 8.013328552246094, + "learning_rate": 1.858753064557957e-06, + "loss": 0.1344, + "num_input_tokens_seen": 35060448, + "step": 52010 + }, + { + "epoch": 1.2707351037060564, + "grad_norm": 36.01093673706055, + "learning_rate": 1.8587093660743997e-06, + "loss": 0.1726, + "num_input_tokens_seen": 35064032, + "step": 52015 + }, + { + "epoch": 1.2708572545379033, + "grad_norm": 0.21894040703773499, + "learning_rate": 1.8586656613460766e-06, + "loss": 0.0458, + "num_input_tokens_seen": 35067296, + "step": 52020 + }, + { + "epoch": 1.2709794053697505, + "grad_norm": 10.701966285705566, + "learning_rate": 1.8586219503733061e-06, + "loss": 0.0534, + "num_input_tokens_seen": 35070240, + "step": 52025 + }, + { + "epoch": 1.2711015562015977, + "grad_norm": 0.2591902017593384, + "learning_rate": 1.8585782331564057e-06, + "loss": 0.0728, + "num_input_tokens_seen": 35073632, + "step": 52030 + }, + { + "epoch": 1.271223707033445, + "grad_norm": 10.046820640563965, + "learning_rate": 1.8585345096956938e-06, + "loss": 0.0394, + "num_input_tokens_seen": 35076832, + "step": 52035 + }, + { + "epoch": 1.271345857865292, + "grad_norm": 21.24547576904297, + "learning_rate": 1.8584907799914874e-06, + "loss": 0.0483, + "num_input_tokens_seen": 35080480, + "step": 52040 + }, + { + "epoch": 1.2714680086971393, + "grad_norm": 11.880767822265625, + "learning_rate": 1.858447044044106e-06, + "loss": 0.1918, + "num_input_tokens_seen": 35083744, + "step": 52045 + }, + { + "epoch": 1.2715901595289865, + "grad_norm": 0.13276037573814392, + "learning_rate": 1.858403301853866e-06, + "loss": 0.1091, + "num_input_tokens_seen": 35087264, + "step": 52050 + }, + { + "epoch": 1.2717123103608334, + "grad_norm": 0.15990546345710754, + "learning_rate": 1.8583595534210868e-06, + "loss": 0.1953, + "num_input_tokens_seen": 35090336, + "step": 52055 + }, + { + "epoch": 1.2718344611926806, + "grad_norm": 0.265207439661026, + "learning_rate": 1.8583157987460859e-06, + "loss": 0.0675, + "num_input_tokens_seen": 35093856, + "step": 52060 + }, + { + "epoch": 1.2719566120245278, + "grad_norm": 180.78955078125, + "learning_rate": 1.8582720378291817e-06, + "loss": 0.0433, + "num_input_tokens_seen": 35098208, + "step": 52065 + }, + { + "epoch": 1.272078762856375, + "grad_norm": 0.7898123264312744, + "learning_rate": 1.8582282706706922e-06, + "loss": 0.002, + "num_input_tokens_seen": 35101280, + "step": 52070 + }, + { + "epoch": 1.2722009136882222, + "grad_norm": 12.134377479553223, + "learning_rate": 1.858184497270936e-06, + "loss": 0.1375, + "num_input_tokens_seen": 35104480, + "step": 52075 + }, + { + "epoch": 1.2723230645200694, + "grad_norm": 1.3107682466506958, + "learning_rate": 1.8581407176302313e-06, + "loss": 0.0917, + "num_input_tokens_seen": 35108064, + "step": 52080 + }, + { + "epoch": 1.2724452153519166, + "grad_norm": 0.24347443878650665, + "learning_rate": 1.8580969317488964e-06, + "loss": 0.0431, + "num_input_tokens_seen": 35111072, + "step": 52085 + }, + { + "epoch": 1.2725673661837638, + "grad_norm": 2.7623085975646973, + "learning_rate": 1.8580531396272501e-06, + "loss": 0.0015, + "num_input_tokens_seen": 35114464, + "step": 52090 + }, + { + "epoch": 1.272689517015611, + "grad_norm": 0.18461468815803528, + "learning_rate": 1.8580093412656104e-06, + "loss": 0.083, + "num_input_tokens_seen": 35118112, + "step": 52095 + }, + { + "epoch": 1.2728116678474581, + "grad_norm": 11.591814041137695, + "learning_rate": 1.857965536664296e-06, + "loss": 0.0866, + "num_input_tokens_seen": 35121440, + "step": 52100 + }, + { + "epoch": 1.2729338186793053, + "grad_norm": 16.923437118530273, + "learning_rate": 1.8579217258236254e-06, + "loss": 0.1703, + "num_input_tokens_seen": 35124320, + "step": 52105 + }, + { + "epoch": 1.2730559695111523, + "grad_norm": 3.304119825363159, + "learning_rate": 1.8578779087439172e-06, + "loss": 0.0437, + "num_input_tokens_seen": 35127328, + "step": 52110 + }, + { + "epoch": 1.2731781203429995, + "grad_norm": 0.3508507013320923, + "learning_rate": 1.8578340854254902e-06, + "loss": 0.0514, + "num_input_tokens_seen": 35130528, + "step": 52115 + }, + { + "epoch": 1.2733002711748467, + "grad_norm": 57.29536437988281, + "learning_rate": 1.8577902558686631e-06, + "loss": 0.0906, + "num_input_tokens_seen": 35133920, + "step": 52120 + }, + { + "epoch": 1.2734224220066939, + "grad_norm": 0.3323357105255127, + "learning_rate": 1.8577464200737544e-06, + "loss": 0.1203, + "num_input_tokens_seen": 35137184, + "step": 52125 + }, + { + "epoch": 1.273544572838541, + "grad_norm": 1.0517792701721191, + "learning_rate": 1.857702578041083e-06, + "loss": 0.0019, + "num_input_tokens_seen": 35140704, + "step": 52130 + }, + { + "epoch": 1.2736667236703882, + "grad_norm": 0.2565861642360687, + "learning_rate": 1.8576587297709678e-06, + "loss": 0.1035, + "num_input_tokens_seen": 35143968, + "step": 52135 + }, + { + "epoch": 1.2737888745022354, + "grad_norm": 0.4675154983997345, + "learning_rate": 1.857614875263728e-06, + "loss": 0.1608, + "num_input_tokens_seen": 35147424, + "step": 52140 + }, + { + "epoch": 1.2739110253340824, + "grad_norm": 0.13272573053836823, + "learning_rate": 1.8575710145196817e-06, + "loss": 0.0427, + "num_input_tokens_seen": 35150496, + "step": 52145 + }, + { + "epoch": 1.2740331761659296, + "grad_norm": 35.89418029785156, + "learning_rate": 1.8575271475391484e-06, + "loss": 0.2302, + "num_input_tokens_seen": 35153952, + "step": 52150 + }, + { + "epoch": 1.2741553269977768, + "grad_norm": 0.04896676540374756, + "learning_rate": 1.8574832743224471e-06, + "loss": 0.0811, + "num_input_tokens_seen": 35157536, + "step": 52155 + }, + { + "epoch": 1.274277477829624, + "grad_norm": 0.17201733589172363, + "learning_rate": 1.8574393948698967e-06, + "loss": 0.0011, + "num_input_tokens_seen": 35160928, + "step": 52160 + }, + { + "epoch": 1.2743996286614712, + "grad_norm": 28.33481788635254, + "learning_rate": 1.8573955091818166e-06, + "loss": 0.1059, + "num_input_tokens_seen": 35164128, + "step": 52165 + }, + { + "epoch": 1.2745217794933184, + "grad_norm": 0.08746245503425598, + "learning_rate": 1.8573516172585256e-06, + "loss": 0.0718, + "num_input_tokens_seen": 35168224, + "step": 52170 + }, + { + "epoch": 1.2746439303251655, + "grad_norm": 67.42756652832031, + "learning_rate": 1.8573077191003433e-06, + "loss": 0.0203, + "num_input_tokens_seen": 35171616, + "step": 52175 + }, + { + "epoch": 1.2747660811570127, + "grad_norm": 0.4297308325767517, + "learning_rate": 1.857263814707588e-06, + "loss": 0.0592, + "num_input_tokens_seen": 35175008, + "step": 52180 + }, + { + "epoch": 1.27488823198886, + "grad_norm": 0.16938745975494385, + "learning_rate": 1.8572199040805803e-06, + "loss": 0.1161, + "num_input_tokens_seen": 35177952, + "step": 52185 + }, + { + "epoch": 1.2750103828207071, + "grad_norm": 131.6497039794922, + "learning_rate": 1.8571759872196386e-06, + "loss": 0.1294, + "num_input_tokens_seen": 35181728, + "step": 52190 + }, + { + "epoch": 1.2751325336525543, + "grad_norm": 35.01044464111328, + "learning_rate": 1.8571320641250829e-06, + "loss": 0.1537, + "num_input_tokens_seen": 35185376, + "step": 52195 + }, + { + "epoch": 1.2752546844844013, + "grad_norm": 0.05216865614056587, + "learning_rate": 1.857088134797232e-06, + "loss": 0.0996, + "num_input_tokens_seen": 35189024, + "step": 52200 + }, + { + "epoch": 1.2753768353162485, + "grad_norm": 7.486613750457764, + "learning_rate": 1.8570441992364057e-06, + "loss": 0.0464, + "num_input_tokens_seen": 35192928, + "step": 52205 + }, + { + "epoch": 1.2754989861480956, + "grad_norm": 12.407038688659668, + "learning_rate": 1.8570002574429236e-06, + "loss": 0.0576, + "num_input_tokens_seen": 35196064, + "step": 52210 + }, + { + "epoch": 1.2756211369799428, + "grad_norm": 0.3270169496536255, + "learning_rate": 1.8569563094171048e-06, + "loss": 0.1012, + "num_input_tokens_seen": 35199584, + "step": 52215 + }, + { + "epoch": 1.27574328781179, + "grad_norm": 18.02934455871582, + "learning_rate": 1.8569123551592693e-06, + "loss": 0.0752, + "num_input_tokens_seen": 35202720, + "step": 52220 + }, + { + "epoch": 1.2758654386436372, + "grad_norm": 3.006917715072632, + "learning_rate": 1.8568683946697368e-06, + "loss": 0.2353, + "num_input_tokens_seen": 35206112, + "step": 52225 + }, + { + "epoch": 1.2759875894754844, + "grad_norm": 170.55621337890625, + "learning_rate": 1.856824427948827e-06, + "loss": 0.1563, + "num_input_tokens_seen": 35209824, + "step": 52230 + }, + { + "epoch": 1.2761097403073314, + "grad_norm": 0.22915074229240417, + "learning_rate": 1.8567804549968593e-06, + "loss": 0.0032, + "num_input_tokens_seen": 35213600, + "step": 52235 + }, + { + "epoch": 1.2762318911391786, + "grad_norm": 0.11022822558879852, + "learning_rate": 1.8567364758141539e-06, + "loss": 0.0018, + "num_input_tokens_seen": 35216800, + "step": 52240 + }, + { + "epoch": 1.2763540419710258, + "grad_norm": 18.70414924621582, + "learning_rate": 1.85669249040103e-06, + "loss": 0.1648, + "num_input_tokens_seen": 35220320, + "step": 52245 + }, + { + "epoch": 1.276476192802873, + "grad_norm": 14.830822944641113, + "learning_rate": 1.8566484987578083e-06, + "loss": 0.1565, + "num_input_tokens_seen": 35223264, + "step": 52250 + }, + { + "epoch": 1.2765983436347201, + "grad_norm": 84.62750244140625, + "learning_rate": 1.856604500884808e-06, + "loss": 0.1069, + "num_input_tokens_seen": 35226784, + "step": 52255 + }, + { + "epoch": 1.2767204944665673, + "grad_norm": 0.302837610244751, + "learning_rate": 1.85656049678235e-06, + "loss": 0.0816, + "num_input_tokens_seen": 35230432, + "step": 52260 + }, + { + "epoch": 1.2768426452984145, + "grad_norm": 0.25845223665237427, + "learning_rate": 1.856516486450753e-06, + "loss": 0.0797, + "num_input_tokens_seen": 35233504, + "step": 52265 + }, + { + "epoch": 1.2769647961302617, + "grad_norm": 35.83456802368164, + "learning_rate": 1.8564724698903378e-06, + "loss": 0.0503, + "num_input_tokens_seen": 35236640, + "step": 52270 + }, + { + "epoch": 1.277086946962109, + "grad_norm": 2.169076919555664, + "learning_rate": 1.8564284471014247e-06, + "loss": 0.0032, + "num_input_tokens_seen": 35239840, + "step": 52275 + }, + { + "epoch": 1.277209097793956, + "grad_norm": 1.791534662246704, + "learning_rate": 1.8563844180843335e-06, + "loss": 0.0338, + "num_input_tokens_seen": 35243168, + "step": 52280 + }, + { + "epoch": 1.2773312486258033, + "grad_norm": 18.362018585205078, + "learning_rate": 1.8563403828393845e-06, + "loss": 0.1058, + "num_input_tokens_seen": 35246624, + "step": 52285 + }, + { + "epoch": 1.2774533994576502, + "grad_norm": 0.2080661505460739, + "learning_rate": 1.8562963413668977e-06, + "loss": 0.001, + "num_input_tokens_seen": 35249696, + "step": 52290 + }, + { + "epoch": 1.2775755502894974, + "grad_norm": 10.347769737243652, + "learning_rate": 1.8562522936671936e-06, + "loss": 0.0666, + "num_input_tokens_seen": 35252896, + "step": 52295 + }, + { + "epoch": 1.2776977011213446, + "grad_norm": 0.09222155809402466, + "learning_rate": 1.8562082397405927e-06, + "loss": 0.001, + "num_input_tokens_seen": 35256352, + "step": 52300 + }, + { + "epoch": 1.2778198519531918, + "grad_norm": 0.015030997805297375, + "learning_rate": 1.8561641795874153e-06, + "loss": 0.0386, + "num_input_tokens_seen": 35260064, + "step": 52305 + }, + { + "epoch": 1.277942002785039, + "grad_norm": 19.347267150878906, + "learning_rate": 1.8561201132079814e-06, + "loss": 0.1596, + "num_input_tokens_seen": 35263584, + "step": 52310 + }, + { + "epoch": 1.2780641536168862, + "grad_norm": 0.30989161133766174, + "learning_rate": 1.8560760406026119e-06, + "loss": 0.0415, + "num_input_tokens_seen": 35266784, + "step": 52315 + }, + { + "epoch": 1.2781863044487332, + "grad_norm": 0.3502536714076996, + "learning_rate": 1.8560319617716272e-06, + "loss": 0.0582, + "num_input_tokens_seen": 35270240, + "step": 52320 + }, + { + "epoch": 1.2783084552805803, + "grad_norm": 35.52680587768555, + "learning_rate": 1.8559878767153479e-06, + "loss": 0.1099, + "num_input_tokens_seen": 35273312, + "step": 52325 + }, + { + "epoch": 1.2784306061124275, + "grad_norm": 0.20671890676021576, + "learning_rate": 1.8559437854340944e-06, + "loss": 0.0055, + "num_input_tokens_seen": 35277024, + "step": 52330 + }, + { + "epoch": 1.2785527569442747, + "grad_norm": 0.17156971991062164, + "learning_rate": 1.8558996879281875e-06, + "loss": 0.0301, + "num_input_tokens_seen": 35280480, + "step": 52335 + }, + { + "epoch": 1.278674907776122, + "grad_norm": 15.1109037399292, + "learning_rate": 1.8558555841979477e-06, + "loss": 0.1198, + "num_input_tokens_seen": 35284640, + "step": 52340 + }, + { + "epoch": 1.278797058607969, + "grad_norm": 0.019165517762303352, + "learning_rate": 1.855811474243696e-06, + "loss": 0.0244, + "num_input_tokens_seen": 35287904, + "step": 52345 + }, + { + "epoch": 1.2789192094398163, + "grad_norm": 78.49673461914062, + "learning_rate": 1.855767358065753e-06, + "loss": 0.1092, + "num_input_tokens_seen": 35291168, + "step": 52350 + }, + { + "epoch": 1.2790413602716635, + "grad_norm": 0.13271349668502808, + "learning_rate": 1.8557232356644402e-06, + "loss": 0.0594, + "num_input_tokens_seen": 35294880, + "step": 52355 + }, + { + "epoch": 1.2791635111035107, + "grad_norm": 0.1819307953119278, + "learning_rate": 1.8556791070400771e-06, + "loss": 0.0507, + "num_input_tokens_seen": 35298272, + "step": 52360 + }, + { + "epoch": 1.2792856619353579, + "grad_norm": 0.3739522397518158, + "learning_rate": 1.8556349721929857e-06, + "loss": 0.0006, + "num_input_tokens_seen": 35301600, + "step": 52365 + }, + { + "epoch": 1.279407812767205, + "grad_norm": 13.011190414428711, + "learning_rate": 1.8555908311234868e-06, + "loss": 0.1232, + "num_input_tokens_seen": 35304736, + "step": 52370 + }, + { + "epoch": 1.2795299635990522, + "grad_norm": 1.9926000833511353, + "learning_rate": 1.8555466838319012e-06, + "loss": 0.0618, + "num_input_tokens_seen": 35308256, + "step": 52375 + }, + { + "epoch": 1.2796521144308992, + "grad_norm": 7.410982608795166, + "learning_rate": 1.8555025303185497e-06, + "loss": 0.0386, + "num_input_tokens_seen": 35311008, + "step": 52380 + }, + { + "epoch": 1.2797742652627464, + "grad_norm": 11.874845504760742, + "learning_rate": 1.855458370583754e-06, + "loss": 0.0499, + "num_input_tokens_seen": 35314272, + "step": 52385 + }, + { + "epoch": 1.2798964160945936, + "grad_norm": 0.05146576091647148, + "learning_rate": 1.8554142046278347e-06, + "loss": 0.187, + "num_input_tokens_seen": 35317600, + "step": 52390 + }, + { + "epoch": 1.2800185669264408, + "grad_norm": 39.22667694091797, + "learning_rate": 1.8553700324511132e-06, + "loss": 0.0216, + "num_input_tokens_seen": 35320864, + "step": 52395 + }, + { + "epoch": 1.280140717758288, + "grad_norm": 0.18702971935272217, + "learning_rate": 1.8553258540539111e-06, + "loss": 0.0869, + "num_input_tokens_seen": 35323936, + "step": 52400 + }, + { + "epoch": 1.2802628685901352, + "grad_norm": 0.07560386508703232, + "learning_rate": 1.8552816694365489e-06, + "loss": 0.1137, + "num_input_tokens_seen": 35327136, + "step": 52405 + }, + { + "epoch": 1.2803850194219821, + "grad_norm": 0.2207210808992386, + "learning_rate": 1.8552374785993487e-06, + "loss": 0.0907, + "num_input_tokens_seen": 35330720, + "step": 52410 + }, + { + "epoch": 1.2805071702538293, + "grad_norm": 0.09498733282089233, + "learning_rate": 1.8551932815426315e-06, + "loss": 0.0615, + "num_input_tokens_seen": 35333920, + "step": 52415 + }, + { + "epoch": 1.2806293210856765, + "grad_norm": 0.7675701975822449, + "learning_rate": 1.8551490782667188e-06, + "loss": 0.0016, + "num_input_tokens_seen": 35337504, + "step": 52420 + }, + { + "epoch": 1.2807514719175237, + "grad_norm": 0.10342734307050705, + "learning_rate": 1.8551048687719315e-06, + "loss": 0.0842, + "num_input_tokens_seen": 35340512, + "step": 52425 + }, + { + "epoch": 1.2808736227493709, + "grad_norm": 0.13581562042236328, + "learning_rate": 1.8550606530585922e-06, + "loss": 0.0873, + "num_input_tokens_seen": 35343392, + "step": 52430 + }, + { + "epoch": 1.280995773581218, + "grad_norm": 0.09667894244194031, + "learning_rate": 1.8550164311270215e-06, + "loss": 0.0441, + "num_input_tokens_seen": 35346784, + "step": 52435 + }, + { + "epoch": 1.2811179244130653, + "grad_norm": 172.4896240234375, + "learning_rate": 1.8549722029775414e-06, + "loss": 0.0698, + "num_input_tokens_seen": 35350048, + "step": 52440 + }, + { + "epoch": 1.2812400752449125, + "grad_norm": 0.813492476940155, + "learning_rate": 1.8549279686104734e-06, + "loss": 0.0022, + "num_input_tokens_seen": 35353568, + "step": 52445 + }, + { + "epoch": 1.2813622260767596, + "grad_norm": 15.103371620178223, + "learning_rate": 1.8548837280261393e-06, + "loss": 0.0998, + "num_input_tokens_seen": 35356896, + "step": 52450 + }, + { + "epoch": 1.2814843769086068, + "grad_norm": 0.39661160111427307, + "learning_rate": 1.8548394812248612e-06, + "loss": 0.095, + "num_input_tokens_seen": 35360288, + "step": 52455 + }, + { + "epoch": 1.281606527740454, + "grad_norm": 14.238640785217285, + "learning_rate": 1.85479522820696e-06, + "loss": 0.1571, + "num_input_tokens_seen": 35363296, + "step": 52460 + }, + { + "epoch": 1.281728678572301, + "grad_norm": 1.1522719860076904, + "learning_rate": 1.854750968972758e-06, + "loss": 0.1396, + "num_input_tokens_seen": 35366880, + "step": 52465 + }, + { + "epoch": 1.2818508294041482, + "grad_norm": 19.622875213623047, + "learning_rate": 1.8547067035225775e-06, + "loss": 0.1012, + "num_input_tokens_seen": 35370400, + "step": 52470 + }, + { + "epoch": 1.2819729802359954, + "grad_norm": 0.05643227696418762, + "learning_rate": 1.8546624318567395e-06, + "loss": 0.0727, + "num_input_tokens_seen": 35373792, + "step": 52475 + }, + { + "epoch": 1.2820951310678426, + "grad_norm": 22.620677947998047, + "learning_rate": 1.8546181539755665e-06, + "loss": 0.0043, + "num_input_tokens_seen": 35377056, + "step": 52480 + }, + { + "epoch": 1.2822172818996898, + "grad_norm": 17.451501846313477, + "learning_rate": 1.8545738698793807e-06, + "loss": 0.074, + "num_input_tokens_seen": 35380576, + "step": 52485 + }, + { + "epoch": 1.282339432731537, + "grad_norm": 16.301881790161133, + "learning_rate": 1.8545295795685033e-06, + "loss": 0.0824, + "num_input_tokens_seen": 35384352, + "step": 52490 + }, + { + "epoch": 1.2824615835633841, + "grad_norm": 0.7614904046058655, + "learning_rate": 1.8544852830432576e-06, + "loss": 0.0362, + "num_input_tokens_seen": 35387616, + "step": 52495 + }, + { + "epoch": 1.282583734395231, + "grad_norm": 0.8684007525444031, + "learning_rate": 1.8544409803039647e-06, + "loss": 0.0025, + "num_input_tokens_seen": 35391072, + "step": 52500 + }, + { + "epoch": 1.2827058852270783, + "grad_norm": 59.57621383666992, + "learning_rate": 1.8543966713509472e-06, + "loss": 0.2443, + "num_input_tokens_seen": 35394848, + "step": 52505 + }, + { + "epoch": 1.2828280360589255, + "grad_norm": 41.55668640136719, + "learning_rate": 1.8543523561845276e-06, + "loss": 0.0889, + "num_input_tokens_seen": 35398240, + "step": 52510 + }, + { + "epoch": 1.2829501868907727, + "grad_norm": 5.202472686767578, + "learning_rate": 1.8543080348050274e-06, + "loss": 0.0338, + "num_input_tokens_seen": 35401632, + "step": 52515 + }, + { + "epoch": 1.2830723377226199, + "grad_norm": 88.77193450927734, + "learning_rate": 1.8542637072127695e-06, + "loss": 0.0603, + "num_input_tokens_seen": 35404640, + "step": 52520 + }, + { + "epoch": 1.283194488554467, + "grad_norm": 0.09404394775629044, + "learning_rate": 1.8542193734080764e-06, + "loss": 0.0614, + "num_input_tokens_seen": 35407456, + "step": 52525 + }, + { + "epoch": 1.2833166393863142, + "grad_norm": 0.25622621178627014, + "learning_rate": 1.8541750333912703e-06, + "loss": 0.1687, + "num_input_tokens_seen": 35410784, + "step": 52530 + }, + { + "epoch": 1.2834387902181614, + "grad_norm": 44.45455551147461, + "learning_rate": 1.8541306871626733e-06, + "loss": 0.0623, + "num_input_tokens_seen": 35413856, + "step": 52535 + }, + { + "epoch": 1.2835609410500086, + "grad_norm": 41.468326568603516, + "learning_rate": 1.8540863347226084e-06, + "loss": 0.1718, + "num_input_tokens_seen": 35417056, + "step": 52540 + }, + { + "epoch": 1.2836830918818558, + "grad_norm": 194.89041137695312, + "learning_rate": 1.8540419760713979e-06, + "loss": 0.1321, + "num_input_tokens_seen": 35420192, + "step": 52545 + }, + { + "epoch": 1.283805242713703, + "grad_norm": 0.43013298511505127, + "learning_rate": 1.8539976112093644e-06, + "loss": 0.0411, + "num_input_tokens_seen": 35423712, + "step": 52550 + }, + { + "epoch": 1.28392739354555, + "grad_norm": 30.879812240600586, + "learning_rate": 1.853953240136831e-06, + "loss": 0.1122, + "num_input_tokens_seen": 35426848, + "step": 52555 + }, + { + "epoch": 1.2840495443773972, + "grad_norm": 0.05143484100699425, + "learning_rate": 1.8539088628541193e-06, + "loss": 0.0471, + "num_input_tokens_seen": 35429984, + "step": 52560 + }, + { + "epoch": 1.2841716952092443, + "grad_norm": 21.9586238861084, + "learning_rate": 1.8538644793615532e-06, + "loss": 0.0956, + "num_input_tokens_seen": 35433184, + "step": 52565 + }, + { + "epoch": 1.2842938460410915, + "grad_norm": 2.9223499298095703, + "learning_rate": 1.8538200896594546e-06, + "loss": 0.0697, + "num_input_tokens_seen": 35436576, + "step": 52570 + }, + { + "epoch": 1.2844159968729387, + "grad_norm": 86.12430572509766, + "learning_rate": 1.8537756937481465e-06, + "loss": 0.0124, + "num_input_tokens_seen": 35440288, + "step": 52575 + }, + { + "epoch": 1.284538147704786, + "grad_norm": 20.265743255615234, + "learning_rate": 1.8537312916279524e-06, + "loss": 0.1329, + "num_input_tokens_seen": 35443488, + "step": 52580 + }, + { + "epoch": 1.284660298536633, + "grad_norm": 0.10588684678077698, + "learning_rate": 1.8536868832991946e-06, + "loss": 0.0242, + "num_input_tokens_seen": 35447072, + "step": 52585 + }, + { + "epoch": 1.28478244936848, + "grad_norm": 0.07364339381456375, + "learning_rate": 1.8536424687621958e-06, + "loss": 0.0297, + "num_input_tokens_seen": 35450592, + "step": 52590 + }, + { + "epoch": 1.2849046002003273, + "grad_norm": 0.6387683153152466, + "learning_rate": 1.8535980480172797e-06, + "loss": 0.0983, + "num_input_tokens_seen": 35453920, + "step": 52595 + }, + { + "epoch": 1.2850267510321745, + "grad_norm": 0.08949076384305954, + "learning_rate": 1.8535536210647691e-06, + "loss": 0.046, + "num_input_tokens_seen": 35456928, + "step": 52600 + }, + { + "epoch": 1.2851489018640216, + "grad_norm": 51.28281784057617, + "learning_rate": 1.8535091879049868e-06, + "loss": 0.1515, + "num_input_tokens_seen": 35460192, + "step": 52605 + }, + { + "epoch": 1.2852710526958688, + "grad_norm": 180.63563537597656, + "learning_rate": 1.8534647485382561e-06, + "loss": 0.1838, + "num_input_tokens_seen": 35463712, + "step": 52610 + }, + { + "epoch": 1.285393203527716, + "grad_norm": 0.5813500285148621, + "learning_rate": 1.8534203029649002e-06, + "loss": 0.0022, + "num_input_tokens_seen": 35466720, + "step": 52615 + }, + { + "epoch": 1.2855153543595632, + "grad_norm": 68.77971649169922, + "learning_rate": 1.8533758511852424e-06, + "loss": 0.0768, + "num_input_tokens_seen": 35470176, + "step": 52620 + }, + { + "epoch": 1.2856375051914104, + "grad_norm": 31.985763549804688, + "learning_rate": 1.853331393199606e-06, + "loss": 0.0344, + "num_input_tokens_seen": 35473568, + "step": 52625 + }, + { + "epoch": 1.2857596560232576, + "grad_norm": 0.49823251366615295, + "learning_rate": 1.8532869290083139e-06, + "loss": 0.0547, + "num_input_tokens_seen": 35477536, + "step": 52630 + }, + { + "epoch": 1.2858818068551048, + "grad_norm": 20.316926956176758, + "learning_rate": 1.8532424586116899e-06, + "loss": 0.1291, + "num_input_tokens_seen": 35480480, + "step": 52635 + }, + { + "epoch": 1.286003957686952, + "grad_norm": 80.80620574951172, + "learning_rate": 1.8531979820100574e-06, + "loss": 0.1439, + "num_input_tokens_seen": 35484000, + "step": 52640 + }, + { + "epoch": 1.286126108518799, + "grad_norm": 29.717788696289062, + "learning_rate": 1.8531534992037395e-06, + "loss": 0.0509, + "num_input_tokens_seen": 35487008, + "step": 52645 + }, + { + "epoch": 1.2862482593506461, + "grad_norm": 0.11400293558835983, + "learning_rate": 1.8531090101930595e-06, + "loss": 0.0082, + "num_input_tokens_seen": 35490528, + "step": 52650 + }, + { + "epoch": 1.2863704101824933, + "grad_norm": 0.2857948839664459, + "learning_rate": 1.853064514978342e-06, + "loss": 0.1251, + "num_input_tokens_seen": 35493728, + "step": 52655 + }, + { + "epoch": 1.2864925610143405, + "grad_norm": 9.247663497924805, + "learning_rate": 1.8530200135599095e-06, + "loss": 0.0998, + "num_input_tokens_seen": 35496736, + "step": 52660 + }, + { + "epoch": 1.2866147118461877, + "grad_norm": 0.12227342277765274, + "learning_rate": 1.8529755059380863e-06, + "loss": 0.0935, + "num_input_tokens_seen": 35500000, + "step": 52665 + }, + { + "epoch": 1.2867368626780349, + "grad_norm": 8.381556510925293, + "learning_rate": 1.8529309921131954e-06, + "loss": 0.0905, + "num_input_tokens_seen": 35503008, + "step": 52670 + }, + { + "epoch": 1.286859013509882, + "grad_norm": 0.7957706451416016, + "learning_rate": 1.8528864720855613e-06, + "loss": 0.0017, + "num_input_tokens_seen": 35506336, + "step": 52675 + }, + { + "epoch": 1.286981164341729, + "grad_norm": 0.2724474370479584, + "learning_rate": 1.8528419458555072e-06, + "loss": 0.0016, + "num_input_tokens_seen": 35510112, + "step": 52680 + }, + { + "epoch": 1.2871033151735762, + "grad_norm": 12.336623191833496, + "learning_rate": 1.8527974134233571e-06, + "loss": 0.0415, + "num_input_tokens_seen": 35513056, + "step": 52685 + }, + { + "epoch": 1.2872254660054234, + "grad_norm": 94.06697082519531, + "learning_rate": 1.8527528747894347e-06, + "loss": 0.0571, + "num_input_tokens_seen": 35516832, + "step": 52690 + }, + { + "epoch": 1.2873476168372706, + "grad_norm": 0.11085094511508942, + "learning_rate": 1.8527083299540641e-06, + "loss": 0.0006, + "num_input_tokens_seen": 35520096, + "step": 52695 + }, + { + "epoch": 1.2874697676691178, + "grad_norm": 21.436336517333984, + "learning_rate": 1.8526637789175696e-06, + "loss": 0.1375, + "num_input_tokens_seen": 35523872, + "step": 52700 + }, + { + "epoch": 1.287591918500965, + "grad_norm": 0.08883228898048401, + "learning_rate": 1.8526192216802742e-06, + "loss": 0.173, + "num_input_tokens_seen": 35527840, + "step": 52705 + }, + { + "epoch": 1.2877140693328122, + "grad_norm": 8.513446807861328, + "learning_rate": 1.8525746582425028e-06, + "loss": 0.1101, + "num_input_tokens_seen": 35530976, + "step": 52710 + }, + { + "epoch": 1.2878362201646594, + "grad_norm": 27.75189781188965, + "learning_rate": 1.8525300886045792e-06, + "loss": 0.2526, + "num_input_tokens_seen": 35534432, + "step": 52715 + }, + { + "epoch": 1.2879583709965066, + "grad_norm": 0.025965625420212746, + "learning_rate": 1.8524855127668272e-06, + "loss": 0.1337, + "num_input_tokens_seen": 35537312, + "step": 52720 + }, + { + "epoch": 1.2880805218283538, + "grad_norm": 4.335248947143555, + "learning_rate": 1.8524409307295716e-06, + "loss": 0.0739, + "num_input_tokens_seen": 35540640, + "step": 52725 + }, + { + "epoch": 1.288202672660201, + "grad_norm": 0.2505491077899933, + "learning_rate": 1.8523963424931361e-06, + "loss": 0.0419, + "num_input_tokens_seen": 35543776, + "step": 52730 + }, + { + "epoch": 1.288324823492048, + "grad_norm": 10.368141174316406, + "learning_rate": 1.852351748057845e-06, + "loss": 0.0466, + "num_input_tokens_seen": 35547104, + "step": 52735 + }, + { + "epoch": 1.288446974323895, + "grad_norm": 14.576863288879395, + "learning_rate": 1.8523071474240228e-06, + "loss": 0.0577, + "num_input_tokens_seen": 35550368, + "step": 52740 + }, + { + "epoch": 1.2885691251557423, + "grad_norm": 20.16292953491211, + "learning_rate": 1.8522625405919938e-06, + "loss": 0.1469, + "num_input_tokens_seen": 35553248, + "step": 52745 + }, + { + "epoch": 1.2886912759875895, + "grad_norm": 0.06736789643764496, + "learning_rate": 1.8522179275620825e-06, + "loss": 0.0592, + "num_input_tokens_seen": 35556256, + "step": 52750 + }, + { + "epoch": 1.2888134268194367, + "grad_norm": 0.46141862869262695, + "learning_rate": 1.8521733083346131e-06, + "loss": 0.1206, + "num_input_tokens_seen": 35559712, + "step": 52755 + }, + { + "epoch": 1.2889355776512839, + "grad_norm": 13.816710472106934, + "learning_rate": 1.8521286829099104e-06, + "loss": 0.0855, + "num_input_tokens_seen": 35562656, + "step": 52760 + }, + { + "epoch": 1.289057728483131, + "grad_norm": 0.1693304032087326, + "learning_rate": 1.8520840512882985e-06, + "loss": 0.1328, + "num_input_tokens_seen": 35566432, + "step": 52765 + }, + { + "epoch": 1.289179879314978, + "grad_norm": 21.824827194213867, + "learning_rate": 1.8520394134701022e-06, + "loss": 0.0984, + "num_input_tokens_seen": 35569952, + "step": 52770 + }, + { + "epoch": 1.2893020301468252, + "grad_norm": 15.071465492248535, + "learning_rate": 1.8519947694556461e-06, + "loss": 0.0838, + "num_input_tokens_seen": 35573024, + "step": 52775 + }, + { + "epoch": 1.2894241809786724, + "grad_norm": 0.7028993368148804, + "learning_rate": 1.8519501192452548e-06, + "loss": 0.1193, + "num_input_tokens_seen": 35576352, + "step": 52780 + }, + { + "epoch": 1.2895463318105196, + "grad_norm": 28.04998016357422, + "learning_rate": 1.8519054628392535e-06, + "loss": 0.0345, + "num_input_tokens_seen": 35579552, + "step": 52785 + }, + { + "epoch": 1.2896684826423668, + "grad_norm": 0.2715553641319275, + "learning_rate": 1.8518608002379664e-06, + "loss": 0.1339, + "num_input_tokens_seen": 35583264, + "step": 52790 + }, + { + "epoch": 1.289790633474214, + "grad_norm": 0.2599318325519562, + "learning_rate": 1.8518161314417181e-06, + "loss": 0.0316, + "num_input_tokens_seen": 35586592, + "step": 52795 + }, + { + "epoch": 1.2899127843060612, + "grad_norm": 1.9973633289337158, + "learning_rate": 1.851771456450834e-06, + "loss": 0.0029, + "num_input_tokens_seen": 35589792, + "step": 52800 + }, + { + "epoch": 1.2900349351379083, + "grad_norm": 0.6662908792495728, + "learning_rate": 1.8517267752656387e-06, + "loss": 0.0027, + "num_input_tokens_seen": 35592992, + "step": 52805 + }, + { + "epoch": 1.2901570859697555, + "grad_norm": 0.2350725382566452, + "learning_rate": 1.8516820878864574e-06, + "loss": 0.0313, + "num_input_tokens_seen": 35596704, + "step": 52810 + }, + { + "epoch": 1.2902792368016027, + "grad_norm": 12.004608154296875, + "learning_rate": 1.8516373943136147e-06, + "loss": 0.1372, + "num_input_tokens_seen": 35600160, + "step": 52815 + }, + { + "epoch": 1.29040138763345, + "grad_norm": 0.09558333456516266, + "learning_rate": 1.8515926945474357e-06, + "loss": 0.0029, + "num_input_tokens_seen": 35603104, + "step": 52820 + }, + { + "epoch": 1.2905235384652969, + "grad_norm": 27.230884552001953, + "learning_rate": 1.851547988588246e-06, + "loss": 0.1577, + "num_input_tokens_seen": 35606432, + "step": 52825 + }, + { + "epoch": 1.290645689297144, + "grad_norm": 98.923828125, + "learning_rate": 1.8515032764363698e-06, + "loss": 0.2623, + "num_input_tokens_seen": 35610080, + "step": 52830 + }, + { + "epoch": 1.2907678401289913, + "grad_norm": 7.860264778137207, + "learning_rate": 1.8514585580921328e-06, + "loss": 0.1614, + "num_input_tokens_seen": 35613344, + "step": 52835 + }, + { + "epoch": 1.2908899909608385, + "grad_norm": 24.293676376342773, + "learning_rate": 1.8514138335558604e-06, + "loss": 0.1081, + "num_input_tokens_seen": 35616608, + "step": 52840 + }, + { + "epoch": 1.2910121417926856, + "grad_norm": 0.30976778268814087, + "learning_rate": 1.8513691028278776e-06, + "loss": 0.001, + "num_input_tokens_seen": 35619744, + "step": 52845 + }, + { + "epoch": 1.2911342926245328, + "grad_norm": 0.3577079176902771, + "learning_rate": 1.8513243659085097e-06, + "loss": 0.0517, + "num_input_tokens_seen": 35623264, + "step": 52850 + }, + { + "epoch": 1.29125644345638, + "grad_norm": 0.2105468511581421, + "learning_rate": 1.8512796227980818e-06, + "loss": 0.0561, + "num_input_tokens_seen": 35626720, + "step": 52855 + }, + { + "epoch": 1.291378594288227, + "grad_norm": 0.14541052281856537, + "learning_rate": 1.8512348734969196e-06, + "loss": 0.0614, + "num_input_tokens_seen": 35629792, + "step": 52860 + }, + { + "epoch": 1.2915007451200742, + "grad_norm": 99.4928970336914, + "learning_rate": 1.8511901180053485e-06, + "loss": 0.1245, + "num_input_tokens_seen": 35633184, + "step": 52865 + }, + { + "epoch": 1.2916228959519214, + "grad_norm": 1.1235225200653076, + "learning_rate": 1.8511453563236938e-06, + "loss": 0.04, + "num_input_tokens_seen": 35636704, + "step": 52870 + }, + { + "epoch": 1.2917450467837686, + "grad_norm": 0.2470174878835678, + "learning_rate": 1.8511005884522813e-06, + "loss": 0.0387, + "num_input_tokens_seen": 35640352, + "step": 52875 + }, + { + "epoch": 1.2918671976156157, + "grad_norm": 0.2535180151462555, + "learning_rate": 1.8510558143914363e-06, + "loss": 0.0359, + "num_input_tokens_seen": 35644064, + "step": 52880 + }, + { + "epoch": 1.291989348447463, + "grad_norm": 0.05397181957960129, + "learning_rate": 1.8510110341414847e-06, + "loss": 0.0565, + "num_input_tokens_seen": 35647200, + "step": 52885 + }, + { + "epoch": 1.2921114992793101, + "grad_norm": 34.268272399902344, + "learning_rate": 1.8509662477027517e-06, + "loss": 0.1693, + "num_input_tokens_seen": 35650400, + "step": 52890 + }, + { + "epoch": 1.2922336501111573, + "grad_norm": 113.81163024902344, + "learning_rate": 1.8509214550755633e-06, + "loss": 0.1691, + "num_input_tokens_seen": 35653600, + "step": 52895 + }, + { + "epoch": 1.2923558009430045, + "grad_norm": 9.641225814819336, + "learning_rate": 1.8508766562602455e-06, + "loss": 0.1415, + "num_input_tokens_seen": 35657440, + "step": 52900 + }, + { + "epoch": 1.2924779517748517, + "grad_norm": 0.2733023464679718, + "learning_rate": 1.8508318512571238e-06, + "loss": 0.0871, + "num_input_tokens_seen": 35661152, + "step": 52905 + }, + { + "epoch": 1.2926001026066989, + "grad_norm": 115.42435455322266, + "learning_rate": 1.8507870400665236e-06, + "loss": 0.0049, + "num_input_tokens_seen": 35664928, + "step": 52910 + }, + { + "epoch": 1.2927222534385459, + "grad_norm": 9.324633598327637, + "learning_rate": 1.8507422226887712e-06, + "loss": 0.0415, + "num_input_tokens_seen": 35668384, + "step": 52915 + }, + { + "epoch": 1.292844404270393, + "grad_norm": 13.958195686340332, + "learning_rate": 1.850697399124193e-06, + "loss": 0.1226, + "num_input_tokens_seen": 35671648, + "step": 52920 + }, + { + "epoch": 1.2929665551022402, + "grad_norm": 59.904056549072266, + "learning_rate": 1.8506525693731141e-06, + "loss": 0.2342, + "num_input_tokens_seen": 35674912, + "step": 52925 + }, + { + "epoch": 1.2930887059340874, + "grad_norm": 0.11946405470371246, + "learning_rate": 1.8506077334358615e-06, + "loss": 0.0255, + "num_input_tokens_seen": 35678688, + "step": 52930 + }, + { + "epoch": 1.2932108567659346, + "grad_norm": 0.045888565480709076, + "learning_rate": 1.85056289131276e-06, + "loss": 0.0016, + "num_input_tokens_seen": 35682400, + "step": 52935 + }, + { + "epoch": 1.2933330075977818, + "grad_norm": 36.49285888671875, + "learning_rate": 1.8505180430041367e-06, + "loss": 0.0977, + "num_input_tokens_seen": 35685792, + "step": 52940 + }, + { + "epoch": 1.2934551584296288, + "grad_norm": 0.20501862466335297, + "learning_rate": 1.8504731885103175e-06, + "loss": 0.0011, + "num_input_tokens_seen": 35689120, + "step": 52945 + }, + { + "epoch": 1.293577309261476, + "grad_norm": 0.058447353541851044, + "learning_rate": 1.8504283278316284e-06, + "loss": 0.1232, + "num_input_tokens_seen": 35692896, + "step": 52950 + }, + { + "epoch": 1.2936994600933232, + "grad_norm": 0.02482379972934723, + "learning_rate": 1.8503834609683957e-06, + "loss": 0.1634, + "num_input_tokens_seen": 35695840, + "step": 52955 + }, + { + "epoch": 1.2938216109251703, + "grad_norm": 0.09336850047111511, + "learning_rate": 1.8503385879209457e-06, + "loss": 0.0694, + "num_input_tokens_seen": 35699296, + "step": 52960 + }, + { + "epoch": 1.2939437617570175, + "grad_norm": 0.09030751138925552, + "learning_rate": 1.8502937086896048e-06, + "loss": 0.1277, + "num_input_tokens_seen": 35702368, + "step": 52965 + }, + { + "epoch": 1.2940659125888647, + "grad_norm": 1.2011581659317017, + "learning_rate": 1.8502488232746996e-06, + "loss": 0.0476, + "num_input_tokens_seen": 35705696, + "step": 52970 + }, + { + "epoch": 1.294188063420712, + "grad_norm": 0.05959802493453026, + "learning_rate": 1.8502039316765562e-06, + "loss": 0.0302, + "num_input_tokens_seen": 35708832, + "step": 52975 + }, + { + "epoch": 1.294310214252559, + "grad_norm": 0.40464505553245544, + "learning_rate": 1.8501590338955008e-06, + "loss": 0.0026, + "num_input_tokens_seen": 35711968, + "step": 52980 + }, + { + "epoch": 1.2944323650844063, + "grad_norm": 0.19674935936927795, + "learning_rate": 1.8501141299318605e-06, + "loss": 0.0961, + "num_input_tokens_seen": 35715296, + "step": 52985 + }, + { + "epoch": 1.2945545159162535, + "grad_norm": 0.3937717080116272, + "learning_rate": 1.8500692197859616e-06, + "loss": 0.0009, + "num_input_tokens_seen": 35718368, + "step": 52990 + }, + { + "epoch": 1.2946766667481007, + "grad_norm": 0.4105014204978943, + "learning_rate": 1.850024303458131e-06, + "loss": 0.0488, + "num_input_tokens_seen": 35721824, + "step": 52995 + }, + { + "epoch": 1.2947988175799476, + "grad_norm": 0.04927727207541466, + "learning_rate": 1.8499793809486945e-06, + "loss": 0.1413, + "num_input_tokens_seen": 35725024, + "step": 53000 + }, + { + "epoch": 1.2949209684117948, + "grad_norm": 0.9458499550819397, + "learning_rate": 1.8499344522579794e-06, + "loss": 0.0305, + "num_input_tokens_seen": 35728416, + "step": 53005 + }, + { + "epoch": 1.295043119243642, + "grad_norm": 22.578027725219727, + "learning_rate": 1.8498895173863125e-06, + "loss": 0.0607, + "num_input_tokens_seen": 35731808, + "step": 53010 + }, + { + "epoch": 1.2951652700754892, + "grad_norm": 12.686452865600586, + "learning_rate": 1.8498445763340204e-06, + "loss": 0.1866, + "num_input_tokens_seen": 35734880, + "step": 53015 + }, + { + "epoch": 1.2952874209073364, + "grad_norm": 0.2515212595462799, + "learning_rate": 1.84979962910143e-06, + "loss": 0.0943, + "num_input_tokens_seen": 35738464, + "step": 53020 + }, + { + "epoch": 1.2954095717391836, + "grad_norm": 0.03380141779780388, + "learning_rate": 1.8497546756888683e-06, + "loss": 0.1724, + "num_input_tokens_seen": 35742176, + "step": 53025 + }, + { + "epoch": 1.2955317225710308, + "grad_norm": 31.458084106445312, + "learning_rate": 1.8497097160966616e-06, + "loss": 0.1477, + "num_input_tokens_seen": 35745696, + "step": 53030 + }, + { + "epoch": 1.2956538734028777, + "grad_norm": 21.03895378112793, + "learning_rate": 1.8496647503251377e-06, + "loss": 0.1135, + "num_input_tokens_seen": 35749472, + "step": 53035 + }, + { + "epoch": 1.295776024234725, + "grad_norm": 0.5178502798080444, + "learning_rate": 1.849619778374623e-06, + "loss": 0.1194, + "num_input_tokens_seen": 35752736, + "step": 53040 + }, + { + "epoch": 1.2958981750665721, + "grad_norm": 0.3661326766014099, + "learning_rate": 1.8495748002454446e-06, + "loss": 0.0285, + "num_input_tokens_seen": 35756128, + "step": 53045 + }, + { + "epoch": 1.2960203258984193, + "grad_norm": 7.51620626449585, + "learning_rate": 1.84952981593793e-06, + "loss": 0.1519, + "num_input_tokens_seen": 35759136, + "step": 53050 + }, + { + "epoch": 1.2961424767302665, + "grad_norm": 0.9327648878097534, + "learning_rate": 1.8494848254524062e-06, + "loss": 0.0572, + "num_input_tokens_seen": 35762336, + "step": 53055 + }, + { + "epoch": 1.2962646275621137, + "grad_norm": 0.03591260313987732, + "learning_rate": 1.8494398287892002e-06, + "loss": 0.0026, + "num_input_tokens_seen": 35765600, + "step": 53060 + }, + { + "epoch": 1.2963867783939609, + "grad_norm": 0.6520580649375916, + "learning_rate": 1.849394825948639e-06, + "loss": 0.0017, + "num_input_tokens_seen": 35769056, + "step": 53065 + }, + { + "epoch": 1.296508929225808, + "grad_norm": 0.5664069056510925, + "learning_rate": 1.8493498169310505e-06, + "loss": 0.0598, + "num_input_tokens_seen": 35772384, + "step": 53070 + }, + { + "epoch": 1.2966310800576553, + "grad_norm": 66.2616958618164, + "learning_rate": 1.8493048017367613e-06, + "loss": 0.1647, + "num_input_tokens_seen": 35775584, + "step": 53075 + }, + { + "epoch": 1.2967532308895025, + "grad_norm": 0.5477205514907837, + "learning_rate": 1.8492597803660995e-06, + "loss": 0.1373, + "num_input_tokens_seen": 35779232, + "step": 53080 + }, + { + "epoch": 1.2968753817213496, + "grad_norm": 9.841727256774902, + "learning_rate": 1.8492147528193919e-06, + "loss": 0.0436, + "num_input_tokens_seen": 35782816, + "step": 53085 + }, + { + "epoch": 1.2969975325531966, + "grad_norm": 166.41403198242188, + "learning_rate": 1.8491697190969664e-06, + "loss": 0.0809, + "num_input_tokens_seen": 35785952, + "step": 53090 + }, + { + "epoch": 1.2971196833850438, + "grad_norm": 0.22653362154960632, + "learning_rate": 1.8491246791991502e-06, + "loss": 0.0008, + "num_input_tokens_seen": 35789792, + "step": 53095 + }, + { + "epoch": 1.297241834216891, + "grad_norm": 26.30208969116211, + "learning_rate": 1.849079633126271e-06, + "loss": 0.0711, + "num_input_tokens_seen": 35793184, + "step": 53100 + }, + { + "epoch": 1.2973639850487382, + "grad_norm": 30.160884857177734, + "learning_rate": 1.8490345808786564e-06, + "loss": 0.2188, + "num_input_tokens_seen": 35796832, + "step": 53105 + }, + { + "epoch": 1.2974861358805854, + "grad_norm": 37.4919319152832, + "learning_rate": 1.8489895224566339e-06, + "loss": 0.0716, + "num_input_tokens_seen": 35800160, + "step": 53110 + }, + { + "epoch": 1.2976082867124326, + "grad_norm": 132.77760314941406, + "learning_rate": 1.848944457860531e-06, + "loss": 0.1335, + "num_input_tokens_seen": 35803616, + "step": 53115 + }, + { + "epoch": 1.2977304375442797, + "grad_norm": 8.152387619018555, + "learning_rate": 1.8488993870906761e-06, + "loss": 0.1742, + "num_input_tokens_seen": 35807584, + "step": 53120 + }, + { + "epoch": 1.2978525883761267, + "grad_norm": 0.08918684720993042, + "learning_rate": 1.8488543101473963e-06, + "loss": 0.1373, + "num_input_tokens_seen": 35811488, + "step": 53125 + }, + { + "epoch": 1.297974739207974, + "grad_norm": 0.4842546582221985, + "learning_rate": 1.8488092270310197e-06, + "loss": 0.2268, + "num_input_tokens_seen": 35814880, + "step": 53130 + }, + { + "epoch": 1.298096890039821, + "grad_norm": 0.5850440263748169, + "learning_rate": 1.848764137741874e-06, + "loss": 0.1117, + "num_input_tokens_seen": 35818016, + "step": 53135 + }, + { + "epoch": 1.2982190408716683, + "grad_norm": 0.5294996500015259, + "learning_rate": 1.8487190422802872e-06, + "loss": 0.0394, + "num_input_tokens_seen": 35821408, + "step": 53140 + }, + { + "epoch": 1.2983411917035155, + "grad_norm": 0.23933136463165283, + "learning_rate": 1.8486739406465874e-06, + "loss": 0.0022, + "num_input_tokens_seen": 35824672, + "step": 53145 + }, + { + "epoch": 1.2984633425353627, + "grad_norm": 11.323979377746582, + "learning_rate": 1.8486288328411024e-06, + "loss": 0.1339, + "num_input_tokens_seen": 35827936, + "step": 53150 + }, + { + "epoch": 1.2985854933672099, + "grad_norm": 49.09061050415039, + "learning_rate": 1.8485837188641602e-06, + "loss": 0.0335, + "num_input_tokens_seen": 35831136, + "step": 53155 + }, + { + "epoch": 1.298707644199057, + "grad_norm": 41.00819396972656, + "learning_rate": 1.848538598716089e-06, + "loss": 0.1079, + "num_input_tokens_seen": 35834784, + "step": 53160 + }, + { + "epoch": 1.2988297950309042, + "grad_norm": 0.033825136721134186, + "learning_rate": 1.8484934723972167e-06, + "loss": 0.029, + "num_input_tokens_seen": 35838176, + "step": 53165 + }, + { + "epoch": 1.2989519458627514, + "grad_norm": 17.26095962524414, + "learning_rate": 1.8484483399078718e-06, + "loss": 0.1262, + "num_input_tokens_seen": 35841120, + "step": 53170 + }, + { + "epoch": 1.2990740966945986, + "grad_norm": 0.11489014327526093, + "learning_rate": 1.8484032012483825e-06, + "loss": 0.0012, + "num_input_tokens_seen": 35844256, + "step": 53175 + }, + { + "epoch": 1.2991962475264456, + "grad_norm": 5.575078964233398, + "learning_rate": 1.8483580564190768e-06, + "loss": 0.0595, + "num_input_tokens_seen": 35848224, + "step": 53180 + }, + { + "epoch": 1.2993183983582928, + "grad_norm": 22.667858123779297, + "learning_rate": 1.848312905420283e-06, + "loss": 0.1228, + "num_input_tokens_seen": 35851808, + "step": 53185 + }, + { + "epoch": 1.29944054919014, + "grad_norm": 12.718514442443848, + "learning_rate": 1.84826774825233e-06, + "loss": 0.0527, + "num_input_tokens_seen": 35855200, + "step": 53190 + }, + { + "epoch": 1.2995627000219871, + "grad_norm": 13.314168930053711, + "learning_rate": 1.8482225849155455e-06, + "loss": 0.0597, + "num_input_tokens_seen": 35858720, + "step": 53195 + }, + { + "epoch": 1.2996848508538343, + "grad_norm": 0.3152402639389038, + "learning_rate": 1.8481774154102584e-06, + "loss": 0.0569, + "num_input_tokens_seen": 35862304, + "step": 53200 + }, + { + "epoch": 1.2998070016856815, + "grad_norm": 39.78969955444336, + "learning_rate": 1.8481322397367966e-06, + "loss": 0.1173, + "num_input_tokens_seen": 35865696, + "step": 53205 + }, + { + "epoch": 1.2999291525175287, + "grad_norm": 0.2929113209247589, + "learning_rate": 1.8480870578954893e-06, + "loss": 0.0653, + "num_input_tokens_seen": 35869216, + "step": 53210 + }, + { + "epoch": 1.3000513033493757, + "grad_norm": 2.2540366649627686, + "learning_rate": 1.8480418698866646e-06, + "loss": 0.0502, + "num_input_tokens_seen": 35872480, + "step": 53215 + }, + { + "epoch": 1.3001734541812229, + "grad_norm": 0.15386171638965607, + "learning_rate": 1.8479966757106516e-06, + "loss": 0.0023, + "num_input_tokens_seen": 35875872, + "step": 53220 + }, + { + "epoch": 1.30029560501307, + "grad_norm": 0.24568603932857513, + "learning_rate": 1.8479514753677785e-06, + "loss": 0.1281, + "num_input_tokens_seen": 35878688, + "step": 53225 + }, + { + "epoch": 1.3004177558449173, + "grad_norm": 0.803600013256073, + "learning_rate": 1.8479062688583743e-06, + "loss": 0.1363, + "num_input_tokens_seen": 35881760, + "step": 53230 + }, + { + "epoch": 1.3005399066767644, + "grad_norm": 4.497910499572754, + "learning_rate": 1.8478610561827676e-06, + "loss": 0.0084, + "num_input_tokens_seen": 35885024, + "step": 53235 + }, + { + "epoch": 1.3006620575086116, + "grad_norm": 12.895638465881348, + "learning_rate": 1.8478158373412872e-06, + "loss": 0.2536, + "num_input_tokens_seen": 35888288, + "step": 53240 + }, + { + "epoch": 1.3007842083404588, + "grad_norm": 1.133604884147644, + "learning_rate": 1.8477706123342623e-06, + "loss": 0.0507, + "num_input_tokens_seen": 35891424, + "step": 53245 + }, + { + "epoch": 1.300906359172306, + "grad_norm": 0.3395247459411621, + "learning_rate": 1.847725381162021e-06, + "loss": 0.0091, + "num_input_tokens_seen": 35894624, + "step": 53250 + }, + { + "epoch": 1.3010285100041532, + "grad_norm": 13.708138465881348, + "learning_rate": 1.8476801438248932e-06, + "loss": 0.1495, + "num_input_tokens_seen": 35897888, + "step": 53255 + }, + { + "epoch": 1.3011506608360004, + "grad_norm": 1.0327856540679932, + "learning_rate": 1.8476349003232073e-06, + "loss": 0.1332, + "num_input_tokens_seen": 35901472, + "step": 53260 + }, + { + "epoch": 1.3012728116678476, + "grad_norm": 0.15163828432559967, + "learning_rate": 1.847589650657292e-06, + "loss": 0.004, + "num_input_tokens_seen": 35904480, + "step": 53265 + }, + { + "epoch": 1.3013949624996946, + "grad_norm": 0.1811094433069229, + "learning_rate": 1.847544394827477e-06, + "loss": 0.0764, + "num_input_tokens_seen": 35907808, + "step": 53270 + }, + { + "epoch": 1.3015171133315417, + "grad_norm": 0.6422911286354065, + "learning_rate": 1.8474991328340915e-06, + "loss": 0.2005, + "num_input_tokens_seen": 35910752, + "step": 53275 + }, + { + "epoch": 1.301639264163389, + "grad_norm": 91.4090576171875, + "learning_rate": 1.847453864677464e-06, + "loss": 0.0881, + "num_input_tokens_seen": 35913952, + "step": 53280 + }, + { + "epoch": 1.3017614149952361, + "grad_norm": 0.15008161962032318, + "learning_rate": 1.8474085903579245e-06, + "loss": 0.1268, + "num_input_tokens_seen": 35917408, + "step": 53285 + }, + { + "epoch": 1.3018835658270833, + "grad_norm": 28.611242294311523, + "learning_rate": 1.8473633098758014e-06, + "loss": 0.0852, + "num_input_tokens_seen": 35920544, + "step": 53290 + }, + { + "epoch": 1.3020057166589305, + "grad_norm": 0.25884881615638733, + "learning_rate": 1.8473180232314244e-06, + "loss": 0.0741, + "num_input_tokens_seen": 35923552, + "step": 53295 + }, + { + "epoch": 1.3021278674907777, + "grad_norm": 0.07952199131250381, + "learning_rate": 1.8472727304251227e-06, + "loss": 0.0429, + "num_input_tokens_seen": 35927200, + "step": 53300 + }, + { + "epoch": 1.3022500183226247, + "grad_norm": 150.61093139648438, + "learning_rate": 1.8472274314572262e-06, + "loss": 0.0497, + "num_input_tokens_seen": 35930784, + "step": 53305 + }, + { + "epoch": 1.3023721691544718, + "grad_norm": 0.13954846560955048, + "learning_rate": 1.847182126328064e-06, + "loss": 0.0659, + "num_input_tokens_seen": 35933920, + "step": 53310 + }, + { + "epoch": 1.302494319986319, + "grad_norm": 0.19231536984443665, + "learning_rate": 1.8471368150379652e-06, + "loss": 0.0328, + "num_input_tokens_seen": 35937504, + "step": 53315 + }, + { + "epoch": 1.3026164708181662, + "grad_norm": 13.778584480285645, + "learning_rate": 1.8470914975872596e-06, + "loss": 0.0522, + "num_input_tokens_seen": 35941024, + "step": 53320 + }, + { + "epoch": 1.3027386216500134, + "grad_norm": 79.91692352294922, + "learning_rate": 1.847046173976277e-06, + "loss": 0.0972, + "num_input_tokens_seen": 35945120, + "step": 53325 + }, + { + "epoch": 1.3028607724818606, + "grad_norm": 9.358743667602539, + "learning_rate": 1.8470008442053468e-06, + "loss": 0.0486, + "num_input_tokens_seen": 35948448, + "step": 53330 + }, + { + "epoch": 1.3029829233137078, + "grad_norm": 0.1844514161348343, + "learning_rate": 1.8469555082747985e-06, + "loss": 0.1491, + "num_input_tokens_seen": 35951584, + "step": 53335 + }, + { + "epoch": 1.303105074145555, + "grad_norm": 121.97779083251953, + "learning_rate": 1.846910166184962e-06, + "loss": 0.1864, + "num_input_tokens_seen": 35954912, + "step": 53340 + }, + { + "epoch": 1.3032272249774022, + "grad_norm": 100.90186309814453, + "learning_rate": 1.846864817936167e-06, + "loss": 0.1499, + "num_input_tokens_seen": 35958368, + "step": 53345 + }, + { + "epoch": 1.3033493758092494, + "grad_norm": 11.588038444519043, + "learning_rate": 1.8468194635287432e-06, + "loss": 0.0402, + "num_input_tokens_seen": 35961632, + "step": 53350 + }, + { + "epoch": 1.3034715266410966, + "grad_norm": 0.1409424990415573, + "learning_rate": 1.8467741029630207e-06, + "loss": 0.0991, + "num_input_tokens_seen": 35965344, + "step": 53355 + }, + { + "epoch": 1.3035936774729435, + "grad_norm": 0.23103132843971252, + "learning_rate": 1.8467287362393288e-06, + "loss": 0.0474, + "num_input_tokens_seen": 35968736, + "step": 53360 + }, + { + "epoch": 1.3037158283047907, + "grad_norm": 7.011451721191406, + "learning_rate": 1.846683363357998e-06, + "loss": 0.0616, + "num_input_tokens_seen": 35971808, + "step": 53365 + }, + { + "epoch": 1.303837979136638, + "grad_norm": 0.4242778420448303, + "learning_rate": 1.8466379843193583e-06, + "loss": 0.0982, + "num_input_tokens_seen": 35975328, + "step": 53370 + }, + { + "epoch": 1.303960129968485, + "grad_norm": 23.884788513183594, + "learning_rate": 1.846592599123739e-06, + "loss": 0.1391, + "num_input_tokens_seen": 35978336, + "step": 53375 + }, + { + "epoch": 1.3040822808003323, + "grad_norm": 98.13916778564453, + "learning_rate": 1.8465472077714707e-06, + "loss": 0.1133, + "num_input_tokens_seen": 35981728, + "step": 53380 + }, + { + "epoch": 1.3042044316321795, + "grad_norm": 146.73292541503906, + "learning_rate": 1.8465018102628837e-06, + "loss": 0.0989, + "num_input_tokens_seen": 35984992, + "step": 53385 + }, + { + "epoch": 1.3043265824640267, + "grad_norm": 0.2913326621055603, + "learning_rate": 1.8464564065983077e-06, + "loss": 0.0663, + "num_input_tokens_seen": 35988192, + "step": 53390 + }, + { + "epoch": 1.3044487332958736, + "grad_norm": 0.06666086614131927, + "learning_rate": 1.846410996778073e-06, + "loss": 0.1115, + "num_input_tokens_seen": 35991264, + "step": 53395 + }, + { + "epoch": 1.3045708841277208, + "grad_norm": 0.7160685062408447, + "learning_rate": 1.8463655808025098e-06, + "loss": 0.1054, + "num_input_tokens_seen": 35994912, + "step": 53400 + }, + { + "epoch": 1.304693034959568, + "grad_norm": 0.5617475509643555, + "learning_rate": 1.8463201586719486e-06, + "loss": 0.1084, + "num_input_tokens_seen": 35998112, + "step": 53405 + }, + { + "epoch": 1.3048151857914152, + "grad_norm": 0.6565627455711365, + "learning_rate": 1.8462747303867197e-06, + "loss": 0.002, + "num_input_tokens_seen": 36001248, + "step": 53410 + }, + { + "epoch": 1.3049373366232624, + "grad_norm": 0.11988291144371033, + "learning_rate": 1.846229295947153e-06, + "loss": 0.0767, + "num_input_tokens_seen": 36004384, + "step": 53415 + }, + { + "epoch": 1.3050594874551096, + "grad_norm": 15.187780380249023, + "learning_rate": 1.8461838553535793e-06, + "loss": 0.0331, + "num_input_tokens_seen": 36008480, + "step": 53420 + }, + { + "epoch": 1.3051816382869568, + "grad_norm": 15.98518180847168, + "learning_rate": 1.8461384086063292e-06, + "loss": 0.0761, + "num_input_tokens_seen": 36012384, + "step": 53425 + }, + { + "epoch": 1.305303789118804, + "grad_norm": 78.05189514160156, + "learning_rate": 1.846092955705733e-06, + "loss": 0.0968, + "num_input_tokens_seen": 36015648, + "step": 53430 + }, + { + "epoch": 1.3054259399506511, + "grad_norm": 25.664134979248047, + "learning_rate": 1.846047496652121e-06, + "loss": 0.0418, + "num_input_tokens_seen": 36018912, + "step": 53435 + }, + { + "epoch": 1.3055480907824983, + "grad_norm": 14.023910522460938, + "learning_rate": 1.8460020314458244e-06, + "loss": 0.0844, + "num_input_tokens_seen": 36022560, + "step": 53440 + }, + { + "epoch": 1.3056702416143455, + "grad_norm": 6.5276312828063965, + "learning_rate": 1.8459565600871732e-06, + "loss": 0.0424, + "num_input_tokens_seen": 36025760, + "step": 53445 + }, + { + "epoch": 1.3057923924461925, + "grad_norm": 0.2286670058965683, + "learning_rate": 1.8459110825764986e-06, + "loss": 0.002, + "num_input_tokens_seen": 36029088, + "step": 53450 + }, + { + "epoch": 1.3059145432780397, + "grad_norm": 0.03177253156900406, + "learning_rate": 1.845865598914131e-06, + "loss": 0.0551, + "num_input_tokens_seen": 36032160, + "step": 53455 + }, + { + "epoch": 1.3060366941098869, + "grad_norm": 0.27544674277305603, + "learning_rate": 1.8458201091004011e-06, + "loss": 0.0005, + "num_input_tokens_seen": 36035808, + "step": 53460 + }, + { + "epoch": 1.306158844941734, + "grad_norm": 0.2392241656780243, + "learning_rate": 1.84577461313564e-06, + "loss": 0.0771, + "num_input_tokens_seen": 36039328, + "step": 53465 + }, + { + "epoch": 1.3062809957735813, + "grad_norm": 33.41484832763672, + "learning_rate": 1.8457291110201782e-06, + "loss": 0.1988, + "num_input_tokens_seen": 36042592, + "step": 53470 + }, + { + "epoch": 1.3064031466054284, + "grad_norm": 7.893235206604004, + "learning_rate": 1.8456836027543472e-06, + "loss": 0.1351, + "num_input_tokens_seen": 36045856, + "step": 53475 + }, + { + "epoch": 1.3065252974372754, + "grad_norm": 27.955108642578125, + "learning_rate": 1.8456380883384774e-06, + "loss": 0.0866, + "num_input_tokens_seen": 36049504, + "step": 53480 + }, + { + "epoch": 1.3066474482691226, + "grad_norm": 0.7363135814666748, + "learning_rate": 1.8455925677729e-06, + "loss": 0.092, + "num_input_tokens_seen": 36052576, + "step": 53485 + }, + { + "epoch": 1.3067695991009698, + "grad_norm": 0.09255994111299515, + "learning_rate": 1.8455470410579462e-06, + "loss": 0.1593, + "num_input_tokens_seen": 36055968, + "step": 53490 + }, + { + "epoch": 1.306891749932817, + "grad_norm": 1.4908761978149414, + "learning_rate": 1.8455015081939465e-06, + "loss": 0.108, + "num_input_tokens_seen": 36059872, + "step": 53495 + }, + { + "epoch": 1.3070139007646642, + "grad_norm": 0.4646112024784088, + "learning_rate": 1.8454559691812326e-06, + "loss": 0.1167, + "num_input_tokens_seen": 36063520, + "step": 53500 + }, + { + "epoch": 1.3071360515965114, + "grad_norm": 103.47087097167969, + "learning_rate": 1.8454104240201355e-06, + "loss": 0.0282, + "num_input_tokens_seen": 36066848, + "step": 53505 + }, + { + "epoch": 1.3072582024283586, + "grad_norm": 16.62038803100586, + "learning_rate": 1.8453648727109865e-06, + "loss": 0.1318, + "num_input_tokens_seen": 36070432, + "step": 53510 + }, + { + "epoch": 1.3073803532602057, + "grad_norm": 0.8345311284065247, + "learning_rate": 1.8453193152541167e-06, + "loss": 0.1191, + "num_input_tokens_seen": 36073632, + "step": 53515 + }, + { + "epoch": 1.307502504092053, + "grad_norm": 33.0948600769043, + "learning_rate": 1.8452737516498576e-06, + "loss": 0.0371, + "num_input_tokens_seen": 36076704, + "step": 53520 + }, + { + "epoch": 1.3076246549239001, + "grad_norm": 47.80204391479492, + "learning_rate": 1.8452281818985402e-06, + "loss": 0.074, + "num_input_tokens_seen": 36079712, + "step": 53525 + }, + { + "epoch": 1.3077468057557473, + "grad_norm": 0.07585328072309494, + "learning_rate": 1.845182606000496e-06, + "loss": 0.0565, + "num_input_tokens_seen": 36082656, + "step": 53530 + }, + { + "epoch": 1.3078689565875945, + "grad_norm": 24.113906860351562, + "learning_rate": 1.845137023956057e-06, + "loss": 0.0552, + "num_input_tokens_seen": 36085728, + "step": 53535 + }, + { + "epoch": 1.3079911074194415, + "grad_norm": 214.59718322753906, + "learning_rate": 1.8450914357655538e-06, + "loss": 0.02, + "num_input_tokens_seen": 36088928, + "step": 53540 + }, + { + "epoch": 1.3081132582512887, + "grad_norm": 0.5538395643234253, + "learning_rate": 1.8450458414293187e-06, + "loss": 0.1131, + "num_input_tokens_seen": 36091936, + "step": 53545 + }, + { + "epoch": 1.3082354090831358, + "grad_norm": 0.42593154311180115, + "learning_rate": 1.8450002409476828e-06, + "loss": 0.0659, + "num_input_tokens_seen": 36095328, + "step": 53550 + }, + { + "epoch": 1.308357559914983, + "grad_norm": 0.06701342761516571, + "learning_rate": 1.844954634320978e-06, + "loss": 0.0435, + "num_input_tokens_seen": 36098784, + "step": 53555 + }, + { + "epoch": 1.3084797107468302, + "grad_norm": 0.13026872277259827, + "learning_rate": 1.8449090215495358e-06, + "loss": 0.0249, + "num_input_tokens_seen": 36102112, + "step": 53560 + }, + { + "epoch": 1.3086018615786774, + "grad_norm": 42.17403793334961, + "learning_rate": 1.8448634026336877e-06, + "loss": 0.1132, + "num_input_tokens_seen": 36105376, + "step": 53565 + }, + { + "epoch": 1.3087240124105244, + "grad_norm": 0.14000383019447327, + "learning_rate": 1.844817777573766e-06, + "loss": 0.0413, + "num_input_tokens_seen": 36108896, + "step": 53570 + }, + { + "epoch": 1.3088461632423716, + "grad_norm": 0.750639021396637, + "learning_rate": 1.844772146370102e-06, + "loss": 0.0455, + "num_input_tokens_seen": 36112416, + "step": 53575 + }, + { + "epoch": 1.3089683140742188, + "grad_norm": 0.13125310838222504, + "learning_rate": 1.8447265090230277e-06, + "loss": 0.0558, + "num_input_tokens_seen": 36115552, + "step": 53580 + }, + { + "epoch": 1.309090464906066, + "grad_norm": 10.72737979888916, + "learning_rate": 1.8446808655328755e-06, + "loss": 0.0438, + "num_input_tokens_seen": 36119264, + "step": 53585 + }, + { + "epoch": 1.3092126157379131, + "grad_norm": 0.692608654499054, + "learning_rate": 1.8446352158999764e-06, + "loss": 0.1691, + "num_input_tokens_seen": 36122848, + "step": 53590 + }, + { + "epoch": 1.3093347665697603, + "grad_norm": 0.09286288172006607, + "learning_rate": 1.8445895601246628e-06, + "loss": 0.0925, + "num_input_tokens_seen": 36126752, + "step": 53595 + }, + { + "epoch": 1.3094569174016075, + "grad_norm": 0.0832144096493721, + "learning_rate": 1.844543898207267e-06, + "loss": 0.1044, + "num_input_tokens_seen": 36129696, + "step": 53600 + }, + { + "epoch": 1.3095790682334547, + "grad_norm": 0.653047502040863, + "learning_rate": 1.8444982301481207e-06, + "loss": 0.0014, + "num_input_tokens_seen": 36133216, + "step": 53605 + }, + { + "epoch": 1.309701219065302, + "grad_norm": 295.8388366699219, + "learning_rate": 1.8444525559475559e-06, + "loss": 0.0781, + "num_input_tokens_seen": 36136416, + "step": 53610 + }, + { + "epoch": 1.309823369897149, + "grad_norm": 0.0643298551440239, + "learning_rate": 1.8444068756059052e-06, + "loss": 0.0889, + "num_input_tokens_seen": 36140128, + "step": 53615 + }, + { + "epoch": 1.3099455207289963, + "grad_norm": 8.68905258178711, + "learning_rate": 1.8443611891235008e-06, + "loss": 0.1508, + "num_input_tokens_seen": 36143968, + "step": 53620 + }, + { + "epoch": 1.3100676715608432, + "grad_norm": 28.05567169189453, + "learning_rate": 1.8443154965006741e-06, + "loss": 0.1148, + "num_input_tokens_seen": 36147104, + "step": 53625 + }, + { + "epoch": 1.3101898223926904, + "grad_norm": 0.059366848319768906, + "learning_rate": 1.8442697977377586e-06, + "loss": 0.0471, + "num_input_tokens_seen": 36150560, + "step": 53630 + }, + { + "epoch": 1.3103119732245376, + "grad_norm": 0.25347110629081726, + "learning_rate": 1.8442240928350858e-06, + "loss": 0.063, + "num_input_tokens_seen": 36153760, + "step": 53635 + }, + { + "epoch": 1.3104341240563848, + "grad_norm": 20.07191276550293, + "learning_rate": 1.8441783817929885e-06, + "loss": 0.1102, + "num_input_tokens_seen": 36156896, + "step": 53640 + }, + { + "epoch": 1.310556274888232, + "grad_norm": 0.0963999480009079, + "learning_rate": 1.844132664611799e-06, + "loss": 0.0036, + "num_input_tokens_seen": 36160480, + "step": 53645 + }, + { + "epoch": 1.3106784257200792, + "grad_norm": 44.44879150390625, + "learning_rate": 1.8440869412918497e-06, + "loss": 0.126, + "num_input_tokens_seen": 36164192, + "step": 53650 + }, + { + "epoch": 1.3108005765519264, + "grad_norm": 0.14370276033878326, + "learning_rate": 1.8440412118334727e-06, + "loss": 0.0899, + "num_input_tokens_seen": 36167968, + "step": 53655 + }, + { + "epoch": 1.3109227273837734, + "grad_norm": 0.36140382289886475, + "learning_rate": 1.8439954762370015e-06, + "loss": 0.0861, + "num_input_tokens_seen": 36171296, + "step": 53660 + }, + { + "epoch": 1.3110448782156205, + "grad_norm": 0.23507973551750183, + "learning_rate": 1.8439497345027677e-06, + "loss": 0.0879, + "num_input_tokens_seen": 36174304, + "step": 53665 + }, + { + "epoch": 1.3111670290474677, + "grad_norm": 0.28569769859313965, + "learning_rate": 1.8439039866311049e-06, + "loss": 0.0018, + "num_input_tokens_seen": 36177248, + "step": 53670 + }, + { + "epoch": 1.311289179879315, + "grad_norm": 0.07938076555728912, + "learning_rate": 1.8438582326223451e-06, + "loss": 0.1782, + "num_input_tokens_seen": 36180512, + "step": 53675 + }, + { + "epoch": 1.3114113307111621, + "grad_norm": 8.001997947692871, + "learning_rate": 1.8438124724768213e-06, + "loss": 0.1314, + "num_input_tokens_seen": 36183648, + "step": 53680 + }, + { + "epoch": 1.3115334815430093, + "grad_norm": 7.661314487457275, + "learning_rate": 1.843766706194866e-06, + "loss": 0.0956, + "num_input_tokens_seen": 36186656, + "step": 53685 + }, + { + "epoch": 1.3116556323748565, + "grad_norm": 11.160171508789062, + "learning_rate": 1.8437209337768127e-06, + "loss": 0.0805, + "num_input_tokens_seen": 36189920, + "step": 53690 + }, + { + "epoch": 1.3117777832067037, + "grad_norm": 0.42267847061157227, + "learning_rate": 1.8436751552229937e-06, + "loss": 0.0928, + "num_input_tokens_seen": 36193120, + "step": 53695 + }, + { + "epoch": 1.3118999340385509, + "grad_norm": 133.1011199951172, + "learning_rate": 1.843629370533742e-06, + "loss": 0.0605, + "num_input_tokens_seen": 36196960, + "step": 53700 + }, + { + "epoch": 1.312022084870398, + "grad_norm": 30.683536529541016, + "learning_rate": 1.8435835797093906e-06, + "loss": 0.2534, + "num_input_tokens_seen": 36200224, + "step": 53705 + }, + { + "epoch": 1.3121442357022453, + "grad_norm": 27.335384368896484, + "learning_rate": 1.8435377827502724e-06, + "loss": 0.0969, + "num_input_tokens_seen": 36204192, + "step": 53710 + }, + { + "epoch": 1.3122663865340922, + "grad_norm": 14.113719940185547, + "learning_rate": 1.8434919796567208e-06, + "loss": 0.0439, + "num_input_tokens_seen": 36209568, + "step": 53715 + }, + { + "epoch": 1.3123885373659394, + "grad_norm": 8.532432556152344, + "learning_rate": 1.8434461704290685e-06, + "loss": 0.0879, + "num_input_tokens_seen": 36212640, + "step": 53720 + }, + { + "epoch": 1.3125106881977866, + "grad_norm": 0.20278561115264893, + "learning_rate": 1.8434003550676488e-06, + "loss": 0.0398, + "num_input_tokens_seen": 36216544, + "step": 53725 + }, + { + "epoch": 1.3126328390296338, + "grad_norm": 0.1209520697593689, + "learning_rate": 1.843354533572795e-06, + "loss": 0.1991, + "num_input_tokens_seen": 36219808, + "step": 53730 + }, + { + "epoch": 1.312754989861481, + "grad_norm": 0.5357292294502258, + "learning_rate": 1.84330870594484e-06, + "loss": 0.0356, + "num_input_tokens_seen": 36222944, + "step": 53735 + }, + { + "epoch": 1.3128771406933282, + "grad_norm": 1.1882165670394897, + "learning_rate": 1.8432628721841174e-06, + "loss": 0.0385, + "num_input_tokens_seen": 36226080, + "step": 53740 + }, + { + "epoch": 1.3129992915251754, + "grad_norm": 0.13929365575313568, + "learning_rate": 1.8432170322909602e-06, + "loss": 0.0696, + "num_input_tokens_seen": 36229472, + "step": 53745 + }, + { + "epoch": 1.3131214423570223, + "grad_norm": 0.17472250759601593, + "learning_rate": 1.8431711862657022e-06, + "loss": 0.1067, + "num_input_tokens_seen": 36232800, + "step": 53750 + }, + { + "epoch": 1.3132435931888695, + "grad_norm": 7.303803443908691, + "learning_rate": 1.8431253341086764e-06, + "loss": 0.1298, + "num_input_tokens_seen": 36236704, + "step": 53755 + }, + { + "epoch": 1.3133657440207167, + "grad_norm": 75.92008972167969, + "learning_rate": 1.8430794758202165e-06, + "loss": 0.1319, + "num_input_tokens_seen": 36239712, + "step": 53760 + }, + { + "epoch": 1.313487894852564, + "grad_norm": 0.2109651118516922, + "learning_rate": 1.8430336114006555e-06, + "loss": 0.0364, + "num_input_tokens_seen": 36243168, + "step": 53765 + }, + { + "epoch": 1.313610045684411, + "grad_norm": 0.046810101717710495, + "learning_rate": 1.8429877408503279e-06, + "loss": 0.0531, + "num_input_tokens_seen": 36246816, + "step": 53770 + }, + { + "epoch": 1.3137321965162583, + "grad_norm": 0.08798840641975403, + "learning_rate": 1.8429418641695665e-06, + "loss": 0.0306, + "num_input_tokens_seen": 36250144, + "step": 53775 + }, + { + "epoch": 1.3138543473481055, + "grad_norm": 0.4276348352432251, + "learning_rate": 1.8428959813587048e-06, + "loss": 0.0013, + "num_input_tokens_seen": 36253280, + "step": 53780 + }, + { + "epoch": 1.3139764981799527, + "grad_norm": 19.770280838012695, + "learning_rate": 1.8428500924180774e-06, + "loss": 0.2003, + "num_input_tokens_seen": 36256864, + "step": 53785 + }, + { + "epoch": 1.3140986490117998, + "grad_norm": 0.14534105360507965, + "learning_rate": 1.842804197348017e-06, + "loss": 0.0865, + "num_input_tokens_seen": 36260064, + "step": 53790 + }, + { + "epoch": 1.314220799843647, + "grad_norm": 15.357917785644531, + "learning_rate": 1.8427582961488579e-06, + "loss": 0.1247, + "num_input_tokens_seen": 36263584, + "step": 53795 + }, + { + "epoch": 1.3143429506754942, + "grad_norm": 30.096118927001953, + "learning_rate": 1.8427123888209337e-06, + "loss": 0.1986, + "num_input_tokens_seen": 36267424, + "step": 53800 + }, + { + "epoch": 1.3144651015073412, + "grad_norm": 191.9586181640625, + "learning_rate": 1.8426664753645786e-06, + "loss": 0.1484, + "num_input_tokens_seen": 36271584, + "step": 53805 + }, + { + "epoch": 1.3145872523391884, + "grad_norm": 11.76651382446289, + "learning_rate": 1.8426205557801259e-06, + "loss": 0.0482, + "num_input_tokens_seen": 36274912, + "step": 53810 + }, + { + "epoch": 1.3147094031710356, + "grad_norm": 66.1670913696289, + "learning_rate": 1.84257463006791e-06, + "loss": 0.0452, + "num_input_tokens_seen": 36279392, + "step": 53815 + }, + { + "epoch": 1.3148315540028828, + "grad_norm": 1.6978089809417725, + "learning_rate": 1.842528698228265e-06, + "loss": 0.0494, + "num_input_tokens_seen": 36283040, + "step": 53820 + }, + { + "epoch": 1.31495370483473, + "grad_norm": 0.1684218943119049, + "learning_rate": 1.8424827602615247e-06, + "loss": 0.0014, + "num_input_tokens_seen": 36286304, + "step": 53825 + }, + { + "epoch": 1.3150758556665771, + "grad_norm": 25.381460189819336, + "learning_rate": 1.842436816168023e-06, + "loss": 0.1553, + "num_input_tokens_seen": 36289632, + "step": 53830 + }, + { + "epoch": 1.3151980064984243, + "grad_norm": 0.4152490198612213, + "learning_rate": 1.8423908659480943e-06, + "loss": 0.0241, + "num_input_tokens_seen": 36292960, + "step": 53835 + }, + { + "epoch": 1.3153201573302713, + "grad_norm": 106.53073120117188, + "learning_rate": 1.8423449096020724e-06, + "loss": 0.113, + "num_input_tokens_seen": 36295840, + "step": 53840 + }, + { + "epoch": 1.3154423081621185, + "grad_norm": 8.367879867553711, + "learning_rate": 1.842298947130292e-06, + "loss": 0.1004, + "num_input_tokens_seen": 36299360, + "step": 53845 + }, + { + "epoch": 1.3155644589939657, + "grad_norm": 3.1824214458465576, + "learning_rate": 1.8422529785330872e-06, + "loss": 0.1731, + "num_input_tokens_seen": 36302624, + "step": 53850 + }, + { + "epoch": 1.3156866098258129, + "grad_norm": 0.230075404047966, + "learning_rate": 1.8422070038107918e-06, + "loss": 0.1034, + "num_input_tokens_seen": 36305760, + "step": 53855 + }, + { + "epoch": 1.31580876065766, + "grad_norm": 0.2761656939983368, + "learning_rate": 1.8421610229637405e-06, + "loss": 0.1065, + "num_input_tokens_seen": 36309280, + "step": 53860 + }, + { + "epoch": 1.3159309114895072, + "grad_norm": 32.37919616699219, + "learning_rate": 1.842115035992268e-06, + "loss": 0.1159, + "num_input_tokens_seen": 36312672, + "step": 53865 + }, + { + "epoch": 1.3160530623213544, + "grad_norm": 15.567728996276855, + "learning_rate": 1.8420690428967087e-06, + "loss": 0.0485, + "num_input_tokens_seen": 36315808, + "step": 53870 + }, + { + "epoch": 1.3161752131532016, + "grad_norm": 0.15238645672798157, + "learning_rate": 1.8420230436773965e-06, + "loss": 0.0246, + "num_input_tokens_seen": 36319008, + "step": 53875 + }, + { + "epoch": 1.3162973639850488, + "grad_norm": 0.3245013952255249, + "learning_rate": 1.8419770383346664e-06, + "loss": 0.0456, + "num_input_tokens_seen": 36322080, + "step": 53880 + }, + { + "epoch": 1.316419514816896, + "grad_norm": 0.5807885527610779, + "learning_rate": 1.8419310268688525e-06, + "loss": 0.1091, + "num_input_tokens_seen": 36325600, + "step": 53885 + }, + { + "epoch": 1.3165416656487432, + "grad_norm": 0.1590554565191269, + "learning_rate": 1.84188500928029e-06, + "loss": 0.091, + "num_input_tokens_seen": 36328800, + "step": 53890 + }, + { + "epoch": 1.3166638164805902, + "grad_norm": 0.3508022129535675, + "learning_rate": 1.8418389855693132e-06, + "loss": 0.1389, + "num_input_tokens_seen": 36331744, + "step": 53895 + }, + { + "epoch": 1.3167859673124374, + "grad_norm": 124.73145294189453, + "learning_rate": 1.841792955736257e-06, + "loss": 0.1134, + "num_input_tokens_seen": 36334944, + "step": 53900 + }, + { + "epoch": 1.3169081181442845, + "grad_norm": 66.41177368164062, + "learning_rate": 1.841746919781456e-06, + "loss": 0.1849, + "num_input_tokens_seen": 36338272, + "step": 53905 + }, + { + "epoch": 1.3170302689761317, + "grad_norm": 0.18778330087661743, + "learning_rate": 1.8417008777052447e-06, + "loss": 0.0561, + "num_input_tokens_seen": 36341664, + "step": 53910 + }, + { + "epoch": 1.317152419807979, + "grad_norm": 0.9680259823799133, + "learning_rate": 1.8416548295079583e-06, + "loss": 0.012, + "num_input_tokens_seen": 36344992, + "step": 53915 + }, + { + "epoch": 1.3172745706398261, + "grad_norm": 0.6024382710456848, + "learning_rate": 1.841608775189932e-06, + "loss": 0.0688, + "num_input_tokens_seen": 36348320, + "step": 53920 + }, + { + "epoch": 1.3173967214716733, + "grad_norm": 1.7419893741607666, + "learning_rate": 1.8415627147514998e-06, + "loss": 0.0525, + "num_input_tokens_seen": 36352032, + "step": 53925 + }, + { + "epoch": 1.3175188723035203, + "grad_norm": 5.147568225860596, + "learning_rate": 1.8415166481929976e-06, + "loss": 0.0305, + "num_input_tokens_seen": 36355744, + "step": 53930 + }, + { + "epoch": 1.3176410231353675, + "grad_norm": 0.10421989113092422, + "learning_rate": 1.8414705755147597e-06, + "loss": 0.0499, + "num_input_tokens_seen": 36359584, + "step": 53935 + }, + { + "epoch": 1.3177631739672147, + "grad_norm": 33.054359436035156, + "learning_rate": 1.8414244967171216e-06, + "loss": 0.0467, + "num_input_tokens_seen": 36363104, + "step": 53940 + }, + { + "epoch": 1.3178853247990618, + "grad_norm": 105.69279479980469, + "learning_rate": 1.8413784118004184e-06, + "loss": 0.1175, + "num_input_tokens_seen": 36366432, + "step": 53945 + }, + { + "epoch": 1.318007475630909, + "grad_norm": 0.07659657299518585, + "learning_rate": 1.8413323207649847e-06, + "loss": 0.0013, + "num_input_tokens_seen": 36370208, + "step": 53950 + }, + { + "epoch": 1.3181296264627562, + "grad_norm": 31.84999656677246, + "learning_rate": 1.8412862236111565e-06, + "loss": 0.1004, + "num_input_tokens_seen": 36373536, + "step": 53955 + }, + { + "epoch": 1.3182517772946034, + "grad_norm": 0.1787625253200531, + "learning_rate": 1.8412401203392681e-06, + "loss": 0.0384, + "num_input_tokens_seen": 36377184, + "step": 53960 + }, + { + "epoch": 1.3183739281264506, + "grad_norm": 18.62636947631836, + "learning_rate": 1.8411940109496556e-06, + "loss": 0.0789, + "num_input_tokens_seen": 36380576, + "step": 53965 + }, + { + "epoch": 1.3184960789582978, + "grad_norm": 1.6066945791244507, + "learning_rate": 1.841147895442654e-06, + "loss": 0.0648, + "num_input_tokens_seen": 36383840, + "step": 53970 + }, + { + "epoch": 1.318618229790145, + "grad_norm": 0.07495055347681046, + "learning_rate": 1.8411017738185985e-06, + "loss": 0.0579, + "num_input_tokens_seen": 36387296, + "step": 53975 + }, + { + "epoch": 1.3187403806219922, + "grad_norm": 8.100799560546875, + "learning_rate": 1.8410556460778248e-06, + "loss": 0.0875, + "num_input_tokens_seen": 36390624, + "step": 53980 + }, + { + "epoch": 1.3188625314538391, + "grad_norm": 10.08414077758789, + "learning_rate": 1.8410095122206682e-06, + "loss": 0.0979, + "num_input_tokens_seen": 36393760, + "step": 53985 + }, + { + "epoch": 1.3189846822856863, + "grad_norm": 49.63814163208008, + "learning_rate": 1.8409633722474642e-06, + "loss": 0.0856, + "num_input_tokens_seen": 36396704, + "step": 53990 + }, + { + "epoch": 1.3191068331175335, + "grad_norm": 0.1970123052597046, + "learning_rate": 1.8409172261585483e-06, + "loss": 0.0566, + "num_input_tokens_seen": 36399968, + "step": 53995 + }, + { + "epoch": 1.3192289839493807, + "grad_norm": 26.510272979736328, + "learning_rate": 1.8408710739542563e-06, + "loss": 0.213, + "num_input_tokens_seen": 36403808, + "step": 54000 + }, + { + "epoch": 1.319351134781228, + "grad_norm": 0.24183687567710876, + "learning_rate": 1.840824915634924e-06, + "loss": 0.0025, + "num_input_tokens_seen": 36406688, + "step": 54005 + }, + { + "epoch": 1.319473285613075, + "grad_norm": 28.09096336364746, + "learning_rate": 1.840778751200886e-06, + "loss": 0.2569, + "num_input_tokens_seen": 36409952, + "step": 54010 + }, + { + "epoch": 1.319595436444922, + "grad_norm": 101.31450653076172, + "learning_rate": 1.8407325806524795e-06, + "loss": 0.1203, + "num_input_tokens_seen": 36412960, + "step": 54015 + }, + { + "epoch": 1.3197175872767692, + "grad_norm": 0.6510666608810425, + "learning_rate": 1.840686403990039e-06, + "loss": 0.0017, + "num_input_tokens_seen": 36416736, + "step": 54020 + }, + { + "epoch": 1.3198397381086164, + "grad_norm": 0.13488955795764923, + "learning_rate": 1.8406402212139011e-06, + "loss": 0.0513, + "num_input_tokens_seen": 36419872, + "step": 54025 + }, + { + "epoch": 1.3199618889404636, + "grad_norm": 6.511417388916016, + "learning_rate": 1.8405940323244013e-06, + "loss": 0.0788, + "num_input_tokens_seen": 36423200, + "step": 54030 + }, + { + "epoch": 1.3200840397723108, + "grad_norm": 0.5640556216239929, + "learning_rate": 1.8405478373218757e-06, + "loss": 0.1123, + "num_input_tokens_seen": 36426720, + "step": 54035 + }, + { + "epoch": 1.320206190604158, + "grad_norm": 7.7084269523620605, + "learning_rate": 1.8405016362066604e-06, + "loss": 0.1202, + "num_input_tokens_seen": 36429920, + "step": 54040 + }, + { + "epoch": 1.3203283414360052, + "grad_norm": 1.3634628057479858, + "learning_rate": 1.8404554289790906e-06, + "loss": 0.0792, + "num_input_tokens_seen": 36433760, + "step": 54045 + }, + { + "epoch": 1.3204504922678524, + "grad_norm": 127.5936508178711, + "learning_rate": 1.8404092156395032e-06, + "loss": 0.109, + "num_input_tokens_seen": 36437024, + "step": 54050 + }, + { + "epoch": 1.3205726430996996, + "grad_norm": 31.527488708496094, + "learning_rate": 1.8403629961882338e-06, + "loss": 0.0546, + "num_input_tokens_seen": 36440736, + "step": 54055 + }, + { + "epoch": 1.3206947939315468, + "grad_norm": 45.58065414428711, + "learning_rate": 1.8403167706256188e-06, + "loss": 0.1726, + "num_input_tokens_seen": 36444640, + "step": 54060 + }, + { + "epoch": 1.320816944763394, + "grad_norm": 0.5619506239891052, + "learning_rate": 1.8402705389519941e-06, + "loss": 0.0416, + "num_input_tokens_seen": 36448160, + "step": 54065 + }, + { + "epoch": 1.3209390955952411, + "grad_norm": 0.17187543213367462, + "learning_rate": 1.8402243011676961e-06, + "loss": 0.07, + "num_input_tokens_seen": 36451296, + "step": 54070 + }, + { + "epoch": 1.321061246427088, + "grad_norm": 6.773105621337891, + "learning_rate": 1.8401780572730609e-06, + "loss": 0.0016, + "num_input_tokens_seen": 36455008, + "step": 54075 + }, + { + "epoch": 1.3211833972589353, + "grad_norm": 0.2591158151626587, + "learning_rate": 1.8401318072684248e-06, + "loss": 0.1247, + "num_input_tokens_seen": 36458080, + "step": 54080 + }, + { + "epoch": 1.3213055480907825, + "grad_norm": 0.9889698028564453, + "learning_rate": 1.8400855511541246e-06, + "loss": 0.0699, + "num_input_tokens_seen": 36461792, + "step": 54085 + }, + { + "epoch": 1.3214276989226297, + "grad_norm": 2.2531867027282715, + "learning_rate": 1.8400392889304961e-06, + "loss": 0.044, + "num_input_tokens_seen": 36464800, + "step": 54090 + }, + { + "epoch": 1.3215498497544769, + "grad_norm": 0.2619583010673523, + "learning_rate": 1.839993020597876e-06, + "loss": 0.0501, + "num_input_tokens_seen": 36469088, + "step": 54095 + }, + { + "epoch": 1.321672000586324, + "grad_norm": 0.28559795022010803, + "learning_rate": 1.8399467461566006e-06, + "loss": 0.042, + "num_input_tokens_seen": 36472288, + "step": 54100 + }, + { + "epoch": 1.321794151418171, + "grad_norm": 0.024589255452156067, + "learning_rate": 1.8399004656070067e-06, + "loss": 0.046, + "num_input_tokens_seen": 36475552, + "step": 54105 + }, + { + "epoch": 1.3219163022500182, + "grad_norm": 0.2193070650100708, + "learning_rate": 1.8398541789494307e-06, + "loss": 0.042, + "num_input_tokens_seen": 36478944, + "step": 54110 + }, + { + "epoch": 1.3220384530818654, + "grad_norm": 17.28835678100586, + "learning_rate": 1.839807886184209e-06, + "loss": 0.0808, + "num_input_tokens_seen": 36482208, + "step": 54115 + }, + { + "epoch": 1.3221606039137126, + "grad_norm": 41.440818786621094, + "learning_rate": 1.8397615873116785e-06, + "loss": 0.1379, + "num_input_tokens_seen": 36485152, + "step": 54120 + }, + { + "epoch": 1.3222827547455598, + "grad_norm": 0.27941083908081055, + "learning_rate": 1.8397152823321761e-06, + "loss": 0.0595, + "num_input_tokens_seen": 36488160, + "step": 54125 + }, + { + "epoch": 1.322404905577407, + "grad_norm": 0.16612698137760162, + "learning_rate": 1.8396689712460382e-06, + "loss": 0.0372, + "num_input_tokens_seen": 36491616, + "step": 54130 + }, + { + "epoch": 1.3225270564092542, + "grad_norm": 0.026570206508040428, + "learning_rate": 1.8396226540536017e-06, + "loss": 0.0994, + "num_input_tokens_seen": 36494880, + "step": 54135 + }, + { + "epoch": 1.3226492072411014, + "grad_norm": 96.77842712402344, + "learning_rate": 1.8395763307552034e-06, + "loss": 0.1139, + "num_input_tokens_seen": 36498144, + "step": 54140 + }, + { + "epoch": 1.3227713580729485, + "grad_norm": 76.5399169921875, + "learning_rate": 1.8395300013511803e-06, + "loss": 0.067, + "num_input_tokens_seen": 36501344, + "step": 54145 + }, + { + "epoch": 1.3228935089047957, + "grad_norm": 0.3292557895183563, + "learning_rate": 1.839483665841869e-06, + "loss": 0.0782, + "num_input_tokens_seen": 36504480, + "step": 54150 + }, + { + "epoch": 1.323015659736643, + "grad_norm": 10.224272727966309, + "learning_rate": 1.8394373242276069e-06, + "loss": 0.0399, + "num_input_tokens_seen": 36507936, + "step": 54155 + }, + { + "epoch": 1.32313781056849, + "grad_norm": 14.834850311279297, + "learning_rate": 1.8393909765087307e-06, + "loss": 0.0857, + "num_input_tokens_seen": 36511008, + "step": 54160 + }, + { + "epoch": 1.323259961400337, + "grad_norm": 40.261287689208984, + "learning_rate": 1.8393446226855779e-06, + "loss": 0.078, + "num_input_tokens_seen": 36513952, + "step": 54165 + }, + { + "epoch": 1.3233821122321843, + "grad_norm": 1.2118006944656372, + "learning_rate": 1.8392982627584845e-06, + "loss": 0.0536, + "num_input_tokens_seen": 36517088, + "step": 54170 + }, + { + "epoch": 1.3235042630640315, + "grad_norm": 0.18435494601726532, + "learning_rate": 1.839251896727789e-06, + "loss": 0.0804, + "num_input_tokens_seen": 36520544, + "step": 54175 + }, + { + "epoch": 1.3236264138958787, + "grad_norm": 0.12301947921514511, + "learning_rate": 1.8392055245938277e-06, + "loss": 0.0014, + "num_input_tokens_seen": 36523808, + "step": 54180 + }, + { + "epoch": 1.3237485647277258, + "grad_norm": 0.2818373739719391, + "learning_rate": 1.8391591463569383e-06, + "loss": 0.0521, + "num_input_tokens_seen": 36527072, + "step": 54185 + }, + { + "epoch": 1.323870715559573, + "grad_norm": 18.952112197875977, + "learning_rate": 1.8391127620174578e-06, + "loss": 0.091, + "num_input_tokens_seen": 36530336, + "step": 54190 + }, + { + "epoch": 1.32399286639142, + "grad_norm": 12.950271606445312, + "learning_rate": 1.8390663715757236e-06, + "loss": 0.151, + "num_input_tokens_seen": 36533344, + "step": 54195 + }, + { + "epoch": 1.3241150172232672, + "grad_norm": 15.284626007080078, + "learning_rate": 1.839019975032073e-06, + "loss": 0.2122, + "num_input_tokens_seen": 36536992, + "step": 54200 + }, + { + "epoch": 1.3242371680551144, + "grad_norm": 3.371776819229126, + "learning_rate": 1.8389735723868433e-06, + "loss": 0.0159, + "num_input_tokens_seen": 36540448, + "step": 54205 + }, + { + "epoch": 1.3243593188869616, + "grad_norm": 8.688360214233398, + "learning_rate": 1.8389271636403726e-06, + "loss": 0.0922, + "num_input_tokens_seen": 36544032, + "step": 54210 + }, + { + "epoch": 1.3244814697188088, + "grad_norm": 11.385930061340332, + "learning_rate": 1.8388807487929977e-06, + "loss": 0.1983, + "num_input_tokens_seen": 36547488, + "step": 54215 + }, + { + "epoch": 1.324603620550656, + "grad_norm": 0.7899482846260071, + "learning_rate": 1.8388343278450562e-06, + "loss": 0.0692, + "num_input_tokens_seen": 36550496, + "step": 54220 + }, + { + "epoch": 1.3247257713825031, + "grad_norm": 0.2948339283466339, + "learning_rate": 1.838787900796886e-06, + "loss": 0.1026, + "num_input_tokens_seen": 36554528, + "step": 54225 + }, + { + "epoch": 1.3248479222143503, + "grad_norm": 15.418424606323242, + "learning_rate": 1.8387414676488247e-06, + "loss": 0.0733, + "num_input_tokens_seen": 36557792, + "step": 54230 + }, + { + "epoch": 1.3249700730461975, + "grad_norm": 0.3756644129753113, + "learning_rate": 1.8386950284012097e-06, + "loss": 0.043, + "num_input_tokens_seen": 36560928, + "step": 54235 + }, + { + "epoch": 1.3250922238780447, + "grad_norm": 61.26029586791992, + "learning_rate": 1.8386485830543787e-06, + "loss": 0.029, + "num_input_tokens_seen": 36563616, + "step": 54240 + }, + { + "epoch": 1.325214374709892, + "grad_norm": 46.892730712890625, + "learning_rate": 1.83860213160867e-06, + "loss": 0.0231, + "num_input_tokens_seen": 36567072, + "step": 54245 + }, + { + "epoch": 1.3253365255417389, + "grad_norm": 18.61212921142578, + "learning_rate": 1.8385556740644207e-06, + "loss": 0.0864, + "num_input_tokens_seen": 36570208, + "step": 54250 + }, + { + "epoch": 1.325458676373586, + "grad_norm": 0.2406308799982071, + "learning_rate": 1.8385092104219692e-06, + "loss": 0.0398, + "num_input_tokens_seen": 36573856, + "step": 54255 + }, + { + "epoch": 1.3255808272054332, + "grad_norm": 14.590946197509766, + "learning_rate": 1.8384627406816532e-06, + "loss": 0.1231, + "num_input_tokens_seen": 36577248, + "step": 54260 + }, + { + "epoch": 1.3257029780372804, + "grad_norm": 10.663087844848633, + "learning_rate": 1.8384162648438104e-06, + "loss": 0.0889, + "num_input_tokens_seen": 36580768, + "step": 54265 + }, + { + "epoch": 1.3258251288691276, + "grad_norm": 29.073938369750977, + "learning_rate": 1.8383697829087792e-06, + "loss": 0.1318, + "num_input_tokens_seen": 36584416, + "step": 54270 + }, + { + "epoch": 1.3259472797009748, + "grad_norm": 31.9458065032959, + "learning_rate": 1.8383232948768975e-06, + "loss": 0.0856, + "num_input_tokens_seen": 36588000, + "step": 54275 + }, + { + "epoch": 1.326069430532822, + "grad_norm": 13.791265487670898, + "learning_rate": 1.8382768007485033e-06, + "loss": 0.1556, + "num_input_tokens_seen": 36591072, + "step": 54280 + }, + { + "epoch": 1.326191581364669, + "grad_norm": 22.632469177246094, + "learning_rate": 1.8382303005239346e-06, + "loss": 0.1886, + "num_input_tokens_seen": 36594464, + "step": 54285 + }, + { + "epoch": 1.3263137321965162, + "grad_norm": 7.899172306060791, + "learning_rate": 1.8381837942035299e-06, + "loss": 0.0695, + "num_input_tokens_seen": 36597920, + "step": 54290 + }, + { + "epoch": 1.3264358830283633, + "grad_norm": 0.49219897389411926, + "learning_rate": 1.838137281787627e-06, + "loss": 0.1333, + "num_input_tokens_seen": 36601056, + "step": 54295 + }, + { + "epoch": 1.3265580338602105, + "grad_norm": 0.22854328155517578, + "learning_rate": 1.8380907632765644e-06, + "loss": 0.0037, + "num_input_tokens_seen": 36604256, + "step": 54300 + }, + { + "epoch": 1.3266801846920577, + "grad_norm": 58.15485763549805, + "learning_rate": 1.8380442386706805e-06, + "loss": 0.1421, + "num_input_tokens_seen": 36607584, + "step": 54305 + }, + { + "epoch": 1.326802335523905, + "grad_norm": 0.4951092600822449, + "learning_rate": 1.8379977079703134e-06, + "loss": 0.0126, + "num_input_tokens_seen": 36611360, + "step": 54310 + }, + { + "epoch": 1.326924486355752, + "grad_norm": 0.1765560656785965, + "learning_rate": 1.8379511711758013e-06, + "loss": 0.1882, + "num_input_tokens_seen": 36614432, + "step": 54315 + }, + { + "epoch": 1.3270466371875993, + "grad_norm": 14.93560791015625, + "learning_rate": 1.8379046282874833e-06, + "loss": 0.1443, + "num_input_tokens_seen": 36617504, + "step": 54320 + }, + { + "epoch": 1.3271687880194465, + "grad_norm": 0.6865839958190918, + "learning_rate": 1.8378580793056972e-06, + "loss": 0.1574, + "num_input_tokens_seen": 36620512, + "step": 54325 + }, + { + "epoch": 1.3272909388512937, + "grad_norm": 0.17002366483211517, + "learning_rate": 1.837811524230782e-06, + "loss": 0.0449, + "num_input_tokens_seen": 36624032, + "step": 54330 + }, + { + "epoch": 1.3274130896831409, + "grad_norm": 68.06137084960938, + "learning_rate": 1.837764963063076e-06, + "loss": 0.0106, + "num_input_tokens_seen": 36627552, + "step": 54335 + }, + { + "epoch": 1.3275352405149878, + "grad_norm": 12.449389457702637, + "learning_rate": 1.837718395802918e-06, + "loss": 0.0677, + "num_input_tokens_seen": 36630880, + "step": 54340 + }, + { + "epoch": 1.327657391346835, + "grad_norm": 35.67185592651367, + "learning_rate": 1.8376718224506462e-06, + "loss": 0.0778, + "num_input_tokens_seen": 36634336, + "step": 54345 + }, + { + "epoch": 1.3277795421786822, + "grad_norm": 0.34582069516181946, + "learning_rate": 1.8376252430065996e-06, + "loss": 0.0222, + "num_input_tokens_seen": 36637472, + "step": 54350 + }, + { + "epoch": 1.3279016930105294, + "grad_norm": 25.928327560424805, + "learning_rate": 1.8375786574711172e-06, + "loss": 0.0163, + "num_input_tokens_seen": 36640864, + "step": 54355 + }, + { + "epoch": 1.3280238438423766, + "grad_norm": 44.198951721191406, + "learning_rate": 1.8375320658445373e-06, + "loss": 0.1653, + "num_input_tokens_seen": 36644256, + "step": 54360 + }, + { + "epoch": 1.3281459946742238, + "grad_norm": 22.284814834594727, + "learning_rate": 1.8374854681271991e-06, + "loss": 0.0613, + "num_input_tokens_seen": 36647584, + "step": 54365 + }, + { + "epoch": 1.328268145506071, + "grad_norm": 0.10727919638156891, + "learning_rate": 1.8374388643194415e-06, + "loss": 0.1438, + "num_input_tokens_seen": 36651040, + "step": 54370 + }, + { + "epoch": 1.328390296337918, + "grad_norm": 6.798345565795898, + "learning_rate": 1.8373922544216026e-06, + "loss": 0.003, + "num_input_tokens_seen": 36654304, + "step": 54375 + }, + { + "epoch": 1.3285124471697651, + "grad_norm": 24.0903263092041, + "learning_rate": 1.8373456384340224e-06, + "loss": 0.1305, + "num_input_tokens_seen": 36657504, + "step": 54380 + }, + { + "epoch": 1.3286345980016123, + "grad_norm": 0.21511498093605042, + "learning_rate": 1.8372990163570396e-06, + "loss": 0.033, + "num_input_tokens_seen": 36660704, + "step": 54385 + }, + { + "epoch": 1.3287567488334595, + "grad_norm": 0.841050922870636, + "learning_rate": 1.8372523881909929e-06, + "loss": 0.0023, + "num_input_tokens_seen": 36664288, + "step": 54390 + }, + { + "epoch": 1.3288788996653067, + "grad_norm": 37.736202239990234, + "learning_rate": 1.837205753936222e-06, + "loss": 0.1235, + "num_input_tokens_seen": 36667872, + "step": 54395 + }, + { + "epoch": 1.329001050497154, + "grad_norm": 12.41240406036377, + "learning_rate": 1.8371591135930653e-06, + "loss": 0.1215, + "num_input_tokens_seen": 36671328, + "step": 54400 + }, + { + "epoch": 1.329123201329001, + "grad_norm": 0.09699174016714096, + "learning_rate": 1.8371124671618627e-06, + "loss": 0.0267, + "num_input_tokens_seen": 36674400, + "step": 54405 + }, + { + "epoch": 1.3292453521608483, + "grad_norm": 0.32535770535469055, + "learning_rate": 1.8370658146429529e-06, + "loss": 0.0422, + "num_input_tokens_seen": 36678560, + "step": 54410 + }, + { + "epoch": 1.3293675029926955, + "grad_norm": 15.304163932800293, + "learning_rate": 1.8370191560366752e-06, + "loss": 0.0718, + "num_input_tokens_seen": 36681568, + "step": 54415 + }, + { + "epoch": 1.3294896538245426, + "grad_norm": 12.182839393615723, + "learning_rate": 1.8369724913433694e-06, + "loss": 0.1318, + "num_input_tokens_seen": 36684832, + "step": 54420 + }, + { + "epoch": 1.3296118046563898, + "grad_norm": 0.05617258697748184, + "learning_rate": 1.8369258205633741e-06, + "loss": 0.08, + "num_input_tokens_seen": 36688608, + "step": 54425 + }, + { + "epoch": 1.3297339554882368, + "grad_norm": 30.782344818115234, + "learning_rate": 1.8368791436970295e-06, + "loss": 0.26, + "num_input_tokens_seen": 36692000, + "step": 54430 + }, + { + "epoch": 1.329856106320084, + "grad_norm": 0.36918166279792786, + "learning_rate": 1.8368324607446747e-06, + "loss": 0.0638, + "num_input_tokens_seen": 36695904, + "step": 54435 + }, + { + "epoch": 1.3299782571519312, + "grad_norm": 0.805653989315033, + "learning_rate": 1.8367857717066485e-06, + "loss": 0.015, + "num_input_tokens_seen": 36699360, + "step": 54440 + }, + { + "epoch": 1.3301004079837784, + "grad_norm": 0.3441406786441803, + "learning_rate": 1.8367390765832917e-06, + "loss": 0.0694, + "num_input_tokens_seen": 36702624, + "step": 54445 + }, + { + "epoch": 1.3302225588156256, + "grad_norm": 28.947162628173828, + "learning_rate": 1.8366923753749433e-06, + "loss": 0.1328, + "num_input_tokens_seen": 36706272, + "step": 54450 + }, + { + "epoch": 1.3303447096474728, + "grad_norm": 0.245017409324646, + "learning_rate": 1.8366456680819428e-06, + "loss": 0.068, + "num_input_tokens_seen": 36709344, + "step": 54455 + }, + { + "epoch": 1.33046686047932, + "grad_norm": 97.548828125, + "learning_rate": 1.83659895470463e-06, + "loss": 0.1217, + "num_input_tokens_seen": 36712672, + "step": 54460 + }, + { + "epoch": 1.330589011311167, + "grad_norm": 18.679906845092773, + "learning_rate": 1.8365522352433445e-06, + "loss": 0.1103, + "num_input_tokens_seen": 36716000, + "step": 54465 + }, + { + "epoch": 1.330711162143014, + "grad_norm": 0.12989388406276703, + "learning_rate": 1.8365055096984264e-06, + "loss": 0.036, + "num_input_tokens_seen": 36719968, + "step": 54470 + }, + { + "epoch": 1.3308333129748613, + "grad_norm": 0.12728965282440186, + "learning_rate": 1.8364587780702147e-06, + "loss": 0.0812, + "num_input_tokens_seen": 36723360, + "step": 54475 + }, + { + "epoch": 1.3309554638067085, + "grad_norm": 0.06374026089906693, + "learning_rate": 1.8364120403590502e-06, + "loss": 0.1162, + "num_input_tokens_seen": 36726432, + "step": 54480 + }, + { + "epoch": 1.3310776146385557, + "grad_norm": 3.7236487865448, + "learning_rate": 1.8363652965652723e-06, + "loss": 0.1438, + "num_input_tokens_seen": 36729888, + "step": 54485 + }, + { + "epoch": 1.3311997654704029, + "grad_norm": 0.206837460398674, + "learning_rate": 1.836318546689221e-06, + "loss": 0.0286, + "num_input_tokens_seen": 36732896, + "step": 54490 + }, + { + "epoch": 1.33132191630225, + "grad_norm": 0.04891075938940048, + "learning_rate": 1.8362717907312364e-06, + "loss": 0.0012, + "num_input_tokens_seen": 36736352, + "step": 54495 + }, + { + "epoch": 1.3314440671340972, + "grad_norm": 0.18277254700660706, + "learning_rate": 1.8362250286916581e-06, + "loss": 0.0319, + "num_input_tokens_seen": 36740064, + "step": 54500 + }, + { + "epoch": 1.3315662179659444, + "grad_norm": 0.12982165813446045, + "learning_rate": 1.8361782605708267e-06, + "loss": 0.0768, + "num_input_tokens_seen": 36743328, + "step": 54505 + }, + { + "epoch": 1.3316883687977916, + "grad_norm": 0.2659108638763428, + "learning_rate": 1.836131486369082e-06, + "loss": 0.0441, + "num_input_tokens_seen": 36746464, + "step": 54510 + }, + { + "epoch": 1.3318105196296388, + "grad_norm": 0.06435118615627289, + "learning_rate": 1.8360847060867642e-06, + "loss": 0.0483, + "num_input_tokens_seen": 36750240, + "step": 54515 + }, + { + "epoch": 1.3319326704614858, + "grad_norm": 15.892475128173828, + "learning_rate": 1.8360379197242137e-06, + "loss": 0.0849, + "num_input_tokens_seen": 36753760, + "step": 54520 + }, + { + "epoch": 1.332054821293333, + "grad_norm": 28.929258346557617, + "learning_rate": 1.8359911272817706e-06, + "loss": 0.0359, + "num_input_tokens_seen": 36757216, + "step": 54525 + }, + { + "epoch": 1.3321769721251802, + "grad_norm": 59.461448669433594, + "learning_rate": 1.835944328759775e-06, + "loss": 0.0463, + "num_input_tokens_seen": 36760864, + "step": 54530 + }, + { + "epoch": 1.3322991229570273, + "grad_norm": 11.817412376403809, + "learning_rate": 1.8358975241585675e-06, + "loss": 0.1228, + "num_input_tokens_seen": 36764064, + "step": 54535 + }, + { + "epoch": 1.3324212737888745, + "grad_norm": 0.08971010148525238, + "learning_rate": 1.8358507134784882e-06, + "loss": 0.0567, + "num_input_tokens_seen": 36767520, + "step": 54540 + }, + { + "epoch": 1.3325434246207217, + "grad_norm": 0.039157934486866, + "learning_rate": 1.8358038967198776e-06, + "loss": 0.1073, + "num_input_tokens_seen": 36771296, + "step": 54545 + }, + { + "epoch": 1.3326655754525687, + "grad_norm": 0.17689518630504608, + "learning_rate": 1.8357570738830768e-06, + "loss": 0.0462, + "num_input_tokens_seen": 36774880, + "step": 54550 + }, + { + "epoch": 1.3327877262844159, + "grad_norm": 0.06715977936983109, + "learning_rate": 1.8357102449684254e-06, + "loss": 0.053, + "num_input_tokens_seen": 36778464, + "step": 54555 + }, + { + "epoch": 1.332909877116263, + "grad_norm": 11.433544158935547, + "learning_rate": 1.8356634099762643e-06, + "loss": 0.0801, + "num_input_tokens_seen": 36781792, + "step": 54560 + }, + { + "epoch": 1.3330320279481103, + "grad_norm": 0.11322541534900665, + "learning_rate": 1.8356165689069343e-06, + "loss": 0.0478, + "num_input_tokens_seen": 36784992, + "step": 54565 + }, + { + "epoch": 1.3331541787799575, + "grad_norm": 0.3144833445549011, + "learning_rate": 1.8355697217607758e-06, + "loss": 0.0293, + "num_input_tokens_seen": 36788320, + "step": 54570 + }, + { + "epoch": 1.3332763296118046, + "grad_norm": 1.0199109315872192, + "learning_rate": 1.8355228685381293e-06, + "loss": 0.0018, + "num_input_tokens_seen": 36791776, + "step": 54575 + }, + { + "epoch": 1.3333984804436518, + "grad_norm": 0.3016822636127472, + "learning_rate": 1.8354760092393363e-06, + "loss": 0.1309, + "num_input_tokens_seen": 36795040, + "step": 54580 + }, + { + "epoch": 1.333520631275499, + "grad_norm": 20.83384895324707, + "learning_rate": 1.8354291438647366e-06, + "loss": 0.0681, + "num_input_tokens_seen": 36798304, + "step": 54585 + }, + { + "epoch": 1.3336427821073462, + "grad_norm": 0.282366007566452, + "learning_rate": 1.8353822724146714e-06, + "loss": 0.1073, + "num_input_tokens_seen": 36802144, + "step": 54590 + }, + { + "epoch": 1.3337649329391934, + "grad_norm": 0.6827936172485352, + "learning_rate": 1.8353353948894819e-06, + "loss": 0.1152, + "num_input_tokens_seen": 36805408, + "step": 54595 + }, + { + "epoch": 1.3338870837710406, + "grad_norm": 0.4102949798107147, + "learning_rate": 1.8352885112895086e-06, + "loss": 0.0011, + "num_input_tokens_seen": 36809056, + "step": 54600 + }, + { + "epoch": 1.3340092346028878, + "grad_norm": 0.2885624170303345, + "learning_rate": 1.8352416216150926e-06, + "loss": 0.1124, + "num_input_tokens_seen": 36812128, + "step": 54605 + }, + { + "epoch": 1.3341313854347348, + "grad_norm": 0.47842562198638916, + "learning_rate": 1.8351947258665747e-06, + "loss": 0.1089, + "num_input_tokens_seen": 36816224, + "step": 54610 + }, + { + "epoch": 1.334253536266582, + "grad_norm": 220.07904052734375, + "learning_rate": 1.8351478240442963e-06, + "loss": 0.0894, + "num_input_tokens_seen": 36819424, + "step": 54615 + }, + { + "epoch": 1.3343756870984291, + "grad_norm": 56.795082092285156, + "learning_rate": 1.8351009161485983e-06, + "loss": 0.0718, + "num_input_tokens_seen": 36823136, + "step": 54620 + }, + { + "epoch": 1.3344978379302763, + "grad_norm": 0.40516746044158936, + "learning_rate": 1.835054002179822e-06, + "loss": 0.032, + "num_input_tokens_seen": 36826464, + "step": 54625 + }, + { + "epoch": 1.3346199887621235, + "grad_norm": 0.2434043288230896, + "learning_rate": 1.835007082138308e-06, + "loss": 0.0474, + "num_input_tokens_seen": 36829920, + "step": 54630 + }, + { + "epoch": 1.3347421395939707, + "grad_norm": 2.3370065689086914, + "learning_rate": 1.8349601560243983e-06, + "loss": 0.0734, + "num_input_tokens_seen": 36832928, + "step": 54635 + }, + { + "epoch": 1.3348642904258177, + "grad_norm": 0.26671692728996277, + "learning_rate": 1.8349132238384334e-06, + "loss": 0.0744, + "num_input_tokens_seen": 36836128, + "step": 54640 + }, + { + "epoch": 1.3349864412576649, + "grad_norm": 0.07411301136016846, + "learning_rate": 1.8348662855807552e-06, + "loss": 0.0449, + "num_input_tokens_seen": 36839776, + "step": 54645 + }, + { + "epoch": 1.335108592089512, + "grad_norm": 0.05784473940730095, + "learning_rate": 1.8348193412517051e-06, + "loss": 0.236, + "num_input_tokens_seen": 36842720, + "step": 54650 + }, + { + "epoch": 1.3352307429213592, + "grad_norm": 17.83575439453125, + "learning_rate": 1.8347723908516234e-06, + "loss": 0.0411, + "num_input_tokens_seen": 36846496, + "step": 54655 + }, + { + "epoch": 1.3353528937532064, + "grad_norm": 0.06487853825092316, + "learning_rate": 1.834725434380853e-06, + "loss": 0.1155, + "num_input_tokens_seen": 36849632, + "step": 54660 + }, + { + "epoch": 1.3354750445850536, + "grad_norm": 18.144908905029297, + "learning_rate": 1.8346784718397346e-06, + "loss": 0.0413, + "num_input_tokens_seen": 36852640, + "step": 54665 + }, + { + "epoch": 1.3355971954169008, + "grad_norm": 3.455583095550537, + "learning_rate": 1.8346315032286098e-06, + "loss": 0.0515, + "num_input_tokens_seen": 36856096, + "step": 54670 + }, + { + "epoch": 1.335719346248748, + "grad_norm": 13.571123123168945, + "learning_rate": 1.83458452854782e-06, + "loss": 0.1245, + "num_input_tokens_seen": 36859360, + "step": 54675 + }, + { + "epoch": 1.3358414970805952, + "grad_norm": 31.01885414123535, + "learning_rate": 1.8345375477977076e-06, + "loss": 0.1605, + "num_input_tokens_seen": 36862560, + "step": 54680 + }, + { + "epoch": 1.3359636479124424, + "grad_norm": 0.04093638062477112, + "learning_rate": 1.8344905609786132e-06, + "loss": 0.1335, + "num_input_tokens_seen": 36865696, + "step": 54685 + }, + { + "epoch": 1.3360857987442896, + "grad_norm": 0.0845719650387764, + "learning_rate": 1.8344435680908793e-06, + "loss": 0.0543, + "num_input_tokens_seen": 36868896, + "step": 54690 + }, + { + "epoch": 1.3362079495761365, + "grad_norm": 17.767742156982422, + "learning_rate": 1.8343965691348471e-06, + "loss": 0.1462, + "num_input_tokens_seen": 36872096, + "step": 54695 + }, + { + "epoch": 1.3363301004079837, + "grad_norm": 34.696651458740234, + "learning_rate": 1.8343495641108586e-06, + "loss": 0.103, + "num_input_tokens_seen": 36875616, + "step": 54700 + }, + { + "epoch": 1.336452251239831, + "grad_norm": 2.183518886566162, + "learning_rate": 1.8343025530192558e-06, + "loss": 0.1377, + "num_input_tokens_seen": 36878944, + "step": 54705 + }, + { + "epoch": 1.336574402071678, + "grad_norm": 0.4736291766166687, + "learning_rate": 1.8342555358603804e-06, + "loss": 0.018, + "num_input_tokens_seen": 36882336, + "step": 54710 + }, + { + "epoch": 1.3366965529035253, + "grad_norm": 13.378650665283203, + "learning_rate": 1.8342085126345743e-06, + "loss": 0.1011, + "num_input_tokens_seen": 36885792, + "step": 54715 + }, + { + "epoch": 1.3368187037353725, + "grad_norm": 0.12041685730218887, + "learning_rate": 1.8341614833421794e-06, + "loss": 0.0022, + "num_input_tokens_seen": 36889120, + "step": 54720 + }, + { + "epoch": 1.3369408545672197, + "grad_norm": 0.9578997492790222, + "learning_rate": 1.8341144479835382e-06, + "loss": 0.054, + "num_input_tokens_seen": 36892512, + "step": 54725 + }, + { + "epoch": 1.3370630053990666, + "grad_norm": 0.6917126774787903, + "learning_rate": 1.8340674065589923e-06, + "loss": 0.0393, + "num_input_tokens_seen": 36895904, + "step": 54730 + }, + { + "epoch": 1.3371851562309138, + "grad_norm": 0.4527418911457062, + "learning_rate": 1.8340203590688837e-06, + "loss": 0.0437, + "num_input_tokens_seen": 36899360, + "step": 54735 + }, + { + "epoch": 1.337307307062761, + "grad_norm": 4.271320343017578, + "learning_rate": 1.8339733055135546e-06, + "loss": 0.046, + "num_input_tokens_seen": 36902368, + "step": 54740 + }, + { + "epoch": 1.3374294578946082, + "grad_norm": 23.710651397705078, + "learning_rate": 1.8339262458933476e-06, + "loss": 0.097, + "num_input_tokens_seen": 36905696, + "step": 54745 + }, + { + "epoch": 1.3375516087264554, + "grad_norm": 31.41887664794922, + "learning_rate": 1.8338791802086045e-06, + "loss": 0.1131, + "num_input_tokens_seen": 36910176, + "step": 54750 + }, + { + "epoch": 1.3376737595583026, + "grad_norm": 33.71608352661133, + "learning_rate": 1.8338321084596678e-06, + "loss": 0.068, + "num_input_tokens_seen": 36913312, + "step": 54755 + }, + { + "epoch": 1.3377959103901498, + "grad_norm": 16.4295711517334, + "learning_rate": 1.8337850306468795e-06, + "loss": 0.068, + "num_input_tokens_seen": 36916576, + "step": 54760 + }, + { + "epoch": 1.337918061221997, + "grad_norm": 0.2625795304775238, + "learning_rate": 1.8337379467705824e-06, + "loss": 0.0019, + "num_input_tokens_seen": 36920352, + "step": 54765 + }, + { + "epoch": 1.3380402120538442, + "grad_norm": 0.9406046271324158, + "learning_rate": 1.8336908568311187e-06, + "loss": 0.1602, + "num_input_tokens_seen": 36923296, + "step": 54770 + }, + { + "epoch": 1.3381623628856913, + "grad_norm": 28.13994026184082, + "learning_rate": 1.8336437608288309e-06, + "loss": 0.0468, + "num_input_tokens_seen": 36926496, + "step": 54775 + }, + { + "epoch": 1.3382845137175385, + "grad_norm": 8.35828971862793, + "learning_rate": 1.8335966587640615e-06, + "loss": 0.2092, + "num_input_tokens_seen": 36929952, + "step": 54780 + }, + { + "epoch": 1.3384066645493855, + "grad_norm": 0.12866359949111938, + "learning_rate": 1.8335495506371529e-06, + "loss": 0.002, + "num_input_tokens_seen": 36933024, + "step": 54785 + }, + { + "epoch": 1.3385288153812327, + "grad_norm": 0.12256859987974167, + "learning_rate": 1.8335024364484477e-06, + "loss": 0.0973, + "num_input_tokens_seen": 36936608, + "step": 54790 + }, + { + "epoch": 1.3386509662130799, + "grad_norm": 0.11025379598140717, + "learning_rate": 1.8334553161982887e-06, + "loss": 0.1195, + "num_input_tokens_seen": 36940128, + "step": 54795 + }, + { + "epoch": 1.338773117044927, + "grad_norm": 0.2998100817203522, + "learning_rate": 1.8334081898870185e-06, + "loss": 0.0434, + "num_input_tokens_seen": 36944672, + "step": 54800 + }, + { + "epoch": 1.3388952678767743, + "grad_norm": 0.1619083136320114, + "learning_rate": 1.8333610575149795e-06, + "loss": 0.053, + "num_input_tokens_seen": 36948192, + "step": 54805 + }, + { + "epoch": 1.3390174187086215, + "grad_norm": 0.2661537230014801, + "learning_rate": 1.8333139190825149e-06, + "loss": 0.0582, + "num_input_tokens_seen": 36951456, + "step": 54810 + }, + { + "epoch": 1.3391395695404686, + "grad_norm": 7.476252555847168, + "learning_rate": 1.8332667745899672e-06, + "loss": 0.0817, + "num_input_tokens_seen": 36954400, + "step": 54815 + }, + { + "epoch": 1.3392617203723156, + "grad_norm": 0.09815490990877151, + "learning_rate": 1.8332196240376797e-06, + "loss": 0.1396, + "num_input_tokens_seen": 36957792, + "step": 54820 + }, + { + "epoch": 1.3393838712041628, + "grad_norm": 0.5720617771148682, + "learning_rate": 1.833172467425995e-06, + "loss": 0.0484, + "num_input_tokens_seen": 36960864, + "step": 54825 + }, + { + "epoch": 1.33950602203601, + "grad_norm": 0.2281419336795807, + "learning_rate": 1.8331253047552558e-06, + "loss": 0.0858, + "num_input_tokens_seen": 36964192, + "step": 54830 + }, + { + "epoch": 1.3396281728678572, + "grad_norm": 16.90079116821289, + "learning_rate": 1.8330781360258052e-06, + "loss": 0.1635, + "num_input_tokens_seen": 36968736, + "step": 54835 + }, + { + "epoch": 1.3397503236997044, + "grad_norm": 0.24762359261512756, + "learning_rate": 1.8330309612379867e-06, + "loss": 0.0901, + "num_input_tokens_seen": 36972192, + "step": 54840 + }, + { + "epoch": 1.3398724745315516, + "grad_norm": 3.342427968978882, + "learning_rate": 1.832983780392143e-06, + "loss": 0.0333, + "num_input_tokens_seen": 36975520, + "step": 54845 + }, + { + "epoch": 1.3399946253633988, + "grad_norm": 27.706703186035156, + "learning_rate": 1.8329365934886168e-06, + "loss": 0.0752, + "num_input_tokens_seen": 36979232, + "step": 54850 + }, + { + "epoch": 1.340116776195246, + "grad_norm": 0.2744644284248352, + "learning_rate": 1.8328894005277519e-06, + "loss": 0.0017, + "num_input_tokens_seen": 36982880, + "step": 54855 + }, + { + "epoch": 1.3402389270270931, + "grad_norm": 5.694372177124023, + "learning_rate": 1.8328422015098913e-06, + "loss": 0.0017, + "num_input_tokens_seen": 36986080, + "step": 54860 + }, + { + "epoch": 1.3403610778589403, + "grad_norm": 27.293333053588867, + "learning_rate": 1.832794996435378e-06, + "loss": 0.2505, + "num_input_tokens_seen": 36989216, + "step": 54865 + }, + { + "epoch": 1.3404832286907875, + "grad_norm": 0.42047351598739624, + "learning_rate": 1.8327477853045554e-06, + "loss": 0.0796, + "num_input_tokens_seen": 36992352, + "step": 54870 + }, + { + "epoch": 1.3406053795226345, + "grad_norm": 0.1544327735900879, + "learning_rate": 1.8327005681177674e-06, + "loss": 0.0009, + "num_input_tokens_seen": 36996128, + "step": 54875 + }, + { + "epoch": 1.3407275303544817, + "grad_norm": 10.551417350769043, + "learning_rate": 1.8326533448753565e-06, + "loss": 0.0478, + "num_input_tokens_seen": 36999136, + "step": 54880 + }, + { + "epoch": 1.3408496811863289, + "grad_norm": 33.77448654174805, + "learning_rate": 1.8326061155776666e-06, + "loss": 0.1097, + "num_input_tokens_seen": 37002656, + "step": 54885 + }, + { + "epoch": 1.340971832018176, + "grad_norm": 0.24251246452331543, + "learning_rate": 1.8325588802250411e-06, + "loss": 0.1133, + "num_input_tokens_seen": 37005728, + "step": 54890 + }, + { + "epoch": 1.3410939828500232, + "grad_norm": 0.3213561177253723, + "learning_rate": 1.8325116388178238e-06, + "loss": 0.0501, + "num_input_tokens_seen": 37008864, + "step": 54895 + }, + { + "epoch": 1.3412161336818704, + "grad_norm": 15.927510261535645, + "learning_rate": 1.8324643913563573e-06, + "loss": 0.1672, + "num_input_tokens_seen": 37012384, + "step": 54900 + }, + { + "epoch": 1.3413382845137176, + "grad_norm": 11.647205352783203, + "learning_rate": 1.8324171378409862e-06, + "loss": 0.2182, + "num_input_tokens_seen": 37015648, + "step": 54905 + }, + { + "epoch": 1.3414604353455646, + "grad_norm": 0.29703488945961, + "learning_rate": 1.832369878272054e-06, + "loss": 0.0635, + "num_input_tokens_seen": 37018976, + "step": 54910 + }, + { + "epoch": 1.3415825861774118, + "grad_norm": 2.9432787895202637, + "learning_rate": 1.832322612649904e-06, + "loss": 0.0387, + "num_input_tokens_seen": 37022112, + "step": 54915 + }, + { + "epoch": 1.341704737009259, + "grad_norm": 0.3091279864311218, + "learning_rate": 1.83227534097488e-06, + "loss": 0.1057, + "num_input_tokens_seen": 37025184, + "step": 54920 + }, + { + "epoch": 1.3418268878411062, + "grad_norm": 282.4009704589844, + "learning_rate": 1.8322280632473256e-06, + "loss": 0.0704, + "num_input_tokens_seen": 37028192, + "step": 54925 + }, + { + "epoch": 1.3419490386729533, + "grad_norm": 10.560799598693848, + "learning_rate": 1.8321807794675853e-06, + "loss": 0.0933, + "num_input_tokens_seen": 37031008, + "step": 54930 + }, + { + "epoch": 1.3420711895048005, + "grad_norm": 48.856502532958984, + "learning_rate": 1.8321334896360026e-06, + "loss": 0.1536, + "num_input_tokens_seen": 37034528, + "step": 54935 + }, + { + "epoch": 1.3421933403366477, + "grad_norm": 0.8152018785476685, + "learning_rate": 1.832086193752921e-06, + "loss": 0.0302, + "num_input_tokens_seen": 37037728, + "step": 54940 + }, + { + "epoch": 1.342315491168495, + "grad_norm": 0.5506560206413269, + "learning_rate": 1.832038891818685e-06, + "loss": 0.0906, + "num_input_tokens_seen": 37041312, + "step": 54945 + }, + { + "epoch": 1.342437642000342, + "grad_norm": 0.3046576678752899, + "learning_rate": 1.8319915838336387e-06, + "loss": 0.072, + "num_input_tokens_seen": 37044384, + "step": 54950 + }, + { + "epoch": 1.3425597928321893, + "grad_norm": 0.04380049556493759, + "learning_rate": 1.831944269798125e-06, + "loss": 0.0005, + "num_input_tokens_seen": 37048032, + "step": 54955 + }, + { + "epoch": 1.3426819436640365, + "grad_norm": 15.407459259033203, + "learning_rate": 1.8318969497124894e-06, + "loss": 0.1263, + "num_input_tokens_seen": 37051424, + "step": 54960 + }, + { + "epoch": 1.3428040944958834, + "grad_norm": 0.5923014879226685, + "learning_rate": 1.8318496235770756e-06, + "loss": 0.1187, + "num_input_tokens_seen": 37054816, + "step": 54965 + }, + { + "epoch": 1.3429262453277306, + "grad_norm": 0.29871129989624023, + "learning_rate": 1.8318022913922272e-06, + "loss": 0.1211, + "num_input_tokens_seen": 37058528, + "step": 54970 + }, + { + "epoch": 1.3430483961595778, + "grad_norm": 0.05903381481766701, + "learning_rate": 1.8317549531582888e-06, + "loss": 0.0724, + "num_input_tokens_seen": 37062112, + "step": 54975 + }, + { + "epoch": 1.343170546991425, + "grad_norm": 0.6294770836830139, + "learning_rate": 1.8317076088756047e-06, + "loss": 0.0456, + "num_input_tokens_seen": 37065184, + "step": 54980 + }, + { + "epoch": 1.3432926978232722, + "grad_norm": 0.3868195712566376, + "learning_rate": 1.8316602585445194e-06, + "loss": 0.0368, + "num_input_tokens_seen": 37068576, + "step": 54985 + }, + { + "epoch": 1.3434148486551194, + "grad_norm": 0.2004663050174713, + "learning_rate": 1.831612902165377e-06, + "loss": 0.0752, + "num_input_tokens_seen": 37071968, + "step": 54990 + }, + { + "epoch": 1.3435369994869666, + "grad_norm": 0.35088229179382324, + "learning_rate": 1.8315655397385217e-06, + "loss": 0.0529, + "num_input_tokens_seen": 37075104, + "step": 54995 + }, + { + "epoch": 1.3436591503188136, + "grad_norm": 0.14613264799118042, + "learning_rate": 1.8315181712642981e-06, + "loss": 0.0326, + "num_input_tokens_seen": 37078304, + "step": 55000 + }, + { + "epoch": 1.3437813011506607, + "grad_norm": 17.115861892700195, + "learning_rate": 1.8314707967430509e-06, + "loss": 0.2934, + "num_input_tokens_seen": 37081440, + "step": 55005 + }, + { + "epoch": 1.343903451982508, + "grad_norm": 224.58563232421875, + "learning_rate": 1.8314234161751242e-06, + "loss": 0.1698, + "num_input_tokens_seen": 37084128, + "step": 55010 + }, + { + "epoch": 1.3440256028143551, + "grad_norm": 0.42700228095054626, + "learning_rate": 1.8313760295608632e-06, + "loss": 0.1783, + "num_input_tokens_seen": 37088032, + "step": 55015 + }, + { + "epoch": 1.3441477536462023, + "grad_norm": 0.14409606158733368, + "learning_rate": 1.8313286369006119e-06, + "loss": 0.0167, + "num_input_tokens_seen": 37091104, + "step": 55020 + }, + { + "epoch": 1.3442699044780495, + "grad_norm": 0.18862088024616241, + "learning_rate": 1.8312812381947147e-06, + "loss": 0.0025, + "num_input_tokens_seen": 37094624, + "step": 55025 + }, + { + "epoch": 1.3443920553098967, + "grad_norm": 0.29231026768684387, + "learning_rate": 1.8312338334435174e-06, + "loss": 0.0877, + "num_input_tokens_seen": 37098272, + "step": 55030 + }, + { + "epoch": 1.3445142061417439, + "grad_norm": 0.12736621499061584, + "learning_rate": 1.8311864226473636e-06, + "loss": 0.1341, + "num_input_tokens_seen": 37101344, + "step": 55035 + }, + { + "epoch": 1.344636356973591, + "grad_norm": 0.0764683336019516, + "learning_rate": 1.831139005806599e-06, + "loss": 0.0253, + "num_input_tokens_seen": 37104800, + "step": 55040 + }, + { + "epoch": 1.3447585078054383, + "grad_norm": 6.596743106842041, + "learning_rate": 1.8310915829215677e-06, + "loss": 0.0942, + "num_input_tokens_seen": 37108384, + "step": 55045 + }, + { + "epoch": 1.3448806586372855, + "grad_norm": 0.1305336207151413, + "learning_rate": 1.831044153992615e-06, + "loss": 0.2179, + "num_input_tokens_seen": 37111776, + "step": 55050 + }, + { + "epoch": 1.3450028094691324, + "grad_norm": 35.683441162109375, + "learning_rate": 1.8309967190200855e-06, + "loss": 0.1414, + "num_input_tokens_seen": 37114976, + "step": 55055 + }, + { + "epoch": 1.3451249603009796, + "grad_norm": 12.118048667907715, + "learning_rate": 1.8309492780043243e-06, + "loss": 0.1777, + "num_input_tokens_seen": 37118304, + "step": 55060 + }, + { + "epoch": 1.3452471111328268, + "grad_norm": 93.1062240600586, + "learning_rate": 1.8309018309456767e-06, + "loss": 0.0744, + "num_input_tokens_seen": 37121568, + "step": 55065 + }, + { + "epoch": 1.345369261964674, + "grad_norm": 0.6298710703849792, + "learning_rate": 1.8308543778444875e-06, + "loss": 0.0465, + "num_input_tokens_seen": 37124704, + "step": 55070 + }, + { + "epoch": 1.3454914127965212, + "grad_norm": 21.240869522094727, + "learning_rate": 1.8308069187011017e-06, + "loss": 0.1673, + "num_input_tokens_seen": 37128288, + "step": 55075 + }, + { + "epoch": 1.3456135636283684, + "grad_norm": 0.5027111172676086, + "learning_rate": 1.8307594535158645e-06, + "loss": 0.0441, + "num_input_tokens_seen": 37131296, + "step": 55080 + }, + { + "epoch": 1.3457357144602153, + "grad_norm": 0.4280967116355896, + "learning_rate": 1.8307119822891213e-06, + "loss": 0.1084, + "num_input_tokens_seen": 37134432, + "step": 55085 + }, + { + "epoch": 1.3458578652920625, + "grad_norm": 40.367759704589844, + "learning_rate": 1.830664505021217e-06, + "loss": 0.0891, + "num_input_tokens_seen": 37137696, + "step": 55090 + }, + { + "epoch": 1.3459800161239097, + "grad_norm": 0.12468226999044418, + "learning_rate": 1.830617021712497e-06, + "loss": 0.0556, + "num_input_tokens_seen": 37141024, + "step": 55095 + }, + { + "epoch": 1.346102166955757, + "grad_norm": 10.89808177947998, + "learning_rate": 1.8305695323633065e-06, + "loss": 0.0951, + "num_input_tokens_seen": 37144224, + "step": 55100 + }, + { + "epoch": 1.346224317787604, + "grad_norm": 4.6593337059021, + "learning_rate": 1.830522036973991e-06, + "loss": 0.0283, + "num_input_tokens_seen": 37147744, + "step": 55105 + }, + { + "epoch": 1.3463464686194513, + "grad_norm": 10.192591667175293, + "learning_rate": 1.830474535544896e-06, + "loss": 0.1267, + "num_input_tokens_seen": 37150816, + "step": 55110 + }, + { + "epoch": 1.3464686194512985, + "grad_norm": 1.1582913398742676, + "learning_rate": 1.8304270280763667e-06, + "loss": 0.1288, + "num_input_tokens_seen": 37154016, + "step": 55115 + }, + { + "epoch": 1.3465907702831457, + "grad_norm": 0.21199598908424377, + "learning_rate": 1.8303795145687488e-06, + "loss": 0.0154, + "num_input_tokens_seen": 37157216, + "step": 55120 + }, + { + "epoch": 1.3467129211149929, + "grad_norm": 103.62749481201172, + "learning_rate": 1.8303319950223877e-06, + "loss": 0.0831, + "num_input_tokens_seen": 37160608, + "step": 55125 + }, + { + "epoch": 1.34683507194684, + "grad_norm": 0.7188388705253601, + "learning_rate": 1.8302844694376289e-06, + "loss": 0.0366, + "num_input_tokens_seen": 37164192, + "step": 55130 + }, + { + "epoch": 1.3469572227786872, + "grad_norm": 1.1744076013565063, + "learning_rate": 1.830236937814818e-06, + "loss": 0.0646, + "num_input_tokens_seen": 37167328, + "step": 55135 + }, + { + "epoch": 1.3470793736105344, + "grad_norm": 0.013013658113777637, + "learning_rate": 1.830189400154301e-06, + "loss": 0.0703, + "num_input_tokens_seen": 37170784, + "step": 55140 + }, + { + "epoch": 1.3472015244423814, + "grad_norm": 0.06146685406565666, + "learning_rate": 1.8301418564564238e-06, + "loss": 0.0018, + "num_input_tokens_seen": 37174560, + "step": 55145 + }, + { + "epoch": 1.3473236752742286, + "grad_norm": 0.30197107791900635, + "learning_rate": 1.830094306721531e-06, + "loss": 0.0507, + "num_input_tokens_seen": 37178016, + "step": 55150 + }, + { + "epoch": 1.3474458261060758, + "grad_norm": 8.145421028137207, + "learning_rate": 1.8300467509499695e-06, + "loss": 0.0356, + "num_input_tokens_seen": 37181600, + "step": 55155 + }, + { + "epoch": 1.347567976937923, + "grad_norm": 59.542667388916016, + "learning_rate": 1.8299991891420845e-06, + "loss": 0.0041, + "num_input_tokens_seen": 37184608, + "step": 55160 + }, + { + "epoch": 1.3476901277697702, + "grad_norm": 2.546189069747925, + "learning_rate": 1.8299516212982225e-06, + "loss": 0.0263, + "num_input_tokens_seen": 37187616, + "step": 55165 + }, + { + "epoch": 1.3478122786016173, + "grad_norm": 0.07865026593208313, + "learning_rate": 1.8299040474187288e-06, + "loss": 0.0432, + "num_input_tokens_seen": 37190880, + "step": 55170 + }, + { + "epoch": 1.3479344294334643, + "grad_norm": 0.05466358736157417, + "learning_rate": 1.8298564675039499e-06, + "loss": 0.1084, + "num_input_tokens_seen": 37193696, + "step": 55175 + }, + { + "epoch": 1.3480565802653115, + "grad_norm": 33.85342788696289, + "learning_rate": 1.8298088815542312e-06, + "loss": 0.1264, + "num_input_tokens_seen": 37196768, + "step": 55180 + }, + { + "epoch": 1.3481787310971587, + "grad_norm": 14.359149932861328, + "learning_rate": 1.8297612895699195e-06, + "loss": 0.2703, + "num_input_tokens_seen": 37200032, + "step": 55185 + }, + { + "epoch": 1.3483008819290059, + "grad_norm": 14.786344528198242, + "learning_rate": 1.8297136915513605e-06, + "loss": 0.2105, + "num_input_tokens_seen": 37203488, + "step": 55190 + }, + { + "epoch": 1.348423032760853, + "grad_norm": 0.13315944373607635, + "learning_rate": 1.8296660874989e-06, + "loss": 0.0619, + "num_input_tokens_seen": 37206880, + "step": 55195 + }, + { + "epoch": 1.3485451835927003, + "grad_norm": 0.07801298052072525, + "learning_rate": 1.829618477412885e-06, + "loss": 0.0022, + "num_input_tokens_seen": 37210208, + "step": 55200 + }, + { + "epoch": 1.3486673344245474, + "grad_norm": 0.10913047939538956, + "learning_rate": 1.8295708612936611e-06, + "loss": 0.0777, + "num_input_tokens_seen": 37213408, + "step": 55205 + }, + { + "epoch": 1.3487894852563946, + "grad_norm": 0.04249902069568634, + "learning_rate": 1.8295232391415747e-06, + "loss": 0.067, + "num_input_tokens_seen": 37216800, + "step": 55210 + }, + { + "epoch": 1.3489116360882418, + "grad_norm": 9.33828353881836, + "learning_rate": 1.8294756109569722e-06, + "loss": 0.1029, + "num_input_tokens_seen": 37220448, + "step": 55215 + }, + { + "epoch": 1.349033786920089, + "grad_norm": 0.07323089241981506, + "learning_rate": 1.8294279767402001e-06, + "loss": 0.0033, + "num_input_tokens_seen": 37223584, + "step": 55220 + }, + { + "epoch": 1.3491559377519362, + "grad_norm": 255.05075073242188, + "learning_rate": 1.8293803364916044e-06, + "loss": 0.1111, + "num_input_tokens_seen": 37226464, + "step": 55225 + }, + { + "epoch": 1.3492780885837832, + "grad_norm": 0.13870365917682648, + "learning_rate": 1.8293326902115323e-06, + "loss": 0.2226, + "num_input_tokens_seen": 37229728, + "step": 55230 + }, + { + "epoch": 1.3494002394156304, + "grad_norm": 196.775634765625, + "learning_rate": 1.8292850379003294e-06, + "loss": 0.1753, + "num_input_tokens_seen": 37232864, + "step": 55235 + }, + { + "epoch": 1.3495223902474776, + "grad_norm": 0.12823425233364105, + "learning_rate": 1.8292373795583425e-06, + "loss": 0.0801, + "num_input_tokens_seen": 37236320, + "step": 55240 + }, + { + "epoch": 1.3496445410793247, + "grad_norm": 0.6417508125305176, + "learning_rate": 1.8291897151859187e-06, + "loss": 0.0025, + "num_input_tokens_seen": 37239840, + "step": 55245 + }, + { + "epoch": 1.349766691911172, + "grad_norm": 39.938053131103516, + "learning_rate": 1.8291420447834043e-06, + "loss": 0.2231, + "num_input_tokens_seen": 37243040, + "step": 55250 + }, + { + "epoch": 1.3498888427430191, + "grad_norm": 0.058599360287189484, + "learning_rate": 1.8290943683511457e-06, + "loss": 0.0016, + "num_input_tokens_seen": 37246112, + "step": 55255 + }, + { + "epoch": 1.3500109935748663, + "grad_norm": 0.35725781321525574, + "learning_rate": 1.8290466858894899e-06, + "loss": 0.0456, + "num_input_tokens_seen": 37249952, + "step": 55260 + }, + { + "epoch": 1.3501331444067133, + "grad_norm": 0.5817795395851135, + "learning_rate": 1.8289989973987838e-06, + "loss": 0.0938, + "num_input_tokens_seen": 37253152, + "step": 55265 + }, + { + "epoch": 1.3502552952385605, + "grad_norm": 0.3643583655357361, + "learning_rate": 1.8289513028793739e-06, + "loss": 0.1001, + "num_input_tokens_seen": 37256352, + "step": 55270 + }, + { + "epoch": 1.3503774460704077, + "grad_norm": 0.15800811350345612, + "learning_rate": 1.8289036023316072e-06, + "loss": 0.0461, + "num_input_tokens_seen": 37259488, + "step": 55275 + }, + { + "epoch": 1.3504995969022549, + "grad_norm": 0.039382465183734894, + "learning_rate": 1.8288558957558301e-06, + "loss": 0.0754, + "num_input_tokens_seen": 37262752, + "step": 55280 + }, + { + "epoch": 1.350621747734102, + "grad_norm": 0.5504450798034668, + "learning_rate": 1.8288081831523907e-06, + "loss": 0.1319, + "num_input_tokens_seen": 37266080, + "step": 55285 + }, + { + "epoch": 1.3507438985659492, + "grad_norm": 0.5390429496765137, + "learning_rate": 1.8287604645216348e-06, + "loss": 0.0835, + "num_input_tokens_seen": 37269472, + "step": 55290 + }, + { + "epoch": 1.3508660493977964, + "grad_norm": 0.1276429444551468, + "learning_rate": 1.8287127398639102e-06, + "loss": 0.0482, + "num_input_tokens_seen": 37272544, + "step": 55295 + }, + { + "epoch": 1.3509882002296436, + "grad_norm": 16.06958770751953, + "learning_rate": 1.8286650091795638e-06, + "loss": 0.1202, + "num_input_tokens_seen": 37276448, + "step": 55300 + }, + { + "epoch": 1.3511103510614908, + "grad_norm": 0.48938968777656555, + "learning_rate": 1.828617272468942e-06, + "loss": 0.0391, + "num_input_tokens_seen": 37279904, + "step": 55305 + }, + { + "epoch": 1.351232501893338, + "grad_norm": 0.21368904411792755, + "learning_rate": 1.8285695297323928e-06, + "loss": 0.0494, + "num_input_tokens_seen": 37282784, + "step": 55310 + }, + { + "epoch": 1.3513546527251852, + "grad_norm": 0.7595393657684326, + "learning_rate": 1.828521780970263e-06, + "loss": 0.0919, + "num_input_tokens_seen": 37285856, + "step": 55315 + }, + { + "epoch": 1.3514768035570321, + "grad_norm": 10.359819412231445, + "learning_rate": 1.8284740261829002e-06, + "loss": 0.144, + "num_input_tokens_seen": 37288992, + "step": 55320 + }, + { + "epoch": 1.3515989543888793, + "grad_norm": 0.3968583941459656, + "learning_rate": 1.8284262653706515e-06, + "loss": 0.0438, + "num_input_tokens_seen": 37292320, + "step": 55325 + }, + { + "epoch": 1.3517211052207265, + "grad_norm": 0.9557807445526123, + "learning_rate": 1.8283784985338638e-06, + "loss": 0.0744, + "num_input_tokens_seen": 37295712, + "step": 55330 + }, + { + "epoch": 1.3518432560525737, + "grad_norm": 24.249900817871094, + "learning_rate": 1.828330725672885e-06, + "loss": 0.1685, + "num_input_tokens_seen": 37299040, + "step": 55335 + }, + { + "epoch": 1.351965406884421, + "grad_norm": 1.363347053527832, + "learning_rate": 1.8282829467880624e-06, + "loss": 0.0024, + "num_input_tokens_seen": 37302176, + "step": 55340 + }, + { + "epoch": 1.352087557716268, + "grad_norm": 0.12959076464176178, + "learning_rate": 1.8282351618797435e-06, + "loss": 0.0802, + "num_input_tokens_seen": 37305312, + "step": 55345 + }, + { + "epoch": 1.3522097085481153, + "grad_norm": 28.02058219909668, + "learning_rate": 1.8281873709482759e-06, + "loss": 0.0617, + "num_input_tokens_seen": 37308512, + "step": 55350 + }, + { + "epoch": 1.3523318593799623, + "grad_norm": 6.930314540863037, + "learning_rate": 1.8281395739940067e-06, + "loss": 0.0833, + "num_input_tokens_seen": 37311712, + "step": 55355 + }, + { + "epoch": 1.3524540102118094, + "grad_norm": 0.5654293298721313, + "learning_rate": 1.828091771017284e-06, + "loss": 0.0447, + "num_input_tokens_seen": 37315424, + "step": 55360 + }, + { + "epoch": 1.3525761610436566, + "grad_norm": 7.034704685211182, + "learning_rate": 1.8280439620184549e-06, + "loss": 0.0523, + "num_input_tokens_seen": 37318880, + "step": 55365 + }, + { + "epoch": 1.3526983118755038, + "grad_norm": 0.38624125719070435, + "learning_rate": 1.8279961469978676e-06, + "loss": 0.0028, + "num_input_tokens_seen": 37322208, + "step": 55370 + }, + { + "epoch": 1.352820462707351, + "grad_norm": 0.046895623207092285, + "learning_rate": 1.8279483259558694e-06, + "loss": 0.1267, + "num_input_tokens_seen": 37326048, + "step": 55375 + }, + { + "epoch": 1.3529426135391982, + "grad_norm": 0.30466148257255554, + "learning_rate": 1.8279004988928085e-06, + "loss": 0.0956, + "num_input_tokens_seen": 37329504, + "step": 55380 + }, + { + "epoch": 1.3530647643710454, + "grad_norm": 0.2624111473560333, + "learning_rate": 1.8278526658090325e-06, + "loss": 0.0289, + "num_input_tokens_seen": 37332896, + "step": 55385 + }, + { + "epoch": 1.3531869152028926, + "grad_norm": 0.18605853617191315, + "learning_rate": 1.8278048267048894e-06, + "loss": 0.0558, + "num_input_tokens_seen": 37336416, + "step": 55390 + }, + { + "epoch": 1.3533090660347398, + "grad_norm": 0.3882926106452942, + "learning_rate": 1.8277569815807266e-06, + "loss": 0.044, + "num_input_tokens_seen": 37339488, + "step": 55395 + }, + { + "epoch": 1.353431216866587, + "grad_norm": 16.696517944335938, + "learning_rate": 1.8277091304368926e-06, + "loss": 0.1294, + "num_input_tokens_seen": 37342816, + "step": 55400 + }, + { + "epoch": 1.3535533676984342, + "grad_norm": 0.25264209508895874, + "learning_rate": 1.8276612732737351e-06, + "loss": 0.1302, + "num_input_tokens_seen": 37346016, + "step": 55405 + }, + { + "epoch": 1.3536755185302811, + "grad_norm": 21.005630493164062, + "learning_rate": 1.8276134100916024e-06, + "loss": 0.1187, + "num_input_tokens_seen": 37349600, + "step": 55410 + }, + { + "epoch": 1.3537976693621283, + "grad_norm": 8.199868202209473, + "learning_rate": 1.8275655408908421e-06, + "loss": 0.1065, + "num_input_tokens_seen": 37352608, + "step": 55415 + }, + { + "epoch": 1.3539198201939755, + "grad_norm": 0.061909351497888565, + "learning_rate": 1.8275176656718025e-06, + "loss": 0.0017, + "num_input_tokens_seen": 37355808, + "step": 55420 + }, + { + "epoch": 1.3540419710258227, + "grad_norm": 0.8016281723976135, + "learning_rate": 1.8274697844348321e-06, + "loss": 0.0723, + "num_input_tokens_seen": 37359392, + "step": 55425 + }, + { + "epoch": 1.3541641218576699, + "grad_norm": 0.0208908561617136, + "learning_rate": 1.827421897180279e-06, + "loss": 0.0567, + "num_input_tokens_seen": 37362720, + "step": 55430 + }, + { + "epoch": 1.354286272689517, + "grad_norm": 0.1459772139787674, + "learning_rate": 1.827374003908491e-06, + "loss": 0.074, + "num_input_tokens_seen": 37366112, + "step": 55435 + }, + { + "epoch": 1.3544084235213643, + "grad_norm": 101.6792984008789, + "learning_rate": 1.8273261046198169e-06, + "loss": 0.1579, + "num_input_tokens_seen": 37369056, + "step": 55440 + }, + { + "epoch": 1.3545305743532112, + "grad_norm": 0.41175195574760437, + "learning_rate": 1.8272781993146046e-06, + "loss": 0.0733, + "num_input_tokens_seen": 37372192, + "step": 55445 + }, + { + "epoch": 1.3546527251850584, + "grad_norm": 0.5852706432342529, + "learning_rate": 1.827230287993203e-06, + "loss": 0.0467, + "num_input_tokens_seen": 37375712, + "step": 55450 + }, + { + "epoch": 1.3547748760169056, + "grad_norm": 2.3407347202301025, + "learning_rate": 1.8271823706559602e-06, + "loss": 0.1188, + "num_input_tokens_seen": 37379104, + "step": 55455 + }, + { + "epoch": 1.3548970268487528, + "grad_norm": 72.70645141601562, + "learning_rate": 1.8271344473032246e-06, + "loss": 0.0039, + "num_input_tokens_seen": 37382496, + "step": 55460 + }, + { + "epoch": 1.3550191776806, + "grad_norm": 16.15511131286621, + "learning_rate": 1.827086517935345e-06, + "loss": 0.1075, + "num_input_tokens_seen": 37385888, + "step": 55465 + }, + { + "epoch": 1.3551413285124472, + "grad_norm": 0.5082676410675049, + "learning_rate": 1.8270385825526698e-06, + "loss": 0.0014, + "num_input_tokens_seen": 37389408, + "step": 55470 + }, + { + "epoch": 1.3552634793442944, + "grad_norm": 0.10025462508201599, + "learning_rate": 1.8269906411555473e-06, + "loss": 0.1457, + "num_input_tokens_seen": 37393376, + "step": 55475 + }, + { + "epoch": 1.3553856301761416, + "grad_norm": 0.05350198969244957, + "learning_rate": 1.8269426937443266e-06, + "loss": 0.0955, + "num_input_tokens_seen": 37396832, + "step": 55480 + }, + { + "epoch": 1.3555077810079887, + "grad_norm": 0.14534598588943481, + "learning_rate": 1.8268947403193562e-06, + "loss": 0.0859, + "num_input_tokens_seen": 37400288, + "step": 55485 + }, + { + "epoch": 1.355629931839836, + "grad_norm": 6.821413993835449, + "learning_rate": 1.8268467808809849e-06, + "loss": 0.0015, + "num_input_tokens_seen": 37403872, + "step": 55490 + }, + { + "epoch": 1.3557520826716831, + "grad_norm": 5.831641674041748, + "learning_rate": 1.8267988154295612e-06, + "loss": 0.1744, + "num_input_tokens_seen": 37407136, + "step": 55495 + }, + { + "epoch": 1.35587423350353, + "grad_norm": 37.35738754272461, + "learning_rate": 1.8267508439654345e-06, + "loss": 0.1595, + "num_input_tokens_seen": 37410976, + "step": 55500 + }, + { + "epoch": 1.3559963843353773, + "grad_norm": 12.805477142333984, + "learning_rate": 1.826702866488953e-06, + "loss": 0.0964, + "num_input_tokens_seen": 37414816, + "step": 55505 + }, + { + "epoch": 1.3561185351672245, + "grad_norm": 30.72103500366211, + "learning_rate": 1.826654883000466e-06, + "loss": 0.1445, + "num_input_tokens_seen": 37418272, + "step": 55510 + }, + { + "epoch": 1.3562406859990717, + "grad_norm": 16.386653900146484, + "learning_rate": 1.8266068935003226e-06, + "loss": 0.0936, + "num_input_tokens_seen": 37421792, + "step": 55515 + }, + { + "epoch": 1.3563628368309188, + "grad_norm": 0.3761710822582245, + "learning_rate": 1.826558897988871e-06, + "loss": 0.0016, + "num_input_tokens_seen": 37425056, + "step": 55520 + }, + { + "epoch": 1.356484987662766, + "grad_norm": 7.814873695373535, + "learning_rate": 1.8265108964664608e-06, + "loss": 0.0576, + "num_input_tokens_seen": 37428256, + "step": 55525 + }, + { + "epoch": 1.3566071384946132, + "grad_norm": 0.6091344356536865, + "learning_rate": 1.8264628889334414e-06, + "loss": 0.0034, + "num_input_tokens_seen": 37431968, + "step": 55530 + }, + { + "epoch": 1.3567292893264602, + "grad_norm": 0.08361712843179703, + "learning_rate": 1.8264148753901616e-06, + "loss": 0.0277, + "num_input_tokens_seen": 37436192, + "step": 55535 + }, + { + "epoch": 1.3568514401583074, + "grad_norm": 0.1606522798538208, + "learning_rate": 1.8263668558369703e-06, + "loss": 0.0666, + "num_input_tokens_seen": 37439456, + "step": 55540 + }, + { + "epoch": 1.3569735909901546, + "grad_norm": 36.44951629638672, + "learning_rate": 1.8263188302742173e-06, + "loss": 0.0879, + "num_input_tokens_seen": 37442912, + "step": 55545 + }, + { + "epoch": 1.3570957418220018, + "grad_norm": 16.47828483581543, + "learning_rate": 1.8262707987022512e-06, + "loss": 0.105, + "num_input_tokens_seen": 37445856, + "step": 55550 + }, + { + "epoch": 1.357217892653849, + "grad_norm": 0.06848545372486115, + "learning_rate": 1.8262227611214218e-06, + "loss": 0.1269, + "num_input_tokens_seen": 37449184, + "step": 55555 + }, + { + "epoch": 1.3573400434856961, + "grad_norm": 10.186595916748047, + "learning_rate": 1.826174717532078e-06, + "loss": 0.061, + "num_input_tokens_seen": 37452192, + "step": 55560 + }, + { + "epoch": 1.3574621943175433, + "grad_norm": 1.475244164466858, + "learning_rate": 1.8261266679345696e-06, + "loss": 0.1319, + "num_input_tokens_seen": 37455392, + "step": 55565 + }, + { + "epoch": 1.3575843451493905, + "grad_norm": 0.04712071269750595, + "learning_rate": 1.8260786123292458e-06, + "loss": 0.0028, + "num_input_tokens_seen": 37458784, + "step": 55570 + }, + { + "epoch": 1.3577064959812377, + "grad_norm": 0.3326287567615509, + "learning_rate": 1.8260305507164565e-06, + "loss": 0.0717, + "num_input_tokens_seen": 37461984, + "step": 55575 + }, + { + "epoch": 1.357828646813085, + "grad_norm": 100.86023712158203, + "learning_rate": 1.8259824830965504e-06, + "loss": 0.2017, + "num_input_tokens_seen": 37465888, + "step": 55580 + }, + { + "epoch": 1.357950797644932, + "grad_norm": 21.718538284301758, + "learning_rate": 1.8259344094698777e-06, + "loss": 0.0853, + "num_input_tokens_seen": 37469664, + "step": 55585 + }, + { + "epoch": 1.358072948476779, + "grad_norm": 0.4292604327201843, + "learning_rate": 1.8258863298367877e-06, + "loss": 0.0399, + "num_input_tokens_seen": 37473056, + "step": 55590 + }, + { + "epoch": 1.3581950993086263, + "grad_norm": 0.042389508336782455, + "learning_rate": 1.8258382441976306e-06, + "loss": 0.0738, + "num_input_tokens_seen": 37476192, + "step": 55595 + }, + { + "epoch": 1.3583172501404734, + "grad_norm": 0.13057366013526917, + "learning_rate": 1.8257901525527553e-06, + "loss": 0.0733, + "num_input_tokens_seen": 37479520, + "step": 55600 + }, + { + "epoch": 1.3584394009723206, + "grad_norm": 0.26481014490127563, + "learning_rate": 1.8257420549025117e-06, + "loss": 0.0521, + "num_input_tokens_seen": 37482528, + "step": 55605 + }, + { + "epoch": 1.3585615518041678, + "grad_norm": 0.13100023567676544, + "learning_rate": 1.82569395124725e-06, + "loss": 0.002, + "num_input_tokens_seen": 37486048, + "step": 55610 + }, + { + "epoch": 1.358683702636015, + "grad_norm": 0.2691006660461426, + "learning_rate": 1.82564584158732e-06, + "loss": 0.1526, + "num_input_tokens_seen": 37489120, + "step": 55615 + }, + { + "epoch": 1.358805853467862, + "grad_norm": 0.04777399078011513, + "learning_rate": 1.8255977259230714e-06, + "loss": 0.0569, + "num_input_tokens_seen": 37492256, + "step": 55620 + }, + { + "epoch": 1.3589280042997092, + "grad_norm": 5.601587772369385, + "learning_rate": 1.8255496042548537e-06, + "loss": 0.0938, + "num_input_tokens_seen": 37495392, + "step": 55625 + }, + { + "epoch": 1.3590501551315564, + "grad_norm": 0.3111015856266022, + "learning_rate": 1.8255014765830174e-06, + "loss": 0.0346, + "num_input_tokens_seen": 37500512, + "step": 55630 + }, + { + "epoch": 1.3591723059634035, + "grad_norm": 20.101476669311523, + "learning_rate": 1.8254533429079125e-06, + "loss": 0.12, + "num_input_tokens_seen": 37504288, + "step": 55635 + }, + { + "epoch": 1.3592944567952507, + "grad_norm": 0.36264368891716003, + "learning_rate": 1.8254052032298886e-06, + "loss": 0.0534, + "num_input_tokens_seen": 37507744, + "step": 55640 + }, + { + "epoch": 1.359416607627098, + "grad_norm": 0.7125762104988098, + "learning_rate": 1.8253570575492963e-06, + "loss": 0.133, + "num_input_tokens_seen": 37511136, + "step": 55645 + }, + { + "epoch": 1.3595387584589451, + "grad_norm": 3.620149612426758, + "learning_rate": 1.8253089058664852e-06, + "loss": 0.0398, + "num_input_tokens_seen": 37514272, + "step": 55650 + }, + { + "epoch": 1.3596609092907923, + "grad_norm": 0.6286777257919312, + "learning_rate": 1.825260748181806e-06, + "loss": 0.0322, + "num_input_tokens_seen": 37517152, + "step": 55655 + }, + { + "epoch": 1.3597830601226395, + "grad_norm": 0.07149165123701096, + "learning_rate": 1.8252125844956083e-06, + "loss": 0.1159, + "num_input_tokens_seen": 37520480, + "step": 55660 + }, + { + "epoch": 1.3599052109544867, + "grad_norm": 0.33753979206085205, + "learning_rate": 1.8251644148082433e-06, + "loss": 0.0401, + "num_input_tokens_seen": 37523552, + "step": 55665 + }, + { + "epoch": 1.3600273617863339, + "grad_norm": 1.23895263671875, + "learning_rate": 1.8251162391200604e-06, + "loss": 0.0273, + "num_input_tokens_seen": 37527200, + "step": 55670 + }, + { + "epoch": 1.360149512618181, + "grad_norm": 0.11427648365497589, + "learning_rate": 1.8250680574314101e-06, + "loss": 0.0507, + "num_input_tokens_seen": 37530528, + "step": 55675 + }, + { + "epoch": 1.360271663450028, + "grad_norm": 39.01353073120117, + "learning_rate": 1.8250198697426434e-06, + "loss": 0.1472, + "num_input_tokens_seen": 37533664, + "step": 55680 + }, + { + "epoch": 1.3603938142818752, + "grad_norm": 0.10856301337480545, + "learning_rate": 1.82497167605411e-06, + "loss": 0.0013, + "num_input_tokens_seen": 37537120, + "step": 55685 + }, + { + "epoch": 1.3605159651137224, + "grad_norm": 21.2423038482666, + "learning_rate": 1.8249234763661608e-06, + "loss": 0.2504, + "num_input_tokens_seen": 37540256, + "step": 55690 + }, + { + "epoch": 1.3606381159455696, + "grad_norm": 0.3467349708080292, + "learning_rate": 1.8248752706791461e-06, + "loss": 0.096, + "num_input_tokens_seen": 37544096, + "step": 55695 + }, + { + "epoch": 1.3607602667774168, + "grad_norm": 0.029728004708886147, + "learning_rate": 1.8248270589934167e-06, + "loss": 0.1089, + "num_input_tokens_seen": 37547424, + "step": 55700 + }, + { + "epoch": 1.360882417609264, + "grad_norm": 0.06764955818653107, + "learning_rate": 1.824778841309323e-06, + "loss": 0.0008, + "num_input_tokens_seen": 37551264, + "step": 55705 + }, + { + "epoch": 1.361004568441111, + "grad_norm": 0.18158482015132904, + "learning_rate": 1.8247306176272157e-06, + "loss": 0.0011, + "num_input_tokens_seen": 37554400, + "step": 55710 + }, + { + "epoch": 1.3611267192729581, + "grad_norm": 161.26670837402344, + "learning_rate": 1.8246823879474458e-06, + "loss": 0.1135, + "num_input_tokens_seen": 37558048, + "step": 55715 + }, + { + "epoch": 1.3612488701048053, + "grad_norm": 0.35041800141334534, + "learning_rate": 1.8246341522703635e-06, + "loss": 0.0316, + "num_input_tokens_seen": 37561376, + "step": 55720 + }, + { + "epoch": 1.3613710209366525, + "grad_norm": 0.19455307722091675, + "learning_rate": 1.8245859105963197e-06, + "loss": 0.0492, + "num_input_tokens_seen": 37564768, + "step": 55725 + }, + { + "epoch": 1.3614931717684997, + "grad_norm": 18.347190856933594, + "learning_rate": 1.8245376629256657e-06, + "loss": 0.1975, + "num_input_tokens_seen": 37567968, + "step": 55730 + }, + { + "epoch": 1.361615322600347, + "grad_norm": 0.7373723387718201, + "learning_rate": 1.8244894092587517e-06, + "loss": 0.041, + "num_input_tokens_seen": 37571360, + "step": 55735 + }, + { + "epoch": 1.361737473432194, + "grad_norm": 0.1448211967945099, + "learning_rate": 1.8244411495959291e-06, + "loss": 0.0009, + "num_input_tokens_seen": 37574752, + "step": 55740 + }, + { + "epoch": 1.3618596242640413, + "grad_norm": 0.23177365958690643, + "learning_rate": 1.8243928839375488e-06, + "loss": 0.142, + "num_input_tokens_seen": 37578016, + "step": 55745 + }, + { + "epoch": 1.3619817750958885, + "grad_norm": 0.027521274983882904, + "learning_rate": 1.8243446122839615e-06, + "loss": 0.0089, + "num_input_tokens_seen": 37581408, + "step": 55750 + }, + { + "epoch": 1.3621039259277357, + "grad_norm": 0.07896178960800171, + "learning_rate": 1.8242963346355187e-06, + "loss": 0.076, + "num_input_tokens_seen": 37584864, + "step": 55755 + }, + { + "epoch": 1.3622260767595828, + "grad_norm": 0.21244436502456665, + "learning_rate": 1.8242480509925713e-06, + "loss": 0.0738, + "num_input_tokens_seen": 37588192, + "step": 55760 + }, + { + "epoch": 1.3623482275914298, + "grad_norm": 8.31706428527832, + "learning_rate": 1.8241997613554702e-06, + "loss": 0.1124, + "num_input_tokens_seen": 37591584, + "step": 55765 + }, + { + "epoch": 1.362470378423277, + "grad_norm": 165.4110107421875, + "learning_rate": 1.8241514657245669e-06, + "loss": 0.1641, + "num_input_tokens_seen": 37594912, + "step": 55770 + }, + { + "epoch": 1.3625925292551242, + "grad_norm": 0.0612252838909626, + "learning_rate": 1.8241031641002125e-06, + "loss": 0.0662, + "num_input_tokens_seen": 37598240, + "step": 55775 + }, + { + "epoch": 1.3627146800869714, + "grad_norm": 1.860999584197998, + "learning_rate": 1.8240548564827577e-06, + "loss": 0.0031, + "num_input_tokens_seen": 37601312, + "step": 55780 + }, + { + "epoch": 1.3628368309188186, + "grad_norm": 0.02734232135117054, + "learning_rate": 1.8240065428725552e-06, + "loss": 0.0013, + "num_input_tokens_seen": 37604640, + "step": 55785 + }, + { + "epoch": 1.3629589817506658, + "grad_norm": 6.640567779541016, + "learning_rate": 1.823958223269955e-06, + "loss": 0.0404, + "num_input_tokens_seen": 37608032, + "step": 55790 + }, + { + "epoch": 1.363081132582513, + "grad_norm": 0.0057974387891590595, + "learning_rate": 1.823909897675309e-06, + "loss": 0.0571, + "num_input_tokens_seen": 37611296, + "step": 55795 + }, + { + "epoch": 1.36320328341436, + "grad_norm": 1.323617696762085, + "learning_rate": 1.8238615660889685e-06, + "loss": 0.0555, + "num_input_tokens_seen": 37614432, + "step": 55800 + }, + { + "epoch": 1.3633254342462071, + "grad_norm": 0.33827343583106995, + "learning_rate": 1.8238132285112853e-06, + "loss": 0.0917, + "num_input_tokens_seen": 37618080, + "step": 55805 + }, + { + "epoch": 1.3634475850780543, + "grad_norm": 93.0377197265625, + "learning_rate": 1.8237648849426103e-06, + "loss": 0.0064, + "num_input_tokens_seen": 37621152, + "step": 55810 + }, + { + "epoch": 1.3635697359099015, + "grad_norm": 0.7247235178947449, + "learning_rate": 1.823716535383296e-06, + "loss": 0.3132, + "num_input_tokens_seen": 37624352, + "step": 55815 + }, + { + "epoch": 1.3636918867417487, + "grad_norm": 77.18084716796875, + "learning_rate": 1.8236681798336935e-06, + "loss": 0.1327, + "num_input_tokens_seen": 37627680, + "step": 55820 + }, + { + "epoch": 1.3638140375735959, + "grad_norm": 0.5121102929115295, + "learning_rate": 1.8236198182941543e-06, + "loss": 0.0672, + "num_input_tokens_seen": 37631008, + "step": 55825 + }, + { + "epoch": 1.363936188405443, + "grad_norm": 0.45688682794570923, + "learning_rate": 1.8235714507650302e-06, + "loss": 0.0468, + "num_input_tokens_seen": 37634784, + "step": 55830 + }, + { + "epoch": 1.3640583392372903, + "grad_norm": 19.641523361206055, + "learning_rate": 1.823523077246673e-06, + "loss": 0.0416, + "num_input_tokens_seen": 37637856, + "step": 55835 + }, + { + "epoch": 1.3641804900691374, + "grad_norm": 5.420958042144775, + "learning_rate": 1.8234746977394346e-06, + "loss": 0.0454, + "num_input_tokens_seen": 37641312, + "step": 55840 + }, + { + "epoch": 1.3643026409009846, + "grad_norm": 103.85394287109375, + "learning_rate": 1.8234263122436667e-06, + "loss": 0.1019, + "num_input_tokens_seen": 37644960, + "step": 55845 + }, + { + "epoch": 1.3644247917328318, + "grad_norm": 0.18337062001228333, + "learning_rate": 1.8233779207597211e-06, + "loss": 0.0462, + "num_input_tokens_seen": 37648096, + "step": 55850 + }, + { + "epoch": 1.3645469425646788, + "grad_norm": 0.46553394198417664, + "learning_rate": 1.8233295232879497e-06, + "loss": 0.0809, + "num_input_tokens_seen": 37651488, + "step": 55855 + }, + { + "epoch": 1.364669093396526, + "grad_norm": 112.44773864746094, + "learning_rate": 1.8232811198287048e-06, + "loss": 0.1134, + "num_input_tokens_seen": 37654368, + "step": 55860 + }, + { + "epoch": 1.3647912442283732, + "grad_norm": 10.225543022155762, + "learning_rate": 1.823232710382338e-06, + "loss": 0.1791, + "num_input_tokens_seen": 37657696, + "step": 55865 + }, + { + "epoch": 1.3649133950602204, + "grad_norm": 0.09325478971004486, + "learning_rate": 1.8231842949492016e-06, + "loss": 0.1053, + "num_input_tokens_seen": 37660896, + "step": 55870 + }, + { + "epoch": 1.3650355458920675, + "grad_norm": 0.08168121427297592, + "learning_rate": 1.8231358735296475e-06, + "loss": 0.0018, + "num_input_tokens_seen": 37664224, + "step": 55875 + }, + { + "epoch": 1.3651576967239147, + "grad_norm": 9.982057571411133, + "learning_rate": 1.823087446124028e-06, + "loss": 0.0786, + "num_input_tokens_seen": 37667424, + "step": 55880 + }, + { + "epoch": 1.365279847555762, + "grad_norm": 9.831002235412598, + "learning_rate": 1.8230390127326954e-06, + "loss": 0.1927, + "num_input_tokens_seen": 37670560, + "step": 55885 + }, + { + "epoch": 1.365401998387609, + "grad_norm": 9.530682563781738, + "learning_rate": 1.8229905733560011e-06, + "loss": 0.0488, + "num_input_tokens_seen": 37673696, + "step": 55890 + }, + { + "epoch": 1.365524149219456, + "grad_norm": 0.12205583602190018, + "learning_rate": 1.8229421279942985e-06, + "loss": 0.0618, + "num_input_tokens_seen": 37676640, + "step": 55895 + }, + { + "epoch": 1.3656463000513033, + "grad_norm": 165.70962524414062, + "learning_rate": 1.8228936766479394e-06, + "loss": 0.1043, + "num_input_tokens_seen": 37679776, + "step": 55900 + }, + { + "epoch": 1.3657684508831505, + "grad_norm": 0.35768038034439087, + "learning_rate": 1.822845219317276e-06, + "loss": 0.0794, + "num_input_tokens_seen": 37683104, + "step": 55905 + }, + { + "epoch": 1.3658906017149977, + "grad_norm": 8.801888465881348, + "learning_rate": 1.822796756002661e-06, + "loss": 0.1115, + "num_input_tokens_seen": 37686496, + "step": 55910 + }, + { + "epoch": 1.3660127525468448, + "grad_norm": 0.4240803122520447, + "learning_rate": 1.8227482867044466e-06, + "loss": 0.0018, + "num_input_tokens_seen": 37689824, + "step": 55915 + }, + { + "epoch": 1.366134903378692, + "grad_norm": 21.410913467407227, + "learning_rate": 1.8226998114229852e-06, + "loss": 0.1187, + "num_input_tokens_seen": 37693088, + "step": 55920 + }, + { + "epoch": 1.3662570542105392, + "grad_norm": 0.18422946333885193, + "learning_rate": 1.8226513301586298e-06, + "loss": 0.001, + "num_input_tokens_seen": 37696544, + "step": 55925 + }, + { + "epoch": 1.3663792050423864, + "grad_norm": 13.039327621459961, + "learning_rate": 1.8226028429117326e-06, + "loss": 0.1704, + "num_input_tokens_seen": 37700192, + "step": 55930 + }, + { + "epoch": 1.3665013558742336, + "grad_norm": 2.682344436645508, + "learning_rate": 1.8225543496826461e-06, + "loss": 0.107, + "num_input_tokens_seen": 37703456, + "step": 55935 + }, + { + "epoch": 1.3666235067060808, + "grad_norm": 0.18516357243061066, + "learning_rate": 1.8225058504717232e-06, + "loss": 0.0997, + "num_input_tokens_seen": 37706912, + "step": 55940 + }, + { + "epoch": 1.3667456575379278, + "grad_norm": 43.21474075317383, + "learning_rate": 1.8224573452793166e-06, + "loss": 0.1256, + "num_input_tokens_seen": 37710240, + "step": 55945 + }, + { + "epoch": 1.366867808369775, + "grad_norm": 0.15679652988910675, + "learning_rate": 1.822408834105779e-06, + "loss": 0.0811, + "num_input_tokens_seen": 37714144, + "step": 55950 + }, + { + "epoch": 1.3669899592016221, + "grad_norm": 0.360243022441864, + "learning_rate": 1.822360316951463e-06, + "loss": 0.0367, + "num_input_tokens_seen": 37717472, + "step": 55955 + }, + { + "epoch": 1.3671121100334693, + "grad_norm": 0.2765692174434662, + "learning_rate": 1.8223117938167217e-06, + "loss": 0.055, + "num_input_tokens_seen": 37720864, + "step": 55960 + }, + { + "epoch": 1.3672342608653165, + "grad_norm": 0.22037814557552338, + "learning_rate": 1.8222632647019079e-06, + "loss": 0.0378, + "num_input_tokens_seen": 37724064, + "step": 55965 + }, + { + "epoch": 1.3673564116971637, + "grad_norm": 1.3900530338287354, + "learning_rate": 1.8222147296073741e-06, + "loss": 0.0034, + "num_input_tokens_seen": 37727008, + "step": 55970 + }, + { + "epoch": 1.367478562529011, + "grad_norm": 14.512275695800781, + "learning_rate": 1.8221661885334741e-06, + "loss": 0.1093, + "num_input_tokens_seen": 37730080, + "step": 55975 + }, + { + "epoch": 1.3676007133608579, + "grad_norm": 0.17701953649520874, + "learning_rate": 1.8221176414805602e-06, + "loss": 0.0725, + "num_input_tokens_seen": 37733344, + "step": 55980 + }, + { + "epoch": 1.367722864192705, + "grad_norm": 0.06762678176164627, + "learning_rate": 1.8220690884489857e-06, + "loss": 0.0012, + "num_input_tokens_seen": 37736480, + "step": 55985 + }, + { + "epoch": 1.3678450150245522, + "grad_norm": 0.10125814378261566, + "learning_rate": 1.8220205294391037e-06, + "loss": 0.1238, + "num_input_tokens_seen": 37739936, + "step": 55990 + }, + { + "epoch": 1.3679671658563994, + "grad_norm": 13.808995246887207, + "learning_rate": 1.8219719644512672e-06, + "loss": 0.0506, + "num_input_tokens_seen": 37743008, + "step": 55995 + }, + { + "epoch": 1.3680893166882466, + "grad_norm": 19.54206085205078, + "learning_rate": 1.82192339348583e-06, + "loss": 0.0047, + "num_input_tokens_seen": 37746400, + "step": 56000 + }, + { + "epoch": 1.3682114675200938, + "grad_norm": 95.54154205322266, + "learning_rate": 1.8218748165431444e-06, + "loss": 0.0508, + "num_input_tokens_seen": 37750432, + "step": 56005 + }, + { + "epoch": 1.368333618351941, + "grad_norm": 20.368661880493164, + "learning_rate": 1.821826233623564e-06, + "loss": 0.2266, + "num_input_tokens_seen": 37754208, + "step": 56010 + }, + { + "epoch": 1.3684557691837882, + "grad_norm": 26.827817916870117, + "learning_rate": 1.8217776447274424e-06, + "loss": 0.0979, + "num_input_tokens_seen": 37757408, + "step": 56015 + }, + { + "epoch": 1.3685779200156354, + "grad_norm": 14.997940063476562, + "learning_rate": 1.8217290498551326e-06, + "loss": 0.2237, + "num_input_tokens_seen": 37760736, + "step": 56020 + }, + { + "epoch": 1.3687000708474826, + "grad_norm": 9.052905082702637, + "learning_rate": 1.8216804490069882e-06, + "loss": 0.0844, + "num_input_tokens_seen": 37763744, + "step": 56025 + }, + { + "epoch": 1.3688222216793298, + "grad_norm": 0.05417915806174278, + "learning_rate": 1.8216318421833625e-06, + "loss": 0.1457, + "num_input_tokens_seen": 37766880, + "step": 56030 + }, + { + "epoch": 1.3689443725111767, + "grad_norm": 18.598569869995117, + "learning_rate": 1.821583229384609e-06, + "loss": 0.13, + "num_input_tokens_seen": 37770016, + "step": 56035 + }, + { + "epoch": 1.369066523343024, + "grad_norm": 17.506576538085938, + "learning_rate": 1.8215346106110814e-06, + "loss": 0.2275, + "num_input_tokens_seen": 37773024, + "step": 56040 + }, + { + "epoch": 1.3691886741748711, + "grad_norm": 33.39712905883789, + "learning_rate": 1.8214859858631333e-06, + "loss": 0.0569, + "num_input_tokens_seen": 37775968, + "step": 56045 + }, + { + "epoch": 1.3693108250067183, + "grad_norm": 19.74469757080078, + "learning_rate": 1.8214373551411177e-06, + "loss": 0.1, + "num_input_tokens_seen": 37779296, + "step": 56050 + }, + { + "epoch": 1.3694329758385655, + "grad_norm": 49.746131896972656, + "learning_rate": 1.8213887184453892e-06, + "loss": 0.19, + "num_input_tokens_seen": 37782944, + "step": 56055 + }, + { + "epoch": 1.3695551266704127, + "grad_norm": 37.75736999511719, + "learning_rate": 1.8213400757763009e-06, + "loss": 0.153, + "num_input_tokens_seen": 37786912, + "step": 56060 + }, + { + "epoch": 1.3696772775022599, + "grad_norm": 0.30505508184432983, + "learning_rate": 1.8212914271342064e-06, + "loss": 0.0899, + "num_input_tokens_seen": 37790368, + "step": 56065 + }, + { + "epoch": 1.3697994283341068, + "grad_norm": 0.30792438983917236, + "learning_rate": 1.8212427725194599e-06, + "loss": 0.0811, + "num_input_tokens_seen": 37793696, + "step": 56070 + }, + { + "epoch": 1.369921579165954, + "grad_norm": 20.89712905883789, + "learning_rate": 1.821194111932415e-06, + "loss": 0.0796, + "num_input_tokens_seen": 37797088, + "step": 56075 + }, + { + "epoch": 1.3700437299978012, + "grad_norm": 61.594722747802734, + "learning_rate": 1.821145445373426e-06, + "loss": 0.0422, + "num_input_tokens_seen": 37800480, + "step": 56080 + }, + { + "epoch": 1.3701658808296484, + "grad_norm": 8.598461151123047, + "learning_rate": 1.8210967728428458e-06, + "loss": 0.1484, + "num_input_tokens_seen": 37803488, + "step": 56085 + }, + { + "epoch": 1.3702880316614956, + "grad_norm": 0.8642435073852539, + "learning_rate": 1.8210480943410296e-06, + "loss": 0.0048, + "num_input_tokens_seen": 37806496, + "step": 56090 + }, + { + "epoch": 1.3704101824933428, + "grad_norm": 9.462559700012207, + "learning_rate": 1.8209994098683306e-06, + "loss": 0.1011, + "num_input_tokens_seen": 37809888, + "step": 56095 + }, + { + "epoch": 1.37053233332519, + "grad_norm": 10.7000732421875, + "learning_rate": 1.8209507194251033e-06, + "loss": 0.1184, + "num_input_tokens_seen": 37813344, + "step": 56100 + }, + { + "epoch": 1.3706544841570372, + "grad_norm": 25.909149169921875, + "learning_rate": 1.8209020230117012e-06, + "loss": 0.1454, + "num_input_tokens_seen": 37816480, + "step": 56105 + }, + { + "epoch": 1.3707766349888844, + "grad_norm": 0.681384265422821, + "learning_rate": 1.8208533206284788e-06, + "loss": 0.0406, + "num_input_tokens_seen": 37820192, + "step": 56110 + }, + { + "epoch": 1.3708987858207315, + "grad_norm": 0.4287295937538147, + "learning_rate": 1.8208046122757903e-06, + "loss": 0.0023, + "num_input_tokens_seen": 37823904, + "step": 56115 + }, + { + "epoch": 1.3710209366525787, + "grad_norm": 0.18749327957630157, + "learning_rate": 1.8207558979539903e-06, + "loss": 0.0481, + "num_input_tokens_seen": 37827488, + "step": 56120 + }, + { + "epoch": 1.3711430874844257, + "grad_norm": 32.42850875854492, + "learning_rate": 1.820707177663432e-06, + "loss": 0.1668, + "num_input_tokens_seen": 37830816, + "step": 56125 + }, + { + "epoch": 1.371265238316273, + "grad_norm": 0.08906259387731552, + "learning_rate": 1.8206584514044709e-06, + "loss": 0.1436, + "num_input_tokens_seen": 37834016, + "step": 56130 + }, + { + "epoch": 1.37138738914812, + "grad_norm": 51.5129508972168, + "learning_rate": 1.8206097191774608e-06, + "loss": 0.0594, + "num_input_tokens_seen": 37837024, + "step": 56135 + }, + { + "epoch": 1.3715095399799673, + "grad_norm": 0.3601904511451721, + "learning_rate": 1.820560980982756e-06, + "loss": 0.0926, + "num_input_tokens_seen": 37840288, + "step": 56140 + }, + { + "epoch": 1.3716316908118145, + "grad_norm": 5.214352607727051, + "learning_rate": 1.8205122368207107e-06, + "loss": 0.0034, + "num_input_tokens_seen": 37843680, + "step": 56145 + }, + { + "epoch": 1.3717538416436617, + "grad_norm": 0.2316035032272339, + "learning_rate": 1.82046348669168e-06, + "loss": 0.0444, + "num_input_tokens_seen": 37846752, + "step": 56150 + }, + { + "epoch": 1.3718759924755086, + "grad_norm": 0.059439271688461304, + "learning_rate": 1.8204147305960182e-06, + "loss": 0.0316, + "num_input_tokens_seen": 37849824, + "step": 56155 + }, + { + "epoch": 1.3719981433073558, + "grad_norm": 0.20835110545158386, + "learning_rate": 1.8203659685340797e-06, + "loss": 0.0528, + "num_input_tokens_seen": 37853088, + "step": 56160 + }, + { + "epoch": 1.372120294139203, + "grad_norm": 0.10169114917516708, + "learning_rate": 1.8203172005062194e-06, + "loss": 0.0712, + "num_input_tokens_seen": 37856416, + "step": 56165 + }, + { + "epoch": 1.3722424449710502, + "grad_norm": 34.08015441894531, + "learning_rate": 1.8202684265127916e-06, + "loss": 0.068, + "num_input_tokens_seen": 37859488, + "step": 56170 + }, + { + "epoch": 1.3723645958028974, + "grad_norm": 29.192033767700195, + "learning_rate": 1.8202196465541513e-06, + "loss": 0.0865, + "num_input_tokens_seen": 37862624, + "step": 56175 + }, + { + "epoch": 1.3724867466347446, + "grad_norm": 0.05454200878739357, + "learning_rate": 1.820170860630653e-06, + "loss": 0.1067, + "num_input_tokens_seen": 37865760, + "step": 56180 + }, + { + "epoch": 1.3726088974665918, + "grad_norm": 22.10439682006836, + "learning_rate": 1.8201220687426515e-06, + "loss": 0.1904, + "num_input_tokens_seen": 37869024, + "step": 56185 + }, + { + "epoch": 1.372731048298439, + "grad_norm": 56.317832946777344, + "learning_rate": 1.8200732708905018e-06, + "loss": 0.0204, + "num_input_tokens_seen": 37872672, + "step": 56190 + }, + { + "epoch": 1.3728531991302861, + "grad_norm": 0.6248363852500916, + "learning_rate": 1.820024467074559e-06, + "loss": 0.0023, + "num_input_tokens_seen": 37875936, + "step": 56195 + }, + { + "epoch": 1.3729753499621333, + "grad_norm": 0.26846882700920105, + "learning_rate": 1.8199756572951775e-06, + "loss": 0.0177, + "num_input_tokens_seen": 37879008, + "step": 56200 + }, + { + "epoch": 1.3730975007939805, + "grad_norm": 0.04101533815264702, + "learning_rate": 1.8199268415527125e-06, + "loss": 0.029, + "num_input_tokens_seen": 37881952, + "step": 56205 + }, + { + "epoch": 1.3732196516258277, + "grad_norm": 1.919983983039856, + "learning_rate": 1.8198780198475189e-06, + "loss": 0.0261, + "num_input_tokens_seen": 37885088, + "step": 56210 + }, + { + "epoch": 1.3733418024576747, + "grad_norm": 0.13918881118297577, + "learning_rate": 1.8198291921799519e-06, + "loss": 0.0503, + "num_input_tokens_seen": 37888096, + "step": 56215 + }, + { + "epoch": 1.3734639532895219, + "grad_norm": 0.4293416738510132, + "learning_rate": 1.8197803585503665e-06, + "loss": 0.0466, + "num_input_tokens_seen": 37891616, + "step": 56220 + }, + { + "epoch": 1.373586104121369, + "grad_norm": 0.013383172452449799, + "learning_rate": 1.8197315189591175e-06, + "loss": 0.1277, + "num_input_tokens_seen": 37895136, + "step": 56225 + }, + { + "epoch": 1.3737082549532162, + "grad_norm": 90.03067779541016, + "learning_rate": 1.8196826734065608e-06, + "loss": 0.1435, + "num_input_tokens_seen": 37898336, + "step": 56230 + }, + { + "epoch": 1.3738304057850634, + "grad_norm": 29.73512077331543, + "learning_rate": 1.8196338218930513e-06, + "loss": 0.1152, + "num_input_tokens_seen": 37901856, + "step": 56235 + }, + { + "epoch": 1.3739525566169106, + "grad_norm": 0.12980686128139496, + "learning_rate": 1.819584964418944e-06, + "loss": 0.0549, + "num_input_tokens_seen": 37904928, + "step": 56240 + }, + { + "epoch": 1.3740747074487576, + "grad_norm": 22.57590675354004, + "learning_rate": 1.8195361009845945e-06, + "loss": 0.1053, + "num_input_tokens_seen": 37908448, + "step": 56245 + }, + { + "epoch": 1.3741968582806048, + "grad_norm": 0.1411312073469162, + "learning_rate": 1.819487231590358e-06, + "loss": 0.0571, + "num_input_tokens_seen": 37912352, + "step": 56250 + }, + { + "epoch": 1.374319009112452, + "grad_norm": 36.76695251464844, + "learning_rate": 1.8194383562365898e-06, + "loss": 0.1285, + "num_input_tokens_seen": 37916128, + "step": 56255 + }, + { + "epoch": 1.3744411599442992, + "grad_norm": 32.090065002441406, + "learning_rate": 1.8193894749236458e-06, + "loss": 0.0941, + "num_input_tokens_seen": 37919904, + "step": 56260 + }, + { + "epoch": 1.3745633107761464, + "grad_norm": 0.1330694556236267, + "learning_rate": 1.8193405876518808e-06, + "loss": 0.0929, + "num_input_tokens_seen": 37923232, + "step": 56265 + }, + { + "epoch": 1.3746854616079935, + "grad_norm": 1.796851396560669, + "learning_rate": 1.8192916944216507e-06, + "loss": 0.085, + "num_input_tokens_seen": 37926688, + "step": 56270 + }, + { + "epoch": 1.3748076124398407, + "grad_norm": 0.1381426304578781, + "learning_rate": 1.8192427952333112e-06, + "loss": 0.0989, + "num_input_tokens_seen": 37929888, + "step": 56275 + }, + { + "epoch": 1.374929763271688, + "grad_norm": 0.1486775428056717, + "learning_rate": 1.8191938900872177e-06, + "loss": 0.0382, + "num_input_tokens_seen": 37932896, + "step": 56280 + }, + { + "epoch": 1.3750519141035351, + "grad_norm": 0.06270882487297058, + "learning_rate": 1.8191449789837258e-06, + "loss": 0.002, + "num_input_tokens_seen": 37936736, + "step": 56285 + }, + { + "epoch": 1.3751740649353823, + "grad_norm": 9.44651985168457, + "learning_rate": 1.8190960619231915e-06, + "loss": 0.0336, + "num_input_tokens_seen": 37940576, + "step": 56290 + }, + { + "epoch": 1.3752962157672295, + "grad_norm": 65.55650329589844, + "learning_rate": 1.81904713890597e-06, + "loss": 0.0731, + "num_input_tokens_seen": 37943840, + "step": 56295 + }, + { + "epoch": 1.3754183665990765, + "grad_norm": 2.183594226837158, + "learning_rate": 1.8189982099324177e-06, + "loss": 0.081, + "num_input_tokens_seen": 37947040, + "step": 56300 + }, + { + "epoch": 1.3755405174309236, + "grad_norm": 47.141258239746094, + "learning_rate": 1.81894927500289e-06, + "loss": 0.1332, + "num_input_tokens_seen": 37950368, + "step": 56305 + }, + { + "epoch": 1.3756626682627708, + "grad_norm": 0.08392799645662308, + "learning_rate": 1.818900334117743e-06, + "loss": 0.1287, + "num_input_tokens_seen": 37953696, + "step": 56310 + }, + { + "epoch": 1.375784819094618, + "grad_norm": 0.17838051915168762, + "learning_rate": 1.8188513872773326e-06, + "loss": 0.0293, + "num_input_tokens_seen": 37956768, + "step": 56315 + }, + { + "epoch": 1.3759069699264652, + "grad_norm": 34.301727294921875, + "learning_rate": 1.8188024344820145e-06, + "loss": 0.2067, + "num_input_tokens_seen": 37960032, + "step": 56320 + }, + { + "epoch": 1.3760291207583124, + "grad_norm": 13.057069778442383, + "learning_rate": 1.8187534757321447e-06, + "loss": 0.0548, + "num_input_tokens_seen": 37964640, + "step": 56325 + }, + { + "epoch": 1.3761512715901596, + "grad_norm": 0.19041498005390167, + "learning_rate": 1.8187045110280796e-06, + "loss": 0.0159, + "num_input_tokens_seen": 37968288, + "step": 56330 + }, + { + "epoch": 1.3762734224220066, + "grad_norm": 66.84744262695312, + "learning_rate": 1.8186555403701753e-06, + "loss": 0.1024, + "num_input_tokens_seen": 37971744, + "step": 56335 + }, + { + "epoch": 1.3763955732538538, + "grad_norm": 12.135207176208496, + "learning_rate": 1.8186065637587876e-06, + "loss": 0.0608, + "num_input_tokens_seen": 37975456, + "step": 56340 + }, + { + "epoch": 1.376517724085701, + "grad_norm": 21.856000900268555, + "learning_rate": 1.8185575811942723e-06, + "loss": 0.162, + "num_input_tokens_seen": 37979168, + "step": 56345 + }, + { + "epoch": 1.3766398749175481, + "grad_norm": 19.666250228881836, + "learning_rate": 1.8185085926769867e-06, + "loss": 0.0232, + "num_input_tokens_seen": 37982560, + "step": 56350 + }, + { + "epoch": 1.3767620257493953, + "grad_norm": 0.049027394503355026, + "learning_rate": 1.8184595982072863e-06, + "loss": 0.2073, + "num_input_tokens_seen": 37985888, + "step": 56355 + }, + { + "epoch": 1.3768841765812425, + "grad_norm": 0.3508736193180084, + "learning_rate": 1.8184105977855276e-06, + "loss": 0.1076, + "num_input_tokens_seen": 37988960, + "step": 56360 + }, + { + "epoch": 1.3770063274130897, + "grad_norm": 0.4071526825428009, + "learning_rate": 1.8183615914120666e-06, + "loss": 0.0922, + "num_input_tokens_seen": 37992288, + "step": 56365 + }, + { + "epoch": 1.377128478244937, + "grad_norm": 1.8135322332382202, + "learning_rate": 1.8183125790872605e-06, + "loss": 0.0049, + "num_input_tokens_seen": 37995488, + "step": 56370 + }, + { + "epoch": 1.377250629076784, + "grad_norm": 0.19190309941768646, + "learning_rate": 1.8182635608114647e-06, + "loss": 0.0121, + "num_input_tokens_seen": 37998560, + "step": 56375 + }, + { + "epoch": 1.3773727799086313, + "grad_norm": 12.494829177856445, + "learning_rate": 1.8182145365850366e-06, + "loss": 0.1404, + "num_input_tokens_seen": 38001952, + "step": 56380 + }, + { + "epoch": 1.3774949307404785, + "grad_norm": 15.6360445022583, + "learning_rate": 1.8181655064083322e-06, + "loss": 0.0923, + "num_input_tokens_seen": 38005920, + "step": 56385 + }, + { + "epoch": 1.3776170815723254, + "grad_norm": 3.3667964935302734, + "learning_rate": 1.818116470281708e-06, + "loss": 0.0437, + "num_input_tokens_seen": 38009120, + "step": 56390 + }, + { + "epoch": 1.3777392324041726, + "grad_norm": 114.14451599121094, + "learning_rate": 1.818067428205521e-06, + "loss": 0.063, + "num_input_tokens_seen": 38012384, + "step": 56395 + }, + { + "epoch": 1.3778613832360198, + "grad_norm": 8.4616117477417, + "learning_rate": 1.8180183801801277e-06, + "loss": 0.0565, + "num_input_tokens_seen": 38015776, + "step": 56400 + }, + { + "epoch": 1.377983534067867, + "grad_norm": 0.40798187255859375, + "learning_rate": 1.8179693262058844e-06, + "loss": 0.0022, + "num_input_tokens_seen": 38019808, + "step": 56405 + }, + { + "epoch": 1.3781056848997142, + "grad_norm": 0.16288278996944427, + "learning_rate": 1.8179202662831483e-06, + "loss": 0.112, + "num_input_tokens_seen": 38022944, + "step": 56410 + }, + { + "epoch": 1.3782278357315614, + "grad_norm": 0.03791259601712227, + "learning_rate": 1.8178712004122763e-06, + "loss": 0.0337, + "num_input_tokens_seen": 38026336, + "step": 56415 + }, + { + "epoch": 1.3783499865634086, + "grad_norm": 53.5873908996582, + "learning_rate": 1.8178221285936246e-06, + "loss": 0.081, + "num_input_tokens_seen": 38030048, + "step": 56420 + }, + { + "epoch": 1.3784721373952555, + "grad_norm": 0.43458986282348633, + "learning_rate": 1.8177730508275504e-06, + "loss": 0.0586, + "num_input_tokens_seen": 38033632, + "step": 56425 + }, + { + "epoch": 1.3785942882271027, + "grad_norm": 0.22739242017269135, + "learning_rate": 1.8177239671144106e-06, + "loss": 0.0415, + "num_input_tokens_seen": 38036896, + "step": 56430 + }, + { + "epoch": 1.37871643905895, + "grad_norm": 0.29765599966049194, + "learning_rate": 1.8176748774545626e-06, + "loss": 0.1464, + "num_input_tokens_seen": 38040416, + "step": 56435 + }, + { + "epoch": 1.378838589890797, + "grad_norm": 0.030600015074014664, + "learning_rate": 1.8176257818483624e-06, + "loss": 0.0029, + "num_input_tokens_seen": 38044000, + "step": 56440 + }, + { + "epoch": 1.3789607407226443, + "grad_norm": 1.0469118356704712, + "learning_rate": 1.8175766802961681e-06, + "loss": 0.0743, + "num_input_tokens_seen": 38047392, + "step": 56445 + }, + { + "epoch": 1.3790828915544915, + "grad_norm": 50.24403381347656, + "learning_rate": 1.817527572798336e-06, + "loss": 0.0954, + "num_input_tokens_seen": 38050592, + "step": 56450 + }, + { + "epoch": 1.3792050423863387, + "grad_norm": 42.963157653808594, + "learning_rate": 1.8174784593552235e-06, + "loss": 0.0879, + "num_input_tokens_seen": 38053536, + "step": 56455 + }, + { + "epoch": 1.3793271932181859, + "grad_norm": 0.0433623380959034, + "learning_rate": 1.817429339967188e-06, + "loss": 0.1678, + "num_input_tokens_seen": 38056864, + "step": 56460 + }, + { + "epoch": 1.379449344050033, + "grad_norm": 16.934593200683594, + "learning_rate": 1.817380214634586e-06, + "loss": 0.1658, + "num_input_tokens_seen": 38060256, + "step": 56465 + }, + { + "epoch": 1.3795714948818802, + "grad_norm": 63.58816146850586, + "learning_rate": 1.8173310833577754e-06, + "loss": 0.1639, + "num_input_tokens_seen": 38063392, + "step": 56470 + }, + { + "epoch": 1.3796936457137274, + "grad_norm": 1.4449207782745361, + "learning_rate": 1.8172819461371138e-06, + "loss": 0.0794, + "num_input_tokens_seen": 38066912, + "step": 56475 + }, + { + "epoch": 1.3798157965455744, + "grad_norm": 225.5787811279297, + "learning_rate": 1.8172328029729577e-06, + "loss": 0.1965, + "num_input_tokens_seen": 38070240, + "step": 56480 + }, + { + "epoch": 1.3799379473774216, + "grad_norm": 0.6155655980110168, + "learning_rate": 1.8171836538656645e-06, + "loss": 0.1164, + "num_input_tokens_seen": 38073760, + "step": 56485 + }, + { + "epoch": 1.3800600982092688, + "grad_norm": 118.65497589111328, + "learning_rate": 1.8171344988155925e-06, + "loss": 0.0277, + "num_input_tokens_seen": 38076832, + "step": 56490 + }, + { + "epoch": 1.380182249041116, + "grad_norm": 24.945920944213867, + "learning_rate": 1.8170853378230985e-06, + "loss": 0.1765, + "num_input_tokens_seen": 38079840, + "step": 56495 + }, + { + "epoch": 1.3803043998729632, + "grad_norm": 1.0863746404647827, + "learning_rate": 1.8170361708885402e-06, + "loss": 0.143, + "num_input_tokens_seen": 38082848, + "step": 56500 + }, + { + "epoch": 1.3804265507048104, + "grad_norm": 17.139713287353516, + "learning_rate": 1.816986998012275e-06, + "loss": 0.098, + "num_input_tokens_seen": 38086304, + "step": 56505 + }, + { + "epoch": 1.3805487015366575, + "grad_norm": 1.9433549642562866, + "learning_rate": 1.8169378191946607e-06, + "loss": 0.0048, + "num_input_tokens_seen": 38090016, + "step": 56510 + }, + { + "epoch": 1.3806708523685045, + "grad_norm": 0.7481113076210022, + "learning_rate": 1.8168886344360549e-06, + "loss": 0.0051, + "num_input_tokens_seen": 38093152, + "step": 56515 + }, + { + "epoch": 1.3807930032003517, + "grad_norm": 7.925774097442627, + "learning_rate": 1.816839443736815e-06, + "loss": 0.1642, + "num_input_tokens_seen": 38096864, + "step": 56520 + }, + { + "epoch": 1.3809151540321989, + "grad_norm": 0.4268783926963806, + "learning_rate": 1.816790247097299e-06, + "loss": 0.0691, + "num_input_tokens_seen": 38099936, + "step": 56525 + }, + { + "epoch": 1.381037304864046, + "grad_norm": 0.5027146935462952, + "learning_rate": 1.8167410445178649e-06, + "loss": 0.1108, + "num_input_tokens_seen": 38103264, + "step": 56530 + }, + { + "epoch": 1.3811594556958933, + "grad_norm": 0.23187850415706635, + "learning_rate": 1.8166918359988702e-06, + "loss": 0.1469, + "num_input_tokens_seen": 38106592, + "step": 56535 + }, + { + "epoch": 1.3812816065277405, + "grad_norm": 1.227955937385559, + "learning_rate": 1.8166426215406726e-06, + "loss": 0.0857, + "num_input_tokens_seen": 38109600, + "step": 56540 + }, + { + "epoch": 1.3814037573595876, + "grad_norm": 7.709028244018555, + "learning_rate": 1.8165934011436303e-06, + "loss": 0.1107, + "num_input_tokens_seen": 38112672, + "step": 56545 + }, + { + "epoch": 1.3815259081914348, + "grad_norm": 0.14321696758270264, + "learning_rate": 1.8165441748081012e-06, + "loss": 0.0608, + "num_input_tokens_seen": 38115872, + "step": 56550 + }, + { + "epoch": 1.381648059023282, + "grad_norm": 0.7300055623054504, + "learning_rate": 1.8164949425344428e-06, + "loss": 0.0424, + "num_input_tokens_seen": 38120480, + "step": 56555 + }, + { + "epoch": 1.3817702098551292, + "grad_norm": 3.9435174465179443, + "learning_rate": 1.8164457043230144e-06, + "loss": 0.025, + "num_input_tokens_seen": 38123616, + "step": 56560 + }, + { + "epoch": 1.3818923606869764, + "grad_norm": 0.3503316342830658, + "learning_rate": 1.8163964601741726e-06, + "loss": 0.1618, + "num_input_tokens_seen": 38127712, + "step": 56565 + }, + { + "epoch": 1.3820145115188234, + "grad_norm": 0.2841152846813202, + "learning_rate": 1.8163472100882763e-06, + "loss": 0.063, + "num_input_tokens_seen": 38131488, + "step": 56570 + }, + { + "epoch": 1.3821366623506706, + "grad_norm": 0.612076461315155, + "learning_rate": 1.8162979540656837e-06, + "loss": 0.1056, + "num_input_tokens_seen": 38134496, + "step": 56575 + }, + { + "epoch": 1.3822588131825178, + "grad_norm": 0.1776486039161682, + "learning_rate": 1.8162486921067525e-06, + "loss": 0.1367, + "num_input_tokens_seen": 38137888, + "step": 56580 + }, + { + "epoch": 1.382380964014365, + "grad_norm": 0.20384326577186584, + "learning_rate": 1.8161994242118416e-06, + "loss": 0.0409, + "num_input_tokens_seen": 38141472, + "step": 56585 + }, + { + "epoch": 1.3825031148462121, + "grad_norm": 0.05330972000956535, + "learning_rate": 1.8161501503813085e-06, + "loss": 0.1291, + "num_input_tokens_seen": 38144992, + "step": 56590 + }, + { + "epoch": 1.3826252656780593, + "grad_norm": 1.6077696084976196, + "learning_rate": 1.8161008706155126e-06, + "loss": 0.0197, + "num_input_tokens_seen": 38148256, + "step": 56595 + }, + { + "epoch": 1.3827474165099065, + "grad_norm": 0.05597035586833954, + "learning_rate": 1.8160515849148112e-06, + "loss": 0.1117, + "num_input_tokens_seen": 38151392, + "step": 56600 + }, + { + "epoch": 1.3828695673417535, + "grad_norm": 2.726749897003174, + "learning_rate": 1.8160022932795632e-06, + "loss": 0.0846, + "num_input_tokens_seen": 38154528, + "step": 56605 + }, + { + "epoch": 1.3829917181736007, + "grad_norm": 110.53439331054688, + "learning_rate": 1.8159529957101273e-06, + "loss": 0.0342, + "num_input_tokens_seen": 38157920, + "step": 56610 + }, + { + "epoch": 1.3831138690054479, + "grad_norm": 0.10677741467952728, + "learning_rate": 1.8159036922068616e-06, + "loss": 0.0348, + "num_input_tokens_seen": 38161568, + "step": 56615 + }, + { + "epoch": 1.383236019837295, + "grad_norm": 43.46922302246094, + "learning_rate": 1.8158543827701249e-06, + "loss": 0.1753, + "num_input_tokens_seen": 38165024, + "step": 56620 + }, + { + "epoch": 1.3833581706691422, + "grad_norm": 0.23257319629192352, + "learning_rate": 1.8158050674002757e-06, + "loss": 0.0462, + "num_input_tokens_seen": 38168288, + "step": 56625 + }, + { + "epoch": 1.3834803215009894, + "grad_norm": 39.41869354248047, + "learning_rate": 1.8157557460976725e-06, + "loss": 0.2128, + "num_input_tokens_seen": 38171488, + "step": 56630 + }, + { + "epoch": 1.3836024723328366, + "grad_norm": 0.14419890940189362, + "learning_rate": 1.815706418862674e-06, + "loss": 0.0025, + "num_input_tokens_seen": 38174624, + "step": 56635 + }, + { + "epoch": 1.3837246231646838, + "grad_norm": 70.12617492675781, + "learning_rate": 1.8156570856956393e-06, + "loss": 0.0736, + "num_input_tokens_seen": 38177888, + "step": 56640 + }, + { + "epoch": 1.383846773996531, + "grad_norm": 0.39914312958717346, + "learning_rate": 1.8156077465969267e-06, + "loss": 0.0017, + "num_input_tokens_seen": 38180960, + "step": 56645 + }, + { + "epoch": 1.3839689248283782, + "grad_norm": 12.098282814025879, + "learning_rate": 1.8155584015668954e-06, + "loss": 0.0432, + "num_input_tokens_seen": 38184224, + "step": 56650 + }, + { + "epoch": 1.3840910756602254, + "grad_norm": 0.2898300290107727, + "learning_rate": 1.8155090506059039e-06, + "loss": 0.0316, + "num_input_tokens_seen": 38187552, + "step": 56655 + }, + { + "epoch": 1.3842132264920723, + "grad_norm": 0.002333325333893299, + "learning_rate": 1.815459693714311e-06, + "loss": 0.1797, + "num_input_tokens_seen": 38190752, + "step": 56660 + }, + { + "epoch": 1.3843353773239195, + "grad_norm": 15.202603340148926, + "learning_rate": 1.8154103308924763e-06, + "loss": 0.2378, + "num_input_tokens_seen": 38194720, + "step": 56665 + }, + { + "epoch": 1.3844575281557667, + "grad_norm": 93.10588836669922, + "learning_rate": 1.815360962140758e-06, + "loss": 0.0175, + "num_input_tokens_seen": 38198624, + "step": 56670 + }, + { + "epoch": 1.384579678987614, + "grad_norm": 1.7387146949768066, + "learning_rate": 1.8153115874595158e-06, + "loss": 0.1147, + "num_input_tokens_seen": 38201760, + "step": 56675 + }, + { + "epoch": 1.384701829819461, + "grad_norm": 7.717111587524414, + "learning_rate": 1.815262206849108e-06, + "loss": 0.0694, + "num_input_tokens_seen": 38205088, + "step": 56680 + }, + { + "epoch": 1.3848239806513083, + "grad_norm": 0.2825879752635956, + "learning_rate": 1.8152128203098943e-06, + "loss": 0.0993, + "num_input_tokens_seen": 38208736, + "step": 56685 + }, + { + "epoch": 1.3849461314831553, + "grad_norm": 109.01095581054688, + "learning_rate": 1.815163427842234e-06, + "loss": 0.0757, + "num_input_tokens_seen": 38212128, + "step": 56690 + }, + { + "epoch": 1.3850682823150025, + "grad_norm": 22.071168899536133, + "learning_rate": 1.8151140294464858e-06, + "loss": 0.114, + "num_input_tokens_seen": 38215392, + "step": 56695 + }, + { + "epoch": 1.3851904331468496, + "grad_norm": 0.2266807109117508, + "learning_rate": 1.8150646251230092e-06, + "loss": 0.0642, + "num_input_tokens_seen": 38218464, + "step": 56700 + }, + { + "epoch": 1.3853125839786968, + "grad_norm": 28.931777954101562, + "learning_rate": 1.8150152148721637e-06, + "loss": 0.1127, + "num_input_tokens_seen": 38221728, + "step": 56705 + }, + { + "epoch": 1.385434734810544, + "grad_norm": 3.359818458557129, + "learning_rate": 1.8149657986943078e-06, + "loss": 0.0424, + "num_input_tokens_seen": 38225120, + "step": 56710 + }, + { + "epoch": 1.3855568856423912, + "grad_norm": 38.96266174316406, + "learning_rate": 1.8149163765898016e-06, + "loss": 0.0771, + "num_input_tokens_seen": 38228256, + "step": 56715 + }, + { + "epoch": 1.3856790364742384, + "grad_norm": 0.7429246306419373, + "learning_rate": 1.8148669485590044e-06, + "loss": 0.0552, + "num_input_tokens_seen": 38232224, + "step": 56720 + }, + { + "epoch": 1.3858011873060856, + "grad_norm": 0.12821519374847412, + "learning_rate": 1.8148175146022758e-06, + "loss": 0.0013, + "num_input_tokens_seen": 38235424, + "step": 56725 + }, + { + "epoch": 1.3859233381379328, + "grad_norm": 0.5723388195037842, + "learning_rate": 1.8147680747199748e-06, + "loss": 0.0982, + "num_input_tokens_seen": 38238816, + "step": 56730 + }, + { + "epoch": 1.38604548896978, + "grad_norm": 0.63655686378479, + "learning_rate": 1.8147186289124611e-06, + "loss": 0.0017, + "num_input_tokens_seen": 38243168, + "step": 56735 + }, + { + "epoch": 1.3861676398016272, + "grad_norm": 0.07612661272287369, + "learning_rate": 1.8146691771800945e-06, + "loss": 0.045, + "num_input_tokens_seen": 38246368, + "step": 56740 + }, + { + "epoch": 1.3862897906334744, + "grad_norm": 0.20316410064697266, + "learning_rate": 1.8146197195232347e-06, + "loss": 0.0683, + "num_input_tokens_seen": 38249568, + "step": 56745 + }, + { + "epoch": 1.3864119414653213, + "grad_norm": 9.185523986816406, + "learning_rate": 1.814570255942241e-06, + "loss": 0.0041, + "num_input_tokens_seen": 38252960, + "step": 56750 + }, + { + "epoch": 1.3865340922971685, + "grad_norm": 0.09728414565324783, + "learning_rate": 1.8145207864374734e-06, + "loss": 0.0906, + "num_input_tokens_seen": 38256480, + "step": 56755 + }, + { + "epoch": 1.3866562431290157, + "grad_norm": 0.043544042855501175, + "learning_rate": 1.8144713110092915e-06, + "loss": 0.0711, + "num_input_tokens_seen": 38260064, + "step": 56760 + }, + { + "epoch": 1.3867783939608629, + "grad_norm": 18.682769775390625, + "learning_rate": 1.8144218296580553e-06, + "loss": 0.0695, + "num_input_tokens_seen": 38263328, + "step": 56765 + }, + { + "epoch": 1.38690054479271, + "grad_norm": 0.15236736834049225, + "learning_rate": 1.8143723423841241e-06, + "loss": 0.023, + "num_input_tokens_seen": 38267040, + "step": 56770 + }, + { + "epoch": 1.3870226956245573, + "grad_norm": 0.08610755205154419, + "learning_rate": 1.814322849187859e-06, + "loss": 0.0352, + "num_input_tokens_seen": 38270368, + "step": 56775 + }, + { + "epoch": 1.3871448464564042, + "grad_norm": 10.737128257751465, + "learning_rate": 1.814273350069618e-06, + "loss": 0.0862, + "num_input_tokens_seen": 38273632, + "step": 56780 + }, + { + "epoch": 1.3872669972882514, + "grad_norm": 0.05491538718342781, + "learning_rate": 1.8142238450297632e-06, + "loss": 0.1411, + "num_input_tokens_seen": 38276768, + "step": 56785 + }, + { + "epoch": 1.3873891481200986, + "grad_norm": 0.25832599401474, + "learning_rate": 1.814174334068653e-06, + "loss": 0.0709, + "num_input_tokens_seen": 38279968, + "step": 56790 + }, + { + "epoch": 1.3875112989519458, + "grad_norm": 24.963336944580078, + "learning_rate": 1.8141248171866482e-06, + "loss": 0.0428, + "num_input_tokens_seen": 38283424, + "step": 56795 + }, + { + "epoch": 1.387633449783793, + "grad_norm": 23.700716018676758, + "learning_rate": 1.814075294384109e-06, + "loss": 0.181, + "num_input_tokens_seen": 38286880, + "step": 56800 + }, + { + "epoch": 1.3877556006156402, + "grad_norm": 0.20313750207424164, + "learning_rate": 1.8140257656613952e-06, + "loss": 0.1029, + "num_input_tokens_seen": 38290208, + "step": 56805 + }, + { + "epoch": 1.3878777514474874, + "grad_norm": 0.13214872777462006, + "learning_rate": 1.8139762310188666e-06, + "loss": 0.0015, + "num_input_tokens_seen": 38293600, + "step": 56810 + }, + { + "epoch": 1.3879999022793346, + "grad_norm": 104.4972915649414, + "learning_rate": 1.8139266904568844e-06, + "loss": 0.0907, + "num_input_tokens_seen": 38297184, + "step": 56815 + }, + { + "epoch": 1.3881220531111818, + "grad_norm": 3.2046422958374023, + "learning_rate": 1.8138771439758083e-06, + "loss": 0.0459, + "num_input_tokens_seen": 38300448, + "step": 56820 + }, + { + "epoch": 1.388244203943029, + "grad_norm": 0.06722808629274368, + "learning_rate": 1.8138275915759986e-06, + "loss": 0.0013, + "num_input_tokens_seen": 38303712, + "step": 56825 + }, + { + "epoch": 1.3883663547748761, + "grad_norm": 54.982337951660156, + "learning_rate": 1.8137780332578158e-06, + "loss": 0.1807, + "num_input_tokens_seen": 38307296, + "step": 56830 + }, + { + "epoch": 1.388488505606723, + "grad_norm": 20.035268783569336, + "learning_rate": 1.8137284690216204e-06, + "loss": 0.2124, + "num_input_tokens_seen": 38310496, + "step": 56835 + }, + { + "epoch": 1.3886106564385703, + "grad_norm": 1.0956863164901733, + "learning_rate": 1.8136788988677725e-06, + "loss": 0.099, + "num_input_tokens_seen": 38313888, + "step": 56840 + }, + { + "epoch": 1.3887328072704175, + "grad_norm": 0.11295932531356812, + "learning_rate": 1.813629322796633e-06, + "loss": 0.0729, + "num_input_tokens_seen": 38317536, + "step": 56845 + }, + { + "epoch": 1.3888549581022647, + "grad_norm": 0.755732536315918, + "learning_rate": 1.8135797408085623e-06, + "loss": 0.1225, + "num_input_tokens_seen": 38321120, + "step": 56850 + }, + { + "epoch": 1.3889771089341119, + "grad_norm": 0.18139754235744476, + "learning_rate": 1.8135301529039207e-06, + "loss": 0.0755, + "num_input_tokens_seen": 38324320, + "step": 56855 + }, + { + "epoch": 1.389099259765959, + "grad_norm": 0.15731483697891235, + "learning_rate": 1.813480559083069e-06, + "loss": 0.0337, + "num_input_tokens_seen": 38327840, + "step": 56860 + }, + { + "epoch": 1.3892214105978062, + "grad_norm": 91.60194396972656, + "learning_rate": 1.813430959346368e-06, + "loss": 0.1575, + "num_input_tokens_seen": 38331104, + "step": 56865 + }, + { + "epoch": 1.3893435614296532, + "grad_norm": 0.30752700567245483, + "learning_rate": 1.813381353694178e-06, + "loss": 0.1537, + "num_input_tokens_seen": 38334432, + "step": 56870 + }, + { + "epoch": 1.3894657122615004, + "grad_norm": 0.6529026627540588, + "learning_rate": 1.8133317421268601e-06, + "loss": 0.0716, + "num_input_tokens_seen": 38338016, + "step": 56875 + }, + { + "epoch": 1.3895878630933476, + "grad_norm": 0.7751749157905579, + "learning_rate": 1.8132821246447753e-06, + "loss": 0.0679, + "num_input_tokens_seen": 38341344, + "step": 56880 + }, + { + "epoch": 1.3897100139251948, + "grad_norm": 170.2229766845703, + "learning_rate": 1.813232501248284e-06, + "loss": 0.1301, + "num_input_tokens_seen": 38344928, + "step": 56885 + }, + { + "epoch": 1.389832164757042, + "grad_norm": 0.23280911147594452, + "learning_rate": 1.813182871937747e-06, + "loss": 0.0464, + "num_input_tokens_seen": 38348128, + "step": 56890 + }, + { + "epoch": 1.3899543155888892, + "grad_norm": 38.519474029541016, + "learning_rate": 1.8131332367135256e-06, + "loss": 0.1033, + "num_input_tokens_seen": 38351392, + "step": 56895 + }, + { + "epoch": 1.3900764664207363, + "grad_norm": 0.19992072880268097, + "learning_rate": 1.8130835955759807e-06, + "loss": 0.0761, + "num_input_tokens_seen": 38354208, + "step": 56900 + }, + { + "epoch": 1.3901986172525835, + "grad_norm": 0.10394836962223053, + "learning_rate": 1.8130339485254731e-06, + "loss": 0.0616, + "num_input_tokens_seen": 38357728, + "step": 56905 + }, + { + "epoch": 1.3903207680844307, + "grad_norm": 0.1428837925195694, + "learning_rate": 1.812984295562364e-06, + "loss": 0.0903, + "num_input_tokens_seen": 38361056, + "step": 56910 + }, + { + "epoch": 1.390442918916278, + "grad_norm": 15.613577842712402, + "learning_rate": 1.8129346366870143e-06, + "loss": 0.0795, + "num_input_tokens_seen": 38364256, + "step": 56915 + }, + { + "epoch": 1.390565069748125, + "grad_norm": 0.44246914982795715, + "learning_rate": 1.8128849718997854e-06, + "loss": 0.0247, + "num_input_tokens_seen": 38367840, + "step": 56920 + }, + { + "epoch": 1.390687220579972, + "grad_norm": 0.33950066566467285, + "learning_rate": 1.8128353012010385e-06, + "loss": 0.035, + "num_input_tokens_seen": 38371680, + "step": 56925 + }, + { + "epoch": 1.3908093714118193, + "grad_norm": 15.934062004089355, + "learning_rate": 1.8127856245911343e-06, + "loss": 0.1329, + "num_input_tokens_seen": 38374944, + "step": 56930 + }, + { + "epoch": 1.3909315222436665, + "grad_norm": 0.11535021662712097, + "learning_rate": 1.8127359420704344e-06, + "loss": 0.0395, + "num_input_tokens_seen": 38378336, + "step": 56935 + }, + { + "epoch": 1.3910536730755136, + "grad_norm": 0.03485637903213501, + "learning_rate": 1.8126862536393005e-06, + "loss": 0.0486, + "num_input_tokens_seen": 38381664, + "step": 56940 + }, + { + "epoch": 1.3911758239073608, + "grad_norm": 16.718181610107422, + "learning_rate": 1.8126365592980935e-06, + "loss": 0.0424, + "num_input_tokens_seen": 38385056, + "step": 56945 + }, + { + "epoch": 1.391297974739208, + "grad_norm": 32.83033752441406, + "learning_rate": 1.8125868590471748e-06, + "loss": 0.1111, + "num_input_tokens_seen": 38388512, + "step": 56950 + }, + { + "epoch": 1.3914201255710552, + "grad_norm": 0.171127587556839, + "learning_rate": 1.8125371528869059e-06, + "loss": 0.003, + "num_input_tokens_seen": 38391712, + "step": 56955 + }, + { + "epoch": 1.3915422764029022, + "grad_norm": 0.08284179121255875, + "learning_rate": 1.812487440817648e-06, + "loss": 0.154, + "num_input_tokens_seen": 38395040, + "step": 56960 + }, + { + "epoch": 1.3916644272347494, + "grad_norm": 86.25468444824219, + "learning_rate": 1.8124377228397631e-06, + "loss": 0.0939, + "num_input_tokens_seen": 38398048, + "step": 56965 + }, + { + "epoch": 1.3917865780665966, + "grad_norm": 37.10374450683594, + "learning_rate": 1.8123879989536129e-06, + "loss": 0.1104, + "num_input_tokens_seen": 38401248, + "step": 56970 + }, + { + "epoch": 1.3919087288984437, + "grad_norm": 20.548166275024414, + "learning_rate": 1.8123382691595581e-06, + "loss": 0.0908, + "num_input_tokens_seen": 38404832, + "step": 56975 + }, + { + "epoch": 1.392030879730291, + "grad_norm": 0.08781076967716217, + "learning_rate": 1.8122885334579615e-06, + "loss": 0.0896, + "num_input_tokens_seen": 38408096, + "step": 56980 + }, + { + "epoch": 1.3921530305621381, + "grad_norm": 78.29131317138672, + "learning_rate": 1.8122387918491838e-06, + "loss": 0.1319, + "num_input_tokens_seen": 38412128, + "step": 56985 + }, + { + "epoch": 1.3922751813939853, + "grad_norm": 37.14921188354492, + "learning_rate": 1.8121890443335873e-06, + "loss": 0.1685, + "num_input_tokens_seen": 38415520, + "step": 56990 + }, + { + "epoch": 1.3923973322258325, + "grad_norm": 0.22196723520755768, + "learning_rate": 1.8121392909115334e-06, + "loss": 0.0021, + "num_input_tokens_seen": 38419424, + "step": 56995 + }, + { + "epoch": 1.3925194830576797, + "grad_norm": 47.17499923706055, + "learning_rate": 1.8120895315833842e-06, + "loss": 0.1008, + "num_input_tokens_seen": 38423392, + "step": 57000 + }, + { + "epoch": 1.3926416338895269, + "grad_norm": 109.18597412109375, + "learning_rate": 1.8120397663495015e-06, + "loss": 0.151, + "num_input_tokens_seen": 38427104, + "step": 57005 + }, + { + "epoch": 1.392763784721374, + "grad_norm": 0.2220344990491867, + "learning_rate": 1.8119899952102476e-06, + "loss": 0.0376, + "num_input_tokens_seen": 38430688, + "step": 57010 + }, + { + "epoch": 1.392885935553221, + "grad_norm": 86.03609466552734, + "learning_rate": 1.8119402181659837e-06, + "loss": 0.0085, + "num_input_tokens_seen": 38434400, + "step": 57015 + }, + { + "epoch": 1.3930080863850682, + "grad_norm": 0.1834421306848526, + "learning_rate": 1.811890435217072e-06, + "loss": 0.0804, + "num_input_tokens_seen": 38437920, + "step": 57020 + }, + { + "epoch": 1.3931302372169154, + "grad_norm": 19.444744110107422, + "learning_rate": 1.811840646363875e-06, + "loss": 0.0872, + "num_input_tokens_seen": 38441248, + "step": 57025 + }, + { + "epoch": 1.3932523880487626, + "grad_norm": 2.8895413875579834, + "learning_rate": 1.8117908516067542e-06, + "loss": 0.221, + "num_input_tokens_seen": 38444896, + "step": 57030 + }, + { + "epoch": 1.3933745388806098, + "grad_norm": 0.1735847443342209, + "learning_rate": 1.8117410509460723e-06, + "loss": 0.0527, + "num_input_tokens_seen": 38448096, + "step": 57035 + }, + { + "epoch": 1.393496689712457, + "grad_norm": 66.44129943847656, + "learning_rate": 1.811691244382191e-06, + "loss": 0.059, + "num_input_tokens_seen": 38451616, + "step": 57040 + }, + { + "epoch": 1.3936188405443042, + "grad_norm": 7.641853332519531, + "learning_rate": 1.8116414319154726e-06, + "loss": 0.0346, + "num_input_tokens_seen": 38455392, + "step": 57045 + }, + { + "epoch": 1.3937409913761512, + "grad_norm": 0.3213024139404297, + "learning_rate": 1.8115916135462794e-06, + "loss": 0.0558, + "num_input_tokens_seen": 38458720, + "step": 57050 + }, + { + "epoch": 1.3938631422079983, + "grad_norm": 4.296091079711914, + "learning_rate": 1.8115417892749738e-06, + "loss": 0.1203, + "num_input_tokens_seen": 38461856, + "step": 57055 + }, + { + "epoch": 1.3939852930398455, + "grad_norm": 63.78632736206055, + "learning_rate": 1.811491959101918e-06, + "loss": 0.137, + "num_input_tokens_seen": 38465056, + "step": 57060 + }, + { + "epoch": 1.3941074438716927, + "grad_norm": 0.44734886288642883, + "learning_rate": 1.8114421230274743e-06, + "loss": 0.0427, + "num_input_tokens_seen": 38468320, + "step": 57065 + }, + { + "epoch": 1.39422959470354, + "grad_norm": 0.3252753019332886, + "learning_rate": 1.8113922810520053e-06, + "loss": 0.047, + "num_input_tokens_seen": 38471520, + "step": 57070 + }, + { + "epoch": 1.394351745535387, + "grad_norm": 0.053497254848480225, + "learning_rate": 1.811342433175873e-06, + "loss": 0.085, + "num_input_tokens_seen": 38474720, + "step": 57075 + }, + { + "epoch": 1.3944738963672343, + "grad_norm": 1.370205283164978, + "learning_rate": 1.8112925793994408e-06, + "loss": 0.0732, + "num_input_tokens_seen": 38477856, + "step": 57080 + }, + { + "epoch": 1.3945960471990815, + "grad_norm": 72.65982818603516, + "learning_rate": 1.811242719723071e-06, + "loss": 0.1524, + "num_input_tokens_seen": 38481056, + "step": 57085 + }, + { + "epoch": 1.3947181980309287, + "grad_norm": 0.2532006800174713, + "learning_rate": 1.8111928541471254e-06, + "loss": 0.0897, + "num_input_tokens_seen": 38484256, + "step": 57090 + }, + { + "epoch": 1.3948403488627759, + "grad_norm": 0.07710134983062744, + "learning_rate": 1.8111429826719673e-06, + "loss": 0.0368, + "num_input_tokens_seen": 38487520, + "step": 57095 + }, + { + "epoch": 1.394962499694623, + "grad_norm": 32.57456588745117, + "learning_rate": 1.8110931052979593e-06, + "loss": 0.1924, + "num_input_tokens_seen": 38490720, + "step": 57100 + }, + { + "epoch": 1.39508465052647, + "grad_norm": 9.302146911621094, + "learning_rate": 1.8110432220254641e-06, + "loss": 0.2088, + "num_input_tokens_seen": 38494432, + "step": 57105 + }, + { + "epoch": 1.3952068013583172, + "grad_norm": 0.8289868831634521, + "learning_rate": 1.8109933328548443e-06, + "loss": 0.0304, + "num_input_tokens_seen": 38497824, + "step": 57110 + }, + { + "epoch": 1.3953289521901644, + "grad_norm": 0.10691874474287033, + "learning_rate": 1.8109434377864631e-06, + "loss": 0.0008, + "num_input_tokens_seen": 38500896, + "step": 57115 + }, + { + "epoch": 1.3954511030220116, + "grad_norm": 13.294049263000488, + "learning_rate": 1.810893536820683e-06, + "loss": 0.1938, + "num_input_tokens_seen": 38503776, + "step": 57120 + }, + { + "epoch": 1.3955732538538588, + "grad_norm": 0.11053433269262314, + "learning_rate": 1.8108436299578669e-06, + "loss": 0.0298, + "num_input_tokens_seen": 38507232, + "step": 57125 + }, + { + "epoch": 1.395695404685706, + "grad_norm": 0.09031997621059418, + "learning_rate": 1.810793717198378e-06, + "loss": 0.0875, + "num_input_tokens_seen": 38510944, + "step": 57130 + }, + { + "epoch": 1.3958175555175532, + "grad_norm": 86.59408569335938, + "learning_rate": 1.8107437985425792e-06, + "loss": 0.0342, + "num_input_tokens_seen": 38514208, + "step": 57135 + }, + { + "epoch": 1.3959397063494001, + "grad_norm": 19.59816551208496, + "learning_rate": 1.810693873990833e-06, + "loss": 0.1204, + "num_input_tokens_seen": 38517728, + "step": 57140 + }, + { + "epoch": 1.3960618571812473, + "grad_norm": 1.211097240447998, + "learning_rate": 1.8106439435435035e-06, + "loss": 0.1372, + "num_input_tokens_seen": 38521120, + "step": 57145 + }, + { + "epoch": 1.3961840080130945, + "grad_norm": 0.18598124384880066, + "learning_rate": 1.8105940072009527e-06, + "loss": 0.002, + "num_input_tokens_seen": 38524320, + "step": 57150 + }, + { + "epoch": 1.3963061588449417, + "grad_norm": 11.330933570861816, + "learning_rate": 1.8105440649635445e-06, + "loss": 0.1249, + "num_input_tokens_seen": 38528352, + "step": 57155 + }, + { + "epoch": 1.3964283096767889, + "grad_norm": 0.22256216406822205, + "learning_rate": 1.8104941168316416e-06, + "loss": 0.1006, + "num_input_tokens_seen": 38531296, + "step": 57160 + }, + { + "epoch": 1.396550460508636, + "grad_norm": 1.1994155645370483, + "learning_rate": 1.810444162805608e-06, + "loss": 0.0017, + "num_input_tokens_seen": 38534944, + "step": 57165 + }, + { + "epoch": 1.3966726113404833, + "grad_norm": 8.909801483154297, + "learning_rate": 1.8103942028858059e-06, + "loss": 0.1503, + "num_input_tokens_seen": 38538720, + "step": 57170 + }, + { + "epoch": 1.3967947621723305, + "grad_norm": 0.5872882008552551, + "learning_rate": 1.8103442370725995e-06, + "loss": 0.0017, + "num_input_tokens_seen": 38541984, + "step": 57175 + }, + { + "epoch": 1.3969169130041776, + "grad_norm": 0.8501310348510742, + "learning_rate": 1.8102942653663518e-06, + "loss": 0.0414, + "num_input_tokens_seen": 38546144, + "step": 57180 + }, + { + "epoch": 1.3970390638360248, + "grad_norm": 0.03388998657464981, + "learning_rate": 1.8102442877674261e-06, + "loss": 0.1137, + "num_input_tokens_seen": 38549600, + "step": 57185 + }, + { + "epoch": 1.397161214667872, + "grad_norm": 0.6265448927879333, + "learning_rate": 1.810194304276186e-06, + "loss": 0.0352, + "num_input_tokens_seen": 38552928, + "step": 57190 + }, + { + "epoch": 1.397283365499719, + "grad_norm": 0.2135477215051651, + "learning_rate": 1.8101443148929954e-06, + "loss": 0.0021, + "num_input_tokens_seen": 38556256, + "step": 57195 + }, + { + "epoch": 1.3974055163315662, + "grad_norm": 0.6835227012634277, + "learning_rate": 1.810094319618217e-06, + "loss": 0.0396, + "num_input_tokens_seen": 38559648, + "step": 57200 + }, + { + "epoch": 1.3975276671634134, + "grad_norm": 24.1704044342041, + "learning_rate": 1.810044318452215e-06, + "loss": 0.0853, + "num_input_tokens_seen": 38562784, + "step": 57205 + }, + { + "epoch": 1.3976498179952606, + "grad_norm": 134.97132873535156, + "learning_rate": 1.8099943113953529e-06, + "loss": 0.049, + "num_input_tokens_seen": 38566368, + "step": 57210 + }, + { + "epoch": 1.3977719688271077, + "grad_norm": 19.390241622924805, + "learning_rate": 1.8099442984479942e-06, + "loss": 0.1038, + "num_input_tokens_seen": 38569824, + "step": 57215 + }, + { + "epoch": 1.397894119658955, + "grad_norm": 0.13285577297210693, + "learning_rate": 1.8098942796105027e-06, + "loss": 0.0255, + "num_input_tokens_seen": 38573408, + "step": 57220 + }, + { + "epoch": 1.398016270490802, + "grad_norm": 0.08401376008987427, + "learning_rate": 1.8098442548832424e-06, + "loss": 0.1684, + "num_input_tokens_seen": 38576864, + "step": 57225 + }, + { + "epoch": 1.398138421322649, + "grad_norm": 0.12962745130062103, + "learning_rate": 1.8097942242665765e-06, + "loss": 0.2394, + "num_input_tokens_seen": 38580384, + "step": 57230 + }, + { + "epoch": 1.3982605721544963, + "grad_norm": 0.4125172793865204, + "learning_rate": 1.8097441877608695e-06, + "loss": 0.0485, + "num_input_tokens_seen": 38583648, + "step": 57235 + }, + { + "epoch": 1.3983827229863435, + "grad_norm": 211.63461303710938, + "learning_rate": 1.809694145366485e-06, + "loss": 0.0928, + "num_input_tokens_seen": 38587744, + "step": 57240 + }, + { + "epoch": 1.3985048738181907, + "grad_norm": 0.08615150302648544, + "learning_rate": 1.8096440970837866e-06, + "loss": 0.0088, + "num_input_tokens_seen": 38591520, + "step": 57245 + }, + { + "epoch": 1.3986270246500379, + "grad_norm": 20.34548568725586, + "learning_rate": 1.8095940429131386e-06, + "loss": 0.1195, + "num_input_tokens_seen": 38594784, + "step": 57250 + }, + { + "epoch": 1.398749175481885, + "grad_norm": 0.2246903032064438, + "learning_rate": 1.8095439828549051e-06, + "loss": 0.137, + "num_input_tokens_seen": 38598368, + "step": 57255 + }, + { + "epoch": 1.3988713263137322, + "grad_norm": 251.88229370117188, + "learning_rate": 1.80949391690945e-06, + "loss": 0.1184, + "num_input_tokens_seen": 38601888, + "step": 57260 + }, + { + "epoch": 1.3989934771455794, + "grad_norm": 0.17487865686416626, + "learning_rate": 1.8094438450771375e-06, + "loss": 0.0366, + "num_input_tokens_seen": 38605408, + "step": 57265 + }, + { + "epoch": 1.3991156279774266, + "grad_norm": 0.1770249605178833, + "learning_rate": 1.8093937673583315e-06, + "loss": 0.1262, + "num_input_tokens_seen": 38608800, + "step": 57270 + }, + { + "epoch": 1.3992377788092738, + "grad_norm": 27.551986694335938, + "learning_rate": 1.8093436837533961e-06, + "loss": 0.1281, + "num_input_tokens_seen": 38612192, + "step": 57275 + }, + { + "epoch": 1.399359929641121, + "grad_norm": 13.532742500305176, + "learning_rate": 1.809293594262696e-06, + "loss": 0.0841, + "num_input_tokens_seen": 38615200, + "step": 57280 + }, + { + "epoch": 1.399482080472968, + "grad_norm": 0.6043211221694946, + "learning_rate": 1.8092434988865953e-06, + "loss": 0.1067, + "num_input_tokens_seen": 38618208, + "step": 57285 + }, + { + "epoch": 1.3996042313048151, + "grad_norm": 0.6223563551902771, + "learning_rate": 1.809193397625458e-06, + "loss": 0.0774, + "num_input_tokens_seen": 38621856, + "step": 57290 + }, + { + "epoch": 1.3997263821366623, + "grad_norm": 0.7312188744544983, + "learning_rate": 1.8091432904796488e-06, + "loss": 0.0382, + "num_input_tokens_seen": 38624864, + "step": 57295 + }, + { + "epoch": 1.3998485329685095, + "grad_norm": 31.77884292602539, + "learning_rate": 1.8090931774495321e-06, + "loss": 0.1126, + "num_input_tokens_seen": 38628832, + "step": 57300 + }, + { + "epoch": 1.3999706838003567, + "grad_norm": 1.9252561330795288, + "learning_rate": 1.8090430585354719e-06, + "loss": 0.0725, + "num_input_tokens_seen": 38632160, + "step": 57305 + }, + { + "epoch": 1.400092834632204, + "grad_norm": 11.0371732711792, + "learning_rate": 1.808992933737833e-06, + "loss": 0.112, + "num_input_tokens_seen": 38635488, + "step": 57310 + }, + { + "epoch": 1.4002149854640509, + "grad_norm": 12.347792625427246, + "learning_rate": 1.80894280305698e-06, + "loss": 0.1026, + "num_input_tokens_seen": 38638880, + "step": 57315 + }, + { + "epoch": 1.400337136295898, + "grad_norm": 1.6804922819137573, + "learning_rate": 1.8088926664932775e-06, + "loss": 0.0514, + "num_input_tokens_seen": 38642336, + "step": 57320 + }, + { + "epoch": 1.4004592871277453, + "grad_norm": 12.734495162963867, + "learning_rate": 1.80884252404709e-06, + "loss": 0.0423, + "num_input_tokens_seen": 38645984, + "step": 57325 + }, + { + "epoch": 1.4005814379595924, + "grad_norm": 3.755786180496216, + "learning_rate": 1.8087923757187817e-06, + "loss": 0.0354, + "num_input_tokens_seen": 38649056, + "step": 57330 + }, + { + "epoch": 1.4007035887914396, + "grad_norm": 18.840024948120117, + "learning_rate": 1.808742221508718e-06, + "loss": 0.1126, + "num_input_tokens_seen": 38652640, + "step": 57335 + }, + { + "epoch": 1.4008257396232868, + "grad_norm": 30.551414489746094, + "learning_rate": 1.8086920614172633e-06, + "loss": 0.119, + "num_input_tokens_seen": 38655904, + "step": 57340 + }, + { + "epoch": 1.400947890455134, + "grad_norm": 0.1037088930606842, + "learning_rate": 1.8086418954447825e-06, + "loss": 0.0705, + "num_input_tokens_seen": 38659232, + "step": 57345 + }, + { + "epoch": 1.4010700412869812, + "grad_norm": 0.2562164068222046, + "learning_rate": 1.80859172359164e-06, + "loss": 0.0021, + "num_input_tokens_seen": 38662688, + "step": 57350 + }, + { + "epoch": 1.4011921921188284, + "grad_norm": 0.8684639930725098, + "learning_rate": 1.8085415458582012e-06, + "loss": 0.0354, + "num_input_tokens_seen": 38666208, + "step": 57355 + }, + { + "epoch": 1.4013143429506756, + "grad_norm": 0.16640126705169678, + "learning_rate": 1.808491362244831e-06, + "loss": 0.0081, + "num_input_tokens_seen": 38669664, + "step": 57360 + }, + { + "epoch": 1.4014364937825228, + "grad_norm": 229.04251098632812, + "learning_rate": 1.8084411727518938e-06, + "loss": 0.2416, + "num_input_tokens_seen": 38672672, + "step": 57365 + }, + { + "epoch": 1.4015586446143697, + "grad_norm": 45.303890228271484, + "learning_rate": 1.8083909773797555e-06, + "loss": 0.157, + "num_input_tokens_seen": 38676000, + "step": 57370 + }, + { + "epoch": 1.401680795446217, + "grad_norm": 0.21114136278629303, + "learning_rate": 1.8083407761287802e-06, + "loss": 0.0918, + "num_input_tokens_seen": 38679904, + "step": 57375 + }, + { + "epoch": 1.4018029462780641, + "grad_norm": 0.13060590624809265, + "learning_rate": 1.8082905689993333e-06, + "loss": 0.0186, + "num_input_tokens_seen": 38683360, + "step": 57380 + }, + { + "epoch": 1.4019250971099113, + "grad_norm": 0.2954758107662201, + "learning_rate": 1.8082403559917801e-06, + "loss": 0.0348, + "num_input_tokens_seen": 38686752, + "step": 57385 + }, + { + "epoch": 1.4020472479417585, + "grad_norm": 10.650467872619629, + "learning_rate": 1.8081901371064854e-06, + "loss": 0.0465, + "num_input_tokens_seen": 38690976, + "step": 57390 + }, + { + "epoch": 1.4021693987736057, + "grad_norm": 111.42206573486328, + "learning_rate": 1.8081399123438147e-06, + "loss": 0.2443, + "num_input_tokens_seen": 38693984, + "step": 57395 + }, + { + "epoch": 1.4022915496054529, + "grad_norm": 20.380661010742188, + "learning_rate": 1.8080896817041337e-06, + "loss": 0.0703, + "num_input_tokens_seen": 38697376, + "step": 57400 + }, + { + "epoch": 1.4024137004372998, + "grad_norm": 0.3858691155910492, + "learning_rate": 1.8080394451878066e-06, + "loss": 0.003, + "num_input_tokens_seen": 38700896, + "step": 57405 + }, + { + "epoch": 1.402535851269147, + "grad_norm": 144.43235778808594, + "learning_rate": 1.8079892027951997e-06, + "loss": 0.1784, + "num_input_tokens_seen": 38704480, + "step": 57410 + }, + { + "epoch": 1.4026580021009942, + "grad_norm": 85.30264282226562, + "learning_rate": 1.8079389545266776e-06, + "loss": 0.0667, + "num_input_tokens_seen": 38707680, + "step": 57415 + }, + { + "epoch": 1.4027801529328414, + "grad_norm": 3.402688503265381, + "learning_rate": 1.8078887003826067e-06, + "loss": 0.1021, + "num_input_tokens_seen": 38710752, + "step": 57420 + }, + { + "epoch": 1.4029023037646886, + "grad_norm": 0.0745268315076828, + "learning_rate": 1.8078384403633513e-06, + "loss": 0.0199, + "num_input_tokens_seen": 38713760, + "step": 57425 + }, + { + "epoch": 1.4030244545965358, + "grad_norm": 0.1893487423658371, + "learning_rate": 1.8077881744692778e-06, + "loss": 0.0928, + "num_input_tokens_seen": 38717280, + "step": 57430 + }, + { + "epoch": 1.403146605428383, + "grad_norm": 0.1066637858748436, + "learning_rate": 1.8077379027007513e-06, + "loss": 0.085, + "num_input_tokens_seen": 38720928, + "step": 57435 + }, + { + "epoch": 1.4032687562602302, + "grad_norm": 0.33574655652046204, + "learning_rate": 1.8076876250581376e-06, + "loss": 0.0015, + "num_input_tokens_seen": 38724320, + "step": 57440 + }, + { + "epoch": 1.4033909070920774, + "grad_norm": 0.13137076795101166, + "learning_rate": 1.807637341541802e-06, + "loss": 0.0119, + "num_input_tokens_seen": 38727328, + "step": 57445 + }, + { + "epoch": 1.4035130579239246, + "grad_norm": 3.9827892780303955, + "learning_rate": 1.807587052152111e-06, + "loss": 0.0021, + "num_input_tokens_seen": 38730208, + "step": 57450 + }, + { + "epoch": 1.4036352087557717, + "grad_norm": 19.064966201782227, + "learning_rate": 1.807536756889429e-06, + "loss": 0.1549, + "num_input_tokens_seen": 38733920, + "step": 57455 + }, + { + "epoch": 1.4037573595876187, + "grad_norm": 13.327079772949219, + "learning_rate": 1.807486455754123e-06, + "loss": 0.0534, + "num_input_tokens_seen": 38737120, + "step": 57460 + }, + { + "epoch": 1.403879510419466, + "grad_norm": 0.09143029898405075, + "learning_rate": 1.8074361487465582e-06, + "loss": 0.0009, + "num_input_tokens_seen": 38740576, + "step": 57465 + }, + { + "epoch": 1.404001661251313, + "grad_norm": 8.409533500671387, + "learning_rate": 1.8073858358671004e-06, + "loss": 0.0514, + "num_input_tokens_seen": 38743712, + "step": 57470 + }, + { + "epoch": 1.4041238120831603, + "grad_norm": 0.055469900369644165, + "learning_rate": 1.8073355171161157e-06, + "loss": 0.1495, + "num_input_tokens_seen": 38747104, + "step": 57475 + }, + { + "epoch": 1.4042459629150075, + "grad_norm": 17.54491424560547, + "learning_rate": 1.8072851924939702e-06, + "loss": 0.116, + "num_input_tokens_seen": 38750624, + "step": 57480 + }, + { + "epoch": 1.4043681137468547, + "grad_norm": 0.06926427781581879, + "learning_rate": 1.8072348620010294e-06, + "loss": 0.1466, + "num_input_tokens_seen": 38753696, + "step": 57485 + }, + { + "epoch": 1.4044902645787019, + "grad_norm": 0.17716003954410553, + "learning_rate": 1.8071845256376597e-06, + "loss": 0.0367, + "num_input_tokens_seen": 38757280, + "step": 57490 + }, + { + "epoch": 1.4046124154105488, + "grad_norm": 0.09693924337625504, + "learning_rate": 1.8071341834042268e-06, + "loss": 0.1259, + "num_input_tokens_seen": 38760160, + "step": 57495 + }, + { + "epoch": 1.404734566242396, + "grad_norm": 0.2468055784702301, + "learning_rate": 1.8070838353010973e-06, + "loss": 0.0536, + "num_input_tokens_seen": 38763104, + "step": 57500 + }, + { + "epoch": 1.4048567170742432, + "grad_norm": 195.17642211914062, + "learning_rate": 1.807033481328637e-06, + "loss": 0.0231, + "num_input_tokens_seen": 38766432, + "step": 57505 + }, + { + "epoch": 1.4049788679060904, + "grad_norm": 27.664806365966797, + "learning_rate": 1.806983121487212e-06, + "loss": 0.0559, + "num_input_tokens_seen": 38770144, + "step": 57510 + }, + { + "epoch": 1.4051010187379376, + "grad_norm": 0.06676580756902695, + "learning_rate": 1.8069327557771889e-06, + "loss": 0.0498, + "num_input_tokens_seen": 38773280, + "step": 57515 + }, + { + "epoch": 1.4052231695697848, + "grad_norm": 0.34984180331230164, + "learning_rate": 1.8068823841989338e-06, + "loss": 0.0375, + "num_input_tokens_seen": 38776608, + "step": 57520 + }, + { + "epoch": 1.405345320401632, + "grad_norm": 19.778776168823242, + "learning_rate": 1.8068320067528129e-06, + "loss": 0.0967, + "num_input_tokens_seen": 38779808, + "step": 57525 + }, + { + "epoch": 1.4054674712334791, + "grad_norm": 0.1447640359401703, + "learning_rate": 1.8067816234391925e-06, + "loss": 0.1562, + "num_input_tokens_seen": 38783200, + "step": 57530 + }, + { + "epoch": 1.4055896220653263, + "grad_norm": 0.1753024160861969, + "learning_rate": 1.8067312342584393e-06, + "loss": 0.0736, + "num_input_tokens_seen": 38786144, + "step": 57535 + }, + { + "epoch": 1.4057117728971735, + "grad_norm": 0.07908373326063156, + "learning_rate": 1.8066808392109193e-06, + "loss": 0.1177, + "num_input_tokens_seen": 38789344, + "step": 57540 + }, + { + "epoch": 1.4058339237290207, + "grad_norm": 0.6226028203964233, + "learning_rate": 1.8066304382969995e-06, + "loss": 0.0773, + "num_input_tokens_seen": 38792224, + "step": 57545 + }, + { + "epoch": 1.4059560745608677, + "grad_norm": 81.13831329345703, + "learning_rate": 1.8065800315170461e-06, + "loss": 0.0862, + "num_input_tokens_seen": 38796384, + "step": 57550 + }, + { + "epoch": 1.4060782253927149, + "grad_norm": 0.05820373818278313, + "learning_rate": 1.8065296188714259e-06, + "loss": 0.0964, + "num_input_tokens_seen": 38799712, + "step": 57555 + }, + { + "epoch": 1.406200376224562, + "grad_norm": 15.190373420715332, + "learning_rate": 1.8064792003605054e-06, + "loss": 0.1511, + "num_input_tokens_seen": 38803360, + "step": 57560 + }, + { + "epoch": 1.4063225270564093, + "grad_norm": 19.76511001586914, + "learning_rate": 1.806428775984651e-06, + "loss": 0.073, + "num_input_tokens_seen": 38806752, + "step": 57565 + }, + { + "epoch": 1.4064446778882564, + "grad_norm": 102.18384552001953, + "learning_rate": 1.80637834574423e-06, + "loss": 0.0458, + "num_input_tokens_seen": 38809760, + "step": 57570 + }, + { + "epoch": 1.4065668287201036, + "grad_norm": 0.060823310166597366, + "learning_rate": 1.8063279096396084e-06, + "loss": 0.1079, + "num_input_tokens_seen": 38813408, + "step": 57575 + }, + { + "epoch": 1.4066889795519508, + "grad_norm": 193.80770874023438, + "learning_rate": 1.8062774676711534e-06, + "loss": 0.0348, + "num_input_tokens_seen": 38816608, + "step": 57580 + }, + { + "epoch": 1.4068111303837978, + "grad_norm": 2.4402763843536377, + "learning_rate": 1.8062270198392322e-06, + "loss": 0.0337, + "num_input_tokens_seen": 38819552, + "step": 57585 + }, + { + "epoch": 1.406933281215645, + "grad_norm": 11.247885704040527, + "learning_rate": 1.8061765661442108e-06, + "loss": 0.0309, + "num_input_tokens_seen": 38822688, + "step": 57590 + }, + { + "epoch": 1.4070554320474922, + "grad_norm": 0.40253564715385437, + "learning_rate": 1.8061261065864568e-06, + "loss": 0.1429, + "num_input_tokens_seen": 38826208, + "step": 57595 + }, + { + "epoch": 1.4071775828793394, + "grad_norm": 8.919163703918457, + "learning_rate": 1.806075641166337e-06, + "loss": 0.2824, + "num_input_tokens_seen": 38829664, + "step": 57600 + }, + { + "epoch": 1.4072997337111866, + "grad_norm": 30.795196533203125, + "learning_rate": 1.8060251698842182e-06, + "loss": 0.0616, + "num_input_tokens_seen": 38833184, + "step": 57605 + }, + { + "epoch": 1.4074218845430337, + "grad_norm": 0.13794507086277008, + "learning_rate": 1.8059746927404676e-06, + "loss": 0.1233, + "num_input_tokens_seen": 38836384, + "step": 57610 + }, + { + "epoch": 1.407544035374881, + "grad_norm": 33.86235809326172, + "learning_rate": 1.8059242097354522e-06, + "loss": 0.0892, + "num_input_tokens_seen": 38840032, + "step": 57615 + }, + { + "epoch": 1.4076661862067281, + "grad_norm": 0.26369521021842957, + "learning_rate": 1.8058737208695391e-06, + "loss": 0.0226, + "num_input_tokens_seen": 38843360, + "step": 57620 + }, + { + "epoch": 1.4077883370385753, + "grad_norm": 0.03817284107208252, + "learning_rate": 1.8058232261430957e-06, + "loss": 0.2279, + "num_input_tokens_seen": 38846560, + "step": 57625 + }, + { + "epoch": 1.4079104878704225, + "grad_norm": 0.21635249257087708, + "learning_rate": 1.8057727255564892e-06, + "loss": 0.0297, + "num_input_tokens_seen": 38849888, + "step": 57630 + }, + { + "epoch": 1.4080326387022697, + "grad_norm": 0.21980540454387665, + "learning_rate": 1.8057222191100863e-06, + "loss": 0.1256, + "num_input_tokens_seen": 38853472, + "step": 57635 + }, + { + "epoch": 1.4081547895341167, + "grad_norm": 89.9726333618164, + "learning_rate": 1.805671706804255e-06, + "loss": 0.114, + "num_input_tokens_seen": 38857120, + "step": 57640 + }, + { + "epoch": 1.4082769403659638, + "grad_norm": 18.657129287719727, + "learning_rate": 1.8056211886393622e-06, + "loss": 0.1426, + "num_input_tokens_seen": 38860512, + "step": 57645 + }, + { + "epoch": 1.408399091197811, + "grad_norm": 93.33203125, + "learning_rate": 1.8055706646157756e-06, + "loss": 0.1036, + "num_input_tokens_seen": 38864160, + "step": 57650 + }, + { + "epoch": 1.4085212420296582, + "grad_norm": 0.14268730580806732, + "learning_rate": 1.8055201347338625e-06, + "loss": 0.0018, + "num_input_tokens_seen": 38867424, + "step": 57655 + }, + { + "epoch": 1.4086433928615054, + "grad_norm": 1.1824917793273926, + "learning_rate": 1.8054695989939904e-06, + "loss": 0.063, + "num_input_tokens_seen": 38870688, + "step": 57660 + }, + { + "epoch": 1.4087655436933526, + "grad_norm": 0.21655486524105072, + "learning_rate": 1.8054190573965263e-06, + "loss": 0.0545, + "num_input_tokens_seen": 38874080, + "step": 57665 + }, + { + "epoch": 1.4088876945251998, + "grad_norm": 181.86268615722656, + "learning_rate": 1.8053685099418385e-06, + "loss": 0.0517, + "num_input_tokens_seen": 38877088, + "step": 57670 + }, + { + "epoch": 1.4090098453570468, + "grad_norm": 2.238178253173828, + "learning_rate": 1.8053179566302942e-06, + "loss": 0.0338, + "num_input_tokens_seen": 38880224, + "step": 57675 + }, + { + "epoch": 1.409131996188894, + "grad_norm": 0.26335620880126953, + "learning_rate": 1.805267397462261e-06, + "loss": 0.1188, + "num_input_tokens_seen": 38883424, + "step": 57680 + }, + { + "epoch": 1.4092541470207411, + "grad_norm": 0.3708566725254059, + "learning_rate": 1.805216832438107e-06, + "loss": 0.0563, + "num_input_tokens_seen": 38886752, + "step": 57685 + }, + { + "epoch": 1.4093762978525883, + "grad_norm": 0.582304060459137, + "learning_rate": 1.8051662615581994e-06, + "loss": 0.0523, + "num_input_tokens_seen": 38890336, + "step": 57690 + }, + { + "epoch": 1.4094984486844355, + "grad_norm": 0.12347531318664551, + "learning_rate": 1.805115684822906e-06, + "loss": 0.0014, + "num_input_tokens_seen": 38893792, + "step": 57695 + }, + { + "epoch": 1.4096205995162827, + "grad_norm": 0.10339561849832535, + "learning_rate": 1.8050651022325952e-06, + "loss": 0.001, + "num_input_tokens_seen": 38897120, + "step": 57700 + }, + { + "epoch": 1.40974275034813, + "grad_norm": 3.116520643234253, + "learning_rate": 1.805014513787634e-06, + "loss": 0.0374, + "num_input_tokens_seen": 38900576, + "step": 57705 + }, + { + "epoch": 1.409864901179977, + "grad_norm": 28.094335556030273, + "learning_rate": 1.804963919488391e-06, + "loss": 0.142, + "num_input_tokens_seen": 38904544, + "step": 57710 + }, + { + "epoch": 1.4099870520118243, + "grad_norm": 0.1864064782857895, + "learning_rate": 1.804913319335234e-06, + "loss": 0.0544, + "num_input_tokens_seen": 38907616, + "step": 57715 + }, + { + "epoch": 1.4101092028436715, + "grad_norm": 10.022531509399414, + "learning_rate": 1.8048627133285306e-06, + "loss": 0.198, + "num_input_tokens_seen": 38910752, + "step": 57720 + }, + { + "epoch": 1.4102313536755187, + "grad_norm": 16.5054931640625, + "learning_rate": 1.804812101468649e-06, + "loss": 0.0391, + "num_input_tokens_seen": 38913760, + "step": 57725 + }, + { + "epoch": 1.4103535045073656, + "grad_norm": 17.392940521240234, + "learning_rate": 1.8047614837559574e-06, + "loss": 0.059, + "num_input_tokens_seen": 38916640, + "step": 57730 + }, + { + "epoch": 1.4104756553392128, + "grad_norm": 22.643449783325195, + "learning_rate": 1.8047108601908243e-06, + "loss": 0.0686, + "num_input_tokens_seen": 38919520, + "step": 57735 + }, + { + "epoch": 1.41059780617106, + "grad_norm": 0.13620342314243317, + "learning_rate": 1.8046602307736168e-06, + "loss": 0.0847, + "num_input_tokens_seen": 38922784, + "step": 57740 + }, + { + "epoch": 1.4107199570029072, + "grad_norm": 0.21663911640644073, + "learning_rate": 1.8046095955047038e-06, + "loss": 0.0265, + "num_input_tokens_seen": 38926688, + "step": 57745 + }, + { + "epoch": 1.4108421078347544, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.8045589543844537e-06, + "loss": 0.1017, + "num_input_tokens_seen": 38929824, + "step": 57750 + }, + { + "epoch": 1.4109642586666016, + "grad_norm": 1.3341022729873657, + "learning_rate": 1.8045083074132341e-06, + "loss": 0.0632, + "num_input_tokens_seen": 38933408, + "step": 57755 + }, + { + "epoch": 1.4110864094984485, + "grad_norm": 0.027623578906059265, + "learning_rate": 1.804457654591414e-06, + "loss": 0.102, + "num_input_tokens_seen": 38937184, + "step": 57760 + }, + { + "epoch": 1.4112085603302957, + "grad_norm": 0.09516473114490509, + "learning_rate": 1.8044069959193612e-06, + "loss": 0.0575, + "num_input_tokens_seen": 38940512, + "step": 57765 + }, + { + "epoch": 1.411330711162143, + "grad_norm": 0.0526285395026207, + "learning_rate": 1.8043563313974445e-06, + "loss": 0.0731, + "num_input_tokens_seen": 38943968, + "step": 57770 + }, + { + "epoch": 1.4114528619939901, + "grad_norm": 0.2618235945701599, + "learning_rate": 1.8043056610260324e-06, + "loss": 0.0789, + "num_input_tokens_seen": 38947680, + "step": 57775 + }, + { + "epoch": 1.4115750128258373, + "grad_norm": 0.09851998090744019, + "learning_rate": 1.804254984805493e-06, + "loss": 0.0939, + "num_input_tokens_seen": 38951008, + "step": 57780 + }, + { + "epoch": 1.4116971636576845, + "grad_norm": 0.20549650490283966, + "learning_rate": 1.804204302736195e-06, + "loss": 0.1177, + "num_input_tokens_seen": 38954208, + "step": 57785 + }, + { + "epoch": 1.4118193144895317, + "grad_norm": 1.1573891639709473, + "learning_rate": 1.804153614818507e-06, + "loss": 0.0814, + "num_input_tokens_seen": 38957344, + "step": 57790 + }, + { + "epoch": 1.4119414653213789, + "grad_norm": 0.22255335748195648, + "learning_rate": 1.8041029210527976e-06, + "loss": 0.0297, + "num_input_tokens_seen": 38960928, + "step": 57795 + }, + { + "epoch": 1.412063616153226, + "grad_norm": 0.2250811904668808, + "learning_rate": 1.8040522214394356e-06, + "loss": 0.0021, + "num_input_tokens_seen": 38963936, + "step": 57800 + }, + { + "epoch": 1.4121857669850733, + "grad_norm": 0.12040767073631287, + "learning_rate": 1.8040015159787894e-06, + "loss": 0.0705, + "num_input_tokens_seen": 38967584, + "step": 57805 + }, + { + "epoch": 1.4123079178169204, + "grad_norm": 0.05583275854587555, + "learning_rate": 1.8039508046712281e-06, + "loss": 0.0421, + "num_input_tokens_seen": 38971104, + "step": 57810 + }, + { + "epoch": 1.4124300686487676, + "grad_norm": 68.72763061523438, + "learning_rate": 1.8039000875171202e-06, + "loss": 0.1292, + "num_input_tokens_seen": 38974112, + "step": 57815 + }, + { + "epoch": 1.4125522194806146, + "grad_norm": 0.1513207107782364, + "learning_rate": 1.8038493645168349e-06, + "loss": 0.0562, + "num_input_tokens_seen": 38977440, + "step": 57820 + }, + { + "epoch": 1.4126743703124618, + "grad_norm": 0.06547381728887558, + "learning_rate": 1.8037986356707404e-06, + "loss": 0.0516, + "num_input_tokens_seen": 38980896, + "step": 57825 + }, + { + "epoch": 1.412796521144309, + "grad_norm": 0.052669234573841095, + "learning_rate": 1.8037479009792062e-06, + "loss": 0.0619, + "num_input_tokens_seen": 38984032, + "step": 57830 + }, + { + "epoch": 1.4129186719761562, + "grad_norm": 49.396663665771484, + "learning_rate": 1.8036971604426015e-06, + "loss": 0.1209, + "num_input_tokens_seen": 38987168, + "step": 57835 + }, + { + "epoch": 1.4130408228080034, + "grad_norm": 0.13010339438915253, + "learning_rate": 1.8036464140612943e-06, + "loss": 0.0015, + "num_input_tokens_seen": 38990560, + "step": 57840 + }, + { + "epoch": 1.4131629736398506, + "grad_norm": 0.014206153340637684, + "learning_rate": 1.8035956618356546e-06, + "loss": 0.0598, + "num_input_tokens_seen": 38994208, + "step": 57845 + }, + { + "epoch": 1.4132851244716975, + "grad_norm": 327.2905578613281, + "learning_rate": 1.8035449037660508e-06, + "loss": 0.124, + "num_input_tokens_seen": 38997600, + "step": 57850 + }, + { + "epoch": 1.4134072753035447, + "grad_norm": 0.030538057908415794, + "learning_rate": 1.8034941398528525e-06, + "loss": 0.0878, + "num_input_tokens_seen": 39000992, + "step": 57855 + }, + { + "epoch": 1.413529426135392, + "grad_norm": 0.0992370992898941, + "learning_rate": 1.8034433700964287e-06, + "loss": 0.0991, + "num_input_tokens_seen": 39004192, + "step": 57860 + }, + { + "epoch": 1.413651576967239, + "grad_norm": 1.4549504518508911, + "learning_rate": 1.8033925944971484e-06, + "loss": 0.0457, + "num_input_tokens_seen": 39007648, + "step": 57865 + }, + { + "epoch": 1.4137737277990863, + "grad_norm": 0.08177211880683899, + "learning_rate": 1.8033418130553812e-06, + "loss": 0.1027, + "num_input_tokens_seen": 39011104, + "step": 57870 + }, + { + "epoch": 1.4138958786309335, + "grad_norm": 0.17393673956394196, + "learning_rate": 1.8032910257714966e-06, + "loss": 0.0397, + "num_input_tokens_seen": 39014624, + "step": 57875 + }, + { + "epoch": 1.4140180294627807, + "grad_norm": 0.10555342584848404, + "learning_rate": 1.803240232645863e-06, + "loss": 0.1179, + "num_input_tokens_seen": 39017696, + "step": 57880 + }, + { + "epoch": 1.4141401802946278, + "grad_norm": 0.06716419011354446, + "learning_rate": 1.803189433678851e-06, + "loss": 0.0424, + "num_input_tokens_seen": 39020896, + "step": 57885 + }, + { + "epoch": 1.414262331126475, + "grad_norm": 17.76363182067871, + "learning_rate": 1.803138628870829e-06, + "loss": 0.0841, + "num_input_tokens_seen": 39024864, + "step": 57890 + }, + { + "epoch": 1.4143844819583222, + "grad_norm": 0.2943059802055359, + "learning_rate": 1.803087818222167e-06, + "loss": 0.1301, + "num_input_tokens_seen": 39028896, + "step": 57895 + }, + { + "epoch": 1.4145066327901694, + "grad_norm": 14.014280319213867, + "learning_rate": 1.803037001733234e-06, + "loss": 0.11, + "num_input_tokens_seen": 39032224, + "step": 57900 + }, + { + "epoch": 1.4146287836220164, + "grad_norm": 0.12080468982458115, + "learning_rate": 1.8029861794044005e-06, + "loss": 0.0814, + "num_input_tokens_seen": 39035936, + "step": 57905 + }, + { + "epoch": 1.4147509344538636, + "grad_norm": 0.2700631022453308, + "learning_rate": 1.8029353512360354e-06, + "loss": 0.0946, + "num_input_tokens_seen": 39039200, + "step": 57910 + }, + { + "epoch": 1.4148730852857108, + "grad_norm": 9.89631462097168, + "learning_rate": 1.8028845172285083e-06, + "loss": 0.1305, + "num_input_tokens_seen": 39042464, + "step": 57915 + }, + { + "epoch": 1.414995236117558, + "grad_norm": 0.3272474706172943, + "learning_rate": 1.802833677382189e-06, + "loss": 0.0931, + "num_input_tokens_seen": 39045472, + "step": 57920 + }, + { + "epoch": 1.4151173869494051, + "grad_norm": 18.658485412597656, + "learning_rate": 1.8027828316974476e-06, + "loss": 0.1595, + "num_input_tokens_seen": 39048544, + "step": 57925 + }, + { + "epoch": 1.4152395377812523, + "grad_norm": 21.56070327758789, + "learning_rate": 1.8027319801746532e-06, + "loss": 0.21, + "num_input_tokens_seen": 39051936, + "step": 57930 + }, + { + "epoch": 1.4153616886130995, + "grad_norm": 0.30690234899520874, + "learning_rate": 1.8026811228141762e-06, + "loss": 0.13, + "num_input_tokens_seen": 39055648, + "step": 57935 + }, + { + "epoch": 1.4154838394449465, + "grad_norm": 0.8390116095542908, + "learning_rate": 1.8026302596163857e-06, + "loss": 0.0517, + "num_input_tokens_seen": 39058656, + "step": 57940 + }, + { + "epoch": 1.4156059902767937, + "grad_norm": 98.68684387207031, + "learning_rate": 1.8025793905816523e-06, + "loss": 0.045, + "num_input_tokens_seen": 39061920, + "step": 57945 + }, + { + "epoch": 1.4157281411086409, + "grad_norm": 0.09113955497741699, + "learning_rate": 1.802528515710346e-06, + "loss": 0.1386, + "num_input_tokens_seen": 39064864, + "step": 57950 + }, + { + "epoch": 1.415850291940488, + "grad_norm": 8.854911804199219, + "learning_rate": 1.8024776350028363e-06, + "loss": 0.1239, + "num_input_tokens_seen": 39068448, + "step": 57955 + }, + { + "epoch": 1.4159724427723352, + "grad_norm": 24.10318374633789, + "learning_rate": 1.8024267484594933e-06, + "loss": 0.1009, + "num_input_tokens_seen": 39072480, + "step": 57960 + }, + { + "epoch": 1.4160945936041824, + "grad_norm": 0.13761503994464874, + "learning_rate": 1.8023758560806873e-06, + "loss": 0.0409, + "num_input_tokens_seen": 39075744, + "step": 57965 + }, + { + "epoch": 1.4162167444360296, + "grad_norm": 0.2878974676132202, + "learning_rate": 1.802324957866788e-06, + "loss": 0.0018, + "num_input_tokens_seen": 39079328, + "step": 57970 + }, + { + "epoch": 1.4163388952678768, + "grad_norm": 0.13723404705524445, + "learning_rate": 1.8022740538181662e-06, + "loss": 0.016, + "num_input_tokens_seen": 39082592, + "step": 57975 + }, + { + "epoch": 1.416461046099724, + "grad_norm": 57.850948333740234, + "learning_rate": 1.8022231439351914e-06, + "loss": 0.2262, + "num_input_tokens_seen": 39085920, + "step": 57980 + }, + { + "epoch": 1.4165831969315712, + "grad_norm": 0.20545895397663116, + "learning_rate": 1.8021722282182342e-06, + "loss": 0.0397, + "num_input_tokens_seen": 39088928, + "step": 57985 + }, + { + "epoch": 1.4167053477634184, + "grad_norm": 1.8310734033584595, + "learning_rate": 1.802121306667665e-06, + "loss": 0.1332, + "num_input_tokens_seen": 39092064, + "step": 57990 + }, + { + "epoch": 1.4168274985952654, + "grad_norm": 0.4682214856147766, + "learning_rate": 1.8020703792838535e-06, + "loss": 0.1581, + "num_input_tokens_seen": 39095456, + "step": 57995 + }, + { + "epoch": 1.4169496494271125, + "grad_norm": 0.3079153001308441, + "learning_rate": 1.8020194460671707e-06, + "loss": 0.0014, + "num_input_tokens_seen": 39098336, + "step": 58000 + }, + { + "epoch": 1.4170718002589597, + "grad_norm": 0.013312513008713722, + "learning_rate": 1.8019685070179868e-06, + "loss": 0.0703, + "num_input_tokens_seen": 39101856, + "step": 58005 + }, + { + "epoch": 1.417193951090807, + "grad_norm": 15.343265533447266, + "learning_rate": 1.8019175621366722e-06, + "loss": 0.078, + "num_input_tokens_seen": 39104800, + "step": 58010 + }, + { + "epoch": 1.4173161019226541, + "grad_norm": 0.2046901136636734, + "learning_rate": 1.8018666114235973e-06, + "loss": 0.0502, + "num_input_tokens_seen": 39108192, + "step": 58015 + }, + { + "epoch": 1.4174382527545013, + "grad_norm": 22.222043991088867, + "learning_rate": 1.801815654879133e-06, + "loss": 0.0477, + "num_input_tokens_seen": 39111648, + "step": 58020 + }, + { + "epoch": 1.4175604035863485, + "grad_norm": 9.82966423034668, + "learning_rate": 1.8017646925036495e-06, + "loss": 0.0822, + "num_input_tokens_seen": 39114976, + "step": 58025 + }, + { + "epoch": 1.4176825544181955, + "grad_norm": 0.5586780905723572, + "learning_rate": 1.8017137242975174e-06, + "loss": 0.1607, + "num_input_tokens_seen": 39118688, + "step": 58030 + }, + { + "epoch": 1.4178047052500427, + "grad_norm": 0.5261523127555847, + "learning_rate": 1.8016627502611072e-06, + "loss": 0.1481, + "num_input_tokens_seen": 39121824, + "step": 58035 + }, + { + "epoch": 1.4179268560818898, + "grad_norm": 37.271671295166016, + "learning_rate": 1.8016117703947902e-06, + "loss": 0.1372, + "num_input_tokens_seen": 39125280, + "step": 58040 + }, + { + "epoch": 1.418049006913737, + "grad_norm": 0.5471317768096924, + "learning_rate": 1.8015607846989367e-06, + "loss": 0.0425, + "num_input_tokens_seen": 39128416, + "step": 58045 + }, + { + "epoch": 1.4181711577455842, + "grad_norm": 20.493061065673828, + "learning_rate": 1.8015097931739175e-06, + "loss": 0.067, + "num_input_tokens_seen": 39131296, + "step": 58050 + }, + { + "epoch": 1.4182933085774314, + "grad_norm": 0.3601451814174652, + "learning_rate": 1.8014587958201038e-06, + "loss": 0.118, + "num_input_tokens_seen": 39134304, + "step": 58055 + }, + { + "epoch": 1.4184154594092786, + "grad_norm": 31.987966537475586, + "learning_rate": 1.801407792637866e-06, + "loss": 0.0955, + "num_input_tokens_seen": 39138208, + "step": 58060 + }, + { + "epoch": 1.4185376102411258, + "grad_norm": 0.3367749750614166, + "learning_rate": 1.801356783627575e-06, + "loss": 0.0852, + "num_input_tokens_seen": 39141344, + "step": 58065 + }, + { + "epoch": 1.418659761072973, + "grad_norm": 108.01628112792969, + "learning_rate": 1.8013057687896022e-06, + "loss": 0.131, + "num_input_tokens_seen": 39144672, + "step": 58070 + }, + { + "epoch": 1.4187819119048202, + "grad_norm": 0.4019928574562073, + "learning_rate": 1.8012547481243182e-06, + "loss": 0.1219, + "num_input_tokens_seen": 39147744, + "step": 58075 + }, + { + "epoch": 1.4189040627366674, + "grad_norm": 0.054344676434993744, + "learning_rate": 1.8012037216320942e-06, + "loss": 0.0932, + "num_input_tokens_seen": 39151392, + "step": 58080 + }, + { + "epoch": 1.4190262135685143, + "grad_norm": 0.14216575026512146, + "learning_rate": 1.8011526893133012e-06, + "loss": 0.1037, + "num_input_tokens_seen": 39154464, + "step": 58085 + }, + { + "epoch": 1.4191483644003615, + "grad_norm": 10.013678550720215, + "learning_rate": 1.8011016511683103e-06, + "loss": 0.0417, + "num_input_tokens_seen": 39157920, + "step": 58090 + }, + { + "epoch": 1.4192705152322087, + "grad_norm": 21.88975715637207, + "learning_rate": 1.8010506071974926e-06, + "loss": 0.2024, + "num_input_tokens_seen": 39161312, + "step": 58095 + }, + { + "epoch": 1.419392666064056, + "grad_norm": 25.340179443359375, + "learning_rate": 1.8009995574012198e-06, + "loss": 0.1039, + "num_input_tokens_seen": 39164448, + "step": 58100 + }, + { + "epoch": 1.419514816895903, + "grad_norm": 21.190763473510742, + "learning_rate": 1.8009485017798624e-06, + "loss": 0.1066, + "num_input_tokens_seen": 39167776, + "step": 58105 + }, + { + "epoch": 1.4196369677277503, + "grad_norm": 0.21470917761325836, + "learning_rate": 1.8008974403337924e-06, + "loss": 0.1594, + "num_input_tokens_seen": 39171104, + "step": 58110 + }, + { + "epoch": 1.4197591185595975, + "grad_norm": 1.8665906190872192, + "learning_rate": 1.8008463730633807e-06, + "loss": 0.0865, + "num_input_tokens_seen": 39174560, + "step": 58115 + }, + { + "epoch": 1.4198812693914444, + "grad_norm": 0.051145486533641815, + "learning_rate": 1.8007952999689989e-06, + "loss": 0.0939, + "num_input_tokens_seen": 39177760, + "step": 58120 + }, + { + "epoch": 1.4200034202232916, + "grad_norm": 1.2939541339874268, + "learning_rate": 1.800744221051018e-06, + "loss": 0.049, + "num_input_tokens_seen": 39181152, + "step": 58125 + }, + { + "epoch": 1.4201255710551388, + "grad_norm": 11.657868385314941, + "learning_rate": 1.80069313630981e-06, + "loss": 0.1063, + "num_input_tokens_seen": 39184544, + "step": 58130 + }, + { + "epoch": 1.420247721886986, + "grad_norm": 0.22248221933841705, + "learning_rate": 1.8006420457457457e-06, + "loss": 0.0017, + "num_input_tokens_seen": 39188000, + "step": 58135 + }, + { + "epoch": 1.4203698727188332, + "grad_norm": 16.86621856689453, + "learning_rate": 1.8005909493591975e-06, + "loss": 0.1047, + "num_input_tokens_seen": 39190816, + "step": 58140 + }, + { + "epoch": 1.4204920235506804, + "grad_norm": 0.03387094661593437, + "learning_rate": 1.8005398471505364e-06, + "loss": 0.0524, + "num_input_tokens_seen": 39194208, + "step": 58145 + }, + { + "epoch": 1.4206141743825276, + "grad_norm": 0.009047556668519974, + "learning_rate": 1.8004887391201343e-06, + "loss": 0.2904, + "num_input_tokens_seen": 39198048, + "step": 58150 + }, + { + "epoch": 1.4207363252143748, + "grad_norm": 0.5070739388465881, + "learning_rate": 1.8004376252683629e-06, + "loss": 0.0442, + "num_input_tokens_seen": 39201376, + "step": 58155 + }, + { + "epoch": 1.420858476046222, + "grad_norm": 0.357709676027298, + "learning_rate": 1.8003865055955938e-06, + "loss": 0.061, + "num_input_tokens_seen": 39204960, + "step": 58160 + }, + { + "epoch": 1.4209806268780691, + "grad_norm": 2.56449556350708, + "learning_rate": 1.8003353801021985e-06, + "loss": 0.0012, + "num_input_tokens_seen": 39208544, + "step": 58165 + }, + { + "epoch": 1.4211027777099163, + "grad_norm": 16.43491554260254, + "learning_rate": 1.8002842487885493e-06, + "loss": 0.1808, + "num_input_tokens_seen": 39211808, + "step": 58170 + }, + { + "epoch": 1.4212249285417633, + "grad_norm": 0.1726624220609665, + "learning_rate": 1.8002331116550176e-06, + "loss": 0.0492, + "num_input_tokens_seen": 39215264, + "step": 58175 + }, + { + "epoch": 1.4213470793736105, + "grad_norm": 0.0881342962384224, + "learning_rate": 1.8001819687019758e-06, + "loss": 0.0165, + "num_input_tokens_seen": 39218400, + "step": 58180 + }, + { + "epoch": 1.4214692302054577, + "grad_norm": 0.7044225931167603, + "learning_rate": 1.800130819929795e-06, + "loss": 0.11, + "num_input_tokens_seen": 39221856, + "step": 58185 + }, + { + "epoch": 1.4215913810373049, + "grad_norm": 0.49497997760772705, + "learning_rate": 1.800079665338848e-06, + "loss": 0.0348, + "num_input_tokens_seen": 39225184, + "step": 58190 + }, + { + "epoch": 1.421713531869152, + "grad_norm": 12.24195671081543, + "learning_rate": 1.8000285049295066e-06, + "loss": 0.1837, + "num_input_tokens_seen": 39228128, + "step": 58195 + }, + { + "epoch": 1.4218356827009992, + "grad_norm": 4.202094078063965, + "learning_rate": 1.7999773387021423e-06, + "loss": 0.0519, + "num_input_tokens_seen": 39231584, + "step": 58200 + }, + { + "epoch": 1.4219578335328464, + "grad_norm": 0.598829448223114, + "learning_rate": 1.7999261666571281e-06, + "loss": 0.2783, + "num_input_tokens_seen": 39234720, + "step": 58205 + }, + { + "epoch": 1.4220799843646934, + "grad_norm": 0.19293074309825897, + "learning_rate": 1.7998749887948352e-06, + "loss": 0.1263, + "num_input_tokens_seen": 39238560, + "step": 58210 + }, + { + "epoch": 1.4222021351965406, + "grad_norm": 0.10075836628675461, + "learning_rate": 1.7998238051156367e-06, + "loss": 0.0268, + "num_input_tokens_seen": 39242272, + "step": 58215 + }, + { + "epoch": 1.4223242860283878, + "grad_norm": 0.21634642779827118, + "learning_rate": 1.799772615619904e-06, + "loss": 0.0969, + "num_input_tokens_seen": 39245600, + "step": 58220 + }, + { + "epoch": 1.422446436860235, + "grad_norm": 0.28032878041267395, + "learning_rate": 1.79972142030801e-06, + "loss": 0.0824, + "num_input_tokens_seen": 39249248, + "step": 58225 + }, + { + "epoch": 1.4225685876920822, + "grad_norm": 0.16427673399448395, + "learning_rate": 1.7996702191803265e-06, + "loss": 0.0967, + "num_input_tokens_seen": 39252512, + "step": 58230 + }, + { + "epoch": 1.4226907385239294, + "grad_norm": 0.6388681530952454, + "learning_rate": 1.7996190122372262e-06, + "loss": 0.0322, + "num_input_tokens_seen": 39255904, + "step": 58235 + }, + { + "epoch": 1.4228128893557765, + "grad_norm": 0.6887816190719604, + "learning_rate": 1.7995677994790813e-06, + "loss": 0.0035, + "num_input_tokens_seen": 39259104, + "step": 58240 + }, + { + "epoch": 1.4229350401876237, + "grad_norm": 0.16875456273555756, + "learning_rate": 1.7995165809062644e-06, + "loss": 0.1021, + "num_input_tokens_seen": 39262368, + "step": 58245 + }, + { + "epoch": 1.423057191019471, + "grad_norm": 16.76205825805664, + "learning_rate": 1.7994653565191478e-06, + "loss": 0.1678, + "num_input_tokens_seen": 39266016, + "step": 58250 + }, + { + "epoch": 1.4231793418513181, + "grad_norm": 22.382341384887695, + "learning_rate": 1.799414126318104e-06, + "loss": 0.2146, + "num_input_tokens_seen": 39269472, + "step": 58255 + }, + { + "epoch": 1.4233014926831653, + "grad_norm": 0.4394358694553375, + "learning_rate": 1.7993628903035058e-06, + "loss": 0.0499, + "num_input_tokens_seen": 39272800, + "step": 58260 + }, + { + "epoch": 1.4234236435150123, + "grad_norm": 1.7339340448379517, + "learning_rate": 1.7993116484757259e-06, + "loss": 0.1058, + "num_input_tokens_seen": 39276320, + "step": 58265 + }, + { + "epoch": 1.4235457943468595, + "grad_norm": 0.19091808795928955, + "learning_rate": 1.7992604008351364e-06, + "loss": 0.0026, + "num_input_tokens_seen": 39279776, + "step": 58270 + }, + { + "epoch": 1.4236679451787067, + "grad_norm": 33.9887809753418, + "learning_rate": 1.7992091473821102e-06, + "loss": 0.0919, + "num_input_tokens_seen": 39283360, + "step": 58275 + }, + { + "epoch": 1.4237900960105538, + "grad_norm": 107.04004669189453, + "learning_rate": 1.7991578881170203e-06, + "loss": 0.2179, + "num_input_tokens_seen": 39286880, + "step": 58280 + }, + { + "epoch": 1.423912246842401, + "grad_norm": 19.60547637939453, + "learning_rate": 1.7991066230402392e-06, + "loss": 0.1943, + "num_input_tokens_seen": 39290016, + "step": 58285 + }, + { + "epoch": 1.4240343976742482, + "grad_norm": 0.8691688776016235, + "learning_rate": 1.79905535215214e-06, + "loss": 0.0399, + "num_input_tokens_seen": 39293408, + "step": 58290 + }, + { + "epoch": 1.4241565485060952, + "grad_norm": 0.0896698608994484, + "learning_rate": 1.799004075453095e-06, + "loss": 0.1184, + "num_input_tokens_seen": 39297056, + "step": 58295 + }, + { + "epoch": 1.4242786993379424, + "grad_norm": 0.5838584303855896, + "learning_rate": 1.7989527929434777e-06, + "loss": 0.0444, + "num_input_tokens_seen": 39300128, + "step": 58300 + }, + { + "epoch": 1.4244008501697896, + "grad_norm": 11.378144264221191, + "learning_rate": 1.7989015046236608e-06, + "loss": 0.1591, + "num_input_tokens_seen": 39303456, + "step": 58305 + }, + { + "epoch": 1.4245230010016368, + "grad_norm": 20.538105010986328, + "learning_rate": 1.798850210494017e-06, + "loss": 0.101, + "num_input_tokens_seen": 39306848, + "step": 58310 + }, + { + "epoch": 1.424645151833484, + "grad_norm": 13.059782981872559, + "learning_rate": 1.79879891055492e-06, + "loss": 0.0354, + "num_input_tokens_seen": 39309984, + "step": 58315 + }, + { + "epoch": 1.4247673026653311, + "grad_norm": 41.89487075805664, + "learning_rate": 1.7987476048067425e-06, + "loss": 0.0058, + "num_input_tokens_seen": 39313120, + "step": 58320 + }, + { + "epoch": 1.4248894534971783, + "grad_norm": 38.37577438354492, + "learning_rate": 1.7986962932498572e-06, + "loss": 0.113, + "num_input_tokens_seen": 39316512, + "step": 58325 + }, + { + "epoch": 1.4250116043290255, + "grad_norm": 13.37543773651123, + "learning_rate": 1.7986449758846378e-06, + "loss": 0.1018, + "num_input_tokens_seen": 39319712, + "step": 58330 + }, + { + "epoch": 1.4251337551608727, + "grad_norm": 47.16141891479492, + "learning_rate": 1.7985936527114576e-06, + "loss": 0.1428, + "num_input_tokens_seen": 39322912, + "step": 58335 + }, + { + "epoch": 1.42525590599272, + "grad_norm": 0.9388689398765564, + "learning_rate": 1.798542323730689e-06, + "loss": 0.0386, + "num_input_tokens_seen": 39326304, + "step": 58340 + }, + { + "epoch": 1.425378056824567, + "grad_norm": 54.705787658691406, + "learning_rate": 1.7984909889427065e-06, + "loss": 0.1189, + "num_input_tokens_seen": 39329696, + "step": 58345 + }, + { + "epoch": 1.4255002076564143, + "grad_norm": 0.6181700825691223, + "learning_rate": 1.798439648347882e-06, + "loss": 0.0391, + "num_input_tokens_seen": 39332896, + "step": 58350 + }, + { + "epoch": 1.4256223584882612, + "grad_norm": 0.873293936252594, + "learning_rate": 1.7983883019465905e-06, + "loss": 0.0033, + "num_input_tokens_seen": 39336416, + "step": 58355 + }, + { + "epoch": 1.4257445093201084, + "grad_norm": 78.40597534179688, + "learning_rate": 1.7983369497392038e-06, + "loss": 0.1611, + "num_input_tokens_seen": 39339552, + "step": 58360 + }, + { + "epoch": 1.4258666601519556, + "grad_norm": 0.04144344478845596, + "learning_rate": 1.7982855917260965e-06, + "loss": 0.0557, + "num_input_tokens_seen": 39343008, + "step": 58365 + }, + { + "epoch": 1.4259888109838028, + "grad_norm": 23.530664443969727, + "learning_rate": 1.7982342279076415e-06, + "loss": 0.0352, + "num_input_tokens_seen": 39346848, + "step": 58370 + }, + { + "epoch": 1.42611096181565, + "grad_norm": 0.06019030511379242, + "learning_rate": 1.7981828582842122e-06, + "loss": 0.0015, + "num_input_tokens_seen": 39351584, + "step": 58375 + }, + { + "epoch": 1.4262331126474972, + "grad_norm": 99.20355987548828, + "learning_rate": 1.7981314828561829e-06, + "loss": 0.1958, + "num_input_tokens_seen": 39354976, + "step": 58380 + }, + { + "epoch": 1.4263552634793442, + "grad_norm": 0.35224005579948425, + "learning_rate": 1.7980801016239267e-06, + "loss": 0.0402, + "num_input_tokens_seen": 39357984, + "step": 58385 + }, + { + "epoch": 1.4264774143111913, + "grad_norm": 102.94215393066406, + "learning_rate": 1.7980287145878173e-06, + "loss": 0.1442, + "num_input_tokens_seen": 39361440, + "step": 58390 + }, + { + "epoch": 1.4265995651430385, + "grad_norm": 16.342992782592773, + "learning_rate": 1.7979773217482284e-06, + "loss": 0.1693, + "num_input_tokens_seen": 39364256, + "step": 58395 + }, + { + "epoch": 1.4267217159748857, + "grad_norm": 0.09097766876220703, + "learning_rate": 1.7979259231055338e-06, + "loss": 0.0666, + "num_input_tokens_seen": 39367712, + "step": 58400 + }, + { + "epoch": 1.426843866806733, + "grad_norm": 0.2545751929283142, + "learning_rate": 1.7978745186601075e-06, + "loss": 0.1043, + "num_input_tokens_seen": 39370656, + "step": 58405 + }, + { + "epoch": 1.42696601763858, + "grad_norm": 151.2428436279297, + "learning_rate": 1.7978231084123229e-06, + "loss": 0.0842, + "num_input_tokens_seen": 39374048, + "step": 58410 + }, + { + "epoch": 1.4270881684704273, + "grad_norm": 0.01164193358272314, + "learning_rate": 1.7977716923625538e-06, + "loss": 0.0402, + "num_input_tokens_seen": 39377568, + "step": 58415 + }, + { + "epoch": 1.4272103193022745, + "grad_norm": 0.5238284468650818, + "learning_rate": 1.7977202705111746e-06, + "loss": 0.0249, + "num_input_tokens_seen": 39381280, + "step": 58420 + }, + { + "epoch": 1.4273324701341217, + "grad_norm": 36.011661529541016, + "learning_rate": 1.7976688428585592e-06, + "loss": 0.2239, + "num_input_tokens_seen": 39384608, + "step": 58425 + }, + { + "epoch": 1.4274546209659689, + "grad_norm": 18.976518630981445, + "learning_rate": 1.7976174094050813e-06, + "loss": 0.0534, + "num_input_tokens_seen": 39388192, + "step": 58430 + }, + { + "epoch": 1.427576771797816, + "grad_norm": 14.480353355407715, + "learning_rate": 1.797565970151115e-06, + "loss": 0.103, + "num_input_tokens_seen": 39391520, + "step": 58435 + }, + { + "epoch": 1.427698922629663, + "grad_norm": 0.8790879845619202, + "learning_rate": 1.7975145250970346e-06, + "loss": 0.1048, + "num_input_tokens_seen": 39394784, + "step": 58440 + }, + { + "epoch": 1.4278210734615102, + "grad_norm": 0.1189805343747139, + "learning_rate": 1.797463074243214e-06, + "loss": 0.1401, + "num_input_tokens_seen": 39398048, + "step": 58445 + }, + { + "epoch": 1.4279432242933574, + "grad_norm": 59.93393325805664, + "learning_rate": 1.7974116175900273e-06, + "loss": 0.0479, + "num_input_tokens_seen": 39401568, + "step": 58450 + }, + { + "epoch": 1.4280653751252046, + "grad_norm": 22.313800811767578, + "learning_rate": 1.797360155137849e-06, + "loss": 0.0029, + "num_input_tokens_seen": 39404576, + "step": 58455 + }, + { + "epoch": 1.4281875259570518, + "grad_norm": 0.21892331540584564, + "learning_rate": 1.797308686887053e-06, + "loss": 0.1259, + "num_input_tokens_seen": 39407712, + "step": 58460 + }, + { + "epoch": 1.428309676788899, + "grad_norm": 0.3004763722419739, + "learning_rate": 1.797257212838014e-06, + "loss": 0.0671, + "num_input_tokens_seen": 39410976, + "step": 58465 + }, + { + "epoch": 1.4284318276207462, + "grad_norm": 0.31758275628089905, + "learning_rate": 1.797205732991106e-06, + "loss": 0.0049, + "num_input_tokens_seen": 39414432, + "step": 58470 + }, + { + "epoch": 1.4285539784525931, + "grad_norm": 16.61806297302246, + "learning_rate": 1.7971542473467036e-06, + "loss": 0.1524, + "num_input_tokens_seen": 39417760, + "step": 58475 + }, + { + "epoch": 1.4286761292844403, + "grad_norm": 9.997937202453613, + "learning_rate": 1.797102755905181e-06, + "loss": 0.097, + "num_input_tokens_seen": 39420960, + "step": 58480 + }, + { + "epoch": 1.4287982801162875, + "grad_norm": 12.6223726272583, + "learning_rate": 1.7970512586669128e-06, + "loss": 0.0374, + "num_input_tokens_seen": 39424224, + "step": 58485 + }, + { + "epoch": 1.4289204309481347, + "grad_norm": 3.157579183578491, + "learning_rate": 1.7969997556322736e-06, + "loss": 0.0026, + "num_input_tokens_seen": 39427872, + "step": 58490 + }, + { + "epoch": 1.429042581779982, + "grad_norm": 2.2822046279907227, + "learning_rate": 1.7969482468016377e-06, + "loss": 0.132, + "num_input_tokens_seen": 39431520, + "step": 58495 + }, + { + "epoch": 1.429164732611829, + "grad_norm": 0.06637066602706909, + "learning_rate": 1.7968967321753796e-06, + "loss": 0.0397, + "num_input_tokens_seen": 39435552, + "step": 58500 + }, + { + "epoch": 1.4292868834436763, + "grad_norm": 0.06641530990600586, + "learning_rate": 1.7968452117538742e-06, + "loss": 0.0854, + "num_input_tokens_seen": 39439200, + "step": 58505 + }, + { + "epoch": 1.4294090342755235, + "grad_norm": 1.0716360807418823, + "learning_rate": 1.7967936855374964e-06, + "loss": 0.0433, + "num_input_tokens_seen": 39442400, + "step": 58510 + }, + { + "epoch": 1.4295311851073706, + "grad_norm": 24.701417922973633, + "learning_rate": 1.7967421535266203e-06, + "loss": 0.1447, + "num_input_tokens_seen": 39445600, + "step": 58515 + }, + { + "epoch": 1.4296533359392178, + "grad_norm": 8.375645637512207, + "learning_rate": 1.796690615721621e-06, + "loss": 0.0557, + "num_input_tokens_seen": 39449056, + "step": 58520 + }, + { + "epoch": 1.429775486771065, + "grad_norm": 0.015680750831961632, + "learning_rate": 1.7966390721228733e-06, + "loss": 0.0828, + "num_input_tokens_seen": 39452768, + "step": 58525 + }, + { + "epoch": 1.429897637602912, + "grad_norm": 0.13656115531921387, + "learning_rate": 1.7965875227307522e-06, + "loss": 0.1749, + "num_input_tokens_seen": 39456352, + "step": 58530 + }, + { + "epoch": 1.4300197884347592, + "grad_norm": 39.53352737426758, + "learning_rate": 1.796535967545632e-06, + "loss": 0.1555, + "num_input_tokens_seen": 39459360, + "step": 58535 + }, + { + "epoch": 1.4301419392666064, + "grad_norm": 0.15889528393745422, + "learning_rate": 1.7964844065678882e-06, + "loss": 0.0359, + "num_input_tokens_seen": 39462944, + "step": 58540 + }, + { + "epoch": 1.4302640900984536, + "grad_norm": 13.821887969970703, + "learning_rate": 1.7964328397978954e-06, + "loss": 0.0913, + "num_input_tokens_seen": 39466336, + "step": 58545 + }, + { + "epoch": 1.4303862409303008, + "grad_norm": 30.154619216918945, + "learning_rate": 1.796381267236029e-06, + "loss": 0.0912, + "num_input_tokens_seen": 39469664, + "step": 58550 + }, + { + "epoch": 1.430508391762148, + "grad_norm": 13.006333351135254, + "learning_rate": 1.7963296888826638e-06, + "loss": 0.1042, + "num_input_tokens_seen": 39473056, + "step": 58555 + }, + { + "epoch": 1.4306305425939951, + "grad_norm": 3.447408676147461, + "learning_rate": 1.796278104738175e-06, + "loss": 0.0864, + "num_input_tokens_seen": 39476384, + "step": 58560 + }, + { + "epoch": 1.430752693425842, + "grad_norm": 0.3481005132198334, + "learning_rate": 1.7962265148029374e-06, + "loss": 0.0593, + "num_input_tokens_seen": 39479520, + "step": 58565 + }, + { + "epoch": 1.4308748442576893, + "grad_norm": 0.144733265042305, + "learning_rate": 1.7961749190773263e-06, + "loss": 0.1574, + "num_input_tokens_seen": 39483040, + "step": 58570 + }, + { + "epoch": 1.4309969950895365, + "grad_norm": 52.843868255615234, + "learning_rate": 1.7961233175617173e-06, + "loss": 0.1276, + "num_input_tokens_seen": 39486560, + "step": 58575 + }, + { + "epoch": 1.4311191459213837, + "grad_norm": 0.256605327129364, + "learning_rate": 1.7960717102564855e-06, + "loss": 0.0025, + "num_input_tokens_seen": 39489888, + "step": 58580 + }, + { + "epoch": 1.4312412967532309, + "grad_norm": 0.4557490646839142, + "learning_rate": 1.796020097162006e-06, + "loss": 0.1429, + "num_input_tokens_seen": 39493344, + "step": 58585 + }, + { + "epoch": 1.431363447585078, + "grad_norm": 89.59080505371094, + "learning_rate": 1.7959684782786542e-06, + "loss": 0.0477, + "num_input_tokens_seen": 39496608, + "step": 58590 + }, + { + "epoch": 1.4314855984169252, + "grad_norm": 0.42434826493263245, + "learning_rate": 1.7959168536068056e-06, + "loss": 0.0153, + "num_input_tokens_seen": 39499552, + "step": 58595 + }, + { + "epoch": 1.4316077492487724, + "grad_norm": 36.179927825927734, + "learning_rate": 1.7958652231468357e-06, + "loss": 0.1024, + "num_input_tokens_seen": 39503008, + "step": 58600 + }, + { + "epoch": 1.4317299000806196, + "grad_norm": 0.0644034817814827, + "learning_rate": 1.7958135868991195e-06, + "loss": 0.0562, + "num_input_tokens_seen": 39506400, + "step": 58605 + }, + { + "epoch": 1.4318520509124668, + "grad_norm": 0.04763718321919441, + "learning_rate": 1.7957619448640332e-06, + "loss": 0.1308, + "num_input_tokens_seen": 39510240, + "step": 58610 + }, + { + "epoch": 1.431974201744314, + "grad_norm": 18.100360870361328, + "learning_rate": 1.7957102970419516e-06, + "loss": 0.0285, + "num_input_tokens_seen": 39513504, + "step": 58615 + }, + { + "epoch": 1.432096352576161, + "grad_norm": 32.79466247558594, + "learning_rate": 1.795658643433251e-06, + "loss": 0.0449, + "num_input_tokens_seen": 39516960, + "step": 58620 + }, + { + "epoch": 1.4322185034080082, + "grad_norm": 0.055818889290094376, + "learning_rate": 1.7956069840383066e-06, + "loss": 0.0842, + "num_input_tokens_seen": 39520544, + "step": 58625 + }, + { + "epoch": 1.4323406542398553, + "grad_norm": 0.12097417563199997, + "learning_rate": 1.7955553188574944e-06, + "loss": 0.0956, + "num_input_tokens_seen": 39524256, + "step": 58630 + }, + { + "epoch": 1.4324628050717025, + "grad_norm": 0.05628423020243645, + "learning_rate": 1.7955036478911896e-06, + "loss": 0.1319, + "num_input_tokens_seen": 39528032, + "step": 58635 + }, + { + "epoch": 1.4325849559035497, + "grad_norm": 21.71570587158203, + "learning_rate": 1.7954519711397689e-06, + "loss": 0.0841, + "num_input_tokens_seen": 39531424, + "step": 58640 + }, + { + "epoch": 1.432707106735397, + "grad_norm": 0.11519166082143784, + "learning_rate": 1.795400288603607e-06, + "loss": 0.0688, + "num_input_tokens_seen": 39535200, + "step": 58645 + }, + { + "epoch": 1.432829257567244, + "grad_norm": 0.2426871657371521, + "learning_rate": 1.7953486002830802e-06, + "loss": 0.0019, + "num_input_tokens_seen": 39538720, + "step": 58650 + }, + { + "epoch": 1.432951408399091, + "grad_norm": 33.8471794128418, + "learning_rate": 1.7952969061785647e-06, + "loss": 0.008, + "num_input_tokens_seen": 39541920, + "step": 58655 + }, + { + "epoch": 1.4330735592309383, + "grad_norm": 63.138633728027344, + "learning_rate": 1.7952452062904362e-06, + "loss": 0.0869, + "num_input_tokens_seen": 39545312, + "step": 58660 + }, + { + "epoch": 1.4331957100627855, + "grad_norm": 0.07256802171468735, + "learning_rate": 1.7951935006190709e-06, + "loss": 0.0162, + "num_input_tokens_seen": 39548384, + "step": 58665 + }, + { + "epoch": 1.4333178608946326, + "grad_norm": 0.044713884592056274, + "learning_rate": 1.795141789164844e-06, + "loss": 0.0794, + "num_input_tokens_seen": 39552096, + "step": 58670 + }, + { + "epoch": 1.4334400117264798, + "grad_norm": 145.95172119140625, + "learning_rate": 1.7950900719281326e-06, + "loss": 0.0993, + "num_input_tokens_seen": 39555744, + "step": 58675 + }, + { + "epoch": 1.433562162558327, + "grad_norm": 25.724279403686523, + "learning_rate": 1.7950383489093118e-06, + "loss": 0.1404, + "num_input_tokens_seen": 39559200, + "step": 58680 + }, + { + "epoch": 1.4336843133901742, + "grad_norm": 0.8542588353157043, + "learning_rate": 1.7949866201087592e-06, + "loss": 0.0497, + "num_input_tokens_seen": 39562592, + "step": 58685 + }, + { + "epoch": 1.4338064642220214, + "grad_norm": 20.692792892456055, + "learning_rate": 1.7949348855268494e-06, + "loss": 0.0657, + "num_input_tokens_seen": 39565728, + "step": 58690 + }, + { + "epoch": 1.4339286150538686, + "grad_norm": 9.70878791809082, + "learning_rate": 1.7948831451639594e-06, + "loss": 0.0124, + "num_input_tokens_seen": 39569056, + "step": 58695 + }, + { + "epoch": 1.4340507658857158, + "grad_norm": 20.545381546020508, + "learning_rate": 1.7948313990204654e-06, + "loss": 0.0726, + "num_input_tokens_seen": 39572000, + "step": 58700 + }, + { + "epoch": 1.434172916717563, + "grad_norm": 0.12542176246643066, + "learning_rate": 1.7947796470967438e-06, + "loss": 0.0827, + "num_input_tokens_seen": 39575328, + "step": 58705 + }, + { + "epoch": 1.43429506754941, + "grad_norm": 0.3020426332950592, + "learning_rate": 1.7947278893931705e-06, + "loss": 0.085, + "num_input_tokens_seen": 39578656, + "step": 58710 + }, + { + "epoch": 1.4344172183812571, + "grad_norm": 0.0259727593511343, + "learning_rate": 1.7946761259101226e-06, + "loss": 0.0675, + "num_input_tokens_seen": 39582176, + "step": 58715 + }, + { + "epoch": 1.4345393692131043, + "grad_norm": 0.03346165642142296, + "learning_rate": 1.7946243566479762e-06, + "loss": 0.0015, + "num_input_tokens_seen": 39585824, + "step": 58720 + }, + { + "epoch": 1.4346615200449515, + "grad_norm": 0.2484491467475891, + "learning_rate": 1.7945725816071074e-06, + "loss": 0.0597, + "num_input_tokens_seen": 39588640, + "step": 58725 + }, + { + "epoch": 1.4347836708767987, + "grad_norm": 179.8692626953125, + "learning_rate": 1.794520800787893e-06, + "loss": 0.1699, + "num_input_tokens_seen": 39591840, + "step": 58730 + }, + { + "epoch": 1.434905821708646, + "grad_norm": 0.08298604935407639, + "learning_rate": 1.79446901419071e-06, + "loss": 0.0763, + "num_input_tokens_seen": 39595232, + "step": 58735 + }, + { + "epoch": 1.435027972540493, + "grad_norm": 1.8074345588684082, + "learning_rate": 1.7944172218159348e-06, + "loss": 0.0246, + "num_input_tokens_seen": 39598560, + "step": 58740 + }, + { + "epoch": 1.43515012337234, + "grad_norm": 33.335609436035156, + "learning_rate": 1.7943654236639436e-06, + "loss": 0.1222, + "num_input_tokens_seen": 39601632, + "step": 58745 + }, + { + "epoch": 1.4352722742041872, + "grad_norm": 0.4874430000782013, + "learning_rate": 1.7943136197351135e-06, + "loss": 0.0024, + "num_input_tokens_seen": 39605472, + "step": 58750 + }, + { + "epoch": 1.4353944250360344, + "grad_norm": 37.11099624633789, + "learning_rate": 1.794261810029821e-06, + "loss": 0.1361, + "num_input_tokens_seen": 39609056, + "step": 58755 + }, + { + "epoch": 1.4355165758678816, + "grad_norm": 0.23305939137935638, + "learning_rate": 1.794209994548443e-06, + "loss": 0.0786, + "num_input_tokens_seen": 39611936, + "step": 58760 + }, + { + "epoch": 1.4356387266997288, + "grad_norm": 10.859920501708984, + "learning_rate": 1.7941581732913562e-06, + "loss": 0.1309, + "num_input_tokens_seen": 39615520, + "step": 58765 + }, + { + "epoch": 1.435760877531576, + "grad_norm": 0.14673854410648346, + "learning_rate": 1.7941063462589376e-06, + "loss": 0.0376, + "num_input_tokens_seen": 39619296, + "step": 58770 + }, + { + "epoch": 1.4358830283634232, + "grad_norm": 0.037890564650297165, + "learning_rate": 1.7940545134515642e-06, + "loss": 0.1143, + "num_input_tokens_seen": 39622560, + "step": 58775 + }, + { + "epoch": 1.4360051791952704, + "grad_norm": 0.8310354948043823, + "learning_rate": 1.7940026748696128e-06, + "loss": 0.0009, + "num_input_tokens_seen": 39625888, + "step": 58780 + }, + { + "epoch": 1.4361273300271176, + "grad_norm": 2.9553492069244385, + "learning_rate": 1.7939508305134604e-06, + "loss": 0.0466, + "num_input_tokens_seen": 39629984, + "step": 58785 + }, + { + "epoch": 1.4362494808589648, + "grad_norm": 0.17864832282066345, + "learning_rate": 1.7938989803834838e-06, + "loss": 0.1889, + "num_input_tokens_seen": 39633312, + "step": 58790 + }, + { + "epoch": 1.436371631690812, + "grad_norm": 2.7538199424743652, + "learning_rate": 1.7938471244800603e-06, + "loss": 0.118, + "num_input_tokens_seen": 39636640, + "step": 58795 + }, + { + "epoch": 1.436493782522659, + "grad_norm": 0.39362823963165283, + "learning_rate": 1.7937952628035673e-06, + "loss": 0.0903, + "num_input_tokens_seen": 39639968, + "step": 58800 + }, + { + "epoch": 1.436615933354506, + "grad_norm": 0.21269036829471588, + "learning_rate": 1.7937433953543815e-06, + "loss": 0.0207, + "num_input_tokens_seen": 39643296, + "step": 58805 + }, + { + "epoch": 1.4367380841863533, + "grad_norm": 0.2682448923587799, + "learning_rate": 1.79369152213288e-06, + "loss": 0.1242, + "num_input_tokens_seen": 39647008, + "step": 58810 + }, + { + "epoch": 1.4368602350182005, + "grad_norm": 22.306591033935547, + "learning_rate": 1.7936396431394405e-06, + "loss": 0.0988, + "num_input_tokens_seen": 39650464, + "step": 58815 + }, + { + "epoch": 1.4369823858500477, + "grad_norm": 0.32385772466659546, + "learning_rate": 1.7935877583744402e-06, + "loss": 0.064, + "num_input_tokens_seen": 39653856, + "step": 58820 + }, + { + "epoch": 1.4371045366818949, + "grad_norm": 0.8138442039489746, + "learning_rate": 1.7935358678382563e-06, + "loss": 0.0035, + "num_input_tokens_seen": 39657184, + "step": 58825 + }, + { + "epoch": 1.437226687513742, + "grad_norm": 0.2751285135746002, + "learning_rate": 1.7934839715312657e-06, + "loss": 0.0999, + "num_input_tokens_seen": 39660384, + "step": 58830 + }, + { + "epoch": 1.437348838345589, + "grad_norm": 2.161569118499756, + "learning_rate": 1.7934320694538462e-06, + "loss": 0.1024, + "num_input_tokens_seen": 39663840, + "step": 58835 + }, + { + "epoch": 1.4374709891774362, + "grad_norm": 8.404568672180176, + "learning_rate": 1.7933801616063756e-06, + "loss": 0.0798, + "num_input_tokens_seen": 39666912, + "step": 58840 + }, + { + "epoch": 1.4375931400092834, + "grad_norm": 0.01838994212448597, + "learning_rate": 1.793328247989231e-06, + "loss": 0.0659, + "num_input_tokens_seen": 39670176, + "step": 58845 + }, + { + "epoch": 1.4377152908411306, + "grad_norm": 2.829005241394043, + "learning_rate": 1.7932763286027903e-06, + "loss": 0.0839, + "num_input_tokens_seen": 39673248, + "step": 58850 + }, + { + "epoch": 1.4378374416729778, + "grad_norm": 4.462026596069336, + "learning_rate": 1.7932244034474305e-06, + "loss": 0.0521, + "num_input_tokens_seen": 39677024, + "step": 58855 + }, + { + "epoch": 1.437959592504825, + "grad_norm": 14.132698059082031, + "learning_rate": 1.7931724725235294e-06, + "loss": 0.0915, + "num_input_tokens_seen": 39680032, + "step": 58860 + }, + { + "epoch": 1.4380817433366722, + "grad_norm": 0.03028332069516182, + "learning_rate": 1.7931205358314648e-06, + "loss": 0.0043, + "num_input_tokens_seen": 39683424, + "step": 58865 + }, + { + "epoch": 1.4382038941685193, + "grad_norm": 0.847586989402771, + "learning_rate": 1.7930685933716142e-06, + "loss": 0.0966, + "num_input_tokens_seen": 39686816, + "step": 58870 + }, + { + "epoch": 1.4383260450003665, + "grad_norm": 43.78037643432617, + "learning_rate": 1.7930166451443558e-06, + "loss": 0.1053, + "num_input_tokens_seen": 39690016, + "step": 58875 + }, + { + "epoch": 1.4384481958322137, + "grad_norm": 61.2498664855957, + "learning_rate": 1.7929646911500669e-06, + "loss": 0.1942, + "num_input_tokens_seen": 39693792, + "step": 58880 + }, + { + "epoch": 1.438570346664061, + "grad_norm": 0.8849436044692993, + "learning_rate": 1.7929127313891254e-06, + "loss": 0.069, + "num_input_tokens_seen": 39696992, + "step": 58885 + }, + { + "epoch": 1.4386924974959079, + "grad_norm": 15.736291885375977, + "learning_rate": 1.7928607658619095e-06, + "loss": 0.1596, + "num_input_tokens_seen": 39700192, + "step": 58890 + }, + { + "epoch": 1.438814648327755, + "grad_norm": 0.7857063412666321, + "learning_rate": 1.7928087945687963e-06, + "loss": 0.0381, + "num_input_tokens_seen": 39703264, + "step": 58895 + }, + { + "epoch": 1.4389367991596023, + "grad_norm": 29.244619369506836, + "learning_rate": 1.7927568175101652e-06, + "loss": 0.0723, + "num_input_tokens_seen": 39706208, + "step": 58900 + }, + { + "epoch": 1.4390589499914495, + "grad_norm": 10.40878677368164, + "learning_rate": 1.7927048346863925e-06, + "loss": 0.1089, + "num_input_tokens_seen": 39709280, + "step": 58905 + }, + { + "epoch": 1.4391811008232966, + "grad_norm": 11.405935287475586, + "learning_rate": 1.7926528460978573e-06, + "loss": 0.0524, + "num_input_tokens_seen": 39712800, + "step": 58910 + }, + { + "epoch": 1.4393032516551438, + "grad_norm": 2.9893431663513184, + "learning_rate": 1.7926008517449373e-06, + "loss": 0.0369, + "num_input_tokens_seen": 39715872, + "step": 58915 + }, + { + "epoch": 1.4394254024869908, + "grad_norm": 2.8281633853912354, + "learning_rate": 1.7925488516280113e-06, + "loss": 0.0941, + "num_input_tokens_seen": 39718944, + "step": 58920 + }, + { + "epoch": 1.439547553318838, + "grad_norm": 0.636328399181366, + "learning_rate": 1.7924968457474563e-06, + "loss": 0.0318, + "num_input_tokens_seen": 39722144, + "step": 58925 + }, + { + "epoch": 1.4396697041506852, + "grad_norm": 8.355908393859863, + "learning_rate": 1.7924448341036512e-06, + "loss": 0.0685, + "num_input_tokens_seen": 39725280, + "step": 58930 + }, + { + "epoch": 1.4397918549825324, + "grad_norm": 0.20460966229438782, + "learning_rate": 1.792392816696974e-06, + "loss": 0.0387, + "num_input_tokens_seen": 39729440, + "step": 58935 + }, + { + "epoch": 1.4399140058143796, + "grad_norm": 0.26626622676849365, + "learning_rate": 1.7923407935278032e-06, + "loss": 0.0736, + "num_input_tokens_seen": 39732896, + "step": 58940 + }, + { + "epoch": 1.4400361566462268, + "grad_norm": 10.729291915893555, + "learning_rate": 1.7922887645965173e-06, + "loss": 0.0424, + "num_input_tokens_seen": 39736032, + "step": 58945 + }, + { + "epoch": 1.440158307478074, + "grad_norm": 0.5907509326934814, + "learning_rate": 1.792236729903494e-06, + "loss": 0.1568, + "num_input_tokens_seen": 39739040, + "step": 58950 + }, + { + "epoch": 1.4402804583099211, + "grad_norm": 0.06551672518253326, + "learning_rate": 1.7921846894491121e-06, + "loss": 0.0775, + "num_input_tokens_seen": 39742496, + "step": 58955 + }, + { + "epoch": 1.4404026091417683, + "grad_norm": 0.35346364974975586, + "learning_rate": 1.7921326432337505e-06, + "loss": 0.1183, + "num_input_tokens_seen": 39745568, + "step": 58960 + }, + { + "epoch": 1.4405247599736155, + "grad_norm": 0.16065485775470734, + "learning_rate": 1.792080591257787e-06, + "loss": 0.0617, + "num_input_tokens_seen": 39748960, + "step": 58965 + }, + { + "epoch": 1.4406469108054627, + "grad_norm": 0.12054590880870819, + "learning_rate": 1.7920285335216004e-06, + "loss": 0.0014, + "num_input_tokens_seen": 39752224, + "step": 58970 + }, + { + "epoch": 1.4407690616373097, + "grad_norm": 0.05816735327243805, + "learning_rate": 1.7919764700255693e-06, + "loss": 0.02, + "num_input_tokens_seen": 39755616, + "step": 58975 + }, + { + "epoch": 1.4408912124691569, + "grad_norm": 26.84071922302246, + "learning_rate": 1.7919244007700725e-06, + "loss": 0.1716, + "num_input_tokens_seen": 39758816, + "step": 58980 + }, + { + "epoch": 1.441013363301004, + "grad_norm": 0.4252214729785919, + "learning_rate": 1.791872325755488e-06, + "loss": 0.0015, + "num_input_tokens_seen": 39761952, + "step": 58985 + }, + { + "epoch": 1.4411355141328512, + "grad_norm": 15.643749237060547, + "learning_rate": 1.7918202449821954e-06, + "loss": 0.3353, + "num_input_tokens_seen": 39765024, + "step": 58990 + }, + { + "epoch": 1.4412576649646984, + "grad_norm": 1.339182734489441, + "learning_rate": 1.7917681584505727e-06, + "loss": 0.1574, + "num_input_tokens_seen": 39768352, + "step": 58995 + }, + { + "epoch": 1.4413798157965456, + "grad_norm": 0.2828620374202728, + "learning_rate": 1.791716066160999e-06, + "loss": 0.0455, + "num_input_tokens_seen": 39772576, + "step": 59000 + }, + { + "epoch": 1.4415019666283928, + "grad_norm": 1.1742899417877197, + "learning_rate": 1.7916639681138532e-06, + "loss": 0.0619, + "num_input_tokens_seen": 39775456, + "step": 59005 + }, + { + "epoch": 1.4416241174602398, + "grad_norm": 10.849560737609863, + "learning_rate": 1.791611864309514e-06, + "loss": 0.0899, + "num_input_tokens_seen": 39779104, + "step": 59010 + }, + { + "epoch": 1.441746268292087, + "grad_norm": 12.845623016357422, + "learning_rate": 1.7915597547483606e-06, + "loss": 0.058, + "num_input_tokens_seen": 39782432, + "step": 59015 + }, + { + "epoch": 1.4418684191239342, + "grad_norm": 10.224414825439453, + "learning_rate": 1.7915076394307717e-06, + "loss": 0.1444, + "num_input_tokens_seen": 39785696, + "step": 59020 + }, + { + "epoch": 1.4419905699557813, + "grad_norm": 12.651299476623535, + "learning_rate": 1.7914555183571266e-06, + "loss": 0.0745, + "num_input_tokens_seen": 39788832, + "step": 59025 + }, + { + "epoch": 1.4421127207876285, + "grad_norm": 0.08147825300693512, + "learning_rate": 1.7914033915278036e-06, + "loss": 0.0778, + "num_input_tokens_seen": 39792032, + "step": 59030 + }, + { + "epoch": 1.4422348716194757, + "grad_norm": 11.500874519348145, + "learning_rate": 1.7913512589431825e-06, + "loss": 0.0806, + "num_input_tokens_seen": 39795232, + "step": 59035 + }, + { + "epoch": 1.442357022451323, + "grad_norm": 10.94676399230957, + "learning_rate": 1.7912991206036421e-06, + "loss": 0.0474, + "num_input_tokens_seen": 39798880, + "step": 59040 + }, + { + "epoch": 1.44247917328317, + "grad_norm": 0.040244702249765396, + "learning_rate": 1.791246976509562e-06, + "loss": 0.0484, + "num_input_tokens_seen": 39802144, + "step": 59045 + }, + { + "epoch": 1.4426013241150173, + "grad_norm": 43.799583435058594, + "learning_rate": 1.7911948266613205e-06, + "loss": 0.2378, + "num_input_tokens_seen": 39805472, + "step": 59050 + }, + { + "epoch": 1.4427234749468645, + "grad_norm": 150.8211669921875, + "learning_rate": 1.791142671059298e-06, + "loss": 0.1259, + "num_input_tokens_seen": 39808864, + "step": 59055 + }, + { + "epoch": 1.4428456257787117, + "grad_norm": 0.9879307746887207, + "learning_rate": 1.7910905097038728e-06, + "loss": 0.0342, + "num_input_tokens_seen": 39812320, + "step": 59060 + }, + { + "epoch": 1.4429677766105586, + "grad_norm": 31.04475212097168, + "learning_rate": 1.7910383425954248e-06, + "loss": 0.1464, + "num_input_tokens_seen": 39815904, + "step": 59065 + }, + { + "epoch": 1.4430899274424058, + "grad_norm": 32.14227294921875, + "learning_rate": 1.7909861697343333e-06, + "loss": 0.0687, + "num_input_tokens_seen": 39818976, + "step": 59070 + }, + { + "epoch": 1.443212078274253, + "grad_norm": 0.20150291919708252, + "learning_rate": 1.7909339911209775e-06, + "loss": 0.0014, + "num_input_tokens_seen": 39822624, + "step": 59075 + }, + { + "epoch": 1.4433342291061002, + "grad_norm": 140.92352294921875, + "learning_rate": 1.790881806755737e-06, + "loss": 0.1436, + "num_input_tokens_seen": 39825824, + "step": 59080 + }, + { + "epoch": 1.4434563799379474, + "grad_norm": 0.2697312533855438, + "learning_rate": 1.7908296166389914e-06, + "loss": 0.0311, + "num_input_tokens_seen": 39829472, + "step": 59085 + }, + { + "epoch": 1.4435785307697946, + "grad_norm": 15.324634552001953, + "learning_rate": 1.7907774207711199e-06, + "loss": 0.0496, + "num_input_tokens_seen": 39832800, + "step": 59090 + }, + { + "epoch": 1.4437006816016418, + "grad_norm": 14.72012996673584, + "learning_rate": 1.7907252191525023e-06, + "loss": 0.1983, + "num_input_tokens_seen": 39836640, + "step": 59095 + }, + { + "epoch": 1.4438228324334887, + "grad_norm": 0.4023982584476471, + "learning_rate": 1.7906730117835185e-06, + "loss": 0.0758, + "num_input_tokens_seen": 39840224, + "step": 59100 + }, + { + "epoch": 1.443944983265336, + "grad_norm": 11.866134643554688, + "learning_rate": 1.7906207986645477e-06, + "loss": 0.0342, + "num_input_tokens_seen": 39843872, + "step": 59105 + }, + { + "epoch": 1.4440671340971831, + "grad_norm": 19.953330993652344, + "learning_rate": 1.7905685797959697e-06, + "loss": 0.103, + "num_input_tokens_seen": 39846944, + "step": 59110 + }, + { + "epoch": 1.4441892849290303, + "grad_norm": 0.373773992061615, + "learning_rate": 1.7905163551781643e-06, + "loss": 0.1281, + "num_input_tokens_seen": 39851296, + "step": 59115 + }, + { + "epoch": 1.4443114357608775, + "grad_norm": 0.054920095950365067, + "learning_rate": 1.7904641248115117e-06, + "loss": 0.0785, + "num_input_tokens_seen": 39854880, + "step": 59120 + }, + { + "epoch": 1.4444335865927247, + "grad_norm": 24.973628997802734, + "learning_rate": 1.7904118886963913e-06, + "loss": 0.0942, + "num_input_tokens_seen": 39858016, + "step": 59125 + }, + { + "epoch": 1.4445557374245719, + "grad_norm": 0.299342542886734, + "learning_rate": 1.7903596468331829e-06, + "loss": 0.0749, + "num_input_tokens_seen": 39861408, + "step": 59130 + }, + { + "epoch": 1.444677888256419, + "grad_norm": 124.8744125366211, + "learning_rate": 1.7903073992222666e-06, + "loss": 0.0521, + "num_input_tokens_seen": 39864608, + "step": 59135 + }, + { + "epoch": 1.4448000390882663, + "grad_norm": 0.42814555764198303, + "learning_rate": 1.7902551458640222e-06, + "loss": 0.2063, + "num_input_tokens_seen": 39868000, + "step": 59140 + }, + { + "epoch": 1.4449221899201135, + "grad_norm": 19.024675369262695, + "learning_rate": 1.79020288675883e-06, + "loss": 0.1936, + "num_input_tokens_seen": 39871520, + "step": 59145 + }, + { + "epoch": 1.4450443407519606, + "grad_norm": 0.2149042934179306, + "learning_rate": 1.79015062190707e-06, + "loss": 0.1798, + "num_input_tokens_seen": 39874656, + "step": 59150 + }, + { + "epoch": 1.4451664915838076, + "grad_norm": 0.3116861581802368, + "learning_rate": 1.7900983513091218e-06, + "loss": 0.0061, + "num_input_tokens_seen": 39877728, + "step": 59155 + }, + { + "epoch": 1.4452886424156548, + "grad_norm": 13.638267517089844, + "learning_rate": 1.790046074965366e-06, + "loss": 0.0904, + "num_input_tokens_seen": 39880992, + "step": 59160 + }, + { + "epoch": 1.445410793247502, + "grad_norm": 0.12967146933078766, + "learning_rate": 1.7899937928761829e-06, + "loss": 0.1306, + "num_input_tokens_seen": 39884192, + "step": 59165 + }, + { + "epoch": 1.4455329440793492, + "grad_norm": 8.077123641967773, + "learning_rate": 1.789941505041952e-06, + "loss": 0.161, + "num_input_tokens_seen": 39887136, + "step": 59170 + }, + { + "epoch": 1.4456550949111964, + "grad_norm": 0.0998186245560646, + "learning_rate": 1.7898892114630542e-06, + "loss": 0.0333, + "num_input_tokens_seen": 39890720, + "step": 59175 + }, + { + "epoch": 1.4457772457430436, + "grad_norm": 0.12306854128837585, + "learning_rate": 1.78983691213987e-06, + "loss": 0.1145, + "num_input_tokens_seen": 39894176, + "step": 59180 + }, + { + "epoch": 1.4458993965748907, + "grad_norm": 67.00183868408203, + "learning_rate": 1.789784607072779e-06, + "loss": 0.0209, + "num_input_tokens_seen": 39897376, + "step": 59185 + }, + { + "epoch": 1.4460215474067377, + "grad_norm": 0.15235014259815216, + "learning_rate": 1.7897322962621616e-06, + "loss": 0.044, + "num_input_tokens_seen": 39900320, + "step": 59190 + }, + { + "epoch": 1.446143698238585, + "grad_norm": 69.80928802490234, + "learning_rate": 1.789679979708399e-06, + "loss": 0.0499, + "num_input_tokens_seen": 39903520, + "step": 59195 + }, + { + "epoch": 1.446265849070432, + "grad_norm": 0.33987465500831604, + "learning_rate": 1.7896276574118709e-06, + "loss": 0.2074, + "num_input_tokens_seen": 39907552, + "step": 59200 + }, + { + "epoch": 1.4463879999022793, + "grad_norm": 59.96290969848633, + "learning_rate": 1.7895753293729583e-06, + "loss": 0.104, + "num_input_tokens_seen": 39911392, + "step": 59205 + }, + { + "epoch": 1.4465101507341265, + "grad_norm": 40.25039291381836, + "learning_rate": 1.7895229955920414e-06, + "loss": 0.1042, + "num_input_tokens_seen": 39914912, + "step": 59210 + }, + { + "epoch": 1.4466323015659737, + "grad_norm": 0.11362236738204956, + "learning_rate": 1.789470656069501e-06, + "loss": 0.0912, + "num_input_tokens_seen": 39918176, + "step": 59215 + }, + { + "epoch": 1.4467544523978209, + "grad_norm": 0.18816527724266052, + "learning_rate": 1.7894183108057175e-06, + "loss": 0.0032, + "num_input_tokens_seen": 39921504, + "step": 59220 + }, + { + "epoch": 1.446876603229668, + "grad_norm": 16.205690383911133, + "learning_rate": 1.789365959801072e-06, + "loss": 0.0899, + "num_input_tokens_seen": 39924576, + "step": 59225 + }, + { + "epoch": 1.4469987540615152, + "grad_norm": 0.13214744627475739, + "learning_rate": 1.7893136030559445e-06, + "loss": 0.0011, + "num_input_tokens_seen": 39927968, + "step": 59230 + }, + { + "epoch": 1.4471209048933624, + "grad_norm": 52.55016326904297, + "learning_rate": 1.7892612405707168e-06, + "loss": 0.1049, + "num_input_tokens_seen": 39931424, + "step": 59235 + }, + { + "epoch": 1.4472430557252096, + "grad_norm": 0.0789654478430748, + "learning_rate": 1.7892088723457685e-06, + "loss": 0.0006, + "num_input_tokens_seen": 39934944, + "step": 59240 + }, + { + "epoch": 1.4473652065570566, + "grad_norm": 0.0595073476433754, + "learning_rate": 1.7891564983814813e-06, + "loss": 0.0215, + "num_input_tokens_seen": 39938272, + "step": 59245 + }, + { + "epoch": 1.4474873573889038, + "grad_norm": 0.2962166368961334, + "learning_rate": 1.7891041186782356e-06, + "loss": 0.1159, + "num_input_tokens_seen": 39941280, + "step": 59250 + }, + { + "epoch": 1.447609508220751, + "grad_norm": 0.09220468997955322, + "learning_rate": 1.7890517332364125e-06, + "loss": 0.0907, + "num_input_tokens_seen": 39944672, + "step": 59255 + }, + { + "epoch": 1.4477316590525982, + "grad_norm": 0.18392859399318695, + "learning_rate": 1.7889993420563934e-06, + "loss": 0.0762, + "num_input_tokens_seen": 39948064, + "step": 59260 + }, + { + "epoch": 1.4478538098844453, + "grad_norm": 0.08621831238269806, + "learning_rate": 1.7889469451385586e-06, + "loss": 0.0961, + "num_input_tokens_seen": 39951008, + "step": 59265 + }, + { + "epoch": 1.4479759607162925, + "grad_norm": 19.898418426513672, + "learning_rate": 1.7888945424832893e-06, + "loss": 0.1437, + "num_input_tokens_seen": 39954912, + "step": 59270 + }, + { + "epoch": 1.4480981115481397, + "grad_norm": 0.03776702284812927, + "learning_rate": 1.7888421340909666e-06, + "loss": 0.1528, + "num_input_tokens_seen": 39957984, + "step": 59275 + }, + { + "epoch": 1.4482202623799867, + "grad_norm": 46.947853088378906, + "learning_rate": 1.788789719961972e-06, + "loss": 0.1918, + "num_input_tokens_seen": 39960928, + "step": 59280 + }, + { + "epoch": 1.4483424132118339, + "grad_norm": 0.1397467702627182, + "learning_rate": 1.7887373000966864e-06, + "loss": 0.0038, + "num_input_tokens_seen": 39964448, + "step": 59285 + }, + { + "epoch": 1.448464564043681, + "grad_norm": 0.8697220087051392, + "learning_rate": 1.7886848744954909e-06, + "loss": 0.0225, + "num_input_tokens_seen": 39967840, + "step": 59290 + }, + { + "epoch": 1.4485867148755283, + "grad_norm": 12.394600868225098, + "learning_rate": 1.7886324431587668e-06, + "loss": 0.1449, + "num_input_tokens_seen": 39970848, + "step": 59295 + }, + { + "epoch": 1.4487088657073754, + "grad_norm": 0.15624871850013733, + "learning_rate": 1.7885800060868954e-06, + "loss": 0.1448, + "num_input_tokens_seen": 39974048, + "step": 59300 + }, + { + "epoch": 1.4488310165392226, + "grad_norm": 0.13877400755882263, + "learning_rate": 1.788527563280258e-06, + "loss": 0.0019, + "num_input_tokens_seen": 39977056, + "step": 59305 + }, + { + "epoch": 1.4489531673710698, + "grad_norm": 0.24705491960048676, + "learning_rate": 1.7884751147392364e-06, + "loss": 0.0009, + "num_input_tokens_seen": 39980384, + "step": 59310 + }, + { + "epoch": 1.449075318202917, + "grad_norm": 0.1556907594203949, + "learning_rate": 1.7884226604642117e-06, + "loss": 0.0523, + "num_input_tokens_seen": 39983712, + "step": 59315 + }, + { + "epoch": 1.4491974690347642, + "grad_norm": 0.7603535652160645, + "learning_rate": 1.7883702004555652e-06, + "loss": 0.1469, + "num_input_tokens_seen": 39986976, + "step": 59320 + }, + { + "epoch": 1.4493196198666114, + "grad_norm": 2.096450090408325, + "learning_rate": 1.7883177347136785e-06, + "loss": 0.0092, + "num_input_tokens_seen": 39990240, + "step": 59325 + }, + { + "epoch": 1.4494417706984586, + "grad_norm": 0.38189834356307983, + "learning_rate": 1.7882652632389332e-06, + "loss": 0.0009, + "num_input_tokens_seen": 39993440, + "step": 59330 + }, + { + "epoch": 1.4495639215303056, + "grad_norm": 0.08693571388721466, + "learning_rate": 1.788212786031711e-06, + "loss": 0.1739, + "num_input_tokens_seen": 39996768, + "step": 59335 + }, + { + "epoch": 1.4496860723621527, + "grad_norm": 11.836777687072754, + "learning_rate": 1.7881603030923935e-06, + "loss": 0.1538, + "num_input_tokens_seen": 40000608, + "step": 59340 + }, + { + "epoch": 1.449808223194, + "grad_norm": 0.20686087012290955, + "learning_rate": 1.788107814421362e-06, + "loss": 0.0392, + "num_input_tokens_seen": 40003872, + "step": 59345 + }, + { + "epoch": 1.4499303740258471, + "grad_norm": 0.1668231338262558, + "learning_rate": 1.7880553200189987e-06, + "loss": 0.085, + "num_input_tokens_seen": 40007328, + "step": 59350 + }, + { + "epoch": 1.4500525248576943, + "grad_norm": 0.15712271630764008, + "learning_rate": 1.7880028198856852e-06, + "loss": 0.0583, + "num_input_tokens_seen": 40011168, + "step": 59355 + }, + { + "epoch": 1.4501746756895415, + "grad_norm": 0.5437976121902466, + "learning_rate": 1.787950314021803e-06, + "loss": 0.0058, + "num_input_tokens_seen": 40014432, + "step": 59360 + }, + { + "epoch": 1.4502968265213887, + "grad_norm": 0.09233890473842621, + "learning_rate": 1.7878978024277344e-06, + "loss": 0.0448, + "num_input_tokens_seen": 40017888, + "step": 59365 + }, + { + "epoch": 1.4504189773532357, + "grad_norm": 0.13593538105487823, + "learning_rate": 1.7878452851038612e-06, + "loss": 0.0014, + "num_input_tokens_seen": 40021472, + "step": 59370 + }, + { + "epoch": 1.4505411281850829, + "grad_norm": 22.736013412475586, + "learning_rate": 1.7877927620505648e-06, + "loss": 0.0793, + "num_input_tokens_seen": 40024480, + "step": 59375 + }, + { + "epoch": 1.45066327901693, + "grad_norm": 2.9554624557495117, + "learning_rate": 1.7877402332682278e-06, + "loss": 0.0604, + "num_input_tokens_seen": 40028320, + "step": 59380 + }, + { + "epoch": 1.4507854298487772, + "grad_norm": 0.04954606294631958, + "learning_rate": 1.787687698757232e-06, + "loss": 0.0002, + "num_input_tokens_seen": 40031584, + "step": 59385 + }, + { + "epoch": 1.4509075806806244, + "grad_norm": 0.10807470232248306, + "learning_rate": 1.7876351585179593e-06, + "loss": 0.0494, + "num_input_tokens_seen": 40034976, + "step": 59390 + }, + { + "epoch": 1.4510297315124716, + "grad_norm": 8.378605842590332, + "learning_rate": 1.7875826125507917e-06, + "loss": 0.1674, + "num_input_tokens_seen": 40038176, + "step": 59395 + }, + { + "epoch": 1.4511518823443188, + "grad_norm": 38.70036697387695, + "learning_rate": 1.787530060856112e-06, + "loss": 0.1429, + "num_input_tokens_seen": 40041312, + "step": 59400 + }, + { + "epoch": 1.451274033176166, + "grad_norm": 0.11730583012104034, + "learning_rate": 1.7874775034343012e-06, + "loss": 0.005, + "num_input_tokens_seen": 40044576, + "step": 59405 + }, + { + "epoch": 1.4513961840080132, + "grad_norm": 12.826897621154785, + "learning_rate": 1.7874249402857426e-06, + "loss": 0.0218, + "num_input_tokens_seen": 40047584, + "step": 59410 + }, + { + "epoch": 1.4515183348398604, + "grad_norm": 0.23351694643497467, + "learning_rate": 1.787372371410818e-06, + "loss": 0.016, + "num_input_tokens_seen": 40051168, + "step": 59415 + }, + { + "epoch": 1.4516404856717076, + "grad_norm": 0.27510517835617065, + "learning_rate": 1.7873197968099097e-06, + "loss": 0.056, + "num_input_tokens_seen": 40054816, + "step": 59420 + }, + { + "epoch": 1.4517626365035545, + "grad_norm": 0.12724097073078156, + "learning_rate": 1.7872672164834e-06, + "loss": 0.0509, + "num_input_tokens_seen": 40058016, + "step": 59425 + }, + { + "epoch": 1.4518847873354017, + "grad_norm": 186.72894287109375, + "learning_rate": 1.7872146304316714e-06, + "loss": 0.3248, + "num_input_tokens_seen": 40061792, + "step": 59430 + }, + { + "epoch": 1.452006938167249, + "grad_norm": 0.6554990410804749, + "learning_rate": 1.7871620386551065e-06, + "loss": 0.1667, + "num_input_tokens_seen": 40064928, + "step": 59435 + }, + { + "epoch": 1.452129088999096, + "grad_norm": 0.29013246297836304, + "learning_rate": 1.7871094411540872e-06, + "loss": 0.0771, + "num_input_tokens_seen": 40068384, + "step": 59440 + }, + { + "epoch": 1.4522512398309433, + "grad_norm": 0.043106433004140854, + "learning_rate": 1.7870568379289965e-06, + "loss": 0.0294, + "num_input_tokens_seen": 40071648, + "step": 59445 + }, + { + "epoch": 1.4523733906627905, + "grad_norm": 0.246318057179451, + "learning_rate": 1.787004228980217e-06, + "loss": 0.0534, + "num_input_tokens_seen": 40074976, + "step": 59450 + }, + { + "epoch": 1.4524955414946374, + "grad_norm": 0.21862973272800446, + "learning_rate": 1.7869516143081307e-06, + "loss": 0.1416, + "num_input_tokens_seen": 40078688, + "step": 59455 + }, + { + "epoch": 1.4526176923264846, + "grad_norm": 1.7106530666351318, + "learning_rate": 1.7868989939131204e-06, + "loss": 0.0022, + "num_input_tokens_seen": 40081888, + "step": 59460 + }, + { + "epoch": 1.4527398431583318, + "grad_norm": 0.24451476335525513, + "learning_rate": 1.7868463677955697e-06, + "loss": 0.0479, + "num_input_tokens_seen": 40085088, + "step": 59465 + }, + { + "epoch": 1.452861993990179, + "grad_norm": 20.032474517822266, + "learning_rate": 1.78679373595586e-06, + "loss": 0.2927, + "num_input_tokens_seen": 40088352, + "step": 59470 + }, + { + "epoch": 1.4529841448220262, + "grad_norm": 42.15612030029297, + "learning_rate": 1.786741098394375e-06, + "loss": 0.1888, + "num_input_tokens_seen": 40091744, + "step": 59475 + }, + { + "epoch": 1.4531062956538734, + "grad_norm": 0.5244346857070923, + "learning_rate": 1.7866884551114968e-06, + "loss": 0.1454, + "num_input_tokens_seen": 40095072, + "step": 59480 + }, + { + "epoch": 1.4532284464857206, + "grad_norm": 0.15843136608600616, + "learning_rate": 1.7866358061076086e-06, + "loss": 0.0011, + "num_input_tokens_seen": 40098464, + "step": 59485 + }, + { + "epoch": 1.4533505973175678, + "grad_norm": 57.28195571899414, + "learning_rate": 1.7865831513830933e-06, + "loss": 0.1509, + "num_input_tokens_seen": 40101728, + "step": 59490 + }, + { + "epoch": 1.453472748149415, + "grad_norm": 15.683917999267578, + "learning_rate": 1.7865304909383338e-06, + "loss": 0.1103, + "num_input_tokens_seen": 40104736, + "step": 59495 + }, + { + "epoch": 1.4535948989812622, + "grad_norm": 64.17626190185547, + "learning_rate": 1.786477824773713e-06, + "loss": 0.0494, + "num_input_tokens_seen": 40108576, + "step": 59500 + }, + { + "epoch": 1.4537170498131093, + "grad_norm": 0.3834023177623749, + "learning_rate": 1.7864251528896139e-06, + "loss": 0.0026, + "num_input_tokens_seen": 40111584, + "step": 59505 + }, + { + "epoch": 1.4538392006449565, + "grad_norm": 0.7233591079711914, + "learning_rate": 1.7863724752864195e-06, + "loss": 0.0802, + "num_input_tokens_seen": 40115232, + "step": 59510 + }, + { + "epoch": 1.4539613514768035, + "grad_norm": 0.16249462962150574, + "learning_rate": 1.7863197919645133e-06, + "loss": 0.0333, + "num_input_tokens_seen": 40118432, + "step": 59515 + }, + { + "epoch": 1.4540835023086507, + "grad_norm": 31.28944206237793, + "learning_rate": 1.7862671029242775e-06, + "loss": 0.0861, + "num_input_tokens_seen": 40121760, + "step": 59520 + }, + { + "epoch": 1.4542056531404979, + "grad_norm": 0.1898259073495865, + "learning_rate": 1.7862144081660963e-06, + "loss": 0.0021, + "num_input_tokens_seen": 40125600, + "step": 59525 + }, + { + "epoch": 1.454327803972345, + "grad_norm": 0.05062809959053993, + "learning_rate": 1.7861617076903524e-06, + "loss": 0.0012, + "num_input_tokens_seen": 40128736, + "step": 59530 + }, + { + "epoch": 1.4544499548041923, + "grad_norm": 0.07426691800355911, + "learning_rate": 1.7861090014974289e-06, + "loss": 0.001, + "num_input_tokens_seen": 40132320, + "step": 59535 + }, + { + "epoch": 1.4545721056360394, + "grad_norm": 49.9919319152832, + "learning_rate": 1.7860562895877097e-06, + "loss": 0.0529, + "num_input_tokens_seen": 40135840, + "step": 59540 + }, + { + "epoch": 1.4546942564678864, + "grad_norm": 0.09730231761932373, + "learning_rate": 1.786003571961577e-06, + "loss": 0.0624, + "num_input_tokens_seen": 40139232, + "step": 59545 + }, + { + "epoch": 1.4548164072997336, + "grad_norm": 2.2113256454467773, + "learning_rate": 1.7859508486194156e-06, + "loss": 0.0007, + "num_input_tokens_seen": 40142048, + "step": 59550 + }, + { + "epoch": 1.4549385581315808, + "grad_norm": 15.290037155151367, + "learning_rate": 1.785898119561608e-06, + "loss": 0.049, + "num_input_tokens_seen": 40145440, + "step": 59555 + }, + { + "epoch": 1.455060708963428, + "grad_norm": 0.3437047302722931, + "learning_rate": 1.785845384788538e-06, + "loss": 0.0465, + "num_input_tokens_seen": 40148576, + "step": 59560 + }, + { + "epoch": 1.4551828597952752, + "grad_norm": 0.5709901452064514, + "learning_rate": 1.7857926443005888e-06, + "loss": 0.08, + "num_input_tokens_seen": 40151712, + "step": 59565 + }, + { + "epoch": 1.4553050106271224, + "grad_norm": 0.43648040294647217, + "learning_rate": 1.7857398980981442e-06, + "loss": 0.0449, + "num_input_tokens_seen": 40154784, + "step": 59570 + }, + { + "epoch": 1.4554271614589696, + "grad_norm": 16.983592987060547, + "learning_rate": 1.7856871461815878e-06, + "loss": 0.0793, + "num_input_tokens_seen": 40157856, + "step": 59575 + }, + { + "epoch": 1.4555493122908167, + "grad_norm": 0.21682091057300568, + "learning_rate": 1.785634388551303e-06, + "loss": 0.2216, + "num_input_tokens_seen": 40161184, + "step": 59580 + }, + { + "epoch": 1.455671463122664, + "grad_norm": 5.58907413482666, + "learning_rate": 1.7855816252076739e-06, + "loss": 0.0674, + "num_input_tokens_seen": 40164384, + "step": 59585 + }, + { + "epoch": 1.4557936139545111, + "grad_norm": 0.136368989944458, + "learning_rate": 1.7855288561510837e-06, + "loss": 0.0298, + "num_input_tokens_seen": 40167648, + "step": 59590 + }, + { + "epoch": 1.4559157647863583, + "grad_norm": 0.09996183216571808, + "learning_rate": 1.7854760813819166e-06, + "loss": 0.0039, + "num_input_tokens_seen": 40170912, + "step": 59595 + }, + { + "epoch": 1.4560379156182053, + "grad_norm": 0.14230936765670776, + "learning_rate": 1.785423300900556e-06, + "loss": 0.092, + "num_input_tokens_seen": 40174368, + "step": 59600 + }, + { + "epoch": 1.4561600664500525, + "grad_norm": 0.06128491833806038, + "learning_rate": 1.7853705147073859e-06, + "loss": 0.147, + "num_input_tokens_seen": 40177824, + "step": 59605 + }, + { + "epoch": 1.4562822172818997, + "grad_norm": 208.32565307617188, + "learning_rate": 1.78531772280279e-06, + "loss": 0.1516, + "num_input_tokens_seen": 40180896, + "step": 59610 + }, + { + "epoch": 1.4564043681137468, + "grad_norm": 0.5086117386817932, + "learning_rate": 1.7852649251871528e-06, + "loss": 0.0996, + "num_input_tokens_seen": 40183968, + "step": 59615 + }, + { + "epoch": 1.456526518945594, + "grad_norm": 24.724302291870117, + "learning_rate": 1.7852121218608573e-06, + "loss": 0.0661, + "num_input_tokens_seen": 40187168, + "step": 59620 + }, + { + "epoch": 1.4566486697774412, + "grad_norm": 0.044920340180397034, + "learning_rate": 1.7851593128242885e-06, + "loss": 0.1033, + "num_input_tokens_seen": 40190560, + "step": 59625 + }, + { + "epoch": 1.4567708206092884, + "grad_norm": 83.80802917480469, + "learning_rate": 1.78510649807783e-06, + "loss": 0.1114, + "num_input_tokens_seen": 40193952, + "step": 59630 + }, + { + "epoch": 1.4568929714411354, + "grad_norm": 28.874740600585938, + "learning_rate": 1.7850536776218656e-06, + "loss": 0.1104, + "num_input_tokens_seen": 40196896, + "step": 59635 + }, + { + "epoch": 1.4570151222729826, + "grad_norm": 0.782297670841217, + "learning_rate": 1.7850008514567797e-06, + "loss": 0.0943, + "num_input_tokens_seen": 40199648, + "step": 59640 + }, + { + "epoch": 1.4571372731048298, + "grad_norm": 0.2725517749786377, + "learning_rate": 1.784948019582957e-06, + "loss": 0.0485, + "num_input_tokens_seen": 40202656, + "step": 59645 + }, + { + "epoch": 1.457259423936677, + "grad_norm": 0.3396630883216858, + "learning_rate": 1.7848951820007807e-06, + "loss": 0.001, + "num_input_tokens_seen": 40205792, + "step": 59650 + }, + { + "epoch": 1.4573815747685241, + "grad_norm": 47.51850509643555, + "learning_rate": 1.7848423387106355e-06, + "loss": 0.0638, + "num_input_tokens_seen": 40208800, + "step": 59655 + }, + { + "epoch": 1.4575037256003713, + "grad_norm": 21.536033630371094, + "learning_rate": 1.7847894897129058e-06, + "loss": 0.0392, + "num_input_tokens_seen": 40212640, + "step": 59660 + }, + { + "epoch": 1.4576258764322185, + "grad_norm": 0.07097387313842773, + "learning_rate": 1.784736635007976e-06, + "loss": 0.0841, + "num_input_tokens_seen": 40216032, + "step": 59665 + }, + { + "epoch": 1.4577480272640657, + "grad_norm": 21.68965721130371, + "learning_rate": 1.7846837745962301e-06, + "loss": 0.1628, + "num_input_tokens_seen": 40219552, + "step": 59670 + }, + { + "epoch": 1.457870178095913, + "grad_norm": 1.3079677820205688, + "learning_rate": 1.784630908478053e-06, + "loss": 0.0269, + "num_input_tokens_seen": 40222752, + "step": 59675 + }, + { + "epoch": 1.45799232892776, + "grad_norm": 0.4254034161567688, + "learning_rate": 1.7845780366538285e-06, + "loss": 0.0403, + "num_input_tokens_seen": 40226080, + "step": 59680 + }, + { + "epoch": 1.4581144797596073, + "grad_norm": 67.33355712890625, + "learning_rate": 1.7845251591239418e-06, + "loss": 0.2522, + "num_input_tokens_seen": 40229408, + "step": 59685 + }, + { + "epoch": 1.4582366305914543, + "grad_norm": 0.17583763599395752, + "learning_rate": 1.7844722758887772e-06, + "loss": 0.0425, + "num_input_tokens_seen": 40232928, + "step": 59690 + }, + { + "epoch": 1.4583587814233014, + "grad_norm": 0.09350127726793289, + "learning_rate": 1.7844193869487189e-06, + "loss": 0.0396, + "num_input_tokens_seen": 40236384, + "step": 59695 + }, + { + "epoch": 1.4584809322551486, + "grad_norm": 0.47614389657974243, + "learning_rate": 1.7843664923041522e-06, + "loss": 0.0483, + "num_input_tokens_seen": 40239648, + "step": 59700 + }, + { + "epoch": 1.4586030830869958, + "grad_norm": 65.01331329345703, + "learning_rate": 1.784313591955461e-06, + "loss": 0.1454, + "num_input_tokens_seen": 40242784, + "step": 59705 + }, + { + "epoch": 1.458725233918843, + "grad_norm": 9.80173110961914, + "learning_rate": 1.784260685903031e-06, + "loss": 0.0302, + "num_input_tokens_seen": 40246304, + "step": 59710 + }, + { + "epoch": 1.4588473847506902, + "grad_norm": 25.71861457824707, + "learning_rate": 1.7842077741472457e-06, + "loss": 0.1307, + "num_input_tokens_seen": 40249760, + "step": 59715 + }, + { + "epoch": 1.4589695355825374, + "grad_norm": 0.6096875071525574, + "learning_rate": 1.7841548566884908e-06, + "loss": 0.0339, + "num_input_tokens_seen": 40252832, + "step": 59720 + }, + { + "epoch": 1.4590916864143844, + "grad_norm": 185.60562133789062, + "learning_rate": 1.784101933527151e-06, + "loss": 0.1555, + "num_input_tokens_seen": 40256736, + "step": 59725 + }, + { + "epoch": 1.4592138372462315, + "grad_norm": 65.35801696777344, + "learning_rate": 1.7840490046636108e-06, + "loss": 0.1282, + "num_input_tokens_seen": 40259936, + "step": 59730 + }, + { + "epoch": 1.4593359880780787, + "grad_norm": 31.8502140045166, + "learning_rate": 1.7839960700982555e-06, + "loss": 0.2229, + "num_input_tokens_seen": 40263264, + "step": 59735 + }, + { + "epoch": 1.459458138909926, + "grad_norm": 0.36327114701271057, + "learning_rate": 1.7839431298314698e-06, + "loss": 0.1385, + "num_input_tokens_seen": 40266208, + "step": 59740 + }, + { + "epoch": 1.4595802897417731, + "grad_norm": 51.076438903808594, + "learning_rate": 1.7838901838636389e-06, + "loss": 0.0454, + "num_input_tokens_seen": 40269152, + "step": 59745 + }, + { + "epoch": 1.4597024405736203, + "grad_norm": 21.2176513671875, + "learning_rate": 1.7838372321951478e-06, + "loss": 0.0643, + "num_input_tokens_seen": 40272416, + "step": 59750 + }, + { + "epoch": 1.4598245914054675, + "grad_norm": 0.8600711226463318, + "learning_rate": 1.7837842748263813e-06, + "loss": 0.1077, + "num_input_tokens_seen": 40275488, + "step": 59755 + }, + { + "epoch": 1.4599467422373147, + "grad_norm": 0.1487630307674408, + "learning_rate": 1.7837313117577251e-06, + "loss": 0.0021, + "num_input_tokens_seen": 40279136, + "step": 59760 + }, + { + "epoch": 1.4600688930691619, + "grad_norm": 18.513385772705078, + "learning_rate": 1.7836783429895636e-06, + "loss": 0.0646, + "num_input_tokens_seen": 40282592, + "step": 59765 + }, + { + "epoch": 1.460191043901009, + "grad_norm": 0.26967713236808777, + "learning_rate": 1.7836253685222827e-06, + "loss": 0.0802, + "num_input_tokens_seen": 40285984, + "step": 59770 + }, + { + "epoch": 1.4603131947328563, + "grad_norm": 26.209548950195312, + "learning_rate": 1.7835723883562673e-06, + "loss": 0.0581, + "num_input_tokens_seen": 40289504, + "step": 59775 + }, + { + "epoch": 1.4604353455647032, + "grad_norm": 18.547000885009766, + "learning_rate": 1.7835194024919026e-06, + "loss": 0.0379, + "num_input_tokens_seen": 40293152, + "step": 59780 + }, + { + "epoch": 1.4605574963965504, + "grad_norm": 0.2680453658103943, + "learning_rate": 1.783466410929574e-06, + "loss": 0.0462, + "num_input_tokens_seen": 40296928, + "step": 59785 + }, + { + "epoch": 1.4606796472283976, + "grad_norm": 0.46763402223587036, + "learning_rate": 1.7834134136696672e-06, + "loss": 0.0274, + "num_input_tokens_seen": 40300512, + "step": 59790 + }, + { + "epoch": 1.4608017980602448, + "grad_norm": 19.22868537902832, + "learning_rate": 1.783360410712567e-06, + "loss": 0.0352, + "num_input_tokens_seen": 40303968, + "step": 59795 + }, + { + "epoch": 1.460923948892092, + "grad_norm": 0.03059125877916813, + "learning_rate": 1.7833074020586597e-06, + "loss": 0.0287, + "num_input_tokens_seen": 40307296, + "step": 59800 + }, + { + "epoch": 1.4610460997239392, + "grad_norm": 14.651070594787598, + "learning_rate": 1.7832543877083302e-06, + "loss": 0.1239, + "num_input_tokens_seen": 40310496, + "step": 59805 + }, + { + "epoch": 1.4611682505557864, + "grad_norm": 0.4671648442745209, + "learning_rate": 1.7832013676619636e-06, + "loss": 0.0801, + "num_input_tokens_seen": 40313568, + "step": 59810 + }, + { + "epoch": 1.4612904013876333, + "grad_norm": 24.84051513671875, + "learning_rate": 1.7831483419199462e-06, + "loss": 0.0944, + "num_input_tokens_seen": 40316832, + "step": 59815 + }, + { + "epoch": 1.4614125522194805, + "grad_norm": 0.6817691922187805, + "learning_rate": 1.7830953104826638e-06, + "loss": 0.002, + "num_input_tokens_seen": 40319904, + "step": 59820 + }, + { + "epoch": 1.4615347030513277, + "grad_norm": 20.075729370117188, + "learning_rate": 1.7830422733505012e-06, + "loss": 0.0462, + "num_input_tokens_seen": 40323936, + "step": 59825 + }, + { + "epoch": 1.461656853883175, + "grad_norm": 0.13368147611618042, + "learning_rate": 1.782989230523845e-06, + "loss": 0.1381, + "num_input_tokens_seen": 40328032, + "step": 59830 + }, + { + "epoch": 1.461779004715022, + "grad_norm": 0.01761273667216301, + "learning_rate": 1.7829361820030803e-06, + "loss": 0.0352, + "num_input_tokens_seen": 40331424, + "step": 59835 + }, + { + "epoch": 1.4619011555468693, + "grad_norm": 91.9080810546875, + "learning_rate": 1.782883127788593e-06, + "loss": 0.0854, + "num_input_tokens_seen": 40334624, + "step": 59840 + }, + { + "epoch": 1.4620233063787165, + "grad_norm": 0.8963506817817688, + "learning_rate": 1.782830067880769e-06, + "loss": 0.0596, + "num_input_tokens_seen": 40338208, + "step": 59845 + }, + { + "epoch": 1.4621454572105637, + "grad_norm": 0.13214941322803497, + "learning_rate": 1.7827770022799947e-06, + "loss": 0.2008, + "num_input_tokens_seen": 40342112, + "step": 59850 + }, + { + "epoch": 1.4622676080424108, + "grad_norm": 0.053667087107896805, + "learning_rate": 1.7827239309866548e-06, + "loss": 0.0595, + "num_input_tokens_seen": 40345184, + "step": 59855 + }, + { + "epoch": 1.462389758874258, + "grad_norm": 16.44167137145996, + "learning_rate": 1.7826708540011363e-06, + "loss": 0.1361, + "num_input_tokens_seen": 40348576, + "step": 59860 + }, + { + "epoch": 1.4625119097061052, + "grad_norm": 0.0644519031047821, + "learning_rate": 1.7826177713238248e-06, + "loss": 0.0892, + "num_input_tokens_seen": 40352224, + "step": 59865 + }, + { + "epoch": 1.4626340605379522, + "grad_norm": 0.4683791995048523, + "learning_rate": 1.7825646829551064e-06, + "loss": 0.002, + "num_input_tokens_seen": 40355744, + "step": 59870 + }, + { + "epoch": 1.4627562113697994, + "grad_norm": 0.19856318831443787, + "learning_rate": 1.782511588895367e-06, + "loss": 0.0888, + "num_input_tokens_seen": 40359008, + "step": 59875 + }, + { + "epoch": 1.4628783622016466, + "grad_norm": 34.53307342529297, + "learning_rate": 1.782458489144993e-06, + "loss": 0.1662, + "num_input_tokens_seen": 40362208, + "step": 59880 + }, + { + "epoch": 1.4630005130334938, + "grad_norm": 0.33793848752975464, + "learning_rate": 1.7824053837043706e-06, + "loss": 0.1133, + "num_input_tokens_seen": 40365472, + "step": 59885 + }, + { + "epoch": 1.463122663865341, + "grad_norm": 0.9318715929985046, + "learning_rate": 1.7823522725738855e-06, + "loss": 0.0614, + "num_input_tokens_seen": 40368736, + "step": 59890 + }, + { + "epoch": 1.4632448146971881, + "grad_norm": 5.252325534820557, + "learning_rate": 1.7822991557539244e-06, + "loss": 0.0025, + "num_input_tokens_seen": 40372128, + "step": 59895 + }, + { + "epoch": 1.4633669655290353, + "grad_norm": 11.749712944030762, + "learning_rate": 1.7822460332448733e-06, + "loss": 0.1934, + "num_input_tokens_seen": 40375392, + "step": 59900 + }, + { + "epoch": 1.4634891163608823, + "grad_norm": 15.675751686096191, + "learning_rate": 1.7821929050471188e-06, + "loss": 0.0993, + "num_input_tokens_seen": 40378464, + "step": 59905 + }, + { + "epoch": 1.4636112671927295, + "grad_norm": 19.527395248413086, + "learning_rate": 1.7821397711610468e-06, + "loss": 0.0423, + "num_input_tokens_seen": 40382304, + "step": 59910 + }, + { + "epoch": 1.4637334180245767, + "grad_norm": 0.1722240298986435, + "learning_rate": 1.7820866315870444e-06, + "loss": 0.0396, + "num_input_tokens_seen": 40386272, + "step": 59915 + }, + { + "epoch": 1.4638555688564239, + "grad_norm": 90.19779968261719, + "learning_rate": 1.7820334863254974e-06, + "loss": 0.0489, + "num_input_tokens_seen": 40390112, + "step": 59920 + }, + { + "epoch": 1.463977719688271, + "grad_norm": 0.4945489168167114, + "learning_rate": 1.7819803353767926e-06, + "loss": 0.1565, + "num_input_tokens_seen": 40393888, + "step": 59925 + }, + { + "epoch": 1.4640998705201183, + "grad_norm": 15.186417579650879, + "learning_rate": 1.7819271787413164e-06, + "loss": 0.1662, + "num_input_tokens_seen": 40397088, + "step": 59930 + }, + { + "epoch": 1.4642220213519654, + "grad_norm": 0.28768110275268555, + "learning_rate": 1.7818740164194556e-06, + "loss": 0.1424, + "num_input_tokens_seen": 40400736, + "step": 59935 + }, + { + "epoch": 1.4643441721838126, + "grad_norm": 0.15257716178894043, + "learning_rate": 1.7818208484115967e-06, + "loss": 0.0331, + "num_input_tokens_seen": 40404768, + "step": 59940 + }, + { + "epoch": 1.4644663230156598, + "grad_norm": 0.33717256784439087, + "learning_rate": 1.781767674718126e-06, + "loss": 0.1021, + "num_input_tokens_seen": 40407968, + "step": 59945 + }, + { + "epoch": 1.464588473847507, + "grad_norm": 0.1763121485710144, + "learning_rate": 1.7817144953394307e-06, + "loss": 0.0847, + "num_input_tokens_seen": 40411104, + "step": 59950 + }, + { + "epoch": 1.4647106246793542, + "grad_norm": 64.01554870605469, + "learning_rate": 1.7816613102758976e-06, + "loss": 0.1078, + "num_input_tokens_seen": 40414368, + "step": 59955 + }, + { + "epoch": 1.4648327755112012, + "grad_norm": 2.6066277027130127, + "learning_rate": 1.781608119527913e-06, + "loss": 0.0459, + "num_input_tokens_seen": 40417568, + "step": 59960 + }, + { + "epoch": 1.4649549263430484, + "grad_norm": 0.572694718837738, + "learning_rate": 1.7815549230958637e-06, + "loss": 0.1725, + "num_input_tokens_seen": 40420640, + "step": 59965 + }, + { + "epoch": 1.4650770771748955, + "grad_norm": 44.864845275878906, + "learning_rate": 1.7815017209801369e-06, + "loss": 0.1199, + "num_input_tokens_seen": 40423648, + "step": 59970 + }, + { + "epoch": 1.4651992280067427, + "grad_norm": 0.10838565975427628, + "learning_rate": 1.7814485131811195e-06, + "loss": 0.0028, + "num_input_tokens_seen": 40427232, + "step": 59975 + }, + { + "epoch": 1.46532137883859, + "grad_norm": 12.047739028930664, + "learning_rate": 1.7813952996991984e-06, + "loss": 0.2917, + "num_input_tokens_seen": 40430176, + "step": 59980 + }, + { + "epoch": 1.4654435296704371, + "grad_norm": 0.120354562997818, + "learning_rate": 1.7813420805347602e-06, + "loss": 0.0253, + "num_input_tokens_seen": 40433696, + "step": 59985 + }, + { + "epoch": 1.465565680502284, + "grad_norm": 34.92924880981445, + "learning_rate": 1.7812888556881926e-06, + "loss": 0.1689, + "num_input_tokens_seen": 40436896, + "step": 59990 + }, + { + "epoch": 1.4656878313341313, + "grad_norm": 10.803354263305664, + "learning_rate": 1.781235625159882e-06, + "loss": 0.0497, + "num_input_tokens_seen": 40440544, + "step": 59995 + }, + { + "epoch": 1.4658099821659785, + "grad_norm": 19.33785629272461, + "learning_rate": 1.781182388950216e-06, + "loss": 0.048, + "num_input_tokens_seen": 40443936, + "step": 60000 + }, + { + "epoch": 1.4659321329978257, + "grad_norm": 0.15235191583633423, + "learning_rate": 1.7811291470595815e-06, + "loss": 0.1678, + "num_input_tokens_seen": 40447968, + "step": 60005 + }, + { + "epoch": 1.4660542838296728, + "grad_norm": 0.34952807426452637, + "learning_rate": 1.7810758994883656e-06, + "loss": 0.0026, + "num_input_tokens_seen": 40451744, + "step": 60010 + }, + { + "epoch": 1.46617643466152, + "grad_norm": 0.13237085938453674, + "learning_rate": 1.781022646236956e-06, + "loss": 0.0326, + "num_input_tokens_seen": 40454880, + "step": 60015 + }, + { + "epoch": 1.4662985854933672, + "grad_norm": 10.759251594543457, + "learning_rate": 1.7809693873057393e-06, + "loss": 0.1341, + "num_input_tokens_seen": 40458080, + "step": 60020 + }, + { + "epoch": 1.4664207363252144, + "grad_norm": 0.4018060863018036, + "learning_rate": 1.7809161226951032e-06, + "loss": 0.0014, + "num_input_tokens_seen": 40461856, + "step": 60025 + }, + { + "epoch": 1.4665428871570616, + "grad_norm": 35.23345184326172, + "learning_rate": 1.7808628524054352e-06, + "loss": 0.1523, + "num_input_tokens_seen": 40465056, + "step": 60030 + }, + { + "epoch": 1.4666650379889088, + "grad_norm": 9.999808311462402, + "learning_rate": 1.7808095764371225e-06, + "loss": 0.0853, + "num_input_tokens_seen": 40468256, + "step": 60035 + }, + { + "epoch": 1.466787188820756, + "grad_norm": 4.482977390289307, + "learning_rate": 1.7807562947905526e-06, + "loss": 0.1113, + "num_input_tokens_seen": 40471712, + "step": 60040 + }, + { + "epoch": 1.4669093396526032, + "grad_norm": 109.09564208984375, + "learning_rate": 1.7807030074661127e-06, + "loss": 0.0828, + "num_input_tokens_seen": 40475360, + "step": 60045 + }, + { + "epoch": 1.4670314904844501, + "grad_norm": 24.180208206176758, + "learning_rate": 1.7806497144641909e-06, + "loss": 0.0282, + "num_input_tokens_seen": 40478752, + "step": 60050 + }, + { + "epoch": 1.4671536413162973, + "grad_norm": 0.015254548750817776, + "learning_rate": 1.7805964157851739e-06, + "loss": 0.0005, + "num_input_tokens_seen": 40482016, + "step": 60055 + }, + { + "epoch": 1.4672757921481445, + "grad_norm": 22.02342987060547, + "learning_rate": 1.7805431114294503e-06, + "loss": 0.0425, + "num_input_tokens_seen": 40485280, + "step": 60060 + }, + { + "epoch": 1.4673979429799917, + "grad_norm": 0.04338202252984047, + "learning_rate": 1.7804898013974068e-06, + "loss": 0.1388, + "num_input_tokens_seen": 40489056, + "step": 60065 + }, + { + "epoch": 1.467520093811839, + "grad_norm": 0.06497588753700256, + "learning_rate": 1.780436485689432e-06, + "loss": 0.0005, + "num_input_tokens_seen": 40492768, + "step": 60070 + }, + { + "epoch": 1.467642244643686, + "grad_norm": 1.3534506559371948, + "learning_rate": 1.7803831643059128e-06, + "loss": 0.0839, + "num_input_tokens_seen": 40496160, + "step": 60075 + }, + { + "epoch": 1.467764395475533, + "grad_norm": 0.034730665385723114, + "learning_rate": 1.7803298372472373e-06, + "loss": 0.1656, + "num_input_tokens_seen": 40499552, + "step": 60080 + }, + { + "epoch": 1.4678865463073802, + "grad_norm": 57.326541900634766, + "learning_rate": 1.7802765045137935e-06, + "loss": 0.0605, + "num_input_tokens_seen": 40502432, + "step": 60085 + }, + { + "epoch": 1.4680086971392274, + "grad_norm": 10.931227684020996, + "learning_rate": 1.7802231661059692e-06, + "loss": 0.0969, + "num_input_tokens_seen": 40505824, + "step": 60090 + }, + { + "epoch": 1.4681308479710746, + "grad_norm": 0.2942550778388977, + "learning_rate": 1.780169822024152e-06, + "loss": 0.0764, + "num_input_tokens_seen": 40509920, + "step": 60095 + }, + { + "epoch": 1.4682529988029218, + "grad_norm": 2.650104522705078, + "learning_rate": 1.78011647226873e-06, + "loss": 0.2528, + "num_input_tokens_seen": 40513120, + "step": 60100 + }, + { + "epoch": 1.468375149634769, + "grad_norm": 0.17480523884296417, + "learning_rate": 1.7800631168400915e-06, + "loss": 0.0569, + "num_input_tokens_seen": 40516512, + "step": 60105 + }, + { + "epoch": 1.4684973004666162, + "grad_norm": 131.7166290283203, + "learning_rate": 1.7800097557386238e-06, + "loss": 0.2744, + "num_input_tokens_seen": 40519776, + "step": 60110 + }, + { + "epoch": 1.4686194512984634, + "grad_norm": 0.33879879117012024, + "learning_rate": 1.7799563889647156e-06, + "loss": 0.1327, + "num_input_tokens_seen": 40522976, + "step": 60115 + }, + { + "epoch": 1.4687416021303106, + "grad_norm": 0.4208439588546753, + "learning_rate": 1.7799030165187548e-06, + "loss": 0.0038, + "num_input_tokens_seen": 40526496, + "step": 60120 + }, + { + "epoch": 1.4688637529621578, + "grad_norm": 8.661870002746582, + "learning_rate": 1.7798496384011291e-06, + "loss": 0.0437, + "num_input_tokens_seen": 40529888, + "step": 60125 + }, + { + "epoch": 1.468985903794005, + "grad_norm": 2.0140016078948975, + "learning_rate": 1.7797962546122274e-06, + "loss": 0.0683, + "num_input_tokens_seen": 40533664, + "step": 60130 + }, + { + "epoch": 1.469108054625852, + "grad_norm": 14.874700546264648, + "learning_rate": 1.7797428651524378e-06, + "loss": 0.0433, + "num_input_tokens_seen": 40538016, + "step": 60135 + }, + { + "epoch": 1.4692302054576991, + "grad_norm": 25.21275520324707, + "learning_rate": 1.779689470022148e-06, + "loss": 0.0481, + "num_input_tokens_seen": 40541728, + "step": 60140 + }, + { + "epoch": 1.4693523562895463, + "grad_norm": 11.36154556274414, + "learning_rate": 1.7796360692217468e-06, + "loss": 0.0712, + "num_input_tokens_seen": 40544736, + "step": 60145 + }, + { + "epoch": 1.4694745071213935, + "grad_norm": 0.37468650937080383, + "learning_rate": 1.7795826627516224e-06, + "loss": 0.0833, + "num_input_tokens_seen": 40547808, + "step": 60150 + }, + { + "epoch": 1.4695966579532407, + "grad_norm": 0.36312851309776306, + "learning_rate": 1.779529250612163e-06, + "loss": 0.0974, + "num_input_tokens_seen": 40551712, + "step": 60155 + }, + { + "epoch": 1.4697188087850879, + "grad_norm": 10.961259841918945, + "learning_rate": 1.7794758328037575e-06, + "loss": 0.0931, + "num_input_tokens_seen": 40555424, + "step": 60160 + }, + { + "epoch": 1.469840959616935, + "grad_norm": 13.064085960388184, + "learning_rate": 1.779422409326794e-06, + "loss": 0.1147, + "num_input_tokens_seen": 40559072, + "step": 60165 + }, + { + "epoch": 1.469963110448782, + "grad_norm": 12.516934394836426, + "learning_rate": 1.779368980181661e-06, + "loss": 0.1045, + "num_input_tokens_seen": 40562208, + "step": 60170 + }, + { + "epoch": 1.4700852612806292, + "grad_norm": 30.780580520629883, + "learning_rate": 1.7793155453687473e-06, + "loss": 0.1029, + "num_input_tokens_seen": 40565664, + "step": 60175 + }, + { + "epoch": 1.4702074121124764, + "grad_norm": 40.40333938598633, + "learning_rate": 1.7792621048884412e-06, + "loss": 0.1246, + "num_input_tokens_seen": 40569056, + "step": 60180 + }, + { + "epoch": 1.4703295629443236, + "grad_norm": 9.920197486877441, + "learning_rate": 1.7792086587411315e-06, + "loss": 0.2031, + "num_input_tokens_seen": 40571936, + "step": 60185 + }, + { + "epoch": 1.4704517137761708, + "grad_norm": 18.63920783996582, + "learning_rate": 1.7791552069272071e-06, + "loss": 0.1175, + "num_input_tokens_seen": 40575520, + "step": 60190 + }, + { + "epoch": 1.470573864608018, + "grad_norm": 1.8878223896026611, + "learning_rate": 1.779101749447056e-06, + "loss": 0.0451, + "num_input_tokens_seen": 40579168, + "step": 60195 + }, + { + "epoch": 1.4706960154398652, + "grad_norm": 13.10407543182373, + "learning_rate": 1.779048286301068e-06, + "loss": 0.1231, + "num_input_tokens_seen": 40582240, + "step": 60200 + }, + { + "epoch": 1.4708181662717124, + "grad_norm": 53.87441635131836, + "learning_rate": 1.778994817489631e-06, + "loss": 0.0335, + "num_input_tokens_seen": 40585568, + "step": 60205 + }, + { + "epoch": 1.4709403171035595, + "grad_norm": 0.6808045506477356, + "learning_rate": 1.778941343013134e-06, + "loss": 0.0749, + "num_input_tokens_seen": 40588768, + "step": 60210 + }, + { + "epoch": 1.4710624679354067, + "grad_norm": 0.517785906791687, + "learning_rate": 1.7788878628719663e-06, + "loss": 0.1077, + "num_input_tokens_seen": 40591968, + "step": 60215 + }, + { + "epoch": 1.471184618767254, + "grad_norm": 0.20099209249019623, + "learning_rate": 1.7788343770665165e-06, + "loss": 0.043, + "num_input_tokens_seen": 40595424, + "step": 60220 + }, + { + "epoch": 1.471306769599101, + "grad_norm": 0.23198401927947998, + "learning_rate": 1.7787808855971737e-06, + "loss": 0.0018, + "num_input_tokens_seen": 40598560, + "step": 60225 + }, + { + "epoch": 1.471428920430948, + "grad_norm": 32.09675216674805, + "learning_rate": 1.7787273884643268e-06, + "loss": 0.1062, + "num_input_tokens_seen": 40601888, + "step": 60230 + }, + { + "epoch": 1.4715510712627953, + "grad_norm": 0.19732925295829773, + "learning_rate": 1.7786738856683647e-06, + "loss": 0.039, + "num_input_tokens_seen": 40605600, + "step": 60235 + }, + { + "epoch": 1.4716732220946425, + "grad_norm": 0.18621958792209625, + "learning_rate": 1.7786203772096768e-06, + "loss": 0.0543, + "num_input_tokens_seen": 40608928, + "step": 60240 + }, + { + "epoch": 1.4717953729264897, + "grad_norm": 24.08376693725586, + "learning_rate": 1.7785668630886521e-06, + "loss": 0.2321, + "num_input_tokens_seen": 40612192, + "step": 60245 + }, + { + "epoch": 1.4719175237583368, + "grad_norm": 1.6449941396713257, + "learning_rate": 1.77851334330568e-06, + "loss": 0.039, + "num_input_tokens_seen": 40615136, + "step": 60250 + }, + { + "epoch": 1.472039674590184, + "grad_norm": 0.0721750482916832, + "learning_rate": 1.7784598178611492e-06, + "loss": 0.0719, + "num_input_tokens_seen": 40618720, + "step": 60255 + }, + { + "epoch": 1.472161825422031, + "grad_norm": 9.434758186340332, + "learning_rate": 1.7784062867554493e-06, + "loss": 0.2505, + "num_input_tokens_seen": 40621984, + "step": 60260 + }, + { + "epoch": 1.4722839762538782, + "grad_norm": 0.3179467022418976, + "learning_rate": 1.7783527499889694e-06, + "loss": 0.1049, + "num_input_tokens_seen": 40625440, + "step": 60265 + }, + { + "epoch": 1.4724061270857254, + "grad_norm": 34.49399948120117, + "learning_rate": 1.778299207562099e-06, + "loss": 0.0771, + "num_input_tokens_seen": 40628192, + "step": 60270 + }, + { + "epoch": 1.4725282779175726, + "grad_norm": 0.4097166657447815, + "learning_rate": 1.7782456594752275e-06, + "loss": 0.0427, + "num_input_tokens_seen": 40631968, + "step": 60275 + }, + { + "epoch": 1.4726504287494198, + "grad_norm": 0.10307589173316956, + "learning_rate": 1.7781921057287442e-06, + "loss": 0.0634, + "num_input_tokens_seen": 40636128, + "step": 60280 + }, + { + "epoch": 1.472772579581267, + "grad_norm": 0.3781689405441284, + "learning_rate": 1.7781385463230385e-06, + "loss": 0.0008, + "num_input_tokens_seen": 40639136, + "step": 60285 + }, + { + "epoch": 1.4728947304131141, + "grad_norm": 9.588024139404297, + "learning_rate": 1.7780849812585e-06, + "loss": 0.1044, + "num_input_tokens_seen": 40641888, + "step": 60290 + }, + { + "epoch": 1.4730168812449613, + "grad_norm": 0.3940590023994446, + "learning_rate": 1.7780314105355183e-06, + "loss": 0.1011, + "num_input_tokens_seen": 40644832, + "step": 60295 + }, + { + "epoch": 1.4731390320768085, + "grad_norm": 0.2877940833568573, + "learning_rate": 1.7779778341544832e-06, + "loss": 0.1502, + "num_input_tokens_seen": 40648032, + "step": 60300 + }, + { + "epoch": 1.4732611829086557, + "grad_norm": 22.556068420410156, + "learning_rate": 1.7779242521157837e-06, + "loss": 0.1569, + "num_input_tokens_seen": 40651296, + "step": 60305 + }, + { + "epoch": 1.473383333740503, + "grad_norm": 1.5956579446792603, + "learning_rate": 1.77787066441981e-06, + "loss": 0.1258, + "num_input_tokens_seen": 40654624, + "step": 60310 + }, + { + "epoch": 1.4735054845723499, + "grad_norm": 0.47433945536613464, + "learning_rate": 1.7778170710669513e-06, + "loss": 0.0015, + "num_input_tokens_seen": 40658144, + "step": 60315 + }, + { + "epoch": 1.473627635404197, + "grad_norm": 0.32676297426223755, + "learning_rate": 1.7777634720575978e-06, + "loss": 0.0685, + "num_input_tokens_seen": 40661408, + "step": 60320 + }, + { + "epoch": 1.4737497862360442, + "grad_norm": 0.8634158372879028, + "learning_rate": 1.777709867392139e-06, + "loss": 0.1081, + "num_input_tokens_seen": 40664672, + "step": 60325 + }, + { + "epoch": 1.4738719370678914, + "grad_norm": 0.12412005662918091, + "learning_rate": 1.7776562570709652e-06, + "loss": 0.0795, + "num_input_tokens_seen": 40668128, + "step": 60330 + }, + { + "epoch": 1.4739940878997386, + "grad_norm": 1.616155743598938, + "learning_rate": 1.7776026410944659e-06, + "loss": 0.1111, + "num_input_tokens_seen": 40671200, + "step": 60335 + }, + { + "epoch": 1.4741162387315858, + "grad_norm": 0.20798847079277039, + "learning_rate": 1.7775490194630307e-06, + "loss": 0.1285, + "num_input_tokens_seen": 40674272, + "step": 60340 + }, + { + "epoch": 1.474238389563433, + "grad_norm": 1.0060458183288574, + "learning_rate": 1.7774953921770504e-06, + "loss": 0.0018, + "num_input_tokens_seen": 40677728, + "step": 60345 + }, + { + "epoch": 1.47436054039528, + "grad_norm": 0.6992678642272949, + "learning_rate": 1.7774417592369142e-06, + "loss": 0.1822, + "num_input_tokens_seen": 40681120, + "step": 60350 + }, + { + "epoch": 1.4744826912271272, + "grad_norm": 0.23237474262714386, + "learning_rate": 1.7773881206430122e-06, + "loss": 0.0315, + "num_input_tokens_seen": 40684512, + "step": 60355 + }, + { + "epoch": 1.4746048420589744, + "grad_norm": 17.29538917541504, + "learning_rate": 1.7773344763957349e-06, + "loss": 0.0442, + "num_input_tokens_seen": 40688480, + "step": 60360 + }, + { + "epoch": 1.4747269928908215, + "grad_norm": 8.166539192199707, + "learning_rate": 1.7772808264954724e-06, + "loss": 0.1618, + "num_input_tokens_seen": 40691872, + "step": 60365 + }, + { + "epoch": 1.4748491437226687, + "grad_norm": 71.33097076416016, + "learning_rate": 1.7772271709426145e-06, + "loss": 0.0412, + "num_input_tokens_seen": 40695264, + "step": 60370 + }, + { + "epoch": 1.474971294554516, + "grad_norm": 0.055804669857025146, + "learning_rate": 1.7771735097375514e-06, + "loss": 0.0019, + "num_input_tokens_seen": 40698976, + "step": 60375 + }, + { + "epoch": 1.4750934453863631, + "grad_norm": 0.2121654748916626, + "learning_rate": 1.777119842880674e-06, + "loss": 0.0681, + "num_input_tokens_seen": 40702432, + "step": 60380 + }, + { + "epoch": 1.4752155962182103, + "grad_norm": 0.056822698563337326, + "learning_rate": 1.7770661703723716e-06, + "loss": 0.1681, + "num_input_tokens_seen": 40705824, + "step": 60385 + }, + { + "epoch": 1.4753377470500575, + "grad_norm": 20.086894989013672, + "learning_rate": 1.7770124922130352e-06, + "loss": 0.2188, + "num_input_tokens_seen": 40709088, + "step": 60390 + }, + { + "epoch": 1.4754598978819047, + "grad_norm": 60.431453704833984, + "learning_rate": 1.7769588084030547e-06, + "loss": 0.1257, + "num_input_tokens_seen": 40712352, + "step": 60395 + }, + { + "epoch": 1.4755820487137519, + "grad_norm": 21.10099983215332, + "learning_rate": 1.776905118942821e-06, + "loss": 0.1547, + "num_input_tokens_seen": 40716192, + "step": 60400 + }, + { + "epoch": 1.4757041995455988, + "grad_norm": 0.06091950461268425, + "learning_rate": 1.7768514238327244e-06, + "loss": 0.0813, + "num_input_tokens_seen": 40719520, + "step": 60405 + }, + { + "epoch": 1.475826350377446, + "grad_norm": 12.754447937011719, + "learning_rate": 1.7767977230731552e-06, + "loss": 0.0564, + "num_input_tokens_seen": 40722528, + "step": 60410 + }, + { + "epoch": 1.4759485012092932, + "grad_norm": 85.40321350097656, + "learning_rate": 1.776744016664504e-06, + "loss": 0.2374, + "num_input_tokens_seen": 40725984, + "step": 60415 + }, + { + "epoch": 1.4760706520411404, + "grad_norm": 0.15571066737174988, + "learning_rate": 1.7766903046071613e-06, + "loss": 0.0318, + "num_input_tokens_seen": 40729248, + "step": 60420 + }, + { + "epoch": 1.4761928028729876, + "grad_norm": 0.17578119039535522, + "learning_rate": 1.776636586901518e-06, + "loss": 0.0308, + "num_input_tokens_seen": 40732576, + "step": 60425 + }, + { + "epoch": 1.4763149537048348, + "grad_norm": 95.00015258789062, + "learning_rate": 1.7765828635479645e-06, + "loss": 0.0675, + "num_input_tokens_seen": 40735840, + "step": 60430 + }, + { + "epoch": 1.476437104536682, + "grad_norm": 9.245221138000488, + "learning_rate": 1.7765291345468913e-06, + "loss": 0.1742, + "num_input_tokens_seen": 40739104, + "step": 60435 + }, + { + "epoch": 1.476559255368529, + "grad_norm": 2.94415545463562, + "learning_rate": 1.7764753998986898e-06, + "loss": 0.0036, + "num_input_tokens_seen": 40742944, + "step": 60440 + }, + { + "epoch": 1.4766814062003761, + "grad_norm": 57.272377014160156, + "learning_rate": 1.77642165960375e-06, + "loss": 0.1498, + "num_input_tokens_seen": 40746144, + "step": 60445 + }, + { + "epoch": 1.4768035570322233, + "grad_norm": 0.19033876061439514, + "learning_rate": 1.7763679136624632e-06, + "loss": 0.042, + "num_input_tokens_seen": 40749280, + "step": 60450 + }, + { + "epoch": 1.4769257078640705, + "grad_norm": 0.36067670583724976, + "learning_rate": 1.77631416207522e-06, + "loss": 0.1468, + "num_input_tokens_seen": 40752608, + "step": 60455 + }, + { + "epoch": 1.4770478586959177, + "grad_norm": 17.70503807067871, + "learning_rate": 1.7762604048424117e-06, + "loss": 0.1179, + "num_input_tokens_seen": 40756192, + "step": 60460 + }, + { + "epoch": 1.477170009527765, + "grad_norm": 0.3026616871356964, + "learning_rate": 1.7762066419644286e-06, + "loss": 0.0883, + "num_input_tokens_seen": 40759648, + "step": 60465 + }, + { + "epoch": 1.477292160359612, + "grad_norm": 0.1730181723833084, + "learning_rate": 1.7761528734416621e-06, + "loss": 0.0364, + "num_input_tokens_seen": 40763232, + "step": 60470 + }, + { + "epoch": 1.4774143111914593, + "grad_norm": 19.374736785888672, + "learning_rate": 1.7760990992745033e-06, + "loss": 0.0974, + "num_input_tokens_seen": 40766496, + "step": 60475 + }, + { + "epoch": 1.4775364620233065, + "grad_norm": 16.91132926940918, + "learning_rate": 1.776045319463343e-06, + "loss": 0.1396, + "num_input_tokens_seen": 40769760, + "step": 60480 + }, + { + "epoch": 1.4776586128551537, + "grad_norm": 10.6881742477417, + "learning_rate": 1.7759915340085724e-06, + "loss": 0.1247, + "num_input_tokens_seen": 40773088, + "step": 60485 + }, + { + "epoch": 1.4777807636870008, + "grad_norm": 0.2255319356918335, + "learning_rate": 1.7759377429105826e-06, + "loss": 0.2395, + "num_input_tokens_seen": 40776736, + "step": 60490 + }, + { + "epoch": 1.4779029145188478, + "grad_norm": 0.22949527204036713, + "learning_rate": 1.775883946169765e-06, + "loss": 0.0034, + "num_input_tokens_seen": 40779936, + "step": 60495 + }, + { + "epoch": 1.478025065350695, + "grad_norm": 31.79056167602539, + "learning_rate": 1.7758301437865107e-06, + "loss": 0.0059, + "num_input_tokens_seen": 40783584, + "step": 60500 + }, + { + "epoch": 1.4781472161825422, + "grad_norm": 0.13012264668941498, + "learning_rate": 1.7757763357612108e-06, + "loss": 0.084, + "num_input_tokens_seen": 40786720, + "step": 60505 + }, + { + "epoch": 1.4782693670143894, + "grad_norm": 82.19654083251953, + "learning_rate": 1.7757225220942567e-06, + "loss": 0.0243, + "num_input_tokens_seen": 40790496, + "step": 60510 + }, + { + "epoch": 1.4783915178462366, + "grad_norm": 0.14999359846115112, + "learning_rate": 1.7756687027860396e-06, + "loss": 0.0969, + "num_input_tokens_seen": 40793888, + "step": 60515 + }, + { + "epoch": 1.4785136686780838, + "grad_norm": 0.06497511267662048, + "learning_rate": 1.7756148778369512e-06, + "loss": 0.0723, + "num_input_tokens_seen": 40797024, + "step": 60520 + }, + { + "epoch": 1.4786358195099307, + "grad_norm": 12.169816970825195, + "learning_rate": 1.775561047247383e-06, + "loss": 0.1222, + "num_input_tokens_seen": 40800288, + "step": 60525 + }, + { + "epoch": 1.478757970341778, + "grad_norm": 0.07355663180351257, + "learning_rate": 1.775507211017726e-06, + "loss": 0.219, + "num_input_tokens_seen": 40803424, + "step": 60530 + }, + { + "epoch": 1.478880121173625, + "grad_norm": 0.49550914764404297, + "learning_rate": 1.7754533691483721e-06, + "loss": 0.0009, + "num_input_tokens_seen": 40806880, + "step": 60535 + }, + { + "epoch": 1.4790022720054723, + "grad_norm": 2.6531922817230225, + "learning_rate": 1.7753995216397128e-06, + "loss": 0.0022, + "num_input_tokens_seen": 40810080, + "step": 60540 + }, + { + "epoch": 1.4791244228373195, + "grad_norm": 14.86800479888916, + "learning_rate": 1.7753456684921395e-06, + "loss": 0.1097, + "num_input_tokens_seen": 40813472, + "step": 60545 + }, + { + "epoch": 1.4792465736691667, + "grad_norm": 2.2587924003601074, + "learning_rate": 1.775291809706044e-06, + "loss": 0.0336, + "num_input_tokens_seen": 40817120, + "step": 60550 + }, + { + "epoch": 1.4793687245010139, + "grad_norm": 20.083477020263672, + "learning_rate": 1.7752379452818179e-06, + "loss": 0.0927, + "num_input_tokens_seen": 40820320, + "step": 60555 + }, + { + "epoch": 1.479490875332861, + "grad_norm": 0.12122409045696259, + "learning_rate": 1.7751840752198528e-06, + "loss": 0.0943, + "num_input_tokens_seen": 40823904, + "step": 60560 + }, + { + "epoch": 1.4796130261647082, + "grad_norm": 0.036234304308891296, + "learning_rate": 1.7751301995205408e-06, + "loss": 0.0115, + "num_input_tokens_seen": 40827552, + "step": 60565 + }, + { + "epoch": 1.4797351769965554, + "grad_norm": 0.3813953101634979, + "learning_rate": 1.7750763181842735e-06, + "loss": 0.0008, + "num_input_tokens_seen": 40831136, + "step": 60570 + }, + { + "epoch": 1.4798573278284026, + "grad_norm": 0.12160413712263107, + "learning_rate": 1.7750224312114428e-06, + "loss": 0.0518, + "num_input_tokens_seen": 40834592, + "step": 60575 + }, + { + "epoch": 1.4799794786602498, + "grad_norm": 0.6403422355651855, + "learning_rate": 1.7749685386024405e-06, + "loss": 0.0595, + "num_input_tokens_seen": 40837408, + "step": 60580 + }, + { + "epoch": 1.4801016294920968, + "grad_norm": 14.806707382202148, + "learning_rate": 1.7749146403576585e-06, + "loss": 0.1728, + "num_input_tokens_seen": 40840736, + "step": 60585 + }, + { + "epoch": 1.480223780323944, + "grad_norm": 0.1330418586730957, + "learning_rate": 1.7748607364774886e-06, + "loss": 0.135, + "num_input_tokens_seen": 40844384, + "step": 60590 + }, + { + "epoch": 1.4803459311557912, + "grad_norm": 0.299897164106369, + "learning_rate": 1.7748068269623234e-06, + "loss": 0.0297, + "num_input_tokens_seen": 40847712, + "step": 60595 + }, + { + "epoch": 1.4804680819876384, + "grad_norm": 12.508942604064941, + "learning_rate": 1.7747529118125542e-06, + "loss": 0.3403, + "num_input_tokens_seen": 40851040, + "step": 60600 + }, + { + "epoch": 1.4805902328194855, + "grad_norm": 0.423649400472641, + "learning_rate": 1.7746989910285738e-06, + "loss": 0.0287, + "num_input_tokens_seen": 40854560, + "step": 60605 + }, + { + "epoch": 1.4807123836513327, + "grad_norm": 18.635122299194336, + "learning_rate": 1.7746450646107736e-06, + "loss": 0.0652, + "num_input_tokens_seen": 40857760, + "step": 60610 + }, + { + "epoch": 1.4808345344831797, + "grad_norm": 0.6192677021026611, + "learning_rate": 1.7745911325595463e-06, + "loss": 0.0025, + "num_input_tokens_seen": 40861280, + "step": 60615 + }, + { + "epoch": 1.4809566853150269, + "grad_norm": 58.895484924316406, + "learning_rate": 1.7745371948752838e-06, + "loss": 0.0533, + "num_input_tokens_seen": 40864672, + "step": 60620 + }, + { + "epoch": 1.481078836146874, + "grad_norm": 47.88134765625, + "learning_rate": 1.774483251558379e-06, + "loss": 0.1157, + "num_input_tokens_seen": 40868192, + "step": 60625 + }, + { + "epoch": 1.4812009869787213, + "grad_norm": 0.38593101501464844, + "learning_rate": 1.7744293026092233e-06, + "loss": 0.1239, + "num_input_tokens_seen": 40871648, + "step": 60630 + }, + { + "epoch": 1.4813231378105685, + "grad_norm": 8.302868843078613, + "learning_rate": 1.7743753480282094e-06, + "loss": 0.123, + "num_input_tokens_seen": 40874976, + "step": 60635 + }, + { + "epoch": 1.4814452886424156, + "grad_norm": 0.47456851601600647, + "learning_rate": 1.7743213878157297e-06, + "loss": 0.0655, + "num_input_tokens_seen": 40878752, + "step": 60640 + }, + { + "epoch": 1.4815674394742628, + "grad_norm": 0.27059075236320496, + "learning_rate": 1.7742674219721768e-06, + "loss": 0.0573, + "num_input_tokens_seen": 40882400, + "step": 60645 + }, + { + "epoch": 1.48168959030611, + "grad_norm": 0.7263038754463196, + "learning_rate": 1.7742134504979425e-06, + "loss": 0.0266, + "num_input_tokens_seen": 40885984, + "step": 60650 + }, + { + "epoch": 1.4818117411379572, + "grad_norm": 0.49012497067451477, + "learning_rate": 1.77415947339342e-06, + "loss": 0.0397, + "num_input_tokens_seen": 40889376, + "step": 60655 + }, + { + "epoch": 1.4819338919698044, + "grad_norm": 18.042882919311523, + "learning_rate": 1.774105490659002e-06, + "loss": 0.0456, + "num_input_tokens_seen": 40892512, + "step": 60660 + }, + { + "epoch": 1.4820560428016516, + "grad_norm": 0.13812461495399475, + "learning_rate": 1.77405150229508e-06, + "loss": 0.1718, + "num_input_tokens_seen": 40895520, + "step": 60665 + }, + { + "epoch": 1.4821781936334986, + "grad_norm": 0.17089881002902985, + "learning_rate": 1.7739975083020474e-06, + "loss": 0.0013, + "num_input_tokens_seen": 40898592, + "step": 60670 + }, + { + "epoch": 1.4823003444653458, + "grad_norm": 0.5600114464759827, + "learning_rate": 1.773943508680297e-06, + "loss": 0.1473, + "num_input_tokens_seen": 40901728, + "step": 60675 + }, + { + "epoch": 1.482422495297193, + "grad_norm": 413.35546875, + "learning_rate": 1.7738895034302212e-06, + "loss": 0.0816, + "num_input_tokens_seen": 40905184, + "step": 60680 + }, + { + "epoch": 1.4825446461290401, + "grad_norm": 13.743282318115234, + "learning_rate": 1.7738354925522128e-06, + "loss": 0.1157, + "num_input_tokens_seen": 40908960, + "step": 60685 + }, + { + "epoch": 1.4826667969608873, + "grad_norm": 0.12234126776456833, + "learning_rate": 1.7737814760466643e-06, + "loss": 0.0656, + "num_input_tokens_seen": 40912096, + "step": 60690 + }, + { + "epoch": 1.4827889477927345, + "grad_norm": 32.00730895996094, + "learning_rate": 1.773727453913969e-06, + "loss": 0.1437, + "num_input_tokens_seen": 40915424, + "step": 60695 + }, + { + "epoch": 1.4829110986245817, + "grad_norm": 23.14132308959961, + "learning_rate": 1.7736734261545196e-06, + "loss": 0.0724, + "num_input_tokens_seen": 40919008, + "step": 60700 + }, + { + "epoch": 1.4830332494564287, + "grad_norm": 123.06578826904297, + "learning_rate": 1.773619392768709e-06, + "loss": 0.1297, + "num_input_tokens_seen": 40922336, + "step": 60705 + }, + { + "epoch": 1.4831554002882759, + "grad_norm": 19.842958450317383, + "learning_rate": 1.7735653537569299e-06, + "loss": 0.128, + "num_input_tokens_seen": 40925792, + "step": 60710 + }, + { + "epoch": 1.483277551120123, + "grad_norm": 96.30461883544922, + "learning_rate": 1.7735113091195755e-06, + "loss": 0.1701, + "num_input_tokens_seen": 40928672, + "step": 60715 + }, + { + "epoch": 1.4833997019519702, + "grad_norm": 27.956899642944336, + "learning_rate": 1.773457258857039e-06, + "loss": 0.1278, + "num_input_tokens_seen": 40932256, + "step": 60720 + }, + { + "epoch": 1.4835218527838174, + "grad_norm": 17.01154327392578, + "learning_rate": 1.773403202969713e-06, + "loss": 0.0683, + "num_input_tokens_seen": 40935584, + "step": 60725 + }, + { + "epoch": 1.4836440036156646, + "grad_norm": 12.74868106842041, + "learning_rate": 1.773349141457991e-06, + "loss": 0.2014, + "num_input_tokens_seen": 40938784, + "step": 60730 + }, + { + "epoch": 1.4837661544475118, + "grad_norm": 2.696553945541382, + "learning_rate": 1.7732950743222661e-06, + "loss": 0.0533, + "num_input_tokens_seen": 40942368, + "step": 60735 + }, + { + "epoch": 1.483888305279359, + "grad_norm": 17.248987197875977, + "learning_rate": 1.7732410015629315e-06, + "loss": 0.1076, + "num_input_tokens_seen": 40945440, + "step": 60740 + }, + { + "epoch": 1.4840104561112062, + "grad_norm": 2.0968856811523438, + "learning_rate": 1.77318692318038e-06, + "loss": 0.0818, + "num_input_tokens_seen": 40948512, + "step": 60745 + }, + { + "epoch": 1.4841326069430534, + "grad_norm": 0.5884655117988586, + "learning_rate": 1.7731328391750055e-06, + "loss": 0.04, + "num_input_tokens_seen": 40951840, + "step": 60750 + }, + { + "epoch": 1.4842547577749006, + "grad_norm": 15.215673446655273, + "learning_rate": 1.773078749547201e-06, + "loss": 0.0523, + "num_input_tokens_seen": 40955168, + "step": 60755 + }, + { + "epoch": 1.4843769086067475, + "grad_norm": 190.5538330078125, + "learning_rate": 1.77302465429736e-06, + "loss": 0.051, + "num_input_tokens_seen": 40958944, + "step": 60760 + }, + { + "epoch": 1.4844990594385947, + "grad_norm": 10.600131034851074, + "learning_rate": 1.7729705534258757e-06, + "loss": 0.0605, + "num_input_tokens_seen": 40962336, + "step": 60765 + }, + { + "epoch": 1.484621210270442, + "grad_norm": 0.3233378827571869, + "learning_rate": 1.7729164469331418e-06, + "loss": 0.0227, + "num_input_tokens_seen": 40966112, + "step": 60770 + }, + { + "epoch": 1.484743361102289, + "grad_norm": 0.6192806959152222, + "learning_rate": 1.7728623348195515e-06, + "loss": 0.0049, + "num_input_tokens_seen": 40969184, + "step": 60775 + }, + { + "epoch": 1.4848655119341363, + "grad_norm": 10.349754333496094, + "learning_rate": 1.7728082170854983e-06, + "loss": 0.1831, + "num_input_tokens_seen": 40972768, + "step": 60780 + }, + { + "epoch": 1.4849876627659835, + "grad_norm": 17.2828426361084, + "learning_rate": 1.772754093731376e-06, + "loss": 0.073, + "num_input_tokens_seen": 40976672, + "step": 60785 + }, + { + "epoch": 1.4851098135978307, + "grad_norm": 0.6874222755432129, + "learning_rate": 1.772699964757578e-06, + "loss": 0.0274, + "num_input_tokens_seen": 40980192, + "step": 60790 + }, + { + "epoch": 1.4852319644296776, + "grad_norm": 0.15286195278167725, + "learning_rate": 1.7726458301644982e-06, + "loss": 0.0567, + "num_input_tokens_seen": 40983712, + "step": 60795 + }, + { + "epoch": 1.4853541152615248, + "grad_norm": 14.385998725891113, + "learning_rate": 1.7725916899525298e-06, + "loss": 0.0805, + "num_input_tokens_seen": 40987104, + "step": 60800 + }, + { + "epoch": 1.485476266093372, + "grad_norm": 0.2787397503852844, + "learning_rate": 1.7725375441220672e-06, + "loss": 0.0451, + "num_input_tokens_seen": 40990816, + "step": 60805 + }, + { + "epoch": 1.4855984169252192, + "grad_norm": 0.04248502850532532, + "learning_rate": 1.7724833926735037e-06, + "loss": 0.1048, + "num_input_tokens_seen": 40994336, + "step": 60810 + }, + { + "epoch": 1.4857205677570664, + "grad_norm": 0.4895806610584259, + "learning_rate": 1.772429235607233e-06, + "loss": 0.0065, + "num_input_tokens_seen": 40998240, + "step": 60815 + }, + { + "epoch": 1.4858427185889136, + "grad_norm": 18.34246063232422, + "learning_rate": 1.7723750729236492e-06, + "loss": 0.08, + "num_input_tokens_seen": 41001632, + "step": 60820 + }, + { + "epoch": 1.4859648694207608, + "grad_norm": 0.18905378878116608, + "learning_rate": 1.7723209046231462e-06, + "loss": 0.0097, + "num_input_tokens_seen": 41005344, + "step": 60825 + }, + { + "epoch": 1.486087020252608, + "grad_norm": 0.3081001341342926, + "learning_rate": 1.772266730706118e-06, + "loss": 0.1109, + "num_input_tokens_seen": 41008736, + "step": 60830 + }, + { + "epoch": 1.4862091710844552, + "grad_norm": 70.08404541015625, + "learning_rate": 1.772212551172958e-06, + "loss": 0.0916, + "num_input_tokens_seen": 41012064, + "step": 60835 + }, + { + "epoch": 1.4863313219163024, + "grad_norm": 0.1584898680448532, + "learning_rate": 1.772158366024061e-06, + "loss": 0.1368, + "num_input_tokens_seen": 41015264, + "step": 60840 + }, + { + "epoch": 1.4864534727481495, + "grad_norm": 0.09998618066310883, + "learning_rate": 1.7721041752598205e-06, + "loss": 0.1338, + "num_input_tokens_seen": 41018272, + "step": 60845 + }, + { + "epoch": 1.4865756235799965, + "grad_norm": 0.2783723771572113, + "learning_rate": 1.7720499788806307e-06, + "loss": 0.0726, + "num_input_tokens_seen": 41021472, + "step": 60850 + }, + { + "epoch": 1.4866977744118437, + "grad_norm": 0.23562151193618774, + "learning_rate": 1.771995776886886e-06, + "loss": 0.1353, + "num_input_tokens_seen": 41024800, + "step": 60855 + }, + { + "epoch": 1.4868199252436909, + "grad_norm": 0.4206966459751129, + "learning_rate": 1.7719415692789803e-06, + "loss": 0.0378, + "num_input_tokens_seen": 41028256, + "step": 60860 + }, + { + "epoch": 1.486942076075538, + "grad_norm": 1.2113614082336426, + "learning_rate": 1.771887356057308e-06, + "loss": 0.0713, + "num_input_tokens_seen": 41031840, + "step": 60865 + }, + { + "epoch": 1.4870642269073853, + "grad_norm": 15.251075744628906, + "learning_rate": 1.7718331372222629e-06, + "loss": 0.1079, + "num_input_tokens_seen": 41035936, + "step": 60870 + }, + { + "epoch": 1.4871863777392325, + "grad_norm": 0.31838202476501465, + "learning_rate": 1.7717789127742399e-06, + "loss": 0.1966, + "num_input_tokens_seen": 41039520, + "step": 60875 + }, + { + "epoch": 1.4873085285710796, + "grad_norm": 0.06436757743358612, + "learning_rate": 1.771724682713633e-06, + "loss": 0.0857, + "num_input_tokens_seen": 41042912, + "step": 60880 + }, + { + "epoch": 1.4874306794029266, + "grad_norm": 0.29811179637908936, + "learning_rate": 1.7716704470408365e-06, + "loss": 0.1083, + "num_input_tokens_seen": 41046112, + "step": 60885 + }, + { + "epoch": 1.4875528302347738, + "grad_norm": 16.720657348632812, + "learning_rate": 1.7716162057562451e-06, + "loss": 0.0346, + "num_input_tokens_seen": 41049824, + "step": 60890 + }, + { + "epoch": 1.487674981066621, + "grad_norm": 2.4709200859069824, + "learning_rate": 1.771561958860253e-06, + "loss": 0.0555, + "num_input_tokens_seen": 41053664, + "step": 60895 + }, + { + "epoch": 1.4877971318984682, + "grad_norm": 0.22559817135334015, + "learning_rate": 1.771507706353255e-06, + "loss": 0.0507, + "num_input_tokens_seen": 41057184, + "step": 60900 + }, + { + "epoch": 1.4879192827303154, + "grad_norm": 1.0803998708724976, + "learning_rate": 1.7714534482356454e-06, + "loss": 0.002, + "num_input_tokens_seen": 41060256, + "step": 60905 + }, + { + "epoch": 1.4880414335621626, + "grad_norm": 10.669313430786133, + "learning_rate": 1.7713991845078186e-06, + "loss": 0.1203, + "num_input_tokens_seen": 41063520, + "step": 60910 + }, + { + "epoch": 1.4881635843940098, + "grad_norm": 0.5857841968536377, + "learning_rate": 1.7713449151701698e-06, + "loss": 0.0725, + "num_input_tokens_seen": 41066720, + "step": 60915 + }, + { + "epoch": 1.488285735225857, + "grad_norm": 0.047439418733119965, + "learning_rate": 1.7712906402230933e-06, + "loss": 0.0618, + "num_input_tokens_seen": 41069920, + "step": 60920 + }, + { + "epoch": 1.4884078860577041, + "grad_norm": 0.353522926568985, + "learning_rate": 1.7712363596669835e-06, + "loss": 0.0585, + "num_input_tokens_seen": 41073120, + "step": 60925 + }, + { + "epoch": 1.4885300368895513, + "grad_norm": 0.3170434236526489, + "learning_rate": 1.7711820735022354e-06, + "loss": 0.0559, + "num_input_tokens_seen": 41076256, + "step": 60930 + }, + { + "epoch": 1.4886521877213985, + "grad_norm": 0.6093167066574097, + "learning_rate": 1.7711277817292443e-06, + "loss": 0.0344, + "num_input_tokens_seen": 41079392, + "step": 60935 + }, + { + "epoch": 1.4887743385532455, + "grad_norm": 0.14489717781543732, + "learning_rate": 1.7710734843484044e-06, + "loss": 0.0011, + "num_input_tokens_seen": 41083232, + "step": 60940 + }, + { + "epoch": 1.4888964893850927, + "grad_norm": 0.2810435891151428, + "learning_rate": 1.7710191813601102e-06, + "loss": 0.0877, + "num_input_tokens_seen": 41086560, + "step": 60945 + }, + { + "epoch": 1.4890186402169399, + "grad_norm": 0.062317151576280594, + "learning_rate": 1.770964872764758e-06, + "loss": 0.0294, + "num_input_tokens_seen": 41089888, + "step": 60950 + }, + { + "epoch": 1.489140791048787, + "grad_norm": 0.14095567166805267, + "learning_rate": 1.770910558562741e-06, + "loss": 0.0886, + "num_input_tokens_seen": 41093280, + "step": 60955 + }, + { + "epoch": 1.4892629418806342, + "grad_norm": 31.579681396484375, + "learning_rate": 1.7708562387544558e-06, + "loss": 0.0311, + "num_input_tokens_seen": 41096736, + "step": 60960 + }, + { + "epoch": 1.4893850927124814, + "grad_norm": 19.332107543945312, + "learning_rate": 1.7708019133402962e-06, + "loss": 0.0838, + "num_input_tokens_seen": 41100000, + "step": 60965 + }, + { + "epoch": 1.4895072435443286, + "grad_norm": 0.7163593769073486, + "learning_rate": 1.7707475823206582e-06, + "loss": 0.0983, + "num_input_tokens_seen": 41103456, + "step": 60970 + }, + { + "epoch": 1.4896293943761756, + "grad_norm": 17.53666877746582, + "learning_rate": 1.7706932456959362e-06, + "loss": 0.0961, + "num_input_tokens_seen": 41106720, + "step": 60975 + }, + { + "epoch": 1.4897515452080228, + "grad_norm": 0.1998463273048401, + "learning_rate": 1.7706389034665257e-06, + "loss": 0.0471, + "num_input_tokens_seen": 41110560, + "step": 60980 + }, + { + "epoch": 1.48987369603987, + "grad_norm": 0.6920112371444702, + "learning_rate": 1.7705845556328217e-06, + "loss": 0.0978, + "num_input_tokens_seen": 41114016, + "step": 60985 + }, + { + "epoch": 1.4899958468717172, + "grad_norm": 16.81520652770996, + "learning_rate": 1.7705302021952198e-06, + "loss": 0.2061, + "num_input_tokens_seen": 41117152, + "step": 60990 + }, + { + "epoch": 1.4901179977035643, + "grad_norm": 90.10795593261719, + "learning_rate": 1.7704758431541146e-06, + "loss": 0.0534, + "num_input_tokens_seen": 41120416, + "step": 60995 + }, + { + "epoch": 1.4902401485354115, + "grad_norm": 0.15558181703090668, + "learning_rate": 1.7704214785099024e-06, + "loss": 0.0008, + "num_input_tokens_seen": 41123872, + "step": 61000 + }, + { + "epoch": 1.4903622993672587, + "grad_norm": 25.905107498168945, + "learning_rate": 1.7703671082629776e-06, + "loss": 0.2285, + "num_input_tokens_seen": 41127520, + "step": 61005 + }, + { + "epoch": 1.490484450199106, + "grad_norm": 0.2514127492904663, + "learning_rate": 1.7703127324137358e-06, + "loss": 0.0022, + "num_input_tokens_seen": 41131296, + "step": 61010 + }, + { + "epoch": 1.490606601030953, + "grad_norm": 13.766162872314453, + "learning_rate": 1.7702583509625732e-06, + "loss": 0.2344, + "num_input_tokens_seen": 41134304, + "step": 61015 + }, + { + "epoch": 1.4907287518628003, + "grad_norm": 0.7958281636238098, + "learning_rate": 1.7702039639098842e-06, + "loss": 0.0826, + "num_input_tokens_seen": 41137568, + "step": 61020 + }, + { + "epoch": 1.4908509026946475, + "grad_norm": 8.671177864074707, + "learning_rate": 1.770149571256065e-06, + "loss": 0.1647, + "num_input_tokens_seen": 41140960, + "step": 61025 + }, + { + "epoch": 1.4909730535264945, + "grad_norm": 21.409822463989258, + "learning_rate": 1.7700951730015113e-06, + "loss": 0.1514, + "num_input_tokens_seen": 41144288, + "step": 61030 + }, + { + "epoch": 1.4910952043583416, + "grad_norm": 11.56128215789795, + "learning_rate": 1.770040769146618e-06, + "loss": 0.068, + "num_input_tokens_seen": 41148000, + "step": 61035 + }, + { + "epoch": 1.4912173551901888, + "grad_norm": 0.8195192813873291, + "learning_rate": 1.769986359691781e-06, + "loss": 0.0308, + "num_input_tokens_seen": 41151584, + "step": 61040 + }, + { + "epoch": 1.491339506022036, + "grad_norm": 0.4487052857875824, + "learning_rate": 1.7699319446373963e-06, + "loss": 0.16, + "num_input_tokens_seen": 41154848, + "step": 61045 + }, + { + "epoch": 1.4914616568538832, + "grad_norm": 43.25626754760742, + "learning_rate": 1.7698775239838596e-06, + "loss": 0.1018, + "num_input_tokens_seen": 41158304, + "step": 61050 + }, + { + "epoch": 1.4915838076857304, + "grad_norm": 0.4304538071155548, + "learning_rate": 1.769823097731566e-06, + "loss": 0.1217, + "num_input_tokens_seen": 41161696, + "step": 61055 + }, + { + "epoch": 1.4917059585175774, + "grad_norm": 2.894843578338623, + "learning_rate": 1.769768665880912e-06, + "loss": 0.1014, + "num_input_tokens_seen": 41165792, + "step": 61060 + }, + { + "epoch": 1.4918281093494246, + "grad_norm": 135.68406677246094, + "learning_rate": 1.7697142284322931e-06, + "loss": 0.1832, + "num_input_tokens_seen": 41169120, + "step": 61065 + }, + { + "epoch": 1.4919502601812717, + "grad_norm": 72.74583435058594, + "learning_rate": 1.7696597853861057e-06, + "loss": 0.1397, + "num_input_tokens_seen": 41172192, + "step": 61070 + }, + { + "epoch": 1.492072411013119, + "grad_norm": 0.8611149787902832, + "learning_rate": 1.769605336742745e-06, + "loss": 0.0759, + "num_input_tokens_seen": 41175584, + "step": 61075 + }, + { + "epoch": 1.4921945618449661, + "grad_norm": 0.2745153605937958, + "learning_rate": 1.7695508825026074e-06, + "loss": 0.0608, + "num_input_tokens_seen": 41178656, + "step": 61080 + }, + { + "epoch": 1.4923167126768133, + "grad_norm": 24.806541442871094, + "learning_rate": 1.7694964226660884e-06, + "loss": 0.0982, + "num_input_tokens_seen": 41181792, + "step": 61085 + }, + { + "epoch": 1.4924388635086605, + "grad_norm": 13.456315040588379, + "learning_rate": 1.769441957233585e-06, + "loss": 0.0757, + "num_input_tokens_seen": 41184864, + "step": 61090 + }, + { + "epoch": 1.4925610143405077, + "grad_norm": 29.400386810302734, + "learning_rate": 1.7693874862054928e-06, + "loss": 0.0755, + "num_input_tokens_seen": 41188640, + "step": 61095 + }, + { + "epoch": 1.4926831651723549, + "grad_norm": 0.4075230062007904, + "learning_rate": 1.7693330095822074e-06, + "loss": 0.0749, + "num_input_tokens_seen": 41192096, + "step": 61100 + }, + { + "epoch": 1.492805316004202, + "grad_norm": 0.12924809753894806, + "learning_rate": 1.7692785273641256e-06, + "loss": 0.0377, + "num_input_tokens_seen": 41195104, + "step": 61105 + }, + { + "epoch": 1.4929274668360493, + "grad_norm": 0.38088515400886536, + "learning_rate": 1.7692240395516435e-06, + "loss": 0.0257, + "num_input_tokens_seen": 41198816, + "step": 61110 + }, + { + "epoch": 1.4930496176678965, + "grad_norm": 0.2689894735813141, + "learning_rate": 1.7691695461451573e-06, + "loss": 0.0178, + "num_input_tokens_seen": 41202144, + "step": 61115 + }, + { + "epoch": 1.4931717684997434, + "grad_norm": 23.341785430908203, + "learning_rate": 1.769115047145063e-06, + "loss": 0.1223, + "num_input_tokens_seen": 41205344, + "step": 61120 + }, + { + "epoch": 1.4932939193315906, + "grad_norm": 0.17626403272151947, + "learning_rate": 1.7690605425517578e-06, + "loss": 0.1508, + "num_input_tokens_seen": 41208544, + "step": 61125 + }, + { + "epoch": 1.4934160701634378, + "grad_norm": 0.6624254584312439, + "learning_rate": 1.7690060323656368e-06, + "loss": 0.026, + "num_input_tokens_seen": 41211552, + "step": 61130 + }, + { + "epoch": 1.493538220995285, + "grad_norm": 53.69667053222656, + "learning_rate": 1.7689515165870974e-06, + "loss": 0.0044, + "num_input_tokens_seen": 41214944, + "step": 61135 + }, + { + "epoch": 1.4936603718271322, + "grad_norm": 25.466035842895508, + "learning_rate": 1.7688969952165358e-06, + "loss": 0.0931, + "num_input_tokens_seen": 41218400, + "step": 61140 + }, + { + "epoch": 1.4937825226589794, + "grad_norm": 71.1301040649414, + "learning_rate": 1.7688424682543483e-06, + "loss": 0.0658, + "num_input_tokens_seen": 41221472, + "step": 61145 + }, + { + "epoch": 1.4939046734908263, + "grad_norm": 76.20751190185547, + "learning_rate": 1.768787935700932e-06, + "loss": 0.0199, + "num_input_tokens_seen": 41225184, + "step": 61150 + }, + { + "epoch": 1.4940268243226735, + "grad_norm": 1.1861448287963867, + "learning_rate": 1.7687333975566828e-06, + "loss": 0.0259, + "num_input_tokens_seen": 41228448, + "step": 61155 + }, + { + "epoch": 1.4941489751545207, + "grad_norm": 0.051383551210165024, + "learning_rate": 1.7686788538219971e-06, + "loss": 0.091, + "num_input_tokens_seen": 41231904, + "step": 61160 + }, + { + "epoch": 1.494271125986368, + "grad_norm": 0.0947948694229126, + "learning_rate": 1.7686243044972727e-06, + "loss": 0.0311, + "num_input_tokens_seen": 41234912, + "step": 61165 + }, + { + "epoch": 1.494393276818215, + "grad_norm": 7.97659158706665, + "learning_rate": 1.7685697495829054e-06, + "loss": 0.2079, + "num_input_tokens_seen": 41237856, + "step": 61170 + }, + { + "epoch": 1.4945154276500623, + "grad_norm": 28.53188705444336, + "learning_rate": 1.768515189079292e-06, + "loss": 0.1181, + "num_input_tokens_seen": 41241056, + "step": 61175 + }, + { + "epoch": 1.4946375784819095, + "grad_norm": 17.43120574951172, + "learning_rate": 1.7684606229868294e-06, + "loss": 0.2076, + "num_input_tokens_seen": 41244128, + "step": 61180 + }, + { + "epoch": 1.4947597293137567, + "grad_norm": 0.34168973565101624, + "learning_rate": 1.7684060513059147e-06, + "loss": 0.0468, + "num_input_tokens_seen": 41247328, + "step": 61185 + }, + { + "epoch": 1.4948818801456039, + "grad_norm": 0.08489463478326797, + "learning_rate": 1.7683514740369442e-06, + "loss": 0.0097, + "num_input_tokens_seen": 41251040, + "step": 61190 + }, + { + "epoch": 1.495004030977451, + "grad_norm": 0.22557464241981506, + "learning_rate": 1.7682968911803157e-06, + "loss": 0.0571, + "num_input_tokens_seen": 41254240, + "step": 61195 + }, + { + "epoch": 1.4951261818092982, + "grad_norm": 10.98664665222168, + "learning_rate": 1.768242302736425e-06, + "loss": 0.0489, + "num_input_tokens_seen": 41257376, + "step": 61200 + }, + { + "epoch": 1.4952483326411452, + "grad_norm": 0.31744417548179626, + "learning_rate": 1.7681877087056699e-06, + "loss": 0.1054, + "num_input_tokens_seen": 41260832, + "step": 61205 + }, + { + "epoch": 1.4953704834729924, + "grad_norm": 18.24903106689453, + "learning_rate": 1.768133109088447e-06, + "loss": 0.1017, + "num_input_tokens_seen": 41263904, + "step": 61210 + }, + { + "epoch": 1.4954926343048396, + "grad_norm": 1.512769341468811, + "learning_rate": 1.7680785038851536e-06, + "loss": 0.1413, + "num_input_tokens_seen": 41267168, + "step": 61215 + }, + { + "epoch": 1.4956147851366868, + "grad_norm": 12.158363342285156, + "learning_rate": 1.768023893096187e-06, + "loss": 0.073, + "num_input_tokens_seen": 41270496, + "step": 61220 + }, + { + "epoch": 1.495736935968534, + "grad_norm": 108.42285919189453, + "learning_rate": 1.7679692767219437e-06, + "loss": 0.1188, + "num_input_tokens_seen": 41273824, + "step": 61225 + }, + { + "epoch": 1.4958590868003812, + "grad_norm": 0.15408147871494293, + "learning_rate": 1.7679146547628214e-06, + "loss": 0.0501, + "num_input_tokens_seen": 41277408, + "step": 61230 + }, + { + "epoch": 1.4959812376322283, + "grad_norm": 2.2216410636901855, + "learning_rate": 1.7678600272192172e-06, + "loss": 0.0318, + "num_input_tokens_seen": 41280864, + "step": 61235 + }, + { + "epoch": 1.4961033884640753, + "grad_norm": 0.5559858083724976, + "learning_rate": 1.7678053940915284e-06, + "loss": 0.0297, + "num_input_tokens_seen": 41284256, + "step": 61240 + }, + { + "epoch": 1.4962255392959225, + "grad_norm": 0.15119442343711853, + "learning_rate": 1.767750755380152e-06, + "loss": 0.0066, + "num_input_tokens_seen": 41287264, + "step": 61245 + }, + { + "epoch": 1.4963476901277697, + "grad_norm": 0.7975942492485046, + "learning_rate": 1.767696111085486e-06, + "loss": 0.0994, + "num_input_tokens_seen": 41290400, + "step": 61250 + }, + { + "epoch": 1.4964698409596169, + "grad_norm": 15.413094520568848, + "learning_rate": 1.767641461207927e-06, + "loss": 0.1207, + "num_input_tokens_seen": 41293664, + "step": 61255 + }, + { + "epoch": 1.496591991791464, + "grad_norm": 1.0722957849502563, + "learning_rate": 1.7675868057478733e-06, + "loss": 0.0529, + "num_input_tokens_seen": 41297184, + "step": 61260 + }, + { + "epoch": 1.4967141426233113, + "grad_norm": 0.5817152857780457, + "learning_rate": 1.7675321447057217e-06, + "loss": 0.0674, + "num_input_tokens_seen": 41300640, + "step": 61265 + }, + { + "epoch": 1.4968362934551585, + "grad_norm": 15.53848934173584, + "learning_rate": 1.7674774780818698e-06, + "loss": 0.0671, + "num_input_tokens_seen": 41303648, + "step": 61270 + }, + { + "epoch": 1.4969584442870056, + "grad_norm": 0.3177522122859955, + "learning_rate": 1.7674228058767151e-06, + "loss": 0.0012, + "num_input_tokens_seen": 41307232, + "step": 61275 + }, + { + "epoch": 1.4970805951188528, + "grad_norm": 44.76844024658203, + "learning_rate": 1.7673681280906556e-06, + "loss": 0.1176, + "num_input_tokens_seen": 41310432, + "step": 61280 + }, + { + "epoch": 1.4972027459507, + "grad_norm": 0.697921633720398, + "learning_rate": 1.7673134447240887e-06, + "loss": 0.0818, + "num_input_tokens_seen": 41314016, + "step": 61285 + }, + { + "epoch": 1.4973248967825472, + "grad_norm": 0.05985622480511665, + "learning_rate": 1.7672587557774117e-06, + "loss": 0.0908, + "num_input_tokens_seen": 41317152, + "step": 61290 + }, + { + "epoch": 1.4974470476143942, + "grad_norm": 27.428375244140625, + "learning_rate": 1.767204061251023e-06, + "loss": 0.163, + "num_input_tokens_seen": 41320288, + "step": 61295 + }, + { + "epoch": 1.4975691984462414, + "grad_norm": 108.94695281982422, + "learning_rate": 1.7671493611453202e-06, + "loss": 0.0073, + "num_input_tokens_seen": 41323488, + "step": 61300 + }, + { + "epoch": 1.4976913492780886, + "grad_norm": 47.66750717163086, + "learning_rate": 1.7670946554607006e-06, + "loss": 0.006, + "num_input_tokens_seen": 41327136, + "step": 61305 + }, + { + "epoch": 1.4978135001099357, + "grad_norm": 0.47193843126296997, + "learning_rate": 1.7670399441975622e-06, + "loss": 0.1182, + "num_input_tokens_seen": 41330784, + "step": 61310 + }, + { + "epoch": 1.497935650941783, + "grad_norm": 69.85272979736328, + "learning_rate": 1.766985227356303e-06, + "loss": 0.1859, + "num_input_tokens_seen": 41334112, + "step": 61315 + }, + { + "epoch": 1.4980578017736301, + "grad_norm": 1.0827215909957886, + "learning_rate": 1.766930504937321e-06, + "loss": 0.0557, + "num_input_tokens_seen": 41337696, + "step": 61320 + }, + { + "epoch": 1.4981799526054773, + "grad_norm": 0.1586698740720749, + "learning_rate": 1.7668757769410144e-06, + "loss": 0.178, + "num_input_tokens_seen": 41341472, + "step": 61325 + }, + { + "epoch": 1.4983021034373243, + "grad_norm": 2.0001184940338135, + "learning_rate": 1.7668210433677808e-06, + "loss": 0.0019, + "num_input_tokens_seen": 41345248, + "step": 61330 + }, + { + "epoch": 1.4984242542691715, + "grad_norm": 53.12262725830078, + "learning_rate": 1.7667663042180182e-06, + "loss": 0.0774, + "num_input_tokens_seen": 41348320, + "step": 61335 + }, + { + "epoch": 1.4985464051010187, + "grad_norm": 0.2784538269042969, + "learning_rate": 1.766711559492125e-06, + "loss": 0.0294, + "num_input_tokens_seen": 41351776, + "step": 61340 + }, + { + "epoch": 1.4986685559328659, + "grad_norm": 0.18959470093250275, + "learning_rate": 1.7666568091904989e-06, + "loss": 0.1105, + "num_input_tokens_seen": 41355104, + "step": 61345 + }, + { + "epoch": 1.498790706764713, + "grad_norm": 0.5925389528274536, + "learning_rate": 1.7666020533135382e-06, + "loss": 0.0619, + "num_input_tokens_seen": 41358240, + "step": 61350 + }, + { + "epoch": 1.4989128575965602, + "grad_norm": 12.532886505126953, + "learning_rate": 1.7665472918616412e-06, + "loss": 0.1805, + "num_input_tokens_seen": 41361504, + "step": 61355 + }, + { + "epoch": 1.4990350084284074, + "grad_norm": 0.5867086052894592, + "learning_rate": 1.7664925248352062e-06, + "loss": 0.0039, + "num_input_tokens_seen": 41364960, + "step": 61360 + }, + { + "epoch": 1.4991571592602546, + "grad_norm": 37.33353805541992, + "learning_rate": 1.7664377522346312e-06, + "loss": 0.1081, + "num_input_tokens_seen": 41368224, + "step": 61365 + }, + { + "epoch": 1.4992793100921018, + "grad_norm": 0.01031525433063507, + "learning_rate": 1.766382974060315e-06, + "loss": 0.0004, + "num_input_tokens_seen": 41371424, + "step": 61370 + }, + { + "epoch": 1.499401460923949, + "grad_norm": 1.8833508491516113, + "learning_rate": 1.7663281903126557e-06, + "loss": 0.036, + "num_input_tokens_seen": 41374496, + "step": 61375 + }, + { + "epoch": 1.4995236117557962, + "grad_norm": 0.07677330821752548, + "learning_rate": 1.7662734009920516e-06, + "loss": 0.12, + "num_input_tokens_seen": 41377888, + "step": 61380 + }, + { + "epoch": 1.4996457625876431, + "grad_norm": 12.478357315063477, + "learning_rate": 1.7662186060989011e-06, + "loss": 0.1095, + "num_input_tokens_seen": 41381152, + "step": 61385 + }, + { + "epoch": 1.4997679134194903, + "grad_norm": 0.14680707454681396, + "learning_rate": 1.7661638056336031e-06, + "loss": 0.1075, + "num_input_tokens_seen": 41384544, + "step": 61390 + }, + { + "epoch": 1.4998900642513375, + "grad_norm": 0.17693321406841278, + "learning_rate": 1.7661089995965556e-06, + "loss": 0.0568, + "num_input_tokens_seen": 41387872, + "step": 61395 + }, + { + "epoch": 1.5000122150831847, + "grad_norm": 55.276275634765625, + "learning_rate": 1.7660541879881574e-06, + "loss": 0.0425, + "num_input_tokens_seen": 41391200, + "step": 61400 + }, + { + "epoch": 1.5001099357486625, + "eval_loss": 0.15240158140659332, + "eval_runtime": 47.7435, + "eval_samples_per_second": 762.093, + "eval_steps_per_second": 95.28, + "num_input_tokens_seen": 41393504, + "step": 61404 + }, + { + "epoch": 1.500134365915032, + "grad_norm": 25.473384857177734, + "learning_rate": 1.765999370808807e-06, + "loss": 0.1068, + "num_input_tokens_seen": 41394336, + "step": 61405 + }, + { + "epoch": 1.500256516746879, + "grad_norm": 0.4577730596065521, + "learning_rate": 1.7659445480589034e-06, + "loss": 0.0767, + "num_input_tokens_seen": 41397856, + "step": 61410 + }, + { + "epoch": 1.500378667578726, + "grad_norm": 1.0948963165283203, + "learning_rate": 1.765889719738845e-06, + "loss": 0.0235, + "num_input_tokens_seen": 41401120, + "step": 61415 + }, + { + "epoch": 1.5005008184105733, + "grad_norm": 1.4936326742172241, + "learning_rate": 1.7658348858490304e-06, + "loss": 0.0499, + "num_input_tokens_seen": 41404512, + "step": 61420 + }, + { + "epoch": 1.5006229692424204, + "grad_norm": 128.0543670654297, + "learning_rate": 1.7657800463898587e-06, + "loss": 0.0994, + "num_input_tokens_seen": 41407968, + "step": 61425 + }, + { + "epoch": 1.5007451200742676, + "grad_norm": 0.02252628654241562, + "learning_rate": 1.7657252013617283e-06, + "loss": 0.048, + "num_input_tokens_seen": 41411424, + "step": 61430 + }, + { + "epoch": 1.5008672709061148, + "grad_norm": 1.9966330528259277, + "learning_rate": 1.7656703507650386e-06, + "loss": 0.1001, + "num_input_tokens_seen": 41414624, + "step": 61435 + }, + { + "epoch": 1.500989421737962, + "grad_norm": 12.924593925476074, + "learning_rate": 1.765615494600188e-06, + "loss": 0.0904, + "num_input_tokens_seen": 41418272, + "step": 61440 + }, + { + "epoch": 1.5011115725698092, + "grad_norm": 0.442061185836792, + "learning_rate": 1.7655606328675754e-06, + "loss": 0.1318, + "num_input_tokens_seen": 41421600, + "step": 61445 + }, + { + "epoch": 1.5012337234016564, + "grad_norm": 0.0899852067232132, + "learning_rate": 1.7655057655676003e-06, + "loss": 0.0189, + "num_input_tokens_seen": 41425184, + "step": 61450 + }, + { + "epoch": 1.5013558742335036, + "grad_norm": 0.10864616930484772, + "learning_rate": 1.7654508927006612e-06, + "loss": 0.0023, + "num_input_tokens_seen": 41428448, + "step": 61455 + }, + { + "epoch": 1.5014780250653508, + "grad_norm": 17.705018997192383, + "learning_rate": 1.7653960142671574e-06, + "loss": 0.137, + "num_input_tokens_seen": 41431840, + "step": 61460 + }, + { + "epoch": 1.501600175897198, + "grad_norm": 0.43008753657341003, + "learning_rate": 1.7653411302674877e-06, + "loss": 0.1144, + "num_input_tokens_seen": 41434848, + "step": 61465 + }, + { + "epoch": 1.5017223267290452, + "grad_norm": 12.536665916442871, + "learning_rate": 1.7652862407020517e-06, + "loss": 0.1333, + "num_input_tokens_seen": 41437856, + "step": 61470 + }, + { + "epoch": 1.5018444775608923, + "grad_norm": 0.07049085944890976, + "learning_rate": 1.7652313455712483e-06, + "loss": 0.0135, + "num_input_tokens_seen": 41443104, + "step": 61475 + }, + { + "epoch": 1.5019666283927393, + "grad_norm": 0.454694002866745, + "learning_rate": 1.7651764448754767e-06, + "loss": 0.085, + "num_input_tokens_seen": 41446240, + "step": 61480 + }, + { + "epoch": 1.5020887792245865, + "grad_norm": 0.4432663917541504, + "learning_rate": 1.7651215386151361e-06, + "loss": 0.1033, + "num_input_tokens_seen": 41449888, + "step": 61485 + }, + { + "epoch": 1.5022109300564337, + "grad_norm": 0.5971336960792542, + "learning_rate": 1.765066626790626e-06, + "loss": 0.0637, + "num_input_tokens_seen": 41453728, + "step": 61490 + }, + { + "epoch": 1.5023330808882809, + "grad_norm": 0.9203071594238281, + "learning_rate": 1.7650117094023456e-06, + "loss": 0.1901, + "num_input_tokens_seen": 41457184, + "step": 61495 + }, + { + "epoch": 1.5024552317201278, + "grad_norm": 20.82863426208496, + "learning_rate": 1.764956786450694e-06, + "loss": 0.0401, + "num_input_tokens_seen": 41461024, + "step": 61500 + }, + { + "epoch": 1.502577382551975, + "grad_norm": 0.18386538326740265, + "learning_rate": 1.7649018579360712e-06, + "loss": 0.147, + "num_input_tokens_seen": 41464544, + "step": 61505 + }, + { + "epoch": 1.5026995333838222, + "grad_norm": 0.1778634488582611, + "learning_rate": 1.7648469238588763e-06, + "loss": 0.0493, + "num_input_tokens_seen": 41467616, + "step": 61510 + }, + { + "epoch": 1.5028216842156694, + "grad_norm": 0.11675328761339188, + "learning_rate": 1.764791984219509e-06, + "loss": 0.0026, + "num_input_tokens_seen": 41471456, + "step": 61515 + }, + { + "epoch": 1.5029438350475166, + "grad_norm": 0.2592644691467285, + "learning_rate": 1.7647370390183686e-06, + "loss": 0.1136, + "num_input_tokens_seen": 41475168, + "step": 61520 + }, + { + "epoch": 1.5030659858793638, + "grad_norm": 0.22310876846313477, + "learning_rate": 1.7646820882558546e-06, + "loss": 0.0544, + "num_input_tokens_seen": 41478880, + "step": 61525 + }, + { + "epoch": 1.503188136711211, + "grad_norm": 17.4942569732666, + "learning_rate": 1.7646271319323667e-06, + "loss": 0.0734, + "num_input_tokens_seen": 41482016, + "step": 61530 + }, + { + "epoch": 1.5033102875430582, + "grad_norm": 22.144229888916016, + "learning_rate": 1.7645721700483049e-06, + "loss": 0.1048, + "num_input_tokens_seen": 41485536, + "step": 61535 + }, + { + "epoch": 1.5034324383749054, + "grad_norm": 0.22404468059539795, + "learning_rate": 1.7645172026040687e-06, + "loss": 0.0583, + "num_input_tokens_seen": 41488800, + "step": 61540 + }, + { + "epoch": 1.5035545892067526, + "grad_norm": 54.67439270019531, + "learning_rate": 1.7644622296000575e-06, + "loss": 0.1607, + "num_input_tokens_seen": 41491936, + "step": 61545 + }, + { + "epoch": 1.5036767400385997, + "grad_norm": 42.04545974731445, + "learning_rate": 1.7644072510366714e-06, + "loss": 0.1701, + "num_input_tokens_seen": 41495520, + "step": 61550 + }, + { + "epoch": 1.503798890870447, + "grad_norm": 12.226096153259277, + "learning_rate": 1.7643522669143103e-06, + "loss": 0.0329, + "num_input_tokens_seen": 41498848, + "step": 61555 + }, + { + "epoch": 1.5039210417022941, + "grad_norm": 0.7583820819854736, + "learning_rate": 1.764297277233374e-06, + "loss": 0.0945, + "num_input_tokens_seen": 41502304, + "step": 61560 + }, + { + "epoch": 1.5040431925341413, + "grad_norm": 1.9888880252838135, + "learning_rate": 1.764242281994262e-06, + "loss": 0.0717, + "num_input_tokens_seen": 41505504, + "step": 61565 + }, + { + "epoch": 1.5041653433659883, + "grad_norm": 0.3815326392650604, + "learning_rate": 1.7641872811973749e-06, + "loss": 0.0826, + "num_input_tokens_seen": 41508768, + "step": 61570 + }, + { + "epoch": 1.5042874941978355, + "grad_norm": 40.32561111450195, + "learning_rate": 1.7641322748431122e-06, + "loss": 0.1313, + "num_input_tokens_seen": 41511904, + "step": 61575 + }, + { + "epoch": 1.5044096450296827, + "grad_norm": 0.571736752986908, + "learning_rate": 1.764077262931874e-06, + "loss": 0.0715, + "num_input_tokens_seen": 41515424, + "step": 61580 + }, + { + "epoch": 1.5045317958615299, + "grad_norm": 27.67289161682129, + "learning_rate": 1.7640222454640602e-06, + "loss": 0.0735, + "num_input_tokens_seen": 41519264, + "step": 61585 + }, + { + "epoch": 1.5046539466933768, + "grad_norm": 0.2176530808210373, + "learning_rate": 1.7639672224400716e-06, + "loss": 0.0869, + "num_input_tokens_seen": 41522400, + "step": 61590 + }, + { + "epoch": 1.504776097525224, + "grad_norm": 43.00497055053711, + "learning_rate": 1.763912193860308e-06, + "loss": 0.0572, + "num_input_tokens_seen": 41525600, + "step": 61595 + }, + { + "epoch": 1.5048982483570712, + "grad_norm": 0.44729945063591003, + "learning_rate": 1.763857159725169e-06, + "loss": 0.0358, + "num_input_tokens_seen": 41528992, + "step": 61600 + }, + { + "epoch": 1.5050203991889184, + "grad_norm": 0.49896079301834106, + "learning_rate": 1.7638021200350555e-06, + "loss": 0.0787, + "num_input_tokens_seen": 41532320, + "step": 61605 + }, + { + "epoch": 1.5051425500207656, + "grad_norm": 128.6964874267578, + "learning_rate": 1.7637470747903675e-06, + "loss": 0.0351, + "num_input_tokens_seen": 41535776, + "step": 61610 + }, + { + "epoch": 1.5052647008526128, + "grad_norm": 0.9645973443984985, + "learning_rate": 1.7636920239915053e-06, + "loss": 0.0176, + "num_input_tokens_seen": 41539040, + "step": 61615 + }, + { + "epoch": 1.50538685168446, + "grad_norm": 0.09933934360742569, + "learning_rate": 1.7636369676388694e-06, + "loss": 0.0372, + "num_input_tokens_seen": 41541984, + "step": 61620 + }, + { + "epoch": 1.5055090025163071, + "grad_norm": 38.70368957519531, + "learning_rate": 1.76358190573286e-06, + "loss": 0.2492, + "num_input_tokens_seen": 41545312, + "step": 61625 + }, + { + "epoch": 1.5056311533481543, + "grad_norm": 0.6128526329994202, + "learning_rate": 1.7635268382738774e-06, + "loss": 0.0623, + "num_input_tokens_seen": 41548448, + "step": 61630 + }, + { + "epoch": 1.5057533041800015, + "grad_norm": 0.14343132078647614, + "learning_rate": 1.7634717652623228e-06, + "loss": 0.0943, + "num_input_tokens_seen": 41551904, + "step": 61635 + }, + { + "epoch": 1.5058754550118487, + "grad_norm": 32.26298522949219, + "learning_rate": 1.7634166866985958e-06, + "loss": 0.1596, + "num_input_tokens_seen": 41555232, + "step": 61640 + }, + { + "epoch": 1.505997605843696, + "grad_norm": 0.15098640322685242, + "learning_rate": 1.7633616025830972e-06, + "loss": 0.0694, + "num_input_tokens_seen": 41558624, + "step": 61645 + }, + { + "epoch": 1.506119756675543, + "grad_norm": 0.2517228424549103, + "learning_rate": 1.7633065129162282e-06, + "loss": 0.1143, + "num_input_tokens_seen": 41562208, + "step": 61650 + }, + { + "epoch": 1.5062419075073903, + "grad_norm": 0.30164381861686707, + "learning_rate": 1.7632514176983886e-06, + "loss": 0.0379, + "num_input_tokens_seen": 41565664, + "step": 61655 + }, + { + "epoch": 1.5063640583392373, + "grad_norm": 1.0749255418777466, + "learning_rate": 1.7631963169299794e-06, + "loss": 0.0141, + "num_input_tokens_seen": 41568672, + "step": 61660 + }, + { + "epoch": 1.5064862091710844, + "grad_norm": 9.241209030151367, + "learning_rate": 1.7631412106114014e-06, + "loss": 0.209, + "num_input_tokens_seen": 41572000, + "step": 61665 + }, + { + "epoch": 1.5066083600029316, + "grad_norm": 0.10479751229286194, + "learning_rate": 1.763086098743055e-06, + "loss": 0.0995, + "num_input_tokens_seen": 41575200, + "step": 61670 + }, + { + "epoch": 1.5067305108347788, + "grad_norm": 8.743705749511719, + "learning_rate": 1.7630309813253417e-06, + "loss": 0.3212, + "num_input_tokens_seen": 41578208, + "step": 61675 + }, + { + "epoch": 1.5068526616666258, + "grad_norm": 0.21430103480815887, + "learning_rate": 1.7629758583586613e-06, + "loss": 0.07, + "num_input_tokens_seen": 41581600, + "step": 61680 + }, + { + "epoch": 1.506974812498473, + "grad_norm": 0.16763003170490265, + "learning_rate": 1.7629207298434157e-06, + "loss": 0.0053, + "num_input_tokens_seen": 41584928, + "step": 61685 + }, + { + "epoch": 1.5070969633303202, + "grad_norm": 0.0599144846200943, + "learning_rate": 1.7628655957800054e-06, + "loss": 0.0005, + "num_input_tokens_seen": 41588448, + "step": 61690 + }, + { + "epoch": 1.5072191141621674, + "grad_norm": 0.06027615815401077, + "learning_rate": 1.7628104561688311e-06, + "loss": 0.0389, + "num_input_tokens_seen": 41592096, + "step": 61695 + }, + { + "epoch": 1.5073412649940146, + "grad_norm": 0.7883102893829346, + "learning_rate": 1.7627553110102936e-06, + "loss": 0.0391, + "num_input_tokens_seen": 41595168, + "step": 61700 + }, + { + "epoch": 1.5074634158258617, + "grad_norm": 0.34857890009880066, + "learning_rate": 1.762700160304795e-06, + "loss": 0.0008, + "num_input_tokens_seen": 41598560, + "step": 61705 + }, + { + "epoch": 1.507585566657709, + "grad_norm": 40.58872604370117, + "learning_rate": 1.7626450040527355e-06, + "loss": 0.0621, + "num_input_tokens_seen": 41601632, + "step": 61710 + }, + { + "epoch": 1.5077077174895561, + "grad_norm": 29.350317001342773, + "learning_rate": 1.7625898422545163e-06, + "loss": 0.1054, + "num_input_tokens_seen": 41604704, + "step": 61715 + }, + { + "epoch": 1.5078298683214033, + "grad_norm": 22.25473976135254, + "learning_rate": 1.7625346749105385e-06, + "loss": 0.1185, + "num_input_tokens_seen": 41608352, + "step": 61720 + }, + { + "epoch": 1.5079520191532505, + "grad_norm": 13.916818618774414, + "learning_rate": 1.7624795020212036e-06, + "loss": 0.1244, + "num_input_tokens_seen": 41611808, + "step": 61725 + }, + { + "epoch": 1.5080741699850977, + "grad_norm": 0.13384878635406494, + "learning_rate": 1.762424323586913e-06, + "loss": 0.0329, + "num_input_tokens_seen": 41615072, + "step": 61730 + }, + { + "epoch": 1.5081963208169449, + "grad_norm": 30.317041397094727, + "learning_rate": 1.7623691396080674e-06, + "loss": 0.0861, + "num_input_tokens_seen": 41618272, + "step": 61735 + }, + { + "epoch": 1.508318471648792, + "grad_norm": 1.0107468366622925, + "learning_rate": 1.7623139500850682e-06, + "loss": 0.0107, + "num_input_tokens_seen": 41621216, + "step": 61740 + }, + { + "epoch": 1.5084406224806393, + "grad_norm": 9.42230224609375, + "learning_rate": 1.762258755018317e-06, + "loss": 0.0473, + "num_input_tokens_seen": 41624544, + "step": 61745 + }, + { + "epoch": 1.5085627733124862, + "grad_norm": 0.12761496007442474, + "learning_rate": 1.7622035544082153e-06, + "loss": 0.0818, + "num_input_tokens_seen": 41627872, + "step": 61750 + }, + { + "epoch": 1.5086849241443334, + "grad_norm": 14.09807014465332, + "learning_rate": 1.762148348255164e-06, + "loss": 0.0524, + "num_input_tokens_seen": 41631200, + "step": 61755 + }, + { + "epoch": 1.5088070749761806, + "grad_norm": 120.56591796875, + "learning_rate": 1.7620931365595651e-06, + "loss": 0.1926, + "num_input_tokens_seen": 41634016, + "step": 61760 + }, + { + "epoch": 1.5089292258080278, + "grad_norm": 0.5521315336227417, + "learning_rate": 1.7620379193218198e-06, + "loss": 0.0595, + "num_input_tokens_seen": 41637408, + "step": 61765 + }, + { + "epoch": 1.5090513766398748, + "grad_norm": 89.52035522460938, + "learning_rate": 1.7619826965423301e-06, + "loss": 0.1163, + "num_input_tokens_seen": 41640928, + "step": 61770 + }, + { + "epoch": 1.509173527471722, + "grad_norm": 35.262062072753906, + "learning_rate": 1.7619274682214971e-06, + "loss": 0.1921, + "num_input_tokens_seen": 41644320, + "step": 61775 + }, + { + "epoch": 1.5092956783035691, + "grad_norm": 0.3857623040676117, + "learning_rate": 1.7618722343597225e-06, + "loss": 0.0986, + "num_input_tokens_seen": 41647328, + "step": 61780 + }, + { + "epoch": 1.5094178291354163, + "grad_norm": 0.6144522428512573, + "learning_rate": 1.7618169949574082e-06, + "loss": 0.1232, + "num_input_tokens_seen": 41650400, + "step": 61785 + }, + { + "epoch": 1.5095399799672635, + "grad_norm": 0.09201917052268982, + "learning_rate": 1.7617617500149558e-06, + "loss": 0.0418, + "num_input_tokens_seen": 41653536, + "step": 61790 + }, + { + "epoch": 1.5096621307991107, + "grad_norm": 0.29864656925201416, + "learning_rate": 1.7617064995327674e-06, + "loss": 0.0038, + "num_input_tokens_seen": 41656736, + "step": 61795 + }, + { + "epoch": 1.509784281630958, + "grad_norm": 0.046912241727113724, + "learning_rate": 1.761651243511244e-06, + "loss": 0.0024, + "num_input_tokens_seen": 41659616, + "step": 61800 + }, + { + "epoch": 1.509906432462805, + "grad_norm": 22.605022430419922, + "learning_rate": 1.761595981950788e-06, + "loss": 0.0471, + "num_input_tokens_seen": 41662816, + "step": 61805 + }, + { + "epoch": 1.5100285832946523, + "grad_norm": 17.55577278137207, + "learning_rate": 1.7615407148518014e-06, + "loss": 0.0431, + "num_input_tokens_seen": 41666016, + "step": 61810 + }, + { + "epoch": 1.5101507341264995, + "grad_norm": 12.887489318847656, + "learning_rate": 1.7614854422146855e-06, + "loss": 0.0486, + "num_input_tokens_seen": 41669280, + "step": 61815 + }, + { + "epoch": 1.5102728849583467, + "grad_norm": 0.6606060266494751, + "learning_rate": 1.7614301640398429e-06, + "loss": 0.1103, + "num_input_tokens_seen": 41672672, + "step": 61820 + }, + { + "epoch": 1.5103950357901939, + "grad_norm": 0.03383230045437813, + "learning_rate": 1.7613748803276752e-06, + "loss": 0.0483, + "num_input_tokens_seen": 41676320, + "step": 61825 + }, + { + "epoch": 1.510517186622041, + "grad_norm": 0.3259274959564209, + "learning_rate": 1.761319591078585e-06, + "loss": 0.0515, + "num_input_tokens_seen": 41679648, + "step": 61830 + }, + { + "epoch": 1.510639337453888, + "grad_norm": 0.23923155665397644, + "learning_rate": 1.7612642962929733e-06, + "loss": 0.1682, + "num_input_tokens_seen": 41683040, + "step": 61835 + }, + { + "epoch": 1.5107614882857352, + "grad_norm": 16.628183364868164, + "learning_rate": 1.7612089959712434e-06, + "loss": 0.1369, + "num_input_tokens_seen": 41686368, + "step": 61840 + }, + { + "epoch": 1.5108836391175824, + "grad_norm": 0.04525705799460411, + "learning_rate": 1.7611536901137969e-06, + "loss": 0.1149, + "num_input_tokens_seen": 41689568, + "step": 61845 + }, + { + "epoch": 1.5110057899494296, + "grad_norm": 0.886325478553772, + "learning_rate": 1.7610983787210357e-06, + "loss": 0.0051, + "num_input_tokens_seen": 41692832, + "step": 61850 + }, + { + "epoch": 1.5111279407812768, + "grad_norm": 0.14912337064743042, + "learning_rate": 1.7610430617933628e-06, + "loss": 0.087, + "num_input_tokens_seen": 41696480, + "step": 61855 + }, + { + "epoch": 1.5112500916131237, + "grad_norm": 0.42184045910835266, + "learning_rate": 1.7609877393311798e-06, + "loss": 0.002, + "num_input_tokens_seen": 41699616, + "step": 61860 + }, + { + "epoch": 1.511372242444971, + "grad_norm": 0.41546207666397095, + "learning_rate": 1.7609324113348892e-06, + "loss": 0.1238, + "num_input_tokens_seen": 41703072, + "step": 61865 + }, + { + "epoch": 1.5114943932768181, + "grad_norm": 0.3638473451137543, + "learning_rate": 1.7608770778048936e-06, + "loss": 0.1283, + "num_input_tokens_seen": 41706464, + "step": 61870 + }, + { + "epoch": 1.5116165441086653, + "grad_norm": 37.72365188598633, + "learning_rate": 1.7608217387415954e-06, + "loss": 0.1226, + "num_input_tokens_seen": 41709664, + "step": 61875 + }, + { + "epoch": 1.5117386949405125, + "grad_norm": 84.65232849121094, + "learning_rate": 1.7607663941453966e-06, + "loss": 0.2157, + "num_input_tokens_seen": 41713504, + "step": 61880 + }, + { + "epoch": 1.5118608457723597, + "grad_norm": 8.479555130004883, + "learning_rate": 1.7607110440167e-06, + "loss": 0.1551, + "num_input_tokens_seen": 41717216, + "step": 61885 + }, + { + "epoch": 1.5119829966042069, + "grad_norm": 0.47316303849220276, + "learning_rate": 1.7606556883559081e-06, + "loss": 0.0153, + "num_input_tokens_seen": 41720544, + "step": 61890 + }, + { + "epoch": 1.512105147436054, + "grad_norm": 14.604045867919922, + "learning_rate": 1.7606003271634235e-06, + "loss": 0.156, + "num_input_tokens_seen": 41724128, + "step": 61895 + }, + { + "epoch": 1.5122272982679013, + "grad_norm": 136.4307861328125, + "learning_rate": 1.760544960439649e-06, + "loss": 0.1273, + "num_input_tokens_seen": 41727840, + "step": 61900 + }, + { + "epoch": 1.5123494490997484, + "grad_norm": 1.5975284576416016, + "learning_rate": 1.7604895881849865e-06, + "loss": 0.0023, + "num_input_tokens_seen": 41731424, + "step": 61905 + }, + { + "epoch": 1.5124715999315956, + "grad_norm": 25.729183197021484, + "learning_rate": 1.7604342103998393e-06, + "loss": 0.0992, + "num_input_tokens_seen": 41734752, + "step": 61910 + }, + { + "epoch": 1.5125937507634428, + "grad_norm": 0.16049420833587646, + "learning_rate": 1.76037882708461e-06, + "loss": 0.0758, + "num_input_tokens_seen": 41738080, + "step": 61915 + }, + { + "epoch": 1.51271590159529, + "grad_norm": 0.9564996361732483, + "learning_rate": 1.7603234382397014e-06, + "loss": 0.0601, + "num_input_tokens_seen": 41741216, + "step": 61920 + }, + { + "epoch": 1.512838052427137, + "grad_norm": 23.576576232910156, + "learning_rate": 1.7602680438655164e-06, + "loss": 0.1144, + "num_input_tokens_seen": 41744992, + "step": 61925 + }, + { + "epoch": 1.5129602032589842, + "grad_norm": 18.271236419677734, + "learning_rate": 1.7602126439624576e-06, + "loss": 0.048, + "num_input_tokens_seen": 41748448, + "step": 61930 + }, + { + "epoch": 1.5130823540908314, + "grad_norm": 20.256929397583008, + "learning_rate": 1.7601572385309279e-06, + "loss": 0.072, + "num_input_tokens_seen": 41751648, + "step": 61935 + }, + { + "epoch": 1.5132045049226786, + "grad_norm": 0.0887921005487442, + "learning_rate": 1.7601018275713301e-06, + "loss": 0.0386, + "num_input_tokens_seen": 41755040, + "step": 61940 + }, + { + "epoch": 1.5133266557545257, + "grad_norm": 21.042631149291992, + "learning_rate": 1.760046411084068e-06, + "loss": 0.0827, + "num_input_tokens_seen": 41758240, + "step": 61945 + }, + { + "epoch": 1.5134488065863727, + "grad_norm": 34.03895950317383, + "learning_rate": 1.7599909890695434e-06, + "loss": 0.1892, + "num_input_tokens_seen": 41761504, + "step": 61950 + }, + { + "epoch": 1.51357095741822, + "grad_norm": 0.2431178241968155, + "learning_rate": 1.7599355615281602e-06, + "loss": 0.0432, + "num_input_tokens_seen": 41764768, + "step": 61955 + }, + { + "epoch": 1.513693108250067, + "grad_norm": 0.03246299922466278, + "learning_rate": 1.7598801284603211e-06, + "loss": 0.0385, + "num_input_tokens_seen": 41768800, + "step": 61960 + }, + { + "epoch": 1.5138152590819143, + "grad_norm": 0.05149323120713234, + "learning_rate": 1.7598246898664293e-06, + "loss": 0.0589, + "num_input_tokens_seen": 41771808, + "step": 61965 + }, + { + "epoch": 1.5139374099137615, + "grad_norm": 0.07109751552343369, + "learning_rate": 1.759769245746888e-06, + "loss": 0.0018, + "num_input_tokens_seen": 41775072, + "step": 61970 + }, + { + "epoch": 1.5140595607456087, + "grad_norm": 123.77120971679688, + "learning_rate": 1.7597137961021004e-06, + "loss": 0.2485, + "num_input_tokens_seen": 41778784, + "step": 61975 + }, + { + "epoch": 1.5141817115774558, + "grad_norm": 0.3029272258281708, + "learning_rate": 1.7596583409324697e-06, + "loss": 0.0541, + "num_input_tokens_seen": 41782816, + "step": 61980 + }, + { + "epoch": 1.514303862409303, + "grad_norm": 18.087604522705078, + "learning_rate": 1.7596028802383995e-06, + "loss": 0.1162, + "num_input_tokens_seen": 41786848, + "step": 61985 + }, + { + "epoch": 1.5144260132411502, + "grad_norm": 19.319604873657227, + "learning_rate": 1.7595474140202927e-06, + "loss": 0.0587, + "num_input_tokens_seen": 41790304, + "step": 61990 + }, + { + "epoch": 1.5145481640729974, + "grad_norm": 0.5831694006919861, + "learning_rate": 1.7594919422785525e-06, + "loss": 0.1085, + "num_input_tokens_seen": 41793440, + "step": 61995 + }, + { + "epoch": 1.5146703149048446, + "grad_norm": 0.0200160201638937, + "learning_rate": 1.7594364650135827e-06, + "loss": 0.0604, + "num_input_tokens_seen": 41797088, + "step": 62000 + }, + { + "epoch": 1.5147924657366918, + "grad_norm": 0.11137901246547699, + "learning_rate": 1.759380982225787e-06, + "loss": 0.0746, + "num_input_tokens_seen": 41800224, + "step": 62005 + }, + { + "epoch": 1.514914616568539, + "grad_norm": 0.20222550630569458, + "learning_rate": 1.7593254939155684e-06, + "loss": 0.0012, + "num_input_tokens_seen": 41803488, + "step": 62010 + }, + { + "epoch": 1.515036767400386, + "grad_norm": 0.36145535111427307, + "learning_rate": 1.7592700000833305e-06, + "loss": 0.1087, + "num_input_tokens_seen": 41806944, + "step": 62015 + }, + { + "epoch": 1.5151589182322331, + "grad_norm": 0.12234114110469818, + "learning_rate": 1.759214500729477e-06, + "loss": 0.0663, + "num_input_tokens_seen": 41810016, + "step": 62020 + }, + { + "epoch": 1.5152810690640803, + "grad_norm": 0.007487828843295574, + "learning_rate": 1.7591589958544113e-06, + "loss": 0.1765, + "num_input_tokens_seen": 41813664, + "step": 62025 + }, + { + "epoch": 1.5154032198959275, + "grad_norm": 0.17571020126342773, + "learning_rate": 1.7591034854585373e-06, + "loss": 0.0018, + "num_input_tokens_seen": 41817120, + "step": 62030 + }, + { + "epoch": 1.5155253707277745, + "grad_norm": 9.199395179748535, + "learning_rate": 1.7590479695422587e-06, + "loss": 0.086, + "num_input_tokens_seen": 41820512, + "step": 62035 + }, + { + "epoch": 1.5156475215596217, + "grad_norm": 11.8097505569458, + "learning_rate": 1.758992448105979e-06, + "loss": 0.1341, + "num_input_tokens_seen": 41824160, + "step": 62040 + }, + { + "epoch": 1.5157696723914689, + "grad_norm": 38.68931198120117, + "learning_rate": 1.7589369211501019e-06, + "loss": 0.0295, + "num_input_tokens_seen": 41827168, + "step": 62045 + }, + { + "epoch": 1.515891823223316, + "grad_norm": 0.1641501635313034, + "learning_rate": 1.7588813886750315e-06, + "loss": 0.0363, + "num_input_tokens_seen": 41830560, + "step": 62050 + }, + { + "epoch": 1.5160139740551632, + "grad_norm": 1.2902582883834839, + "learning_rate": 1.7588258506811716e-06, + "loss": 0.0621, + "num_input_tokens_seen": 41833568, + "step": 62055 + }, + { + "epoch": 1.5161361248870104, + "grad_norm": 50.38115310668945, + "learning_rate": 1.7587703071689259e-06, + "loss": 0.1542, + "num_input_tokens_seen": 41837024, + "step": 62060 + }, + { + "epoch": 1.5162582757188576, + "grad_norm": 0.12362977862358093, + "learning_rate": 1.7587147581386988e-06, + "loss": 0.0697, + "num_input_tokens_seen": 41840864, + "step": 62065 + }, + { + "epoch": 1.5163804265507048, + "grad_norm": 0.03375665470957756, + "learning_rate": 1.7586592035908935e-06, + "loss": 0.0945, + "num_input_tokens_seen": 41844896, + "step": 62070 + }, + { + "epoch": 1.516502577382552, + "grad_norm": 0.1380762904882431, + "learning_rate": 1.7586036435259147e-06, + "loss": 0.14, + "num_input_tokens_seen": 41848544, + "step": 62075 + }, + { + "epoch": 1.5166247282143992, + "grad_norm": 10.012866973876953, + "learning_rate": 1.758548077944166e-06, + "loss": 0.0913, + "num_input_tokens_seen": 41851680, + "step": 62080 + }, + { + "epoch": 1.5167468790462464, + "grad_norm": 0.35353362560272217, + "learning_rate": 1.7584925068460516e-06, + "loss": 0.0311, + "num_input_tokens_seen": 41854880, + "step": 62085 + }, + { + "epoch": 1.5168690298780936, + "grad_norm": 0.4082539677619934, + "learning_rate": 1.7584369302319757e-06, + "loss": 0.0375, + "num_input_tokens_seen": 41857760, + "step": 62090 + }, + { + "epoch": 1.5169911807099408, + "grad_norm": 0.18866710364818573, + "learning_rate": 1.7583813481023424e-06, + "loss": 0.0032, + "num_input_tokens_seen": 41860960, + "step": 62095 + }, + { + "epoch": 1.517113331541788, + "grad_norm": 0.3199007511138916, + "learning_rate": 1.758325760457556e-06, + "loss": 0.0452, + "num_input_tokens_seen": 41864736, + "step": 62100 + }, + { + "epoch": 1.517235482373635, + "grad_norm": 0.3004762828350067, + "learning_rate": 1.7582701672980208e-06, + "loss": 0.0375, + "num_input_tokens_seen": 41868128, + "step": 62105 + }, + { + "epoch": 1.5173576332054821, + "grad_norm": 0.13247863948345184, + "learning_rate": 1.7582145686241412e-06, + "loss": 0.1857, + "num_input_tokens_seen": 41871328, + "step": 62110 + }, + { + "epoch": 1.5174797840373293, + "grad_norm": 0.05241371691226959, + "learning_rate": 1.7581589644363208e-06, + "loss": 0.0529, + "num_input_tokens_seen": 41874400, + "step": 62115 + }, + { + "epoch": 1.5176019348691765, + "grad_norm": 0.058131348341703415, + "learning_rate": 1.7581033547349648e-06, + "loss": 0.0553, + "num_input_tokens_seen": 41877728, + "step": 62120 + }, + { + "epoch": 1.5177240857010235, + "grad_norm": 0.27324989438056946, + "learning_rate": 1.7580477395204774e-06, + "loss": 0.0991, + "num_input_tokens_seen": 41880736, + "step": 62125 + }, + { + "epoch": 1.5178462365328707, + "grad_norm": 0.12253384292125702, + "learning_rate": 1.7579921187932628e-06, + "loss": 0.1013, + "num_input_tokens_seen": 41884896, + "step": 62130 + }, + { + "epoch": 1.5179683873647178, + "grad_norm": 13.14607048034668, + "learning_rate": 1.7579364925537257e-06, + "loss": 0.2171, + "num_input_tokens_seen": 41888160, + "step": 62135 + }, + { + "epoch": 1.518090538196565, + "grad_norm": 34.0280876159668, + "learning_rate": 1.7578808608022704e-06, + "loss": 0.2605, + "num_input_tokens_seen": 41891168, + "step": 62140 + }, + { + "epoch": 1.5182126890284122, + "grad_norm": 0.339428573846817, + "learning_rate": 1.7578252235393017e-06, + "loss": 0.125, + "num_input_tokens_seen": 41894368, + "step": 62145 + }, + { + "epoch": 1.5183348398602594, + "grad_norm": 1.1708426475524902, + "learning_rate": 1.7577695807652243e-06, + "loss": 0.0043, + "num_input_tokens_seen": 41897312, + "step": 62150 + }, + { + "epoch": 1.5184569906921066, + "grad_norm": 184.12338256835938, + "learning_rate": 1.7577139324804424e-06, + "loss": 0.0465, + "num_input_tokens_seen": 41900704, + "step": 62155 + }, + { + "epoch": 1.5185791415239538, + "grad_norm": 9.056930541992188, + "learning_rate": 1.757658278685361e-06, + "loss": 0.1147, + "num_input_tokens_seen": 41903904, + "step": 62160 + }, + { + "epoch": 1.518701292355801, + "grad_norm": 0.4163637161254883, + "learning_rate": 1.7576026193803853e-06, + "loss": 0.109, + "num_input_tokens_seen": 41907104, + "step": 62165 + }, + { + "epoch": 1.5188234431876482, + "grad_norm": 86.08924102783203, + "learning_rate": 1.7575469545659192e-06, + "loss": 0.2651, + "num_input_tokens_seen": 41909984, + "step": 62170 + }, + { + "epoch": 1.5189455940194954, + "grad_norm": 65.60690307617188, + "learning_rate": 1.757491284242368e-06, + "loss": 0.0926, + "num_input_tokens_seen": 41912992, + "step": 62175 + }, + { + "epoch": 1.5190677448513425, + "grad_norm": 0.5181287527084351, + "learning_rate": 1.7574356084101362e-06, + "loss": 0.0986, + "num_input_tokens_seen": 41916064, + "step": 62180 + }, + { + "epoch": 1.5191898956831897, + "grad_norm": 0.423454225063324, + "learning_rate": 1.7573799270696293e-06, + "loss": 0.0296, + "num_input_tokens_seen": 41919392, + "step": 62185 + }, + { + "epoch": 1.519312046515037, + "grad_norm": 1.2272237539291382, + "learning_rate": 1.7573242402212515e-06, + "loss": 0.1071, + "num_input_tokens_seen": 41922720, + "step": 62190 + }, + { + "epoch": 1.519434197346884, + "grad_norm": 3.1436374187469482, + "learning_rate": 1.7572685478654083e-06, + "loss": 0.1806, + "num_input_tokens_seen": 41926624, + "step": 62195 + }, + { + "epoch": 1.519556348178731, + "grad_norm": 3.7778868675231934, + "learning_rate": 1.7572128500025048e-06, + "loss": 0.0039, + "num_input_tokens_seen": 41929760, + "step": 62200 + }, + { + "epoch": 1.5196784990105783, + "grad_norm": 9.93069076538086, + "learning_rate": 1.7571571466329454e-06, + "loss": 0.1229, + "num_input_tokens_seen": 41932960, + "step": 62205 + }, + { + "epoch": 1.5198006498424255, + "grad_norm": 147.24993896484375, + "learning_rate": 1.7571014377571358e-06, + "loss": 0.1609, + "num_input_tokens_seen": 41936416, + "step": 62210 + }, + { + "epoch": 1.5199228006742724, + "grad_norm": 0.2786606550216675, + "learning_rate": 1.757045723375481e-06, + "loss": 0.0593, + "num_input_tokens_seen": 41939488, + "step": 62215 + }, + { + "epoch": 1.5200449515061196, + "grad_norm": 0.8053382635116577, + "learning_rate": 1.7569900034883856e-06, + "loss": 0.1467, + "num_input_tokens_seen": 41942816, + "step": 62220 + }, + { + "epoch": 1.5201671023379668, + "grad_norm": 0.6396535038948059, + "learning_rate": 1.7569342780962555e-06, + "loss": 0.0073, + "num_input_tokens_seen": 41946528, + "step": 62225 + }, + { + "epoch": 1.520289253169814, + "grad_norm": 0.1954188495874405, + "learning_rate": 1.756878547199496e-06, + "loss": 0.0223, + "num_input_tokens_seen": 41950688, + "step": 62230 + }, + { + "epoch": 1.5204114040016612, + "grad_norm": 0.14850103855133057, + "learning_rate": 1.756822810798512e-06, + "loss": 0.1032, + "num_input_tokens_seen": 41953952, + "step": 62235 + }, + { + "epoch": 1.5205335548335084, + "grad_norm": 0.09374181181192398, + "learning_rate": 1.756767068893709e-06, + "loss": 0.0415, + "num_input_tokens_seen": 41957216, + "step": 62240 + }, + { + "epoch": 1.5206557056653556, + "grad_norm": 10.11430835723877, + "learning_rate": 1.7567113214854921e-06, + "loss": 0.086, + "num_input_tokens_seen": 41959904, + "step": 62245 + }, + { + "epoch": 1.5207778564972028, + "grad_norm": 1.3693947792053223, + "learning_rate": 1.756655568574267e-06, + "loss": 0.032, + "num_input_tokens_seen": 41963488, + "step": 62250 + }, + { + "epoch": 1.52090000732905, + "grad_norm": 0.07667896896600723, + "learning_rate": 1.756599810160439e-06, + "loss": 0.0492, + "num_input_tokens_seen": 41966304, + "step": 62255 + }, + { + "epoch": 1.5210221581608971, + "grad_norm": 11.006379127502441, + "learning_rate": 1.756544046244414e-06, + "loss": 0.159, + "num_input_tokens_seen": 41969888, + "step": 62260 + }, + { + "epoch": 1.5211443089927443, + "grad_norm": 0.1665768325328827, + "learning_rate": 1.756488276826597e-06, + "loss": 0.0011, + "num_input_tokens_seen": 41972832, + "step": 62265 + }, + { + "epoch": 1.5212664598245915, + "grad_norm": 0.045914895832538605, + "learning_rate": 1.756432501907394e-06, + "loss": 0.0602, + "num_input_tokens_seen": 41975840, + "step": 62270 + }, + { + "epoch": 1.5213886106564387, + "grad_norm": 0.14401188492774963, + "learning_rate": 1.7563767214872104e-06, + "loss": 0.0315, + "num_input_tokens_seen": 41979168, + "step": 62275 + }, + { + "epoch": 1.521510761488286, + "grad_norm": 0.042610686272382736, + "learning_rate": 1.7563209355664514e-06, + "loss": 0.228, + "num_input_tokens_seen": 41982304, + "step": 62280 + }, + { + "epoch": 1.5216329123201329, + "grad_norm": 0.048549313098192215, + "learning_rate": 1.7562651441455237e-06, + "loss": 0.0013, + "num_input_tokens_seen": 41985888, + "step": 62285 + }, + { + "epoch": 1.52175506315198, + "grad_norm": 1.941735029220581, + "learning_rate": 1.7562093472248321e-06, + "loss": 0.174, + "num_input_tokens_seen": 41989152, + "step": 62290 + }, + { + "epoch": 1.5218772139838272, + "grad_norm": 0.39207449555397034, + "learning_rate": 1.7561535448047828e-06, + "loss": 0.0824, + "num_input_tokens_seen": 41992224, + "step": 62295 + }, + { + "epoch": 1.5219993648156744, + "grad_norm": 0.11600027978420258, + "learning_rate": 1.7560977368857814e-06, + "loss": 0.0686, + "num_input_tokens_seen": 41995488, + "step": 62300 + }, + { + "epoch": 1.5221215156475214, + "grad_norm": 0.003041791031137109, + "learning_rate": 1.756041923468234e-06, + "loss": 0.1924, + "num_input_tokens_seen": 41999008, + "step": 62305 + }, + { + "epoch": 1.5222436664793686, + "grad_norm": 0.44268959760665894, + "learning_rate": 1.7559861045525467e-06, + "loss": 0.0175, + "num_input_tokens_seen": 42002592, + "step": 62310 + }, + { + "epoch": 1.5223658173112158, + "grad_norm": 0.3703887164592743, + "learning_rate": 1.7559302801391247e-06, + "loss": 0.0774, + "num_input_tokens_seen": 42006176, + "step": 62315 + }, + { + "epoch": 1.522487968143063, + "grad_norm": 0.3298993706703186, + "learning_rate": 1.7558744502283745e-06, + "loss": 0.039, + "num_input_tokens_seen": 42009760, + "step": 62320 + }, + { + "epoch": 1.5226101189749102, + "grad_norm": 94.69880676269531, + "learning_rate": 1.7558186148207018e-06, + "loss": 0.079, + "num_input_tokens_seen": 42013088, + "step": 62325 + }, + { + "epoch": 1.5227322698067574, + "grad_norm": 0.14409717917442322, + "learning_rate": 1.7557627739165133e-06, + "loss": 0.0347, + "num_input_tokens_seen": 42016864, + "step": 62330 + }, + { + "epoch": 1.5228544206386045, + "grad_norm": 134.7539825439453, + "learning_rate": 1.7557069275162145e-06, + "loss": 0.119, + "num_input_tokens_seen": 42020192, + "step": 62335 + }, + { + "epoch": 1.5229765714704517, + "grad_norm": 0.2854374051094055, + "learning_rate": 1.7556510756202114e-06, + "loss": 0.038, + "num_input_tokens_seen": 42023712, + "step": 62340 + }, + { + "epoch": 1.523098722302299, + "grad_norm": 2.959113836288452, + "learning_rate": 1.7555952182289104e-06, + "loss": 0.0948, + "num_input_tokens_seen": 42026784, + "step": 62345 + }, + { + "epoch": 1.5232208731341461, + "grad_norm": 24.652820587158203, + "learning_rate": 1.755539355342718e-06, + "loss": 0.1393, + "num_input_tokens_seen": 42029792, + "step": 62350 + }, + { + "epoch": 1.5233430239659933, + "grad_norm": 1.0689424276351929, + "learning_rate": 1.75548348696204e-06, + "loss": 0.0318, + "num_input_tokens_seen": 42032800, + "step": 62355 + }, + { + "epoch": 1.5234651747978405, + "grad_norm": 14.94307804107666, + "learning_rate": 1.7554276130872832e-06, + "loss": 0.1744, + "num_input_tokens_seen": 42036000, + "step": 62360 + }, + { + "epoch": 1.5235873256296877, + "grad_norm": 0.11315063387155533, + "learning_rate": 1.7553717337188534e-06, + "loss": 0.1032, + "num_input_tokens_seen": 42040032, + "step": 62365 + }, + { + "epoch": 1.5237094764615347, + "grad_norm": 0.4759387671947479, + "learning_rate": 1.7553158488571572e-06, + "loss": 0.0543, + "num_input_tokens_seen": 42043296, + "step": 62370 + }, + { + "epoch": 1.5238316272933818, + "grad_norm": 29.490541458129883, + "learning_rate": 1.755259958502601e-06, + "loss": 0.1868, + "num_input_tokens_seen": 42046880, + "step": 62375 + }, + { + "epoch": 1.523953778125229, + "grad_norm": 0.1935776025056839, + "learning_rate": 1.755204062655591e-06, + "loss": 0.0293, + "num_input_tokens_seen": 42050080, + "step": 62380 + }, + { + "epoch": 1.5240759289570762, + "grad_norm": 0.09843038767576218, + "learning_rate": 1.7551481613165341e-06, + "loss": 0.0824, + "num_input_tokens_seen": 42053600, + "step": 62385 + }, + { + "epoch": 1.5241980797889234, + "grad_norm": 0.8339051008224487, + "learning_rate": 1.755092254485837e-06, + "loss": 0.0032, + "num_input_tokens_seen": 42056736, + "step": 62390 + }, + { + "epoch": 1.5243202306207704, + "grad_norm": 0.03356233239173889, + "learning_rate": 1.7550363421639056e-06, + "loss": 0.1097, + "num_input_tokens_seen": 42060768, + "step": 62395 + }, + { + "epoch": 1.5244423814526176, + "grad_norm": 0.30012017488479614, + "learning_rate": 1.7549804243511469e-06, + "loss": 0.1254, + "num_input_tokens_seen": 42064288, + "step": 62400 + }, + { + "epoch": 1.5245645322844648, + "grad_norm": 15.219573020935059, + "learning_rate": 1.7549245010479674e-06, + "loss": 0.1479, + "num_input_tokens_seen": 42067616, + "step": 62405 + }, + { + "epoch": 1.524686683116312, + "grad_norm": 1.6977224349975586, + "learning_rate": 1.7548685722547738e-06, + "loss": 0.0592, + "num_input_tokens_seen": 42071136, + "step": 62410 + }, + { + "epoch": 1.5248088339481591, + "grad_norm": 0.7301633358001709, + "learning_rate": 1.7548126379719732e-06, + "loss": 0.0017, + "num_input_tokens_seen": 42074464, + "step": 62415 + }, + { + "epoch": 1.5249309847800063, + "grad_norm": 0.2103739231824875, + "learning_rate": 1.754756698199972e-06, + "loss": 0.1663, + "num_input_tokens_seen": 42077792, + "step": 62420 + }, + { + "epoch": 1.5250531356118535, + "grad_norm": 20.333240509033203, + "learning_rate": 1.7547007529391769e-06, + "loss": 0.1711, + "num_input_tokens_seen": 42080992, + "step": 62425 + }, + { + "epoch": 1.5251752864437007, + "grad_norm": 1.7456847429275513, + "learning_rate": 1.7546448021899952e-06, + "loss": 0.0016, + "num_input_tokens_seen": 42084256, + "step": 62430 + }, + { + "epoch": 1.525297437275548, + "grad_norm": 0.07221655547618866, + "learning_rate": 1.754588845952833e-06, + "loss": 0.0055, + "num_input_tokens_seen": 42087456, + "step": 62435 + }, + { + "epoch": 1.525419588107395, + "grad_norm": 0.10700752586126328, + "learning_rate": 1.7545328842280985e-06, + "loss": 0.0075, + "num_input_tokens_seen": 42090528, + "step": 62440 + }, + { + "epoch": 1.5255417389392423, + "grad_norm": 31.133052825927734, + "learning_rate": 1.7544769170161973e-06, + "loss": 0.0679, + "num_input_tokens_seen": 42094240, + "step": 62445 + }, + { + "epoch": 1.5256638897710895, + "grad_norm": 14.079894065856934, + "learning_rate": 1.7544209443175372e-06, + "loss": 0.0625, + "num_input_tokens_seen": 42097568, + "step": 62450 + }, + { + "epoch": 1.5257860406029367, + "grad_norm": 0.0958448052406311, + "learning_rate": 1.7543649661325254e-06, + "loss": 0.1018, + "num_input_tokens_seen": 42100832, + "step": 62455 + }, + { + "epoch": 1.5259081914347836, + "grad_norm": 0.026826851069927216, + "learning_rate": 1.7543089824615682e-06, + "loss": 0.0524, + "num_input_tokens_seen": 42104032, + "step": 62460 + }, + { + "epoch": 1.5260303422666308, + "grad_norm": 1.1849817037582397, + "learning_rate": 1.7542529933050735e-06, + "loss": 0.0736, + "num_input_tokens_seen": 42107744, + "step": 62465 + }, + { + "epoch": 1.526152493098478, + "grad_norm": 0.04849791154265404, + "learning_rate": 1.754196998663448e-06, + "loss": 0.0697, + "num_input_tokens_seen": 42111392, + "step": 62470 + }, + { + "epoch": 1.5262746439303252, + "grad_norm": 0.4404328763484955, + "learning_rate": 1.7541409985370993e-06, + "loss": 0.0861, + "num_input_tokens_seen": 42114528, + "step": 62475 + }, + { + "epoch": 1.5263967947621724, + "grad_norm": 14.965144157409668, + "learning_rate": 1.754084992926434e-06, + "loss": 0.2115, + "num_input_tokens_seen": 42117920, + "step": 62480 + }, + { + "epoch": 1.5265189455940193, + "grad_norm": 34.34897994995117, + "learning_rate": 1.75402898183186e-06, + "loss": 0.0692, + "num_input_tokens_seen": 42121376, + "step": 62485 + }, + { + "epoch": 1.5266410964258665, + "grad_norm": 0.1501975655555725, + "learning_rate": 1.7539729652537848e-06, + "loss": 0.1354, + "num_input_tokens_seen": 42124512, + "step": 62490 + }, + { + "epoch": 1.5267632472577137, + "grad_norm": 6.973504543304443, + "learning_rate": 1.753916943192615e-06, + "loss": 0.063, + "num_input_tokens_seen": 42127840, + "step": 62495 + }, + { + "epoch": 1.526885398089561, + "grad_norm": 0.12668637931346893, + "learning_rate": 1.7538609156487585e-06, + "loss": 0.0009, + "num_input_tokens_seen": 42131360, + "step": 62500 + }, + { + "epoch": 1.527007548921408, + "grad_norm": 0.4277445375919342, + "learning_rate": 1.7538048826226225e-06, + "loss": 0.1256, + "num_input_tokens_seen": 42134816, + "step": 62505 + }, + { + "epoch": 1.5271296997532553, + "grad_norm": 0.07875073701143265, + "learning_rate": 1.753748844114615e-06, + "loss": 0.0323, + "num_input_tokens_seen": 42137824, + "step": 62510 + }, + { + "epoch": 1.5272518505851025, + "grad_norm": 0.3303001821041107, + "learning_rate": 1.753692800125143e-06, + "loss": 0.1573, + "num_input_tokens_seen": 42141280, + "step": 62515 + }, + { + "epoch": 1.5273740014169497, + "grad_norm": 118.35843658447266, + "learning_rate": 1.753636750654614e-06, + "loss": 0.0461, + "num_input_tokens_seen": 42144736, + "step": 62520 + }, + { + "epoch": 1.5274961522487969, + "grad_norm": 1.3716565370559692, + "learning_rate": 1.7535806957034365e-06, + "loss": 0.0953, + "num_input_tokens_seen": 42148384, + "step": 62525 + }, + { + "epoch": 1.527618303080644, + "grad_norm": 0.14611275494098663, + "learning_rate": 1.7535246352720167e-06, + "loss": 0.0707, + "num_input_tokens_seen": 42151904, + "step": 62530 + }, + { + "epoch": 1.5277404539124912, + "grad_norm": 0.5512341260910034, + "learning_rate": 1.7534685693607637e-06, + "loss": 0.0573, + "num_input_tokens_seen": 42155360, + "step": 62535 + }, + { + "epoch": 1.5278626047443384, + "grad_norm": 0.0678791031241417, + "learning_rate": 1.753412497970084e-06, + "loss": 0.1038, + "num_input_tokens_seen": 42158496, + "step": 62540 + }, + { + "epoch": 1.5279847555761856, + "grad_norm": 14.49067211151123, + "learning_rate": 1.7533564211003865e-06, + "loss": 0.1688, + "num_input_tokens_seen": 42161888, + "step": 62545 + }, + { + "epoch": 1.5281069064080326, + "grad_norm": 12.960949897766113, + "learning_rate": 1.7533003387520784e-06, + "loss": 0.1191, + "num_input_tokens_seen": 42165600, + "step": 62550 + }, + { + "epoch": 1.5282290572398798, + "grad_norm": 0.16139183938503265, + "learning_rate": 1.7532442509255673e-06, + "loss": 0.0018, + "num_input_tokens_seen": 42168928, + "step": 62555 + }, + { + "epoch": 1.528351208071727, + "grad_norm": 0.18819627165794373, + "learning_rate": 1.753188157621262e-06, + "loss": 0.0421, + "num_input_tokens_seen": 42172192, + "step": 62560 + }, + { + "epoch": 1.5284733589035742, + "grad_norm": 68.75452423095703, + "learning_rate": 1.7531320588395693e-06, + "loss": 0.1245, + "num_input_tokens_seen": 42177504, + "step": 62565 + }, + { + "epoch": 1.5285955097354211, + "grad_norm": 22.286762237548828, + "learning_rate": 1.7530759545808977e-06, + "loss": 0.0582, + "num_input_tokens_seen": 42180640, + "step": 62570 + }, + { + "epoch": 1.5287176605672683, + "grad_norm": 24.4525089263916, + "learning_rate": 1.7530198448456556e-06, + "loss": 0.0592, + "num_input_tokens_seen": 42183968, + "step": 62575 + }, + { + "epoch": 1.5288398113991155, + "grad_norm": 0.3057885766029358, + "learning_rate": 1.7529637296342502e-06, + "loss": 0.001, + "num_input_tokens_seen": 42187744, + "step": 62580 + }, + { + "epoch": 1.5289619622309627, + "grad_norm": 0.1315106451511383, + "learning_rate": 1.7529076089470905e-06, + "loss": 0.0325, + "num_input_tokens_seen": 42191264, + "step": 62585 + }, + { + "epoch": 1.52908411306281, + "grad_norm": 0.6707960963249207, + "learning_rate": 1.752851482784584e-06, + "loss": 0.0993, + "num_input_tokens_seen": 42195040, + "step": 62590 + }, + { + "epoch": 1.529206263894657, + "grad_norm": 0.04225427657365799, + "learning_rate": 1.7527953511471387e-06, + "loss": 0.0747, + "num_input_tokens_seen": 42198368, + "step": 62595 + }, + { + "epoch": 1.5293284147265043, + "grad_norm": 0.10846621543169022, + "learning_rate": 1.7527392140351634e-06, + "loss": 0.0597, + "num_input_tokens_seen": 42201696, + "step": 62600 + }, + { + "epoch": 1.5294505655583515, + "grad_norm": 34.9310417175293, + "learning_rate": 1.7526830714490662e-06, + "loss": 0.0322, + "num_input_tokens_seen": 42204960, + "step": 62605 + }, + { + "epoch": 1.5295727163901987, + "grad_norm": 0.08704297244548798, + "learning_rate": 1.752626923389255e-06, + "loss": 0.1846, + "num_input_tokens_seen": 42208288, + "step": 62610 + }, + { + "epoch": 1.5296948672220458, + "grad_norm": 7.316606044769287, + "learning_rate": 1.7525707698561382e-06, + "loss": 0.0035, + "num_input_tokens_seen": 42211936, + "step": 62615 + }, + { + "epoch": 1.529817018053893, + "grad_norm": 0.4856109917163849, + "learning_rate": 1.7525146108501248e-06, + "loss": 0.0012, + "num_input_tokens_seen": 42215328, + "step": 62620 + }, + { + "epoch": 1.5299391688857402, + "grad_norm": 16.65413475036621, + "learning_rate": 1.7524584463716226e-06, + "loss": 0.0523, + "num_input_tokens_seen": 42218528, + "step": 62625 + }, + { + "epoch": 1.5300613197175874, + "grad_norm": 0.06861788034439087, + "learning_rate": 1.7524022764210401e-06, + "loss": 0.1023, + "num_input_tokens_seen": 42221536, + "step": 62630 + }, + { + "epoch": 1.5301834705494346, + "grad_norm": 0.09857258200645447, + "learning_rate": 1.7523461009987862e-06, + "loss": 0.0021, + "num_input_tokens_seen": 42225120, + "step": 62635 + }, + { + "epoch": 1.5303056213812816, + "grad_norm": 0.22179517149925232, + "learning_rate": 1.7522899201052686e-06, + "loss": 0.2342, + "num_input_tokens_seen": 42228704, + "step": 62640 + }, + { + "epoch": 1.5304277722131288, + "grad_norm": 20.480735778808594, + "learning_rate": 1.7522337337408968e-06, + "loss": 0.1148, + "num_input_tokens_seen": 42231904, + "step": 62645 + }, + { + "epoch": 1.530549923044976, + "grad_norm": 0.1441388875246048, + "learning_rate": 1.7521775419060786e-06, + "loss": 0.0011, + "num_input_tokens_seen": 42235040, + "step": 62650 + }, + { + "epoch": 1.5306720738768231, + "grad_norm": 0.2403523474931717, + "learning_rate": 1.7521213446012232e-06, + "loss": 0.0391, + "num_input_tokens_seen": 42238624, + "step": 62655 + }, + { + "epoch": 1.53079422470867, + "grad_norm": 0.18997099995613098, + "learning_rate": 1.752065141826739e-06, + "loss": 0.0433, + "num_input_tokens_seen": 42242016, + "step": 62660 + }, + { + "epoch": 1.5309163755405173, + "grad_norm": 31.88978385925293, + "learning_rate": 1.7520089335830348e-06, + "loss": 0.1549, + "num_input_tokens_seen": 42245984, + "step": 62665 + }, + { + "epoch": 1.5310385263723645, + "grad_norm": 17.45441246032715, + "learning_rate": 1.7519527198705193e-06, + "loss": 0.0822, + "num_input_tokens_seen": 42249312, + "step": 62670 + }, + { + "epoch": 1.5311606772042117, + "grad_norm": 0.6886013150215149, + "learning_rate": 1.7518965006896016e-06, + "loss": 0.0554, + "num_input_tokens_seen": 42252384, + "step": 62675 + }, + { + "epoch": 1.5312828280360589, + "grad_norm": 0.599663257598877, + "learning_rate": 1.7518402760406903e-06, + "loss": 0.1014, + "num_input_tokens_seen": 42255712, + "step": 62680 + }, + { + "epoch": 1.531404978867906, + "grad_norm": 6.776517391204834, + "learning_rate": 1.7517840459241944e-06, + "loss": 0.0966, + "num_input_tokens_seen": 42258912, + "step": 62685 + }, + { + "epoch": 1.5315271296997532, + "grad_norm": 0.3810465931892395, + "learning_rate": 1.7517278103405225e-06, + "loss": 0.0772, + "num_input_tokens_seen": 42262048, + "step": 62690 + }, + { + "epoch": 1.5316492805316004, + "grad_norm": 1.3760406970977783, + "learning_rate": 1.7516715692900834e-06, + "loss": 0.1186, + "num_input_tokens_seen": 42265632, + "step": 62695 + }, + { + "epoch": 1.5317714313634476, + "grad_norm": 0.25284773111343384, + "learning_rate": 1.751615322773287e-06, + "loss": 0.0012, + "num_input_tokens_seen": 42268832, + "step": 62700 + }, + { + "epoch": 1.5318935821952948, + "grad_norm": 6.275792598724365, + "learning_rate": 1.7515590707905416e-06, + "loss": 0.0519, + "num_input_tokens_seen": 42271968, + "step": 62705 + }, + { + "epoch": 1.532015733027142, + "grad_norm": 0.36964312195777893, + "learning_rate": 1.7515028133422566e-06, + "loss": 0.0301, + "num_input_tokens_seen": 42275360, + "step": 62710 + }, + { + "epoch": 1.5321378838589892, + "grad_norm": 0.2160339504480362, + "learning_rate": 1.751446550428841e-06, + "loss": 0.0958, + "num_input_tokens_seen": 42278560, + "step": 62715 + }, + { + "epoch": 1.5322600346908364, + "grad_norm": 0.16960953176021576, + "learning_rate": 1.7513902820507038e-06, + "loss": 0.0023, + "num_input_tokens_seen": 42282272, + "step": 62720 + }, + { + "epoch": 1.5323821855226836, + "grad_norm": 3.283630609512329, + "learning_rate": 1.7513340082082547e-06, + "loss": 0.1115, + "num_input_tokens_seen": 42286112, + "step": 62725 + }, + { + "epoch": 1.5325043363545305, + "grad_norm": 15.866968154907227, + "learning_rate": 1.7512777289019022e-06, + "loss": 0.0755, + "num_input_tokens_seen": 42289376, + "step": 62730 + }, + { + "epoch": 1.5326264871863777, + "grad_norm": 3.386458158493042, + "learning_rate": 1.7512214441320564e-06, + "loss": 0.0407, + "num_input_tokens_seen": 42292768, + "step": 62735 + }, + { + "epoch": 1.532748638018225, + "grad_norm": 0.09874772280454636, + "learning_rate": 1.751165153899126e-06, + "loss": 0.0592, + "num_input_tokens_seen": 42295776, + "step": 62740 + }, + { + "epoch": 1.532870788850072, + "grad_norm": 1.7841368913650513, + "learning_rate": 1.7511088582035204e-06, + "loss": 0.0503, + "num_input_tokens_seen": 42298784, + "step": 62745 + }, + { + "epoch": 1.532992939681919, + "grad_norm": 0.28716370463371277, + "learning_rate": 1.7510525570456496e-06, + "loss": 0.0556, + "num_input_tokens_seen": 42302432, + "step": 62750 + }, + { + "epoch": 1.5331150905137663, + "grad_norm": 0.11394919455051422, + "learning_rate": 1.7509962504259223e-06, + "loss": 0.0758, + "num_input_tokens_seen": 42305952, + "step": 62755 + }, + { + "epoch": 1.5332372413456135, + "grad_norm": 0.3958131670951843, + "learning_rate": 1.7509399383447482e-06, + "loss": 0.1926, + "num_input_tokens_seen": 42308960, + "step": 62760 + }, + { + "epoch": 1.5333593921774606, + "grad_norm": 0.1787228137254715, + "learning_rate": 1.7508836208025367e-06, + "loss": 0.1107, + "num_input_tokens_seen": 42312544, + "step": 62765 + }, + { + "epoch": 1.5334815430093078, + "grad_norm": 0.3476884961128235, + "learning_rate": 1.750827297799698e-06, + "loss": 0.0015, + "num_input_tokens_seen": 42316064, + "step": 62770 + }, + { + "epoch": 1.533603693841155, + "grad_norm": 0.13672898709774017, + "learning_rate": 1.7507709693366412e-06, + "loss": 0.0993, + "num_input_tokens_seen": 42319328, + "step": 62775 + }, + { + "epoch": 1.5337258446730022, + "grad_norm": 24.389223098754883, + "learning_rate": 1.7507146354137759e-06, + "loss": 0.0441, + "num_input_tokens_seen": 42322656, + "step": 62780 + }, + { + "epoch": 1.5338479955048494, + "grad_norm": 2.3359413146972656, + "learning_rate": 1.7506582960315117e-06, + "loss": 0.1276, + "num_input_tokens_seen": 42326304, + "step": 62785 + }, + { + "epoch": 1.5339701463366966, + "grad_norm": 15.77255630493164, + "learning_rate": 1.7506019511902586e-06, + "loss": 0.1918, + "num_input_tokens_seen": 42329504, + "step": 62790 + }, + { + "epoch": 1.5340922971685438, + "grad_norm": 22.603723526000977, + "learning_rate": 1.750545600890426e-06, + "loss": 0.1111, + "num_input_tokens_seen": 42332960, + "step": 62795 + }, + { + "epoch": 1.534214448000391, + "grad_norm": 0.08420402556657791, + "learning_rate": 1.7504892451324241e-06, + "loss": 0.1264, + "num_input_tokens_seen": 42336352, + "step": 62800 + }, + { + "epoch": 1.5343365988322382, + "grad_norm": 0.03107322007417679, + "learning_rate": 1.7504328839166628e-06, + "loss": 0.0363, + "num_input_tokens_seen": 42339296, + "step": 62805 + }, + { + "epoch": 1.5344587496640854, + "grad_norm": 0.05710528418421745, + "learning_rate": 1.7503765172435515e-06, + "loss": 0.1388, + "num_input_tokens_seen": 42342304, + "step": 62810 + }, + { + "epoch": 1.5345809004959325, + "grad_norm": 0.04239802435040474, + "learning_rate": 1.7503201451135002e-06, + "loss": 0.0102, + "num_input_tokens_seen": 42346016, + "step": 62815 + }, + { + "epoch": 1.5347030513277795, + "grad_norm": 71.819580078125, + "learning_rate": 1.7502637675269192e-06, + "loss": 0.1099, + "num_input_tokens_seen": 42349344, + "step": 62820 + }, + { + "epoch": 1.5348252021596267, + "grad_norm": 0.10941146314144135, + "learning_rate": 1.7502073844842183e-06, + "loss": 0.0517, + "num_input_tokens_seen": 42352480, + "step": 62825 + }, + { + "epoch": 1.534947352991474, + "grad_norm": 0.17934951186180115, + "learning_rate": 1.7501509959858074e-06, + "loss": 0.0806, + "num_input_tokens_seen": 42355680, + "step": 62830 + }, + { + "epoch": 1.535069503823321, + "grad_norm": 0.6699011325836182, + "learning_rate": 1.7500946020320967e-06, + "loss": 0.112, + "num_input_tokens_seen": 42358752, + "step": 62835 + }, + { + "epoch": 1.535191654655168, + "grad_norm": 9.080674171447754, + "learning_rate": 1.7500382026234964e-06, + "loss": 0.2533, + "num_input_tokens_seen": 42362336, + "step": 62840 + }, + { + "epoch": 1.5353138054870152, + "grad_norm": 7.4552154541015625, + "learning_rate": 1.7499817977604163e-06, + "loss": 0.0778, + "num_input_tokens_seen": 42365728, + "step": 62845 + }, + { + "epoch": 1.5354359563188624, + "grad_norm": 0.44541171193122864, + "learning_rate": 1.7499253874432672e-06, + "loss": 0.0658, + "num_input_tokens_seen": 42369888, + "step": 62850 + }, + { + "epoch": 1.5355581071507096, + "grad_norm": 0.41716626286506653, + "learning_rate": 1.7498689716724586e-06, + "loss": 0.1032, + "num_input_tokens_seen": 42373472, + "step": 62855 + }, + { + "epoch": 1.5356802579825568, + "grad_norm": 0.11591717600822449, + "learning_rate": 1.7498125504484014e-06, + "loss": 0.0364, + "num_input_tokens_seen": 42377056, + "step": 62860 + }, + { + "epoch": 1.535802408814404, + "grad_norm": 27.05655860900879, + "learning_rate": 1.7497561237715055e-06, + "loss": 0.1149, + "num_input_tokens_seen": 42380384, + "step": 62865 + }, + { + "epoch": 1.5359245596462512, + "grad_norm": 0.4578407406806946, + "learning_rate": 1.7496996916421818e-06, + "loss": 0.0337, + "num_input_tokens_seen": 42383776, + "step": 62870 + }, + { + "epoch": 1.5360467104780984, + "grad_norm": 18.179445266723633, + "learning_rate": 1.7496432540608398e-06, + "loss": 0.2404, + "num_input_tokens_seen": 42387232, + "step": 62875 + }, + { + "epoch": 1.5361688613099456, + "grad_norm": 49.866397857666016, + "learning_rate": 1.7495868110278905e-06, + "loss": 0.0842, + "num_input_tokens_seen": 42390624, + "step": 62880 + }, + { + "epoch": 1.5362910121417928, + "grad_norm": 8.857733726501465, + "learning_rate": 1.7495303625437447e-06, + "loss": 0.2111, + "num_input_tokens_seen": 42394080, + "step": 62885 + }, + { + "epoch": 1.53641316297364, + "grad_norm": 1.7023998498916626, + "learning_rate": 1.749473908608812e-06, + "loss": 0.04, + "num_input_tokens_seen": 42397024, + "step": 62890 + }, + { + "epoch": 1.5365353138054871, + "grad_norm": 10.899248123168945, + "learning_rate": 1.7494174492235038e-06, + "loss": 0.0856, + "num_input_tokens_seen": 42400480, + "step": 62895 + }, + { + "epoch": 1.5366574646373343, + "grad_norm": 23.474842071533203, + "learning_rate": 1.7493609843882302e-06, + "loss": 0.0542, + "num_input_tokens_seen": 42404128, + "step": 62900 + }, + { + "epoch": 1.5367796154691813, + "grad_norm": 8.95337200164795, + "learning_rate": 1.749304514103402e-06, + "loss": 0.0997, + "num_input_tokens_seen": 42407392, + "step": 62905 + }, + { + "epoch": 1.5369017663010285, + "grad_norm": 20.889585494995117, + "learning_rate": 1.74924803836943e-06, + "loss": 0.1064, + "num_input_tokens_seen": 42410784, + "step": 62910 + }, + { + "epoch": 1.5370239171328757, + "grad_norm": 0.41612470149993896, + "learning_rate": 1.7491915571867245e-06, + "loss": 0.0706, + "num_input_tokens_seen": 42414880, + "step": 62915 + }, + { + "epoch": 1.5371460679647229, + "grad_norm": 10.44105339050293, + "learning_rate": 1.7491350705556967e-06, + "loss": 0.0611, + "num_input_tokens_seen": 42417888, + "step": 62920 + }, + { + "epoch": 1.53726821879657, + "grad_norm": 1.0386707782745361, + "learning_rate": 1.749078578476757e-06, + "loss": 0.0141, + "num_input_tokens_seen": 42421344, + "step": 62925 + }, + { + "epoch": 1.537390369628417, + "grad_norm": 9.058958053588867, + "learning_rate": 1.7490220809503163e-06, + "loss": 0.0567, + "num_input_tokens_seen": 42425312, + "step": 62930 + }, + { + "epoch": 1.5375125204602642, + "grad_norm": 28.129384994506836, + "learning_rate": 1.7489655779767856e-06, + "loss": 0.1468, + "num_input_tokens_seen": 42428192, + "step": 62935 + }, + { + "epoch": 1.5376346712921114, + "grad_norm": 1.2702558040618896, + "learning_rate": 1.748909069556576e-06, + "loss": 0.1312, + "num_input_tokens_seen": 42431840, + "step": 62940 + }, + { + "epoch": 1.5377568221239586, + "grad_norm": 3.0834596157073975, + "learning_rate": 1.7488525556900981e-06, + "loss": 0.073, + "num_input_tokens_seen": 42435616, + "step": 62945 + }, + { + "epoch": 1.5378789729558058, + "grad_norm": 10.420439720153809, + "learning_rate": 1.748796036377763e-06, + "loss": 0.0668, + "num_input_tokens_seen": 42440672, + "step": 62950 + }, + { + "epoch": 1.538001123787653, + "grad_norm": 9.713109016418457, + "learning_rate": 1.7487395116199815e-06, + "loss": 0.0608, + "num_input_tokens_seen": 42444192, + "step": 62955 + }, + { + "epoch": 1.5381232746195002, + "grad_norm": 64.52426147460938, + "learning_rate": 1.7486829814171653e-06, + "loss": 0.1059, + "num_input_tokens_seen": 42447200, + "step": 62960 + }, + { + "epoch": 1.5382454254513473, + "grad_norm": 1.0289015769958496, + "learning_rate": 1.7486264457697249e-06, + "loss": 0.0915, + "num_input_tokens_seen": 42450528, + "step": 62965 + }, + { + "epoch": 1.5383675762831945, + "grad_norm": 143.9002685546875, + "learning_rate": 1.7485699046780714e-06, + "loss": 0.1535, + "num_input_tokens_seen": 42453920, + "step": 62970 + }, + { + "epoch": 1.5384897271150417, + "grad_norm": 0.10126427561044693, + "learning_rate": 1.7485133581426165e-06, + "loss": 0.1188, + "num_input_tokens_seen": 42456800, + "step": 62975 + }, + { + "epoch": 1.538611877946889, + "grad_norm": 0.7094244360923767, + "learning_rate": 1.7484568061637712e-06, + "loss": 0.0455, + "num_input_tokens_seen": 42460512, + "step": 62980 + }, + { + "epoch": 1.538734028778736, + "grad_norm": 2.6824791431427, + "learning_rate": 1.7484002487419466e-06, + "loss": 0.1353, + "num_input_tokens_seen": 42463840, + "step": 62985 + }, + { + "epoch": 1.5388561796105833, + "grad_norm": 19.794893264770508, + "learning_rate": 1.748343685877554e-06, + "loss": 0.0933, + "num_input_tokens_seen": 42467552, + "step": 62990 + }, + { + "epoch": 1.5389783304424303, + "grad_norm": 0.8362667560577393, + "learning_rate": 1.7482871175710048e-06, + "loss": 0.0359, + "num_input_tokens_seen": 42470880, + "step": 62995 + }, + { + "epoch": 1.5391004812742775, + "grad_norm": 0.17811979353427887, + "learning_rate": 1.7482305438227104e-06, + "loss": 0.0564, + "num_input_tokens_seen": 42474016, + "step": 63000 + }, + { + "epoch": 1.5392226321061246, + "grad_norm": 0.18364813923835754, + "learning_rate": 1.7481739646330822e-06, + "loss": 0.0762, + "num_input_tokens_seen": 42477408, + "step": 63005 + }, + { + "epoch": 1.5393447829379718, + "grad_norm": 16.569730758666992, + "learning_rate": 1.748117380002532e-06, + "loss": 0.0955, + "num_input_tokens_seen": 42480672, + "step": 63010 + }, + { + "epoch": 1.539466933769819, + "grad_norm": 0.3204478919506073, + "learning_rate": 1.7480607899314707e-06, + "loss": 0.0247, + "num_input_tokens_seen": 42484192, + "step": 63015 + }, + { + "epoch": 1.539589084601666, + "grad_norm": 0.3662739396095276, + "learning_rate": 1.7480041944203102e-06, + "loss": 0.1316, + "num_input_tokens_seen": 42487392, + "step": 63020 + }, + { + "epoch": 1.5397112354335132, + "grad_norm": 0.3306255042552948, + "learning_rate": 1.7479475934694623e-06, + "loss": 0.1054, + "num_input_tokens_seen": 42490656, + "step": 63025 + }, + { + "epoch": 1.5398333862653604, + "grad_norm": 9.560601234436035, + "learning_rate": 1.7478909870793378e-06, + "loss": 0.1108, + "num_input_tokens_seen": 42493792, + "step": 63030 + }, + { + "epoch": 1.5399555370972076, + "grad_norm": 0.8780872225761414, + "learning_rate": 1.7478343752503494e-06, + "loss": 0.0703, + "num_input_tokens_seen": 42497504, + "step": 63035 + }, + { + "epoch": 1.5400776879290548, + "grad_norm": 0.5312938690185547, + "learning_rate": 1.747777757982908e-06, + "loss": 0.0612, + "num_input_tokens_seen": 42500832, + "step": 63040 + }, + { + "epoch": 1.540199838760902, + "grad_norm": 0.25888314843177795, + "learning_rate": 1.7477211352774254e-06, + "loss": 0.0314, + "num_input_tokens_seen": 42504352, + "step": 63045 + }, + { + "epoch": 1.5403219895927491, + "grad_norm": 0.16010764241218567, + "learning_rate": 1.7476645071343141e-06, + "loss": 0.0689, + "num_input_tokens_seen": 42507744, + "step": 63050 + }, + { + "epoch": 1.5404441404245963, + "grad_norm": 0.1844155341386795, + "learning_rate": 1.7476078735539853e-06, + "loss": 0.1989, + "num_input_tokens_seen": 42510816, + "step": 63055 + }, + { + "epoch": 1.5405662912564435, + "grad_norm": 2.511029005050659, + "learning_rate": 1.7475512345368509e-06, + "loss": 0.1072, + "num_input_tokens_seen": 42514016, + "step": 63060 + }, + { + "epoch": 1.5406884420882907, + "grad_norm": 9.338394165039062, + "learning_rate": 1.7474945900833227e-06, + "loss": 0.0739, + "num_input_tokens_seen": 42518112, + "step": 63065 + }, + { + "epoch": 1.540810592920138, + "grad_norm": 0.3963111937046051, + "learning_rate": 1.7474379401938125e-06, + "loss": 0.0852, + "num_input_tokens_seen": 42521312, + "step": 63070 + }, + { + "epoch": 1.540932743751985, + "grad_norm": 0.3336862027645111, + "learning_rate": 1.7473812848687334e-06, + "loss": 0.0435, + "num_input_tokens_seen": 42524448, + "step": 63075 + }, + { + "epoch": 1.5410548945838323, + "grad_norm": 19.525541305541992, + "learning_rate": 1.7473246241084958e-06, + "loss": 0.1233, + "num_input_tokens_seen": 42527712, + "step": 63080 + }, + { + "epoch": 1.5411770454156792, + "grad_norm": 0.3767625689506531, + "learning_rate": 1.7472679579135129e-06, + "loss": 0.0974, + "num_input_tokens_seen": 42531424, + "step": 63085 + }, + { + "epoch": 1.5412991962475264, + "grad_norm": 112.72207641601562, + "learning_rate": 1.7472112862841963e-06, + "loss": 0.1039, + "num_input_tokens_seen": 42534560, + "step": 63090 + }, + { + "epoch": 1.5414213470793736, + "grad_norm": 1.469272255897522, + "learning_rate": 1.7471546092209585e-06, + "loss": 0.101, + "num_input_tokens_seen": 42537632, + "step": 63095 + }, + { + "epoch": 1.5415434979112208, + "grad_norm": 8.400092124938965, + "learning_rate": 1.7470979267242111e-06, + "loss": 0.1874, + "num_input_tokens_seen": 42540640, + "step": 63100 + }, + { + "epoch": 1.541665648743068, + "grad_norm": 14.375085830688477, + "learning_rate": 1.7470412387943668e-06, + "loss": 0.127, + "num_input_tokens_seen": 42543648, + "step": 63105 + }, + { + "epoch": 1.541787799574915, + "grad_norm": 0.7379078269004822, + "learning_rate": 1.7469845454318374e-06, + "loss": 0.0244, + "num_input_tokens_seen": 42546784, + "step": 63110 + }, + { + "epoch": 1.5419099504067622, + "grad_norm": 0.09788034111261368, + "learning_rate": 1.7469278466370359e-06, + "loss": 0.0402, + "num_input_tokens_seen": 42550112, + "step": 63115 + }, + { + "epoch": 1.5420321012386093, + "grad_norm": 0.040544167160987854, + "learning_rate": 1.7468711424103742e-06, + "loss": 0.0152, + "num_input_tokens_seen": 42553120, + "step": 63120 + }, + { + "epoch": 1.5421542520704565, + "grad_norm": 64.45174407958984, + "learning_rate": 1.7468144327522644e-06, + "loss": 0.164, + "num_input_tokens_seen": 42556512, + "step": 63125 + }, + { + "epoch": 1.5422764029023037, + "grad_norm": 1.8336378335952759, + "learning_rate": 1.7467577176631192e-06, + "loss": 0.0329, + "num_input_tokens_seen": 42559904, + "step": 63130 + }, + { + "epoch": 1.542398553734151, + "grad_norm": 16.188873291015625, + "learning_rate": 1.746700997143351e-06, + "loss": 0.0034, + "num_input_tokens_seen": 42563168, + "step": 63135 + }, + { + "epoch": 1.542520704565998, + "grad_norm": 0.4830506443977356, + "learning_rate": 1.7466442711933724e-06, + "loss": 0.042, + "num_input_tokens_seen": 42566240, + "step": 63140 + }, + { + "epoch": 1.5426428553978453, + "grad_norm": 0.06538606435060501, + "learning_rate": 1.7465875398135958e-06, + "loss": 0.0024, + "num_input_tokens_seen": 42569376, + "step": 63145 + }, + { + "epoch": 1.5427650062296925, + "grad_norm": 265.35064697265625, + "learning_rate": 1.746530803004434e-06, + "loss": 0.0882, + "num_input_tokens_seen": 42573024, + "step": 63150 + }, + { + "epoch": 1.5428871570615397, + "grad_norm": 10.878697395324707, + "learning_rate": 1.7464740607662991e-06, + "loss": 0.0602, + "num_input_tokens_seen": 42576480, + "step": 63155 + }, + { + "epoch": 1.5430093078933869, + "grad_norm": 0.4172748327255249, + "learning_rate": 1.746417313099604e-06, + "loss": 0.0833, + "num_input_tokens_seen": 42579360, + "step": 63160 + }, + { + "epoch": 1.543131458725234, + "grad_norm": 31.220434188842773, + "learning_rate": 1.7463605600047618e-06, + "loss": 0.2162, + "num_input_tokens_seen": 42582624, + "step": 63165 + }, + { + "epoch": 1.5432536095570812, + "grad_norm": 0.008305324241518974, + "learning_rate": 1.7463038014821848e-06, + "loss": 0.0718, + "num_input_tokens_seen": 42586016, + "step": 63170 + }, + { + "epoch": 1.5433757603889282, + "grad_norm": 0.0460757315158844, + "learning_rate": 1.7462470375322856e-06, + "loss": 0.0469, + "num_input_tokens_seen": 42589088, + "step": 63175 + }, + { + "epoch": 1.5434979112207754, + "grad_norm": 0.18197746574878693, + "learning_rate": 1.7461902681554773e-06, + "loss": 0.0732, + "num_input_tokens_seen": 42592800, + "step": 63180 + }, + { + "epoch": 1.5436200620526226, + "grad_norm": 87.91574096679688, + "learning_rate": 1.7461334933521725e-06, + "loss": 0.1156, + "num_input_tokens_seen": 42596320, + "step": 63185 + }, + { + "epoch": 1.5437422128844698, + "grad_norm": 0.7851141095161438, + "learning_rate": 1.7460767131227844e-06, + "loss": 0.0074, + "num_input_tokens_seen": 42599456, + "step": 63190 + }, + { + "epoch": 1.5438643637163167, + "grad_norm": 2.0627529621124268, + "learning_rate": 1.7460199274677262e-06, + "loss": 0.0416, + "num_input_tokens_seen": 42602656, + "step": 63195 + }, + { + "epoch": 1.543986514548164, + "grad_norm": 0.045423515141010284, + "learning_rate": 1.7459631363874098e-06, + "loss": 0.1269, + "num_input_tokens_seen": 42605856, + "step": 63200 + }, + { + "epoch": 1.5441086653800111, + "grad_norm": 37.60093688964844, + "learning_rate": 1.745906339882249e-06, + "loss": 0.0604, + "num_input_tokens_seen": 42609248, + "step": 63205 + }, + { + "epoch": 1.5442308162118583, + "grad_norm": 8.284845352172852, + "learning_rate": 1.7458495379526568e-06, + "loss": 0.1087, + "num_input_tokens_seen": 42612512, + "step": 63210 + }, + { + "epoch": 1.5443529670437055, + "grad_norm": 0.04278009012341499, + "learning_rate": 1.745792730599046e-06, + "loss": 0.0718, + "num_input_tokens_seen": 42615584, + "step": 63215 + }, + { + "epoch": 1.5444751178755527, + "grad_norm": 153.6990203857422, + "learning_rate": 1.7457359178218304e-06, + "loss": 0.2104, + "num_input_tokens_seen": 42618784, + "step": 63220 + }, + { + "epoch": 1.5445972687073999, + "grad_norm": 0.11112756282091141, + "learning_rate": 1.745679099621422e-06, + "loss": 0.0914, + "num_input_tokens_seen": 42621920, + "step": 63225 + }, + { + "epoch": 1.544719419539247, + "grad_norm": 0.2005205750465393, + "learning_rate": 1.7456222759982348e-06, + "loss": 0.0784, + "num_input_tokens_seen": 42625568, + "step": 63230 + }, + { + "epoch": 1.5448415703710943, + "grad_norm": 0.27125903964042664, + "learning_rate": 1.745565446952682e-06, + "loss": 0.0009, + "num_input_tokens_seen": 42628896, + "step": 63235 + }, + { + "epoch": 1.5449637212029415, + "grad_norm": 115.29658508300781, + "learning_rate": 1.7455086124851764e-06, + "loss": 0.0905, + "num_input_tokens_seen": 42631776, + "step": 63240 + }, + { + "epoch": 1.5450858720347886, + "grad_norm": 13.909765243530273, + "learning_rate": 1.7454517725961319e-06, + "loss": 0.0461, + "num_input_tokens_seen": 42635232, + "step": 63245 + }, + { + "epoch": 1.5452080228666358, + "grad_norm": 58.41990280151367, + "learning_rate": 1.7453949272859619e-06, + "loss": 0.0771, + "num_input_tokens_seen": 42638624, + "step": 63250 + }, + { + "epoch": 1.545330173698483, + "grad_norm": 0.1262422800064087, + "learning_rate": 1.745338076555079e-06, + "loss": 0.0015, + "num_input_tokens_seen": 42641824, + "step": 63255 + }, + { + "epoch": 1.5454523245303302, + "grad_norm": 12.13207721710205, + "learning_rate": 1.7452812204038972e-06, + "loss": 0.0622, + "num_input_tokens_seen": 42645408, + "step": 63260 + }, + { + "epoch": 1.5455744753621772, + "grad_norm": 38.522159576416016, + "learning_rate": 1.74522435883283e-06, + "loss": 0.1353, + "num_input_tokens_seen": 42649056, + "step": 63265 + }, + { + "epoch": 1.5456966261940244, + "grad_norm": 14.3047513961792, + "learning_rate": 1.745167491842291e-06, + "loss": 0.1149, + "num_input_tokens_seen": 42652320, + "step": 63270 + }, + { + "epoch": 1.5458187770258716, + "grad_norm": 0.16538095474243164, + "learning_rate": 1.7451106194326933e-06, + "loss": 0.0959, + "num_input_tokens_seen": 42655776, + "step": 63275 + }, + { + "epoch": 1.5459409278577187, + "grad_norm": 26.421430587768555, + "learning_rate": 1.745053741604451e-06, + "loss": 0.0981, + "num_input_tokens_seen": 42658912, + "step": 63280 + }, + { + "epoch": 1.5460630786895657, + "grad_norm": 22.397008895874023, + "learning_rate": 1.7449968583579776e-06, + "loss": 0.1071, + "num_input_tokens_seen": 42662496, + "step": 63285 + }, + { + "epoch": 1.546185229521413, + "grad_norm": 0.22859802842140198, + "learning_rate": 1.7449399696936862e-06, + "loss": 0.1257, + "num_input_tokens_seen": 42666592, + "step": 63290 + }, + { + "epoch": 1.54630738035326, + "grad_norm": 22.230684280395508, + "learning_rate": 1.7448830756119912e-06, + "loss": 0.0495, + "num_input_tokens_seen": 42669856, + "step": 63295 + }, + { + "epoch": 1.5464295311851073, + "grad_norm": 30.695005416870117, + "learning_rate": 1.7448261761133062e-06, + "loss": 0.0459, + "num_input_tokens_seen": 42673120, + "step": 63300 + }, + { + "epoch": 1.5465516820169545, + "grad_norm": 0.09240459650754929, + "learning_rate": 1.7447692711980448e-06, + "loss": 0.0671, + "num_input_tokens_seen": 42676512, + "step": 63305 + }, + { + "epoch": 1.5466738328488017, + "grad_norm": 0.0510527528822422, + "learning_rate": 1.744712360866621e-06, + "loss": 0.0339, + "num_input_tokens_seen": 42680096, + "step": 63310 + }, + { + "epoch": 1.5467959836806489, + "grad_norm": 0.5635117888450623, + "learning_rate": 1.7446554451194486e-06, + "loss": 0.1685, + "num_input_tokens_seen": 42683232, + "step": 63315 + }, + { + "epoch": 1.546918134512496, + "grad_norm": 36.4852409362793, + "learning_rate": 1.7445985239569416e-06, + "loss": 0.1446, + "num_input_tokens_seen": 42686560, + "step": 63320 + }, + { + "epoch": 1.5470402853443432, + "grad_norm": 9.156826972961426, + "learning_rate": 1.7445415973795137e-06, + "loss": 0.0934, + "num_input_tokens_seen": 42689888, + "step": 63325 + }, + { + "epoch": 1.5471624361761904, + "grad_norm": 17.986148834228516, + "learning_rate": 1.7444846653875791e-06, + "loss": 0.1435, + "num_input_tokens_seen": 42693216, + "step": 63330 + }, + { + "epoch": 1.5472845870080376, + "grad_norm": 0.1627344936132431, + "learning_rate": 1.7444277279815518e-06, + "loss": 0.1261, + "num_input_tokens_seen": 42696800, + "step": 63335 + }, + { + "epoch": 1.5474067378398848, + "grad_norm": 26.671512603759766, + "learning_rate": 1.744370785161846e-06, + "loss": 0.1507, + "num_input_tokens_seen": 42700640, + "step": 63340 + }, + { + "epoch": 1.547528888671732, + "grad_norm": 3.354576587677002, + "learning_rate": 1.7443138369288754e-06, + "loss": 0.0884, + "num_input_tokens_seen": 42704096, + "step": 63345 + }, + { + "epoch": 1.5476510395035792, + "grad_norm": 0.390531986951828, + "learning_rate": 1.7442568832830546e-06, + "loss": 0.0721, + "num_input_tokens_seen": 42707616, + "step": 63350 + }, + { + "epoch": 1.5477731903354262, + "grad_norm": 10.482186317443848, + "learning_rate": 1.7441999242247974e-06, + "loss": 0.1018, + "num_input_tokens_seen": 42710688, + "step": 63355 + }, + { + "epoch": 1.5478953411672733, + "grad_norm": 0.33957600593566895, + "learning_rate": 1.7441429597545181e-06, + "loss": 0.0021, + "num_input_tokens_seen": 42714080, + "step": 63360 + }, + { + "epoch": 1.5480174919991205, + "grad_norm": 0.203145369887352, + "learning_rate": 1.7440859898726312e-06, + "loss": 0.032, + "num_input_tokens_seen": 42717472, + "step": 63365 + }, + { + "epoch": 1.5481396428309677, + "grad_norm": 0.8855333924293518, + "learning_rate": 1.7440290145795507e-06, + "loss": 0.1066, + "num_input_tokens_seen": 42720672, + "step": 63370 + }, + { + "epoch": 1.5482617936628147, + "grad_norm": 0.3422999680042267, + "learning_rate": 1.7439720338756913e-06, + "loss": 0.129, + "num_input_tokens_seen": 42723808, + "step": 63375 + }, + { + "epoch": 1.5483839444946619, + "grad_norm": 0.5624834895133972, + "learning_rate": 1.743915047761467e-06, + "loss": 0.046, + "num_input_tokens_seen": 42726944, + "step": 63380 + }, + { + "epoch": 1.548506095326509, + "grad_norm": 8.767424583435059, + "learning_rate": 1.7438580562372925e-06, + "loss": 0.1086, + "num_input_tokens_seen": 42730336, + "step": 63385 + }, + { + "epoch": 1.5486282461583563, + "grad_norm": 1.0835167169570923, + "learning_rate": 1.7438010593035822e-06, + "loss": 0.1548, + "num_input_tokens_seen": 42733472, + "step": 63390 + }, + { + "epoch": 1.5487503969902034, + "grad_norm": 0.2543872594833374, + "learning_rate": 1.7437440569607502e-06, + "loss": 0.1006, + "num_input_tokens_seen": 42737248, + "step": 63395 + }, + { + "epoch": 1.5488725478220506, + "grad_norm": 2.4009156227111816, + "learning_rate": 1.7436870492092117e-06, + "loss": 0.0539, + "num_input_tokens_seen": 42740448, + "step": 63400 + }, + { + "epoch": 1.5489946986538978, + "grad_norm": 12.928868293762207, + "learning_rate": 1.7436300360493808e-06, + "loss": 0.074, + "num_input_tokens_seen": 42743840, + "step": 63405 + }, + { + "epoch": 1.549116849485745, + "grad_norm": 18.121219635009766, + "learning_rate": 1.7435730174816725e-06, + "loss": 0.31, + "num_input_tokens_seen": 42747104, + "step": 63410 + }, + { + "epoch": 1.5492390003175922, + "grad_norm": 6.432738780975342, + "learning_rate": 1.743515993506501e-06, + "loss": 0.0478, + "num_input_tokens_seen": 42750880, + "step": 63415 + }, + { + "epoch": 1.5493611511494394, + "grad_norm": 0.18056033551692963, + "learning_rate": 1.7434589641242812e-06, + "loss": 0.0756, + "num_input_tokens_seen": 42755296, + "step": 63420 + }, + { + "epoch": 1.5494833019812866, + "grad_norm": 64.7015380859375, + "learning_rate": 1.7434019293354278e-06, + "loss": 0.1218, + "num_input_tokens_seen": 42758624, + "step": 63425 + }, + { + "epoch": 1.5496054528131338, + "grad_norm": 12.332176208496094, + "learning_rate": 1.7433448891403559e-06, + "loss": 0.1732, + "num_input_tokens_seen": 42761760, + "step": 63430 + }, + { + "epoch": 1.549727603644981, + "grad_norm": 0.3980131149291992, + "learning_rate": 1.7432878435394795e-06, + "loss": 0.0417, + "num_input_tokens_seen": 42765088, + "step": 63435 + }, + { + "epoch": 1.549849754476828, + "grad_norm": 0.6029239892959595, + "learning_rate": 1.7432307925332146e-06, + "loss": 0.0317, + "num_input_tokens_seen": 42768160, + "step": 63440 + }, + { + "epoch": 1.5499719053086751, + "grad_norm": 0.5722103714942932, + "learning_rate": 1.743173736121975e-06, + "loss": 0.0716, + "num_input_tokens_seen": 42772000, + "step": 63445 + }, + { + "epoch": 1.5500940561405223, + "grad_norm": 0.04331221058964729, + "learning_rate": 1.7431166743061762e-06, + "loss": 0.0256, + "num_input_tokens_seen": 42775072, + "step": 63450 + }, + { + "epoch": 1.5502162069723695, + "grad_norm": 0.3946976959705353, + "learning_rate": 1.7430596070862332e-06, + "loss": 0.0631, + "num_input_tokens_seen": 42778272, + "step": 63455 + }, + { + "epoch": 1.5503383578042167, + "grad_norm": 14.206043243408203, + "learning_rate": 1.743002534462561e-06, + "loss": 0.0389, + "num_input_tokens_seen": 42781984, + "step": 63460 + }, + { + "epoch": 1.5504605086360637, + "grad_norm": 0.05685468763113022, + "learning_rate": 1.7429454564355744e-06, + "loss": 0.0364, + "num_input_tokens_seen": 42785056, + "step": 63465 + }, + { + "epoch": 1.5505826594679109, + "grad_norm": 13.921191215515137, + "learning_rate": 1.7428883730056884e-06, + "loss": 0.1069, + "num_input_tokens_seen": 42788192, + "step": 63470 + }, + { + "epoch": 1.550704810299758, + "grad_norm": 11.072580337524414, + "learning_rate": 1.7428312841733187e-06, + "loss": 0.0492, + "num_input_tokens_seen": 42791712, + "step": 63475 + }, + { + "epoch": 1.5508269611316052, + "grad_norm": 0.28202781081199646, + "learning_rate": 1.7427741899388798e-06, + "loss": 0.1006, + "num_input_tokens_seen": 42795360, + "step": 63480 + }, + { + "epoch": 1.5509491119634524, + "grad_norm": 11.548276901245117, + "learning_rate": 1.7427170903027874e-06, + "loss": 0.1221, + "num_input_tokens_seen": 42798496, + "step": 63485 + }, + { + "epoch": 1.5510712627952996, + "grad_norm": 0.10354668647050858, + "learning_rate": 1.7426599852654564e-06, + "loss": 0.1871, + "num_input_tokens_seen": 42802080, + "step": 63490 + }, + { + "epoch": 1.5511934136271468, + "grad_norm": 0.5099055171012878, + "learning_rate": 1.7426028748273023e-06, + "loss": 0.0024, + "num_input_tokens_seen": 42805536, + "step": 63495 + }, + { + "epoch": 1.551315564458994, + "grad_norm": 0.38024604320526123, + "learning_rate": 1.7425457589887405e-06, + "loss": 0.0444, + "num_input_tokens_seen": 42808736, + "step": 63500 + }, + { + "epoch": 1.5514377152908412, + "grad_norm": 25.034109115600586, + "learning_rate": 1.7424886377501862e-06, + "loss": 0.0526, + "num_input_tokens_seen": 42812192, + "step": 63505 + }, + { + "epoch": 1.5515598661226884, + "grad_norm": 0.5092397332191467, + "learning_rate": 1.7424315111120547e-06, + "loss": 0.0499, + "num_input_tokens_seen": 42815712, + "step": 63510 + }, + { + "epoch": 1.5516820169545356, + "grad_norm": 0.198876291513443, + "learning_rate": 1.7423743790747616e-06, + "loss": 0.0718, + "num_input_tokens_seen": 42819168, + "step": 63515 + }, + { + "epoch": 1.5518041677863827, + "grad_norm": 0.14343546330928802, + "learning_rate": 1.7423172416387221e-06, + "loss": 0.0942, + "num_input_tokens_seen": 42822496, + "step": 63520 + }, + { + "epoch": 1.55192631861823, + "grad_norm": 0.5389144420623779, + "learning_rate": 1.7422600988043521e-06, + "loss": 0.0979, + "num_input_tokens_seen": 42825504, + "step": 63525 + }, + { + "epoch": 1.552048469450077, + "grad_norm": 0.06327793747186661, + "learning_rate": 1.7422029505720671e-06, + "loss": 0.0011, + "num_input_tokens_seen": 42828448, + "step": 63530 + }, + { + "epoch": 1.552170620281924, + "grad_norm": 0.1345394402742386, + "learning_rate": 1.7421457969422828e-06, + "loss": 0.0825, + "num_input_tokens_seen": 42832224, + "step": 63535 + }, + { + "epoch": 1.5522927711137713, + "grad_norm": 22.174745559692383, + "learning_rate": 1.7420886379154145e-06, + "loss": 0.044, + "num_input_tokens_seen": 42835808, + "step": 63540 + }, + { + "epoch": 1.5524149219456185, + "grad_norm": 71.50115966796875, + "learning_rate": 1.742031473491878e-06, + "loss": 0.23, + "num_input_tokens_seen": 42839072, + "step": 63545 + }, + { + "epoch": 1.5525370727774657, + "grad_norm": 36.79838943481445, + "learning_rate": 1.7419743036720892e-06, + "loss": 0.0474, + "num_input_tokens_seen": 42842144, + "step": 63550 + }, + { + "epoch": 1.5526592236093126, + "grad_norm": 0.07557157427072525, + "learning_rate": 1.7419171284564634e-06, + "loss": 0.0748, + "num_input_tokens_seen": 42845920, + "step": 63555 + }, + { + "epoch": 1.5527813744411598, + "grad_norm": 0.1075301319360733, + "learning_rate": 1.7418599478454165e-06, + "loss": 0.1036, + "num_input_tokens_seen": 42849184, + "step": 63560 + }, + { + "epoch": 1.552903525273007, + "grad_norm": 7.113918304443359, + "learning_rate": 1.7418027618393651e-06, + "loss": 0.1284, + "num_input_tokens_seen": 42852192, + "step": 63565 + }, + { + "epoch": 1.5530256761048542, + "grad_norm": 0.17635847628116608, + "learning_rate": 1.741745570438724e-06, + "loss": 0.0016, + "num_input_tokens_seen": 42855840, + "step": 63570 + }, + { + "epoch": 1.5531478269367014, + "grad_norm": 0.34732112288475037, + "learning_rate": 1.7416883736439098e-06, + "loss": 0.1145, + "num_input_tokens_seen": 42858912, + "step": 63575 + }, + { + "epoch": 1.5532699777685486, + "grad_norm": 0.2674591839313507, + "learning_rate": 1.7416311714553385e-06, + "loss": 0.0483, + "num_input_tokens_seen": 42862112, + "step": 63580 + }, + { + "epoch": 1.5533921286003958, + "grad_norm": 21.65047836303711, + "learning_rate": 1.7415739638734257e-06, + "loss": 0.0595, + "num_input_tokens_seen": 42865312, + "step": 63585 + }, + { + "epoch": 1.553514279432243, + "grad_norm": 0.06598281860351562, + "learning_rate": 1.7415167508985876e-06, + "loss": 0.0916, + "num_input_tokens_seen": 42868640, + "step": 63590 + }, + { + "epoch": 1.5536364302640902, + "grad_norm": 12.143324851989746, + "learning_rate": 1.74145953253124e-06, + "loss": 0.1145, + "num_input_tokens_seen": 42871712, + "step": 63595 + }, + { + "epoch": 1.5537585810959373, + "grad_norm": 8.922744750976562, + "learning_rate": 1.7414023087717996e-06, + "loss": 0.0532, + "num_input_tokens_seen": 42875232, + "step": 63600 + }, + { + "epoch": 1.5538807319277845, + "grad_norm": 13.498004913330078, + "learning_rate": 1.741345079620682e-06, + "loss": 0.0717, + "num_input_tokens_seen": 42879072, + "step": 63605 + }, + { + "epoch": 1.5540028827596317, + "grad_norm": 0.186791330575943, + "learning_rate": 1.7412878450783036e-06, + "loss": 0.0011, + "num_input_tokens_seen": 42882272, + "step": 63610 + }, + { + "epoch": 1.554125033591479, + "grad_norm": 0.030450141057372093, + "learning_rate": 1.7412306051450806e-06, + "loss": 0.0703, + "num_input_tokens_seen": 42885536, + "step": 63615 + }, + { + "epoch": 1.5542471844233259, + "grad_norm": 28.711727142333984, + "learning_rate": 1.741173359821429e-06, + "loss": 0.077, + "num_input_tokens_seen": 42888864, + "step": 63620 + }, + { + "epoch": 1.554369335255173, + "grad_norm": 0.3964475989341736, + "learning_rate": 1.7411161091077657e-06, + "loss": 0.1488, + "num_input_tokens_seen": 42892128, + "step": 63625 + }, + { + "epoch": 1.5544914860870203, + "grad_norm": 0.155262753367424, + "learning_rate": 1.7410588530045067e-06, + "loss": 0.1056, + "num_input_tokens_seen": 42895328, + "step": 63630 + }, + { + "epoch": 1.5546136369188674, + "grad_norm": 16.852354049682617, + "learning_rate": 1.7410015915120684e-06, + "loss": 0.1505, + "num_input_tokens_seen": 42898656, + "step": 63635 + }, + { + "epoch": 1.5547357877507146, + "grad_norm": 39.86412048339844, + "learning_rate": 1.7409443246308674e-06, + "loss": 0.1335, + "num_input_tokens_seen": 42901664, + "step": 63640 + }, + { + "epoch": 1.5548579385825616, + "grad_norm": 0.5770293474197388, + "learning_rate": 1.7408870523613194e-06, + "loss": 0.0452, + "num_input_tokens_seen": 42905184, + "step": 63645 + }, + { + "epoch": 1.5549800894144088, + "grad_norm": 0.6164991855621338, + "learning_rate": 1.7408297747038422e-06, + "loss": 0.0443, + "num_input_tokens_seen": 42908256, + "step": 63650 + }, + { + "epoch": 1.555102240246256, + "grad_norm": 49.42633819580078, + "learning_rate": 1.740772491658851e-06, + "loss": 0.0963, + "num_input_tokens_seen": 42911904, + "step": 63655 + }, + { + "epoch": 1.5552243910781032, + "grad_norm": 0.07455496490001678, + "learning_rate": 1.7407152032267635e-06, + "loss": 0.0008, + "num_input_tokens_seen": 42915040, + "step": 63660 + }, + { + "epoch": 1.5553465419099504, + "grad_norm": 0.21186937391757965, + "learning_rate": 1.7406579094079957e-06, + "loss": 0.0613, + "num_input_tokens_seen": 42918112, + "step": 63665 + }, + { + "epoch": 1.5554686927417976, + "grad_norm": 3.4493956565856934, + "learning_rate": 1.740600610202964e-06, + "loss": 0.1516, + "num_input_tokens_seen": 42921632, + "step": 63670 + }, + { + "epoch": 1.5555908435736447, + "grad_norm": 56.91830062866211, + "learning_rate": 1.7405433056120857e-06, + "loss": 0.1109, + "num_input_tokens_seen": 42924832, + "step": 63675 + }, + { + "epoch": 1.555712994405492, + "grad_norm": 0.1591949760913849, + "learning_rate": 1.7404859956357774e-06, + "loss": 0.078, + "num_input_tokens_seen": 42928352, + "step": 63680 + }, + { + "epoch": 1.5558351452373391, + "grad_norm": 0.17558668553829193, + "learning_rate": 1.7404286802744556e-06, + "loss": 0.0986, + "num_input_tokens_seen": 42931616, + "step": 63685 + }, + { + "epoch": 1.5559572960691863, + "grad_norm": 0.4763665795326233, + "learning_rate": 1.7403713595285374e-06, + "loss": 0.074, + "num_input_tokens_seen": 42934880, + "step": 63690 + }, + { + "epoch": 1.5560794469010335, + "grad_norm": 0.38771793246269226, + "learning_rate": 1.7403140333984397e-06, + "loss": 0.0027, + "num_input_tokens_seen": 42938336, + "step": 63695 + }, + { + "epoch": 1.5562015977328807, + "grad_norm": 9.273704528808594, + "learning_rate": 1.7402567018845788e-06, + "loss": 0.0601, + "num_input_tokens_seen": 42941600, + "step": 63700 + }, + { + "epoch": 1.5563237485647279, + "grad_norm": 0.10114264488220215, + "learning_rate": 1.7401993649873722e-06, + "loss": 0.0707, + "num_input_tokens_seen": 42945184, + "step": 63705 + }, + { + "epoch": 1.5564458993965749, + "grad_norm": 1.2880104780197144, + "learning_rate": 1.740142022707237e-06, + "loss": 0.0307, + "num_input_tokens_seen": 42948640, + "step": 63710 + }, + { + "epoch": 1.556568050228422, + "grad_norm": 95.30414581298828, + "learning_rate": 1.7400846750445898e-06, + "loss": 0.0839, + "num_input_tokens_seen": 42952352, + "step": 63715 + }, + { + "epoch": 1.5566902010602692, + "grad_norm": 28.39178466796875, + "learning_rate": 1.7400273219998476e-06, + "loss": 0.0931, + "num_input_tokens_seen": 42955872, + "step": 63720 + }, + { + "epoch": 1.5568123518921164, + "grad_norm": 0.11044997721910477, + "learning_rate": 1.739969963573428e-06, + "loss": 0.0014, + "num_input_tokens_seen": 42958944, + "step": 63725 + }, + { + "epoch": 1.5569345027239634, + "grad_norm": 109.26212310791016, + "learning_rate": 1.7399125997657476e-06, + "loss": 0.0107, + "num_input_tokens_seen": 42962208, + "step": 63730 + }, + { + "epoch": 1.5570566535558106, + "grad_norm": 0.16350312530994415, + "learning_rate": 1.7398552305772238e-06, + "loss": 0.0131, + "num_input_tokens_seen": 42965408, + "step": 63735 + }, + { + "epoch": 1.5571788043876578, + "grad_norm": 0.10667629539966583, + "learning_rate": 1.7397978560082737e-06, + "loss": 0.0463, + "num_input_tokens_seen": 42968928, + "step": 63740 + }, + { + "epoch": 1.557300955219505, + "grad_norm": 37.49443435668945, + "learning_rate": 1.7397404760593147e-06, + "loss": 0.2915, + "num_input_tokens_seen": 42972064, + "step": 63745 + }, + { + "epoch": 1.5574231060513521, + "grad_norm": 0.09544025361537933, + "learning_rate": 1.739683090730764e-06, + "loss": 0.0011, + "num_input_tokens_seen": 42975328, + "step": 63750 + }, + { + "epoch": 1.5575452568831993, + "grad_norm": 0.026848675683140755, + "learning_rate": 1.7396257000230388e-06, + "loss": 0.0585, + "num_input_tokens_seen": 42978848, + "step": 63755 + }, + { + "epoch": 1.5576674077150465, + "grad_norm": 12.65404987335205, + "learning_rate": 1.7395683039365564e-06, + "loss": 0.1041, + "num_input_tokens_seen": 42982176, + "step": 63760 + }, + { + "epoch": 1.5577895585468937, + "grad_norm": 11.774423599243164, + "learning_rate": 1.7395109024717347e-06, + "loss": 0.1248, + "num_input_tokens_seen": 42985824, + "step": 63765 + }, + { + "epoch": 1.557911709378741, + "grad_norm": 0.6977723836898804, + "learning_rate": 1.7394534956289908e-06, + "loss": 0.0651, + "num_input_tokens_seen": 42989472, + "step": 63770 + }, + { + "epoch": 1.558033860210588, + "grad_norm": 17.038700103759766, + "learning_rate": 1.7393960834087422e-06, + "loss": 0.1488, + "num_input_tokens_seen": 42992480, + "step": 63775 + }, + { + "epoch": 1.5581560110424353, + "grad_norm": 57.71630859375, + "learning_rate": 1.7393386658114063e-06, + "loss": 0.0823, + "num_input_tokens_seen": 42995872, + "step": 63780 + }, + { + "epoch": 1.5582781618742825, + "grad_norm": 38.44292449951172, + "learning_rate": 1.7392812428374009e-06, + "loss": 0.0981, + "num_input_tokens_seen": 42999200, + "step": 63785 + }, + { + "epoch": 1.5584003127061297, + "grad_norm": 15.2906494140625, + "learning_rate": 1.7392238144871433e-06, + "loss": 0.0597, + "num_input_tokens_seen": 43003040, + "step": 63790 + }, + { + "epoch": 1.5585224635379769, + "grad_norm": 1.041969656944275, + "learning_rate": 1.7391663807610513e-06, + "loss": 0.2189, + "num_input_tokens_seen": 43006368, + "step": 63795 + }, + { + "epoch": 1.5586446143698238, + "grad_norm": 0.5180288553237915, + "learning_rate": 1.7391089416595426e-06, + "loss": 0.0449, + "num_input_tokens_seen": 43010528, + "step": 63800 + }, + { + "epoch": 1.558766765201671, + "grad_norm": 21.01852035522461, + "learning_rate": 1.7390514971830348e-06, + "loss": 0.1947, + "num_input_tokens_seen": 43013600, + "step": 63805 + }, + { + "epoch": 1.5588889160335182, + "grad_norm": 13.4508638381958, + "learning_rate": 1.7389940473319458e-06, + "loss": 0.1185, + "num_input_tokens_seen": 43017056, + "step": 63810 + }, + { + "epoch": 1.5590110668653654, + "grad_norm": 23.188987731933594, + "learning_rate": 1.7389365921066935e-06, + "loss": 0.1306, + "num_input_tokens_seen": 43020384, + "step": 63815 + }, + { + "epoch": 1.5591332176972124, + "grad_norm": 8.736359596252441, + "learning_rate": 1.7388791315076952e-06, + "loss": 0.0493, + "num_input_tokens_seen": 43023776, + "step": 63820 + }, + { + "epoch": 1.5592553685290595, + "grad_norm": 0.2003953605890274, + "learning_rate": 1.7388216655353694e-06, + "loss": 0.0326, + "num_input_tokens_seen": 43027232, + "step": 63825 + }, + { + "epoch": 1.5593775193609067, + "grad_norm": 14.04684066772461, + "learning_rate": 1.7387641941901334e-06, + "loss": 0.0376, + "num_input_tokens_seen": 43030624, + "step": 63830 + }, + { + "epoch": 1.559499670192754, + "grad_norm": 0.3257925808429718, + "learning_rate": 1.738706717472406e-06, + "loss": 0.032, + "num_input_tokens_seen": 43034272, + "step": 63835 + }, + { + "epoch": 1.5596218210246011, + "grad_norm": 0.9033854603767395, + "learning_rate": 1.7386492353826043e-06, + "loss": 0.0396, + "num_input_tokens_seen": 43037472, + "step": 63840 + }, + { + "epoch": 1.5597439718564483, + "grad_norm": 34.160926818847656, + "learning_rate": 1.7385917479211466e-06, + "loss": 0.0999, + "num_input_tokens_seen": 43040800, + "step": 63845 + }, + { + "epoch": 1.5598661226882955, + "grad_norm": 0.5950321555137634, + "learning_rate": 1.7385342550884514e-06, + "loss": 0.1058, + "num_input_tokens_seen": 43044000, + "step": 63850 + }, + { + "epoch": 1.5599882735201427, + "grad_norm": 53.574180603027344, + "learning_rate": 1.7384767568849363e-06, + "loss": 0.1929, + "num_input_tokens_seen": 43047392, + "step": 63855 + }, + { + "epoch": 1.5601104243519899, + "grad_norm": 0.0721253752708435, + "learning_rate": 1.7384192533110195e-06, + "loss": 0.0006, + "num_input_tokens_seen": 43050656, + "step": 63860 + }, + { + "epoch": 1.560232575183837, + "grad_norm": 0.22721441090106964, + "learning_rate": 1.7383617443671192e-06, + "loss": 0.0021, + "num_input_tokens_seen": 43054176, + "step": 63865 + }, + { + "epoch": 1.5603547260156843, + "grad_norm": 0.8160129189491272, + "learning_rate": 1.738304230053654e-06, + "loss": 0.2202, + "num_input_tokens_seen": 43057312, + "step": 63870 + }, + { + "epoch": 1.5604768768475314, + "grad_norm": 0.3547270894050598, + "learning_rate": 1.7382467103710417e-06, + "loss": 0.0036, + "num_input_tokens_seen": 43060768, + "step": 63875 + }, + { + "epoch": 1.5605990276793786, + "grad_norm": 0.08533972501754761, + "learning_rate": 1.738189185319701e-06, + "loss": 0.1038, + "num_input_tokens_seen": 43064224, + "step": 63880 + }, + { + "epoch": 1.5607211785112258, + "grad_norm": 0.0986095517873764, + "learning_rate": 1.7381316549000496e-06, + "loss": 0.0009, + "num_input_tokens_seen": 43067680, + "step": 63885 + }, + { + "epoch": 1.5608433293430728, + "grad_norm": 8.909045219421387, + "learning_rate": 1.7380741191125063e-06, + "loss": 0.1021, + "num_input_tokens_seen": 43070880, + "step": 63890 + }, + { + "epoch": 1.56096548017492, + "grad_norm": 0.10495179146528244, + "learning_rate": 1.7380165779574899e-06, + "loss": 0.0362, + "num_input_tokens_seen": 43074272, + "step": 63895 + }, + { + "epoch": 1.5610876310067672, + "grad_norm": 71.92479705810547, + "learning_rate": 1.7379590314354178e-06, + "loss": 0.1742, + "num_input_tokens_seen": 43077472, + "step": 63900 + }, + { + "epoch": 1.5612097818386144, + "grad_norm": 0.01743456721305847, + "learning_rate": 1.7379014795467097e-06, + "loss": 0.0004, + "num_input_tokens_seen": 43080672, + "step": 63905 + }, + { + "epoch": 1.5613319326704613, + "grad_norm": 0.5658043622970581, + "learning_rate": 1.7378439222917834e-06, + "loss": 0.0006, + "num_input_tokens_seen": 43083872, + "step": 63910 + }, + { + "epoch": 1.5614540835023085, + "grad_norm": 0.8468763828277588, + "learning_rate": 1.7377863596710575e-06, + "loss": 0.0017, + "num_input_tokens_seen": 43087264, + "step": 63915 + }, + { + "epoch": 1.5615762343341557, + "grad_norm": 0.7965541481971741, + "learning_rate": 1.737728791684951e-06, + "loss": 0.0293, + "num_input_tokens_seen": 43090464, + "step": 63920 + }, + { + "epoch": 1.561698385166003, + "grad_norm": 176.9981231689453, + "learning_rate": 1.7376712183338823e-06, + "loss": 0.1468, + "num_input_tokens_seen": 43093664, + "step": 63925 + }, + { + "epoch": 1.56182053599785, + "grad_norm": 20.599937438964844, + "learning_rate": 1.7376136396182696e-06, + "loss": 0.232, + "num_input_tokens_seen": 43096672, + "step": 63930 + }, + { + "epoch": 1.5619426868296973, + "grad_norm": 72.59481048583984, + "learning_rate": 1.7375560555385324e-06, + "loss": 0.1323, + "num_input_tokens_seen": 43100064, + "step": 63935 + }, + { + "epoch": 1.5620648376615445, + "grad_norm": 0.28954315185546875, + "learning_rate": 1.7374984660950896e-06, + "loss": 0.0674, + "num_input_tokens_seen": 43103264, + "step": 63940 + }, + { + "epoch": 1.5621869884933917, + "grad_norm": 0.06325811892747879, + "learning_rate": 1.737440871288359e-06, + "loss": 0.0028, + "num_input_tokens_seen": 43106208, + "step": 63945 + }, + { + "epoch": 1.5623091393252388, + "grad_norm": 74.24653625488281, + "learning_rate": 1.7373832711187604e-06, + "loss": 0.1143, + "num_input_tokens_seen": 43109792, + "step": 63950 + }, + { + "epoch": 1.562431290157086, + "grad_norm": 38.554996490478516, + "learning_rate": 1.737325665586712e-06, + "loss": 0.0738, + "num_input_tokens_seen": 43112992, + "step": 63955 + }, + { + "epoch": 1.5625534409889332, + "grad_norm": 0.032150134444236755, + "learning_rate": 1.7372680546926333e-06, + "loss": 0.0492, + "num_input_tokens_seen": 43116064, + "step": 63960 + }, + { + "epoch": 1.5626755918207804, + "grad_norm": 106.1802978515625, + "learning_rate": 1.737210438436943e-06, + "loss": 0.0771, + "num_input_tokens_seen": 43119072, + "step": 63965 + }, + { + "epoch": 1.5627977426526276, + "grad_norm": 11.328579902648926, + "learning_rate": 1.7371528168200603e-06, + "loss": 0.0857, + "num_input_tokens_seen": 43122528, + "step": 63970 + }, + { + "epoch": 1.5629198934844746, + "grad_norm": 0.25265300273895264, + "learning_rate": 1.7370951898424036e-06, + "loss": 0.0913, + "num_input_tokens_seen": 43126048, + "step": 63975 + }, + { + "epoch": 1.5630420443163218, + "grad_norm": 0.15176236629486084, + "learning_rate": 1.7370375575043927e-06, + "loss": 0.0444, + "num_input_tokens_seen": 43129376, + "step": 63980 + }, + { + "epoch": 1.563164195148169, + "grad_norm": 0.054856766015291214, + "learning_rate": 1.7369799198064463e-06, + "loss": 0.0455, + "num_input_tokens_seen": 43132704, + "step": 63985 + }, + { + "epoch": 1.5632863459800161, + "grad_norm": 36.12868118286133, + "learning_rate": 1.736922276748984e-06, + "loss": 0.1218, + "num_input_tokens_seen": 43135968, + "step": 63990 + }, + { + "epoch": 1.5634084968118633, + "grad_norm": 0.5585662722587585, + "learning_rate": 1.7368646283324245e-06, + "loss": 0.0014, + "num_input_tokens_seen": 43139104, + "step": 63995 + }, + { + "epoch": 1.5635306476437103, + "grad_norm": 0.11328171193599701, + "learning_rate": 1.7368069745571869e-06, + "loss": 0.1232, + "num_input_tokens_seen": 43142304, + "step": 64000 + }, + { + "epoch": 1.5636527984755575, + "grad_norm": 39.63161849975586, + "learning_rate": 1.7367493154236913e-06, + "loss": 0.1279, + "num_input_tokens_seen": 43145312, + "step": 64005 + }, + { + "epoch": 1.5637749493074047, + "grad_norm": 107.07310485839844, + "learning_rate": 1.736691650932356e-06, + "loss": 0.1902, + "num_input_tokens_seen": 43148384, + "step": 64010 + }, + { + "epoch": 1.5638971001392519, + "grad_norm": 0.07622405141592026, + "learning_rate": 1.7366339810836012e-06, + "loss": 0.0904, + "num_input_tokens_seen": 43151392, + "step": 64015 + }, + { + "epoch": 1.564019250971099, + "grad_norm": 0.05828621983528137, + "learning_rate": 1.736576305877846e-06, + "loss": 0.0997, + "num_input_tokens_seen": 43154592, + "step": 64020 + }, + { + "epoch": 1.5641414018029463, + "grad_norm": 32.318519592285156, + "learning_rate": 1.7365186253155097e-06, + "loss": 0.2109, + "num_input_tokens_seen": 43157920, + "step": 64025 + }, + { + "epoch": 1.5642635526347934, + "grad_norm": 17.8074893951416, + "learning_rate": 1.736460939397012e-06, + "loss": 0.0488, + "num_input_tokens_seen": 43161568, + "step": 64030 + }, + { + "epoch": 1.5643857034666406, + "grad_norm": 18.10813331604004, + "learning_rate": 1.736403248122772e-06, + "loss": 0.1417, + "num_input_tokens_seen": 43164832, + "step": 64035 + }, + { + "epoch": 1.5645078542984878, + "grad_norm": 8.896696090698242, + "learning_rate": 1.7363455514932097e-06, + "loss": 0.1029, + "num_input_tokens_seen": 43168160, + "step": 64040 + }, + { + "epoch": 1.564630005130335, + "grad_norm": 34.16524887084961, + "learning_rate": 1.7362878495087446e-06, + "loss": 0.1021, + "num_input_tokens_seen": 43171424, + "step": 64045 + }, + { + "epoch": 1.5647521559621822, + "grad_norm": 36.849449157714844, + "learning_rate": 1.7362301421697963e-06, + "loss": 0.0037, + "num_input_tokens_seen": 43174304, + "step": 64050 + }, + { + "epoch": 1.5648743067940294, + "grad_norm": 30.30545425415039, + "learning_rate": 1.7361724294767839e-06, + "loss": 0.0322, + "num_input_tokens_seen": 43177952, + "step": 64055 + }, + { + "epoch": 1.5649964576258766, + "grad_norm": 0.8645069003105164, + "learning_rate": 1.7361147114301279e-06, + "loss": 0.1224, + "num_input_tokens_seen": 43181152, + "step": 64060 + }, + { + "epoch": 1.5651186084577235, + "grad_norm": 0.04241948202252388, + "learning_rate": 1.7360569880302478e-06, + "loss": 0.038, + "num_input_tokens_seen": 43184416, + "step": 64065 + }, + { + "epoch": 1.5652407592895707, + "grad_norm": 0.1801944077014923, + "learning_rate": 1.735999259277563e-06, + "loss": 0.0331, + "num_input_tokens_seen": 43187680, + "step": 64070 + }, + { + "epoch": 1.565362910121418, + "grad_norm": 0.2641149163246155, + "learning_rate": 1.7359415251724938e-06, + "loss": 0.107, + "num_input_tokens_seen": 43191264, + "step": 64075 + }, + { + "epoch": 1.5654850609532651, + "grad_norm": 0.15654908120632172, + "learning_rate": 1.73588378571546e-06, + "loss": 0.1043, + "num_input_tokens_seen": 43194336, + "step": 64080 + }, + { + "epoch": 1.5656072117851123, + "grad_norm": 0.35645878314971924, + "learning_rate": 1.7358260409068813e-06, + "loss": 0.0851, + "num_input_tokens_seen": 43197088, + "step": 64085 + }, + { + "epoch": 1.5657293626169593, + "grad_norm": 0.330046147108078, + "learning_rate": 1.7357682907471776e-06, + "loss": 0.0457, + "num_input_tokens_seen": 43200672, + "step": 64090 + }, + { + "epoch": 1.5658515134488065, + "grad_norm": 0.18050967156887054, + "learning_rate": 1.7357105352367692e-06, + "loss": 0.0474, + "num_input_tokens_seen": 43203872, + "step": 64095 + }, + { + "epoch": 1.5659736642806537, + "grad_norm": 0.2091437578201294, + "learning_rate": 1.7356527743760756e-06, + "loss": 0.0015, + "num_input_tokens_seen": 43207136, + "step": 64100 + }, + { + "epoch": 1.5660958151125008, + "grad_norm": 464.6640930175781, + "learning_rate": 1.7355950081655175e-06, + "loss": 0.0337, + "num_input_tokens_seen": 43210208, + "step": 64105 + }, + { + "epoch": 1.566217965944348, + "grad_norm": 73.33601379394531, + "learning_rate": 1.7355372366055145e-06, + "loss": 0.1464, + "num_input_tokens_seen": 43213792, + "step": 64110 + }, + { + "epoch": 1.5663401167761952, + "grad_norm": 0.3620937466621399, + "learning_rate": 1.7354794596964869e-06, + "loss": 0.1498, + "num_input_tokens_seen": 43216928, + "step": 64115 + }, + { + "epoch": 1.5664622676080424, + "grad_norm": 1.3183008432388306, + "learning_rate": 1.7354216774388549e-06, + "loss": 0.0867, + "num_input_tokens_seen": 43219872, + "step": 64120 + }, + { + "epoch": 1.5665844184398896, + "grad_norm": 21.02619743347168, + "learning_rate": 1.7353638898330384e-06, + "loss": 0.1274, + "num_input_tokens_seen": 43223264, + "step": 64125 + }, + { + "epoch": 1.5667065692717368, + "grad_norm": 1.7390692234039307, + "learning_rate": 1.7353060968794582e-06, + "loss": 0.0383, + "num_input_tokens_seen": 43226656, + "step": 64130 + }, + { + "epoch": 1.566828720103584, + "grad_norm": 0.12568193674087524, + "learning_rate": 1.735248298578534e-06, + "loss": 0.0377, + "num_input_tokens_seen": 43229728, + "step": 64135 + }, + { + "epoch": 1.5669508709354312, + "grad_norm": 27.419641494750977, + "learning_rate": 1.7351904949306867e-06, + "loss": 0.0776, + "num_input_tokens_seen": 43233056, + "step": 64140 + }, + { + "epoch": 1.5670730217672784, + "grad_norm": 1.1645451784133911, + "learning_rate": 1.7351326859363363e-06, + "loss": 0.0346, + "num_input_tokens_seen": 43236832, + "step": 64145 + }, + { + "epoch": 1.5671951725991256, + "grad_norm": 13.680094718933105, + "learning_rate": 1.7350748715959035e-06, + "loss": 0.1614, + "num_input_tokens_seen": 43240224, + "step": 64150 + }, + { + "epoch": 1.5673173234309725, + "grad_norm": 0.7623006105422974, + "learning_rate": 1.7350170519098079e-06, + "loss": 0.0474, + "num_input_tokens_seen": 43242848, + "step": 64155 + }, + { + "epoch": 1.5674394742628197, + "grad_norm": 42.0316276550293, + "learning_rate": 1.7349592268784712e-06, + "loss": 0.0847, + "num_input_tokens_seen": 43246240, + "step": 64160 + }, + { + "epoch": 1.567561625094667, + "grad_norm": 0.2583397328853607, + "learning_rate": 1.7349013965023129e-06, + "loss": 0.0447, + "num_input_tokens_seen": 43249824, + "step": 64165 + }, + { + "epoch": 1.567683775926514, + "grad_norm": 46.822105407714844, + "learning_rate": 1.7348435607817544e-06, + "loss": 0.1189, + "num_input_tokens_seen": 43253536, + "step": 64170 + }, + { + "epoch": 1.5678059267583613, + "grad_norm": 0.4105156660079956, + "learning_rate": 1.7347857197172155e-06, + "loss": 0.0759, + "num_input_tokens_seen": 43256992, + "step": 64175 + }, + { + "epoch": 1.5679280775902082, + "grad_norm": 32.56477355957031, + "learning_rate": 1.7347278733091174e-06, + "loss": 0.1785, + "num_input_tokens_seen": 43260256, + "step": 64180 + }, + { + "epoch": 1.5680502284220554, + "grad_norm": 31.76134490966797, + "learning_rate": 1.7346700215578808e-06, + "loss": 0.0056, + "num_input_tokens_seen": 43263456, + "step": 64185 + }, + { + "epoch": 1.5681723792539026, + "grad_norm": 27.205013275146484, + "learning_rate": 1.7346121644639258e-06, + "loss": 0.1561, + "num_input_tokens_seen": 43267168, + "step": 64190 + }, + { + "epoch": 1.5682945300857498, + "grad_norm": 57.560577392578125, + "learning_rate": 1.7345543020276735e-06, + "loss": 0.1017, + "num_input_tokens_seen": 43270560, + "step": 64195 + }, + { + "epoch": 1.568416680917597, + "grad_norm": 0.020825877785682678, + "learning_rate": 1.734496434249545e-06, + "loss": 0.168, + "num_input_tokens_seen": 43273504, + "step": 64200 + }, + { + "epoch": 1.5685388317494442, + "grad_norm": 0.14802296459674835, + "learning_rate": 1.734438561129961e-06, + "loss": 0.021, + "num_input_tokens_seen": 43276896, + "step": 64205 + }, + { + "epoch": 1.5686609825812914, + "grad_norm": 0.1316024214029312, + "learning_rate": 1.734380682669342e-06, + "loss": 0.0446, + "num_input_tokens_seen": 43280160, + "step": 64210 + }, + { + "epoch": 1.5687831334131386, + "grad_norm": 7.745893478393555, + "learning_rate": 1.734322798868109e-06, + "loss": 0.1164, + "num_input_tokens_seen": 43283232, + "step": 64215 + }, + { + "epoch": 1.5689052842449858, + "grad_norm": 73.67494201660156, + "learning_rate": 1.7342649097266837e-06, + "loss": 0.0437, + "num_input_tokens_seen": 43286688, + "step": 64220 + }, + { + "epoch": 1.569027435076833, + "grad_norm": 21.934650421142578, + "learning_rate": 1.734207015245486e-06, + "loss": 0.1511, + "num_input_tokens_seen": 43290080, + "step": 64225 + }, + { + "epoch": 1.5691495859086801, + "grad_norm": 0.7329850792884827, + "learning_rate": 1.7341491154249374e-06, + "loss": 0.0503, + "num_input_tokens_seen": 43293728, + "step": 64230 + }, + { + "epoch": 1.5692717367405273, + "grad_norm": 0.7669029235839844, + "learning_rate": 1.734091210265459e-06, + "loss": 0.0409, + "num_input_tokens_seen": 43297120, + "step": 64235 + }, + { + "epoch": 1.5693938875723745, + "grad_norm": 0.008779598399996758, + "learning_rate": 1.7340332997674722e-06, + "loss": 0.0017, + "num_input_tokens_seen": 43300064, + "step": 64240 + }, + { + "epoch": 1.5695160384042215, + "grad_norm": 0.07726084440946579, + "learning_rate": 1.7339753839313972e-06, + "loss": 0.1356, + "num_input_tokens_seen": 43303328, + "step": 64245 + }, + { + "epoch": 1.5696381892360687, + "grad_norm": 26.37903594970703, + "learning_rate": 1.7339174627576564e-06, + "loss": 0.0647, + "num_input_tokens_seen": 43306272, + "step": 64250 + }, + { + "epoch": 1.5697603400679159, + "grad_norm": 0.27737849950790405, + "learning_rate": 1.7338595362466702e-06, + "loss": 0.1772, + "num_input_tokens_seen": 43309472, + "step": 64255 + }, + { + "epoch": 1.569882490899763, + "grad_norm": 0.08733111619949341, + "learning_rate": 1.73380160439886e-06, + "loss": 0.0525, + "num_input_tokens_seen": 43312608, + "step": 64260 + }, + { + "epoch": 1.57000464173161, + "grad_norm": 0.07386315613985062, + "learning_rate": 1.7337436672146472e-06, + "loss": 0.0366, + "num_input_tokens_seen": 43316000, + "step": 64265 + }, + { + "epoch": 1.5701267925634572, + "grad_norm": 0.4418039321899414, + "learning_rate": 1.7336857246944532e-06, + "loss": 0.074, + "num_input_tokens_seen": 43318752, + "step": 64270 + }, + { + "epoch": 1.5702489433953044, + "grad_norm": 0.2571321725845337, + "learning_rate": 1.7336277768386992e-06, + "loss": 0.0944, + "num_input_tokens_seen": 43321760, + "step": 64275 + }, + { + "epoch": 1.5703710942271516, + "grad_norm": 0.031792718917131424, + "learning_rate": 1.7335698236478065e-06, + "loss": 0.1143, + "num_input_tokens_seen": 43324896, + "step": 64280 + }, + { + "epoch": 1.5704932450589988, + "grad_norm": 17.855899810791016, + "learning_rate": 1.733511865122197e-06, + "loss": 0.0621, + "num_input_tokens_seen": 43328480, + "step": 64285 + }, + { + "epoch": 1.570615395890846, + "grad_norm": 0.48485782742500305, + "learning_rate": 1.7334539012622918e-06, + "loss": 0.0431, + "num_input_tokens_seen": 43331680, + "step": 64290 + }, + { + "epoch": 1.5707375467226932, + "grad_norm": 0.012995784170925617, + "learning_rate": 1.7333959320685125e-06, + "loss": 0.2625, + "num_input_tokens_seen": 43334880, + "step": 64295 + }, + { + "epoch": 1.5708596975545404, + "grad_norm": 125.5101547241211, + "learning_rate": 1.7333379575412809e-06, + "loss": 0.0224, + "num_input_tokens_seen": 43338208, + "step": 64300 + }, + { + "epoch": 1.5709818483863875, + "grad_norm": 1.2389947175979614, + "learning_rate": 1.7332799776810184e-06, + "loss": 0.0158, + "num_input_tokens_seen": 43341280, + "step": 64305 + }, + { + "epoch": 1.5711039992182347, + "grad_norm": 20.650035858154297, + "learning_rate": 1.7332219924881465e-06, + "loss": 0.1509, + "num_input_tokens_seen": 43344864, + "step": 64310 + }, + { + "epoch": 1.571226150050082, + "grad_norm": 0.08857346326112747, + "learning_rate": 1.7331640019630874e-06, + "loss": 0.0489, + "num_input_tokens_seen": 43348192, + "step": 64315 + }, + { + "epoch": 1.5713483008819291, + "grad_norm": 0.0540444515645504, + "learning_rate": 1.733106006106262e-06, + "loss": 0.1044, + "num_input_tokens_seen": 43351776, + "step": 64320 + }, + { + "epoch": 1.5714704517137763, + "grad_norm": 160.40611267089844, + "learning_rate": 1.7330480049180927e-06, + "loss": 0.1271, + "num_input_tokens_seen": 43355744, + "step": 64325 + }, + { + "epoch": 1.5715926025456235, + "grad_norm": 0.03458704426884651, + "learning_rate": 1.7329899983990013e-06, + "loss": 0.0984, + "num_input_tokens_seen": 43359392, + "step": 64330 + }, + { + "epoch": 1.5717147533774705, + "grad_norm": 34.95131301879883, + "learning_rate": 1.7329319865494094e-06, + "loss": 0.1279, + "num_input_tokens_seen": 43362912, + "step": 64335 + }, + { + "epoch": 1.5718369042093177, + "grad_norm": 0.5489761829376221, + "learning_rate": 1.7328739693697389e-06, + "loss": 0.0015, + "num_input_tokens_seen": 43366048, + "step": 64340 + }, + { + "epoch": 1.5719590550411648, + "grad_norm": 0.08397287130355835, + "learning_rate": 1.7328159468604118e-06, + "loss": 0.062, + "num_input_tokens_seen": 43368992, + "step": 64345 + }, + { + "epoch": 1.572081205873012, + "grad_norm": 38.11424255371094, + "learning_rate": 1.73275791902185e-06, + "loss": 0.0844, + "num_input_tokens_seen": 43372576, + "step": 64350 + }, + { + "epoch": 1.572203356704859, + "grad_norm": 0.03804247826337814, + "learning_rate": 1.7326998858544757e-06, + "loss": 0.0953, + "num_input_tokens_seen": 43375968, + "step": 64355 + }, + { + "epoch": 1.5723255075367062, + "grad_norm": 9.633445739746094, + "learning_rate": 1.7326418473587108e-06, + "loss": 0.0172, + "num_input_tokens_seen": 43379040, + "step": 64360 + }, + { + "epoch": 1.5724476583685534, + "grad_norm": 22.06827735900879, + "learning_rate": 1.732583803534977e-06, + "loss": 0.128, + "num_input_tokens_seen": 43382496, + "step": 64365 + }, + { + "epoch": 1.5725698092004006, + "grad_norm": 88.87641143798828, + "learning_rate": 1.732525754383697e-06, + "loss": 0.0183, + "num_input_tokens_seen": 43385696, + "step": 64370 + }, + { + "epoch": 1.5726919600322478, + "grad_norm": 18.26456642150879, + "learning_rate": 1.7324676999052925e-06, + "loss": 0.0768, + "num_input_tokens_seen": 43389472, + "step": 64375 + }, + { + "epoch": 1.572814110864095, + "grad_norm": 0.2304680347442627, + "learning_rate": 1.7324096401001862e-06, + "loss": 0.0202, + "num_input_tokens_seen": 43393120, + "step": 64380 + }, + { + "epoch": 1.5729362616959421, + "grad_norm": 0.14339575171470642, + "learning_rate": 1.7323515749687997e-06, + "loss": 0.0321, + "num_input_tokens_seen": 43396128, + "step": 64385 + }, + { + "epoch": 1.5730584125277893, + "grad_norm": 0.18540513515472412, + "learning_rate": 1.7322935045115557e-06, + "loss": 0.0721, + "num_input_tokens_seen": 43399840, + "step": 64390 + }, + { + "epoch": 1.5731805633596365, + "grad_norm": 9.878387451171875, + "learning_rate": 1.732235428728876e-06, + "loss": 0.0916, + "num_input_tokens_seen": 43402912, + "step": 64395 + }, + { + "epoch": 1.5733027141914837, + "grad_norm": 1.50212824344635, + "learning_rate": 1.732177347621184e-06, + "loss": 0.18, + "num_input_tokens_seen": 43405856, + "step": 64400 + }, + { + "epoch": 1.573424865023331, + "grad_norm": 1.5309032201766968, + "learning_rate": 1.7321192611889008e-06, + "loss": 0.0016, + "num_input_tokens_seen": 43409248, + "step": 64405 + }, + { + "epoch": 1.573547015855178, + "grad_norm": 26.32704734802246, + "learning_rate": 1.7320611694324497e-06, + "loss": 0.0976, + "num_input_tokens_seen": 43413216, + "step": 64410 + }, + { + "epoch": 1.5736691666870253, + "grad_norm": 2.3686509132385254, + "learning_rate": 1.7320030723522527e-06, + "loss": 0.0812, + "num_input_tokens_seen": 43416352, + "step": 64415 + }, + { + "epoch": 1.5737913175188725, + "grad_norm": 0.5000460147857666, + "learning_rate": 1.7319449699487327e-06, + "loss": 0.0454, + "num_input_tokens_seen": 43419488, + "step": 64420 + }, + { + "epoch": 1.5739134683507194, + "grad_norm": 25.742910385131836, + "learning_rate": 1.731886862222312e-06, + "loss": 0.1577, + "num_input_tokens_seen": 43423456, + "step": 64425 + }, + { + "epoch": 1.5740356191825666, + "grad_norm": 0.22029350697994232, + "learning_rate": 1.7318287491734131e-06, + "loss": 0.0902, + "num_input_tokens_seen": 43427040, + "step": 64430 + }, + { + "epoch": 1.5741577700144138, + "grad_norm": 0.23451527953147888, + "learning_rate": 1.7317706308024587e-06, + "loss": 0.2328, + "num_input_tokens_seen": 43430432, + "step": 64435 + }, + { + "epoch": 1.574279920846261, + "grad_norm": 1.194338321685791, + "learning_rate": 1.7317125071098712e-06, + "loss": 0.006, + "num_input_tokens_seen": 43433952, + "step": 64440 + }, + { + "epoch": 1.574402071678108, + "grad_norm": 0.13677367568016052, + "learning_rate": 1.731654378096074e-06, + "loss": 0.1223, + "num_input_tokens_seen": 43436896, + "step": 64445 + }, + { + "epoch": 1.5745242225099552, + "grad_norm": 1.720357894897461, + "learning_rate": 1.731596243761489e-06, + "loss": 0.0534, + "num_input_tokens_seen": 43440096, + "step": 64450 + }, + { + "epoch": 1.5746463733418024, + "grad_norm": 0.05810659006237984, + "learning_rate": 1.7315381041065396e-06, + "loss": 0.0694, + "num_input_tokens_seen": 43443424, + "step": 64455 + }, + { + "epoch": 1.5747685241736495, + "grad_norm": 18.547910690307617, + "learning_rate": 1.7314799591316483e-06, + "loss": 0.0658, + "num_input_tokens_seen": 43446560, + "step": 64460 + }, + { + "epoch": 1.5748906750054967, + "grad_norm": 24.63653564453125, + "learning_rate": 1.7314218088372378e-06, + "loss": 0.1655, + "num_input_tokens_seen": 43450336, + "step": 64465 + }, + { + "epoch": 1.575012825837344, + "grad_norm": 0.2034972906112671, + "learning_rate": 1.7313636532237315e-06, + "loss": 0.1515, + "num_input_tokens_seen": 43453984, + "step": 64470 + }, + { + "epoch": 1.5751349766691911, + "grad_norm": 12.121557235717773, + "learning_rate": 1.7313054922915518e-06, + "loss": 0.0834, + "num_input_tokens_seen": 43457248, + "step": 64475 + }, + { + "epoch": 1.5752571275010383, + "grad_norm": 0.231455460190773, + "learning_rate": 1.7312473260411217e-06, + "loss": 0.0865, + "num_input_tokens_seen": 43460384, + "step": 64480 + }, + { + "epoch": 1.5753792783328855, + "grad_norm": 26.974008560180664, + "learning_rate": 1.7311891544728645e-06, + "loss": 0.146, + "num_input_tokens_seen": 43463392, + "step": 64485 + }, + { + "epoch": 1.5755014291647327, + "grad_norm": 9.78650188446045, + "learning_rate": 1.7311309775872031e-06, + "loss": 0.0538, + "num_input_tokens_seen": 43466528, + "step": 64490 + }, + { + "epoch": 1.5756235799965799, + "grad_norm": 0.1319877803325653, + "learning_rate": 1.7310727953845607e-06, + "loss": 0.1977, + "num_input_tokens_seen": 43469984, + "step": 64495 + }, + { + "epoch": 1.575745730828427, + "grad_norm": 91.83895874023438, + "learning_rate": 1.7310146078653602e-06, + "loss": 0.0642, + "num_input_tokens_seen": 43473440, + "step": 64500 + }, + { + "epoch": 1.5758678816602743, + "grad_norm": 0.08182687312364578, + "learning_rate": 1.7309564150300248e-06, + "loss": 0.0194, + "num_input_tokens_seen": 43477024, + "step": 64505 + }, + { + "epoch": 1.5759900324921212, + "grad_norm": 14.08334732055664, + "learning_rate": 1.7308982168789779e-06, + "loss": 0.0588, + "num_input_tokens_seen": 43480672, + "step": 64510 + }, + { + "epoch": 1.5761121833239684, + "grad_norm": 11.436392784118652, + "learning_rate": 1.7308400134126427e-06, + "loss": 0.1166, + "num_input_tokens_seen": 43484128, + "step": 64515 + }, + { + "epoch": 1.5762343341558156, + "grad_norm": 17.357013702392578, + "learning_rate": 1.730781804631442e-06, + "loss": 0.0807, + "num_input_tokens_seen": 43487520, + "step": 64520 + }, + { + "epoch": 1.5763564849876628, + "grad_norm": 15.593206405639648, + "learning_rate": 1.7307235905357996e-06, + "loss": 0.1008, + "num_input_tokens_seen": 43491232, + "step": 64525 + }, + { + "epoch": 1.57647863581951, + "grad_norm": 3.3694682121276855, + "learning_rate": 1.7306653711261387e-06, + "loss": 0.1098, + "num_input_tokens_seen": 43494240, + "step": 64530 + }, + { + "epoch": 1.576600786651357, + "grad_norm": 0.666233479976654, + "learning_rate": 1.7306071464028826e-06, + "loss": 0.0432, + "num_input_tokens_seen": 43497632, + "step": 64535 + }, + { + "epoch": 1.5767229374832041, + "grad_norm": 18.11580467224121, + "learning_rate": 1.730548916366455e-06, + "loss": 0.0777, + "num_input_tokens_seen": 43500896, + "step": 64540 + }, + { + "epoch": 1.5768450883150513, + "grad_norm": 0.9629843235015869, + "learning_rate": 1.730490681017279e-06, + "loss": 0.0021, + "num_input_tokens_seen": 43504288, + "step": 64545 + }, + { + "epoch": 1.5769672391468985, + "grad_norm": 15.17105484008789, + "learning_rate": 1.7304324403557783e-06, + "loss": 0.0614, + "num_input_tokens_seen": 43508192, + "step": 64550 + }, + { + "epoch": 1.5770893899787457, + "grad_norm": 65.02957916259766, + "learning_rate": 1.7303741943823767e-06, + "loss": 0.1901, + "num_input_tokens_seen": 43511776, + "step": 64555 + }, + { + "epoch": 1.577211540810593, + "grad_norm": 0.9925330877304077, + "learning_rate": 1.7303159430974974e-06, + "loss": 0.0285, + "num_input_tokens_seen": 43514976, + "step": 64560 + }, + { + "epoch": 1.57733369164244, + "grad_norm": 0.08024363964796066, + "learning_rate": 1.7302576865015642e-06, + "loss": 0.1113, + "num_input_tokens_seen": 43518432, + "step": 64565 + }, + { + "epoch": 1.5774558424742873, + "grad_norm": 0.06430874764919281, + "learning_rate": 1.7301994245950004e-06, + "loss": 0.0049, + "num_input_tokens_seen": 43522080, + "step": 64570 + }, + { + "epoch": 1.5775779933061345, + "grad_norm": 28.217370986938477, + "learning_rate": 1.7301411573782301e-06, + "loss": 0.0845, + "num_input_tokens_seen": 43525344, + "step": 64575 + }, + { + "epoch": 1.5777001441379817, + "grad_norm": 0.19830197095870972, + "learning_rate": 1.7300828848516771e-06, + "loss": 0.0425, + "num_input_tokens_seen": 43529248, + "step": 64580 + }, + { + "epoch": 1.5778222949698288, + "grad_norm": 0.25371474027633667, + "learning_rate": 1.730024607015765e-06, + "loss": 0.1888, + "num_input_tokens_seen": 43532384, + "step": 64585 + }, + { + "epoch": 1.577944445801676, + "grad_norm": 21.84136962890625, + "learning_rate": 1.7299663238709172e-06, + "loss": 0.0429, + "num_input_tokens_seen": 43535776, + "step": 64590 + }, + { + "epoch": 1.5780665966335232, + "grad_norm": 0.4660642147064209, + "learning_rate": 1.7299080354175584e-06, + "loss": 0.0589, + "num_input_tokens_seen": 43539296, + "step": 64595 + }, + { + "epoch": 1.5781887474653702, + "grad_norm": 12.314225196838379, + "learning_rate": 1.7298497416561118e-06, + "loss": 0.0614, + "num_input_tokens_seen": 43542496, + "step": 64600 + }, + { + "epoch": 1.5783108982972174, + "grad_norm": 16.878976821899414, + "learning_rate": 1.7297914425870017e-06, + "loss": 0.0759, + "num_input_tokens_seen": 43545952, + "step": 64605 + }, + { + "epoch": 1.5784330491290646, + "grad_norm": 0.6693881750106812, + "learning_rate": 1.7297331382106517e-06, + "loss": 0.057, + "num_input_tokens_seen": 43549088, + "step": 64610 + }, + { + "epoch": 1.5785551999609118, + "grad_norm": 0.040294088423252106, + "learning_rate": 1.7296748285274863e-06, + "loss": 0.0591, + "num_input_tokens_seen": 43552288, + "step": 64615 + }, + { + "epoch": 1.578677350792759, + "grad_norm": 16.48003578186035, + "learning_rate": 1.7296165135379292e-06, + "loss": 0.131, + "num_input_tokens_seen": 43555680, + "step": 64620 + }, + { + "epoch": 1.578799501624606, + "grad_norm": 1.05326509475708, + "learning_rate": 1.7295581932424045e-06, + "loss": 0.0414, + "num_input_tokens_seen": 43559968, + "step": 64625 + }, + { + "epoch": 1.578921652456453, + "grad_norm": 0.1204957589507103, + "learning_rate": 1.7294998676413367e-06, + "loss": 0.0777, + "num_input_tokens_seen": 43563360, + "step": 64630 + }, + { + "epoch": 1.5790438032883003, + "grad_norm": 17.75843048095703, + "learning_rate": 1.7294415367351492e-06, + "loss": 0.0833, + "num_input_tokens_seen": 43566880, + "step": 64635 + }, + { + "epoch": 1.5791659541201475, + "grad_norm": 0.19816263020038605, + "learning_rate": 1.7293832005242668e-06, + "loss": 0.0345, + "num_input_tokens_seen": 43570272, + "step": 64640 + }, + { + "epoch": 1.5792881049519947, + "grad_norm": 0.08913075178861618, + "learning_rate": 1.7293248590091138e-06, + "loss": 0.0457, + "num_input_tokens_seen": 43573536, + "step": 64645 + }, + { + "epoch": 1.5794102557838419, + "grad_norm": 12.546802520751953, + "learning_rate": 1.7292665121901142e-06, + "loss": 0.1409, + "num_input_tokens_seen": 43576672, + "step": 64650 + }, + { + "epoch": 1.579532406615689, + "grad_norm": 34.74605941772461, + "learning_rate": 1.7292081600676922e-06, + "loss": 0.0689, + "num_input_tokens_seen": 43580192, + "step": 64655 + }, + { + "epoch": 1.5796545574475362, + "grad_norm": 0.2504430413246155, + "learning_rate": 1.7291498026422724e-06, + "loss": 0.0539, + "num_input_tokens_seen": 43583584, + "step": 64660 + }, + { + "epoch": 1.5797767082793834, + "grad_norm": 12.61136245727539, + "learning_rate": 1.7290914399142792e-06, + "loss": 0.238, + "num_input_tokens_seen": 43587488, + "step": 64665 + }, + { + "epoch": 1.5798988591112306, + "grad_norm": 18.86175537109375, + "learning_rate": 1.729033071884137e-06, + "loss": 0.0774, + "num_input_tokens_seen": 43590432, + "step": 64670 + }, + { + "epoch": 1.5800210099430778, + "grad_norm": 39.11001968383789, + "learning_rate": 1.72897469855227e-06, + "loss": 0.0615, + "num_input_tokens_seen": 43593440, + "step": 64675 + }, + { + "epoch": 1.580143160774925, + "grad_norm": 0.5635188221931458, + "learning_rate": 1.728916319919103e-06, + "loss": 0.0023, + "num_input_tokens_seen": 43596768, + "step": 64680 + }, + { + "epoch": 1.5802653116067722, + "grad_norm": 0.09311782568693161, + "learning_rate": 1.7288579359850606e-06, + "loss": 0.0012, + "num_input_tokens_seen": 43600288, + "step": 64685 + }, + { + "epoch": 1.5803874624386192, + "grad_norm": 0.1712763011455536, + "learning_rate": 1.728799546750567e-06, + "loss": 0.0733, + "num_input_tokens_seen": 43603936, + "step": 64690 + }, + { + "epoch": 1.5805096132704664, + "grad_norm": 0.16141662001609802, + "learning_rate": 1.728741152216047e-06, + "loss": 0.0009, + "num_input_tokens_seen": 43608096, + "step": 64695 + }, + { + "epoch": 1.5806317641023135, + "grad_norm": 11.50343132019043, + "learning_rate": 1.7286827523819256e-06, + "loss": 0.1137, + "num_input_tokens_seen": 43612000, + "step": 64700 + }, + { + "epoch": 1.5807539149341607, + "grad_norm": 17.6826229095459, + "learning_rate": 1.7286243472486274e-06, + "loss": 0.2981, + "num_input_tokens_seen": 43615840, + "step": 64705 + }, + { + "epoch": 1.580876065766008, + "grad_norm": 0.3164464831352234, + "learning_rate": 1.7285659368165766e-06, + "loss": 0.0367, + "num_input_tokens_seen": 43619296, + "step": 64710 + }, + { + "epoch": 1.5809982165978549, + "grad_norm": 25.950359344482422, + "learning_rate": 1.7285075210861986e-06, + "loss": 0.1975, + "num_input_tokens_seen": 43622496, + "step": 64715 + }, + { + "epoch": 1.581120367429702, + "grad_norm": 0.5272526144981384, + "learning_rate": 1.7284491000579178e-06, + "loss": 0.0018, + "num_input_tokens_seen": 43625952, + "step": 64720 + }, + { + "epoch": 1.5812425182615493, + "grad_norm": 0.36506155133247375, + "learning_rate": 1.7283906737321592e-06, + "loss": 0.0131, + "num_input_tokens_seen": 43629344, + "step": 64725 + }, + { + "epoch": 1.5813646690933965, + "grad_norm": 1.3668677806854248, + "learning_rate": 1.7283322421093478e-06, + "loss": 0.1193, + "num_input_tokens_seen": 43632736, + "step": 64730 + }, + { + "epoch": 1.5814868199252436, + "grad_norm": 0.4250573217868805, + "learning_rate": 1.7282738051899084e-06, + "loss": 0.0439, + "num_input_tokens_seen": 43635872, + "step": 64735 + }, + { + "epoch": 1.5816089707570908, + "grad_norm": 13.346234321594238, + "learning_rate": 1.728215362974266e-06, + "loss": 0.0818, + "num_input_tokens_seen": 43639136, + "step": 64740 + }, + { + "epoch": 1.581731121588938, + "grad_norm": 0.7282446622848511, + "learning_rate": 1.7281569154628456e-06, + "loss": 0.068, + "num_input_tokens_seen": 43642208, + "step": 64745 + }, + { + "epoch": 1.5818532724207852, + "grad_norm": 0.3492739796638489, + "learning_rate": 1.7280984626560725e-06, + "loss": 0.0308, + "num_input_tokens_seen": 43645920, + "step": 64750 + }, + { + "epoch": 1.5819754232526324, + "grad_norm": 0.5976834297180176, + "learning_rate": 1.728040004554371e-06, + "loss": 0.106, + "num_input_tokens_seen": 43649696, + "step": 64755 + }, + { + "epoch": 1.5820975740844796, + "grad_norm": 0.11738604307174683, + "learning_rate": 1.7279815411581674e-06, + "loss": 0.0527, + "num_input_tokens_seen": 43653408, + "step": 64760 + }, + { + "epoch": 1.5822197249163268, + "grad_norm": 10.756890296936035, + "learning_rate": 1.727923072467886e-06, + "loss": 0.143, + "num_input_tokens_seen": 43656480, + "step": 64765 + }, + { + "epoch": 1.582341875748174, + "grad_norm": 23.856487274169922, + "learning_rate": 1.727864598483952e-06, + "loss": 0.1607, + "num_input_tokens_seen": 43659936, + "step": 64770 + }, + { + "epoch": 1.5824640265800212, + "grad_norm": 0.2920784056186676, + "learning_rate": 1.7278061192067913e-06, + "loss": 0.0015, + "num_input_tokens_seen": 43663072, + "step": 64775 + }, + { + "epoch": 1.5825861774118681, + "grad_norm": 17.303253173828125, + "learning_rate": 1.7277476346368284e-06, + "loss": 0.1688, + "num_input_tokens_seen": 43666272, + "step": 64780 + }, + { + "epoch": 1.5827083282437153, + "grad_norm": 0.15339502692222595, + "learning_rate": 1.7276891447744888e-06, + "loss": 0.0072, + "num_input_tokens_seen": 43669408, + "step": 64785 + }, + { + "epoch": 1.5828304790755625, + "grad_norm": 0.4489547312259674, + "learning_rate": 1.7276306496201983e-06, + "loss": 0.0011, + "num_input_tokens_seen": 43672672, + "step": 64790 + }, + { + "epoch": 1.5829526299074097, + "grad_norm": 0.1969112902879715, + "learning_rate": 1.727572149174382e-06, + "loss": 0.1677, + "num_input_tokens_seen": 43675680, + "step": 64795 + }, + { + "epoch": 1.5830747807392567, + "grad_norm": 0.2483512908220291, + "learning_rate": 1.727513643437465e-06, + "loss": 0.0625, + "num_input_tokens_seen": 43678816, + "step": 64800 + }, + { + "epoch": 1.5831969315711039, + "grad_norm": 14.387808799743652, + "learning_rate": 1.7274551324098736e-06, + "loss": 0.0381, + "num_input_tokens_seen": 43682208, + "step": 64805 + }, + { + "epoch": 1.583319082402951, + "grad_norm": 0.1599772721529007, + "learning_rate": 1.7273966160920326e-06, + "loss": 0.1027, + "num_input_tokens_seen": 43685536, + "step": 64810 + }, + { + "epoch": 1.5834412332347982, + "grad_norm": 0.1974867284297943, + "learning_rate": 1.7273380944843678e-06, + "loss": 0.0851, + "num_input_tokens_seen": 43688800, + "step": 64815 + }, + { + "epoch": 1.5835633840666454, + "grad_norm": 13.60886001586914, + "learning_rate": 1.727279567587305e-06, + "loss": 0.048, + "num_input_tokens_seen": 43692000, + "step": 64820 + }, + { + "epoch": 1.5836855348984926, + "grad_norm": 1.444703459739685, + "learning_rate": 1.727221035401269e-06, + "loss": 0.0625, + "num_input_tokens_seen": 43695648, + "step": 64825 + }, + { + "epoch": 1.5838076857303398, + "grad_norm": 18.226131439208984, + "learning_rate": 1.7271624979266864e-06, + "loss": 0.0375, + "num_input_tokens_seen": 43698784, + "step": 64830 + }, + { + "epoch": 1.583929836562187, + "grad_norm": 51.74169158935547, + "learning_rate": 1.7271039551639826e-06, + "loss": 0.0834, + "num_input_tokens_seen": 43702112, + "step": 64835 + }, + { + "epoch": 1.5840519873940342, + "grad_norm": 0.7884621024131775, + "learning_rate": 1.727045407113583e-06, + "loss": 0.0592, + "num_input_tokens_seen": 43705696, + "step": 64840 + }, + { + "epoch": 1.5841741382258814, + "grad_norm": 0.3161929249763489, + "learning_rate": 1.7269868537759137e-06, + "loss": 0.0441, + "num_input_tokens_seen": 43709216, + "step": 64845 + }, + { + "epoch": 1.5842962890577286, + "grad_norm": 0.4939757287502289, + "learning_rate": 1.7269282951514006e-06, + "loss": 0.0017, + "num_input_tokens_seen": 43712544, + "step": 64850 + }, + { + "epoch": 1.5844184398895758, + "grad_norm": 14.162518501281738, + "learning_rate": 1.7268697312404694e-06, + "loss": 0.1, + "num_input_tokens_seen": 43716128, + "step": 64855 + }, + { + "epoch": 1.584540590721423, + "grad_norm": 0.12434427440166473, + "learning_rate": 1.726811162043546e-06, + "loss": 0.096, + "num_input_tokens_seen": 43719200, + "step": 64860 + }, + { + "epoch": 1.5846627415532701, + "grad_norm": 0.20762816071510315, + "learning_rate": 1.7267525875610562e-06, + "loss": 0.0464, + "num_input_tokens_seen": 43722912, + "step": 64865 + }, + { + "epoch": 1.584784892385117, + "grad_norm": 0.6017253994941711, + "learning_rate": 1.7266940077934262e-06, + "loss": 0.0985, + "num_input_tokens_seen": 43726240, + "step": 64870 + }, + { + "epoch": 1.5849070432169643, + "grad_norm": 3.5484423637390137, + "learning_rate": 1.726635422741082e-06, + "loss": 0.0261, + "num_input_tokens_seen": 43729504, + "step": 64875 + }, + { + "epoch": 1.5850291940488115, + "grad_norm": 10.419028282165527, + "learning_rate": 1.7265768324044495e-06, + "loss": 0.0808, + "num_input_tokens_seen": 43732960, + "step": 64880 + }, + { + "epoch": 1.5851513448806587, + "grad_norm": 2.4890594482421875, + "learning_rate": 1.7265182367839548e-06, + "loss": 0.0526, + "num_input_tokens_seen": 43736096, + "step": 64885 + }, + { + "epoch": 1.5852734957125056, + "grad_norm": 17.033557891845703, + "learning_rate": 1.7264596358800244e-06, + "loss": 0.1818, + "num_input_tokens_seen": 43739680, + "step": 64890 + }, + { + "epoch": 1.5853956465443528, + "grad_norm": 10.960592269897461, + "learning_rate": 1.7264010296930836e-06, + "loss": 0.1195, + "num_input_tokens_seen": 43743136, + "step": 64895 + }, + { + "epoch": 1.5855177973762, + "grad_norm": 0.5055883526802063, + "learning_rate": 1.7263424182235595e-06, + "loss": 0.1465, + "num_input_tokens_seen": 43746272, + "step": 64900 + }, + { + "epoch": 1.5856399482080472, + "grad_norm": 22.364749908447266, + "learning_rate": 1.7262838014718777e-06, + "loss": 0.1043, + "num_input_tokens_seen": 43749664, + "step": 64905 + }, + { + "epoch": 1.5857620990398944, + "grad_norm": 0.5056003928184509, + "learning_rate": 1.7262251794384648e-06, + "loss": 0.0007, + "num_input_tokens_seen": 43752800, + "step": 64910 + }, + { + "epoch": 1.5858842498717416, + "grad_norm": 0.07914532721042633, + "learning_rate": 1.7261665521237472e-06, + "loss": 0.0954, + "num_input_tokens_seen": 43756320, + "step": 64915 + }, + { + "epoch": 1.5860064007035888, + "grad_norm": 0.12225379794836044, + "learning_rate": 1.7261079195281512e-06, + "loss": 0.1966, + "num_input_tokens_seen": 43759968, + "step": 64920 + }, + { + "epoch": 1.586128551535436, + "grad_norm": 52.27254867553711, + "learning_rate": 1.7260492816521032e-06, + "loss": 0.0482, + "num_input_tokens_seen": 43763104, + "step": 64925 + }, + { + "epoch": 1.5862507023672832, + "grad_norm": 0.3266526758670807, + "learning_rate": 1.7259906384960293e-06, + "loss": 0.1171, + "num_input_tokens_seen": 43766368, + "step": 64930 + }, + { + "epoch": 1.5863728531991304, + "grad_norm": 0.229824960231781, + "learning_rate": 1.7259319900603562e-06, + "loss": 0.0357, + "num_input_tokens_seen": 43769376, + "step": 64935 + }, + { + "epoch": 1.5864950040309775, + "grad_norm": 0.10840941965579987, + "learning_rate": 1.7258733363455104e-06, + "loss": 0.0418, + "num_input_tokens_seen": 43772768, + "step": 64940 + }, + { + "epoch": 1.5866171548628247, + "grad_norm": 19.677263259887695, + "learning_rate": 1.7258146773519187e-06, + "loss": 0.1548, + "num_input_tokens_seen": 43775904, + "step": 64945 + }, + { + "epoch": 1.586739305694672, + "grad_norm": 0.18879550695419312, + "learning_rate": 1.725756013080007e-06, + "loss": 0.1765, + "num_input_tokens_seen": 43779040, + "step": 64950 + }, + { + "epoch": 1.586861456526519, + "grad_norm": 0.5741985440254211, + "learning_rate": 1.7256973435302027e-06, + "loss": 0.003, + "num_input_tokens_seen": 43782368, + "step": 64955 + }, + { + "epoch": 1.586983607358366, + "grad_norm": 0.23880359530448914, + "learning_rate": 1.725638668702932e-06, + "loss": 0.0608, + "num_input_tokens_seen": 43785504, + "step": 64960 + }, + { + "epoch": 1.5871057581902133, + "grad_norm": 0.7572619318962097, + "learning_rate": 1.725579988598622e-06, + "loss": 0.0023, + "num_input_tokens_seen": 43788640, + "step": 64965 + }, + { + "epoch": 1.5872279090220605, + "grad_norm": 21.19196319580078, + "learning_rate": 1.725521303217699e-06, + "loss": 0.1631, + "num_input_tokens_seen": 43792032, + "step": 64970 + }, + { + "epoch": 1.5873500598539076, + "grad_norm": 0.24510979652404785, + "learning_rate": 1.7254626125605898e-06, + "loss": 0.0757, + "num_input_tokens_seen": 43795552, + "step": 64975 + }, + { + "epoch": 1.5874722106857546, + "grad_norm": 10.422627449035645, + "learning_rate": 1.7254039166277213e-06, + "loss": 0.0851, + "num_input_tokens_seen": 43799840, + "step": 64980 + }, + { + "epoch": 1.5875943615176018, + "grad_norm": 21.904415130615234, + "learning_rate": 1.7253452154195206e-06, + "loss": 0.0863, + "num_input_tokens_seen": 43802720, + "step": 64985 + }, + { + "epoch": 1.587716512349449, + "grad_norm": 0.7586684823036194, + "learning_rate": 1.7252865089364144e-06, + "loss": 0.001, + "num_input_tokens_seen": 43807456, + "step": 64990 + }, + { + "epoch": 1.5878386631812962, + "grad_norm": 0.05671500787138939, + "learning_rate": 1.7252277971788298e-06, + "loss": 0.1012, + "num_input_tokens_seen": 43810656, + "step": 64995 + }, + { + "epoch": 1.5879608140131434, + "grad_norm": 0.18258820474147797, + "learning_rate": 1.7251690801471934e-06, + "loss": 0.0888, + "num_input_tokens_seen": 43813984, + "step": 65000 + }, + { + "epoch": 1.5880829648449906, + "grad_norm": 0.19848677515983582, + "learning_rate": 1.7251103578419323e-06, + "loss": 0.0607, + "num_input_tokens_seen": 43817504, + "step": 65005 + }, + { + "epoch": 1.5882051156768378, + "grad_norm": 0.6028345227241516, + "learning_rate": 1.725051630263474e-06, + "loss": 0.0944, + "num_input_tokens_seen": 43820896, + "step": 65010 + }, + { + "epoch": 1.588327266508685, + "grad_norm": 14.807291984558105, + "learning_rate": 1.7249928974122448e-06, + "loss": 0.1087, + "num_input_tokens_seen": 43823968, + "step": 65015 + }, + { + "epoch": 1.5884494173405321, + "grad_norm": 0.07424309104681015, + "learning_rate": 1.7249341592886721e-06, + "loss": 0.0211, + "num_input_tokens_seen": 43827552, + "step": 65020 + }, + { + "epoch": 1.5885715681723793, + "grad_norm": 45.41783142089844, + "learning_rate": 1.7248754158931838e-06, + "loss": 0.1244, + "num_input_tokens_seen": 43830816, + "step": 65025 + }, + { + "epoch": 1.5886937190042265, + "grad_norm": 70.42790985107422, + "learning_rate": 1.724816667226206e-06, + "loss": 0.1491, + "num_input_tokens_seen": 43834272, + "step": 65030 + }, + { + "epoch": 1.5888158698360737, + "grad_norm": 0.2781122922897339, + "learning_rate": 1.7247579132881668e-06, + "loss": 0.0019, + "num_input_tokens_seen": 43837664, + "step": 65035 + }, + { + "epoch": 1.588938020667921, + "grad_norm": 0.11927325278520584, + "learning_rate": 1.724699154079493e-06, + "loss": 0.1295, + "num_input_tokens_seen": 43840800, + "step": 65040 + }, + { + "epoch": 1.5890601714997679, + "grad_norm": 0.09100142866373062, + "learning_rate": 1.724640389600612e-06, + "loss": 0.1518, + "num_input_tokens_seen": 43844768, + "step": 65045 + }, + { + "epoch": 1.589182322331615, + "grad_norm": 0.07561935484409332, + "learning_rate": 1.7245816198519511e-06, + "loss": 0.0872, + "num_input_tokens_seen": 43848032, + "step": 65050 + }, + { + "epoch": 1.5893044731634622, + "grad_norm": 10.376869201660156, + "learning_rate": 1.7245228448339383e-06, + "loss": 0.0401, + "num_input_tokens_seen": 43851168, + "step": 65055 + }, + { + "epoch": 1.5894266239953094, + "grad_norm": 97.4819564819336, + "learning_rate": 1.724464064547e-06, + "loss": 0.1346, + "num_input_tokens_seen": 43854112, + "step": 65060 + }, + { + "epoch": 1.5895487748271566, + "grad_norm": 0.20881225168704987, + "learning_rate": 1.724405278991564e-06, + "loss": 0.0352, + "num_input_tokens_seen": 43857440, + "step": 65065 + }, + { + "epoch": 1.5896709256590036, + "grad_norm": 0.13099144399166107, + "learning_rate": 1.7243464881680583e-06, + "loss": 0.0423, + "num_input_tokens_seen": 43860768, + "step": 65070 + }, + { + "epoch": 1.5897930764908508, + "grad_norm": 21.404760360717773, + "learning_rate": 1.7242876920769102e-06, + "loss": 0.0743, + "num_input_tokens_seen": 43863776, + "step": 65075 + }, + { + "epoch": 1.589915227322698, + "grad_norm": 0.13627037405967712, + "learning_rate": 1.7242288907185469e-06, + "loss": 0.0569, + "num_input_tokens_seen": 43867424, + "step": 65080 + }, + { + "epoch": 1.5900373781545452, + "grad_norm": 10.283689498901367, + "learning_rate": 1.7241700840933964e-06, + "loss": 0.1019, + "num_input_tokens_seen": 43870688, + "step": 65085 + }, + { + "epoch": 1.5901595289863923, + "grad_norm": 0.56661057472229, + "learning_rate": 1.7241112722018864e-06, + "loss": 0.084, + "num_input_tokens_seen": 43873696, + "step": 65090 + }, + { + "epoch": 1.5902816798182395, + "grad_norm": 0.2931710481643677, + "learning_rate": 1.7240524550444442e-06, + "loss": 0.0284, + "num_input_tokens_seen": 43876704, + "step": 65095 + }, + { + "epoch": 1.5904038306500867, + "grad_norm": 79.61620330810547, + "learning_rate": 1.7239936326214978e-06, + "loss": 0.01, + "num_input_tokens_seen": 43879904, + "step": 65100 + }, + { + "epoch": 1.590525981481934, + "grad_norm": 1.0331748723983765, + "learning_rate": 1.7239348049334754e-06, + "loss": 0.0773, + "num_input_tokens_seen": 43883488, + "step": 65105 + }, + { + "epoch": 1.590648132313781, + "grad_norm": 5.423376083374023, + "learning_rate": 1.7238759719808043e-06, + "loss": 0.0556, + "num_input_tokens_seen": 43886560, + "step": 65110 + }, + { + "epoch": 1.5907702831456283, + "grad_norm": 8.856134414672852, + "learning_rate": 1.7238171337639122e-06, + "loss": 0.089, + "num_input_tokens_seen": 43889568, + "step": 65115 + }, + { + "epoch": 1.5908924339774755, + "grad_norm": 58.78102493286133, + "learning_rate": 1.7237582902832273e-06, + "loss": 0.1761, + "num_input_tokens_seen": 43893344, + "step": 65120 + }, + { + "epoch": 1.5910145848093227, + "grad_norm": 0.2834632992744446, + "learning_rate": 1.7236994415391774e-06, + "loss": 0.1145, + "num_input_tokens_seen": 43896544, + "step": 65125 + }, + { + "epoch": 1.5911367356411699, + "grad_norm": 33.866458892822266, + "learning_rate": 1.7236405875321904e-06, + "loss": 0.1264, + "num_input_tokens_seen": 43900064, + "step": 65130 + }, + { + "epoch": 1.5912588864730168, + "grad_norm": 0.4411328136920929, + "learning_rate": 1.7235817282626947e-06, + "loss": 0.0535, + "num_input_tokens_seen": 43903264, + "step": 65135 + }, + { + "epoch": 1.591381037304864, + "grad_norm": 0.19796854257583618, + "learning_rate": 1.7235228637311179e-06, + "loss": 0.0065, + "num_input_tokens_seen": 43906336, + "step": 65140 + }, + { + "epoch": 1.5915031881367112, + "grad_norm": 0.765709638595581, + "learning_rate": 1.723463993937888e-06, + "loss": 0.0023, + "num_input_tokens_seen": 43909664, + "step": 65145 + }, + { + "epoch": 1.5916253389685584, + "grad_norm": 4.744302749633789, + "learning_rate": 1.7234051188834338e-06, + "loss": 0.0792, + "num_input_tokens_seen": 43912608, + "step": 65150 + }, + { + "epoch": 1.5917474898004056, + "grad_norm": 0.09163492918014526, + "learning_rate": 1.7233462385681828e-06, + "loss": 0.0474, + "num_input_tokens_seen": 43915872, + "step": 65155 + }, + { + "epoch": 1.5918696406322526, + "grad_norm": 35.5812873840332, + "learning_rate": 1.723287352992563e-06, + "loss": 0.2247, + "num_input_tokens_seen": 43919072, + "step": 65160 + }, + { + "epoch": 1.5919917914640997, + "grad_norm": 12.814735412597656, + "learning_rate": 1.7232284621570037e-06, + "loss": 0.0595, + "num_input_tokens_seen": 43922272, + "step": 65165 + }, + { + "epoch": 1.592113942295947, + "grad_norm": 20.20494842529297, + "learning_rate": 1.7231695660619323e-06, + "loss": 0.0626, + "num_input_tokens_seen": 43925344, + "step": 65170 + }, + { + "epoch": 1.5922360931277941, + "grad_norm": 0.18822571635246277, + "learning_rate": 1.723110664707777e-06, + "loss": 0.0018, + "num_input_tokens_seen": 43928608, + "step": 65175 + }, + { + "epoch": 1.5923582439596413, + "grad_norm": 56.161495208740234, + "learning_rate": 1.7230517580949666e-06, + "loss": 0.1544, + "num_input_tokens_seen": 43931936, + "step": 65180 + }, + { + "epoch": 1.5924803947914885, + "grad_norm": 0.06047762185335159, + "learning_rate": 1.7229928462239296e-06, + "loss": 0.0791, + "num_input_tokens_seen": 43935456, + "step": 65185 + }, + { + "epoch": 1.5926025456233357, + "grad_norm": 0.25422996282577515, + "learning_rate": 1.7229339290950938e-06, + "loss": 0.0047, + "num_input_tokens_seen": 43938848, + "step": 65190 + }, + { + "epoch": 1.5927246964551829, + "grad_norm": 0.334971159696579, + "learning_rate": 1.7228750067088882e-06, + "loss": 0.0549, + "num_input_tokens_seen": 43942432, + "step": 65195 + }, + { + "epoch": 1.59284684728703, + "grad_norm": 0.35727155208587646, + "learning_rate": 1.7228160790657414e-06, + "loss": 0.1257, + "num_input_tokens_seen": 43945824, + "step": 65200 + }, + { + "epoch": 1.5929689981188773, + "grad_norm": 0.2726553976535797, + "learning_rate": 1.722757146166081e-06, + "loss": 0.008, + "num_input_tokens_seen": 43949344, + "step": 65205 + }, + { + "epoch": 1.5930911489507245, + "grad_norm": 20.234010696411133, + "learning_rate": 1.7226982080103367e-06, + "loss": 0.1465, + "num_input_tokens_seen": 43952992, + "step": 65210 + }, + { + "epoch": 1.5932132997825716, + "grad_norm": 1.7432730197906494, + "learning_rate": 1.7226392645989365e-06, + "loss": 0.1148, + "num_input_tokens_seen": 43956320, + "step": 65215 + }, + { + "epoch": 1.5933354506144188, + "grad_norm": 0.2908968925476074, + "learning_rate": 1.7225803159323094e-06, + "loss": 0.0726, + "num_input_tokens_seen": 43959328, + "step": 65220 + }, + { + "epoch": 1.5934576014462658, + "grad_norm": 0.14125913381576538, + "learning_rate": 1.7225213620108835e-06, + "loss": 0.0435, + "num_input_tokens_seen": 43962528, + "step": 65225 + }, + { + "epoch": 1.593579752278113, + "grad_norm": 121.56201934814453, + "learning_rate": 1.7224624028350885e-06, + "loss": 0.0547, + "num_input_tokens_seen": 43965600, + "step": 65230 + }, + { + "epoch": 1.5937019031099602, + "grad_norm": 0.18387913703918457, + "learning_rate": 1.722403438405352e-06, + "loss": 0.0571, + "num_input_tokens_seen": 43968992, + "step": 65235 + }, + { + "epoch": 1.5938240539418074, + "grad_norm": 0.11335892230272293, + "learning_rate": 1.7223444687221038e-06, + "loss": 0.0006, + "num_input_tokens_seen": 43972704, + "step": 65240 + }, + { + "epoch": 1.5939462047736546, + "grad_norm": 17.0925350189209, + "learning_rate": 1.722285493785772e-06, + "loss": 0.1366, + "num_input_tokens_seen": 43975904, + "step": 65245 + }, + { + "epoch": 1.5940683556055015, + "grad_norm": 14.52631664276123, + "learning_rate": 1.722226513596786e-06, + "loss": 0.1517, + "num_input_tokens_seen": 43979040, + "step": 65250 + }, + { + "epoch": 1.5941905064373487, + "grad_norm": 237.27642822265625, + "learning_rate": 1.7221675281555745e-06, + "loss": 0.0374, + "num_input_tokens_seen": 43982624, + "step": 65255 + }, + { + "epoch": 1.594312657269196, + "grad_norm": 1.3043992519378662, + "learning_rate": 1.7221085374625665e-06, + "loss": 0.0302, + "num_input_tokens_seen": 43985888, + "step": 65260 + }, + { + "epoch": 1.594434808101043, + "grad_norm": 33.467750549316406, + "learning_rate": 1.7220495415181913e-06, + "loss": 0.212, + "num_input_tokens_seen": 43989344, + "step": 65265 + }, + { + "epoch": 1.5945569589328903, + "grad_norm": 142.94471740722656, + "learning_rate": 1.721990540322877e-06, + "loss": 0.0446, + "num_input_tokens_seen": 43992608, + "step": 65270 + }, + { + "epoch": 1.5946791097647375, + "grad_norm": 0.17905016243457794, + "learning_rate": 1.7219315338770536e-06, + "loss": 0.0013, + "num_input_tokens_seen": 43995680, + "step": 65275 + }, + { + "epoch": 1.5948012605965847, + "grad_norm": 32.415428161621094, + "learning_rate": 1.7218725221811501e-06, + "loss": 0.086, + "num_input_tokens_seen": 43999264, + "step": 65280 + }, + { + "epoch": 1.5949234114284319, + "grad_norm": 23.203704833984375, + "learning_rate": 1.7218135052355954e-06, + "loss": 0.1713, + "num_input_tokens_seen": 44003552, + "step": 65285 + }, + { + "epoch": 1.595045562260279, + "grad_norm": 2.7013700008392334, + "learning_rate": 1.7217544830408187e-06, + "loss": 0.0031, + "num_input_tokens_seen": 44006688, + "step": 65290 + }, + { + "epoch": 1.5951677130921262, + "grad_norm": 0.369367390871048, + "learning_rate": 1.7216954555972492e-06, + "loss": 0.0326, + "num_input_tokens_seen": 44010336, + "step": 65295 + }, + { + "epoch": 1.5952898639239734, + "grad_norm": 0.4860530197620392, + "learning_rate": 1.7216364229053162e-06, + "loss": 0.0907, + "num_input_tokens_seen": 44013472, + "step": 65300 + }, + { + "epoch": 1.5954120147558206, + "grad_norm": 11.849100112915039, + "learning_rate": 1.721577384965449e-06, + "loss": 0.1265, + "num_input_tokens_seen": 44016928, + "step": 65305 + }, + { + "epoch": 1.5955341655876678, + "grad_norm": 20.93488121032715, + "learning_rate": 1.7215183417780771e-06, + "loss": 0.0761, + "num_input_tokens_seen": 44020512, + "step": 65310 + }, + { + "epoch": 1.5956563164195148, + "grad_norm": 23.495223999023438, + "learning_rate": 1.7214592933436298e-06, + "loss": 0.0912, + "num_input_tokens_seen": 44023840, + "step": 65315 + }, + { + "epoch": 1.595778467251362, + "grad_norm": 0.09012192487716675, + "learning_rate": 1.7214002396625365e-06, + "loss": 0.2207, + "num_input_tokens_seen": 44027296, + "step": 65320 + }, + { + "epoch": 1.5959006180832092, + "grad_norm": 14.036751747131348, + "learning_rate": 1.7213411807352265e-06, + "loss": 0.0872, + "num_input_tokens_seen": 44030496, + "step": 65325 + }, + { + "epoch": 1.5960227689150563, + "grad_norm": 1.248896837234497, + "learning_rate": 1.7212821165621295e-06, + "loss": 0.0273, + "num_input_tokens_seen": 44033952, + "step": 65330 + }, + { + "epoch": 1.5961449197469033, + "grad_norm": 2.6545727252960205, + "learning_rate": 1.7212230471436748e-06, + "loss": 0.0352, + "num_input_tokens_seen": 44037344, + "step": 65335 + }, + { + "epoch": 1.5962670705787505, + "grad_norm": 13.713151931762695, + "learning_rate": 1.7211639724802921e-06, + "loss": 0.0852, + "num_input_tokens_seen": 44040800, + "step": 65340 + }, + { + "epoch": 1.5963892214105977, + "grad_norm": 256.8730773925781, + "learning_rate": 1.7211048925724112e-06, + "loss": 0.084, + "num_input_tokens_seen": 44043744, + "step": 65345 + }, + { + "epoch": 1.5965113722424449, + "grad_norm": 0.1393524557352066, + "learning_rate": 1.7210458074204614e-06, + "loss": 0.0648, + "num_input_tokens_seen": 44047008, + "step": 65350 + }, + { + "epoch": 1.596633523074292, + "grad_norm": 80.3912124633789, + "learning_rate": 1.7209867170248726e-06, + "loss": 0.2229, + "num_input_tokens_seen": 44050208, + "step": 65355 + }, + { + "epoch": 1.5967556739061393, + "grad_norm": 0.04061346873641014, + "learning_rate": 1.7209276213860747e-06, + "loss": 0.0578, + "num_input_tokens_seen": 44053408, + "step": 65360 + }, + { + "epoch": 1.5968778247379865, + "grad_norm": 20.790264129638672, + "learning_rate": 1.7208685205044971e-06, + "loss": 0.0661, + "num_input_tokens_seen": 44056672, + "step": 65365 + }, + { + "epoch": 1.5969999755698336, + "grad_norm": 0.7041231989860535, + "learning_rate": 1.7208094143805695e-06, + "loss": 0.1116, + "num_input_tokens_seen": 44060256, + "step": 65370 + }, + { + "epoch": 1.5971221264016808, + "grad_norm": 1.3291122913360596, + "learning_rate": 1.7207503030147222e-06, + "loss": 0.1478, + "num_input_tokens_seen": 44063456, + "step": 65375 + }, + { + "epoch": 1.597244277233528, + "grad_norm": 2.3246588706970215, + "learning_rate": 1.7206911864073848e-06, + "loss": 0.0438, + "num_input_tokens_seen": 44066912, + "step": 65380 + }, + { + "epoch": 1.5973664280653752, + "grad_norm": 0.02847222425043583, + "learning_rate": 1.720632064558987e-06, + "loss": 0.0284, + "num_input_tokens_seen": 44070432, + "step": 65385 + }, + { + "epoch": 1.5974885788972224, + "grad_norm": 32.99223327636719, + "learning_rate": 1.7205729374699594e-06, + "loss": 0.0805, + "num_input_tokens_seen": 44073888, + "step": 65390 + }, + { + "epoch": 1.5976107297290696, + "grad_norm": 0.5882880091667175, + "learning_rate": 1.7205138051407312e-06, + "loss": 0.1458, + "num_input_tokens_seen": 44076896, + "step": 65395 + }, + { + "epoch": 1.5977328805609168, + "grad_norm": 20.05073356628418, + "learning_rate": 1.7204546675717333e-06, + "loss": 0.0646, + "num_input_tokens_seen": 44080224, + "step": 65400 + }, + { + "epoch": 1.5978550313927637, + "grad_norm": 0.05178692564368248, + "learning_rate": 1.720395524763395e-06, + "loss": 0.1099, + "num_input_tokens_seen": 44083808, + "step": 65405 + }, + { + "epoch": 1.597977182224611, + "grad_norm": 0.40675821900367737, + "learning_rate": 1.7203363767161468e-06, + "loss": 0.084, + "num_input_tokens_seen": 44086944, + "step": 65410 + }, + { + "epoch": 1.5980993330564581, + "grad_norm": 16.000343322753906, + "learning_rate": 1.7202772234304184e-06, + "loss": 0.0325, + "num_input_tokens_seen": 44089888, + "step": 65415 + }, + { + "epoch": 1.5982214838883053, + "grad_norm": 20.65907859802246, + "learning_rate": 1.7202180649066405e-06, + "loss": 0.1516, + "num_input_tokens_seen": 44093216, + "step": 65420 + }, + { + "epoch": 1.5983436347201523, + "grad_norm": 0.039159201085567474, + "learning_rate": 1.720158901145243e-06, + "loss": 0.0362, + "num_input_tokens_seen": 44097120, + "step": 65425 + }, + { + "epoch": 1.5984657855519995, + "grad_norm": 15.833292961120605, + "learning_rate": 1.7200997321466563e-06, + "loss": 0.1402, + "num_input_tokens_seen": 44100320, + "step": 65430 + }, + { + "epoch": 1.5985879363838467, + "grad_norm": 1.0264534950256348, + "learning_rate": 1.7200405579113108e-06, + "loss": 0.0018, + "num_input_tokens_seen": 44103456, + "step": 65435 + }, + { + "epoch": 1.5987100872156939, + "grad_norm": 0.41825219988822937, + "learning_rate": 1.7199813784396366e-06, + "loss": 0.0501, + "num_input_tokens_seen": 44106592, + "step": 65440 + }, + { + "epoch": 1.598832238047541, + "grad_norm": 0.1690363883972168, + "learning_rate": 1.7199221937320645e-06, + "loss": 0.0764, + "num_input_tokens_seen": 44109792, + "step": 65445 + }, + { + "epoch": 1.5989543888793882, + "grad_norm": 0.024017304182052612, + "learning_rate": 1.7198630037890243e-06, + "loss": 0.1109, + "num_input_tokens_seen": 44113248, + "step": 65450 + }, + { + "epoch": 1.5990765397112354, + "grad_norm": 26.482099533081055, + "learning_rate": 1.7198038086109467e-06, + "loss": 0.2167, + "num_input_tokens_seen": 44116640, + "step": 65455 + }, + { + "epoch": 1.5991986905430826, + "grad_norm": 0.3642503023147583, + "learning_rate": 1.7197446081982623e-06, + "loss": 0.0713, + "num_input_tokens_seen": 44120032, + "step": 65460 + }, + { + "epoch": 1.5993208413749298, + "grad_norm": 17.329883575439453, + "learning_rate": 1.719685402551401e-06, + "loss": 0.07, + "num_input_tokens_seen": 44123872, + "step": 65465 + }, + { + "epoch": 1.599442992206777, + "grad_norm": 473.49810791015625, + "learning_rate": 1.7196261916707947e-06, + "loss": 0.0373, + "num_input_tokens_seen": 44127136, + "step": 65470 + }, + { + "epoch": 1.5995651430386242, + "grad_norm": 0.20757238566875458, + "learning_rate": 1.7195669755568727e-06, + "loss": 0.0014, + "num_input_tokens_seen": 44130144, + "step": 65475 + }, + { + "epoch": 1.5996872938704714, + "grad_norm": 0.28598150610923767, + "learning_rate": 1.7195077542100663e-06, + "loss": 0.0558, + "num_input_tokens_seen": 44133024, + "step": 65480 + }, + { + "epoch": 1.5998094447023186, + "grad_norm": 0.23164817690849304, + "learning_rate": 1.7194485276308057e-06, + "loss": 0.1076, + "num_input_tokens_seen": 44136224, + "step": 65485 + }, + { + "epoch": 1.5999315955341658, + "grad_norm": 47.41874694824219, + "learning_rate": 1.7193892958195222e-06, + "loss": 0.1035, + "num_input_tokens_seen": 44139552, + "step": 65490 + }, + { + "epoch": 1.6000537463660127, + "grad_norm": 186.92962646484375, + "learning_rate": 1.719330058776646e-06, + "loss": 0.0076, + "num_input_tokens_seen": 44142816, + "step": 65495 + }, + { + "epoch": 1.60017589719786, + "grad_norm": 0.5157283544540405, + "learning_rate": 1.7192708165026084e-06, + "loss": 0.0379, + "num_input_tokens_seen": 44146080, + "step": 65500 + }, + { + "epoch": 1.600298048029707, + "grad_norm": 14.778525352478027, + "learning_rate": 1.7192115689978398e-06, + "loss": 0.1149, + "num_input_tokens_seen": 44149728, + "step": 65505 + }, + { + "epoch": 1.6004201988615543, + "grad_norm": 17.854949951171875, + "learning_rate": 1.7191523162627712e-06, + "loss": 0.1016, + "num_input_tokens_seen": 44153312, + "step": 65510 + }, + { + "epoch": 1.6005423496934013, + "grad_norm": 0.2640315592288971, + "learning_rate": 1.7190930582978335e-06, + "loss": 0.001, + "num_input_tokens_seen": 44156960, + "step": 65515 + }, + { + "epoch": 1.6006645005252484, + "grad_norm": 13.823732376098633, + "learning_rate": 1.7190337951034577e-06, + "loss": 0.1725, + "num_input_tokens_seen": 44160416, + "step": 65520 + }, + { + "epoch": 1.6007866513570956, + "grad_norm": 0.19923101365566254, + "learning_rate": 1.7189745266800748e-06, + "loss": 0.098, + "num_input_tokens_seen": 44163488, + "step": 65525 + }, + { + "epoch": 1.6009088021889428, + "grad_norm": 0.05441410467028618, + "learning_rate": 1.718915253028116e-06, + "loss": 0.1148, + "num_input_tokens_seen": 44166816, + "step": 65530 + }, + { + "epoch": 1.60103095302079, + "grad_norm": 0.20565907657146454, + "learning_rate": 1.7188559741480117e-06, + "loss": 0.0477, + "num_input_tokens_seen": 44170720, + "step": 65535 + }, + { + "epoch": 1.6011531038526372, + "grad_norm": 0.03282782807946205, + "learning_rate": 1.7187966900401936e-06, + "loss": 0.0029, + "num_input_tokens_seen": 44174112, + "step": 65540 + }, + { + "epoch": 1.6012752546844844, + "grad_norm": 0.2828992009162903, + "learning_rate": 1.7187374007050926e-06, + "loss": 0.0929, + "num_input_tokens_seen": 44177376, + "step": 65545 + }, + { + "epoch": 1.6013974055163316, + "grad_norm": 0.222542405128479, + "learning_rate": 1.7186781061431398e-06, + "loss": 0.0586, + "num_input_tokens_seen": 44180512, + "step": 65550 + }, + { + "epoch": 1.6015195563481788, + "grad_norm": 8.429993629455566, + "learning_rate": 1.7186188063547666e-06, + "loss": 0.1894, + "num_input_tokens_seen": 44184032, + "step": 65555 + }, + { + "epoch": 1.601641707180026, + "grad_norm": 13.292488098144531, + "learning_rate": 1.7185595013404044e-06, + "loss": 0.0665, + "num_input_tokens_seen": 44187168, + "step": 65560 + }, + { + "epoch": 1.6017638580118732, + "grad_norm": 1.3764312267303467, + "learning_rate": 1.718500191100484e-06, + "loss": 0.0818, + "num_input_tokens_seen": 44190304, + "step": 65565 + }, + { + "epoch": 1.6018860088437203, + "grad_norm": 8.841894149780273, + "learning_rate": 1.718440875635437e-06, + "loss": 0.1852, + "num_input_tokens_seen": 44193632, + "step": 65570 + }, + { + "epoch": 1.6020081596755675, + "grad_norm": 0.5281084775924683, + "learning_rate": 1.7183815549456946e-06, + "loss": 0.0989, + "num_input_tokens_seen": 44196960, + "step": 65575 + }, + { + "epoch": 1.6021303105074145, + "grad_norm": 18.164323806762695, + "learning_rate": 1.7183222290316883e-06, + "loss": 0.0721, + "num_input_tokens_seen": 44200288, + "step": 65580 + }, + { + "epoch": 1.6022524613392617, + "grad_norm": 24.348045349121094, + "learning_rate": 1.7182628978938498e-06, + "loss": 0.1025, + "num_input_tokens_seen": 44203616, + "step": 65585 + }, + { + "epoch": 1.6023746121711089, + "grad_norm": 50.18940734863281, + "learning_rate": 1.71820356153261e-06, + "loss": 0.0754, + "num_input_tokens_seen": 44206624, + "step": 65590 + }, + { + "epoch": 1.602496763002956, + "grad_norm": 36.11235809326172, + "learning_rate": 1.7181442199484009e-06, + "loss": 0.0047, + "num_input_tokens_seen": 44209888, + "step": 65595 + }, + { + "epoch": 1.6026189138348033, + "grad_norm": 20.994815826416016, + "learning_rate": 1.7180848731416542e-06, + "loss": 0.114, + "num_input_tokens_seen": 44214176, + "step": 65600 + }, + { + "epoch": 1.6027410646666502, + "grad_norm": 13.22746753692627, + "learning_rate": 1.7180255211128007e-06, + "loss": 0.0821, + "num_input_tokens_seen": 44217376, + "step": 65605 + }, + { + "epoch": 1.6028632154984974, + "grad_norm": 6.7888994216918945, + "learning_rate": 1.7179661638622726e-06, + "loss": 0.0539, + "num_input_tokens_seen": 44220512, + "step": 65610 + }, + { + "epoch": 1.6029853663303446, + "grad_norm": 11.641776084899902, + "learning_rate": 1.7179068013905014e-06, + "loss": 0.1417, + "num_input_tokens_seen": 44223840, + "step": 65615 + }, + { + "epoch": 1.6031075171621918, + "grad_norm": 19.492708206176758, + "learning_rate": 1.717847433697919e-06, + "loss": 0.0381, + "num_input_tokens_seen": 44227168, + "step": 65620 + }, + { + "epoch": 1.603229667994039, + "grad_norm": 0.5217481255531311, + "learning_rate": 1.7177880607849568e-06, + "loss": 0.1578, + "num_input_tokens_seen": 44230752, + "step": 65625 + }, + { + "epoch": 1.6033518188258862, + "grad_norm": 0.8416579365730286, + "learning_rate": 1.717728682652047e-06, + "loss": 0.09, + "num_input_tokens_seen": 44233888, + "step": 65630 + }, + { + "epoch": 1.6034739696577334, + "grad_norm": 8.32114315032959, + "learning_rate": 1.717669299299621e-06, + "loss": 0.004, + "num_input_tokens_seen": 44237280, + "step": 65635 + }, + { + "epoch": 1.6035961204895806, + "grad_norm": 0.1598159223794937, + "learning_rate": 1.7176099107281106e-06, + "loss": 0.1614, + "num_input_tokens_seen": 44240288, + "step": 65640 + }, + { + "epoch": 1.6037182713214277, + "grad_norm": 1.3760980367660522, + "learning_rate": 1.7175505169379483e-06, + "loss": 0.0475, + "num_input_tokens_seen": 44243936, + "step": 65645 + }, + { + "epoch": 1.603840422153275, + "grad_norm": 1.223433494567871, + "learning_rate": 1.7174911179295654e-06, + "loss": 0.0748, + "num_input_tokens_seen": 44247200, + "step": 65650 + }, + { + "epoch": 1.6039625729851221, + "grad_norm": 0.18195028603076935, + "learning_rate": 1.7174317137033944e-06, + "loss": 0.0416, + "num_input_tokens_seen": 44250720, + "step": 65655 + }, + { + "epoch": 1.6040847238169693, + "grad_norm": 0.07075879722833633, + "learning_rate": 1.7173723042598667e-06, + "loss": 0.0533, + "num_input_tokens_seen": 44253792, + "step": 65660 + }, + { + "epoch": 1.6042068746488165, + "grad_norm": 0.3297138214111328, + "learning_rate": 1.7173128895994148e-06, + "loss": 0.0099, + "num_input_tokens_seen": 44257184, + "step": 65665 + }, + { + "epoch": 1.6043290254806635, + "grad_norm": 1.093284010887146, + "learning_rate": 1.7172534697224708e-06, + "loss": 0.2448, + "num_input_tokens_seen": 44260640, + "step": 65670 + }, + { + "epoch": 1.6044511763125107, + "grad_norm": 0.09150218218564987, + "learning_rate": 1.7171940446294664e-06, + "loss": 0.0923, + "num_input_tokens_seen": 44264032, + "step": 65675 + }, + { + "epoch": 1.6045733271443579, + "grad_norm": 0.9866107106208801, + "learning_rate": 1.717134614320834e-06, + "loss": 0.0069, + "num_input_tokens_seen": 44267360, + "step": 65680 + }, + { + "epoch": 1.604695477976205, + "grad_norm": 0.14237858355045319, + "learning_rate": 1.717075178797006e-06, + "loss": 0.1055, + "num_input_tokens_seen": 44270368, + "step": 65685 + }, + { + "epoch": 1.6048176288080522, + "grad_norm": 0.20932254195213318, + "learning_rate": 1.7170157380584143e-06, + "loss": 0.0554, + "num_input_tokens_seen": 44273504, + "step": 65690 + }, + { + "epoch": 1.6049397796398992, + "grad_norm": 0.7047435641288757, + "learning_rate": 1.7169562921054913e-06, + "loss": 0.0332, + "num_input_tokens_seen": 44277024, + "step": 65695 + }, + { + "epoch": 1.6050619304717464, + "grad_norm": 46.69514465332031, + "learning_rate": 1.716896840938669e-06, + "loss": 0.0248, + "num_input_tokens_seen": 44280224, + "step": 65700 + }, + { + "epoch": 1.6051840813035936, + "grad_norm": 0.255687415599823, + "learning_rate": 1.7168373845583805e-06, + "loss": 0.0531, + "num_input_tokens_seen": 44283680, + "step": 65705 + }, + { + "epoch": 1.6053062321354408, + "grad_norm": 34.751155853271484, + "learning_rate": 1.7167779229650576e-06, + "loss": 0.0277, + "num_input_tokens_seen": 44287456, + "step": 65710 + }, + { + "epoch": 1.605428382967288, + "grad_norm": 0.03389814496040344, + "learning_rate": 1.7167184561591328e-06, + "loss": 0.0948, + "num_input_tokens_seen": 44291360, + "step": 65715 + }, + { + "epoch": 1.6055505337991351, + "grad_norm": 9.623665809631348, + "learning_rate": 1.7166589841410387e-06, + "loss": 0.1253, + "num_input_tokens_seen": 44294816, + "step": 65720 + }, + { + "epoch": 1.6056726846309823, + "grad_norm": 0.047234319150447845, + "learning_rate": 1.7165995069112077e-06, + "loss": 0.093, + "num_input_tokens_seen": 44298016, + "step": 65725 + }, + { + "epoch": 1.6057948354628295, + "grad_norm": 34.660728454589844, + "learning_rate": 1.7165400244700723e-06, + "loss": 0.0925, + "num_input_tokens_seen": 44301344, + "step": 65730 + }, + { + "epoch": 1.6059169862946767, + "grad_norm": 0.3465474843978882, + "learning_rate": 1.7164805368180652e-06, + "loss": 0.0401, + "num_input_tokens_seen": 44304992, + "step": 65735 + }, + { + "epoch": 1.606039137126524, + "grad_norm": 0.5020141005516052, + "learning_rate": 1.7164210439556187e-06, + "loss": 0.0368, + "num_input_tokens_seen": 44308192, + "step": 65740 + }, + { + "epoch": 1.606161287958371, + "grad_norm": 25.945310592651367, + "learning_rate": 1.716361545883166e-06, + "loss": 0.1424, + "num_input_tokens_seen": 44311712, + "step": 65745 + }, + { + "epoch": 1.6062834387902183, + "grad_norm": 0.5995503067970276, + "learning_rate": 1.7163020426011393e-06, + "loss": 0.1523, + "num_input_tokens_seen": 44315232, + "step": 65750 + }, + { + "epoch": 1.6064055896220655, + "grad_norm": 0.045718465000391006, + "learning_rate": 1.7162425341099715e-06, + "loss": 0.1506, + "num_input_tokens_seen": 44318560, + "step": 65755 + }, + { + "epoch": 1.6065277404539124, + "grad_norm": 0.055583883076906204, + "learning_rate": 1.7161830204100952e-06, + "loss": 0.0675, + "num_input_tokens_seen": 44322144, + "step": 65760 + }, + { + "epoch": 1.6066498912857596, + "grad_norm": 80.08256530761719, + "learning_rate": 1.7161235015019435e-06, + "loss": 0.1056, + "num_input_tokens_seen": 44325472, + "step": 65765 + }, + { + "epoch": 1.6067720421176068, + "grad_norm": 16.74420166015625, + "learning_rate": 1.7160639773859491e-06, + "loss": 0.1892, + "num_input_tokens_seen": 44328672, + "step": 65770 + }, + { + "epoch": 1.606894192949454, + "grad_norm": 38.66650390625, + "learning_rate": 1.7160044480625447e-06, + "loss": 0.1966, + "num_input_tokens_seen": 44332192, + "step": 65775 + }, + { + "epoch": 1.6070163437813012, + "grad_norm": 0.3365372121334076, + "learning_rate": 1.7159449135321636e-06, + "loss": 0.0466, + "num_input_tokens_seen": 44336352, + "step": 65780 + }, + { + "epoch": 1.6071384946131482, + "grad_norm": 0.8096862435340881, + "learning_rate": 1.7158853737952383e-06, + "loss": 0.092, + "num_input_tokens_seen": 44339616, + "step": 65785 + }, + { + "epoch": 1.6072606454449954, + "grad_norm": 25.179733276367188, + "learning_rate": 1.715825828852202e-06, + "loss": 0.1559, + "num_input_tokens_seen": 44342624, + "step": 65790 + }, + { + "epoch": 1.6073827962768426, + "grad_norm": 0.2534705102443695, + "learning_rate": 1.715766278703488e-06, + "loss": 0.0751, + "num_input_tokens_seen": 44345376, + "step": 65795 + }, + { + "epoch": 1.6075049471086897, + "grad_norm": 0.39350154995918274, + "learning_rate": 1.7157067233495289e-06, + "loss": 0.0654, + "num_input_tokens_seen": 44348448, + "step": 65800 + }, + { + "epoch": 1.607627097940537, + "grad_norm": 117.36756896972656, + "learning_rate": 1.715647162790758e-06, + "loss": 0.0446, + "num_input_tokens_seen": 44352032, + "step": 65805 + }, + { + "epoch": 1.6077492487723841, + "grad_norm": 1.1069661378860474, + "learning_rate": 1.7155875970276086e-06, + "loss": 0.0896, + "num_input_tokens_seen": 44355104, + "step": 65810 + }, + { + "epoch": 1.6078713996042313, + "grad_norm": 17.73491096496582, + "learning_rate": 1.7155280260605137e-06, + "loss": 0.0153, + "num_input_tokens_seen": 44358240, + "step": 65815 + }, + { + "epoch": 1.6079935504360785, + "grad_norm": 0.22711671888828278, + "learning_rate": 1.7154684498899063e-06, + "loss": 0.044, + "num_input_tokens_seen": 44361824, + "step": 65820 + }, + { + "epoch": 1.6081157012679257, + "grad_norm": 0.28996729850769043, + "learning_rate": 1.7154088685162203e-06, + "loss": 0.0039, + "num_input_tokens_seen": 44365536, + "step": 65825 + }, + { + "epoch": 1.6082378520997729, + "grad_norm": 1.2250621318817139, + "learning_rate": 1.7153492819398881e-06, + "loss": 0.0479, + "num_input_tokens_seen": 44368864, + "step": 65830 + }, + { + "epoch": 1.60836000293162, + "grad_norm": 0.9239709973335266, + "learning_rate": 1.7152896901613439e-06, + "loss": 0.0011, + "num_input_tokens_seen": 44372448, + "step": 65835 + }, + { + "epoch": 1.6084821537634673, + "grad_norm": 1.7483092546463013, + "learning_rate": 1.7152300931810206e-06, + "loss": 0.0603, + "num_input_tokens_seen": 44375392, + "step": 65840 + }, + { + "epoch": 1.6086043045953144, + "grad_norm": 0.5381038784980774, + "learning_rate": 1.7151704909993515e-06, + "loss": 0.0437, + "num_input_tokens_seen": 44378464, + "step": 65845 + }, + { + "epoch": 1.6087264554271614, + "grad_norm": 0.05454336851835251, + "learning_rate": 1.7151108836167705e-06, + "loss": 0.1134, + "num_input_tokens_seen": 44381856, + "step": 65850 + }, + { + "epoch": 1.6088486062590086, + "grad_norm": 0.3915286362171173, + "learning_rate": 1.7150512710337105e-06, + "loss": 0.0011, + "num_input_tokens_seen": 44385312, + "step": 65855 + }, + { + "epoch": 1.6089707570908558, + "grad_norm": 26.920024871826172, + "learning_rate": 1.7149916532506055e-06, + "loss": 0.0016, + "num_input_tokens_seen": 44388320, + "step": 65860 + }, + { + "epoch": 1.609092907922703, + "grad_norm": 3.6959805488586426, + "learning_rate": 1.7149320302678892e-06, + "loss": 0.173, + "num_input_tokens_seen": 44391968, + "step": 65865 + }, + { + "epoch": 1.60921505875455, + "grad_norm": 0.21909748017787933, + "learning_rate": 1.7148724020859943e-06, + "loss": 0.0882, + "num_input_tokens_seen": 44395296, + "step": 65870 + }, + { + "epoch": 1.6093372095863971, + "grad_norm": 23.31797218322754, + "learning_rate": 1.7148127687053553e-06, + "loss": 0.1272, + "num_input_tokens_seen": 44398624, + "step": 65875 + }, + { + "epoch": 1.6094593604182443, + "grad_norm": 37.92839813232422, + "learning_rate": 1.7147531301264056e-06, + "loss": 0.1108, + "num_input_tokens_seen": 44402016, + "step": 65880 + }, + { + "epoch": 1.6095815112500915, + "grad_norm": 0.0698784664273262, + "learning_rate": 1.7146934863495787e-06, + "loss": 0.0632, + "num_input_tokens_seen": 44405664, + "step": 65885 + }, + { + "epoch": 1.6097036620819387, + "grad_norm": 0.12959900498390198, + "learning_rate": 1.714633837375309e-06, + "loss": 0.1824, + "num_input_tokens_seen": 44409056, + "step": 65890 + }, + { + "epoch": 1.609825812913786, + "grad_norm": 84.17263793945312, + "learning_rate": 1.7145741832040294e-06, + "loss": 0.0606, + "num_input_tokens_seen": 44412384, + "step": 65895 + }, + { + "epoch": 1.609947963745633, + "grad_norm": 10.694718360900879, + "learning_rate": 1.7145145238361743e-06, + "loss": 0.2215, + "num_input_tokens_seen": 44415648, + "step": 65900 + }, + { + "epoch": 1.6100701145774803, + "grad_norm": 0.30155470967292786, + "learning_rate": 1.7144548592721772e-06, + "loss": 0.0994, + "num_input_tokens_seen": 44418848, + "step": 65905 + }, + { + "epoch": 1.6101922654093275, + "grad_norm": 0.2619548439979553, + "learning_rate": 1.7143951895124724e-06, + "loss": 0.091, + "num_input_tokens_seen": 44421664, + "step": 65910 + }, + { + "epoch": 1.6103144162411747, + "grad_norm": 0.08416949957609177, + "learning_rate": 1.714335514557494e-06, + "loss": 0.0934, + "num_input_tokens_seen": 44425440, + "step": 65915 + }, + { + "epoch": 1.6104365670730219, + "grad_norm": 0.12342800199985504, + "learning_rate": 1.714275834407675e-06, + "loss": 0.0027, + "num_input_tokens_seen": 44429152, + "step": 65920 + }, + { + "epoch": 1.610558717904869, + "grad_norm": 38.95140075683594, + "learning_rate": 1.71421614906345e-06, + "loss": 0.0914, + "num_input_tokens_seen": 44432416, + "step": 65925 + }, + { + "epoch": 1.6106808687367162, + "grad_norm": 0.13258185982704163, + "learning_rate": 1.7141564585252534e-06, + "loss": 0.0421, + "num_input_tokens_seen": 44435616, + "step": 65930 + }, + { + "epoch": 1.6108030195685634, + "grad_norm": 11.2254638671875, + "learning_rate": 1.714096762793519e-06, + "loss": 0.1913, + "num_input_tokens_seen": 44438752, + "step": 65935 + }, + { + "epoch": 1.6109251704004104, + "grad_norm": 252.81646728515625, + "learning_rate": 1.7140370618686807e-06, + "loss": 0.0242, + "num_input_tokens_seen": 44442208, + "step": 65940 + }, + { + "epoch": 1.6110473212322576, + "grad_norm": 25.23392677307129, + "learning_rate": 1.7139773557511727e-06, + "loss": 0.1016, + "num_input_tokens_seen": 44445664, + "step": 65945 + }, + { + "epoch": 1.6111694720641048, + "grad_norm": 8.368292808532715, + "learning_rate": 1.7139176444414296e-06, + "loss": 0.05, + "num_input_tokens_seen": 44449184, + "step": 65950 + }, + { + "epoch": 1.611291622895952, + "grad_norm": 59.63330078125, + "learning_rate": 1.7138579279398853e-06, + "loss": 0.1257, + "num_input_tokens_seen": 44453088, + "step": 65955 + }, + { + "epoch": 1.611413773727799, + "grad_norm": 0.263844758272171, + "learning_rate": 1.7137982062469737e-06, + "loss": 0.0661, + "num_input_tokens_seen": 44456544, + "step": 65960 + }, + { + "epoch": 1.6115359245596461, + "grad_norm": 0.11820437759160995, + "learning_rate": 1.7137384793631302e-06, + "loss": 0.1086, + "num_input_tokens_seen": 44460000, + "step": 65965 + }, + { + "epoch": 1.6116580753914933, + "grad_norm": 0.3315877914428711, + "learning_rate": 1.7136787472887884e-06, + "loss": 0.0199, + "num_input_tokens_seen": 44463904, + "step": 65970 + }, + { + "epoch": 1.6117802262233405, + "grad_norm": 10.633949279785156, + "learning_rate": 1.7136190100243826e-06, + "loss": 0.154, + "num_input_tokens_seen": 44467040, + "step": 65975 + }, + { + "epoch": 1.6119023770551877, + "grad_norm": 27.93235969543457, + "learning_rate": 1.7135592675703475e-06, + "loss": 0.0465, + "num_input_tokens_seen": 44469984, + "step": 65980 + }, + { + "epoch": 1.6120245278870349, + "grad_norm": 7.1234235763549805, + "learning_rate": 1.7134995199271174e-06, + "loss": 0.1811, + "num_input_tokens_seen": 44473248, + "step": 65985 + }, + { + "epoch": 1.612146678718882, + "grad_norm": 0.2649827301502228, + "learning_rate": 1.7134397670951268e-06, + "loss": 0.0015, + "num_input_tokens_seen": 44476640, + "step": 65990 + }, + { + "epoch": 1.6122688295507293, + "grad_norm": 49.69743347167969, + "learning_rate": 1.7133800090748106e-06, + "loss": 0.1421, + "num_input_tokens_seen": 44479520, + "step": 65995 + }, + { + "epoch": 1.6123909803825764, + "grad_norm": 0.11934787780046463, + "learning_rate": 1.713320245866603e-06, + "loss": 0.1589, + "num_input_tokens_seen": 44482592, + "step": 66000 + }, + { + "epoch": 1.6125131312144236, + "grad_norm": 81.90103912353516, + "learning_rate": 1.7132604774709385e-06, + "loss": 0.2063, + "num_input_tokens_seen": 44485984, + "step": 66005 + }, + { + "epoch": 1.6126352820462708, + "grad_norm": 1.1120253801345825, + "learning_rate": 1.7132007038882522e-06, + "loss": 0.0966, + "num_input_tokens_seen": 44489184, + "step": 66010 + }, + { + "epoch": 1.612757432878118, + "grad_norm": 18.164979934692383, + "learning_rate": 1.7131409251189783e-06, + "loss": 0.0427, + "num_input_tokens_seen": 44492704, + "step": 66015 + }, + { + "epoch": 1.6128795837099652, + "grad_norm": 0.24019227921962738, + "learning_rate": 1.7130811411635522e-06, + "loss": 0.0581, + "num_input_tokens_seen": 44495904, + "step": 66020 + }, + { + "epoch": 1.6130017345418124, + "grad_norm": 0.2978065013885498, + "learning_rate": 1.713021352022408e-06, + "loss": 0.004, + "num_input_tokens_seen": 44499616, + "step": 66025 + }, + { + "epoch": 1.6131238853736594, + "grad_norm": 19.480119705200195, + "learning_rate": 1.7129615576959804e-06, + "loss": 0.1521, + "num_input_tokens_seen": 44503008, + "step": 66030 + }, + { + "epoch": 1.6132460362055066, + "grad_norm": 13.071788787841797, + "learning_rate": 1.7129017581847052e-06, + "loss": 0.1051, + "num_input_tokens_seen": 44506208, + "step": 66035 + }, + { + "epoch": 1.6133681870373537, + "grad_norm": 0.3018631637096405, + "learning_rate": 1.7128419534890162e-06, + "loss": 0.0537, + "num_input_tokens_seen": 44509216, + "step": 66040 + }, + { + "epoch": 1.613490337869201, + "grad_norm": 1.6079626083374023, + "learning_rate": 1.712782143609349e-06, + "loss": 0.0643, + "num_input_tokens_seen": 44512800, + "step": 66045 + }, + { + "epoch": 1.613612488701048, + "grad_norm": 36.73057174682617, + "learning_rate": 1.7127223285461385e-06, + "loss": 0.1507, + "num_input_tokens_seen": 44516640, + "step": 66050 + }, + { + "epoch": 1.613734639532895, + "grad_norm": 1.0381956100463867, + "learning_rate": 1.7126625082998195e-06, + "loss": 0.0514, + "num_input_tokens_seen": 44520160, + "step": 66055 + }, + { + "epoch": 1.6138567903647423, + "grad_norm": 18.15398597717285, + "learning_rate": 1.7126026828708266e-06, + "loss": 0.1484, + "num_input_tokens_seen": 44523360, + "step": 66060 + }, + { + "epoch": 1.6139789411965895, + "grad_norm": 0.37995773553848267, + "learning_rate": 1.7125428522595956e-06, + "loss": 0.0504, + "num_input_tokens_seen": 44527264, + "step": 66065 + }, + { + "epoch": 1.6141010920284367, + "grad_norm": 13.00422191619873, + "learning_rate": 1.7124830164665616e-06, + "loss": 0.0785, + "num_input_tokens_seen": 44530528, + "step": 66070 + }, + { + "epoch": 1.6142232428602838, + "grad_norm": 1.3020102977752686, + "learning_rate": 1.7124231754921592e-06, + "loss": 0.1119, + "num_input_tokens_seen": 44533344, + "step": 66075 + }, + { + "epoch": 1.614345393692131, + "grad_norm": 0.14015237987041473, + "learning_rate": 1.7123633293368239e-06, + "loss": 0.0351, + "num_input_tokens_seen": 44536416, + "step": 66080 + }, + { + "epoch": 1.6144675445239782, + "grad_norm": 0.17543506622314453, + "learning_rate": 1.7123034780009906e-06, + "loss": 0.0275, + "num_input_tokens_seen": 44539552, + "step": 66085 + }, + { + "epoch": 1.6145896953558254, + "grad_norm": 13.367767333984375, + "learning_rate": 1.7122436214850952e-06, + "loss": 0.0778, + "num_input_tokens_seen": 44542880, + "step": 66090 + }, + { + "epoch": 1.6147118461876726, + "grad_norm": 20.807472229003906, + "learning_rate": 1.7121837597895725e-06, + "loss": 0.0436, + "num_input_tokens_seen": 44545760, + "step": 66095 + }, + { + "epoch": 1.6148339970195198, + "grad_norm": 17.611282348632812, + "learning_rate": 1.712123892914858e-06, + "loss": 0.1088, + "num_input_tokens_seen": 44548832, + "step": 66100 + }, + { + "epoch": 1.614956147851367, + "grad_norm": 0.49443256855010986, + "learning_rate": 1.712064020861387e-06, + "loss": 0.0025, + "num_input_tokens_seen": 44552224, + "step": 66105 + }, + { + "epoch": 1.6150782986832142, + "grad_norm": 13.236289978027344, + "learning_rate": 1.7120041436295947e-06, + "loss": 0.1381, + "num_input_tokens_seen": 44555424, + "step": 66110 + }, + { + "epoch": 1.6152004495150611, + "grad_norm": 11.525460243225098, + "learning_rate": 1.7119442612199169e-06, + "loss": 0.066, + "num_input_tokens_seen": 44558496, + "step": 66115 + }, + { + "epoch": 1.6153226003469083, + "grad_norm": 20.939476013183594, + "learning_rate": 1.7118843736327891e-06, + "loss": 0.1894, + "num_input_tokens_seen": 44561888, + "step": 66120 + }, + { + "epoch": 1.6154447511787555, + "grad_norm": 0.06147155165672302, + "learning_rate": 1.7118244808686464e-06, + "loss": 0.2398, + "num_input_tokens_seen": 44565216, + "step": 66125 + }, + { + "epoch": 1.6155669020106027, + "grad_norm": 0.08817419409751892, + "learning_rate": 1.7117645829279245e-06, + "loss": 0.0939, + "num_input_tokens_seen": 44568416, + "step": 66130 + }, + { + "epoch": 1.61568905284245, + "grad_norm": 33.70576095581055, + "learning_rate": 1.7117046798110594e-06, + "loss": 0.0843, + "num_input_tokens_seen": 44571360, + "step": 66135 + }, + { + "epoch": 1.6158112036742969, + "grad_norm": 0.171207457780838, + "learning_rate": 1.7116447715184866e-06, + "loss": 0.0584, + "num_input_tokens_seen": 44575008, + "step": 66140 + }, + { + "epoch": 1.615933354506144, + "grad_norm": 0.44628486037254333, + "learning_rate": 1.7115848580506413e-06, + "loss": 0.0668, + "num_input_tokens_seen": 44578848, + "step": 66145 + }, + { + "epoch": 1.6160555053379912, + "grad_norm": 0.09966251999139786, + "learning_rate": 1.7115249394079596e-06, + "loss": 0.0433, + "num_input_tokens_seen": 44582880, + "step": 66150 + }, + { + "epoch": 1.6161776561698384, + "grad_norm": 0.0741414725780487, + "learning_rate": 1.7114650155908771e-06, + "loss": 0.0462, + "num_input_tokens_seen": 44586144, + "step": 66155 + }, + { + "epoch": 1.6162998070016856, + "grad_norm": 0.5622798800468445, + "learning_rate": 1.71140508659983e-06, + "loss": 0.0029, + "num_input_tokens_seen": 44589792, + "step": 66160 + }, + { + "epoch": 1.6164219578335328, + "grad_norm": 0.09299297630786896, + "learning_rate": 1.7113451524352533e-06, + "loss": 0.0599, + "num_input_tokens_seen": 44593056, + "step": 66165 + }, + { + "epoch": 1.61654410866538, + "grad_norm": 0.10339483618736267, + "learning_rate": 1.7112852130975838e-06, + "loss": 0.0373, + "num_input_tokens_seen": 44596320, + "step": 66170 + }, + { + "epoch": 1.6166662594972272, + "grad_norm": 102.27527618408203, + "learning_rate": 1.7112252685872566e-06, + "loss": 0.1563, + "num_input_tokens_seen": 44599520, + "step": 66175 + }, + { + "epoch": 1.6167884103290744, + "grad_norm": 0.0032880015205591917, + "learning_rate": 1.7111653189047076e-06, + "loss": 0.0564, + "num_input_tokens_seen": 44603040, + "step": 66180 + }, + { + "epoch": 1.6169105611609216, + "grad_norm": 0.06474665552377701, + "learning_rate": 1.7111053640503737e-06, + "loss": 0.0227, + "num_input_tokens_seen": 44606880, + "step": 66185 + }, + { + "epoch": 1.6170327119927688, + "grad_norm": 0.08754179626703262, + "learning_rate": 1.71104540402469e-06, + "loss": 0.0364, + "num_input_tokens_seen": 44610464, + "step": 66190 + }, + { + "epoch": 1.617154862824616, + "grad_norm": 35.25015640258789, + "learning_rate": 1.7109854388280932e-06, + "loss": 0.1687, + "num_input_tokens_seen": 44613728, + "step": 66195 + }, + { + "epoch": 1.6172770136564631, + "grad_norm": 0.07697348296642303, + "learning_rate": 1.710925468461019e-06, + "loss": 0.2382, + "num_input_tokens_seen": 44617696, + "step": 66200 + }, + { + "epoch": 1.6173991644883101, + "grad_norm": 10.768204689025879, + "learning_rate": 1.7108654929239033e-06, + "loss": 0.1017, + "num_input_tokens_seen": 44621024, + "step": 66205 + }, + { + "epoch": 1.6175213153201573, + "grad_norm": 0.8216606378555298, + "learning_rate": 1.7108055122171825e-06, + "loss": 0.1305, + "num_input_tokens_seen": 44623968, + "step": 66210 + }, + { + "epoch": 1.6176434661520045, + "grad_norm": 64.47852325439453, + "learning_rate": 1.710745526341293e-06, + "loss": 0.073, + "num_input_tokens_seen": 44627232, + "step": 66215 + }, + { + "epoch": 1.6177656169838517, + "grad_norm": 0.25741147994995117, + "learning_rate": 1.710685535296671e-06, + "loss": 0.0392, + "num_input_tokens_seen": 44630432, + "step": 66220 + }, + { + "epoch": 1.6178877678156989, + "grad_norm": 0.2642552852630615, + "learning_rate": 1.7106255390837525e-06, + "loss": 0.0633, + "num_input_tokens_seen": 44634080, + "step": 66225 + }, + { + "epoch": 1.6180099186475458, + "grad_norm": 103.3814697265625, + "learning_rate": 1.710565537702974e-06, + "loss": 0.1125, + "num_input_tokens_seen": 44637472, + "step": 66230 + }, + { + "epoch": 1.618132069479393, + "grad_norm": 18.73349952697754, + "learning_rate": 1.7105055311547716e-06, + "loss": 0.0341, + "num_input_tokens_seen": 44640992, + "step": 66235 + }, + { + "epoch": 1.6182542203112402, + "grad_norm": 16.76979637145996, + "learning_rate": 1.7104455194395822e-06, + "loss": 0.1151, + "num_input_tokens_seen": 44644000, + "step": 66240 + }, + { + "epoch": 1.6183763711430874, + "grad_norm": 24.473278045654297, + "learning_rate": 1.7103855025578416e-06, + "loss": 0.1159, + "num_input_tokens_seen": 44648224, + "step": 66245 + }, + { + "epoch": 1.6184985219749346, + "grad_norm": 24.458818435668945, + "learning_rate": 1.7103254805099867e-06, + "loss": 0.1734, + "num_input_tokens_seen": 44651616, + "step": 66250 + }, + { + "epoch": 1.6186206728067818, + "grad_norm": 89.10078430175781, + "learning_rate": 1.7102654532964538e-06, + "loss": 0.0259, + "num_input_tokens_seen": 44655072, + "step": 66255 + }, + { + "epoch": 1.618742823638629, + "grad_norm": 0.7390759587287903, + "learning_rate": 1.7102054209176794e-06, + "loss": 0.0601, + "num_input_tokens_seen": 44658656, + "step": 66260 + }, + { + "epoch": 1.6188649744704762, + "grad_norm": 0.2607508599758148, + "learning_rate": 1.7101453833741005e-06, + "loss": 0.0548, + "num_input_tokens_seen": 44662304, + "step": 66265 + }, + { + "epoch": 1.6189871253023234, + "grad_norm": 1.9803550243377686, + "learning_rate": 1.710085340666153e-06, + "loss": 0.0321, + "num_input_tokens_seen": 44665312, + "step": 66270 + }, + { + "epoch": 1.6191092761341705, + "grad_norm": 9.75775146484375, + "learning_rate": 1.710025292794274e-06, + "loss": 0.0417, + "num_input_tokens_seen": 44669024, + "step": 66275 + }, + { + "epoch": 1.6192314269660177, + "grad_norm": 0.05687793344259262, + "learning_rate": 1.7099652397589002e-06, + "loss": 0.0017, + "num_input_tokens_seen": 44672160, + "step": 66280 + }, + { + "epoch": 1.619353577797865, + "grad_norm": 10.78559684753418, + "learning_rate": 1.7099051815604681e-06, + "loss": 0.1311, + "num_input_tokens_seen": 44675872, + "step": 66285 + }, + { + "epoch": 1.6194757286297121, + "grad_norm": 0.29572921991348267, + "learning_rate": 1.7098451181994147e-06, + "loss": 0.1039, + "num_input_tokens_seen": 44679840, + "step": 66290 + }, + { + "epoch": 1.619597879461559, + "grad_norm": 0.10324651747941971, + "learning_rate": 1.7097850496761764e-06, + "loss": 0.2029, + "num_input_tokens_seen": 44683232, + "step": 66295 + }, + { + "epoch": 1.6197200302934063, + "grad_norm": 9.582807540893555, + "learning_rate": 1.709724975991191e-06, + "loss": 0.2378, + "num_input_tokens_seen": 44686240, + "step": 66300 + }, + { + "epoch": 1.6198421811252535, + "grad_norm": 0.16958528757095337, + "learning_rate": 1.7096648971448938e-06, + "loss": 0.0358, + "num_input_tokens_seen": 44690272, + "step": 66305 + }, + { + "epoch": 1.6199643319571007, + "grad_norm": 57.915733337402344, + "learning_rate": 1.709604813137723e-06, + "loss": 0.0322, + "num_input_tokens_seen": 44693344, + "step": 66310 + }, + { + "epoch": 1.6200864827889478, + "grad_norm": 1.0083634853363037, + "learning_rate": 1.7095447239701153e-06, + "loss": 0.0937, + "num_input_tokens_seen": 44696928, + "step": 66315 + }, + { + "epoch": 1.6202086336207948, + "grad_norm": 0.18606296181678772, + "learning_rate": 1.7094846296425072e-06, + "loss": 0.0822, + "num_input_tokens_seen": 44700512, + "step": 66320 + }, + { + "epoch": 1.620330784452642, + "grad_norm": 0.2539637088775635, + "learning_rate": 1.7094245301553362e-06, + "loss": 0.045, + "num_input_tokens_seen": 44703392, + "step": 66325 + }, + { + "epoch": 1.6204529352844892, + "grad_norm": 11.006232261657715, + "learning_rate": 1.7093644255090394e-06, + "loss": 0.0862, + "num_input_tokens_seen": 44706720, + "step": 66330 + }, + { + "epoch": 1.6205750861163364, + "grad_norm": 0.09770510345697403, + "learning_rate": 1.7093043157040533e-06, + "loss": 0.0469, + "num_input_tokens_seen": 44709856, + "step": 66335 + }, + { + "epoch": 1.6206972369481836, + "grad_norm": 0.7676712870597839, + "learning_rate": 1.709244200740816e-06, + "loss": 0.0538, + "num_input_tokens_seen": 44713184, + "step": 66340 + }, + { + "epoch": 1.6208193877800308, + "grad_norm": 1.2458648681640625, + "learning_rate": 1.7091840806197636e-06, + "loss": 0.0821, + "num_input_tokens_seen": 44716256, + "step": 66345 + }, + { + "epoch": 1.620941538611878, + "grad_norm": 0.07486743479967117, + "learning_rate": 1.709123955341334e-06, + "loss": 0.0016, + "num_input_tokens_seen": 44719904, + "step": 66350 + }, + { + "epoch": 1.6210636894437251, + "grad_norm": 0.9548220634460449, + "learning_rate": 1.7090638249059641e-06, + "loss": 0.0014, + "num_input_tokens_seen": 44723232, + "step": 66355 + }, + { + "epoch": 1.6211858402755723, + "grad_norm": 1.062659502029419, + "learning_rate": 1.7090036893140915e-06, + "loss": 0.0441, + "num_input_tokens_seen": 44727072, + "step": 66360 + }, + { + "epoch": 1.6213079911074195, + "grad_norm": 0.13680623471736908, + "learning_rate": 1.7089435485661535e-06, + "loss": 0.0897, + "num_input_tokens_seen": 44730720, + "step": 66365 + }, + { + "epoch": 1.6214301419392667, + "grad_norm": 6.874294281005859, + "learning_rate": 1.7088834026625869e-06, + "loss": 0.0022, + "num_input_tokens_seen": 44733920, + "step": 66370 + }, + { + "epoch": 1.621552292771114, + "grad_norm": 253.76951599121094, + "learning_rate": 1.70882325160383e-06, + "loss": 0.1949, + "num_input_tokens_seen": 44737312, + "step": 66375 + }, + { + "epoch": 1.621674443602961, + "grad_norm": 0.1610388159751892, + "learning_rate": 1.7087630953903197e-06, + "loss": 0.1604, + "num_input_tokens_seen": 44740704, + "step": 66380 + }, + { + "epoch": 1.621796594434808, + "grad_norm": 11.13759708404541, + "learning_rate": 1.7087029340224933e-06, + "loss": 0.2189, + "num_input_tokens_seen": 44744288, + "step": 66385 + }, + { + "epoch": 1.6219187452666552, + "grad_norm": 29.627588272094727, + "learning_rate": 1.7086427675007886e-06, + "loss": 0.0373, + "num_input_tokens_seen": 44747808, + "step": 66390 + }, + { + "epoch": 1.6220408960985024, + "grad_norm": 0.06932839751243591, + "learning_rate": 1.7085825958256431e-06, + "loss": 0.04, + "num_input_tokens_seen": 44751136, + "step": 66395 + }, + { + "epoch": 1.6221630469303496, + "grad_norm": 0.04329407215118408, + "learning_rate": 1.7085224189974944e-06, + "loss": 0.0008, + "num_input_tokens_seen": 44754464, + "step": 66400 + }, + { + "epoch": 1.6222851977621966, + "grad_norm": 0.03631928190588951, + "learning_rate": 1.7084622370167803e-06, + "loss": 0.0907, + "num_input_tokens_seen": 44757920, + "step": 66405 + }, + { + "epoch": 1.6224073485940438, + "grad_norm": 0.1588095873594284, + "learning_rate": 1.708402049883938e-06, + "loss": 0.1325, + "num_input_tokens_seen": 44761504, + "step": 66410 + }, + { + "epoch": 1.622529499425891, + "grad_norm": 0.6479094624519348, + "learning_rate": 1.7083418575994055e-06, + "loss": 0.0319, + "num_input_tokens_seen": 44764960, + "step": 66415 + }, + { + "epoch": 1.6226516502577382, + "grad_norm": 0.24373003840446472, + "learning_rate": 1.7082816601636205e-06, + "loss": 0.0828, + "num_input_tokens_seen": 44768224, + "step": 66420 + }, + { + "epoch": 1.6227738010895854, + "grad_norm": 0.11970167607069016, + "learning_rate": 1.7082214575770209e-06, + "loss": 0.0197, + "num_input_tokens_seen": 44771488, + "step": 66425 + }, + { + "epoch": 1.6228959519214325, + "grad_norm": 0.31107163429260254, + "learning_rate": 1.7081612498400442e-06, + "loss": 0.0376, + "num_input_tokens_seen": 44774432, + "step": 66430 + }, + { + "epoch": 1.6230181027532797, + "grad_norm": 0.07137488573789597, + "learning_rate": 1.7081010369531286e-06, + "loss": 0.1097, + "num_input_tokens_seen": 44777824, + "step": 66435 + }, + { + "epoch": 1.623140253585127, + "grad_norm": 0.04188014194369316, + "learning_rate": 1.7080408189167116e-06, + "loss": 0.0008, + "num_input_tokens_seen": 44781152, + "step": 66440 + }, + { + "epoch": 1.6232624044169741, + "grad_norm": 0.25597256422042847, + "learning_rate": 1.7079805957312315e-06, + "loss": 0.0708, + "num_input_tokens_seen": 44784224, + "step": 66445 + }, + { + "epoch": 1.6233845552488213, + "grad_norm": 11.878859519958496, + "learning_rate": 1.707920367397126e-06, + "loss": 0.0751, + "num_input_tokens_seen": 44787808, + "step": 66450 + }, + { + "epoch": 1.6235067060806685, + "grad_norm": 25.821226119995117, + "learning_rate": 1.7078601339148332e-06, + "loss": 0.0875, + "num_input_tokens_seen": 44791072, + "step": 66455 + }, + { + "epoch": 1.6236288569125157, + "grad_norm": 1.8043272495269775, + "learning_rate": 1.7077998952847912e-06, + "loss": 0.0368, + "num_input_tokens_seen": 44794080, + "step": 66460 + }, + { + "epoch": 1.6237510077443629, + "grad_norm": 137.92518615722656, + "learning_rate": 1.7077396515074379e-06, + "loss": 0.124, + "num_input_tokens_seen": 44796960, + "step": 66465 + }, + { + "epoch": 1.62387315857621, + "grad_norm": 15.676214218139648, + "learning_rate": 1.7076794025832112e-06, + "loss": 0.121, + "num_input_tokens_seen": 44800160, + "step": 66470 + }, + { + "epoch": 1.623995309408057, + "grad_norm": 0.04584408178925514, + "learning_rate": 1.70761914851255e-06, + "loss": 0.1473, + "num_input_tokens_seen": 44803488, + "step": 66475 + }, + { + "epoch": 1.6241174602399042, + "grad_norm": 0.16386158764362335, + "learning_rate": 1.7075588892958917e-06, + "loss": 0.0533, + "num_input_tokens_seen": 44807008, + "step": 66480 + }, + { + "epoch": 1.6242396110717514, + "grad_norm": 41.72587966918945, + "learning_rate": 1.7074986249336751e-06, + "loss": 0.0263, + "num_input_tokens_seen": 44810336, + "step": 66485 + }, + { + "epoch": 1.6243617619035986, + "grad_norm": 33.555728912353516, + "learning_rate": 1.707438355426338e-06, + "loss": 0.1167, + "num_input_tokens_seen": 44813152, + "step": 66490 + }, + { + "epoch": 1.6244839127354456, + "grad_norm": 0.2910143733024597, + "learning_rate": 1.707378080774319e-06, + "loss": 0.115, + "num_input_tokens_seen": 44816992, + "step": 66495 + }, + { + "epoch": 1.6246060635672928, + "grad_norm": 12.734646797180176, + "learning_rate": 1.7073178009780564e-06, + "loss": 0.156, + "num_input_tokens_seen": 44820064, + "step": 66500 + }, + { + "epoch": 1.62472821439914, + "grad_norm": 0.8806232810020447, + "learning_rate": 1.7072575160379886e-06, + "loss": 0.0938, + "num_input_tokens_seen": 44823776, + "step": 66505 + }, + { + "epoch": 1.6248503652309871, + "grad_norm": 0.18991605937480927, + "learning_rate": 1.7071972259545535e-06, + "loss": 0.0759, + "num_input_tokens_seen": 44827296, + "step": 66510 + }, + { + "epoch": 1.6249725160628343, + "grad_norm": 1.1119245290756226, + "learning_rate": 1.7071369307281903e-06, + "loss": 0.0549, + "num_input_tokens_seen": 44830304, + "step": 66515 + }, + { + "epoch": 1.6250946668946815, + "grad_norm": 39.90916061401367, + "learning_rate": 1.7070766303593369e-06, + "loss": 0.0036, + "num_input_tokens_seen": 44834144, + "step": 66520 + }, + { + "epoch": 1.6252168177265287, + "grad_norm": 10.871047973632812, + "learning_rate": 1.7070163248484323e-06, + "loss": 0.296, + "num_input_tokens_seen": 44837344, + "step": 66525 + }, + { + "epoch": 1.625338968558376, + "grad_norm": 0.11879879981279373, + "learning_rate": 1.706956014195915e-06, + "loss": 0.0014, + "num_input_tokens_seen": 44840800, + "step": 66530 + }, + { + "epoch": 1.625461119390223, + "grad_norm": 0.1101454347372055, + "learning_rate": 1.7068956984022229e-06, + "loss": 0.1023, + "num_input_tokens_seen": 44843616, + "step": 66535 + }, + { + "epoch": 1.6255832702220703, + "grad_norm": 0.17406058311462402, + "learning_rate": 1.7068353774677956e-06, + "loss": 0.0053, + "num_input_tokens_seen": 44846560, + "step": 66540 + }, + { + "epoch": 1.6257054210539175, + "grad_norm": 63.51289749145508, + "learning_rate": 1.706775051393071e-06, + "loss": 0.1193, + "num_input_tokens_seen": 44850016, + "step": 66545 + }, + { + "epoch": 1.6258275718857647, + "grad_norm": 163.7349395751953, + "learning_rate": 1.7067147201784882e-06, + "loss": 0.0211, + "num_input_tokens_seen": 44853536, + "step": 66550 + }, + { + "epoch": 1.6259497227176118, + "grad_norm": 60.688262939453125, + "learning_rate": 1.7066543838244857e-06, + "loss": 0.0221, + "num_input_tokens_seen": 44856928, + "step": 66555 + }, + { + "epoch": 1.626071873549459, + "grad_norm": 0.15369991958141327, + "learning_rate": 1.7065940423315032e-06, + "loss": 0.0409, + "num_input_tokens_seen": 44860384, + "step": 66560 + }, + { + "epoch": 1.626194024381306, + "grad_norm": 10.165790557861328, + "learning_rate": 1.706533695699978e-06, + "loss": 0.0624, + "num_input_tokens_seen": 44863328, + "step": 66565 + }, + { + "epoch": 1.6263161752131532, + "grad_norm": 0.4016942083835602, + "learning_rate": 1.7064733439303497e-06, + "loss": 0.0517, + "num_input_tokens_seen": 44866656, + "step": 66570 + }, + { + "epoch": 1.6264383260450004, + "grad_norm": 49.472816467285156, + "learning_rate": 1.7064129870230576e-06, + "loss": 0.0994, + "num_input_tokens_seen": 44870368, + "step": 66575 + }, + { + "epoch": 1.6265604768768476, + "grad_norm": 0.06103126332163811, + "learning_rate": 1.7063526249785403e-06, + "loss": 0.0459, + "num_input_tokens_seen": 44873376, + "step": 66580 + }, + { + "epoch": 1.6266826277086945, + "grad_norm": 0.10283923149108887, + "learning_rate": 1.7062922577972366e-06, + "loss": 0.0376, + "num_input_tokens_seen": 44876704, + "step": 66585 + }, + { + "epoch": 1.6268047785405417, + "grad_norm": 0.15717634558677673, + "learning_rate": 1.7062318854795854e-06, + "loss": 0.3196, + "num_input_tokens_seen": 44880096, + "step": 66590 + }, + { + "epoch": 1.626926929372389, + "grad_norm": 0.3138972222805023, + "learning_rate": 1.7061715080260264e-06, + "loss": 0.0331, + "num_input_tokens_seen": 44883552, + "step": 66595 + }, + { + "epoch": 1.627049080204236, + "grad_norm": 246.54254150390625, + "learning_rate": 1.706111125436998e-06, + "loss": 0.099, + "num_input_tokens_seen": 44886816, + "step": 66600 + }, + { + "epoch": 1.6271712310360833, + "grad_norm": 0.12112493067979813, + "learning_rate": 1.7060507377129396e-06, + "loss": 0.0374, + "num_input_tokens_seen": 44890464, + "step": 66605 + }, + { + "epoch": 1.6272933818679305, + "grad_norm": 0.10736341774463654, + "learning_rate": 1.7059903448542903e-06, + "loss": 0.0543, + "num_input_tokens_seen": 44893728, + "step": 66610 + }, + { + "epoch": 1.6274155326997777, + "grad_norm": 0.24470113217830658, + "learning_rate": 1.7059299468614893e-06, + "loss": 0.0874, + "num_input_tokens_seen": 44897440, + "step": 66615 + }, + { + "epoch": 1.6275376835316249, + "grad_norm": 0.12989413738250732, + "learning_rate": 1.705869543734976e-06, + "loss": 0.049, + "num_input_tokens_seen": 44901344, + "step": 66620 + }, + { + "epoch": 1.627659834363472, + "grad_norm": 0.07117544859647751, + "learning_rate": 1.7058091354751895e-06, + "loss": 0.0819, + "num_input_tokens_seen": 44904608, + "step": 66625 + }, + { + "epoch": 1.6277819851953192, + "grad_norm": 0.21030397713184357, + "learning_rate": 1.705748722082569e-06, + "loss": 0.0016, + "num_input_tokens_seen": 44907936, + "step": 66630 + }, + { + "epoch": 1.6279041360271664, + "grad_norm": 0.3971431255340576, + "learning_rate": 1.7056883035575542e-06, + "loss": 0.0496, + "num_input_tokens_seen": 44911264, + "step": 66635 + }, + { + "epoch": 1.6280262868590136, + "grad_norm": 0.25932618975639343, + "learning_rate": 1.7056278799005841e-06, + "loss": 0.0025, + "num_input_tokens_seen": 44915104, + "step": 66640 + }, + { + "epoch": 1.6281484376908608, + "grad_norm": 105.23665618896484, + "learning_rate": 1.705567451112098e-06, + "loss": 0.1403, + "num_input_tokens_seen": 44918624, + "step": 66645 + }, + { + "epoch": 1.6282705885227078, + "grad_norm": 0.11826230585575104, + "learning_rate": 1.705507017192536e-06, + "loss": 0.0961, + "num_input_tokens_seen": 44922336, + "step": 66650 + }, + { + "epoch": 1.628392739354555, + "grad_norm": 0.07961612194776535, + "learning_rate": 1.7054465781423373e-06, + "loss": 0.0438, + "num_input_tokens_seen": 44925536, + "step": 66655 + }, + { + "epoch": 1.6285148901864022, + "grad_norm": 8.724510192871094, + "learning_rate": 1.7053861339619408e-06, + "loss": 0.0817, + "num_input_tokens_seen": 44928864, + "step": 66660 + }, + { + "epoch": 1.6286370410182494, + "grad_norm": 0.1295159012079239, + "learning_rate": 1.7053256846517874e-06, + "loss": 0.0014, + "num_input_tokens_seen": 44932000, + "step": 66665 + }, + { + "epoch": 1.6287591918500965, + "grad_norm": 46.75648880004883, + "learning_rate": 1.7052652302123152e-06, + "loss": 0.1552, + "num_input_tokens_seen": 44935456, + "step": 66670 + }, + { + "epoch": 1.6288813426819435, + "grad_norm": 0.3630995750427246, + "learning_rate": 1.7052047706439648e-06, + "loss": 0.0643, + "num_input_tokens_seen": 44938720, + "step": 66675 + }, + { + "epoch": 1.6290034935137907, + "grad_norm": 0.10988431423902512, + "learning_rate": 1.7051443059471758e-06, + "loss": 0.1296, + "num_input_tokens_seen": 44941792, + "step": 66680 + }, + { + "epoch": 1.629125644345638, + "grad_norm": 28.34921646118164, + "learning_rate": 1.7050838361223874e-06, + "loss": 0.1599, + "num_input_tokens_seen": 44945248, + "step": 66685 + }, + { + "epoch": 1.629247795177485, + "grad_norm": 12.040345191955566, + "learning_rate": 1.7050233611700399e-06, + "loss": 0.1362, + "num_input_tokens_seen": 44948256, + "step": 66690 + }, + { + "epoch": 1.6293699460093323, + "grad_norm": 19.319721221923828, + "learning_rate": 1.704962881090573e-06, + "loss": 0.0916, + "num_input_tokens_seen": 44951392, + "step": 66695 + }, + { + "epoch": 1.6294920968411795, + "grad_norm": 80.89275360107422, + "learning_rate": 1.7049023958844261e-06, + "loss": 0.1602, + "num_input_tokens_seen": 44954592, + "step": 66700 + }, + { + "epoch": 1.6296142476730267, + "grad_norm": 20.608259201049805, + "learning_rate": 1.7048419055520396e-06, + "loss": 0.0779, + "num_input_tokens_seen": 44957856, + "step": 66705 + }, + { + "epoch": 1.6297363985048738, + "grad_norm": 133.53970336914062, + "learning_rate": 1.704781410093853e-06, + "loss": 0.0228, + "num_input_tokens_seen": 44961312, + "step": 66710 + }, + { + "epoch": 1.629858549336721, + "grad_norm": 0.08650583773851395, + "learning_rate": 1.704720909510307e-06, + "loss": 0.099, + "num_input_tokens_seen": 44964512, + "step": 66715 + }, + { + "epoch": 1.6299807001685682, + "grad_norm": 28.098222732543945, + "learning_rate": 1.7046604038018404e-06, + "loss": 0.0452, + "num_input_tokens_seen": 44967968, + "step": 66720 + }, + { + "epoch": 1.6301028510004154, + "grad_norm": 0.07196227461099625, + "learning_rate": 1.704599892968894e-06, + "loss": 0.0016, + "num_input_tokens_seen": 44970912, + "step": 66725 + }, + { + "epoch": 1.6302250018322626, + "grad_norm": 25.876453399658203, + "learning_rate": 1.7045393770119075e-06, + "loss": 0.1082, + "num_input_tokens_seen": 44974240, + "step": 66730 + }, + { + "epoch": 1.6303471526641098, + "grad_norm": 12.297942161560059, + "learning_rate": 1.7044788559313214e-06, + "loss": 0.1062, + "num_input_tokens_seen": 44977504, + "step": 66735 + }, + { + "epoch": 1.6304693034959568, + "grad_norm": 23.527793884277344, + "learning_rate": 1.7044183297275753e-06, + "loss": 0.1591, + "num_input_tokens_seen": 44980704, + "step": 66740 + }, + { + "epoch": 1.630591454327804, + "grad_norm": 17.326770782470703, + "learning_rate": 1.7043577984011099e-06, + "loss": 0.1346, + "num_input_tokens_seen": 44983904, + "step": 66745 + }, + { + "epoch": 1.6307136051596511, + "grad_norm": 0.08420751988887787, + "learning_rate": 1.7042972619523651e-06, + "loss": 0.1032, + "num_input_tokens_seen": 44987296, + "step": 66750 + }, + { + "epoch": 1.6308357559914983, + "grad_norm": 20.060701370239258, + "learning_rate": 1.7042367203817812e-06, + "loss": 0.1449, + "num_input_tokens_seen": 44990688, + "step": 66755 + }, + { + "epoch": 1.6309579068233455, + "grad_norm": 0.18126173317432404, + "learning_rate": 1.7041761736897984e-06, + "loss": 0.0012, + "num_input_tokens_seen": 44994080, + "step": 66760 + }, + { + "epoch": 1.6310800576551925, + "grad_norm": 16.160730361938477, + "learning_rate": 1.7041156218768571e-06, + "loss": 0.0847, + "num_input_tokens_seen": 44997216, + "step": 66765 + }, + { + "epoch": 1.6312022084870397, + "grad_norm": 11.004593849182129, + "learning_rate": 1.7040550649433975e-06, + "loss": 0.0459, + "num_input_tokens_seen": 45000672, + "step": 66770 + }, + { + "epoch": 1.6313243593188869, + "grad_norm": 38.77311325073242, + "learning_rate": 1.70399450288986e-06, + "loss": 0.0595, + "num_input_tokens_seen": 45004000, + "step": 66775 + }, + { + "epoch": 1.631446510150734, + "grad_norm": 11.385759353637695, + "learning_rate": 1.7039339357166854e-06, + "loss": 0.1191, + "num_input_tokens_seen": 45008224, + "step": 66780 + }, + { + "epoch": 1.6315686609825812, + "grad_norm": 29.47661590576172, + "learning_rate": 1.703873363424314e-06, + "loss": 0.1004, + "num_input_tokens_seen": 45011232, + "step": 66785 + }, + { + "epoch": 1.6316908118144284, + "grad_norm": 37.67548370361328, + "learning_rate": 1.7038127860131859e-06, + "loss": 0.2868, + "num_input_tokens_seen": 45014880, + "step": 66790 + }, + { + "epoch": 1.6318129626462756, + "grad_norm": 0.49903541803359985, + "learning_rate": 1.7037522034837418e-06, + "loss": 0.0023, + "num_input_tokens_seen": 45018016, + "step": 66795 + }, + { + "epoch": 1.6319351134781228, + "grad_norm": 0.042273178696632385, + "learning_rate": 1.7036916158364227e-06, + "loss": 0.0195, + "num_input_tokens_seen": 45020960, + "step": 66800 + }, + { + "epoch": 1.63205726430997, + "grad_norm": 0.08780739456415176, + "learning_rate": 1.7036310230716686e-06, + "loss": 0.0265, + "num_input_tokens_seen": 45024032, + "step": 66805 + }, + { + "epoch": 1.6321794151418172, + "grad_norm": 0.3247887194156647, + "learning_rate": 1.7035704251899207e-06, + "loss": 0.0013, + "num_input_tokens_seen": 45027488, + "step": 66810 + }, + { + "epoch": 1.6323015659736644, + "grad_norm": 25.774038314819336, + "learning_rate": 1.7035098221916195e-06, + "loss": 0.0521, + "num_input_tokens_seen": 45030944, + "step": 66815 + }, + { + "epoch": 1.6324237168055116, + "grad_norm": 0.09247536212205887, + "learning_rate": 1.7034492140772057e-06, + "loss": 0.0854, + "num_input_tokens_seen": 45034208, + "step": 66820 + }, + { + "epoch": 1.6325458676373588, + "grad_norm": 0.2177923172712326, + "learning_rate": 1.7033886008471196e-06, + "loss": 0.1261, + "num_input_tokens_seen": 45037792, + "step": 66825 + }, + { + "epoch": 1.6326680184692057, + "grad_norm": 0.06111787632107735, + "learning_rate": 1.7033279825018026e-06, + "loss": 0.0007, + "num_input_tokens_seen": 45041184, + "step": 66830 + }, + { + "epoch": 1.632790169301053, + "grad_norm": 18.811080932617188, + "learning_rate": 1.7032673590416953e-06, + "loss": 0.1032, + "num_input_tokens_seen": 45044384, + "step": 66835 + }, + { + "epoch": 1.6329123201329, + "grad_norm": 0.5662484169006348, + "learning_rate": 1.7032067304672387e-06, + "loss": 0.0113, + "num_input_tokens_seen": 45047776, + "step": 66840 + }, + { + "epoch": 1.6330344709647473, + "grad_norm": 0.045659150928258896, + "learning_rate": 1.7031460967788735e-06, + "loss": 0.0011, + "num_input_tokens_seen": 45050976, + "step": 66845 + }, + { + "epoch": 1.6331566217965945, + "grad_norm": 0.10969070345163345, + "learning_rate": 1.7030854579770408e-06, + "loss": 0.1239, + "num_input_tokens_seen": 45053984, + "step": 66850 + }, + { + "epoch": 1.6332787726284415, + "grad_norm": 1.7151886224746704, + "learning_rate": 1.7030248140621816e-06, + "loss": 0.0526, + "num_input_tokens_seen": 45057312, + "step": 66855 + }, + { + "epoch": 1.6334009234602886, + "grad_norm": 0.0635419562458992, + "learning_rate": 1.7029641650347368e-06, + "loss": 0.0331, + "num_input_tokens_seen": 45060576, + "step": 66860 + }, + { + "epoch": 1.6335230742921358, + "grad_norm": 27.04867172241211, + "learning_rate": 1.7029035108951474e-06, + "loss": 0.1633, + "num_input_tokens_seen": 45063776, + "step": 66865 + }, + { + "epoch": 1.633645225123983, + "grad_norm": 125.61565399169922, + "learning_rate": 1.7028428516438549e-06, + "loss": 0.0636, + "num_input_tokens_seen": 45067104, + "step": 66870 + }, + { + "epoch": 1.6337673759558302, + "grad_norm": 211.0738525390625, + "learning_rate": 1.7027821872813002e-06, + "loss": 0.1001, + "num_input_tokens_seen": 45070112, + "step": 66875 + }, + { + "epoch": 1.6338895267876774, + "grad_norm": 1.0476380586624146, + "learning_rate": 1.7027215178079242e-06, + "loss": 0.0053, + "num_input_tokens_seen": 45073952, + "step": 66880 + }, + { + "epoch": 1.6340116776195246, + "grad_norm": 3.8595800399780273, + "learning_rate": 1.7026608432241683e-06, + "loss": 0.111, + "num_input_tokens_seen": 45077280, + "step": 66885 + }, + { + "epoch": 1.6341338284513718, + "grad_norm": 0.18746982514858246, + "learning_rate": 1.702600163530474e-06, + "loss": 0.0501, + "num_input_tokens_seen": 45080672, + "step": 66890 + }, + { + "epoch": 1.634255979283219, + "grad_norm": 14.622883796691895, + "learning_rate": 1.702539478727282e-06, + "loss": 0.2267, + "num_input_tokens_seen": 45083744, + "step": 66895 + }, + { + "epoch": 1.6343781301150662, + "grad_norm": 14.971632957458496, + "learning_rate": 1.7024787888150339e-06, + "loss": 0.1211, + "num_input_tokens_seen": 45086880, + "step": 66900 + }, + { + "epoch": 1.6345002809469134, + "grad_norm": 13.314606666564941, + "learning_rate": 1.7024180937941712e-06, + "loss": 0.0809, + "num_input_tokens_seen": 45090336, + "step": 66905 + }, + { + "epoch": 1.6346224317787605, + "grad_norm": 0.21442019939422607, + "learning_rate": 1.7023573936651355e-06, + "loss": 0.0365, + "num_input_tokens_seen": 45093792, + "step": 66910 + }, + { + "epoch": 1.6347445826106077, + "grad_norm": 0.02547086589038372, + "learning_rate": 1.7022966884283677e-06, + "loss": 0.0009, + "num_input_tokens_seen": 45096928, + "step": 66915 + }, + { + "epoch": 1.6348667334424547, + "grad_norm": 0.12754671275615692, + "learning_rate": 1.7022359780843095e-06, + "loss": 0.0279, + "num_input_tokens_seen": 45100000, + "step": 66920 + }, + { + "epoch": 1.634988884274302, + "grad_norm": 0.4442298710346222, + "learning_rate": 1.702175262633402e-06, + "loss": 0.0012, + "num_input_tokens_seen": 45103264, + "step": 66925 + }, + { + "epoch": 1.635111035106149, + "grad_norm": 0.027086948975920677, + "learning_rate": 1.7021145420760877e-06, + "loss": 0.096, + "num_input_tokens_seen": 45106720, + "step": 66930 + }, + { + "epoch": 1.6352331859379963, + "grad_norm": 26.187314987182617, + "learning_rate": 1.7020538164128074e-06, + "loss": 0.2329, + "num_input_tokens_seen": 45109920, + "step": 66935 + }, + { + "epoch": 1.6353553367698432, + "grad_norm": 138.03111267089844, + "learning_rate": 1.7019930856440027e-06, + "loss": 0.1619, + "num_input_tokens_seen": 45113568, + "step": 66940 + }, + { + "epoch": 1.6354774876016904, + "grad_norm": 1.1136021614074707, + "learning_rate": 1.7019323497701159e-06, + "loss": 0.0393, + "num_input_tokens_seen": 45116704, + "step": 66945 + }, + { + "epoch": 1.6355996384335376, + "grad_norm": 94.63319396972656, + "learning_rate": 1.7018716087915882e-06, + "loss": 0.0604, + "num_input_tokens_seen": 45119776, + "step": 66950 + }, + { + "epoch": 1.6357217892653848, + "grad_norm": 0.4391125440597534, + "learning_rate": 1.701810862708861e-06, + "loss": 0.0023, + "num_input_tokens_seen": 45122976, + "step": 66955 + }, + { + "epoch": 1.635843940097232, + "grad_norm": 0.6100500226020813, + "learning_rate": 1.7017501115223766e-06, + "loss": 0.0294, + "num_input_tokens_seen": 45126048, + "step": 66960 + }, + { + "epoch": 1.6359660909290792, + "grad_norm": 0.3072821795940399, + "learning_rate": 1.7016893552325766e-06, + "loss": 0.0043, + "num_input_tokens_seen": 45129248, + "step": 66965 + }, + { + "epoch": 1.6360882417609264, + "grad_norm": 0.6370717883110046, + "learning_rate": 1.701628593839903e-06, + "loss": 0.0391, + "num_input_tokens_seen": 45132768, + "step": 66970 + }, + { + "epoch": 1.6362103925927736, + "grad_norm": 0.20756691694259644, + "learning_rate": 1.7015678273447977e-06, + "loss": 0.1749, + "num_input_tokens_seen": 45136352, + "step": 66975 + }, + { + "epoch": 1.6363325434246208, + "grad_norm": 0.08585977554321289, + "learning_rate": 1.7015070557477022e-06, + "loss": 0.0005, + "num_input_tokens_seen": 45139552, + "step": 66980 + }, + { + "epoch": 1.636454694256468, + "grad_norm": 0.10425865650177002, + "learning_rate": 1.7014462790490586e-06, + "loss": 0.01, + "num_input_tokens_seen": 45143328, + "step": 66985 + }, + { + "epoch": 1.6365768450883151, + "grad_norm": 14.16010570526123, + "learning_rate": 1.7013854972493093e-06, + "loss": 0.0569, + "num_input_tokens_seen": 45146272, + "step": 66990 + }, + { + "epoch": 1.6366989959201623, + "grad_norm": 0.08292701095342636, + "learning_rate": 1.7013247103488962e-06, + "loss": 0.0698, + "num_input_tokens_seen": 45149600, + "step": 66995 + }, + { + "epoch": 1.6368211467520095, + "grad_norm": 0.12289398908615112, + "learning_rate": 1.7012639183482609e-06, + "loss": 0.0003, + "num_input_tokens_seen": 45153248, + "step": 67000 + }, + { + "epoch": 1.6369432975838567, + "grad_norm": 0.17947377264499664, + "learning_rate": 1.7012031212478456e-06, + "loss": 0.0504, + "num_input_tokens_seen": 45156192, + "step": 67005 + }, + { + "epoch": 1.6370654484157037, + "grad_norm": 12.637063980102539, + "learning_rate": 1.7011423190480926e-06, + "loss": 0.1407, + "num_input_tokens_seen": 45159264, + "step": 67010 + }, + { + "epoch": 1.6371875992475509, + "grad_norm": 0.3538811504840851, + "learning_rate": 1.7010815117494444e-06, + "loss": 0.0457, + "num_input_tokens_seen": 45162528, + "step": 67015 + }, + { + "epoch": 1.637309750079398, + "grad_norm": 0.1899852603673935, + "learning_rate": 1.7010206993523425e-06, + "loss": 0.084, + "num_input_tokens_seen": 45165792, + "step": 67020 + }, + { + "epoch": 1.6374319009112452, + "grad_norm": 0.1661899983882904, + "learning_rate": 1.70095988185723e-06, + "loss": 0.0945, + "num_input_tokens_seen": 45169056, + "step": 67025 + }, + { + "epoch": 1.6375540517430922, + "grad_norm": 0.22782708704471588, + "learning_rate": 1.7008990592645483e-06, + "loss": 0.2022, + "num_input_tokens_seen": 45172384, + "step": 67030 + }, + { + "epoch": 1.6376762025749394, + "grad_norm": 17.975170135498047, + "learning_rate": 1.7008382315747402e-06, + "loss": 0.1716, + "num_input_tokens_seen": 45175712, + "step": 67035 + }, + { + "epoch": 1.6377983534067866, + "grad_norm": 8.688823699951172, + "learning_rate": 1.700777398788248e-06, + "loss": 0.1536, + "num_input_tokens_seen": 45178656, + "step": 67040 + }, + { + "epoch": 1.6379205042386338, + "grad_norm": 21.929100036621094, + "learning_rate": 1.700716560905514e-06, + "loss": 0.0731, + "num_input_tokens_seen": 45182368, + "step": 67045 + }, + { + "epoch": 1.638042655070481, + "grad_norm": 0.026093896478414536, + "learning_rate": 1.7006557179269806e-06, + "loss": 0.0579, + "num_input_tokens_seen": 45185504, + "step": 67050 + }, + { + "epoch": 1.6381648059023282, + "grad_norm": 0.4132900834083557, + "learning_rate": 1.7005948698530907e-06, + "loss": 0.0516, + "num_input_tokens_seen": 45188640, + "step": 67055 + }, + { + "epoch": 1.6382869567341753, + "grad_norm": 0.28247717022895813, + "learning_rate": 1.7005340166842866e-06, + "loss": 0.0013, + "num_input_tokens_seen": 45191904, + "step": 67060 + }, + { + "epoch": 1.6384091075660225, + "grad_norm": 8.85312557220459, + "learning_rate": 1.7004731584210102e-06, + "loss": 0.0958, + "num_input_tokens_seen": 45195040, + "step": 67065 + }, + { + "epoch": 1.6385312583978697, + "grad_norm": 1.7231601476669312, + "learning_rate": 1.700412295063705e-06, + "loss": 0.0385, + "num_input_tokens_seen": 45198432, + "step": 67070 + }, + { + "epoch": 1.638653409229717, + "grad_norm": 0.08209192752838135, + "learning_rate": 1.700351426612813e-06, + "loss": 0.066, + "num_input_tokens_seen": 45201504, + "step": 67075 + }, + { + "epoch": 1.638775560061564, + "grad_norm": 22.106382369995117, + "learning_rate": 1.7002905530687767e-06, + "loss": 0.1887, + "num_input_tokens_seen": 45204704, + "step": 67080 + }, + { + "epoch": 1.6388977108934113, + "grad_norm": 0.6420429348945618, + "learning_rate": 1.7002296744320396e-06, + "loss": 0.109, + "num_input_tokens_seen": 45207840, + "step": 67085 + }, + { + "epoch": 1.6390198617252585, + "grad_norm": 0.14940212666988373, + "learning_rate": 1.700168790703044e-06, + "loss": 0.0866, + "num_input_tokens_seen": 45211168, + "step": 67090 + }, + { + "epoch": 1.6391420125571057, + "grad_norm": 0.1484726071357727, + "learning_rate": 1.7001079018822325e-06, + "loss": 0.0852, + "num_input_tokens_seen": 45214752, + "step": 67095 + }, + { + "epoch": 1.6392641633889526, + "grad_norm": 23.14564323425293, + "learning_rate": 1.7000470079700482e-06, + "loss": 0.067, + "num_input_tokens_seen": 45217824, + "step": 67100 + }, + { + "epoch": 1.6393863142207998, + "grad_norm": 0.2891077697277069, + "learning_rate": 1.6999861089669337e-06, + "loss": 0.0357, + "num_input_tokens_seen": 45221408, + "step": 67105 + }, + { + "epoch": 1.639508465052647, + "grad_norm": 0.6765737533569336, + "learning_rate": 1.6999252048733314e-06, + "loss": 0.0021, + "num_input_tokens_seen": 45224672, + "step": 67110 + }, + { + "epoch": 1.6396306158844942, + "grad_norm": 10.747055053710938, + "learning_rate": 1.6998642956896853e-06, + "loss": 0.1089, + "num_input_tokens_seen": 45228128, + "step": 67115 + }, + { + "epoch": 1.6397527667163412, + "grad_norm": 0.43889570236206055, + "learning_rate": 1.699803381416438e-06, + "loss": 0.0386, + "num_input_tokens_seen": 45231840, + "step": 67120 + }, + { + "epoch": 1.6398749175481884, + "grad_norm": 18.788455963134766, + "learning_rate": 1.699742462054032e-06, + "loss": 0.0364, + "num_input_tokens_seen": 45235424, + "step": 67125 + }, + { + "epoch": 1.6399970683800356, + "grad_norm": 0.07699179649353027, + "learning_rate": 1.6996815376029105e-06, + "loss": 0.0412, + "num_input_tokens_seen": 45238752, + "step": 67130 + }, + { + "epoch": 1.6401192192118828, + "grad_norm": 0.29230740666389465, + "learning_rate": 1.6996206080635167e-06, + "loss": 0.0014, + "num_input_tokens_seen": 45242272, + "step": 67135 + }, + { + "epoch": 1.64024137004373, + "grad_norm": 42.41120910644531, + "learning_rate": 1.6995596734362937e-06, + "loss": 0.1312, + "num_input_tokens_seen": 45245600, + "step": 67140 + }, + { + "epoch": 1.6403635208755771, + "grad_norm": 0.2593778073787689, + "learning_rate": 1.6994987337216845e-06, + "loss": 0.1014, + "num_input_tokens_seen": 45249184, + "step": 67145 + }, + { + "epoch": 1.6404856717074243, + "grad_norm": 0.2428825944662094, + "learning_rate": 1.6994377889201328e-06, + "loss": 0.1057, + "num_input_tokens_seen": 45252704, + "step": 67150 + }, + { + "epoch": 1.6406078225392715, + "grad_norm": 22.88860321044922, + "learning_rate": 1.699376839032081e-06, + "loss": 0.1044, + "num_input_tokens_seen": 45256224, + "step": 67155 + }, + { + "epoch": 1.6407299733711187, + "grad_norm": 0.028404124081134796, + "learning_rate": 1.6993158840579728e-06, + "loss": 0.0037, + "num_input_tokens_seen": 45260064, + "step": 67160 + }, + { + "epoch": 1.640852124202966, + "grad_norm": 0.25550132989883423, + "learning_rate": 1.6992549239982515e-06, + "loss": 0.0722, + "num_input_tokens_seen": 45263392, + "step": 67165 + }, + { + "epoch": 1.640974275034813, + "grad_norm": 21.50788116455078, + "learning_rate": 1.6991939588533601e-06, + "loss": 0.159, + "num_input_tokens_seen": 45266208, + "step": 67170 + }, + { + "epoch": 1.6410964258666603, + "grad_norm": 0.05193869397044182, + "learning_rate": 1.6991329886237421e-06, + "loss": 0.1023, + "num_input_tokens_seen": 45269984, + "step": 67175 + }, + { + "epoch": 1.6412185766985075, + "grad_norm": 0.48035043478012085, + "learning_rate": 1.6990720133098412e-06, + "loss": 0.0088, + "num_input_tokens_seen": 45272992, + "step": 67180 + }, + { + "epoch": 1.6413407275303544, + "grad_norm": 0.20229879021644592, + "learning_rate": 1.6990110329121005e-06, + "loss": 0.002, + "num_input_tokens_seen": 45276320, + "step": 67185 + }, + { + "epoch": 1.6414628783622016, + "grad_norm": 0.06353387981653214, + "learning_rate": 1.6989500474309637e-06, + "loss": 0.0477, + "num_input_tokens_seen": 45279520, + "step": 67190 + }, + { + "epoch": 1.6415850291940488, + "grad_norm": 0.2843955159187317, + "learning_rate": 1.6988890568668741e-06, + "loss": 0.1012, + "num_input_tokens_seen": 45282848, + "step": 67195 + }, + { + "epoch": 1.641707180025896, + "grad_norm": 0.19131289422512054, + "learning_rate": 1.6988280612202751e-06, + "loss": 0.0607, + "num_input_tokens_seen": 45285856, + "step": 67200 + }, + { + "epoch": 1.6418293308577432, + "grad_norm": 13.557336807250977, + "learning_rate": 1.6987670604916106e-06, + "loss": 0.302, + "num_input_tokens_seen": 45288864, + "step": 67205 + }, + { + "epoch": 1.6419514816895902, + "grad_norm": 3.8761701583862305, + "learning_rate": 1.6987060546813242e-06, + "loss": 0.1141, + "num_input_tokens_seen": 45292448, + "step": 67210 + }, + { + "epoch": 1.6420736325214373, + "grad_norm": 131.6263885498047, + "learning_rate": 1.6986450437898592e-06, + "loss": 0.1661, + "num_input_tokens_seen": 45295456, + "step": 67215 + }, + { + "epoch": 1.6421957833532845, + "grad_norm": 72.59944915771484, + "learning_rate": 1.6985840278176596e-06, + "loss": 0.0886, + "num_input_tokens_seen": 45298592, + "step": 67220 + }, + { + "epoch": 1.6423179341851317, + "grad_norm": 8.932476997375488, + "learning_rate": 1.6985230067651695e-06, + "loss": 0.2015, + "num_input_tokens_seen": 45301792, + "step": 67225 + }, + { + "epoch": 1.642440085016979, + "grad_norm": 11.90882396697998, + "learning_rate": 1.6984619806328317e-06, + "loss": 0.159, + "num_input_tokens_seen": 45306208, + "step": 67230 + }, + { + "epoch": 1.642562235848826, + "grad_norm": 0.7757220268249512, + "learning_rate": 1.6984009494210904e-06, + "loss": 0.0794, + "num_input_tokens_seen": 45309344, + "step": 67235 + }, + { + "epoch": 1.6426843866806733, + "grad_norm": 0.2992716133594513, + "learning_rate": 1.69833991313039e-06, + "loss": 0.0459, + "num_input_tokens_seen": 45312800, + "step": 67240 + }, + { + "epoch": 1.6428065375125205, + "grad_norm": 97.35224151611328, + "learning_rate": 1.6982788717611735e-06, + "loss": 0.1316, + "num_input_tokens_seen": 45316000, + "step": 67245 + }, + { + "epoch": 1.6429286883443677, + "grad_norm": 0.13269208371639252, + "learning_rate": 1.6982178253138857e-06, + "loss": 0.0026, + "num_input_tokens_seen": 45319392, + "step": 67250 + }, + { + "epoch": 1.6430508391762149, + "grad_norm": 0.40496477484703064, + "learning_rate": 1.6981567737889698e-06, + "loss": 0.0425, + "num_input_tokens_seen": 45323488, + "step": 67255 + }, + { + "epoch": 1.643172990008062, + "grad_norm": 0.16997823119163513, + "learning_rate": 1.6980957171868702e-06, + "loss": 0.0061, + "num_input_tokens_seen": 45326688, + "step": 67260 + }, + { + "epoch": 1.6432951408399092, + "grad_norm": 1.7047306299209595, + "learning_rate": 1.6980346555080306e-06, + "loss": 0.1156, + "num_input_tokens_seen": 45330208, + "step": 67265 + }, + { + "epoch": 1.6434172916717564, + "grad_norm": 0.08190785348415375, + "learning_rate": 1.6979735887528954e-06, + "loss": 0.0174, + "num_input_tokens_seen": 45333536, + "step": 67270 + }, + { + "epoch": 1.6435394425036034, + "grad_norm": 34.33685302734375, + "learning_rate": 1.6979125169219085e-06, + "loss": 0.1974, + "num_input_tokens_seen": 45336928, + "step": 67275 + }, + { + "epoch": 1.6436615933354506, + "grad_norm": 0.03678745776414871, + "learning_rate": 1.6978514400155137e-06, + "loss": 0.0702, + "num_input_tokens_seen": 45340192, + "step": 67280 + }, + { + "epoch": 1.6437837441672978, + "grad_norm": 0.08641359955072403, + "learning_rate": 1.697790358034156e-06, + "loss": 0.0019, + "num_input_tokens_seen": 45343520, + "step": 67285 + }, + { + "epoch": 1.643905894999145, + "grad_norm": 0.09858337789773941, + "learning_rate": 1.6977292709782792e-06, + "loss": 0.0007, + "num_input_tokens_seen": 45346656, + "step": 67290 + }, + { + "epoch": 1.6440280458309922, + "grad_norm": 0.03240935131907463, + "learning_rate": 1.6976681788483268e-06, + "loss": 0.1003, + "num_input_tokens_seen": 45349984, + "step": 67295 + }, + { + "epoch": 1.6441501966628391, + "grad_norm": 0.8038926720619202, + "learning_rate": 1.6976070816447443e-06, + "loss": 0.0783, + "num_input_tokens_seen": 45353248, + "step": 67300 + }, + { + "epoch": 1.6442723474946863, + "grad_norm": 0.22960323095321655, + "learning_rate": 1.6975459793679753e-06, + "loss": 0.0014, + "num_input_tokens_seen": 45356448, + "step": 67305 + }, + { + "epoch": 1.6443944983265335, + "grad_norm": 0.05000200867652893, + "learning_rate": 1.6974848720184647e-06, + "loss": 0.0959, + "num_input_tokens_seen": 45359712, + "step": 67310 + }, + { + "epoch": 1.6445166491583807, + "grad_norm": 177.49916076660156, + "learning_rate": 1.697423759596656e-06, + "loss": 0.15, + "num_input_tokens_seen": 45362912, + "step": 67315 + }, + { + "epoch": 1.6446387999902279, + "grad_norm": 0.19165050983428955, + "learning_rate": 1.6973626421029944e-06, + "loss": 0.0369, + "num_input_tokens_seen": 45366432, + "step": 67320 + }, + { + "epoch": 1.644760950822075, + "grad_norm": 0.19500640034675598, + "learning_rate": 1.697301519537924e-06, + "loss": 0.149, + "num_input_tokens_seen": 45369760, + "step": 67325 + }, + { + "epoch": 1.6448831016539223, + "grad_norm": 25.245399475097656, + "learning_rate": 1.6972403919018895e-06, + "loss": 0.0926, + "num_input_tokens_seen": 45373408, + "step": 67330 + }, + { + "epoch": 1.6450052524857695, + "grad_norm": 0.12566934525966644, + "learning_rate": 1.6971792591953352e-06, + "loss": 0.0958, + "num_input_tokens_seen": 45376864, + "step": 67335 + }, + { + "epoch": 1.6451274033176166, + "grad_norm": 0.1085042655467987, + "learning_rate": 1.6971181214187058e-06, + "loss": 0.0568, + "num_input_tokens_seen": 45380192, + "step": 67340 + }, + { + "epoch": 1.6452495541494638, + "grad_norm": 9.45516586303711, + "learning_rate": 1.697056978572446e-06, + "loss": 0.0958, + "num_input_tokens_seen": 45383072, + "step": 67345 + }, + { + "epoch": 1.645371704981311, + "grad_norm": 19.096742630004883, + "learning_rate": 1.6969958306570002e-06, + "loss": 0.1171, + "num_input_tokens_seen": 45386208, + "step": 67350 + }, + { + "epoch": 1.6454938558131582, + "grad_norm": 27.534833908081055, + "learning_rate": 1.6969346776728134e-06, + "loss": 0.1764, + "num_input_tokens_seen": 45390048, + "step": 67355 + }, + { + "epoch": 1.6456160066450054, + "grad_norm": 12.015131950378418, + "learning_rate": 1.6968735196203303e-06, + "loss": 0.1151, + "num_input_tokens_seen": 45393824, + "step": 67360 + }, + { + "epoch": 1.6457381574768524, + "grad_norm": 3.8008766174316406, + "learning_rate": 1.6968123564999952e-06, + "loss": 0.1273, + "num_input_tokens_seen": 45397088, + "step": 67365 + }, + { + "epoch": 1.6458603083086996, + "grad_norm": 0.44486188888549805, + "learning_rate": 1.6967511883122536e-06, + "loss": 0.1678, + "num_input_tokens_seen": 45400480, + "step": 67370 + }, + { + "epoch": 1.6459824591405467, + "grad_norm": 0.8876047134399414, + "learning_rate": 1.6966900150575498e-06, + "loss": 0.1328, + "num_input_tokens_seen": 45403936, + "step": 67375 + }, + { + "epoch": 1.646104609972394, + "grad_norm": 0.7775227427482605, + "learning_rate": 1.696628836736329e-06, + "loss": 0.0541, + "num_input_tokens_seen": 45407392, + "step": 67380 + }, + { + "epoch": 1.6462267608042411, + "grad_norm": 70.5943374633789, + "learning_rate": 1.6965676533490357e-06, + "loss": 0.0406, + "num_input_tokens_seen": 45411040, + "step": 67385 + }, + { + "epoch": 1.646348911636088, + "grad_norm": 45.5142936706543, + "learning_rate": 1.6965064648961146e-06, + "loss": 0.0364, + "num_input_tokens_seen": 45414240, + "step": 67390 + }, + { + "epoch": 1.6464710624679353, + "grad_norm": 44.99553680419922, + "learning_rate": 1.696445271378012e-06, + "loss": 0.0662, + "num_input_tokens_seen": 45417760, + "step": 67395 + }, + { + "epoch": 1.6465932132997825, + "grad_norm": 0.03843267634510994, + "learning_rate": 1.6963840727951717e-06, + "loss": 0.1032, + "num_input_tokens_seen": 45421024, + "step": 67400 + }, + { + "epoch": 1.6467153641316297, + "grad_norm": 0.28163111209869385, + "learning_rate": 1.6963228691480391e-06, + "loss": 0.0662, + "num_input_tokens_seen": 45424672, + "step": 67405 + }, + { + "epoch": 1.6468375149634769, + "grad_norm": 223.5466766357422, + "learning_rate": 1.6962616604370595e-06, + "loss": 0.2042, + "num_input_tokens_seen": 45427936, + "step": 67410 + }, + { + "epoch": 1.646959665795324, + "grad_norm": 9.289713859558105, + "learning_rate": 1.6962004466626776e-06, + "loss": 0.1869, + "num_input_tokens_seen": 45430816, + "step": 67415 + }, + { + "epoch": 1.6470818166271712, + "grad_norm": 11.627837181091309, + "learning_rate": 1.6961392278253386e-06, + "loss": 0.1323, + "num_input_tokens_seen": 45434336, + "step": 67420 + }, + { + "epoch": 1.6472039674590184, + "grad_norm": 0.8815947771072388, + "learning_rate": 1.6960780039254882e-06, + "loss": 0.084, + "num_input_tokens_seen": 45437664, + "step": 67425 + }, + { + "epoch": 1.6473261182908656, + "grad_norm": 10.567502975463867, + "learning_rate": 1.6960167749635714e-06, + "loss": 0.1477, + "num_input_tokens_seen": 45441056, + "step": 67430 + }, + { + "epoch": 1.6474482691227128, + "grad_norm": 0.7220805883407593, + "learning_rate": 1.6959555409400332e-06, + "loss": 0.0439, + "num_input_tokens_seen": 45444384, + "step": 67435 + }, + { + "epoch": 1.64757041995456, + "grad_norm": 39.08369827270508, + "learning_rate": 1.6958943018553194e-06, + "loss": 0.0756, + "num_input_tokens_seen": 45447712, + "step": 67440 + }, + { + "epoch": 1.6476925707864072, + "grad_norm": 0.0401715449988842, + "learning_rate": 1.695833057709875e-06, + "loss": 0.0039, + "num_input_tokens_seen": 45451232, + "step": 67445 + }, + { + "epoch": 1.6478147216182544, + "grad_norm": 0.2429829090833664, + "learning_rate": 1.6957718085041453e-06, + "loss": 0.0736, + "num_input_tokens_seen": 45454304, + "step": 67450 + }, + { + "epoch": 1.6479368724501013, + "grad_norm": 0.35649529099464417, + "learning_rate": 1.6957105542385758e-06, + "loss": 0.1157, + "num_input_tokens_seen": 45457376, + "step": 67455 + }, + { + "epoch": 1.6480590232819485, + "grad_norm": 180.90599060058594, + "learning_rate": 1.695649294913612e-06, + "loss": 0.0474, + "num_input_tokens_seen": 45460448, + "step": 67460 + }, + { + "epoch": 1.6481811741137957, + "grad_norm": 58.405635833740234, + "learning_rate": 1.6955880305296996e-06, + "loss": 0.2013, + "num_input_tokens_seen": 45463904, + "step": 67465 + }, + { + "epoch": 1.648303324945643, + "grad_norm": 124.4507064819336, + "learning_rate": 1.695526761087284e-06, + "loss": 0.0781, + "num_input_tokens_seen": 45467424, + "step": 67470 + }, + { + "epoch": 1.6484254757774899, + "grad_norm": 0.10375184565782547, + "learning_rate": 1.6954654865868107e-06, + "loss": 0.0341, + "num_input_tokens_seen": 45470816, + "step": 67475 + }, + { + "epoch": 1.648547626609337, + "grad_norm": 20.57008934020996, + "learning_rate": 1.695404207028725e-06, + "loss": 0.0845, + "num_input_tokens_seen": 45474400, + "step": 67480 + }, + { + "epoch": 1.6486697774411843, + "grad_norm": 36.42217254638672, + "learning_rate": 1.6953429224134731e-06, + "loss": 0.2447, + "num_input_tokens_seen": 45478112, + "step": 67485 + }, + { + "epoch": 1.6487919282730314, + "grad_norm": 0.3584747910499573, + "learning_rate": 1.6952816327415004e-06, + "loss": 0.0339, + "num_input_tokens_seen": 45481184, + "step": 67490 + }, + { + "epoch": 1.6489140791048786, + "grad_norm": 0.2505834400653839, + "learning_rate": 1.6952203380132529e-06, + "loss": 0.1602, + "num_input_tokens_seen": 45484704, + "step": 67495 + }, + { + "epoch": 1.6490362299367258, + "grad_norm": 0.3703761100769043, + "learning_rate": 1.6951590382291761e-06, + "loss": 0.0029, + "num_input_tokens_seen": 45488608, + "step": 67500 + }, + { + "epoch": 1.649158380768573, + "grad_norm": 0.5771975517272949, + "learning_rate": 1.6950977333897156e-06, + "loss": 0.1515, + "num_input_tokens_seen": 45491680, + "step": 67505 + }, + { + "epoch": 1.6492805316004202, + "grad_norm": 0.5081115961074829, + "learning_rate": 1.6950364234953173e-06, + "loss": 0.0019, + "num_input_tokens_seen": 45495200, + "step": 67510 + }, + { + "epoch": 1.6494026824322674, + "grad_norm": 0.19448351860046387, + "learning_rate": 1.6949751085464273e-06, + "loss": 0.0996, + "num_input_tokens_seen": 45498336, + "step": 67515 + }, + { + "epoch": 1.6495248332641146, + "grad_norm": 0.21856345236301422, + "learning_rate": 1.6949137885434914e-06, + "loss": 0.0285, + "num_input_tokens_seen": 45501536, + "step": 67520 + }, + { + "epoch": 1.6496469840959618, + "grad_norm": 28.951377868652344, + "learning_rate": 1.6948524634869555e-06, + "loss": 0.1134, + "num_input_tokens_seen": 45505056, + "step": 67525 + }, + { + "epoch": 1.649769134927809, + "grad_norm": 0.367519736289978, + "learning_rate": 1.6947911333772657e-06, + "loss": 0.0424, + "num_input_tokens_seen": 45508256, + "step": 67530 + }, + { + "epoch": 1.6498912857596562, + "grad_norm": 15.366878509521484, + "learning_rate": 1.6947297982148678e-06, + "loss": 0.2835, + "num_input_tokens_seen": 45511456, + "step": 67535 + }, + { + "epoch": 1.6500134365915033, + "grad_norm": 27.542980194091797, + "learning_rate": 1.694668458000208e-06, + "loss": 0.1688, + "num_input_tokens_seen": 45514976, + "step": 67540 + }, + { + "epoch": 1.6501355874233503, + "grad_norm": 0.08444590866565704, + "learning_rate": 1.6946071127337323e-06, + "loss": 0.019, + "num_input_tokens_seen": 45518432, + "step": 67545 + }, + { + "epoch": 1.6502577382551975, + "grad_norm": 0.12355171144008636, + "learning_rate": 1.694545762415887e-06, + "loss": 0.0685, + "num_input_tokens_seen": 45522272, + "step": 67550 + }, + { + "epoch": 1.6503798890870447, + "grad_norm": 0.13620570302009583, + "learning_rate": 1.6944844070471178e-06, + "loss": 0.1469, + "num_input_tokens_seen": 45525792, + "step": 67555 + }, + { + "epoch": 1.6505020399188919, + "grad_norm": 0.13408538699150085, + "learning_rate": 1.6944230466278712e-06, + "loss": 0.1414, + "num_input_tokens_seen": 45529568, + "step": 67560 + }, + { + "epoch": 1.6506241907507389, + "grad_norm": 0.328299343585968, + "learning_rate": 1.6943616811585936e-06, + "loss": 0.0635, + "num_input_tokens_seen": 45532640, + "step": 67565 + }, + { + "epoch": 1.650746341582586, + "grad_norm": 3.6790201663970947, + "learning_rate": 1.6943003106397313e-06, + "loss": 0.0022, + "num_input_tokens_seen": 45536096, + "step": 67570 + }, + { + "epoch": 1.6508684924144332, + "grad_norm": 3.0552306175231934, + "learning_rate": 1.69423893507173e-06, + "loss": 0.0309, + "num_input_tokens_seen": 45539424, + "step": 67575 + }, + { + "epoch": 1.6509906432462804, + "grad_norm": 30.636735916137695, + "learning_rate": 1.6941775544550368e-06, + "loss": 0.1266, + "num_input_tokens_seen": 45542944, + "step": 67580 + }, + { + "epoch": 1.6511127940781276, + "grad_norm": 21.842836380004883, + "learning_rate": 1.6941161687900975e-06, + "loss": 0.0458, + "num_input_tokens_seen": 45546336, + "step": 67585 + }, + { + "epoch": 1.6512349449099748, + "grad_norm": 18.858291625976562, + "learning_rate": 1.694054778077359e-06, + "loss": 0.21, + "num_input_tokens_seen": 45549664, + "step": 67590 + }, + { + "epoch": 1.651357095741822, + "grad_norm": 22.735301971435547, + "learning_rate": 1.693993382317267e-06, + "loss": 0.0357, + "num_input_tokens_seen": 45552608, + "step": 67595 + }, + { + "epoch": 1.6514792465736692, + "grad_norm": 13.76203441619873, + "learning_rate": 1.6939319815102686e-06, + "loss": 0.0687, + "num_input_tokens_seen": 45555616, + "step": 67600 + }, + { + "epoch": 1.6516013974055164, + "grad_norm": 0.1456945240497589, + "learning_rate": 1.6938705756568106e-06, + "loss": 0.0286, + "num_input_tokens_seen": 45559136, + "step": 67605 + }, + { + "epoch": 1.6517235482373636, + "grad_norm": 13.13014030456543, + "learning_rate": 1.6938091647573385e-06, + "loss": 0.1458, + "num_input_tokens_seen": 45562400, + "step": 67610 + }, + { + "epoch": 1.6518456990692107, + "grad_norm": 7.869939804077148, + "learning_rate": 1.6937477488122997e-06, + "loss": 0.0453, + "num_input_tokens_seen": 45565728, + "step": 67615 + }, + { + "epoch": 1.651967849901058, + "grad_norm": 0.9261747598648071, + "learning_rate": 1.693686327822141e-06, + "loss": 0.1273, + "num_input_tokens_seen": 45569184, + "step": 67620 + }, + { + "epoch": 1.6520900007329051, + "grad_norm": 20.965072631835938, + "learning_rate": 1.6936249017873086e-06, + "loss": 0.0947, + "num_input_tokens_seen": 45572384, + "step": 67625 + }, + { + "epoch": 1.6522121515647523, + "grad_norm": 153.28591918945312, + "learning_rate": 1.6935634707082494e-06, + "loss": 0.0361, + "num_input_tokens_seen": 45575776, + "step": 67630 + }, + { + "epoch": 1.6523343023965993, + "grad_norm": 0.2594250440597534, + "learning_rate": 1.69350203458541e-06, + "loss": 0.1171, + "num_input_tokens_seen": 45579232, + "step": 67635 + }, + { + "epoch": 1.6524564532284465, + "grad_norm": 0.5927287340164185, + "learning_rate": 1.6934405934192372e-06, + "loss": 0.0348, + "num_input_tokens_seen": 45582496, + "step": 67640 + }, + { + "epoch": 1.6525786040602937, + "grad_norm": 2.2029905319213867, + "learning_rate": 1.693379147210178e-06, + "loss": 0.0035, + "num_input_tokens_seen": 45586080, + "step": 67645 + }, + { + "epoch": 1.6527007548921409, + "grad_norm": 0.10701204091310501, + "learning_rate": 1.6933176959586792e-06, + "loss": 0.2543, + "num_input_tokens_seen": 45589216, + "step": 67650 + }, + { + "epoch": 1.6528229057239878, + "grad_norm": 0.11217296123504639, + "learning_rate": 1.6932562396651874e-06, + "loss": 0.0465, + "num_input_tokens_seen": 45592288, + "step": 67655 + }, + { + "epoch": 1.652945056555835, + "grad_norm": 0.21580864489078522, + "learning_rate": 1.6931947783301502e-06, + "loss": 0.049, + "num_input_tokens_seen": 45595296, + "step": 67660 + }, + { + "epoch": 1.6530672073876822, + "grad_norm": 0.8896117806434631, + "learning_rate": 1.6931333119540138e-06, + "loss": 0.1545, + "num_input_tokens_seen": 45598688, + "step": 67665 + }, + { + "epoch": 1.6531893582195294, + "grad_norm": 0.05038753151893616, + "learning_rate": 1.6930718405372254e-06, + "loss": 0.1369, + "num_input_tokens_seen": 45601952, + "step": 67670 + }, + { + "epoch": 1.6533115090513766, + "grad_norm": 30.97816276550293, + "learning_rate": 1.6930103640802327e-06, + "loss": 0.1289, + "num_input_tokens_seen": 45605472, + "step": 67675 + }, + { + "epoch": 1.6534336598832238, + "grad_norm": 17.580766677856445, + "learning_rate": 1.6929488825834816e-06, + "loss": 0.194, + "num_input_tokens_seen": 45608480, + "step": 67680 + }, + { + "epoch": 1.653555810715071, + "grad_norm": 0.264511376619339, + "learning_rate": 1.6928873960474204e-06, + "loss": 0.0867, + "num_input_tokens_seen": 45611872, + "step": 67685 + }, + { + "epoch": 1.6536779615469182, + "grad_norm": 0.17803645133972168, + "learning_rate": 1.6928259044724954e-06, + "loss": 0.0908, + "num_input_tokens_seen": 45615136, + "step": 67690 + }, + { + "epoch": 1.6538001123787653, + "grad_norm": 74.42533111572266, + "learning_rate": 1.6927644078591539e-06, + "loss": 0.0776, + "num_input_tokens_seen": 45618848, + "step": 67695 + }, + { + "epoch": 1.6539222632106125, + "grad_norm": 0.20543590188026428, + "learning_rate": 1.6927029062078435e-06, + "loss": 0.0745, + "num_input_tokens_seen": 45622560, + "step": 67700 + }, + { + "epoch": 1.6540444140424597, + "grad_norm": 0.21554772555828094, + "learning_rate": 1.6926413995190112e-06, + "loss": 0.0492, + "num_input_tokens_seen": 45626336, + "step": 67705 + }, + { + "epoch": 1.654166564874307, + "grad_norm": 0.43790730834007263, + "learning_rate": 1.6925798877931046e-06, + "loss": 0.0027, + "num_input_tokens_seen": 45629600, + "step": 67710 + }, + { + "epoch": 1.654288715706154, + "grad_norm": 9.921793937683105, + "learning_rate": 1.6925183710305704e-06, + "loss": 0.1117, + "num_input_tokens_seen": 45632736, + "step": 67715 + }, + { + "epoch": 1.6544108665380013, + "grad_norm": 0.18384996056556702, + "learning_rate": 1.6924568492318566e-06, + "loss": 0.0551, + "num_input_tokens_seen": 45636384, + "step": 67720 + }, + { + "epoch": 1.6545330173698483, + "grad_norm": 16.222341537475586, + "learning_rate": 1.6923953223974103e-06, + "loss": 0.0821, + "num_input_tokens_seen": 45639712, + "step": 67725 + }, + { + "epoch": 1.6546551682016954, + "grad_norm": 0.13005872070789337, + "learning_rate": 1.692333790527679e-06, + "loss": 0.0315, + "num_input_tokens_seen": 45642720, + "step": 67730 + }, + { + "epoch": 1.6547773190335426, + "grad_norm": 0.10399110615253448, + "learning_rate": 1.69227225362311e-06, + "loss": 0.0022, + "num_input_tokens_seen": 45646304, + "step": 67735 + }, + { + "epoch": 1.6548994698653898, + "grad_norm": 11.264019012451172, + "learning_rate": 1.692210711684151e-06, + "loss": 0.1646, + "num_input_tokens_seen": 45649888, + "step": 67740 + }, + { + "epoch": 1.6550216206972368, + "grad_norm": 0.09520275890827179, + "learning_rate": 1.6921491647112497e-06, + "loss": 0.0147, + "num_input_tokens_seen": 45653216, + "step": 67745 + }, + { + "epoch": 1.655143771529084, + "grad_norm": 0.1157083511352539, + "learning_rate": 1.6920876127048534e-06, + "loss": 0.0367, + "num_input_tokens_seen": 45656416, + "step": 67750 + }, + { + "epoch": 1.6552659223609312, + "grad_norm": 0.22962799668312073, + "learning_rate": 1.6920260556654098e-06, + "loss": 0.0492, + "num_input_tokens_seen": 45659360, + "step": 67755 + }, + { + "epoch": 1.6553880731927784, + "grad_norm": 0.038359832018613815, + "learning_rate": 1.6919644935933666e-06, + "loss": 0.0991, + "num_input_tokens_seen": 45662752, + "step": 67760 + }, + { + "epoch": 1.6555102240246256, + "grad_norm": 0.17788651585578918, + "learning_rate": 1.6919029264891713e-06, + "loss": 0.062, + "num_input_tokens_seen": 45666144, + "step": 67765 + }, + { + "epoch": 1.6556323748564727, + "grad_norm": 0.0891755148768425, + "learning_rate": 1.6918413543532722e-06, + "loss": 0.0816, + "num_input_tokens_seen": 45669088, + "step": 67770 + }, + { + "epoch": 1.65575452568832, + "grad_norm": 11.52059268951416, + "learning_rate": 1.6917797771861165e-06, + "loss": 0.0582, + "num_input_tokens_seen": 45672032, + "step": 67775 + }, + { + "epoch": 1.6558766765201671, + "grad_norm": 0.11397583037614822, + "learning_rate": 1.691718194988152e-06, + "loss": 0.0879, + "num_input_tokens_seen": 45675104, + "step": 67780 + }, + { + "epoch": 1.6559988273520143, + "grad_norm": 0.13635925948619843, + "learning_rate": 1.6916566077598272e-06, + "loss": 0.1022, + "num_input_tokens_seen": 45677984, + "step": 67785 + }, + { + "epoch": 1.6561209781838615, + "grad_norm": 0.19257278740406036, + "learning_rate": 1.6915950155015892e-06, + "loss": 0.0397, + "num_input_tokens_seen": 45681376, + "step": 67790 + }, + { + "epoch": 1.6562431290157087, + "grad_norm": 0.24093586206436157, + "learning_rate": 1.6915334182138863e-06, + "loss": 0.001, + "num_input_tokens_seen": 45684512, + "step": 67795 + }, + { + "epoch": 1.6563652798475559, + "grad_norm": 9.37103271484375, + "learning_rate": 1.6914718158971662e-06, + "loss": 0.1561, + "num_input_tokens_seen": 45688096, + "step": 67800 + }, + { + "epoch": 1.656487430679403, + "grad_norm": 0.19032365083694458, + "learning_rate": 1.6914102085518773e-06, + "loss": 0.0419, + "num_input_tokens_seen": 45691040, + "step": 67805 + }, + { + "epoch": 1.65660958151125, + "grad_norm": 229.4463653564453, + "learning_rate": 1.6913485961784672e-06, + "loss": 0.0523, + "num_input_tokens_seen": 45694432, + "step": 67810 + }, + { + "epoch": 1.6567317323430972, + "grad_norm": 8.384881973266602, + "learning_rate": 1.6912869787773842e-06, + "loss": 0.0485, + "num_input_tokens_seen": 45697696, + "step": 67815 + }, + { + "epoch": 1.6568538831749444, + "grad_norm": 0.07127055525779724, + "learning_rate": 1.6912253563490765e-06, + "loss": 0.044, + "num_input_tokens_seen": 45701024, + "step": 67820 + }, + { + "epoch": 1.6569760340067916, + "grad_norm": 15.135320663452148, + "learning_rate": 1.6911637288939922e-06, + "loss": 0.1131, + "num_input_tokens_seen": 45704480, + "step": 67825 + }, + { + "epoch": 1.6570981848386388, + "grad_norm": 26.051050186157227, + "learning_rate": 1.6911020964125791e-06, + "loss": 0.0481, + "num_input_tokens_seen": 45707552, + "step": 67830 + }, + { + "epoch": 1.6572203356704858, + "grad_norm": 0.23497499525547028, + "learning_rate": 1.6910404589052857e-06, + "loss": 0.0008, + "num_input_tokens_seen": 45711072, + "step": 67835 + }, + { + "epoch": 1.657342486502333, + "grad_norm": 0.049059245735406876, + "learning_rate": 1.6909788163725605e-06, + "loss": 0.0647, + "num_input_tokens_seen": 45714400, + "step": 67840 + }, + { + "epoch": 1.6574646373341801, + "grad_norm": 16.081838607788086, + "learning_rate": 1.6909171688148512e-06, + "loss": 0.1721, + "num_input_tokens_seen": 45717728, + "step": 67845 + }, + { + "epoch": 1.6575867881660273, + "grad_norm": 0.1584680676460266, + "learning_rate": 1.6908555162326064e-06, + "loss": 0.0284, + "num_input_tokens_seen": 45721248, + "step": 67850 + }, + { + "epoch": 1.6577089389978745, + "grad_norm": 10.100417137145996, + "learning_rate": 1.6907938586262747e-06, + "loss": 0.0729, + "num_input_tokens_seen": 45724640, + "step": 67855 + }, + { + "epoch": 1.6578310898297217, + "grad_norm": 86.2887954711914, + "learning_rate": 1.690732195996304e-06, + "loss": 0.0603, + "num_input_tokens_seen": 45727840, + "step": 67860 + }, + { + "epoch": 1.657953240661569, + "grad_norm": 0.12551839649677277, + "learning_rate": 1.6906705283431432e-06, + "loss": 0.24, + "num_input_tokens_seen": 45731168, + "step": 67865 + }, + { + "epoch": 1.658075391493416, + "grad_norm": 9.954278945922852, + "learning_rate": 1.6906088556672405e-06, + "loss": 0.0306, + "num_input_tokens_seen": 45734432, + "step": 67870 + }, + { + "epoch": 1.6581975423252633, + "grad_norm": 0.18424488604068756, + "learning_rate": 1.6905471779690443e-06, + "loss": 0.0692, + "num_input_tokens_seen": 45737952, + "step": 67875 + }, + { + "epoch": 1.6583196931571105, + "grad_norm": 1.1578551530838013, + "learning_rate": 1.6904854952490035e-06, + "loss": 0.1682, + "num_input_tokens_seen": 45741216, + "step": 67880 + }, + { + "epoch": 1.6584418439889577, + "grad_norm": 0.32225939631462097, + "learning_rate": 1.6904238075075665e-06, + "loss": 0.075, + "num_input_tokens_seen": 45744864, + "step": 67885 + }, + { + "epoch": 1.6585639948208049, + "grad_norm": 0.1321701854467392, + "learning_rate": 1.6903621147451816e-06, + "loss": 0.0613, + "num_input_tokens_seen": 45747936, + "step": 67890 + }, + { + "epoch": 1.658686145652652, + "grad_norm": 0.5468298196792603, + "learning_rate": 1.6903004169622976e-06, + "loss": 0.0063, + "num_input_tokens_seen": 45751456, + "step": 67895 + }, + { + "epoch": 1.658808296484499, + "grad_norm": 46.02381134033203, + "learning_rate": 1.6902387141593637e-06, + "loss": 0.1953, + "num_input_tokens_seen": 45755104, + "step": 67900 + }, + { + "epoch": 1.6589304473163462, + "grad_norm": 0.20815016329288483, + "learning_rate": 1.6901770063368281e-06, + "loss": 0.089, + "num_input_tokens_seen": 45758816, + "step": 67905 + }, + { + "epoch": 1.6590525981481934, + "grad_norm": 8.63784408569336, + "learning_rate": 1.6901152934951397e-06, + "loss": 0.1403, + "num_input_tokens_seen": 45762272, + "step": 67910 + }, + { + "epoch": 1.6591747489800406, + "grad_norm": 0.018525369465351105, + "learning_rate": 1.6900535756347472e-06, + "loss": 0.1207, + "num_input_tokens_seen": 45765216, + "step": 67915 + }, + { + "epoch": 1.6592968998118878, + "grad_norm": 323.701904296875, + "learning_rate": 1.6899918527560995e-06, + "loss": 0.1671, + "num_input_tokens_seen": 45768672, + "step": 67920 + }, + { + "epoch": 1.6594190506437347, + "grad_norm": 0.3565976619720459, + "learning_rate": 1.6899301248596454e-06, + "loss": 0.0019, + "num_input_tokens_seen": 45772000, + "step": 67925 + }, + { + "epoch": 1.659541201475582, + "grad_norm": 0.3739701807498932, + "learning_rate": 1.6898683919458342e-06, + "loss": 0.1282, + "num_input_tokens_seen": 45775648, + "step": 67930 + }, + { + "epoch": 1.6596633523074291, + "grad_norm": 10.951455116271973, + "learning_rate": 1.689806654015114e-06, + "loss": 0.1585, + "num_input_tokens_seen": 45778912, + "step": 67935 + }, + { + "epoch": 1.6597855031392763, + "grad_norm": 531.7941284179688, + "learning_rate": 1.6897449110679344e-06, + "loss": 0.0189, + "num_input_tokens_seen": 45782304, + "step": 67940 + }, + { + "epoch": 1.6599076539711235, + "grad_norm": 31.341873168945312, + "learning_rate": 1.6896831631047444e-06, + "loss": 0.0686, + "num_input_tokens_seen": 45785632, + "step": 67945 + }, + { + "epoch": 1.6600298048029707, + "grad_norm": 38.764163970947266, + "learning_rate": 1.6896214101259928e-06, + "loss": 0.2004, + "num_input_tokens_seen": 45788640, + "step": 67950 + }, + { + "epoch": 1.6601519556348179, + "grad_norm": 19.04844093322754, + "learning_rate": 1.6895596521321292e-06, + "loss": 0.0817, + "num_input_tokens_seen": 45792160, + "step": 67955 + }, + { + "epoch": 1.660274106466665, + "grad_norm": 43.606266021728516, + "learning_rate": 1.689497889123602e-06, + "loss": 0.2003, + "num_input_tokens_seen": 45795680, + "step": 67960 + }, + { + "epoch": 1.6603962572985123, + "grad_norm": 28.905261993408203, + "learning_rate": 1.6894361211008608e-06, + "loss": 0.0544, + "num_input_tokens_seen": 45798624, + "step": 67965 + }, + { + "epoch": 1.6605184081303594, + "grad_norm": 0.7634138464927673, + "learning_rate": 1.6893743480643546e-06, + "loss": 0.0039, + "num_input_tokens_seen": 45801632, + "step": 67970 + }, + { + "epoch": 1.6606405589622066, + "grad_norm": 0.33794939517974854, + "learning_rate": 1.689312570014533e-06, + "loss": 0.0492, + "num_input_tokens_seen": 45805280, + "step": 67975 + }, + { + "epoch": 1.6607627097940538, + "grad_norm": 17.452777862548828, + "learning_rate": 1.6892507869518447e-06, + "loss": 0.1252, + "num_input_tokens_seen": 45808480, + "step": 67980 + }, + { + "epoch": 1.660884860625901, + "grad_norm": 0.7841615080833435, + "learning_rate": 1.6891889988767392e-06, + "loss": 0.0444, + "num_input_tokens_seen": 45811680, + "step": 67985 + }, + { + "epoch": 1.661007011457748, + "grad_norm": 203.92428588867188, + "learning_rate": 1.6891272057896661e-06, + "loss": 0.0777, + "num_input_tokens_seen": 45814944, + "step": 67990 + }, + { + "epoch": 1.6611291622895952, + "grad_norm": 20.686752319335938, + "learning_rate": 1.689065407691075e-06, + "loss": 0.0692, + "num_input_tokens_seen": 45818272, + "step": 67995 + }, + { + "epoch": 1.6612513131214424, + "grad_norm": 103.55084991455078, + "learning_rate": 1.6890036045814142e-06, + "loss": 0.2293, + "num_input_tokens_seen": 45821472, + "step": 68000 + }, + { + "epoch": 1.6613734639532896, + "grad_norm": 0.2858763039112091, + "learning_rate": 1.6889417964611343e-06, + "loss": 0.0691, + "num_input_tokens_seen": 45824736, + "step": 68005 + }, + { + "epoch": 1.6614956147851365, + "grad_norm": 1.4139119386672974, + "learning_rate": 1.6888799833306842e-06, + "loss": 0.0022, + "num_input_tokens_seen": 45827872, + "step": 68010 + }, + { + "epoch": 1.6616177656169837, + "grad_norm": 0.013525686226785183, + "learning_rate": 1.6888181651905136e-06, + "loss": 0.0388, + "num_input_tokens_seen": 45831456, + "step": 68015 + }, + { + "epoch": 1.661739916448831, + "grad_norm": 44.468807220458984, + "learning_rate": 1.688756342041072e-06, + "loss": 0.0825, + "num_input_tokens_seen": 45835616, + "step": 68020 + }, + { + "epoch": 1.661862067280678, + "grad_norm": 0.33210596442222595, + "learning_rate": 1.688694513882809e-06, + "loss": 0.0442, + "num_input_tokens_seen": 45839776, + "step": 68025 + }, + { + "epoch": 1.6619842181125253, + "grad_norm": 0.23905229568481445, + "learning_rate": 1.6886326807161746e-06, + "loss": 0.0012, + "num_input_tokens_seen": 45843680, + "step": 68030 + }, + { + "epoch": 1.6621063689443725, + "grad_norm": 0.04572295397520065, + "learning_rate": 1.6885708425416178e-06, + "loss": 0.1546, + "num_input_tokens_seen": 45846688, + "step": 68035 + }, + { + "epoch": 1.6622285197762197, + "grad_norm": 20.764741897583008, + "learning_rate": 1.688508999359589e-06, + "loss": 0.0749, + "num_input_tokens_seen": 45850272, + "step": 68040 + }, + { + "epoch": 1.6623506706080668, + "grad_norm": 17.399734497070312, + "learning_rate": 1.688447151170537e-06, + "loss": 0.1484, + "num_input_tokens_seen": 45853728, + "step": 68045 + }, + { + "epoch": 1.662472821439914, + "grad_norm": 0.7581236958503723, + "learning_rate": 1.6883852979749124e-06, + "loss": 0.0673, + "num_input_tokens_seen": 45856864, + "step": 68050 + }, + { + "epoch": 1.6625949722717612, + "grad_norm": 1.076304316520691, + "learning_rate": 1.6883234397731647e-06, + "loss": 0.0156, + "num_input_tokens_seen": 45860192, + "step": 68055 + }, + { + "epoch": 1.6627171231036084, + "grad_norm": 2.357579231262207, + "learning_rate": 1.688261576565744e-06, + "loss": 0.0551, + "num_input_tokens_seen": 45863584, + "step": 68060 + }, + { + "epoch": 1.6628392739354556, + "grad_norm": 0.12220649421215057, + "learning_rate": 1.6881997083530999e-06, + "loss": 0.0017, + "num_input_tokens_seen": 45866976, + "step": 68065 + }, + { + "epoch": 1.6629614247673028, + "grad_norm": 0.12620511651039124, + "learning_rate": 1.6881378351356825e-06, + "loss": 0.0667, + "num_input_tokens_seen": 45870240, + "step": 68070 + }, + { + "epoch": 1.66308357559915, + "grad_norm": 0.01762893982231617, + "learning_rate": 1.6880759569139414e-06, + "loss": 0.0887, + "num_input_tokens_seen": 45873696, + "step": 68075 + }, + { + "epoch": 1.663205726430997, + "grad_norm": 0.046484898775815964, + "learning_rate": 1.688014073688327e-06, + "loss": 0.1637, + "num_input_tokens_seen": 45877536, + "step": 68080 + }, + { + "epoch": 1.6633278772628441, + "grad_norm": 0.13141871988773346, + "learning_rate": 1.6879521854592893e-06, + "loss": 0.0667, + "num_input_tokens_seen": 45881120, + "step": 68085 + }, + { + "epoch": 1.6634500280946913, + "grad_norm": 14.069496154785156, + "learning_rate": 1.6878902922272781e-06, + "loss": 0.0385, + "num_input_tokens_seen": 45884576, + "step": 68090 + }, + { + "epoch": 1.6635721789265385, + "grad_norm": 0.33704790472984314, + "learning_rate": 1.687828393992744e-06, + "loss": 0.0879, + "num_input_tokens_seen": 45887776, + "step": 68095 + }, + { + "epoch": 1.6636943297583855, + "grad_norm": 17.2185115814209, + "learning_rate": 1.6877664907561367e-06, + "loss": 0.18, + "num_input_tokens_seen": 45891104, + "step": 68100 + }, + { + "epoch": 1.6638164805902327, + "grad_norm": 0.07757803797721863, + "learning_rate": 1.6877045825179063e-06, + "loss": 0.0007, + "num_input_tokens_seen": 45894880, + "step": 68105 + }, + { + "epoch": 1.6639386314220799, + "grad_norm": 191.504150390625, + "learning_rate": 1.6876426692785032e-06, + "loss": 0.0989, + "num_input_tokens_seen": 45898592, + "step": 68110 + }, + { + "epoch": 1.664060782253927, + "grad_norm": 0.2756825089454651, + "learning_rate": 1.6875807510383777e-06, + "loss": 0.0914, + "num_input_tokens_seen": 45901920, + "step": 68115 + }, + { + "epoch": 1.6641829330857743, + "grad_norm": 0.012194006703794003, + "learning_rate": 1.6875188277979802e-06, + "loss": 0.0416, + "num_input_tokens_seen": 45905312, + "step": 68120 + }, + { + "epoch": 1.6643050839176214, + "grad_norm": 0.0791403129696846, + "learning_rate": 1.6874568995577608e-06, + "loss": 0.1037, + "num_input_tokens_seen": 45908448, + "step": 68125 + }, + { + "epoch": 1.6644272347494686, + "grad_norm": 8.660093307495117, + "learning_rate": 1.6873949663181698e-06, + "loss": 0.1828, + "num_input_tokens_seen": 45911392, + "step": 68130 + }, + { + "epoch": 1.6645493855813158, + "grad_norm": 35.485111236572266, + "learning_rate": 1.6873330280796578e-06, + "loss": 0.1889, + "num_input_tokens_seen": 45914656, + "step": 68135 + }, + { + "epoch": 1.664671536413163, + "grad_norm": 0.08751770853996277, + "learning_rate": 1.6872710848426752e-06, + "loss": 0.1231, + "num_input_tokens_seen": 45918240, + "step": 68140 + }, + { + "epoch": 1.6647936872450102, + "grad_norm": 0.339227557182312, + "learning_rate": 1.6872091366076725e-06, + "loss": 0.0487, + "num_input_tokens_seen": 45922016, + "step": 68145 + }, + { + "epoch": 1.6649158380768574, + "grad_norm": 0.7971973419189453, + "learning_rate": 1.6871471833751e-06, + "loss": 0.1104, + "num_input_tokens_seen": 45924960, + "step": 68150 + }, + { + "epoch": 1.6650379889087046, + "grad_norm": 12.896892547607422, + "learning_rate": 1.6870852251454082e-06, + "loss": 0.1008, + "num_input_tokens_seen": 45928608, + "step": 68155 + }, + { + "epoch": 1.6651601397405518, + "grad_norm": 0.27142223715782166, + "learning_rate": 1.687023261919048e-06, + "loss": 0.0479, + "num_input_tokens_seen": 45932448, + "step": 68160 + }, + { + "epoch": 1.665282290572399, + "grad_norm": 0.1016705185174942, + "learning_rate": 1.6869612936964699e-06, + "loss": 0.0546, + "num_input_tokens_seen": 45935712, + "step": 68165 + }, + { + "epoch": 1.665404441404246, + "grad_norm": 3.339890480041504, + "learning_rate": 1.6868993204781242e-06, + "loss": 0.0772, + "num_input_tokens_seen": 45938912, + "step": 68170 + }, + { + "epoch": 1.6655265922360931, + "grad_norm": 8.35922622680664, + "learning_rate": 1.6868373422644623e-06, + "loss": 0.095, + "num_input_tokens_seen": 45942560, + "step": 68175 + }, + { + "epoch": 1.6656487430679403, + "grad_norm": 22.813560485839844, + "learning_rate": 1.6867753590559346e-06, + "loss": 0.092, + "num_input_tokens_seen": 45945632, + "step": 68180 + }, + { + "epoch": 1.6657708938997875, + "grad_norm": 0.4119601249694824, + "learning_rate": 1.6867133708529915e-06, + "loss": 0.1687, + "num_input_tokens_seen": 45949152, + "step": 68185 + }, + { + "epoch": 1.6658930447316345, + "grad_norm": 7.913878440856934, + "learning_rate": 1.686651377656084e-06, + "loss": 0.1097, + "num_input_tokens_seen": 45952800, + "step": 68190 + }, + { + "epoch": 1.6660151955634817, + "grad_norm": 0.6191280484199524, + "learning_rate": 1.6865893794656631e-06, + "loss": 0.1661, + "num_input_tokens_seen": 45955808, + "step": 68195 + }, + { + "epoch": 1.6661373463953288, + "grad_norm": 1.6392009258270264, + "learning_rate": 1.6865273762821794e-06, + "loss": 0.0467, + "num_input_tokens_seen": 45959648, + "step": 68200 + }, + { + "epoch": 1.666259497227176, + "grad_norm": 0.5763128995895386, + "learning_rate": 1.6864653681060841e-06, + "loss": 0.0033, + "num_input_tokens_seen": 45963040, + "step": 68205 + }, + { + "epoch": 1.6663816480590232, + "grad_norm": 0.14941108226776123, + "learning_rate": 1.686403354937828e-06, + "loss": 0.1121, + "num_input_tokens_seen": 45966432, + "step": 68210 + }, + { + "epoch": 1.6665037988908704, + "grad_norm": 0.26819905638694763, + "learning_rate": 1.6863413367778622e-06, + "loss": 0.0397, + "num_input_tokens_seen": 45969568, + "step": 68215 + }, + { + "epoch": 1.6666259497227176, + "grad_norm": 0.2349829375743866, + "learning_rate": 1.6862793136266376e-06, + "loss": 0.1143, + "num_input_tokens_seen": 45972640, + "step": 68220 + }, + { + "epoch": 1.6667481005545648, + "grad_norm": 0.09882470965385437, + "learning_rate": 1.686217285484605e-06, + "loss": 0.0426, + "num_input_tokens_seen": 45976352, + "step": 68225 + }, + { + "epoch": 1.666870251386412, + "grad_norm": 18.198345184326172, + "learning_rate": 1.6861552523522157e-06, + "loss": 0.0423, + "num_input_tokens_seen": 45979680, + "step": 68230 + }, + { + "epoch": 1.6669924022182592, + "grad_norm": 101.0886459350586, + "learning_rate": 1.6860932142299212e-06, + "loss": 0.0519, + "num_input_tokens_seen": 45983072, + "step": 68235 + }, + { + "epoch": 1.6671145530501064, + "grad_norm": 18.422719955444336, + "learning_rate": 1.6860311711181722e-06, + "loss": 0.1198, + "num_input_tokens_seen": 45986400, + "step": 68240 + }, + { + "epoch": 1.6672367038819536, + "grad_norm": 0.1812196522951126, + "learning_rate": 1.6859691230174198e-06, + "loss": 0.0444, + "num_input_tokens_seen": 45989408, + "step": 68245 + }, + { + "epoch": 1.6673588547138007, + "grad_norm": 0.20358893275260925, + "learning_rate": 1.6859070699281155e-06, + "loss": 0.0011, + "num_input_tokens_seen": 45993184, + "step": 68250 + }, + { + "epoch": 1.667481005545648, + "grad_norm": 20.3399658203125, + "learning_rate": 1.6858450118507107e-06, + "loss": 0.0943, + "num_input_tokens_seen": 45996832, + "step": 68255 + }, + { + "epoch": 1.667603156377495, + "grad_norm": 0.04567122459411621, + "learning_rate": 1.6857829487856563e-06, + "loss": 0.0301, + "num_input_tokens_seen": 46000352, + "step": 68260 + }, + { + "epoch": 1.667725307209342, + "grad_norm": 63.65852737426758, + "learning_rate": 1.6857208807334038e-06, + "loss": 0.0598, + "num_input_tokens_seen": 46003424, + "step": 68265 + }, + { + "epoch": 1.6678474580411893, + "grad_norm": 0.3260882794857025, + "learning_rate": 1.6856588076944048e-06, + "loss": 0.0956, + "num_input_tokens_seen": 46006816, + "step": 68270 + }, + { + "epoch": 1.6679696088730365, + "grad_norm": 26.87339210510254, + "learning_rate": 1.6855967296691104e-06, + "loss": 0.0745, + "num_input_tokens_seen": 46009888, + "step": 68275 + }, + { + "epoch": 1.6680917597048834, + "grad_norm": 0.2553943395614624, + "learning_rate": 1.6855346466579725e-06, + "loss": 0.0364, + "num_input_tokens_seen": 46013792, + "step": 68280 + }, + { + "epoch": 1.6682139105367306, + "grad_norm": 0.247576043009758, + "learning_rate": 1.6854725586614419e-06, + "loss": 0.0034, + "num_input_tokens_seen": 46017120, + "step": 68285 + }, + { + "epoch": 1.6683360613685778, + "grad_norm": 0.5242090225219727, + "learning_rate": 1.6854104656799707e-06, + "loss": 0.0019, + "num_input_tokens_seen": 46020256, + "step": 68290 + }, + { + "epoch": 1.668458212200425, + "grad_norm": 2.042462110519409, + "learning_rate": 1.6853483677140098e-06, + "loss": 0.0043, + "num_input_tokens_seen": 46023904, + "step": 68295 + }, + { + "epoch": 1.6685803630322722, + "grad_norm": 0.25644490122795105, + "learning_rate": 1.6852862647640116e-06, + "loss": 0.1332, + "num_input_tokens_seen": 46027232, + "step": 68300 + }, + { + "epoch": 1.6687025138641194, + "grad_norm": 0.1754622459411621, + "learning_rate": 1.6852241568304274e-06, + "loss": 0.046, + "num_input_tokens_seen": 46030560, + "step": 68305 + }, + { + "epoch": 1.6688246646959666, + "grad_norm": 16.908388137817383, + "learning_rate": 1.6851620439137087e-06, + "loss": 0.0841, + "num_input_tokens_seen": 46034208, + "step": 68310 + }, + { + "epoch": 1.6689468155278138, + "grad_norm": 0.1532597690820694, + "learning_rate": 1.6850999260143076e-06, + "loss": 0.0816, + "num_input_tokens_seen": 46037536, + "step": 68315 + }, + { + "epoch": 1.669068966359661, + "grad_norm": 0.12587259709835052, + "learning_rate": 1.6850378031326752e-06, + "loss": 0.0013, + "num_input_tokens_seen": 46040992, + "step": 68320 + }, + { + "epoch": 1.6691911171915081, + "grad_norm": 10.260220527648926, + "learning_rate": 1.6849756752692636e-06, + "loss": 0.0786, + "num_input_tokens_seen": 46044384, + "step": 68325 + }, + { + "epoch": 1.6693132680233553, + "grad_norm": 0.05229521170258522, + "learning_rate": 1.684913542424525e-06, + "loss": 0.067, + "num_input_tokens_seen": 46047904, + "step": 68330 + }, + { + "epoch": 1.6694354188552025, + "grad_norm": 0.24571794271469116, + "learning_rate": 1.6848514045989108e-06, + "loss": 0.0374, + "num_input_tokens_seen": 46051040, + "step": 68335 + }, + { + "epoch": 1.6695575696870497, + "grad_norm": 12.776507377624512, + "learning_rate": 1.6847892617928729e-06, + "loss": 0.1782, + "num_input_tokens_seen": 46054432, + "step": 68340 + }, + { + "epoch": 1.6696797205188967, + "grad_norm": 57.233673095703125, + "learning_rate": 1.6847271140068633e-06, + "loss": 0.175, + "num_input_tokens_seen": 46058080, + "step": 68345 + }, + { + "epoch": 1.6698018713507439, + "grad_norm": 123.28487396240234, + "learning_rate": 1.684664961241334e-06, + "loss": 0.0545, + "num_input_tokens_seen": 46061280, + "step": 68350 + }, + { + "epoch": 1.669924022182591, + "grad_norm": 0.09082359075546265, + "learning_rate": 1.684602803496737e-06, + "loss": 0.2261, + "num_input_tokens_seen": 46064864, + "step": 68355 + }, + { + "epoch": 1.6700461730144383, + "grad_norm": 0.5304872393608093, + "learning_rate": 1.684540640773524e-06, + "loss": 0.0986, + "num_input_tokens_seen": 46068000, + "step": 68360 + }, + { + "epoch": 1.6701683238462854, + "grad_norm": 0.37393248081207275, + "learning_rate": 1.6844784730721476e-06, + "loss": 0.0021, + "num_input_tokens_seen": 46071712, + "step": 68365 + }, + { + "epoch": 1.6702904746781324, + "grad_norm": 268.9071350097656, + "learning_rate": 1.6844163003930599e-06, + "loss": 0.0439, + "num_input_tokens_seen": 46075296, + "step": 68370 + }, + { + "epoch": 1.6704126255099796, + "grad_norm": 0.05923445522785187, + "learning_rate": 1.6843541227367121e-06, + "loss": 0.0014, + "num_input_tokens_seen": 46078624, + "step": 68375 + }, + { + "epoch": 1.6705347763418268, + "grad_norm": 0.42169424891471863, + "learning_rate": 1.6842919401035575e-06, + "loss": 0.1804, + "num_input_tokens_seen": 46081696, + "step": 68380 + }, + { + "epoch": 1.670656927173674, + "grad_norm": 6.835901260375977, + "learning_rate": 1.6842297524940477e-06, + "loss": 0.112, + "num_input_tokens_seen": 46085216, + "step": 68385 + }, + { + "epoch": 1.6707790780055212, + "grad_norm": 0.5587440133094788, + "learning_rate": 1.6841675599086354e-06, + "loss": 0.0388, + "num_input_tokens_seen": 46088608, + "step": 68390 + }, + { + "epoch": 1.6709012288373684, + "grad_norm": 0.23931415379047394, + "learning_rate": 1.6841053623477723e-06, + "loss": 0.1337, + "num_input_tokens_seen": 46091616, + "step": 68395 + }, + { + "epoch": 1.6710233796692155, + "grad_norm": 0.36907368898391724, + "learning_rate": 1.6840431598119112e-06, + "loss": 0.0462, + "num_input_tokens_seen": 46096864, + "step": 68400 + }, + { + "epoch": 1.6711455305010627, + "grad_norm": 0.272148460149765, + "learning_rate": 1.683980952301504e-06, + "loss": 0.0923, + "num_input_tokens_seen": 46100448, + "step": 68405 + }, + { + "epoch": 1.67126768133291, + "grad_norm": 0.0677834302186966, + "learning_rate": 1.6839187398170033e-06, + "loss": 0.0824, + "num_input_tokens_seen": 46103584, + "step": 68410 + }, + { + "epoch": 1.6713898321647571, + "grad_norm": 0.13646988570690155, + "learning_rate": 1.683856522358862e-06, + "loss": 0.0901, + "num_input_tokens_seen": 46106592, + "step": 68415 + }, + { + "epoch": 1.6715119829966043, + "grad_norm": 0.26839596033096313, + "learning_rate": 1.6837942999275318e-06, + "loss": 0.0017, + "num_input_tokens_seen": 46109920, + "step": 68420 + }, + { + "epoch": 1.6716341338284515, + "grad_norm": 0.13362592458724976, + "learning_rate": 1.6837320725234657e-06, + "loss": 0.0331, + "num_input_tokens_seen": 46113248, + "step": 68425 + }, + { + "epoch": 1.6717562846602987, + "grad_norm": 38.74449157714844, + "learning_rate": 1.6836698401471158e-06, + "loss": 0.0887, + "num_input_tokens_seen": 46116896, + "step": 68430 + }, + { + "epoch": 1.6718784354921457, + "grad_norm": 293.8094482421875, + "learning_rate": 1.6836076027989351e-06, + "loss": 0.0547, + "num_input_tokens_seen": 46120224, + "step": 68435 + }, + { + "epoch": 1.6720005863239928, + "grad_norm": 10.379058837890625, + "learning_rate": 1.683545360479376e-06, + "loss": 0.0575, + "num_input_tokens_seen": 46123616, + "step": 68440 + }, + { + "epoch": 1.67212273715584, + "grad_norm": 0.09068046510219574, + "learning_rate": 1.6834831131888914e-06, + "loss": 0.0708, + "num_input_tokens_seen": 46126560, + "step": 68445 + }, + { + "epoch": 1.6722448879876872, + "grad_norm": 1.8615421056747437, + "learning_rate": 1.6834208609279336e-06, + "loss": 0.026, + "num_input_tokens_seen": 46129888, + "step": 68450 + }, + { + "epoch": 1.6723670388195344, + "grad_norm": 16.04012107849121, + "learning_rate": 1.6833586036969556e-06, + "loss": 0.1095, + "num_input_tokens_seen": 46133664, + "step": 68455 + }, + { + "epoch": 1.6724891896513814, + "grad_norm": 0.149423286318779, + "learning_rate": 1.6832963414964098e-06, + "loss": 0.0005, + "num_input_tokens_seen": 46136608, + "step": 68460 + }, + { + "epoch": 1.6726113404832286, + "grad_norm": 0.07547707855701447, + "learning_rate": 1.6832340743267493e-06, + "loss": 0.0005, + "num_input_tokens_seen": 46139872, + "step": 68465 + }, + { + "epoch": 1.6727334913150758, + "grad_norm": 33.1000862121582, + "learning_rate": 1.683171802188427e-06, + "loss": 0.1449, + "num_input_tokens_seen": 46143200, + "step": 68470 + }, + { + "epoch": 1.672855642146923, + "grad_norm": 0.10011862218379974, + "learning_rate": 1.6831095250818956e-06, + "loss": 0.0376, + "num_input_tokens_seen": 46146720, + "step": 68475 + }, + { + "epoch": 1.6729777929787701, + "grad_norm": 25.451860427856445, + "learning_rate": 1.6830472430076076e-06, + "loss": 0.1879, + "num_input_tokens_seen": 46149984, + "step": 68480 + }, + { + "epoch": 1.6730999438106173, + "grad_norm": 0.10208665579557419, + "learning_rate": 1.6829849559660167e-06, + "loss": 0.0003, + "num_input_tokens_seen": 46153632, + "step": 68485 + }, + { + "epoch": 1.6732220946424645, + "grad_norm": 0.20292365550994873, + "learning_rate": 1.6829226639575756e-06, + "loss": 0.0528, + "num_input_tokens_seen": 46156768, + "step": 68490 + }, + { + "epoch": 1.6733442454743117, + "grad_norm": 0.09113001823425293, + "learning_rate": 1.6828603669827368e-06, + "loss": 0.0007, + "num_input_tokens_seen": 46159968, + "step": 68495 + }, + { + "epoch": 1.673466396306159, + "grad_norm": 20.79863739013672, + "learning_rate": 1.682798065041954e-06, + "loss": 0.114, + "num_input_tokens_seen": 46164320, + "step": 68500 + }, + { + "epoch": 1.673588547138006, + "grad_norm": 0.12156138569116592, + "learning_rate": 1.68273575813568e-06, + "loss": 0.1651, + "num_input_tokens_seen": 46167968, + "step": 68505 + }, + { + "epoch": 1.6737106979698533, + "grad_norm": 0.05434705317020416, + "learning_rate": 1.682673446264368e-06, + "loss": 0.1471, + "num_input_tokens_seen": 46170976, + "step": 68510 + }, + { + "epoch": 1.6738328488017005, + "grad_norm": 71.84229278564453, + "learning_rate": 1.682611129428471e-06, + "loss": 0.0332, + "num_input_tokens_seen": 46174432, + "step": 68515 + }, + { + "epoch": 1.6739549996335477, + "grad_norm": 53.085418701171875, + "learning_rate": 1.6825488076284424e-06, + "loss": 0.0901, + "num_input_tokens_seen": 46177888, + "step": 68520 + }, + { + "epoch": 1.6740771504653946, + "grad_norm": 0.19759757816791534, + "learning_rate": 1.682486480864735e-06, + "loss": 0.0046, + "num_input_tokens_seen": 46181408, + "step": 68525 + }, + { + "epoch": 1.6741993012972418, + "grad_norm": 14.114509582519531, + "learning_rate": 1.6824241491378025e-06, + "loss": 0.1017, + "num_input_tokens_seen": 46184480, + "step": 68530 + }, + { + "epoch": 1.674321452129089, + "grad_norm": 0.03609864413738251, + "learning_rate": 1.6823618124480984e-06, + "loss": 0.0012, + "num_input_tokens_seen": 46187616, + "step": 68535 + }, + { + "epoch": 1.6744436029609362, + "grad_norm": 15.648370742797852, + "learning_rate": 1.682299470796075e-06, + "loss": 0.0433, + "num_input_tokens_seen": 46191008, + "step": 68540 + }, + { + "epoch": 1.6745657537927832, + "grad_norm": 2.687039613723755, + "learning_rate": 1.6822371241821864e-06, + "loss": 0.0571, + "num_input_tokens_seen": 46194592, + "step": 68545 + }, + { + "epoch": 1.6746879046246304, + "grad_norm": 0.2621817886829376, + "learning_rate": 1.6821747726068865e-06, + "loss": 0.1668, + "num_input_tokens_seen": 46197856, + "step": 68550 + }, + { + "epoch": 1.6748100554564775, + "grad_norm": 0.1592407524585724, + "learning_rate": 1.6821124160706276e-06, + "loss": 0.0703, + "num_input_tokens_seen": 46202080, + "step": 68555 + }, + { + "epoch": 1.6749322062883247, + "grad_norm": 0.02570989541709423, + "learning_rate": 1.6820500545738642e-06, + "loss": 0.0012, + "num_input_tokens_seen": 46205600, + "step": 68560 + }, + { + "epoch": 1.675054357120172, + "grad_norm": 0.15985862910747528, + "learning_rate": 1.6819876881170491e-06, + "loss": 0.0521, + "num_input_tokens_seen": 46209248, + "step": 68565 + }, + { + "epoch": 1.6751765079520191, + "grad_norm": 0.19796176254749298, + "learning_rate": 1.6819253167006359e-06, + "loss": 0.0331, + "num_input_tokens_seen": 46212448, + "step": 68570 + }, + { + "epoch": 1.6752986587838663, + "grad_norm": 0.055551838129758835, + "learning_rate": 1.6818629403250787e-06, + "loss": 0.0933, + "num_input_tokens_seen": 46215392, + "step": 68575 + }, + { + "epoch": 1.6754208096157135, + "grad_norm": 0.15789180994033813, + "learning_rate": 1.6818005589908308e-06, + "loss": 0.0253, + "num_input_tokens_seen": 46218464, + "step": 68580 + }, + { + "epoch": 1.6755429604475607, + "grad_norm": 8.036543846130371, + "learning_rate": 1.681738172698346e-06, + "loss": 0.1125, + "num_input_tokens_seen": 46221984, + "step": 68585 + }, + { + "epoch": 1.6756651112794079, + "grad_norm": 0.08947142213582993, + "learning_rate": 1.6816757814480775e-06, + "loss": 0.0936, + "num_input_tokens_seen": 46224928, + "step": 68590 + }, + { + "epoch": 1.675787262111255, + "grad_norm": 6.953040599822998, + "learning_rate": 1.6816133852404795e-06, + "loss": 0.0543, + "num_input_tokens_seen": 46228192, + "step": 68595 + }, + { + "epoch": 1.6759094129431023, + "grad_norm": 0.3942214548587799, + "learning_rate": 1.6815509840760055e-06, + "loss": 0.0585, + "num_input_tokens_seen": 46231712, + "step": 68600 + }, + { + "epoch": 1.6760315637749494, + "grad_norm": 19.978500366210938, + "learning_rate": 1.6814885779551096e-06, + "loss": 0.1653, + "num_input_tokens_seen": 46234848, + "step": 68605 + }, + { + "epoch": 1.6761537146067966, + "grad_norm": 0.3641318380832672, + "learning_rate": 1.6814261668782454e-06, + "loss": 0.111, + "num_input_tokens_seen": 46238752, + "step": 68610 + }, + { + "epoch": 1.6762758654386436, + "grad_norm": 0.5056087970733643, + "learning_rate": 1.681363750845867e-06, + "loss": 0.115, + "num_input_tokens_seen": 46241952, + "step": 68615 + }, + { + "epoch": 1.6763980162704908, + "grad_norm": 205.91683959960938, + "learning_rate": 1.681301329858428e-06, + "loss": 0.0271, + "num_input_tokens_seen": 46245408, + "step": 68620 + }, + { + "epoch": 1.676520167102338, + "grad_norm": 21.606521606445312, + "learning_rate": 1.6812389039163824e-06, + "loss": 0.1538, + "num_input_tokens_seen": 46249184, + "step": 68625 + }, + { + "epoch": 1.6766423179341852, + "grad_norm": 0.33925703167915344, + "learning_rate": 1.6811764730201844e-06, + "loss": 0.121, + "num_input_tokens_seen": 46252512, + "step": 68630 + }, + { + "epoch": 1.6767644687660321, + "grad_norm": 8.783360481262207, + "learning_rate": 1.6811140371702876e-06, + "loss": 0.2316, + "num_input_tokens_seen": 46255648, + "step": 68635 + }, + { + "epoch": 1.6768866195978793, + "grad_norm": 34.9462890625, + "learning_rate": 1.6810515963671465e-06, + "loss": 0.1531, + "num_input_tokens_seen": 46259104, + "step": 68640 + }, + { + "epoch": 1.6770087704297265, + "grad_norm": 18.75974464416504, + "learning_rate": 1.680989150611215e-06, + "loss": 0.1858, + "num_input_tokens_seen": 46262368, + "step": 68645 + }, + { + "epoch": 1.6771309212615737, + "grad_norm": 26.14114761352539, + "learning_rate": 1.6809266999029475e-06, + "loss": 0.0424, + "num_input_tokens_seen": 46265184, + "step": 68650 + }, + { + "epoch": 1.677253072093421, + "grad_norm": 4.71269416809082, + "learning_rate": 1.6808642442427975e-06, + "loss": 0.072, + "num_input_tokens_seen": 46268576, + "step": 68655 + }, + { + "epoch": 1.677375222925268, + "grad_norm": 0.7655762434005737, + "learning_rate": 1.6808017836312198e-06, + "loss": 0.0042, + "num_input_tokens_seen": 46271840, + "step": 68660 + }, + { + "epoch": 1.6774973737571153, + "grad_norm": 0.3685450553894043, + "learning_rate": 1.6807393180686683e-06, + "loss": 0.1212, + "num_input_tokens_seen": 46274976, + "step": 68665 + }, + { + "epoch": 1.6776195245889625, + "grad_norm": 0.5734411478042603, + "learning_rate": 1.6806768475555973e-06, + "loss": 0.1753, + "num_input_tokens_seen": 46277856, + "step": 68670 + }, + { + "epoch": 1.6777416754208097, + "grad_norm": 8.355131149291992, + "learning_rate": 1.6806143720924616e-06, + "loss": 0.2848, + "num_input_tokens_seen": 46281696, + "step": 68675 + }, + { + "epoch": 1.6778638262526568, + "grad_norm": 0.27669835090637207, + "learning_rate": 1.6805518916797149e-06, + "loss": 0.1093, + "num_input_tokens_seen": 46284960, + "step": 68680 + }, + { + "epoch": 1.677985977084504, + "grad_norm": 0.5218971967697144, + "learning_rate": 1.6804894063178114e-06, + "loss": 0.0304, + "num_input_tokens_seen": 46288288, + "step": 68685 + }, + { + "epoch": 1.6781081279163512, + "grad_norm": 24.64486312866211, + "learning_rate": 1.6804269160072064e-06, + "loss": 0.048, + "num_input_tokens_seen": 46291616, + "step": 68690 + }, + { + "epoch": 1.6782302787481984, + "grad_norm": 18.304548263549805, + "learning_rate": 1.6803644207483535e-06, + "loss": 0.143, + "num_input_tokens_seen": 46294624, + "step": 68695 + }, + { + "epoch": 1.6783524295800456, + "grad_norm": 0.279045432806015, + "learning_rate": 1.6803019205417076e-06, + "loss": 0.0056, + "num_input_tokens_seen": 46298080, + "step": 68700 + }, + { + "epoch": 1.6784745804118926, + "grad_norm": 0.15253521502017975, + "learning_rate": 1.6802394153877236e-06, + "loss": 0.1018, + "num_input_tokens_seen": 46301472, + "step": 68705 + }, + { + "epoch": 1.6785967312437398, + "grad_norm": 71.8766098022461, + "learning_rate": 1.6801769052868553e-06, + "loss": 0.1373, + "num_input_tokens_seen": 46304544, + "step": 68710 + }, + { + "epoch": 1.678718882075587, + "grad_norm": 7.808777332305908, + "learning_rate": 1.6801143902395576e-06, + "loss": 0.0873, + "num_input_tokens_seen": 46307616, + "step": 68715 + }, + { + "epoch": 1.6788410329074341, + "grad_norm": 0.20652924478054047, + "learning_rate": 1.6800518702462851e-06, + "loss": 0.038, + "num_input_tokens_seen": 46311264, + "step": 68720 + }, + { + "epoch": 1.678963183739281, + "grad_norm": 0.9998967051506042, + "learning_rate": 1.6799893453074924e-06, + "loss": 0.0029, + "num_input_tokens_seen": 46314656, + "step": 68725 + }, + { + "epoch": 1.6790853345711283, + "grad_norm": 1.2813893556594849, + "learning_rate": 1.6799268154236346e-06, + "loss": 0.0564, + "num_input_tokens_seen": 46317856, + "step": 68730 + }, + { + "epoch": 1.6792074854029755, + "grad_norm": 0.016885561868548393, + "learning_rate": 1.679864280595166e-06, + "loss": 0.0444, + "num_input_tokens_seen": 46321248, + "step": 68735 + }, + { + "epoch": 1.6793296362348227, + "grad_norm": 38.7974739074707, + "learning_rate": 1.6798017408225414e-06, + "loss": 0.1281, + "num_input_tokens_seen": 46324256, + "step": 68740 + }, + { + "epoch": 1.6794517870666699, + "grad_norm": 0.31258314847946167, + "learning_rate": 1.6797391961062157e-06, + "loss": 0.0977, + "num_input_tokens_seen": 46327712, + "step": 68745 + }, + { + "epoch": 1.679573937898517, + "grad_norm": 0.10686086118221283, + "learning_rate": 1.6796766464466436e-06, + "loss": 0.1268, + "num_input_tokens_seen": 46330976, + "step": 68750 + }, + { + "epoch": 1.6796960887303642, + "grad_norm": 1.2706555128097534, + "learning_rate": 1.6796140918442803e-06, + "loss": 0.0773, + "num_input_tokens_seen": 46334624, + "step": 68755 + }, + { + "epoch": 1.6798182395622114, + "grad_norm": 0.030615871772170067, + "learning_rate": 1.6795515322995804e-06, + "loss": 0.0809, + "num_input_tokens_seen": 46338144, + "step": 68760 + }, + { + "epoch": 1.6799403903940586, + "grad_norm": 30.333724975585938, + "learning_rate": 1.679488967812999e-06, + "loss": 0.1333, + "num_input_tokens_seen": 46341216, + "step": 68765 + }, + { + "epoch": 1.6800625412259058, + "grad_norm": 5.713530540466309, + "learning_rate": 1.6794263983849913e-06, + "loss": 0.1215, + "num_input_tokens_seen": 46344800, + "step": 68770 + }, + { + "epoch": 1.680184692057753, + "grad_norm": 47.80161666870117, + "learning_rate": 1.6793638240160117e-06, + "loss": 0.0794, + "num_input_tokens_seen": 46347872, + "step": 68775 + }, + { + "epoch": 1.6803068428896002, + "grad_norm": 9.830896377563477, + "learning_rate": 1.679301244706516e-06, + "loss": 0.1, + "num_input_tokens_seen": 46351264, + "step": 68780 + }, + { + "epoch": 1.6804289937214474, + "grad_norm": 9.594369888305664, + "learning_rate": 1.6792386604569588e-06, + "loss": 0.1632, + "num_input_tokens_seen": 46354592, + "step": 68785 + }, + { + "epoch": 1.6805511445532946, + "grad_norm": 0.4086272716522217, + "learning_rate": 1.6791760712677955e-06, + "loss": 0.0026, + "num_input_tokens_seen": 46357856, + "step": 68790 + }, + { + "epoch": 1.6806732953851415, + "grad_norm": 0.39797380566596985, + "learning_rate": 1.6791134771394807e-06, + "loss": 0.0588, + "num_input_tokens_seen": 46360928, + "step": 68795 + }, + { + "epoch": 1.6807954462169887, + "grad_norm": 8.402929306030273, + "learning_rate": 1.6790508780724705e-06, + "loss": 0.105, + "num_input_tokens_seen": 46364192, + "step": 68800 + }, + { + "epoch": 1.680917597048836, + "grad_norm": 212.35462951660156, + "learning_rate": 1.6789882740672194e-06, + "loss": 0.0054, + "num_input_tokens_seen": 46367072, + "step": 68805 + }, + { + "epoch": 1.681039747880683, + "grad_norm": 128.37506103515625, + "learning_rate": 1.6789256651241832e-06, + "loss": 0.056, + "num_input_tokens_seen": 46370208, + "step": 68810 + }, + { + "epoch": 1.68116189871253, + "grad_norm": 0.21154679358005524, + "learning_rate": 1.6788630512438168e-06, + "loss": 0.0143, + "num_input_tokens_seen": 46374112, + "step": 68815 + }, + { + "epoch": 1.6812840495443773, + "grad_norm": 27.879682540893555, + "learning_rate": 1.6788004324265757e-06, + "loss": 0.0289, + "num_input_tokens_seen": 46377120, + "step": 68820 + }, + { + "epoch": 1.6814062003762245, + "grad_norm": 0.17257338762283325, + "learning_rate": 1.6787378086729152e-06, + "loss": 0.1079, + "num_input_tokens_seen": 46380256, + "step": 68825 + }, + { + "epoch": 1.6815283512080716, + "grad_norm": 0.2479773759841919, + "learning_rate": 1.678675179983291e-06, + "loss": 0.0743, + "num_input_tokens_seen": 46383392, + "step": 68830 + }, + { + "epoch": 1.6816505020399188, + "grad_norm": 191.01602172851562, + "learning_rate": 1.6786125463581585e-06, + "loss": 0.1769, + "num_input_tokens_seen": 46386592, + "step": 68835 + }, + { + "epoch": 1.681772652871766, + "grad_norm": 18.10806655883789, + "learning_rate": 1.6785499077979726e-06, + "loss": 0.0417, + "num_input_tokens_seen": 46389856, + "step": 68840 + }, + { + "epoch": 1.6818948037036132, + "grad_norm": 0.41562801599502563, + "learning_rate": 1.6784872643031896e-06, + "loss": 0.1182, + "num_input_tokens_seen": 46393376, + "step": 68845 + }, + { + "epoch": 1.6820169545354604, + "grad_norm": 0.07078684866428375, + "learning_rate": 1.6784246158742643e-06, + "loss": 0.0869, + "num_input_tokens_seen": 46396512, + "step": 68850 + }, + { + "epoch": 1.6821391053673076, + "grad_norm": 0.12547147274017334, + "learning_rate": 1.678361962511653e-06, + "loss": 0.0886, + "num_input_tokens_seen": 46399776, + "step": 68855 + }, + { + "epoch": 1.6822612561991548, + "grad_norm": 0.07189838588237762, + "learning_rate": 1.6782993042158112e-06, + "loss": 0.0896, + "num_input_tokens_seen": 46403104, + "step": 68860 + }, + { + "epoch": 1.682383407031002, + "grad_norm": 10.442744255065918, + "learning_rate": 1.678236640987194e-06, + "loss": 0.1263, + "num_input_tokens_seen": 46406368, + "step": 68865 + }, + { + "epoch": 1.6825055578628492, + "grad_norm": 9.556235313415527, + "learning_rate": 1.6781739728262579e-06, + "loss": 0.1121, + "num_input_tokens_seen": 46409632, + "step": 68870 + }, + { + "epoch": 1.6826277086946964, + "grad_norm": 0.1713194102048874, + "learning_rate": 1.6781112997334582e-06, + "loss": 0.0018, + "num_input_tokens_seen": 46413344, + "step": 68875 + }, + { + "epoch": 1.6827498595265433, + "grad_norm": 14.221380233764648, + "learning_rate": 1.6780486217092507e-06, + "loss": 0.1148, + "num_input_tokens_seen": 46416672, + "step": 68880 + }, + { + "epoch": 1.6828720103583905, + "grad_norm": 0.2144031971693039, + "learning_rate": 1.677985938754091e-06, + "loss": 0.0273, + "num_input_tokens_seen": 46420000, + "step": 68885 + }, + { + "epoch": 1.6829941611902377, + "grad_norm": 0.7320120334625244, + "learning_rate": 1.6779232508684355e-06, + "loss": 0.0751, + "num_input_tokens_seen": 46423840, + "step": 68890 + }, + { + "epoch": 1.683116312022085, + "grad_norm": 19.1189022064209, + "learning_rate": 1.6778605580527398e-06, + "loss": 0.1406, + "num_input_tokens_seen": 46427552, + "step": 68895 + }, + { + "epoch": 1.683238462853932, + "grad_norm": 0.19422592222690582, + "learning_rate": 1.6777978603074595e-06, + "loss": 0.1186, + "num_input_tokens_seen": 46431008, + "step": 68900 + }, + { + "epoch": 1.683360613685779, + "grad_norm": 22.207813262939453, + "learning_rate": 1.6777351576330512e-06, + "loss": 0.1768, + "num_input_tokens_seen": 46434400, + "step": 68905 + }, + { + "epoch": 1.6834827645176262, + "grad_norm": 8.345748901367188, + "learning_rate": 1.6776724500299704e-06, + "loss": 0.0808, + "num_input_tokens_seen": 46437536, + "step": 68910 + }, + { + "epoch": 1.6836049153494734, + "grad_norm": 1.5099958181381226, + "learning_rate": 1.6776097374986732e-06, + "loss": 0.0464, + "num_input_tokens_seen": 46440608, + "step": 68915 + }, + { + "epoch": 1.6837270661813206, + "grad_norm": 0.21554777026176453, + "learning_rate": 1.6775470200396159e-06, + "loss": 0.1128, + "num_input_tokens_seen": 46443872, + "step": 68920 + }, + { + "epoch": 1.6838492170131678, + "grad_norm": 1.5570076704025269, + "learning_rate": 1.6774842976532542e-06, + "loss": 0.1164, + "num_input_tokens_seen": 46447072, + "step": 68925 + }, + { + "epoch": 1.683971367845015, + "grad_norm": 0.06811830401420593, + "learning_rate": 1.6774215703400447e-06, + "loss": 0.0688, + "num_input_tokens_seen": 46450656, + "step": 68930 + }, + { + "epoch": 1.6840935186768622, + "grad_norm": 24.41603660583496, + "learning_rate": 1.677358838100443e-06, + "loss": 0.1232, + "num_input_tokens_seen": 46454048, + "step": 68935 + }, + { + "epoch": 1.6842156695087094, + "grad_norm": 10.065936088562012, + "learning_rate": 1.6772961009349063e-06, + "loss": 0.1119, + "num_input_tokens_seen": 46457440, + "step": 68940 + }, + { + "epoch": 1.6843378203405566, + "grad_norm": 1.9582791328430176, + "learning_rate": 1.6772333588438893e-06, + "loss": 0.0454, + "num_input_tokens_seen": 46461024, + "step": 68945 + }, + { + "epoch": 1.6844599711724038, + "grad_norm": 22.67094612121582, + "learning_rate": 1.67717061182785e-06, + "loss": 0.1713, + "num_input_tokens_seen": 46464096, + "step": 68950 + }, + { + "epoch": 1.684582122004251, + "grad_norm": 20.608978271484375, + "learning_rate": 1.6771078598872435e-06, + "loss": 0.0426, + "num_input_tokens_seen": 46467488, + "step": 68955 + }, + { + "epoch": 1.6847042728360981, + "grad_norm": 0.4787451922893524, + "learning_rate": 1.6770451030225267e-06, + "loss": 0.0021, + "num_input_tokens_seen": 46471200, + "step": 68960 + }, + { + "epoch": 1.6848264236679453, + "grad_norm": 0.26517850160598755, + "learning_rate": 1.6769823412341553e-06, + "loss": 0.0053, + "num_input_tokens_seen": 46474592, + "step": 68965 + }, + { + "epoch": 1.6849485744997923, + "grad_norm": 58.144775390625, + "learning_rate": 1.6769195745225866e-06, + "loss": 0.0448, + "num_input_tokens_seen": 46478048, + "step": 68970 + }, + { + "epoch": 1.6850707253316395, + "grad_norm": 4.901430606842041, + "learning_rate": 1.6768568028882767e-06, + "loss": 0.0328, + "num_input_tokens_seen": 46482080, + "step": 68975 + }, + { + "epoch": 1.6851928761634867, + "grad_norm": 0.4926772117614746, + "learning_rate": 1.6767940263316817e-06, + "loss": 0.0858, + "num_input_tokens_seen": 46485472, + "step": 68980 + }, + { + "epoch": 1.6853150269953339, + "grad_norm": 0.352978378534317, + "learning_rate": 1.676731244853259e-06, + "loss": 0.0376, + "num_input_tokens_seen": 46488736, + "step": 68985 + }, + { + "epoch": 1.685437177827181, + "grad_norm": 12.455333709716797, + "learning_rate": 1.6766684584534647e-06, + "loss": 0.0457, + "num_input_tokens_seen": 46492320, + "step": 68990 + }, + { + "epoch": 1.685559328659028, + "grad_norm": 25.381471633911133, + "learning_rate": 1.6766056671327551e-06, + "loss": 0.0196, + "num_input_tokens_seen": 46495520, + "step": 68995 + }, + { + "epoch": 1.6856814794908752, + "grad_norm": 19.42300033569336, + "learning_rate": 1.6765428708915871e-06, + "loss": 0.1296, + "num_input_tokens_seen": 46498848, + "step": 69000 + }, + { + "epoch": 1.6858036303227224, + "grad_norm": 0.03760528564453125, + "learning_rate": 1.6764800697304172e-06, + "loss": 0.0889, + "num_input_tokens_seen": 46502496, + "step": 69005 + }, + { + "epoch": 1.6859257811545696, + "grad_norm": 14.339239120483398, + "learning_rate": 1.6764172636497026e-06, + "loss": 0.1051, + "num_input_tokens_seen": 46506336, + "step": 69010 + }, + { + "epoch": 1.6860479319864168, + "grad_norm": 0.08472023904323578, + "learning_rate": 1.6763544526499e-06, + "loss": 0.0354, + "num_input_tokens_seen": 46509856, + "step": 69015 + }, + { + "epoch": 1.686170082818264, + "grad_norm": 12.50338363647461, + "learning_rate": 1.6762916367314651e-06, + "loss": 0.1313, + "num_input_tokens_seen": 46513120, + "step": 69020 + }, + { + "epoch": 1.6862922336501112, + "grad_norm": 0.1388133466243744, + "learning_rate": 1.6762288158948562e-06, + "loss": 0.0722, + "num_input_tokens_seen": 46516704, + "step": 69025 + }, + { + "epoch": 1.6864143844819584, + "grad_norm": 0.10646776854991913, + "learning_rate": 1.6761659901405291e-06, + "loss": 0.0932, + "num_input_tokens_seen": 46520288, + "step": 69030 + }, + { + "epoch": 1.6865365353138055, + "grad_norm": 35.494781494140625, + "learning_rate": 1.6761031594689414e-06, + "loss": 0.1082, + "num_input_tokens_seen": 46523552, + "step": 69035 + }, + { + "epoch": 1.6866586861456527, + "grad_norm": 1.7615808248519897, + "learning_rate": 1.6760403238805494e-06, + "loss": 0.0745, + "num_input_tokens_seen": 46527200, + "step": 69040 + }, + { + "epoch": 1.6867808369775, + "grad_norm": 0.13375285267829895, + "learning_rate": 1.6759774833758104e-06, + "loss": 0.0354, + "num_input_tokens_seen": 46530528, + "step": 69045 + }, + { + "epoch": 1.686902987809347, + "grad_norm": 0.13198687136173248, + "learning_rate": 1.6759146379551812e-06, + "loss": 0.0381, + "num_input_tokens_seen": 46533856, + "step": 69050 + }, + { + "epoch": 1.6870251386411943, + "grad_norm": 0.4579293727874756, + "learning_rate": 1.675851787619119e-06, + "loss": 0.0202, + "num_input_tokens_seen": 46537184, + "step": 69055 + }, + { + "epoch": 1.6871472894730413, + "grad_norm": 16.71668815612793, + "learning_rate": 1.6757889323680811e-06, + "loss": 0.0344, + "num_input_tokens_seen": 46540640, + "step": 69060 + }, + { + "epoch": 1.6872694403048885, + "grad_norm": 11.475341796875, + "learning_rate": 1.675726072202524e-06, + "loss": 0.1578, + "num_input_tokens_seen": 46544096, + "step": 69065 + }, + { + "epoch": 1.6873915911367356, + "grad_norm": 25.129030227661133, + "learning_rate": 1.6756632071229053e-06, + "loss": 0.1053, + "num_input_tokens_seen": 46547168, + "step": 69070 + }, + { + "epoch": 1.6875137419685828, + "grad_norm": 0.5593408942222595, + "learning_rate": 1.6756003371296822e-06, + "loss": 0.0037, + "num_input_tokens_seen": 46550624, + "step": 69075 + }, + { + "epoch": 1.68763589280043, + "grad_norm": 0.16857105493545532, + "learning_rate": 1.6755374622233114e-06, + "loss": 0.1352, + "num_input_tokens_seen": 46554848, + "step": 69080 + }, + { + "epoch": 1.687758043632277, + "grad_norm": 0.42032769322395325, + "learning_rate": 1.6754745824042505e-06, + "loss": 0.078, + "num_input_tokens_seen": 46558112, + "step": 69085 + }, + { + "epoch": 1.6878801944641242, + "grad_norm": 43.47393798828125, + "learning_rate": 1.675411697672957e-06, + "loss": 0.0805, + "num_input_tokens_seen": 46561056, + "step": 69090 + }, + { + "epoch": 1.6880023452959714, + "grad_norm": 0.16293494403362274, + "learning_rate": 1.6753488080298877e-06, + "loss": 0.0913, + "num_input_tokens_seen": 46564128, + "step": 69095 + }, + { + "epoch": 1.6881244961278186, + "grad_norm": 0.2841865122318268, + "learning_rate": 1.6752859134755003e-06, + "loss": 0.1935, + "num_input_tokens_seen": 46567584, + "step": 69100 + }, + { + "epoch": 1.6882466469596658, + "grad_norm": 0.5506531596183777, + "learning_rate": 1.6752230140102522e-06, + "loss": 0.0271, + "num_input_tokens_seen": 46571104, + "step": 69105 + }, + { + "epoch": 1.688368797791513, + "grad_norm": 0.11144746840000153, + "learning_rate": 1.6751601096346006e-06, + "loss": 0.0652, + "num_input_tokens_seen": 46574624, + "step": 69110 + }, + { + "epoch": 1.6884909486233601, + "grad_norm": 0.2003175914287567, + "learning_rate": 1.675097200349003e-06, + "loss": 0.0684, + "num_input_tokens_seen": 46578144, + "step": 69115 + }, + { + "epoch": 1.6886130994552073, + "grad_norm": 0.19120670855045319, + "learning_rate": 1.6750342861539174e-06, + "loss": 0.0776, + "num_input_tokens_seen": 46581600, + "step": 69120 + }, + { + "epoch": 1.6887352502870545, + "grad_norm": 0.9610084295272827, + "learning_rate": 1.6749713670498007e-06, + "loss": 0.0025, + "num_input_tokens_seen": 46584480, + "step": 69125 + }, + { + "epoch": 1.6888574011189017, + "grad_norm": 10.52871322631836, + "learning_rate": 1.6749084430371103e-06, + "loss": 0.0582, + "num_input_tokens_seen": 46588192, + "step": 69130 + }, + { + "epoch": 1.688979551950749, + "grad_norm": 69.1805648803711, + "learning_rate": 1.6748455141163048e-06, + "loss": 0.0433, + "num_input_tokens_seen": 46591712, + "step": 69135 + }, + { + "epoch": 1.689101702782596, + "grad_norm": 0.05372300744056702, + "learning_rate": 1.6747825802878408e-06, + "loss": 0.0615, + "num_input_tokens_seen": 46594912, + "step": 69140 + }, + { + "epoch": 1.6892238536144433, + "grad_norm": 0.12427949160337448, + "learning_rate": 1.6747196415521768e-06, + "loss": 0.0818, + "num_input_tokens_seen": 46598432, + "step": 69145 + }, + { + "epoch": 1.6893460044462902, + "grad_norm": 0.06056433171033859, + "learning_rate": 1.6746566979097697e-06, + "loss": 0.1306, + "num_input_tokens_seen": 46601504, + "step": 69150 + }, + { + "epoch": 1.6894681552781374, + "grad_norm": 23.436250686645508, + "learning_rate": 1.6745937493610776e-06, + "loss": 0.0447, + "num_input_tokens_seen": 46605024, + "step": 69155 + }, + { + "epoch": 1.6895903061099846, + "grad_norm": 1.130778193473816, + "learning_rate": 1.6745307959065584e-06, + "loss": 0.002, + "num_input_tokens_seen": 46608416, + "step": 69160 + }, + { + "epoch": 1.6897124569418318, + "grad_norm": 15.070996284484863, + "learning_rate": 1.6744678375466697e-06, + "loss": 0.1776, + "num_input_tokens_seen": 46612256, + "step": 69165 + }, + { + "epoch": 1.6898346077736788, + "grad_norm": 0.3131832480430603, + "learning_rate": 1.6744048742818698e-06, + "loss": 0.0016, + "num_input_tokens_seen": 46615776, + "step": 69170 + }, + { + "epoch": 1.689956758605526, + "grad_norm": 0.5031534433364868, + "learning_rate": 1.674341906112616e-06, + "loss": 0.1415, + "num_input_tokens_seen": 46618912, + "step": 69175 + }, + { + "epoch": 1.6900789094373732, + "grad_norm": 0.9567481279373169, + "learning_rate": 1.6742789330393668e-06, + "loss": 0.0341, + "num_input_tokens_seen": 46622496, + "step": 69180 + }, + { + "epoch": 1.6902010602692203, + "grad_norm": 19.70442008972168, + "learning_rate": 1.6742159550625794e-06, + "loss": 0.0437, + "num_input_tokens_seen": 46625696, + "step": 69185 + }, + { + "epoch": 1.6903232111010675, + "grad_norm": 27.411558151245117, + "learning_rate": 1.6741529721827123e-06, + "loss": 0.1078, + "num_input_tokens_seen": 46629088, + "step": 69190 + }, + { + "epoch": 1.6904453619329147, + "grad_norm": 51.215877532958984, + "learning_rate": 1.6740899844002238e-06, + "loss": 0.3351, + "num_input_tokens_seen": 46632416, + "step": 69195 + }, + { + "epoch": 1.690567512764762, + "grad_norm": 0.056414637714624405, + "learning_rate": 1.6740269917155715e-06, + "loss": 0.1243, + "num_input_tokens_seen": 46635872, + "step": 69200 + }, + { + "epoch": 1.690689663596609, + "grad_norm": 43.76165771484375, + "learning_rate": 1.6739639941292134e-06, + "loss": 0.1398, + "num_input_tokens_seen": 46639264, + "step": 69205 + }, + { + "epoch": 1.6908118144284563, + "grad_norm": 0.24457961320877075, + "learning_rate": 1.673900991641608e-06, + "loss": 0.1354, + "num_input_tokens_seen": 46642720, + "step": 69210 + }, + { + "epoch": 1.6909339652603035, + "grad_norm": 0.11181172728538513, + "learning_rate": 1.6738379842532134e-06, + "loss": 0.1438, + "num_input_tokens_seen": 46646368, + "step": 69215 + }, + { + "epoch": 1.6910561160921507, + "grad_norm": 0.517985999584198, + "learning_rate": 1.6737749719644877e-06, + "loss": 0.0384, + "num_input_tokens_seen": 46650208, + "step": 69220 + }, + { + "epoch": 1.6911782669239979, + "grad_norm": 64.62832641601562, + "learning_rate": 1.673711954775889e-06, + "loss": 0.1072, + "num_input_tokens_seen": 46653216, + "step": 69225 + }, + { + "epoch": 1.691300417755845, + "grad_norm": 0.5788198113441467, + "learning_rate": 1.673648932687876e-06, + "loss": 0.1493, + "num_input_tokens_seen": 46656544, + "step": 69230 + }, + { + "epoch": 1.6914225685876922, + "grad_norm": 0.27993226051330566, + "learning_rate": 1.6735859057009068e-06, + "loss": 0.0677, + "num_input_tokens_seen": 46659808, + "step": 69235 + }, + { + "epoch": 1.6915447194195392, + "grad_norm": 48.72805404663086, + "learning_rate": 1.6735228738154397e-06, + "loss": 0.1851, + "num_input_tokens_seen": 46663008, + "step": 69240 + }, + { + "epoch": 1.6916668702513864, + "grad_norm": 104.7187728881836, + "learning_rate": 1.6734598370319328e-06, + "loss": 0.0098, + "num_input_tokens_seen": 46666144, + "step": 69245 + }, + { + "epoch": 1.6917890210832336, + "grad_norm": 0.23837001621723175, + "learning_rate": 1.673396795350845e-06, + "loss": 0.1005, + "num_input_tokens_seen": 46669984, + "step": 69250 + }, + { + "epoch": 1.6919111719150808, + "grad_norm": 78.72986602783203, + "learning_rate": 1.6733337487726346e-06, + "loss": 0.0059, + "num_input_tokens_seen": 46673248, + "step": 69255 + }, + { + "epoch": 1.6920333227469277, + "grad_norm": 0.32147687673568726, + "learning_rate": 1.67327069729776e-06, + "loss": 0.0398, + "num_input_tokens_seen": 46676640, + "step": 69260 + }, + { + "epoch": 1.692155473578775, + "grad_norm": 0.1801147311925888, + "learning_rate": 1.6732076409266802e-06, + "loss": 0.0572, + "num_input_tokens_seen": 46680352, + "step": 69265 + }, + { + "epoch": 1.6922776244106221, + "grad_norm": 40.23493576049805, + "learning_rate": 1.673144579659853e-06, + "loss": 0.095, + "num_input_tokens_seen": 46684192, + "step": 69270 + }, + { + "epoch": 1.6923997752424693, + "grad_norm": 0.26882249116897583, + "learning_rate": 1.6730815134977374e-06, + "loss": 0.0931, + "num_input_tokens_seen": 46687200, + "step": 69275 + }, + { + "epoch": 1.6925219260743165, + "grad_norm": 25.242671966552734, + "learning_rate": 1.6730184424407922e-06, + "loss": 0.0697, + "num_input_tokens_seen": 46690912, + "step": 69280 + }, + { + "epoch": 1.6926440769061637, + "grad_norm": 34.11570739746094, + "learning_rate": 1.6729553664894756e-06, + "loss": 0.0688, + "num_input_tokens_seen": 46694304, + "step": 69285 + }, + { + "epoch": 1.6927662277380109, + "grad_norm": 15.972042083740234, + "learning_rate": 1.6728922856442465e-06, + "loss": 0.0371, + "num_input_tokens_seen": 46697632, + "step": 69290 + }, + { + "epoch": 1.692888378569858, + "grad_norm": 0.31541693210601807, + "learning_rate": 1.672829199905564e-06, + "loss": 0.0344, + "num_input_tokens_seen": 46700960, + "step": 69295 + }, + { + "epoch": 1.6930105294017053, + "grad_norm": 22.75095558166504, + "learning_rate": 1.6727661092738865e-06, + "loss": 0.1221, + "num_input_tokens_seen": 46704032, + "step": 69300 + }, + { + "epoch": 1.6931326802335525, + "grad_norm": 21.048616409301758, + "learning_rate": 1.6727030137496728e-06, + "loss": 0.0472, + "num_input_tokens_seen": 46707424, + "step": 69305 + }, + { + "epoch": 1.6932548310653996, + "grad_norm": 74.95085144042969, + "learning_rate": 1.672639913333382e-06, + "loss": 0.1567, + "num_input_tokens_seen": 46710624, + "step": 69310 + }, + { + "epoch": 1.6933769818972468, + "grad_norm": 0.23408760130405426, + "learning_rate": 1.6725768080254726e-06, + "loss": 0.0425, + "num_input_tokens_seen": 46714208, + "step": 69315 + }, + { + "epoch": 1.693499132729094, + "grad_norm": 0.7480437755584717, + "learning_rate": 1.6725136978264038e-06, + "loss": 0.0015, + "num_input_tokens_seen": 46717536, + "step": 69320 + }, + { + "epoch": 1.6936212835609412, + "grad_norm": 0.23599670827388763, + "learning_rate": 1.6724505827366349e-06, + "loss": 0.0483, + "num_input_tokens_seen": 46720736, + "step": 69325 + }, + { + "epoch": 1.6937434343927882, + "grad_norm": 0.039702676236629486, + "learning_rate": 1.6723874627566242e-06, + "loss": 0.0668, + "num_input_tokens_seen": 46724192, + "step": 69330 + }, + { + "epoch": 1.6938655852246354, + "grad_norm": 1.4262231588363647, + "learning_rate": 1.672324337886831e-06, + "loss": 0.0038, + "num_input_tokens_seen": 46727904, + "step": 69335 + }, + { + "epoch": 1.6939877360564826, + "grad_norm": 0.09134913980960846, + "learning_rate": 1.6722612081277143e-06, + "loss": 0.1054, + "num_input_tokens_seen": 46730912, + "step": 69340 + }, + { + "epoch": 1.6941098868883298, + "grad_norm": 0.017633700743317604, + "learning_rate": 1.6721980734797334e-06, + "loss": 0.0426, + "num_input_tokens_seen": 46734048, + "step": 69345 + }, + { + "epoch": 1.6942320377201767, + "grad_norm": 0.31877022981643677, + "learning_rate": 1.6721349339433472e-06, + "loss": 0.0033, + "num_input_tokens_seen": 46737184, + "step": 69350 + }, + { + "epoch": 1.694354188552024, + "grad_norm": 18.597759246826172, + "learning_rate": 1.672071789519015e-06, + "loss": 0.0835, + "num_input_tokens_seen": 46740384, + "step": 69355 + }, + { + "epoch": 1.694476339383871, + "grad_norm": 71.34232330322266, + "learning_rate": 1.672008640207196e-06, + "loss": 0.1602, + "num_input_tokens_seen": 46743328, + "step": 69360 + }, + { + "epoch": 1.6945984902157183, + "grad_norm": 0.0754992663860321, + "learning_rate": 1.6719454860083495e-06, + "loss": 0.0006, + "num_input_tokens_seen": 46746464, + "step": 69365 + }, + { + "epoch": 1.6947206410475655, + "grad_norm": 20.865156173706055, + "learning_rate": 1.6718823269229348e-06, + "loss": 0.1186, + "num_input_tokens_seen": 46749536, + "step": 69370 + }, + { + "epoch": 1.6948427918794127, + "grad_norm": 0.1701429784297943, + "learning_rate": 1.6718191629514112e-06, + "loss": 0.0287, + "num_input_tokens_seen": 46752800, + "step": 69375 + }, + { + "epoch": 1.6949649427112599, + "grad_norm": 0.1040308028459549, + "learning_rate": 1.6717559940942373e-06, + "loss": 0.0619, + "num_input_tokens_seen": 46756256, + "step": 69380 + }, + { + "epoch": 1.695087093543107, + "grad_norm": 1.0910602807998657, + "learning_rate": 1.6716928203518736e-06, + "loss": 0.1257, + "num_input_tokens_seen": 46760096, + "step": 69385 + }, + { + "epoch": 1.6952092443749542, + "grad_norm": 0.39105257391929626, + "learning_rate": 1.671629641724779e-06, + "loss": 0.1073, + "num_input_tokens_seen": 46763616, + "step": 69390 + }, + { + "epoch": 1.6953313952068014, + "grad_norm": 0.020331593230366707, + "learning_rate": 1.671566458213413e-06, + "loss": 0.0021, + "num_input_tokens_seen": 46767328, + "step": 69395 + }, + { + "epoch": 1.6954535460386486, + "grad_norm": 1.1094465255737305, + "learning_rate": 1.6715032698182352e-06, + "loss": 0.0018, + "num_input_tokens_seen": 46770912, + "step": 69400 + }, + { + "epoch": 1.6955756968704958, + "grad_norm": 17.54354476928711, + "learning_rate": 1.6714400765397047e-06, + "loss": 0.2174, + "num_input_tokens_seen": 46774816, + "step": 69405 + }, + { + "epoch": 1.695697847702343, + "grad_norm": 180.63446044921875, + "learning_rate": 1.6713768783782815e-06, + "loss": 0.0606, + "num_input_tokens_seen": 46777888, + "step": 69410 + }, + { + "epoch": 1.69581999853419, + "grad_norm": 72.77506256103516, + "learning_rate": 1.6713136753344253e-06, + "loss": 0.1072, + "num_input_tokens_seen": 46781280, + "step": 69415 + }, + { + "epoch": 1.6959421493660372, + "grad_norm": 0.4094438850879669, + "learning_rate": 1.6712504674085951e-06, + "loss": 0.0504, + "num_input_tokens_seen": 46784800, + "step": 69420 + }, + { + "epoch": 1.6960643001978843, + "grad_norm": 0.544030487537384, + "learning_rate": 1.6711872546012512e-06, + "loss": 0.0243, + "num_input_tokens_seen": 46788064, + "step": 69425 + }, + { + "epoch": 1.6961864510297315, + "grad_norm": 24.769922256469727, + "learning_rate": 1.671124036912853e-06, + "loss": 0.2521, + "num_input_tokens_seen": 46791904, + "step": 69430 + }, + { + "epoch": 1.6963086018615787, + "grad_norm": 0.060142744332551956, + "learning_rate": 1.6710608143438606e-06, + "loss": 0.0017, + "num_input_tokens_seen": 46795168, + "step": 69435 + }, + { + "epoch": 1.6964307526934257, + "grad_norm": 0.5809279084205627, + "learning_rate": 1.670997586894733e-06, + "loss": 0.1179, + "num_input_tokens_seen": 46798944, + "step": 69440 + }, + { + "epoch": 1.6965529035252729, + "grad_norm": 1.001853346824646, + "learning_rate": 1.6709343545659307e-06, + "loss": 0.0248, + "num_input_tokens_seen": 46801952, + "step": 69445 + }, + { + "epoch": 1.69667505435712, + "grad_norm": 0.9293713569641113, + "learning_rate": 1.670871117357913e-06, + "loss": 0.0537, + "num_input_tokens_seen": 46805216, + "step": 69450 + }, + { + "epoch": 1.6967972051889673, + "grad_norm": 0.30881839990615845, + "learning_rate": 1.6708078752711408e-06, + "loss": 0.0364, + "num_input_tokens_seen": 46809120, + "step": 69455 + }, + { + "epoch": 1.6969193560208145, + "grad_norm": 0.004009113647043705, + "learning_rate": 1.6707446283060727e-06, + "loss": 0.0009, + "num_input_tokens_seen": 46812384, + "step": 69460 + }, + { + "epoch": 1.6970415068526616, + "grad_norm": 0.1301659345626831, + "learning_rate": 1.6706813764631696e-06, + "loss": 0.0731, + "num_input_tokens_seen": 46815456, + "step": 69465 + }, + { + "epoch": 1.6971636576845088, + "grad_norm": 20.536996841430664, + "learning_rate": 1.6706181197428908e-06, + "loss": 0.0398, + "num_input_tokens_seen": 46818592, + "step": 69470 + }, + { + "epoch": 1.697285808516356, + "grad_norm": 11.145803451538086, + "learning_rate": 1.6705548581456967e-06, + "loss": 0.1567, + "num_input_tokens_seen": 46822048, + "step": 69475 + }, + { + "epoch": 1.6974079593482032, + "grad_norm": 0.12410733848810196, + "learning_rate": 1.6704915916720474e-06, + "loss": 0.0009, + "num_input_tokens_seen": 46825568, + "step": 69480 + }, + { + "epoch": 1.6975301101800504, + "grad_norm": 1.088709831237793, + "learning_rate": 1.670428320322403e-06, + "loss": 0.0305, + "num_input_tokens_seen": 46828768, + "step": 69485 + }, + { + "epoch": 1.6976522610118976, + "grad_norm": 0.5592111349105835, + "learning_rate": 1.6703650440972235e-06, + "loss": 0.031, + "num_input_tokens_seen": 46831840, + "step": 69490 + }, + { + "epoch": 1.6977744118437448, + "grad_norm": 18.33035659790039, + "learning_rate": 1.670301762996969e-06, + "loss": 0.0762, + "num_input_tokens_seen": 46835104, + "step": 69495 + }, + { + "epoch": 1.697896562675592, + "grad_norm": 9.992659568786621, + "learning_rate": 1.6702384770220998e-06, + "loss": 0.1448, + "num_input_tokens_seen": 46838432, + "step": 69500 + }, + { + "epoch": 1.698018713507439, + "grad_norm": 0.07260085642337799, + "learning_rate": 1.6701751861730763e-06, + "loss": 0.1469, + "num_input_tokens_seen": 46841632, + "step": 69505 + }, + { + "epoch": 1.6981408643392861, + "grad_norm": 50.94322204589844, + "learning_rate": 1.6701118904503581e-06, + "loss": 0.1473, + "num_input_tokens_seen": 46845088, + "step": 69510 + }, + { + "epoch": 1.6982630151711333, + "grad_norm": 0.3196463882923126, + "learning_rate": 1.6700485898544067e-06, + "loss": 0.1444, + "num_input_tokens_seen": 46848288, + "step": 69515 + }, + { + "epoch": 1.6983851660029805, + "grad_norm": 12.756587028503418, + "learning_rate": 1.6699852843856813e-06, + "loss": 0.0402, + "num_input_tokens_seen": 46851552, + "step": 69520 + }, + { + "epoch": 1.6985073168348277, + "grad_norm": 0.03349412605166435, + "learning_rate": 1.6699219740446426e-06, + "loss": 0.1171, + "num_input_tokens_seen": 46854880, + "step": 69525 + }, + { + "epoch": 1.6986294676666747, + "grad_norm": 9.584224700927734, + "learning_rate": 1.6698586588317515e-06, + "loss": 0.0676, + "num_input_tokens_seen": 46858208, + "step": 69530 + }, + { + "epoch": 1.6987516184985219, + "grad_norm": 0.9124466776847839, + "learning_rate": 1.669795338747468e-06, + "loss": 0.0055, + "num_input_tokens_seen": 46861792, + "step": 69535 + }, + { + "epoch": 1.698873769330369, + "grad_norm": 40.700469970703125, + "learning_rate": 1.6697320137922524e-06, + "loss": 0.0251, + "num_input_tokens_seen": 46865184, + "step": 69540 + }, + { + "epoch": 1.6989959201622162, + "grad_norm": 30.935789108276367, + "learning_rate": 1.6696686839665655e-06, + "loss": 0.1008, + "num_input_tokens_seen": 46868512, + "step": 69545 + }, + { + "epoch": 1.6991180709940634, + "grad_norm": 0.03304780647158623, + "learning_rate": 1.669605349270868e-06, + "loss": 0.0005, + "num_input_tokens_seen": 46871712, + "step": 69550 + }, + { + "epoch": 1.6992402218259106, + "grad_norm": 11.477385520935059, + "learning_rate": 1.66954200970562e-06, + "loss": 0.0801, + "num_input_tokens_seen": 46875040, + "step": 69555 + }, + { + "epoch": 1.6993623726577578, + "grad_norm": 0.036940060555934906, + "learning_rate": 1.6694786652712827e-06, + "loss": 0.0201, + "num_input_tokens_seen": 46878432, + "step": 69560 + }, + { + "epoch": 1.699484523489605, + "grad_norm": 0.2850690484046936, + "learning_rate": 1.6694153159683162e-06, + "loss": 0.1146, + "num_input_tokens_seen": 46881248, + "step": 69565 + }, + { + "epoch": 1.6996066743214522, + "grad_norm": 0.2382366955280304, + "learning_rate": 1.6693519617971816e-06, + "loss": 0.1317, + "num_input_tokens_seen": 46885408, + "step": 69570 + }, + { + "epoch": 1.6997288251532994, + "grad_norm": 0.17419885098934174, + "learning_rate": 1.6692886027583397e-06, + "loss": 0.0317, + "num_input_tokens_seen": 46888928, + "step": 69575 + }, + { + "epoch": 1.6998509759851466, + "grad_norm": 0.36933374404907227, + "learning_rate": 1.669225238852251e-06, + "loss": 0.1065, + "num_input_tokens_seen": 46892320, + "step": 69580 + }, + { + "epoch": 1.6999731268169938, + "grad_norm": 3.722100019454956, + "learning_rate": 1.6691618700793763e-06, + "loss": 0.2065, + "num_input_tokens_seen": 46895648, + "step": 69585 + }, + { + "epoch": 1.700095277648841, + "grad_norm": 20.817859649658203, + "learning_rate": 1.6690984964401764e-06, + "loss": 0.1437, + "num_input_tokens_seen": 46898784, + "step": 69590 + }, + { + "epoch": 1.700217428480688, + "grad_norm": 23.722640991210938, + "learning_rate": 1.6690351179351123e-06, + "loss": 0.1236, + "num_input_tokens_seen": 46901728, + "step": 69595 + }, + { + "epoch": 1.700339579312535, + "grad_norm": 0.49860385060310364, + "learning_rate": 1.668971734564645e-06, + "loss": 0.161, + "num_input_tokens_seen": 46905248, + "step": 69600 + }, + { + "epoch": 1.7004617301443823, + "grad_norm": 13.744087219238281, + "learning_rate": 1.668908346329235e-06, + "loss": 0.0781, + "num_input_tokens_seen": 46908384, + "step": 69605 + }, + { + "epoch": 1.7005838809762295, + "grad_norm": 37.584285736083984, + "learning_rate": 1.668844953229344e-06, + "loss": 0.2057, + "num_input_tokens_seen": 46911648, + "step": 69610 + }, + { + "epoch": 1.7007060318080767, + "grad_norm": 0.019656836986541748, + "learning_rate": 1.6687815552654325e-06, + "loss": 0.0927, + "num_input_tokens_seen": 46914976, + "step": 69615 + }, + { + "epoch": 1.7008281826399236, + "grad_norm": 0.4337562024593353, + "learning_rate": 1.6687181524379613e-06, + "loss": 0.0305, + "num_input_tokens_seen": 46918752, + "step": 69620 + }, + { + "epoch": 1.7009503334717708, + "grad_norm": 0.05799011513590813, + "learning_rate": 1.6686547447473924e-06, + "loss": 0.0596, + "num_input_tokens_seen": 46922336, + "step": 69625 + }, + { + "epoch": 1.701072484303618, + "grad_norm": 0.18245580792427063, + "learning_rate": 1.668591332194186e-06, + "loss": 0.0261, + "num_input_tokens_seen": 46925792, + "step": 69630 + }, + { + "epoch": 1.7011946351354652, + "grad_norm": 0.5550912022590637, + "learning_rate": 1.6685279147788036e-06, + "loss": 0.0987, + "num_input_tokens_seen": 46930272, + "step": 69635 + }, + { + "epoch": 1.7013167859673124, + "grad_norm": 21.118289947509766, + "learning_rate": 1.6684644925017067e-06, + "loss": 0.1937, + "num_input_tokens_seen": 46933536, + "step": 69640 + }, + { + "epoch": 1.7014389367991596, + "grad_norm": 71.64469146728516, + "learning_rate": 1.6684010653633559e-06, + "loss": 0.149, + "num_input_tokens_seen": 46936864, + "step": 69645 + }, + { + "epoch": 1.7015610876310068, + "grad_norm": 3.9675886631011963, + "learning_rate": 1.6683376333642127e-06, + "loss": 0.0415, + "num_input_tokens_seen": 46940128, + "step": 69650 + }, + { + "epoch": 1.701683238462854, + "grad_norm": 1.2720179557800293, + "learning_rate": 1.6682741965047386e-06, + "loss": 0.0025, + "num_input_tokens_seen": 46943264, + "step": 69655 + }, + { + "epoch": 1.7018053892947012, + "grad_norm": 19.108028411865234, + "learning_rate": 1.6682107547853948e-06, + "loss": 0.1639, + "num_input_tokens_seen": 46946656, + "step": 69660 + }, + { + "epoch": 1.7019275401265483, + "grad_norm": 0.23258452117443085, + "learning_rate": 1.6681473082066426e-06, + "loss": 0.0259, + "num_input_tokens_seen": 46949920, + "step": 69665 + }, + { + "epoch": 1.7020496909583955, + "grad_norm": 35.97227096557617, + "learning_rate": 1.6680838567689436e-06, + "loss": 0.0954, + "num_input_tokens_seen": 46954144, + "step": 69670 + }, + { + "epoch": 1.7021718417902427, + "grad_norm": 12.28347396850586, + "learning_rate": 1.6680204004727592e-06, + "loss": 0.1621, + "num_input_tokens_seen": 46957472, + "step": 69675 + }, + { + "epoch": 1.70229399262209, + "grad_norm": 52.67514419555664, + "learning_rate": 1.6679569393185506e-06, + "loss": 0.0807, + "num_input_tokens_seen": 46961376, + "step": 69680 + }, + { + "epoch": 1.7024161434539369, + "grad_norm": 13.373403549194336, + "learning_rate": 1.6678934733067793e-06, + "loss": 0.0841, + "num_input_tokens_seen": 46964640, + "step": 69685 + }, + { + "epoch": 1.702538294285784, + "grad_norm": 0.8186175227165222, + "learning_rate": 1.6678300024379073e-06, + "loss": 0.0441, + "num_input_tokens_seen": 46968480, + "step": 69690 + }, + { + "epoch": 1.7026604451176313, + "grad_norm": 1.1019536256790161, + "learning_rate": 1.6677665267123956e-06, + "loss": 0.0706, + "num_input_tokens_seen": 46971744, + "step": 69695 + }, + { + "epoch": 1.7027825959494785, + "grad_norm": 28.882869720458984, + "learning_rate": 1.6677030461307065e-06, + "loss": 0.0741, + "num_input_tokens_seen": 46975200, + "step": 69700 + }, + { + "epoch": 1.7029047467813254, + "grad_norm": 65.26024627685547, + "learning_rate": 1.667639560693301e-06, + "loss": 0.1458, + "num_input_tokens_seen": 46979232, + "step": 69705 + }, + { + "epoch": 1.7030268976131726, + "grad_norm": 0.39398840069770813, + "learning_rate": 1.6675760704006412e-06, + "loss": 0.0283, + "num_input_tokens_seen": 46983520, + "step": 69710 + }, + { + "epoch": 1.7031490484450198, + "grad_norm": 0.35874584317207336, + "learning_rate": 1.6675125752531884e-06, + "loss": 0.1395, + "num_input_tokens_seen": 46986720, + "step": 69715 + }, + { + "epoch": 1.703271199276867, + "grad_norm": 28.241769790649414, + "learning_rate": 1.667449075251405e-06, + "loss": 0.1001, + "num_input_tokens_seen": 46989920, + "step": 69720 + }, + { + "epoch": 1.7033933501087142, + "grad_norm": 9.246172904968262, + "learning_rate": 1.6673855703957523e-06, + "loss": 0.0372, + "num_input_tokens_seen": 46993184, + "step": 69725 + }, + { + "epoch": 1.7035155009405614, + "grad_norm": 15.649399757385254, + "learning_rate": 1.667322060686692e-06, + "loss": 0.1044, + "num_input_tokens_seen": 46996640, + "step": 69730 + }, + { + "epoch": 1.7036376517724086, + "grad_norm": 18.9222412109375, + "learning_rate": 1.667258546124686e-06, + "loss": 0.0357, + "num_input_tokens_seen": 46999968, + "step": 69735 + }, + { + "epoch": 1.7037598026042557, + "grad_norm": 0.4368104338645935, + "learning_rate": 1.6671950267101972e-06, + "loss": 0.0337, + "num_input_tokens_seen": 47003488, + "step": 69740 + }, + { + "epoch": 1.703881953436103, + "grad_norm": 0.14030565321445465, + "learning_rate": 1.667131502443686e-06, + "loss": 0.0329, + "num_input_tokens_seen": 47007136, + "step": 69745 + }, + { + "epoch": 1.7040041042679501, + "grad_norm": 0.21858637034893036, + "learning_rate": 1.6670679733256154e-06, + "loss": 0.1352, + "num_input_tokens_seen": 47010528, + "step": 69750 + }, + { + "epoch": 1.7041262550997973, + "grad_norm": 0.08449557423591614, + "learning_rate": 1.6670044393564467e-06, + "loss": 0.0414, + "num_input_tokens_seen": 47013792, + "step": 69755 + }, + { + "epoch": 1.7042484059316445, + "grad_norm": 0.11067601293325424, + "learning_rate": 1.6669409005366426e-06, + "loss": 0.0764, + "num_input_tokens_seen": 47016928, + "step": 69760 + }, + { + "epoch": 1.7043705567634917, + "grad_norm": 0.1474686861038208, + "learning_rate": 1.666877356866665e-06, + "loss": 0.1261, + "num_input_tokens_seen": 47020320, + "step": 69765 + }, + { + "epoch": 1.7044927075953389, + "grad_norm": 0.42805859446525574, + "learning_rate": 1.6668138083469756e-06, + "loss": 0.03, + "num_input_tokens_seen": 47023648, + "step": 69770 + }, + { + "epoch": 1.7046148584271859, + "grad_norm": 28.6177978515625, + "learning_rate": 1.666750254978037e-06, + "loss": 0.2081, + "num_input_tokens_seen": 47027360, + "step": 69775 + }, + { + "epoch": 1.704737009259033, + "grad_norm": 0.14059709012508392, + "learning_rate": 1.6666866967603113e-06, + "loss": 0.004, + "num_input_tokens_seen": 47031200, + "step": 69780 + }, + { + "epoch": 1.7048591600908802, + "grad_norm": 0.14516912400722504, + "learning_rate": 1.6666231336942604e-06, + "loss": 0.0831, + "num_input_tokens_seen": 47034848, + "step": 69785 + }, + { + "epoch": 1.7049813109227274, + "grad_norm": 0.589957594871521, + "learning_rate": 1.666559565780347e-06, + "loss": 0.0354, + "num_input_tokens_seen": 47038496, + "step": 69790 + }, + { + "epoch": 1.7051034617545744, + "grad_norm": 0.5143163204193115, + "learning_rate": 1.666495993019033e-06, + "loss": 0.0632, + "num_input_tokens_seen": 47041824, + "step": 69795 + }, + { + "epoch": 1.7052256125864216, + "grad_norm": 0.0627521201968193, + "learning_rate": 1.6664324154107807e-06, + "loss": 0.0357, + "num_input_tokens_seen": 47045088, + "step": 69800 + }, + { + "epoch": 1.7053477634182688, + "grad_norm": 28.07826805114746, + "learning_rate": 1.666368832956053e-06, + "loss": 0.1022, + "num_input_tokens_seen": 47048416, + "step": 69805 + }, + { + "epoch": 1.705469914250116, + "grad_norm": 0.01447698101401329, + "learning_rate": 1.666305245655312e-06, + "loss": 0.0911, + "num_input_tokens_seen": 47051488, + "step": 69810 + }, + { + "epoch": 1.7055920650819631, + "grad_norm": 0.7165692448616028, + "learning_rate": 1.6662416535090196e-06, + "loss": 0.2176, + "num_input_tokens_seen": 47054944, + "step": 69815 + }, + { + "epoch": 1.7057142159138103, + "grad_norm": 22.16487693786621, + "learning_rate": 1.6661780565176388e-06, + "loss": 0.1212, + "num_input_tokens_seen": 47058208, + "step": 69820 + }, + { + "epoch": 1.7058363667456575, + "grad_norm": 0.23730194568634033, + "learning_rate": 1.6661144546816321e-06, + "loss": 0.0909, + "num_input_tokens_seen": 47061728, + "step": 69825 + }, + { + "epoch": 1.7059585175775047, + "grad_norm": 1.4801957607269287, + "learning_rate": 1.6660508480014618e-06, + "loss": 0.0594, + "num_input_tokens_seen": 47064864, + "step": 69830 + }, + { + "epoch": 1.706080668409352, + "grad_norm": 1.1917213201522827, + "learning_rate": 1.665987236477591e-06, + "loss": 0.0653, + "num_input_tokens_seen": 47067936, + "step": 69835 + }, + { + "epoch": 1.706202819241199, + "grad_norm": 3.69415545463562, + "learning_rate": 1.6659236201104814e-06, + "loss": 0.1373, + "num_input_tokens_seen": 47071200, + "step": 69840 + }, + { + "epoch": 1.7063249700730463, + "grad_norm": 58.802913665771484, + "learning_rate": 1.665859998900596e-06, + "loss": 0.1086, + "num_input_tokens_seen": 47075040, + "step": 69845 + }, + { + "epoch": 1.7064471209048935, + "grad_norm": 1.404065728187561, + "learning_rate": 1.6657963728483981e-06, + "loss": 0.1358, + "num_input_tokens_seen": 47078944, + "step": 69850 + }, + { + "epoch": 1.7065692717367407, + "grad_norm": 0.05396494269371033, + "learning_rate": 1.6657327419543496e-06, + "loss": 0.1373, + "num_input_tokens_seen": 47081952, + "step": 69855 + }, + { + "epoch": 1.7066914225685879, + "grad_norm": 0.3325052857398987, + "learning_rate": 1.665669106218914e-06, + "loss": 0.0671, + "num_input_tokens_seen": 47085408, + "step": 69860 + }, + { + "epoch": 1.7068135734004348, + "grad_norm": 11.508901596069336, + "learning_rate": 1.665605465642553e-06, + "loss": 0.0399, + "num_input_tokens_seen": 47089568, + "step": 69865 + }, + { + "epoch": 1.706935724232282, + "grad_norm": 14.941136360168457, + "learning_rate": 1.6655418202257305e-06, + "loss": 0.033, + "num_input_tokens_seen": 47093024, + "step": 69870 + }, + { + "epoch": 1.7070578750641292, + "grad_norm": 12.48923397064209, + "learning_rate": 1.6654781699689086e-06, + "loss": 0.0416, + "num_input_tokens_seen": 47096544, + "step": 69875 + }, + { + "epoch": 1.7071800258959764, + "grad_norm": 0.9678665995597839, + "learning_rate": 1.6654145148725506e-06, + "loss": 0.0335, + "num_input_tokens_seen": 47100128, + "step": 69880 + }, + { + "epoch": 1.7073021767278234, + "grad_norm": 0.45745396614074707, + "learning_rate": 1.665350854937119e-06, + "loss": 0.0633, + "num_input_tokens_seen": 47103840, + "step": 69885 + }, + { + "epoch": 1.7074243275596706, + "grad_norm": 29.906553268432617, + "learning_rate": 1.6652871901630772e-06, + "loss": 0.2042, + "num_input_tokens_seen": 47107040, + "step": 69890 + }, + { + "epoch": 1.7075464783915177, + "grad_norm": 2.247661590576172, + "learning_rate": 1.665223520550888e-06, + "loss": 0.111, + "num_input_tokens_seen": 47110560, + "step": 69895 + }, + { + "epoch": 1.707668629223365, + "grad_norm": 0.3283466398715973, + "learning_rate": 1.6651598461010146e-06, + "loss": 0.0022, + "num_input_tokens_seen": 47114016, + "step": 69900 + }, + { + "epoch": 1.7077907800552121, + "grad_norm": 0.268053263425827, + "learning_rate": 1.6650961668139197e-06, + "loss": 0.0567, + "num_input_tokens_seen": 47116704, + "step": 69905 + }, + { + "epoch": 1.7079129308870593, + "grad_norm": 0.3636005222797394, + "learning_rate": 1.6650324826900666e-06, + "loss": 0.0021, + "num_input_tokens_seen": 47120288, + "step": 69910 + }, + { + "epoch": 1.7080350817189065, + "grad_norm": 0.027881575748324394, + "learning_rate": 1.6649687937299183e-06, + "loss": 0.0011, + "num_input_tokens_seen": 47123168, + "step": 69915 + }, + { + "epoch": 1.7081572325507537, + "grad_norm": 1.2808572053909302, + "learning_rate": 1.6649050999339382e-06, + "loss": 0.0612, + "num_input_tokens_seen": 47126304, + "step": 69920 + }, + { + "epoch": 1.7082793833826009, + "grad_norm": 21.61374282836914, + "learning_rate": 1.6648414013025895e-06, + "loss": 0.1222, + "num_input_tokens_seen": 47130016, + "step": 69925 + }, + { + "epoch": 1.708401534214448, + "grad_norm": 0.17723463475704193, + "learning_rate": 1.6647776978363354e-06, + "loss": 0.0545, + "num_input_tokens_seen": 47133728, + "step": 69930 + }, + { + "epoch": 1.7085236850462953, + "grad_norm": 0.22193682193756104, + "learning_rate": 1.6647139895356388e-06, + "loss": 0.0524, + "num_input_tokens_seen": 47137056, + "step": 69935 + }, + { + "epoch": 1.7086458358781424, + "grad_norm": 106.50350952148438, + "learning_rate": 1.6646502764009633e-06, + "loss": 0.0917, + "num_input_tokens_seen": 47140512, + "step": 69940 + }, + { + "epoch": 1.7087679867099896, + "grad_norm": 1.4736918210983276, + "learning_rate": 1.6645865584327723e-06, + "loss": 0.0343, + "num_input_tokens_seen": 47143904, + "step": 69945 + }, + { + "epoch": 1.7088901375418366, + "grad_norm": 133.668701171875, + "learning_rate": 1.664522835631529e-06, + "loss": 0.1482, + "num_input_tokens_seen": 47147296, + "step": 69950 + }, + { + "epoch": 1.7090122883736838, + "grad_norm": 15.187579154968262, + "learning_rate": 1.6644591079976971e-06, + "loss": 0.1285, + "num_input_tokens_seen": 47150304, + "step": 69955 + }, + { + "epoch": 1.709134439205531, + "grad_norm": 0.03582199290394783, + "learning_rate": 1.6643953755317397e-06, + "loss": 0.0495, + "num_input_tokens_seen": 47153888, + "step": 69960 + }, + { + "epoch": 1.7092565900373782, + "grad_norm": 10.01606559753418, + "learning_rate": 1.6643316382341204e-06, + "loss": 0.0723, + "num_input_tokens_seen": 47157280, + "step": 69965 + }, + { + "epoch": 1.7093787408692254, + "grad_norm": 0.12903539836406708, + "learning_rate": 1.664267896105303e-06, + "loss": 0.1263, + "num_input_tokens_seen": 47160608, + "step": 69970 + }, + { + "epoch": 1.7095008917010723, + "grad_norm": 21.337604522705078, + "learning_rate": 1.6642041491457507e-06, + "loss": 0.1161, + "num_input_tokens_seen": 47163808, + "step": 69975 + }, + { + "epoch": 1.7096230425329195, + "grad_norm": 0.10576992481946945, + "learning_rate": 1.6641403973559268e-06, + "loss": 0.1276, + "num_input_tokens_seen": 47167072, + "step": 69980 + }, + { + "epoch": 1.7097451933647667, + "grad_norm": 0.13519780337810516, + "learning_rate": 1.6640766407362955e-06, + "loss": 0.0363, + "num_input_tokens_seen": 47170592, + "step": 69985 + }, + { + "epoch": 1.709867344196614, + "grad_norm": 0.6011918783187866, + "learning_rate": 1.6640128792873205e-06, + "loss": 0.0511, + "num_input_tokens_seen": 47174304, + "step": 69990 + }, + { + "epoch": 1.709989495028461, + "grad_norm": 0.9644982814788818, + "learning_rate": 1.663949113009465e-06, + "loss": 0.1105, + "num_input_tokens_seen": 47177632, + "step": 69995 + }, + { + "epoch": 1.7101116458603083, + "grad_norm": 0.28336676955223083, + "learning_rate": 1.663885341903193e-06, + "loss": 0.0849, + "num_input_tokens_seen": 47181280, + "step": 70000 + }, + { + "epoch": 1.7102337966921555, + "grad_norm": 0.11140467971563339, + "learning_rate": 1.6638215659689683e-06, + "loss": 0.0892, + "num_input_tokens_seen": 47184288, + "step": 70005 + }, + { + "epoch": 1.7103559475240027, + "grad_norm": 0.16174258291721344, + "learning_rate": 1.6637577852072547e-06, + "loss": 0.0731, + "num_input_tokens_seen": 47187872, + "step": 70010 + }, + { + "epoch": 1.7104780983558499, + "grad_norm": 41.12967300415039, + "learning_rate": 1.6636939996185157e-06, + "loss": 0.0885, + "num_input_tokens_seen": 47191200, + "step": 70015 + }, + { + "epoch": 1.710600249187697, + "grad_norm": 0.4021644592285156, + "learning_rate": 1.6636302092032155e-06, + "loss": 0.256, + "num_input_tokens_seen": 47194592, + "step": 70020 + }, + { + "epoch": 1.7107224000195442, + "grad_norm": 7.773596286773682, + "learning_rate": 1.6635664139618183e-06, + "loss": 0.0816, + "num_input_tokens_seen": 47197792, + "step": 70025 + }, + { + "epoch": 1.7108445508513914, + "grad_norm": 0.6293780207633972, + "learning_rate": 1.6635026138947873e-06, + "loss": 0.0045, + "num_input_tokens_seen": 47201120, + "step": 70030 + }, + { + "epoch": 1.7109667016832386, + "grad_norm": 0.10260359942913055, + "learning_rate": 1.6634388090025867e-06, + "loss": 0.001, + "num_input_tokens_seen": 47204768, + "step": 70035 + }, + { + "epoch": 1.7110888525150856, + "grad_norm": 11.693562507629395, + "learning_rate": 1.663374999285681e-06, + "loss": 0.0746, + "num_input_tokens_seen": 47208160, + "step": 70040 + }, + { + "epoch": 1.7112110033469328, + "grad_norm": 0.05745501071214676, + "learning_rate": 1.6633111847445336e-06, + "loss": 0.0009, + "num_input_tokens_seen": 47211552, + "step": 70045 + }, + { + "epoch": 1.71133315417878, + "grad_norm": 15.584721565246582, + "learning_rate": 1.6632473653796088e-06, + "loss": 0.0466, + "num_input_tokens_seen": 47214752, + "step": 70050 + }, + { + "epoch": 1.7114553050106271, + "grad_norm": 0.18859045207500458, + "learning_rate": 1.6631835411913713e-06, + "loss": 0.1216, + "num_input_tokens_seen": 47218336, + "step": 70055 + }, + { + "epoch": 1.7115774558424743, + "grad_norm": 14.733503341674805, + "learning_rate": 1.6631197121802843e-06, + "loss": 0.084, + "num_input_tokens_seen": 47221920, + "step": 70060 + }, + { + "epoch": 1.7116996066743213, + "grad_norm": 156.82640075683594, + "learning_rate": 1.6630558783468122e-06, + "loss": 0.0601, + "num_input_tokens_seen": 47225056, + "step": 70065 + }, + { + "epoch": 1.7118217575061685, + "grad_norm": 126.34097290039062, + "learning_rate": 1.66299203969142e-06, + "loss": 0.0522, + "num_input_tokens_seen": 47228768, + "step": 70070 + }, + { + "epoch": 1.7119439083380157, + "grad_norm": 0.05393688380718231, + "learning_rate": 1.6629281962145706e-06, + "loss": 0.0836, + "num_input_tokens_seen": 47231904, + "step": 70075 + }, + { + "epoch": 1.7120660591698629, + "grad_norm": 0.07792874425649643, + "learning_rate": 1.6628643479167297e-06, + "loss": 0.0383, + "num_input_tokens_seen": 47235808, + "step": 70080 + }, + { + "epoch": 1.71218821000171, + "grad_norm": 13.525362014770508, + "learning_rate": 1.6628004947983606e-06, + "loss": 0.0637, + "num_input_tokens_seen": 47239008, + "step": 70085 + }, + { + "epoch": 1.7123103608335573, + "grad_norm": 0.12766079604625702, + "learning_rate": 1.6627366368599285e-06, + "loss": 0.0009, + "num_input_tokens_seen": 47242336, + "step": 70090 + }, + { + "epoch": 1.7124325116654044, + "grad_norm": 0.15544456243515015, + "learning_rate": 1.6626727741018967e-06, + "loss": 0.071, + "num_input_tokens_seen": 47245472, + "step": 70095 + }, + { + "epoch": 1.7125546624972516, + "grad_norm": 0.012700174935162067, + "learning_rate": 1.6626089065247306e-06, + "loss": 0.0495, + "num_input_tokens_seen": 47248608, + "step": 70100 + }, + { + "epoch": 1.7126768133290988, + "grad_norm": 0.15384520590305328, + "learning_rate": 1.6625450341288943e-06, + "loss": 0.157, + "num_input_tokens_seen": 47252384, + "step": 70105 + }, + { + "epoch": 1.712798964160946, + "grad_norm": 0.03365683928132057, + "learning_rate": 1.6624811569148523e-06, + "loss": 0.0948, + "num_input_tokens_seen": 47255456, + "step": 70110 + }, + { + "epoch": 1.7129211149927932, + "grad_norm": 0.5229410529136658, + "learning_rate": 1.662417274883069e-06, + "loss": 0.0011, + "num_input_tokens_seen": 47258656, + "step": 70115 + }, + { + "epoch": 1.7130432658246404, + "grad_norm": 0.28059008717536926, + "learning_rate": 1.6623533880340093e-06, + "loss": 0.0389, + "num_input_tokens_seen": 47261984, + "step": 70120 + }, + { + "epoch": 1.7131654166564876, + "grad_norm": 0.13389243185520172, + "learning_rate": 1.6622894963681376e-06, + "loss": 0.1023, + "num_input_tokens_seen": 47265440, + "step": 70125 + }, + { + "epoch": 1.7132875674883346, + "grad_norm": 27.324216842651367, + "learning_rate": 1.6622255998859183e-06, + "loss": 0.1753, + "num_input_tokens_seen": 47268512, + "step": 70130 + }, + { + "epoch": 1.7134097183201817, + "grad_norm": 32.270782470703125, + "learning_rate": 1.6621616985878166e-06, + "loss": 0.1303, + "num_input_tokens_seen": 47272096, + "step": 70135 + }, + { + "epoch": 1.713531869152029, + "grad_norm": 20.028017044067383, + "learning_rate": 1.6620977924742967e-06, + "loss": 0.1832, + "num_input_tokens_seen": 47275168, + "step": 70140 + }, + { + "epoch": 1.7136540199838761, + "grad_norm": 0.21445448696613312, + "learning_rate": 1.6620338815458237e-06, + "loss": 0.0486, + "num_input_tokens_seen": 47278624, + "step": 70145 + }, + { + "epoch": 1.7137761708157233, + "grad_norm": 0.2595180869102478, + "learning_rate": 1.661969965802862e-06, + "loss": 0.0282, + "num_input_tokens_seen": 47281760, + "step": 70150 + }, + { + "epoch": 1.7138983216475703, + "grad_norm": 0.051422055810689926, + "learning_rate": 1.6619060452458773e-06, + "loss": 0.0466, + "num_input_tokens_seen": 47285408, + "step": 70155 + }, + { + "epoch": 1.7140204724794175, + "grad_norm": 21.490816116333008, + "learning_rate": 1.661842119875333e-06, + "loss": 0.1555, + "num_input_tokens_seen": 47289504, + "step": 70160 + }, + { + "epoch": 1.7141426233112647, + "grad_norm": 13.273415565490723, + "learning_rate": 1.6617781896916955e-06, + "loss": 0.038, + "num_input_tokens_seen": 47292832, + "step": 70165 + }, + { + "epoch": 1.7142647741431118, + "grad_norm": 0.24678562581539154, + "learning_rate": 1.6617142546954286e-06, + "loss": 0.0519, + "num_input_tokens_seen": 47296096, + "step": 70170 + }, + { + "epoch": 1.714386924974959, + "grad_norm": 89.38595581054688, + "learning_rate": 1.6616503148869977e-06, + "loss": 0.1749, + "num_input_tokens_seen": 47299424, + "step": 70175 + }, + { + "epoch": 1.7145090758068062, + "grad_norm": 0.040162667632102966, + "learning_rate": 1.661586370266868e-06, + "loss": 0.0288, + "num_input_tokens_seen": 47302752, + "step": 70180 + }, + { + "epoch": 1.7146312266386534, + "grad_norm": 13.061795234680176, + "learning_rate": 1.661522420835504e-06, + "loss": 0.0801, + "num_input_tokens_seen": 47306080, + "step": 70185 + }, + { + "epoch": 1.7147533774705006, + "grad_norm": 0.04859983175992966, + "learning_rate": 1.6614584665933711e-06, + "loss": 0.1098, + "num_input_tokens_seen": 47309536, + "step": 70190 + }, + { + "epoch": 1.7148755283023478, + "grad_norm": 1.0230239629745483, + "learning_rate": 1.661394507540934e-06, + "loss": 0.007, + "num_input_tokens_seen": 47312864, + "step": 70195 + }, + { + "epoch": 1.714997679134195, + "grad_norm": 1.9080485105514526, + "learning_rate": 1.661330543678659e-06, + "loss": 0.173, + "num_input_tokens_seen": 47316128, + "step": 70200 + }, + { + "epoch": 1.7151198299660422, + "grad_norm": 0.044832922518253326, + "learning_rate": 1.6612665750070097e-06, + "loss": 0.1017, + "num_input_tokens_seen": 47319456, + "step": 70205 + }, + { + "epoch": 1.7152419807978894, + "grad_norm": 0.2528229057788849, + "learning_rate": 1.6612026015264522e-06, + "loss": 0.0431, + "num_input_tokens_seen": 47322720, + "step": 70210 + }, + { + "epoch": 1.7153641316297366, + "grad_norm": 30.157514572143555, + "learning_rate": 1.6611386232374516e-06, + "loss": 0.0533, + "num_input_tokens_seen": 47325792, + "step": 70215 + }, + { + "epoch": 1.7154862824615835, + "grad_norm": 23.45171546936035, + "learning_rate": 1.6610746401404728e-06, + "loss": 0.0897, + "num_input_tokens_seen": 47329056, + "step": 70220 + }, + { + "epoch": 1.7156084332934307, + "grad_norm": 33.5923957824707, + "learning_rate": 1.6610106522359816e-06, + "loss": 0.0649, + "num_input_tokens_seen": 47331936, + "step": 70225 + }, + { + "epoch": 1.715730584125278, + "grad_norm": 1.0114668607711792, + "learning_rate": 1.6609466595244432e-06, + "loss": 0.0611, + "num_input_tokens_seen": 47334944, + "step": 70230 + }, + { + "epoch": 1.715852734957125, + "grad_norm": 0.21693406999111176, + "learning_rate": 1.660882662006323e-06, + "loss": 0.034, + "num_input_tokens_seen": 47338208, + "step": 70235 + }, + { + "epoch": 1.715974885788972, + "grad_norm": 0.12061869353055954, + "learning_rate": 1.6608186596820863e-06, + "loss": 0.0561, + "num_input_tokens_seen": 47341472, + "step": 70240 + }, + { + "epoch": 1.7160970366208192, + "grad_norm": 227.574951171875, + "learning_rate": 1.6607546525521984e-06, + "loss": 0.0466, + "num_input_tokens_seen": 47344480, + "step": 70245 + }, + { + "epoch": 1.7162191874526664, + "grad_norm": 12.42898941040039, + "learning_rate": 1.660690640617125e-06, + "loss": 0.0434, + "num_input_tokens_seen": 47347872, + "step": 70250 + }, + { + "epoch": 1.7163413382845136, + "grad_norm": 9.49234676361084, + "learning_rate": 1.6606266238773317e-06, + "loss": 0.1255, + "num_input_tokens_seen": 47351328, + "step": 70255 + }, + { + "epoch": 1.7164634891163608, + "grad_norm": 0.019718781113624573, + "learning_rate": 1.6605626023332836e-06, + "loss": 0.0415, + "num_input_tokens_seen": 47354592, + "step": 70260 + }, + { + "epoch": 1.716585639948208, + "grad_norm": 6.5894455909729, + "learning_rate": 1.660498575985447e-06, + "loss": 0.1191, + "num_input_tokens_seen": 47358944, + "step": 70265 + }, + { + "epoch": 1.7167077907800552, + "grad_norm": 0.34928131103515625, + "learning_rate": 1.660434544834287e-06, + "loss": 0.139, + "num_input_tokens_seen": 47362080, + "step": 70270 + }, + { + "epoch": 1.7168299416119024, + "grad_norm": 0.05047084018588066, + "learning_rate": 1.6603705088802692e-06, + "loss": 0.0686, + "num_input_tokens_seen": 47365280, + "step": 70275 + }, + { + "epoch": 1.7169520924437496, + "grad_norm": 0.19307588040828705, + "learning_rate": 1.6603064681238595e-06, + "loss": 0.1732, + "num_input_tokens_seen": 47368544, + "step": 70280 + }, + { + "epoch": 1.7170742432755968, + "grad_norm": 3.607551097869873, + "learning_rate": 1.6602424225655236e-06, + "loss": 0.096, + "num_input_tokens_seen": 47371616, + "step": 70285 + }, + { + "epoch": 1.717196394107444, + "grad_norm": 168.88800048828125, + "learning_rate": 1.6601783722057273e-06, + "loss": 0.0898, + "num_input_tokens_seen": 47375328, + "step": 70290 + }, + { + "epoch": 1.7173185449392911, + "grad_norm": 17.703678131103516, + "learning_rate": 1.660114317044936e-06, + "loss": 0.0325, + "num_input_tokens_seen": 47378592, + "step": 70295 + }, + { + "epoch": 1.7174406957711383, + "grad_norm": 25.313291549682617, + "learning_rate": 1.6600502570836162e-06, + "loss": 0.1035, + "num_input_tokens_seen": 47381664, + "step": 70300 + }, + { + "epoch": 1.7175628466029855, + "grad_norm": 0.20463314652442932, + "learning_rate": 1.6599861923222332e-06, + "loss": 0.0312, + "num_input_tokens_seen": 47384864, + "step": 70305 + }, + { + "epoch": 1.7176849974348325, + "grad_norm": 0.32572174072265625, + "learning_rate": 1.659922122761253e-06, + "loss": 0.1744, + "num_input_tokens_seen": 47388320, + "step": 70310 + }, + { + "epoch": 1.7178071482666797, + "grad_norm": 0.2342342883348465, + "learning_rate": 1.6598580484011415e-06, + "loss": 0.0006, + "num_input_tokens_seen": 47392416, + "step": 70315 + }, + { + "epoch": 1.7179292990985269, + "grad_norm": 0.9499707818031311, + "learning_rate": 1.659793969242365e-06, + "loss": 0.1155, + "num_input_tokens_seen": 47395872, + "step": 70320 + }, + { + "epoch": 1.718051449930374, + "grad_norm": 0.0458591990172863, + "learning_rate": 1.6597298852853894e-06, + "loss": 0.0345, + "num_input_tokens_seen": 47399264, + "step": 70325 + }, + { + "epoch": 1.718173600762221, + "grad_norm": 0.1385032832622528, + "learning_rate": 1.6596657965306807e-06, + "loss": 0.1316, + "num_input_tokens_seen": 47402400, + "step": 70330 + }, + { + "epoch": 1.7182957515940682, + "grad_norm": 2.4102401733398438, + "learning_rate": 1.6596017029787048e-06, + "loss": 0.0012, + "num_input_tokens_seen": 47405600, + "step": 70335 + }, + { + "epoch": 1.7184179024259154, + "grad_norm": 23.362293243408203, + "learning_rate": 1.6595376046299276e-06, + "loss": 0.151, + "num_input_tokens_seen": 47408928, + "step": 70340 + }, + { + "epoch": 1.7185400532577626, + "grad_norm": 0.18636535108089447, + "learning_rate": 1.6594735014848161e-06, + "loss": 0.1138, + "num_input_tokens_seen": 47412576, + "step": 70345 + }, + { + "epoch": 1.7186622040896098, + "grad_norm": 11.467135429382324, + "learning_rate": 1.6594093935438354e-06, + "loss": 0.0559, + "num_input_tokens_seen": 47416032, + "step": 70350 + }, + { + "epoch": 1.718784354921457, + "grad_norm": 0.03606804087758064, + "learning_rate": 1.6593452808074524e-06, + "loss": 0.0006, + "num_input_tokens_seen": 47419040, + "step": 70355 + }, + { + "epoch": 1.7189065057533042, + "grad_norm": 77.13521575927734, + "learning_rate": 1.6592811632761335e-06, + "loss": 0.0244, + "num_input_tokens_seen": 47422432, + "step": 70360 + }, + { + "epoch": 1.7190286565851514, + "grad_norm": 0.0497257299721241, + "learning_rate": 1.6592170409503444e-06, + "loss": 0.0547, + "num_input_tokens_seen": 47425888, + "step": 70365 + }, + { + "epoch": 1.7191508074169985, + "grad_norm": 12.063864707946777, + "learning_rate": 1.6591529138305515e-06, + "loss": 0.0812, + "num_input_tokens_seen": 47429216, + "step": 70370 + }, + { + "epoch": 1.7192729582488457, + "grad_norm": 0.09922768175601959, + "learning_rate": 1.6590887819172215e-06, + "loss": 0.1318, + "num_input_tokens_seen": 47432800, + "step": 70375 + }, + { + "epoch": 1.719395109080693, + "grad_norm": 0.570949137210846, + "learning_rate": 1.6590246452108206e-06, + "loss": 0.156, + "num_input_tokens_seen": 47436512, + "step": 70380 + }, + { + "epoch": 1.7195172599125401, + "grad_norm": 27.616113662719727, + "learning_rate": 1.6589605037118153e-06, + "loss": 0.2372, + "num_input_tokens_seen": 47440160, + "step": 70385 + }, + { + "epoch": 1.7196394107443873, + "grad_norm": 0.17594002187252045, + "learning_rate": 1.6588963574206719e-06, + "loss": 0.0403, + "num_input_tokens_seen": 47443424, + "step": 70390 + }, + { + "epoch": 1.7197615615762345, + "grad_norm": 35.87648010253906, + "learning_rate": 1.6588322063378567e-06, + "loss": 0.0508, + "num_input_tokens_seen": 47446880, + "step": 70395 + }, + { + "epoch": 1.7198837124080815, + "grad_norm": 21.05569076538086, + "learning_rate": 1.6587680504638368e-06, + "loss": 0.1788, + "num_input_tokens_seen": 47450272, + "step": 70400 + }, + { + "epoch": 1.7200058632399287, + "grad_norm": 17.221059799194336, + "learning_rate": 1.6587038897990783e-06, + "loss": 0.051, + "num_input_tokens_seen": 47453728, + "step": 70405 + }, + { + "epoch": 1.7201280140717758, + "grad_norm": 16.515247344970703, + "learning_rate": 1.6586397243440483e-06, + "loss": 0.0631, + "num_input_tokens_seen": 47456672, + "step": 70410 + }, + { + "epoch": 1.720250164903623, + "grad_norm": 0.07961339503526688, + "learning_rate": 1.6585755540992125e-06, + "loss": 0.1466, + "num_input_tokens_seen": 47459936, + "step": 70415 + }, + { + "epoch": 1.72037231573547, + "grad_norm": 34.02830505371094, + "learning_rate": 1.6585113790650388e-06, + "loss": 0.1025, + "num_input_tokens_seen": 47463328, + "step": 70420 + }, + { + "epoch": 1.7204944665673172, + "grad_norm": 0.5048316121101379, + "learning_rate": 1.6584471992419927e-06, + "loss": 0.0013, + "num_input_tokens_seen": 47467232, + "step": 70425 + }, + { + "epoch": 1.7206166173991644, + "grad_norm": 0.17081189155578613, + "learning_rate": 1.6583830146305418e-06, + "loss": 0.0816, + "num_input_tokens_seen": 47470496, + "step": 70430 + }, + { + "epoch": 1.7207387682310116, + "grad_norm": 0.06982258707284927, + "learning_rate": 1.6583188252311522e-06, + "loss": 0.0989, + "num_input_tokens_seen": 47474080, + "step": 70435 + }, + { + "epoch": 1.7208609190628588, + "grad_norm": 0.29233846068382263, + "learning_rate": 1.6582546310442913e-06, + "loss": 0.1682, + "num_input_tokens_seen": 47477664, + "step": 70440 + }, + { + "epoch": 1.720983069894706, + "grad_norm": 0.3050183951854706, + "learning_rate": 1.6581904320704254e-06, + "loss": 0.0531, + "num_input_tokens_seen": 47481376, + "step": 70445 + }, + { + "epoch": 1.7211052207265531, + "grad_norm": 23.65858268737793, + "learning_rate": 1.658126228310022e-06, + "loss": 0.0985, + "num_input_tokens_seen": 47484384, + "step": 70450 + }, + { + "epoch": 1.7212273715584003, + "grad_norm": 13.939019203186035, + "learning_rate": 1.6580620197635473e-06, + "loss": 0.0026, + "num_input_tokens_seen": 47487840, + "step": 70455 + }, + { + "epoch": 1.7213495223902475, + "grad_norm": 0.7158112525939941, + "learning_rate": 1.6579978064314688e-06, + "loss": 0.0616, + "num_input_tokens_seen": 47491168, + "step": 70460 + }, + { + "epoch": 1.7214716732220947, + "grad_norm": 0.05182803422212601, + "learning_rate": 1.6579335883142534e-06, + "loss": 0.0845, + "num_input_tokens_seen": 47494688, + "step": 70465 + }, + { + "epoch": 1.721593824053942, + "grad_norm": 3.6295721530914307, + "learning_rate": 1.6578693654123676e-06, + "loss": 0.0646, + "num_input_tokens_seen": 47498080, + "step": 70470 + }, + { + "epoch": 1.721715974885789, + "grad_norm": 0.44479724764823914, + "learning_rate": 1.6578051377262792e-06, + "loss": 0.085, + "num_input_tokens_seen": 47501088, + "step": 70475 + }, + { + "epoch": 1.7218381257176363, + "grad_norm": 120.40805053710938, + "learning_rate": 1.6577409052564545e-06, + "loss": 0.0571, + "num_input_tokens_seen": 47504480, + "step": 70480 + }, + { + "epoch": 1.7219602765494832, + "grad_norm": 0.19627024233341217, + "learning_rate": 1.6576766680033613e-06, + "loss": 0.0518, + "num_input_tokens_seen": 47508576, + "step": 70485 + }, + { + "epoch": 1.7220824273813304, + "grad_norm": 10.544469833374023, + "learning_rate": 1.6576124259674667e-06, + "loss": 0.0763, + "num_input_tokens_seen": 47511840, + "step": 70490 + }, + { + "epoch": 1.7222045782131776, + "grad_norm": 0.26938116550445557, + "learning_rate": 1.6575481791492374e-06, + "loss": 0.008, + "num_input_tokens_seen": 47515168, + "step": 70495 + }, + { + "epoch": 1.7223267290450248, + "grad_norm": 0.15729977190494537, + "learning_rate": 1.657483927549141e-06, + "loss": 0.1826, + "num_input_tokens_seen": 47518624, + "step": 70500 + }, + { + "epoch": 1.722448879876872, + "grad_norm": 0.2031847983598709, + "learning_rate": 1.6574196711676444e-06, + "loss": 0.0811, + "num_input_tokens_seen": 47521760, + "step": 70505 + }, + { + "epoch": 1.722571030708719, + "grad_norm": 14.030067443847656, + "learning_rate": 1.6573554100052154e-06, + "loss": 0.1212, + "num_input_tokens_seen": 47524896, + "step": 70510 + }, + { + "epoch": 1.7226931815405662, + "grad_norm": 0.7857367992401123, + "learning_rate": 1.657291144062321e-06, + "loss": 0.0577, + "num_input_tokens_seen": 47528736, + "step": 70515 + }, + { + "epoch": 1.7228153323724134, + "grad_norm": 0.08645419031381607, + "learning_rate": 1.6572268733394283e-06, + "loss": 0.0864, + "num_input_tokens_seen": 47532192, + "step": 70520 + }, + { + "epoch": 1.7229374832042605, + "grad_norm": 23.457870483398438, + "learning_rate": 1.6571625978370055e-06, + "loss": 0.1231, + "num_input_tokens_seen": 47535328, + "step": 70525 + }, + { + "epoch": 1.7230596340361077, + "grad_norm": 10.771952629089355, + "learning_rate": 1.657098317555519e-06, + "loss": 0.2692, + "num_input_tokens_seen": 47538400, + "step": 70530 + }, + { + "epoch": 1.723181784867955, + "grad_norm": 2.8880395889282227, + "learning_rate": 1.6570340324954374e-06, + "loss": 0.0742, + "num_input_tokens_seen": 47541792, + "step": 70535 + }, + { + "epoch": 1.7233039356998021, + "grad_norm": 0.0832524523139, + "learning_rate": 1.656969742657227e-06, + "loss": 0.0373, + "num_input_tokens_seen": 47545184, + "step": 70540 + }, + { + "epoch": 1.7234260865316493, + "grad_norm": 180.890869140625, + "learning_rate": 1.6569054480413564e-06, + "loss": 0.1764, + "num_input_tokens_seen": 47548192, + "step": 70545 + }, + { + "epoch": 1.7235482373634965, + "grad_norm": 0.5971752405166626, + "learning_rate": 1.6568411486482923e-06, + "loss": 0.051, + "num_input_tokens_seen": 47551456, + "step": 70550 + }, + { + "epoch": 1.7236703881953437, + "grad_norm": 0.18406574428081512, + "learning_rate": 1.656776844478503e-06, + "loss": 0.1628, + "num_input_tokens_seen": 47555296, + "step": 70555 + }, + { + "epoch": 1.7237925390271909, + "grad_norm": 0.5610669255256653, + "learning_rate": 1.6567125355324555e-06, + "loss": 0.0636, + "num_input_tokens_seen": 47558688, + "step": 70560 + }, + { + "epoch": 1.723914689859038, + "grad_norm": 48.64958190917969, + "learning_rate": 1.6566482218106184e-06, + "loss": 0.1408, + "num_input_tokens_seen": 47562080, + "step": 70565 + }, + { + "epoch": 1.7240368406908853, + "grad_norm": 81.54186248779297, + "learning_rate": 1.6565839033134584e-06, + "loss": 0.0796, + "num_input_tokens_seen": 47565216, + "step": 70570 + }, + { + "epoch": 1.7241589915227322, + "grad_norm": 18.318082809448242, + "learning_rate": 1.6565195800414434e-06, + "loss": 0.0569, + "num_input_tokens_seen": 47568544, + "step": 70575 + }, + { + "epoch": 1.7242811423545794, + "grad_norm": 0.33882206678390503, + "learning_rate": 1.656455251995042e-06, + "loss": 0.0396, + "num_input_tokens_seen": 47571808, + "step": 70580 + }, + { + "epoch": 1.7244032931864266, + "grad_norm": 0.1423991471529007, + "learning_rate": 1.6563909191747212e-06, + "loss": 0.0013, + "num_input_tokens_seen": 47575072, + "step": 70585 + }, + { + "epoch": 1.7245254440182738, + "grad_norm": 32.475624084472656, + "learning_rate": 1.656326581580949e-06, + "loss": 0.1305, + "num_input_tokens_seen": 47578592, + "step": 70590 + }, + { + "epoch": 1.724647594850121, + "grad_norm": 0.16308899223804474, + "learning_rate": 1.656262239214193e-06, + "loss": 0.0725, + "num_input_tokens_seen": 47581728, + "step": 70595 + }, + { + "epoch": 1.724769745681968, + "grad_norm": 0.12075145542621613, + "learning_rate": 1.6561978920749223e-06, + "loss": 0.0006, + "num_input_tokens_seen": 47585184, + "step": 70600 + }, + { + "epoch": 1.7248918965138151, + "grad_norm": 0.24487103521823883, + "learning_rate": 1.6561335401636036e-06, + "loss": 0.0488, + "num_input_tokens_seen": 47588384, + "step": 70605 + }, + { + "epoch": 1.7250140473456623, + "grad_norm": 0.39849162101745605, + "learning_rate": 1.6560691834807052e-06, + "loss": 0.032, + "num_input_tokens_seen": 47591840, + "step": 70610 + }, + { + "epoch": 1.7251361981775095, + "grad_norm": 0.7076511979103088, + "learning_rate": 1.6560048220266955e-06, + "loss": 0.0013, + "num_input_tokens_seen": 47595424, + "step": 70615 + }, + { + "epoch": 1.7252583490093567, + "grad_norm": 0.3278559446334839, + "learning_rate": 1.6559404558020424e-06, + "loss": 0.1268, + "num_input_tokens_seen": 47599264, + "step": 70620 + }, + { + "epoch": 1.725380499841204, + "grad_norm": 0.050880298018455505, + "learning_rate": 1.6558760848072135e-06, + "loss": 0.1171, + "num_input_tokens_seen": 47602144, + "step": 70625 + }, + { + "epoch": 1.725502650673051, + "grad_norm": 40.476478576660156, + "learning_rate": 1.6558117090426772e-06, + "loss": 0.2168, + "num_input_tokens_seen": 47605536, + "step": 70630 + }, + { + "epoch": 1.7256248015048983, + "grad_norm": 12.389641761779785, + "learning_rate": 1.6557473285089023e-06, + "loss": 0.0798, + "num_input_tokens_seen": 47608416, + "step": 70635 + }, + { + "epoch": 1.7257469523367455, + "grad_norm": 0.38528257608413696, + "learning_rate": 1.6556829432063562e-06, + "loss": 0.1222, + "num_input_tokens_seen": 47611936, + "step": 70640 + }, + { + "epoch": 1.7258691031685927, + "grad_norm": 0.23907269537448883, + "learning_rate": 1.6556185531355074e-06, + "loss": 0.064, + "num_input_tokens_seen": 47615328, + "step": 70645 + }, + { + "epoch": 1.7259912540004398, + "grad_norm": 0.34818893671035767, + "learning_rate": 1.655554158296824e-06, + "loss": 0.0015, + "num_input_tokens_seen": 47619040, + "step": 70650 + }, + { + "epoch": 1.726113404832287, + "grad_norm": 14.175068855285645, + "learning_rate": 1.6554897586907746e-06, + "loss": 0.1151, + "num_input_tokens_seen": 47622368, + "step": 70655 + }, + { + "epoch": 1.7262355556641342, + "grad_norm": 0.10468627512454987, + "learning_rate": 1.6554253543178272e-06, + "loss": 0.029, + "num_input_tokens_seen": 47625952, + "step": 70660 + }, + { + "epoch": 1.7263577064959812, + "grad_norm": 24.79138946533203, + "learning_rate": 1.6553609451784505e-06, + "loss": 0.1943, + "num_input_tokens_seen": 47629088, + "step": 70665 + }, + { + "epoch": 1.7264798573278284, + "grad_norm": 113.18524169921875, + "learning_rate": 1.655296531273113e-06, + "loss": 0.0506, + "num_input_tokens_seen": 47632416, + "step": 70670 + }, + { + "epoch": 1.7266020081596756, + "grad_norm": 41.06086730957031, + "learning_rate": 1.6552321126022824e-06, + "loss": 0.0964, + "num_input_tokens_seen": 47635936, + "step": 70675 + }, + { + "epoch": 1.7267241589915228, + "grad_norm": 0.1683439463376999, + "learning_rate": 1.6551676891664278e-06, + "loss": 0.1082, + "num_input_tokens_seen": 47639264, + "step": 70680 + }, + { + "epoch": 1.72684630982337, + "grad_norm": 0.3442506492137909, + "learning_rate": 1.6551032609660174e-06, + "loss": 0.1175, + "num_input_tokens_seen": 47642208, + "step": 70685 + }, + { + "epoch": 1.726968460655217, + "grad_norm": 19.647884368896484, + "learning_rate": 1.6550388280015199e-06, + "loss": 0.1192, + "num_input_tokens_seen": 47645920, + "step": 70690 + }, + { + "epoch": 1.727090611487064, + "grad_norm": 20.30986976623535, + "learning_rate": 1.654974390273404e-06, + "loss": 0.0551, + "num_input_tokens_seen": 47649312, + "step": 70695 + }, + { + "epoch": 1.7272127623189113, + "grad_norm": 50.00175476074219, + "learning_rate": 1.6549099477821384e-06, + "loss": 0.0847, + "num_input_tokens_seen": 47652448, + "step": 70700 + }, + { + "epoch": 1.7273349131507585, + "grad_norm": 0.24548085033893585, + "learning_rate": 1.6548455005281912e-06, + "loss": 0.1452, + "num_input_tokens_seen": 47655840, + "step": 70705 + }, + { + "epoch": 1.7274570639826057, + "grad_norm": 0.5623430013656616, + "learning_rate": 1.6547810485120315e-06, + "loss": 0.0222, + "num_input_tokens_seen": 47658848, + "step": 70710 + }, + { + "epoch": 1.7275792148144529, + "grad_norm": 0.13892212510108948, + "learning_rate": 1.6547165917341274e-06, + "loss": 0.0349, + "num_input_tokens_seen": 47662048, + "step": 70715 + }, + { + "epoch": 1.7277013656463, + "grad_norm": 0.058080315589904785, + "learning_rate": 1.6546521301949489e-06, + "loss": 0.1226, + "num_input_tokens_seen": 47665120, + "step": 70720 + }, + { + "epoch": 1.7278235164781472, + "grad_norm": 12.511260986328125, + "learning_rate": 1.6545876638949636e-06, + "loss": 0.134, + "num_input_tokens_seen": 47668576, + "step": 70725 + }, + { + "epoch": 1.7279456673099944, + "grad_norm": 10.45933723449707, + "learning_rate": 1.6545231928346411e-06, + "loss": 0.1153, + "num_input_tokens_seen": 47672224, + "step": 70730 + }, + { + "epoch": 1.7280678181418416, + "grad_norm": 0.14672933518886566, + "learning_rate": 1.6544587170144496e-06, + "loss": 0.0368, + "num_input_tokens_seen": 47675552, + "step": 70735 + }, + { + "epoch": 1.7281899689736888, + "grad_norm": 0.03236455097794533, + "learning_rate": 1.6543942364348583e-06, + "loss": 0.0445, + "num_input_tokens_seen": 47679520, + "step": 70740 + }, + { + "epoch": 1.728312119805536, + "grad_norm": 0.19849258661270142, + "learning_rate": 1.6543297510963362e-06, + "loss": 0.0499, + "num_input_tokens_seen": 47682592, + "step": 70745 + }, + { + "epoch": 1.7284342706373832, + "grad_norm": 0.2941298186779022, + "learning_rate": 1.6542652609993519e-06, + "loss": 0.0015, + "num_input_tokens_seen": 47685600, + "step": 70750 + }, + { + "epoch": 1.7285564214692302, + "grad_norm": 0.624882698059082, + "learning_rate": 1.6542007661443749e-06, + "loss": 0.1076, + "num_input_tokens_seen": 47688800, + "step": 70755 + }, + { + "epoch": 1.7286785723010774, + "grad_norm": 66.73082733154297, + "learning_rate": 1.654136266531874e-06, + "loss": 0.1117, + "num_input_tokens_seen": 47692448, + "step": 70760 + }, + { + "epoch": 1.7288007231329245, + "grad_norm": 19.269960403442383, + "learning_rate": 1.6540717621623182e-06, + "loss": 0.2228, + "num_input_tokens_seen": 47695712, + "step": 70765 + }, + { + "epoch": 1.7289228739647717, + "grad_norm": 0.282610684633255, + "learning_rate": 1.6540072530361767e-06, + "loss": 0.0448, + "num_input_tokens_seen": 47698784, + "step": 70770 + }, + { + "epoch": 1.7290450247966187, + "grad_norm": 0.4029996991157532, + "learning_rate": 1.6539427391539183e-06, + "loss": 0.0365, + "num_input_tokens_seen": 47701600, + "step": 70775 + }, + { + "epoch": 1.729167175628466, + "grad_norm": 144.61106872558594, + "learning_rate": 1.6538782205160124e-06, + "loss": 0.0411, + "num_input_tokens_seen": 47704928, + "step": 70780 + }, + { + "epoch": 1.729289326460313, + "grad_norm": 0.9398413300514221, + "learning_rate": 1.6538136971229284e-06, + "loss": 0.0804, + "num_input_tokens_seen": 47708128, + "step": 70785 + }, + { + "epoch": 1.7294114772921603, + "grad_norm": 0.23816479742527008, + "learning_rate": 1.6537491689751352e-06, + "loss": 0.0767, + "num_input_tokens_seen": 47711456, + "step": 70790 + }, + { + "epoch": 1.7295336281240075, + "grad_norm": 0.2797488868236542, + "learning_rate": 1.6536846360731022e-06, + "loss": 0.0489, + "num_input_tokens_seen": 47714784, + "step": 70795 + }, + { + "epoch": 1.7296557789558547, + "grad_norm": 14.246989250183105, + "learning_rate": 1.653620098417299e-06, + "loss": 0.1632, + "num_input_tokens_seen": 47717856, + "step": 70800 + }, + { + "epoch": 1.7297779297877018, + "grad_norm": 20.030927658081055, + "learning_rate": 1.6535555560081945e-06, + "loss": 0.0536, + "num_input_tokens_seen": 47721440, + "step": 70805 + }, + { + "epoch": 1.729900080619549, + "grad_norm": 0.24360552430152893, + "learning_rate": 1.653491008846258e-06, + "loss": 0.0379, + "num_input_tokens_seen": 47724832, + "step": 70810 + }, + { + "epoch": 1.7300222314513962, + "grad_norm": 0.24199669063091278, + "learning_rate": 1.6534264569319594e-06, + "loss": 0.1593, + "num_input_tokens_seen": 47727968, + "step": 70815 + }, + { + "epoch": 1.7301443822832434, + "grad_norm": 0.16798633337020874, + "learning_rate": 1.6533619002657676e-06, + "loss": 0.121, + "num_input_tokens_seen": 47731360, + "step": 70820 + }, + { + "epoch": 1.7302665331150906, + "grad_norm": 143.9950714111328, + "learning_rate": 1.6532973388481523e-06, + "loss": 0.0291, + "num_input_tokens_seen": 47735200, + "step": 70825 + }, + { + "epoch": 1.7303886839469378, + "grad_norm": 1.209952712059021, + "learning_rate": 1.6532327726795834e-06, + "loss": 0.088, + "num_input_tokens_seen": 47738464, + "step": 70830 + }, + { + "epoch": 1.730510834778785, + "grad_norm": 63.46141052246094, + "learning_rate": 1.65316820176053e-06, + "loss": 0.2023, + "num_input_tokens_seen": 47741664, + "step": 70835 + }, + { + "epoch": 1.7306329856106322, + "grad_norm": 16.589221954345703, + "learning_rate": 1.6531036260914615e-06, + "loss": 0.0257, + "num_input_tokens_seen": 47745632, + "step": 70840 + }, + { + "epoch": 1.7307551364424791, + "grad_norm": 0.10137242823839188, + "learning_rate": 1.6530390456728478e-06, + "loss": 0.0009, + "num_input_tokens_seen": 47748832, + "step": 70845 + }, + { + "epoch": 1.7308772872743263, + "grad_norm": 2.2535622119903564, + "learning_rate": 1.6529744605051586e-06, + "loss": 0.1395, + "num_input_tokens_seen": 47752608, + "step": 70850 + }, + { + "epoch": 1.7309994381061735, + "grad_norm": 0.13834618031978607, + "learning_rate": 1.6529098705888636e-06, + "loss": 0.0413, + "num_input_tokens_seen": 47755936, + "step": 70855 + }, + { + "epoch": 1.7311215889380207, + "grad_norm": 194.70693969726562, + "learning_rate": 1.6528452759244322e-06, + "loss": 0.1253, + "num_input_tokens_seen": 47759200, + "step": 70860 + }, + { + "epoch": 1.7312437397698677, + "grad_norm": 0.24020245671272278, + "learning_rate": 1.6527806765123345e-06, + "loss": 0.0245, + "num_input_tokens_seen": 47762848, + "step": 70865 + }, + { + "epoch": 1.7313658906017149, + "grad_norm": 0.7342332601547241, + "learning_rate": 1.6527160723530403e-06, + "loss": 0.0019, + "num_input_tokens_seen": 47766048, + "step": 70870 + }, + { + "epoch": 1.731488041433562, + "grad_norm": 0.24523884057998657, + "learning_rate": 1.6526514634470188e-06, + "loss": 0.0652, + "num_input_tokens_seen": 47769696, + "step": 70875 + }, + { + "epoch": 1.7316101922654092, + "grad_norm": 0.05468997359275818, + "learning_rate": 1.6525868497947406e-06, + "loss": 0.0015, + "num_input_tokens_seen": 47772896, + "step": 70880 + }, + { + "epoch": 1.7317323430972564, + "grad_norm": 163.22579956054688, + "learning_rate": 1.6525222313966754e-06, + "loss": 0.0574, + "num_input_tokens_seen": 47776096, + "step": 70885 + }, + { + "epoch": 1.7318544939291036, + "grad_norm": 227.71888732910156, + "learning_rate": 1.6524576082532927e-06, + "loss": 0.1191, + "num_input_tokens_seen": 47779360, + "step": 70890 + }, + { + "epoch": 1.7319766447609508, + "grad_norm": 39.0536003112793, + "learning_rate": 1.6523929803650632e-06, + "loss": 0.1471, + "num_input_tokens_seen": 47783008, + "step": 70895 + }, + { + "epoch": 1.732098795592798, + "grad_norm": 2.195404291152954, + "learning_rate": 1.6523283477324561e-06, + "loss": 0.0375, + "num_input_tokens_seen": 47786080, + "step": 70900 + }, + { + "epoch": 1.7322209464246452, + "grad_norm": 44.34257507324219, + "learning_rate": 1.652263710355942e-06, + "loss": 0.1278, + "num_input_tokens_seen": 47789088, + "step": 70905 + }, + { + "epoch": 1.7323430972564924, + "grad_norm": 0.3584393560886383, + "learning_rate": 1.6521990682359906e-06, + "loss": 0.1328, + "num_input_tokens_seen": 47792992, + "step": 70910 + }, + { + "epoch": 1.7324652480883396, + "grad_norm": 0.08577506244182587, + "learning_rate": 1.6521344213730723e-06, + "loss": 0.089, + "num_input_tokens_seen": 47796512, + "step": 70915 + }, + { + "epoch": 1.7325873989201868, + "grad_norm": 29.132362365722656, + "learning_rate": 1.652069769767657e-06, + "loss": 0.091, + "num_input_tokens_seen": 47800032, + "step": 70920 + }, + { + "epoch": 1.732709549752034, + "grad_norm": 36.29575729370117, + "learning_rate": 1.6520051134202154e-06, + "loss": 0.0469, + "num_input_tokens_seen": 47803872, + "step": 70925 + }, + { + "epoch": 1.7328317005838811, + "grad_norm": 0.9603968262672424, + "learning_rate": 1.6519404523312166e-06, + "loss": 0.1223, + "num_input_tokens_seen": 47807584, + "step": 70930 + }, + { + "epoch": 1.732953851415728, + "grad_norm": 0.1118437722325325, + "learning_rate": 1.6518757865011316e-06, + "loss": 0.0338, + "num_input_tokens_seen": 47811040, + "step": 70935 + }, + { + "epoch": 1.7330760022475753, + "grad_norm": 103.07813262939453, + "learning_rate": 1.651811115930431e-06, + "loss": 0.0911, + "num_input_tokens_seen": 47814048, + "step": 70940 + }, + { + "epoch": 1.7331981530794225, + "grad_norm": 10.315584182739258, + "learning_rate": 1.651746440619584e-06, + "loss": 0.1733, + "num_input_tokens_seen": 47817440, + "step": 70945 + }, + { + "epoch": 1.7333203039112697, + "grad_norm": 12.482510566711426, + "learning_rate": 1.651681760569062e-06, + "loss": 0.0495, + "num_input_tokens_seen": 47820512, + "step": 70950 + }, + { + "epoch": 1.7334424547431166, + "grad_norm": 11.532674789428711, + "learning_rate": 1.651617075779335e-06, + "loss": 0.0826, + "num_input_tokens_seen": 47823648, + "step": 70955 + }, + { + "epoch": 1.7335646055749638, + "grad_norm": 0.13834688067436218, + "learning_rate": 1.651552386250873e-06, + "loss": 0.0421, + "num_input_tokens_seen": 47827168, + "step": 70960 + }, + { + "epoch": 1.733686756406811, + "grad_norm": 0.4201122522354126, + "learning_rate": 1.6514876919841472e-06, + "loss": 0.0399, + "num_input_tokens_seen": 47830560, + "step": 70965 + }, + { + "epoch": 1.7338089072386582, + "grad_norm": 0.19618572294712067, + "learning_rate": 1.6514229929796274e-06, + "loss": 0.1524, + "num_input_tokens_seen": 47833696, + "step": 70970 + }, + { + "epoch": 1.7339310580705054, + "grad_norm": 0.6358382701873779, + "learning_rate": 1.6513582892377846e-06, + "loss": 0.0509, + "num_input_tokens_seen": 47836960, + "step": 70975 + }, + { + "epoch": 1.7340532089023526, + "grad_norm": 0.05346240848302841, + "learning_rate": 1.651293580759089e-06, + "loss": 0.0013, + "num_input_tokens_seen": 47840032, + "step": 70980 + }, + { + "epoch": 1.7341753597341998, + "grad_norm": 8.918015480041504, + "learning_rate": 1.6512288675440113e-06, + "loss": 0.0849, + "num_input_tokens_seen": 47842912, + "step": 70985 + }, + { + "epoch": 1.734297510566047, + "grad_norm": 1.396060824394226, + "learning_rate": 1.6511641495930224e-06, + "loss": 0.0021, + "num_input_tokens_seen": 47846624, + "step": 70990 + }, + { + "epoch": 1.7344196613978942, + "grad_norm": 13.514362335205078, + "learning_rate": 1.651099426906592e-06, + "loss": 0.1011, + "num_input_tokens_seen": 47849632, + "step": 70995 + }, + { + "epoch": 1.7345418122297414, + "grad_norm": 10.2088041305542, + "learning_rate": 1.651034699485192e-06, + "loss": 0.2572, + "num_input_tokens_seen": 47853792, + "step": 71000 + }, + { + "epoch": 1.7346639630615885, + "grad_norm": 30.044353485107422, + "learning_rate": 1.6509699673292925e-06, + "loss": 0.1634, + "num_input_tokens_seen": 47857248, + "step": 71005 + }, + { + "epoch": 1.7347861138934357, + "grad_norm": 19.223125457763672, + "learning_rate": 1.6509052304393643e-06, + "loss": 0.2716, + "num_input_tokens_seen": 47860448, + "step": 71010 + }, + { + "epoch": 1.734908264725283, + "grad_norm": 0.053986601531505585, + "learning_rate": 1.650840488815878e-06, + "loss": 0.0248, + "num_input_tokens_seen": 47864672, + "step": 71015 + }, + { + "epoch": 1.73503041555713, + "grad_norm": 72.20088195800781, + "learning_rate": 1.6507757424593047e-06, + "loss": 0.0304, + "num_input_tokens_seen": 47868448, + "step": 71020 + }, + { + "epoch": 1.735152566388977, + "grad_norm": 0.3802885413169861, + "learning_rate": 1.6507109913701154e-06, + "loss": 0.187, + "num_input_tokens_seen": 47872288, + "step": 71025 + }, + { + "epoch": 1.7352747172208243, + "grad_norm": 3.7849714756011963, + "learning_rate": 1.6506462355487804e-06, + "loss": 0.038, + "num_input_tokens_seen": 47875744, + "step": 71030 + }, + { + "epoch": 1.7353968680526715, + "grad_norm": 1.352379322052002, + "learning_rate": 1.650581474995771e-06, + "loss": 0.0689, + "num_input_tokens_seen": 47879392, + "step": 71035 + }, + { + "epoch": 1.7355190188845186, + "grad_norm": 18.819459915161133, + "learning_rate": 1.6505167097115581e-06, + "loss": 0.0723, + "num_input_tokens_seen": 47882528, + "step": 71040 + }, + { + "epoch": 1.7356411697163656, + "grad_norm": 15.091285705566406, + "learning_rate": 1.650451939696613e-06, + "loss": 0.0519, + "num_input_tokens_seen": 47886432, + "step": 71045 + }, + { + "epoch": 1.7357633205482128, + "grad_norm": 0.20803719758987427, + "learning_rate": 1.6503871649514064e-06, + "loss": 0.0505, + "num_input_tokens_seen": 47889504, + "step": 71050 + }, + { + "epoch": 1.73588547138006, + "grad_norm": 153.0974578857422, + "learning_rate": 1.6503223854764093e-06, + "loss": 0.0962, + "num_input_tokens_seen": 47892768, + "step": 71055 + }, + { + "epoch": 1.7360076222119072, + "grad_norm": 68.89737701416016, + "learning_rate": 1.6502576012720928e-06, + "loss": 0.057, + "num_input_tokens_seen": 47896480, + "step": 71060 + }, + { + "epoch": 1.7361297730437544, + "grad_norm": 112.9688949584961, + "learning_rate": 1.6501928123389282e-06, + "loss": 0.0938, + "num_input_tokens_seen": 47900256, + "step": 71065 + }, + { + "epoch": 1.7362519238756016, + "grad_norm": 0.1029193177819252, + "learning_rate": 1.6501280186773867e-06, + "loss": 0.079, + "num_input_tokens_seen": 47903648, + "step": 71070 + }, + { + "epoch": 1.7363740747074488, + "grad_norm": 35.86996841430664, + "learning_rate": 1.6500632202879392e-06, + "loss": 0.1451, + "num_input_tokens_seen": 47906912, + "step": 71075 + }, + { + "epoch": 1.736496225539296, + "grad_norm": 93.33722686767578, + "learning_rate": 1.6499984171710572e-06, + "loss": 0.0872, + "num_input_tokens_seen": 47910560, + "step": 71080 + }, + { + "epoch": 1.7366183763711431, + "grad_norm": 0.13678595423698425, + "learning_rate": 1.6499336093272121e-06, + "loss": 0.0756, + "num_input_tokens_seen": 47913632, + "step": 71085 + }, + { + "epoch": 1.7367405272029903, + "grad_norm": 0.1121816635131836, + "learning_rate": 1.6498687967568745e-06, + "loss": 0.0782, + "num_input_tokens_seen": 47917152, + "step": 71090 + }, + { + "epoch": 1.7368626780348375, + "grad_norm": 0.06260692328214645, + "learning_rate": 1.6498039794605166e-06, + "loss": 0.0485, + "num_input_tokens_seen": 47920288, + "step": 71095 + }, + { + "epoch": 1.7369848288666847, + "grad_norm": 11.609285354614258, + "learning_rate": 1.649739157438609e-06, + "loss": 0.0867, + "num_input_tokens_seen": 47923872, + "step": 71100 + }, + { + "epoch": 1.737106979698532, + "grad_norm": 14.361102104187012, + "learning_rate": 1.649674330691624e-06, + "loss": 0.036, + "num_input_tokens_seen": 47927200, + "step": 71105 + }, + { + "epoch": 1.7372291305303789, + "grad_norm": 19.945722579956055, + "learning_rate": 1.6496094992200322e-06, + "loss": 0.0798, + "num_input_tokens_seen": 47930464, + "step": 71110 + }, + { + "epoch": 1.737351281362226, + "grad_norm": 0.09397384524345398, + "learning_rate": 1.6495446630243056e-06, + "loss": 0.0412, + "num_input_tokens_seen": 47933984, + "step": 71115 + }, + { + "epoch": 1.7374734321940732, + "grad_norm": 3.3554627895355225, + "learning_rate": 1.649479822104915e-06, + "loss": 0.059, + "num_input_tokens_seen": 47937184, + "step": 71120 + }, + { + "epoch": 1.7375955830259204, + "grad_norm": 14.1300630569458, + "learning_rate": 1.649414976462333e-06, + "loss": 0.0557, + "num_input_tokens_seen": 47940512, + "step": 71125 + }, + { + "epoch": 1.7377177338577676, + "grad_norm": 0.28302502632141113, + "learning_rate": 1.6493501260970306e-06, + "loss": 0.0509, + "num_input_tokens_seen": 47944096, + "step": 71130 + }, + { + "epoch": 1.7378398846896146, + "grad_norm": 0.15091900527477264, + "learning_rate": 1.6492852710094792e-06, + "loss": 0.0978, + "num_input_tokens_seen": 47947872, + "step": 71135 + }, + { + "epoch": 1.7379620355214618, + "grad_norm": 123.89974975585938, + "learning_rate": 1.649220411200151e-06, + "loss": 0.0629, + "num_input_tokens_seen": 47951200, + "step": 71140 + }, + { + "epoch": 1.738084186353309, + "grad_norm": 0.14565247297286987, + "learning_rate": 1.649155546669517e-06, + "loss": 0.0905, + "num_input_tokens_seen": 47954400, + "step": 71145 + }, + { + "epoch": 1.7382063371851562, + "grad_norm": 26.535411834716797, + "learning_rate": 1.6490906774180493e-06, + "loss": 0.12, + "num_input_tokens_seen": 47957664, + "step": 71150 + }, + { + "epoch": 1.7383284880170033, + "grad_norm": 64.44357299804688, + "learning_rate": 1.6490258034462196e-06, + "loss": 0.0773, + "num_input_tokens_seen": 47960928, + "step": 71155 + }, + { + "epoch": 1.7384506388488505, + "grad_norm": 1.9539140462875366, + "learning_rate": 1.6489609247544998e-06, + "loss": 0.0025, + "num_input_tokens_seen": 47964512, + "step": 71160 + }, + { + "epoch": 1.7385727896806977, + "grad_norm": 0.23583951592445374, + "learning_rate": 1.6488960413433617e-06, + "loss": 0.1051, + "num_input_tokens_seen": 47967456, + "step": 71165 + }, + { + "epoch": 1.738694940512545, + "grad_norm": 53.65757751464844, + "learning_rate": 1.6488311532132768e-06, + "loss": 0.0377, + "num_input_tokens_seen": 47970336, + "step": 71170 + }, + { + "epoch": 1.738817091344392, + "grad_norm": 88.5074691772461, + "learning_rate": 1.6487662603647174e-06, + "loss": 0.1525, + "num_input_tokens_seen": 47973984, + "step": 71175 + }, + { + "epoch": 1.7389392421762393, + "grad_norm": 27.1817569732666, + "learning_rate": 1.6487013627981554e-06, + "loss": 0.045, + "num_input_tokens_seen": 47977248, + "step": 71180 + }, + { + "epoch": 1.7390613930080865, + "grad_norm": 26.244455337524414, + "learning_rate": 1.648636460514062e-06, + "loss": 0.1077, + "num_input_tokens_seen": 47980384, + "step": 71185 + }, + { + "epoch": 1.7391835438399337, + "grad_norm": 139.1673583984375, + "learning_rate": 1.6485715535129107e-06, + "loss": 0.0564, + "num_input_tokens_seen": 47983584, + "step": 71190 + }, + { + "epoch": 1.7393056946717809, + "grad_norm": 0.4898732006549835, + "learning_rate": 1.648506641795172e-06, + "loss": 0.0383, + "num_input_tokens_seen": 47986656, + "step": 71195 + }, + { + "epoch": 1.7394278455036278, + "grad_norm": 0.0008758667972870171, + "learning_rate": 1.6484417253613184e-06, + "loss": 0.0527, + "num_input_tokens_seen": 47990240, + "step": 71200 + }, + { + "epoch": 1.739549996335475, + "grad_norm": 3.5209832191467285, + "learning_rate": 1.6483768042118227e-06, + "loss": 0.1255, + "num_input_tokens_seen": 47993568, + "step": 71205 + }, + { + "epoch": 1.7396721471673222, + "grad_norm": 12.4790678024292, + "learning_rate": 1.6483118783471563e-06, + "loss": 0.1047, + "num_input_tokens_seen": 47996832, + "step": 71210 + }, + { + "epoch": 1.7397942979991694, + "grad_norm": 80.48369598388672, + "learning_rate": 1.6482469477677916e-06, + "loss": 0.0045, + "num_input_tokens_seen": 48000224, + "step": 71215 + }, + { + "epoch": 1.7399164488310166, + "grad_norm": 33.51426315307617, + "learning_rate": 1.6481820124742005e-06, + "loss": 0.1856, + "num_input_tokens_seen": 48003744, + "step": 71220 + }, + { + "epoch": 1.7400385996628636, + "grad_norm": 29.704303741455078, + "learning_rate": 1.6481170724668556e-06, + "loss": 0.0031, + "num_input_tokens_seen": 48006880, + "step": 71225 + }, + { + "epoch": 1.7401607504947108, + "grad_norm": 10.239999771118164, + "learning_rate": 1.648052127746229e-06, + "loss": 0.0432, + "num_input_tokens_seen": 48010144, + "step": 71230 + }, + { + "epoch": 1.740282901326558, + "grad_norm": 0.9402587413787842, + "learning_rate": 1.6479871783127932e-06, + "loss": 0.0296, + "num_input_tokens_seen": 48013216, + "step": 71235 + }, + { + "epoch": 1.7404050521584051, + "grad_norm": 55.33900833129883, + "learning_rate": 1.6479222241670204e-06, + "loss": 0.1239, + "num_input_tokens_seen": 48016544, + "step": 71240 + }, + { + "epoch": 1.7405272029902523, + "grad_norm": 0.043433524668216705, + "learning_rate": 1.6478572653093826e-06, + "loss": 0.0498, + "num_input_tokens_seen": 48019744, + "step": 71245 + }, + { + "epoch": 1.7406493538220995, + "grad_norm": 0.17277486622333527, + "learning_rate": 1.6477923017403526e-06, + "loss": 0.0319, + "num_input_tokens_seen": 48023200, + "step": 71250 + }, + { + "epoch": 1.7407715046539467, + "grad_norm": 37.4304313659668, + "learning_rate": 1.647727333460403e-06, + "loss": 0.1408, + "num_input_tokens_seen": 48027104, + "step": 71255 + }, + { + "epoch": 1.740893655485794, + "grad_norm": 0.046346426010131836, + "learning_rate": 1.6476623604700058e-06, + "loss": 0.0516, + "num_input_tokens_seen": 48031072, + "step": 71260 + }, + { + "epoch": 1.741015806317641, + "grad_norm": 0.047822318971157074, + "learning_rate": 1.6475973827696336e-06, + "loss": 0.0052, + "num_input_tokens_seen": 48034144, + "step": 71265 + }, + { + "epoch": 1.7411379571494883, + "grad_norm": 0.5275576114654541, + "learning_rate": 1.6475324003597591e-06, + "loss": 0.0012, + "num_input_tokens_seen": 48037216, + "step": 71270 + }, + { + "epoch": 1.7412601079813355, + "grad_norm": 34.40401840209961, + "learning_rate": 1.6474674132408548e-06, + "loss": 0.1353, + "num_input_tokens_seen": 48040416, + "step": 71275 + }, + { + "epoch": 1.7413822588131826, + "grad_norm": 22.45237159729004, + "learning_rate": 1.6474024214133935e-06, + "loss": 0.0746, + "num_input_tokens_seen": 48044064, + "step": 71280 + }, + { + "epoch": 1.7415044096450298, + "grad_norm": 28.317441940307617, + "learning_rate": 1.6473374248778475e-06, + "loss": 0.145, + "num_input_tokens_seen": 48047520, + "step": 71285 + }, + { + "epoch": 1.7416265604768768, + "grad_norm": 0.4682207703590393, + "learning_rate": 1.6472724236346897e-06, + "loss": 0.1019, + "num_input_tokens_seen": 48050848, + "step": 71290 + }, + { + "epoch": 1.741748711308724, + "grad_norm": 0.2123502492904663, + "learning_rate": 1.647207417684393e-06, + "loss": 0.2008, + "num_input_tokens_seen": 48054560, + "step": 71295 + }, + { + "epoch": 1.7418708621405712, + "grad_norm": 0.3460741937160492, + "learning_rate": 1.6471424070274295e-06, + "loss": 0.0145, + "num_input_tokens_seen": 48058144, + "step": 71300 + }, + { + "epoch": 1.7419930129724184, + "grad_norm": 0.04934366047382355, + "learning_rate": 1.6470773916642726e-06, + "loss": 0.0253, + "num_input_tokens_seen": 48061088, + "step": 71305 + }, + { + "epoch": 1.7421151638042653, + "grad_norm": 54.11192321777344, + "learning_rate": 1.6470123715953944e-06, + "loss": 0.1064, + "num_input_tokens_seen": 48064224, + "step": 71310 + }, + { + "epoch": 1.7422373146361125, + "grad_norm": 0.3336058259010315, + "learning_rate": 1.6469473468212688e-06, + "loss": 0.0435, + "num_input_tokens_seen": 48067296, + "step": 71315 + }, + { + "epoch": 1.7423594654679597, + "grad_norm": 0.8524634838104248, + "learning_rate": 1.646882317342368e-06, + "loss": 0.0313, + "num_input_tokens_seen": 48071712, + "step": 71320 + }, + { + "epoch": 1.742481616299807, + "grad_norm": 0.20961619913578033, + "learning_rate": 1.6468172831591647e-06, + "loss": 0.0538, + "num_input_tokens_seen": 48075104, + "step": 71325 + }, + { + "epoch": 1.742603767131654, + "grad_norm": 0.06928694993257523, + "learning_rate": 1.6467522442721325e-06, + "loss": 0.0011, + "num_input_tokens_seen": 48078688, + "step": 71330 + }, + { + "epoch": 1.7427259179635013, + "grad_norm": 30.175479888916016, + "learning_rate": 1.6466872006817436e-06, + "loss": 0.152, + "num_input_tokens_seen": 48082272, + "step": 71335 + }, + { + "epoch": 1.7428480687953485, + "grad_norm": 0.060041431337594986, + "learning_rate": 1.6466221523884715e-06, + "loss": 0.0009, + "num_input_tokens_seen": 48085856, + "step": 71340 + }, + { + "epoch": 1.7429702196271957, + "grad_norm": 0.18987978994846344, + "learning_rate": 1.6465570993927895e-06, + "loss": 0.1235, + "num_input_tokens_seen": 48089312, + "step": 71345 + }, + { + "epoch": 1.7430923704590429, + "grad_norm": 12.799039840698242, + "learning_rate": 1.6464920416951702e-06, + "loss": 0.0428, + "num_input_tokens_seen": 48092704, + "step": 71350 + }, + { + "epoch": 1.74321452129089, + "grad_norm": 0.3737916648387909, + "learning_rate": 1.6464269792960867e-06, + "loss": 0.0574, + "num_input_tokens_seen": 48096224, + "step": 71355 + }, + { + "epoch": 1.7433366721227372, + "grad_norm": 0.11154882609844208, + "learning_rate": 1.6463619121960127e-06, + "loss": 0.0495, + "num_input_tokens_seen": 48099424, + "step": 71360 + }, + { + "epoch": 1.7434588229545844, + "grad_norm": 0.8025643229484558, + "learning_rate": 1.646296840395421e-06, + "loss": 0.0956, + "num_input_tokens_seen": 48102560, + "step": 71365 + }, + { + "epoch": 1.7435809737864316, + "grad_norm": 0.3329102098941803, + "learning_rate": 1.6462317638947846e-06, + "loss": 0.1285, + "num_input_tokens_seen": 48105824, + "step": 71370 + }, + { + "epoch": 1.7437031246182788, + "grad_norm": 0.03304976969957352, + "learning_rate": 1.646166682694577e-06, + "loss": 0.0709, + "num_input_tokens_seen": 48109472, + "step": 71375 + }, + { + "epoch": 1.7438252754501258, + "grad_norm": 0.25978147983551025, + "learning_rate": 1.6461015967952717e-06, + "loss": 0.08, + "num_input_tokens_seen": 48112608, + "step": 71380 + }, + { + "epoch": 1.743947426281973, + "grad_norm": 0.16290909051895142, + "learning_rate": 1.6460365061973418e-06, + "loss": 0.0549, + "num_input_tokens_seen": 48115936, + "step": 71385 + }, + { + "epoch": 1.7440695771138202, + "grad_norm": 0.23594853281974792, + "learning_rate": 1.6459714109012603e-06, + "loss": 0.0378, + "num_input_tokens_seen": 48119968, + "step": 71390 + }, + { + "epoch": 1.7441917279456673, + "grad_norm": 24.003435134887695, + "learning_rate": 1.6459063109075014e-06, + "loss": 0.1648, + "num_input_tokens_seen": 48123232, + "step": 71395 + }, + { + "epoch": 1.7443138787775143, + "grad_norm": 9.70009708404541, + "learning_rate": 1.6458412062165378e-06, + "loss": 0.1554, + "num_input_tokens_seen": 48126816, + "step": 71400 + }, + { + "epoch": 1.7444360296093615, + "grad_norm": 35.708526611328125, + "learning_rate": 1.6457760968288432e-06, + "loss": 0.1034, + "num_input_tokens_seen": 48129888, + "step": 71405 + }, + { + "epoch": 1.7445581804412087, + "grad_norm": 9.53857135772705, + "learning_rate": 1.6457109827448914e-06, + "loss": 0.1703, + "num_input_tokens_seen": 48133152, + "step": 71410 + }, + { + "epoch": 1.7446803312730559, + "grad_norm": 25.09573745727539, + "learning_rate": 1.6456458639651553e-06, + "loss": 0.1, + "num_input_tokens_seen": 48136480, + "step": 71415 + }, + { + "epoch": 1.744802482104903, + "grad_norm": 0.6554991602897644, + "learning_rate": 1.6455807404901093e-06, + "loss": 0.0925, + "num_input_tokens_seen": 48139808, + "step": 71420 + }, + { + "epoch": 1.7449246329367503, + "grad_norm": 0.8519078493118286, + "learning_rate": 1.6455156123202264e-06, + "loss": 0.0316, + "num_input_tokens_seen": 48142688, + "step": 71425 + }, + { + "epoch": 1.7450467837685975, + "grad_norm": 0.08944659680128098, + "learning_rate": 1.64545047945598e-06, + "loss": 0.0916, + "num_input_tokens_seen": 48146144, + "step": 71430 + }, + { + "epoch": 1.7451689346004446, + "grad_norm": 0.468730628490448, + "learning_rate": 1.6453853418978444e-06, + "loss": 0.0066, + "num_input_tokens_seen": 48149472, + "step": 71435 + }, + { + "epoch": 1.7452910854322918, + "grad_norm": 0.04151911288499832, + "learning_rate": 1.6453201996462928e-06, + "loss": 0.1033, + "num_input_tokens_seen": 48152608, + "step": 71440 + }, + { + "epoch": 1.745413236264139, + "grad_norm": 0.18563562631607056, + "learning_rate": 1.6452550527017994e-06, + "loss": 0.0029, + "num_input_tokens_seen": 48155552, + "step": 71445 + }, + { + "epoch": 1.7455353870959862, + "grad_norm": 9.495387077331543, + "learning_rate": 1.6451899010648377e-06, + "loss": 0.0464, + "num_input_tokens_seen": 48158752, + "step": 71450 + }, + { + "epoch": 1.7456575379278334, + "grad_norm": 48.27766036987305, + "learning_rate": 1.6451247447358812e-06, + "loss": 0.1674, + "num_input_tokens_seen": 48162144, + "step": 71455 + }, + { + "epoch": 1.7457796887596806, + "grad_norm": 0.08520246297121048, + "learning_rate": 1.6450595837154042e-06, + "loss": 0.0626, + "num_input_tokens_seen": 48165792, + "step": 71460 + }, + { + "epoch": 1.7459018395915278, + "grad_norm": 29.665040969848633, + "learning_rate": 1.6449944180038805e-06, + "loss": 0.0679, + "num_input_tokens_seen": 48168864, + "step": 71465 + }, + { + "epoch": 1.7460239904233747, + "grad_norm": 0.4952899217605591, + "learning_rate": 1.6449292476017835e-06, + "loss": 0.0022, + "num_input_tokens_seen": 48172128, + "step": 71470 + }, + { + "epoch": 1.746146141255222, + "grad_norm": 0.07937739044427872, + "learning_rate": 1.6448640725095882e-06, + "loss": 0.1354, + "num_input_tokens_seen": 48175328, + "step": 71475 + }, + { + "epoch": 1.7462682920870691, + "grad_norm": 10.36311149597168, + "learning_rate": 1.6447988927277674e-06, + "loss": 0.0472, + "num_input_tokens_seen": 48178848, + "step": 71480 + }, + { + "epoch": 1.7463904429189163, + "grad_norm": 1.0270441770553589, + "learning_rate": 1.6447337082567958e-06, + "loss": 0.1125, + "num_input_tokens_seen": 48182432, + "step": 71485 + }, + { + "epoch": 1.7465125937507633, + "grad_norm": 0.06908803433179855, + "learning_rate": 1.6446685190971472e-06, + "loss": 0.042, + "num_input_tokens_seen": 48186208, + "step": 71490 + }, + { + "epoch": 1.7466347445826105, + "grad_norm": 164.5870361328125, + "learning_rate": 1.6446033252492958e-06, + "loss": 0.1288, + "num_input_tokens_seen": 48189792, + "step": 71495 + }, + { + "epoch": 1.7467568954144577, + "grad_norm": 1.4709440469741821, + "learning_rate": 1.6445381267137158e-06, + "loss": 0.0895, + "num_input_tokens_seen": 48193312, + "step": 71500 + }, + { + "epoch": 1.7468790462463049, + "grad_norm": 24.193714141845703, + "learning_rate": 1.644472923490881e-06, + "loss": 0.1459, + "num_input_tokens_seen": 48196384, + "step": 71505 + }, + { + "epoch": 1.747001197078152, + "grad_norm": 0.11595480889081955, + "learning_rate": 1.6444077155812656e-06, + "loss": 0.0364, + "num_input_tokens_seen": 48199840, + "step": 71510 + }, + { + "epoch": 1.7471233479099992, + "grad_norm": 2.7352027893066406, + "learning_rate": 1.6443425029853442e-06, + "loss": 0.003, + "num_input_tokens_seen": 48203616, + "step": 71515 + }, + { + "epoch": 1.7472454987418464, + "grad_norm": 0.22880008816719055, + "learning_rate": 1.6442772857035906e-06, + "loss": 0.0822, + "num_input_tokens_seen": 48206816, + "step": 71520 + }, + { + "epoch": 1.7473676495736936, + "grad_norm": 9.08481502532959, + "learning_rate": 1.6442120637364796e-06, + "loss": 0.0369, + "num_input_tokens_seen": 48210464, + "step": 71525 + }, + { + "epoch": 1.7474898004055408, + "grad_norm": 23.009002685546875, + "learning_rate": 1.6441468370844848e-06, + "loss": 0.0411, + "num_input_tokens_seen": 48214112, + "step": 71530 + }, + { + "epoch": 1.747611951237388, + "grad_norm": 0.7533755898475647, + "learning_rate": 1.6440816057480812e-06, + "loss": 0.2124, + "num_input_tokens_seen": 48217824, + "step": 71535 + }, + { + "epoch": 1.7477341020692352, + "grad_norm": 0.3032033443450928, + "learning_rate": 1.6440163697277432e-06, + "loss": 0.0437, + "num_input_tokens_seen": 48221216, + "step": 71540 + }, + { + "epoch": 1.7478562529010824, + "grad_norm": 12.668747901916504, + "learning_rate": 1.6439511290239447e-06, + "loss": 0.1492, + "num_input_tokens_seen": 48224992, + "step": 71545 + }, + { + "epoch": 1.7479784037329296, + "grad_norm": 23.0964412689209, + "learning_rate": 1.6438858836371604e-06, + "loss": 0.0295, + "num_input_tokens_seen": 48228384, + "step": 71550 + }, + { + "epoch": 1.7481005545647765, + "grad_norm": 0.36234384775161743, + "learning_rate": 1.6438206335678647e-06, + "loss": 0.0376, + "num_input_tokens_seen": 48231584, + "step": 71555 + }, + { + "epoch": 1.7482227053966237, + "grad_norm": 0.10900892317295074, + "learning_rate": 1.6437553788165319e-06, + "loss": 0.0459, + "num_input_tokens_seen": 48234720, + "step": 71560 + }, + { + "epoch": 1.748344856228471, + "grad_norm": 0.11470355093479156, + "learning_rate": 1.6436901193836372e-06, + "loss": 0.0754, + "num_input_tokens_seen": 48238368, + "step": 71565 + }, + { + "epoch": 1.748467007060318, + "grad_norm": 0.06794214248657227, + "learning_rate": 1.6436248552696547e-06, + "loss": 0.06, + "num_input_tokens_seen": 48241760, + "step": 71570 + }, + { + "epoch": 1.7485891578921653, + "grad_norm": 12.417028427124023, + "learning_rate": 1.6435595864750592e-06, + "loss": 0.2301, + "num_input_tokens_seen": 48245344, + "step": 71575 + }, + { + "epoch": 1.7487113087240123, + "grad_norm": 20.654052734375, + "learning_rate": 1.6434943130003253e-06, + "loss": 0.0589, + "num_input_tokens_seen": 48248992, + "step": 71580 + }, + { + "epoch": 1.7488334595558594, + "grad_norm": 0.12384731322526932, + "learning_rate": 1.6434290348459279e-06, + "loss": 0.0801, + "num_input_tokens_seen": 48252128, + "step": 71585 + }, + { + "epoch": 1.7489556103877066, + "grad_norm": 35.29301452636719, + "learning_rate": 1.643363752012341e-06, + "loss": 0.0718, + "num_input_tokens_seen": 48255456, + "step": 71590 + }, + { + "epoch": 1.7490777612195538, + "grad_norm": 13.87523078918457, + "learning_rate": 1.6432984645000403e-06, + "loss": 0.2395, + "num_input_tokens_seen": 48258464, + "step": 71595 + }, + { + "epoch": 1.749199912051401, + "grad_norm": 0.09350656718015671, + "learning_rate": 1.6432331723095e-06, + "loss": 0.0602, + "num_input_tokens_seen": 48261728, + "step": 71600 + }, + { + "epoch": 1.7493220628832482, + "grad_norm": 0.042109668254852295, + "learning_rate": 1.6431678754411951e-06, + "loss": 0.1416, + "num_input_tokens_seen": 48265248, + "step": 71605 + }, + { + "epoch": 1.7494442137150954, + "grad_norm": 11.492691040039062, + "learning_rate": 1.6431025738956002e-06, + "loss": 0.1015, + "num_input_tokens_seen": 48268768, + "step": 71610 + }, + { + "epoch": 1.7495663645469426, + "grad_norm": 19.275936126708984, + "learning_rate": 1.6430372676731904e-06, + "loss": 0.15, + "num_input_tokens_seen": 48272096, + "step": 71615 + }, + { + "epoch": 1.7496885153787898, + "grad_norm": 15.951652526855469, + "learning_rate": 1.6429719567744406e-06, + "loss": 0.0779, + "num_input_tokens_seen": 48275552, + "step": 71620 + }, + { + "epoch": 1.749810666210637, + "grad_norm": 0.5586774349212646, + "learning_rate": 1.6429066411998261e-06, + "loss": 0.1512, + "num_input_tokens_seen": 48278944, + "step": 71625 + }, + { + "epoch": 1.7499328170424842, + "grad_norm": 0.03820788115262985, + "learning_rate": 1.6428413209498216e-06, + "loss": 0.1058, + "num_input_tokens_seen": 48282080, + "step": 71630 + }, + { + "epoch": 1.7500549678743313, + "grad_norm": 168.43922424316406, + "learning_rate": 1.6427759960249018e-06, + "loss": 0.1097, + "num_input_tokens_seen": 48285280, + "step": 71635 + }, + { + "epoch": 1.7501282583734397, + "eval_loss": 0.12312835454940796, + "eval_runtime": 47.5799, + "eval_samples_per_second": 764.714, + "eval_steps_per_second": 95.608, + "num_input_tokens_seen": 48287456, + "step": 71638 + }, + { + "epoch": 1.7501771187061785, + "grad_norm": 88.0875015258789, + "learning_rate": 1.6427106664255423e-06, + "loss": 0.0614, + "num_input_tokens_seen": 48288800, + "step": 71640 + }, + { + "epoch": 1.7502992695380255, + "grad_norm": 0.25800928473472595, + "learning_rate": 1.642645332152218e-06, + "loss": 0.0532, + "num_input_tokens_seen": 48291744, + "step": 71645 + }, + { + "epoch": 1.7504214203698727, + "grad_norm": 1.028090000152588, + "learning_rate": 1.6425799932054037e-06, + "loss": 0.0864, + "num_input_tokens_seen": 48295264, + "step": 71650 + }, + { + "epoch": 1.7505435712017199, + "grad_norm": 0.6299525499343872, + "learning_rate": 1.642514649585575e-06, + "loss": 0.1333, + "num_input_tokens_seen": 48298400, + "step": 71655 + }, + { + "epoch": 1.750665722033567, + "grad_norm": 94.1126708984375, + "learning_rate": 1.6424493012932072e-06, + "loss": 0.1662, + "num_input_tokens_seen": 48301600, + "step": 71660 + }, + { + "epoch": 1.7507878728654143, + "grad_norm": 0.396215558052063, + "learning_rate": 1.6423839483287751e-06, + "loss": 0.0255, + "num_input_tokens_seen": 48305312, + "step": 71665 + }, + { + "epoch": 1.7509100236972612, + "grad_norm": 0.31599822640419006, + "learning_rate": 1.6423185906927542e-06, + "loss": 0.0415, + "num_input_tokens_seen": 48308448, + "step": 71670 + }, + { + "epoch": 1.7510321745291084, + "grad_norm": 0.15240201354026794, + "learning_rate": 1.6422532283856195e-06, + "loss": 0.0536, + "num_input_tokens_seen": 48311968, + "step": 71675 + }, + { + "epoch": 1.7511543253609556, + "grad_norm": 201.6836395263672, + "learning_rate": 1.6421878614078466e-06, + "loss": 0.0186, + "num_input_tokens_seen": 48315424, + "step": 71680 + }, + { + "epoch": 1.7512764761928028, + "grad_norm": 0.1291874200105667, + "learning_rate": 1.642122489759911e-06, + "loss": 0.0714, + "num_input_tokens_seen": 48319008, + "step": 71685 + }, + { + "epoch": 1.75139862702465, + "grad_norm": 0.5408225059509277, + "learning_rate": 1.642057113442288e-06, + "loss": 0.1259, + "num_input_tokens_seen": 48323168, + "step": 71690 + }, + { + "epoch": 1.7515207778564972, + "grad_norm": 0.1968991905450821, + "learning_rate": 1.641991732455453e-06, + "loss": 0.0668, + "num_input_tokens_seen": 48326752, + "step": 71695 + }, + { + "epoch": 1.7516429286883444, + "grad_norm": 107.3658218383789, + "learning_rate": 1.6419263467998813e-06, + "loss": 0.0899, + "num_input_tokens_seen": 48330144, + "step": 71700 + }, + { + "epoch": 1.7517650795201916, + "grad_norm": 22.380523681640625, + "learning_rate": 1.6418609564760485e-06, + "loss": 0.1817, + "num_input_tokens_seen": 48333280, + "step": 71705 + }, + { + "epoch": 1.7518872303520387, + "grad_norm": 0.2987278699874878, + "learning_rate": 1.6417955614844304e-06, + "loss": 0.1945, + "num_input_tokens_seen": 48336544, + "step": 71710 + }, + { + "epoch": 1.752009381183886, + "grad_norm": 0.23216158151626587, + "learning_rate": 1.6417301618255021e-06, + "loss": 0.0362, + "num_input_tokens_seen": 48339744, + "step": 71715 + }, + { + "epoch": 1.7521315320157331, + "grad_norm": 0.3558136522769928, + "learning_rate": 1.6416647574997397e-06, + "loss": 0.0275, + "num_input_tokens_seen": 48343200, + "step": 71720 + }, + { + "epoch": 1.7522536828475803, + "grad_norm": 55.75593948364258, + "learning_rate": 1.6415993485076184e-06, + "loss": 0.0569, + "num_input_tokens_seen": 48346848, + "step": 71725 + }, + { + "epoch": 1.7523758336794275, + "grad_norm": 0.14112365245819092, + "learning_rate": 1.6415339348496144e-06, + "loss": 0.002, + "num_input_tokens_seen": 48350560, + "step": 71730 + }, + { + "epoch": 1.7524979845112745, + "grad_norm": 7.328372955322266, + "learning_rate": 1.6414685165262027e-06, + "loss": 0.1212, + "num_input_tokens_seen": 48354016, + "step": 71735 + }, + { + "epoch": 1.7526201353431217, + "grad_norm": 11.065155029296875, + "learning_rate": 1.6414030935378597e-06, + "loss": 0.1804, + "num_input_tokens_seen": 48357728, + "step": 71740 + }, + { + "epoch": 1.7527422861749689, + "grad_norm": 0.29226332902908325, + "learning_rate": 1.6413376658850607e-06, + "loss": 0.0572, + "num_input_tokens_seen": 48361248, + "step": 71745 + }, + { + "epoch": 1.752864437006816, + "grad_norm": 0.9687400460243225, + "learning_rate": 1.6412722335682818e-06, + "loss": 0.0051, + "num_input_tokens_seen": 48364704, + "step": 71750 + }, + { + "epoch": 1.7529865878386632, + "grad_norm": 144.53562927246094, + "learning_rate": 1.6412067965879986e-06, + "loss": 0.096, + "num_input_tokens_seen": 48367840, + "step": 71755 + }, + { + "epoch": 1.7531087386705102, + "grad_norm": 0.12270642817020416, + "learning_rate": 1.6411413549446873e-06, + "loss": 0.0214, + "num_input_tokens_seen": 48371552, + "step": 71760 + }, + { + "epoch": 1.7532308895023574, + "grad_norm": 0.2248445302248001, + "learning_rate": 1.6410759086388235e-06, + "loss": 0.0951, + "num_input_tokens_seen": 48374624, + "step": 71765 + }, + { + "epoch": 1.7533530403342046, + "grad_norm": 22.86701774597168, + "learning_rate": 1.6410104576708835e-06, + "loss": 0.0808, + "num_input_tokens_seen": 48378208, + "step": 71770 + }, + { + "epoch": 1.7534751911660518, + "grad_norm": 19.745939254760742, + "learning_rate": 1.6409450020413424e-06, + "loss": 0.1468, + "num_input_tokens_seen": 48381536, + "step": 71775 + }, + { + "epoch": 1.753597341997899, + "grad_norm": 20.355432510375977, + "learning_rate": 1.6408795417506773e-06, + "loss": 0.1083, + "num_input_tokens_seen": 48385440, + "step": 71780 + }, + { + "epoch": 1.7537194928297462, + "grad_norm": 0.256071001291275, + "learning_rate": 1.6408140767993639e-06, + "loss": 0.0812, + "num_input_tokens_seen": 48388640, + "step": 71785 + }, + { + "epoch": 1.7538416436615933, + "grad_norm": 0.07664134353399277, + "learning_rate": 1.640748607187878e-06, + "loss": 0.155, + "num_input_tokens_seen": 48392096, + "step": 71790 + }, + { + "epoch": 1.7539637944934405, + "grad_norm": 7.759392261505127, + "learning_rate": 1.640683132916696e-06, + "loss": 0.0755, + "num_input_tokens_seen": 48395552, + "step": 71795 + }, + { + "epoch": 1.7540859453252877, + "grad_norm": 1.141500473022461, + "learning_rate": 1.6406176539862936e-06, + "loss": 0.1561, + "num_input_tokens_seen": 48398816, + "step": 71800 + }, + { + "epoch": 1.754208096157135, + "grad_norm": 18.568471908569336, + "learning_rate": 1.6405521703971476e-06, + "loss": 0.1175, + "num_input_tokens_seen": 48402400, + "step": 71805 + }, + { + "epoch": 1.754330246988982, + "grad_norm": 90.30744171142578, + "learning_rate": 1.640486682149734e-06, + "loss": 0.0259, + "num_input_tokens_seen": 48405728, + "step": 71810 + }, + { + "epoch": 1.7544523978208293, + "grad_norm": 11.252006530761719, + "learning_rate": 1.6404211892445288e-06, + "loss": 0.1611, + "num_input_tokens_seen": 48408800, + "step": 71815 + }, + { + "epoch": 1.7545745486526765, + "grad_norm": 0.3409070670604706, + "learning_rate": 1.6403556916820088e-06, + "loss": 0.0316, + "num_input_tokens_seen": 48412000, + "step": 71820 + }, + { + "epoch": 1.7546966994845234, + "grad_norm": 0.1269320547580719, + "learning_rate": 1.6402901894626497e-06, + "loss": 0.0616, + "num_input_tokens_seen": 48415584, + "step": 71825 + }, + { + "epoch": 1.7548188503163706, + "grad_norm": 0.20163556933403015, + "learning_rate": 1.6402246825869281e-06, + "loss": 0.0016, + "num_input_tokens_seen": 48418976, + "step": 71830 + }, + { + "epoch": 1.7549410011482178, + "grad_norm": 0.2925082743167877, + "learning_rate": 1.6401591710553201e-06, + "loss": 0.0401, + "num_input_tokens_seen": 48422240, + "step": 71835 + }, + { + "epoch": 1.755063151980065, + "grad_norm": 0.11735465377569199, + "learning_rate": 1.6400936548683028e-06, + "loss": 0.0016, + "num_input_tokens_seen": 48425632, + "step": 71840 + }, + { + "epoch": 1.755185302811912, + "grad_norm": 91.04149627685547, + "learning_rate": 1.6400281340263524e-06, + "loss": 0.0936, + "num_input_tokens_seen": 48428448, + "step": 71845 + }, + { + "epoch": 1.7553074536437592, + "grad_norm": 0.017427001148462296, + "learning_rate": 1.6399626085299452e-06, + "loss": 0.0006, + "num_input_tokens_seen": 48432416, + "step": 71850 + }, + { + "epoch": 1.7554296044756064, + "grad_norm": 35.123146057128906, + "learning_rate": 1.6398970783795577e-06, + "loss": 0.1226, + "num_input_tokens_seen": 48436128, + "step": 71855 + }, + { + "epoch": 1.7555517553074536, + "grad_norm": 13.244025230407715, + "learning_rate": 1.6398315435756666e-06, + "loss": 0.1049, + "num_input_tokens_seen": 48439520, + "step": 71860 + }, + { + "epoch": 1.7556739061393007, + "grad_norm": 61.519649505615234, + "learning_rate": 1.6397660041187482e-06, + "loss": 0.0826, + "num_input_tokens_seen": 48442272, + "step": 71865 + }, + { + "epoch": 1.755796056971148, + "grad_norm": 0.528657078742981, + "learning_rate": 1.6397004600092794e-06, + "loss": 0.0228, + "num_input_tokens_seen": 48445472, + "step": 71870 + }, + { + "epoch": 1.7559182078029951, + "grad_norm": 0.6462512612342834, + "learning_rate": 1.639634911247737e-06, + "loss": 0.0483, + "num_input_tokens_seen": 48448928, + "step": 71875 + }, + { + "epoch": 1.7560403586348423, + "grad_norm": 0.5328592658042908, + "learning_rate": 1.6395693578345973e-06, + "loss": 0.1246, + "num_input_tokens_seen": 48452576, + "step": 71880 + }, + { + "epoch": 1.7561625094666895, + "grad_norm": 1.7375816106796265, + "learning_rate": 1.6395037997703373e-06, + "loss": 0.0114, + "num_input_tokens_seen": 48455840, + "step": 71885 + }, + { + "epoch": 1.7562846602985367, + "grad_norm": 0.04445521533489227, + "learning_rate": 1.6394382370554337e-06, + "loss": 0.0011, + "num_input_tokens_seen": 48458848, + "step": 71890 + }, + { + "epoch": 1.7564068111303839, + "grad_norm": 0.13666929304599762, + "learning_rate": 1.6393726696903634e-06, + "loss": 0.002, + "num_input_tokens_seen": 48462304, + "step": 71895 + }, + { + "epoch": 1.756528961962231, + "grad_norm": 52.5565185546875, + "learning_rate": 1.6393070976756027e-06, + "loss": 0.1411, + "num_input_tokens_seen": 48465888, + "step": 71900 + }, + { + "epoch": 1.7566511127940783, + "grad_norm": 1.7487032413482666, + "learning_rate": 1.639241521011629e-06, + "loss": 0.0123, + "num_input_tokens_seen": 48469152, + "step": 71905 + }, + { + "epoch": 1.7567732636259255, + "grad_norm": 5.133216857910156, + "learning_rate": 1.6391759396989188e-06, + "loss": 0.0587, + "num_input_tokens_seen": 48472480, + "step": 71910 + }, + { + "epoch": 1.7568954144577724, + "grad_norm": 0.04294794425368309, + "learning_rate": 1.6391103537379496e-06, + "loss": 0.0357, + "num_input_tokens_seen": 48476128, + "step": 71915 + }, + { + "epoch": 1.7570175652896196, + "grad_norm": 45.32661056518555, + "learning_rate": 1.639044763129198e-06, + "loss": 0.3049, + "num_input_tokens_seen": 48479392, + "step": 71920 + }, + { + "epoch": 1.7571397161214668, + "grad_norm": 0.3233184516429901, + "learning_rate": 1.638979167873141e-06, + "loss": 0.0465, + "num_input_tokens_seen": 48482976, + "step": 71925 + }, + { + "epoch": 1.757261866953314, + "grad_norm": 0.39100873470306396, + "learning_rate": 1.6389135679702554e-06, + "loss": 0.0277, + "num_input_tokens_seen": 48486048, + "step": 71930 + }, + { + "epoch": 1.757384017785161, + "grad_norm": 12.223175048828125, + "learning_rate": 1.6388479634210187e-06, + "loss": 0.0618, + "num_input_tokens_seen": 48489120, + "step": 71935 + }, + { + "epoch": 1.7575061686170081, + "grad_norm": 0.32122930884361267, + "learning_rate": 1.6387823542259075e-06, + "loss": 0.0703, + "num_input_tokens_seen": 48492448, + "step": 71940 + }, + { + "epoch": 1.7576283194488553, + "grad_norm": 0.18866363167762756, + "learning_rate": 1.6387167403853994e-06, + "loss": 0.0019, + "num_input_tokens_seen": 48496096, + "step": 71945 + }, + { + "epoch": 1.7577504702807025, + "grad_norm": 0.5957804918289185, + "learning_rate": 1.6386511218999714e-06, + "loss": 0.1197, + "num_input_tokens_seen": 48499680, + "step": 71950 + }, + { + "epoch": 1.7578726211125497, + "grad_norm": 11.999557495117188, + "learning_rate": 1.6385854987701007e-06, + "loss": 0.2407, + "num_input_tokens_seen": 48503584, + "step": 71955 + }, + { + "epoch": 1.757994771944397, + "grad_norm": 0.06358401477336884, + "learning_rate": 1.6385198709962642e-06, + "loss": 0.0942, + "num_input_tokens_seen": 48506912, + "step": 71960 + }, + { + "epoch": 1.758116922776244, + "grad_norm": 45.37395095825195, + "learning_rate": 1.6384542385789397e-06, + "loss": 0.2849, + "num_input_tokens_seen": 48509856, + "step": 71965 + }, + { + "epoch": 1.7582390736080913, + "grad_norm": 0.21417827904224396, + "learning_rate": 1.638388601518604e-06, + "loss": 0.0458, + "num_input_tokens_seen": 48513120, + "step": 71970 + }, + { + "epoch": 1.7583612244399385, + "grad_norm": 0.17939996719360352, + "learning_rate": 1.6383229598157353e-06, + "loss": 0.0368, + "num_input_tokens_seen": 48516320, + "step": 71975 + }, + { + "epoch": 1.7584833752717857, + "grad_norm": 0.11681331694126129, + "learning_rate": 1.63825731347081e-06, + "loss": 0.0925, + "num_input_tokens_seen": 48520032, + "step": 71980 + }, + { + "epoch": 1.7586055261036329, + "grad_norm": 17.2757511138916, + "learning_rate": 1.6381916624843058e-06, + "loss": 0.1507, + "num_input_tokens_seen": 48523168, + "step": 71985 + }, + { + "epoch": 1.75872767693548, + "grad_norm": 0.13226760923862457, + "learning_rate": 1.6381260068567e-06, + "loss": 0.0392, + "num_input_tokens_seen": 48526560, + "step": 71990 + }, + { + "epoch": 1.7588498277673272, + "grad_norm": 0.06085921451449394, + "learning_rate": 1.6380603465884706e-06, + "loss": 0.0382, + "num_input_tokens_seen": 48529632, + "step": 71995 + }, + { + "epoch": 1.7589719785991744, + "grad_norm": 0.6363232135772705, + "learning_rate": 1.6379946816800945e-06, + "loss": 0.1274, + "num_input_tokens_seen": 48533216, + "step": 72000 + }, + { + "epoch": 1.7590941294310214, + "grad_norm": 15.696700096130371, + "learning_rate": 1.6379290121320495e-06, + "loss": 0.1549, + "num_input_tokens_seen": 48536224, + "step": 72005 + }, + { + "epoch": 1.7592162802628686, + "grad_norm": 0.08707629889249802, + "learning_rate": 1.6378633379448133e-06, + "loss": 0.1014, + "num_input_tokens_seen": 48539552, + "step": 72010 + }, + { + "epoch": 1.7593384310947158, + "grad_norm": 0.519372284412384, + "learning_rate": 1.637797659118863e-06, + "loss": 0.1593, + "num_input_tokens_seen": 48542880, + "step": 72015 + }, + { + "epoch": 1.759460581926563, + "grad_norm": 5.493022918701172, + "learning_rate": 1.6377319756546771e-06, + "loss": 0.0546, + "num_input_tokens_seen": 48546016, + "step": 72020 + }, + { + "epoch": 1.75958273275841, + "grad_norm": 0.174288809299469, + "learning_rate": 1.637666287552732e-06, + "loss": 0.0447, + "num_input_tokens_seen": 48549408, + "step": 72025 + }, + { + "epoch": 1.7597048835902571, + "grad_norm": 0.8021009564399719, + "learning_rate": 1.6376005948135068e-06, + "loss": 0.0023, + "num_input_tokens_seen": 48552672, + "step": 72030 + }, + { + "epoch": 1.7598270344221043, + "grad_norm": 0.18382374942302704, + "learning_rate": 1.6375348974374784e-06, + "loss": 0.04, + "num_input_tokens_seen": 48556384, + "step": 72035 + }, + { + "epoch": 1.7599491852539515, + "grad_norm": 36.59886932373047, + "learning_rate": 1.6374691954251247e-06, + "loss": 0.1235, + "num_input_tokens_seen": 48559392, + "step": 72040 + }, + { + "epoch": 1.7600713360857987, + "grad_norm": 0.23298950493335724, + "learning_rate": 1.6374034887769238e-06, + "loss": 0.0937, + "num_input_tokens_seen": 48562592, + "step": 72045 + }, + { + "epoch": 1.7601934869176459, + "grad_norm": 0.3184853196144104, + "learning_rate": 1.6373377774933528e-06, + "loss": 0.0362, + "num_input_tokens_seen": 48565536, + "step": 72050 + }, + { + "epoch": 1.760315637749493, + "grad_norm": 0.3271631896495819, + "learning_rate": 1.6372720615748903e-06, + "loss": 0.089, + "num_input_tokens_seen": 48569120, + "step": 72055 + }, + { + "epoch": 1.7604377885813403, + "grad_norm": 0.6756600141525269, + "learning_rate": 1.637206341022014e-06, + "loss": 0.0921, + "num_input_tokens_seen": 48572384, + "step": 72060 + }, + { + "epoch": 1.7605599394131874, + "grad_norm": 0.039529088884592056, + "learning_rate": 1.6371406158352016e-06, + "loss": 0.0427, + "num_input_tokens_seen": 48576416, + "step": 72065 + }, + { + "epoch": 1.7606820902450346, + "grad_norm": 40.706382751464844, + "learning_rate": 1.6370748860149316e-06, + "loss": 0.1243, + "num_input_tokens_seen": 48579232, + "step": 72070 + }, + { + "epoch": 1.7608042410768818, + "grad_norm": 0.6725810170173645, + "learning_rate": 1.6370091515616817e-06, + "loss": 0.0553, + "num_input_tokens_seen": 48582432, + "step": 72075 + }, + { + "epoch": 1.760926391908729, + "grad_norm": 13.003739356994629, + "learning_rate": 1.63694341247593e-06, + "loss": 0.1632, + "num_input_tokens_seen": 48586016, + "step": 72080 + }, + { + "epoch": 1.7610485427405762, + "grad_norm": 12.829425811767578, + "learning_rate": 1.6368776687581538e-06, + "loss": 0.1723, + "num_input_tokens_seen": 48589344, + "step": 72085 + }, + { + "epoch": 1.7611706935724232, + "grad_norm": 1.4721359014511108, + "learning_rate": 1.6368119204088323e-06, + "loss": 0.1579, + "num_input_tokens_seen": 48592672, + "step": 72090 + }, + { + "epoch": 1.7612928444042704, + "grad_norm": 0.20425674319267273, + "learning_rate": 1.6367461674284432e-06, + "loss": 0.0904, + "num_input_tokens_seen": 48595552, + "step": 72095 + }, + { + "epoch": 1.7614149952361176, + "grad_norm": 8.486249923706055, + "learning_rate": 1.6366804098174648e-06, + "loss": 0.1016, + "num_input_tokens_seen": 48598688, + "step": 72100 + }, + { + "epoch": 1.7615371460679647, + "grad_norm": 0.8847968578338623, + "learning_rate": 1.6366146475763754e-06, + "loss": 0.0079, + "num_input_tokens_seen": 48601824, + "step": 72105 + }, + { + "epoch": 1.761659296899812, + "grad_norm": 10.223726272583008, + "learning_rate": 1.6365488807056528e-06, + "loss": 0.0842, + "num_input_tokens_seen": 48604896, + "step": 72110 + }, + { + "epoch": 1.761781447731659, + "grad_norm": 11.736681938171387, + "learning_rate": 1.6364831092057752e-06, + "loss": 0.0733, + "num_input_tokens_seen": 48609248, + "step": 72115 + }, + { + "epoch": 1.761903598563506, + "grad_norm": 0.1761174350976944, + "learning_rate": 1.6364173330772217e-06, + "loss": 0.0018, + "num_input_tokens_seen": 48612256, + "step": 72120 + }, + { + "epoch": 1.7620257493953533, + "grad_norm": 0.1778940111398697, + "learning_rate": 1.63635155232047e-06, + "loss": 0.0627, + "num_input_tokens_seen": 48615584, + "step": 72125 + }, + { + "epoch": 1.7621479002272005, + "grad_norm": 0.12312234938144684, + "learning_rate": 1.636285766935999e-06, + "loss": 0.0468, + "num_input_tokens_seen": 48618976, + "step": 72130 + }, + { + "epoch": 1.7622700510590477, + "grad_norm": 267.4752197265625, + "learning_rate": 1.6362199769242863e-06, + "loss": 0.0157, + "num_input_tokens_seen": 48622816, + "step": 72135 + }, + { + "epoch": 1.7623922018908948, + "grad_norm": 0.9525810480117798, + "learning_rate": 1.636154182285811e-06, + "loss": 0.0765, + "num_input_tokens_seen": 48625760, + "step": 72140 + }, + { + "epoch": 1.762514352722742, + "grad_norm": 14.541299819946289, + "learning_rate": 1.6360883830210515e-06, + "loss": 0.1066, + "num_input_tokens_seen": 48628832, + "step": 72145 + }, + { + "epoch": 1.7626365035545892, + "grad_norm": 11.16724967956543, + "learning_rate": 1.636022579130486e-06, + "loss": 0.1817, + "num_input_tokens_seen": 48631968, + "step": 72150 + }, + { + "epoch": 1.7627586543864364, + "grad_norm": 19.138643264770508, + "learning_rate": 1.6359567706145931e-06, + "loss": 0.1552, + "num_input_tokens_seen": 48635424, + "step": 72155 + }, + { + "epoch": 1.7628808052182836, + "grad_norm": 0.2547426223754883, + "learning_rate": 1.635890957473852e-06, + "loss": 0.0411, + "num_input_tokens_seen": 48638560, + "step": 72160 + }, + { + "epoch": 1.7630029560501308, + "grad_norm": 0.5138275027275085, + "learning_rate": 1.6358251397087405e-06, + "loss": 0.0705, + "num_input_tokens_seen": 48642208, + "step": 72165 + }, + { + "epoch": 1.763125106881978, + "grad_norm": 11.29153060913086, + "learning_rate": 1.6357593173197378e-06, + "loss": 0.0825, + "num_input_tokens_seen": 48645664, + "step": 72170 + }, + { + "epoch": 1.7632472577138252, + "grad_norm": 11.48965835571289, + "learning_rate": 1.6356934903073221e-06, + "loss": 0.1601, + "num_input_tokens_seen": 48649056, + "step": 72175 + }, + { + "epoch": 1.7633694085456721, + "grad_norm": 22.086517333984375, + "learning_rate": 1.6356276586719722e-06, + "loss": 0.1951, + "num_input_tokens_seen": 48652320, + "step": 72180 + }, + { + "epoch": 1.7634915593775193, + "grad_norm": 0.4593576490879059, + "learning_rate": 1.6355618224141672e-06, + "loss": 0.0517, + "num_input_tokens_seen": 48655712, + "step": 72185 + }, + { + "epoch": 1.7636137102093665, + "grad_norm": 0.20573964715003967, + "learning_rate": 1.6354959815343859e-06, + "loss": 0.1116, + "num_input_tokens_seen": 48659808, + "step": 72190 + }, + { + "epoch": 1.7637358610412137, + "grad_norm": 16.481679916381836, + "learning_rate": 1.6354301360331064e-06, + "loss": 0.0629, + "num_input_tokens_seen": 48663456, + "step": 72195 + }, + { + "epoch": 1.763858011873061, + "grad_norm": 0.2852122485637665, + "learning_rate": 1.6353642859108084e-06, + "loss": 0.058, + "num_input_tokens_seen": 48666272, + "step": 72200 + }, + { + "epoch": 1.7639801627049079, + "grad_norm": 22.044973373413086, + "learning_rate": 1.6352984311679704e-06, + "loss": 0.161, + "num_input_tokens_seen": 48669536, + "step": 72205 + }, + { + "epoch": 1.764102313536755, + "grad_norm": 0.2694436311721802, + "learning_rate": 1.6352325718050713e-06, + "loss": 0.0984, + "num_input_tokens_seen": 48672992, + "step": 72210 + }, + { + "epoch": 1.7642244643686023, + "grad_norm": 1.510804533958435, + "learning_rate": 1.6351667078225902e-06, + "loss": 0.0499, + "num_input_tokens_seen": 48676448, + "step": 72215 + }, + { + "epoch": 1.7643466152004494, + "grad_norm": 0.21066798269748688, + "learning_rate": 1.6351008392210055e-06, + "loss": 0.0657, + "num_input_tokens_seen": 48679776, + "step": 72220 + }, + { + "epoch": 1.7644687660322966, + "grad_norm": 0.34505897760391235, + "learning_rate": 1.635034966000797e-06, + "loss": 0.0016, + "num_input_tokens_seen": 48683360, + "step": 72225 + }, + { + "epoch": 1.7645909168641438, + "grad_norm": 0.53311687707901, + "learning_rate": 1.6349690881624437e-06, + "loss": 0.0697, + "num_input_tokens_seen": 48686496, + "step": 72230 + }, + { + "epoch": 1.764713067695991, + "grad_norm": 0.930202305316925, + "learning_rate": 1.634903205706424e-06, + "loss": 0.0867, + "num_input_tokens_seen": 48689952, + "step": 72235 + }, + { + "epoch": 1.7648352185278382, + "grad_norm": 0.12176904827356339, + "learning_rate": 1.6348373186332175e-06, + "loss": 0.053, + "num_input_tokens_seen": 48693472, + "step": 72240 + }, + { + "epoch": 1.7649573693596854, + "grad_norm": 0.03570260852575302, + "learning_rate": 1.6347714269433032e-06, + "loss": 0.0437, + "num_input_tokens_seen": 48697120, + "step": 72245 + }, + { + "epoch": 1.7650795201915326, + "grad_norm": 19.815536499023438, + "learning_rate": 1.6347055306371606e-06, + "loss": 0.0846, + "num_input_tokens_seen": 48700256, + "step": 72250 + }, + { + "epoch": 1.7652016710233798, + "grad_norm": 73.4245834350586, + "learning_rate": 1.6346396297152688e-06, + "loss": 0.1, + "num_input_tokens_seen": 48704160, + "step": 72255 + }, + { + "epoch": 1.765323821855227, + "grad_norm": 0.6247529983520508, + "learning_rate": 1.6345737241781064e-06, + "loss": 0.0857, + "num_input_tokens_seen": 48707936, + "step": 72260 + }, + { + "epoch": 1.7654459726870741, + "grad_norm": 0.030399378389120102, + "learning_rate": 1.6345078140261536e-06, + "loss": 0.0014, + "num_input_tokens_seen": 48711392, + "step": 72265 + }, + { + "epoch": 1.7655681235189211, + "grad_norm": 40.35014724731445, + "learning_rate": 1.634441899259889e-06, + "loss": 0.1794, + "num_input_tokens_seen": 48714848, + "step": 72270 + }, + { + "epoch": 1.7656902743507683, + "grad_norm": 9.394624710083008, + "learning_rate": 1.6343759798797926e-06, + "loss": 0.0416, + "num_input_tokens_seen": 48718048, + "step": 72275 + }, + { + "epoch": 1.7658124251826155, + "grad_norm": 0.10819534957408905, + "learning_rate": 1.6343100558863432e-06, + "loss": 0.0587, + "num_input_tokens_seen": 48721312, + "step": 72280 + }, + { + "epoch": 1.7659345760144627, + "grad_norm": 37.31603240966797, + "learning_rate": 1.6342441272800205e-06, + "loss": 0.164, + "num_input_tokens_seen": 48724448, + "step": 72285 + }, + { + "epoch": 1.7660567268463099, + "grad_norm": 69.5995101928711, + "learning_rate": 1.634178194061304e-06, + "loss": 0.0512, + "num_input_tokens_seen": 48727712, + "step": 72290 + }, + { + "epoch": 1.7661788776781568, + "grad_norm": 12.813841819763184, + "learning_rate": 1.634112256230673e-06, + "loss": 0.0474, + "num_input_tokens_seen": 48730912, + "step": 72295 + }, + { + "epoch": 1.766301028510004, + "grad_norm": 26.569725036621094, + "learning_rate": 1.634046313788607e-06, + "loss": 0.0715, + "num_input_tokens_seen": 48734112, + "step": 72300 + }, + { + "epoch": 1.7664231793418512, + "grad_norm": 11.442526817321777, + "learning_rate": 1.633980366735586e-06, + "loss": 0.0877, + "num_input_tokens_seen": 48737760, + "step": 72305 + }, + { + "epoch": 1.7665453301736984, + "grad_norm": 0.3668510615825653, + "learning_rate": 1.6339144150720889e-06, + "loss": 0.1014, + "num_input_tokens_seen": 48740896, + "step": 72310 + }, + { + "epoch": 1.7666674810055456, + "grad_norm": 0.25583159923553467, + "learning_rate": 1.633848458798596e-06, + "loss": 0.0031, + "num_input_tokens_seen": 48744480, + "step": 72315 + }, + { + "epoch": 1.7667896318373928, + "grad_norm": 9.13083553314209, + "learning_rate": 1.6337824979155866e-06, + "loss": 0.1545, + "num_input_tokens_seen": 48747680, + "step": 72320 + }, + { + "epoch": 1.76691178266924, + "grad_norm": 15.840726852416992, + "learning_rate": 1.6337165324235402e-06, + "loss": 0.1265, + "num_input_tokens_seen": 48751200, + "step": 72325 + }, + { + "epoch": 1.7670339335010872, + "grad_norm": 0.15436357259750366, + "learning_rate": 1.6336505623229368e-06, + "loss": 0.1546, + "num_input_tokens_seen": 48754656, + "step": 72330 + }, + { + "epoch": 1.7671560843329344, + "grad_norm": 0.25369733572006226, + "learning_rate": 1.633584587614256e-06, + "loss": 0.0522, + "num_input_tokens_seen": 48758560, + "step": 72335 + }, + { + "epoch": 1.7672782351647816, + "grad_norm": 77.1099853515625, + "learning_rate": 1.6335186082979778e-06, + "loss": 0.1567, + "num_input_tokens_seen": 48761760, + "step": 72340 + }, + { + "epoch": 1.7674003859966287, + "grad_norm": 11.019214630126953, + "learning_rate": 1.6334526243745819e-06, + "loss": 0.0445, + "num_input_tokens_seen": 48765280, + "step": 72345 + }, + { + "epoch": 1.767522536828476, + "grad_norm": 0.6401473879814148, + "learning_rate": 1.633386635844548e-06, + "loss": 0.0527, + "num_input_tokens_seen": 48768544, + "step": 72350 + }, + { + "epoch": 1.7676446876603231, + "grad_norm": 29.522113800048828, + "learning_rate": 1.633320642708356e-06, + "loss": 0.0989, + "num_input_tokens_seen": 48772384, + "step": 72355 + }, + { + "epoch": 1.76776683849217, + "grad_norm": 57.17478561401367, + "learning_rate": 1.6332546449664865e-06, + "loss": 0.1298, + "num_input_tokens_seen": 48775520, + "step": 72360 + }, + { + "epoch": 1.7678889893240173, + "grad_norm": 0.05358585715293884, + "learning_rate": 1.6331886426194184e-06, + "loss": 0.0787, + "num_input_tokens_seen": 48779808, + "step": 72365 + }, + { + "epoch": 1.7680111401558645, + "grad_norm": 0.36916717886924744, + "learning_rate": 1.6331226356676324e-06, + "loss": 0.081, + "num_input_tokens_seen": 48783392, + "step": 72370 + }, + { + "epoch": 1.7681332909877117, + "grad_norm": 8.119627952575684, + "learning_rate": 1.633056624111608e-06, + "loss": 0.135, + "num_input_tokens_seen": 48786336, + "step": 72375 + }, + { + "epoch": 1.7682554418195586, + "grad_norm": 0.031201016157865524, + "learning_rate": 1.6329906079518262e-06, + "loss": 0.0253, + "num_input_tokens_seen": 48789600, + "step": 72380 + }, + { + "epoch": 1.7683775926514058, + "grad_norm": 36.73933029174805, + "learning_rate": 1.632924587188766e-06, + "loss": 0.0988, + "num_input_tokens_seen": 48792672, + "step": 72385 + }, + { + "epoch": 1.768499743483253, + "grad_norm": 0.22203010320663452, + "learning_rate": 1.6328585618229077e-06, + "loss": 0.0365, + "num_input_tokens_seen": 48796128, + "step": 72390 + }, + { + "epoch": 1.7686218943151002, + "grad_norm": 0.7779269814491272, + "learning_rate": 1.632792531854732e-06, + "loss": 0.045, + "num_input_tokens_seen": 48799520, + "step": 72395 + }, + { + "epoch": 1.7687440451469474, + "grad_norm": 0.07156257331371307, + "learning_rate": 1.632726497284719e-06, + "loss": 0.0014, + "num_input_tokens_seen": 48802464, + "step": 72400 + }, + { + "epoch": 1.7688661959787946, + "grad_norm": 0.24317099153995514, + "learning_rate": 1.6326604581133484e-06, + "loss": 0.1229, + "num_input_tokens_seen": 48805728, + "step": 72405 + }, + { + "epoch": 1.7689883468106418, + "grad_norm": 0.3279658555984497, + "learning_rate": 1.632594414341101e-06, + "loss": 0.0568, + "num_input_tokens_seen": 48808928, + "step": 72410 + }, + { + "epoch": 1.769110497642489, + "grad_norm": 202.01376342773438, + "learning_rate": 1.632528365968457e-06, + "loss": 0.0917, + "num_input_tokens_seen": 48812448, + "step": 72415 + }, + { + "epoch": 1.7692326484743361, + "grad_norm": 13.737601280212402, + "learning_rate": 1.6324623129958966e-06, + "loss": 0.25, + "num_input_tokens_seen": 48815968, + "step": 72420 + }, + { + "epoch": 1.7693547993061833, + "grad_norm": 10.069300651550293, + "learning_rate": 1.6323962554238997e-06, + "loss": 0.1087, + "num_input_tokens_seen": 48819168, + "step": 72425 + }, + { + "epoch": 1.7694769501380305, + "grad_norm": 2.0507044792175293, + "learning_rate": 1.6323301932529475e-06, + "loss": 0.0829, + "num_input_tokens_seen": 48823264, + "step": 72430 + }, + { + "epoch": 1.7695991009698777, + "grad_norm": 0.16294556856155396, + "learning_rate": 1.6322641264835198e-06, + "loss": 0.058, + "num_input_tokens_seen": 48826464, + "step": 72435 + }, + { + "epoch": 1.769721251801725, + "grad_norm": 0.5161846280097961, + "learning_rate": 1.6321980551160976e-06, + "loss": 0.0264, + "num_input_tokens_seen": 48830176, + "step": 72440 + }, + { + "epoch": 1.769843402633572, + "grad_norm": 0.369511216878891, + "learning_rate": 1.6321319791511607e-06, + "loss": 0.0425, + "num_input_tokens_seen": 48833760, + "step": 72445 + }, + { + "epoch": 1.769965553465419, + "grad_norm": 19.229990005493164, + "learning_rate": 1.6320658985891904e-06, + "loss": 0.1903, + "num_input_tokens_seen": 48837088, + "step": 72450 + }, + { + "epoch": 1.7700877042972663, + "grad_norm": 0.26264458894729614, + "learning_rate": 1.6319998134306668e-06, + "loss": 0.0487, + "num_input_tokens_seen": 48840544, + "step": 72455 + }, + { + "epoch": 1.7702098551291134, + "grad_norm": 0.5501359105110168, + "learning_rate": 1.6319337236760706e-06, + "loss": 0.0658, + "num_input_tokens_seen": 48844192, + "step": 72460 + }, + { + "epoch": 1.7703320059609606, + "grad_norm": 107.52610778808594, + "learning_rate": 1.6318676293258822e-06, + "loss": 0.2244, + "num_input_tokens_seen": 48847712, + "step": 72465 + }, + { + "epoch": 1.7704541567928076, + "grad_norm": 0.16678033769130707, + "learning_rate": 1.6318015303805827e-06, + "loss": 0.1484, + "num_input_tokens_seen": 48850912, + "step": 72470 + }, + { + "epoch": 1.7705763076246548, + "grad_norm": 15.765122413635254, + "learning_rate": 1.6317354268406524e-06, + "loss": 0.1223, + "num_input_tokens_seen": 48854112, + "step": 72475 + }, + { + "epoch": 1.770698458456502, + "grad_norm": 0.19218267500400543, + "learning_rate": 1.6316693187065723e-06, + "loss": 0.0031, + "num_input_tokens_seen": 48857440, + "step": 72480 + }, + { + "epoch": 1.7708206092883492, + "grad_norm": 0.17282667756080627, + "learning_rate": 1.6316032059788229e-06, + "loss": 0.0671, + "num_input_tokens_seen": 48861600, + "step": 72485 + }, + { + "epoch": 1.7709427601201964, + "grad_norm": 0.11995959281921387, + "learning_rate": 1.6315370886578848e-06, + "loss": 0.036, + "num_input_tokens_seen": 48864736, + "step": 72490 + }, + { + "epoch": 1.7710649109520435, + "grad_norm": 10.746925354003906, + "learning_rate": 1.6314709667442395e-06, + "loss": 0.0519, + "num_input_tokens_seen": 48867936, + "step": 72495 + }, + { + "epoch": 1.7711870617838907, + "grad_norm": 0.24249105155467987, + "learning_rate": 1.6314048402383675e-06, + "loss": 0.002, + "num_input_tokens_seen": 48871200, + "step": 72500 + }, + { + "epoch": 1.771309212615738, + "grad_norm": 88.96400451660156, + "learning_rate": 1.6313387091407496e-06, + "loss": 0.0519, + "num_input_tokens_seen": 48874272, + "step": 72505 + }, + { + "epoch": 1.7714313634475851, + "grad_norm": 21.104406356811523, + "learning_rate": 1.6312725734518668e-06, + "loss": 0.0645, + "num_input_tokens_seen": 48878176, + "step": 72510 + }, + { + "epoch": 1.7715535142794323, + "grad_norm": 0.1095280647277832, + "learning_rate": 1.6312064331722e-06, + "loss": 0.1158, + "num_input_tokens_seen": 48881760, + "step": 72515 + }, + { + "epoch": 1.7716756651112795, + "grad_norm": 20.461257934570312, + "learning_rate": 1.6311402883022302e-06, + "loss": 0.0697, + "num_input_tokens_seen": 48885472, + "step": 72520 + }, + { + "epoch": 1.7717978159431267, + "grad_norm": 46.38597106933594, + "learning_rate": 1.6310741388424388e-06, + "loss": 0.0456, + "num_input_tokens_seen": 48888800, + "step": 72525 + }, + { + "epoch": 1.7719199667749739, + "grad_norm": 99.10078430175781, + "learning_rate": 1.631007984793306e-06, + "loss": 0.2052, + "num_input_tokens_seen": 48892064, + "step": 72530 + }, + { + "epoch": 1.772042117606821, + "grad_norm": 13.505167961120605, + "learning_rate": 1.6309418261553139e-06, + "loss": 0.0733, + "num_input_tokens_seen": 48895904, + "step": 72535 + }, + { + "epoch": 1.772164268438668, + "grad_norm": 0.2780132293701172, + "learning_rate": 1.6308756629289429e-06, + "loss": 0.1274, + "num_input_tokens_seen": 48899296, + "step": 72540 + }, + { + "epoch": 1.7722864192705152, + "grad_norm": 14.029288291931152, + "learning_rate": 1.6308094951146742e-06, + "loss": 0.1346, + "num_input_tokens_seen": 48902816, + "step": 72545 + }, + { + "epoch": 1.7724085701023624, + "grad_norm": 22.366783142089844, + "learning_rate": 1.6307433227129895e-06, + "loss": 0.0313, + "num_input_tokens_seen": 48906400, + "step": 72550 + }, + { + "epoch": 1.7725307209342096, + "grad_norm": 1.1991052627563477, + "learning_rate": 1.6306771457243696e-06, + "loss": 0.0037, + "num_input_tokens_seen": 48909600, + "step": 72555 + }, + { + "epoch": 1.7726528717660566, + "grad_norm": 0.1703871339559555, + "learning_rate": 1.6306109641492958e-06, + "loss": 0.1203, + "num_input_tokens_seen": 48912672, + "step": 72560 + }, + { + "epoch": 1.7727750225979038, + "grad_norm": 142.6514892578125, + "learning_rate": 1.6305447779882497e-06, + "loss": 0.0387, + "num_input_tokens_seen": 48916000, + "step": 72565 + }, + { + "epoch": 1.772897173429751, + "grad_norm": 15.24199104309082, + "learning_rate": 1.6304785872417121e-06, + "loss": 0.1363, + "num_input_tokens_seen": 48918880, + "step": 72570 + }, + { + "epoch": 1.7730193242615981, + "grad_norm": 1.303954839706421, + "learning_rate": 1.630412391910165e-06, + "loss": 0.0632, + "num_input_tokens_seen": 48922272, + "step": 72575 + }, + { + "epoch": 1.7731414750934453, + "grad_norm": 10.182337760925293, + "learning_rate": 1.630346191994089e-06, + "loss": 0.0572, + "num_input_tokens_seen": 48925408, + "step": 72580 + }, + { + "epoch": 1.7732636259252925, + "grad_norm": 15.994193077087402, + "learning_rate": 1.630279987493966e-06, + "loss": 0.167, + "num_input_tokens_seen": 48928608, + "step": 72585 + }, + { + "epoch": 1.7733857767571397, + "grad_norm": 11.406634330749512, + "learning_rate": 1.6302137784102774e-06, + "loss": 0.0463, + "num_input_tokens_seen": 48934048, + "step": 72590 + }, + { + "epoch": 1.773507927588987, + "grad_norm": 0.16068458557128906, + "learning_rate": 1.630147564743505e-06, + "loss": 0.0021, + "num_input_tokens_seen": 48936800, + "step": 72595 + }, + { + "epoch": 1.773630078420834, + "grad_norm": 0.2641652524471283, + "learning_rate": 1.63008134649413e-06, + "loss": 0.0102, + "num_input_tokens_seen": 48940128, + "step": 72600 + }, + { + "epoch": 1.7737522292526813, + "grad_norm": 42.87362289428711, + "learning_rate": 1.6300151236626336e-06, + "loss": 0.1173, + "num_input_tokens_seen": 48943584, + "step": 72605 + }, + { + "epoch": 1.7738743800845285, + "grad_norm": 24.8110408782959, + "learning_rate": 1.629948896249498e-06, + "loss": 0.1184, + "num_input_tokens_seen": 48947040, + "step": 72610 + }, + { + "epoch": 1.7739965309163757, + "grad_norm": 0.45504873991012573, + "learning_rate": 1.6298826642552043e-06, + "loss": 0.0622, + "num_input_tokens_seen": 48950496, + "step": 72615 + }, + { + "epoch": 1.7741186817482228, + "grad_norm": 16.71000862121582, + "learning_rate": 1.629816427680235e-06, + "loss": 0.1365, + "num_input_tokens_seen": 48953824, + "step": 72620 + }, + { + "epoch": 1.7742408325800698, + "grad_norm": 0.1281004250049591, + "learning_rate": 1.6297501865250708e-06, + "loss": 0.0999, + "num_input_tokens_seen": 48957152, + "step": 72625 + }, + { + "epoch": 1.774362983411917, + "grad_norm": 14.322222709655762, + "learning_rate": 1.629683940790194e-06, + "loss": 0.0882, + "num_input_tokens_seen": 48960224, + "step": 72630 + }, + { + "epoch": 1.7744851342437642, + "grad_norm": 0.14506135880947113, + "learning_rate": 1.6296176904760866e-06, + "loss": 0.0013, + "num_input_tokens_seen": 48963232, + "step": 72635 + }, + { + "epoch": 1.7746072850756114, + "grad_norm": 66.70890808105469, + "learning_rate": 1.6295514355832296e-06, + "loss": 0.1249, + "num_input_tokens_seen": 48966496, + "step": 72640 + }, + { + "epoch": 1.7747294359074586, + "grad_norm": 0.7553220391273499, + "learning_rate": 1.629485176112105e-06, + "loss": 0.1268, + "num_input_tokens_seen": 48969760, + "step": 72645 + }, + { + "epoch": 1.7748515867393055, + "grad_norm": 1.3736454248428345, + "learning_rate": 1.6294189120631954e-06, + "loss": 0.1543, + "num_input_tokens_seen": 48972896, + "step": 72650 + }, + { + "epoch": 1.7749737375711527, + "grad_norm": 0.14934030175209045, + "learning_rate": 1.6293526434369818e-06, + "loss": 0.0008, + "num_input_tokens_seen": 48976288, + "step": 72655 + }, + { + "epoch": 1.775095888403, + "grad_norm": 9.05644702911377, + "learning_rate": 1.6292863702339466e-06, + "loss": 0.1141, + "num_input_tokens_seen": 48979680, + "step": 72660 + }, + { + "epoch": 1.7752180392348471, + "grad_norm": 1.1264457702636719, + "learning_rate": 1.6292200924545715e-06, + "loss": 0.003, + "num_input_tokens_seen": 48983136, + "step": 72665 + }, + { + "epoch": 1.7753401900666943, + "grad_norm": 9.034127235412598, + "learning_rate": 1.6291538100993391e-06, + "loss": 0.1314, + "num_input_tokens_seen": 48986144, + "step": 72670 + }, + { + "epoch": 1.7754623408985415, + "grad_norm": 0.23916614055633545, + "learning_rate": 1.6290875231687306e-06, + "loss": 0.1202, + "num_input_tokens_seen": 48990048, + "step": 72675 + }, + { + "epoch": 1.7755844917303887, + "grad_norm": 1.9375858306884766, + "learning_rate": 1.6290212316632285e-06, + "loss": 0.038, + "num_input_tokens_seen": 48993696, + "step": 72680 + }, + { + "epoch": 1.7757066425622359, + "grad_norm": 0.254934161901474, + "learning_rate": 1.628954935583315e-06, + "loss": 0.0612, + "num_input_tokens_seen": 48997088, + "step": 72685 + }, + { + "epoch": 1.775828793394083, + "grad_norm": 0.21548773348331451, + "learning_rate": 1.628888634929472e-06, + "loss": 0.0965, + "num_input_tokens_seen": 49000608, + "step": 72690 + }, + { + "epoch": 1.7759509442259303, + "grad_norm": 30.685073852539062, + "learning_rate": 1.6288223297021814e-06, + "loss": 0.0807, + "num_input_tokens_seen": 49004192, + "step": 72695 + }, + { + "epoch": 1.7760730950577774, + "grad_norm": 11.643782615661621, + "learning_rate": 1.628756019901926e-06, + "loss": 0.0388, + "num_input_tokens_seen": 49007200, + "step": 72700 + }, + { + "epoch": 1.7761952458896246, + "grad_norm": 0.5212379693984985, + "learning_rate": 1.6286897055291874e-06, + "loss": 0.1639, + "num_input_tokens_seen": 49010144, + "step": 72705 + }, + { + "epoch": 1.7763173967214718, + "grad_norm": 17.569440841674805, + "learning_rate": 1.6286233865844486e-06, + "loss": 0.1141, + "num_input_tokens_seen": 49013920, + "step": 72710 + }, + { + "epoch": 1.7764395475533188, + "grad_norm": 0.24917493760585785, + "learning_rate": 1.6285570630681914e-06, + "loss": 0.0585, + "num_input_tokens_seen": 49017120, + "step": 72715 + }, + { + "epoch": 1.776561698385166, + "grad_norm": 61.48064041137695, + "learning_rate": 1.6284907349808976e-06, + "loss": 0.1279, + "num_input_tokens_seen": 49020576, + "step": 72720 + }, + { + "epoch": 1.7766838492170132, + "grad_norm": 0.10875872522592545, + "learning_rate": 1.6284244023230507e-06, + "loss": 0.032, + "num_input_tokens_seen": 49023840, + "step": 72725 + }, + { + "epoch": 1.7768060000488604, + "grad_norm": 0.7517804503440857, + "learning_rate": 1.6283580650951324e-06, + "loss": 0.0013, + "num_input_tokens_seen": 49027424, + "step": 72730 + }, + { + "epoch": 1.7769281508807075, + "grad_norm": 0.1774255931377411, + "learning_rate": 1.6282917232976252e-06, + "loss": 0.0744, + "num_input_tokens_seen": 49030432, + "step": 72735 + }, + { + "epoch": 1.7770503017125545, + "grad_norm": 0.1688397228717804, + "learning_rate": 1.6282253769310115e-06, + "loss": 0.0605, + "num_input_tokens_seen": 49034080, + "step": 72740 + }, + { + "epoch": 1.7771724525444017, + "grad_norm": 0.04853454604744911, + "learning_rate": 1.628159025995774e-06, + "loss": 0.0023, + "num_input_tokens_seen": 49037600, + "step": 72745 + }, + { + "epoch": 1.777294603376249, + "grad_norm": 0.39164552092552185, + "learning_rate": 1.6280926704923949e-06, + "loss": 0.014, + "num_input_tokens_seen": 49040800, + "step": 72750 + }, + { + "epoch": 1.777416754208096, + "grad_norm": 13.104290008544922, + "learning_rate": 1.6280263104213572e-06, + "loss": 0.1248, + "num_input_tokens_seen": 49044064, + "step": 72755 + }, + { + "epoch": 1.7775389050399433, + "grad_norm": 0.2612842321395874, + "learning_rate": 1.6279599457831431e-06, + "loss": 0.0014, + "num_input_tokens_seen": 49047200, + "step": 72760 + }, + { + "epoch": 1.7776610558717905, + "grad_norm": 24.87212562561035, + "learning_rate": 1.6278935765782356e-06, + "loss": 0.1021, + "num_input_tokens_seen": 49050528, + "step": 72765 + }, + { + "epoch": 1.7777832067036377, + "grad_norm": 0.3829386830329895, + "learning_rate": 1.6278272028071168e-06, + "loss": 0.0831, + "num_input_tokens_seen": 49054112, + "step": 72770 + }, + { + "epoch": 1.7779053575354848, + "grad_norm": 57.15519332885742, + "learning_rate": 1.62776082447027e-06, + "loss": 0.1695, + "num_input_tokens_seen": 49057184, + "step": 72775 + }, + { + "epoch": 1.778027508367332, + "grad_norm": 0.10678430646657944, + "learning_rate": 1.6276944415681776e-06, + "loss": 0.0281, + "num_input_tokens_seen": 49060896, + "step": 72780 + }, + { + "epoch": 1.7781496591991792, + "grad_norm": 20.0386962890625, + "learning_rate": 1.6276280541013223e-06, + "loss": 0.1584, + "num_input_tokens_seen": 49064544, + "step": 72785 + }, + { + "epoch": 1.7782718100310264, + "grad_norm": 0.7888308763504028, + "learning_rate": 1.627561662070187e-06, + "loss": 0.1369, + "num_input_tokens_seen": 49068320, + "step": 72790 + }, + { + "epoch": 1.7783939608628736, + "grad_norm": 0.6277143955230713, + "learning_rate": 1.6274952654752547e-06, + "loss": 0.0878, + "num_input_tokens_seen": 49071712, + "step": 72795 + }, + { + "epoch": 1.7785161116947208, + "grad_norm": 0.11778520047664642, + "learning_rate": 1.6274288643170078e-06, + "loss": 0.0296, + "num_input_tokens_seen": 49075168, + "step": 72800 + }, + { + "epoch": 1.7786382625265678, + "grad_norm": 29.370920181274414, + "learning_rate": 1.6273624585959295e-06, + "loss": 0.0897, + "num_input_tokens_seen": 49078816, + "step": 72805 + }, + { + "epoch": 1.778760413358415, + "grad_norm": 225.03904724121094, + "learning_rate": 1.6272960483125026e-06, + "loss": 0.11, + "num_input_tokens_seen": 49082272, + "step": 72810 + }, + { + "epoch": 1.7788825641902621, + "grad_norm": 10.878161430358887, + "learning_rate": 1.6272296334672101e-06, + "loss": 0.1619, + "num_input_tokens_seen": 49085664, + "step": 72815 + }, + { + "epoch": 1.7790047150221093, + "grad_norm": 0.03372422978281975, + "learning_rate": 1.6271632140605351e-06, + "loss": 0.1086, + "num_input_tokens_seen": 49089376, + "step": 72820 + }, + { + "epoch": 1.7791268658539565, + "grad_norm": 0.4881143569946289, + "learning_rate": 1.6270967900929607e-06, + "loss": 0.1185, + "num_input_tokens_seen": 49092640, + "step": 72825 + }, + { + "epoch": 1.7792490166858035, + "grad_norm": 10.149094581604004, + "learning_rate": 1.6270303615649695e-06, + "loss": 0.1189, + "num_input_tokens_seen": 49095968, + "step": 72830 + }, + { + "epoch": 1.7793711675176507, + "grad_norm": 6.329721450805664, + "learning_rate": 1.6269639284770448e-06, + "loss": 0.0279, + "num_input_tokens_seen": 49099360, + "step": 72835 + }, + { + "epoch": 1.7794933183494979, + "grad_norm": 0.6420906186103821, + "learning_rate": 1.62689749082967e-06, + "loss": 0.0224, + "num_input_tokens_seen": 49102560, + "step": 72840 + }, + { + "epoch": 1.779615469181345, + "grad_norm": 18.043487548828125, + "learning_rate": 1.6268310486233282e-06, + "loss": 0.2352, + "num_input_tokens_seen": 49105824, + "step": 72845 + }, + { + "epoch": 1.7797376200131922, + "grad_norm": 23.664506912231445, + "learning_rate": 1.626764601858502e-06, + "loss": 0.0645, + "num_input_tokens_seen": 49108896, + "step": 72850 + }, + { + "epoch": 1.7798597708450394, + "grad_norm": 0.12446639686822891, + "learning_rate": 1.6266981505356752e-06, + "loss": 0.0024, + "num_input_tokens_seen": 49112352, + "step": 72855 + }, + { + "epoch": 1.7799819216768866, + "grad_norm": 19.845417022705078, + "learning_rate": 1.626631694655331e-06, + "loss": 0.128, + "num_input_tokens_seen": 49115744, + "step": 72860 + }, + { + "epoch": 1.7801040725087338, + "grad_norm": 0.07817646861076355, + "learning_rate": 1.6265652342179523e-06, + "loss": 0.0014, + "num_input_tokens_seen": 49119264, + "step": 72865 + }, + { + "epoch": 1.780226223340581, + "grad_norm": 0.2219124287366867, + "learning_rate": 1.626498769224023e-06, + "loss": 0.0843, + "num_input_tokens_seen": 49122592, + "step": 72870 + }, + { + "epoch": 1.7803483741724282, + "grad_norm": 0.1201687902212143, + "learning_rate": 1.6264322996740258e-06, + "loss": 0.0023, + "num_input_tokens_seen": 49126176, + "step": 72875 + }, + { + "epoch": 1.7804705250042754, + "grad_norm": 0.20573893189430237, + "learning_rate": 1.6263658255684447e-06, + "loss": 0.0575, + "num_input_tokens_seen": 49129504, + "step": 72880 + }, + { + "epoch": 1.7805926758361226, + "grad_norm": 0.45061349868774414, + "learning_rate": 1.6262993469077628e-06, + "loss": 0.001, + "num_input_tokens_seen": 49132576, + "step": 72885 + }, + { + "epoch": 1.7807148266679698, + "grad_norm": 0.11227881163358688, + "learning_rate": 1.6262328636924635e-06, + "loss": 0.143, + "num_input_tokens_seen": 49136032, + "step": 72890 + }, + { + "epoch": 1.7808369774998167, + "grad_norm": 0.24952024221420288, + "learning_rate": 1.6261663759230303e-06, + "loss": 0.0331, + "num_input_tokens_seen": 49139168, + "step": 72895 + }, + { + "epoch": 1.780959128331664, + "grad_norm": 0.6618596911430359, + "learning_rate": 1.6260998835999472e-06, + "loss": 0.0545, + "num_input_tokens_seen": 49142176, + "step": 72900 + }, + { + "epoch": 1.781081279163511, + "grad_norm": 10.293169975280762, + "learning_rate": 1.626033386723697e-06, + "loss": 0.1411, + "num_input_tokens_seen": 49145504, + "step": 72905 + }, + { + "epoch": 1.7812034299953583, + "grad_norm": 0.8537778258323669, + "learning_rate": 1.6259668852947637e-06, + "loss": 0.0998, + "num_input_tokens_seen": 49149024, + "step": 72910 + }, + { + "epoch": 1.7813255808272053, + "grad_norm": 19.98094367980957, + "learning_rate": 1.6259003793136309e-06, + "loss": 0.1419, + "num_input_tokens_seen": 49152416, + "step": 72915 + }, + { + "epoch": 1.7814477316590525, + "grad_norm": 0.12676870822906494, + "learning_rate": 1.625833868780782e-06, + "loss": 0.0397, + "num_input_tokens_seen": 49155744, + "step": 72920 + }, + { + "epoch": 1.7815698824908996, + "grad_norm": 198.677490234375, + "learning_rate": 1.625767353696701e-06, + "loss": 0.0564, + "num_input_tokens_seen": 49159776, + "step": 72925 + }, + { + "epoch": 1.7816920333227468, + "grad_norm": 0.27948421239852905, + "learning_rate": 1.6257008340618715e-06, + "loss": 0.001, + "num_input_tokens_seen": 49163104, + "step": 72930 + }, + { + "epoch": 1.781814184154594, + "grad_norm": 0.081379234790802, + "learning_rate": 1.6256343098767773e-06, + "loss": 0.1116, + "num_input_tokens_seen": 49166304, + "step": 72935 + }, + { + "epoch": 1.7819363349864412, + "grad_norm": 0.48774203658103943, + "learning_rate": 1.6255677811419022e-06, + "loss": 0.0013, + "num_input_tokens_seen": 49169312, + "step": 72940 + }, + { + "epoch": 1.7820584858182884, + "grad_norm": 1.4804660081863403, + "learning_rate": 1.6255012478577296e-06, + "loss": 0.1343, + "num_input_tokens_seen": 49173408, + "step": 72945 + }, + { + "epoch": 1.7821806366501356, + "grad_norm": 161.1129608154297, + "learning_rate": 1.625434710024744e-06, + "loss": 0.1763, + "num_input_tokens_seen": 49177120, + "step": 72950 + }, + { + "epoch": 1.7823027874819828, + "grad_norm": 14.122322082519531, + "learning_rate": 1.6253681676434289e-06, + "loss": 0.0555, + "num_input_tokens_seen": 49181024, + "step": 72955 + }, + { + "epoch": 1.78242493831383, + "grad_norm": 15.058162689208984, + "learning_rate": 1.6253016207142682e-06, + "loss": 0.1329, + "num_input_tokens_seen": 49184096, + "step": 72960 + }, + { + "epoch": 1.7825470891456772, + "grad_norm": 10.185879707336426, + "learning_rate": 1.625235069237746e-06, + "loss": 0.2357, + "num_input_tokens_seen": 49187488, + "step": 72965 + }, + { + "epoch": 1.7826692399775244, + "grad_norm": 0.4589506685733795, + "learning_rate": 1.6251685132143463e-06, + "loss": 0.0376, + "num_input_tokens_seen": 49190624, + "step": 72970 + }, + { + "epoch": 1.7827913908093715, + "grad_norm": 3.827714204788208, + "learning_rate": 1.625101952644553e-06, + "loss": 0.0964, + "num_input_tokens_seen": 49193568, + "step": 72975 + }, + { + "epoch": 1.7829135416412187, + "grad_norm": 0.05556550249457359, + "learning_rate": 1.6250353875288501e-06, + "loss": 0.0367, + "num_input_tokens_seen": 49196832, + "step": 72980 + }, + { + "epoch": 1.7830356924730657, + "grad_norm": 0.19241148233413696, + "learning_rate": 1.6249688178677215e-06, + "loss": 0.1076, + "num_input_tokens_seen": 49199840, + "step": 72985 + }, + { + "epoch": 1.783157843304913, + "grad_norm": 105.45845031738281, + "learning_rate": 1.6249022436616518e-06, + "loss": 0.1231, + "num_input_tokens_seen": 49203808, + "step": 72990 + }, + { + "epoch": 1.78327999413676, + "grad_norm": 0.3520190119743347, + "learning_rate": 1.624835664911125e-06, + "loss": 0.1205, + "num_input_tokens_seen": 49207136, + "step": 72995 + }, + { + "epoch": 1.7834021449686073, + "grad_norm": 0.20569193363189697, + "learning_rate": 1.624769081616625e-06, + "loss": 0.0732, + "num_input_tokens_seen": 49210784, + "step": 73000 + }, + { + "epoch": 1.7835242958004542, + "grad_norm": 9.263427734375, + "learning_rate": 1.6247024937786364e-06, + "loss": 0.0901, + "num_input_tokens_seen": 49214368, + "step": 73005 + }, + { + "epoch": 1.7836464466323014, + "grad_norm": 7.391837120056152, + "learning_rate": 1.6246359013976432e-06, + "loss": 0.108, + "num_input_tokens_seen": 49218144, + "step": 73010 + }, + { + "epoch": 1.7837685974641486, + "grad_norm": 0.1080571860074997, + "learning_rate": 1.6245693044741296e-06, + "loss": 0.0013, + "num_input_tokens_seen": 49221600, + "step": 73015 + }, + { + "epoch": 1.7838907482959958, + "grad_norm": 8.889108657836914, + "learning_rate": 1.6245027030085798e-06, + "loss": 0.0907, + "num_input_tokens_seen": 49225120, + "step": 73020 + }, + { + "epoch": 1.784012899127843, + "grad_norm": 0.280179500579834, + "learning_rate": 1.624436097001479e-06, + "loss": 0.0778, + "num_input_tokens_seen": 49228512, + "step": 73025 + }, + { + "epoch": 1.7841350499596902, + "grad_norm": 0.154941126704216, + "learning_rate": 1.6243694864533103e-06, + "loss": 0.106, + "num_input_tokens_seen": 49231904, + "step": 73030 + }, + { + "epoch": 1.7842572007915374, + "grad_norm": 12.580093383789062, + "learning_rate": 1.6243028713645592e-06, + "loss": 0.0837, + "num_input_tokens_seen": 49235360, + "step": 73035 + }, + { + "epoch": 1.7843793516233846, + "grad_norm": 16.738759994506836, + "learning_rate": 1.6242362517357095e-06, + "loss": 0.0717, + "num_input_tokens_seen": 49238304, + "step": 73040 + }, + { + "epoch": 1.7845015024552318, + "grad_norm": 0.8823060393333435, + "learning_rate": 1.6241696275672458e-06, + "loss": 0.0952, + "num_input_tokens_seen": 49241312, + "step": 73045 + }, + { + "epoch": 1.784623653287079, + "grad_norm": 0.06604723632335663, + "learning_rate": 1.6241029988596528e-06, + "loss": 0.0681, + "num_input_tokens_seen": 49244384, + "step": 73050 + }, + { + "epoch": 1.7847458041189261, + "grad_norm": 16.02294921875, + "learning_rate": 1.624036365613415e-06, + "loss": 0.0327, + "num_input_tokens_seen": 49248160, + "step": 73055 + }, + { + "epoch": 1.7848679549507733, + "grad_norm": 0.2083832174539566, + "learning_rate": 1.623969727829017e-06, + "loss": 0.1776, + "num_input_tokens_seen": 49251360, + "step": 73060 + }, + { + "epoch": 1.7849901057826205, + "grad_norm": 1.2275177240371704, + "learning_rate": 1.6239030855069432e-06, + "loss": 0.0851, + "num_input_tokens_seen": 49254816, + "step": 73065 + }, + { + "epoch": 1.7851122566144677, + "grad_norm": 11.574962615966797, + "learning_rate": 1.6238364386476783e-06, + "loss": 0.0367, + "num_input_tokens_seen": 49258144, + "step": 73070 + }, + { + "epoch": 1.7852344074463147, + "grad_norm": 21.744110107421875, + "learning_rate": 1.623769787251707e-06, + "loss": 0.1875, + "num_input_tokens_seen": 49261408, + "step": 73075 + }, + { + "epoch": 1.7853565582781619, + "grad_norm": 57.97651290893555, + "learning_rate": 1.623703131319514e-06, + "loss": 0.1415, + "num_input_tokens_seen": 49264416, + "step": 73080 + }, + { + "epoch": 1.785478709110009, + "grad_norm": 0.6353474855422974, + "learning_rate": 1.6236364708515842e-06, + "loss": 0.0578, + "num_input_tokens_seen": 49267616, + "step": 73085 + }, + { + "epoch": 1.7856008599418562, + "grad_norm": 13.432452201843262, + "learning_rate": 1.623569805848402e-06, + "loss": 0.0612, + "num_input_tokens_seen": 49271008, + "step": 73090 + }, + { + "epoch": 1.7857230107737032, + "grad_norm": 16.59269905090332, + "learning_rate": 1.6235031363104528e-06, + "loss": 0.1369, + "num_input_tokens_seen": 49274272, + "step": 73095 + }, + { + "epoch": 1.7858451616055504, + "grad_norm": 0.1436922550201416, + "learning_rate": 1.623436462238221e-06, + "loss": 0.0156, + "num_input_tokens_seen": 49277536, + "step": 73100 + }, + { + "epoch": 1.7859673124373976, + "grad_norm": 0.5524347424507141, + "learning_rate": 1.6233697836321913e-06, + "loss": 0.1298, + "num_input_tokens_seen": 49280992, + "step": 73105 + }, + { + "epoch": 1.7860894632692448, + "grad_norm": 1.849530816078186, + "learning_rate": 1.623303100492849e-06, + "loss": 0.071, + "num_input_tokens_seen": 49284192, + "step": 73110 + }, + { + "epoch": 1.786211614101092, + "grad_norm": 11.363874435424805, + "learning_rate": 1.623236412820679e-06, + "loss": 0.1023, + "num_input_tokens_seen": 49287264, + "step": 73115 + }, + { + "epoch": 1.7863337649329392, + "grad_norm": 0.2736768424510956, + "learning_rate": 1.6231697206161661e-06, + "loss": 0.11, + "num_input_tokens_seen": 49290912, + "step": 73120 + }, + { + "epoch": 1.7864559157647864, + "grad_norm": 0.707730770111084, + "learning_rate": 1.6231030238797956e-06, + "loss": 0.1004, + "num_input_tokens_seen": 49294048, + "step": 73125 + }, + { + "epoch": 1.7865780665966335, + "grad_norm": 0.5541930794715881, + "learning_rate": 1.623036322612052e-06, + "loss": 0.0276, + "num_input_tokens_seen": 49297120, + "step": 73130 + }, + { + "epoch": 1.7867002174284807, + "grad_norm": 0.1723814308643341, + "learning_rate": 1.622969616813421e-06, + "loss": 0.1215, + "num_input_tokens_seen": 49300512, + "step": 73135 + }, + { + "epoch": 1.786822368260328, + "grad_norm": 0.07303165644407272, + "learning_rate": 1.6229029064843871e-06, + "loss": 0.2217, + "num_input_tokens_seen": 49303392, + "step": 73140 + }, + { + "epoch": 1.786944519092175, + "grad_norm": 8.014287948608398, + "learning_rate": 1.6228361916254358e-06, + "loss": 0.0755, + "num_input_tokens_seen": 49307232, + "step": 73145 + }, + { + "epoch": 1.7870666699240223, + "grad_norm": 13.333250999450684, + "learning_rate": 1.6227694722370525e-06, + "loss": 0.003, + "num_input_tokens_seen": 49310560, + "step": 73150 + }, + { + "epoch": 1.7871888207558695, + "grad_norm": 18.574920654296875, + "learning_rate": 1.6227027483197214e-06, + "loss": 0.025, + "num_input_tokens_seen": 49314272, + "step": 73155 + }, + { + "epoch": 1.7873109715877165, + "grad_norm": 12.966170310974121, + "learning_rate": 1.622636019873929e-06, + "loss": 0.0363, + "num_input_tokens_seen": 49317472, + "step": 73160 + }, + { + "epoch": 1.7874331224195636, + "grad_norm": 11.653533935546875, + "learning_rate": 1.62256928690016e-06, + "loss": 0.1551, + "num_input_tokens_seen": 49320544, + "step": 73165 + }, + { + "epoch": 1.7875552732514108, + "grad_norm": 15.924564361572266, + "learning_rate": 1.6225025493988995e-06, + "loss": 0.0316, + "num_input_tokens_seen": 49323744, + "step": 73170 + }, + { + "epoch": 1.787677424083258, + "grad_norm": 224.7387237548828, + "learning_rate": 1.6224358073706327e-06, + "loss": 0.1097, + "num_input_tokens_seen": 49327456, + "step": 73175 + }, + { + "epoch": 1.7877995749151052, + "grad_norm": 35.19367599487305, + "learning_rate": 1.622369060815846e-06, + "loss": 0.124, + "num_input_tokens_seen": 49331040, + "step": 73180 + }, + { + "epoch": 1.7879217257469522, + "grad_norm": 0.002536064712330699, + "learning_rate": 1.6223023097350238e-06, + "loss": 0.0979, + "num_input_tokens_seen": 49334816, + "step": 73185 + }, + { + "epoch": 1.7880438765787994, + "grad_norm": 23.19019889831543, + "learning_rate": 1.6222355541286517e-06, + "loss": 0.1201, + "num_input_tokens_seen": 49338016, + "step": 73190 + }, + { + "epoch": 1.7881660274106466, + "grad_norm": 22.717191696166992, + "learning_rate": 1.6221687939972154e-06, + "loss": 0.1417, + "num_input_tokens_seen": 49341152, + "step": 73195 + }, + { + "epoch": 1.7882881782424938, + "grad_norm": 0.4923929274082184, + "learning_rate": 1.6221020293412003e-06, + "loss": 0.0307, + "num_input_tokens_seen": 49344608, + "step": 73200 + }, + { + "epoch": 1.788410329074341, + "grad_norm": 27.623798370361328, + "learning_rate": 1.6220352601610916e-06, + "loss": 0.0766, + "num_input_tokens_seen": 49347936, + "step": 73205 + }, + { + "epoch": 1.7885324799061881, + "grad_norm": 66.23554229736328, + "learning_rate": 1.6219684864573755e-06, + "loss": 0.0262, + "num_input_tokens_seen": 49351264, + "step": 73210 + }, + { + "epoch": 1.7886546307380353, + "grad_norm": 0.4153870940208435, + "learning_rate": 1.6219017082305373e-06, + "loss": 0.0033, + "num_input_tokens_seen": 49354528, + "step": 73215 + }, + { + "epoch": 1.7887767815698825, + "grad_norm": 92.46559143066406, + "learning_rate": 1.6218349254810627e-06, + "loss": 0.0614, + "num_input_tokens_seen": 49357728, + "step": 73220 + }, + { + "epoch": 1.7888989324017297, + "grad_norm": 8.169533729553223, + "learning_rate": 1.621768138209437e-06, + "loss": 0.0965, + "num_input_tokens_seen": 49361312, + "step": 73225 + }, + { + "epoch": 1.789021083233577, + "grad_norm": 0.24197158217430115, + "learning_rate": 1.621701346416146e-06, + "loss": 0.0449, + "num_input_tokens_seen": 49365024, + "step": 73230 + }, + { + "epoch": 1.789143234065424, + "grad_norm": 0.07424899935722351, + "learning_rate": 1.621634550101676e-06, + "loss": 0.054, + "num_input_tokens_seen": 49368416, + "step": 73235 + }, + { + "epoch": 1.7892653848972713, + "grad_norm": 0.09136585891246796, + "learning_rate": 1.621567749266512e-06, + "loss": 0.0255, + "num_input_tokens_seen": 49371936, + "step": 73240 + }, + { + "epoch": 1.7893875357291185, + "grad_norm": 0.4230661988258362, + "learning_rate": 1.6215009439111404e-06, + "loss": 0.0013, + "num_input_tokens_seen": 49375392, + "step": 73245 + }, + { + "epoch": 1.7895096865609654, + "grad_norm": 2.4775891304016113, + "learning_rate": 1.621434134036047e-06, + "loss": 0.0731, + "num_input_tokens_seen": 49378592, + "step": 73250 + }, + { + "epoch": 1.7896318373928126, + "grad_norm": 32.23170852661133, + "learning_rate": 1.621367319641717e-06, + "loss": 0.2222, + "num_input_tokens_seen": 49381856, + "step": 73255 + }, + { + "epoch": 1.7897539882246598, + "grad_norm": 0.35263219475746155, + "learning_rate": 1.621300500728637e-06, + "loss": 0.0916, + "num_input_tokens_seen": 49385568, + "step": 73260 + }, + { + "epoch": 1.789876139056507, + "grad_norm": 0.02084103785455227, + "learning_rate": 1.6212336772972926e-06, + "loss": 0.0386, + "num_input_tokens_seen": 49389152, + "step": 73265 + }, + { + "epoch": 1.7899982898883542, + "grad_norm": 0.027315644547343254, + "learning_rate": 1.6211668493481697e-06, + "loss": 0.0428, + "num_input_tokens_seen": 49392096, + "step": 73270 + }, + { + "epoch": 1.7901204407202012, + "grad_norm": 6.037578582763672, + "learning_rate": 1.6211000168817544e-06, + "loss": 0.0436, + "num_input_tokens_seen": 49395680, + "step": 73275 + }, + { + "epoch": 1.7902425915520483, + "grad_norm": 0.10098898410797119, + "learning_rate": 1.6210331798985325e-06, + "loss": 0.1954, + "num_input_tokens_seen": 49398752, + "step": 73280 + }, + { + "epoch": 1.7903647423838955, + "grad_norm": 0.04568028822541237, + "learning_rate": 1.6209663383989907e-06, + "loss": 0.0974, + "num_input_tokens_seen": 49401952, + "step": 73285 + }, + { + "epoch": 1.7904868932157427, + "grad_norm": 0.14879471063613892, + "learning_rate": 1.6208994923836145e-06, + "loss": 0.0597, + "num_input_tokens_seen": 49405088, + "step": 73290 + }, + { + "epoch": 1.79060904404759, + "grad_norm": 340.20709228515625, + "learning_rate": 1.6208326418528903e-06, + "loss": 0.0971, + "num_input_tokens_seen": 49408352, + "step": 73295 + }, + { + "epoch": 1.790731194879437, + "grad_norm": 32.714664459228516, + "learning_rate": 1.6207657868073037e-06, + "loss": 0.1478, + "num_input_tokens_seen": 49411680, + "step": 73300 + }, + { + "epoch": 1.7908533457112843, + "grad_norm": 0.08181426674127579, + "learning_rate": 1.620698927247342e-06, + "loss": 0.067, + "num_input_tokens_seen": 49415072, + "step": 73305 + }, + { + "epoch": 1.7909754965431315, + "grad_norm": 0.3073747158050537, + "learning_rate": 1.6206320631734903e-06, + "loss": 0.0026, + "num_input_tokens_seen": 49418208, + "step": 73310 + }, + { + "epoch": 1.7910976473749787, + "grad_norm": 20.518089294433594, + "learning_rate": 1.6205651945862355e-06, + "loss": 0.1575, + "num_input_tokens_seen": 49421856, + "step": 73315 + }, + { + "epoch": 1.7912197982068259, + "grad_norm": 0.11983784288167953, + "learning_rate": 1.6204983214860634e-06, + "loss": 0.1557, + "num_input_tokens_seen": 49425248, + "step": 73320 + }, + { + "epoch": 1.791341949038673, + "grad_norm": 44.16547393798828, + "learning_rate": 1.620431443873461e-06, + "loss": 0.0876, + "num_input_tokens_seen": 49428256, + "step": 73325 + }, + { + "epoch": 1.7914640998705202, + "grad_norm": 3.9383909702301025, + "learning_rate": 1.620364561748914e-06, + "loss": 0.0589, + "num_input_tokens_seen": 49431200, + "step": 73330 + }, + { + "epoch": 1.7915862507023674, + "grad_norm": 0.316801518201828, + "learning_rate": 1.6202976751129092e-06, + "loss": 0.0266, + "num_input_tokens_seen": 49434528, + "step": 73335 + }, + { + "epoch": 1.7917084015342144, + "grad_norm": 22.89853286743164, + "learning_rate": 1.6202307839659328e-06, + "loss": 0.1441, + "num_input_tokens_seen": 49437728, + "step": 73340 + }, + { + "epoch": 1.7918305523660616, + "grad_norm": 0.22337937355041504, + "learning_rate": 1.6201638883084714e-06, + "loss": 0.1626, + "num_input_tokens_seen": 49440928, + "step": 73345 + }, + { + "epoch": 1.7919527031979088, + "grad_norm": 10.1043119430542, + "learning_rate": 1.6200969881410113e-06, + "loss": 0.1022, + "num_input_tokens_seen": 49443872, + "step": 73350 + }, + { + "epoch": 1.792074854029756, + "grad_norm": 0.4906000792980194, + "learning_rate": 1.620030083464039e-06, + "loss": 0.0422, + "num_input_tokens_seen": 49447136, + "step": 73355 + }, + { + "epoch": 1.7921970048616032, + "grad_norm": 98.32612609863281, + "learning_rate": 1.6199631742780415e-06, + "loss": 0.1179, + "num_input_tokens_seen": 49450464, + "step": 73360 + }, + { + "epoch": 1.7923191556934501, + "grad_norm": 0.3492704927921295, + "learning_rate": 1.6198962605835046e-06, + "loss": 0.0342, + "num_input_tokens_seen": 49454368, + "step": 73365 + }, + { + "epoch": 1.7924413065252973, + "grad_norm": 0.2637254595756531, + "learning_rate": 1.6198293423809157e-06, + "loss": 0.1297, + "num_input_tokens_seen": 49457440, + "step": 73370 + }, + { + "epoch": 1.7925634573571445, + "grad_norm": 0.23060853779315948, + "learning_rate": 1.619762419670761e-06, + "loss": 0.0668, + "num_input_tokens_seen": 49460896, + "step": 73375 + }, + { + "epoch": 1.7926856081889917, + "grad_norm": 13.903261184692383, + "learning_rate": 1.6196954924535274e-06, + "loss": 0.0754, + "num_input_tokens_seen": 49464032, + "step": 73380 + }, + { + "epoch": 1.7928077590208389, + "grad_norm": 9.217611312866211, + "learning_rate": 1.6196285607297013e-06, + "loss": 0.0775, + "num_input_tokens_seen": 49467104, + "step": 73385 + }, + { + "epoch": 1.792929909852686, + "grad_norm": 0.06661586463451385, + "learning_rate": 1.6195616244997698e-06, + "loss": 0.1124, + "num_input_tokens_seen": 49470432, + "step": 73390 + }, + { + "epoch": 1.7930520606845333, + "grad_norm": 0.15379488468170166, + "learning_rate": 1.6194946837642194e-06, + "loss": 0.0208, + "num_input_tokens_seen": 49473824, + "step": 73395 + }, + { + "epoch": 1.7931742115163805, + "grad_norm": 329.20697021484375, + "learning_rate": 1.6194277385235372e-06, + "loss": 0.0871, + "num_input_tokens_seen": 49477536, + "step": 73400 + }, + { + "epoch": 1.7932963623482276, + "grad_norm": 0.23400792479515076, + "learning_rate": 1.6193607887782098e-06, + "loss": 0.001, + "num_input_tokens_seen": 49481120, + "step": 73405 + }, + { + "epoch": 1.7934185131800748, + "grad_norm": 0.6813299059867859, + "learning_rate": 1.619293834528724e-06, + "loss": 0.0447, + "num_input_tokens_seen": 49484512, + "step": 73410 + }, + { + "epoch": 1.793540664011922, + "grad_norm": 22.358814239501953, + "learning_rate": 1.6192268757755674e-06, + "loss": 0.1016, + "num_input_tokens_seen": 49487648, + "step": 73415 + }, + { + "epoch": 1.7936628148437692, + "grad_norm": 0.401123970746994, + "learning_rate": 1.6191599125192256e-06, + "loss": 0.0026, + "num_input_tokens_seen": 49491104, + "step": 73420 + }, + { + "epoch": 1.7937849656756164, + "grad_norm": 0.22458702325820923, + "learning_rate": 1.6190929447601872e-06, + "loss": 0.094, + "num_input_tokens_seen": 49494496, + "step": 73425 + }, + { + "epoch": 1.7939071165074634, + "grad_norm": 12.835310935974121, + "learning_rate": 1.6190259724989378e-06, + "loss": 0.0112, + "num_input_tokens_seen": 49498016, + "step": 73430 + }, + { + "epoch": 1.7940292673393106, + "grad_norm": 24.49115562438965, + "learning_rate": 1.6189589957359652e-06, + "loss": 0.1724, + "num_input_tokens_seen": 49501280, + "step": 73435 + }, + { + "epoch": 1.7941514181711578, + "grad_norm": 19.634645462036133, + "learning_rate": 1.6188920144717564e-06, + "loss": 0.1396, + "num_input_tokens_seen": 49504224, + "step": 73440 + }, + { + "epoch": 1.794273569003005, + "grad_norm": 0.5965172052383423, + "learning_rate": 1.6188250287067984e-06, + "loss": 0.0372, + "num_input_tokens_seen": 49507808, + "step": 73445 + }, + { + "epoch": 1.794395719834852, + "grad_norm": 0.1391860544681549, + "learning_rate": 1.6187580384415785e-06, + "loss": 0.0516, + "num_input_tokens_seen": 49511264, + "step": 73450 + }, + { + "epoch": 1.794517870666699, + "grad_norm": 27.543590545654297, + "learning_rate": 1.6186910436765833e-06, + "loss": 0.0438, + "num_input_tokens_seen": 49514656, + "step": 73455 + }, + { + "epoch": 1.7946400214985463, + "grad_norm": 0.5109656453132629, + "learning_rate": 1.6186240444123005e-06, + "loss": 0.0227, + "num_input_tokens_seen": 49517728, + "step": 73460 + }, + { + "epoch": 1.7947621723303935, + "grad_norm": 0.40214264392852783, + "learning_rate": 1.6185570406492174e-06, + "loss": 0.1171, + "num_input_tokens_seen": 49520864, + "step": 73465 + }, + { + "epoch": 1.7948843231622407, + "grad_norm": 0.30075138807296753, + "learning_rate": 1.6184900323878211e-06, + "loss": 0.0544, + "num_input_tokens_seen": 49523872, + "step": 73470 + }, + { + "epoch": 1.7950064739940879, + "grad_norm": 1.6809254884719849, + "learning_rate": 1.618423019628599e-06, + "loss": 0.1353, + "num_input_tokens_seen": 49527584, + "step": 73475 + }, + { + "epoch": 1.795128624825935, + "grad_norm": 0.1620369851589203, + "learning_rate": 1.6183560023720384e-06, + "loss": 0.001, + "num_input_tokens_seen": 49530976, + "step": 73480 + }, + { + "epoch": 1.7952507756577822, + "grad_norm": 3.483069658279419, + "learning_rate": 1.6182889806186264e-06, + "loss": 0.0426, + "num_input_tokens_seen": 49533792, + "step": 73485 + }, + { + "epoch": 1.7953729264896294, + "grad_norm": 33.021671295166016, + "learning_rate": 1.6182219543688507e-06, + "loss": 0.1981, + "num_input_tokens_seen": 49537056, + "step": 73490 + }, + { + "epoch": 1.7954950773214766, + "grad_norm": 0.7459858655929565, + "learning_rate": 1.6181549236231989e-06, + "loss": 0.0319, + "num_input_tokens_seen": 49540000, + "step": 73495 + }, + { + "epoch": 1.7956172281533238, + "grad_norm": 20.98183822631836, + "learning_rate": 1.618087888382158e-06, + "loss": 0.042, + "num_input_tokens_seen": 49543904, + "step": 73500 + }, + { + "epoch": 1.795739378985171, + "grad_norm": 0.34212106466293335, + "learning_rate": 1.6180208486462159e-06, + "loss": 0.0453, + "num_input_tokens_seen": 49547360, + "step": 73505 + }, + { + "epoch": 1.7958615298170182, + "grad_norm": 0.07674697786569595, + "learning_rate": 1.61795380441586e-06, + "loss": 0.116, + "num_input_tokens_seen": 49550624, + "step": 73510 + }, + { + "epoch": 1.7959836806488654, + "grad_norm": 0.45899268984794617, + "learning_rate": 1.6178867556915775e-06, + "loss": 0.0884, + "num_input_tokens_seen": 49553696, + "step": 73515 + }, + { + "epoch": 1.7961058314807123, + "grad_norm": 1.0446715354919434, + "learning_rate": 1.6178197024738566e-06, + "loss": 0.0022, + "num_input_tokens_seen": 49557152, + "step": 73520 + }, + { + "epoch": 1.7962279823125595, + "grad_norm": 0.03729706257581711, + "learning_rate": 1.6177526447631845e-06, + "loss": 0.0508, + "num_input_tokens_seen": 49560224, + "step": 73525 + }, + { + "epoch": 1.7963501331444067, + "grad_norm": 0.20750153064727783, + "learning_rate": 1.617685582560049e-06, + "loss": 0.07, + "num_input_tokens_seen": 49563552, + "step": 73530 + }, + { + "epoch": 1.796472283976254, + "grad_norm": 16.814863204956055, + "learning_rate": 1.617618515864938e-06, + "loss": 0.0935, + "num_input_tokens_seen": 49567200, + "step": 73535 + }, + { + "epoch": 1.7965944348081009, + "grad_norm": 0.13789507746696472, + "learning_rate": 1.617551444678339e-06, + "loss": 0.0344, + "num_input_tokens_seen": 49570720, + "step": 73540 + }, + { + "epoch": 1.796716585639948, + "grad_norm": 0.23944073915481567, + "learning_rate": 1.6174843690007396e-06, + "loss": 0.001, + "num_input_tokens_seen": 49573728, + "step": 73545 + }, + { + "epoch": 1.7968387364717953, + "grad_norm": 0.19656451046466827, + "learning_rate": 1.6174172888326279e-06, + "loss": 0.0986, + "num_input_tokens_seen": 49577120, + "step": 73550 + }, + { + "epoch": 1.7969608873036425, + "grad_norm": 22.739328384399414, + "learning_rate": 1.6173502041744915e-06, + "loss": 0.0424, + "num_input_tokens_seen": 49580448, + "step": 73555 + }, + { + "epoch": 1.7970830381354896, + "grad_norm": 0.4079742431640625, + "learning_rate": 1.6172831150268188e-06, + "loss": 0.1051, + "num_input_tokens_seen": 49584544, + "step": 73560 + }, + { + "epoch": 1.7972051889673368, + "grad_norm": 0.04261103272438049, + "learning_rate": 1.6172160213900967e-06, + "loss": 0.0408, + "num_input_tokens_seen": 49587616, + "step": 73565 + }, + { + "epoch": 1.797327339799184, + "grad_norm": 0.020871929824352264, + "learning_rate": 1.617148923264814e-06, + "loss": 0.11, + "num_input_tokens_seen": 49591072, + "step": 73570 + }, + { + "epoch": 1.7974494906310312, + "grad_norm": 9.203412055969238, + "learning_rate": 1.617081820651458e-06, + "loss": 0.184, + "num_input_tokens_seen": 49594336, + "step": 73575 + }, + { + "epoch": 1.7975716414628784, + "grad_norm": 0.29414746165275574, + "learning_rate": 1.6170147135505175e-06, + "loss": 0.0354, + "num_input_tokens_seen": 49598176, + "step": 73580 + }, + { + "epoch": 1.7976937922947256, + "grad_norm": 342.13507080078125, + "learning_rate": 1.6169476019624796e-06, + "loss": 0.0947, + "num_input_tokens_seen": 49601120, + "step": 73585 + }, + { + "epoch": 1.7978159431265728, + "grad_norm": 13.11091423034668, + "learning_rate": 1.616880485887833e-06, + "loss": 0.1298, + "num_input_tokens_seen": 49604704, + "step": 73590 + }, + { + "epoch": 1.79793809395842, + "grad_norm": 25.429052352905273, + "learning_rate": 1.6168133653270657e-06, + "loss": 0.0295, + "num_input_tokens_seen": 49607968, + "step": 73595 + }, + { + "epoch": 1.7980602447902672, + "grad_norm": 0.16069746017456055, + "learning_rate": 1.6167462402806658e-06, + "loss": 0.1576, + "num_input_tokens_seen": 49611360, + "step": 73600 + }, + { + "epoch": 1.7981823956221143, + "grad_norm": 0.32684841752052307, + "learning_rate": 1.6166791107491212e-06, + "loss": 0.1108, + "num_input_tokens_seen": 49614944, + "step": 73605 + }, + { + "epoch": 1.7983045464539613, + "grad_norm": 48.709228515625, + "learning_rate": 1.61661197673292e-06, + "loss": 0.1571, + "num_input_tokens_seen": 49618528, + "step": 73610 + }, + { + "epoch": 1.7984266972858085, + "grad_norm": 0.20360717177391052, + "learning_rate": 1.616544838232551e-06, + "loss": 0.0018, + "num_input_tokens_seen": 49622304, + "step": 73615 + }, + { + "epoch": 1.7985488481176557, + "grad_norm": 0.22650174796581268, + "learning_rate": 1.6164776952485017e-06, + "loss": 0.0008, + "num_input_tokens_seen": 49625376, + "step": 73620 + }, + { + "epoch": 1.7986709989495029, + "grad_norm": 0.02403288520872593, + "learning_rate": 1.6164105477812612e-06, + "loss": 0.0985, + "num_input_tokens_seen": 49628576, + "step": 73625 + }, + { + "epoch": 1.7987931497813499, + "grad_norm": 8.939688682556152, + "learning_rate": 1.6163433958313174e-06, + "loss": 0.0531, + "num_input_tokens_seen": 49632352, + "step": 73630 + }, + { + "epoch": 1.798915300613197, + "grad_norm": 0.2442442625761032, + "learning_rate": 1.6162762393991585e-06, + "loss": 0.1582, + "num_input_tokens_seen": 49636000, + "step": 73635 + }, + { + "epoch": 1.7990374514450442, + "grad_norm": 0.18538600206375122, + "learning_rate": 1.6162090784852728e-06, + "loss": 0.1563, + "num_input_tokens_seen": 49639136, + "step": 73640 + }, + { + "epoch": 1.7991596022768914, + "grad_norm": 0.099936343729496, + "learning_rate": 1.616141913090149e-06, + "loss": 0.0032, + "num_input_tokens_seen": 49642848, + "step": 73645 + }, + { + "epoch": 1.7992817531087386, + "grad_norm": 0.20571479201316833, + "learning_rate": 1.616074743214276e-06, + "loss": 0.1117, + "num_input_tokens_seen": 49645856, + "step": 73650 + }, + { + "epoch": 1.7994039039405858, + "grad_norm": 0.04828282445669174, + "learning_rate": 1.6160075688581414e-06, + "loss": 0.0952, + "num_input_tokens_seen": 49649120, + "step": 73655 + }, + { + "epoch": 1.799526054772433, + "grad_norm": 0.1694461703300476, + "learning_rate": 1.6159403900222342e-06, + "loss": 0.0019, + "num_input_tokens_seen": 49653280, + "step": 73660 + }, + { + "epoch": 1.7996482056042802, + "grad_norm": 8.066333770751953, + "learning_rate": 1.6158732067070426e-06, + "loss": 0.0895, + "num_input_tokens_seen": 49656480, + "step": 73665 + }, + { + "epoch": 1.7997703564361274, + "grad_norm": 2.2907662391662598, + "learning_rate": 1.6158060189130556e-06, + "loss": 0.0036, + "num_input_tokens_seen": 49659744, + "step": 73670 + }, + { + "epoch": 1.7998925072679746, + "grad_norm": 0.21309258043766022, + "learning_rate": 1.6157388266407614e-06, + "loss": 0.1183, + "num_input_tokens_seen": 49663136, + "step": 73675 + }, + { + "epoch": 1.8000146580998218, + "grad_norm": 0.2838139235973358, + "learning_rate": 1.6156716298906487e-06, + "loss": 0.1008, + "num_input_tokens_seen": 49666336, + "step": 73680 + }, + { + "epoch": 1.800136808931669, + "grad_norm": 13.1321439743042, + "learning_rate": 1.615604428663207e-06, + "loss": 0.0426, + "num_input_tokens_seen": 49669408, + "step": 73685 + }, + { + "epoch": 1.8002589597635161, + "grad_norm": 0.43830278515815735, + "learning_rate": 1.6155372229589234e-06, + "loss": 0.0701, + "num_input_tokens_seen": 49672800, + "step": 73690 + }, + { + "epoch": 1.8003811105953633, + "grad_norm": 12.03991413116455, + "learning_rate": 1.6154700127782883e-06, + "loss": 0.0654, + "num_input_tokens_seen": 49676000, + "step": 73695 + }, + { + "epoch": 1.8005032614272103, + "grad_norm": 0.11567030102014542, + "learning_rate": 1.6154027981217894e-06, + "loss": 0.1242, + "num_input_tokens_seen": 49678752, + "step": 73700 + }, + { + "epoch": 1.8006254122590575, + "grad_norm": 17.359439849853516, + "learning_rate": 1.6153355789899159e-06, + "loss": 0.1069, + "num_input_tokens_seen": 49681952, + "step": 73705 + }, + { + "epoch": 1.8007475630909047, + "grad_norm": 0.09139929711818695, + "learning_rate": 1.6152683553831565e-06, + "loss": 0.0015, + "num_input_tokens_seen": 49685536, + "step": 73710 + }, + { + "epoch": 1.8008697139227519, + "grad_norm": 15.379899024963379, + "learning_rate": 1.6152011273020002e-06, + "loss": 0.1863, + "num_input_tokens_seen": 49689056, + "step": 73715 + }, + { + "epoch": 1.8009918647545988, + "grad_norm": 20.884328842163086, + "learning_rate": 1.6151338947469358e-06, + "loss": 0.0266, + "num_input_tokens_seen": 49692128, + "step": 73720 + }, + { + "epoch": 1.801114015586446, + "grad_norm": 0.5158682465553284, + "learning_rate": 1.6150666577184521e-06, + "loss": 0.0509, + "num_input_tokens_seen": 49695072, + "step": 73725 + }, + { + "epoch": 1.8012361664182932, + "grad_norm": 0.5207684636116028, + "learning_rate": 1.6149994162170386e-06, + "loss": 0.0198, + "num_input_tokens_seen": 49698720, + "step": 73730 + }, + { + "epoch": 1.8013583172501404, + "grad_norm": 137.9433135986328, + "learning_rate": 1.6149321702431836e-06, + "loss": 0.1982, + "num_input_tokens_seen": 49702176, + "step": 73735 + }, + { + "epoch": 1.8014804680819876, + "grad_norm": 0.596860408782959, + "learning_rate": 1.6148649197973768e-06, + "loss": 0.0983, + "num_input_tokens_seen": 49705824, + "step": 73740 + }, + { + "epoch": 1.8016026189138348, + "grad_norm": 26.139259338378906, + "learning_rate": 1.6147976648801068e-06, + "loss": 0.1295, + "num_input_tokens_seen": 49708960, + "step": 73745 + }, + { + "epoch": 1.801724769745682, + "grad_norm": 0.34380877017974854, + "learning_rate": 1.6147304054918626e-06, + "loss": 0.0654, + "num_input_tokens_seen": 49712544, + "step": 73750 + }, + { + "epoch": 1.8018469205775292, + "grad_norm": 0.10611290484666824, + "learning_rate": 1.6146631416331338e-06, + "loss": 0.0359, + "num_input_tokens_seen": 49716128, + "step": 73755 + }, + { + "epoch": 1.8019690714093763, + "grad_norm": 0.09845145046710968, + "learning_rate": 1.6145958733044092e-06, + "loss": 0.1364, + "num_input_tokens_seen": 49719456, + "step": 73760 + }, + { + "epoch": 1.8020912222412235, + "grad_norm": 17.626033782958984, + "learning_rate": 1.614528600506178e-06, + "loss": 0.1124, + "num_input_tokens_seen": 49722400, + "step": 73765 + }, + { + "epoch": 1.8022133730730707, + "grad_norm": 0.046197183430194855, + "learning_rate": 1.6144613232389295e-06, + "loss": 0.0819, + "num_input_tokens_seen": 49726112, + "step": 73770 + }, + { + "epoch": 1.802335523904918, + "grad_norm": 0.08827891200780869, + "learning_rate": 1.614394041503153e-06, + "loss": 0.075, + "num_input_tokens_seen": 49729248, + "step": 73775 + }, + { + "epoch": 1.802457674736765, + "grad_norm": 8.221805572509766, + "learning_rate": 1.6143267552993382e-06, + "loss": 0.182, + "num_input_tokens_seen": 49732640, + "step": 73780 + }, + { + "epoch": 1.802579825568612, + "grad_norm": 0.11934688687324524, + "learning_rate": 1.6142594646279738e-06, + "loss": 0.165, + "num_input_tokens_seen": 49736480, + "step": 73785 + }, + { + "epoch": 1.8027019764004593, + "grad_norm": 0.350424200296402, + "learning_rate": 1.614192169489549e-06, + "loss": 0.1129, + "num_input_tokens_seen": 49740064, + "step": 73790 + }, + { + "epoch": 1.8028241272323065, + "grad_norm": 0.7355371117591858, + "learning_rate": 1.6141248698845538e-06, + "loss": 0.0624, + "num_input_tokens_seen": 49743264, + "step": 73795 + }, + { + "epoch": 1.8029462780641536, + "grad_norm": 7.172179222106934, + "learning_rate": 1.6140575658134772e-06, + "loss": 0.101, + "num_input_tokens_seen": 49747488, + "step": 73800 + }, + { + "epoch": 1.8030684288960008, + "grad_norm": 0.3119280934333801, + "learning_rate": 1.6139902572768094e-06, + "loss": 0.1085, + "num_input_tokens_seen": 49750752, + "step": 73805 + }, + { + "epoch": 1.8031905797278478, + "grad_norm": 0.2874053120613098, + "learning_rate": 1.6139229442750385e-06, + "loss": 0.0637, + "num_input_tokens_seen": 49753824, + "step": 73810 + }, + { + "epoch": 1.803312730559695, + "grad_norm": 0.2166033685207367, + "learning_rate": 1.6138556268086557e-06, + "loss": 0.0009, + "num_input_tokens_seen": 49757216, + "step": 73815 + }, + { + "epoch": 1.8034348813915422, + "grad_norm": 0.30320924520492554, + "learning_rate": 1.613788304878149e-06, + "loss": 0.0381, + "num_input_tokens_seen": 49760736, + "step": 73820 + }, + { + "epoch": 1.8035570322233894, + "grad_norm": 0.18933552503585815, + "learning_rate": 1.6137209784840086e-06, + "loss": 0.0885, + "num_input_tokens_seen": 49764576, + "step": 73825 + }, + { + "epoch": 1.8036791830552366, + "grad_norm": 121.48233795166016, + "learning_rate": 1.6136536476267243e-06, + "loss": 0.0437, + "num_input_tokens_seen": 49767712, + "step": 73830 + }, + { + "epoch": 1.8038013338870837, + "grad_norm": 0.2928767204284668, + "learning_rate": 1.6135863123067858e-06, + "loss": 0.0381, + "num_input_tokens_seen": 49771296, + "step": 73835 + }, + { + "epoch": 1.803923484718931, + "grad_norm": 109.94903564453125, + "learning_rate": 1.6135189725246828e-06, + "loss": 0.0836, + "num_input_tokens_seen": 49774624, + "step": 73840 + }, + { + "epoch": 1.8040456355507781, + "grad_norm": 12.922600746154785, + "learning_rate": 1.6134516282809045e-06, + "loss": 0.1184, + "num_input_tokens_seen": 49778784, + "step": 73845 + }, + { + "epoch": 1.8041677863826253, + "grad_norm": 14.157644271850586, + "learning_rate": 1.6133842795759408e-06, + "loss": 0.082, + "num_input_tokens_seen": 49782240, + "step": 73850 + }, + { + "epoch": 1.8042899372144725, + "grad_norm": 0.6490665078163147, + "learning_rate": 1.613316926410282e-06, + "loss": 0.0021, + "num_input_tokens_seen": 49785184, + "step": 73855 + }, + { + "epoch": 1.8044120880463197, + "grad_norm": 0.5274012684822083, + "learning_rate": 1.6132495687844174e-06, + "loss": 0.0884, + "num_input_tokens_seen": 49788384, + "step": 73860 + }, + { + "epoch": 1.8045342388781669, + "grad_norm": 30.976333618164062, + "learning_rate": 1.6131822066988372e-06, + "loss": 0.1359, + "num_input_tokens_seen": 49791776, + "step": 73865 + }, + { + "epoch": 1.804656389710014, + "grad_norm": 51.13031768798828, + "learning_rate": 1.6131148401540307e-06, + "loss": 0.1146, + "num_input_tokens_seen": 49795680, + "step": 73870 + }, + { + "epoch": 1.804778540541861, + "grad_norm": 0.002726664301007986, + "learning_rate": 1.6130474691504885e-06, + "loss": 0.0008, + "num_input_tokens_seen": 49799264, + "step": 73875 + }, + { + "epoch": 1.8049006913737082, + "grad_norm": 16.781251907348633, + "learning_rate": 1.6129800936887002e-06, + "loss": 0.0959, + "num_input_tokens_seen": 49802720, + "step": 73880 + }, + { + "epoch": 1.8050228422055554, + "grad_norm": 0.9871700406074524, + "learning_rate": 1.6129127137691554e-06, + "loss": 0.0705, + "num_input_tokens_seen": 49806560, + "step": 73885 + }, + { + "epoch": 1.8051449930374026, + "grad_norm": 31.406795501708984, + "learning_rate": 1.6128453293923446e-06, + "loss": 0.1185, + "num_input_tokens_seen": 49809760, + "step": 73890 + }, + { + "epoch": 1.8052671438692498, + "grad_norm": 0.21563898026943207, + "learning_rate": 1.6127779405587578e-06, + "loss": 0.038, + "num_input_tokens_seen": 49813344, + "step": 73895 + }, + { + "epoch": 1.8053892947010968, + "grad_norm": 0.48208603262901306, + "learning_rate": 1.6127105472688852e-06, + "loss": 0.0756, + "num_input_tokens_seen": 49816928, + "step": 73900 + }, + { + "epoch": 1.805511445532944, + "grad_norm": 1.9552193880081177, + "learning_rate": 1.6126431495232167e-06, + "loss": 0.0234, + "num_input_tokens_seen": 49820320, + "step": 73905 + }, + { + "epoch": 1.8056335963647911, + "grad_norm": 137.20310974121094, + "learning_rate": 1.6125757473222423e-06, + "loss": 0.1326, + "num_input_tokens_seen": 49823712, + "step": 73910 + }, + { + "epoch": 1.8057557471966383, + "grad_norm": 0.2588064670562744, + "learning_rate": 1.6125083406664523e-06, + "loss": 0.1187, + "num_input_tokens_seen": 49826848, + "step": 73915 + }, + { + "epoch": 1.8058778980284855, + "grad_norm": 0.5283862352371216, + "learning_rate": 1.6124409295563369e-06, + "loss": 0.1636, + "num_input_tokens_seen": 49829856, + "step": 73920 + }, + { + "epoch": 1.8060000488603327, + "grad_norm": 1.8500924110412598, + "learning_rate": 1.612373513992386e-06, + "loss": 0.0391, + "num_input_tokens_seen": 49833056, + "step": 73925 + }, + { + "epoch": 1.80612219969218, + "grad_norm": 0.8077684044837952, + "learning_rate": 1.6123060939750908e-06, + "loss": 0.0296, + "num_input_tokens_seen": 49836832, + "step": 73930 + }, + { + "epoch": 1.806244350524027, + "grad_norm": 0.27247530221939087, + "learning_rate": 1.6122386695049409e-06, + "loss": 0.0689, + "num_input_tokens_seen": 49840160, + "step": 73935 + }, + { + "epoch": 1.8063665013558743, + "grad_norm": 0.2970154285430908, + "learning_rate": 1.6121712405824263e-06, + "loss": 0.0879, + "num_input_tokens_seen": 49843808, + "step": 73940 + }, + { + "epoch": 1.8064886521877215, + "grad_norm": 0.5835685133934021, + "learning_rate": 1.6121038072080382e-06, + "loss": 0.0351, + "num_input_tokens_seen": 49847008, + "step": 73945 + }, + { + "epoch": 1.8066108030195687, + "grad_norm": 31.69612693786621, + "learning_rate": 1.6120363693822663e-06, + "loss": 0.1674, + "num_input_tokens_seen": 49850528, + "step": 73950 + }, + { + "epoch": 1.8067329538514159, + "grad_norm": 11.472599029541016, + "learning_rate": 1.6119689271056013e-06, + "loss": 0.1243, + "num_input_tokens_seen": 49853792, + "step": 73955 + }, + { + "epoch": 1.806855104683263, + "grad_norm": 2.412147283554077, + "learning_rate": 1.6119014803785338e-06, + "loss": 0.0015, + "num_input_tokens_seen": 49856928, + "step": 73960 + }, + { + "epoch": 1.80697725551511, + "grad_norm": 0.26464715600013733, + "learning_rate": 1.6118340292015545e-06, + "loss": 0.0041, + "num_input_tokens_seen": 49860512, + "step": 73965 + }, + { + "epoch": 1.8070994063469572, + "grad_norm": 0.1113317459821701, + "learning_rate": 1.6117665735751529e-06, + "loss": 0.0398, + "num_input_tokens_seen": 49864416, + "step": 73970 + }, + { + "epoch": 1.8072215571788044, + "grad_norm": 1.2402263879776, + "learning_rate": 1.6116991134998208e-06, + "loss": 0.0755, + "num_input_tokens_seen": 49867552, + "step": 73975 + }, + { + "epoch": 1.8073437080106516, + "grad_norm": 0.6913524866104126, + "learning_rate": 1.6116316489760477e-06, + "loss": 0.1772, + "num_input_tokens_seen": 49870752, + "step": 73980 + }, + { + "epoch": 1.8074658588424986, + "grad_norm": 0.17152690887451172, + "learning_rate": 1.6115641800043252e-06, + "loss": 0.1199, + "num_input_tokens_seen": 49873952, + "step": 73985 + }, + { + "epoch": 1.8075880096743457, + "grad_norm": 0.07484672963619232, + "learning_rate": 1.6114967065851431e-06, + "loss": 0.0553, + "num_input_tokens_seen": 49877152, + "step": 73990 + }, + { + "epoch": 1.807710160506193, + "grad_norm": 0.34017398953437805, + "learning_rate": 1.6114292287189928e-06, + "loss": 0.0014, + "num_input_tokens_seen": 49880672, + "step": 73995 + }, + { + "epoch": 1.8078323113380401, + "grad_norm": 0.23076596856117249, + "learning_rate": 1.6113617464063646e-06, + "loss": 0.1862, + "num_input_tokens_seen": 49884128, + "step": 74000 + }, + { + "epoch": 1.8079544621698873, + "grad_norm": 19.423137664794922, + "learning_rate": 1.6112942596477491e-06, + "loss": 0.1815, + "num_input_tokens_seen": 49887264, + "step": 74005 + }, + { + "epoch": 1.8080766130017345, + "grad_norm": 0.2476540058851242, + "learning_rate": 1.6112267684436378e-06, + "loss": 0.0047, + "num_input_tokens_seen": 49890400, + "step": 74010 + }, + { + "epoch": 1.8081987638335817, + "grad_norm": 0.1816900074481964, + "learning_rate": 1.6111592727945205e-06, + "loss": 0.0378, + "num_input_tokens_seen": 49893792, + "step": 74015 + }, + { + "epoch": 1.8083209146654289, + "grad_norm": 0.04045751318335533, + "learning_rate": 1.611091772700889e-06, + "loss": 0.0564, + "num_input_tokens_seen": 49897056, + "step": 74020 + }, + { + "epoch": 1.808443065497276, + "grad_norm": 18.38286781311035, + "learning_rate": 1.6110242681632335e-06, + "loss": 0.1873, + "num_input_tokens_seen": 49900128, + "step": 74025 + }, + { + "epoch": 1.8085652163291233, + "grad_norm": 13.320504188537598, + "learning_rate": 1.6109567591820454e-06, + "loss": 0.1264, + "num_input_tokens_seen": 49903584, + "step": 74030 + }, + { + "epoch": 1.8086873671609704, + "grad_norm": 0.5575301647186279, + "learning_rate": 1.6108892457578151e-06, + "loss": 0.0015, + "num_input_tokens_seen": 49906848, + "step": 74035 + }, + { + "epoch": 1.8088095179928176, + "grad_norm": 1.4660645723342896, + "learning_rate": 1.6108217278910342e-06, + "loss": 0.0855, + "num_input_tokens_seen": 49910688, + "step": 74040 + }, + { + "epoch": 1.8089316688246648, + "grad_norm": 13.347358703613281, + "learning_rate": 1.6107542055821934e-06, + "loss": 0.0939, + "num_input_tokens_seen": 49913824, + "step": 74045 + }, + { + "epoch": 1.809053819656512, + "grad_norm": 0.15625794231891632, + "learning_rate": 1.6106866788317837e-06, + "loss": 0.0009, + "num_input_tokens_seen": 49917216, + "step": 74050 + }, + { + "epoch": 1.809175970488359, + "grad_norm": 0.2677502930164337, + "learning_rate": 1.6106191476402961e-06, + "loss": 0.0365, + "num_input_tokens_seen": 49920800, + "step": 74055 + }, + { + "epoch": 1.8092981213202062, + "grad_norm": 34.20901107788086, + "learning_rate": 1.6105516120082218e-06, + "loss": 0.2032, + "num_input_tokens_seen": 49924192, + "step": 74060 + }, + { + "epoch": 1.8094202721520534, + "grad_norm": 30.46437644958496, + "learning_rate": 1.610484071936052e-06, + "loss": 0.0806, + "num_input_tokens_seen": 49927456, + "step": 74065 + }, + { + "epoch": 1.8095424229839006, + "grad_norm": 0.4633569121360779, + "learning_rate": 1.6104165274242782e-06, + "loss": 0.0486, + "num_input_tokens_seen": 49930528, + "step": 74070 + }, + { + "epoch": 1.8096645738157475, + "grad_norm": 0.09732931852340698, + "learning_rate": 1.610348978473391e-06, + "loss": 0.0669, + "num_input_tokens_seen": 49933920, + "step": 74075 + }, + { + "epoch": 1.8097867246475947, + "grad_norm": 0.17056681215763092, + "learning_rate": 1.6102814250838814e-06, + "loss": 0.0025, + "num_input_tokens_seen": 49937120, + "step": 74080 + }, + { + "epoch": 1.809908875479442, + "grad_norm": 268.8205261230469, + "learning_rate": 1.6102138672562417e-06, + "loss": 0.1609, + "num_input_tokens_seen": 49940448, + "step": 74085 + }, + { + "epoch": 1.810031026311289, + "grad_norm": 0.40876808762550354, + "learning_rate": 1.6101463049909626e-06, + "loss": 0.0982, + "num_input_tokens_seen": 49943712, + "step": 74090 + }, + { + "epoch": 1.8101531771431363, + "grad_norm": 0.37469613552093506, + "learning_rate": 1.6100787382885352e-06, + "loss": 0.1006, + "num_input_tokens_seen": 49946784, + "step": 74095 + }, + { + "epoch": 1.8102753279749835, + "grad_norm": 2.2056779861450195, + "learning_rate": 1.6100111671494511e-06, + "loss": 0.1143, + "num_input_tokens_seen": 49949920, + "step": 74100 + }, + { + "epoch": 1.8103974788068307, + "grad_norm": 0.019310932606458664, + "learning_rate": 1.6099435915742018e-06, + "loss": 0.0009, + "num_input_tokens_seen": 49953568, + "step": 74105 + }, + { + "epoch": 1.8105196296386779, + "grad_norm": 0.16825686395168304, + "learning_rate": 1.6098760115632785e-06, + "loss": 0.0568, + "num_input_tokens_seen": 49957152, + "step": 74110 + }, + { + "epoch": 1.810641780470525, + "grad_norm": 15.703826904296875, + "learning_rate": 1.6098084271171732e-06, + "loss": 0.0408, + "num_input_tokens_seen": 49960736, + "step": 74115 + }, + { + "epoch": 1.8107639313023722, + "grad_norm": 0.25060200691223145, + "learning_rate": 1.6097408382363768e-06, + "loss": 0.0006, + "num_input_tokens_seen": 49964256, + "step": 74120 + }, + { + "epoch": 1.8108860821342194, + "grad_norm": 0.20446979999542236, + "learning_rate": 1.6096732449213812e-06, + "loss": 0.166, + "num_input_tokens_seen": 49967456, + "step": 74125 + }, + { + "epoch": 1.8110082329660666, + "grad_norm": 33.712005615234375, + "learning_rate": 1.6096056471726775e-06, + "loss": 0.0413, + "num_input_tokens_seen": 49970912, + "step": 74130 + }, + { + "epoch": 1.8111303837979138, + "grad_norm": 0.1862323135137558, + "learning_rate": 1.6095380449907577e-06, + "loss": 0.0854, + "num_input_tokens_seen": 49973984, + "step": 74135 + }, + { + "epoch": 1.811252534629761, + "grad_norm": 0.0933285802602768, + "learning_rate": 1.609470438376113e-06, + "loss": 0.0806, + "num_input_tokens_seen": 49977248, + "step": 74140 + }, + { + "epoch": 1.811374685461608, + "grad_norm": 13.727092742919922, + "learning_rate": 1.609402827329236e-06, + "loss": 0.1203, + "num_input_tokens_seen": 49980384, + "step": 74145 + }, + { + "epoch": 1.8114968362934551, + "grad_norm": 0.2599273920059204, + "learning_rate": 1.609335211850617e-06, + "loss": 0.0476, + "num_input_tokens_seen": 49983584, + "step": 74150 + }, + { + "epoch": 1.8116189871253023, + "grad_norm": 0.024421192705631256, + "learning_rate": 1.6092675919407487e-06, + "loss": 0.1712, + "num_input_tokens_seen": 49987104, + "step": 74155 + }, + { + "epoch": 1.8117411379571495, + "grad_norm": 0.3774043023586273, + "learning_rate": 1.6091999676001228e-06, + "loss": 0.0908, + "num_input_tokens_seen": 49990752, + "step": 74160 + }, + { + "epoch": 1.8118632887889965, + "grad_norm": 0.11246316134929657, + "learning_rate": 1.609132338829231e-06, + "loss": 0.0425, + "num_input_tokens_seen": 49994208, + "step": 74165 + }, + { + "epoch": 1.8119854396208437, + "grad_norm": 12.335349082946777, + "learning_rate": 1.6090647056285645e-06, + "loss": 0.0904, + "num_input_tokens_seen": 49997536, + "step": 74170 + }, + { + "epoch": 1.8121075904526909, + "grad_norm": 0.5349389910697937, + "learning_rate": 1.608997067998616e-06, + "loss": 0.06, + "num_input_tokens_seen": 50000544, + "step": 74175 + }, + { + "epoch": 1.812229741284538, + "grad_norm": 0.22816990315914154, + "learning_rate": 1.608929425939877e-06, + "loss": 0.096, + "num_input_tokens_seen": 50003680, + "step": 74180 + }, + { + "epoch": 1.8123518921163853, + "grad_norm": 0.25701552629470825, + "learning_rate": 1.6088617794528392e-06, + "loss": 0.1466, + "num_input_tokens_seen": 50006880, + "step": 74185 + }, + { + "epoch": 1.8124740429482324, + "grad_norm": 21.37340545654297, + "learning_rate": 1.608794128537995e-06, + "loss": 0.1236, + "num_input_tokens_seen": 50010528, + "step": 74190 + }, + { + "epoch": 1.8125961937800796, + "grad_norm": 0.05798621103167534, + "learning_rate": 1.608726473195836e-06, + "loss": 0.0713, + "num_input_tokens_seen": 50014176, + "step": 74195 + }, + { + "epoch": 1.8127183446119268, + "grad_norm": 0.3602031469345093, + "learning_rate": 1.6086588134268544e-06, + "loss": 0.1177, + "num_input_tokens_seen": 50017312, + "step": 74200 + }, + { + "epoch": 1.812840495443774, + "grad_norm": 0.13574683666229248, + "learning_rate": 1.6085911492315423e-06, + "loss": 0.0225, + "num_input_tokens_seen": 50021024, + "step": 74205 + }, + { + "epoch": 1.8129626462756212, + "grad_norm": 38.42686462402344, + "learning_rate": 1.6085234806103918e-06, + "loss": 0.1781, + "num_input_tokens_seen": 50024416, + "step": 74210 + }, + { + "epoch": 1.8130847971074684, + "grad_norm": 0.257497102022171, + "learning_rate": 1.6084558075638946e-06, + "loss": 0.0969, + "num_input_tokens_seen": 50027680, + "step": 74215 + }, + { + "epoch": 1.8132069479393156, + "grad_norm": 23.47085189819336, + "learning_rate": 1.608388130092543e-06, + "loss": 0.2283, + "num_input_tokens_seen": 50030752, + "step": 74220 + }, + { + "epoch": 1.8133290987711628, + "grad_norm": 1.341367483139038, + "learning_rate": 1.6083204481968297e-06, + "loss": 0.004, + "num_input_tokens_seen": 50033824, + "step": 74225 + }, + { + "epoch": 1.81345124960301, + "grad_norm": 0.4435931444168091, + "learning_rate": 1.6082527618772462e-06, + "loss": 0.0022, + "num_input_tokens_seen": 50036896, + "step": 74230 + }, + { + "epoch": 1.813573400434857, + "grad_norm": 0.06710055470466614, + "learning_rate": 1.608185071134285e-06, + "loss": 0.1087, + "num_input_tokens_seen": 50040544, + "step": 74235 + }, + { + "epoch": 1.8136955512667041, + "grad_norm": 0.07674826681613922, + "learning_rate": 1.6081173759684385e-06, + "loss": 0.0011, + "num_input_tokens_seen": 50044128, + "step": 74240 + }, + { + "epoch": 1.8138177020985513, + "grad_norm": 0.14639919996261597, + "learning_rate": 1.6080496763801989e-06, + "loss": 0.0458, + "num_input_tokens_seen": 50047520, + "step": 74245 + }, + { + "epoch": 1.8139398529303985, + "grad_norm": 0.4655853807926178, + "learning_rate": 1.6079819723700585e-06, + "loss": 0.1124, + "num_input_tokens_seen": 50050528, + "step": 74250 + }, + { + "epoch": 1.8140620037622455, + "grad_norm": 0.3685029447078705, + "learning_rate": 1.6079142639385096e-06, + "loss": 0.0276, + "num_input_tokens_seen": 50054176, + "step": 74255 + }, + { + "epoch": 1.8141841545940927, + "grad_norm": 93.46351623535156, + "learning_rate": 1.6078465510860446e-06, + "loss": 0.1728, + "num_input_tokens_seen": 50057440, + "step": 74260 + }, + { + "epoch": 1.8143063054259398, + "grad_norm": 21.421630859375, + "learning_rate": 1.607778833813156e-06, + "loss": 0.0582, + "num_input_tokens_seen": 50060832, + "step": 74265 + }, + { + "epoch": 1.814428456257787, + "grad_norm": 37.1475830078125, + "learning_rate": 1.6077111121203364e-06, + "loss": 0.1701, + "num_input_tokens_seen": 50064480, + "step": 74270 + }, + { + "epoch": 1.8145506070896342, + "grad_norm": 17.186304092407227, + "learning_rate": 1.607643386008078e-06, + "loss": 0.0996, + "num_input_tokens_seen": 50067936, + "step": 74275 + }, + { + "epoch": 1.8146727579214814, + "grad_norm": 0.339376300573349, + "learning_rate": 1.6075756554768736e-06, + "loss": 0.0023, + "num_input_tokens_seen": 50071328, + "step": 74280 + }, + { + "epoch": 1.8147949087533286, + "grad_norm": 113.74297332763672, + "learning_rate": 1.6075079205272155e-06, + "loss": 0.0529, + "num_input_tokens_seen": 50074592, + "step": 74285 + }, + { + "epoch": 1.8149170595851758, + "grad_norm": 125.77494812011719, + "learning_rate": 1.6074401811595965e-06, + "loss": 0.2788, + "num_input_tokens_seen": 50078304, + "step": 74290 + }, + { + "epoch": 1.815039210417023, + "grad_norm": 0.16352109611034393, + "learning_rate": 1.6073724373745088e-06, + "loss": 0.038, + "num_input_tokens_seen": 50081952, + "step": 74295 + }, + { + "epoch": 1.8151613612488702, + "grad_norm": 9.420287132263184, + "learning_rate": 1.6073046891724458e-06, + "loss": 0.045, + "num_input_tokens_seen": 50085472, + "step": 74300 + }, + { + "epoch": 1.8152835120807174, + "grad_norm": 110.37330627441406, + "learning_rate": 1.6072369365538996e-06, + "loss": 0.1823, + "num_input_tokens_seen": 50088416, + "step": 74305 + }, + { + "epoch": 1.8154056629125646, + "grad_norm": 12.090911865234375, + "learning_rate": 1.607169179519363e-06, + "loss": 0.0917, + "num_input_tokens_seen": 50092128, + "step": 74310 + }, + { + "epoch": 1.8155278137444117, + "grad_norm": 0.09399466961622238, + "learning_rate": 1.607101418069329e-06, + "loss": 0.1018, + "num_input_tokens_seen": 50094880, + "step": 74315 + }, + { + "epoch": 1.8156499645762587, + "grad_norm": 8.643115043640137, + "learning_rate": 1.60703365220429e-06, + "loss": 0.1279, + "num_input_tokens_seen": 50098592, + "step": 74320 + }, + { + "epoch": 1.815772115408106, + "grad_norm": 8.991259574890137, + "learning_rate": 1.606965881924739e-06, + "loss": 0.0388, + "num_input_tokens_seen": 50102112, + "step": 74325 + }, + { + "epoch": 1.815894266239953, + "grad_norm": 23.121740341186523, + "learning_rate": 1.6068981072311689e-06, + "loss": 0.0739, + "num_input_tokens_seen": 50105376, + "step": 74330 + }, + { + "epoch": 1.8160164170718003, + "grad_norm": 0.6355023384094238, + "learning_rate": 1.6068303281240725e-06, + "loss": 0.0028, + "num_input_tokens_seen": 50109024, + "step": 74335 + }, + { + "epoch": 1.8161385679036475, + "grad_norm": 0.07773970812559128, + "learning_rate": 1.6067625446039428e-06, + "loss": 0.034, + "num_input_tokens_seen": 50112608, + "step": 74340 + }, + { + "epoch": 1.8162607187354944, + "grad_norm": 18.94000816345215, + "learning_rate": 1.6066947566712728e-06, + "loss": 0.0482, + "num_input_tokens_seen": 50115808, + "step": 74345 + }, + { + "epoch": 1.8163828695673416, + "grad_norm": 0.08719933778047562, + "learning_rate": 1.6066269643265551e-06, + "loss": 0.0401, + "num_input_tokens_seen": 50119264, + "step": 74350 + }, + { + "epoch": 1.8165050203991888, + "grad_norm": 0.2026486098766327, + "learning_rate": 1.606559167570283e-06, + "loss": 0.1096, + "num_input_tokens_seen": 50122848, + "step": 74355 + }, + { + "epoch": 1.816627171231036, + "grad_norm": 0.22614862024784088, + "learning_rate": 1.6064913664029497e-06, + "loss": 0.094, + "num_input_tokens_seen": 50125984, + "step": 74360 + }, + { + "epoch": 1.8167493220628832, + "grad_norm": 25.180818557739258, + "learning_rate": 1.6064235608250479e-06, + "loss": 0.1342, + "num_input_tokens_seen": 50129248, + "step": 74365 + }, + { + "epoch": 1.8168714728947304, + "grad_norm": 0.04529504477977753, + "learning_rate": 1.6063557508370708e-06, + "loss": 0.0858, + "num_input_tokens_seen": 50132384, + "step": 74370 + }, + { + "epoch": 1.8169936237265776, + "grad_norm": 4.388415336608887, + "learning_rate": 1.6062879364395117e-06, + "loss": 0.0504, + "num_input_tokens_seen": 50135712, + "step": 74375 + }, + { + "epoch": 1.8171157745584248, + "grad_norm": 60.08488845825195, + "learning_rate": 1.6062201176328636e-06, + "loss": 0.1098, + "num_input_tokens_seen": 50138912, + "step": 74380 + }, + { + "epoch": 1.817237925390272, + "grad_norm": 0.33900535106658936, + "learning_rate": 1.6061522944176198e-06, + "loss": 0.0416, + "num_input_tokens_seen": 50142176, + "step": 74385 + }, + { + "epoch": 1.8173600762221191, + "grad_norm": 17.24686050415039, + "learning_rate": 1.6060844667942733e-06, + "loss": 0.12, + "num_input_tokens_seen": 50145504, + "step": 74390 + }, + { + "epoch": 1.8174822270539663, + "grad_norm": 12.73847484588623, + "learning_rate": 1.6060166347633177e-06, + "loss": 0.078, + "num_input_tokens_seen": 50148512, + "step": 74395 + }, + { + "epoch": 1.8176043778858135, + "grad_norm": 0.10699830204248428, + "learning_rate": 1.6059487983252462e-06, + "loss": 0.0009, + "num_input_tokens_seen": 50151904, + "step": 74400 + }, + { + "epoch": 1.8177265287176607, + "grad_norm": 0.09634699672460556, + "learning_rate": 1.605880957480552e-06, + "loss": 0.0013, + "num_input_tokens_seen": 50154912, + "step": 74405 + }, + { + "epoch": 1.8178486795495077, + "grad_norm": 0.02464139088988304, + "learning_rate": 1.6058131122297285e-06, + "loss": 0.0562, + "num_input_tokens_seen": 50158304, + "step": 74410 + }, + { + "epoch": 1.8179708303813549, + "grad_norm": 98.05704498291016, + "learning_rate": 1.605745262573269e-06, + "loss": 0.1851, + "num_input_tokens_seen": 50161248, + "step": 74415 + }, + { + "epoch": 1.818092981213202, + "grad_norm": 1.038590908050537, + "learning_rate": 1.6056774085116671e-06, + "loss": 0.0014, + "num_input_tokens_seen": 50164320, + "step": 74420 + }, + { + "epoch": 1.8182151320450493, + "grad_norm": 0.07926052808761597, + "learning_rate": 1.605609550045416e-06, + "loss": 0.0429, + "num_input_tokens_seen": 50167712, + "step": 74425 + }, + { + "epoch": 1.8183372828768964, + "grad_norm": 0.05045356974005699, + "learning_rate": 1.6055416871750098e-06, + "loss": 0.0007, + "num_input_tokens_seen": 50171680, + "step": 74430 + }, + { + "epoch": 1.8184594337087434, + "grad_norm": 0.08700229227542877, + "learning_rate": 1.6054738199009412e-06, + "loss": 0.1016, + "num_input_tokens_seen": 50175392, + "step": 74435 + }, + { + "epoch": 1.8185815845405906, + "grad_norm": 0.2734203040599823, + "learning_rate": 1.6054059482237043e-06, + "loss": 0.0608, + "num_input_tokens_seen": 50178720, + "step": 74440 + }, + { + "epoch": 1.8187037353724378, + "grad_norm": 0.25550442934036255, + "learning_rate": 1.6053380721437927e-06, + "loss": 0.0404, + "num_input_tokens_seen": 50181536, + "step": 74445 + }, + { + "epoch": 1.818825886204285, + "grad_norm": 33.2105827331543, + "learning_rate": 1.6052701916616993e-06, + "loss": 0.1494, + "num_input_tokens_seen": 50184800, + "step": 74450 + }, + { + "epoch": 1.8189480370361322, + "grad_norm": 0.3790890872478485, + "learning_rate": 1.6052023067779189e-06, + "loss": 0.0619, + "num_input_tokens_seen": 50187936, + "step": 74455 + }, + { + "epoch": 1.8190701878679794, + "grad_norm": 0.04852888360619545, + "learning_rate": 1.605134417492944e-06, + "loss": 0.0656, + "num_input_tokens_seen": 50191008, + "step": 74460 + }, + { + "epoch": 1.8191923386998265, + "grad_norm": 0.017970800399780273, + "learning_rate": 1.6050665238072689e-06, + "loss": 0.0818, + "num_input_tokens_seen": 50194400, + "step": 74465 + }, + { + "epoch": 1.8193144895316737, + "grad_norm": 0.09376344829797745, + "learning_rate": 1.6049986257213878e-06, + "loss": 0.053, + "num_input_tokens_seen": 50197280, + "step": 74470 + }, + { + "epoch": 1.819436640363521, + "grad_norm": 14.67425537109375, + "learning_rate": 1.6049307232357935e-06, + "loss": 0.0762, + "num_input_tokens_seen": 50200480, + "step": 74475 + }, + { + "epoch": 1.8195587911953681, + "grad_norm": 0.39991647005081177, + "learning_rate": 1.6048628163509803e-06, + "loss": 0.0584, + "num_input_tokens_seen": 50203936, + "step": 74480 + }, + { + "epoch": 1.8196809420272153, + "grad_norm": 0.068689726293087, + "learning_rate": 1.6047949050674422e-06, + "loss": 0.042, + "num_input_tokens_seen": 50207136, + "step": 74485 + }, + { + "epoch": 1.8198030928590625, + "grad_norm": 38.983455657958984, + "learning_rate": 1.6047269893856728e-06, + "loss": 0.1156, + "num_input_tokens_seen": 50210144, + "step": 74490 + }, + { + "epoch": 1.8199252436909097, + "grad_norm": 79.4344482421875, + "learning_rate": 1.604659069306166e-06, + "loss": 0.1282, + "num_input_tokens_seen": 50213792, + "step": 74495 + }, + { + "epoch": 1.8200473945227567, + "grad_norm": 13.209699630737305, + "learning_rate": 1.604591144829416e-06, + "loss": 0.1632, + "num_input_tokens_seen": 50217312, + "step": 74500 + }, + { + "epoch": 1.8201695453546038, + "grad_norm": 8.681307792663574, + "learning_rate": 1.6045232159559166e-06, + "loss": 0.1738, + "num_input_tokens_seen": 50220384, + "step": 74505 + }, + { + "epoch": 1.820291696186451, + "grad_norm": 2.21926212310791, + "learning_rate": 1.6044552826861613e-06, + "loss": 0.0321, + "num_input_tokens_seen": 50223840, + "step": 74510 + }, + { + "epoch": 1.8204138470182982, + "grad_norm": 0.26441994309425354, + "learning_rate": 1.604387345020645e-06, + "loss": 0.0891, + "num_input_tokens_seen": 50227232, + "step": 74515 + }, + { + "epoch": 1.8205359978501452, + "grad_norm": 0.08761029690504074, + "learning_rate": 1.6043194029598612e-06, + "loss": 0.0439, + "num_input_tokens_seen": 50230752, + "step": 74520 + }, + { + "epoch": 1.8206581486819924, + "grad_norm": 12.762765884399414, + "learning_rate": 1.6042514565043047e-06, + "loss": 0.1376, + "num_input_tokens_seen": 50234016, + "step": 74525 + }, + { + "epoch": 1.8207802995138396, + "grad_norm": 2.54213285446167, + "learning_rate": 1.6041835056544683e-06, + "loss": 0.0032, + "num_input_tokens_seen": 50237216, + "step": 74530 + }, + { + "epoch": 1.8209024503456868, + "grad_norm": 0.26932165026664734, + "learning_rate": 1.6041155504108477e-06, + "loss": 0.0703, + "num_input_tokens_seen": 50240992, + "step": 74535 + }, + { + "epoch": 1.821024601177534, + "grad_norm": 0.16381070017814636, + "learning_rate": 1.6040475907739356e-06, + "loss": 0.104, + "num_input_tokens_seen": 50244320, + "step": 74540 + }, + { + "epoch": 1.8211467520093811, + "grad_norm": 10.372252464294434, + "learning_rate": 1.6039796267442273e-06, + "loss": 0.0845, + "num_input_tokens_seen": 50247776, + "step": 74545 + }, + { + "epoch": 1.8212689028412283, + "grad_norm": 2.9311392307281494, + "learning_rate": 1.6039116583222168e-06, + "loss": 0.1112, + "num_input_tokens_seen": 50251616, + "step": 74550 + }, + { + "epoch": 1.8213910536730755, + "grad_norm": 0.2916661500930786, + "learning_rate": 1.603843685508398e-06, + "loss": 0.0036, + "num_input_tokens_seen": 50255264, + "step": 74555 + }, + { + "epoch": 1.8215132045049227, + "grad_norm": 0.24182464182376862, + "learning_rate": 1.603775708303266e-06, + "loss": 0.0749, + "num_input_tokens_seen": 50258592, + "step": 74560 + }, + { + "epoch": 1.82163535533677, + "grad_norm": 0.12705932557582855, + "learning_rate": 1.6037077267073143e-06, + "loss": 0.0439, + "num_input_tokens_seen": 50261472, + "step": 74565 + }, + { + "epoch": 1.821757506168617, + "grad_norm": 0.017557255923748016, + "learning_rate": 1.6036397407210376e-06, + "loss": 0.0977, + "num_input_tokens_seen": 50264992, + "step": 74570 + }, + { + "epoch": 1.8218796570004643, + "grad_norm": 0.5035400390625, + "learning_rate": 1.6035717503449302e-06, + "loss": 0.0846, + "num_input_tokens_seen": 50268576, + "step": 74575 + }, + { + "epoch": 1.8220018078323115, + "grad_norm": 1.3602540493011475, + "learning_rate": 1.603503755579487e-06, + "loss": 0.0368, + "num_input_tokens_seen": 50271904, + "step": 74580 + }, + { + "epoch": 1.8221239586641587, + "grad_norm": 192.67697143554688, + "learning_rate": 1.6034357564252021e-06, + "loss": 0.303, + "num_input_tokens_seen": 50275296, + "step": 74585 + }, + { + "epoch": 1.8222461094960056, + "grad_norm": 3.1577250957489014, + "learning_rate": 1.6033677528825699e-06, + "loss": 0.0453, + "num_input_tokens_seen": 50278624, + "step": 74590 + }, + { + "epoch": 1.8223682603278528, + "grad_norm": 12.087418556213379, + "learning_rate": 1.6032997449520855e-06, + "loss": 0.0841, + "num_input_tokens_seen": 50282016, + "step": 74595 + }, + { + "epoch": 1.8224904111597, + "grad_norm": 119.16487121582031, + "learning_rate": 1.6032317326342427e-06, + "loss": 0.0473, + "num_input_tokens_seen": 50285152, + "step": 74600 + }, + { + "epoch": 1.8226125619915472, + "grad_norm": 0.6227423548698425, + "learning_rate": 1.6031637159295366e-06, + "loss": 0.0353, + "num_input_tokens_seen": 50288672, + "step": 74605 + }, + { + "epoch": 1.8227347128233942, + "grad_norm": 0.19224794209003448, + "learning_rate": 1.6030956948384618e-06, + "loss": 0.0296, + "num_input_tokens_seen": 50291744, + "step": 74610 + }, + { + "epoch": 1.8228568636552414, + "grad_norm": 12.381990432739258, + "learning_rate": 1.6030276693615129e-06, + "loss": 0.221, + "num_input_tokens_seen": 50295136, + "step": 74615 + }, + { + "epoch": 1.8229790144870885, + "grad_norm": 14.696539878845215, + "learning_rate": 1.6029596394991844e-06, + "loss": 0.1465, + "num_input_tokens_seen": 50298656, + "step": 74620 + }, + { + "epoch": 1.8231011653189357, + "grad_norm": 27.2199764251709, + "learning_rate": 1.6028916052519714e-06, + "loss": 0.1616, + "num_input_tokens_seen": 50302112, + "step": 74625 + }, + { + "epoch": 1.823223316150783, + "grad_norm": 292.4683837890625, + "learning_rate": 1.6028235666203687e-06, + "loss": 0.1432, + "num_input_tokens_seen": 50305696, + "step": 74630 + }, + { + "epoch": 1.8233454669826301, + "grad_norm": 0.1818641871213913, + "learning_rate": 1.6027555236048705e-06, + "loss": 0.0013, + "num_input_tokens_seen": 50309472, + "step": 74635 + }, + { + "epoch": 1.8234676178144773, + "grad_norm": 2.184302806854248, + "learning_rate": 1.6026874762059722e-06, + "loss": 0.0759, + "num_input_tokens_seen": 50312672, + "step": 74640 + }, + { + "epoch": 1.8235897686463245, + "grad_norm": 11.973766326904297, + "learning_rate": 1.6026194244241683e-06, + "loss": 0.1408, + "num_input_tokens_seen": 50315744, + "step": 74645 + }, + { + "epoch": 1.8237119194781717, + "grad_norm": 6.665470600128174, + "learning_rate": 1.602551368259954e-06, + "loss": 0.1048, + "num_input_tokens_seen": 50319392, + "step": 74650 + }, + { + "epoch": 1.8238340703100189, + "grad_norm": 0.46375200152397156, + "learning_rate": 1.602483307713824e-06, + "loss": 0.1153, + "num_input_tokens_seen": 50322528, + "step": 74655 + }, + { + "epoch": 1.823956221141866, + "grad_norm": 7.4403533935546875, + "learning_rate": 1.6024152427862733e-06, + "loss": 0.1064, + "num_input_tokens_seen": 50325920, + "step": 74660 + }, + { + "epoch": 1.8240783719737133, + "grad_norm": 0.5627632141113281, + "learning_rate": 1.6023471734777971e-06, + "loss": 0.0453, + "num_input_tokens_seen": 50328992, + "step": 74665 + }, + { + "epoch": 1.8242005228055604, + "grad_norm": 1.347942590713501, + "learning_rate": 1.6022790997888903e-06, + "loss": 0.0076, + "num_input_tokens_seen": 50332320, + "step": 74670 + }, + { + "epoch": 1.8243226736374076, + "grad_norm": 0.6452302932739258, + "learning_rate": 1.6022110217200478e-06, + "loss": 0.0306, + "num_input_tokens_seen": 50336096, + "step": 74675 + }, + { + "epoch": 1.8244448244692546, + "grad_norm": 8.13100814819336, + "learning_rate": 1.6021429392717645e-06, + "loss": 0.1109, + "num_input_tokens_seen": 50339104, + "step": 74680 + }, + { + "epoch": 1.8245669753011018, + "grad_norm": 0.6359624266624451, + "learning_rate": 1.6020748524445361e-06, + "loss": 0.0014, + "num_input_tokens_seen": 50342432, + "step": 74685 + }, + { + "epoch": 1.824689126132949, + "grad_norm": 0.28832563757896423, + "learning_rate": 1.6020067612388575e-06, + "loss": 0.0579, + "num_input_tokens_seen": 50345952, + "step": 74690 + }, + { + "epoch": 1.8248112769647962, + "grad_norm": 0.3576095402240753, + "learning_rate": 1.6019386656552234e-06, + "loss": 0.1848, + "num_input_tokens_seen": 50349088, + "step": 74695 + }, + { + "epoch": 1.8249334277966431, + "grad_norm": 0.17821946740150452, + "learning_rate": 1.6018705656941299e-06, + "loss": 0.0851, + "num_input_tokens_seen": 50352352, + "step": 74700 + }, + { + "epoch": 1.8250555786284903, + "grad_norm": 1.7348898649215698, + "learning_rate": 1.6018024613560717e-06, + "loss": 0.0545, + "num_input_tokens_seen": 50356128, + "step": 74705 + }, + { + "epoch": 1.8251777294603375, + "grad_norm": 2.7360644340515137, + "learning_rate": 1.601734352641544e-06, + "loss": 0.0043, + "num_input_tokens_seen": 50359904, + "step": 74710 + }, + { + "epoch": 1.8252998802921847, + "grad_norm": 0.2862713634967804, + "learning_rate": 1.6016662395510422e-06, + "loss": 0.1019, + "num_input_tokens_seen": 50363872, + "step": 74715 + }, + { + "epoch": 1.825422031124032, + "grad_norm": 9.152181625366211, + "learning_rate": 1.6015981220850616e-06, + "loss": 0.0392, + "num_input_tokens_seen": 50367200, + "step": 74720 + }, + { + "epoch": 1.825544181955879, + "grad_norm": 40.95872116088867, + "learning_rate": 1.601530000244098e-06, + "loss": 0.166, + "num_input_tokens_seen": 50370912, + "step": 74725 + }, + { + "epoch": 1.8256663327877263, + "grad_norm": 4.845951557159424, + "learning_rate": 1.6014618740286458e-06, + "loss": 0.0225, + "num_input_tokens_seen": 50374240, + "step": 74730 + }, + { + "epoch": 1.8257884836195735, + "grad_norm": 0.17217379808425903, + "learning_rate": 1.6013937434392015e-06, + "loss": 0.1129, + "num_input_tokens_seen": 50377440, + "step": 74735 + }, + { + "epoch": 1.8259106344514207, + "grad_norm": 3.039346218109131, + "learning_rate": 1.6013256084762603e-06, + "loss": 0.0816, + "num_input_tokens_seen": 50380704, + "step": 74740 + }, + { + "epoch": 1.8260327852832678, + "grad_norm": 0.3498546779155731, + "learning_rate": 1.6012574691403174e-06, + "loss": 0.0287, + "num_input_tokens_seen": 50383776, + "step": 74745 + }, + { + "epoch": 1.826154936115115, + "grad_norm": 0.5262460708618164, + "learning_rate": 1.6011893254318682e-06, + "loss": 0.0916, + "num_input_tokens_seen": 50386848, + "step": 74750 + }, + { + "epoch": 1.8262770869469622, + "grad_norm": 0.03679165989160538, + "learning_rate": 1.601121177351409e-06, + "loss": 0.0883, + "num_input_tokens_seen": 50390112, + "step": 74755 + }, + { + "epoch": 1.8263992377788094, + "grad_norm": 123.02760314941406, + "learning_rate": 1.6010530248994345e-06, + "loss": 0.0841, + "num_input_tokens_seen": 50393504, + "step": 74760 + }, + { + "epoch": 1.8265213886106566, + "grad_norm": 0.201580211520195, + "learning_rate": 1.6009848680764409e-06, + "loss": 0.0606, + "num_input_tokens_seen": 50396576, + "step": 74765 + }, + { + "epoch": 1.8266435394425036, + "grad_norm": 230.34939575195312, + "learning_rate": 1.6009167068829239e-06, + "loss": 0.0721, + "num_input_tokens_seen": 50400032, + "step": 74770 + }, + { + "epoch": 1.8267656902743508, + "grad_norm": 11.82022762298584, + "learning_rate": 1.6008485413193786e-06, + "loss": 0.1413, + "num_input_tokens_seen": 50403360, + "step": 74775 + }, + { + "epoch": 1.826887841106198, + "grad_norm": 0.478629469871521, + "learning_rate": 1.6007803713863015e-06, + "loss": 0.0292, + "num_input_tokens_seen": 50406560, + "step": 74780 + }, + { + "epoch": 1.8270099919380451, + "grad_norm": 0.9284724593162537, + "learning_rate": 1.6007121970841877e-06, + "loss": 0.0033, + "num_input_tokens_seen": 50410464, + "step": 74785 + }, + { + "epoch": 1.827132142769892, + "grad_norm": 20.248388290405273, + "learning_rate": 1.6006440184135333e-06, + "loss": 0.1076, + "num_input_tokens_seen": 50413472, + "step": 74790 + }, + { + "epoch": 1.8272542936017393, + "grad_norm": 1.305576205253601, + "learning_rate": 1.6005758353748338e-06, + "loss": 0.0026, + "num_input_tokens_seen": 50416544, + "step": 74795 + }, + { + "epoch": 1.8273764444335865, + "grad_norm": 9.549858093261719, + "learning_rate": 1.6005076479685854e-06, + "loss": 0.0919, + "num_input_tokens_seen": 50419680, + "step": 74800 + }, + { + "epoch": 1.8274985952654337, + "grad_norm": 0.11238489300012589, + "learning_rate": 1.600439456195284e-06, + "loss": 0.0169, + "num_input_tokens_seen": 50423072, + "step": 74805 + }, + { + "epoch": 1.8276207460972809, + "grad_norm": 22.02189064025879, + "learning_rate": 1.6003712600554255e-06, + "loss": 0.1338, + "num_input_tokens_seen": 50426016, + "step": 74810 + }, + { + "epoch": 1.827742896929128, + "grad_norm": 0.10497987270355225, + "learning_rate": 1.6003030595495056e-06, + "loss": 0.0809, + "num_input_tokens_seen": 50429472, + "step": 74815 + }, + { + "epoch": 1.8278650477609752, + "grad_norm": 21.62155532836914, + "learning_rate": 1.6002348546780202e-06, + "loss": 0.1086, + "num_input_tokens_seen": 50432736, + "step": 74820 + }, + { + "epoch": 1.8279871985928224, + "grad_norm": 53.237098693847656, + "learning_rate": 1.6001666454414657e-06, + "loss": 0.3054, + "num_input_tokens_seen": 50436320, + "step": 74825 + }, + { + "epoch": 1.8281093494246696, + "grad_norm": 0.15742164850234985, + "learning_rate": 1.6000984318403376e-06, + "loss": 0.0274, + "num_input_tokens_seen": 50439776, + "step": 74830 + }, + { + "epoch": 1.8282315002565168, + "grad_norm": 18.078733444213867, + "learning_rate": 1.6000302138751328e-06, + "loss": 0.1356, + "num_input_tokens_seen": 50443168, + "step": 74835 + }, + { + "epoch": 1.828353651088364, + "grad_norm": 4.11641263961792, + "learning_rate": 1.5999619915463466e-06, + "loss": 0.0999, + "num_input_tokens_seen": 50446752, + "step": 74840 + }, + { + "epoch": 1.8284758019202112, + "grad_norm": 38.77667999267578, + "learning_rate": 1.5998937648544756e-06, + "loss": 0.0559, + "num_input_tokens_seen": 50450144, + "step": 74845 + }, + { + "epoch": 1.8285979527520584, + "grad_norm": 0.16692113876342773, + "learning_rate": 1.5998255338000157e-06, + "loss": 0.0009, + "num_input_tokens_seen": 50453280, + "step": 74850 + }, + { + "epoch": 1.8287201035839054, + "grad_norm": 0.014067198149859905, + "learning_rate": 1.599757298383463e-06, + "loss": 0.053, + "num_input_tokens_seen": 50456416, + "step": 74855 + }, + { + "epoch": 1.8288422544157525, + "grad_norm": 0.4518554210662842, + "learning_rate": 1.599689058605314e-06, + "loss": 0.0388, + "num_input_tokens_seen": 50459488, + "step": 74860 + }, + { + "epoch": 1.8289644052475997, + "grad_norm": 32.26401138305664, + "learning_rate": 1.599620814466065e-06, + "loss": 0.075, + "num_input_tokens_seen": 50463776, + "step": 74865 + }, + { + "epoch": 1.829086556079447, + "grad_norm": 0.04703347757458687, + "learning_rate": 1.599552565966212e-06, + "loss": 0.071, + "num_input_tokens_seen": 50467040, + "step": 74870 + }, + { + "epoch": 1.8292087069112941, + "grad_norm": 0.25102993845939636, + "learning_rate": 1.5994843131062519e-06, + "loss": 0.0455, + "num_input_tokens_seen": 50471200, + "step": 74875 + }, + { + "epoch": 1.829330857743141, + "grad_norm": 11.341893196105957, + "learning_rate": 1.5994160558866802e-06, + "loss": 0.0644, + "num_input_tokens_seen": 50474272, + "step": 74880 + }, + { + "epoch": 1.8294530085749883, + "grad_norm": 19.64974594116211, + "learning_rate": 1.5993477943079937e-06, + "loss": 0.1855, + "num_input_tokens_seen": 50477792, + "step": 74885 + }, + { + "epoch": 1.8295751594068355, + "grad_norm": 0.3654881715774536, + "learning_rate": 1.599279528370689e-06, + "loss": 0.0278, + "num_input_tokens_seen": 50481248, + "step": 74890 + }, + { + "epoch": 1.8296973102386827, + "grad_norm": 0.16216430068016052, + "learning_rate": 1.5992112580752623e-06, + "loss": 0.0006, + "num_input_tokens_seen": 50484896, + "step": 74895 + }, + { + "epoch": 1.8298194610705298, + "grad_norm": 19.21926498413086, + "learning_rate": 1.5991429834222104e-06, + "loss": 0.1846, + "num_input_tokens_seen": 50488160, + "step": 74900 + }, + { + "epoch": 1.829941611902377, + "grad_norm": 10.870716094970703, + "learning_rate": 1.5990747044120294e-06, + "loss": 0.005, + "num_input_tokens_seen": 50491872, + "step": 74905 + }, + { + "epoch": 1.8300637627342242, + "grad_norm": 0.14991120994091034, + "learning_rate": 1.5990064210452158e-06, + "loss": 0.1366, + "num_input_tokens_seen": 50495392, + "step": 74910 + }, + { + "epoch": 1.8301859135660714, + "grad_norm": 0.1929914802312851, + "learning_rate": 1.5989381333222664e-06, + "loss": 0.0999, + "num_input_tokens_seen": 50498848, + "step": 74915 + }, + { + "epoch": 1.8303080643979186, + "grad_norm": 18.372793197631836, + "learning_rate": 1.5988698412436783e-06, + "loss": 0.034, + "num_input_tokens_seen": 50502368, + "step": 74920 + }, + { + "epoch": 1.8304302152297658, + "grad_norm": 13.448803901672363, + "learning_rate": 1.5988015448099472e-06, + "loss": 0.0404, + "num_input_tokens_seen": 50505760, + "step": 74925 + }, + { + "epoch": 1.830552366061613, + "grad_norm": 0.15691198408603668, + "learning_rate": 1.5987332440215705e-06, + "loss": 0.0499, + "num_input_tokens_seen": 50509280, + "step": 74930 + }, + { + "epoch": 1.8306745168934602, + "grad_norm": 21.244211196899414, + "learning_rate": 1.5986649388790443e-06, + "loss": 0.1397, + "num_input_tokens_seen": 50512416, + "step": 74935 + }, + { + "epoch": 1.8307966677253074, + "grad_norm": 329.8246765136719, + "learning_rate": 1.5985966293828659e-06, + "loss": 0.0294, + "num_input_tokens_seen": 50516064, + "step": 74940 + }, + { + "epoch": 1.8309188185571543, + "grad_norm": 0.13184209167957306, + "learning_rate": 1.5985283155335316e-06, + "loss": 0.0787, + "num_input_tokens_seen": 50519456, + "step": 74945 + }, + { + "epoch": 1.8310409693890015, + "grad_norm": 13.898524284362793, + "learning_rate": 1.5984599973315385e-06, + "loss": 0.1047, + "num_input_tokens_seen": 50522592, + "step": 74950 + }, + { + "epoch": 1.8311631202208487, + "grad_norm": 0.10374942421913147, + "learning_rate": 1.5983916747773834e-06, + "loss": 0.0326, + "num_input_tokens_seen": 50526368, + "step": 74955 + }, + { + "epoch": 1.831285271052696, + "grad_norm": 0.08185227960348129, + "learning_rate": 1.598323347871563e-06, + "loss": 0.0006, + "num_input_tokens_seen": 50529696, + "step": 74960 + }, + { + "epoch": 1.831407421884543, + "grad_norm": 13.482980728149414, + "learning_rate": 1.5982550166145744e-06, + "loss": 0.0736, + "num_input_tokens_seen": 50532960, + "step": 74965 + }, + { + "epoch": 1.83152957271639, + "grad_norm": 0.312678724527359, + "learning_rate": 1.5981866810069142e-06, + "loss": 0.082, + "num_input_tokens_seen": 50536416, + "step": 74970 + }, + { + "epoch": 1.8316517235482372, + "grad_norm": 0.11006156355142593, + "learning_rate": 1.5981183410490796e-06, + "loss": 0.0615, + "num_input_tokens_seen": 50540000, + "step": 74975 + }, + { + "epoch": 1.8317738743800844, + "grad_norm": 0.10871271789073944, + "learning_rate": 1.5980499967415677e-06, + "loss": 0.1137, + "num_input_tokens_seen": 50543776, + "step": 74980 + }, + { + "epoch": 1.8318960252119316, + "grad_norm": 0.22636280953884125, + "learning_rate": 1.5979816480848754e-06, + "loss": 0.0009, + "num_input_tokens_seen": 50546976, + "step": 74985 + }, + { + "epoch": 1.8320181760437788, + "grad_norm": 12.59542465209961, + "learning_rate": 1.5979132950794996e-06, + "loss": 0.1536, + "num_input_tokens_seen": 50551264, + "step": 74990 + }, + { + "epoch": 1.832140326875626, + "grad_norm": 17.66059684753418, + "learning_rate": 1.5978449377259376e-06, + "loss": 0.1114, + "num_input_tokens_seen": 50555104, + "step": 74995 + }, + { + "epoch": 1.8322624777074732, + "grad_norm": 4.7083964347839355, + "learning_rate": 1.5977765760246863e-06, + "loss": 0.1184, + "num_input_tokens_seen": 50558560, + "step": 75000 + }, + { + "epoch": 1.8323846285393204, + "grad_norm": 0.0550217367708683, + "learning_rate": 1.597708209976243e-06, + "loss": 0.0198, + "num_input_tokens_seen": 50561760, + "step": 75005 + }, + { + "epoch": 1.8325067793711676, + "grad_norm": 165.42160034179688, + "learning_rate": 1.5976398395811046e-06, + "loss": 0.0595, + "num_input_tokens_seen": 50564960, + "step": 75010 + }, + { + "epoch": 1.8326289302030148, + "grad_norm": 0.008861426264047623, + "learning_rate": 1.5975714648397686e-06, + "loss": 0.0956, + "num_input_tokens_seen": 50568160, + "step": 75015 + }, + { + "epoch": 1.832751081034862, + "grad_norm": 0.027347374707460403, + "learning_rate": 1.5975030857527326e-06, + "loss": 0.0812, + "num_input_tokens_seen": 50571040, + "step": 75020 + }, + { + "epoch": 1.8328732318667091, + "grad_norm": 16.737857818603516, + "learning_rate": 1.5974347023204932e-06, + "loss": 0.1012, + "num_input_tokens_seen": 50574176, + "step": 75025 + }, + { + "epoch": 1.8329953826985563, + "grad_norm": 0.5741835832595825, + "learning_rate": 1.5973663145435482e-06, + "loss": 0.0047, + "num_input_tokens_seen": 50577120, + "step": 75030 + }, + { + "epoch": 1.8331175335304033, + "grad_norm": 1.5658107995986938, + "learning_rate": 1.5972979224223942e-06, + "loss": 0.1798, + "num_input_tokens_seen": 50580640, + "step": 75035 + }, + { + "epoch": 1.8332396843622505, + "grad_norm": 0.02867000922560692, + "learning_rate": 1.597229525957529e-06, + "loss": 0.0024, + "num_input_tokens_seen": 50584352, + "step": 75040 + }, + { + "epoch": 1.8333618351940977, + "grad_norm": 0.545559287071228, + "learning_rate": 1.5971611251494505e-06, + "loss": 0.0988, + "num_input_tokens_seen": 50587360, + "step": 75045 + }, + { + "epoch": 1.8334839860259449, + "grad_norm": 0.14214016497135162, + "learning_rate": 1.5970927199986557e-06, + "loss": 0.1133, + "num_input_tokens_seen": 50590688, + "step": 75050 + }, + { + "epoch": 1.8336061368577918, + "grad_norm": 19.883060455322266, + "learning_rate": 1.5970243105056418e-06, + "loss": 0.1702, + "num_input_tokens_seen": 50593632, + "step": 75055 + }, + { + "epoch": 1.833728287689639, + "grad_norm": 36.7739372253418, + "learning_rate": 1.5969558966709066e-06, + "loss": 0.0866, + "num_input_tokens_seen": 50596896, + "step": 75060 + }, + { + "epoch": 1.8338504385214862, + "grad_norm": 16.683443069458008, + "learning_rate": 1.5968874784949476e-06, + "loss": 0.1055, + "num_input_tokens_seen": 50599968, + "step": 75065 + }, + { + "epoch": 1.8339725893533334, + "grad_norm": 15.718001365661621, + "learning_rate": 1.5968190559782622e-06, + "loss": 0.0348, + "num_input_tokens_seen": 50603104, + "step": 75070 + }, + { + "epoch": 1.8340947401851806, + "grad_norm": 10.17602252960205, + "learning_rate": 1.5967506291213481e-06, + "loss": 0.1494, + "num_input_tokens_seen": 50606496, + "step": 75075 + }, + { + "epoch": 1.8342168910170278, + "grad_norm": 0.4043470621109009, + "learning_rate": 1.5966821979247031e-06, + "loss": 0.0829, + "num_input_tokens_seen": 50610016, + "step": 75080 + }, + { + "epoch": 1.834339041848875, + "grad_norm": 0.36872056126594543, + "learning_rate": 1.5966137623888246e-06, + "loss": 0.072, + "num_input_tokens_seen": 50612960, + "step": 75085 + }, + { + "epoch": 1.8344611926807222, + "grad_norm": 0.2587651014328003, + "learning_rate": 1.5965453225142102e-06, + "loss": 0.0437, + "num_input_tokens_seen": 50616672, + "step": 75090 + }, + { + "epoch": 1.8345833435125694, + "grad_norm": 0.0520302951335907, + "learning_rate": 1.5964768783013579e-06, + "loss": 0.0035, + "num_input_tokens_seen": 50619872, + "step": 75095 + }, + { + "epoch": 1.8347054943444165, + "grad_norm": 31.207691192626953, + "learning_rate": 1.5964084297507652e-06, + "loss": 0.1989, + "num_input_tokens_seen": 50623584, + "step": 75100 + }, + { + "epoch": 1.8348276451762637, + "grad_norm": 16.3104305267334, + "learning_rate": 1.5963399768629299e-06, + "loss": 0.0274, + "num_input_tokens_seen": 50626592, + "step": 75105 + }, + { + "epoch": 1.834949796008111, + "grad_norm": 0.9252538681030273, + "learning_rate": 1.5962715196383503e-06, + "loss": 0.0848, + "num_input_tokens_seen": 50630304, + "step": 75110 + }, + { + "epoch": 1.8350719468399581, + "grad_norm": 0.3111483156681061, + "learning_rate": 1.5962030580775236e-06, + "loss": 0.0054, + "num_input_tokens_seen": 50633696, + "step": 75115 + }, + { + "epoch": 1.8351940976718053, + "grad_norm": 0.1261308789253235, + "learning_rate": 1.596134592180948e-06, + "loss": 0.1591, + "num_input_tokens_seen": 50637024, + "step": 75120 + }, + { + "epoch": 1.8353162485036523, + "grad_norm": 0.15849518775939941, + "learning_rate": 1.5960661219491208e-06, + "loss": 0.0389, + "num_input_tokens_seen": 50640416, + "step": 75125 + }, + { + "epoch": 1.8354383993354995, + "grad_norm": 0.5551608800888062, + "learning_rate": 1.595997647382541e-06, + "loss": 0.1209, + "num_input_tokens_seen": 50643616, + "step": 75130 + }, + { + "epoch": 1.8355605501673466, + "grad_norm": 0.15090814232826233, + "learning_rate": 1.5959291684817057e-06, + "loss": 0.1069, + "num_input_tokens_seen": 50646752, + "step": 75135 + }, + { + "epoch": 1.8356827009991938, + "grad_norm": 0.0902179405093193, + "learning_rate": 1.5958606852471132e-06, + "loss": 0.0722, + "num_input_tokens_seen": 50650272, + "step": 75140 + }, + { + "epoch": 1.8358048518310408, + "grad_norm": 259.31622314453125, + "learning_rate": 1.595792197679262e-06, + "loss": 0.0127, + "num_input_tokens_seen": 50654176, + "step": 75145 + }, + { + "epoch": 1.835927002662888, + "grad_norm": 0.12596093118190765, + "learning_rate": 1.5957237057786492e-06, + "loss": 0.0315, + "num_input_tokens_seen": 50657440, + "step": 75150 + }, + { + "epoch": 1.8360491534947352, + "grad_norm": 0.2953812777996063, + "learning_rate": 1.595655209545774e-06, + "loss": 0.0672, + "num_input_tokens_seen": 50660640, + "step": 75155 + }, + { + "epoch": 1.8361713043265824, + "grad_norm": 10.196359634399414, + "learning_rate": 1.5955867089811332e-06, + "loss": 0.089, + "num_input_tokens_seen": 50664288, + "step": 75160 + }, + { + "epoch": 1.8362934551584296, + "grad_norm": 0.03514920175075531, + "learning_rate": 1.5955182040852257e-06, + "loss": 0.0342, + "num_input_tokens_seen": 50668000, + "step": 75165 + }, + { + "epoch": 1.8364156059902768, + "grad_norm": 71.40430450439453, + "learning_rate": 1.59544969485855e-06, + "loss": 0.0254, + "num_input_tokens_seen": 50671328, + "step": 75170 + }, + { + "epoch": 1.836537756822124, + "grad_norm": 0.9081661105155945, + "learning_rate": 1.5953811813016037e-06, + "loss": 0.003, + "num_input_tokens_seen": 50674720, + "step": 75175 + }, + { + "epoch": 1.8366599076539711, + "grad_norm": 0.2207932323217392, + "learning_rate": 1.5953126634148855e-06, + "loss": 0.1863, + "num_input_tokens_seen": 50678112, + "step": 75180 + }, + { + "epoch": 1.8367820584858183, + "grad_norm": 41.23016357421875, + "learning_rate": 1.5952441411988934e-06, + "loss": 0.1438, + "num_input_tokens_seen": 50681440, + "step": 75185 + }, + { + "epoch": 1.8369042093176655, + "grad_norm": 21.08063316345215, + "learning_rate": 1.5951756146541257e-06, + "loss": 0.1104, + "num_input_tokens_seen": 50684704, + "step": 75190 + }, + { + "epoch": 1.8370263601495127, + "grad_norm": 162.88192749023438, + "learning_rate": 1.5951070837810808e-06, + "loss": 0.089, + "num_input_tokens_seen": 50688736, + "step": 75195 + }, + { + "epoch": 1.83714851098136, + "grad_norm": 0.09662999957799911, + "learning_rate": 1.5950385485802574e-06, + "loss": 0.0431, + "num_input_tokens_seen": 50691808, + "step": 75200 + }, + { + "epoch": 1.837270661813207, + "grad_norm": 0.15366408228874207, + "learning_rate": 1.5949700090521536e-06, + "loss": 0.0347, + "num_input_tokens_seen": 50695136, + "step": 75205 + }, + { + "epoch": 1.8373928126450543, + "grad_norm": 16.026094436645508, + "learning_rate": 1.594901465197268e-06, + "loss": 0.1796, + "num_input_tokens_seen": 50698848, + "step": 75210 + }, + { + "epoch": 1.8375149634769012, + "grad_norm": 33.631309509277344, + "learning_rate": 1.5948329170160983e-06, + "loss": 0.0567, + "num_input_tokens_seen": 50701984, + "step": 75215 + }, + { + "epoch": 1.8376371143087484, + "grad_norm": 0.02875753492116928, + "learning_rate": 1.5947643645091442e-06, + "loss": 0.2215, + "num_input_tokens_seen": 50705440, + "step": 75220 + }, + { + "epoch": 1.8377592651405956, + "grad_norm": 42.001739501953125, + "learning_rate": 1.5946958076769035e-06, + "loss": 0.1647, + "num_input_tokens_seen": 50709472, + "step": 75225 + }, + { + "epoch": 1.8378814159724428, + "grad_norm": 0.2549021244049072, + "learning_rate": 1.5946272465198748e-06, + "loss": 0.0869, + "num_input_tokens_seen": 50712608, + "step": 75230 + }, + { + "epoch": 1.8380035668042898, + "grad_norm": 0.1781689077615738, + "learning_rate": 1.5945586810385572e-06, + "loss": 0.1903, + "num_input_tokens_seen": 50716000, + "step": 75235 + }, + { + "epoch": 1.838125717636137, + "grad_norm": 21.01819610595703, + "learning_rate": 1.5944901112334486e-06, + "loss": 0.2179, + "num_input_tokens_seen": 50719392, + "step": 75240 + }, + { + "epoch": 1.8382478684679842, + "grad_norm": 0.1993514448404312, + "learning_rate": 1.5944215371050482e-06, + "loss": 0.0641, + "num_input_tokens_seen": 50723232, + "step": 75245 + }, + { + "epoch": 1.8383700192998313, + "grad_norm": 91.43272399902344, + "learning_rate": 1.5943529586538543e-06, + "loss": 0.1439, + "num_input_tokens_seen": 50726560, + "step": 75250 + }, + { + "epoch": 1.8384921701316785, + "grad_norm": 30.449052810668945, + "learning_rate": 1.594284375880366e-06, + "loss": 0.0482, + "num_input_tokens_seen": 50729824, + "step": 75255 + }, + { + "epoch": 1.8386143209635257, + "grad_norm": 1.8023955821990967, + "learning_rate": 1.5942157887850818e-06, + "loss": 0.0023, + "num_input_tokens_seen": 50733024, + "step": 75260 + }, + { + "epoch": 1.838736471795373, + "grad_norm": 10.874223709106445, + "learning_rate": 1.5941471973685007e-06, + "loss": 0.0956, + "num_input_tokens_seen": 50736544, + "step": 75265 + }, + { + "epoch": 1.83885862262722, + "grad_norm": 20.578174591064453, + "learning_rate": 1.5940786016311214e-06, + "loss": 0.0363, + "num_input_tokens_seen": 50740064, + "step": 75270 + }, + { + "epoch": 1.8389807734590673, + "grad_norm": 14.255890846252441, + "learning_rate": 1.5940100015734426e-06, + "loss": 0.0902, + "num_input_tokens_seen": 50743136, + "step": 75275 + }, + { + "epoch": 1.8391029242909145, + "grad_norm": 22.841188430786133, + "learning_rate": 1.5939413971959632e-06, + "loss": 0.0455, + "num_input_tokens_seen": 50746400, + "step": 75280 + }, + { + "epoch": 1.8392250751227617, + "grad_norm": 21.676034927368164, + "learning_rate": 1.5938727884991824e-06, + "loss": 0.1632, + "num_input_tokens_seen": 50750048, + "step": 75285 + }, + { + "epoch": 1.8393472259546089, + "grad_norm": 31.792327880859375, + "learning_rate": 1.5938041754835987e-06, + "loss": 0.1416, + "num_input_tokens_seen": 50753440, + "step": 75290 + }, + { + "epoch": 1.839469376786456, + "grad_norm": 1.4194421768188477, + "learning_rate": 1.5937355581497115e-06, + "loss": 0.0531, + "num_input_tokens_seen": 50756704, + "step": 75295 + }, + { + "epoch": 1.8395915276183032, + "grad_norm": 41.14981460571289, + "learning_rate": 1.5936669364980198e-06, + "loss": 0.1068, + "num_input_tokens_seen": 50759904, + "step": 75300 + }, + { + "epoch": 1.8397136784501502, + "grad_norm": 20.266094207763672, + "learning_rate": 1.5935983105290221e-06, + "loss": 0.1172, + "num_input_tokens_seen": 50763040, + "step": 75305 + }, + { + "epoch": 1.8398358292819974, + "grad_norm": 0.4391668140888214, + "learning_rate": 1.593529680243218e-06, + "loss": 0.0771, + "num_input_tokens_seen": 50766432, + "step": 75310 + }, + { + "epoch": 1.8399579801138446, + "grad_norm": 45.64616775512695, + "learning_rate": 1.5934610456411064e-06, + "loss": 0.0495, + "num_input_tokens_seen": 50770016, + "step": 75315 + }, + { + "epoch": 1.8400801309456918, + "grad_norm": 14.22166633605957, + "learning_rate": 1.5933924067231864e-06, + "loss": 0.1261, + "num_input_tokens_seen": 50773664, + "step": 75320 + }, + { + "epoch": 1.8402022817775388, + "grad_norm": 26.305788040161133, + "learning_rate": 1.5933237634899573e-06, + "loss": 0.1299, + "num_input_tokens_seen": 50777120, + "step": 75325 + }, + { + "epoch": 1.840324432609386, + "grad_norm": 0.10604507476091385, + "learning_rate": 1.5932551159419184e-06, + "loss": 0.0197, + "num_input_tokens_seen": 50780384, + "step": 75330 + }, + { + "epoch": 1.8404465834412331, + "grad_norm": 0.2902368903160095, + "learning_rate": 1.5931864640795684e-06, + "loss": 0.0031, + "num_input_tokens_seen": 50783328, + "step": 75335 + }, + { + "epoch": 1.8405687342730803, + "grad_norm": 0.4008534550666809, + "learning_rate": 1.5931178079034072e-06, + "loss": 0.0049, + "num_input_tokens_seen": 50786336, + "step": 75340 + }, + { + "epoch": 1.8406908851049275, + "grad_norm": 1.7231696844100952, + "learning_rate": 1.5930491474139337e-06, + "loss": 0.0367, + "num_input_tokens_seen": 50789472, + "step": 75345 + }, + { + "epoch": 1.8408130359367747, + "grad_norm": 0.048120759427547455, + "learning_rate": 1.592980482611647e-06, + "loss": 0.0835, + "num_input_tokens_seen": 50792672, + "step": 75350 + }, + { + "epoch": 1.840935186768622, + "grad_norm": 0.7223793268203735, + "learning_rate": 1.5929118134970468e-06, + "loss": 0.002, + "num_input_tokens_seen": 50795808, + "step": 75355 + }, + { + "epoch": 1.841057337600469, + "grad_norm": 5.6044602394104, + "learning_rate": 1.5928431400706326e-06, + "loss": 0.1351, + "num_input_tokens_seen": 50799392, + "step": 75360 + }, + { + "epoch": 1.8411794884323163, + "grad_norm": 11.443306922912598, + "learning_rate": 1.5927744623329034e-06, + "loss": 0.0389, + "num_input_tokens_seen": 50802592, + "step": 75365 + }, + { + "epoch": 1.8413016392641635, + "grad_norm": 74.66730499267578, + "learning_rate": 1.5927057802843591e-06, + "loss": 0.0911, + "num_input_tokens_seen": 50806432, + "step": 75370 + }, + { + "epoch": 1.8414237900960106, + "grad_norm": 0.2384769469499588, + "learning_rate": 1.5926370939254987e-06, + "loss": 0.236, + "num_input_tokens_seen": 50809824, + "step": 75375 + }, + { + "epoch": 1.8415459409278578, + "grad_norm": 1.9501762390136719, + "learning_rate": 1.5925684032568221e-06, + "loss": 0.002, + "num_input_tokens_seen": 50813408, + "step": 75380 + }, + { + "epoch": 1.841668091759705, + "grad_norm": 199.66368103027344, + "learning_rate": 1.592499708278829e-06, + "loss": 0.1323, + "num_input_tokens_seen": 50816544, + "step": 75385 + }, + { + "epoch": 1.841790242591552, + "grad_norm": 27.194421768188477, + "learning_rate": 1.5924310089920181e-06, + "loss": 0.0376, + "num_input_tokens_seen": 50819808, + "step": 75390 + }, + { + "epoch": 1.8419123934233992, + "grad_norm": 2.1573281288146973, + "learning_rate": 1.59236230539689e-06, + "loss": 0.1633, + "num_input_tokens_seen": 50823264, + "step": 75395 + }, + { + "epoch": 1.8420345442552464, + "grad_norm": 0.855754017829895, + "learning_rate": 1.5922935974939438e-06, + "loss": 0.0342, + "num_input_tokens_seen": 50826592, + "step": 75400 + }, + { + "epoch": 1.8421566950870936, + "grad_norm": 143.57122802734375, + "learning_rate": 1.592224885283679e-06, + "loss": 0.0632, + "num_input_tokens_seen": 50829856, + "step": 75405 + }, + { + "epoch": 1.8422788459189408, + "grad_norm": 14.12177848815918, + "learning_rate": 1.592156168766596e-06, + "loss": 0.1241, + "num_input_tokens_seen": 50833056, + "step": 75410 + }, + { + "epoch": 1.8424009967507877, + "grad_norm": 0.26234906911849976, + "learning_rate": 1.5920874479431935e-06, + "loss": 0.03, + "num_input_tokens_seen": 50836256, + "step": 75415 + }, + { + "epoch": 1.842523147582635, + "grad_norm": 17.115942001342773, + "learning_rate": 1.592018722813972e-06, + "loss": 0.2169, + "num_input_tokens_seen": 50840096, + "step": 75420 + }, + { + "epoch": 1.842645298414482, + "grad_norm": 10.910876274108887, + "learning_rate": 1.5919499933794313e-06, + "loss": 0.1498, + "num_input_tokens_seen": 50843232, + "step": 75425 + }, + { + "epoch": 1.8427674492463293, + "grad_norm": 20.35202980041504, + "learning_rate": 1.591881259640071e-06, + "loss": 0.0988, + "num_input_tokens_seen": 50846752, + "step": 75430 + }, + { + "epoch": 1.8428896000781765, + "grad_norm": 0.1045135036110878, + "learning_rate": 1.591812521596391e-06, + "loss": 0.0021, + "num_input_tokens_seen": 50850208, + "step": 75435 + }, + { + "epoch": 1.8430117509100237, + "grad_norm": 0.29922038316726685, + "learning_rate": 1.5917437792488913e-06, + "loss": 0.0425, + "num_input_tokens_seen": 50853664, + "step": 75440 + }, + { + "epoch": 1.8431339017418709, + "grad_norm": 0.24386277794837952, + "learning_rate": 1.5916750325980713e-06, + "loss": 0.1005, + "num_input_tokens_seen": 50857120, + "step": 75445 + }, + { + "epoch": 1.843256052573718, + "grad_norm": 0.11934429407119751, + "learning_rate": 1.5916062816444313e-06, + "loss": 0.0014, + "num_input_tokens_seen": 50860256, + "step": 75450 + }, + { + "epoch": 1.8433782034055652, + "grad_norm": 47.07133865356445, + "learning_rate": 1.5915375263884716e-06, + "loss": 0.0911, + "num_input_tokens_seen": 50863712, + "step": 75455 + }, + { + "epoch": 1.8435003542374124, + "grad_norm": 46.206600189208984, + "learning_rate": 1.591468766830692e-06, + "loss": 0.0649, + "num_input_tokens_seen": 50867488, + "step": 75460 + }, + { + "epoch": 1.8436225050692596, + "grad_norm": 0.06218447536230087, + "learning_rate": 1.5914000029715922e-06, + "loss": 0.0338, + "num_input_tokens_seen": 50870816, + "step": 75465 + }, + { + "epoch": 1.8437446559011068, + "grad_norm": 0.04517967998981476, + "learning_rate": 1.5913312348116726e-06, + "loss": 0.0766, + "num_input_tokens_seen": 50874144, + "step": 75470 + }, + { + "epoch": 1.843866806732954, + "grad_norm": 0.13802210986614227, + "learning_rate": 1.591262462351433e-06, + "loss": 0.0731, + "num_input_tokens_seen": 50877664, + "step": 75475 + }, + { + "epoch": 1.843988957564801, + "grad_norm": 0.2226584255695343, + "learning_rate": 1.5911936855913738e-06, + "loss": 0.0005, + "num_input_tokens_seen": 50880992, + "step": 75480 + }, + { + "epoch": 1.8441111083966482, + "grad_norm": 38.50403594970703, + "learning_rate": 1.5911249045319954e-06, + "loss": 0.1547, + "num_input_tokens_seen": 50883936, + "step": 75485 + }, + { + "epoch": 1.8442332592284953, + "grad_norm": 0.5120588541030884, + "learning_rate": 1.5910561191737975e-06, + "loss": 0.001, + "num_input_tokens_seen": 50887776, + "step": 75490 + }, + { + "epoch": 1.8443554100603425, + "grad_norm": 6.489198207855225, + "learning_rate": 1.5909873295172807e-06, + "loss": 0.0598, + "num_input_tokens_seen": 50891424, + "step": 75495 + }, + { + "epoch": 1.8444775608921897, + "grad_norm": 0.11778406798839569, + "learning_rate": 1.590918535562945e-06, + "loss": 0.0868, + "num_input_tokens_seen": 50895008, + "step": 75500 + }, + { + "epoch": 1.8445997117240367, + "grad_norm": 26.87613868713379, + "learning_rate": 1.5908497373112903e-06, + "loss": 0.1014, + "num_input_tokens_seen": 50898272, + "step": 75505 + }, + { + "epoch": 1.8447218625558839, + "grad_norm": 0.16408833861351013, + "learning_rate": 1.590780934762818e-06, + "loss": 0.0017, + "num_input_tokens_seen": 50901344, + "step": 75510 + }, + { + "epoch": 1.844844013387731, + "grad_norm": 0.22428208589553833, + "learning_rate": 1.5907121279180276e-06, + "loss": 0.0018, + "num_input_tokens_seen": 50904800, + "step": 75515 + }, + { + "epoch": 1.8449661642195783, + "grad_norm": 0.16060960292816162, + "learning_rate": 1.5906433167774198e-06, + "loss": 0.084, + "num_input_tokens_seen": 50908192, + "step": 75520 + }, + { + "epoch": 1.8450883150514255, + "grad_norm": 0.32337161898612976, + "learning_rate": 1.5905745013414949e-06, + "loss": 0.0587, + "num_input_tokens_seen": 50911264, + "step": 75525 + }, + { + "epoch": 1.8452104658832726, + "grad_norm": 34.4917106628418, + "learning_rate": 1.5905056816107533e-06, + "loss": 0.1477, + "num_input_tokens_seen": 50914528, + "step": 75530 + }, + { + "epoch": 1.8453326167151198, + "grad_norm": 0.016017381101846695, + "learning_rate": 1.5904368575856958e-06, + "loss": 0.085, + "num_input_tokens_seen": 50917984, + "step": 75535 + }, + { + "epoch": 1.845454767546967, + "grad_norm": 0.15001077950000763, + "learning_rate": 1.5903680292668224e-06, + "loss": 0.0291, + "num_input_tokens_seen": 50920928, + "step": 75540 + }, + { + "epoch": 1.8455769183788142, + "grad_norm": 41.64552307128906, + "learning_rate": 1.590299196654634e-06, + "loss": 0.2256, + "num_input_tokens_seen": 50924384, + "step": 75545 + }, + { + "epoch": 1.8456990692106614, + "grad_norm": 0.10622581094503403, + "learning_rate": 1.5902303597496309e-06, + "loss": 0.0007, + "num_input_tokens_seen": 50928352, + "step": 75550 + }, + { + "epoch": 1.8458212200425086, + "grad_norm": 0.18985126912593842, + "learning_rate": 1.590161518552314e-06, + "loss": 0.1029, + "num_input_tokens_seen": 50931616, + "step": 75555 + }, + { + "epoch": 1.8459433708743558, + "grad_norm": 0.15927618741989136, + "learning_rate": 1.590092673063184e-06, + "loss": 0.0211, + "num_input_tokens_seen": 50935200, + "step": 75560 + }, + { + "epoch": 1.846065521706203, + "grad_norm": 0.08424060046672821, + "learning_rate": 1.5900238232827412e-06, + "loss": 0.1201, + "num_input_tokens_seen": 50938656, + "step": 75565 + }, + { + "epoch": 1.84618767253805, + "grad_norm": 0.03481528162956238, + "learning_rate": 1.5899549692114864e-06, + "loss": 0.1135, + "num_input_tokens_seen": 50942048, + "step": 75570 + }, + { + "epoch": 1.8463098233698971, + "grad_norm": 0.04872078076004982, + "learning_rate": 1.5898861108499205e-06, + "loss": 0.1495, + "num_input_tokens_seen": 50945312, + "step": 75575 + }, + { + "epoch": 1.8464319742017443, + "grad_norm": 0.22296403348445892, + "learning_rate": 1.5898172481985442e-06, + "loss": 0.0314, + "num_input_tokens_seen": 50948448, + "step": 75580 + }, + { + "epoch": 1.8465541250335915, + "grad_norm": 22.515535354614258, + "learning_rate": 1.589748381257858e-06, + "loss": 0.0422, + "num_input_tokens_seen": 50951904, + "step": 75585 + }, + { + "epoch": 1.8466762758654387, + "grad_norm": 0.6905881762504578, + "learning_rate": 1.5896795100283631e-06, + "loss": 0.0283, + "num_input_tokens_seen": 50955360, + "step": 75590 + }, + { + "epoch": 1.8467984266972857, + "grad_norm": 7.03764009475708, + "learning_rate": 1.5896106345105601e-06, + "loss": 0.0983, + "num_input_tokens_seen": 50958688, + "step": 75595 + }, + { + "epoch": 1.8469205775291329, + "grad_norm": 0.14742566645145416, + "learning_rate": 1.5895417547049502e-06, + "loss": 0.0621, + "num_input_tokens_seen": 50962848, + "step": 75600 + }, + { + "epoch": 1.84704272836098, + "grad_norm": 0.4235324263572693, + "learning_rate": 1.5894728706120336e-06, + "loss": 0.0487, + "num_input_tokens_seen": 50966048, + "step": 75605 + }, + { + "epoch": 1.8471648791928272, + "grad_norm": 2.398937940597534, + "learning_rate": 1.5894039822323121e-06, + "loss": 0.0017, + "num_input_tokens_seen": 50969440, + "step": 75610 + }, + { + "epoch": 1.8472870300246744, + "grad_norm": 0.14467774331569672, + "learning_rate": 1.5893350895662865e-06, + "loss": 0.0604, + "num_input_tokens_seen": 50972448, + "step": 75615 + }, + { + "epoch": 1.8474091808565216, + "grad_norm": 308.7352600097656, + "learning_rate": 1.5892661926144575e-06, + "loss": 0.0731, + "num_input_tokens_seen": 50975904, + "step": 75620 + }, + { + "epoch": 1.8475313316883688, + "grad_norm": 0.09449799358844757, + "learning_rate": 1.5891972913773263e-06, + "loss": 0.0026, + "num_input_tokens_seen": 50978912, + "step": 75625 + }, + { + "epoch": 1.847653482520216, + "grad_norm": 281.8724670410156, + "learning_rate": 1.5891283858553935e-06, + "loss": 0.1608, + "num_input_tokens_seen": 50982048, + "step": 75630 + }, + { + "epoch": 1.8477756333520632, + "grad_norm": 9.985726356506348, + "learning_rate": 1.5890594760491606e-06, + "loss": 0.1126, + "num_input_tokens_seen": 50985440, + "step": 75635 + }, + { + "epoch": 1.8478977841839104, + "grad_norm": 0.03078383021056652, + "learning_rate": 1.5889905619591292e-06, + "loss": 0.0486, + "num_input_tokens_seen": 50988576, + "step": 75640 + }, + { + "epoch": 1.8480199350157576, + "grad_norm": 70.28709411621094, + "learning_rate": 1.5889216435858001e-06, + "loss": 0.1633, + "num_input_tokens_seen": 50991648, + "step": 75645 + }, + { + "epoch": 1.8481420858476048, + "grad_norm": 0.9800973534584045, + "learning_rate": 1.5888527209296743e-06, + "loss": 0.1638, + "num_input_tokens_seen": 50995424, + "step": 75650 + }, + { + "epoch": 1.848264236679452, + "grad_norm": 0.2955226004123688, + "learning_rate": 1.588783793991253e-06, + "loss": 0.0348, + "num_input_tokens_seen": 50998816, + "step": 75655 + }, + { + "epoch": 1.848386387511299, + "grad_norm": 35.41645812988281, + "learning_rate": 1.5887148627710372e-06, + "loss": 0.0945, + "num_input_tokens_seen": 51002208, + "step": 75660 + }, + { + "epoch": 1.848508538343146, + "grad_norm": 0.2809849679470062, + "learning_rate": 1.5886459272695292e-06, + "loss": 0.0817, + "num_input_tokens_seen": 51005472, + "step": 75665 + }, + { + "epoch": 1.8486306891749933, + "grad_norm": 53.2969856262207, + "learning_rate": 1.5885769874872294e-06, + "loss": 0.2804, + "num_input_tokens_seen": 51009120, + "step": 75670 + }, + { + "epoch": 1.8487528400068405, + "grad_norm": 27.16531753540039, + "learning_rate": 1.5885080434246394e-06, + "loss": 0.1359, + "num_input_tokens_seen": 51012128, + "step": 75675 + }, + { + "epoch": 1.8488749908386874, + "grad_norm": 0.28885719180107117, + "learning_rate": 1.5884390950822608e-06, + "loss": 0.0364, + "num_input_tokens_seen": 51015712, + "step": 75680 + }, + { + "epoch": 1.8489971416705346, + "grad_norm": 5.167070388793945, + "learning_rate": 1.5883701424605947e-06, + "loss": 0.067, + "num_input_tokens_seen": 51018848, + "step": 75685 + }, + { + "epoch": 1.8491192925023818, + "grad_norm": 23.454187393188477, + "learning_rate": 1.5883011855601427e-06, + "loss": 0.1145, + "num_input_tokens_seen": 51022240, + "step": 75690 + }, + { + "epoch": 1.849241443334229, + "grad_norm": 0.4352051019668579, + "learning_rate": 1.5882322243814063e-06, + "loss": 0.002, + "num_input_tokens_seen": 51025504, + "step": 75695 + }, + { + "epoch": 1.8493635941660762, + "grad_norm": 59.894744873046875, + "learning_rate": 1.588163258924887e-06, + "loss": 0.0279, + "num_input_tokens_seen": 51028896, + "step": 75700 + }, + { + "epoch": 1.8494857449979234, + "grad_norm": 0.07904600352048874, + "learning_rate": 1.588094289191086e-06, + "loss": 0.0345, + "num_input_tokens_seen": 51032736, + "step": 75705 + }, + { + "epoch": 1.8496078958297706, + "grad_norm": 0.4518510699272156, + "learning_rate": 1.5880253151805054e-06, + "loss": 0.1506, + "num_input_tokens_seen": 51036128, + "step": 75710 + }, + { + "epoch": 1.8497300466616178, + "grad_norm": 0.1093616634607315, + "learning_rate": 1.5879563368936463e-06, + "loss": 0.0747, + "num_input_tokens_seen": 51039392, + "step": 75715 + }, + { + "epoch": 1.849852197493465, + "grad_norm": 1.2742111682891846, + "learning_rate": 1.5878873543310109e-06, + "loss": 0.0353, + "num_input_tokens_seen": 51042464, + "step": 75720 + }, + { + "epoch": 1.8499743483253122, + "grad_norm": 9.084955215454102, + "learning_rate": 1.5878183674931005e-06, + "loss": 0.0627, + "num_input_tokens_seen": 51046112, + "step": 75725 + }, + { + "epoch": 1.8500964991571593, + "grad_norm": 2.01727557182312, + "learning_rate": 1.5877493763804167e-06, + "loss": 0.0591, + "num_input_tokens_seen": 51049504, + "step": 75730 + }, + { + "epoch": 1.8502186499890065, + "grad_norm": 54.07387924194336, + "learning_rate": 1.5876803809934613e-06, + "loss": 0.1982, + "num_input_tokens_seen": 51053088, + "step": 75735 + }, + { + "epoch": 1.8503408008208537, + "grad_norm": 0.08706037700176239, + "learning_rate": 1.5876113813327363e-06, + "loss": 0.0013, + "num_input_tokens_seen": 51056416, + "step": 75740 + }, + { + "epoch": 1.850462951652701, + "grad_norm": 12.536171913146973, + "learning_rate": 1.587542377398743e-06, + "loss": 0.1587, + "num_input_tokens_seen": 51059744, + "step": 75745 + }, + { + "epoch": 1.8505851024845479, + "grad_norm": 35.133575439453125, + "learning_rate": 1.587473369191984e-06, + "loss": 0.1871, + "num_input_tokens_seen": 51063072, + "step": 75750 + }, + { + "epoch": 1.850707253316395, + "grad_norm": 2.3288958072662354, + "learning_rate": 1.58740435671296e-06, + "loss": 0.0596, + "num_input_tokens_seen": 51066336, + "step": 75755 + }, + { + "epoch": 1.8508294041482423, + "grad_norm": 0.4715663194656372, + "learning_rate": 1.5873353399621737e-06, + "loss": 0.0658, + "num_input_tokens_seen": 51069792, + "step": 75760 + }, + { + "epoch": 1.8509515549800895, + "grad_norm": 1.3501707315444946, + "learning_rate": 1.5872663189401272e-06, + "loss": 0.0011, + "num_input_tokens_seen": 51072864, + "step": 75765 + }, + { + "epoch": 1.8510737058119364, + "grad_norm": 0.39104995131492615, + "learning_rate": 1.5871972936473217e-06, + "loss": 0.1394, + "num_input_tokens_seen": 51075872, + "step": 75770 + }, + { + "epoch": 1.8511958566437836, + "grad_norm": 19.183887481689453, + "learning_rate": 1.5871282640842601e-06, + "loss": 0.037, + "num_input_tokens_seen": 51079008, + "step": 75775 + }, + { + "epoch": 1.8513180074756308, + "grad_norm": 13.764801979064941, + "learning_rate": 1.5870592302514431e-06, + "loss": 0.1345, + "num_input_tokens_seen": 51082464, + "step": 75780 + }, + { + "epoch": 1.851440158307478, + "grad_norm": 100.37277221679688, + "learning_rate": 1.5869901921493738e-06, + "loss": 0.1049, + "num_input_tokens_seen": 51086304, + "step": 75785 + }, + { + "epoch": 1.8515623091393252, + "grad_norm": 27.52959442138672, + "learning_rate": 1.5869211497785539e-06, + "loss": 0.1019, + "num_input_tokens_seen": 51089696, + "step": 75790 + }, + { + "epoch": 1.8516844599711724, + "grad_norm": 0.3320782780647278, + "learning_rate": 1.5868521031394858e-06, + "loss": 0.0703, + "num_input_tokens_seen": 51093024, + "step": 75795 + }, + { + "epoch": 1.8518066108030196, + "grad_norm": 0.0150426235049963, + "learning_rate": 1.586783052232671e-06, + "loss": 0.0664, + "num_input_tokens_seen": 51096416, + "step": 75800 + }, + { + "epoch": 1.8519287616348667, + "grad_norm": 0.34932833909988403, + "learning_rate": 1.5867139970586124e-06, + "loss": 0.1082, + "num_input_tokens_seen": 51099680, + "step": 75805 + }, + { + "epoch": 1.852050912466714, + "grad_norm": 0.6799086332321167, + "learning_rate": 1.5866449376178115e-06, + "loss": 0.0262, + "num_input_tokens_seen": 51103392, + "step": 75810 + }, + { + "epoch": 1.8521730632985611, + "grad_norm": 0.31468117237091064, + "learning_rate": 1.5865758739107707e-06, + "loss": 0.0365, + "num_input_tokens_seen": 51106592, + "step": 75815 + }, + { + "epoch": 1.8522952141304083, + "grad_norm": 0.13998661935329437, + "learning_rate": 1.5865068059379926e-06, + "loss": 0.0021, + "num_input_tokens_seen": 51109792, + "step": 75820 + }, + { + "epoch": 1.8524173649622555, + "grad_norm": 1.9981356859207153, + "learning_rate": 1.5864377336999795e-06, + "loss": 0.0016, + "num_input_tokens_seen": 51113120, + "step": 75825 + }, + { + "epoch": 1.8525395157941027, + "grad_norm": 0.25120308995246887, + "learning_rate": 1.5863686571972332e-06, + "loss": 0.0932, + "num_input_tokens_seen": 51116512, + "step": 75830 + }, + { + "epoch": 1.8526616666259499, + "grad_norm": 0.23657932877540588, + "learning_rate": 1.5862995764302562e-06, + "loss": 0.0318, + "num_input_tokens_seen": 51119968, + "step": 75835 + }, + { + "epoch": 1.8527838174577969, + "grad_norm": 1.5655349493026733, + "learning_rate": 1.5862304913995513e-06, + "loss": 0.0396, + "num_input_tokens_seen": 51123232, + "step": 75840 + }, + { + "epoch": 1.852905968289644, + "grad_norm": 24.702547073364258, + "learning_rate": 1.58616140210562e-06, + "loss": 0.1938, + "num_input_tokens_seen": 51126496, + "step": 75845 + }, + { + "epoch": 1.8530281191214912, + "grad_norm": 21.289745330810547, + "learning_rate": 1.5860923085489656e-06, + "loss": 0.0655, + "num_input_tokens_seen": 51129760, + "step": 75850 + }, + { + "epoch": 1.8531502699533384, + "grad_norm": 0.23596727848052979, + "learning_rate": 1.5860232107300906e-06, + "loss": 0.0028, + "num_input_tokens_seen": 51133408, + "step": 75855 + }, + { + "epoch": 1.8532724207851854, + "grad_norm": 0.6734174489974976, + "learning_rate": 1.585954108649497e-06, + "loss": 0.2307, + "num_input_tokens_seen": 51136672, + "step": 75860 + }, + { + "epoch": 1.8533945716170326, + "grad_norm": 20.81391143798828, + "learning_rate": 1.5858850023076874e-06, + "loss": 0.0909, + "num_input_tokens_seen": 51140000, + "step": 75865 + }, + { + "epoch": 1.8535167224488798, + "grad_norm": 22.20842933654785, + "learning_rate": 1.585815891705165e-06, + "loss": 0.0877, + "num_input_tokens_seen": 51143072, + "step": 75870 + }, + { + "epoch": 1.853638873280727, + "grad_norm": 0.023959221318364143, + "learning_rate": 1.5857467768424312e-06, + "loss": 0.001, + "num_input_tokens_seen": 51146592, + "step": 75875 + }, + { + "epoch": 1.8537610241125742, + "grad_norm": 32.944183349609375, + "learning_rate": 1.5856776577199895e-06, + "loss": 0.1578, + "num_input_tokens_seen": 51150304, + "step": 75880 + }, + { + "epoch": 1.8538831749444213, + "grad_norm": 0.3040035367012024, + "learning_rate": 1.5856085343383426e-06, + "loss": 0.0734, + "num_input_tokens_seen": 51153568, + "step": 75885 + }, + { + "epoch": 1.8540053257762685, + "grad_norm": 0.096220001578331, + "learning_rate": 1.5855394066979925e-06, + "loss": 0.0787, + "num_input_tokens_seen": 51156960, + "step": 75890 + }, + { + "epoch": 1.8541274766081157, + "grad_norm": 1.3828034400939941, + "learning_rate": 1.5854702747994427e-06, + "loss": 0.0606, + "num_input_tokens_seen": 51160288, + "step": 75895 + }, + { + "epoch": 1.854249627439963, + "grad_norm": 0.2657002806663513, + "learning_rate": 1.5854011386431955e-06, + "loss": 0.0024, + "num_input_tokens_seen": 51163616, + "step": 75900 + }, + { + "epoch": 1.85437177827181, + "grad_norm": 0.6370258927345276, + "learning_rate": 1.5853319982297538e-06, + "loss": 0.0779, + "num_input_tokens_seen": 51166816, + "step": 75905 + }, + { + "epoch": 1.8544939291036573, + "grad_norm": 0.1296832263469696, + "learning_rate": 1.58526285355962e-06, + "loss": 0.1529, + "num_input_tokens_seen": 51170208, + "step": 75910 + }, + { + "epoch": 1.8546160799355045, + "grad_norm": 0.043997667729854584, + "learning_rate": 1.5851937046332976e-06, + "loss": 0.0861, + "num_input_tokens_seen": 51173856, + "step": 75915 + }, + { + "epoch": 1.8547382307673517, + "grad_norm": 0.2165103554725647, + "learning_rate": 1.5851245514512895e-06, + "loss": 0.0991, + "num_input_tokens_seen": 51176864, + "step": 75920 + }, + { + "epoch": 1.8548603815991986, + "grad_norm": 1.8013496398925781, + "learning_rate": 1.5850553940140979e-06, + "loss": 0.0879, + "num_input_tokens_seen": 51180448, + "step": 75925 + }, + { + "epoch": 1.8549825324310458, + "grad_norm": 68.27543640136719, + "learning_rate": 1.584986232322226e-06, + "loss": 0.132, + "num_input_tokens_seen": 51183776, + "step": 75930 + }, + { + "epoch": 1.855104683262893, + "grad_norm": 113.5135498046875, + "learning_rate": 1.5849170663761772e-06, + "loss": 0.0056, + "num_input_tokens_seen": 51187232, + "step": 75935 + }, + { + "epoch": 1.8552268340947402, + "grad_norm": 0.03377193957567215, + "learning_rate": 1.584847896176454e-06, + "loss": 0.002, + "num_input_tokens_seen": 51190304, + "step": 75940 + }, + { + "epoch": 1.8553489849265874, + "grad_norm": 0.14906467497348785, + "learning_rate": 1.5847787217235595e-06, + "loss": 0.0393, + "num_input_tokens_seen": 51193888, + "step": 75945 + }, + { + "epoch": 1.8554711357584344, + "grad_norm": 0.892361044883728, + "learning_rate": 1.5847095430179972e-06, + "loss": 0.0694, + "num_input_tokens_seen": 51197536, + "step": 75950 + }, + { + "epoch": 1.8555932865902816, + "grad_norm": 0.5475803017616272, + "learning_rate": 1.5846403600602695e-06, + "loss": 0.1415, + "num_input_tokens_seen": 51200992, + "step": 75955 + }, + { + "epoch": 1.8557154374221287, + "grad_norm": 95.13793182373047, + "learning_rate": 1.5845711728508802e-06, + "loss": 0.0605, + "num_input_tokens_seen": 51204448, + "step": 75960 + }, + { + "epoch": 1.855837588253976, + "grad_norm": 0.030163267627358437, + "learning_rate": 1.5845019813903318e-06, + "loss": 0.1219, + "num_input_tokens_seen": 51207648, + "step": 75965 + }, + { + "epoch": 1.8559597390858231, + "grad_norm": 28.8455867767334, + "learning_rate": 1.5844327856791276e-06, + "loss": 0.1198, + "num_input_tokens_seen": 51211040, + "step": 75970 + }, + { + "epoch": 1.8560818899176703, + "grad_norm": 0.10754287987947464, + "learning_rate": 1.5843635857177712e-06, + "loss": 0.0704, + "num_input_tokens_seen": 51214304, + "step": 75975 + }, + { + "epoch": 1.8562040407495175, + "grad_norm": 41.19390106201172, + "learning_rate": 1.584294381506766e-06, + "loss": 0.2049, + "num_input_tokens_seen": 51217248, + "step": 75980 + }, + { + "epoch": 1.8563261915813647, + "grad_norm": 22.16908836364746, + "learning_rate": 1.5842251730466143e-06, + "loss": 0.0761, + "num_input_tokens_seen": 51221088, + "step": 75985 + }, + { + "epoch": 1.8564483424132119, + "grad_norm": 26.360881805419922, + "learning_rate": 1.5841559603378204e-06, + "loss": 0.1281, + "num_input_tokens_seen": 51224224, + "step": 75990 + }, + { + "epoch": 1.856570493245059, + "grad_norm": 0.0927048996090889, + "learning_rate": 1.584086743380887e-06, + "loss": 0.0007, + "num_input_tokens_seen": 51226976, + "step": 75995 + }, + { + "epoch": 1.8566926440769063, + "grad_norm": 24.040475845336914, + "learning_rate": 1.584017522176318e-06, + "loss": 0.1139, + "num_input_tokens_seen": 51230688, + "step": 76000 + }, + { + "epoch": 1.8568147949087535, + "grad_norm": 0.10228940844535828, + "learning_rate": 1.5839482967246162e-06, + "loss": 0.0939, + "num_input_tokens_seen": 51234464, + "step": 76005 + }, + { + "epoch": 1.8569369457406006, + "grad_norm": 11.87697982788086, + "learning_rate": 1.5838790670262853e-06, + "loss": 0.0364, + "num_input_tokens_seen": 51237728, + "step": 76010 + }, + { + "epoch": 1.8570590965724476, + "grad_norm": 88.89161682128906, + "learning_rate": 1.583809833081829e-06, + "loss": 0.0908, + "num_input_tokens_seen": 51240672, + "step": 76015 + }, + { + "epoch": 1.8571812474042948, + "grad_norm": 14.098811149597168, + "learning_rate": 1.5837405948917506e-06, + "loss": 0.0514, + "num_input_tokens_seen": 51243808, + "step": 76020 + }, + { + "epoch": 1.857303398236142, + "grad_norm": 0.14344164729118347, + "learning_rate": 1.5836713524565535e-06, + "loss": 0.0146, + "num_input_tokens_seen": 51247520, + "step": 76025 + }, + { + "epoch": 1.8574255490679892, + "grad_norm": 28.8133602142334, + "learning_rate": 1.583602105776741e-06, + "loss": 0.0418, + "num_input_tokens_seen": 51250912, + "step": 76030 + }, + { + "epoch": 1.8575476998998364, + "grad_norm": 1.3341981172561646, + "learning_rate": 1.5835328548528173e-06, + "loss": 0.1308, + "num_input_tokens_seen": 51255200, + "step": 76035 + }, + { + "epoch": 1.8576698507316833, + "grad_norm": 0.05615212023258209, + "learning_rate": 1.5834635996852858e-06, + "loss": 0.0411, + "num_input_tokens_seen": 51258464, + "step": 76040 + }, + { + "epoch": 1.8577920015635305, + "grad_norm": 23.91923713684082, + "learning_rate": 1.58339434027465e-06, + "loss": 0.0802, + "num_input_tokens_seen": 51261856, + "step": 76045 + }, + { + "epoch": 1.8579141523953777, + "grad_norm": 27.17865753173828, + "learning_rate": 1.583325076621414e-06, + "loss": 0.0297, + "num_input_tokens_seen": 51264864, + "step": 76050 + }, + { + "epoch": 1.858036303227225, + "grad_norm": 31.009824752807617, + "learning_rate": 1.5832558087260806e-06, + "loss": 0.0735, + "num_input_tokens_seen": 51267872, + "step": 76055 + }, + { + "epoch": 1.858158454059072, + "grad_norm": 0.057755716145038605, + "learning_rate": 1.5831865365891544e-06, + "loss": 0.0784, + "num_input_tokens_seen": 51270624, + "step": 76060 + }, + { + "epoch": 1.8582806048909193, + "grad_norm": 26.972583770751953, + "learning_rate": 1.5831172602111385e-06, + "loss": 0.0406, + "num_input_tokens_seen": 51274272, + "step": 76065 + }, + { + "epoch": 1.8584027557227665, + "grad_norm": 0.13842856884002686, + "learning_rate": 1.5830479795925372e-06, + "loss": 0.0465, + "num_input_tokens_seen": 51277664, + "step": 76070 + }, + { + "epoch": 1.8585249065546137, + "grad_norm": 0.19427655637264252, + "learning_rate": 1.5829786947338544e-06, + "loss": 0.2475, + "num_input_tokens_seen": 51281184, + "step": 76075 + }, + { + "epoch": 1.8586470573864609, + "grad_norm": 24.42550277709961, + "learning_rate": 1.5829094056355934e-06, + "loss": 0.1517, + "num_input_tokens_seen": 51284576, + "step": 76080 + }, + { + "epoch": 1.858769208218308, + "grad_norm": 0.07590050995349884, + "learning_rate": 1.5828401122982589e-06, + "loss": 0.127, + "num_input_tokens_seen": 51287904, + "step": 76085 + }, + { + "epoch": 1.8588913590501552, + "grad_norm": 55.993324279785156, + "learning_rate": 1.582770814722354e-06, + "loss": 0.1128, + "num_input_tokens_seen": 51291360, + "step": 76090 + }, + { + "epoch": 1.8590135098820024, + "grad_norm": 0.5621238350868225, + "learning_rate": 1.582701512908383e-06, + "loss": 0.1007, + "num_input_tokens_seen": 51295008, + "step": 76095 + }, + { + "epoch": 1.8591356607138496, + "grad_norm": 14.658206939697266, + "learning_rate": 1.5826322068568497e-06, + "loss": 0.0684, + "num_input_tokens_seen": 51298592, + "step": 76100 + }, + { + "epoch": 1.8592578115456966, + "grad_norm": 64.94924926757812, + "learning_rate": 1.5825628965682585e-06, + "loss": 0.1603, + "num_input_tokens_seen": 51302048, + "step": 76105 + }, + { + "epoch": 1.8593799623775438, + "grad_norm": 0.025940556079149246, + "learning_rate": 1.5824935820431132e-06, + "loss": 0.0359, + "num_input_tokens_seen": 51305568, + "step": 76110 + }, + { + "epoch": 1.859502113209391, + "grad_norm": 0.47116488218307495, + "learning_rate": 1.582424263281918e-06, + "loss": 0.0026, + "num_input_tokens_seen": 51308768, + "step": 76115 + }, + { + "epoch": 1.8596242640412382, + "grad_norm": 2.6873984336853027, + "learning_rate": 1.5823549402851768e-06, + "loss": 0.0414, + "num_input_tokens_seen": 51312096, + "step": 76120 + }, + { + "epoch": 1.8597464148730853, + "grad_norm": 0.17809846997261047, + "learning_rate": 1.5822856130533937e-06, + "loss": 0.0744, + "num_input_tokens_seen": 51315104, + "step": 76125 + }, + { + "epoch": 1.8598685657049323, + "grad_norm": 0.41667261719703674, + "learning_rate": 1.5822162815870734e-06, + "loss": 0.0508, + "num_input_tokens_seen": 51318432, + "step": 76130 + }, + { + "epoch": 1.8599907165367795, + "grad_norm": 11.715912818908691, + "learning_rate": 1.5821469458867194e-06, + "loss": 0.1015, + "num_input_tokens_seen": 51322528, + "step": 76135 + }, + { + "epoch": 1.8601128673686267, + "grad_norm": 0.06440366059541702, + "learning_rate": 1.5820776059528363e-06, + "loss": 0.1237, + "num_input_tokens_seen": 51325728, + "step": 76140 + }, + { + "epoch": 1.8602350182004739, + "grad_norm": 0.03905702009797096, + "learning_rate": 1.5820082617859283e-06, + "loss": 0.0658, + "num_input_tokens_seen": 51329120, + "step": 76145 + }, + { + "epoch": 1.860357169032321, + "grad_norm": 15.647366523742676, + "learning_rate": 1.5819389133864997e-06, + "loss": 0.1557, + "num_input_tokens_seen": 51332320, + "step": 76150 + }, + { + "epoch": 1.8604793198641683, + "grad_norm": 0.25174450874328613, + "learning_rate": 1.5818695607550544e-06, + "loss": 0.0885, + "num_input_tokens_seen": 51335264, + "step": 76155 + }, + { + "epoch": 1.8606014706960154, + "grad_norm": 0.3436072766780853, + "learning_rate": 1.5818002038920977e-06, + "loss": 0.0928, + "num_input_tokens_seen": 51338336, + "step": 76160 + }, + { + "epoch": 1.8607236215278626, + "grad_norm": 220.35096740722656, + "learning_rate": 1.5817308427981332e-06, + "loss": 0.1215, + "num_input_tokens_seen": 51341664, + "step": 76165 + }, + { + "epoch": 1.8608457723597098, + "grad_norm": 0.21717888116836548, + "learning_rate": 1.5816614774736656e-06, + "loss": 0.118, + "num_input_tokens_seen": 51344864, + "step": 76170 + }, + { + "epoch": 1.860967923191557, + "grad_norm": 0.24044664204120636, + "learning_rate": 1.5815921079191994e-06, + "loss": 0.0739, + "num_input_tokens_seen": 51347744, + "step": 76175 + }, + { + "epoch": 1.8610900740234042, + "grad_norm": 0.2545783519744873, + "learning_rate": 1.5815227341352389e-06, + "loss": 0.0379, + "num_input_tokens_seen": 51351136, + "step": 76180 + }, + { + "epoch": 1.8612122248552514, + "grad_norm": 0.3077756464481354, + "learning_rate": 1.5814533561222885e-06, + "loss": 0.0019, + "num_input_tokens_seen": 51354592, + "step": 76185 + }, + { + "epoch": 1.8613343756870986, + "grad_norm": 74.25428009033203, + "learning_rate": 1.581383973880853e-06, + "loss": 0.0707, + "num_input_tokens_seen": 51357728, + "step": 76190 + }, + { + "epoch": 1.8614565265189456, + "grad_norm": 0.5155385732650757, + "learning_rate": 1.5813145874114366e-06, + "loss": 0.1285, + "num_input_tokens_seen": 51360800, + "step": 76195 + }, + { + "epoch": 1.8615786773507927, + "grad_norm": 0.3103267252445221, + "learning_rate": 1.5812451967145445e-06, + "loss": 0.0436, + "num_input_tokens_seen": 51363680, + "step": 76200 + }, + { + "epoch": 1.86170082818264, + "grad_norm": 0.4102085530757904, + "learning_rate": 1.5811758017906809e-06, + "loss": 0.0765, + "num_input_tokens_seen": 51366880, + "step": 76205 + }, + { + "epoch": 1.8618229790144871, + "grad_norm": 0.8872631788253784, + "learning_rate": 1.5811064026403507e-06, + "loss": 0.0246, + "num_input_tokens_seen": 51370208, + "step": 76210 + }, + { + "epoch": 1.861945129846334, + "grad_norm": 14.930642127990723, + "learning_rate": 1.5810369992640583e-06, + "loss": 0.0803, + "num_input_tokens_seen": 51373344, + "step": 76215 + }, + { + "epoch": 1.8620672806781813, + "grad_norm": 13.179936408996582, + "learning_rate": 1.5809675916623087e-06, + "loss": 0.0507, + "num_input_tokens_seen": 51376608, + "step": 76220 + }, + { + "epoch": 1.8621894315100285, + "grad_norm": 0.05479566380381584, + "learning_rate": 1.5808981798356063e-06, + "loss": 0.057, + "num_input_tokens_seen": 51380128, + "step": 76225 + }, + { + "epoch": 1.8623115823418757, + "grad_norm": 0.8399147987365723, + "learning_rate": 1.5808287637844559e-06, + "loss": 0.002, + "num_input_tokens_seen": 51383712, + "step": 76230 + }, + { + "epoch": 1.8624337331737228, + "grad_norm": 36.5905647277832, + "learning_rate": 1.580759343509363e-06, + "loss": 0.087, + "num_input_tokens_seen": 51386720, + "step": 76235 + }, + { + "epoch": 1.86255588400557, + "grad_norm": 40.69601821899414, + "learning_rate": 1.5806899190108318e-06, + "loss": 0.1132, + "num_input_tokens_seen": 51389856, + "step": 76240 + }, + { + "epoch": 1.8626780348374172, + "grad_norm": 30.452613830566406, + "learning_rate": 1.5806204902893674e-06, + "loss": 0.093, + "num_input_tokens_seen": 51393056, + "step": 76245 + }, + { + "epoch": 1.8628001856692644, + "grad_norm": 22.763813018798828, + "learning_rate": 1.5805510573454744e-06, + "loss": 0.2589, + "num_input_tokens_seen": 51396128, + "step": 76250 + }, + { + "epoch": 1.8629223365011116, + "grad_norm": 10.09261417388916, + "learning_rate": 1.580481620179658e-06, + "loss": 0.0593, + "num_input_tokens_seen": 51399456, + "step": 76255 + }, + { + "epoch": 1.8630444873329588, + "grad_norm": 10.72971248626709, + "learning_rate": 1.580412178792423e-06, + "loss": 0.1307, + "num_input_tokens_seen": 51403168, + "step": 76260 + }, + { + "epoch": 1.863166638164806, + "grad_norm": 29.890047073364258, + "learning_rate": 1.5803427331842748e-06, + "loss": 0.1434, + "num_input_tokens_seen": 51406688, + "step": 76265 + }, + { + "epoch": 1.8632887889966532, + "grad_norm": 0.7407713532447815, + "learning_rate": 1.5802732833557182e-06, + "loss": 0.0023, + "num_input_tokens_seen": 51410272, + "step": 76270 + }, + { + "epoch": 1.8634109398285004, + "grad_norm": 18.422584533691406, + "learning_rate": 1.580203829307258e-06, + "loss": 0.111, + "num_input_tokens_seen": 51413984, + "step": 76275 + }, + { + "epoch": 1.8635330906603476, + "grad_norm": 0.01969938911497593, + "learning_rate": 1.5801343710393997e-06, + "loss": 0.0689, + "num_input_tokens_seen": 51417376, + "step": 76280 + }, + { + "epoch": 1.8636552414921945, + "grad_norm": 0.7834815979003906, + "learning_rate": 1.5800649085526478e-06, + "loss": 0.0344, + "num_input_tokens_seen": 51420704, + "step": 76285 + }, + { + "epoch": 1.8637773923240417, + "grad_norm": 11.597505569458008, + "learning_rate": 1.5799954418475081e-06, + "loss": 0.1379, + "num_input_tokens_seen": 51423904, + "step": 76290 + }, + { + "epoch": 1.863899543155889, + "grad_norm": 15.526674270629883, + "learning_rate": 1.579925970924486e-06, + "loss": 0.1141, + "num_input_tokens_seen": 51427296, + "step": 76295 + }, + { + "epoch": 1.864021693987736, + "grad_norm": 0.04713597893714905, + "learning_rate": 1.5798564957840856e-06, + "loss": 0.0843, + "num_input_tokens_seen": 51431328, + "step": 76300 + }, + { + "epoch": 1.864143844819583, + "grad_norm": 0.9391548037528992, + "learning_rate": 1.579787016426813e-06, + "loss": 0.0035, + "num_input_tokens_seen": 51434912, + "step": 76305 + }, + { + "epoch": 1.8642659956514303, + "grad_norm": 0.10538437217473984, + "learning_rate": 1.5797175328531733e-06, + "loss": 0.001, + "num_input_tokens_seen": 51438304, + "step": 76310 + }, + { + "epoch": 1.8643881464832774, + "grad_norm": 4.421464443206787, + "learning_rate": 1.5796480450636719e-06, + "loss": 0.0259, + "num_input_tokens_seen": 51441696, + "step": 76315 + }, + { + "epoch": 1.8645102973151246, + "grad_norm": 43.166439056396484, + "learning_rate": 1.5795785530588138e-06, + "loss": 0.1345, + "num_input_tokens_seen": 51445024, + "step": 76320 + }, + { + "epoch": 1.8646324481469718, + "grad_norm": 0.16540168225765228, + "learning_rate": 1.5795090568391048e-06, + "loss": 0.1112, + "num_input_tokens_seen": 51448352, + "step": 76325 + }, + { + "epoch": 1.864754598978819, + "grad_norm": 47.41411209106445, + "learning_rate": 1.5794395564050499e-06, + "loss": 0.0865, + "num_input_tokens_seen": 51451808, + "step": 76330 + }, + { + "epoch": 1.8648767498106662, + "grad_norm": 0.0872948095202446, + "learning_rate": 1.5793700517571547e-06, + "loss": 0.0862, + "num_input_tokens_seen": 51454688, + "step": 76335 + }, + { + "epoch": 1.8649989006425134, + "grad_norm": 10.607466697692871, + "learning_rate": 1.5793005428959245e-06, + "loss": 0.1357, + "num_input_tokens_seen": 51458272, + "step": 76340 + }, + { + "epoch": 1.8651210514743606, + "grad_norm": 3.705434560775757, + "learning_rate": 1.5792310298218651e-06, + "loss": 0.0021, + "num_input_tokens_seen": 51461536, + "step": 76345 + }, + { + "epoch": 1.8652432023062078, + "grad_norm": 0.37921032309532166, + "learning_rate": 1.579161512535482e-06, + "loss": 0.0451, + "num_input_tokens_seen": 51464608, + "step": 76350 + }, + { + "epoch": 1.865365353138055, + "grad_norm": 253.26100158691406, + "learning_rate": 1.5790919910372806e-06, + "loss": 0.1823, + "num_input_tokens_seen": 51467744, + "step": 76355 + }, + { + "epoch": 1.8654875039699021, + "grad_norm": 18.376888275146484, + "learning_rate": 1.579022465327766e-06, + "loss": 0.0786, + "num_input_tokens_seen": 51470752, + "step": 76360 + }, + { + "epoch": 1.8656096548017493, + "grad_norm": 0.1428607702255249, + "learning_rate": 1.578952935407445e-06, + "loss": 0.0775, + "num_input_tokens_seen": 51473760, + "step": 76365 + }, + { + "epoch": 1.8657318056335965, + "grad_norm": 0.2554032802581787, + "learning_rate": 1.578883401276822e-06, + "loss": 0.0017, + "num_input_tokens_seen": 51476832, + "step": 76370 + }, + { + "epoch": 1.8658539564654435, + "grad_norm": 27.811237335205078, + "learning_rate": 1.5788138629364033e-06, + "loss": 0.1384, + "num_input_tokens_seen": 51480224, + "step": 76375 + }, + { + "epoch": 1.8659761072972907, + "grad_norm": 41.41610336303711, + "learning_rate": 1.5787443203866947e-06, + "loss": 0.0977, + "num_input_tokens_seen": 51483360, + "step": 76380 + }, + { + "epoch": 1.8660982581291379, + "grad_norm": 0.9137465953826904, + "learning_rate": 1.5786747736282019e-06, + "loss": 0.0297, + "num_input_tokens_seen": 51486624, + "step": 76385 + }, + { + "epoch": 1.866220408960985, + "grad_norm": 0.2629835903644562, + "learning_rate": 1.5786052226614301e-06, + "loss": 0.1164, + "num_input_tokens_seen": 51489952, + "step": 76390 + }, + { + "epoch": 1.866342559792832, + "grad_norm": 2.6729328632354736, + "learning_rate": 1.5785356674868857e-06, + "loss": 0.0587, + "num_input_tokens_seen": 51493536, + "step": 76395 + }, + { + "epoch": 1.8664647106246792, + "grad_norm": 0.1776157021522522, + "learning_rate": 1.5784661081050743e-06, + "loss": 0.0442, + "num_input_tokens_seen": 51496672, + "step": 76400 + }, + { + "epoch": 1.8665868614565264, + "grad_norm": 0.07628049701452255, + "learning_rate": 1.5783965445165018e-06, + "loss": 0.1155, + "num_input_tokens_seen": 51499872, + "step": 76405 + }, + { + "epoch": 1.8667090122883736, + "grad_norm": 30.831989288330078, + "learning_rate": 1.5783269767216738e-06, + "loss": 0.1668, + "num_input_tokens_seen": 51503968, + "step": 76410 + }, + { + "epoch": 1.8668311631202208, + "grad_norm": 18.025569915771484, + "learning_rate": 1.5782574047210968e-06, + "loss": 0.1493, + "num_input_tokens_seen": 51507232, + "step": 76415 + }, + { + "epoch": 1.866953313952068, + "grad_norm": 0.03697647899389267, + "learning_rate": 1.5781878285152765e-06, + "loss": 0.0012, + "num_input_tokens_seen": 51510752, + "step": 76420 + }, + { + "epoch": 1.8670754647839152, + "grad_norm": 0.6512537598609924, + "learning_rate": 1.5781182481047184e-06, + "loss": 0.0636, + "num_input_tokens_seen": 51513760, + "step": 76425 + }, + { + "epoch": 1.8671976156157624, + "grad_norm": 0.23654574155807495, + "learning_rate": 1.5780486634899291e-06, + "loss": 0.0952, + "num_input_tokens_seen": 51517408, + "step": 76430 + }, + { + "epoch": 1.8673197664476096, + "grad_norm": 41.54343795776367, + "learning_rate": 1.5779790746714145e-06, + "loss": 0.1567, + "num_input_tokens_seen": 51520736, + "step": 76435 + }, + { + "epoch": 1.8674419172794567, + "grad_norm": 0.069539874792099, + "learning_rate": 1.5779094816496806e-06, + "loss": 0.0368, + "num_input_tokens_seen": 51524192, + "step": 76440 + }, + { + "epoch": 1.867564068111304, + "grad_norm": 8.968692779541016, + "learning_rate": 1.5778398844252334e-06, + "loss": 0.1951, + "num_input_tokens_seen": 51527904, + "step": 76445 + }, + { + "epoch": 1.8676862189431511, + "grad_norm": 0.03013712167739868, + "learning_rate": 1.5777702829985794e-06, + "loss": 0.0265, + "num_input_tokens_seen": 51531296, + "step": 76450 + }, + { + "epoch": 1.8678083697749983, + "grad_norm": 0.4158450961112976, + "learning_rate": 1.577700677370224e-06, + "loss": 0.0533, + "num_input_tokens_seen": 51534496, + "step": 76455 + }, + { + "epoch": 1.8679305206068453, + "grad_norm": 0.07961289584636688, + "learning_rate": 1.5776310675406743e-06, + "loss": 0.1515, + "num_input_tokens_seen": 51538144, + "step": 76460 + }, + { + "epoch": 1.8680526714386925, + "grad_norm": 0.5942521691322327, + "learning_rate": 1.577561453510436e-06, + "loss": 0.0012, + "num_input_tokens_seen": 51541536, + "step": 76465 + }, + { + "epoch": 1.8681748222705397, + "grad_norm": 114.15911865234375, + "learning_rate": 1.5774918352800156e-06, + "loss": 0.0686, + "num_input_tokens_seen": 51545376, + "step": 76470 + }, + { + "epoch": 1.8682969731023868, + "grad_norm": 0.8016293048858643, + "learning_rate": 1.5774222128499188e-06, + "loss": 0.0261, + "num_input_tokens_seen": 51549152, + "step": 76475 + }, + { + "epoch": 1.868419123934234, + "grad_norm": 0.21990229189395905, + "learning_rate": 1.5773525862206528e-06, + "loss": 0.0014, + "num_input_tokens_seen": 51552736, + "step": 76480 + }, + { + "epoch": 1.868541274766081, + "grad_norm": 32.91740798950195, + "learning_rate": 1.5772829553927235e-06, + "loss": 0.078, + "num_input_tokens_seen": 51556576, + "step": 76485 + }, + { + "epoch": 1.8686634255979282, + "grad_norm": 0.42278850078582764, + "learning_rate": 1.577213320366637e-06, + "loss": 0.0085, + "num_input_tokens_seen": 51559776, + "step": 76490 + }, + { + "epoch": 1.8687855764297754, + "grad_norm": 0.015430174767971039, + "learning_rate": 1.5771436811429002e-06, + "loss": 0.0239, + "num_input_tokens_seen": 51562912, + "step": 76495 + }, + { + "epoch": 1.8689077272616226, + "grad_norm": 34.49660110473633, + "learning_rate": 1.5770740377220192e-06, + "loss": 0.1157, + "num_input_tokens_seen": 51566368, + "step": 76500 + }, + { + "epoch": 1.8690298780934698, + "grad_norm": 0.05728043243288994, + "learning_rate": 1.5770043901045007e-06, + "loss": 0.0966, + "num_input_tokens_seen": 51569952, + "step": 76505 + }, + { + "epoch": 1.869152028925317, + "grad_norm": 7.740024566650391, + "learning_rate": 1.5769347382908511e-06, + "loss": 0.2293, + "num_input_tokens_seen": 51573024, + "step": 76510 + }, + { + "epoch": 1.8692741797571641, + "grad_norm": 93.66651153564453, + "learning_rate": 1.5768650822815767e-06, + "loss": 0.1675, + "num_input_tokens_seen": 51576544, + "step": 76515 + }, + { + "epoch": 1.8693963305890113, + "grad_norm": 20.219310760498047, + "learning_rate": 1.5767954220771844e-06, + "loss": 0.0407, + "num_input_tokens_seen": 51579616, + "step": 76520 + }, + { + "epoch": 1.8695184814208585, + "grad_norm": 0.25965866446495056, + "learning_rate": 1.5767257576781808e-06, + "loss": 0.1259, + "num_input_tokens_seen": 51583136, + "step": 76525 + }, + { + "epoch": 1.8696406322527057, + "grad_norm": 0.3182509243488312, + "learning_rate": 1.576656089085072e-06, + "loss": 0.0756, + "num_input_tokens_seen": 51586400, + "step": 76530 + }, + { + "epoch": 1.869762783084553, + "grad_norm": 6.768677711486816, + "learning_rate": 1.5765864162983654e-06, + "loss": 0.1691, + "num_input_tokens_seen": 51589600, + "step": 76535 + }, + { + "epoch": 1.8698849339164, + "grad_norm": 2.752490758895874, + "learning_rate": 1.576516739318567e-06, + "loss": 0.1084, + "num_input_tokens_seen": 51592800, + "step": 76540 + }, + { + "epoch": 1.8700070847482473, + "grad_norm": 0.23286566138267517, + "learning_rate": 1.5764470581461842e-06, + "loss": 0.0868, + "num_input_tokens_seen": 51596128, + "step": 76545 + }, + { + "epoch": 1.8701292355800943, + "grad_norm": 0.8963605165481567, + "learning_rate": 1.576377372781723e-06, + "loss": 0.1062, + "num_input_tokens_seen": 51599392, + "step": 76550 + }, + { + "epoch": 1.8702513864119414, + "grad_norm": 17.253986358642578, + "learning_rate": 1.5763076832256905e-06, + "loss": 0.0709, + "num_input_tokens_seen": 51603104, + "step": 76555 + }, + { + "epoch": 1.8703735372437886, + "grad_norm": 0.5030640959739685, + "learning_rate": 1.5762379894785938e-06, + "loss": 0.0825, + "num_input_tokens_seen": 51606304, + "step": 76560 + }, + { + "epoch": 1.8704956880756358, + "grad_norm": 0.284263014793396, + "learning_rate": 1.5761682915409389e-06, + "loss": 0.0386, + "num_input_tokens_seen": 51609632, + "step": 76565 + }, + { + "epoch": 1.870617838907483, + "grad_norm": 0.16007032990455627, + "learning_rate": 1.5760985894132336e-06, + "loss": 0.0324, + "num_input_tokens_seen": 51612768, + "step": 76570 + }, + { + "epoch": 1.87073998973933, + "grad_norm": 0.1427338570356369, + "learning_rate": 1.5760288830959846e-06, + "loss": 0.0845, + "num_input_tokens_seen": 51615968, + "step": 76575 + }, + { + "epoch": 1.8708621405711772, + "grad_norm": 0.4901255667209625, + "learning_rate": 1.5759591725896986e-06, + "loss": 0.0019, + "num_input_tokens_seen": 51619168, + "step": 76580 + }, + { + "epoch": 1.8709842914030244, + "grad_norm": 0.03649323433637619, + "learning_rate": 1.5758894578948823e-06, + "loss": 0.047, + "num_input_tokens_seen": 51622624, + "step": 76585 + }, + { + "epoch": 1.8711064422348715, + "grad_norm": 20.135515213012695, + "learning_rate": 1.575819739012043e-06, + "loss": 0.102, + "num_input_tokens_seen": 51626016, + "step": 76590 + }, + { + "epoch": 1.8712285930667187, + "grad_norm": 0.1097368523478508, + "learning_rate": 1.5757500159416877e-06, + "loss": 0.0222, + "num_input_tokens_seen": 51629408, + "step": 76595 + }, + { + "epoch": 1.871350743898566, + "grad_norm": 0.6177487969398499, + "learning_rate": 1.5756802886843237e-06, + "loss": 0.072, + "num_input_tokens_seen": 51632864, + "step": 76600 + }, + { + "epoch": 1.8714728947304131, + "grad_norm": 48.99726486206055, + "learning_rate": 1.5756105572404575e-06, + "loss": 0.0833, + "num_input_tokens_seen": 51635936, + "step": 76605 + }, + { + "epoch": 1.8715950455622603, + "grad_norm": 0.3567311763763428, + "learning_rate": 1.5755408216105966e-06, + "loss": 0.1141, + "num_input_tokens_seen": 51639520, + "step": 76610 + }, + { + "epoch": 1.8717171963941075, + "grad_norm": 0.19925457239151, + "learning_rate": 1.5754710817952481e-06, + "loss": 0.1466, + "num_input_tokens_seen": 51642656, + "step": 76615 + }, + { + "epoch": 1.8718393472259547, + "grad_norm": 0.13557025790214539, + "learning_rate": 1.5754013377949189e-06, + "loss": 0.1726, + "num_input_tokens_seen": 51646368, + "step": 76620 + }, + { + "epoch": 1.8719614980578019, + "grad_norm": 172.94630432128906, + "learning_rate": 1.5753315896101165e-06, + "loss": 0.0896, + "num_input_tokens_seen": 51649632, + "step": 76625 + }, + { + "epoch": 1.872083648889649, + "grad_norm": 14.283888816833496, + "learning_rate": 1.575261837241348e-06, + "loss": 0.0933, + "num_input_tokens_seen": 51654624, + "step": 76630 + }, + { + "epoch": 1.8722057997214963, + "grad_norm": 0.4296637177467346, + "learning_rate": 1.575192080689121e-06, + "loss": 0.0481, + "num_input_tokens_seen": 51658208, + "step": 76635 + }, + { + "epoch": 1.8723279505533432, + "grad_norm": 12.624166488647461, + "learning_rate": 1.5751223199539422e-06, + "loss": 0.1073, + "num_input_tokens_seen": 51661344, + "step": 76640 + }, + { + "epoch": 1.8724501013851904, + "grad_norm": 0.1500295251607895, + "learning_rate": 1.5750525550363192e-06, + "loss": 0.0008, + "num_input_tokens_seen": 51664608, + "step": 76645 + }, + { + "epoch": 1.8725722522170376, + "grad_norm": 0.16735275089740753, + "learning_rate": 1.5749827859367594e-06, + "loss": 0.0017, + "num_input_tokens_seen": 51668192, + "step": 76650 + }, + { + "epoch": 1.8726944030488848, + "grad_norm": 134.92787170410156, + "learning_rate": 1.57491301265577e-06, + "loss": 0.1194, + "num_input_tokens_seen": 51672096, + "step": 76655 + }, + { + "epoch": 1.872816553880732, + "grad_norm": 0.06441762298345566, + "learning_rate": 1.5748432351938587e-06, + "loss": 0.0222, + "num_input_tokens_seen": 51675552, + "step": 76660 + }, + { + "epoch": 1.872938704712579, + "grad_norm": 0.14489486813545227, + "learning_rate": 1.5747734535515327e-06, + "loss": 0.0675, + "num_input_tokens_seen": 51679264, + "step": 76665 + }, + { + "epoch": 1.8730608555444261, + "grad_norm": 38.53899002075195, + "learning_rate": 1.5747036677292998e-06, + "loss": 0.0698, + "num_input_tokens_seen": 51682464, + "step": 76670 + }, + { + "epoch": 1.8731830063762733, + "grad_norm": 0.45673030614852905, + "learning_rate": 1.5746338777276668e-06, + "loss": 0.1214, + "num_input_tokens_seen": 51685600, + "step": 76675 + }, + { + "epoch": 1.8733051572081205, + "grad_norm": 0.7562420964241028, + "learning_rate": 1.5745640835471422e-06, + "loss": 0.0604, + "num_input_tokens_seen": 51688992, + "step": 76680 + }, + { + "epoch": 1.8734273080399677, + "grad_norm": 0.14636746048927307, + "learning_rate": 1.5744942851882326e-06, + "loss": 0.0427, + "num_input_tokens_seen": 51692128, + "step": 76685 + }, + { + "epoch": 1.873549458871815, + "grad_norm": 0.37488046288490295, + "learning_rate": 1.5744244826514463e-06, + "loss": 0.1883, + "num_input_tokens_seen": 51695520, + "step": 76690 + }, + { + "epoch": 1.873671609703662, + "grad_norm": 0.9592730402946472, + "learning_rate": 1.5743546759372906e-06, + "loss": 0.0018, + "num_input_tokens_seen": 51698976, + "step": 76695 + }, + { + "epoch": 1.8737937605355093, + "grad_norm": 13.799247741699219, + "learning_rate": 1.5742848650462731e-06, + "loss": 0.1953, + "num_input_tokens_seen": 51702368, + "step": 76700 + }, + { + "epoch": 1.8739159113673565, + "grad_norm": 133.1254119873047, + "learning_rate": 1.574215049978902e-06, + "loss": 0.2132, + "num_input_tokens_seen": 51706208, + "step": 76705 + }, + { + "epoch": 1.8740380621992037, + "grad_norm": 0.19949565827846527, + "learning_rate": 1.5741452307356842e-06, + "loss": 0.0495, + "num_input_tokens_seen": 51709408, + "step": 76710 + }, + { + "epoch": 1.8741602130310508, + "grad_norm": 0.18503406643867493, + "learning_rate": 1.574075407317128e-06, + "loss": 0.0024, + "num_input_tokens_seen": 51712864, + "step": 76715 + }, + { + "epoch": 1.874282363862898, + "grad_norm": 0.4176699221134186, + "learning_rate": 1.5740055797237408e-06, + "loss": 0.0219, + "num_input_tokens_seen": 51715936, + "step": 76720 + }, + { + "epoch": 1.8744045146947452, + "grad_norm": 143.21966552734375, + "learning_rate": 1.573935747956031e-06, + "loss": 0.0642, + "num_input_tokens_seen": 51719136, + "step": 76725 + }, + { + "epoch": 1.8745266655265922, + "grad_norm": 0.2589048743247986, + "learning_rate": 1.5738659120145057e-06, + "loss": 0.1189, + "num_input_tokens_seen": 51722592, + "step": 76730 + }, + { + "epoch": 1.8746488163584394, + "grad_norm": 0.07927648723125458, + "learning_rate": 1.5737960718996734e-06, + "loss": 0.1064, + "num_input_tokens_seen": 51725792, + "step": 76735 + }, + { + "epoch": 1.8747709671902866, + "grad_norm": 13.073819160461426, + "learning_rate": 1.5737262276120417e-06, + "loss": 0.0906, + "num_input_tokens_seen": 51729696, + "step": 76740 + }, + { + "epoch": 1.8748931180221338, + "grad_norm": 0.2299795001745224, + "learning_rate": 1.5736563791521188e-06, + "loss": 0.0356, + "num_input_tokens_seen": 51733024, + "step": 76745 + }, + { + "epoch": 1.8750152688539807, + "grad_norm": 0.06773748993873596, + "learning_rate": 1.5735865265204118e-06, + "loss": 0.0008, + "num_input_tokens_seen": 51737312, + "step": 76750 + }, + { + "epoch": 1.875137419685828, + "grad_norm": 0.18835890293121338, + "learning_rate": 1.5735166697174296e-06, + "loss": 0.1585, + "num_input_tokens_seen": 51740448, + "step": 76755 + }, + { + "epoch": 1.8752595705176751, + "grad_norm": 0.07563555985689163, + "learning_rate": 1.5734468087436801e-06, + "loss": 0.0831, + "num_input_tokens_seen": 51743712, + "step": 76760 + }, + { + "epoch": 1.8753817213495223, + "grad_norm": 3.672663927078247, + "learning_rate": 1.573376943599671e-06, + "loss": 0.0483, + "num_input_tokens_seen": 51747360, + "step": 76765 + }, + { + "epoch": 1.8755038721813695, + "grad_norm": 0.13904969394207, + "learning_rate": 1.5733070742859105e-06, + "loss": 0.0009, + "num_input_tokens_seen": 51750560, + "step": 76770 + }, + { + "epoch": 1.8756260230132167, + "grad_norm": 0.7363821864128113, + "learning_rate": 1.5732372008029069e-06, + "loss": 0.0011, + "num_input_tokens_seen": 51754144, + "step": 76775 + }, + { + "epoch": 1.8757481738450639, + "grad_norm": 0.575900673866272, + "learning_rate": 1.5731673231511683e-06, + "loss": 0.0656, + "num_input_tokens_seen": 51757728, + "step": 76780 + }, + { + "epoch": 1.875870324676911, + "grad_norm": 0.02883243001997471, + "learning_rate": 1.5730974413312023e-06, + "loss": 0.1484, + "num_input_tokens_seen": 51761440, + "step": 76785 + }, + { + "epoch": 1.8759924755087583, + "grad_norm": 25.203136444091797, + "learning_rate": 1.573027555343518e-06, + "loss": 0.0709, + "num_input_tokens_seen": 51765088, + "step": 76790 + }, + { + "epoch": 1.8761146263406054, + "grad_norm": 0.15883779525756836, + "learning_rate": 1.5729576651886229e-06, + "loss": 0.0387, + "num_input_tokens_seen": 51769056, + "step": 76795 + }, + { + "epoch": 1.8762367771724526, + "grad_norm": 17.070417404174805, + "learning_rate": 1.5728877708670258e-06, + "loss": 0.117, + "num_input_tokens_seen": 51772384, + "step": 76800 + }, + { + "epoch": 1.8763589280042998, + "grad_norm": 3.9865686893463135, + "learning_rate": 1.5728178723792347e-06, + "loss": 0.0016, + "num_input_tokens_seen": 51775520, + "step": 76805 + }, + { + "epoch": 1.876481078836147, + "grad_norm": 47.375892639160156, + "learning_rate": 1.5727479697257578e-06, + "loss": 0.2674, + "num_input_tokens_seen": 51778720, + "step": 76810 + }, + { + "epoch": 1.8766032296679942, + "grad_norm": 8.242105484008789, + "learning_rate": 1.5726780629071037e-06, + "loss": 0.1556, + "num_input_tokens_seen": 51781920, + "step": 76815 + }, + { + "epoch": 1.8767253804998412, + "grad_norm": 0.23152075707912445, + "learning_rate": 1.572608151923781e-06, + "loss": 0.0942, + "num_input_tokens_seen": 51784992, + "step": 76820 + }, + { + "epoch": 1.8768475313316884, + "grad_norm": 0.5574703216552734, + "learning_rate": 1.5725382367762972e-06, + "loss": 0.0206, + "num_input_tokens_seen": 51788576, + "step": 76825 + }, + { + "epoch": 1.8769696821635355, + "grad_norm": 136.82997131347656, + "learning_rate": 1.5724683174651616e-06, + "loss": 0.021, + "num_input_tokens_seen": 51791968, + "step": 76830 + }, + { + "epoch": 1.8770918329953827, + "grad_norm": 0.15793973207473755, + "learning_rate": 1.5723983939908826e-06, + "loss": 0.0357, + "num_input_tokens_seen": 51795616, + "step": 76835 + }, + { + "epoch": 1.8772139838272297, + "grad_norm": 0.1076064333319664, + "learning_rate": 1.5723284663539684e-06, + "loss": 0.0541, + "num_input_tokens_seen": 51798688, + "step": 76840 + }, + { + "epoch": 1.877336134659077, + "grad_norm": 0.09073779731988907, + "learning_rate": 1.5722585345549276e-06, + "loss": 0.2528, + "num_input_tokens_seen": 51801632, + "step": 76845 + }, + { + "epoch": 1.877458285490924, + "grad_norm": 0.10438819974660873, + "learning_rate": 1.5721885985942689e-06, + "loss": 0.0017, + "num_input_tokens_seen": 51805024, + "step": 76850 + }, + { + "epoch": 1.8775804363227713, + "grad_norm": 0.3387993574142456, + "learning_rate": 1.5721186584725007e-06, + "loss": 0.0908, + "num_input_tokens_seen": 51808224, + "step": 76855 + }, + { + "epoch": 1.8777025871546185, + "grad_norm": 0.10911522060632706, + "learning_rate": 1.572048714190132e-06, + "loss": 0.0377, + "num_input_tokens_seen": 51812256, + "step": 76860 + }, + { + "epoch": 1.8778247379864657, + "grad_norm": 0.5376071929931641, + "learning_rate": 1.571978765747671e-06, + "loss": 0.1122, + "num_input_tokens_seen": 51815584, + "step": 76865 + }, + { + "epoch": 1.8779468888183128, + "grad_norm": 20.533918380737305, + "learning_rate": 1.5719088131456264e-06, + "loss": 0.1408, + "num_input_tokens_seen": 51818848, + "step": 76870 + }, + { + "epoch": 1.87806903965016, + "grad_norm": 14.685124397277832, + "learning_rate": 1.5718388563845073e-06, + "loss": 0.0538, + "num_input_tokens_seen": 51822368, + "step": 76875 + }, + { + "epoch": 1.8781911904820072, + "grad_norm": 0.15065699815750122, + "learning_rate": 1.5717688954648223e-06, + "loss": 0.0606, + "num_input_tokens_seen": 51826016, + "step": 76880 + }, + { + "epoch": 1.8783133413138544, + "grad_norm": 0.8592997193336487, + "learning_rate": 1.5716989303870797e-06, + "loss": 0.093, + "num_input_tokens_seen": 51829856, + "step": 76885 + }, + { + "epoch": 1.8784354921457016, + "grad_norm": 0.5073649287223816, + "learning_rate": 1.5716289611517892e-06, + "loss": 0.0452, + "num_input_tokens_seen": 51833056, + "step": 76890 + }, + { + "epoch": 1.8785576429775488, + "grad_norm": 176.64024353027344, + "learning_rate": 1.571558987759459e-06, + "loss": 0.0483, + "num_input_tokens_seen": 51836704, + "step": 76895 + }, + { + "epoch": 1.878679793809396, + "grad_norm": 18.90421485900879, + "learning_rate": 1.5714890102105983e-06, + "loss": 0.1351, + "num_input_tokens_seen": 51839392, + "step": 76900 + }, + { + "epoch": 1.8788019446412432, + "grad_norm": 0.08553940802812576, + "learning_rate": 1.5714190285057152e-06, + "loss": 0.0565, + "num_input_tokens_seen": 51843296, + "step": 76905 + }, + { + "epoch": 1.8789240954730901, + "grad_norm": 233.33682250976562, + "learning_rate": 1.5713490426453198e-06, + "loss": 0.009, + "num_input_tokens_seen": 51846624, + "step": 76910 + }, + { + "epoch": 1.8790462463049373, + "grad_norm": 130.19361877441406, + "learning_rate": 1.5712790526299203e-06, + "loss": 0.2575, + "num_input_tokens_seen": 51849888, + "step": 76915 + }, + { + "epoch": 1.8791683971367845, + "grad_norm": 1.1826436519622803, + "learning_rate": 1.5712090584600256e-06, + "loss": 0.1942, + "num_input_tokens_seen": 51853728, + "step": 76920 + }, + { + "epoch": 1.8792905479686317, + "grad_norm": 0.44003385305404663, + "learning_rate": 1.5711390601361454e-06, + "loss": 0.0867, + "num_input_tokens_seen": 51857440, + "step": 76925 + }, + { + "epoch": 1.8794126988004787, + "grad_norm": 0.6665119528770447, + "learning_rate": 1.5710690576587883e-06, + "loss": 0.0122, + "num_input_tokens_seen": 51860960, + "step": 76930 + }, + { + "epoch": 1.8795348496323259, + "grad_norm": 0.19465108215808868, + "learning_rate": 1.5709990510284632e-06, + "loss": 0.0533, + "num_input_tokens_seen": 51864480, + "step": 76935 + }, + { + "epoch": 1.879657000464173, + "grad_norm": 0.41062381863594055, + "learning_rate": 1.5709290402456795e-06, + "loss": 0.0375, + "num_input_tokens_seen": 51867552, + "step": 76940 + }, + { + "epoch": 1.8797791512960202, + "grad_norm": 0.5571926832199097, + "learning_rate": 1.5708590253109462e-06, + "loss": 0.041, + "num_input_tokens_seen": 51870816, + "step": 76945 + }, + { + "epoch": 1.8799013021278674, + "grad_norm": 2.615623712539673, + "learning_rate": 1.5707890062247727e-06, + "loss": 0.0976, + "num_input_tokens_seen": 51874080, + "step": 76950 + }, + { + "epoch": 1.8800234529597146, + "grad_norm": 0.13177724182605743, + "learning_rate": 1.5707189829876678e-06, + "loss": 0.0668, + "num_input_tokens_seen": 51877600, + "step": 76955 + }, + { + "epoch": 1.8801456037915618, + "grad_norm": 0.5526846647262573, + "learning_rate": 1.5706489556001411e-06, + "loss": 0.0864, + "num_input_tokens_seen": 51881376, + "step": 76960 + }, + { + "epoch": 1.880267754623409, + "grad_norm": 0.015390805900096893, + "learning_rate": 1.5705789240627017e-06, + "loss": 0.106, + "num_input_tokens_seen": 51884576, + "step": 76965 + }, + { + "epoch": 1.8803899054552562, + "grad_norm": 0.27080556750297546, + "learning_rate": 1.570508888375859e-06, + "loss": 0.0831, + "num_input_tokens_seen": 51887776, + "step": 76970 + }, + { + "epoch": 1.8805120562871034, + "grad_norm": 0.2317759245634079, + "learning_rate": 1.5704388485401221e-06, + "loss": 0.0864, + "num_input_tokens_seen": 51890976, + "step": 76975 + }, + { + "epoch": 1.8806342071189506, + "grad_norm": 0.05991874635219574, + "learning_rate": 1.5703688045560004e-06, + "loss": 0.0714, + "num_input_tokens_seen": 51894368, + "step": 76980 + }, + { + "epoch": 1.8807563579507978, + "grad_norm": 181.63429260253906, + "learning_rate": 1.5702987564240035e-06, + "loss": 0.0956, + "num_input_tokens_seen": 51897824, + "step": 76985 + }, + { + "epoch": 1.880878508782645, + "grad_norm": 13.823509216308594, + "learning_rate": 1.5702287041446406e-06, + "loss": 0.0912, + "num_input_tokens_seen": 51901280, + "step": 76990 + }, + { + "epoch": 1.881000659614492, + "grad_norm": 34.22384262084961, + "learning_rate": 1.5701586477184212e-06, + "loss": 0.1558, + "num_input_tokens_seen": 51904800, + "step": 76995 + }, + { + "epoch": 1.881122810446339, + "grad_norm": 15.785733222961426, + "learning_rate": 1.5700885871458546e-06, + "loss": 0.258, + "num_input_tokens_seen": 51908192, + "step": 77000 + }, + { + "epoch": 1.8812449612781863, + "grad_norm": 0.17888937890529633, + "learning_rate": 1.5700185224274504e-06, + "loss": 0.085, + "num_input_tokens_seen": 51911712, + "step": 77005 + }, + { + "epoch": 1.8813671121100335, + "grad_norm": 2.345381736755371, + "learning_rate": 1.5699484535637183e-06, + "loss": 0.0468, + "num_input_tokens_seen": 51915104, + "step": 77010 + }, + { + "epoch": 1.8814892629418807, + "grad_norm": 16.388687133789062, + "learning_rate": 1.5698783805551682e-06, + "loss": 0.1079, + "num_input_tokens_seen": 51918176, + "step": 77015 + }, + { + "epoch": 1.8816114137737276, + "grad_norm": 0.1745808720588684, + "learning_rate": 1.5698083034023086e-06, + "loss": 0.1028, + "num_input_tokens_seen": 51921184, + "step": 77020 + }, + { + "epoch": 1.8817335646055748, + "grad_norm": 2.841417074203491, + "learning_rate": 1.5697382221056501e-06, + "loss": 0.0429, + "num_input_tokens_seen": 51925216, + "step": 77025 + }, + { + "epoch": 1.881855715437422, + "grad_norm": 55.37129592895508, + "learning_rate": 1.5696681366657018e-06, + "loss": 0.0533, + "num_input_tokens_seen": 51928224, + "step": 77030 + }, + { + "epoch": 1.8819778662692692, + "grad_norm": 76.16033935546875, + "learning_rate": 1.5695980470829736e-06, + "loss": 0.0829, + "num_input_tokens_seen": 51931872, + "step": 77035 + }, + { + "epoch": 1.8821000171011164, + "grad_norm": 0.11781585216522217, + "learning_rate": 1.5695279533579754e-06, + "loss": 0.0499, + "num_input_tokens_seen": 51935136, + "step": 77040 + }, + { + "epoch": 1.8822221679329636, + "grad_norm": 4.420412063598633, + "learning_rate": 1.5694578554912167e-06, + "loss": 0.0416, + "num_input_tokens_seen": 51938592, + "step": 77045 + }, + { + "epoch": 1.8823443187648108, + "grad_norm": 10.299237251281738, + "learning_rate": 1.5693877534832072e-06, + "loss": 0.1148, + "num_input_tokens_seen": 51942048, + "step": 77050 + }, + { + "epoch": 1.882466469596658, + "grad_norm": 0.2998434901237488, + "learning_rate": 1.569317647334457e-06, + "loss": 0.0878, + "num_input_tokens_seen": 51945376, + "step": 77055 + }, + { + "epoch": 1.8825886204285052, + "grad_norm": 0.10223901271820068, + "learning_rate": 1.5692475370454754e-06, + "loss": 0.0298, + "num_input_tokens_seen": 51948512, + "step": 77060 + }, + { + "epoch": 1.8827107712603524, + "grad_norm": 0.3003741502761841, + "learning_rate": 1.569177422616773e-06, + "loss": 0.3041, + "num_input_tokens_seen": 51951840, + "step": 77065 + }, + { + "epoch": 1.8828329220921995, + "grad_norm": 3.530810832977295, + "learning_rate": 1.569107304048859e-06, + "loss": 0.0568, + "num_input_tokens_seen": 51955168, + "step": 77070 + }, + { + "epoch": 1.8829550729240467, + "grad_norm": 0.7776597738265991, + "learning_rate": 1.5690371813422437e-06, + "loss": 0.0458, + "num_input_tokens_seen": 51958624, + "step": 77075 + }, + { + "epoch": 1.883077223755894, + "grad_norm": 76.65692138671875, + "learning_rate": 1.5689670544974369e-06, + "loss": 0.1906, + "num_input_tokens_seen": 51962080, + "step": 77080 + }, + { + "epoch": 1.883199374587741, + "grad_norm": 0.09555058926343918, + "learning_rate": 1.5688969235149487e-06, + "loss": 0.0247, + "num_input_tokens_seen": 51965472, + "step": 77085 + }, + { + "epoch": 1.883321525419588, + "grad_norm": 0.4007159471511841, + "learning_rate": 1.568826788395289e-06, + "loss": 0.0012, + "num_input_tokens_seen": 51968928, + "step": 77090 + }, + { + "epoch": 1.8834436762514353, + "grad_norm": 0.09671560674905777, + "learning_rate": 1.568756649138968e-06, + "loss": 0.0317, + "num_input_tokens_seen": 51972000, + "step": 77095 + }, + { + "epoch": 1.8835658270832825, + "grad_norm": 9.398812294006348, + "learning_rate": 1.5686865057464958e-06, + "loss": 0.0484, + "num_input_tokens_seen": 51975648, + "step": 77100 + }, + { + "epoch": 1.8836879779151297, + "grad_norm": 17.455774307250977, + "learning_rate": 1.568616358218382e-06, + "loss": 0.1971, + "num_input_tokens_seen": 51978528, + "step": 77105 + }, + { + "epoch": 1.8838101287469766, + "grad_norm": 11.775774955749512, + "learning_rate": 1.5685462065551373e-06, + "loss": 0.183, + "num_input_tokens_seen": 51981728, + "step": 77110 + }, + { + "epoch": 1.8839322795788238, + "grad_norm": 0.1156628429889679, + "learning_rate": 1.5684760507572716e-06, + "loss": 0.0013, + "num_input_tokens_seen": 51985184, + "step": 77115 + }, + { + "epoch": 1.884054430410671, + "grad_norm": 66.80492401123047, + "learning_rate": 1.5684058908252952e-06, + "loss": 0.0107, + "num_input_tokens_seen": 51989344, + "step": 77120 + }, + { + "epoch": 1.8841765812425182, + "grad_norm": 33.782405853271484, + "learning_rate": 1.5683357267597183e-06, + "loss": 0.0973, + "num_input_tokens_seen": 51992736, + "step": 77125 + }, + { + "epoch": 1.8842987320743654, + "grad_norm": 0.2235114723443985, + "learning_rate": 1.5682655585610514e-06, + "loss": 0.0648, + "num_input_tokens_seen": 51996064, + "step": 77130 + }, + { + "epoch": 1.8844208829062126, + "grad_norm": 39.23573684692383, + "learning_rate": 1.5681953862298043e-06, + "loss": 0.1465, + "num_input_tokens_seen": 51999712, + "step": 77135 + }, + { + "epoch": 1.8845430337380598, + "grad_norm": 19.437856674194336, + "learning_rate": 1.5681252097664875e-06, + "loss": 0.036, + "num_input_tokens_seen": 52003360, + "step": 77140 + }, + { + "epoch": 1.884665184569907, + "grad_norm": 0.020282620564103127, + "learning_rate": 1.5680550291716113e-06, + "loss": 0.0753, + "num_input_tokens_seen": 52006816, + "step": 77145 + }, + { + "epoch": 1.8847873354017541, + "grad_norm": 1.5308507680892944, + "learning_rate": 1.5679848444456862e-06, + "loss": 0.0653, + "num_input_tokens_seen": 52010208, + "step": 77150 + }, + { + "epoch": 1.8849094862336013, + "grad_norm": 17.294179916381836, + "learning_rate": 1.5679146555892223e-06, + "loss": 0.1015, + "num_input_tokens_seen": 52013472, + "step": 77155 + }, + { + "epoch": 1.8850316370654485, + "grad_norm": 0.08894924819469452, + "learning_rate": 1.5678444626027308e-06, + "loss": 0.0014, + "num_input_tokens_seen": 52016736, + "step": 77160 + }, + { + "epoch": 1.8851537878972957, + "grad_norm": 0.4388797879219055, + "learning_rate": 1.567774265486721e-06, + "loss": 0.1694, + "num_input_tokens_seen": 52020640, + "step": 77165 + }, + { + "epoch": 1.885275938729143, + "grad_norm": 32.14431381225586, + "learning_rate": 1.5677040642417048e-06, + "loss": 0.0343, + "num_input_tokens_seen": 52024032, + "step": 77170 + }, + { + "epoch": 1.8853980895609899, + "grad_norm": 21.51976776123047, + "learning_rate": 1.5676338588681914e-06, + "loss": 0.1471, + "num_input_tokens_seen": 52027296, + "step": 77175 + }, + { + "epoch": 1.885520240392837, + "grad_norm": 0.0837312787771225, + "learning_rate": 1.567563649366692e-06, + "loss": 0.1674, + "num_input_tokens_seen": 52030496, + "step": 77180 + }, + { + "epoch": 1.8856423912246842, + "grad_norm": 63.973487854003906, + "learning_rate": 1.5674934357377168e-06, + "loss": 0.1197, + "num_input_tokens_seen": 52033568, + "step": 77185 + }, + { + "epoch": 1.8857645420565314, + "grad_norm": 3.189739227294922, + "learning_rate": 1.5674232179817773e-06, + "loss": 0.0036, + "num_input_tokens_seen": 52037344, + "step": 77190 + }, + { + "epoch": 1.8858866928883786, + "grad_norm": 62.23170852661133, + "learning_rate": 1.5673529960993832e-06, + "loss": 0.1402, + "num_input_tokens_seen": 52041120, + "step": 77195 + }, + { + "epoch": 1.8860088437202256, + "grad_norm": 0.8425273299217224, + "learning_rate": 1.5672827700910456e-06, + "loss": 0.0463, + "num_input_tokens_seen": 52044192, + "step": 77200 + }, + { + "epoch": 1.8861309945520728, + "grad_norm": 23.748029708862305, + "learning_rate": 1.5672125399572748e-06, + "loss": 0.071, + "num_input_tokens_seen": 52047840, + "step": 77205 + }, + { + "epoch": 1.88625314538392, + "grad_norm": 0.06841769814491272, + "learning_rate": 1.5671423056985824e-06, + "loss": 0.0378, + "num_input_tokens_seen": 52051296, + "step": 77210 + }, + { + "epoch": 1.8863752962157672, + "grad_norm": 0.13464903831481934, + "learning_rate": 1.5670720673154783e-06, + "loss": 0.0576, + "num_input_tokens_seen": 52055456, + "step": 77215 + }, + { + "epoch": 1.8864974470476144, + "grad_norm": 0.1433732807636261, + "learning_rate": 1.5670018248084735e-06, + "loss": 0.0263, + "num_input_tokens_seen": 52058592, + "step": 77220 + }, + { + "epoch": 1.8866195978794615, + "grad_norm": 0.3391604423522949, + "learning_rate": 1.566931578178079e-06, + "loss": 0.1117, + "num_input_tokens_seen": 52061856, + "step": 77225 + }, + { + "epoch": 1.8867417487113087, + "grad_norm": 0.9799418449401855, + "learning_rate": 1.5668613274248056e-06, + "loss": 0.0423, + "num_input_tokens_seen": 52065504, + "step": 77230 + }, + { + "epoch": 1.886863899543156, + "grad_norm": 21.551013946533203, + "learning_rate": 1.5667910725491645e-06, + "loss": 0.2192, + "num_input_tokens_seen": 52069024, + "step": 77235 + }, + { + "epoch": 1.886986050375003, + "grad_norm": 84.48568725585938, + "learning_rate": 1.5667208135516658e-06, + "loss": 0.0861, + "num_input_tokens_seen": 52073376, + "step": 77240 + }, + { + "epoch": 1.8871082012068503, + "grad_norm": 0.10802389681339264, + "learning_rate": 1.566650550432821e-06, + "loss": 0.0812, + "num_input_tokens_seen": 52076256, + "step": 77245 + }, + { + "epoch": 1.8872303520386975, + "grad_norm": 28.95943260192871, + "learning_rate": 1.5665802831931412e-06, + "loss": 0.1082, + "num_input_tokens_seen": 52079456, + "step": 77250 + }, + { + "epoch": 1.8873525028705447, + "grad_norm": 5.773714542388916, + "learning_rate": 1.5665100118331371e-06, + "loss": 0.0032, + "num_input_tokens_seen": 52082528, + "step": 77255 + }, + { + "epoch": 1.8874746537023919, + "grad_norm": 0.03499281033873558, + "learning_rate": 1.5664397363533198e-06, + "loss": 0.0648, + "num_input_tokens_seen": 52085792, + "step": 77260 + }, + { + "epoch": 1.8875968045342388, + "grad_norm": 0.3379361033439636, + "learning_rate": 1.5663694567542004e-06, + "loss": 0.167, + "num_input_tokens_seen": 52089184, + "step": 77265 + }, + { + "epoch": 1.887718955366086, + "grad_norm": 0.5242000222206116, + "learning_rate": 1.5662991730362899e-06, + "loss": 0.0372, + "num_input_tokens_seen": 52092448, + "step": 77270 + }, + { + "epoch": 1.8878411061979332, + "grad_norm": 29.624061584472656, + "learning_rate": 1.5662288852000995e-06, + "loss": 0.0688, + "num_input_tokens_seen": 52096096, + "step": 77275 + }, + { + "epoch": 1.8879632570297804, + "grad_norm": 0.6497032642364502, + "learning_rate": 1.5661585932461403e-06, + "loss": 0.0309, + "num_input_tokens_seen": 52099488, + "step": 77280 + }, + { + "epoch": 1.8880854078616274, + "grad_norm": 8.505046844482422, + "learning_rate": 1.5660882971749237e-06, + "loss": 0.0414, + "num_input_tokens_seen": 52103008, + "step": 77285 + }, + { + "epoch": 1.8882075586934746, + "grad_norm": 0.12121704965829849, + "learning_rate": 1.5660179969869604e-06, + "loss": 0.0011, + "num_input_tokens_seen": 52106464, + "step": 77290 + }, + { + "epoch": 1.8883297095253218, + "grad_norm": 0.01899469457566738, + "learning_rate": 1.5659476926827625e-06, + "loss": 0.0005, + "num_input_tokens_seen": 52109856, + "step": 77295 + }, + { + "epoch": 1.888451860357169, + "grad_norm": 0.14597168564796448, + "learning_rate": 1.5658773842628405e-06, + "loss": 0.1511, + "num_input_tokens_seen": 52113056, + "step": 77300 + }, + { + "epoch": 1.8885740111890161, + "grad_norm": 30.08360481262207, + "learning_rate": 1.565807071727706e-06, + "loss": 0.0981, + "num_input_tokens_seen": 52116512, + "step": 77305 + }, + { + "epoch": 1.8886961620208633, + "grad_norm": 79.73748016357422, + "learning_rate": 1.5657367550778702e-06, + "loss": 0.1299, + "num_input_tokens_seen": 52119968, + "step": 77310 + }, + { + "epoch": 1.8888183128527105, + "grad_norm": 26.3062801361084, + "learning_rate": 1.5656664343138447e-06, + "loss": 0.1116, + "num_input_tokens_seen": 52123232, + "step": 77315 + }, + { + "epoch": 1.8889404636845577, + "grad_norm": 0.47073596715927124, + "learning_rate": 1.5655961094361403e-06, + "loss": 0.0592, + "num_input_tokens_seen": 52126496, + "step": 77320 + }, + { + "epoch": 1.889062614516405, + "grad_norm": 13.913512229919434, + "learning_rate": 1.5655257804452696e-06, + "loss": 0.1957, + "num_input_tokens_seen": 52129952, + "step": 77325 + }, + { + "epoch": 1.889184765348252, + "grad_norm": 1.774534821510315, + "learning_rate": 1.5654554473417428e-06, + "loss": 0.1567, + "num_input_tokens_seen": 52133344, + "step": 77330 + }, + { + "epoch": 1.8893069161800993, + "grad_norm": 30.571760177612305, + "learning_rate": 1.565385110126072e-06, + "loss": 0.0769, + "num_input_tokens_seen": 52136480, + "step": 77335 + }, + { + "epoch": 1.8894290670119465, + "grad_norm": 0.2755120098590851, + "learning_rate": 1.5653147687987684e-06, + "loss": 0.0255, + "num_input_tokens_seen": 52139616, + "step": 77340 + }, + { + "epoch": 1.8895512178437937, + "grad_norm": 0.5474405884742737, + "learning_rate": 1.565244423360344e-06, + "loss": 0.0013, + "num_input_tokens_seen": 52142816, + "step": 77345 + }, + { + "epoch": 1.8896733686756408, + "grad_norm": 112.4779052734375, + "learning_rate": 1.5651740738113101e-06, + "loss": 0.0546, + "num_input_tokens_seen": 52145760, + "step": 77350 + }, + { + "epoch": 1.8897955195074878, + "grad_norm": 5.316534519195557, + "learning_rate": 1.5651037201521784e-06, + "loss": 0.0848, + "num_input_tokens_seen": 52149024, + "step": 77355 + }, + { + "epoch": 1.889917670339335, + "grad_norm": 0.46590232849121094, + "learning_rate": 1.5650333623834607e-06, + "loss": 0.0013, + "num_input_tokens_seen": 52152992, + "step": 77360 + }, + { + "epoch": 1.8900398211711822, + "grad_norm": 12.223104476928711, + "learning_rate": 1.564963000505668e-06, + "loss": 0.1025, + "num_input_tokens_seen": 52156256, + "step": 77365 + }, + { + "epoch": 1.8901619720030294, + "grad_norm": 0.6928628087043762, + "learning_rate": 1.5648926345193123e-06, + "loss": 0.0752, + "num_input_tokens_seen": 52159712, + "step": 77370 + }, + { + "epoch": 1.8902841228348763, + "grad_norm": 41.46940994262695, + "learning_rate": 1.564822264424906e-06, + "loss": 0.1883, + "num_input_tokens_seen": 52162592, + "step": 77375 + }, + { + "epoch": 1.8904062736667235, + "grad_norm": 15.121652603149414, + "learning_rate": 1.5647518902229594e-06, + "loss": 0.0896, + "num_input_tokens_seen": 52166112, + "step": 77380 + }, + { + "epoch": 1.8905284244985707, + "grad_norm": 26.036794662475586, + "learning_rate": 1.564681511913986e-06, + "loss": 0.0747, + "num_input_tokens_seen": 52169888, + "step": 77385 + }, + { + "epoch": 1.890650575330418, + "grad_norm": 0.16673624515533447, + "learning_rate": 1.5646111294984963e-06, + "loss": 0.1649, + "num_input_tokens_seen": 52173280, + "step": 77390 + }, + { + "epoch": 1.890772726162265, + "grad_norm": 0.116355299949646, + "learning_rate": 1.5645407429770025e-06, + "loss": 0.0928, + "num_input_tokens_seen": 52176800, + "step": 77395 + }, + { + "epoch": 1.8908948769941123, + "grad_norm": 0.3401149809360504, + "learning_rate": 1.564470352350017e-06, + "loss": 0.0702, + "num_input_tokens_seen": 52180000, + "step": 77400 + }, + { + "epoch": 1.8910170278259595, + "grad_norm": 0.05878855660557747, + "learning_rate": 1.5643999576180509e-06, + "loss": 0.0429, + "num_input_tokens_seen": 52183392, + "step": 77405 + }, + { + "epoch": 1.8911391786578067, + "grad_norm": 8.723549842834473, + "learning_rate": 1.5643295587816167e-06, + "loss": 0.143, + "num_input_tokens_seen": 52186720, + "step": 77410 + }, + { + "epoch": 1.8912613294896539, + "grad_norm": 123.5528564453125, + "learning_rate": 1.5642591558412263e-06, + "loss": 0.0799, + "num_input_tokens_seen": 52189728, + "step": 77415 + }, + { + "epoch": 1.891383480321501, + "grad_norm": 0.3717953562736511, + "learning_rate": 1.5641887487973914e-06, + "loss": 0.0017, + "num_input_tokens_seen": 52193248, + "step": 77420 + }, + { + "epoch": 1.8915056311533482, + "grad_norm": 0.2517523765563965, + "learning_rate": 1.564118337650624e-06, + "loss": 0.0022, + "num_input_tokens_seen": 52196512, + "step": 77425 + }, + { + "epoch": 1.8916277819851954, + "grad_norm": 0.0997331365942955, + "learning_rate": 1.5640479224014364e-06, + "loss": 0.0443, + "num_input_tokens_seen": 52199648, + "step": 77430 + }, + { + "epoch": 1.8917499328170426, + "grad_norm": 0.14800933003425598, + "learning_rate": 1.5639775030503409e-06, + "loss": 0.0391, + "num_input_tokens_seen": 52203104, + "step": 77435 + }, + { + "epoch": 1.8918720836488898, + "grad_norm": 0.1393311619758606, + "learning_rate": 1.5639070795978491e-06, + "loss": 0.0855, + "num_input_tokens_seen": 52206688, + "step": 77440 + }, + { + "epoch": 1.8919942344807368, + "grad_norm": 24.84368133544922, + "learning_rate": 1.5638366520444732e-06, + "loss": 0.1721, + "num_input_tokens_seen": 52210976, + "step": 77445 + }, + { + "epoch": 1.892116385312584, + "grad_norm": 32.104915618896484, + "learning_rate": 1.5637662203907255e-06, + "loss": 0.0824, + "num_input_tokens_seen": 52214240, + "step": 77450 + }, + { + "epoch": 1.8922385361444312, + "grad_norm": 0.10727355629205704, + "learning_rate": 1.5636957846371184e-06, + "loss": 0.0363, + "num_input_tokens_seen": 52217184, + "step": 77455 + }, + { + "epoch": 1.8923606869762784, + "grad_norm": 0.18539845943450928, + "learning_rate": 1.563625344784164e-06, + "loss": 0.0547, + "num_input_tokens_seen": 52220384, + "step": 77460 + }, + { + "epoch": 1.8924828378081253, + "grad_norm": 9.1328763961792, + "learning_rate": 1.5635549008323742e-06, + "loss": 0.166, + "num_input_tokens_seen": 52223584, + "step": 77465 + }, + { + "epoch": 1.8926049886399725, + "grad_norm": 15.195426940917969, + "learning_rate": 1.5634844527822617e-06, + "loss": 0.0522, + "num_input_tokens_seen": 52226592, + "step": 77470 + }, + { + "epoch": 1.8927271394718197, + "grad_norm": 0.5996956825256348, + "learning_rate": 1.563414000634339e-06, + "loss": 0.0969, + "num_input_tokens_seen": 52229664, + "step": 77475 + }, + { + "epoch": 1.8928492903036669, + "grad_norm": 1.2738440036773682, + "learning_rate": 1.563343544389118e-06, + "loss": 0.0367, + "num_input_tokens_seen": 52232800, + "step": 77480 + }, + { + "epoch": 1.892971441135514, + "grad_norm": 0.11556824296712875, + "learning_rate": 1.563273084047111e-06, + "loss": 0.0511, + "num_input_tokens_seen": 52236320, + "step": 77485 + }, + { + "epoch": 1.8930935919673613, + "grad_norm": 0.18463782966136932, + "learning_rate": 1.5632026196088308e-06, + "loss": 0.0613, + "num_input_tokens_seen": 52240032, + "step": 77490 + }, + { + "epoch": 1.8932157427992085, + "grad_norm": 0.044787704944610596, + "learning_rate": 1.5631321510747894e-06, + "loss": 0.0434, + "num_input_tokens_seen": 52243424, + "step": 77495 + }, + { + "epoch": 1.8933378936310556, + "grad_norm": 11.942049980163574, + "learning_rate": 1.5630616784455e-06, + "loss": 0.1223, + "num_input_tokens_seen": 52246880, + "step": 77500 + }, + { + "epoch": 1.8934600444629028, + "grad_norm": 0.16144710779190063, + "learning_rate": 1.5629912017214744e-06, + "loss": 0.0921, + "num_input_tokens_seen": 52250336, + "step": 77505 + }, + { + "epoch": 1.89358219529475, + "grad_norm": 16.349254608154297, + "learning_rate": 1.5629207209032252e-06, + "loss": 0.1228, + "num_input_tokens_seen": 52253792, + "step": 77510 + }, + { + "epoch": 1.8937043461265972, + "grad_norm": 38.79521179199219, + "learning_rate": 1.5628502359912652e-06, + "loss": 0.164, + "num_input_tokens_seen": 52257184, + "step": 77515 + }, + { + "epoch": 1.8938264969584444, + "grad_norm": 0.5846835374832153, + "learning_rate": 1.562779746986107e-06, + "loss": 0.1079, + "num_input_tokens_seen": 52260576, + "step": 77520 + }, + { + "epoch": 1.8939486477902916, + "grad_norm": 7.383497714996338, + "learning_rate": 1.5627092538882632e-06, + "loss": 0.0671, + "num_input_tokens_seen": 52264352, + "step": 77525 + }, + { + "epoch": 1.8940707986221386, + "grad_norm": 15.111196517944336, + "learning_rate": 1.562638756698246e-06, + "loss": 0.0792, + "num_input_tokens_seen": 52267808, + "step": 77530 + }, + { + "epoch": 1.8941929494539858, + "grad_norm": 0.34534600377082825, + "learning_rate": 1.562568255416569e-06, + "loss": 0.1215, + "num_input_tokens_seen": 52271520, + "step": 77535 + }, + { + "epoch": 1.894315100285833, + "grad_norm": 0.1826377511024475, + "learning_rate": 1.5624977500437437e-06, + "loss": 0.0386, + "num_input_tokens_seen": 52274656, + "step": 77540 + }, + { + "epoch": 1.8944372511176801, + "grad_norm": 0.25956910848617554, + "learning_rate": 1.5624272405802838e-06, + "loss": 0.0021, + "num_input_tokens_seen": 52277792, + "step": 77545 + }, + { + "epoch": 1.8945594019495273, + "grad_norm": 0.15048423409461975, + "learning_rate": 1.5623567270267018e-06, + "loss": 0.0528, + "num_input_tokens_seen": 52281120, + "step": 77550 + }, + { + "epoch": 1.8946815527813743, + "grad_norm": 0.8038405179977417, + "learning_rate": 1.5622862093835102e-06, + "loss": 0.026, + "num_input_tokens_seen": 52284448, + "step": 77555 + }, + { + "epoch": 1.8948037036132215, + "grad_norm": 0.24553032219409943, + "learning_rate": 1.5622156876512223e-06, + "loss": 0.0388, + "num_input_tokens_seen": 52287456, + "step": 77560 + }, + { + "epoch": 1.8949258544450687, + "grad_norm": 0.2718951106071472, + "learning_rate": 1.5621451618303505e-06, + "loss": 0.0014, + "num_input_tokens_seen": 52290592, + "step": 77565 + }, + { + "epoch": 1.8950480052769159, + "grad_norm": 75.5166244506836, + "learning_rate": 1.5620746319214078e-06, + "loss": 0.1395, + "num_input_tokens_seen": 52294112, + "step": 77570 + }, + { + "epoch": 1.895170156108763, + "grad_norm": 13.819537162780762, + "learning_rate": 1.5620040979249074e-06, + "loss": 0.0677, + "num_input_tokens_seen": 52297184, + "step": 77575 + }, + { + "epoch": 1.8952923069406102, + "grad_norm": 0.12002184242010117, + "learning_rate": 1.561933559841362e-06, + "loss": 0.0555, + "num_input_tokens_seen": 52301152, + "step": 77580 + }, + { + "epoch": 1.8954144577724574, + "grad_norm": 21.226789474487305, + "learning_rate": 1.5618630176712846e-06, + "loss": 0.231, + "num_input_tokens_seen": 52304416, + "step": 77585 + }, + { + "epoch": 1.8955366086043046, + "grad_norm": 0.043165311217308044, + "learning_rate": 1.561792471415188e-06, + "loss": 0.1493, + "num_input_tokens_seen": 52308000, + "step": 77590 + }, + { + "epoch": 1.8956587594361518, + "grad_norm": 0.0803232491016388, + "learning_rate": 1.5617219210735858e-06, + "loss": 0.1698, + "num_input_tokens_seen": 52311200, + "step": 77595 + }, + { + "epoch": 1.895780910267999, + "grad_norm": 0.17685692012310028, + "learning_rate": 1.5616513666469904e-06, + "loss": 0.0042, + "num_input_tokens_seen": 52314976, + "step": 77600 + }, + { + "epoch": 1.8959030610998462, + "grad_norm": 8.60054874420166, + "learning_rate": 1.5615808081359154e-06, + "loss": 0.1115, + "num_input_tokens_seen": 52317856, + "step": 77605 + }, + { + "epoch": 1.8960252119316934, + "grad_norm": 8.479482650756836, + "learning_rate": 1.5615102455408735e-06, + "loss": 0.0836, + "num_input_tokens_seen": 52321056, + "step": 77610 + }, + { + "epoch": 1.8961473627635406, + "grad_norm": 0.04393242672085762, + "learning_rate": 1.5614396788623786e-06, + "loss": 0.0015, + "num_input_tokens_seen": 52324256, + "step": 77615 + }, + { + "epoch": 1.8962695135953875, + "grad_norm": 0.5366582274436951, + "learning_rate": 1.5613691081009428e-06, + "loss": 0.0791, + "num_input_tokens_seen": 52327328, + "step": 77620 + }, + { + "epoch": 1.8963916644272347, + "grad_norm": 39.71879196166992, + "learning_rate": 1.56129853325708e-06, + "loss": 0.0862, + "num_input_tokens_seen": 52330528, + "step": 77625 + }, + { + "epoch": 1.896513815259082, + "grad_norm": 2.7838211059570312, + "learning_rate": 1.5612279543313033e-06, + "loss": 0.0818, + "num_input_tokens_seen": 52334880, + "step": 77630 + }, + { + "epoch": 1.896635966090929, + "grad_norm": 8.047002792358398, + "learning_rate": 1.561157371324126e-06, + "loss": 0.0512, + "num_input_tokens_seen": 52338720, + "step": 77635 + }, + { + "epoch": 1.8967581169227763, + "grad_norm": 0.09864991903305054, + "learning_rate": 1.5610867842360614e-06, + "loss": 0.0703, + "num_input_tokens_seen": 52341984, + "step": 77640 + }, + { + "epoch": 1.8968802677546233, + "grad_norm": 7.230635166168213, + "learning_rate": 1.5610161930676226e-06, + "loss": 0.064, + "num_input_tokens_seen": 52344928, + "step": 77645 + }, + { + "epoch": 1.8970024185864705, + "grad_norm": 182.32345581054688, + "learning_rate": 1.5609455978193232e-06, + "loss": 0.0177, + "num_input_tokens_seen": 52348448, + "step": 77650 + }, + { + "epoch": 1.8971245694183176, + "grad_norm": 0.32675567269325256, + "learning_rate": 1.5608749984916767e-06, + "loss": 0.071, + "num_input_tokens_seen": 52351648, + "step": 77655 + }, + { + "epoch": 1.8972467202501648, + "grad_norm": 44.629539489746094, + "learning_rate": 1.5608043950851964e-06, + "loss": 0.0611, + "num_input_tokens_seen": 52354912, + "step": 77660 + }, + { + "epoch": 1.897368871082012, + "grad_norm": 0.3594166338443756, + "learning_rate": 1.5607337876003954e-06, + "loss": 0.0366, + "num_input_tokens_seen": 52358112, + "step": 77665 + }, + { + "epoch": 1.8974910219138592, + "grad_norm": 0.12800997495651245, + "learning_rate": 1.5606631760377878e-06, + "loss": 0.0577, + "num_input_tokens_seen": 52360928, + "step": 77670 + }, + { + "epoch": 1.8976131727457064, + "grad_norm": 0.0739443376660347, + "learning_rate": 1.5605925603978866e-06, + "loss": 0.044, + "num_input_tokens_seen": 52364256, + "step": 77675 + }, + { + "epoch": 1.8977353235775536, + "grad_norm": 14.31065845489502, + "learning_rate": 1.5605219406812054e-06, + "loss": 0.1288, + "num_input_tokens_seen": 52367392, + "step": 77680 + }, + { + "epoch": 1.8978574744094008, + "grad_norm": 13.595717430114746, + "learning_rate": 1.5604513168882582e-06, + "loss": 0.0254, + "num_input_tokens_seen": 52370976, + "step": 77685 + }, + { + "epoch": 1.897979625241248, + "grad_norm": 0.24001628160476685, + "learning_rate": 1.560380689019558e-06, + "loss": 0.1965, + "num_input_tokens_seen": 52374240, + "step": 77690 + }, + { + "epoch": 1.8981017760730952, + "grad_norm": 184.75628662109375, + "learning_rate": 1.5603100570756192e-06, + "loss": 0.0589, + "num_input_tokens_seen": 52377248, + "step": 77695 + }, + { + "epoch": 1.8982239269049423, + "grad_norm": 10.1908597946167, + "learning_rate": 1.5602394210569544e-06, + "loss": 0.1434, + "num_input_tokens_seen": 52380320, + "step": 77700 + }, + { + "epoch": 1.8983460777367895, + "grad_norm": 0.0293444711714983, + "learning_rate": 1.560168780964078e-06, + "loss": 0.0915, + "num_input_tokens_seen": 52383904, + "step": 77705 + }, + { + "epoch": 1.8984682285686365, + "grad_norm": 0.5846049785614014, + "learning_rate": 1.5600981367975037e-06, + "loss": 0.0484, + "num_input_tokens_seen": 52387168, + "step": 77710 + }, + { + "epoch": 1.8985903794004837, + "grad_norm": 1.261268138885498, + "learning_rate": 1.5600274885577446e-06, + "loss": 0.0511, + "num_input_tokens_seen": 52389984, + "step": 77715 + }, + { + "epoch": 1.8987125302323309, + "grad_norm": 0.9083055853843689, + "learning_rate": 1.5599568362453158e-06, + "loss": 0.0516, + "num_input_tokens_seen": 52393056, + "step": 77720 + }, + { + "epoch": 1.898834681064178, + "grad_norm": 0.023565126582980156, + "learning_rate": 1.5598861798607297e-06, + "loss": 0.0975, + "num_input_tokens_seen": 52396512, + "step": 77725 + }, + { + "epoch": 1.8989568318960253, + "grad_norm": 47.272361755371094, + "learning_rate": 1.5598155194045007e-06, + "loss": 0.1994, + "num_input_tokens_seen": 52399840, + "step": 77730 + }, + { + "epoch": 1.8990789827278722, + "grad_norm": 0.0744083821773529, + "learning_rate": 1.559744854877143e-06, + "loss": 0.0337, + "num_input_tokens_seen": 52403360, + "step": 77735 + }, + { + "epoch": 1.8992011335597194, + "grad_norm": 20.310832977294922, + "learning_rate": 1.55967418627917e-06, + "loss": 0.1266, + "num_input_tokens_seen": 52407136, + "step": 77740 + }, + { + "epoch": 1.8993232843915666, + "grad_norm": 20.577260971069336, + "learning_rate": 1.5596035136110957e-06, + "loss": 0.1016, + "num_input_tokens_seen": 52410464, + "step": 77745 + }, + { + "epoch": 1.8994454352234138, + "grad_norm": 4.613851547241211, + "learning_rate": 1.559532836873434e-06, + "loss": 0.0045, + "num_input_tokens_seen": 52413728, + "step": 77750 + }, + { + "epoch": 1.899567586055261, + "grad_norm": 53.65876770019531, + "learning_rate": 1.5594621560666994e-06, + "loss": 0.004, + "num_input_tokens_seen": 52416800, + "step": 77755 + }, + { + "epoch": 1.8996897368871082, + "grad_norm": 0.1271817535161972, + "learning_rate": 1.5593914711914054e-06, + "loss": 0.09, + "num_input_tokens_seen": 52420064, + "step": 77760 + }, + { + "epoch": 1.8998118877189554, + "grad_norm": 0.3270379900932312, + "learning_rate": 1.5593207822480661e-06, + "loss": 0.0992, + "num_input_tokens_seen": 52423776, + "step": 77765 + }, + { + "epoch": 1.8999340385508026, + "grad_norm": 23.36074447631836, + "learning_rate": 1.5592500892371958e-06, + "loss": 0.0913, + "num_input_tokens_seen": 52427104, + "step": 77770 + }, + { + "epoch": 1.9000561893826498, + "grad_norm": 0.04623517394065857, + "learning_rate": 1.5591793921593079e-06, + "loss": 0.0226, + "num_input_tokens_seen": 52430048, + "step": 77775 + }, + { + "epoch": 1.900178340214497, + "grad_norm": 0.12228715419769287, + "learning_rate": 1.5591086910149174e-06, + "loss": 0.144, + "num_input_tokens_seen": 52433632, + "step": 77780 + }, + { + "epoch": 1.9003004910463441, + "grad_norm": 203.8829345703125, + "learning_rate": 1.5590379858045384e-06, + "loss": 0.0424, + "num_input_tokens_seen": 52436896, + "step": 77785 + }, + { + "epoch": 1.9004226418781913, + "grad_norm": 36.67129135131836, + "learning_rate": 1.5589672765286846e-06, + "loss": 0.1, + "num_input_tokens_seen": 52440096, + "step": 77790 + }, + { + "epoch": 1.9005447927100385, + "grad_norm": 0.8187667727470398, + "learning_rate": 1.5588965631878704e-06, + "loss": 0.0882, + "num_input_tokens_seen": 52443552, + "step": 77795 + }, + { + "epoch": 1.9006669435418855, + "grad_norm": 0.08393049985170364, + "learning_rate": 1.5588258457826098e-06, + "loss": 0.001, + "num_input_tokens_seen": 52447456, + "step": 77800 + }, + { + "epoch": 1.9007890943737327, + "grad_norm": 0.029797382652759552, + "learning_rate": 1.5587551243134173e-06, + "loss": 0.0901, + "num_input_tokens_seen": 52451040, + "step": 77805 + }, + { + "epoch": 1.9009112452055799, + "grad_norm": 270.3726806640625, + "learning_rate": 1.5586843987808078e-06, + "loss": 0.0213, + "num_input_tokens_seen": 52454560, + "step": 77810 + }, + { + "epoch": 1.901033396037427, + "grad_norm": 0.9959912300109863, + "learning_rate": 1.558613669185295e-06, + "loss": 0.0803, + "num_input_tokens_seen": 52458016, + "step": 77815 + }, + { + "epoch": 1.901155546869274, + "grad_norm": 0.7972824573516846, + "learning_rate": 1.558542935527393e-06, + "loss": 0.0008, + "num_input_tokens_seen": 52461088, + "step": 77820 + }, + { + "epoch": 1.9012776977011212, + "grad_norm": 23.410091400146484, + "learning_rate": 1.5584721978076167e-06, + "loss": 0.2709, + "num_input_tokens_seen": 52465184, + "step": 77825 + }, + { + "epoch": 1.9013998485329684, + "grad_norm": 0.019310766831040382, + "learning_rate": 1.5584014560264803e-06, + "loss": 0.0012, + "num_input_tokens_seen": 52468384, + "step": 77830 + }, + { + "epoch": 1.9015219993648156, + "grad_norm": 50.934814453125, + "learning_rate": 1.5583307101844984e-06, + "loss": 0.0785, + "num_input_tokens_seen": 52471456, + "step": 77835 + }, + { + "epoch": 1.9016441501966628, + "grad_norm": 11.939221382141113, + "learning_rate": 1.5582599602821854e-06, + "loss": 0.1133, + "num_input_tokens_seen": 52474912, + "step": 77840 + }, + { + "epoch": 1.90176630102851, + "grad_norm": 8.909550666809082, + "learning_rate": 1.5581892063200556e-06, + "loss": 0.128, + "num_input_tokens_seen": 52478496, + "step": 77845 + }, + { + "epoch": 1.9018884518603572, + "grad_norm": 0.1402382254600525, + "learning_rate": 1.5581184482986242e-06, + "loss": 0.0018, + "num_input_tokens_seen": 52482144, + "step": 77850 + }, + { + "epoch": 1.9020106026922043, + "grad_norm": 0.1215723529458046, + "learning_rate": 1.558047686218405e-06, + "loss": 0.0545, + "num_input_tokens_seen": 52485664, + "step": 77855 + }, + { + "epoch": 1.9021327535240515, + "grad_norm": 0.1959735006093979, + "learning_rate": 1.5579769200799132e-06, + "loss": 0.0012, + "num_input_tokens_seen": 52489120, + "step": 77860 + }, + { + "epoch": 1.9022549043558987, + "grad_norm": 0.03554701805114746, + "learning_rate": 1.557906149883663e-06, + "loss": 0.0627, + "num_input_tokens_seen": 52492192, + "step": 77865 + }, + { + "epoch": 1.902377055187746, + "grad_norm": 14.456132888793945, + "learning_rate": 1.557835375630169e-06, + "loss": 0.0922, + "num_input_tokens_seen": 52495840, + "step": 77870 + }, + { + "epoch": 1.902499206019593, + "grad_norm": 1.007307767868042, + "learning_rate": 1.5577645973199465e-06, + "loss": 0.0438, + "num_input_tokens_seen": 52498784, + "step": 77875 + }, + { + "epoch": 1.9026213568514403, + "grad_norm": 7.78939151763916, + "learning_rate": 1.5576938149535096e-06, + "loss": 0.1144, + "num_input_tokens_seen": 52502176, + "step": 77880 + }, + { + "epoch": 1.9027435076832875, + "grad_norm": 1.2881722450256348, + "learning_rate": 1.5576230285313732e-06, + "loss": 0.0658, + "num_input_tokens_seen": 52505696, + "step": 77885 + }, + { + "epoch": 1.9028656585151345, + "grad_norm": 0.14054358005523682, + "learning_rate": 1.5575522380540522e-06, + "loss": 0.0224, + "num_input_tokens_seen": 52508832, + "step": 77890 + }, + { + "epoch": 1.9029878093469816, + "grad_norm": 0.10390869528055191, + "learning_rate": 1.5574814435220616e-06, + "loss": 0.037, + "num_input_tokens_seen": 52511840, + "step": 77895 + }, + { + "epoch": 1.9031099601788288, + "grad_norm": 0.19694241881370544, + "learning_rate": 1.5574106449359157e-06, + "loss": 0.1035, + "num_input_tokens_seen": 52515104, + "step": 77900 + }, + { + "epoch": 1.903232111010676, + "grad_norm": 0.08465810120105743, + "learning_rate": 1.55733984229613e-06, + "loss": 0.1224, + "num_input_tokens_seen": 52518752, + "step": 77905 + }, + { + "epoch": 1.903354261842523, + "grad_norm": 0.5364916324615479, + "learning_rate": 1.5572690356032187e-06, + "loss": 0.1126, + "num_input_tokens_seen": 52522208, + "step": 77910 + }, + { + "epoch": 1.9034764126743702, + "grad_norm": 1.2812743186950684, + "learning_rate": 1.557198224857697e-06, + "loss": 0.0724, + "num_input_tokens_seen": 52525472, + "step": 77915 + }, + { + "epoch": 1.9035985635062174, + "grad_norm": 75.62810516357422, + "learning_rate": 1.5571274100600805e-06, + "loss": 0.1327, + "num_input_tokens_seen": 52528928, + "step": 77920 + }, + { + "epoch": 1.9037207143380646, + "grad_norm": 0.19514200091362, + "learning_rate": 1.5570565912108833e-06, + "loss": 0.0025, + "num_input_tokens_seen": 52532832, + "step": 77925 + }, + { + "epoch": 1.9038428651699117, + "grad_norm": 32.084651947021484, + "learning_rate": 1.5569857683106205e-06, + "loss": 0.0865, + "num_input_tokens_seen": 52536224, + "step": 77930 + }, + { + "epoch": 1.903965016001759, + "grad_norm": 0.9640393853187561, + "learning_rate": 1.5569149413598077e-06, + "loss": 0.1001, + "num_input_tokens_seen": 52539680, + "step": 77935 + }, + { + "epoch": 1.9040871668336061, + "grad_norm": 2.7728748321533203, + "learning_rate": 1.5568441103589596e-06, + "loss": 0.0035, + "num_input_tokens_seen": 52542624, + "step": 77940 + }, + { + "epoch": 1.9042093176654533, + "grad_norm": 0.49883055686950684, + "learning_rate": 1.5567732753085915e-06, + "loss": 0.0354, + "num_input_tokens_seen": 52545952, + "step": 77945 + }, + { + "epoch": 1.9043314684973005, + "grad_norm": 0.12617257237434387, + "learning_rate": 1.556702436209218e-06, + "loss": 0.0308, + "num_input_tokens_seen": 52549408, + "step": 77950 + }, + { + "epoch": 1.9044536193291477, + "grad_norm": 1.3749181032180786, + "learning_rate": 1.556631593061355e-06, + "loss": 0.0018, + "num_input_tokens_seen": 52552544, + "step": 77955 + }, + { + "epoch": 1.9045757701609949, + "grad_norm": 48.934146881103516, + "learning_rate": 1.556560745865517e-06, + "loss": 0.1904, + "num_input_tokens_seen": 52556000, + "step": 77960 + }, + { + "epoch": 1.904697920992842, + "grad_norm": 27.332305908203125, + "learning_rate": 1.5564898946222198e-06, + "loss": 0.1821, + "num_input_tokens_seen": 52559712, + "step": 77965 + }, + { + "epoch": 1.9048200718246893, + "grad_norm": 0.02467111311852932, + "learning_rate": 1.5564190393319784e-06, + "loss": 0.0012, + "num_input_tokens_seen": 52563168, + "step": 77970 + }, + { + "epoch": 1.9049422226565365, + "grad_norm": 0.9920415282249451, + "learning_rate": 1.5563481799953082e-06, + "loss": 0.0939, + "num_input_tokens_seen": 52566944, + "step": 77975 + }, + { + "epoch": 1.9050643734883834, + "grad_norm": 26.86354637145996, + "learning_rate": 1.556277316612724e-06, + "loss": 0.1157, + "num_input_tokens_seen": 52570144, + "step": 77980 + }, + { + "epoch": 1.9051865243202306, + "grad_norm": 0.035159774124622345, + "learning_rate": 1.556206449184742e-06, + "loss": 0.0557, + "num_input_tokens_seen": 52573216, + "step": 77985 + }, + { + "epoch": 1.9053086751520778, + "grad_norm": 14.330402374267578, + "learning_rate": 1.5561355777118768e-06, + "loss": 0.158, + "num_input_tokens_seen": 52575904, + "step": 77990 + }, + { + "epoch": 1.905430825983925, + "grad_norm": 0.6020281910896301, + "learning_rate": 1.5560647021946442e-06, + "loss": 0.0497, + "num_input_tokens_seen": 52579232, + "step": 77995 + }, + { + "epoch": 1.905552976815772, + "grad_norm": 6.074164867401123, + "learning_rate": 1.5559938226335593e-06, + "loss": 0.0926, + "num_input_tokens_seen": 52582432, + "step": 78000 + }, + { + "epoch": 1.9056751276476191, + "grad_norm": 0.13777603209018707, + "learning_rate": 1.5559229390291382e-06, + "loss": 0.0307, + "num_input_tokens_seen": 52585696, + "step": 78005 + }, + { + "epoch": 1.9057972784794663, + "grad_norm": 25.201900482177734, + "learning_rate": 1.5558520513818958e-06, + "loss": 0.3789, + "num_input_tokens_seen": 52588448, + "step": 78010 + }, + { + "epoch": 1.9059194293113135, + "grad_norm": 0.08686228096485138, + "learning_rate": 1.5557811596923477e-06, + "loss": 0.047, + "num_input_tokens_seen": 52591712, + "step": 78015 + }, + { + "epoch": 1.9060415801431607, + "grad_norm": 0.028829792514443398, + "learning_rate": 1.5557102639610095e-06, + "loss": 0.0008, + "num_input_tokens_seen": 52594976, + "step": 78020 + }, + { + "epoch": 1.906163730975008, + "grad_norm": 16.483469009399414, + "learning_rate": 1.555639364188397e-06, + "loss": 0.3379, + "num_input_tokens_seen": 52598560, + "step": 78025 + }, + { + "epoch": 1.906285881806855, + "grad_norm": 0.33806437253952026, + "learning_rate": 1.5555684603750252e-06, + "loss": 0.0404, + "num_input_tokens_seen": 52601440, + "step": 78030 + }, + { + "epoch": 1.9064080326387023, + "grad_norm": 12.354910850524902, + "learning_rate": 1.5554975525214104e-06, + "loss": 0.0938, + "num_input_tokens_seen": 52604640, + "step": 78035 + }, + { + "epoch": 1.9065301834705495, + "grad_norm": 0.7182216644287109, + "learning_rate": 1.555426640628068e-06, + "loss": 0.1132, + "num_input_tokens_seen": 52607904, + "step": 78040 + }, + { + "epoch": 1.9066523343023967, + "grad_norm": 0.13206681609153748, + "learning_rate": 1.5553557246955137e-06, + "loss": 0.0164, + "num_input_tokens_seen": 52610848, + "step": 78045 + }, + { + "epoch": 1.9067744851342439, + "grad_norm": 0.39031508564949036, + "learning_rate": 1.555284804724263e-06, + "loss": 0.0401, + "num_input_tokens_seen": 52614304, + "step": 78050 + }, + { + "epoch": 1.906896635966091, + "grad_norm": 0.11643342673778534, + "learning_rate": 1.5552138807148318e-06, + "loss": 0.1338, + "num_input_tokens_seen": 52618208, + "step": 78055 + }, + { + "epoch": 1.9070187867979382, + "grad_norm": 0.06826582551002502, + "learning_rate": 1.5551429526677363e-06, + "loss": 0.0733, + "num_input_tokens_seen": 52621216, + "step": 78060 + }, + { + "epoch": 1.9071409376297852, + "grad_norm": 0.1077682301402092, + "learning_rate": 1.5550720205834917e-06, + "loss": 0.0397, + "num_input_tokens_seen": 52624736, + "step": 78065 + }, + { + "epoch": 1.9072630884616324, + "grad_norm": 8.253615379333496, + "learning_rate": 1.555001084462614e-06, + "loss": 0.154, + "num_input_tokens_seen": 52627936, + "step": 78070 + }, + { + "epoch": 1.9073852392934796, + "grad_norm": 13.16157341003418, + "learning_rate": 1.5549301443056192e-06, + "loss": 0.1156, + "num_input_tokens_seen": 52631200, + "step": 78075 + }, + { + "epoch": 1.9075073901253268, + "grad_norm": 1.2118772268295288, + "learning_rate": 1.5548592001130234e-06, + "loss": 0.0882, + "num_input_tokens_seen": 52635104, + "step": 78080 + }, + { + "epoch": 1.907629540957174, + "grad_norm": 82.68314361572266, + "learning_rate": 1.5547882518853417e-06, + "loss": 0.2286, + "num_input_tokens_seen": 52638048, + "step": 78085 + }, + { + "epoch": 1.907751691789021, + "grad_norm": 16.325532913208008, + "learning_rate": 1.554717299623091e-06, + "loss": 0.203, + "num_input_tokens_seen": 52641312, + "step": 78090 + }, + { + "epoch": 1.9078738426208681, + "grad_norm": 13.200202941894531, + "learning_rate": 1.554646343326787e-06, + "loss": 0.0695, + "num_input_tokens_seen": 52644704, + "step": 78095 + }, + { + "epoch": 1.9079959934527153, + "grad_norm": 0.7149518728256226, + "learning_rate": 1.5545753829969455e-06, + "loss": 0.0021, + "num_input_tokens_seen": 52647904, + "step": 78100 + }, + { + "epoch": 1.9081181442845625, + "grad_norm": 38.67225646972656, + "learning_rate": 1.5545044186340826e-06, + "loss": 0.0808, + "num_input_tokens_seen": 52651424, + "step": 78105 + }, + { + "epoch": 1.9082402951164097, + "grad_norm": 0.16422489285469055, + "learning_rate": 1.554433450238714e-06, + "loss": 0.0022, + "num_input_tokens_seen": 52654624, + "step": 78110 + }, + { + "epoch": 1.9083624459482569, + "grad_norm": 0.5577579736709595, + "learning_rate": 1.5543624778113568e-06, + "loss": 0.0401, + "num_input_tokens_seen": 52657824, + "step": 78115 + }, + { + "epoch": 1.908484596780104, + "grad_norm": 0.15506233274936676, + "learning_rate": 1.5542915013525265e-06, + "loss": 0.0846, + "num_input_tokens_seen": 52660832, + "step": 78120 + }, + { + "epoch": 1.9086067476119513, + "grad_norm": 0.20442821085453033, + "learning_rate": 1.5542205208627393e-06, + "loss": 0.1013, + "num_input_tokens_seen": 52664288, + "step": 78125 + }, + { + "epoch": 1.9087288984437984, + "grad_norm": 0.047798607498407364, + "learning_rate": 1.5541495363425113e-06, + "loss": 0.0475, + "num_input_tokens_seen": 52667552, + "step": 78130 + }, + { + "epoch": 1.9088510492756456, + "grad_norm": 1.5664831399917603, + "learning_rate": 1.5540785477923587e-06, + "loss": 0.0094, + "num_input_tokens_seen": 52671072, + "step": 78135 + }, + { + "epoch": 1.9089732001074928, + "grad_norm": 7.628649711608887, + "learning_rate": 1.5540075552127982e-06, + "loss": 0.1499, + "num_input_tokens_seen": 52674272, + "step": 78140 + }, + { + "epoch": 1.90909535093934, + "grad_norm": 38.61286926269531, + "learning_rate": 1.5539365586043456e-06, + "loss": 0.1028, + "num_input_tokens_seen": 52677600, + "step": 78145 + }, + { + "epoch": 1.9092175017711872, + "grad_norm": 0.0798473060131073, + "learning_rate": 1.553865557967517e-06, + "loss": 0.0684, + "num_input_tokens_seen": 52680928, + "step": 78150 + }, + { + "epoch": 1.9093396526030342, + "grad_norm": 0.15310020744800568, + "learning_rate": 1.5537945533028296e-06, + "loss": 0.1008, + "num_input_tokens_seen": 52684256, + "step": 78155 + }, + { + "epoch": 1.9094618034348814, + "grad_norm": 0.4250839650630951, + "learning_rate": 1.553723544610799e-06, + "loss": 0.1565, + "num_input_tokens_seen": 52687776, + "step": 78160 + }, + { + "epoch": 1.9095839542667286, + "grad_norm": 1.4815961122512817, + "learning_rate": 1.553652531891942e-06, + "loss": 0.0459, + "num_input_tokens_seen": 52691168, + "step": 78165 + }, + { + "epoch": 1.9097061050985757, + "grad_norm": 0.09083965420722961, + "learning_rate": 1.5535815151467747e-06, + "loss": 0.1741, + "num_input_tokens_seen": 52694496, + "step": 78170 + }, + { + "epoch": 1.909828255930423, + "grad_norm": 131.01443481445312, + "learning_rate": 1.5535104943758137e-06, + "loss": 0.1741, + "num_input_tokens_seen": 52697568, + "step": 78175 + }, + { + "epoch": 1.90995040676227, + "grad_norm": 84.11591339111328, + "learning_rate": 1.5534394695795757e-06, + "loss": 0.1526, + "num_input_tokens_seen": 52700896, + "step": 78180 + }, + { + "epoch": 1.910072557594117, + "grad_norm": 0.23095948994159698, + "learning_rate": 1.553368440758577e-06, + "loss": 0.0386, + "num_input_tokens_seen": 52703968, + "step": 78185 + }, + { + "epoch": 1.9101947084259643, + "grad_norm": 0.20871803164482117, + "learning_rate": 1.5532974079133339e-06, + "loss": 0.0855, + "num_input_tokens_seen": 52707296, + "step": 78190 + }, + { + "epoch": 1.9103168592578115, + "grad_norm": 1.4178715944290161, + "learning_rate": 1.5532263710443636e-06, + "loss": 0.0046, + "num_input_tokens_seen": 52710496, + "step": 78195 + }, + { + "epoch": 1.9104390100896587, + "grad_norm": 13.72677230834961, + "learning_rate": 1.5531553301521824e-06, + "loss": 0.0676, + "num_input_tokens_seen": 52713824, + "step": 78200 + }, + { + "epoch": 1.9105611609215059, + "grad_norm": 0.8392394781112671, + "learning_rate": 1.5530842852373063e-06, + "loss": 0.0933, + "num_input_tokens_seen": 52717088, + "step": 78205 + }, + { + "epoch": 1.910683311753353, + "grad_norm": 0.3146764039993286, + "learning_rate": 1.5530132363002528e-06, + "loss": 0.0364, + "num_input_tokens_seen": 52720928, + "step": 78210 + }, + { + "epoch": 1.9108054625852002, + "grad_norm": 0.3037707805633545, + "learning_rate": 1.5529421833415383e-06, + "loss": 0.0951, + "num_input_tokens_seen": 52724192, + "step": 78215 + }, + { + "epoch": 1.9109276134170474, + "grad_norm": 0.6358781456947327, + "learning_rate": 1.5528711263616795e-06, + "loss": 0.0037, + "num_input_tokens_seen": 52727904, + "step": 78220 + }, + { + "epoch": 1.9110497642488946, + "grad_norm": 0.17406722903251648, + "learning_rate": 1.5528000653611932e-06, + "loss": 0.0388, + "num_input_tokens_seen": 52730976, + "step": 78225 + }, + { + "epoch": 1.9111719150807418, + "grad_norm": 0.07249428331851959, + "learning_rate": 1.5527290003405961e-06, + "loss": 0.1138, + "num_input_tokens_seen": 52734176, + "step": 78230 + }, + { + "epoch": 1.911294065912589, + "grad_norm": 0.38793230056762695, + "learning_rate": 1.5526579313004053e-06, + "loss": 0.0956, + "num_input_tokens_seen": 52737312, + "step": 78235 + }, + { + "epoch": 1.9114162167444362, + "grad_norm": 0.16493968665599823, + "learning_rate": 1.552586858241137e-06, + "loss": 0.1298, + "num_input_tokens_seen": 52740576, + "step": 78240 + }, + { + "epoch": 1.9115383675762831, + "grad_norm": 0.046525854617357254, + "learning_rate": 1.5525157811633087e-06, + "loss": 0.1594, + "num_input_tokens_seen": 52743712, + "step": 78245 + }, + { + "epoch": 1.9116605184081303, + "grad_norm": 17.563119888305664, + "learning_rate": 1.552444700067437e-06, + "loss": 0.1102, + "num_input_tokens_seen": 52746720, + "step": 78250 + }, + { + "epoch": 1.9117826692399775, + "grad_norm": 1.223860502243042, + "learning_rate": 1.5523736149540388e-06, + "loss": 0.0974, + "num_input_tokens_seen": 52750048, + "step": 78255 + }, + { + "epoch": 1.9119048200718247, + "grad_norm": 0.09920724481344223, + "learning_rate": 1.5523025258236312e-06, + "loss": 0.0353, + "num_input_tokens_seen": 52753440, + "step": 78260 + }, + { + "epoch": 1.912026970903672, + "grad_norm": 101.08455657958984, + "learning_rate": 1.5522314326767309e-06, + "loss": 0.0222, + "num_input_tokens_seen": 52756896, + "step": 78265 + }, + { + "epoch": 1.9121491217355189, + "grad_norm": 8.766607284545898, + "learning_rate": 1.5521603355138552e-06, + "loss": 0.0442, + "num_input_tokens_seen": 52760736, + "step": 78270 + }, + { + "epoch": 1.912271272567366, + "grad_norm": 0.4223634600639343, + "learning_rate": 1.5520892343355208e-06, + "loss": 0.0553, + "num_input_tokens_seen": 52764768, + "step": 78275 + }, + { + "epoch": 1.9123934233992133, + "grad_norm": 16.334505081176758, + "learning_rate": 1.5520181291422454e-06, + "loss": 0.0684, + "num_input_tokens_seen": 52768096, + "step": 78280 + }, + { + "epoch": 1.9125155742310604, + "grad_norm": 1.246199131011963, + "learning_rate": 1.5519470199345455e-06, + "loss": 0.0949, + "num_input_tokens_seen": 52771616, + "step": 78285 + }, + { + "epoch": 1.9126377250629076, + "grad_norm": 24.813953399658203, + "learning_rate": 1.5518759067129383e-06, + "loss": 0.261, + "num_input_tokens_seen": 52774624, + "step": 78290 + }, + { + "epoch": 1.9127598758947548, + "grad_norm": 0.43296635150909424, + "learning_rate": 1.5518047894779413e-06, + "loss": 0.0024, + "num_input_tokens_seen": 52778336, + "step": 78295 + }, + { + "epoch": 1.912882026726602, + "grad_norm": 1.1578298807144165, + "learning_rate": 1.5517336682300711e-06, + "loss": 0.0392, + "num_input_tokens_seen": 52781664, + "step": 78300 + }, + { + "epoch": 1.9130041775584492, + "grad_norm": 98.7223892211914, + "learning_rate": 1.5516625429698455e-06, + "loss": 0.2153, + "num_input_tokens_seen": 52784800, + "step": 78305 + }, + { + "epoch": 1.9131263283902964, + "grad_norm": 36.56499099731445, + "learning_rate": 1.5515914136977815e-06, + "loss": 0.0306, + "num_input_tokens_seen": 52788448, + "step": 78310 + }, + { + "epoch": 1.9132484792221436, + "grad_norm": 6.932656764984131, + "learning_rate": 1.5515202804143964e-06, + "loss": 0.0718, + "num_input_tokens_seen": 52791648, + "step": 78315 + }, + { + "epoch": 1.9133706300539908, + "grad_norm": 0.1303882896900177, + "learning_rate": 1.5514491431202075e-06, + "loss": 0.0966, + "num_input_tokens_seen": 52795168, + "step": 78320 + }, + { + "epoch": 1.913492780885838, + "grad_norm": 0.4207264482975006, + "learning_rate": 1.5513780018157321e-06, + "loss": 0.0456, + "num_input_tokens_seen": 52798752, + "step": 78325 + }, + { + "epoch": 1.9136149317176852, + "grad_norm": 114.55884552001953, + "learning_rate": 1.5513068565014875e-06, + "loss": 0.0781, + "num_input_tokens_seen": 52802336, + "step": 78330 + }, + { + "epoch": 1.9137370825495321, + "grad_norm": 126.00515747070312, + "learning_rate": 1.5512357071779912e-06, + "loss": 0.2495, + "num_input_tokens_seen": 52805600, + "step": 78335 + }, + { + "epoch": 1.9138592333813793, + "grad_norm": 0.026732511818408966, + "learning_rate": 1.5511645538457604e-06, + "loss": 0.167, + "num_input_tokens_seen": 52808928, + "step": 78340 + }, + { + "epoch": 1.9139813842132265, + "grad_norm": 11.265633583068848, + "learning_rate": 1.551093396505313e-06, + "loss": 0.0669, + "num_input_tokens_seen": 52812064, + "step": 78345 + }, + { + "epoch": 1.9141035350450737, + "grad_norm": 1.563814401626587, + "learning_rate": 1.551022235157166e-06, + "loss": 0.0021, + "num_input_tokens_seen": 52815712, + "step": 78350 + }, + { + "epoch": 1.9142256858769207, + "grad_norm": 12.1010160446167, + "learning_rate": 1.550951069801837e-06, + "loss": 0.051, + "num_input_tokens_seen": 52819360, + "step": 78355 + }, + { + "epoch": 1.9143478367087678, + "grad_norm": 10.78503704071045, + "learning_rate": 1.550879900439844e-06, + "loss": 0.0383, + "num_input_tokens_seen": 52822688, + "step": 78360 + }, + { + "epoch": 1.914469987540615, + "grad_norm": 0.09379678964614868, + "learning_rate": 1.5508087270717041e-06, + "loss": 0.0459, + "num_input_tokens_seen": 52825952, + "step": 78365 + }, + { + "epoch": 1.9145921383724622, + "grad_norm": 84.47173309326172, + "learning_rate": 1.550737549697935e-06, + "loss": 0.1156, + "num_input_tokens_seen": 52829152, + "step": 78370 + }, + { + "epoch": 1.9147142892043094, + "grad_norm": 30.063278198242188, + "learning_rate": 1.550666368319054e-06, + "loss": 0.1214, + "num_input_tokens_seen": 52832480, + "step": 78375 + }, + { + "epoch": 1.9148364400361566, + "grad_norm": 0.1367904096841812, + "learning_rate": 1.5505951829355791e-06, + "loss": 0.1197, + "num_input_tokens_seen": 52835872, + "step": 78380 + }, + { + "epoch": 1.9149585908680038, + "grad_norm": 63.807098388671875, + "learning_rate": 1.5505239935480283e-06, + "loss": 0.1606, + "num_input_tokens_seen": 52839712, + "step": 78385 + }, + { + "epoch": 1.915080741699851, + "grad_norm": 0.2028699666261673, + "learning_rate": 1.550452800156919e-06, + "loss": 0.002, + "num_input_tokens_seen": 52843360, + "step": 78390 + }, + { + "epoch": 1.9152028925316982, + "grad_norm": 0.36074304580688477, + "learning_rate": 1.5503816027627684e-06, + "loss": 0.0617, + "num_input_tokens_seen": 52846304, + "step": 78395 + }, + { + "epoch": 1.9153250433635454, + "grad_norm": 0.44605323672294617, + "learning_rate": 1.5503104013660946e-06, + "loss": 0.174, + "num_input_tokens_seen": 52849696, + "step": 78400 + }, + { + "epoch": 1.9154471941953926, + "grad_norm": 0.6416441202163696, + "learning_rate": 1.550239195967416e-06, + "loss": 0.0981, + "num_input_tokens_seen": 52853024, + "step": 78405 + }, + { + "epoch": 1.9155693450272397, + "grad_norm": 20.85800552368164, + "learning_rate": 1.55016798656725e-06, + "loss": 0.3347, + "num_input_tokens_seen": 52856160, + "step": 78410 + }, + { + "epoch": 1.915691495859087, + "grad_norm": 0.6915989518165588, + "learning_rate": 1.5500967731661146e-06, + "loss": 0.0996, + "num_input_tokens_seen": 52859360, + "step": 78415 + }, + { + "epoch": 1.9158136466909341, + "grad_norm": 0.4440751373767853, + "learning_rate": 1.550025555764527e-06, + "loss": 0.0661, + "num_input_tokens_seen": 52862752, + "step": 78420 + }, + { + "epoch": 1.915935797522781, + "grad_norm": 0.34711381793022156, + "learning_rate": 1.5499543343630056e-06, + "loss": 0.0543, + "num_input_tokens_seen": 52866336, + "step": 78425 + }, + { + "epoch": 1.9160579483546283, + "grad_norm": 4.468153953552246, + "learning_rate": 1.5498831089620686e-06, + "loss": 0.0118, + "num_input_tokens_seen": 52869152, + "step": 78430 + }, + { + "epoch": 1.9161800991864755, + "grad_norm": 0.1526588648557663, + "learning_rate": 1.549811879562234e-06, + "loss": 0.0841, + "num_input_tokens_seen": 52872736, + "step": 78435 + }, + { + "epoch": 1.9163022500183227, + "grad_norm": 47.45873260498047, + "learning_rate": 1.549740646164019e-06, + "loss": 0.0809, + "num_input_tokens_seen": 52876256, + "step": 78440 + }, + { + "epoch": 1.9164244008501696, + "grad_norm": 1.7582581043243408, + "learning_rate": 1.5496694087679427e-06, + "loss": 0.2138, + "num_input_tokens_seen": 52879776, + "step": 78445 + }, + { + "epoch": 1.9165465516820168, + "grad_norm": 1.4794515371322632, + "learning_rate": 1.5495981673745222e-06, + "loss": 0.0528, + "num_input_tokens_seen": 52882848, + "step": 78450 + }, + { + "epoch": 1.916668702513864, + "grad_norm": 18.945890426635742, + "learning_rate": 1.549526921984276e-06, + "loss": 0.1183, + "num_input_tokens_seen": 52886304, + "step": 78455 + }, + { + "epoch": 1.9167908533457112, + "grad_norm": 102.38660430908203, + "learning_rate": 1.5494556725977224e-06, + "loss": 0.0998, + "num_input_tokens_seen": 52889952, + "step": 78460 + }, + { + "epoch": 1.9169130041775584, + "grad_norm": 0.22804000973701477, + "learning_rate": 1.5493844192153794e-06, + "loss": 0.0358, + "num_input_tokens_seen": 52893280, + "step": 78465 + }, + { + "epoch": 1.9170351550094056, + "grad_norm": 8.567605018615723, + "learning_rate": 1.549313161837765e-06, + "loss": 0.0706, + "num_input_tokens_seen": 52896608, + "step": 78470 + }, + { + "epoch": 1.9171573058412528, + "grad_norm": 0.02231750637292862, + "learning_rate": 1.5492419004653977e-06, + "loss": 0.1005, + "num_input_tokens_seen": 52899808, + "step": 78475 + }, + { + "epoch": 1.9172794566731, + "grad_norm": 0.3702198266983032, + "learning_rate": 1.5491706350987954e-06, + "loss": 0.0561, + "num_input_tokens_seen": 52903136, + "step": 78480 + }, + { + "epoch": 1.9174016075049471, + "grad_norm": 11.615361213684082, + "learning_rate": 1.5490993657384766e-06, + "loss": 0.1255, + "num_input_tokens_seen": 52906720, + "step": 78485 + }, + { + "epoch": 1.9175237583367943, + "grad_norm": 10.031021118164062, + "learning_rate": 1.5490280923849595e-06, + "loss": 0.1982, + "num_input_tokens_seen": 52910624, + "step": 78490 + }, + { + "epoch": 1.9176459091686415, + "grad_norm": 0.313969224691391, + "learning_rate": 1.5489568150387624e-06, + "loss": 0.0295, + "num_input_tokens_seen": 52914208, + "step": 78495 + }, + { + "epoch": 1.9177680600004887, + "grad_norm": 120.33798217773438, + "learning_rate": 1.5488855337004035e-06, + "loss": 0.1131, + "num_input_tokens_seen": 52917344, + "step": 78500 + }, + { + "epoch": 1.917890210832336, + "grad_norm": 1.7783594131469727, + "learning_rate": 1.548814248370402e-06, + "loss": 0.0533, + "num_input_tokens_seen": 52920928, + "step": 78505 + }, + { + "epoch": 1.918012361664183, + "grad_norm": 1.2943834066390991, + "learning_rate": 1.548742959049275e-06, + "loss": 0.151, + "num_input_tokens_seen": 52924704, + "step": 78510 + }, + { + "epoch": 1.91813451249603, + "grad_norm": 14.105642318725586, + "learning_rate": 1.548671665737542e-06, + "loss": 0.0807, + "num_input_tokens_seen": 52927712, + "step": 78515 + }, + { + "epoch": 1.9182566633278773, + "grad_norm": 0.6679994463920593, + "learning_rate": 1.5486003684357209e-06, + "loss": 0.16, + "num_input_tokens_seen": 52930976, + "step": 78520 + }, + { + "epoch": 1.9183788141597244, + "grad_norm": 38.2706413269043, + "learning_rate": 1.5485290671443306e-06, + "loss": 0.1026, + "num_input_tokens_seen": 52934496, + "step": 78525 + }, + { + "epoch": 1.9185009649915716, + "grad_norm": 328.0785827636719, + "learning_rate": 1.5484577618638892e-06, + "loss": 0.1407, + "num_input_tokens_seen": 52937952, + "step": 78530 + }, + { + "epoch": 1.9186231158234186, + "grad_norm": 20.970483779907227, + "learning_rate": 1.5483864525949156e-06, + "loss": 0.1558, + "num_input_tokens_seen": 52941280, + "step": 78535 + }, + { + "epoch": 1.9187452666552658, + "grad_norm": 2.9742214679718018, + "learning_rate": 1.5483151393379278e-06, + "loss": 0.0748, + "num_input_tokens_seen": 52944544, + "step": 78540 + }, + { + "epoch": 1.918867417487113, + "grad_norm": 0.6080707907676697, + "learning_rate": 1.5482438220934453e-06, + "loss": 0.1002, + "num_input_tokens_seen": 52948000, + "step": 78545 + }, + { + "epoch": 1.9189895683189602, + "grad_norm": 26.098880767822266, + "learning_rate": 1.5481725008619857e-06, + "loss": 0.1008, + "num_input_tokens_seen": 52951520, + "step": 78550 + }, + { + "epoch": 1.9191117191508074, + "grad_norm": 38.22494888305664, + "learning_rate": 1.5481011756440688e-06, + "loss": 0.0848, + "num_input_tokens_seen": 52954656, + "step": 78555 + }, + { + "epoch": 1.9192338699826546, + "grad_norm": 0.06840623915195465, + "learning_rate": 1.5480298464402127e-06, + "loss": 0.07, + "num_input_tokens_seen": 52958368, + "step": 78560 + }, + { + "epoch": 1.9193560208145017, + "grad_norm": 0.19684047996997833, + "learning_rate": 1.5479585132509358e-06, + "loss": 0.09, + "num_input_tokens_seen": 52961504, + "step": 78565 + }, + { + "epoch": 1.919478171646349, + "grad_norm": 0.4583105742931366, + "learning_rate": 1.5478871760767574e-06, + "loss": 0.0319, + "num_input_tokens_seen": 52964704, + "step": 78570 + }, + { + "epoch": 1.9196003224781961, + "grad_norm": 4.073686122894287, + "learning_rate": 1.5478158349181963e-06, + "loss": 0.0083, + "num_input_tokens_seen": 52967904, + "step": 78575 + }, + { + "epoch": 1.9197224733100433, + "grad_norm": 67.97811889648438, + "learning_rate": 1.5477444897757707e-06, + "loss": 0.055, + "num_input_tokens_seen": 52971360, + "step": 78580 + }, + { + "epoch": 1.9198446241418905, + "grad_norm": 0.246597558259964, + "learning_rate": 1.54767314065e-06, + "loss": 0.0767, + "num_input_tokens_seen": 52974624, + "step": 78585 + }, + { + "epoch": 1.9199667749737377, + "grad_norm": 226.3640594482422, + "learning_rate": 1.547601787541403e-06, + "loss": 0.0667, + "num_input_tokens_seen": 52977696, + "step": 78590 + }, + { + "epoch": 1.9200889258055849, + "grad_norm": 0.27767878770828247, + "learning_rate": 1.5475304304504983e-06, + "loss": 0.0052, + "num_input_tokens_seen": 52980960, + "step": 78595 + }, + { + "epoch": 1.9202110766374318, + "grad_norm": 0.8122377991676331, + "learning_rate": 1.5474590693778054e-06, + "loss": 0.0763, + "num_input_tokens_seen": 52984800, + "step": 78600 + }, + { + "epoch": 1.920333227469279, + "grad_norm": 0.2373126596212387, + "learning_rate": 1.5473877043238428e-06, + "loss": 0.0017, + "num_input_tokens_seen": 52987872, + "step": 78605 + }, + { + "epoch": 1.9204553783011262, + "grad_norm": 0.38778069615364075, + "learning_rate": 1.5473163352891295e-06, + "loss": 0.0367, + "num_input_tokens_seen": 52991008, + "step": 78610 + }, + { + "epoch": 1.9205775291329734, + "grad_norm": 0.26472291350364685, + "learning_rate": 1.5472449622741844e-06, + "loss": 0.0514, + "num_input_tokens_seen": 52994720, + "step": 78615 + }, + { + "epoch": 1.9206996799648206, + "grad_norm": 23.649662017822266, + "learning_rate": 1.547173585279527e-06, + "loss": 0.2303, + "num_input_tokens_seen": 52998176, + "step": 78620 + }, + { + "epoch": 1.9208218307966676, + "grad_norm": 30.59381675720215, + "learning_rate": 1.5471022043056761e-06, + "loss": 0.1555, + "num_input_tokens_seen": 53001568, + "step": 78625 + }, + { + "epoch": 1.9209439816285148, + "grad_norm": 0.15853320062160492, + "learning_rate": 1.5470308193531505e-06, + "loss": 0.0836, + "num_input_tokens_seen": 53004704, + "step": 78630 + }, + { + "epoch": 1.921066132460362, + "grad_norm": 0.23013024032115936, + "learning_rate": 1.54695943042247e-06, + "loss": 0.057, + "num_input_tokens_seen": 53007968, + "step": 78635 + }, + { + "epoch": 1.9211882832922091, + "grad_norm": 17.81313705444336, + "learning_rate": 1.5468880375141535e-06, + "loss": 0.0948, + "num_input_tokens_seen": 53011488, + "step": 78640 + }, + { + "epoch": 1.9213104341240563, + "grad_norm": 0.38524720072746277, + "learning_rate": 1.5468166406287197e-06, + "loss": 0.0349, + "num_input_tokens_seen": 53014944, + "step": 78645 + }, + { + "epoch": 1.9214325849559035, + "grad_norm": 0.1310257464647293, + "learning_rate": 1.5467452397666885e-06, + "loss": 0.05, + "num_input_tokens_seen": 53018208, + "step": 78650 + }, + { + "epoch": 1.9215547357877507, + "grad_norm": 1.4099310636520386, + "learning_rate": 1.5466738349285788e-06, + "loss": 0.0014, + "num_input_tokens_seen": 53021408, + "step": 78655 + }, + { + "epoch": 1.921676886619598, + "grad_norm": 0.2426673024892807, + "learning_rate": 1.54660242611491e-06, + "loss": 0.0496, + "num_input_tokens_seen": 53024672, + "step": 78660 + }, + { + "epoch": 1.921799037451445, + "grad_norm": 89.316162109375, + "learning_rate": 1.5465310133262014e-06, + "loss": 0.1131, + "num_input_tokens_seen": 53027936, + "step": 78665 + }, + { + "epoch": 1.9219211882832923, + "grad_norm": 20.908267974853516, + "learning_rate": 1.5464595965629719e-06, + "loss": 0.1438, + "num_input_tokens_seen": 53031648, + "step": 78670 + }, + { + "epoch": 1.9220433391151395, + "grad_norm": 0.5099788308143616, + "learning_rate": 1.5463881758257414e-06, + "loss": 0.0672, + "num_input_tokens_seen": 53034720, + "step": 78675 + }, + { + "epoch": 1.9221654899469867, + "grad_norm": 0.08424662798643112, + "learning_rate": 1.5463167511150292e-06, + "loss": 0.0052, + "num_input_tokens_seen": 53038432, + "step": 78680 + }, + { + "epoch": 1.9222876407788339, + "grad_norm": 326.7347717285156, + "learning_rate": 1.5462453224313547e-06, + "loss": 0.1965, + "num_input_tokens_seen": 53042080, + "step": 78685 + }, + { + "epoch": 1.9224097916106808, + "grad_norm": 14.694061279296875, + "learning_rate": 1.5461738897752371e-06, + "loss": 0.0696, + "num_input_tokens_seen": 53045280, + "step": 78690 + }, + { + "epoch": 1.922531942442528, + "grad_norm": 0.0032516128849238157, + "learning_rate": 1.5461024531471961e-06, + "loss": 0.0011, + "num_input_tokens_seen": 53048416, + "step": 78695 + }, + { + "epoch": 1.9226540932743752, + "grad_norm": 0.2199341207742691, + "learning_rate": 1.5460310125477516e-06, + "loss": 0.0433, + "num_input_tokens_seen": 53051104, + "step": 78700 + }, + { + "epoch": 1.9227762441062224, + "grad_norm": 0.2078741043806076, + "learning_rate": 1.5459595679774223e-06, + "loss": 0.1933, + "num_input_tokens_seen": 53054048, + "step": 78705 + }, + { + "epoch": 1.9228983949380696, + "grad_norm": 0.2788570821285248, + "learning_rate": 1.5458881194367282e-06, + "loss": 0.0407, + "num_input_tokens_seen": 53057568, + "step": 78710 + }, + { + "epoch": 1.9230205457699165, + "grad_norm": 22.922252655029297, + "learning_rate": 1.5458166669261888e-06, + "loss": 0.2197, + "num_input_tokens_seen": 53060512, + "step": 78715 + }, + { + "epoch": 1.9231426966017637, + "grad_norm": 36.99504852294922, + "learning_rate": 1.545745210446324e-06, + "loss": 0.1353, + "num_input_tokens_seen": 53063456, + "step": 78720 + }, + { + "epoch": 1.923264847433611, + "grad_norm": 52.599510192871094, + "learning_rate": 1.5456737499976532e-06, + "loss": 0.17, + "num_input_tokens_seen": 53067424, + "step": 78725 + }, + { + "epoch": 1.9233869982654581, + "grad_norm": 0.23235607147216797, + "learning_rate": 1.5456022855806961e-06, + "loss": 0.0516, + "num_input_tokens_seen": 53070944, + "step": 78730 + }, + { + "epoch": 1.9235091490973053, + "grad_norm": 0.22378988564014435, + "learning_rate": 1.5455308171959724e-06, + "loss": 0.0501, + "num_input_tokens_seen": 53074016, + "step": 78735 + }, + { + "epoch": 1.9236312999291525, + "grad_norm": 39.33481216430664, + "learning_rate": 1.5454593448440018e-06, + "loss": 0.1306, + "num_input_tokens_seen": 53077728, + "step": 78740 + }, + { + "epoch": 1.9237534507609997, + "grad_norm": 45.01771926879883, + "learning_rate": 1.5453878685253043e-06, + "loss": 0.062, + "num_input_tokens_seen": 53080928, + "step": 78745 + }, + { + "epoch": 1.9238756015928469, + "grad_norm": 0.009599471464753151, + "learning_rate": 1.5453163882403994e-06, + "loss": 0.0918, + "num_input_tokens_seen": 53084768, + "step": 78750 + }, + { + "epoch": 1.923997752424694, + "grad_norm": 0.0814778283238411, + "learning_rate": 1.5452449039898073e-06, + "loss": 0.0855, + "num_input_tokens_seen": 53088544, + "step": 78755 + }, + { + "epoch": 1.9241199032565413, + "grad_norm": 0.13089479506015778, + "learning_rate": 1.5451734157740471e-06, + "loss": 0.0711, + "num_input_tokens_seen": 53091872, + "step": 78760 + }, + { + "epoch": 1.9242420540883884, + "grad_norm": 0.35896846652030945, + "learning_rate": 1.5451019235936396e-06, + "loss": 0.0743, + "num_input_tokens_seen": 53095008, + "step": 78765 + }, + { + "epoch": 1.9243642049202356, + "grad_norm": 0.2049417793750763, + "learning_rate": 1.5450304274491043e-06, + "loss": 0.0544, + "num_input_tokens_seen": 53098592, + "step": 78770 + }, + { + "epoch": 1.9244863557520828, + "grad_norm": 0.10769752413034439, + "learning_rate": 1.5449589273409608e-06, + "loss": 0.0351, + "num_input_tokens_seen": 53101984, + "step": 78775 + }, + { + "epoch": 1.9246085065839298, + "grad_norm": 6.025005340576172, + "learning_rate": 1.5448874232697298e-06, + "loss": 0.1972, + "num_input_tokens_seen": 53105440, + "step": 78780 + }, + { + "epoch": 1.924730657415777, + "grad_norm": 0.20146672427654266, + "learning_rate": 1.5448159152359307e-06, + "loss": 0.1779, + "num_input_tokens_seen": 53108128, + "step": 78785 + }, + { + "epoch": 1.9248528082476242, + "grad_norm": 0.3935956656932831, + "learning_rate": 1.544744403240084e-06, + "loss": 0.0853, + "num_input_tokens_seen": 53111584, + "step": 78790 + }, + { + "epoch": 1.9249749590794714, + "grad_norm": 0.06383128464221954, + "learning_rate": 1.5446728872827091e-06, + "loss": 0.0016, + "num_input_tokens_seen": 53115168, + "step": 78795 + }, + { + "epoch": 1.9250971099113185, + "grad_norm": 0.9724140167236328, + "learning_rate": 1.5446013673643266e-06, + "loss": 0.0082, + "num_input_tokens_seen": 53118944, + "step": 78800 + }, + { + "epoch": 1.9252192607431655, + "grad_norm": 0.08090006560087204, + "learning_rate": 1.5445298434854563e-06, + "loss": 0.1288, + "num_input_tokens_seen": 53122144, + "step": 78805 + }, + { + "epoch": 1.9253414115750127, + "grad_norm": 0.15845705568790436, + "learning_rate": 1.5444583156466187e-06, + "loss": 0.1123, + "num_input_tokens_seen": 53125728, + "step": 78810 + }, + { + "epoch": 1.92546356240686, + "grad_norm": 0.0770653635263443, + "learning_rate": 1.544386783848334e-06, + "loss": 0.0863, + "num_input_tokens_seen": 53129696, + "step": 78815 + }, + { + "epoch": 1.925585713238707, + "grad_norm": 0.22629189491271973, + "learning_rate": 1.544315248091122e-06, + "loss": 0.0478, + "num_input_tokens_seen": 53132832, + "step": 78820 + }, + { + "epoch": 1.9257078640705543, + "grad_norm": 0.29965588450431824, + "learning_rate": 1.544243708375503e-06, + "loss": 0.1206, + "num_input_tokens_seen": 53136352, + "step": 78825 + }, + { + "epoch": 1.9258300149024015, + "grad_norm": 0.3292113244533539, + "learning_rate": 1.5441721647019974e-06, + "loss": 0.0627, + "num_input_tokens_seen": 53139936, + "step": 78830 + }, + { + "epoch": 1.9259521657342487, + "grad_norm": 14.793441772460938, + "learning_rate": 1.5441006170711255e-06, + "loss": 0.0372, + "num_input_tokens_seen": 53143328, + "step": 78835 + }, + { + "epoch": 1.9260743165660958, + "grad_norm": 0.19428370893001556, + "learning_rate": 1.5440290654834075e-06, + "loss": 0.0459, + "num_input_tokens_seen": 53146656, + "step": 78840 + }, + { + "epoch": 1.926196467397943, + "grad_norm": 21.550212860107422, + "learning_rate": 1.5439575099393639e-06, + "loss": 0.1206, + "num_input_tokens_seen": 53149920, + "step": 78845 + }, + { + "epoch": 1.9263186182297902, + "grad_norm": 0.11072822660207748, + "learning_rate": 1.543885950439515e-06, + "loss": 0.0809, + "num_input_tokens_seen": 53153312, + "step": 78850 + }, + { + "epoch": 1.9264407690616374, + "grad_norm": 41.060089111328125, + "learning_rate": 1.543814386984381e-06, + "loss": 0.0791, + "num_input_tokens_seen": 53156896, + "step": 78855 + }, + { + "epoch": 1.9265629198934846, + "grad_norm": 1.5634219646453857, + "learning_rate": 1.5437428195744829e-06, + "loss": 0.1982, + "num_input_tokens_seen": 53160288, + "step": 78860 + }, + { + "epoch": 1.9266850707253318, + "grad_norm": 0.5574802160263062, + "learning_rate": 1.5436712482103401e-06, + "loss": 0.1215, + "num_input_tokens_seen": 53163808, + "step": 78865 + }, + { + "epoch": 1.9268072215571788, + "grad_norm": 0.07435950636863708, + "learning_rate": 1.5435996728924744e-06, + "loss": 0.0611, + "num_input_tokens_seen": 53167136, + "step": 78870 + }, + { + "epoch": 1.926929372389026, + "grad_norm": 0.4004596471786499, + "learning_rate": 1.5435280936214055e-06, + "loss": 0.1073, + "num_input_tokens_seen": 53170528, + "step": 78875 + }, + { + "epoch": 1.9270515232208731, + "grad_norm": 0.15348993241786957, + "learning_rate": 1.543456510397654e-06, + "loss": 0.0009, + "num_input_tokens_seen": 53173472, + "step": 78880 + }, + { + "epoch": 1.9271736740527203, + "grad_norm": 17.38145637512207, + "learning_rate": 1.5433849232217407e-06, + "loss": 0.103, + "num_input_tokens_seen": 53177056, + "step": 78885 + }, + { + "epoch": 1.9272958248845673, + "grad_norm": 0.4298340678215027, + "learning_rate": 1.543313332094186e-06, + "loss": 0.0019, + "num_input_tokens_seen": 53180768, + "step": 78890 + }, + { + "epoch": 1.9274179757164145, + "grad_norm": 18.220590591430664, + "learning_rate": 1.5432417370155104e-06, + "loss": 0.0486, + "num_input_tokens_seen": 53183968, + "step": 78895 + }, + { + "epoch": 1.9275401265482617, + "grad_norm": 0.14308039844036102, + "learning_rate": 1.5431701379862353e-06, + "loss": 0.0708, + "num_input_tokens_seen": 53187296, + "step": 78900 + }, + { + "epoch": 1.9276622773801089, + "grad_norm": 1.517258644104004, + "learning_rate": 1.5430985350068804e-06, + "loss": 0.0107, + "num_input_tokens_seen": 53190752, + "step": 78905 + }, + { + "epoch": 1.927784428211956, + "grad_norm": 1.682701587677002, + "learning_rate": 1.543026928077967e-06, + "loss": 0.0949, + "num_input_tokens_seen": 53194400, + "step": 78910 + }, + { + "epoch": 1.9279065790438032, + "grad_norm": 0.18249306082725525, + "learning_rate": 1.5429553172000157e-06, + "loss": 0.1189, + "num_input_tokens_seen": 53197856, + "step": 78915 + }, + { + "epoch": 1.9280287298756504, + "grad_norm": 0.07325104624032974, + "learning_rate": 1.5428837023735475e-06, + "loss": 0.0013, + "num_input_tokens_seen": 53200928, + "step": 78920 + }, + { + "epoch": 1.9281508807074976, + "grad_norm": 0.027356114238500595, + "learning_rate": 1.5428120835990829e-06, + "loss": 0.0833, + "num_input_tokens_seen": 53204384, + "step": 78925 + }, + { + "epoch": 1.9282730315393448, + "grad_norm": 17.057506561279297, + "learning_rate": 1.5427404608771427e-06, + "loss": 0.1537, + "num_input_tokens_seen": 53207392, + "step": 78930 + }, + { + "epoch": 1.928395182371192, + "grad_norm": 0.018845299258828163, + "learning_rate": 1.542668834208248e-06, + "loss": 0.058, + "num_input_tokens_seen": 53210912, + "step": 78935 + }, + { + "epoch": 1.9285173332030392, + "grad_norm": 26.701641082763672, + "learning_rate": 1.5425972035929196e-06, + "loss": 0.0759, + "num_input_tokens_seen": 53214432, + "step": 78940 + }, + { + "epoch": 1.9286394840348864, + "grad_norm": 0.2233164757490158, + "learning_rate": 1.5425255690316783e-06, + "loss": 0.0417, + "num_input_tokens_seen": 53217760, + "step": 78945 + }, + { + "epoch": 1.9287616348667336, + "grad_norm": 10.752738952636719, + "learning_rate": 1.5424539305250452e-06, + "loss": 0.1116, + "num_input_tokens_seen": 53220896, + "step": 78950 + }, + { + "epoch": 1.9288837856985808, + "grad_norm": 13.85923957824707, + "learning_rate": 1.542382288073541e-06, + "loss": 0.2375, + "num_input_tokens_seen": 53224160, + "step": 78955 + }, + { + "epoch": 1.9290059365304277, + "grad_norm": 0.13441646099090576, + "learning_rate": 1.5423106416776873e-06, + "loss": 0.1627, + "num_input_tokens_seen": 53227360, + "step": 78960 + }, + { + "epoch": 1.929128087362275, + "grad_norm": 16.526451110839844, + "learning_rate": 1.5422389913380046e-06, + "loss": 0.0888, + "num_input_tokens_seen": 53230560, + "step": 78965 + }, + { + "epoch": 1.9292502381941221, + "grad_norm": 21.69394874572754, + "learning_rate": 1.5421673370550142e-06, + "loss": 0.0429, + "num_input_tokens_seen": 53233824, + "step": 78970 + }, + { + "epoch": 1.9293723890259693, + "grad_norm": 0.10255489498376846, + "learning_rate": 1.542095678829237e-06, + "loss": 0.0709, + "num_input_tokens_seen": 53237984, + "step": 78975 + }, + { + "epoch": 1.9294945398578163, + "grad_norm": 9.572564125061035, + "learning_rate": 1.5420240166611942e-06, + "loss": 0.264, + "num_input_tokens_seen": 53241312, + "step": 78980 + }, + { + "epoch": 1.9296166906896635, + "grad_norm": 17.74254608154297, + "learning_rate": 1.5419523505514068e-06, + "loss": 0.1324, + "num_input_tokens_seen": 53245024, + "step": 78985 + }, + { + "epoch": 1.9297388415215107, + "grad_norm": 30.855518341064453, + "learning_rate": 1.5418806805003964e-06, + "loss": 0.0684, + "num_input_tokens_seen": 53248480, + "step": 78990 + }, + { + "epoch": 1.9298609923533578, + "grad_norm": 22.330480575561523, + "learning_rate": 1.5418090065086838e-06, + "loss": 0.066, + "num_input_tokens_seen": 53251744, + "step": 78995 + }, + { + "epoch": 1.929983143185205, + "grad_norm": 0.38473808765411377, + "learning_rate": 1.5417373285767903e-06, + "loss": 0.0335, + "num_input_tokens_seen": 53255328, + "step": 79000 + }, + { + "epoch": 1.9301052940170522, + "grad_norm": 145.94451904296875, + "learning_rate": 1.5416656467052374e-06, + "loss": 0.0654, + "num_input_tokens_seen": 53258784, + "step": 79005 + }, + { + "epoch": 1.9302274448488994, + "grad_norm": 9.839098930358887, + "learning_rate": 1.5415939608945463e-06, + "loss": 0.1352, + "num_input_tokens_seen": 53262368, + "step": 79010 + }, + { + "epoch": 1.9303495956807466, + "grad_norm": 37.14972686767578, + "learning_rate": 1.5415222711452382e-06, + "loss": 0.093, + "num_input_tokens_seen": 53266144, + "step": 79015 + }, + { + "epoch": 1.9304717465125938, + "grad_norm": 0.1843390315771103, + "learning_rate": 1.5414505774578342e-06, + "loss": 0.0653, + "num_input_tokens_seen": 53269472, + "step": 79020 + }, + { + "epoch": 1.930593897344441, + "grad_norm": 75.80435180664062, + "learning_rate": 1.5413788798328563e-06, + "loss": 0.0632, + "num_input_tokens_seen": 53272864, + "step": 79025 + }, + { + "epoch": 1.9307160481762882, + "grad_norm": 16.962350845336914, + "learning_rate": 1.5413071782708254e-06, + "loss": 0.1579, + "num_input_tokens_seen": 53275680, + "step": 79030 + }, + { + "epoch": 1.9308381990081354, + "grad_norm": 0.4690840542316437, + "learning_rate": 1.5412354727722631e-06, + "loss": 0.0986, + "num_input_tokens_seen": 53279456, + "step": 79035 + }, + { + "epoch": 1.9309603498399825, + "grad_norm": 0.4627876281738281, + "learning_rate": 1.541163763337691e-06, + "loss": 0.0759, + "num_input_tokens_seen": 53282592, + "step": 79040 + }, + { + "epoch": 1.9310825006718297, + "grad_norm": 0.4975791871547699, + "learning_rate": 1.5410920499676303e-06, + "loss": 0.0426, + "num_input_tokens_seen": 53286240, + "step": 79045 + }, + { + "epoch": 1.9312046515036767, + "grad_norm": 0.12812969088554382, + "learning_rate": 1.5410203326626028e-06, + "loss": 0.0322, + "num_input_tokens_seen": 53289632, + "step": 79050 + }, + { + "epoch": 1.931326802335524, + "grad_norm": 22.060348510742188, + "learning_rate": 1.54094861142313e-06, + "loss": 0.0042, + "num_input_tokens_seen": 53292896, + "step": 79055 + }, + { + "epoch": 1.931448953167371, + "grad_norm": 54.32301330566406, + "learning_rate": 1.5408768862497332e-06, + "loss": 0.1168, + "num_input_tokens_seen": 53296480, + "step": 79060 + }, + { + "epoch": 1.9315711039992183, + "grad_norm": 22.543865203857422, + "learning_rate": 1.5408051571429344e-06, + "loss": 0.11, + "num_input_tokens_seen": 53299744, + "step": 79065 + }, + { + "epoch": 1.9316932548310652, + "grad_norm": 60.06733322143555, + "learning_rate": 1.540733424103255e-06, + "loss": 0.046, + "num_input_tokens_seen": 53302624, + "step": 79070 + }, + { + "epoch": 1.9318154056629124, + "grad_norm": 87.2182846069336, + "learning_rate": 1.5406616871312166e-06, + "loss": 0.0843, + "num_input_tokens_seen": 53305824, + "step": 79075 + }, + { + "epoch": 1.9319375564947596, + "grad_norm": 10.24471378326416, + "learning_rate": 1.540589946227341e-06, + "loss": 0.1187, + "num_input_tokens_seen": 53308704, + "step": 79080 + }, + { + "epoch": 1.9320597073266068, + "grad_norm": 0.9021111726760864, + "learning_rate": 1.5405182013921498e-06, + "loss": 0.0444, + "num_input_tokens_seen": 53311712, + "step": 79085 + }, + { + "epoch": 1.932181858158454, + "grad_norm": 0.08363750576972961, + "learning_rate": 1.5404464526261651e-06, + "loss": 0.175, + "num_input_tokens_seen": 53315104, + "step": 79090 + }, + { + "epoch": 1.9323040089903012, + "grad_norm": 0.9060919284820557, + "learning_rate": 1.5403746999299083e-06, + "loss": 0.0021, + "num_input_tokens_seen": 53317920, + "step": 79095 + }, + { + "epoch": 1.9324261598221484, + "grad_norm": 98.47150421142578, + "learning_rate": 1.540302943303901e-06, + "loss": 0.103, + "num_input_tokens_seen": 53321376, + "step": 79100 + }, + { + "epoch": 1.9325483106539956, + "grad_norm": 0.3717132806777954, + "learning_rate": 1.5402311827486663e-06, + "loss": 0.1041, + "num_input_tokens_seen": 53324448, + "step": 79105 + }, + { + "epoch": 1.9326704614858428, + "grad_norm": 8.272316932678223, + "learning_rate": 1.5401594182647241e-06, + "loss": 0.1032, + "num_input_tokens_seen": 53328032, + "step": 79110 + }, + { + "epoch": 1.93279261231769, + "grad_norm": 0.6474543809890747, + "learning_rate": 1.5400876498525978e-06, + "loss": 0.0018, + "num_input_tokens_seen": 53331552, + "step": 79115 + }, + { + "epoch": 1.9329147631495371, + "grad_norm": 0.18280108273029327, + "learning_rate": 1.540015877512809e-06, + "loss": 0.0707, + "num_input_tokens_seen": 53335456, + "step": 79120 + }, + { + "epoch": 1.9330369139813843, + "grad_norm": 1.0236232280731201, + "learning_rate": 1.5399441012458793e-06, + "loss": 0.1359, + "num_input_tokens_seen": 53338720, + "step": 79125 + }, + { + "epoch": 1.9331590648132315, + "grad_norm": 1.5323357582092285, + "learning_rate": 1.5398723210523313e-06, + "loss": 0.1184, + "num_input_tokens_seen": 53341920, + "step": 79130 + }, + { + "epoch": 1.9332812156450785, + "grad_norm": 0.17041514813899994, + "learning_rate": 1.5398005369326859e-06, + "loss": 0.035, + "num_input_tokens_seen": 53345568, + "step": 79135 + }, + { + "epoch": 1.9334033664769257, + "grad_norm": 1.063998818397522, + "learning_rate": 1.5397287488874662e-06, + "loss": 0.2139, + "num_input_tokens_seen": 53348768, + "step": 79140 + }, + { + "epoch": 1.9335255173087729, + "grad_norm": 0.6377407908439636, + "learning_rate": 1.5396569569171935e-06, + "loss": 0.088, + "num_input_tokens_seen": 53352224, + "step": 79145 + }, + { + "epoch": 1.93364766814062, + "grad_norm": 0.3253001570701599, + "learning_rate": 1.5395851610223906e-06, + "loss": 0.0501, + "num_input_tokens_seen": 53355104, + "step": 79150 + }, + { + "epoch": 1.9337698189724672, + "grad_norm": 9.995173454284668, + "learning_rate": 1.5395133612035794e-06, + "loss": 0.1531, + "num_input_tokens_seen": 53358304, + "step": 79155 + }, + { + "epoch": 1.9338919698043142, + "grad_norm": 0.03718879073858261, + "learning_rate": 1.5394415574612816e-06, + "loss": 0.0222, + "num_input_tokens_seen": 53362016, + "step": 79160 + }, + { + "epoch": 1.9340141206361614, + "grad_norm": 1.7763270139694214, + "learning_rate": 1.5393697497960196e-06, + "loss": 0.0414, + "num_input_tokens_seen": 53365536, + "step": 79165 + }, + { + "epoch": 1.9341362714680086, + "grad_norm": 14.749547958374023, + "learning_rate": 1.5392979382083163e-06, + "loss": 0.2279, + "num_input_tokens_seen": 53369440, + "step": 79170 + }, + { + "epoch": 1.9342584222998558, + "grad_norm": 28.31888771057129, + "learning_rate": 1.5392261226986926e-06, + "loss": 0.0723, + "num_input_tokens_seen": 53372576, + "step": 79175 + }, + { + "epoch": 1.934380573131703, + "grad_norm": 0.3445660173892975, + "learning_rate": 1.5391543032676721e-06, + "loss": 0.2502, + "num_input_tokens_seen": 53375584, + "step": 79180 + }, + { + "epoch": 1.9345027239635502, + "grad_norm": 0.6460484862327576, + "learning_rate": 1.5390824799157763e-06, + "loss": 0.0783, + "num_input_tokens_seen": 53378528, + "step": 79185 + }, + { + "epoch": 1.9346248747953974, + "grad_norm": 0.07000091671943665, + "learning_rate": 1.5390106526435277e-06, + "loss": 0.0312, + "num_input_tokens_seen": 53381984, + "step": 79190 + }, + { + "epoch": 1.9347470256272445, + "grad_norm": 51.154441833496094, + "learning_rate": 1.5389388214514485e-06, + "loss": 0.0512, + "num_input_tokens_seen": 53385440, + "step": 79195 + }, + { + "epoch": 1.9348691764590917, + "grad_norm": 7.436317443847656, + "learning_rate": 1.5388669863400614e-06, + "loss": 0.0864, + "num_input_tokens_seen": 53388896, + "step": 79200 + }, + { + "epoch": 1.934991327290939, + "grad_norm": 0.16629944741725922, + "learning_rate": 1.5387951473098883e-06, + "loss": 0.036, + "num_input_tokens_seen": 53392288, + "step": 79205 + }, + { + "epoch": 1.9351134781227861, + "grad_norm": 0.2667045593261719, + "learning_rate": 1.5387233043614525e-06, + "loss": 0.2508, + "num_input_tokens_seen": 53395936, + "step": 79210 + }, + { + "epoch": 1.9352356289546333, + "grad_norm": 0.6180437207221985, + "learning_rate": 1.5386514574952756e-06, + "loss": 0.0236, + "num_input_tokens_seen": 53399200, + "step": 79215 + }, + { + "epoch": 1.9353577797864805, + "grad_norm": 22.34016990661621, + "learning_rate": 1.5385796067118805e-06, + "loss": 0.0322, + "num_input_tokens_seen": 53402784, + "step": 79220 + }, + { + "epoch": 1.9354799306183275, + "grad_norm": 17.962881088256836, + "learning_rate": 1.5385077520117898e-06, + "loss": 0.1729, + "num_input_tokens_seen": 53406624, + "step": 79225 + }, + { + "epoch": 1.9356020814501746, + "grad_norm": 14.11638069152832, + "learning_rate": 1.5384358933955257e-06, + "loss": 0.2673, + "num_input_tokens_seen": 53409824, + "step": 79230 + }, + { + "epoch": 1.9357242322820218, + "grad_norm": 0.1589454859495163, + "learning_rate": 1.5383640308636108e-06, + "loss": 0.0014, + "num_input_tokens_seen": 53413280, + "step": 79235 + }, + { + "epoch": 1.935846383113869, + "grad_norm": 113.4941177368164, + "learning_rate": 1.5382921644165682e-06, + "loss": 0.0711, + "num_input_tokens_seen": 53417312, + "step": 79240 + }, + { + "epoch": 1.9359685339457162, + "grad_norm": 9.330788612365723, + "learning_rate": 1.53822029405492e-06, + "loss": 0.0745, + "num_input_tokens_seen": 53420832, + "step": 79245 + }, + { + "epoch": 1.9360906847775632, + "grad_norm": 0.9437325596809387, + "learning_rate": 1.5381484197791891e-06, + "loss": 0.0018, + "num_input_tokens_seen": 53423840, + "step": 79250 + }, + { + "epoch": 1.9362128356094104, + "grad_norm": 0.37375345826148987, + "learning_rate": 1.5380765415898984e-06, + "loss": 0.1126, + "num_input_tokens_seen": 53426976, + "step": 79255 + }, + { + "epoch": 1.9363349864412576, + "grad_norm": 0.38319942355155945, + "learning_rate": 1.53800465948757e-06, + "loss": 0.0283, + "num_input_tokens_seen": 53430176, + "step": 79260 + }, + { + "epoch": 1.9364571372731048, + "grad_norm": 6.067543029785156, + "learning_rate": 1.537932773472727e-06, + "loss": 0.0509, + "num_input_tokens_seen": 53433824, + "step": 79265 + }, + { + "epoch": 1.936579288104952, + "grad_norm": 44.16752624511719, + "learning_rate": 1.5378608835458922e-06, + "loss": 0.0054, + "num_input_tokens_seen": 53437024, + "step": 79270 + }, + { + "epoch": 1.9367014389367991, + "grad_norm": 0.036722905933856964, + "learning_rate": 1.5377889897075886e-06, + "loss": 0.0606, + "num_input_tokens_seen": 53440224, + "step": 79275 + }, + { + "epoch": 1.9368235897686463, + "grad_norm": 0.9072578549385071, + "learning_rate": 1.537717091958339e-06, + "loss": 0.0886, + "num_input_tokens_seen": 53443808, + "step": 79280 + }, + { + "epoch": 1.9369457406004935, + "grad_norm": 0.0430944450199604, + "learning_rate": 1.5376451902986659e-06, + "loss": 0.1185, + "num_input_tokens_seen": 53446880, + "step": 79285 + }, + { + "epoch": 1.9370678914323407, + "grad_norm": 11.946255683898926, + "learning_rate": 1.5375732847290923e-06, + "loss": 0.0366, + "num_input_tokens_seen": 53450464, + "step": 79290 + }, + { + "epoch": 1.937190042264188, + "grad_norm": 0.02077353373169899, + "learning_rate": 1.5375013752501412e-06, + "loss": 0.0236, + "num_input_tokens_seen": 53453792, + "step": 79295 + }, + { + "epoch": 1.937312193096035, + "grad_norm": 13.418227195739746, + "learning_rate": 1.5374294618623354e-06, + "loss": 0.0492, + "num_input_tokens_seen": 53457120, + "step": 79300 + }, + { + "epoch": 1.9374343439278823, + "grad_norm": 0.3700249493122101, + "learning_rate": 1.537357544566198e-06, + "loss": 0.043, + "num_input_tokens_seen": 53460448, + "step": 79305 + }, + { + "epoch": 1.9375564947597295, + "grad_norm": 0.8246507048606873, + "learning_rate": 1.537285623362252e-06, + "loss": 0.0276, + "num_input_tokens_seen": 53463776, + "step": 79310 + }, + { + "epoch": 1.9376786455915764, + "grad_norm": 13.733869552612305, + "learning_rate": 1.5372136982510203e-06, + "loss": 0.0755, + "num_input_tokens_seen": 53467232, + "step": 79315 + }, + { + "epoch": 1.9378007964234236, + "grad_norm": 0.0676179900765419, + "learning_rate": 1.5371417692330267e-06, + "loss": 0.0909, + "num_input_tokens_seen": 53470752, + "step": 79320 + }, + { + "epoch": 1.9379229472552708, + "grad_norm": 226.75962829589844, + "learning_rate": 1.537069836308793e-06, + "loss": 0.0852, + "num_input_tokens_seen": 53473824, + "step": 79325 + }, + { + "epoch": 1.938045098087118, + "grad_norm": 9.021869659423828, + "learning_rate": 1.5369978994788436e-06, + "loss": 0.1114, + "num_input_tokens_seen": 53476512, + "step": 79330 + }, + { + "epoch": 1.9381672489189652, + "grad_norm": 14.622756004333496, + "learning_rate": 1.5369259587437006e-06, + "loss": 0.0344, + "num_input_tokens_seen": 53479520, + "step": 79335 + }, + { + "epoch": 1.9382893997508122, + "grad_norm": 9.884684562683105, + "learning_rate": 1.5368540141038876e-06, + "loss": 0.1046, + "num_input_tokens_seen": 53482656, + "step": 79340 + }, + { + "epoch": 1.9384115505826593, + "grad_norm": 0.8400244116783142, + "learning_rate": 1.5367820655599283e-06, + "loss": 0.0904, + "num_input_tokens_seen": 53486240, + "step": 79345 + }, + { + "epoch": 1.9385337014145065, + "grad_norm": 113.14087677001953, + "learning_rate": 1.536710113112345e-06, + "loss": 0.1344, + "num_input_tokens_seen": 53489760, + "step": 79350 + }, + { + "epoch": 1.9386558522463537, + "grad_norm": 0.09375862032175064, + "learning_rate": 1.5366381567616615e-06, + "loss": 0.2347, + "num_input_tokens_seen": 53492960, + "step": 79355 + }, + { + "epoch": 1.938778003078201, + "grad_norm": 14.247235298156738, + "learning_rate": 1.5365661965084008e-06, + "loss": 0.0645, + "num_input_tokens_seen": 53496224, + "step": 79360 + }, + { + "epoch": 1.938900153910048, + "grad_norm": 17.835500717163086, + "learning_rate": 1.5364942323530868e-06, + "loss": 0.1529, + "num_input_tokens_seen": 53499168, + "step": 79365 + }, + { + "epoch": 1.9390223047418953, + "grad_norm": 0.8982129693031311, + "learning_rate": 1.536422264296242e-06, + "loss": 0.1296, + "num_input_tokens_seen": 53502176, + "step": 79370 + }, + { + "epoch": 1.9391444555737425, + "grad_norm": 0.0543377548456192, + "learning_rate": 1.5363502923383906e-06, + "loss": 0.1098, + "num_input_tokens_seen": 53505120, + "step": 79375 + }, + { + "epoch": 1.9392666064055897, + "grad_norm": 0.25972822308540344, + "learning_rate": 1.5362783164800554e-06, + "loss": 0.0626, + "num_input_tokens_seen": 53508256, + "step": 79380 + }, + { + "epoch": 1.9393887572374369, + "grad_norm": 0.461689293384552, + "learning_rate": 1.5362063367217603e-06, + "loss": 0.0417, + "num_input_tokens_seen": 53511648, + "step": 79385 + }, + { + "epoch": 1.939510908069284, + "grad_norm": 1.3230669498443604, + "learning_rate": 1.5361343530640283e-06, + "loss": 0.1514, + "num_input_tokens_seen": 53514656, + "step": 79390 + }, + { + "epoch": 1.9396330589011312, + "grad_norm": 0.2136191427707672, + "learning_rate": 1.536062365507383e-06, + "loss": 0.0348, + "num_input_tokens_seen": 53517664, + "step": 79395 + }, + { + "epoch": 1.9397552097329784, + "grad_norm": 1.2831095457077026, + "learning_rate": 1.5359903740523481e-06, + "loss": 0.1085, + "num_input_tokens_seen": 53520992, + "step": 79400 + }, + { + "epoch": 1.9398773605648254, + "grad_norm": 0.11328689008951187, + "learning_rate": 1.535918378699447e-06, + "loss": 0.1096, + "num_input_tokens_seen": 53524000, + "step": 79405 + }, + { + "epoch": 1.9399995113966726, + "grad_norm": 0.14550715684890747, + "learning_rate": 1.5358463794492034e-06, + "loss": 0.1842, + "num_input_tokens_seen": 53527200, + "step": 79410 + }, + { + "epoch": 1.9401216622285198, + "grad_norm": 2.1023430824279785, + "learning_rate": 1.5357743763021407e-06, + "loss": 0.0709, + "num_input_tokens_seen": 53530784, + "step": 79415 + }, + { + "epoch": 1.940243813060367, + "grad_norm": 0.47813543677330017, + "learning_rate": 1.5357023692587827e-06, + "loss": 0.0449, + "num_input_tokens_seen": 53534048, + "step": 79420 + }, + { + "epoch": 1.940365963892214, + "grad_norm": 0.7654673457145691, + "learning_rate": 1.5356303583196528e-06, + "loss": 0.0918, + "num_input_tokens_seen": 53537440, + "step": 79425 + }, + { + "epoch": 1.9404881147240611, + "grad_norm": 0.6435978412628174, + "learning_rate": 1.5355583434852749e-06, + "loss": 0.068, + "num_input_tokens_seen": 53540064, + "step": 79430 + }, + { + "epoch": 1.9406102655559083, + "grad_norm": 16.76521873474121, + "learning_rate": 1.535486324756173e-06, + "loss": 0.0767, + "num_input_tokens_seen": 53543648, + "step": 79435 + }, + { + "epoch": 1.9407324163877555, + "grad_norm": 0.1286541223526001, + "learning_rate": 1.5354143021328704e-06, + "loss": 0.0317, + "num_input_tokens_seen": 53547488, + "step": 79440 + }, + { + "epoch": 1.9408545672196027, + "grad_norm": 16.738767623901367, + "learning_rate": 1.5353422756158909e-06, + "loss": 0.0711, + "num_input_tokens_seen": 53551136, + "step": 79445 + }, + { + "epoch": 1.94097671805145, + "grad_norm": 97.97506713867188, + "learning_rate": 1.5352702452057584e-06, + "loss": 0.0091, + "num_input_tokens_seen": 53554144, + "step": 79450 + }, + { + "epoch": 1.941098868883297, + "grad_norm": 13.813468933105469, + "learning_rate": 1.5351982109029964e-06, + "loss": 0.0815, + "num_input_tokens_seen": 53557280, + "step": 79455 + }, + { + "epoch": 1.9412210197151443, + "grad_norm": 0.16498295962810516, + "learning_rate": 1.5351261727081295e-06, + "loss": 0.0037, + "num_input_tokens_seen": 53560288, + "step": 79460 + }, + { + "epoch": 1.9413431705469915, + "grad_norm": 27.545860290527344, + "learning_rate": 1.5350541306216809e-06, + "loss": 0.1259, + "num_input_tokens_seen": 53563808, + "step": 79465 + }, + { + "epoch": 1.9414653213788386, + "grad_norm": 0.7710163593292236, + "learning_rate": 1.5349820846441748e-06, + "loss": 0.001, + "num_input_tokens_seen": 53567392, + "step": 79470 + }, + { + "epoch": 1.9415874722106858, + "grad_norm": 56.430171966552734, + "learning_rate": 1.5349100347761353e-06, + "loss": 0.1426, + "num_input_tokens_seen": 53570400, + "step": 79475 + }, + { + "epoch": 1.941709623042533, + "grad_norm": 43.03308868408203, + "learning_rate": 1.5348379810180858e-06, + "loss": 0.0804, + "num_input_tokens_seen": 53573792, + "step": 79480 + }, + { + "epoch": 1.9418317738743802, + "grad_norm": 0.030352571979165077, + "learning_rate": 1.5347659233705507e-06, + "loss": 0.0546, + "num_input_tokens_seen": 53576992, + "step": 79485 + }, + { + "epoch": 1.9419539247062274, + "grad_norm": 0.024639304727315903, + "learning_rate": 1.534693861834054e-06, + "loss": 0.1037, + "num_input_tokens_seen": 53580256, + "step": 79490 + }, + { + "epoch": 1.9420760755380744, + "grad_norm": 0.3017178177833557, + "learning_rate": 1.5346217964091198e-06, + "loss": 0.0331, + "num_input_tokens_seen": 53584224, + "step": 79495 + }, + { + "epoch": 1.9421982263699216, + "grad_norm": 0.5502581596374512, + "learning_rate": 1.5345497270962724e-06, + "loss": 0.0263, + "num_input_tokens_seen": 53587744, + "step": 79500 + }, + { + "epoch": 1.9423203772017688, + "grad_norm": 31.735069274902344, + "learning_rate": 1.5344776538960353e-06, + "loss": 0.0533, + "num_input_tokens_seen": 53590880, + "step": 79505 + }, + { + "epoch": 1.942442528033616, + "grad_norm": 0.3141069710254669, + "learning_rate": 1.534405576808933e-06, + "loss": 0.144, + "num_input_tokens_seen": 53594208, + "step": 79510 + }, + { + "epoch": 1.942564678865463, + "grad_norm": 0.16800308227539062, + "learning_rate": 1.5343334958354893e-06, + "loss": 0.0418, + "num_input_tokens_seen": 53597792, + "step": 79515 + }, + { + "epoch": 1.94268682969731, + "grad_norm": 0.10520976036787033, + "learning_rate": 1.534261410976229e-06, + "loss": 0.0889, + "num_input_tokens_seen": 53600864, + "step": 79520 + }, + { + "epoch": 1.9428089805291573, + "grad_norm": 0.51318359375, + "learning_rate": 1.5341893222316759e-06, + "loss": 0.1442, + "num_input_tokens_seen": 53604320, + "step": 79525 + }, + { + "epoch": 1.9429311313610045, + "grad_norm": 0.11362060904502869, + "learning_rate": 1.5341172296023545e-06, + "loss": 0.0782, + "num_input_tokens_seen": 53607776, + "step": 79530 + }, + { + "epoch": 1.9430532821928517, + "grad_norm": 13.606107711791992, + "learning_rate": 1.5340451330887891e-06, + "loss": 0.0684, + "num_input_tokens_seen": 53611552, + "step": 79535 + }, + { + "epoch": 1.9431754330246989, + "grad_norm": 0.14235706627368927, + "learning_rate": 1.5339730326915038e-06, + "loss": 0.1107, + "num_input_tokens_seen": 53615648, + "step": 79540 + }, + { + "epoch": 1.943297583856546, + "grad_norm": 0.2929399907588959, + "learning_rate": 1.5339009284110228e-06, + "loss": 0.1097, + "num_input_tokens_seen": 53618848, + "step": 79545 + }, + { + "epoch": 1.9434197346883932, + "grad_norm": 0.07972901314496994, + "learning_rate": 1.5338288202478706e-06, + "loss": 0.0017, + "num_input_tokens_seen": 53622432, + "step": 79550 + }, + { + "epoch": 1.9435418855202404, + "grad_norm": 1.1873873472213745, + "learning_rate": 1.5337567082025714e-06, + "loss": 0.0393, + "num_input_tokens_seen": 53626016, + "step": 79555 + }, + { + "epoch": 1.9436640363520876, + "grad_norm": 40.611732482910156, + "learning_rate": 1.5336845922756502e-06, + "loss": 0.1167, + "num_input_tokens_seen": 53629792, + "step": 79560 + }, + { + "epoch": 1.9437861871839348, + "grad_norm": 0.14855530858039856, + "learning_rate": 1.5336124724676314e-06, + "loss": 0.012, + "num_input_tokens_seen": 53632736, + "step": 79565 + }, + { + "epoch": 1.943908338015782, + "grad_norm": 0.19300849735736847, + "learning_rate": 1.533540348779039e-06, + "loss": 0.0345, + "num_input_tokens_seen": 53636192, + "step": 79570 + }, + { + "epoch": 1.9440304888476292, + "grad_norm": 0.4298967123031616, + "learning_rate": 1.5334682212103973e-06, + "loss": 0.0412, + "num_input_tokens_seen": 53639968, + "step": 79575 + }, + { + "epoch": 1.9441526396794764, + "grad_norm": 8.890560150146484, + "learning_rate": 1.5333960897622313e-06, + "loss": 0.0968, + "num_input_tokens_seen": 53643104, + "step": 79580 + }, + { + "epoch": 1.9442747905113233, + "grad_norm": 0.10859589278697968, + "learning_rate": 1.5333239544350656e-06, + "loss": 0.0289, + "num_input_tokens_seen": 53645856, + "step": 79585 + }, + { + "epoch": 1.9443969413431705, + "grad_norm": 0.24548016488552094, + "learning_rate": 1.533251815229425e-06, + "loss": 0.0682, + "num_input_tokens_seen": 53649696, + "step": 79590 + }, + { + "epoch": 1.9445190921750177, + "grad_norm": 12.39043140411377, + "learning_rate": 1.5331796721458332e-06, + "loss": 0.2259, + "num_input_tokens_seen": 53653152, + "step": 79595 + }, + { + "epoch": 1.944641243006865, + "grad_norm": 0.2160811573266983, + "learning_rate": 1.5331075251848159e-06, + "loss": 0.1408, + "num_input_tokens_seen": 53656672, + "step": 79600 + }, + { + "epoch": 1.9447633938387119, + "grad_norm": 32.371551513671875, + "learning_rate": 1.5330353743468968e-06, + "loss": 0.1304, + "num_input_tokens_seen": 53659744, + "step": 79605 + }, + { + "epoch": 1.944885544670559, + "grad_norm": 0.23671525716781616, + "learning_rate": 1.5329632196326015e-06, + "loss": 0.0999, + "num_input_tokens_seen": 53663136, + "step": 79610 + }, + { + "epoch": 1.9450076955024063, + "grad_norm": 0.14334982633590698, + "learning_rate": 1.532891061042454e-06, + "loss": 0.0075, + "num_input_tokens_seen": 53666208, + "step": 79615 + }, + { + "epoch": 1.9451298463342535, + "grad_norm": 12.645910263061523, + "learning_rate": 1.5328188985769795e-06, + "loss": 0.0496, + "num_input_tokens_seen": 53669792, + "step": 79620 + }, + { + "epoch": 1.9452519971661006, + "grad_norm": 80.78424835205078, + "learning_rate": 1.5327467322367028e-06, + "loss": 0.0075, + "num_input_tokens_seen": 53673312, + "step": 79625 + }, + { + "epoch": 1.9453741479979478, + "grad_norm": 0.27641037106513977, + "learning_rate": 1.5326745620221484e-06, + "loss": 0.0013, + "num_input_tokens_seen": 53676832, + "step": 79630 + }, + { + "epoch": 1.945496298829795, + "grad_norm": 0.14950011670589447, + "learning_rate": 1.5326023879338411e-06, + "loss": 0.1459, + "num_input_tokens_seen": 53679968, + "step": 79635 + }, + { + "epoch": 1.9456184496616422, + "grad_norm": 2.9096691608428955, + "learning_rate": 1.5325302099723065e-06, + "loss": 0.1007, + "num_input_tokens_seen": 53683488, + "step": 79640 + }, + { + "epoch": 1.9457406004934894, + "grad_norm": 0.5228842496871948, + "learning_rate": 1.5324580281380689e-06, + "loss": 0.079, + "num_input_tokens_seen": 53686496, + "step": 79645 + }, + { + "epoch": 1.9458627513253366, + "grad_norm": 27.07952308654785, + "learning_rate": 1.5323858424316529e-06, + "loss": 0.0798, + "num_input_tokens_seen": 53690592, + "step": 79650 + }, + { + "epoch": 1.9459849021571838, + "grad_norm": 2.3000648021698, + "learning_rate": 1.5323136528535842e-06, + "loss": 0.0389, + "num_input_tokens_seen": 53693920, + "step": 79655 + }, + { + "epoch": 1.946107052989031, + "grad_norm": 42.30258560180664, + "learning_rate": 1.5322414594043874e-06, + "loss": 0.2223, + "num_input_tokens_seen": 53697376, + "step": 79660 + }, + { + "epoch": 1.9462292038208782, + "grad_norm": 0.7183613181114197, + "learning_rate": 1.5321692620845875e-06, + "loss": 0.1392, + "num_input_tokens_seen": 53700768, + "step": 79665 + }, + { + "epoch": 1.9463513546527251, + "grad_norm": 2.183073043823242, + "learning_rate": 1.5320970608947093e-06, + "loss": 0.1499, + "num_input_tokens_seen": 53704032, + "step": 79670 + }, + { + "epoch": 1.9464735054845723, + "grad_norm": 6.9765777587890625, + "learning_rate": 1.5320248558352784e-06, + "loss": 0.1676, + "num_input_tokens_seen": 53707360, + "step": 79675 + }, + { + "epoch": 1.9465956563164195, + "grad_norm": 18.949779510498047, + "learning_rate": 1.5319526469068196e-06, + "loss": 0.0718, + "num_input_tokens_seen": 53710560, + "step": 79680 + }, + { + "epoch": 1.9467178071482667, + "grad_norm": 33.51317596435547, + "learning_rate": 1.5318804341098583e-06, + "loss": 0.1137, + "num_input_tokens_seen": 53713504, + "step": 79685 + }, + { + "epoch": 1.9468399579801139, + "grad_norm": 0.09625236690044403, + "learning_rate": 1.5318082174449192e-06, + "loss": 0.1201, + "num_input_tokens_seen": 53716704, + "step": 79690 + }, + { + "epoch": 1.9469621088119609, + "grad_norm": 1.1682844161987305, + "learning_rate": 1.5317359969125279e-06, + "loss": 0.0778, + "num_input_tokens_seen": 53720480, + "step": 79695 + }, + { + "epoch": 1.947084259643808, + "grad_norm": 21.76727294921875, + "learning_rate": 1.5316637725132094e-06, + "loss": 0.1615, + "num_input_tokens_seen": 53723616, + "step": 79700 + }, + { + "epoch": 1.9472064104756552, + "grad_norm": 0.10677886009216309, + "learning_rate": 1.5315915442474887e-06, + "loss": 0.0031, + "num_input_tokens_seen": 53726816, + "step": 79705 + }, + { + "epoch": 1.9473285613075024, + "grad_norm": 31.727231979370117, + "learning_rate": 1.5315193121158915e-06, + "loss": 0.1203, + "num_input_tokens_seen": 53730080, + "step": 79710 + }, + { + "epoch": 1.9474507121393496, + "grad_norm": 0.15762321650981903, + "learning_rate": 1.5314470761189429e-06, + "loss": 0.0737, + "num_input_tokens_seen": 53733088, + "step": 79715 + }, + { + "epoch": 1.9475728629711968, + "grad_norm": 12.666200637817383, + "learning_rate": 1.5313748362571681e-06, + "loss": 0.2403, + "num_input_tokens_seen": 53736544, + "step": 79720 + }, + { + "epoch": 1.947695013803044, + "grad_norm": 8.358912467956543, + "learning_rate": 1.5313025925310928e-06, + "loss": 0.0776, + "num_input_tokens_seen": 53739680, + "step": 79725 + }, + { + "epoch": 1.9478171646348912, + "grad_norm": 15.929183959960938, + "learning_rate": 1.5312303449412419e-06, + "loss": 0.0806, + "num_input_tokens_seen": 53743072, + "step": 79730 + }, + { + "epoch": 1.9479393154667384, + "grad_norm": 0.23921556770801544, + "learning_rate": 1.531158093488141e-06, + "loss": 0.0024, + "num_input_tokens_seen": 53746144, + "step": 79735 + }, + { + "epoch": 1.9480614662985856, + "grad_norm": 34.633052825927734, + "learning_rate": 1.5310858381723154e-06, + "loss": 0.0449, + "num_input_tokens_seen": 53749344, + "step": 79740 + }, + { + "epoch": 1.9481836171304328, + "grad_norm": 0.1212652325630188, + "learning_rate": 1.5310135789942915e-06, + "loss": 0.0016, + "num_input_tokens_seen": 53752928, + "step": 79745 + }, + { + "epoch": 1.94830576796228, + "grad_norm": 0.3784651756286621, + "learning_rate": 1.5309413159545935e-06, + "loss": 0.0685, + "num_input_tokens_seen": 53756576, + "step": 79750 + }, + { + "epoch": 1.9484279187941271, + "grad_norm": 23.24256706237793, + "learning_rate": 1.5308690490537477e-06, + "loss": 0.1998, + "num_input_tokens_seen": 53759776, + "step": 79755 + }, + { + "epoch": 1.948550069625974, + "grad_norm": 38.22763442993164, + "learning_rate": 1.530796778292279e-06, + "loss": 0.1284, + "num_input_tokens_seen": 53763104, + "step": 79760 + }, + { + "epoch": 1.9486722204578213, + "grad_norm": 0.16112475097179413, + "learning_rate": 1.5307245036707136e-06, + "loss": 0.0915, + "num_input_tokens_seen": 53766304, + "step": 79765 + }, + { + "epoch": 1.9487943712896685, + "grad_norm": 192.25909423828125, + "learning_rate": 1.5306522251895766e-06, + "loss": 0.2308, + "num_input_tokens_seen": 53769632, + "step": 79770 + }, + { + "epoch": 1.9489165221215157, + "grad_norm": 0.2769120931625366, + "learning_rate": 1.5305799428493944e-06, + "loss": 0.0016, + "num_input_tokens_seen": 53773216, + "step": 79775 + }, + { + "epoch": 1.9490386729533629, + "grad_norm": 1.8229011297225952, + "learning_rate": 1.5305076566506918e-06, + "loss": 0.0651, + "num_input_tokens_seen": 53776544, + "step": 79780 + }, + { + "epoch": 1.9491608237852098, + "grad_norm": 0.07343819737434387, + "learning_rate": 1.530435366593995e-06, + "loss": 0.0341, + "num_input_tokens_seen": 53780192, + "step": 79785 + }, + { + "epoch": 1.949282974617057, + "grad_norm": 6.312066555023193, + "learning_rate": 1.5303630726798294e-06, + "loss": 0.0921, + "num_input_tokens_seen": 53783520, + "step": 79790 + }, + { + "epoch": 1.9494051254489042, + "grad_norm": 23.09063720703125, + "learning_rate": 1.5302907749087209e-06, + "loss": 0.0783, + "num_input_tokens_seen": 53787616, + "step": 79795 + }, + { + "epoch": 1.9495272762807514, + "grad_norm": 0.6398938894271851, + "learning_rate": 1.5302184732811952e-06, + "loss": 0.0736, + "num_input_tokens_seen": 53791072, + "step": 79800 + }, + { + "epoch": 1.9496494271125986, + "grad_norm": 0.10749911516904831, + "learning_rate": 1.5301461677977782e-06, + "loss": 0.0424, + "num_input_tokens_seen": 53794528, + "step": 79805 + }, + { + "epoch": 1.9497715779444458, + "grad_norm": 0.19549265503883362, + "learning_rate": 1.530073858458996e-06, + "loss": 0.0995, + "num_input_tokens_seen": 53798048, + "step": 79810 + }, + { + "epoch": 1.949893728776293, + "grad_norm": 0.5635766983032227, + "learning_rate": 1.5300015452653737e-06, + "loss": 0.045, + "num_input_tokens_seen": 53801568, + "step": 79815 + }, + { + "epoch": 1.9500158796081402, + "grad_norm": 0.032533228397369385, + "learning_rate": 1.529929228217438e-06, + "loss": 0.0998, + "num_input_tokens_seen": 53804768, + "step": 79820 + }, + { + "epoch": 1.9501380304399873, + "grad_norm": 0.07323387265205383, + "learning_rate": 1.5298569073157138e-06, + "loss": 0.1129, + "num_input_tokens_seen": 53807968, + "step": 79825 + }, + { + "epoch": 1.9502601812718345, + "grad_norm": 140.5348358154297, + "learning_rate": 1.529784582560728e-06, + "loss": 0.0385, + "num_input_tokens_seen": 53811680, + "step": 79830 + }, + { + "epoch": 1.9503823321036817, + "grad_norm": 0.1245734691619873, + "learning_rate": 1.5297122539530061e-06, + "loss": 0.1016, + "num_input_tokens_seen": 53815520, + "step": 79835 + }, + { + "epoch": 1.950504482935529, + "grad_norm": 15.518820762634277, + "learning_rate": 1.5296399214930746e-06, + "loss": 0.1642, + "num_input_tokens_seen": 53818784, + "step": 79840 + }, + { + "epoch": 1.950626633767376, + "grad_norm": 0.4381111264228821, + "learning_rate": 1.529567585181459e-06, + "loss": 0.1013, + "num_input_tokens_seen": 53822048, + "step": 79845 + }, + { + "epoch": 1.950748784599223, + "grad_norm": 27.99272346496582, + "learning_rate": 1.529495245018685e-06, + "loss": 0.0947, + "num_input_tokens_seen": 53825056, + "step": 79850 + }, + { + "epoch": 1.9508709354310703, + "grad_norm": 0.1914575845003128, + "learning_rate": 1.5294229010052799e-06, + "loss": 0.0969, + "num_input_tokens_seen": 53828448, + "step": 79855 + }, + { + "epoch": 1.9509930862629175, + "grad_norm": 23.358551025390625, + "learning_rate": 1.5293505531417686e-06, + "loss": 0.0795, + "num_input_tokens_seen": 53831648, + "step": 79860 + }, + { + "epoch": 1.9511152370947646, + "grad_norm": 20.271875381469727, + "learning_rate": 1.5292782014286778e-06, + "loss": 0.1232, + "num_input_tokens_seen": 53834720, + "step": 79865 + }, + { + "epoch": 1.9512373879266118, + "grad_norm": 0.1967715322971344, + "learning_rate": 1.5292058458665336e-06, + "loss": 0.0317, + "num_input_tokens_seen": 53837664, + "step": 79870 + }, + { + "epoch": 1.9513595387584588, + "grad_norm": 0.1904851347208023, + "learning_rate": 1.5291334864558621e-06, + "loss": 0.1238, + "num_input_tokens_seen": 53840928, + "step": 79875 + }, + { + "epoch": 1.951481689590306, + "grad_norm": 2.9282398223876953, + "learning_rate": 1.5290611231971895e-06, + "loss": 0.1142, + "num_input_tokens_seen": 53844576, + "step": 79880 + }, + { + "epoch": 1.9516038404221532, + "grad_norm": 11.948089599609375, + "learning_rate": 1.5289887560910422e-06, + "loss": 0.0476, + "num_input_tokens_seen": 53848032, + "step": 79885 + }, + { + "epoch": 1.9517259912540004, + "grad_norm": 175.88540649414062, + "learning_rate": 1.528916385137946e-06, + "loss": 0.1525, + "num_input_tokens_seen": 53851296, + "step": 79890 + }, + { + "epoch": 1.9518481420858476, + "grad_norm": 126.39527130126953, + "learning_rate": 1.528844010338428e-06, + "loss": 0.1233, + "num_input_tokens_seen": 53854496, + "step": 79895 + }, + { + "epoch": 1.9519702929176947, + "grad_norm": 0.3504171669483185, + "learning_rate": 1.5287716316930146e-06, + "loss": 0.1436, + "num_input_tokens_seen": 53858080, + "step": 79900 + }, + { + "epoch": 1.952092443749542, + "grad_norm": 18.548446655273438, + "learning_rate": 1.528699249202231e-06, + "loss": 0.1101, + "num_input_tokens_seen": 53861472, + "step": 79905 + }, + { + "epoch": 1.9522145945813891, + "grad_norm": 0.4505668878555298, + "learning_rate": 1.5286268628666044e-06, + "loss": 0.1607, + "num_input_tokens_seen": 53864416, + "step": 79910 + }, + { + "epoch": 1.9523367454132363, + "grad_norm": 26.039945602416992, + "learning_rate": 1.5285544726866611e-06, + "loss": 0.0703, + "num_input_tokens_seen": 53867808, + "step": 79915 + }, + { + "epoch": 1.9524588962450835, + "grad_norm": 10.314916610717773, + "learning_rate": 1.5284820786629274e-06, + "loss": 0.1685, + "num_input_tokens_seen": 53870752, + "step": 79920 + }, + { + "epoch": 1.9525810470769307, + "grad_norm": 12.654891967773438, + "learning_rate": 1.52840968079593e-06, + "loss": 0.0704, + "num_input_tokens_seen": 53873952, + "step": 79925 + }, + { + "epoch": 1.9527031979087779, + "grad_norm": 0.6298151016235352, + "learning_rate": 1.528337279086195e-06, + "loss": 0.0026, + "num_input_tokens_seen": 53877664, + "step": 79930 + }, + { + "epoch": 1.952825348740625, + "grad_norm": 0.339324414730072, + "learning_rate": 1.5282648735342495e-06, + "loss": 0.0327, + "num_input_tokens_seen": 53880800, + "step": 79935 + }, + { + "epoch": 1.952947499572472, + "grad_norm": 113.13359069824219, + "learning_rate": 1.5281924641406198e-06, + "loss": 0.0877, + "num_input_tokens_seen": 53884960, + "step": 79940 + }, + { + "epoch": 1.9530696504043192, + "grad_norm": 0.9290973544120789, + "learning_rate": 1.5281200509058322e-06, + "loss": 0.0775, + "num_input_tokens_seen": 53887968, + "step": 79945 + }, + { + "epoch": 1.9531918012361664, + "grad_norm": 0.639180600643158, + "learning_rate": 1.5280476338304139e-06, + "loss": 0.1576, + "num_input_tokens_seen": 53891168, + "step": 79950 + }, + { + "epoch": 1.9533139520680136, + "grad_norm": 7.0396294593811035, + "learning_rate": 1.527975212914891e-06, + "loss": 0.0053, + "num_input_tokens_seen": 53894560, + "step": 79955 + }, + { + "epoch": 1.9534361028998606, + "grad_norm": 55.963619232177734, + "learning_rate": 1.5279027881597904e-06, + "loss": 0.0029, + "num_input_tokens_seen": 53897824, + "step": 79960 + }, + { + "epoch": 1.9535582537317078, + "grad_norm": 0.18510715663433075, + "learning_rate": 1.5278303595656384e-06, + "loss": 0.0022, + "num_input_tokens_seen": 53901600, + "step": 79965 + }, + { + "epoch": 1.953680404563555, + "grad_norm": 0.17064093053340912, + "learning_rate": 1.5277579271329623e-06, + "loss": 0.0011, + "num_input_tokens_seen": 53904992, + "step": 79970 + }, + { + "epoch": 1.9538025553954022, + "grad_norm": 0.2390449494123459, + "learning_rate": 1.5276854908622887e-06, + "loss": 0.0596, + "num_input_tokens_seen": 53908000, + "step": 79975 + }, + { + "epoch": 1.9539247062272493, + "grad_norm": 0.059372562915086746, + "learning_rate": 1.527613050754144e-06, + "loss": 0.0008, + "num_input_tokens_seen": 53911328, + "step": 79980 + }, + { + "epoch": 1.9540468570590965, + "grad_norm": 17.186986923217773, + "learning_rate": 1.5275406068090555e-06, + "loss": 0.2629, + "num_input_tokens_seen": 53914464, + "step": 79985 + }, + { + "epoch": 1.9541690078909437, + "grad_norm": 0.08534011989831924, + "learning_rate": 1.5274681590275495e-06, + "loss": 0.0013, + "num_input_tokens_seen": 53917856, + "step": 79990 + }, + { + "epoch": 1.954291158722791, + "grad_norm": 0.15575003623962402, + "learning_rate": 1.5273957074101539e-06, + "loss": 0.0438, + "num_input_tokens_seen": 53921376, + "step": 79995 + }, + { + "epoch": 1.954413309554638, + "grad_norm": 31.548913955688477, + "learning_rate": 1.5273232519573943e-06, + "loss": 0.074, + "num_input_tokens_seen": 53924512, + "step": 80000 + }, + { + "epoch": 1.9545354603864853, + "grad_norm": 87.1722183227539, + "learning_rate": 1.5272507926697983e-06, + "loss": 0.0981, + "num_input_tokens_seen": 53927904, + "step": 80005 + }, + { + "epoch": 1.9546576112183325, + "grad_norm": 10.903924942016602, + "learning_rate": 1.527178329547893e-06, + "loss": 0.21, + "num_input_tokens_seen": 53931296, + "step": 80010 + }, + { + "epoch": 1.9547797620501797, + "grad_norm": 0.21467016637325287, + "learning_rate": 1.5271058625922044e-06, + "loss": 0.0498, + "num_input_tokens_seen": 53934688, + "step": 80015 + }, + { + "epoch": 1.9549019128820269, + "grad_norm": 21.4910888671875, + "learning_rate": 1.5270333918032607e-06, + "loss": 0.1497, + "num_input_tokens_seen": 53938016, + "step": 80020 + }, + { + "epoch": 1.955024063713874, + "grad_norm": 0.37493425607681274, + "learning_rate": 1.5269609171815884e-06, + "loss": 0.0401, + "num_input_tokens_seen": 53940960, + "step": 80025 + }, + { + "epoch": 1.955146214545721, + "grad_norm": 0.16464658081531525, + "learning_rate": 1.5268884387277143e-06, + "loss": 0.1183, + "num_input_tokens_seen": 53944608, + "step": 80030 + }, + { + "epoch": 1.9552683653775682, + "grad_norm": 0.4630794823169708, + "learning_rate": 1.5268159564421658e-06, + "loss": 0.1696, + "num_input_tokens_seen": 53947552, + "step": 80035 + }, + { + "epoch": 1.9553905162094154, + "grad_norm": 0.6731862425804138, + "learning_rate": 1.5267434703254701e-06, + "loss": 0.0395, + "num_input_tokens_seen": 53950880, + "step": 80040 + }, + { + "epoch": 1.9555126670412626, + "grad_norm": 0.07116980850696564, + "learning_rate": 1.5266709803781544e-06, + "loss": 0.065, + "num_input_tokens_seen": 53954336, + "step": 80045 + }, + { + "epoch": 1.9556348178731096, + "grad_norm": 30.472640991210938, + "learning_rate": 1.5265984866007453e-06, + "loss": 0.0143, + "num_input_tokens_seen": 53957728, + "step": 80050 + }, + { + "epoch": 1.9557569687049567, + "grad_norm": 0.06493127346038818, + "learning_rate": 1.5265259889937708e-06, + "loss": 0.0829, + "num_input_tokens_seen": 53961056, + "step": 80055 + }, + { + "epoch": 1.955879119536804, + "grad_norm": 0.16695833206176758, + "learning_rate": 1.5264534875577575e-06, + "loss": 0.1419, + "num_input_tokens_seen": 53964320, + "step": 80060 + }, + { + "epoch": 1.9560012703686511, + "grad_norm": 24.053009033203125, + "learning_rate": 1.526380982293233e-06, + "loss": 0.1244, + "num_input_tokens_seen": 53967328, + "step": 80065 + }, + { + "epoch": 1.9561234212004983, + "grad_norm": 8.561441421508789, + "learning_rate": 1.5263084732007242e-06, + "loss": 0.0039, + "num_input_tokens_seen": 53970400, + "step": 80070 + }, + { + "epoch": 1.9562455720323455, + "grad_norm": 8.988033294677734, + "learning_rate": 1.5262359602807583e-06, + "loss": 0.0678, + "num_input_tokens_seen": 53973536, + "step": 80075 + }, + { + "epoch": 1.9563677228641927, + "grad_norm": 41.8480339050293, + "learning_rate": 1.5261634435338632e-06, + "loss": 0.0714, + "num_input_tokens_seen": 53976736, + "step": 80080 + }, + { + "epoch": 1.9564898736960399, + "grad_norm": 10.159090042114258, + "learning_rate": 1.526090922960566e-06, + "loss": 0.0319, + "num_input_tokens_seen": 53980064, + "step": 80085 + }, + { + "epoch": 1.956612024527887, + "grad_norm": 0.9245445132255554, + "learning_rate": 1.5260183985613945e-06, + "loss": 0.0608, + "num_input_tokens_seen": 53983584, + "step": 80090 + }, + { + "epoch": 1.9567341753597343, + "grad_norm": 0.111895851790905, + "learning_rate": 1.5259458703368754e-06, + "loss": 0.0483, + "num_input_tokens_seen": 53986976, + "step": 80095 + }, + { + "epoch": 1.9568563261915815, + "grad_norm": 20.46241569519043, + "learning_rate": 1.5258733382875365e-06, + "loss": 0.2075, + "num_input_tokens_seen": 53990688, + "step": 80100 + }, + { + "epoch": 1.9569784770234286, + "grad_norm": 256.2488098144531, + "learning_rate": 1.5258008024139052e-06, + "loss": 0.1296, + "num_input_tokens_seen": 53993824, + "step": 80105 + }, + { + "epoch": 1.9571006278552758, + "grad_norm": 0.30050423741340637, + "learning_rate": 1.5257282627165093e-06, + "loss": 0.1, + "num_input_tokens_seen": 53998304, + "step": 80110 + }, + { + "epoch": 1.957222778687123, + "grad_norm": 2.183509349822998, + "learning_rate": 1.5256557191958756e-06, + "loss": 0.0711, + "num_input_tokens_seen": 54001952, + "step": 80115 + }, + { + "epoch": 1.95734492951897, + "grad_norm": 0.1657891422510147, + "learning_rate": 1.5255831718525324e-06, + "loss": 0.0077, + "num_input_tokens_seen": 54005216, + "step": 80120 + }, + { + "epoch": 1.9574670803508172, + "grad_norm": 0.2397499829530716, + "learning_rate": 1.5255106206870073e-06, + "loss": 0.1188, + "num_input_tokens_seen": 54010592, + "step": 80125 + }, + { + "epoch": 1.9575892311826644, + "grad_norm": 0.7385396957397461, + "learning_rate": 1.525438065699827e-06, + "loss": 0.0665, + "num_input_tokens_seen": 54013728, + "step": 80130 + }, + { + "epoch": 1.9577113820145116, + "grad_norm": 0.024819033220410347, + "learning_rate": 1.52536550689152e-06, + "loss": 0.0343, + "num_input_tokens_seen": 54016864, + "step": 80135 + }, + { + "epoch": 1.9578335328463585, + "grad_norm": 97.39778137207031, + "learning_rate": 1.525292944262614e-06, + "loss": 0.0164, + "num_input_tokens_seen": 54020384, + "step": 80140 + }, + { + "epoch": 1.9579556836782057, + "grad_norm": 0.9959608912467957, + "learning_rate": 1.525220377813636e-06, + "loss": 0.127, + "num_input_tokens_seen": 54024096, + "step": 80145 + }, + { + "epoch": 1.958077834510053, + "grad_norm": 0.8587321639060974, + "learning_rate": 1.5251478075451145e-06, + "loss": 0.1639, + "num_input_tokens_seen": 54027232, + "step": 80150 + }, + { + "epoch": 1.9581999853419, + "grad_norm": 31.005212783813477, + "learning_rate": 1.525075233457577e-06, + "loss": 0.1081, + "num_input_tokens_seen": 54030368, + "step": 80155 + }, + { + "epoch": 1.9583221361737473, + "grad_norm": 42.02476501464844, + "learning_rate": 1.5250026555515508e-06, + "loss": 0.0384, + "num_input_tokens_seen": 54033760, + "step": 80160 + }, + { + "epoch": 1.9584442870055945, + "grad_norm": 0.03223037347197533, + "learning_rate": 1.5249300738275642e-06, + "loss": 0.1221, + "num_input_tokens_seen": 54036960, + "step": 80165 + }, + { + "epoch": 1.9585664378374417, + "grad_norm": 0.610651969909668, + "learning_rate": 1.5248574882861448e-06, + "loss": 0.1412, + "num_input_tokens_seen": 54040544, + "step": 80170 + }, + { + "epoch": 1.9586885886692889, + "grad_norm": 0.4873940944671631, + "learning_rate": 1.5247848989278209e-06, + "loss": 0.0873, + "num_input_tokens_seen": 54043936, + "step": 80175 + }, + { + "epoch": 1.958810739501136, + "grad_norm": 0.03432140499353409, + "learning_rate": 1.5247123057531197e-06, + "loss": 0.1489, + "num_input_tokens_seen": 54047008, + "step": 80180 + }, + { + "epoch": 1.9589328903329832, + "grad_norm": 0.025498438626527786, + "learning_rate": 1.5246397087625698e-06, + "loss": 0.0386, + "num_input_tokens_seen": 54050464, + "step": 80185 + }, + { + "epoch": 1.9590550411648304, + "grad_norm": 48.72001266479492, + "learning_rate": 1.5245671079566987e-06, + "loss": 0.1232, + "num_input_tokens_seen": 54053856, + "step": 80190 + }, + { + "epoch": 1.9591771919966776, + "grad_norm": 1.187605857849121, + "learning_rate": 1.5244945033360343e-06, + "loss": 0.0367, + "num_input_tokens_seen": 54056992, + "step": 80195 + }, + { + "epoch": 1.9592993428285248, + "grad_norm": 0.10178008675575256, + "learning_rate": 1.524421894901105e-06, + "loss": 0.0035, + "num_input_tokens_seen": 54060256, + "step": 80200 + }, + { + "epoch": 1.959421493660372, + "grad_norm": 32.345829010009766, + "learning_rate": 1.5243492826524388e-06, + "loss": 0.0954, + "num_input_tokens_seen": 54063584, + "step": 80205 + }, + { + "epoch": 1.959543644492219, + "grad_norm": 34.569976806640625, + "learning_rate": 1.5242766665905635e-06, + "loss": 0.0685, + "num_input_tokens_seen": 54067424, + "step": 80210 + }, + { + "epoch": 1.9596657953240662, + "grad_norm": 0.30393078923225403, + "learning_rate": 1.5242040467160071e-06, + "loss": 0.1259, + "num_input_tokens_seen": 54070560, + "step": 80215 + }, + { + "epoch": 1.9597879461559133, + "grad_norm": 0.013163371942937374, + "learning_rate": 1.524131423029298e-06, + "loss": 0.0437, + "num_input_tokens_seen": 54074016, + "step": 80220 + }, + { + "epoch": 1.9599100969877605, + "grad_norm": 0.25483468174934387, + "learning_rate": 1.5240587955309642e-06, + "loss": 0.0531, + "num_input_tokens_seen": 54077344, + "step": 80225 + }, + { + "epoch": 1.9600322478196075, + "grad_norm": 33.77492141723633, + "learning_rate": 1.5239861642215336e-06, + "loss": 0.1066, + "num_input_tokens_seen": 54080864, + "step": 80230 + }, + { + "epoch": 1.9601543986514547, + "grad_norm": 23.985240936279297, + "learning_rate": 1.5239135291015349e-06, + "loss": 0.0723, + "num_input_tokens_seen": 54084704, + "step": 80235 + }, + { + "epoch": 1.9602765494833019, + "grad_norm": 0.08957021683454514, + "learning_rate": 1.523840890171496e-06, + "loss": 0.0152, + "num_input_tokens_seen": 54088224, + "step": 80240 + }, + { + "epoch": 1.960398700315149, + "grad_norm": 0.14128699898719788, + "learning_rate": 1.5237682474319455e-06, + "loss": 0.0367, + "num_input_tokens_seen": 54091360, + "step": 80245 + }, + { + "epoch": 1.9605208511469963, + "grad_norm": 0.04403325542807579, + "learning_rate": 1.5236956008834114e-06, + "loss": 0.0373, + "num_input_tokens_seen": 54094624, + "step": 80250 + }, + { + "epoch": 1.9606430019788434, + "grad_norm": 0.16336075961589813, + "learning_rate": 1.523622950526422e-06, + "loss": 0.0512, + "num_input_tokens_seen": 54097696, + "step": 80255 + }, + { + "epoch": 1.9607651528106906, + "grad_norm": 27.28483772277832, + "learning_rate": 1.5235502963615054e-06, + "loss": 0.0995, + "num_input_tokens_seen": 54101280, + "step": 80260 + }, + { + "epoch": 1.9608873036425378, + "grad_norm": 0.08058968931436539, + "learning_rate": 1.5234776383891906e-06, + "loss": 0.1105, + "num_input_tokens_seen": 54104416, + "step": 80265 + }, + { + "epoch": 1.961009454474385, + "grad_norm": 0.2745673358440399, + "learning_rate": 1.5234049766100055e-06, + "loss": 0.0724, + "num_input_tokens_seen": 54107872, + "step": 80270 + }, + { + "epoch": 1.9611316053062322, + "grad_norm": 13.895827293395996, + "learning_rate": 1.5233323110244785e-06, + "loss": 0.1278, + "num_input_tokens_seen": 54111136, + "step": 80275 + }, + { + "epoch": 1.9612537561380794, + "grad_norm": 25.77663230895996, + "learning_rate": 1.523259641633138e-06, + "loss": 0.0964, + "num_input_tokens_seen": 54114656, + "step": 80280 + }, + { + "epoch": 1.9613759069699266, + "grad_norm": 0.32250508666038513, + "learning_rate": 1.523186968436513e-06, + "loss": 0.051, + "num_input_tokens_seen": 54117856, + "step": 80285 + }, + { + "epoch": 1.9614980578017738, + "grad_norm": 0.6567564606666565, + "learning_rate": 1.5231142914351316e-06, + "loss": 0.0274, + "num_input_tokens_seen": 54121056, + "step": 80290 + }, + { + "epoch": 1.9616202086336207, + "grad_norm": 0.4896738827228546, + "learning_rate": 1.5230416106295221e-06, + "loss": 0.0569, + "num_input_tokens_seen": 54124640, + "step": 80295 + }, + { + "epoch": 1.961742359465468, + "grad_norm": 131.39283752441406, + "learning_rate": 1.5229689260202134e-06, + "loss": 0.0749, + "num_input_tokens_seen": 54127584, + "step": 80300 + }, + { + "epoch": 1.9618645102973151, + "grad_norm": 0.7090680003166199, + "learning_rate": 1.5228962376077344e-06, + "loss": 0.0353, + "num_input_tokens_seen": 54130528, + "step": 80305 + }, + { + "epoch": 1.9619866611291623, + "grad_norm": 11.709858894348145, + "learning_rate": 1.5228235453926131e-06, + "loss": 0.1514, + "num_input_tokens_seen": 54133600, + "step": 80310 + }, + { + "epoch": 1.9621088119610095, + "grad_norm": 0.08688928186893463, + "learning_rate": 1.5227508493753783e-06, + "loss": 0.0796, + "num_input_tokens_seen": 54136608, + "step": 80315 + }, + { + "epoch": 1.9622309627928565, + "grad_norm": 0.44097834825515747, + "learning_rate": 1.5226781495565588e-06, + "loss": 0.1009, + "num_input_tokens_seen": 54139936, + "step": 80320 + }, + { + "epoch": 1.9623531136247037, + "grad_norm": 3.780630111694336, + "learning_rate": 1.5226054459366831e-06, + "loss": 0.0519, + "num_input_tokens_seen": 54143136, + "step": 80325 + }, + { + "epoch": 1.9624752644565508, + "grad_norm": 1.3856637477874756, + "learning_rate": 1.5225327385162801e-06, + "loss": 0.0286, + "num_input_tokens_seen": 54146528, + "step": 80330 + }, + { + "epoch": 1.962597415288398, + "grad_norm": 0.13977724313735962, + "learning_rate": 1.5224600272958785e-06, + "loss": 0.2561, + "num_input_tokens_seen": 54149664, + "step": 80335 + }, + { + "epoch": 1.9627195661202452, + "grad_norm": 0.2395777404308319, + "learning_rate": 1.522387312276007e-06, + "loss": 0.0868, + "num_input_tokens_seen": 54153184, + "step": 80340 + }, + { + "epoch": 1.9628417169520924, + "grad_norm": 0.7118207812309265, + "learning_rate": 1.5223145934571944e-06, + "loss": 0.1178, + "num_input_tokens_seen": 54156256, + "step": 80345 + }, + { + "epoch": 1.9629638677839396, + "grad_norm": 195.8506317138672, + "learning_rate": 1.5222418708399696e-06, + "loss": 0.1157, + "num_input_tokens_seen": 54159776, + "step": 80350 + }, + { + "epoch": 1.9630860186157868, + "grad_norm": 197.17930603027344, + "learning_rate": 1.5221691444248615e-06, + "loss": 0.1211, + "num_input_tokens_seen": 54162912, + "step": 80355 + }, + { + "epoch": 1.963208169447634, + "grad_norm": 82.97769165039062, + "learning_rate": 1.522096414212399e-06, + "loss": 0.1338, + "num_input_tokens_seen": 54166112, + "step": 80360 + }, + { + "epoch": 1.9633303202794812, + "grad_norm": 0.5258122086524963, + "learning_rate": 1.522023680203111e-06, + "loss": 0.1737, + "num_input_tokens_seen": 54169952, + "step": 80365 + }, + { + "epoch": 1.9634524711113284, + "grad_norm": 0.33753493428230286, + "learning_rate": 1.5219509423975262e-06, + "loss": 0.0497, + "num_input_tokens_seen": 54173216, + "step": 80370 + }, + { + "epoch": 1.9635746219431756, + "grad_norm": 0.5717714428901672, + "learning_rate": 1.5218782007961738e-06, + "loss": 0.0026, + "num_input_tokens_seen": 54176416, + "step": 80375 + }, + { + "epoch": 1.9636967727750227, + "grad_norm": 38.53734588623047, + "learning_rate": 1.5218054553995829e-06, + "loss": 0.0518, + "num_input_tokens_seen": 54179808, + "step": 80380 + }, + { + "epoch": 1.9638189236068697, + "grad_norm": 16.9853515625, + "learning_rate": 1.521732706208282e-06, + "loss": 0.1372, + "num_input_tokens_seen": 54183520, + "step": 80385 + }, + { + "epoch": 1.963941074438717, + "grad_norm": 15.189033508300781, + "learning_rate": 1.521659953222801e-06, + "loss": 0.12, + "num_input_tokens_seen": 54187232, + "step": 80390 + }, + { + "epoch": 1.964063225270564, + "grad_norm": 0.04450573772192001, + "learning_rate": 1.5215871964436683e-06, + "loss": 0.021, + "num_input_tokens_seen": 54190624, + "step": 80395 + }, + { + "epoch": 1.9641853761024113, + "grad_norm": 0.09292330592870712, + "learning_rate": 1.5215144358714134e-06, + "loss": 0.0013, + "num_input_tokens_seen": 54194720, + "step": 80400 + }, + { + "epoch": 1.9643075269342585, + "grad_norm": 0.19563770294189453, + "learning_rate": 1.521441671506565e-06, + "loss": 0.0366, + "num_input_tokens_seen": 54198240, + "step": 80405 + }, + { + "epoch": 1.9644296777661054, + "grad_norm": 38.16703796386719, + "learning_rate": 1.5213689033496526e-06, + "loss": 0.1229, + "num_input_tokens_seen": 54201376, + "step": 80410 + }, + { + "epoch": 1.9645518285979526, + "grad_norm": 1.607812523841858, + "learning_rate": 1.5212961314012054e-06, + "loss": 0.0042, + "num_input_tokens_seen": 54205024, + "step": 80415 + }, + { + "epoch": 1.9646739794297998, + "grad_norm": 0.6975657343864441, + "learning_rate": 1.5212233556617524e-06, + "loss": 0.0394, + "num_input_tokens_seen": 54208224, + "step": 80420 + }, + { + "epoch": 1.964796130261647, + "grad_norm": 33.357261657714844, + "learning_rate": 1.5211505761318231e-06, + "loss": 0.0308, + "num_input_tokens_seen": 54211424, + "step": 80425 + }, + { + "epoch": 1.9649182810934942, + "grad_norm": 19.344261169433594, + "learning_rate": 1.5210777928119466e-06, + "loss": 0.0389, + "num_input_tokens_seen": 54214496, + "step": 80430 + }, + { + "epoch": 1.9650404319253414, + "grad_norm": 22.692310333251953, + "learning_rate": 1.5210050057026521e-06, + "loss": 0.0804, + "num_input_tokens_seen": 54218336, + "step": 80435 + }, + { + "epoch": 1.9651625827571886, + "grad_norm": 0.6270188689231873, + "learning_rate": 1.520932214804469e-06, + "loss": 0.1344, + "num_input_tokens_seen": 54221984, + "step": 80440 + }, + { + "epoch": 1.9652847335890358, + "grad_norm": 16.021167755126953, + "learning_rate": 1.520859420117927e-06, + "loss": 0.1511, + "num_input_tokens_seen": 54225248, + "step": 80445 + }, + { + "epoch": 1.965406884420883, + "grad_norm": 1.0109308958053589, + "learning_rate": 1.520786621643555e-06, + "loss": 0.2087, + "num_input_tokens_seen": 54228512, + "step": 80450 + }, + { + "epoch": 1.9655290352527302, + "grad_norm": 1.2305643558502197, + "learning_rate": 1.5207138193818824e-06, + "loss": 0.1028, + "num_input_tokens_seen": 54231776, + "step": 80455 + }, + { + "epoch": 1.9656511860845773, + "grad_norm": 0.3660091459751129, + "learning_rate": 1.5206410133334393e-06, + "loss": 0.0016, + "num_input_tokens_seen": 54235360, + "step": 80460 + }, + { + "epoch": 1.9657733369164245, + "grad_norm": 319.0777282714844, + "learning_rate": 1.5205682034987547e-06, + "loss": 0.0289, + "num_input_tokens_seen": 54238496, + "step": 80465 + }, + { + "epoch": 1.9658954877482717, + "grad_norm": 0.11898450553417206, + "learning_rate": 1.520495389878358e-06, + "loss": 0.0339, + "num_input_tokens_seen": 54241568, + "step": 80470 + }, + { + "epoch": 1.9660176385801187, + "grad_norm": 10.782960891723633, + "learning_rate": 1.5204225724727789e-06, + "loss": 0.1891, + "num_input_tokens_seen": 54244960, + "step": 80475 + }, + { + "epoch": 1.9661397894119659, + "grad_norm": 26.708084106445312, + "learning_rate": 1.5203497512825465e-06, + "loss": 0.0773, + "num_input_tokens_seen": 54248480, + "step": 80480 + }, + { + "epoch": 1.966261940243813, + "grad_norm": 2.7352306842803955, + "learning_rate": 1.5202769263081908e-06, + "loss": 0.0412, + "num_input_tokens_seen": 54251680, + "step": 80485 + }, + { + "epoch": 1.9663840910756603, + "grad_norm": 0.025932036340236664, + "learning_rate": 1.5202040975502417e-06, + "loss": 0.221, + "num_input_tokens_seen": 54254816, + "step": 80490 + }, + { + "epoch": 1.9665062419075072, + "grad_norm": 11.335835456848145, + "learning_rate": 1.5201312650092283e-06, + "loss": 0.1006, + "num_input_tokens_seen": 54257952, + "step": 80495 + }, + { + "epoch": 1.9666283927393544, + "grad_norm": 28.126676559448242, + "learning_rate": 1.5200584286856808e-06, + "loss": 0.1159, + "num_input_tokens_seen": 54261024, + "step": 80500 + }, + { + "epoch": 1.9667505435712016, + "grad_norm": 1.0174667835235596, + "learning_rate": 1.519985588580128e-06, + "loss": 0.0986, + "num_input_tokens_seen": 54264416, + "step": 80505 + }, + { + "epoch": 1.9668726944030488, + "grad_norm": 9.535443305969238, + "learning_rate": 1.5199127446931e-06, + "loss": 0.0933, + "num_input_tokens_seen": 54267680, + "step": 80510 + }, + { + "epoch": 1.966994845234896, + "grad_norm": 0.37511146068573, + "learning_rate": 1.5198398970251273e-06, + "loss": 0.0496, + "num_input_tokens_seen": 54271392, + "step": 80515 + }, + { + "epoch": 1.9671169960667432, + "grad_norm": 51.843143463134766, + "learning_rate": 1.519767045576739e-06, + "loss": 0.0501, + "num_input_tokens_seen": 54274784, + "step": 80520 + }, + { + "epoch": 1.9672391468985904, + "grad_norm": 19.047075271606445, + "learning_rate": 1.5196941903484648e-06, + "loss": 0.1235, + "num_input_tokens_seen": 54278176, + "step": 80525 + }, + { + "epoch": 1.9673612977304376, + "grad_norm": 0.09213992953300476, + "learning_rate": 1.5196213313408346e-06, + "loss": 0.0017, + "num_input_tokens_seen": 54281568, + "step": 80530 + }, + { + "epoch": 1.9674834485622847, + "grad_norm": 0.04056869074702263, + "learning_rate": 1.5195484685543783e-06, + "loss": 0.0486, + "num_input_tokens_seen": 54284768, + "step": 80535 + }, + { + "epoch": 1.967605599394132, + "grad_norm": 78.23375701904297, + "learning_rate": 1.5194756019896256e-06, + "loss": 0.054, + "num_input_tokens_seen": 54288288, + "step": 80540 + }, + { + "epoch": 1.9677277502259791, + "grad_norm": 17.906198501586914, + "learning_rate": 1.5194027316471068e-06, + "loss": 0.0774, + "num_input_tokens_seen": 54291744, + "step": 80545 + }, + { + "epoch": 1.9678499010578263, + "grad_norm": 5.588540554046631, + "learning_rate": 1.5193298575273517e-06, + "loss": 0.0913, + "num_input_tokens_seen": 54294496, + "step": 80550 + }, + { + "epoch": 1.9679720518896735, + "grad_norm": 0.35017406940460205, + "learning_rate": 1.51925697963089e-06, + "loss": 0.0978, + "num_input_tokens_seen": 54298144, + "step": 80555 + }, + { + "epoch": 1.9680942027215207, + "grad_norm": 0.2523149251937866, + "learning_rate": 1.5191840979582522e-06, + "loss": 0.0596, + "num_input_tokens_seen": 54301024, + "step": 80560 + }, + { + "epoch": 1.9682163535533677, + "grad_norm": 3.521493673324585, + "learning_rate": 1.5191112125099678e-06, + "loss": 0.0024, + "num_input_tokens_seen": 54304288, + "step": 80565 + }, + { + "epoch": 1.9683385043852148, + "grad_norm": 0.1956097036600113, + "learning_rate": 1.519038323286567e-06, + "loss": 0.0716, + "num_input_tokens_seen": 54307488, + "step": 80570 + }, + { + "epoch": 1.968460655217062, + "grad_norm": 0.2618773579597473, + "learning_rate": 1.5189654302885798e-06, + "loss": 0.0178, + "num_input_tokens_seen": 54310752, + "step": 80575 + }, + { + "epoch": 1.9685828060489092, + "grad_norm": 0.04372239485383034, + "learning_rate": 1.5188925335165369e-06, + "loss": 0.0965, + "num_input_tokens_seen": 54314080, + "step": 80580 + }, + { + "epoch": 1.9687049568807562, + "grad_norm": 0.05728540197014809, + "learning_rate": 1.5188196329709675e-06, + "loss": 0.0889, + "num_input_tokens_seen": 54317344, + "step": 80585 + }, + { + "epoch": 1.9688271077126034, + "grad_norm": 14.37979507446289, + "learning_rate": 1.5187467286524022e-06, + "loss": 0.0476, + "num_input_tokens_seen": 54321696, + "step": 80590 + }, + { + "epoch": 1.9689492585444506, + "grad_norm": 0.176511749625206, + "learning_rate": 1.5186738205613714e-06, + "loss": 0.0349, + "num_input_tokens_seen": 54324896, + "step": 80595 + }, + { + "epoch": 1.9690714093762978, + "grad_norm": 0.5647554397583008, + "learning_rate": 1.5186009086984048e-06, + "loss": 0.04, + "num_input_tokens_seen": 54327904, + "step": 80600 + }, + { + "epoch": 1.969193560208145, + "grad_norm": 0.11472149938344955, + "learning_rate": 1.5185279930640329e-06, + "loss": 0.1077, + "num_input_tokens_seen": 54331616, + "step": 80605 + }, + { + "epoch": 1.9693157110399921, + "grad_norm": 0.41294869780540466, + "learning_rate": 1.518455073658786e-06, + "loss": 0.0114, + "num_input_tokens_seen": 54334624, + "step": 80610 + }, + { + "epoch": 1.9694378618718393, + "grad_norm": 0.6599180102348328, + "learning_rate": 1.5183821504831946e-06, + "loss": 0.0017, + "num_input_tokens_seen": 54338080, + "step": 80615 + }, + { + "epoch": 1.9695600127036865, + "grad_norm": 91.60684967041016, + "learning_rate": 1.5183092235377887e-06, + "loss": 0.1975, + "num_input_tokens_seen": 54341152, + "step": 80620 + }, + { + "epoch": 1.9696821635355337, + "grad_norm": 13.102996826171875, + "learning_rate": 1.5182362928230984e-06, + "loss": 0.034, + "num_input_tokens_seen": 54344608, + "step": 80625 + }, + { + "epoch": 1.969804314367381, + "grad_norm": 0.4793389141559601, + "learning_rate": 1.518163358339655e-06, + "loss": 0.0687, + "num_input_tokens_seen": 54347936, + "step": 80630 + }, + { + "epoch": 1.969926465199228, + "grad_norm": 0.47380492091178894, + "learning_rate": 1.5180904200879876e-06, + "loss": 0.0011, + "num_input_tokens_seen": 54351264, + "step": 80635 + }, + { + "epoch": 1.9700486160310753, + "grad_norm": 19.74294090270996, + "learning_rate": 1.5180174780686277e-06, + "loss": 0.1065, + "num_input_tokens_seen": 54354592, + "step": 80640 + }, + { + "epoch": 1.9701707668629225, + "grad_norm": 8.016862869262695, + "learning_rate": 1.5179445322821055e-06, + "loss": 0.0454, + "num_input_tokens_seen": 54357856, + "step": 80645 + }, + { + "epoch": 1.9702929176947697, + "grad_norm": 0.6117857098579407, + "learning_rate": 1.5178715827289508e-06, + "loss": 0.0426, + "num_input_tokens_seen": 54360928, + "step": 80650 + }, + { + "epoch": 1.9704150685266166, + "grad_norm": 0.5771762728691101, + "learning_rate": 1.5177986294096951e-06, + "loss": 0.154, + "num_input_tokens_seen": 54364128, + "step": 80655 + }, + { + "epoch": 1.9705372193584638, + "grad_norm": 0.1816076785326004, + "learning_rate": 1.5177256723248685e-06, + "loss": 0.0668, + "num_input_tokens_seen": 54367200, + "step": 80660 + }, + { + "epoch": 1.970659370190311, + "grad_norm": 0.4547644853591919, + "learning_rate": 1.5176527114750012e-06, + "loss": 0.0838, + "num_input_tokens_seen": 54370784, + "step": 80665 + }, + { + "epoch": 1.9707815210221582, + "grad_norm": 0.1515176147222519, + "learning_rate": 1.5175797468606243e-06, + "loss": 0.1594, + "num_input_tokens_seen": 54373920, + "step": 80670 + }, + { + "epoch": 1.9709036718540052, + "grad_norm": 22.52135467529297, + "learning_rate": 1.517506778482269e-06, + "loss": 0.1475, + "num_input_tokens_seen": 54377376, + "step": 80675 + }, + { + "epoch": 1.9710258226858524, + "grad_norm": 14.372892379760742, + "learning_rate": 1.5174338063404644e-06, + "loss": 0.11, + "num_input_tokens_seen": 54380768, + "step": 80680 + }, + { + "epoch": 1.9711479735176995, + "grad_norm": 1.638512134552002, + "learning_rate": 1.5173608304357422e-06, + "loss": 0.0018, + "num_input_tokens_seen": 54384160, + "step": 80685 + }, + { + "epoch": 1.9712701243495467, + "grad_norm": 0.21261566877365112, + "learning_rate": 1.5172878507686329e-06, + "loss": 0.0318, + "num_input_tokens_seen": 54387744, + "step": 80690 + }, + { + "epoch": 1.971392275181394, + "grad_norm": 0.11088140308856964, + "learning_rate": 1.5172148673396673e-06, + "loss": 0.0724, + "num_input_tokens_seen": 54390880, + "step": 80695 + }, + { + "epoch": 1.9715144260132411, + "grad_norm": 0.2497844099998474, + "learning_rate": 1.5171418801493757e-06, + "loss": 0.1306, + "num_input_tokens_seen": 54394272, + "step": 80700 + }, + { + "epoch": 1.9716365768450883, + "grad_norm": 0.1071532815694809, + "learning_rate": 1.5170688891982895e-06, + "loss": 0.1369, + "num_input_tokens_seen": 54397408, + "step": 80705 + }, + { + "epoch": 1.9717587276769355, + "grad_norm": 0.11594808101654053, + "learning_rate": 1.5169958944869393e-06, + "loss": 0.0628, + "num_input_tokens_seen": 54400800, + "step": 80710 + }, + { + "epoch": 1.9718808785087827, + "grad_norm": 0.5123701691627502, + "learning_rate": 1.5169228960158557e-06, + "loss": 0.0422, + "num_input_tokens_seen": 54403872, + "step": 80715 + }, + { + "epoch": 1.9720030293406299, + "grad_norm": 0.08987320214509964, + "learning_rate": 1.51684989378557e-06, + "loss": 0.0017, + "num_input_tokens_seen": 54407584, + "step": 80720 + }, + { + "epoch": 1.972125180172477, + "grad_norm": 0.7225486040115356, + "learning_rate": 1.5167768877966128e-06, + "loss": 0.0369, + "num_input_tokens_seen": 54410848, + "step": 80725 + }, + { + "epoch": 1.9722473310043243, + "grad_norm": 0.14365102350711823, + "learning_rate": 1.5167038780495151e-06, + "loss": 0.0298, + "num_input_tokens_seen": 54413984, + "step": 80730 + }, + { + "epoch": 1.9723694818361714, + "grad_norm": 27.856651306152344, + "learning_rate": 1.5166308645448077e-06, + "loss": 0.0452, + "num_input_tokens_seen": 54417504, + "step": 80735 + }, + { + "epoch": 1.9724916326680186, + "grad_norm": 0.09453947842121124, + "learning_rate": 1.516557847283022e-06, + "loss": 0.0985, + "num_input_tokens_seen": 54421024, + "step": 80740 + }, + { + "epoch": 1.9726137834998656, + "grad_norm": 10.844173431396484, + "learning_rate": 1.5164848262646883e-06, + "loss": 0.0931, + "num_input_tokens_seen": 54424480, + "step": 80745 + }, + { + "epoch": 1.9727359343317128, + "grad_norm": 0.4086884558200836, + "learning_rate": 1.5164118014903382e-06, + "loss": 0.0469, + "num_input_tokens_seen": 54427296, + "step": 80750 + }, + { + "epoch": 1.97285808516356, + "grad_norm": 0.2042410671710968, + "learning_rate": 1.5163387729605028e-06, + "loss": 0.0497, + "num_input_tokens_seen": 54430624, + "step": 80755 + }, + { + "epoch": 1.9729802359954072, + "grad_norm": 0.1424875557422638, + "learning_rate": 1.5162657406757125e-06, + "loss": 0.0958, + "num_input_tokens_seen": 54434080, + "step": 80760 + }, + { + "epoch": 1.9731023868272541, + "grad_norm": 0.2018500119447708, + "learning_rate": 1.516192704636499e-06, + "loss": 0.1314, + "num_input_tokens_seen": 54437024, + "step": 80765 + }, + { + "epoch": 1.9732245376591013, + "grad_norm": 0.5857983231544495, + "learning_rate": 1.5161196648433936e-06, + "loss": 0.102, + "num_input_tokens_seen": 54439840, + "step": 80770 + }, + { + "epoch": 1.9733466884909485, + "grad_norm": 67.6739501953125, + "learning_rate": 1.516046621296927e-06, + "loss": 0.1239, + "num_input_tokens_seen": 54442912, + "step": 80775 + }, + { + "epoch": 1.9734688393227957, + "grad_norm": 26.2843074798584, + "learning_rate": 1.5159735739976307e-06, + "loss": 0.0033, + "num_input_tokens_seen": 54445920, + "step": 80780 + }, + { + "epoch": 1.973590990154643, + "grad_norm": 0.5349447131156921, + "learning_rate": 1.515900522946036e-06, + "loss": 0.0027, + "num_input_tokens_seen": 54449184, + "step": 80785 + }, + { + "epoch": 1.97371314098649, + "grad_norm": 0.2931225597858429, + "learning_rate": 1.5158274681426732e-06, + "loss": 0.078, + "num_input_tokens_seen": 54452704, + "step": 80790 + }, + { + "epoch": 1.9738352918183373, + "grad_norm": 0.07107269763946533, + "learning_rate": 1.5157544095880747e-06, + "loss": 0.0011, + "num_input_tokens_seen": 54455904, + "step": 80795 + }, + { + "epoch": 1.9739574426501845, + "grad_norm": 0.1544140875339508, + "learning_rate": 1.5156813472827717e-06, + "loss": 0.0354, + "num_input_tokens_seen": 54459360, + "step": 80800 + }, + { + "epoch": 1.9740795934820317, + "grad_norm": 0.10960984230041504, + "learning_rate": 1.515608281227295e-06, + "loss": 0.1044, + "num_input_tokens_seen": 54462432, + "step": 80805 + }, + { + "epoch": 1.9742017443138788, + "grad_norm": 202.6284942626953, + "learning_rate": 1.515535211422176e-06, + "loss": 0.0775, + "num_input_tokens_seen": 54465568, + "step": 80810 + }, + { + "epoch": 1.974323895145726, + "grad_norm": 18.3772029876709, + "learning_rate": 1.5154621378679467e-06, + "loss": 0.0375, + "num_input_tokens_seen": 54469216, + "step": 80815 + }, + { + "epoch": 1.9744460459775732, + "grad_norm": 0.4663357734680176, + "learning_rate": 1.5153890605651377e-06, + "loss": 0.0969, + "num_input_tokens_seen": 54472416, + "step": 80820 + }, + { + "epoch": 1.9745681968094204, + "grad_norm": 0.13823716342449188, + "learning_rate": 1.5153159795142809e-06, + "loss": 0.0172, + "num_input_tokens_seen": 54476128, + "step": 80825 + }, + { + "epoch": 1.9746903476412674, + "grad_norm": 0.3354507386684418, + "learning_rate": 1.5152428947159077e-06, + "loss": 0.1739, + "num_input_tokens_seen": 54479328, + "step": 80830 + }, + { + "epoch": 1.9748124984731146, + "grad_norm": 0.10311849415302277, + "learning_rate": 1.5151698061705497e-06, + "loss": 0.0405, + "num_input_tokens_seen": 54482912, + "step": 80835 + }, + { + "epoch": 1.9749346493049618, + "grad_norm": 0.3807520270347595, + "learning_rate": 1.5150967138787384e-06, + "loss": 0.045, + "num_input_tokens_seen": 54486304, + "step": 80840 + }, + { + "epoch": 1.975056800136809, + "grad_norm": 21.44227409362793, + "learning_rate": 1.5150236178410052e-06, + "loss": 0.0367, + "num_input_tokens_seen": 54489760, + "step": 80845 + }, + { + "epoch": 1.9751789509686561, + "grad_norm": 55.89031219482422, + "learning_rate": 1.5149505180578818e-06, + "loss": 0.1388, + "num_input_tokens_seen": 54493024, + "step": 80850 + }, + { + "epoch": 1.9753011018005031, + "grad_norm": 34.56232452392578, + "learning_rate": 1.514877414529899e-06, + "loss": 0.2005, + "num_input_tokens_seen": 54496096, + "step": 80855 + }, + { + "epoch": 1.9754232526323503, + "grad_norm": 21.254436492919922, + "learning_rate": 1.5148043072575899e-06, + "loss": 0.0668, + "num_input_tokens_seen": 54499616, + "step": 80860 + }, + { + "epoch": 1.9755454034641975, + "grad_norm": 0.16856719553470612, + "learning_rate": 1.5147311962414852e-06, + "loss": 0.0071, + "num_input_tokens_seen": 54503264, + "step": 80865 + }, + { + "epoch": 1.9756675542960447, + "grad_norm": 0.2091529816389084, + "learning_rate": 1.514658081482117e-06, + "loss": 0.0546, + "num_input_tokens_seen": 54507232, + "step": 80870 + }, + { + "epoch": 1.9757897051278919, + "grad_norm": 0.3104109764099121, + "learning_rate": 1.5145849629800166e-06, + "loss": 0.0021, + "num_input_tokens_seen": 54510240, + "step": 80875 + }, + { + "epoch": 1.975911855959739, + "grad_norm": 0.293990820646286, + "learning_rate": 1.514511840735716e-06, + "loss": 0.0716, + "num_input_tokens_seen": 54513760, + "step": 80880 + }, + { + "epoch": 1.9760340067915863, + "grad_norm": 0.12436817586421967, + "learning_rate": 1.5144387147497469e-06, + "loss": 0.0341, + "num_input_tokens_seen": 54517280, + "step": 80885 + }, + { + "epoch": 1.9761561576234334, + "grad_norm": 0.32219186425209045, + "learning_rate": 1.514365585022641e-06, + "loss": 0.1111, + "num_input_tokens_seen": 54520736, + "step": 80890 + }, + { + "epoch": 1.9762783084552806, + "grad_norm": 0.2317805290222168, + "learning_rate": 1.5142924515549306e-06, + "loss": 0.1001, + "num_input_tokens_seen": 54524128, + "step": 80895 + }, + { + "epoch": 1.9764004592871278, + "grad_norm": 11.194290161132812, + "learning_rate": 1.5142193143471467e-06, + "loss": 0.1185, + "num_input_tokens_seen": 54527584, + "step": 80900 + }, + { + "epoch": 1.976522610118975, + "grad_norm": 26.01723289489746, + "learning_rate": 1.5141461733998217e-06, + "loss": 0.1664, + "num_input_tokens_seen": 54530464, + "step": 80905 + }, + { + "epoch": 1.9766447609508222, + "grad_norm": 0.5856109857559204, + "learning_rate": 1.5140730287134876e-06, + "loss": 0.0421, + "num_input_tokens_seen": 54533856, + "step": 80910 + }, + { + "epoch": 1.9767669117826694, + "grad_norm": 0.4464196562767029, + "learning_rate": 1.513999880288676e-06, + "loss": 0.1165, + "num_input_tokens_seen": 54537376, + "step": 80915 + }, + { + "epoch": 1.9768890626145164, + "grad_norm": 0.8154142498970032, + "learning_rate": 1.513926728125919e-06, + "loss": 0.102, + "num_input_tokens_seen": 54541088, + "step": 80920 + }, + { + "epoch": 1.9770112134463635, + "grad_norm": 39.605228424072266, + "learning_rate": 1.5138535722257488e-06, + "loss": 0.1535, + "num_input_tokens_seen": 54544544, + "step": 80925 + }, + { + "epoch": 1.9771333642782107, + "grad_norm": 43.534088134765625, + "learning_rate": 1.5137804125886973e-06, + "loss": 0.1311, + "num_input_tokens_seen": 54547872, + "step": 80930 + }, + { + "epoch": 1.977255515110058, + "grad_norm": 0.26322290301322937, + "learning_rate": 1.5137072492152962e-06, + "loss": 0.2092, + "num_input_tokens_seen": 54551200, + "step": 80935 + }, + { + "epoch": 1.9773776659419051, + "grad_norm": 10.208868980407715, + "learning_rate": 1.513634082106078e-06, + "loss": 0.2748, + "num_input_tokens_seen": 54554528, + "step": 80940 + }, + { + "epoch": 1.977499816773752, + "grad_norm": 0.43643590807914734, + "learning_rate": 1.5135609112615746e-06, + "loss": 0.0636, + "num_input_tokens_seen": 54557856, + "step": 80945 + }, + { + "epoch": 1.9776219676055993, + "grad_norm": 20.197067260742188, + "learning_rate": 1.5134877366823178e-06, + "loss": 0.1343, + "num_input_tokens_seen": 54561184, + "step": 80950 + }, + { + "epoch": 1.9777441184374465, + "grad_norm": 6.695237159729004, + "learning_rate": 1.5134145583688406e-06, + "loss": 0.058, + "num_input_tokens_seen": 54564320, + "step": 80955 + }, + { + "epoch": 1.9778662692692937, + "grad_norm": 0.9582432508468628, + "learning_rate": 1.5133413763216742e-06, + "loss": 0.0711, + "num_input_tokens_seen": 54567648, + "step": 80960 + }, + { + "epoch": 1.9779884201011408, + "grad_norm": 105.59188842773438, + "learning_rate": 1.5132681905413515e-06, + "loss": 0.0481, + "num_input_tokens_seen": 54570912, + "step": 80965 + }, + { + "epoch": 1.978110570932988, + "grad_norm": 24.445819854736328, + "learning_rate": 1.5131950010284043e-06, + "loss": 0.1138, + "num_input_tokens_seen": 54574112, + "step": 80970 + }, + { + "epoch": 1.9782327217648352, + "grad_norm": 0.8526203632354736, + "learning_rate": 1.513121807783365e-06, + "loss": 0.0475, + "num_input_tokens_seen": 54577888, + "step": 80975 + }, + { + "epoch": 1.9783548725966824, + "grad_norm": 0.6153159141540527, + "learning_rate": 1.513048610806766e-06, + "loss": 0.0688, + "num_input_tokens_seen": 54581600, + "step": 80980 + }, + { + "epoch": 1.9784770234285296, + "grad_norm": 0.9376423954963684, + "learning_rate": 1.5129754100991394e-06, + "loss": 0.0358, + "num_input_tokens_seen": 54584608, + "step": 80985 + }, + { + "epoch": 1.9785991742603768, + "grad_norm": 0.151042178273201, + "learning_rate": 1.512902205661018e-06, + "loss": 0.0421, + "num_input_tokens_seen": 54588000, + "step": 80990 + }, + { + "epoch": 1.978721325092224, + "grad_norm": 19.37543487548828, + "learning_rate": 1.5128289974929334e-06, + "loss": 0.1467, + "num_input_tokens_seen": 54591136, + "step": 80995 + }, + { + "epoch": 1.9788434759240712, + "grad_norm": 0.05743106082081795, + "learning_rate": 1.5127557855954186e-06, + "loss": 0.0667, + "num_input_tokens_seen": 54594400, + "step": 81000 + }, + { + "epoch": 1.9789656267559184, + "grad_norm": 2.3568992614746094, + "learning_rate": 1.5126825699690056e-06, + "loss": 0.0786, + "num_input_tokens_seen": 54598432, + "step": 81005 + }, + { + "epoch": 1.9790877775877653, + "grad_norm": 122.1474380493164, + "learning_rate": 1.512609350614227e-06, + "loss": 0.1151, + "num_input_tokens_seen": 54601696, + "step": 81010 + }, + { + "epoch": 1.9792099284196125, + "grad_norm": 13.598340034484863, + "learning_rate": 1.5125361275316157e-06, + "loss": 0.1662, + "num_input_tokens_seen": 54604960, + "step": 81015 + }, + { + "epoch": 1.9793320792514597, + "grad_norm": 16.693645477294922, + "learning_rate": 1.5124629007217036e-06, + "loss": 0.069, + "num_input_tokens_seen": 54608480, + "step": 81020 + }, + { + "epoch": 1.979454230083307, + "grad_norm": 0.4208316206932068, + "learning_rate": 1.5123896701850237e-06, + "loss": 0.0018, + "num_input_tokens_seen": 54611936, + "step": 81025 + }, + { + "epoch": 1.9795763809151539, + "grad_norm": 55.367950439453125, + "learning_rate": 1.512316435922108e-06, + "loss": 0.145, + "num_input_tokens_seen": 54614816, + "step": 81030 + }, + { + "epoch": 1.979698531747001, + "grad_norm": 10.672937393188477, + "learning_rate": 1.5122431979334894e-06, + "loss": 0.0485, + "num_input_tokens_seen": 54618016, + "step": 81035 + }, + { + "epoch": 1.9798206825788482, + "grad_norm": 0.3374331593513489, + "learning_rate": 1.5121699562197006e-06, + "loss": 0.1454, + "num_input_tokens_seen": 54621408, + "step": 81040 + }, + { + "epoch": 1.9799428334106954, + "grad_norm": 0.8995051383972168, + "learning_rate": 1.5120967107812738e-06, + "loss": 0.0675, + "num_input_tokens_seen": 54624800, + "step": 81045 + }, + { + "epoch": 1.9800649842425426, + "grad_norm": 52.55927276611328, + "learning_rate": 1.5120234616187423e-06, + "loss": 0.0325, + "num_input_tokens_seen": 54628128, + "step": 81050 + }, + { + "epoch": 1.9801871350743898, + "grad_norm": 104.41256713867188, + "learning_rate": 1.5119502087326387e-06, + "loss": 0.0427, + "num_input_tokens_seen": 54631456, + "step": 81055 + }, + { + "epoch": 1.980309285906237, + "grad_norm": 0.05695815756917, + "learning_rate": 1.511876952123495e-06, + "loss": 0.1117, + "num_input_tokens_seen": 54634592, + "step": 81060 + }, + { + "epoch": 1.9804314367380842, + "grad_norm": 0.5008662939071655, + "learning_rate": 1.511803691791845e-06, + "loss": 0.0639, + "num_input_tokens_seen": 54637792, + "step": 81065 + }, + { + "epoch": 1.9805535875699314, + "grad_norm": 0.3222808241844177, + "learning_rate": 1.5117304277382204e-06, + "loss": 0.0815, + "num_input_tokens_seen": 54641184, + "step": 81070 + }, + { + "epoch": 1.9806757384017786, + "grad_norm": 0.11780696362257004, + "learning_rate": 1.5116571599631544e-06, + "loss": 0.0257, + "num_input_tokens_seen": 54644640, + "step": 81075 + }, + { + "epoch": 1.9807978892336258, + "grad_norm": 0.8247701525688171, + "learning_rate": 1.51158388846718e-06, + "loss": 0.0029, + "num_input_tokens_seen": 54647648, + "step": 81080 + }, + { + "epoch": 1.980920040065473, + "grad_norm": 21.25235366821289, + "learning_rate": 1.5115106132508305e-06, + "loss": 0.0702, + "num_input_tokens_seen": 54650720, + "step": 81085 + }, + { + "epoch": 1.9810421908973201, + "grad_norm": 0.10468591004610062, + "learning_rate": 1.5114373343146375e-06, + "loss": 0.0007, + "num_input_tokens_seen": 54653984, + "step": 81090 + }, + { + "epoch": 1.9811643417291673, + "grad_norm": 0.09131321310997009, + "learning_rate": 1.5113640516591354e-06, + "loss": 0.1593, + "num_input_tokens_seen": 54657568, + "step": 81095 + }, + { + "epoch": 1.9812864925610143, + "grad_norm": 1.060448169708252, + "learning_rate": 1.5112907652848556e-06, + "loss": 0.1267, + "num_input_tokens_seen": 54660640, + "step": 81100 + }, + { + "epoch": 1.9814086433928615, + "grad_norm": 0.17581862211227417, + "learning_rate": 1.5112174751923324e-06, + "loss": 0.1972, + "num_input_tokens_seen": 54663648, + "step": 81105 + }, + { + "epoch": 1.9815307942247087, + "grad_norm": 23.455965042114258, + "learning_rate": 1.511144181382098e-06, + "loss": 0.1394, + "num_input_tokens_seen": 54666912, + "step": 81110 + }, + { + "epoch": 1.9816529450565559, + "grad_norm": 0.5338903665542603, + "learning_rate": 1.5110708838546856e-06, + "loss": 0.0018, + "num_input_tokens_seen": 54670048, + "step": 81115 + }, + { + "epoch": 1.9817750958884028, + "grad_norm": 0.14137008786201477, + "learning_rate": 1.5109975826106285e-06, + "loss": 0.0385, + "num_input_tokens_seen": 54673312, + "step": 81120 + }, + { + "epoch": 1.98189724672025, + "grad_norm": 17.056808471679688, + "learning_rate": 1.5109242776504591e-06, + "loss": 0.012, + "num_input_tokens_seen": 54676640, + "step": 81125 + }, + { + "epoch": 1.9820193975520972, + "grad_norm": 0.04783693701028824, + "learning_rate": 1.5108509689747115e-06, + "loss": 0.0411, + "num_input_tokens_seen": 54679968, + "step": 81130 + }, + { + "epoch": 1.9821415483839444, + "grad_norm": 0.04254491254687309, + "learning_rate": 1.5107776565839177e-06, + "loss": 0.001, + "num_input_tokens_seen": 54684064, + "step": 81135 + }, + { + "epoch": 1.9822636992157916, + "grad_norm": 13.71863842010498, + "learning_rate": 1.510704340478612e-06, + "loss": 0.1699, + "num_input_tokens_seen": 54687392, + "step": 81140 + }, + { + "epoch": 1.9823858500476388, + "grad_norm": 9.231290817260742, + "learning_rate": 1.5106310206593265e-06, + "loss": 0.1538, + "num_input_tokens_seen": 54690464, + "step": 81145 + }, + { + "epoch": 1.982508000879486, + "grad_norm": 0.3563421964645386, + "learning_rate": 1.510557697126595e-06, + "loss": 0.2718, + "num_input_tokens_seen": 54693984, + "step": 81150 + }, + { + "epoch": 1.9826301517113332, + "grad_norm": 546.980712890625, + "learning_rate": 1.5104843698809506e-06, + "loss": 0.1438, + "num_input_tokens_seen": 54697632, + "step": 81155 + }, + { + "epoch": 1.9827523025431804, + "grad_norm": 0.5848055481910706, + "learning_rate": 1.5104110389229265e-06, + "loss": 0.0019, + "num_input_tokens_seen": 54701344, + "step": 81160 + }, + { + "epoch": 1.9828744533750275, + "grad_norm": 11.758065223693848, + "learning_rate": 1.5103377042530561e-06, + "loss": 0.1028, + "num_input_tokens_seen": 54704544, + "step": 81165 + }, + { + "epoch": 1.9829966042068747, + "grad_norm": 27.06719207763672, + "learning_rate": 1.5102643658718726e-06, + "loss": 0.0757, + "num_input_tokens_seen": 54708704, + "step": 81170 + }, + { + "epoch": 1.983118755038722, + "grad_norm": 0.3434150815010071, + "learning_rate": 1.5101910237799093e-06, + "loss": 0.0797, + "num_input_tokens_seen": 54711776, + "step": 81175 + }, + { + "epoch": 1.9832409058705691, + "grad_norm": 0.2370101511478424, + "learning_rate": 1.5101176779776999e-06, + "loss": 0.044, + "num_input_tokens_seen": 54714784, + "step": 81180 + }, + { + "epoch": 1.9833630567024163, + "grad_norm": 0.17519745230674744, + "learning_rate": 1.5100443284657773e-06, + "loss": 0.0014, + "num_input_tokens_seen": 54718048, + "step": 81185 + }, + { + "epoch": 1.9834852075342633, + "grad_norm": 0.06736115366220474, + "learning_rate": 1.5099709752446754e-06, + "loss": 0.1347, + "num_input_tokens_seen": 54721184, + "step": 81190 + }, + { + "epoch": 1.9836073583661105, + "grad_norm": 0.07850868999958038, + "learning_rate": 1.5098976183149272e-06, + "loss": 0.1472, + "num_input_tokens_seen": 54724576, + "step": 81195 + }, + { + "epoch": 1.9837295091979577, + "grad_norm": 0.20919404923915863, + "learning_rate": 1.5098242576770666e-06, + "loss": 0.1159, + "num_input_tokens_seen": 54727648, + "step": 81200 + }, + { + "epoch": 1.9838516600298048, + "grad_norm": 0.24704480171203613, + "learning_rate": 1.5097508933316267e-06, + "loss": 0.0484, + "num_input_tokens_seen": 54731232, + "step": 81205 + }, + { + "epoch": 1.9839738108616518, + "grad_norm": 8.519333839416504, + "learning_rate": 1.5096775252791414e-06, + "loss": 0.1048, + "num_input_tokens_seen": 54735136, + "step": 81210 + }, + { + "epoch": 1.984095961693499, + "grad_norm": 0.39622700214385986, + "learning_rate": 1.5096041535201435e-06, + "loss": 0.0012, + "num_input_tokens_seen": 54738656, + "step": 81215 + }, + { + "epoch": 1.9842181125253462, + "grad_norm": 3.677612066268921, + "learning_rate": 1.5095307780551676e-06, + "loss": 0.0957, + "num_input_tokens_seen": 54741728, + "step": 81220 + }, + { + "epoch": 1.9843402633571934, + "grad_norm": 0.1543489396572113, + "learning_rate": 1.5094573988847468e-06, + "loss": 0.0014, + "num_input_tokens_seen": 54744864, + "step": 81225 + }, + { + "epoch": 1.9844624141890406, + "grad_norm": 0.38194113969802856, + "learning_rate": 1.5093840160094145e-06, + "loss": 0.0008, + "num_input_tokens_seen": 54748128, + "step": 81230 + }, + { + "epoch": 1.9845845650208878, + "grad_norm": 103.0581283569336, + "learning_rate": 1.509310629429705e-06, + "loss": 0.0874, + "num_input_tokens_seen": 54751712, + "step": 81235 + }, + { + "epoch": 1.984706715852735, + "grad_norm": 74.20256042480469, + "learning_rate": 1.5092372391461515e-06, + "loss": 0.0342, + "num_input_tokens_seen": 54754976, + "step": 81240 + }, + { + "epoch": 1.9848288666845821, + "grad_norm": 0.07519308477640152, + "learning_rate": 1.5091638451592878e-06, + "loss": 0.0006, + "num_input_tokens_seen": 54758176, + "step": 81245 + }, + { + "epoch": 1.9849510175164293, + "grad_norm": 0.26383519172668457, + "learning_rate": 1.5090904474696478e-06, + "loss": 0.0261, + "num_input_tokens_seen": 54761888, + "step": 81250 + }, + { + "epoch": 1.9850731683482765, + "grad_norm": 0.06626437604427338, + "learning_rate": 1.5090170460777647e-06, + "loss": 0.0012, + "num_input_tokens_seen": 54765408, + "step": 81255 + }, + { + "epoch": 1.9851953191801237, + "grad_norm": 0.7158876061439514, + "learning_rate": 1.508943640984173e-06, + "loss": 0.0779, + "num_input_tokens_seen": 54768928, + "step": 81260 + }, + { + "epoch": 1.985317470011971, + "grad_norm": 30.467967987060547, + "learning_rate": 1.5088702321894062e-06, + "loss": 0.0576, + "num_input_tokens_seen": 54772448, + "step": 81265 + }, + { + "epoch": 1.985439620843818, + "grad_norm": 22.207685470581055, + "learning_rate": 1.5087968196939985e-06, + "loss": 0.1292, + "num_input_tokens_seen": 54775648, + "step": 81270 + }, + { + "epoch": 1.9855617716756653, + "grad_norm": 0.11838986724615097, + "learning_rate": 1.5087234034984833e-06, + "loss": 0.0034, + "num_input_tokens_seen": 54778976, + "step": 81275 + }, + { + "epoch": 1.9856839225075122, + "grad_norm": 14.396102905273438, + "learning_rate": 1.5086499836033945e-06, + "loss": 0.1485, + "num_input_tokens_seen": 54782048, + "step": 81280 + }, + { + "epoch": 1.9858060733393594, + "grad_norm": 0.539726972579956, + "learning_rate": 1.5085765600092663e-06, + "loss": 0.0958, + "num_input_tokens_seen": 54785568, + "step": 81285 + }, + { + "epoch": 1.9859282241712066, + "grad_norm": 18.099950790405273, + "learning_rate": 1.5085031327166324e-06, + "loss": 0.0731, + "num_input_tokens_seen": 54788896, + "step": 81290 + }, + { + "epoch": 1.9860503750030538, + "grad_norm": 0.7998467683792114, + "learning_rate": 1.5084297017260274e-06, + "loss": 0.092, + "num_input_tokens_seen": 54792224, + "step": 81295 + }, + { + "epoch": 1.9861725258349008, + "grad_norm": 20.516544342041016, + "learning_rate": 1.5083562670379847e-06, + "loss": 0.1245, + "num_input_tokens_seen": 54795552, + "step": 81300 + }, + { + "epoch": 1.986294676666748, + "grad_norm": 0.3030303716659546, + "learning_rate": 1.5082828286530385e-06, + "loss": 0.18, + "num_input_tokens_seen": 54799392, + "step": 81305 + }, + { + "epoch": 1.9864168274985952, + "grad_norm": 10.065673828125, + "learning_rate": 1.5082093865717226e-06, + "loss": 0.0018, + "num_input_tokens_seen": 54802720, + "step": 81310 + }, + { + "epoch": 1.9865389783304424, + "grad_norm": 16.148216247558594, + "learning_rate": 1.5081359407945717e-06, + "loss": 0.0463, + "num_input_tokens_seen": 54805984, + "step": 81315 + }, + { + "epoch": 1.9866611291622895, + "grad_norm": 37.96180725097656, + "learning_rate": 1.5080624913221192e-06, + "loss": 0.1515, + "num_input_tokens_seen": 54809376, + "step": 81320 + }, + { + "epoch": 1.9867832799941367, + "grad_norm": 0.017173312604427338, + "learning_rate": 1.5079890381549e-06, + "loss": 0.0629, + "num_input_tokens_seen": 54812256, + "step": 81325 + }, + { + "epoch": 1.986905430825984, + "grad_norm": 21.360572814941406, + "learning_rate": 1.5079155812934474e-06, + "loss": 0.1602, + "num_input_tokens_seen": 54815776, + "step": 81330 + }, + { + "epoch": 1.987027581657831, + "grad_norm": 129.48104858398438, + "learning_rate": 1.5078421207382963e-06, + "loss": 0.0121, + "num_input_tokens_seen": 54819552, + "step": 81335 + }, + { + "epoch": 1.9871497324896783, + "grad_norm": 0.5234513878822327, + "learning_rate": 1.5077686564899808e-06, + "loss": 0.003, + "num_input_tokens_seen": 54822624, + "step": 81340 + }, + { + "epoch": 1.9872718833215255, + "grad_norm": 5.080391883850098, + "learning_rate": 1.507695188549035e-06, + "loss": 0.0501, + "num_input_tokens_seen": 54825888, + "step": 81345 + }, + { + "epoch": 1.9873940341533727, + "grad_norm": 22.208263397216797, + "learning_rate": 1.5076217169159933e-06, + "loss": 0.1933, + "num_input_tokens_seen": 54829280, + "step": 81350 + }, + { + "epoch": 1.9875161849852199, + "grad_norm": 0.19975244998931885, + "learning_rate": 1.5075482415913899e-06, + "loss": 0.1009, + "num_input_tokens_seen": 54832416, + "step": 81355 + }, + { + "epoch": 1.987638335817067, + "grad_norm": 0.25327712297439575, + "learning_rate": 1.5074747625757591e-06, + "loss": 0.0637, + "num_input_tokens_seen": 54836000, + "step": 81360 + }, + { + "epoch": 1.987760486648914, + "grad_norm": 27.042043685913086, + "learning_rate": 1.5074012798696356e-06, + "loss": 0.1048, + "num_input_tokens_seen": 54839328, + "step": 81365 + }, + { + "epoch": 1.9878826374807612, + "grad_norm": 23.07643699645996, + "learning_rate": 1.5073277934735531e-06, + "loss": 0.1378, + "num_input_tokens_seen": 54842656, + "step": 81370 + }, + { + "epoch": 1.9880047883126084, + "grad_norm": 0.5719969272613525, + "learning_rate": 1.5072543033880466e-06, + "loss": 0.1105, + "num_input_tokens_seen": 54846176, + "step": 81375 + }, + { + "epoch": 1.9881269391444556, + "grad_norm": 52.8463249206543, + "learning_rate": 1.5071808096136503e-06, + "loss": 0.0333, + "num_input_tokens_seen": 54849952, + "step": 81380 + }, + { + "epoch": 1.9882490899763028, + "grad_norm": 17.789306640625, + "learning_rate": 1.507107312150899e-06, + "loss": 0.084, + "num_input_tokens_seen": 54852768, + "step": 81385 + }, + { + "epoch": 1.9883712408081498, + "grad_norm": 0.5003214478492737, + "learning_rate": 1.5070338110003266e-06, + "loss": 0.0249, + "num_input_tokens_seen": 54855968, + "step": 81390 + }, + { + "epoch": 1.988493391639997, + "grad_norm": 0.13848499953746796, + "learning_rate": 1.5069603061624683e-06, + "loss": 0.0502, + "num_input_tokens_seen": 54859296, + "step": 81395 + }, + { + "epoch": 1.9886155424718441, + "grad_norm": 0.16113311052322388, + "learning_rate": 1.5068867976378582e-06, + "loss": 0.0339, + "num_input_tokens_seen": 54863136, + "step": 81400 + }, + { + "epoch": 1.9887376933036913, + "grad_norm": 0.14104273915290833, + "learning_rate": 1.506813285427031e-06, + "loss": 0.0677, + "num_input_tokens_seen": 54866400, + "step": 81405 + }, + { + "epoch": 1.9888598441355385, + "grad_norm": 60.22726058959961, + "learning_rate": 1.5067397695305212e-06, + "loss": 0.1462, + "num_input_tokens_seen": 54869920, + "step": 81410 + }, + { + "epoch": 1.9889819949673857, + "grad_norm": 1.4266036748886108, + "learning_rate": 1.5066662499488634e-06, + "loss": 0.0628, + "num_input_tokens_seen": 54873248, + "step": 81415 + }, + { + "epoch": 1.989104145799233, + "grad_norm": 19.480825424194336, + "learning_rate": 1.5065927266825923e-06, + "loss": 0.0587, + "num_input_tokens_seen": 54877280, + "step": 81420 + }, + { + "epoch": 1.98922629663108, + "grad_norm": 10.97437572479248, + "learning_rate": 1.5065191997322426e-06, + "loss": 0.0877, + "num_input_tokens_seen": 54879968, + "step": 81425 + }, + { + "epoch": 1.9893484474629273, + "grad_norm": 17.77425765991211, + "learning_rate": 1.5064456690983493e-06, + "loss": 0.041, + "num_input_tokens_seen": 54883104, + "step": 81430 + }, + { + "epoch": 1.9894705982947745, + "grad_norm": 0.09516488760709763, + "learning_rate": 1.5063721347814468e-06, + "loss": 0.0558, + "num_input_tokens_seen": 54886624, + "step": 81435 + }, + { + "epoch": 1.9895927491266217, + "grad_norm": 0.2274305522441864, + "learning_rate": 1.50629859678207e-06, + "loss": 0.0027, + "num_input_tokens_seen": 54889952, + "step": 81440 + }, + { + "epoch": 1.9897148999584688, + "grad_norm": 0.13979065418243408, + "learning_rate": 1.5062250551007533e-06, + "loss": 0.0344, + "num_input_tokens_seen": 54893792, + "step": 81445 + }, + { + "epoch": 1.989837050790316, + "grad_norm": 0.2326391339302063, + "learning_rate": 1.5061515097380323e-06, + "loss": 0.0756, + "num_input_tokens_seen": 54897504, + "step": 81450 + }, + { + "epoch": 1.989959201622163, + "grad_norm": 0.6787064671516418, + "learning_rate": 1.5060779606944412e-06, + "loss": 0.0261, + "num_input_tokens_seen": 54900704, + "step": 81455 + }, + { + "epoch": 1.9900813524540102, + "grad_norm": 3.1736743450164795, + "learning_rate": 1.506004407970515e-06, + "loss": 0.0433, + "num_input_tokens_seen": 54904224, + "step": 81460 + }, + { + "epoch": 1.9902035032858574, + "grad_norm": 0.01049636211246252, + "learning_rate": 1.5059308515667888e-06, + "loss": 0.1088, + "num_input_tokens_seen": 54907360, + "step": 81465 + }, + { + "epoch": 1.9903256541177046, + "grad_norm": 0.04513026773929596, + "learning_rate": 1.5058572914837973e-06, + "loss": 0.0366, + "num_input_tokens_seen": 54911776, + "step": 81470 + }, + { + "epoch": 1.9904478049495518, + "grad_norm": 33.00575637817383, + "learning_rate": 1.505783727722075e-06, + "loss": 0.0852, + "num_input_tokens_seen": 54915168, + "step": 81475 + }, + { + "epoch": 1.9905699557813987, + "grad_norm": 0.5446120500564575, + "learning_rate": 1.505710160282158e-06, + "loss": 0.0808, + "num_input_tokens_seen": 54918240, + "step": 81480 + }, + { + "epoch": 1.990692106613246, + "grad_norm": 9.04538631439209, + "learning_rate": 1.5056365891645805e-06, + "loss": 0.1603, + "num_input_tokens_seen": 54921568, + "step": 81485 + }, + { + "epoch": 1.990814257445093, + "grad_norm": 1.6430739164352417, + "learning_rate": 1.5055630143698778e-06, + "loss": 0.0425, + "num_input_tokens_seen": 54924704, + "step": 81490 + }, + { + "epoch": 1.9909364082769403, + "grad_norm": 114.92130279541016, + "learning_rate": 1.505489435898585e-06, + "loss": 0.0634, + "num_input_tokens_seen": 54928096, + "step": 81495 + }, + { + "epoch": 1.9910585591087875, + "grad_norm": 19.503250122070312, + "learning_rate": 1.505415853751237e-06, + "loss": 0.0872, + "num_input_tokens_seen": 54931424, + "step": 81500 + }, + { + "epoch": 1.9911807099406347, + "grad_norm": 67.42121887207031, + "learning_rate": 1.5053422679283688e-06, + "loss": 0.1399, + "num_input_tokens_seen": 54934752, + "step": 81505 + }, + { + "epoch": 1.9913028607724819, + "grad_norm": 21.0851993560791, + "learning_rate": 1.5052686784305158e-06, + "loss": 0.0519, + "num_input_tokens_seen": 54938080, + "step": 81510 + }, + { + "epoch": 1.991425011604329, + "grad_norm": 0.4135420024394989, + "learning_rate": 1.505195085258213e-06, + "loss": 0.039, + "num_input_tokens_seen": 54941856, + "step": 81515 + }, + { + "epoch": 1.9915471624361762, + "grad_norm": 27.66628074645996, + "learning_rate": 1.5051214884119956e-06, + "loss": 0.0728, + "num_input_tokens_seen": 54945056, + "step": 81520 + }, + { + "epoch": 1.9916693132680234, + "grad_norm": 0.38148805499076843, + "learning_rate": 1.505047887892399e-06, + "loss": 0.1029, + "num_input_tokens_seen": 54948448, + "step": 81525 + }, + { + "epoch": 1.9917914640998706, + "grad_norm": 0.16693571209907532, + "learning_rate": 1.5049742836999584e-06, + "loss": 0.0036, + "num_input_tokens_seen": 54951840, + "step": 81530 + }, + { + "epoch": 1.9919136149317178, + "grad_norm": 23.07744598388672, + "learning_rate": 1.5049006758352088e-06, + "loss": 0.2146, + "num_input_tokens_seen": 54955040, + "step": 81535 + }, + { + "epoch": 1.992035765763565, + "grad_norm": 61.181007385253906, + "learning_rate": 1.5048270642986855e-06, + "loss": 0.0869, + "num_input_tokens_seen": 54958496, + "step": 81540 + }, + { + "epoch": 1.992157916595412, + "grad_norm": 297.8876037597656, + "learning_rate": 1.5047534490909243e-06, + "loss": 0.0977, + "num_input_tokens_seen": 54961760, + "step": 81545 + }, + { + "epoch": 1.9922800674272592, + "grad_norm": 0.23931384086608887, + "learning_rate": 1.5046798302124603e-06, + "loss": 0.1243, + "num_input_tokens_seen": 54965152, + "step": 81550 + }, + { + "epoch": 1.9924022182591064, + "grad_norm": 0.10794756561517715, + "learning_rate": 1.5046062076638288e-06, + "loss": 0.0381, + "num_input_tokens_seen": 54968736, + "step": 81555 + }, + { + "epoch": 1.9925243690909535, + "grad_norm": 16.027257919311523, + "learning_rate": 1.5045325814455657e-06, + "loss": 0.1014, + "num_input_tokens_seen": 54972256, + "step": 81560 + }, + { + "epoch": 1.9926465199228007, + "grad_norm": 0.3013782799243927, + "learning_rate": 1.5044589515582051e-06, + "loss": 0.0302, + "num_input_tokens_seen": 54975776, + "step": 81565 + }, + { + "epoch": 1.9927686707546477, + "grad_norm": 0.038673609495162964, + "learning_rate": 1.5043853180022837e-06, + "loss": 0.0014, + "num_input_tokens_seen": 54978656, + "step": 81570 + }, + { + "epoch": 1.9928908215864949, + "grad_norm": 0.4944474995136261, + "learning_rate": 1.5043116807783364e-06, + "loss": 0.2286, + "num_input_tokens_seen": 54982048, + "step": 81575 + }, + { + "epoch": 1.993012972418342, + "grad_norm": 0.6024330854415894, + "learning_rate": 1.5042380398868991e-06, + "loss": 0.0634, + "num_input_tokens_seen": 54985504, + "step": 81580 + }, + { + "epoch": 1.9931351232501893, + "grad_norm": 0.741905152797699, + "learning_rate": 1.5041643953285074e-06, + "loss": 0.001, + "num_input_tokens_seen": 54989152, + "step": 81585 + }, + { + "epoch": 1.9932572740820365, + "grad_norm": 452.8699951171875, + "learning_rate": 1.5040907471036962e-06, + "loss": 0.0087, + "num_input_tokens_seen": 54992672, + "step": 81590 + }, + { + "epoch": 1.9933794249138836, + "grad_norm": 0.4961146116256714, + "learning_rate": 1.5040170952130019e-06, + "loss": 0.133, + "num_input_tokens_seen": 54995936, + "step": 81595 + }, + { + "epoch": 1.9935015757457308, + "grad_norm": 0.024277739226818085, + "learning_rate": 1.5039434396569592e-06, + "loss": 0.2358, + "num_input_tokens_seen": 54999008, + "step": 81600 + }, + { + "epoch": 1.993623726577578, + "grad_norm": 0.5896772742271423, + "learning_rate": 1.5038697804361046e-06, + "loss": 0.1468, + "num_input_tokens_seen": 55002784, + "step": 81605 + }, + { + "epoch": 1.9937458774094252, + "grad_norm": 10.540372848510742, + "learning_rate": 1.5037961175509737e-06, + "loss": 0.1263, + "num_input_tokens_seen": 55005920, + "step": 81610 + }, + { + "epoch": 1.9938680282412724, + "grad_norm": 0.5984711647033691, + "learning_rate": 1.5037224510021016e-06, + "loss": 0.0316, + "num_input_tokens_seen": 55009504, + "step": 81615 + }, + { + "epoch": 1.9939901790731196, + "grad_norm": 0.5763449668884277, + "learning_rate": 1.5036487807900243e-06, + "loss": 0.0791, + "num_input_tokens_seen": 55012768, + "step": 81620 + }, + { + "epoch": 1.9941123299049668, + "grad_norm": 23.172338485717773, + "learning_rate": 1.5035751069152775e-06, + "loss": 0.0762, + "num_input_tokens_seen": 55016288, + "step": 81625 + }, + { + "epoch": 1.994234480736814, + "grad_norm": 24.80535888671875, + "learning_rate": 1.5035014293783972e-06, + "loss": 0.1994, + "num_input_tokens_seen": 55019296, + "step": 81630 + }, + { + "epoch": 1.994356631568661, + "grad_norm": 0.14634768664836884, + "learning_rate": 1.503427748179919e-06, + "loss": 0.045, + "num_input_tokens_seen": 55022176, + "step": 81635 + }, + { + "epoch": 1.9944787824005081, + "grad_norm": 30.956127166748047, + "learning_rate": 1.503354063320379e-06, + "loss": 0.1222, + "num_input_tokens_seen": 55025184, + "step": 81640 + }, + { + "epoch": 1.9946009332323553, + "grad_norm": 151.15733337402344, + "learning_rate": 1.503280374800313e-06, + "loss": 0.1907, + "num_input_tokens_seen": 55028768, + "step": 81645 + }, + { + "epoch": 1.9947230840642025, + "grad_norm": 0.30794233083724976, + "learning_rate": 1.5032066826202563e-06, + "loss": 0.0371, + "num_input_tokens_seen": 55032608, + "step": 81650 + }, + { + "epoch": 1.9948452348960495, + "grad_norm": 0.3560013473033905, + "learning_rate": 1.5031329867807457e-06, + "loss": 0.0012, + "num_input_tokens_seen": 55036064, + "step": 81655 + }, + { + "epoch": 1.9949673857278967, + "grad_norm": 41.600990295410156, + "learning_rate": 1.5030592872823164e-06, + "loss": 0.1023, + "num_input_tokens_seen": 55039072, + "step": 81660 + }, + { + "epoch": 1.9950895365597439, + "grad_norm": 2.1408729553222656, + "learning_rate": 1.5029855841255047e-06, + "loss": 0.092, + "num_input_tokens_seen": 55042272, + "step": 81665 + }, + { + "epoch": 1.995211687391591, + "grad_norm": 54.596038818359375, + "learning_rate": 1.5029118773108467e-06, + "loss": 0.1157, + "num_input_tokens_seen": 55045216, + "step": 81670 + }, + { + "epoch": 1.9953338382234382, + "grad_norm": 16.272363662719727, + "learning_rate": 1.5028381668388783e-06, + "loss": 0.0362, + "num_input_tokens_seen": 55048608, + "step": 81675 + }, + { + "epoch": 1.9954559890552854, + "grad_norm": 0.11136430501937866, + "learning_rate": 1.5027644527101353e-06, + "loss": 0.1336, + "num_input_tokens_seen": 55051872, + "step": 81680 + }, + { + "epoch": 1.9955781398871326, + "grad_norm": 17.665876388549805, + "learning_rate": 1.5026907349251538e-06, + "loss": 0.1057, + "num_input_tokens_seen": 55055264, + "step": 81685 + }, + { + "epoch": 1.9957002907189798, + "grad_norm": 0.5513327121734619, + "learning_rate": 1.5026170134844705e-06, + "loss": 0.1327, + "num_input_tokens_seen": 55058336, + "step": 81690 + }, + { + "epoch": 1.995822441550827, + "grad_norm": 139.2957000732422, + "learning_rate": 1.5025432883886208e-06, + "loss": 0.0102, + "num_input_tokens_seen": 55061216, + "step": 81695 + }, + { + "epoch": 1.9959445923826742, + "grad_norm": 13.870861053466797, + "learning_rate": 1.502469559638141e-06, + "loss": 0.1804, + "num_input_tokens_seen": 55064800, + "step": 81700 + }, + { + "epoch": 1.9960667432145214, + "grad_norm": 108.46243286132812, + "learning_rate": 1.5023958272335677e-06, + "loss": 0.0232, + "num_input_tokens_seen": 55068064, + "step": 81705 + }, + { + "epoch": 1.9961888940463686, + "grad_norm": 32.1049919128418, + "learning_rate": 1.5023220911754368e-06, + "loss": 0.1199, + "num_input_tokens_seen": 55071840, + "step": 81710 + }, + { + "epoch": 1.9963110448782158, + "grad_norm": 1.4670863151550293, + "learning_rate": 1.502248351464285e-06, + "loss": 0.102, + "num_input_tokens_seen": 55075232, + "step": 81715 + }, + { + "epoch": 1.996433195710063, + "grad_norm": 0.029370633885264397, + "learning_rate": 1.5021746081006474e-06, + "loss": 0.0461, + "num_input_tokens_seen": 55078368, + "step": 81720 + }, + { + "epoch": 1.99655534654191, + "grad_norm": 0.2155640423297882, + "learning_rate": 1.502100861085061e-06, + "loss": 0.0729, + "num_input_tokens_seen": 55082080, + "step": 81725 + }, + { + "epoch": 1.996677497373757, + "grad_norm": 12.511549949645996, + "learning_rate": 1.5020271104180623e-06, + "loss": 0.1241, + "num_input_tokens_seen": 55085024, + "step": 81730 + }, + { + "epoch": 1.9967996482056043, + "grad_norm": 8.582071304321289, + "learning_rate": 1.5019533561001875e-06, + "loss": 0.1156, + "num_input_tokens_seen": 55088544, + "step": 81735 + }, + { + "epoch": 1.9969217990374515, + "grad_norm": 57.26712417602539, + "learning_rate": 1.5018795981319727e-06, + "loss": 0.1221, + "num_input_tokens_seen": 55091936, + "step": 81740 + }, + { + "epoch": 1.9970439498692985, + "grad_norm": 1.2923898696899414, + "learning_rate": 1.5018058365139546e-06, + "loss": 0.0466, + "num_input_tokens_seen": 55095328, + "step": 81745 + }, + { + "epoch": 1.9971661007011456, + "grad_norm": 12.235097885131836, + "learning_rate": 1.5017320712466695e-06, + "loss": 0.1147, + "num_input_tokens_seen": 55098656, + "step": 81750 + }, + { + "epoch": 1.9972882515329928, + "grad_norm": 32.15383529663086, + "learning_rate": 1.5016583023306538e-06, + "loss": 0.0567, + "num_input_tokens_seen": 55102048, + "step": 81755 + }, + { + "epoch": 1.99741040236484, + "grad_norm": 0.5230231285095215, + "learning_rate": 1.5015845297664437e-06, + "loss": 0.0016, + "num_input_tokens_seen": 55105120, + "step": 81760 + }, + { + "epoch": 1.9975325531966872, + "grad_norm": 16.578676223754883, + "learning_rate": 1.5015107535545765e-06, + "loss": 0.0671, + "num_input_tokens_seen": 55108192, + "step": 81765 + }, + { + "epoch": 1.9976547040285344, + "grad_norm": 14.295267105102539, + "learning_rate": 1.501436973695588e-06, + "loss": 0.2056, + "num_input_tokens_seen": 55111200, + "step": 81770 + }, + { + "epoch": 1.9977768548603816, + "grad_norm": 10.518472671508789, + "learning_rate": 1.5013631901900147e-06, + "loss": 0.188, + "num_input_tokens_seen": 55114400, + "step": 81775 + }, + { + "epoch": 1.9978990056922288, + "grad_norm": 12.591024398803711, + "learning_rate": 1.501289403038394e-06, + "loss": 0.0903, + "num_input_tokens_seen": 55117728, + "step": 81780 + }, + { + "epoch": 1.998021156524076, + "grad_norm": 0.04502028971910477, + "learning_rate": 1.5012156122412615e-06, + "loss": 0.0297, + "num_input_tokens_seen": 55121568, + "step": 81785 + }, + { + "epoch": 1.9981433073559232, + "grad_norm": 1.9730950593948364, + "learning_rate": 1.501141817799154e-06, + "loss": 0.0873, + "num_input_tokens_seen": 55124896, + "step": 81790 + }, + { + "epoch": 1.9982654581877703, + "grad_norm": 292.1108093261719, + "learning_rate": 1.5010680197126089e-06, + "loss": 0.0424, + "num_input_tokens_seen": 55127904, + "step": 81795 + }, + { + "epoch": 1.9983876090196175, + "grad_norm": 10.42750358581543, + "learning_rate": 1.5009942179821624e-06, + "loss": 0.1556, + "num_input_tokens_seen": 55130784, + "step": 81800 + }, + { + "epoch": 1.9985097598514647, + "grad_norm": 15.448634147644043, + "learning_rate": 1.5009204126083507e-06, + "loss": 0.0675, + "num_input_tokens_seen": 55133984, + "step": 81805 + }, + { + "epoch": 1.998631910683312, + "grad_norm": 12.578239440917969, + "learning_rate": 1.5008466035917117e-06, + "loss": 0.0825, + "num_input_tokens_seen": 55137376, + "step": 81810 + }, + { + "epoch": 1.9987540615151589, + "grad_norm": 286.89434814453125, + "learning_rate": 1.500772790932781e-06, + "loss": 0.0692, + "num_input_tokens_seen": 55140640, + "step": 81815 + }, + { + "epoch": 1.998876212347006, + "grad_norm": 20.852294921875, + "learning_rate": 1.5006989746320962e-06, + "loss": 0.1433, + "num_input_tokens_seen": 55144032, + "step": 81820 + }, + { + "epoch": 1.9989983631788533, + "grad_norm": 0.14329631626605988, + "learning_rate": 1.5006251546901936e-06, + "loss": 0.0021, + "num_input_tokens_seen": 55147680, + "step": 81825 + }, + { + "epoch": 1.9991205140107005, + "grad_norm": 0.04360923171043396, + "learning_rate": 1.5005513311076103e-06, + "loss": 0.0312, + "num_input_tokens_seen": 55151136, + "step": 81830 + }, + { + "epoch": 1.9992426648425474, + "grad_norm": 7.934567451477051, + "learning_rate": 1.500477503884883e-06, + "loss": 0.0798, + "num_input_tokens_seen": 55154080, + "step": 81835 + }, + { + "epoch": 1.9993648156743946, + "grad_norm": 17.159452438354492, + "learning_rate": 1.5004036730225486e-06, + "loss": 0.0722, + "num_input_tokens_seen": 55157152, + "step": 81840 + }, + { + "epoch": 1.9994869665062418, + "grad_norm": 103.14313507080078, + "learning_rate": 1.5003298385211443e-06, + "loss": 0.0506, + "num_input_tokens_seen": 55160864, + "step": 81845 + }, + { + "epoch": 1.999609117338089, + "grad_norm": 1.8348734378814697, + "learning_rate": 1.5002560003812064e-06, + "loss": 0.0443, + "num_input_tokens_seen": 55164448, + "step": 81850 + }, + { + "epoch": 1.9997312681699362, + "grad_norm": 13.934343338012695, + "learning_rate": 1.5001821586032729e-06, + "loss": 0.1503, + "num_input_tokens_seen": 55167904, + "step": 81855 + }, + { + "epoch": 1.9998534190017834, + "grad_norm": 0.1302555799484253, + "learning_rate": 1.50010831318788e-06, + "loss": 0.0473, + "num_input_tokens_seen": 55171040, + "step": 81860 + }, + { + "epoch": 1.9999755698336306, + "grad_norm": 12.425285339355469, + "learning_rate": 1.500034464135565e-06, + "loss": 0.2202, + "num_input_tokens_seen": 55174048, + "step": 81865 + }, + { + "epoch": 2.0000977206654778, + "grad_norm": 0.4271923303604126, + "learning_rate": 1.4999606114468647e-06, + "loss": 0.0019, + "num_input_tokens_seen": 55177384, + "step": 81870 + }, + { + "epoch": 2.0001465809982166, + "eval_loss": 0.12374971061944962, + "eval_runtime": 47.6455, + "eval_samples_per_second": 763.661, + "eval_steps_per_second": 95.476, + "num_input_tokens_seen": 55178600, + "step": 81872 + }, + { + "epoch": 2.000219871497325, + "grad_norm": 0.012282461859285831, + "learning_rate": 1.4998867551223164e-06, + "loss": 0.0235, + "num_input_tokens_seen": 55180584, + "step": 81875 + }, + { + "epoch": 2.000342022329172, + "grad_norm": 0.15548351407051086, + "learning_rate": 1.4998128951624572e-06, + "loss": 0.0016, + "num_input_tokens_seen": 55183592, + "step": 81880 + }, + { + "epoch": 2.0004641731610193, + "grad_norm": 0.16244475543498993, + "learning_rate": 1.4997390315678242e-06, + "loss": 0.0287, + "num_input_tokens_seen": 55187368, + "step": 81885 + }, + { + "epoch": 2.0005863239928665, + "grad_norm": 0.03867373988032341, + "learning_rate": 1.4996651643389545e-06, + "loss": 0.0262, + "num_input_tokens_seen": 55191016, + "step": 81890 + }, + { + "epoch": 2.0007084748247137, + "grad_norm": 1.0324246883392334, + "learning_rate": 1.4995912934763854e-06, + "loss": 0.0651, + "num_input_tokens_seen": 55194600, + "step": 81895 + }, + { + "epoch": 2.000830625656561, + "grad_norm": 143.933837890625, + "learning_rate": 1.4995174189806542e-06, + "loss": 0.0267, + "num_input_tokens_seen": 55197736, + "step": 81900 + }, + { + "epoch": 2.000952776488408, + "grad_norm": 0.10085337609052658, + "learning_rate": 1.4994435408522976e-06, + "loss": 0.047, + "num_input_tokens_seen": 55201320, + "step": 81905 + }, + { + "epoch": 2.0010749273202553, + "grad_norm": 0.27604278922080994, + "learning_rate": 1.4993696590918533e-06, + "loss": 0.0019, + "num_input_tokens_seen": 55204520, + "step": 81910 + }, + { + "epoch": 2.001197078152102, + "grad_norm": 14.148911476135254, + "learning_rate": 1.4992957736998589e-06, + "loss": 0.0854, + "num_input_tokens_seen": 55207656, + "step": 81915 + }, + { + "epoch": 2.001319228983949, + "grad_norm": 0.190697580575943, + "learning_rate": 1.4992218846768509e-06, + "loss": 0.0359, + "num_input_tokens_seen": 55210856, + "step": 81920 + }, + { + "epoch": 2.0014413798157964, + "grad_norm": 0.6571767926216125, + "learning_rate": 1.4991479920233673e-06, + "loss": 0.0026, + "num_input_tokens_seen": 55214312, + "step": 81925 + }, + { + "epoch": 2.0015635306476436, + "grad_norm": 0.1770526021718979, + "learning_rate": 1.4990740957399452e-06, + "loss": 0.0017, + "num_input_tokens_seen": 55217256, + "step": 81930 + }, + { + "epoch": 2.0016856814794908, + "grad_norm": 0.2347087413072586, + "learning_rate": 1.499000195827122e-06, + "loss": 0.0011, + "num_input_tokens_seen": 55220648, + "step": 81935 + }, + { + "epoch": 2.001807832311338, + "grad_norm": 18.45768928527832, + "learning_rate": 1.4989262922854353e-06, + "loss": 0.0394, + "num_input_tokens_seen": 55223848, + "step": 81940 + }, + { + "epoch": 2.001929983143185, + "grad_norm": 0.006956683937460184, + "learning_rate": 1.4988523851154221e-06, + "loss": 0.0271, + "num_input_tokens_seen": 55227048, + "step": 81945 + }, + { + "epoch": 2.0020521339750323, + "grad_norm": 0.07157830893993378, + "learning_rate": 1.4987784743176206e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55230568, + "step": 81950 + }, + { + "epoch": 2.0021742848068795, + "grad_norm": 0.054001644253730774, + "learning_rate": 1.4987045598925678e-06, + "loss": 0.1124, + "num_input_tokens_seen": 55233448, + "step": 81955 + }, + { + "epoch": 2.0022964356387267, + "grad_norm": 0.013696554116904736, + "learning_rate": 1.4986306418408011e-06, + "loss": 0.0502, + "num_input_tokens_seen": 55236392, + "step": 81960 + }, + { + "epoch": 2.002418586470574, + "grad_norm": 0.04159333184361458, + "learning_rate": 1.4985567201628584e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55239592, + "step": 81965 + }, + { + "epoch": 2.002540737302421, + "grad_norm": 0.21539679169654846, + "learning_rate": 1.498482794859277e-06, + "loss": 0.0293, + "num_input_tokens_seen": 55242920, + "step": 81970 + }, + { + "epoch": 2.0026628881342683, + "grad_norm": 0.0375695638358593, + "learning_rate": 1.4984088659305949e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55246312, + "step": 81975 + }, + { + "epoch": 2.0027850389661155, + "grad_norm": 0.021181389689445496, + "learning_rate": 1.4983349333773493e-06, + "loss": 0.0007, + "num_input_tokens_seen": 55249320, + "step": 81980 + }, + { + "epoch": 2.0029071897979627, + "grad_norm": 0.10235611349344254, + "learning_rate": 1.4982609972000779e-06, + "loss": 0.0017, + "num_input_tokens_seen": 55252584, + "step": 81985 + }, + { + "epoch": 2.00302934062981, + "grad_norm": 0.08041008561849594, + "learning_rate": 1.4981870573993187e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55256424, + "step": 81990 + }, + { + "epoch": 2.003151491461657, + "grad_norm": 16.012470245361328, + "learning_rate": 1.498113113975609e-06, + "loss": 0.0694, + "num_input_tokens_seen": 55260136, + "step": 81995 + }, + { + "epoch": 2.003273642293504, + "grad_norm": 33.74158477783203, + "learning_rate": 1.4980391669294872e-06, + "loss": 0.0646, + "num_input_tokens_seen": 55263464, + "step": 82000 + }, + { + "epoch": 2.003395793125351, + "grad_norm": 0.030892247334122658, + "learning_rate": 1.4979652162614902e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55267048, + "step": 82005 + }, + { + "epoch": 2.003517943957198, + "grad_norm": 26.975217819213867, + "learning_rate": 1.4978912619721563e-06, + "loss": 0.0811, + "num_input_tokens_seen": 55270056, + "step": 82010 + }, + { + "epoch": 2.0036400947890454, + "grad_norm": 0.011399905197322369, + "learning_rate": 1.4978173040620233e-06, + "loss": 0.0692, + "num_input_tokens_seen": 55273384, + "step": 82015 + }, + { + "epoch": 2.0037622456208926, + "grad_norm": 0.23399126529693604, + "learning_rate": 1.497743342531629e-06, + "loss": 0.0016, + "num_input_tokens_seen": 55276136, + "step": 82020 + }, + { + "epoch": 2.0038843964527397, + "grad_norm": 0.0349041149020195, + "learning_rate": 1.4976693773815113e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55280040, + "step": 82025 + }, + { + "epoch": 2.004006547284587, + "grad_norm": 0.43788591027259827, + "learning_rate": 1.497595408612208e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55283240, + "step": 82030 + }, + { + "epoch": 2.004128698116434, + "grad_norm": 0.2341459095478058, + "learning_rate": 1.4975214362242567e-06, + "loss": 0.0575, + "num_input_tokens_seen": 55286760, + "step": 82035 + }, + { + "epoch": 2.0042508489482813, + "grad_norm": 0.0617409273982048, + "learning_rate": 1.4974474602181962e-06, + "loss": 0.0749, + "num_input_tokens_seen": 55290344, + "step": 82040 + }, + { + "epoch": 2.0043729997801285, + "grad_norm": 15.370429992675781, + "learning_rate": 1.4973734805945635e-06, + "loss": 0.1332, + "num_input_tokens_seen": 55293928, + "step": 82045 + }, + { + "epoch": 2.0044951506119757, + "grad_norm": 0.027430595830082893, + "learning_rate": 1.4972994973538976e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55296936, + "step": 82050 + }, + { + "epoch": 2.004617301443823, + "grad_norm": 0.2160617858171463, + "learning_rate": 1.4972255104967355e-06, + "loss": 0.0304, + "num_input_tokens_seen": 55300456, + "step": 82055 + }, + { + "epoch": 2.00473945227567, + "grad_norm": 0.0136873172596097, + "learning_rate": 1.497151520023616e-06, + "loss": 0.0007, + "num_input_tokens_seen": 55303528, + "step": 82060 + }, + { + "epoch": 2.0048616031075173, + "grad_norm": 0.04217163100838661, + "learning_rate": 1.4970775259350767e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55306600, + "step": 82065 + }, + { + "epoch": 2.0049837539393645, + "grad_norm": 0.10878748446702957, + "learning_rate": 1.4970035282316562e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55309992, + "step": 82070 + }, + { + "epoch": 2.0051059047712116, + "grad_norm": 0.9541506171226501, + "learning_rate": 1.4969295269138924e-06, + "loss": 0.0445, + "num_input_tokens_seen": 55313576, + "step": 82075 + }, + { + "epoch": 2.005228055603059, + "grad_norm": 0.03260614722967148, + "learning_rate": 1.4968555219823233e-06, + "loss": 0.0393, + "num_input_tokens_seen": 55317096, + "step": 82080 + }, + { + "epoch": 2.005350206434906, + "grad_norm": 0.04927986115217209, + "learning_rate": 1.4967815134374872e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55320808, + "step": 82085 + }, + { + "epoch": 2.0054723572667528, + "grad_norm": 0.3475458323955536, + "learning_rate": 1.4967075012799224e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55323944, + "step": 82090 + }, + { + "epoch": 2.0055945080986, + "grad_norm": 0.06237736716866493, + "learning_rate": 1.4966334855101667e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55327080, + "step": 82095 + }, + { + "epoch": 2.005716658930447, + "grad_norm": 0.0665455311536789, + "learning_rate": 1.496559466128759e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55330664, + "step": 82100 + }, + { + "epoch": 2.0058388097622943, + "grad_norm": 0.006148200482130051, + "learning_rate": 1.4964854431362372e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55333864, + "step": 82105 + }, + { + "epoch": 2.0059609605941415, + "grad_norm": 0.07519830018281937, + "learning_rate": 1.49641141653314e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55336808, + "step": 82110 + }, + { + "epoch": 2.0060831114259887, + "grad_norm": 17.028867721557617, + "learning_rate": 1.4963373863200053e-06, + "loss": 0.0759, + "num_input_tokens_seen": 55340328, + "step": 82115 + }, + { + "epoch": 2.006205262257836, + "grad_norm": 0.02648085355758667, + "learning_rate": 1.4962633524973716e-06, + "loss": 0.0007, + "num_input_tokens_seen": 55343656, + "step": 82120 + }, + { + "epoch": 2.006327413089683, + "grad_norm": 2.6933555603027344, + "learning_rate": 1.4961893150657775e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55347048, + "step": 82125 + }, + { + "epoch": 2.0064495639215303, + "grad_norm": 21.312503814697266, + "learning_rate": 1.496115274025761e-06, + "loss": 0.0942, + "num_input_tokens_seen": 55350248, + "step": 82130 + }, + { + "epoch": 2.0065717147533775, + "grad_norm": 0.0015857619000598788, + "learning_rate": 1.4960412293778609e-06, + "loss": 0.0362, + "num_input_tokens_seen": 55354024, + "step": 82135 + }, + { + "epoch": 2.0066938655852247, + "grad_norm": 0.007982099428772926, + "learning_rate": 1.4959671811226152e-06, + "loss": 0.0268, + "num_input_tokens_seen": 55357480, + "step": 82140 + }, + { + "epoch": 2.006816016417072, + "grad_norm": 0.003060021670535207, + "learning_rate": 1.4958931292605631e-06, + "loss": 0.0377, + "num_input_tokens_seen": 55361256, + "step": 82145 + }, + { + "epoch": 2.006938167248919, + "grad_norm": 0.052214279770851135, + "learning_rate": 1.495819073792243e-06, + "loss": 0.049, + "num_input_tokens_seen": 55364200, + "step": 82150 + }, + { + "epoch": 2.0070603180807662, + "grad_norm": 0.572651207447052, + "learning_rate": 1.4957450147181928e-06, + "loss": 0.0635, + "num_input_tokens_seen": 55368168, + "step": 82155 + }, + { + "epoch": 2.0071824689126134, + "grad_norm": 0.002212380524724722, + "learning_rate": 1.4956709520389517e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55371688, + "step": 82160 + }, + { + "epoch": 2.0073046197444606, + "grad_norm": 21.469194412231445, + "learning_rate": 1.495596885755058e-06, + "loss": 0.0643, + "num_input_tokens_seen": 55375016, + "step": 82165 + }, + { + "epoch": 2.007426770576308, + "grad_norm": 0.4995213449001312, + "learning_rate": 1.4955228158670509e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55380456, + "step": 82170 + }, + { + "epoch": 2.007548921408155, + "grad_norm": 0.023713387548923492, + "learning_rate": 1.4954487423754682e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55383784, + "step": 82175 + }, + { + "epoch": 2.0076710722400017, + "grad_norm": 0.020805804058909416, + "learning_rate": 1.4953746652808492e-06, + "loss": 0.0392, + "num_input_tokens_seen": 55386856, + "step": 82180 + }, + { + "epoch": 2.007793223071849, + "grad_norm": 0.3754969835281372, + "learning_rate": 1.4953005845837322e-06, + "loss": 0.0635, + "num_input_tokens_seen": 55390568, + "step": 82185 + }, + { + "epoch": 2.007915373903696, + "grad_norm": 0.010111669078469276, + "learning_rate": 1.495226500284656e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55393576, + "step": 82190 + }, + { + "epoch": 2.0080375247355433, + "grad_norm": 0.008723114617168903, + "learning_rate": 1.4951524123841598e-06, + "loss": 0.0582, + "num_input_tokens_seen": 55396968, + "step": 82195 + }, + { + "epoch": 2.0081596755673905, + "grad_norm": 0.0035427026450634003, + "learning_rate": 1.495078320882782e-06, + "loss": 0.0018, + "num_input_tokens_seen": 55400040, + "step": 82200 + }, + { + "epoch": 2.0082818263992377, + "grad_norm": 0.23637795448303223, + "learning_rate": 1.4950042257810616e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55403432, + "step": 82205 + }, + { + "epoch": 2.008403977231085, + "grad_norm": 0.1661052256822586, + "learning_rate": 1.4949301270795372e-06, + "loss": 0.0285, + "num_input_tokens_seen": 55406632, + "step": 82210 + }, + { + "epoch": 2.008526128062932, + "grad_norm": 0.16679638624191284, + "learning_rate": 1.4948560247787477e-06, + "loss": 0.0309, + "num_input_tokens_seen": 55409832, + "step": 82215 + }, + { + "epoch": 2.0086482788947793, + "grad_norm": 5.093140125274658, + "learning_rate": 1.494781918879232e-06, + "loss": 0.002, + "num_input_tokens_seen": 55413224, + "step": 82220 + }, + { + "epoch": 2.0087704297266264, + "grad_norm": 0.006286388263106346, + "learning_rate": 1.4947078093815294e-06, + "loss": 0.0609, + "num_input_tokens_seen": 55416936, + "step": 82225 + }, + { + "epoch": 2.0088925805584736, + "grad_norm": 7.307034969329834, + "learning_rate": 1.4946336962861782e-06, + "loss": 0.0009, + "num_input_tokens_seen": 55419880, + "step": 82230 + }, + { + "epoch": 2.009014731390321, + "grad_norm": 16.302734375, + "learning_rate": 1.494559579593718e-06, + "loss": 0.0786, + "num_input_tokens_seen": 55423080, + "step": 82235 + }, + { + "epoch": 2.009136882222168, + "grad_norm": 0.00019041105406358838, + "learning_rate": 1.4944854593046876e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55426280, + "step": 82240 + }, + { + "epoch": 2.009259033054015, + "grad_norm": 12.382462501525879, + "learning_rate": 1.4944113354196258e-06, + "loss": 0.0316, + "num_input_tokens_seen": 55429800, + "step": 82245 + }, + { + "epoch": 2.0093811838858624, + "grad_norm": 0.1528623104095459, + "learning_rate": 1.4943372079390718e-06, + "loss": 0.0985, + "num_input_tokens_seen": 55433320, + "step": 82250 + }, + { + "epoch": 2.0095033347177096, + "grad_norm": 0.5868679285049438, + "learning_rate": 1.4942630768635644e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55437160, + "step": 82255 + }, + { + "epoch": 2.009625485549557, + "grad_norm": 0.033141035586595535, + "learning_rate": 1.4941889421936433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55441000, + "step": 82260 + }, + { + "epoch": 2.009747636381404, + "grad_norm": 0.05835259333252907, + "learning_rate": 1.4941148039298472e-06, + "loss": 0.0276, + "num_input_tokens_seen": 55443944, + "step": 82265 + }, + { + "epoch": 2.0098697872132507, + "grad_norm": 0.14883288741111755, + "learning_rate": 1.4940406620727154e-06, + "loss": 0.0445, + "num_input_tokens_seen": 55447272, + "step": 82270 + }, + { + "epoch": 2.009991938045098, + "grad_norm": 28.439109802246094, + "learning_rate": 1.493966516622787e-06, + "loss": 0.1243, + "num_input_tokens_seen": 55450920, + "step": 82275 + }, + { + "epoch": 2.010114088876945, + "grad_norm": 0.02109772339463234, + "learning_rate": 1.4938923675806012e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55454440, + "step": 82280 + }, + { + "epoch": 2.0102362397087923, + "grad_norm": 100.99060821533203, + "learning_rate": 1.4938182149466974e-06, + "loss": 0.0021, + "num_input_tokens_seen": 55458152, + "step": 82285 + }, + { + "epoch": 2.0103583905406395, + "grad_norm": 0.028115229681134224, + "learning_rate": 1.4937440587216144e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55461544, + "step": 82290 + }, + { + "epoch": 2.0104805413724867, + "grad_norm": 0.009398140013217926, + "learning_rate": 1.493669898905892e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55465256, + "step": 82295 + }, + { + "epoch": 2.010602692204334, + "grad_norm": 0.023891514167189598, + "learning_rate": 1.4935957355000693e-06, + "loss": 0.0696, + "num_input_tokens_seen": 55469032, + "step": 82300 + }, + { + "epoch": 2.010724843036181, + "grad_norm": 0.007383616175502539, + "learning_rate": 1.4935215685046858e-06, + "loss": 0.103, + "num_input_tokens_seen": 55472424, + "step": 82305 + }, + { + "epoch": 2.0108469938680282, + "grad_norm": 0.05158611014485359, + "learning_rate": 1.4934473979202804e-06, + "loss": 0.0419, + "num_input_tokens_seen": 55475752, + "step": 82310 + }, + { + "epoch": 2.0109691446998754, + "grad_norm": 0.03322657570242882, + "learning_rate": 1.4933732237473928e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55479912, + "step": 82315 + }, + { + "epoch": 2.0110912955317226, + "grad_norm": 0.027250945568084717, + "learning_rate": 1.4932990459865626e-06, + "loss": 0.0835, + "num_input_tokens_seen": 55483496, + "step": 82320 + }, + { + "epoch": 2.01121344636357, + "grad_norm": 0.0014734260039404035, + "learning_rate": 1.493224864638329e-06, + "loss": 0.0489, + "num_input_tokens_seen": 55486952, + "step": 82325 + }, + { + "epoch": 2.011335597195417, + "grad_norm": 0.007549791131168604, + "learning_rate": 1.4931506797032316e-06, + "loss": 0.0007, + "num_input_tokens_seen": 55490216, + "step": 82330 + }, + { + "epoch": 2.011457748027264, + "grad_norm": 30.353553771972656, + "learning_rate": 1.49307649118181e-06, + "loss": 0.0442, + "num_input_tokens_seen": 55493352, + "step": 82335 + }, + { + "epoch": 2.0115798988591114, + "grad_norm": 21.21133804321289, + "learning_rate": 1.4930022990746034e-06, + "loss": 0.1068, + "num_input_tokens_seen": 55496424, + "step": 82340 + }, + { + "epoch": 2.0117020496909586, + "grad_norm": 0.3378548324108124, + "learning_rate": 1.4929281033821513e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55499816, + "step": 82345 + }, + { + "epoch": 2.0118242005228058, + "grad_norm": 0.03759992495179176, + "learning_rate": 1.4928539041049935e-06, + "loss": 0.0016, + "num_input_tokens_seen": 55503464, + "step": 82350 + }, + { + "epoch": 2.011946351354653, + "grad_norm": 0.14137744903564453, + "learning_rate": 1.4927797012436694e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55507048, + "step": 82355 + }, + { + "epoch": 2.0120685021864997, + "grad_norm": 0.012416354380548, + "learning_rate": 1.492705494798719e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55510440, + "step": 82360 + }, + { + "epoch": 2.012190653018347, + "grad_norm": 0.033463358879089355, + "learning_rate": 1.4926312847706817e-06, + "loss": 0.0236, + "num_input_tokens_seen": 55514024, + "step": 82365 + }, + { + "epoch": 2.012312803850194, + "grad_norm": 0.016904111951589584, + "learning_rate": 1.4925570711600972e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55517224, + "step": 82370 + }, + { + "epoch": 2.0124349546820413, + "grad_norm": 0.18639439344406128, + "learning_rate": 1.492482853967505e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55520808, + "step": 82375 + }, + { + "epoch": 2.0125571055138884, + "grad_norm": 0.2579617202281952, + "learning_rate": 1.4924086331934454e-06, + "loss": 0.0009, + "num_input_tokens_seen": 55523816, + "step": 82380 + }, + { + "epoch": 2.0126792563457356, + "grad_norm": 1.6947616338729858, + "learning_rate": 1.4923344088384576e-06, + "loss": 0.0028, + "num_input_tokens_seen": 55527208, + "step": 82385 + }, + { + "epoch": 2.012801407177583, + "grad_norm": 0.005877126008272171, + "learning_rate": 1.4922601809030814e-06, + "loss": 0.0118, + "num_input_tokens_seen": 55530408, + "step": 82390 + }, + { + "epoch": 2.01292355800943, + "grad_norm": 0.07566092163324356, + "learning_rate": 1.492185949387857e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55533608, + "step": 82395 + }, + { + "epoch": 2.013045708841277, + "grad_norm": 0.016146007925271988, + "learning_rate": 1.492111714293324e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55536744, + "step": 82400 + }, + { + "epoch": 2.0131678596731244, + "grad_norm": 0.009761742316186428, + "learning_rate": 1.492037475620022e-06, + "loss": 0.0285, + "num_input_tokens_seen": 55540392, + "step": 82405 + }, + { + "epoch": 2.0132900105049716, + "grad_norm": 0.1334611028432846, + "learning_rate": 1.4919632333684913e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55543464, + "step": 82410 + }, + { + "epoch": 2.0134121613368188, + "grad_norm": 0.005181447137147188, + "learning_rate": 1.4918889875392716e-06, + "loss": 0.041, + "num_input_tokens_seen": 55546600, + "step": 82415 + }, + { + "epoch": 2.013534312168666, + "grad_norm": 32.899227142333984, + "learning_rate": 1.4918147381329028e-06, + "loss": 0.092, + "num_input_tokens_seen": 55549608, + "step": 82420 + }, + { + "epoch": 2.013656463000513, + "grad_norm": 0.08666107058525085, + "learning_rate": 1.491740485149925e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55553128, + "step": 82425 + }, + { + "epoch": 2.0137786138323603, + "grad_norm": 0.022960102185606956, + "learning_rate": 1.491666228590878e-06, + "loss": 0.0594, + "num_input_tokens_seen": 55556328, + "step": 82430 + }, + { + "epoch": 2.0139007646642075, + "grad_norm": 0.004995742812752724, + "learning_rate": 1.4915919684563023e-06, + "loss": 0.1018, + "num_input_tokens_seen": 55559336, + "step": 82435 + }, + { + "epoch": 2.0140229154960547, + "grad_norm": 0.036234088242053986, + "learning_rate": 1.4915177047467374e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55562792, + "step": 82440 + }, + { + "epoch": 2.014145066327902, + "grad_norm": 0.0267754215747118, + "learning_rate": 1.4914434374627237e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55565736, + "step": 82445 + }, + { + "epoch": 2.0142672171597487, + "grad_norm": 0.008292196318507195, + "learning_rate": 1.491369166604801e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55569704, + "step": 82450 + }, + { + "epoch": 2.014389367991596, + "grad_norm": 0.29497432708740234, + "learning_rate": 1.4912948921735093e-06, + "loss": 0.0369, + "num_input_tokens_seen": 55572712, + "step": 82455 + }, + { + "epoch": 2.014511518823443, + "grad_norm": 0.017537608742713928, + "learning_rate": 1.4912206141693893e-06, + "loss": 0.0076, + "num_input_tokens_seen": 55575976, + "step": 82460 + }, + { + "epoch": 2.0146336696552902, + "grad_norm": 0.05061454698443413, + "learning_rate": 1.491146332592981e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55579624, + "step": 82465 + }, + { + "epoch": 2.0147558204871374, + "grad_norm": 0.0060595618560910225, + "learning_rate": 1.491072047444824e-06, + "loss": 0.0561, + "num_input_tokens_seen": 55582824, + "step": 82470 + }, + { + "epoch": 2.0148779713189846, + "grad_norm": 0.016623610630631447, + "learning_rate": 1.4909977587254595e-06, + "loss": 0.0205, + "num_input_tokens_seen": 55586472, + "step": 82475 + }, + { + "epoch": 2.015000122150832, + "grad_norm": 0.011867456138134003, + "learning_rate": 1.4909234664354266e-06, + "loss": 0.0476, + "num_input_tokens_seen": 55589288, + "step": 82480 + }, + { + "epoch": 2.015122272982679, + "grad_norm": 0.5147221088409424, + "learning_rate": 1.490849170575267e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55592488, + "step": 82485 + }, + { + "epoch": 2.015244423814526, + "grad_norm": 0.07372906059026718, + "learning_rate": 1.4907748711455198e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55596392, + "step": 82490 + }, + { + "epoch": 2.0153665746463734, + "grad_norm": 17.638710021972656, + "learning_rate": 1.4907005681467257e-06, + "loss": 0.0414, + "num_input_tokens_seen": 55599720, + "step": 82495 + }, + { + "epoch": 2.0154887254782206, + "grad_norm": 0.024421432986855507, + "learning_rate": 1.490626261579425e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55602920, + "step": 82500 + }, + { + "epoch": 2.0156108763100677, + "grad_norm": 0.9297134280204773, + "learning_rate": 1.4905519514441585e-06, + "loss": 0.0006, + "num_input_tokens_seen": 55606248, + "step": 82505 + }, + { + "epoch": 2.015733027141915, + "grad_norm": 0.0029619180131703615, + "learning_rate": 1.490477637741466e-06, + "loss": 0.0062, + "num_input_tokens_seen": 55609384, + "step": 82510 + }, + { + "epoch": 2.015855177973762, + "grad_norm": 0.04868128523230553, + "learning_rate": 1.4904033204718881e-06, + "loss": 0.087, + "num_input_tokens_seen": 55612584, + "step": 82515 + }, + { + "epoch": 2.0159773288056093, + "grad_norm": 0.0035741401370614767, + "learning_rate": 1.4903289996359659e-06, + "loss": 0.0778, + "num_input_tokens_seen": 55615976, + "step": 82520 + }, + { + "epoch": 2.0160994796374565, + "grad_norm": 0.02756413072347641, + "learning_rate": 1.4902546752342389e-06, + "loss": 0.0, + "num_input_tokens_seen": 55618920, + "step": 82525 + }, + { + "epoch": 2.0162216304693037, + "grad_norm": 0.002254326129332185, + "learning_rate": 1.490180347267248e-06, + "loss": 0.0633, + "num_input_tokens_seen": 55621992, + "step": 82530 + }, + { + "epoch": 2.016343781301151, + "grad_norm": 0.015056909061968327, + "learning_rate": 1.4901060157355338e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55625192, + "step": 82535 + }, + { + "epoch": 2.0164659321329976, + "grad_norm": 0.0272979699075222, + "learning_rate": 1.490031680639637e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55628264, + "step": 82540 + }, + { + "epoch": 2.016588082964845, + "grad_norm": 0.007410817313939333, + "learning_rate": 1.4899573419800979e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55631784, + "step": 82545 + }, + { + "epoch": 2.016710233796692, + "grad_norm": 0.0029097087681293488, + "learning_rate": 1.489882999757457e-06, + "loss": 0.1244, + "num_input_tokens_seen": 55635368, + "step": 82550 + }, + { + "epoch": 2.016832384628539, + "grad_norm": 0.08003532141447067, + "learning_rate": 1.4898086539722556e-06, + "loss": 0.0248, + "num_input_tokens_seen": 55638440, + "step": 82555 + }, + { + "epoch": 2.0169545354603864, + "grad_norm": 0.37841618061065674, + "learning_rate": 1.4897343046250337e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55641704, + "step": 82560 + }, + { + "epoch": 2.0170766862922336, + "grad_norm": 0.002581874141469598, + "learning_rate": 1.489659951716332e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55644648, + "step": 82565 + }, + { + "epoch": 2.0171988371240808, + "grad_norm": 0.05569832772016525, + "learning_rate": 1.4895855952466918e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55647976, + "step": 82570 + }, + { + "epoch": 2.017320987955928, + "grad_norm": 0.011651813052594662, + "learning_rate": 1.4895112352166533e-06, + "loss": 0.0796, + "num_input_tokens_seen": 55651176, + "step": 82575 + }, + { + "epoch": 2.017443138787775, + "grad_norm": 0.09822391718626022, + "learning_rate": 1.4894368716267573e-06, + "loss": 0.0703, + "num_input_tokens_seen": 55654952, + "step": 82580 + }, + { + "epoch": 2.0175652896196223, + "grad_norm": 0.003922508098185062, + "learning_rate": 1.4893625044775451e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55658344, + "step": 82585 + }, + { + "epoch": 2.0176874404514695, + "grad_norm": 0.21082554757595062, + "learning_rate": 1.4892881337695569e-06, + "loss": 0.1, + "num_input_tokens_seen": 55661864, + "step": 82590 + }, + { + "epoch": 2.0178095912833167, + "grad_norm": 0.008030406199395657, + "learning_rate": 1.4892137595033338e-06, + "loss": 0.0391, + "num_input_tokens_seen": 55665576, + "step": 82595 + }, + { + "epoch": 2.017931742115164, + "grad_norm": 0.06815065443515778, + "learning_rate": 1.4891393816794167e-06, + "loss": 0.0122, + "num_input_tokens_seen": 55668904, + "step": 82600 + }, + { + "epoch": 2.018053892947011, + "grad_norm": 0.04512683302164078, + "learning_rate": 1.4890650002983466e-06, + "loss": 0.0012, + "num_input_tokens_seen": 55672168, + "step": 82605 + }, + { + "epoch": 2.0181760437788583, + "grad_norm": 0.002467342419549823, + "learning_rate": 1.4889906153606639e-06, + "loss": 0.0278, + "num_input_tokens_seen": 55675624, + "step": 82610 + }, + { + "epoch": 2.0182981946107055, + "grad_norm": 0.011767430230975151, + "learning_rate": 1.4889162268669103e-06, + "loss": 0.0216, + "num_input_tokens_seen": 55678696, + "step": 82615 + }, + { + "epoch": 2.0184203454425527, + "grad_norm": 16.49197769165039, + "learning_rate": 1.4888418348176265e-06, + "loss": 0.0693, + "num_input_tokens_seen": 55681832, + "step": 82620 + }, + { + "epoch": 2.0185424962743994, + "grad_norm": 0.0088810408487916, + "learning_rate": 1.4887674392133528e-06, + "loss": 0.0889, + "num_input_tokens_seen": 55684648, + "step": 82625 + }, + { + "epoch": 2.0186646471062466, + "grad_norm": 0.23101457953453064, + "learning_rate": 1.488693040054631e-06, + "loss": 0.0317, + "num_input_tokens_seen": 55688552, + "step": 82630 + }, + { + "epoch": 2.018786797938094, + "grad_norm": 0.008358001708984375, + "learning_rate": 1.4886186373420022e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55691688, + "step": 82635 + }, + { + "epoch": 2.018908948769941, + "grad_norm": 16.869508743286133, + "learning_rate": 1.4885442310760073e-06, + "loss": 0.0431, + "num_input_tokens_seen": 55695016, + "step": 82640 + }, + { + "epoch": 2.019031099601788, + "grad_norm": 0.4266534745693207, + "learning_rate": 1.4884698212571873e-06, + "loss": 0.0524, + "num_input_tokens_seen": 55698984, + "step": 82645 + }, + { + "epoch": 2.0191532504336354, + "grad_norm": 76.40272521972656, + "learning_rate": 1.4883954078860833e-06, + "loss": 0.0188, + "num_input_tokens_seen": 55702056, + "step": 82650 + }, + { + "epoch": 2.0192754012654826, + "grad_norm": 0.3047426640987396, + "learning_rate": 1.4883209909632365e-06, + "loss": 0.0008, + "num_input_tokens_seen": 55705320, + "step": 82655 + }, + { + "epoch": 2.0193975520973297, + "grad_norm": 0.015569807961583138, + "learning_rate": 1.488246570489188e-06, + "loss": 0.0615, + "num_input_tokens_seen": 55708776, + "step": 82660 + }, + { + "epoch": 2.019519702929177, + "grad_norm": 0.3473021388053894, + "learning_rate": 1.4881721464644792e-06, + "loss": 0.0442, + "num_input_tokens_seen": 55711976, + "step": 82665 + }, + { + "epoch": 2.019641853761024, + "grad_norm": 207.76756286621094, + "learning_rate": 1.4880977188896514e-06, + "loss": 0.0361, + "num_input_tokens_seen": 55715240, + "step": 82670 + }, + { + "epoch": 2.0197640045928713, + "grad_norm": 0.05208823084831238, + "learning_rate": 1.4880232877652454e-06, + "loss": 0.0426, + "num_input_tokens_seen": 55718696, + "step": 82675 + }, + { + "epoch": 2.0198861554247185, + "grad_norm": 14.826360702514648, + "learning_rate": 1.4879488530918032e-06, + "loss": 0.0392, + "num_input_tokens_seen": 55722408, + "step": 82680 + }, + { + "epoch": 2.0200083062565657, + "grad_norm": 65.82763671875, + "learning_rate": 1.4878744148698655e-06, + "loss": 0.0549, + "num_input_tokens_seen": 55725352, + "step": 82685 + }, + { + "epoch": 2.020130457088413, + "grad_norm": 15.8949556350708, + "learning_rate": 1.4877999730999738e-06, + "loss": 0.1252, + "num_input_tokens_seen": 55729320, + "step": 82690 + }, + { + "epoch": 2.02025260792026, + "grad_norm": 31.089956283569336, + "learning_rate": 1.4877255277826694e-06, + "loss": 0.0441, + "num_input_tokens_seen": 55732776, + "step": 82695 + }, + { + "epoch": 2.0203747587521073, + "grad_norm": 0.20987153053283691, + "learning_rate": 1.4876510789184939e-06, + "loss": 0.1352, + "num_input_tokens_seen": 55736104, + "step": 82700 + }, + { + "epoch": 2.0204969095839544, + "grad_norm": 0.026835085824131966, + "learning_rate": 1.4875766265079888e-06, + "loss": 0.0021, + "num_input_tokens_seen": 55739432, + "step": 82705 + }, + { + "epoch": 2.0206190604158016, + "grad_norm": 0.049756210297346115, + "learning_rate": 1.487502170551695e-06, + "loss": 0.028, + "num_input_tokens_seen": 55742568, + "step": 82710 + }, + { + "epoch": 2.0207412112476484, + "grad_norm": 0.08421991765499115, + "learning_rate": 1.4874277110501545e-06, + "loss": 0.06, + "num_input_tokens_seen": 55746344, + "step": 82715 + }, + { + "epoch": 2.0208633620794956, + "grad_norm": 0.017324473708868027, + "learning_rate": 1.4873532480039084e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55749096, + "step": 82720 + }, + { + "epoch": 2.0209855129113428, + "grad_norm": 0.3781276345252991, + "learning_rate": 1.4872787814134983e-06, + "loss": 0.0545, + "num_input_tokens_seen": 55752680, + "step": 82725 + }, + { + "epoch": 2.02110766374319, + "grad_norm": 0.02490142732858658, + "learning_rate": 1.487204311279466e-06, + "loss": 0.001, + "num_input_tokens_seen": 55756712, + "step": 82730 + }, + { + "epoch": 2.021229814575037, + "grad_norm": 0.017400478944182396, + "learning_rate": 1.4871298376023531e-06, + "loss": 0.1147, + "num_input_tokens_seen": 55759976, + "step": 82735 + }, + { + "epoch": 2.0213519654068843, + "grad_norm": 0.02002478949725628, + "learning_rate": 1.4870553603827007e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55763432, + "step": 82740 + }, + { + "epoch": 2.0214741162387315, + "grad_norm": 0.09595032036304474, + "learning_rate": 1.486980879621051e-06, + "loss": 0.0474, + "num_input_tokens_seen": 55766888, + "step": 82745 + }, + { + "epoch": 2.0215962670705787, + "grad_norm": 0.18604975938796997, + "learning_rate": 1.4869063953179452e-06, + "loss": 0.0007, + "num_input_tokens_seen": 55770280, + "step": 82750 + }, + { + "epoch": 2.021718417902426, + "grad_norm": 0.14785557985305786, + "learning_rate": 1.4868319074739252e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55774056, + "step": 82755 + }, + { + "epoch": 2.021840568734273, + "grad_norm": 0.016452603042125702, + "learning_rate": 1.4867574160895327e-06, + "loss": 0.0274, + "num_input_tokens_seen": 55777320, + "step": 82760 + }, + { + "epoch": 2.0219627195661203, + "grad_norm": 42.86761474609375, + "learning_rate": 1.4866829211653092e-06, + "loss": 0.0312, + "num_input_tokens_seen": 55780904, + "step": 82765 + }, + { + "epoch": 2.0220848703979675, + "grad_norm": 0.06126724183559418, + "learning_rate": 1.4866084227017966e-06, + "loss": 0.0282, + "num_input_tokens_seen": 55783784, + "step": 82770 + }, + { + "epoch": 2.0222070212298147, + "grad_norm": 45.65149688720703, + "learning_rate": 1.4865339206995367e-06, + "loss": 0.0368, + "num_input_tokens_seen": 55787560, + "step": 82775 + }, + { + "epoch": 2.022329172061662, + "grad_norm": 0.022739626467227936, + "learning_rate": 1.486459415159071e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55791336, + "step": 82780 + }, + { + "epoch": 2.022451322893509, + "grad_norm": 0.04010211303830147, + "learning_rate": 1.486384906080942e-06, + "loss": 0.0192, + "num_input_tokens_seen": 55796456, + "step": 82785 + }, + { + "epoch": 2.0225734737253562, + "grad_norm": 0.02531035989522934, + "learning_rate": 1.4863103934656908e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55800744, + "step": 82790 + }, + { + "epoch": 2.0226956245572034, + "grad_norm": 0.09000199288129807, + "learning_rate": 1.4862358773138599e-06, + "loss": 0.0401, + "num_input_tokens_seen": 55804136, + "step": 82795 + }, + { + "epoch": 2.0228177753890506, + "grad_norm": 0.01600159890949726, + "learning_rate": 1.486161357625991e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55807528, + "step": 82800 + }, + { + "epoch": 2.0229399262208974, + "grad_norm": 0.04003525152802467, + "learning_rate": 1.4860868344026258e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55810920, + "step": 82805 + }, + { + "epoch": 2.0230620770527445, + "grad_norm": 44.31636428833008, + "learning_rate": 1.486012307644306e-06, + "loss": 0.0823, + "num_input_tokens_seen": 55814120, + "step": 82810 + }, + { + "epoch": 2.0231842278845917, + "grad_norm": 0.14130207896232605, + "learning_rate": 1.4859377773515745e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55817448, + "step": 82815 + }, + { + "epoch": 2.023306378716439, + "grad_norm": 0.048643555492162704, + "learning_rate": 1.4858632435249728e-06, + "loss": 0.0418, + "num_input_tokens_seen": 55820712, + "step": 82820 + }, + { + "epoch": 2.023428529548286, + "grad_norm": 0.0007211269694380462, + "learning_rate": 1.4857887061650426e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55824296, + "step": 82825 + }, + { + "epoch": 2.0235506803801333, + "grad_norm": 0.0032669936772435904, + "learning_rate": 1.4857141652723264e-06, + "loss": 0.1306, + "num_input_tokens_seen": 55827688, + "step": 82830 + }, + { + "epoch": 2.0236728312119805, + "grad_norm": 0.05028359591960907, + "learning_rate": 1.4856396208473662e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55832168, + "step": 82835 + }, + { + "epoch": 2.0237949820438277, + "grad_norm": 23.42269515991211, + "learning_rate": 1.4855650728907038e-06, + "loss": 0.0759, + "num_input_tokens_seen": 55835624, + "step": 82840 + }, + { + "epoch": 2.023917132875675, + "grad_norm": 0.004075738601386547, + "learning_rate": 1.4854905214028817e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55839080, + "step": 82845 + }, + { + "epoch": 2.024039283707522, + "grad_norm": 0.13513143360614777, + "learning_rate": 1.4854159663844423e-06, + "loss": 0.0642, + "num_input_tokens_seen": 55842216, + "step": 82850 + }, + { + "epoch": 2.0241614345393693, + "grad_norm": 0.07616754621267319, + "learning_rate": 1.4853414078359272e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55845672, + "step": 82855 + }, + { + "epoch": 2.0242835853712164, + "grad_norm": 0.0007707093027420342, + "learning_rate": 1.485266845757879e-06, + "loss": 0.0749, + "num_input_tokens_seen": 55849704, + "step": 82860 + }, + { + "epoch": 2.0244057362030636, + "grad_norm": 0.025444896891713142, + "learning_rate": 1.4851922801508393e-06, + "loss": 0.007, + "num_input_tokens_seen": 55853288, + "step": 82865 + }, + { + "epoch": 2.024527887034911, + "grad_norm": 19.272645950317383, + "learning_rate": 1.4851177110153512e-06, + "loss": 0.0792, + "num_input_tokens_seen": 55856616, + "step": 82870 + }, + { + "epoch": 2.024650037866758, + "grad_norm": 0.09565642476081848, + "learning_rate": 1.4850431383519563e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55860200, + "step": 82875 + }, + { + "epoch": 2.024772188698605, + "grad_norm": 0.13231025636196136, + "learning_rate": 1.4849685621611976e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55863784, + "step": 82880 + }, + { + "epoch": 2.0248943395304524, + "grad_norm": 0.023808278143405914, + "learning_rate": 1.4848939824436171e-06, + "loss": 0.0114, + "num_input_tokens_seen": 55867048, + "step": 82885 + }, + { + "epoch": 2.0250164903622996, + "grad_norm": 0.07147572934627533, + "learning_rate": 1.4848193991997572e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55870376, + "step": 82890 + }, + { + "epoch": 2.0251386411941463, + "grad_norm": 0.002442893572151661, + "learning_rate": 1.4847448124301598e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55874088, + "step": 82895 + }, + { + "epoch": 2.0252607920259935, + "grad_norm": 1.2758766412734985, + "learning_rate": 1.484670222135368e-06, + "loss": 0.0276, + "num_input_tokens_seen": 55877224, + "step": 82900 + }, + { + "epoch": 2.0253829428578407, + "grad_norm": 0.016900723800063133, + "learning_rate": 1.484595628315924e-06, + "loss": 0.0953, + "num_input_tokens_seen": 55880808, + "step": 82905 + }, + { + "epoch": 2.025505093689688, + "grad_norm": 0.021285761147737503, + "learning_rate": 1.48452103097237e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55884200, + "step": 82910 + }, + { + "epoch": 2.025627244521535, + "grad_norm": 0.003971973434090614, + "learning_rate": 1.4844464301052494e-06, + "loss": 0.0919, + "num_input_tokens_seen": 55887784, + "step": 82915 + }, + { + "epoch": 2.0257493953533823, + "grad_norm": 0.038204390555620193, + "learning_rate": 1.4843718257151034e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55890920, + "step": 82920 + }, + { + "epoch": 2.0258715461852295, + "grad_norm": 0.005248145200312138, + "learning_rate": 1.4842972178024753e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55894248, + "step": 82925 + }, + { + "epoch": 2.0259936970170767, + "grad_norm": 0.012961753644049168, + "learning_rate": 1.4842226063679077e-06, + "loss": 0.0377, + "num_input_tokens_seen": 55897192, + "step": 82930 + }, + { + "epoch": 2.026115847848924, + "grad_norm": 0.08081217110157013, + "learning_rate": 1.484147991411943e-06, + "loss": 0.0003, + "num_input_tokens_seen": 55900136, + "step": 82935 + }, + { + "epoch": 2.026237998680771, + "grad_norm": 0.13495701551437378, + "learning_rate": 1.484073372935124e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55903912, + "step": 82940 + }, + { + "epoch": 2.0263601495126182, + "grad_norm": 0.006603249814361334, + "learning_rate": 1.4839987509379933e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55906920, + "step": 82945 + }, + { + "epoch": 2.0264823003444654, + "grad_norm": 33.194488525390625, + "learning_rate": 1.4839241254210932e-06, + "loss": 0.0399, + "num_input_tokens_seen": 55910440, + "step": 82950 + }, + { + "epoch": 2.0266044511763126, + "grad_norm": 2.8557236194610596, + "learning_rate": 1.483849496384967e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55913768, + "step": 82955 + }, + { + "epoch": 2.02672660200816, + "grad_norm": 0.006654535885900259, + "learning_rate": 1.483774863830157e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55916712, + "step": 82960 + }, + { + "epoch": 2.026848752840007, + "grad_norm": 2.445951223373413, + "learning_rate": 1.483700227757206e-06, + "loss": 0.0005, + "num_input_tokens_seen": 55919848, + "step": 82965 + }, + { + "epoch": 2.026970903671854, + "grad_norm": 0.012774002738296986, + "learning_rate": 1.4836255881666568e-06, + "loss": 0.159, + "num_input_tokens_seen": 55923368, + "step": 82970 + }, + { + "epoch": 2.0270930545037014, + "grad_norm": 0.0121014304459095, + "learning_rate": 1.4835509450590525e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55926376, + "step": 82975 + }, + { + "epoch": 2.0272152053355486, + "grad_norm": 0.02520882338285446, + "learning_rate": 1.4834762984349354e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55930152, + "step": 82980 + }, + { + "epoch": 2.0273373561673953, + "grad_norm": 0.008401707746088505, + "learning_rate": 1.4834016482948489e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55933288, + "step": 82985 + }, + { + "epoch": 2.0274595069992425, + "grad_norm": 0.006767922546714544, + "learning_rate": 1.4833269946393353e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55937704, + "step": 82990 + }, + { + "epoch": 2.0275816578310897, + "grad_norm": 0.025144563987851143, + "learning_rate": 1.483252337468938e-06, + "loss": 0.0213, + "num_input_tokens_seen": 55940776, + "step": 82995 + }, + { + "epoch": 2.027703808662937, + "grad_norm": 0.03302784636616707, + "learning_rate": 1.4831776767841996e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55943912, + "step": 83000 + }, + { + "epoch": 2.027825959494784, + "grad_norm": 0.19085726141929626, + "learning_rate": 1.4831030125856633e-06, + "loss": 0.048, + "num_input_tokens_seen": 55947368, + "step": 83005 + }, + { + "epoch": 2.0279481103266312, + "grad_norm": 0.0025873398408293724, + "learning_rate": 1.4830283448738718e-06, + "loss": 0.0642, + "num_input_tokens_seen": 55950824, + "step": 83010 + }, + { + "epoch": 2.0280702611584784, + "grad_norm": 0.28894203901290894, + "learning_rate": 1.4829536736493685e-06, + "loss": 0.1304, + "num_input_tokens_seen": 55954600, + "step": 83015 + }, + { + "epoch": 2.0281924119903256, + "grad_norm": 0.006737336050719023, + "learning_rate": 1.482878998912696e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55958376, + "step": 83020 + }, + { + "epoch": 2.028314562822173, + "grad_norm": 0.04847032576799393, + "learning_rate": 1.4828043206643976e-06, + "loss": 0.0407, + "num_input_tokens_seen": 55961320, + "step": 83025 + }, + { + "epoch": 2.02843671365402, + "grad_norm": 0.0007056460017338395, + "learning_rate": 1.4827296389050161e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55964648, + "step": 83030 + }, + { + "epoch": 2.028558864485867, + "grad_norm": 0.14868846535682678, + "learning_rate": 1.482654953635095e-06, + "loss": 0.0479, + "num_input_tokens_seen": 55967976, + "step": 83035 + }, + { + "epoch": 2.0286810153177144, + "grad_norm": 0.004213292151689529, + "learning_rate": 1.4825802648551774e-06, + "loss": 0.0001, + "num_input_tokens_seen": 55970920, + "step": 83040 + }, + { + "epoch": 2.0288031661495616, + "grad_norm": 0.019025111570954323, + "learning_rate": 1.482505572565806e-06, + "loss": 0.0491, + "num_input_tokens_seen": 55974184, + "step": 83045 + }, + { + "epoch": 2.0289253169814088, + "grad_norm": 56.368988037109375, + "learning_rate": 1.4824308767675247e-06, + "loss": 0.1344, + "num_input_tokens_seen": 55977640, + "step": 83050 + }, + { + "epoch": 2.029047467813256, + "grad_norm": 0.04584551602602005, + "learning_rate": 1.4823561774608759e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55980904, + "step": 83055 + }, + { + "epoch": 2.029169618645103, + "grad_norm": 0.1676466315984726, + "learning_rate": 1.4822814746464034e-06, + "loss": 0.034, + "num_input_tokens_seen": 55984552, + "step": 83060 + }, + { + "epoch": 2.0292917694769503, + "grad_norm": 0.10654141008853912, + "learning_rate": 1.4822067683246503e-06, + "loss": 0.0361, + "num_input_tokens_seen": 55987816, + "step": 83065 + }, + { + "epoch": 2.029413920308797, + "grad_norm": 0.03309716284275055, + "learning_rate": 1.4821320584961601e-06, + "loss": 0.0002, + "num_input_tokens_seen": 55991592, + "step": 83070 + }, + { + "epoch": 2.0295360711406443, + "grad_norm": 0.06819537281990051, + "learning_rate": 1.4820573451614757e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55994920, + "step": 83075 + }, + { + "epoch": 2.0296582219724915, + "grad_norm": 0.08601061999797821, + "learning_rate": 1.4819826283211407e-06, + "loss": 0.0004, + "num_input_tokens_seen": 55998184, + "step": 83080 + }, + { + "epoch": 2.0297803728043387, + "grad_norm": 0.19331924617290497, + "learning_rate": 1.4819079079756982e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56001704, + "step": 83085 + }, + { + "epoch": 2.029902523636186, + "grad_norm": 0.0026453707832843065, + "learning_rate": 1.4818331841256919e-06, + "loss": 0.1135, + "num_input_tokens_seen": 56004840, + "step": 83090 + }, + { + "epoch": 2.030024674468033, + "grad_norm": 0.051979582756757736, + "learning_rate": 1.481758456771665e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56007784, + "step": 83095 + }, + { + "epoch": 2.03014682529988, + "grad_norm": 0.013150978833436966, + "learning_rate": 1.481683725914161e-06, + "loss": 0.0512, + "num_input_tokens_seen": 56011496, + "step": 83100 + }, + { + "epoch": 2.0302689761317274, + "grad_norm": 0.11980671435594559, + "learning_rate": 1.4816089915537235e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56015016, + "step": 83105 + }, + { + "epoch": 2.0303911269635746, + "grad_norm": 0.020589416846632957, + "learning_rate": 1.4815342536908962e-06, + "loss": 0.0489, + "num_input_tokens_seen": 56018088, + "step": 83110 + }, + { + "epoch": 2.030513277795422, + "grad_norm": 0.0686459019780159, + "learning_rate": 1.4814595123262218e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56021544, + "step": 83115 + }, + { + "epoch": 2.030635428627269, + "grad_norm": 930.4236450195312, + "learning_rate": 1.4813847674602447e-06, + "loss": 0.1347, + "num_input_tokens_seen": 56025192, + "step": 83120 + }, + { + "epoch": 2.030757579459116, + "grad_norm": 0.17426523566246033, + "learning_rate": 1.4813100190935077e-06, + "loss": 0.124, + "num_input_tokens_seen": 56028392, + "step": 83125 + }, + { + "epoch": 2.0308797302909634, + "grad_norm": 0.061261072754859924, + "learning_rate": 1.4812352672265549e-06, + "loss": 0.0594, + "num_input_tokens_seen": 56031912, + "step": 83130 + }, + { + "epoch": 2.0310018811228105, + "grad_norm": 0.05861698463559151, + "learning_rate": 1.48116051185993e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56035304, + "step": 83135 + }, + { + "epoch": 2.0311240319546577, + "grad_norm": 0.025122350081801414, + "learning_rate": 1.4810857529941762e-06, + "loss": 0.0616, + "num_input_tokens_seen": 56038696, + "step": 83140 + }, + { + "epoch": 2.031246182786505, + "grad_norm": 0.013969638384878635, + "learning_rate": 1.4810109906298375e-06, + "loss": 0.076, + "num_input_tokens_seen": 56041896, + "step": 83145 + }, + { + "epoch": 2.031368333618352, + "grad_norm": 0.015171969309449196, + "learning_rate": 1.4809362247674578e-06, + "loss": 0.1202, + "num_input_tokens_seen": 56045416, + "step": 83150 + }, + { + "epoch": 2.0314904844501993, + "grad_norm": 0.002995851216837764, + "learning_rate": 1.48086145540758e-06, + "loss": 0.0196, + "num_input_tokens_seen": 56048936, + "step": 83155 + }, + { + "epoch": 2.031612635282046, + "grad_norm": 0.024294119328260422, + "learning_rate": 1.4807866825507487e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56052264, + "step": 83160 + }, + { + "epoch": 2.0317347861138932, + "grad_norm": 0.28206366300582886, + "learning_rate": 1.4807119061975074e-06, + "loss": 0.0346, + "num_input_tokens_seen": 56055464, + "step": 83165 + }, + { + "epoch": 2.0318569369457404, + "grad_norm": 16.086278915405273, + "learning_rate": 1.4806371263483995e-06, + "loss": 0.0758, + "num_input_tokens_seen": 56058792, + "step": 83170 + }, + { + "epoch": 2.0319790877775876, + "grad_norm": 0.005598350428044796, + "learning_rate": 1.4805623430039693e-06, + "loss": 0.025, + "num_input_tokens_seen": 56062248, + "step": 83175 + }, + { + "epoch": 2.032101238609435, + "grad_norm": 17.215137481689453, + "learning_rate": 1.4804875561647604e-06, + "loss": 0.0016, + "num_input_tokens_seen": 56065768, + "step": 83180 + }, + { + "epoch": 2.032223389441282, + "grad_norm": 11.782296180725098, + "learning_rate": 1.4804127658313168e-06, + "loss": 0.0915, + "num_input_tokens_seen": 56069032, + "step": 83185 + }, + { + "epoch": 2.032345540273129, + "grad_norm": 29.0299129486084, + "learning_rate": 1.4803379720041824e-06, + "loss": 0.0537, + "num_input_tokens_seen": 56072360, + "step": 83190 + }, + { + "epoch": 2.0324676911049764, + "grad_norm": 0.00915348157286644, + "learning_rate": 1.480263174683901e-06, + "loss": 0.0415, + "num_input_tokens_seen": 56075816, + "step": 83195 + }, + { + "epoch": 2.0325898419368236, + "grad_norm": 0.12145712971687317, + "learning_rate": 1.4801883738710168e-06, + "loss": 0.0252, + "num_input_tokens_seen": 56079080, + "step": 83200 + }, + { + "epoch": 2.0327119927686708, + "grad_norm": 266.5506591796875, + "learning_rate": 1.4801135695660734e-06, + "loss": 0.01, + "num_input_tokens_seen": 56082344, + "step": 83205 + }, + { + "epoch": 2.032834143600518, + "grad_norm": 0.19172163307666779, + "learning_rate": 1.480038761769615e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56085672, + "step": 83210 + }, + { + "epoch": 2.032956294432365, + "grad_norm": 15.215696334838867, + "learning_rate": 1.4799639504821857e-06, + "loss": 0.0973, + "num_input_tokens_seen": 56088872, + "step": 83215 + }, + { + "epoch": 2.0330784452642123, + "grad_norm": 0.0854988619685173, + "learning_rate": 1.4798891357043296e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56092200, + "step": 83220 + }, + { + "epoch": 2.0332005960960595, + "grad_norm": 1.005537986755371, + "learning_rate": 1.4798143174365902e-06, + "loss": 0.0547, + "num_input_tokens_seen": 56095720, + "step": 83225 + }, + { + "epoch": 2.0333227469279067, + "grad_norm": 0.24813531339168549, + "learning_rate": 1.4797394956795125e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56098856, + "step": 83230 + }, + { + "epoch": 2.033444897759754, + "grad_norm": 0.06855268031358719, + "learning_rate": 1.4796646704336397e-06, + "loss": 0.0008, + "num_input_tokens_seen": 56102248, + "step": 83235 + }, + { + "epoch": 2.033567048591601, + "grad_norm": 0.032730553299188614, + "learning_rate": 1.4795898416995167e-06, + "loss": 0.1074, + "num_input_tokens_seen": 56105640, + "step": 83240 + }, + { + "epoch": 2.0336891994234483, + "grad_norm": 0.02327229455113411, + "learning_rate": 1.479515009477687e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56108840, + "step": 83245 + }, + { + "epoch": 2.033811350255295, + "grad_norm": 0.020187703892588615, + "learning_rate": 1.4794401737686956e-06, + "loss": 0.0893, + "num_input_tokens_seen": 56112040, + "step": 83250 + }, + { + "epoch": 2.033933501087142, + "grad_norm": 0.05681902542710304, + "learning_rate": 1.4793653345730864e-06, + "loss": 0.0386, + "num_input_tokens_seen": 56115688, + "step": 83255 + }, + { + "epoch": 2.0340556519189894, + "grad_norm": 33.550838470458984, + "learning_rate": 1.4792904918914034e-06, + "loss": 0.0768, + "num_input_tokens_seen": 56118696, + "step": 83260 + }, + { + "epoch": 2.0341778027508366, + "grad_norm": 0.5451377630233765, + "learning_rate": 1.4792156457241912e-06, + "loss": 0.001, + "num_input_tokens_seen": 56122472, + "step": 83265 + }, + { + "epoch": 2.034299953582684, + "grad_norm": 1.9952231645584106, + "learning_rate": 1.4791407960719935e-06, + "loss": 0.0035, + "num_input_tokens_seen": 56125608, + "step": 83270 + }, + { + "epoch": 2.034422104414531, + "grad_norm": 0.2995298206806183, + "learning_rate": 1.4790659429353553e-06, + "loss": 0.0427, + "num_input_tokens_seen": 56128680, + "step": 83275 + }, + { + "epoch": 2.034544255246378, + "grad_norm": 0.5154494047164917, + "learning_rate": 1.4789910863148206e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56132968, + "step": 83280 + }, + { + "epoch": 2.0346664060782254, + "grad_norm": 18.13103675842285, + "learning_rate": 1.4789162262109338e-06, + "loss": 0.1166, + "num_input_tokens_seen": 56136168, + "step": 83285 + }, + { + "epoch": 2.0347885569100725, + "grad_norm": 0.30100521445274353, + "learning_rate": 1.4788413626242396e-06, + "loss": 0.0022, + "num_input_tokens_seen": 56139304, + "step": 83290 + }, + { + "epoch": 2.0349107077419197, + "grad_norm": 0.09149324893951416, + "learning_rate": 1.4787664955552822e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56142376, + "step": 83295 + }, + { + "epoch": 2.035032858573767, + "grad_norm": 0.21753954887390137, + "learning_rate": 1.4786916250046063e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56145832, + "step": 83300 + }, + { + "epoch": 2.035155009405614, + "grad_norm": 37.52893829345703, + "learning_rate": 1.4786167509727556e-06, + "loss": 0.002, + "num_input_tokens_seen": 56149544, + "step": 83305 + }, + { + "epoch": 2.0352771602374613, + "grad_norm": 0.015320549719035625, + "learning_rate": 1.4785418734602752e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56153512, + "step": 83310 + }, + { + "epoch": 2.0353993110693085, + "grad_norm": 54.01702117919922, + "learning_rate": 1.4784669924677102e-06, + "loss": 0.0235, + "num_input_tokens_seen": 56156904, + "step": 83315 + }, + { + "epoch": 2.0355214619011557, + "grad_norm": 0.06559678912162781, + "learning_rate": 1.4783921079956042e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56160360, + "step": 83320 + }, + { + "epoch": 2.035643612733003, + "grad_norm": 10.981175422668457, + "learning_rate": 1.478317220044502e-06, + "loss": 0.0439, + "num_input_tokens_seen": 56163112, + "step": 83325 + }, + { + "epoch": 2.03576576356485, + "grad_norm": 0.0281961802393198, + "learning_rate": 1.4782423286149484e-06, + "loss": 0.0, + "num_input_tokens_seen": 56166120, + "step": 83330 + }, + { + "epoch": 2.0358879143966973, + "grad_norm": 0.09851742535829544, + "learning_rate": 1.478167433707488e-06, + "loss": 0.0421, + "num_input_tokens_seen": 56169448, + "step": 83335 + }, + { + "epoch": 2.036010065228544, + "grad_norm": 168.35960388183594, + "learning_rate": 1.4780925353226651e-06, + "loss": 0.0176, + "num_input_tokens_seen": 56172648, + "step": 83340 + }, + { + "epoch": 2.036132216060391, + "grad_norm": 0.10178277641534805, + "learning_rate": 1.478017633461025e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56175976, + "step": 83345 + }, + { + "epoch": 2.0362543668922384, + "grad_norm": 0.021149465814232826, + "learning_rate": 1.477942728123112e-06, + "loss": 0.0514, + "num_input_tokens_seen": 56179496, + "step": 83350 + }, + { + "epoch": 2.0363765177240856, + "grad_norm": 0.0038066126871854067, + "learning_rate": 1.4778678193094712e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56183144, + "step": 83355 + }, + { + "epoch": 2.0364986685559328, + "grad_norm": 0.02866493910551071, + "learning_rate": 1.477792907020647e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56186600, + "step": 83360 + }, + { + "epoch": 2.03662081938778, + "grad_norm": 0.0018447516486048698, + "learning_rate": 1.477717991257184e-06, + "loss": 0.0156, + "num_input_tokens_seen": 56189928, + "step": 83365 + }, + { + "epoch": 2.036742970219627, + "grad_norm": 0.04497027024626732, + "learning_rate": 1.4776430720196275e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56193000, + "step": 83370 + }, + { + "epoch": 2.0368651210514743, + "grad_norm": 0.0963190421462059, + "learning_rate": 1.4775681493085218e-06, + "loss": 0.001, + "num_input_tokens_seen": 56196456, + "step": 83375 + }, + { + "epoch": 2.0369872718833215, + "grad_norm": 0.007222073618322611, + "learning_rate": 1.4774932231244125e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56200360, + "step": 83380 + }, + { + "epoch": 2.0371094227151687, + "grad_norm": 0.10533749312162399, + "learning_rate": 1.4774182934678438e-06, + "loss": 0.0095, + "num_input_tokens_seen": 56203944, + "step": 83385 + }, + { + "epoch": 2.037231573547016, + "grad_norm": 39.62832260131836, + "learning_rate": 1.477343360339361e-06, + "loss": 0.0367, + "num_input_tokens_seen": 56207848, + "step": 83390 + }, + { + "epoch": 2.037353724378863, + "grad_norm": 0.0017830976285040379, + "learning_rate": 1.4772684237395088e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56211880, + "step": 83395 + }, + { + "epoch": 2.0374758752107103, + "grad_norm": 0.002838310319930315, + "learning_rate": 1.4771934836688322e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56215144, + "step": 83400 + }, + { + "epoch": 2.0375980260425575, + "grad_norm": 0.017945390194654465, + "learning_rate": 1.477118540127876e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56218664, + "step": 83405 + }, + { + "epoch": 2.0377201768744047, + "grad_norm": 0.005239225924015045, + "learning_rate": 1.477043593117186e-06, + "loss": 0.0437, + "num_input_tokens_seen": 56222568, + "step": 83410 + }, + { + "epoch": 2.037842327706252, + "grad_norm": 0.061735525727272034, + "learning_rate": 1.4769686426373065e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56226536, + "step": 83415 + }, + { + "epoch": 2.037964478538099, + "grad_norm": 0.0004880604974459857, + "learning_rate": 1.4768936886887826e-06, + "loss": 0.059, + "num_input_tokens_seen": 56230120, + "step": 83420 + }, + { + "epoch": 2.0380866293699462, + "grad_norm": 0.0011679143644869328, + "learning_rate": 1.4768187312721598e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56233384, + "step": 83425 + }, + { + "epoch": 2.038208780201793, + "grad_norm": 0.0034079423639923334, + "learning_rate": 1.4767437703879825e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56236904, + "step": 83430 + }, + { + "epoch": 2.03833093103364, + "grad_norm": 0.08557354658842087, + "learning_rate": 1.4766688060367965e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56239848, + "step": 83435 + }, + { + "epoch": 2.0384530818654873, + "grad_norm": 0.8352116346359253, + "learning_rate": 1.4765938382191468e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56243176, + "step": 83440 + }, + { + "epoch": 2.0385752326973345, + "grad_norm": 0.005509098991751671, + "learning_rate": 1.4765188669355784e-06, + "loss": 0.0522, + "num_input_tokens_seen": 56246760, + "step": 83445 + }, + { + "epoch": 2.0386973835291817, + "grad_norm": 0.07742209732532501, + "learning_rate": 1.4764438921866367e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56249640, + "step": 83450 + }, + { + "epoch": 2.038819534361029, + "grad_norm": 0.008906389586627483, + "learning_rate": 1.476368913972867e-06, + "loss": 0.0604, + "num_input_tokens_seen": 56252648, + "step": 83455 + }, + { + "epoch": 2.038941685192876, + "grad_norm": 0.009594157338142395, + "learning_rate": 1.4762939322948142e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56255720, + "step": 83460 + }, + { + "epoch": 2.0390638360247233, + "grad_norm": 0.20015442371368408, + "learning_rate": 1.4762189471530237e-06, + "loss": 0.1104, + "num_input_tokens_seen": 56258536, + "step": 83465 + }, + { + "epoch": 2.0391859868565705, + "grad_norm": 18.550580978393555, + "learning_rate": 1.4761439585480413e-06, + "loss": 0.1059, + "num_input_tokens_seen": 56262248, + "step": 83470 + }, + { + "epoch": 2.0393081376884177, + "grad_norm": 0.43347489833831787, + "learning_rate": 1.4760689664804117e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56266408, + "step": 83475 + }, + { + "epoch": 2.039430288520265, + "grad_norm": 0.0659785196185112, + "learning_rate": 1.4759939709506808e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56269928, + "step": 83480 + }, + { + "epoch": 2.039552439352112, + "grad_norm": 0.0010263713775202632, + "learning_rate": 1.4759189719593936e-06, + "loss": 0.0291, + "num_input_tokens_seen": 56273704, + "step": 83485 + }, + { + "epoch": 2.0396745901839592, + "grad_norm": 0.37888064980506897, + "learning_rate": 1.4758439695070956e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56277288, + "step": 83490 + }, + { + "epoch": 2.0397967410158064, + "grad_norm": 501.5647888183594, + "learning_rate": 1.475768963594332e-06, + "loss": 0.0345, + "num_input_tokens_seen": 56280744, + "step": 83495 + }, + { + "epoch": 2.0399188918476536, + "grad_norm": 0.025557933375239372, + "learning_rate": 1.4756939542216488e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56284008, + "step": 83500 + }, + { + "epoch": 2.040041042679501, + "grad_norm": 0.005212422460317612, + "learning_rate": 1.4756189413895912e-06, + "loss": 0.0383, + "num_input_tokens_seen": 56287400, + "step": 83505 + }, + { + "epoch": 2.040163193511348, + "grad_norm": 0.025140509009361267, + "learning_rate": 1.4755439250987046e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56290536, + "step": 83510 + }, + { + "epoch": 2.040285344343195, + "grad_norm": 0.016255099326372147, + "learning_rate": 1.475468905349535e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56293864, + "step": 83515 + }, + { + "epoch": 2.040407495175042, + "grad_norm": 1.7284395694732666, + "learning_rate": 1.4753938821426274e-06, + "loss": 0.0621, + "num_input_tokens_seen": 56296872, + "step": 83520 + }, + { + "epoch": 2.040529646006889, + "grad_norm": 613.140625, + "learning_rate": 1.4753188554785276e-06, + "loss": 0.0216, + "num_input_tokens_seen": 56300008, + "step": 83525 + }, + { + "epoch": 2.0406517968387363, + "grad_norm": 0.010204845108091831, + "learning_rate": 1.4752438253577816e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56303272, + "step": 83530 + }, + { + "epoch": 2.0407739476705835, + "grad_norm": 0.018574582412838936, + "learning_rate": 1.4751687917809342e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56306664, + "step": 83535 + }, + { + "epoch": 2.0408960985024307, + "grad_norm": 31.733015060424805, + "learning_rate": 1.4750937547485316e-06, + "loss": 0.0503, + "num_input_tokens_seen": 56309608, + "step": 83540 + }, + { + "epoch": 2.041018249334278, + "grad_norm": 0.0498012974858284, + "learning_rate": 1.4750187142611195e-06, + "loss": 0.083, + "num_input_tokens_seen": 56313384, + "step": 83545 + }, + { + "epoch": 2.041140400166125, + "grad_norm": 0.008056207560002804, + "learning_rate": 1.4749436703192436e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56316392, + "step": 83550 + }, + { + "epoch": 2.0412625509979723, + "grad_norm": 0.011606397107243538, + "learning_rate": 1.4748686229234497e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56319784, + "step": 83555 + }, + { + "epoch": 2.0413847018298195, + "grad_norm": 29.21710968017578, + "learning_rate": 1.474793572074283e-06, + "loss": 0.1114, + "num_input_tokens_seen": 56322856, + "step": 83560 + }, + { + "epoch": 2.0415068526616666, + "grad_norm": 0.05728091299533844, + "learning_rate": 1.47471851777229e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56325928, + "step": 83565 + }, + { + "epoch": 2.041629003493514, + "grad_norm": 0.02401016652584076, + "learning_rate": 1.4746434600180165e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56329000, + "step": 83570 + }, + { + "epoch": 2.041751154325361, + "grad_norm": 33.782012939453125, + "learning_rate": 1.4745683988120079e-06, + "loss": 0.0626, + "num_input_tokens_seen": 56332392, + "step": 83575 + }, + { + "epoch": 2.041873305157208, + "grad_norm": 0.0074986666440963745, + "learning_rate": 1.4744933341548105e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56335720, + "step": 83580 + }, + { + "epoch": 2.0419954559890554, + "grad_norm": 0.004022131208330393, + "learning_rate": 1.4744182660469697e-06, + "loss": 0.0433, + "num_input_tokens_seen": 56338984, + "step": 83585 + }, + { + "epoch": 2.0421176068209026, + "grad_norm": 0.6381987929344177, + "learning_rate": 1.4743431944890315e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56342504, + "step": 83590 + }, + { + "epoch": 2.04223975765275, + "grad_norm": 33.13918685913086, + "learning_rate": 1.4742681194815423e-06, + "loss": 0.107, + "num_input_tokens_seen": 56346088, + "step": 83595 + }, + { + "epoch": 2.042361908484597, + "grad_norm": 0.03665100038051605, + "learning_rate": 1.4741930410250477e-06, + "loss": 0.0558, + "num_input_tokens_seen": 56349672, + "step": 83600 + }, + { + "epoch": 2.042484059316444, + "grad_norm": 19.979780197143555, + "learning_rate": 1.4741179591200936e-06, + "loss": 0.0592, + "num_input_tokens_seen": 56353064, + "step": 83605 + }, + { + "epoch": 2.042606210148291, + "grad_norm": 0.08430862426757812, + "learning_rate": 1.4740428737672263e-06, + "loss": 0.0792, + "num_input_tokens_seen": 56356072, + "step": 83610 + }, + { + "epoch": 2.042728360980138, + "grad_norm": 50.88820266723633, + "learning_rate": 1.4739677849669919e-06, + "loss": 0.0275, + "num_input_tokens_seen": 56359656, + "step": 83615 + }, + { + "epoch": 2.0428505118119853, + "grad_norm": 0.003031768836081028, + "learning_rate": 1.4738926927199358e-06, + "loss": 0.0653, + "num_input_tokens_seen": 56363048, + "step": 83620 + }, + { + "epoch": 2.0429726626438325, + "grad_norm": 12.282257080078125, + "learning_rate": 1.473817597026605e-06, + "loss": 0.0515, + "num_input_tokens_seen": 56366632, + "step": 83625 + }, + { + "epoch": 2.0430948134756797, + "grad_norm": 0.33622220158576965, + "learning_rate": 1.4737424978875453e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56370152, + "step": 83630 + }, + { + "epoch": 2.043216964307527, + "grad_norm": 0.0007160211098380387, + "learning_rate": 1.4736673953033023e-06, + "loss": 0.0693, + "num_input_tokens_seen": 56373416, + "step": 83635 + }, + { + "epoch": 2.043339115139374, + "grad_norm": 0.8499990105628967, + "learning_rate": 1.473592289274423e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56376488, + "step": 83640 + }, + { + "epoch": 2.0434612659712212, + "grad_norm": 0.2281034141778946, + "learning_rate": 1.473517179801453e-06, + "loss": 0.0482, + "num_input_tokens_seen": 56379624, + "step": 83645 + }, + { + "epoch": 2.0435834168030684, + "grad_norm": 0.23095224797725677, + "learning_rate": 1.4734420668849384e-06, + "loss": 0.0008, + "num_input_tokens_seen": 56382760, + "step": 83650 + }, + { + "epoch": 2.0437055676349156, + "grad_norm": 0.08366420865058899, + "learning_rate": 1.4733669505254263e-06, + "loss": 0.0539, + "num_input_tokens_seen": 56386856, + "step": 83655 + }, + { + "epoch": 2.043827718466763, + "grad_norm": 0.39161205291748047, + "learning_rate": 1.473291830723462e-06, + "loss": 0.0019, + "num_input_tokens_seen": 56390504, + "step": 83660 + }, + { + "epoch": 2.04394986929861, + "grad_norm": 0.48570919036865234, + "learning_rate": 1.4732167074795925e-06, + "loss": 0.0344, + "num_input_tokens_seen": 56393448, + "step": 83665 + }, + { + "epoch": 2.044072020130457, + "grad_norm": 0.008141516707837582, + "learning_rate": 1.4731415807943638e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56397288, + "step": 83670 + }, + { + "epoch": 2.0441941709623044, + "grad_norm": 0.05445622652769089, + "learning_rate": 1.4730664506683219e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56400744, + "step": 83675 + }, + { + "epoch": 2.0443163217941516, + "grad_norm": 0.005790103226900101, + "learning_rate": 1.4729913171020138e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56404200, + "step": 83680 + }, + { + "epoch": 2.0444384726259988, + "grad_norm": 0.17963790893554688, + "learning_rate": 1.4729161800959857e-06, + "loss": 0.0814, + "num_input_tokens_seen": 56407272, + "step": 83685 + }, + { + "epoch": 2.044560623457846, + "grad_norm": 0.009207534603774548, + "learning_rate": 1.4728410396507839e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56410344, + "step": 83690 + }, + { + "epoch": 2.0446827742896927, + "grad_norm": 0.03656099736690521, + "learning_rate": 1.4727658957669548e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56413480, + "step": 83695 + }, + { + "epoch": 2.04480492512154, + "grad_norm": 0.007025151047855616, + "learning_rate": 1.472690748445045e-06, + "loss": 0.0, + "num_input_tokens_seen": 56416680, + "step": 83700 + }, + { + "epoch": 2.044927075953387, + "grad_norm": 0.008257608860731125, + "learning_rate": 1.4726155976856012e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56420072, + "step": 83705 + }, + { + "epoch": 2.0450492267852343, + "grad_norm": 0.005688189063221216, + "learning_rate": 1.4725404434891693e-06, + "loss": 0.0592, + "num_input_tokens_seen": 56423784, + "step": 83710 + }, + { + "epoch": 2.0451713776170815, + "grad_norm": 0.0242256298661232, + "learning_rate": 1.472465285856296e-06, + "loss": 0.0205, + "num_input_tokens_seen": 56427688, + "step": 83715 + }, + { + "epoch": 2.0452935284489286, + "grad_norm": 0.026224689558148384, + "learning_rate": 1.4723901247875283e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56430888, + "step": 83720 + }, + { + "epoch": 2.045415679280776, + "grad_norm": 0.013745622709393501, + "learning_rate": 1.4723149602834127e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56434152, + "step": 83725 + }, + { + "epoch": 2.045537830112623, + "grad_norm": 0.00482986168935895, + "learning_rate": 1.4722397923444955e-06, + "loss": 0.0, + "num_input_tokens_seen": 56437416, + "step": 83730 + }, + { + "epoch": 2.04565998094447, + "grad_norm": 0.018875837326049805, + "learning_rate": 1.4721646209713239e-06, + "loss": 0.0, + "num_input_tokens_seen": 56440744, + "step": 83735 + }, + { + "epoch": 2.0457821317763174, + "grad_norm": 0.013076196424663067, + "learning_rate": 1.472089446164444e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56443752, + "step": 83740 + }, + { + "epoch": 2.0459042826081646, + "grad_norm": 0.03415194898843765, + "learning_rate": 1.4720142679244022e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56447016, + "step": 83745 + }, + { + "epoch": 2.046026433440012, + "grad_norm": 0.008563662879168987, + "learning_rate": 1.471939086251746e-06, + "loss": 0.049, + "num_input_tokens_seen": 56450536, + "step": 83750 + }, + { + "epoch": 2.046148584271859, + "grad_norm": 0.009224042296409607, + "learning_rate": 1.471863901147022e-06, + "loss": 0.0, + "num_input_tokens_seen": 56453864, + "step": 83755 + }, + { + "epoch": 2.046270735103706, + "grad_norm": 20.467226028442383, + "learning_rate": 1.4717887126107766e-06, + "loss": 0.062, + "num_input_tokens_seen": 56457640, + "step": 83760 + }, + { + "epoch": 2.0463928859355534, + "grad_norm": 0.025449080392718315, + "learning_rate": 1.471713520643557e-06, + "loss": 0.0396, + "num_input_tokens_seen": 56461224, + "step": 83765 + }, + { + "epoch": 2.0465150367674005, + "grad_norm": 0.004977928474545479, + "learning_rate": 1.4716383252459096e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56464680, + "step": 83770 + }, + { + "epoch": 2.0466371875992477, + "grad_norm": 95.45176696777344, + "learning_rate": 1.4715631264183812e-06, + "loss": 0.0774, + "num_input_tokens_seen": 56468648, + "step": 83775 + }, + { + "epoch": 2.046759338431095, + "grad_norm": 0.08478084206581116, + "learning_rate": 1.4714879241615195e-06, + "loss": 0.0593, + "num_input_tokens_seen": 56472040, + "step": 83780 + }, + { + "epoch": 2.0468814892629417, + "grad_norm": 0.03638599067926407, + "learning_rate": 1.4714127184758703e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56475240, + "step": 83785 + }, + { + "epoch": 2.047003640094789, + "grad_norm": 0.023271914571523666, + "learning_rate": 1.4713375093619812e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56478760, + "step": 83790 + }, + { + "epoch": 2.047125790926636, + "grad_norm": 0.013252345845103264, + "learning_rate": 1.471262296820399e-06, + "loss": 0.0231, + "num_input_tokens_seen": 56482408, + "step": 83795 + }, + { + "epoch": 2.0472479417584832, + "grad_norm": 0.011788555420935154, + "learning_rate": 1.4711870808516706e-06, + "loss": 0.0009, + "num_input_tokens_seen": 56486056, + "step": 83800 + }, + { + "epoch": 2.0473700925903304, + "grad_norm": 0.037248142063617706, + "learning_rate": 1.4711118614563427e-06, + "loss": 0.0644, + "num_input_tokens_seen": 56489768, + "step": 83805 + }, + { + "epoch": 2.0474922434221776, + "grad_norm": 0.0038712075911462307, + "learning_rate": 1.4710366386349631e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56492904, + "step": 83810 + }, + { + "epoch": 2.047614394254025, + "grad_norm": 0.07566796988248825, + "learning_rate": 1.4709614123880783e-06, + "loss": 0.0652, + "num_input_tokens_seen": 56496168, + "step": 83815 + }, + { + "epoch": 2.047736545085872, + "grad_norm": 47.76158905029297, + "learning_rate": 1.470886182716235e-06, + "loss": 0.043, + "num_input_tokens_seen": 56499624, + "step": 83820 + }, + { + "epoch": 2.047858695917719, + "grad_norm": 13.864922523498535, + "learning_rate": 1.4708109496199815e-06, + "loss": 0.044, + "num_input_tokens_seen": 56502888, + "step": 83825 + }, + { + "epoch": 2.0479808467495664, + "grad_norm": 0.012407737784087658, + "learning_rate": 1.4707357130998635e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56506152, + "step": 83830 + }, + { + "epoch": 2.0481029975814136, + "grad_norm": 0.026929527521133423, + "learning_rate": 1.470660473156429e-06, + "loss": 0.1097, + "num_input_tokens_seen": 56509416, + "step": 83835 + }, + { + "epoch": 2.0482251484132608, + "grad_norm": 0.03948600962758064, + "learning_rate": 1.4705852297902248e-06, + "loss": 0.1012, + "num_input_tokens_seen": 56513320, + "step": 83840 + }, + { + "epoch": 2.048347299245108, + "grad_norm": 0.06307268887758255, + "learning_rate": 1.4705099830017983e-06, + "loss": 0.1403, + "num_input_tokens_seen": 56516776, + "step": 83845 + }, + { + "epoch": 2.048469450076955, + "grad_norm": 0.013860165141522884, + "learning_rate": 1.470434732791697e-06, + "loss": 0.1199, + "num_input_tokens_seen": 56520232, + "step": 83850 + }, + { + "epoch": 2.0485916009088023, + "grad_norm": 0.21431736648082733, + "learning_rate": 1.4703594791604674e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56523688, + "step": 83855 + }, + { + "epoch": 2.0487137517406495, + "grad_norm": 0.3006341755390167, + "learning_rate": 1.4702842221086573e-06, + "loss": 0.0463, + "num_input_tokens_seen": 56526888, + "step": 83860 + }, + { + "epoch": 2.0488359025724967, + "grad_norm": 0.27715471386909485, + "learning_rate": 1.470208961636814e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56530152, + "step": 83865 + }, + { + "epoch": 2.048958053404344, + "grad_norm": 0.04112695902585983, + "learning_rate": 1.4701336977454841e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56533800, + "step": 83870 + }, + { + "epoch": 2.0490802042361906, + "grad_norm": 0.04875229671597481, + "learning_rate": 1.470058430435216e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56537512, + "step": 83875 + }, + { + "epoch": 2.049202355068038, + "grad_norm": 0.16153334081172943, + "learning_rate": 1.4699831597065565e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56540904, + "step": 83880 + }, + { + "epoch": 2.049324505899885, + "grad_norm": 0.021278617903590202, + "learning_rate": 1.469907885560053e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56544040, + "step": 83885 + }, + { + "epoch": 2.049446656731732, + "grad_norm": 338.5051574707031, + "learning_rate": 1.4698326079962532e-06, + "loss": 0.0098, + "num_input_tokens_seen": 56547368, + "step": 83890 + }, + { + "epoch": 2.0495688075635794, + "grad_norm": 0.8432843685150146, + "learning_rate": 1.4697573270157038e-06, + "loss": 0.0327, + "num_input_tokens_seen": 56550696, + "step": 83895 + }, + { + "epoch": 2.0496909583954266, + "grad_norm": 0.017997203394770622, + "learning_rate": 1.469682042618953e-06, + "loss": 0.0614, + "num_input_tokens_seen": 56553960, + "step": 83900 + }, + { + "epoch": 2.0498131092272738, + "grad_norm": 0.022712845355272293, + "learning_rate": 1.469606754806548e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56557096, + "step": 83905 + }, + { + "epoch": 2.049935260059121, + "grad_norm": 0.005200402345508337, + "learning_rate": 1.4695314635790366e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56560232, + "step": 83910 + }, + { + "epoch": 2.050057410890968, + "grad_norm": 0.027068452909588814, + "learning_rate": 1.4694561689369657e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56563240, + "step": 83915 + }, + { + "epoch": 2.0501795617228153, + "grad_norm": 0.0025052025448530912, + "learning_rate": 1.4693808708808837e-06, + "loss": 0.0111, + "num_input_tokens_seen": 56567016, + "step": 83920 + }, + { + "epoch": 2.0503017125546625, + "grad_norm": 0.07576905190944672, + "learning_rate": 1.4693055694113377e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56570408, + "step": 83925 + }, + { + "epoch": 2.0504238633865097, + "grad_norm": 0.06434017419815063, + "learning_rate": 1.469230264528875e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56573608, + "step": 83930 + }, + { + "epoch": 2.050546014218357, + "grad_norm": 0.15855221450328827, + "learning_rate": 1.469154956234044e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56576872, + "step": 83935 + }, + { + "epoch": 2.050668165050204, + "grad_norm": 0.2547149360179901, + "learning_rate": 1.4690796445273918e-06, + "loss": 0.0714, + "num_input_tokens_seen": 56580200, + "step": 83940 + }, + { + "epoch": 2.0507903158820513, + "grad_norm": 0.0009536282741464674, + "learning_rate": 1.4690043294094665e-06, + "loss": 0.0373, + "num_input_tokens_seen": 56583784, + "step": 83945 + }, + { + "epoch": 2.0509124667138985, + "grad_norm": 0.00821103435009718, + "learning_rate": 1.4689290108808152e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56586920, + "step": 83950 + }, + { + "epoch": 2.0510346175457457, + "grad_norm": 0.00451961625367403, + "learning_rate": 1.4688536889419861e-06, + "loss": 0.1476, + "num_input_tokens_seen": 56589928, + "step": 83955 + }, + { + "epoch": 2.051156768377593, + "grad_norm": 0.020787635818123817, + "learning_rate": 1.468778363593527e-06, + "loss": 0.0411, + "num_input_tokens_seen": 56593384, + "step": 83960 + }, + { + "epoch": 2.0512789192094396, + "grad_norm": 0.005360802169889212, + "learning_rate": 1.4687030348359855e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56596648, + "step": 83965 + }, + { + "epoch": 2.051401070041287, + "grad_norm": 255.1057891845703, + "learning_rate": 1.4686277026699094e-06, + "loss": 0.0247, + "num_input_tokens_seen": 56600488, + "step": 83970 + }, + { + "epoch": 2.051523220873134, + "grad_norm": 29.247344970703125, + "learning_rate": 1.4685523670958466e-06, + "loss": 0.0652, + "num_input_tokens_seen": 56603752, + "step": 83975 + }, + { + "epoch": 2.051645371704981, + "grad_norm": 0.009687669575214386, + "learning_rate": 1.468477028114345e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56606760, + "step": 83980 + }, + { + "epoch": 2.0517675225368284, + "grad_norm": 0.019696874544024467, + "learning_rate": 1.4684016857259524e-06, + "loss": 0.0728, + "num_input_tokens_seen": 56610280, + "step": 83985 + }, + { + "epoch": 2.0518896733686756, + "grad_norm": 0.7689847350120544, + "learning_rate": 1.4683263399312171e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56613480, + "step": 83990 + }, + { + "epoch": 2.0520118242005227, + "grad_norm": 0.022250818088650703, + "learning_rate": 1.4682509907306863e-06, + "loss": 0.0446, + "num_input_tokens_seen": 56616744, + "step": 83995 + }, + { + "epoch": 2.05213397503237, + "grad_norm": 0.06082061305642128, + "learning_rate": 1.4681756381249085e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56620584, + "step": 84000 + }, + { + "epoch": 2.052256125864217, + "grad_norm": 0.33939114212989807, + "learning_rate": 1.4681002821144315e-06, + "loss": 0.0361, + "num_input_tokens_seen": 56624104, + "step": 84005 + }, + { + "epoch": 2.0523782766960643, + "grad_norm": 0.008764307014644146, + "learning_rate": 1.4680249226998033e-06, + "loss": 0.0764, + "num_input_tokens_seen": 56627560, + "step": 84010 + }, + { + "epoch": 2.0525004275279115, + "grad_norm": 9.919015884399414, + "learning_rate": 1.467949559881572e-06, + "loss": 0.0414, + "num_input_tokens_seen": 56630888, + "step": 84015 + }, + { + "epoch": 2.0526225783597587, + "grad_norm": 1.171688437461853, + "learning_rate": 1.467874193660286e-06, + "loss": 0.0008, + "num_input_tokens_seen": 56634280, + "step": 84020 + }, + { + "epoch": 2.052744729191606, + "grad_norm": 0.02687976323068142, + "learning_rate": 1.4677988240364922e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56637288, + "step": 84025 + }, + { + "epoch": 2.052866880023453, + "grad_norm": 104.11189270019531, + "learning_rate": 1.4677234510107402e-06, + "loss": 0.0377, + "num_input_tokens_seen": 56640552, + "step": 84030 + }, + { + "epoch": 2.0529890308553003, + "grad_norm": 0.005070993211120367, + "learning_rate": 1.4676480745835774e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56644008, + "step": 84035 + }, + { + "epoch": 2.0531111816871475, + "grad_norm": 0.042603764683008194, + "learning_rate": 1.4675726947555519e-06, + "loss": 0.0301, + "num_input_tokens_seen": 56647208, + "step": 84040 + }, + { + "epoch": 2.0532333325189946, + "grad_norm": 0.03279830515384674, + "learning_rate": 1.467497311527212e-06, + "loss": 0.0034, + "num_input_tokens_seen": 56651304, + "step": 84045 + }, + { + "epoch": 2.053355483350842, + "grad_norm": 0.023159446194767952, + "learning_rate": 1.467421924899106e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56654504, + "step": 84050 + }, + { + "epoch": 2.0534776341826886, + "grad_norm": 0.006754196248948574, + "learning_rate": 1.4673465348717817e-06, + "loss": 0.0613, + "num_input_tokens_seen": 56657832, + "step": 84055 + }, + { + "epoch": 2.0535997850145358, + "grad_norm": 0.013502690009772778, + "learning_rate": 1.4672711414457879e-06, + "loss": 0.025, + "num_input_tokens_seen": 56661736, + "step": 84060 + }, + { + "epoch": 2.053721935846383, + "grad_norm": 0.037468913942575455, + "learning_rate": 1.4671957446216728e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56664744, + "step": 84065 + }, + { + "epoch": 2.05384408667823, + "grad_norm": 0.05100453272461891, + "learning_rate": 1.4671203443999844e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56668328, + "step": 84070 + }, + { + "epoch": 2.0539662375100773, + "grad_norm": 0.013385049998760223, + "learning_rate": 1.4670449407812715e-06, + "loss": 0.0857, + "num_input_tokens_seen": 56671400, + "step": 84075 + }, + { + "epoch": 2.0540883883419245, + "grad_norm": 0.022386562079191208, + "learning_rate": 1.4669695337660818e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56674728, + "step": 84080 + }, + { + "epoch": 2.0542105391737717, + "grad_norm": 0.11775214225053787, + "learning_rate": 1.4668941233549642e-06, + "loss": 0.151, + "num_input_tokens_seen": 56678120, + "step": 84085 + }, + { + "epoch": 2.054332690005619, + "grad_norm": 0.08483375608921051, + "learning_rate": 1.4668187095484673e-06, + "loss": 0.1097, + "num_input_tokens_seen": 56681448, + "step": 84090 + }, + { + "epoch": 2.054454840837466, + "grad_norm": 0.05424126237630844, + "learning_rate": 1.4667432923471389e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56685160, + "step": 84095 + }, + { + "epoch": 2.0545769916693133, + "grad_norm": 0.3192615509033203, + "learning_rate": 1.4666678717515275e-06, + "loss": 0.0368, + "num_input_tokens_seen": 56688424, + "step": 84100 + }, + { + "epoch": 2.0546991425011605, + "grad_norm": 0.17142631113529205, + "learning_rate": 1.4665924477621824e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56691880, + "step": 84105 + }, + { + "epoch": 2.0548212933330077, + "grad_norm": 67.49655151367188, + "learning_rate": 1.466517020379651e-06, + "loss": 0.0392, + "num_input_tokens_seen": 56695464, + "step": 84110 + }, + { + "epoch": 2.054943444164855, + "grad_norm": 0.03887330740690231, + "learning_rate": 1.4664415896044826e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56699176, + "step": 84115 + }, + { + "epoch": 2.055065594996702, + "grad_norm": 19.39141845703125, + "learning_rate": 1.466366155437225e-06, + "loss": 0.0491, + "num_input_tokens_seen": 56702248, + "step": 84120 + }, + { + "epoch": 2.0551877458285492, + "grad_norm": 0.12292356789112091, + "learning_rate": 1.4662907178784277e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56705832, + "step": 84125 + }, + { + "epoch": 2.0553098966603964, + "grad_norm": 0.062030475586652756, + "learning_rate": 1.466215276928639e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56709160, + "step": 84130 + }, + { + "epoch": 2.0554320474922436, + "grad_norm": 598.9610595703125, + "learning_rate": 1.4661398325884074e-06, + "loss": 0.0376, + "num_input_tokens_seen": 56712936, + "step": 84135 + }, + { + "epoch": 2.0555541983240904, + "grad_norm": 20.79981231689453, + "learning_rate": 1.4660643848582813e-06, + "loss": 0.1215, + "num_input_tokens_seen": 56716456, + "step": 84140 + }, + { + "epoch": 2.0556763491559376, + "grad_norm": 0.03508620709180832, + "learning_rate": 1.4659889337388099e-06, + "loss": 0.057, + "num_input_tokens_seen": 56719656, + "step": 84145 + }, + { + "epoch": 2.0557984999877847, + "grad_norm": 0.03408919274806976, + "learning_rate": 1.4659134792305415e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56723112, + "step": 84150 + }, + { + "epoch": 2.055920650819632, + "grad_norm": 0.2693452835083008, + "learning_rate": 1.4658380213340249e-06, + "loss": 0.0401, + "num_input_tokens_seen": 56726248, + "step": 84155 + }, + { + "epoch": 2.056042801651479, + "grad_norm": 0.03239583224058151, + "learning_rate": 1.465762560049809e-06, + "loss": 0.0643, + "num_input_tokens_seen": 56729384, + "step": 84160 + }, + { + "epoch": 2.0561649524833263, + "grad_norm": 0.010898438282310963, + "learning_rate": 1.4656870953784426e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56733288, + "step": 84165 + }, + { + "epoch": 2.0562871033151735, + "grad_norm": 0.011616937816143036, + "learning_rate": 1.4656116273204742e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56736424, + "step": 84170 + }, + { + "epoch": 2.0564092541470207, + "grad_norm": 0.018615955486893654, + "learning_rate": 1.465536155876453e-06, + "loss": 0.0329, + "num_input_tokens_seen": 56739816, + "step": 84175 + }, + { + "epoch": 2.056531404978868, + "grad_norm": 0.007698644418269396, + "learning_rate": 1.4654606810469275e-06, + "loss": 0.1285, + "num_input_tokens_seen": 56743208, + "step": 84180 + }, + { + "epoch": 2.056653555810715, + "grad_norm": 0.06628992408514023, + "learning_rate": 1.4653852028324467e-06, + "loss": 0.0529, + "num_input_tokens_seen": 56746984, + "step": 84185 + }, + { + "epoch": 2.0567757066425623, + "grad_norm": 0.07687515765428543, + "learning_rate": 1.4653097212335594e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56750824, + "step": 84190 + }, + { + "epoch": 2.0568978574744095, + "grad_norm": 0.02513556368649006, + "learning_rate": 1.465234236250815e-06, + "loss": 0.0904, + "num_input_tokens_seen": 56754472, + "step": 84195 + }, + { + "epoch": 2.0570200083062566, + "grad_norm": 433.2476501464844, + "learning_rate": 1.4651587478847623e-06, + "loss": 0.0169, + "num_input_tokens_seen": 56757736, + "step": 84200 + }, + { + "epoch": 2.057142159138104, + "grad_norm": 17.649721145629883, + "learning_rate": 1.4650832561359496e-06, + "loss": 0.0745, + "num_input_tokens_seen": 56761128, + "step": 84205 + }, + { + "epoch": 2.057264309969951, + "grad_norm": 0.15260326862335205, + "learning_rate": 1.4650077610049264e-06, + "loss": 0.0949, + "num_input_tokens_seen": 56764584, + "step": 84210 + }, + { + "epoch": 2.057386460801798, + "grad_norm": 0.0442718043923378, + "learning_rate": 1.4649322624922418e-06, + "loss": 0.0992, + "num_input_tokens_seen": 56768488, + "step": 84215 + }, + { + "epoch": 2.0575086116336454, + "grad_norm": 0.018568266183137894, + "learning_rate": 1.4648567605984447e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56771880, + "step": 84220 + }, + { + "epoch": 2.0576307624654926, + "grad_norm": 0.47161176800727844, + "learning_rate": 1.4647812553240844e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56775464, + "step": 84225 + }, + { + "epoch": 2.0577529132973393, + "grad_norm": 0.22203360497951508, + "learning_rate": 1.4647057466697094e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56778984, + "step": 84230 + }, + { + "epoch": 2.0578750641291865, + "grad_norm": 0.007652414962649345, + "learning_rate": 1.4646302346358697e-06, + "loss": 0.0472, + "num_input_tokens_seen": 56782504, + "step": 84235 + }, + { + "epoch": 2.0579972149610337, + "grad_norm": 0.05677541717886925, + "learning_rate": 1.4645547192231134e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56785576, + "step": 84240 + }, + { + "epoch": 2.058119365792881, + "grad_norm": 0.020264064893126488, + "learning_rate": 1.4644792004319909e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56788968, + "step": 84245 + }, + { + "epoch": 2.058241516624728, + "grad_norm": 0.036386922001838684, + "learning_rate": 1.4644036782630502e-06, + "loss": 0.0503, + "num_input_tokens_seen": 56792040, + "step": 84250 + }, + { + "epoch": 2.0583636674565753, + "grad_norm": 0.1485571414232254, + "learning_rate": 1.4643281527168414e-06, + "loss": 0.11, + "num_input_tokens_seen": 56795688, + "step": 84255 + }, + { + "epoch": 2.0584858182884225, + "grad_norm": 0.04273013770580292, + "learning_rate": 1.464252623793913e-06, + "loss": 0.0372, + "num_input_tokens_seen": 56799016, + "step": 84260 + }, + { + "epoch": 2.0586079691202697, + "grad_norm": 0.012745685875415802, + "learning_rate": 1.464177091494815e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56801960, + "step": 84265 + }, + { + "epoch": 2.058730119952117, + "grad_norm": 0.023471122607588768, + "learning_rate": 1.4641015558200962e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56805352, + "step": 84270 + }, + { + "epoch": 2.058852270783964, + "grad_norm": 0.024831267073750496, + "learning_rate": 1.4640260167703058e-06, + "loss": 0.0008, + "num_input_tokens_seen": 56809576, + "step": 84275 + }, + { + "epoch": 2.0589744216158112, + "grad_norm": 0.020891066640615463, + "learning_rate": 1.4639504743459937e-06, + "loss": 0.0138, + "num_input_tokens_seen": 56812904, + "step": 84280 + }, + { + "epoch": 2.0590965724476584, + "grad_norm": 0.05312500149011612, + "learning_rate": 1.463874928547709e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56815784, + "step": 84285 + }, + { + "epoch": 2.0592187232795056, + "grad_norm": 0.2950860261917114, + "learning_rate": 1.463799379376001e-06, + "loss": 0.0387, + "num_input_tokens_seen": 56818792, + "step": 84290 + }, + { + "epoch": 2.059340874111353, + "grad_norm": 0.0019791696686297655, + "learning_rate": 1.463723826831419e-06, + "loss": 0.0357, + "num_input_tokens_seen": 56822568, + "step": 84295 + }, + { + "epoch": 2.0594630249432, + "grad_norm": 0.0492100790143013, + "learning_rate": 1.463648270914513e-06, + "loss": 0.0306, + "num_input_tokens_seen": 56825704, + "step": 84300 + }, + { + "epoch": 2.059585175775047, + "grad_norm": 52.4030876159668, + "learning_rate": 1.4635727116258316e-06, + "loss": 0.0431, + "num_input_tokens_seen": 56829032, + "step": 84305 + }, + { + "epoch": 2.0597073266068944, + "grad_norm": 0.014774279668927193, + "learning_rate": 1.4634971489659251e-06, + "loss": 0.0383, + "num_input_tokens_seen": 56832424, + "step": 84310 + }, + { + "epoch": 2.0598294774387416, + "grad_norm": 0.026415273547172546, + "learning_rate": 1.4634215829353425e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56835816, + "step": 84315 + }, + { + "epoch": 2.0599516282705883, + "grad_norm": 0.0518280453979969, + "learning_rate": 1.4633460135346334e-06, + "loss": 0.082, + "num_input_tokens_seen": 56839272, + "step": 84320 + }, + { + "epoch": 2.0600737791024355, + "grad_norm": 0.10253674536943436, + "learning_rate": 1.4632704407643477e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56842280, + "step": 84325 + }, + { + "epoch": 2.0601959299342827, + "grad_norm": 29.64972496032715, + "learning_rate": 1.4631948646250347e-06, + "loss": 0.0491, + "num_input_tokens_seen": 56845544, + "step": 84330 + }, + { + "epoch": 2.06031808076613, + "grad_norm": 0.05544205382466316, + "learning_rate": 1.4631192851172437e-06, + "loss": 0.0378, + "num_input_tokens_seen": 56849000, + "step": 84335 + }, + { + "epoch": 2.060440231597977, + "grad_norm": 0.2877916991710663, + "learning_rate": 1.4630437022415252e-06, + "loss": 0.1917, + "num_input_tokens_seen": 56852584, + "step": 84340 + }, + { + "epoch": 2.0605623824298243, + "grad_norm": 0.04226279631257057, + "learning_rate": 1.462968115998428e-06, + "loss": 0.0425, + "num_input_tokens_seen": 56856040, + "step": 84345 + }, + { + "epoch": 2.0606845332616714, + "grad_norm": 0.08056000620126724, + "learning_rate": 1.4628925263885025e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56859624, + "step": 84350 + }, + { + "epoch": 2.0608066840935186, + "grad_norm": 0.6615628600120544, + "learning_rate": 1.4628169334122979e-06, + "loss": 0.0103, + "num_input_tokens_seen": 56862696, + "step": 84355 + }, + { + "epoch": 2.060928834925366, + "grad_norm": 0.0882110670208931, + "learning_rate": 1.462741337070364e-06, + "loss": 0.0621, + "num_input_tokens_seen": 56866088, + "step": 84360 + }, + { + "epoch": 2.061050985757213, + "grad_norm": 0.029368186369538307, + "learning_rate": 1.4626657373632504e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56869416, + "step": 84365 + }, + { + "epoch": 2.06117313658906, + "grad_norm": 0.06895339488983154, + "learning_rate": 1.4625901342915074e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56873768, + "step": 84370 + }, + { + "epoch": 2.0612952874209074, + "grad_norm": 0.004868913907557726, + "learning_rate": 1.4625145278556846e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56876904, + "step": 84375 + }, + { + "epoch": 2.0614174382527546, + "grad_norm": 0.0036217966116964817, + "learning_rate": 1.4624389180563314e-06, + "loss": 0.0656, + "num_input_tokens_seen": 56880936, + "step": 84380 + }, + { + "epoch": 2.0615395890846018, + "grad_norm": 0.03403620421886444, + "learning_rate": 1.4623633048939984e-06, + "loss": 0.0701, + "num_input_tokens_seen": 56884392, + "step": 84385 + }, + { + "epoch": 2.061661739916449, + "grad_norm": 1.1728190183639526, + "learning_rate": 1.462287688369235e-06, + "loss": 0.0007, + "num_input_tokens_seen": 56887784, + "step": 84390 + }, + { + "epoch": 2.061783890748296, + "grad_norm": 0.02199065126478672, + "learning_rate": 1.4622120684825912e-06, + "loss": 0.0458, + "num_input_tokens_seen": 56891368, + "step": 84395 + }, + { + "epoch": 2.0619060415801433, + "grad_norm": 0.600640594959259, + "learning_rate": 1.4621364452346168e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56894888, + "step": 84400 + }, + { + "epoch": 2.0620281924119905, + "grad_norm": 412.3470153808594, + "learning_rate": 1.4620608186258617e-06, + "loss": 0.1089, + "num_input_tokens_seen": 56898216, + "step": 84405 + }, + { + "epoch": 2.0621503432438373, + "grad_norm": 0.09077963978052139, + "learning_rate": 1.4619851886568764e-06, + "loss": 0.0679, + "num_input_tokens_seen": 56901672, + "step": 84410 + }, + { + "epoch": 2.0622724940756845, + "grad_norm": 11.91611385345459, + "learning_rate": 1.4619095553282104e-06, + "loss": 0.1514, + "num_input_tokens_seen": 56905192, + "step": 84415 + }, + { + "epoch": 2.0623946449075317, + "grad_norm": 0.22250288724899292, + "learning_rate": 1.4618339186404138e-06, + "loss": 0.0006, + "num_input_tokens_seen": 56908584, + "step": 84420 + }, + { + "epoch": 2.062516795739379, + "grad_norm": 1.0068647861480713, + "learning_rate": 1.4617582785940369e-06, + "loss": 0.0012, + "num_input_tokens_seen": 56912232, + "step": 84425 + }, + { + "epoch": 2.062638946571226, + "grad_norm": 0.07770141959190369, + "learning_rate": 1.4616826351896294e-06, + "loss": 0.034, + "num_input_tokens_seen": 56915688, + "step": 84430 + }, + { + "epoch": 2.0627610974030732, + "grad_norm": 0.06902618706226349, + "learning_rate": 1.4616069884277417e-06, + "loss": 0.039, + "num_input_tokens_seen": 56919208, + "step": 84435 + }, + { + "epoch": 2.0628832482349204, + "grad_norm": 0.06890308111906052, + "learning_rate": 1.4615313383089238e-06, + "loss": 0.0011, + "num_input_tokens_seen": 56922600, + "step": 84440 + }, + { + "epoch": 2.0630053990667676, + "grad_norm": 0.021308647468686104, + "learning_rate": 1.4614556848337261e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56926120, + "step": 84445 + }, + { + "epoch": 2.063127549898615, + "grad_norm": 280.44140625, + "learning_rate": 1.4613800280026983e-06, + "loss": 0.0896, + "num_input_tokens_seen": 56929448, + "step": 84450 + }, + { + "epoch": 2.063249700730462, + "grad_norm": 0.09632374346256256, + "learning_rate": 1.4613043678163908e-06, + "loss": 0.0335, + "num_input_tokens_seen": 56933032, + "step": 84455 + }, + { + "epoch": 2.063371851562309, + "grad_norm": 24.449237823486328, + "learning_rate": 1.461228704275354e-06, + "loss": 0.1152, + "num_input_tokens_seen": 56936104, + "step": 84460 + }, + { + "epoch": 2.0634940023941564, + "grad_norm": 0.04614495486021042, + "learning_rate": 1.4611530373801379e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56939240, + "step": 84465 + }, + { + "epoch": 2.0636161532260036, + "grad_norm": 5.583603382110596, + "learning_rate": 1.461077367131293e-06, + "loss": 0.001, + "num_input_tokens_seen": 56942824, + "step": 84470 + }, + { + "epoch": 2.0637383040578507, + "grad_norm": 0.7003459334373474, + "learning_rate": 1.4610016935293695e-06, + "loss": 0.0352, + "num_input_tokens_seen": 56946408, + "step": 84475 + }, + { + "epoch": 2.063860454889698, + "grad_norm": 0.06893998384475708, + "learning_rate": 1.4609260165749175e-06, + "loss": 0.038, + "num_input_tokens_seen": 56949736, + "step": 84480 + }, + { + "epoch": 2.063982605721545, + "grad_norm": 0.03078891895711422, + "learning_rate": 1.4608503362684875e-06, + "loss": 0.0259, + "num_input_tokens_seen": 56952936, + "step": 84485 + }, + { + "epoch": 2.0641047565533923, + "grad_norm": 0.4498542249202728, + "learning_rate": 1.4607746526106299e-06, + "loss": 0.0005, + "num_input_tokens_seen": 56956328, + "step": 84490 + }, + { + "epoch": 2.0642269073852395, + "grad_norm": 0.4549460709095001, + "learning_rate": 1.4606989656018953e-06, + "loss": 0.0674, + "num_input_tokens_seen": 56959400, + "step": 84495 + }, + { + "epoch": 2.0643490582170863, + "grad_norm": 0.02741813100874424, + "learning_rate": 1.4606232752428338e-06, + "loss": 0.0004, + "num_input_tokens_seen": 56963048, + "step": 84500 + }, + { + "epoch": 2.0644712090489334, + "grad_norm": 0.032297734171152115, + "learning_rate": 1.460547581533996e-06, + "loss": 0.035, + "num_input_tokens_seen": 56966120, + "step": 84505 + }, + { + "epoch": 2.0645933598807806, + "grad_norm": 0.060209326446056366, + "learning_rate": 1.4604718844759325e-06, + "loss": 0.03, + "num_input_tokens_seen": 56969768, + "step": 84510 + }, + { + "epoch": 2.064715510712628, + "grad_norm": 0.008528118953108788, + "learning_rate": 1.4603961840691934e-06, + "loss": 0.0011, + "num_input_tokens_seen": 56972968, + "step": 84515 + }, + { + "epoch": 2.064837661544475, + "grad_norm": 73.88001251220703, + "learning_rate": 1.4603204803143293e-06, + "loss": 0.0861, + "num_input_tokens_seen": 56975912, + "step": 84520 + }, + { + "epoch": 2.064959812376322, + "grad_norm": 0.005481546279042959, + "learning_rate": 1.4602447732118907e-06, + "loss": 0.0002, + "num_input_tokens_seen": 56979752, + "step": 84525 + }, + { + "epoch": 2.0650819632081694, + "grad_norm": 0.0511334091424942, + "learning_rate": 1.4601690627624288e-06, + "loss": 0.0003, + "num_input_tokens_seen": 56983080, + "step": 84530 + }, + { + "epoch": 2.0652041140400166, + "grad_norm": 0.09094161540269852, + "learning_rate": 1.4600933489664934e-06, + "loss": 0.0565, + "num_input_tokens_seen": 56986600, + "step": 84535 + }, + { + "epoch": 2.0653262648718638, + "grad_norm": 0.052177079021930695, + "learning_rate": 1.4600176318246356e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56989928, + "step": 84540 + }, + { + "epoch": 2.065448415703711, + "grad_norm": 54.41684341430664, + "learning_rate": 1.4599419113374057e-06, + "loss": 0.1238, + "num_input_tokens_seen": 56993384, + "step": 84545 + }, + { + "epoch": 2.065570566535558, + "grad_norm": 0.010534948669373989, + "learning_rate": 1.4598661875053545e-06, + "loss": 0.0001, + "num_input_tokens_seen": 56996840, + "step": 84550 + }, + { + "epoch": 2.0656927173674053, + "grad_norm": 99.29583740234375, + "learning_rate": 1.459790460329033e-06, + "loss": 0.0555, + "num_input_tokens_seen": 57000104, + "step": 84555 + }, + { + "epoch": 2.0658148681992525, + "grad_norm": 1.3880772590637207, + "learning_rate": 1.4597147298089914e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57003560, + "step": 84560 + }, + { + "epoch": 2.0659370190310997, + "grad_norm": 0.003567177802324295, + "learning_rate": 1.4596389959457803e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57007080, + "step": 84565 + }, + { + "epoch": 2.066059169862947, + "grad_norm": 0.10701990872621536, + "learning_rate": 1.4595632587399513e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57010472, + "step": 84570 + }, + { + "epoch": 2.066181320694794, + "grad_norm": 0.3246977925300598, + "learning_rate": 1.4594875181920546e-06, + "loss": 0.0467, + "num_input_tokens_seen": 57013864, + "step": 84575 + }, + { + "epoch": 2.0663034715266413, + "grad_norm": 0.0017067211447283626, + "learning_rate": 1.4594117743026407e-06, + "loss": 0.0653, + "num_input_tokens_seen": 57017320, + "step": 84580 + }, + { + "epoch": 2.066425622358488, + "grad_norm": 37.886878967285156, + "learning_rate": 1.459336027072261e-06, + "loss": 0.0534, + "num_input_tokens_seen": 57021096, + "step": 84585 + }, + { + "epoch": 2.0665477731903352, + "grad_norm": 0.05324367433786392, + "learning_rate": 1.459260276501466e-06, + "loss": 0.0085, + "num_input_tokens_seen": 57024744, + "step": 84590 + }, + { + "epoch": 2.0666699240221824, + "grad_norm": 0.031217625364661217, + "learning_rate": 1.4591845225908073e-06, + "loss": 0.0676, + "num_input_tokens_seen": 57028008, + "step": 84595 + }, + { + "epoch": 2.0667920748540296, + "grad_norm": 0.09894023090600967, + "learning_rate": 1.4591087653408347e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57030952, + "step": 84600 + }, + { + "epoch": 2.066914225685877, + "grad_norm": 39.6179313659668, + "learning_rate": 1.4590330047521e-06, + "loss": 0.0689, + "num_input_tokens_seen": 57034216, + "step": 84605 + }, + { + "epoch": 2.067036376517724, + "grad_norm": 0.9363937973976135, + "learning_rate": 1.458957240825154e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57037672, + "step": 84610 + }, + { + "epoch": 2.067158527349571, + "grad_norm": 0.242109477519989, + "learning_rate": 1.458881473560547e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57040936, + "step": 84615 + }, + { + "epoch": 2.0672806781814184, + "grad_norm": 0.004208797123283148, + "learning_rate": 1.4588057029588308e-06, + "loss": 0.0725, + "num_input_tokens_seen": 57044264, + "step": 84620 + }, + { + "epoch": 2.0674028290132656, + "grad_norm": 33.177547454833984, + "learning_rate": 1.458729929020556e-06, + "loss": 0.0572, + "num_input_tokens_seen": 57047976, + "step": 84625 + }, + { + "epoch": 2.0675249798451127, + "grad_norm": 0.010071083903312683, + "learning_rate": 1.4586541517462739e-06, + "loss": 0.0851, + "num_input_tokens_seen": 57051752, + "step": 84630 + }, + { + "epoch": 2.06764713067696, + "grad_norm": 0.19519588351249695, + "learning_rate": 1.4585783711365355e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57055208, + "step": 84635 + }, + { + "epoch": 2.067769281508807, + "grad_norm": 0.014187058433890343, + "learning_rate": 1.4585025871918913e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57058792, + "step": 84640 + }, + { + "epoch": 2.0678914323406543, + "grad_norm": 0.22282736003398895, + "learning_rate": 1.4584267999128934e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57062760, + "step": 84645 + }, + { + "epoch": 2.0680135831725015, + "grad_norm": 14.674356460571289, + "learning_rate": 1.4583510093000923e-06, + "loss": 0.0536, + "num_input_tokens_seen": 57066216, + "step": 84650 + }, + { + "epoch": 2.0681357340043487, + "grad_norm": 0.17305071651935577, + "learning_rate": 1.4582752153540397e-06, + "loss": 0.0005, + "num_input_tokens_seen": 57069416, + "step": 84655 + }, + { + "epoch": 2.068257884836196, + "grad_norm": 0.8630813360214233, + "learning_rate": 1.4581994180752863e-06, + "loss": 0.0008, + "num_input_tokens_seen": 57073192, + "step": 84660 + }, + { + "epoch": 2.068380035668043, + "grad_norm": 0.07105296105146408, + "learning_rate": 1.4581236174643836e-06, + "loss": 0.0502, + "num_input_tokens_seen": 57076456, + "step": 84665 + }, + { + "epoch": 2.0685021864998903, + "grad_norm": 0.009249145165085793, + "learning_rate": 1.4580478135218828e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57080104, + "step": 84670 + }, + { + "epoch": 2.0686243373317375, + "grad_norm": 14.103489875793457, + "learning_rate": 1.4579720062483348e-06, + "loss": 0.0383, + "num_input_tokens_seen": 57083176, + "step": 84675 + }, + { + "epoch": 2.068746488163584, + "grad_norm": 0.008026017807424068, + "learning_rate": 1.4578961956442913e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57086440, + "step": 84680 + }, + { + "epoch": 2.0688686389954314, + "grad_norm": 0.046018462628126144, + "learning_rate": 1.4578203817103036e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57089512, + "step": 84685 + }, + { + "epoch": 2.0689907898272786, + "grad_norm": 0.26206257939338684, + "learning_rate": 1.4577445644469229e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57092584, + "step": 84690 + }, + { + "epoch": 2.0691129406591258, + "grad_norm": 0.18885137140750885, + "learning_rate": 1.4576687438547003e-06, + "loss": 0.064, + "num_input_tokens_seen": 57095656, + "step": 84695 + }, + { + "epoch": 2.069235091490973, + "grad_norm": 0.15367726981639862, + "learning_rate": 1.457592919934188e-06, + "loss": 0.0564, + "num_input_tokens_seen": 57099560, + "step": 84700 + }, + { + "epoch": 2.06935724232282, + "grad_norm": 0.02957310900092125, + "learning_rate": 1.4575170926859368e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57102952, + "step": 84705 + }, + { + "epoch": 2.0694793931546673, + "grad_norm": 0.05982162058353424, + "learning_rate": 1.4574412621104982e-06, + "loss": 0.0062, + "num_input_tokens_seen": 57106088, + "step": 84710 + }, + { + "epoch": 2.0696015439865145, + "grad_norm": 0.007052605506032705, + "learning_rate": 1.4573654282084236e-06, + "loss": 0.0901, + "num_input_tokens_seen": 57109352, + "step": 84715 + }, + { + "epoch": 2.0697236948183617, + "grad_norm": 0.0015864939196035266, + "learning_rate": 1.4572895909802644e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57112616, + "step": 84720 + }, + { + "epoch": 2.069845845650209, + "grad_norm": 0.12181193381547928, + "learning_rate": 1.4572137504265727e-06, + "loss": 0.0393, + "num_input_tokens_seen": 57116072, + "step": 84725 + }, + { + "epoch": 2.069967996482056, + "grad_norm": 0.0038857213221490383, + "learning_rate": 1.4571379065478995e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57119208, + "step": 84730 + }, + { + "epoch": 2.0700901473139033, + "grad_norm": 0.016026942059397697, + "learning_rate": 1.4570620593447967e-06, + "loss": 0.0017, + "num_input_tokens_seen": 57122344, + "step": 84735 + }, + { + "epoch": 2.0702122981457505, + "grad_norm": 0.017551347613334656, + "learning_rate": 1.4569862088178151e-06, + "loss": 0.0432, + "num_input_tokens_seen": 57125480, + "step": 84740 + }, + { + "epoch": 2.0703344489775977, + "grad_norm": 21.246379852294922, + "learning_rate": 1.4569103549675073e-06, + "loss": 0.0536, + "num_input_tokens_seen": 57128936, + "step": 84745 + }, + { + "epoch": 2.070456599809445, + "grad_norm": 0.07237745076417923, + "learning_rate": 1.4568344977944242e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57132328, + "step": 84750 + }, + { + "epoch": 2.070578750641292, + "grad_norm": 0.014093225821852684, + "learning_rate": 1.456758637299118e-06, + "loss": 0.0, + "num_input_tokens_seen": 57135592, + "step": 84755 + }, + { + "epoch": 2.0707009014731392, + "grad_norm": 0.0032465895637869835, + "learning_rate": 1.4566827734821403e-06, + "loss": 0.0598, + "num_input_tokens_seen": 57138728, + "step": 84760 + }, + { + "epoch": 2.070823052304986, + "grad_norm": 75.4856185913086, + "learning_rate": 1.4566069063440424e-06, + "loss": 0.0354, + "num_input_tokens_seen": 57141608, + "step": 84765 + }, + { + "epoch": 2.070945203136833, + "grad_norm": 0.03004450909793377, + "learning_rate": 1.4565310358853762e-06, + "loss": 0.0017, + "num_input_tokens_seen": 57144744, + "step": 84770 + }, + { + "epoch": 2.0710673539686804, + "grad_norm": 93.50947570800781, + "learning_rate": 1.4564551621066937e-06, + "loss": 0.0326, + "num_input_tokens_seen": 57148136, + "step": 84775 + }, + { + "epoch": 2.0711895048005275, + "grad_norm": 0.0619245208799839, + "learning_rate": 1.4563792850085464e-06, + "loss": 0.0411, + "num_input_tokens_seen": 57151400, + "step": 84780 + }, + { + "epoch": 2.0713116556323747, + "grad_norm": 0.23388482630252838, + "learning_rate": 1.456303404591486e-06, + "loss": 0.12, + "num_input_tokens_seen": 57154920, + "step": 84785 + }, + { + "epoch": 2.071433806464222, + "grad_norm": 0.08751174062490463, + "learning_rate": 1.456227520856065e-06, + "loss": 0.0385, + "num_input_tokens_seen": 57158120, + "step": 84790 + }, + { + "epoch": 2.071555957296069, + "grad_norm": 0.019446225836873055, + "learning_rate": 1.4561516338028343e-06, + "loss": 0.0526, + "num_input_tokens_seen": 57160936, + "step": 84795 + }, + { + "epoch": 2.0716781081279163, + "grad_norm": 0.013060618191957474, + "learning_rate": 1.4560757434323463e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57164776, + "step": 84800 + }, + { + "epoch": 2.0718002589597635, + "grad_norm": 0.009493088349699974, + "learning_rate": 1.455999849745153e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57168872, + "step": 84805 + }, + { + "epoch": 2.0719224097916107, + "grad_norm": 0.033220697194337845, + "learning_rate": 1.4559239527418062e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57172136, + "step": 84810 + }, + { + "epoch": 2.072044560623458, + "grad_norm": 0.3769541382789612, + "learning_rate": 1.4558480524228576e-06, + "loss": 0.0007, + "num_input_tokens_seen": 57176168, + "step": 84815 + }, + { + "epoch": 2.072166711455305, + "grad_norm": 0.17406384646892548, + "learning_rate": 1.4557721487888594e-06, + "loss": 0.0006, + "num_input_tokens_seen": 57179816, + "step": 84820 + }, + { + "epoch": 2.0722888622871523, + "grad_norm": 0.07895692437887192, + "learning_rate": 1.4556962418403637e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57183080, + "step": 84825 + }, + { + "epoch": 2.0724110131189994, + "grad_norm": 0.050496265292167664, + "learning_rate": 1.4556203315779222e-06, + "loss": 0.1638, + "num_input_tokens_seen": 57186344, + "step": 84830 + }, + { + "epoch": 2.0725331639508466, + "grad_norm": 0.20722278952598572, + "learning_rate": 1.4555444180020867e-06, + "loss": 0.0708, + "num_input_tokens_seen": 57189480, + "step": 84835 + }, + { + "epoch": 2.072655314782694, + "grad_norm": 0.4477522671222687, + "learning_rate": 1.4554685011134102e-06, + "loss": 0.0537, + "num_input_tokens_seen": 57192360, + "step": 84840 + }, + { + "epoch": 2.072777465614541, + "grad_norm": 16.252840042114258, + "learning_rate": 1.4553925809124443e-06, + "loss": 0.0546, + "num_input_tokens_seen": 57195880, + "step": 84845 + }, + { + "epoch": 2.072899616446388, + "grad_norm": 0.4574112892150879, + "learning_rate": 1.455316657399741e-06, + "loss": 0.0422, + "num_input_tokens_seen": 57199272, + "step": 84850 + }, + { + "epoch": 2.073021767278235, + "grad_norm": 0.017640331760048866, + "learning_rate": 1.4552407305758524e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57202728, + "step": 84855 + }, + { + "epoch": 2.073143918110082, + "grad_norm": 0.012551628053188324, + "learning_rate": 1.4551648004413307e-06, + "loss": 0.0311, + "num_input_tokens_seen": 57206120, + "step": 84860 + }, + { + "epoch": 2.0732660689419293, + "grad_norm": 14.287816047668457, + "learning_rate": 1.4550888669967281e-06, + "loss": 0.0477, + "num_input_tokens_seen": 57209576, + "step": 84865 + }, + { + "epoch": 2.0733882197737765, + "grad_norm": 1.5717146396636963, + "learning_rate": 1.4550129302425972e-06, + "loss": 0.129, + "num_input_tokens_seen": 57212776, + "step": 84870 + }, + { + "epoch": 2.0735103706056237, + "grad_norm": 0.011195817030966282, + "learning_rate": 1.4549369901794894e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57216104, + "step": 84875 + }, + { + "epoch": 2.073632521437471, + "grad_norm": 20.77536392211914, + "learning_rate": 1.4548610468079578e-06, + "loss": 0.1224, + "num_input_tokens_seen": 57219112, + "step": 84880 + }, + { + "epoch": 2.073754672269318, + "grad_norm": 10.758296966552734, + "learning_rate": 1.4547851001285542e-06, + "loss": 0.0338, + "num_input_tokens_seen": 57222440, + "step": 84885 + }, + { + "epoch": 2.0738768231011653, + "grad_norm": 1.7609142065048218, + "learning_rate": 1.4547091501418312e-06, + "loss": 0.0008, + "num_input_tokens_seen": 57225960, + "step": 84890 + }, + { + "epoch": 2.0739989739330125, + "grad_norm": 0.15660512447357178, + "learning_rate": 1.4546331968483405e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57229480, + "step": 84895 + }, + { + "epoch": 2.0741211247648597, + "grad_norm": 0.09017514437437057, + "learning_rate": 1.4545572402486352e-06, + "loss": 0.0561, + "num_input_tokens_seen": 57232680, + "step": 84900 + }, + { + "epoch": 2.074243275596707, + "grad_norm": 0.6560986638069153, + "learning_rate": 1.4544812803432676e-06, + "loss": 0.0009, + "num_input_tokens_seen": 57236200, + "step": 84905 + }, + { + "epoch": 2.074365426428554, + "grad_norm": 0.05695900693535805, + "learning_rate": 1.4544053171327897e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57239144, + "step": 84910 + }, + { + "epoch": 2.0744875772604012, + "grad_norm": 0.09295591711997986, + "learning_rate": 1.4543293506177538e-06, + "loss": 0.0445, + "num_input_tokens_seen": 57242216, + "step": 84915 + }, + { + "epoch": 2.0746097280922484, + "grad_norm": 0.1317606270313263, + "learning_rate": 1.4542533807987132e-06, + "loss": 0.0581, + "num_input_tokens_seen": 57245800, + "step": 84920 + }, + { + "epoch": 2.0747318789240956, + "grad_norm": 32.33819580078125, + "learning_rate": 1.4541774076762197e-06, + "loss": 0.0395, + "num_input_tokens_seen": 57249064, + "step": 84925 + }, + { + "epoch": 2.074854029755943, + "grad_norm": 0.05708523467183113, + "learning_rate": 1.4541014312508257e-06, + "loss": 0.0599, + "num_input_tokens_seen": 57252200, + "step": 84930 + }, + { + "epoch": 2.07497618058779, + "grad_norm": 0.10730971395969391, + "learning_rate": 1.454025451523084e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57255272, + "step": 84935 + }, + { + "epoch": 2.075098331419637, + "grad_norm": 28.495298385620117, + "learning_rate": 1.4539494684935473e-06, + "loss": 0.0823, + "num_input_tokens_seen": 57259048, + "step": 84940 + }, + { + "epoch": 2.075220482251484, + "grad_norm": 0.28393909335136414, + "learning_rate": 1.4538734821627679e-06, + "loss": 0.0443, + "num_input_tokens_seen": 57262568, + "step": 84945 + }, + { + "epoch": 2.075342633083331, + "grad_norm": 0.0327572263777256, + "learning_rate": 1.4537974925312986e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57265640, + "step": 84950 + }, + { + "epoch": 2.0754647839151783, + "grad_norm": 0.19717465341091156, + "learning_rate": 1.4537214995996914e-06, + "loss": 0.1051, + "num_input_tokens_seen": 57269352, + "step": 84955 + }, + { + "epoch": 2.0755869347470255, + "grad_norm": 0.15424473583698273, + "learning_rate": 1.4536455033684995e-06, + "loss": 0.0354, + "num_input_tokens_seen": 57272488, + "step": 84960 + }, + { + "epoch": 2.0757090855788727, + "grad_norm": 0.6080400943756104, + "learning_rate": 1.4535695038382759e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57276520, + "step": 84965 + }, + { + "epoch": 2.07583123641072, + "grad_norm": 0.002733609639108181, + "learning_rate": 1.4534935010095727e-06, + "loss": 0.0572, + "num_input_tokens_seen": 57280104, + "step": 84970 + }, + { + "epoch": 2.075953387242567, + "grad_norm": 0.15572920441627502, + "learning_rate": 1.453417494882943e-06, + "loss": 0.0803, + "num_input_tokens_seen": 57283368, + "step": 84975 + }, + { + "epoch": 2.0760755380744143, + "grad_norm": 0.08092348277568817, + "learning_rate": 1.453341485458939e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57286312, + "step": 84980 + }, + { + "epoch": 2.0761976889062614, + "grad_norm": 1.1062157154083252, + "learning_rate": 1.4532654727381139e-06, + "loss": 0.1466, + "num_input_tokens_seen": 57289256, + "step": 84985 + }, + { + "epoch": 2.0763198397381086, + "grad_norm": 1.6455450057983398, + "learning_rate": 1.45318945672102e-06, + "loss": 0.0008, + "num_input_tokens_seen": 57292584, + "step": 84990 + }, + { + "epoch": 2.076441990569956, + "grad_norm": 0.004664831329137087, + "learning_rate": 1.453113437408211e-06, + "loss": 0.0323, + "num_input_tokens_seen": 57295784, + "step": 84995 + }, + { + "epoch": 2.076564141401803, + "grad_norm": 0.08192527294158936, + "learning_rate": 1.4530374148002391e-06, + "loss": 0.0315, + "num_input_tokens_seen": 57299176, + "step": 85000 + }, + { + "epoch": 2.07668629223365, + "grad_norm": 0.020025499165058136, + "learning_rate": 1.4529613888976572e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57302440, + "step": 85005 + }, + { + "epoch": 2.0768084430654974, + "grad_norm": 0.009146141819655895, + "learning_rate": 1.452885359701018e-06, + "loss": 0.0547, + "num_input_tokens_seen": 57305832, + "step": 85010 + }, + { + "epoch": 2.0769305938973446, + "grad_norm": 0.024533890187740326, + "learning_rate": 1.452809327210875e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57309544, + "step": 85015 + }, + { + "epoch": 2.0770527447291918, + "grad_norm": 18.826255798339844, + "learning_rate": 1.4527332914277807e-06, + "loss": 0.0516, + "num_input_tokens_seen": 57313000, + "step": 85020 + }, + { + "epoch": 2.077174895561039, + "grad_norm": 0.02399023249745369, + "learning_rate": 1.4526572523522882e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57316264, + "step": 85025 + }, + { + "epoch": 2.077297046392886, + "grad_norm": 0.017879458144307137, + "learning_rate": 1.4525812099849502e-06, + "loss": 0.0015, + "num_input_tokens_seen": 57319656, + "step": 85030 + }, + { + "epoch": 2.077419197224733, + "grad_norm": 8.3052396774292, + "learning_rate": 1.45250516432632e-06, + "loss": 0.0015, + "num_input_tokens_seen": 57322600, + "step": 85035 + }, + { + "epoch": 2.07754134805658, + "grad_norm": 0.005566952284425497, + "learning_rate": 1.4524291153769505e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57325864, + "step": 85040 + }, + { + "epoch": 2.0776634988884273, + "grad_norm": 0.0613483190536499, + "learning_rate": 1.452353063137395e-06, + "loss": 0.0458, + "num_input_tokens_seen": 57328744, + "step": 85045 + }, + { + "epoch": 2.0777856497202745, + "grad_norm": 0.14459988474845886, + "learning_rate": 1.452277007608206e-06, + "loss": 0.0301, + "num_input_tokens_seen": 57332392, + "step": 85050 + }, + { + "epoch": 2.0779078005521217, + "grad_norm": 24.976425170898438, + "learning_rate": 1.452200948789937e-06, + "loss": 0.0471, + "num_input_tokens_seen": 57335784, + "step": 85055 + }, + { + "epoch": 2.078029951383969, + "grad_norm": 0.11778343468904495, + "learning_rate": 1.4521248866831415e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57339048, + "step": 85060 + }, + { + "epoch": 2.078152102215816, + "grad_norm": 166.90821838378906, + "learning_rate": 1.452048821288372e-06, + "loss": 0.1021, + "num_input_tokens_seen": 57341928, + "step": 85065 + }, + { + "epoch": 2.0782742530476632, + "grad_norm": 0.06748581677675247, + "learning_rate": 1.4519727526061818e-06, + "loss": 0.0493, + "num_input_tokens_seen": 57345000, + "step": 85070 + }, + { + "epoch": 2.0783964038795104, + "grad_norm": 11.902010917663574, + "learning_rate": 1.451896680637124e-06, + "loss": 0.0419, + "num_input_tokens_seen": 57348584, + "step": 85075 + }, + { + "epoch": 2.0785185547113576, + "grad_norm": 0.03496910631656647, + "learning_rate": 1.4518206053817524e-06, + "loss": 0.038, + "num_input_tokens_seen": 57351848, + "step": 85080 + }, + { + "epoch": 2.078640705543205, + "grad_norm": 0.007627247367054224, + "learning_rate": 1.4517445268406196e-06, + "loss": 0.0302, + "num_input_tokens_seen": 57355240, + "step": 85085 + }, + { + "epoch": 2.078762856375052, + "grad_norm": 0.020205846056342125, + "learning_rate": 1.451668445014279e-06, + "loss": 0.0365, + "num_input_tokens_seen": 57358248, + "step": 85090 + }, + { + "epoch": 2.078885007206899, + "grad_norm": 0.03811624273657799, + "learning_rate": 1.4515923599032841e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57361832, + "step": 85095 + }, + { + "epoch": 2.0790071580387464, + "grad_norm": 0.26009073853492737, + "learning_rate": 1.451516271508188e-06, + "loss": 0.0381, + "num_input_tokens_seen": 57365736, + "step": 85100 + }, + { + "epoch": 2.0791293088705936, + "grad_norm": 0.03631829842925072, + "learning_rate": 1.4514401798295444e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57369768, + "step": 85105 + }, + { + "epoch": 2.0792514597024407, + "grad_norm": 0.03883937746286392, + "learning_rate": 1.4513640848679063e-06, + "loss": 0.0254, + "num_input_tokens_seen": 57373288, + "step": 85110 + }, + { + "epoch": 2.079373610534288, + "grad_norm": 53.71021270751953, + "learning_rate": 1.451287986623827e-06, + "loss": 0.1445, + "num_input_tokens_seen": 57376680, + "step": 85115 + }, + { + "epoch": 2.079495761366135, + "grad_norm": 88.04527282714844, + "learning_rate": 1.45121188509786e-06, + "loss": 0.0622, + "num_input_tokens_seen": 57379752, + "step": 85120 + }, + { + "epoch": 2.079617912197982, + "grad_norm": 0.0611591562628746, + "learning_rate": 1.4511357802905591e-06, + "loss": 0.0591, + "num_input_tokens_seen": 57382952, + "step": 85125 + }, + { + "epoch": 2.079740063029829, + "grad_norm": 0.004782163072377443, + "learning_rate": 1.4510596722024775e-06, + "loss": 0.0716, + "num_input_tokens_seen": 57386280, + "step": 85130 + }, + { + "epoch": 2.0798622138616762, + "grad_norm": 0.09725327044725418, + "learning_rate": 1.4509835608341685e-06, + "loss": 0.0018, + "num_input_tokens_seen": 57389160, + "step": 85135 + }, + { + "epoch": 2.0799843646935234, + "grad_norm": 0.09552471339702606, + "learning_rate": 1.450907446186186e-06, + "loss": 0.0007, + "num_input_tokens_seen": 57392360, + "step": 85140 + }, + { + "epoch": 2.0801065155253706, + "grad_norm": 0.3419274389743805, + "learning_rate": 1.4508313282590827e-06, + "loss": 0.0005, + "num_input_tokens_seen": 57395432, + "step": 85145 + }, + { + "epoch": 2.080228666357218, + "grad_norm": 0.016205064952373505, + "learning_rate": 1.450755207053413e-06, + "loss": 0.0236, + "num_input_tokens_seen": 57398952, + "step": 85150 + }, + { + "epoch": 2.080350817189065, + "grad_norm": 0.010081671178340912, + "learning_rate": 1.45067908256973e-06, + "loss": 0.1286, + "num_input_tokens_seen": 57402344, + "step": 85155 + }, + { + "epoch": 2.080472968020912, + "grad_norm": 0.055685579776763916, + "learning_rate": 1.450602954808588e-06, + "loss": 0.0012, + "num_input_tokens_seen": 57405480, + "step": 85160 + }, + { + "epoch": 2.0805951188527594, + "grad_norm": 7.206017017364502, + "learning_rate": 1.4505268237705396e-06, + "loss": 0.1086, + "num_input_tokens_seen": 57410920, + "step": 85165 + }, + { + "epoch": 2.0807172696846066, + "grad_norm": 0.1437099277973175, + "learning_rate": 1.4504506894561394e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57414440, + "step": 85170 + }, + { + "epoch": 2.0808394205164538, + "grad_norm": 0.009064151905477047, + "learning_rate": 1.4503745518659404e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57417512, + "step": 85175 + }, + { + "epoch": 2.080961571348301, + "grad_norm": 0.015507766976952553, + "learning_rate": 1.4502984110004967e-06, + "loss": 0.0484, + "num_input_tokens_seen": 57420584, + "step": 85180 + }, + { + "epoch": 2.081083722180148, + "grad_norm": 14.597432136535645, + "learning_rate": 1.4502222668603616e-06, + "loss": 0.0516, + "num_input_tokens_seen": 57424168, + "step": 85185 + }, + { + "epoch": 2.0812058730119953, + "grad_norm": 0.00974042434245348, + "learning_rate": 1.450146119446089e-06, + "loss": 0.0012, + "num_input_tokens_seen": 57427304, + "step": 85190 + }, + { + "epoch": 2.0813280238438425, + "grad_norm": 0.024810034781694412, + "learning_rate": 1.4500699687582332e-06, + "loss": 0.0311, + "num_input_tokens_seen": 57431400, + "step": 85195 + }, + { + "epoch": 2.0814501746756897, + "grad_norm": 0.0483444482088089, + "learning_rate": 1.4499938147973472e-06, + "loss": 0.0544, + "num_input_tokens_seen": 57434728, + "step": 85200 + }, + { + "epoch": 2.081572325507537, + "grad_norm": 0.1792319118976593, + "learning_rate": 1.4499176575639851e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57438248, + "step": 85205 + }, + { + "epoch": 2.0816944763393836, + "grad_norm": 0.06439114362001419, + "learning_rate": 1.4498414970587008e-06, + "loss": 0.0341, + "num_input_tokens_seen": 57441512, + "step": 85210 + }, + { + "epoch": 2.081816627171231, + "grad_norm": 0.15774594247341156, + "learning_rate": 1.4497653332820482e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57444712, + "step": 85215 + }, + { + "epoch": 2.081938778003078, + "grad_norm": 0.009701626375317574, + "learning_rate": 1.449689166234581e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57447976, + "step": 85220 + }, + { + "epoch": 2.082060928834925, + "grad_norm": 0.06566569954156876, + "learning_rate": 1.4496129959168535e-06, + "loss": 0.001, + "num_input_tokens_seen": 57451432, + "step": 85225 + }, + { + "epoch": 2.0821830796667724, + "grad_norm": 0.7955803871154785, + "learning_rate": 1.4495368223294194e-06, + "loss": 0.0444, + "num_input_tokens_seen": 57455208, + "step": 85230 + }, + { + "epoch": 2.0823052304986196, + "grad_norm": 0.011005281470716, + "learning_rate": 1.4494606454728323e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57458920, + "step": 85235 + }, + { + "epoch": 2.082427381330467, + "grad_norm": 2.7995944023132324, + "learning_rate": 1.4493844653476468e-06, + "loss": 0.0336, + "num_input_tokens_seen": 57462056, + "step": 85240 + }, + { + "epoch": 2.082549532162314, + "grad_norm": 0.12895996868610382, + "learning_rate": 1.4493082819544165e-06, + "loss": 0.0319, + "num_input_tokens_seen": 57465704, + "step": 85245 + }, + { + "epoch": 2.082671682994161, + "grad_norm": 0.134056955575943, + "learning_rate": 1.4492320952936954e-06, + "loss": 0.0516, + "num_input_tokens_seen": 57468776, + "step": 85250 + }, + { + "epoch": 2.0827938338260084, + "grad_norm": 0.019701072946190834, + "learning_rate": 1.4491559053660377e-06, + "loss": 0.0449, + "num_input_tokens_seen": 57472360, + "step": 85255 + }, + { + "epoch": 2.0829159846578555, + "grad_norm": 0.005952196195721626, + "learning_rate": 1.4490797121719976e-06, + "loss": 0.0, + "num_input_tokens_seen": 57475496, + "step": 85260 + }, + { + "epoch": 2.0830381354897027, + "grad_norm": 0.029273463413119316, + "learning_rate": 1.4490035157121287e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57479080, + "step": 85265 + }, + { + "epoch": 2.08316028632155, + "grad_norm": 0.006493884138762951, + "learning_rate": 1.4489273159869858e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57482536, + "step": 85270 + }, + { + "epoch": 2.083282437153397, + "grad_norm": 0.12485459446907043, + "learning_rate": 1.4488511129971226e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57486056, + "step": 85275 + }, + { + "epoch": 2.0834045879852443, + "grad_norm": 0.026284409686923027, + "learning_rate": 1.4487749067430931e-06, + "loss": 0.062, + "num_input_tokens_seen": 57489448, + "step": 85280 + }, + { + "epoch": 2.0835267388170915, + "grad_norm": 0.015463829971849918, + "learning_rate": 1.4486986972254525e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57492520, + "step": 85285 + }, + { + "epoch": 2.0836488896489387, + "grad_norm": 0.1737455427646637, + "learning_rate": 1.448622484444754e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57495656, + "step": 85290 + }, + { + "epoch": 2.083771040480786, + "grad_norm": 65.79161071777344, + "learning_rate": 1.448546268401552e-06, + "loss": 0.0884, + "num_input_tokens_seen": 57499112, + "step": 85295 + }, + { + "epoch": 2.083893191312633, + "grad_norm": 33.39630889892578, + "learning_rate": 1.4484700490964007e-06, + "loss": 0.1168, + "num_input_tokens_seen": 57502568, + "step": 85300 + }, + { + "epoch": 2.08401534214448, + "grad_norm": 0.05079827457666397, + "learning_rate": 1.4483938265298545e-06, + "loss": 0.0005, + "num_input_tokens_seen": 57506216, + "step": 85305 + }, + { + "epoch": 2.084137492976327, + "grad_norm": 0.005445330403745174, + "learning_rate": 1.448317600702468e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57509992, + "step": 85310 + }, + { + "epoch": 2.084259643808174, + "grad_norm": 0.022690260782837868, + "learning_rate": 1.4482413716147954e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57513256, + "step": 85315 + }, + { + "epoch": 2.0843817946400214, + "grad_norm": 0.0060226828791201115, + "learning_rate": 1.448165139267391e-06, + "loss": 0.1029, + "num_input_tokens_seen": 57516456, + "step": 85320 + }, + { + "epoch": 2.0845039454718686, + "grad_norm": 0.20211663842201233, + "learning_rate": 1.448088903660809e-06, + "loss": 0.0475, + "num_input_tokens_seen": 57520424, + "step": 85325 + }, + { + "epoch": 2.0846260963037158, + "grad_norm": 42.50727081298828, + "learning_rate": 1.4480126647956044e-06, + "loss": 0.0373, + "num_input_tokens_seen": 57523816, + "step": 85330 + }, + { + "epoch": 2.084748247135563, + "grad_norm": 0.020627789199352264, + "learning_rate": 1.4479364226723308e-06, + "loss": 0.0637, + "num_input_tokens_seen": 57527080, + "step": 85335 + }, + { + "epoch": 2.08487039796741, + "grad_norm": 0.10394884645938873, + "learning_rate": 1.447860177291543e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57529960, + "step": 85340 + }, + { + "epoch": 2.0849925487992573, + "grad_norm": 33.26313400268555, + "learning_rate": 1.4477839286537958e-06, + "loss": 0.1198, + "num_input_tokens_seen": 57532904, + "step": 85345 + }, + { + "epoch": 2.0851146996311045, + "grad_norm": 152.42120361328125, + "learning_rate": 1.4477076767596433e-06, + "loss": 0.0724, + "num_input_tokens_seen": 57535976, + "step": 85350 + }, + { + "epoch": 2.0852368504629517, + "grad_norm": 0.007731488440185785, + "learning_rate": 1.4476314216096402e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57539176, + "step": 85355 + }, + { + "epoch": 2.085359001294799, + "grad_norm": 0.00451649259775877, + "learning_rate": 1.4475551632043408e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57542504, + "step": 85360 + }, + { + "epoch": 2.085481152126646, + "grad_norm": 0.0020277267321944237, + "learning_rate": 1.4474789015443001e-06, + "loss": 0.0807, + "num_input_tokens_seen": 57546088, + "step": 85365 + }, + { + "epoch": 2.0856033029584933, + "grad_norm": 0.08840125054121017, + "learning_rate": 1.4474026366300724e-06, + "loss": 0.0412, + "num_input_tokens_seen": 57549672, + "step": 85370 + }, + { + "epoch": 2.0857254537903405, + "grad_norm": 0.5002579092979431, + "learning_rate": 1.4473263684622124e-06, + "loss": 0.093, + "num_input_tokens_seen": 57553320, + "step": 85375 + }, + { + "epoch": 2.0858476046221877, + "grad_norm": 0.12200009077787399, + "learning_rate": 1.4472500970412747e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57556520, + "step": 85380 + }, + { + "epoch": 2.085969755454035, + "grad_norm": 0.007834597490727901, + "learning_rate": 1.4471738223678141e-06, + "loss": 0.0426, + "num_input_tokens_seen": 57559976, + "step": 85385 + }, + { + "epoch": 2.0860919062858816, + "grad_norm": 0.11974035948514938, + "learning_rate": 1.4470975444423853e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57563752, + "step": 85390 + }, + { + "epoch": 2.086214057117729, + "grad_norm": 16.715085983276367, + "learning_rate": 1.4470212632655425e-06, + "loss": 0.0388, + "num_input_tokens_seen": 57567080, + "step": 85395 + }, + { + "epoch": 2.086336207949576, + "grad_norm": 0.1420247107744217, + "learning_rate": 1.4469449788378411e-06, + "loss": 0.0465, + "num_input_tokens_seen": 57570600, + "step": 85400 + }, + { + "epoch": 2.086458358781423, + "grad_norm": 0.23512078821659088, + "learning_rate": 1.4468686911598356e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57573672, + "step": 85405 + }, + { + "epoch": 2.0865805096132704, + "grad_norm": 95.10938262939453, + "learning_rate": 1.4467924002320807e-06, + "loss": 0.0926, + "num_input_tokens_seen": 57576872, + "step": 85410 + }, + { + "epoch": 2.0867026604451175, + "grad_norm": 65.81708526611328, + "learning_rate": 1.4467161060551313e-06, + "loss": 0.0384, + "num_input_tokens_seen": 57580008, + "step": 85415 + }, + { + "epoch": 2.0868248112769647, + "grad_norm": 0.5347601771354675, + "learning_rate": 1.4466398086295422e-06, + "loss": 0.0281, + "num_input_tokens_seen": 57583400, + "step": 85420 + }, + { + "epoch": 2.086946962108812, + "grad_norm": 0.03770304098725319, + "learning_rate": 1.4465635079558683e-06, + "loss": 0.0689, + "num_input_tokens_seen": 57586728, + "step": 85425 + }, + { + "epoch": 2.087069112940659, + "grad_norm": 0.03212061896920204, + "learning_rate": 1.4464872040346646e-06, + "loss": 0.0012, + "num_input_tokens_seen": 57590120, + "step": 85430 + }, + { + "epoch": 2.0871912637725063, + "grad_norm": 0.1276782602071762, + "learning_rate": 1.4464108968664857e-06, + "loss": 0.0006, + "num_input_tokens_seen": 57593896, + "step": 85435 + }, + { + "epoch": 2.0873134146043535, + "grad_norm": 0.007270419038832188, + "learning_rate": 1.4463345864518867e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57597160, + "step": 85440 + }, + { + "epoch": 2.0874355654362007, + "grad_norm": 0.06667440384626389, + "learning_rate": 1.4462582727914228e-06, + "loss": 0.048, + "num_input_tokens_seen": 57600360, + "step": 85445 + }, + { + "epoch": 2.087557716268048, + "grad_norm": 0.18071290850639343, + "learning_rate": 1.4461819558856484e-06, + "loss": 0.038, + "num_input_tokens_seen": 57603560, + "step": 85450 + }, + { + "epoch": 2.087679867099895, + "grad_norm": 0.4118402600288391, + "learning_rate": 1.446105635735119e-06, + "loss": 0.0011, + "num_input_tokens_seen": 57607016, + "step": 85455 + }, + { + "epoch": 2.0878020179317422, + "grad_norm": 0.07384537905454636, + "learning_rate": 1.4460293123403893e-06, + "loss": 0.0248, + "num_input_tokens_seen": 57610344, + "step": 85460 + }, + { + "epoch": 2.0879241687635894, + "grad_norm": 0.04173500835895538, + "learning_rate": 1.4459529857020144e-06, + "loss": 0.0407, + "num_input_tokens_seen": 57613992, + "step": 85465 + }, + { + "epoch": 2.0880463195954366, + "grad_norm": 0.006109563168138266, + "learning_rate": 1.4458766558205495e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57617320, + "step": 85470 + }, + { + "epoch": 2.088168470427284, + "grad_norm": 0.09735125303268433, + "learning_rate": 1.4458003226965496e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57620776, + "step": 85475 + }, + { + "epoch": 2.0882906212591306, + "grad_norm": 0.016522737219929695, + "learning_rate": 1.4457239863305702e-06, + "loss": 0.0502, + "num_input_tokens_seen": 57624296, + "step": 85480 + }, + { + "epoch": 2.0884127720909778, + "grad_norm": 0.02506168559193611, + "learning_rate": 1.4456476467231658e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57628072, + "step": 85485 + }, + { + "epoch": 2.088534922922825, + "grad_norm": 0.011805309914052486, + "learning_rate": 1.4455713038748918e-06, + "loss": 0.0523, + "num_input_tokens_seen": 57631208, + "step": 85490 + }, + { + "epoch": 2.088657073754672, + "grad_norm": 0.010718842037022114, + "learning_rate": 1.4454949577863036e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57634344, + "step": 85495 + }, + { + "epoch": 2.0887792245865193, + "grad_norm": 133.2101593017578, + "learning_rate": 1.4454186084579561e-06, + "loss": 0.002, + "num_input_tokens_seen": 57637992, + "step": 85500 + }, + { + "epoch": 2.0889013754183665, + "grad_norm": 16.67730712890625, + "learning_rate": 1.4453422558904047e-06, + "loss": 0.0525, + "num_input_tokens_seen": 57641128, + "step": 85505 + }, + { + "epoch": 2.0890235262502137, + "grad_norm": 0.01813196763396263, + "learning_rate": 1.4452659000842047e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57644520, + "step": 85510 + }, + { + "epoch": 2.089145677082061, + "grad_norm": 8.29789924621582, + "learning_rate": 1.4451895410399111e-06, + "loss": 0.0081, + "num_input_tokens_seen": 57647400, + "step": 85515 + }, + { + "epoch": 2.089267827913908, + "grad_norm": 0.0024462079163640738, + "learning_rate": 1.4451131787580795e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57650728, + "step": 85520 + }, + { + "epoch": 2.0893899787457553, + "grad_norm": 0.07815330475568771, + "learning_rate": 1.4450368132392652e-06, + "loss": 0.0126, + "num_input_tokens_seen": 57654120, + "step": 85525 + }, + { + "epoch": 2.0895121295776025, + "grad_norm": 0.14444968104362488, + "learning_rate": 1.4449604444840236e-06, + "loss": 0.0686, + "num_input_tokens_seen": 57657896, + "step": 85530 + }, + { + "epoch": 2.0896342804094497, + "grad_norm": 0.08869968354701996, + "learning_rate": 1.4448840724929098e-06, + "loss": 0.0994, + "num_input_tokens_seen": 57661224, + "step": 85535 + }, + { + "epoch": 2.089756431241297, + "grad_norm": 0.0426652766764164, + "learning_rate": 1.4448076972664795e-06, + "loss": 0.0443, + "num_input_tokens_seen": 57664296, + "step": 85540 + }, + { + "epoch": 2.089878582073144, + "grad_norm": 0.017624255269765854, + "learning_rate": 1.4447313188052878e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57667816, + "step": 85545 + }, + { + "epoch": 2.090000732904991, + "grad_norm": 0.04679286852478981, + "learning_rate": 1.4446549371098907e-06, + "loss": 0.0417, + "num_input_tokens_seen": 57670824, + "step": 85550 + }, + { + "epoch": 2.0901228837368384, + "grad_norm": 603.0365600585938, + "learning_rate": 1.4445785521808428e-06, + "loss": 0.1099, + "num_input_tokens_seen": 57674920, + "step": 85555 + }, + { + "epoch": 2.0902450345686856, + "grad_norm": 10.674302101135254, + "learning_rate": 1.4445021640187005e-06, + "loss": 0.0588, + "num_input_tokens_seen": 57678184, + "step": 85560 + }, + { + "epoch": 2.090367185400533, + "grad_norm": 0.0036526755429804325, + "learning_rate": 1.4444257726240187e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57681256, + "step": 85565 + }, + { + "epoch": 2.0904893362323795, + "grad_norm": 187.0728302001953, + "learning_rate": 1.4443493779973533e-06, + "loss": 0.0317, + "num_input_tokens_seen": 57684456, + "step": 85570 + }, + { + "epoch": 2.0906114870642267, + "grad_norm": 0.030918600037693977, + "learning_rate": 1.4442729801392597e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57687912, + "step": 85575 + }, + { + "epoch": 2.090733637896074, + "grad_norm": 0.4674592912197113, + "learning_rate": 1.4441965790502933e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57691240, + "step": 85580 + }, + { + "epoch": 2.090855788727921, + "grad_norm": 0.009129178710281849, + "learning_rate": 1.44412017473101e-06, + "loss": 0.0367, + "num_input_tokens_seen": 57694632, + "step": 85585 + }, + { + "epoch": 2.0909779395597683, + "grad_norm": 0.17762425541877747, + "learning_rate": 1.4440437671819652e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57697960, + "step": 85590 + }, + { + "epoch": 2.0911000903916155, + "grad_norm": 0.07223685830831528, + "learning_rate": 1.4439673564037152e-06, + "loss": 0.0392, + "num_input_tokens_seen": 57701992, + "step": 85595 + }, + { + "epoch": 2.0912222412234627, + "grad_norm": 0.04923141375184059, + "learning_rate": 1.4438909423968148e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57705320, + "step": 85600 + }, + { + "epoch": 2.09134439205531, + "grad_norm": 0.010022669099271297, + "learning_rate": 1.4438145251618198e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57708328, + "step": 85605 + }, + { + "epoch": 2.091466542887157, + "grad_norm": 0.10577555745840073, + "learning_rate": 1.4437381046992865e-06, + "loss": 0.0731, + "num_input_tokens_seen": 57711528, + "step": 85610 + }, + { + "epoch": 2.0915886937190042, + "grad_norm": 0.014822940342128277, + "learning_rate": 1.4436616810097704e-06, + "loss": 0.0679, + "num_input_tokens_seen": 57714856, + "step": 85615 + }, + { + "epoch": 2.0917108445508514, + "grad_norm": 14.269906997680664, + "learning_rate": 1.4435852540938272e-06, + "loss": 0.053, + "num_input_tokens_seen": 57717992, + "step": 85620 + }, + { + "epoch": 2.0918329953826986, + "grad_norm": 0.09064082056283951, + "learning_rate": 1.4435088239520125e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57721896, + "step": 85625 + }, + { + "epoch": 2.091955146214546, + "grad_norm": 0.03526727482676506, + "learning_rate": 1.4434323905848826e-06, + "loss": 0.0005, + "num_input_tokens_seen": 57725352, + "step": 85630 + }, + { + "epoch": 2.092077297046393, + "grad_norm": 0.02126150205731392, + "learning_rate": 1.443355953992993e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57728744, + "step": 85635 + }, + { + "epoch": 2.09219944787824, + "grad_norm": 0.044596266001462936, + "learning_rate": 1.4432795141768999e-06, + "loss": 0.0466, + "num_input_tokens_seen": 57731624, + "step": 85640 + }, + { + "epoch": 2.0923215987100874, + "grad_norm": 47.65426254272461, + "learning_rate": 1.4432030711371586e-06, + "loss": 0.0797, + "num_input_tokens_seen": 57734760, + "step": 85645 + }, + { + "epoch": 2.0924437495419346, + "grad_norm": 0.016679583117365837, + "learning_rate": 1.4431266248743254e-06, + "loss": 0.0006, + "num_input_tokens_seen": 57737832, + "step": 85650 + }, + { + "epoch": 2.0925659003737813, + "grad_norm": 0.01737331971526146, + "learning_rate": 1.4430501753889563e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57741288, + "step": 85655 + }, + { + "epoch": 2.0926880512056285, + "grad_norm": 0.009950646199285984, + "learning_rate": 1.4429737226816072e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57744616, + "step": 85660 + }, + { + "epoch": 2.0928102020374757, + "grad_norm": 0.14951828122138977, + "learning_rate": 1.4428972667528338e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57748008, + "step": 85665 + }, + { + "epoch": 2.092932352869323, + "grad_norm": 0.06490232795476913, + "learning_rate": 1.4428208076031925e-06, + "loss": 0.0468, + "num_input_tokens_seen": 57750952, + "step": 85670 + }, + { + "epoch": 2.09305450370117, + "grad_norm": 0.004932792857289314, + "learning_rate": 1.4427443452332392e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57754088, + "step": 85675 + }, + { + "epoch": 2.0931766545330173, + "grad_norm": 0.020456485450267792, + "learning_rate": 1.4426678796435301e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57757480, + "step": 85680 + }, + { + "epoch": 2.0932988053648645, + "grad_norm": 0.0004195286310277879, + "learning_rate": 1.4425914108346209e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57760872, + "step": 85685 + }, + { + "epoch": 2.0934209561967116, + "grad_norm": 0.008397513069212437, + "learning_rate": 1.442514938807068e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57764584, + "step": 85690 + }, + { + "epoch": 2.093543107028559, + "grad_norm": 0.004736917093396187, + "learning_rate": 1.4424384635614274e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57768104, + "step": 85695 + }, + { + "epoch": 2.093665257860406, + "grad_norm": 0.007678162772208452, + "learning_rate": 1.4423619850982554e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57771048, + "step": 85700 + }, + { + "epoch": 2.093787408692253, + "grad_norm": 0.05174483731389046, + "learning_rate": 1.442285503418108e-06, + "loss": 0.1421, + "num_input_tokens_seen": 57774248, + "step": 85705 + }, + { + "epoch": 2.0939095595241004, + "grad_norm": 0.011872244998812675, + "learning_rate": 1.4422090185215413e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57777832, + "step": 85710 + }, + { + "epoch": 2.0940317103559476, + "grad_norm": 0.006190591957420111, + "learning_rate": 1.4421325304091118e-06, + "loss": 0.0822, + "num_input_tokens_seen": 57781032, + "step": 85715 + }, + { + "epoch": 2.094153861187795, + "grad_norm": 0.31522297859191895, + "learning_rate": 1.4420560390813755e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57784744, + "step": 85720 + }, + { + "epoch": 2.094276012019642, + "grad_norm": 0.022517632693052292, + "learning_rate": 1.4419795445388892e-06, + "loss": 0.033, + "num_input_tokens_seen": 57788648, + "step": 85725 + }, + { + "epoch": 2.094398162851489, + "grad_norm": 0.0021469765342772007, + "learning_rate": 1.4419030467822084e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57792232, + "step": 85730 + }, + { + "epoch": 2.0945203136833364, + "grad_norm": 0.06888701021671295, + "learning_rate": 1.4418265458118897e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57795624, + "step": 85735 + }, + { + "epoch": 2.0946424645151835, + "grad_norm": 0.09264671057462692, + "learning_rate": 1.4417500416284898e-06, + "loss": 0.0614, + "num_input_tokens_seen": 57798888, + "step": 85740 + }, + { + "epoch": 2.0947646153470307, + "grad_norm": 0.030658259987831116, + "learning_rate": 1.4416735342325646e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57802280, + "step": 85745 + }, + { + "epoch": 2.0948867661788775, + "grad_norm": 0.015449795871973038, + "learning_rate": 1.441597023624671e-06, + "loss": 0.0057, + "num_input_tokens_seen": 57805736, + "step": 85750 + }, + { + "epoch": 2.0950089170107247, + "grad_norm": 0.02338220365345478, + "learning_rate": 1.4415205098053647e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57809000, + "step": 85755 + }, + { + "epoch": 2.095131067842572, + "grad_norm": 0.005242054350674152, + "learning_rate": 1.4414439927752026e-06, + "loss": 0.0598, + "num_input_tokens_seen": 57812712, + "step": 85760 + }, + { + "epoch": 2.095253218674419, + "grad_norm": 643.7587890625, + "learning_rate": 1.4413674725347408e-06, + "loss": 0.0097, + "num_input_tokens_seen": 57815848, + "step": 85765 + }, + { + "epoch": 2.0953753695062662, + "grad_norm": 254.23770141601562, + "learning_rate": 1.4412909490845364e-06, + "loss": 0.0621, + "num_input_tokens_seen": 57819560, + "step": 85770 + }, + { + "epoch": 2.0954975203381134, + "grad_norm": 0.009493929333984852, + "learning_rate": 1.4412144224251454e-06, + "loss": 0.0743, + "num_input_tokens_seen": 57822824, + "step": 85775 + }, + { + "epoch": 2.0956196711699606, + "grad_norm": 0.09335071593523026, + "learning_rate": 1.4411378925571246e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57826792, + "step": 85780 + }, + { + "epoch": 2.095741822001808, + "grad_norm": 0.026658060029149055, + "learning_rate": 1.4410613594810302e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57830440, + "step": 85785 + }, + { + "epoch": 2.095863972833655, + "grad_norm": 0.02904869057238102, + "learning_rate": 1.440984823197419e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57833576, + "step": 85790 + }, + { + "epoch": 2.095986123665502, + "grad_norm": 0.038817401975393295, + "learning_rate": 1.4409082837068476e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57836968, + "step": 85795 + }, + { + "epoch": 2.0961082744973494, + "grad_norm": 0.008263013325631618, + "learning_rate": 1.4408317410098725e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57840104, + "step": 85800 + }, + { + "epoch": 2.0962304253291966, + "grad_norm": 0.0016716079553589225, + "learning_rate": 1.4407551951070504e-06, + "loss": 0.0468, + "num_input_tokens_seen": 57843304, + "step": 85805 + }, + { + "epoch": 2.0963525761610438, + "grad_norm": 0.006488029845058918, + "learning_rate": 1.440678645998938e-06, + "loss": 0.1659, + "num_input_tokens_seen": 57846568, + "step": 85810 + }, + { + "epoch": 2.096474726992891, + "grad_norm": 0.05113215744495392, + "learning_rate": 1.4406020936860921e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57849896, + "step": 85815 + }, + { + "epoch": 2.096596877824738, + "grad_norm": 0.05706647038459778, + "learning_rate": 1.4405255381690692e-06, + "loss": 0.0798, + "num_input_tokens_seen": 57852968, + "step": 85820 + }, + { + "epoch": 2.0967190286565853, + "grad_norm": 0.06567330658435822, + "learning_rate": 1.440448979448426e-06, + "loss": 0.0424, + "num_input_tokens_seen": 57856936, + "step": 85825 + }, + { + "epoch": 2.0968411794884325, + "grad_norm": 0.03565848246216774, + "learning_rate": 1.4403724175247191e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57859880, + "step": 85830 + }, + { + "epoch": 2.0969633303202793, + "grad_norm": 0.05197528749704361, + "learning_rate": 1.4402958523985061e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57863080, + "step": 85835 + }, + { + "epoch": 2.0970854811521265, + "grad_norm": 15.99670124053955, + "learning_rate": 1.440219284070343e-06, + "loss": 0.0918, + "num_input_tokens_seen": 57866280, + "step": 85840 + }, + { + "epoch": 2.0972076319839736, + "grad_norm": 0.04562178999185562, + "learning_rate": 1.4401427125407866e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57869672, + "step": 85845 + }, + { + "epoch": 2.097329782815821, + "grad_norm": 0.23651406168937683, + "learning_rate": 1.4400661378103944e-06, + "loss": 0.0704, + "num_input_tokens_seen": 57872744, + "step": 85850 + }, + { + "epoch": 2.097451933647668, + "grad_norm": 0.027074666693806648, + "learning_rate": 1.4399895598797226e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57876136, + "step": 85855 + }, + { + "epoch": 2.097574084479515, + "grad_norm": 0.04177837446331978, + "learning_rate": 1.4399129787493288e-06, + "loss": 0.0378, + "num_input_tokens_seen": 57879656, + "step": 85860 + }, + { + "epoch": 2.0976962353113624, + "grad_norm": 0.009644084610044956, + "learning_rate": 1.4398363944197688e-06, + "loss": 0.0577, + "num_input_tokens_seen": 57882984, + "step": 85865 + }, + { + "epoch": 2.0978183861432096, + "grad_norm": 0.12638725340366364, + "learning_rate": 1.439759806891601e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57886440, + "step": 85870 + }, + { + "epoch": 2.097940536975057, + "grad_norm": 0.6511943340301514, + "learning_rate": 1.4396832161653811e-06, + "loss": 0.0005, + "num_input_tokens_seen": 57889448, + "step": 85875 + }, + { + "epoch": 2.098062687806904, + "grad_norm": 0.23605570197105408, + "learning_rate": 1.4396066222416668e-06, + "loss": 0.03, + "num_input_tokens_seen": 57892520, + "step": 85880 + }, + { + "epoch": 2.098184838638751, + "grad_norm": 0.01215800829231739, + "learning_rate": 1.4395300251210147e-06, + "loss": 0.0006, + "num_input_tokens_seen": 57895528, + "step": 85885 + }, + { + "epoch": 2.0983069894705983, + "grad_norm": 0.005944849457591772, + "learning_rate": 1.439453424803982e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57898728, + "step": 85890 + }, + { + "epoch": 2.0984291403024455, + "grad_norm": 0.010880285874009132, + "learning_rate": 1.4393768212911259e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57901992, + "step": 85895 + }, + { + "epoch": 2.0985512911342927, + "grad_norm": 0.0785965770483017, + "learning_rate": 1.4393002145830035e-06, + "loss": 0.0879, + "num_input_tokens_seen": 57905704, + "step": 85900 + }, + { + "epoch": 2.09867344196614, + "grad_norm": 2.173537015914917, + "learning_rate": 1.439223604680172e-06, + "loss": 0.0004, + "num_input_tokens_seen": 57909288, + "step": 85905 + }, + { + "epoch": 2.098795592797987, + "grad_norm": 0.008875908330082893, + "learning_rate": 1.439146991583188e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57912744, + "step": 85910 + }, + { + "epoch": 2.0989177436298343, + "grad_norm": 0.05505775660276413, + "learning_rate": 1.439070375292609e-06, + "loss": 0.0537, + "num_input_tokens_seen": 57916328, + "step": 85915 + }, + { + "epoch": 2.0990398944616815, + "grad_norm": 0.03384046256542206, + "learning_rate": 1.4389937558089919e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57919720, + "step": 85920 + }, + { + "epoch": 2.0991620452935287, + "grad_norm": 0.01677374541759491, + "learning_rate": 1.4389171331328945e-06, + "loss": 0.1204, + "num_input_tokens_seen": 57923240, + "step": 85925 + }, + { + "epoch": 2.0992841961253754, + "grad_norm": 0.0018917974084615707, + "learning_rate": 1.4388405072648735e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57926440, + "step": 85930 + }, + { + "epoch": 2.0994063469572226, + "grad_norm": 0.016427690163254738, + "learning_rate": 1.4387638782054863e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57930024, + "step": 85935 + }, + { + "epoch": 2.09952849778907, + "grad_norm": 0.00689767487347126, + "learning_rate": 1.4386872459552902e-06, + "loss": 0.0444, + "num_input_tokens_seen": 57933224, + "step": 85940 + }, + { + "epoch": 2.099650648620917, + "grad_norm": 0.3828757107257843, + "learning_rate": 1.4386106105148425e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57936936, + "step": 85945 + }, + { + "epoch": 2.099772799452764, + "grad_norm": 0.01371038518846035, + "learning_rate": 1.4385339718847002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57940136, + "step": 85950 + }, + { + "epoch": 2.0998949502846114, + "grad_norm": 51.16667556762695, + "learning_rate": 1.4384573300654213e-06, + "loss": 0.049, + "num_input_tokens_seen": 57944040, + "step": 85955 + }, + { + "epoch": 2.1000171011164586, + "grad_norm": 0.03594563156366348, + "learning_rate": 1.4383806850575627e-06, + "loss": 0.0636, + "num_input_tokens_seen": 57947496, + "step": 85960 + }, + { + "epoch": 2.1001392519483058, + "grad_norm": 0.0012629919219762087, + "learning_rate": 1.4383040368616816e-06, + "loss": 0.0, + "num_input_tokens_seen": 57951336, + "step": 85965 + }, + { + "epoch": 2.100261402780153, + "grad_norm": 0.13319532573223114, + "learning_rate": 1.4382273854783358e-06, + "loss": 0.0366, + "num_input_tokens_seen": 57954920, + "step": 85970 + }, + { + "epoch": 2.100383553612, + "grad_norm": 22.986907958984375, + "learning_rate": 1.4381507309080827e-06, + "loss": 0.1009, + "num_input_tokens_seen": 57958312, + "step": 85975 + }, + { + "epoch": 2.1005057044438473, + "grad_norm": 0.0110292574390769, + "learning_rate": 1.4380740731514793e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57961960, + "step": 85980 + }, + { + "epoch": 2.1006278552756945, + "grad_norm": 0.008005023933947086, + "learning_rate": 1.4379974122090835e-06, + "loss": 0.0796, + "num_input_tokens_seen": 57965352, + "step": 85985 + }, + { + "epoch": 2.1007500061075417, + "grad_norm": 0.05698973312973976, + "learning_rate": 1.4379207480814527e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57968488, + "step": 85990 + }, + { + "epoch": 2.100872156939389, + "grad_norm": 0.0639193058013916, + "learning_rate": 1.4378440807691447e-06, + "loss": 0.0513, + "num_input_tokens_seen": 57972136, + "step": 85995 + }, + { + "epoch": 2.100994307771236, + "grad_norm": 0.311469703912735, + "learning_rate": 1.4377674102727166e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57975464, + "step": 86000 + }, + { + "epoch": 2.1011164586030833, + "grad_norm": 0.024497320875525475, + "learning_rate": 1.4376907365927262e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57979048, + "step": 86005 + }, + { + "epoch": 2.1012386094349305, + "grad_norm": 0.4448446035385132, + "learning_rate": 1.437614059729731e-06, + "loss": 0.0003, + "num_input_tokens_seen": 57982248, + "step": 86010 + }, + { + "epoch": 2.101360760266777, + "grad_norm": 0.0009100750903598964, + "learning_rate": 1.4375373796842887e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57985640, + "step": 86015 + }, + { + "epoch": 2.1014829110986244, + "grad_norm": 0.041483260691165924, + "learning_rate": 1.4374606964569569e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57988776, + "step": 86020 + }, + { + "epoch": 2.1016050619304716, + "grad_norm": 0.017451265826821327, + "learning_rate": 1.4373840100482932e-06, + "loss": 0.0001, + "num_input_tokens_seen": 57992296, + "step": 86025 + }, + { + "epoch": 2.1017272127623188, + "grad_norm": 0.030862795189023018, + "learning_rate": 1.4373073204588556e-06, + "loss": 0.0388, + "num_input_tokens_seen": 57995624, + "step": 86030 + }, + { + "epoch": 2.101849363594166, + "grad_norm": 0.0727197602391243, + "learning_rate": 1.437230627689201e-06, + "loss": 0.0002, + "num_input_tokens_seen": 57999016, + "step": 86035 + }, + { + "epoch": 2.101971514426013, + "grad_norm": 0.06713508814573288, + "learning_rate": 1.4371539317398882e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58002280, + "step": 86040 + }, + { + "epoch": 2.1020936652578603, + "grad_norm": 0.006942774634808302, + "learning_rate": 1.437077232611474e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58005416, + "step": 86045 + }, + { + "epoch": 2.1022158160897075, + "grad_norm": 37.54534149169922, + "learning_rate": 1.4370005303045168e-06, + "loss": 0.0501, + "num_input_tokens_seen": 58008680, + "step": 86050 + }, + { + "epoch": 2.1023379669215547, + "grad_norm": 0.006586906500160694, + "learning_rate": 1.436923824819574e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58012200, + "step": 86055 + }, + { + "epoch": 2.102460117753402, + "grad_norm": 0.03484753891825676, + "learning_rate": 1.4368471161572042e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58015784, + "step": 86060 + }, + { + "epoch": 2.102582268585249, + "grad_norm": 0.003911359701305628, + "learning_rate": 1.4367704043179643e-06, + "loss": 0.0371, + "num_input_tokens_seen": 58019048, + "step": 86065 + }, + { + "epoch": 2.1027044194170963, + "grad_norm": 0.0028033980634063482, + "learning_rate": 1.4366936893024124e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58022056, + "step": 86070 + }, + { + "epoch": 2.1028265702489435, + "grad_norm": 0.005028760991990566, + "learning_rate": 1.4366169711111068e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58026344, + "step": 86075 + }, + { + "epoch": 2.1029487210807907, + "grad_norm": 0.2529297173023224, + "learning_rate": 1.4365402497446048e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58029416, + "step": 86080 + }, + { + "epoch": 2.103070871912638, + "grad_norm": 0.010397453792393208, + "learning_rate": 1.436463525203465e-06, + "loss": 0.0372, + "num_input_tokens_seen": 58032744, + "step": 86085 + }, + { + "epoch": 2.103193022744485, + "grad_norm": 0.03357874974608421, + "learning_rate": 1.4363867974882448e-06, + "loss": 0.0073, + "num_input_tokens_seen": 58036328, + "step": 86090 + }, + { + "epoch": 2.1033151735763322, + "grad_norm": 0.31488898396492004, + "learning_rate": 1.436310066599503e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58039848, + "step": 86095 + }, + { + "epoch": 2.1034373244081794, + "grad_norm": 0.003075455315411091, + "learning_rate": 1.4362333325377964e-06, + "loss": 0.0752, + "num_input_tokens_seen": 58042728, + "step": 86100 + }, + { + "epoch": 2.103559475240026, + "grad_norm": 0.002096857177093625, + "learning_rate": 1.436156595303684e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58045992, + "step": 86105 + }, + { + "epoch": 2.1036816260718734, + "grad_norm": 0.09626047313213348, + "learning_rate": 1.4360798548977235e-06, + "loss": 0.0467, + "num_input_tokens_seen": 58049448, + "step": 86110 + }, + { + "epoch": 2.1038037769037206, + "grad_norm": 0.04785279184579849, + "learning_rate": 1.4360031113204729e-06, + "loss": 0.045, + "num_input_tokens_seen": 58052776, + "step": 86115 + }, + { + "epoch": 2.1039259277355677, + "grad_norm": 0.004664618521928787, + "learning_rate": 1.4359263645724905e-06, + "loss": 0.0454, + "num_input_tokens_seen": 58056040, + "step": 86120 + }, + { + "epoch": 2.104048078567415, + "grad_norm": 0.0005619783769361675, + "learning_rate": 1.4358496146543343e-06, + "loss": 0.0878, + "num_input_tokens_seen": 58059304, + "step": 86125 + }, + { + "epoch": 2.104170229399262, + "grad_norm": 0.028854873031377792, + "learning_rate": 1.4357728615665626e-06, + "loss": 0.0716, + "num_input_tokens_seen": 58062568, + "step": 86130 + }, + { + "epoch": 2.1042923802311093, + "grad_norm": 0.6892449855804443, + "learning_rate": 1.435696105309733e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58066344, + "step": 86135 + }, + { + "epoch": 2.1044145310629565, + "grad_norm": 0.1289759874343872, + "learning_rate": 1.4356193458844045e-06, + "loss": 0.0009, + "num_input_tokens_seen": 58069544, + "step": 86140 + }, + { + "epoch": 2.1045366818948037, + "grad_norm": 0.12908408045768738, + "learning_rate": 1.4355425832911348e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58072936, + "step": 86145 + }, + { + "epoch": 2.104658832726651, + "grad_norm": 0.13376732170581818, + "learning_rate": 1.4354658175304824e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58077032, + "step": 86150 + }, + { + "epoch": 2.104780983558498, + "grad_norm": 0.0008134989766404033, + "learning_rate": 1.4353890486030054e-06, + "loss": 0.0608, + "num_input_tokens_seen": 58080552, + "step": 86155 + }, + { + "epoch": 2.1049031343903453, + "grad_norm": 36.05324935913086, + "learning_rate": 1.4353122765092622e-06, + "loss": 0.0652, + "num_input_tokens_seen": 58084136, + "step": 86160 + }, + { + "epoch": 2.1050252852221925, + "grad_norm": 0.035665228962898254, + "learning_rate": 1.435235501249811e-06, + "loss": 0.0525, + "num_input_tokens_seen": 58087720, + "step": 86165 + }, + { + "epoch": 2.1051474360540396, + "grad_norm": 0.06152858957648277, + "learning_rate": 1.4351587228252102e-06, + "loss": 0.0515, + "num_input_tokens_seen": 58090856, + "step": 86170 + }, + { + "epoch": 2.105269586885887, + "grad_norm": 18.646289825439453, + "learning_rate": 1.4350819412360182e-06, + "loss": 0.0427, + "num_input_tokens_seen": 58094056, + "step": 86175 + }, + { + "epoch": 2.105391737717734, + "grad_norm": 17.207197189331055, + "learning_rate": 1.4350051564827932e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58097832, + "step": 86180 + }, + { + "epoch": 2.105513888549581, + "grad_norm": 0.006788597907871008, + "learning_rate": 1.4349283685660935e-06, + "loss": 0.0803, + "num_input_tokens_seen": 58101032, + "step": 86185 + }, + { + "epoch": 2.1056360393814284, + "grad_norm": 0.01485143881291151, + "learning_rate": 1.434851577486478e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58103976, + "step": 86190 + }, + { + "epoch": 2.105758190213275, + "grad_norm": 0.016216788440942764, + "learning_rate": 1.4347747832445047e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58107496, + "step": 86195 + }, + { + "epoch": 2.1058803410451223, + "grad_norm": 0.06826010346412659, + "learning_rate": 1.4346979858407323e-06, + "loss": 0.0007, + "num_input_tokens_seen": 58110760, + "step": 86200 + }, + { + "epoch": 2.1060024918769695, + "grad_norm": 0.0071051702834665775, + "learning_rate": 1.434621185275719e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58114280, + "step": 86205 + }, + { + "epoch": 2.1061246427088167, + "grad_norm": 161.30799865722656, + "learning_rate": 1.434544381550024e-06, + "loss": 0.0036, + "num_input_tokens_seen": 58117864, + "step": 86210 + }, + { + "epoch": 2.106246793540664, + "grad_norm": 0.39801251888275146, + "learning_rate": 1.4344675746642054e-06, + "loss": 0.0799, + "num_input_tokens_seen": 58121000, + "step": 86215 + }, + { + "epoch": 2.106368944372511, + "grad_norm": 0.12843145430088043, + "learning_rate": 1.4343907646188217e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58124136, + "step": 86220 + }, + { + "epoch": 2.1064910952043583, + "grad_norm": 0.005412922706454992, + "learning_rate": 1.434313951414431e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58127912, + "step": 86225 + }, + { + "epoch": 2.1066132460362055, + "grad_norm": 0.03722435235977173, + "learning_rate": 1.4342371350515927e-06, + "loss": 0.0545, + "num_input_tokens_seen": 58130792, + "step": 86230 + }, + { + "epoch": 2.1067353968680527, + "grad_norm": 0.011225051246583462, + "learning_rate": 1.4341603155308653e-06, + "loss": 0.0466, + "num_input_tokens_seen": 58134568, + "step": 86235 + }, + { + "epoch": 2.1068575476999, + "grad_norm": 0.013920868746936321, + "learning_rate": 1.4340834928528072e-06, + "loss": 0.0418, + "num_input_tokens_seen": 58137640, + "step": 86240 + }, + { + "epoch": 2.106979698531747, + "grad_norm": 0.26774507761001587, + "learning_rate": 1.434006667017977e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58141672, + "step": 86245 + }, + { + "epoch": 2.1071018493635942, + "grad_norm": 0.006552118342369795, + "learning_rate": 1.433929838026934e-06, + "loss": 0.0, + "num_input_tokens_seen": 58145000, + "step": 86250 + }, + { + "epoch": 2.1072240001954414, + "grad_norm": 0.015171009115874767, + "learning_rate": 1.4338530058802363e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58148968, + "step": 86255 + }, + { + "epoch": 2.1073461510272886, + "grad_norm": 0.014841420575976372, + "learning_rate": 1.4337761705784427e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58152296, + "step": 86260 + }, + { + "epoch": 2.107468301859136, + "grad_norm": 0.008177024312317371, + "learning_rate": 1.4336993321221123e-06, + "loss": 0.0693, + "num_input_tokens_seen": 58155624, + "step": 86265 + }, + { + "epoch": 2.107590452690983, + "grad_norm": 0.007435632403939962, + "learning_rate": 1.4336224905118038e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58158888, + "step": 86270 + }, + { + "epoch": 2.10771260352283, + "grad_norm": 0.009212308563292027, + "learning_rate": 1.4335456457480758e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58162216, + "step": 86275 + }, + { + "epoch": 2.107834754354677, + "grad_norm": 0.017764560878276825, + "learning_rate": 1.4334687978314873e-06, + "loss": 0.0513, + "num_input_tokens_seen": 58166184, + "step": 86280 + }, + { + "epoch": 2.107956905186524, + "grad_norm": 0.012132183648645878, + "learning_rate": 1.433391946762597e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58169320, + "step": 86285 + }, + { + "epoch": 2.1080790560183713, + "grad_norm": 0.011081576347351074, + "learning_rate": 1.4333150925419639e-06, + "loss": 0.0479, + "num_input_tokens_seen": 58172712, + "step": 86290 + }, + { + "epoch": 2.1082012068502185, + "grad_norm": 0.015859754756093025, + "learning_rate": 1.4332382351701467e-06, + "loss": 0.1066, + "num_input_tokens_seen": 58176040, + "step": 86295 + }, + { + "epoch": 2.1083233576820657, + "grad_norm": 0.012283757328987122, + "learning_rate": 1.4331613746477049e-06, + "loss": 0.0539, + "num_input_tokens_seen": 58179176, + "step": 86300 + }, + { + "epoch": 2.108445508513913, + "grad_norm": 0.008912608027458191, + "learning_rate": 1.4330845109751967e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58183016, + "step": 86305 + }, + { + "epoch": 2.10856765934576, + "grad_norm": 14.167522430419922, + "learning_rate": 1.433007644153182e-06, + "loss": 0.0009, + "num_input_tokens_seen": 58186536, + "step": 86310 + }, + { + "epoch": 2.1086898101776073, + "grad_norm": 1.1770907640457153, + "learning_rate": 1.432930774182219e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58189864, + "step": 86315 + }, + { + "epoch": 2.1088119610094544, + "grad_norm": 0.015262456610798836, + "learning_rate": 1.4328539010628668e-06, + "loss": 0.0823, + "num_input_tokens_seen": 58193000, + "step": 86320 + }, + { + "epoch": 2.1089341118413016, + "grad_norm": 0.007925025187432766, + "learning_rate": 1.4327770247956847e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58196584, + "step": 86325 + }, + { + "epoch": 2.109056262673149, + "grad_norm": 0.2659885883331299, + "learning_rate": 1.4327001453812318e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58199976, + "step": 86330 + }, + { + "epoch": 2.109178413504996, + "grad_norm": 0.019817404448986053, + "learning_rate": 1.432623262820067e-06, + "loss": 0.0452, + "num_input_tokens_seen": 58203304, + "step": 86335 + }, + { + "epoch": 2.109300564336843, + "grad_norm": 131.40748596191406, + "learning_rate": 1.4325463771127492e-06, + "loss": 0.0019, + "num_input_tokens_seen": 58206696, + "step": 86340 + }, + { + "epoch": 2.1094227151686904, + "grad_norm": 0.05100691691040993, + "learning_rate": 1.432469488259838e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58210408, + "step": 86345 + }, + { + "epoch": 2.1095448660005376, + "grad_norm": 0.06888487935066223, + "learning_rate": 1.4323925962618925e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58213928, + "step": 86350 + }, + { + "epoch": 2.109667016832385, + "grad_norm": 0.02099837362766266, + "learning_rate": 1.4323157011194716e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58217064, + "step": 86355 + }, + { + "epoch": 2.109789167664232, + "grad_norm": 0.08666683733463287, + "learning_rate": 1.4322388028331344e-06, + "loss": 0.0753, + "num_input_tokens_seen": 58220648, + "step": 86360 + }, + { + "epoch": 2.109911318496079, + "grad_norm": 0.05943422019481659, + "learning_rate": 1.432161901403441e-06, + "loss": 0.0756, + "num_input_tokens_seen": 58224040, + "step": 86365 + }, + { + "epoch": 2.1100334693279263, + "grad_norm": 8.62483024597168, + "learning_rate": 1.4320849968309497e-06, + "loss": 0.0342, + "num_input_tokens_seen": 58227752, + "step": 86370 + }, + { + "epoch": 2.110155620159773, + "grad_norm": 0.06837952882051468, + "learning_rate": 1.4320080891162201e-06, + "loss": 0.0595, + "num_input_tokens_seen": 58230696, + "step": 86375 + }, + { + "epoch": 2.1102777709916203, + "grad_norm": 0.02601909264922142, + "learning_rate": 1.4319311782598113e-06, + "loss": 0.0007, + "num_input_tokens_seen": 58233896, + "step": 86380 + }, + { + "epoch": 2.1103999218234675, + "grad_norm": 0.007753228303045034, + "learning_rate": 1.4318542642622828e-06, + "loss": 0.0008, + "num_input_tokens_seen": 58237096, + "step": 86385 + }, + { + "epoch": 2.1105220726553147, + "grad_norm": 0.01853867433965206, + "learning_rate": 1.431777347124194e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58240360, + "step": 86390 + }, + { + "epoch": 2.110644223487162, + "grad_norm": 0.028115682303905487, + "learning_rate": 1.4317004268461044e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58244008, + "step": 86395 + }, + { + "epoch": 2.110766374319009, + "grad_norm": 0.03277409449219704, + "learning_rate": 1.431623503428573e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58247400, + "step": 86400 + }, + { + "epoch": 2.1108885251508562, + "grad_norm": 39.95744705200195, + "learning_rate": 1.4315465768721593e-06, + "loss": 0.0651, + "num_input_tokens_seen": 58250856, + "step": 86405 + }, + { + "epoch": 2.1110106759827034, + "grad_norm": 0.06884709000587463, + "learning_rate": 1.431469647177423e-06, + "loss": 0.0787, + "num_input_tokens_seen": 58254440, + "step": 86410 + }, + { + "epoch": 2.1111328268145506, + "grad_norm": 0.00929190032184124, + "learning_rate": 1.4313927143449235e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58258152, + "step": 86415 + }, + { + "epoch": 2.111254977646398, + "grad_norm": 0.04569040238857269, + "learning_rate": 1.43131577837522e-06, + "loss": 0.0628, + "num_input_tokens_seen": 58261480, + "step": 86420 + }, + { + "epoch": 2.111377128478245, + "grad_norm": 0.020279861986637115, + "learning_rate": 1.431238839268872e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58265064, + "step": 86425 + }, + { + "epoch": 2.111499279310092, + "grad_norm": 0.33997291326522827, + "learning_rate": 1.4311618970264392e-06, + "loss": 0.0327, + "num_input_tokens_seen": 58268456, + "step": 86430 + }, + { + "epoch": 2.1116214301419394, + "grad_norm": 0.03252893313765526, + "learning_rate": 1.4310849516484813e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58271912, + "step": 86435 + }, + { + "epoch": 2.1117435809737866, + "grad_norm": 0.036082323640584946, + "learning_rate": 1.4310080031355575e-06, + "loss": 0.0008, + "num_input_tokens_seen": 58274984, + "step": 86440 + }, + { + "epoch": 2.1118657318056338, + "grad_norm": 0.09711766242980957, + "learning_rate": 1.4309310514882277e-06, + "loss": 0.0764, + "num_input_tokens_seen": 58278312, + "step": 86445 + }, + { + "epoch": 2.111987882637481, + "grad_norm": 0.011256006546318531, + "learning_rate": 1.4308540967070513e-06, + "loss": 0.0497, + "num_input_tokens_seen": 58281832, + "step": 86450 + }, + { + "epoch": 2.112110033469328, + "grad_norm": 0.09498558193445206, + "learning_rate": 1.430777138792588e-06, + "loss": 0.0046, + "num_input_tokens_seen": 58285288, + "step": 86455 + }, + { + "epoch": 2.112232184301175, + "grad_norm": 0.029148347675800323, + "learning_rate": 1.4307001777453977e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58289128, + "step": 86460 + }, + { + "epoch": 2.112354335133022, + "grad_norm": 0.12875746190547943, + "learning_rate": 1.4306232135660397e-06, + "loss": 0.0495, + "num_input_tokens_seen": 58292776, + "step": 86465 + }, + { + "epoch": 2.1124764859648693, + "grad_norm": 0.014981545507907867, + "learning_rate": 1.430546246255074e-06, + "loss": 0.0523, + "num_input_tokens_seen": 58296872, + "step": 86470 + }, + { + "epoch": 2.1125986367967164, + "grad_norm": 0.016068417578935623, + "learning_rate": 1.4304692758130599e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58300456, + "step": 86475 + }, + { + "epoch": 2.1127207876285636, + "grad_norm": 0.2005162090063095, + "learning_rate": 1.4303923022405577e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58303656, + "step": 86480 + }, + { + "epoch": 2.112842938460411, + "grad_norm": 0.017666978761553764, + "learning_rate": 1.430315325538127e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58306920, + "step": 86485 + }, + { + "epoch": 2.112965089292258, + "grad_norm": 0.13356828689575195, + "learning_rate": 1.4302383457063272e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58310184, + "step": 86490 + }, + { + "epoch": 2.113087240124105, + "grad_norm": 0.006724830716848373, + "learning_rate": 1.4301613627457186e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58313704, + "step": 86495 + }, + { + "epoch": 2.1132093909559524, + "grad_norm": 0.03474617004394531, + "learning_rate": 1.4300843766568609e-06, + "loss": 0.0513, + "num_input_tokens_seen": 58316904, + "step": 86500 + }, + { + "epoch": 2.1133315417877996, + "grad_norm": 356.2970275878906, + "learning_rate": 1.4300073874403139e-06, + "loss": 0.005, + "num_input_tokens_seen": 58320424, + "step": 86505 + }, + { + "epoch": 2.1134536926196468, + "grad_norm": 0.009362326003611088, + "learning_rate": 1.4299303950966372e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58323880, + "step": 86510 + }, + { + "epoch": 2.113575843451494, + "grad_norm": 15.823705673217773, + "learning_rate": 1.4298533996263916e-06, + "loss": 0.0696, + "num_input_tokens_seen": 58327336, + "step": 86515 + }, + { + "epoch": 2.113697994283341, + "grad_norm": 0.013039246201515198, + "learning_rate": 1.429776401030136e-06, + "loss": 0.0517, + "num_input_tokens_seen": 58330536, + "step": 86520 + }, + { + "epoch": 2.1138201451151883, + "grad_norm": 0.12527424097061157, + "learning_rate": 1.4296993993084313e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58333928, + "step": 86525 + }, + { + "epoch": 2.1139422959470355, + "grad_norm": 0.6121593713760376, + "learning_rate": 1.4296223944618366e-06, + "loss": 0.0405, + "num_input_tokens_seen": 58337384, + "step": 86530 + }, + { + "epoch": 2.1140644467788827, + "grad_norm": 0.014215141534805298, + "learning_rate": 1.4295453864909125e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58341032, + "step": 86535 + }, + { + "epoch": 2.11418659761073, + "grad_norm": 0.04024295508861542, + "learning_rate": 1.4294683753962187e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58344232, + "step": 86540 + }, + { + "epoch": 2.114308748442577, + "grad_norm": 0.017275016754865646, + "learning_rate": 1.429391361178315e-06, + "loss": 0.1218, + "num_input_tokens_seen": 58347816, + "step": 86545 + }, + { + "epoch": 2.114430899274424, + "grad_norm": 0.02299557812511921, + "learning_rate": 1.4293143438377624e-06, + "loss": 0.0559, + "num_input_tokens_seen": 58351144, + "step": 86550 + }, + { + "epoch": 2.114553050106271, + "grad_norm": 0.07241678982973099, + "learning_rate": 1.4292373233751202e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58354472, + "step": 86555 + }, + { + "epoch": 2.1146752009381182, + "grad_norm": 0.3192169964313507, + "learning_rate": 1.4291602997909489e-06, + "loss": 0.0856, + "num_input_tokens_seen": 58357928, + "step": 86560 + }, + { + "epoch": 2.1147973517699654, + "grad_norm": 0.19804099202156067, + "learning_rate": 1.4290832730858082e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58361320, + "step": 86565 + }, + { + "epoch": 2.1149195026018126, + "grad_norm": 0.1991894245147705, + "learning_rate": 1.4290062432602588e-06, + "loss": 0.0375, + "num_input_tokens_seen": 58364904, + "step": 86570 + }, + { + "epoch": 2.11504165343366, + "grad_norm": 0.08356073498725891, + "learning_rate": 1.4289292103148604e-06, + "loss": 0.0238, + "num_input_tokens_seen": 58368168, + "step": 86575 + }, + { + "epoch": 2.115163804265507, + "grad_norm": 8.472793579101562, + "learning_rate": 1.4288521742501734e-06, + "loss": 0.1127, + "num_input_tokens_seen": 58371112, + "step": 86580 + }, + { + "epoch": 2.115285955097354, + "grad_norm": 18.845136642456055, + "learning_rate": 1.4287751350667584e-06, + "loss": 0.0591, + "num_input_tokens_seen": 58374376, + "step": 86585 + }, + { + "epoch": 2.1154081059292014, + "grad_norm": 0.05907197669148445, + "learning_rate": 1.4286980927651749e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58377512, + "step": 86590 + }, + { + "epoch": 2.1155302567610486, + "grad_norm": 66.45479583740234, + "learning_rate": 1.4286210473459837e-06, + "loss": 0.0732, + "num_input_tokens_seen": 58380840, + "step": 86595 + }, + { + "epoch": 2.1156524075928957, + "grad_norm": 0.5579411387443542, + "learning_rate": 1.428543998809745e-06, + "loss": 0.0357, + "num_input_tokens_seen": 58384232, + "step": 86600 + }, + { + "epoch": 2.115774558424743, + "grad_norm": 0.16705289483070374, + "learning_rate": 1.4284669471570188e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58387624, + "step": 86605 + }, + { + "epoch": 2.11589670925659, + "grad_norm": 0.11491364985704422, + "learning_rate": 1.428389892388366e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58391144, + "step": 86610 + }, + { + "epoch": 2.1160188600884373, + "grad_norm": 0.024216242134571075, + "learning_rate": 1.4283128345043464e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58394728, + "step": 86615 + }, + { + "epoch": 2.1161410109202845, + "grad_norm": 100.37772369384766, + "learning_rate": 1.428235773505521e-06, + "loss": 0.007, + "num_input_tokens_seen": 58398248, + "step": 86620 + }, + { + "epoch": 2.1162631617521317, + "grad_norm": 0.06938160955905914, + "learning_rate": 1.4281587093924496e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58401768, + "step": 86625 + }, + { + "epoch": 2.116385312583979, + "grad_norm": 0.04211665689945221, + "learning_rate": 1.4280816421656932e-06, + "loss": 0.0226, + "num_input_tokens_seen": 58405352, + "step": 86630 + }, + { + "epoch": 2.116507463415826, + "grad_norm": 0.024318603798747063, + "learning_rate": 1.428004571825812e-06, + "loss": 0.0361, + "num_input_tokens_seen": 58408808, + "step": 86635 + }, + { + "epoch": 2.116629614247673, + "grad_norm": 9.751173973083496, + "learning_rate": 1.427927498373366e-06, + "loss": 0.057, + "num_input_tokens_seen": 58412456, + "step": 86640 + }, + { + "epoch": 2.11675176507952, + "grad_norm": 0.14358443021774292, + "learning_rate": 1.4278504218089164e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58416616, + "step": 86645 + }, + { + "epoch": 2.116873915911367, + "grad_norm": 0.046868305653333664, + "learning_rate": 1.4277733421330233e-06, + "loss": 0.0239, + "num_input_tokens_seen": 58420456, + "step": 86650 + }, + { + "epoch": 2.1169960667432144, + "grad_norm": 0.0075447880662977695, + "learning_rate": 1.4276962593462476e-06, + "loss": 0.0007, + "num_input_tokens_seen": 58423912, + "step": 86655 + }, + { + "epoch": 2.1171182175750616, + "grad_norm": 0.2818554937839508, + "learning_rate": 1.4276191734491497e-06, + "loss": 0.047, + "num_input_tokens_seen": 58427560, + "step": 86660 + }, + { + "epoch": 2.1172403684069088, + "grad_norm": 0.009356479160487652, + "learning_rate": 1.4275420844422898e-06, + "loss": 0.0261, + "num_input_tokens_seen": 58431016, + "step": 86665 + }, + { + "epoch": 2.117362519238756, + "grad_norm": 33.352073669433594, + "learning_rate": 1.4274649923262292e-06, + "loss": 0.0992, + "num_input_tokens_seen": 58434152, + "step": 86670 + }, + { + "epoch": 2.117484670070603, + "grad_norm": 0.08762264996767044, + "learning_rate": 1.427387897101528e-06, + "loss": 0.0008, + "num_input_tokens_seen": 58437096, + "step": 86675 + }, + { + "epoch": 2.1176068209024503, + "grad_norm": 0.22447797656059265, + "learning_rate": 1.4273107987687477e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58440488, + "step": 86680 + }, + { + "epoch": 2.1177289717342975, + "grad_norm": 123.31275939941406, + "learning_rate": 1.4272336973284476e-06, + "loss": 0.0415, + "num_input_tokens_seen": 58444136, + "step": 86685 + }, + { + "epoch": 2.1178511225661447, + "grad_norm": 0.04530341550707817, + "learning_rate": 1.4271565927811894e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58447784, + "step": 86690 + }, + { + "epoch": 2.117973273397992, + "grad_norm": 0.023438459262251854, + "learning_rate": 1.4270794851275336e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58451112, + "step": 86695 + }, + { + "epoch": 2.118095424229839, + "grad_norm": 0.04608852416276932, + "learning_rate": 1.4270023743680407e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58454248, + "step": 86700 + }, + { + "epoch": 2.1182175750616863, + "grad_norm": 0.01683771423995495, + "learning_rate": 1.4269252605032718e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58457704, + "step": 86705 + }, + { + "epoch": 2.1183397258935335, + "grad_norm": 0.07870247960090637, + "learning_rate": 1.4268481435337875e-06, + "loss": 0.0748, + "num_input_tokens_seen": 58461160, + "step": 86710 + }, + { + "epoch": 2.1184618767253807, + "grad_norm": 0.00973932072520256, + "learning_rate": 1.4267710234601488e-06, + "loss": 0.1228, + "num_input_tokens_seen": 58464552, + "step": 86715 + }, + { + "epoch": 2.118584027557228, + "grad_norm": 2.5605545043945312, + "learning_rate": 1.4266939002829163e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58468136, + "step": 86720 + }, + { + "epoch": 2.1187061783890746, + "grad_norm": 0.007359154988080263, + "learning_rate": 1.4266167740026513e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58471336, + "step": 86725 + }, + { + "epoch": 2.118828329220922, + "grad_norm": 0.039782486855983734, + "learning_rate": 1.4265396446199142e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58474600, + "step": 86730 + }, + { + "epoch": 2.118950480052769, + "grad_norm": 41.74934768676758, + "learning_rate": 1.426462512135266e-06, + "loss": 0.1686, + "num_input_tokens_seen": 58477672, + "step": 86735 + }, + { + "epoch": 2.119072630884616, + "grad_norm": 34.31242370605469, + "learning_rate": 1.426385376549268e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58481000, + "step": 86740 + }, + { + "epoch": 2.1191947817164634, + "grad_norm": 0.22472701966762543, + "learning_rate": 1.4263082378624804e-06, + "loss": 0.0455, + "num_input_tokens_seen": 58484328, + "step": 86745 + }, + { + "epoch": 2.1193169325483106, + "grad_norm": 22.060367584228516, + "learning_rate": 1.4262310960754649e-06, + "loss": 0.0405, + "num_input_tokens_seen": 58487656, + "step": 86750 + }, + { + "epoch": 2.1194390833801577, + "grad_norm": 0.008212032727897167, + "learning_rate": 1.4261539511887822e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58490728, + "step": 86755 + }, + { + "epoch": 2.119561234212005, + "grad_norm": 0.662553608417511, + "learning_rate": 1.4260768032029932e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58494184, + "step": 86760 + }, + { + "epoch": 2.119683385043852, + "grad_norm": 0.18204031884670258, + "learning_rate": 1.4259996521186591e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58497576, + "step": 86765 + }, + { + "epoch": 2.1198055358756993, + "grad_norm": 0.005318759009242058, + "learning_rate": 1.4259224979363413e-06, + "loss": 0.0115, + "num_input_tokens_seen": 58500648, + "step": 86770 + }, + { + "epoch": 2.1199276867075465, + "grad_norm": 0.008218302391469479, + "learning_rate": 1.4258453406566002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58504232, + "step": 86775 + }, + { + "epoch": 2.1200498375393937, + "grad_norm": 0.00433523952960968, + "learning_rate": 1.4257681802799973e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58507560, + "step": 86780 + }, + { + "epoch": 2.120171988371241, + "grad_norm": 0.038819458335638046, + "learning_rate": 1.4256910168070938e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58511144, + "step": 86785 + }, + { + "epoch": 2.120294139203088, + "grad_norm": 0.15920616686344147, + "learning_rate": 1.4256138502384508e-06, + "loss": 0.0501, + "num_input_tokens_seen": 58514024, + "step": 86790 + }, + { + "epoch": 2.1204162900349353, + "grad_norm": 0.36001694202423096, + "learning_rate": 1.425536680574629e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58517544, + "step": 86795 + }, + { + "epoch": 2.1205384408667824, + "grad_norm": 0.013765096664428711, + "learning_rate": 1.4254595078161905e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58521256, + "step": 86800 + }, + { + "epoch": 2.1206605916986296, + "grad_norm": 0.06415829807519913, + "learning_rate": 1.4253823319636958e-06, + "loss": 0.0389, + "num_input_tokens_seen": 58524840, + "step": 86805 + }, + { + "epoch": 2.120782742530477, + "grad_norm": 0.0557989776134491, + "learning_rate": 1.4253051530177063e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58528040, + "step": 86810 + }, + { + "epoch": 2.120904893362324, + "grad_norm": 0.6082044839859009, + "learning_rate": 1.4252279709787834e-06, + "loss": 0.0864, + "num_input_tokens_seen": 58531944, + "step": 86815 + }, + { + "epoch": 2.1210270441941708, + "grad_norm": 0.0982634648680687, + "learning_rate": 1.4251507858474882e-06, + "loss": 0.0879, + "num_input_tokens_seen": 58535080, + "step": 86820 + }, + { + "epoch": 2.121149195026018, + "grad_norm": 0.3374102711677551, + "learning_rate": 1.4250735976243823e-06, + "loss": 0.0402, + "num_input_tokens_seen": 58538216, + "step": 86825 + }, + { + "epoch": 2.121271345857865, + "grad_norm": 0.05706077441573143, + "learning_rate": 1.4249964063100266e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58541672, + "step": 86830 + }, + { + "epoch": 2.1213934966897123, + "grad_norm": 0.00961530301719904, + "learning_rate": 1.4249192119049832e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58544872, + "step": 86835 + }, + { + "epoch": 2.1215156475215595, + "grad_norm": 0.2719575762748718, + "learning_rate": 1.4248420144098128e-06, + "loss": 0.054, + "num_input_tokens_seen": 58548264, + "step": 86840 + }, + { + "epoch": 2.1216377983534067, + "grad_norm": 0.04637189581990242, + "learning_rate": 1.4247648138250768e-06, + "loss": 0.0455, + "num_input_tokens_seen": 58551656, + "step": 86845 + }, + { + "epoch": 2.121759949185254, + "grad_norm": 0.02738938108086586, + "learning_rate": 1.4246876101513369e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58555240, + "step": 86850 + }, + { + "epoch": 2.121882100017101, + "grad_norm": 19.862667083740234, + "learning_rate": 1.4246104033891545e-06, + "loss": 0.0448, + "num_input_tokens_seen": 58559080, + "step": 86855 + }, + { + "epoch": 2.1220042508489483, + "grad_norm": 0.0276873130351305, + "learning_rate": 1.4245331935390913e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58562728, + "step": 86860 + }, + { + "epoch": 2.1221264016807955, + "grad_norm": 0.032264113426208496, + "learning_rate": 1.424455980601708e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58566120, + "step": 86865 + }, + { + "epoch": 2.1222485525126427, + "grad_norm": 0.8636866807937622, + "learning_rate": 1.424378764577567e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58569768, + "step": 86870 + }, + { + "epoch": 2.12237070334449, + "grad_norm": 0.05388767644762993, + "learning_rate": 1.4243015454672294e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58573096, + "step": 86875 + }, + { + "epoch": 2.122492854176337, + "grad_norm": 0.08161472529172897, + "learning_rate": 1.4242243232712569e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58577128, + "step": 86880 + }, + { + "epoch": 2.1226150050081842, + "grad_norm": 0.09701462835073471, + "learning_rate": 1.424147097990211e-06, + "loss": 0.0515, + "num_input_tokens_seen": 58580328, + "step": 86885 + }, + { + "epoch": 2.1227371558400314, + "grad_norm": 22.87986183166504, + "learning_rate": 1.4240698696246535e-06, + "loss": 0.048, + "num_input_tokens_seen": 58583976, + "step": 86890 + }, + { + "epoch": 2.1228593066718786, + "grad_norm": 1.0535171031951904, + "learning_rate": 1.4239926381751455e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58587304, + "step": 86895 + }, + { + "epoch": 2.122981457503726, + "grad_norm": 0.009154610335826874, + "learning_rate": 1.423915403642249e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58590504, + "step": 86900 + }, + { + "epoch": 2.1231036083355725, + "grad_norm": 0.23902444541454315, + "learning_rate": 1.4238381660265259e-06, + "loss": 0.0417, + "num_input_tokens_seen": 58593768, + "step": 86905 + }, + { + "epoch": 2.1232257591674197, + "grad_norm": 432.938232421875, + "learning_rate": 1.4237609253285377e-06, + "loss": 0.0994, + "num_input_tokens_seen": 58597352, + "step": 86910 + }, + { + "epoch": 2.123347909999267, + "grad_norm": 12.413274765014648, + "learning_rate": 1.4236836815488458e-06, + "loss": 0.1099, + "num_input_tokens_seen": 58600616, + "step": 86915 + }, + { + "epoch": 2.123470060831114, + "grad_norm": 31.247962951660156, + "learning_rate": 1.4236064346880123e-06, + "loss": 0.0416, + "num_input_tokens_seen": 58604264, + "step": 86920 + }, + { + "epoch": 2.1235922116629613, + "grad_norm": 0.13186196982860565, + "learning_rate": 1.423529184746599e-06, + "loss": 0.032, + "num_input_tokens_seen": 58607848, + "step": 86925 + }, + { + "epoch": 2.1237143624948085, + "grad_norm": 9.545666694641113, + "learning_rate": 1.4234519317251674e-06, + "loss": 0.0918, + "num_input_tokens_seen": 58610920, + "step": 86930 + }, + { + "epoch": 2.1238365133266557, + "grad_norm": 0.056094422936439514, + "learning_rate": 1.4233746756242795e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58614440, + "step": 86935 + }, + { + "epoch": 2.123958664158503, + "grad_norm": 0.004741915967315435, + "learning_rate": 1.4232974164444972e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58617704, + "step": 86940 + }, + { + "epoch": 2.12408081499035, + "grad_norm": 0.052919551730155945, + "learning_rate": 1.4232201541863822e-06, + "loss": 0.0412, + "num_input_tokens_seen": 58621032, + "step": 86945 + }, + { + "epoch": 2.1242029658221973, + "grad_norm": 0.21680483222007751, + "learning_rate": 1.4231428888504964e-06, + "loss": 0.1003, + "num_input_tokens_seen": 58625128, + "step": 86950 + }, + { + "epoch": 2.1243251166540444, + "grad_norm": 19.620275497436523, + "learning_rate": 1.4230656204374017e-06, + "loss": 0.0425, + "num_input_tokens_seen": 58628584, + "step": 86955 + }, + { + "epoch": 2.1244472674858916, + "grad_norm": 0.05666939914226532, + "learning_rate": 1.4229883489476599e-06, + "loss": 0.0337, + "num_input_tokens_seen": 58631976, + "step": 86960 + }, + { + "epoch": 2.124569418317739, + "grad_norm": 0.74814373254776, + "learning_rate": 1.422911074381833e-06, + "loss": 0.0371, + "num_input_tokens_seen": 58635304, + "step": 86965 + }, + { + "epoch": 2.124691569149586, + "grad_norm": 0.12086189538240433, + "learning_rate": 1.4228337967404833e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58638376, + "step": 86970 + }, + { + "epoch": 2.124813719981433, + "grad_norm": 0.035326484590768814, + "learning_rate": 1.4227565160241724e-06, + "loss": 0.0988, + "num_input_tokens_seen": 58641512, + "step": 86975 + }, + { + "epoch": 2.1249358708132804, + "grad_norm": 0.014093923382461071, + "learning_rate": 1.4226792322334622e-06, + "loss": 0.0436, + "num_input_tokens_seen": 58644968, + "step": 86980 + }, + { + "epoch": 2.1250580216451276, + "grad_norm": 88.9645767211914, + "learning_rate": 1.4226019453689151e-06, + "loss": 0.1034, + "num_input_tokens_seen": 58648296, + "step": 86985 + }, + { + "epoch": 2.1251801724769748, + "grad_norm": 0.2062487155199051, + "learning_rate": 1.422524655431093e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58651880, + "step": 86990 + }, + { + "epoch": 2.125302323308822, + "grad_norm": 0.2350393831729889, + "learning_rate": 1.422447362420558e-06, + "loss": 0.0016, + "num_input_tokens_seen": 58655848, + "step": 86995 + }, + { + "epoch": 2.1254244741406687, + "grad_norm": 0.06757447868585587, + "learning_rate": 1.422370066337872e-06, + "loss": 0.0008, + "num_input_tokens_seen": 58659688, + "step": 87000 + }, + { + "epoch": 2.125546624972516, + "grad_norm": 0.2243819236755371, + "learning_rate": 1.4222927671835976e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58663144, + "step": 87005 + }, + { + "epoch": 2.125668775804363, + "grad_norm": 0.18053650856018066, + "learning_rate": 1.4222154649582963e-06, + "loss": 0.0488, + "num_input_tokens_seen": 58666472, + "step": 87010 + }, + { + "epoch": 2.1257909266362103, + "grad_norm": 0.00022531415743287653, + "learning_rate": 1.4221381596625307e-06, + "loss": 0.005, + "num_input_tokens_seen": 58670184, + "step": 87015 + }, + { + "epoch": 2.1259130774680575, + "grad_norm": 0.03191062808036804, + "learning_rate": 1.4220608512968627e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58673384, + "step": 87020 + }, + { + "epoch": 2.1260352282999047, + "grad_norm": 0.20486636459827423, + "learning_rate": 1.4219835398618548e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58676520, + "step": 87025 + }, + { + "epoch": 2.126157379131752, + "grad_norm": 0.008991490118205547, + "learning_rate": 1.4219062253580691e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58680104, + "step": 87030 + }, + { + "epoch": 2.126279529963599, + "grad_norm": 0.015981163829565048, + "learning_rate": 1.421828907786068e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58683240, + "step": 87035 + }, + { + "epoch": 2.1264016807954462, + "grad_norm": 0.20896996557712555, + "learning_rate": 1.4217515871464132e-06, + "loss": 0.062, + "num_input_tokens_seen": 58686888, + "step": 87040 + }, + { + "epoch": 2.1265238316272934, + "grad_norm": 0.138705775141716, + "learning_rate": 1.4216742634396677e-06, + "loss": 0.0225, + "num_input_tokens_seen": 58690088, + "step": 87045 + }, + { + "epoch": 2.1266459824591406, + "grad_norm": 0.12179794907569885, + "learning_rate": 1.4215969366663936e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58693288, + "step": 87050 + }, + { + "epoch": 2.126768133290988, + "grad_norm": 0.041572365909814835, + "learning_rate": 1.4215196068271531e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58696616, + "step": 87055 + }, + { + "epoch": 2.126890284122835, + "grad_norm": 0.3251390755176544, + "learning_rate": 1.4214422739225087e-06, + "loss": 0.0502, + "num_input_tokens_seen": 58699752, + "step": 87060 + }, + { + "epoch": 2.127012434954682, + "grad_norm": 18.905942916870117, + "learning_rate": 1.4213649379530228e-06, + "loss": 0.1728, + "num_input_tokens_seen": 58703272, + "step": 87065 + }, + { + "epoch": 2.1271345857865294, + "grad_norm": 30.930639266967773, + "learning_rate": 1.4212875989192573e-06, + "loss": 0.0559, + "num_input_tokens_seen": 58706536, + "step": 87070 + }, + { + "epoch": 2.1272567366183766, + "grad_norm": 34.58064651489258, + "learning_rate": 1.4212102568217755e-06, + "loss": 0.1699, + "num_input_tokens_seen": 58710120, + "step": 87075 + }, + { + "epoch": 2.1273788874502237, + "grad_norm": 41.41680145263672, + "learning_rate": 1.4211329116611392e-06, + "loss": 0.1176, + "num_input_tokens_seen": 58713128, + "step": 87080 + }, + { + "epoch": 2.1275010382820705, + "grad_norm": 0.3319126069545746, + "learning_rate": 1.4210555634379113e-06, + "loss": 0.0312, + "num_input_tokens_seen": 58715752, + "step": 87085 + }, + { + "epoch": 2.1276231891139177, + "grad_norm": 2.9074134826660156, + "learning_rate": 1.420978212152654e-06, + "loss": 0.0013, + "num_input_tokens_seen": 58719016, + "step": 87090 + }, + { + "epoch": 2.127745339945765, + "grad_norm": 124.58538055419922, + "learning_rate": 1.4209008578059299e-06, + "loss": 0.0369, + "num_input_tokens_seen": 58722216, + "step": 87095 + }, + { + "epoch": 2.127867490777612, + "grad_norm": 0.031058380380272865, + "learning_rate": 1.4208235003983017e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58725800, + "step": 87100 + }, + { + "epoch": 2.1279896416094592, + "grad_norm": 0.14676395058631897, + "learning_rate": 1.4207461399303316e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58728744, + "step": 87105 + }, + { + "epoch": 2.1281117924413064, + "grad_norm": 0.11780012398958206, + "learning_rate": 1.4206687764025825e-06, + "loss": 0.0396, + "num_input_tokens_seen": 58732392, + "step": 87110 + }, + { + "epoch": 2.1282339432731536, + "grad_norm": 0.019878914579749107, + "learning_rate": 1.4205914098156168e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58735592, + "step": 87115 + }, + { + "epoch": 2.128356094105001, + "grad_norm": 0.05015110224485397, + "learning_rate": 1.4205140401699973e-06, + "loss": 0.0325, + "num_input_tokens_seen": 58739048, + "step": 87120 + }, + { + "epoch": 2.128478244936848, + "grad_norm": 0.05444740504026413, + "learning_rate": 1.4204366674662867e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58742696, + "step": 87125 + }, + { + "epoch": 2.128600395768695, + "grad_norm": 16.788597106933594, + "learning_rate": 1.4203592917050476e-06, + "loss": 0.0418, + "num_input_tokens_seen": 58746216, + "step": 87130 + }, + { + "epoch": 2.1287225466005424, + "grad_norm": 0.09274864196777344, + "learning_rate": 1.4202819128868422e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58749608, + "step": 87135 + }, + { + "epoch": 2.1288446974323896, + "grad_norm": 0.013930370099842548, + "learning_rate": 1.4202045310122341e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58752680, + "step": 87140 + }, + { + "epoch": 2.1289668482642368, + "grad_norm": 0.3440989851951599, + "learning_rate": 1.4201271460817859e-06, + "loss": 0.0069, + "num_input_tokens_seen": 58756200, + "step": 87145 + }, + { + "epoch": 2.129088999096084, + "grad_norm": 220.0663604736328, + "learning_rate": 1.4200497580960597e-06, + "loss": 0.0305, + "num_input_tokens_seen": 58759208, + "step": 87150 + }, + { + "epoch": 2.129211149927931, + "grad_norm": 34.489627838134766, + "learning_rate": 1.4199723670556187e-06, + "loss": 0.1128, + "num_input_tokens_seen": 58762600, + "step": 87155 + }, + { + "epoch": 2.1293333007597783, + "grad_norm": 0.28581130504608154, + "learning_rate": 1.419894972961026e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58765928, + "step": 87160 + }, + { + "epoch": 2.1294554515916255, + "grad_norm": 159.50994873046875, + "learning_rate": 1.4198175758128436e-06, + "loss": 0.1243, + "num_input_tokens_seen": 58769192, + "step": 87165 + }, + { + "epoch": 2.1295776024234723, + "grad_norm": 0.1996682584285736, + "learning_rate": 1.4197401756116352e-06, + "loss": 0.0542, + "num_input_tokens_seen": 58772072, + "step": 87170 + }, + { + "epoch": 2.1296997532553195, + "grad_norm": 0.0032133408822119236, + "learning_rate": 1.419662772357963e-06, + "loss": 0.0302, + "num_input_tokens_seen": 58775336, + "step": 87175 + }, + { + "epoch": 2.1298219040871667, + "grad_norm": 0.0018217507749795914, + "learning_rate": 1.4195853660523907e-06, + "loss": 0.0539, + "num_input_tokens_seen": 58779112, + "step": 87180 + }, + { + "epoch": 2.129944054919014, + "grad_norm": 0.006405272521078587, + "learning_rate": 1.4195079566954805e-06, + "loss": 0.0675, + "num_input_tokens_seen": 58782760, + "step": 87185 + }, + { + "epoch": 2.130066205750861, + "grad_norm": 0.30736804008483887, + "learning_rate": 1.419430544287796e-06, + "loss": 0.0288, + "num_input_tokens_seen": 58786664, + "step": 87190 + }, + { + "epoch": 2.130188356582708, + "grad_norm": 36.743751525878906, + "learning_rate": 1.4193531288298993e-06, + "loss": 0.0721, + "num_input_tokens_seen": 58789928, + "step": 87195 + }, + { + "epoch": 2.1303105074145554, + "grad_norm": 0.030499042943120003, + "learning_rate": 1.419275710322354e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58793512, + "step": 87200 + }, + { + "epoch": 2.1304326582464026, + "grad_norm": 0.0741528794169426, + "learning_rate": 1.419198288765723e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58796456, + "step": 87205 + }, + { + "epoch": 2.13055480907825, + "grad_norm": 0.9557086229324341, + "learning_rate": 1.4191208641605693e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58799528, + "step": 87210 + }, + { + "epoch": 2.130676959910097, + "grad_norm": 191.2623291015625, + "learning_rate": 1.4190434365074559e-06, + "loss": 0.0044, + "num_input_tokens_seen": 58803048, + "step": 87215 + }, + { + "epoch": 2.130799110741944, + "grad_norm": 0.003294636495411396, + "learning_rate": 1.418966005806946e-06, + "loss": 0.0366, + "num_input_tokens_seen": 58806248, + "step": 87220 + }, + { + "epoch": 2.1309212615737914, + "grad_norm": 0.1290232241153717, + "learning_rate": 1.4188885720596022e-06, + "loss": 0.1019, + "num_input_tokens_seen": 58809448, + "step": 87225 + }, + { + "epoch": 2.1310434124056385, + "grad_norm": 0.012942397966980934, + "learning_rate": 1.4188111352659884e-06, + "loss": 0.0567, + "num_input_tokens_seen": 58812648, + "step": 87230 + }, + { + "epoch": 2.1311655632374857, + "grad_norm": 0.004310682415962219, + "learning_rate": 1.4187336954266674e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58815784, + "step": 87235 + }, + { + "epoch": 2.131287714069333, + "grad_norm": 11.728301048278809, + "learning_rate": 1.4186562525422025e-06, + "loss": 0.1964, + "num_input_tokens_seen": 58819240, + "step": 87240 + }, + { + "epoch": 2.13140986490118, + "grad_norm": 1.623407006263733, + "learning_rate": 1.4185788066131566e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58822760, + "step": 87245 + }, + { + "epoch": 2.1315320157330273, + "grad_norm": 0.38209009170532227, + "learning_rate": 1.4185013576400928e-06, + "loss": 0.0262, + "num_input_tokens_seen": 58825704, + "step": 87250 + }, + { + "epoch": 2.1316541665648745, + "grad_norm": 0.17902544140815735, + "learning_rate": 1.418423905623575e-06, + "loss": 0.0006, + "num_input_tokens_seen": 58829416, + "step": 87255 + }, + { + "epoch": 2.1317763173967217, + "grad_norm": 559.6621704101562, + "learning_rate": 1.4183464505641656e-06, + "loss": 0.1206, + "num_input_tokens_seen": 58832744, + "step": 87260 + }, + { + "epoch": 2.1318984682285684, + "grad_norm": 0.02113385498523712, + "learning_rate": 1.4182689924624285e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58835752, + "step": 87265 + }, + { + "epoch": 2.1320206190604156, + "grad_norm": 0.0700068548321724, + "learning_rate": 1.4181915313189269e-06, + "loss": 0.0479, + "num_input_tokens_seen": 58838952, + "step": 87270 + }, + { + "epoch": 2.132142769892263, + "grad_norm": 0.02595849707722664, + "learning_rate": 1.4181140671342235e-06, + "loss": 0.044, + "num_input_tokens_seen": 58842024, + "step": 87275 + }, + { + "epoch": 2.13226492072411, + "grad_norm": 22.93673324584961, + "learning_rate": 1.4180365999088826e-06, + "loss": 0.1059, + "num_input_tokens_seen": 58845800, + "step": 87280 + }, + { + "epoch": 2.132387071555957, + "grad_norm": 0.012322576716542244, + "learning_rate": 1.4179591296434669e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58849896, + "step": 87285 + }, + { + "epoch": 2.1325092223878044, + "grad_norm": 0.16245467960834503, + "learning_rate": 1.4178816563385398e-06, + "loss": 0.0013, + "num_input_tokens_seen": 58853032, + "step": 87290 + }, + { + "epoch": 2.1326313732196516, + "grad_norm": 20.192350387573242, + "learning_rate": 1.4178041799946653e-06, + "loss": 0.0812, + "num_input_tokens_seen": 58856296, + "step": 87295 + }, + { + "epoch": 2.1327535240514988, + "grad_norm": 130.31716918945312, + "learning_rate": 1.4177267006124064e-06, + "loss": 0.0668, + "num_input_tokens_seen": 58859688, + "step": 87300 + }, + { + "epoch": 2.132875674883346, + "grad_norm": 73.06332397460938, + "learning_rate": 1.4176492181923267e-06, + "loss": 0.064, + "num_input_tokens_seen": 58863656, + "step": 87305 + }, + { + "epoch": 2.132997825715193, + "grad_norm": 0.1604689657688141, + "learning_rate": 1.4175717327349893e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58867304, + "step": 87310 + }, + { + "epoch": 2.1331199765470403, + "grad_norm": 0.010926165618002415, + "learning_rate": 1.417494244240958e-06, + "loss": 0.0009, + "num_input_tokens_seen": 58870952, + "step": 87315 + }, + { + "epoch": 2.1332421273788875, + "grad_norm": 0.008155797608196735, + "learning_rate": 1.4174167527107961e-06, + "loss": 0.042, + "num_input_tokens_seen": 58874600, + "step": 87320 + }, + { + "epoch": 2.1333642782107347, + "grad_norm": 23.913652420043945, + "learning_rate": 1.4173392581450674e-06, + "loss": 0.0888, + "num_input_tokens_seen": 58877864, + "step": 87325 + }, + { + "epoch": 2.133486429042582, + "grad_norm": 0.018106626346707344, + "learning_rate": 1.4172617605443353e-06, + "loss": 0.0483, + "num_input_tokens_seen": 58881000, + "step": 87330 + }, + { + "epoch": 2.133608579874429, + "grad_norm": 0.0024407675955444574, + "learning_rate": 1.4171842599091636e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58884328, + "step": 87335 + }, + { + "epoch": 2.1337307307062763, + "grad_norm": 0.4352200925350189, + "learning_rate": 1.4171067562401157e-06, + "loss": 0.0466, + "num_input_tokens_seen": 58887400, + "step": 87340 + }, + { + "epoch": 2.1338528815381235, + "grad_norm": 0.054799050092697144, + "learning_rate": 1.4170292495377554e-06, + "loss": 0.0215, + "num_input_tokens_seen": 58891048, + "step": 87345 + }, + { + "epoch": 2.13397503236997, + "grad_norm": 1.654451847076416, + "learning_rate": 1.416951739802646e-06, + "loss": 0.0014, + "num_input_tokens_seen": 58894504, + "step": 87350 + }, + { + "epoch": 2.1340971832018174, + "grad_norm": 0.7009220123291016, + "learning_rate": 1.4168742270353515e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58897704, + "step": 87355 + }, + { + "epoch": 2.1342193340336646, + "grad_norm": 0.09969975799322128, + "learning_rate": 1.4167967112364357e-06, + "loss": 0.001, + "num_input_tokens_seen": 58900776, + "step": 87360 + }, + { + "epoch": 2.134341484865512, + "grad_norm": 0.19023500382900238, + "learning_rate": 1.416719192406462e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58904168, + "step": 87365 + }, + { + "epoch": 2.134463635697359, + "grad_norm": 0.1158929094672203, + "learning_rate": 1.4166416705459941e-06, + "loss": 0.1239, + "num_input_tokens_seen": 58907240, + "step": 87370 + }, + { + "epoch": 2.134585786529206, + "grad_norm": 0.24258743226528168, + "learning_rate": 1.4165641456555959e-06, + "loss": 0.0004, + "num_input_tokens_seen": 58910504, + "step": 87375 + }, + { + "epoch": 2.1347079373610534, + "grad_norm": 0.011560074985027313, + "learning_rate": 1.4164866177358312e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58913896, + "step": 87380 + }, + { + "epoch": 2.1348300881929005, + "grad_norm": 0.0013848430244252086, + "learning_rate": 1.4164090867872638e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58917480, + "step": 87385 + }, + { + "epoch": 2.1349522390247477, + "grad_norm": 0.1996019333600998, + "learning_rate": 1.4163315528104576e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58921000, + "step": 87390 + }, + { + "epoch": 2.135074389856595, + "grad_norm": 23.325389862060547, + "learning_rate": 1.4162540158059765e-06, + "loss": 0.0933, + "num_input_tokens_seen": 58924008, + "step": 87395 + }, + { + "epoch": 2.135196540688442, + "grad_norm": 0.017512032762169838, + "learning_rate": 1.416176475774384e-06, + "loss": 0.0377, + "num_input_tokens_seen": 58927784, + "step": 87400 + }, + { + "epoch": 2.1353186915202893, + "grad_norm": 0.0444110669195652, + "learning_rate": 1.4160989327162443e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58930920, + "step": 87405 + }, + { + "epoch": 2.1354408423521365, + "grad_norm": 0.00017387409752700478, + "learning_rate": 1.4160213866321216e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58934248, + "step": 87410 + }, + { + "epoch": 2.1355629931839837, + "grad_norm": 0.006010678131133318, + "learning_rate": 1.4159438375225788e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58937576, + "step": 87415 + }, + { + "epoch": 2.135685144015831, + "grad_norm": 0.02587669901549816, + "learning_rate": 1.4158662853881809e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58940840, + "step": 87420 + }, + { + "epoch": 2.135807294847678, + "grad_norm": 0.43707767128944397, + "learning_rate": 1.4157887302294916e-06, + "loss": 0.0003, + "num_input_tokens_seen": 58943912, + "step": 87425 + }, + { + "epoch": 2.1359294456795253, + "grad_norm": 0.0007447099778801203, + "learning_rate": 1.4157111720470746e-06, + "loss": 0.0692, + "num_input_tokens_seen": 58947624, + "step": 87430 + }, + { + "epoch": 2.1360515965113724, + "grad_norm": 0.004575371742248535, + "learning_rate": 1.4156336108414944e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58951080, + "step": 87435 + }, + { + "epoch": 2.1361737473432196, + "grad_norm": 26.027252197265625, + "learning_rate": 1.4155560466133146e-06, + "loss": 0.0458, + "num_input_tokens_seen": 58954920, + "step": 87440 + }, + { + "epoch": 2.1362958981750664, + "grad_norm": 0.03521507978439331, + "learning_rate": 1.4154784793630993e-06, + "loss": 0.0405, + "num_input_tokens_seen": 58958376, + "step": 87445 + }, + { + "epoch": 2.1364180490069136, + "grad_norm": 38.19405746459961, + "learning_rate": 1.415400909091413e-06, + "loss": 0.068, + "num_input_tokens_seen": 58961512, + "step": 87450 + }, + { + "epoch": 2.1365401998387608, + "grad_norm": 0.009486167691648006, + "learning_rate": 1.4153233357988197e-06, + "loss": 0.0, + "num_input_tokens_seen": 58964968, + "step": 87455 + }, + { + "epoch": 2.136662350670608, + "grad_norm": 0.016952499747276306, + "learning_rate": 1.4152457594858834e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58968232, + "step": 87460 + }, + { + "epoch": 2.136784501502455, + "grad_norm": 0.0029374232981354, + "learning_rate": 1.415168180153168e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58971688, + "step": 87465 + }, + { + "epoch": 2.1369066523343023, + "grad_norm": 0.18648330867290497, + "learning_rate": 1.415090597801238e-06, + "loss": 0.0002, + "num_input_tokens_seen": 58975016, + "step": 87470 + }, + { + "epoch": 2.1370288031661495, + "grad_norm": 0.0014150391798466444, + "learning_rate": 1.4150130124306574e-06, + "loss": 0.0005, + "num_input_tokens_seen": 58978408, + "step": 87475 + }, + { + "epoch": 2.1371509539979967, + "grad_norm": 33.36257553100586, + "learning_rate": 1.4149354240419906e-06, + "loss": 0.3102, + "num_input_tokens_seen": 58981416, + "step": 87480 + }, + { + "epoch": 2.137273104829844, + "grad_norm": 0.2093086987733841, + "learning_rate": 1.414857832635802e-06, + "loss": 0.0778, + "num_input_tokens_seen": 58984488, + "step": 87485 + }, + { + "epoch": 2.137395255661691, + "grad_norm": 0.030884187668561935, + "learning_rate": 1.4147802382126553e-06, + "loss": 0.0527, + "num_input_tokens_seen": 58988520, + "step": 87490 + }, + { + "epoch": 2.1375174064935383, + "grad_norm": 0.04900249466300011, + "learning_rate": 1.4147026407731156e-06, + "loss": 0.0001, + "num_input_tokens_seen": 58992040, + "step": 87495 + }, + { + "epoch": 2.1376395573253855, + "grad_norm": 0.12208317965269089, + "learning_rate": 1.4146250403177464e-06, + "loss": 0.0316, + "num_input_tokens_seen": 58995432, + "step": 87500 + }, + { + "epoch": 2.1377617081572327, + "grad_norm": 6.88958740234375, + "learning_rate": 1.4145474368471124e-06, + "loss": 0.0014, + "num_input_tokens_seen": 58999464, + "step": 87505 + }, + { + "epoch": 2.13788385898908, + "grad_norm": 0.01789010316133499, + "learning_rate": 1.4144698303617782e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59002472, + "step": 87510 + }, + { + "epoch": 2.138006009820927, + "grad_norm": 0.26290953159332275, + "learning_rate": 1.4143922208623078e-06, + "loss": 0.0044, + "num_input_tokens_seen": 59005352, + "step": 87515 + }, + { + "epoch": 2.1381281606527742, + "grad_norm": 0.011401143856346607, + "learning_rate": 1.4143146083492656e-06, + "loss": 0.0865, + "num_input_tokens_seen": 59008808, + "step": 87520 + }, + { + "epoch": 2.1382503114846214, + "grad_norm": 0.038983121514320374, + "learning_rate": 1.4142369928232164e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59012136, + "step": 87525 + }, + { + "epoch": 2.138372462316468, + "grad_norm": 0.15900737047195435, + "learning_rate": 1.414159374284724e-06, + "loss": 0.0601, + "num_input_tokens_seen": 59015656, + "step": 87530 + }, + { + "epoch": 2.1384946131483153, + "grad_norm": 0.14519399404525757, + "learning_rate": 1.4140817527343534e-06, + "loss": 0.0923, + "num_input_tokens_seen": 59019304, + "step": 87535 + }, + { + "epoch": 2.1386167639801625, + "grad_norm": 0.28216826915740967, + "learning_rate": 1.4140041281726686e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59022696, + "step": 87540 + }, + { + "epoch": 2.1387389148120097, + "grad_norm": 19.394559860229492, + "learning_rate": 1.413926500600235e-06, + "loss": 0.0358, + "num_input_tokens_seen": 59026280, + "step": 87545 + }, + { + "epoch": 2.138861065643857, + "grad_norm": 0.026544447988271713, + "learning_rate": 1.4138488700176163e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59029736, + "step": 87550 + }, + { + "epoch": 2.138983216475704, + "grad_norm": 0.17702056467533112, + "learning_rate": 1.4137712364253774e-06, + "loss": 0.0029, + "num_input_tokens_seen": 59033192, + "step": 87555 + }, + { + "epoch": 2.1391053673075513, + "grad_norm": 0.13731959462165833, + "learning_rate": 1.4136935998240827e-06, + "loss": 0.0051, + "num_input_tokens_seen": 59036072, + "step": 87560 + }, + { + "epoch": 2.1392275181393985, + "grad_norm": 0.007630325388163328, + "learning_rate": 1.413615960214297e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59039144, + "step": 87565 + }, + { + "epoch": 2.1393496689712457, + "grad_norm": 28.514596939086914, + "learning_rate": 1.4135383175965844e-06, + "loss": 0.0824, + "num_input_tokens_seen": 59042792, + "step": 87570 + }, + { + "epoch": 2.139471819803093, + "grad_norm": 0.005110455676913261, + "learning_rate": 1.41346067197151e-06, + "loss": 0.0279, + "num_input_tokens_seen": 59045800, + "step": 87575 + }, + { + "epoch": 2.13959397063494, + "grad_norm": 41.230953216552734, + "learning_rate": 1.4133830233396386e-06, + "loss": 0.0739, + "num_input_tokens_seen": 59049000, + "step": 87580 + }, + { + "epoch": 2.1397161214667872, + "grad_norm": 0.053071048110723495, + "learning_rate": 1.413305371701535e-06, + "loss": 0.0615, + "num_input_tokens_seen": 59052136, + "step": 87585 + }, + { + "epoch": 2.1398382722986344, + "grad_norm": 0.1270586997270584, + "learning_rate": 1.413227717057763e-06, + "loss": 0.0559, + "num_input_tokens_seen": 59055400, + "step": 87590 + }, + { + "epoch": 2.1399604231304816, + "grad_norm": 0.13777196407318115, + "learning_rate": 1.413150059408888e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59058536, + "step": 87595 + }, + { + "epoch": 2.140082573962329, + "grad_norm": 0.22946496307849884, + "learning_rate": 1.4130723987554747e-06, + "loss": 0.0394, + "num_input_tokens_seen": 59061416, + "step": 87600 + }, + { + "epoch": 2.140204724794176, + "grad_norm": 164.1780242919922, + "learning_rate": 1.4129947350980878e-06, + "loss": 0.121, + "num_input_tokens_seen": 59064488, + "step": 87605 + }, + { + "epoch": 2.140326875626023, + "grad_norm": 0.3987724184989929, + "learning_rate": 1.4129170684372921e-06, + "loss": 0.076, + "num_input_tokens_seen": 59067880, + "step": 87610 + }, + { + "epoch": 2.1404490264578704, + "grad_norm": 0.3457280993461609, + "learning_rate": 1.4128393987736526e-06, + "loss": 0.0013, + "num_input_tokens_seen": 59070888, + "step": 87615 + }, + { + "epoch": 2.1405711772897176, + "grad_norm": 0.03645173832774162, + "learning_rate": 1.412761726107734e-06, + "loss": 0.0106, + "num_input_tokens_seen": 59073960, + "step": 87620 + }, + { + "epoch": 2.1406933281215643, + "grad_norm": 0.3854512870311737, + "learning_rate": 1.412684050440101e-06, + "loss": 0.0318, + "num_input_tokens_seen": 59077416, + "step": 87625 + }, + { + "epoch": 2.1408154789534115, + "grad_norm": 0.014370037242770195, + "learning_rate": 1.4126063717713183e-06, + "loss": 0.0445, + "num_input_tokens_seen": 59080680, + "step": 87630 + }, + { + "epoch": 2.1409376297852587, + "grad_norm": 0.035824961960315704, + "learning_rate": 1.4125286901019513e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59084008, + "step": 87635 + }, + { + "epoch": 2.141059780617106, + "grad_norm": 0.15429726243019104, + "learning_rate": 1.4124510054325648e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59087272, + "step": 87640 + }, + { + "epoch": 2.141181931448953, + "grad_norm": 0.05572713911533356, + "learning_rate": 1.4123733177637236e-06, + "loss": 0.0414, + "num_input_tokens_seen": 59090920, + "step": 87645 + }, + { + "epoch": 2.1413040822808003, + "grad_norm": 0.38019487261772156, + "learning_rate": 1.4122956270959927e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59094440, + "step": 87650 + }, + { + "epoch": 2.1414262331126475, + "grad_norm": 0.1866544634103775, + "learning_rate": 1.412217933429937e-06, + "loss": 0.0589, + "num_input_tokens_seen": 59097704, + "step": 87655 + }, + { + "epoch": 2.1415483839444946, + "grad_norm": 0.01762351393699646, + "learning_rate": 1.4121402367661217e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59100712, + "step": 87660 + }, + { + "epoch": 2.141670534776342, + "grad_norm": 0.14558862149715424, + "learning_rate": 1.4120625371051119e-06, + "loss": 0.0344, + "num_input_tokens_seen": 59104040, + "step": 87665 + }, + { + "epoch": 2.141792685608189, + "grad_norm": 0.004253696650266647, + "learning_rate": 1.4119848344474723e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59107432, + "step": 87670 + }, + { + "epoch": 2.141914836440036, + "grad_norm": 0.03200583904981613, + "learning_rate": 1.4119071287937683e-06, + "loss": 0.0582, + "num_input_tokens_seen": 59110888, + "step": 87675 + }, + { + "epoch": 2.1420369872718834, + "grad_norm": 0.024416061118245125, + "learning_rate": 1.4118294201445648e-06, + "loss": 0.0766, + "num_input_tokens_seen": 59114024, + "step": 87680 + }, + { + "epoch": 2.1421591381037306, + "grad_norm": 0.23416262865066528, + "learning_rate": 1.411751708500427e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59117160, + "step": 87685 + }, + { + "epoch": 2.142281288935578, + "grad_norm": 0.008143766783177853, + "learning_rate": 1.41167399386192e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59120360, + "step": 87690 + }, + { + "epoch": 2.142403439767425, + "grad_norm": 0.030633976683020592, + "learning_rate": 1.4115962762296088e-06, + "loss": 0.0321, + "num_input_tokens_seen": 59123496, + "step": 87695 + }, + { + "epoch": 2.142525590599272, + "grad_norm": 0.056163497269153595, + "learning_rate": 1.411518555604059e-06, + "loss": 0.001, + "num_input_tokens_seen": 59126952, + "step": 87700 + }, + { + "epoch": 2.1426477414311194, + "grad_norm": 0.0033839961979538202, + "learning_rate": 1.4114408319858355e-06, + "loss": 0.0225, + "num_input_tokens_seen": 59130920, + "step": 87705 + }, + { + "epoch": 2.142769892262966, + "grad_norm": 633.6400756835938, + "learning_rate": 1.4113631053755037e-06, + "loss": 0.0648, + "num_input_tokens_seen": 59133992, + "step": 87710 + }, + { + "epoch": 2.1428920430948133, + "grad_norm": 14.898810386657715, + "learning_rate": 1.4112853757736288e-06, + "loss": 0.0513, + "num_input_tokens_seen": 59137448, + "step": 87715 + }, + { + "epoch": 2.1430141939266605, + "grad_norm": 1.018584132194519, + "learning_rate": 1.411207643180776e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59140712, + "step": 87720 + }, + { + "epoch": 2.1431363447585077, + "grad_norm": 0.012547546997666359, + "learning_rate": 1.4111299075975103e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59143976, + "step": 87725 + }, + { + "epoch": 2.143258495590355, + "grad_norm": 0.04866180196404457, + "learning_rate": 1.4110521690243977e-06, + "loss": 0.0377, + "num_input_tokens_seen": 59147944, + "step": 87730 + }, + { + "epoch": 2.143380646422202, + "grad_norm": 0.005499622318893671, + "learning_rate": 1.4109744274620031e-06, + "loss": 0.0271, + "num_input_tokens_seen": 59151272, + "step": 87735 + }, + { + "epoch": 2.1435027972540492, + "grad_norm": 0.0666104257106781, + "learning_rate": 1.410896682910892e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59154792, + "step": 87740 + }, + { + "epoch": 2.1436249480858964, + "grad_norm": 0.014532984234392643, + "learning_rate": 1.4108189353716292e-06, + "loss": 0.0006, + "num_input_tokens_seen": 59158184, + "step": 87745 + }, + { + "epoch": 2.1437470989177436, + "grad_norm": 0.028684677556157112, + "learning_rate": 1.4107411848447813e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59161512, + "step": 87750 + }, + { + "epoch": 2.143869249749591, + "grad_norm": 26.549978256225586, + "learning_rate": 1.4106634313309124e-06, + "loss": 0.0964, + "num_input_tokens_seen": 59164456, + "step": 87755 + }, + { + "epoch": 2.143991400581438, + "grad_norm": 67.54285430908203, + "learning_rate": 1.4105856748305889e-06, + "loss": 0.1318, + "num_input_tokens_seen": 59167784, + "step": 87760 + }, + { + "epoch": 2.144113551413285, + "grad_norm": 0.00807912740856409, + "learning_rate": 1.410507915344376e-06, + "loss": 0.0, + "num_input_tokens_seen": 59170792, + "step": 87765 + }, + { + "epoch": 2.1442357022451324, + "grad_norm": 0.05754079297184944, + "learning_rate": 1.4104301528728393e-06, + "loss": 0.0351, + "num_input_tokens_seen": 59174440, + "step": 87770 + }, + { + "epoch": 2.1443578530769796, + "grad_norm": 0.09071355313062668, + "learning_rate": 1.410352387416544e-06, + "loss": 0.049, + "num_input_tokens_seen": 59178216, + "step": 87775 + }, + { + "epoch": 2.1444800039088268, + "grad_norm": 0.04613567888736725, + "learning_rate": 1.4102746189760555e-06, + "loss": 0.0053, + "num_input_tokens_seen": 59181736, + "step": 87780 + }, + { + "epoch": 2.144602154740674, + "grad_norm": 0.14142712950706482, + "learning_rate": 1.4101968475519398e-06, + "loss": 0.0006, + "num_input_tokens_seen": 59185512, + "step": 87785 + }, + { + "epoch": 2.144724305572521, + "grad_norm": 0.06886684149503708, + "learning_rate": 1.410119073144762e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59189096, + "step": 87790 + }, + { + "epoch": 2.144846456404368, + "grad_norm": 0.0034576309844851494, + "learning_rate": 1.4100412957550884e-06, + "loss": 0.0262, + "num_input_tokens_seen": 59192296, + "step": 87795 + }, + { + "epoch": 2.144968607236215, + "grad_norm": 0.015819577500224113, + "learning_rate": 1.4099635153834842e-06, + "loss": 0.0, + "num_input_tokens_seen": 59195560, + "step": 87800 + }, + { + "epoch": 2.1450907580680623, + "grad_norm": 0.0029255012050271034, + "learning_rate": 1.409885732030515e-06, + "loss": 0.0383, + "num_input_tokens_seen": 59198504, + "step": 87805 + }, + { + "epoch": 2.1452129088999095, + "grad_norm": 0.021271195262670517, + "learning_rate": 1.4098079456967462e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59202152, + "step": 87810 + }, + { + "epoch": 2.1453350597317566, + "grad_norm": 0.294455349445343, + "learning_rate": 1.4097301563827443e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59205480, + "step": 87815 + }, + { + "epoch": 2.145457210563604, + "grad_norm": 0.002129147993400693, + "learning_rate": 1.409652364089074e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59208488, + "step": 87820 + }, + { + "epoch": 2.145579361395451, + "grad_norm": 0.0035055570770055056, + "learning_rate": 1.4095745688163016e-06, + "loss": 0.073, + "num_input_tokens_seen": 59211752, + "step": 87825 + }, + { + "epoch": 2.145701512227298, + "grad_norm": 0.0019378452561795712, + "learning_rate": 1.4094967705649932e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59214888, + "step": 87830 + }, + { + "epoch": 2.1458236630591454, + "grad_norm": 0.006830559112131596, + "learning_rate": 1.4094189693357138e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59218152, + "step": 87835 + }, + { + "epoch": 2.1459458138909926, + "grad_norm": 0.024698445573449135, + "learning_rate": 1.4093411651290295e-06, + "loss": 0.0577, + "num_input_tokens_seen": 59221736, + "step": 87840 + }, + { + "epoch": 2.14606796472284, + "grad_norm": 0.009774798527359962, + "learning_rate": 1.4092633579455062e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59224936, + "step": 87845 + }, + { + "epoch": 2.146190115554687, + "grad_norm": 0.0053397067822515965, + "learning_rate": 1.4091855477857099e-06, + "loss": 0.1333, + "num_input_tokens_seen": 59228200, + "step": 87850 + }, + { + "epoch": 2.146312266386534, + "grad_norm": 0.003685411997139454, + "learning_rate": 1.4091077346502059e-06, + "loss": 0.0534, + "num_input_tokens_seen": 59231592, + "step": 87855 + }, + { + "epoch": 2.1464344172183814, + "grad_norm": 0.5126121640205383, + "learning_rate": 1.4090299185395607e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59235112, + "step": 87860 + }, + { + "epoch": 2.1465565680502285, + "grad_norm": 0.0015000292332842946, + "learning_rate": 1.4089520994543395e-06, + "loss": 0.0397, + "num_input_tokens_seen": 59239016, + "step": 87865 + }, + { + "epoch": 2.1466787188820757, + "grad_norm": 0.06686501950025558, + "learning_rate": 1.408874277395109e-06, + "loss": 0.0015, + "num_input_tokens_seen": 59242344, + "step": 87870 + }, + { + "epoch": 2.146800869713923, + "grad_norm": 0.029950261116027832, + "learning_rate": 1.4087964523624352e-06, + "loss": 0.0439, + "num_input_tokens_seen": 59246312, + "step": 87875 + }, + { + "epoch": 2.14692302054577, + "grad_norm": 178.24267578125, + "learning_rate": 1.408718624356883e-06, + "loss": 0.1187, + "num_input_tokens_seen": 59249896, + "step": 87880 + }, + { + "epoch": 2.1470451713776173, + "grad_norm": 0.7557882070541382, + "learning_rate": 1.4086407933790189e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59255080, + "step": 87885 + }, + { + "epoch": 2.147167322209464, + "grad_norm": 0.2542136609554291, + "learning_rate": 1.4085629594294094e-06, + "loss": 0.0009, + "num_input_tokens_seen": 59258216, + "step": 87890 + }, + { + "epoch": 2.1472894730413112, + "grad_norm": 95.09031677246094, + "learning_rate": 1.4084851225086204e-06, + "loss": 0.0389, + "num_input_tokens_seen": 59261352, + "step": 87895 + }, + { + "epoch": 2.1474116238731584, + "grad_norm": 0.16496166586875916, + "learning_rate": 1.4084072826172171e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59264296, + "step": 87900 + }, + { + "epoch": 2.1475337747050056, + "grad_norm": 0.011495031416416168, + "learning_rate": 1.4083294397557665e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59268072, + "step": 87905 + }, + { + "epoch": 2.147655925536853, + "grad_norm": 0.06308521330356598, + "learning_rate": 1.4082515939248342e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59271208, + "step": 87910 + }, + { + "epoch": 2.1477780763687, + "grad_norm": 0.0014856046764180064, + "learning_rate": 1.4081737451249868e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59274216, + "step": 87915 + }, + { + "epoch": 2.147900227200547, + "grad_norm": 0.0024500424042344093, + "learning_rate": 1.4080958933567901e-06, + "loss": 0.0, + "num_input_tokens_seen": 59277864, + "step": 87920 + }, + { + "epoch": 2.1480223780323944, + "grad_norm": 0.024505984038114548, + "learning_rate": 1.4080180386208105e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59281384, + "step": 87925 + }, + { + "epoch": 2.1481445288642416, + "grad_norm": 0.012265844270586967, + "learning_rate": 1.4079401809176136e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59285096, + "step": 87930 + }, + { + "epoch": 2.1482666796960888, + "grad_norm": 0.006017817184329033, + "learning_rate": 1.4078623202477662e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59288552, + "step": 87935 + }, + { + "epoch": 2.148388830527936, + "grad_norm": 0.29101741313934326, + "learning_rate": 1.407784456611834e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59291752, + "step": 87940 + }, + { + "epoch": 2.148510981359783, + "grad_norm": 0.03301127254962921, + "learning_rate": 1.4077065900103836e-06, + "loss": 0.0, + "num_input_tokens_seen": 59295400, + "step": 87945 + }, + { + "epoch": 2.1486331321916303, + "grad_norm": 0.001643263385631144, + "learning_rate": 1.4076287204439817e-06, + "loss": 0.0437, + "num_input_tokens_seen": 59298728, + "step": 87950 + }, + { + "epoch": 2.1487552830234775, + "grad_norm": 0.039305005222558975, + "learning_rate": 1.4075508479131936e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59302376, + "step": 87955 + }, + { + "epoch": 2.1488774338553247, + "grad_norm": 56.951690673828125, + "learning_rate": 1.4074729724185864e-06, + "loss": 0.0992, + "num_input_tokens_seen": 59305384, + "step": 87960 + }, + { + "epoch": 2.148999584687172, + "grad_norm": 0.00010884566290769726, + "learning_rate": 1.407395093960726e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59309032, + "step": 87965 + }, + { + "epoch": 2.149121735519019, + "grad_norm": 44.920066833496094, + "learning_rate": 1.4073172125401792e-06, + "loss": 0.1488, + "num_input_tokens_seen": 59312552, + "step": 87970 + }, + { + "epoch": 2.149243886350866, + "grad_norm": 0.010293465107679367, + "learning_rate": 1.4072393281575117e-06, + "loss": 0.0, + "num_input_tokens_seen": 59316264, + "step": 87975 + }, + { + "epoch": 2.149366037182713, + "grad_norm": 27.4898624420166, + "learning_rate": 1.4071614408132903e-06, + "loss": 0.1237, + "num_input_tokens_seen": 59319592, + "step": 87980 + }, + { + "epoch": 2.14948818801456, + "grad_norm": 0.035651687532663345, + "learning_rate": 1.4070835505080816e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59322920, + "step": 87985 + }, + { + "epoch": 2.1496103388464074, + "grad_norm": 699.4857788085938, + "learning_rate": 1.4070056572424519e-06, + "loss": 0.0455, + "num_input_tokens_seen": 59326056, + "step": 87990 + }, + { + "epoch": 2.1497324896782546, + "grad_norm": 0.002766907447949052, + "learning_rate": 1.4069277610169672e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59330792, + "step": 87995 + }, + { + "epoch": 2.1498546405101018, + "grad_norm": 0.7083733677864075, + "learning_rate": 1.4068498618321946e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59333736, + "step": 88000 + }, + { + "epoch": 2.149976791341949, + "grad_norm": 0.010112437419593334, + "learning_rate": 1.4067719596887003e-06, + "loss": 0.0963, + "num_input_tokens_seen": 59337128, + "step": 88005 + }, + { + "epoch": 2.150098942173796, + "grad_norm": 0.00013781416055280715, + "learning_rate": 1.4066940545870506e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59340584, + "step": 88010 + }, + { + "epoch": 2.1502210930056433, + "grad_norm": 0.02945570833981037, + "learning_rate": 1.406616146527813e-06, + "loss": 0.0193, + "num_input_tokens_seen": 59343848, + "step": 88015 + }, + { + "epoch": 2.1503432438374905, + "grad_norm": 0.03614374250173569, + "learning_rate": 1.4065382355115532e-06, + "loss": 0.026, + "num_input_tokens_seen": 59347240, + "step": 88020 + }, + { + "epoch": 2.1504653946693377, + "grad_norm": 0.17033955454826355, + "learning_rate": 1.4064603215388378e-06, + "loss": 0.0296, + "num_input_tokens_seen": 59351016, + "step": 88025 + }, + { + "epoch": 2.150587545501185, + "grad_norm": 0.009592707268893719, + "learning_rate": 1.4063824046102338e-06, + "loss": 0.0935, + "num_input_tokens_seen": 59354536, + "step": 88030 + }, + { + "epoch": 2.150709696333032, + "grad_norm": 0.008734858594834805, + "learning_rate": 1.4063044847263074e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59357544, + "step": 88035 + }, + { + "epoch": 2.1508318471648793, + "grad_norm": 0.04149794578552246, + "learning_rate": 1.4062265618876258e-06, + "loss": 0.1847, + "num_input_tokens_seen": 59360808, + "step": 88040 + }, + { + "epoch": 2.1509539979967265, + "grad_norm": 0.012698042206466198, + "learning_rate": 1.4061486360947555e-06, + "loss": 0.0882, + "num_input_tokens_seen": 59363880, + "step": 88045 + }, + { + "epoch": 2.1510761488285737, + "grad_norm": 0.13717693090438843, + "learning_rate": 1.4060707073482628e-06, + "loss": 0.0865, + "num_input_tokens_seen": 59367144, + "step": 88050 + }, + { + "epoch": 2.151198299660421, + "grad_norm": 0.03378063067793846, + "learning_rate": 1.4059927756487147e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59370664, + "step": 88055 + }, + { + "epoch": 2.151320450492268, + "grad_norm": 0.011890546418726444, + "learning_rate": 1.4059148409966778e-06, + "loss": 0.0487, + "num_input_tokens_seen": 59373928, + "step": 88060 + }, + { + "epoch": 2.1514426013241152, + "grad_norm": 0.07420341670513153, + "learning_rate": 1.405836903392719e-06, + "loss": 0.0006, + "num_input_tokens_seen": 59377064, + "step": 88065 + }, + { + "epoch": 2.151564752155962, + "grad_norm": 0.07378300279378891, + "learning_rate": 1.4057589628374053e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59380776, + "step": 88070 + }, + { + "epoch": 2.151686902987809, + "grad_norm": 0.03141964226961136, + "learning_rate": 1.4056810193313034e-06, + "loss": 0.0338, + "num_input_tokens_seen": 59383720, + "step": 88075 + }, + { + "epoch": 2.1518090538196564, + "grad_norm": 0.052101925015449524, + "learning_rate": 1.40560307287498e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59386792, + "step": 88080 + }, + { + "epoch": 2.1519312046515036, + "grad_norm": 0.031124841421842575, + "learning_rate": 1.405525123469002e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59389800, + "step": 88085 + }, + { + "epoch": 2.1520533554833507, + "grad_norm": 0.329045832157135, + "learning_rate": 1.405447171113936e-06, + "loss": 0.0629, + "num_input_tokens_seen": 59392936, + "step": 88090 + }, + { + "epoch": 2.152175506315198, + "grad_norm": 0.007800894323736429, + "learning_rate": 1.405369215810349e-06, + "loss": 0.0463, + "num_input_tokens_seen": 59396008, + "step": 88095 + }, + { + "epoch": 2.152297657147045, + "grad_norm": 0.04379410296678543, + "learning_rate": 1.405291257558808e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59399208, + "step": 88100 + }, + { + "epoch": 2.1524198079788923, + "grad_norm": 0.1042783185839653, + "learning_rate": 1.4052132963598804e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59403048, + "step": 88105 + }, + { + "epoch": 2.1525419588107395, + "grad_norm": 57.833778381347656, + "learning_rate": 1.4051353322141324e-06, + "loss": 0.0848, + "num_input_tokens_seen": 59406312, + "step": 88110 + }, + { + "epoch": 2.1526641096425867, + "grad_norm": 0.002779137110337615, + "learning_rate": 1.4050573651221313e-06, + "loss": 0.0654, + "num_input_tokens_seen": 59409512, + "step": 88115 + }, + { + "epoch": 2.152786260474434, + "grad_norm": 0.007450256962329149, + "learning_rate": 1.404979395084444e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59412712, + "step": 88120 + }, + { + "epoch": 2.152908411306281, + "grad_norm": 0.028415225446224213, + "learning_rate": 1.404901422101638e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59415976, + "step": 88125 + }, + { + "epoch": 2.1530305621381283, + "grad_norm": 20.51732063293457, + "learning_rate": 1.4048234461742798e-06, + "loss": 0.0564, + "num_input_tokens_seen": 59418984, + "step": 88130 + }, + { + "epoch": 2.1531527129699755, + "grad_norm": 3.080681324005127, + "learning_rate": 1.4047454673029366e-06, + "loss": 0.0007, + "num_input_tokens_seen": 59422440, + "step": 88135 + }, + { + "epoch": 2.1532748638018226, + "grad_norm": 0.10545322299003601, + "learning_rate": 1.4046674854881756e-06, + "loss": 0.1477, + "num_input_tokens_seen": 59425704, + "step": 88140 + }, + { + "epoch": 2.15339701463367, + "grad_norm": 0.01797093264758587, + "learning_rate": 1.4045895007305635e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59428712, + "step": 88145 + }, + { + "epoch": 2.153519165465517, + "grad_norm": 0.0754646360874176, + "learning_rate": 1.4045115130306679e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59431720, + "step": 88150 + }, + { + "epoch": 2.1536413162973638, + "grad_norm": 0.01984359510242939, + "learning_rate": 1.4044335223890557e-06, + "loss": 0.0198, + "num_input_tokens_seen": 59435048, + "step": 88155 + }, + { + "epoch": 2.153763467129211, + "grad_norm": 0.004418569151312113, + "learning_rate": 1.4043555288062941e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59438760, + "step": 88160 + }, + { + "epoch": 2.153885617961058, + "grad_norm": 11.380366325378418, + "learning_rate": 1.4042775322829506e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59441896, + "step": 88165 + }, + { + "epoch": 2.1540077687929053, + "grad_norm": 0.07178980112075806, + "learning_rate": 1.4041995328195919e-06, + "loss": 0.0868, + "num_input_tokens_seen": 59445224, + "step": 88170 + }, + { + "epoch": 2.1541299196247525, + "grad_norm": 0.0017138965195044875, + "learning_rate": 1.4041215304167855e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59448104, + "step": 88175 + }, + { + "epoch": 2.1542520704565997, + "grad_norm": 0.012803579680621624, + "learning_rate": 1.4040435250750988e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59451688, + "step": 88180 + }, + { + "epoch": 2.154374221288447, + "grad_norm": 0.008812888525426388, + "learning_rate": 1.4039655167950987e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59454760, + "step": 88185 + }, + { + "epoch": 2.154496372120294, + "grad_norm": 5.979305267333984, + "learning_rate": 1.403887505577353e-06, + "loss": 0.0007, + "num_input_tokens_seen": 59458024, + "step": 88190 + }, + { + "epoch": 2.1546185229521413, + "grad_norm": 16.921903610229492, + "learning_rate": 1.4038094914224285e-06, + "loss": 0.0656, + "num_input_tokens_seen": 59461288, + "step": 88195 + }, + { + "epoch": 2.1547406737839885, + "grad_norm": 0.007237650454044342, + "learning_rate": 1.4037314743308928e-06, + "loss": 0.0229, + "num_input_tokens_seen": 59465000, + "step": 88200 + }, + { + "epoch": 2.1548628246158357, + "grad_norm": 0.024005282670259476, + "learning_rate": 1.4036534543033133e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59468264, + "step": 88205 + }, + { + "epoch": 2.154984975447683, + "grad_norm": 0.01715189404785633, + "learning_rate": 1.4035754313402573e-06, + "loss": 0.0668, + "num_input_tokens_seen": 59471848, + "step": 88210 + }, + { + "epoch": 2.15510712627953, + "grad_norm": 0.004532475955784321, + "learning_rate": 1.403497405442292e-06, + "loss": 0.2547, + "num_input_tokens_seen": 59475304, + "step": 88215 + }, + { + "epoch": 2.1552292771113772, + "grad_norm": 1.9837899208068848, + "learning_rate": 1.403419376609985e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59478696, + "step": 88220 + }, + { + "epoch": 2.1553514279432244, + "grad_norm": 0.04568152502179146, + "learning_rate": 1.4033413448439042e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59482216, + "step": 88225 + }, + { + "epoch": 2.1554735787750716, + "grad_norm": 0.12637126445770264, + "learning_rate": 1.4032633101446166e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59485672, + "step": 88230 + }, + { + "epoch": 2.155595729606919, + "grad_norm": 0.05594087392091751, + "learning_rate": 1.4031852725126897e-06, + "loss": 0.0825, + "num_input_tokens_seen": 59488808, + "step": 88235 + }, + { + "epoch": 2.1557178804387656, + "grad_norm": 0.010974193923175335, + "learning_rate": 1.4031072319486907e-06, + "loss": 0.1709, + "num_input_tokens_seen": 59492072, + "step": 88240 + }, + { + "epoch": 2.155840031270613, + "grad_norm": 0.011812552809715271, + "learning_rate": 1.403029188453188e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59495976, + "step": 88245 + }, + { + "epoch": 2.15596218210246, + "grad_norm": 0.008403713814914227, + "learning_rate": 1.4029511420267484e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59499240, + "step": 88250 + }, + { + "epoch": 2.156084332934307, + "grad_norm": 0.17212870717048645, + "learning_rate": 1.4028730926699395e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59502184, + "step": 88255 + }, + { + "epoch": 2.1562064837661543, + "grad_norm": 0.020884985104203224, + "learning_rate": 1.4027950403833294e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59506024, + "step": 88260 + }, + { + "epoch": 2.1563286345980015, + "grad_norm": 0.03501040115952492, + "learning_rate": 1.4027169851674851e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59509032, + "step": 88265 + }, + { + "epoch": 2.1564507854298487, + "grad_norm": 0.7422922849655151, + "learning_rate": 1.402638927022975e-06, + "loss": 0.0009, + "num_input_tokens_seen": 59512488, + "step": 88270 + }, + { + "epoch": 2.156572936261696, + "grad_norm": 0.024366816505789757, + "learning_rate": 1.402560865950366e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59515816, + "step": 88275 + }, + { + "epoch": 2.156695087093543, + "grad_norm": 0.0642450600862503, + "learning_rate": 1.4024828019502262e-06, + "loss": 0.0007, + "num_input_tokens_seen": 59519016, + "step": 88280 + }, + { + "epoch": 2.1568172379253903, + "grad_norm": 75.12712097167969, + "learning_rate": 1.4024047350231234e-06, + "loss": 0.0403, + "num_input_tokens_seen": 59522024, + "step": 88285 + }, + { + "epoch": 2.1569393887572375, + "grad_norm": 0.1974213570356369, + "learning_rate": 1.4023266651696249e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59525224, + "step": 88290 + }, + { + "epoch": 2.1570615395890846, + "grad_norm": 0.040102358907461166, + "learning_rate": 1.4022485923902988e-06, + "loss": 0.0436, + "num_input_tokens_seen": 59528424, + "step": 88295 + }, + { + "epoch": 2.157183690420932, + "grad_norm": 0.014993073418736458, + "learning_rate": 1.4021705166857126e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59532072, + "step": 88300 + }, + { + "epoch": 2.157305841252779, + "grad_norm": 0.008291895501315594, + "learning_rate": 1.4020924380564342e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59535208, + "step": 88305 + }, + { + "epoch": 2.157427992084626, + "grad_norm": 303.73114013671875, + "learning_rate": 1.4020143565030318e-06, + "loss": 0.0055, + "num_input_tokens_seen": 59538536, + "step": 88310 + }, + { + "epoch": 2.1575501429164734, + "grad_norm": 103.22540283203125, + "learning_rate": 1.4019362720260723e-06, + "loss": 0.0424, + "num_input_tokens_seen": 59541544, + "step": 88315 + }, + { + "epoch": 2.1576722937483206, + "grad_norm": 0.002675070893019438, + "learning_rate": 1.4018581846261246e-06, + "loss": 0.0404, + "num_input_tokens_seen": 59545128, + "step": 88320 + }, + { + "epoch": 2.157794444580168, + "grad_norm": 0.10408946126699448, + "learning_rate": 1.4017800943037558e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59548712, + "step": 88325 + }, + { + "epoch": 2.157916595412015, + "grad_norm": 0.07306545227766037, + "learning_rate": 1.4017020010595344e-06, + "loss": 0.0377, + "num_input_tokens_seen": 59551912, + "step": 88330 + }, + { + "epoch": 2.1580387462438617, + "grad_norm": 0.04442272335290909, + "learning_rate": 1.401623904894028e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59554984, + "step": 88335 + }, + { + "epoch": 2.158160897075709, + "grad_norm": 48.30341720581055, + "learning_rate": 1.4015458058078042e-06, + "loss": 0.049, + "num_input_tokens_seen": 59558312, + "step": 88340 + }, + { + "epoch": 2.158283047907556, + "grad_norm": 0.021329158917069435, + "learning_rate": 1.4014677038014315e-06, + "loss": 0.0583, + "num_input_tokens_seen": 59561448, + "step": 88345 + }, + { + "epoch": 2.1584051987394033, + "grad_norm": 0.03582082316279411, + "learning_rate": 1.4013895988754776e-06, + "loss": 0.0027, + "num_input_tokens_seen": 59564264, + "step": 88350 + }, + { + "epoch": 2.1585273495712505, + "grad_norm": 0.03441407531499863, + "learning_rate": 1.4013114910305107e-06, + "loss": 0.0367, + "num_input_tokens_seen": 59567848, + "step": 88355 + }, + { + "epoch": 2.1586495004030977, + "grad_norm": 0.009749101474881172, + "learning_rate": 1.4012333802670985e-06, + "loss": 0.0436, + "num_input_tokens_seen": 59572136, + "step": 88360 + }, + { + "epoch": 2.158771651234945, + "grad_norm": 0.00150212156586349, + "learning_rate": 1.4011552665858094e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59575784, + "step": 88365 + }, + { + "epoch": 2.158893802066792, + "grad_norm": 0.0041437577456235886, + "learning_rate": 1.4010771499872114e-06, + "loss": 0.0019, + "num_input_tokens_seen": 59579112, + "step": 88370 + }, + { + "epoch": 2.1590159528986392, + "grad_norm": 0.01603412814438343, + "learning_rate": 1.4009990304718722e-06, + "loss": 0.056, + "num_input_tokens_seen": 59582184, + "step": 88375 + }, + { + "epoch": 2.1591381037304864, + "grad_norm": 0.05813143774867058, + "learning_rate": 1.4009209080403603e-06, + "loss": 0.0347, + "num_input_tokens_seen": 59584936, + "step": 88380 + }, + { + "epoch": 2.1592602545623336, + "grad_norm": 0.04797271639108658, + "learning_rate": 1.400842782693244e-06, + "loss": 0.0294, + "num_input_tokens_seen": 59588200, + "step": 88385 + }, + { + "epoch": 2.159382405394181, + "grad_norm": 14.291007041931152, + "learning_rate": 1.4007646544310912e-06, + "loss": 0.0552, + "num_input_tokens_seen": 59591528, + "step": 88390 + }, + { + "epoch": 2.159504556226028, + "grad_norm": 0.10813934355974197, + "learning_rate": 1.4006865232544696e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59595112, + "step": 88395 + }, + { + "epoch": 2.159626707057875, + "grad_norm": 0.24442404508590698, + "learning_rate": 1.4006083891639481e-06, + "loss": 0.1146, + "num_input_tokens_seen": 59598632, + "step": 88400 + }, + { + "epoch": 2.1597488578897224, + "grad_norm": 0.008931396529078484, + "learning_rate": 1.4005302521600945e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59602024, + "step": 88405 + }, + { + "epoch": 2.1598710087215696, + "grad_norm": 0.06182999163866043, + "learning_rate": 1.4004521122434772e-06, + "loss": 0.0182, + "num_input_tokens_seen": 59605352, + "step": 88410 + }, + { + "epoch": 2.1599931595534168, + "grad_norm": 0.007702971808612347, + "learning_rate": 1.4003739694146644e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59608296, + "step": 88415 + }, + { + "epoch": 2.1601153103852635, + "grad_norm": 0.16781401634216309, + "learning_rate": 1.4002958236742246e-06, + "loss": 0.0517, + "num_input_tokens_seen": 59611368, + "step": 88420 + }, + { + "epoch": 2.1602374612171107, + "grad_norm": 0.08598300069570541, + "learning_rate": 1.4002176750227257e-06, + "loss": 0.0537, + "num_input_tokens_seen": 59614440, + "step": 88425 + }, + { + "epoch": 2.160359612048958, + "grad_norm": 0.02408665232360363, + "learning_rate": 1.4001395234607362e-06, + "loss": 0.0573, + "num_input_tokens_seen": 59617832, + "step": 88430 + }, + { + "epoch": 2.160481762880805, + "grad_norm": 0.01334523968398571, + "learning_rate": 1.4000613689888248e-06, + "loss": 0.0303, + "num_input_tokens_seen": 59621224, + "step": 88435 + }, + { + "epoch": 2.1606039137126523, + "grad_norm": 0.004405375570058823, + "learning_rate": 1.399983211607559e-06, + "loss": 0.0767, + "num_input_tokens_seen": 59624360, + "step": 88440 + }, + { + "epoch": 2.1607260645444994, + "grad_norm": 0.02678143046796322, + "learning_rate": 1.3999050513175081e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59627496, + "step": 88445 + }, + { + "epoch": 2.1608482153763466, + "grad_norm": 0.0012523168697953224, + "learning_rate": 1.39982688811924e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59631208, + "step": 88450 + }, + { + "epoch": 2.160970366208194, + "grad_norm": 0.0031171184964478016, + "learning_rate": 1.3997487220133232e-06, + "loss": 0.0441, + "num_input_tokens_seen": 59634280, + "step": 88455 + }, + { + "epoch": 2.161092517040041, + "grad_norm": 0.02987041138112545, + "learning_rate": 1.3996705530003262e-06, + "loss": 0.0314, + "num_input_tokens_seen": 59637672, + "step": 88460 + }, + { + "epoch": 2.161214667871888, + "grad_norm": 0.003604191355407238, + "learning_rate": 1.3995923810808176e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59641256, + "step": 88465 + }, + { + "epoch": 2.1613368187037354, + "grad_norm": 0.8536860942840576, + "learning_rate": 1.3995142062553654e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59644584, + "step": 88470 + }, + { + "epoch": 2.1614589695355826, + "grad_norm": 0.1137804314494133, + "learning_rate": 1.3994360285245386e-06, + "loss": 0.0431, + "num_input_tokens_seen": 59647912, + "step": 88475 + }, + { + "epoch": 2.1615811203674298, + "grad_norm": 0.14593037962913513, + "learning_rate": 1.3993578478889054e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59651304, + "step": 88480 + }, + { + "epoch": 2.161703271199277, + "grad_norm": 13.66635513305664, + "learning_rate": 1.3992796643490348e-06, + "loss": 0.0991, + "num_input_tokens_seen": 59654568, + "step": 88485 + }, + { + "epoch": 2.161825422031124, + "grad_norm": 0.046653177589178085, + "learning_rate": 1.399201477905495e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59659112, + "step": 88490 + }, + { + "epoch": 2.1619475728629713, + "grad_norm": 0.016694676131010056, + "learning_rate": 1.3991232885588546e-06, + "loss": 0.0597, + "num_input_tokens_seen": 59662568, + "step": 88495 + }, + { + "epoch": 2.1620697236948185, + "grad_norm": 0.09434916824102402, + "learning_rate": 1.3990450963096824e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59665832, + "step": 88500 + }, + { + "epoch": 2.1621918745266657, + "grad_norm": 3.1131694316864014, + "learning_rate": 1.398966901158547e-06, + "loss": 0.0536, + "num_input_tokens_seen": 59669544, + "step": 88505 + }, + { + "epoch": 2.162314025358513, + "grad_norm": 0.0042829355224967, + "learning_rate": 1.3988887031060168e-06, + "loss": 0.0708, + "num_input_tokens_seen": 59672872, + "step": 88510 + }, + { + "epoch": 2.1624361761903597, + "grad_norm": 21.86163902282715, + "learning_rate": 1.3988105021526608e-06, + "loss": 0.1252, + "num_input_tokens_seen": 59676200, + "step": 88515 + }, + { + "epoch": 2.162558327022207, + "grad_norm": 0.009639190509915352, + "learning_rate": 1.3987322982990474e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59679144, + "step": 88520 + }, + { + "epoch": 2.162680477854054, + "grad_norm": 0.0002484617580194026, + "learning_rate": 1.3986540915457457e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59682600, + "step": 88525 + }, + { + "epoch": 2.1628026286859012, + "grad_norm": 0.04339480400085449, + "learning_rate": 1.398575881893324e-06, + "loss": 0.1044, + "num_input_tokens_seen": 59685928, + "step": 88530 + }, + { + "epoch": 2.1629247795177484, + "grad_norm": 0.05362813547253609, + "learning_rate": 1.3984976693423512e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59689320, + "step": 88535 + }, + { + "epoch": 2.1630469303495956, + "grad_norm": 0.27106356620788574, + "learning_rate": 1.3984194538933961e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59692904, + "step": 88540 + }, + { + "epoch": 2.163169081181443, + "grad_norm": 0.2880617678165436, + "learning_rate": 1.3983412355470283e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59696040, + "step": 88545 + }, + { + "epoch": 2.16329123201329, + "grad_norm": 25.8460636138916, + "learning_rate": 1.3982630143038154e-06, + "loss": 0.1038, + "num_input_tokens_seen": 59699368, + "step": 88550 + }, + { + "epoch": 2.163413382845137, + "grad_norm": 0.028124157339334488, + "learning_rate": 1.3981847901643266e-06, + "loss": 0.0558, + "num_input_tokens_seen": 59702248, + "step": 88555 + }, + { + "epoch": 2.1635355336769844, + "grad_norm": 0.03156181052327156, + "learning_rate": 1.398106563129131e-06, + "loss": 0.0054, + "num_input_tokens_seen": 59705512, + "step": 88560 + }, + { + "epoch": 2.1636576845088316, + "grad_norm": 0.01643490605056286, + "learning_rate": 1.3980283331987973e-06, + "loss": 0.0636, + "num_input_tokens_seen": 59708712, + "step": 88565 + }, + { + "epoch": 2.1637798353406787, + "grad_norm": 0.14175744354724884, + "learning_rate": 1.3979501003738948e-06, + "loss": 0.0157, + "num_input_tokens_seen": 59712168, + "step": 88570 + }, + { + "epoch": 2.163901986172526, + "grad_norm": 0.3935987651348114, + "learning_rate": 1.397871864654992e-06, + "loss": 0.0006, + "num_input_tokens_seen": 59715496, + "step": 88575 + }, + { + "epoch": 2.164024137004373, + "grad_norm": 0.011017389595508575, + "learning_rate": 1.397793626042658e-06, + "loss": 0.0006, + "num_input_tokens_seen": 59718888, + "step": 88580 + }, + { + "epoch": 2.1641462878362203, + "grad_norm": 0.04418168216943741, + "learning_rate": 1.3977153845374616e-06, + "loss": 0.1219, + "num_input_tokens_seen": 59722280, + "step": 88585 + }, + { + "epoch": 2.1642684386680675, + "grad_norm": 0.007941069081425667, + "learning_rate": 1.397637140139972e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59725608, + "step": 88590 + }, + { + "epoch": 2.1643905894999147, + "grad_norm": 0.0148239741101861, + "learning_rate": 1.3975588928507583e-06, + "loss": 0.0559, + "num_input_tokens_seen": 59729000, + "step": 88595 + }, + { + "epoch": 2.1645127403317614, + "grad_norm": 49.897762298583984, + "learning_rate": 1.3974806426703894e-06, + "loss": 0.0894, + "num_input_tokens_seen": 59732520, + "step": 88600 + }, + { + "epoch": 2.1646348911636086, + "grad_norm": 0.09293018281459808, + "learning_rate": 1.3974023895994342e-06, + "loss": 0.0502, + "num_input_tokens_seen": 59736232, + "step": 88605 + }, + { + "epoch": 2.164757041995456, + "grad_norm": 20.600238800048828, + "learning_rate": 1.3973241336384622e-06, + "loss": 0.0663, + "num_input_tokens_seen": 59739048, + "step": 88610 + }, + { + "epoch": 2.164879192827303, + "grad_norm": 0.13299432396888733, + "learning_rate": 1.397245874788042e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59742760, + "step": 88615 + }, + { + "epoch": 2.16500134365915, + "grad_norm": 0.0673794373869896, + "learning_rate": 1.397167613048743e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59746216, + "step": 88620 + }, + { + "epoch": 2.1651234944909974, + "grad_norm": 0.001309892744757235, + "learning_rate": 1.397089348421134e-06, + "loss": 0.0612, + "num_input_tokens_seen": 59749544, + "step": 88625 + }, + { + "epoch": 2.1652456453228446, + "grad_norm": 0.04341740533709526, + "learning_rate": 1.397011080905785e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59753128, + "step": 88630 + }, + { + "epoch": 2.1653677961546918, + "grad_norm": 0.05454143509268761, + "learning_rate": 1.3969328105032643e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59756392, + "step": 88635 + }, + { + "epoch": 2.165489946986539, + "grad_norm": 0.22605502605438232, + "learning_rate": 1.3968545372141416e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59759528, + "step": 88640 + }, + { + "epoch": 2.165612097818386, + "grad_norm": 0.08796440064907074, + "learning_rate": 1.3967762610389858e-06, + "loss": 0.0256, + "num_input_tokens_seen": 59763240, + "step": 88645 + }, + { + "epoch": 2.1657342486502333, + "grad_norm": 0.006114218384027481, + "learning_rate": 1.3966979819783666e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59766504, + "step": 88650 + }, + { + "epoch": 2.1658563994820805, + "grad_norm": 496.6544494628906, + "learning_rate": 1.3966197000328528e-06, + "loss": 0.0406, + "num_input_tokens_seen": 59769768, + "step": 88655 + }, + { + "epoch": 2.1659785503139277, + "grad_norm": 0.05280788987874985, + "learning_rate": 1.3965414152030138e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59772968, + "step": 88660 + }, + { + "epoch": 2.166100701145775, + "grad_norm": 0.9186227321624756, + "learning_rate": 1.3964631274894189e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59776232, + "step": 88665 + }, + { + "epoch": 2.166222851977622, + "grad_norm": 0.027731165289878845, + "learning_rate": 1.3963848368926376e-06, + "loss": 0.0396, + "num_input_tokens_seen": 59779880, + "step": 88670 + }, + { + "epoch": 2.1663450028094693, + "grad_norm": 0.006741840858012438, + "learning_rate": 1.3963065434132392e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59783592, + "step": 88675 + }, + { + "epoch": 2.1664671536413165, + "grad_norm": 0.2137477844953537, + "learning_rate": 1.3962282470517933e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59786792, + "step": 88680 + }, + { + "epoch": 2.1665893044731637, + "grad_norm": 0.24367313086986542, + "learning_rate": 1.3961499478088685e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59789928, + "step": 88685 + }, + { + "epoch": 2.166711455305011, + "grad_norm": 0.004783410578966141, + "learning_rate": 1.3960716456850347e-06, + "loss": 0.0839, + "num_input_tokens_seen": 59793512, + "step": 88690 + }, + { + "epoch": 2.1668336061368576, + "grad_norm": 0.010234296321868896, + "learning_rate": 1.3959933406808616e-06, + "loss": 0.0556, + "num_input_tokens_seen": 59796584, + "step": 88695 + }, + { + "epoch": 2.166955756968705, + "grad_norm": 0.026050550863146782, + "learning_rate": 1.3959150327969188e-06, + "loss": 0.1148, + "num_input_tokens_seen": 59799976, + "step": 88700 + }, + { + "epoch": 2.167077907800552, + "grad_norm": 0.5169499516487122, + "learning_rate": 1.395836722033775e-06, + "loss": 0.0005, + "num_input_tokens_seen": 59803368, + "step": 88705 + }, + { + "epoch": 2.167200058632399, + "grad_norm": 0.01806020550429821, + "learning_rate": 1.395758408392e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59806760, + "step": 88710 + }, + { + "epoch": 2.1673222094642464, + "grad_norm": 0.049956656992435455, + "learning_rate": 1.3956800918721637e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59809896, + "step": 88715 + }, + { + "epoch": 2.1674443602960936, + "grad_norm": 0.01550681795924902, + "learning_rate": 1.3956017724748347e-06, + "loss": 0.0, + "num_input_tokens_seen": 59813352, + "step": 88720 + }, + { + "epoch": 2.1675665111279407, + "grad_norm": 0.06467894464731216, + "learning_rate": 1.395523450200584e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59816488, + "step": 88725 + }, + { + "epoch": 2.167688661959788, + "grad_norm": 0.13828709721565247, + "learning_rate": 1.39544512504998e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59819624, + "step": 88730 + }, + { + "epoch": 2.167810812791635, + "grad_norm": 0.388836532831192, + "learning_rate": 1.3953667970235928e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59823144, + "step": 88735 + }, + { + "epoch": 2.1679329636234823, + "grad_norm": 0.1045452430844307, + "learning_rate": 1.3952884661219917e-06, + "loss": 0.1275, + "num_input_tokens_seen": 59826664, + "step": 88740 + }, + { + "epoch": 2.1680551144553295, + "grad_norm": 0.026057695969939232, + "learning_rate": 1.395210132345747e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59830184, + "step": 88745 + }, + { + "epoch": 2.1681772652871767, + "grad_norm": 0.022811226546764374, + "learning_rate": 1.3951317956954274e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59833256, + "step": 88750 + }, + { + "epoch": 2.168299416119024, + "grad_norm": 0.05536481365561485, + "learning_rate": 1.3950534561716035e-06, + "loss": 0.0739, + "num_input_tokens_seen": 59836648, + "step": 88755 + }, + { + "epoch": 2.168421566950871, + "grad_norm": 0.003870234126225114, + "learning_rate": 1.3949751137748442e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59840616, + "step": 88760 + }, + { + "epoch": 2.1685437177827183, + "grad_norm": 0.008767131716012955, + "learning_rate": 1.39489676850572e-06, + "loss": 0.0628, + "num_input_tokens_seen": 59843688, + "step": 88765 + }, + { + "epoch": 2.1686658686145655, + "grad_norm": 0.036568425595760345, + "learning_rate": 1.3948184203648002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59847144, + "step": 88770 + }, + { + "epoch": 2.1687880194464126, + "grad_norm": 0.06900046020746231, + "learning_rate": 1.3947400693526545e-06, + "loss": 0.0427, + "num_input_tokens_seen": 59850216, + "step": 88775 + }, + { + "epoch": 2.1689101702782594, + "grad_norm": 24.118446350097656, + "learning_rate": 1.3946617154698529e-06, + "loss": 0.0568, + "num_input_tokens_seen": 59853480, + "step": 88780 + }, + { + "epoch": 2.1690323211101066, + "grad_norm": 0.021634750068187714, + "learning_rate": 1.3945833587169653e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59857320, + "step": 88785 + }, + { + "epoch": 2.1691544719419538, + "grad_norm": 0.050916168838739395, + "learning_rate": 1.3945049990945613e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59860392, + "step": 88790 + }, + { + "epoch": 2.169276622773801, + "grad_norm": 0.20405644178390503, + "learning_rate": 1.3944266366032107e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59863976, + "step": 88795 + }, + { + "epoch": 2.169398773605648, + "grad_norm": 0.0013021642807871103, + "learning_rate": 1.3943482712434837e-06, + "loss": 0.0, + "num_input_tokens_seen": 59866920, + "step": 88800 + }, + { + "epoch": 2.1695209244374953, + "grad_norm": 0.7083492875099182, + "learning_rate": 1.39426990301595e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59870504, + "step": 88805 + }, + { + "epoch": 2.1696430752693425, + "grad_norm": 0.008465035818517208, + "learning_rate": 1.3941915319211797e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59874088, + "step": 88810 + }, + { + "epoch": 2.1697652261011897, + "grad_norm": 107.73428344726562, + "learning_rate": 1.394113157959742e-06, + "loss": 0.0558, + "num_input_tokens_seen": 59877544, + "step": 88815 + }, + { + "epoch": 2.169887376933037, + "grad_norm": 0.7204272747039795, + "learning_rate": 1.3940347811322078e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59881448, + "step": 88820 + }, + { + "epoch": 2.170009527764884, + "grad_norm": 20.292095184326172, + "learning_rate": 1.3939564014391468e-06, + "loss": 0.0664, + "num_input_tokens_seen": 59885416, + "step": 88825 + }, + { + "epoch": 2.1701316785967313, + "grad_norm": 0.005525513086467981, + "learning_rate": 1.3938780188811286e-06, + "loss": 0.0, + "num_input_tokens_seen": 59889000, + "step": 88830 + }, + { + "epoch": 2.1702538294285785, + "grad_norm": 0.5506719350814819, + "learning_rate": 1.3937996334587235e-06, + "loss": 0.0475, + "num_input_tokens_seen": 59892328, + "step": 88835 + }, + { + "epoch": 2.1703759802604257, + "grad_norm": 0.016354860737919807, + "learning_rate": 1.3937212451725018e-06, + "loss": 0.0813, + "num_input_tokens_seen": 59895720, + "step": 88840 + }, + { + "epoch": 2.170498131092273, + "grad_norm": 0.017355183139443398, + "learning_rate": 1.3936428540230328e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59899496, + "step": 88845 + }, + { + "epoch": 2.17062028192412, + "grad_norm": 0.013150669634342194, + "learning_rate": 1.3935644600108875e-06, + "loss": 0.0, + "num_input_tokens_seen": 59902696, + "step": 88850 + }, + { + "epoch": 2.1707424327559672, + "grad_norm": 26.938447952270508, + "learning_rate": 1.3934860631366358e-06, + "loss": 0.1509, + "num_input_tokens_seen": 59906024, + "step": 88855 + }, + { + "epoch": 2.1708645835878144, + "grad_norm": 0.05356710031628609, + "learning_rate": 1.3934076634008474e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59909672, + "step": 88860 + }, + { + "epoch": 2.170986734419661, + "grad_norm": 0.07025501132011414, + "learning_rate": 1.3933292608040927e-06, + "loss": 0.0836, + "num_input_tokens_seen": 59912872, + "step": 88865 + }, + { + "epoch": 2.1711088852515084, + "grad_norm": 18.241281509399414, + "learning_rate": 1.3932508553469417e-06, + "loss": 0.1332, + "num_input_tokens_seen": 59916200, + "step": 88870 + }, + { + "epoch": 2.1712310360833555, + "grad_norm": 0.16294845938682556, + "learning_rate": 1.3931724470299646e-06, + "loss": 0.0531, + "num_input_tokens_seen": 59919784, + "step": 88875 + }, + { + "epoch": 2.1713531869152027, + "grad_norm": 0.03206993266940117, + "learning_rate": 1.393094035853732e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59923368, + "step": 88880 + }, + { + "epoch": 2.17147533774705, + "grad_norm": 0.07751631736755371, + "learning_rate": 1.3930156218188137e-06, + "loss": 0.0502, + "num_input_tokens_seen": 59926632, + "step": 88885 + }, + { + "epoch": 2.171597488578897, + "grad_norm": 16.496200561523438, + "learning_rate": 1.3929372049257802e-06, + "loss": 0.0377, + "num_input_tokens_seen": 59929832, + "step": 88890 + }, + { + "epoch": 2.1717196394107443, + "grad_norm": 0.1615595519542694, + "learning_rate": 1.3928587851752015e-06, + "loss": 0.0008, + "num_input_tokens_seen": 59933480, + "step": 88895 + }, + { + "epoch": 2.1718417902425915, + "grad_norm": 19.633365631103516, + "learning_rate": 1.392780362567648e-06, + "loss": 0.048, + "num_input_tokens_seen": 59937256, + "step": 88900 + }, + { + "epoch": 2.1719639410744387, + "grad_norm": 0.04594242572784424, + "learning_rate": 1.3927019371036903e-06, + "loss": 0.074, + "num_input_tokens_seen": 59940328, + "step": 88905 + }, + { + "epoch": 2.172086091906286, + "grad_norm": 0.3396862745285034, + "learning_rate": 1.3926235087838982e-06, + "loss": 0.0447, + "num_input_tokens_seen": 59943528, + "step": 88910 + }, + { + "epoch": 2.172208242738133, + "grad_norm": 27.388818740844727, + "learning_rate": 1.3925450776088426e-06, + "loss": 0.0433, + "num_input_tokens_seen": 59946856, + "step": 88915 + }, + { + "epoch": 2.1723303935699803, + "grad_norm": 0.012639293447136879, + "learning_rate": 1.3924666435790936e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59949928, + "step": 88920 + }, + { + "epoch": 2.1724525444018274, + "grad_norm": 0.05224275588989258, + "learning_rate": 1.3923882066952216e-06, + "loss": 0.0431, + "num_input_tokens_seen": 59953704, + "step": 88925 + }, + { + "epoch": 2.1725746952336746, + "grad_norm": 0.11708616465330124, + "learning_rate": 1.3923097669577967e-06, + "loss": 0.0004, + "num_input_tokens_seen": 59957032, + "step": 88930 + }, + { + "epoch": 2.172696846065522, + "grad_norm": 0.07801163196563721, + "learning_rate": 1.3922313243673899e-06, + "loss": 0.0538, + "num_input_tokens_seen": 59960104, + "step": 88935 + }, + { + "epoch": 2.172818996897369, + "grad_norm": 0.0019404669292271137, + "learning_rate": 1.3921528789245713e-06, + "loss": 0.0267, + "num_input_tokens_seen": 59963368, + "step": 88940 + }, + { + "epoch": 2.172941147729216, + "grad_norm": 0.010919238440692425, + "learning_rate": 1.3920744306299117e-06, + "loss": 0.0488, + "num_input_tokens_seen": 59966248, + "step": 88945 + }, + { + "epoch": 2.1730632985610634, + "grad_norm": 0.022607387974858284, + "learning_rate": 1.391995979483981e-06, + "loss": 0.0003, + "num_input_tokens_seen": 59970344, + "step": 88950 + }, + { + "epoch": 2.1731854493929106, + "grad_norm": 0.10743965208530426, + "learning_rate": 1.3919175254873505e-06, + "loss": 0.0002, + "num_input_tokens_seen": 59973352, + "step": 88955 + }, + { + "epoch": 2.1733076002247573, + "grad_norm": 0.06325899064540863, + "learning_rate": 1.3918390686405903e-06, + "loss": 0.0513, + "num_input_tokens_seen": 59976488, + "step": 88960 + }, + { + "epoch": 2.1734297510566045, + "grad_norm": 41.05293655395508, + "learning_rate": 1.391760608944271e-06, + "loss": 0.0526, + "num_input_tokens_seen": 59980136, + "step": 88965 + }, + { + "epoch": 2.1735519018884517, + "grad_norm": 0.0012355023063719273, + "learning_rate": 1.3916821463989629e-06, + "loss": 0.0488, + "num_input_tokens_seen": 59984040, + "step": 88970 + }, + { + "epoch": 2.173674052720299, + "grad_norm": 0.010925422422587872, + "learning_rate": 1.3916036810052373e-06, + "loss": 0.1476, + "num_input_tokens_seen": 59987112, + "step": 88975 + }, + { + "epoch": 2.173796203552146, + "grad_norm": 19.451017379760742, + "learning_rate": 1.391525212763664e-06, + "loss": 0.0713, + "num_input_tokens_seen": 59990376, + "step": 88980 + }, + { + "epoch": 2.1739183543839933, + "grad_norm": 0.040949687361717224, + "learning_rate": 1.3914467416748144e-06, + "loss": 0.0001, + "num_input_tokens_seen": 59993896, + "step": 88985 + }, + { + "epoch": 2.1740405052158405, + "grad_norm": 0.4370739161968231, + "learning_rate": 1.3913682677392587e-06, + "loss": 0.0669, + "num_input_tokens_seen": 59996840, + "step": 88990 + }, + { + "epoch": 2.1741626560476877, + "grad_norm": 0.016194364055991173, + "learning_rate": 1.3912897909575675e-06, + "loss": 0.0655, + "num_input_tokens_seen": 60000232, + "step": 88995 + }, + { + "epoch": 2.174284806879535, + "grad_norm": 1.7703710794448853, + "learning_rate": 1.3912113113303117e-06, + "loss": 0.0543, + "num_input_tokens_seen": 60003944, + "step": 89000 + }, + { + "epoch": 2.174406957711382, + "grad_norm": 0.024700865149497986, + "learning_rate": 1.3911328288580621e-06, + "loss": 0.0481, + "num_input_tokens_seen": 60007272, + "step": 89005 + }, + { + "epoch": 2.1745291085432292, + "grad_norm": 0.38946980237960815, + "learning_rate": 1.3910543435413898e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60010536, + "step": 89010 + }, + { + "epoch": 2.1746512593750764, + "grad_norm": 0.041124243289232254, + "learning_rate": 1.3909758553808646e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60013992, + "step": 89015 + }, + { + "epoch": 2.1747734102069236, + "grad_norm": 0.11730754375457764, + "learning_rate": 1.390897364377058e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60017384, + "step": 89020 + }, + { + "epoch": 2.174895561038771, + "grad_norm": 0.04993576556444168, + "learning_rate": 1.3908188705305405e-06, + "loss": 0.002, + "num_input_tokens_seen": 60020584, + "step": 89025 + }, + { + "epoch": 2.175017711870618, + "grad_norm": 0.03723525628447533, + "learning_rate": 1.390740373841883e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60024616, + "step": 89030 + }, + { + "epoch": 2.175139862702465, + "grad_norm": 0.03435581177473068, + "learning_rate": 1.3906618743116567e-06, + "loss": 0.0017, + "num_input_tokens_seen": 60028264, + "step": 89035 + }, + { + "epoch": 2.1752620135343124, + "grad_norm": 0.015532208606600761, + "learning_rate": 1.390583371940432e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60031464, + "step": 89040 + }, + { + "epoch": 2.175384164366159, + "grad_norm": 11.067805290222168, + "learning_rate": 1.3905048667287799e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60034856, + "step": 89045 + }, + { + "epoch": 2.1755063151980063, + "grad_norm": 0.007327179424464703, + "learning_rate": 1.3904263586772716e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60038248, + "step": 89050 + }, + { + "epoch": 2.1756284660298535, + "grad_norm": 0.013096985407173634, + "learning_rate": 1.3903478477864776e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60041384, + "step": 89055 + }, + { + "epoch": 2.1757506168617007, + "grad_norm": 18.17698860168457, + "learning_rate": 1.390269334056969e-06, + "loss": 0.0701, + "num_input_tokens_seen": 60044840, + "step": 89060 + }, + { + "epoch": 2.175872767693548, + "grad_norm": 0.020957941189408302, + "learning_rate": 1.390190817489317e-06, + "loss": 0.0752, + "num_input_tokens_seen": 60048232, + "step": 89065 + }, + { + "epoch": 2.175994918525395, + "grad_norm": 0.006288980599492788, + "learning_rate": 1.3901122980840928e-06, + "loss": 0.0856, + "num_input_tokens_seen": 60051944, + "step": 89070 + }, + { + "epoch": 2.1761170693572423, + "grad_norm": 0.010946640744805336, + "learning_rate": 1.3900337758418665e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60055208, + "step": 89075 + }, + { + "epoch": 2.1762392201890894, + "grad_norm": 14.283775329589844, + "learning_rate": 1.3899552507632098e-06, + "loss": 0.0582, + "num_input_tokens_seen": 60058920, + "step": 89080 + }, + { + "epoch": 2.1763613710209366, + "grad_norm": 0.020361153408885002, + "learning_rate": 1.3898767228486936e-06, + "loss": 0.0328, + "num_input_tokens_seen": 60061864, + "step": 89085 + }, + { + "epoch": 2.176483521852784, + "grad_norm": 0.007297951728105545, + "learning_rate": 1.389798192098889e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60065192, + "step": 89090 + }, + { + "epoch": 2.176605672684631, + "grad_norm": 0.02128020115196705, + "learning_rate": 1.389719658514367e-06, + "loss": 0.0613, + "num_input_tokens_seen": 60068456, + "step": 89095 + }, + { + "epoch": 2.176727823516478, + "grad_norm": 0.008222533389925957, + "learning_rate": 1.3896411220956991e-06, + "loss": 0.07, + "num_input_tokens_seen": 60072296, + "step": 89100 + }, + { + "epoch": 2.1768499743483254, + "grad_norm": 4.085846900939941, + "learning_rate": 1.3895625828434561e-06, + "loss": 0.0383, + "num_input_tokens_seen": 60075560, + "step": 89105 + }, + { + "epoch": 2.1769721251801726, + "grad_norm": 0.0071534449234604836, + "learning_rate": 1.3894840407582092e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60078760, + "step": 89110 + }, + { + "epoch": 2.1770942760120198, + "grad_norm": 0.048654671758413315, + "learning_rate": 1.3894054958405295e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60082152, + "step": 89115 + }, + { + "epoch": 2.177216426843867, + "grad_norm": 4.770420074462891, + "learning_rate": 1.3893269480909886e-06, + "loss": 0.0011, + "num_input_tokens_seen": 60085800, + "step": 89120 + }, + { + "epoch": 2.177338577675714, + "grad_norm": 0.008286378346383572, + "learning_rate": 1.389248397510157e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60089128, + "step": 89125 + }, + { + "epoch": 2.1774607285075613, + "grad_norm": 317.94000244140625, + "learning_rate": 1.3891698440986063e-06, + "loss": 0.0704, + "num_input_tokens_seen": 60092264, + "step": 89130 + }, + { + "epoch": 2.1775828793394085, + "grad_norm": 0.0218020211905241, + "learning_rate": 1.389091287856908e-06, + "loss": 0.0661, + "num_input_tokens_seen": 60095208, + "step": 89135 + }, + { + "epoch": 2.1777050301712553, + "grad_norm": 0.042609941214323044, + "learning_rate": 1.3890127287856334e-06, + "loss": 0.0504, + "num_input_tokens_seen": 60098920, + "step": 89140 + }, + { + "epoch": 2.1778271810031025, + "grad_norm": 0.5689958333969116, + "learning_rate": 1.3889341668853536e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60102184, + "step": 89145 + }, + { + "epoch": 2.1779493318349497, + "grad_norm": 0.026712533086538315, + "learning_rate": 1.3888556021566397e-06, + "loss": 0.0372, + "num_input_tokens_seen": 60105960, + "step": 89150 + }, + { + "epoch": 2.178071482666797, + "grad_norm": 0.05201387032866478, + "learning_rate": 1.3887770346000632e-06, + "loss": 0.1155, + "num_input_tokens_seen": 60109352, + "step": 89155 + }, + { + "epoch": 2.178193633498644, + "grad_norm": 0.13787080347537994, + "learning_rate": 1.3886984642161957e-06, + "loss": 0.011, + "num_input_tokens_seen": 60112360, + "step": 89160 + }, + { + "epoch": 2.1783157843304912, + "grad_norm": 0.010372105054557323, + "learning_rate": 1.3886198910056086e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60116136, + "step": 89165 + }, + { + "epoch": 2.1784379351623384, + "grad_norm": 0.0984877273440361, + "learning_rate": 1.388541314968873e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60119336, + "step": 89170 + }, + { + "epoch": 2.1785600859941856, + "grad_norm": 0.010339641943573952, + "learning_rate": 1.3884627361065604e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60122792, + "step": 89175 + }, + { + "epoch": 2.178682236826033, + "grad_norm": 0.00042667731759138405, + "learning_rate": 1.3883841544192424e-06, + "loss": 0.0071, + "num_input_tokens_seen": 60126184, + "step": 89180 + }, + { + "epoch": 2.17880438765788, + "grad_norm": 0.23866403102874756, + "learning_rate": 1.38830556990749e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60129320, + "step": 89185 + }, + { + "epoch": 2.178926538489727, + "grad_norm": 14.645671844482422, + "learning_rate": 1.3882269825718753e-06, + "loss": 0.1062, + "num_input_tokens_seen": 60133096, + "step": 89190 + }, + { + "epoch": 2.1790486893215744, + "grad_norm": 0.08448100090026855, + "learning_rate": 1.3881483924129693e-06, + "loss": 0.0008, + "num_input_tokens_seen": 60136104, + "step": 89195 + }, + { + "epoch": 2.1791708401534216, + "grad_norm": 0.007324859034270048, + "learning_rate": 1.3880697994313442e-06, + "loss": 0.0559, + "num_input_tokens_seen": 60139368, + "step": 89200 + }, + { + "epoch": 2.1792929909852687, + "grad_norm": 0.2774519920349121, + "learning_rate": 1.3879912036275712e-06, + "loss": 0.1086, + "num_input_tokens_seen": 60142568, + "step": 89205 + }, + { + "epoch": 2.179415141817116, + "grad_norm": 0.007932466454803944, + "learning_rate": 1.3879126050022213e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60146088, + "step": 89210 + }, + { + "epoch": 2.179537292648963, + "grad_norm": 0.2126796841621399, + "learning_rate": 1.3878340035558671e-06, + "loss": 0.001, + "num_input_tokens_seen": 60149352, + "step": 89215 + }, + { + "epoch": 2.1796594434808103, + "grad_norm": 0.05298115313053131, + "learning_rate": 1.3877553992890796e-06, + "loss": 0.0847, + "num_input_tokens_seen": 60152680, + "step": 89220 + }, + { + "epoch": 2.179781594312657, + "grad_norm": 0.04341858625411987, + "learning_rate": 1.3876767922024305e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60156392, + "step": 89225 + }, + { + "epoch": 2.1799037451445042, + "grad_norm": 0.020297808572649956, + "learning_rate": 1.3875981822964912e-06, + "loss": 0.0506, + "num_input_tokens_seen": 60159656, + "step": 89230 + }, + { + "epoch": 2.1800258959763514, + "grad_norm": 23.527563095092773, + "learning_rate": 1.387519569571834e-06, + "loss": 0.1247, + "num_input_tokens_seen": 60163048, + "step": 89235 + }, + { + "epoch": 2.1801480468081986, + "grad_norm": 0.08387105166912079, + "learning_rate": 1.38744095402903e-06, + "loss": 0.1293, + "num_input_tokens_seen": 60166056, + "step": 89240 + }, + { + "epoch": 2.180270197640046, + "grad_norm": 0.036509111523628235, + "learning_rate": 1.3873623356686517e-06, + "loss": 0.0432, + "num_input_tokens_seen": 60169896, + "step": 89245 + }, + { + "epoch": 2.180392348471893, + "grad_norm": 0.03160225227475166, + "learning_rate": 1.3872837144912696e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60173608, + "step": 89250 + }, + { + "epoch": 2.18051449930374, + "grad_norm": 15.129281044006348, + "learning_rate": 1.3872050904974566e-06, + "loss": 0.0363, + "num_input_tokens_seen": 60176936, + "step": 89255 + }, + { + "epoch": 2.1806366501355874, + "grad_norm": 0.3375871181488037, + "learning_rate": 1.387126463687784e-06, + "loss": 0.0466, + "num_input_tokens_seen": 60180008, + "step": 89260 + }, + { + "epoch": 2.1807588009674346, + "grad_norm": 0.048338957130908966, + "learning_rate": 1.3870478340628235e-06, + "loss": 0.0394, + "num_input_tokens_seen": 60182888, + "step": 89265 + }, + { + "epoch": 2.1808809517992818, + "grad_norm": 0.030270587652921677, + "learning_rate": 1.3869692016231473e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60185832, + "step": 89270 + }, + { + "epoch": 2.181003102631129, + "grad_norm": 0.05345119908452034, + "learning_rate": 1.3868905663693272e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60188840, + "step": 89275 + }, + { + "epoch": 2.181125253462976, + "grad_norm": 0.006547401659190655, + "learning_rate": 1.386811928301934e-06, + "loss": 0.1913, + "num_input_tokens_seen": 60192104, + "step": 89280 + }, + { + "epoch": 2.1812474042948233, + "grad_norm": 0.11802975833415985, + "learning_rate": 1.386733287421541e-06, + "loss": 0.068, + "num_input_tokens_seen": 60195368, + "step": 89285 + }, + { + "epoch": 2.1813695551266705, + "grad_norm": 0.07596275210380554, + "learning_rate": 1.3866546437287195e-06, + "loss": 0.0343, + "num_input_tokens_seen": 60198824, + "step": 89290 + }, + { + "epoch": 2.1814917059585177, + "grad_norm": 0.08031544834375381, + "learning_rate": 1.3865759972240411e-06, + "loss": 0.0516, + "num_input_tokens_seen": 60202024, + "step": 89295 + }, + { + "epoch": 2.181613856790365, + "grad_norm": 0.038348667323589325, + "learning_rate": 1.3864973479080786e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60205224, + "step": 89300 + }, + { + "epoch": 2.181736007622212, + "grad_norm": 0.05317453667521477, + "learning_rate": 1.386418695781403e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60208808, + "step": 89305 + }, + { + "epoch": 2.181858158454059, + "grad_norm": 0.013864437118172646, + "learning_rate": 1.3863400408445867e-06, + "loss": 0.0017, + "num_input_tokens_seen": 60212200, + "step": 89310 + }, + { + "epoch": 2.1819803092859065, + "grad_norm": 0.031996458768844604, + "learning_rate": 1.3862613830982018e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60215464, + "step": 89315 + }, + { + "epoch": 2.182102460117753, + "grad_norm": 43.89045715332031, + "learning_rate": 1.3861827225428204e-06, + "loss": 0.0729, + "num_input_tokens_seen": 60218600, + "step": 89320 + }, + { + "epoch": 2.1822246109496004, + "grad_norm": 0.05054045841097832, + "learning_rate": 1.3861040591790144e-06, + "loss": 0.0524, + "num_input_tokens_seen": 60221992, + "step": 89325 + }, + { + "epoch": 2.1823467617814476, + "grad_norm": 0.053709473460912704, + "learning_rate": 1.3860253930073555e-06, + "loss": 0.1776, + "num_input_tokens_seen": 60225256, + "step": 89330 + }, + { + "epoch": 2.182468912613295, + "grad_norm": 0.014191855676472187, + "learning_rate": 1.3859467240284165e-06, + "loss": 0.0133, + "num_input_tokens_seen": 60228840, + "step": 89335 + }, + { + "epoch": 2.182591063445142, + "grad_norm": 0.028932316228747368, + "learning_rate": 1.3858680522427686e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60232424, + "step": 89340 + }, + { + "epoch": 2.182713214276989, + "grad_norm": 0.0772460401058197, + "learning_rate": 1.3857893776509849e-06, + "loss": 0.0009, + "num_input_tokens_seen": 60235624, + "step": 89345 + }, + { + "epoch": 2.1828353651088364, + "grad_norm": 0.15768887102603912, + "learning_rate": 1.385710700253637e-06, + "loss": 0.0492, + "num_input_tokens_seen": 60238888, + "step": 89350 + }, + { + "epoch": 2.1829575159406835, + "grad_norm": 0.21225495636463165, + "learning_rate": 1.385632020051297e-06, + "loss": 0.0399, + "num_input_tokens_seen": 60242408, + "step": 89355 + }, + { + "epoch": 2.1830796667725307, + "grad_norm": 0.05385947600007057, + "learning_rate": 1.3855533370445374e-06, + "loss": 0.0133, + "num_input_tokens_seen": 60246376, + "step": 89360 + }, + { + "epoch": 2.183201817604378, + "grad_norm": 0.05395977944135666, + "learning_rate": 1.3854746512339301e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60249576, + "step": 89365 + }, + { + "epoch": 2.183323968436225, + "grad_norm": 0.01952124759554863, + "learning_rate": 1.3853959626200475e-06, + "loss": 0.0538, + "num_input_tokens_seen": 60252776, + "step": 89370 + }, + { + "epoch": 2.1834461192680723, + "grad_norm": 0.03978893533349037, + "learning_rate": 1.3853172712034617e-06, + "loss": 0.1073, + "num_input_tokens_seen": 60255784, + "step": 89375 + }, + { + "epoch": 2.1835682700999195, + "grad_norm": 0.03560865297913551, + "learning_rate": 1.3852385769847453e-06, + "loss": 0.0619, + "num_input_tokens_seen": 60259240, + "step": 89380 + }, + { + "epoch": 2.1836904209317667, + "grad_norm": 0.03531051427125931, + "learning_rate": 1.3851598799644702e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60262760, + "step": 89385 + }, + { + "epoch": 2.183812571763614, + "grad_norm": 0.022893404588103294, + "learning_rate": 1.3850811801432087e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60266408, + "step": 89390 + }, + { + "epoch": 2.183934722595461, + "grad_norm": 0.14207857847213745, + "learning_rate": 1.3850024775215337e-06, + "loss": 0.0434, + "num_input_tokens_seen": 60269608, + "step": 89395 + }, + { + "epoch": 2.1840568734273083, + "grad_norm": 41.99595260620117, + "learning_rate": 1.384923772100017e-06, + "loss": 0.0347, + "num_input_tokens_seen": 60272616, + "step": 89400 + }, + { + "epoch": 2.184179024259155, + "grad_norm": 0.13540434837341309, + "learning_rate": 1.3848450638792305e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60276392, + "step": 89405 + }, + { + "epoch": 2.184301175091002, + "grad_norm": 17.306758880615234, + "learning_rate": 1.3847663528597477e-06, + "loss": 0.0013, + "num_input_tokens_seen": 60280104, + "step": 89410 + }, + { + "epoch": 2.1844233259228494, + "grad_norm": 0.04647582396864891, + "learning_rate": 1.3846876390421405e-06, + "loss": 0.0448, + "num_input_tokens_seen": 60283368, + "step": 89415 + }, + { + "epoch": 2.1845454767546966, + "grad_norm": 0.07750110328197479, + "learning_rate": 1.3846089224269815e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60286888, + "step": 89420 + }, + { + "epoch": 2.1846676275865438, + "grad_norm": 0.10336387157440186, + "learning_rate": 1.3845302030148428e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60290472, + "step": 89425 + }, + { + "epoch": 2.184789778418391, + "grad_norm": 0.36430805921554565, + "learning_rate": 1.384451480806297e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60293736, + "step": 89430 + }, + { + "epoch": 2.184911929250238, + "grad_norm": 0.011753383092582226, + "learning_rate": 1.3843727558019166e-06, + "loss": 0.0455, + "num_input_tokens_seen": 60297128, + "step": 89435 + }, + { + "epoch": 2.1850340800820853, + "grad_norm": 0.021449893712997437, + "learning_rate": 1.3842940280022738e-06, + "loss": 0.0537, + "num_input_tokens_seen": 60300520, + "step": 89440 + }, + { + "epoch": 2.1851562309139325, + "grad_norm": 0.01641305908560753, + "learning_rate": 1.384215297407942e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60304040, + "step": 89445 + }, + { + "epoch": 2.1852783817457797, + "grad_norm": 0.03722817450761795, + "learning_rate": 1.384136564019493e-06, + "loss": 0.0528, + "num_input_tokens_seen": 60307304, + "step": 89450 + }, + { + "epoch": 2.185400532577627, + "grad_norm": 0.034665465354919434, + "learning_rate": 1.3840578278374996e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60310440, + "step": 89455 + }, + { + "epoch": 2.185522683409474, + "grad_norm": 0.02144056372344494, + "learning_rate": 1.3839790888625345e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60313576, + "step": 89460 + }, + { + "epoch": 2.1856448342413213, + "grad_norm": 0.04060309752821922, + "learning_rate": 1.38390034709517e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60316776, + "step": 89465 + }, + { + "epoch": 2.1857669850731685, + "grad_norm": 0.013973901979625225, + "learning_rate": 1.383821602535979e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60320424, + "step": 89470 + }, + { + "epoch": 2.1858891359050157, + "grad_norm": 0.0008165006875060499, + "learning_rate": 1.3837428551855342e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60323816, + "step": 89475 + }, + { + "epoch": 2.186011286736863, + "grad_norm": 0.06573706865310669, + "learning_rate": 1.383664105044408e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60327016, + "step": 89480 + }, + { + "epoch": 2.18613343756871, + "grad_norm": 0.12895028293132782, + "learning_rate": 1.3835853521131733e-06, + "loss": 0.0106, + "num_input_tokens_seen": 60329960, + "step": 89485 + }, + { + "epoch": 2.186255588400557, + "grad_norm": 0.0590890534222126, + "learning_rate": 1.3835065963924026e-06, + "loss": 0.0446, + "num_input_tokens_seen": 60333288, + "step": 89490 + }, + { + "epoch": 2.186377739232404, + "grad_norm": 0.09835655242204666, + "learning_rate": 1.3834278378826687e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60336680, + "step": 89495 + }, + { + "epoch": 2.186499890064251, + "grad_norm": 0.01371688675135374, + "learning_rate": 1.3833490765845445e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60340136, + "step": 89500 + }, + { + "epoch": 2.1866220408960984, + "grad_norm": 0.035382047295570374, + "learning_rate": 1.383270312498603e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60343592, + "step": 89505 + }, + { + "epoch": 2.1867441917279455, + "grad_norm": 0.00039148752694018185, + "learning_rate": 1.3831915456254164e-06, + "loss": 0.062, + "num_input_tokens_seen": 60346792, + "step": 89510 + }, + { + "epoch": 2.1868663425597927, + "grad_norm": 0.034438714385032654, + "learning_rate": 1.383112775965558e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60350632, + "step": 89515 + }, + { + "epoch": 2.18698849339164, + "grad_norm": 0.001694877864792943, + "learning_rate": 1.3830340035196004e-06, + "loss": 0.0999, + "num_input_tokens_seen": 60353768, + "step": 89520 + }, + { + "epoch": 2.187110644223487, + "grad_norm": 12.802435874938965, + "learning_rate": 1.3829552282881165e-06, + "loss": 0.0445, + "num_input_tokens_seen": 60356968, + "step": 89525 + }, + { + "epoch": 2.1872327950553343, + "grad_norm": 0.022081870585680008, + "learning_rate": 1.3828764502716793e-06, + "loss": 0.0009, + "num_input_tokens_seen": 60360232, + "step": 89530 + }, + { + "epoch": 2.1873549458871815, + "grad_norm": 0.021757181733846664, + "learning_rate": 1.3827976694708614e-06, + "loss": 0.0298, + "num_input_tokens_seen": 60363048, + "step": 89535 + }, + { + "epoch": 2.1874770967190287, + "grad_norm": 0.12817370891571045, + "learning_rate": 1.3827188858862359e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60366504, + "step": 89540 + }, + { + "epoch": 2.187599247550876, + "grad_norm": 0.120881088078022, + "learning_rate": 1.3826400995183755e-06, + "loss": 0.0493, + "num_input_tokens_seen": 60369832, + "step": 89545 + }, + { + "epoch": 2.187721398382723, + "grad_norm": 0.5409322381019592, + "learning_rate": 1.3825613103678539e-06, + "loss": 0.0621, + "num_input_tokens_seen": 60373032, + "step": 89550 + }, + { + "epoch": 2.1878435492145702, + "grad_norm": 14.173566818237305, + "learning_rate": 1.382482518435243e-06, + "loss": 0.1244, + "num_input_tokens_seen": 60376616, + "step": 89555 + }, + { + "epoch": 2.1879657000464174, + "grad_norm": 0.16862067580223083, + "learning_rate": 1.3824037237211166e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60379752, + "step": 89560 + }, + { + "epoch": 2.1880878508782646, + "grad_norm": 0.1347108781337738, + "learning_rate": 1.3823249262260476e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60383208, + "step": 89565 + }, + { + "epoch": 2.188210001710112, + "grad_norm": 0.014512077905237675, + "learning_rate": 1.3822461259506088e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60386344, + "step": 89570 + }, + { + "epoch": 2.188332152541959, + "grad_norm": 1.0698745250701904, + "learning_rate": 1.3821673228953735e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60389608, + "step": 89575 + }, + { + "epoch": 2.188454303373806, + "grad_norm": 0.024578392505645752, + "learning_rate": 1.3820885170609142e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60393064, + "step": 89580 + }, + { + "epoch": 2.188576454205653, + "grad_norm": 0.015470233745872974, + "learning_rate": 1.382009708447805e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60396712, + "step": 89585 + }, + { + "epoch": 2.1886986050375, + "grad_norm": 0.07507811486721039, + "learning_rate": 1.3819308970566178e-06, + "loss": 0.0706, + "num_input_tokens_seen": 60400360, + "step": 89590 + }, + { + "epoch": 2.1888207558693473, + "grad_norm": 0.027581792324781418, + "learning_rate": 1.381852082887927e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60403752, + "step": 89595 + }, + { + "epoch": 2.1889429067011945, + "grad_norm": 0.02173568122088909, + "learning_rate": 1.3817732659423048e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60407208, + "step": 89600 + }, + { + "epoch": 2.1890650575330417, + "grad_norm": 0.20994439721107483, + "learning_rate": 1.3816944462203251e-06, + "loss": 0.0672, + "num_input_tokens_seen": 60410728, + "step": 89605 + }, + { + "epoch": 2.189187208364889, + "grad_norm": 0.05724925175309181, + "learning_rate": 1.3816156237225602e-06, + "loss": 0.1354, + "num_input_tokens_seen": 60414184, + "step": 89610 + }, + { + "epoch": 2.189309359196736, + "grad_norm": 0.202612042427063, + "learning_rate": 1.3815367984495842e-06, + "loss": 0.0521, + "num_input_tokens_seen": 60417640, + "step": 89615 + }, + { + "epoch": 2.1894315100285833, + "grad_norm": 0.0011157530825585127, + "learning_rate": 1.3814579704019697e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60421160, + "step": 89620 + }, + { + "epoch": 2.1895536608604305, + "grad_norm": 10.986202239990234, + "learning_rate": 1.3813791395802905e-06, + "loss": 0.0438, + "num_input_tokens_seen": 60424488, + "step": 89625 + }, + { + "epoch": 2.1896758116922777, + "grad_norm": 0.03978127986192703, + "learning_rate": 1.3813003059851198e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60427944, + "step": 89630 + }, + { + "epoch": 2.189797962524125, + "grad_norm": 0.07209401577711105, + "learning_rate": 1.3812214696170303e-06, + "loss": 0.0456, + "num_input_tokens_seen": 60431336, + "step": 89635 + }, + { + "epoch": 2.189920113355972, + "grad_norm": 0.007610888220369816, + "learning_rate": 1.381142630476596e-06, + "loss": 0.044, + "num_input_tokens_seen": 60434536, + "step": 89640 + }, + { + "epoch": 2.190042264187819, + "grad_norm": 0.00974766630679369, + "learning_rate": 1.3810637885643898e-06, + "loss": 0.0, + "num_input_tokens_seen": 60437672, + "step": 89645 + }, + { + "epoch": 2.1901644150196664, + "grad_norm": 251.47662353515625, + "learning_rate": 1.3809849438809853e-06, + "loss": 0.1284, + "num_input_tokens_seen": 60440616, + "step": 89650 + }, + { + "epoch": 2.1902865658515136, + "grad_norm": 53.546661376953125, + "learning_rate": 1.3809060964269557e-06, + "loss": 0.1287, + "num_input_tokens_seen": 60444008, + "step": 89655 + }, + { + "epoch": 2.190408716683361, + "grad_norm": 61.50639343261719, + "learning_rate": 1.3808272462028747e-06, + "loss": 0.0911, + "num_input_tokens_seen": 60447656, + "step": 89660 + }, + { + "epoch": 2.190530867515208, + "grad_norm": 0.013333199545741081, + "learning_rate": 1.3807483932093157e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60451304, + "step": 89665 + }, + { + "epoch": 2.1906530183470547, + "grad_norm": 0.07766670733690262, + "learning_rate": 1.3806695374468515e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60454248, + "step": 89670 + }, + { + "epoch": 2.190775169178902, + "grad_norm": 0.0028038809541612864, + "learning_rate": 1.3805906789160564e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60457320, + "step": 89675 + }, + { + "epoch": 2.190897320010749, + "grad_norm": 0.08032268285751343, + "learning_rate": 1.3805118176175033e-06, + "loss": 0.001, + "num_input_tokens_seen": 60460392, + "step": 89680 + }, + { + "epoch": 2.1910194708425963, + "grad_norm": 0.0032088463194668293, + "learning_rate": 1.380432953551766e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60464104, + "step": 89685 + }, + { + "epoch": 2.1911416216744435, + "grad_norm": 0.011546256020665169, + "learning_rate": 1.3803540867194182e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60467560, + "step": 89690 + }, + { + "epoch": 2.1912637725062907, + "grad_norm": 1.7818785905838013, + "learning_rate": 1.3802752171210329e-06, + "loss": 0.0008, + "num_input_tokens_seen": 60470696, + "step": 89695 + }, + { + "epoch": 2.191385923338138, + "grad_norm": 0.06002940610051155, + "learning_rate": 1.3801963447571837e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60474216, + "step": 89700 + }, + { + "epoch": 2.191508074169985, + "grad_norm": 0.0799216628074646, + "learning_rate": 1.380117469628445e-06, + "loss": 0.0018, + "num_input_tokens_seen": 60477608, + "step": 89705 + }, + { + "epoch": 2.1916302250018322, + "grad_norm": 0.004125783685594797, + "learning_rate": 1.3800385917353894e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60480616, + "step": 89710 + }, + { + "epoch": 2.1917523758336794, + "grad_norm": 0.03527636453509331, + "learning_rate": 1.379959711078591e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60484392, + "step": 89715 + }, + { + "epoch": 2.1918745266655266, + "grad_norm": 0.025349846109747887, + "learning_rate": 1.3798808276586233e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60487592, + "step": 89720 + }, + { + "epoch": 2.191996677497374, + "grad_norm": 0.028911394998431206, + "learning_rate": 1.3798019414760603e-06, + "loss": 0.1269, + "num_input_tokens_seen": 60490600, + "step": 89725 + }, + { + "epoch": 2.192118828329221, + "grad_norm": 0.015381722711026669, + "learning_rate": 1.3797230525314754e-06, + "loss": 0.0008, + "num_input_tokens_seen": 60493672, + "step": 89730 + }, + { + "epoch": 2.192240979161068, + "grad_norm": 0.014268257655203342, + "learning_rate": 1.379644160825442e-06, + "loss": 0.1136, + "num_input_tokens_seen": 60496936, + "step": 89735 + }, + { + "epoch": 2.1923631299929154, + "grad_norm": 0.08037779480218887, + "learning_rate": 1.3795652663585347e-06, + "loss": 0.0825, + "num_input_tokens_seen": 60500072, + "step": 89740 + }, + { + "epoch": 2.1924852808247626, + "grad_norm": 0.07356099039316177, + "learning_rate": 1.3794863691313264e-06, + "loss": 0.0013, + "num_input_tokens_seen": 60503400, + "step": 89745 + }, + { + "epoch": 2.1926074316566098, + "grad_norm": 0.00598715478554368, + "learning_rate": 1.379407469144391e-06, + "loss": 0.0067, + "num_input_tokens_seen": 60506792, + "step": 89750 + }, + { + "epoch": 2.192729582488457, + "grad_norm": 0.006357715930789709, + "learning_rate": 1.3793285663983026e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60509928, + "step": 89755 + }, + { + "epoch": 2.192851733320304, + "grad_norm": 67.47779846191406, + "learning_rate": 1.3792496608936348e-06, + "loss": 0.0691, + "num_input_tokens_seen": 60513576, + "step": 89760 + }, + { + "epoch": 2.192973884152151, + "grad_norm": 0.016489354893565178, + "learning_rate": 1.3791707526309615e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60517096, + "step": 89765 + }, + { + "epoch": 2.193096034983998, + "grad_norm": 0.08872917294502258, + "learning_rate": 1.3790918416108567e-06, + "loss": 0.065, + "num_input_tokens_seen": 60520872, + "step": 89770 + }, + { + "epoch": 2.1932181858158453, + "grad_norm": 0.05728995054960251, + "learning_rate": 1.3790129278338936e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60524776, + "step": 89775 + }, + { + "epoch": 2.1933403366476925, + "grad_norm": 0.017248833552002907, + "learning_rate": 1.3789340113006466e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60528552, + "step": 89780 + }, + { + "epoch": 2.1934624874795396, + "grad_norm": 28.960683822631836, + "learning_rate": 1.3788550920116899e-06, + "loss": 0.2405, + "num_input_tokens_seen": 60531560, + "step": 89785 + }, + { + "epoch": 2.193584638311387, + "grad_norm": 0.0018641628557816148, + "learning_rate": 1.378776169967597e-06, + "loss": 0.1328, + "num_input_tokens_seen": 60534760, + "step": 89790 + }, + { + "epoch": 2.193706789143234, + "grad_norm": 0.1414841264486313, + "learning_rate": 1.3786972451689419e-06, + "loss": 0.0013, + "num_input_tokens_seen": 60538344, + "step": 89795 + }, + { + "epoch": 2.193828939975081, + "grad_norm": 0.04694800451397896, + "learning_rate": 1.3786183176162985e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60541288, + "step": 89800 + }, + { + "epoch": 2.1939510908069284, + "grad_norm": 0.1992703676223755, + "learning_rate": 1.3785393873102407e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60545512, + "step": 89805 + }, + { + "epoch": 2.1940732416387756, + "grad_norm": 0.01089775562286377, + "learning_rate": 1.3784604542513428e-06, + "loss": 0.0011, + "num_input_tokens_seen": 60549032, + "step": 89810 + }, + { + "epoch": 2.194195392470623, + "grad_norm": 0.03481999412178993, + "learning_rate": 1.3783815184401788e-06, + "loss": 0.089, + "num_input_tokens_seen": 60552424, + "step": 89815 + }, + { + "epoch": 2.19431754330247, + "grad_norm": 0.004920847248286009, + "learning_rate": 1.3783025798773224e-06, + "loss": 0.0489, + "num_input_tokens_seen": 60555496, + "step": 89820 + }, + { + "epoch": 2.194439694134317, + "grad_norm": 0.018685011193156242, + "learning_rate": 1.378223638563348e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60559528, + "step": 89825 + }, + { + "epoch": 2.1945618449661644, + "grad_norm": 0.13076385855674744, + "learning_rate": 1.3781446944988297e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60563176, + "step": 89830 + }, + { + "epoch": 2.1946839957980115, + "grad_norm": 0.05780500918626785, + "learning_rate": 1.3780657476843414e-06, + "loss": 0.0481, + "num_input_tokens_seen": 60566376, + "step": 89835 + }, + { + "epoch": 2.1948061466298587, + "grad_norm": 1.6194716691970825, + "learning_rate": 1.3779867981204571e-06, + "loss": 0.0743, + "num_input_tokens_seen": 60569832, + "step": 89840 + }, + { + "epoch": 2.194928297461706, + "grad_norm": 0.0012161084450781345, + "learning_rate": 1.3779078458077513e-06, + "loss": 0.0873, + "num_input_tokens_seen": 60573032, + "step": 89845 + }, + { + "epoch": 2.1950504482935527, + "grad_norm": 0.01662173680961132, + "learning_rate": 1.3778288907467982e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60576296, + "step": 89850 + }, + { + "epoch": 2.1951725991254, + "grad_norm": 0.003397295018658042, + "learning_rate": 1.3777499329381714e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60579496, + "step": 89855 + }, + { + "epoch": 2.195294749957247, + "grad_norm": 0.1815873384475708, + "learning_rate": 1.3776709723824459e-06, + "loss": 0.046, + "num_input_tokens_seen": 60583208, + "step": 89860 + }, + { + "epoch": 2.1954169007890942, + "grad_norm": 0.025489671155810356, + "learning_rate": 1.377592009080195e-06, + "loss": 0.1189, + "num_input_tokens_seen": 60586280, + "step": 89865 + }, + { + "epoch": 2.1955390516209414, + "grad_norm": 0.17918871343135834, + "learning_rate": 1.3775130430319936e-06, + "loss": 0.0671, + "num_input_tokens_seen": 60589544, + "step": 89870 + }, + { + "epoch": 2.1956612024527886, + "grad_norm": 0.028614962473511696, + "learning_rate": 1.377434074238416e-06, + "loss": 0.0458, + "num_input_tokens_seen": 60593064, + "step": 89875 + }, + { + "epoch": 2.195783353284636, + "grad_norm": 0.03311797231435776, + "learning_rate": 1.377355102700036e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60596008, + "step": 89880 + }, + { + "epoch": 2.195905504116483, + "grad_norm": 49.83849334716797, + "learning_rate": 1.3772761284174286e-06, + "loss": 0.087, + "num_input_tokens_seen": 60599528, + "step": 89885 + }, + { + "epoch": 2.19602765494833, + "grad_norm": 0.08110342174768448, + "learning_rate": 1.3771971513911675e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60603304, + "step": 89890 + }, + { + "epoch": 2.1961498057801774, + "grad_norm": 15.620611190795898, + "learning_rate": 1.3771181716218277e-06, + "loss": 0.0338, + "num_input_tokens_seen": 60606504, + "step": 89895 + }, + { + "epoch": 2.1962719566120246, + "grad_norm": 0.09303645044565201, + "learning_rate": 1.3770391891099824e-06, + "loss": 0.0557, + "num_input_tokens_seen": 60609960, + "step": 89900 + }, + { + "epoch": 2.1963941074438718, + "grad_norm": 0.007297168485820293, + "learning_rate": 1.376960203856207e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60613416, + "step": 89905 + }, + { + "epoch": 2.196516258275719, + "grad_norm": 0.0076292455196380615, + "learning_rate": 1.3768812158610757e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60616616, + "step": 89910 + }, + { + "epoch": 2.196638409107566, + "grad_norm": 0.003555135801434517, + "learning_rate": 1.3768022251251627e-06, + "loss": 0.0, + "num_input_tokens_seen": 60620072, + "step": 89915 + }, + { + "epoch": 2.1967605599394133, + "grad_norm": 13.932856559753418, + "learning_rate": 1.3767232316490428e-06, + "loss": 0.0513, + "num_input_tokens_seen": 60623400, + "step": 89920 + }, + { + "epoch": 2.1968827107712605, + "grad_norm": 46.25975036621094, + "learning_rate": 1.3766442354332899e-06, + "loss": 0.0836, + "num_input_tokens_seen": 60626600, + "step": 89925 + }, + { + "epoch": 2.1970048616031077, + "grad_norm": 0.12795943021774292, + "learning_rate": 1.3765652364784787e-06, + "loss": 0.0261, + "num_input_tokens_seen": 60629864, + "step": 89930 + }, + { + "epoch": 2.1971270124349545, + "grad_norm": 0.22673307359218597, + "learning_rate": 1.3764862347851844e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60633640, + "step": 89935 + }, + { + "epoch": 2.1972491632668016, + "grad_norm": 0.02614295296370983, + "learning_rate": 1.3764072303539806e-06, + "loss": 0.0729, + "num_input_tokens_seen": 60636904, + "step": 89940 + }, + { + "epoch": 2.197371314098649, + "grad_norm": 37.92133331298828, + "learning_rate": 1.3763282231854425e-06, + "loss": 0.0451, + "num_input_tokens_seen": 60640680, + "step": 89945 + }, + { + "epoch": 2.197493464930496, + "grad_norm": 0.6765152812004089, + "learning_rate": 1.376249213280144e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60643880, + "step": 89950 + }, + { + "epoch": 2.197615615762343, + "grad_norm": 0.031104207038879395, + "learning_rate": 1.37617020063866e-06, + "loss": 0.0311, + "num_input_tokens_seen": 60647016, + "step": 89955 + }, + { + "epoch": 2.1977377665941904, + "grad_norm": 0.03291548416018486, + "learning_rate": 1.3760911852615654e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60650216, + "step": 89960 + }, + { + "epoch": 2.1978599174260376, + "grad_norm": 0.13013535737991333, + "learning_rate": 1.376012167149434e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60653224, + "step": 89965 + }, + { + "epoch": 2.197982068257885, + "grad_norm": 0.0047679804265499115, + "learning_rate": 1.3759331463028414e-06, + "loss": 0.0841, + "num_input_tokens_seen": 60656552, + "step": 89970 + }, + { + "epoch": 2.198104219089732, + "grad_norm": 0.024526601657271385, + "learning_rate": 1.3758541227223618e-06, + "loss": 0.097, + "num_input_tokens_seen": 60660776, + "step": 89975 + }, + { + "epoch": 2.198226369921579, + "grad_norm": 241.2824249267578, + "learning_rate": 1.3757750964085698e-06, + "loss": 0.0182, + "num_input_tokens_seen": 60664168, + "step": 89980 + }, + { + "epoch": 2.1983485207534263, + "grad_norm": 0.0037704810965806246, + "learning_rate": 1.3756960673620403e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60667688, + "step": 89985 + }, + { + "epoch": 2.1984706715852735, + "grad_norm": 0.014645399525761604, + "learning_rate": 1.375617035583348e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60670952, + "step": 89990 + }, + { + "epoch": 2.1985928224171207, + "grad_norm": 0.301076203584671, + "learning_rate": 1.3755380010730677e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60674600, + "step": 89995 + }, + { + "epoch": 2.198714973248968, + "grad_norm": 0.014310811646282673, + "learning_rate": 1.375458963831774e-06, + "loss": 0.0679, + "num_input_tokens_seen": 60677800, + "step": 90000 + }, + { + "epoch": 2.198837124080815, + "grad_norm": 0.003124415874481201, + "learning_rate": 1.3753799238600416e-06, + "loss": 0.0548, + "num_input_tokens_seen": 60681192, + "step": 90005 + }, + { + "epoch": 2.1989592749126623, + "grad_norm": 1.3749229907989502, + "learning_rate": 1.3753008811584455e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60684328, + "step": 90010 + }, + { + "epoch": 2.1990814257445095, + "grad_norm": 0.003533211536705494, + "learning_rate": 1.3752218357275605e-06, + "loss": 0.0643, + "num_input_tokens_seen": 60687528, + "step": 90015 + }, + { + "epoch": 2.1992035765763567, + "grad_norm": 1.0003328323364258, + "learning_rate": 1.3751427875679613e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60691304, + "step": 90020 + }, + { + "epoch": 2.199325727408204, + "grad_norm": 14.11498737335205, + "learning_rate": 1.3750637366802227e-06, + "loss": 0.1813, + "num_input_tokens_seen": 60694376, + "step": 90025 + }, + { + "epoch": 2.1994478782400506, + "grad_norm": 0.04383881017565727, + "learning_rate": 1.37498468306492e-06, + "loss": 0.0651, + "num_input_tokens_seen": 60697320, + "step": 90030 + }, + { + "epoch": 2.199570029071898, + "grad_norm": 0.08588389307260513, + "learning_rate": 1.3749056267226276e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60700648, + "step": 90035 + }, + { + "epoch": 2.199692179903745, + "grad_norm": 0.12183842808008194, + "learning_rate": 1.3748265676539207e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60703656, + "step": 90040 + }, + { + "epoch": 2.199814330735592, + "grad_norm": 248.8981170654297, + "learning_rate": 1.3747475058593742e-06, + "loss": 0.0727, + "num_input_tokens_seen": 60706856, + "step": 90045 + }, + { + "epoch": 2.1999364815674394, + "grad_norm": 22.585060119628906, + "learning_rate": 1.3746684413395634e-06, + "loss": 0.0516, + "num_input_tokens_seen": 60710632, + "step": 90050 + }, + { + "epoch": 2.2000586323992866, + "grad_norm": 0.40043529868125916, + "learning_rate": 1.3745893740950622e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60714344, + "step": 90055 + }, + { + "epoch": 2.2001807832311338, + "grad_norm": 0.006907534785568714, + "learning_rate": 1.374510304126447e-06, + "loss": 0.0949, + "num_input_tokens_seen": 60717480, + "step": 90060 + }, + { + "epoch": 2.200302934062981, + "grad_norm": 36.79753494262695, + "learning_rate": 1.3744312314342918e-06, + "loss": 0.1318, + "num_input_tokens_seen": 60721576, + "step": 90065 + }, + { + "epoch": 2.200425084894828, + "grad_norm": 3.2954399585723877, + "learning_rate": 1.374352156019172e-06, + "loss": 0.0243, + "num_input_tokens_seen": 60724712, + "step": 90070 + }, + { + "epoch": 2.2005472357266753, + "grad_norm": 0.00985129177570343, + "learning_rate": 1.3742730778816626e-06, + "loss": 0.0779, + "num_input_tokens_seen": 60728168, + "step": 90075 + }, + { + "epoch": 2.2006693865585225, + "grad_norm": 0.004111927468329668, + "learning_rate": 1.3741939970223388e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60732072, + "step": 90080 + }, + { + "epoch": 2.2007915373903697, + "grad_norm": 2.2431719303131104, + "learning_rate": 1.3741149134417756e-06, + "loss": 0.0424, + "num_input_tokens_seen": 60735464, + "step": 90085 + }, + { + "epoch": 2.200913688222217, + "grad_norm": 0.14430655539035797, + "learning_rate": 1.3740358271405481e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60738664, + "step": 90090 + }, + { + "epoch": 2.201035839054064, + "grad_norm": 18.08392333984375, + "learning_rate": 1.3739567381192316e-06, + "loss": 0.0478, + "num_input_tokens_seen": 60742056, + "step": 90095 + }, + { + "epoch": 2.2011579898859113, + "grad_norm": 0.01610242761671543, + "learning_rate": 1.373877646378401e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60745000, + "step": 90100 + }, + { + "epoch": 2.2012801407177585, + "grad_norm": 0.08473029732704163, + "learning_rate": 1.3737985519186316e-06, + "loss": 0.0397, + "num_input_tokens_seen": 60748328, + "step": 90105 + }, + { + "epoch": 2.2014022915496056, + "grad_norm": 18.636194229125977, + "learning_rate": 1.3737194547404986e-06, + "loss": 0.0622, + "num_input_tokens_seen": 60751528, + "step": 90110 + }, + { + "epoch": 2.2015244423814524, + "grad_norm": 0.43971750140190125, + "learning_rate": 1.373640354844577e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60754472, + "step": 90115 + }, + { + "epoch": 2.2016465932132996, + "grad_norm": 0.31794047355651855, + "learning_rate": 1.3735612522314423e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60757672, + "step": 90120 + }, + { + "epoch": 2.2017687440451468, + "grad_norm": 0.01444853376597166, + "learning_rate": 1.37348214690167e-06, + "loss": 0.0477, + "num_input_tokens_seen": 60760808, + "step": 90125 + }, + { + "epoch": 2.201890894876994, + "grad_norm": 0.0342915803194046, + "learning_rate": 1.373403038855835e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60764520, + "step": 90130 + }, + { + "epoch": 2.202013045708841, + "grad_norm": 0.07567956298589706, + "learning_rate": 1.3733239280945124e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60768168, + "step": 90135 + }, + { + "epoch": 2.2021351965406883, + "grad_norm": 0.0931648463010788, + "learning_rate": 1.373244814618278e-06, + "loss": 0.0753, + "num_input_tokens_seen": 60771304, + "step": 90140 + }, + { + "epoch": 2.2022573473725355, + "grad_norm": 268.7383728027344, + "learning_rate": 1.3731656984277069e-06, + "loss": 0.0331, + "num_input_tokens_seen": 60774632, + "step": 90145 + }, + { + "epoch": 2.2023794982043827, + "grad_norm": 3.1963915824890137, + "learning_rate": 1.3730865795233744e-06, + "loss": 0.0361, + "num_input_tokens_seen": 60777896, + "step": 90150 + }, + { + "epoch": 2.20250164903623, + "grad_norm": 0.13845311105251312, + "learning_rate": 1.373007457905856e-06, + "loss": 0.0006, + "num_input_tokens_seen": 60781096, + "step": 90155 + }, + { + "epoch": 2.202623799868077, + "grad_norm": 0.012970576994121075, + "learning_rate": 1.3729283335757272e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60784616, + "step": 90160 + }, + { + "epoch": 2.2027459506999243, + "grad_norm": 0.005554935894906521, + "learning_rate": 1.372849206533563e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60787816, + "step": 90165 + }, + { + "epoch": 2.2028681015317715, + "grad_norm": 0.021228192374110222, + "learning_rate": 1.3727700767799393e-06, + "loss": 0.0515, + "num_input_tokens_seen": 60790760, + "step": 90170 + }, + { + "epoch": 2.2029902523636187, + "grad_norm": 0.03763711079955101, + "learning_rate": 1.372690944315431e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60793768, + "step": 90175 + }, + { + "epoch": 2.203112403195466, + "grad_norm": 23.194503784179688, + "learning_rate": 1.372611809140614e-06, + "loss": 0.0848, + "num_input_tokens_seen": 60797032, + "step": 90180 + }, + { + "epoch": 2.203234554027313, + "grad_norm": 0.2247171700000763, + "learning_rate": 1.3725326712560638e-06, + "loss": 0.0307, + "num_input_tokens_seen": 60800232, + "step": 90185 + }, + { + "epoch": 2.2033567048591602, + "grad_norm": 0.0721370056271553, + "learning_rate": 1.3724535306623558e-06, + "loss": 0.0562, + "num_input_tokens_seen": 60803880, + "step": 90190 + }, + { + "epoch": 2.2034788556910074, + "grad_norm": 0.08112762123346329, + "learning_rate": 1.3723743873600658e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60807400, + "step": 90195 + }, + { + "epoch": 2.2036010065228546, + "grad_norm": 15.60988712310791, + "learning_rate": 1.3722952413497689e-06, + "loss": 0.0491, + "num_input_tokens_seen": 60810472, + "step": 90200 + }, + { + "epoch": 2.203723157354702, + "grad_norm": 0.002482697134837508, + "learning_rate": 1.372216092632041e-06, + "loss": 0.0502, + "num_input_tokens_seen": 60814120, + "step": 90205 + }, + { + "epoch": 2.2038453081865486, + "grad_norm": 0.2348630726337433, + "learning_rate": 1.372136941207457e-06, + "loss": 0.0286, + "num_input_tokens_seen": 60817512, + "step": 90210 + }, + { + "epoch": 2.2039674590183957, + "grad_norm": 0.005988630000501871, + "learning_rate": 1.3720577870765934e-06, + "loss": 0.0015, + "num_input_tokens_seen": 60821160, + "step": 90215 + }, + { + "epoch": 2.204089609850243, + "grad_norm": 0.21893872320652008, + "learning_rate": 1.3719786302400258e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60824808, + "step": 90220 + }, + { + "epoch": 2.20421176068209, + "grad_norm": 194.6992950439453, + "learning_rate": 1.3718994706983293e-06, + "loss": 0.0117, + "num_input_tokens_seen": 60828136, + "step": 90225 + }, + { + "epoch": 2.2043339115139373, + "grad_norm": 0.3046874403953552, + "learning_rate": 1.3718203084520798e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60831208, + "step": 90230 + }, + { + "epoch": 2.2044560623457845, + "grad_norm": 53.637351989746094, + "learning_rate": 1.371741143501853e-06, + "loss": 0.0699, + "num_input_tokens_seen": 60834344, + "step": 90235 + }, + { + "epoch": 2.2045782131776317, + "grad_norm": 0.007826529443264008, + "learning_rate": 1.3716619758482249e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60837672, + "step": 90240 + }, + { + "epoch": 2.204700364009479, + "grad_norm": 0.0040454016998410225, + "learning_rate": 1.3715828054917705e-06, + "loss": 0.0489, + "num_input_tokens_seen": 60841576, + "step": 90245 + }, + { + "epoch": 2.204822514841326, + "grad_norm": 0.01919216848909855, + "learning_rate": 1.3715036324330665e-06, + "loss": 0.044, + "num_input_tokens_seen": 60845032, + "step": 90250 + }, + { + "epoch": 2.2049446656731733, + "grad_norm": 6.789739563828334e-05, + "learning_rate": 1.3714244566726878e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60848936, + "step": 90255 + }, + { + "epoch": 2.2050668165050205, + "grad_norm": 30.994749069213867, + "learning_rate": 1.3713452782112107e-06, + "loss": 0.0502, + "num_input_tokens_seen": 60852520, + "step": 90260 + }, + { + "epoch": 2.2051889673368676, + "grad_norm": 0.2674175202846527, + "learning_rate": 1.3712660970492108e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60855976, + "step": 90265 + }, + { + "epoch": 2.205311118168715, + "grad_norm": 0.35465195775032043, + "learning_rate": 1.371186913187264e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60859240, + "step": 90270 + }, + { + "epoch": 2.205433269000562, + "grad_norm": 0.05108984559774399, + "learning_rate": 1.3711077266259459e-06, + "loss": 0.0915, + "num_input_tokens_seen": 60862312, + "step": 90275 + }, + { + "epoch": 2.205555419832409, + "grad_norm": 13.732041358947754, + "learning_rate": 1.3710285373658328e-06, + "loss": 0.1077, + "num_input_tokens_seen": 60865640, + "step": 90280 + }, + { + "epoch": 2.2056775706642564, + "grad_norm": 0.12964510917663574, + "learning_rate": 1.3709493454075004e-06, + "loss": 0.0458, + "num_input_tokens_seen": 60869160, + "step": 90285 + }, + { + "epoch": 2.2057997214961036, + "grad_norm": 0.15524911880493164, + "learning_rate": 1.3708701507515245e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60872488, + "step": 90290 + }, + { + "epoch": 2.2059218723279503, + "grad_norm": 0.061491161584854126, + "learning_rate": 1.3707909533984811e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60875496, + "step": 90295 + }, + { + "epoch": 2.2060440231597975, + "grad_norm": 0.01920522190630436, + "learning_rate": 1.3707117533489463e-06, + "loss": 0.0952, + "num_input_tokens_seen": 60878632, + "step": 90300 + }, + { + "epoch": 2.2061661739916447, + "grad_norm": 0.08350993692874908, + "learning_rate": 1.370632550603496e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60881704, + "step": 90305 + }, + { + "epoch": 2.206288324823492, + "grad_norm": 0.5707146525382996, + "learning_rate": 1.3705533451627058e-06, + "loss": 0.0509, + "num_input_tokens_seen": 60884968, + "step": 90310 + }, + { + "epoch": 2.206410475655339, + "grad_norm": 0.0054165455512702465, + "learning_rate": 1.3704741370271522e-06, + "loss": 0.0392, + "num_input_tokens_seen": 60888488, + "step": 90315 + }, + { + "epoch": 2.2065326264871863, + "grad_norm": 0.04952197149395943, + "learning_rate": 1.370394926197411e-06, + "loss": 0.0003, + "num_input_tokens_seen": 60891816, + "step": 90320 + }, + { + "epoch": 2.2066547773190335, + "grad_norm": 0.012744433246552944, + "learning_rate": 1.3703157126740583e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60895272, + "step": 90325 + }, + { + "epoch": 2.2067769281508807, + "grad_norm": 0.2308608442544937, + "learning_rate": 1.37023649645767e-06, + "loss": 0.0567, + "num_input_tokens_seen": 60898728, + "step": 90330 + }, + { + "epoch": 2.206899078982728, + "grad_norm": 0.019291376695036888, + "learning_rate": 1.3701572775488225e-06, + "loss": 0.0215, + "num_input_tokens_seen": 60902120, + "step": 90335 + }, + { + "epoch": 2.207021229814575, + "grad_norm": 0.010389735922217369, + "learning_rate": 1.3700780559480913e-06, + "loss": 0.1538, + "num_input_tokens_seen": 60905640, + "step": 90340 + }, + { + "epoch": 2.2071433806464222, + "grad_norm": 0.0885319635272026, + "learning_rate": 1.3699988316560536e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60909288, + "step": 90345 + }, + { + "epoch": 2.2072655314782694, + "grad_norm": 16.90826988220215, + "learning_rate": 1.3699196046732844e-06, + "loss": 0.0392, + "num_input_tokens_seen": 60912680, + "step": 90350 + }, + { + "epoch": 2.2073876823101166, + "grad_norm": 36.88505554199219, + "learning_rate": 1.3698403750003604e-06, + "loss": 0.0567, + "num_input_tokens_seen": 60916264, + "step": 90355 + }, + { + "epoch": 2.207509833141964, + "grad_norm": 0.008276794105768204, + "learning_rate": 1.3697611426378582e-06, + "loss": 0.064, + "num_input_tokens_seen": 60919400, + "step": 90360 + }, + { + "epoch": 2.207631983973811, + "grad_norm": 0.14579278230667114, + "learning_rate": 1.3696819075863527e-06, + "loss": 0.0507, + "num_input_tokens_seen": 60922408, + "step": 90365 + }, + { + "epoch": 2.207754134805658, + "grad_norm": 1.9445993900299072, + "learning_rate": 1.3696026698464216e-06, + "loss": 0.0576, + "num_input_tokens_seen": 60925608, + "step": 90370 + }, + { + "epoch": 2.2078762856375054, + "grad_norm": 0.6416146755218506, + "learning_rate": 1.3695234294186403e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60928616, + "step": 90375 + }, + { + "epoch": 2.207998436469352, + "grad_norm": 0.6925631165504456, + "learning_rate": 1.369444186303585e-06, + "loss": 0.039, + "num_input_tokens_seen": 60932136, + "step": 90380 + }, + { + "epoch": 2.2081205873011998, + "grad_norm": 2.5282368659973145, + "learning_rate": 1.3693649405018323e-06, + "loss": 0.0706, + "num_input_tokens_seen": 60935656, + "step": 90385 + }, + { + "epoch": 2.2082427381330465, + "grad_norm": 0.6798292398452759, + "learning_rate": 1.3692856920139586e-06, + "loss": 0.0013, + "num_input_tokens_seen": 60938664, + "step": 90390 + }, + { + "epoch": 2.2083648889648937, + "grad_norm": 0.047019269317388535, + "learning_rate": 1.36920644084054e-06, + "loss": 0.1045, + "num_input_tokens_seen": 60942120, + "step": 90395 + }, + { + "epoch": 2.208487039796741, + "grad_norm": 0.004635084420442581, + "learning_rate": 1.3691271869821526e-06, + "loss": 0.0011, + "num_input_tokens_seen": 60945640, + "step": 90400 + }, + { + "epoch": 2.208609190628588, + "grad_norm": 0.024701261892914772, + "learning_rate": 1.369047930439373e-06, + "loss": 0.0944, + "num_input_tokens_seen": 60949096, + "step": 90405 + }, + { + "epoch": 2.2087313414604353, + "grad_norm": 0.15579397976398468, + "learning_rate": 1.368968671212778e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60952808, + "step": 90410 + }, + { + "epoch": 2.2088534922922824, + "grad_norm": 0.4236942529678345, + "learning_rate": 1.3688894093029432e-06, + "loss": 0.0506, + "num_input_tokens_seen": 60955880, + "step": 90415 + }, + { + "epoch": 2.2089756431241296, + "grad_norm": 0.33078476786613464, + "learning_rate": 1.3688101447104456e-06, + "loss": 0.0004, + "num_input_tokens_seen": 60959208, + "step": 90420 + }, + { + "epoch": 2.209097793955977, + "grad_norm": 0.03016742318868637, + "learning_rate": 1.3687308774358616e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60962600, + "step": 90425 + }, + { + "epoch": 2.209219944787824, + "grad_norm": 0.010646332986652851, + "learning_rate": 1.368651607479767e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60966248, + "step": 90430 + }, + { + "epoch": 2.209342095619671, + "grad_norm": 0.028630100190639496, + "learning_rate": 1.3685723348427388e-06, + "loss": 0.0005, + "num_input_tokens_seen": 60969448, + "step": 90435 + }, + { + "epoch": 2.2094642464515184, + "grad_norm": 0.015928685665130615, + "learning_rate": 1.3684930595253538e-06, + "loss": 0.0094, + "num_input_tokens_seen": 60972776, + "step": 90440 + }, + { + "epoch": 2.2095863972833656, + "grad_norm": 0.03674129769206047, + "learning_rate": 1.3684137815281882e-06, + "loss": 0.0001, + "num_input_tokens_seen": 60976040, + "step": 90445 + }, + { + "epoch": 2.209708548115213, + "grad_norm": 0.01847653090953827, + "learning_rate": 1.3683345008518181e-06, + "loss": 0.0887, + "num_input_tokens_seen": 60979304, + "step": 90450 + }, + { + "epoch": 2.20983069894706, + "grad_norm": 0.0479811355471611, + "learning_rate": 1.3682552174968208e-06, + "loss": 0.074, + "num_input_tokens_seen": 60982632, + "step": 90455 + }, + { + "epoch": 2.209952849778907, + "grad_norm": 0.058019086718559265, + "learning_rate": 1.3681759314637723e-06, + "loss": 0.0298, + "num_input_tokens_seen": 60986408, + "step": 90460 + }, + { + "epoch": 2.2100750006107543, + "grad_norm": 0.02313101850450039, + "learning_rate": 1.3680966427532494e-06, + "loss": 0.0002, + "num_input_tokens_seen": 60989992, + "step": 90465 + }, + { + "epoch": 2.2101971514426015, + "grad_norm": 0.3875404894351959, + "learning_rate": 1.3680173513658289e-06, + "loss": 0.0217, + "num_input_tokens_seen": 60993320, + "step": 90470 + }, + { + "epoch": 2.2103193022744483, + "grad_norm": 23.808727264404297, + "learning_rate": 1.367938057302087e-06, + "loss": 0.0378, + "num_input_tokens_seen": 60996712, + "step": 90475 + }, + { + "epoch": 2.2104414531062955, + "grad_norm": 0.011256799101829529, + "learning_rate": 1.3678587605626007e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61000168, + "step": 90480 + }, + { + "epoch": 2.2105636039381427, + "grad_norm": 0.009701249189674854, + "learning_rate": 1.3677794611479466e-06, + "loss": 0.0011, + "num_input_tokens_seen": 61003304, + "step": 90485 + }, + { + "epoch": 2.21068575476999, + "grad_norm": 0.31478777527809143, + "learning_rate": 1.367700159058701e-06, + "loss": 0.0577, + "num_input_tokens_seen": 61006824, + "step": 90490 + }, + { + "epoch": 2.210807905601837, + "grad_norm": 0.023821784183382988, + "learning_rate": 1.3676208542954414e-06, + "loss": 0.0446, + "num_input_tokens_seen": 61010152, + "step": 90495 + }, + { + "epoch": 2.2109300564336842, + "grad_norm": 0.018663931638002396, + "learning_rate": 1.3675415468587436e-06, + "loss": 0.1367, + "num_input_tokens_seen": 61013608, + "step": 90500 + }, + { + "epoch": 2.2110522072655314, + "grad_norm": 0.009511927142739296, + "learning_rate": 1.3674622367491852e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61016872, + "step": 90505 + }, + { + "epoch": 2.2111743580973786, + "grad_norm": 0.05456589534878731, + "learning_rate": 1.3673829239673424e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61020200, + "step": 90510 + }, + { + "epoch": 2.211296508929226, + "grad_norm": 6.82743501663208, + "learning_rate": 1.3673036085137926e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61023464, + "step": 90515 + }, + { + "epoch": 2.211418659761073, + "grad_norm": 0.06412504613399506, + "learning_rate": 1.3672242903891117e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61027176, + "step": 90520 + }, + { + "epoch": 2.21154081059292, + "grad_norm": 0.0497899204492569, + "learning_rate": 1.3671449695938768e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61030440, + "step": 90525 + }, + { + "epoch": 2.2116629614247674, + "grad_norm": 0.008111564442515373, + "learning_rate": 1.3670656461286655e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61033896, + "step": 90530 + }, + { + "epoch": 2.2117851122566146, + "grad_norm": 0.06232449412345886, + "learning_rate": 1.3669863199940538e-06, + "loss": 0.011, + "num_input_tokens_seen": 61037096, + "step": 90535 + }, + { + "epoch": 2.2119072630884618, + "grad_norm": 0.052241094410419464, + "learning_rate": 1.3669069911906189e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61040680, + "step": 90540 + }, + { + "epoch": 2.212029413920309, + "grad_norm": 0.2057826668024063, + "learning_rate": 1.3668276597189375e-06, + "loss": 0.1162, + "num_input_tokens_seen": 61044072, + "step": 90545 + }, + { + "epoch": 2.212151564752156, + "grad_norm": 0.09444887936115265, + "learning_rate": 1.3667483255795868e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61047208, + "step": 90550 + }, + { + "epoch": 2.2122737155840033, + "grad_norm": 0.004477665759623051, + "learning_rate": 1.3666689887731434e-06, + "loss": 0.0625, + "num_input_tokens_seen": 61050536, + "step": 90555 + }, + { + "epoch": 2.21239586641585, + "grad_norm": 0.010698872618377209, + "learning_rate": 1.366589649300185e-06, + "loss": 0.0355, + "num_input_tokens_seen": 61053992, + "step": 90560 + }, + { + "epoch": 2.2125180172476973, + "grad_norm": 0.14260634779930115, + "learning_rate": 1.366510307161288e-06, + "loss": 0.1054, + "num_input_tokens_seen": 61057448, + "step": 90565 + }, + { + "epoch": 2.2126401680795444, + "grad_norm": 0.05694550275802612, + "learning_rate": 1.3664309623570293e-06, + "loss": 0.0379, + "num_input_tokens_seen": 61060456, + "step": 90570 + }, + { + "epoch": 2.2127623189113916, + "grad_norm": 0.012118209153413773, + "learning_rate": 1.3663516148879861e-06, + "loss": 0.1337, + "num_input_tokens_seen": 61064168, + "step": 90575 + }, + { + "epoch": 2.212884469743239, + "grad_norm": 73.87699890136719, + "learning_rate": 1.3662722647547355e-06, + "loss": 0.1482, + "num_input_tokens_seen": 61067688, + "step": 90580 + }, + { + "epoch": 2.213006620575086, + "grad_norm": 0.34617283940315247, + "learning_rate": 1.366192911957854e-06, + "loss": 0.0408, + "num_input_tokens_seen": 61071016, + "step": 90585 + }, + { + "epoch": 2.213128771406933, + "grad_norm": 1.8424748182296753, + "learning_rate": 1.3661135564979198e-06, + "loss": 0.0242, + "num_input_tokens_seen": 61074024, + "step": 90590 + }, + { + "epoch": 2.2132509222387804, + "grad_norm": 0.06380114704370499, + "learning_rate": 1.366034198375509e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61077160, + "step": 90595 + }, + { + "epoch": 2.2133730730706276, + "grad_norm": 0.5055896043777466, + "learning_rate": 1.3659548375911992e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61080232, + "step": 90600 + }, + { + "epoch": 2.2134952239024748, + "grad_norm": 0.08805550634860992, + "learning_rate": 1.3658754741455674e-06, + "loss": 0.1485, + "num_input_tokens_seen": 61084584, + "step": 90605 + }, + { + "epoch": 2.213617374734322, + "grad_norm": 0.40562599897384644, + "learning_rate": 1.3657961080391907e-06, + "loss": 0.0731, + "num_input_tokens_seen": 61088040, + "step": 90610 + }, + { + "epoch": 2.213739525566169, + "grad_norm": 0.041347797960042953, + "learning_rate": 1.3657167392726463e-06, + "loss": 0.0745, + "num_input_tokens_seen": 61091304, + "step": 90615 + }, + { + "epoch": 2.2138616763980163, + "grad_norm": 0.05579262226819992, + "learning_rate": 1.3656373678465114e-06, + "loss": 0.0939, + "num_input_tokens_seen": 61095016, + "step": 90620 + }, + { + "epoch": 2.2139838272298635, + "grad_norm": 0.01780310645699501, + "learning_rate": 1.3655579937613633e-06, + "loss": 0.0014, + "num_input_tokens_seen": 61098280, + "step": 90625 + }, + { + "epoch": 2.2141059780617107, + "grad_norm": 0.34165212512016296, + "learning_rate": 1.365478617017779e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61101608, + "step": 90630 + }, + { + "epoch": 2.214228128893558, + "grad_norm": 0.11793646216392517, + "learning_rate": 1.3653992376163359e-06, + "loss": 0.0006, + "num_input_tokens_seen": 61104872, + "step": 90635 + }, + { + "epoch": 2.214350279725405, + "grad_norm": 225.5457763671875, + "learning_rate": 1.3653198555576113e-06, + "loss": 0.1049, + "num_input_tokens_seen": 61108328, + "step": 90640 + }, + { + "epoch": 2.2144724305572523, + "grad_norm": 0.031289275735616684, + "learning_rate": 1.3652404708421823e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61111336, + "step": 90645 + }, + { + "epoch": 2.2145945813890995, + "grad_norm": 0.016135146841406822, + "learning_rate": 1.3651610834706266e-06, + "loss": 0.0296, + "num_input_tokens_seen": 61114408, + "step": 90650 + }, + { + "epoch": 2.2147167322209462, + "grad_norm": 0.06828448176383972, + "learning_rate": 1.3650816934435211e-06, + "loss": 0.0348, + "num_input_tokens_seen": 61118120, + "step": 90655 + }, + { + "epoch": 2.2148388830527934, + "grad_norm": 14.590534210205078, + "learning_rate": 1.3650023007614436e-06, + "loss": 0.0594, + "num_input_tokens_seen": 61121448, + "step": 90660 + }, + { + "epoch": 2.2149610338846406, + "grad_norm": 0.016529662534594536, + "learning_rate": 1.3649229054249709e-06, + "loss": 0.0411, + "num_input_tokens_seen": 61125160, + "step": 90665 + }, + { + "epoch": 2.215083184716488, + "grad_norm": 0.03518630936741829, + "learning_rate": 1.3648435074346812e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61128296, + "step": 90670 + }, + { + "epoch": 2.215205335548335, + "grad_norm": 25.680883407592773, + "learning_rate": 1.364764106791151e-06, + "loss": 0.04, + "num_input_tokens_seen": 61131560, + "step": 90675 + }, + { + "epoch": 2.215327486380182, + "grad_norm": 0.016405398026108742, + "learning_rate": 1.3646847034949577e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61134632, + "step": 90680 + }, + { + "epoch": 2.2154496372120294, + "grad_norm": 0.10307609289884567, + "learning_rate": 1.3646052975466798e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61138152, + "step": 90685 + }, + { + "epoch": 2.2155717880438766, + "grad_norm": 0.07581989467144012, + "learning_rate": 1.3645258889468938e-06, + "loss": 0.0352, + "num_input_tokens_seen": 61141480, + "step": 90690 + }, + { + "epoch": 2.2156939388757237, + "grad_norm": 0.03339642286300659, + "learning_rate": 1.3644464776961778e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61145320, + "step": 90695 + }, + { + "epoch": 2.215816089707571, + "grad_norm": 0.17576716840267181, + "learning_rate": 1.3643670637951086e-06, + "loss": 0.0874, + "num_input_tokens_seen": 61148520, + "step": 90700 + }, + { + "epoch": 2.215938240539418, + "grad_norm": 0.08091838657855988, + "learning_rate": 1.3642876472442642e-06, + "loss": 0.1244, + "num_input_tokens_seen": 61151528, + "step": 90705 + }, + { + "epoch": 2.2160603913712653, + "grad_norm": 0.018327750265598297, + "learning_rate": 1.3642082280442219e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61155112, + "step": 90710 + }, + { + "epoch": 2.2161825422031125, + "grad_norm": 0.01857393980026245, + "learning_rate": 1.3641288061955599e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61158568, + "step": 90715 + }, + { + "epoch": 2.2163046930349597, + "grad_norm": 0.012956013903021812, + "learning_rate": 1.364049381698855e-06, + "loss": 0.0565, + "num_input_tokens_seen": 61162024, + "step": 90720 + }, + { + "epoch": 2.216426843866807, + "grad_norm": 0.11538074910640717, + "learning_rate": 1.363969954554685e-06, + "loss": 0.0587, + "num_input_tokens_seen": 61165544, + "step": 90725 + }, + { + "epoch": 2.216548994698654, + "grad_norm": 210.1100311279297, + "learning_rate": 1.3638905247636276e-06, + "loss": 0.1062, + "num_input_tokens_seen": 61168936, + "step": 90730 + }, + { + "epoch": 2.2166711455305013, + "grad_norm": 0.04966253042221069, + "learning_rate": 1.3638110923262608e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61172072, + "step": 90735 + }, + { + "epoch": 2.216793296362348, + "grad_norm": 0.04618317633867264, + "learning_rate": 1.3637316572431613e-06, + "loss": 0.0465, + "num_input_tokens_seen": 61175080, + "step": 90740 + }, + { + "epoch": 2.216915447194195, + "grad_norm": 0.05596147105097771, + "learning_rate": 1.3636522195149077e-06, + "loss": 0.0372, + "num_input_tokens_seen": 61178536, + "step": 90745 + }, + { + "epoch": 2.2170375980260424, + "grad_norm": 35.95188903808594, + "learning_rate": 1.363572779142077e-06, + "loss": 0.0348, + "num_input_tokens_seen": 61181800, + "step": 90750 + }, + { + "epoch": 2.2171597488578896, + "grad_norm": 0.37679508328437805, + "learning_rate": 1.3634933361252477e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61185192, + "step": 90755 + }, + { + "epoch": 2.2172818996897368, + "grad_norm": 0.2642463743686676, + "learning_rate": 1.3634138904649969e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61189672, + "step": 90760 + }, + { + "epoch": 2.217404050521584, + "grad_norm": 374.71319580078125, + "learning_rate": 1.3633344421619027e-06, + "loss": 0.0411, + "num_input_tokens_seen": 61192744, + "step": 90765 + }, + { + "epoch": 2.217526201353431, + "grad_norm": 0.31421008706092834, + "learning_rate": 1.3632549912165425e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61196072, + "step": 90770 + }, + { + "epoch": 2.2176483521852783, + "grad_norm": 0.08052527159452438, + "learning_rate": 1.3631755376294944e-06, + "loss": 0.0159, + "num_input_tokens_seen": 61199336, + "step": 90775 + }, + { + "epoch": 2.2177705030171255, + "grad_norm": 0.09668438136577606, + "learning_rate": 1.363096081401336e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61202664, + "step": 90780 + }, + { + "epoch": 2.2178926538489727, + "grad_norm": 0.08282022178173065, + "learning_rate": 1.3630166225326453e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61206056, + "step": 90785 + }, + { + "epoch": 2.21801480468082, + "grad_norm": 0.07777266949415207, + "learning_rate": 1.3629371610240004e-06, + "loss": 0.0214, + "num_input_tokens_seen": 61209384, + "step": 90790 + }, + { + "epoch": 2.218136955512667, + "grad_norm": 0.006129283923655748, + "learning_rate": 1.3628576968759784e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61212648, + "step": 90795 + }, + { + "epoch": 2.2182591063445143, + "grad_norm": 0.017785819247364998, + "learning_rate": 1.3627782300891575e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61215656, + "step": 90800 + }, + { + "epoch": 2.2183812571763615, + "grad_norm": 0.0023185538593679667, + "learning_rate": 1.362698760664116e-06, + "loss": 0.0282, + "num_input_tokens_seen": 61219048, + "step": 90805 + }, + { + "epoch": 2.2185034080082087, + "grad_norm": 0.024504944682121277, + "learning_rate": 1.3626192886014317e-06, + "loss": 0.0379, + "num_input_tokens_seen": 61222312, + "step": 90810 + }, + { + "epoch": 2.218625558840056, + "grad_norm": 0.050144314765930176, + "learning_rate": 1.3625398139016824e-06, + "loss": 0.0011, + "num_input_tokens_seen": 61226152, + "step": 90815 + }, + { + "epoch": 2.218747709671903, + "grad_norm": 0.03911041468381882, + "learning_rate": 1.362460336565446e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61229608, + "step": 90820 + }, + { + "epoch": 2.2188698605037502, + "grad_norm": 0.23010863363742828, + "learning_rate": 1.3623808565933005e-06, + "loss": 0.0404, + "num_input_tokens_seen": 61233000, + "step": 90825 + }, + { + "epoch": 2.2189920113355974, + "grad_norm": 0.043779559433460236, + "learning_rate": 1.362301373985824e-06, + "loss": 0.0087, + "num_input_tokens_seen": 61236200, + "step": 90830 + }, + { + "epoch": 2.219114162167444, + "grad_norm": 0.378178209066391, + "learning_rate": 1.3622218887435942e-06, + "loss": 0.0058, + "num_input_tokens_seen": 61239848, + "step": 90835 + }, + { + "epoch": 2.2192363129992914, + "grad_norm": 0.07556562125682831, + "learning_rate": 1.3621424008671895e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61243176, + "step": 90840 + }, + { + "epoch": 2.2193584638311386, + "grad_norm": 0.7005949020385742, + "learning_rate": 1.362062910357188e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61246824, + "step": 90845 + }, + { + "epoch": 2.2194806146629857, + "grad_norm": 0.023375436663627625, + "learning_rate": 1.3619834172141675e-06, + "loss": 0.0817, + "num_input_tokens_seen": 61250152, + "step": 90850 + }, + { + "epoch": 2.219602765494833, + "grad_norm": 0.0043090349063277245, + "learning_rate": 1.3619039214387065e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61253864, + "step": 90855 + }, + { + "epoch": 2.21972491632668, + "grad_norm": 0.030831608921289444, + "learning_rate": 1.3618244230313826e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61257064, + "step": 90860 + }, + { + "epoch": 2.2198470671585273, + "grad_norm": 0.4308274984359741, + "learning_rate": 1.361744921992774e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61260200, + "step": 90865 + }, + { + "epoch": 2.2199692179903745, + "grad_norm": 0.033835988491773605, + "learning_rate": 1.3616654183234596e-06, + "loss": 0.0524, + "num_input_tokens_seen": 61263464, + "step": 90870 + }, + { + "epoch": 2.2200913688222217, + "grad_norm": 0.011145480908453465, + "learning_rate": 1.3615859120240165e-06, + "loss": 0.0334, + "num_input_tokens_seen": 61266792, + "step": 90875 + }, + { + "epoch": 2.220213519654069, + "grad_norm": 0.9979506731033325, + "learning_rate": 1.3615064030950236e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61270184, + "step": 90880 + }, + { + "epoch": 2.220335670485916, + "grad_norm": 0.016083527356386185, + "learning_rate": 1.361426891537059e-06, + "loss": 0.049, + "num_input_tokens_seen": 61273640, + "step": 90885 + }, + { + "epoch": 2.2204578213177633, + "grad_norm": 0.0046060411259531975, + "learning_rate": 1.3613473773507007e-06, + "loss": 0.0811, + "num_input_tokens_seen": 61276968, + "step": 90890 + }, + { + "epoch": 2.2205799721496104, + "grad_norm": 0.010451358743011951, + "learning_rate": 1.3612678605365268e-06, + "loss": 0.0, + "num_input_tokens_seen": 61279848, + "step": 90895 + }, + { + "epoch": 2.2207021229814576, + "grad_norm": 0.0008068532915785909, + "learning_rate": 1.3611883410951162e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61283432, + "step": 90900 + }, + { + "epoch": 2.220824273813305, + "grad_norm": 1.482542872428894, + "learning_rate": 1.3611088190270467e-06, + "loss": 0.0475, + "num_input_tokens_seen": 61287080, + "step": 90905 + }, + { + "epoch": 2.220946424645152, + "grad_norm": 0.016690026968717575, + "learning_rate": 1.361029294332897e-06, + "loss": 0.0, + "num_input_tokens_seen": 61290216, + "step": 90910 + }, + { + "epoch": 2.221068575476999, + "grad_norm": 0.0038016163744032383, + "learning_rate": 1.3609497670132448e-06, + "loss": 0.087, + "num_input_tokens_seen": 61293416, + "step": 90915 + }, + { + "epoch": 2.221190726308846, + "grad_norm": 0.000619497848674655, + "learning_rate": 1.3608702370686689e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61296424, + "step": 90920 + }, + { + "epoch": 2.221312877140693, + "grad_norm": 23.480012893676758, + "learning_rate": 1.3607907044997476e-06, + "loss": 0.1818, + "num_input_tokens_seen": 61299880, + "step": 90925 + }, + { + "epoch": 2.2214350279725403, + "grad_norm": 0.06933058053255081, + "learning_rate": 1.3607111693070595e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61303720, + "step": 90930 + }, + { + "epoch": 2.2215571788043875, + "grad_norm": 0.037259768694639206, + "learning_rate": 1.3606316314911826e-06, + "loss": 0.0741, + "num_input_tokens_seen": 61306920, + "step": 90935 + }, + { + "epoch": 2.2216793296362347, + "grad_norm": 0.015051459893584251, + "learning_rate": 1.3605520910526953e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61310120, + "step": 90940 + }, + { + "epoch": 2.221801480468082, + "grad_norm": 0.06870535016059875, + "learning_rate": 1.3604725479921765e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61313512, + "step": 90945 + }, + { + "epoch": 2.221923631299929, + "grad_norm": 970.2088623046875, + "learning_rate": 1.3603930023102042e-06, + "loss": 0.0292, + "num_input_tokens_seen": 61316584, + "step": 90950 + }, + { + "epoch": 2.2220457821317763, + "grad_norm": 0.002735344460234046, + "learning_rate": 1.3603134540073571e-06, + "loss": 0.062, + "num_input_tokens_seen": 61320232, + "step": 90955 + }, + { + "epoch": 2.2221679329636235, + "grad_norm": 0.005101367831230164, + "learning_rate": 1.3602339030842135e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61323432, + "step": 90960 + }, + { + "epoch": 2.2222900837954707, + "grad_norm": 0.21339182555675507, + "learning_rate": 1.3601543495413521e-06, + "loss": 0.0421, + "num_input_tokens_seen": 61326760, + "step": 90965 + }, + { + "epoch": 2.222412234627318, + "grad_norm": 14.178060531616211, + "learning_rate": 1.3600747933793516e-06, + "loss": 0.0841, + "num_input_tokens_seen": 61330152, + "step": 90970 + }, + { + "epoch": 2.222534385459165, + "grad_norm": 0.11821889132261276, + "learning_rate": 1.3599952345987902e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61333480, + "step": 90975 + }, + { + "epoch": 2.2226565362910122, + "grad_norm": 13.821526527404785, + "learning_rate": 1.3599156732002467e-06, + "loss": 0.0673, + "num_input_tokens_seen": 61336936, + "step": 90980 + }, + { + "epoch": 2.2227786871228594, + "grad_norm": 0.028573287650942802, + "learning_rate": 1.3598361091842999e-06, + "loss": 0.0565, + "num_input_tokens_seen": 61340392, + "step": 90985 + }, + { + "epoch": 2.2229008379547066, + "grad_norm": 47.76668930053711, + "learning_rate": 1.3597565425515273e-06, + "loss": 0.0946, + "num_input_tokens_seen": 61343400, + "step": 90990 + }, + { + "epoch": 2.223022988786554, + "grad_norm": 0.028161996975541115, + "learning_rate": 1.359676973302509e-06, + "loss": 0.0033, + "num_input_tokens_seen": 61346536, + "step": 90995 + }, + { + "epoch": 2.223145139618401, + "grad_norm": 0.05201875790953636, + "learning_rate": 1.359597401437823e-06, + "loss": 0.0479, + "num_input_tokens_seen": 61350056, + "step": 91000 + }, + { + "epoch": 2.2232672904502477, + "grad_norm": 0.01158086210489273, + "learning_rate": 1.3595178269580478e-06, + "loss": 0.0855, + "num_input_tokens_seen": 61353640, + "step": 91005 + }, + { + "epoch": 2.223389441282095, + "grad_norm": 147.93606567382812, + "learning_rate": 1.3594382498637625e-06, + "loss": 0.0065, + "num_input_tokens_seen": 61356712, + "step": 91010 + }, + { + "epoch": 2.223511592113942, + "grad_norm": 0.26321476697921753, + "learning_rate": 1.3593586701555454e-06, + "loss": 0.0441, + "num_input_tokens_seen": 61359912, + "step": 91015 + }, + { + "epoch": 2.2236337429457893, + "grad_norm": 26.365808486938477, + "learning_rate": 1.359279087833975e-06, + "loss": 0.0261, + "num_input_tokens_seen": 61363624, + "step": 91020 + }, + { + "epoch": 2.2237558937776365, + "grad_norm": 0.005364961456507444, + "learning_rate": 1.359199502899631e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61366952, + "step": 91025 + }, + { + "epoch": 2.2238780446094837, + "grad_norm": 3.7186083793640137, + "learning_rate": 1.3591199153530916e-06, + "loss": 0.0011, + "num_input_tokens_seen": 61371048, + "step": 91030 + }, + { + "epoch": 2.224000195441331, + "grad_norm": 0.005052721593528986, + "learning_rate": 1.3590403251949354e-06, + "loss": 0.0367, + "num_input_tokens_seen": 61373992, + "step": 91035 + }, + { + "epoch": 2.224122346273178, + "grad_norm": 0.14445973932743073, + "learning_rate": 1.3589607324257415e-06, + "loss": 0.0014, + "num_input_tokens_seen": 61377192, + "step": 91040 + }, + { + "epoch": 2.2242444971050253, + "grad_norm": 89.84341430664062, + "learning_rate": 1.3588811370460884e-06, + "loss": 0.1273, + "num_input_tokens_seen": 61380648, + "step": 91045 + }, + { + "epoch": 2.2243666479368724, + "grad_norm": 7.445562839508057, + "learning_rate": 1.3588015390565551e-06, + "loss": 0.0011, + "num_input_tokens_seen": 61384104, + "step": 91050 + }, + { + "epoch": 2.2244887987687196, + "grad_norm": 0.06055779755115509, + "learning_rate": 1.3587219384577207e-06, + "loss": 0.035, + "num_input_tokens_seen": 61387880, + "step": 91055 + }, + { + "epoch": 2.224610949600567, + "grad_norm": 0.003716902807354927, + "learning_rate": 1.3586423352501637e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61390952, + "step": 91060 + }, + { + "epoch": 2.224733100432414, + "grad_norm": 18.314638137817383, + "learning_rate": 1.3585627294344635e-06, + "loss": 0.0524, + "num_input_tokens_seen": 61394152, + "step": 91065 + }, + { + "epoch": 2.224855251264261, + "grad_norm": 0.034229401499032974, + "learning_rate": 1.3584831210111985e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61397416, + "step": 91070 + }, + { + "epoch": 2.2249774020961084, + "grad_norm": 0.04336778447031975, + "learning_rate": 1.3584035099809477e-06, + "loss": 0.0377, + "num_input_tokens_seen": 61400872, + "step": 91075 + }, + { + "epoch": 2.2250995529279556, + "grad_norm": 0.009136239066720009, + "learning_rate": 1.3583238963442904e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61404584, + "step": 91080 + }, + { + "epoch": 2.2252217037598028, + "grad_norm": 0.1326877623796463, + "learning_rate": 1.3582442801018052e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61407912, + "step": 91085 + }, + { + "epoch": 2.22534385459165, + "grad_norm": 0.32096967101097107, + "learning_rate": 1.3581646612540713e-06, + "loss": 0.0009, + "num_input_tokens_seen": 61412136, + "step": 91090 + }, + { + "epoch": 2.225466005423497, + "grad_norm": 0.017594700679183006, + "learning_rate": 1.3580850398016676e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61415656, + "step": 91095 + }, + { + "epoch": 2.225588156255344, + "grad_norm": 21.886730194091797, + "learning_rate": 1.3580054157451732e-06, + "loss": 0.0772, + "num_input_tokens_seen": 61418856, + "step": 91100 + }, + { + "epoch": 2.225710307087191, + "grad_norm": 29.80620765686035, + "learning_rate": 1.3579257890851673e-06, + "loss": 0.0622, + "num_input_tokens_seen": 61422248, + "step": 91105 + }, + { + "epoch": 2.2258324579190383, + "grad_norm": 0.08096987754106522, + "learning_rate": 1.3578461598222286e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61425320, + "step": 91110 + }, + { + "epoch": 2.2259546087508855, + "grad_norm": 0.020485376939177513, + "learning_rate": 1.357766527956936e-06, + "loss": 0.0423, + "num_input_tokens_seen": 61429032, + "step": 91115 + }, + { + "epoch": 2.2260767595827327, + "grad_norm": 0.1648709774017334, + "learning_rate": 1.3576868934898696e-06, + "loss": 0.0304, + "num_input_tokens_seen": 61432680, + "step": 91120 + }, + { + "epoch": 2.22619891041458, + "grad_norm": 0.0049276407808065414, + "learning_rate": 1.3576072564216077e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61435944, + "step": 91125 + }, + { + "epoch": 2.226321061246427, + "grad_norm": 0.36935797333717346, + "learning_rate": 1.3575276167527297e-06, + "loss": 0.0435, + "num_input_tokens_seen": 61439272, + "step": 91130 + }, + { + "epoch": 2.2264432120782742, + "grad_norm": 0.015589174814522266, + "learning_rate": 1.3574479744838147e-06, + "loss": 0.0525, + "num_input_tokens_seen": 61442792, + "step": 91135 + }, + { + "epoch": 2.2265653629101214, + "grad_norm": 41.798988342285156, + "learning_rate": 1.357368329615442e-06, + "loss": 0.2178, + "num_input_tokens_seen": 61446120, + "step": 91140 + }, + { + "epoch": 2.2266875137419686, + "grad_norm": 0.14397107064723969, + "learning_rate": 1.3572886821481905e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61449448, + "step": 91145 + }, + { + "epoch": 2.226809664573816, + "grad_norm": 0.08464895933866501, + "learning_rate": 1.3572090320826395e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61452584, + "step": 91150 + }, + { + "epoch": 2.226931815405663, + "grad_norm": 7.609369277954102, + "learning_rate": 1.3571293794193684e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61455720, + "step": 91155 + }, + { + "epoch": 2.22705396623751, + "grad_norm": 6.147663593292236, + "learning_rate": 1.3570497241589564e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61459048, + "step": 91160 + }, + { + "epoch": 2.2271761170693574, + "grad_norm": 0.4475948214530945, + "learning_rate": 1.356970066301983e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61462376, + "step": 91165 + }, + { + "epoch": 2.2272982679012046, + "grad_norm": 250.3100128173828, + "learning_rate": 1.3568904058490272e-06, + "loss": 0.0588, + "num_input_tokens_seen": 61466408, + "step": 91170 + }, + { + "epoch": 2.2274204187330517, + "grad_norm": 0.02227071113884449, + "learning_rate": 1.356810742800668e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61470056, + "step": 91175 + }, + { + "epoch": 2.227542569564899, + "grad_norm": 0.08421640843153, + "learning_rate": 1.3567310771574853e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61472872, + "step": 91180 + }, + { + "epoch": 2.2276647203967457, + "grad_norm": 0.001982541289180517, + "learning_rate": 1.3566514089200584e-06, + "loss": 0.182, + "num_input_tokens_seen": 61476136, + "step": 91185 + }, + { + "epoch": 2.227786871228593, + "grad_norm": 0.01702370122075081, + "learning_rate": 1.3565717380889664e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61479848, + "step": 91190 + }, + { + "epoch": 2.22790902206044, + "grad_norm": 0.023320624604821205, + "learning_rate": 1.356492064664789e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61483112, + "step": 91195 + }, + { + "epoch": 2.2280311728922872, + "grad_norm": 17.018516540527344, + "learning_rate": 1.3564123886481054e-06, + "loss": 0.0375, + "num_input_tokens_seen": 61486760, + "step": 91200 + }, + { + "epoch": 2.2281533237241344, + "grad_norm": 0.004173085559159517, + "learning_rate": 1.3563327100394947e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61490152, + "step": 91205 + }, + { + "epoch": 2.2282754745559816, + "grad_norm": 0.04542386531829834, + "learning_rate": 1.356253028839537e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61493352, + "step": 91210 + }, + { + "epoch": 2.228397625387829, + "grad_norm": 0.009219934232532978, + "learning_rate": 1.3561733450488113e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61496744, + "step": 91215 + }, + { + "epoch": 2.228519776219676, + "grad_norm": 56.5626335144043, + "learning_rate": 1.3560936586678974e-06, + "loss": 0.131, + "num_input_tokens_seen": 61499752, + "step": 91220 + }, + { + "epoch": 2.228641927051523, + "grad_norm": 0.10610431432723999, + "learning_rate": 1.3560139696973747e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61503080, + "step": 91225 + }, + { + "epoch": 2.2287640778833704, + "grad_norm": 0.029894093051552773, + "learning_rate": 1.3559342781378225e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61506792, + "step": 91230 + }, + { + "epoch": 2.2288862287152176, + "grad_norm": 0.03196987137198448, + "learning_rate": 1.3558545839898206e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61510312, + "step": 91235 + }, + { + "epoch": 2.2290083795470648, + "grad_norm": 1.6127263307571411, + "learning_rate": 1.3557748872539484e-06, + "loss": 0.0984, + "num_input_tokens_seen": 61513320, + "step": 91240 + }, + { + "epoch": 2.229130530378912, + "grad_norm": 0.008849059231579304, + "learning_rate": 1.3556951879307855e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61516520, + "step": 91245 + }, + { + "epoch": 2.229252681210759, + "grad_norm": 0.4685281813144684, + "learning_rate": 1.3556154860209114e-06, + "loss": 0.0013, + "num_input_tokens_seen": 61519784, + "step": 91250 + }, + { + "epoch": 2.2293748320426063, + "grad_norm": 0.024512961506843567, + "learning_rate": 1.355535781524906e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61522792, + "step": 91255 + }, + { + "epoch": 2.2294969828744535, + "grad_norm": 37.255821228027344, + "learning_rate": 1.3554560744433488e-06, + "loss": 0.1264, + "num_input_tokens_seen": 61526824, + "step": 91260 + }, + { + "epoch": 2.2296191337063007, + "grad_norm": 397.60003662109375, + "learning_rate": 1.3553763647768192e-06, + "loss": 0.0239, + "num_input_tokens_seen": 61529896, + "step": 91265 + }, + { + "epoch": 2.229741284538148, + "grad_norm": 0.05338110774755478, + "learning_rate": 1.355296652525897e-06, + "loss": 0.0419, + "num_input_tokens_seen": 61533288, + "step": 91270 + }, + { + "epoch": 2.229863435369995, + "grad_norm": 0.0043336073867976665, + "learning_rate": 1.3552169376911625e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61536936, + "step": 91275 + }, + { + "epoch": 2.229985586201842, + "grad_norm": 0.0046080537140369415, + "learning_rate": 1.3551372202731945e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61540456, + "step": 91280 + }, + { + "epoch": 2.230107737033689, + "grad_norm": 0.011748994700610638, + "learning_rate": 1.3550575002725732e-06, + "loss": 0.0275, + "num_input_tokens_seen": 61543592, + "step": 91285 + }, + { + "epoch": 2.230229887865536, + "grad_norm": 1.6699762344360352, + "learning_rate": 1.3549777776898786e-06, + "loss": 0.0034, + "num_input_tokens_seen": 61546792, + "step": 91290 + }, + { + "epoch": 2.2303520386973834, + "grad_norm": 0.01124012004584074, + "learning_rate": 1.3548980525256897e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61550184, + "step": 91295 + }, + { + "epoch": 2.2304741895292306, + "grad_norm": 0.0027650375850498676, + "learning_rate": 1.3548183247805867e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61553768, + "step": 91300 + }, + { + "epoch": 2.230596340361078, + "grad_norm": 0.05154336616396904, + "learning_rate": 1.3547385944551495e-06, + "loss": 0.0225, + "num_input_tokens_seen": 61556968, + "step": 91305 + }, + { + "epoch": 2.230718491192925, + "grad_norm": 0.00040175282629206777, + "learning_rate": 1.3546588615499576e-06, + "loss": 0.0303, + "num_input_tokens_seen": 61560424, + "step": 91310 + }, + { + "epoch": 2.230840642024772, + "grad_norm": 0.08982338011264801, + "learning_rate": 1.3545791260655915e-06, + "loss": 0.0561, + "num_input_tokens_seen": 61563368, + "step": 91315 + }, + { + "epoch": 2.2309627928566194, + "grad_norm": 0.0134674571454525, + "learning_rate": 1.3544993880026305e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61566696, + "step": 91320 + }, + { + "epoch": 2.2310849436884665, + "grad_norm": 0.01658649742603302, + "learning_rate": 1.3544196473616544e-06, + "loss": 0.0397, + "num_input_tokens_seen": 61569896, + "step": 91325 + }, + { + "epoch": 2.2312070945203137, + "grad_norm": 0.08493436127901077, + "learning_rate": 1.3543399041432432e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61572968, + "step": 91330 + }, + { + "epoch": 2.231329245352161, + "grad_norm": 0.02740195393562317, + "learning_rate": 1.3542601583479774e-06, + "loss": 0.0932, + "num_input_tokens_seen": 61576232, + "step": 91335 + }, + { + "epoch": 2.231451396184008, + "grad_norm": 0.20961058139801025, + "learning_rate": 1.3541804099764362e-06, + "loss": 0.0369, + "num_input_tokens_seen": 61579240, + "step": 91340 + }, + { + "epoch": 2.2315735470158553, + "grad_norm": 0.08280511945486069, + "learning_rate": 1.3541006590291998e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61582696, + "step": 91345 + }, + { + "epoch": 2.2316956978477025, + "grad_norm": 0.005895423702895641, + "learning_rate": 1.3540209055068484e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61586088, + "step": 91350 + }, + { + "epoch": 2.2318178486795497, + "grad_norm": 0.02408262901008129, + "learning_rate": 1.3539411494099614e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61589224, + "step": 91355 + }, + { + "epoch": 2.231939999511397, + "grad_norm": 0.005107124801725149, + "learning_rate": 1.353861390739119e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61592808, + "step": 91360 + }, + { + "epoch": 2.2320621503432436, + "grad_norm": 0.010785761289298534, + "learning_rate": 1.3537816294949017e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61596008, + "step": 91365 + }, + { + "epoch": 2.232184301175091, + "grad_norm": 47.25584411621094, + "learning_rate": 1.353701865677889e-06, + "loss": 0.2142, + "num_input_tokens_seen": 61599272, + "step": 91370 + }, + { + "epoch": 2.232306452006938, + "grad_norm": 0.009135080501437187, + "learning_rate": 1.3536220992886615e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61602920, + "step": 91375 + }, + { + "epoch": 2.232428602838785, + "grad_norm": 0.0020226880442351103, + "learning_rate": 1.3535423303277989e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61605992, + "step": 91380 + }, + { + "epoch": 2.2325507536706324, + "grad_norm": 32.131187438964844, + "learning_rate": 1.3534625587958814e-06, + "loss": 0.0978, + "num_input_tokens_seen": 61609320, + "step": 91385 + }, + { + "epoch": 2.2326729045024796, + "grad_norm": 0.05036914721131325, + "learning_rate": 1.353382784693489e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61612712, + "step": 91390 + }, + { + "epoch": 2.2327950553343268, + "grad_norm": 0.2048310786485672, + "learning_rate": 1.353303008021202e-06, + "loss": 0.0578, + "num_input_tokens_seen": 61616168, + "step": 91395 + }, + { + "epoch": 2.232917206166174, + "grad_norm": 0.8726524114608765, + "learning_rate": 1.3532232287796007e-06, + "loss": 0.0456, + "num_input_tokens_seen": 61619560, + "step": 91400 + }, + { + "epoch": 2.233039356998021, + "grad_norm": 0.06254065781831741, + "learning_rate": 1.353143446969265e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61623144, + "step": 91405 + }, + { + "epoch": 2.2331615078298683, + "grad_norm": 0.007272529415786266, + "learning_rate": 1.3530636625907747e-06, + "loss": 0.0538, + "num_input_tokens_seen": 61626600, + "step": 91410 + }, + { + "epoch": 2.2332836586617155, + "grad_norm": 0.24082855880260468, + "learning_rate": 1.352983875644711e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61629480, + "step": 91415 + }, + { + "epoch": 2.2334058094935627, + "grad_norm": 0.12771330773830414, + "learning_rate": 1.3529040861316535e-06, + "loss": 0.0611, + "num_input_tokens_seen": 61632872, + "step": 91420 + }, + { + "epoch": 2.23352796032541, + "grad_norm": 3.524974822998047, + "learning_rate": 1.3528242940521821e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61636200, + "step": 91425 + }, + { + "epoch": 2.233650111157257, + "grad_norm": 0.03709175065159798, + "learning_rate": 1.352744499406878e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61639272, + "step": 91430 + }, + { + "epoch": 2.2337722619891043, + "grad_norm": 0.04541729390621185, + "learning_rate": 1.352664702196321e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61642536, + "step": 91435 + }, + { + "epoch": 2.2338944128209515, + "grad_norm": 0.008953643962740898, + "learning_rate": 1.3525849024210913e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61645736, + "step": 91440 + }, + { + "epoch": 2.2340165636527987, + "grad_norm": 23.101348876953125, + "learning_rate": 1.3525051000817699e-06, + "loss": 0.1131, + "num_input_tokens_seen": 61649640, + "step": 91445 + }, + { + "epoch": 2.2341387144846454, + "grad_norm": 10.648353576660156, + "learning_rate": 1.352425295178936e-06, + "loss": 0.0431, + "num_input_tokens_seen": 61652904, + "step": 91450 + }, + { + "epoch": 2.234260865316493, + "grad_norm": 0.06287828832864761, + "learning_rate": 1.3523454877131703e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61656296, + "step": 91455 + }, + { + "epoch": 2.23438301614834, + "grad_norm": 0.0150414127856493, + "learning_rate": 1.352265677685054e-06, + "loss": 0.0075, + "num_input_tokens_seen": 61659688, + "step": 91460 + }, + { + "epoch": 2.234505166980187, + "grad_norm": 0.065169557929039, + "learning_rate": 1.352185865095167e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61662696, + "step": 91465 + }, + { + "epoch": 2.234627317812034, + "grad_norm": 2.2550110816955566, + "learning_rate": 1.3521060499440893e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61666152, + "step": 91470 + }, + { + "epoch": 2.2347494686438814, + "grad_norm": 0.026500001549720764, + "learning_rate": 1.352026232232402e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61669288, + "step": 91475 + }, + { + "epoch": 2.2348716194757285, + "grad_norm": 0.2615768015384674, + "learning_rate": 1.351946411960685e-06, + "loss": 0.0586, + "num_input_tokens_seen": 61672808, + "step": 91480 + }, + { + "epoch": 2.2349937703075757, + "grad_norm": 12.543548583984375, + "learning_rate": 1.351866589129519e-06, + "loss": 0.0469, + "num_input_tokens_seen": 61675752, + "step": 91485 + }, + { + "epoch": 2.235115921139423, + "grad_norm": 0.040481340140104294, + "learning_rate": 1.3517867637394846e-06, + "loss": 0.0579, + "num_input_tokens_seen": 61679144, + "step": 91490 + }, + { + "epoch": 2.23523807197127, + "grad_norm": 0.01803850382566452, + "learning_rate": 1.3517069357911626e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61683304, + "step": 91495 + }, + { + "epoch": 2.2353602228031173, + "grad_norm": 38.861106872558594, + "learning_rate": 1.351627105285133e-06, + "loss": 0.1406, + "num_input_tokens_seen": 61686248, + "step": 91500 + }, + { + "epoch": 2.2354823736349645, + "grad_norm": 0.0414762981235981, + "learning_rate": 1.3515472722219763e-06, + "loss": 0.0408, + "num_input_tokens_seen": 61689960, + "step": 91505 + }, + { + "epoch": 2.2356045244668117, + "grad_norm": 0.12040600925683975, + "learning_rate": 1.3514674366022734e-06, + "loss": 0.1092, + "num_input_tokens_seen": 61693672, + "step": 91510 + }, + { + "epoch": 2.235726675298659, + "grad_norm": 0.214358851313591, + "learning_rate": 1.3513875984266045e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61697256, + "step": 91515 + }, + { + "epoch": 2.235848826130506, + "grad_norm": 0.328814834356308, + "learning_rate": 1.3513077576955506e-06, + "loss": 0.1026, + "num_input_tokens_seen": 61700456, + "step": 91520 + }, + { + "epoch": 2.2359709769623533, + "grad_norm": 0.03226625919342041, + "learning_rate": 1.3512279144096924e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61703464, + "step": 91525 + }, + { + "epoch": 2.2360931277942004, + "grad_norm": 0.03546365350484848, + "learning_rate": 1.3511480685696101e-06, + "loss": 0.0644, + "num_input_tokens_seen": 61706728, + "step": 91530 + }, + { + "epoch": 2.2362152786260476, + "grad_norm": 0.2883140444755554, + "learning_rate": 1.3510682201758847e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61710248, + "step": 91535 + }, + { + "epoch": 2.236337429457895, + "grad_norm": 0.028474008664488792, + "learning_rate": 1.350988369229097e-06, + "loss": 0.0503, + "num_input_tokens_seen": 61713192, + "step": 91540 + }, + { + "epoch": 2.2364595802897416, + "grad_norm": 0.12916983664035797, + "learning_rate": 1.3509085157298272e-06, + "loss": 0.0582, + "num_input_tokens_seen": 61716136, + "step": 91545 + }, + { + "epoch": 2.2365817311215888, + "grad_norm": 0.20014813542366028, + "learning_rate": 1.3508286596786565e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61719400, + "step": 91550 + }, + { + "epoch": 2.236703881953436, + "grad_norm": 11.671493530273438, + "learning_rate": 1.3507488010761651e-06, + "loss": 0.1024, + "num_input_tokens_seen": 61722472, + "step": 91555 + }, + { + "epoch": 2.236826032785283, + "grad_norm": 0.2516132593154907, + "learning_rate": 1.3506689399229342e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61726248, + "step": 91560 + }, + { + "epoch": 2.2369481836171303, + "grad_norm": 0.03510146215558052, + "learning_rate": 1.3505890762195446e-06, + "loss": 0.0502, + "num_input_tokens_seen": 61729768, + "step": 91565 + }, + { + "epoch": 2.2370703344489775, + "grad_norm": 0.05277731269598007, + "learning_rate": 1.3505092099665771e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61733032, + "step": 91570 + }, + { + "epoch": 2.2371924852808247, + "grad_norm": 0.01292071770876646, + "learning_rate": 1.3504293411646122e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61736680, + "step": 91575 + }, + { + "epoch": 2.237314636112672, + "grad_norm": 43.903099060058594, + "learning_rate": 1.3503494698142305e-06, + "loss": 0.0538, + "num_input_tokens_seen": 61740584, + "step": 91580 + }, + { + "epoch": 2.237436786944519, + "grad_norm": 0.020390598103404045, + "learning_rate": 1.3502695959160136e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61744040, + "step": 91585 + }, + { + "epoch": 2.2375589377763663, + "grad_norm": 0.07818000018596649, + "learning_rate": 1.350189719470542e-06, + "loss": 0.029, + "num_input_tokens_seen": 61747624, + "step": 91590 + }, + { + "epoch": 2.2376810886082135, + "grad_norm": 0.07038668543100357, + "learning_rate": 1.3501098404783963e-06, + "loss": 0.0006, + "num_input_tokens_seen": 61751016, + "step": 91595 + }, + { + "epoch": 2.2378032394400607, + "grad_norm": 0.04207471385598183, + "learning_rate": 1.3500299589401581e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61754344, + "step": 91600 + }, + { + "epoch": 2.237925390271908, + "grad_norm": 0.023803038522601128, + "learning_rate": 1.3499500748564076e-06, + "loss": 0.0421, + "num_input_tokens_seen": 61757416, + "step": 91605 + }, + { + "epoch": 2.238047541103755, + "grad_norm": 18.444482803344727, + "learning_rate": 1.349870188227726e-06, + "loss": 0.0563, + "num_input_tokens_seen": 61760744, + "step": 91610 + }, + { + "epoch": 2.2381696919356022, + "grad_norm": 0.013019073754549026, + "learning_rate": 1.3497902990546942e-06, + "loss": 0.0428, + "num_input_tokens_seen": 61764008, + "step": 91615 + }, + { + "epoch": 2.2382918427674494, + "grad_norm": 17.95228385925293, + "learning_rate": 1.3497104073378936e-06, + "loss": 0.0697, + "num_input_tokens_seen": 61767528, + "step": 91620 + }, + { + "epoch": 2.2384139935992966, + "grad_norm": 0.06719338148832321, + "learning_rate": 1.3496305130779044e-06, + "loss": 0.0008, + "num_input_tokens_seen": 61770984, + "step": 91625 + }, + { + "epoch": 2.2385361444311433, + "grad_norm": 0.1391330361366272, + "learning_rate": 1.3495506162753085e-06, + "loss": 0.0846, + "num_input_tokens_seen": 61774568, + "step": 91630 + }, + { + "epoch": 2.2386582952629905, + "grad_norm": 110.32960510253906, + "learning_rate": 1.3494707169306866e-06, + "loss": 0.0028, + "num_input_tokens_seen": 61777576, + "step": 91635 + }, + { + "epoch": 2.2387804460948377, + "grad_norm": 0.09409749507904053, + "learning_rate": 1.349390815044619e-06, + "loss": 0.0422, + "num_input_tokens_seen": 61780904, + "step": 91640 + }, + { + "epoch": 2.238902596926685, + "grad_norm": 0.8571845889091492, + "learning_rate": 1.3493109106176879e-06, + "loss": 0.1066, + "num_input_tokens_seen": 61784104, + "step": 91645 + }, + { + "epoch": 2.239024747758532, + "grad_norm": 0.03418490290641785, + "learning_rate": 1.349231003650474e-06, + "loss": 0.0829, + "num_input_tokens_seen": 61787240, + "step": 91650 + }, + { + "epoch": 2.2391468985903793, + "grad_norm": 0.5480493903160095, + "learning_rate": 1.349151094143558e-06, + "loss": 0.0011, + "num_input_tokens_seen": 61790632, + "step": 91655 + }, + { + "epoch": 2.2392690494222265, + "grad_norm": 0.00613664323464036, + "learning_rate": 1.3490711820975217e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61793640, + "step": 91660 + }, + { + "epoch": 2.2393912002540737, + "grad_norm": 0.047891564667224884, + "learning_rate": 1.3489912675129455e-06, + "loss": 0.0489, + "num_input_tokens_seen": 61797160, + "step": 91665 + }, + { + "epoch": 2.239513351085921, + "grad_norm": 0.014169635251164436, + "learning_rate": 1.348911350390411e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61801064, + "step": 91670 + }, + { + "epoch": 2.239635501917768, + "grad_norm": 0.06814318895339966, + "learning_rate": 1.3488314307304994e-06, + "loss": 0.0253, + "num_input_tokens_seen": 61804264, + "step": 91675 + }, + { + "epoch": 2.2397576527496152, + "grad_norm": 19.467390060424805, + "learning_rate": 1.3487515085337917e-06, + "loss": 0.0518, + "num_input_tokens_seen": 61807336, + "step": 91680 + }, + { + "epoch": 2.2398798035814624, + "grad_norm": 0.11614019423723221, + "learning_rate": 1.3486715838008693e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61810920, + "step": 91685 + }, + { + "epoch": 2.2400019544133096, + "grad_norm": 0.04380062595009804, + "learning_rate": 1.3485916565323135e-06, + "loss": 0.0458, + "num_input_tokens_seen": 61814184, + "step": 91690 + }, + { + "epoch": 2.240124105245157, + "grad_norm": 0.2181423455476761, + "learning_rate": 1.3485117267287053e-06, + "loss": 0.0025, + "num_input_tokens_seen": 61817576, + "step": 91695 + }, + { + "epoch": 2.240246256077004, + "grad_norm": 0.1650727093219757, + "learning_rate": 1.348431794390626e-06, + "loss": 0.0373, + "num_input_tokens_seen": 61820840, + "step": 91700 + }, + { + "epoch": 2.240368406908851, + "grad_norm": 0.1175679862499237, + "learning_rate": 1.3483518595186572e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61824424, + "step": 91705 + }, + { + "epoch": 2.2404905577406984, + "grad_norm": 0.04276014491915703, + "learning_rate": 1.3482719221133799e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61827624, + "step": 91710 + }, + { + "epoch": 2.2406127085725456, + "grad_norm": 725.5232543945312, + "learning_rate": 1.3481919821753754e-06, + "loss": 0.0285, + "num_input_tokens_seen": 61830760, + "step": 91715 + }, + { + "epoch": 2.2407348594043928, + "grad_norm": 0.016906613484025, + "learning_rate": 1.348112039705225e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61834088, + "step": 91720 + }, + { + "epoch": 2.2408570102362395, + "grad_norm": 0.0037014957051724195, + "learning_rate": 1.3480320947035106e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61837672, + "step": 91725 + }, + { + "epoch": 2.2409791610680867, + "grad_norm": 0.13906598091125488, + "learning_rate": 1.347952147170813e-06, + "loss": 0.0367, + "num_input_tokens_seen": 61840680, + "step": 91730 + }, + { + "epoch": 2.241101311899934, + "grad_norm": 0.21275968849658966, + "learning_rate": 1.3478721971077137e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61843880, + "step": 91735 + }, + { + "epoch": 2.241223462731781, + "grad_norm": 0.06388302892446518, + "learning_rate": 1.3477922445147943e-06, + "loss": 0.0173, + "num_input_tokens_seen": 61847336, + "step": 91740 + }, + { + "epoch": 2.2413456135636283, + "grad_norm": 0.003205791814252734, + "learning_rate": 1.347712289392636e-06, + "loss": 0.0384, + "num_input_tokens_seen": 61850600, + "step": 91745 + }, + { + "epoch": 2.2414677643954755, + "grad_norm": 0.15189264714717865, + "learning_rate": 1.3476323317418208e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61853864, + "step": 91750 + }, + { + "epoch": 2.2415899152273226, + "grad_norm": 25.96412467956543, + "learning_rate": 1.3475523715629296e-06, + "loss": 0.1162, + "num_input_tokens_seen": 61857064, + "step": 91755 + }, + { + "epoch": 2.24171206605917, + "grad_norm": 0.05080273002386093, + "learning_rate": 1.3474724088565442e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61860328, + "step": 91760 + }, + { + "epoch": 2.241834216891017, + "grad_norm": 0.10395577549934387, + "learning_rate": 1.3473924436232456e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61863464, + "step": 91765 + }, + { + "epoch": 2.241956367722864, + "grad_norm": 0.040744926780462265, + "learning_rate": 1.347312475863616e-06, + "loss": 0.0389, + "num_input_tokens_seen": 61866920, + "step": 91770 + }, + { + "epoch": 2.2420785185547114, + "grad_norm": 75.73230743408203, + "learning_rate": 1.3472325055782366e-06, + "loss": 0.0905, + "num_input_tokens_seen": 61870056, + "step": 91775 + }, + { + "epoch": 2.2422006693865586, + "grad_norm": 3.3570287227630615, + "learning_rate": 1.347152532767689e-06, + "loss": 0.0055, + "num_input_tokens_seen": 61873512, + "step": 91780 + }, + { + "epoch": 2.242322820218406, + "grad_norm": 0.03612152487039566, + "learning_rate": 1.347072557432555e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61877736, + "step": 91785 + }, + { + "epoch": 2.242444971050253, + "grad_norm": 34.28316116333008, + "learning_rate": 1.3469925795734155e-06, + "loss": 0.1082, + "num_input_tokens_seen": 61880808, + "step": 91790 + }, + { + "epoch": 2.2425671218821, + "grad_norm": 0.10308131575584412, + "learning_rate": 1.346912599190853e-06, + "loss": 0.0716, + "num_input_tokens_seen": 61884648, + "step": 91795 + }, + { + "epoch": 2.2426892727139474, + "grad_norm": 0.3189535439014435, + "learning_rate": 1.346832616285449e-06, + "loss": 0.0777, + "num_input_tokens_seen": 61887784, + "step": 91800 + }, + { + "epoch": 2.2428114235457945, + "grad_norm": 0.056606896221637726, + "learning_rate": 1.3467526308577846e-06, + "loss": 0.0003, + "num_input_tokens_seen": 61891240, + "step": 91805 + }, + { + "epoch": 2.2429335743776413, + "grad_norm": 0.0325242318212986, + "learning_rate": 1.3466726429084418e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61894568, + "step": 91810 + }, + { + "epoch": 2.2430557252094885, + "grad_norm": 17.941125869750977, + "learning_rate": 1.3465926524380024e-06, + "loss": 0.1248, + "num_input_tokens_seen": 61897896, + "step": 91815 + }, + { + "epoch": 2.2431778760413357, + "grad_norm": 0.6458048224449158, + "learning_rate": 1.3465126594470481e-06, + "loss": 0.0005, + "num_input_tokens_seen": 61901160, + "step": 91820 + }, + { + "epoch": 2.243300026873183, + "grad_norm": 0.11034848541021347, + "learning_rate": 1.3464326639361604e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61904424, + "step": 91825 + }, + { + "epoch": 2.24342217770503, + "grad_norm": 0.27809590101242065, + "learning_rate": 1.346352665905921e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61907752, + "step": 91830 + }, + { + "epoch": 2.2435443285368772, + "grad_norm": 0.01850729063153267, + "learning_rate": 1.3462726653569121e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61910760, + "step": 91835 + }, + { + "epoch": 2.2436664793687244, + "grad_norm": 0.6994618773460388, + "learning_rate": 1.3461926622897153e-06, + "loss": 0.0691, + "num_input_tokens_seen": 61914216, + "step": 91840 + }, + { + "epoch": 2.2437886302005716, + "grad_norm": 465.07183837890625, + "learning_rate": 1.3461126567049123e-06, + "loss": 0.1318, + "num_input_tokens_seen": 61917416, + "step": 91845 + }, + { + "epoch": 2.243910781032419, + "grad_norm": 0.026234636083245277, + "learning_rate": 1.3460326486030849e-06, + "loss": 0.1174, + "num_input_tokens_seen": 61920680, + "step": 91850 + }, + { + "epoch": 2.244032931864266, + "grad_norm": 0.005727563053369522, + "learning_rate": 1.345952637984815e-06, + "loss": 0.0346, + "num_input_tokens_seen": 61923880, + "step": 91855 + }, + { + "epoch": 2.244155082696113, + "grad_norm": 0.09239700436592102, + "learning_rate": 1.3458726248506844e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61926888, + "step": 91860 + }, + { + "epoch": 2.2442772335279604, + "grad_norm": 3.4369235038757324, + "learning_rate": 1.3457926092012752e-06, + "loss": 0.0443, + "num_input_tokens_seen": 61930344, + "step": 91865 + }, + { + "epoch": 2.2443993843598076, + "grad_norm": 81.35676574707031, + "learning_rate": 1.3457125910371692e-06, + "loss": 0.0434, + "num_input_tokens_seen": 61933736, + "step": 91870 + }, + { + "epoch": 2.2445215351916548, + "grad_norm": 0.09962138533592224, + "learning_rate": 1.345632570358948e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61937064, + "step": 91875 + }, + { + "epoch": 2.244643686023502, + "grad_norm": 0.06848546862602234, + "learning_rate": 1.345552547167194e-06, + "loss": 0.0007, + "num_input_tokens_seen": 61940712, + "step": 91880 + }, + { + "epoch": 2.244765836855349, + "grad_norm": 0.01280384510755539, + "learning_rate": 1.345472521462489e-06, + "loss": 0.0627, + "num_input_tokens_seen": 61944040, + "step": 91885 + }, + { + "epoch": 2.2448879876871963, + "grad_norm": 0.19952426850795746, + "learning_rate": 1.3453924932454145e-06, + "loss": 0.0617, + "num_input_tokens_seen": 61947112, + "step": 91890 + }, + { + "epoch": 2.2450101385190435, + "grad_norm": 0.06788770854473114, + "learning_rate": 1.3453124625165533e-06, + "loss": 0.0385, + "num_input_tokens_seen": 61950376, + "step": 91895 + }, + { + "epoch": 2.2451322893508907, + "grad_norm": 0.01820383220911026, + "learning_rate": 1.3452324292764866e-06, + "loss": 0.0001, + "num_input_tokens_seen": 61953832, + "step": 91900 + }, + { + "epoch": 2.2452544401827375, + "grad_norm": 0.05058905854821205, + "learning_rate": 1.345152393525797e-06, + "loss": 0.053, + "num_input_tokens_seen": 61956840, + "step": 91905 + }, + { + "epoch": 2.2453765910145846, + "grad_norm": 0.037442367523908615, + "learning_rate": 1.3450723552650667e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61960488, + "step": 91910 + }, + { + "epoch": 2.245498741846432, + "grad_norm": 0.12428835779428482, + "learning_rate": 1.3449923144948772e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61963688, + "step": 91915 + }, + { + "epoch": 2.245620892678279, + "grad_norm": 0.008461522869765759, + "learning_rate": 1.3449122712158106e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61967336, + "step": 91920 + }, + { + "epoch": 2.245743043510126, + "grad_norm": 0.03980327025055885, + "learning_rate": 1.3448322254284495e-06, + "loss": 0.0567, + "num_input_tokens_seen": 61970792, + "step": 91925 + }, + { + "epoch": 2.2458651943419734, + "grad_norm": 70.60750579833984, + "learning_rate": 1.3447521771333754e-06, + "loss": 0.0668, + "num_input_tokens_seen": 61974056, + "step": 91930 + }, + { + "epoch": 2.2459873451738206, + "grad_norm": 0.00029991817427799106, + "learning_rate": 1.344672126331171e-06, + "loss": 0.001, + "num_input_tokens_seen": 61977256, + "step": 91935 + }, + { + "epoch": 2.246109496005668, + "grad_norm": 59.51887893676758, + "learning_rate": 1.3445920730224177e-06, + "loss": 0.0492, + "num_input_tokens_seen": 61980456, + "step": 91940 + }, + { + "epoch": 2.246231646837515, + "grad_norm": 74.84333801269531, + "learning_rate": 1.3445120172076987e-06, + "loss": 0.0787, + "num_input_tokens_seen": 61983848, + "step": 91945 + }, + { + "epoch": 2.246353797669362, + "grad_norm": 0.0036761623341590166, + "learning_rate": 1.3444319588875955e-06, + "loss": 0.0002, + "num_input_tokens_seen": 61987112, + "step": 91950 + }, + { + "epoch": 2.2464759485012094, + "grad_norm": 0.02540251985192299, + "learning_rate": 1.3443518980626904e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61990568, + "step": 91955 + }, + { + "epoch": 2.2465980993330565, + "grad_norm": 0.748134195804596, + "learning_rate": 1.3442718347335658e-06, + "loss": 0.0004, + "num_input_tokens_seen": 61994152, + "step": 91960 + }, + { + "epoch": 2.2467202501649037, + "grad_norm": 0.026896782219409943, + "learning_rate": 1.3441917689008038e-06, + "loss": 0.039, + "num_input_tokens_seen": 61997736, + "step": 91965 + }, + { + "epoch": 2.246842400996751, + "grad_norm": 0.016510486602783203, + "learning_rate": 1.3441117005649867e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62001640, + "step": 91970 + }, + { + "epoch": 2.246964551828598, + "grad_norm": 0.005256003234535456, + "learning_rate": 1.3440316297266967e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62004776, + "step": 91975 + }, + { + "epoch": 2.2470867026604453, + "grad_norm": 0.004536962136626244, + "learning_rate": 1.343951556386516e-06, + "loss": 0.0322, + "num_input_tokens_seen": 62008232, + "step": 91980 + }, + { + "epoch": 2.2472088534922925, + "grad_norm": 0.2646792531013489, + "learning_rate": 1.343871480545027e-06, + "loss": 0.0467, + "num_input_tokens_seen": 62011496, + "step": 91985 + }, + { + "epoch": 2.2473310043241392, + "grad_norm": 0.029792839661240578, + "learning_rate": 1.3437914022028122e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62014888, + "step": 91990 + }, + { + "epoch": 2.2474531551559864, + "grad_norm": 0.21611711382865906, + "learning_rate": 1.343711321360454e-06, + "loss": 0.0544, + "num_input_tokens_seen": 62018280, + "step": 91995 + }, + { + "epoch": 2.2475753059878336, + "grad_norm": 0.004224136937409639, + "learning_rate": 1.3436312380185345e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62021736, + "step": 92000 + }, + { + "epoch": 2.247697456819681, + "grad_norm": 1.352657437324524, + "learning_rate": 1.3435511521776363e-06, + "loss": 0.0888, + "num_input_tokens_seen": 62025192, + "step": 92005 + }, + { + "epoch": 2.247819607651528, + "grad_norm": 0.04550763592123985, + "learning_rate": 1.343471063838342e-06, + "loss": 0.0008, + "num_input_tokens_seen": 62028904, + "step": 92010 + }, + { + "epoch": 2.247941758483375, + "grad_norm": 28.128278732299805, + "learning_rate": 1.3433909730012334e-06, + "loss": 0.0931, + "num_input_tokens_seen": 62032552, + "step": 92015 + }, + { + "epoch": 2.2480639093152224, + "grad_norm": 49.448265075683594, + "learning_rate": 1.3433108796668933e-06, + "loss": 0.0638, + "num_input_tokens_seen": 62035944, + "step": 92020 + }, + { + "epoch": 2.2481860601470696, + "grad_norm": 0.18086501955986023, + "learning_rate": 1.3432307838359043e-06, + "loss": 0.0744, + "num_input_tokens_seen": 62039336, + "step": 92025 + }, + { + "epoch": 2.2483082109789168, + "grad_norm": 295.6267395019531, + "learning_rate": 1.3431506855088483e-06, + "loss": 0.1497, + "num_input_tokens_seen": 62042408, + "step": 92030 + }, + { + "epoch": 2.248430361810764, + "grad_norm": 28.082304000854492, + "learning_rate": 1.3430705846863086e-06, + "loss": 0.0363, + "num_input_tokens_seen": 62045992, + "step": 92035 + }, + { + "epoch": 2.248552512642611, + "grad_norm": 0.6816192865371704, + "learning_rate": 1.3429904813688674e-06, + "loss": 0.0425, + "num_input_tokens_seen": 62049576, + "step": 92040 + }, + { + "epoch": 2.2486746634744583, + "grad_norm": 83.5420913696289, + "learning_rate": 1.3429103755571066e-06, + "loss": 0.08, + "num_input_tokens_seen": 62052840, + "step": 92045 + }, + { + "epoch": 2.2487968143063055, + "grad_norm": 0.04537109658122063, + "learning_rate": 1.34283026725161e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62055912, + "step": 92050 + }, + { + "epoch": 2.2489189651381527, + "grad_norm": 0.021133245900273323, + "learning_rate": 1.342750156452959e-06, + "loss": 0.0343, + "num_input_tokens_seen": 62059304, + "step": 92055 + }, + { + "epoch": 2.24904111597, + "grad_norm": 0.06331618875265121, + "learning_rate": 1.342670043161737e-06, + "loss": 0.0355, + "num_input_tokens_seen": 62064808, + "step": 92060 + }, + { + "epoch": 2.249163266801847, + "grad_norm": 0.020840996876358986, + "learning_rate": 1.3425899273785262e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62068136, + "step": 92065 + }, + { + "epoch": 2.2492854176336943, + "grad_norm": 0.39848390221595764, + "learning_rate": 1.3425098091039095e-06, + "loss": 0.0363, + "num_input_tokens_seen": 62070888, + "step": 92070 + }, + { + "epoch": 2.249407568465541, + "grad_norm": 0.003665744327008724, + "learning_rate": 1.3424296883384688e-06, + "loss": 0.0171, + "num_input_tokens_seen": 62073832, + "step": 92075 + }, + { + "epoch": 2.249529719297388, + "grad_norm": 0.005300711374729872, + "learning_rate": 1.3423495650827877e-06, + "loss": 0.0, + "num_input_tokens_seen": 62077288, + "step": 92080 + }, + { + "epoch": 2.2496518701292354, + "grad_norm": 0.0019977991469204426, + "learning_rate": 1.3422694393374484e-06, + "loss": 0.0009, + "num_input_tokens_seen": 62080360, + "step": 92085 + }, + { + "epoch": 2.2497740209610826, + "grad_norm": 0.1273031085729599, + "learning_rate": 1.3421893111030338e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62083496, + "step": 92090 + }, + { + "epoch": 2.2498961717929298, + "grad_norm": 0.059631090611219406, + "learning_rate": 1.3421091803801262e-06, + "loss": 0.1333, + "num_input_tokens_seen": 62086760, + "step": 92095 + }, + { + "epoch": 2.250018322624777, + "grad_norm": 0.0072194174863398075, + "learning_rate": 1.342029047169309e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62090024, + "step": 92100 + }, + { + "epoch": 2.250140473456624, + "grad_norm": 0.014334439300000668, + "learning_rate": 1.341948911471164e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62093160, + "step": 92105 + }, + { + "epoch": 2.2501649036229936, + "eval_loss": 0.2040521502494812, + "eval_runtime": 47.5159, + "eval_samples_per_second": 765.743, + "eval_steps_per_second": 95.736, + "num_input_tokens_seen": 62093992, + "step": 92106 + }, + { + "epoch": 2.2502626242884713, + "grad_norm": 0.042339157313108444, + "learning_rate": 1.341868773286275e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62097000, + "step": 92110 + }, + { + "epoch": 2.2503847751203185, + "grad_norm": 0.02461964450776577, + "learning_rate": 1.3417886326152247e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62100072, + "step": 92115 + }, + { + "epoch": 2.2505069259521657, + "grad_norm": 0.015833193436264992, + "learning_rate": 1.3417084894585948e-06, + "loss": 0.043, + "num_input_tokens_seen": 62103272, + "step": 92120 + }, + { + "epoch": 2.250629076784013, + "grad_norm": 0.13749870657920837, + "learning_rate": 1.341628343816969e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62106344, + "step": 92125 + }, + { + "epoch": 2.25075122761586, + "grad_norm": 0.00958644412457943, + "learning_rate": 1.3415481956909305e-06, + "loss": 0.0598, + "num_input_tokens_seen": 62109480, + "step": 92130 + }, + { + "epoch": 2.2508733784477073, + "grad_norm": 0.1764431893825531, + "learning_rate": 1.341468045081061e-06, + "loss": 0.1393, + "num_input_tokens_seen": 62113064, + "step": 92135 + }, + { + "epoch": 2.2509955292795545, + "grad_norm": 40.462764739990234, + "learning_rate": 1.3413878919879443e-06, + "loss": 0.1327, + "num_input_tokens_seen": 62116776, + "step": 92140 + }, + { + "epoch": 2.2511176801114017, + "grad_norm": 0.11698302626609802, + "learning_rate": 1.341307736412163e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62120168, + "step": 92145 + }, + { + "epoch": 2.251239830943249, + "grad_norm": 0.05886290222406387, + "learning_rate": 1.3412275783543002e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62123304, + "step": 92150 + }, + { + "epoch": 2.251361981775096, + "grad_norm": 0.08000266551971436, + "learning_rate": 1.3411474178149384e-06, + "loss": 0.0531, + "num_input_tokens_seen": 62126824, + "step": 92155 + }, + { + "epoch": 2.2514841326069432, + "grad_norm": 0.04475337266921997, + "learning_rate": 1.341067254794661e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62130664, + "step": 92160 + }, + { + "epoch": 2.2516062834387904, + "grad_norm": 0.02364620752632618, + "learning_rate": 1.340987089294051e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62134184, + "step": 92165 + }, + { + "epoch": 2.251728434270637, + "grad_norm": 0.017477652058005333, + "learning_rate": 1.3409069213136908e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62137640, + "step": 92170 + }, + { + "epoch": 2.2518505851024844, + "grad_norm": 0.1361808478832245, + "learning_rate": 1.3408267508541639e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62140904, + "step": 92175 + }, + { + "epoch": 2.2519727359343316, + "grad_norm": 892.4404296875, + "learning_rate": 1.3407465779160532e-06, + "loss": 0.0098, + "num_input_tokens_seen": 62144424, + "step": 92180 + }, + { + "epoch": 2.2520948867661787, + "grad_norm": 11.566713333129883, + "learning_rate": 1.3406664024999417e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62147432, + "step": 92185 + }, + { + "epoch": 2.252217037598026, + "grad_norm": 0.12640665471553802, + "learning_rate": 1.3405862246064126e-06, + "loss": 0.038, + "num_input_tokens_seen": 62150312, + "step": 92190 + }, + { + "epoch": 2.252339188429873, + "grad_norm": 350.6678771972656, + "learning_rate": 1.3405060442360488e-06, + "loss": 0.1214, + "num_input_tokens_seen": 62153768, + "step": 92195 + }, + { + "epoch": 2.2524613392617203, + "grad_norm": 0.06017898768186569, + "learning_rate": 1.3404258613894332e-06, + "loss": 0.0414, + "num_input_tokens_seen": 62157160, + "step": 92200 + }, + { + "epoch": 2.2525834900935675, + "grad_norm": 0.04330041632056236, + "learning_rate": 1.3403456760671494e-06, + "loss": 0.0925, + "num_input_tokens_seen": 62160744, + "step": 92205 + }, + { + "epoch": 2.2527056409254147, + "grad_norm": 0.09254544973373413, + "learning_rate": 1.3402654882697803e-06, + "loss": 0.0628, + "num_input_tokens_seen": 62163816, + "step": 92210 + }, + { + "epoch": 2.252827791757262, + "grad_norm": 0.040174610912799835, + "learning_rate": 1.3401852979979094e-06, + "loss": 0.0385, + "num_input_tokens_seen": 62167144, + "step": 92215 + }, + { + "epoch": 2.252949942589109, + "grad_norm": 0.08553251624107361, + "learning_rate": 1.340105105252119e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62170792, + "step": 92220 + }, + { + "epoch": 2.2530720934209563, + "grad_norm": 0.037372469902038574, + "learning_rate": 1.3400249100329932e-06, + "loss": 0.0983, + "num_input_tokens_seen": 62174248, + "step": 92225 + }, + { + "epoch": 2.2531942442528035, + "grad_norm": 0.02428109012544155, + "learning_rate": 1.3399447123411146e-06, + "loss": 0.0312, + "num_input_tokens_seen": 62177384, + "step": 92230 + }, + { + "epoch": 2.2533163950846506, + "grad_norm": 0.20832835137844086, + "learning_rate": 1.3398645121770664e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62180712, + "step": 92235 + }, + { + "epoch": 2.253438545916498, + "grad_norm": 0.023863671347498894, + "learning_rate": 1.3397843095414324e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62184104, + "step": 92240 + }, + { + "epoch": 2.253560696748345, + "grad_norm": 731.2637939453125, + "learning_rate": 1.3397041044347953e-06, + "loss": 0.0966, + "num_input_tokens_seen": 62187752, + "step": 92245 + }, + { + "epoch": 2.253682847580192, + "grad_norm": 188.05191040039062, + "learning_rate": 1.3396238968577387e-06, + "loss": 0.0053, + "num_input_tokens_seen": 62191272, + "step": 92250 + }, + { + "epoch": 2.253804998412039, + "grad_norm": 0.0365290492773056, + "learning_rate": 1.339543686810846e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62194536, + "step": 92255 + }, + { + "epoch": 2.2539271492438866, + "grad_norm": 0.060201194137334824, + "learning_rate": 1.3394634742946998e-06, + "loss": 0.0527, + "num_input_tokens_seen": 62197608, + "step": 92260 + }, + { + "epoch": 2.2540493000757333, + "grad_norm": 0.01749134622514248, + "learning_rate": 1.339383259309884e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62200744, + "step": 92265 + }, + { + "epoch": 2.2541714509075805, + "grad_norm": 46.34138107299805, + "learning_rate": 1.3393030418569821e-06, + "loss": 0.0025, + "num_input_tokens_seen": 62204200, + "step": 92270 + }, + { + "epoch": 2.2542936017394277, + "grad_norm": 0.11681878566741943, + "learning_rate": 1.3392228219365772e-06, + "loss": 0.1194, + "num_input_tokens_seen": 62207656, + "step": 92275 + }, + { + "epoch": 2.254415752571275, + "grad_norm": 0.004872969351708889, + "learning_rate": 1.3391425995492524e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62211432, + "step": 92280 + }, + { + "epoch": 2.254537903403122, + "grad_norm": 0.02966301515698433, + "learning_rate": 1.3390623746955918e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62214952, + "step": 92285 + }, + { + "epoch": 2.2546600542349693, + "grad_norm": 63.925384521484375, + "learning_rate": 1.3389821473761783e-06, + "loss": 0.0381, + "num_input_tokens_seen": 62218344, + "step": 92290 + }, + { + "epoch": 2.2547822050668165, + "grad_norm": 0.01595384255051613, + "learning_rate": 1.338901917591595e-06, + "loss": 0.0961, + "num_input_tokens_seen": 62221992, + "step": 92295 + }, + { + "epoch": 2.2549043558986637, + "grad_norm": 0.4929356276988983, + "learning_rate": 1.3388216853424262e-06, + "loss": 0.0986, + "num_input_tokens_seen": 62225320, + "step": 92300 + }, + { + "epoch": 2.255026506730511, + "grad_norm": 0.05259265378117561, + "learning_rate": 1.3387414506292548e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62228264, + "step": 92305 + }, + { + "epoch": 2.255148657562358, + "grad_norm": 42.33659362792969, + "learning_rate": 1.3386612134526648e-06, + "loss": 0.0362, + "num_input_tokens_seen": 62231528, + "step": 92310 + }, + { + "epoch": 2.2552708083942052, + "grad_norm": 0.002288345480337739, + "learning_rate": 1.3385809738132392e-06, + "loss": 0.0891, + "num_input_tokens_seen": 62235048, + "step": 92315 + }, + { + "epoch": 2.2553929592260524, + "grad_norm": 0.0041145095601677895, + "learning_rate": 1.3385007317115614e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62238120, + "step": 92320 + }, + { + "epoch": 2.2555151100578996, + "grad_norm": 0.019636070355772972, + "learning_rate": 1.3384204871482156e-06, + "loss": 0.0315, + "num_input_tokens_seen": 62241576, + "step": 92325 + }, + { + "epoch": 2.255637260889747, + "grad_norm": 0.023572169244289398, + "learning_rate": 1.3383402401237848e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62244840, + "step": 92330 + }, + { + "epoch": 2.255759411721594, + "grad_norm": 0.05980467051267624, + "learning_rate": 1.3382599906388529e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62247592, + "step": 92335 + }, + { + "epoch": 2.2558815625534407, + "grad_norm": 0.0034921050537377596, + "learning_rate": 1.3381797386940035e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62251112, + "step": 92340 + }, + { + "epoch": 2.2560037133852884, + "grad_norm": 0.009633969515562057, + "learning_rate": 1.33809948428982e-06, + "loss": 0.0969, + "num_input_tokens_seen": 62254184, + "step": 92345 + }, + { + "epoch": 2.256125864217135, + "grad_norm": 0.24233314394950867, + "learning_rate": 1.3380192274268859e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62258216, + "step": 92350 + }, + { + "epoch": 2.2562480150489823, + "grad_norm": 0.006902449764311314, + "learning_rate": 1.337938968105785e-06, + "loss": 0.0489, + "num_input_tokens_seen": 62261736, + "step": 92355 + }, + { + "epoch": 2.2563701658808295, + "grad_norm": 21.458951950073242, + "learning_rate": 1.337858706327101e-06, + "loss": 0.0563, + "num_input_tokens_seen": 62265064, + "step": 92360 + }, + { + "epoch": 2.2564923167126767, + "grad_norm": 0.04538079351186752, + "learning_rate": 1.337778442091418e-06, + "loss": 0.0648, + "num_input_tokens_seen": 62268072, + "step": 92365 + }, + { + "epoch": 2.256614467544524, + "grad_norm": 0.004993412643671036, + "learning_rate": 1.337698175399319e-06, + "loss": 0.0, + "num_input_tokens_seen": 62271592, + "step": 92370 + }, + { + "epoch": 2.256736618376371, + "grad_norm": 1.0822269916534424, + "learning_rate": 1.3376179062513884e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62274600, + "step": 92375 + }, + { + "epoch": 2.2568587692082183, + "grad_norm": 0.021430518478155136, + "learning_rate": 1.3375376346482094e-06, + "loss": 0.0789, + "num_input_tokens_seen": 62277992, + "step": 92380 + }, + { + "epoch": 2.2569809200400655, + "grad_norm": 0.038618966937065125, + "learning_rate": 1.337457360590366e-06, + "loss": 0.0431, + "num_input_tokens_seen": 62281896, + "step": 92385 + }, + { + "epoch": 2.2571030708719126, + "grad_norm": 0.08098925650119781, + "learning_rate": 1.3373770840784416e-06, + "loss": 0.0203, + "num_input_tokens_seen": 62285544, + "step": 92390 + }, + { + "epoch": 2.25722522170376, + "grad_norm": 0.04787323251366615, + "learning_rate": 1.3372968051130205e-06, + "loss": 0.1419, + "num_input_tokens_seen": 62289192, + "step": 92395 + }, + { + "epoch": 2.257347372535607, + "grad_norm": 36.374874114990234, + "learning_rate": 1.3372165236946864e-06, + "loss": 0.0342, + "num_input_tokens_seen": 62292392, + "step": 92400 + }, + { + "epoch": 2.257469523367454, + "grad_norm": 0.0471741184592247, + "learning_rate": 1.3371362398240228e-06, + "loss": 0.0007, + "num_input_tokens_seen": 62295272, + "step": 92405 + }, + { + "epoch": 2.2575916741993014, + "grad_norm": 0.07099335640668869, + "learning_rate": 1.3370559535016138e-06, + "loss": 0.0346, + "num_input_tokens_seen": 62298920, + "step": 92410 + }, + { + "epoch": 2.2577138250311486, + "grad_norm": 0.0030002526473253965, + "learning_rate": 1.3369756647280436e-06, + "loss": 0.1336, + "num_input_tokens_seen": 62302120, + "step": 92415 + }, + { + "epoch": 2.257835975862996, + "grad_norm": 0.037754494696855545, + "learning_rate": 1.3368953735038951e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62305192, + "step": 92420 + }, + { + "epoch": 2.257958126694843, + "grad_norm": 0.003917124588042498, + "learning_rate": 1.3368150798297535e-06, + "loss": 0.0837, + "num_input_tokens_seen": 62308776, + "step": 92425 + }, + { + "epoch": 2.25808027752669, + "grad_norm": 0.027190549299120903, + "learning_rate": 1.336734783706202e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62311848, + "step": 92430 + }, + { + "epoch": 2.258202428358537, + "grad_norm": 0.0012887478806078434, + "learning_rate": 1.3366544851338244e-06, + "loss": 0.0527, + "num_input_tokens_seen": 62315944, + "step": 92435 + }, + { + "epoch": 2.258324579190384, + "grad_norm": 0.010424080304801464, + "learning_rate": 1.3365741841132047e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62319656, + "step": 92440 + }, + { + "epoch": 2.2584467300222313, + "grad_norm": 0.14947333931922913, + "learning_rate": 1.3364938806449271e-06, + "loss": 0.1209, + "num_input_tokens_seen": 62323368, + "step": 92445 + }, + { + "epoch": 2.2585688808540785, + "grad_norm": 0.005441546440124512, + "learning_rate": 1.3364135747295752e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62326632, + "step": 92450 + }, + { + "epoch": 2.2586910316859257, + "grad_norm": 0.0159373190253973, + "learning_rate": 1.3363332663677338e-06, + "loss": 0.0502, + "num_input_tokens_seen": 62330664, + "step": 92455 + }, + { + "epoch": 2.258813182517773, + "grad_norm": 0.2237975001335144, + "learning_rate": 1.3362529555599861e-06, + "loss": 0.0453, + "num_input_tokens_seen": 62334248, + "step": 92460 + }, + { + "epoch": 2.25893533334962, + "grad_norm": 0.01939545013010502, + "learning_rate": 1.3361726423069165e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62337768, + "step": 92465 + }, + { + "epoch": 2.2590574841814672, + "grad_norm": 158.32150268554688, + "learning_rate": 1.3360923266091093e-06, + "loss": 0.0844, + "num_input_tokens_seen": 62341096, + "step": 92470 + }, + { + "epoch": 2.2591796350133144, + "grad_norm": 0.02396237477660179, + "learning_rate": 1.3360120084671481e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62344616, + "step": 92475 + }, + { + "epoch": 2.2593017858451616, + "grad_norm": 0.05617120489478111, + "learning_rate": 1.3359316878816174e-06, + "loss": 0.001, + "num_input_tokens_seen": 62347944, + "step": 92480 + }, + { + "epoch": 2.259423936677009, + "grad_norm": 0.031269315630197525, + "learning_rate": 1.3358513648531008e-06, + "loss": 0.0912, + "num_input_tokens_seen": 62351144, + "step": 92485 + }, + { + "epoch": 2.259546087508856, + "grad_norm": 0.038197267800569534, + "learning_rate": 1.3357710393821828e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62354216, + "step": 92490 + }, + { + "epoch": 2.259668238340703, + "grad_norm": 0.006710516754537821, + "learning_rate": 1.3356907114694477e-06, + "loss": 0.1095, + "num_input_tokens_seen": 62357416, + "step": 92495 + }, + { + "epoch": 2.2597903891725504, + "grad_norm": 13.797673225402832, + "learning_rate": 1.3356103811154792e-06, + "loss": 0.1062, + "num_input_tokens_seen": 62361448, + "step": 92500 + }, + { + "epoch": 2.2599125400043976, + "grad_norm": 0.003856425639241934, + "learning_rate": 1.3355300483208621e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62365032, + "step": 92505 + }, + { + "epoch": 2.2600346908362448, + "grad_norm": 0.1337459236383438, + "learning_rate": 1.3354497130861796e-06, + "loss": 0.0007, + "num_input_tokens_seen": 62368168, + "step": 92510 + }, + { + "epoch": 2.260156841668092, + "grad_norm": 0.06924041360616684, + "learning_rate": 1.335369375412017e-06, + "loss": 0.0373, + "num_input_tokens_seen": 62371368, + "step": 92515 + }, + { + "epoch": 2.2602789924999387, + "grad_norm": 0.06534288823604584, + "learning_rate": 1.335289035298958e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62374760, + "step": 92520 + }, + { + "epoch": 2.2604011433317863, + "grad_norm": 8.957152366638184, + "learning_rate": 1.3352086927475872e-06, + "loss": 0.1364, + "num_input_tokens_seen": 62377704, + "step": 92525 + }, + { + "epoch": 2.260523294163633, + "grad_norm": 0.004728742875158787, + "learning_rate": 1.3351283477584883e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62380904, + "step": 92530 + }, + { + "epoch": 2.2606454449954803, + "grad_norm": 0.2021309733390808, + "learning_rate": 1.3350480003322463e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62384424, + "step": 92535 + }, + { + "epoch": 2.2607675958273274, + "grad_norm": 0.004681295249611139, + "learning_rate": 1.334967650469445e-06, + "loss": 0.0008, + "num_input_tokens_seen": 62387688, + "step": 92540 + }, + { + "epoch": 2.2608897466591746, + "grad_norm": 0.2683848440647125, + "learning_rate": 1.3348872981706685e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62391272, + "step": 92545 + }, + { + "epoch": 2.261011897491022, + "grad_norm": 0.4058259427547455, + "learning_rate": 1.3348069434365017e-06, + "loss": 0.0563, + "num_input_tokens_seen": 62394536, + "step": 92550 + }, + { + "epoch": 2.261134048322869, + "grad_norm": 0.12689943611621857, + "learning_rate": 1.3347265862675288e-06, + "loss": 0.0943, + "num_input_tokens_seen": 62397864, + "step": 92555 + }, + { + "epoch": 2.261256199154716, + "grad_norm": 0.039587125182151794, + "learning_rate": 1.3346462266643342e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62401256, + "step": 92560 + }, + { + "epoch": 2.2613783499865634, + "grad_norm": 0.006709498818963766, + "learning_rate": 1.334565864627502e-06, + "loss": 0.0395, + "num_input_tokens_seen": 62404648, + "step": 92565 + }, + { + "epoch": 2.2615005008184106, + "grad_norm": 0.02049305848777294, + "learning_rate": 1.334485500157617e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62407976, + "step": 92570 + }, + { + "epoch": 2.2616226516502578, + "grad_norm": 0.08842063695192337, + "learning_rate": 1.3344051332552635e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62411240, + "step": 92575 + }, + { + "epoch": 2.261744802482105, + "grad_norm": 0.0080694779753685, + "learning_rate": 1.334324763921026e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62414952, + "step": 92580 + }, + { + "epoch": 2.261866953313952, + "grad_norm": 23.28043556213379, + "learning_rate": 1.334244392155489e-06, + "loss": 0.0822, + "num_input_tokens_seen": 62417960, + "step": 92585 + }, + { + "epoch": 2.2619891041457993, + "grad_norm": 0.1557777374982834, + "learning_rate": 1.3341640179592363e-06, + "loss": 0.004, + "num_input_tokens_seen": 62422120, + "step": 92590 + }, + { + "epoch": 2.2621112549776465, + "grad_norm": 11.396211624145508, + "learning_rate": 1.3340836413328536e-06, + "loss": 0.1738, + "num_input_tokens_seen": 62425384, + "step": 92595 + }, + { + "epoch": 2.2622334058094937, + "grad_norm": 0.22009818255901337, + "learning_rate": 1.3340032622769245e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62428584, + "step": 92600 + }, + { + "epoch": 2.262355556641341, + "grad_norm": 0.5843344330787659, + "learning_rate": 1.3339228807920337e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62432296, + "step": 92605 + }, + { + "epoch": 2.262477707473188, + "grad_norm": 0.17534910142421722, + "learning_rate": 1.333842496878766e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62435880, + "step": 92610 + }, + { + "epoch": 2.262599858305035, + "grad_norm": 10.780598640441895, + "learning_rate": 1.333762110537706e-06, + "loss": 0.0238, + "num_input_tokens_seen": 62439272, + "step": 92615 + }, + { + "epoch": 2.262722009136882, + "grad_norm": 0.1346311718225479, + "learning_rate": 1.3336817217694383e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62442664, + "step": 92620 + }, + { + "epoch": 2.2628441599687292, + "grad_norm": 571.826416015625, + "learning_rate": 1.333601330574547e-06, + "loss": 0.0877, + "num_input_tokens_seen": 62446120, + "step": 92625 + }, + { + "epoch": 2.2629663108005764, + "grad_norm": 0.005242965184152126, + "learning_rate": 1.3335209369536174e-06, + "loss": 0.0961, + "num_input_tokens_seen": 62449640, + "step": 92630 + }, + { + "epoch": 2.2630884616324236, + "grad_norm": 0.14449675381183624, + "learning_rate": 1.3334405409072336e-06, + "loss": 0.0885, + "num_input_tokens_seen": 62453160, + "step": 92635 + }, + { + "epoch": 2.263210612464271, + "grad_norm": 32.600486755371094, + "learning_rate": 1.3333601424359806e-06, + "loss": 0.046, + "num_input_tokens_seen": 62456552, + "step": 92640 + }, + { + "epoch": 2.263332763296118, + "grad_norm": 0.040783826261758804, + "learning_rate": 1.3332797415404431e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62460008, + "step": 92645 + }, + { + "epoch": 2.263454914127965, + "grad_norm": 0.06487621366977692, + "learning_rate": 1.3331993382212058e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62463400, + "step": 92650 + }, + { + "epoch": 2.2635770649598124, + "grad_norm": 0.0489821583032608, + "learning_rate": 1.333118932478853e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62466856, + "step": 92655 + }, + { + "epoch": 2.2636992157916596, + "grad_norm": 0.2700420320034027, + "learning_rate": 1.3330385243139697e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62469928, + "step": 92660 + }, + { + "epoch": 2.2638213666235067, + "grad_norm": 8.69739818572998, + "learning_rate": 1.332958113727141e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62472936, + "step": 92665 + }, + { + "epoch": 2.263943517455354, + "grad_norm": 0.021106649190187454, + "learning_rate": 1.3328777007189507e-06, + "loss": 0.009, + "num_input_tokens_seen": 62476200, + "step": 92670 + }, + { + "epoch": 2.264065668287201, + "grad_norm": 0.10658904165029526, + "learning_rate": 1.3327972852899847e-06, + "loss": 0.0429, + "num_input_tokens_seen": 62479400, + "step": 92675 + }, + { + "epoch": 2.2641878191190483, + "grad_norm": 0.09654999524354935, + "learning_rate": 1.3327168674408273e-06, + "loss": 0.0715, + "num_input_tokens_seen": 62483432, + "step": 92680 + }, + { + "epoch": 2.2643099699508955, + "grad_norm": 0.13167276978492737, + "learning_rate": 1.3326364471720632e-06, + "loss": 0.0844, + "num_input_tokens_seen": 62487080, + "step": 92685 + }, + { + "epoch": 2.2644321207827427, + "grad_norm": 0.17308863997459412, + "learning_rate": 1.332556024484278e-06, + "loss": 0.0573, + "num_input_tokens_seen": 62489896, + "step": 92690 + }, + { + "epoch": 2.26455427161459, + "grad_norm": 0.03751340135931969, + "learning_rate": 1.3324755993780554e-06, + "loss": 0.039, + "num_input_tokens_seen": 62493736, + "step": 92695 + }, + { + "epoch": 2.2646764224464366, + "grad_norm": 0.036548275500535965, + "learning_rate": 1.3323951718539808e-06, + "loss": 0.0007, + "num_input_tokens_seen": 62497128, + "step": 92700 + }, + { + "epoch": 2.2647985732782843, + "grad_norm": 0.049087874591350555, + "learning_rate": 1.332314741912639e-06, + "loss": 0.0034, + "num_input_tokens_seen": 62500520, + "step": 92705 + }, + { + "epoch": 2.264920724110131, + "grad_norm": 0.05646089091897011, + "learning_rate": 1.3322343095546153e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62503528, + "step": 92710 + }, + { + "epoch": 2.265042874941978, + "grad_norm": 15.631532669067383, + "learning_rate": 1.3321538747804942e-06, + "loss": 0.083, + "num_input_tokens_seen": 62506408, + "step": 92715 + }, + { + "epoch": 2.2651650257738254, + "grad_norm": 50.563568115234375, + "learning_rate": 1.3320734375908608e-06, + "loss": 0.0493, + "num_input_tokens_seen": 62509608, + "step": 92720 + }, + { + "epoch": 2.2652871766056726, + "grad_norm": 0.07339024543762207, + "learning_rate": 1.3319929979863e-06, + "loss": 0.0335, + "num_input_tokens_seen": 62512744, + "step": 92725 + }, + { + "epoch": 2.2654093274375198, + "grad_norm": 0.013352487236261368, + "learning_rate": 1.3319125559673968e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62516136, + "step": 92730 + }, + { + "epoch": 2.265531478269367, + "grad_norm": 80.81047058105469, + "learning_rate": 1.3318321115347364e-06, + "loss": 0.0446, + "num_input_tokens_seen": 62519592, + "step": 92735 + }, + { + "epoch": 2.265653629101214, + "grad_norm": 0.11078570783138275, + "learning_rate": 1.3317516646889036e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62522728, + "step": 92740 + }, + { + "epoch": 2.2657757799330613, + "grad_norm": 0.06942203640937805, + "learning_rate": 1.3316712154304835e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62526312, + "step": 92745 + }, + { + "epoch": 2.2658979307649085, + "grad_norm": 0.015332935377955437, + "learning_rate": 1.331590763760061e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62529640, + "step": 92750 + }, + { + "epoch": 2.2660200815967557, + "grad_norm": 0.1101103127002716, + "learning_rate": 1.3315103096782215e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62532904, + "step": 92755 + }, + { + "epoch": 2.266142232428603, + "grad_norm": 0.006774018984287977, + "learning_rate": 1.3314298531855492e-06, + "loss": 0.0367, + "num_input_tokens_seen": 62536040, + "step": 92760 + }, + { + "epoch": 2.26626438326045, + "grad_norm": 44.68410873413086, + "learning_rate": 1.3313493942826304e-06, + "loss": 0.0282, + "num_input_tokens_seen": 62539304, + "step": 92765 + }, + { + "epoch": 2.2663865340922973, + "grad_norm": 0.03351452574133873, + "learning_rate": 1.3312689329700497e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62542696, + "step": 92770 + }, + { + "epoch": 2.2665086849241445, + "grad_norm": 0.010534519329667091, + "learning_rate": 1.331188469248392e-06, + "loss": 0.0318, + "num_input_tokens_seen": 62546024, + "step": 92775 + }, + { + "epoch": 2.2666308357559917, + "grad_norm": 0.11503587663173676, + "learning_rate": 1.3311080031182428e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62549032, + "step": 92780 + }, + { + "epoch": 2.2667529865878384, + "grad_norm": 0.00525831151753664, + "learning_rate": 1.331027534580187e-06, + "loss": 0.0931, + "num_input_tokens_seen": 62552360, + "step": 92785 + }, + { + "epoch": 2.266875137419686, + "grad_norm": 0.010182957164943218, + "learning_rate": 1.3309470636348103e-06, + "loss": 0.0513, + "num_input_tokens_seen": 62555752, + "step": 92790 + }, + { + "epoch": 2.266997288251533, + "grad_norm": 44.56449508666992, + "learning_rate": 1.3308665902826972e-06, + "loss": 0.0737, + "num_input_tokens_seen": 62559208, + "step": 92795 + }, + { + "epoch": 2.26711943908338, + "grad_norm": 0.00906476378440857, + "learning_rate": 1.3307861145244335e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62562472, + "step": 92800 + }, + { + "epoch": 2.267241589915227, + "grad_norm": 0.006960950791835785, + "learning_rate": 1.330705636360604e-06, + "loss": 0.081, + "num_input_tokens_seen": 62565480, + "step": 92805 + }, + { + "epoch": 2.2673637407470744, + "grad_norm": 0.6850554943084717, + "learning_rate": 1.3306251557917942e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62568936, + "step": 92810 + }, + { + "epoch": 2.2674858915789216, + "grad_norm": 0.059107035398483276, + "learning_rate": 1.3305446728185894e-06, + "loss": 0.0497, + "num_input_tokens_seen": 62572264, + "step": 92815 + }, + { + "epoch": 2.2676080424107687, + "grad_norm": 0.08122166246175766, + "learning_rate": 1.330464187441575e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62576360, + "step": 92820 + }, + { + "epoch": 2.267730193242616, + "grad_norm": 0.02651078999042511, + "learning_rate": 1.3303836996613359e-06, + "loss": 0.0281, + "num_input_tokens_seen": 62579688, + "step": 92825 + }, + { + "epoch": 2.267852344074463, + "grad_norm": 0.04520462080836296, + "learning_rate": 1.3303032094784575e-06, + "loss": 0.048, + "num_input_tokens_seen": 62582632, + "step": 92830 + }, + { + "epoch": 2.2679744949063103, + "grad_norm": 0.026764435693621635, + "learning_rate": 1.3302227168935255e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62585896, + "step": 92835 + }, + { + "epoch": 2.2680966457381575, + "grad_norm": 0.014396784827113152, + "learning_rate": 1.3301422219071252e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62589800, + "step": 92840 + }, + { + "epoch": 2.2682187965700047, + "grad_norm": 0.07177358120679855, + "learning_rate": 1.330061724519842e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62593704, + "step": 92845 + }, + { + "epoch": 2.268340947401852, + "grad_norm": 0.008762385696172714, + "learning_rate": 1.329981224732261e-06, + "loss": 0.0553, + "num_input_tokens_seen": 62596520, + "step": 92850 + }, + { + "epoch": 2.268463098233699, + "grad_norm": 0.05073574557900429, + "learning_rate": 1.3299007225449677e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62599784, + "step": 92855 + }, + { + "epoch": 2.2685852490655463, + "grad_norm": 0.043613236397504807, + "learning_rate": 1.3298202179585475e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62603176, + "step": 92860 + }, + { + "epoch": 2.2687073998973935, + "grad_norm": 0.2333085983991623, + "learning_rate": 1.3297397109735862e-06, + "loss": 0.002, + "num_input_tokens_seen": 62606888, + "step": 92865 + }, + { + "epoch": 2.2688295507292406, + "grad_norm": 0.4680311679840088, + "learning_rate": 1.329659201590669e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62609960, + "step": 92870 + }, + { + "epoch": 2.268951701561088, + "grad_norm": 0.013720767572522163, + "learning_rate": 1.3295786898103814e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62613032, + "step": 92875 + }, + { + "epoch": 2.2690738523929346, + "grad_norm": 0.00633205333724618, + "learning_rate": 1.3294981756333087e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62616104, + "step": 92880 + }, + { + "epoch": 2.269196003224782, + "grad_norm": 0.015389472246170044, + "learning_rate": 1.3294176590600368e-06, + "loss": 0.0644, + "num_input_tokens_seen": 62619240, + "step": 92885 + }, + { + "epoch": 2.269318154056629, + "grad_norm": 0.04347032308578491, + "learning_rate": 1.3293371400911513e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62622568, + "step": 92890 + }, + { + "epoch": 2.269440304888476, + "grad_norm": 0.0029214448295533657, + "learning_rate": 1.3292566187272374e-06, + "loss": 0.0679, + "num_input_tokens_seen": 62626152, + "step": 92895 + }, + { + "epoch": 2.2695624557203233, + "grad_norm": 42.35622787475586, + "learning_rate": 1.3291760949688806e-06, + "loss": 0.0608, + "num_input_tokens_seen": 62629224, + "step": 92900 + }, + { + "epoch": 2.2696846065521705, + "grad_norm": 0.01074572466313839, + "learning_rate": 1.329095568816667e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62632872, + "step": 92905 + }, + { + "epoch": 2.2698067573840177, + "grad_norm": 0.012575157918035984, + "learning_rate": 1.3290150402711817e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62636136, + "step": 92910 + }, + { + "epoch": 2.269928908215865, + "grad_norm": 0.08424925804138184, + "learning_rate": 1.3289345093330104e-06, + "loss": 0.1128, + "num_input_tokens_seen": 62639272, + "step": 92915 + }, + { + "epoch": 2.270051059047712, + "grad_norm": 2.9216606616973877, + "learning_rate": 1.3288539760027391e-06, + "loss": 0.0308, + "num_input_tokens_seen": 62642472, + "step": 92920 + }, + { + "epoch": 2.2701732098795593, + "grad_norm": 241.16482543945312, + "learning_rate": 1.3287734402809533e-06, + "loss": 0.0691, + "num_input_tokens_seen": 62645992, + "step": 92925 + }, + { + "epoch": 2.2702953607114065, + "grad_norm": 0.0212919432669878, + "learning_rate": 1.3286929021682385e-06, + "loss": 0.0029, + "num_input_tokens_seen": 62649704, + "step": 92930 + }, + { + "epoch": 2.2704175115432537, + "grad_norm": 0.13828763365745544, + "learning_rate": 1.3286123616651806e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62652968, + "step": 92935 + }, + { + "epoch": 2.270539662375101, + "grad_norm": 0.008991804905235767, + "learning_rate": 1.3285318187723652e-06, + "loss": 0.0502, + "num_input_tokens_seen": 62656424, + "step": 92940 + }, + { + "epoch": 2.270661813206948, + "grad_norm": 0.016623621806502342, + "learning_rate": 1.3284512734903779e-06, + "loss": 0.0548, + "num_input_tokens_seen": 62659304, + "step": 92945 + }, + { + "epoch": 2.2707839640387952, + "grad_norm": 0.018210897222161293, + "learning_rate": 1.3283707258198047e-06, + "loss": 0.0646, + "num_input_tokens_seen": 62662888, + "step": 92950 + }, + { + "epoch": 2.2709061148706424, + "grad_norm": 0.004802016541361809, + "learning_rate": 1.3282901757612314e-06, + "loss": 0.0716, + "num_input_tokens_seen": 62666280, + "step": 92955 + }, + { + "epoch": 2.2710282657024896, + "grad_norm": 0.012645594775676727, + "learning_rate": 1.3282096233152435e-06, + "loss": 0.0642, + "num_input_tokens_seen": 62669992, + "step": 92960 + }, + { + "epoch": 2.2711504165343364, + "grad_norm": 0.010564680211246014, + "learning_rate": 1.3281290684824268e-06, + "loss": 0.013, + "num_input_tokens_seen": 62673128, + "step": 92965 + }, + { + "epoch": 2.271272567366184, + "grad_norm": 0.008313731290400028, + "learning_rate": 1.3280485112633675e-06, + "loss": 0.1342, + "num_input_tokens_seen": 62676328, + "step": 92970 + }, + { + "epoch": 2.2713947181980307, + "grad_norm": 0.023614229634404182, + "learning_rate": 1.327967951658651e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62680232, + "step": 92975 + }, + { + "epoch": 2.271516869029878, + "grad_norm": 0.0680750384926796, + "learning_rate": 1.3278873896688633e-06, + "loss": 0.0007, + "num_input_tokens_seen": 62683624, + "step": 92980 + }, + { + "epoch": 2.271639019861725, + "grad_norm": 0.023569390177726746, + "learning_rate": 1.3278068252945908e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62686952, + "step": 92985 + }, + { + "epoch": 2.2717611706935723, + "grad_norm": 104.64521026611328, + "learning_rate": 1.327726258536418e-06, + "loss": 0.1115, + "num_input_tokens_seen": 62690600, + "step": 92990 + }, + { + "epoch": 2.2718833215254195, + "grad_norm": 0.01823197677731514, + "learning_rate": 1.3276456893949325e-06, + "loss": 0.0355, + "num_input_tokens_seen": 62693928, + "step": 92995 + }, + { + "epoch": 2.2720054723572667, + "grad_norm": 0.02083517052233219, + "learning_rate": 1.3275651178707194e-06, + "loss": 0.0633, + "num_input_tokens_seen": 62697320, + "step": 93000 + }, + { + "epoch": 2.272127623189114, + "grad_norm": 0.01520773395895958, + "learning_rate": 1.3274845439643645e-06, + "loss": 0.0125, + "num_input_tokens_seen": 62700712, + "step": 93005 + }, + { + "epoch": 2.272249774020961, + "grad_norm": 0.005817287135869265, + "learning_rate": 1.3274039676764535e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62703656, + "step": 93010 + }, + { + "epoch": 2.2723719248528083, + "grad_norm": 0.012480477802455425, + "learning_rate": 1.3273233890075733e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62707240, + "step": 93015 + }, + { + "epoch": 2.2724940756846554, + "grad_norm": 0.010067179799079895, + "learning_rate": 1.327242807958309e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62710376, + "step": 93020 + }, + { + "epoch": 2.2726162265165026, + "grad_norm": 0.05216876044869423, + "learning_rate": 1.3271622245292473e-06, + "loss": 0.0431, + "num_input_tokens_seen": 62713512, + "step": 93025 + }, + { + "epoch": 2.27273837734835, + "grad_norm": 0.0039581493474543095, + "learning_rate": 1.3270816387209738e-06, + "loss": 0.0336, + "num_input_tokens_seen": 62716904, + "step": 93030 + }, + { + "epoch": 2.272860528180197, + "grad_norm": 0.012547915801405907, + "learning_rate": 1.3270010505340748e-06, + "loss": 0.0526, + "num_input_tokens_seen": 62720296, + "step": 93035 + }, + { + "epoch": 2.272982679012044, + "grad_norm": 0.6097759008407593, + "learning_rate": 1.3269204599691357e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62723432, + "step": 93040 + }, + { + "epoch": 2.2731048298438914, + "grad_norm": 0.04886188358068466, + "learning_rate": 1.3268398670267438e-06, + "loss": 0.0525, + "num_input_tokens_seen": 62726824, + "step": 93045 + }, + { + "epoch": 2.2732269806757386, + "grad_norm": 0.056709174066782, + "learning_rate": 1.326759271707484e-06, + "loss": 0.0008, + "num_input_tokens_seen": 62730024, + "step": 93050 + }, + { + "epoch": 2.2733491315075858, + "grad_norm": 0.023183194920420647, + "learning_rate": 1.3266786740119428e-06, + "loss": 0.0525, + "num_input_tokens_seen": 62733096, + "step": 93055 + }, + { + "epoch": 2.2734712823394325, + "grad_norm": 0.004460521508008242, + "learning_rate": 1.3265980739407068e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62737320, + "step": 93060 + }, + { + "epoch": 2.2735934331712797, + "grad_norm": 0.025767408311367035, + "learning_rate": 1.3265174714943618e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62740904, + "step": 93065 + }, + { + "epoch": 2.273715584003127, + "grad_norm": 0.015531780198216438, + "learning_rate": 1.3264368666734933e-06, + "loss": 0.0359, + "num_input_tokens_seen": 62744296, + "step": 93070 + }, + { + "epoch": 2.273837734834974, + "grad_norm": 0.019820986315608025, + "learning_rate": 1.3263562594786886e-06, + "loss": 0.0389, + "num_input_tokens_seen": 62747304, + "step": 93075 + }, + { + "epoch": 2.2739598856668213, + "grad_norm": 21.325904846191406, + "learning_rate": 1.3262756499105333e-06, + "loss": 0.0492, + "num_input_tokens_seen": 62750632, + "step": 93080 + }, + { + "epoch": 2.2740820364986685, + "grad_norm": 0.04072573408484459, + "learning_rate": 1.3261950379696136e-06, + "loss": 0.1146, + "num_input_tokens_seen": 62753832, + "step": 93085 + }, + { + "epoch": 2.2742041873305157, + "grad_norm": 0.11378677934408188, + "learning_rate": 1.326114423656516e-06, + "loss": 0.0013, + "num_input_tokens_seen": 62757416, + "step": 93090 + }, + { + "epoch": 2.274326338162363, + "grad_norm": 86.40130615234375, + "learning_rate": 1.3260338069718266e-06, + "loss": 0.037, + "num_input_tokens_seen": 62760424, + "step": 93095 + }, + { + "epoch": 2.27444848899421, + "grad_norm": 0.27668333053588867, + "learning_rate": 1.3259531879161316e-06, + "loss": 0.0509, + "num_input_tokens_seen": 62764072, + "step": 93100 + }, + { + "epoch": 2.2745706398260572, + "grad_norm": 0.05763066187500954, + "learning_rate": 1.3258725664900173e-06, + "loss": 0.0347, + "num_input_tokens_seen": 62767720, + "step": 93105 + }, + { + "epoch": 2.2746927906579044, + "grad_norm": 0.0003041908785235137, + "learning_rate": 1.3257919426940703e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62771112, + "step": 93110 + }, + { + "epoch": 2.2748149414897516, + "grad_norm": 0.09503401070833206, + "learning_rate": 1.3257113165288764e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62774440, + "step": 93115 + }, + { + "epoch": 2.274937092321599, + "grad_norm": 0.01079515554010868, + "learning_rate": 1.3256306879950224e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62777960, + "step": 93120 + }, + { + "epoch": 2.275059243153446, + "grad_norm": 25.221628189086914, + "learning_rate": 1.3255500570930945e-06, + "loss": 0.0501, + "num_input_tokens_seen": 62781160, + "step": 93125 + }, + { + "epoch": 2.275181393985293, + "grad_norm": 0.0537249855697155, + "learning_rate": 1.3254694238236788e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62784168, + "step": 93130 + }, + { + "epoch": 2.2753035448171404, + "grad_norm": 0.0006929787923581898, + "learning_rate": 1.3253887881873618e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62787112, + "step": 93135 + }, + { + "epoch": 2.2754256956489876, + "grad_norm": 0.08567481487989426, + "learning_rate": 1.32530815018473e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62790760, + "step": 93140 + }, + { + "epoch": 2.2755478464808343, + "grad_norm": 0.028937041759490967, + "learning_rate": 1.3252275098163701e-06, + "loss": 0.0524, + "num_input_tokens_seen": 62794344, + "step": 93145 + }, + { + "epoch": 2.275669997312682, + "grad_norm": 41.68051528930664, + "learning_rate": 1.3251468670828683e-06, + "loss": 0.1383, + "num_input_tokens_seen": 62797672, + "step": 93150 + }, + { + "epoch": 2.2757921481445287, + "grad_norm": 0.09774192422628403, + "learning_rate": 1.325066221984811e-06, + "loss": 0.0647, + "num_input_tokens_seen": 62801320, + "step": 93155 + }, + { + "epoch": 2.275914298976376, + "grad_norm": 0.010497506707906723, + "learning_rate": 1.3249855745227847e-06, + "loss": 0.1175, + "num_input_tokens_seen": 62804392, + "step": 93160 + }, + { + "epoch": 2.276036449808223, + "grad_norm": 0.33297374844551086, + "learning_rate": 1.3249049246973757e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62807528, + "step": 93165 + }, + { + "epoch": 2.2761586006400703, + "grad_norm": 0.11804129928350449, + "learning_rate": 1.3248242725091707e-06, + "loss": 0.0396, + "num_input_tokens_seen": 62810792, + "step": 93170 + }, + { + "epoch": 2.2762807514719174, + "grad_norm": 0.05837703496217728, + "learning_rate": 1.3247436179587563e-06, + "loss": 0.0005, + "num_input_tokens_seen": 62814120, + "step": 93175 + }, + { + "epoch": 2.2764029023037646, + "grad_norm": 0.28625261783599854, + "learning_rate": 1.324662961046719e-06, + "loss": 0.0692, + "num_input_tokens_seen": 62817576, + "step": 93180 + }, + { + "epoch": 2.276525053135612, + "grad_norm": 0.04373976215720177, + "learning_rate": 1.3245823017736454e-06, + "loss": 0.0486, + "num_input_tokens_seen": 62821224, + "step": 93185 + }, + { + "epoch": 2.276647203967459, + "grad_norm": 0.07923418283462524, + "learning_rate": 1.324501640140122e-06, + "loss": 0.0341, + "num_input_tokens_seen": 62824552, + "step": 93190 + }, + { + "epoch": 2.276769354799306, + "grad_norm": 25.96709442138672, + "learning_rate": 1.3244209761467352e-06, + "loss": 0.0629, + "num_input_tokens_seen": 62827752, + "step": 93195 + }, + { + "epoch": 2.2768915056311534, + "grad_norm": 0.0062352376990020275, + "learning_rate": 1.324340309794072e-06, + "loss": 0.0069, + "num_input_tokens_seen": 62830696, + "step": 93200 + }, + { + "epoch": 2.2770136564630006, + "grad_norm": 14.839695930480957, + "learning_rate": 1.3242596410827187e-06, + "loss": 0.0998, + "num_input_tokens_seen": 62834088, + "step": 93205 + }, + { + "epoch": 2.2771358072948478, + "grad_norm": 0.013603686355054379, + "learning_rate": 1.3241789700132621e-06, + "loss": 0.0689, + "num_input_tokens_seen": 62837288, + "step": 93210 + }, + { + "epoch": 2.277257958126695, + "grad_norm": 0.03261496499180794, + "learning_rate": 1.324098296586289e-06, + "loss": 0.1573, + "num_input_tokens_seen": 62840488, + "step": 93215 + }, + { + "epoch": 2.277380108958542, + "grad_norm": 0.0007041191565804183, + "learning_rate": 1.324017620802386e-06, + "loss": 0.0571, + "num_input_tokens_seen": 62843880, + "step": 93220 + }, + { + "epoch": 2.2775022597903893, + "grad_norm": 13.467567443847656, + "learning_rate": 1.3239369426621391e-06, + "loss": 0.1587, + "num_input_tokens_seen": 62847144, + "step": 93225 + }, + { + "epoch": 2.2776244106222365, + "grad_norm": 0.07765517383813858, + "learning_rate": 1.323856262166136e-06, + "loss": 0.0686, + "num_input_tokens_seen": 62850600, + "step": 93230 + }, + { + "epoch": 2.2777465614540837, + "grad_norm": 0.0022951867431402206, + "learning_rate": 1.323775579314963e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62854248, + "step": 93235 + }, + { + "epoch": 2.2778687122859305, + "grad_norm": 0.04524902254343033, + "learning_rate": 1.323694894109207e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62857512, + "step": 93240 + }, + { + "epoch": 2.2779908631177777, + "grad_norm": 0.06177098676562309, + "learning_rate": 1.3236142065494546e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62861096, + "step": 93245 + }, + { + "epoch": 2.278113013949625, + "grad_norm": 39.986083984375, + "learning_rate": 1.3235335166362926e-06, + "loss": 0.1099, + "num_input_tokens_seen": 62864424, + "step": 93250 + }, + { + "epoch": 2.278235164781472, + "grad_norm": 0.056117504835128784, + "learning_rate": 1.323452824370308e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62867624, + "step": 93255 + }, + { + "epoch": 2.2783573156133192, + "grad_norm": 0.06814853101968765, + "learning_rate": 1.3233721297520875e-06, + "loss": 0.0283, + "num_input_tokens_seen": 62871080, + "step": 93260 + }, + { + "epoch": 2.2784794664451664, + "grad_norm": 0.08036066591739655, + "learning_rate": 1.3232914327822177e-06, + "loss": 0.0589, + "num_input_tokens_seen": 62874024, + "step": 93265 + }, + { + "epoch": 2.2786016172770136, + "grad_norm": 0.01860756240785122, + "learning_rate": 1.3232107334612858e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62877160, + "step": 93270 + }, + { + "epoch": 2.278723768108861, + "grad_norm": 0.06197257712483406, + "learning_rate": 1.3231300317898786e-06, + "loss": 0.0421, + "num_input_tokens_seen": 62880296, + "step": 93275 + }, + { + "epoch": 2.278845918940708, + "grad_norm": 27.19537925720215, + "learning_rate": 1.3230493277685826e-06, + "loss": 0.0779, + "num_input_tokens_seen": 62883560, + "step": 93280 + }, + { + "epoch": 2.278968069772555, + "grad_norm": 0.1904158592224121, + "learning_rate": 1.322968621397985e-06, + "loss": 0.0413, + "num_input_tokens_seen": 62886952, + "step": 93285 + }, + { + "epoch": 2.2790902206044024, + "grad_norm": 0.03217314928770065, + "learning_rate": 1.322887912678673e-06, + "loss": 0.0428, + "num_input_tokens_seen": 62890344, + "step": 93290 + }, + { + "epoch": 2.2792123714362496, + "grad_norm": 0.2809275984764099, + "learning_rate": 1.322807201611233e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62894248, + "step": 93295 + }, + { + "epoch": 2.2793345222680967, + "grad_norm": 0.024791276082396507, + "learning_rate": 1.3227264881962522e-06, + "loss": 0.0004, + "num_input_tokens_seen": 62898216, + "step": 93300 + }, + { + "epoch": 2.279456673099944, + "grad_norm": 0.37984398007392883, + "learning_rate": 1.322645772434318e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62902184, + "step": 93305 + }, + { + "epoch": 2.279578823931791, + "grad_norm": 0.07157126814126968, + "learning_rate": 1.3225650543260168e-06, + "loss": 0.1207, + "num_input_tokens_seen": 62905640, + "step": 93310 + }, + { + "epoch": 2.2797009747636383, + "grad_norm": 0.19514942169189453, + "learning_rate": 1.3224843338719356e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62908712, + "step": 93315 + }, + { + "epoch": 2.2798231255954855, + "grad_norm": 0.960155725479126, + "learning_rate": 1.3224036110726614e-06, + "loss": 0.0415, + "num_input_tokens_seen": 62912104, + "step": 93320 + }, + { + "epoch": 2.2799452764273322, + "grad_norm": 0.04093187674880028, + "learning_rate": 1.3223228859287815e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62915688, + "step": 93325 + }, + { + "epoch": 2.28006742725918, + "grad_norm": 0.006360192783176899, + "learning_rate": 1.3222421584408832e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62919144, + "step": 93330 + }, + { + "epoch": 2.2801895780910266, + "grad_norm": 390.7582092285156, + "learning_rate": 1.3221614286095531e-06, + "loss": 0.0564, + "num_input_tokens_seen": 62922728, + "step": 93335 + }, + { + "epoch": 2.280311728922874, + "grad_norm": 0.014665957540273666, + "learning_rate": 1.3220806964353784e-06, + "loss": 0.0039, + "num_input_tokens_seen": 62926120, + "step": 93340 + }, + { + "epoch": 2.280433879754721, + "grad_norm": 0.0038363176863640547, + "learning_rate": 1.3219999619189462e-06, + "loss": 0.036, + "num_input_tokens_seen": 62929384, + "step": 93345 + }, + { + "epoch": 2.280556030586568, + "grad_norm": 0.015091368928551674, + "learning_rate": 1.3219192250608436e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62932584, + "step": 93350 + }, + { + "epoch": 2.2806781814184154, + "grad_norm": 0.01974570006132126, + "learning_rate": 1.321838485861658e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62935976, + "step": 93355 + }, + { + "epoch": 2.2808003322502626, + "grad_norm": 0.009472187608480453, + "learning_rate": 1.3217577443219763e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62939176, + "step": 93360 + }, + { + "epoch": 2.2809224830821098, + "grad_norm": 0.018192993476986885, + "learning_rate": 1.3216770004423858e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62942312, + "step": 93365 + }, + { + "epoch": 2.281044633913957, + "grad_norm": 0.002576603088527918, + "learning_rate": 1.3215962542234735e-06, + "loss": 0.0, + "num_input_tokens_seen": 62945448, + "step": 93370 + }, + { + "epoch": 2.281166784745804, + "grad_norm": 0.1858045905828476, + "learning_rate": 1.321515505665827e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62948584, + "step": 93375 + }, + { + "epoch": 2.2812889355776513, + "grad_norm": 0.042001720517873764, + "learning_rate": 1.321434754770033e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62951528, + "step": 93380 + }, + { + "epoch": 2.2814110864094985, + "grad_norm": 0.17240160703659058, + "learning_rate": 1.3213540015366789e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62954664, + "step": 93385 + }, + { + "epoch": 2.2815332372413457, + "grad_norm": 0.08950382471084595, + "learning_rate": 1.3212732459663524e-06, + "loss": 0.0003, + "num_input_tokens_seen": 62958504, + "step": 93390 + }, + { + "epoch": 2.281655388073193, + "grad_norm": 0.003479085164144635, + "learning_rate": 1.32119248805964e-06, + "loss": 0.0001, + "num_input_tokens_seen": 62961384, + "step": 93395 + }, + { + "epoch": 2.28177753890504, + "grad_norm": 0.0005125111783854663, + "learning_rate": 1.3211117278171297e-06, + "loss": 0.0355, + "num_input_tokens_seen": 62965224, + "step": 93400 + }, + { + "epoch": 2.2818996897368873, + "grad_norm": 0.018022790551185608, + "learning_rate": 1.3210309652394087e-06, + "loss": 0.0006, + "num_input_tokens_seen": 62968488, + "step": 93405 + }, + { + "epoch": 2.282021840568734, + "grad_norm": 133.34310913085938, + "learning_rate": 1.3209502003270641e-06, + "loss": 0.0325, + "num_input_tokens_seen": 62972264, + "step": 93410 + }, + { + "epoch": 2.2821439914005817, + "grad_norm": 0.027756785973906517, + "learning_rate": 1.3208694330806834e-06, + "loss": 0.1406, + "num_input_tokens_seen": 62975464, + "step": 93415 + }, + { + "epoch": 2.2822661422324284, + "grad_norm": 0.0014253148110583425, + "learning_rate": 1.3207886635008535e-06, + "loss": 0.0667, + "num_input_tokens_seen": 62978792, + "step": 93420 + }, + { + "epoch": 2.2823882930642756, + "grad_norm": 0.0035388257820159197, + "learning_rate": 1.3207078915881624e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62982312, + "step": 93425 + }, + { + "epoch": 2.282510443896123, + "grad_norm": 0.05845514312386513, + "learning_rate": 1.3206271173431973e-06, + "loss": 0.0, + "num_input_tokens_seen": 62985640, + "step": 93430 + }, + { + "epoch": 2.28263259472797, + "grad_norm": 0.18427708745002747, + "learning_rate": 1.3205463407665456e-06, + "loss": 0.0002, + "num_input_tokens_seen": 62988840, + "step": 93435 + }, + { + "epoch": 2.282754745559817, + "grad_norm": 0.00430617481470108, + "learning_rate": 1.3204655618587946e-06, + "loss": 0.1984, + "num_input_tokens_seen": 62992168, + "step": 93440 + }, + { + "epoch": 2.2828768963916644, + "grad_norm": 20.258394241333008, + "learning_rate": 1.3203847806205316e-06, + "loss": 0.0423, + "num_input_tokens_seen": 62995432, + "step": 93445 + }, + { + "epoch": 2.2829990472235115, + "grad_norm": 0.0059243980795145035, + "learning_rate": 1.3203039970523446e-06, + "loss": 0.0516, + "num_input_tokens_seen": 62998696, + "step": 93450 + }, + { + "epoch": 2.2831211980553587, + "grad_norm": 19.95654296875, + "learning_rate": 1.3202232111548208e-06, + "loss": 0.0456, + "num_input_tokens_seen": 63002024, + "step": 93455 + }, + { + "epoch": 2.283243348887206, + "grad_norm": 0.07648692280054092, + "learning_rate": 1.3201424229285476e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63005480, + "step": 93460 + }, + { + "epoch": 2.283365499719053, + "grad_norm": 0.11834115535020828, + "learning_rate": 1.3200616323741129e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63008872, + "step": 93465 + }, + { + "epoch": 2.2834876505509003, + "grad_norm": 0.12660598754882812, + "learning_rate": 1.3199808394921034e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63012328, + "step": 93470 + }, + { + "epoch": 2.2836098013827475, + "grad_norm": 0.01059301383793354, + "learning_rate": 1.3199000442831074e-06, + "loss": 0.062, + "num_input_tokens_seen": 63015912, + "step": 93475 + }, + { + "epoch": 2.2837319522145947, + "grad_norm": 0.0022777384147047997, + "learning_rate": 1.3198192467477122e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63019304, + "step": 93480 + }, + { + "epoch": 2.283854103046442, + "grad_norm": 0.13538259267807007, + "learning_rate": 1.3197384468865057e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63022632, + "step": 93485 + }, + { + "epoch": 2.283976253878289, + "grad_norm": 34.09814453125, + "learning_rate": 1.3196576447000748e-06, + "loss": 0.1495, + "num_input_tokens_seen": 63025704, + "step": 93490 + }, + { + "epoch": 2.2840984047101363, + "grad_norm": 21.362201690673828, + "learning_rate": 1.3195768401890077e-06, + "loss": 0.128, + "num_input_tokens_seen": 63028968, + "step": 93495 + }, + { + "epoch": 2.2842205555419834, + "grad_norm": 0.09496381878852844, + "learning_rate": 1.3194960333538918e-06, + "loss": 0.0016, + "num_input_tokens_seen": 63032488, + "step": 93500 + }, + { + "epoch": 2.28434270637383, + "grad_norm": 5.9453537687659264e-05, + "learning_rate": 1.3194152241953148e-06, + "loss": 0.0014, + "num_input_tokens_seen": 63035816, + "step": 93505 + }, + { + "epoch": 2.2844648572056774, + "grad_norm": 0.012698279693722725, + "learning_rate": 1.3193344127138647e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63039336, + "step": 93510 + }, + { + "epoch": 2.2845870080375246, + "grad_norm": 0.024744637310504913, + "learning_rate": 1.3192535989101285e-06, + "loss": 0.0943, + "num_input_tokens_seen": 63043304, + "step": 93515 + }, + { + "epoch": 2.2847091588693718, + "grad_norm": 0.05719945207238197, + "learning_rate": 1.3191727827846945e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63046888, + "step": 93520 + }, + { + "epoch": 2.284831309701219, + "grad_norm": 0.016214219853281975, + "learning_rate": 1.31909196433815e-06, + "loss": 0.0266, + "num_input_tokens_seen": 63050344, + "step": 93525 + }, + { + "epoch": 2.284953460533066, + "grad_norm": 0.10411170870065689, + "learning_rate": 1.3190111435710828e-06, + "loss": 0.0393, + "num_input_tokens_seen": 63053416, + "step": 93530 + }, + { + "epoch": 2.2850756113649133, + "grad_norm": 0.6027932167053223, + "learning_rate": 1.3189303204840809e-06, + "loss": 0.0315, + "num_input_tokens_seen": 63056680, + "step": 93535 + }, + { + "epoch": 2.2851977621967605, + "grad_norm": 0.018875345587730408, + "learning_rate": 1.3188494950777318e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63060200, + "step": 93540 + }, + { + "epoch": 2.2853199130286077, + "grad_norm": 0.019894888624548912, + "learning_rate": 1.3187686673526238e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63063592, + "step": 93545 + }, + { + "epoch": 2.285442063860455, + "grad_norm": 0.08889276534318924, + "learning_rate": 1.3186878373093438e-06, + "loss": 0.0578, + "num_input_tokens_seen": 63067432, + "step": 93550 + }, + { + "epoch": 2.285564214692302, + "grad_norm": 0.030163150280714035, + "learning_rate": 1.3186070049484806e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63070632, + "step": 93555 + }, + { + "epoch": 2.2856863655241493, + "grad_norm": 0.026965975761413574, + "learning_rate": 1.3185261702706211e-06, + "loss": 0.0592, + "num_input_tokens_seen": 63074472, + "step": 93560 + }, + { + "epoch": 2.2858085163559965, + "grad_norm": 0.012265127152204514, + "learning_rate": 1.3184453332763542e-06, + "loss": 0.1175, + "num_input_tokens_seen": 63077480, + "step": 93565 + }, + { + "epoch": 2.2859306671878437, + "grad_norm": 70.07109832763672, + "learning_rate": 1.3183644939662668e-06, + "loss": 0.0708, + "num_input_tokens_seen": 63082664, + "step": 93570 + }, + { + "epoch": 2.286052818019691, + "grad_norm": 0.15027621388435364, + "learning_rate": 1.318283652340947e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63086248, + "step": 93575 + }, + { + "epoch": 2.286174968851538, + "grad_norm": 390.86004638671875, + "learning_rate": 1.3182028084009832e-06, + "loss": 0.0194, + "num_input_tokens_seen": 63089704, + "step": 93580 + }, + { + "epoch": 2.2862971196833852, + "grad_norm": 135.47552490234375, + "learning_rate": 1.318121962146963e-06, + "loss": 0.1173, + "num_input_tokens_seen": 63092840, + "step": 93585 + }, + { + "epoch": 2.286419270515232, + "grad_norm": 0.01921449974179268, + "learning_rate": 1.3180411135794742e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63096104, + "step": 93590 + }, + { + "epoch": 2.2865414213470796, + "grad_norm": 0.05642332881689072, + "learning_rate": 1.317960262699105e-06, + "loss": 0.0478, + "num_input_tokens_seen": 63099560, + "step": 93595 + }, + { + "epoch": 2.2866635721789264, + "grad_norm": 36.93173599243164, + "learning_rate": 1.317879409506443e-06, + "loss": 0.0823, + "num_input_tokens_seen": 63102952, + "step": 93600 + }, + { + "epoch": 2.2867857230107735, + "grad_norm": 0.023783983662724495, + "learning_rate": 1.3177985540020765e-06, + "loss": 0.1719, + "num_input_tokens_seen": 63106728, + "step": 93605 + }, + { + "epoch": 2.2869078738426207, + "grad_norm": 0.04622891917824745, + "learning_rate": 1.3177176961865934e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63110440, + "step": 93610 + }, + { + "epoch": 2.287030024674468, + "grad_norm": 0.06299111992120743, + "learning_rate": 1.3176368360605818e-06, + "loss": 0.0495, + "num_input_tokens_seen": 63113960, + "step": 93615 + }, + { + "epoch": 2.287152175506315, + "grad_norm": 0.5457538962364197, + "learning_rate": 1.3175559736246302e-06, + "loss": 0.0435, + "num_input_tokens_seen": 63117032, + "step": 93620 + }, + { + "epoch": 2.2872743263381623, + "grad_norm": 0.15435582399368286, + "learning_rate": 1.3174751088793257e-06, + "loss": 0.0382, + "num_input_tokens_seen": 63120360, + "step": 93625 + }, + { + "epoch": 2.2873964771700095, + "grad_norm": 0.006368239410221577, + "learning_rate": 1.3173942418252566e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63123368, + "step": 93630 + }, + { + "epoch": 2.2875186280018567, + "grad_norm": 0.010606862604618073, + "learning_rate": 1.3173133724630114e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63127144, + "step": 93635 + }, + { + "epoch": 2.287640778833704, + "grad_norm": 19.65880012512207, + "learning_rate": 1.3172325007931782e-06, + "loss": 0.0272, + "num_input_tokens_seen": 63130664, + "step": 93640 + }, + { + "epoch": 2.287762929665551, + "grad_norm": 0.015704812481999397, + "learning_rate": 1.3171516268163447e-06, + "loss": 0.0012, + "num_input_tokens_seen": 63133672, + "step": 93645 + }, + { + "epoch": 2.2878850804973982, + "grad_norm": 0.0535929799079895, + "learning_rate": 1.3170707505330993e-06, + "loss": 0.0635, + "num_input_tokens_seen": 63137512, + "step": 93650 + }, + { + "epoch": 2.2880072313292454, + "grad_norm": 0.030435847118496895, + "learning_rate": 1.3169898719440301e-06, + "loss": 0.1236, + "num_input_tokens_seen": 63140776, + "step": 93655 + }, + { + "epoch": 2.2881293821610926, + "grad_norm": 0.016993314027786255, + "learning_rate": 1.3169089910497254e-06, + "loss": 0.0006, + "num_input_tokens_seen": 63144232, + "step": 93660 + }, + { + "epoch": 2.28825153299294, + "grad_norm": 0.0037211801391094923, + "learning_rate": 1.3168281078507735e-06, + "loss": 0.0117, + "num_input_tokens_seen": 63147624, + "step": 93665 + }, + { + "epoch": 2.288373683824787, + "grad_norm": 0.058646202087402344, + "learning_rate": 1.316747222347762e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63150824, + "step": 93670 + }, + { + "epoch": 2.288495834656634, + "grad_norm": 0.15246830880641937, + "learning_rate": 1.3166663345412796e-06, + "loss": 0.0422, + "num_input_tokens_seen": 63154088, + "step": 93675 + }, + { + "epoch": 2.2886179854884814, + "grad_norm": 0.34976163506507874, + "learning_rate": 1.3165854444319148e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63158632, + "step": 93680 + }, + { + "epoch": 2.288740136320328, + "grad_norm": 0.10934649407863617, + "learning_rate": 1.316504552020255e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63161704, + "step": 93685 + }, + { + "epoch": 2.2888622871521753, + "grad_norm": 0.0155659019947052, + "learning_rate": 1.316423657306889e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63164712, + "step": 93690 + }, + { + "epoch": 2.2889844379840225, + "grad_norm": 0.018553772941231728, + "learning_rate": 1.3163427602924052e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63168040, + "step": 93695 + }, + { + "epoch": 2.2891065888158697, + "grad_norm": 0.09233409911394119, + "learning_rate": 1.3162618609773917e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63170984, + "step": 93700 + }, + { + "epoch": 2.289228739647717, + "grad_norm": 0.9830764532089233, + "learning_rate": 1.316180959362437e-06, + "loss": 0.0006, + "num_input_tokens_seen": 63174312, + "step": 93705 + }, + { + "epoch": 2.289350890479564, + "grad_norm": 0.07397404313087463, + "learning_rate": 1.3161000554481292e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63177512, + "step": 93710 + }, + { + "epoch": 2.2894730413114113, + "grad_norm": 0.06915701925754547, + "learning_rate": 1.3160191492350568e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63180776, + "step": 93715 + }, + { + "epoch": 2.2895951921432585, + "grad_norm": 0.0010076145408675075, + "learning_rate": 1.3159382407238083e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63183976, + "step": 93720 + }, + { + "epoch": 2.2897173429751057, + "grad_norm": 0.018281618133187294, + "learning_rate": 1.3158573299149716e-06, + "loss": 0.0764, + "num_input_tokens_seen": 63186856, + "step": 93725 + }, + { + "epoch": 2.289839493806953, + "grad_norm": 0.0028682551346719265, + "learning_rate": 1.3157764168091356e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63190376, + "step": 93730 + }, + { + "epoch": 2.2899616446388, + "grad_norm": 0.02593367174267769, + "learning_rate": 1.3156955014068886e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63193512, + "step": 93735 + }, + { + "epoch": 2.290083795470647, + "grad_norm": 0.05124660208821297, + "learning_rate": 1.3156145837088192e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63196840, + "step": 93740 + }, + { + "epoch": 2.2902059463024944, + "grad_norm": 26.817052841186523, + "learning_rate": 1.3155336637155154e-06, + "loss": 0.0659, + "num_input_tokens_seen": 63200040, + "step": 93745 + }, + { + "epoch": 2.2903280971343416, + "grad_norm": 0.020981481298804283, + "learning_rate": 1.315452741427566e-06, + "loss": 0.1073, + "num_input_tokens_seen": 63203752, + "step": 93750 + }, + { + "epoch": 2.290450247966189, + "grad_norm": 0.016080111265182495, + "learning_rate": 1.3153718168455595e-06, + "loss": 0.1151, + "num_input_tokens_seen": 63207144, + "step": 93755 + }, + { + "epoch": 2.290572398798036, + "grad_norm": 0.04538425803184509, + "learning_rate": 1.315290889970084e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63210536, + "step": 93760 + }, + { + "epoch": 2.290694549629883, + "grad_norm": 0.043526507914066315, + "learning_rate": 1.3152099608017286e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63213800, + "step": 93765 + }, + { + "epoch": 2.29081670046173, + "grad_norm": 797.885498046875, + "learning_rate": 1.3151290293410818e-06, + "loss": 0.0102, + "num_input_tokens_seen": 63217320, + "step": 93770 + }, + { + "epoch": 2.2909388512935775, + "grad_norm": 0.13713596761226654, + "learning_rate": 1.315048095588732e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63220712, + "step": 93775 + }, + { + "epoch": 2.2910610021254243, + "grad_norm": 0.03999638929963112, + "learning_rate": 1.3149671595452674e-06, + "loss": 0.0384, + "num_input_tokens_seen": 63224104, + "step": 93780 + }, + { + "epoch": 2.2911831529572715, + "grad_norm": 0.03355813026428223, + "learning_rate": 1.3148862212112765e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63227304, + "step": 93785 + }, + { + "epoch": 2.2913053037891187, + "grad_norm": 0.0039594462141394615, + "learning_rate": 1.314805280587349e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63230376, + "step": 93790 + }, + { + "epoch": 2.291427454620966, + "grad_norm": 0.012313015758991241, + "learning_rate": 1.3147243376740724e-06, + "loss": 0.002, + "num_input_tokens_seen": 63233320, + "step": 93795 + }, + { + "epoch": 2.291549605452813, + "grad_norm": 0.011920818127691746, + "learning_rate": 1.314643392472036e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63236648, + "step": 93800 + }, + { + "epoch": 2.2916717562846602, + "grad_norm": 0.3594484329223633, + "learning_rate": 1.3145624449818283e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63239848, + "step": 93805 + }, + { + "epoch": 2.2917939071165074, + "grad_norm": 0.04723503440618515, + "learning_rate": 1.3144814952040375e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63243368, + "step": 93810 + }, + { + "epoch": 2.2919160579483546, + "grad_norm": 0.05795615538954735, + "learning_rate": 1.314400543139253e-06, + "loss": 0.084, + "num_input_tokens_seen": 63246568, + "step": 93815 + }, + { + "epoch": 2.292038208780202, + "grad_norm": 0.10335791110992432, + "learning_rate": 1.3143195887880631e-06, + "loss": 0.0701, + "num_input_tokens_seen": 63250024, + "step": 93820 + }, + { + "epoch": 2.292160359612049, + "grad_norm": 0.006964336149394512, + "learning_rate": 1.3142386321510565e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63253608, + "step": 93825 + }, + { + "epoch": 2.292282510443896, + "grad_norm": 0.008066564798355103, + "learning_rate": 1.3141576732288223e-06, + "loss": 0.0008, + "num_input_tokens_seen": 63256616, + "step": 93830 + }, + { + "epoch": 2.2924046612757434, + "grad_norm": 0.005343073047697544, + "learning_rate": 1.314076712021949e-06, + "loss": 0.099, + "num_input_tokens_seen": 63259752, + "step": 93835 + }, + { + "epoch": 2.2925268121075906, + "grad_norm": 0.1738848239183426, + "learning_rate": 1.3139957485310251e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63263528, + "step": 93840 + }, + { + "epoch": 2.2926489629394378, + "grad_norm": 0.01025724783539772, + "learning_rate": 1.31391478275664e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63267112, + "step": 93845 + }, + { + "epoch": 2.292771113771285, + "grad_norm": 19.27631378173828, + "learning_rate": 1.3138338146993814e-06, + "loss": 0.054, + "num_input_tokens_seen": 63270568, + "step": 93850 + }, + { + "epoch": 2.2928932646031317, + "grad_norm": 0.3640555739402771, + "learning_rate": 1.3137528443598398e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63274088, + "step": 93855 + }, + { + "epoch": 2.2930154154349793, + "grad_norm": 0.019612310454249382, + "learning_rate": 1.3136718717386025e-06, + "loss": 0.1433, + "num_input_tokens_seen": 63277288, + "step": 93860 + }, + { + "epoch": 2.293137566266826, + "grad_norm": 0.043557919561862946, + "learning_rate": 1.3135908968362596e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63280360, + "step": 93865 + }, + { + "epoch": 2.2932597170986733, + "grad_norm": 0.08848714083433151, + "learning_rate": 1.313509919653399e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63283560, + "step": 93870 + }, + { + "epoch": 2.2933818679305205, + "grad_norm": 338.09820556640625, + "learning_rate": 1.3134289401906099e-06, + "loss": 0.0597, + "num_input_tokens_seen": 63287528, + "step": 93875 + }, + { + "epoch": 2.2935040187623676, + "grad_norm": 0.005891845561563969, + "learning_rate": 1.3133479584484812e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63291112, + "step": 93880 + }, + { + "epoch": 2.293626169594215, + "grad_norm": 0.007043534889817238, + "learning_rate": 1.3132669744276022e-06, + "loss": 0.0593, + "num_input_tokens_seen": 63294568, + "step": 93885 + }, + { + "epoch": 2.293748320426062, + "grad_norm": 0.030527032911777496, + "learning_rate": 1.3131859881285612e-06, + "loss": 0.0417, + "num_input_tokens_seen": 63298664, + "step": 93890 + }, + { + "epoch": 2.293870471257909, + "grad_norm": 0.02705656923353672, + "learning_rate": 1.3131049995519474e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63301800, + "step": 93895 + }, + { + "epoch": 2.2939926220897564, + "grad_norm": 0.013978756964206696, + "learning_rate": 1.3130240086983499e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63305448, + "step": 93900 + }, + { + "epoch": 2.2941147729216036, + "grad_norm": 0.18781529366970062, + "learning_rate": 1.3129430155683579e-06, + "loss": 0.0466, + "num_input_tokens_seen": 63308968, + "step": 93905 + }, + { + "epoch": 2.294236923753451, + "grad_norm": 14.817819595336914, + "learning_rate": 1.3128620201625596e-06, + "loss": 0.1974, + "num_input_tokens_seen": 63312808, + "step": 93910 + }, + { + "epoch": 2.294359074585298, + "grad_norm": 0.0723084881901741, + "learning_rate": 1.3127810224815447e-06, + "loss": 0.0494, + "num_input_tokens_seen": 63315880, + "step": 93915 + }, + { + "epoch": 2.294481225417145, + "grad_norm": 0.10592442750930786, + "learning_rate": 1.3127000225259025e-06, + "loss": 0.0008, + "num_input_tokens_seen": 63318952, + "step": 93920 + }, + { + "epoch": 2.2946033762489924, + "grad_norm": 0.20150475203990936, + "learning_rate": 1.3126190202962213e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63322024, + "step": 93925 + }, + { + "epoch": 2.2947255270808395, + "grad_norm": 0.14693403244018555, + "learning_rate": 1.3125380157930908e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63325736, + "step": 93930 + }, + { + "epoch": 2.2948476779126867, + "grad_norm": 0.008689050562679768, + "learning_rate": 1.3124570090170994e-06, + "loss": 0.1105, + "num_input_tokens_seen": 63329448, + "step": 93935 + }, + { + "epoch": 2.294969828744534, + "grad_norm": 18.56661033630371, + "learning_rate": 1.3123759999688367e-06, + "loss": 0.1233, + "num_input_tokens_seen": 63333672, + "step": 93940 + }, + { + "epoch": 2.295091979576381, + "grad_norm": 18.206514358520508, + "learning_rate": 1.3122949886488913e-06, + "loss": 0.0527, + "num_input_tokens_seen": 63337320, + "step": 93945 + }, + { + "epoch": 2.295214130408228, + "grad_norm": 0.25258105993270874, + "learning_rate": 1.3122139750578533e-06, + "loss": 0.0094, + "num_input_tokens_seen": 63340328, + "step": 93950 + }, + { + "epoch": 2.2953362812400755, + "grad_norm": 0.03619527071714401, + "learning_rate": 1.3121329591963112e-06, + "loss": 0.0312, + "num_input_tokens_seen": 63344104, + "step": 93955 + }, + { + "epoch": 2.2954584320719222, + "grad_norm": 0.03644130378961563, + "learning_rate": 1.3120519410648543e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63347496, + "step": 93960 + }, + { + "epoch": 2.2955805829037694, + "grad_norm": 0.023112885653972626, + "learning_rate": 1.3119709206640716e-06, + "loss": 0.0006, + "num_input_tokens_seen": 63350376, + "step": 93965 + }, + { + "epoch": 2.2957027337356166, + "grad_norm": 0.00579156493768096, + "learning_rate": 1.3118898979945528e-06, + "loss": 0.1245, + "num_input_tokens_seen": 63354280, + "step": 93970 + }, + { + "epoch": 2.295824884567464, + "grad_norm": 0.004455236252397299, + "learning_rate": 1.3118088730568863e-06, + "loss": 0.0408, + "num_input_tokens_seen": 63357864, + "step": 93975 + }, + { + "epoch": 2.295947035399311, + "grad_norm": 0.006268753204494715, + "learning_rate": 1.3117278458516622e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63361320, + "step": 93980 + }, + { + "epoch": 2.296069186231158, + "grad_norm": 0.007929849438369274, + "learning_rate": 1.3116468163794691e-06, + "loss": 0.0355, + "num_input_tokens_seen": 63364392, + "step": 93985 + }, + { + "epoch": 2.2961913370630054, + "grad_norm": 0.26138797402381897, + "learning_rate": 1.3115657846408965e-06, + "loss": 0.0518, + "num_input_tokens_seen": 63367400, + "step": 93990 + }, + { + "epoch": 2.2963134878948526, + "grad_norm": 0.0377698689699173, + "learning_rate": 1.3114847506365338e-06, + "loss": 0.0567, + "num_input_tokens_seen": 63371048, + "step": 93995 + }, + { + "epoch": 2.2964356387266998, + "grad_norm": 7.9693803787231445, + "learning_rate": 1.3114037143669702e-06, + "loss": 0.0216, + "num_input_tokens_seen": 63374056, + "step": 94000 + }, + { + "epoch": 2.296557789558547, + "grad_norm": 25.192068099975586, + "learning_rate": 1.3113226758327952e-06, + "loss": 0.0643, + "num_input_tokens_seen": 63377512, + "step": 94005 + }, + { + "epoch": 2.296679940390394, + "grad_norm": 0.246130108833313, + "learning_rate": 1.3112416350345977e-06, + "loss": 0.0006, + "num_input_tokens_seen": 63380712, + "step": 94010 + }, + { + "epoch": 2.2968020912222413, + "grad_norm": 0.03563880920410156, + "learning_rate": 1.3111605919729676e-06, + "loss": 0.038, + "num_input_tokens_seen": 63384040, + "step": 94015 + }, + { + "epoch": 2.2969242420540885, + "grad_norm": 15.70835018157959, + "learning_rate": 1.3110795466484939e-06, + "loss": 0.0514, + "num_input_tokens_seen": 63387432, + "step": 94020 + }, + { + "epoch": 2.2970463928859357, + "grad_norm": 0.05262516438961029, + "learning_rate": 1.3109984990617658e-06, + "loss": 0.0426, + "num_input_tokens_seen": 63391656, + "step": 94025 + }, + { + "epoch": 2.297168543717783, + "grad_norm": 0.1655740588903427, + "learning_rate": 1.3109174492133732e-06, + "loss": 0.0015, + "num_input_tokens_seen": 63394792, + "step": 94030 + }, + { + "epoch": 2.2972906945496296, + "grad_norm": 13.976034164428711, + "learning_rate": 1.3108363971039053e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63397992, + "step": 94035 + }, + { + "epoch": 2.2974128453814773, + "grad_norm": 0.5051965117454529, + "learning_rate": 1.3107553427339515e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63401512, + "step": 94040 + }, + { + "epoch": 2.297534996213324, + "grad_norm": 17.933237075805664, + "learning_rate": 1.310674286104101e-06, + "loss": 0.0331, + "num_input_tokens_seen": 63404776, + "step": 94045 + }, + { + "epoch": 2.297657147045171, + "grad_norm": 0.023094022646546364, + "learning_rate": 1.310593227214944e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63408936, + "step": 94050 + }, + { + "epoch": 2.2977792978770184, + "grad_norm": 0.041221149265766144, + "learning_rate": 1.3105121660670692e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63412712, + "step": 94055 + }, + { + "epoch": 2.2979014487088656, + "grad_norm": 0.01685006357729435, + "learning_rate": 1.3104311026610666e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63416296, + "step": 94060 + }, + { + "epoch": 2.298023599540713, + "grad_norm": 8.67899227142334, + "learning_rate": 1.310350036997525e-06, + "loss": 0.0934, + "num_input_tokens_seen": 63419112, + "step": 94065 + }, + { + "epoch": 2.29814575037256, + "grad_norm": 0.00395004590973258, + "learning_rate": 1.310268969077035e-06, + "loss": 0.0353, + "num_input_tokens_seen": 63422888, + "step": 94070 + }, + { + "epoch": 2.298267901204407, + "grad_norm": 32.90062713623047, + "learning_rate": 1.3101878989001856e-06, + "loss": 0.1447, + "num_input_tokens_seen": 63426344, + "step": 94075 + }, + { + "epoch": 2.2983900520362543, + "grad_norm": 0.5018079876899719, + "learning_rate": 1.3101068264675662e-06, + "loss": 0.001, + "num_input_tokens_seen": 63429992, + "step": 94080 + }, + { + "epoch": 2.2985122028681015, + "grad_norm": 0.061042070388793945, + "learning_rate": 1.3100257517797668e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63433512, + "step": 94085 + }, + { + "epoch": 2.2986343536999487, + "grad_norm": 88.58975982666016, + "learning_rate": 1.3099446748373764e-06, + "loss": 0.1512, + "num_input_tokens_seen": 63436520, + "step": 94090 + }, + { + "epoch": 2.298756504531796, + "grad_norm": 0.05687016621232033, + "learning_rate": 1.3098635956409851e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63439720, + "step": 94095 + }, + { + "epoch": 2.298878655363643, + "grad_norm": 0.039876531809568405, + "learning_rate": 1.3097825141911821e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63443176, + "step": 94100 + }, + { + "epoch": 2.2990008061954903, + "grad_norm": 0.13145720958709717, + "learning_rate": 1.3097014304885578e-06, + "loss": 0.1533, + "num_input_tokens_seen": 63446376, + "step": 94105 + }, + { + "epoch": 2.2991229570273375, + "grad_norm": 10.550003051757812, + "learning_rate": 1.3096203445337013e-06, + "loss": 0.113, + "num_input_tokens_seen": 63449832, + "step": 94110 + }, + { + "epoch": 2.2992451078591847, + "grad_norm": 0.14364692568778992, + "learning_rate": 1.309539256327202e-06, + "loss": 0.1354, + "num_input_tokens_seen": 63452776, + "step": 94115 + }, + { + "epoch": 2.299367258691032, + "grad_norm": 0.009568443521857262, + "learning_rate": 1.3094581658696505e-06, + "loss": 0.0712, + "num_input_tokens_seen": 63456104, + "step": 94120 + }, + { + "epoch": 2.299489409522879, + "grad_norm": 0.9077567458152771, + "learning_rate": 1.3093770731616358e-06, + "loss": 0.0532, + "num_input_tokens_seen": 63458856, + "step": 94125 + }, + { + "epoch": 2.299611560354726, + "grad_norm": 0.018772780895233154, + "learning_rate": 1.3092959782037478e-06, + "loss": 0.0367, + "num_input_tokens_seen": 63461800, + "step": 94130 + }, + { + "epoch": 2.299733711186573, + "grad_norm": 27.450637817382812, + "learning_rate": 1.3092148809965763e-06, + "loss": 0.0827, + "num_input_tokens_seen": 63465640, + "step": 94135 + }, + { + "epoch": 2.29985586201842, + "grad_norm": 0.020906388759613037, + "learning_rate": 1.3091337815407108e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63468584, + "step": 94140 + }, + { + "epoch": 2.2999780128502674, + "grad_norm": 15.979035377502441, + "learning_rate": 1.3090526798367414e-06, + "loss": 0.033, + "num_input_tokens_seen": 63472488, + "step": 94145 + }, + { + "epoch": 2.3001001636821146, + "grad_norm": 0.022671345621347427, + "learning_rate": 1.3089715758852578e-06, + "loss": 0.053, + "num_input_tokens_seen": 63475752, + "step": 94150 + }, + { + "epoch": 2.3002223145139618, + "grad_norm": 0.09151072800159454, + "learning_rate": 1.3088904696868498e-06, + "loss": 0.001, + "num_input_tokens_seen": 63479208, + "step": 94155 + }, + { + "epoch": 2.300344465345809, + "grad_norm": 0.7091579437255859, + "learning_rate": 1.308809361242107e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63482856, + "step": 94160 + }, + { + "epoch": 2.300466616177656, + "grad_norm": 0.010377227328717709, + "learning_rate": 1.3087282505516197e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63486248, + "step": 94165 + }, + { + "epoch": 2.3005887670095033, + "grad_norm": 0.10020996630191803, + "learning_rate": 1.3086471376159777e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63489512, + "step": 94170 + }, + { + "epoch": 2.3007109178413505, + "grad_norm": 0.018191087990999222, + "learning_rate": 1.3085660224357703e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63492456, + "step": 94175 + }, + { + "epoch": 2.3008330686731977, + "grad_norm": 0.7195940613746643, + "learning_rate": 1.3084849050115883e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63495976, + "step": 94180 + }, + { + "epoch": 2.300955219505045, + "grad_norm": 0.06771797686815262, + "learning_rate": 1.3084037853440206e-06, + "loss": 0.0457, + "num_input_tokens_seen": 63499176, + "step": 94185 + }, + { + "epoch": 2.301077370336892, + "grad_norm": 0.08082715421915054, + "learning_rate": 1.308322663433658e-06, + "loss": 0.0295, + "num_input_tokens_seen": 63502440, + "step": 94190 + }, + { + "epoch": 2.3011995211687393, + "grad_norm": 32.88578796386719, + "learning_rate": 1.3082415392810896e-06, + "loss": 0.0837, + "num_input_tokens_seen": 63505640, + "step": 94195 + }, + { + "epoch": 2.3013216720005865, + "grad_norm": 0.005474249366670847, + "learning_rate": 1.3081604128869064e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63509224, + "step": 94200 + }, + { + "epoch": 2.3014438228324336, + "grad_norm": 0.17572320997714996, + "learning_rate": 1.3080792842516974e-06, + "loss": 0.038, + "num_input_tokens_seen": 63512488, + "step": 94205 + }, + { + "epoch": 2.301565973664281, + "grad_norm": 0.04181716963648796, + "learning_rate": 1.3079981533760532e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63516008, + "step": 94210 + }, + { + "epoch": 2.3016881244961276, + "grad_norm": 0.004344448447227478, + "learning_rate": 1.3079170202605633e-06, + "loss": 0.0405, + "num_input_tokens_seen": 63519208, + "step": 94215 + }, + { + "epoch": 2.301810275327975, + "grad_norm": 0.004446935374289751, + "learning_rate": 1.3078358849058182e-06, + "loss": 0.0309, + "num_input_tokens_seen": 63522216, + "step": 94220 + }, + { + "epoch": 2.301932426159822, + "grad_norm": 0.041591398417949677, + "learning_rate": 1.3077547473124076e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63525928, + "step": 94225 + }, + { + "epoch": 2.302054576991669, + "grad_norm": 0.023177804425358772, + "learning_rate": 1.3076736074809219e-06, + "loss": 0.0373, + "num_input_tokens_seen": 63529064, + "step": 94230 + }, + { + "epoch": 2.3021767278235163, + "grad_norm": 0.021942226216197014, + "learning_rate": 1.3075924654119507e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63532328, + "step": 94235 + }, + { + "epoch": 2.3022988786553635, + "grad_norm": 0.04694988951086998, + "learning_rate": 1.307511321106085e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63535976, + "step": 94240 + }, + { + "epoch": 2.3024210294872107, + "grad_norm": 0.058997299522161484, + "learning_rate": 1.3074301745639138e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63539368, + "step": 94245 + }, + { + "epoch": 2.302543180319058, + "grad_norm": 0.013562050648033619, + "learning_rate": 1.3073490257860278e-06, + "loss": 0.001, + "num_input_tokens_seen": 63542632, + "step": 94250 + }, + { + "epoch": 2.302665331150905, + "grad_norm": 0.3930176794528961, + "learning_rate": 1.3072678747730166e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63546408, + "step": 94255 + }, + { + "epoch": 2.3027874819827523, + "grad_norm": 0.0023700748570263386, + "learning_rate": 1.307186721525471e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63549800, + "step": 94260 + }, + { + "epoch": 2.3029096328145995, + "grad_norm": 0.025645380839705467, + "learning_rate": 1.3071055660439811e-06, + "loss": 0.0697, + "num_input_tokens_seen": 63553768, + "step": 94265 + }, + { + "epoch": 2.3030317836464467, + "grad_norm": 0.21440570056438446, + "learning_rate": 1.3070244083291368e-06, + "loss": 0.0515, + "num_input_tokens_seen": 63556840, + "step": 94270 + }, + { + "epoch": 2.303153934478294, + "grad_norm": 0.11980664730072021, + "learning_rate": 1.3069432483815285e-06, + "loss": 0.14, + "num_input_tokens_seen": 63560616, + "step": 94275 + }, + { + "epoch": 2.303276085310141, + "grad_norm": 2.025825023651123, + "learning_rate": 1.3068620862017466e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63564200, + "step": 94280 + }, + { + "epoch": 2.3033982361419882, + "grad_norm": 0.009357582777738571, + "learning_rate": 1.3067809217903807e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63567400, + "step": 94285 + }, + { + "epoch": 2.3035203869738354, + "grad_norm": 69.97030639648438, + "learning_rate": 1.3066997551480215e-06, + "loss": 0.0739, + "num_input_tokens_seen": 63571624, + "step": 94290 + }, + { + "epoch": 2.3036425378056826, + "grad_norm": 0.189670592546463, + "learning_rate": 1.3066185862752592e-06, + "loss": 0.0889, + "num_input_tokens_seen": 63575272, + "step": 94295 + }, + { + "epoch": 2.30376468863753, + "grad_norm": 0.019184645265340805, + "learning_rate": 1.3065374151726842e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63578664, + "step": 94300 + }, + { + "epoch": 2.303886839469377, + "grad_norm": 35.09974670410156, + "learning_rate": 1.3064562418408863e-06, + "loss": 0.1545, + "num_input_tokens_seen": 63581992, + "step": 94305 + }, + { + "epoch": 2.3040089903012237, + "grad_norm": 0.017247380688786507, + "learning_rate": 1.3063750662804567e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63585512, + "step": 94310 + }, + { + "epoch": 2.304131141133071, + "grad_norm": 0.01316153910011053, + "learning_rate": 1.3062938884919844e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63588712, + "step": 94315 + }, + { + "epoch": 2.304253291964918, + "grad_norm": 0.0003070076054427773, + "learning_rate": 1.3062127084760613e-06, + "loss": 0.0758, + "num_input_tokens_seen": 63592232, + "step": 94320 + }, + { + "epoch": 2.3043754427967653, + "grad_norm": 0.03631015121936798, + "learning_rate": 1.3061315262332768e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63595944, + "step": 94325 + }, + { + "epoch": 2.3044975936286125, + "grad_norm": 0.006778942886739969, + "learning_rate": 1.3060503417642218e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63599400, + "step": 94330 + }, + { + "epoch": 2.3046197444604597, + "grad_norm": 0.02677575685083866, + "learning_rate": 1.3059691550694858e-06, + "loss": 0.0429, + "num_input_tokens_seen": 63603112, + "step": 94335 + }, + { + "epoch": 2.304741895292307, + "grad_norm": 0.07894811779260635, + "learning_rate": 1.3058879661496602e-06, + "loss": 0.0378, + "num_input_tokens_seen": 63606888, + "step": 94340 + }, + { + "epoch": 2.304864046124154, + "grad_norm": 0.45543399453163147, + "learning_rate": 1.305806775005335e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63609832, + "step": 94345 + }, + { + "epoch": 2.3049861969560013, + "grad_norm": 0.05380082130432129, + "learning_rate": 1.3057255816371007e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63613096, + "step": 94350 + }, + { + "epoch": 2.3051083477878485, + "grad_norm": 0.11246181279420853, + "learning_rate": 1.3056443860455476e-06, + "loss": 0.0699, + "num_input_tokens_seen": 63616680, + "step": 94355 + }, + { + "epoch": 2.3052304986196956, + "grad_norm": 0.012971539981663227, + "learning_rate": 1.3055631882312664e-06, + "loss": 0.0706, + "num_input_tokens_seen": 63619752, + "step": 94360 + }, + { + "epoch": 2.305352649451543, + "grad_norm": 0.010960533283650875, + "learning_rate": 1.3054819881948476e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63622952, + "step": 94365 + }, + { + "epoch": 2.30547480028339, + "grad_norm": 122.7162857055664, + "learning_rate": 1.3054007859368813e-06, + "loss": 0.0342, + "num_input_tokens_seen": 63626792, + "step": 94370 + }, + { + "epoch": 2.305596951115237, + "grad_norm": 0.2455373853445053, + "learning_rate": 1.3053195814579587e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63629992, + "step": 94375 + }, + { + "epoch": 2.3057191019470844, + "grad_norm": 22.135835647583008, + "learning_rate": 1.3052383747586697e-06, + "loss": 0.0437, + "num_input_tokens_seen": 63633128, + "step": 94380 + }, + { + "epoch": 2.3058412527789316, + "grad_norm": 0.011918697506189346, + "learning_rate": 1.3051571658396053e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63636392, + "step": 94385 + }, + { + "epoch": 2.305963403610779, + "grad_norm": 0.01846807263791561, + "learning_rate": 1.3050759547013558e-06, + "loss": 0.0, + "num_input_tokens_seen": 63639720, + "step": 94390 + }, + { + "epoch": 2.3060855544426255, + "grad_norm": 0.04838526248931885, + "learning_rate": 1.3049947413445123e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63642728, + "step": 94395 + }, + { + "epoch": 2.306207705274473, + "grad_norm": 0.020267801359295845, + "learning_rate": 1.3049135257696646e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63645800, + "step": 94400 + }, + { + "epoch": 2.30632985610632, + "grad_norm": 0.009767679497599602, + "learning_rate": 1.304832307977404e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63649000, + "step": 94405 + }, + { + "epoch": 2.306452006938167, + "grad_norm": 0.09315386414527893, + "learning_rate": 1.3047510879683206e-06, + "loss": 0.0448, + "num_input_tokens_seen": 63652200, + "step": 94410 + }, + { + "epoch": 2.3065741577700143, + "grad_norm": 0.010149150155484676, + "learning_rate": 1.3046698657430053e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63655976, + "step": 94415 + }, + { + "epoch": 2.3066963086018615, + "grad_norm": 45.44666290283203, + "learning_rate": 1.3045886413020491e-06, + "loss": 0.1456, + "num_input_tokens_seen": 63659048, + "step": 94420 + }, + { + "epoch": 2.3068184594337087, + "grad_norm": 0.021444687619805336, + "learning_rate": 1.304507414646042e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63662248, + "step": 94425 + }, + { + "epoch": 2.306940610265556, + "grad_norm": 0.0023106755688786507, + "learning_rate": 1.3044261857755753e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63665320, + "step": 94430 + }, + { + "epoch": 2.307062761097403, + "grad_norm": 0.004815931431949139, + "learning_rate": 1.3043449546912394e-06, + "loss": 0.0, + "num_input_tokens_seen": 63668968, + "step": 94435 + }, + { + "epoch": 2.3071849119292502, + "grad_norm": 0.04161781445145607, + "learning_rate": 1.3042637213936255e-06, + "loss": 0.1804, + "num_input_tokens_seen": 63672296, + "step": 94440 + }, + { + "epoch": 2.3073070627610974, + "grad_norm": 1.1624021530151367, + "learning_rate": 1.3041824858833235e-06, + "loss": 0.0012, + "num_input_tokens_seen": 63675304, + "step": 94445 + }, + { + "epoch": 2.3074292135929446, + "grad_norm": 0.016916943714022636, + "learning_rate": 1.3041012481609248e-06, + "loss": 0.0335, + "num_input_tokens_seen": 63678568, + "step": 94450 + }, + { + "epoch": 2.307551364424792, + "grad_norm": 53.67993927001953, + "learning_rate": 1.3040200082270202e-06, + "loss": 0.1145, + "num_input_tokens_seen": 63682088, + "step": 94455 + }, + { + "epoch": 2.307673515256639, + "grad_norm": 0.032480210065841675, + "learning_rate": 1.3039387660822e-06, + "loss": 0.0367, + "num_input_tokens_seen": 63685672, + "step": 94460 + }, + { + "epoch": 2.307795666088486, + "grad_norm": 1.5269211530685425, + "learning_rate": 1.3038575217270555e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63689128, + "step": 94465 + }, + { + "epoch": 2.3079178169203334, + "grad_norm": 0.0025496436282992363, + "learning_rate": 1.3037762751621773e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63692520, + "step": 94470 + }, + { + "epoch": 2.3080399677521806, + "grad_norm": 0.010646814480423927, + "learning_rate": 1.3036950263881563e-06, + "loss": 0.0533, + "num_input_tokens_seen": 63695784, + "step": 94475 + }, + { + "epoch": 2.3081621185840273, + "grad_norm": 0.0811719000339508, + "learning_rate": 1.3036137754055835e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63698984, + "step": 94480 + }, + { + "epoch": 2.308284269415875, + "grad_norm": 0.004129193257540464, + "learning_rate": 1.30353252221505e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63702376, + "step": 94485 + }, + { + "epoch": 2.3084064202477217, + "grad_norm": 0.18701672554016113, + "learning_rate": 1.3034512668171457e-06, + "loss": 0.0008, + "num_input_tokens_seen": 63705704, + "step": 94490 + }, + { + "epoch": 2.308528571079569, + "grad_norm": 195.37823486328125, + "learning_rate": 1.3033700092124626e-06, + "loss": 0.0522, + "num_input_tokens_seen": 63708904, + "step": 94495 + }, + { + "epoch": 2.308650721911416, + "grad_norm": 0.030486810952425003, + "learning_rate": 1.3032887494015913e-06, + "loss": 0.047, + "num_input_tokens_seen": 63712040, + "step": 94500 + }, + { + "epoch": 2.3087728727432633, + "grad_norm": 0.02747334912419319, + "learning_rate": 1.3032074873851224e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63715368, + "step": 94505 + }, + { + "epoch": 2.3088950235751105, + "grad_norm": 0.035794202238321304, + "learning_rate": 1.303126223163647e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63719016, + "step": 94510 + }, + { + "epoch": 2.3090171744069576, + "grad_norm": 19.372329711914062, + "learning_rate": 1.3030449567377565e-06, + "loss": 0.0573, + "num_input_tokens_seen": 63722344, + "step": 94515 + }, + { + "epoch": 2.309139325238805, + "grad_norm": 0.3616223633289337, + "learning_rate": 1.3029636881080412e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63725480, + "step": 94520 + }, + { + "epoch": 2.309261476070652, + "grad_norm": 0.006535464432090521, + "learning_rate": 1.3028824172750927e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63728488, + "step": 94525 + }, + { + "epoch": 2.309383626902499, + "grad_norm": 35.66829299926758, + "learning_rate": 1.3028011442395017e-06, + "loss": 0.0389, + "num_input_tokens_seen": 63732136, + "step": 94530 + }, + { + "epoch": 2.3095057777343464, + "grad_norm": 0.06885640323162079, + "learning_rate": 1.3027198690018592e-06, + "loss": 0.0534, + "num_input_tokens_seen": 63735528, + "step": 94535 + }, + { + "epoch": 2.3096279285661936, + "grad_norm": 0.009454714134335518, + "learning_rate": 1.3026385915627566e-06, + "loss": 0.0, + "num_input_tokens_seen": 63738984, + "step": 94540 + }, + { + "epoch": 2.309750079398041, + "grad_norm": 7.843992710113525, + "learning_rate": 1.3025573119227847e-06, + "loss": 0.0932, + "num_input_tokens_seen": 63742248, + "step": 94545 + }, + { + "epoch": 2.309872230229888, + "grad_norm": 163.3396453857422, + "learning_rate": 1.3024760300825348e-06, + "loss": 0.0247, + "num_input_tokens_seen": 63746024, + "step": 94550 + }, + { + "epoch": 2.309994381061735, + "grad_norm": 5.96230411529541, + "learning_rate": 1.302394746042598e-06, + "loss": 0.0825, + "num_input_tokens_seen": 63749096, + "step": 94555 + }, + { + "epoch": 2.3101165318935823, + "grad_norm": 0.0034930650144815445, + "learning_rate": 1.3023134598035647e-06, + "loss": 0.0284, + "num_input_tokens_seen": 63752616, + "step": 94560 + }, + { + "epoch": 2.3102386827254295, + "grad_norm": 0.0070127821527421474, + "learning_rate": 1.3022321713660268e-06, + "loss": 0.0539, + "num_input_tokens_seen": 63756136, + "step": 94565 + }, + { + "epoch": 2.3103608335572767, + "grad_norm": 0.0699462965130806, + "learning_rate": 1.3021508807305754e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63759720, + "step": 94570 + }, + { + "epoch": 2.3104829843891235, + "grad_norm": 0.01093141920864582, + "learning_rate": 1.3020695878978015e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63763496, + "step": 94575 + }, + { + "epoch": 2.3106051352209707, + "grad_norm": 16.72857093811035, + "learning_rate": 1.3019882928682963e-06, + "loss": 0.0664, + "num_input_tokens_seen": 63766760, + "step": 94580 + }, + { + "epoch": 2.310727286052818, + "grad_norm": 0.03342173248529434, + "learning_rate": 1.3019069956426511e-06, + "loss": 0.0004, + "num_input_tokens_seen": 63770024, + "step": 94585 + }, + { + "epoch": 2.310849436884665, + "grad_norm": 0.12329500913619995, + "learning_rate": 1.301825696221457e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63773608, + "step": 94590 + }, + { + "epoch": 2.3109715877165122, + "grad_norm": 0.01329412218183279, + "learning_rate": 1.301744394605305e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63776936, + "step": 94595 + }, + { + "epoch": 2.3110937385483594, + "grad_norm": 0.04466555267572403, + "learning_rate": 1.3016630907947868e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63780072, + "step": 94600 + }, + { + "epoch": 2.3112158893802066, + "grad_norm": 101.69499969482422, + "learning_rate": 1.3015817847904934e-06, + "loss": 0.2443, + "num_input_tokens_seen": 63783208, + "step": 94605 + }, + { + "epoch": 2.311338040212054, + "grad_norm": 0.30019229650497437, + "learning_rate": 1.3015004765930164e-06, + "loss": 0.1325, + "num_input_tokens_seen": 63786536, + "step": 94610 + }, + { + "epoch": 2.311460191043901, + "grad_norm": 0.07922214269638062, + "learning_rate": 1.3014191662029466e-06, + "loss": 0.0448, + "num_input_tokens_seen": 63790184, + "step": 94615 + }, + { + "epoch": 2.311582341875748, + "grad_norm": 0.13556693494319916, + "learning_rate": 1.3013378536208757e-06, + "loss": 0.0543, + "num_input_tokens_seen": 63793512, + "step": 94620 + }, + { + "epoch": 2.3117044927075954, + "grad_norm": 0.1541840136051178, + "learning_rate": 1.3012565388473947e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63796712, + "step": 94625 + }, + { + "epoch": 2.3118266435394426, + "grad_norm": 0.04244079068303108, + "learning_rate": 1.301175221883095e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63799592, + "step": 94630 + }, + { + "epoch": 2.3119487943712898, + "grad_norm": 0.11242552846670151, + "learning_rate": 1.3010939027285684e-06, + "loss": 0.0326, + "num_input_tokens_seen": 63803304, + "step": 94635 + }, + { + "epoch": 2.312070945203137, + "grad_norm": 0.00623678881675005, + "learning_rate": 1.3010125813844057e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63806568, + "step": 94640 + }, + { + "epoch": 2.312193096034984, + "grad_norm": 0.01181286945939064, + "learning_rate": 1.3009312578511988e-06, + "loss": 0.0019, + "num_input_tokens_seen": 63810024, + "step": 94645 + }, + { + "epoch": 2.3123152468668313, + "grad_norm": 589.23291015625, + "learning_rate": 1.3008499321295388e-06, + "loss": 0.2024, + "num_input_tokens_seen": 63813736, + "step": 94650 + }, + { + "epoch": 2.3124373976986785, + "grad_norm": 0.0026681246235966682, + "learning_rate": 1.300768604220017e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63817320, + "step": 94655 + }, + { + "epoch": 2.3125595485305253, + "grad_norm": 0.02497980371117592, + "learning_rate": 1.3006872741232252e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63821096, + "step": 94660 + }, + { + "epoch": 2.312681699362373, + "grad_norm": 0.028597678989171982, + "learning_rate": 1.3006059418397545e-06, + "loss": 0.0955, + "num_input_tokens_seen": 63825000, + "step": 94665 + }, + { + "epoch": 2.3128038501942196, + "grad_norm": 42.822933197021484, + "learning_rate": 1.3005246073701965e-06, + "loss": 0.0467, + "num_input_tokens_seen": 63828712, + "step": 94670 + }, + { + "epoch": 2.312926001026067, + "grad_norm": 0.1000511422753334, + "learning_rate": 1.3004432707151428e-06, + "loss": 0.0992, + "num_input_tokens_seen": 63832360, + "step": 94675 + }, + { + "epoch": 2.313048151857914, + "grad_norm": 1.0598971843719482, + "learning_rate": 1.300361931875185e-06, + "loss": 0.0536, + "num_input_tokens_seen": 63835944, + "step": 94680 + }, + { + "epoch": 2.313170302689761, + "grad_norm": 0.026618486270308495, + "learning_rate": 1.3002805908509144e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63839592, + "step": 94685 + }, + { + "epoch": 2.3132924535216084, + "grad_norm": 0.10733477026224136, + "learning_rate": 1.3001992476429221e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63842792, + "step": 94690 + }, + { + "epoch": 2.3134146043534556, + "grad_norm": 0.0665910467505455, + "learning_rate": 1.3001179022518006e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63845736, + "step": 94695 + }, + { + "epoch": 2.3135367551853028, + "grad_norm": 0.20579425990581512, + "learning_rate": 1.300036554678141e-06, + "loss": 0.0839, + "num_input_tokens_seen": 63849128, + "step": 94700 + }, + { + "epoch": 2.31365890601715, + "grad_norm": 0.03927788883447647, + "learning_rate": 1.299955204922535e-06, + "loss": 0.0553, + "num_input_tokens_seen": 63852200, + "step": 94705 + }, + { + "epoch": 2.313781056848997, + "grad_norm": 0.08418112993240356, + "learning_rate": 1.299873852985574e-06, + "loss": 0.0305, + "num_input_tokens_seen": 63855784, + "step": 94710 + }, + { + "epoch": 2.3139032076808443, + "grad_norm": 0.014477049000561237, + "learning_rate": 1.2997924988678496e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63858920, + "step": 94715 + }, + { + "epoch": 2.3140253585126915, + "grad_norm": 0.3408757150173187, + "learning_rate": 1.2997111425699532e-06, + "loss": 0.0399, + "num_input_tokens_seen": 63862056, + "step": 94720 + }, + { + "epoch": 2.3141475093445387, + "grad_norm": 0.4299645721912384, + "learning_rate": 1.299629784092477e-06, + "loss": 0.0012, + "num_input_tokens_seen": 63865576, + "step": 94725 + }, + { + "epoch": 2.314269660176386, + "grad_norm": 7.6998701095581055, + "learning_rate": 1.2995484234360123e-06, + "loss": 0.0367, + "num_input_tokens_seen": 63868968, + "step": 94730 + }, + { + "epoch": 2.314391811008233, + "grad_norm": 45.46992111206055, + "learning_rate": 1.299467060601151e-06, + "loss": 0.047, + "num_input_tokens_seen": 63872616, + "step": 94735 + }, + { + "epoch": 2.3145139618400803, + "grad_norm": 0.43784013390541077, + "learning_rate": 1.299385695588485e-06, + "loss": 0.0569, + "num_input_tokens_seen": 63876200, + "step": 94740 + }, + { + "epoch": 2.3146361126719275, + "grad_norm": 0.0034390855580568314, + "learning_rate": 1.2993043283986055e-06, + "loss": 0.0926, + "num_input_tokens_seen": 63879784, + "step": 94745 + }, + { + "epoch": 2.3147582635037747, + "grad_norm": 0.01996810920536518, + "learning_rate": 1.2992229590321042e-06, + "loss": 0.0635, + "num_input_tokens_seen": 63882984, + "step": 94750 + }, + { + "epoch": 2.3148804143356214, + "grad_norm": 277.53094482421875, + "learning_rate": 1.299141587489573e-06, + "loss": 0.0324, + "num_input_tokens_seen": 63886312, + "step": 94755 + }, + { + "epoch": 2.3150025651674686, + "grad_norm": 0.7463566064834595, + "learning_rate": 1.299060213771604e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63889256, + "step": 94760 + }, + { + "epoch": 2.315124715999316, + "grad_norm": 0.27979183197021484, + "learning_rate": 1.2989788378787886e-06, + "loss": 0.0006, + "num_input_tokens_seen": 63892264, + "step": 94765 + }, + { + "epoch": 2.315246866831163, + "grad_norm": 0.1391063630580902, + "learning_rate": 1.2988974598117188e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63895528, + "step": 94770 + }, + { + "epoch": 2.31536901766301, + "grad_norm": 0.08584363013505936, + "learning_rate": 1.2988160795709861e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63898984, + "step": 94775 + }, + { + "epoch": 2.3154911684948574, + "grad_norm": 0.18024323880672455, + "learning_rate": 1.2987346971571823e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63902376, + "step": 94780 + }, + { + "epoch": 2.3156133193267046, + "grad_norm": 0.012486801482737064, + "learning_rate": 1.2986533125708998e-06, + "loss": 0.0693, + "num_input_tokens_seen": 63905704, + "step": 94785 + }, + { + "epoch": 2.3157354701585517, + "grad_norm": 0.04421261325478554, + "learning_rate": 1.2985719258127299e-06, + "loss": 0.0716, + "num_input_tokens_seen": 63908648, + "step": 94790 + }, + { + "epoch": 2.315857620990399, + "grad_norm": 0.010590645484626293, + "learning_rate": 1.298490536883265e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63911912, + "step": 94795 + }, + { + "epoch": 2.315979771822246, + "grad_norm": 14.425555229187012, + "learning_rate": 1.2984091457830961e-06, + "loss": 0.0525, + "num_input_tokens_seen": 63916008, + "step": 94800 + }, + { + "epoch": 2.3161019226540933, + "grad_norm": 0.01041900459676981, + "learning_rate": 1.298327752512816e-06, + "loss": 0.0595, + "num_input_tokens_seen": 63919208, + "step": 94805 + }, + { + "epoch": 2.3162240734859405, + "grad_norm": 0.006424942519515753, + "learning_rate": 1.298246357073016e-06, + "loss": 0.0679, + "num_input_tokens_seen": 63922920, + "step": 94810 + }, + { + "epoch": 2.3163462243177877, + "grad_norm": 72.95399475097656, + "learning_rate": 1.2981649594642886e-06, + "loss": 0.0776, + "num_input_tokens_seen": 63926248, + "step": 94815 + }, + { + "epoch": 2.316468375149635, + "grad_norm": 0.033855266869068146, + "learning_rate": 1.298083559687225e-06, + "loss": 0.0362, + "num_input_tokens_seen": 63929384, + "step": 94820 + }, + { + "epoch": 2.316590525981482, + "grad_norm": 0.008695218712091446, + "learning_rate": 1.2980021577424178e-06, + "loss": 0.0431, + "num_input_tokens_seen": 63932520, + "step": 94825 + }, + { + "epoch": 2.3167126768133293, + "grad_norm": 0.09830284863710403, + "learning_rate": 1.2979207536304588e-06, + "loss": 0.0002, + "num_input_tokens_seen": 63935720, + "step": 94830 + }, + { + "epoch": 2.3168348276451765, + "grad_norm": 17.72136688232422, + "learning_rate": 1.29783934735194e-06, + "loss": 0.1531, + "num_input_tokens_seen": 63939368, + "step": 94835 + }, + { + "epoch": 2.316956978477023, + "grad_norm": 0.014057500287890434, + "learning_rate": 1.2977579389074533e-06, + "loss": 0.0001, + "num_input_tokens_seen": 63943016, + "step": 94840 + }, + { + "epoch": 2.317079129308871, + "grad_norm": 81.38212585449219, + "learning_rate": 1.2976765282975905e-06, + "loss": 0.1494, + "num_input_tokens_seen": 63946408, + "step": 94845 + }, + { + "epoch": 2.3172012801407176, + "grad_norm": 0.003075582906603813, + "learning_rate": 1.297595115522944e-06, + "loss": 0.001, + "num_input_tokens_seen": 63949608, + "step": 94850 + }, + { + "epoch": 2.3173234309725648, + "grad_norm": 0.08976204693317413, + "learning_rate": 1.297513700584106e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63953128, + "step": 94855 + }, + { + "epoch": 2.317445581804412, + "grad_norm": 0.11242163181304932, + "learning_rate": 1.2974322834816684e-06, + "loss": 0.001, + "num_input_tokens_seen": 63956136, + "step": 94860 + }, + { + "epoch": 2.317567732636259, + "grad_norm": 0.008920049294829369, + "learning_rate": 1.2973508642162233e-06, + "loss": 0.0416, + "num_input_tokens_seen": 63960040, + "step": 94865 + }, + { + "epoch": 2.3176898834681063, + "grad_norm": 0.14441661536693573, + "learning_rate": 1.2972694427883625e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63963304, + "step": 94870 + }, + { + "epoch": 2.3178120342999535, + "grad_norm": 0.06034664064645767, + "learning_rate": 1.2971880191986785e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63966632, + "step": 94875 + }, + { + "epoch": 2.3179341851318007, + "grad_norm": 0.16961410641670227, + "learning_rate": 1.297106593447763e-06, + "loss": 0.0303, + "num_input_tokens_seen": 63970088, + "step": 94880 + }, + { + "epoch": 2.318056335963648, + "grad_norm": 0.46061891317367554, + "learning_rate": 1.2970251655362086e-06, + "loss": 0.0005, + "num_input_tokens_seen": 63973352, + "step": 94885 + }, + { + "epoch": 2.318178486795495, + "grad_norm": 0.13485822081565857, + "learning_rate": 1.2969437354646073e-06, + "loss": 0.0743, + "num_input_tokens_seen": 63976552, + "step": 94890 + }, + { + "epoch": 2.3183006376273423, + "grad_norm": 0.03163136541843414, + "learning_rate": 1.2968623032335515e-06, + "loss": 0.0535, + "num_input_tokens_seen": 63979752, + "step": 94895 + }, + { + "epoch": 2.3184227884591895, + "grad_norm": 0.06518939882516861, + "learning_rate": 1.296780868843633e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63983720, + "step": 94900 + }, + { + "epoch": 2.3185449392910367, + "grad_norm": 1.7363409996032715, + "learning_rate": 1.2966994322954439e-06, + "loss": 0.0007, + "num_input_tokens_seen": 63987368, + "step": 94905 + }, + { + "epoch": 2.318667090122884, + "grad_norm": 0.023380771279335022, + "learning_rate": 1.2966179935895774e-06, + "loss": 0.0444, + "num_input_tokens_seen": 63990568, + "step": 94910 + }, + { + "epoch": 2.318789240954731, + "grad_norm": 0.09954951703548431, + "learning_rate": 1.2965365527266245e-06, + "loss": 0.1065, + "num_input_tokens_seen": 63993704, + "step": 94915 + }, + { + "epoch": 2.3189113917865782, + "grad_norm": 0.03531714528799057, + "learning_rate": 1.2964551097071784e-06, + "loss": 0.0003, + "num_input_tokens_seen": 63997160, + "step": 94920 + }, + { + "epoch": 2.3190335426184254, + "grad_norm": 0.3534320294857025, + "learning_rate": 1.2963736645318308e-06, + "loss": 0.0519, + "num_input_tokens_seen": 64000488, + "step": 94925 + }, + { + "epoch": 2.3191556934502726, + "grad_norm": 0.08531579375267029, + "learning_rate": 1.2962922172011744e-06, + "loss": 0.0711, + "num_input_tokens_seen": 64004008, + "step": 94930 + }, + { + "epoch": 2.3192778442821194, + "grad_norm": 0.012248575687408447, + "learning_rate": 1.296210767715801e-06, + "loss": 0.0287, + "num_input_tokens_seen": 64007528, + "step": 94935 + }, + { + "epoch": 2.3193999951139666, + "grad_norm": 0.025879565626382828, + "learning_rate": 1.296129316076303e-06, + "loss": 0.049, + "num_input_tokens_seen": 64011112, + "step": 94940 + }, + { + "epoch": 2.3195221459458137, + "grad_norm": 0.04887163266539574, + "learning_rate": 1.2960478622832736e-06, + "loss": 0.0318, + "num_input_tokens_seen": 64014440, + "step": 94945 + }, + { + "epoch": 2.319644296777661, + "grad_norm": 0.005958162248134613, + "learning_rate": 1.295966406337304e-06, + "loss": 0.0549, + "num_input_tokens_seen": 64018024, + "step": 94950 + }, + { + "epoch": 2.319766447609508, + "grad_norm": 0.026194145902991295, + "learning_rate": 1.2958849482389876e-06, + "loss": 0.0363, + "num_input_tokens_seen": 64021352, + "step": 94955 + }, + { + "epoch": 2.3198885984413553, + "grad_norm": 0.0223369337618351, + "learning_rate": 1.2958034879889158e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64024552, + "step": 94960 + }, + { + "epoch": 2.3200107492732025, + "grad_norm": 0.022756468504667282, + "learning_rate": 1.295722025587682e-06, + "loss": 0.0935, + "num_input_tokens_seen": 64027944, + "step": 94965 + }, + { + "epoch": 2.3201329001050497, + "grad_norm": 0.5314485430717468, + "learning_rate": 1.2956405610358776e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64031528, + "step": 94970 + }, + { + "epoch": 2.320255050936897, + "grad_norm": 0.2667846977710724, + "learning_rate": 1.2955590943340956e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64034728, + "step": 94975 + }, + { + "epoch": 2.320377201768744, + "grad_norm": 0.019529344514012337, + "learning_rate": 1.2954776254829288e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64037864, + "step": 94980 + }, + { + "epoch": 2.3204993526005913, + "grad_norm": 0.005352795589715242, + "learning_rate": 1.2953961544829688e-06, + "loss": 0.141, + "num_input_tokens_seen": 64041576, + "step": 94985 + }, + { + "epoch": 2.3206215034324384, + "grad_norm": 0.08801307529211044, + "learning_rate": 1.2953146813348085e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64044584, + "step": 94990 + }, + { + "epoch": 2.3207436542642856, + "grad_norm": 0.14798977971076965, + "learning_rate": 1.2952332060390405e-06, + "loss": 0.0626, + "num_input_tokens_seen": 64047848, + "step": 94995 + }, + { + "epoch": 2.320865805096133, + "grad_norm": 20.214397430419922, + "learning_rate": 1.2951517285962571e-06, + "loss": 0.0465, + "num_input_tokens_seen": 64050856, + "step": 95000 + }, + { + "epoch": 2.32098795592798, + "grad_norm": 0.031365349888801575, + "learning_rate": 1.2950702490070514e-06, + "loss": 0.0296, + "num_input_tokens_seen": 64054312, + "step": 95005 + }, + { + "epoch": 2.321110106759827, + "grad_norm": 0.16067151725292206, + "learning_rate": 1.294988767272015e-06, + "loss": 0.0689, + "num_input_tokens_seen": 64057640, + "step": 95010 + }, + { + "epoch": 2.3212322575916744, + "grad_norm": 0.003375637112185359, + "learning_rate": 1.2949072833917414e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64061160, + "step": 95015 + }, + { + "epoch": 2.321354408423521, + "grad_norm": 0.04902317747473717, + "learning_rate": 1.2948257973668224e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64064296, + "step": 95020 + }, + { + "epoch": 2.3214765592553688, + "grad_norm": 0.186828151345253, + "learning_rate": 1.294744309197851e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64067688, + "step": 95025 + }, + { + "epoch": 2.3215987100872155, + "grad_norm": 0.013217689469456673, + "learning_rate": 1.294662818885419e-06, + "loss": 0.0288, + "num_input_tokens_seen": 64070696, + "step": 95030 + }, + { + "epoch": 2.3217208609190627, + "grad_norm": 0.012650329619646072, + "learning_rate": 1.2945813264301207e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64073704, + "step": 95035 + }, + { + "epoch": 2.32184301175091, + "grad_norm": 0.08450979739427567, + "learning_rate": 1.2944998318325474e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64077160, + "step": 95040 + }, + { + "epoch": 2.321965162582757, + "grad_norm": 0.042767226696014404, + "learning_rate": 1.2944183350932922e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64080552, + "step": 95045 + }, + { + "epoch": 2.3220873134146043, + "grad_norm": 0.0025647578295320272, + "learning_rate": 1.2943368362129477e-06, + "loss": 0.0017, + "num_input_tokens_seen": 64084648, + "step": 95050 + }, + { + "epoch": 2.3222094642464515, + "grad_norm": 19.454233169555664, + "learning_rate": 1.2942553351921063e-06, + "loss": 0.047, + "num_input_tokens_seen": 64088040, + "step": 95055 + }, + { + "epoch": 2.3223316150782987, + "grad_norm": 0.5485376119613647, + "learning_rate": 1.294173832031361e-06, + "loss": 0.0577, + "num_input_tokens_seen": 64091752, + "step": 95060 + }, + { + "epoch": 2.322453765910146, + "grad_norm": 0.5301234126091003, + "learning_rate": 1.2940923267313049e-06, + "loss": 0.0907, + "num_input_tokens_seen": 64094824, + "step": 95065 + }, + { + "epoch": 2.322575916741993, + "grad_norm": 0.02889396995306015, + "learning_rate": 1.2940108192925298e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64098088, + "step": 95070 + }, + { + "epoch": 2.3226980675738402, + "grad_norm": 0.06749209761619568, + "learning_rate": 1.2939293097156295e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64101800, + "step": 95075 + }, + { + "epoch": 2.3228202184056874, + "grad_norm": 34.30602264404297, + "learning_rate": 1.2938477980011958e-06, + "loss": 0.0591, + "num_input_tokens_seen": 64105320, + "step": 95080 + }, + { + "epoch": 2.3229423692375346, + "grad_norm": 0.2598949670791626, + "learning_rate": 1.2937662841498218e-06, + "loss": 0.1721, + "num_input_tokens_seen": 64108776, + "step": 95085 + }, + { + "epoch": 2.323064520069382, + "grad_norm": 0.3084917366504669, + "learning_rate": 1.2936847681621003e-06, + "loss": 0.0525, + "num_input_tokens_seen": 64112232, + "step": 95090 + }, + { + "epoch": 2.323186670901229, + "grad_norm": 0.2027728110551834, + "learning_rate": 1.2936032500386242e-06, + "loss": 0.06, + "num_input_tokens_seen": 64115368, + "step": 95095 + }, + { + "epoch": 2.323308821733076, + "grad_norm": 1.021079659461975, + "learning_rate": 1.2935217297799866e-06, + "loss": 0.0291, + "num_input_tokens_seen": 64119144, + "step": 95100 + }, + { + "epoch": 2.323430972564923, + "grad_norm": 0.01855880580842495, + "learning_rate": 1.2934402073867798e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64123048, + "step": 95105 + }, + { + "epoch": 2.3235531233967706, + "grad_norm": 0.014141903258860111, + "learning_rate": 1.293358682859597e-06, + "loss": 0.0822, + "num_input_tokens_seen": 64126568, + "step": 95110 + }, + { + "epoch": 2.3236752742286173, + "grad_norm": 249.31118774414062, + "learning_rate": 1.293277156199031e-06, + "loss": 0.0745, + "num_input_tokens_seen": 64130216, + "step": 95115 + }, + { + "epoch": 2.3237974250604645, + "grad_norm": 0.007649078033864498, + "learning_rate": 1.2931956274056747e-06, + "loss": 0.055, + "num_input_tokens_seen": 64134120, + "step": 95120 + }, + { + "epoch": 2.3239195758923117, + "grad_norm": 0.0565241314470768, + "learning_rate": 1.2931140964801208e-06, + "loss": 0.1924, + "num_input_tokens_seen": 64137128, + "step": 95125 + }, + { + "epoch": 2.324041726724159, + "grad_norm": 0.04068436101078987, + "learning_rate": 1.293032563422962e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64140648, + "step": 95130 + }, + { + "epoch": 2.324163877556006, + "grad_norm": 0.06296813488006592, + "learning_rate": 1.2929510282347922e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64144104, + "step": 95135 + }, + { + "epoch": 2.3242860283878533, + "grad_norm": 677.3106079101562, + "learning_rate": 1.2928694909162036e-06, + "loss": 0.082, + "num_input_tokens_seen": 64147432, + "step": 95140 + }, + { + "epoch": 2.3244081792197004, + "grad_norm": 0.04508993402123451, + "learning_rate": 1.2927879514677891e-06, + "loss": 0.089, + "num_input_tokens_seen": 64150696, + "step": 95145 + }, + { + "epoch": 2.3245303300515476, + "grad_norm": 100.00861358642578, + "learning_rate": 1.2927064098901418e-06, + "loss": 0.0979, + "num_input_tokens_seen": 64154088, + "step": 95150 + }, + { + "epoch": 2.324652480883395, + "grad_norm": 0.8293306231498718, + "learning_rate": 1.2926248661838548e-06, + "loss": 0.0014, + "num_input_tokens_seen": 64157416, + "step": 95155 + }, + { + "epoch": 2.324774631715242, + "grad_norm": 0.0203672107309103, + "learning_rate": 1.2925433203495213e-06, + "loss": 0.0361, + "num_input_tokens_seen": 64160872, + "step": 95160 + }, + { + "epoch": 2.324896782547089, + "grad_norm": 0.14864440262317657, + "learning_rate": 1.2924617723877337e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64164200, + "step": 95165 + }, + { + "epoch": 2.3250189333789364, + "grad_norm": 32.7774543762207, + "learning_rate": 1.292380222299086e-06, + "loss": 0.0714, + "num_input_tokens_seen": 64167784, + "step": 95170 + }, + { + "epoch": 2.3251410842107836, + "grad_norm": 0.17909160256385803, + "learning_rate": 1.2922986700841704e-06, + "loss": 0.0007, + "num_input_tokens_seen": 64171368, + "step": 95175 + }, + { + "epoch": 2.3252632350426308, + "grad_norm": 0.01783503033220768, + "learning_rate": 1.2922171157435802e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64174696, + "step": 95180 + }, + { + "epoch": 2.325385385874478, + "grad_norm": 0.2182997614145279, + "learning_rate": 1.2921355592779083e-06, + "loss": 0.0627, + "num_input_tokens_seen": 64178280, + "step": 95185 + }, + { + "epoch": 2.325507536706325, + "grad_norm": 0.0018611556151881814, + "learning_rate": 1.2920540006877483e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64182120, + "step": 95190 + }, + { + "epoch": 2.3256296875381723, + "grad_norm": 5.40140962600708, + "learning_rate": 1.2919724399736931e-06, + "loss": 0.0009, + "num_input_tokens_seen": 64185064, + "step": 95195 + }, + { + "epoch": 2.325751838370019, + "grad_norm": 0.4457661807537079, + "learning_rate": 1.2918908771363358e-06, + "loss": 0.0502, + "num_input_tokens_seen": 64188456, + "step": 95200 + }, + { + "epoch": 2.3258739892018663, + "grad_norm": 0.22518938779830933, + "learning_rate": 1.2918093121762694e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64191720, + "step": 95205 + }, + { + "epoch": 2.3259961400337135, + "grad_norm": 0.08013353496789932, + "learning_rate": 1.2917277450940874e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64195048, + "step": 95210 + }, + { + "epoch": 2.3261182908655607, + "grad_norm": 0.12415852397680283, + "learning_rate": 1.2916461758903823e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64198440, + "step": 95215 + }, + { + "epoch": 2.326240441697408, + "grad_norm": 0.011967546306550503, + "learning_rate": 1.2915646045657483e-06, + "loss": 0.049, + "num_input_tokens_seen": 64202088, + "step": 95220 + }, + { + "epoch": 2.326362592529255, + "grad_norm": 0.09109295904636383, + "learning_rate": 1.2914830311207778e-06, + "loss": 0.0608, + "num_input_tokens_seen": 64205096, + "step": 95225 + }, + { + "epoch": 2.3264847433611022, + "grad_norm": 0.07817357033491135, + "learning_rate": 1.2914014555560643e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64208232, + "step": 95230 + }, + { + "epoch": 2.3266068941929494, + "grad_norm": 44.14269256591797, + "learning_rate": 1.2913198778722013e-06, + "loss": 0.0907, + "num_input_tokens_seen": 64211624, + "step": 95235 + }, + { + "epoch": 2.3267290450247966, + "grad_norm": 0.034214459359645844, + "learning_rate": 1.2912382980697816e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64214760, + "step": 95240 + }, + { + "epoch": 2.326851195856644, + "grad_norm": 19.30805206298828, + "learning_rate": 1.2911567161493985e-06, + "loss": 0.0725, + "num_input_tokens_seen": 64217960, + "step": 95245 + }, + { + "epoch": 2.326973346688491, + "grad_norm": 0.19850492477416992, + "learning_rate": 1.2910751321116455e-06, + "loss": 0.0575, + "num_input_tokens_seen": 64221288, + "step": 95250 + }, + { + "epoch": 2.327095497520338, + "grad_norm": 0.016696002334356308, + "learning_rate": 1.2909935459571159e-06, + "loss": 0.0007, + "num_input_tokens_seen": 64225000, + "step": 95255 + }, + { + "epoch": 2.3272176483521854, + "grad_norm": 17.366100311279297, + "learning_rate": 1.290911957686403e-06, + "loss": 0.0886, + "num_input_tokens_seen": 64228328, + "step": 95260 + }, + { + "epoch": 2.3273397991840326, + "grad_norm": 0.14225991070270538, + "learning_rate": 1.2908303673001e-06, + "loss": 0.1204, + "num_input_tokens_seen": 64231912, + "step": 95265 + }, + { + "epoch": 2.3274619500158797, + "grad_norm": 0.05388676002621651, + "learning_rate": 1.2907487747988007e-06, + "loss": 0.0006, + "num_input_tokens_seen": 64235112, + "step": 95270 + }, + { + "epoch": 2.327584100847727, + "grad_norm": 0.008519626222550869, + "learning_rate": 1.2906671801830978e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64238824, + "step": 95275 + }, + { + "epoch": 2.327706251679574, + "grad_norm": 23.24995231628418, + "learning_rate": 1.290585583453585e-06, + "loss": 0.0632, + "num_input_tokens_seen": 64241896, + "step": 95280 + }, + { + "epoch": 2.327828402511421, + "grad_norm": 0.05381803587079048, + "learning_rate": 1.2905039846108558e-06, + "loss": 0.044, + "num_input_tokens_seen": 64245352, + "step": 95285 + }, + { + "epoch": 2.3279505533432685, + "grad_norm": 39.409934997558594, + "learning_rate": 1.2904223836555035e-06, + "loss": 0.044, + "num_input_tokens_seen": 64248872, + "step": 95290 + }, + { + "epoch": 2.3280727041751152, + "grad_norm": 0.16154932975769043, + "learning_rate": 1.2903407805881215e-06, + "loss": 0.0008, + "num_input_tokens_seen": 64252584, + "step": 95295 + }, + { + "epoch": 2.3281948550069624, + "grad_norm": 0.07096408307552338, + "learning_rate": 1.290259175409303e-06, + "loss": 0.0401, + "num_input_tokens_seen": 64255656, + "step": 95300 + }, + { + "epoch": 2.3283170058388096, + "grad_norm": 0.02677714265882969, + "learning_rate": 1.290177568119642e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64259240, + "step": 95305 + }, + { + "epoch": 2.328439156670657, + "grad_norm": 0.06973837316036224, + "learning_rate": 1.2900959587197314e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64262952, + "step": 95310 + }, + { + "epoch": 2.328561307502504, + "grad_norm": 0.02655634470283985, + "learning_rate": 1.2900143472101652e-06, + "loss": 0.0008, + "num_input_tokens_seen": 64266024, + "step": 95315 + }, + { + "epoch": 2.328683458334351, + "grad_norm": 0.021072372794151306, + "learning_rate": 1.2899327335915364e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64269288, + "step": 95320 + }, + { + "epoch": 2.3288056091661984, + "grad_norm": 0.011516228318214417, + "learning_rate": 1.2898511178644394e-06, + "loss": 0.0527, + "num_input_tokens_seen": 64272488, + "step": 95325 + }, + { + "epoch": 2.3289277599980456, + "grad_norm": 0.009769659489393234, + "learning_rate": 1.2897695000294666e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64276008, + "step": 95330 + }, + { + "epoch": 2.3290499108298928, + "grad_norm": 0.003023561555892229, + "learning_rate": 1.2896878800872122e-06, + "loss": 0.0, + "num_input_tokens_seen": 64279208, + "step": 95335 + }, + { + "epoch": 2.32917206166174, + "grad_norm": 0.006055025849491358, + "learning_rate": 1.2896062580382693e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64282088, + "step": 95340 + }, + { + "epoch": 2.329294212493587, + "grad_norm": 0.8509352803230286, + "learning_rate": 1.289524633883232e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64285864, + "step": 95345 + }, + { + "epoch": 2.3294163633254343, + "grad_norm": 12.91662883758545, + "learning_rate": 1.2894430076226939e-06, + "loss": 0.0603, + "num_input_tokens_seen": 64289256, + "step": 95350 + }, + { + "epoch": 2.3295385141572815, + "grad_norm": 0.005020641256123781, + "learning_rate": 1.2893613792572482e-06, + "loss": 0.0, + "num_input_tokens_seen": 64292776, + "step": 95355 + }, + { + "epoch": 2.3296606649891287, + "grad_norm": 0.010004975832998753, + "learning_rate": 1.2892797487874888e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64295656, + "step": 95360 + }, + { + "epoch": 2.329782815820976, + "grad_norm": 0.0014859403017908335, + "learning_rate": 1.289198116214009e-06, + "loss": 0.0526, + "num_input_tokens_seen": 64298600, + "step": 95365 + }, + { + "epoch": 2.329904966652823, + "grad_norm": 0.16149680316448212, + "learning_rate": 1.289116481537403e-06, + "loss": 0.0571, + "num_input_tokens_seen": 64301864, + "step": 95370 + }, + { + "epoch": 2.3300271174846703, + "grad_norm": 212.5308837890625, + "learning_rate": 1.2890348447582642e-06, + "loss": 0.1041, + "num_input_tokens_seen": 64304552, + "step": 95375 + }, + { + "epoch": 2.330149268316517, + "grad_norm": 0.029706710949540138, + "learning_rate": 1.288953205877186e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64307624, + "step": 95380 + }, + { + "epoch": 2.330271419148364, + "grad_norm": 32.41347122192383, + "learning_rate": 1.2888715648947629e-06, + "loss": 0.1376, + "num_input_tokens_seen": 64311016, + "step": 95385 + }, + { + "epoch": 2.3303935699802114, + "grad_norm": 0.09344623237848282, + "learning_rate": 1.2887899218115876e-06, + "loss": 0.0712, + "num_input_tokens_seen": 64314280, + "step": 95390 + }, + { + "epoch": 2.3305157208120586, + "grad_norm": 0.004723446909338236, + "learning_rate": 1.2887082766282545e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64317416, + "step": 95395 + }, + { + "epoch": 2.330637871643906, + "grad_norm": 0.014271215535700321, + "learning_rate": 1.288626629345357e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64320936, + "step": 95400 + }, + { + "epoch": 2.330760022475753, + "grad_norm": 0.02553671970963478, + "learning_rate": 1.2885449799634888e-06, + "loss": 0.0454, + "num_input_tokens_seen": 64324200, + "step": 95405 + }, + { + "epoch": 2.3308821733076, + "grad_norm": 0.14257365465164185, + "learning_rate": 1.2884633284832446e-06, + "loss": 0.1252, + "num_input_tokens_seen": 64327464, + "step": 95410 + }, + { + "epoch": 2.3310043241394474, + "grad_norm": 0.06047545745968819, + "learning_rate": 1.288381674905217e-06, + "loss": 0.1476, + "num_input_tokens_seen": 64330728, + "step": 95415 + }, + { + "epoch": 2.3311264749712945, + "grad_norm": 0.005019609350711107, + "learning_rate": 1.2883000192300003e-06, + "loss": 0.0612, + "num_input_tokens_seen": 64334120, + "step": 95420 + }, + { + "epoch": 2.3312486258031417, + "grad_norm": 0.009447050280869007, + "learning_rate": 1.2882183614581885e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64337192, + "step": 95425 + }, + { + "epoch": 2.331370776634989, + "grad_norm": 0.034353192895650864, + "learning_rate": 1.2881367015903752e-06, + "loss": 0.001, + "num_input_tokens_seen": 64340328, + "step": 95430 + }, + { + "epoch": 2.331492927466836, + "grad_norm": 0.10627281665802002, + "learning_rate": 1.2880550396271543e-06, + "loss": 0.0007, + "num_input_tokens_seen": 64343400, + "step": 95435 + }, + { + "epoch": 2.3316150782986833, + "grad_norm": 0.005389807280153036, + "learning_rate": 1.2879733755691196e-06, + "loss": 0.0337, + "num_input_tokens_seen": 64346472, + "step": 95440 + }, + { + "epoch": 2.3317372291305305, + "grad_norm": 0.010320378467440605, + "learning_rate": 1.287891709416865e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64350056, + "step": 95445 + }, + { + "epoch": 2.3318593799623777, + "grad_norm": 0.26166412234306335, + "learning_rate": 1.2878100411709847e-06, + "loss": 0.0443, + "num_input_tokens_seen": 64353128, + "step": 95450 + }, + { + "epoch": 2.331981530794225, + "grad_norm": 0.2837740480899811, + "learning_rate": 1.2877283708320724e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64356392, + "step": 95455 + }, + { + "epoch": 2.332103681626072, + "grad_norm": 0.01635722443461418, + "learning_rate": 1.2876466984007217e-06, + "loss": 0.0379, + "num_input_tokens_seen": 64359592, + "step": 95460 + }, + { + "epoch": 2.332225832457919, + "grad_norm": 0.0667223259806633, + "learning_rate": 1.2875650238775268e-06, + "loss": 0.0756, + "num_input_tokens_seen": 64363240, + "step": 95465 + }, + { + "epoch": 2.3323479832897664, + "grad_norm": 0.02749939262866974, + "learning_rate": 1.2874833472630819e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64367016, + "step": 95470 + }, + { + "epoch": 2.332470134121613, + "grad_norm": 0.019013844430446625, + "learning_rate": 1.2874016685579807e-06, + "loss": 0.0408, + "num_input_tokens_seen": 64370152, + "step": 95475 + }, + { + "epoch": 2.3325922849534604, + "grad_norm": 0.08791220188140869, + "learning_rate": 1.2873199877628177e-06, + "loss": 0.0529, + "num_input_tokens_seen": 64373672, + "step": 95480 + }, + { + "epoch": 2.3327144357853076, + "grad_norm": 0.15154674649238586, + "learning_rate": 1.2872383048781862e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64376488, + "step": 95485 + }, + { + "epoch": 2.3328365866171548, + "grad_norm": 0.005082068033516407, + "learning_rate": 1.2871566199046801e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64379944, + "step": 95490 + }, + { + "epoch": 2.332958737449002, + "grad_norm": 0.07538677006959915, + "learning_rate": 1.287074932842894e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64382760, + "step": 95495 + }, + { + "epoch": 2.333080888280849, + "grad_norm": 0.051306627690792084, + "learning_rate": 1.286993243693422e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64386024, + "step": 95500 + }, + { + "epoch": 2.3332030391126963, + "grad_norm": 0.0033834660425782204, + "learning_rate": 1.2869115524568577e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64389224, + "step": 95505 + }, + { + "epoch": 2.3333251899445435, + "grad_norm": 0.002595925237983465, + "learning_rate": 1.2868298591337955e-06, + "loss": 0.1076, + "num_input_tokens_seen": 64392296, + "step": 95510 + }, + { + "epoch": 2.3334473407763907, + "grad_norm": 0.07373785972595215, + "learning_rate": 1.2867481637248294e-06, + "loss": 0.113, + "num_input_tokens_seen": 64395880, + "step": 95515 + }, + { + "epoch": 2.333569491608238, + "grad_norm": 0.0118758799508214, + "learning_rate": 1.2866664662305537e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64399208, + "step": 95520 + }, + { + "epoch": 2.333691642440085, + "grad_norm": 1.4112813472747803, + "learning_rate": 1.2865847666515622e-06, + "loss": 0.0492, + "num_input_tokens_seen": 64402024, + "step": 95525 + }, + { + "epoch": 2.3338137932719323, + "grad_norm": 76.51634216308594, + "learning_rate": 1.2865030649884493e-06, + "loss": 0.0528, + "num_input_tokens_seen": 64405608, + "step": 95530 + }, + { + "epoch": 2.3339359441037795, + "grad_norm": 0.04707219451665878, + "learning_rate": 1.2864213612418088e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64409128, + "step": 95535 + }, + { + "epoch": 2.3340580949356267, + "grad_norm": 73.23444366455078, + "learning_rate": 1.2863396554122355e-06, + "loss": 0.0845, + "num_input_tokens_seen": 64412776, + "step": 95540 + }, + { + "epoch": 2.334180245767474, + "grad_norm": 0.012980327010154724, + "learning_rate": 1.286257947500323e-06, + "loss": 0.1, + "num_input_tokens_seen": 64416104, + "step": 95545 + }, + { + "epoch": 2.3343023965993206, + "grad_norm": 0.0025070449337363243, + "learning_rate": 1.2861762375066658e-06, + "loss": 0.0377, + "num_input_tokens_seen": 64419240, + "step": 95550 + }, + { + "epoch": 2.3344245474311682, + "grad_norm": 0.1651676595211029, + "learning_rate": 1.2860945254318578e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64422696, + "step": 95555 + }, + { + "epoch": 2.334546698263015, + "grad_norm": 0.9793582558631897, + "learning_rate": 1.2860128112764934e-06, + "loss": 0.0908, + "num_input_tokens_seen": 64425768, + "step": 95560 + }, + { + "epoch": 2.334668849094862, + "grad_norm": 0.0149913365021348, + "learning_rate": 1.2859310950411672e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64429096, + "step": 95565 + }, + { + "epoch": 2.3347909999267094, + "grad_norm": 0.0045698219910264015, + "learning_rate": 1.285849376726473e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64433064, + "step": 95570 + }, + { + "epoch": 2.3349131507585565, + "grad_norm": 0.029431438073515892, + "learning_rate": 1.2857676563330055e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64436200, + "step": 95575 + }, + { + "epoch": 2.3350353015904037, + "grad_norm": 0.42357417941093445, + "learning_rate": 1.2856859338613585e-06, + "loss": 0.0737, + "num_input_tokens_seen": 64439656, + "step": 95580 + }, + { + "epoch": 2.335157452422251, + "grad_norm": 0.01732746884226799, + "learning_rate": 1.2856042093121267e-06, + "loss": 0.1409, + "num_input_tokens_seen": 64442792, + "step": 95585 + }, + { + "epoch": 2.335279603254098, + "grad_norm": 0.03916094824671745, + "learning_rate": 1.2855224826859045e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64445992, + "step": 95590 + }, + { + "epoch": 2.3354017540859453, + "grad_norm": 0.006325630936771631, + "learning_rate": 1.2854407539832855e-06, + "loss": 0.0564, + "num_input_tokens_seen": 64449640, + "step": 95595 + }, + { + "epoch": 2.3355239049177925, + "grad_norm": 0.034851573407649994, + "learning_rate": 1.2853590232048648e-06, + "loss": 0.0468, + "num_input_tokens_seen": 64452648, + "step": 95600 + }, + { + "epoch": 2.3356460557496397, + "grad_norm": 47.87200164794922, + "learning_rate": 1.2852772903512366e-06, + "loss": 0.0495, + "num_input_tokens_seen": 64455784, + "step": 95605 + }, + { + "epoch": 2.335768206581487, + "grad_norm": 0.03328532353043556, + "learning_rate": 1.285195555422995e-06, + "loss": 0.0705, + "num_input_tokens_seen": 64458664, + "step": 95610 + }, + { + "epoch": 2.335890357413334, + "grad_norm": 0.1640070378780365, + "learning_rate": 1.2851138184207345e-06, + "loss": 0.0775, + "num_input_tokens_seen": 64462184, + "step": 95615 + }, + { + "epoch": 2.3360125082451813, + "grad_norm": 0.1048487052321434, + "learning_rate": 1.2850320793450497e-06, + "loss": 0.035, + "num_input_tokens_seen": 64465768, + "step": 95620 + }, + { + "epoch": 2.3361346590770284, + "grad_norm": 1.1335134506225586, + "learning_rate": 1.2849503381965354e-06, + "loss": 0.0754, + "num_input_tokens_seen": 64469096, + "step": 95625 + }, + { + "epoch": 2.3362568099088756, + "grad_norm": 0.07938817888498306, + "learning_rate": 1.2848685949757853e-06, + "loss": 0.0429, + "num_input_tokens_seen": 64472296, + "step": 95630 + }, + { + "epoch": 2.336378960740723, + "grad_norm": 0.04152943566441536, + "learning_rate": 1.2847868496833942e-06, + "loss": 0.0127, + "num_input_tokens_seen": 64475112, + "step": 95635 + }, + { + "epoch": 2.33650111157257, + "grad_norm": 0.021501827985048294, + "learning_rate": 1.2847051023199566e-06, + "loss": 0.0723, + "num_input_tokens_seen": 64478568, + "step": 95640 + }, + { + "epoch": 2.3366232624044168, + "grad_norm": 0.050651174038648605, + "learning_rate": 1.2846233528860667e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64482088, + "step": 95645 + }, + { + "epoch": 2.336745413236264, + "grad_norm": 0.13994047045707703, + "learning_rate": 1.2845416013823195e-06, + "loss": 0.0469, + "num_input_tokens_seen": 64485288, + "step": 95650 + }, + { + "epoch": 2.336867564068111, + "grad_norm": 0.02746342495083809, + "learning_rate": 1.284459847809309e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64488936, + "step": 95655 + }, + { + "epoch": 2.3369897148999583, + "grad_norm": 0.2462010383605957, + "learning_rate": 1.28437809216763e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64492840, + "step": 95660 + }, + { + "epoch": 2.3371118657318055, + "grad_norm": 0.20011571049690247, + "learning_rate": 1.284296334457877e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64496616, + "step": 95665 + }, + { + "epoch": 2.3372340165636527, + "grad_norm": 0.021411439403891563, + "learning_rate": 1.2842145746806448e-06, + "loss": 0.0029, + "num_input_tokens_seen": 64500008, + "step": 95670 + }, + { + "epoch": 2.3373561673955, + "grad_norm": 0.018261928111314774, + "learning_rate": 1.2841328128365275e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64503720, + "step": 95675 + }, + { + "epoch": 2.337478318227347, + "grad_norm": 0.01709025539457798, + "learning_rate": 1.2840510489261202e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64506792, + "step": 95680 + }, + { + "epoch": 2.3376004690591943, + "grad_norm": 0.031307484954595566, + "learning_rate": 1.2839692829500172e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64509864, + "step": 95685 + }, + { + "epoch": 2.3377226198910415, + "grad_norm": 108.65628814697266, + "learning_rate": 1.2838875149088133e-06, + "loss": 0.034, + "num_input_tokens_seen": 64513000, + "step": 95690 + }, + { + "epoch": 2.3378447707228887, + "grad_norm": 0.01993979513645172, + "learning_rate": 1.2838057448031028e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64516520, + "step": 95695 + }, + { + "epoch": 2.337966921554736, + "grad_norm": 0.001427598879672587, + "learning_rate": 1.2837239726334807e-06, + "loss": 0.0477, + "num_input_tokens_seen": 64520552, + "step": 95700 + }, + { + "epoch": 2.338089072386583, + "grad_norm": 0.017566991969943047, + "learning_rate": 1.2836421984005416e-06, + "loss": 0.0413, + "num_input_tokens_seen": 64523624, + "step": 95705 + }, + { + "epoch": 2.3382112232184302, + "grad_norm": 0.02006099559366703, + "learning_rate": 1.2835604221048801e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64526568, + "step": 95710 + }, + { + "epoch": 2.3383333740502774, + "grad_norm": 0.008515398018062115, + "learning_rate": 1.283478643747091e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64530472, + "step": 95715 + }, + { + "epoch": 2.3384555248821246, + "grad_norm": 0.01195614319294691, + "learning_rate": 1.2833968633277685e-06, + "loss": 0.0059, + "num_input_tokens_seen": 64533928, + "step": 95720 + }, + { + "epoch": 2.338577675713972, + "grad_norm": 0.06796823441982269, + "learning_rate": 1.2833150808475085e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64537256, + "step": 95725 + }, + { + "epoch": 2.3386998265458185, + "grad_norm": 0.013403164222836494, + "learning_rate": 1.2832332963069045e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64540520, + "step": 95730 + }, + { + "epoch": 2.338821977377666, + "grad_norm": 0.001977914245799184, + "learning_rate": 1.2831515097065521e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64544168, + "step": 95735 + }, + { + "epoch": 2.338944128209513, + "grad_norm": 0.006951137911528349, + "learning_rate": 1.2830697210470455e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64547624, + "step": 95740 + }, + { + "epoch": 2.33906627904136, + "grad_norm": 0.0032081448007375, + "learning_rate": 1.28298793032898e-06, + "loss": 0.056, + "num_input_tokens_seen": 64551144, + "step": 95745 + }, + { + "epoch": 2.3391884298732073, + "grad_norm": 0.08706554770469666, + "learning_rate": 1.2829061375529503e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64554344, + "step": 95750 + }, + { + "epoch": 2.3393105807050545, + "grad_norm": 0.002248481148853898, + "learning_rate": 1.2828243427195507e-06, + "loss": 0.0604, + "num_input_tokens_seen": 64557736, + "step": 95755 + }, + { + "epoch": 2.3394327315369017, + "grad_norm": 0.016986599192023277, + "learning_rate": 1.2827425458293766e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64560808, + "step": 95760 + }, + { + "epoch": 2.339554882368749, + "grad_norm": 0.0013638599775731564, + "learning_rate": 1.2826607468830227e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64564712, + "step": 95765 + }, + { + "epoch": 2.339677033200596, + "grad_norm": 0.022970302030444145, + "learning_rate": 1.2825789458810836e-06, + "loss": 0.0019, + "num_input_tokens_seen": 64567656, + "step": 95770 + }, + { + "epoch": 2.3397991840324432, + "grad_norm": 0.4640812873840332, + "learning_rate": 1.2824971428241544e-06, + "loss": 0.0279, + "num_input_tokens_seen": 64570792, + "step": 95775 + }, + { + "epoch": 2.3399213348642904, + "grad_norm": 0.05848347395658493, + "learning_rate": 1.2824153377128301e-06, + "loss": 0.0007, + "num_input_tokens_seen": 64574440, + "step": 95780 + }, + { + "epoch": 2.3400434856961376, + "grad_norm": 0.0471373051404953, + "learning_rate": 1.2823335305477058e-06, + "loss": 0.0764, + "num_input_tokens_seen": 64577896, + "step": 95785 + }, + { + "epoch": 2.340165636527985, + "grad_norm": 0.19193512201309204, + "learning_rate": 1.2822517213293756e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64580904, + "step": 95790 + }, + { + "epoch": 2.340287787359832, + "grad_norm": 0.17396211624145508, + "learning_rate": 1.2821699100584354e-06, + "loss": 0.1267, + "num_input_tokens_seen": 64584616, + "step": 95795 + }, + { + "epoch": 2.340409938191679, + "grad_norm": 0.05379107594490051, + "learning_rate": 1.2820880967354798e-06, + "loss": 0.0413, + "num_input_tokens_seen": 64587752, + "step": 95800 + }, + { + "epoch": 2.3405320890235264, + "grad_norm": 0.06677083671092987, + "learning_rate": 1.2820062813611033e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64591208, + "step": 95805 + }, + { + "epoch": 2.3406542398553736, + "grad_norm": 0.035529084503650665, + "learning_rate": 1.2819244639359012e-06, + "loss": 0.0549, + "num_input_tokens_seen": 64594856, + "step": 95810 + }, + { + "epoch": 2.3407763906872208, + "grad_norm": 0.007604526821523905, + "learning_rate": 1.2818426444604686e-06, + "loss": 0.0697, + "num_input_tokens_seen": 64598120, + "step": 95815 + }, + { + "epoch": 2.340898541519068, + "grad_norm": 0.015153356827795506, + "learning_rate": 1.2817608229354009e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64601448, + "step": 95820 + }, + { + "epoch": 2.3410206923509147, + "grad_norm": 23.192989349365234, + "learning_rate": 1.2816789993612925e-06, + "loss": 0.1384, + "num_input_tokens_seen": 64604648, + "step": 95825 + }, + { + "epoch": 2.341142843182762, + "grad_norm": 0.0845850259065628, + "learning_rate": 1.2815971737387385e-06, + "loss": 0.0585, + "num_input_tokens_seen": 64607912, + "step": 95830 + }, + { + "epoch": 2.341264994014609, + "grad_norm": 94.90465545654297, + "learning_rate": 1.2815153460683343e-06, + "loss": 0.0027, + "num_input_tokens_seen": 64611432, + "step": 95835 + }, + { + "epoch": 2.3413871448464563, + "grad_norm": 0.023676864802837372, + "learning_rate": 1.2814335163506746e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64614632, + "step": 95840 + }, + { + "epoch": 2.3415092956783035, + "grad_norm": 0.013862925581634045, + "learning_rate": 1.281351684586355e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64617640, + "step": 95845 + }, + { + "epoch": 2.3416314465101506, + "grad_norm": 0.08961334079504013, + "learning_rate": 1.28126985077597e-06, + "loss": 0.0746, + "num_input_tokens_seen": 64620712, + "step": 95850 + }, + { + "epoch": 2.341753597341998, + "grad_norm": 0.023721953853964806, + "learning_rate": 1.281188014920115e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64623912, + "step": 95855 + }, + { + "epoch": 2.341875748173845, + "grad_norm": 0.040392301976680756, + "learning_rate": 1.2811061770193852e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64627560, + "step": 95860 + }, + { + "epoch": 2.341997899005692, + "grad_norm": 264.14202880859375, + "learning_rate": 1.2810243370743758e-06, + "loss": 0.0355, + "num_input_tokens_seen": 64631272, + "step": 95865 + }, + { + "epoch": 2.3421200498375394, + "grad_norm": 0.0721978172659874, + "learning_rate": 1.2809424950856814e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64634792, + "step": 95870 + }, + { + "epoch": 2.3422422006693866, + "grad_norm": 0.15529726445674896, + "learning_rate": 1.280860651053898e-06, + "loss": 0.0259, + "num_input_tokens_seen": 64638632, + "step": 95875 + }, + { + "epoch": 2.342364351501234, + "grad_norm": 0.18332402408123016, + "learning_rate": 1.2807788049796201e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64642024, + "step": 95880 + }, + { + "epoch": 2.342486502333081, + "grad_norm": 0.049804773181676865, + "learning_rate": 1.2806969568634436e-06, + "loss": 0.1175, + "num_input_tokens_seen": 64645416, + "step": 95885 + }, + { + "epoch": 2.342608653164928, + "grad_norm": 59.60102844238281, + "learning_rate": 1.2806151067059632e-06, + "loss": 0.0397, + "num_input_tokens_seen": 64648680, + "step": 95890 + }, + { + "epoch": 2.3427308039967754, + "grad_norm": 0.15184377133846283, + "learning_rate": 1.280533254507774e-06, + "loss": 0.0945, + "num_input_tokens_seen": 64651944, + "step": 95895 + }, + { + "epoch": 2.3428529548286225, + "grad_norm": 0.13011978566646576, + "learning_rate": 1.280451400269472e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64655400, + "step": 95900 + }, + { + "epoch": 2.3429751056604697, + "grad_norm": 0.016899550333619118, + "learning_rate": 1.2803695439916515e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64659112, + "step": 95905 + }, + { + "epoch": 2.3430972564923165, + "grad_norm": 0.003829409135505557, + "learning_rate": 1.2802876856749088e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64662312, + "step": 95910 + }, + { + "epoch": 2.343219407324164, + "grad_norm": 0.017823584377765656, + "learning_rate": 1.2802058253198383e-06, + "loss": 0.0, + "num_input_tokens_seen": 64665832, + "step": 95915 + }, + { + "epoch": 2.343341558156011, + "grad_norm": 0.08013685047626495, + "learning_rate": 1.2801239629270356e-06, + "loss": 0.0914, + "num_input_tokens_seen": 64669160, + "step": 95920 + }, + { + "epoch": 2.343463708987858, + "grad_norm": 109.04890441894531, + "learning_rate": 1.2800420984970962e-06, + "loss": 0.0702, + "num_input_tokens_seen": 64672552, + "step": 95925 + }, + { + "epoch": 2.3435858598197052, + "grad_norm": 0.05170980468392372, + "learning_rate": 1.2799602320306151e-06, + "loss": 0.067, + "num_input_tokens_seen": 64675752, + "step": 95930 + }, + { + "epoch": 2.3437080106515524, + "grad_norm": 0.0404609739780426, + "learning_rate": 1.2798783635281881e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64679272, + "step": 95935 + }, + { + "epoch": 2.3438301614833996, + "grad_norm": 0.017396869137883186, + "learning_rate": 1.2797964929904106e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64682792, + "step": 95940 + }, + { + "epoch": 2.343952312315247, + "grad_norm": 0.023043381050229073, + "learning_rate": 1.2797146204178775e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64686184, + "step": 95945 + }, + { + "epoch": 2.344074463147094, + "grad_norm": 135.3673553466797, + "learning_rate": 1.2796327458111847e-06, + "loss": 0.029, + "num_input_tokens_seen": 64689256, + "step": 95950 + }, + { + "epoch": 2.344196613978941, + "grad_norm": 0.03290106728672981, + "learning_rate": 1.2795508691709272e-06, + "loss": 0.0989, + "num_input_tokens_seen": 64692648, + "step": 95955 + }, + { + "epoch": 2.3443187648107884, + "grad_norm": 0.007633588742464781, + "learning_rate": 1.2794689904977006e-06, + "loss": 0.0515, + "num_input_tokens_seen": 64695912, + "step": 95960 + }, + { + "epoch": 2.3444409156426356, + "grad_norm": 0.28993934392929077, + "learning_rate": 1.2793871097921e-06, + "loss": 0.0006, + "num_input_tokens_seen": 64698856, + "step": 95965 + }, + { + "epoch": 2.3445630664744828, + "grad_norm": 0.01689867116510868, + "learning_rate": 1.2793052270547215e-06, + "loss": 0.0502, + "num_input_tokens_seen": 64701928, + "step": 95970 + }, + { + "epoch": 2.34468521730633, + "grad_norm": 31.987777709960938, + "learning_rate": 1.2792233422861603e-06, + "loss": 0.1532, + "num_input_tokens_seen": 64705128, + "step": 95975 + }, + { + "epoch": 2.344807368138177, + "grad_norm": 0.19975918531417847, + "learning_rate": 1.2791414554870116e-06, + "loss": 0.0481, + "num_input_tokens_seen": 64708072, + "step": 95980 + }, + { + "epoch": 2.3449295189700243, + "grad_norm": 0.05912720412015915, + "learning_rate": 1.2790595666578717e-06, + "loss": 0.037, + "num_input_tokens_seen": 64711144, + "step": 95985 + }, + { + "epoch": 2.3450516698018715, + "grad_norm": 0.5126543641090393, + "learning_rate": 1.2789776757993352e-06, + "loss": 0.0006, + "num_input_tokens_seen": 64714536, + "step": 95990 + }, + { + "epoch": 2.3451738206337187, + "grad_norm": 0.07353532314300537, + "learning_rate": 1.278895782911998e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64717480, + "step": 95995 + }, + { + "epoch": 2.345295971465566, + "grad_norm": 0.0022288633044809103, + "learning_rate": 1.2788138879964557e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64721000, + "step": 96000 + }, + { + "epoch": 2.3454181222974126, + "grad_norm": 10.458391189575195, + "learning_rate": 1.2787319910533036e-06, + "loss": 0.041, + "num_input_tokens_seen": 64724456, + "step": 96005 + }, + { + "epoch": 2.34554027312926, + "grad_norm": 0.010931176133453846, + "learning_rate": 1.2786500920831377e-06, + "loss": 0.049, + "num_input_tokens_seen": 64728104, + "step": 96010 + }, + { + "epoch": 2.345662423961107, + "grad_norm": 0.0860881358385086, + "learning_rate": 1.2785681910865535e-06, + "loss": 0.0442, + "num_input_tokens_seen": 64731112, + "step": 96015 + }, + { + "epoch": 2.345784574792954, + "grad_norm": 0.17197354137897491, + "learning_rate": 1.2784862880641462e-06, + "loss": 0.0736, + "num_input_tokens_seen": 64734376, + "step": 96020 + }, + { + "epoch": 2.3459067256248014, + "grad_norm": 0.14629042148590088, + "learning_rate": 1.2784043830165119e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64737768, + "step": 96025 + }, + { + "epoch": 2.3460288764566486, + "grad_norm": 0.0716601237654686, + "learning_rate": 1.2783224759442459e-06, + "loss": 0.0594, + "num_input_tokens_seen": 64740776, + "step": 96030 + }, + { + "epoch": 2.346151027288496, + "grad_norm": 0.009555949829518795, + "learning_rate": 1.2782405668479442e-06, + "loss": 0.0458, + "num_input_tokens_seen": 64743976, + "step": 96035 + }, + { + "epoch": 2.346273178120343, + "grad_norm": 0.02965528704226017, + "learning_rate": 1.2781586557282022e-06, + "loss": 0.0798, + "num_input_tokens_seen": 64747176, + "step": 96040 + }, + { + "epoch": 2.34639532895219, + "grad_norm": 1.9350935220718384, + "learning_rate": 1.2780767425856155e-06, + "loss": 0.0426, + "num_input_tokens_seen": 64750248, + "step": 96045 + }, + { + "epoch": 2.3465174797840374, + "grad_norm": 0.12773877382278442, + "learning_rate": 1.2779948274207802e-06, + "loss": 0.0006, + "num_input_tokens_seen": 64753192, + "step": 96050 + }, + { + "epoch": 2.3466396306158845, + "grad_norm": 29.7601261138916, + "learning_rate": 1.2779129102342915e-06, + "loss": 0.0522, + "num_input_tokens_seen": 64756584, + "step": 96055 + }, + { + "epoch": 2.3467617814477317, + "grad_norm": 0.022628452628850937, + "learning_rate": 1.2778309910267454e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64760168, + "step": 96060 + }, + { + "epoch": 2.346883932279579, + "grad_norm": 0.03361308202147484, + "learning_rate": 1.2777490697987378e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64763432, + "step": 96065 + }, + { + "epoch": 2.347006083111426, + "grad_norm": 0.05504537373781204, + "learning_rate": 1.277667146550864e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64766568, + "step": 96070 + }, + { + "epoch": 2.3471282339432733, + "grad_norm": 0.10005243122577667, + "learning_rate": 1.2775852212837202e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64769768, + "step": 96075 + }, + { + "epoch": 2.3472503847751205, + "grad_norm": 0.3429010808467865, + "learning_rate": 1.277503293997902e-06, + "loss": 0.0669, + "num_input_tokens_seen": 64772648, + "step": 96080 + }, + { + "epoch": 2.3473725356069677, + "grad_norm": 0.10272646695375443, + "learning_rate": 1.277421364694005e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64775912, + "step": 96085 + }, + { + "epoch": 2.3474946864388144, + "grad_norm": 0.3535402715206146, + "learning_rate": 1.2773394333726253e-06, + "loss": 0.0228, + "num_input_tokens_seen": 64779176, + "step": 96090 + }, + { + "epoch": 2.347616837270662, + "grad_norm": 0.01437195111066103, + "learning_rate": 1.2772575000343589e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64782696, + "step": 96095 + }, + { + "epoch": 2.347738988102509, + "grad_norm": 72.5531234741211, + "learning_rate": 1.277175564679801e-06, + "loss": 0.0837, + "num_input_tokens_seen": 64785960, + "step": 96100 + }, + { + "epoch": 2.347861138934356, + "grad_norm": 0.07231893390417099, + "learning_rate": 1.2770936273095483e-06, + "loss": 0.0741, + "num_input_tokens_seen": 64789224, + "step": 96105 + }, + { + "epoch": 2.347983289766203, + "grad_norm": 0.04119507223367691, + "learning_rate": 1.2770116879241961e-06, + "loss": 0.0006, + "num_input_tokens_seen": 64792360, + "step": 96110 + }, + { + "epoch": 2.3481054405980504, + "grad_norm": 19.756370544433594, + "learning_rate": 1.27692974652434e-06, + "loss": 0.0465, + "num_input_tokens_seen": 64796072, + "step": 96115 + }, + { + "epoch": 2.3482275914298976, + "grad_norm": 0.024013273417949677, + "learning_rate": 1.2768478031105764e-06, + "loss": 0.0628, + "num_input_tokens_seen": 64799016, + "step": 96120 + }, + { + "epoch": 2.3483497422617448, + "grad_norm": 0.012543603777885437, + "learning_rate": 1.2767658576835015e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64802600, + "step": 96125 + }, + { + "epoch": 2.348471893093592, + "grad_norm": 0.00011490545875858516, + "learning_rate": 1.2766839102437105e-06, + "loss": 0.0532, + "num_input_tokens_seen": 64805928, + "step": 96130 + }, + { + "epoch": 2.348594043925439, + "grad_norm": 0.8810346722602844, + "learning_rate": 1.2766019607917997e-06, + "loss": 0.0017, + "num_input_tokens_seen": 64809320, + "step": 96135 + }, + { + "epoch": 2.3487161947572863, + "grad_norm": 0.004579383414238691, + "learning_rate": 1.2765200093283653e-06, + "loss": 0.063, + "num_input_tokens_seen": 64812712, + "step": 96140 + }, + { + "epoch": 2.3488383455891335, + "grad_norm": 0.06198953837156296, + "learning_rate": 1.2764380558540028e-06, + "loss": 0.0786, + "num_input_tokens_seen": 64815912, + "step": 96145 + }, + { + "epoch": 2.3489604964209807, + "grad_norm": 0.029107533395290375, + "learning_rate": 1.2763561003693087e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64819560, + "step": 96150 + }, + { + "epoch": 2.349082647252828, + "grad_norm": 0.009671103209257126, + "learning_rate": 1.2762741428748785e-06, + "loss": 0.0348, + "num_input_tokens_seen": 64823144, + "step": 96155 + }, + { + "epoch": 2.349204798084675, + "grad_norm": 0.005305442493408918, + "learning_rate": 1.2761921833713082e-06, + "loss": 0.1014, + "num_input_tokens_seen": 64826920, + "step": 96160 + }, + { + "epoch": 2.3493269489165223, + "grad_norm": 0.0013102086959406734, + "learning_rate": 1.2761102218591943e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64830248, + "step": 96165 + }, + { + "epoch": 2.3494490997483695, + "grad_norm": 0.24055279791355133, + "learning_rate": 1.2760282583391327e-06, + "loss": 0.0028, + "num_input_tokens_seen": 64833320, + "step": 96170 + }, + { + "epoch": 2.349571250580216, + "grad_norm": 94.44710540771484, + "learning_rate": 1.275946292811719e-06, + "loss": 0.1198, + "num_input_tokens_seen": 64836648, + "step": 96175 + }, + { + "epoch": 2.349693401412064, + "grad_norm": 1.151880145072937, + "learning_rate": 1.2758643252775498e-06, + "loss": 0.1153, + "num_input_tokens_seen": 64839784, + "step": 96180 + }, + { + "epoch": 2.3498155522439106, + "grad_norm": 0.24090705811977386, + "learning_rate": 1.275782355737221e-06, + "loss": 0.0293, + "num_input_tokens_seen": 64842984, + "step": 96185 + }, + { + "epoch": 2.3499377030757578, + "grad_norm": 0.03649113327264786, + "learning_rate": 1.275700384191329e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64846184, + "step": 96190 + }, + { + "epoch": 2.350059853907605, + "grad_norm": 0.009620449505746365, + "learning_rate": 1.2756184106404693e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64849192, + "step": 96195 + }, + { + "epoch": 2.350182004739452, + "grad_norm": 0.021057967096567154, + "learning_rate": 1.2755364350852387e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64852584, + "step": 96200 + }, + { + "epoch": 2.3503041555712993, + "grad_norm": 0.005199748557060957, + "learning_rate": 1.2754544575262327e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64857768, + "step": 96205 + }, + { + "epoch": 2.3504263064031465, + "grad_norm": 0.0033945294562727213, + "learning_rate": 1.2753724779640483e-06, + "loss": 0.0503, + "num_input_tokens_seen": 64861224, + "step": 96210 + }, + { + "epoch": 2.3505484572349937, + "grad_norm": 0.008970481343567371, + "learning_rate": 1.2752904963992807e-06, + "loss": 0.0816, + "num_input_tokens_seen": 64864552, + "step": 96215 + }, + { + "epoch": 2.350670608066841, + "grad_norm": 0.019377127289772034, + "learning_rate": 1.2752085128325267e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64868072, + "step": 96220 + }, + { + "epoch": 2.350792758898688, + "grad_norm": 0.05443967506289482, + "learning_rate": 1.2751265272643826e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64871144, + "step": 96225 + }, + { + "epoch": 2.3509149097305353, + "grad_norm": 38.987884521484375, + "learning_rate": 1.275044539695444e-06, + "loss": 0.0466, + "num_input_tokens_seen": 64874600, + "step": 96230 + }, + { + "epoch": 2.3510370605623825, + "grad_norm": 0.016544029116630554, + "learning_rate": 1.2749625501263076e-06, + "loss": 0.0395, + "num_input_tokens_seen": 64877992, + "step": 96235 + }, + { + "epoch": 2.3511592113942297, + "grad_norm": 0.031460560858249664, + "learning_rate": 1.2748805585575699e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64881128, + "step": 96240 + }, + { + "epoch": 2.351281362226077, + "grad_norm": 0.03763119876384735, + "learning_rate": 1.2747985649898265e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64884136, + "step": 96245 + }, + { + "epoch": 2.351403513057924, + "grad_norm": 0.11334902048110962, + "learning_rate": 1.2747165694236741e-06, + "loss": 0.1235, + "num_input_tokens_seen": 64887016, + "step": 96250 + }, + { + "epoch": 2.3515256638897712, + "grad_norm": 0.01264060940593481, + "learning_rate": 1.274634571859709e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64890472, + "step": 96255 + }, + { + "epoch": 2.3516478147216184, + "grad_norm": 0.5173587203025818, + "learning_rate": 1.2745525722985276e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64893352, + "step": 96260 + }, + { + "epoch": 2.3517699655534656, + "grad_norm": 0.02787947840988636, + "learning_rate": 1.2744705707407259e-06, + "loss": 0.0322, + "num_input_tokens_seen": 64896360, + "step": 96265 + }, + { + "epoch": 2.3518921163853124, + "grad_norm": 29.06874656677246, + "learning_rate": 1.2743885671869003e-06, + "loss": 0.1283, + "num_input_tokens_seen": 64899752, + "step": 96270 + }, + { + "epoch": 2.3520142672171596, + "grad_norm": 203.33737182617188, + "learning_rate": 1.2743065616376472e-06, + "loss": 0.0737, + "num_input_tokens_seen": 64903208, + "step": 96275 + }, + { + "epoch": 2.3521364180490067, + "grad_norm": 0.17877565324306488, + "learning_rate": 1.274224554093563e-06, + "loss": 0.0397, + "num_input_tokens_seen": 64906152, + "step": 96280 + }, + { + "epoch": 2.352258568880854, + "grad_norm": 0.0367661714553833, + "learning_rate": 1.2741425445552442e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64909480, + "step": 96285 + }, + { + "epoch": 2.352380719712701, + "grad_norm": 680.6268920898438, + "learning_rate": 1.274060533023287e-06, + "loss": 0.015, + "num_input_tokens_seen": 64912360, + "step": 96290 + }, + { + "epoch": 2.3525028705445483, + "grad_norm": 30.654756546020508, + "learning_rate": 1.273978519498288e-06, + "loss": 0.1568, + "num_input_tokens_seen": 64915688, + "step": 96295 + }, + { + "epoch": 2.3526250213763955, + "grad_norm": 0.08082496374845505, + "learning_rate": 1.2738965039808433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64918696, + "step": 96300 + }, + { + "epoch": 2.3527471722082427, + "grad_norm": 22.63221549987793, + "learning_rate": 1.2738144864715498e-06, + "loss": 0.2386, + "num_input_tokens_seen": 64921896, + "step": 96305 + }, + { + "epoch": 2.35286932304009, + "grad_norm": 0.04736287146806717, + "learning_rate": 1.2737324669710036e-06, + "loss": 0.0751, + "num_input_tokens_seen": 64925992, + "step": 96310 + }, + { + "epoch": 2.352991473871937, + "grad_norm": 0.09722006320953369, + "learning_rate": 1.2736504454798013e-06, + "loss": 0.0601, + "num_input_tokens_seen": 64929064, + "step": 96315 + }, + { + "epoch": 2.3531136247037843, + "grad_norm": 0.23957017064094543, + "learning_rate": 1.2735684219985395e-06, + "loss": 0.0003, + "num_input_tokens_seen": 64932648, + "step": 96320 + }, + { + "epoch": 2.3532357755356315, + "grad_norm": 0.1643843650817871, + "learning_rate": 1.2734863965278143e-06, + "loss": 0.001, + "num_input_tokens_seen": 64936104, + "step": 96325 + }, + { + "epoch": 2.3533579263674786, + "grad_norm": 0.5928179025650024, + "learning_rate": 1.2734043690682228e-06, + "loss": 0.0004, + "num_input_tokens_seen": 64939176, + "step": 96330 + }, + { + "epoch": 2.353480077199326, + "grad_norm": 0.006342485547065735, + "learning_rate": 1.2733223396203606e-06, + "loss": 0.0729, + "num_input_tokens_seen": 64942696, + "step": 96335 + }, + { + "epoch": 2.353602228031173, + "grad_norm": 0.018328078091144562, + "learning_rate": 1.2732403081848254e-06, + "loss": 0.1607, + "num_input_tokens_seen": 64946088, + "step": 96340 + }, + { + "epoch": 2.35372437886302, + "grad_norm": 0.010952993296086788, + "learning_rate": 1.273158274762213e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64949736, + "step": 96345 + }, + { + "epoch": 2.3538465296948674, + "grad_norm": 3.82192325592041, + "learning_rate": 1.27307623935312e-06, + "loss": 0.001, + "num_input_tokens_seen": 64952872, + "step": 96350 + }, + { + "epoch": 2.353968680526714, + "grad_norm": 0.029450977221131325, + "learning_rate": 1.2729942019581433e-06, + "loss": 0.0491, + "num_input_tokens_seen": 64957096, + "step": 96355 + }, + { + "epoch": 2.354090831358562, + "grad_norm": 3.583500623703003, + "learning_rate": 1.2729121625778793e-06, + "loss": 0.0009, + "num_input_tokens_seen": 64960744, + "step": 96360 + }, + { + "epoch": 2.3542129821904085, + "grad_norm": 0.03824234753847122, + "learning_rate": 1.2728301212129246e-06, + "loss": 0.0507, + "num_input_tokens_seen": 64964584, + "step": 96365 + }, + { + "epoch": 2.3543351330222557, + "grad_norm": 0.051010388880968094, + "learning_rate": 1.272748077863876e-06, + "loss": 0.0248, + "num_input_tokens_seen": 64968232, + "step": 96370 + }, + { + "epoch": 2.354457283854103, + "grad_norm": 0.03927096351981163, + "learning_rate": 1.27266603253133e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64971496, + "step": 96375 + }, + { + "epoch": 2.35457943468595, + "grad_norm": 0.09921469539403915, + "learning_rate": 1.272583985215883e-06, + "loss": 0.0775, + "num_input_tokens_seen": 64974824, + "step": 96380 + }, + { + "epoch": 2.3547015855177973, + "grad_norm": 1501.3050537109375, + "learning_rate": 1.2725019359181323e-06, + "loss": 0.0183, + "num_input_tokens_seen": 64978088, + "step": 96385 + }, + { + "epoch": 2.3548237363496445, + "grad_norm": 38.874263763427734, + "learning_rate": 1.2724198846386743e-06, + "loss": 0.1264, + "num_input_tokens_seen": 64981608, + "step": 96390 + }, + { + "epoch": 2.3549458871814917, + "grad_norm": 0.051000069826841354, + "learning_rate": 1.2723378313781053e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64984744, + "step": 96395 + }, + { + "epoch": 2.355068038013339, + "grad_norm": 0.05670246481895447, + "learning_rate": 1.2722557761370224e-06, + "loss": 0.0253, + "num_input_tokens_seen": 64988456, + "step": 96400 + }, + { + "epoch": 2.355190188845186, + "grad_norm": 0.020489536225795746, + "learning_rate": 1.2721737189160221e-06, + "loss": 0.0001, + "num_input_tokens_seen": 64991784, + "step": 96405 + }, + { + "epoch": 2.3553123396770332, + "grad_norm": 0.017961619421839714, + "learning_rate": 1.2720916597157017e-06, + "loss": 0.0002, + "num_input_tokens_seen": 64994920, + "step": 96410 + }, + { + "epoch": 2.3554344905088804, + "grad_norm": 0.4348694086074829, + "learning_rate": 1.2720095985366578e-06, + "loss": 0.0005, + "num_input_tokens_seen": 64998376, + "step": 96415 + }, + { + "epoch": 2.3555566413407276, + "grad_norm": 0.15724071860313416, + "learning_rate": 1.2719275353794863e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65001448, + "step": 96420 + }, + { + "epoch": 2.355678792172575, + "grad_norm": 0.0027769957669079304, + "learning_rate": 1.271845470244785e-06, + "loss": 0.0178, + "num_input_tokens_seen": 65006824, + "step": 96425 + }, + { + "epoch": 2.355800943004422, + "grad_norm": 294.8739013671875, + "learning_rate": 1.27176340313315e-06, + "loss": 0.0458, + "num_input_tokens_seen": 65010024, + "step": 96430 + }, + { + "epoch": 2.355923093836269, + "grad_norm": 1.417127013206482, + "learning_rate": 1.2716813340451787e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65013288, + "step": 96435 + }, + { + "epoch": 2.3560452446681164, + "grad_norm": 0.038331493735313416, + "learning_rate": 1.2715992629814673e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65016680, + "step": 96440 + }, + { + "epoch": 2.3561673954999636, + "grad_norm": 0.003267770865932107, + "learning_rate": 1.2715171899426134e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65019560, + "step": 96445 + }, + { + "epoch": 2.3562895463318103, + "grad_norm": 0.0478217639029026, + "learning_rate": 1.2714351149292135e-06, + "loss": 0.0581, + "num_input_tokens_seen": 65022888, + "step": 96450 + }, + { + "epoch": 2.3564116971636575, + "grad_norm": 0.014052304439246655, + "learning_rate": 1.2713530379418642e-06, + "loss": 0.104, + "num_input_tokens_seen": 65025768, + "step": 96455 + }, + { + "epoch": 2.3565338479955047, + "grad_norm": 0.023245450109243393, + "learning_rate": 1.2712709589811628e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65029672, + "step": 96460 + }, + { + "epoch": 2.356655998827352, + "grad_norm": 0.01749522238969803, + "learning_rate": 1.271188878047706e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65032936, + "step": 96465 + }, + { + "epoch": 2.356778149659199, + "grad_norm": 0.03706004470586777, + "learning_rate": 1.2711067951420906e-06, + "loss": 0.0393, + "num_input_tokens_seen": 65035944, + "step": 96470 + }, + { + "epoch": 2.3569003004910463, + "grad_norm": 0.17029958963394165, + "learning_rate": 1.2710247102649138e-06, + "loss": 0.0338, + "num_input_tokens_seen": 65039080, + "step": 96475 + }, + { + "epoch": 2.3570224513228935, + "grad_norm": 0.4401090443134308, + "learning_rate": 1.2709426234167723e-06, + "loss": 0.0011, + "num_input_tokens_seen": 65042536, + "step": 96480 + }, + { + "epoch": 2.3571446021547406, + "grad_norm": 41.670413970947266, + "learning_rate": 1.2708605345982634e-06, + "loss": 0.129, + "num_input_tokens_seen": 65046056, + "step": 96485 + }, + { + "epoch": 2.357266752986588, + "grad_norm": 9.80988597869873, + "learning_rate": 1.2707784438099833e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65049512, + "step": 96490 + }, + { + "epoch": 2.357388903818435, + "grad_norm": 0.06891076266765594, + "learning_rate": 1.27069635105253e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65052968, + "step": 96495 + }, + { + "epoch": 2.357511054650282, + "grad_norm": 0.016416283324360847, + "learning_rate": 1.2706142563264999e-06, + "loss": 0.0453, + "num_input_tokens_seen": 65055976, + "step": 96500 + }, + { + "epoch": 2.3576332054821294, + "grad_norm": 85.0042724609375, + "learning_rate": 1.2705321596324901e-06, + "loss": 0.0924, + "num_input_tokens_seen": 65059432, + "step": 96505 + }, + { + "epoch": 2.3577553563139766, + "grad_norm": 0.14299145340919495, + "learning_rate": 1.2704500609710977e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65062760, + "step": 96510 + }, + { + "epoch": 2.357877507145824, + "grad_norm": 0.006079481448978186, + "learning_rate": 1.2703679603429198e-06, + "loss": 0.145, + "num_input_tokens_seen": 65066088, + "step": 96515 + }, + { + "epoch": 2.357999657977671, + "grad_norm": 37.76004409790039, + "learning_rate": 1.2702858577485533e-06, + "loss": 0.0889, + "num_input_tokens_seen": 65069352, + "step": 96520 + }, + { + "epoch": 2.358121808809518, + "grad_norm": 0.027957482263445854, + "learning_rate": 1.2702037531885954e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65072936, + "step": 96525 + }, + { + "epoch": 2.3582439596413654, + "grad_norm": 0.3856599032878876, + "learning_rate": 1.270121646663643e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65076200, + "step": 96530 + }, + { + "epoch": 2.358366110473212, + "grad_norm": 0.018702376633882523, + "learning_rate": 1.2700395381742937e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65079400, + "step": 96535 + }, + { + "epoch": 2.3584882613050597, + "grad_norm": 51.89600372314453, + "learning_rate": 1.269957427721144e-06, + "loss": 0.0814, + "num_input_tokens_seen": 65082536, + "step": 96540 + }, + { + "epoch": 2.3586104121369065, + "grad_norm": 28.521587371826172, + "learning_rate": 1.2698753153047913e-06, + "loss": 0.1191, + "num_input_tokens_seen": 65085928, + "step": 96545 + }, + { + "epoch": 2.3587325629687537, + "grad_norm": 0.2891164720058441, + "learning_rate": 1.2697932009258324e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65089320, + "step": 96550 + }, + { + "epoch": 2.358854713800601, + "grad_norm": 0.14744800329208374, + "learning_rate": 1.269711084584865e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65092136, + "step": 96555 + }, + { + "epoch": 2.358976864632448, + "grad_norm": 0.021144427359104156, + "learning_rate": 1.2696289662824863e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65095528, + "step": 96560 + }, + { + "epoch": 2.3590990154642952, + "grad_norm": 0.054040584713220596, + "learning_rate": 1.2695468460192928e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65098472, + "step": 96565 + }, + { + "epoch": 2.3592211662961424, + "grad_norm": 0.10927646607160568, + "learning_rate": 1.2694647237958827e-06, + "loss": 0.0438, + "num_input_tokens_seen": 65101992, + "step": 96570 + }, + { + "epoch": 2.3593433171279896, + "grad_norm": 2.334500551223755, + "learning_rate": 1.2693825996128524e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65105576, + "step": 96575 + }, + { + "epoch": 2.359465467959837, + "grad_norm": 0.20562472939491272, + "learning_rate": 1.2693004734707993e-06, + "loss": 0.0029, + "num_input_tokens_seen": 65108968, + "step": 96580 + }, + { + "epoch": 2.359587618791684, + "grad_norm": 0.20102748274803162, + "learning_rate": 1.2692183453703205e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65111976, + "step": 96585 + }, + { + "epoch": 2.359709769623531, + "grad_norm": 0.008200347423553467, + "learning_rate": 1.2691362153120135e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65115560, + "step": 96590 + }, + { + "epoch": 2.3598319204553784, + "grad_norm": 0.03276849538087845, + "learning_rate": 1.269054083296476e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65118888, + "step": 96595 + }, + { + "epoch": 2.3599540712872256, + "grad_norm": 0.19144579768180847, + "learning_rate": 1.2689719493243046e-06, + "loss": 0.0454, + "num_input_tokens_seen": 65122600, + "step": 96600 + }, + { + "epoch": 2.3600762221190728, + "grad_norm": 0.05740584433078766, + "learning_rate": 1.2688898133960968e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65126248, + "step": 96605 + }, + { + "epoch": 2.36019837295092, + "grad_norm": 0.009974388405680656, + "learning_rate": 1.2688076755124499e-06, + "loss": 0.1508, + "num_input_tokens_seen": 65129640, + "step": 96610 + }, + { + "epoch": 2.360320523782767, + "grad_norm": 0.0036886536981910467, + "learning_rate": 1.2687255356739615e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65132712, + "step": 96615 + }, + { + "epoch": 2.360442674614614, + "grad_norm": 0.005350802559405565, + "learning_rate": 1.2686433938812287e-06, + "loss": 0.0591, + "num_input_tokens_seen": 65136104, + "step": 96620 + }, + { + "epoch": 2.3605648254464615, + "grad_norm": 0.13386641442775726, + "learning_rate": 1.2685612501348486e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65139624, + "step": 96625 + }, + { + "epoch": 2.3606869762783083, + "grad_norm": 0.006131905596703291, + "learning_rate": 1.268479104435419e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65143208, + "step": 96630 + }, + { + "epoch": 2.3608091271101554, + "grad_norm": 0.014093791134655476, + "learning_rate": 1.2683969567835372e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65146536, + "step": 96635 + }, + { + "epoch": 2.3609312779420026, + "grad_norm": 0.11882232129573822, + "learning_rate": 1.2683148071798006e-06, + "loss": 0.0007, + "num_input_tokens_seen": 65149928, + "step": 96640 + }, + { + "epoch": 2.36105342877385, + "grad_norm": 0.006873989012092352, + "learning_rate": 1.2682326556248066e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65153256, + "step": 96645 + }, + { + "epoch": 2.361175579605697, + "grad_norm": 0.007074476219713688, + "learning_rate": 1.2681505021191523e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65156328, + "step": 96650 + }, + { + "epoch": 2.361297730437544, + "grad_norm": 0.005505382549017668, + "learning_rate": 1.2680683466634355e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65159528, + "step": 96655 + }, + { + "epoch": 2.3614198812693914, + "grad_norm": 22.611209869384766, + "learning_rate": 1.2679861892582535e-06, + "loss": 0.1168, + "num_input_tokens_seen": 65163176, + "step": 96660 + }, + { + "epoch": 2.3615420321012386, + "grad_norm": 0.21755099296569824, + "learning_rate": 1.2679040299042041e-06, + "loss": 0.0502, + "num_input_tokens_seen": 65166504, + "step": 96665 + }, + { + "epoch": 2.3616641829330858, + "grad_norm": 0.017916874960064888, + "learning_rate": 1.2678218686018848e-06, + "loss": 0.0626, + "num_input_tokens_seen": 65169704, + "step": 96670 + }, + { + "epoch": 2.361786333764933, + "grad_norm": 744.1478271484375, + "learning_rate": 1.267739705351892e-06, + "loss": 0.0961, + "num_input_tokens_seen": 65173608, + "step": 96675 + }, + { + "epoch": 2.36190848459678, + "grad_norm": 0.18370923399925232, + "learning_rate": 1.2676575401548248e-06, + "loss": 0.0542, + "num_input_tokens_seen": 65176680, + "step": 96680 + }, + { + "epoch": 2.3620306354286273, + "grad_norm": 0.1509036123752594, + "learning_rate": 1.2675753730112798e-06, + "loss": 0.0672, + "num_input_tokens_seen": 65179688, + "step": 96685 + }, + { + "epoch": 2.3621527862604745, + "grad_norm": 0.9493951797485352, + "learning_rate": 1.2674932039218545e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65183208, + "step": 96690 + }, + { + "epoch": 2.3622749370923217, + "grad_norm": 0.038394249975681305, + "learning_rate": 1.2674110328871469e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65186536, + "step": 96695 + }, + { + "epoch": 2.362397087924169, + "grad_norm": 18.89492416381836, + "learning_rate": 1.2673288599077543e-06, + "loss": 0.1285, + "num_input_tokens_seen": 65189992, + "step": 96700 + }, + { + "epoch": 2.362519238756016, + "grad_norm": 43.87201690673828, + "learning_rate": 1.2672466849842742e-06, + "loss": 0.1172, + "num_input_tokens_seen": 65193832, + "step": 96705 + }, + { + "epoch": 2.3626413895878633, + "grad_norm": 54.42050552368164, + "learning_rate": 1.2671645081173044e-06, + "loss": 0.0933, + "num_input_tokens_seen": 65197224, + "step": 96710 + }, + { + "epoch": 2.36276354041971, + "grad_norm": 0.047918014228343964, + "learning_rate": 1.2670823293074423e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65200872, + "step": 96715 + }, + { + "epoch": 2.3628856912515572, + "grad_norm": 0.08623618632555008, + "learning_rate": 1.2670001485552858e-06, + "loss": 0.004, + "num_input_tokens_seen": 65204008, + "step": 96720 + }, + { + "epoch": 2.3630078420834044, + "grad_norm": 0.017091054469347, + "learning_rate": 1.2669179658614327e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65207464, + "step": 96725 + }, + { + "epoch": 2.3631299929152516, + "grad_norm": 0.004988627042621374, + "learning_rate": 1.26683578122648e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65210344, + "step": 96730 + }, + { + "epoch": 2.363252143747099, + "grad_norm": 0.0018072007223963737, + "learning_rate": 1.2667535946510258e-06, + "loss": 0.0606, + "num_input_tokens_seen": 65214056, + "step": 96735 + }, + { + "epoch": 2.363374294578946, + "grad_norm": 0.009117374196648598, + "learning_rate": 1.2666714061356675e-06, + "loss": 0.0378, + "num_input_tokens_seen": 65216936, + "step": 96740 + }, + { + "epoch": 2.363496445410793, + "grad_norm": 0.5432358384132385, + "learning_rate": 1.2665892156810035e-06, + "loss": 0.0013, + "num_input_tokens_seen": 65219816, + "step": 96745 + }, + { + "epoch": 2.3636185962426404, + "grad_norm": 0.029280368238687515, + "learning_rate": 1.2665070232876304e-06, + "loss": 0.0414, + "num_input_tokens_seen": 65223080, + "step": 96750 + }, + { + "epoch": 2.3637407470744876, + "grad_norm": 0.0141510721296072, + "learning_rate": 1.266424828956147e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65226472, + "step": 96755 + }, + { + "epoch": 2.3638628979063347, + "grad_norm": 0.06533598899841309, + "learning_rate": 1.2663426326871505e-06, + "loss": 0.0013, + "num_input_tokens_seen": 65229544, + "step": 96760 + }, + { + "epoch": 2.363985048738182, + "grad_norm": 25.035097122192383, + "learning_rate": 1.2662604344812387e-06, + "loss": 0.0692, + "num_input_tokens_seen": 65233128, + "step": 96765 + }, + { + "epoch": 2.364107199570029, + "grad_norm": 0.02839895896613598, + "learning_rate": 1.2661782343390096e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65236456, + "step": 96770 + }, + { + "epoch": 2.3642293504018763, + "grad_norm": 0.1953248828649521, + "learning_rate": 1.2660960322610605e-06, + "loss": 0.0439, + "num_input_tokens_seen": 65239848, + "step": 96775 + }, + { + "epoch": 2.3643515012337235, + "grad_norm": 1421.1453857421875, + "learning_rate": 1.2660138282479894e-06, + "loss": 0.0145, + "num_input_tokens_seen": 65243176, + "step": 96780 + }, + { + "epoch": 2.3644736520655707, + "grad_norm": 0.013044201768934727, + "learning_rate": 1.2659316223003945e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65247336, + "step": 96785 + }, + { + "epoch": 2.364595802897418, + "grad_norm": 0.005308858584612608, + "learning_rate": 1.2658494144188732e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65250408, + "step": 96790 + }, + { + "epoch": 2.364717953729265, + "grad_norm": 0.006218060851097107, + "learning_rate": 1.2657672046040235e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65253608, + "step": 96795 + }, + { + "epoch": 2.364840104561112, + "grad_norm": 23.112071990966797, + "learning_rate": 1.265684992856443e-06, + "loss": 0.1215, + "num_input_tokens_seen": 65256936, + "step": 96800 + }, + { + "epoch": 2.3649622553929595, + "grad_norm": 0.15702813863754272, + "learning_rate": 1.2656027791767299e-06, + "loss": 0.038, + "num_input_tokens_seen": 65260520, + "step": 96805 + }, + { + "epoch": 2.365084406224806, + "grad_norm": 0.011202758178114891, + "learning_rate": 1.2655205635654819e-06, + "loss": 0.0, + "num_input_tokens_seen": 65263976, + "step": 96810 + }, + { + "epoch": 2.3652065570566534, + "grad_norm": 8.87405776977539, + "learning_rate": 1.2654383460232972e-06, + "loss": 0.0403, + "num_input_tokens_seen": 65267176, + "step": 96815 + }, + { + "epoch": 2.3653287078885006, + "grad_norm": 17.453929901123047, + "learning_rate": 1.265356126550773e-06, + "loss": 0.0907, + "num_input_tokens_seen": 65270504, + "step": 96820 + }, + { + "epoch": 2.3654508587203478, + "grad_norm": 0.015709292143583298, + "learning_rate": 1.2652739051485083e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65273640, + "step": 96825 + }, + { + "epoch": 2.365573009552195, + "grad_norm": 0.05382462218403816, + "learning_rate": 1.2651916818170998e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65276840, + "step": 96830 + }, + { + "epoch": 2.365695160384042, + "grad_norm": 0.015843816101551056, + "learning_rate": 1.2651094565571465e-06, + "loss": 0.0513, + "num_input_tokens_seen": 65280040, + "step": 96835 + }, + { + "epoch": 2.3658173112158893, + "grad_norm": 0.004987873136997223, + "learning_rate": 1.2650272293692457e-06, + "loss": 0.049, + "num_input_tokens_seen": 65283304, + "step": 96840 + }, + { + "epoch": 2.3659394620477365, + "grad_norm": 0.30553194880485535, + "learning_rate": 1.2649450002539957e-06, + "loss": 0.044, + "num_input_tokens_seen": 65286824, + "step": 96845 + }, + { + "epoch": 2.3660616128795837, + "grad_norm": 0.05274348706007004, + "learning_rate": 1.2648627692119942e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65290024, + "step": 96850 + }, + { + "epoch": 2.366183763711431, + "grad_norm": 0.018941234797239304, + "learning_rate": 1.2647805362438395e-06, + "loss": 0.1146, + "num_input_tokens_seen": 65293096, + "step": 96855 + }, + { + "epoch": 2.366305914543278, + "grad_norm": 0.05804905667901039, + "learning_rate": 1.2646983013501298e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65296744, + "step": 96860 + }, + { + "epoch": 2.3664280653751253, + "grad_norm": 59.49602508544922, + "learning_rate": 1.2646160645314623e-06, + "loss": 0.0361, + "num_input_tokens_seen": 65299816, + "step": 96865 + }, + { + "epoch": 2.3665502162069725, + "grad_norm": 0.015892786905169487, + "learning_rate": 1.264533825788436e-06, + "loss": 0.0456, + "num_input_tokens_seen": 65302952, + "step": 96870 + }, + { + "epoch": 2.3666723670388197, + "grad_norm": 0.01757962629199028, + "learning_rate": 1.2644515851216487e-06, + "loss": 0.08, + "num_input_tokens_seen": 65306664, + "step": 96875 + }, + { + "epoch": 2.366794517870667, + "grad_norm": 0.08031915873289108, + "learning_rate": 1.2643693425316981e-06, + "loss": 0.0327, + "num_input_tokens_seen": 65309736, + "step": 96880 + }, + { + "epoch": 2.366916668702514, + "grad_norm": 0.025443589314818382, + "learning_rate": 1.2642870980191827e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65312744, + "step": 96885 + }, + { + "epoch": 2.3670388195343612, + "grad_norm": 0.0021331454627215862, + "learning_rate": 1.2642048515847003e-06, + "loss": 0.0504, + "num_input_tokens_seen": 65315752, + "step": 96890 + }, + { + "epoch": 2.367160970366208, + "grad_norm": 0.1670653522014618, + "learning_rate": 1.264122603228849e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65319528, + "step": 96895 + }, + { + "epoch": 2.367283121198055, + "grad_norm": 0.010267873294651508, + "learning_rate": 1.2640403529522272e-06, + "loss": 0.0331, + "num_input_tokens_seen": 65322536, + "step": 96900 + }, + { + "epoch": 2.3674052720299024, + "grad_norm": 0.3293243944644928, + "learning_rate": 1.263958100755433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65325352, + "step": 96905 + }, + { + "epoch": 2.3675274228617496, + "grad_norm": 0.022090457379817963, + "learning_rate": 1.2638758466390647e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65328808, + "step": 96910 + }, + { + "epoch": 2.3676495736935967, + "grad_norm": 0.003714184043928981, + "learning_rate": 1.2637935906037199e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65332264, + "step": 96915 + }, + { + "epoch": 2.367771724525444, + "grad_norm": 0.03657980635762215, + "learning_rate": 1.2637113326499973e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65335656, + "step": 96920 + }, + { + "epoch": 2.367893875357291, + "grad_norm": 0.023362930864095688, + "learning_rate": 1.2636290727784951e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65338792, + "step": 96925 + }, + { + "epoch": 2.3680160261891383, + "grad_norm": 0.018264321610331535, + "learning_rate": 1.2635468109898112e-06, + "loss": 0.0439, + "num_input_tokens_seen": 65342184, + "step": 96930 + }, + { + "epoch": 2.3681381770209855, + "grad_norm": 0.5096086263656616, + "learning_rate": 1.263464547284544e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65345064, + "step": 96935 + }, + { + "epoch": 2.3682603278528327, + "grad_norm": 0.009335456416010857, + "learning_rate": 1.263382281663292e-06, + "loss": 0.0641, + "num_input_tokens_seen": 65348136, + "step": 96940 + }, + { + "epoch": 2.36838247868468, + "grad_norm": 0.058331821113824844, + "learning_rate": 1.263300014126653e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65351464, + "step": 96945 + }, + { + "epoch": 2.368504629516527, + "grad_norm": 0.010250688530504704, + "learning_rate": 1.2632177446752255e-06, + "loss": 0.1049, + "num_input_tokens_seen": 65355176, + "step": 96950 + }, + { + "epoch": 2.3686267803483743, + "grad_norm": 0.014257263392210007, + "learning_rate": 1.2631354733096075e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65359080, + "step": 96955 + }, + { + "epoch": 2.3687489311802215, + "grad_norm": 0.0008305838564410806, + "learning_rate": 1.2630532000303978e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65362536, + "step": 96960 + }, + { + "epoch": 2.3688710820120686, + "grad_norm": 102.84041595458984, + "learning_rate": 1.2629709248381946e-06, + "loss": 0.1348, + "num_input_tokens_seen": 65366312, + "step": 96965 + }, + { + "epoch": 2.368993232843916, + "grad_norm": 0.0019097891636192799, + "learning_rate": 1.2628886477335958e-06, + "loss": 0.0415, + "num_input_tokens_seen": 65369128, + "step": 96970 + }, + { + "epoch": 2.369115383675763, + "grad_norm": 0.024086622521281242, + "learning_rate": 1.2628063687172004e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65372584, + "step": 96975 + }, + { + "epoch": 2.3692375345076098, + "grad_norm": 3.4141321182250977, + "learning_rate": 1.2627240877896063e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65375272, + "step": 96980 + }, + { + "epoch": 2.3693596853394574, + "grad_norm": 20.49992561340332, + "learning_rate": 1.2626418049514118e-06, + "loss": 0.0454, + "num_input_tokens_seen": 65378536, + "step": 96985 + }, + { + "epoch": 2.369481836171304, + "grad_norm": 0.021800873801112175, + "learning_rate": 1.2625595202032156e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65381736, + "step": 96990 + }, + { + "epoch": 2.3696039870031513, + "grad_norm": 0.024180782958865166, + "learning_rate": 1.262477233545616e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65384872, + "step": 96995 + }, + { + "epoch": 2.3697261378349985, + "grad_norm": 0.10953272134065628, + "learning_rate": 1.2623949449792112e-06, + "loss": 0.0601, + "num_input_tokens_seen": 65388008, + "step": 97000 + }, + { + "epoch": 2.3698482886668457, + "grad_norm": 0.11298985034227371, + "learning_rate": 1.2623126545045999e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65391528, + "step": 97005 + }, + { + "epoch": 2.369970439498693, + "grad_norm": 0.08407048881053925, + "learning_rate": 1.2622303621223804e-06, + "loss": 0.0024, + "num_input_tokens_seen": 65394664, + "step": 97010 + }, + { + "epoch": 2.37009259033054, + "grad_norm": 0.018757490441203117, + "learning_rate": 1.2621480678331513e-06, + "loss": 0.0382, + "num_input_tokens_seen": 65397928, + "step": 97015 + }, + { + "epoch": 2.3702147411623873, + "grad_norm": 0.12693895399570465, + "learning_rate": 1.2620657716375104e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65401192, + "step": 97020 + }, + { + "epoch": 2.3703368919942345, + "grad_norm": 30.074405670166016, + "learning_rate": 1.2619834735360573e-06, + "loss": 0.1228, + "num_input_tokens_seen": 65404136, + "step": 97025 + }, + { + "epoch": 2.3704590428260817, + "grad_norm": 0.007254903670400381, + "learning_rate": 1.2619011735293897e-06, + "loss": 0.068, + "num_input_tokens_seen": 65407720, + "step": 97030 + }, + { + "epoch": 2.370581193657929, + "grad_norm": 0.017863744869828224, + "learning_rate": 1.2618188716181065e-06, + "loss": 0.0719, + "num_input_tokens_seen": 65411048, + "step": 97035 + }, + { + "epoch": 2.370703344489776, + "grad_norm": 0.032780423760414124, + "learning_rate": 1.261736567802806e-06, + "loss": 0.0726, + "num_input_tokens_seen": 65414056, + "step": 97040 + }, + { + "epoch": 2.3708254953216232, + "grad_norm": 0.1686081439256668, + "learning_rate": 1.2616542620840867e-06, + "loss": 0.049, + "num_input_tokens_seen": 65417512, + "step": 97045 + }, + { + "epoch": 2.3709476461534704, + "grad_norm": 0.36529478430747986, + "learning_rate": 1.261571954462547e-06, + "loss": 0.0768, + "num_input_tokens_seen": 65420840, + "step": 97050 + }, + { + "epoch": 2.3710697969853176, + "grad_norm": 0.007805853616446257, + "learning_rate": 1.261489644938786e-06, + "loss": 0.0577, + "num_input_tokens_seen": 65423720, + "step": 97055 + }, + { + "epoch": 2.371191947817165, + "grad_norm": 0.06817847490310669, + "learning_rate": 1.2614073335134018e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65426792, + "step": 97060 + }, + { + "epoch": 2.371314098649012, + "grad_norm": 0.009680322371423244, + "learning_rate": 1.2613250201869931e-06, + "loss": 0.0704, + "num_input_tokens_seen": 65430312, + "step": 97065 + }, + { + "epoch": 2.371436249480859, + "grad_norm": 0.02266172133386135, + "learning_rate": 1.2612427049601589e-06, + "loss": 0.0835, + "num_input_tokens_seen": 65433704, + "step": 97070 + }, + { + "epoch": 2.371558400312706, + "grad_norm": 0.04810142144560814, + "learning_rate": 1.261160387833497e-06, + "loss": 0.0434, + "num_input_tokens_seen": 65436840, + "step": 97075 + }, + { + "epoch": 2.371680551144553, + "grad_norm": 0.018566487357020378, + "learning_rate": 1.261078068807607e-06, + "loss": 0.0316, + "num_input_tokens_seen": 65439656, + "step": 97080 + }, + { + "epoch": 2.3718027019764003, + "grad_norm": 0.09787236154079437, + "learning_rate": 1.260995747883087e-06, + "loss": 0.0409, + "num_input_tokens_seen": 65442600, + "step": 97085 + }, + { + "epoch": 2.3719248528082475, + "grad_norm": 0.02488056570291519, + "learning_rate": 1.2609134250605355e-06, + "loss": 0.0391, + "num_input_tokens_seen": 65445928, + "step": 97090 + }, + { + "epoch": 2.3720470036400947, + "grad_norm": 0.17376767098903656, + "learning_rate": 1.2608311003405513e-06, + "loss": 0.0358, + "num_input_tokens_seen": 65449192, + "step": 97095 + }, + { + "epoch": 2.372169154471942, + "grad_norm": 0.14807315170764923, + "learning_rate": 1.2607487737237334e-06, + "loss": 0.0344, + "num_input_tokens_seen": 65452520, + "step": 97100 + }, + { + "epoch": 2.372291305303789, + "grad_norm": 0.0272610392421484, + "learning_rate": 1.2606664452106804e-06, + "loss": 0.0459, + "num_input_tokens_seen": 65455848, + "step": 97105 + }, + { + "epoch": 2.3724134561356363, + "grad_norm": 0.15240778028964996, + "learning_rate": 1.2605841148019907e-06, + "loss": 0.0682, + "num_input_tokens_seen": 65459240, + "step": 97110 + }, + { + "epoch": 2.3725356069674834, + "grad_norm": 26.267230987548828, + "learning_rate": 1.260501782498263e-06, + "loss": 0.0521, + "num_input_tokens_seen": 65462888, + "step": 97115 + }, + { + "epoch": 2.3726577577993306, + "grad_norm": 0.36245620250701904, + "learning_rate": 1.2604194483000966e-06, + "loss": 0.038, + "num_input_tokens_seen": 65465960, + "step": 97120 + }, + { + "epoch": 2.372779908631178, + "grad_norm": 0.4815026521682739, + "learning_rate": 1.2603371122080901e-06, + "loss": 0.0486, + "num_input_tokens_seen": 65469224, + "step": 97125 + }, + { + "epoch": 2.372902059463025, + "grad_norm": 2.3331916332244873, + "learning_rate": 1.2602547742228417e-06, + "loss": 0.001, + "num_input_tokens_seen": 65472296, + "step": 97130 + }, + { + "epoch": 2.373024210294872, + "grad_norm": 52.48902893066406, + "learning_rate": 1.260172434344951e-06, + "loss": 0.0912, + "num_input_tokens_seen": 65475944, + "step": 97135 + }, + { + "epoch": 2.3731463611267194, + "grad_norm": 13.921010971069336, + "learning_rate": 1.260090092575016e-06, + "loss": 0.0719, + "num_input_tokens_seen": 65479464, + "step": 97140 + }, + { + "epoch": 2.3732685119585666, + "grad_norm": 0.18770751357078552, + "learning_rate": 1.260007748913636e-06, + "loss": 0.0739, + "num_input_tokens_seen": 65482664, + "step": 97145 + }, + { + "epoch": 2.3733906627904138, + "grad_norm": 0.022564081475138664, + "learning_rate": 1.2599254033614098e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65485992, + "step": 97150 + }, + { + "epoch": 2.373512813622261, + "grad_norm": 43.48125457763672, + "learning_rate": 1.259843055918936e-06, + "loss": 0.0751, + "num_input_tokens_seen": 65489512, + "step": 97155 + }, + { + "epoch": 2.3736349644541077, + "grad_norm": 0.06907090544700623, + "learning_rate": 1.2597607065868138e-06, + "loss": 0.0396, + "num_input_tokens_seen": 65492712, + "step": 97160 + }, + { + "epoch": 2.3737571152859553, + "grad_norm": 13.063774108886719, + "learning_rate": 1.2596783553656418e-06, + "loss": 0.0246, + "num_input_tokens_seen": 65495720, + "step": 97165 + }, + { + "epoch": 2.373879266117802, + "grad_norm": 26.079654693603516, + "learning_rate": 1.259596002256019e-06, + "loss": 0.0728, + "num_input_tokens_seen": 65498664, + "step": 97170 + }, + { + "epoch": 2.3740014169496493, + "grad_norm": 0.0327020138502121, + "learning_rate": 1.259513647258544e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65502120, + "step": 97175 + }, + { + "epoch": 2.3741235677814965, + "grad_norm": 0.2324591726064682, + "learning_rate": 1.2594312903738161e-06, + "loss": 0.0014, + "num_input_tokens_seen": 65505512, + "step": 97180 + }, + { + "epoch": 2.3742457186133437, + "grad_norm": 0.06149037554860115, + "learning_rate": 1.259348931602434e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65508840, + "step": 97185 + }, + { + "epoch": 2.374367869445191, + "grad_norm": 47.23630905151367, + "learning_rate": 1.2592665709449972e-06, + "loss": 0.0836, + "num_input_tokens_seen": 65512040, + "step": 97190 + }, + { + "epoch": 2.374490020277038, + "grad_norm": 0.10036757588386536, + "learning_rate": 1.2591842084021037e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65515944, + "step": 97195 + }, + { + "epoch": 2.3746121711088852, + "grad_norm": 0.12383761256933212, + "learning_rate": 1.259101843974353e-06, + "loss": 0.0224, + "num_input_tokens_seen": 65519336, + "step": 97200 + }, + { + "epoch": 2.3747343219407324, + "grad_norm": 21.063034057617188, + "learning_rate": 1.259019477662344e-06, + "loss": 0.1061, + "num_input_tokens_seen": 65522408, + "step": 97205 + }, + { + "epoch": 2.3748564727725796, + "grad_norm": 0.007072287146002054, + "learning_rate": 1.2589371094666757e-06, + "loss": 0.0096, + "num_input_tokens_seen": 65526248, + "step": 97210 + }, + { + "epoch": 2.374978623604427, + "grad_norm": 1.5783196687698364, + "learning_rate": 1.2588547393879472e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65529896, + "step": 97215 + }, + { + "epoch": 2.375100774436274, + "grad_norm": 0.046201128512620926, + "learning_rate": 1.2587723674267572e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65533736, + "step": 97220 + }, + { + "epoch": 2.375222925268121, + "grad_norm": 0.006796675268560648, + "learning_rate": 1.258689993583705e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65537128, + "step": 97225 + }, + { + "epoch": 2.3753450760999684, + "grad_norm": 18.510995864868164, + "learning_rate": 1.2586076178593896e-06, + "loss": 0.0525, + "num_input_tokens_seen": 65540200, + "step": 97230 + }, + { + "epoch": 2.3754672269318156, + "grad_norm": 0.011776096187531948, + "learning_rate": 1.2585252402544101e-06, + "loss": 0.1004, + "num_input_tokens_seen": 65543080, + "step": 97235 + }, + { + "epoch": 2.3755893777636627, + "grad_norm": 0.2864418625831604, + "learning_rate": 1.2584428607693655e-06, + "loss": 0.001, + "num_input_tokens_seen": 65546728, + "step": 97240 + }, + { + "epoch": 2.3757115285955095, + "grad_norm": 0.002647153800353408, + "learning_rate": 1.258360479404855e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65549864, + "step": 97245 + }, + { + "epoch": 2.375833679427357, + "grad_norm": 0.05344594642519951, + "learning_rate": 1.2582780961614776e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65553192, + "step": 97250 + }, + { + "epoch": 2.375955830259204, + "grad_norm": 0.008940442465245724, + "learning_rate": 1.2581957110398322e-06, + "loss": 0.069, + "num_input_tokens_seen": 65556584, + "step": 97255 + }, + { + "epoch": 2.376077981091051, + "grad_norm": 0.0033488385379314423, + "learning_rate": 1.2581133240405184e-06, + "loss": 0.0304, + "num_input_tokens_seen": 65560168, + "step": 97260 + }, + { + "epoch": 2.3762001319228983, + "grad_norm": 0.02567724883556366, + "learning_rate": 1.258030935164135e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65563432, + "step": 97265 + }, + { + "epoch": 2.3763222827547454, + "grad_norm": 0.06815133988857269, + "learning_rate": 1.257948544411281e-06, + "loss": 0.0467, + "num_input_tokens_seen": 65567080, + "step": 97270 + }, + { + "epoch": 2.3764444335865926, + "grad_norm": 0.03765109181404114, + "learning_rate": 1.257866151782556e-06, + "loss": 0.1122, + "num_input_tokens_seen": 65570728, + "step": 97275 + }, + { + "epoch": 2.37656658441844, + "grad_norm": 0.008491848595440388, + "learning_rate": 1.257783757278559e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65574056, + "step": 97280 + }, + { + "epoch": 2.376688735250287, + "grad_norm": 0.12625138461589813, + "learning_rate": 1.2577013608998892e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65577512, + "step": 97285 + }, + { + "epoch": 2.376810886082134, + "grad_norm": 0.005301265046000481, + "learning_rate": 1.2576189626471459e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65580520, + "step": 97290 + }, + { + "epoch": 2.3769330369139814, + "grad_norm": 18.142616271972656, + "learning_rate": 1.257536562520928e-06, + "loss": 0.0573, + "num_input_tokens_seen": 65583720, + "step": 97295 + }, + { + "epoch": 2.3770551877458286, + "grad_norm": 0.047586724162101746, + "learning_rate": 1.257454160521835e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65586664, + "step": 97300 + }, + { + "epoch": 2.3771773385776758, + "grad_norm": 0.2373700588941574, + "learning_rate": 1.257371756650466e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65589672, + "step": 97305 + }, + { + "epoch": 2.377299489409523, + "grad_norm": 0.7016290426254272, + "learning_rate": 1.2572893509074206e-06, + "loss": 0.0648, + "num_input_tokens_seen": 65592936, + "step": 97310 + }, + { + "epoch": 2.37742164024137, + "grad_norm": 0.010557304136455059, + "learning_rate": 1.2572069432932978e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65596392, + "step": 97315 + }, + { + "epoch": 2.3775437910732173, + "grad_norm": 0.04777985066175461, + "learning_rate": 1.2571245338086966e-06, + "loss": 0.1605, + "num_input_tokens_seen": 65599784, + "step": 97320 + }, + { + "epoch": 2.3776659419050645, + "grad_norm": 0.004487090278416872, + "learning_rate": 1.2570421224542169e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65602856, + "step": 97325 + }, + { + "epoch": 2.3777880927369117, + "grad_norm": 0.06862083077430725, + "learning_rate": 1.2569597092304576e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65606504, + "step": 97330 + }, + { + "epoch": 2.377910243568759, + "grad_norm": 0.05558573827147484, + "learning_rate": 1.2568772941380183e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65609768, + "step": 97335 + }, + { + "epoch": 2.3780323944006057, + "grad_norm": 0.06246393918991089, + "learning_rate": 1.2567948771774984e-06, + "loss": 0.0022, + "num_input_tokens_seen": 65613480, + "step": 97340 + }, + { + "epoch": 2.378154545232453, + "grad_norm": 19.32091522216797, + "learning_rate": 1.256712458349497e-06, + "loss": 0.2043, + "num_input_tokens_seen": 65617128, + "step": 97345 + }, + { + "epoch": 2.3782766960643, + "grad_norm": 0.12600833177566528, + "learning_rate": 1.2566300376546135e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65620136, + "step": 97350 + }, + { + "epoch": 2.3783988468961472, + "grad_norm": 31.323135375976562, + "learning_rate": 1.2565476150934472e-06, + "loss": 0.068, + "num_input_tokens_seen": 65623464, + "step": 97355 + }, + { + "epoch": 2.3785209977279944, + "grad_norm": 0.008119119331240654, + "learning_rate": 1.2564651906665979e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65627176, + "step": 97360 + }, + { + "epoch": 2.3786431485598416, + "grad_norm": 0.08318277448415756, + "learning_rate": 1.2563827643746644e-06, + "loss": 0.0387, + "num_input_tokens_seen": 65632552, + "step": 97365 + }, + { + "epoch": 2.378765299391689, + "grad_norm": 0.00928101222962141, + "learning_rate": 1.2563003362182466e-06, + "loss": 0.1144, + "num_input_tokens_seen": 65635880, + "step": 97370 + }, + { + "epoch": 2.378887450223536, + "grad_norm": 0.005901527125388384, + "learning_rate": 1.256217906197944e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65639208, + "step": 97375 + }, + { + "epoch": 2.379009601055383, + "grad_norm": 88.21249389648438, + "learning_rate": 1.2561354743143558e-06, + "loss": 0.1245, + "num_input_tokens_seen": 65642344, + "step": 97380 + }, + { + "epoch": 2.3791317518872304, + "grad_norm": 0.2763185203075409, + "learning_rate": 1.2560530405680813e-06, + "loss": 0.0055, + "num_input_tokens_seen": 65645608, + "step": 97385 + }, + { + "epoch": 2.3792539027190776, + "grad_norm": 0.38322851061820984, + "learning_rate": 1.2559706049597205e-06, + "loss": 0.0488, + "num_input_tokens_seen": 65649064, + "step": 97390 + }, + { + "epoch": 2.3793760535509247, + "grad_norm": 0.07286795973777771, + "learning_rate": 1.2558881674898727e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65651944, + "step": 97395 + }, + { + "epoch": 2.379498204382772, + "grad_norm": 0.026501847431063652, + "learning_rate": 1.2558057281591373e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65655336, + "step": 97400 + }, + { + "epoch": 2.379620355214619, + "grad_norm": 3.2439448833465576, + "learning_rate": 1.2557232869681136e-06, + "loss": 0.0006, + "num_input_tokens_seen": 65658536, + "step": 97405 + }, + { + "epoch": 2.3797425060464663, + "grad_norm": 0.4015377461910248, + "learning_rate": 1.2556408439174016e-06, + "loss": 0.0114, + "num_input_tokens_seen": 65661736, + "step": 97410 + }, + { + "epoch": 2.3798646568783135, + "grad_norm": 0.0072961426340043545, + "learning_rate": 1.2555583990076005e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65665064, + "step": 97415 + }, + { + "epoch": 2.3799868077101607, + "grad_norm": 0.00903349183499813, + "learning_rate": 1.25547595223931e-06, + "loss": 0.0342, + "num_input_tokens_seen": 65668648, + "step": 97420 + }, + { + "epoch": 2.3801089585420074, + "grad_norm": 0.011133073829114437, + "learning_rate": 1.2553935036131294e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65671656, + "step": 97425 + }, + { + "epoch": 2.380231109373855, + "grad_norm": 0.004269387573003769, + "learning_rate": 1.2553110531296588e-06, + "loss": 0.0378, + "num_input_tokens_seen": 65674920, + "step": 97430 + }, + { + "epoch": 2.380353260205702, + "grad_norm": 19.349868774414062, + "learning_rate": 1.2552286007894974e-06, + "loss": 0.0491, + "num_input_tokens_seen": 65678632, + "step": 97435 + }, + { + "epoch": 2.380475411037549, + "grad_norm": 107.51361083984375, + "learning_rate": 1.2551461465932453e-06, + "loss": 0.2491, + "num_input_tokens_seen": 65681768, + "step": 97440 + }, + { + "epoch": 2.380597561869396, + "grad_norm": 0.22212591767311096, + "learning_rate": 1.2550636905415014e-06, + "loss": 0.0065, + "num_input_tokens_seen": 65685352, + "step": 97445 + }, + { + "epoch": 2.3807197127012434, + "grad_norm": 0.5093609094619751, + "learning_rate": 1.2549812326348662e-06, + "loss": 0.0148, + "num_input_tokens_seen": 65688488, + "step": 97450 + }, + { + "epoch": 2.3808418635330906, + "grad_norm": 0.23147644102573395, + "learning_rate": 1.2548987728739386e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65692008, + "step": 97455 + }, + { + "epoch": 2.3809640143649378, + "grad_norm": 24.48149871826172, + "learning_rate": 1.2548163112593187e-06, + "loss": 0.095, + "num_input_tokens_seen": 65695464, + "step": 97460 + }, + { + "epoch": 2.381086165196785, + "grad_norm": 0.06188417226076126, + "learning_rate": 1.2547338477916058e-06, + "loss": 0.0882, + "num_input_tokens_seen": 65698728, + "step": 97465 + }, + { + "epoch": 2.381208316028632, + "grad_norm": 0.04154938459396362, + "learning_rate": 1.2546513824714e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65701928, + "step": 97470 + }, + { + "epoch": 2.3813304668604793, + "grad_norm": 0.083378367125988, + "learning_rate": 1.2545689152993008e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65705320, + "step": 97475 + }, + { + "epoch": 2.3814526176923265, + "grad_norm": 0.017329366877675056, + "learning_rate": 1.2544864462759083e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65708904, + "step": 97480 + }, + { + "epoch": 2.3815747685241737, + "grad_norm": 150.7119903564453, + "learning_rate": 1.2544039754018213e-06, + "loss": 0.0393, + "num_input_tokens_seen": 65712296, + "step": 97485 + }, + { + "epoch": 2.381696919356021, + "grad_norm": 3.2090935707092285, + "learning_rate": 1.2543215026776406e-06, + "loss": 0.0427, + "num_input_tokens_seen": 65715880, + "step": 97490 + }, + { + "epoch": 2.381819070187868, + "grad_norm": 0.0032223211601376534, + "learning_rate": 1.2542390281039654e-06, + "loss": 0.0008, + "num_input_tokens_seen": 65719528, + "step": 97495 + }, + { + "epoch": 2.3819412210197153, + "grad_norm": 64.20941162109375, + "learning_rate": 1.254156551681396e-06, + "loss": 0.04, + "num_input_tokens_seen": 65722920, + "step": 97500 + }, + { + "epoch": 2.3820633718515625, + "grad_norm": 0.18008512258529663, + "learning_rate": 1.2540740734105313e-06, + "loss": 0.0423, + "num_input_tokens_seen": 65726376, + "step": 97505 + }, + { + "epoch": 2.3821855226834097, + "grad_norm": 0.01593446172773838, + "learning_rate": 1.2539915932919717e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65729704, + "step": 97510 + }, + { + "epoch": 2.382307673515257, + "grad_norm": 44.185508728027344, + "learning_rate": 1.2539091113263172e-06, + "loss": 0.0778, + "num_input_tokens_seen": 65733160, + "step": 97515 + }, + { + "epoch": 2.3824298243471036, + "grad_norm": 0.08736875653266907, + "learning_rate": 1.2538266275141667e-06, + "loss": 0.0717, + "num_input_tokens_seen": 65736616, + "step": 97520 + }, + { + "epoch": 2.382551975178951, + "grad_norm": 0.12912006676197052, + "learning_rate": 1.2537441418561213e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65739752, + "step": 97525 + }, + { + "epoch": 2.382674126010798, + "grad_norm": 0.06348052620887756, + "learning_rate": 1.25366165435278e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65743144, + "step": 97530 + }, + { + "epoch": 2.382796276842645, + "grad_norm": 0.16505594551563263, + "learning_rate": 1.2535791650047428e-06, + "loss": 0.0535, + "num_input_tokens_seen": 65746344, + "step": 97535 + }, + { + "epoch": 2.3829184276744924, + "grad_norm": 0.020926062017679214, + "learning_rate": 1.25349667381261e-06, + "loss": 0.0053, + "num_input_tokens_seen": 65750568, + "step": 97540 + }, + { + "epoch": 2.3830405785063395, + "grad_norm": 0.2248907834291458, + "learning_rate": 1.2534141807769811e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65753960, + "step": 97545 + }, + { + "epoch": 2.3831627293381867, + "grad_norm": 0.0069709401577711105, + "learning_rate": 1.253331685898456e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65757160, + "step": 97550 + }, + { + "epoch": 2.383284880170034, + "grad_norm": 0.34722745418548584, + "learning_rate": 1.253249189177635e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65760424, + "step": 97555 + }, + { + "epoch": 2.383407031001881, + "grad_norm": 0.013133928179740906, + "learning_rate": 1.2531666906151177e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65763432, + "step": 97560 + }, + { + "epoch": 2.3835291818337283, + "grad_norm": 0.01156538538634777, + "learning_rate": 1.253084190211504e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65766824, + "step": 97565 + }, + { + "epoch": 2.3836513326655755, + "grad_norm": 0.005557337775826454, + "learning_rate": 1.2530016879673942e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65770024, + "step": 97570 + }, + { + "epoch": 2.3837734834974227, + "grad_norm": 0.03268285468220711, + "learning_rate": 1.252919183883388e-06, + "loss": 0.0373, + "num_input_tokens_seen": 65773544, + "step": 97575 + }, + { + "epoch": 2.38389563432927, + "grad_norm": 37.92142105102539, + "learning_rate": 1.252836677960085e-06, + "loss": 0.0311, + "num_input_tokens_seen": 65777256, + "step": 97580 + }, + { + "epoch": 2.384017785161117, + "grad_norm": 0.02641063742339611, + "learning_rate": 1.2527541701980861e-06, + "loss": 0.0398, + "num_input_tokens_seen": 65780392, + "step": 97585 + }, + { + "epoch": 2.3841399359929643, + "grad_norm": 0.00245059747248888, + "learning_rate": 1.2526716605979909e-06, + "loss": 0.0601, + "num_input_tokens_seen": 65783976, + "step": 97590 + }, + { + "epoch": 2.3842620868248114, + "grad_norm": 0.020249001681804657, + "learning_rate": 1.2525891491603995e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65787304, + "step": 97595 + }, + { + "epoch": 2.3843842376566586, + "grad_norm": 0.02965971827507019, + "learning_rate": 1.2525066358859119e-06, + "loss": 0.1335, + "num_input_tokens_seen": 65791144, + "step": 97600 + }, + { + "epoch": 2.3845063884885054, + "grad_norm": 0.004216925706714392, + "learning_rate": 1.2524241207751278e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65794792, + "step": 97605 + }, + { + "epoch": 2.384628539320353, + "grad_norm": 0.0560830794274807, + "learning_rate": 1.2523416038286478e-06, + "loss": 0.0413, + "num_input_tokens_seen": 65798184, + "step": 97610 + }, + { + "epoch": 2.3847506901521998, + "grad_norm": 0.05167970433831215, + "learning_rate": 1.2522590850470717e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65801512, + "step": 97615 + }, + { + "epoch": 2.384872840984047, + "grad_norm": 0.002556778723374009, + "learning_rate": 1.2521765644309998e-06, + "loss": 0.0397, + "num_input_tokens_seen": 65804520, + "step": 97620 + }, + { + "epoch": 2.384994991815894, + "grad_norm": 0.025669759139418602, + "learning_rate": 1.252094041981032e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65807848, + "step": 97625 + }, + { + "epoch": 2.3851171426477413, + "grad_norm": 30.663978576660156, + "learning_rate": 1.2520115176977686e-06, + "loss": 0.0603, + "num_input_tokens_seen": 65811304, + "step": 97630 + }, + { + "epoch": 2.3852392934795885, + "grad_norm": 339.6863098144531, + "learning_rate": 1.2519289915818096e-06, + "loss": 0.0478, + "num_input_tokens_seen": 65814632, + "step": 97635 + }, + { + "epoch": 2.3853614443114357, + "grad_norm": 129.6995849609375, + "learning_rate": 1.2518464636337552e-06, + "loss": 0.0407, + "num_input_tokens_seen": 65817896, + "step": 97640 + }, + { + "epoch": 2.385483595143283, + "grad_norm": 0.0230780690908432, + "learning_rate": 1.2517639338542056e-06, + "loss": 0.0489, + "num_input_tokens_seen": 65821288, + "step": 97645 + }, + { + "epoch": 2.38560574597513, + "grad_norm": 0.1822880506515503, + "learning_rate": 1.251681402243761e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65824936, + "step": 97650 + }, + { + "epoch": 2.3857278968069773, + "grad_norm": 0.02899126335978508, + "learning_rate": 1.2515988688030217e-06, + "loss": 0.1391, + "num_input_tokens_seen": 65828072, + "step": 97655 + }, + { + "epoch": 2.3858500476388245, + "grad_norm": 488.391845703125, + "learning_rate": 1.2515163335325875e-06, + "loss": 0.0664, + "num_input_tokens_seen": 65831528, + "step": 97660 + }, + { + "epoch": 2.3859721984706717, + "grad_norm": 0.3630223870277405, + "learning_rate": 1.251433796433059e-06, + "loss": 0.0578, + "num_input_tokens_seen": 65834984, + "step": 97665 + }, + { + "epoch": 2.386094349302519, + "grad_norm": 0.00430646538734436, + "learning_rate": 1.2513512575050365e-06, + "loss": 0.0484, + "num_input_tokens_seen": 65838056, + "step": 97670 + }, + { + "epoch": 2.386216500134366, + "grad_norm": 0.08162271231412888, + "learning_rate": 1.2512687167491193e-06, + "loss": 0.0519, + "num_input_tokens_seen": 65841320, + "step": 97675 + }, + { + "epoch": 2.3863386509662132, + "grad_norm": 0.004340833052992821, + "learning_rate": 1.2511861741659092e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65845288, + "step": 97680 + }, + { + "epoch": 2.3864608017980604, + "grad_norm": 0.03369579836726189, + "learning_rate": 1.2511036297560054e-06, + "loss": 0.0007, + "num_input_tokens_seen": 65848680, + "step": 97685 + }, + { + "epoch": 2.386582952629907, + "grad_norm": 0.15540599822998047, + "learning_rate": 1.2510210835200082e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65851944, + "step": 97690 + }, + { + "epoch": 2.386705103461755, + "grad_norm": 0.08113245666027069, + "learning_rate": 1.2509385354585187e-06, + "loss": 0.0586, + "num_input_tokens_seen": 65854952, + "step": 97695 + }, + { + "epoch": 2.3868272542936015, + "grad_norm": 0.09177146852016449, + "learning_rate": 1.2508559855721363e-06, + "loss": 0.0007, + "num_input_tokens_seen": 65858216, + "step": 97700 + }, + { + "epoch": 2.3869494051254487, + "grad_norm": 0.0013687072787433863, + "learning_rate": 1.250773433861462e-06, + "loss": 0.0336, + "num_input_tokens_seen": 65861352, + "step": 97705 + }, + { + "epoch": 2.387071555957296, + "grad_norm": 0.0910479724407196, + "learning_rate": 1.2506908803270954e-06, + "loss": 0.0408, + "num_input_tokens_seen": 65864616, + "step": 97710 + }, + { + "epoch": 2.387193706789143, + "grad_norm": 0.05656975135207176, + "learning_rate": 1.2506083249696374e-06, + "loss": 0.0982, + "num_input_tokens_seen": 65867816, + "step": 97715 + }, + { + "epoch": 2.3873158576209903, + "grad_norm": 0.009938366711139679, + "learning_rate": 1.2505257677896887e-06, + "loss": 0.0005, + "num_input_tokens_seen": 65871272, + "step": 97720 + }, + { + "epoch": 2.3874380084528375, + "grad_norm": 0.011079080402851105, + "learning_rate": 1.250443208787849e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65874728, + "step": 97725 + }, + { + "epoch": 2.3875601592846847, + "grad_norm": 0.027049781754612923, + "learning_rate": 1.2503606479647189e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65877928, + "step": 97730 + }, + { + "epoch": 2.387682310116532, + "grad_norm": 35.81193923950195, + "learning_rate": 1.2502780853208986e-06, + "loss": 0.1042, + "num_input_tokens_seen": 65881768, + "step": 97735 + }, + { + "epoch": 2.387804460948379, + "grad_norm": 93.66885375976562, + "learning_rate": 1.2501955208569887e-06, + "loss": 0.0326, + "num_input_tokens_seen": 65885032, + "step": 97740 + }, + { + "epoch": 2.3879266117802262, + "grad_norm": 0.7266661524772644, + "learning_rate": 1.25011295457359e-06, + "loss": 0.0472, + "num_input_tokens_seen": 65888552, + "step": 97745 + }, + { + "epoch": 2.3880487626120734, + "grad_norm": 0.011747072450816631, + "learning_rate": 1.2500303864713027e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65891560, + "step": 97750 + }, + { + "epoch": 2.3881709134439206, + "grad_norm": 24.473697662353516, + "learning_rate": 1.249947816550727e-06, + "loss": 0.0461, + "num_input_tokens_seen": 65894760, + "step": 97755 + }, + { + "epoch": 2.388293064275768, + "grad_norm": 0.13327303528785706, + "learning_rate": 1.2498652448124634e-06, + "loss": 0.0464, + "num_input_tokens_seen": 65897832, + "step": 97760 + }, + { + "epoch": 2.388415215107615, + "grad_norm": 0.055127665400505066, + "learning_rate": 1.2497826712571126e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65900840, + "step": 97765 + }, + { + "epoch": 2.388537365939462, + "grad_norm": 0.11277513951063156, + "learning_rate": 1.2497000958852753e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65904104, + "step": 97770 + }, + { + "epoch": 2.3886595167713094, + "grad_norm": 0.03402625396847725, + "learning_rate": 1.2496175186975514e-06, + "loss": 0.0938, + "num_input_tokens_seen": 65907560, + "step": 97775 + }, + { + "epoch": 2.3887816676031566, + "grad_norm": 0.017354309558868408, + "learning_rate": 1.249534939694542e-06, + "loss": 0.0753, + "num_input_tokens_seen": 65910760, + "step": 97780 + }, + { + "epoch": 2.3889038184350033, + "grad_norm": 0.1345100700855255, + "learning_rate": 1.2494523588768473e-06, + "loss": 0.0403, + "num_input_tokens_seen": 65914152, + "step": 97785 + }, + { + "epoch": 2.3890259692668505, + "grad_norm": 0.025590751320123672, + "learning_rate": 1.2493697762450681e-06, + "loss": 0.0392, + "num_input_tokens_seen": 65917672, + "step": 97790 + }, + { + "epoch": 2.3891481200986977, + "grad_norm": 0.5286394357681274, + "learning_rate": 1.2492871917998048e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65920808, + "step": 97795 + }, + { + "epoch": 2.389270270930545, + "grad_norm": 112.20993041992188, + "learning_rate": 1.2492046055416576e-06, + "loss": 0.0705, + "num_input_tokens_seen": 65923944, + "step": 97800 + }, + { + "epoch": 2.389392421762392, + "grad_norm": 0.00426692608743906, + "learning_rate": 1.249122017471228e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65927272, + "step": 97805 + }, + { + "epoch": 2.3895145725942393, + "grad_norm": 0.056970562785863876, + "learning_rate": 1.2490394275891159e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65930280, + "step": 97810 + }, + { + "epoch": 2.3896367234260865, + "grad_norm": 0.036483284085989, + "learning_rate": 1.248956835895922e-06, + "loss": 0.0004, + "num_input_tokens_seen": 65933736, + "step": 97815 + }, + { + "epoch": 2.3897588742579337, + "grad_norm": 54.07392120361328, + "learning_rate": 1.2488742423922472e-06, + "loss": 0.1203, + "num_input_tokens_seen": 65937384, + "step": 97820 + }, + { + "epoch": 2.389881025089781, + "grad_norm": 0.02449539117515087, + "learning_rate": 1.2487916470786916e-06, + "loss": 0.0294, + "num_input_tokens_seen": 65941032, + "step": 97825 + }, + { + "epoch": 2.390003175921628, + "grad_norm": 0.036588121205568314, + "learning_rate": 1.2487090499558563e-06, + "loss": 0.0692, + "num_input_tokens_seen": 65945192, + "step": 97830 + }, + { + "epoch": 2.390125326753475, + "grad_norm": 1.6816258430480957, + "learning_rate": 1.248626451024342e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65948008, + "step": 97835 + }, + { + "epoch": 2.3902474775853224, + "grad_norm": 0.0152506772428751, + "learning_rate": 1.2485438502847494e-06, + "loss": 0.0314, + "num_input_tokens_seen": 65951016, + "step": 97840 + }, + { + "epoch": 2.3903696284171696, + "grad_norm": 0.0030578149016946554, + "learning_rate": 1.248461247737679e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65954600, + "step": 97845 + }, + { + "epoch": 2.390491779249017, + "grad_norm": 0.012720318511128426, + "learning_rate": 1.2483786433837319e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65957864, + "step": 97850 + }, + { + "epoch": 2.390613930080864, + "grad_norm": 0.03128203749656677, + "learning_rate": 1.2482960372235082e-06, + "loss": 0.1456, + "num_input_tokens_seen": 65961320, + "step": 97855 + }, + { + "epoch": 2.390736080912711, + "grad_norm": 0.0449058972299099, + "learning_rate": 1.2482134292576088e-06, + "loss": 0.0886, + "num_input_tokens_seen": 65964584, + "step": 97860 + }, + { + "epoch": 2.3908582317445584, + "grad_norm": 0.036220669746398926, + "learning_rate": 1.2481308194866347e-06, + "loss": 0.0002, + "num_input_tokens_seen": 65967656, + "step": 97865 + }, + { + "epoch": 2.390980382576405, + "grad_norm": 0.45427876710891724, + "learning_rate": 1.2480482079111864e-06, + "loss": 0.0497, + "num_input_tokens_seen": 65970728, + "step": 97870 + }, + { + "epoch": 2.3911025334082527, + "grad_norm": 0.7156953811645508, + "learning_rate": 1.2479655945318652e-06, + "loss": 0.0006, + "num_input_tokens_seen": 65974312, + "step": 97875 + }, + { + "epoch": 2.3912246842400995, + "grad_norm": 0.025812193751335144, + "learning_rate": 1.2478829793492712e-06, + "loss": 0.0443, + "num_input_tokens_seen": 65977768, + "step": 97880 + }, + { + "epoch": 2.3913468350719467, + "grad_norm": 0.11862697452306747, + "learning_rate": 1.2478003623640056e-06, + "loss": 0.0368, + "num_input_tokens_seen": 65981096, + "step": 97885 + }, + { + "epoch": 2.391468985903794, + "grad_norm": 0.016623547300696373, + "learning_rate": 1.2477177435766687e-06, + "loss": 0.0628, + "num_input_tokens_seen": 65984488, + "step": 97890 + }, + { + "epoch": 2.391591136735641, + "grad_norm": 0.08937716484069824, + "learning_rate": 1.2476351229878624e-06, + "loss": 0.0001, + "num_input_tokens_seen": 65988008, + "step": 97895 + }, + { + "epoch": 2.3917132875674882, + "grad_norm": 0.01355107594281435, + "learning_rate": 1.2475525005981867e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65991016, + "step": 97900 + }, + { + "epoch": 2.3918354383993354, + "grad_norm": 0.031602438539266586, + "learning_rate": 1.2474698764082423e-06, + "loss": 0.001, + "num_input_tokens_seen": 65994408, + "step": 97905 + }, + { + "epoch": 2.3919575892311826, + "grad_norm": 0.09495049715042114, + "learning_rate": 1.2473872504186306e-06, + "loss": 0.0003, + "num_input_tokens_seen": 65997800, + "step": 97910 + }, + { + "epoch": 2.39207974006303, + "grad_norm": 0.06340105086565018, + "learning_rate": 1.2473046226299523e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66001512, + "step": 97915 + }, + { + "epoch": 2.392201890894877, + "grad_norm": 0.6001046299934387, + "learning_rate": 1.2472219930428086e-06, + "loss": 0.0561, + "num_input_tokens_seen": 66004584, + "step": 97920 + }, + { + "epoch": 2.392324041726724, + "grad_norm": 0.11289553344249725, + "learning_rate": 1.2471393616577995e-06, + "loss": 0.0565, + "num_input_tokens_seen": 66008104, + "step": 97925 + }, + { + "epoch": 2.3924461925585714, + "grad_norm": 0.6755673289299011, + "learning_rate": 1.2470567284755267e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66011496, + "step": 97930 + }, + { + "epoch": 2.3925683433904186, + "grad_norm": 0.03471721336245537, + "learning_rate": 1.246974093496591e-06, + "loss": 0.0514, + "num_input_tokens_seen": 66014824, + "step": 97935 + }, + { + "epoch": 2.3926904942222658, + "grad_norm": 0.019346168264746666, + "learning_rate": 1.2468914567215933e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66018344, + "step": 97940 + }, + { + "epoch": 2.392812645054113, + "grad_norm": 0.0018133146222680807, + "learning_rate": 1.2468088181511345e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66021160, + "step": 97945 + }, + { + "epoch": 2.39293479588596, + "grad_norm": 0.002835739403963089, + "learning_rate": 1.2467261777858156e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66024616, + "step": 97950 + }, + { + "epoch": 2.3930569467178073, + "grad_norm": 0.01715417392551899, + "learning_rate": 1.2466435356262372e-06, + "loss": 0.0239, + "num_input_tokens_seen": 66027944, + "step": 97955 + }, + { + "epoch": 2.3931790975496545, + "grad_norm": 11.423762321472168, + "learning_rate": 1.246560891673001e-06, + "loss": 0.1521, + "num_input_tokens_seen": 66031080, + "step": 97960 + }, + { + "epoch": 2.3933012483815013, + "grad_norm": 0.007263594772666693, + "learning_rate": 1.2464782459267078e-06, + "loss": 0.088, + "num_input_tokens_seen": 66035432, + "step": 97965 + }, + { + "epoch": 2.3934233992133485, + "grad_norm": 0.10601934045553207, + "learning_rate": 1.2463955983879584e-06, + "loss": 0.0542, + "num_input_tokens_seen": 66038440, + "step": 97970 + }, + { + "epoch": 2.3935455500451956, + "grad_norm": 0.0238367710262537, + "learning_rate": 1.2463129490573538e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66041512, + "step": 97975 + }, + { + "epoch": 2.393667700877043, + "grad_norm": 0.04439342021942139, + "learning_rate": 1.2462302979354955e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66044840, + "step": 97980 + }, + { + "epoch": 2.39378985170889, + "grad_norm": 132.5022735595703, + "learning_rate": 1.2461476450229838e-06, + "loss": 0.057, + "num_input_tokens_seen": 66048296, + "step": 97985 + }, + { + "epoch": 2.393912002540737, + "grad_norm": 0.003174891695380211, + "learning_rate": 1.2460649903204204e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66051496, + "step": 97990 + }, + { + "epoch": 2.3940341533725844, + "grad_norm": 0.10849446803331375, + "learning_rate": 1.245982333828406e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66055400, + "step": 97995 + }, + { + "epoch": 2.3941563042044316, + "grad_norm": 25.214004516601562, + "learning_rate": 1.2458996755475424e-06, + "loss": 0.1055, + "num_input_tokens_seen": 66058728, + "step": 98000 + }, + { + "epoch": 2.394278455036279, + "grad_norm": 0.28973275423049927, + "learning_rate": 1.24581701547843e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66061992, + "step": 98005 + }, + { + "epoch": 2.394400605868126, + "grad_norm": 0.06445897370576859, + "learning_rate": 1.24573435362167e-06, + "loss": 0.0459, + "num_input_tokens_seen": 66065704, + "step": 98010 + }, + { + "epoch": 2.394522756699973, + "grad_norm": 0.02486269921064377, + "learning_rate": 1.245651689977864e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66069032, + "step": 98015 + }, + { + "epoch": 2.3946449075318204, + "grad_norm": 0.20119556784629822, + "learning_rate": 1.2455690245476126e-06, + "loss": 0.0468, + "num_input_tokens_seen": 66071912, + "step": 98020 + }, + { + "epoch": 2.3947670583636675, + "grad_norm": 0.03414197266101837, + "learning_rate": 1.2454863573315174e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66075432, + "step": 98025 + }, + { + "epoch": 2.3948892091955147, + "grad_norm": 0.08597314357757568, + "learning_rate": 1.245403688330179e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66078696, + "step": 98030 + }, + { + "epoch": 2.395011360027362, + "grad_norm": 0.6161647439002991, + "learning_rate": 1.2453210175441993e-06, + "loss": 0.1373, + "num_input_tokens_seen": 66082408, + "step": 98035 + }, + { + "epoch": 2.395133510859209, + "grad_norm": 0.04381314292550087, + "learning_rate": 1.245238344974179e-06, + "loss": 0.0438, + "num_input_tokens_seen": 66085416, + "step": 98040 + }, + { + "epoch": 2.3952556616910563, + "grad_norm": 0.03218008577823639, + "learning_rate": 1.2451556706207194e-06, + "loss": 0.0366, + "num_input_tokens_seen": 66088936, + "step": 98045 + }, + { + "epoch": 2.395377812522903, + "grad_norm": 0.002943948609754443, + "learning_rate": 1.245072994484422e-06, + "loss": 0.0444, + "num_input_tokens_seen": 66092648, + "step": 98050 + }, + { + "epoch": 2.3954999633547507, + "grad_norm": 0.1787538379430771, + "learning_rate": 1.2449903165658879e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66096296, + "step": 98055 + }, + { + "epoch": 2.3956221141865974, + "grad_norm": 0.030882876366376877, + "learning_rate": 1.2449076368657184e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66100264, + "step": 98060 + }, + { + "epoch": 2.3957442650184446, + "grad_norm": 36.35651397705078, + "learning_rate": 1.2448249553845146e-06, + "loss": 0.1342, + "num_input_tokens_seen": 66103592, + "step": 98065 + }, + { + "epoch": 2.395866415850292, + "grad_norm": 0.0072379345074296, + "learning_rate": 1.2447422721228777e-06, + "loss": 0.0447, + "num_input_tokens_seen": 66107048, + "step": 98070 + }, + { + "epoch": 2.395988566682139, + "grad_norm": 0.1984623819589615, + "learning_rate": 1.2446595870814096e-06, + "loss": 0.043, + "num_input_tokens_seen": 66110440, + "step": 98075 + }, + { + "epoch": 2.396110717513986, + "grad_norm": 0.0784531831741333, + "learning_rate": 1.2445769002607108e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66113448, + "step": 98080 + }, + { + "epoch": 2.3962328683458334, + "grad_norm": 107.5459213256836, + "learning_rate": 1.244494211661383e-06, + "loss": 0.0386, + "num_input_tokens_seen": 66116776, + "step": 98085 + }, + { + "epoch": 2.3963550191776806, + "grad_norm": 0.011617016047239304, + "learning_rate": 1.2444115212840276e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66119976, + "step": 98090 + }, + { + "epoch": 2.3964771700095278, + "grad_norm": 0.014332075603306293, + "learning_rate": 1.244328829129246e-06, + "loss": 0.1071, + "num_input_tokens_seen": 66123496, + "step": 98095 + }, + { + "epoch": 2.396599320841375, + "grad_norm": 0.0055480338633060455, + "learning_rate": 1.2442461351976395e-06, + "loss": 0.0257, + "num_input_tokens_seen": 66127592, + "step": 98100 + }, + { + "epoch": 2.396721471673222, + "grad_norm": 0.02290935628116131, + "learning_rate": 1.244163439489809e-06, + "loss": 0.0559, + "num_input_tokens_seen": 66131048, + "step": 98105 + }, + { + "epoch": 2.3968436225050693, + "grad_norm": 0.0215010903775692, + "learning_rate": 1.2440807420063565e-06, + "loss": 0.0539, + "num_input_tokens_seen": 66134376, + "step": 98110 + }, + { + "epoch": 2.3969657733369165, + "grad_norm": 0.0604538656771183, + "learning_rate": 1.2439980427478833e-06, + "loss": 0.0912, + "num_input_tokens_seen": 66137448, + "step": 98115 + }, + { + "epoch": 2.3970879241687637, + "grad_norm": 0.04391892999410629, + "learning_rate": 1.2439153417149908e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66141160, + "step": 98120 + }, + { + "epoch": 2.397210075000611, + "grad_norm": 17.283260345458984, + "learning_rate": 1.2438326389082803e-06, + "loss": 0.0504, + "num_input_tokens_seen": 66144424, + "step": 98125 + }, + { + "epoch": 2.397332225832458, + "grad_norm": 0.017710374668240547, + "learning_rate": 1.243749934328353e-06, + "loss": 0.0387, + "num_input_tokens_seen": 66148136, + "step": 98130 + }, + { + "epoch": 2.3974543766643053, + "grad_norm": 37.92856979370117, + "learning_rate": 1.2436672279758108e-06, + "loss": 0.0435, + "num_input_tokens_seen": 66151208, + "step": 98135 + }, + { + "epoch": 2.3975765274961525, + "grad_norm": 34.837039947509766, + "learning_rate": 1.2435845198512547e-06, + "loss": 0.063, + "num_input_tokens_seen": 66154728, + "step": 98140 + }, + { + "epoch": 2.397698678327999, + "grad_norm": 0.16700446605682373, + "learning_rate": 1.2435018099552867e-06, + "loss": 0.0013, + "num_input_tokens_seen": 66157992, + "step": 98145 + }, + { + "epoch": 2.3978208291598464, + "grad_norm": 0.26901018619537354, + "learning_rate": 1.2434190982885082e-06, + "loss": 0.0432, + "num_input_tokens_seen": 66161640, + "step": 98150 + }, + { + "epoch": 2.3979429799916936, + "grad_norm": 0.2465468794107437, + "learning_rate": 1.2433363848515204e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66164840, + "step": 98155 + }, + { + "epoch": 2.398065130823541, + "grad_norm": 0.008369375951588154, + "learning_rate": 1.243253669644925e-06, + "loss": 0.0707, + "num_input_tokens_seen": 66168296, + "step": 98160 + }, + { + "epoch": 2.398187281655388, + "grad_norm": 0.007154883351176977, + "learning_rate": 1.2431709526693234e-06, + "loss": 0.0504, + "num_input_tokens_seen": 66171624, + "step": 98165 + }, + { + "epoch": 2.398309432487235, + "grad_norm": 0.47677215933799744, + "learning_rate": 1.2430882339253172e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66175080, + "step": 98170 + }, + { + "epoch": 2.3984315833190823, + "grad_norm": 0.022384552285075188, + "learning_rate": 1.243005513413508e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66178536, + "step": 98175 + }, + { + "epoch": 2.3985537341509295, + "grad_norm": 0.0022679297253489494, + "learning_rate": 1.2429227911344976e-06, + "loss": 0.0017, + "num_input_tokens_seen": 66181928, + "step": 98180 + }, + { + "epoch": 2.3986758849827767, + "grad_norm": 0.11913125962018967, + "learning_rate": 1.242840067088887e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66185192, + "step": 98185 + }, + { + "epoch": 2.398798035814624, + "grad_norm": 0.15732306241989136, + "learning_rate": 1.2427573412772783e-06, + "loss": 0.0319, + "num_input_tokens_seen": 66188456, + "step": 98190 + }, + { + "epoch": 2.398920186646471, + "grad_norm": 0.07400466501712799, + "learning_rate": 1.2426746137002727e-06, + "loss": 0.0581, + "num_input_tokens_seen": 66191784, + "step": 98195 + }, + { + "epoch": 2.3990423374783183, + "grad_norm": 0.3544338047504425, + "learning_rate": 1.2425918843584721e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66194984, + "step": 98200 + }, + { + "epoch": 2.3991644883101655, + "grad_norm": 0.22692422568798065, + "learning_rate": 1.2425091532524783e-06, + "loss": 0.0467, + "num_input_tokens_seen": 66198120, + "step": 98205 + }, + { + "epoch": 2.3992866391420127, + "grad_norm": 0.007906322367489338, + "learning_rate": 1.2424264203828924e-06, + "loss": 0.0259, + "num_input_tokens_seen": 66201832, + "step": 98210 + }, + { + "epoch": 2.39940878997386, + "grad_norm": 0.015309485606849194, + "learning_rate": 1.2423436857503167e-06, + "loss": 0.0009, + "num_input_tokens_seen": 66205480, + "step": 98215 + }, + { + "epoch": 2.399530940805707, + "grad_norm": 0.14849808812141418, + "learning_rate": 1.2422609493553522e-06, + "loss": 0.0456, + "num_input_tokens_seen": 66208616, + "step": 98220 + }, + { + "epoch": 2.3996530916375542, + "grad_norm": 0.013945171609520912, + "learning_rate": 1.2421782111986013e-06, + "loss": 0.0009, + "num_input_tokens_seen": 66212200, + "step": 98225 + }, + { + "epoch": 2.399775242469401, + "grad_norm": 18.396347045898438, + "learning_rate": 1.2420954712806653e-06, + "loss": 0.0394, + "num_input_tokens_seen": 66215400, + "step": 98230 + }, + { + "epoch": 2.3998973933012486, + "grad_norm": 0.014513997361063957, + "learning_rate": 1.2420127296021454e-06, + "loss": 0.0604, + "num_input_tokens_seen": 66218664, + "step": 98235 + }, + { + "epoch": 2.4000195441330954, + "grad_norm": 0.3597651422023773, + "learning_rate": 1.241929986163644e-06, + "loss": 0.0012, + "num_input_tokens_seen": 66221672, + "step": 98240 + }, + { + "epoch": 2.4001416949649426, + "grad_norm": 0.2162315994501114, + "learning_rate": 1.241847240965763e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66225064, + "step": 98245 + }, + { + "epoch": 2.4002638457967898, + "grad_norm": 0.2530384659767151, + "learning_rate": 1.2417644940091036e-06, + "loss": 0.0325, + "num_input_tokens_seen": 66228200, + "step": 98250 + }, + { + "epoch": 2.400385996628637, + "grad_norm": 0.8703799247741699, + "learning_rate": 1.2416817452942678e-06, + "loss": 0.0371, + "num_input_tokens_seen": 66231464, + "step": 98255 + }, + { + "epoch": 2.400508147460484, + "grad_norm": 0.13068102300167084, + "learning_rate": 1.2415989948218575e-06, + "loss": 0.0214, + "num_input_tokens_seen": 66234984, + "step": 98260 + }, + { + "epoch": 2.4006302982923313, + "grad_norm": 0.002861426677554846, + "learning_rate": 1.2415162425924739e-06, + "loss": 0.0574, + "num_input_tokens_seen": 66238504, + "step": 98265 + }, + { + "epoch": 2.4007524491241785, + "grad_norm": 0.16854959726333618, + "learning_rate": 1.2414334886067196e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66241640, + "step": 98270 + }, + { + "epoch": 2.4008745999560257, + "grad_norm": 0.36802446842193604, + "learning_rate": 1.241350732865196e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66245096, + "step": 98275 + }, + { + "epoch": 2.400996750787873, + "grad_norm": 0.13454042375087738, + "learning_rate": 1.241267975368505e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66248296, + "step": 98280 + }, + { + "epoch": 2.40111890161972, + "grad_norm": 0.5747636556625366, + "learning_rate": 1.2411852161172482e-06, + "loss": 0.0999, + "num_input_tokens_seen": 66251752, + "step": 98285 + }, + { + "epoch": 2.4012410524515673, + "grad_norm": 0.11840732395648956, + "learning_rate": 1.2411024551120277e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66255144, + "step": 98290 + }, + { + "epoch": 2.4013632032834145, + "grad_norm": 0.0020689095836132765, + "learning_rate": 1.2410196923534454e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66258664, + "step": 98295 + }, + { + "epoch": 2.4014853541152617, + "grad_norm": 0.011177745647728443, + "learning_rate": 1.2409369278421026e-06, + "loss": 0.0864, + "num_input_tokens_seen": 66262312, + "step": 98300 + }, + { + "epoch": 2.401607504947109, + "grad_norm": 0.0040663969703018665, + "learning_rate": 1.2408541615786022e-06, + "loss": 0.0399, + "num_input_tokens_seen": 66265832, + "step": 98305 + }, + { + "epoch": 2.401729655778956, + "grad_norm": 0.03280526027083397, + "learning_rate": 1.2407713935635453e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66268968, + "step": 98310 + }, + { + "epoch": 2.4018518066108028, + "grad_norm": 0.04107336327433586, + "learning_rate": 1.2406886237975342e-06, + "loss": 0.0456, + "num_input_tokens_seen": 66272104, + "step": 98315 + }, + { + "epoch": 2.4019739574426504, + "grad_norm": 0.2942955493927002, + "learning_rate": 1.240605852281171e-06, + "loss": 0.0015, + "num_input_tokens_seen": 66275752, + "step": 98320 + }, + { + "epoch": 2.402096108274497, + "grad_norm": 16.743667602539062, + "learning_rate": 1.2405230790150566e-06, + "loss": 0.0334, + "num_input_tokens_seen": 66279144, + "step": 98325 + }, + { + "epoch": 2.4022182591063443, + "grad_norm": 0.021431000903248787, + "learning_rate": 1.240440303999794e-06, + "loss": 0.1057, + "num_input_tokens_seen": 66282408, + "step": 98330 + }, + { + "epoch": 2.4023404099381915, + "grad_norm": 0.13238425552845, + "learning_rate": 1.2403575272359853e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66285864, + "step": 98335 + }, + { + "epoch": 2.4024625607700387, + "grad_norm": 0.05844270810484886, + "learning_rate": 1.2402747487242313e-06, + "loss": 0.0415, + "num_input_tokens_seen": 66288872, + "step": 98340 + }, + { + "epoch": 2.402584711601886, + "grad_norm": 0.12406209856271744, + "learning_rate": 1.240191968465135e-06, + "loss": 0.0706, + "num_input_tokens_seen": 66292264, + "step": 98345 + }, + { + "epoch": 2.402706862433733, + "grad_norm": 0.0026762220077216625, + "learning_rate": 1.2401091864592984e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66295592, + "step": 98350 + }, + { + "epoch": 2.4028290132655803, + "grad_norm": 0.05921659618616104, + "learning_rate": 1.2400264027073227e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66298792, + "step": 98355 + }, + { + "epoch": 2.4029511640974275, + "grad_norm": 0.0028167401906102896, + "learning_rate": 1.2399436172098106e-06, + "loss": 0.0626, + "num_input_tokens_seen": 66302312, + "step": 98360 + }, + { + "epoch": 2.4030733149292747, + "grad_norm": 0.003555183531716466, + "learning_rate": 1.239860829967364e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66305704, + "step": 98365 + }, + { + "epoch": 2.403195465761122, + "grad_norm": 0.003496664110571146, + "learning_rate": 1.239778040980585e-06, + "loss": 0.0761, + "num_input_tokens_seen": 66309800, + "step": 98370 + }, + { + "epoch": 2.403317616592969, + "grad_norm": 19.096784591674805, + "learning_rate": 1.2396952502500756e-06, + "loss": 0.0349, + "num_input_tokens_seen": 66313064, + "step": 98375 + }, + { + "epoch": 2.4034397674248162, + "grad_norm": 0.007529269903898239, + "learning_rate": 1.2396124577764378e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66316264, + "step": 98380 + }, + { + "epoch": 2.4035619182566634, + "grad_norm": 0.011298858560621738, + "learning_rate": 1.239529663560274e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66319656, + "step": 98385 + }, + { + "epoch": 2.4036840690885106, + "grad_norm": 12.470608711242676, + "learning_rate": 1.2394468676021856e-06, + "loss": 0.0759, + "num_input_tokens_seen": 66323112, + "step": 98390 + }, + { + "epoch": 2.403806219920358, + "grad_norm": 0.0077195316553115845, + "learning_rate": 1.2393640699027757e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66326376, + "step": 98395 + }, + { + "epoch": 2.403928370752205, + "grad_norm": 20.43804359436035, + "learning_rate": 1.2392812704626453e-06, + "loss": 0.0869, + "num_input_tokens_seen": 66329832, + "step": 98400 + }, + { + "epoch": 2.404050521584052, + "grad_norm": 0.029997482895851135, + "learning_rate": 1.2391984692823976e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66333160, + "step": 98405 + }, + { + "epoch": 2.404172672415899, + "grad_norm": 16.685693740844727, + "learning_rate": 1.2391156663626343e-06, + "loss": 0.0592, + "num_input_tokens_seen": 66336104, + "step": 98410 + }, + { + "epoch": 2.404294823247746, + "grad_norm": 0.04722894728183746, + "learning_rate": 1.2390328617039574e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66339560, + "step": 98415 + }, + { + "epoch": 2.4044169740795933, + "grad_norm": 0.03532855212688446, + "learning_rate": 1.238950055306969e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66342888, + "step": 98420 + }, + { + "epoch": 2.4045391249114405, + "grad_norm": 0.1769874095916748, + "learning_rate": 1.2388672471722719e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66346344, + "step": 98425 + }, + { + "epoch": 2.4046612757432877, + "grad_norm": 0.003971653990447521, + "learning_rate": 1.238784437300468e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66349480, + "step": 98430 + }, + { + "epoch": 2.404783426575135, + "grad_norm": 0.07422498613595963, + "learning_rate": 1.2387016256921593e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66353256, + "step": 98435 + }, + { + "epoch": 2.404905577406982, + "grad_norm": 18.69518280029297, + "learning_rate": 1.2386188123479482e-06, + "loss": 0.0448, + "num_input_tokens_seen": 66356520, + "step": 98440 + }, + { + "epoch": 2.4050277282388293, + "grad_norm": 1.4093151092529297, + "learning_rate": 1.238535997268437e-06, + "loss": 0.03, + "num_input_tokens_seen": 66359912, + "step": 98445 + }, + { + "epoch": 2.4051498790706765, + "grad_norm": 0.47100839018821716, + "learning_rate": 1.2384531804542272e-06, + "loss": 0.0022, + "num_input_tokens_seen": 66363176, + "step": 98450 + }, + { + "epoch": 2.4052720299025236, + "grad_norm": 0.013687855564057827, + "learning_rate": 1.2383703619059225e-06, + "loss": 0.0555, + "num_input_tokens_seen": 66366504, + "step": 98455 + }, + { + "epoch": 2.405394180734371, + "grad_norm": 13.551589965820312, + "learning_rate": 1.238287541624124e-06, + "loss": 0.0763, + "num_input_tokens_seen": 66370024, + "step": 98460 + }, + { + "epoch": 2.405516331566218, + "grad_norm": 0.012431265786290169, + "learning_rate": 1.2382047196094348e-06, + "loss": 0.0281, + "num_input_tokens_seen": 66373096, + "step": 98465 + }, + { + "epoch": 2.405638482398065, + "grad_norm": 0.04757985472679138, + "learning_rate": 1.2381218958624565e-06, + "loss": 0.0863, + "num_input_tokens_seen": 66376168, + "step": 98470 + }, + { + "epoch": 2.4057606332299124, + "grad_norm": 0.23747007548809052, + "learning_rate": 1.238039070383792e-06, + "loss": 0.0351, + "num_input_tokens_seen": 66379688, + "step": 98475 + }, + { + "epoch": 2.4058827840617596, + "grad_norm": 0.014675687067210674, + "learning_rate": 1.237956243174043e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66383016, + "step": 98480 + }, + { + "epoch": 2.406004934893607, + "grad_norm": 0.0017657901626080275, + "learning_rate": 1.2378734142338126e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66386216, + "step": 98485 + }, + { + "epoch": 2.406127085725454, + "grad_norm": 0.28223177790641785, + "learning_rate": 1.2377905835637024e-06, + "loss": 0.0553, + "num_input_tokens_seen": 66389608, + "step": 98490 + }, + { + "epoch": 2.4062492365573007, + "grad_norm": 0.2891443371772766, + "learning_rate": 1.2377077511643152e-06, + "loss": 0.0818, + "num_input_tokens_seen": 66392552, + "step": 98495 + }, + { + "epoch": 2.4063713873891484, + "grad_norm": 0.014545445330440998, + "learning_rate": 1.2376249170362533e-06, + "loss": 0.0648, + "num_input_tokens_seen": 66395624, + "step": 98500 + }, + { + "epoch": 2.406493538220995, + "grad_norm": 466.7126159667969, + "learning_rate": 1.237542081180119e-06, + "loss": 0.0115, + "num_input_tokens_seen": 66399144, + "step": 98505 + }, + { + "epoch": 2.4066156890528423, + "grad_norm": 0.0069570522755384445, + "learning_rate": 1.2374592435965152e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66402472, + "step": 98510 + }, + { + "epoch": 2.4067378398846895, + "grad_norm": 169.1098175048828, + "learning_rate": 1.2373764042860434e-06, + "loss": 0.0815, + "num_input_tokens_seen": 66406056, + "step": 98515 + }, + { + "epoch": 2.4068599907165367, + "grad_norm": 2.4282772541046143, + "learning_rate": 1.2372935632493068e-06, + "loss": 0.0011, + "num_input_tokens_seen": 66409640, + "step": 98520 + }, + { + "epoch": 2.406982141548384, + "grad_norm": 62.906150817871094, + "learning_rate": 1.2372107204869075e-06, + "loss": 0.0797, + "num_input_tokens_seen": 66412776, + "step": 98525 + }, + { + "epoch": 2.407104292380231, + "grad_norm": 0.0374017059803009, + "learning_rate": 1.237127875999448e-06, + "loss": 0.1063, + "num_input_tokens_seen": 66416424, + "step": 98530 + }, + { + "epoch": 2.4072264432120782, + "grad_norm": 1.4092093706130981, + "learning_rate": 1.2370450297875312e-06, + "loss": 0.0633, + "num_input_tokens_seen": 66419880, + "step": 98535 + }, + { + "epoch": 2.4073485940439254, + "grad_norm": 0.13621792197227478, + "learning_rate": 1.236962181851759e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66423080, + "step": 98540 + }, + { + "epoch": 2.4074707448757726, + "grad_norm": 0.03624136745929718, + "learning_rate": 1.2368793321927338e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66426088, + "step": 98545 + }, + { + "epoch": 2.40759289570762, + "grad_norm": 0.07511377334594727, + "learning_rate": 1.2367964808110585e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66429736, + "step": 98550 + }, + { + "epoch": 2.407715046539467, + "grad_norm": 0.08834755420684814, + "learning_rate": 1.2367136277073358e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66433576, + "step": 98555 + }, + { + "epoch": 2.407837197371314, + "grad_norm": 0.21426711976528168, + "learning_rate": 1.2366307728821676e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66436904, + "step": 98560 + }, + { + "epoch": 2.4079593482031614, + "grad_norm": 2.439096450805664, + "learning_rate": 1.236547916336157e-06, + "loss": 0.1034, + "num_input_tokens_seen": 66440616, + "step": 98565 + }, + { + "epoch": 2.4080814990350086, + "grad_norm": 0.015720663592219353, + "learning_rate": 1.236465058069906e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66443816, + "step": 98570 + }, + { + "epoch": 2.4082036498668558, + "grad_norm": 0.010523266158998013, + "learning_rate": 1.2363821980840173e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66447400, + "step": 98575 + }, + { + "epoch": 2.408325800698703, + "grad_norm": 0.07485683262348175, + "learning_rate": 1.2362993363790943e-06, + "loss": 0.049, + "num_input_tokens_seen": 66450984, + "step": 98580 + }, + { + "epoch": 2.40844795153055, + "grad_norm": 62.42988204956055, + "learning_rate": 1.236216472955739e-06, + "loss": 0.0301, + "num_input_tokens_seen": 66453928, + "step": 98585 + }, + { + "epoch": 2.408570102362397, + "grad_norm": 0.056043241173028946, + "learning_rate": 1.2361336078145536e-06, + "loss": 0.0167, + "num_input_tokens_seen": 66457000, + "step": 98590 + }, + { + "epoch": 2.408692253194244, + "grad_norm": 0.05428202077746391, + "learning_rate": 1.2360507409561413e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66460008, + "step": 98595 + }, + { + "epoch": 2.4088144040260913, + "grad_norm": 0.04799538478255272, + "learning_rate": 1.2359678723811045e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66463144, + "step": 98600 + }, + { + "epoch": 2.4089365548579385, + "grad_norm": 0.003943683113902807, + "learning_rate": 1.2358850020900454e-06, + "loss": 0.0982, + "num_input_tokens_seen": 66466024, + "step": 98605 + }, + { + "epoch": 2.4090587056897856, + "grad_norm": 0.03562261164188385, + "learning_rate": 1.2358021300835676e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66469224, + "step": 98610 + }, + { + "epoch": 2.409180856521633, + "grad_norm": 2.066300630569458, + "learning_rate": 1.235719256362273e-06, + "loss": 0.0229, + "num_input_tokens_seen": 66472744, + "step": 98615 + }, + { + "epoch": 2.40930300735348, + "grad_norm": 194.5032958984375, + "learning_rate": 1.235636380926765e-06, + "loss": 0.1211, + "num_input_tokens_seen": 66475816, + "step": 98620 + }, + { + "epoch": 2.409425158185327, + "grad_norm": 0.015606533735990524, + "learning_rate": 1.2355535037776456e-06, + "loss": 0.0007, + "num_input_tokens_seen": 66479016, + "step": 98625 + }, + { + "epoch": 2.4095473090171744, + "grad_norm": 0.02473052591085434, + "learning_rate": 1.2354706249155177e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66482280, + "step": 98630 + }, + { + "epoch": 2.4096694598490216, + "grad_norm": 0.1488429754972458, + "learning_rate": 1.2353877443409844e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66485608, + "step": 98635 + }, + { + "epoch": 2.409791610680869, + "grad_norm": 0.014244853518903255, + "learning_rate": 1.2353048620546477e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66489192, + "step": 98640 + }, + { + "epoch": 2.409913761512716, + "grad_norm": 0.017912834882736206, + "learning_rate": 1.235221978057111e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66492648, + "step": 98645 + }, + { + "epoch": 2.410035912344563, + "grad_norm": 156.58143615722656, + "learning_rate": 1.235139092348977e-06, + "loss": 0.0544, + "num_input_tokens_seen": 66495848, + "step": 98650 + }, + { + "epoch": 2.4101580631764103, + "grad_norm": 39.045799255371094, + "learning_rate": 1.2350562049308477e-06, + "loss": 0.0525, + "num_input_tokens_seen": 66499432, + "step": 98655 + }, + { + "epoch": 2.4102802140082575, + "grad_norm": 0.024942591786384583, + "learning_rate": 1.2349733158033268e-06, + "loss": 0.0456, + "num_input_tokens_seen": 66502760, + "step": 98660 + }, + { + "epoch": 2.4104023648401047, + "grad_norm": 0.03383241221308708, + "learning_rate": 1.2348904249670169e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66506088, + "step": 98665 + }, + { + "epoch": 2.410524515671952, + "grad_norm": 0.015606016851961613, + "learning_rate": 1.2348075324225202e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66509160, + "step": 98670 + }, + { + "epoch": 2.4106466665037987, + "grad_norm": 0.0175799373537302, + "learning_rate": 1.2347246381704402e-06, + "loss": 0.0, + "num_input_tokens_seen": 66512488, + "step": 98675 + }, + { + "epoch": 2.4107688173356463, + "grad_norm": 0.0061505944468081, + "learning_rate": 1.2346417422113794e-06, + "loss": 0.0355, + "num_input_tokens_seen": 66516264, + "step": 98680 + }, + { + "epoch": 2.410890968167493, + "grad_norm": 0.0010680512059479952, + "learning_rate": 1.234558844545941e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66519528, + "step": 98685 + }, + { + "epoch": 2.4110131189993402, + "grad_norm": 0.0005795100587420166, + "learning_rate": 1.2344759451747275e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66523624, + "step": 98690 + }, + { + "epoch": 2.4111352698311874, + "grad_norm": 0.002949797548353672, + "learning_rate": 1.2343930440983422e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66526760, + "step": 98695 + }, + { + "epoch": 2.4112574206630346, + "grad_norm": 95.02862548828125, + "learning_rate": 1.2343101413173869e-06, + "loss": 0.0466, + "num_input_tokens_seen": 66530344, + "step": 98700 + }, + { + "epoch": 2.411379571494882, + "grad_norm": 0.38339921832084656, + "learning_rate": 1.2342272368324658e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66533608, + "step": 98705 + }, + { + "epoch": 2.411501722326729, + "grad_norm": 0.005964651238173246, + "learning_rate": 1.234144330644181e-06, + "loss": 0.108, + "num_input_tokens_seen": 66536936, + "step": 98710 + }, + { + "epoch": 2.411623873158576, + "grad_norm": 0.007162902038544416, + "learning_rate": 1.2340614227531355e-06, + "loss": 0.0361, + "num_input_tokens_seen": 66540648, + "step": 98715 + }, + { + "epoch": 2.4117460239904234, + "grad_norm": 0.03665280342102051, + "learning_rate": 1.2339785131599328e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66544104, + "step": 98720 + }, + { + "epoch": 2.4118681748222706, + "grad_norm": 0.1304071545600891, + "learning_rate": 1.2338956018651749e-06, + "loss": 0.0428, + "num_input_tokens_seen": 66547368, + "step": 98725 + }, + { + "epoch": 2.4119903256541178, + "grad_norm": 0.10976487398147583, + "learning_rate": 1.2338126888694656e-06, + "loss": 0.0545, + "num_input_tokens_seen": 66551016, + "step": 98730 + }, + { + "epoch": 2.412112476485965, + "grad_norm": 0.5006261467933655, + "learning_rate": 1.2337297741734075e-06, + "loss": 0.0401, + "num_input_tokens_seen": 66554600, + "step": 98735 + }, + { + "epoch": 2.412234627317812, + "grad_norm": 0.0057154325768351555, + "learning_rate": 1.2336468577776037e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66558568, + "step": 98740 + }, + { + "epoch": 2.4123567781496593, + "grad_norm": 0.07972795516252518, + "learning_rate": 1.2335639396826572e-06, + "loss": 0.1005, + "num_input_tokens_seen": 66562664, + "step": 98745 + }, + { + "epoch": 2.4124789289815065, + "grad_norm": 0.015096195042133331, + "learning_rate": 1.2334810198891705e-06, + "loss": 0.0755, + "num_input_tokens_seen": 66566056, + "step": 98750 + }, + { + "epoch": 2.4126010798133537, + "grad_norm": 0.24497045576572418, + "learning_rate": 1.2333980983977474e-06, + "loss": 0.0719, + "num_input_tokens_seen": 66569512, + "step": 98755 + }, + { + "epoch": 2.4127232306452004, + "grad_norm": 0.020139671862125397, + "learning_rate": 1.2333151752089901e-06, + "loss": 0.0695, + "num_input_tokens_seen": 66572776, + "step": 98760 + }, + { + "epoch": 2.412845381477048, + "grad_norm": 0.1416095346212387, + "learning_rate": 1.2332322503235024e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66576936, + "step": 98765 + }, + { + "epoch": 2.412967532308895, + "grad_norm": 0.16571195423603058, + "learning_rate": 1.2331493237418871e-06, + "loss": 0.0237, + "num_input_tokens_seen": 66580264, + "step": 98770 + }, + { + "epoch": 2.413089683140742, + "grad_norm": 0.3631545603275299, + "learning_rate": 1.2330663954647471e-06, + "loss": 0.0456, + "num_input_tokens_seen": 66584232, + "step": 98775 + }, + { + "epoch": 2.413211833972589, + "grad_norm": 0.07778457552194595, + "learning_rate": 1.2329834654926855e-06, + "loss": 0.049, + "num_input_tokens_seen": 66587496, + "step": 98780 + }, + { + "epoch": 2.4133339848044364, + "grad_norm": 54.81132507324219, + "learning_rate": 1.2329005338263058e-06, + "loss": 0.0841, + "num_input_tokens_seen": 66590888, + "step": 98785 + }, + { + "epoch": 2.4134561356362836, + "grad_norm": 0.08354758471250534, + "learning_rate": 1.2328176004662105e-06, + "loss": 0.0065, + "num_input_tokens_seen": 66595048, + "step": 98790 + }, + { + "epoch": 2.4135782864681308, + "grad_norm": 0.055788785219192505, + "learning_rate": 1.232734665413003e-06, + "loss": 0.0562, + "num_input_tokens_seen": 66598440, + "step": 98795 + }, + { + "epoch": 2.413700437299978, + "grad_norm": 0.04762701317667961, + "learning_rate": 1.2326517286672867e-06, + "loss": 0.0518, + "num_input_tokens_seen": 66601832, + "step": 98800 + }, + { + "epoch": 2.413822588131825, + "grad_norm": 11.54391860961914, + "learning_rate": 1.2325687902296642e-06, + "loss": 0.1387, + "num_input_tokens_seen": 66604776, + "step": 98805 + }, + { + "epoch": 2.4139447389636723, + "grad_norm": 0.04480794444680214, + "learning_rate": 1.2324858501007389e-06, + "loss": 0.0728, + "num_input_tokens_seen": 66607784, + "step": 98810 + }, + { + "epoch": 2.4140668897955195, + "grad_norm": 0.24305419623851776, + "learning_rate": 1.232402908281114e-06, + "loss": 0.0007, + "num_input_tokens_seen": 66611432, + "step": 98815 + }, + { + "epoch": 2.4141890406273667, + "grad_norm": 14.967013359069824, + "learning_rate": 1.2323199647713927e-06, + "loss": 0.0464, + "num_input_tokens_seen": 66614568, + "step": 98820 + }, + { + "epoch": 2.414311191459214, + "grad_norm": 21.968698501586914, + "learning_rate": 1.232237019572178e-06, + "loss": 0.0378, + "num_input_tokens_seen": 66618536, + "step": 98825 + }, + { + "epoch": 2.414433342291061, + "grad_norm": 0.05005514249205589, + "learning_rate": 1.2321540726840734e-06, + "loss": 0.0525, + "num_input_tokens_seen": 66621736, + "step": 98830 + }, + { + "epoch": 2.4145554931229083, + "grad_norm": 0.08329369872808456, + "learning_rate": 1.2320711241076817e-06, + "loss": 0.0022, + "num_input_tokens_seen": 66625128, + "step": 98835 + }, + { + "epoch": 2.4146776439547555, + "grad_norm": 0.05002454295754433, + "learning_rate": 1.2319881738436065e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66628392, + "step": 98840 + }, + { + "epoch": 2.4147997947866027, + "grad_norm": 0.6276405453681946, + "learning_rate": 1.2319052218924509e-06, + "loss": 0.0008, + "num_input_tokens_seen": 66631848, + "step": 98845 + }, + { + "epoch": 2.41492194561845, + "grad_norm": 0.012492536567151546, + "learning_rate": 1.2318222682548185e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66634920, + "step": 98850 + }, + { + "epoch": 2.4150440964502966, + "grad_norm": 0.06913726031780243, + "learning_rate": 1.2317393129313115e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66638312, + "step": 98855 + }, + { + "epoch": 2.4151662472821442, + "grad_norm": 0.04986165091395378, + "learning_rate": 1.2316563559225345e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66641576, + "step": 98860 + }, + { + "epoch": 2.415288398113991, + "grad_norm": 0.026278551667928696, + "learning_rate": 1.2315733972290897e-06, + "loss": 0.0401, + "num_input_tokens_seen": 66645096, + "step": 98865 + }, + { + "epoch": 2.415410548945838, + "grad_norm": 0.010731449350714684, + "learning_rate": 1.2314904368515813e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66648040, + "step": 98870 + }, + { + "epoch": 2.4155326997776854, + "grad_norm": 0.016922397539019585, + "learning_rate": 1.231407474790612e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66651176, + "step": 98875 + }, + { + "epoch": 2.4156548506095326, + "grad_norm": 0.006709430366754532, + "learning_rate": 1.2313245110467853e-06, + "loss": 0.0577, + "num_input_tokens_seen": 66654568, + "step": 98880 + }, + { + "epoch": 2.4157770014413797, + "grad_norm": 0.006941602099686861, + "learning_rate": 1.2312415456207045e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66657960, + "step": 98885 + }, + { + "epoch": 2.415899152273227, + "grad_norm": 0.3153388500213623, + "learning_rate": 1.2311585785129727e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66661032, + "step": 98890 + }, + { + "epoch": 2.416021303105074, + "grad_norm": 14.619956016540527, + "learning_rate": 1.2310756097241942e-06, + "loss": 0.0951, + "num_input_tokens_seen": 66664424, + "step": 98895 + }, + { + "epoch": 2.4161434539369213, + "grad_norm": 0.0042695761658251286, + "learning_rate": 1.2309926392549713e-06, + "loss": 0.0481, + "num_input_tokens_seen": 66667816, + "step": 98900 + }, + { + "epoch": 2.4162656047687685, + "grad_norm": 26.597707748413086, + "learning_rate": 1.230909667105908e-06, + "loss": 0.0678, + "num_input_tokens_seen": 66671464, + "step": 98905 + }, + { + "epoch": 2.4163877556006157, + "grad_norm": 0.018522268161177635, + "learning_rate": 1.2308266932776073e-06, + "loss": 0.0602, + "num_input_tokens_seen": 66674664, + "step": 98910 + }, + { + "epoch": 2.416509906432463, + "grad_norm": 0.03735198453068733, + "learning_rate": 1.2307437177706727e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66678376, + "step": 98915 + }, + { + "epoch": 2.41663205726431, + "grad_norm": 0.052860476076602936, + "learning_rate": 1.2306607405857078e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66681960, + "step": 98920 + }, + { + "epoch": 2.4167542080961573, + "grad_norm": 0.03746561333537102, + "learning_rate": 1.2305777617233162e-06, + "loss": 0.036, + "num_input_tokens_seen": 66684904, + "step": 98925 + }, + { + "epoch": 2.4168763589280045, + "grad_norm": 0.16554565727710724, + "learning_rate": 1.2304947811841008e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66688232, + "step": 98930 + }, + { + "epoch": 2.4169985097598516, + "grad_norm": 0.17963381111621857, + "learning_rate": 1.2304117989686655e-06, + "loss": 0.0452, + "num_input_tokens_seen": 66691432, + "step": 98935 + }, + { + "epoch": 2.4171206605916984, + "grad_norm": 0.06290189176797867, + "learning_rate": 1.2303288150776138e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66695208, + "step": 98940 + }, + { + "epoch": 2.417242811423546, + "grad_norm": 0.012594391591846943, + "learning_rate": 1.2302458295115488e-06, + "loss": 0.0, + "num_input_tokens_seen": 66698472, + "step": 98945 + }, + { + "epoch": 2.4173649622553928, + "grad_norm": 0.13371609151363373, + "learning_rate": 1.2301628422710742e-06, + "loss": 0.095, + "num_input_tokens_seen": 66701608, + "step": 98950 + }, + { + "epoch": 2.41748711308724, + "grad_norm": 0.004092858172953129, + "learning_rate": 1.2300798533567935e-06, + "loss": 0.0, + "num_input_tokens_seen": 66704872, + "step": 98955 + }, + { + "epoch": 2.417609263919087, + "grad_norm": 0.021656107157468796, + "learning_rate": 1.2299968627693102e-06, + "loss": 0.0558, + "num_input_tokens_seen": 66707816, + "step": 98960 + }, + { + "epoch": 2.4177314147509343, + "grad_norm": 27.706756591796875, + "learning_rate": 1.229913870509228e-06, + "loss": 0.1187, + "num_input_tokens_seen": 66711144, + "step": 98965 + }, + { + "epoch": 2.4178535655827815, + "grad_norm": 87.51176452636719, + "learning_rate": 1.22983087657715e-06, + "loss": 0.1665, + "num_input_tokens_seen": 66714408, + "step": 98970 + }, + { + "epoch": 2.4179757164146287, + "grad_norm": 0.009178809821605682, + "learning_rate": 1.2297478809736804e-06, + "loss": 0.0467, + "num_input_tokens_seen": 66717864, + "step": 98975 + }, + { + "epoch": 2.418097867246476, + "grad_norm": 0.034699052572250366, + "learning_rate": 1.229664883699422e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66721256, + "step": 98980 + }, + { + "epoch": 2.418220018078323, + "grad_norm": 0.23443113267421722, + "learning_rate": 1.229581884754979e-06, + "loss": 0.0032, + "num_input_tokens_seen": 66724456, + "step": 98985 + }, + { + "epoch": 2.4183421689101703, + "grad_norm": 0.05177519470453262, + "learning_rate": 1.229498884140955e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66728232, + "step": 98990 + }, + { + "epoch": 2.4184643197420175, + "grad_norm": 25.600658416748047, + "learning_rate": 1.2294158818579533e-06, + "loss": 0.0465, + "num_input_tokens_seen": 66731624, + "step": 98995 + }, + { + "epoch": 2.4185864705738647, + "grad_norm": 0.06634686142206192, + "learning_rate": 1.2293328779065774e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66735464, + "step": 99000 + }, + { + "epoch": 2.418708621405712, + "grad_norm": 0.03814494609832764, + "learning_rate": 1.2292498722874316e-06, + "loss": 0.0344, + "num_input_tokens_seen": 66739176, + "step": 99005 + }, + { + "epoch": 2.418830772237559, + "grad_norm": 0.02642938122153282, + "learning_rate": 1.2291668650011185e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66742248, + "step": 99010 + }, + { + "epoch": 2.4189529230694062, + "grad_norm": 0.021579941734671593, + "learning_rate": 1.2290838560482427e-06, + "loss": 0.0399, + "num_input_tokens_seen": 66745512, + "step": 99015 + }, + { + "epoch": 2.4190750739012534, + "grad_norm": 0.036266956478357315, + "learning_rate": 1.2290008454294072e-06, + "loss": 0.0005, + "num_input_tokens_seen": 66748968, + "step": 99020 + }, + { + "epoch": 2.4191972247331006, + "grad_norm": 42.17207717895508, + "learning_rate": 1.228917833145216e-06, + "loss": 0.128, + "num_input_tokens_seen": 66752552, + "step": 99025 + }, + { + "epoch": 2.419319375564948, + "grad_norm": 0.12942864000797272, + "learning_rate": 1.228834819196273e-06, + "loss": 0.0296, + "num_input_tokens_seen": 66755688, + "step": 99030 + }, + { + "epoch": 2.4194415263967946, + "grad_norm": 0.011230261996388435, + "learning_rate": 1.2287518035831815e-06, + "loss": 0.1457, + "num_input_tokens_seen": 66758888, + "step": 99035 + }, + { + "epoch": 2.4195636772286417, + "grad_norm": 0.5837439298629761, + "learning_rate": 1.228668786306545e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66762408, + "step": 99040 + }, + { + "epoch": 2.419685828060489, + "grad_norm": 0.6264066100120544, + "learning_rate": 1.228585767366968e-06, + "loss": 0.0956, + "num_input_tokens_seen": 66765352, + "step": 99045 + }, + { + "epoch": 2.419807978892336, + "grad_norm": 0.04524848982691765, + "learning_rate": 1.228502746765054e-06, + "loss": 0.0024, + "num_input_tokens_seen": 66768168, + "step": 99050 + }, + { + "epoch": 2.4199301297241833, + "grad_norm": 0.04734393209218979, + "learning_rate": 1.2284197245014062e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66771240, + "step": 99055 + }, + { + "epoch": 2.4200522805560305, + "grad_norm": 33.75642395019531, + "learning_rate": 1.2283367005766288e-06, + "loss": 0.034, + "num_input_tokens_seen": 66774248, + "step": 99060 + }, + { + "epoch": 2.4201744313878777, + "grad_norm": 0.0772058367729187, + "learning_rate": 1.2282536749913255e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66777576, + "step": 99065 + }, + { + "epoch": 2.420296582219725, + "grad_norm": 0.39729130268096924, + "learning_rate": 1.2281706477461002e-06, + "loss": 0.0316, + "num_input_tokens_seen": 66780712, + "step": 99070 + }, + { + "epoch": 2.420418733051572, + "grad_norm": 27.436330795288086, + "learning_rate": 1.2280876188415562e-06, + "loss": 0.1086, + "num_input_tokens_seen": 66784168, + "step": 99075 + }, + { + "epoch": 2.4205408838834193, + "grad_norm": 0.021735528483986855, + "learning_rate": 1.2280045882782978e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66787432, + "step": 99080 + }, + { + "epoch": 2.4206630347152664, + "grad_norm": 0.11444593220949173, + "learning_rate": 1.227921556056929e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66790504, + "step": 99085 + }, + { + "epoch": 2.4207851855471136, + "grad_norm": 0.021713944151997566, + "learning_rate": 1.2278385221780534e-06, + "loss": 0.0944, + "num_input_tokens_seen": 66793896, + "step": 99090 + }, + { + "epoch": 2.420907336378961, + "grad_norm": 0.01930270716547966, + "learning_rate": 1.2277554866422746e-06, + "loss": 0.1299, + "num_input_tokens_seen": 66797352, + "step": 99095 + }, + { + "epoch": 2.421029487210808, + "grad_norm": 0.027717571705579758, + "learning_rate": 1.2276724494501966e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66800552, + "step": 99100 + }, + { + "epoch": 2.421151638042655, + "grad_norm": 434.61480712890625, + "learning_rate": 1.2275894106024234e-06, + "loss": 0.0049, + "num_input_tokens_seen": 66803880, + "step": 99105 + }, + { + "epoch": 2.4212737888745024, + "grad_norm": 14.324984550476074, + "learning_rate": 1.2275063700995587e-06, + "loss": 0.0443, + "num_input_tokens_seen": 66807336, + "step": 99110 + }, + { + "epoch": 2.4213959397063496, + "grad_norm": 15.495723724365234, + "learning_rate": 1.2274233279422065e-06, + "loss": 0.043, + "num_input_tokens_seen": 66810600, + "step": 99115 + }, + { + "epoch": 2.4215180905381963, + "grad_norm": 0.16907991468906403, + "learning_rate": 1.2273402841309709e-06, + "loss": 0.1082, + "num_input_tokens_seen": 66814376, + "step": 99120 + }, + { + "epoch": 2.421640241370044, + "grad_norm": 0.03562447056174278, + "learning_rate": 1.2272572386664552e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66817832, + "step": 99125 + }, + { + "epoch": 2.4217623922018907, + "grad_norm": 0.0556715689599514, + "learning_rate": 1.2271741915492642e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66821160, + "step": 99130 + }, + { + "epoch": 2.421884543033738, + "grad_norm": 0.009768993593752384, + "learning_rate": 1.2270911427800008e-06, + "loss": 0.0349, + "num_input_tokens_seen": 66824296, + "step": 99135 + }, + { + "epoch": 2.422006693865585, + "grad_norm": 0.31641116738319397, + "learning_rate": 1.2270080923592699e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66828264, + "step": 99140 + }, + { + "epoch": 2.4221288446974323, + "grad_norm": 0.11315816640853882, + "learning_rate": 1.2269250402876749e-06, + "loss": 0.116, + "num_input_tokens_seen": 66831464, + "step": 99145 + }, + { + "epoch": 2.4222509955292795, + "grad_norm": 0.07480554282665253, + "learning_rate": 1.2268419865658204e-06, + "loss": 0.0007, + "num_input_tokens_seen": 66834728, + "step": 99150 + }, + { + "epoch": 2.4223731463611267, + "grad_norm": 0.06518545746803284, + "learning_rate": 1.2267589311943096e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66837992, + "step": 99155 + }, + { + "epoch": 2.422495297192974, + "grad_norm": 66.28703308105469, + "learning_rate": 1.2266758741737472e-06, + "loss": 0.1335, + "num_input_tokens_seen": 66841448, + "step": 99160 + }, + { + "epoch": 2.422617448024821, + "grad_norm": 0.03924314305186272, + "learning_rate": 1.2265928155047365e-06, + "loss": 0.1043, + "num_input_tokens_seen": 66844648, + "step": 99165 + }, + { + "epoch": 2.4227395988566682, + "grad_norm": 0.014301169663667679, + "learning_rate": 1.226509755187882e-06, + "loss": 0.0985, + "num_input_tokens_seen": 66847976, + "step": 99170 + }, + { + "epoch": 2.4228617496885154, + "grad_norm": 0.48280882835388184, + "learning_rate": 1.2264266932237878e-06, + "loss": 0.0708, + "num_input_tokens_seen": 66851112, + "step": 99175 + }, + { + "epoch": 2.4229839005203626, + "grad_norm": 0.22303539514541626, + "learning_rate": 1.2263436296130577e-06, + "loss": 0.0514, + "num_input_tokens_seen": 66854184, + "step": 99180 + }, + { + "epoch": 2.42310605135221, + "grad_norm": 0.048168718814849854, + "learning_rate": 1.2262605643562956e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66857896, + "step": 99185 + }, + { + "epoch": 2.423228202184057, + "grad_norm": 43.30610656738281, + "learning_rate": 1.2261774974541062e-06, + "loss": 0.0948, + "num_input_tokens_seen": 66861352, + "step": 99190 + }, + { + "epoch": 2.423350353015904, + "grad_norm": 0.08721016347408295, + "learning_rate": 1.2260944289070928e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66864616, + "step": 99195 + }, + { + "epoch": 2.4234725038477514, + "grad_norm": 0.059328023344278336, + "learning_rate": 1.22601135871586e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66867624, + "step": 99200 + }, + { + "epoch": 2.4235946546795986, + "grad_norm": 0.020462198182940483, + "learning_rate": 1.2259282868810122e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66871336, + "step": 99205 + }, + { + "epoch": 2.4237168055114457, + "grad_norm": 0.009214747697114944, + "learning_rate": 1.225845213403153e-06, + "loss": 0.0507, + "num_input_tokens_seen": 66874536, + "step": 99210 + }, + { + "epoch": 2.4238389563432925, + "grad_norm": 0.6257734894752502, + "learning_rate": 1.2257621382828864e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66878248, + "step": 99215 + }, + { + "epoch": 2.4239611071751397, + "grad_norm": 0.10356702655553818, + "learning_rate": 1.225679061520817e-06, + "loss": 0.0824, + "num_input_tokens_seen": 66881768, + "step": 99220 + }, + { + "epoch": 2.424083258006987, + "grad_norm": 0.17674194276332855, + "learning_rate": 1.2255959831175486e-06, + "loss": 0.0933, + "num_input_tokens_seen": 66885096, + "step": 99225 + }, + { + "epoch": 2.424205408838834, + "grad_norm": 92.4852294921875, + "learning_rate": 1.2255129030736856e-06, + "loss": 0.0997, + "num_input_tokens_seen": 66888360, + "step": 99230 + }, + { + "epoch": 2.4243275596706813, + "grad_norm": 0.02182663045823574, + "learning_rate": 1.225429821389832e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66892008, + "step": 99235 + }, + { + "epoch": 2.4244497105025284, + "grad_norm": 0.03820806369185448, + "learning_rate": 1.2253467380665923e-06, + "loss": 0.0515, + "num_input_tokens_seen": 66895656, + "step": 99240 + }, + { + "epoch": 2.4245718613343756, + "grad_norm": 0.13037490844726562, + "learning_rate": 1.2252636531045704e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66899112, + "step": 99245 + }, + { + "epoch": 2.424694012166223, + "grad_norm": 0.6307566165924072, + "learning_rate": 1.2251805665043708e-06, + "loss": 0.0008, + "num_input_tokens_seen": 66902056, + "step": 99250 + }, + { + "epoch": 2.42481616299807, + "grad_norm": 1.1177254915237427, + "learning_rate": 1.2250974782665976e-06, + "loss": 0.0014, + "num_input_tokens_seen": 66905320, + "step": 99255 + }, + { + "epoch": 2.424938313829917, + "grad_norm": 0.002898323815315962, + "learning_rate": 1.2250143883918546e-06, + "loss": 0.0665, + "num_input_tokens_seen": 66908584, + "step": 99260 + }, + { + "epoch": 2.4250604646617644, + "grad_norm": 0.09665067493915558, + "learning_rate": 1.224931296880747e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66912360, + "step": 99265 + }, + { + "epoch": 2.4251826154936116, + "grad_norm": 0.014954613521695137, + "learning_rate": 1.2248482037338778e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66916520, + "step": 99270 + }, + { + "epoch": 2.4253047663254588, + "grad_norm": 0.0005372444284148514, + "learning_rate": 1.2247651089518524e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66920104, + "step": 99275 + }, + { + "epoch": 2.425426917157306, + "grad_norm": 0.36292293667793274, + "learning_rate": 1.2246820125352747e-06, + "loss": 0.0004, + "num_input_tokens_seen": 66923560, + "step": 99280 + }, + { + "epoch": 2.425549067989153, + "grad_norm": 0.039235059171915054, + "learning_rate": 1.224598914484749e-06, + "loss": 0.1132, + "num_input_tokens_seen": 66926760, + "step": 99285 + }, + { + "epoch": 2.4256712188210003, + "grad_norm": 0.027509810402989388, + "learning_rate": 1.2245158148008795e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66930024, + "step": 99290 + }, + { + "epoch": 2.4257933696528475, + "grad_norm": 240.3977508544922, + "learning_rate": 1.2244327134842704e-06, + "loss": 0.0972, + "num_input_tokens_seen": 66933032, + "step": 99295 + }, + { + "epoch": 2.4259155204846943, + "grad_norm": 0.06561959534883499, + "learning_rate": 1.2243496105355265e-06, + "loss": 0.0512, + "num_input_tokens_seen": 66936104, + "step": 99300 + }, + { + "epoch": 2.426037671316542, + "grad_norm": 0.10798732191324234, + "learning_rate": 1.224266505955252e-06, + "loss": 0.0399, + "num_input_tokens_seen": 66939240, + "step": 99305 + }, + { + "epoch": 2.4261598221483887, + "grad_norm": 0.009463850408792496, + "learning_rate": 1.224183399744051e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66942440, + "step": 99310 + }, + { + "epoch": 2.426281972980236, + "grad_norm": 0.05178477242588997, + "learning_rate": 1.224100291902528e-06, + "loss": 0.0368, + "num_input_tokens_seen": 66946280, + "step": 99315 + }, + { + "epoch": 2.426404123812083, + "grad_norm": 0.022696619853377342, + "learning_rate": 1.2240171824312873e-06, + "loss": 0.0006, + "num_input_tokens_seen": 66949928, + "step": 99320 + }, + { + "epoch": 2.4265262746439302, + "grad_norm": 0.016703089699149132, + "learning_rate": 1.2239340713309335e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66953704, + "step": 99325 + }, + { + "epoch": 2.4266484254757774, + "grad_norm": 22.868074417114258, + "learning_rate": 1.2238509586020708e-06, + "loss": 0.0491, + "num_input_tokens_seen": 66957544, + "step": 99330 + }, + { + "epoch": 2.4267705763076246, + "grad_norm": 0.015933876857161522, + "learning_rate": 1.2237678442453042e-06, + "loss": 0.0001, + "num_input_tokens_seen": 66960936, + "step": 99335 + }, + { + "epoch": 2.426892727139472, + "grad_norm": 26.713512420654297, + "learning_rate": 1.223684728261237e-06, + "loss": 0.0567, + "num_input_tokens_seen": 66964200, + "step": 99340 + }, + { + "epoch": 2.427014877971319, + "grad_norm": 0.10175715386867523, + "learning_rate": 1.2236016106504747e-06, + "loss": 0.072, + "num_input_tokens_seen": 66967400, + "step": 99345 + }, + { + "epoch": 2.427137028803166, + "grad_norm": 21.69265365600586, + "learning_rate": 1.223518491413621e-06, + "loss": 0.0574, + "num_input_tokens_seen": 66970280, + "step": 99350 + }, + { + "epoch": 2.4272591796350134, + "grad_norm": 17.81764030456543, + "learning_rate": 1.223435370551281e-06, + "loss": 0.052, + "num_input_tokens_seen": 66973416, + "step": 99355 + }, + { + "epoch": 2.4273813304668606, + "grad_norm": 0.100733183324337, + "learning_rate": 1.223352248064059e-06, + "loss": 0.0002, + "num_input_tokens_seen": 66977192, + "step": 99360 + }, + { + "epoch": 2.4275034812987077, + "grad_norm": 0.0022963222581893206, + "learning_rate": 1.2232691239525592e-06, + "loss": 0.0706, + "num_input_tokens_seen": 66980456, + "step": 99365 + }, + { + "epoch": 2.427625632130555, + "grad_norm": 0.03133883699774742, + "learning_rate": 1.2231859982173862e-06, + "loss": 0.0003, + "num_input_tokens_seen": 66984296, + "step": 99370 + }, + { + "epoch": 2.427747782962402, + "grad_norm": 4.77004861831665, + "learning_rate": 1.2231028708591447e-06, + "loss": 0.0855, + "num_input_tokens_seen": 66987944, + "step": 99375 + }, + { + "epoch": 2.4278699337942493, + "grad_norm": 0.007516190409660339, + "learning_rate": 1.2230197418784391e-06, + "loss": 0.0503, + "num_input_tokens_seen": 66992296, + "step": 99380 + }, + { + "epoch": 2.427992084626096, + "grad_norm": 0.1313624083995819, + "learning_rate": 1.2229366112758739e-06, + "loss": 0.0108, + "num_input_tokens_seen": 66995944, + "step": 99385 + }, + { + "epoch": 2.4281142354579437, + "grad_norm": 51.022216796875, + "learning_rate": 1.2228534790520537e-06, + "loss": 0.049, + "num_input_tokens_seen": 66999080, + "step": 99390 + }, + { + "epoch": 2.4282363862897904, + "grad_norm": 0.03542424738407135, + "learning_rate": 1.222770345207583e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67002408, + "step": 99395 + }, + { + "epoch": 2.4283585371216376, + "grad_norm": 36.8632698059082, + "learning_rate": 1.2226872097430665e-06, + "loss": 0.1616, + "num_input_tokens_seen": 67005608, + "step": 99400 + }, + { + "epoch": 2.428480687953485, + "grad_norm": 137.3507080078125, + "learning_rate": 1.2226040726591088e-06, + "loss": 0.0432, + "num_input_tokens_seen": 67008872, + "step": 99405 + }, + { + "epoch": 2.428602838785332, + "grad_norm": 143.63348388671875, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.0721, + "num_input_tokens_seen": 67012840, + "step": 99410 + }, + { + "epoch": 2.428724989617179, + "grad_norm": 32.90876007080078, + "learning_rate": 1.222437793635288e-06, + "loss": 0.0391, + "num_input_tokens_seen": 67016040, + "step": 99415 + }, + { + "epoch": 2.4288471404490264, + "grad_norm": 0.024623477831482887, + "learning_rate": 1.2223546516966339e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67019560, + "step": 99420 + }, + { + "epoch": 2.4289692912808736, + "grad_norm": 0.026823585852980614, + "learning_rate": 1.2222715081409572e-06, + "loss": 0.0425, + "num_input_tokens_seen": 67022760, + "step": 99425 + }, + { + "epoch": 2.4290914421127208, + "grad_norm": 0.207112655043602, + "learning_rate": 1.2221883629688622e-06, + "loss": 0.0563, + "num_input_tokens_seen": 67026344, + "step": 99430 + }, + { + "epoch": 2.429213592944568, + "grad_norm": 0.018758624792099, + "learning_rate": 1.2221052161809535e-06, + "loss": 0.0584, + "num_input_tokens_seen": 67030184, + "step": 99435 + }, + { + "epoch": 2.429335743776415, + "grad_norm": 0.0312594436109066, + "learning_rate": 1.222022067777836e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67034088, + "step": 99440 + }, + { + "epoch": 2.4294578946082623, + "grad_norm": 0.032845884561538696, + "learning_rate": 1.2219389177601142e-06, + "loss": 0.031, + "num_input_tokens_seen": 67037288, + "step": 99445 + }, + { + "epoch": 2.4295800454401095, + "grad_norm": 0.030439363792538643, + "learning_rate": 1.2218557661283932e-06, + "loss": 0.0184, + "num_input_tokens_seen": 67040808, + "step": 99450 + }, + { + "epoch": 2.4297021962719567, + "grad_norm": 0.06845355033874512, + "learning_rate": 1.2217726128832773e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67043944, + "step": 99455 + }, + { + "epoch": 2.429824347103804, + "grad_norm": 0.02207336015999317, + "learning_rate": 1.2216894580253711e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67047336, + "step": 99460 + }, + { + "epoch": 2.429946497935651, + "grad_norm": 0.04571019858121872, + "learning_rate": 1.2216063015552798e-06, + "loss": 0.041, + "num_input_tokens_seen": 67051368, + "step": 99465 + }, + { + "epoch": 2.4300686487674983, + "grad_norm": 0.037082310765981674, + "learning_rate": 1.221523143473608e-06, + "loss": 0.0402, + "num_input_tokens_seen": 67054632, + "step": 99470 + }, + { + "epoch": 2.4301907995993455, + "grad_norm": 0.025185855105519295, + "learning_rate": 1.2214399837809599e-06, + "loss": 0.1122, + "num_input_tokens_seen": 67057640, + "step": 99475 + }, + { + "epoch": 2.430312950431192, + "grad_norm": 0.07133536785840988, + "learning_rate": 1.2213568224779408e-06, + "loss": 0.0461, + "num_input_tokens_seen": 67061352, + "step": 99480 + }, + { + "epoch": 2.4304351012630394, + "grad_norm": 0.049883488565683365, + "learning_rate": 1.2212736595651555e-06, + "loss": 0.0007, + "num_input_tokens_seen": 67064424, + "step": 99485 + }, + { + "epoch": 2.4305572520948866, + "grad_norm": 0.10151875764131546, + "learning_rate": 1.2211904950432086e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67067752, + "step": 99490 + }, + { + "epoch": 2.430679402926734, + "grad_norm": 40.1695442199707, + "learning_rate": 1.221107328912705e-06, + "loss": 0.0542, + "num_input_tokens_seen": 67070696, + "step": 99495 + }, + { + "epoch": 2.430801553758581, + "grad_norm": 0.01168416440486908, + "learning_rate": 1.2210241611742494e-06, + "loss": 0.0716, + "num_input_tokens_seen": 67074408, + "step": 99500 + }, + { + "epoch": 2.430923704590428, + "grad_norm": 18.22470474243164, + "learning_rate": 1.2209409918284465e-06, + "loss": 0.0489, + "num_input_tokens_seen": 67077992, + "step": 99505 + }, + { + "epoch": 2.4310458554222754, + "grad_norm": 0.09461220353841782, + "learning_rate": 1.220857820875901e-06, + "loss": 0.0348, + "num_input_tokens_seen": 67081384, + "step": 99510 + }, + { + "epoch": 2.4311680062541225, + "grad_norm": 0.003890714608132839, + "learning_rate": 1.2207746483172185e-06, + "loss": 0.1613, + "num_input_tokens_seen": 67084456, + "step": 99515 + }, + { + "epoch": 2.4312901570859697, + "grad_norm": 0.1612948328256607, + "learning_rate": 1.2206914741530034e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67087912, + "step": 99520 + }, + { + "epoch": 2.431412307917817, + "grad_norm": 12.028178215026855, + "learning_rate": 1.2206082983838606e-06, + "loss": 0.0345, + "num_input_tokens_seen": 67091176, + "step": 99525 + }, + { + "epoch": 2.431534458749664, + "grad_norm": 0.07618942856788635, + "learning_rate": 1.2205251210103945e-06, + "loss": 0.0293, + "num_input_tokens_seen": 67094952, + "step": 99530 + }, + { + "epoch": 2.4316566095815113, + "grad_norm": 0.372517466545105, + "learning_rate": 1.2204419420332108e-06, + "loss": 0.0385, + "num_input_tokens_seen": 67098088, + "step": 99535 + }, + { + "epoch": 2.4317787604133585, + "grad_norm": 17.607189178466797, + "learning_rate": 1.2203587614529136e-06, + "loss": 0.0706, + "num_input_tokens_seen": 67101160, + "step": 99540 + }, + { + "epoch": 2.4319009112452057, + "grad_norm": 0.5471447706222534, + "learning_rate": 1.2202755792701085e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67104552, + "step": 99545 + }, + { + "epoch": 2.432023062077053, + "grad_norm": 0.04835929721593857, + "learning_rate": 1.2201923954854e-06, + "loss": 0.1449, + "num_input_tokens_seen": 67107816, + "step": 99550 + }, + { + "epoch": 2.4321452129089, + "grad_norm": 0.1779869645833969, + "learning_rate": 1.2201092100993933e-06, + "loss": 0.0387, + "num_input_tokens_seen": 67111144, + "step": 99555 + }, + { + "epoch": 2.4322673637407473, + "grad_norm": 0.5402523279190063, + "learning_rate": 1.2200260231126933e-06, + "loss": 0.0329, + "num_input_tokens_seen": 67114152, + "step": 99560 + }, + { + "epoch": 2.432389514572594, + "grad_norm": 0.26101481914520264, + "learning_rate": 1.2199428345259047e-06, + "loss": 0.0382, + "num_input_tokens_seen": 67117032, + "step": 99565 + }, + { + "epoch": 2.4325116654044416, + "grad_norm": 25.253210067749023, + "learning_rate": 1.2198596443396328e-06, + "loss": 0.027, + "num_input_tokens_seen": 67120040, + "step": 99570 + }, + { + "epoch": 2.4326338162362884, + "grad_norm": 0.04243769869208336, + "learning_rate": 1.2197764525544822e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67123688, + "step": 99575 + }, + { + "epoch": 2.4327559670681356, + "grad_norm": 0.025501498952507973, + "learning_rate": 1.2196932591710583e-06, + "loss": 0.0007, + "num_input_tokens_seen": 67127016, + "step": 99580 + }, + { + "epoch": 2.4328781178999828, + "grad_norm": 0.03055681847035885, + "learning_rate": 1.219610064189966e-06, + "loss": 0.0012, + "num_input_tokens_seen": 67130344, + "step": 99585 + }, + { + "epoch": 2.43300026873183, + "grad_norm": 0.14057831466197968, + "learning_rate": 1.21952686761181e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67134184, + "step": 99590 + }, + { + "epoch": 2.433122419563677, + "grad_norm": 0.5264701247215271, + "learning_rate": 1.2194436694371959e-06, + "loss": 0.0229, + "num_input_tokens_seen": 67137896, + "step": 99595 + }, + { + "epoch": 2.4332445703955243, + "grad_norm": 0.02670673280954361, + "learning_rate": 1.219360469666728e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67141032, + "step": 99600 + }, + { + "epoch": 2.4333667212273715, + "grad_norm": 0.08604729175567627, + "learning_rate": 1.219277268301012e-06, + "loss": 0.0664, + "num_input_tokens_seen": 67143976, + "step": 99605 + }, + { + "epoch": 2.4334888720592187, + "grad_norm": 0.074459008872509, + "learning_rate": 1.2191940653406528e-06, + "loss": 0.0192, + "num_input_tokens_seen": 67147048, + "step": 99610 + }, + { + "epoch": 2.433611022891066, + "grad_norm": 2.0646824836730957, + "learning_rate": 1.2191108607862553e-06, + "loss": 0.0024, + "num_input_tokens_seen": 67150120, + "step": 99615 + }, + { + "epoch": 2.433733173722913, + "grad_norm": 0.05667717754840851, + "learning_rate": 1.2190276546384247e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67153320, + "step": 99620 + }, + { + "epoch": 2.4338553245547603, + "grad_norm": 16.081130981445312, + "learning_rate": 1.2189444468977664e-06, + "loss": 0.065, + "num_input_tokens_seen": 67156328, + "step": 99625 + }, + { + "epoch": 2.4339774753866075, + "grad_norm": 0.2092624306678772, + "learning_rate": 1.2188612375648846e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67159976, + "step": 99630 + }, + { + "epoch": 2.4340996262184547, + "grad_norm": 0.04626304656267166, + "learning_rate": 1.2187780266403853e-06, + "loss": 0.0561, + "num_input_tokens_seen": 67163368, + "step": 99635 + }, + { + "epoch": 2.434221777050302, + "grad_norm": 0.006048401352018118, + "learning_rate": 1.218694814124873e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67166440, + "step": 99640 + }, + { + "epoch": 2.434343927882149, + "grad_norm": 0.13066811859607697, + "learning_rate": 1.2186116000189536e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67169576, + "step": 99645 + }, + { + "epoch": 2.4344660787139962, + "grad_norm": 0.009472579695284367, + "learning_rate": 1.218528384323232e-06, + "loss": 0.0346, + "num_input_tokens_seen": 67172840, + "step": 99650 + }, + { + "epoch": 2.4345882295458434, + "grad_norm": 20.248411178588867, + "learning_rate": 1.2184451670383129e-06, + "loss": 0.0457, + "num_input_tokens_seen": 67176040, + "step": 99655 + }, + { + "epoch": 2.43471038037769, + "grad_norm": 0.01613772287964821, + "learning_rate": 1.218361948164802e-06, + "loss": 0.042, + "num_input_tokens_seen": 67179240, + "step": 99660 + }, + { + "epoch": 2.4348325312095374, + "grad_norm": 32.162349700927734, + "learning_rate": 1.218278727703304e-06, + "loss": 0.0396, + "num_input_tokens_seen": 67182312, + "step": 99665 + }, + { + "epoch": 2.4349546820413845, + "grad_norm": 0.007790977600961924, + "learning_rate": 1.2181955056544244e-06, + "loss": 0.0075, + "num_input_tokens_seen": 67185704, + "step": 99670 + }, + { + "epoch": 2.4350768328732317, + "grad_norm": 0.21455731987953186, + "learning_rate": 1.2181122820187689e-06, + "loss": 0.0602, + "num_input_tokens_seen": 67188456, + "step": 99675 + }, + { + "epoch": 2.435198983705079, + "grad_norm": 170.60511779785156, + "learning_rate": 1.2180290567969417e-06, + "loss": 0.0023, + "num_input_tokens_seen": 67191720, + "step": 99680 + }, + { + "epoch": 2.435321134536926, + "grad_norm": 0.011751042678952217, + "learning_rate": 1.217945829989549e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67195368, + "step": 99685 + }, + { + "epoch": 2.4354432853687733, + "grad_norm": 0.008488446474075317, + "learning_rate": 1.217862601597195e-06, + "loss": 0.0401, + "num_input_tokens_seen": 67198888, + "step": 99690 + }, + { + "epoch": 2.4355654362006205, + "grad_norm": 0.23472584784030914, + "learning_rate": 1.2177793716204858e-06, + "loss": 0.0534, + "num_input_tokens_seen": 67202344, + "step": 99695 + }, + { + "epoch": 2.4356875870324677, + "grad_norm": 0.1314506083726883, + "learning_rate": 1.2176961400600265e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67205928, + "step": 99700 + }, + { + "epoch": 2.435809737864315, + "grad_norm": 0.00579680735245347, + "learning_rate": 1.2176129069164225e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67209000, + "step": 99705 + }, + { + "epoch": 2.435931888696162, + "grad_norm": 1.2763606309890747, + "learning_rate": 1.2175296721902786e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67212328, + "step": 99710 + }, + { + "epoch": 2.4360540395280093, + "grad_norm": 3.3028550148010254, + "learning_rate": 1.2174464358822005e-06, + "loss": 0.001, + "num_input_tokens_seen": 67215592, + "step": 99715 + }, + { + "epoch": 2.4361761903598564, + "grad_norm": 0.00807754322886467, + "learning_rate": 1.2173631979927935e-06, + "loss": 0.0495, + "num_input_tokens_seen": 67219176, + "step": 99720 + }, + { + "epoch": 2.4362983411917036, + "grad_norm": 0.03470252826809883, + "learning_rate": 1.217279958522663e-06, + "loss": 0.0502, + "num_input_tokens_seen": 67222440, + "step": 99725 + }, + { + "epoch": 2.436420492023551, + "grad_norm": 0.022196587175130844, + "learning_rate": 1.217196717472414e-06, + "loss": 0.1152, + "num_input_tokens_seen": 67225960, + "step": 99730 + }, + { + "epoch": 2.436542642855398, + "grad_norm": 0.018160829320549965, + "learning_rate": 1.2171134748426522e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67229480, + "step": 99735 + }, + { + "epoch": 2.436664793687245, + "grad_norm": 0.04221296310424805, + "learning_rate": 1.2170302306339825e-06, + "loss": 0.0763, + "num_input_tokens_seen": 67232744, + "step": 99740 + }, + { + "epoch": 2.436786944519092, + "grad_norm": 0.13677258789539337, + "learning_rate": 1.216946984847011e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67235816, + "step": 99745 + }, + { + "epoch": 2.4369090953509396, + "grad_norm": 21.423381805419922, + "learning_rate": 1.2168637374823425e-06, + "loss": 0.0852, + "num_input_tokens_seen": 67239144, + "step": 99750 + }, + { + "epoch": 2.4370312461827863, + "grad_norm": 0.33186808228492737, + "learning_rate": 1.2167804885405825e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67242280, + "step": 99755 + }, + { + "epoch": 2.4371533970146335, + "grad_norm": 0.030644388869404793, + "learning_rate": 1.2166972380223363e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67245608, + "step": 99760 + }, + { + "epoch": 2.4372755478464807, + "grad_norm": 0.02295338734984398, + "learning_rate": 1.2166139859282098e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67249064, + "step": 99765 + }, + { + "epoch": 2.437397698678328, + "grad_norm": 0.46199899911880493, + "learning_rate": 1.2165307322588082e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67252264, + "step": 99770 + }, + { + "epoch": 2.437519849510175, + "grad_norm": 0.007493645418435335, + "learning_rate": 1.2164474770147365e-06, + "loss": 0.0372, + "num_input_tokens_seen": 67255848, + "step": 99775 + }, + { + "epoch": 2.4376420003420223, + "grad_norm": 0.04148320481181145, + "learning_rate": 1.2163642201966011e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67259176, + "step": 99780 + }, + { + "epoch": 2.4377641511738695, + "grad_norm": 586.2407836914062, + "learning_rate": 1.2162809618050065e-06, + "loss": 0.0832, + "num_input_tokens_seen": 67262184, + "step": 99785 + }, + { + "epoch": 2.4378863020057167, + "grad_norm": 0.13672468066215515, + "learning_rate": 1.2161977018405586e-06, + "loss": 0.0491, + "num_input_tokens_seen": 67265320, + "step": 99790 + }, + { + "epoch": 2.438008452837564, + "grad_norm": 0.00598291028290987, + "learning_rate": 1.2161144403038629e-06, + "loss": 0.1263, + "num_input_tokens_seen": 67268776, + "step": 99795 + }, + { + "epoch": 2.438130603669411, + "grad_norm": 0.046379100531339645, + "learning_rate": 1.2160311771955246e-06, + "loss": 0.0476, + "num_input_tokens_seen": 67272104, + "step": 99800 + }, + { + "epoch": 2.4382527545012582, + "grad_norm": 0.0232473723590374, + "learning_rate": 1.21594791251615e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67275304, + "step": 99805 + }, + { + "epoch": 2.4383749053331054, + "grad_norm": 0.05157886818051338, + "learning_rate": 1.2158646462663437e-06, + "loss": 0.0281, + "num_input_tokens_seen": 67278184, + "step": 99810 + }, + { + "epoch": 2.4384970561649526, + "grad_norm": 0.07750022411346436, + "learning_rate": 1.215781378446712e-06, + "loss": 0.1022, + "num_input_tokens_seen": 67281512, + "step": 99815 + }, + { + "epoch": 2.4386192069968, + "grad_norm": 0.865932047367096, + "learning_rate": 1.2156981090578594e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67285352, + "step": 99820 + }, + { + "epoch": 2.438741357828647, + "grad_norm": 323.0584411621094, + "learning_rate": 1.2156148381003926e-06, + "loss": 0.0044, + "num_input_tokens_seen": 67288360, + "step": 99825 + }, + { + "epoch": 2.4388635086604937, + "grad_norm": 0.030158882960677147, + "learning_rate": 1.215531565574917e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67291624, + "step": 99830 + }, + { + "epoch": 2.4389856594923414, + "grad_norm": 0.0562431737780571, + "learning_rate": 1.2154482914820375e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67294824, + "step": 99835 + }, + { + "epoch": 2.439107810324188, + "grad_norm": 0.21903370320796967, + "learning_rate": 1.2153650158223602e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67298088, + "step": 99840 + }, + { + "epoch": 2.4392299611560353, + "grad_norm": 0.0879155844449997, + "learning_rate": 1.2152817385964906e-06, + "loss": 0.0006, + "num_input_tokens_seen": 67301352, + "step": 99845 + }, + { + "epoch": 2.4393521119878825, + "grad_norm": 0.03366367518901825, + "learning_rate": 1.215198459805034e-06, + "loss": 0.0, + "num_input_tokens_seen": 67305000, + "step": 99850 + }, + { + "epoch": 2.4394742628197297, + "grad_norm": 0.013038181699812412, + "learning_rate": 1.2151151794485966e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67308520, + "step": 99855 + }, + { + "epoch": 2.439596413651577, + "grad_norm": 0.0064660487696528435, + "learning_rate": 1.2150318975277835e-06, + "loss": 0.0883, + "num_input_tokens_seen": 67311784, + "step": 99860 + }, + { + "epoch": 2.439718564483424, + "grad_norm": 0.8282192945480347, + "learning_rate": 1.2149486140432008e-06, + "loss": 0.0014, + "num_input_tokens_seen": 67315112, + "step": 99865 + }, + { + "epoch": 2.4398407153152712, + "grad_norm": 39.81499099731445, + "learning_rate": 1.214865328995454e-06, + "loss": 0.1583, + "num_input_tokens_seen": 67318056, + "step": 99870 + }, + { + "epoch": 2.4399628661471184, + "grad_norm": 0.4565242826938629, + "learning_rate": 1.2147820423851487e-06, + "loss": 0.0569, + "num_input_tokens_seen": 67321832, + "step": 99875 + }, + { + "epoch": 2.4400850169789656, + "grad_norm": 0.058376945555210114, + "learning_rate": 1.2146987542128906e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67324968, + "step": 99880 + }, + { + "epoch": 2.440207167810813, + "grad_norm": 0.017881063744425774, + "learning_rate": 1.2146154644792855e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67328232, + "step": 99885 + }, + { + "epoch": 2.44032931864266, + "grad_norm": 0.022865792736411095, + "learning_rate": 1.214532173184939e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67331368, + "step": 99890 + }, + { + "epoch": 2.440451469474507, + "grad_norm": 0.0678458958864212, + "learning_rate": 1.2144488803304566e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67334568, + "step": 99895 + }, + { + "epoch": 2.4405736203063544, + "grad_norm": 0.03220222890377045, + "learning_rate": 1.2143655859164445e-06, + "loss": 0.0142, + "num_input_tokens_seen": 67338280, + "step": 99900 + }, + { + "epoch": 2.4406957711382016, + "grad_norm": 0.016223371028900146, + "learning_rate": 1.2142822899435083e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67342056, + "step": 99905 + }, + { + "epoch": 2.4408179219700488, + "grad_norm": 0.025257963687181473, + "learning_rate": 1.2141989924122534e-06, + "loss": 0.0276, + "num_input_tokens_seen": 67345896, + "step": 99910 + }, + { + "epoch": 2.440940072801896, + "grad_norm": 0.0486016608774662, + "learning_rate": 1.2141156933232856e-06, + "loss": 0.0574, + "num_input_tokens_seen": 67348904, + "step": 99915 + }, + { + "epoch": 2.441062223633743, + "grad_norm": 29.583646774291992, + "learning_rate": 1.2140323926772114e-06, + "loss": 0.034, + "num_input_tokens_seen": 67352744, + "step": 99920 + }, + { + "epoch": 2.44118437446559, + "grad_norm": 0.04151540622115135, + "learning_rate": 1.2139490904746359e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67356328, + "step": 99925 + }, + { + "epoch": 2.4413065252974375, + "grad_norm": 0.13054241240024567, + "learning_rate": 1.213865786716165e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67359976, + "step": 99930 + }, + { + "epoch": 2.4414286761292843, + "grad_norm": 0.4463544189929962, + "learning_rate": 1.2137824814024048e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67363496, + "step": 99935 + }, + { + "epoch": 2.4415508269611315, + "grad_norm": 0.1418827623128891, + "learning_rate": 1.2136991745339606e-06, + "loss": 0.0665, + "num_input_tokens_seen": 67366504, + "step": 99940 + }, + { + "epoch": 2.4416729777929786, + "grad_norm": 0.02598101831972599, + "learning_rate": 1.2136158661114387e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67370088, + "step": 99945 + }, + { + "epoch": 2.441795128624826, + "grad_norm": 958.7318115234375, + "learning_rate": 1.2135325561354446e-06, + "loss": 0.101, + "num_input_tokens_seen": 67373480, + "step": 99950 + }, + { + "epoch": 2.441917279456673, + "grad_norm": 19.178232192993164, + "learning_rate": 1.2134492446065844e-06, + "loss": 0.0351, + "num_input_tokens_seen": 67376680, + "step": 99955 + }, + { + "epoch": 2.44203943028852, + "grad_norm": 0.021837705746293068, + "learning_rate": 1.2133659315254637e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67380072, + "step": 99960 + }, + { + "epoch": 2.4421615811203674, + "grad_norm": 0.0334324985742569, + "learning_rate": 1.2132826168926888e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67383144, + "step": 99965 + }, + { + "epoch": 2.4422837319522146, + "grad_norm": 0.002274291357025504, + "learning_rate": 1.2131993007088654e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67386472, + "step": 99970 + }, + { + "epoch": 2.442405882784062, + "grad_norm": 0.10472066700458527, + "learning_rate": 1.2131159829745991e-06, + "loss": 0.0466, + "num_input_tokens_seen": 67389800, + "step": 99975 + }, + { + "epoch": 2.442528033615909, + "grad_norm": 0.11208847165107727, + "learning_rate": 1.2130326636904963e-06, + "loss": 0.0412, + "num_input_tokens_seen": 67393512, + "step": 99980 + }, + { + "epoch": 2.442650184447756, + "grad_norm": 0.4897066354751587, + "learning_rate": 1.2129493428571627e-06, + "loss": 0.0408, + "num_input_tokens_seen": 67396648, + "step": 99985 + }, + { + "epoch": 2.4427723352796034, + "grad_norm": 0.02697628177702427, + "learning_rate": 1.2128660204752042e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67400552, + "step": 99990 + }, + { + "epoch": 2.4428944861114505, + "grad_norm": 0.007688530255109072, + "learning_rate": 1.2127826965452266e-06, + "loss": 0.0639, + "num_input_tokens_seen": 67404008, + "step": 99995 + }, + { + "epoch": 2.4430166369432977, + "grad_norm": 17.038576126098633, + "learning_rate": 1.212699371067836e-06, + "loss": 0.1636, + "num_input_tokens_seen": 67407336, + "step": 100000 + }, + { + "epoch": 2.443138787775145, + "grad_norm": 0.03481506183743477, + "learning_rate": 1.212616044043638e-06, + "loss": 0.0291, + "num_input_tokens_seen": 67410664, + "step": 100005 + }, + { + "epoch": 2.4432609386069917, + "grad_norm": 0.06357225775718689, + "learning_rate": 1.2125327154732394e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67413800, + "step": 100010 + }, + { + "epoch": 2.4433830894388393, + "grad_norm": 0.060711268335580826, + "learning_rate": 1.2124493853572458e-06, + "loss": 0.0801, + "num_input_tokens_seen": 67416616, + "step": 100015 + }, + { + "epoch": 2.443505240270686, + "grad_norm": 0.0068696048110723495, + "learning_rate": 1.2123660536962628e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67419560, + "step": 100020 + }, + { + "epoch": 2.4436273911025332, + "grad_norm": 0.019549276679754257, + "learning_rate": 1.212282720490897e-06, + "loss": 0.043, + "num_input_tokens_seen": 67423272, + "step": 100025 + }, + { + "epoch": 2.4437495419343804, + "grad_norm": 0.01690533384680748, + "learning_rate": 1.2121993857417542e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67426920, + "step": 100030 + }, + { + "epoch": 2.4438716927662276, + "grad_norm": 0.12328990548849106, + "learning_rate": 1.21211604944944e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67430248, + "step": 100035 + }, + { + "epoch": 2.443993843598075, + "grad_norm": 0.009458489716053009, + "learning_rate": 1.2120327116145611e-06, + "loss": 0.082, + "num_input_tokens_seen": 67433576, + "step": 100040 + }, + { + "epoch": 2.444115994429922, + "grad_norm": 21.805879592895508, + "learning_rate": 1.2119493722377233e-06, + "loss": 0.0875, + "num_input_tokens_seen": 67437032, + "step": 100045 + }, + { + "epoch": 2.444238145261769, + "grad_norm": 0.032339856028556824, + "learning_rate": 1.2118660313195327e-06, + "loss": 0.0665, + "num_input_tokens_seen": 67440552, + "step": 100050 + }, + { + "epoch": 2.4443602960936164, + "grad_norm": 0.07892021536827087, + "learning_rate": 1.2117826888605953e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67443624, + "step": 100055 + }, + { + "epoch": 2.4444824469254636, + "grad_norm": 0.06196204572916031, + "learning_rate": 1.2116993448615173e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67447080, + "step": 100060 + }, + { + "epoch": 2.4446045977573108, + "grad_norm": 0.10552898794412613, + "learning_rate": 1.2116159993229045e-06, + "loss": 0.12, + "num_input_tokens_seen": 67450728, + "step": 100065 + }, + { + "epoch": 2.444726748589158, + "grad_norm": 0.17518259584903717, + "learning_rate": 1.2115326522453632e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67454376, + "step": 100070 + }, + { + "epoch": 2.444848899421005, + "grad_norm": 0.05008700117468834, + "learning_rate": 1.2114493036294996e-06, + "loss": 0.0526, + "num_input_tokens_seen": 67457768, + "step": 100075 + }, + { + "epoch": 2.4449710502528523, + "grad_norm": 25.26511573791504, + "learning_rate": 1.21136595347592e-06, + "loss": 0.0651, + "num_input_tokens_seen": 67461160, + "step": 100080 + }, + { + "epoch": 2.4450932010846995, + "grad_norm": 0.05229969322681427, + "learning_rate": 1.2112826017852303e-06, + "loss": 0.0733, + "num_input_tokens_seen": 67464040, + "step": 100085 + }, + { + "epoch": 2.4452153519165467, + "grad_norm": 0.011544210836291313, + "learning_rate": 1.211199248558037e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67467496, + "step": 100090 + }, + { + "epoch": 2.445337502748394, + "grad_norm": 0.011759086512029171, + "learning_rate": 1.2111158937949456e-06, + "loss": 0.04, + "num_input_tokens_seen": 67470824, + "step": 100095 + }, + { + "epoch": 2.445459653580241, + "grad_norm": 2.3236122131347656, + "learning_rate": 1.2110325374965624e-06, + "loss": 0.0986, + "num_input_tokens_seen": 67474088, + "step": 100100 + }, + { + "epoch": 2.445581804412088, + "grad_norm": 0.016584821045398712, + "learning_rate": 1.2109491796634942e-06, + "loss": 0.0009, + "num_input_tokens_seen": 67477160, + "step": 100105 + }, + { + "epoch": 2.445703955243935, + "grad_norm": 0.03389434516429901, + "learning_rate": 1.2108658202963464e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67480552, + "step": 100110 + }, + { + "epoch": 2.445826106075782, + "grad_norm": 0.03657127916812897, + "learning_rate": 1.210782459395726e-06, + "loss": 0.0446, + "num_input_tokens_seen": 67483816, + "step": 100115 + }, + { + "epoch": 2.4459482569076294, + "grad_norm": 19.909534454345703, + "learning_rate": 1.2106990969622388e-06, + "loss": 0.0379, + "num_input_tokens_seen": 67487464, + "step": 100120 + }, + { + "epoch": 2.4460704077394766, + "grad_norm": 0.20571334660053253, + "learning_rate": 1.2106157329964913e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67490792, + "step": 100125 + }, + { + "epoch": 2.446192558571324, + "grad_norm": 0.005827835761010647, + "learning_rate": 1.210532367499089e-06, + "loss": 0.0465, + "num_input_tokens_seen": 67494056, + "step": 100130 + }, + { + "epoch": 2.446314709403171, + "grad_norm": 0.5593209266662598, + "learning_rate": 1.210449000470639e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67497512, + "step": 100135 + }, + { + "epoch": 2.446436860235018, + "grad_norm": 13.949110984802246, + "learning_rate": 1.2103656319117474e-06, + "loss": 0.0529, + "num_input_tokens_seen": 67500840, + "step": 100140 + }, + { + "epoch": 2.4465590110668654, + "grad_norm": 0.0062551493756473064, + "learning_rate": 1.2102822618230204e-06, + "loss": 0.0396, + "num_input_tokens_seen": 67504168, + "step": 100145 + }, + { + "epoch": 2.4466811618987125, + "grad_norm": 0.0040052300319075584, + "learning_rate": 1.210198890205064e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67507496, + "step": 100150 + }, + { + "epoch": 2.4468033127305597, + "grad_norm": 0.14093728363513947, + "learning_rate": 1.210115517058485e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67510824, + "step": 100155 + }, + { + "epoch": 2.446925463562407, + "grad_norm": 0.26041334867477417, + "learning_rate": 1.2100321423838889e-06, + "loss": 0.0614, + "num_input_tokens_seen": 67514216, + "step": 100160 + }, + { + "epoch": 2.447047614394254, + "grad_norm": 0.06177394092082977, + "learning_rate": 1.2099487661818829e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67517608, + "step": 100165 + }, + { + "epoch": 2.4471697652261013, + "grad_norm": 30.27055549621582, + "learning_rate": 1.209865388453073e-06, + "loss": 0.0595, + "num_input_tokens_seen": 67520872, + "step": 100170 + }, + { + "epoch": 2.4472919160579485, + "grad_norm": 0.05899616703391075, + "learning_rate": 1.2097820091980654e-06, + "loss": 0.0971, + "num_input_tokens_seen": 67523880, + "step": 100175 + }, + { + "epoch": 2.4474140668897957, + "grad_norm": 22.625675201416016, + "learning_rate": 1.209698628417467e-06, + "loss": 0.166, + "num_input_tokens_seen": 67526888, + "step": 100180 + }, + { + "epoch": 2.447536217721643, + "grad_norm": 0.04590220749378204, + "learning_rate": 1.2096152461118836e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67530088, + "step": 100185 + }, + { + "epoch": 2.4476583685534896, + "grad_norm": 0.10016443580389023, + "learning_rate": 1.2095318622819216e-06, + "loss": 0.0469, + "num_input_tokens_seen": 67533032, + "step": 100190 + }, + { + "epoch": 2.4477805193853373, + "grad_norm": 0.08563831448554993, + "learning_rate": 1.2094484769281877e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67536040, + "step": 100195 + }, + { + "epoch": 2.447902670217184, + "grad_norm": 0.09388595819473267, + "learning_rate": 1.2093650900512879e-06, + "loss": 0.0562, + "num_input_tokens_seen": 67539496, + "step": 100200 + }, + { + "epoch": 2.448024821049031, + "grad_norm": 15.039061546325684, + "learning_rate": 1.2092817016518291e-06, + "loss": 0.0657, + "num_input_tokens_seen": 67542824, + "step": 100205 + }, + { + "epoch": 2.4481469718808784, + "grad_norm": 0.07398822903633118, + "learning_rate": 1.2091983117304174e-06, + "loss": 0.0311, + "num_input_tokens_seen": 67545832, + "step": 100210 + }, + { + "epoch": 2.4482691227127256, + "grad_norm": 0.012557965703308582, + "learning_rate": 1.2091149202876593e-06, + "loss": 0.0398, + "num_input_tokens_seen": 67549288, + "step": 100215 + }, + { + "epoch": 2.4483912735445728, + "grad_norm": 0.4743223488330841, + "learning_rate": 1.2090315273241613e-06, + "loss": 0.0006, + "num_input_tokens_seen": 67552296, + "step": 100220 + }, + { + "epoch": 2.44851342437642, + "grad_norm": 0.2512113153934479, + "learning_rate": 1.2089481328405295e-06, + "loss": 0.0542, + "num_input_tokens_seen": 67555624, + "step": 100225 + }, + { + "epoch": 2.448635575208267, + "grad_norm": 0.02216065302491188, + "learning_rate": 1.208864736837371e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67559144, + "step": 100230 + }, + { + "epoch": 2.4487577260401143, + "grad_norm": 0.07935440540313721, + "learning_rate": 1.2087813393152919e-06, + "loss": 0.0007, + "num_input_tokens_seen": 67562600, + "step": 100235 + }, + { + "epoch": 2.4488798768719615, + "grad_norm": 18.39009666442871, + "learning_rate": 1.208697940274899e-06, + "loss": 0.0804, + "num_input_tokens_seen": 67565736, + "step": 100240 + }, + { + "epoch": 2.4490020277038087, + "grad_norm": 55.88145446777344, + "learning_rate": 1.2086145397167981e-06, + "loss": 0.0418, + "num_input_tokens_seen": 67569000, + "step": 100245 + }, + { + "epoch": 2.449124178535656, + "grad_norm": 0.10761766880750656, + "learning_rate": 1.2085311376415965e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67572136, + "step": 100250 + }, + { + "epoch": 2.449246329367503, + "grad_norm": 0.07407483458518982, + "learning_rate": 1.2084477340499003e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67575272, + "step": 100255 + }, + { + "epoch": 2.4493684801993503, + "grad_norm": 0.02553796023130417, + "learning_rate": 1.208364328942316e-06, + "loss": 0.003, + "num_input_tokens_seen": 67578536, + "step": 100260 + }, + { + "epoch": 2.4494906310311975, + "grad_norm": 0.02047552913427353, + "learning_rate": 1.2082809223194502e-06, + "loss": 0.0006, + "num_input_tokens_seen": 67581864, + "step": 100265 + }, + { + "epoch": 2.4496127818630447, + "grad_norm": 0.007181069348007441, + "learning_rate": 1.2081975141819097e-06, + "loss": 0.0401, + "num_input_tokens_seen": 67585000, + "step": 100270 + }, + { + "epoch": 2.449734932694892, + "grad_norm": 0.008215844631195068, + "learning_rate": 1.2081141045303007e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67588264, + "step": 100275 + }, + { + "epoch": 2.449857083526739, + "grad_norm": 0.007480736821889877, + "learning_rate": 1.20803069336523e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67592104, + "step": 100280 + }, + { + "epoch": 2.4499792343585858, + "grad_norm": 0.0785703957080841, + "learning_rate": 1.207947280687304e-06, + "loss": 0.0428, + "num_input_tokens_seen": 67595560, + "step": 100285 + }, + { + "epoch": 2.450101385190433, + "grad_norm": 0.008701242506504059, + "learning_rate": 1.2078638664971297e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67598760, + "step": 100290 + }, + { + "epoch": 2.45022353602228, + "grad_norm": 4.186373233795166, + "learning_rate": 1.2077804507953135e-06, + "loss": 0.009, + "num_input_tokens_seen": 67602152, + "step": 100295 + }, + { + "epoch": 2.4503456868541273, + "grad_norm": 0.03363225609064102, + "learning_rate": 1.2076970335824618e-06, + "loss": 0.1083, + "num_input_tokens_seen": 67605608, + "step": 100300 + }, + { + "epoch": 2.4504678376859745, + "grad_norm": 0.016309145838022232, + "learning_rate": 1.2076136148591817e-06, + "loss": 0.0788, + "num_input_tokens_seen": 67608488, + "step": 100305 + }, + { + "epoch": 2.4505899885178217, + "grad_norm": 0.013027573004364967, + "learning_rate": 1.2075301946260795e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67611944, + "step": 100310 + }, + { + "epoch": 2.450712139349669, + "grad_norm": 0.035746023058891296, + "learning_rate": 1.2074467728837615e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67615272, + "step": 100315 + }, + { + "epoch": 2.450834290181516, + "grad_norm": 0.025673598051071167, + "learning_rate": 1.2073633496328348e-06, + "loss": 0.0182, + "num_input_tokens_seen": 67618600, + "step": 100320 + }, + { + "epoch": 2.4509564410133633, + "grad_norm": 0.017467927187681198, + "learning_rate": 1.2072799248739062e-06, + "loss": 0.097, + "num_input_tokens_seen": 67621736, + "step": 100325 + }, + { + "epoch": 2.4510785918452105, + "grad_norm": 0.007844069972634315, + "learning_rate": 1.2071964986075825e-06, + "loss": 0.0042, + "num_input_tokens_seen": 67625448, + "step": 100330 + }, + { + "epoch": 2.4512007426770577, + "grad_norm": 0.18830718100070953, + "learning_rate": 1.2071130708344698e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67628776, + "step": 100335 + }, + { + "epoch": 2.451322893508905, + "grad_norm": 0.05894733592867851, + "learning_rate": 1.2070296415551754e-06, + "loss": 0.0886, + "num_input_tokens_seen": 67632168, + "step": 100340 + }, + { + "epoch": 2.451445044340752, + "grad_norm": 0.11676836758852005, + "learning_rate": 1.2069462107703055e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67635880, + "step": 100345 + }, + { + "epoch": 2.4515671951725992, + "grad_norm": 0.05520134046673775, + "learning_rate": 1.2068627784804673e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67639272, + "step": 100350 + }, + { + "epoch": 2.4516893460044464, + "grad_norm": 32.44426727294922, + "learning_rate": 1.206779344686267e-06, + "loss": 0.0013, + "num_input_tokens_seen": 67643048, + "step": 100355 + }, + { + "epoch": 2.4518114968362936, + "grad_norm": 1.8535118103027344, + "learning_rate": 1.2066959093883122e-06, + "loss": 0.0665, + "num_input_tokens_seen": 67646504, + "step": 100360 + }, + { + "epoch": 2.451933647668141, + "grad_norm": 0.03558404743671417, + "learning_rate": 1.206612472587209e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67650216, + "step": 100365 + }, + { + "epoch": 2.4520557984999876, + "grad_norm": 0.021302519366145134, + "learning_rate": 1.206529034283564e-06, + "loss": 0.0328, + "num_input_tokens_seen": 67653672, + "step": 100370 + }, + { + "epoch": 2.452177949331835, + "grad_norm": 0.019249480217695236, + "learning_rate": 1.2064455944779845e-06, + "loss": 0.0379, + "num_input_tokens_seen": 67657256, + "step": 100375 + }, + { + "epoch": 2.452300100163682, + "grad_norm": 0.1599765121936798, + "learning_rate": 1.206362153171077e-06, + "loss": 0.0913, + "num_input_tokens_seen": 67660456, + "step": 100380 + }, + { + "epoch": 2.452422250995529, + "grad_norm": 0.02373223938047886, + "learning_rate": 1.2062787103634486e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67663976, + "step": 100385 + }, + { + "epoch": 2.4525444018273763, + "grad_norm": 0.004647197667509317, + "learning_rate": 1.206195266055706e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67667560, + "step": 100390 + }, + { + "epoch": 2.4526665526592235, + "grad_norm": 0.05331575870513916, + "learning_rate": 1.2061118202484556e-06, + "loss": 0.0678, + "num_input_tokens_seen": 67670824, + "step": 100395 + }, + { + "epoch": 2.4527887034910707, + "grad_norm": 21.77971839904785, + "learning_rate": 1.206028372942305e-06, + "loss": 0.0514, + "num_input_tokens_seen": 67674344, + "step": 100400 + }, + { + "epoch": 2.452910854322918, + "grad_norm": 21.462068557739258, + "learning_rate": 1.2059449241378608e-06, + "loss": 0.0522, + "num_input_tokens_seen": 67678376, + "step": 100405 + }, + { + "epoch": 2.453033005154765, + "grad_norm": 0.0774010568857193, + "learning_rate": 1.2058614738357294e-06, + "loss": 0.0158, + "num_input_tokens_seen": 67681448, + "step": 100410 + }, + { + "epoch": 2.4531551559866123, + "grad_norm": 0.007746968884021044, + "learning_rate": 1.205778022036518e-06, + "loss": 0.0392, + "num_input_tokens_seen": 67684904, + "step": 100415 + }, + { + "epoch": 2.4532773068184595, + "grad_norm": 0.08811292052268982, + "learning_rate": 1.2056945687408334e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67687912, + "step": 100420 + }, + { + "epoch": 2.4533994576503066, + "grad_norm": 239.14468383789062, + "learning_rate": 1.2056111139492827e-06, + "loss": 0.0553, + "num_input_tokens_seen": 67691176, + "step": 100425 + }, + { + "epoch": 2.453521608482154, + "grad_norm": 0.01934845559298992, + "learning_rate": 1.2055276576624727e-06, + "loss": 0.0639, + "num_input_tokens_seen": 67694568, + "step": 100430 + }, + { + "epoch": 2.453643759314001, + "grad_norm": 0.09107211232185364, + "learning_rate": 1.2054441998810103e-06, + "loss": 0.1097, + "num_input_tokens_seen": 67698088, + "step": 100435 + }, + { + "epoch": 2.453765910145848, + "grad_norm": 0.0451941080391407, + "learning_rate": 1.205360740605502e-06, + "loss": 0.0523, + "num_input_tokens_seen": 67701544, + "step": 100440 + }, + { + "epoch": 2.4538880609776954, + "grad_norm": 0.056204136461019516, + "learning_rate": 1.2052772798365556e-06, + "loss": 0.0754, + "num_input_tokens_seen": 67704616, + "step": 100445 + }, + { + "epoch": 2.4540102118095426, + "grad_norm": 0.027863988652825356, + "learning_rate": 1.2051938175747777e-06, + "loss": 0.0731, + "num_input_tokens_seen": 67708136, + "step": 100450 + }, + { + "epoch": 2.4541323626413893, + "grad_norm": 0.09919628500938416, + "learning_rate": 1.2051103538207752e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67711784, + "step": 100455 + }, + { + "epoch": 2.454254513473237, + "grad_norm": 64.31039428710938, + "learning_rate": 1.2050268885751547e-06, + "loss": 0.2318, + "num_input_tokens_seen": 67715176, + "step": 100460 + }, + { + "epoch": 2.4543766643050837, + "grad_norm": 0.015285374596714973, + "learning_rate": 1.2049434218385236e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67718632, + "step": 100465 + }, + { + "epoch": 2.454498815136931, + "grad_norm": 14.737951278686523, + "learning_rate": 1.2048599536114887e-06, + "loss": 0.0355, + "num_input_tokens_seen": 67722472, + "step": 100470 + }, + { + "epoch": 2.454620965968778, + "grad_norm": 4.654276371002197, + "learning_rate": 1.2047764838946574e-06, + "loss": 0.0015, + "num_input_tokens_seen": 67725864, + "step": 100475 + }, + { + "epoch": 2.4547431168006253, + "grad_norm": 14.541358947753906, + "learning_rate": 1.2046930126886362e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67729128, + "step": 100480 + }, + { + "epoch": 2.4548652676324725, + "grad_norm": 0.00767884124070406, + "learning_rate": 1.2046095399940326e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67732328, + "step": 100485 + }, + { + "epoch": 2.4549874184643197, + "grad_norm": 0.023718344047665596, + "learning_rate": 1.2045260658114534e-06, + "loss": 0.095, + "num_input_tokens_seen": 67735656, + "step": 100490 + }, + { + "epoch": 2.455109569296167, + "grad_norm": 0.27190932631492615, + "learning_rate": 1.2044425901415053e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67738920, + "step": 100495 + }, + { + "epoch": 2.455231720128014, + "grad_norm": 0.017982225865125656, + "learning_rate": 1.204359112984796e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67742568, + "step": 100500 + }, + { + "epoch": 2.4553538709598612, + "grad_norm": 0.030523277819156647, + "learning_rate": 1.2042756343419324e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67746344, + "step": 100505 + }, + { + "epoch": 2.4554760217917084, + "grad_norm": 2.859264850616455, + "learning_rate": 1.2041921542135209e-06, + "loss": 0.0011, + "num_input_tokens_seen": 67749544, + "step": 100510 + }, + { + "epoch": 2.4555981726235556, + "grad_norm": 0.15216529369354248, + "learning_rate": 1.2041086726001696e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67752744, + "step": 100515 + }, + { + "epoch": 2.455720323455403, + "grad_norm": 0.024804210290312767, + "learning_rate": 1.204025189502485e-06, + "loss": 0.0983, + "num_input_tokens_seen": 67756456, + "step": 100520 + }, + { + "epoch": 2.45584247428725, + "grad_norm": 0.03291197121143341, + "learning_rate": 1.2039417049210743e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67759848, + "step": 100525 + }, + { + "epoch": 2.455964625119097, + "grad_norm": 35.64726257324219, + "learning_rate": 1.2038582188565448e-06, + "loss": 0.0562, + "num_input_tokens_seen": 67763176, + "step": 100530 + }, + { + "epoch": 2.4560867759509444, + "grad_norm": 0.017125124111771584, + "learning_rate": 1.2037747313095032e-06, + "loss": 0.0707, + "num_input_tokens_seen": 67766376, + "step": 100535 + }, + { + "epoch": 2.4562089267827916, + "grad_norm": 0.009626665152609348, + "learning_rate": 1.2036912422805572e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67769704, + "step": 100540 + }, + { + "epoch": 2.4563310776146388, + "grad_norm": 0.10648053884506226, + "learning_rate": 1.2036077517703136e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67772648, + "step": 100545 + }, + { + "epoch": 2.4564532284464855, + "grad_norm": 4.779392242431641, + "learning_rate": 1.2035242597793796e-06, + "loss": 0.0008, + "num_input_tokens_seen": 67776040, + "step": 100550 + }, + { + "epoch": 2.4565753792783327, + "grad_norm": 39.19169998168945, + "learning_rate": 1.2034407663083626e-06, + "loss": 0.0917, + "num_input_tokens_seen": 67779368, + "step": 100555 + }, + { + "epoch": 2.45669753011018, + "grad_norm": 0.06931298226118088, + "learning_rate": 1.2033572713578698e-06, + "loss": 0.0638, + "num_input_tokens_seen": 67782824, + "step": 100560 + }, + { + "epoch": 2.456819680942027, + "grad_norm": 0.03799625486135483, + "learning_rate": 1.2032737749285077e-06, + "loss": 0.0003, + "num_input_tokens_seen": 67786344, + "step": 100565 + }, + { + "epoch": 2.4569418317738743, + "grad_norm": 16.83151626586914, + "learning_rate": 1.2031902770208846e-06, + "loss": 0.0584, + "num_input_tokens_seen": 67789864, + "step": 100570 + }, + { + "epoch": 2.4570639826057215, + "grad_norm": 0.01382389385253191, + "learning_rate": 1.2031067776356068e-06, + "loss": 0.0627, + "num_input_tokens_seen": 67793192, + "step": 100575 + }, + { + "epoch": 2.4571861334375686, + "grad_norm": 0.02206377312541008, + "learning_rate": 1.203023276773282e-06, + "loss": 0.1053, + "num_input_tokens_seen": 67796520, + "step": 100580 + }, + { + "epoch": 2.457308284269416, + "grad_norm": 33.02057647705078, + "learning_rate": 1.2029397744345173e-06, + "loss": 0.1509, + "num_input_tokens_seen": 67799848, + "step": 100585 + }, + { + "epoch": 2.457430435101263, + "grad_norm": 0.11031505465507507, + "learning_rate": 1.2028562706199198e-06, + "loss": 0.0352, + "num_input_tokens_seen": 67803048, + "step": 100590 + }, + { + "epoch": 2.45755258593311, + "grad_norm": 0.03389930725097656, + "learning_rate": 1.202772765330097e-06, + "loss": 0.1097, + "num_input_tokens_seen": 67806312, + "step": 100595 + }, + { + "epoch": 2.4576747367649574, + "grad_norm": 0.1475170999765396, + "learning_rate": 1.2026892585656564e-06, + "loss": 0.0007, + "num_input_tokens_seen": 67810024, + "step": 100600 + }, + { + "epoch": 2.4577968875968046, + "grad_norm": 0.26105520129203796, + "learning_rate": 1.2026057503272048e-06, + "loss": 0.0495, + "num_input_tokens_seen": 67813224, + "step": 100605 + }, + { + "epoch": 2.457919038428652, + "grad_norm": 3.085169792175293, + "learning_rate": 1.2025222406153499e-06, + "loss": 0.0053, + "num_input_tokens_seen": 67816488, + "step": 100610 + }, + { + "epoch": 2.458041189260499, + "grad_norm": 0.14629432559013367, + "learning_rate": 1.2024387294306986e-06, + "loss": 0.056, + "num_input_tokens_seen": 67819752, + "step": 100615 + }, + { + "epoch": 2.458163340092346, + "grad_norm": 0.026330020278692245, + "learning_rate": 1.2023552167738585e-06, + "loss": 0.0334, + "num_input_tokens_seen": 67823144, + "step": 100620 + }, + { + "epoch": 2.4582854909241934, + "grad_norm": 0.03332911804318428, + "learning_rate": 1.2022717026454365e-06, + "loss": 0.0162, + "num_input_tokens_seen": 67826600, + "step": 100625 + }, + { + "epoch": 2.4584076417560405, + "grad_norm": 1.5771613121032715, + "learning_rate": 1.2021881870460404e-06, + "loss": 0.0006, + "num_input_tokens_seen": 67829800, + "step": 100630 + }, + { + "epoch": 2.4585297925878873, + "grad_norm": 0.4669386148452759, + "learning_rate": 1.2021046699762777e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67833192, + "step": 100635 + }, + { + "epoch": 2.458651943419735, + "grad_norm": 0.07576936483383179, + "learning_rate": 1.2020211514367552e-06, + "loss": 0.0717, + "num_input_tokens_seen": 67836584, + "step": 100640 + }, + { + "epoch": 2.4587740942515817, + "grad_norm": 0.06343811750411987, + "learning_rate": 1.2019376314280808e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67839784, + "step": 100645 + }, + { + "epoch": 2.458896245083429, + "grad_norm": 0.4868296980857849, + "learning_rate": 1.2018541099508614e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67843112, + "step": 100650 + }, + { + "epoch": 2.459018395915276, + "grad_norm": 0.2107633501291275, + "learning_rate": 1.201770587005705e-06, + "loss": 0.0463, + "num_input_tokens_seen": 67847080, + "step": 100655 + }, + { + "epoch": 2.4591405467471232, + "grad_norm": 0.014766460284590721, + "learning_rate": 1.2016870625932182e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67850536, + "step": 100660 + }, + { + "epoch": 2.4592626975789704, + "grad_norm": 0.07846330851316452, + "learning_rate": 1.201603536714009e-06, + "loss": 0.0466, + "num_input_tokens_seen": 67854248, + "step": 100665 + }, + { + "epoch": 2.4593848484108176, + "grad_norm": 0.01824275404214859, + "learning_rate": 1.2015200093686845e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67857512, + "step": 100670 + }, + { + "epoch": 2.459506999242665, + "grad_norm": 0.001176109304651618, + "learning_rate": 1.2014364805578525e-06, + "loss": 0.0477, + "num_input_tokens_seen": 67860776, + "step": 100675 + }, + { + "epoch": 2.459629150074512, + "grad_norm": 0.013056579045951366, + "learning_rate": 1.2013529502821203e-06, + "loss": 0.0445, + "num_input_tokens_seen": 67863976, + "step": 100680 + }, + { + "epoch": 2.459751300906359, + "grad_norm": 0.03527745604515076, + "learning_rate": 1.201269418542095e-06, + "loss": 0.0361, + "num_input_tokens_seen": 67867112, + "step": 100685 + }, + { + "epoch": 2.4598734517382064, + "grad_norm": 0.0246686153113842, + "learning_rate": 1.2011858853383846e-06, + "loss": 0.0516, + "num_input_tokens_seen": 67870632, + "step": 100690 + }, + { + "epoch": 2.4599956025700536, + "grad_norm": 0.012224989011883736, + "learning_rate": 1.201102350671596e-06, + "loss": 0.0327, + "num_input_tokens_seen": 67873896, + "step": 100695 + }, + { + "epoch": 2.4601177534019008, + "grad_norm": 0.08171892166137695, + "learning_rate": 1.2010188145423373e-06, + "loss": 0.0366, + "num_input_tokens_seen": 67877224, + "step": 100700 + }, + { + "epoch": 2.460239904233748, + "grad_norm": 36.575340270996094, + "learning_rate": 1.2009352769512157e-06, + "loss": 0.0995, + "num_input_tokens_seen": 67881064, + "step": 100705 + }, + { + "epoch": 2.460362055065595, + "grad_norm": 0.02174554392695427, + "learning_rate": 1.2008517378988387e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67884520, + "step": 100710 + }, + { + "epoch": 2.4604842058974423, + "grad_norm": 0.008866322226822376, + "learning_rate": 1.200768197385814e-06, + "loss": 0.0346, + "num_input_tokens_seen": 67888104, + "step": 100715 + }, + { + "epoch": 2.4606063567292895, + "grad_norm": 0.06225938722491264, + "learning_rate": 1.2006846554127485e-06, + "loss": 0.0423, + "num_input_tokens_seen": 67891304, + "step": 100720 + }, + { + "epoch": 2.4607285075611367, + "grad_norm": 0.024569325149059296, + "learning_rate": 1.2006011119802506e-06, + "loss": 0.0481, + "num_input_tokens_seen": 67894568, + "step": 100725 + }, + { + "epoch": 2.4608506583929834, + "grad_norm": 0.6258397102355957, + "learning_rate": 1.2005175670889273e-06, + "loss": 0.0716, + "num_input_tokens_seen": 67897960, + "step": 100730 + }, + { + "epoch": 2.4609728092248306, + "grad_norm": 0.1836250126361847, + "learning_rate": 1.2004340207393866e-06, + "loss": 0.0449, + "num_input_tokens_seen": 67901352, + "step": 100735 + }, + { + "epoch": 2.461094960056678, + "grad_norm": 0.0073592402040958405, + "learning_rate": 1.2003504729322355e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67904616, + "step": 100740 + }, + { + "epoch": 2.461217110888525, + "grad_norm": 0.009784862399101257, + "learning_rate": 1.200266923668082e-06, + "loss": 0.0502, + "num_input_tokens_seen": 67908008, + "step": 100745 + }, + { + "epoch": 2.461339261720372, + "grad_norm": 0.7223156690597534, + "learning_rate": 1.2001833729475332e-06, + "loss": 0.0611, + "num_input_tokens_seen": 67911208, + "step": 100750 + }, + { + "epoch": 2.4614614125522194, + "grad_norm": 0.04937265068292618, + "learning_rate": 1.2000998207711974e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67915048, + "step": 100755 + }, + { + "epoch": 2.4615835633840666, + "grad_norm": 1.4051700830459595, + "learning_rate": 1.200016267139682e-06, + "loss": 0.033, + "num_input_tokens_seen": 67918312, + "step": 100760 + }, + { + "epoch": 2.4617057142159138, + "grad_norm": 1.7179447412490845, + "learning_rate": 1.1999327120535945e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67921576, + "step": 100765 + }, + { + "epoch": 2.461827865047761, + "grad_norm": 0.006037765648216009, + "learning_rate": 1.1998491555135424e-06, + "loss": 0.0006, + "num_input_tokens_seen": 67924520, + "step": 100770 + }, + { + "epoch": 2.461950015879608, + "grad_norm": 0.15003329515457153, + "learning_rate": 1.1997655975201335e-06, + "loss": 0.0009, + "num_input_tokens_seen": 67927656, + "step": 100775 + }, + { + "epoch": 2.4620721667114553, + "grad_norm": 0.005590246059000492, + "learning_rate": 1.1996820380739754e-06, + "loss": 0.0867, + "num_input_tokens_seen": 67931304, + "step": 100780 + }, + { + "epoch": 2.4621943175433025, + "grad_norm": 0.004409489221870899, + "learning_rate": 1.1995984771756757e-06, + "loss": 0.0005, + "num_input_tokens_seen": 67934760, + "step": 100785 + }, + { + "epoch": 2.4623164683751497, + "grad_norm": 0.008362567983567715, + "learning_rate": 1.1995149148258423e-06, + "loss": 0.0, + "num_input_tokens_seen": 67937960, + "step": 100790 + }, + { + "epoch": 2.462438619206997, + "grad_norm": 13.45290470123291, + "learning_rate": 1.1994313510250828e-06, + "loss": 0.098, + "num_input_tokens_seen": 67941160, + "step": 100795 + }, + { + "epoch": 2.462560770038844, + "grad_norm": 0.001858392613939941, + "learning_rate": 1.1993477857740049e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67944488, + "step": 100800 + }, + { + "epoch": 2.4626829208706913, + "grad_norm": 0.06624038517475128, + "learning_rate": 1.1992642190732163e-06, + "loss": 0.1641, + "num_input_tokens_seen": 67947880, + "step": 100805 + }, + { + "epoch": 2.4628050717025385, + "grad_norm": 0.02120389975607395, + "learning_rate": 1.1991806509233246e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67951144, + "step": 100810 + }, + { + "epoch": 2.4629272225343852, + "grad_norm": 0.006992959883064032, + "learning_rate": 1.199097081324938e-06, + "loss": 0.142, + "num_input_tokens_seen": 67954984, + "step": 100815 + }, + { + "epoch": 2.463049373366233, + "grad_norm": 0.049475252628326416, + "learning_rate": 1.1990135102786634e-06, + "loss": 0.0753, + "num_input_tokens_seen": 67958440, + "step": 100820 + }, + { + "epoch": 2.4631715241980796, + "grad_norm": 0.046576276421546936, + "learning_rate": 1.1989299377851093e-06, + "loss": 0.097, + "num_input_tokens_seen": 67962152, + "step": 100825 + }, + { + "epoch": 2.463293675029927, + "grad_norm": 0.056836869567632675, + "learning_rate": 1.1988463638448832e-06, + "loss": 0.0011, + "num_input_tokens_seen": 67965608, + "step": 100830 + }, + { + "epoch": 2.463415825861774, + "grad_norm": 0.04384641721844673, + "learning_rate": 1.1987627884585927e-06, + "loss": 0.0433, + "num_input_tokens_seen": 67968744, + "step": 100835 + }, + { + "epoch": 2.463537976693621, + "grad_norm": 0.26525595784187317, + "learning_rate": 1.1986792116268458e-06, + "loss": 0.0758, + "num_input_tokens_seen": 67971752, + "step": 100840 + }, + { + "epoch": 2.4636601275254684, + "grad_norm": 0.03251001983880997, + "learning_rate": 1.19859563335025e-06, + "loss": 0.0019, + "num_input_tokens_seen": 67975208, + "step": 100845 + }, + { + "epoch": 2.4637822783573156, + "grad_norm": 0.03581248223781586, + "learning_rate": 1.1985120536294135e-06, + "loss": 0.0311, + "num_input_tokens_seen": 67978152, + "step": 100850 + }, + { + "epoch": 2.4639044291891627, + "grad_norm": 0.050688888877630234, + "learning_rate": 1.198428472464944e-06, + "loss": 0.002, + "num_input_tokens_seen": 67981480, + "step": 100855 + }, + { + "epoch": 2.46402658002101, + "grad_norm": 0.1690601259469986, + "learning_rate": 1.1983448898574493e-06, + "loss": 0.0004, + "num_input_tokens_seen": 67984680, + "step": 100860 + }, + { + "epoch": 2.464148730852857, + "grad_norm": 1.0517950057983398, + "learning_rate": 1.1982613058075372e-06, + "loss": 0.132, + "num_input_tokens_seen": 67987944, + "step": 100865 + }, + { + "epoch": 2.4642708816847043, + "grad_norm": 0.02967626415193081, + "learning_rate": 1.198177720315816e-06, + "loss": 0.0002, + "num_input_tokens_seen": 67992104, + "step": 100870 + }, + { + "epoch": 2.4643930325165515, + "grad_norm": 0.02798345312476158, + "learning_rate": 1.1980941333828924e-06, + "loss": 0.1005, + "num_input_tokens_seen": 67995368, + "step": 100875 + }, + { + "epoch": 2.4645151833483987, + "grad_norm": 0.02776155062019825, + "learning_rate": 1.1980105450093754e-06, + "loss": 0.0001, + "num_input_tokens_seen": 67998952, + "step": 100880 + }, + { + "epoch": 2.464637334180246, + "grad_norm": 0.13066832721233368, + "learning_rate": 1.1979269551958722e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68002408, + "step": 100885 + }, + { + "epoch": 2.464759485012093, + "grad_norm": 68.17784881591797, + "learning_rate": 1.197843363942991e-06, + "loss": 0.0338, + "num_input_tokens_seen": 68005736, + "step": 100890 + }, + { + "epoch": 2.4648816358439403, + "grad_norm": 0.05373845994472504, + "learning_rate": 1.19775977125134e-06, + "loss": 0.0468, + "num_input_tokens_seen": 68009512, + "step": 100895 + }, + { + "epoch": 2.4650037866757875, + "grad_norm": 0.07720531523227692, + "learning_rate": 1.1976761771215262e-06, + "loss": 0.0015, + "num_input_tokens_seen": 68012584, + "step": 100900 + }, + { + "epoch": 2.4651259375076346, + "grad_norm": 18.308761596679688, + "learning_rate": 1.1975925815541582e-06, + "loss": 0.149, + "num_input_tokens_seen": 68016168, + "step": 100905 + }, + { + "epoch": 2.4652480883394814, + "grad_norm": 0.0015869983471930027, + "learning_rate": 1.197508984549844e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68019688, + "step": 100910 + }, + { + "epoch": 2.4653702391713286, + "grad_norm": 0.05763440206646919, + "learning_rate": 1.1974253861091914e-06, + "loss": 0.039, + "num_input_tokens_seen": 68022824, + "step": 100915 + }, + { + "epoch": 2.4654923900031758, + "grad_norm": 0.07990226149559021, + "learning_rate": 1.1973417862328084e-06, + "loss": 0.0191, + "num_input_tokens_seen": 68026344, + "step": 100920 + }, + { + "epoch": 2.465614540835023, + "grad_norm": 0.38577136397361755, + "learning_rate": 1.1972581849213024e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68029480, + "step": 100925 + }, + { + "epoch": 2.46573669166687, + "grad_norm": 0.041660867631435394, + "learning_rate": 1.197174582175282e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68032744, + "step": 100930 + }, + { + "epoch": 2.4658588424987173, + "grad_norm": 50.8360481262207, + "learning_rate": 1.1970909779953553e-06, + "loss": 0.1133, + "num_input_tokens_seen": 68035880, + "step": 100935 + }, + { + "epoch": 2.4659809933305645, + "grad_norm": 0.009769965894520283, + "learning_rate": 1.1970073723821294e-06, + "loss": 0.0452, + "num_input_tokens_seen": 68039208, + "step": 100940 + }, + { + "epoch": 2.4661031441624117, + "grad_norm": 0.029366832226514816, + "learning_rate": 1.1969237653362135e-06, + "loss": 0.0796, + "num_input_tokens_seen": 68042664, + "step": 100945 + }, + { + "epoch": 2.466225294994259, + "grad_norm": 0.02519991248846054, + "learning_rate": 1.1968401568582145e-06, + "loss": 0.0028, + "num_input_tokens_seen": 68046248, + "step": 100950 + }, + { + "epoch": 2.466347445826106, + "grad_norm": 0.08081067353487015, + "learning_rate": 1.1967565469487413e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68049512, + "step": 100955 + }, + { + "epoch": 2.4664695966579533, + "grad_norm": 0.3698766231536865, + "learning_rate": 1.1966729356084016e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68052840, + "step": 100960 + }, + { + "epoch": 2.4665917474898005, + "grad_norm": 0.046203531324863434, + "learning_rate": 1.1965893228378032e-06, + "loss": 0.0533, + "num_input_tokens_seen": 68056040, + "step": 100965 + }, + { + "epoch": 2.4667138983216477, + "grad_norm": 0.027350353077054024, + "learning_rate": 1.1965057086375546e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68059880, + "step": 100970 + }, + { + "epoch": 2.466836049153495, + "grad_norm": 17.96902847290039, + "learning_rate": 1.1964220930082633e-06, + "loss": 0.0501, + "num_input_tokens_seen": 68063272, + "step": 100975 + }, + { + "epoch": 2.466958199985342, + "grad_norm": 0.010453293099999428, + "learning_rate": 1.1963384759505378e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68066280, + "step": 100980 + }, + { + "epoch": 2.4670803508171892, + "grad_norm": 2.16798734664917, + "learning_rate": 1.1962548574649863e-06, + "loss": 0.0009, + "num_input_tokens_seen": 68069416, + "step": 100985 + }, + { + "epoch": 2.4672025016490364, + "grad_norm": 0.06394907832145691, + "learning_rate": 1.1961712375522166e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68072744, + "step": 100990 + }, + { + "epoch": 2.467324652480883, + "grad_norm": 0.20608043670654297, + "learning_rate": 1.1960876162128368e-06, + "loss": 0.0306, + "num_input_tokens_seen": 68076392, + "step": 100995 + }, + { + "epoch": 2.467446803312731, + "grad_norm": 0.017128368839621544, + "learning_rate": 1.1960039934474552e-06, + "loss": 0.0401, + "num_input_tokens_seen": 68079720, + "step": 101000 + }, + { + "epoch": 2.4675689541445776, + "grad_norm": 0.016166547313332558, + "learning_rate": 1.1959203692566797e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68083048, + "step": 101005 + }, + { + "epoch": 2.4676911049764247, + "grad_norm": 1.9010814428329468, + "learning_rate": 1.1958367436411189e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68086376, + "step": 101010 + }, + { + "epoch": 2.467813255808272, + "grad_norm": 0.11727231740951538, + "learning_rate": 1.1957531166013803e-06, + "loss": 0.0602, + "num_input_tokens_seen": 68089704, + "step": 101015 + }, + { + "epoch": 2.467935406640119, + "grad_norm": 0.36076483130455017, + "learning_rate": 1.1956694881380724e-06, + "loss": 0.059, + "num_input_tokens_seen": 68092840, + "step": 101020 + }, + { + "epoch": 2.4680575574719663, + "grad_norm": 0.04420606046915054, + "learning_rate": 1.1955858582518036e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68096680, + "step": 101025 + }, + { + "epoch": 2.4681797083038135, + "grad_norm": 0.03951043635606766, + "learning_rate": 1.1955022269431816e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68099944, + "step": 101030 + }, + { + "epoch": 2.4683018591356607, + "grad_norm": 0.09191124141216278, + "learning_rate": 1.195418594212815e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68102952, + "step": 101035 + }, + { + "epoch": 2.468424009967508, + "grad_norm": 0.06286905705928802, + "learning_rate": 1.1953349600613116e-06, + "loss": 0.0489, + "num_input_tokens_seen": 68106024, + "step": 101040 + }, + { + "epoch": 2.468546160799355, + "grad_norm": 0.027167772874236107, + "learning_rate": 1.1952513244892802e-06, + "loss": 0.0, + "num_input_tokens_seen": 68109352, + "step": 101045 + }, + { + "epoch": 2.4686683116312023, + "grad_norm": 0.007415004540234804, + "learning_rate": 1.1951676874973284e-06, + "loss": 0.0354, + "num_input_tokens_seen": 68112872, + "step": 101050 + }, + { + "epoch": 2.4687904624630495, + "grad_norm": 22.62297821044922, + "learning_rate": 1.1950840490860647e-06, + "loss": 0.0776, + "num_input_tokens_seen": 68115816, + "step": 101055 + }, + { + "epoch": 2.4689126132948966, + "grad_norm": 0.00367325940169394, + "learning_rate": 1.1950004092560973e-06, + "loss": 0.1308, + "num_input_tokens_seen": 68119080, + "step": 101060 + }, + { + "epoch": 2.469034764126744, + "grad_norm": 0.018181709572672844, + "learning_rate": 1.1949167680080344e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68122984, + "step": 101065 + }, + { + "epoch": 2.469156914958591, + "grad_norm": 0.11490341275930405, + "learning_rate": 1.1948331253424846e-06, + "loss": 0.0423, + "num_input_tokens_seen": 68126184, + "step": 101070 + }, + { + "epoch": 2.469279065790438, + "grad_norm": 0.007061179727315903, + "learning_rate": 1.1947494812600558e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68129576, + "step": 101075 + }, + { + "epoch": 2.469401216622285, + "grad_norm": 0.03727791830897331, + "learning_rate": 1.1946658357613564e-06, + "loss": 0.0579, + "num_input_tokens_seen": 68132776, + "step": 101080 + }, + { + "epoch": 2.4695233674541326, + "grad_norm": 37.90617370605469, + "learning_rate": 1.1945821888469946e-06, + "loss": 0.0374, + "num_input_tokens_seen": 68135784, + "step": 101085 + }, + { + "epoch": 2.4696455182859793, + "grad_norm": 0.022134313359856606, + "learning_rate": 1.1944985405175788e-06, + "loss": 0.0795, + "num_input_tokens_seen": 68138856, + "step": 101090 + }, + { + "epoch": 2.4697676691178265, + "grad_norm": 0.31639614701271057, + "learning_rate": 1.1944148907737171e-06, + "loss": 0.0528, + "num_input_tokens_seen": 68142056, + "step": 101095 + }, + { + "epoch": 2.4698898199496737, + "grad_norm": 0.10037325322628021, + "learning_rate": 1.1943312396160181e-06, + "loss": 0.077, + "num_input_tokens_seen": 68145512, + "step": 101100 + }, + { + "epoch": 2.470011970781521, + "grad_norm": 0.0351799838244915, + "learning_rate": 1.1942475870450904e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68149032, + "step": 101105 + }, + { + "epoch": 2.470134121613368, + "grad_norm": 0.1054639145731926, + "learning_rate": 1.1941639330615419e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68152552, + "step": 101110 + }, + { + "epoch": 2.4702562724452153, + "grad_norm": 0.04764983057975769, + "learning_rate": 1.1940802776659808e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68155944, + "step": 101115 + }, + { + "epoch": 2.4703784232770625, + "grad_norm": 0.046450525522232056, + "learning_rate": 1.193996620859016e-06, + "loss": 0.0378, + "num_input_tokens_seen": 68159528, + "step": 101120 + }, + { + "epoch": 2.4705005741089097, + "grad_norm": 308.5850830078125, + "learning_rate": 1.1939129626412553e-06, + "loss": 0.0356, + "num_input_tokens_seen": 68162728, + "step": 101125 + }, + { + "epoch": 2.470622724940757, + "grad_norm": 0.29687780141830444, + "learning_rate": 1.1938293030133075e-06, + "loss": 0.0308, + "num_input_tokens_seen": 68166952, + "step": 101130 + }, + { + "epoch": 2.470744875772604, + "grad_norm": 72.99173736572266, + "learning_rate": 1.193745641975781e-06, + "loss": 0.0009, + "num_input_tokens_seen": 68170216, + "step": 101135 + }, + { + "epoch": 2.4708670266044512, + "grad_norm": 0.06350903958082199, + "learning_rate": 1.193661979529284e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68173544, + "step": 101140 + }, + { + "epoch": 2.4709891774362984, + "grad_norm": 0.06977628171443939, + "learning_rate": 1.193578315674425e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68177320, + "step": 101145 + }, + { + "epoch": 2.4711113282681456, + "grad_norm": 0.041446931660175323, + "learning_rate": 1.1934946504118123e-06, + "loss": 0.0582, + "num_input_tokens_seen": 68181032, + "step": 101150 + }, + { + "epoch": 2.471233479099993, + "grad_norm": 0.004509053658694029, + "learning_rate": 1.1934109837420544e-06, + "loss": 0.0014, + "num_input_tokens_seen": 68184232, + "step": 101155 + }, + { + "epoch": 2.47135562993184, + "grad_norm": 164.20571899414062, + "learning_rate": 1.1933273156657602e-06, + "loss": 0.091, + "num_input_tokens_seen": 68187688, + "step": 101160 + }, + { + "epoch": 2.471477780763687, + "grad_norm": 53.392967224121094, + "learning_rate": 1.1932436461835376e-06, + "loss": 0.0287, + "num_input_tokens_seen": 68191464, + "step": 101165 + }, + { + "epoch": 2.4715999315955344, + "grad_norm": 3.405658006668091, + "learning_rate": 1.193159975295995e-06, + "loss": 0.0449, + "num_input_tokens_seen": 68195432, + "step": 101170 + }, + { + "epoch": 2.471722082427381, + "grad_norm": 72.32855224609375, + "learning_rate": 1.1930763030037413e-06, + "loss": 0.0272, + "num_input_tokens_seen": 68198824, + "step": 101175 + }, + { + "epoch": 2.4718442332592283, + "grad_norm": 0.2518201172351837, + "learning_rate": 1.1929926293073852e-06, + "loss": 0.0009, + "num_input_tokens_seen": 68202728, + "step": 101180 + }, + { + "epoch": 2.4719663840910755, + "grad_norm": 201.95413208007812, + "learning_rate": 1.192908954207534e-06, + "loss": 0.005, + "num_input_tokens_seen": 68206312, + "step": 101185 + }, + { + "epoch": 2.4720885349229227, + "grad_norm": 0.07108218967914581, + "learning_rate": 1.1928252777047974e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68210088, + "step": 101190 + }, + { + "epoch": 2.47221068575477, + "grad_norm": 0.021677618846297264, + "learning_rate": 1.1927415997997834e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68213480, + "step": 101195 + }, + { + "epoch": 2.472332836586617, + "grad_norm": 0.011132234707474709, + "learning_rate": 1.192657920493101e-06, + "loss": 0.0008, + "num_input_tokens_seen": 68216744, + "step": 101200 + }, + { + "epoch": 2.4724549874184643, + "grad_norm": 46.65589904785156, + "learning_rate": 1.192574239785358e-06, + "loss": 0.0942, + "num_input_tokens_seen": 68220008, + "step": 101205 + }, + { + "epoch": 2.4725771382503114, + "grad_norm": 0.051826756447553635, + "learning_rate": 1.1924905576771634e-06, + "loss": 0.0419, + "num_input_tokens_seen": 68223592, + "step": 101210 + }, + { + "epoch": 2.4726992890821586, + "grad_norm": 0.3045022487640381, + "learning_rate": 1.1924068741691258e-06, + "loss": 0.0008, + "num_input_tokens_seen": 68227048, + "step": 101215 + }, + { + "epoch": 2.472821439914006, + "grad_norm": 0.0067571792751550674, + "learning_rate": 1.1923231892618532e-06, + "loss": 0.0426, + "num_input_tokens_seen": 68230440, + "step": 101220 + }, + { + "epoch": 2.472943590745853, + "grad_norm": 0.033845242112874985, + "learning_rate": 1.1922395029559554e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68233512, + "step": 101225 + }, + { + "epoch": 2.4730657415777, + "grad_norm": 41.13069152832031, + "learning_rate": 1.1921558152520399e-06, + "loss": 0.0836, + "num_input_tokens_seen": 68237416, + "step": 101230 + }, + { + "epoch": 2.4731878924095474, + "grad_norm": 0.005356302484869957, + "learning_rate": 1.1920721261507156e-06, + "loss": 0.0423, + "num_input_tokens_seen": 68240168, + "step": 101235 + }, + { + "epoch": 2.4733100432413946, + "grad_norm": 0.3754318356513977, + "learning_rate": 1.191988435652591e-06, + "loss": 0.0702, + "num_input_tokens_seen": 68243304, + "step": 101240 + }, + { + "epoch": 2.4734321940732418, + "grad_norm": 0.030581409111618996, + "learning_rate": 1.191904743758275e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68247016, + "step": 101245 + }, + { + "epoch": 2.473554344905089, + "grad_norm": 13.41250991821289, + "learning_rate": 1.1918210504683759e-06, + "loss": 0.0774, + "num_input_tokens_seen": 68250472, + "step": 101250 + }, + { + "epoch": 2.473676495736936, + "grad_norm": 0.1623183786869049, + "learning_rate": 1.1917373557835026e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68254120, + "step": 101255 + }, + { + "epoch": 2.473798646568783, + "grad_norm": 0.40426185727119446, + "learning_rate": 1.191653659704264e-06, + "loss": 0.089, + "num_input_tokens_seen": 68257448, + "step": 101260 + }, + { + "epoch": 2.4739207974006305, + "grad_norm": 0.0006512034451588988, + "learning_rate": 1.191569962231268e-06, + "loss": 0.0752, + "num_input_tokens_seen": 68260584, + "step": 101265 + }, + { + "epoch": 2.4740429482324773, + "grad_norm": 0.08026715368032455, + "learning_rate": 1.191486263365124e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68263784, + "step": 101270 + }, + { + "epoch": 2.4741650990643245, + "grad_norm": 0.0661499872803688, + "learning_rate": 1.1914025631064403e-06, + "loss": 0.0138, + "num_input_tokens_seen": 68266856, + "step": 101275 + }, + { + "epoch": 2.4742872498961717, + "grad_norm": 0.09698405861854553, + "learning_rate": 1.1913188614558255e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68270056, + "step": 101280 + }, + { + "epoch": 2.474409400728019, + "grad_norm": 0.27747029066085815, + "learning_rate": 1.1912351584138889e-06, + "loss": 0.0739, + "num_input_tokens_seen": 68273064, + "step": 101285 + }, + { + "epoch": 2.474531551559866, + "grad_norm": 0.0036097955889999866, + "learning_rate": 1.1911514539812386e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68276584, + "step": 101290 + }, + { + "epoch": 2.4746537023917132, + "grad_norm": 0.22168563306331635, + "learning_rate": 1.1910677481584835e-06, + "loss": 0.0782, + "num_input_tokens_seen": 68279848, + "step": 101295 + }, + { + "epoch": 2.4747758532235604, + "grad_norm": 0.002510837512090802, + "learning_rate": 1.1909840409462322e-06, + "loss": 0.0576, + "num_input_tokens_seen": 68283496, + "step": 101300 + }, + { + "epoch": 2.4748980040554076, + "grad_norm": 9.333548545837402, + "learning_rate": 1.1909003323450938e-06, + "loss": 0.1167, + "num_input_tokens_seen": 68287080, + "step": 101305 + }, + { + "epoch": 2.475020154887255, + "grad_norm": 0.042019180953502655, + "learning_rate": 1.1908166223556766e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68290408, + "step": 101310 + }, + { + "epoch": 2.475142305719102, + "grad_norm": 25.9899845123291, + "learning_rate": 1.1907329109785895e-06, + "loss": 0.0942, + "num_input_tokens_seen": 68293544, + "step": 101315 + }, + { + "epoch": 2.475264456550949, + "grad_norm": 27.839702606201172, + "learning_rate": 1.1906491982144417e-06, + "loss": 0.1748, + "num_input_tokens_seen": 68297384, + "step": 101320 + }, + { + "epoch": 2.4753866073827964, + "grad_norm": 0.1217912808060646, + "learning_rate": 1.1905654840638417e-06, + "loss": 0.0603, + "num_input_tokens_seen": 68300520, + "step": 101325 + }, + { + "epoch": 2.4755087582146436, + "grad_norm": 0.010269726626574993, + "learning_rate": 1.190481768527398e-06, + "loss": 0.0008, + "num_input_tokens_seen": 68304040, + "step": 101330 + }, + { + "epoch": 2.4756309090464907, + "grad_norm": 1.610625982284546, + "learning_rate": 1.19039805160572e-06, + "loss": 0.0688, + "num_input_tokens_seen": 68307304, + "step": 101335 + }, + { + "epoch": 2.475753059878338, + "grad_norm": 0.009682337753474712, + "learning_rate": 1.1903143332994156e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68310568, + "step": 101340 + }, + { + "epoch": 2.475875210710185, + "grad_norm": 37.2939453125, + "learning_rate": 1.1902306136090947e-06, + "loss": 0.06, + "num_input_tokens_seen": 68314792, + "step": 101345 + }, + { + "epoch": 2.4759973615420323, + "grad_norm": 0.0182523000985384, + "learning_rate": 1.1901468925353652e-06, + "loss": 0.0807, + "num_input_tokens_seen": 68317992, + "step": 101350 + }, + { + "epoch": 2.476119512373879, + "grad_norm": 0.03435177356004715, + "learning_rate": 1.1900631700788366e-06, + "loss": 0.0612, + "num_input_tokens_seen": 68321512, + "step": 101355 + }, + { + "epoch": 2.4762416632057263, + "grad_norm": 0.1018286719918251, + "learning_rate": 1.1899794462401176e-06, + "loss": 0.1757, + "num_input_tokens_seen": 68324840, + "step": 101360 + }, + { + "epoch": 2.4763638140375734, + "grad_norm": 0.9780299663543701, + "learning_rate": 1.1898957210198168e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68328104, + "step": 101365 + }, + { + "epoch": 2.4764859648694206, + "grad_norm": 0.014705418609082699, + "learning_rate": 1.1898119944185432e-06, + "loss": 0.0344, + "num_input_tokens_seen": 68331880, + "step": 101370 + }, + { + "epoch": 2.476608115701268, + "grad_norm": 8.638080596923828, + "learning_rate": 1.1897282664369058e-06, + "loss": 0.0518, + "num_input_tokens_seen": 68335080, + "step": 101375 + }, + { + "epoch": 2.476730266533115, + "grad_norm": 0.31917837262153625, + "learning_rate": 1.1896445370755135e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68338216, + "step": 101380 + }, + { + "epoch": 2.476852417364962, + "grad_norm": 0.2948130667209625, + "learning_rate": 1.189560806334975e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68341544, + "step": 101385 + }, + { + "epoch": 2.4769745681968094, + "grad_norm": 413.6029052734375, + "learning_rate": 1.1894770742158992e-06, + "loss": 0.1015, + "num_input_tokens_seen": 68344872, + "step": 101390 + }, + { + "epoch": 2.4770967190286566, + "grad_norm": 16.92646026611328, + "learning_rate": 1.1893933407188957e-06, + "loss": 0.0425, + "num_input_tokens_seen": 68348392, + "step": 101395 + }, + { + "epoch": 2.4772188698605038, + "grad_norm": 0.10089614987373352, + "learning_rate": 1.1893096058445723e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68351848, + "step": 101400 + }, + { + "epoch": 2.477341020692351, + "grad_norm": 0.018931131809949875, + "learning_rate": 1.1892258695935383e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68355368, + "step": 101405 + }, + { + "epoch": 2.477463171524198, + "grad_norm": 42.50852584838867, + "learning_rate": 1.1891421319664034e-06, + "loss": 0.0958, + "num_input_tokens_seen": 68358504, + "step": 101410 + }, + { + "epoch": 2.4775853223560453, + "grad_norm": 0.08458676189184189, + "learning_rate": 1.1890583929637761e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68361768, + "step": 101415 + }, + { + "epoch": 2.4777074731878925, + "grad_norm": 0.01854422688484192, + "learning_rate": 1.188974652586265e-06, + "loss": 0.0479, + "num_input_tokens_seen": 68364968, + "step": 101420 + }, + { + "epoch": 2.4778296240197397, + "grad_norm": 0.05844534561038017, + "learning_rate": 1.1888909108344797e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68368296, + "step": 101425 + }, + { + "epoch": 2.477951774851587, + "grad_norm": 0.013152677565813065, + "learning_rate": 1.1888071677090288e-06, + "loss": 0.0514, + "num_input_tokens_seen": 68372328, + "step": 101430 + }, + { + "epoch": 2.478073925683434, + "grad_norm": 0.27791768312454224, + "learning_rate": 1.1887234232105215e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68375656, + "step": 101435 + }, + { + "epoch": 2.478196076515281, + "grad_norm": 0.007701204624027014, + "learning_rate": 1.1886396773395664e-06, + "loss": 0.0457, + "num_input_tokens_seen": 68378920, + "step": 101440 + }, + { + "epoch": 2.4783182273471285, + "grad_norm": 10.394662857055664, + "learning_rate": 1.1885559300967728e-06, + "loss": 0.0011, + "num_input_tokens_seen": 68382184, + "step": 101445 + }, + { + "epoch": 2.4784403781789752, + "grad_norm": 0.023853451013565063, + "learning_rate": 1.18847218148275e-06, + "loss": 0.0422, + "num_input_tokens_seen": 68385448, + "step": 101450 + }, + { + "epoch": 2.4785625290108224, + "grad_norm": 0.04058440774679184, + "learning_rate": 1.188388431498107e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68388840, + "step": 101455 + }, + { + "epoch": 2.4786846798426696, + "grad_norm": 0.013206390663981438, + "learning_rate": 1.1883046801434524e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68392552, + "step": 101460 + }, + { + "epoch": 2.478806830674517, + "grad_norm": 0.981783390045166, + "learning_rate": 1.1882209274193954e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68395880, + "step": 101465 + }, + { + "epoch": 2.478928981506364, + "grad_norm": 50.186222076416016, + "learning_rate": 1.1881371733265451e-06, + "loss": 0.0385, + "num_input_tokens_seen": 68399272, + "step": 101470 + }, + { + "epoch": 2.479051132338211, + "grad_norm": 0.006814572494477034, + "learning_rate": 1.188053417865511e-06, + "loss": 0.031, + "num_input_tokens_seen": 68402408, + "step": 101475 + }, + { + "epoch": 2.4791732831700584, + "grad_norm": 0.09142502397298813, + "learning_rate": 1.1879696610369017e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68406248, + "step": 101480 + }, + { + "epoch": 2.4792954340019056, + "grad_norm": 0.2830347716808319, + "learning_rate": 1.1878859028413267e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68409384, + "step": 101485 + }, + { + "epoch": 2.4794175848337527, + "grad_norm": 30.16297149658203, + "learning_rate": 1.1878021432793948e-06, + "loss": 0.115, + "num_input_tokens_seen": 68412712, + "step": 101490 + }, + { + "epoch": 2.4795397356656, + "grad_norm": 0.01764572784304619, + "learning_rate": 1.187718382351715e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68416040, + "step": 101495 + }, + { + "epoch": 2.479661886497447, + "grad_norm": 0.006279618013650179, + "learning_rate": 1.1876346200588966e-06, + "loss": 0.0624, + "num_input_tokens_seen": 68419112, + "step": 101500 + }, + { + "epoch": 2.4797840373292943, + "grad_norm": 0.5235111117362976, + "learning_rate": 1.1875508564015488e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68422248, + "step": 101505 + }, + { + "epoch": 2.4799061881611415, + "grad_norm": 0.12128138542175293, + "learning_rate": 1.187467091380281e-06, + "loss": 0.0699, + "num_input_tokens_seen": 68425384, + "step": 101510 + }, + { + "epoch": 2.4800283389929887, + "grad_norm": 0.2382190227508545, + "learning_rate": 1.187383324995702e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68428584, + "step": 101515 + }, + { + "epoch": 2.480150489824836, + "grad_norm": 0.02181437611579895, + "learning_rate": 1.1872995572484208e-06, + "loss": 0.0838, + "num_input_tokens_seen": 68431912, + "step": 101520 + }, + { + "epoch": 2.4802726406566826, + "grad_norm": 0.02534443512558937, + "learning_rate": 1.187215788139047e-06, + "loss": 0.0483, + "num_input_tokens_seen": 68434984, + "step": 101525 + }, + { + "epoch": 2.4803947914885303, + "grad_norm": 0.029036127030849457, + "learning_rate": 1.1871320176681896e-06, + "loss": 0.0374, + "num_input_tokens_seen": 68437928, + "step": 101530 + }, + { + "epoch": 2.480516942320377, + "grad_norm": 0.02246944047510624, + "learning_rate": 1.1870482458364582e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68441640, + "step": 101535 + }, + { + "epoch": 2.480639093152224, + "grad_norm": 0.059610191732645035, + "learning_rate": 1.186964472644461e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68444968, + "step": 101540 + }, + { + "epoch": 2.4807612439840714, + "grad_norm": 0.005097515881061554, + "learning_rate": 1.1868806980928084e-06, + "loss": 0.0745, + "num_input_tokens_seen": 68448808, + "step": 101545 + }, + { + "epoch": 2.4808833948159186, + "grad_norm": 13.624731063842773, + "learning_rate": 1.186796922182109e-06, + "loss": 0.0394, + "num_input_tokens_seen": 68452392, + "step": 101550 + }, + { + "epoch": 2.4810055456477658, + "grad_norm": 0.0009953816188499331, + "learning_rate": 1.186713144912972e-06, + "loss": 0.0417, + "num_input_tokens_seen": 68455464, + "step": 101555 + }, + { + "epoch": 2.481127696479613, + "grad_norm": 0.11448115110397339, + "learning_rate": 1.1866293662860066e-06, + "loss": 0.0427, + "num_input_tokens_seen": 68458408, + "step": 101560 + }, + { + "epoch": 2.48124984731146, + "grad_norm": 0.14324010908603668, + "learning_rate": 1.1865455863018226e-06, + "loss": 0.0351, + "num_input_tokens_seen": 68461480, + "step": 101565 + }, + { + "epoch": 2.4813719981433073, + "grad_norm": 19.067188262939453, + "learning_rate": 1.1864618049610287e-06, + "loss": 0.1186, + "num_input_tokens_seen": 68465896, + "step": 101570 + }, + { + "epoch": 2.4814941489751545, + "grad_norm": 0.02945057861506939, + "learning_rate": 1.1863780222642346e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68469672, + "step": 101575 + }, + { + "epoch": 2.4816162998070017, + "grad_norm": 1.2527203559875488, + "learning_rate": 1.1862942382120492e-06, + "loss": 0.0668, + "num_input_tokens_seen": 68472936, + "step": 101580 + }, + { + "epoch": 2.481738450638849, + "grad_norm": 0.9003691077232361, + "learning_rate": 1.1862104528050823e-06, + "loss": 0.0545, + "num_input_tokens_seen": 68476968, + "step": 101585 + }, + { + "epoch": 2.481860601470696, + "grad_norm": 70.0113296508789, + "learning_rate": 1.1861266660439427e-06, + "loss": 0.0706, + "num_input_tokens_seen": 68479976, + "step": 101590 + }, + { + "epoch": 2.4819827523025433, + "grad_norm": 26.04130744934082, + "learning_rate": 1.1860428779292398e-06, + "loss": 0.0895, + "num_input_tokens_seen": 68483240, + "step": 101595 + }, + { + "epoch": 2.4821049031343905, + "grad_norm": 0.817161500453949, + "learning_rate": 1.1859590884615832e-06, + "loss": 0.0595, + "num_input_tokens_seen": 68486696, + "step": 101600 + }, + { + "epoch": 2.4822270539662377, + "grad_norm": 0.051058027893304825, + "learning_rate": 1.185875297641582e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68490536, + "step": 101605 + }, + { + "epoch": 2.482349204798085, + "grad_norm": 0.024212127551436424, + "learning_rate": 1.1857915054698457e-06, + "loss": 0.1793, + "num_input_tokens_seen": 68493928, + "step": 101610 + }, + { + "epoch": 2.482471355629932, + "grad_norm": 0.006492579821497202, + "learning_rate": 1.1857077119469835e-06, + "loss": 0.0518, + "num_input_tokens_seen": 68496936, + "step": 101615 + }, + { + "epoch": 2.482593506461779, + "grad_norm": 0.0478641577064991, + "learning_rate": 1.1856239170736048e-06, + "loss": 0.108, + "num_input_tokens_seen": 68500008, + "step": 101620 + }, + { + "epoch": 2.482715657293626, + "grad_norm": 0.037426840513944626, + "learning_rate": 1.1855401208503192e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68503592, + "step": 101625 + }, + { + "epoch": 2.482837808125473, + "grad_norm": 0.020461494103074074, + "learning_rate": 1.1854563232777362e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68506664, + "step": 101630 + }, + { + "epoch": 2.4829599589573204, + "grad_norm": 0.11810333281755447, + "learning_rate": 1.1853725243564645e-06, + "loss": 0.0424, + "num_input_tokens_seen": 68509928, + "step": 101635 + }, + { + "epoch": 2.4830821097891675, + "grad_norm": 0.3965718150138855, + "learning_rate": 1.1852887240871142e-06, + "loss": 0.0455, + "num_input_tokens_seen": 68513768, + "step": 101640 + }, + { + "epoch": 2.4832042606210147, + "grad_norm": 0.022183619439601898, + "learning_rate": 1.1852049224702947e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68516840, + "step": 101645 + }, + { + "epoch": 2.483326411452862, + "grad_norm": 0.14527912437915802, + "learning_rate": 1.1851211195066148e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68519976, + "step": 101650 + }, + { + "epoch": 2.483448562284709, + "grad_norm": 0.33538344502449036, + "learning_rate": 1.1850373151966845e-06, + "loss": 0.001, + "num_input_tokens_seen": 68523304, + "step": 101655 + }, + { + "epoch": 2.4835707131165563, + "grad_norm": 0.11240077763795853, + "learning_rate": 1.1849535095411127e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68526632, + "step": 101660 + }, + { + "epoch": 2.4836928639484035, + "grad_norm": 0.00868434738367796, + "learning_rate": 1.1848697025405096e-06, + "loss": 0.0443, + "num_input_tokens_seen": 68529768, + "step": 101665 + }, + { + "epoch": 2.4838150147802507, + "grad_norm": 0.1269422471523285, + "learning_rate": 1.1847858941954843e-06, + "loss": 0.0367, + "num_input_tokens_seen": 68533032, + "step": 101670 + }, + { + "epoch": 2.483937165612098, + "grad_norm": 0.02361040934920311, + "learning_rate": 1.1847020845066462e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68536488, + "step": 101675 + }, + { + "epoch": 2.484059316443945, + "grad_norm": 0.019039446488022804, + "learning_rate": 1.1846182734746048e-06, + "loss": 0.0306, + "num_input_tokens_seen": 68539752, + "step": 101680 + }, + { + "epoch": 2.4841814672757923, + "grad_norm": 0.06745230406522751, + "learning_rate": 1.1845344610999694e-06, + "loss": 0.1347, + "num_input_tokens_seen": 68543528, + "step": 101685 + }, + { + "epoch": 2.4843036181076394, + "grad_norm": 0.10040964186191559, + "learning_rate": 1.1844506473833504e-06, + "loss": 0.0296, + "num_input_tokens_seen": 68547112, + "step": 101690 + }, + { + "epoch": 2.4844257689394866, + "grad_norm": 0.054083868861198425, + "learning_rate": 1.1843668323253564e-06, + "loss": 0.0396, + "num_input_tokens_seen": 68550376, + "step": 101695 + }, + { + "epoch": 2.484547919771334, + "grad_norm": 0.05380721390247345, + "learning_rate": 1.184283015926597e-06, + "loss": 0.0443, + "num_input_tokens_seen": 68553960, + "step": 101700 + }, + { + "epoch": 2.4846700706031806, + "grad_norm": 0.7454630732536316, + "learning_rate": 1.1841991981876823e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68557352, + "step": 101705 + }, + { + "epoch": 2.484792221435028, + "grad_norm": 27.55868911743164, + "learning_rate": 1.1841153791092213e-06, + "loss": 0.1567, + "num_input_tokens_seen": 68560936, + "step": 101710 + }, + { + "epoch": 2.484914372266875, + "grad_norm": 35.94108581542969, + "learning_rate": 1.1840315586918234e-06, + "loss": 0.1355, + "num_input_tokens_seen": 68563944, + "step": 101715 + }, + { + "epoch": 2.485036523098722, + "grad_norm": 0.08266687393188477, + "learning_rate": 1.183947736936099e-06, + "loss": 0.0716, + "num_input_tokens_seen": 68566760, + "step": 101720 + }, + { + "epoch": 2.4851586739305693, + "grad_norm": 0.024817267432808876, + "learning_rate": 1.1838639138426572e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68570280, + "step": 101725 + }, + { + "epoch": 2.4852808247624165, + "grad_norm": 0.12349018454551697, + "learning_rate": 1.1837800894121072e-06, + "loss": 0.0537, + "num_input_tokens_seen": 68573352, + "step": 101730 + }, + { + "epoch": 2.4854029755942637, + "grad_norm": 26.148344039916992, + "learning_rate": 1.183696263645059e-06, + "loss": 0.0771, + "num_input_tokens_seen": 68576744, + "step": 101735 + }, + { + "epoch": 2.485525126426111, + "grad_norm": 0.007630622014403343, + "learning_rate": 1.1836124365421225e-06, + "loss": 0.0349, + "num_input_tokens_seen": 68580456, + "step": 101740 + }, + { + "epoch": 2.485647277257958, + "grad_norm": 717.2020263671875, + "learning_rate": 1.1835286081039068e-06, + "loss": 0.0135, + "num_input_tokens_seen": 68584296, + "step": 101745 + }, + { + "epoch": 2.4857694280898053, + "grad_norm": 0.9846317768096924, + "learning_rate": 1.1834447783310217e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68587176, + "step": 101750 + }, + { + "epoch": 2.4858915789216525, + "grad_norm": 0.05773583799600601, + "learning_rate": 1.183360947224077e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68590376, + "step": 101755 + }, + { + "epoch": 2.4860137297534997, + "grad_norm": 0.026842717081308365, + "learning_rate": 1.183277114783682e-06, + "loss": 0.0009, + "num_input_tokens_seen": 68593832, + "step": 101760 + }, + { + "epoch": 2.486135880585347, + "grad_norm": 109.32928466796875, + "learning_rate": 1.1831932810104463e-06, + "loss": 0.0956, + "num_input_tokens_seen": 68597736, + "step": 101765 + }, + { + "epoch": 2.486258031417194, + "grad_norm": 0.016676966100931168, + "learning_rate": 1.1831094459049802e-06, + "loss": 0.0408, + "num_input_tokens_seen": 68601320, + "step": 101770 + }, + { + "epoch": 2.4863801822490412, + "grad_norm": 0.11005407571792603, + "learning_rate": 1.1830256094678927e-06, + "loss": 0.0697, + "num_input_tokens_seen": 68604712, + "step": 101775 + }, + { + "epoch": 2.4865023330808884, + "grad_norm": 0.2985239028930664, + "learning_rate": 1.1829417716997936e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68607720, + "step": 101780 + }, + { + "epoch": 2.4866244839127356, + "grad_norm": 0.016494203358888626, + "learning_rate": 1.1828579326012928e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68611240, + "step": 101785 + }, + { + "epoch": 2.486746634744583, + "grad_norm": 0.007882504723966122, + "learning_rate": 1.1827740921730003e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68614824, + "step": 101790 + }, + { + "epoch": 2.48686878557643, + "grad_norm": 0.3112851083278656, + "learning_rate": 1.1826902504155253e-06, + "loss": 0.0302, + "num_input_tokens_seen": 68618920, + "step": 101795 + }, + { + "epoch": 2.4869909364082767, + "grad_norm": 1.156801462173462, + "learning_rate": 1.1826064073294778e-06, + "loss": 0.0936, + "num_input_tokens_seen": 68621992, + "step": 101800 + }, + { + "epoch": 2.487113087240124, + "grad_norm": 0.1341012418270111, + "learning_rate": 1.182522562915467e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68625128, + "step": 101805 + }, + { + "epoch": 2.487235238071971, + "grad_norm": 0.004828187171369791, + "learning_rate": 1.182438717174103e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68629032, + "step": 101810 + }, + { + "epoch": 2.4873573889038183, + "grad_norm": 485.93035888671875, + "learning_rate": 1.1823548701059957e-06, + "loss": 0.0124, + "num_input_tokens_seen": 68632616, + "step": 101815 + }, + { + "epoch": 2.4874795397356655, + "grad_norm": 0.028452308848500252, + "learning_rate": 1.182271021711755e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68635624, + "step": 101820 + }, + { + "epoch": 2.4876016905675127, + "grad_norm": 0.5252352952957153, + "learning_rate": 1.1821871719919902e-06, + "loss": 0.0373, + "num_input_tokens_seen": 68638888, + "step": 101825 + }, + { + "epoch": 2.48772384139936, + "grad_norm": 0.010361897759139538, + "learning_rate": 1.1821033209473113e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68642600, + "step": 101830 + }, + { + "epoch": 2.487845992231207, + "grad_norm": 0.0020655151456594467, + "learning_rate": 1.182019468578328e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68645800, + "step": 101835 + }, + { + "epoch": 2.4879681430630542, + "grad_norm": 0.05361203849315643, + "learning_rate": 1.1819356148856501e-06, + "loss": 0.0612, + "num_input_tokens_seen": 68649448, + "step": 101840 + }, + { + "epoch": 2.4880902938949014, + "grad_norm": 0.11180011928081512, + "learning_rate": 1.181851759869888e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68652712, + "step": 101845 + }, + { + "epoch": 2.4882124447267486, + "grad_norm": 0.006292127072811127, + "learning_rate": 1.1817679035316504e-06, + "loss": 0.0752, + "num_input_tokens_seen": 68655848, + "step": 101850 + }, + { + "epoch": 2.488334595558596, + "grad_norm": 0.020426765084266663, + "learning_rate": 1.181684045871548e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68659240, + "step": 101855 + }, + { + "epoch": 2.488456746390443, + "grad_norm": 0.0005153603851795197, + "learning_rate": 1.1816001868901902e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68662952, + "step": 101860 + }, + { + "epoch": 2.48857889722229, + "grad_norm": 0.3125733435153961, + "learning_rate": 1.181516326588187e-06, + "loss": 0.0012, + "num_input_tokens_seen": 68666280, + "step": 101865 + }, + { + "epoch": 2.4887010480541374, + "grad_norm": 475.8556823730469, + "learning_rate": 1.1814324649661483e-06, + "loss": 0.1029, + "num_input_tokens_seen": 68669672, + "step": 101870 + }, + { + "epoch": 2.4888231988859846, + "grad_norm": 0.10539945960044861, + "learning_rate": 1.181348602024684e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68673512, + "step": 101875 + }, + { + "epoch": 2.4889453497178318, + "grad_norm": 0.06662607192993164, + "learning_rate": 1.1812647377644035e-06, + "loss": 0.0225, + "num_input_tokens_seen": 68677608, + "step": 101880 + }, + { + "epoch": 2.4890675005496785, + "grad_norm": 0.02495940588414669, + "learning_rate": 1.1811808721859175e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68681512, + "step": 101885 + }, + { + "epoch": 2.489189651381526, + "grad_norm": 13.704965591430664, + "learning_rate": 1.1810970052898355e-06, + "loss": 0.1399, + "num_input_tokens_seen": 68685160, + "step": 101890 + }, + { + "epoch": 2.489311802213373, + "grad_norm": 0.12245207279920578, + "learning_rate": 1.1810131370767671e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68688488, + "step": 101895 + }, + { + "epoch": 2.48943395304522, + "grad_norm": 0.06405623257160187, + "learning_rate": 1.1809292675473226e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68691560, + "step": 101900 + }, + { + "epoch": 2.4895561038770673, + "grad_norm": 0.020485732704401016, + "learning_rate": 1.1808453967021119e-06, + "loss": 0.0558, + "num_input_tokens_seen": 68694952, + "step": 101905 + }, + { + "epoch": 2.4896782547089145, + "grad_norm": 955.6651000976562, + "learning_rate": 1.1807615245417447e-06, + "loss": 0.0548, + "num_input_tokens_seen": 68698792, + "step": 101910 + }, + { + "epoch": 2.4898004055407617, + "grad_norm": 32.87169647216797, + "learning_rate": 1.1806776510668312e-06, + "loss": 0.0802, + "num_input_tokens_seen": 68701864, + "step": 101915 + }, + { + "epoch": 2.489922556372609, + "grad_norm": 0.8651986122131348, + "learning_rate": 1.1805937762779812e-06, + "loss": 0.001, + "num_input_tokens_seen": 68705000, + "step": 101920 + }, + { + "epoch": 2.490044707204456, + "grad_norm": 0.009764413349330425, + "learning_rate": 1.1805099001758045e-06, + "loss": 0.055, + "num_input_tokens_seen": 68708392, + "step": 101925 + }, + { + "epoch": 2.490166858036303, + "grad_norm": 0.00625853706151247, + "learning_rate": 1.1804260227609116e-06, + "loss": 0.0283, + "num_input_tokens_seen": 68711720, + "step": 101930 + }, + { + "epoch": 2.4902890088681504, + "grad_norm": 0.28316229581832886, + "learning_rate": 1.1803421440339113e-06, + "loss": 0.0226, + "num_input_tokens_seen": 68714920, + "step": 101935 + }, + { + "epoch": 2.4904111596999976, + "grad_norm": 0.03392892703413963, + "learning_rate": 1.1802582639954152e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68718440, + "step": 101940 + }, + { + "epoch": 2.490533310531845, + "grad_norm": 0.27008190751075745, + "learning_rate": 1.1801743826460324e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68721960, + "step": 101945 + }, + { + "epoch": 2.490655461363692, + "grad_norm": 0.01899190992116928, + "learning_rate": 1.180090499986373e-06, + "loss": 0.1339, + "num_input_tokens_seen": 68724904, + "step": 101950 + }, + { + "epoch": 2.490777612195539, + "grad_norm": 0.038128241896629333, + "learning_rate": 1.1800066160170472e-06, + "loss": 0.0326, + "num_input_tokens_seen": 68728040, + "step": 101955 + }, + { + "epoch": 2.4908997630273864, + "grad_norm": 0.19664481282234192, + "learning_rate": 1.1799227307386648e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68731368, + "step": 101960 + }, + { + "epoch": 2.4910219138592335, + "grad_norm": 0.00493689626455307, + "learning_rate": 1.1798388441518357e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68734376, + "step": 101965 + }, + { + "epoch": 2.4911440646910807, + "grad_norm": 0.012185181491076946, + "learning_rate": 1.1797549562571702e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68737768, + "step": 101970 + }, + { + "epoch": 2.491266215522928, + "grad_norm": 0.0166007112711668, + "learning_rate": 1.1796710670552783e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68741480, + "step": 101975 + }, + { + "epoch": 2.4913883663547747, + "grad_norm": 0.005818501580506563, + "learning_rate": 1.1795871765467701e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68744808, + "step": 101980 + }, + { + "epoch": 2.491510517186622, + "grad_norm": 0.0035260694567114115, + "learning_rate": 1.179503284732256e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68748456, + "step": 101985 + }, + { + "epoch": 2.491632668018469, + "grad_norm": 0.009216583333909512, + "learning_rate": 1.1794193916123453e-06, + "loss": 0.1791, + "num_input_tokens_seen": 68752104, + "step": 101990 + }, + { + "epoch": 2.4917548188503162, + "grad_norm": 0.44074198603630066, + "learning_rate": 1.1793354971876483e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68756136, + "step": 101995 + }, + { + "epoch": 2.4918769696821634, + "grad_norm": 0.061944544315338135, + "learning_rate": 1.179251601458776e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68759528, + "step": 102000 + }, + { + "epoch": 2.4919991205140106, + "grad_norm": 0.49484783411026, + "learning_rate": 1.1791677044263375e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68762728, + "step": 102005 + }, + { + "epoch": 2.492121271345858, + "grad_norm": 0.052835285663604736, + "learning_rate": 1.1790838060909434e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68766248, + "step": 102010 + }, + { + "epoch": 2.492243422177705, + "grad_norm": 1.7878988981246948, + "learning_rate": 1.1789999064532034e-06, + "loss": 0.0407, + "num_input_tokens_seen": 68769192, + "step": 102015 + }, + { + "epoch": 2.492365573009552, + "grad_norm": 26.050460815429688, + "learning_rate": 1.1789160055137282e-06, + "loss": 0.1691, + "num_input_tokens_seen": 68772328, + "step": 102020 + }, + { + "epoch": 2.4924877238413994, + "grad_norm": 0.045988693833351135, + "learning_rate": 1.1788321032731274e-06, + "loss": 0.1607, + "num_input_tokens_seen": 68776232, + "step": 102025 + }, + { + "epoch": 2.4926098746732466, + "grad_norm": 0.045895230025053024, + "learning_rate": 1.1787481997320117e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68779432, + "step": 102030 + }, + { + "epoch": 2.4927320255050938, + "grad_norm": 0.10627572983503342, + "learning_rate": 1.178664294890991e-06, + "loss": 0.0666, + "num_input_tokens_seen": 68782632, + "step": 102035 + }, + { + "epoch": 2.492854176336941, + "grad_norm": 0.14017941057682037, + "learning_rate": 1.1785803887506756e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68785896, + "step": 102040 + }, + { + "epoch": 2.492976327168788, + "grad_norm": 0.26814335584640503, + "learning_rate": 1.1784964813116754e-06, + "loss": 0.001, + "num_input_tokens_seen": 68788968, + "step": 102045 + }, + { + "epoch": 2.4930984780006353, + "grad_norm": 0.005131382029503584, + "learning_rate": 1.1784125725746008e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68792552, + "step": 102050 + }, + { + "epoch": 2.4932206288324825, + "grad_norm": 17.653770446777344, + "learning_rate": 1.1783286625400619e-06, + "loss": 0.0465, + "num_input_tokens_seen": 68795816, + "step": 102055 + }, + { + "epoch": 2.4933427796643297, + "grad_norm": 0.32243382930755615, + "learning_rate": 1.1782447512086693e-06, + "loss": 0.0494, + "num_input_tokens_seen": 68799400, + "step": 102060 + }, + { + "epoch": 2.4934649304961765, + "grad_norm": 0.04632522165775299, + "learning_rate": 1.1781608385810327e-06, + "loss": 0.0899, + "num_input_tokens_seen": 68802920, + "step": 102065 + }, + { + "epoch": 2.493587081328024, + "grad_norm": 32.26042938232422, + "learning_rate": 1.1780769246577625e-06, + "loss": 0.0366, + "num_input_tokens_seen": 68805864, + "step": 102070 + }, + { + "epoch": 2.493709232159871, + "grad_norm": 0.5655348300933838, + "learning_rate": 1.1779930094394692e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68808808, + "step": 102075 + }, + { + "epoch": 2.493831382991718, + "grad_norm": 134.5360565185547, + "learning_rate": 1.1779090929267628e-06, + "loss": 0.0685, + "num_input_tokens_seen": 68812200, + "step": 102080 + }, + { + "epoch": 2.493953533823565, + "grad_norm": 0.015330174006521702, + "learning_rate": 1.1778251751202534e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68815976, + "step": 102085 + }, + { + "epoch": 2.4940756846554124, + "grad_norm": 0.011960971169173717, + "learning_rate": 1.1777412560205515e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68819752, + "step": 102090 + }, + { + "epoch": 2.4941978354872596, + "grad_norm": 0.007198326289653778, + "learning_rate": 1.1776573356282677e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68823272, + "step": 102095 + }, + { + "epoch": 2.494319986319107, + "grad_norm": 0.028509464114904404, + "learning_rate": 1.1775734139440116e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68826536, + "step": 102100 + }, + { + "epoch": 2.494442137150954, + "grad_norm": 0.04958329722285271, + "learning_rate": 1.1774894909683943e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68829992, + "step": 102105 + }, + { + "epoch": 2.494564287982801, + "grad_norm": 0.005899414885789156, + "learning_rate": 1.1774055667020258e-06, + "loss": 0.0443, + "num_input_tokens_seen": 68833384, + "step": 102110 + }, + { + "epoch": 2.4946864388146484, + "grad_norm": 1.5522427558898926, + "learning_rate": 1.1773216411455158e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68836456, + "step": 102115 + }, + { + "epoch": 2.4948085896464955, + "grad_norm": 0.028691526502370834, + "learning_rate": 1.1772377142994752e-06, + "loss": 0.07, + "num_input_tokens_seen": 68839784, + "step": 102120 + }, + { + "epoch": 2.4949307404783427, + "grad_norm": 48.998382568359375, + "learning_rate": 1.1771537861645143e-06, + "loss": 0.0823, + "num_input_tokens_seen": 68842920, + "step": 102125 + }, + { + "epoch": 2.49505289131019, + "grad_norm": 0.053425900638103485, + "learning_rate": 1.1770698567412437e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68846248, + "step": 102130 + }, + { + "epoch": 2.495175042142037, + "grad_norm": 76.71073913574219, + "learning_rate": 1.1769859260302733e-06, + "loss": 0.0799, + "num_input_tokens_seen": 68849448, + "step": 102135 + }, + { + "epoch": 2.4952971929738843, + "grad_norm": 0.07131334394216537, + "learning_rate": 1.1769019940322137e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68853224, + "step": 102140 + }, + { + "epoch": 2.4954193438057315, + "grad_norm": 3.223170757293701, + "learning_rate": 1.176818060747675e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68857256, + "step": 102145 + }, + { + "epoch": 2.4955414946375782, + "grad_norm": 0.1664036363363266, + "learning_rate": 1.176734126177268e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68860584, + "step": 102150 + }, + { + "epoch": 2.495663645469426, + "grad_norm": 0.010533252730965614, + "learning_rate": 1.1766501903216028e-06, + "loss": 0.0017, + "num_input_tokens_seen": 68864296, + "step": 102155 + }, + { + "epoch": 2.4957857963012726, + "grad_norm": 32.75882339477539, + "learning_rate": 1.17656625318129e-06, + "loss": 0.1199, + "num_input_tokens_seen": 68867688, + "step": 102160 + }, + { + "epoch": 2.49590794713312, + "grad_norm": 0.8329938054084778, + "learning_rate": 1.1764823147569399e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68870952, + "step": 102165 + }, + { + "epoch": 2.496030097964967, + "grad_norm": 0.10802672058343887, + "learning_rate": 1.1763983750491629e-06, + "loss": 0.0366, + "num_input_tokens_seen": 68874088, + "step": 102170 + }, + { + "epoch": 2.496152248796814, + "grad_norm": 0.7539714574813843, + "learning_rate": 1.1763144340585695e-06, + "loss": 0.0152, + "num_input_tokens_seen": 68877288, + "step": 102175 + }, + { + "epoch": 2.4962743996286614, + "grad_norm": 28.428529739379883, + "learning_rate": 1.17623049178577e-06, + "loss": 0.1067, + "num_input_tokens_seen": 68880552, + "step": 102180 + }, + { + "epoch": 2.4963965504605086, + "grad_norm": 0.030778715386986732, + "learning_rate": 1.176146548231375e-06, + "loss": 0.0001, + "num_input_tokens_seen": 68884264, + "step": 102185 + }, + { + "epoch": 2.4965187012923558, + "grad_norm": 0.00747660081833601, + "learning_rate": 1.176062603395995e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68887976, + "step": 102190 + }, + { + "epoch": 2.496640852124203, + "grad_norm": 0.8309156894683838, + "learning_rate": 1.1759786572802405e-06, + "loss": 0.1329, + "num_input_tokens_seen": 68891176, + "step": 102195 + }, + { + "epoch": 2.49676300295605, + "grad_norm": 0.04314163699746132, + "learning_rate": 1.1758947098847217e-06, + "loss": 0.1105, + "num_input_tokens_seen": 68894568, + "step": 102200 + }, + { + "epoch": 2.4968851537878973, + "grad_norm": 0.005910862237215042, + "learning_rate": 1.1758107612100491e-06, + "loss": 0.0012, + "num_input_tokens_seen": 68897832, + "step": 102205 + }, + { + "epoch": 2.4970073046197445, + "grad_norm": 10.59837532043457, + "learning_rate": 1.1757268112568337e-06, + "loss": 0.0991, + "num_input_tokens_seen": 68901352, + "step": 102210 + }, + { + "epoch": 2.4971294554515917, + "grad_norm": 0.04170943796634674, + "learning_rate": 1.1756428600256855e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68904488, + "step": 102215 + }, + { + "epoch": 2.497251606283439, + "grad_norm": 0.0058653769083321095, + "learning_rate": 1.1755589075172152e-06, + "loss": 0.1971, + "num_input_tokens_seen": 68907560, + "step": 102220 + }, + { + "epoch": 2.497373757115286, + "grad_norm": 0.035103026777505875, + "learning_rate": 1.1754749537320333e-06, + "loss": 0.0004, + "num_input_tokens_seen": 68910760, + "step": 102225 + }, + { + "epoch": 2.4974959079471333, + "grad_norm": 0.015648502856492996, + "learning_rate": 1.1753909986707505e-06, + "loss": 0.0293, + "num_input_tokens_seen": 68914472, + "step": 102230 + }, + { + "epoch": 2.4976180587789805, + "grad_norm": 0.0711943730711937, + "learning_rate": 1.1753070423339768e-06, + "loss": 0.1082, + "num_input_tokens_seen": 68917608, + "step": 102235 + }, + { + "epoch": 2.4977402096108277, + "grad_norm": 42.90780258178711, + "learning_rate": 1.1752230847223235e-06, + "loss": 0.0013, + "num_input_tokens_seen": 68920808, + "step": 102240 + }, + { + "epoch": 2.4978623604426744, + "grad_norm": 0.019687887281179428, + "learning_rate": 1.1751391258364005e-06, + "loss": 0.0066, + "num_input_tokens_seen": 68924264, + "step": 102245 + }, + { + "epoch": 2.4979845112745216, + "grad_norm": 0.008903698064386845, + "learning_rate": 1.1750551656768188e-06, + "loss": 0.0508, + "num_input_tokens_seen": 68927528, + "step": 102250 + }, + { + "epoch": 2.498106662106369, + "grad_norm": 0.19585798680782318, + "learning_rate": 1.174971204244189e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68930344, + "step": 102255 + }, + { + "epoch": 2.498228812938216, + "grad_norm": 0.023192299529910088, + "learning_rate": 1.1748872415391214e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68933544, + "step": 102260 + }, + { + "epoch": 2.498350963770063, + "grad_norm": 0.05166854336857796, + "learning_rate": 1.1748032775622269e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68937384, + "step": 102265 + }, + { + "epoch": 2.4984731146019103, + "grad_norm": 0.006690154317766428, + "learning_rate": 1.174719312314116e-06, + "loss": 0.0345, + "num_input_tokens_seen": 68940648, + "step": 102270 + }, + { + "epoch": 2.4985952654337575, + "grad_norm": 0.036399852484464645, + "learning_rate": 1.1746353457953988e-06, + "loss": 0.0002, + "num_input_tokens_seen": 68944360, + "step": 102275 + }, + { + "epoch": 2.4987174162656047, + "grad_norm": 137.89358520507812, + "learning_rate": 1.1745513780066867e-06, + "loss": 0.035, + "num_input_tokens_seen": 68948072, + "step": 102280 + }, + { + "epoch": 2.498839567097452, + "grad_norm": 10.838881492614746, + "learning_rate": 1.17446740894859e-06, + "loss": 0.1122, + "num_input_tokens_seen": 68951336, + "step": 102285 + }, + { + "epoch": 2.498961717929299, + "grad_norm": 35.52821350097656, + "learning_rate": 1.1743834386217192e-06, + "loss": 0.098, + "num_input_tokens_seen": 68954600, + "step": 102290 + }, + { + "epoch": 2.4990838687611463, + "grad_norm": 660.7169189453125, + "learning_rate": 1.1742994670266856e-06, + "loss": 0.0631, + "num_input_tokens_seen": 68957800, + "step": 102295 + }, + { + "epoch": 2.4992060195929935, + "grad_norm": 1.6077616214752197, + "learning_rate": 1.1742154941640989e-06, + "loss": 0.0015, + "num_input_tokens_seen": 68961384, + "step": 102300 + }, + { + "epoch": 2.4993281704248407, + "grad_norm": 0.5358433723449707, + "learning_rate": 1.1741315200345703e-06, + "loss": 0.0263, + "num_input_tokens_seen": 68964648, + "step": 102305 + }, + { + "epoch": 2.499450321256688, + "grad_norm": 0.03443678468465805, + "learning_rate": 1.174047544638711e-06, + "loss": 0.0331, + "num_input_tokens_seen": 68968168, + "step": 102310 + }, + { + "epoch": 2.499572472088535, + "grad_norm": 0.06915648281574249, + "learning_rate": 1.1739635679771306e-06, + "loss": 0.0007, + "num_input_tokens_seen": 68971368, + "step": 102315 + }, + { + "epoch": 2.4996946229203822, + "grad_norm": 0.26230019330978394, + "learning_rate": 1.1738795900504406e-06, + "loss": 0.0006, + "num_input_tokens_seen": 68974760, + "step": 102320 + }, + { + "epoch": 2.4998167737522294, + "grad_norm": 229.71182250976562, + "learning_rate": 1.1737956108592512e-06, + "loss": 0.0299, + "num_input_tokens_seen": 68978216, + "step": 102325 + }, + { + "epoch": 2.499938924584076, + "grad_norm": 0.014974378049373627, + "learning_rate": 1.1737116304041736e-06, + "loss": 0.0428, + "num_input_tokens_seen": 68981480, + "step": 102330 + }, + { + "epoch": 2.500061075415924, + "grad_norm": 0.036249011754989624, + "learning_rate": 1.173627648685818e-06, + "loss": 0.0745, + "num_input_tokens_seen": 68984744, + "step": 102335 + }, + { + "epoch": 2.5001832262477706, + "grad_norm": 0.03688354417681694, + "learning_rate": 1.173543665704796e-06, + "loss": 0.0236, + "num_input_tokens_seen": 68988456, + "step": 102340 + }, + { + "epoch": 2.5001832262477706, + "eval_loss": 0.18347086012363434, + "eval_runtime": 47.7866, + "eval_samples_per_second": 761.406, + "eval_steps_per_second": 95.194, + "num_input_tokens_seen": 68988456, + "step": 102340 + }, + { + "epoch": 2.5003053770796178, + "grad_norm": 0.02084536850452423, + "learning_rate": 1.1734596814617173e-06, + "loss": 0.0005, + "num_input_tokens_seen": 68991656, + "step": 102345 + }, + { + "epoch": 2.500427527911465, + "grad_norm": 11.465497016906738, + "learning_rate": 1.1733756959571933e-06, + "loss": 0.063, + "num_input_tokens_seen": 68994728, + "step": 102350 + }, + { + "epoch": 2.500549678743312, + "grad_norm": 0.03728627413511276, + "learning_rate": 1.1732917091918347e-06, + "loss": 0.0003, + "num_input_tokens_seen": 68998056, + "step": 102355 + }, + { + "epoch": 2.5006718295751593, + "grad_norm": 0.0972631648182869, + "learning_rate": 1.173207721166252e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69001192, + "step": 102360 + }, + { + "epoch": 2.5007939804070065, + "grad_norm": 0.03852716460824013, + "learning_rate": 1.1731237318810562e-06, + "loss": 0.0551, + "num_input_tokens_seen": 69004584, + "step": 102365 + }, + { + "epoch": 2.5009161312388537, + "grad_norm": 0.027538815513253212, + "learning_rate": 1.1730397413368583e-06, + "loss": 0.0401, + "num_input_tokens_seen": 69007464, + "step": 102370 + }, + { + "epoch": 2.501038282070701, + "grad_norm": 0.10168246179819107, + "learning_rate": 1.1729557495342685e-06, + "loss": 0.0504, + "num_input_tokens_seen": 69010536, + "step": 102375 + }, + { + "epoch": 2.501160432902548, + "grad_norm": 0.024673402309417725, + "learning_rate": 1.1728717564738983e-06, + "loss": 0.0621, + "num_input_tokens_seen": 69014184, + "step": 102380 + }, + { + "epoch": 2.5012825837343953, + "grad_norm": 0.021501636132597923, + "learning_rate": 1.172787762156358e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69018152, + "step": 102385 + }, + { + "epoch": 2.5014047345662425, + "grad_norm": 0.022808056324720383, + "learning_rate": 1.1727037665822588e-06, + "loss": 0.054, + "num_input_tokens_seen": 69021608, + "step": 102390 + }, + { + "epoch": 2.5015268853980897, + "grad_norm": 0.4708763659000397, + "learning_rate": 1.172619769752211e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69024680, + "step": 102395 + }, + { + "epoch": 2.501649036229937, + "grad_norm": 34.90665054321289, + "learning_rate": 1.172535771666826e-06, + "loss": 0.1057, + "num_input_tokens_seen": 69027752, + "step": 102400 + }, + { + "epoch": 2.501771187061784, + "grad_norm": 0.05201762914657593, + "learning_rate": 1.1724517723267143e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69031080, + "step": 102405 + }, + { + "epoch": 2.501893337893631, + "grad_norm": 0.03720242902636528, + "learning_rate": 1.172367771732487e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69034408, + "step": 102410 + }, + { + "epoch": 2.502015488725478, + "grad_norm": 0.011985605582594872, + "learning_rate": 1.1722837698847552e-06, + "loss": 0.0434, + "num_input_tokens_seen": 69038568, + "step": 102415 + }, + { + "epoch": 2.5021376395573256, + "grad_norm": 11.999226570129395, + "learning_rate": 1.1721997667841295e-06, + "loss": 0.124, + "num_input_tokens_seen": 69041960, + "step": 102420 + }, + { + "epoch": 2.5022597903891723, + "grad_norm": 0.3938823938369751, + "learning_rate": 1.1721157624312206e-06, + "loss": 0.0012, + "num_input_tokens_seen": 69045544, + "step": 102425 + }, + { + "epoch": 2.50238194122102, + "grad_norm": 0.01563347317278385, + "learning_rate": 1.1720317568266393e-06, + "loss": 0.0541, + "num_input_tokens_seen": 69049064, + "step": 102430 + }, + { + "epoch": 2.5025040920528667, + "grad_norm": 43.03242111206055, + "learning_rate": 1.1719477499709971e-06, + "loss": 0.0013, + "num_input_tokens_seen": 69052456, + "step": 102435 + }, + { + "epoch": 2.502626242884714, + "grad_norm": 0.052847519516944885, + "learning_rate": 1.1718637418649047e-06, + "loss": 0.0623, + "num_input_tokens_seen": 69055720, + "step": 102440 + }, + { + "epoch": 2.502748393716561, + "grad_norm": 0.025785338133573532, + "learning_rate": 1.1717797325089727e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69058856, + "step": 102445 + }, + { + "epoch": 2.5028705445484083, + "grad_norm": 0.35239148139953613, + "learning_rate": 1.1716957219038123e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69062440, + "step": 102450 + }, + { + "epoch": 2.5029926953802555, + "grad_norm": 0.024918891489505768, + "learning_rate": 1.1716117100500347e-06, + "loss": 0.0627, + "num_input_tokens_seen": 69065704, + "step": 102455 + }, + { + "epoch": 2.5031148462121027, + "grad_norm": 1.4856153726577759, + "learning_rate": 1.1715276969482502e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69069096, + "step": 102460 + }, + { + "epoch": 2.50323699704395, + "grad_norm": 0.26246124505996704, + "learning_rate": 1.1714436825990706e-06, + "loss": 0.0859, + "num_input_tokens_seen": 69072680, + "step": 102465 + }, + { + "epoch": 2.503359147875797, + "grad_norm": 0.3725166618824005, + "learning_rate": 1.1713596670031061e-06, + "loss": 0.0266, + "num_input_tokens_seen": 69076008, + "step": 102470 + }, + { + "epoch": 2.5034812987076442, + "grad_norm": 0.059920698404312134, + "learning_rate": 1.1712756501609681e-06, + "loss": 0.0594, + "num_input_tokens_seen": 69079656, + "step": 102475 + }, + { + "epoch": 2.5036034495394914, + "grad_norm": 19.280452728271484, + "learning_rate": 1.1711916320732675e-06, + "loss": 0.0634, + "num_input_tokens_seen": 69082664, + "step": 102480 + }, + { + "epoch": 2.5037256003713386, + "grad_norm": 0.05751289427280426, + "learning_rate": 1.1711076127406155e-06, + "loss": 0.047, + "num_input_tokens_seen": 69085864, + "step": 102485 + }, + { + "epoch": 2.503847751203186, + "grad_norm": 0.044818613678216934, + "learning_rate": 1.1710235921636228e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69089512, + "step": 102490 + }, + { + "epoch": 2.503969902035033, + "grad_norm": 0.032787542790174484, + "learning_rate": 1.1709395703429002e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69093032, + "step": 102495 + }, + { + "epoch": 2.5040920528668797, + "grad_norm": 0.039422884583473206, + "learning_rate": 1.1708555472790593e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69096232, + "step": 102500 + }, + { + "epoch": 2.5042142036987274, + "grad_norm": 0.07437865436077118, + "learning_rate": 1.170771522972711e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69099432, + "step": 102505 + }, + { + "epoch": 2.504336354530574, + "grad_norm": 0.008755282498896122, + "learning_rate": 1.1706874974244661e-06, + "loss": 0.0921, + "num_input_tokens_seen": 69102504, + "step": 102510 + }, + { + "epoch": 2.5044585053624218, + "grad_norm": 0.11593923717737198, + "learning_rate": 1.1706034706349358e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69106152, + "step": 102515 + }, + { + "epoch": 2.5045806561942685, + "grad_norm": 0.037901975214481354, + "learning_rate": 1.1705194426047314e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69109800, + "step": 102520 + }, + { + "epoch": 2.5047028070261157, + "grad_norm": 0.011591866612434387, + "learning_rate": 1.1704354133344635e-06, + "loss": 0.0436, + "num_input_tokens_seen": 69113064, + "step": 102525 + }, + { + "epoch": 2.504824957857963, + "grad_norm": 0.015728740021586418, + "learning_rate": 1.1703513828247436e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69116328, + "step": 102530 + }, + { + "epoch": 2.50494710868981, + "grad_norm": 0.12637105584144592, + "learning_rate": 1.1702673510761827e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69119592, + "step": 102535 + }, + { + "epoch": 2.5050692595216573, + "grad_norm": 0.15325936675071716, + "learning_rate": 1.1701833180893917e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69123048, + "step": 102540 + }, + { + "epoch": 2.5051914103535045, + "grad_norm": 1.882394552230835, + "learning_rate": 1.1700992838649819e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69126888, + "step": 102545 + }, + { + "epoch": 2.5053135611853516, + "grad_norm": 0.033663101494312286, + "learning_rate": 1.170015248403564e-06, + "loss": 0.1027, + "num_input_tokens_seen": 69130088, + "step": 102550 + }, + { + "epoch": 2.505435712017199, + "grad_norm": 0.43850404024124146, + "learning_rate": 1.1699312117057498e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69133096, + "step": 102555 + }, + { + "epoch": 2.505557862849046, + "grad_norm": 12.489361763000488, + "learning_rate": 1.16984717377215e-06, + "loss": 0.0943, + "num_input_tokens_seen": 69136616, + "step": 102560 + }, + { + "epoch": 2.505680013680893, + "grad_norm": 0.17170336842536926, + "learning_rate": 1.169763134603376e-06, + "loss": 0.0563, + "num_input_tokens_seen": 69139880, + "step": 102565 + }, + { + "epoch": 2.5058021645127404, + "grad_norm": 0.04106225445866585, + "learning_rate": 1.1696790942000389e-06, + "loss": 0.0428, + "num_input_tokens_seen": 69143848, + "step": 102570 + }, + { + "epoch": 2.5059243153445876, + "grad_norm": 0.2668132185935974, + "learning_rate": 1.1695950525627499e-06, + "loss": 0.1425, + "num_input_tokens_seen": 69146984, + "step": 102575 + }, + { + "epoch": 2.506046466176435, + "grad_norm": 33.77798843383789, + "learning_rate": 1.16951100969212e-06, + "loss": 0.0607, + "num_input_tokens_seen": 69150440, + "step": 102580 + }, + { + "epoch": 2.506168617008282, + "grad_norm": 0.1751384735107422, + "learning_rate": 1.1694269655887602e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69153896, + "step": 102585 + }, + { + "epoch": 2.506290767840129, + "grad_norm": 0.3074788451194763, + "learning_rate": 1.169342920253282e-06, + "loss": 0.0629, + "num_input_tokens_seen": 69157032, + "step": 102590 + }, + { + "epoch": 2.506412918671976, + "grad_norm": 0.03330891206860542, + "learning_rate": 1.1692588736862966e-06, + "loss": 0.042, + "num_input_tokens_seen": 69160168, + "step": 102595 + }, + { + "epoch": 2.5065350695038235, + "grad_norm": 0.21258597075939178, + "learning_rate": 1.169174825888415e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69163560, + "step": 102600 + }, + { + "epoch": 2.5066572203356703, + "grad_norm": 0.07895040512084961, + "learning_rate": 1.1690907768602487e-06, + "loss": 0.032, + "num_input_tokens_seen": 69167080, + "step": 102605 + }, + { + "epoch": 2.506779371167518, + "grad_norm": 0.14544044435024261, + "learning_rate": 1.1690067266024086e-06, + "loss": 0.0282, + "num_input_tokens_seen": 69170792, + "step": 102610 + }, + { + "epoch": 2.5069015219993647, + "grad_norm": 0.08901277184486389, + "learning_rate": 1.1689226751155062e-06, + "loss": 0.097, + "num_input_tokens_seen": 69174056, + "step": 102615 + }, + { + "epoch": 2.507023672831212, + "grad_norm": 0.16903647780418396, + "learning_rate": 1.168838622400153e-06, + "loss": 0.0013, + "num_input_tokens_seen": 69177064, + "step": 102620 + }, + { + "epoch": 2.507145823663059, + "grad_norm": 0.6867279410362244, + "learning_rate": 1.1687545684569598e-06, + "loss": 0.0284, + "num_input_tokens_seen": 69180264, + "step": 102625 + }, + { + "epoch": 2.5072679744949062, + "grad_norm": 0.04904516041278839, + "learning_rate": 1.1686705132865377e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69183784, + "step": 102630 + }, + { + "epoch": 2.5073901253267534, + "grad_norm": 28.1914005279541, + "learning_rate": 1.1685864568894984e-06, + "loss": 0.0509, + "num_input_tokens_seen": 69187496, + "step": 102635 + }, + { + "epoch": 2.5075122761586006, + "grad_norm": 0.06639095395803452, + "learning_rate": 1.1685023992664533e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69190888, + "step": 102640 + }, + { + "epoch": 2.507634426990448, + "grad_norm": 0.3889292776584625, + "learning_rate": 1.1684183404180132e-06, + "loss": 0.0588, + "num_input_tokens_seen": 69194856, + "step": 102645 + }, + { + "epoch": 2.507756577822295, + "grad_norm": 1.6642292737960815, + "learning_rate": 1.1683342803447894e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69198120, + "step": 102650 + }, + { + "epoch": 2.507878728654142, + "grad_norm": 0.02274067886173725, + "learning_rate": 1.1682502190473937e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69201384, + "step": 102655 + }, + { + "epoch": 2.5080008794859894, + "grad_norm": 0.057451456785202026, + "learning_rate": 1.168166156526437e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69204648, + "step": 102660 + }, + { + "epoch": 2.5081230303178366, + "grad_norm": 0.7137342691421509, + "learning_rate": 1.168082092782531e-06, + "loss": 0.0459, + "num_input_tokens_seen": 69207720, + "step": 102665 + }, + { + "epoch": 2.5082451811496838, + "grad_norm": 0.05443684384226799, + "learning_rate": 1.167998027816287e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69211048, + "step": 102670 + }, + { + "epoch": 2.508367331981531, + "grad_norm": 0.12076818197965622, + "learning_rate": 1.1679139616283155e-06, + "loss": 0.0464, + "num_input_tokens_seen": 69214120, + "step": 102675 + }, + { + "epoch": 2.5084894828133777, + "grad_norm": 0.4422742426395416, + "learning_rate": 1.1678298942192292e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69217896, + "step": 102680 + }, + { + "epoch": 2.5086116336452253, + "grad_norm": 0.18747763335704803, + "learning_rate": 1.1677458255896384e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69220776, + "step": 102685 + }, + { + "epoch": 2.508733784477072, + "grad_norm": 0.008999832905828953, + "learning_rate": 1.1676617557401547e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69224104, + "step": 102690 + }, + { + "epoch": 2.5088559353089197, + "grad_norm": 0.024260293692350388, + "learning_rate": 1.1675776846713899e-06, + "loss": 0.0048, + "num_input_tokens_seen": 69227112, + "step": 102695 + }, + { + "epoch": 2.5089780861407665, + "grad_norm": 42.969993591308594, + "learning_rate": 1.167493612383955e-06, + "loss": 0.0385, + "num_input_tokens_seen": 69230696, + "step": 102700 + }, + { + "epoch": 2.5091002369726136, + "grad_norm": 0.03098537214100361, + "learning_rate": 1.1674095388784616e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69234152, + "step": 102705 + }, + { + "epoch": 2.509222387804461, + "grad_norm": 0.0010812964756041765, + "learning_rate": 1.1673254641555206e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69237544, + "step": 102710 + }, + { + "epoch": 2.509344538636308, + "grad_norm": 0.02446441911160946, + "learning_rate": 1.1672413882157442e-06, + "loss": 0.0215, + "num_input_tokens_seen": 69240616, + "step": 102715 + }, + { + "epoch": 2.509466689468155, + "grad_norm": 0.3052109479904175, + "learning_rate": 1.1671573110597434e-06, + "loss": 0.0633, + "num_input_tokens_seen": 69243624, + "step": 102720 + }, + { + "epoch": 2.5095888403000024, + "grad_norm": 102.80419158935547, + "learning_rate": 1.1670732326881297e-06, + "loss": 0.1228, + "num_input_tokens_seen": 69246888, + "step": 102725 + }, + { + "epoch": 2.5097109911318496, + "grad_norm": 0.17125174403190613, + "learning_rate": 1.1669891531015145e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69250664, + "step": 102730 + }, + { + "epoch": 2.509833141963697, + "grad_norm": 0.008751153945922852, + "learning_rate": 1.1669050723005095e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69254440, + "step": 102735 + }, + { + "epoch": 2.509955292795544, + "grad_norm": 0.023152705281972885, + "learning_rate": 1.1668209902857253e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69257640, + "step": 102740 + }, + { + "epoch": 2.510077443627391, + "grad_norm": 0.016359543427824974, + "learning_rate": 1.1667369070577744e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69261288, + "step": 102745 + }, + { + "epoch": 2.5101995944592383, + "grad_norm": 0.13440461456775665, + "learning_rate": 1.1666528226172678e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69264360, + "step": 102750 + }, + { + "epoch": 2.5103217452910855, + "grad_norm": 0.004721632227301598, + "learning_rate": 1.1665687369648172e-06, + "loss": 0.0681, + "num_input_tokens_seen": 69267624, + "step": 102755 + }, + { + "epoch": 2.5104438961229327, + "grad_norm": 0.0034103114157915115, + "learning_rate": 1.1664846501010336e-06, + "loss": 0.0329, + "num_input_tokens_seen": 69270888, + "step": 102760 + }, + { + "epoch": 2.51056604695478, + "grad_norm": 0.7425034642219543, + "learning_rate": 1.1664005620265292e-06, + "loss": 0.092, + "num_input_tokens_seen": 69274536, + "step": 102765 + }, + { + "epoch": 2.510688197786627, + "grad_norm": 0.026685822755098343, + "learning_rate": 1.166316472741915e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69278120, + "step": 102770 + }, + { + "epoch": 2.510810348618474, + "grad_norm": 20.21971893310547, + "learning_rate": 1.1662323822478026e-06, + "loss": 0.0467, + "num_input_tokens_seen": 69282024, + "step": 102775 + }, + { + "epoch": 2.5109324994503215, + "grad_norm": 0.1762675642967224, + "learning_rate": 1.166148290544804e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69285288, + "step": 102780 + }, + { + "epoch": 2.5110546502821682, + "grad_norm": 0.01969783566892147, + "learning_rate": 1.16606419763353e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69288808, + "step": 102785 + }, + { + "epoch": 2.5111768011140154, + "grad_norm": 0.013307438232004642, + "learning_rate": 1.1659801035145925e-06, + "loss": 0.0018, + "num_input_tokens_seen": 69292072, + "step": 102790 + }, + { + "epoch": 2.5112989519458626, + "grad_norm": 33.22414016723633, + "learning_rate": 1.165896008188603e-06, + "loss": 0.0431, + "num_input_tokens_seen": 69295400, + "step": 102795 + }, + { + "epoch": 2.51142110277771, + "grad_norm": 0.007968748919665813, + "learning_rate": 1.1658119116561732e-06, + "loss": 0.1111, + "num_input_tokens_seen": 69298408, + "step": 102800 + }, + { + "epoch": 2.511543253609557, + "grad_norm": 0.14272573590278625, + "learning_rate": 1.1657278139179143e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69301928, + "step": 102805 + }, + { + "epoch": 2.511665404441404, + "grad_norm": 0.02590774931013584, + "learning_rate": 1.1656437149744384e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69305320, + "step": 102810 + }, + { + "epoch": 2.5117875552732514, + "grad_norm": 0.013551932759582996, + "learning_rate": 1.1655596148263568e-06, + "loss": 0.0, + "num_input_tokens_seen": 69308520, + "step": 102815 + }, + { + "epoch": 2.5119097061050986, + "grad_norm": 0.19894510507583618, + "learning_rate": 1.1654755134742814e-06, + "loss": 0.0502, + "num_input_tokens_seen": 69311720, + "step": 102820 + }, + { + "epoch": 2.5120318569369458, + "grad_norm": 0.01751531846821308, + "learning_rate": 1.1653914109188233e-06, + "loss": 0.0321, + "num_input_tokens_seen": 69315240, + "step": 102825 + }, + { + "epoch": 2.512154007768793, + "grad_norm": 0.010112373158335686, + "learning_rate": 1.1653073071605945e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69318888, + "step": 102830 + }, + { + "epoch": 2.51227615860064, + "grad_norm": 0.024472983554005623, + "learning_rate": 1.1652232022002064e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69322024, + "step": 102835 + }, + { + "epoch": 2.5123983094324873, + "grad_norm": 0.03016134537756443, + "learning_rate": 1.1651390960382707e-06, + "loss": 0.0501, + "num_input_tokens_seen": 69325480, + "step": 102840 + }, + { + "epoch": 2.5125204602643345, + "grad_norm": 0.02304493635892868, + "learning_rate": 1.1650549886753994e-06, + "loss": 0.0132, + "num_input_tokens_seen": 69329320, + "step": 102845 + }, + { + "epoch": 2.5126426110961817, + "grad_norm": 8.705880165100098, + "learning_rate": 1.1649708801122036e-06, + "loss": 0.0525, + "num_input_tokens_seen": 69332712, + "step": 102850 + }, + { + "epoch": 2.512764761928029, + "grad_norm": 80.08638763427734, + "learning_rate": 1.1648867703492951e-06, + "loss": 0.0337, + "num_input_tokens_seen": 69336168, + "step": 102855 + }, + { + "epoch": 2.5128869127598756, + "grad_norm": 0.08520685136318207, + "learning_rate": 1.1648026593872858e-06, + "loss": 0.0858, + "num_input_tokens_seen": 69339432, + "step": 102860 + }, + { + "epoch": 2.5130090635917233, + "grad_norm": 127.86909484863281, + "learning_rate": 1.1647185472267868e-06, + "loss": 0.0842, + "num_input_tokens_seen": 69342888, + "step": 102865 + }, + { + "epoch": 2.51313121442357, + "grad_norm": 0.01910681650042534, + "learning_rate": 1.1646344338684107e-06, + "loss": 0.1118, + "num_input_tokens_seen": 69346344, + "step": 102870 + }, + { + "epoch": 2.5132533652554176, + "grad_norm": 0.018942229449748993, + "learning_rate": 1.1645503193127685e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69349416, + "step": 102875 + }, + { + "epoch": 2.5133755160872644, + "grad_norm": 18.96344566345215, + "learning_rate": 1.1644662035604725e-06, + "loss": 0.0941, + "num_input_tokens_seen": 69352552, + "step": 102880 + }, + { + "epoch": 2.5134976669191116, + "grad_norm": 0.36074990034103394, + "learning_rate": 1.1643820866121338e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69355944, + "step": 102885 + }, + { + "epoch": 2.5136198177509588, + "grad_norm": 0.0732511654496193, + "learning_rate": 1.1642979684683642e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69358952, + "step": 102890 + }, + { + "epoch": 2.513741968582806, + "grad_norm": 0.14155447483062744, + "learning_rate": 1.1642138491297756e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69362728, + "step": 102895 + }, + { + "epoch": 2.513864119414653, + "grad_norm": 0.11240936070680618, + "learning_rate": 1.1641297285969798e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69366120, + "step": 102900 + }, + { + "epoch": 2.5139862702465003, + "grad_norm": 0.016462501138448715, + "learning_rate": 1.1640456068705886e-06, + "loss": 0.0772, + "num_input_tokens_seen": 69369576, + "step": 102905 + }, + { + "epoch": 2.5141084210783475, + "grad_norm": 0.06560775637626648, + "learning_rate": 1.1639614839512133e-06, + "loss": 0.0373, + "num_input_tokens_seen": 69372776, + "step": 102910 + }, + { + "epoch": 2.5142305719101947, + "grad_norm": 108.05178833007812, + "learning_rate": 1.1638773598394663e-06, + "loss": 0.0064, + "num_input_tokens_seen": 69375976, + "step": 102915 + }, + { + "epoch": 2.514352722742042, + "grad_norm": 0.010637856088578701, + "learning_rate": 1.1637932345359588e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69379048, + "step": 102920 + }, + { + "epoch": 2.514474873573889, + "grad_norm": 0.133377805352211, + "learning_rate": 1.1637091080413032e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69382184, + "step": 102925 + }, + { + "epoch": 2.5145970244057363, + "grad_norm": 0.04459724947810173, + "learning_rate": 1.1636249803561106e-06, + "loss": 0.155, + "num_input_tokens_seen": 69385576, + "step": 102930 + }, + { + "epoch": 2.5147191752375835, + "grad_norm": 0.09748226404190063, + "learning_rate": 1.1635408514809934e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69389032, + "step": 102935 + }, + { + "epoch": 2.5148413260694307, + "grad_norm": 0.03192561864852905, + "learning_rate": 1.163456721416563e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69392360, + "step": 102940 + }, + { + "epoch": 2.514963476901278, + "grad_norm": 0.036991093307733536, + "learning_rate": 1.1633725901634312e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69395688, + "step": 102945 + }, + { + "epoch": 2.515085627733125, + "grad_norm": 0.6261094212532043, + "learning_rate": 1.1632884577222105e-06, + "loss": 0.0509, + "num_input_tokens_seen": 69399208, + "step": 102950 + }, + { + "epoch": 2.515207778564972, + "grad_norm": 0.004038868006318808, + "learning_rate": 1.1632043240935118e-06, + "loss": 0.0452, + "num_input_tokens_seen": 69402856, + "step": 102955 + }, + { + "epoch": 2.5153299293968194, + "grad_norm": 0.015494892373681068, + "learning_rate": 1.1631201892779473e-06, + "loss": 0.0503, + "num_input_tokens_seen": 69405992, + "step": 102960 + }, + { + "epoch": 2.515452080228666, + "grad_norm": 0.043261006474494934, + "learning_rate": 1.1630360532761287e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69409320, + "step": 102965 + }, + { + "epoch": 2.5155742310605134, + "grad_norm": 0.003127552103251219, + "learning_rate": 1.1629519160886685e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69413224, + "step": 102970 + }, + { + "epoch": 2.5156963818923606, + "grad_norm": 26.568111419677734, + "learning_rate": 1.1628677777161782e-06, + "loss": 0.0417, + "num_input_tokens_seen": 69416488, + "step": 102975 + }, + { + "epoch": 2.5158185327242077, + "grad_norm": 0.005540535319596529, + "learning_rate": 1.1627836381592694e-06, + "loss": 0.0454, + "num_input_tokens_seen": 69419816, + "step": 102980 + }, + { + "epoch": 2.515940683556055, + "grad_norm": 0.06050194054841995, + "learning_rate": 1.1626994974185542e-06, + "loss": 0.0751, + "num_input_tokens_seen": 69423464, + "step": 102985 + }, + { + "epoch": 2.516062834387902, + "grad_norm": 0.021542632952332497, + "learning_rate": 1.1626153554946446e-06, + "loss": 0.1411, + "num_input_tokens_seen": 69426600, + "step": 102990 + }, + { + "epoch": 2.5161849852197493, + "grad_norm": 24.883790969848633, + "learning_rate": 1.1625312123881522e-06, + "loss": 0.0443, + "num_input_tokens_seen": 69429928, + "step": 102995 + }, + { + "epoch": 2.5163071360515965, + "grad_norm": 0.012859504669904709, + "learning_rate": 1.1624470680996894e-06, + "loss": 0.0634, + "num_input_tokens_seen": 69433128, + "step": 103000 + }, + { + "epoch": 2.5164292868834437, + "grad_norm": 0.07678893953561783, + "learning_rate": 1.1623629226298677e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69437224, + "step": 103005 + }, + { + "epoch": 2.516551437715291, + "grad_norm": 9.300538063049316, + "learning_rate": 1.1622787759792991e-06, + "loss": 0.1557, + "num_input_tokens_seen": 69440488, + "step": 103010 + }, + { + "epoch": 2.516673588547138, + "grad_norm": 38.39460754394531, + "learning_rate": 1.1621946281485957e-06, + "loss": 0.2014, + "num_input_tokens_seen": 69443624, + "step": 103015 + }, + { + "epoch": 2.5167957393789853, + "grad_norm": 0.023470517247915268, + "learning_rate": 1.1621104791383688e-06, + "loss": 0.0351, + "num_input_tokens_seen": 69447720, + "step": 103020 + }, + { + "epoch": 2.5169178902108325, + "grad_norm": 0.2565382122993469, + "learning_rate": 1.1620263289492316e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69450920, + "step": 103025 + }, + { + "epoch": 2.5170400410426796, + "grad_norm": 0.04461226239800453, + "learning_rate": 1.161942177581795e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69454248, + "step": 103030 + }, + { + "epoch": 2.517162191874527, + "grad_norm": 16.27156639099121, + "learning_rate": 1.1618580250366714e-06, + "loss": 0.0843, + "num_input_tokens_seen": 69457512, + "step": 103035 + }, + { + "epoch": 2.5172843427063736, + "grad_norm": 0.05952126532793045, + "learning_rate": 1.161773871314473e-06, + "loss": 0.0816, + "num_input_tokens_seen": 69460648, + "step": 103040 + }, + { + "epoch": 2.517406493538221, + "grad_norm": 0.14907532930374146, + "learning_rate": 1.1616897164158112e-06, + "loss": 0.0288, + "num_input_tokens_seen": 69463912, + "step": 103045 + }, + { + "epoch": 2.517528644370068, + "grad_norm": 0.07751405984163284, + "learning_rate": 1.1616055603412982e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69467560, + "step": 103050 + }, + { + "epoch": 2.5176507952019156, + "grad_norm": 12.210570335388184, + "learning_rate": 1.1615214030915463e-06, + "loss": 0.0506, + "num_input_tokens_seen": 69470952, + "step": 103055 + }, + { + "epoch": 2.5177729460337623, + "grad_norm": 0.07794451713562012, + "learning_rate": 1.1614372446671672e-06, + "loss": 0.0483, + "num_input_tokens_seen": 69474216, + "step": 103060 + }, + { + "epoch": 2.5178950968656095, + "grad_norm": 0.12036775052547455, + "learning_rate": 1.1613530850687731e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69477864, + "step": 103065 + }, + { + "epoch": 2.5180172476974567, + "grad_norm": 0.2737639546394348, + "learning_rate": 1.161268924296976e-06, + "loss": 0.0548, + "num_input_tokens_seen": 69480808, + "step": 103070 + }, + { + "epoch": 2.518139398529304, + "grad_norm": 0.018827343359589577, + "learning_rate": 1.161184762352388e-06, + "loss": 0.0295, + "num_input_tokens_seen": 69484712, + "step": 103075 + }, + { + "epoch": 2.518261549361151, + "grad_norm": 0.06971325725317001, + "learning_rate": 1.1611005992356208e-06, + "loss": 0.1276, + "num_input_tokens_seen": 69488296, + "step": 103080 + }, + { + "epoch": 2.5183837001929983, + "grad_norm": 0.0024040297139436007, + "learning_rate": 1.1610164349472868e-06, + "loss": 0.0009, + "num_input_tokens_seen": 69491176, + "step": 103085 + }, + { + "epoch": 2.5185058510248455, + "grad_norm": 0.05693919584155083, + "learning_rate": 1.160932269487998e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69494440, + "step": 103090 + }, + { + "epoch": 2.5186280018566927, + "grad_norm": 280.9774475097656, + "learning_rate": 1.1608481028583666e-06, + "loss": 0.0757, + "num_input_tokens_seen": 69497960, + "step": 103095 + }, + { + "epoch": 2.51875015268854, + "grad_norm": 0.0662260353565216, + "learning_rate": 1.1607639350590042e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69501608, + "step": 103100 + }, + { + "epoch": 2.518872303520387, + "grad_norm": 12.876449584960938, + "learning_rate": 1.1606797660905235e-06, + "loss": 0.1427, + "num_input_tokens_seen": 69505192, + "step": 103105 + }, + { + "epoch": 2.5189944543522342, + "grad_norm": 0.147857666015625, + "learning_rate": 1.1605955959535363e-06, + "loss": 0.0422, + "num_input_tokens_seen": 69508456, + "step": 103110 + }, + { + "epoch": 2.5191166051840814, + "grad_norm": 0.06809548288583755, + "learning_rate": 1.1605114246486545e-06, + "loss": 0.0303, + "num_input_tokens_seen": 69511656, + "step": 103115 + }, + { + "epoch": 2.5192387560159286, + "grad_norm": 0.08468685299158096, + "learning_rate": 1.1604272521764904e-06, + "loss": 0.1495, + "num_input_tokens_seen": 69515112, + "step": 103120 + }, + { + "epoch": 2.5193609068477754, + "grad_norm": 0.20446525514125824, + "learning_rate": 1.1603430785376564e-06, + "loss": 0.0458, + "num_input_tokens_seen": 69518376, + "step": 103125 + }, + { + "epoch": 2.519483057679623, + "grad_norm": 0.10892233997583389, + "learning_rate": 1.1602589037327644e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69522216, + "step": 103130 + }, + { + "epoch": 2.5196052085114697, + "grad_norm": 0.059261370450258255, + "learning_rate": 1.1601747277624265e-06, + "loss": 0.0009, + "num_input_tokens_seen": 69525864, + "step": 103135 + }, + { + "epoch": 2.5197273593433174, + "grad_norm": 0.18825897574424744, + "learning_rate": 1.1600905506272552e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69528936, + "step": 103140 + }, + { + "epoch": 2.519849510175164, + "grad_norm": 0.08533114939928055, + "learning_rate": 1.1600063723278618e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69532136, + "step": 103145 + }, + { + "epoch": 2.5199716610070113, + "grad_norm": 0.1688610017299652, + "learning_rate": 1.1599221928648595e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69535272, + "step": 103150 + }, + { + "epoch": 2.5200938118388585, + "grad_norm": 0.06158433482050896, + "learning_rate": 1.1598380122388598e-06, + "loss": 0.1149, + "num_input_tokens_seen": 69538664, + "step": 103155 + }, + { + "epoch": 2.5202159626707057, + "grad_norm": 0.44061222672462463, + "learning_rate": 1.1597538304504751e-06, + "loss": 0.0008, + "num_input_tokens_seen": 69541800, + "step": 103160 + }, + { + "epoch": 2.520338113502553, + "grad_norm": 0.03005329892039299, + "learning_rate": 1.1596696475003176e-06, + "loss": 0.0886, + "num_input_tokens_seen": 69545320, + "step": 103165 + }, + { + "epoch": 2.5204602643344, + "grad_norm": 0.027707532048225403, + "learning_rate": 1.1595854633889994e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69548520, + "step": 103170 + }, + { + "epoch": 2.5205824151662473, + "grad_norm": 120.6915283203125, + "learning_rate": 1.1595012781171326e-06, + "loss": 0.0377, + "num_input_tokens_seen": 69552040, + "step": 103175 + }, + { + "epoch": 2.5207045659980944, + "grad_norm": 0.06590377539396286, + "learning_rate": 1.1594170916853298e-06, + "loss": 0.0368, + "num_input_tokens_seen": 69556200, + "step": 103180 + }, + { + "epoch": 2.5208267168299416, + "grad_norm": 21.437572479248047, + "learning_rate": 1.1593329040942032e-06, + "loss": 0.0416, + "num_input_tokens_seen": 69559144, + "step": 103185 + }, + { + "epoch": 2.520948867661789, + "grad_norm": 0.020117169246077538, + "learning_rate": 1.159248715344365e-06, + "loss": 0.0662, + "num_input_tokens_seen": 69562984, + "step": 103190 + }, + { + "epoch": 2.521071018493636, + "grad_norm": 0.006265338975936174, + "learning_rate": 1.159164525436427e-06, + "loss": 0.1218, + "num_input_tokens_seen": 69566056, + "step": 103195 + }, + { + "epoch": 2.521193169325483, + "grad_norm": 7.672004222869873, + "learning_rate": 1.1590803343710018e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69569256, + "step": 103200 + }, + { + "epoch": 2.5213153201573304, + "grad_norm": 0.02416401356458664, + "learning_rate": 1.1589961421487017e-06, + "loss": 0.0293, + "num_input_tokens_seen": 69572456, + "step": 103205 + }, + { + "epoch": 2.5214374709891776, + "grad_norm": 1.4142011404037476, + "learning_rate": 1.1589119487701386e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69575912, + "step": 103210 + }, + { + "epoch": 2.5215596218210248, + "grad_norm": 0.10865466296672821, + "learning_rate": 1.1588277542359253e-06, + "loss": 0.0392, + "num_input_tokens_seen": 69579432, + "step": 103215 + }, + { + "epoch": 2.5216817726528715, + "grad_norm": 0.03473128750920296, + "learning_rate": 1.1587435585466738e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69583016, + "step": 103220 + }, + { + "epoch": 2.521803923484719, + "grad_norm": 0.35919907689094543, + "learning_rate": 1.1586593617029966e-06, + "loss": 0.0011, + "num_input_tokens_seen": 69586216, + "step": 103225 + }, + { + "epoch": 2.521926074316566, + "grad_norm": 0.10506638139486313, + "learning_rate": 1.1585751637055056e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69589352, + "step": 103230 + }, + { + "epoch": 2.5220482251484135, + "grad_norm": 0.1405552625656128, + "learning_rate": 1.1584909645548136e-06, + "loss": 0.0373, + "num_input_tokens_seen": 69592936, + "step": 103235 + }, + { + "epoch": 2.5221703759802603, + "grad_norm": 0.13142479956150055, + "learning_rate": 1.1584067642515325e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69595816, + "step": 103240 + }, + { + "epoch": 2.5222925268121075, + "grad_norm": 1.2676528692245483, + "learning_rate": 1.158322562796275e-06, + "loss": 0.001, + "num_input_tokens_seen": 69598888, + "step": 103245 + }, + { + "epoch": 2.5224146776439547, + "grad_norm": 0.06140557676553726, + "learning_rate": 1.158238360189653e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69602728, + "step": 103250 + }, + { + "epoch": 2.522536828475802, + "grad_norm": 0.4262479543685913, + "learning_rate": 1.1581541564322792e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69605928, + "step": 103255 + }, + { + "epoch": 2.522658979307649, + "grad_norm": 0.046194881200790405, + "learning_rate": 1.1580699515247658e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69609384, + "step": 103260 + }, + { + "epoch": 2.5227811301394962, + "grad_norm": 0.0063371495343744755, + "learning_rate": 1.1579857454677253e-06, + "loss": 0.0614, + "num_input_tokens_seen": 69612520, + "step": 103265 + }, + { + "epoch": 2.5229032809713434, + "grad_norm": 0.04240964353084564, + "learning_rate": 1.1579015382617696e-06, + "loss": 0.0446, + "num_input_tokens_seen": 69616424, + "step": 103270 + }, + { + "epoch": 2.5230254318031906, + "grad_norm": 0.011216072365641594, + "learning_rate": 1.1578173299075118e-06, + "loss": 0.0894, + "num_input_tokens_seen": 69619688, + "step": 103275 + }, + { + "epoch": 2.523147582635038, + "grad_norm": 0.011698591522872448, + "learning_rate": 1.1577331204055638e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69623080, + "step": 103280 + }, + { + "epoch": 2.523269733466885, + "grad_norm": 0.015079408884048462, + "learning_rate": 1.1576489097565383e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69626408, + "step": 103285 + }, + { + "epoch": 2.523391884298732, + "grad_norm": 28.438676834106445, + "learning_rate": 1.1575646979610475e-06, + "loss": 0.0268, + "num_input_tokens_seen": 69630120, + "step": 103290 + }, + { + "epoch": 2.5235140351305794, + "grad_norm": 0.2645500898361206, + "learning_rate": 1.1574804850197037e-06, + "loss": 0.0504, + "num_input_tokens_seen": 69633704, + "step": 103295 + }, + { + "epoch": 2.5236361859624266, + "grad_norm": 0.026053164154291153, + "learning_rate": 1.1573962709331196e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69636968, + "step": 103300 + }, + { + "epoch": 2.5237583367942733, + "grad_norm": 0.05643535032868385, + "learning_rate": 1.1573120557019071e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69640232, + "step": 103305 + }, + { + "epoch": 2.523880487626121, + "grad_norm": 26.065277099609375, + "learning_rate": 1.1572278393266794e-06, + "loss": 0.0786, + "num_input_tokens_seen": 69643496, + "step": 103310 + }, + { + "epoch": 2.5240026384579677, + "grad_norm": 0.014052963815629482, + "learning_rate": 1.1571436218080485e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69647208, + "step": 103315 + }, + { + "epoch": 2.5241247892898153, + "grad_norm": 0.11288266628980637, + "learning_rate": 1.157059403146627e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69650600, + "step": 103320 + }, + { + "epoch": 2.524246940121662, + "grad_norm": 0.29468193650245667, + "learning_rate": 1.156975183343027e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69654184, + "step": 103325 + }, + { + "epoch": 2.5243690909535093, + "grad_norm": 0.07013779133558273, + "learning_rate": 1.1568909623978612e-06, + "loss": 0.1288, + "num_input_tokens_seen": 69657448, + "step": 103330 + }, + { + "epoch": 2.5244912417853564, + "grad_norm": 0.12776708602905273, + "learning_rate": 1.1568067403117426e-06, + "loss": 0.0426, + "num_input_tokens_seen": 69660712, + "step": 103335 + }, + { + "epoch": 2.5246133926172036, + "grad_norm": 0.009856940247118473, + "learning_rate": 1.1567225170852828e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69664104, + "step": 103340 + }, + { + "epoch": 2.524735543449051, + "grad_norm": 0.042370233684778214, + "learning_rate": 1.156638292719095e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69667496, + "step": 103345 + }, + { + "epoch": 2.524857694280898, + "grad_norm": 0.05852619186043739, + "learning_rate": 1.1565540672137913e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69670504, + "step": 103350 + }, + { + "epoch": 2.524979845112745, + "grad_norm": 51.45964813232422, + "learning_rate": 1.1564698405699843e-06, + "loss": 0.0945, + "num_input_tokens_seen": 69673512, + "step": 103355 + }, + { + "epoch": 2.5251019959445924, + "grad_norm": 0.035364434123039246, + "learning_rate": 1.1563856127882865e-06, + "loss": 0.0491, + "num_input_tokens_seen": 69677032, + "step": 103360 + }, + { + "epoch": 2.5252241467764396, + "grad_norm": 62.330257415771484, + "learning_rate": 1.1563013838693102e-06, + "loss": 0.0574, + "num_input_tokens_seen": 69680680, + "step": 103365 + }, + { + "epoch": 2.5253462976082868, + "grad_norm": 0.4028855860233307, + "learning_rate": 1.1562171538136684e-06, + "loss": 0.0349, + "num_input_tokens_seen": 69684200, + "step": 103370 + }, + { + "epoch": 2.525468448440134, + "grad_norm": 0.04933023825287819, + "learning_rate": 1.1561329226219736e-06, + "loss": 0.0358, + "num_input_tokens_seen": 69687656, + "step": 103375 + }, + { + "epoch": 2.525590599271981, + "grad_norm": 0.025662919506430626, + "learning_rate": 1.156048690294838e-06, + "loss": 0.0501, + "num_input_tokens_seen": 69691112, + "step": 103380 + }, + { + "epoch": 2.5257127501038283, + "grad_norm": 0.020099427551031113, + "learning_rate": 1.1559644568328746e-06, + "loss": 0.0477, + "num_input_tokens_seen": 69694056, + "step": 103385 + }, + { + "epoch": 2.5258349009356755, + "grad_norm": 0.20844100415706635, + "learning_rate": 1.1558802222366954e-06, + "loss": 0.0014, + "num_input_tokens_seen": 69697192, + "step": 103390 + }, + { + "epoch": 2.5259570517675227, + "grad_norm": 0.02002965472638607, + "learning_rate": 1.1557959865069133e-06, + "loss": 0.01, + "num_input_tokens_seen": 69700264, + "step": 103395 + }, + { + "epoch": 2.5260792025993695, + "grad_norm": 0.014823324047029018, + "learning_rate": 1.1557117496441414e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69703784, + "step": 103400 + }, + { + "epoch": 2.526201353431217, + "grad_norm": 0.012353803031146526, + "learning_rate": 1.1556275116489913e-06, + "loss": 0.0667, + "num_input_tokens_seen": 69707240, + "step": 103405 + }, + { + "epoch": 2.526323504263064, + "grad_norm": 0.027004987001419067, + "learning_rate": 1.1555432725220762e-06, + "loss": 0.0407, + "num_input_tokens_seen": 69710120, + "step": 103410 + }, + { + "epoch": 2.526445655094911, + "grad_norm": 0.03901997208595276, + "learning_rate": 1.1554590322640088e-06, + "loss": 0.0411, + "num_input_tokens_seen": 69713448, + "step": 103415 + }, + { + "epoch": 2.5265678059267582, + "grad_norm": 0.020968155935406685, + "learning_rate": 1.1553747908754012e-06, + "loss": 0.0514, + "num_input_tokens_seen": 69716776, + "step": 103420 + }, + { + "epoch": 2.5266899567586054, + "grad_norm": 0.07576152682304382, + "learning_rate": 1.1552905483568662e-06, + "loss": 0.1284, + "num_input_tokens_seen": 69720168, + "step": 103425 + }, + { + "epoch": 2.5268121075904526, + "grad_norm": 13.619282722473145, + "learning_rate": 1.1552063047090167e-06, + "loss": 0.0299, + "num_input_tokens_seen": 69723496, + "step": 103430 + }, + { + "epoch": 2.5269342584223, + "grad_norm": 0.025862164795398712, + "learning_rate": 1.1551220599324654e-06, + "loss": 0.0372, + "num_input_tokens_seen": 69727144, + "step": 103435 + }, + { + "epoch": 2.527056409254147, + "grad_norm": 23.02949333190918, + "learning_rate": 1.1550378140278245e-06, + "loss": 0.0396, + "num_input_tokens_seen": 69730536, + "step": 103440 + }, + { + "epoch": 2.527178560085994, + "grad_norm": 0.7969344854354858, + "learning_rate": 1.1549535669957072e-06, + "loss": 0.0008, + "num_input_tokens_seen": 69733928, + "step": 103445 + }, + { + "epoch": 2.5273007109178414, + "grad_norm": 0.06862567365169525, + "learning_rate": 1.1548693188367256e-06, + "loss": 0.0341, + "num_input_tokens_seen": 69737192, + "step": 103450 + }, + { + "epoch": 2.5274228617496886, + "grad_norm": 0.08049945533275604, + "learning_rate": 1.1547850695514929e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69740200, + "step": 103455 + }, + { + "epoch": 2.5275450125815357, + "grad_norm": 0.0023474690970033407, + "learning_rate": 1.1547008191406213e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69743720, + "step": 103460 + }, + { + "epoch": 2.527667163413383, + "grad_norm": 0.11849883198738098, + "learning_rate": 1.154616567604724e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69746856, + "step": 103465 + }, + { + "epoch": 2.52778931424523, + "grad_norm": 0.06941009312868118, + "learning_rate": 1.1545323149444132e-06, + "loss": 0.0006, + "num_input_tokens_seen": 69750248, + "step": 103470 + }, + { + "epoch": 2.5279114650770773, + "grad_norm": 0.09503491222858429, + "learning_rate": 1.1544480611603021e-06, + "loss": 0.0527, + "num_input_tokens_seen": 69753576, + "step": 103475 + }, + { + "epoch": 2.5280336159089245, + "grad_norm": 0.011635013855993748, + "learning_rate": 1.154363806253003e-06, + "loss": 0.0353, + "num_input_tokens_seen": 69756712, + "step": 103480 + }, + { + "epoch": 2.5281557667407712, + "grad_norm": 37.80340576171875, + "learning_rate": 1.1542795502231289e-06, + "loss": 0.0446, + "num_input_tokens_seen": 69759720, + "step": 103485 + }, + { + "epoch": 2.528277917572619, + "grad_norm": 0.006175327580422163, + "learning_rate": 1.1541952930712919e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69762920, + "step": 103490 + }, + { + "epoch": 2.5284000684044656, + "grad_norm": 40.25859832763672, + "learning_rate": 1.1541110347981059e-06, + "loss": 0.0397, + "num_input_tokens_seen": 69766376, + "step": 103495 + }, + { + "epoch": 2.5285222192363133, + "grad_norm": 0.016212737187743187, + "learning_rate": 1.1540267754041826e-06, + "loss": 0.0452, + "num_input_tokens_seen": 69770088, + "step": 103500 + }, + { + "epoch": 2.52864437006816, + "grad_norm": 0.30683091282844543, + "learning_rate": 1.1539425148901356e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69773864, + "step": 103505 + }, + { + "epoch": 2.528766520900007, + "grad_norm": 0.03833984583616257, + "learning_rate": 1.1538582532565768e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69777576, + "step": 103510 + }, + { + "epoch": 2.5288886717318544, + "grad_norm": 0.02461802400648594, + "learning_rate": 1.1537739905041197e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69781288, + "step": 103515 + }, + { + "epoch": 2.5290108225637016, + "grad_norm": 0.0017495234496891499, + "learning_rate": 1.1536897266333766e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69784488, + "step": 103520 + }, + { + "epoch": 2.5291329733955488, + "grad_norm": 44.30553436279297, + "learning_rate": 1.1536054616449602e-06, + "loss": 0.0615, + "num_input_tokens_seen": 69787752, + "step": 103525 + }, + { + "epoch": 2.529255124227396, + "grad_norm": 0.06662532687187195, + "learning_rate": 1.153521195539484e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69790888, + "step": 103530 + }, + { + "epoch": 2.529377275059243, + "grad_norm": 25.166748046875, + "learning_rate": 1.1534369283175602e-06, + "loss": 0.059, + "num_input_tokens_seen": 69794408, + "step": 103535 + }, + { + "epoch": 2.5294994258910903, + "grad_norm": 42.99397659301758, + "learning_rate": 1.1533526599798017e-06, + "loss": 0.154, + "num_input_tokens_seen": 69797672, + "step": 103540 + }, + { + "epoch": 2.5296215767229375, + "grad_norm": 0.0017442662501707673, + "learning_rate": 1.1532683905268216e-06, + "loss": 0.0, + "num_input_tokens_seen": 69800744, + "step": 103545 + }, + { + "epoch": 2.5297437275547847, + "grad_norm": 0.02642848715186119, + "learning_rate": 1.1531841199592323e-06, + "loss": 0.0537, + "num_input_tokens_seen": 69804456, + "step": 103550 + }, + { + "epoch": 2.529865878386632, + "grad_norm": 0.06263232976198196, + "learning_rate": 1.1530998482776473e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69807784, + "step": 103555 + }, + { + "epoch": 2.529988029218479, + "grad_norm": 6.440359115600586, + "learning_rate": 1.1530155754826788e-06, + "loss": 0.0007, + "num_input_tokens_seen": 69811496, + "step": 103560 + }, + { + "epoch": 2.5301101800503263, + "grad_norm": 0.02709343284368515, + "learning_rate": 1.1529313015749399e-06, + "loss": 0.0411, + "num_input_tokens_seen": 69814888, + "step": 103565 + }, + { + "epoch": 2.530232330882173, + "grad_norm": 0.17237377166748047, + "learning_rate": 1.1528470265550434e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69817896, + "step": 103570 + }, + { + "epoch": 2.5303544817140207, + "grad_norm": 0.00297606666572392, + "learning_rate": 1.1527627504236022e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69821416, + "step": 103575 + }, + { + "epoch": 2.5304766325458674, + "grad_norm": 1.024841070175171, + "learning_rate": 1.1526784731812292e-06, + "loss": 0.0481, + "num_input_tokens_seen": 69824808, + "step": 103580 + }, + { + "epoch": 2.530598783377715, + "grad_norm": 0.609325110912323, + "learning_rate": 1.1525941948285372e-06, + "loss": 0.0322, + "num_input_tokens_seen": 69827880, + "step": 103585 + }, + { + "epoch": 2.530720934209562, + "grad_norm": 0.01956762745976448, + "learning_rate": 1.1525099153661391e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69831144, + "step": 103590 + }, + { + "epoch": 2.530843085041409, + "grad_norm": 0.004689326509833336, + "learning_rate": 1.1524256347946482e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69834216, + "step": 103595 + }, + { + "epoch": 2.530965235873256, + "grad_norm": 0.010422090999782085, + "learning_rate": 1.1523413531146768e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69837736, + "step": 103600 + }, + { + "epoch": 2.5310873867051034, + "grad_norm": 9.557653427124023, + "learning_rate": 1.1522570703268381e-06, + "loss": 0.2075, + "num_input_tokens_seen": 69841192, + "step": 103605 + }, + { + "epoch": 2.5312095375369505, + "grad_norm": 0.26799166202545166, + "learning_rate": 1.152172786431745e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69844584, + "step": 103610 + }, + { + "epoch": 2.5313316883687977, + "grad_norm": 0.018254579976201057, + "learning_rate": 1.152088501430011e-06, + "loss": 0.0927, + "num_input_tokens_seen": 69847656, + "step": 103615 + }, + { + "epoch": 2.531453839200645, + "grad_norm": 0.004553716164082289, + "learning_rate": 1.152004215322248e-06, + "loss": 0.1143, + "num_input_tokens_seen": 69850920, + "step": 103620 + }, + { + "epoch": 2.531575990032492, + "grad_norm": 0.36942997574806213, + "learning_rate": 1.1519199281090697e-06, + "loss": 0.0559, + "num_input_tokens_seen": 69854248, + "step": 103625 + }, + { + "epoch": 2.5316981408643393, + "grad_norm": 0.31802067160606384, + "learning_rate": 1.1518356397910887e-06, + "loss": 0.0527, + "num_input_tokens_seen": 69857640, + "step": 103630 + }, + { + "epoch": 2.5318202916961865, + "grad_norm": 0.22344498336315155, + "learning_rate": 1.151751350368918e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69861288, + "step": 103635 + }, + { + "epoch": 2.5319424425280337, + "grad_norm": 893.3682861328125, + "learning_rate": 1.1516670598431709e-06, + "loss": 0.076, + "num_input_tokens_seen": 69864744, + "step": 103640 + }, + { + "epoch": 2.532064593359881, + "grad_norm": 0.15178991854190826, + "learning_rate": 1.15158276821446e-06, + "loss": 0.0737, + "num_input_tokens_seen": 69867880, + "step": 103645 + }, + { + "epoch": 2.532186744191728, + "grad_norm": 0.035966865718364716, + "learning_rate": 1.1514984754833983e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69871464, + "step": 103650 + }, + { + "epoch": 2.5323088950235753, + "grad_norm": 0.05065901577472687, + "learning_rate": 1.1514141816505992e-06, + "loss": 0.0004, + "num_input_tokens_seen": 69874600, + "step": 103655 + }, + { + "epoch": 2.5324310458554224, + "grad_norm": 0.1139889732003212, + "learning_rate": 1.1513298867166755e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69877736, + "step": 103660 + }, + { + "epoch": 2.532553196687269, + "grad_norm": 0.07051640748977661, + "learning_rate": 1.1512455906822398e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69881000, + "step": 103665 + }, + { + "epoch": 2.532675347519117, + "grad_norm": 27.080020904541016, + "learning_rate": 1.1511612935479058e-06, + "loss": 0.0609, + "num_input_tokens_seen": 69884456, + "step": 103670 + }, + { + "epoch": 2.5327974983509636, + "grad_norm": 0.03246402367949486, + "learning_rate": 1.1510769953142858e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69887976, + "step": 103675 + }, + { + "epoch": 2.532919649182811, + "grad_norm": 0.007298397831618786, + "learning_rate": 1.1509926959819936e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69891304, + "step": 103680 + }, + { + "epoch": 2.533041800014658, + "grad_norm": 0.011478755623102188, + "learning_rate": 1.1509083955516418e-06, + "loss": 0.0623, + "num_input_tokens_seen": 69894696, + "step": 103685 + }, + { + "epoch": 2.533163950846505, + "grad_norm": 0.10327120125293732, + "learning_rate": 1.1508240940238438e-06, + "loss": 0.0379, + "num_input_tokens_seen": 69897960, + "step": 103690 + }, + { + "epoch": 2.5332861016783523, + "grad_norm": 0.003992895130068064, + "learning_rate": 1.150739791399212e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69901352, + "step": 103695 + }, + { + "epoch": 2.5334082525101995, + "grad_norm": 0.0049509950913488865, + "learning_rate": 1.1506554876783604e-06, + "loss": 0.0708, + "num_input_tokens_seen": 69904744, + "step": 103700 + }, + { + "epoch": 2.5335304033420467, + "grad_norm": 0.002750345505774021, + "learning_rate": 1.1505711828619008e-06, + "loss": 0.0433, + "num_input_tokens_seen": 69908328, + "step": 103705 + }, + { + "epoch": 2.533652554173894, + "grad_norm": 0.014179835096001625, + "learning_rate": 1.150486876950448e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69911848, + "step": 103710 + }, + { + "epoch": 2.533774705005741, + "grad_norm": 0.02460402064025402, + "learning_rate": 1.1504025699446136e-06, + "loss": 0.0379, + "num_input_tokens_seen": 69914984, + "step": 103715 + }, + { + "epoch": 2.5338968558375883, + "grad_norm": 0.08573606610298157, + "learning_rate": 1.1503182618450114e-06, + "loss": 0.04, + "num_input_tokens_seen": 69918440, + "step": 103720 + }, + { + "epoch": 2.5340190066694355, + "grad_norm": 32.343074798583984, + "learning_rate": 1.1502339526522545e-06, + "loss": 0.103, + "num_input_tokens_seen": 69921448, + "step": 103725 + }, + { + "epoch": 2.5341411575012827, + "grad_norm": 146.36965942382812, + "learning_rate": 1.1501496423669557e-06, + "loss": 0.1077, + "num_input_tokens_seen": 69924776, + "step": 103730 + }, + { + "epoch": 2.53426330833313, + "grad_norm": 0.018751777708530426, + "learning_rate": 1.1500653309897282e-06, + "loss": 0.0513, + "num_input_tokens_seen": 69927912, + "step": 103735 + }, + { + "epoch": 2.534385459164977, + "grad_norm": 10.86872673034668, + "learning_rate": 1.1499810185211853e-06, + "loss": 0.1221, + "num_input_tokens_seen": 69931112, + "step": 103740 + }, + { + "epoch": 2.5345076099968242, + "grad_norm": 0.5235853791236877, + "learning_rate": 1.14989670496194e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69934760, + "step": 103745 + }, + { + "epoch": 2.534629760828671, + "grad_norm": 0.023617833852767944, + "learning_rate": 1.149812390312606e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69938472, + "step": 103750 + }, + { + "epoch": 2.5347519116605186, + "grad_norm": 0.2028709203004837, + "learning_rate": 1.1497280745737955e-06, + "loss": 0.0005, + "num_input_tokens_seen": 69941544, + "step": 103755 + }, + { + "epoch": 2.5348740624923654, + "grad_norm": 0.06458128988742828, + "learning_rate": 1.1496437577461227e-06, + "loss": 0.0577, + "num_input_tokens_seen": 69944744, + "step": 103760 + }, + { + "epoch": 2.534996213324213, + "grad_norm": 0.22427748143672943, + "learning_rate": 1.1495594398301998e-06, + "loss": 0.0896, + "num_input_tokens_seen": 69948008, + "step": 103765 + }, + { + "epoch": 2.5351183641560597, + "grad_norm": 0.015495997853577137, + "learning_rate": 1.1494751208266408e-06, + "loss": 0.0024, + "num_input_tokens_seen": 69951144, + "step": 103770 + }, + { + "epoch": 2.535240514987907, + "grad_norm": 0.03996007516980171, + "learning_rate": 1.1493908007360581e-06, + "loss": 0.0001, + "num_input_tokens_seen": 69955304, + "step": 103775 + }, + { + "epoch": 2.535362665819754, + "grad_norm": 0.00529811205342412, + "learning_rate": 1.1493064795590655e-06, + "loss": 0.0611, + "num_input_tokens_seen": 69958696, + "step": 103780 + }, + { + "epoch": 2.5354848166516013, + "grad_norm": 0.026105934754014015, + "learning_rate": 1.1492221572962762e-06, + "loss": 0.0002, + "num_input_tokens_seen": 69962024, + "step": 103785 + }, + { + "epoch": 2.5356069674834485, + "grad_norm": 0.013941958546638489, + "learning_rate": 1.1491378339483028e-06, + "loss": 0.1421, + "num_input_tokens_seen": 69965480, + "step": 103790 + }, + { + "epoch": 2.5357291183152957, + "grad_norm": 0.01809925213456154, + "learning_rate": 1.1490535095157594e-06, + "loss": 0.0547, + "num_input_tokens_seen": 69968808, + "step": 103795 + }, + { + "epoch": 2.535851269147143, + "grad_norm": 0.8025308847427368, + "learning_rate": 1.1489691839992584e-06, + "loss": 0.1093, + "num_input_tokens_seen": 69972072, + "step": 103800 + }, + { + "epoch": 2.53597341997899, + "grad_norm": 0.03385000675916672, + "learning_rate": 1.1488848573994137e-06, + "loss": 0.0117, + "num_input_tokens_seen": 69975592, + "step": 103805 + }, + { + "epoch": 2.5360955708108373, + "grad_norm": 0.010065199807286263, + "learning_rate": 1.148800529716838e-06, + "loss": 0.0939, + "num_input_tokens_seen": 69978792, + "step": 103810 + }, + { + "epoch": 2.5362177216426844, + "grad_norm": 0.12215670198202133, + "learning_rate": 1.1487162009521453e-06, + "loss": 0.0549, + "num_input_tokens_seen": 69983080, + "step": 103815 + }, + { + "epoch": 2.5363398724745316, + "grad_norm": 0.012825618498027325, + "learning_rate": 1.1486318711059481e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69986408, + "step": 103820 + }, + { + "epoch": 2.536462023306379, + "grad_norm": 0.26266250014305115, + "learning_rate": 1.14854754017886e-06, + "loss": 0.0032, + "num_input_tokens_seen": 69989672, + "step": 103825 + }, + { + "epoch": 2.536584174138226, + "grad_norm": 22.808828353881836, + "learning_rate": 1.1484632081714941e-06, + "loss": 0.0553, + "num_input_tokens_seen": 69992872, + "step": 103830 + }, + { + "epoch": 2.536706324970073, + "grad_norm": 0.2519030272960663, + "learning_rate": 1.148378875084464e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69996328, + "step": 103835 + }, + { + "epoch": 2.5368284758019204, + "grad_norm": 0.03134218975901604, + "learning_rate": 1.1482945409183825e-06, + "loss": 0.0003, + "num_input_tokens_seen": 69999400, + "step": 103840 + }, + { + "epoch": 2.536950626633767, + "grad_norm": 583.1251220703125, + "learning_rate": 1.1482102056738636e-06, + "loss": 0.0552, + "num_input_tokens_seen": 70002728, + "step": 103845 + }, + { + "epoch": 2.5370727774656148, + "grad_norm": 72.49507141113281, + "learning_rate": 1.1481258693515202e-06, + "loss": 0.0368, + "num_input_tokens_seen": 70006824, + "step": 103850 + }, + { + "epoch": 2.5371949282974615, + "grad_norm": 0.02389582060277462, + "learning_rate": 1.1480415319519653e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70010792, + "step": 103855 + }, + { + "epoch": 2.5373170791293087, + "grad_norm": 0.028493667021393776, + "learning_rate": 1.1479571934758128e-06, + "loss": 0.0492, + "num_input_tokens_seen": 70014120, + "step": 103860 + }, + { + "epoch": 2.537439229961156, + "grad_norm": 0.18289715051651, + "learning_rate": 1.147872853923676e-06, + "loss": 0.001, + "num_input_tokens_seen": 70017192, + "step": 103865 + }, + { + "epoch": 2.537561380793003, + "grad_norm": 0.030623802915215492, + "learning_rate": 1.1477885132961678e-06, + "loss": 0.1266, + "num_input_tokens_seen": 70021288, + "step": 103870 + }, + { + "epoch": 2.5376835316248503, + "grad_norm": 0.007544918451458216, + "learning_rate": 1.1477041715939018e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70024616, + "step": 103875 + }, + { + "epoch": 2.5378056824566975, + "grad_norm": 0.006404994986951351, + "learning_rate": 1.1476198288174912e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70027752, + "step": 103880 + }, + { + "epoch": 2.5379278332885447, + "grad_norm": 0.03089703619480133, + "learning_rate": 1.1475354849675496e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70031080, + "step": 103885 + }, + { + "epoch": 2.538049984120392, + "grad_norm": 18.35439682006836, + "learning_rate": 1.1474511400446903e-06, + "loss": 0.1082, + "num_input_tokens_seen": 70034472, + "step": 103890 + }, + { + "epoch": 2.538172134952239, + "grad_norm": 0.25763773918151855, + "learning_rate": 1.1473667940495265e-06, + "loss": 0.0636, + "num_input_tokens_seen": 70037800, + "step": 103895 + }, + { + "epoch": 2.5382942857840862, + "grad_norm": 0.5698397159576416, + "learning_rate": 1.1472824469826718e-06, + "loss": 0.0506, + "num_input_tokens_seen": 70041448, + "step": 103900 + }, + { + "epoch": 2.5384164366159334, + "grad_norm": 0.06392552703619003, + "learning_rate": 1.1471980988447397e-06, + "loss": 0.0272, + "num_input_tokens_seen": 70044712, + "step": 103905 + }, + { + "epoch": 2.5385385874477806, + "grad_norm": 0.008248881436884403, + "learning_rate": 1.1471137496363435e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70047848, + "step": 103910 + }, + { + "epoch": 2.538660738279628, + "grad_norm": 0.022066285833716393, + "learning_rate": 1.1470293993580961e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70050856, + "step": 103915 + }, + { + "epoch": 2.538782889111475, + "grad_norm": 0.03940925747156143, + "learning_rate": 1.1469450480106118e-06, + "loss": 0.1717, + "num_input_tokens_seen": 70054056, + "step": 103920 + }, + { + "epoch": 2.538905039943322, + "grad_norm": 0.11242895573377609, + "learning_rate": 1.1468606955945034e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70057384, + "step": 103925 + }, + { + "epoch": 2.539027190775169, + "grad_norm": 0.13371014595031738, + "learning_rate": 1.1467763421103846e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70060328, + "step": 103930 + }, + { + "epoch": 2.5391493416070166, + "grad_norm": 0.04523512348532677, + "learning_rate": 1.1466919875588688e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70063528, + "step": 103935 + }, + { + "epoch": 2.5392714924388633, + "grad_norm": 0.050599198788404465, + "learning_rate": 1.1466076319405693e-06, + "loss": 0.0429, + "num_input_tokens_seen": 70066856, + "step": 103940 + }, + { + "epoch": 2.539393643270711, + "grad_norm": 0.002950431313365698, + "learning_rate": 1.1465232752560996e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70070440, + "step": 103945 + }, + { + "epoch": 2.5395157941025577, + "grad_norm": 0.1107611358165741, + "learning_rate": 1.1464389175060734e-06, + "loss": 0.0763, + "num_input_tokens_seen": 70073640, + "step": 103950 + }, + { + "epoch": 2.539637944934405, + "grad_norm": 64.87561798095703, + "learning_rate": 1.1463545586911036e-06, + "loss": 0.061, + "num_input_tokens_seen": 70077224, + "step": 103955 + }, + { + "epoch": 2.539760095766252, + "grad_norm": 157.2279815673828, + "learning_rate": 1.1462701988118047e-06, + "loss": 0.0204, + "num_input_tokens_seen": 70080872, + "step": 103960 + }, + { + "epoch": 2.5398822465980992, + "grad_norm": 0.033033862709999084, + "learning_rate": 1.146185837868789e-06, + "loss": 0.001, + "num_input_tokens_seen": 70083816, + "step": 103965 + }, + { + "epoch": 2.5400043974299464, + "grad_norm": 0.032412346452474594, + "learning_rate": 1.1461014758626712e-06, + "loss": 0.0478, + "num_input_tokens_seen": 70087080, + "step": 103970 + }, + { + "epoch": 2.5401265482617936, + "grad_norm": 1.0125641822814941, + "learning_rate": 1.146017112794064e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70090344, + "step": 103975 + }, + { + "epoch": 2.540248699093641, + "grad_norm": 0.058324720710515976, + "learning_rate": 1.1459327486635808e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70093480, + "step": 103980 + }, + { + "epoch": 2.540370849925488, + "grad_norm": 0.5170395970344543, + "learning_rate": 1.1458483834718352e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70096552, + "step": 103985 + }, + { + "epoch": 2.540493000757335, + "grad_norm": 16.676877975463867, + "learning_rate": 1.1457640172194414e-06, + "loss": 0.0779, + "num_input_tokens_seen": 70100072, + "step": 103990 + }, + { + "epoch": 2.5406151515891824, + "grad_norm": 0.2772276997566223, + "learning_rate": 1.1456796499070123e-06, + "loss": 0.0007, + "num_input_tokens_seen": 70103208, + "step": 103995 + }, + { + "epoch": 2.5407373024210296, + "grad_norm": 0.2213776856660843, + "learning_rate": 1.1455952815351616e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70106984, + "step": 104000 + }, + { + "epoch": 2.5408594532528768, + "grad_norm": 0.09152863919734955, + "learning_rate": 1.1455109121045028e-06, + "loss": 0.0016, + "num_input_tokens_seen": 70110056, + "step": 104005 + }, + { + "epoch": 2.540981604084724, + "grad_norm": 0.07251527905464172, + "learning_rate": 1.1454265416156497e-06, + "loss": 0.0786, + "num_input_tokens_seen": 70113256, + "step": 104010 + }, + { + "epoch": 2.541103754916571, + "grad_norm": 0.010876025073230267, + "learning_rate": 1.1453421700692152e-06, + "loss": 0.1102, + "num_input_tokens_seen": 70116584, + "step": 104015 + }, + { + "epoch": 2.5412259057484183, + "grad_norm": 0.6156110763549805, + "learning_rate": 1.1452577974658139e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70120296, + "step": 104020 + }, + { + "epoch": 2.541348056580265, + "grad_norm": 0.0007826112559996545, + "learning_rate": 1.1451734238060587e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70124008, + "step": 104025 + }, + { + "epoch": 2.5414702074121127, + "grad_norm": 51.516597747802734, + "learning_rate": 1.145089049090563e-06, + "loss": 0.0318, + "num_input_tokens_seen": 70127336, + "step": 104030 + }, + { + "epoch": 2.5415923582439595, + "grad_norm": 0.01903372071683407, + "learning_rate": 1.145004673319941e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70130216, + "step": 104035 + }, + { + "epoch": 2.5417145090758066, + "grad_norm": 0.014983262866735458, + "learning_rate": 1.144920296494806e-06, + "loss": 0.08, + "num_input_tokens_seen": 70133032, + "step": 104040 + }, + { + "epoch": 2.541836659907654, + "grad_norm": 58.6263542175293, + "learning_rate": 1.1448359186157714e-06, + "loss": 0.1615, + "num_input_tokens_seen": 70136168, + "step": 104045 + }, + { + "epoch": 2.541958810739501, + "grad_norm": 0.15867555141448975, + "learning_rate": 1.1447515396834513e-06, + "loss": 0.0461, + "num_input_tokens_seen": 70139112, + "step": 104050 + }, + { + "epoch": 2.542080961571348, + "grad_norm": 38.69710922241211, + "learning_rate": 1.144667159698459e-06, + "loss": 0.0632, + "num_input_tokens_seen": 70142056, + "step": 104055 + }, + { + "epoch": 2.5422031124031954, + "grad_norm": 0.004569328855723143, + "learning_rate": 1.1445827786614082e-06, + "loss": 0.0895, + "num_input_tokens_seen": 70145256, + "step": 104060 + }, + { + "epoch": 2.5423252632350426, + "grad_norm": 0.27281489968299866, + "learning_rate": 1.1444983965729125e-06, + "loss": 0.0009, + "num_input_tokens_seen": 70148584, + "step": 104065 + }, + { + "epoch": 2.54244741406689, + "grad_norm": 28.797000885009766, + "learning_rate": 1.1444140134335855e-06, + "loss": 0.0608, + "num_input_tokens_seen": 70151720, + "step": 104070 + }, + { + "epoch": 2.542569564898737, + "grad_norm": 0.031929127871990204, + "learning_rate": 1.1443296292440412e-06, + "loss": 0.0352, + "num_input_tokens_seen": 70155432, + "step": 104075 + }, + { + "epoch": 2.542691715730584, + "grad_norm": 20.4262752532959, + "learning_rate": 1.1442452440048929e-06, + "loss": 0.0566, + "num_input_tokens_seen": 70158440, + "step": 104080 + }, + { + "epoch": 2.5428138665624314, + "grad_norm": 0.33279237151145935, + "learning_rate": 1.1441608577167544e-06, + "loss": 0.0275, + "num_input_tokens_seen": 70161960, + "step": 104085 + }, + { + "epoch": 2.5429360173942785, + "grad_norm": 0.028363827615976334, + "learning_rate": 1.1440764703802394e-06, + "loss": 0.0454, + "num_input_tokens_seen": 70165160, + "step": 104090 + }, + { + "epoch": 2.5430581682261257, + "grad_norm": 1.5578937530517578, + "learning_rate": 1.1439920819959614e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70168360, + "step": 104095 + }, + { + "epoch": 2.543180319057973, + "grad_norm": 0.04033007472753525, + "learning_rate": 1.1439076925645347e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70171368, + "step": 104100 + }, + { + "epoch": 2.54330246988982, + "grad_norm": 0.014733080752193928, + "learning_rate": 1.143823302086572e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70174760, + "step": 104105 + }, + { + "epoch": 2.543424620721667, + "grad_norm": 0.0995849072933197, + "learning_rate": 1.1437389105626877e-06, + "loss": 0.1443, + "num_input_tokens_seen": 70178088, + "step": 104110 + }, + { + "epoch": 2.5435467715535145, + "grad_norm": 0.01298166811466217, + "learning_rate": 1.1436545179934953e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70181544, + "step": 104115 + }, + { + "epoch": 2.5436689223853612, + "grad_norm": 0.003690902143716812, + "learning_rate": 1.1435701243796088e-06, + "loss": 0.0225, + "num_input_tokens_seen": 70185320, + "step": 104120 + }, + { + "epoch": 2.543791073217209, + "grad_norm": 0.007218752522021532, + "learning_rate": 1.1434857297216417e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70188712, + "step": 104125 + }, + { + "epoch": 2.5439132240490556, + "grad_norm": 0.010767634958028793, + "learning_rate": 1.143401334020208e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70191784, + "step": 104130 + }, + { + "epoch": 2.544035374880903, + "grad_norm": 0.02760545164346695, + "learning_rate": 1.143316937275921e-06, + "loss": 0.0745, + "num_input_tokens_seen": 70195176, + "step": 104135 + }, + { + "epoch": 2.54415752571275, + "grad_norm": 0.013613578863441944, + "learning_rate": 1.1432325394893946e-06, + "loss": 0.0892, + "num_input_tokens_seen": 70198376, + "step": 104140 + }, + { + "epoch": 2.544279676544597, + "grad_norm": 0.013177074491977692, + "learning_rate": 1.1431481406612427e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70201640, + "step": 104145 + }, + { + "epoch": 2.5444018273764444, + "grad_norm": 0.01608988456428051, + "learning_rate": 1.143063740792079e-06, + "loss": 0.0331, + "num_input_tokens_seen": 70204776, + "step": 104150 + }, + { + "epoch": 2.5445239782082916, + "grad_norm": 0.15606385469436646, + "learning_rate": 1.1429793398825173e-06, + "loss": 0.0663, + "num_input_tokens_seen": 70208040, + "step": 104155 + }, + { + "epoch": 2.5446461290401388, + "grad_norm": 0.22624799609184265, + "learning_rate": 1.1428949379331716e-06, + "loss": 0.1334, + "num_input_tokens_seen": 70211240, + "step": 104160 + }, + { + "epoch": 2.544768279871986, + "grad_norm": 0.007654536981135607, + "learning_rate": 1.1428105349446554e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70214824, + "step": 104165 + }, + { + "epoch": 2.544890430703833, + "grad_norm": 0.17889165878295898, + "learning_rate": 1.1427261309175821e-06, + "loss": 0.0404, + "num_input_tokens_seen": 70217832, + "step": 104170 + }, + { + "epoch": 2.5450125815356803, + "grad_norm": 1.8349424600601196, + "learning_rate": 1.1426417258525667e-06, + "loss": 0.001, + "num_input_tokens_seen": 70221672, + "step": 104175 + }, + { + "epoch": 2.5451347323675275, + "grad_norm": 133.47740173339844, + "learning_rate": 1.1425573197502221e-06, + "loss": 0.0196, + "num_input_tokens_seen": 70224616, + "step": 104180 + }, + { + "epoch": 2.5452568831993747, + "grad_norm": 0.011612365022301674, + "learning_rate": 1.1424729126111623e-06, + "loss": 0.0895, + "num_input_tokens_seen": 70227944, + "step": 104185 + }, + { + "epoch": 2.545379034031222, + "grad_norm": 0.033770378679037094, + "learning_rate": 1.142388504436001e-06, + "loss": 0.0384, + "num_input_tokens_seen": 70231400, + "step": 104190 + }, + { + "epoch": 2.5455011848630686, + "grad_norm": 20.022085189819336, + "learning_rate": 1.1423040952253523e-06, + "loss": 0.0854, + "num_input_tokens_seen": 70235048, + "step": 104195 + }, + { + "epoch": 2.5456233356949163, + "grad_norm": 0.1359310746192932, + "learning_rate": 1.14221968497983e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70238056, + "step": 104200 + }, + { + "epoch": 2.545745486526763, + "grad_norm": 0.12865757942199707, + "learning_rate": 1.1421352737000475e-06, + "loss": 0.0048, + "num_input_tokens_seen": 70241256, + "step": 104205 + }, + { + "epoch": 2.5458676373586107, + "grad_norm": 0.13368584215641022, + "learning_rate": 1.1420508613866193e-06, + "loss": 0.0939, + "num_input_tokens_seen": 70244840, + "step": 104210 + }, + { + "epoch": 2.5459897881904574, + "grad_norm": 0.026051154360175133, + "learning_rate": 1.1419664480401592e-06, + "loss": 0.0282, + "num_input_tokens_seen": 70248040, + "step": 104215 + }, + { + "epoch": 2.5461119390223046, + "grad_norm": 0.04423855245113373, + "learning_rate": 1.141882033661281e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70251560, + "step": 104220 + }, + { + "epoch": 2.546234089854152, + "grad_norm": 0.0026223089080303907, + "learning_rate": 1.1417976182505981e-06, + "loss": 0.0384, + "num_input_tokens_seen": 70254696, + "step": 104225 + }, + { + "epoch": 2.546356240685999, + "grad_norm": 0.009092241525650024, + "learning_rate": 1.141713201808725e-06, + "loss": 0.0489, + "num_input_tokens_seen": 70258408, + "step": 104230 + }, + { + "epoch": 2.546478391517846, + "grad_norm": 0.261366605758667, + "learning_rate": 1.1416287843362753e-06, + "loss": 0.0386, + "num_input_tokens_seen": 70261544, + "step": 104235 + }, + { + "epoch": 2.5466005423496934, + "grad_norm": 0.008134927600622177, + "learning_rate": 1.1415443658338632e-06, + "loss": 0.049, + "num_input_tokens_seen": 70265512, + "step": 104240 + }, + { + "epoch": 2.5467226931815405, + "grad_norm": 0.005791224539279938, + "learning_rate": 1.141459946302102e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70269032, + "step": 104245 + }, + { + "epoch": 2.5468448440133877, + "grad_norm": 0.024592120200395584, + "learning_rate": 1.1413755257416064e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70272552, + "step": 104250 + }, + { + "epoch": 2.546966994845235, + "grad_norm": 779.8514404296875, + "learning_rate": 1.14129110415299e-06, + "loss": 0.0149, + "num_input_tokens_seen": 70276264, + "step": 104255 + }, + { + "epoch": 2.547089145677082, + "grad_norm": 0.012323008850216866, + "learning_rate": 1.1412066815368664e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70280232, + "step": 104260 + }, + { + "epoch": 2.5472112965089293, + "grad_norm": 10.281240463256836, + "learning_rate": 1.1411222578938496e-06, + "loss": 0.0857, + "num_input_tokens_seen": 70283944, + "step": 104265 + }, + { + "epoch": 2.5473334473407765, + "grad_norm": 0.0013304692693054676, + "learning_rate": 1.1410378332245542e-06, + "loss": 0.0043, + "num_input_tokens_seen": 70286888, + "step": 104270 + }, + { + "epoch": 2.5474555981726237, + "grad_norm": 0.023484796285629272, + "learning_rate": 1.1409534075295938e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70289960, + "step": 104275 + }, + { + "epoch": 2.547577749004471, + "grad_norm": 0.007967732846736908, + "learning_rate": 1.140868980809582e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70293480, + "step": 104280 + }, + { + "epoch": 2.547699899836318, + "grad_norm": 13.040853500366211, + "learning_rate": 1.1407845530651336e-06, + "loss": 0.0424, + "num_input_tokens_seen": 70297064, + "step": 104285 + }, + { + "epoch": 2.547822050668165, + "grad_norm": 0.01658696122467518, + "learning_rate": 1.140700124296862e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70300584, + "step": 104290 + }, + { + "epoch": 2.5479442015000124, + "grad_norm": 0.016342423856258392, + "learning_rate": 1.1406156945053806e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70303848, + "step": 104295 + }, + { + "epoch": 2.548066352331859, + "grad_norm": 226.27023315429688, + "learning_rate": 1.1405312636913044e-06, + "loss": 0.0167, + "num_input_tokens_seen": 70307304, + "step": 104300 + }, + { + "epoch": 2.548188503163707, + "grad_norm": 14.65396499633789, + "learning_rate": 1.1404468318552471e-06, + "loss": 0.0823, + "num_input_tokens_seen": 70310888, + "step": 104305 + }, + { + "epoch": 2.5483106539955536, + "grad_norm": 0.19434776902198792, + "learning_rate": 1.140362398997823e-06, + "loss": 0.0244, + "num_input_tokens_seen": 70314664, + "step": 104310 + }, + { + "epoch": 2.5484328048274008, + "grad_norm": 0.00956706702709198, + "learning_rate": 1.1402779651196452e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70318120, + "step": 104315 + }, + { + "epoch": 2.548554955659248, + "grad_norm": 0.6326279044151306, + "learning_rate": 1.1401935302213286e-06, + "loss": 0.0411, + "num_input_tokens_seen": 70321960, + "step": 104320 + }, + { + "epoch": 2.548677106491095, + "grad_norm": 0.01305704191327095, + "learning_rate": 1.1401090943034865e-06, + "loss": 0.0766, + "num_input_tokens_seen": 70325288, + "step": 104325 + }, + { + "epoch": 2.5487992573229423, + "grad_norm": 9.197270393371582, + "learning_rate": 1.140024657366734e-06, + "loss": 0.0956, + "num_input_tokens_seen": 70329000, + "step": 104330 + }, + { + "epoch": 2.5489214081547895, + "grad_norm": 23.622716903686523, + "learning_rate": 1.1399402194116842e-06, + "loss": 0.0524, + "num_input_tokens_seen": 70332904, + "step": 104335 + }, + { + "epoch": 2.5490435589866367, + "grad_norm": 0.02332184463739395, + "learning_rate": 1.1398557804389517e-06, + "loss": 0.0008, + "num_input_tokens_seen": 70335912, + "step": 104340 + }, + { + "epoch": 2.549165709818484, + "grad_norm": 0.01457999274134636, + "learning_rate": 1.1397713404491503e-06, + "loss": 0.0751, + "num_input_tokens_seen": 70339624, + "step": 104345 + }, + { + "epoch": 2.549287860650331, + "grad_norm": 0.06870204210281372, + "learning_rate": 1.139686899442894e-06, + "loss": 0.0365, + "num_input_tokens_seen": 70342568, + "step": 104350 + }, + { + "epoch": 2.5494100114821783, + "grad_norm": 0.521581768989563, + "learning_rate": 1.139602457420797e-06, + "loss": 0.0006, + "num_input_tokens_seen": 70345896, + "step": 104355 + }, + { + "epoch": 2.5495321623140255, + "grad_norm": 0.026709794998168945, + "learning_rate": 1.1395180143834734e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70349096, + "step": 104360 + }, + { + "epoch": 2.5496543131458727, + "grad_norm": 0.03073815070092678, + "learning_rate": 1.139433570331537e-06, + "loss": 0.0394, + "num_input_tokens_seen": 70352616, + "step": 104365 + }, + { + "epoch": 2.54977646397772, + "grad_norm": 0.01742694526910782, + "learning_rate": 1.1393491252656025e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70355944, + "step": 104370 + }, + { + "epoch": 2.5498986148095666, + "grad_norm": 38.13505935668945, + "learning_rate": 1.1392646791862836e-06, + "loss": 0.0468, + "num_input_tokens_seen": 70359848, + "step": 104375 + }, + { + "epoch": 2.5500207656414142, + "grad_norm": 0.018928751349449158, + "learning_rate": 1.1391802320941946e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70362984, + "step": 104380 + }, + { + "epoch": 2.550142916473261, + "grad_norm": 0.2556987404823303, + "learning_rate": 1.1390957839899495e-06, + "loss": 0.1049, + "num_input_tokens_seen": 70366184, + "step": 104385 + }, + { + "epoch": 2.5502650673051086, + "grad_norm": 0.038296084851026535, + "learning_rate": 1.1390113348741624e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70369192, + "step": 104390 + }, + { + "epoch": 2.5503872181369553, + "grad_norm": 0.7614354491233826, + "learning_rate": 1.1389268847474476e-06, + "loss": 0.0037, + "num_input_tokens_seen": 70372328, + "step": 104395 + }, + { + "epoch": 2.5505093689688025, + "grad_norm": 1.100406527519226, + "learning_rate": 1.138842433610419e-06, + "loss": 0.0006, + "num_input_tokens_seen": 70375400, + "step": 104400 + }, + { + "epoch": 2.5506315198006497, + "grad_norm": 0.14379285275936127, + "learning_rate": 1.1387579814636908e-06, + "loss": 0.1083, + "num_input_tokens_seen": 70378600, + "step": 104405 + }, + { + "epoch": 2.550753670632497, + "grad_norm": 14.693830490112305, + "learning_rate": 1.1386735283078775e-06, + "loss": 0.0582, + "num_input_tokens_seen": 70381608, + "step": 104410 + }, + { + "epoch": 2.550875821464344, + "grad_norm": 0.05920464172959328, + "learning_rate": 1.1385890741435926e-06, + "loss": 0.0683, + "num_input_tokens_seen": 70384552, + "step": 104415 + }, + { + "epoch": 2.5509979722961913, + "grad_norm": 32.26267623901367, + "learning_rate": 1.1385046189714509e-06, + "loss": 0.0298, + "num_input_tokens_seen": 70388072, + "step": 104420 + }, + { + "epoch": 2.5511201231280385, + "grad_norm": 0.010261290706694126, + "learning_rate": 1.1384201627920663e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70391784, + "step": 104425 + }, + { + "epoch": 2.5512422739598857, + "grad_norm": 20.50096893310547, + "learning_rate": 1.1383357056060531e-06, + "loss": 0.0701, + "num_input_tokens_seen": 70395880, + "step": 104430 + }, + { + "epoch": 2.551364424791733, + "grad_norm": 11.664816856384277, + "learning_rate": 1.1382512474140255e-06, + "loss": 0.1085, + "num_input_tokens_seen": 70399272, + "step": 104435 + }, + { + "epoch": 2.55148657562358, + "grad_norm": 78.14788055419922, + "learning_rate": 1.1381667882165977e-06, + "loss": 0.0453, + "num_input_tokens_seen": 70402472, + "step": 104440 + }, + { + "epoch": 2.5516087264554272, + "grad_norm": 0.008329739794135094, + "learning_rate": 1.138082328014384e-06, + "loss": 0.0013, + "num_input_tokens_seen": 70405288, + "step": 104445 + }, + { + "epoch": 2.5517308772872744, + "grad_norm": 0.9223102331161499, + "learning_rate": 1.137997866807998e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70408488, + "step": 104450 + }, + { + "epoch": 2.5518530281191216, + "grad_norm": 0.051211223006248474, + "learning_rate": 1.1379134045980545e-06, + "loss": 0.0669, + "num_input_tokens_seen": 70412072, + "step": 104455 + }, + { + "epoch": 2.551975178950969, + "grad_norm": 0.23331056535243988, + "learning_rate": 1.1378289413851677e-06, + "loss": 0.0398, + "num_input_tokens_seen": 70415272, + "step": 104460 + }, + { + "epoch": 2.552097329782816, + "grad_norm": 27.745243072509766, + "learning_rate": 1.1377444771699519e-06, + "loss": 0.0236, + "num_input_tokens_seen": 70418856, + "step": 104465 + }, + { + "epoch": 2.5522194806146627, + "grad_norm": 4.048886775970459, + "learning_rate": 1.1376600119530211e-06, + "loss": 0.058, + "num_input_tokens_seen": 70421992, + "step": 104470 + }, + { + "epoch": 2.5523416314465104, + "grad_norm": 0.04298697039484978, + "learning_rate": 1.1375755457349896e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70425320, + "step": 104475 + }, + { + "epoch": 2.552463782278357, + "grad_norm": 0.17285777628421783, + "learning_rate": 1.1374910785164717e-06, + "loss": 0.0008, + "num_input_tokens_seen": 70428584, + "step": 104480 + }, + { + "epoch": 2.5525859331102043, + "grad_norm": 0.2123318761587143, + "learning_rate": 1.1374066102980819e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70431592, + "step": 104485 + }, + { + "epoch": 2.5527080839420515, + "grad_norm": 34.717105865478516, + "learning_rate": 1.1373221410804343e-06, + "loss": 0.0543, + "num_input_tokens_seen": 70435048, + "step": 104490 + }, + { + "epoch": 2.5528302347738987, + "grad_norm": 0.024648137390613556, + "learning_rate": 1.1372376708641432e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70438056, + "step": 104495 + }, + { + "epoch": 2.552952385605746, + "grad_norm": 0.07736524939537048, + "learning_rate": 1.1371531996498226e-06, + "loss": 0.0525, + "num_input_tokens_seen": 70441448, + "step": 104500 + }, + { + "epoch": 2.553074536437593, + "grad_norm": 0.004976922646164894, + "learning_rate": 1.137068727438087e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70445160, + "step": 104505 + }, + { + "epoch": 2.5531966872694403, + "grad_norm": 0.010621704161167145, + "learning_rate": 1.136984254229551e-06, + "loss": 0.1141, + "num_input_tokens_seen": 70448488, + "step": 104510 + }, + { + "epoch": 2.5533188381012875, + "grad_norm": 23.055747985839844, + "learning_rate": 1.1368997800248284e-06, + "loss": 0.0478, + "num_input_tokens_seen": 70451496, + "step": 104515 + }, + { + "epoch": 2.5534409889331346, + "grad_norm": 0.020686976611614227, + "learning_rate": 1.1368153048245337e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70455080, + "step": 104520 + }, + { + "epoch": 2.553563139764982, + "grad_norm": 0.19006820023059845, + "learning_rate": 1.1367308286292816e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70458152, + "step": 104525 + }, + { + "epoch": 2.553685290596829, + "grad_norm": 0.07241163402795792, + "learning_rate": 1.136646351439686e-06, + "loss": 0.0514, + "num_input_tokens_seen": 70461352, + "step": 104530 + }, + { + "epoch": 2.553807441428676, + "grad_norm": 0.053362783044576645, + "learning_rate": 1.1365618732563616e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70464744, + "step": 104535 + }, + { + "epoch": 2.5539295922605234, + "grad_norm": 0.0271244365721941, + "learning_rate": 1.1364773940799222e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70468712, + "step": 104540 + }, + { + "epoch": 2.5540517430923706, + "grad_norm": 0.0525955893099308, + "learning_rate": 1.1363929139109827e-06, + "loss": 0.0677, + "num_input_tokens_seen": 70472040, + "step": 104545 + }, + { + "epoch": 2.554173893924218, + "grad_norm": 0.015901561826467514, + "learning_rate": 1.1363084327501573e-06, + "loss": 0.0709, + "num_input_tokens_seen": 70475560, + "step": 104550 + }, + { + "epoch": 2.5542960447560645, + "grad_norm": 22.091421127319336, + "learning_rate": 1.1362239505980602e-06, + "loss": 0.0426, + "num_input_tokens_seen": 70478696, + "step": 104555 + }, + { + "epoch": 2.554418195587912, + "grad_norm": 0.14539512991905212, + "learning_rate": 1.1361394674553058e-06, + "loss": 0.061, + "num_input_tokens_seen": 70481960, + "step": 104560 + }, + { + "epoch": 2.554540346419759, + "grad_norm": 15.404756546020508, + "learning_rate": 1.1360549833225087e-06, + "loss": 0.0904, + "num_input_tokens_seen": 70485480, + "step": 104565 + }, + { + "epoch": 2.5546624972516065, + "grad_norm": 13.381711959838867, + "learning_rate": 1.1359704982002832e-06, + "loss": 0.0503, + "num_input_tokens_seen": 70488872, + "step": 104570 + }, + { + "epoch": 2.5547846480834533, + "grad_norm": 0.0017305332003161311, + "learning_rate": 1.1358860120892433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70492264, + "step": 104575 + }, + { + "epoch": 2.5549067989153005, + "grad_norm": 0.5444920063018799, + "learning_rate": 1.135801524990004e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70495912, + "step": 104580 + }, + { + "epoch": 2.5550289497471477, + "grad_norm": 0.06043120473623276, + "learning_rate": 1.1357170369031797e-06, + "loss": 0.039, + "num_input_tokens_seen": 70499240, + "step": 104585 + }, + { + "epoch": 2.555151100578995, + "grad_norm": 0.03580227494239807, + "learning_rate": 1.1356325478293844e-06, + "loss": 0.0529, + "num_input_tokens_seen": 70502376, + "step": 104590 + }, + { + "epoch": 2.555273251410842, + "grad_norm": 0.015926828607916832, + "learning_rate": 1.135548057769233e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70505960, + "step": 104595 + }, + { + "epoch": 2.5553954022426892, + "grad_norm": 12.819072723388672, + "learning_rate": 1.1354635667233394e-06, + "loss": 0.088, + "num_input_tokens_seen": 70509224, + "step": 104600 + }, + { + "epoch": 2.5555175530745364, + "grad_norm": 0.046196166425943375, + "learning_rate": 1.1353790746923182e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70512808, + "step": 104605 + }, + { + "epoch": 2.5556397039063836, + "grad_norm": 0.1168728843331337, + "learning_rate": 1.1352945816767843e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70516072, + "step": 104610 + }, + { + "epoch": 2.555761854738231, + "grad_norm": 0.05582430958747864, + "learning_rate": 1.1352100876773515e-06, + "loss": 0.0295, + "num_input_tokens_seen": 70519784, + "step": 104615 + }, + { + "epoch": 2.555884005570078, + "grad_norm": 91.32950592041016, + "learning_rate": 1.1351255926946348e-06, + "loss": 0.0026, + "num_input_tokens_seen": 70523112, + "step": 104620 + }, + { + "epoch": 2.556006156401925, + "grad_norm": 0.06987828761339188, + "learning_rate": 1.1350410967292483e-06, + "loss": 0.0491, + "num_input_tokens_seen": 70526568, + "step": 104625 + }, + { + "epoch": 2.5561283072337724, + "grad_norm": 0.049678053706884384, + "learning_rate": 1.1349565997818067e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70529832, + "step": 104630 + }, + { + "epoch": 2.5562504580656196, + "grad_norm": 0.27473777532577515, + "learning_rate": 1.1348721018529243e-06, + "loss": 0.0526, + "num_input_tokens_seen": 70532904, + "step": 104635 + }, + { + "epoch": 2.5563726088974663, + "grad_norm": 0.16689088940620422, + "learning_rate": 1.134787602943216e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70535656, + "step": 104640 + }, + { + "epoch": 2.556494759729314, + "grad_norm": 0.8218722939491272, + "learning_rate": 1.1347031030532956e-06, + "loss": 0.0007, + "num_input_tokens_seen": 70538728, + "step": 104645 + }, + { + "epoch": 2.5566169105611607, + "grad_norm": 0.009063877165317535, + "learning_rate": 1.1346186021837782e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70542440, + "step": 104650 + }, + { + "epoch": 2.5567390613930083, + "grad_norm": 0.17698997259140015, + "learning_rate": 1.1345341003352782e-06, + "loss": 0.1421, + "num_input_tokens_seen": 70546152, + "step": 104655 + }, + { + "epoch": 2.556861212224855, + "grad_norm": 0.05905401334166527, + "learning_rate": 1.1344495975084098e-06, + "loss": 0.0421, + "num_input_tokens_seen": 70549672, + "step": 104660 + }, + { + "epoch": 2.5569833630567023, + "grad_norm": 0.00954660214483738, + "learning_rate": 1.134365093703788e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70553768, + "step": 104665 + }, + { + "epoch": 2.5571055138885495, + "grad_norm": 0.21002456545829773, + "learning_rate": 1.1342805889220269e-06, + "loss": 0.0027, + "num_input_tokens_seen": 70557160, + "step": 104670 + }, + { + "epoch": 2.5572276647203966, + "grad_norm": 19.2589168548584, + "learning_rate": 1.1341960831637414e-06, + "loss": 0.0029, + "num_input_tokens_seen": 70560360, + "step": 104675 + }, + { + "epoch": 2.557349815552244, + "grad_norm": 0.11573576182126999, + "learning_rate": 1.1341115764295458e-06, + "loss": 0.0108, + "num_input_tokens_seen": 70563880, + "step": 104680 + }, + { + "epoch": 2.557471966384091, + "grad_norm": 0.0027204095385968685, + "learning_rate": 1.1340270687200547e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70567400, + "step": 104685 + }, + { + "epoch": 2.557594117215938, + "grad_norm": 1.3431215286254883, + "learning_rate": 1.1339425600358827e-06, + "loss": 0.0127, + "num_input_tokens_seen": 70570600, + "step": 104690 + }, + { + "epoch": 2.5577162680477854, + "grad_norm": 0.07056227326393127, + "learning_rate": 1.1338580503776445e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70573480, + "step": 104695 + }, + { + "epoch": 2.5578384188796326, + "grad_norm": 0.015122072771191597, + "learning_rate": 1.1337735397459547e-06, + "loss": 0.0947, + "num_input_tokens_seen": 70576552, + "step": 104700 + }, + { + "epoch": 2.55796056971148, + "grad_norm": 0.0022590847220271826, + "learning_rate": 1.1336890281414275e-06, + "loss": 0.0, + "num_input_tokens_seen": 70579624, + "step": 104705 + }, + { + "epoch": 2.558082720543327, + "grad_norm": 0.3419479727745056, + "learning_rate": 1.133604515564678e-06, + "loss": 0.0491, + "num_input_tokens_seen": 70582760, + "step": 104710 + }, + { + "epoch": 2.558204871375174, + "grad_norm": 0.024046342819929123, + "learning_rate": 1.13352000201632e-06, + "loss": 0.0449, + "num_input_tokens_seen": 70585832, + "step": 104715 + }, + { + "epoch": 2.5583270222070214, + "grad_norm": 0.03014945797622204, + "learning_rate": 1.133435487496969e-06, + "loss": 0.0007, + "num_input_tokens_seen": 70589096, + "step": 104720 + }, + { + "epoch": 2.5584491730388685, + "grad_norm": 0.165313258767128, + "learning_rate": 1.1333509720072392e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70592872, + "step": 104725 + }, + { + "epoch": 2.5585713238707157, + "grad_norm": 0.10299237817525864, + "learning_rate": 1.133266455547745e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70596584, + "step": 104730 + }, + { + "epoch": 2.5586934747025625, + "grad_norm": 0.0059926919639110565, + "learning_rate": 1.1331819381191016e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70599848, + "step": 104735 + }, + { + "epoch": 2.55881562553441, + "grad_norm": 36.820457458496094, + "learning_rate": 1.1330974197219233e-06, + "loss": 0.1445, + "num_input_tokens_seen": 70602856, + "step": 104740 + }, + { + "epoch": 2.558937776366257, + "grad_norm": 0.5047180652618408, + "learning_rate": 1.1330129003568247e-06, + "loss": 0.0707, + "num_input_tokens_seen": 70606312, + "step": 104745 + }, + { + "epoch": 2.5590599271981045, + "grad_norm": 0.057552579790353775, + "learning_rate": 1.1329283800244206e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70609704, + "step": 104750 + }, + { + "epoch": 2.5591820780299512, + "grad_norm": 0.008029787801206112, + "learning_rate": 1.1328438587253258e-06, + "loss": 0.0465, + "num_input_tokens_seen": 70613032, + "step": 104755 + }, + { + "epoch": 2.5593042288617984, + "grad_norm": 0.019414668902754784, + "learning_rate": 1.132759336460154e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70616232, + "step": 104760 + }, + { + "epoch": 2.5594263796936456, + "grad_norm": 0.08621279895305634, + "learning_rate": 1.1326748132295211e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70619432, + "step": 104765 + }, + { + "epoch": 2.559548530525493, + "grad_norm": 0.02141975425183773, + "learning_rate": 1.1325902890340414e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70622824, + "step": 104770 + }, + { + "epoch": 2.55967068135734, + "grad_norm": 75.61103820800781, + "learning_rate": 1.1325057638743292e-06, + "loss": 0.0308, + "num_input_tokens_seen": 70626024, + "step": 104775 + }, + { + "epoch": 2.559792832189187, + "grad_norm": 0.11365992575883865, + "learning_rate": 1.1324212377509993e-06, + "loss": 0.0441, + "num_input_tokens_seen": 70630120, + "step": 104780 + }, + { + "epoch": 2.5599149830210344, + "grad_norm": 0.05007657781243324, + "learning_rate": 1.132336710664667e-06, + "loss": 0.0148, + "num_input_tokens_seen": 70633640, + "step": 104785 + }, + { + "epoch": 2.5600371338528816, + "grad_norm": 0.06095591560006142, + "learning_rate": 1.1322521826159464e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70637160, + "step": 104790 + }, + { + "epoch": 2.5601592846847288, + "grad_norm": 0.04456912353634834, + "learning_rate": 1.1321676536054522e-06, + "loss": 0.2187, + "num_input_tokens_seen": 70640168, + "step": 104795 + }, + { + "epoch": 2.560281435516576, + "grad_norm": 41.895538330078125, + "learning_rate": 1.1320831236337994e-06, + "loss": 0.0387, + "num_input_tokens_seen": 70643560, + "step": 104800 + }, + { + "epoch": 2.560403586348423, + "grad_norm": 0.01936202123761177, + "learning_rate": 1.1319985927016026e-06, + "loss": 0.0059, + "num_input_tokens_seen": 70647464, + "step": 104805 + }, + { + "epoch": 2.5605257371802703, + "grad_norm": 0.4608546495437622, + "learning_rate": 1.1319140608094762e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70650728, + "step": 104810 + }, + { + "epoch": 2.5606478880121175, + "grad_norm": 0.014622372575104237, + "learning_rate": 1.1318295279580357e-06, + "loss": 0.0838, + "num_input_tokens_seen": 70653544, + "step": 104815 + }, + { + "epoch": 2.5607700388439643, + "grad_norm": 31.645605087280273, + "learning_rate": 1.1317449941478952e-06, + "loss": 0.0363, + "num_input_tokens_seen": 70656552, + "step": 104820 + }, + { + "epoch": 2.560892189675812, + "grad_norm": 0.06011528894305229, + "learning_rate": 1.1316604593796695e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70659560, + "step": 104825 + }, + { + "epoch": 2.5610143405076586, + "grad_norm": 0.036927856504917145, + "learning_rate": 1.1315759236539737e-06, + "loss": 0.0803, + "num_input_tokens_seen": 70662824, + "step": 104830 + }, + { + "epoch": 2.5611364913395063, + "grad_norm": 0.05356254428625107, + "learning_rate": 1.1314913869714225e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70666344, + "step": 104835 + }, + { + "epoch": 2.561258642171353, + "grad_norm": 0.43842801451683044, + "learning_rate": 1.1314068493326305e-06, + "loss": 0.0007, + "num_input_tokens_seen": 70669672, + "step": 104840 + }, + { + "epoch": 2.5613807930032, + "grad_norm": 0.03688225895166397, + "learning_rate": 1.1313223107382124e-06, + "loss": 0.0153, + "num_input_tokens_seen": 70673128, + "step": 104845 + }, + { + "epoch": 2.5615029438350474, + "grad_norm": 0.011995815671980381, + "learning_rate": 1.1312377711887835e-06, + "loss": 0.013, + "num_input_tokens_seen": 70676776, + "step": 104850 + }, + { + "epoch": 2.5616250946668946, + "grad_norm": 23.376665115356445, + "learning_rate": 1.1311532306849579e-06, + "loss": 0.0668, + "num_input_tokens_seen": 70679976, + "step": 104855 + }, + { + "epoch": 2.5617472454987418, + "grad_norm": 0.09116607159376144, + "learning_rate": 1.131068689227351e-06, + "loss": 0.0443, + "num_input_tokens_seen": 70683240, + "step": 104860 + }, + { + "epoch": 2.561869396330589, + "grad_norm": 0.021630791947245598, + "learning_rate": 1.130984146816577e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70687208, + "step": 104865 + }, + { + "epoch": 2.561991547162436, + "grad_norm": 0.012038863264024258, + "learning_rate": 1.1308996034532511e-06, + "loss": 0.0, + "num_input_tokens_seen": 70690472, + "step": 104870 + }, + { + "epoch": 2.5621136979942833, + "grad_norm": 0.007038927637040615, + "learning_rate": 1.1308150591379884e-06, + "loss": 0.0573, + "num_input_tokens_seen": 70693864, + "step": 104875 + }, + { + "epoch": 2.5622358488261305, + "grad_norm": 0.16878843307495117, + "learning_rate": 1.1307305138714032e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70697448, + "step": 104880 + }, + { + "epoch": 2.5623579996579777, + "grad_norm": 0.047601111233234406, + "learning_rate": 1.1306459676541104e-06, + "loss": 0.0601, + "num_input_tokens_seen": 70700968, + "step": 104885 + }, + { + "epoch": 2.562480150489825, + "grad_norm": 0.4622974395751953, + "learning_rate": 1.130561420486725e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70704488, + "step": 104890 + }, + { + "epoch": 2.562602301321672, + "grad_norm": 1.5819555521011353, + "learning_rate": 1.1304768723698622e-06, + "loss": 0.0356, + "num_input_tokens_seen": 70708008, + "step": 104895 + }, + { + "epoch": 2.5627244521535193, + "grad_norm": 0.32831981778144836, + "learning_rate": 1.130392323304136e-06, + "loss": 0.0384, + "num_input_tokens_seen": 70711208, + "step": 104900 + }, + { + "epoch": 2.5628466029853665, + "grad_norm": 0.12604233622550964, + "learning_rate": 1.1303077732901625e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70714536, + "step": 104905 + }, + { + "epoch": 2.5629687538172137, + "grad_norm": 0.3454369008541107, + "learning_rate": 1.1302232223285554e-06, + "loss": 0.0339, + "num_input_tokens_seen": 70717800, + "step": 104910 + }, + { + "epoch": 2.5630909046490604, + "grad_norm": 27.095327377319336, + "learning_rate": 1.1301386704199298e-06, + "loss": 0.0424, + "num_input_tokens_seen": 70721320, + "step": 104915 + }, + { + "epoch": 2.563213055480908, + "grad_norm": 0.38351860642433167, + "learning_rate": 1.1300541175649008e-06, + "loss": 0.1076, + "num_input_tokens_seen": 70724328, + "step": 104920 + }, + { + "epoch": 2.563335206312755, + "grad_norm": 40.32852554321289, + "learning_rate": 1.1299695637640834e-06, + "loss": 0.1206, + "num_input_tokens_seen": 70727592, + "step": 104925 + }, + { + "epoch": 2.563457357144602, + "grad_norm": 0.0024320860393345356, + "learning_rate": 1.1298850090180923e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70730984, + "step": 104930 + }, + { + "epoch": 2.563579507976449, + "grad_norm": 0.058492377400398254, + "learning_rate": 1.1298004533275426e-06, + "loss": 0.0515, + "num_input_tokens_seen": 70734120, + "step": 104935 + }, + { + "epoch": 2.5637016588082964, + "grad_norm": 0.0694495216012001, + "learning_rate": 1.1297158966930492e-06, + "loss": 0.061, + "num_input_tokens_seen": 70737768, + "step": 104940 + }, + { + "epoch": 2.5638238096401436, + "grad_norm": 0.07813955098390579, + "learning_rate": 1.1296313391152266e-06, + "loss": 0.0008, + "num_input_tokens_seen": 70741288, + "step": 104945 + }, + { + "epoch": 2.5639459604719907, + "grad_norm": 0.16016274690628052, + "learning_rate": 1.1295467805946902e-06, + "loss": 0.0874, + "num_input_tokens_seen": 70744552, + "step": 104950 + }, + { + "epoch": 2.564068111303838, + "grad_norm": 0.0038009141571819782, + "learning_rate": 1.1294622211320548e-06, + "loss": 0.0529, + "num_input_tokens_seen": 70748136, + "step": 104955 + }, + { + "epoch": 2.564190262135685, + "grad_norm": 0.00716787576675415, + "learning_rate": 1.1293776607279352e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70751464, + "step": 104960 + }, + { + "epoch": 2.5643124129675323, + "grad_norm": 43.62774658203125, + "learning_rate": 1.1292930993829465e-06, + "loss": 0.1121, + "num_input_tokens_seen": 70754792, + "step": 104965 + }, + { + "epoch": 2.5644345637993795, + "grad_norm": 317.7919921875, + "learning_rate": 1.1292085370977036e-06, + "loss": 0.0725, + "num_input_tokens_seen": 70757928, + "step": 104970 + }, + { + "epoch": 2.5645567146312267, + "grad_norm": 26.290687561035156, + "learning_rate": 1.1291239738728214e-06, + "loss": 0.0597, + "num_input_tokens_seen": 70760808, + "step": 104975 + }, + { + "epoch": 2.564678865463074, + "grad_norm": 30.182716369628906, + "learning_rate": 1.129039409708915e-06, + "loss": 0.063, + "num_input_tokens_seen": 70764264, + "step": 104980 + }, + { + "epoch": 2.564801016294921, + "grad_norm": 0.4601612687110901, + "learning_rate": 1.1289548446065993e-06, + "loss": 0.125, + "num_input_tokens_seen": 70767656, + "step": 104985 + }, + { + "epoch": 2.5649231671267683, + "grad_norm": 0.022800132632255554, + "learning_rate": 1.1288702785664894e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70770920, + "step": 104990 + }, + { + "epoch": 2.5650453179586155, + "grad_norm": 0.060902874916791916, + "learning_rate": 1.1287857115892002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70774312, + "step": 104995 + }, + { + "epoch": 2.565167468790462, + "grad_norm": 65.98351287841797, + "learning_rate": 1.1287011436753465e-06, + "loss": 0.0115, + "num_input_tokens_seen": 70777320, + "step": 105000 + }, + { + "epoch": 2.56528961962231, + "grad_norm": 0.09278260916471481, + "learning_rate": 1.1286165748255434e-06, + "loss": 0.0479, + "num_input_tokens_seen": 70780200, + "step": 105005 + }, + { + "epoch": 2.5654117704541566, + "grad_norm": 0.008261207491159439, + "learning_rate": 1.128532005040406e-06, + "loss": 0.0056, + "num_input_tokens_seen": 70783656, + "step": 105010 + }, + { + "epoch": 2.565533921286004, + "grad_norm": 0.0054551963694393635, + "learning_rate": 1.1284474343205494e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70787368, + "step": 105015 + }, + { + "epoch": 2.565656072117851, + "grad_norm": 0.007644488476216793, + "learning_rate": 1.1283628626665887e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70790696, + "step": 105020 + }, + { + "epoch": 2.565778222949698, + "grad_norm": 0.08200935274362564, + "learning_rate": 1.1282782900791384e-06, + "loss": 0.0017, + "num_input_tokens_seen": 70794024, + "step": 105025 + }, + { + "epoch": 2.5659003737815453, + "grad_norm": 0.15629921853542328, + "learning_rate": 1.128193716558814e-06, + "loss": 0.0585, + "num_input_tokens_seen": 70797160, + "step": 105030 + }, + { + "epoch": 2.5660225246133925, + "grad_norm": 11.52754020690918, + "learning_rate": 1.1281091421062306e-06, + "loss": 0.0408, + "num_input_tokens_seen": 70800616, + "step": 105035 + }, + { + "epoch": 2.5661446754452397, + "grad_norm": 0.008200634270906448, + "learning_rate": 1.1280245667220024e-06, + "loss": 0.1486, + "num_input_tokens_seen": 70803880, + "step": 105040 + }, + { + "epoch": 2.566266826277087, + "grad_norm": 0.660971999168396, + "learning_rate": 1.1279399904067457e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70807272, + "step": 105045 + }, + { + "epoch": 2.566388977108934, + "grad_norm": 0.07039150595664978, + "learning_rate": 1.127855413161075e-06, + "loss": 0.0849, + "num_input_tokens_seen": 70810600, + "step": 105050 + }, + { + "epoch": 2.5665111279407813, + "grad_norm": 0.04144516959786415, + "learning_rate": 1.1277708349856053e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70814312, + "step": 105055 + }, + { + "epoch": 2.5666332787726285, + "grad_norm": 0.12249460071325302, + "learning_rate": 1.1276862558809518e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70817256, + "step": 105060 + }, + { + "epoch": 2.5667554296044757, + "grad_norm": 0.20821748673915863, + "learning_rate": 1.127601675847729e-06, + "loss": 0.1226, + "num_input_tokens_seen": 70820328, + "step": 105065 + }, + { + "epoch": 2.566877580436323, + "grad_norm": 0.019365403801202774, + "learning_rate": 1.1275170948865531e-06, + "loss": 0.0547, + "num_input_tokens_seen": 70823528, + "step": 105070 + }, + { + "epoch": 2.56699973126817, + "grad_norm": 0.003225482301786542, + "learning_rate": 1.127432512998038e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70826856, + "step": 105075 + }, + { + "epoch": 2.5671218821000172, + "grad_norm": 0.009520246647298336, + "learning_rate": 1.1273479301827999e-06, + "loss": 0.0396, + "num_input_tokens_seen": 70830952, + "step": 105080 + }, + { + "epoch": 2.5672440329318644, + "grad_norm": 0.09223540872335434, + "learning_rate": 1.1272633464414533e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70834216, + "step": 105085 + }, + { + "epoch": 2.5673661837637116, + "grad_norm": 0.018998224288225174, + "learning_rate": 1.127178761774613e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70837224, + "step": 105090 + }, + { + "epoch": 2.5674883345955584, + "grad_norm": 0.02200205810368061, + "learning_rate": 1.127094176182895e-06, + "loss": 0.1041, + "num_input_tokens_seen": 70840360, + "step": 105095 + }, + { + "epoch": 2.567610485427406, + "grad_norm": 0.1800374537706375, + "learning_rate": 1.1270095896669138e-06, + "loss": 0.0004, + "num_input_tokens_seen": 70844008, + "step": 105100 + }, + { + "epoch": 2.5677326362592527, + "grad_norm": 0.04483243450522423, + "learning_rate": 1.126925002227285e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70847528, + "step": 105105 + }, + { + "epoch": 2.5678547870911, + "grad_norm": 0.025489473715424538, + "learning_rate": 1.126840413864623e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70850984, + "step": 105110 + }, + { + "epoch": 2.567976937922947, + "grad_norm": 0.08616407215595245, + "learning_rate": 1.1267558245795438e-06, + "loss": 0.0006, + "num_input_tokens_seen": 70854824, + "step": 105115 + }, + { + "epoch": 2.5680990887547943, + "grad_norm": 0.578325092792511, + "learning_rate": 1.1266712343726617e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70858216, + "step": 105120 + }, + { + "epoch": 2.5682212395866415, + "grad_norm": 0.011274863965809345, + "learning_rate": 1.1265866432445925e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70861544, + "step": 105125 + }, + { + "epoch": 2.5683433904184887, + "grad_norm": 0.1854897290468216, + "learning_rate": 1.1265020511959514e-06, + "loss": 0.0753, + "num_input_tokens_seen": 70864616, + "step": 105130 + }, + { + "epoch": 2.568465541250336, + "grad_norm": 0.019263846799731255, + "learning_rate": 1.126417458227353e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70867496, + "step": 105135 + }, + { + "epoch": 2.568587692082183, + "grad_norm": 0.003985455725342035, + "learning_rate": 1.126332864339413e-06, + "loss": 0.034, + "num_input_tokens_seen": 70871272, + "step": 105140 + }, + { + "epoch": 2.5687098429140303, + "grad_norm": 0.02949185110628605, + "learning_rate": 1.1262482695327464e-06, + "loss": 0.0221, + "num_input_tokens_seen": 70874600, + "step": 105145 + }, + { + "epoch": 2.5688319937458775, + "grad_norm": 0.023490281775593758, + "learning_rate": 1.1261636738079686e-06, + "loss": 0.0683, + "num_input_tokens_seen": 70878184, + "step": 105150 + }, + { + "epoch": 2.5689541445777246, + "grad_norm": 0.09141631424427032, + "learning_rate": 1.1260790771656944e-06, + "loss": 0.1187, + "num_input_tokens_seen": 70881640, + "step": 105155 + }, + { + "epoch": 2.569076295409572, + "grad_norm": 0.2394016832113266, + "learning_rate": 1.1259944796065392e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70885160, + "step": 105160 + }, + { + "epoch": 2.569198446241419, + "grad_norm": 0.5712342262268066, + "learning_rate": 1.1259098811311184e-06, + "loss": 0.0457, + "num_input_tokens_seen": 70888360, + "step": 105165 + }, + { + "epoch": 2.569320597073266, + "grad_norm": 0.007113562431186438, + "learning_rate": 1.1258252817400472e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70891176, + "step": 105170 + }, + { + "epoch": 2.5694427479051134, + "grad_norm": 0.039674174040555954, + "learning_rate": 1.1257406814339404e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70895144, + "step": 105175 + }, + { + "epoch": 2.56956489873696, + "grad_norm": 72.04270935058594, + "learning_rate": 1.1256560802134138e-06, + "loss": 0.0957, + "num_input_tokens_seen": 70898728, + "step": 105180 + }, + { + "epoch": 2.569687049568808, + "grad_norm": 0.0031505043152719736, + "learning_rate": 1.1255714780790823e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70901928, + "step": 105185 + }, + { + "epoch": 2.5698092004006545, + "grad_norm": 0.030574841424822807, + "learning_rate": 1.1254868750315613e-06, + "loss": 0.0618, + "num_input_tokens_seen": 70904872, + "step": 105190 + }, + { + "epoch": 2.569931351232502, + "grad_norm": 0.0069647496566176414, + "learning_rate": 1.1254022710714662e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70908200, + "step": 105195 + }, + { + "epoch": 2.570053502064349, + "grad_norm": 0.1449422985315323, + "learning_rate": 1.1253176661994114e-06, + "loss": 0.1128, + "num_input_tokens_seen": 70910888, + "step": 105200 + }, + { + "epoch": 2.570175652896196, + "grad_norm": 0.04498355835676193, + "learning_rate": 1.1252330604160134e-06, + "loss": 0.0522, + "num_input_tokens_seen": 70913960, + "step": 105205 + }, + { + "epoch": 2.5702978037280433, + "grad_norm": 0.03253661096096039, + "learning_rate": 1.1251484537218867e-06, + "loss": 0.0927, + "num_input_tokens_seen": 70917032, + "step": 105210 + }, + { + "epoch": 2.5704199545598905, + "grad_norm": 0.002849914599210024, + "learning_rate": 1.125063846117647e-06, + "loss": 0.0654, + "num_input_tokens_seen": 70920616, + "step": 105215 + }, + { + "epoch": 2.5705421053917377, + "grad_norm": 0.00351352128200233, + "learning_rate": 1.124979237603909e-06, + "loss": 0.0005, + "num_input_tokens_seen": 70923688, + "step": 105220 + }, + { + "epoch": 2.570664256223585, + "grad_norm": 27.33385467529297, + "learning_rate": 1.1248946281812888e-06, + "loss": 0.0007, + "num_input_tokens_seen": 70926888, + "step": 105225 + }, + { + "epoch": 2.570786407055432, + "grad_norm": 0.008452944457530975, + "learning_rate": 1.1248100178504008e-06, + "loss": 0.0345, + "num_input_tokens_seen": 70930216, + "step": 105230 + }, + { + "epoch": 2.5709085578872792, + "grad_norm": 0.19451171159744263, + "learning_rate": 1.124725406611861e-06, + "loss": 0.0003, + "num_input_tokens_seen": 70933928, + "step": 105235 + }, + { + "epoch": 2.5710307087191264, + "grad_norm": 0.013641073368489742, + "learning_rate": 1.1246407944662846e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70937768, + "step": 105240 + }, + { + "epoch": 2.5711528595509736, + "grad_norm": 0.02136857435107231, + "learning_rate": 1.1245561814142865e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70940584, + "step": 105245 + }, + { + "epoch": 2.571275010382821, + "grad_norm": 0.01668001152575016, + "learning_rate": 1.1244715674564827e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70943912, + "step": 105250 + }, + { + "epoch": 2.571397161214668, + "grad_norm": 0.016919095069169998, + "learning_rate": 1.1243869525934881e-06, + "loss": 0.0001, + "num_input_tokens_seen": 70947304, + "step": 105255 + }, + { + "epoch": 2.571519312046515, + "grad_norm": 0.015760477632284164, + "learning_rate": 1.124302336825918e-06, + "loss": 0.0585, + "num_input_tokens_seen": 70950440, + "step": 105260 + }, + { + "epoch": 2.571641462878362, + "grad_norm": 0.012062730267643929, + "learning_rate": 1.1242177201543881e-06, + "loss": 0.0586, + "num_input_tokens_seen": 70954024, + "step": 105265 + }, + { + "epoch": 2.5717636137102096, + "grad_norm": 0.008802815340459347, + "learning_rate": 1.1241331025795132e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70957160, + "step": 105270 + }, + { + "epoch": 2.5718857645420563, + "grad_norm": 0.14933550357818604, + "learning_rate": 1.1240484841019094e-06, + "loss": 0.0006, + "num_input_tokens_seen": 70960680, + "step": 105275 + }, + { + "epoch": 2.572007915373904, + "grad_norm": 0.2914406359195709, + "learning_rate": 1.1239638647221915e-06, + "loss": 0.0338, + "num_input_tokens_seen": 70963944, + "step": 105280 + }, + { + "epoch": 2.5721300662057507, + "grad_norm": 196.21656799316406, + "learning_rate": 1.123879244440975e-06, + "loss": 0.0267, + "num_input_tokens_seen": 70967144, + "step": 105285 + }, + { + "epoch": 2.572252217037598, + "grad_norm": 78.05303192138672, + "learning_rate": 1.1237946232588749e-06, + "loss": 0.0523, + "num_input_tokens_seen": 70970728, + "step": 105290 + }, + { + "epoch": 2.572374367869445, + "grad_norm": 0.009671011008322239, + "learning_rate": 1.1237100011765074e-06, + "loss": 0.097, + "num_input_tokens_seen": 70974120, + "step": 105295 + }, + { + "epoch": 2.5724965187012923, + "grad_norm": 0.0990491583943367, + "learning_rate": 1.1236253781944873e-06, + "loss": 0.0057, + "num_input_tokens_seen": 70977576, + "step": 105300 + }, + { + "epoch": 2.5726186695331394, + "grad_norm": 0.7363083958625793, + "learning_rate": 1.1235407543134305e-06, + "loss": 0.0995, + "num_input_tokens_seen": 70980648, + "step": 105305 + }, + { + "epoch": 2.5727408203649866, + "grad_norm": 0.08942437171936035, + "learning_rate": 1.123456129533952e-06, + "loss": 0.0181, + "num_input_tokens_seen": 70984040, + "step": 105310 + }, + { + "epoch": 2.572862971196834, + "grad_norm": 0.17924891412258148, + "learning_rate": 1.1233715038566673e-06, + "loss": 0.0529, + "num_input_tokens_seen": 70987240, + "step": 105315 + }, + { + "epoch": 2.572985122028681, + "grad_norm": 0.11793851107358932, + "learning_rate": 1.123286877282192e-06, + "loss": 0.0002, + "num_input_tokens_seen": 70990760, + "step": 105320 + }, + { + "epoch": 2.573107272860528, + "grad_norm": 13.390181541442871, + "learning_rate": 1.123202249811141e-06, + "loss": 0.0819, + "num_input_tokens_seen": 70994024, + "step": 105325 + }, + { + "epoch": 2.5732294236923754, + "grad_norm": 0.5596466064453125, + "learning_rate": 1.1231176214441305e-06, + "loss": 0.0822, + "num_input_tokens_seen": 70997672, + "step": 105330 + }, + { + "epoch": 2.5733515745242226, + "grad_norm": 0.20801320672035217, + "learning_rate": 1.123032992181775e-06, + "loss": 0.0558, + "num_input_tokens_seen": 71001128, + "step": 105335 + }, + { + "epoch": 2.5734737253560698, + "grad_norm": 0.025137195363640785, + "learning_rate": 1.1229483620246912e-06, + "loss": 0.035, + "num_input_tokens_seen": 71004456, + "step": 105340 + }, + { + "epoch": 2.573595876187917, + "grad_norm": 0.005038428120315075, + "learning_rate": 1.1228637309734935e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71008040, + "step": 105345 + }, + { + "epoch": 2.573718027019764, + "grad_norm": 0.2574462592601776, + "learning_rate": 1.1227790990287976e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71011432, + "step": 105350 + }, + { + "epoch": 2.5738401778516113, + "grad_norm": 0.0075719221495091915, + "learning_rate": 1.122694466191219e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71014504, + "step": 105355 + }, + { + "epoch": 2.573962328683458, + "grad_norm": 0.2954552173614502, + "learning_rate": 1.1226098324613733e-06, + "loss": 0.0604, + "num_input_tokens_seen": 71017384, + "step": 105360 + }, + { + "epoch": 2.5740844795153057, + "grad_norm": 10.460375785827637, + "learning_rate": 1.122525197839876e-06, + "loss": 0.0862, + "num_input_tokens_seen": 71021160, + "step": 105365 + }, + { + "epoch": 2.5742066303471525, + "grad_norm": 0.015190809965133667, + "learning_rate": 1.1224405623273427e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71024296, + "step": 105370 + }, + { + "epoch": 2.574328781179, + "grad_norm": 0.05254284292459488, + "learning_rate": 1.1223559259243886e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71027496, + "step": 105375 + }, + { + "epoch": 2.574450932010847, + "grad_norm": 0.01987920142710209, + "learning_rate": 1.1222712886316292e-06, + "loss": 0.0031, + "num_input_tokens_seen": 71030760, + "step": 105380 + }, + { + "epoch": 2.574573082842694, + "grad_norm": 0.26836860179901123, + "learning_rate": 1.12218665044968e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71034472, + "step": 105385 + }, + { + "epoch": 2.5746952336745412, + "grad_norm": 0.2163187563419342, + "learning_rate": 1.1221020113791567e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71037544, + "step": 105390 + }, + { + "epoch": 2.5748173845063884, + "grad_norm": 0.0660480409860611, + "learning_rate": 1.122017371420675e-06, + "loss": 0.1522, + "num_input_tokens_seen": 71041320, + "step": 105395 + }, + { + "epoch": 2.5749395353382356, + "grad_norm": 0.11806187033653259, + "learning_rate": 1.1219327305748499e-06, + "loss": 0.0685, + "num_input_tokens_seen": 71044328, + "step": 105400 + }, + { + "epoch": 2.575061686170083, + "grad_norm": 0.2208234816789627, + "learning_rate": 1.121848088842297e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71047400, + "step": 105405 + }, + { + "epoch": 2.57518383700193, + "grad_norm": 0.04226498678326607, + "learning_rate": 1.1217634462236323e-06, + "loss": 0.0553, + "num_input_tokens_seen": 71050792, + "step": 105410 + }, + { + "epoch": 2.575305987833777, + "grad_norm": 0.10588227212429047, + "learning_rate": 1.121678802719471e-06, + "loss": 0.0514, + "num_input_tokens_seen": 71053928, + "step": 105415 + }, + { + "epoch": 2.5754281386656244, + "grad_norm": 13.927465438842773, + "learning_rate": 1.1215941583304288e-06, + "loss": 0.0257, + "num_input_tokens_seen": 71057384, + "step": 105420 + }, + { + "epoch": 2.5755502894974716, + "grad_norm": 76.3477783203125, + "learning_rate": 1.121509513057121e-06, + "loss": 0.0033, + "num_input_tokens_seen": 71060520, + "step": 105425 + }, + { + "epoch": 2.5756724403293187, + "grad_norm": 0.009837713092565536, + "learning_rate": 1.1214248669001635e-06, + "loss": 0.0305, + "num_input_tokens_seen": 71064104, + "step": 105430 + }, + { + "epoch": 2.575794591161166, + "grad_norm": 0.00897394958883524, + "learning_rate": 1.1213402198601715e-06, + "loss": 0.0323, + "num_input_tokens_seen": 71067112, + "step": 105435 + }, + { + "epoch": 2.575916741993013, + "grad_norm": 0.0860351026058197, + "learning_rate": 1.1212555719377612e-06, + "loss": 0.0435, + "num_input_tokens_seen": 71070376, + "step": 105440 + }, + { + "epoch": 2.57603889282486, + "grad_norm": 0.22064711153507233, + "learning_rate": 1.121170923133547e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71073704, + "step": 105445 + }, + { + "epoch": 2.5761610436567075, + "grad_norm": 0.2620203197002411, + "learning_rate": 1.1210862734481459e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71076840, + "step": 105450 + }, + { + "epoch": 2.5762831944885543, + "grad_norm": 0.7787070870399475, + "learning_rate": 1.1210016228821729e-06, + "loss": 0.0011, + "num_input_tokens_seen": 71080168, + "step": 105455 + }, + { + "epoch": 2.576405345320402, + "grad_norm": 0.03237830102443695, + "learning_rate": 1.1209169714362431e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71083816, + "step": 105460 + }, + { + "epoch": 2.5765274961522486, + "grad_norm": 0.1585754007101059, + "learning_rate": 1.120832319110973e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71087080, + "step": 105465 + }, + { + "epoch": 2.576649646984096, + "grad_norm": 0.13227100670337677, + "learning_rate": 1.1207476659069776e-06, + "loss": 0.0426, + "num_input_tokens_seen": 71090664, + "step": 105470 + }, + { + "epoch": 2.576771797815943, + "grad_norm": 0.055049993097782135, + "learning_rate": 1.1206630118248725e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71094440, + "step": 105475 + }, + { + "epoch": 2.57689394864779, + "grad_norm": 0.0033830543980002403, + "learning_rate": 1.1205783568652738e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71097576, + "step": 105480 + }, + { + "epoch": 2.5770160994796374, + "grad_norm": 14.097530364990234, + "learning_rate": 1.1204937010287968e-06, + "loss": 0.0618, + "num_input_tokens_seen": 71100840, + "step": 105485 + }, + { + "epoch": 2.5771382503114846, + "grad_norm": 0.10420069098472595, + "learning_rate": 1.1204090443160573e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71104040, + "step": 105490 + }, + { + "epoch": 2.5772604011433318, + "grad_norm": 0.02168843150138855, + "learning_rate": 1.1203243867276706e-06, + "loss": 0.0431, + "num_input_tokens_seen": 71107624, + "step": 105495 + }, + { + "epoch": 2.577382551975179, + "grad_norm": 20.862285614013672, + "learning_rate": 1.1202397282642529e-06, + "loss": 0.0615, + "num_input_tokens_seen": 71111016, + "step": 105500 + }, + { + "epoch": 2.577504702807026, + "grad_norm": 0.044624313712120056, + "learning_rate": 1.1201550689264191e-06, + "loss": 0.0249, + "num_input_tokens_seen": 71114664, + "step": 105505 + }, + { + "epoch": 2.5776268536388733, + "grad_norm": 0.06067600101232529, + "learning_rate": 1.1200704087147854e-06, + "loss": 0.0515, + "num_input_tokens_seen": 71118248, + "step": 105510 + }, + { + "epoch": 2.5777490044707205, + "grad_norm": 0.0106893889605999, + "learning_rate": 1.1199857476299678e-06, + "loss": 0.0328, + "num_input_tokens_seen": 71121448, + "step": 105515 + }, + { + "epoch": 2.5778711553025677, + "grad_norm": 0.02916247397661209, + "learning_rate": 1.1199010856725813e-06, + "loss": 0.038, + "num_input_tokens_seen": 71125736, + "step": 105520 + }, + { + "epoch": 2.577993306134415, + "grad_norm": 0.07611119747161865, + "learning_rate": 1.119816422843242e-06, + "loss": 0.0903, + "num_input_tokens_seen": 71129064, + "step": 105525 + }, + { + "epoch": 2.578115456966262, + "grad_norm": 0.0019288009498268366, + "learning_rate": 1.1197317591425656e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71132328, + "step": 105530 + }, + { + "epoch": 2.5782376077981093, + "grad_norm": 0.0974397137761116, + "learning_rate": 1.1196470945711674e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71135720, + "step": 105535 + }, + { + "epoch": 2.578359758629956, + "grad_norm": 0.07071943581104279, + "learning_rate": 1.119562429129663e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71138920, + "step": 105540 + }, + { + "epoch": 2.5784819094618037, + "grad_norm": 0.02228919044137001, + "learning_rate": 1.1194777628186686e-06, + "loss": 0.0558, + "num_input_tokens_seen": 71142184, + "step": 105545 + }, + { + "epoch": 2.5786040602936504, + "grad_norm": 27.30817985534668, + "learning_rate": 1.1193930956388004e-06, + "loss": 0.1206, + "num_input_tokens_seen": 71145768, + "step": 105550 + }, + { + "epoch": 2.5787262111254976, + "grad_norm": 0.14151504635810852, + "learning_rate": 1.1193084275906727e-06, + "loss": 0.072, + "num_input_tokens_seen": 71149032, + "step": 105555 + }, + { + "epoch": 2.578848361957345, + "grad_norm": 0.0615425668656826, + "learning_rate": 1.1192237586749025e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71152232, + "step": 105560 + }, + { + "epoch": 2.578970512789192, + "grad_norm": 0.12096342444419861, + "learning_rate": 1.119139088892105e-06, + "loss": 0.0338, + "num_input_tokens_seen": 71155944, + "step": 105565 + }, + { + "epoch": 2.579092663621039, + "grad_norm": 0.0027860745321959257, + "learning_rate": 1.119054418242896e-06, + "loss": 0.0276, + "num_input_tokens_seen": 71159272, + "step": 105570 + }, + { + "epoch": 2.5792148144528864, + "grad_norm": 0.016637323424220085, + "learning_rate": 1.1189697467278911e-06, + "loss": 0.0489, + "num_input_tokens_seen": 71162664, + "step": 105575 + }, + { + "epoch": 2.5793369652847336, + "grad_norm": 0.005513378884643316, + "learning_rate": 1.1188850743477062e-06, + "loss": 0.0843, + "num_input_tokens_seen": 71166056, + "step": 105580 + }, + { + "epoch": 2.5794591161165807, + "grad_norm": 11.760927200317383, + "learning_rate": 1.1188004011029573e-06, + "loss": 0.0413, + "num_input_tokens_seen": 71170088, + "step": 105585 + }, + { + "epoch": 2.579581266948428, + "grad_norm": 29.75730323791504, + "learning_rate": 1.1187157269942598e-06, + "loss": 0.0483, + "num_input_tokens_seen": 71173480, + "step": 105590 + }, + { + "epoch": 2.579703417780275, + "grad_norm": 0.4479432702064514, + "learning_rate": 1.1186310520222297e-06, + "loss": 0.1053, + "num_input_tokens_seen": 71176936, + "step": 105595 + }, + { + "epoch": 2.5798255686121223, + "grad_norm": 0.19967444241046906, + "learning_rate": 1.1185463761874823e-06, + "loss": 0.0391, + "num_input_tokens_seen": 71180200, + "step": 105600 + }, + { + "epoch": 2.5799477194439695, + "grad_norm": 0.19300290942192078, + "learning_rate": 1.1184616994906341e-06, + "loss": 0.0013, + "num_input_tokens_seen": 71183400, + "step": 105605 + }, + { + "epoch": 2.5800698702758167, + "grad_norm": 0.021904923021793365, + "learning_rate": 1.1183770219323005e-06, + "loss": 0.0414, + "num_input_tokens_seen": 71187624, + "step": 105610 + }, + { + "epoch": 2.580192021107664, + "grad_norm": 0.022521737962961197, + "learning_rate": 1.1182923435130972e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71191208, + "step": 105615 + }, + { + "epoch": 2.580314171939511, + "grad_norm": 1.1650266647338867, + "learning_rate": 1.1182076642336405e-06, + "loss": 0.0011, + "num_input_tokens_seen": 71194920, + "step": 105620 + }, + { + "epoch": 2.580436322771358, + "grad_norm": 0.006862139794975519, + "learning_rate": 1.1181229840945457e-06, + "loss": 0.0513, + "num_input_tokens_seen": 71197992, + "step": 105625 + }, + { + "epoch": 2.5805584736032054, + "grad_norm": 0.03143366053700447, + "learning_rate": 1.1180383030964288e-06, + "loss": 0.06, + "num_input_tokens_seen": 71200936, + "step": 105630 + }, + { + "epoch": 2.580680624435052, + "grad_norm": 0.32460829615592957, + "learning_rate": 1.1179536212399057e-06, + "loss": 0.0589, + "num_input_tokens_seen": 71204648, + "step": 105635 + }, + { + "epoch": 2.5808027752669, + "grad_norm": 0.01299480814486742, + "learning_rate": 1.1178689385255919e-06, + "loss": 0.0012, + "num_input_tokens_seen": 71208104, + "step": 105640 + }, + { + "epoch": 2.5809249260987466, + "grad_norm": 0.007713967002928257, + "learning_rate": 1.1177842549541036e-06, + "loss": 0.0709, + "num_input_tokens_seen": 71211368, + "step": 105645 + }, + { + "epoch": 2.5810470769305938, + "grad_norm": 0.010506748221814632, + "learning_rate": 1.1176995705260566e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71214568, + "step": 105650 + }, + { + "epoch": 2.581169227762441, + "grad_norm": 0.04141269251704216, + "learning_rate": 1.1176148852420666e-06, + "loss": 0.0699, + "num_input_tokens_seen": 71217512, + "step": 105655 + }, + { + "epoch": 2.581291378594288, + "grad_norm": 0.15972989797592163, + "learning_rate": 1.1175301991027494e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71221160, + "step": 105660 + }, + { + "epoch": 2.5814135294261353, + "grad_norm": 16.147079467773438, + "learning_rate": 1.117445512108721e-06, + "loss": 0.1022, + "num_input_tokens_seen": 71224488, + "step": 105665 + }, + { + "epoch": 2.5815356802579825, + "grad_norm": 0.015271567739546299, + "learning_rate": 1.1173608242605974e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71227752, + "step": 105670 + }, + { + "epoch": 2.5816578310898297, + "grad_norm": 0.5717945694923401, + "learning_rate": 1.1172761355589941e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71230952, + "step": 105675 + }, + { + "epoch": 2.581779981921677, + "grad_norm": 0.03975324705243111, + "learning_rate": 1.1171914460045278e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71234280, + "step": 105680 + }, + { + "epoch": 2.581902132753524, + "grad_norm": 0.23138143122196198, + "learning_rate": 1.1171067555978133e-06, + "loss": 0.0385, + "num_input_tokens_seen": 71237608, + "step": 105685 + }, + { + "epoch": 2.5820242835853713, + "grad_norm": 0.023231210187077522, + "learning_rate": 1.117022064339467e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71241192, + "step": 105690 + }, + { + "epoch": 2.5821464344172185, + "grad_norm": 0.015621090307831764, + "learning_rate": 1.1169373722301044e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71245544, + "step": 105695 + }, + { + "epoch": 2.5822685852490657, + "grad_norm": 0.008443130180239677, + "learning_rate": 1.1168526792703422e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71248680, + "step": 105700 + }, + { + "epoch": 2.582390736080913, + "grad_norm": 66.18633270263672, + "learning_rate": 1.1167679854607962e-06, + "loss": 0.1098, + "num_input_tokens_seen": 71251624, + "step": 105705 + }, + { + "epoch": 2.58251288691276, + "grad_norm": 0.10550694167613983, + "learning_rate": 1.1166832908020815e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71255144, + "step": 105710 + }, + { + "epoch": 2.5826350377446072, + "grad_norm": 0.1457262635231018, + "learning_rate": 1.1165985952948146e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71258600, + "step": 105715 + }, + { + "epoch": 2.582757188576454, + "grad_norm": 0.010310126468539238, + "learning_rate": 1.1165138989396114e-06, + "loss": 0.1239, + "num_input_tokens_seen": 71261992, + "step": 105720 + }, + { + "epoch": 2.5828793394083016, + "grad_norm": 0.19131167232990265, + "learning_rate": 1.116429201737088e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71265256, + "step": 105725 + }, + { + "epoch": 2.5830014902401484, + "grad_norm": 0.03742775321006775, + "learning_rate": 1.11634450368786e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71268968, + "step": 105730 + }, + { + "epoch": 2.5831236410719955, + "grad_norm": 0.16259603202342987, + "learning_rate": 1.1162598047925434e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71272360, + "step": 105735 + }, + { + "epoch": 2.5832457919038427, + "grad_norm": 0.008227083832025528, + "learning_rate": 1.1161751050517544e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71275432, + "step": 105740 + }, + { + "epoch": 2.58336794273569, + "grad_norm": 0.025638852268457413, + "learning_rate": 1.1160904044661086e-06, + "loss": 0.0468, + "num_input_tokens_seen": 71278632, + "step": 105745 + }, + { + "epoch": 2.583490093567537, + "grad_norm": 0.11284564435482025, + "learning_rate": 1.1160057030362221e-06, + "loss": 0.0655, + "num_input_tokens_seen": 71281960, + "step": 105750 + }, + { + "epoch": 2.5836122443993843, + "grad_norm": 0.0041786786168813705, + "learning_rate": 1.115921000762711e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71285288, + "step": 105755 + }, + { + "epoch": 2.5837343952312315, + "grad_norm": 0.23622022569179535, + "learning_rate": 1.115836297646191e-06, + "loss": 0.0257, + "num_input_tokens_seen": 71288616, + "step": 105760 + }, + { + "epoch": 2.5838565460630787, + "grad_norm": 17.04725456237793, + "learning_rate": 1.1157515936872785e-06, + "loss": 0.062, + "num_input_tokens_seen": 71292264, + "step": 105765 + }, + { + "epoch": 2.583978696894926, + "grad_norm": 0.0007923907251097262, + "learning_rate": 1.1156668888865895e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71295656, + "step": 105770 + }, + { + "epoch": 2.584100847726773, + "grad_norm": 56.511810302734375, + "learning_rate": 1.1155821832447394e-06, + "loss": 0.1369, + "num_input_tokens_seen": 71299240, + "step": 105775 + }, + { + "epoch": 2.5842229985586203, + "grad_norm": 0.011822639964520931, + "learning_rate": 1.1154974767623448e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71302376, + "step": 105780 + }, + { + "epoch": 2.5843451493904674, + "grad_norm": 0.03911390155553818, + "learning_rate": 1.1154127694400215e-06, + "loss": 0.0633, + "num_input_tokens_seen": 71305704, + "step": 105785 + }, + { + "epoch": 2.5844673002223146, + "grad_norm": 0.12544453144073486, + "learning_rate": 1.1153280612783856e-06, + "loss": 0.1287, + "num_input_tokens_seen": 71309544, + "step": 105790 + }, + { + "epoch": 2.584589451054162, + "grad_norm": 0.017842907458543777, + "learning_rate": 1.1152433522780526e-06, + "loss": 0.0481, + "num_input_tokens_seen": 71313256, + "step": 105795 + }, + { + "epoch": 2.584711601886009, + "grad_norm": 0.09327222406864166, + "learning_rate": 1.1151586424396394e-06, + "loss": 0.1074, + "num_input_tokens_seen": 71316520, + "step": 105800 + }, + { + "epoch": 2.5848337527178558, + "grad_norm": 0.22710292041301727, + "learning_rate": 1.1150739317637613e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71319912, + "step": 105805 + }, + { + "epoch": 2.5849559035497034, + "grad_norm": 0.022945843636989594, + "learning_rate": 1.1149892202510347e-06, + "loss": 0.0219, + "num_input_tokens_seen": 71323048, + "step": 105810 + }, + { + "epoch": 2.58507805438155, + "grad_norm": 0.42688482999801636, + "learning_rate": 1.1149045079020755e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71326248, + "step": 105815 + }, + { + "epoch": 2.5852002052133978, + "grad_norm": 0.21542878448963165, + "learning_rate": 1.1148197947174997e-06, + "loss": 0.049, + "num_input_tokens_seen": 71329448, + "step": 105820 + }, + { + "epoch": 2.5853223560452445, + "grad_norm": 0.04005388543009758, + "learning_rate": 1.1147350806979237e-06, + "loss": 0.0583, + "num_input_tokens_seen": 71332968, + "step": 105825 + }, + { + "epoch": 2.5854445068770917, + "grad_norm": 0.005568156484514475, + "learning_rate": 1.1146503658439632e-06, + "loss": 0.0449, + "num_input_tokens_seen": 71336360, + "step": 105830 + }, + { + "epoch": 2.585566657708939, + "grad_norm": 0.0002745167294051498, + "learning_rate": 1.1145656501562346e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71340392, + "step": 105835 + }, + { + "epoch": 2.585688808540786, + "grad_norm": 0.038722485303878784, + "learning_rate": 1.1144809336353538e-06, + "loss": 0.0697, + "num_input_tokens_seen": 71343592, + "step": 105840 + }, + { + "epoch": 2.5858109593726333, + "grad_norm": 0.028439607471227646, + "learning_rate": 1.1143962162819367e-06, + "loss": 0.0399, + "num_input_tokens_seen": 71346856, + "step": 105845 + }, + { + "epoch": 2.5859331102044805, + "grad_norm": 0.014738436788320541, + "learning_rate": 1.1143114980965993e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71349672, + "step": 105850 + }, + { + "epoch": 2.5860552610363277, + "grad_norm": 0.0705747976899147, + "learning_rate": 1.1142267790799581e-06, + "loss": 0.0481, + "num_input_tokens_seen": 71353192, + "step": 105855 + }, + { + "epoch": 2.586177411868175, + "grad_norm": 0.7925322651863098, + "learning_rate": 1.1141420592326292e-06, + "loss": 0.0546, + "num_input_tokens_seen": 71356264, + "step": 105860 + }, + { + "epoch": 2.586299562700022, + "grad_norm": 0.018346594646573067, + "learning_rate": 1.1140573385552285e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71360040, + "step": 105865 + }, + { + "epoch": 2.5864217135318692, + "grad_norm": 0.09538698196411133, + "learning_rate": 1.113972617048372e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71363304, + "step": 105870 + }, + { + "epoch": 2.5865438643637164, + "grad_norm": 0.22416944801807404, + "learning_rate": 1.1138878947126761e-06, + "loss": 0.038, + "num_input_tokens_seen": 71366824, + "step": 105875 + }, + { + "epoch": 2.5866660151955636, + "grad_norm": 0.07192140817642212, + "learning_rate": 1.113803171548757e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71370536, + "step": 105880 + }, + { + "epoch": 2.586788166027411, + "grad_norm": 1.827846884727478, + "learning_rate": 1.1137184475572305e-06, + "loss": 0.0009, + "num_input_tokens_seen": 71373480, + "step": 105885 + }, + { + "epoch": 2.5869103168592575, + "grad_norm": 0.02320150099694729, + "learning_rate": 1.1136337227387126e-06, + "loss": 0.0489, + "num_input_tokens_seen": 71376616, + "step": 105890 + }, + { + "epoch": 2.587032467691105, + "grad_norm": 0.010031183250248432, + "learning_rate": 1.11354899709382e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71380200, + "step": 105895 + }, + { + "epoch": 2.587154618522952, + "grad_norm": 0.026507128030061722, + "learning_rate": 1.1134642706231685e-06, + "loss": 0.0481, + "num_input_tokens_seen": 71383464, + "step": 105900 + }, + { + "epoch": 2.5872767693547996, + "grad_norm": 58.687049865722656, + "learning_rate": 1.1133795433273742e-06, + "loss": 0.0592, + "num_input_tokens_seen": 71386792, + "step": 105905 + }, + { + "epoch": 2.5873989201866463, + "grad_norm": 0.026232236996293068, + "learning_rate": 1.1132948152070535e-06, + "loss": 0.0425, + "num_input_tokens_seen": 71390312, + "step": 105910 + }, + { + "epoch": 2.5875210710184935, + "grad_norm": 18.692960739135742, + "learning_rate": 1.1132100862628222e-06, + "loss": 0.0389, + "num_input_tokens_seen": 71394088, + "step": 105915 + }, + { + "epoch": 2.5876432218503407, + "grad_norm": 0.4010098874568939, + "learning_rate": 1.1131253564952969e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71398184, + "step": 105920 + }, + { + "epoch": 2.587765372682188, + "grad_norm": 0.20872338116168976, + "learning_rate": 1.1130406259050935e-06, + "loss": 0.0373, + "num_input_tokens_seen": 71401448, + "step": 105925 + }, + { + "epoch": 2.587887523514035, + "grad_norm": 0.05918128043413162, + "learning_rate": 1.1129558944928284e-06, + "loss": 0.0506, + "num_input_tokens_seen": 71404776, + "step": 105930 + }, + { + "epoch": 2.5880096743458822, + "grad_norm": 0.23181723058223724, + "learning_rate": 1.1128711622591173e-06, + "loss": 0.1251, + "num_input_tokens_seen": 71408168, + "step": 105935 + }, + { + "epoch": 2.5881318251777294, + "grad_norm": 19.287378311157227, + "learning_rate": 1.1127864292045773e-06, + "loss": 0.0879, + "num_input_tokens_seen": 71411432, + "step": 105940 + }, + { + "epoch": 2.5882539760095766, + "grad_norm": 0.06729073822498322, + "learning_rate": 1.1127016953298237e-06, + "loss": 0.0024, + "num_input_tokens_seen": 71414632, + "step": 105945 + }, + { + "epoch": 2.588376126841424, + "grad_norm": 0.15538254380226135, + "learning_rate": 1.112616960635473e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71418152, + "step": 105950 + }, + { + "epoch": 2.588498277673271, + "grad_norm": 0.01470793318003416, + "learning_rate": 1.1125322251221416e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71421224, + "step": 105955 + }, + { + "epoch": 2.588620428505118, + "grad_norm": 0.5175967216491699, + "learning_rate": 1.1124474887904457e-06, + "loss": 0.1095, + "num_input_tokens_seen": 71424552, + "step": 105960 + }, + { + "epoch": 2.5887425793369654, + "grad_norm": 0.01623247191309929, + "learning_rate": 1.1123627516410013e-06, + "loss": 0.0455, + "num_input_tokens_seen": 71427688, + "step": 105965 + }, + { + "epoch": 2.5888647301688126, + "grad_norm": 0.009030419401824474, + "learning_rate": 1.1122780136744247e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71431080, + "step": 105970 + }, + { + "epoch": 2.5889868810006598, + "grad_norm": 0.1200258657336235, + "learning_rate": 1.1121932748913318e-06, + "loss": 0.0223, + "num_input_tokens_seen": 71434536, + "step": 105975 + }, + { + "epoch": 2.589109031832507, + "grad_norm": 0.030536944046616554, + "learning_rate": 1.11210853529234e-06, + "loss": 0.0356, + "num_input_tokens_seen": 71438120, + "step": 105980 + }, + { + "epoch": 2.5892311826643537, + "grad_norm": 0.0698750764131546, + "learning_rate": 1.1120237948780642e-06, + "loss": 0.0387, + "num_input_tokens_seen": 71441384, + "step": 105985 + }, + { + "epoch": 2.5893533334962013, + "grad_norm": 27.646520614624023, + "learning_rate": 1.1119390536491218e-06, + "loss": 0.0381, + "num_input_tokens_seen": 71444392, + "step": 105990 + }, + { + "epoch": 2.589475484328048, + "grad_norm": 34.76492691040039, + "learning_rate": 1.1118543116061282e-06, + "loss": 0.0539, + "num_input_tokens_seen": 71447464, + "step": 105995 + }, + { + "epoch": 2.5895976351598953, + "grad_norm": 0.014757978729903698, + "learning_rate": 1.1117695687497e-06, + "loss": 0.0538, + "num_input_tokens_seen": 71451240, + "step": 106000 + }, + { + "epoch": 2.5897197859917425, + "grad_norm": 0.20501114428043365, + "learning_rate": 1.1116848250804529e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71454376, + "step": 106005 + }, + { + "epoch": 2.5898419368235897, + "grad_norm": 0.24411644041538239, + "learning_rate": 1.1116000805990043e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71457576, + "step": 106010 + }, + { + "epoch": 2.589964087655437, + "grad_norm": 0.018468251451849937, + "learning_rate": 1.1115153353059698e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71461032, + "step": 106015 + }, + { + "epoch": 2.590086238487284, + "grad_norm": 17.73522186279297, + "learning_rate": 1.1114305892019656e-06, + "loss": 0.0524, + "num_input_tokens_seen": 71464744, + "step": 106020 + }, + { + "epoch": 2.590208389319131, + "grad_norm": 0.05297902598977089, + "learning_rate": 1.1113458422876085e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71468840, + "step": 106025 + }, + { + "epoch": 2.5903305401509784, + "grad_norm": 0.09020661562681198, + "learning_rate": 1.1112610945635145e-06, + "loss": 0.001, + "num_input_tokens_seen": 71472552, + "step": 106030 + }, + { + "epoch": 2.5904526909828256, + "grad_norm": 0.012612675316631794, + "learning_rate": 1.1111763460302994e-06, + "loss": 0.0379, + "num_input_tokens_seen": 71475496, + "step": 106035 + }, + { + "epoch": 2.590574841814673, + "grad_norm": 0.7227842211723328, + "learning_rate": 1.1110915966885805e-06, + "loss": 0.0496, + "num_input_tokens_seen": 71478888, + "step": 106040 + }, + { + "epoch": 2.59069699264652, + "grad_norm": 0.1919873207807541, + "learning_rate": 1.1110068465389735e-06, + "loss": 0.1119, + "num_input_tokens_seen": 71482664, + "step": 106045 + }, + { + "epoch": 2.590819143478367, + "grad_norm": 0.0008974491502158344, + "learning_rate": 1.1109220955820948e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71485992, + "step": 106050 + }, + { + "epoch": 2.5909412943102144, + "grad_norm": 0.0659492015838623, + "learning_rate": 1.1108373438185608e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71489320, + "step": 106055 + }, + { + "epoch": 2.5910634451420615, + "grad_norm": 10.360064506530762, + "learning_rate": 1.1107525912489878e-06, + "loss": 0.001, + "num_input_tokens_seen": 71492648, + "step": 106060 + }, + { + "epoch": 2.5911855959739087, + "grad_norm": 0.03574829176068306, + "learning_rate": 1.1106678378739922e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71495592, + "step": 106065 + }, + { + "epoch": 2.5913077468057555, + "grad_norm": 0.020459705963730812, + "learning_rate": 1.11058308369419e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71498856, + "step": 106070 + }, + { + "epoch": 2.591429897637603, + "grad_norm": 0.02584422007203102, + "learning_rate": 1.1104983287101982e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71502248, + "step": 106075 + }, + { + "epoch": 2.59155204846945, + "grad_norm": 0.10443761199712753, + "learning_rate": 1.1104135729226329e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71505512, + "step": 106080 + }, + { + "epoch": 2.5916741993012975, + "grad_norm": 0.01857997104525566, + "learning_rate": 1.1103288163321103e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71508584, + "step": 106085 + }, + { + "epoch": 2.5917963501331442, + "grad_norm": 0.08549598604440689, + "learning_rate": 1.1102440589392468e-06, + "loss": 0.0564, + "num_input_tokens_seen": 71511720, + "step": 106090 + }, + { + "epoch": 2.5919185009649914, + "grad_norm": 0.0008379715145565569, + "learning_rate": 1.110159300744659e-06, + "loss": 0.0, + "num_input_tokens_seen": 71515112, + "step": 106095 + }, + { + "epoch": 2.5920406517968386, + "grad_norm": 63.77941131591797, + "learning_rate": 1.1100745417489629e-06, + "loss": 0.0367, + "num_input_tokens_seen": 71518568, + "step": 106100 + }, + { + "epoch": 2.592162802628686, + "grad_norm": 0.015878800302743912, + "learning_rate": 1.1099897819527755e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71521960, + "step": 106105 + }, + { + "epoch": 2.592284953460533, + "grad_norm": 38.300418853759766, + "learning_rate": 1.1099050213567127e-06, + "loss": 0.1, + "num_input_tokens_seen": 71525032, + "step": 106110 + }, + { + "epoch": 2.59240710429238, + "grad_norm": 0.010526223108172417, + "learning_rate": 1.109820259961391e-06, + "loss": 0.0616, + "num_input_tokens_seen": 71528040, + "step": 106115 + }, + { + "epoch": 2.5925292551242274, + "grad_norm": 0.005474468693137169, + "learning_rate": 1.1097354977674267e-06, + "loss": 0.034, + "num_input_tokens_seen": 71531688, + "step": 106120 + }, + { + "epoch": 2.5926514059560746, + "grad_norm": 0.024080710485577583, + "learning_rate": 1.1096507347754364e-06, + "loss": 0.0806, + "num_input_tokens_seen": 71535144, + "step": 106125 + }, + { + "epoch": 2.5927735567879218, + "grad_norm": 0.011120163835585117, + "learning_rate": 1.1095659709860363e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71538152, + "step": 106130 + }, + { + "epoch": 2.592895707619769, + "grad_norm": 0.10011861473321915, + "learning_rate": 1.1094812063998431e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71541800, + "step": 106135 + }, + { + "epoch": 2.593017858451616, + "grad_norm": 36.78319549560547, + "learning_rate": 1.1093964410174733e-06, + "loss": 0.0434, + "num_input_tokens_seen": 71545064, + "step": 106140 + }, + { + "epoch": 2.5931400092834633, + "grad_norm": 0.029222454875707626, + "learning_rate": 1.1093116748395432e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71548392, + "step": 106145 + }, + { + "epoch": 2.5932621601153105, + "grad_norm": 0.10176047682762146, + "learning_rate": 1.1092269078666689e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71552168, + "step": 106150 + }, + { + "epoch": 2.5933843109471577, + "grad_norm": 0.03981450945138931, + "learning_rate": 1.1091421400994674e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71555880, + "step": 106155 + }, + { + "epoch": 2.593506461779005, + "grad_norm": 0.033819641917943954, + "learning_rate": 1.1090573715385547e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71559080, + "step": 106160 + }, + { + "epoch": 2.5936286126108516, + "grad_norm": 0.00025354253011755645, + "learning_rate": 1.1089726021845475e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71562664, + "step": 106165 + }, + { + "epoch": 2.5937507634426993, + "grad_norm": 98.45695495605469, + "learning_rate": 1.1088878320380623e-06, + "loss": 0.1754, + "num_input_tokens_seen": 71565736, + "step": 106170 + }, + { + "epoch": 2.593872914274546, + "grad_norm": 0.010990033857524395, + "learning_rate": 1.1088030610997155e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71568872, + "step": 106175 + }, + { + "epoch": 2.593995065106393, + "grad_norm": 0.08195897191762924, + "learning_rate": 1.1087182893701234e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71571944, + "step": 106180 + }, + { + "epoch": 2.5941172159382404, + "grad_norm": 0.07107039541006088, + "learning_rate": 1.108633516849903e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71575016, + "step": 106185 + }, + { + "epoch": 2.5942393667700876, + "grad_norm": 46.70288848876953, + "learning_rate": 1.1085487435396703e-06, + "loss": 0.0885, + "num_input_tokens_seen": 71578600, + "step": 106190 + }, + { + "epoch": 2.594361517601935, + "grad_norm": 17.831167221069336, + "learning_rate": 1.108463969440042e-06, + "loss": 0.1151, + "num_input_tokens_seen": 71582120, + "step": 106195 + }, + { + "epoch": 2.594483668433782, + "grad_norm": 0.004120448138564825, + "learning_rate": 1.1083791945516344e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71585640, + "step": 106200 + }, + { + "epoch": 2.594605819265629, + "grad_norm": 0.009358244016766548, + "learning_rate": 1.108294418875064e-06, + "loss": 0.0824, + "num_input_tokens_seen": 71588648, + "step": 106205 + }, + { + "epoch": 2.5947279700974764, + "grad_norm": 11.974853515625, + "learning_rate": 1.1082096424109476e-06, + "loss": 0.0405, + "num_input_tokens_seen": 71592232, + "step": 106210 + }, + { + "epoch": 2.5948501209293235, + "grad_norm": 0.01048423070460558, + "learning_rate": 1.1081248651599017e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71595624, + "step": 106215 + }, + { + "epoch": 2.5949722717611707, + "grad_norm": 0.014245947822928429, + "learning_rate": 1.1080400871225429e-06, + "loss": 0.0711, + "num_input_tokens_seen": 71598632, + "step": 106220 + }, + { + "epoch": 2.595094422593018, + "grad_norm": 0.20360706746578217, + "learning_rate": 1.1079553082994868e-06, + "loss": 0.0373, + "num_input_tokens_seen": 71602024, + "step": 106225 + }, + { + "epoch": 2.595216573424865, + "grad_norm": 0.06869331747293472, + "learning_rate": 1.1078705286913513e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71605224, + "step": 106230 + }, + { + "epoch": 2.5953387242567123, + "grad_norm": 0.30428367853164673, + "learning_rate": 1.107785748298752e-06, + "loss": 0.001, + "num_input_tokens_seen": 71608104, + "step": 106235 + }, + { + "epoch": 2.5954608750885595, + "grad_norm": 0.010862280614674091, + "learning_rate": 1.1077009671223059e-06, + "loss": 0.0735, + "num_input_tokens_seen": 71611368, + "step": 106240 + }, + { + "epoch": 2.5955830259204067, + "grad_norm": 0.18098630011081696, + "learning_rate": 1.1076161851626294e-06, + "loss": 0.0959, + "num_input_tokens_seen": 71615080, + "step": 106245 + }, + { + "epoch": 2.5957051767522534, + "grad_norm": 0.3075055181980133, + "learning_rate": 1.107531402420339e-06, + "loss": 0.0983, + "num_input_tokens_seen": 71619176, + "step": 106250 + }, + { + "epoch": 2.595827327584101, + "grad_norm": 0.5143350958824158, + "learning_rate": 1.1074466188960515e-06, + "loss": 0.0505, + "num_input_tokens_seen": 71622568, + "step": 106255 + }, + { + "epoch": 2.595949478415948, + "grad_norm": 0.11659617722034454, + "learning_rate": 1.107361834590383e-06, + "loss": 0.0746, + "num_input_tokens_seen": 71625832, + "step": 106260 + }, + { + "epoch": 2.5960716292477954, + "grad_norm": 0.02491629868745804, + "learning_rate": 1.1072770495039506e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71629352, + "step": 106265 + }, + { + "epoch": 2.596193780079642, + "grad_norm": 763.0545654296875, + "learning_rate": 1.1071922636373702e-06, + "loss": 0.1042, + "num_input_tokens_seen": 71632424, + "step": 106270 + }, + { + "epoch": 2.5963159309114894, + "grad_norm": 0.06559303402900696, + "learning_rate": 1.1071074769912593e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71635688, + "step": 106275 + }, + { + "epoch": 2.5964380817433366, + "grad_norm": 0.2602338194847107, + "learning_rate": 1.107022689566234e-06, + "loss": 0.0956, + "num_input_tokens_seen": 71639016, + "step": 106280 + }, + { + "epoch": 2.5965602325751838, + "grad_norm": 0.012517737224698067, + "learning_rate": 1.1069379013629105e-06, + "loss": 0.0249, + "num_input_tokens_seen": 71642856, + "step": 106285 + }, + { + "epoch": 2.596682383407031, + "grad_norm": 0.03258126974105835, + "learning_rate": 1.106853112381906e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71645800, + "step": 106290 + }, + { + "epoch": 2.596804534238878, + "grad_norm": 0.05590072646737099, + "learning_rate": 1.1067683226238372e-06, + "loss": 0.082, + "num_input_tokens_seen": 71649000, + "step": 106295 + }, + { + "epoch": 2.5969266850707253, + "grad_norm": 0.03783603757619858, + "learning_rate": 1.1066835320893204e-06, + "loss": 0.1012, + "num_input_tokens_seen": 71652136, + "step": 106300 + }, + { + "epoch": 2.5970488359025725, + "grad_norm": 0.008168010972440243, + "learning_rate": 1.1065987407789724e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71655272, + "step": 106305 + }, + { + "epoch": 2.5971709867344197, + "grad_norm": 0.15477074682712555, + "learning_rate": 1.1065139486934092e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71658728, + "step": 106310 + }, + { + "epoch": 2.597293137566267, + "grad_norm": 13.059903144836426, + "learning_rate": 1.106429155833248e-06, + "loss": 0.0397, + "num_input_tokens_seen": 71661928, + "step": 106315 + }, + { + "epoch": 2.597415288398114, + "grad_norm": 0.013284175656735897, + "learning_rate": 1.1063443621991056e-06, + "loss": 0.1028, + "num_input_tokens_seen": 71666088, + "step": 106320 + }, + { + "epoch": 2.5975374392299613, + "grad_norm": 0.03173963353037834, + "learning_rate": 1.1062595677915983e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71669416, + "step": 106325 + }, + { + "epoch": 2.5976595900618085, + "grad_norm": 1.1281355619430542, + "learning_rate": 1.1061747726113427e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71672616, + "step": 106330 + }, + { + "epoch": 2.597781740893655, + "grad_norm": 0.009192223660647869, + "learning_rate": 1.1060899766589558e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71675560, + "step": 106335 + }, + { + "epoch": 2.597903891725503, + "grad_norm": 0.3092971444129944, + "learning_rate": 1.1060051799350538e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71678952, + "step": 106340 + }, + { + "epoch": 2.5980260425573496, + "grad_norm": 0.025217989459633827, + "learning_rate": 1.105920382440254e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71682664, + "step": 106345 + }, + { + "epoch": 2.5981481933891972, + "grad_norm": 0.022733714431524277, + "learning_rate": 1.1058355841751723e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71685480, + "step": 106350 + }, + { + "epoch": 2.598270344221044, + "grad_norm": 0.054993562400341034, + "learning_rate": 1.105750785140426e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71689320, + "step": 106355 + }, + { + "epoch": 2.598392495052891, + "grad_norm": 0.02195347473025322, + "learning_rate": 1.1056659853366315e-06, + "loss": 0.0632, + "num_input_tokens_seen": 71692584, + "step": 106360 + }, + { + "epoch": 2.5985146458847383, + "grad_norm": 0.00801927875727415, + "learning_rate": 1.1055811847644057e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71696168, + "step": 106365 + }, + { + "epoch": 2.5986367967165855, + "grad_norm": 0.21472963690757751, + "learning_rate": 1.105496383424365e-06, + "loss": 0.0345, + "num_input_tokens_seen": 71699560, + "step": 106370 + }, + { + "epoch": 2.5987589475484327, + "grad_norm": 0.1342368721961975, + "learning_rate": 1.1054115813171262e-06, + "loss": 0.1055, + "num_input_tokens_seen": 71703080, + "step": 106375 + }, + { + "epoch": 2.59888109838028, + "grad_norm": 0.04510444775223732, + "learning_rate": 1.1053267784433057e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71706344, + "step": 106380 + }, + { + "epoch": 2.599003249212127, + "grad_norm": 0.01901618391275406, + "learning_rate": 1.105241974803521e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71709480, + "step": 106385 + }, + { + "epoch": 2.5991254000439743, + "grad_norm": 0.27060818672180176, + "learning_rate": 1.105157170398388e-06, + "loss": 0.1092, + "num_input_tokens_seen": 71713064, + "step": 106390 + }, + { + "epoch": 2.5992475508758215, + "grad_norm": 30.31696128845215, + "learning_rate": 1.105072365228524e-06, + "loss": 0.0829, + "num_input_tokens_seen": 71716264, + "step": 106395 + }, + { + "epoch": 2.5993697017076687, + "grad_norm": 0.026632215827703476, + "learning_rate": 1.1049875592945454e-06, + "loss": 0.0351, + "num_input_tokens_seen": 71719336, + "step": 106400 + }, + { + "epoch": 2.599491852539516, + "grad_norm": 0.02794702723622322, + "learning_rate": 1.1049027525970691e-06, + "loss": 0.0424, + "num_input_tokens_seen": 71722792, + "step": 106405 + }, + { + "epoch": 2.599614003371363, + "grad_norm": 0.04424641281366348, + "learning_rate": 1.104817945136712e-06, + "loss": 0.1048, + "num_input_tokens_seen": 71725992, + "step": 106410 + }, + { + "epoch": 2.5997361542032102, + "grad_norm": 0.14470571279525757, + "learning_rate": 1.1047331369140901e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71729832, + "step": 106415 + }, + { + "epoch": 2.5998583050350574, + "grad_norm": 0.006976842414587736, + "learning_rate": 1.1046483279298212e-06, + "loss": 0.0009, + "num_input_tokens_seen": 71733416, + "step": 106420 + }, + { + "epoch": 2.5999804558669046, + "grad_norm": 0.07952874898910522, + "learning_rate": 1.1045635181845212e-06, + "loss": 0.0681, + "num_input_tokens_seen": 71736872, + "step": 106425 + }, + { + "epoch": 2.6001026066987514, + "grad_norm": 0.03277485817670822, + "learning_rate": 1.104478707678807e-06, + "loss": 0.053, + "num_input_tokens_seen": 71741096, + "step": 106430 + }, + { + "epoch": 2.600224757530599, + "grad_norm": 0.6419253349304199, + "learning_rate": 1.104393896413296e-06, + "loss": 0.0423, + "num_input_tokens_seen": 71744616, + "step": 106435 + }, + { + "epoch": 2.6003469083624458, + "grad_norm": 94.72898864746094, + "learning_rate": 1.104309084388604e-06, + "loss": 0.0314, + "num_input_tokens_seen": 71748072, + "step": 106440 + }, + { + "epoch": 2.6004690591942934, + "grad_norm": 0.23949620127677917, + "learning_rate": 1.1042242716053486e-06, + "loss": 0.0395, + "num_input_tokens_seen": 71751656, + "step": 106445 + }, + { + "epoch": 2.60059121002614, + "grad_norm": 0.09659047424793243, + "learning_rate": 1.1041394580641464e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71754984, + "step": 106450 + }, + { + "epoch": 2.6007133608579873, + "grad_norm": 14.89390754699707, + "learning_rate": 1.104054643765614e-06, + "loss": 0.0907, + "num_input_tokens_seen": 71759144, + "step": 106455 + }, + { + "epoch": 2.6008355116898345, + "grad_norm": 0.34447380900382996, + "learning_rate": 1.103969828710368e-06, + "loss": 0.139, + "num_input_tokens_seen": 71762216, + "step": 106460 + }, + { + "epoch": 2.6009576625216817, + "grad_norm": 0.06632810831069946, + "learning_rate": 1.1038850128990255e-06, + "loss": 0.0731, + "num_input_tokens_seen": 71765288, + "step": 106465 + }, + { + "epoch": 2.601079813353529, + "grad_norm": 0.022017555311322212, + "learning_rate": 1.1038001963322031e-06, + "loss": 0.0346, + "num_input_tokens_seen": 71768616, + "step": 106470 + }, + { + "epoch": 2.601201964185376, + "grad_norm": 0.053672295063734055, + "learning_rate": 1.103715379010518e-06, + "loss": 0.1645, + "num_input_tokens_seen": 71771944, + "step": 106475 + }, + { + "epoch": 2.6013241150172233, + "grad_norm": 0.08040033280849457, + "learning_rate": 1.103630560934587e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71774824, + "step": 106480 + }, + { + "epoch": 2.6014462658490705, + "grad_norm": 0.009018289856612682, + "learning_rate": 1.1035457421050262e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71777704, + "step": 106485 + }, + { + "epoch": 2.6015684166809177, + "grad_norm": 38.83277130126953, + "learning_rate": 1.1034609225224531e-06, + "loss": 0.0582, + "num_input_tokens_seen": 71780904, + "step": 106490 + }, + { + "epoch": 2.601690567512765, + "grad_norm": 0.023919718340039253, + "learning_rate": 1.1033761021874844e-06, + "loss": 0.057, + "num_input_tokens_seen": 71784488, + "step": 106495 + }, + { + "epoch": 2.601812718344612, + "grad_norm": 0.6867104172706604, + "learning_rate": 1.103291281100737e-06, + "loss": 0.0008, + "num_input_tokens_seen": 71787816, + "step": 106500 + }, + { + "epoch": 2.601934869176459, + "grad_norm": 15.458674430847168, + "learning_rate": 1.1032064592628275e-06, + "loss": 0.0037, + "num_input_tokens_seen": 71791144, + "step": 106505 + }, + { + "epoch": 2.6020570200083064, + "grad_norm": 0.11199041455984116, + "learning_rate": 1.1031216366743727e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71794536, + "step": 106510 + }, + { + "epoch": 2.602179170840153, + "grad_norm": 0.729247510433197, + "learning_rate": 1.1030368133359897e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71797992, + "step": 106515 + }, + { + "epoch": 2.602301321672001, + "grad_norm": 0.02848580852150917, + "learning_rate": 1.1029519892482953e-06, + "loss": 0.0132, + "num_input_tokens_seen": 71801320, + "step": 106520 + }, + { + "epoch": 2.6024234725038475, + "grad_norm": 0.11427497118711472, + "learning_rate": 1.1028671644119066e-06, + "loss": 0.0659, + "num_input_tokens_seen": 71804712, + "step": 106525 + }, + { + "epoch": 2.602545623335695, + "grad_norm": 0.043485596776008606, + "learning_rate": 1.1027823388274397e-06, + "loss": 0.0356, + "num_input_tokens_seen": 71807976, + "step": 106530 + }, + { + "epoch": 2.602667774167542, + "grad_norm": 68.4632339477539, + "learning_rate": 1.1026975124955123e-06, + "loss": 0.0395, + "num_input_tokens_seen": 71811368, + "step": 106535 + }, + { + "epoch": 2.602789924999389, + "grad_norm": 0.018174318596720695, + "learning_rate": 1.1026126854167408e-06, + "loss": 0.0551, + "num_input_tokens_seen": 71814824, + "step": 106540 + }, + { + "epoch": 2.6029120758312363, + "grad_norm": 0.1868731528520584, + "learning_rate": 1.1025278575917425e-06, + "loss": 0.105, + "num_input_tokens_seen": 71818536, + "step": 106545 + }, + { + "epoch": 2.6030342266630835, + "grad_norm": 1.8846322298049927, + "learning_rate": 1.102443029021134e-06, + "loss": 0.1427, + "num_input_tokens_seen": 71821608, + "step": 106550 + }, + { + "epoch": 2.6031563774949307, + "grad_norm": 0.01644289679825306, + "learning_rate": 1.102358199705532e-06, + "loss": 0.0354, + "num_input_tokens_seen": 71825000, + "step": 106555 + }, + { + "epoch": 2.603278528326778, + "grad_norm": 1.783953070640564, + "learning_rate": 1.102273369645554e-06, + "loss": 0.1585, + "num_input_tokens_seen": 71828072, + "step": 106560 + }, + { + "epoch": 2.603400679158625, + "grad_norm": 0.03290699049830437, + "learning_rate": 1.1021885388418164e-06, + "loss": 0.0227, + "num_input_tokens_seen": 71831400, + "step": 106565 + }, + { + "epoch": 2.6035228299904722, + "grad_norm": 0.018067052587866783, + "learning_rate": 1.1021037072949362e-06, + "loss": 0.0262, + "num_input_tokens_seen": 71834344, + "step": 106570 + }, + { + "epoch": 2.6036449808223194, + "grad_norm": 0.012045308016240597, + "learning_rate": 1.1020188750055304e-06, + "loss": 0.0009, + "num_input_tokens_seen": 71837736, + "step": 106575 + }, + { + "epoch": 2.6037671316541666, + "grad_norm": 0.3654814660549164, + "learning_rate": 1.1019340419742157e-06, + "loss": 0.046, + "num_input_tokens_seen": 71841256, + "step": 106580 + }, + { + "epoch": 2.603889282486014, + "grad_norm": 0.29941239953041077, + "learning_rate": 1.1018492082016095e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71844264, + "step": 106585 + }, + { + "epoch": 2.604011433317861, + "grad_norm": 0.053947120904922485, + "learning_rate": 1.1017643736883284e-06, + "loss": 0.0029, + "num_input_tokens_seen": 71847912, + "step": 106590 + }, + { + "epoch": 2.604133584149708, + "grad_norm": 0.06559449434280396, + "learning_rate": 1.1016795384349892e-06, + "loss": 0.0539, + "num_input_tokens_seen": 71851176, + "step": 106595 + }, + { + "epoch": 2.6042557349815554, + "grad_norm": 0.10444041341543198, + "learning_rate": 1.1015947024422094e-06, + "loss": 0.0338, + "num_input_tokens_seen": 71854696, + "step": 106600 + }, + { + "epoch": 2.6043778858134026, + "grad_norm": 0.014229257591068745, + "learning_rate": 1.1015098657106054e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71857896, + "step": 106605 + }, + { + "epoch": 2.6045000366452493, + "grad_norm": 0.0054950471967458725, + "learning_rate": 1.1014250282407946e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71861032, + "step": 106610 + }, + { + "epoch": 2.604622187477097, + "grad_norm": 0.32495608925819397, + "learning_rate": 1.1013401900333937e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71864104, + "step": 106615 + }, + { + "epoch": 2.6047443383089437, + "grad_norm": 0.012543436139822006, + "learning_rate": 1.1012553510890192e-06, + "loss": 0.1128, + "num_input_tokens_seen": 71867176, + "step": 106620 + }, + { + "epoch": 2.604866489140791, + "grad_norm": 0.019681131467223167, + "learning_rate": 1.101170511408289e-06, + "loss": 0.0007, + "num_input_tokens_seen": 71870120, + "step": 106625 + }, + { + "epoch": 2.604988639972638, + "grad_norm": 0.2520747184753418, + "learning_rate": 1.1010856709918193e-06, + "loss": 0.2105, + "num_input_tokens_seen": 71873704, + "step": 106630 + }, + { + "epoch": 2.6051107908044853, + "grad_norm": 0.009804549627006054, + "learning_rate": 1.1010008298402275e-06, + "loss": 0.0681, + "num_input_tokens_seen": 71876968, + "step": 106635 + }, + { + "epoch": 2.6052329416363325, + "grad_norm": 0.005678892135620117, + "learning_rate": 1.1009159879541307e-06, + "loss": 0.0504, + "num_input_tokens_seen": 71880104, + "step": 106640 + }, + { + "epoch": 2.6053550924681796, + "grad_norm": 21.121488571166992, + "learning_rate": 1.1008311453341455e-06, + "loss": 0.0761, + "num_input_tokens_seen": 71883816, + "step": 106645 + }, + { + "epoch": 2.605477243300027, + "grad_norm": 0.023501016199588776, + "learning_rate": 1.1007463019808892e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71887208, + "step": 106650 + }, + { + "epoch": 2.605599394131874, + "grad_norm": 0.01757156103849411, + "learning_rate": 1.1006614578949786e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71890280, + "step": 106655 + }, + { + "epoch": 2.605721544963721, + "grad_norm": 0.06994085013866425, + "learning_rate": 1.1005766130770312e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71893992, + "step": 106660 + }, + { + "epoch": 2.6058436957955684, + "grad_norm": 0.020432811230421066, + "learning_rate": 1.100491767527663e-06, + "loss": 0.0005, + "num_input_tokens_seen": 71896936, + "step": 106665 + }, + { + "epoch": 2.6059658466274156, + "grad_norm": 0.006643875502049923, + "learning_rate": 1.1004069212474921e-06, + "loss": 0.0567, + "num_input_tokens_seen": 71900264, + "step": 106670 + }, + { + "epoch": 2.606087997459263, + "grad_norm": 0.0019912130665034056, + "learning_rate": 1.1003220742371348e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71903848, + "step": 106675 + }, + { + "epoch": 2.60621014829111, + "grad_norm": 0.0037333867512643337, + "learning_rate": 1.1002372264972083e-06, + "loss": 0.0285, + "num_input_tokens_seen": 71907368, + "step": 106680 + }, + { + "epoch": 2.606332299122957, + "grad_norm": 0.4669528603553772, + "learning_rate": 1.1001523780283302e-06, + "loss": 0.0645, + "num_input_tokens_seen": 71910632, + "step": 106685 + }, + { + "epoch": 2.6064544499548044, + "grad_norm": 0.01946832798421383, + "learning_rate": 1.1000675288311166e-06, + "loss": 0.0915, + "num_input_tokens_seen": 71914152, + "step": 106690 + }, + { + "epoch": 2.606576600786651, + "grad_norm": 0.3970881998538971, + "learning_rate": 1.099982678906185e-06, + "loss": 0.0004, + "num_input_tokens_seen": 71917544, + "step": 106695 + }, + { + "epoch": 2.6066987516184987, + "grad_norm": 0.4034290611743927, + "learning_rate": 1.0998978282541527e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71920616, + "step": 106700 + }, + { + "epoch": 2.6068209024503455, + "grad_norm": 0.18643775582313538, + "learning_rate": 1.0998129768756365e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71923496, + "step": 106705 + }, + { + "epoch": 2.606943053282193, + "grad_norm": 0.07174662500619888, + "learning_rate": 1.0997281247712536e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71926760, + "step": 106710 + }, + { + "epoch": 2.60706520411404, + "grad_norm": 0.06458175182342529, + "learning_rate": 1.0996432719416209e-06, + "loss": 0.0627, + "num_input_tokens_seen": 71930024, + "step": 106715 + }, + { + "epoch": 2.607187354945887, + "grad_norm": 0.034823257476091385, + "learning_rate": 1.0995584183873553e-06, + "loss": 0.055, + "num_input_tokens_seen": 71933096, + "step": 106720 + }, + { + "epoch": 2.6073095057777342, + "grad_norm": 55.86451721191406, + "learning_rate": 1.0994735641090742e-06, + "loss": 0.1001, + "num_input_tokens_seen": 71936552, + "step": 106725 + }, + { + "epoch": 2.6074316566095814, + "grad_norm": 0.013018675148487091, + "learning_rate": 1.0993887091073947e-06, + "loss": 0.0001, + "num_input_tokens_seen": 71939752, + "step": 106730 + }, + { + "epoch": 2.6075538074414286, + "grad_norm": 11.976505279541016, + "learning_rate": 1.0993038533829338e-06, + "loss": 0.0597, + "num_input_tokens_seen": 71943208, + "step": 106735 + }, + { + "epoch": 2.607675958273276, + "grad_norm": 0.017002243548631668, + "learning_rate": 1.0992189969363084e-06, + "loss": 0.1481, + "num_input_tokens_seen": 71946408, + "step": 106740 + }, + { + "epoch": 2.607798109105123, + "grad_norm": 0.1695324033498764, + "learning_rate": 1.099134139768136e-06, + "loss": 0.0688, + "num_input_tokens_seen": 71949864, + "step": 106745 + }, + { + "epoch": 2.60792025993697, + "grad_norm": 18.079139709472656, + "learning_rate": 1.0990492818790331e-06, + "loss": 0.0416, + "num_input_tokens_seen": 71952872, + "step": 106750 + }, + { + "epoch": 2.6080424107688174, + "grad_norm": 0.03448851406574249, + "learning_rate": 1.0989644232696174e-06, + "loss": 0.0018, + "num_input_tokens_seen": 71955944, + "step": 106755 + }, + { + "epoch": 2.6081645616006646, + "grad_norm": 0.03426092118024826, + "learning_rate": 1.0988795639405056e-06, + "loss": 0.0482, + "num_input_tokens_seen": 71959528, + "step": 106760 + }, + { + "epoch": 2.6082867124325118, + "grad_norm": 33.893646240234375, + "learning_rate": 1.0987947038923155e-06, + "loss": 0.0607, + "num_input_tokens_seen": 71962856, + "step": 106765 + }, + { + "epoch": 2.608408863264359, + "grad_norm": 0.21278434991836548, + "learning_rate": 1.0987098431256637e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71966312, + "step": 106770 + }, + { + "epoch": 2.608531014096206, + "grad_norm": 0.15520715713500977, + "learning_rate": 1.098624981641167e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71969384, + "step": 106775 + }, + { + "epoch": 2.6086531649280533, + "grad_norm": 0.04019223898649216, + "learning_rate": 1.0985401194394431e-06, + "loss": 0.0861, + "num_input_tokens_seen": 71973480, + "step": 106780 + }, + { + "epoch": 2.6087753157599005, + "grad_norm": 0.09767397493124008, + "learning_rate": 1.0984552565211087e-06, + "loss": 0.0238, + "num_input_tokens_seen": 71976744, + "step": 106785 + }, + { + "epoch": 2.6088974665917473, + "grad_norm": 0.013539946638047695, + "learning_rate": 1.0983703928867813e-06, + "loss": 0.0192, + "num_input_tokens_seen": 71979880, + "step": 106790 + }, + { + "epoch": 2.609019617423595, + "grad_norm": 0.059728436172008514, + "learning_rate": 1.098285528537078e-06, + "loss": 0.0003, + "num_input_tokens_seen": 71983144, + "step": 106795 + }, + { + "epoch": 2.6091417682554416, + "grad_norm": 0.02229916676878929, + "learning_rate": 1.098200663472616e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71986536, + "step": 106800 + }, + { + "epoch": 2.609263919087289, + "grad_norm": 0.042197611182928085, + "learning_rate": 1.0981157976940124e-06, + "loss": 0.0006, + "num_input_tokens_seen": 71990184, + "step": 106805 + }, + { + "epoch": 2.609386069919136, + "grad_norm": 0.20854344964027405, + "learning_rate": 1.0980309312018841e-06, + "loss": 0.0002, + "num_input_tokens_seen": 71993320, + "step": 106810 + }, + { + "epoch": 2.609508220750983, + "grad_norm": 18.07057762145996, + "learning_rate": 1.0979460639968485e-06, + "loss": 0.0633, + "num_input_tokens_seen": 71996456, + "step": 106815 + }, + { + "epoch": 2.6096303715828304, + "grad_norm": 0.38547414541244507, + "learning_rate": 1.097861196079523e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72000232, + "step": 106820 + }, + { + "epoch": 2.6097525224146776, + "grad_norm": 0.01530816126614809, + "learning_rate": 1.0977763274505244e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72003368, + "step": 106825 + }, + { + "epoch": 2.609874673246525, + "grad_norm": 0.022470971569418907, + "learning_rate": 1.09769145811047e-06, + "loss": 0.0448, + "num_input_tokens_seen": 72006568, + "step": 106830 + }, + { + "epoch": 2.609996824078372, + "grad_norm": 0.01738395355641842, + "learning_rate": 1.0976065880599772e-06, + "loss": 0.0073, + "num_input_tokens_seen": 72009896, + "step": 106835 + }, + { + "epoch": 2.610118974910219, + "grad_norm": 0.004830238409340382, + "learning_rate": 1.097521717299663e-06, + "loss": 0.0017, + "num_input_tokens_seen": 72013352, + "step": 106840 + }, + { + "epoch": 2.6102411257420663, + "grad_norm": 0.44913017749786377, + "learning_rate": 1.0974368458301444e-06, + "loss": 0.0055, + "num_input_tokens_seen": 72017320, + "step": 106845 + }, + { + "epoch": 2.6103632765739135, + "grad_norm": 0.005423199385404587, + "learning_rate": 1.0973519736520392e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72020520, + "step": 106850 + }, + { + "epoch": 2.6104854274057607, + "grad_norm": 0.09631983935832977, + "learning_rate": 1.0972671007659642e-06, + "loss": 0.0536, + "num_input_tokens_seen": 72023848, + "step": 106855 + }, + { + "epoch": 2.610607578237608, + "grad_norm": 0.18404462933540344, + "learning_rate": 1.0971822271725367e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72027112, + "step": 106860 + }, + { + "epoch": 2.610729729069455, + "grad_norm": 0.028065208345651627, + "learning_rate": 1.0970973528723736e-06, + "loss": 0.0671, + "num_input_tokens_seen": 72030632, + "step": 106865 + }, + { + "epoch": 2.6108518799013023, + "grad_norm": 0.21517078578472137, + "learning_rate": 1.0970124778660928e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72034280, + "step": 106870 + }, + { + "epoch": 2.610974030733149, + "grad_norm": 0.14804969727993011, + "learning_rate": 1.096927602154311e-06, + "loss": 0.0008, + "num_input_tokens_seen": 72037928, + "step": 106875 + }, + { + "epoch": 2.6110961815649967, + "grad_norm": 0.0020192775409668684, + "learning_rate": 1.0968427257376455e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72041128, + "step": 106880 + }, + { + "epoch": 2.6112183323968434, + "grad_norm": 0.004779881797730923, + "learning_rate": 1.096757848616714e-06, + "loss": 0.1136, + "num_input_tokens_seen": 72045288, + "step": 106885 + }, + { + "epoch": 2.611340483228691, + "grad_norm": 0.315485417842865, + "learning_rate": 1.096672970792133e-06, + "loss": 0.0686, + "num_input_tokens_seen": 72048680, + "step": 106890 + }, + { + "epoch": 2.611462634060538, + "grad_norm": 0.8199894428253174, + "learning_rate": 1.0965880922645204e-06, + "loss": 0.0818, + "num_input_tokens_seen": 72052328, + "step": 106895 + }, + { + "epoch": 2.611584784892385, + "grad_norm": 0.37913942337036133, + "learning_rate": 1.0965032130344932e-06, + "loss": 0.0543, + "num_input_tokens_seen": 72055592, + "step": 106900 + }, + { + "epoch": 2.611706935724232, + "grad_norm": 0.049211300909519196, + "learning_rate": 1.0964183331026686e-06, + "loss": 0.039, + "num_input_tokens_seen": 72058920, + "step": 106905 + }, + { + "epoch": 2.6118290865560794, + "grad_norm": 12.023048400878906, + "learning_rate": 1.0963334524696637e-06, + "loss": 0.038, + "num_input_tokens_seen": 72062568, + "step": 106910 + }, + { + "epoch": 2.6119512373879266, + "grad_norm": 0.03025723248720169, + "learning_rate": 1.0962485711360963e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72065960, + "step": 106915 + }, + { + "epoch": 2.6120733882197738, + "grad_norm": 0.030587896704673767, + "learning_rate": 1.0961636891025836e-06, + "loss": 0.0006, + "num_input_tokens_seen": 72069352, + "step": 106920 + }, + { + "epoch": 2.612195539051621, + "grad_norm": 0.006601739674806595, + "learning_rate": 1.0960788063697425e-06, + "loss": 0.0421, + "num_input_tokens_seen": 72072424, + "step": 106925 + }, + { + "epoch": 2.612317689883468, + "grad_norm": 0.10948925465345383, + "learning_rate": 1.0959939229381906e-06, + "loss": 0.0334, + "num_input_tokens_seen": 72075944, + "step": 106930 + }, + { + "epoch": 2.6124398407153153, + "grad_norm": 0.01464875414967537, + "learning_rate": 1.0959090388085448e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72079464, + "step": 106935 + }, + { + "epoch": 2.6125619915471625, + "grad_norm": 0.022532816976308823, + "learning_rate": 1.0958241539814226e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72082408, + "step": 106940 + }, + { + "epoch": 2.6126841423790097, + "grad_norm": 0.08455182611942291, + "learning_rate": 1.0957392684574415e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72085928, + "step": 106945 + }, + { + "epoch": 2.612806293210857, + "grad_norm": 0.02040986716747284, + "learning_rate": 1.095654382237219e-06, + "loss": 0.0555, + "num_input_tokens_seen": 72089000, + "step": 106950 + }, + { + "epoch": 2.612928444042704, + "grad_norm": 35.983070373535156, + "learning_rate": 1.0955694953213718e-06, + "loss": 0.0447, + "num_input_tokens_seen": 72092456, + "step": 106955 + }, + { + "epoch": 2.613050594874551, + "grad_norm": 0.011616901494562626, + "learning_rate": 1.0954846077105178e-06, + "loss": 0.0293, + "num_input_tokens_seen": 72095720, + "step": 106960 + }, + { + "epoch": 2.6131727457063985, + "grad_norm": 0.0024410607293248177, + "learning_rate": 1.0953997194052738e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72100136, + "step": 106965 + }, + { + "epoch": 2.613294896538245, + "grad_norm": 0.21549752354621887, + "learning_rate": 1.0953148304062575e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72103208, + "step": 106970 + }, + { + "epoch": 2.613417047370093, + "grad_norm": 27.977876663208008, + "learning_rate": 1.095229940714086e-06, + "loss": 0.0588, + "num_input_tokens_seen": 72106856, + "step": 106975 + }, + { + "epoch": 2.6135391982019396, + "grad_norm": 0.007714892737567425, + "learning_rate": 1.0951450503293769e-06, + "loss": 0.1241, + "num_input_tokens_seen": 72110568, + "step": 106980 + }, + { + "epoch": 2.6136613490337868, + "grad_norm": 0.7092371582984924, + "learning_rate": 1.0950601592527473e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72113512, + "step": 106985 + }, + { + "epoch": 2.613783499865634, + "grad_norm": 0.025556981563568115, + "learning_rate": 1.0949752674848144e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72117032, + "step": 106990 + }, + { + "epoch": 2.613905650697481, + "grad_norm": 0.08398312330245972, + "learning_rate": 1.0948903750261962e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72120360, + "step": 106995 + }, + { + "epoch": 2.6140278015293283, + "grad_norm": 0.1594977229833603, + "learning_rate": 1.0948054818775094e-06, + "loss": 0.0755, + "num_input_tokens_seen": 72123688, + "step": 107000 + }, + { + "epoch": 2.6141499523611755, + "grad_norm": 0.054152004420757294, + "learning_rate": 1.094720588039372e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72126952, + "step": 107005 + }, + { + "epoch": 2.6142721031930227, + "grad_norm": 0.09151487797498703, + "learning_rate": 1.0946356935124006e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72130472, + "step": 107010 + }, + { + "epoch": 2.61439425402487, + "grad_norm": 14.935052871704102, + "learning_rate": 1.0945507982972134e-06, + "loss": 0.1374, + "num_input_tokens_seen": 72134248, + "step": 107015 + }, + { + "epoch": 2.614516404856717, + "grad_norm": 0.003952572587877512, + "learning_rate": 1.0944659023944269e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72137832, + "step": 107020 + }, + { + "epoch": 2.6146385556885643, + "grad_norm": 0.05129333212971687, + "learning_rate": 1.0943810058046591e-06, + "loss": 0.0468, + "num_input_tokens_seen": 72141096, + "step": 107025 + }, + { + "epoch": 2.6147607065204115, + "grad_norm": 0.030844559893012047, + "learning_rate": 1.0942961085285275e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72144488, + "step": 107030 + }, + { + "epoch": 2.6148828573522587, + "grad_norm": 0.01932377554476261, + "learning_rate": 1.094211210566649e-06, + "loss": 0.0328, + "num_input_tokens_seen": 72147944, + "step": 107035 + }, + { + "epoch": 2.615005008184106, + "grad_norm": 0.043806299567222595, + "learning_rate": 1.0941263119196413e-06, + "loss": 0.0216, + "num_input_tokens_seen": 72151528, + "step": 107040 + }, + { + "epoch": 2.615127159015953, + "grad_norm": 0.2381763905286789, + "learning_rate": 1.094041412588122e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72154984, + "step": 107045 + }, + { + "epoch": 2.6152493098478002, + "grad_norm": 6.37664794921875, + "learning_rate": 1.093956512572708e-06, + "loss": 0.0319, + "num_input_tokens_seen": 72157928, + "step": 107050 + }, + { + "epoch": 2.615371460679647, + "grad_norm": 854.23046875, + "learning_rate": 1.0938716118740167e-06, + "loss": 0.102, + "num_input_tokens_seen": 72161320, + "step": 107055 + }, + { + "epoch": 2.6154936115114946, + "grad_norm": 20.003992080688477, + "learning_rate": 1.0937867104926662e-06, + "loss": 0.0676, + "num_input_tokens_seen": 72164648, + "step": 107060 + }, + { + "epoch": 2.6156157623433414, + "grad_norm": 0.01823362149298191, + "learning_rate": 1.0937018084292731e-06, + "loss": 0.1215, + "num_input_tokens_seen": 72168040, + "step": 107065 + }, + { + "epoch": 2.615737913175189, + "grad_norm": 0.014729354530572891, + "learning_rate": 1.0936169056844556e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72171432, + "step": 107070 + }, + { + "epoch": 2.6158600640070357, + "grad_norm": 0.0513114258646965, + "learning_rate": 1.093532002258831e-06, + "loss": 0.0514, + "num_input_tokens_seen": 72174824, + "step": 107075 + }, + { + "epoch": 2.615982214838883, + "grad_norm": 0.05931246653199196, + "learning_rate": 1.0934470981530162e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72178280, + "step": 107080 + }, + { + "epoch": 2.61610436567073, + "grad_norm": 0.026284227147698402, + "learning_rate": 1.093362193367629e-06, + "loss": 0.0679, + "num_input_tokens_seen": 72181608, + "step": 107085 + }, + { + "epoch": 2.6162265165025773, + "grad_norm": 0.32598185539245605, + "learning_rate": 1.0932772879032868e-06, + "loss": 0.0029, + "num_input_tokens_seen": 72185320, + "step": 107090 + }, + { + "epoch": 2.6163486673344245, + "grad_norm": 0.15179592370986938, + "learning_rate": 1.0931923817606068e-06, + "loss": 0.0907, + "num_input_tokens_seen": 72188520, + "step": 107095 + }, + { + "epoch": 2.6164708181662717, + "grad_norm": 0.4278791844844818, + "learning_rate": 1.0931074749402072e-06, + "loss": 0.0006, + "num_input_tokens_seen": 72191976, + "step": 107100 + }, + { + "epoch": 2.616592968998119, + "grad_norm": 178.62254333496094, + "learning_rate": 1.0930225674427047e-06, + "loss": 0.0831, + "num_input_tokens_seen": 72195176, + "step": 107105 + }, + { + "epoch": 2.616715119829966, + "grad_norm": 0.04899469390511513, + "learning_rate": 1.0929376592687173e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72198312, + "step": 107110 + }, + { + "epoch": 2.6168372706618133, + "grad_norm": 0.10489187389612198, + "learning_rate": 1.092852750418862e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72201640, + "step": 107115 + }, + { + "epoch": 2.6169594214936605, + "grad_norm": 0.059486132115125656, + "learning_rate": 1.092767840893757e-06, + "loss": 0.0884, + "num_input_tokens_seen": 72205032, + "step": 107120 + }, + { + "epoch": 2.6170815723255076, + "grad_norm": 0.14339995384216309, + "learning_rate": 1.0926829306940186e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72208296, + "step": 107125 + }, + { + "epoch": 2.617203723157355, + "grad_norm": 21.472740173339844, + "learning_rate": 1.0925980198202655e-06, + "loss": 0.0576, + "num_input_tokens_seen": 72211624, + "step": 107130 + }, + { + "epoch": 2.617325873989202, + "grad_norm": 22.353364944458008, + "learning_rate": 1.0925131082731146e-06, + "loss": 0.0413, + "num_input_tokens_seen": 72214760, + "step": 107135 + }, + { + "epoch": 2.6174480248210488, + "grad_norm": 0.08398572355508804, + "learning_rate": 1.0924281960531834e-06, + "loss": 0.0006, + "num_input_tokens_seen": 72218088, + "step": 107140 + }, + { + "epoch": 2.6175701756528964, + "grad_norm": 0.05433683097362518, + "learning_rate": 1.0923432831610897e-06, + "loss": 0.125, + "num_input_tokens_seen": 72221864, + "step": 107145 + }, + { + "epoch": 2.617692326484743, + "grad_norm": 0.034245796501636505, + "learning_rate": 1.0922583695974506e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72225384, + "step": 107150 + }, + { + "epoch": 2.617814477316591, + "grad_norm": 0.08957494050264359, + "learning_rate": 1.0921734553628836e-06, + "loss": 0.001, + "num_input_tokens_seen": 72228712, + "step": 107155 + }, + { + "epoch": 2.6179366281484375, + "grad_norm": 0.055757008492946625, + "learning_rate": 1.0920885404580066e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72232168, + "step": 107160 + }, + { + "epoch": 2.6180587789802847, + "grad_norm": 22.70863914489746, + "learning_rate": 1.0920036248834373e-06, + "loss": 0.0667, + "num_input_tokens_seen": 72235112, + "step": 107165 + }, + { + "epoch": 2.618180929812132, + "grad_norm": 0.015680965036153793, + "learning_rate": 1.0919187086397928e-06, + "loss": 0.1277, + "num_input_tokens_seen": 72238824, + "step": 107170 + }, + { + "epoch": 2.618303080643979, + "grad_norm": 33.95769119262695, + "learning_rate": 1.0918337917276906e-06, + "loss": 0.0355, + "num_input_tokens_seen": 72242024, + "step": 107175 + }, + { + "epoch": 2.6184252314758263, + "grad_norm": 0.04578560218214989, + "learning_rate": 1.0917488741477483e-06, + "loss": 0.0037, + "num_input_tokens_seen": 72245032, + "step": 107180 + }, + { + "epoch": 2.6185473823076735, + "grad_norm": 25.26980209350586, + "learning_rate": 1.0916639559005837e-06, + "loss": 0.0374, + "num_input_tokens_seen": 72248360, + "step": 107185 + }, + { + "epoch": 2.6186695331395207, + "grad_norm": 14.420616149902344, + "learning_rate": 1.091579036986814e-06, + "loss": 0.0883, + "num_input_tokens_seen": 72251368, + "step": 107190 + }, + { + "epoch": 2.618791683971368, + "grad_norm": 0.02755843475461006, + "learning_rate": 1.091494117407057e-06, + "loss": 0.165, + "num_input_tokens_seen": 72254440, + "step": 107195 + }, + { + "epoch": 2.618913834803215, + "grad_norm": 0.6943149566650391, + "learning_rate": 1.0914091971619299e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72257256, + "step": 107200 + }, + { + "epoch": 2.6190359856350622, + "grad_norm": 0.029792679473757744, + "learning_rate": 1.091324276252051e-06, + "loss": 0.065, + "num_input_tokens_seen": 72260328, + "step": 107205 + }, + { + "epoch": 2.6191581364669094, + "grad_norm": 0.030234433710575104, + "learning_rate": 1.091239354678037e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72263912, + "step": 107210 + }, + { + "epoch": 2.6192802872987566, + "grad_norm": 0.08542269468307495, + "learning_rate": 1.091154432440506e-06, + "loss": 0.0407, + "num_input_tokens_seen": 72267496, + "step": 107215 + }, + { + "epoch": 2.619402438130604, + "grad_norm": 0.0761958509683609, + "learning_rate": 1.0910695095400753e-06, + "loss": 0.1219, + "num_input_tokens_seen": 72270696, + "step": 107220 + }, + { + "epoch": 2.619524588962451, + "grad_norm": 1.5762901306152344, + "learning_rate": 1.0909845859773628e-06, + "loss": 0.042, + "num_input_tokens_seen": 72273768, + "step": 107225 + }, + { + "epoch": 2.619646739794298, + "grad_norm": 0.00965029839426279, + "learning_rate": 1.0908996617529862e-06, + "loss": 0.059, + "num_input_tokens_seen": 72277096, + "step": 107230 + }, + { + "epoch": 2.619768890626145, + "grad_norm": 0.6805073022842407, + "learning_rate": 1.0908147368675626e-06, + "loss": 0.0013, + "num_input_tokens_seen": 72280488, + "step": 107235 + }, + { + "epoch": 2.6198910414579926, + "grad_norm": 0.017918897792696953, + "learning_rate": 1.09072981132171e-06, + "loss": 0.0383, + "num_input_tokens_seen": 72284136, + "step": 107240 + }, + { + "epoch": 2.6200131922898393, + "grad_norm": 0.12319338321685791, + "learning_rate": 1.0906448851160453e-06, + "loss": 0.0349, + "num_input_tokens_seen": 72287720, + "step": 107245 + }, + { + "epoch": 2.6201353431216865, + "grad_norm": 0.071799635887146, + "learning_rate": 1.0905599582511866e-06, + "loss": 0.0361, + "num_input_tokens_seen": 72291176, + "step": 107250 + }, + { + "epoch": 2.6202574939535337, + "grad_norm": 0.03280222788453102, + "learning_rate": 1.0904750307277519e-06, + "loss": 0.0229, + "num_input_tokens_seen": 72294888, + "step": 107255 + }, + { + "epoch": 2.620379644785381, + "grad_norm": 0.03872086480259895, + "learning_rate": 1.0903901025463581e-06, + "loss": 0.0008, + "num_input_tokens_seen": 72298408, + "step": 107260 + }, + { + "epoch": 2.620501795617228, + "grad_norm": 49.3137092590332, + "learning_rate": 1.0903051737076236e-06, + "loss": 0.0814, + "num_input_tokens_seen": 72302824, + "step": 107265 + }, + { + "epoch": 2.6206239464490753, + "grad_norm": 0.19232575595378876, + "learning_rate": 1.0902202442121654e-06, + "loss": 0.03, + "num_input_tokens_seen": 72306344, + "step": 107270 + }, + { + "epoch": 2.6207460972809224, + "grad_norm": 0.03860234469175339, + "learning_rate": 1.0901353140606013e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72309288, + "step": 107275 + }, + { + "epoch": 2.6208682481127696, + "grad_norm": 0.08553210645914078, + "learning_rate": 1.090050383253549e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72312744, + "step": 107280 + }, + { + "epoch": 2.620990398944617, + "grad_norm": 0.043976765125989914, + "learning_rate": 1.089965451791626e-06, + "loss": 0.1156, + "num_input_tokens_seen": 72315944, + "step": 107285 + }, + { + "epoch": 2.621112549776464, + "grad_norm": 0.017137890681624413, + "learning_rate": 1.0898805196754502e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72319336, + "step": 107290 + }, + { + "epoch": 2.621234700608311, + "grad_norm": 0.22604624927043915, + "learning_rate": 1.089795586905639e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72323240, + "step": 107295 + }, + { + "epoch": 2.6213568514401584, + "grad_norm": 0.04105839133262634, + "learning_rate": 1.08971065348281e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72326312, + "step": 107300 + }, + { + "epoch": 2.6214790022720056, + "grad_norm": 0.1574070006608963, + "learning_rate": 1.0896257194075812e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72329512, + "step": 107305 + }, + { + "epoch": 2.6216011531038528, + "grad_norm": 0.03792516887187958, + "learning_rate": 1.0895407846805698e-06, + "loss": 0.0728, + "num_input_tokens_seen": 72332776, + "step": 107310 + }, + { + "epoch": 2.6217233039357, + "grad_norm": 0.0181158147752285, + "learning_rate": 1.0894558493023937e-06, + "loss": 0.0272, + "num_input_tokens_seen": 72335848, + "step": 107315 + }, + { + "epoch": 2.6218454547675467, + "grad_norm": 108.81785583496094, + "learning_rate": 1.089370913273671e-06, + "loss": 0.1086, + "num_input_tokens_seen": 72339432, + "step": 107320 + }, + { + "epoch": 2.6219676055993943, + "grad_norm": 0.1919240802526474, + "learning_rate": 1.0892859765950187e-06, + "loss": 0.0533, + "num_input_tokens_seen": 72342440, + "step": 107325 + }, + { + "epoch": 2.622089756431241, + "grad_norm": 0.21514792740345, + "learning_rate": 1.089201039267055e-06, + "loss": 0.001, + "num_input_tokens_seen": 72345384, + "step": 107330 + }, + { + "epoch": 2.6222119072630887, + "grad_norm": 0.057491566985845566, + "learning_rate": 1.0891161012903971e-06, + "loss": 0.0707, + "num_input_tokens_seen": 72348264, + "step": 107335 + }, + { + "epoch": 2.6223340580949355, + "grad_norm": 0.008800412528216839, + "learning_rate": 1.0890311626656631e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72351400, + "step": 107340 + }, + { + "epoch": 2.6224562089267827, + "grad_norm": 0.04608434811234474, + "learning_rate": 1.0889462233934704e-06, + "loss": 0.0444, + "num_input_tokens_seen": 72354792, + "step": 107345 + }, + { + "epoch": 2.62257835975863, + "grad_norm": 1.220207691192627, + "learning_rate": 1.088861283474437e-06, + "loss": 0.0006, + "num_input_tokens_seen": 72359016, + "step": 107350 + }, + { + "epoch": 2.622700510590477, + "grad_norm": 0.06609610468149185, + "learning_rate": 1.0887763429091804e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72361960, + "step": 107355 + }, + { + "epoch": 2.6228226614223242, + "grad_norm": 1.9949488639831543, + "learning_rate": 1.0886914016983182e-06, + "loss": 0.0087, + "num_input_tokens_seen": 72365864, + "step": 107360 + }, + { + "epoch": 2.6229448122541714, + "grad_norm": 0.1548883318901062, + "learning_rate": 1.0886064598424684e-06, + "loss": 0.0458, + "num_input_tokens_seen": 72369192, + "step": 107365 + }, + { + "epoch": 2.6230669630860186, + "grad_norm": 33.023460388183594, + "learning_rate": 1.0885215173422486e-06, + "loss": 0.038, + "num_input_tokens_seen": 72372264, + "step": 107370 + }, + { + "epoch": 2.623189113917866, + "grad_norm": 1.717215895652771, + "learning_rate": 1.0884365741982764e-06, + "loss": 0.0536, + "num_input_tokens_seen": 72375528, + "step": 107375 + }, + { + "epoch": 2.623311264749713, + "grad_norm": 0.05810878798365593, + "learning_rate": 1.0883516304111698e-06, + "loss": 0.0377, + "num_input_tokens_seen": 72378792, + "step": 107380 + }, + { + "epoch": 2.62343341558156, + "grad_norm": 0.034050095826387405, + "learning_rate": 1.0882666859815466e-06, + "loss": 0.0281, + "num_input_tokens_seen": 72382312, + "step": 107385 + }, + { + "epoch": 2.6235555664134074, + "grad_norm": 4.691112041473389, + "learning_rate": 1.088181740910024e-06, + "loss": 0.0007, + "num_input_tokens_seen": 72385576, + "step": 107390 + }, + { + "epoch": 2.6236777172452546, + "grad_norm": 0.07725610584020615, + "learning_rate": 1.0880967951972201e-06, + "loss": 0.0006, + "num_input_tokens_seen": 72388968, + "step": 107395 + }, + { + "epoch": 2.6237998680771017, + "grad_norm": 0.024160612374544144, + "learning_rate": 1.0880118488437526e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72392232, + "step": 107400 + }, + { + "epoch": 2.6239220189089485, + "grad_norm": 0.024952422827482224, + "learning_rate": 1.087926901850239e-06, + "loss": 0.0431, + "num_input_tokens_seen": 72395304, + "step": 107405 + }, + { + "epoch": 2.624044169740796, + "grad_norm": 0.22318053245544434, + "learning_rate": 1.0878419542172975e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72398824, + "step": 107410 + }, + { + "epoch": 2.624166320572643, + "grad_norm": 0.014739875681698322, + "learning_rate": 1.087757005945546e-06, + "loss": 0.0326, + "num_input_tokens_seen": 72401768, + "step": 107415 + }, + { + "epoch": 2.6242884714044905, + "grad_norm": 0.028513239696621895, + "learning_rate": 1.0876720570356018e-06, + "loss": 0.0902, + "num_input_tokens_seen": 72405096, + "step": 107420 + }, + { + "epoch": 2.6244106222363373, + "grad_norm": 0.0022885762155056, + "learning_rate": 1.0875871074880827e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72408168, + "step": 107425 + }, + { + "epoch": 2.6245327730681844, + "grad_norm": 0.11882738023996353, + "learning_rate": 1.0875021573036067e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72411368, + "step": 107430 + }, + { + "epoch": 2.6246549239000316, + "grad_norm": 0.01739366352558136, + "learning_rate": 1.0874172064827913e-06, + "loss": 0.0367, + "num_input_tokens_seen": 72415080, + "step": 107435 + }, + { + "epoch": 2.624777074731879, + "grad_norm": 0.004528645426034927, + "learning_rate": 1.0873322550262548e-06, + "loss": 0.104, + "num_input_tokens_seen": 72418408, + "step": 107440 + }, + { + "epoch": 2.624899225563726, + "grad_norm": 0.21300160884857178, + "learning_rate": 1.0872473029346143e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72422184, + "step": 107445 + }, + { + "epoch": 2.625021376395573, + "grad_norm": 0.03288082405924797, + "learning_rate": 1.0871623502084882e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72425768, + "step": 107450 + }, + { + "epoch": 2.6251435272274204, + "grad_norm": 0.010147850960493088, + "learning_rate": 1.087077396848494e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72429160, + "step": 107455 + }, + { + "epoch": 2.6252656780592676, + "grad_norm": 34.80997085571289, + "learning_rate": 1.0869924428552492e-06, + "loss": 0.1302, + "num_input_tokens_seen": 72432424, + "step": 107460 + }, + { + "epoch": 2.6253878288911148, + "grad_norm": 0.039994291961193085, + "learning_rate": 1.0869074882293723e-06, + "loss": 0.0571, + "num_input_tokens_seen": 72435432, + "step": 107465 + }, + { + "epoch": 2.625509979722962, + "grad_norm": 0.016550660133361816, + "learning_rate": 1.0868225329714806e-06, + "loss": 0.0333, + "num_input_tokens_seen": 72438824, + "step": 107470 + }, + { + "epoch": 2.625632130554809, + "grad_norm": 0.02417152374982834, + "learning_rate": 1.0867375770821922e-06, + "loss": 0.0397, + "num_input_tokens_seen": 72442472, + "step": 107475 + }, + { + "epoch": 2.6257542813866563, + "grad_norm": 0.0034279574174433947, + "learning_rate": 1.086652620562125e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72445864, + "step": 107480 + }, + { + "epoch": 2.6258764322185035, + "grad_norm": 0.010594514198601246, + "learning_rate": 1.0865676634118963e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72449256, + "step": 107485 + }, + { + "epoch": 2.6259985830503507, + "grad_norm": 76.29888916015625, + "learning_rate": 1.0864827056321243e-06, + "loss": 0.068, + "num_input_tokens_seen": 72452328, + "step": 107490 + }, + { + "epoch": 2.626120733882198, + "grad_norm": 0.001311702886596322, + "learning_rate": 1.0863977472234275e-06, + "loss": 0.0141, + "num_input_tokens_seen": 72455720, + "step": 107495 + }, + { + "epoch": 2.6262428847140447, + "grad_norm": 0.26813095808029175, + "learning_rate": 1.086312788186422e-06, + "loss": 0.0773, + "num_input_tokens_seen": 72458856, + "step": 107500 + }, + { + "epoch": 2.6263650355458923, + "grad_norm": 0.1785283088684082, + "learning_rate": 1.0862278285217272e-06, + "loss": 0.0408, + "num_input_tokens_seen": 72462184, + "step": 107505 + }, + { + "epoch": 2.626487186377739, + "grad_norm": 0.20264551043510437, + "learning_rate": 1.0861428682299605e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72465576, + "step": 107510 + }, + { + "epoch": 2.6266093372095867, + "grad_norm": 0.018418628722429276, + "learning_rate": 1.0860579073117394e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72469096, + "step": 107515 + }, + { + "epoch": 2.6267314880414334, + "grad_norm": 0.007999802008271217, + "learning_rate": 1.0859729457676823e-06, + "loss": 0.0541, + "num_input_tokens_seen": 72473064, + "step": 107520 + }, + { + "epoch": 2.6268536388732806, + "grad_norm": 0.008016117848455906, + "learning_rate": 1.0858879835984067e-06, + "loss": 0.0007, + "num_input_tokens_seen": 72476392, + "step": 107525 + }, + { + "epoch": 2.626975789705128, + "grad_norm": 0.2861591875553131, + "learning_rate": 1.0858030208045305e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72479784, + "step": 107530 + }, + { + "epoch": 2.627097940536975, + "grad_norm": 0.6544962525367737, + "learning_rate": 1.0857180573866718e-06, + "loss": 0.0005, + "num_input_tokens_seen": 72482984, + "step": 107535 + }, + { + "epoch": 2.627220091368822, + "grad_norm": 0.2255128175020218, + "learning_rate": 1.0856330933454485e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72486056, + "step": 107540 + }, + { + "epoch": 2.6273422422006694, + "grad_norm": 0.01406821422278881, + "learning_rate": 1.0855481286814781e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72489320, + "step": 107545 + }, + { + "epoch": 2.6274643930325166, + "grad_norm": 0.007307102438062429, + "learning_rate": 1.0854631633953788e-06, + "loss": 0.0526, + "num_input_tokens_seen": 72492392, + "step": 107550 + }, + { + "epoch": 2.6275865438643637, + "grad_norm": 0.0460895299911499, + "learning_rate": 1.0853781974877682e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72495720, + "step": 107555 + }, + { + "epoch": 2.627708694696211, + "grad_norm": 0.012345832772552967, + "learning_rate": 1.0852932309592644e-06, + "loss": 0.0, + "num_input_tokens_seen": 72498792, + "step": 107560 + }, + { + "epoch": 2.627830845528058, + "grad_norm": 0.3947714865207672, + "learning_rate": 1.0852082638104854e-06, + "loss": 0.0453, + "num_input_tokens_seen": 72501928, + "step": 107565 + }, + { + "epoch": 2.6279529963599053, + "grad_norm": 23.10430908203125, + "learning_rate": 1.0851232960420488e-06, + "loss": 0.0501, + "num_input_tokens_seen": 72505384, + "step": 107570 + }, + { + "epoch": 2.6280751471917525, + "grad_norm": 13.216294288635254, + "learning_rate": 1.085038327654573e-06, + "loss": 0.0009, + "num_input_tokens_seen": 72508328, + "step": 107575 + }, + { + "epoch": 2.6281972980235997, + "grad_norm": 0.05808434635400772, + "learning_rate": 1.0849533586486754e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72511528, + "step": 107580 + }, + { + "epoch": 2.6283194488554464, + "grad_norm": 0.1069037988781929, + "learning_rate": 1.0848683890249743e-06, + "loss": 0.0138, + "num_input_tokens_seen": 72514664, + "step": 107585 + }, + { + "epoch": 2.628441599687294, + "grad_norm": 0.009551643393933773, + "learning_rate": 1.0847834187840873e-06, + "loss": 0.0008, + "num_input_tokens_seen": 72518312, + "step": 107590 + }, + { + "epoch": 2.628563750519141, + "grad_norm": 0.036050669848918915, + "learning_rate": 1.0846984479266326e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72521640, + "step": 107595 + }, + { + "epoch": 2.6286859013509885, + "grad_norm": 14.879709243774414, + "learning_rate": 1.084613476453228e-06, + "loss": 0.0467, + "num_input_tokens_seen": 72525096, + "step": 107600 + }, + { + "epoch": 2.628808052182835, + "grad_norm": 0.016211412847042084, + "learning_rate": 1.0845285043644914e-06, + "loss": 0.0555, + "num_input_tokens_seen": 72529512, + "step": 107605 + }, + { + "epoch": 2.6289302030146824, + "grad_norm": 0.017843419685959816, + "learning_rate": 1.0844435316610408e-06, + "loss": 0.0999, + "num_input_tokens_seen": 72533224, + "step": 107610 + }, + { + "epoch": 2.6290523538465296, + "grad_norm": 0.689307451248169, + "learning_rate": 1.084358558343494e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72536552, + "step": 107615 + }, + { + "epoch": 2.6291745046783768, + "grad_norm": 0.06051316112279892, + "learning_rate": 1.084273584412469e-06, + "loss": 0.0028, + "num_input_tokens_seen": 72540264, + "step": 107620 + }, + { + "epoch": 2.629296655510224, + "grad_norm": 134.1039581298828, + "learning_rate": 1.084188609868584e-06, + "loss": 0.0367, + "num_input_tokens_seen": 72543464, + "step": 107625 + }, + { + "epoch": 2.629418806342071, + "grad_norm": 0.1083790734410286, + "learning_rate": 1.0841036347124565e-06, + "loss": 0.0459, + "num_input_tokens_seen": 72546408, + "step": 107630 + }, + { + "epoch": 2.6295409571739183, + "grad_norm": 0.1749052256345749, + "learning_rate": 1.0840186589447052e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72550568, + "step": 107635 + }, + { + "epoch": 2.6296631080057655, + "grad_norm": 0.0020598629489541054, + "learning_rate": 1.0839336825659473e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72553896, + "step": 107640 + }, + { + "epoch": 2.6297852588376127, + "grad_norm": 0.5078147053718567, + "learning_rate": 1.0838487055768013e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72557480, + "step": 107645 + }, + { + "epoch": 2.62990740966946, + "grad_norm": 17.110179901123047, + "learning_rate": 1.083763727977885e-06, + "loss": 0.1545, + "num_input_tokens_seen": 72560808, + "step": 107650 + }, + { + "epoch": 2.630029560501307, + "grad_norm": 38.41392517089844, + "learning_rate": 1.0836787497698161e-06, + "loss": 0.1782, + "num_input_tokens_seen": 72564328, + "step": 107655 + }, + { + "epoch": 2.6301517113331543, + "grad_norm": 0.02592952363193035, + "learning_rate": 1.0835937709532131e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72567272, + "step": 107660 + }, + { + "epoch": 2.6302738621650015, + "grad_norm": 0.01741073466837406, + "learning_rate": 1.0835087915286933e-06, + "loss": 0.0573, + "num_input_tokens_seen": 72570472, + "step": 107665 + }, + { + "epoch": 2.6303960129968487, + "grad_norm": 0.011544224806129932, + "learning_rate": 1.0834238114968754e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72573672, + "step": 107670 + }, + { + "epoch": 2.630518163828696, + "grad_norm": 0.14928054809570312, + "learning_rate": 1.0833388308583772e-06, + "loss": 0.1061, + "num_input_tokens_seen": 72576616, + "step": 107675 + }, + { + "epoch": 2.6306403146605426, + "grad_norm": 10.083694458007812, + "learning_rate": 1.0832538496138165e-06, + "loss": 0.0239, + "num_input_tokens_seen": 72580392, + "step": 107680 + }, + { + "epoch": 2.6307624654923902, + "grad_norm": 0.007448187563568354, + "learning_rate": 1.0831688677638112e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72583464, + "step": 107685 + }, + { + "epoch": 2.630884616324237, + "grad_norm": 0.06917484849691391, + "learning_rate": 1.0830838853089796e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72586984, + "step": 107690 + }, + { + "epoch": 2.631006767156084, + "grad_norm": 0.04246160760521889, + "learning_rate": 1.08299890224994e-06, + "loss": 0.0636, + "num_input_tokens_seen": 72589672, + "step": 107695 + }, + { + "epoch": 2.6311289179879314, + "grad_norm": 0.4987105131149292, + "learning_rate": 1.0829139185873097e-06, + "loss": 0.0014, + "num_input_tokens_seen": 72593320, + "step": 107700 + }, + { + "epoch": 2.6312510688197785, + "grad_norm": 1.0627728700637817, + "learning_rate": 1.082828934321707e-06, + "loss": 0.0544, + "num_input_tokens_seen": 72596904, + "step": 107705 + }, + { + "epoch": 2.6313732196516257, + "grad_norm": 0.0632128193974495, + "learning_rate": 1.0827439494537506e-06, + "loss": 0.107, + "num_input_tokens_seen": 72600360, + "step": 107710 + }, + { + "epoch": 2.631495370483473, + "grad_norm": 0.21518582105636597, + "learning_rate": 1.0826589639840572e-06, + "loss": 0.0938, + "num_input_tokens_seen": 72603816, + "step": 107715 + }, + { + "epoch": 2.63161752131532, + "grad_norm": 0.013827536255121231, + "learning_rate": 1.082573977913246e-06, + "loss": 0.0401, + "num_input_tokens_seen": 72607400, + "step": 107720 + }, + { + "epoch": 2.6317396721471673, + "grad_norm": 0.029351606965065002, + "learning_rate": 1.0824889912419344e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72610600, + "step": 107725 + }, + { + "epoch": 2.6318618229790145, + "grad_norm": 0.01663423702120781, + "learning_rate": 1.0824040039707409e-06, + "loss": 0.007, + "num_input_tokens_seen": 72614120, + "step": 107730 + }, + { + "epoch": 2.6319839738108617, + "grad_norm": 0.01716860942542553, + "learning_rate": 1.0823190161002834e-06, + "loss": 0.1447, + "num_input_tokens_seen": 72617192, + "step": 107735 + }, + { + "epoch": 2.632106124642709, + "grad_norm": 0.025707658380270004, + "learning_rate": 1.0822340276311794e-06, + "loss": 0.0502, + "num_input_tokens_seen": 72620968, + "step": 107740 + }, + { + "epoch": 2.632228275474556, + "grad_norm": 0.03293826803565025, + "learning_rate": 1.082149038564048e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72624488, + "step": 107745 + }, + { + "epoch": 2.6323504263064033, + "grad_norm": 0.02947530522942543, + "learning_rate": 1.0820640488995062e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72627816, + "step": 107750 + }, + { + "epoch": 2.6324725771382504, + "grad_norm": 0.006064974702894688, + "learning_rate": 1.0819790586381729e-06, + "loss": 0.0009, + "num_input_tokens_seen": 72631080, + "step": 107755 + }, + { + "epoch": 2.6325947279700976, + "grad_norm": 0.02745603770017624, + "learning_rate": 1.0818940677806657e-06, + "loss": 0.0577, + "num_input_tokens_seen": 72634664, + "step": 107760 + }, + { + "epoch": 2.6327168788019444, + "grad_norm": 0.06063401699066162, + "learning_rate": 1.081809076327603e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72638376, + "step": 107765 + }, + { + "epoch": 2.632839029633792, + "grad_norm": 0.10956387221813202, + "learning_rate": 1.0817240842796025e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72641832, + "step": 107770 + }, + { + "epoch": 2.6329611804656388, + "grad_norm": 0.45911097526550293, + "learning_rate": 1.0816390916372824e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72645096, + "step": 107775 + }, + { + "epoch": 2.6330833312974864, + "grad_norm": 0.06135956570506096, + "learning_rate": 1.0815540984012608e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72650280, + "step": 107780 + }, + { + "epoch": 2.633205482129333, + "grad_norm": 0.0062957340851426125, + "learning_rate": 1.081469104572156e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72653352, + "step": 107785 + }, + { + "epoch": 2.6333276329611803, + "grad_norm": 18.732004165649414, + "learning_rate": 1.081384110150586e-06, + "loss": 0.0716, + "num_input_tokens_seen": 72656488, + "step": 107790 + }, + { + "epoch": 2.6334497837930275, + "grad_norm": 0.02306182123720646, + "learning_rate": 1.0812991151371691e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72659752, + "step": 107795 + }, + { + "epoch": 2.6335719346248747, + "grad_norm": 0.43138134479522705, + "learning_rate": 1.0812141195325228e-06, + "loss": 0.1924, + "num_input_tokens_seen": 72663208, + "step": 107800 + }, + { + "epoch": 2.633694085456722, + "grad_norm": 0.010642552748322487, + "learning_rate": 1.0811291233372659e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72666472, + "step": 107805 + }, + { + "epoch": 2.633816236288569, + "grad_norm": 12.951112747192383, + "learning_rate": 1.081044126552016e-06, + "loss": 0.0924, + "num_input_tokens_seen": 72669800, + "step": 107810 + }, + { + "epoch": 2.6339383871204163, + "grad_norm": 18.641759872436523, + "learning_rate": 1.0809591291773913e-06, + "loss": 0.0457, + "num_input_tokens_seen": 72673000, + "step": 107815 + }, + { + "epoch": 2.6340605379522635, + "grad_norm": 0.13156826794147491, + "learning_rate": 1.0808741312140103e-06, + "loss": 0.0397, + "num_input_tokens_seen": 72676584, + "step": 107820 + }, + { + "epoch": 2.6341826887841107, + "grad_norm": 0.010222331620752811, + "learning_rate": 1.0807891326624906e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72680936, + "step": 107825 + }, + { + "epoch": 2.634304839615958, + "grad_norm": 0.03131281957030296, + "learning_rate": 1.0807041335234508e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72684264, + "step": 107830 + }, + { + "epoch": 2.634426990447805, + "grad_norm": 0.2757070064544678, + "learning_rate": 1.0806191337975085e-06, + "loss": 0.0007, + "num_input_tokens_seen": 72687720, + "step": 107835 + }, + { + "epoch": 2.6345491412796522, + "grad_norm": 0.707978367805481, + "learning_rate": 1.0805341334852824e-06, + "loss": 0.0407, + "num_input_tokens_seen": 72691240, + "step": 107840 + }, + { + "epoch": 2.6346712921114994, + "grad_norm": 0.02468128129839897, + "learning_rate": 1.0804491325873904e-06, + "loss": 0.1027, + "num_input_tokens_seen": 72694440, + "step": 107845 + }, + { + "epoch": 2.6347934429433466, + "grad_norm": 0.03739924356341362, + "learning_rate": 1.0803641311044507e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72698088, + "step": 107850 + }, + { + "epoch": 2.634915593775194, + "grad_norm": 0.29329103231430054, + "learning_rate": 1.0802791290370819e-06, + "loss": 0.1029, + "num_input_tokens_seen": 72701864, + "step": 107855 + }, + { + "epoch": 2.6350377446070405, + "grad_norm": 0.01383211649954319, + "learning_rate": 1.0801941263859011e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72705256, + "step": 107860 + }, + { + "epoch": 2.635159895438888, + "grad_norm": 0.04233228415250778, + "learning_rate": 1.0801091231515272e-06, + "loss": 0.0008, + "num_input_tokens_seen": 72708328, + "step": 107865 + }, + { + "epoch": 2.635282046270735, + "grad_norm": 19.92087173461914, + "learning_rate": 1.0800241193345778e-06, + "loss": 0.0444, + "num_input_tokens_seen": 72712040, + "step": 107870 + }, + { + "epoch": 2.635404197102582, + "grad_norm": 0.02420320361852646, + "learning_rate": 1.079939114935672e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72715944, + "step": 107875 + }, + { + "epoch": 2.6355263479344293, + "grad_norm": 0.03484322503209114, + "learning_rate": 1.0798541099554272e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72719080, + "step": 107880 + }, + { + "epoch": 2.6356484987662765, + "grad_norm": 0.13985350728034973, + "learning_rate": 1.0797691043944622e-06, + "loss": 0.0332, + "num_input_tokens_seen": 72722088, + "step": 107885 + }, + { + "epoch": 2.6357706495981237, + "grad_norm": 0.1291801631450653, + "learning_rate": 1.0796840982533943e-06, + "loss": 0.0018, + "num_input_tokens_seen": 72725352, + "step": 107890 + }, + { + "epoch": 2.635892800429971, + "grad_norm": 0.033996183425188065, + "learning_rate": 1.0795990915328426e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72728936, + "step": 107895 + }, + { + "epoch": 2.636014951261818, + "grad_norm": 0.05727604776620865, + "learning_rate": 1.0795140842334248e-06, + "loss": 0.0652, + "num_input_tokens_seen": 72732392, + "step": 107900 + }, + { + "epoch": 2.6361371020936653, + "grad_norm": 0.021231159567832947, + "learning_rate": 1.0794290763557591e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72735656, + "step": 107905 + }, + { + "epoch": 2.6362592529255124, + "grad_norm": 18.85205078125, + "learning_rate": 1.0793440679004638e-06, + "loss": 0.0377, + "num_input_tokens_seen": 72738856, + "step": 107910 + }, + { + "epoch": 2.6363814037573596, + "grad_norm": 0.13238240778446198, + "learning_rate": 1.0792590588681572e-06, + "loss": 0.0366, + "num_input_tokens_seen": 72741992, + "step": 107915 + }, + { + "epoch": 2.636503554589207, + "grad_norm": 48.16722106933594, + "learning_rate": 1.0791740492594574e-06, + "loss": 0.1173, + "num_input_tokens_seen": 72745320, + "step": 107920 + }, + { + "epoch": 2.636625705421054, + "grad_norm": 0.30411502718925476, + "learning_rate": 1.0790890390749824e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72748840, + "step": 107925 + }, + { + "epoch": 2.636747856252901, + "grad_norm": 0.07888974249362946, + "learning_rate": 1.079004028315351e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72752040, + "step": 107930 + }, + { + "epoch": 2.6368700070847484, + "grad_norm": 0.02969658188521862, + "learning_rate": 1.0789190169811806e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72755624, + "step": 107935 + }, + { + "epoch": 2.6369921579165956, + "grad_norm": 0.010916121304035187, + "learning_rate": 1.0788340050730902e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72758952, + "step": 107940 + }, + { + "epoch": 2.6371143087484423, + "grad_norm": 0.03490295261144638, + "learning_rate": 1.0787489925916976e-06, + "loss": 0.0457, + "num_input_tokens_seen": 72762280, + "step": 107945 + }, + { + "epoch": 2.63723645958029, + "grad_norm": 0.13596896827220917, + "learning_rate": 1.0786639795376214e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72765544, + "step": 107950 + }, + { + "epoch": 2.6373586104121367, + "grad_norm": 0.014326742850244045, + "learning_rate": 1.0785789659114792e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72768808, + "step": 107955 + }, + { + "epoch": 2.6374807612439843, + "grad_norm": 0.009243039414286613, + "learning_rate": 1.07849395171389e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72772328, + "step": 107960 + }, + { + "epoch": 2.637602912075831, + "grad_norm": 0.3703426420688629, + "learning_rate": 1.0784089369454714e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72775208, + "step": 107965 + }, + { + "epoch": 2.6377250629076783, + "grad_norm": 0.005901218391954899, + "learning_rate": 1.0783239216068421e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72778408, + "step": 107970 + }, + { + "epoch": 2.6378472137395255, + "grad_norm": 345.4832763671875, + "learning_rate": 1.07823890569862e-06, + "loss": 0.021, + "num_input_tokens_seen": 72781864, + "step": 107975 + }, + { + "epoch": 2.6379693645713727, + "grad_norm": 0.02927909605205059, + "learning_rate": 1.0781538892214235e-06, + "loss": 0.0156, + "num_input_tokens_seen": 72785448, + "step": 107980 + }, + { + "epoch": 2.63809151540322, + "grad_norm": 0.013741977512836456, + "learning_rate": 1.078068872175871e-06, + "loss": 0.1625, + "num_input_tokens_seen": 72788584, + "step": 107985 + }, + { + "epoch": 2.638213666235067, + "grad_norm": 0.006222781725227833, + "learning_rate": 1.0779838545625808e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72791528, + "step": 107990 + }, + { + "epoch": 2.6383358170669142, + "grad_norm": 0.019642196595668793, + "learning_rate": 1.0778988363821706e-06, + "loss": 0.0865, + "num_input_tokens_seen": 72795688, + "step": 107995 + }, + { + "epoch": 2.6384579678987614, + "grad_norm": 0.010990871116518974, + "learning_rate": 1.0778138176352596e-06, + "loss": 0.0782, + "num_input_tokens_seen": 72798824, + "step": 108000 + }, + { + "epoch": 2.6385801187306086, + "grad_norm": 0.030032608658075333, + "learning_rate": 1.0777287983224652e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72801960, + "step": 108005 + }, + { + "epoch": 2.638702269562456, + "grad_norm": 0.013616411946713924, + "learning_rate": 1.0776437784444065e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72804968, + "step": 108010 + }, + { + "epoch": 2.638824420394303, + "grad_norm": 69.19341278076172, + "learning_rate": 1.0775587580017012e-06, + "loss": 0.1758, + "num_input_tokens_seen": 72808552, + "step": 108015 + }, + { + "epoch": 2.63894657122615, + "grad_norm": 1.5476653575897217, + "learning_rate": 1.0774737369949678e-06, + "loss": 0.0349, + "num_input_tokens_seen": 72811624, + "step": 108020 + }, + { + "epoch": 2.6390687220579974, + "grad_norm": 0.01377920899540186, + "learning_rate": 1.0773887154248242e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72815720, + "step": 108025 + }, + { + "epoch": 2.639190872889844, + "grad_norm": 23.38638687133789, + "learning_rate": 1.0773036932918892e-06, + "loss": 0.03, + "num_input_tokens_seen": 72819112, + "step": 108030 + }, + { + "epoch": 2.6393130237216917, + "grad_norm": 0.05645943433046341, + "learning_rate": 1.077218670596781e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72823016, + "step": 108035 + }, + { + "epoch": 2.6394351745535385, + "grad_norm": 0.002317589009180665, + "learning_rate": 1.077133647340118e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72826472, + "step": 108040 + }, + { + "epoch": 2.639557325385386, + "grad_norm": 29.722360610961914, + "learning_rate": 1.0770486235225182e-06, + "loss": 0.1396, + "num_input_tokens_seen": 72829864, + "step": 108045 + }, + { + "epoch": 2.639679476217233, + "grad_norm": 0.17399294674396515, + "learning_rate": 1.0769635991446002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72833192, + "step": 108050 + }, + { + "epoch": 2.63980162704908, + "grad_norm": 0.10186666995286942, + "learning_rate": 1.076878574206982e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72836392, + "step": 108055 + }, + { + "epoch": 2.6399237778809272, + "grad_norm": 27.92131996154785, + "learning_rate": 1.0767935487102823e-06, + "loss": 0.0351, + "num_input_tokens_seen": 72840168, + "step": 108060 + }, + { + "epoch": 2.6400459287127744, + "grad_norm": 0.010424827225506306, + "learning_rate": 1.0767085226551194e-06, + "loss": 0.0398, + "num_input_tokens_seen": 72843496, + "step": 108065 + }, + { + "epoch": 2.6401680795446216, + "grad_norm": 0.05996089056134224, + "learning_rate": 1.0766234960421112e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72847144, + "step": 108070 + }, + { + "epoch": 2.640290230376469, + "grad_norm": 0.2539304196834564, + "learning_rate": 1.0765384688718766e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72850280, + "step": 108075 + }, + { + "epoch": 2.640412381208316, + "grad_norm": 0.01256766077131033, + "learning_rate": 1.0764534411450334e-06, + "loss": 0.0539, + "num_input_tokens_seen": 72853736, + "step": 108080 + }, + { + "epoch": 2.640534532040163, + "grad_norm": 0.045256491750478745, + "learning_rate": 1.0763684128622003e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72857192, + "step": 108085 + }, + { + "epoch": 2.6406566828720104, + "grad_norm": 0.016109425574541092, + "learning_rate": 1.0762833840239956e-06, + "loss": 0.0377, + "num_input_tokens_seen": 72861288, + "step": 108090 + }, + { + "epoch": 2.6407788337038576, + "grad_norm": 0.0010781821329146624, + "learning_rate": 1.0761983546310376e-06, + "loss": 0.0013, + "num_input_tokens_seen": 72865000, + "step": 108095 + }, + { + "epoch": 2.6409009845357048, + "grad_norm": 0.1415102183818817, + "learning_rate": 1.0761133246839446e-06, + "loss": 0.0492, + "num_input_tokens_seen": 72868008, + "step": 108100 + }, + { + "epoch": 2.641023135367552, + "grad_norm": 0.23793765902519226, + "learning_rate": 1.076028294183335e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72871016, + "step": 108105 + }, + { + "epoch": 2.641145286199399, + "grad_norm": 0.011273748241364956, + "learning_rate": 1.0759432631298276e-06, + "loss": 0.1439, + "num_input_tokens_seen": 72874344, + "step": 108110 + }, + { + "epoch": 2.6412674370312463, + "grad_norm": 0.3362903594970703, + "learning_rate": 1.0758582315240402e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72877672, + "step": 108115 + }, + { + "epoch": 2.6413895878630935, + "grad_norm": 0.012885728850960732, + "learning_rate": 1.075773199366591e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72880744, + "step": 108120 + }, + { + "epoch": 2.6415117386949403, + "grad_norm": 0.03369002044200897, + "learning_rate": 1.075688166658099e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72884072, + "step": 108125 + }, + { + "epoch": 2.641633889526788, + "grad_norm": 0.013920611701905727, + "learning_rate": 1.0756031333991822e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72887656, + "step": 108130 + }, + { + "epoch": 2.6417560403586346, + "grad_norm": 0.0669659972190857, + "learning_rate": 1.075518099590459e-06, + "loss": 0.001, + "num_input_tokens_seen": 72891176, + "step": 108135 + }, + { + "epoch": 2.6418781911904823, + "grad_norm": 0.15272586047649384, + "learning_rate": 1.0754330652325481e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72894696, + "step": 108140 + }, + { + "epoch": 2.642000342022329, + "grad_norm": 0.005168919917196035, + "learning_rate": 1.0753480303260675e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72897832, + "step": 108145 + }, + { + "epoch": 2.642122492854176, + "grad_norm": 0.7232739329338074, + "learning_rate": 1.0752629948716356e-06, + "loss": 0.1065, + "num_input_tokens_seen": 72901224, + "step": 108150 + }, + { + "epoch": 2.6422446436860234, + "grad_norm": 0.046349361538887024, + "learning_rate": 1.075177958869871e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72904872, + "step": 108155 + }, + { + "epoch": 2.6423667945178706, + "grad_norm": 0.421809583902359, + "learning_rate": 1.0750929223213923e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72908328, + "step": 108160 + }, + { + "epoch": 2.642488945349718, + "grad_norm": 2.9098937375238165e-05, + "learning_rate": 1.0750078852268178e-06, + "loss": 0.0373, + "num_input_tokens_seen": 72911592, + "step": 108165 + }, + { + "epoch": 2.642611096181565, + "grad_norm": 0.01656261831521988, + "learning_rate": 1.0749228475867656e-06, + "loss": 0.0021, + "num_input_tokens_seen": 72914984, + "step": 108170 + }, + { + "epoch": 2.642733247013412, + "grad_norm": 48.75508499145508, + "learning_rate": 1.0748378094018541e-06, + "loss": 0.0402, + "num_input_tokens_seen": 72918120, + "step": 108175 + }, + { + "epoch": 2.6428553978452594, + "grad_norm": 0.157123863697052, + "learning_rate": 1.0747527706727022e-06, + "loss": 0.0542, + "num_input_tokens_seen": 72921896, + "step": 108180 + }, + { + "epoch": 2.6429775486771065, + "grad_norm": 0.12687893211841583, + "learning_rate": 1.0746677313999277e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72926248, + "step": 108185 + }, + { + "epoch": 2.6430996995089537, + "grad_norm": 0.06420924514532089, + "learning_rate": 1.0745826915841495e-06, + "loss": 0.0366, + "num_input_tokens_seen": 72929512, + "step": 108190 + }, + { + "epoch": 2.643221850340801, + "grad_norm": 0.003628705395385623, + "learning_rate": 1.0744976512259862e-06, + "loss": 0.0567, + "num_input_tokens_seen": 72932968, + "step": 108195 + }, + { + "epoch": 2.643344001172648, + "grad_norm": 0.1197664812207222, + "learning_rate": 1.0744126103260558e-06, + "loss": 0.0003, + "num_input_tokens_seen": 72937000, + "step": 108200 + }, + { + "epoch": 2.6434661520044953, + "grad_norm": 0.02228275127708912, + "learning_rate": 1.0743275688849767e-06, + "loss": 0.0941, + "num_input_tokens_seen": 72940136, + "step": 108205 + }, + { + "epoch": 2.643588302836342, + "grad_norm": 0.35382992029190063, + "learning_rate": 1.0742425269033678e-06, + "loss": 0.0424, + "num_input_tokens_seen": 72944040, + "step": 108210 + }, + { + "epoch": 2.6437104536681897, + "grad_norm": 0.0022606253623962402, + "learning_rate": 1.074157484381847e-06, + "loss": 0.1378, + "num_input_tokens_seen": 72947240, + "step": 108215 + }, + { + "epoch": 2.6438326045000364, + "grad_norm": 0.027560878545045853, + "learning_rate": 1.0740724413210332e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72950760, + "step": 108220 + }, + { + "epoch": 2.643954755331884, + "grad_norm": 0.007026746869087219, + "learning_rate": 1.0739873977215447e-06, + "loss": 0.0452, + "num_input_tokens_seen": 72954024, + "step": 108225 + }, + { + "epoch": 2.644076906163731, + "grad_norm": 0.25033438205718994, + "learning_rate": 1.073902353584e-06, + "loss": 0.0432, + "num_input_tokens_seen": 72957736, + "step": 108230 + }, + { + "epoch": 2.644199056995578, + "grad_norm": 0.21594354510307312, + "learning_rate": 1.0738173089090172e-06, + "loss": 0.1475, + "num_input_tokens_seen": 72961256, + "step": 108235 + }, + { + "epoch": 2.644321207827425, + "grad_norm": 0.458386093378067, + "learning_rate": 1.0737322636972155e-06, + "loss": 0.021, + "num_input_tokens_seen": 72964456, + "step": 108240 + }, + { + "epoch": 2.6444433586592724, + "grad_norm": 0.02155539020895958, + "learning_rate": 1.0736472179492125e-06, + "loss": 0.0001, + "num_input_tokens_seen": 72968040, + "step": 108245 + }, + { + "epoch": 2.6445655094911196, + "grad_norm": 0.04842046648263931, + "learning_rate": 1.0735621716656274e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72971432, + "step": 108250 + }, + { + "epoch": 2.6446876603229668, + "grad_norm": 0.009866573847830296, + "learning_rate": 1.0734771248470783e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72974632, + "step": 108255 + }, + { + "epoch": 2.644809811154814, + "grad_norm": 0.07377776503562927, + "learning_rate": 1.0733920774941837e-06, + "loss": 0.1142, + "num_input_tokens_seen": 72977704, + "step": 108260 + }, + { + "epoch": 2.644931961986661, + "grad_norm": 0.035990867763757706, + "learning_rate": 1.0733070296075623e-06, + "loss": 0.0015, + "num_input_tokens_seen": 72981288, + "step": 108265 + }, + { + "epoch": 2.6450541128185083, + "grad_norm": 0.027638480067253113, + "learning_rate": 1.0732219811878327e-06, + "loss": 0.0004, + "num_input_tokens_seen": 72984744, + "step": 108270 + }, + { + "epoch": 2.6451762636503555, + "grad_norm": 0.00911200325936079, + "learning_rate": 1.0731369322356127e-06, + "loss": 0.0593, + "num_input_tokens_seen": 72988456, + "step": 108275 + }, + { + "epoch": 2.6452984144822027, + "grad_norm": 0.3369785249233246, + "learning_rate": 1.0730518827515216e-06, + "loss": 0.0002, + "num_input_tokens_seen": 72991656, + "step": 108280 + }, + { + "epoch": 2.64542056531405, + "grad_norm": 56.09309005737305, + "learning_rate": 1.0729668327361774e-06, + "loss": 0.082, + "num_input_tokens_seen": 72995304, + "step": 108285 + }, + { + "epoch": 2.645542716145897, + "grad_norm": 0.04016836732625961, + "learning_rate": 1.0728817821901988e-06, + "loss": 0.0756, + "num_input_tokens_seen": 72998312, + "step": 108290 + }, + { + "epoch": 2.6456648669777443, + "grad_norm": 0.009905444458127022, + "learning_rate": 1.0727967311142044e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73001384, + "step": 108295 + }, + { + "epoch": 2.6457870178095915, + "grad_norm": 0.031229624524712563, + "learning_rate": 1.0727116795088125e-06, + "loss": 0.0173, + "num_input_tokens_seen": 73004392, + "step": 108300 + }, + { + "epoch": 2.645909168641438, + "grad_norm": 0.14251470565795898, + "learning_rate": 1.0726266273746414e-06, + "loss": 0.0006, + "num_input_tokens_seen": 73007656, + "step": 108305 + }, + { + "epoch": 2.646031319473286, + "grad_norm": 0.08483777940273285, + "learning_rate": 1.0725415747123102e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73011496, + "step": 108310 + }, + { + "epoch": 2.6461534703051326, + "grad_norm": 23.98685646057129, + "learning_rate": 1.0724565215224373e-06, + "loss": 0.0925, + "num_input_tokens_seen": 73014824, + "step": 108315 + }, + { + "epoch": 2.64627562113698, + "grad_norm": 0.050838831812143326, + "learning_rate": 1.072371467805641e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73017960, + "step": 108320 + }, + { + "epoch": 2.646397771968827, + "grad_norm": 0.23060138523578644, + "learning_rate": 1.07228641356254e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73021288, + "step": 108325 + }, + { + "epoch": 2.646519922800674, + "grad_norm": 0.12116897851228714, + "learning_rate": 1.0722013587937526e-06, + "loss": 0.0516, + "num_input_tokens_seen": 73024424, + "step": 108330 + }, + { + "epoch": 2.6466420736325214, + "grad_norm": 0.4200071096420288, + "learning_rate": 1.0721163034998975e-06, + "loss": 0.0482, + "num_input_tokens_seen": 73027496, + "step": 108335 + }, + { + "epoch": 2.6467642244643685, + "grad_norm": 0.10476449131965637, + "learning_rate": 1.0720312476815932e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73031272, + "step": 108340 + }, + { + "epoch": 2.6468863752962157, + "grad_norm": 0.302228182554245, + "learning_rate": 1.0719461913394582e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73034280, + "step": 108345 + }, + { + "epoch": 2.647008526128063, + "grad_norm": 0.10496729612350464, + "learning_rate": 1.0718611344741116e-06, + "loss": 0.0822, + "num_input_tokens_seen": 73037224, + "step": 108350 + }, + { + "epoch": 2.64713067695991, + "grad_norm": 0.07415910065174103, + "learning_rate": 1.071776077086171e-06, + "loss": 0.0934, + "num_input_tokens_seen": 73040808, + "step": 108355 + }, + { + "epoch": 2.6472528277917573, + "grad_norm": 24.086450576782227, + "learning_rate": 1.0716910191762559e-06, + "loss": 0.0623, + "num_input_tokens_seen": 73044072, + "step": 108360 + }, + { + "epoch": 2.6473749786236045, + "grad_norm": 0.011858934536576271, + "learning_rate": 1.0716059607449842e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73047080, + "step": 108365 + }, + { + "epoch": 2.6474971294554517, + "grad_norm": 0.20711888372898102, + "learning_rate": 1.0715209017929744e-06, + "loss": 0.0398, + "num_input_tokens_seen": 73050152, + "step": 108370 + }, + { + "epoch": 2.647619280287299, + "grad_norm": 0.13118284940719604, + "learning_rate": 1.0714358423208457e-06, + "loss": 0.0184, + "num_input_tokens_seen": 73053672, + "step": 108375 + }, + { + "epoch": 2.647741431119146, + "grad_norm": 0.014156767167150974, + "learning_rate": 1.0713507823292165e-06, + "loss": 0.0849, + "num_input_tokens_seen": 73056936, + "step": 108380 + }, + { + "epoch": 2.6478635819509933, + "grad_norm": 0.022906072437763214, + "learning_rate": 1.071265721818705e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73060136, + "step": 108385 + }, + { + "epoch": 2.64798573278284, + "grad_norm": 54.88224411010742, + "learning_rate": 1.0711806607899302e-06, + "loss": 0.1114, + "num_input_tokens_seen": 73063784, + "step": 108390 + }, + { + "epoch": 2.6481078836146876, + "grad_norm": 0.03241246938705444, + "learning_rate": 1.0710955992435101e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73067304, + "step": 108395 + }, + { + "epoch": 2.6482300344465344, + "grad_norm": 0.015850190073251724, + "learning_rate": 1.0710105371800637e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73070888, + "step": 108400 + }, + { + "epoch": 2.648352185278382, + "grad_norm": 0.16307973861694336, + "learning_rate": 1.07092547460021e-06, + "loss": 0.0008, + "num_input_tokens_seen": 73074088, + "step": 108405 + }, + { + "epoch": 2.6484743361102288, + "grad_norm": 24.154691696166992, + "learning_rate": 1.0708404115045669e-06, + "loss": 0.1232, + "num_input_tokens_seen": 73077608, + "step": 108410 + }, + { + "epoch": 2.648596486942076, + "grad_norm": 0.012943286448717117, + "learning_rate": 1.0707553478937533e-06, + "loss": 0.026, + "num_input_tokens_seen": 73081000, + "step": 108415 + }, + { + "epoch": 2.648718637773923, + "grad_norm": 0.012203868478536606, + "learning_rate": 1.070670283768388e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73084136, + "step": 108420 + }, + { + "epoch": 2.6488407886057703, + "grad_norm": 0.034377772361040115, + "learning_rate": 1.0705852191290891e-06, + "loss": 0.0421, + "num_input_tokens_seen": 73087528, + "step": 108425 + }, + { + "epoch": 2.6489629394376175, + "grad_norm": 0.2772684097290039, + "learning_rate": 1.0705001539764754e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73091048, + "step": 108430 + }, + { + "epoch": 2.6490850902694647, + "grad_norm": 0.05456133186817169, + "learning_rate": 1.0704150883111659e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73094440, + "step": 108435 + }, + { + "epoch": 2.649207241101312, + "grad_norm": 0.010156864300370216, + "learning_rate": 1.0703300221337787e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73098472, + "step": 108440 + }, + { + "epoch": 2.649329391933159, + "grad_norm": 0.0660182535648346, + "learning_rate": 1.070244955444933e-06, + "loss": 0.0863, + "num_input_tokens_seen": 73101928, + "step": 108445 + }, + { + "epoch": 2.6494515427650063, + "grad_norm": 0.11449148505926132, + "learning_rate": 1.0701598882452469e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73105768, + "step": 108450 + }, + { + "epoch": 2.6495736935968535, + "grad_norm": 0.1072482019662857, + "learning_rate": 1.070074820535339e-06, + "loss": 0.1254, + "num_input_tokens_seen": 73109544, + "step": 108455 + }, + { + "epoch": 2.6496958444287007, + "grad_norm": 0.13168303668498993, + "learning_rate": 1.0699897523158283e-06, + "loss": 0.0293, + "num_input_tokens_seen": 73113064, + "step": 108460 + }, + { + "epoch": 2.649817995260548, + "grad_norm": 159.1094512939453, + "learning_rate": 1.0699046835873336e-06, + "loss": 0.0029, + "num_input_tokens_seen": 73116200, + "step": 108465 + }, + { + "epoch": 2.649940146092395, + "grad_norm": 0.06051814183592796, + "learning_rate": 1.069819614350473e-06, + "loss": 0.0515, + "num_input_tokens_seen": 73119592, + "step": 108470 + }, + { + "epoch": 2.650062296924242, + "grad_norm": 40.68425369262695, + "learning_rate": 1.0697345446058654e-06, + "loss": 0.0455, + "num_input_tokens_seen": 73122984, + "step": 108475 + }, + { + "epoch": 2.6501844477560894, + "grad_norm": 35.27906799316406, + "learning_rate": 1.0696494743541296e-06, + "loss": 0.0456, + "num_input_tokens_seen": 73126248, + "step": 108480 + }, + { + "epoch": 2.650306598587936, + "grad_norm": 0.003338438691571355, + "learning_rate": 1.069564403595884e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73129448, + "step": 108485 + }, + { + "epoch": 2.650428749419784, + "grad_norm": 3.661097764968872, + "learning_rate": 1.0694793323317473e-06, + "loss": 0.0008, + "num_input_tokens_seen": 73132840, + "step": 108490 + }, + { + "epoch": 2.6505509002516305, + "grad_norm": 0.45807284116744995, + "learning_rate": 1.0693942605623378e-06, + "loss": 0.0907, + "num_input_tokens_seen": 73136040, + "step": 108495 + }, + { + "epoch": 2.6506730510834777, + "grad_norm": 0.04252566769719124, + "learning_rate": 1.069309188288275e-06, + "loss": 0.1461, + "num_input_tokens_seen": 73139496, + "step": 108500 + }, + { + "epoch": 2.650795201915325, + "grad_norm": 0.08476745337247849, + "learning_rate": 1.069224115510177e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73143080, + "step": 108505 + }, + { + "epoch": 2.650917352747172, + "grad_norm": 0.014480705372989178, + "learning_rate": 1.0691390422286627e-06, + "loss": 0.0458, + "num_input_tokens_seen": 73147112, + "step": 108510 + }, + { + "epoch": 2.6510395035790193, + "grad_norm": 0.020470378920435905, + "learning_rate": 1.0690539684443507e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73150568, + "step": 108515 + }, + { + "epoch": 2.6511616544108665, + "grad_norm": 0.12641994655132294, + "learning_rate": 1.06896889415786e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73153640, + "step": 108520 + }, + { + "epoch": 2.6512838052427137, + "grad_norm": 0.16786456108093262, + "learning_rate": 1.0688838193698083e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73157096, + "step": 108525 + }, + { + "epoch": 2.651405956074561, + "grad_norm": 0.013153999112546444, + "learning_rate": 1.0687987440808153e-06, + "loss": 0.0606, + "num_input_tokens_seen": 73161128, + "step": 108530 + }, + { + "epoch": 2.651528106906408, + "grad_norm": 0.058872465044260025, + "learning_rate": 1.0687136682914993e-06, + "loss": 0.0434, + "num_input_tokens_seen": 73164328, + "step": 108535 + }, + { + "epoch": 2.6516502577382552, + "grad_norm": 29.729175567626953, + "learning_rate": 1.068628592002479e-06, + "loss": 0.0347, + "num_input_tokens_seen": 73167272, + "step": 108540 + }, + { + "epoch": 2.6517724085701024, + "grad_norm": 0.2370745837688446, + "learning_rate": 1.068543515214373e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73170344, + "step": 108545 + }, + { + "epoch": 2.6518945594019496, + "grad_norm": 0.00412773247808218, + "learning_rate": 1.0684584379278004e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73173224, + "step": 108550 + }, + { + "epoch": 2.652016710233797, + "grad_norm": 0.07895587384700775, + "learning_rate": 1.0683733601433793e-06, + "loss": 0.0238, + "num_input_tokens_seen": 73176296, + "step": 108555 + }, + { + "epoch": 2.652138861065644, + "grad_norm": 0.09276538342237473, + "learning_rate": 1.068288281861729e-06, + "loss": 0.0469, + "num_input_tokens_seen": 73179176, + "step": 108560 + }, + { + "epoch": 2.652261011897491, + "grad_norm": 0.0015131103573367, + "learning_rate": 1.0682032030834678e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73182376, + "step": 108565 + }, + { + "epoch": 2.652383162729338, + "grad_norm": 0.5894980430603027, + "learning_rate": 1.0681181238092146e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73185640, + "step": 108570 + }, + { + "epoch": 2.6525053135611856, + "grad_norm": 53.47251510620117, + "learning_rate": 1.0680330440395882e-06, + "loss": 0.0396, + "num_input_tokens_seen": 73188584, + "step": 108575 + }, + { + "epoch": 2.6526274643930323, + "grad_norm": 16.599782943725586, + "learning_rate": 1.0679479637752069e-06, + "loss": 0.0931, + "num_input_tokens_seen": 73191912, + "step": 108580 + }, + { + "epoch": 2.65274961522488, + "grad_norm": 0.012147694826126099, + "learning_rate": 1.06786288301669e-06, + "loss": 0.0326, + "num_input_tokens_seen": 73195304, + "step": 108585 + }, + { + "epoch": 2.6528717660567267, + "grad_norm": 0.17597021162509918, + "learning_rate": 1.0677778017646558e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73198568, + "step": 108590 + }, + { + "epoch": 2.652993916888574, + "grad_norm": 0.029810475185513496, + "learning_rate": 1.0676927200197234e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73201640, + "step": 108595 + }, + { + "epoch": 2.653116067720421, + "grad_norm": 0.011127985082566738, + "learning_rate": 1.067607637782511e-06, + "loss": 0.0535, + "num_input_tokens_seen": 73204712, + "step": 108600 + }, + { + "epoch": 2.6532382185522683, + "grad_norm": 0.014848160557448864, + "learning_rate": 1.0675225550536383e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73208616, + "step": 108605 + }, + { + "epoch": 2.6533603693841155, + "grad_norm": 94.40913391113281, + "learning_rate": 1.067437471833723e-06, + "loss": 0.147, + "num_input_tokens_seen": 73211944, + "step": 108610 + }, + { + "epoch": 2.6534825202159626, + "grad_norm": 0.10178336501121521, + "learning_rate": 1.0673523881233841e-06, + "loss": 0.0615, + "num_input_tokens_seen": 73215592, + "step": 108615 + }, + { + "epoch": 2.65360467104781, + "grad_norm": 0.03686375916004181, + "learning_rate": 1.0672673039232405e-06, + "loss": 0.1221, + "num_input_tokens_seen": 73218792, + "step": 108620 + }, + { + "epoch": 2.653726821879657, + "grad_norm": 0.44345104694366455, + "learning_rate": 1.0671822192339112e-06, + "loss": 0.0805, + "num_input_tokens_seen": 73222568, + "step": 108625 + }, + { + "epoch": 2.653848972711504, + "grad_norm": 0.08455266058444977, + "learning_rate": 1.0670971340560148e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73225640, + "step": 108630 + }, + { + "epoch": 2.6539711235433514, + "grad_norm": 0.05392299219965935, + "learning_rate": 1.06701204839017e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73229032, + "step": 108635 + }, + { + "epoch": 2.6540932743751986, + "grad_norm": 13.104811668395996, + "learning_rate": 1.0669269622369957e-06, + "loss": 0.0764, + "num_input_tokens_seen": 73232360, + "step": 108640 + }, + { + "epoch": 2.654215425207046, + "grad_norm": 0.19615143537521362, + "learning_rate": 1.06684187559711e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73235688, + "step": 108645 + }, + { + "epoch": 2.654337576038893, + "grad_norm": 0.08257798105478287, + "learning_rate": 1.0667567884711323e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73238888, + "step": 108650 + }, + { + "epoch": 2.6544597268707397, + "grad_norm": 0.08494102954864502, + "learning_rate": 1.0666717008596814e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73242216, + "step": 108655 + }, + { + "epoch": 2.6545818777025874, + "grad_norm": 193.64535522460938, + "learning_rate": 1.0665866127633762e-06, + "loss": 0.0155, + "num_input_tokens_seen": 73245608, + "step": 108660 + }, + { + "epoch": 2.654704028534434, + "grad_norm": 15.049497604370117, + "learning_rate": 1.066501524182835e-06, + "loss": 0.1046, + "num_input_tokens_seen": 73248680, + "step": 108665 + }, + { + "epoch": 2.6548261793662817, + "grad_norm": 0.3932948410511017, + "learning_rate": 1.066416435118677e-06, + "loss": 0.0481, + "num_input_tokens_seen": 73251688, + "step": 108670 + }, + { + "epoch": 2.6549483301981285, + "grad_norm": 0.17279548943042755, + "learning_rate": 1.0663313455715208e-06, + "loss": 0.0006, + "num_input_tokens_seen": 73254824, + "step": 108675 + }, + { + "epoch": 2.6550704810299757, + "grad_norm": 0.061224523931741714, + "learning_rate": 1.066246255541985e-06, + "loss": 0.0009, + "num_input_tokens_seen": 73258088, + "step": 108680 + }, + { + "epoch": 2.655192631861823, + "grad_norm": 0.034446604549884796, + "learning_rate": 1.0661611650306887e-06, + "loss": 0.0344, + "num_input_tokens_seen": 73261800, + "step": 108685 + }, + { + "epoch": 2.65531478269367, + "grad_norm": 0.016167186200618744, + "learning_rate": 1.0660760740382508e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73265256, + "step": 108690 + }, + { + "epoch": 2.6554369335255172, + "grad_norm": 0.019697608426213264, + "learning_rate": 1.0659909825652898e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73268520, + "step": 108695 + }, + { + "epoch": 2.6555590843573644, + "grad_norm": 0.15763618052005768, + "learning_rate": 1.0659058906124245e-06, + "loss": 0.1883, + "num_input_tokens_seen": 73272040, + "step": 108700 + }, + { + "epoch": 2.6556812351892116, + "grad_norm": 0.07214447855949402, + "learning_rate": 1.0658207981802741e-06, + "loss": 0.0034, + "num_input_tokens_seen": 73275176, + "step": 108705 + }, + { + "epoch": 2.655803386021059, + "grad_norm": 0.021427778527140617, + "learning_rate": 1.0657357052694567e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73278504, + "step": 108710 + }, + { + "epoch": 2.655925536852906, + "grad_norm": 28.227033615112305, + "learning_rate": 1.0656506118805919e-06, + "loss": 0.1303, + "num_input_tokens_seen": 73282280, + "step": 108715 + }, + { + "epoch": 2.656047687684753, + "grad_norm": 0.042592257261276245, + "learning_rate": 1.0655655180142982e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73285480, + "step": 108720 + }, + { + "epoch": 2.6561698385166004, + "grad_norm": 0.236333966255188, + "learning_rate": 1.0654804236711942e-06, + "loss": 0.0318, + "num_input_tokens_seen": 73288872, + "step": 108725 + }, + { + "epoch": 2.6562919893484476, + "grad_norm": 0.005104314535856247, + "learning_rate": 1.0653953288518994e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73292008, + "step": 108730 + }, + { + "epoch": 2.6564141401802948, + "grad_norm": 5.778536796569824, + "learning_rate": 1.0653102335570317e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73295464, + "step": 108735 + }, + { + "epoch": 2.656536291012142, + "grad_norm": 0.056464217603206635, + "learning_rate": 1.0652251377872108e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73298728, + "step": 108740 + }, + { + "epoch": 2.656658441843989, + "grad_norm": 0.012570296414196491, + "learning_rate": 1.065140041543055e-06, + "loss": 0.0319, + "num_input_tokens_seen": 73302248, + "step": 108745 + }, + { + "epoch": 2.656780592675836, + "grad_norm": 30.777732849121094, + "learning_rate": 1.0650549448251831e-06, + "loss": 0.0538, + "num_input_tokens_seen": 73305768, + "step": 108750 + }, + { + "epoch": 2.6569027435076835, + "grad_norm": 0.010172931477427483, + "learning_rate": 1.0649698476342144e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73309480, + "step": 108755 + }, + { + "epoch": 2.6570248943395303, + "grad_norm": 0.006178905721753836, + "learning_rate": 1.0648847499707673e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73312680, + "step": 108760 + }, + { + "epoch": 2.6571470451713775, + "grad_norm": 0.02570711076259613, + "learning_rate": 1.0647996518354608e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73316136, + "step": 108765 + }, + { + "epoch": 2.6572691960032246, + "grad_norm": 0.006446475628763437, + "learning_rate": 1.0647145532289142e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73318952, + "step": 108770 + }, + { + "epoch": 2.657391346835072, + "grad_norm": 0.01190762035548687, + "learning_rate": 1.0646294541517456e-06, + "loss": 0.0639, + "num_input_tokens_seen": 73321960, + "step": 108775 + }, + { + "epoch": 2.657513497666919, + "grad_norm": 0.012693374417722225, + "learning_rate": 1.0645443546045743e-06, + "loss": 0.0747, + "num_input_tokens_seen": 73325032, + "step": 108780 + }, + { + "epoch": 2.657635648498766, + "grad_norm": 0.02469741739332676, + "learning_rate": 1.0644592545880193e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73328616, + "step": 108785 + }, + { + "epoch": 2.6577577993306134, + "grad_norm": 0.00556557159870863, + "learning_rate": 1.0643741541026991e-06, + "loss": 0.0795, + "num_input_tokens_seen": 73331752, + "step": 108790 + }, + { + "epoch": 2.6578799501624606, + "grad_norm": 0.009257211349904537, + "learning_rate": 1.0642890531492327e-06, + "loss": 0.0526, + "num_input_tokens_seen": 73335272, + "step": 108795 + }, + { + "epoch": 2.658002100994308, + "grad_norm": 0.07994474470615387, + "learning_rate": 1.064203951728239e-06, + "loss": 0.0645, + "num_input_tokens_seen": 73338536, + "step": 108800 + }, + { + "epoch": 2.658124251826155, + "grad_norm": 0.49836266040802, + "learning_rate": 1.064118849840337e-06, + "loss": 0.1111, + "num_input_tokens_seen": 73341992, + "step": 108805 + }, + { + "epoch": 2.658246402658002, + "grad_norm": 13.673401832580566, + "learning_rate": 1.0640337474861453e-06, + "loss": 0.0614, + "num_input_tokens_seen": 73345256, + "step": 108810 + }, + { + "epoch": 2.6583685534898494, + "grad_norm": 0.16080302000045776, + "learning_rate": 1.063948644666283e-06, + "loss": 0.049, + "num_input_tokens_seen": 73348520, + "step": 108815 + }, + { + "epoch": 2.6584907043216965, + "grad_norm": 0.014671185985207558, + "learning_rate": 1.063863541381369e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73351400, + "step": 108820 + }, + { + "epoch": 2.6586128551535437, + "grad_norm": 0.0028238091617822647, + "learning_rate": 1.0637784376320222e-06, + "loss": 0.0517, + "num_input_tokens_seen": 73355176, + "step": 108825 + }, + { + "epoch": 2.658735005985391, + "grad_norm": 0.004296763800084591, + "learning_rate": 1.0636933334188615e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73358568, + "step": 108830 + }, + { + "epoch": 2.6588571568172377, + "grad_norm": 39.8594856262207, + "learning_rate": 1.0636082287425058e-06, + "loss": 0.0933, + "num_input_tokens_seen": 73362088, + "step": 108835 + }, + { + "epoch": 2.6589793076490853, + "grad_norm": 0.02145841158926487, + "learning_rate": 1.0635231236035739e-06, + "loss": 0.0006, + "num_input_tokens_seen": 73365224, + "step": 108840 + }, + { + "epoch": 2.659101458480932, + "grad_norm": 0.01345257367938757, + "learning_rate": 1.0634380180026846e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73368232, + "step": 108845 + }, + { + "epoch": 2.6592236093127797, + "grad_norm": 0.0031906412914395332, + "learning_rate": 1.063352911940457e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73371432, + "step": 108850 + }, + { + "epoch": 2.6593457601446264, + "grad_norm": 0.011594736017286777, + "learning_rate": 1.0632678054175102e-06, + "loss": 0.0944, + "num_input_tokens_seen": 73374952, + "step": 108855 + }, + { + "epoch": 2.6594679109764736, + "grad_norm": 0.009862457402050495, + "learning_rate": 1.0631826984344624e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73378600, + "step": 108860 + }, + { + "epoch": 2.659590061808321, + "grad_norm": 0.02546362765133381, + "learning_rate": 1.0630975909919334e-06, + "loss": 0.1409, + "num_input_tokens_seen": 73382184, + "step": 108865 + }, + { + "epoch": 2.659712212640168, + "grad_norm": 33.12275695800781, + "learning_rate": 1.0630124830905418e-06, + "loss": 0.0337, + "num_input_tokens_seen": 73385384, + "step": 108870 + }, + { + "epoch": 2.659834363472015, + "grad_norm": 0.02543380670249462, + "learning_rate": 1.0629273747309064e-06, + "loss": 0.0398, + "num_input_tokens_seen": 73389096, + "step": 108875 + }, + { + "epoch": 2.6599565143038624, + "grad_norm": 0.002421492477878928, + "learning_rate": 1.0628422659136463e-06, + "loss": 0.053, + "num_input_tokens_seen": 73392104, + "step": 108880 + }, + { + "epoch": 2.6600786651357096, + "grad_norm": 0.017665240913629532, + "learning_rate": 1.06275715663938e-06, + "loss": 0.0375, + "num_input_tokens_seen": 73395176, + "step": 108885 + }, + { + "epoch": 2.6602008159675568, + "grad_norm": 0.06175203621387482, + "learning_rate": 1.062672046908727e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73398696, + "step": 108890 + }, + { + "epoch": 2.660322966799404, + "grad_norm": 0.07373465597629547, + "learning_rate": 1.0625869367223063e-06, + "loss": 0.0645, + "num_input_tokens_seen": 73401960, + "step": 108895 + }, + { + "epoch": 2.660445117631251, + "grad_norm": 2.7904903888702393, + "learning_rate": 1.062501826080736e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73404968, + "step": 108900 + }, + { + "epoch": 2.6605672684630983, + "grad_norm": 346.7339172363281, + "learning_rate": 1.062416714984636e-06, + "loss": 0.035, + "num_input_tokens_seen": 73408168, + "step": 108905 + }, + { + "epoch": 2.6606894192949455, + "grad_norm": 0.07050517201423645, + "learning_rate": 1.0623316034346248e-06, + "loss": 0.0526, + "num_input_tokens_seen": 73411560, + "step": 108910 + }, + { + "epoch": 2.6608115701267927, + "grad_norm": 21.976076126098633, + "learning_rate": 1.0622464914313214e-06, + "loss": 0.1244, + "num_input_tokens_seen": 73414888, + "step": 108915 + }, + { + "epoch": 2.66093372095864, + "grad_norm": 0.02514161542057991, + "learning_rate": 1.0621613789753447e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73418280, + "step": 108920 + }, + { + "epoch": 2.661055871790487, + "grad_norm": 0.019108710810542107, + "learning_rate": 1.062076266067314e-06, + "loss": 0.065, + "num_input_tokens_seen": 73422056, + "step": 108925 + }, + { + "epoch": 2.661178022622334, + "grad_norm": 0.4653870463371277, + "learning_rate": 1.0619911527078475e-06, + "loss": 0.0006, + "num_input_tokens_seen": 73426088, + "step": 108930 + }, + { + "epoch": 2.6613001734541815, + "grad_norm": 0.012500254437327385, + "learning_rate": 1.0619060388975649e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73429864, + "step": 108935 + }, + { + "epoch": 2.661422324286028, + "grad_norm": 0.27674028277397156, + "learning_rate": 1.0618209246370853e-06, + "loss": 0.0009, + "num_input_tokens_seen": 73433256, + "step": 108940 + }, + { + "epoch": 2.6615444751178754, + "grad_norm": 0.010696930810809135, + "learning_rate": 1.061735809927027e-06, + "loss": 0.0418, + "num_input_tokens_seen": 73436392, + "step": 108945 + }, + { + "epoch": 2.6616666259497226, + "grad_norm": 0.016720250248908997, + "learning_rate": 1.0616506947680092e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73439656, + "step": 108950 + }, + { + "epoch": 2.6617887767815698, + "grad_norm": 37.6923828125, + "learning_rate": 1.0615655791606511e-06, + "loss": 0.0398, + "num_input_tokens_seen": 73442792, + "step": 108955 + }, + { + "epoch": 2.661910927613417, + "grad_norm": 298.3846740722656, + "learning_rate": 1.0614804631055713e-06, + "loss": 0.028, + "num_input_tokens_seen": 73446184, + "step": 108960 + }, + { + "epoch": 2.662033078445264, + "grad_norm": 0.11180854588747025, + "learning_rate": 1.0613953466033892e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73449704, + "step": 108965 + }, + { + "epoch": 2.6621552292771113, + "grad_norm": 0.27024519443511963, + "learning_rate": 1.0613102296547237e-06, + "loss": 0.0732, + "num_input_tokens_seen": 73452968, + "step": 108970 + }, + { + "epoch": 2.6622773801089585, + "grad_norm": 0.012191432528197765, + "learning_rate": 1.0612251122601936e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73456424, + "step": 108975 + }, + { + "epoch": 2.6623995309408057, + "grad_norm": 0.007310071494430304, + "learning_rate": 1.0611399944204181e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73459816, + "step": 108980 + }, + { + "epoch": 2.662521681772653, + "grad_norm": 20.659034729003906, + "learning_rate": 1.061054876136016e-06, + "loss": 0.0805, + "num_input_tokens_seen": 73463208, + "step": 108985 + }, + { + "epoch": 2.6626438326045, + "grad_norm": 0.029268672689795494, + "learning_rate": 1.0609697574076066e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73466600, + "step": 108990 + }, + { + "epoch": 2.6627659834363473, + "grad_norm": 0.11111558228731155, + "learning_rate": 1.0608846382358087e-06, + "loss": 0.0896, + "num_input_tokens_seen": 73469480, + "step": 108995 + }, + { + "epoch": 2.6628881342681945, + "grad_norm": 50.83061599731445, + "learning_rate": 1.0607995186212413e-06, + "loss": 0.0629, + "num_input_tokens_seen": 73472872, + "step": 109000 + }, + { + "epoch": 2.6630102851000417, + "grad_norm": 0.21760523319244385, + "learning_rate": 1.0607143985645235e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73476072, + "step": 109005 + }, + { + "epoch": 2.663132435931889, + "grad_norm": 0.009848492220044136, + "learning_rate": 1.0606292780662742e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73479208, + "step": 109010 + }, + { + "epoch": 2.6632545867637356, + "grad_norm": 31.30138397216797, + "learning_rate": 1.0605441571271126e-06, + "loss": 0.0978, + "num_input_tokens_seen": 73482600, + "step": 109015 + }, + { + "epoch": 2.6633767375955832, + "grad_norm": 48.70668411254883, + "learning_rate": 1.0604590357476571e-06, + "loss": 0.1106, + "num_input_tokens_seen": 73486248, + "step": 109020 + }, + { + "epoch": 2.66349888842743, + "grad_norm": 0.12107347697019577, + "learning_rate": 1.0603739139285276e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73489832, + "step": 109025 + }, + { + "epoch": 2.6636210392592776, + "grad_norm": 0.021504517644643784, + "learning_rate": 1.0602887916703429e-06, + "loss": 0.0776, + "num_input_tokens_seen": 73493096, + "step": 109030 + }, + { + "epoch": 2.6637431900911244, + "grad_norm": 0.17749902606010437, + "learning_rate": 1.0602036689737218e-06, + "loss": 0.0434, + "num_input_tokens_seen": 73495976, + "step": 109035 + }, + { + "epoch": 2.6638653409229716, + "grad_norm": 45.952964782714844, + "learning_rate": 1.0601185458392833e-06, + "loss": 0.0723, + "num_input_tokens_seen": 73499112, + "step": 109040 + }, + { + "epoch": 2.6639874917548187, + "grad_norm": 29.334917068481445, + "learning_rate": 1.0600334222676469e-06, + "loss": 0.0641, + "num_input_tokens_seen": 73502248, + "step": 109045 + }, + { + "epoch": 2.664109642586666, + "grad_norm": 0.13320650160312653, + "learning_rate": 1.059948298259431e-06, + "loss": 0.0585, + "num_input_tokens_seen": 73505768, + "step": 109050 + }, + { + "epoch": 2.664231793418513, + "grad_norm": 0.07540590316057205, + "learning_rate": 1.0598631738152547e-06, + "loss": 0.0025, + "num_input_tokens_seen": 73509224, + "step": 109055 + }, + { + "epoch": 2.6643539442503603, + "grad_norm": 0.11329970508813858, + "learning_rate": 1.0597780489357378e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73512232, + "step": 109060 + }, + { + "epoch": 2.6644760950822075, + "grad_norm": 550.1355590820312, + "learning_rate": 1.0596929236214986e-06, + "loss": 0.0629, + "num_input_tokens_seen": 73515432, + "step": 109065 + }, + { + "epoch": 2.6645982459140547, + "grad_norm": 0.047656625509262085, + "learning_rate": 1.0596077978731565e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73518632, + "step": 109070 + }, + { + "epoch": 2.664720396745902, + "grad_norm": 0.09848161041736603, + "learning_rate": 1.0595226716913307e-06, + "loss": 0.0379, + "num_input_tokens_seen": 73521768, + "step": 109075 + }, + { + "epoch": 2.664842547577749, + "grad_norm": 0.10069756954908371, + "learning_rate": 1.0594375450766394e-06, + "loss": 0.0423, + "num_input_tokens_seen": 73525288, + "step": 109080 + }, + { + "epoch": 2.6649646984095963, + "grad_norm": 0.023019161075353622, + "learning_rate": 1.0593524180297026e-06, + "loss": 0.0643, + "num_input_tokens_seen": 73528744, + "step": 109085 + }, + { + "epoch": 2.6650868492414435, + "grad_norm": 0.027636736631393433, + "learning_rate": 1.059267290551139e-06, + "loss": 0.0247, + "num_input_tokens_seen": 73532264, + "step": 109090 + }, + { + "epoch": 2.6652090000732906, + "grad_norm": 19.799278259277344, + "learning_rate": 1.059182162641568e-06, + "loss": 0.1019, + "num_input_tokens_seen": 73535528, + "step": 109095 + }, + { + "epoch": 2.6653311509051374, + "grad_norm": 0.16363434493541718, + "learning_rate": 1.0590970343016083e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73539048, + "step": 109100 + }, + { + "epoch": 2.665453301736985, + "grad_norm": 0.007981637492775917, + "learning_rate": 1.059011905531879e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73542632, + "step": 109105 + }, + { + "epoch": 2.6655754525688318, + "grad_norm": 0.09691093862056732, + "learning_rate": 1.0589267763329992e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73546280, + "step": 109110 + }, + { + "epoch": 2.6656976034006794, + "grad_norm": 18.96839141845703, + "learning_rate": 1.0588416467055878e-06, + "loss": 0.0432, + "num_input_tokens_seen": 73549608, + "step": 109115 + }, + { + "epoch": 2.665819754232526, + "grad_norm": 0.04357759281992912, + "learning_rate": 1.0587565166502646e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73552872, + "step": 109120 + }, + { + "epoch": 2.6659419050643733, + "grad_norm": 0.2836996912956238, + "learning_rate": 1.058671386167648e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73556584, + "step": 109125 + }, + { + "epoch": 2.6660640558962205, + "grad_norm": 0.009287668392062187, + "learning_rate": 1.058586255258357e-06, + "loss": 0.0417, + "num_input_tokens_seen": 73560296, + "step": 109130 + }, + { + "epoch": 2.6661862067280677, + "grad_norm": 0.01081774290651083, + "learning_rate": 1.0585011239230114e-06, + "loss": 0.0329, + "num_input_tokens_seen": 73563688, + "step": 109135 + }, + { + "epoch": 2.666308357559915, + "grad_norm": 0.032498445361852646, + "learning_rate": 1.0584159921622298e-06, + "loss": 0.001, + "num_input_tokens_seen": 73566888, + "step": 109140 + }, + { + "epoch": 2.666430508391762, + "grad_norm": 0.020721541717648506, + "learning_rate": 1.0583308599766314e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73569896, + "step": 109145 + }, + { + "epoch": 2.6665526592236093, + "grad_norm": 0.06984131038188934, + "learning_rate": 1.0582457273668352e-06, + "loss": 0.0778, + "num_input_tokens_seen": 73573864, + "step": 109150 + }, + { + "epoch": 2.6666748100554565, + "grad_norm": 16.520017623901367, + "learning_rate": 1.0581605943334606e-06, + "loss": 0.0621, + "num_input_tokens_seen": 73577640, + "step": 109155 + }, + { + "epoch": 2.6667969608873037, + "grad_norm": 19.316129684448242, + "learning_rate": 1.0580754608771263e-06, + "loss": 0.0427, + "num_input_tokens_seen": 73581224, + "step": 109160 + }, + { + "epoch": 2.666919111719151, + "grad_norm": 35.69450759887695, + "learning_rate": 1.0579903269984519e-06, + "loss": 0.0369, + "num_input_tokens_seen": 73584488, + "step": 109165 + }, + { + "epoch": 2.667041262550998, + "grad_norm": 0.025746477767825127, + "learning_rate": 1.0579051926980558e-06, + "loss": 0.0395, + "num_input_tokens_seen": 73588392, + "step": 109170 + }, + { + "epoch": 2.6671634133828452, + "grad_norm": 1.5054798126220703, + "learning_rate": 1.0578200579765577e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73591720, + "step": 109175 + }, + { + "epoch": 2.6672855642146924, + "grad_norm": 0.08736526966094971, + "learning_rate": 1.0577349228345766e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73594792, + "step": 109180 + }, + { + "epoch": 2.6674077150465396, + "grad_norm": 0.06524144113063812, + "learning_rate": 1.0576497872727318e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73598120, + "step": 109185 + }, + { + "epoch": 2.667529865878387, + "grad_norm": 0.5269588232040405, + "learning_rate": 1.0575646512916422e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73601576, + "step": 109190 + }, + { + "epoch": 2.6676520167102336, + "grad_norm": 0.2253422737121582, + "learning_rate": 1.0574795148919268e-06, + "loss": 0.0617, + "num_input_tokens_seen": 73605288, + "step": 109195 + }, + { + "epoch": 2.667774167542081, + "grad_norm": 0.2920159101486206, + "learning_rate": 1.0573943780742051e-06, + "loss": 0.0706, + "num_input_tokens_seen": 73608424, + "step": 109200 + }, + { + "epoch": 2.667896318373928, + "grad_norm": 122.05394744873047, + "learning_rate": 1.0573092408390963e-06, + "loss": 0.0431, + "num_input_tokens_seen": 73611752, + "step": 109205 + }, + { + "epoch": 2.6680184692057756, + "grad_norm": 0.03391716629266739, + "learning_rate": 1.0572241031872187e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73614696, + "step": 109210 + }, + { + "epoch": 2.6681406200376223, + "grad_norm": 0.10054274648427963, + "learning_rate": 1.0571389651191924e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73618216, + "step": 109215 + }, + { + "epoch": 2.6682627708694695, + "grad_norm": 0.19624534249305725, + "learning_rate": 1.057053826635636e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73621288, + "step": 109220 + }, + { + "epoch": 2.6683849217013167, + "grad_norm": 0.07142213732004166, + "learning_rate": 1.0569686877371688e-06, + "loss": 0.0963, + "num_input_tokens_seen": 73625320, + "step": 109225 + }, + { + "epoch": 2.668507072533164, + "grad_norm": 0.0393596850335598, + "learning_rate": 1.0568835484244103e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73628904, + "step": 109230 + }, + { + "epoch": 2.668629223365011, + "grad_norm": 0.11763045936822891, + "learning_rate": 1.056798408697979e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73632040, + "step": 109235 + }, + { + "epoch": 2.6687513741968583, + "grad_norm": 0.030843744054436684, + "learning_rate": 1.0567132685584944e-06, + "loss": 0.0456, + "num_input_tokens_seen": 73635432, + "step": 109240 + }, + { + "epoch": 2.6688735250287055, + "grad_norm": 0.0399046465754509, + "learning_rate": 1.0566281280065757e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73638760, + "step": 109245 + }, + { + "epoch": 2.6689956758605526, + "grad_norm": 13.421170234680176, + "learning_rate": 1.0565429870428424e-06, + "loss": 0.0653, + "num_input_tokens_seen": 73641832, + "step": 109250 + }, + { + "epoch": 2.6691178266924, + "grad_norm": 0.16441789269447327, + "learning_rate": 1.056457845667913e-06, + "loss": 0.104, + "num_input_tokens_seen": 73644968, + "step": 109255 + }, + { + "epoch": 2.669239977524247, + "grad_norm": 0.16625621914863586, + "learning_rate": 1.0563727038824069e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73648296, + "step": 109260 + }, + { + "epoch": 2.669362128356094, + "grad_norm": 0.07061154395341873, + "learning_rate": 1.0562875616869433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73651496, + "step": 109265 + }, + { + "epoch": 2.6694842791879414, + "grad_norm": 15.163482666015625, + "learning_rate": 1.0562024190821412e-06, + "loss": 0.0999, + "num_input_tokens_seen": 73654568, + "step": 109270 + }, + { + "epoch": 2.6696064300197886, + "grad_norm": 0.20702727138996124, + "learning_rate": 1.0561172760686204e-06, + "loss": 0.029, + "num_input_tokens_seen": 73658280, + "step": 109275 + }, + { + "epoch": 2.6697285808516353, + "grad_norm": 0.005529319401830435, + "learning_rate": 1.0560321326469996e-06, + "loss": 0.0345, + "num_input_tokens_seen": 73661288, + "step": 109280 + }, + { + "epoch": 2.669850731683483, + "grad_norm": 0.03878706693649292, + "learning_rate": 1.055946988817898e-06, + "loss": 0.0469, + "num_input_tokens_seen": 73664488, + "step": 109285 + }, + { + "epoch": 2.6699728825153297, + "grad_norm": 0.04471408948302269, + "learning_rate": 1.055861844581935e-06, + "loss": 0.0789, + "num_input_tokens_seen": 73667880, + "step": 109290 + }, + { + "epoch": 2.6700950333471773, + "grad_norm": 0.07671474665403366, + "learning_rate": 1.0557766999397295e-06, + "loss": 0.0226, + "num_input_tokens_seen": 73671144, + "step": 109295 + }, + { + "epoch": 2.670217184179024, + "grad_norm": 0.049207672476768494, + "learning_rate": 1.0556915548919007e-06, + "loss": 0.0373, + "num_input_tokens_seen": 73674792, + "step": 109300 + }, + { + "epoch": 2.6703393350108713, + "grad_norm": 0.03824898228049278, + "learning_rate": 1.0556064094390682e-06, + "loss": 0.076, + "num_input_tokens_seen": 73677928, + "step": 109305 + }, + { + "epoch": 2.6704614858427185, + "grad_norm": 0.12842383980751038, + "learning_rate": 1.0555212635818507e-06, + "loss": 0.0333, + "num_input_tokens_seen": 73681256, + "step": 109310 + }, + { + "epoch": 2.6705836366745657, + "grad_norm": 0.0072562661953270435, + "learning_rate": 1.0554361173208679e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73684776, + "step": 109315 + }, + { + "epoch": 2.670705787506413, + "grad_norm": 0.2971610724925995, + "learning_rate": 1.0553509706567384e-06, + "loss": 0.0482, + "num_input_tokens_seen": 73687656, + "step": 109320 + }, + { + "epoch": 2.67082793833826, + "grad_norm": 0.022412337362766266, + "learning_rate": 1.055265823590082e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73690792, + "step": 109325 + }, + { + "epoch": 2.6709500891701072, + "grad_norm": 0.04063393548130989, + "learning_rate": 1.0551806761215175e-06, + "loss": 0.0578, + "num_input_tokens_seen": 73694632, + "step": 109330 + }, + { + "epoch": 2.6710722400019544, + "grad_norm": 210.82992553710938, + "learning_rate": 1.0550955282516644e-06, + "loss": 0.0572, + "num_input_tokens_seen": 73697832, + "step": 109335 + }, + { + "epoch": 2.6711943908338016, + "grad_norm": 26.77365493774414, + "learning_rate": 1.0550103799811419e-06, + "loss": 0.0728, + "num_input_tokens_seen": 73700904, + "step": 109340 + }, + { + "epoch": 2.671316541665649, + "grad_norm": 0.027570553123950958, + "learning_rate": 1.054925231310569e-06, + "loss": 0.0323, + "num_input_tokens_seen": 73704104, + "step": 109345 + }, + { + "epoch": 2.671438692497496, + "grad_norm": 0.16419246792793274, + "learning_rate": 1.054840082240565e-06, + "loss": 0.0353, + "num_input_tokens_seen": 73707432, + "step": 109350 + }, + { + "epoch": 2.671560843329343, + "grad_norm": 0.055125705897808075, + "learning_rate": 1.0547549327717491e-06, + "loss": 0.0406, + "num_input_tokens_seen": 73710632, + "step": 109355 + }, + { + "epoch": 2.6716829941611904, + "grad_norm": 1.7239696979522705, + "learning_rate": 1.054669782904741e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73714216, + "step": 109360 + }, + { + "epoch": 2.6718051449930376, + "grad_norm": 0.043563805520534515, + "learning_rate": 1.054584632640159e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73717288, + "step": 109365 + }, + { + "epoch": 2.6719272958248848, + "grad_norm": 25.463674545288086, + "learning_rate": 1.0544994819786233e-06, + "loss": 0.0627, + "num_input_tokens_seen": 73720680, + "step": 109370 + }, + { + "epoch": 2.6720494466567315, + "grad_norm": 0.024402741342782974, + "learning_rate": 1.0544143309207525e-06, + "loss": 0.0008, + "num_input_tokens_seen": 73723880, + "step": 109375 + }, + { + "epoch": 2.672171597488579, + "grad_norm": 0.025014013051986694, + "learning_rate": 1.054329179467166e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73727016, + "step": 109380 + }, + { + "epoch": 2.672293748320426, + "grad_norm": 22.745786666870117, + "learning_rate": 1.0542440276184835e-06, + "loss": 0.1426, + "num_input_tokens_seen": 73730152, + "step": 109385 + }, + { + "epoch": 2.672415899152273, + "grad_norm": 17.65288734436035, + "learning_rate": 1.0541588753753235e-06, + "loss": 0.0894, + "num_input_tokens_seen": 73733544, + "step": 109390 + }, + { + "epoch": 2.6725380499841203, + "grad_norm": 0.011913538910448551, + "learning_rate": 1.0540737227383052e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73737256, + "step": 109395 + }, + { + "epoch": 2.6726602008159674, + "grad_norm": 0.08027175068855286, + "learning_rate": 1.0539885697080487e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73740584, + "step": 109400 + }, + { + "epoch": 2.6727823516478146, + "grad_norm": 0.0017405960243195295, + "learning_rate": 1.053903416285173e-06, + "loss": 0.0398, + "num_input_tokens_seen": 73744040, + "step": 109405 + }, + { + "epoch": 2.672904502479662, + "grad_norm": 0.09514592587947845, + "learning_rate": 1.053818262470297e-06, + "loss": 0.0559, + "num_input_tokens_seen": 73747176, + "step": 109410 + }, + { + "epoch": 2.673026653311509, + "grad_norm": 0.014695454388856888, + "learning_rate": 1.0537331082640399e-06, + "loss": 0.043, + "num_input_tokens_seen": 73750568, + "step": 109415 + }, + { + "epoch": 2.673148804143356, + "grad_norm": 13.021860122680664, + "learning_rate": 1.0536479536670215e-06, + "loss": 0.0016, + "num_input_tokens_seen": 73753768, + "step": 109420 + }, + { + "epoch": 2.6732709549752034, + "grad_norm": 0.020546959713101387, + "learning_rate": 1.0535627986798603e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73756776, + "step": 109425 + }, + { + "epoch": 2.6733931058070506, + "grad_norm": 0.09020008146762848, + "learning_rate": 1.0534776433031764e-06, + "loss": 0.0491, + "num_input_tokens_seen": 73760232, + "step": 109430 + }, + { + "epoch": 2.6735152566388978, + "grad_norm": 0.008538307622075081, + "learning_rate": 1.0533924875375886e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73763752, + "step": 109435 + }, + { + "epoch": 2.673637407470745, + "grad_norm": 0.02596125937998295, + "learning_rate": 1.0533073313837163e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73766952, + "step": 109440 + }, + { + "epoch": 2.673759558302592, + "grad_norm": 0.01916971616446972, + "learning_rate": 1.0532221748421785e-06, + "loss": 0.0642, + "num_input_tokens_seen": 73770216, + "step": 109445 + }, + { + "epoch": 2.6738817091344393, + "grad_norm": 22.06159210205078, + "learning_rate": 1.053137017913595e-06, + "loss": 0.0589, + "num_input_tokens_seen": 73773608, + "step": 109450 + }, + { + "epoch": 2.6740038599662865, + "grad_norm": 0.0795968696475029, + "learning_rate": 1.0530518605985848e-06, + "loss": 0.0693, + "num_input_tokens_seen": 73777000, + "step": 109455 + }, + { + "epoch": 2.6741260107981333, + "grad_norm": 0.0034378645941615105, + "learning_rate": 1.052966702897767e-06, + "loss": 0.0567, + "num_input_tokens_seen": 73780328, + "step": 109460 + }, + { + "epoch": 2.674248161629981, + "grad_norm": 53.51797103881836, + "learning_rate": 1.0528815448117613e-06, + "loss": 0.0032, + "num_input_tokens_seen": 73783336, + "step": 109465 + }, + { + "epoch": 2.6743703124618277, + "grad_norm": 0.053672004491090775, + "learning_rate": 1.052796386341187e-06, + "loss": 0.0556, + "num_input_tokens_seen": 73786408, + "step": 109470 + }, + { + "epoch": 2.6744924632936753, + "grad_norm": 0.0037759304977953434, + "learning_rate": 1.0527112274866628e-06, + "loss": 0.0095, + "num_input_tokens_seen": 73789416, + "step": 109475 + }, + { + "epoch": 2.674614614125522, + "grad_norm": 0.3404017388820648, + "learning_rate": 1.0526260682488085e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73792296, + "step": 109480 + }, + { + "epoch": 2.6747367649573692, + "grad_norm": 0.01111539825797081, + "learning_rate": 1.0525409086282433e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73795880, + "step": 109485 + }, + { + "epoch": 2.6748589157892164, + "grad_norm": 0.023893926292657852, + "learning_rate": 1.0524557486255862e-06, + "loss": 0.0256, + "num_input_tokens_seen": 73799336, + "step": 109490 + }, + { + "epoch": 2.6749810666210636, + "grad_norm": 0.002047064481303096, + "learning_rate": 1.052370588241457e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73802856, + "step": 109495 + }, + { + "epoch": 2.675103217452911, + "grad_norm": 0.016036951914429665, + "learning_rate": 1.052285427476475e-06, + "loss": 0.0006, + "num_input_tokens_seen": 73806312, + "step": 109500 + }, + { + "epoch": 2.675225368284758, + "grad_norm": 0.0667245090007782, + "learning_rate": 1.052200266331259e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73809576, + "step": 109505 + }, + { + "epoch": 2.675347519116605, + "grad_norm": 0.2333661913871765, + "learning_rate": 1.0521151048064287e-06, + "loss": 0.0672, + "num_input_tokens_seen": 73813224, + "step": 109510 + }, + { + "epoch": 2.6754696699484524, + "grad_norm": 0.006419423967599869, + "learning_rate": 1.0520299429026038e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73816872, + "step": 109515 + }, + { + "epoch": 2.6755918207802996, + "grad_norm": 0.026847392320632935, + "learning_rate": 1.0519447806204026e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73820136, + "step": 109520 + }, + { + "epoch": 2.6757139716121467, + "grad_norm": 0.24557249248027802, + "learning_rate": 1.0518596179604451e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73823528, + "step": 109525 + }, + { + "epoch": 2.675836122443994, + "grad_norm": 0.013657464645802975, + "learning_rate": 1.0517744549233505e-06, + "loss": 0.075, + "num_input_tokens_seen": 73826984, + "step": 109530 + }, + { + "epoch": 2.675958273275841, + "grad_norm": 0.0005760699859820306, + "learning_rate": 1.0516892915097381e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73830248, + "step": 109535 + }, + { + "epoch": 2.6760804241076883, + "grad_norm": 0.015993546694517136, + "learning_rate": 1.0516041277202275e-06, + "loss": 0.1249, + "num_input_tokens_seen": 73834408, + "step": 109540 + }, + { + "epoch": 2.676202574939535, + "grad_norm": 0.008551014587283134, + "learning_rate": 1.0515189635554375e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73837736, + "step": 109545 + }, + { + "epoch": 2.6763247257713827, + "grad_norm": 0.11013732105493546, + "learning_rate": 1.0514337990159879e-06, + "loss": 0.1554, + "num_input_tokens_seen": 73841512, + "step": 109550 + }, + { + "epoch": 2.6764468766032294, + "grad_norm": 0.0076579442247748375, + "learning_rate": 1.0513486341024978e-06, + "loss": 0.0001, + "num_input_tokens_seen": 73844584, + "step": 109555 + }, + { + "epoch": 2.676569027435077, + "grad_norm": 0.0050875660963356495, + "learning_rate": 1.051263468815587e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73847784, + "step": 109560 + }, + { + "epoch": 2.676691178266924, + "grad_norm": 0.03001215308904648, + "learning_rate": 1.0511783031558741e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73851112, + "step": 109565 + }, + { + "epoch": 2.676813329098771, + "grad_norm": 0.007266190368682146, + "learning_rate": 1.051093137123979e-06, + "loss": 0.0271, + "num_input_tokens_seen": 73854248, + "step": 109570 + }, + { + "epoch": 2.676935479930618, + "grad_norm": 39.2228889465332, + "learning_rate": 1.0510079707205206e-06, + "loss": 0.049, + "num_input_tokens_seen": 73857320, + "step": 109575 + }, + { + "epoch": 2.6770576307624654, + "grad_norm": 0.05401134118437767, + "learning_rate": 1.0509228039461184e-06, + "loss": 0.0453, + "num_input_tokens_seen": 73860328, + "step": 109580 + }, + { + "epoch": 2.6771797815943126, + "grad_norm": 0.10138184577226639, + "learning_rate": 1.0508376368013922e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73863592, + "step": 109585 + }, + { + "epoch": 2.6773019324261598, + "grad_norm": 0.04651380330324173, + "learning_rate": 1.050752469286961e-06, + "loss": 0.0542, + "num_input_tokens_seen": 73867496, + "step": 109590 + }, + { + "epoch": 2.677424083258007, + "grad_norm": 0.005964824929833412, + "learning_rate": 1.0506673014034441e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73871016, + "step": 109595 + }, + { + "epoch": 2.677546234089854, + "grad_norm": 12.295047760009766, + "learning_rate": 1.050582133151461e-06, + "loss": 0.1749, + "num_input_tokens_seen": 73874792, + "step": 109600 + }, + { + "epoch": 2.6776683849217013, + "grad_norm": 0.09397143125534058, + "learning_rate": 1.0504969645316313e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73878568, + "step": 109605 + }, + { + "epoch": 2.6777905357535485, + "grad_norm": 0.007933935150504112, + "learning_rate": 1.0504117955445736e-06, + "loss": 0.0592, + "num_input_tokens_seen": 73881576, + "step": 109610 + }, + { + "epoch": 2.6779126865853957, + "grad_norm": 0.2045755684375763, + "learning_rate": 1.050326626190908e-06, + "loss": 0.0768, + "num_input_tokens_seen": 73885032, + "step": 109615 + }, + { + "epoch": 2.678034837417243, + "grad_norm": 0.012918557040393353, + "learning_rate": 1.050241456471254e-06, + "loss": 0.0703, + "num_input_tokens_seen": 73888232, + "step": 109620 + }, + { + "epoch": 2.67815698824909, + "grad_norm": 0.01960483193397522, + "learning_rate": 1.05015628638623e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73891304, + "step": 109625 + }, + { + "epoch": 2.6782791390809373, + "grad_norm": 0.026721443980932236, + "learning_rate": 1.0500711159364564e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73894376, + "step": 109630 + }, + { + "epoch": 2.6784012899127845, + "grad_norm": 0.03172799199819565, + "learning_rate": 1.049985945122552e-06, + "loss": 0.0007, + "num_input_tokens_seen": 73897832, + "step": 109635 + }, + { + "epoch": 2.6785234407446312, + "grad_norm": 508.57391357421875, + "learning_rate": 1.0499007739451364e-06, + "loss": 0.0138, + "num_input_tokens_seen": 73901352, + "step": 109640 + }, + { + "epoch": 2.678645591576479, + "grad_norm": 27.824207305908203, + "learning_rate": 1.0498156024048285e-06, + "loss": 0.0374, + "num_input_tokens_seen": 73904680, + "step": 109645 + }, + { + "epoch": 2.6787677424083256, + "grad_norm": 0.019097011536359787, + "learning_rate": 1.0497304305022488e-06, + "loss": 0.0002, + "num_input_tokens_seen": 73908264, + "step": 109650 + }, + { + "epoch": 2.6788898932401732, + "grad_norm": 239.4774932861328, + "learning_rate": 1.0496452582380158e-06, + "loss": 0.0327, + "num_input_tokens_seen": 73911592, + "step": 109655 + }, + { + "epoch": 2.67901204407202, + "grad_norm": 0.0204803254455328, + "learning_rate": 1.0495600856127492e-06, + "loss": 0.0616, + "num_input_tokens_seen": 73914984, + "step": 109660 + }, + { + "epoch": 2.679134194903867, + "grad_norm": 0.013475023210048676, + "learning_rate": 1.0494749126270681e-06, + "loss": 0.1155, + "num_input_tokens_seen": 73917992, + "step": 109665 + }, + { + "epoch": 2.6792563457357144, + "grad_norm": 0.08012855798006058, + "learning_rate": 1.0493897392815927e-06, + "loss": 0.0319, + "num_input_tokens_seen": 73921128, + "step": 109670 + }, + { + "epoch": 2.6793784965675616, + "grad_norm": 0.012000217102468014, + "learning_rate": 1.049304565576941e-06, + "loss": 0.0491, + "num_input_tokens_seen": 73924968, + "step": 109675 + }, + { + "epoch": 2.6795006473994087, + "grad_norm": 0.018842976540327072, + "learning_rate": 1.0492193915137337e-06, + "loss": 0.0897, + "num_input_tokens_seen": 73928168, + "step": 109680 + }, + { + "epoch": 2.679622798231256, + "grad_norm": 15.081670761108398, + "learning_rate": 1.0491342170925898e-06, + "loss": 0.0729, + "num_input_tokens_seen": 73931560, + "step": 109685 + }, + { + "epoch": 2.679744949063103, + "grad_norm": 0.0979156643152237, + "learning_rate": 1.0490490423141286e-06, + "loss": 0.0372, + "num_input_tokens_seen": 73934696, + "step": 109690 + }, + { + "epoch": 2.6798670998949503, + "grad_norm": 0.03201375901699066, + "learning_rate": 1.0489638671789695e-06, + "loss": 0.0507, + "num_input_tokens_seen": 73938408, + "step": 109695 + }, + { + "epoch": 2.6799892507267975, + "grad_norm": 0.06937959790229797, + "learning_rate": 1.0488786916877322e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73941544, + "step": 109700 + }, + { + "epoch": 2.6801114015586447, + "grad_norm": 0.010883811861276627, + "learning_rate": 1.0487935158410354e-06, + "loss": 0.045, + "num_input_tokens_seen": 73945448, + "step": 109705 + }, + { + "epoch": 2.680233552390492, + "grad_norm": 0.01242207270115614, + "learning_rate": 1.0487083396394994e-06, + "loss": 0.0579, + "num_input_tokens_seen": 73948712, + "step": 109710 + }, + { + "epoch": 2.680355703222339, + "grad_norm": 0.008689627051353455, + "learning_rate": 1.0486231630837435e-06, + "loss": 0.027, + "num_input_tokens_seen": 73952360, + "step": 109715 + }, + { + "epoch": 2.6804778540541863, + "grad_norm": 0.15042613446712494, + "learning_rate": 1.0485379861743867e-06, + "loss": 0.0385, + "num_input_tokens_seen": 73955752, + "step": 109720 + }, + { + "epoch": 2.680600004886033, + "grad_norm": 0.02071407251060009, + "learning_rate": 1.0484528089120484e-06, + "loss": 0.0506, + "num_input_tokens_seen": 73959144, + "step": 109725 + }, + { + "epoch": 2.6807221557178806, + "grad_norm": 0.9460954666137695, + "learning_rate": 1.0483676312973484e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73962344, + "step": 109730 + }, + { + "epoch": 2.6808443065497274, + "grad_norm": 0.18505899608135223, + "learning_rate": 1.0482824533309057e-06, + "loss": 0.0003, + "num_input_tokens_seen": 73965544, + "step": 109735 + }, + { + "epoch": 2.680966457381575, + "grad_norm": 0.01839536800980568, + "learning_rate": 1.04819727501334e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73968936, + "step": 109740 + }, + { + "epoch": 2.6810886082134218, + "grad_norm": 0.3862949013710022, + "learning_rate": 1.0481120963452712e-06, + "loss": 0.0503, + "num_input_tokens_seen": 73972200, + "step": 109745 + }, + { + "epoch": 2.681210759045269, + "grad_norm": 0.015912873670458794, + "learning_rate": 1.048026917327318e-06, + "loss": 0.0004, + "num_input_tokens_seen": 73975080, + "step": 109750 + }, + { + "epoch": 2.681332909877116, + "grad_norm": 23.779499053955078, + "learning_rate": 1.0479417379601003e-06, + "loss": 0.0591, + "num_input_tokens_seen": 73978152, + "step": 109755 + }, + { + "epoch": 2.6814550607089633, + "grad_norm": 0.2585065960884094, + "learning_rate": 1.0478565582442374e-06, + "loss": 0.0969, + "num_input_tokens_seen": 73981416, + "step": 109760 + }, + { + "epoch": 2.6815772115408105, + "grad_norm": 86.09591674804688, + "learning_rate": 1.0477713781803487e-06, + "loss": 0.0928, + "num_input_tokens_seen": 73985128, + "step": 109765 + }, + { + "epoch": 2.6816993623726577, + "grad_norm": 0.012614854611456394, + "learning_rate": 1.0476861977690533e-06, + "loss": 0.0412, + "num_input_tokens_seen": 73988328, + "step": 109770 + }, + { + "epoch": 2.681821513204505, + "grad_norm": 0.11613646149635315, + "learning_rate": 1.0476010170109715e-06, + "loss": 0.0452, + "num_input_tokens_seen": 73991592, + "step": 109775 + }, + { + "epoch": 2.681943664036352, + "grad_norm": 1.0370471477508545, + "learning_rate": 1.0475158359067222e-06, + "loss": 0.0005, + "num_input_tokens_seen": 73994920, + "step": 109780 + }, + { + "epoch": 2.6820658148681993, + "grad_norm": 0.13517054915428162, + "learning_rate": 1.047430654456925e-06, + "loss": 0.1469, + "num_input_tokens_seen": 73997992, + "step": 109785 + }, + { + "epoch": 2.6821879657000465, + "grad_norm": 123.24833679199219, + "learning_rate": 1.0473454726621992e-06, + "loss": 0.1009, + "num_input_tokens_seen": 74001192, + "step": 109790 + }, + { + "epoch": 2.6823101165318937, + "grad_norm": 43.56346130371094, + "learning_rate": 1.0472602905231647e-06, + "loss": 0.001, + "num_input_tokens_seen": 74004584, + "step": 109795 + }, + { + "epoch": 2.682432267363741, + "grad_norm": 0.03455251827836037, + "learning_rate": 1.0471751080404401e-06, + "loss": 0.0008, + "num_input_tokens_seen": 74008040, + "step": 109800 + }, + { + "epoch": 2.682554418195588, + "grad_norm": 0.12241503596305847, + "learning_rate": 1.0470899252146456e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74010984, + "step": 109805 + }, + { + "epoch": 2.6826765690274352, + "grad_norm": 0.010911804623901844, + "learning_rate": 1.0470047420464008e-06, + "loss": 0.0008, + "num_input_tokens_seen": 74014184, + "step": 109810 + }, + { + "epoch": 2.6827987198592824, + "grad_norm": 1.412819504737854, + "learning_rate": 1.0469195585363246e-06, + "loss": 0.0006, + "num_input_tokens_seen": 74017448, + "step": 109815 + }, + { + "epoch": 2.682920870691129, + "grad_norm": 0.12392840534448624, + "learning_rate": 1.0468343746850369e-06, + "loss": 0.1279, + "num_input_tokens_seen": 74020904, + "step": 109820 + }, + { + "epoch": 2.683043021522977, + "grad_norm": 414.29779052734375, + "learning_rate": 1.0467491904931574e-06, + "loss": 0.0209, + "num_input_tokens_seen": 74024424, + "step": 109825 + }, + { + "epoch": 2.6831651723548235, + "grad_norm": 0.019323252141475677, + "learning_rate": 1.0466640059613045e-06, + "loss": 0.0351, + "num_input_tokens_seen": 74027752, + "step": 109830 + }, + { + "epoch": 2.6832873231866707, + "grad_norm": 0.06316502392292023, + "learning_rate": 1.0465788210900987e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74031016, + "step": 109835 + }, + { + "epoch": 2.683409474018518, + "grad_norm": 0.035119447857141495, + "learning_rate": 1.046493635880159e-06, + "loss": 0.059, + "num_input_tokens_seen": 74034600, + "step": 109840 + }, + { + "epoch": 2.683531624850365, + "grad_norm": 0.0017133563524112105, + "learning_rate": 1.0464084503321053e-06, + "loss": 0.0526, + "num_input_tokens_seen": 74037672, + "step": 109845 + }, + { + "epoch": 2.6836537756822123, + "grad_norm": 0.024416368454694748, + "learning_rate": 1.046323264446557e-06, + "loss": 0.0802, + "num_input_tokens_seen": 74040808, + "step": 109850 + }, + { + "epoch": 2.6837759265140595, + "grad_norm": 0.08081745356321335, + "learning_rate": 1.046238078224133e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74043944, + "step": 109855 + }, + { + "epoch": 2.6838980773459067, + "grad_norm": 0.15387476980686188, + "learning_rate": 1.0461528916654536e-06, + "loss": 0.0271, + "num_input_tokens_seen": 74047464, + "step": 109860 + }, + { + "epoch": 2.684020228177754, + "grad_norm": 0.1924990713596344, + "learning_rate": 1.046067704771138e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74050920, + "step": 109865 + }, + { + "epoch": 2.684142379009601, + "grad_norm": 0.050476282835006714, + "learning_rate": 1.0459825175418057e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74054312, + "step": 109870 + }, + { + "epoch": 2.6842645298414483, + "grad_norm": 0.08359876275062561, + "learning_rate": 1.045897329978076e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74057640, + "step": 109875 + }, + { + "epoch": 2.6843866806732954, + "grad_norm": 0.013668022118508816, + "learning_rate": 1.0458121420805685e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74060648, + "step": 109880 + }, + { + "epoch": 2.6845088315051426, + "grad_norm": 0.011722809635102749, + "learning_rate": 1.045726953849903e-06, + "loss": 0.0581, + "num_input_tokens_seen": 74064232, + "step": 109885 + }, + { + "epoch": 2.68463098233699, + "grad_norm": 0.153499573469162, + "learning_rate": 1.0456417652866986e-06, + "loss": 0.0007, + "num_input_tokens_seen": 74067368, + "step": 109890 + }, + { + "epoch": 2.684753133168837, + "grad_norm": 0.11200530081987381, + "learning_rate": 1.045556576391575e-06, + "loss": 0.0438, + "num_input_tokens_seen": 74071016, + "step": 109895 + }, + { + "epoch": 2.684875284000684, + "grad_norm": 0.01185649260878563, + "learning_rate": 1.0454713871651518e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74074408, + "step": 109900 + }, + { + "epoch": 2.684997434832531, + "grad_norm": 14.90159797668457, + "learning_rate": 1.0453861976080485e-06, + "loss": 0.0523, + "num_input_tokens_seen": 74078184, + "step": 109905 + }, + { + "epoch": 2.6851195856643786, + "grad_norm": 0.015263424254953861, + "learning_rate": 1.0453010077208845e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74081576, + "step": 109910 + }, + { + "epoch": 2.6852417364962253, + "grad_norm": 0.10612472891807556, + "learning_rate": 1.0452158175042794e-06, + "loss": 0.0134, + "num_input_tokens_seen": 74084904, + "step": 109915 + }, + { + "epoch": 2.685363887328073, + "grad_norm": 0.0759076327085495, + "learning_rate": 1.0451306269588526e-06, + "loss": 0.0866, + "num_input_tokens_seen": 74088360, + "step": 109920 + }, + { + "epoch": 2.6854860381599197, + "grad_norm": 0.004117521457374096, + "learning_rate": 1.0450454360852238e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74091432, + "step": 109925 + }, + { + "epoch": 2.685608188991767, + "grad_norm": 0.04352575168013573, + "learning_rate": 1.0449602448840127e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74094568, + "step": 109930 + }, + { + "epoch": 2.685730339823614, + "grad_norm": 42.42630386352539, + "learning_rate": 1.0448750533558383e-06, + "loss": 0.0733, + "num_input_tokens_seen": 74097896, + "step": 109935 + }, + { + "epoch": 2.6858524906554613, + "grad_norm": 157.4754180908203, + "learning_rate": 1.0447898615013206e-06, + "loss": 0.0455, + "num_input_tokens_seen": 74101032, + "step": 109940 + }, + { + "epoch": 2.6859746414873085, + "grad_norm": 0.034718506038188934, + "learning_rate": 1.044704669321079e-06, + "loss": 0.0701, + "num_input_tokens_seen": 74104552, + "step": 109945 + }, + { + "epoch": 2.6860967923191557, + "grad_norm": 0.18217360973358154, + "learning_rate": 1.044619476815733e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74108328, + "step": 109950 + }, + { + "epoch": 2.686218943151003, + "grad_norm": 36.641632080078125, + "learning_rate": 1.044534283985902e-06, + "loss": 0.074, + "num_input_tokens_seen": 74112168, + "step": 109955 + }, + { + "epoch": 2.68634109398285, + "grad_norm": 0.04653691127896309, + "learning_rate": 1.044449090832206e-06, + "loss": 0.0443, + "num_input_tokens_seen": 74115624, + "step": 109960 + }, + { + "epoch": 2.6864632448146972, + "grad_norm": 0.010276403278112411, + "learning_rate": 1.044363897355264e-06, + "loss": 0.0447, + "num_input_tokens_seen": 74118696, + "step": 109965 + }, + { + "epoch": 2.6865853956465444, + "grad_norm": 22.342988967895508, + "learning_rate": 1.044278703555696e-06, + "loss": 0.1297, + "num_input_tokens_seen": 74122088, + "step": 109970 + }, + { + "epoch": 2.6867075464783916, + "grad_norm": 0.28928831219673157, + "learning_rate": 1.044193509434121e-06, + "loss": 0.0467, + "num_input_tokens_seen": 74125032, + "step": 109975 + }, + { + "epoch": 2.686829697310239, + "grad_norm": 16.242509841918945, + "learning_rate": 1.0441083149911596e-06, + "loss": 0.0384, + "num_input_tokens_seen": 74128360, + "step": 109980 + }, + { + "epoch": 2.686951848142086, + "grad_norm": 0.03907003253698349, + "learning_rate": 1.04402312022743e-06, + "loss": 0.0019, + "num_input_tokens_seen": 74132008, + "step": 109985 + }, + { + "epoch": 2.687073998973933, + "grad_norm": 0.10918454825878143, + "learning_rate": 1.0439379251435527e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74135208, + "step": 109990 + }, + { + "epoch": 2.6871961498057804, + "grad_norm": 0.6462915539741516, + "learning_rate": 1.043852729740147e-06, + "loss": 0.0008, + "num_input_tokens_seen": 74138600, + "step": 109995 + }, + { + "epoch": 2.687318300637627, + "grad_norm": 0.1784975826740265, + "learning_rate": 1.0437675340178322e-06, + "loss": 0.0655, + "num_input_tokens_seen": 74141992, + "step": 110000 + }, + { + "epoch": 2.6874404514694747, + "grad_norm": 0.0959717407822609, + "learning_rate": 1.0436823379772283e-06, + "loss": 0.0427, + "num_input_tokens_seen": 74145128, + "step": 110005 + }, + { + "epoch": 2.6875626023013215, + "grad_norm": 0.012468988075852394, + "learning_rate": 1.0435971416189549e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74148776, + "step": 110010 + }, + { + "epoch": 2.6876847531331687, + "grad_norm": 0.022382086142897606, + "learning_rate": 1.0435119449436309e-06, + "loss": 0.0481, + "num_input_tokens_seen": 74151912, + "step": 110015 + }, + { + "epoch": 2.687806903965016, + "grad_norm": 0.04924376308917999, + "learning_rate": 1.0434267479518768e-06, + "loss": 0.0373, + "num_input_tokens_seen": 74154984, + "step": 110020 + }, + { + "epoch": 2.687929054796863, + "grad_norm": 18.4334659576416, + "learning_rate": 1.0433415506443117e-06, + "loss": 0.193, + "num_input_tokens_seen": 74157928, + "step": 110025 + }, + { + "epoch": 2.6880512056287102, + "grad_norm": 0.030442774295806885, + "learning_rate": 1.043256353021555e-06, + "loss": 0.0006, + "num_input_tokens_seen": 74161512, + "step": 110030 + }, + { + "epoch": 2.6881733564605574, + "grad_norm": 0.06618069857358932, + "learning_rate": 1.0431711550842265e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74164968, + "step": 110035 + }, + { + "epoch": 2.6882955072924046, + "grad_norm": 0.46438732743263245, + "learning_rate": 1.0430859568329458e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74168232, + "step": 110040 + }, + { + "epoch": 2.688417658124252, + "grad_norm": 1.1904546022415161, + "learning_rate": 1.0430007582683322e-06, + "loss": 0.0494, + "num_input_tokens_seen": 74171624, + "step": 110045 + }, + { + "epoch": 2.688539808956099, + "grad_norm": 478.31195068359375, + "learning_rate": 1.0429155593910055e-06, + "loss": 0.0137, + "num_input_tokens_seen": 74175272, + "step": 110050 + }, + { + "epoch": 2.688661959787946, + "grad_norm": 0.02546611614525318, + "learning_rate": 1.0428303602015856e-06, + "loss": 0.0411, + "num_input_tokens_seen": 74178792, + "step": 110055 + }, + { + "epoch": 2.6887841106197934, + "grad_norm": 10.07689094543457, + "learning_rate": 1.0427451607006916e-06, + "loss": 0.1165, + "num_input_tokens_seen": 74182248, + "step": 110060 + }, + { + "epoch": 2.6889062614516406, + "grad_norm": 0.045556433498859406, + "learning_rate": 1.0426599608889435e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74185896, + "step": 110065 + }, + { + "epoch": 2.6890284122834878, + "grad_norm": 0.21916463971138, + "learning_rate": 1.0425747607669607e-06, + "loss": 0.0047, + "num_input_tokens_seen": 74189096, + "step": 110070 + }, + { + "epoch": 2.689150563115335, + "grad_norm": 81.54149627685547, + "learning_rate": 1.0424895603353626e-06, + "loss": 0.0301, + "num_input_tokens_seen": 74192168, + "step": 110075 + }, + { + "epoch": 2.689272713947182, + "grad_norm": 0.053823795169591904, + "learning_rate": 1.0424043595947692e-06, + "loss": 0.0455, + "num_input_tokens_seen": 74195368, + "step": 110080 + }, + { + "epoch": 2.689394864779029, + "grad_norm": 0.05746879801154137, + "learning_rate": 1.0423191585457997e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74198568, + "step": 110085 + }, + { + "epoch": 2.6895170156108765, + "grad_norm": 0.039911337196826935, + "learning_rate": 1.0422339571890738e-06, + "loss": 0.0326, + "num_input_tokens_seen": 74202024, + "step": 110090 + }, + { + "epoch": 2.6896391664427233, + "grad_norm": 0.011159949004650116, + "learning_rate": 1.0421487555252115e-06, + "loss": 0.0468, + "num_input_tokens_seen": 74205736, + "step": 110095 + }, + { + "epoch": 2.689761317274571, + "grad_norm": 0.0034481934271752834, + "learning_rate": 1.042063553554832e-06, + "loss": 0.1252, + "num_input_tokens_seen": 74208936, + "step": 110100 + }, + { + "epoch": 2.6898834681064177, + "grad_norm": 0.03940635919570923, + "learning_rate": 1.041978351278555e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74212200, + "step": 110105 + }, + { + "epoch": 2.690005618938265, + "grad_norm": 0.3935776352882385, + "learning_rate": 1.0418931486969998e-06, + "loss": 0.0007, + "num_input_tokens_seen": 74215976, + "step": 110110 + }, + { + "epoch": 2.690127769770112, + "grad_norm": 0.1421252191066742, + "learning_rate": 1.0418079458107868e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74219176, + "step": 110115 + }, + { + "epoch": 2.690249920601959, + "grad_norm": 1.2814970016479492, + "learning_rate": 1.041722742620535e-06, + "loss": 0.0011, + "num_input_tokens_seen": 74222696, + "step": 110120 + }, + { + "epoch": 2.6903720714338064, + "grad_norm": 0.04111748933792114, + "learning_rate": 1.0416375391268642e-06, + "loss": 0.17, + "num_input_tokens_seen": 74225960, + "step": 110125 + }, + { + "epoch": 2.6904942222656536, + "grad_norm": 385.12689208984375, + "learning_rate": 1.0415523353303942e-06, + "loss": 0.0067, + "num_input_tokens_seen": 74229992, + "step": 110130 + }, + { + "epoch": 2.690616373097501, + "grad_norm": 0.01891736313700676, + "learning_rate": 1.0414671312317444e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74233576, + "step": 110135 + }, + { + "epoch": 2.690738523929348, + "grad_norm": 9.382598876953125, + "learning_rate": 1.0413819268315343e-06, + "loss": 0.1042, + "num_input_tokens_seen": 74237224, + "step": 110140 + }, + { + "epoch": 2.690860674761195, + "grad_norm": 0.06907006353139877, + "learning_rate": 1.0412967221303836e-06, + "loss": 0.0367, + "num_input_tokens_seen": 74240808, + "step": 110145 + }, + { + "epoch": 2.6909828255930424, + "grad_norm": 0.052153579890728, + "learning_rate": 1.041211517128912e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74244392, + "step": 110150 + }, + { + "epoch": 2.6911049764248895, + "grad_norm": 30.580373764038086, + "learning_rate": 1.0411263118277396e-06, + "loss": 0.1193, + "num_input_tokens_seen": 74247464, + "step": 110155 + }, + { + "epoch": 2.6912271272567367, + "grad_norm": 0.918717622756958, + "learning_rate": 1.0410411062274856e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74250664, + "step": 110160 + }, + { + "epoch": 2.691349278088584, + "grad_norm": 0.008485613390803337, + "learning_rate": 1.0409559003287692e-06, + "loss": 0.066, + "num_input_tokens_seen": 74254376, + "step": 110165 + }, + { + "epoch": 2.6914714289204307, + "grad_norm": 0.03416123986244202, + "learning_rate": 1.0408706941322105e-06, + "loss": 0.0647, + "num_input_tokens_seen": 74257832, + "step": 110170 + }, + { + "epoch": 2.6915935797522783, + "grad_norm": 0.05413348227739334, + "learning_rate": 1.0407854876384293e-06, + "loss": 0.0744, + "num_input_tokens_seen": 74261096, + "step": 110175 + }, + { + "epoch": 2.691715730584125, + "grad_norm": 0.02704242244362831, + "learning_rate": 1.0407002808480454e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74264680, + "step": 110180 + }, + { + "epoch": 2.6918378814159727, + "grad_norm": 0.0406159944832325, + "learning_rate": 1.0406150737616776e-06, + "loss": 0.0811, + "num_input_tokens_seen": 74268264, + "step": 110185 + }, + { + "epoch": 2.6919600322478194, + "grad_norm": 0.09724722802639008, + "learning_rate": 1.0405298663799463e-06, + "loss": 0.0358, + "num_input_tokens_seen": 74271464, + "step": 110190 + }, + { + "epoch": 2.6920821830796666, + "grad_norm": 8.796319961547852, + "learning_rate": 1.0404446587034708e-06, + "loss": 0.0011, + "num_input_tokens_seen": 74274536, + "step": 110195 + }, + { + "epoch": 2.692204333911514, + "grad_norm": 0.05820569023489952, + "learning_rate": 1.040359450732871e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74277928, + "step": 110200 + }, + { + "epoch": 2.692326484743361, + "grad_norm": 0.004109354689717293, + "learning_rate": 1.040274242468766e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74281256, + "step": 110205 + }, + { + "epoch": 2.692448635575208, + "grad_norm": 0.1228046640753746, + "learning_rate": 1.0401890339117763e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74284968, + "step": 110210 + }, + { + "epoch": 2.6925707864070554, + "grad_norm": 0.012766096740961075, + "learning_rate": 1.0401038250625212e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74288360, + "step": 110215 + }, + { + "epoch": 2.6926929372389026, + "grad_norm": 0.03938378766179085, + "learning_rate": 1.04001861592162e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74291304, + "step": 110220 + }, + { + "epoch": 2.6928150880707498, + "grad_norm": 0.02110552228987217, + "learning_rate": 1.0399334064896927e-06, + "loss": 0.0396, + "num_input_tokens_seen": 74295528, + "step": 110225 + }, + { + "epoch": 2.692937238902597, + "grad_norm": 0.011235724203288555, + "learning_rate": 1.0398481967673592e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74299112, + "step": 110230 + }, + { + "epoch": 2.693059389734444, + "grad_norm": 0.0376700684428215, + "learning_rate": 1.0397629867552387e-06, + "loss": 0.0483, + "num_input_tokens_seen": 74302632, + "step": 110235 + }, + { + "epoch": 2.6931815405662913, + "grad_norm": 0.06534771621227264, + "learning_rate": 1.039677776453951e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74305832, + "step": 110240 + }, + { + "epoch": 2.6933036913981385, + "grad_norm": 0.1882932335138321, + "learning_rate": 1.0395925658641161e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74309224, + "step": 110245 + }, + { + "epoch": 2.6934258422299857, + "grad_norm": 0.04143861308693886, + "learning_rate": 1.039507354986353e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74312552, + "step": 110250 + }, + { + "epoch": 2.693547993061833, + "grad_norm": 0.0019535149913281202, + "learning_rate": 1.0394221438212822e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74315880, + "step": 110255 + }, + { + "epoch": 2.69367014389368, + "grad_norm": 0.4619247317314148, + "learning_rate": 1.0393369323695227e-06, + "loss": 0.0373, + "num_input_tokens_seen": 74319144, + "step": 110260 + }, + { + "epoch": 2.693792294725527, + "grad_norm": 0.12660761177539825, + "learning_rate": 1.0392517206316944e-06, + "loss": 0.0397, + "num_input_tokens_seen": 74322408, + "step": 110265 + }, + { + "epoch": 2.6939144455573745, + "grad_norm": 0.00939325150102377, + "learning_rate": 1.0391665086084172e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74325480, + "step": 110270 + }, + { + "epoch": 2.694036596389221, + "grad_norm": 13.599160194396973, + "learning_rate": 1.0390812963003105e-06, + "loss": 0.0656, + "num_input_tokens_seen": 74328488, + "step": 110275 + }, + { + "epoch": 2.694158747221069, + "grad_norm": 0.04216121882200241, + "learning_rate": 1.0389960837079944e-06, + "loss": 0.0378, + "num_input_tokens_seen": 74331880, + "step": 110280 + }, + { + "epoch": 2.6942808980529156, + "grad_norm": 0.015083221718668938, + "learning_rate": 1.0389108708320879e-06, + "loss": 0.0332, + "num_input_tokens_seen": 74335144, + "step": 110285 + }, + { + "epoch": 2.694403048884763, + "grad_norm": 0.15606962144374847, + "learning_rate": 1.0388256576732115e-06, + "loss": 0.0656, + "num_input_tokens_seen": 74338664, + "step": 110290 + }, + { + "epoch": 2.69452519971661, + "grad_norm": 0.02123434469103813, + "learning_rate": 1.038740444231984e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74341992, + "step": 110295 + }, + { + "epoch": 2.694647350548457, + "grad_norm": 0.007134847808629274, + "learning_rate": 1.0386552305090256e-06, + "loss": 0.0523, + "num_input_tokens_seen": 74345384, + "step": 110300 + }, + { + "epoch": 2.6947695013803044, + "grad_norm": 0.007809483911842108, + "learning_rate": 1.0385700165049565e-06, + "loss": 0.035, + "num_input_tokens_seen": 74348584, + "step": 110305 + }, + { + "epoch": 2.6948916522121515, + "grad_norm": 2.889589548110962, + "learning_rate": 1.0384848022203955e-06, + "loss": 0.0611, + "num_input_tokens_seen": 74352360, + "step": 110310 + }, + { + "epoch": 2.6950138030439987, + "grad_norm": 0.8335879445075989, + "learning_rate": 1.0383995876559626e-06, + "loss": 0.001, + "num_input_tokens_seen": 74356008, + "step": 110315 + }, + { + "epoch": 2.695135953875846, + "grad_norm": 0.08728937804698944, + "learning_rate": 1.0383143728122776e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74359272, + "step": 110320 + }, + { + "epoch": 2.695258104707693, + "grad_norm": 0.11635275930166245, + "learning_rate": 1.0382291576899605e-06, + "loss": 0.0304, + "num_input_tokens_seen": 74362536, + "step": 110325 + }, + { + "epoch": 2.6953802555395403, + "grad_norm": 0.11208701133728027, + "learning_rate": 1.03814394228963e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74365544, + "step": 110330 + }, + { + "epoch": 2.6955024063713875, + "grad_norm": 0.022906199097633362, + "learning_rate": 1.0380587266119072e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74368872, + "step": 110335 + }, + { + "epoch": 2.6956245572032347, + "grad_norm": 0.024830637499690056, + "learning_rate": 1.0379735106574107e-06, + "loss": 0.0465, + "num_input_tokens_seen": 74372200, + "step": 110340 + }, + { + "epoch": 2.695746708035082, + "grad_norm": 0.005570548586547375, + "learning_rate": 1.0378882944267608e-06, + "loss": 0.0579, + "num_input_tokens_seen": 74375784, + "step": 110345 + }, + { + "epoch": 2.6958688588669286, + "grad_norm": 20.110021591186523, + "learning_rate": 1.037803077920577e-06, + "loss": 0.1055, + "num_input_tokens_seen": 74379112, + "step": 110350 + }, + { + "epoch": 2.6959910096987763, + "grad_norm": 16.832504272460938, + "learning_rate": 1.037717861139479e-06, + "loss": 0.0908, + "num_input_tokens_seen": 74382248, + "step": 110355 + }, + { + "epoch": 2.696113160530623, + "grad_norm": 0.008288275450468063, + "learning_rate": 1.0376326440840863e-06, + "loss": 0.083, + "num_input_tokens_seen": 74385512, + "step": 110360 + }, + { + "epoch": 2.6962353113624706, + "grad_norm": 0.15754544734954834, + "learning_rate": 1.0375474267550192e-06, + "loss": 0.0502, + "num_input_tokens_seen": 74388904, + "step": 110365 + }, + { + "epoch": 2.6963574621943174, + "grad_norm": 0.011054810136556625, + "learning_rate": 1.0374622091528973e-06, + "loss": 0.0367, + "num_input_tokens_seen": 74392424, + "step": 110370 + }, + { + "epoch": 2.6964796130261646, + "grad_norm": 16.19540786743164, + "learning_rate": 1.03737699127834e-06, + "loss": 0.0426, + "num_input_tokens_seen": 74396392, + "step": 110375 + }, + { + "epoch": 2.6966017638580118, + "grad_norm": 0.013768474571406841, + "learning_rate": 1.037291773131967e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74399656, + "step": 110380 + }, + { + "epoch": 2.696723914689859, + "grad_norm": 0.22251591086387634, + "learning_rate": 1.0372065547143982e-06, + "loss": 0.061, + "num_input_tokens_seen": 74403240, + "step": 110385 + }, + { + "epoch": 2.696846065521706, + "grad_norm": 0.13074393570423126, + "learning_rate": 1.0371213360262537e-06, + "loss": 0.0491, + "num_input_tokens_seen": 74406888, + "step": 110390 + }, + { + "epoch": 2.6969682163535533, + "grad_norm": 0.020176751539111137, + "learning_rate": 1.0370361170681525e-06, + "loss": 0.0968, + "num_input_tokens_seen": 74410024, + "step": 110395 + }, + { + "epoch": 2.6970903671854005, + "grad_norm": 0.0231939610093832, + "learning_rate": 1.0369508978407146e-06, + "loss": 0.009, + "num_input_tokens_seen": 74413480, + "step": 110400 + }, + { + "epoch": 2.6972125180172477, + "grad_norm": 0.05197136104106903, + "learning_rate": 1.0368656783445603e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74416808, + "step": 110405 + }, + { + "epoch": 2.697334668849095, + "grad_norm": 1.812922716140747, + "learning_rate": 1.0367804585803084e-06, + "loss": 0.0412, + "num_input_tokens_seen": 74421224, + "step": 110410 + }, + { + "epoch": 2.697456819680942, + "grad_norm": 0.00616570794954896, + "learning_rate": 1.0366952385485792e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74425000, + "step": 110415 + }, + { + "epoch": 2.6975789705127893, + "grad_norm": 0.011104024946689606, + "learning_rate": 1.0366100182499923e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74428328, + "step": 110420 + }, + { + "epoch": 2.6977011213446365, + "grad_norm": 0.013107498176395893, + "learning_rate": 1.0365247976851677e-06, + "loss": 0.015, + "num_input_tokens_seen": 74432424, + "step": 110425 + }, + { + "epoch": 2.6978232721764837, + "grad_norm": 0.03257214277982712, + "learning_rate": 1.036439576854725e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74435752, + "step": 110430 + }, + { + "epoch": 2.697945423008331, + "grad_norm": 0.002233329229056835, + "learning_rate": 1.0363543557592838e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74439016, + "step": 110435 + }, + { + "epoch": 2.698067573840178, + "grad_norm": 0.049250464886426926, + "learning_rate": 1.036269134399464e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74442600, + "step": 110440 + }, + { + "epoch": 2.698189724672025, + "grad_norm": 0.10871399939060211, + "learning_rate": 1.0361839127758854e-06, + "loss": 0.0664, + "num_input_tokens_seen": 74445928, + "step": 110445 + }, + { + "epoch": 2.6983118755038724, + "grad_norm": 0.6769186854362488, + "learning_rate": 1.0360986908891672e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74449640, + "step": 110450 + }, + { + "epoch": 2.698434026335719, + "grad_norm": 0.0014609667705371976, + "learning_rate": 1.03601346873993e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74453032, + "step": 110455 + }, + { + "epoch": 2.6985561771675664, + "grad_norm": 0.0031799173448234797, + "learning_rate": 1.035928246328793e-06, + "loss": 0.0714, + "num_input_tokens_seen": 74456360, + "step": 110460 + }, + { + "epoch": 2.6986783279994135, + "grad_norm": 0.0017244528280571103, + "learning_rate": 1.0358430236563762e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74460008, + "step": 110465 + }, + { + "epoch": 2.6988004788312607, + "grad_norm": 2.850145101547241, + "learning_rate": 1.0357578007232991e-06, + "loss": 0.001, + "num_input_tokens_seen": 74463144, + "step": 110470 + }, + { + "epoch": 2.698922629663108, + "grad_norm": 0.010116000659763813, + "learning_rate": 1.035672577530182e-06, + "loss": 0.0366, + "num_input_tokens_seen": 74466856, + "step": 110475 + }, + { + "epoch": 2.699044780494955, + "grad_norm": 0.10385355353355408, + "learning_rate": 1.0355873540776443e-06, + "loss": 0.0362, + "num_input_tokens_seen": 74470184, + "step": 110480 + }, + { + "epoch": 2.6991669313268023, + "grad_norm": 51.628379821777344, + "learning_rate": 1.0355021303663053e-06, + "loss": 0.0514, + "num_input_tokens_seen": 74473896, + "step": 110485 + }, + { + "epoch": 2.6992890821586495, + "grad_norm": 0.009503796696662903, + "learning_rate": 1.035416906396786e-06, + "loss": 0.0279, + "num_input_tokens_seen": 74477736, + "step": 110490 + }, + { + "epoch": 2.6994112329904967, + "grad_norm": 0.0070153940469026566, + "learning_rate": 1.035331682169705e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74480872, + "step": 110495 + }, + { + "epoch": 2.699533383822344, + "grad_norm": 0.007911594584584236, + "learning_rate": 1.0352464576856826e-06, + "loss": 0.0, + "num_input_tokens_seen": 74484072, + "step": 110500 + }, + { + "epoch": 2.699655534654191, + "grad_norm": 0.2613266706466675, + "learning_rate": 1.0351612329453384e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74487208, + "step": 110505 + }, + { + "epoch": 2.6997776854860382, + "grad_norm": 0.01738755591213703, + "learning_rate": 1.0350760079492922e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74490472, + "step": 110510 + }, + { + "epoch": 2.6998998363178854, + "grad_norm": 0.1000705137848854, + "learning_rate": 1.0349907826981638e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74493928, + "step": 110515 + }, + { + "epoch": 2.7000219871497326, + "grad_norm": 0.00792511273175478, + "learning_rate": 1.0349055571925731e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74497064, + "step": 110520 + }, + { + "epoch": 2.70014413798158, + "grad_norm": 0.0014833626337349415, + "learning_rate": 1.0348203314331398e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74500456, + "step": 110525 + }, + { + "epoch": 2.7002662888134266, + "grad_norm": 4.809901714324951, + "learning_rate": 1.0347351054204839e-06, + "loss": 0.063, + "num_input_tokens_seen": 74503656, + "step": 110530 + }, + { + "epoch": 2.700388439645274, + "grad_norm": 0.15625979006290436, + "learning_rate": 1.0346498791552247e-06, + "loss": 0.0384, + "num_input_tokens_seen": 74507048, + "step": 110535 + }, + { + "epoch": 2.700510590477121, + "grad_norm": 0.02334548532962799, + "learning_rate": 1.0345646526379824e-06, + "loss": 0.0447, + "num_input_tokens_seen": 74510504, + "step": 110540 + }, + { + "epoch": 2.7006327413089686, + "grad_norm": 0.6260141134262085, + "learning_rate": 1.0344794258693766e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74513768, + "step": 110545 + }, + { + "epoch": 2.7007548921408153, + "grad_norm": 0.17521077394485474, + "learning_rate": 1.0343941988500271e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74517032, + "step": 110550 + }, + { + "epoch": 2.7008770429726625, + "grad_norm": 0.017024708911776543, + "learning_rate": 1.0343089715805537e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74520360, + "step": 110555 + }, + { + "epoch": 2.7009991938045097, + "grad_norm": 0.15910948812961578, + "learning_rate": 1.0342237440615765e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74524264, + "step": 110560 + }, + { + "epoch": 2.701121344636357, + "grad_norm": 0.09337528795003891, + "learning_rate": 1.0341385162937147e-06, + "loss": 0.0879, + "num_input_tokens_seen": 74527848, + "step": 110565 + }, + { + "epoch": 2.701243495468204, + "grad_norm": 0.0003024581237696111, + "learning_rate": 1.0340532882775887e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74531176, + "step": 110570 + }, + { + "epoch": 2.7013656463000513, + "grad_norm": 0.010254576802253723, + "learning_rate": 1.0339680600138176e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74534632, + "step": 110575 + }, + { + "epoch": 2.7014877971318985, + "grad_norm": 0.058449145406484604, + "learning_rate": 1.033882831503022e-06, + "loss": 0.115, + "num_input_tokens_seen": 74538024, + "step": 110580 + }, + { + "epoch": 2.7016099479637457, + "grad_norm": 0.2035467028617859, + "learning_rate": 1.0337976027458213e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74541288, + "step": 110585 + }, + { + "epoch": 2.701732098795593, + "grad_norm": 0.09681735932826996, + "learning_rate": 1.0337123737428352e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74544360, + "step": 110590 + }, + { + "epoch": 2.70185424962744, + "grad_norm": 0.28981155157089233, + "learning_rate": 1.033627144494684e-06, + "loss": 0.0741, + "num_input_tokens_seen": 74547688, + "step": 110595 + }, + { + "epoch": 2.701976400459287, + "grad_norm": 0.4329851567745209, + "learning_rate": 1.033541915001987e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74550888, + "step": 110600 + }, + { + "epoch": 2.7020985512911344, + "grad_norm": 0.023930076509714127, + "learning_rate": 1.033456685265364e-06, + "loss": 0.0412, + "num_input_tokens_seen": 74554280, + "step": 110605 + }, + { + "epoch": 2.7022207021229816, + "grad_norm": 0.014112595468759537, + "learning_rate": 1.0333714552854349e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74557544, + "step": 110610 + }, + { + "epoch": 2.7023428529548283, + "grad_norm": 0.8872970342636108, + "learning_rate": 1.0332862250628198e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74560552, + "step": 110615 + }, + { + "epoch": 2.702465003786676, + "grad_norm": 0.1556682139635086, + "learning_rate": 1.0332009945981384e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74564264, + "step": 110620 + }, + { + "epoch": 2.7025871546185227, + "grad_norm": 0.0046437764540314674, + "learning_rate": 1.0331157638920102e-06, + "loss": 0.0114, + "num_input_tokens_seen": 74567464, + "step": 110625 + }, + { + "epoch": 2.7027093054503704, + "grad_norm": 0.013693672604858875, + "learning_rate": 1.0330305329450555e-06, + "loss": 0.0342, + "num_input_tokens_seen": 74570536, + "step": 110630 + }, + { + "epoch": 2.702831456282217, + "grad_norm": 0.28892767429351807, + "learning_rate": 1.0329453017578937e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74574312, + "step": 110635 + }, + { + "epoch": 2.7029536071140643, + "grad_norm": 0.027796685695648193, + "learning_rate": 1.0328600703311447e-06, + "loss": 0.0523, + "num_input_tokens_seen": 74577640, + "step": 110640 + }, + { + "epoch": 2.7030757579459115, + "grad_norm": 0.004140998236835003, + "learning_rate": 1.0327748386654287e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74581160, + "step": 110645 + }, + { + "epoch": 2.7031979087777587, + "grad_norm": 0.008366966620087624, + "learning_rate": 1.0326896067613654e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74584808, + "step": 110650 + }, + { + "epoch": 2.703320059609606, + "grad_norm": 0.013736417517066002, + "learning_rate": 1.032604374619574e-06, + "loss": 0.0419, + "num_input_tokens_seen": 74588520, + "step": 110655 + }, + { + "epoch": 2.703442210441453, + "grad_norm": 130.45484924316406, + "learning_rate": 1.0325191422406751e-06, + "loss": 0.049, + "num_input_tokens_seen": 74592104, + "step": 110660 + }, + { + "epoch": 2.7035643612733002, + "grad_norm": 0.008666807785630226, + "learning_rate": 1.0324339096252883e-06, + "loss": 0.0638, + "num_input_tokens_seen": 74595944, + "step": 110665 + }, + { + "epoch": 2.7036865121051474, + "grad_norm": 0.015011264011263847, + "learning_rate": 1.0323486767740331e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74599528, + "step": 110670 + }, + { + "epoch": 2.7038086629369946, + "grad_norm": 0.0012946202186867595, + "learning_rate": 1.03226344368753e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74602856, + "step": 110675 + }, + { + "epoch": 2.703930813768842, + "grad_norm": 0.0014228617073968053, + "learning_rate": 1.032178210366398e-06, + "loss": 0.0468, + "num_input_tokens_seen": 74606632, + "step": 110680 + }, + { + "epoch": 2.704052964600689, + "grad_norm": 0.0369437038898468, + "learning_rate": 1.0320929768112578e-06, + "loss": 0.0465, + "num_input_tokens_seen": 74609704, + "step": 110685 + }, + { + "epoch": 2.704175115432536, + "grad_norm": 15.255035400390625, + "learning_rate": 1.0320077430227287e-06, + "loss": 0.0526, + "num_input_tokens_seen": 74613544, + "step": 110690 + }, + { + "epoch": 2.7042972662643834, + "grad_norm": 0.0014747011009603739, + "learning_rate": 1.0319225090014307e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74617192, + "step": 110695 + }, + { + "epoch": 2.7044194170962306, + "grad_norm": 0.017089655622839928, + "learning_rate": 1.0318372747479838e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74620584, + "step": 110700 + }, + { + "epoch": 2.7045415679280778, + "grad_norm": 0.01144702173769474, + "learning_rate": 1.0317520402630076e-06, + "loss": 0.0967, + "num_input_tokens_seen": 74623912, + "step": 110705 + }, + { + "epoch": 2.7046637187599245, + "grad_norm": 0.08541342616081238, + "learning_rate": 1.0316668055471219e-06, + "loss": 0.055, + "num_input_tokens_seen": 74627304, + "step": 110710 + }, + { + "epoch": 2.704785869591772, + "grad_norm": 149.50946044921875, + "learning_rate": 1.0315815706009464e-06, + "loss": 0.002, + "num_input_tokens_seen": 74630440, + "step": 110715 + }, + { + "epoch": 2.704908020423619, + "grad_norm": 0.012330389581620693, + "learning_rate": 1.0314963354251018e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74633832, + "step": 110720 + }, + { + "epoch": 2.7050301712554665, + "grad_norm": 0.0038798125460743904, + "learning_rate": 1.031411100020207e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74636968, + "step": 110725 + }, + { + "epoch": 2.7051523220873133, + "grad_norm": 0.02092175930738449, + "learning_rate": 1.0313258643868823e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74640424, + "step": 110730 + }, + { + "epoch": 2.7052744729191605, + "grad_norm": 0.029145987704396248, + "learning_rate": 1.0312406285257474e-06, + "loss": 0.0516, + "num_input_tokens_seen": 74643368, + "step": 110735 + }, + { + "epoch": 2.7053966237510076, + "grad_norm": 0.09617263823747635, + "learning_rate": 1.0311553924374224e-06, + "loss": 0.0442, + "num_input_tokens_seen": 74646440, + "step": 110740 + }, + { + "epoch": 2.705518774582855, + "grad_norm": 0.0012000126298516989, + "learning_rate": 1.031070156122527e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74649960, + "step": 110745 + }, + { + "epoch": 2.705640925414702, + "grad_norm": 0.03371034935116768, + "learning_rate": 1.030984919581681e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74653480, + "step": 110750 + }, + { + "epoch": 2.705763076246549, + "grad_norm": 0.06264438480138779, + "learning_rate": 1.0308996828155048e-06, + "loss": 0.1049, + "num_input_tokens_seen": 74656744, + "step": 110755 + }, + { + "epoch": 2.7058852270783964, + "grad_norm": 0.012665455229580402, + "learning_rate": 1.0308144458246172e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74660136, + "step": 110760 + }, + { + "epoch": 2.7060073779102436, + "grad_norm": 0.09554749727249146, + "learning_rate": 1.0307292086096386e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74663272, + "step": 110765 + }, + { + "epoch": 2.706129528742091, + "grad_norm": 0.0033516192343086004, + "learning_rate": 1.0306439711711893e-06, + "loss": 0.1075, + "num_input_tokens_seen": 74666792, + "step": 110770 + }, + { + "epoch": 2.706251679573938, + "grad_norm": 0.00686039961874485, + "learning_rate": 1.0305587335098887e-06, + "loss": 0.0689, + "num_input_tokens_seen": 74670056, + "step": 110775 + }, + { + "epoch": 2.706373830405785, + "grad_norm": 0.3009239733219147, + "learning_rate": 1.0304734956263567e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74673256, + "step": 110780 + }, + { + "epoch": 2.7064959812376324, + "grad_norm": 0.13494005799293518, + "learning_rate": 1.0303882575212132e-06, + "loss": 0.0788, + "num_input_tokens_seen": 74676584, + "step": 110785 + }, + { + "epoch": 2.7066181320694795, + "grad_norm": 0.11872848868370056, + "learning_rate": 1.0303030191950784e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74680168, + "step": 110790 + }, + { + "epoch": 2.7067402829013263, + "grad_norm": 0.05219002440571785, + "learning_rate": 1.0302177806485715e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74683496, + "step": 110795 + }, + { + "epoch": 2.706862433733174, + "grad_norm": 0.005950891878455877, + "learning_rate": 1.030132541882313e-06, + "loss": 0.0943, + "num_input_tokens_seen": 74686760, + "step": 110800 + }, + { + "epoch": 2.7069845845650207, + "grad_norm": 0.012123181484639645, + "learning_rate": 1.0300473028969225e-06, + "loss": 0.0065, + "num_input_tokens_seen": 74689960, + "step": 110805 + }, + { + "epoch": 2.7071067353968683, + "grad_norm": 0.42338210344314575, + "learning_rate": 1.02996206369302e-06, + "loss": 0.0007, + "num_input_tokens_seen": 74693160, + "step": 110810 + }, + { + "epoch": 2.707228886228715, + "grad_norm": 0.008268994279205799, + "learning_rate": 1.0298768242712253e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74696552, + "step": 110815 + }, + { + "epoch": 2.7073510370605622, + "grad_norm": 21.602508544921875, + "learning_rate": 1.0297915846321583e-06, + "loss": 0.0875, + "num_input_tokens_seen": 74700008, + "step": 110820 + }, + { + "epoch": 2.7074731878924094, + "grad_norm": 0.0032915703486651182, + "learning_rate": 1.0297063447764387e-06, + "loss": 0.1667, + "num_input_tokens_seen": 74703016, + "step": 110825 + }, + { + "epoch": 2.7075953387242566, + "grad_norm": 0.14885084331035614, + "learning_rate": 1.0296211047046865e-06, + "loss": 0.0567, + "num_input_tokens_seen": 74706216, + "step": 110830 + }, + { + "epoch": 2.707717489556104, + "grad_norm": 16.99559211730957, + "learning_rate": 1.0295358644175222e-06, + "loss": 0.0626, + "num_input_tokens_seen": 74710056, + "step": 110835 + }, + { + "epoch": 2.707839640387951, + "grad_norm": 0.10859328508377075, + "learning_rate": 1.0294506239155647e-06, + "loss": 0.001, + "num_input_tokens_seen": 74713448, + "step": 110840 + }, + { + "epoch": 2.707961791219798, + "grad_norm": 0.178997203707695, + "learning_rate": 1.0293653831994345e-06, + "loss": 0.0469, + "num_input_tokens_seen": 74717032, + "step": 110845 + }, + { + "epoch": 2.7080839420516454, + "grad_norm": 0.0051488084718585014, + "learning_rate": 1.0292801422697512e-06, + "loss": 0.0831, + "num_input_tokens_seen": 74720424, + "step": 110850 + }, + { + "epoch": 2.7082060928834926, + "grad_norm": 0.006985927931964397, + "learning_rate": 1.029194901127135e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74723496, + "step": 110855 + }, + { + "epoch": 2.7083282437153398, + "grad_norm": 26.968042373657227, + "learning_rate": 1.0291096597722054e-06, + "loss": 0.0571, + "num_input_tokens_seen": 74726888, + "step": 110860 + }, + { + "epoch": 2.708450394547187, + "grad_norm": 0.06846634298563004, + "learning_rate": 1.0290244182055828e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74730216, + "step": 110865 + }, + { + "epoch": 2.708572545379034, + "grad_norm": 0.29106584191322327, + "learning_rate": 1.0289391764278868e-06, + "loss": 0.0573, + "num_input_tokens_seen": 74733224, + "step": 110870 + }, + { + "epoch": 2.7086946962108813, + "grad_norm": 0.07840101420879364, + "learning_rate": 1.0288539344397371e-06, + "loss": 0.0729, + "num_input_tokens_seen": 74736360, + "step": 110875 + }, + { + "epoch": 2.7088168470427285, + "grad_norm": 0.07999832928180695, + "learning_rate": 1.028768692241754e-06, + "loss": 0.0665, + "num_input_tokens_seen": 74739752, + "step": 110880 + }, + { + "epoch": 2.7089389978745757, + "grad_norm": 0.018879253417253494, + "learning_rate": 1.028683449834557e-06, + "loss": 0.0434, + "num_input_tokens_seen": 74742888, + "step": 110885 + }, + { + "epoch": 2.7090611487064225, + "grad_norm": 0.1324795037508011, + "learning_rate": 1.0285982072187665e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74746024, + "step": 110890 + }, + { + "epoch": 2.70918329953827, + "grad_norm": 0.09683836251497269, + "learning_rate": 1.028512964395002e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74749928, + "step": 110895 + }, + { + "epoch": 2.709305450370117, + "grad_norm": 0.014940385706722736, + "learning_rate": 1.0284277213638837e-06, + "loss": 0.049, + "num_input_tokens_seen": 74753192, + "step": 110900 + }, + { + "epoch": 2.709427601201964, + "grad_norm": 0.01176470797508955, + "learning_rate": 1.0283424781260312e-06, + "loss": 0.035, + "num_input_tokens_seen": 74756264, + "step": 110905 + }, + { + "epoch": 2.709549752033811, + "grad_norm": 0.03302425146102905, + "learning_rate": 1.028257234682065e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74759272, + "step": 110910 + }, + { + "epoch": 2.7096719028656584, + "grad_norm": 28.31501007080078, + "learning_rate": 1.028171991032604e-06, + "loss": 0.072, + "num_input_tokens_seen": 74762728, + "step": 110915 + }, + { + "epoch": 2.7097940536975056, + "grad_norm": 0.01271035149693489, + "learning_rate": 1.0280867471782691e-06, + "loss": 0.0009, + "num_input_tokens_seen": 74765864, + "step": 110920 + }, + { + "epoch": 2.709916204529353, + "grad_norm": 0.0440848134458065, + "learning_rate": 1.0280015031196796e-06, + "loss": 0.0694, + "num_input_tokens_seen": 74769064, + "step": 110925 + }, + { + "epoch": 2.7100383553612, + "grad_norm": 23.72658920288086, + "learning_rate": 1.0279162588574557e-06, + "loss": 0.0984, + "num_input_tokens_seen": 74772136, + "step": 110930 + }, + { + "epoch": 2.710160506193047, + "grad_norm": 19.42964744567871, + "learning_rate": 1.0278310143922173e-06, + "loss": 0.1046, + "num_input_tokens_seen": 74775784, + "step": 110935 + }, + { + "epoch": 2.7102826570248943, + "grad_norm": 0.3410216271877289, + "learning_rate": 1.0277457697245842e-06, + "loss": 0.0369, + "num_input_tokens_seen": 74779304, + "step": 110940 + }, + { + "epoch": 2.7104048078567415, + "grad_norm": 0.17364412546157837, + "learning_rate": 1.0276605248551764e-06, + "loss": 0.0423, + "num_input_tokens_seen": 74782440, + "step": 110945 + }, + { + "epoch": 2.7105269586885887, + "grad_norm": 0.02989744395017624, + "learning_rate": 1.0275752797846137e-06, + "loss": 0.053, + "num_input_tokens_seen": 74785448, + "step": 110950 + }, + { + "epoch": 2.710649109520436, + "grad_norm": 0.051603052765131, + "learning_rate": 1.0274900345135167e-06, + "loss": 0.0615, + "num_input_tokens_seen": 74788584, + "step": 110955 + }, + { + "epoch": 2.710771260352283, + "grad_norm": 49.23311233520508, + "learning_rate": 1.0274047890425043e-06, + "loss": 0.0954, + "num_input_tokens_seen": 74791656, + "step": 110960 + }, + { + "epoch": 2.7108934111841303, + "grad_norm": 0.027312999591231346, + "learning_rate": 1.027319543372197e-06, + "loss": 0.0367, + "num_input_tokens_seen": 74794792, + "step": 110965 + }, + { + "epoch": 2.7110155620159775, + "grad_norm": 0.04595275968313217, + "learning_rate": 1.0272342975032147e-06, + "loss": 0.0134, + "num_input_tokens_seen": 74797928, + "step": 110970 + }, + { + "epoch": 2.7111377128478242, + "grad_norm": 0.16071614623069763, + "learning_rate": 1.0271490514361771e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74801448, + "step": 110975 + }, + { + "epoch": 2.711259863679672, + "grad_norm": 1.5390660762786865, + "learning_rate": 1.0270638051717041e-06, + "loss": 0.0006, + "num_input_tokens_seen": 74804904, + "step": 110980 + }, + { + "epoch": 2.7113820145115186, + "grad_norm": 0.0822259932756424, + "learning_rate": 1.0269785587104163e-06, + "loss": 0.0006, + "num_input_tokens_seen": 74808616, + "step": 110985 + }, + { + "epoch": 2.7115041653433662, + "grad_norm": 0.5313860774040222, + "learning_rate": 1.0268933120529332e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74811752, + "step": 110990 + }, + { + "epoch": 2.711626316175213, + "grad_norm": 0.19341067969799042, + "learning_rate": 1.0268080651998744e-06, + "loss": 0.0923, + "num_input_tokens_seen": 74814952, + "step": 110995 + }, + { + "epoch": 2.71174846700706, + "grad_norm": 0.0634603202342987, + "learning_rate": 1.0267228181518601e-06, + "loss": 0.1456, + "num_input_tokens_seen": 74818280, + "step": 111000 + }, + { + "epoch": 2.7118706178389074, + "grad_norm": 0.1839318722486496, + "learning_rate": 1.0266375709095103e-06, + "loss": 0.0662, + "num_input_tokens_seen": 74821352, + "step": 111005 + }, + { + "epoch": 2.7119927686707546, + "grad_norm": 16.85820960998535, + "learning_rate": 1.0265523234734453e-06, + "loss": 0.0913, + "num_input_tokens_seen": 74824936, + "step": 111010 + }, + { + "epoch": 2.7121149195026018, + "grad_norm": 0.048869337886571884, + "learning_rate": 1.0264670758442843e-06, + "loss": 0.0007, + "num_input_tokens_seen": 74828968, + "step": 111015 + }, + { + "epoch": 2.712237070334449, + "grad_norm": 31.205768585205078, + "learning_rate": 1.0263818280226477e-06, + "loss": 0.0701, + "num_input_tokens_seen": 74832360, + "step": 111020 + }, + { + "epoch": 2.712359221166296, + "grad_norm": 0.005597368814051151, + "learning_rate": 1.0262965800091553e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74836008, + "step": 111025 + }, + { + "epoch": 2.7124813719981433, + "grad_norm": 0.2927609384059906, + "learning_rate": 1.0262113318044271e-06, + "loss": 0.0006, + "num_input_tokens_seen": 74839272, + "step": 111030 + }, + { + "epoch": 2.7126035228299905, + "grad_norm": 0.04279811680316925, + "learning_rate": 1.0261260834090833e-06, + "loss": 0.001, + "num_input_tokens_seen": 74842280, + "step": 111035 + }, + { + "epoch": 2.7127256736618377, + "grad_norm": 65.13777923583984, + "learning_rate": 1.0260408348237432e-06, + "loss": 0.0838, + "num_input_tokens_seen": 74845608, + "step": 111040 + }, + { + "epoch": 2.712847824493685, + "grad_norm": 51.76634979248047, + "learning_rate": 1.0259555860490272e-06, + "loss": 0.1082, + "num_input_tokens_seen": 74848936, + "step": 111045 + }, + { + "epoch": 2.712969975325532, + "grad_norm": 0.09504994004964828, + "learning_rate": 1.0258703370855553e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74852008, + "step": 111050 + }, + { + "epoch": 2.7130921261573793, + "grad_norm": 0.13983154296875, + "learning_rate": 1.0257850879339474e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74855336, + "step": 111055 + }, + { + "epoch": 2.7132142769892265, + "grad_norm": 0.043822336941957474, + "learning_rate": 1.0256998385948234e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74858408, + "step": 111060 + }, + { + "epoch": 2.7133364278210736, + "grad_norm": 0.1019313782453537, + "learning_rate": 1.0256145890688035e-06, + "loss": 0.0816, + "num_input_tokens_seen": 74862056, + "step": 111065 + }, + { + "epoch": 2.7134585786529204, + "grad_norm": 42.160888671875, + "learning_rate": 1.0255293393565073e-06, + "loss": 0.0331, + "num_input_tokens_seen": 74865512, + "step": 111070 + }, + { + "epoch": 2.713580729484768, + "grad_norm": 0.030252449214458466, + "learning_rate": 1.0254440894585543e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74869096, + "step": 111075 + }, + { + "epoch": 2.7137028803166148, + "grad_norm": 0.43322911858558655, + "learning_rate": 1.0253588393755653e-06, + "loss": 0.085, + "num_input_tokens_seen": 74872296, + "step": 111080 + }, + { + "epoch": 2.713825031148462, + "grad_norm": 0.06952600926160812, + "learning_rate": 1.0252735891081604e-06, + "loss": 0.0866, + "num_input_tokens_seen": 74875496, + "step": 111085 + }, + { + "epoch": 2.713947181980309, + "grad_norm": 0.04150708019733429, + "learning_rate": 1.025188338656959e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74878696, + "step": 111090 + }, + { + "epoch": 2.7140693328121563, + "grad_norm": 0.015112698078155518, + "learning_rate": 1.025103088022581e-06, + "loss": 0.0315, + "num_input_tokens_seen": 74881768, + "step": 111095 + }, + { + "epoch": 2.7141914836440035, + "grad_norm": 0.05250068008899689, + "learning_rate": 1.0250178372056468e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74884904, + "step": 111100 + }, + { + "epoch": 2.7143136344758507, + "grad_norm": 0.009632604196667671, + "learning_rate": 1.024932586206776e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74888424, + "step": 111105 + }, + { + "epoch": 2.714435785307698, + "grad_norm": 25.12550163269043, + "learning_rate": 1.0248473350265892e-06, + "loss": 0.0539, + "num_input_tokens_seen": 74891432, + "step": 111110 + }, + { + "epoch": 2.714557936139545, + "grad_norm": 0.08560064435005188, + "learning_rate": 1.0247620836657053e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74894888, + "step": 111115 + }, + { + "epoch": 2.7146800869713923, + "grad_norm": 1.7176668643951416, + "learning_rate": 1.0246768321247452e-06, + "loss": 0.0004, + "num_input_tokens_seen": 74898088, + "step": 111120 + }, + { + "epoch": 2.7148022378032395, + "grad_norm": 0.02066458947956562, + "learning_rate": 1.0245915804043283e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74901160, + "step": 111125 + }, + { + "epoch": 2.7149243886350867, + "grad_norm": 39.92478942871094, + "learning_rate": 1.0245063285050751e-06, + "loss": 0.1044, + "num_input_tokens_seen": 74904936, + "step": 111130 + }, + { + "epoch": 2.715046539466934, + "grad_norm": 0.0022745642345398664, + "learning_rate": 1.024421076427605e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74908392, + "step": 111135 + }, + { + "epoch": 2.715168690298781, + "grad_norm": 0.2585334777832031, + "learning_rate": 1.0243358241725383e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74911400, + "step": 111140 + }, + { + "epoch": 2.7152908411306282, + "grad_norm": 0.008308542892336845, + "learning_rate": 1.0242505717404953e-06, + "loss": 0.0717, + "num_input_tokens_seen": 74914792, + "step": 111145 + }, + { + "epoch": 2.7154129919624754, + "grad_norm": 0.09557440131902695, + "learning_rate": 1.0241653191320952e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74918056, + "step": 111150 + }, + { + "epoch": 2.715535142794322, + "grad_norm": 0.023261088877916336, + "learning_rate": 1.0240800663479586e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74921064, + "step": 111155 + }, + { + "epoch": 2.71565729362617, + "grad_norm": 0.013574914075434208, + "learning_rate": 1.0239948133887053e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74924200, + "step": 111160 + }, + { + "epoch": 2.7157794444580166, + "grad_norm": 0.06006774678826332, + "learning_rate": 1.0239095602549552e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74927528, + "step": 111165 + }, + { + "epoch": 2.715901595289864, + "grad_norm": 0.09563129395246506, + "learning_rate": 1.0238243069473283e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74930984, + "step": 111170 + }, + { + "epoch": 2.716023746121711, + "grad_norm": 0.008038848638534546, + "learning_rate": 1.0237390534664447e-06, + "loss": 0.1227, + "num_input_tokens_seen": 74934184, + "step": 111175 + }, + { + "epoch": 2.716145896953558, + "grad_norm": 0.1147889792919159, + "learning_rate": 1.0236537998129245e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74937768, + "step": 111180 + }, + { + "epoch": 2.7162680477854053, + "grad_norm": 0.22480729222297668, + "learning_rate": 1.0235685459873873e-06, + "loss": 0.0001, + "num_input_tokens_seen": 74940968, + "step": 111185 + }, + { + "epoch": 2.7163901986172525, + "grad_norm": 11.220277786254883, + "learning_rate": 1.0234832919904533e-06, + "loss": 0.1094, + "num_input_tokens_seen": 74944104, + "step": 111190 + }, + { + "epoch": 2.7165123494490997, + "grad_norm": 0.28037410974502563, + "learning_rate": 1.0233980378227426e-06, + "loss": 0.0005, + "num_input_tokens_seen": 74947112, + "step": 111195 + }, + { + "epoch": 2.716634500280947, + "grad_norm": 0.01165709923952818, + "learning_rate": 1.0233127834848744e-06, + "loss": 0.0515, + "num_input_tokens_seen": 74950568, + "step": 111200 + }, + { + "epoch": 2.716756651112794, + "grad_norm": 0.07802575081586838, + "learning_rate": 1.0232275289774702e-06, + "loss": 0.0549, + "num_input_tokens_seen": 74953704, + "step": 111205 + }, + { + "epoch": 2.7168788019446413, + "grad_norm": 0.23846709728240967, + "learning_rate": 1.0231422743011488e-06, + "loss": 0.0341, + "num_input_tokens_seen": 74957032, + "step": 111210 + }, + { + "epoch": 2.7170009527764885, + "grad_norm": 0.03532462939620018, + "learning_rate": 1.0230570194565307e-06, + "loss": 0.0003, + "num_input_tokens_seen": 74960360, + "step": 111215 + }, + { + "epoch": 2.7171231036083356, + "grad_norm": 0.06964351236820221, + "learning_rate": 1.022971764444236e-06, + "loss": 0.0002, + "num_input_tokens_seen": 74963496, + "step": 111220 + }, + { + "epoch": 2.717245254440183, + "grad_norm": 15.483033180236816, + "learning_rate": 1.0228865092648842e-06, + "loss": 0.0649, + "num_input_tokens_seen": 74966568, + "step": 111225 + }, + { + "epoch": 2.71736740527203, + "grad_norm": 25.231521606445312, + "learning_rate": 1.022801253919095e-06, + "loss": 0.0351, + "num_input_tokens_seen": 74969768, + "step": 111230 + }, + { + "epoch": 2.717489556103877, + "grad_norm": 24.629430770874023, + "learning_rate": 1.0227159984074895e-06, + "loss": 0.0572, + "num_input_tokens_seen": 74972968, + "step": 111235 + }, + { + "epoch": 2.717611706935724, + "grad_norm": 0.027782179415225983, + "learning_rate": 1.0226307427306873e-06, + "loss": 0.001, + "num_input_tokens_seen": 74976424, + "step": 111240 + }, + { + "epoch": 2.7177338577675716, + "grad_norm": 0.02719302475452423, + "learning_rate": 1.022545486889308e-06, + "loss": 0.0011, + "num_input_tokens_seen": 74979752, + "step": 111245 + }, + { + "epoch": 2.7178560085994183, + "grad_norm": 0.0871863141655922, + "learning_rate": 1.022460230883972e-06, + "loss": 0.0011, + "num_input_tokens_seen": 74983336, + "step": 111250 + }, + { + "epoch": 2.717978159431266, + "grad_norm": 0.04725907742977142, + "learning_rate": 1.022374974715299e-06, + "loss": 0.0007, + "num_input_tokens_seen": 74986920, + "step": 111255 + }, + { + "epoch": 2.7181003102631127, + "grad_norm": 0.05942535400390625, + "learning_rate": 1.022289718383909e-06, + "loss": 0.0009, + "num_input_tokens_seen": 74990120, + "step": 111260 + }, + { + "epoch": 2.71822246109496, + "grad_norm": 13.261385917663574, + "learning_rate": 1.0222044618904225e-06, + "loss": 0.0554, + "num_input_tokens_seen": 74993192, + "step": 111265 + }, + { + "epoch": 2.718344611926807, + "grad_norm": 0.021577315405011177, + "learning_rate": 1.0221192052354593e-06, + "loss": 0.0469, + "num_input_tokens_seen": 74996584, + "step": 111270 + }, + { + "epoch": 2.7184667627586543, + "grad_norm": 31.583599090576172, + "learning_rate": 1.0220339484196392e-06, + "loss": 0.0437, + "num_input_tokens_seen": 74999976, + "step": 111275 + }, + { + "epoch": 2.7185889135905015, + "grad_norm": 0.07107774913311005, + "learning_rate": 1.0219486914435823e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75003240, + "step": 111280 + }, + { + "epoch": 2.7187110644223487, + "grad_norm": 0.05268010124564171, + "learning_rate": 1.0218634343079082e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75006760, + "step": 111285 + }, + { + "epoch": 2.718833215254196, + "grad_norm": 0.010458775795996189, + "learning_rate": 1.0217781770132375e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75010280, + "step": 111290 + }, + { + "epoch": 2.718955366086043, + "grad_norm": 13.336898803710938, + "learning_rate": 1.0216929195601903e-06, + "loss": 0.0352, + "num_input_tokens_seen": 75013864, + "step": 111295 + }, + { + "epoch": 2.7190775169178902, + "grad_norm": 0.6519309282302856, + "learning_rate": 1.0216076619493861e-06, + "loss": 0.0565, + "num_input_tokens_seen": 75016872, + "step": 111300 + }, + { + "epoch": 2.7191996677497374, + "grad_norm": 0.7603404521942139, + "learning_rate": 1.0215224041814455e-06, + "loss": 0.0642, + "num_input_tokens_seen": 75019944, + "step": 111305 + }, + { + "epoch": 2.7193218185815846, + "grad_norm": 0.1283659040927887, + "learning_rate": 1.0214371462569878e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75023144, + "step": 111310 + }, + { + "epoch": 2.719443969413432, + "grad_norm": 0.10316579043865204, + "learning_rate": 1.0213518881766337e-06, + "loss": 0.0376, + "num_input_tokens_seen": 75026408, + "step": 111315 + }, + { + "epoch": 2.719566120245279, + "grad_norm": 0.07398030906915665, + "learning_rate": 1.0212666299410026e-06, + "loss": 0.095, + "num_input_tokens_seen": 75029928, + "step": 111320 + }, + { + "epoch": 2.719688271077126, + "grad_norm": 0.006797238253057003, + "learning_rate": 1.0211813715507151e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75033192, + "step": 111325 + }, + { + "epoch": 2.7198104219089734, + "grad_norm": 0.008504372090101242, + "learning_rate": 1.0210961130063911e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75036328, + "step": 111330 + }, + { + "epoch": 2.71993257274082, + "grad_norm": 0.014152489602565765, + "learning_rate": 1.0210108543086502e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75039400, + "step": 111335 + }, + { + "epoch": 2.7200547235726678, + "grad_norm": 0.16172784566879272, + "learning_rate": 1.020925595458113e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75042600, + "step": 111340 + }, + { + "epoch": 2.7201768744045145, + "grad_norm": 0.00845453143119812, + "learning_rate": 1.020840336455399e-06, + "loss": 0.0708, + "num_input_tokens_seen": 75045992, + "step": 111345 + }, + { + "epoch": 2.720299025236362, + "grad_norm": 0.055560726672410965, + "learning_rate": 1.0207550773011285e-06, + "loss": 0.0451, + "num_input_tokens_seen": 75049256, + "step": 111350 + }, + { + "epoch": 2.720421176068209, + "grad_norm": 49.54551315307617, + "learning_rate": 1.0206698179959213e-06, + "loss": 0.0575, + "num_input_tokens_seen": 75052840, + "step": 111355 + }, + { + "epoch": 2.720543326900056, + "grad_norm": 0.040758680552244186, + "learning_rate": 1.0205845585403978e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75056296, + "step": 111360 + }, + { + "epoch": 2.7206654777319033, + "grad_norm": 0.060084953904151917, + "learning_rate": 1.020499298935178e-06, + "loss": 0.0007, + "num_input_tokens_seen": 75059240, + "step": 111365 + }, + { + "epoch": 2.7207876285637504, + "grad_norm": 0.08063948899507523, + "learning_rate": 1.0204140391808818e-06, + "loss": 0.0009, + "num_input_tokens_seen": 75062760, + "step": 111370 + }, + { + "epoch": 2.7209097793955976, + "grad_norm": 33.327274322509766, + "learning_rate": 1.0203287792781293e-06, + "loss": 0.0915, + "num_input_tokens_seen": 75065832, + "step": 111375 + }, + { + "epoch": 2.721031930227445, + "grad_norm": 0.31778210401535034, + "learning_rate": 1.0202435192275404e-06, + "loss": 0.049, + "num_input_tokens_seen": 75068840, + "step": 111380 + }, + { + "epoch": 2.721154081059292, + "grad_norm": 25.272445678710938, + "learning_rate": 1.0201582590297345e-06, + "loss": 0.0844, + "num_input_tokens_seen": 75071912, + "step": 111385 + }, + { + "epoch": 2.721276231891139, + "grad_norm": 0.030040111392736435, + "learning_rate": 1.0200729986853332e-06, + "loss": 0.0358, + "num_input_tokens_seen": 75075112, + "step": 111390 + }, + { + "epoch": 2.7213983827229864, + "grad_norm": 0.011033943854272366, + "learning_rate": 1.0199877381949552e-06, + "loss": 0.1127, + "num_input_tokens_seen": 75078824, + "step": 111395 + }, + { + "epoch": 2.7215205335548336, + "grad_norm": 0.014756470918655396, + "learning_rate": 1.019902477559221e-06, + "loss": 0.0515, + "num_input_tokens_seen": 75082088, + "step": 111400 + }, + { + "epoch": 2.7216426843866808, + "grad_norm": 1.1041114330291748, + "learning_rate": 1.0198172167787508e-06, + "loss": 0.001, + "num_input_tokens_seen": 75085416, + "step": 111405 + }, + { + "epoch": 2.721764835218528, + "grad_norm": 0.1292664259672165, + "learning_rate": 1.0197319558541645e-06, + "loss": 0.0569, + "num_input_tokens_seen": 75088424, + "step": 111410 + }, + { + "epoch": 2.721886986050375, + "grad_norm": 0.22659969329833984, + "learning_rate": 1.0196466947860819e-06, + "loss": 0.0385, + "num_input_tokens_seen": 75091624, + "step": 111415 + }, + { + "epoch": 2.722009136882222, + "grad_norm": 0.16476193070411682, + "learning_rate": 1.0195614335751234e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75095528, + "step": 111420 + }, + { + "epoch": 2.7221312877140695, + "grad_norm": 0.02349873073399067, + "learning_rate": 1.019476172221909e-06, + "loss": 0.0823, + "num_input_tokens_seen": 75098984, + "step": 111425 + }, + { + "epoch": 2.7222534385459163, + "grad_norm": 0.13670580089092255, + "learning_rate": 1.0193909107270583e-06, + "loss": 0.069, + "num_input_tokens_seen": 75102440, + "step": 111430 + }, + { + "epoch": 2.722375589377764, + "grad_norm": 0.012231250293552876, + "learning_rate": 1.019305649091192e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75105384, + "step": 111435 + }, + { + "epoch": 2.7224977402096107, + "grad_norm": 0.16043393313884735, + "learning_rate": 1.0192203873149299e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75108584, + "step": 111440 + }, + { + "epoch": 2.722619891041458, + "grad_norm": 0.04114136844873428, + "learning_rate": 1.0191351253988915e-06, + "loss": 0.0537, + "num_input_tokens_seen": 75111848, + "step": 111445 + }, + { + "epoch": 2.722742041873305, + "grad_norm": 0.01634533330798149, + "learning_rate": 1.0190498633436976e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75114984, + "step": 111450 + }, + { + "epoch": 2.7228641927051522, + "grad_norm": 526.8831176757812, + "learning_rate": 1.018964601149968e-06, + "loss": 0.036, + "num_input_tokens_seen": 75118376, + "step": 111455 + }, + { + "epoch": 2.7229863435369994, + "grad_norm": 0.10897945612668991, + "learning_rate": 1.0188793388183229e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75121768, + "step": 111460 + }, + { + "epoch": 2.7231084943688466, + "grad_norm": 0.11047311872243881, + "learning_rate": 1.0187940763493818e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75125288, + "step": 111465 + }, + { + "epoch": 2.723230645200694, + "grad_norm": 0.4363507330417633, + "learning_rate": 1.0187088137437652e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75128808, + "step": 111470 + }, + { + "epoch": 2.723352796032541, + "grad_norm": 0.07578103244304657, + "learning_rate": 1.0186235510020933e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75132136, + "step": 111475 + }, + { + "epoch": 2.723474946864388, + "grad_norm": 0.014645537361502647, + "learning_rate": 1.0185382881249857e-06, + "loss": 0.0686, + "num_input_tokens_seen": 75135656, + "step": 111480 + }, + { + "epoch": 2.7235970976962354, + "grad_norm": 32.36290740966797, + "learning_rate": 1.0184530251130628e-06, + "loss": 0.0898, + "num_input_tokens_seen": 75139240, + "step": 111485 + }, + { + "epoch": 2.7237192485280826, + "grad_norm": 0.09573503583669662, + "learning_rate": 1.0183677619669446e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75142568, + "step": 111490 + }, + { + "epoch": 2.7238413993599297, + "grad_norm": 0.11125845462083817, + "learning_rate": 1.0182824986872509e-06, + "loss": 0.0829, + "num_input_tokens_seen": 75145960, + "step": 111495 + }, + { + "epoch": 2.723963550191777, + "grad_norm": 13.607086181640625, + "learning_rate": 1.0181972352746022e-06, + "loss": 0.1665, + "num_input_tokens_seen": 75149160, + "step": 111500 + }, + { + "epoch": 2.724085701023624, + "grad_norm": 0.5846430063247681, + "learning_rate": 1.018111971729618e-06, + "loss": 0.0468, + "num_input_tokens_seen": 75152232, + "step": 111505 + }, + { + "epoch": 2.7242078518554713, + "grad_norm": 0.24971908330917358, + "learning_rate": 1.0180267080529187e-06, + "loss": 0.0844, + "num_input_tokens_seen": 75155240, + "step": 111510 + }, + { + "epoch": 2.724330002687318, + "grad_norm": 0.19749361276626587, + "learning_rate": 1.0179414442451244e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75158376, + "step": 111515 + }, + { + "epoch": 2.7244521535191657, + "grad_norm": 0.09108477085828781, + "learning_rate": 1.0178561803068554e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75161512, + "step": 111520 + }, + { + "epoch": 2.7245743043510124, + "grad_norm": 0.1065296083688736, + "learning_rate": 1.0177709162387311e-06, + "loss": 0.1018, + "num_input_tokens_seen": 75165096, + "step": 111525 + }, + { + "epoch": 2.7246964551828596, + "grad_norm": 0.012275001965463161, + "learning_rate": 1.0176856520413723e-06, + "loss": 0.1232, + "num_input_tokens_seen": 75168680, + "step": 111530 + }, + { + "epoch": 2.724818606014707, + "grad_norm": 1.0489786863327026, + "learning_rate": 1.0176003877153986e-06, + "loss": 0.0565, + "num_input_tokens_seen": 75172072, + "step": 111535 + }, + { + "epoch": 2.724940756846554, + "grad_norm": 0.12052971124649048, + "learning_rate": 1.0175151232614296e-06, + "loss": 0.0412, + "num_input_tokens_seen": 75175656, + "step": 111540 + }, + { + "epoch": 2.725062907678401, + "grad_norm": 0.078212209045887, + "learning_rate": 1.0174298586800862e-06, + "loss": 0.0868, + "num_input_tokens_seen": 75179560, + "step": 111545 + }, + { + "epoch": 2.7251850585102484, + "grad_norm": 13.409255981445312, + "learning_rate": 1.0173445939719882e-06, + "loss": 0.0638, + "num_input_tokens_seen": 75183016, + "step": 111550 + }, + { + "epoch": 2.7253072093420956, + "grad_norm": 0.19932065904140472, + "learning_rate": 1.0172593291377559e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75186664, + "step": 111555 + }, + { + "epoch": 2.7254293601739428, + "grad_norm": 11.157979011535645, + "learning_rate": 1.017174064178009e-06, + "loss": 0.042, + "num_input_tokens_seen": 75189736, + "step": 111560 + }, + { + "epoch": 2.72555151100579, + "grad_norm": 16.223159790039062, + "learning_rate": 1.0170887990933675e-06, + "loss": 0.0351, + "num_input_tokens_seen": 75193000, + "step": 111565 + }, + { + "epoch": 2.725673661837637, + "grad_norm": 0.085118867456913, + "learning_rate": 1.0170035338844514e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75196520, + "step": 111570 + }, + { + "epoch": 2.7257958126694843, + "grad_norm": 0.029599210247397423, + "learning_rate": 1.0169182685518817e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75199976, + "step": 111575 + }, + { + "epoch": 2.7259179635013315, + "grad_norm": 0.7240051627159119, + "learning_rate": 1.0168330030962775e-06, + "loss": 0.029, + "num_input_tokens_seen": 75203176, + "step": 111580 + }, + { + "epoch": 2.7260401143331787, + "grad_norm": 0.00847703404724598, + "learning_rate": 1.0167477375182592e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75206632, + "step": 111585 + }, + { + "epoch": 2.726162265165026, + "grad_norm": 15.220355033874512, + "learning_rate": 1.0166624718184467e-06, + "loss": 0.0851, + "num_input_tokens_seen": 75209832, + "step": 111590 + }, + { + "epoch": 2.726284415996873, + "grad_norm": 0.044437870383262634, + "learning_rate": 1.0165772059974604e-06, + "loss": 0.0901, + "num_input_tokens_seen": 75213032, + "step": 111595 + }, + { + "epoch": 2.72640656682872, + "grad_norm": 0.021976593881845474, + "learning_rate": 1.0164919400559202e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75216232, + "step": 111600 + }, + { + "epoch": 2.7265287176605675, + "grad_norm": 0.003190957475453615, + "learning_rate": 1.016406673994446e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75219560, + "step": 111605 + }, + { + "epoch": 2.7266508684924142, + "grad_norm": 0.08274943381547928, + "learning_rate": 1.016321407813658e-06, + "loss": 0.0337, + "num_input_tokens_seen": 75223720, + "step": 111610 + }, + { + "epoch": 2.726773019324262, + "grad_norm": 0.05247628316283226, + "learning_rate": 1.0162361415141766e-06, + "loss": 0.0276, + "num_input_tokens_seen": 75227560, + "step": 111615 + }, + { + "epoch": 2.7268951701561086, + "grad_norm": 0.09594687819480896, + "learning_rate": 1.0161508750966214e-06, + "loss": 0.044, + "num_input_tokens_seen": 75230888, + "step": 111620 + }, + { + "epoch": 2.727017320987956, + "grad_norm": 0.02336997166275978, + "learning_rate": 1.0160656085616128e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75234216, + "step": 111625 + }, + { + "epoch": 2.727139471819803, + "grad_norm": 310.22930908203125, + "learning_rate": 1.0159803419097708e-06, + "loss": 0.0228, + "num_input_tokens_seen": 75237608, + "step": 111630 + }, + { + "epoch": 2.72726162265165, + "grad_norm": 0.20268277823925018, + "learning_rate": 1.0158950751417155e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75240488, + "step": 111635 + }, + { + "epoch": 2.7273837734834974, + "grad_norm": 0.039220090955495834, + "learning_rate": 1.0158098082580669e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75243816, + "step": 111640 + }, + { + "epoch": 2.7275059243153446, + "grad_norm": 16.948116302490234, + "learning_rate": 1.015724541259445e-06, + "loss": 0.0294, + "num_input_tokens_seen": 75246952, + "step": 111645 + }, + { + "epoch": 2.7276280751471917, + "grad_norm": 0.030434172600507736, + "learning_rate": 1.01563927414647e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75250216, + "step": 111650 + }, + { + "epoch": 2.727750225979039, + "grad_norm": 55.45297622680664, + "learning_rate": 1.0155540069197623e-06, + "loss": 0.0515, + "num_input_tokens_seen": 75253480, + "step": 111655 + }, + { + "epoch": 2.727872376810886, + "grad_norm": 0.044720862060785294, + "learning_rate": 1.0154687395799415e-06, + "loss": 0.0007, + "num_input_tokens_seen": 75256936, + "step": 111660 + }, + { + "epoch": 2.7279945276427333, + "grad_norm": 0.01326954085379839, + "learning_rate": 1.0153834721276276e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75260392, + "step": 111665 + }, + { + "epoch": 2.7281166784745805, + "grad_norm": 0.005129373632371426, + "learning_rate": 1.0152982045634411e-06, + "loss": 0.054, + "num_input_tokens_seen": 75263784, + "step": 111670 + }, + { + "epoch": 2.7282388293064277, + "grad_norm": 0.0035448065027594566, + "learning_rate": 1.015212936888002e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75266664, + "step": 111675 + }, + { + "epoch": 2.728360980138275, + "grad_norm": 0.12941040098667145, + "learning_rate": 1.0151276691019304e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75270504, + "step": 111680 + }, + { + "epoch": 2.728483130970122, + "grad_norm": 0.006792883854359388, + "learning_rate": 1.0150424012058466e-06, + "loss": 0.1038, + "num_input_tokens_seen": 75274280, + "step": 111685 + }, + { + "epoch": 2.7286052818019693, + "grad_norm": 0.019393935799598694, + "learning_rate": 1.0149571332003702e-06, + "loss": 0.001, + "num_input_tokens_seen": 75277672, + "step": 111690 + }, + { + "epoch": 2.728727432633816, + "grad_norm": 0.04781755059957504, + "learning_rate": 1.014871865086121e-06, + "loss": 0.068, + "num_input_tokens_seen": 75281000, + "step": 111695 + }, + { + "epoch": 2.7288495834656636, + "grad_norm": 0.06856974959373474, + "learning_rate": 1.01478659686372e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75284712, + "step": 111700 + }, + { + "epoch": 2.7289717342975104, + "grad_norm": 0.12601056694984436, + "learning_rate": 1.0147013285337868e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75288168, + "step": 111705 + }, + { + "epoch": 2.7290938851293576, + "grad_norm": 0.5254818797111511, + "learning_rate": 1.0146160600969419e-06, + "loss": 0.0541, + "num_input_tokens_seen": 75291432, + "step": 111710 + }, + { + "epoch": 2.7292160359612048, + "grad_norm": 0.018003568053245544, + "learning_rate": 1.0145307915538047e-06, + "loss": 0.0503, + "num_input_tokens_seen": 75294952, + "step": 111715 + }, + { + "epoch": 2.729338186793052, + "grad_norm": 0.015305710025131702, + "learning_rate": 1.014445522904996e-06, + "loss": 0.002, + "num_input_tokens_seen": 75298024, + "step": 111720 + }, + { + "epoch": 2.729460337624899, + "grad_norm": 0.033240336924791336, + "learning_rate": 1.014360254151135e-06, + "loss": 0.0644, + "num_input_tokens_seen": 75301544, + "step": 111725 + }, + { + "epoch": 2.7295824884567463, + "grad_norm": 0.017280491068959236, + "learning_rate": 1.014274985292843e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75305128, + "step": 111730 + }, + { + "epoch": 2.7297046392885935, + "grad_norm": 0.0738111361861229, + "learning_rate": 1.0141897163307394e-06, + "loss": 0.0358, + "num_input_tokens_seen": 75308520, + "step": 111735 + }, + { + "epoch": 2.7298267901204407, + "grad_norm": 0.0017817127518355846, + "learning_rate": 1.0141044472654441e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75311656, + "step": 111740 + }, + { + "epoch": 2.729948940952288, + "grad_norm": 0.43171536922454834, + "learning_rate": 1.0140191780975776e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75314920, + "step": 111745 + }, + { + "epoch": 2.730071091784135, + "grad_norm": 0.5606056451797485, + "learning_rate": 1.0139339088277599e-06, + "loss": 0.0012, + "num_input_tokens_seen": 75318504, + "step": 111750 + }, + { + "epoch": 2.7301932426159823, + "grad_norm": 19.605661392211914, + "learning_rate": 1.013848639456611e-06, + "loss": 0.0693, + "num_input_tokens_seen": 75321640, + "step": 111755 + }, + { + "epoch": 2.7303153934478295, + "grad_norm": 50.48213195800781, + "learning_rate": 1.0137633699847507e-06, + "loss": 0.0515, + "num_input_tokens_seen": 75325288, + "step": 111760 + }, + { + "epoch": 2.7304375442796767, + "grad_norm": 0.007247891277074814, + "learning_rate": 1.0136781004128e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75328808, + "step": 111765 + }, + { + "epoch": 2.730559695111524, + "grad_norm": 21.53213882446289, + "learning_rate": 1.0135928307413785e-06, + "loss": 0.0646, + "num_input_tokens_seen": 75332392, + "step": 111770 + }, + { + "epoch": 2.730681845943371, + "grad_norm": 0.0011175754480063915, + "learning_rate": 1.013507560971106e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75336104, + "step": 111775 + }, + { + "epoch": 2.730803996775218, + "grad_norm": 0.04286136478185654, + "learning_rate": 1.013422291102603e-06, + "loss": 0.0465, + "num_input_tokens_seen": 75339368, + "step": 111780 + }, + { + "epoch": 2.7309261476070654, + "grad_norm": 0.474054753780365, + "learning_rate": 1.0133370211364892e-06, + "loss": 0.044, + "num_input_tokens_seen": 75342568, + "step": 111785 + }, + { + "epoch": 2.731048298438912, + "grad_norm": 4.169500350952148, + "learning_rate": 1.0132517510733853e-06, + "loss": 0.0007, + "num_input_tokens_seen": 75345704, + "step": 111790 + }, + { + "epoch": 2.73117044927076, + "grad_norm": 0.29191893339157104, + "learning_rate": 1.0131664809139111e-06, + "loss": 0.1245, + "num_input_tokens_seen": 75349288, + "step": 111795 + }, + { + "epoch": 2.7312926001026065, + "grad_norm": 0.093380406498909, + "learning_rate": 1.0130812106586868e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75352552, + "step": 111800 + }, + { + "epoch": 2.7314147509344537, + "grad_norm": 0.030852509662508965, + "learning_rate": 1.012995940308332e-06, + "loss": 0.0465, + "num_input_tokens_seen": 75355816, + "step": 111805 + }, + { + "epoch": 2.731536901766301, + "grad_norm": 0.010006435215473175, + "learning_rate": 1.0129106698634676e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75359016, + "step": 111810 + }, + { + "epoch": 2.731659052598148, + "grad_norm": 30.960412979125977, + "learning_rate": 1.0128253993247132e-06, + "loss": 0.0834, + "num_input_tokens_seen": 75362216, + "step": 111815 + }, + { + "epoch": 2.7317812034299953, + "grad_norm": 0.02463303506374359, + "learning_rate": 1.012740128692689e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75365672, + "step": 111820 + }, + { + "epoch": 2.7319033542618425, + "grad_norm": 0.014886599965393543, + "learning_rate": 1.0126548579680154e-06, + "loss": 0.1318, + "num_input_tokens_seen": 75368936, + "step": 111825 + }, + { + "epoch": 2.7320255050936897, + "grad_norm": 0.09876159578561783, + "learning_rate": 1.012569587151312e-06, + "loss": 0.0329, + "num_input_tokens_seen": 75372328, + "step": 111830 + }, + { + "epoch": 2.732147655925537, + "grad_norm": 0.0017728406237438321, + "learning_rate": 1.0124843162431994e-06, + "loss": 0.0027, + "num_input_tokens_seen": 75375528, + "step": 111835 + }, + { + "epoch": 2.732269806757384, + "grad_norm": 0.006446607410907745, + "learning_rate": 1.0123990452442977e-06, + "loss": 0.0477, + "num_input_tokens_seen": 75378472, + "step": 111840 + }, + { + "epoch": 2.7323919575892313, + "grad_norm": 14.4539213180542, + "learning_rate": 1.0123137741552264e-06, + "loss": 0.0479, + "num_input_tokens_seen": 75381736, + "step": 111845 + }, + { + "epoch": 2.7325141084210784, + "grad_norm": 0.04016166925430298, + "learning_rate": 1.012228502976606e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75385128, + "step": 111850 + }, + { + "epoch": 2.7326362592529256, + "grad_norm": 0.008136886171996593, + "learning_rate": 1.0121432317090568e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75388264, + "step": 111855 + }, + { + "epoch": 2.732758410084773, + "grad_norm": 0.027124160900712013, + "learning_rate": 1.0120579603531987e-06, + "loss": 0.0334, + "num_input_tokens_seen": 75391912, + "step": 111860 + }, + { + "epoch": 2.7328805609166196, + "grad_norm": 0.07476188987493515, + "learning_rate": 1.0119726889096518e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75395304, + "step": 111865 + }, + { + "epoch": 2.733002711748467, + "grad_norm": 0.009432375431060791, + "learning_rate": 1.0118874173790364e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75398440, + "step": 111870 + }, + { + "epoch": 2.733124862580314, + "grad_norm": 0.06053454801440239, + "learning_rate": 1.0118021457619725e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75401896, + "step": 111875 + }, + { + "epoch": 2.7332470134121616, + "grad_norm": 32.21339797973633, + "learning_rate": 1.01171687405908e-06, + "loss": 0.1077, + "num_input_tokens_seen": 75405096, + "step": 111880 + }, + { + "epoch": 2.7333691642440083, + "grad_norm": 0.004754332359880209, + "learning_rate": 1.0116316022709794e-06, + "loss": 0.0356, + "num_input_tokens_seen": 75408936, + "step": 111885 + }, + { + "epoch": 2.7334913150758555, + "grad_norm": 0.40759822726249695, + "learning_rate": 1.0115463303982909e-06, + "loss": 0.0008, + "num_input_tokens_seen": 75412328, + "step": 111890 + }, + { + "epoch": 2.7336134659077027, + "grad_norm": 5.361917018890381, + "learning_rate": 1.0114610584416342e-06, + "loss": 0.0011, + "num_input_tokens_seen": 75415784, + "step": 111895 + }, + { + "epoch": 2.73373561673955, + "grad_norm": 0.07207702100276947, + "learning_rate": 1.0113757864016298e-06, + "loss": 0.0411, + "num_input_tokens_seen": 75419304, + "step": 111900 + }, + { + "epoch": 2.733857767571397, + "grad_norm": 0.059938084334135056, + "learning_rate": 1.0112905142788973e-06, + "loss": 0.0423, + "num_input_tokens_seen": 75422248, + "step": 111905 + }, + { + "epoch": 2.7339799184032443, + "grad_norm": 0.2088875025510788, + "learning_rate": 1.0112052420740573e-06, + "loss": 0.0229, + "num_input_tokens_seen": 75425384, + "step": 111910 + }, + { + "epoch": 2.7341020692350915, + "grad_norm": 0.008488044142723083, + "learning_rate": 1.0111199697877295e-06, + "loss": 0.1241, + "num_input_tokens_seen": 75428456, + "step": 111915 + }, + { + "epoch": 2.7342242200669387, + "grad_norm": 0.06650572270154953, + "learning_rate": 1.0110346974205344e-06, + "loss": 0.0039, + "num_input_tokens_seen": 75432424, + "step": 111920 + }, + { + "epoch": 2.734346370898786, + "grad_norm": 0.027216732501983643, + "learning_rate": 1.010949424973092e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75435880, + "step": 111925 + }, + { + "epoch": 2.734468521730633, + "grad_norm": 0.2589007318019867, + "learning_rate": 1.0108641524460227e-06, + "loss": 0.0335, + "num_input_tokens_seen": 75439528, + "step": 111930 + }, + { + "epoch": 2.7345906725624802, + "grad_norm": 0.9645323753356934, + "learning_rate": 1.010778879839946e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75442792, + "step": 111935 + }, + { + "epoch": 2.7347128233943274, + "grad_norm": 0.36214014887809753, + "learning_rate": 1.0106936071554828e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75446760, + "step": 111940 + }, + { + "epoch": 2.7348349742261746, + "grad_norm": 55.24829864501953, + "learning_rate": 1.0106083343932527e-06, + "loss": 0.0312, + "num_input_tokens_seen": 75450536, + "step": 111945 + }, + { + "epoch": 2.734957125058022, + "grad_norm": 3.9883968830108643, + "learning_rate": 1.0105230615538757e-06, + "loss": 0.0496, + "num_input_tokens_seen": 75454376, + "step": 111950 + }, + { + "epoch": 2.735079275889869, + "grad_norm": 0.030904987826943398, + "learning_rate": 1.0104377886379725e-06, + "loss": 0.0975, + "num_input_tokens_seen": 75457832, + "step": 111955 + }, + { + "epoch": 2.7352014267217157, + "grad_norm": 0.000735341280233115, + "learning_rate": 1.0103525156461628e-06, + "loss": 0.0478, + "num_input_tokens_seen": 75461800, + "step": 111960 + }, + { + "epoch": 2.7353235775535634, + "grad_norm": 0.178748220205307, + "learning_rate": 1.0102672425790665e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75464936, + "step": 111965 + }, + { + "epoch": 2.73544572838541, + "grad_norm": 0.006740411277860403, + "learning_rate": 1.0101819694373045e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75468456, + "step": 111970 + }, + { + "epoch": 2.7355678792172573, + "grad_norm": 0.032522208988666534, + "learning_rate": 1.0100966962214959e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75472040, + "step": 111975 + }, + { + "epoch": 2.7356900300491045, + "grad_norm": 0.0018017988186329603, + "learning_rate": 1.0100114229322618e-06, + "loss": 0.176, + "num_input_tokens_seen": 75475560, + "step": 111980 + }, + { + "epoch": 2.7358121808809517, + "grad_norm": 1.2526220083236694, + "learning_rate": 1.009926149570222e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75478952, + "step": 111985 + }, + { + "epoch": 2.735934331712799, + "grad_norm": 0.01706717163324356, + "learning_rate": 1.0098408761359965e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75482088, + "step": 111990 + }, + { + "epoch": 2.736056482544646, + "grad_norm": 0.1926831156015396, + "learning_rate": 1.0097556026302056e-06, + "loss": 0.0492, + "num_input_tokens_seen": 75485160, + "step": 111995 + }, + { + "epoch": 2.7361786333764933, + "grad_norm": 0.05004437267780304, + "learning_rate": 1.0096703290534693e-06, + "loss": 0.0853, + "num_input_tokens_seen": 75488232, + "step": 112000 + }, + { + "epoch": 2.7363007842083404, + "grad_norm": 0.008143107406795025, + "learning_rate": 1.0095850554064074e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75491112, + "step": 112005 + }, + { + "epoch": 2.7364229350401876, + "grad_norm": 0.4659461975097656, + "learning_rate": 1.0094997816896407e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75494120, + "step": 112010 + }, + { + "epoch": 2.736545085872035, + "grad_norm": 0.030890757218003273, + "learning_rate": 1.009414507903789e-06, + "loss": 0.0351, + "num_input_tokens_seen": 75497192, + "step": 112015 + }, + { + "epoch": 2.736667236703882, + "grad_norm": 0.056731339544057846, + "learning_rate": 1.0093292340494726e-06, + "loss": 0.0548, + "num_input_tokens_seen": 75500840, + "step": 112020 + }, + { + "epoch": 2.736789387535729, + "grad_norm": 0.0058851963840425014, + "learning_rate": 1.0092439601273112e-06, + "loss": 0.0271, + "num_input_tokens_seen": 75504168, + "step": 112025 + }, + { + "epoch": 2.7369115383675764, + "grad_norm": 0.0594380646944046, + "learning_rate": 1.0091586861379256e-06, + "loss": 0.0604, + "num_input_tokens_seen": 75507432, + "step": 112030 + }, + { + "epoch": 2.7370336891994236, + "grad_norm": 0.12918642163276672, + "learning_rate": 1.0090734120819353e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75510504, + "step": 112035 + }, + { + "epoch": 2.7371558400312708, + "grad_norm": 0.012315675616264343, + "learning_rate": 1.0089881379599605e-06, + "loss": 0.115, + "num_input_tokens_seen": 75513576, + "step": 112040 + }, + { + "epoch": 2.7372779908631175, + "grad_norm": 0.22571399807929993, + "learning_rate": 1.0089028637726223e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75517160, + "step": 112045 + }, + { + "epoch": 2.737400141694965, + "grad_norm": 70.73702239990234, + "learning_rate": 1.0088175895205396e-06, + "loss": 0.1897, + "num_input_tokens_seen": 75520744, + "step": 112050 + }, + { + "epoch": 2.737522292526812, + "grad_norm": 0.03366973623633385, + "learning_rate": 1.008732315204333e-06, + "loss": 0.0008, + "num_input_tokens_seen": 75523944, + "step": 112055 + }, + { + "epoch": 2.7376444433586595, + "grad_norm": 0.002571349497884512, + "learning_rate": 1.0086470408246225e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75527848, + "step": 112060 + }, + { + "epoch": 2.7377665941905063, + "grad_norm": 284.5186462402344, + "learning_rate": 1.0085617663820288e-06, + "loss": 0.0832, + "num_input_tokens_seen": 75530856, + "step": 112065 + }, + { + "epoch": 2.7378887450223535, + "grad_norm": 0.15322719514369965, + "learning_rate": 1.0084764918771711e-06, + "loss": 0.0334, + "num_input_tokens_seen": 75534504, + "step": 112070 + }, + { + "epoch": 2.7380108958542007, + "grad_norm": 0.09069032222032547, + "learning_rate": 1.0083912173106703e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75537704, + "step": 112075 + }, + { + "epoch": 2.738133046686048, + "grad_norm": 0.2874353229999542, + "learning_rate": 1.0083059426831466e-06, + "loss": 0.0529, + "num_input_tokens_seen": 75540712, + "step": 112080 + }, + { + "epoch": 2.738255197517895, + "grad_norm": 0.02338283136487007, + "learning_rate": 1.0082206679952197e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75543912, + "step": 112085 + }, + { + "epoch": 2.7383773483497422, + "grad_norm": 43.06546401977539, + "learning_rate": 1.00813539324751e-06, + "loss": 0.077, + "num_input_tokens_seen": 75547560, + "step": 112090 + }, + { + "epoch": 2.7384994991815894, + "grad_norm": 0.15701234340667725, + "learning_rate": 1.0080501184406372e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75550632, + "step": 112095 + }, + { + "epoch": 2.7386216500134366, + "grad_norm": 0.10237427800893784, + "learning_rate": 1.007964843575222e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75554088, + "step": 112100 + }, + { + "epoch": 2.738743800845284, + "grad_norm": 0.02234814129769802, + "learning_rate": 1.007879568651884e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75557480, + "step": 112105 + }, + { + "epoch": 2.738865951677131, + "grad_norm": 56.13254165649414, + "learning_rate": 1.007794293671244e-06, + "loss": 0.1363, + "num_input_tokens_seen": 75561320, + "step": 112110 + }, + { + "epoch": 2.738988102508978, + "grad_norm": 21.933855056762695, + "learning_rate": 1.0077090186339218e-06, + "loss": 0.0579, + "num_input_tokens_seen": 75564648, + "step": 112115 + }, + { + "epoch": 2.7391102533408254, + "grad_norm": 0.01483561284840107, + "learning_rate": 1.0076237435405374e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75567912, + "step": 112120 + }, + { + "epoch": 2.7392324041726726, + "grad_norm": 160.25828552246094, + "learning_rate": 1.0075384683917111e-06, + "loss": 0.0162, + "num_input_tokens_seen": 75571240, + "step": 112125 + }, + { + "epoch": 2.7393545550045197, + "grad_norm": 0.40404099225997925, + "learning_rate": 1.007453193188063e-06, + "loss": 0.0249, + "num_input_tokens_seen": 75574696, + "step": 112130 + }, + { + "epoch": 2.739476705836367, + "grad_norm": 0.020996518433094025, + "learning_rate": 1.0073679179302133e-06, + "loss": 0.1026, + "num_input_tokens_seen": 75577832, + "step": 112135 + }, + { + "epoch": 2.7395988566682137, + "grad_norm": 0.008093031123280525, + "learning_rate": 1.0072826426187821e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75581224, + "step": 112140 + }, + { + "epoch": 2.7397210075000613, + "grad_norm": 0.004957599099725485, + "learning_rate": 1.0071973672543898e-06, + "loss": 0.0561, + "num_input_tokens_seen": 75584744, + "step": 112145 + }, + { + "epoch": 2.739843158331908, + "grad_norm": 0.018184345215559006, + "learning_rate": 1.0071120918376563e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75588200, + "step": 112150 + }, + { + "epoch": 2.7399653091637552, + "grad_norm": 0.005898487288504839, + "learning_rate": 1.0070268163692017e-06, + "loss": 0.0543, + "num_input_tokens_seen": 75591208, + "step": 112155 + }, + { + "epoch": 2.7400874599956024, + "grad_norm": 38.10469055175781, + "learning_rate": 1.0069415408496458e-06, + "loss": 0.0554, + "num_input_tokens_seen": 75594408, + "step": 112160 + }, + { + "epoch": 2.7402096108274496, + "grad_norm": 0.009115857072174549, + "learning_rate": 1.0068562652796095e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75598120, + "step": 112165 + }, + { + "epoch": 2.740331761659297, + "grad_norm": 0.20606650412082672, + "learning_rate": 1.0067709896597126e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75601192, + "step": 112170 + }, + { + "epoch": 2.740453912491144, + "grad_norm": 0.28834593296051025, + "learning_rate": 1.0066857139905752e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75604328, + "step": 112175 + }, + { + "epoch": 2.740576063322991, + "grad_norm": 0.008773494511842728, + "learning_rate": 1.0066004382728176e-06, + "loss": 0.048, + "num_input_tokens_seen": 75607784, + "step": 112180 + }, + { + "epoch": 2.7406982141548384, + "grad_norm": 0.037397127598524094, + "learning_rate": 1.0065151625070595e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75610984, + "step": 112185 + }, + { + "epoch": 2.7408203649866856, + "grad_norm": 0.10962966829538345, + "learning_rate": 1.0064298866939216e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75614632, + "step": 112190 + }, + { + "epoch": 2.7409425158185328, + "grad_norm": 358.03375244140625, + "learning_rate": 1.0063446108340236e-06, + "loss": 0.0069, + "num_input_tokens_seen": 75617960, + "step": 112195 + }, + { + "epoch": 2.74106466665038, + "grad_norm": 0.04087607190012932, + "learning_rate": 1.0062593349279865e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75621672, + "step": 112200 + }, + { + "epoch": 2.741186817482227, + "grad_norm": 1.1919631958007812, + "learning_rate": 1.0061740589764294e-06, + "loss": 0.0379, + "num_input_tokens_seen": 75625000, + "step": 112205 + }, + { + "epoch": 2.7413089683140743, + "grad_norm": 0.009299659170210361, + "learning_rate": 1.0060887829799728e-06, + "loss": 0.0185, + "num_input_tokens_seen": 75628392, + "step": 112210 + }, + { + "epoch": 2.7414311191459215, + "grad_norm": 0.003497474826872349, + "learning_rate": 1.0060035069392371e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75631720, + "step": 112215 + }, + { + "epoch": 2.7415532699777687, + "grad_norm": 0.0010874419240280986, + "learning_rate": 1.0059182308548424e-06, + "loss": 0.0847, + "num_input_tokens_seen": 75635432, + "step": 112220 + }, + { + "epoch": 2.7416754208096155, + "grad_norm": 0.025534670799970627, + "learning_rate": 1.0058329547274083e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75638824, + "step": 112225 + }, + { + "epoch": 2.741797571641463, + "grad_norm": 0.06358666718006134, + "learning_rate": 1.0057476785575555e-06, + "loss": 0.0539, + "num_input_tokens_seen": 75642472, + "step": 112230 + }, + { + "epoch": 2.74191972247331, + "grad_norm": 0.01722525805234909, + "learning_rate": 1.0056624023459045e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75646504, + "step": 112235 + }, + { + "epoch": 2.7420418733051575, + "grad_norm": 0.5232629179954529, + "learning_rate": 1.0055771260930745e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75650152, + "step": 112240 + }, + { + "epoch": 2.742164024137004, + "grad_norm": 0.512911319732666, + "learning_rate": 1.0054918497996865e-06, + "loss": 0.1006, + "num_input_tokens_seen": 75653288, + "step": 112245 + }, + { + "epoch": 2.7422861749688514, + "grad_norm": 0.0002688757376745343, + "learning_rate": 1.00540657346636e-06, + "loss": 0.0779, + "num_input_tokens_seen": 75656616, + "step": 112250 + }, + { + "epoch": 2.7424083258006986, + "grad_norm": 0.016228975728154182, + "learning_rate": 1.0053212970937157e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75659944, + "step": 112255 + }, + { + "epoch": 2.742530476632546, + "grad_norm": 6.871138095855713, + "learning_rate": 1.0052360206823733e-06, + "loss": 0.0383, + "num_input_tokens_seen": 75663016, + "step": 112260 + }, + { + "epoch": 2.742652627464393, + "grad_norm": 0.07361658662557602, + "learning_rate": 1.0051507442329533e-06, + "loss": 0.0728, + "num_input_tokens_seen": 75666472, + "step": 112265 + }, + { + "epoch": 2.74277477829624, + "grad_norm": 39.02785110473633, + "learning_rate": 1.0050654677460754e-06, + "loss": 0.1776, + "num_input_tokens_seen": 75669672, + "step": 112270 + }, + { + "epoch": 2.7428969291280874, + "grad_norm": 0.022098205983638763, + "learning_rate": 1.0049801912223603e-06, + "loss": 0.0558, + "num_input_tokens_seen": 75672744, + "step": 112275 + }, + { + "epoch": 2.7430190799599345, + "grad_norm": 0.07683975249528885, + "learning_rate": 1.004894914662428e-06, + "loss": 0.1883, + "num_input_tokens_seen": 75676072, + "step": 112280 + }, + { + "epoch": 2.7431412307917817, + "grad_norm": 0.02362046204507351, + "learning_rate": 1.0048096380668982e-06, + "loss": 0.1013, + "num_input_tokens_seen": 75679208, + "step": 112285 + }, + { + "epoch": 2.743263381623629, + "grad_norm": 0.04820949584245682, + "learning_rate": 1.0047243614363916e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75682984, + "step": 112290 + }, + { + "epoch": 2.743385532455476, + "grad_norm": 23.516231536865234, + "learning_rate": 1.0046390847715282e-06, + "loss": 0.0477, + "num_input_tokens_seen": 75686568, + "step": 112295 + }, + { + "epoch": 2.7435076832873233, + "grad_norm": 0.06488364189863205, + "learning_rate": 1.0045538080729283e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75689896, + "step": 112300 + }, + { + "epoch": 2.7436298341191705, + "grad_norm": 0.009952181950211525, + "learning_rate": 1.004468531341212e-06, + "loss": 0.0502, + "num_input_tokens_seen": 75693416, + "step": 112305 + }, + { + "epoch": 2.7437519849510172, + "grad_norm": 0.01083527971059084, + "learning_rate": 1.004383254576999e-06, + "loss": 0.0489, + "num_input_tokens_seen": 75696488, + "step": 112310 + }, + { + "epoch": 2.743874135782865, + "grad_norm": 0.007215315010398626, + "learning_rate": 1.00429797778091e-06, + "loss": 0.1186, + "num_input_tokens_seen": 75699752, + "step": 112315 + }, + { + "epoch": 2.7439962866147116, + "grad_norm": 0.023482423275709152, + "learning_rate": 1.0042127009535647e-06, + "loss": 0.056, + "num_input_tokens_seen": 75702568, + "step": 112320 + }, + { + "epoch": 2.7441184374465593, + "grad_norm": 47.78105545043945, + "learning_rate": 1.0041274240955834e-06, + "loss": 0.1368, + "num_input_tokens_seen": 75705832, + "step": 112325 + }, + { + "epoch": 2.744240588278406, + "grad_norm": 0.26664429903030396, + "learning_rate": 1.0040421472075865e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75709224, + "step": 112330 + }, + { + "epoch": 2.744362739110253, + "grad_norm": 30.276893615722656, + "learning_rate": 1.0039568702901942e-06, + "loss": 0.1433, + "num_input_tokens_seen": 75712488, + "step": 112335 + }, + { + "epoch": 2.7444848899421004, + "grad_norm": 0.09096966683864594, + "learning_rate": 1.0038715933440265e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75715944, + "step": 112340 + }, + { + "epoch": 2.7446070407739476, + "grad_norm": 0.09213719516992569, + "learning_rate": 1.0037863163697034e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75718952, + "step": 112345 + }, + { + "epoch": 2.7447291916057948, + "grad_norm": 0.038761381059885025, + "learning_rate": 1.0037010393678449e-06, + "loss": 0.0397, + "num_input_tokens_seen": 75722152, + "step": 112350 + }, + { + "epoch": 2.744851342437642, + "grad_norm": 0.0874420702457428, + "learning_rate": 1.003615762339072e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75725032, + "step": 112355 + }, + { + "epoch": 2.744973493269489, + "grad_norm": 0.2326081395149231, + "learning_rate": 1.0035304852840042e-06, + "loss": 0.0392, + "num_input_tokens_seen": 75728168, + "step": 112360 + }, + { + "epoch": 2.7450956441013363, + "grad_norm": 40.23335266113281, + "learning_rate": 1.0034452082032615e-06, + "loss": 0.0927, + "num_input_tokens_seen": 75731112, + "step": 112365 + }, + { + "epoch": 2.7452177949331835, + "grad_norm": 0.10373587161302567, + "learning_rate": 1.0033599310974645e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75734312, + "step": 112370 + }, + { + "epoch": 2.7453399457650307, + "grad_norm": 10.037677764892578, + "learning_rate": 1.003274653967233e-06, + "loss": 0.0007, + "num_input_tokens_seen": 75737704, + "step": 112375 + }, + { + "epoch": 2.745462096596878, + "grad_norm": 0.325348436832428, + "learning_rate": 1.0031893768131874e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75741096, + "step": 112380 + }, + { + "epoch": 2.745584247428725, + "grad_norm": 0.03348547965288162, + "learning_rate": 1.0031040996359478e-06, + "loss": 0.0006, + "num_input_tokens_seen": 75744424, + "step": 112385 + }, + { + "epoch": 2.7457063982605723, + "grad_norm": 0.0019438609015196562, + "learning_rate": 1.0030188224361344e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75748136, + "step": 112390 + }, + { + "epoch": 2.7458285490924195, + "grad_norm": 0.055961500853300095, + "learning_rate": 1.0029335452143673e-06, + "loss": 0.0344, + "num_input_tokens_seen": 75753384, + "step": 112395 + }, + { + "epoch": 2.7459506999242667, + "grad_norm": 0.038423359394073486, + "learning_rate": 1.0028482679712667e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75756648, + "step": 112400 + }, + { + "epoch": 2.7460728507561134, + "grad_norm": 0.03630128875374794, + "learning_rate": 1.0027629907074527e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75760104, + "step": 112405 + }, + { + "epoch": 2.746195001587961, + "grad_norm": 0.2955532968044281, + "learning_rate": 1.0026777134235456e-06, + "loss": 0.0717, + "num_input_tokens_seen": 75763624, + "step": 112410 + }, + { + "epoch": 2.746317152419808, + "grad_norm": 0.00962782371789217, + "learning_rate": 1.0025924361201652e-06, + "loss": 0.0823, + "num_input_tokens_seen": 75766632, + "step": 112415 + }, + { + "epoch": 2.7464393032516554, + "grad_norm": 0.03371252119541168, + "learning_rate": 1.0025071587979322e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75769768, + "step": 112420 + }, + { + "epoch": 2.746561454083502, + "grad_norm": 0.45033684372901917, + "learning_rate": 1.0024218814574664e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75773096, + "step": 112425 + }, + { + "epoch": 2.7466836049153494, + "grad_norm": 0.558583676815033, + "learning_rate": 1.0023366040993876e-06, + "loss": 0.0716, + "num_input_tokens_seen": 75776424, + "step": 112430 + }, + { + "epoch": 2.7468057557471965, + "grad_norm": 0.00862213410437107, + "learning_rate": 1.0022513267243169e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75779560, + "step": 112435 + }, + { + "epoch": 2.7469279065790437, + "grad_norm": 0.012112240307033062, + "learning_rate": 1.0021660493328737e-06, + "loss": 0.0013, + "num_input_tokens_seen": 75783080, + "step": 112440 + }, + { + "epoch": 2.747050057410891, + "grad_norm": 52.65711212158203, + "learning_rate": 1.0020807719256784e-06, + "loss": 0.046, + "num_input_tokens_seen": 75786408, + "step": 112445 + }, + { + "epoch": 2.747172208242738, + "grad_norm": 0.04763922840356827, + "learning_rate": 1.0019954945033513e-06, + "loss": 0.0004, + "num_input_tokens_seen": 75789480, + "step": 112450 + }, + { + "epoch": 2.7472943590745853, + "grad_norm": 0.052201878279447556, + "learning_rate": 1.0019102170665124e-06, + "loss": 0.0313, + "num_input_tokens_seen": 75792936, + "step": 112455 + }, + { + "epoch": 2.7474165099064325, + "grad_norm": 0.007947223260998726, + "learning_rate": 1.0018249396157818e-06, + "loss": 0.1355, + "num_input_tokens_seen": 75796136, + "step": 112460 + }, + { + "epoch": 2.7475386607382797, + "grad_norm": 0.02703636698424816, + "learning_rate": 1.00173966215178e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75799208, + "step": 112465 + }, + { + "epoch": 2.747660811570127, + "grad_norm": 45.332603454589844, + "learning_rate": 1.0016543846751265e-06, + "loss": 0.0389, + "num_input_tokens_seen": 75802536, + "step": 112470 + }, + { + "epoch": 2.747782962401974, + "grad_norm": 0.008542269468307495, + "learning_rate": 1.001569107186442e-06, + "loss": 0.0, + "num_input_tokens_seen": 75806312, + "step": 112475 + }, + { + "epoch": 2.7479051132338213, + "grad_norm": 4.6416215896606445, + "learning_rate": 1.0014838296863467e-06, + "loss": 0.0008, + "num_input_tokens_seen": 75810216, + "step": 112480 + }, + { + "epoch": 2.7480272640656684, + "grad_norm": 0.0047716982662677765, + "learning_rate": 1.0013985521754606e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75813672, + "step": 112485 + }, + { + "epoch": 2.748149414897515, + "grad_norm": 0.010327127762138844, + "learning_rate": 1.0013132746544038e-06, + "loss": 0.0526, + "num_input_tokens_seen": 75817192, + "step": 112490 + }, + { + "epoch": 2.748271565729363, + "grad_norm": 0.0022148210555315018, + "learning_rate": 1.0012279971237965e-06, + "loss": 0.0005, + "num_input_tokens_seen": 75820264, + "step": 112495 + }, + { + "epoch": 2.7483937165612096, + "grad_norm": 27.39652442932129, + "learning_rate": 1.0011427195842589e-06, + "loss": 0.0785, + "num_input_tokens_seen": 75823272, + "step": 112500 + }, + { + "epoch": 2.748515867393057, + "grad_norm": 0.18398308753967285, + "learning_rate": 1.0010574420364108e-06, + "loss": 0.0461, + "num_input_tokens_seen": 75826344, + "step": 112505 + }, + { + "epoch": 2.748638018224904, + "grad_norm": 0.2839778661727905, + "learning_rate": 1.0009721644808734e-06, + "loss": 0.0008, + "num_input_tokens_seen": 75829672, + "step": 112510 + }, + { + "epoch": 2.748760169056751, + "grad_norm": 0.0654890388250351, + "learning_rate": 1.0008868869182656e-06, + "loss": 0.0408, + "num_input_tokens_seen": 75833192, + "step": 112515 + }, + { + "epoch": 2.7488823198885983, + "grad_norm": 0.01932072453200817, + "learning_rate": 1.0008016093492082e-06, + "loss": 0.0003, + "num_input_tokens_seen": 75836968, + "step": 112520 + }, + { + "epoch": 2.7490044707204455, + "grad_norm": 0.020976558327674866, + "learning_rate": 1.0007163317743214e-06, + "loss": 0.0, + "num_input_tokens_seen": 75840040, + "step": 112525 + }, + { + "epoch": 2.7491266215522927, + "grad_norm": 0.012725918553769588, + "learning_rate": 1.000631054194225e-06, + "loss": 0.025, + "num_input_tokens_seen": 75843432, + "step": 112530 + }, + { + "epoch": 2.74924877238414, + "grad_norm": 0.01118039432913065, + "learning_rate": 1.0005457766095395e-06, + "loss": 0.0011, + "num_input_tokens_seen": 75846824, + "step": 112535 + }, + { + "epoch": 2.749370923215987, + "grad_norm": 49.7636833190918, + "learning_rate": 1.000460499020885e-06, + "loss": 0.0434, + "num_input_tokens_seen": 75850344, + "step": 112540 + }, + { + "epoch": 2.7494930740478343, + "grad_norm": 0.0003692000173032284, + "learning_rate": 1.0003752214288818e-06, + "loss": 0.0944, + "num_input_tokens_seen": 75854120, + "step": 112545 + }, + { + "epoch": 2.7496152248796815, + "grad_norm": 0.006110539194196463, + "learning_rate": 1.0002899438341498e-06, + "loss": 0.0541, + "num_input_tokens_seen": 75857384, + "step": 112550 + }, + { + "epoch": 2.7497373757115287, + "grad_norm": 0.132795512676239, + "learning_rate": 1.0002046662373092e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75860968, + "step": 112555 + }, + { + "epoch": 2.749859526543376, + "grad_norm": 0.030062628909945488, + "learning_rate": 1.0001193886389803e-06, + "loss": 0.0002, + "num_input_tokens_seen": 75864552, + "step": 112560 + }, + { + "epoch": 2.749981677375223, + "grad_norm": 0.005698109045624733, + "learning_rate": 1.000034111039783e-06, + "loss": 0.0001, + "num_input_tokens_seen": 75867752, + "step": 112565 + }, + { + "epoch": 2.7501038282070702, + "grad_norm": 1.852415919303894, + "learning_rate": 9.99948833440338e-07, + "loss": 0.0008, + "num_input_tokens_seen": 75871400, + "step": 112570 + }, + { + "epoch": 2.750201548872548, + "eval_loss": 0.20390097796916962, + "eval_runtime": 47.6331, + "eval_samples_per_second": 763.86, + "eval_steps_per_second": 95.501, + "num_input_tokens_seen": 75874280, + "step": 112574 + }, + { + "epoch": 2.7502259790389174, + "grad_norm": 0.024079719558358192, + "learning_rate": 9.998635558412646e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75874856, + "step": 112575 + }, + { + "epoch": 2.7503481298707646, + "grad_norm": 890.7974853515625, + "learning_rate": 9.997782782431837e-07, + "loss": 0.0653, + "num_input_tokens_seen": 75878184, + "step": 112580 + }, + { + "epoch": 2.7504702807026113, + "grad_norm": 0.0008283877978101373, + "learning_rate": 9.996930006467153e-07, + "loss": 0.0, + "num_input_tokens_seen": 75881448, + "step": 112585 + }, + { + "epoch": 2.750592431534459, + "grad_norm": 0.05993705615401268, + "learning_rate": 9.996077230524793e-07, + "loss": 0.0685, + "num_input_tokens_seen": 75884392, + "step": 112590 + }, + { + "epoch": 2.7507145823663057, + "grad_norm": 0.03609907999634743, + "learning_rate": 9.995224454610963e-07, + "loss": 0.0523, + "num_input_tokens_seen": 75887464, + "step": 112595 + }, + { + "epoch": 2.750836733198153, + "grad_norm": 0.001615293207578361, + "learning_rate": 9.994371678731857e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75890472, + "step": 112600 + }, + { + "epoch": 2.75095888403, + "grad_norm": 0.12746308743953705, + "learning_rate": 9.993518902893688e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75893800, + "step": 112605 + }, + { + "epoch": 2.7510810348618473, + "grad_norm": 0.016232147812843323, + "learning_rate": 9.992666127102648e-07, + "loss": 0.1098, + "num_input_tokens_seen": 75897192, + "step": 112610 + }, + { + "epoch": 2.7512031856936945, + "grad_norm": 0.03641084209084511, + "learning_rate": 9.991813351364941e-07, + "loss": 0.0004, + "num_input_tokens_seen": 75900392, + "step": 112615 + }, + { + "epoch": 2.7513253365255417, + "grad_norm": 0.00999990850687027, + "learning_rate": 9.990960575686773e-07, + "loss": 0.0393, + "num_input_tokens_seen": 75904040, + "step": 112620 + }, + { + "epoch": 2.751447487357389, + "grad_norm": 0.005113823339343071, + "learning_rate": 9.990107800074338e-07, + "loss": 0.0002, + "num_input_tokens_seen": 75907432, + "step": 112625 + }, + { + "epoch": 2.751569638189236, + "grad_norm": 0.08694149553775787, + "learning_rate": 9.989255024533846e-07, + "loss": 0.0374, + "num_input_tokens_seen": 75910824, + "step": 112630 + }, + { + "epoch": 2.7516917890210832, + "grad_norm": 0.02369142509996891, + "learning_rate": 9.98840224907149e-07, + "loss": 0.1025, + "num_input_tokens_seen": 75914088, + "step": 112635 + }, + { + "epoch": 2.7518139398529304, + "grad_norm": 0.05341620370745659, + "learning_rate": 9.98754947369348e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75917224, + "step": 112640 + }, + { + "epoch": 2.7519360906847776, + "grad_norm": 0.058093249797821045, + "learning_rate": 9.98669669840601e-07, + "loss": 0.0677, + "num_input_tokens_seen": 75920808, + "step": 112645 + }, + { + "epoch": 2.752058241516625, + "grad_norm": 0.009987649507820606, + "learning_rate": 9.985843923215284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75924520, + "step": 112650 + }, + { + "epoch": 2.752180392348472, + "grad_norm": 0.04662652686238289, + "learning_rate": 9.98499114812751e-07, + "loss": 0.0261, + "num_input_tokens_seen": 75927528, + "step": 112655 + }, + { + "epoch": 2.752302543180319, + "grad_norm": 0.0058932919055223465, + "learning_rate": 9.98413837314888e-07, + "loss": 0.0002, + "num_input_tokens_seen": 75930536, + "step": 112660 + }, + { + "epoch": 2.7524246940121664, + "grad_norm": 0.00041428772965446115, + "learning_rate": 9.983285598285606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75934248, + "step": 112665 + }, + { + "epoch": 2.752546844844013, + "grad_norm": 75.15556335449219, + "learning_rate": 9.98243282354388e-07, + "loss": 0.0701, + "num_input_tokens_seen": 75937320, + "step": 112670 + }, + { + "epoch": 2.7526689956758608, + "grad_norm": 38.11005783081055, + "learning_rate": 9.981580048929904e-07, + "loss": 0.095, + "num_input_tokens_seen": 75940072, + "step": 112675 + }, + { + "epoch": 2.7527911465077075, + "grad_norm": 0.19551952183246613, + "learning_rate": 9.980727274449886e-07, + "loss": 0.0005, + "num_input_tokens_seen": 75943848, + "step": 112680 + }, + { + "epoch": 2.752913297339555, + "grad_norm": 0.005637908820062876, + "learning_rate": 9.979874500110023e-07, + "loss": 0.0933, + "num_input_tokens_seen": 75946984, + "step": 112685 + }, + { + "epoch": 2.753035448171402, + "grad_norm": 0.02520628087222576, + "learning_rate": 9.979021725916521e-07, + "loss": 0.0004, + "num_input_tokens_seen": 75950120, + "step": 112690 + }, + { + "epoch": 2.753157599003249, + "grad_norm": 0.1252969652414322, + "learning_rate": 9.978168951875576e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75953512, + "step": 112695 + }, + { + "epoch": 2.7532797498350963, + "grad_norm": 0.1884545087814331, + "learning_rate": 9.977316177993395e-07, + "loss": 0.0721, + "num_input_tokens_seen": 75956584, + "step": 112700 + }, + { + "epoch": 2.7534019006669435, + "grad_norm": 0.07097301632165909, + "learning_rate": 9.976463404276173e-07, + "loss": 0.0661, + "num_input_tokens_seen": 75960168, + "step": 112705 + }, + { + "epoch": 2.7535240514987906, + "grad_norm": 0.02633850835263729, + "learning_rate": 9.975610630730118e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75963816, + "step": 112710 + }, + { + "epoch": 2.753646202330638, + "grad_norm": 0.03084862418472767, + "learning_rate": 9.97475785736143e-07, + "loss": 0.0405, + "num_input_tokens_seen": 75967016, + "step": 112715 + }, + { + "epoch": 2.753768353162485, + "grad_norm": 0.0017762664938345551, + "learning_rate": 9.973905084176307e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75970600, + "step": 112720 + }, + { + "epoch": 2.753890503994332, + "grad_norm": 0.06449517607688904, + "learning_rate": 9.973052311180956e-07, + "loss": 0.1143, + "num_input_tokens_seen": 75973736, + "step": 112725 + }, + { + "epoch": 2.7540126548261794, + "grad_norm": 0.07654926925897598, + "learning_rate": 9.972199538381573e-07, + "loss": 0.0002, + "num_input_tokens_seen": 75977192, + "step": 112730 + }, + { + "epoch": 2.7541348056580266, + "grad_norm": 0.062180664390325546, + "learning_rate": 9.97134676578436e-07, + "loss": 0.0349, + "num_input_tokens_seen": 75980520, + "step": 112735 + }, + { + "epoch": 2.754256956489874, + "grad_norm": 62.79735565185547, + "learning_rate": 9.970493993395527e-07, + "loss": 0.2066, + "num_input_tokens_seen": 75984104, + "step": 112740 + }, + { + "epoch": 2.754379107321721, + "grad_norm": 0.04304159805178642, + "learning_rate": 9.969641221221267e-07, + "loss": 0.0005, + "num_input_tokens_seen": 75987368, + "step": 112745 + }, + { + "epoch": 2.754501258153568, + "grad_norm": 0.3604861795902252, + "learning_rate": 9.968788449267786e-07, + "loss": 0.0365, + "num_input_tokens_seen": 75990632, + "step": 112750 + }, + { + "epoch": 2.7546234089854154, + "grad_norm": 0.07671809196472168, + "learning_rate": 9.96793567754128e-07, + "loss": 0.0005, + "num_input_tokens_seen": 75994216, + "step": 112755 + }, + { + "epoch": 2.7547455598172625, + "grad_norm": 0.007398826535791159, + "learning_rate": 9.967082906047958e-07, + "loss": 0.0001, + "num_input_tokens_seen": 75997480, + "step": 112760 + }, + { + "epoch": 2.7548677106491093, + "grad_norm": 0.012353933416306973, + "learning_rate": 9.966230134794017e-07, + "loss": 0.0611, + "num_input_tokens_seen": 76000744, + "step": 112765 + }, + { + "epoch": 2.754989861480957, + "grad_norm": 0.05334820970892906, + "learning_rate": 9.965377363785657e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76004456, + "step": 112770 + }, + { + "epoch": 2.7551120123128037, + "grad_norm": 0.05684984475374222, + "learning_rate": 9.964524593029089e-07, + "loss": 0.146, + "num_input_tokens_seen": 76007656, + "step": 112775 + }, + { + "epoch": 2.755234163144651, + "grad_norm": 0.02138231322169304, + "learning_rate": 9.963671822530499e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76010920, + "step": 112780 + }, + { + "epoch": 2.755356313976498, + "grad_norm": 0.14898619055747986, + "learning_rate": 9.962819052296105e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76014440, + "step": 112785 + }, + { + "epoch": 2.7554784648083452, + "grad_norm": 49.297935485839844, + "learning_rate": 9.961966282332093e-07, + "loss": 0.0998, + "num_input_tokens_seen": 76017704, + "step": 112790 + }, + { + "epoch": 2.7556006156401924, + "grad_norm": 0.06874702125787735, + "learning_rate": 9.96111351264468e-07, + "loss": 0.0668, + "num_input_tokens_seen": 76020648, + "step": 112795 + }, + { + "epoch": 2.7557227664720396, + "grad_norm": 0.3245699405670166, + "learning_rate": 9.960260743240054e-07, + "loss": 0.0536, + "num_input_tokens_seen": 76023592, + "step": 112800 + }, + { + "epoch": 2.755844917303887, + "grad_norm": 0.4467359781265259, + "learning_rate": 9.959407974124423e-07, + "loss": 0.0483, + "num_input_tokens_seen": 76026536, + "step": 112805 + }, + { + "epoch": 2.755967068135734, + "grad_norm": 0.13222886621952057, + "learning_rate": 9.958555205303992e-07, + "loss": 0.0415, + "num_input_tokens_seen": 76029800, + "step": 112810 + }, + { + "epoch": 2.756089218967581, + "grad_norm": 0.01306053064763546, + "learning_rate": 9.957702436784956e-07, + "loss": 0.0215, + "num_input_tokens_seen": 76032616, + "step": 112815 + }, + { + "epoch": 2.7562113697994284, + "grad_norm": 0.019551554694771767, + "learning_rate": 9.95684966857352e-07, + "loss": 0.1356, + "num_input_tokens_seen": 76035624, + "step": 112820 + }, + { + "epoch": 2.7563335206312756, + "grad_norm": 0.022561049088835716, + "learning_rate": 9.955996900675888e-07, + "loss": 0.001, + "num_input_tokens_seen": 76038760, + "step": 112825 + }, + { + "epoch": 2.7564556714631228, + "grad_norm": 0.07821331173181534, + "learning_rate": 9.955144133098253e-07, + "loss": 0.0445, + "num_input_tokens_seen": 76042344, + "step": 112830 + }, + { + "epoch": 2.75657782229497, + "grad_norm": 0.17297571897506714, + "learning_rate": 9.954291365846825e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76045608, + "step": 112835 + }, + { + "epoch": 2.756699973126817, + "grad_norm": 0.15678569674491882, + "learning_rate": 9.953438598927801e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76048744, + "step": 112840 + }, + { + "epoch": 2.7568221239586643, + "grad_norm": 0.07841111719608307, + "learning_rate": 9.952585832347387e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76051816, + "step": 112845 + }, + { + "epoch": 2.756944274790511, + "grad_norm": 0.014237224124372005, + "learning_rate": 9.951733066111776e-07, + "loss": 0.0392, + "num_input_tokens_seen": 76054952, + "step": 112850 + }, + { + "epoch": 2.7570664256223587, + "grad_norm": 0.21275421977043152, + "learning_rate": 9.950880300227183e-07, + "loss": 0.1102, + "num_input_tokens_seen": 76058280, + "step": 112855 + }, + { + "epoch": 2.7571885764542055, + "grad_norm": 0.013343808241188526, + "learning_rate": 9.950027534699793e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76062120, + "step": 112860 + }, + { + "epoch": 2.757310727286053, + "grad_norm": 0.07676204293966293, + "learning_rate": 9.949174769535821e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76065448, + "step": 112865 + }, + { + "epoch": 2.7574328781179, + "grad_norm": 34.86268615722656, + "learning_rate": 9.948322004741465e-07, + "loss": 0.1247, + "num_input_tokens_seen": 76069032, + "step": 112870 + }, + { + "epoch": 2.757555028949747, + "grad_norm": 0.07208728790283203, + "learning_rate": 9.947469240322922e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76072616, + "step": 112875 + }, + { + "epoch": 2.757677179781594, + "grad_norm": 0.28862571716308594, + "learning_rate": 9.946616476286402e-07, + "loss": 0.0514, + "num_input_tokens_seen": 76075816, + "step": 112880 + }, + { + "epoch": 2.7577993306134414, + "grad_norm": 0.08105853945016861, + "learning_rate": 9.945763712638094e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76079144, + "step": 112885 + }, + { + "epoch": 2.7579214814452886, + "grad_norm": 0.00472519313916564, + "learning_rate": 9.944910949384213e-07, + "loss": 0.0408, + "num_input_tokens_seen": 76082664, + "step": 112890 + }, + { + "epoch": 2.758043632277136, + "grad_norm": 0.14678433537483215, + "learning_rate": 9.944058186530951e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76086248, + "step": 112895 + }, + { + "epoch": 2.758165783108983, + "grad_norm": 0.09297756850719452, + "learning_rate": 9.94320542408451e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76089512, + "step": 112900 + }, + { + "epoch": 2.75828793394083, + "grad_norm": 0.03199482336640358, + "learning_rate": 9.9423526620511e-07, + "loss": 0.1132, + "num_input_tokens_seen": 76092584, + "step": 112905 + }, + { + "epoch": 2.7584100847726774, + "grad_norm": 0.12539511919021606, + "learning_rate": 9.941499900436915e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76096104, + "step": 112910 + }, + { + "epoch": 2.7585322356045245, + "grad_norm": 0.033748965710401535, + "learning_rate": 9.94064713924816e-07, + "loss": 0.0628, + "num_input_tokens_seen": 76099432, + "step": 112915 + }, + { + "epoch": 2.7586543864363717, + "grad_norm": 16.201793670654297, + "learning_rate": 9.93979437849103e-07, + "loss": 0.0576, + "num_input_tokens_seen": 76103464, + "step": 112920 + }, + { + "epoch": 2.758776537268219, + "grad_norm": 0.017845073714852333, + "learning_rate": 9.938941618171736e-07, + "loss": 0.049, + "num_input_tokens_seen": 76106664, + "step": 112925 + }, + { + "epoch": 2.758898688100066, + "grad_norm": 0.01976141892373562, + "learning_rate": 9.938088858296477e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76110376, + "step": 112930 + }, + { + "epoch": 2.759020838931913, + "grad_norm": 16.954193115234375, + "learning_rate": 9.937236098871447e-07, + "loss": 0.0705, + "num_input_tokens_seen": 76114152, + "step": 112935 + }, + { + "epoch": 2.7591429897637605, + "grad_norm": 34.79048156738281, + "learning_rate": 9.936383339902858e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76117480, + "step": 112940 + }, + { + "epoch": 2.7592651405956072, + "grad_norm": 0.17568282783031464, + "learning_rate": 9.935530581396902e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76120744, + "step": 112945 + }, + { + "epoch": 2.759387291427455, + "grad_norm": 0.010089860297739506, + "learning_rate": 9.93467782335979e-07, + "loss": 0.0257, + "num_input_tokens_seen": 76124264, + "step": 112950 + }, + { + "epoch": 2.7595094422593016, + "grad_norm": 0.032470934092998505, + "learning_rate": 9.933825065797711e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76128296, + "step": 112955 + }, + { + "epoch": 2.759631593091149, + "grad_norm": 0.037431854754686356, + "learning_rate": 9.932972308716877e-07, + "loss": 0.0007, + "num_input_tokens_seen": 76132008, + "step": 112960 + }, + { + "epoch": 2.759753743922996, + "grad_norm": 0.17975296080112457, + "learning_rate": 9.93211955212349e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76136104, + "step": 112965 + }, + { + "epoch": 2.759875894754843, + "grad_norm": 0.013288943096995354, + "learning_rate": 9.931266796023744e-07, + "loss": 0.0009, + "num_input_tokens_seen": 76139688, + "step": 112970 + }, + { + "epoch": 2.7599980455866904, + "grad_norm": 0.09433402121067047, + "learning_rate": 9.930414040423848e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76143400, + "step": 112975 + }, + { + "epoch": 2.7601201964185376, + "grad_norm": 0.19448913633823395, + "learning_rate": 9.929561285329997e-07, + "loss": 0.0592, + "num_input_tokens_seen": 76147560, + "step": 112980 + }, + { + "epoch": 2.7602423472503848, + "grad_norm": 0.04659518599510193, + "learning_rate": 9.928708530748395e-07, + "loss": 0.0453, + "num_input_tokens_seen": 76150760, + "step": 112985 + }, + { + "epoch": 2.760364498082232, + "grad_norm": 0.052931103855371475, + "learning_rate": 9.927855776685247e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76154216, + "step": 112990 + }, + { + "epoch": 2.760486648914079, + "grad_norm": 0.023006392642855644, + "learning_rate": 9.927003023146745e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76157352, + "step": 112995 + }, + { + "epoch": 2.7606087997459263, + "grad_norm": 0.009410569444298744, + "learning_rate": 9.926150270139104e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76160680, + "step": 113000 + }, + { + "epoch": 2.7607309505777735, + "grad_norm": 0.01064909528940916, + "learning_rate": 9.925297517668512e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76163944, + "step": 113005 + }, + { + "epoch": 2.7608531014096207, + "grad_norm": 0.158854141831398, + "learning_rate": 9.924444765741183e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76167016, + "step": 113010 + }, + { + "epoch": 2.760975252241468, + "grad_norm": 0.17472516000270844, + "learning_rate": 9.923592014363305e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76170600, + "step": 113015 + }, + { + "epoch": 2.761097403073315, + "grad_norm": 0.006018009968101978, + "learning_rate": 9.92273926354109e-07, + "loss": 0.0372, + "num_input_tokens_seen": 76173800, + "step": 113020 + }, + { + "epoch": 2.7612195539051623, + "grad_norm": 0.007808130234479904, + "learning_rate": 9.921886513280735e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76176936, + "step": 113025 + }, + { + "epoch": 2.761341704737009, + "grad_norm": 0.08086532354354858, + "learning_rate": 9.921033763588444e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76179880, + "step": 113030 + }, + { + "epoch": 2.7614638555688567, + "grad_norm": 0.006262186449021101, + "learning_rate": 9.920181014470417e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76183528, + "step": 113035 + }, + { + "epoch": 2.7615860064007034, + "grad_norm": 0.020363593474030495, + "learning_rate": 9.919328265932852e-07, + "loss": 0.0705, + "num_input_tokens_seen": 76186856, + "step": 113040 + }, + { + "epoch": 2.761708157232551, + "grad_norm": 0.011324654333293438, + "learning_rate": 9.918475517981958e-07, + "loss": 0.0, + "num_input_tokens_seen": 76190376, + "step": 113045 + }, + { + "epoch": 2.7618303080643978, + "grad_norm": 12.743666648864746, + "learning_rate": 9.917622770623925e-07, + "loss": 0.049, + "num_input_tokens_seen": 76194216, + "step": 113050 + }, + { + "epoch": 2.761952458896245, + "grad_norm": 13.240878105163574, + "learning_rate": 9.916770023864964e-07, + "loss": 0.0647, + "num_input_tokens_seen": 76197224, + "step": 113055 + }, + { + "epoch": 2.762074609728092, + "grad_norm": 0.5368944406509399, + "learning_rate": 9.915917277711277e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76200808, + "step": 113060 + }, + { + "epoch": 2.7621967605599393, + "grad_norm": 0.029167726635932922, + "learning_rate": 9.915064532169058e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76203816, + "step": 113065 + }, + { + "epoch": 2.7623189113917865, + "grad_norm": 0.005114384926855564, + "learning_rate": 9.91421178724452e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76206952, + "step": 113070 + }, + { + "epoch": 2.7624410622236337, + "grad_norm": 0.03877369314432144, + "learning_rate": 9.913359042943848e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76210024, + "step": 113075 + }, + { + "epoch": 2.762563213055481, + "grad_norm": 672.5220947265625, + "learning_rate": 9.912506299273256e-07, + "loss": 0.0261, + "num_input_tokens_seen": 76213480, + "step": 113080 + }, + { + "epoch": 2.762685363887328, + "grad_norm": 0.17560061812400818, + "learning_rate": 9.911653556238945e-07, + "loss": 0.1107, + "num_input_tokens_seen": 76216488, + "step": 113085 + }, + { + "epoch": 2.7628075147191753, + "grad_norm": 0.04069105163216591, + "learning_rate": 9.910800813847107e-07, + "loss": 0.0676, + "num_input_tokens_seen": 76219816, + "step": 113090 + }, + { + "epoch": 2.7629296655510225, + "grad_norm": 0.014730570837855339, + "learning_rate": 9.909948072103956e-07, + "loss": 0.0007, + "num_input_tokens_seen": 76223144, + "step": 113095 + }, + { + "epoch": 2.7630518163828697, + "grad_norm": 0.13270990550518036, + "learning_rate": 9.90909533101568e-07, + "loss": 0.045, + "num_input_tokens_seen": 76226536, + "step": 113100 + }, + { + "epoch": 2.763173967214717, + "grad_norm": 0.04199433699250221, + "learning_rate": 9.908242590588494e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76230248, + "step": 113105 + }, + { + "epoch": 2.763296118046564, + "grad_norm": 0.035170089453458786, + "learning_rate": 9.907389850828586e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76233576, + "step": 113110 + }, + { + "epoch": 2.763418268878411, + "grad_norm": 0.08260912448167801, + "learning_rate": 9.906537111742167e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76237096, + "step": 113115 + }, + { + "epoch": 2.7635404197102584, + "grad_norm": 0.03758857026696205, + "learning_rate": 9.905684373335436e-07, + "loss": 0.0524, + "num_input_tokens_seen": 76240616, + "step": 113120 + }, + { + "epoch": 2.763662570542105, + "grad_norm": 0.22792379558086395, + "learning_rate": 9.90483163561459e-07, + "loss": 0.0543, + "num_input_tokens_seen": 76243496, + "step": 113125 + }, + { + "epoch": 2.763784721373953, + "grad_norm": 10.247224807739258, + "learning_rate": 9.90397889858584e-07, + "loss": 0.0777, + "num_input_tokens_seen": 76247144, + "step": 113130 + }, + { + "epoch": 2.7639068722057996, + "grad_norm": 0.026630444452166557, + "learning_rate": 9.903126162255379e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76250280, + "step": 113135 + }, + { + "epoch": 2.7640290230376467, + "grad_norm": 21.695283889770508, + "learning_rate": 9.902273426629406e-07, + "loss": 0.1067, + "num_input_tokens_seen": 76253608, + "step": 113140 + }, + { + "epoch": 2.764151173869494, + "grad_norm": 0.09523158520460129, + "learning_rate": 9.901420691714135e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76257640, + "step": 113145 + }, + { + "epoch": 2.764273324701341, + "grad_norm": 0.2219829559326172, + "learning_rate": 9.900567957515752e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76261096, + "step": 113150 + }, + { + "epoch": 2.7643954755331883, + "grad_norm": 0.3712293207645416, + "learning_rate": 9.89971522404047e-07, + "loss": 0.0451, + "num_input_tokens_seen": 76264552, + "step": 113155 + }, + { + "epoch": 2.7645176263650355, + "grad_norm": 0.12669548392295837, + "learning_rate": 9.898862491294483e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76267880, + "step": 113160 + }, + { + "epoch": 2.7646397771968827, + "grad_norm": 0.03212711215019226, + "learning_rate": 9.898009759283999e-07, + "loss": 0.0569, + "num_input_tokens_seen": 76271144, + "step": 113165 + }, + { + "epoch": 2.76476192802873, + "grad_norm": 0.030199235305190086, + "learning_rate": 9.89715702801521e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76274600, + "step": 113170 + }, + { + "epoch": 2.764884078860577, + "grad_norm": 0.034971047192811966, + "learning_rate": 9.896304297494327e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76277992, + "step": 113175 + }, + { + "epoch": 2.7650062296924243, + "grad_norm": 0.0070164743810892105, + "learning_rate": 9.895451567727544e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76281128, + "step": 113180 + }, + { + "epoch": 2.7651283805242715, + "grad_norm": 0.07414111495018005, + "learning_rate": 9.894598838721069e-07, + "loss": 0.0009, + "num_input_tokens_seen": 76284456, + "step": 113185 + }, + { + "epoch": 2.7652505313561186, + "grad_norm": 0.01181288342922926, + "learning_rate": 9.893746110481097e-07, + "loss": 0.07, + "num_input_tokens_seen": 76287656, + "step": 113190 + }, + { + "epoch": 2.765372682187966, + "grad_norm": 0.04185613617300987, + "learning_rate": 9.892893383013833e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76290856, + "step": 113195 + }, + { + "epoch": 2.765494833019813, + "grad_norm": 0.03417033702135086, + "learning_rate": 9.89204065632548e-07, + "loss": 0.1193, + "num_input_tokens_seen": 76293992, + "step": 113200 + }, + { + "epoch": 2.76561698385166, + "grad_norm": 0.003816602984443307, + "learning_rate": 9.89118793042223e-07, + "loss": 0.0896, + "num_input_tokens_seen": 76297128, + "step": 113205 + }, + { + "epoch": 2.765739134683507, + "grad_norm": 0.16992871463298798, + "learning_rate": 9.890335205310291e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76300648, + "step": 113210 + }, + { + "epoch": 2.7658612855153546, + "grad_norm": 0.0317666195333004, + "learning_rate": 9.88948248099587e-07, + "loss": 0.0428, + "num_input_tokens_seen": 76303656, + "step": 113215 + }, + { + "epoch": 2.7659834363472013, + "grad_norm": 0.01543817576020956, + "learning_rate": 9.888629757485156e-07, + "loss": 0.0539, + "num_input_tokens_seen": 76307560, + "step": 113220 + }, + { + "epoch": 2.7661055871790485, + "grad_norm": 0.03953710198402405, + "learning_rate": 9.88777703478436e-07, + "loss": 0.0398, + "num_input_tokens_seen": 76311336, + "step": 113225 + }, + { + "epoch": 2.7662277380108957, + "grad_norm": 0.045606087893247604, + "learning_rate": 9.886924312899679e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76314856, + "step": 113230 + }, + { + "epoch": 2.766349888842743, + "grad_norm": 4.622429370880127, + "learning_rate": 9.886071591837314e-07, + "loss": 0.0458, + "num_input_tokens_seen": 76318184, + "step": 113235 + }, + { + "epoch": 2.76647203967459, + "grad_norm": 0.03922732546925545, + "learning_rate": 9.88521887160347e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76321320, + "step": 113240 + }, + { + "epoch": 2.7665941905064373, + "grad_norm": 0.027470586821436882, + "learning_rate": 9.88436615220434e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76324648, + "step": 113245 + }, + { + "epoch": 2.7667163413382845, + "grad_norm": 0.04852357134222984, + "learning_rate": 9.883513433646135e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76327848, + "step": 113250 + }, + { + "epoch": 2.7668384921701317, + "grad_norm": 0.07106790691614151, + "learning_rate": 9.882660715935047e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76331496, + "step": 113255 + }, + { + "epoch": 2.766960643001979, + "grad_norm": 31.647287368774414, + "learning_rate": 9.881807999077288e-07, + "loss": 0.0431, + "num_input_tokens_seen": 76334568, + "step": 113260 + }, + { + "epoch": 2.767082793833826, + "grad_norm": 0.009211227297782898, + "learning_rate": 9.880955283079047e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76337704, + "step": 113265 + }, + { + "epoch": 2.7672049446656732, + "grad_norm": 0.010737738572061062, + "learning_rate": 9.880102567946533e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76341032, + "step": 113270 + }, + { + "epoch": 2.7673270954975204, + "grad_norm": 0.05103771388530731, + "learning_rate": 9.879249853685949e-07, + "loss": 0.1252, + "num_input_tokens_seen": 76344232, + "step": 113275 + }, + { + "epoch": 2.7674492463293676, + "grad_norm": 0.06809987127780914, + "learning_rate": 9.878397140303487e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76348008, + "step": 113280 + }, + { + "epoch": 2.767571397161215, + "grad_norm": 0.024205448105931282, + "learning_rate": 9.877544427805358e-07, + "loss": 0.0323, + "num_input_tokens_seen": 76351336, + "step": 113285 + }, + { + "epoch": 2.767693547993062, + "grad_norm": 0.06032427400350571, + "learning_rate": 9.876691716197759e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76354216, + "step": 113290 + }, + { + "epoch": 2.7678156988249087, + "grad_norm": 0.10438254475593567, + "learning_rate": 9.875839005486886e-07, + "loss": 0.0585, + "num_input_tokens_seen": 76357544, + "step": 113295 + }, + { + "epoch": 2.7679378496567564, + "grad_norm": 0.061745673418045044, + "learning_rate": 9.87498629567895e-07, + "loss": 0.0257, + "num_input_tokens_seen": 76361256, + "step": 113300 + }, + { + "epoch": 2.768060000488603, + "grad_norm": 0.004507183562964201, + "learning_rate": 9.874133586780145e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76364520, + "step": 113305 + }, + { + "epoch": 2.7681821513204508, + "grad_norm": 0.05256107077002525, + "learning_rate": 9.873280878796676e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76367528, + "step": 113310 + }, + { + "epoch": 2.7683043021522975, + "grad_norm": 0.01807519607245922, + "learning_rate": 9.87242817173474e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76370792, + "step": 113315 + }, + { + "epoch": 2.7684264529841447, + "grad_norm": 0.13760019838809967, + "learning_rate": 9.871575465600546e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76373736, + "step": 113320 + }, + { + "epoch": 2.768548603815992, + "grad_norm": 0.4591255486011505, + "learning_rate": 9.870722760400285e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76377128, + "step": 113325 + }, + { + "epoch": 2.768670754647839, + "grad_norm": 0.02008800208568573, + "learning_rate": 9.869870056140163e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76380968, + "step": 113330 + }, + { + "epoch": 2.7687929054796863, + "grad_norm": 0.006490596570074558, + "learning_rate": 9.869017352826382e-07, + "loss": 0.0426, + "num_input_tokens_seen": 76384168, + "step": 113335 + }, + { + "epoch": 2.7689150563115335, + "grad_norm": 0.00938922818750143, + "learning_rate": 9.86816465046514e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76387240, + "step": 113340 + }, + { + "epoch": 2.7690372071433806, + "grad_norm": 0.01878109946846962, + "learning_rate": 9.867311949062644e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76390504, + "step": 113345 + }, + { + "epoch": 2.769159357975228, + "grad_norm": 0.10273200273513794, + "learning_rate": 9.86645924862509e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76393576, + "step": 113350 + }, + { + "epoch": 2.769281508807075, + "grad_norm": 0.007617755327373743, + "learning_rate": 9.865606549158681e-07, + "loss": 0.0725, + "num_input_tokens_seen": 76397032, + "step": 113355 + }, + { + "epoch": 2.769403659638922, + "grad_norm": 0.0022685937583446503, + "learning_rate": 9.864753850669613e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76400744, + "step": 113360 + }, + { + "epoch": 2.7695258104707694, + "grad_norm": 0.01068595889955759, + "learning_rate": 9.863901153164094e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76404136, + "step": 113365 + }, + { + "epoch": 2.7696479613026166, + "grad_norm": 23.14859390258789, + "learning_rate": 9.863048456648324e-07, + "loss": 0.1539, + "num_input_tokens_seen": 76407528, + "step": 113370 + }, + { + "epoch": 2.769770112134464, + "grad_norm": 0.012229954823851585, + "learning_rate": 9.862195761128498e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76411176, + "step": 113375 + }, + { + "epoch": 2.7698922629663105, + "grad_norm": 16.60312271118164, + "learning_rate": 9.861343066610829e-07, + "loss": 0.0967, + "num_input_tokens_seen": 76414184, + "step": 113380 + }, + { + "epoch": 2.770014413798158, + "grad_norm": 0.0237701628357172, + "learning_rate": 9.860490373101503e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76417128, + "step": 113385 + }, + { + "epoch": 2.770136564630005, + "grad_norm": 0.19100302457809448, + "learning_rate": 9.859637680606732e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76420456, + "step": 113390 + }, + { + "epoch": 2.7702587154618525, + "grad_norm": 0.2962363064289093, + "learning_rate": 9.858784989132717e-07, + "loss": 0.0728, + "num_input_tokens_seen": 76423784, + "step": 113395 + }, + { + "epoch": 2.7703808662936993, + "grad_norm": 0.1180248036980629, + "learning_rate": 9.857932298685648e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76427624, + "step": 113400 + }, + { + "epoch": 2.7705030171255465, + "grad_norm": 0.014938372187316418, + "learning_rate": 9.85707960927174e-07, + "loss": 0.0504, + "num_input_tokens_seen": 76431272, + "step": 113405 + }, + { + "epoch": 2.7706251679573937, + "grad_norm": 16.964563369750977, + "learning_rate": 9.856226920897182e-07, + "loss": 0.0524, + "num_input_tokens_seen": 76434856, + "step": 113410 + }, + { + "epoch": 2.770747318789241, + "grad_norm": 0.04503167048096657, + "learning_rate": 9.855374233568186e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76437992, + "step": 113415 + }, + { + "epoch": 2.770869469621088, + "grad_norm": 0.04523736983537674, + "learning_rate": 9.854521547290942e-07, + "loss": 0.0984, + "num_input_tokens_seen": 76441448, + "step": 113420 + }, + { + "epoch": 2.7709916204529352, + "grad_norm": 2.487971544265747, + "learning_rate": 9.853668862071657e-07, + "loss": 0.0019, + "num_input_tokens_seen": 76444904, + "step": 113425 + }, + { + "epoch": 2.7711137712847824, + "grad_norm": 0.17408542335033417, + "learning_rate": 9.852816177916535e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76448040, + "step": 113430 + }, + { + "epoch": 2.7712359221166296, + "grad_norm": 0.1603502482175827, + "learning_rate": 9.851963494831771e-07, + "loss": 0.0007, + "num_input_tokens_seen": 76451304, + "step": 113435 + }, + { + "epoch": 2.771358072948477, + "grad_norm": 0.011682548560202122, + "learning_rate": 9.851110812823571e-07, + "loss": 0.0018, + "num_input_tokens_seen": 76454632, + "step": 113440 + }, + { + "epoch": 2.771480223780324, + "grad_norm": 2.488227128982544, + "learning_rate": 9.850258131898133e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76457896, + "step": 113445 + }, + { + "epoch": 2.771602374612171, + "grad_norm": 0.01607205905020237, + "learning_rate": 9.849405452061654e-07, + "loss": 0.0169, + "num_input_tokens_seen": 76461032, + "step": 113450 + }, + { + "epoch": 2.7717245254440184, + "grad_norm": 0.002345475135371089, + "learning_rate": 9.848552773320345e-07, + "loss": 0.0567, + "num_input_tokens_seen": 76464360, + "step": 113455 + }, + { + "epoch": 2.7718466762758656, + "grad_norm": 0.015486088581383228, + "learning_rate": 9.847700095680394e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76467624, + "step": 113460 + }, + { + "epoch": 2.7719688271077128, + "grad_norm": 0.03439730033278465, + "learning_rate": 9.846847419148016e-07, + "loss": 0.0483, + "num_input_tokens_seen": 76470952, + "step": 113465 + }, + { + "epoch": 2.77209097793956, + "grad_norm": 30.77482032775879, + "learning_rate": 9.8459947437294e-07, + "loss": 0.0459, + "num_input_tokens_seen": 76474280, + "step": 113470 + }, + { + "epoch": 2.7722131287714067, + "grad_norm": 0.06692664325237274, + "learning_rate": 9.845142069430754e-07, + "loss": 0.1802, + "num_input_tokens_seen": 76477608, + "step": 113475 + }, + { + "epoch": 2.7723352796032543, + "grad_norm": 25.74443244934082, + "learning_rate": 9.844289396258272e-07, + "loss": 0.0344, + "num_input_tokens_seen": 76480872, + "step": 113480 + }, + { + "epoch": 2.772457430435101, + "grad_norm": 0.021210841834545135, + "learning_rate": 9.843436724218163e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76484200, + "step": 113485 + }, + { + "epoch": 2.7725795812669487, + "grad_norm": 0.00724679185077548, + "learning_rate": 9.842584053316626e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76487592, + "step": 113490 + }, + { + "epoch": 2.7727017320987954, + "grad_norm": 0.018039369955658913, + "learning_rate": 9.841731383559857e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76490984, + "step": 113495 + }, + { + "epoch": 2.7728238829306426, + "grad_norm": 0.016924476251006126, + "learning_rate": 9.840878714954063e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76494120, + "step": 113500 + }, + { + "epoch": 2.77294603376249, + "grad_norm": 67.77898406982422, + "learning_rate": 9.840026047505438e-07, + "loss": 0.0468, + "num_input_tokens_seen": 76497384, + "step": 113505 + }, + { + "epoch": 2.773068184594337, + "grad_norm": 0.033945340663194656, + "learning_rate": 9.839173381220191e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76500840, + "step": 113510 + }, + { + "epoch": 2.773190335426184, + "grad_norm": 0.02062019146978855, + "learning_rate": 9.838320716104515e-07, + "loss": 0.151, + "num_input_tokens_seen": 76504744, + "step": 113515 + }, + { + "epoch": 2.7733124862580314, + "grad_norm": 0.00838180910795927, + "learning_rate": 9.837468052164612e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76507624, + "step": 113520 + }, + { + "epoch": 2.7734346370898786, + "grad_norm": 0.10621123760938644, + "learning_rate": 9.83661538940669e-07, + "loss": 0.0006, + "num_input_tokens_seen": 76510952, + "step": 113525 + }, + { + "epoch": 2.7735567879217258, + "grad_norm": 0.006988401524722576, + "learning_rate": 9.83576272783694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76514216, + "step": 113530 + }, + { + "epoch": 2.773678938753573, + "grad_norm": 0.00296860933303833, + "learning_rate": 9.834910067461574e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76517672, + "step": 113535 + }, + { + "epoch": 2.77380108958542, + "grad_norm": 0.03899238258600235, + "learning_rate": 9.834057408286782e-07, + "loss": 0.053, + "num_input_tokens_seen": 76521640, + "step": 113540 + }, + { + "epoch": 2.7739232404172673, + "grad_norm": 0.1324678212404251, + "learning_rate": 9.83320475031877e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76525480, + "step": 113545 + }, + { + "epoch": 2.7740453912491145, + "grad_norm": 0.02307271398603916, + "learning_rate": 9.83235209356374e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76529000, + "step": 113550 + }, + { + "epoch": 2.7741675420809617, + "grad_norm": 0.0023544637951999903, + "learning_rate": 9.831499438027888e-07, + "loss": 0.141, + "num_input_tokens_seen": 76532584, + "step": 113555 + }, + { + "epoch": 2.7742896929128085, + "grad_norm": 0.03203720226883888, + "learning_rate": 9.83064678371742e-07, + "loss": 0.0664, + "num_input_tokens_seen": 76535912, + "step": 113560 + }, + { + "epoch": 2.774411843744656, + "grad_norm": 0.5081039071083069, + "learning_rate": 9.82979413063853e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76539560, + "step": 113565 + }, + { + "epoch": 2.774533994576503, + "grad_norm": 0.0042310431599617004, + "learning_rate": 9.828941478797428e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76543144, + "step": 113570 + }, + { + "epoch": 2.7746561454083505, + "grad_norm": 0.03580312058329582, + "learning_rate": 9.828088828200303e-07, + "loss": 0.0469, + "num_input_tokens_seen": 76547368, + "step": 113575 + }, + { + "epoch": 2.7747782962401972, + "grad_norm": 0.06093088537454605, + "learning_rate": 9.827236178853366e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76550632, + "step": 113580 + }, + { + "epoch": 2.7749004470720444, + "grad_norm": 0.08521981537342072, + "learning_rate": 9.826383530762817e-07, + "loss": 0.115, + "num_input_tokens_seen": 76554152, + "step": 113585 + }, + { + "epoch": 2.7750225979038916, + "grad_norm": 0.011083260178565979, + "learning_rate": 9.825530883934847e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76557352, + "step": 113590 + }, + { + "epoch": 2.775144748735739, + "grad_norm": 0.0052310023456811905, + "learning_rate": 9.82467823837567e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76560808, + "step": 113595 + }, + { + "epoch": 2.775266899567586, + "grad_norm": 0.016578521579504013, + "learning_rate": 9.823825594091477e-07, + "loss": 0.0767, + "num_input_tokens_seen": 76564072, + "step": 113600 + }, + { + "epoch": 2.775389050399433, + "grad_norm": 0.04751870781183243, + "learning_rate": 9.822972951088473e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76567144, + "step": 113605 + }, + { + "epoch": 2.7755112012312804, + "grad_norm": 0.05532370135188103, + "learning_rate": 9.822120309372855e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76570472, + "step": 113610 + }, + { + "epoch": 2.7756333520631276, + "grad_norm": 0.03185422718524933, + "learning_rate": 9.821267668950824e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76573672, + "step": 113615 + }, + { + "epoch": 2.7757555028949747, + "grad_norm": 0.07462109625339508, + "learning_rate": 9.820415029828588e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76577000, + "step": 113620 + }, + { + "epoch": 2.775877653726822, + "grad_norm": 296.1712341308594, + "learning_rate": 9.81956239201234e-07, + "loss": 0.078, + "num_input_tokens_seen": 76580584, + "step": 113625 + }, + { + "epoch": 2.775999804558669, + "grad_norm": 0.011432225815951824, + "learning_rate": 9.818709755508284e-07, + "loss": 0.0664, + "num_input_tokens_seen": 76583848, + "step": 113630 + }, + { + "epoch": 2.7761219553905163, + "grad_norm": 0.08162835985422134, + "learning_rate": 9.817857120322615e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76587240, + "step": 113635 + }, + { + "epoch": 2.7762441062223635, + "grad_norm": 0.11095693707466125, + "learning_rate": 9.81700448646154e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76590632, + "step": 113640 + }, + { + "epoch": 2.7763662570542107, + "grad_norm": 155.89930725097656, + "learning_rate": 9.81615185393126e-07, + "loss": 0.058, + "num_input_tokens_seen": 76594536, + "step": 113645 + }, + { + "epoch": 2.776488407886058, + "grad_norm": 0.017484327778220177, + "learning_rate": 9.815299222737972e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76598184, + "step": 113650 + }, + { + "epoch": 2.7766105587179046, + "grad_norm": 0.4224332869052887, + "learning_rate": 9.814446592887878e-07, + "loss": 0.0623, + "num_input_tokens_seen": 76601128, + "step": 113655 + }, + { + "epoch": 2.7767327095497523, + "grad_norm": 0.009731501340866089, + "learning_rate": 9.813593964387177e-07, + "loss": 0.0008, + "num_input_tokens_seen": 76604968, + "step": 113660 + }, + { + "epoch": 2.776854860381599, + "grad_norm": 0.027277182787656784, + "learning_rate": 9.812741337242074e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76608680, + "step": 113665 + }, + { + "epoch": 2.776977011213446, + "grad_norm": 0.004735254216939211, + "learning_rate": 9.811888711458762e-07, + "loss": 0.0472, + "num_input_tokens_seen": 76612072, + "step": 113670 + }, + { + "epoch": 2.7770991620452934, + "grad_norm": 0.03370065987110138, + "learning_rate": 9.811036087043445e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76615592, + "step": 113675 + }, + { + "epoch": 2.7772213128771406, + "grad_norm": 0.3166724145412445, + "learning_rate": 9.81018346400233e-07, + "loss": 0.0837, + "num_input_tokens_seen": 76619176, + "step": 113680 + }, + { + "epoch": 2.7773434637089878, + "grad_norm": 0.03397630155086517, + "learning_rate": 9.809330842341607e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76623080, + "step": 113685 + }, + { + "epoch": 2.777465614540835, + "grad_norm": 0.015391331166028976, + "learning_rate": 9.808478222067487e-07, + "loss": 0.0598, + "num_input_tokens_seen": 76626344, + "step": 113690 + }, + { + "epoch": 2.777587765372682, + "grad_norm": 0.2570863366127014, + "learning_rate": 9.807625603186158e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76629736, + "step": 113695 + }, + { + "epoch": 2.7777099162045293, + "grad_norm": 0.017918601632118225, + "learning_rate": 9.80677298570383e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76632936, + "step": 113700 + }, + { + "epoch": 2.7778320670363765, + "grad_norm": 0.004007379058748484, + "learning_rate": 9.805920369626706e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76636264, + "step": 113705 + }, + { + "epoch": 2.7779542178682237, + "grad_norm": 806.7548217773438, + "learning_rate": 9.805067754960973e-07, + "loss": 0.0168, + "num_input_tokens_seen": 76639336, + "step": 113710 + }, + { + "epoch": 2.778076368700071, + "grad_norm": 0.07259964942932129, + "learning_rate": 9.804215141712848e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76642664, + "step": 113715 + }, + { + "epoch": 2.778198519531918, + "grad_norm": 0.005305212456732988, + "learning_rate": 9.803362529888516e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76645864, + "step": 113720 + }, + { + "epoch": 2.7783206703637653, + "grad_norm": 0.1281394064426422, + "learning_rate": 9.80250991949419e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76649256, + "step": 113725 + }, + { + "epoch": 2.7784428211956125, + "grad_norm": 0.01599547639489174, + "learning_rate": 9.80165731053606e-07, + "loss": 0.1172, + "num_input_tokens_seen": 76652456, + "step": 113730 + }, + { + "epoch": 2.7785649720274597, + "grad_norm": 0.38258928060531616, + "learning_rate": 9.800804703020331e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76656168, + "step": 113735 + }, + { + "epoch": 2.7786871228593064, + "grad_norm": 0.048715341836214066, + "learning_rate": 9.79995209695321e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76660584, + "step": 113740 + }, + { + "epoch": 2.778809273691154, + "grad_norm": 18.242271423339844, + "learning_rate": 9.799099492340885e-07, + "loss": 0.1553, + "num_input_tokens_seen": 76663912, + "step": 113745 + }, + { + "epoch": 2.778931424523001, + "grad_norm": 0.2122068852186203, + "learning_rate": 9.798246889189567e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76667048, + "step": 113750 + }, + { + "epoch": 2.7790535753548484, + "grad_norm": 44.100433349609375, + "learning_rate": 9.79739428750545e-07, + "loss": 0.0978, + "num_input_tokens_seen": 76670568, + "step": 113755 + }, + { + "epoch": 2.779175726186695, + "grad_norm": 0.013350458815693855, + "learning_rate": 9.796541687294738e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76674024, + "step": 113760 + }, + { + "epoch": 2.7792978770185424, + "grad_norm": 0.00019100087229162455, + "learning_rate": 9.795689088563626e-07, + "loss": 0.0366, + "num_input_tokens_seen": 76677288, + "step": 113765 + }, + { + "epoch": 2.7794200278503896, + "grad_norm": 0.0020818603225052357, + "learning_rate": 9.79483649131832e-07, + "loss": 0.0404, + "num_input_tokens_seen": 76681000, + "step": 113770 + }, + { + "epoch": 2.7795421786822367, + "grad_norm": 0.011318969540297985, + "learning_rate": 9.79398389556502e-07, + "loss": 0.0566, + "num_input_tokens_seen": 76684200, + "step": 113775 + }, + { + "epoch": 2.779664329514084, + "grad_norm": 0.10724736005067825, + "learning_rate": 9.79313130130992e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76687272, + "step": 113780 + }, + { + "epoch": 2.779786480345931, + "grad_norm": 0.0018801460973918438, + "learning_rate": 9.79227870855923e-07, + "loss": 0.0536, + "num_input_tokens_seen": 76690280, + "step": 113785 + }, + { + "epoch": 2.7799086311777783, + "grad_norm": 0.11404317617416382, + "learning_rate": 9.79142611731914e-07, + "loss": 0.0336, + "num_input_tokens_seen": 76693736, + "step": 113790 + }, + { + "epoch": 2.7800307820096255, + "grad_norm": 0.45159173011779785, + "learning_rate": 9.790573527595856e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76697064, + "step": 113795 + }, + { + "epoch": 2.7801529328414727, + "grad_norm": 0.07708978652954102, + "learning_rate": 9.789720939395581e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76700264, + "step": 113800 + }, + { + "epoch": 2.78027508367332, + "grad_norm": 0.009074263274669647, + "learning_rate": 9.78886835272451e-07, + "loss": 0.0105, + "num_input_tokens_seen": 76704040, + "step": 113805 + }, + { + "epoch": 2.780397234505167, + "grad_norm": 0.029474390670657158, + "learning_rate": 9.788015767588846e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76707752, + "step": 113810 + }, + { + "epoch": 2.7805193853370143, + "grad_norm": 0.17673642933368683, + "learning_rate": 9.787163183994787e-07, + "loss": 0.0331, + "num_input_tokens_seen": 76711400, + "step": 113815 + }, + { + "epoch": 2.7806415361688614, + "grad_norm": 0.039392951875925064, + "learning_rate": 9.786310601948538e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76715688, + "step": 113820 + }, + { + "epoch": 2.7807636870007086, + "grad_norm": 0.06219044327735901, + "learning_rate": 9.78545802145629e-07, + "loss": 0.0429, + "num_input_tokens_seen": 76719272, + "step": 113825 + }, + { + "epoch": 2.780885837832556, + "grad_norm": 0.03805144503712654, + "learning_rate": 9.784605442524252e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76722728, + "step": 113830 + }, + { + "epoch": 2.7810079886644026, + "grad_norm": 0.05186213180422783, + "learning_rate": 9.783752865158623e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76725480, + "step": 113835 + }, + { + "epoch": 2.78113013949625, + "grad_norm": 0.006760579068213701, + "learning_rate": 9.782900289365597e-07, + "loss": 0.1236, + "num_input_tokens_seen": 76729192, + "step": 113840 + }, + { + "epoch": 2.781252290328097, + "grad_norm": 0.06068001687526703, + "learning_rate": 9.782047715151384e-07, + "loss": 0.0393, + "num_input_tokens_seen": 76732648, + "step": 113845 + }, + { + "epoch": 2.781374441159944, + "grad_norm": 30.574745178222656, + "learning_rate": 9.781195142522175e-07, + "loss": 0.0977, + "num_input_tokens_seen": 76736104, + "step": 113850 + }, + { + "epoch": 2.7814965919917913, + "grad_norm": 0.034847330302000046, + "learning_rate": 9.780342571484174e-07, + "loss": 0.0477, + "num_input_tokens_seen": 76739816, + "step": 113855 + }, + { + "epoch": 2.7816187428236385, + "grad_norm": 0.008041945286095142, + "learning_rate": 9.779490002043584e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76743784, + "step": 113860 + }, + { + "epoch": 2.7817408936554857, + "grad_norm": 0.008885260671377182, + "learning_rate": 9.778637434206595e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76747496, + "step": 113865 + }, + { + "epoch": 2.781863044487333, + "grad_norm": 0.2046172320842743, + "learning_rate": 9.777784867979422e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76751080, + "step": 113870 + }, + { + "epoch": 2.78198519531918, + "grad_norm": 0.033650998026132584, + "learning_rate": 9.776932303368252e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76754408, + "step": 113875 + }, + { + "epoch": 2.7821073461510273, + "grad_norm": 0.011351230554282665, + "learning_rate": 9.776079740379294e-07, + "loss": 0.0373, + "num_input_tokens_seen": 76757672, + "step": 113880 + }, + { + "epoch": 2.7822294969828745, + "grad_norm": 0.11378277838230133, + "learning_rate": 9.77522717901874e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76761448, + "step": 113885 + }, + { + "epoch": 2.7823516478147217, + "grad_norm": 0.06339457631111145, + "learning_rate": 9.774374619292796e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76764904, + "step": 113890 + }, + { + "epoch": 2.782473798646569, + "grad_norm": 0.0020954750943928957, + "learning_rate": 9.773522061207664e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76768104, + "step": 113895 + }, + { + "epoch": 2.782595949478416, + "grad_norm": 0.01680098846554756, + "learning_rate": 9.772669504769534e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76771752, + "step": 113900 + }, + { + "epoch": 2.7827181003102632, + "grad_norm": 0.007107491604983807, + "learning_rate": 9.77181694998462e-07, + "loss": 0.062, + "num_input_tokens_seen": 76775080, + "step": 113905 + }, + { + "epoch": 2.7828402511421104, + "grad_norm": 0.0028211900498718023, + "learning_rate": 9.77096439685911e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76778408, + "step": 113910 + }, + { + "epoch": 2.7829624019739576, + "grad_norm": 0.037216681987047195, + "learning_rate": 9.770111845399209e-07, + "loss": 0.0615, + "num_input_tokens_seen": 76781544, + "step": 113915 + }, + { + "epoch": 2.7830845528058044, + "grad_norm": 0.05089016631245613, + "learning_rate": 9.769259295611117e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76784872, + "step": 113920 + }, + { + "epoch": 2.783206703637652, + "grad_norm": 0.051179852336645126, + "learning_rate": 9.768406747501032e-07, + "loss": 0.0311, + "num_input_tokens_seen": 76788392, + "step": 113925 + }, + { + "epoch": 2.7833288544694987, + "grad_norm": 0.02378770150244236, + "learning_rate": 9.76755420107516e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76791464, + "step": 113930 + }, + { + "epoch": 2.7834510053013464, + "grad_norm": 0.04368620738387108, + "learning_rate": 9.76670165633969e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76794664, + "step": 113935 + }, + { + "epoch": 2.783573156133193, + "grad_norm": 0.03758257254958153, + "learning_rate": 9.765849113300833e-07, + "loss": 0.0498, + "num_input_tokens_seen": 76797416, + "step": 113940 + }, + { + "epoch": 2.7836953069650403, + "grad_norm": 0.10067665576934814, + "learning_rate": 9.76499657196478e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76800680, + "step": 113945 + }, + { + "epoch": 2.7838174577968875, + "grad_norm": 0.0006436361582018435, + "learning_rate": 9.764144032337738e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76804008, + "step": 113950 + }, + { + "epoch": 2.7839396086287347, + "grad_norm": 0.003465403337031603, + "learning_rate": 9.763291494425904e-07, + "loss": 0.0, + "num_input_tokens_seen": 76807336, + "step": 113955 + }, + { + "epoch": 2.784061759460582, + "grad_norm": 0.02004861645400524, + "learning_rate": 9.762438958235479e-07, + "loss": 0.0464, + "num_input_tokens_seen": 76810664, + "step": 113960 + }, + { + "epoch": 2.784183910292429, + "grad_norm": 0.04684687405824661, + "learning_rate": 9.76158642377266e-07, + "loss": 0.0434, + "num_input_tokens_seen": 76813928, + "step": 113965 + }, + { + "epoch": 2.7843060611242763, + "grad_norm": 0.0895547866821289, + "learning_rate": 9.760733891043648e-07, + "loss": 0.0327, + "num_input_tokens_seen": 76816872, + "step": 113970 + }, + { + "epoch": 2.7844282119561234, + "grad_norm": 0.0200139582157135, + "learning_rate": 9.759881360054646e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76819880, + "step": 113975 + }, + { + "epoch": 2.7845503627879706, + "grad_norm": 20.02742576599121, + "learning_rate": 9.75902883081185e-07, + "loss": 0.0751, + "num_input_tokens_seen": 76822888, + "step": 113980 + }, + { + "epoch": 2.784672513619818, + "grad_norm": 0.012123535387217999, + "learning_rate": 9.758176303321458e-07, + "loss": 0.0019, + "num_input_tokens_seen": 76826216, + "step": 113985 + }, + { + "epoch": 2.784794664451665, + "grad_norm": 17.554426193237305, + "learning_rate": 9.757323777589678e-07, + "loss": 0.1272, + "num_input_tokens_seen": 76829608, + "step": 113990 + }, + { + "epoch": 2.784916815283512, + "grad_norm": 0.014771767891943455, + "learning_rate": 9.7564712536227e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76833256, + "step": 113995 + }, + { + "epoch": 2.7850389661153594, + "grad_norm": 0.06855263561010361, + "learning_rate": 9.755618731426735e-07, + "loss": 0.0843, + "num_input_tokens_seen": 76836840, + "step": 114000 + }, + { + "epoch": 2.785161116947206, + "grad_norm": 0.05960407853126526, + "learning_rate": 9.754766211007972e-07, + "loss": 0.0657, + "num_input_tokens_seen": 76839976, + "step": 114005 + }, + { + "epoch": 2.7852832677790538, + "grad_norm": 0.024061383679509163, + "learning_rate": 9.753913692372615e-07, + "loss": 0.049, + "num_input_tokens_seen": 76843112, + "step": 114010 + }, + { + "epoch": 2.7854054186109005, + "grad_norm": 0.060720350593328476, + "learning_rate": 9.753061175526867e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76846824, + "step": 114015 + }, + { + "epoch": 2.785527569442748, + "grad_norm": 0.14546024799346924, + "learning_rate": 9.752208660476919e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76850472, + "step": 114020 + }, + { + "epoch": 2.785649720274595, + "grad_norm": 0.4514792859554291, + "learning_rate": 9.751356147228982e-07, + "loss": 0.0228, + "num_input_tokens_seen": 76854568, + "step": 114025 + }, + { + "epoch": 2.785771871106442, + "grad_norm": 21.797420501708984, + "learning_rate": 9.750503635789246e-07, + "loss": 0.0465, + "num_input_tokens_seen": 76857832, + "step": 114030 + }, + { + "epoch": 2.7858940219382893, + "grad_norm": 46.9689826965332, + "learning_rate": 9.749651126163919e-07, + "loss": 0.0738, + "num_input_tokens_seen": 76861288, + "step": 114035 + }, + { + "epoch": 2.7860161727701365, + "grad_norm": 0.06661438196897507, + "learning_rate": 9.74879861835919e-07, + "loss": 0.0677, + "num_input_tokens_seen": 76864360, + "step": 114040 + }, + { + "epoch": 2.7861383236019837, + "grad_norm": 0.31695249676704407, + "learning_rate": 9.747946112381266e-07, + "loss": 0.0312, + "num_input_tokens_seen": 76867688, + "step": 114045 + }, + { + "epoch": 2.786260474433831, + "grad_norm": 0.10348512977361679, + "learning_rate": 9.747093608236352e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76871272, + "step": 114050 + }, + { + "epoch": 2.786382625265678, + "grad_norm": 0.31157082319259644, + "learning_rate": 9.746241105930634e-07, + "loss": 0.0739, + "num_input_tokens_seen": 76874728, + "step": 114055 + }, + { + "epoch": 2.7865047760975252, + "grad_norm": 0.11700203269720078, + "learning_rate": 9.745388605470324e-07, + "loss": 0.0991, + "num_input_tokens_seen": 76877736, + "step": 114060 + }, + { + "epoch": 2.7866269269293724, + "grad_norm": 0.0033500140998512506, + "learning_rate": 9.744536106861615e-07, + "loss": 0.0004, + "num_input_tokens_seen": 76880936, + "step": 114065 + }, + { + "epoch": 2.7867490777612196, + "grad_norm": 37.0404052734375, + "learning_rate": 9.74368361011071e-07, + "loss": 0.0363, + "num_input_tokens_seen": 76884840, + "step": 114070 + }, + { + "epoch": 2.786871228593067, + "grad_norm": 0.07285968214273453, + "learning_rate": 9.742831115223802e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76888104, + "step": 114075 + }, + { + "epoch": 2.786993379424914, + "grad_norm": 0.009817846119403839, + "learning_rate": 9.741978622207097e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76891560, + "step": 114080 + }, + { + "epoch": 2.787115530256761, + "grad_norm": 26.087810516357422, + "learning_rate": 9.741126131066796e-07, + "loss": 0.0989, + "num_input_tokens_seen": 76894888, + "step": 114085 + }, + { + "epoch": 2.7872376810886084, + "grad_norm": 0.24341338872909546, + "learning_rate": 9.740273641809092e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76898280, + "step": 114090 + }, + { + "epoch": 2.7873598319204556, + "grad_norm": 0.0460708923637867, + "learning_rate": 9.739421154440192e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76901608, + "step": 114095 + }, + { + "epoch": 2.7874819827523023, + "grad_norm": 0.03550918772816658, + "learning_rate": 9.738568668966286e-07, + "loss": 0.0602, + "num_input_tokens_seen": 76905064, + "step": 114100 + }, + { + "epoch": 2.78760413358415, + "grad_norm": 0.024629903957247734, + "learning_rate": 9.737716185393582e-07, + "loss": 0.0537, + "num_input_tokens_seen": 76908392, + "step": 114105 + }, + { + "epoch": 2.7877262844159967, + "grad_norm": 0.1366603970527649, + "learning_rate": 9.736863703728275e-07, + "loss": 0.0227, + "num_input_tokens_seen": 76911976, + "step": 114110 + }, + { + "epoch": 2.7878484352478443, + "grad_norm": 0.02549423649907112, + "learning_rate": 9.736011223976567e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76915304, + "step": 114115 + }, + { + "epoch": 2.787970586079691, + "grad_norm": 0.07304327934980392, + "learning_rate": 9.735158746144657e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76919016, + "step": 114120 + }, + { + "epoch": 2.7880927369115382, + "grad_norm": 0.07051575928926468, + "learning_rate": 9.734306270238744e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76921896, + "step": 114125 + }, + { + "epoch": 2.7882148877433854, + "grad_norm": 0.0368647538125515, + "learning_rate": 9.733453796265029e-07, + "loss": 0.0425, + "num_input_tokens_seen": 76925224, + "step": 114130 + }, + { + "epoch": 2.7883370385752326, + "grad_norm": 0.2852783799171448, + "learning_rate": 9.732601324229704e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76928808, + "step": 114135 + }, + { + "epoch": 2.78845918940708, + "grad_norm": 0.014169542118906975, + "learning_rate": 9.731748854138977e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76932200, + "step": 114140 + }, + { + "epoch": 2.788581340238927, + "grad_norm": 0.03942190483212471, + "learning_rate": 9.730896385999045e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76935528, + "step": 114145 + }, + { + "epoch": 2.788703491070774, + "grad_norm": 0.01825009100139141, + "learning_rate": 9.730043919816104e-07, + "loss": 0.0325, + "num_input_tokens_seen": 76938536, + "step": 114150 + }, + { + "epoch": 2.7888256419026214, + "grad_norm": 0.017786700278520584, + "learning_rate": 9.72919145559636e-07, + "loss": 0.0001, + "num_input_tokens_seen": 76942248, + "step": 114155 + }, + { + "epoch": 2.7889477927344686, + "grad_norm": 0.05002596229314804, + "learning_rate": 9.728338993346007e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76945448, + "step": 114160 + }, + { + "epoch": 2.7890699435663158, + "grad_norm": 0.05178292095661163, + "learning_rate": 9.727486533071248e-07, + "loss": 0.0501, + "num_input_tokens_seen": 76949160, + "step": 114165 + }, + { + "epoch": 2.789192094398163, + "grad_norm": 0.027847200632095337, + "learning_rate": 9.72663407477828e-07, + "loss": 0.128, + "num_input_tokens_seen": 76952232, + "step": 114170 + }, + { + "epoch": 2.78931424523001, + "grad_norm": 0.3631141781806946, + "learning_rate": 9.7257816184733e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76955368, + "step": 114175 + }, + { + "epoch": 2.7894363960618573, + "grad_norm": 0.014581491239368916, + "learning_rate": 9.724929164162512e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76958568, + "step": 114180 + }, + { + "epoch": 2.789558546893704, + "grad_norm": 0.004223099909722805, + "learning_rate": 9.72407671185211e-07, + "loss": 0.0391, + "num_input_tokens_seen": 76962344, + "step": 114185 + }, + { + "epoch": 2.7896806977255517, + "grad_norm": 0.13153673708438873, + "learning_rate": 9.7232242615483e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76965864, + "step": 114190 + }, + { + "epoch": 2.7898028485573985, + "grad_norm": 0.011691474355757236, + "learning_rate": 9.722371813257274e-07, + "loss": 0.1431, + "num_input_tokens_seen": 76969064, + "step": 114195 + }, + { + "epoch": 2.789924999389246, + "grad_norm": 0.09665603190660477, + "learning_rate": 9.721519366985234e-07, + "loss": 0.0559, + "num_input_tokens_seen": 76972840, + "step": 114200 + }, + { + "epoch": 2.790047150221093, + "grad_norm": 14.975530624389648, + "learning_rate": 9.720666922738386e-07, + "loss": 0.0503, + "num_input_tokens_seen": 76975976, + "step": 114205 + }, + { + "epoch": 2.79016930105294, + "grad_norm": 0.27885282039642334, + "learning_rate": 9.719814480522918e-07, + "loss": 0.0392, + "num_input_tokens_seen": 76979496, + "step": 114210 + }, + { + "epoch": 2.790291451884787, + "grad_norm": 0.14521782100200653, + "learning_rate": 9.718962040345038e-07, + "loss": 0.0003, + "num_input_tokens_seen": 76982952, + "step": 114215 + }, + { + "epoch": 2.7904136027166344, + "grad_norm": 0.1501213163137436, + "learning_rate": 9.718109602210941e-07, + "loss": 0.0005, + "num_input_tokens_seen": 76986408, + "step": 114220 + }, + { + "epoch": 2.7905357535484816, + "grad_norm": 0.28133001923561096, + "learning_rate": 9.717257166126827e-07, + "loss": 0.0251, + "num_input_tokens_seen": 76989480, + "step": 114225 + }, + { + "epoch": 2.790657904380329, + "grad_norm": 0.08251863718032837, + "learning_rate": 9.716404732098894e-07, + "loss": 0.1115, + "num_input_tokens_seen": 76993064, + "step": 114230 + }, + { + "epoch": 2.790780055212176, + "grad_norm": 0.23545780777931213, + "learning_rate": 9.71555230013334e-07, + "loss": 0.0002, + "num_input_tokens_seen": 76996264, + "step": 114235 + }, + { + "epoch": 2.790902206044023, + "grad_norm": 0.00977549608796835, + "learning_rate": 9.71469987023637e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77000040, + "step": 114240 + }, + { + "epoch": 2.7910243568758704, + "grad_norm": 0.21447953581809998, + "learning_rate": 9.713847442414174e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77003496, + "step": 114245 + }, + { + "epoch": 2.7911465077077176, + "grad_norm": 0.13338632881641388, + "learning_rate": 9.712995016672963e-07, + "loss": 0.106, + "num_input_tokens_seen": 77006952, + "step": 114250 + }, + { + "epoch": 2.7912686585395647, + "grad_norm": 0.01698482036590576, + "learning_rate": 9.712142593018926e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77010536, + "step": 114255 + }, + { + "epoch": 2.791390809371412, + "grad_norm": 42.586021423339844, + "learning_rate": 9.711290171458265e-07, + "loss": 0.1619, + "num_input_tokens_seen": 77014248, + "step": 114260 + }, + { + "epoch": 2.791512960203259, + "grad_norm": 0.04516065493226051, + "learning_rate": 9.71043775199718e-07, + "loss": 0.0399, + "num_input_tokens_seen": 77017896, + "step": 114265 + }, + { + "epoch": 2.7916351110351063, + "grad_norm": 0.025962043553590775, + "learning_rate": 9.70958533464187e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77021416, + "step": 114270 + }, + { + "epoch": 2.7917572618669535, + "grad_norm": 0.014167881570756435, + "learning_rate": 9.708732919398534e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77025064, + "step": 114275 + }, + { + "epoch": 2.7918794126988002, + "grad_norm": 0.010760514996945858, + "learning_rate": 9.707880506273369e-07, + "loss": 0.0005, + "num_input_tokens_seen": 77028520, + "step": 114280 + }, + { + "epoch": 2.792001563530648, + "grad_norm": 0.07058648020029068, + "learning_rate": 9.70702809527258e-07, + "loss": 0.0312, + "num_input_tokens_seen": 77031784, + "step": 114285 + }, + { + "epoch": 2.7921237143624946, + "grad_norm": 0.005182285327464342, + "learning_rate": 9.706175686402354e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77034792, + "step": 114290 + }, + { + "epoch": 2.792245865194342, + "grad_norm": 0.00801653228700161, + "learning_rate": 9.7053232796689e-07, + "loss": 0.0019, + "num_input_tokens_seen": 77037992, + "step": 114295 + }, + { + "epoch": 2.792368016026189, + "grad_norm": 0.027459247037768364, + "learning_rate": 9.704470875078419e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77041320, + "step": 114300 + }, + { + "epoch": 2.792490166858036, + "grad_norm": 35.760154724121094, + "learning_rate": 9.7036184726371e-07, + "loss": 0.0492, + "num_input_tokens_seen": 77044712, + "step": 114305 + }, + { + "epoch": 2.7926123176898834, + "grad_norm": 0.10340413451194763, + "learning_rate": 9.70276607235115e-07, + "loss": 0.001, + "num_input_tokens_seen": 77047720, + "step": 114310 + }, + { + "epoch": 2.7927344685217306, + "grad_norm": 0.06875290721654892, + "learning_rate": 9.701913674226764e-07, + "loss": 0.0556, + "num_input_tokens_seen": 77051048, + "step": 114315 + }, + { + "epoch": 2.7928566193535778, + "grad_norm": 0.0017848052084445953, + "learning_rate": 9.701061278270143e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77054504, + "step": 114320 + }, + { + "epoch": 2.792978770185425, + "grad_norm": 0.037753038108348846, + "learning_rate": 9.700208884487485e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77057704, + "step": 114325 + }, + { + "epoch": 2.793100921017272, + "grad_norm": 746.1931762695312, + "learning_rate": 9.699356492884986e-07, + "loss": 0.005, + "num_input_tokens_seen": 77061096, + "step": 114330 + }, + { + "epoch": 2.7932230718491193, + "grad_norm": 0.17954334616661072, + "learning_rate": 9.698504103468851e-07, + "loss": 0.0504, + "num_input_tokens_seen": 77064424, + "step": 114335 + }, + { + "epoch": 2.7933452226809665, + "grad_norm": 16.79789924621582, + "learning_rate": 9.697651716245271e-07, + "loss": 0.0347, + "num_input_tokens_seen": 77067688, + "step": 114340 + }, + { + "epoch": 2.7934673735128137, + "grad_norm": 0.002543478272855282, + "learning_rate": 9.696799331220453e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77071016, + "step": 114345 + }, + { + "epoch": 2.793589524344661, + "grad_norm": 0.00716570857912302, + "learning_rate": 9.69594694840059e-07, + "loss": 0.0, + "num_input_tokens_seen": 77074408, + "step": 114350 + }, + { + "epoch": 2.793711675176508, + "grad_norm": 0.004741067066788673, + "learning_rate": 9.69509456779188e-07, + "loss": 0.0513, + "num_input_tokens_seen": 77077800, + "step": 114355 + }, + { + "epoch": 2.7938338260083553, + "grad_norm": 0.0017748570535331964, + "learning_rate": 9.694242189400528e-07, + "loss": 0.0007, + "num_input_tokens_seen": 77081128, + "step": 114360 + }, + { + "epoch": 2.793955976840202, + "grad_norm": 0.013606770895421505, + "learning_rate": 9.693389813232727e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77084328, + "step": 114365 + }, + { + "epoch": 2.7940781276720497, + "grad_norm": 0.0782063752412796, + "learning_rate": 9.69253743929468e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77087080, + "step": 114370 + }, + { + "epoch": 2.7942002785038964, + "grad_norm": 0.0396018847823143, + "learning_rate": 9.691685067592584e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77090344, + "step": 114375 + }, + { + "epoch": 2.794322429335744, + "grad_norm": 0.0037475209683179855, + "learning_rate": 9.690832698132636e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77093864, + "step": 114380 + }, + { + "epoch": 2.794444580167591, + "grad_norm": 0.4666299819946289, + "learning_rate": 9.689980330921035e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77097384, + "step": 114385 + }, + { + "epoch": 2.794566730999438, + "grad_norm": 0.09782411903142929, + "learning_rate": 9.689127965963978e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77100904, + "step": 114390 + }, + { + "epoch": 2.794688881831285, + "grad_norm": 0.005147072486579418, + "learning_rate": 9.68827560326767e-07, + "loss": 0.1179, + "num_input_tokens_seen": 77104168, + "step": 114395 + }, + { + "epoch": 2.7948110326631324, + "grad_norm": 0.2189302146434784, + "learning_rate": 9.687423242838303e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77107304, + "step": 114400 + }, + { + "epoch": 2.7949331834949795, + "grad_norm": 0.0006944366032257676, + "learning_rate": 9.686570884682082e-07, + "loss": 0.0501, + "num_input_tokens_seen": 77110312, + "step": 114405 + }, + { + "epoch": 2.7950553343268267, + "grad_norm": 0.012314150109887123, + "learning_rate": 9.685718528805199e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77113896, + "step": 114410 + }, + { + "epoch": 2.795177485158674, + "grad_norm": 0.013810068368911743, + "learning_rate": 9.684866175213856e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77117352, + "step": 114415 + }, + { + "epoch": 2.795299635990521, + "grad_norm": 0.056724026799201965, + "learning_rate": 9.68401382391425e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77120744, + "step": 114420 + }, + { + "epoch": 2.7954217868223683, + "grad_norm": 0.0070975953713059425, + "learning_rate": 9.68316147491258e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77124200, + "step": 114425 + }, + { + "epoch": 2.7955439376542155, + "grad_norm": 0.0037659115623682737, + "learning_rate": 9.682309128215047e-07, + "loss": 0.1171, + "num_input_tokens_seen": 77127720, + "step": 114430 + }, + { + "epoch": 2.7956660884860627, + "grad_norm": 0.018607452511787415, + "learning_rate": 9.681456783827848e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77131048, + "step": 114435 + }, + { + "epoch": 2.79578823931791, + "grad_norm": 0.010145665146410465, + "learning_rate": 9.68060444175718e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77134760, + "step": 114440 + }, + { + "epoch": 2.795910390149757, + "grad_norm": 0.027114300057291985, + "learning_rate": 9.67975210200924e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77138088, + "step": 114445 + }, + { + "epoch": 2.796032540981604, + "grad_norm": 0.002864605514332652, + "learning_rate": 9.67889976459023e-07, + "loss": 0.066, + "num_input_tokens_seen": 77141352, + "step": 114450 + }, + { + "epoch": 2.7961546918134514, + "grad_norm": 0.23206019401550293, + "learning_rate": 9.678047429506352e-07, + "loss": 0.0845, + "num_input_tokens_seen": 77145064, + "step": 114455 + }, + { + "epoch": 2.796276842645298, + "grad_norm": 6.529763049911708e-05, + "learning_rate": 9.677195096763791e-07, + "loss": 0.0015, + "num_input_tokens_seen": 77148072, + "step": 114460 + }, + { + "epoch": 2.796398993477146, + "grad_norm": 2.753253221511841, + "learning_rate": 9.676342766368763e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77151272, + "step": 114465 + }, + { + "epoch": 2.7965211443089926, + "grad_norm": 0.004751298110932112, + "learning_rate": 9.67549043832745e-07, + "loss": 0.0525, + "num_input_tokens_seen": 77154664, + "step": 114470 + }, + { + "epoch": 2.7966432951408398, + "grad_norm": 33.22373962402344, + "learning_rate": 9.674638112646065e-07, + "loss": 0.1032, + "num_input_tokens_seen": 77157992, + "step": 114475 + }, + { + "epoch": 2.796765445972687, + "grad_norm": 3.374569892883301, + "learning_rate": 9.673785789330795e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77161640, + "step": 114480 + }, + { + "epoch": 2.796887596804534, + "grad_norm": 0.013949088752269745, + "learning_rate": 9.67293346838784e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77164968, + "step": 114485 + }, + { + "epoch": 2.7970097476363813, + "grad_norm": 0.10943493992090225, + "learning_rate": 9.672081149823406e-07, + "loss": 0.0677, + "num_input_tokens_seen": 77168488, + "step": 114490 + }, + { + "epoch": 2.7971318984682285, + "grad_norm": 0.015567791648209095, + "learning_rate": 9.671228833643683e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77171816, + "step": 114495 + }, + { + "epoch": 2.7972540493000757, + "grad_norm": 0.01357315480709076, + "learning_rate": 9.670376519854874e-07, + "loss": 0.0961, + "num_input_tokens_seen": 77176040, + "step": 114500 + }, + { + "epoch": 2.797376200131923, + "grad_norm": 0.015812013298273087, + "learning_rate": 9.669524208463172e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77179624, + "step": 114505 + }, + { + "epoch": 2.79749835096377, + "grad_norm": 31.484952926635742, + "learning_rate": 9.66867189947478e-07, + "loss": 0.0786, + "num_input_tokens_seen": 77183208, + "step": 114510 + }, + { + "epoch": 2.7976205017956173, + "grad_norm": 18.537179946899414, + "learning_rate": 9.667819592895899e-07, + "loss": 0.1302, + "num_input_tokens_seen": 77186408, + "step": 114515 + }, + { + "epoch": 2.7977426526274645, + "grad_norm": 0.026167120784521103, + "learning_rate": 9.666967288732719e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77189992, + "step": 114520 + }, + { + "epoch": 2.7978648034593117, + "grad_norm": 37.67267608642578, + "learning_rate": 9.666114986991446e-07, + "loss": 0.0535, + "num_input_tokens_seen": 77193064, + "step": 114525 + }, + { + "epoch": 2.797986954291159, + "grad_norm": 0.6203153729438782, + "learning_rate": 9.665262687678273e-07, + "loss": 0.0011, + "num_input_tokens_seen": 77196200, + "step": 114530 + }, + { + "epoch": 2.798109105123006, + "grad_norm": 0.01730647124350071, + "learning_rate": 9.6644103907994e-07, + "loss": 0.0468, + "num_input_tokens_seen": 77199464, + "step": 114535 + }, + { + "epoch": 2.7982312559548532, + "grad_norm": 0.03615279495716095, + "learning_rate": 9.663558096361023e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77202792, + "step": 114540 + }, + { + "epoch": 2.7983534067867, + "grad_norm": 0.05028081312775612, + "learning_rate": 9.662705804369343e-07, + "loss": 0.1592, + "num_input_tokens_seen": 77206248, + "step": 114545 + }, + { + "epoch": 2.7984755576185476, + "grad_norm": 84.04562377929688, + "learning_rate": 9.66185351483056e-07, + "loss": 0.0375, + "num_input_tokens_seen": 77209512, + "step": 114550 + }, + { + "epoch": 2.7985977084503944, + "grad_norm": 24.186880111694336, + "learning_rate": 9.661001227750864e-07, + "loss": 0.0478, + "num_input_tokens_seen": 77212648, + "step": 114555 + }, + { + "epoch": 2.798719859282242, + "grad_norm": 8.545869827270508, + "learning_rate": 9.660148943136465e-07, + "loss": 0.1093, + "num_input_tokens_seen": 77215784, + "step": 114560 + }, + { + "epoch": 2.7988420101140887, + "grad_norm": 0.02828356996178627, + "learning_rate": 9.659296660993548e-07, + "loss": 0.0492, + "num_input_tokens_seen": 77219752, + "step": 114565 + }, + { + "epoch": 2.798964160945936, + "grad_norm": 0.0055162059143185616, + "learning_rate": 9.65844438132832e-07, + "loss": 0.0809, + "num_input_tokens_seen": 77223208, + "step": 114570 + }, + { + "epoch": 2.799086311777783, + "grad_norm": 0.015754438936710358, + "learning_rate": 9.657592104146976e-07, + "loss": 0.045, + "num_input_tokens_seen": 77226216, + "step": 114575 + }, + { + "epoch": 2.7992084626096303, + "grad_norm": 0.01425847876816988, + "learning_rate": 9.656739829455712e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77229928, + "step": 114580 + }, + { + "epoch": 2.7993306134414775, + "grad_norm": 0.19158978760242462, + "learning_rate": 9.655887557260731e-07, + "loss": 0.0017, + "num_input_tokens_seen": 77233384, + "step": 114585 + }, + { + "epoch": 2.7994527642733247, + "grad_norm": 0.010324357077479362, + "learning_rate": 9.655035287568229e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77237288, + "step": 114590 + }, + { + "epoch": 2.799574915105172, + "grad_norm": 0.6244637370109558, + "learning_rate": 9.654183020384405e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77240552, + "step": 114595 + }, + { + "epoch": 2.799697065937019, + "grad_norm": 0.024087099358439445, + "learning_rate": 9.65333075571545e-07, + "loss": 0.0413, + "num_input_tokens_seen": 77243688, + "step": 114600 + }, + { + "epoch": 2.7998192167688662, + "grad_norm": 39.12160873413086, + "learning_rate": 9.652478493567566e-07, + "loss": 0.1785, + "num_input_tokens_seen": 77246824, + "step": 114605 + }, + { + "epoch": 2.7999413676007134, + "grad_norm": 24.444210052490234, + "learning_rate": 9.651626233946959e-07, + "loss": 0.0537, + "num_input_tokens_seen": 77249896, + "step": 114610 + }, + { + "epoch": 2.8000635184325606, + "grad_norm": 0.05592348426580429, + "learning_rate": 9.650773976859812e-07, + "loss": 0.0716, + "num_input_tokens_seen": 77252968, + "step": 114615 + }, + { + "epoch": 2.800185669264408, + "grad_norm": 0.1441681683063507, + "learning_rate": 9.649921722312337e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77256552, + "step": 114620 + }, + { + "epoch": 2.800307820096255, + "grad_norm": 0.015336308628320694, + "learning_rate": 9.64906947031072e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77259944, + "step": 114625 + }, + { + "epoch": 2.8004299709281018, + "grad_norm": 0.12260361760854721, + "learning_rate": 9.64821722086117e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77263080, + "step": 114630 + }, + { + "epoch": 2.8005521217599494, + "grad_norm": 0.08802598714828491, + "learning_rate": 9.647364973969876e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77266344, + "step": 114635 + }, + { + "epoch": 2.800674272591796, + "grad_norm": 0.02480044774711132, + "learning_rate": 9.646512729643037e-07, + "loss": 0.0404, + "num_input_tokens_seen": 77269800, + "step": 114640 + }, + { + "epoch": 2.8007964234236438, + "grad_norm": 0.0269177183508873, + "learning_rate": 9.645660487886856e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77273128, + "step": 114645 + }, + { + "epoch": 2.8009185742554905, + "grad_norm": 0.086511991918087, + "learning_rate": 9.644808248707523e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77276328, + "step": 114650 + }, + { + "epoch": 2.8010407250873377, + "grad_norm": 0.13102589547634125, + "learning_rate": 9.643956012111247e-07, + "loss": 0.0009, + "num_input_tokens_seen": 77279976, + "step": 114655 + }, + { + "epoch": 2.801162875919185, + "grad_norm": 0.38331305980682373, + "learning_rate": 9.64310377810421e-07, + "loss": 0.0442, + "num_input_tokens_seen": 77283176, + "step": 114660 + }, + { + "epoch": 2.801285026751032, + "grad_norm": 0.055746957659721375, + "learning_rate": 9.642251546692621e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77286312, + "step": 114665 + }, + { + "epoch": 2.8014071775828793, + "grad_norm": 29.84088897705078, + "learning_rate": 9.641399317882678e-07, + "loss": 0.0478, + "num_input_tokens_seen": 77289704, + "step": 114670 + }, + { + "epoch": 2.8015293284147265, + "grad_norm": 0.005420892499387264, + "learning_rate": 9.640547091680572e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77293416, + "step": 114675 + }, + { + "epoch": 2.8016514792465737, + "grad_norm": 0.006929678376764059, + "learning_rate": 9.639694868092509e-07, + "loss": 0.0008, + "num_input_tokens_seen": 77297128, + "step": 114680 + }, + { + "epoch": 2.801773630078421, + "grad_norm": 0.06219131126999855, + "learning_rate": 9.638842647124679e-07, + "loss": 0.137, + "num_input_tokens_seen": 77300904, + "step": 114685 + }, + { + "epoch": 2.801895780910268, + "grad_norm": 0.08001910150051117, + "learning_rate": 9.637990428783282e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77304680, + "step": 114690 + }, + { + "epoch": 2.802017931742115, + "grad_norm": 0.016454750671982765, + "learning_rate": 9.637138213074516e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77307816, + "step": 114695 + }, + { + "epoch": 2.8021400825739624, + "grad_norm": 0.04410364478826523, + "learning_rate": 9.636286000004578e-07, + "loss": 0.0464, + "num_input_tokens_seen": 77311016, + "step": 114700 + }, + { + "epoch": 2.8022622334058096, + "grad_norm": 0.5116713643074036, + "learning_rate": 9.63543378957967e-07, + "loss": 0.0951, + "num_input_tokens_seen": 77314152, + "step": 114705 + }, + { + "epoch": 2.802384384237657, + "grad_norm": 19.533918380737305, + "learning_rate": 9.63458158180598e-07, + "loss": 0.0961, + "num_input_tokens_seen": 77318184, + "step": 114710 + }, + { + "epoch": 2.802506535069504, + "grad_norm": 0.10283757001161575, + "learning_rate": 9.633729376689715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77321448, + "step": 114715 + }, + { + "epoch": 2.802628685901351, + "grad_norm": 0.09375852346420288, + "learning_rate": 9.632877174237066e-07, + "loss": 0.0527, + "num_input_tokens_seen": 77324584, + "step": 114720 + }, + { + "epoch": 2.802750836733198, + "grad_norm": 0.022160667926073074, + "learning_rate": 9.632024974454233e-07, + "loss": 0.0005, + "num_input_tokens_seen": 77328680, + "step": 114725 + }, + { + "epoch": 2.8028729875650455, + "grad_norm": 42.232276916503906, + "learning_rate": 9.631172777347414e-07, + "loss": 0.0593, + "num_input_tokens_seen": 77331816, + "step": 114730 + }, + { + "epoch": 2.8029951383968923, + "grad_norm": 0.039244040846824646, + "learning_rate": 9.630320582922805e-07, + "loss": 0.139, + "num_input_tokens_seen": 77334888, + "step": 114735 + }, + { + "epoch": 2.8031172892287395, + "grad_norm": 36.34895706176758, + "learning_rate": 9.629468391186605e-07, + "loss": 0.0428, + "num_input_tokens_seen": 77340328, + "step": 114740 + }, + { + "epoch": 2.8032394400605867, + "grad_norm": 0.02295946702361107, + "learning_rate": 9.628616202145012e-07, + "loss": 0.0409, + "num_input_tokens_seen": 77343464, + "step": 114745 + }, + { + "epoch": 2.803361590892434, + "grad_norm": 0.11297443509101868, + "learning_rate": 9.627764015804223e-07, + "loss": 0.0007, + "num_input_tokens_seen": 77346856, + "step": 114750 + }, + { + "epoch": 2.803483741724281, + "grad_norm": 25.889450073242188, + "learning_rate": 9.62691183217043e-07, + "loss": 0.0529, + "num_input_tokens_seen": 77350376, + "step": 114755 + }, + { + "epoch": 2.8036058925561282, + "grad_norm": 0.05398506671190262, + "learning_rate": 9.626059651249834e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77353768, + "step": 114760 + }, + { + "epoch": 2.8037280433879754, + "grad_norm": 0.10572467744350433, + "learning_rate": 9.625207473048638e-07, + "loss": 0.0332, + "num_input_tokens_seen": 77357224, + "step": 114765 + }, + { + "epoch": 2.8038501942198226, + "grad_norm": 0.03468018025159836, + "learning_rate": 9.624355297573028e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77360616, + "step": 114770 + }, + { + "epoch": 2.80397234505167, + "grad_norm": 0.0012982026673853397, + "learning_rate": 9.623503124829213e-07, + "loss": 0.0404, + "num_input_tokens_seen": 77363816, + "step": 114775 + }, + { + "epoch": 2.804094495883517, + "grad_norm": 0.03607716038823128, + "learning_rate": 9.622650954823378e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77367080, + "step": 114780 + }, + { + "epoch": 2.804216646715364, + "grad_norm": 0.006853197701275349, + "learning_rate": 9.621798787561736e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77370280, + "step": 114785 + }, + { + "epoch": 2.8043387975472114, + "grad_norm": 0.03181392699480057, + "learning_rate": 9.620946623050468e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77374632, + "step": 114790 + }, + { + "epoch": 2.8044609483790586, + "grad_norm": 0.04052650183439255, + "learning_rate": 9.620094461295779e-07, + "loss": 0.0841, + "num_input_tokens_seen": 77378216, + "step": 114795 + }, + { + "epoch": 2.8045830992109058, + "grad_norm": 0.0003261482634115964, + "learning_rate": 9.619242302303867e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77382184, + "step": 114800 + }, + { + "epoch": 2.804705250042753, + "grad_norm": 0.21794497966766357, + "learning_rate": 9.618390146080925e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77385256, + "step": 114805 + }, + { + "epoch": 2.8048274008745997, + "grad_norm": 0.42617887258529663, + "learning_rate": 9.617537992633155e-07, + "loss": 0.0314, + "num_input_tokens_seen": 77388456, + "step": 114810 + }, + { + "epoch": 2.8049495517064473, + "grad_norm": 0.010697565041482449, + "learning_rate": 9.61668584196675e-07, + "loss": 0.1347, + "num_input_tokens_seen": 77392360, + "step": 114815 + }, + { + "epoch": 2.805071702538294, + "grad_norm": 0.01782727800309658, + "learning_rate": 9.615833694087908e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77395688, + "step": 114820 + }, + { + "epoch": 2.8051938533701417, + "grad_norm": 0.013532194308936596, + "learning_rate": 9.614981549002828e-07, + "loss": 0.0527, + "num_input_tokens_seen": 77398632, + "step": 114825 + }, + { + "epoch": 2.8053160042019885, + "grad_norm": 0.047766704112291336, + "learning_rate": 9.614129406717703e-07, + "loss": 0.0573, + "num_input_tokens_seen": 77401960, + "step": 114830 + }, + { + "epoch": 2.8054381550338356, + "grad_norm": 0.08423038572072983, + "learning_rate": 9.61327726723874e-07, + "loss": 0.053, + "num_input_tokens_seen": 77404968, + "step": 114835 + }, + { + "epoch": 2.805560305865683, + "grad_norm": 0.044354844838380814, + "learning_rate": 9.612425130572124e-07, + "loss": 0.1165, + "num_input_tokens_seen": 77408488, + "step": 114840 + }, + { + "epoch": 2.80568245669753, + "grad_norm": 0.06138172373175621, + "learning_rate": 9.611572996724055e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77411688, + "step": 114845 + }, + { + "epoch": 2.805804607529377, + "grad_norm": 0.002905749948695302, + "learning_rate": 9.610720865700735e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77414760, + "step": 114850 + }, + { + "epoch": 2.8059267583612244, + "grad_norm": 0.08945734798908234, + "learning_rate": 9.609868737508353e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77418152, + "step": 114855 + }, + { + "epoch": 2.8060489091930716, + "grad_norm": 0.003969348501414061, + "learning_rate": 9.609016612153115e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77421544, + "step": 114860 + }, + { + "epoch": 2.806171060024919, + "grad_norm": 20.634653091430664, + "learning_rate": 9.60816448964121e-07, + "loss": 0.0805, + "num_input_tokens_seen": 77424808, + "step": 114865 + }, + { + "epoch": 2.806293210856766, + "grad_norm": 0.007369068451225758, + "learning_rate": 9.607312369978842e-07, + "loss": 0.0, + "num_input_tokens_seen": 77428200, + "step": 114870 + }, + { + "epoch": 2.806415361688613, + "grad_norm": 0.09972415119409561, + "learning_rate": 9.606460253172201e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77431656, + "step": 114875 + }, + { + "epoch": 2.8065375125204604, + "grad_norm": 0.01619172841310501, + "learning_rate": 9.60560813922749e-07, + "loss": 0.0337, + "num_input_tokens_seen": 77435112, + "step": 114880 + }, + { + "epoch": 2.8066596633523075, + "grad_norm": 0.8120678067207336, + "learning_rate": 9.604756028150898e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77438312, + "step": 114885 + }, + { + "epoch": 2.8067818141841547, + "grad_norm": 15.800888061523438, + "learning_rate": 9.60390391994863e-07, + "loss": 0.0456, + "num_input_tokens_seen": 77441320, + "step": 114890 + }, + { + "epoch": 2.806903965016002, + "grad_norm": 0.03910161927342415, + "learning_rate": 9.603051814626877e-07, + "loss": 0.0, + "num_input_tokens_seen": 77444968, + "step": 114895 + }, + { + "epoch": 2.807026115847849, + "grad_norm": 0.2639329731464386, + "learning_rate": 9.60219971219184e-07, + "loss": 0.0489, + "num_input_tokens_seen": 77448232, + "step": 114900 + }, + { + "epoch": 2.807148266679696, + "grad_norm": 14.962903022766113, + "learning_rate": 9.601347612649715e-07, + "loss": 0.1049, + "num_input_tokens_seen": 77452072, + "step": 114905 + }, + { + "epoch": 2.8072704175115435, + "grad_norm": 0.07089398801326752, + "learning_rate": 9.600495516006694e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77455912, + "step": 114910 + }, + { + "epoch": 2.8073925683433902, + "grad_norm": 0.042340733110904694, + "learning_rate": 9.599643422268976e-07, + "loss": 0.0718, + "num_input_tokens_seen": 77459304, + "step": 114915 + }, + { + "epoch": 2.8075147191752374, + "grad_norm": 0.6589097380638123, + "learning_rate": 9.598791331442765e-07, + "loss": 0.0013, + "num_input_tokens_seen": 77462696, + "step": 114920 + }, + { + "epoch": 2.8076368700070846, + "grad_norm": 0.4322875440120697, + "learning_rate": 9.597939243534244e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77466472, + "step": 114925 + }, + { + "epoch": 2.807759020838932, + "grad_norm": 44.9621467590332, + "learning_rate": 9.597087158549623e-07, + "loss": 0.0032, + "num_input_tokens_seen": 77469608, + "step": 114930 + }, + { + "epoch": 2.807881171670779, + "grad_norm": 0.038575429469347, + "learning_rate": 9.596235076495088e-07, + "loss": 0.1067, + "num_input_tokens_seen": 77473064, + "step": 114935 + }, + { + "epoch": 2.808003322502626, + "grad_norm": 0.28912949562072754, + "learning_rate": 9.595382997376846e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77476456, + "step": 114940 + }, + { + "epoch": 2.8081254733344734, + "grad_norm": 0.33396586775779724, + "learning_rate": 9.594530921201082e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77479976, + "step": 114945 + }, + { + "epoch": 2.8082476241663206, + "grad_norm": 0.015432288870215416, + "learning_rate": 9.593678847974e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77483240, + "step": 114950 + }, + { + "epoch": 2.8083697749981678, + "grad_norm": 0.8247775435447693, + "learning_rate": 9.592826777701796e-07, + "loss": 0.0007, + "num_input_tokens_seen": 77486568, + "step": 114955 + }, + { + "epoch": 2.808491925830015, + "grad_norm": 0.018848801031708717, + "learning_rate": 9.591974710390663e-07, + "loss": 0.0416, + "num_input_tokens_seen": 77489640, + "step": 114960 + }, + { + "epoch": 2.808614076661862, + "grad_norm": 0.03549596667289734, + "learning_rate": 9.591122646046802e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77493224, + "step": 114965 + }, + { + "epoch": 2.8087362274937093, + "grad_norm": 0.018727822229266167, + "learning_rate": 9.590270584676403e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77497128, + "step": 114970 + }, + { + "epoch": 2.8088583783255565, + "grad_norm": 0.13936327397823334, + "learning_rate": 9.589418526285667e-07, + "loss": 0.1205, + "num_input_tokens_seen": 77500392, + "step": 114975 + }, + { + "epoch": 2.8089805291574037, + "grad_norm": 0.004517362453043461, + "learning_rate": 9.588566470880794e-07, + "loss": 0.116, + "num_input_tokens_seen": 77503528, + "step": 114980 + }, + { + "epoch": 2.809102679989251, + "grad_norm": 16.47195816040039, + "learning_rate": 9.587714418467974e-07, + "loss": 0.1401, + "num_input_tokens_seen": 77506728, + "step": 114985 + }, + { + "epoch": 2.8092248308210976, + "grad_norm": 21.894033432006836, + "learning_rate": 9.586862369053409e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77510248, + "step": 114990 + }, + { + "epoch": 2.8093469816529453, + "grad_norm": 0.08726862072944641, + "learning_rate": 9.586010322643287e-07, + "loss": 0.0479, + "num_input_tokens_seen": 77513704, + "step": 114995 + }, + { + "epoch": 2.809469132484792, + "grad_norm": 0.05452345311641693, + "learning_rate": 9.585158279243812e-07, + "loss": 0.0371, + "num_input_tokens_seen": 77517096, + "step": 115000 + }, + { + "epoch": 2.8095912833166397, + "grad_norm": 0.16248691082000732, + "learning_rate": 9.584306238861178e-07, + "loss": 0.1121, + "num_input_tokens_seen": 77520168, + "step": 115005 + }, + { + "epoch": 2.8097134341484864, + "grad_norm": 0.0710228681564331, + "learning_rate": 9.583454201501576e-07, + "loss": 0.0471, + "num_input_tokens_seen": 77523944, + "step": 115010 + }, + { + "epoch": 2.8098355849803336, + "grad_norm": 0.029321448877453804, + "learning_rate": 9.582602167171215e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77527208, + "step": 115015 + }, + { + "epoch": 2.809957735812181, + "grad_norm": 0.0011495605576783419, + "learning_rate": 9.581750135876275e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77530024, + "step": 115020 + }, + { + "epoch": 2.810079886644028, + "grad_norm": 0.07034885138273239, + "learning_rate": 9.580898107622967e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77533096, + "step": 115025 + }, + { + "epoch": 2.810202037475875, + "grad_norm": 0.002323192311450839, + "learning_rate": 9.580046082417476e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77536360, + "step": 115030 + }, + { + "epoch": 2.8103241883077223, + "grad_norm": 75.02461242675781, + "learning_rate": 9.57919406026601e-07, + "loss": 0.0416, + "num_input_tokens_seen": 77539560, + "step": 115035 + }, + { + "epoch": 2.8104463391395695, + "grad_norm": 0.014927959069609642, + "learning_rate": 9.57834204117475e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77542632, + "step": 115040 + }, + { + "epoch": 2.8105684899714167, + "grad_norm": 178.06178283691406, + "learning_rate": 9.577490025149901e-07, + "loss": 0.0707, + "num_input_tokens_seen": 77546152, + "step": 115045 + }, + { + "epoch": 2.810690640803264, + "grad_norm": 0.056914959102869034, + "learning_rate": 9.576638012197661e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77549544, + "step": 115050 + }, + { + "epoch": 2.810812791635111, + "grad_norm": 0.021990373730659485, + "learning_rate": 9.575786002324225e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77552680, + "step": 115055 + }, + { + "epoch": 2.8109349424669583, + "grad_norm": 0.0051172287203371525, + "learning_rate": 9.574933995535786e-07, + "loss": 0.0019, + "num_input_tokens_seen": 77555880, + "step": 115060 + }, + { + "epoch": 2.8110570932988055, + "grad_norm": 0.018687183037400246, + "learning_rate": 9.57408199183854e-07, + "loss": 0.0742, + "num_input_tokens_seen": 77558952, + "step": 115065 + }, + { + "epoch": 2.8111792441306527, + "grad_norm": 0.04799675941467285, + "learning_rate": 9.57322999123868e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77562600, + "step": 115070 + }, + { + "epoch": 2.8113013949624994, + "grad_norm": 0.05752696096897125, + "learning_rate": 9.572377993742413e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77566312, + "step": 115075 + }, + { + "epoch": 2.811423545794347, + "grad_norm": 1.0827497243881226, + "learning_rate": 9.571525999355926e-07, + "loss": 0.0333, + "num_input_tokens_seen": 77569384, + "step": 115080 + }, + { + "epoch": 2.811545696626194, + "grad_norm": 0.03544053062796593, + "learning_rate": 9.570674008085419e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77572776, + "step": 115085 + }, + { + "epoch": 2.8116678474580414, + "grad_norm": 0.009640112519264221, + "learning_rate": 9.569822019937082e-07, + "loss": 0.0554, + "num_input_tokens_seen": 77576232, + "step": 115090 + }, + { + "epoch": 2.811789998289888, + "grad_norm": 0.0009792305063456297, + "learning_rate": 9.568970034917119e-07, + "loss": 0.0236, + "num_input_tokens_seen": 77579624, + "step": 115095 + }, + { + "epoch": 2.8119121491217354, + "grad_norm": 0.0004921176587231457, + "learning_rate": 9.56811805303172e-07, + "loss": 0.0628, + "num_input_tokens_seen": 77583464, + "step": 115100 + }, + { + "epoch": 2.8120342999535826, + "grad_norm": 0.004929780960083008, + "learning_rate": 9.56726607428708e-07, + "loss": 0.0399, + "num_input_tokens_seen": 77586856, + "step": 115105 + }, + { + "epoch": 2.8121564507854298, + "grad_norm": 0.003183274297043681, + "learning_rate": 9.566414098689404e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77589864, + "step": 115110 + }, + { + "epoch": 2.812278601617277, + "grad_norm": 0.02194182761013508, + "learning_rate": 9.565562126244876e-07, + "loss": 0.0761, + "num_input_tokens_seen": 77592808, + "step": 115115 + }, + { + "epoch": 2.812400752449124, + "grad_norm": 0.2706195116043091, + "learning_rate": 9.5647101569597e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77596200, + "step": 115120 + }, + { + "epoch": 2.8125229032809713, + "grad_norm": 1.5332212448120117, + "learning_rate": 9.563858190840066e-07, + "loss": 0.0723, + "num_input_tokens_seen": 77599528, + "step": 115125 + }, + { + "epoch": 2.8126450541128185, + "grad_norm": 0.036401137709617615, + "learning_rate": 9.563006227892172e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77602920, + "step": 115130 + }, + { + "epoch": 2.8127672049446657, + "grad_norm": 0.013892637565732002, + "learning_rate": 9.562154268122217e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77606632, + "step": 115135 + }, + { + "epoch": 2.812889355776513, + "grad_norm": 291.0292053222656, + "learning_rate": 9.561302311536392e-07, + "loss": 0.0291, + "num_input_tokens_seen": 77610216, + "step": 115140 + }, + { + "epoch": 2.81301150660836, + "grad_norm": 0.0005044717690907419, + "learning_rate": 9.5604503581409e-07, + "loss": 0.0193, + "num_input_tokens_seen": 77613800, + "step": 115145 + }, + { + "epoch": 2.8131336574402073, + "grad_norm": 21.074951171875, + "learning_rate": 9.559598407941925e-07, + "loss": 0.0568, + "num_input_tokens_seen": 77617000, + "step": 115150 + }, + { + "epoch": 2.8132558082720545, + "grad_norm": 0.0009566976805217564, + "learning_rate": 9.558746460945672e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77620072, + "step": 115155 + }, + { + "epoch": 2.8133779591039016, + "grad_norm": 0.14798258244991302, + "learning_rate": 9.557894517158332e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77623208, + "step": 115160 + }, + { + "epoch": 2.813500109935749, + "grad_norm": 0.13334304094314575, + "learning_rate": 9.557042576586101e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77626088, + "step": 115165 + }, + { + "epoch": 2.8136222607675956, + "grad_norm": 0.1850810945034027, + "learning_rate": 9.55619063923518e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77629160, + "step": 115170 + }, + { + "epoch": 2.813744411599443, + "grad_norm": 0.055887024849653244, + "learning_rate": 9.555338705111753e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77632808, + "step": 115175 + }, + { + "epoch": 2.81386656243129, + "grad_norm": 0.2823597192764282, + "learning_rate": 9.55448677422203e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77636712, + "step": 115180 + }, + { + "epoch": 2.8139887132631376, + "grad_norm": 0.00106719764880836, + "learning_rate": 9.55363484657219e-07, + "loss": 0.0386, + "num_input_tokens_seen": 77640104, + "step": 115185 + }, + { + "epoch": 2.8141108640949843, + "grad_norm": 0.01963244192302227, + "learning_rate": 9.552782922168447e-07, + "loss": 0.0113, + "num_input_tokens_seen": 77643304, + "step": 115190 + }, + { + "epoch": 2.8142330149268315, + "grad_norm": 0.006312564015388489, + "learning_rate": 9.55193100101698e-07, + "loss": 0.0599, + "num_input_tokens_seen": 77646632, + "step": 115195 + }, + { + "epoch": 2.8143551657586787, + "grad_norm": 0.20666086673736572, + "learning_rate": 9.551079083123996e-07, + "loss": 0.0457, + "num_input_tokens_seen": 77649768, + "step": 115200 + }, + { + "epoch": 2.814477316590526, + "grad_norm": 0.005621414165943861, + "learning_rate": 9.550227168495683e-07, + "loss": 0.0008, + "num_input_tokens_seen": 77653032, + "step": 115205 + }, + { + "epoch": 2.814599467422373, + "grad_norm": 25.110536575317383, + "learning_rate": 9.54937525713824e-07, + "loss": 0.0429, + "num_input_tokens_seen": 77656296, + "step": 115210 + }, + { + "epoch": 2.8147216182542203, + "grad_norm": 0.28763338923454285, + "learning_rate": 9.548523349057864e-07, + "loss": 0.0993, + "num_input_tokens_seen": 77659496, + "step": 115215 + }, + { + "epoch": 2.8148437690860675, + "grad_norm": 0.013426266610622406, + "learning_rate": 9.54767144426074e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77663016, + "step": 115220 + }, + { + "epoch": 2.8149659199179147, + "grad_norm": 0.06359026581048965, + "learning_rate": 9.546819542753074e-07, + "loss": 0.0411, + "num_input_tokens_seen": 77666920, + "step": 115225 + }, + { + "epoch": 2.815088070749762, + "grad_norm": 0.012988533824682236, + "learning_rate": 9.545967644541063e-07, + "loss": 0.007, + "num_input_tokens_seen": 77670312, + "step": 115230 + }, + { + "epoch": 2.815210221581609, + "grad_norm": 0.5349770188331604, + "learning_rate": 9.545115749630891e-07, + "loss": 0.0466, + "num_input_tokens_seen": 77673832, + "step": 115235 + }, + { + "epoch": 2.8153323724134562, + "grad_norm": 0.37651684880256653, + "learning_rate": 9.544263858028765e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77677032, + "step": 115240 + }, + { + "epoch": 2.8154545232453034, + "grad_norm": 0.04008885845541954, + "learning_rate": 9.54341196974087e-07, + "loss": 0.0046, + "num_input_tokens_seen": 77680296, + "step": 115245 + }, + { + "epoch": 2.8155766740771506, + "grad_norm": 0.0235599298030138, + "learning_rate": 9.542560084773412e-07, + "loss": 0.0458, + "num_input_tokens_seen": 77683880, + "step": 115250 + }, + { + "epoch": 2.8156988249089974, + "grad_norm": 0.04918990284204483, + "learning_rate": 9.541708203132577e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77687080, + "step": 115255 + }, + { + "epoch": 2.815820975740845, + "grad_norm": 0.017839496955275536, + "learning_rate": 9.54085632482456e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77690472, + "step": 115260 + }, + { + "epoch": 2.8159431265726917, + "grad_norm": 0.002251403173431754, + "learning_rate": 9.540004449855565e-07, + "loss": 0.0751, + "num_input_tokens_seen": 77693864, + "step": 115265 + }, + { + "epoch": 2.8160652774045394, + "grad_norm": 0.0022762392181903124, + "learning_rate": 9.539152578231776e-07, + "loss": 0.0454, + "num_input_tokens_seen": 77697192, + "step": 115270 + }, + { + "epoch": 2.816187428236386, + "grad_norm": 0.009227758273482323, + "learning_rate": 9.538300709959398e-07, + "loss": 0.069, + "num_input_tokens_seen": 77700520, + "step": 115275 + }, + { + "epoch": 2.8163095790682333, + "grad_norm": 0.01429454330354929, + "learning_rate": 9.537448845044617e-07, + "loss": 0.0, + "num_input_tokens_seen": 77704040, + "step": 115280 + }, + { + "epoch": 2.8164317299000805, + "grad_norm": 0.027496200054883957, + "learning_rate": 9.536596983493633e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77707880, + "step": 115285 + }, + { + "epoch": 2.8165538807319277, + "grad_norm": 0.0004138645890634507, + "learning_rate": 9.535745125312644e-07, + "loss": 0.035, + "num_input_tokens_seen": 77711208, + "step": 115290 + }, + { + "epoch": 2.816676031563775, + "grad_norm": 0.008841968141496181, + "learning_rate": 9.534893270507837e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77714472, + "step": 115295 + }, + { + "epoch": 2.816798182395622, + "grad_norm": 0.1025681346654892, + "learning_rate": 9.534041419085417e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77717864, + "step": 115300 + }, + { + "epoch": 2.8169203332274693, + "grad_norm": 0.04022670164704323, + "learning_rate": 9.53318957105157e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77721256, + "step": 115305 + }, + { + "epoch": 2.8170424840593165, + "grad_norm": 0.008396613411605358, + "learning_rate": 9.532337726412494e-07, + "loss": 0.0654, + "num_input_tokens_seen": 77724456, + "step": 115310 + }, + { + "epoch": 2.8171646348911636, + "grad_norm": 30.426029205322266, + "learning_rate": 9.531485885174384e-07, + "loss": 0.0492, + "num_input_tokens_seen": 77727912, + "step": 115315 + }, + { + "epoch": 2.817286785723011, + "grad_norm": 0.03145899623632431, + "learning_rate": 9.530634047343432e-07, + "loss": 0.0479, + "num_input_tokens_seen": 77731432, + "step": 115320 + }, + { + "epoch": 2.817408936554858, + "grad_norm": 0.01578490436077118, + "learning_rate": 9.52978221292584e-07, + "loss": 0.0404, + "num_input_tokens_seen": 77734760, + "step": 115325 + }, + { + "epoch": 2.817531087386705, + "grad_norm": 0.021990390494465828, + "learning_rate": 9.528930381927794e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77738216, + "step": 115330 + }, + { + "epoch": 2.8176532382185524, + "grad_norm": 0.0014852866297587752, + "learning_rate": 9.528078554355497e-07, + "loss": 0.1156, + "num_input_tokens_seen": 77742504, + "step": 115335 + }, + { + "epoch": 2.8177753890503996, + "grad_norm": 0.17477023601531982, + "learning_rate": 9.527226730215136e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77746024, + "step": 115340 + }, + { + "epoch": 2.817897539882247, + "grad_norm": 0.00901720579713583, + "learning_rate": 9.526374909512913e-07, + "loss": 0.0, + "num_input_tokens_seen": 77749800, + "step": 115345 + }, + { + "epoch": 2.8180196907140935, + "grad_norm": 0.013932290486991405, + "learning_rate": 9.525523092255015e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77753192, + "step": 115350 + }, + { + "epoch": 2.818141841545941, + "grad_norm": 0.016308395192027092, + "learning_rate": 9.524671278447642e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77757160, + "step": 115355 + }, + { + "epoch": 2.818263992377788, + "grad_norm": 0.06437277048826218, + "learning_rate": 9.523819468096988e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77760744, + "step": 115360 + }, + { + "epoch": 2.818386143209635, + "grad_norm": 0.012472563423216343, + "learning_rate": 9.522967661209249e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77764200, + "step": 115365 + }, + { + "epoch": 2.8185082940414823, + "grad_norm": 0.009374906308948994, + "learning_rate": 9.522115857790616e-07, + "loss": 0.036, + "num_input_tokens_seen": 77767656, + "step": 115370 + }, + { + "epoch": 2.8186304448733295, + "grad_norm": 0.048512861132621765, + "learning_rate": 9.521264057847283e-07, + "loss": 0.0542, + "num_input_tokens_seen": 77770600, + "step": 115375 + }, + { + "epoch": 2.8187525957051767, + "grad_norm": 23.211782455444336, + "learning_rate": 9.520412261385445e-07, + "loss": 0.0633, + "num_input_tokens_seen": 77773736, + "step": 115380 + }, + { + "epoch": 2.818874746537024, + "grad_norm": 0.025888273492455482, + "learning_rate": 9.519560468411304e-07, + "loss": 0.0526, + "num_input_tokens_seen": 77776872, + "step": 115385 + }, + { + "epoch": 2.818996897368871, + "grad_norm": 0.1917659342288971, + "learning_rate": 9.518708678931044e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77779944, + "step": 115390 + }, + { + "epoch": 2.8191190482007182, + "grad_norm": 15.961345672607422, + "learning_rate": 9.517856892950866e-07, + "loss": 0.0981, + "num_input_tokens_seen": 77783208, + "step": 115395 + }, + { + "epoch": 2.8192411990325654, + "grad_norm": 0.020637147128582, + "learning_rate": 9.51700511047696e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77786728, + "step": 115400 + }, + { + "epoch": 2.8193633498644126, + "grad_norm": 0.5006750822067261, + "learning_rate": 9.516153331515528e-07, + "loss": 0.0721, + "num_input_tokens_seen": 77790120, + "step": 115405 + }, + { + "epoch": 2.81948550069626, + "grad_norm": 0.04073977470397949, + "learning_rate": 9.515301556072754e-07, + "loss": 0.1149, + "num_input_tokens_seen": 77793512, + "step": 115410 + }, + { + "epoch": 2.819607651528107, + "grad_norm": 0.176833376288414, + "learning_rate": 9.514449784154837e-07, + "loss": 0.0486, + "num_input_tokens_seen": 77797608, + "step": 115415 + }, + { + "epoch": 2.819729802359954, + "grad_norm": 0.03162245824933052, + "learning_rate": 9.513598015767978e-07, + "loss": 0.0898, + "num_input_tokens_seen": 77801384, + "step": 115420 + }, + { + "epoch": 2.8198519531918014, + "grad_norm": 0.18794050812721252, + "learning_rate": 9.512746250918358e-07, + "loss": 0.0009, + "num_input_tokens_seen": 77805096, + "step": 115425 + }, + { + "epoch": 2.8199741040236486, + "grad_norm": 0.058585792779922485, + "learning_rate": 9.511894489612183e-07, + "loss": 0.001, + "num_input_tokens_seen": 77808168, + "step": 115430 + }, + { + "epoch": 2.8200962548554953, + "grad_norm": 0.03166075795888901, + "learning_rate": 9.51104273185564e-07, + "loss": 0.0834, + "num_input_tokens_seen": 77811816, + "step": 115435 + }, + { + "epoch": 2.820218405687343, + "grad_norm": 0.12651073932647705, + "learning_rate": 9.510190977654924e-07, + "loss": 0.0453, + "num_input_tokens_seen": 77814952, + "step": 115440 + }, + { + "epoch": 2.8203405565191897, + "grad_norm": 0.200551837682724, + "learning_rate": 9.509339227016235e-07, + "loss": 0.0005, + "num_input_tokens_seen": 77818472, + "step": 115445 + }, + { + "epoch": 2.8204627073510373, + "grad_norm": 0.04132993146777153, + "learning_rate": 9.50848747994576e-07, + "loss": 0.0503, + "num_input_tokens_seen": 77821992, + "step": 115450 + }, + { + "epoch": 2.820584858182884, + "grad_norm": 0.02948671020567417, + "learning_rate": 9.5076357364497e-07, + "loss": 0.1241, + "num_input_tokens_seen": 77824936, + "step": 115455 + }, + { + "epoch": 2.8207070090147313, + "grad_norm": 0.0442376546561718, + "learning_rate": 9.506783996534244e-07, + "loss": 0.0472, + "num_input_tokens_seen": 77828264, + "step": 115460 + }, + { + "epoch": 2.8208291598465784, + "grad_norm": 0.0049124970100820065, + "learning_rate": 9.505932260205586e-07, + "loss": 0.0284, + "num_input_tokens_seen": 77831400, + "step": 115465 + }, + { + "epoch": 2.8209513106784256, + "grad_norm": 39.6253776550293, + "learning_rate": 9.505080527469924e-07, + "loss": 0.0399, + "num_input_tokens_seen": 77834536, + "step": 115470 + }, + { + "epoch": 2.821073461510273, + "grad_norm": 0.0008584270253777504, + "learning_rate": 9.504228798333445e-07, + "loss": 0.0344, + "num_input_tokens_seen": 77837928, + "step": 115475 + }, + { + "epoch": 2.82119561234212, + "grad_norm": 0.014727511443197727, + "learning_rate": 9.503377072802353e-07, + "loss": 0.0446, + "num_input_tokens_seen": 77841576, + "step": 115480 + }, + { + "epoch": 2.821317763173967, + "grad_norm": 0.05348619073629379, + "learning_rate": 9.502525350882831e-07, + "loss": 0.0524, + "num_input_tokens_seen": 77844520, + "step": 115485 + }, + { + "epoch": 2.8214399140058144, + "grad_norm": 1.4710263013839722, + "learning_rate": 9.501673632581083e-07, + "loss": 0.0459, + "num_input_tokens_seen": 77848424, + "step": 115490 + }, + { + "epoch": 2.8215620648376616, + "grad_norm": 0.015803487971425056, + "learning_rate": 9.500821917903295e-07, + "loss": 0.0005, + "num_input_tokens_seen": 77851944, + "step": 115495 + }, + { + "epoch": 2.8216842156695088, + "grad_norm": 0.1972040981054306, + "learning_rate": 9.499970206855668e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77855400, + "step": 115500 + }, + { + "epoch": 2.821806366501356, + "grad_norm": 0.051901161670684814, + "learning_rate": 9.499118499444388e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77858920, + "step": 115505 + }, + { + "epoch": 2.821928517333203, + "grad_norm": 0.01611727848649025, + "learning_rate": 9.498266795675654e-07, + "loss": 0.0011, + "num_input_tokens_seen": 77862376, + "step": 115510 + }, + { + "epoch": 2.8220506681650503, + "grad_norm": 0.05335599184036255, + "learning_rate": 9.497415095555659e-07, + "loss": 0.0787, + "num_input_tokens_seen": 77865896, + "step": 115515 + }, + { + "epoch": 2.822172818996897, + "grad_norm": 0.031127309426665306, + "learning_rate": 9.496563399090598e-07, + "loss": 0.0487, + "num_input_tokens_seen": 77869864, + "step": 115520 + }, + { + "epoch": 2.8222949698287447, + "grad_norm": 0.09673364460468292, + "learning_rate": 9.495711706286666e-07, + "loss": 0.0255, + "num_input_tokens_seen": 77873064, + "step": 115525 + }, + { + "epoch": 2.8224171206605915, + "grad_norm": 0.5968812704086304, + "learning_rate": 9.494860017150048e-07, + "loss": 0.0015, + "num_input_tokens_seen": 77876520, + "step": 115530 + }, + { + "epoch": 2.822539271492439, + "grad_norm": 0.07545629888772964, + "learning_rate": 9.494008331686945e-07, + "loss": 0.0005, + "num_input_tokens_seen": 77879976, + "step": 115535 + }, + { + "epoch": 2.822661422324286, + "grad_norm": 0.09752354770898819, + "learning_rate": 9.493156649903553e-07, + "loss": 0.0006, + "num_input_tokens_seen": 77883624, + "step": 115540 + }, + { + "epoch": 2.822783573156133, + "grad_norm": 0.03341040015220642, + "learning_rate": 9.492304971806059e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77887400, + "step": 115545 + }, + { + "epoch": 2.8229057239879802, + "grad_norm": 0.0756709948182106, + "learning_rate": 9.491453297400663e-07, + "loss": 0.0258, + "num_input_tokens_seen": 77891048, + "step": 115550 + }, + { + "epoch": 2.8230278748198274, + "grad_norm": 0.028244758024811745, + "learning_rate": 9.490601626693551e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77894184, + "step": 115555 + }, + { + "epoch": 2.8231500256516746, + "grad_norm": 0.12282195687294006, + "learning_rate": 9.489749959690926e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77897384, + "step": 115560 + }, + { + "epoch": 2.823272176483522, + "grad_norm": 0.011518844403326511, + "learning_rate": 9.488898296398975e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77900840, + "step": 115565 + }, + { + "epoch": 2.823394327315369, + "grad_norm": 17.510047912597656, + "learning_rate": 9.48804663682389e-07, + "loss": 0.0249, + "num_input_tokens_seen": 77904040, + "step": 115570 + }, + { + "epoch": 2.823516478147216, + "grad_norm": 0.035026874393224716, + "learning_rate": 9.487194980971871e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77907624, + "step": 115575 + }, + { + "epoch": 2.8236386289790634, + "grad_norm": 0.013446671888232231, + "learning_rate": 9.486343328849105e-07, + "loss": 0.0279, + "num_input_tokens_seen": 77911080, + "step": 115580 + }, + { + "epoch": 2.8237607798109106, + "grad_norm": 0.004692543763667345, + "learning_rate": 9.485491680461792e-07, + "loss": 0.0, + "num_input_tokens_seen": 77914536, + "step": 115585 + }, + { + "epoch": 2.8238829306427577, + "grad_norm": 0.03838277608156204, + "learning_rate": 9.484640035816119e-07, + "loss": 0.0459, + "num_input_tokens_seen": 77918312, + "step": 115590 + }, + { + "epoch": 2.824005081474605, + "grad_norm": 0.028557293117046356, + "learning_rate": 9.483788394918285e-07, + "loss": 0.073, + "num_input_tokens_seen": 77922216, + "step": 115595 + }, + { + "epoch": 2.824127232306452, + "grad_norm": 0.17949016392230988, + "learning_rate": 9.482936757774477e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77925736, + "step": 115600 + }, + { + "epoch": 2.8242493831382993, + "grad_norm": 0.00452646566554904, + "learning_rate": 9.482085124390892e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77929064, + "step": 115605 + }, + { + "epoch": 2.8243715339701465, + "grad_norm": 0.08502378314733505, + "learning_rate": 9.481233494773727e-07, + "loss": 0.0004, + "num_input_tokens_seen": 77932136, + "step": 115610 + }, + { + "epoch": 2.8244936848019933, + "grad_norm": 0.015604489482939243, + "learning_rate": 9.48038186892917e-07, + "loss": 0.063, + "num_input_tokens_seen": 77934888, + "step": 115615 + }, + { + "epoch": 2.824615835633841, + "grad_norm": 0.00344777200371027, + "learning_rate": 9.479530246863416e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77938344, + "step": 115620 + }, + { + "epoch": 2.8247379864656876, + "grad_norm": 0.02435332164168358, + "learning_rate": 9.478678628582657e-07, + "loss": 0.0007, + "num_input_tokens_seen": 77941672, + "step": 115625 + }, + { + "epoch": 2.8248601372975353, + "grad_norm": 0.008998130448162556, + "learning_rate": 9.477827014093086e-07, + "loss": 0.0834, + "num_input_tokens_seen": 77945000, + "step": 115630 + }, + { + "epoch": 2.824982288129382, + "grad_norm": 0.023994240909814835, + "learning_rate": 9.476975403400902e-07, + "loss": 0.0504, + "num_input_tokens_seen": 77948072, + "step": 115635 + }, + { + "epoch": 2.825104438961229, + "grad_norm": 14.398282051086426, + "learning_rate": 9.476123796512288e-07, + "loss": 0.0314, + "num_input_tokens_seen": 77951208, + "step": 115640 + }, + { + "epoch": 2.8252265897930764, + "grad_norm": 0.08678370714187622, + "learning_rate": 9.475272193433448e-07, + "loss": 0.0604, + "num_input_tokens_seen": 77954792, + "step": 115645 + }, + { + "epoch": 2.8253487406249236, + "grad_norm": 86.93380737304688, + "learning_rate": 9.474420594170566e-07, + "loss": 0.0883, + "num_input_tokens_seen": 77958632, + "step": 115650 + }, + { + "epoch": 2.8254708914567708, + "grad_norm": 0.08339137583971024, + "learning_rate": 9.473568998729842e-07, + "loss": 0.034, + "num_input_tokens_seen": 77962408, + "step": 115655 + }, + { + "epoch": 2.825593042288618, + "grad_norm": 0.004629269242286682, + "learning_rate": 9.472717407117461e-07, + "loss": 0.0001, + "num_input_tokens_seen": 77965736, + "step": 115660 + }, + { + "epoch": 2.825715193120465, + "grad_norm": 0.06785409152507782, + "learning_rate": 9.471865819339624e-07, + "loss": 0.0514, + "num_input_tokens_seen": 77969512, + "step": 115665 + }, + { + "epoch": 2.8258373439523123, + "grad_norm": 14.922898292541504, + "learning_rate": 9.471014235402521e-07, + "loss": 0.1223, + "num_input_tokens_seen": 77972968, + "step": 115670 + }, + { + "epoch": 2.8259594947841595, + "grad_norm": 0.011822236701846123, + "learning_rate": 9.470162655312344e-07, + "loss": 0.0635, + "num_input_tokens_seen": 77975912, + "step": 115675 + }, + { + "epoch": 2.8260816456160067, + "grad_norm": 0.11484556645154953, + "learning_rate": 9.46931107907529e-07, + "loss": 0.0108, + "num_input_tokens_seen": 77979176, + "step": 115680 + }, + { + "epoch": 2.826203796447854, + "grad_norm": 0.01218679640442133, + "learning_rate": 9.468459506697543e-07, + "loss": 0.0002, + "num_input_tokens_seen": 77982760, + "step": 115685 + }, + { + "epoch": 2.826325947279701, + "grad_norm": 0.013855358585715294, + "learning_rate": 9.467607938185301e-07, + "loss": 0.0003, + "num_input_tokens_seen": 77986216, + "step": 115690 + }, + { + "epoch": 2.8264480981115483, + "grad_norm": 0.09669753909111023, + "learning_rate": 9.466756373544763e-07, + "loss": 0.0501, + "num_input_tokens_seen": 77989672, + "step": 115695 + }, + { + "epoch": 2.826570248943395, + "grad_norm": 0.03389380872249603, + "learning_rate": 9.465904812782112e-07, + "loss": 0.1267, + "num_input_tokens_seen": 77992872, + "step": 115700 + }, + { + "epoch": 2.8266923997752427, + "grad_norm": 0.027956049889326096, + "learning_rate": 9.465053255903548e-07, + "loss": 0.0841, + "num_input_tokens_seen": 77995880, + "step": 115705 + }, + { + "epoch": 2.8268145506070894, + "grad_norm": 0.006097717210650444, + "learning_rate": 9.464201702915256e-07, + "loss": 0.0007, + "num_input_tokens_seen": 77999400, + "step": 115710 + }, + { + "epoch": 2.826936701438937, + "grad_norm": 0.02535603940486908, + "learning_rate": 9.463350153823438e-07, + "loss": 0.1111, + "num_input_tokens_seen": 78003112, + "step": 115715 + }, + { + "epoch": 2.827058852270784, + "grad_norm": 0.7845985889434814, + "learning_rate": 9.462498608634281e-07, + "loss": 0.0557, + "num_input_tokens_seen": 78006568, + "step": 115720 + }, + { + "epoch": 2.827181003102631, + "grad_norm": 0.061375007033348083, + "learning_rate": 9.461647067353975e-07, + "loss": 0.0315, + "num_input_tokens_seen": 78009512, + "step": 115725 + }, + { + "epoch": 2.827303153934478, + "grad_norm": 0.10138930380344391, + "learning_rate": 9.460795529988723e-07, + "loss": 0.0488, + "num_input_tokens_seen": 78013160, + "step": 115730 + }, + { + "epoch": 2.8274253047663254, + "grad_norm": 0.11310622096061707, + "learning_rate": 9.459943996544703e-07, + "loss": 0.041, + "num_input_tokens_seen": 78016232, + "step": 115735 + }, + { + "epoch": 2.8275474555981726, + "grad_norm": 0.015895165503025055, + "learning_rate": 9.459092467028122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78019688, + "step": 115740 + }, + { + "epoch": 2.8276696064300197, + "grad_norm": 0.004023950546979904, + "learning_rate": 9.458240941445163e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78023016, + "step": 115745 + }, + { + "epoch": 2.827791757261867, + "grad_norm": 0.7409040331840515, + "learning_rate": 9.457389419802024e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78026344, + "step": 115750 + }, + { + "epoch": 2.827913908093714, + "grad_norm": 0.03319403901696205, + "learning_rate": 9.45653790210489e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78029928, + "step": 115755 + }, + { + "epoch": 2.8280360589255613, + "grad_norm": 0.024063147604465485, + "learning_rate": 9.455686388359961e-07, + "loss": 0.0024, + "num_input_tokens_seen": 78033320, + "step": 115760 + }, + { + "epoch": 2.8281582097574085, + "grad_norm": 0.011868052184581757, + "learning_rate": 9.45483487857343e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78036264, + "step": 115765 + }, + { + "epoch": 2.8282803605892557, + "grad_norm": 0.018821561709046364, + "learning_rate": 9.453983372751484e-07, + "loss": 0.0918, + "num_input_tokens_seen": 78039656, + "step": 115770 + }, + { + "epoch": 2.828402511421103, + "grad_norm": 0.20139813423156738, + "learning_rate": 9.453131870900318e-07, + "loss": 0.1312, + "num_input_tokens_seen": 78042600, + "step": 115775 + }, + { + "epoch": 2.82852466225295, + "grad_norm": 0.21862444281578064, + "learning_rate": 9.452280373026125e-07, + "loss": 0.032, + "num_input_tokens_seen": 78045544, + "step": 115780 + }, + { + "epoch": 2.8286468130847973, + "grad_norm": 0.128239706158638, + "learning_rate": 9.451428879135093e-07, + "loss": 0.0007, + "num_input_tokens_seen": 78048424, + "step": 115785 + }, + { + "epoch": 2.8287689639166445, + "grad_norm": 42.088844299316406, + "learning_rate": 9.450577389233423e-07, + "loss": 0.0537, + "num_input_tokens_seen": 78051688, + "step": 115790 + }, + { + "epoch": 2.828891114748491, + "grad_norm": 16.15775489807129, + "learning_rate": 9.449725903327297e-07, + "loss": 0.0318, + "num_input_tokens_seen": 78055272, + "step": 115795 + }, + { + "epoch": 2.829013265580339, + "grad_norm": 0.013786409981548786, + "learning_rate": 9.448874421422916e-07, + "loss": 0.0493, + "num_input_tokens_seen": 78059240, + "step": 115800 + }, + { + "epoch": 2.8291354164121856, + "grad_norm": 0.011192580685019493, + "learning_rate": 9.448022943526466e-07, + "loss": 0.0586, + "num_input_tokens_seen": 78062568, + "step": 115805 + }, + { + "epoch": 2.8292575672440328, + "grad_norm": 0.739030122756958, + "learning_rate": 9.447171469644144e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78066152, + "step": 115810 + }, + { + "epoch": 2.82937971807588, + "grad_norm": 0.02116185426712036, + "learning_rate": 9.446319999782136e-07, + "loss": 0.0, + "num_input_tokens_seen": 78069288, + "step": 115815 + }, + { + "epoch": 2.829501868907727, + "grad_norm": 0.9456649422645569, + "learning_rate": 9.445468533946641e-07, + "loss": 0.0536, + "num_input_tokens_seen": 78072872, + "step": 115820 + }, + { + "epoch": 2.8296240197395743, + "grad_norm": 0.019342761486768723, + "learning_rate": 9.444617072143848e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78076200, + "step": 115825 + }, + { + "epoch": 2.8297461705714215, + "grad_norm": 0.44813454151153564, + "learning_rate": 9.443765614379948e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78079336, + "step": 115830 + }, + { + "epoch": 2.8298683214032687, + "grad_norm": 0.042555276304483414, + "learning_rate": 9.442914160661137e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78082792, + "step": 115835 + }, + { + "epoch": 2.829990472235116, + "grad_norm": 0.04323071613907814, + "learning_rate": 9.442062710993599e-07, + "loss": 0.0349, + "num_input_tokens_seen": 78086824, + "step": 115840 + }, + { + "epoch": 2.830112623066963, + "grad_norm": 26.882911682128906, + "learning_rate": 9.44121126538353e-07, + "loss": 0.1367, + "num_input_tokens_seen": 78090664, + "step": 115845 + }, + { + "epoch": 2.8302347738988103, + "grad_norm": 0.062530517578125, + "learning_rate": 9.44035982383713e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78093800, + "step": 115850 + }, + { + "epoch": 2.8303569247306575, + "grad_norm": 0.19051715731620789, + "learning_rate": 9.439508386360577e-07, + "loss": 0.2, + "num_input_tokens_seen": 78097192, + "step": 115855 + }, + { + "epoch": 2.8304790755625047, + "grad_norm": 0.08016974478960037, + "learning_rate": 9.438656952960076e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78100584, + "step": 115860 + }, + { + "epoch": 2.830601226394352, + "grad_norm": 0.0509033165872097, + "learning_rate": 9.437805523641808e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78103656, + "step": 115865 + }, + { + "epoch": 2.830723377226199, + "grad_norm": 0.002763264812529087, + "learning_rate": 9.436954098411973e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78106792, + "step": 115870 + }, + { + "epoch": 2.8308455280580462, + "grad_norm": 0.08348390460014343, + "learning_rate": 9.436102677276757e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78109864, + "step": 115875 + }, + { + "epoch": 2.830967678889893, + "grad_norm": 0.009231437928974628, + "learning_rate": 9.435251260242352e-07, + "loss": 0.1042, + "num_input_tokens_seen": 78113000, + "step": 115880 + }, + { + "epoch": 2.8310898297217406, + "grad_norm": 0.2583863139152527, + "learning_rate": 9.434399847314958e-07, + "loss": 0.0387, + "num_input_tokens_seen": 78116072, + "step": 115885 + }, + { + "epoch": 2.8312119805535874, + "grad_norm": 0.4468061625957489, + "learning_rate": 9.433548438500753e-07, + "loss": 0.0591, + "num_input_tokens_seen": 78119400, + "step": 115890 + }, + { + "epoch": 2.831334131385435, + "grad_norm": 0.37474071979522705, + "learning_rate": 9.432697033805943e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78122856, + "step": 115895 + }, + { + "epoch": 2.8314562822172817, + "grad_norm": 0.8062929511070251, + "learning_rate": 9.431845633236707e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78126440, + "step": 115900 + }, + { + "epoch": 2.831578433049129, + "grad_norm": 0.009114105254411697, + "learning_rate": 9.430994236799247e-07, + "loss": 0.0405, + "num_input_tokens_seen": 78130024, + "step": 115905 + }, + { + "epoch": 2.831700583880976, + "grad_norm": 0.45530620217323303, + "learning_rate": 9.430142844499746e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78133288, + "step": 115910 + }, + { + "epoch": 2.8318227347128233, + "grad_norm": 0.06309596449136734, + "learning_rate": 9.429291456344398e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78136296, + "step": 115915 + }, + { + "epoch": 2.8319448855446705, + "grad_norm": 11.176220893859863, + "learning_rate": 9.428440072339402e-07, + "loss": 0.0636, + "num_input_tokens_seen": 78139624, + "step": 115920 + }, + { + "epoch": 2.8320670363765177, + "grad_norm": 0.014629398472607136, + "learning_rate": 9.42758869249094e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78143080, + "step": 115925 + }, + { + "epoch": 2.832189187208365, + "grad_norm": 0.048689037561416626, + "learning_rate": 9.426737316805209e-07, + "loss": 0.0874, + "num_input_tokens_seen": 78146088, + "step": 115930 + }, + { + "epoch": 2.832311338040212, + "grad_norm": 0.03567717596888542, + "learning_rate": 9.425885945288397e-07, + "loss": 0.0012, + "num_input_tokens_seen": 78149800, + "step": 115935 + }, + { + "epoch": 2.8324334888720593, + "grad_norm": 0.03357863798737526, + "learning_rate": 9.425034577946696e-07, + "loss": 0.0344, + "num_input_tokens_seen": 78152936, + "step": 115940 + }, + { + "epoch": 2.8325556397039064, + "grad_norm": 31.494722366333008, + "learning_rate": 9.424183214786301e-07, + "loss": 0.0427, + "num_input_tokens_seen": 78156008, + "step": 115945 + }, + { + "epoch": 2.8326777905357536, + "grad_norm": 0.0009557157754898071, + "learning_rate": 9.423331855813396e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78159336, + "step": 115950 + }, + { + "epoch": 2.832799941367601, + "grad_norm": 0.3145740330219269, + "learning_rate": 9.422480501034183e-07, + "loss": 0.0478, + "num_input_tokens_seen": 78162344, + "step": 115955 + }, + { + "epoch": 2.832922092199448, + "grad_norm": 0.019206028431653976, + "learning_rate": 9.421629150454841e-07, + "loss": 0.0656, + "num_input_tokens_seen": 78165480, + "step": 115960 + }, + { + "epoch": 2.833044243031295, + "grad_norm": 0.011119597591459751, + "learning_rate": 9.420777804081572e-07, + "loss": 0.1134, + "num_input_tokens_seen": 78168616, + "step": 115965 + }, + { + "epoch": 2.8331663938631424, + "grad_norm": 0.07654442638158798, + "learning_rate": 9.419926461920559e-07, + "loss": 0.0411, + "num_input_tokens_seen": 78172392, + "step": 115970 + }, + { + "epoch": 2.833288544694989, + "grad_norm": 11.022329330444336, + "learning_rate": 9.419075123977999e-07, + "loss": 0.03, + "num_input_tokens_seen": 78175528, + "step": 115975 + }, + { + "epoch": 2.8334106955268368, + "grad_norm": 0.04601052403450012, + "learning_rate": 9.41822379026008e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78179176, + "step": 115980 + }, + { + "epoch": 2.8335328463586835, + "grad_norm": 0.12010195106267929, + "learning_rate": 9.417372460772994e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78182504, + "step": 115985 + }, + { + "epoch": 2.8336549971905307, + "grad_norm": 0.002848004223778844, + "learning_rate": 9.416521135522936e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78185960, + "step": 115990 + }, + { + "epoch": 2.833777148022378, + "grad_norm": 0.06952404975891113, + "learning_rate": 9.415669814516087e-07, + "loss": 0.0012, + "num_input_tokens_seen": 78189416, + "step": 115995 + }, + { + "epoch": 2.833899298854225, + "grad_norm": 0.2463352084159851, + "learning_rate": 9.414818497758645e-07, + "loss": 0.0218, + "num_input_tokens_seen": 78192936, + "step": 116000 + }, + { + "epoch": 2.8340214496860723, + "grad_norm": 0.028511550277471542, + "learning_rate": 9.413967185256806e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78196200, + "step": 116005 + }, + { + "epoch": 2.8341436005179195, + "grad_norm": 0.09941234439611435, + "learning_rate": 9.413115877016749e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78200104, + "step": 116010 + }, + { + "epoch": 2.8342657513497667, + "grad_norm": 0.009271027520298958, + "learning_rate": 9.412264573044676e-07, + "loss": 0.092, + "num_input_tokens_seen": 78203432, + "step": 116015 + }, + { + "epoch": 2.834387902181614, + "grad_norm": 0.14161081612110138, + "learning_rate": 9.411413273346769e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78206760, + "step": 116020 + }, + { + "epoch": 2.834510053013461, + "grad_norm": 0.05342858284711838, + "learning_rate": 9.410561977929228e-07, + "loss": 0.0717, + "num_input_tokens_seen": 78210536, + "step": 116025 + }, + { + "epoch": 2.8346322038453082, + "grad_norm": 89.6737289428711, + "learning_rate": 9.409710686798236e-07, + "loss": 0.1138, + "num_input_tokens_seen": 78213864, + "step": 116030 + }, + { + "epoch": 2.8347543546771554, + "grad_norm": 464.81121826171875, + "learning_rate": 9.408859399959984e-07, + "loss": 0.0712, + "num_input_tokens_seen": 78216936, + "step": 116035 + }, + { + "epoch": 2.8348765055090026, + "grad_norm": 0.04185141995549202, + "learning_rate": 9.408008117420671e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78220200, + "step": 116040 + }, + { + "epoch": 2.83499865634085, + "grad_norm": 0.03626517951488495, + "learning_rate": 9.407156839186477e-07, + "loss": 0.0403, + "num_input_tokens_seen": 78223464, + "step": 116045 + }, + { + "epoch": 2.835120807172697, + "grad_norm": 0.018436536192893982, + "learning_rate": 9.406305565263602e-07, + "loss": 0.0548, + "num_input_tokens_seen": 78227176, + "step": 116050 + }, + { + "epoch": 2.835242958004544, + "grad_norm": 0.4739210605621338, + "learning_rate": 9.405454295658229e-07, + "loss": 0.0007, + "num_input_tokens_seen": 78230696, + "step": 116055 + }, + { + "epoch": 2.835365108836391, + "grad_norm": 0.08458838611841202, + "learning_rate": 9.404603030376555e-07, + "loss": 0.0455, + "num_input_tokens_seen": 78233640, + "step": 116060 + }, + { + "epoch": 2.8354872596682386, + "grad_norm": 0.004867865238338709, + "learning_rate": 9.403751769424765e-07, + "loss": 0.0569, + "num_input_tokens_seen": 78237480, + "step": 116065 + }, + { + "epoch": 2.8356094105000853, + "grad_norm": 14.933873176574707, + "learning_rate": 9.402900512809052e-07, + "loss": 0.0715, + "num_input_tokens_seen": 78240680, + "step": 116070 + }, + { + "epoch": 2.835731561331933, + "grad_norm": 25.75342559814453, + "learning_rate": 9.402049260535613e-07, + "loss": 0.0405, + "num_input_tokens_seen": 78244136, + "step": 116075 + }, + { + "epoch": 2.8358537121637797, + "grad_norm": 0.1192922443151474, + "learning_rate": 9.401198012610628e-07, + "loss": 0.0513, + "num_input_tokens_seen": 78247912, + "step": 116080 + }, + { + "epoch": 2.835975862995627, + "grad_norm": 0.06797201931476593, + "learning_rate": 9.400346769040294e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78251176, + "step": 116085 + }, + { + "epoch": 2.836098013827474, + "grad_norm": 10.027449607849121, + "learning_rate": 9.399495529830798e-07, + "loss": 0.0773, + "num_input_tokens_seen": 78254120, + "step": 116090 + }, + { + "epoch": 2.8362201646593213, + "grad_norm": 21.70563316345215, + "learning_rate": 9.398644294988332e-07, + "loss": 0.1131, + "num_input_tokens_seen": 78257256, + "step": 116095 + }, + { + "epoch": 2.8363423154911684, + "grad_norm": 1.4771491289138794, + "learning_rate": 9.397793064519088e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78261160, + "step": 116100 + }, + { + "epoch": 2.8364644663230156, + "grad_norm": 0.07461465150117874, + "learning_rate": 9.396941838429253e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78264488, + "step": 116105 + }, + { + "epoch": 2.836586617154863, + "grad_norm": 0.023504182696342468, + "learning_rate": 9.396090616725022e-07, + "loss": 0.0073, + "num_input_tokens_seen": 78267624, + "step": 116110 + }, + { + "epoch": 2.83670876798671, + "grad_norm": 0.16718228161334991, + "learning_rate": 9.395239399412579e-07, + "loss": 0.0315, + "num_input_tokens_seen": 78271016, + "step": 116115 + }, + { + "epoch": 2.836830918818557, + "grad_norm": 24.0216121673584, + "learning_rate": 9.394388186498121e-07, + "loss": 0.0016, + "num_input_tokens_seen": 78274536, + "step": 116120 + }, + { + "epoch": 2.8369530696504044, + "grad_norm": 0.4365783929824829, + "learning_rate": 9.393536977987831e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78278504, + "step": 116125 + }, + { + "epoch": 2.8370752204822516, + "grad_norm": 0.1103735938668251, + "learning_rate": 9.392685773887907e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78281256, + "step": 116130 + }, + { + "epoch": 2.8371973713140988, + "grad_norm": 27.963003158569336, + "learning_rate": 9.391834574204534e-07, + "loss": 0.103, + "num_input_tokens_seen": 78284392, + "step": 116135 + }, + { + "epoch": 2.837319522145946, + "grad_norm": 0.08092053979635239, + "learning_rate": 9.390983378943903e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78288104, + "step": 116140 + }, + { + "epoch": 2.8374416729777927, + "grad_norm": 0.039317984133958817, + "learning_rate": 9.390132188112207e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78291944, + "step": 116145 + }, + { + "epoch": 2.8375638238096403, + "grad_norm": 0.015090661123394966, + "learning_rate": 9.389281001715631e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78295464, + "step": 116150 + }, + { + "epoch": 2.837685974641487, + "grad_norm": 0.241033136844635, + "learning_rate": 9.388429819760367e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78298536, + "step": 116155 + }, + { + "epoch": 2.8378081254733347, + "grad_norm": 0.003959252033382654, + "learning_rate": 9.38757864225261e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78302184, + "step": 116160 + }, + { + "epoch": 2.8379302763051815, + "grad_norm": 0.12065454572439194, + "learning_rate": 9.386727469198541e-07, + "loss": 0.0452, + "num_input_tokens_seen": 78305704, + "step": 116165 + }, + { + "epoch": 2.8380524271370287, + "grad_norm": 0.0025195570196956396, + "learning_rate": 9.385876300604359e-07, + "loss": 0.036, + "num_input_tokens_seen": 78309160, + "step": 116170 + }, + { + "epoch": 2.838174577968876, + "grad_norm": 0.0028503055218607187, + "learning_rate": 9.385025136476246e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78312744, + "step": 116175 + }, + { + "epoch": 2.838296728800723, + "grad_norm": 13.748456954956055, + "learning_rate": 9.3841739768204e-07, + "loss": 0.0361, + "num_input_tokens_seen": 78316136, + "step": 116180 + }, + { + "epoch": 2.8384188796325702, + "grad_norm": 0.03185724467039108, + "learning_rate": 9.383322821643003e-07, + "loss": 0.0304, + "num_input_tokens_seen": 78319784, + "step": 116185 + }, + { + "epoch": 2.8385410304644174, + "grad_norm": 0.03878801688551903, + "learning_rate": 9.382471670950248e-07, + "loss": 0.0021, + "num_input_tokens_seen": 78322728, + "step": 116190 + }, + { + "epoch": 2.8386631812962646, + "grad_norm": 0.11397980153560638, + "learning_rate": 9.38162052474833e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78326312, + "step": 116195 + }, + { + "epoch": 2.838785332128112, + "grad_norm": 0.062247421592473984, + "learning_rate": 9.380769383043428e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78329832, + "step": 116200 + }, + { + "epoch": 2.838907482959959, + "grad_norm": 0.0130871357396245, + "learning_rate": 9.379918245841741e-07, + "loss": 0.066, + "num_input_tokens_seen": 78333096, + "step": 116205 + }, + { + "epoch": 2.839029633791806, + "grad_norm": 0.017938796430826187, + "learning_rate": 9.379067113149452e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78336552, + "step": 116210 + }, + { + "epoch": 2.8391517846236534, + "grad_norm": 26.627717971801758, + "learning_rate": 9.378215984972759e-07, + "loss": 0.1872, + "num_input_tokens_seen": 78340584, + "step": 116215 + }, + { + "epoch": 2.8392739354555006, + "grad_norm": 0.03087383322417736, + "learning_rate": 9.377364861317843e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78344744, + "step": 116220 + }, + { + "epoch": 2.8393960862873477, + "grad_norm": 35.27699279785156, + "learning_rate": 9.376513742190896e-07, + "loss": 0.0476, + "num_input_tokens_seen": 78348136, + "step": 116225 + }, + { + "epoch": 2.839518237119195, + "grad_norm": 0.001459734863601625, + "learning_rate": 9.375662627598113e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78351400, + "step": 116230 + }, + { + "epoch": 2.839640387951042, + "grad_norm": 15.316701889038086, + "learning_rate": 9.374811517545677e-07, + "loss": 0.0415, + "num_input_tokens_seen": 78354536, + "step": 116235 + }, + { + "epoch": 2.839762538782889, + "grad_norm": 24.034547805786133, + "learning_rate": 9.37396041203978e-07, + "loss": 0.1079, + "num_input_tokens_seen": 78357864, + "step": 116240 + }, + { + "epoch": 2.8398846896147365, + "grad_norm": 0.1375059336423874, + "learning_rate": 9.373109311086612e-07, + "loss": 0.0373, + "num_input_tokens_seen": 78361576, + "step": 116245 + }, + { + "epoch": 2.8400068404465832, + "grad_norm": 0.002783549018204212, + "learning_rate": 9.372258214692358e-07, + "loss": 0.1102, + "num_input_tokens_seen": 78365032, + "step": 116250 + }, + { + "epoch": 2.840128991278431, + "grad_norm": 0.8223499059677124, + "learning_rate": 9.371407122863217e-07, + "loss": 0.0763, + "num_input_tokens_seen": 78368232, + "step": 116255 + }, + { + "epoch": 2.8402511421102776, + "grad_norm": 0.0012122441548854113, + "learning_rate": 9.370556035605366e-07, + "loss": 0.1492, + "num_input_tokens_seen": 78371432, + "step": 116260 + }, + { + "epoch": 2.840373292942125, + "grad_norm": 0.007098636124283075, + "learning_rate": 9.369704952925007e-07, + "loss": 0.048, + "num_input_tokens_seen": 78374824, + "step": 116265 + }, + { + "epoch": 2.840495443773972, + "grad_norm": 0.11667418479919434, + "learning_rate": 9.368853874828318e-07, + "loss": 0.0225, + "num_input_tokens_seen": 78378088, + "step": 116270 + }, + { + "epoch": 2.840617594605819, + "grad_norm": 0.003962985239923, + "learning_rate": 9.368002801321499e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78381608, + "step": 116275 + }, + { + "epoch": 2.8407397454376664, + "grad_norm": 74.27780151367188, + "learning_rate": 9.367151732410727e-07, + "loss": 0.0282, + "num_input_tokens_seen": 78384936, + "step": 116280 + }, + { + "epoch": 2.8408618962695136, + "grad_norm": 0.3086697459220886, + "learning_rate": 9.366300668102201e-07, + "loss": 0.0012, + "num_input_tokens_seen": 78388136, + "step": 116285 + }, + { + "epoch": 2.8409840471013608, + "grad_norm": 40.96001052856445, + "learning_rate": 9.365449608402107e-07, + "loss": 0.0479, + "num_input_tokens_seen": 78391784, + "step": 116290 + }, + { + "epoch": 2.841106197933208, + "grad_norm": 0.12763236463069916, + "learning_rate": 9.364598553316635e-07, + "loss": 0.001, + "num_input_tokens_seen": 78394792, + "step": 116295 + }, + { + "epoch": 2.841228348765055, + "grad_norm": 0.08105745911598206, + "learning_rate": 9.363747502851975e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78398440, + "step": 116300 + }, + { + "epoch": 2.8413504995969023, + "grad_norm": 0.009671597741544247, + "learning_rate": 9.36289645701431e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78402088, + "step": 116305 + }, + { + "epoch": 2.8414726504287495, + "grad_norm": 0.015310993418097496, + "learning_rate": 9.362045415809837e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78405480, + "step": 116310 + }, + { + "epoch": 2.8415948012605967, + "grad_norm": 0.32537516951560974, + "learning_rate": 9.361194379244738e-07, + "loss": 0.026, + "num_input_tokens_seen": 78408808, + "step": 116315 + }, + { + "epoch": 2.841716952092444, + "grad_norm": 0.1695280373096466, + "learning_rate": 9.360343347325204e-07, + "loss": 0.0271, + "num_input_tokens_seen": 78412200, + "step": 116320 + }, + { + "epoch": 2.8418391029242906, + "grad_norm": 0.31750592589378357, + "learning_rate": 9.359492320057431e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78415656, + "step": 116325 + }, + { + "epoch": 2.8419612537561383, + "grad_norm": 0.2636977732181549, + "learning_rate": 9.358641297447596e-07, + "loss": 0.0516, + "num_input_tokens_seen": 78419752, + "step": 116330 + }, + { + "epoch": 2.842083404587985, + "grad_norm": 0.0896163284778595, + "learning_rate": 9.357790279501901e-07, + "loss": 0.0686, + "num_input_tokens_seen": 78422760, + "step": 116335 + }, + { + "epoch": 2.8422055554198327, + "grad_norm": 0.2173192948102951, + "learning_rate": 9.35693926622652e-07, + "loss": 0.0587, + "num_input_tokens_seen": 78425960, + "step": 116340 + }, + { + "epoch": 2.8423277062516794, + "grad_norm": 0.984163224697113, + "learning_rate": 9.356088257627655e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78429160, + "step": 116345 + }, + { + "epoch": 2.8424498570835266, + "grad_norm": 0.02859634719789028, + "learning_rate": 9.355237253711489e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78432680, + "step": 116350 + }, + { + "epoch": 2.842572007915374, + "grad_norm": 36.05793762207031, + "learning_rate": 9.354386254484207e-07, + "loss": 0.08, + "num_input_tokens_seen": 78435688, + "step": 116355 + }, + { + "epoch": 2.842694158747221, + "grad_norm": 0.11167803406715393, + "learning_rate": 9.353535259952009e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78439144, + "step": 116360 + }, + { + "epoch": 2.842816309579068, + "grad_norm": 0.005631339270621538, + "learning_rate": 9.35268427012107e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78442408, + "step": 116365 + }, + { + "epoch": 2.8429384604109154, + "grad_norm": 0.03177811950445175, + "learning_rate": 9.351833284997589e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78445608, + "step": 116370 + }, + { + "epoch": 2.8430606112427625, + "grad_norm": 0.01641424559056759, + "learning_rate": 9.350982304587746e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78449064, + "step": 116375 + }, + { + "epoch": 2.8431827620746097, + "grad_norm": 0.0047453767620027065, + "learning_rate": 9.350131328897736e-07, + "loss": 0.0079, + "num_input_tokens_seen": 78452328, + "step": 116380 + }, + { + "epoch": 2.843304912906457, + "grad_norm": 0.25099989771842957, + "learning_rate": 9.34928035793375e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78455784, + "step": 116385 + }, + { + "epoch": 2.843427063738304, + "grad_norm": 0.008549386635422707, + "learning_rate": 9.348429391701969e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78458920, + "step": 116390 + }, + { + "epoch": 2.8435492145701513, + "grad_norm": 0.2677416205406189, + "learning_rate": 9.347578430208586e-07, + "loss": 0.0007, + "num_input_tokens_seen": 78462248, + "step": 116395 + }, + { + "epoch": 2.8436713654019985, + "grad_norm": 0.44831427931785583, + "learning_rate": 9.346727473459787e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78465768, + "step": 116400 + }, + { + "epoch": 2.8437935162338457, + "grad_norm": 0.01087011955678463, + "learning_rate": 9.34587652146176e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78469672, + "step": 116405 + }, + { + "epoch": 2.843915667065693, + "grad_norm": 0.0809653252363205, + "learning_rate": 9.345025574220698e-07, + "loss": 0.0397, + "num_input_tokens_seen": 78472744, + "step": 116410 + }, + { + "epoch": 2.84403781789754, + "grad_norm": 0.02179126814007759, + "learning_rate": 9.344174631742782e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78476136, + "step": 116415 + }, + { + "epoch": 2.844159968729387, + "grad_norm": 0.005103960167616606, + "learning_rate": 9.34332369403421e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78479656, + "step": 116420 + }, + { + "epoch": 2.8442821195612344, + "grad_norm": 472.42657470703125, + "learning_rate": 9.342472761101161e-07, + "loss": 0.0707, + "num_input_tokens_seen": 78482856, + "step": 116425 + }, + { + "epoch": 2.844404270393081, + "grad_norm": 0.06024815887212753, + "learning_rate": 9.34162183294983e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78485928, + "step": 116430 + }, + { + "epoch": 2.8445264212249284, + "grad_norm": 0.05335991457104683, + "learning_rate": 9.340770909586397e-07, + "loss": 0.0332, + "num_input_tokens_seen": 78489512, + "step": 116435 + }, + { + "epoch": 2.8446485720567756, + "grad_norm": 0.05633782967925072, + "learning_rate": 9.339919991017059e-07, + "loss": 0.0, + "num_input_tokens_seen": 78492648, + "step": 116440 + }, + { + "epoch": 2.8447707228886228, + "grad_norm": 0.015017188154160976, + "learning_rate": 9.339069077248e-07, + "loss": 0.112, + "num_input_tokens_seen": 78495976, + "step": 116445 + }, + { + "epoch": 2.84489287372047, + "grad_norm": 0.13255448639392853, + "learning_rate": 9.338218168285407e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78499432, + "step": 116450 + }, + { + "epoch": 2.845015024552317, + "grad_norm": 0.06532268226146698, + "learning_rate": 9.337367264135474e-07, + "loss": 0.0442, + "num_input_tokens_seen": 78502952, + "step": 116455 + }, + { + "epoch": 2.8451371753841643, + "grad_norm": 0.009263267740607262, + "learning_rate": 9.336516364804379e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78506088, + "step": 116460 + }, + { + "epoch": 2.8452593262160115, + "grad_norm": 0.03530608117580414, + "learning_rate": 9.335665470298319e-07, + "loss": 0.0, + "num_input_tokens_seen": 78509992, + "step": 116465 + }, + { + "epoch": 2.8453814770478587, + "grad_norm": 0.05856453999876976, + "learning_rate": 9.334814580623476e-07, + "loss": 0.0432, + "num_input_tokens_seen": 78513832, + "step": 116470 + }, + { + "epoch": 2.845503627879706, + "grad_norm": 0.36436575651168823, + "learning_rate": 9.333963695786038e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78517032, + "step": 116475 + }, + { + "epoch": 2.845625778711553, + "grad_norm": 0.054122958332300186, + "learning_rate": 9.333112815792202e-07, + "loss": 0.0007, + "num_input_tokens_seen": 78520232, + "step": 116480 + }, + { + "epoch": 2.8457479295434003, + "grad_norm": 0.011439117603003979, + "learning_rate": 9.332261940648143e-07, + "loss": 0.0563, + "num_input_tokens_seen": 78523560, + "step": 116485 + }, + { + "epoch": 2.8458700803752475, + "grad_norm": 0.12531723082065582, + "learning_rate": 9.331411070360059e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78527208, + "step": 116490 + }, + { + "epoch": 2.8459922312070947, + "grad_norm": 0.012637167237699032, + "learning_rate": 9.330560204934129e-07, + "loss": 0.0016, + "num_input_tokens_seen": 78530408, + "step": 116495 + }, + { + "epoch": 2.846114382038942, + "grad_norm": 0.013321335427463055, + "learning_rate": 9.329709344376549e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78533544, + "step": 116500 + }, + { + "epoch": 2.8462365328707886, + "grad_norm": 76.95355224609375, + "learning_rate": 9.328858488693503e-07, + "loss": 0.0606, + "num_input_tokens_seen": 78536680, + "step": 116505 + }, + { + "epoch": 2.8463586837026362, + "grad_norm": 0.23179636895656586, + "learning_rate": 9.328007637891175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78539752, + "step": 116510 + }, + { + "epoch": 2.846480834534483, + "grad_norm": 0.005101948510855436, + "learning_rate": 9.327156791975762e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78543400, + "step": 116515 + }, + { + "epoch": 2.8466029853663306, + "grad_norm": 0.07535602897405624, + "learning_rate": 9.326305950953439e-07, + "loss": 0.0796, + "num_input_tokens_seen": 78546792, + "step": 116520 + }, + { + "epoch": 2.8467251361981774, + "grad_norm": 42.198482513427734, + "learning_rate": 9.325455114830406e-07, + "loss": 0.0829, + "num_input_tokens_seen": 78550120, + "step": 116525 + }, + { + "epoch": 2.8468472870300245, + "grad_norm": 0.0038453794550150633, + "learning_rate": 9.32460428361284e-07, + "loss": 0.1552, + "num_input_tokens_seen": 78553128, + "step": 116530 + }, + { + "epoch": 2.8469694378618717, + "grad_norm": 0.15347400307655334, + "learning_rate": 9.323753457306934e-07, + "loss": 0.0618, + "num_input_tokens_seen": 78556584, + "step": 116535 + }, + { + "epoch": 2.847091588693719, + "grad_norm": 0.0010158936493098736, + "learning_rate": 9.322902635918879e-07, + "loss": 0.0, + "num_input_tokens_seen": 78560040, + "step": 116540 + }, + { + "epoch": 2.847213739525566, + "grad_norm": 0.02456558868288994, + "learning_rate": 9.322051819454856e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78563432, + "step": 116545 + }, + { + "epoch": 2.8473358903574133, + "grad_norm": 0.06507387012243271, + "learning_rate": 9.321201007921054e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78566376, + "step": 116550 + }, + { + "epoch": 2.8474580411892605, + "grad_norm": 0.006255817599594593, + "learning_rate": 9.320350201323662e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78569576, + "step": 116555 + }, + { + "epoch": 2.8475801920211077, + "grad_norm": 0.03314283862709999, + "learning_rate": 9.319499399668863e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78572712, + "step": 116560 + }, + { + "epoch": 2.847702342852955, + "grad_norm": 1.2695236206054688, + "learning_rate": 9.318648602962852e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78575784, + "step": 116565 + }, + { + "epoch": 2.847824493684802, + "grad_norm": 0.06694022566080093, + "learning_rate": 9.317797811211807e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78579304, + "step": 116570 + }, + { + "epoch": 2.8479466445166493, + "grad_norm": 38.54062271118164, + "learning_rate": 9.316947024421923e-07, + "loss": 0.122, + "num_input_tokens_seen": 78582440, + "step": 116575 + }, + { + "epoch": 2.8480687953484964, + "grad_norm": 0.06952695548534393, + "learning_rate": 9.316096242599382e-07, + "loss": 0.0641, + "num_input_tokens_seen": 78585960, + "step": 116580 + }, + { + "epoch": 2.8481909461803436, + "grad_norm": 0.0023296386934816837, + "learning_rate": 9.315245465750376e-07, + "loss": 0.0, + "num_input_tokens_seen": 78589672, + "step": 116585 + }, + { + "epoch": 2.8483130970121904, + "grad_norm": 0.4964067041873932, + "learning_rate": 9.314394693881086e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78593256, + "step": 116590 + }, + { + "epoch": 2.848435247844038, + "grad_norm": 0.01752789504826069, + "learning_rate": 9.313543926997703e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78596968, + "step": 116595 + }, + { + "epoch": 2.8485573986758848, + "grad_norm": 0.008099086582660675, + "learning_rate": 9.312693165106413e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78600424, + "step": 116600 + }, + { + "epoch": 2.8486795495077324, + "grad_norm": 0.004455342888832092, + "learning_rate": 9.311842408213404e-07, + "loss": 0.0411, + "num_input_tokens_seen": 78603688, + "step": 116605 + }, + { + "epoch": 2.848801700339579, + "grad_norm": 0.0616503469645977, + "learning_rate": 9.310991656324865e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78606568, + "step": 116610 + }, + { + "epoch": 2.8489238511714263, + "grad_norm": 36.6910285949707, + "learning_rate": 9.310140909446974e-07, + "loss": 0.0405, + "num_input_tokens_seen": 78609896, + "step": 116615 + }, + { + "epoch": 2.8490460020032735, + "grad_norm": 0.0014246907085180283, + "learning_rate": 9.309290167585929e-07, + "loss": 0.0679, + "num_input_tokens_seen": 78613416, + "step": 116620 + }, + { + "epoch": 2.8491681528351207, + "grad_norm": 0.001358588458970189, + "learning_rate": 9.308439430747908e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78617320, + "step": 116625 + }, + { + "epoch": 2.849290303666968, + "grad_norm": 0.003647299250587821, + "learning_rate": 9.307588698939101e-07, + "loss": 0.0717, + "num_input_tokens_seen": 78620584, + "step": 116630 + }, + { + "epoch": 2.849412454498815, + "grad_norm": 0.1672152727842331, + "learning_rate": 9.306737972165699e-07, + "loss": 0.0423, + "num_input_tokens_seen": 78623976, + "step": 116635 + }, + { + "epoch": 2.8495346053306623, + "grad_norm": 4.308013103582198e-06, + "learning_rate": 9.30588725043388e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78627304, + "step": 116640 + }, + { + "epoch": 2.8496567561625095, + "grad_norm": 0.013248121365904808, + "learning_rate": 9.305036533749842e-07, + "loss": 0.0814, + "num_input_tokens_seen": 78630184, + "step": 116645 + }, + { + "epoch": 2.8497789069943567, + "grad_norm": 0.6838406920433044, + "learning_rate": 9.304185822119759e-07, + "loss": 0.0217, + "num_input_tokens_seen": 78633832, + "step": 116650 + }, + { + "epoch": 2.849901057826204, + "grad_norm": 0.005772408097982407, + "learning_rate": 9.303335115549828e-07, + "loss": 0.0672, + "num_input_tokens_seen": 78637288, + "step": 116655 + }, + { + "epoch": 2.850023208658051, + "grad_norm": 0.0022411204408854246, + "learning_rate": 9.302484414046233e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78641256, + "step": 116660 + }, + { + "epoch": 2.8501453594898982, + "grad_norm": 50.66267395019531, + "learning_rate": 9.301633717615152e-07, + "loss": 0.1293, + "num_input_tokens_seen": 78644840, + "step": 116665 + }, + { + "epoch": 2.8502675103217454, + "grad_norm": 0.007837265729904175, + "learning_rate": 9.300783026262785e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78648104, + "step": 116670 + }, + { + "epoch": 2.8503896611535926, + "grad_norm": 21.46866798400879, + "learning_rate": 9.299932339995308e-07, + "loss": 0.0458, + "num_input_tokens_seen": 78651880, + "step": 116675 + }, + { + "epoch": 2.85051181198544, + "grad_norm": 175.33380126953125, + "learning_rate": 9.299081658818915e-07, + "loss": 0.029, + "num_input_tokens_seen": 78655208, + "step": 116680 + }, + { + "epoch": 2.8506339628172865, + "grad_norm": 0.016408970579504967, + "learning_rate": 9.298230982739784e-07, + "loss": 0.1522, + "num_input_tokens_seen": 78658216, + "step": 116685 + }, + { + "epoch": 2.850756113649134, + "grad_norm": 0.013738897629082203, + "learning_rate": 9.297380311764107e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78661160, + "step": 116690 + }, + { + "epoch": 2.850878264480981, + "grad_norm": 0.2336987853050232, + "learning_rate": 9.296529645898073e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78664232, + "step": 116695 + }, + { + "epoch": 2.8510004153128286, + "grad_norm": 0.06240047514438629, + "learning_rate": 9.295678985147863e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78667496, + "step": 116700 + }, + { + "epoch": 2.8511225661446753, + "grad_norm": 0.2012481391429901, + "learning_rate": 9.294828329519664e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78670632, + "step": 116705 + }, + { + "epoch": 2.8512447169765225, + "grad_norm": 0.07818334549665451, + "learning_rate": 9.293977679019663e-07, + "loss": 0.0565, + "num_input_tokens_seen": 78673768, + "step": 116710 + }, + { + "epoch": 2.8513668678083697, + "grad_norm": 0.010745156556367874, + "learning_rate": 9.293127033654045e-07, + "loss": 0.0215, + "num_input_tokens_seen": 78677352, + "step": 116715 + }, + { + "epoch": 2.851489018640217, + "grad_norm": 0.21190738677978516, + "learning_rate": 9.292276393429001e-07, + "loss": 0.0767, + "num_input_tokens_seen": 78680744, + "step": 116720 + }, + { + "epoch": 2.851611169472064, + "grad_norm": 0.0015108853112906218, + "learning_rate": 9.291425758350709e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78684008, + "step": 116725 + }, + { + "epoch": 2.8517333203039112, + "grad_norm": 0.1414852887392044, + "learning_rate": 9.290575128425364e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78687336, + "step": 116730 + }, + { + "epoch": 2.8518554711357584, + "grad_norm": 0.18726703524589539, + "learning_rate": 9.289724503659145e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78690856, + "step": 116735 + }, + { + "epoch": 2.8519776219676056, + "grad_norm": 0.2173684686422348, + "learning_rate": 9.288873884058242e-07, + "loss": 0.0536, + "num_input_tokens_seen": 78694056, + "step": 116740 + }, + { + "epoch": 2.852099772799453, + "grad_norm": 0.004171188920736313, + "learning_rate": 9.288023269628836e-07, + "loss": 0.1149, + "num_input_tokens_seen": 78697512, + "step": 116745 + }, + { + "epoch": 2.8522219236313, + "grad_norm": 44.7703971862793, + "learning_rate": 9.287172660377119e-07, + "loss": 0.0704, + "num_input_tokens_seen": 78700712, + "step": 116750 + }, + { + "epoch": 2.852344074463147, + "grad_norm": 0.31654784083366394, + "learning_rate": 9.286322056309272e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78704040, + "step": 116755 + }, + { + "epoch": 2.8524662252949944, + "grad_norm": 0.007102549076080322, + "learning_rate": 9.285471457431486e-07, + "loss": 0.0325, + "num_input_tokens_seen": 78707688, + "step": 116760 + }, + { + "epoch": 2.8525883761268416, + "grad_norm": 36.19954299926758, + "learning_rate": 9.284620863749945e-07, + "loss": 0.0999, + "num_input_tokens_seen": 78711208, + "step": 116765 + }, + { + "epoch": 2.8527105269586883, + "grad_norm": 0.3918383717536926, + "learning_rate": 9.283770275270828e-07, + "loss": 0.1455, + "num_input_tokens_seen": 78714472, + "step": 116770 + }, + { + "epoch": 2.852832677790536, + "grad_norm": 0.020883552730083466, + "learning_rate": 9.282919692000331e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78717928, + "step": 116775 + }, + { + "epoch": 2.8529548286223827, + "grad_norm": 0.19970282912254333, + "learning_rate": 9.282069113944631e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78721256, + "step": 116780 + }, + { + "epoch": 2.8530769794542303, + "grad_norm": 0.02931102365255356, + "learning_rate": 9.281218541109917e-07, + "loss": 0.0378, + "num_input_tokens_seen": 78724904, + "step": 116785 + }, + { + "epoch": 2.853199130286077, + "grad_norm": 0.0009442739537917078, + "learning_rate": 9.28036797350238e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78728616, + "step": 116790 + }, + { + "epoch": 2.8533212811179243, + "grad_norm": 0.5051276683807373, + "learning_rate": 9.279517411128196e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78732072, + "step": 116795 + }, + { + "epoch": 2.8534434319497715, + "grad_norm": 35.278289794921875, + "learning_rate": 9.27866685399356e-07, + "loss": 0.054, + "num_input_tokens_seen": 78735208, + "step": 116800 + }, + { + "epoch": 2.8535655827816186, + "grad_norm": 32.29579162597656, + "learning_rate": 9.277816302104647e-07, + "loss": 0.0594, + "num_input_tokens_seen": 78737960, + "step": 116805 + }, + { + "epoch": 2.853687733613466, + "grad_norm": 0.0075948829762637615, + "learning_rate": 9.276965755467652e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78741224, + "step": 116810 + }, + { + "epoch": 2.853809884445313, + "grad_norm": 0.040782373398542404, + "learning_rate": 9.276115214088758e-07, + "loss": 0.0185, + "num_input_tokens_seen": 78744360, + "step": 116815 + }, + { + "epoch": 2.85393203527716, + "grad_norm": 0.07822287082672119, + "learning_rate": 9.275264677974144e-07, + "loss": 0.0538, + "num_input_tokens_seen": 78748072, + "step": 116820 + }, + { + "epoch": 2.8540541861090074, + "grad_norm": 0.00317594176158309, + "learning_rate": 9.274414147130006e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78751464, + "step": 116825 + }, + { + "epoch": 2.8541763369408546, + "grad_norm": 41.26247787475586, + "learning_rate": 9.273563621562516e-07, + "loss": 0.0495, + "num_input_tokens_seen": 78755112, + "step": 116830 + }, + { + "epoch": 2.854298487772702, + "grad_norm": 37.74317932128906, + "learning_rate": 9.272713101277873e-07, + "loss": 0.0566, + "num_input_tokens_seen": 78758632, + "step": 116835 + }, + { + "epoch": 2.854420638604549, + "grad_norm": 0.15638141334056854, + "learning_rate": 9.271862586282252e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78761896, + "step": 116840 + }, + { + "epoch": 2.854542789436396, + "grad_norm": 0.17552080750465393, + "learning_rate": 9.271012076581842e-07, + "loss": 0.0005, + "num_input_tokens_seen": 78765032, + "step": 116845 + }, + { + "epoch": 2.8546649402682434, + "grad_norm": 0.009987038560211658, + "learning_rate": 9.270161572182833e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78768168, + "step": 116850 + }, + { + "epoch": 2.8547870911000905, + "grad_norm": 0.06905113905668259, + "learning_rate": 9.269311073091403e-07, + "loss": 0.0751, + "num_input_tokens_seen": 78771432, + "step": 116855 + }, + { + "epoch": 2.8549092419319377, + "grad_norm": 0.03222937509417534, + "learning_rate": 9.268460579313738e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78774888, + "step": 116860 + }, + { + "epoch": 2.8550313927637845, + "grad_norm": 0.17098113894462585, + "learning_rate": 9.267610090856025e-07, + "loss": 0.0604, + "num_input_tokens_seen": 78778024, + "step": 116865 + }, + { + "epoch": 2.855153543595632, + "grad_norm": 12.133179664611816, + "learning_rate": 9.266759607724451e-07, + "loss": 0.1097, + "num_input_tokens_seen": 78781160, + "step": 116870 + }, + { + "epoch": 2.855275694427479, + "grad_norm": 0.021802183240652084, + "learning_rate": 9.265909129925194e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78784552, + "step": 116875 + }, + { + "epoch": 2.855397845259326, + "grad_norm": 0.005931623745709658, + "learning_rate": 9.265058657464444e-07, + "loss": 0.0936, + "num_input_tokens_seen": 78787560, + "step": 116880 + }, + { + "epoch": 2.8555199960911732, + "grad_norm": 51.705326080322266, + "learning_rate": 9.264208190348388e-07, + "loss": 0.1411, + "num_input_tokens_seen": 78791080, + "step": 116885 + }, + { + "epoch": 2.8556421469230204, + "grad_norm": 0.04667201265692711, + "learning_rate": 9.263357728583204e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78794728, + "step": 116890 + }, + { + "epoch": 2.8557642977548676, + "grad_norm": 0.01778298430144787, + "learning_rate": 9.262507272175087e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78798376, + "step": 116895 + }, + { + "epoch": 2.855886448586715, + "grad_norm": 21.58896827697754, + "learning_rate": 9.261656821130208e-07, + "loss": 0.0517, + "num_input_tokens_seen": 78801576, + "step": 116900 + }, + { + "epoch": 2.856008599418562, + "grad_norm": 0.033148009330034256, + "learning_rate": 9.260806375454764e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78805160, + "step": 116905 + }, + { + "epoch": 2.856130750250409, + "grad_norm": 0.6077508926391602, + "learning_rate": 9.259955935154932e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78808552, + "step": 116910 + }, + { + "epoch": 2.8562529010822564, + "grad_norm": 0.025105271488428116, + "learning_rate": 9.259105500236902e-07, + "loss": 0.0674, + "num_input_tokens_seen": 78811816, + "step": 116915 + }, + { + "epoch": 2.8563750519141036, + "grad_norm": 0.041017137467861176, + "learning_rate": 9.258255070706857e-07, + "loss": 0.0481, + "num_input_tokens_seen": 78814952, + "step": 116920 + }, + { + "epoch": 2.8564972027459508, + "grad_norm": 0.029608365148305893, + "learning_rate": 9.257404646570978e-07, + "loss": 0.0651, + "num_input_tokens_seen": 78818152, + "step": 116925 + }, + { + "epoch": 2.856619353577798, + "grad_norm": 20.657743453979492, + "learning_rate": 9.256554227835455e-07, + "loss": 0.0628, + "num_input_tokens_seen": 78821288, + "step": 116930 + }, + { + "epoch": 2.856741504409645, + "grad_norm": 0.018212083727121353, + "learning_rate": 9.255703814506466e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78824552, + "step": 116935 + }, + { + "epoch": 2.8568636552414923, + "grad_norm": 0.008494891226291656, + "learning_rate": 9.254853406590197e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78827816, + "step": 116940 + }, + { + "epoch": 2.8569858060733395, + "grad_norm": 39.98295974731445, + "learning_rate": 9.254003004092841e-07, + "loss": 0.0845, + "num_input_tokens_seen": 78830952, + "step": 116945 + }, + { + "epoch": 2.8571079569051863, + "grad_norm": 0.2915056049823761, + "learning_rate": 9.253152607020572e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78833832, + "step": 116950 + }, + { + "epoch": 2.857230107737034, + "grad_norm": 0.01098598912358284, + "learning_rate": 9.25230221537958e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78836968, + "step": 116955 + }, + { + "epoch": 2.8573522585688806, + "grad_norm": 0.01814945414662361, + "learning_rate": 9.251451829176045e-07, + "loss": 0.0715, + "num_input_tokens_seen": 78840168, + "step": 116960 + }, + { + "epoch": 2.8574744094007283, + "grad_norm": 0.19818727672100067, + "learning_rate": 9.250601448416155e-07, + "loss": 0.0757, + "num_input_tokens_seen": 78843560, + "step": 116965 + }, + { + "epoch": 2.857596560232575, + "grad_norm": 0.0399983674287796, + "learning_rate": 9.249751073106095e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78847080, + "step": 116970 + }, + { + "epoch": 2.857718711064422, + "grad_norm": 0.17228193581104279, + "learning_rate": 9.248900703252042e-07, + "loss": 0.0008, + "num_input_tokens_seen": 78850472, + "step": 116975 + }, + { + "epoch": 2.8578408618962694, + "grad_norm": 0.0858980044722557, + "learning_rate": 9.248050338860192e-07, + "loss": 0.0529, + "num_input_tokens_seen": 78853992, + "step": 116980 + }, + { + "epoch": 2.8579630127281166, + "grad_norm": 33.91243362426758, + "learning_rate": 9.247199979936715e-07, + "loss": 0.1234, + "num_input_tokens_seen": 78857384, + "step": 116985 + }, + { + "epoch": 2.858085163559964, + "grad_norm": 0.14944037795066833, + "learning_rate": 9.246349626487809e-07, + "loss": 0.0639, + "num_input_tokens_seen": 78860456, + "step": 116990 + }, + { + "epoch": 2.858207314391811, + "grad_norm": 0.2674129605293274, + "learning_rate": 9.245499278519644e-07, + "loss": 0.0381, + "num_input_tokens_seen": 78863720, + "step": 116995 + }, + { + "epoch": 2.858329465223658, + "grad_norm": 0.07492050528526306, + "learning_rate": 9.244648936038412e-07, + "loss": 0.0433, + "num_input_tokens_seen": 78866792, + "step": 117000 + }, + { + "epoch": 2.8584516160555054, + "grad_norm": 0.13231611251831055, + "learning_rate": 9.243798599050302e-07, + "loss": 0.0666, + "num_input_tokens_seen": 78870120, + "step": 117005 + }, + { + "epoch": 2.8585737668873525, + "grad_norm": 126.9564437866211, + "learning_rate": 9.242948267561489e-07, + "loss": 0.0207, + "num_input_tokens_seen": 78873320, + "step": 117010 + }, + { + "epoch": 2.8586959177191997, + "grad_norm": 168.95916748046875, + "learning_rate": 9.242097941578159e-07, + "loss": 0.031, + "num_input_tokens_seen": 78876520, + "step": 117015 + }, + { + "epoch": 2.858818068551047, + "grad_norm": 0.014719391241669655, + "learning_rate": 9.241247621106498e-07, + "loss": 0.0434, + "num_input_tokens_seen": 78879720, + "step": 117020 + }, + { + "epoch": 2.858940219382894, + "grad_norm": 0.04394163191318512, + "learning_rate": 9.24039730615269e-07, + "loss": 0.067, + "num_input_tokens_seen": 78882920, + "step": 117025 + }, + { + "epoch": 2.8590623702147413, + "grad_norm": 326.14288330078125, + "learning_rate": 9.239546996722914e-07, + "loss": 0.0288, + "num_input_tokens_seen": 78886248, + "step": 117030 + }, + { + "epoch": 2.8591845210465885, + "grad_norm": 0.01146597508341074, + "learning_rate": 9.238696692823355e-07, + "loss": 0.064, + "num_input_tokens_seen": 78889448, + "step": 117035 + }, + { + "epoch": 2.8593066718784357, + "grad_norm": 41.29705810546875, + "learning_rate": 9.237846394460203e-07, + "loss": 0.0519, + "num_input_tokens_seen": 78892456, + "step": 117040 + }, + { + "epoch": 2.8594288227102824, + "grad_norm": 0.007990594953298569, + "learning_rate": 9.236996101639632e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78895976, + "step": 117045 + }, + { + "epoch": 2.85955097354213, + "grad_norm": 0.8427734375, + "learning_rate": 9.236145814367836e-07, + "loss": 0.0754, + "num_input_tokens_seen": 78899368, + "step": 117050 + }, + { + "epoch": 2.859673124373977, + "grad_norm": 0.02752232365310192, + "learning_rate": 9.23529553265099e-07, + "loss": 0.0563, + "num_input_tokens_seen": 78902440, + "step": 117055 + }, + { + "epoch": 2.859795275205824, + "grad_norm": 0.1377357393503189, + "learning_rate": 9.23444525649528e-07, + "loss": 0.0582, + "num_input_tokens_seen": 78905640, + "step": 117060 + }, + { + "epoch": 2.859917426037671, + "grad_norm": 0.5623184442520142, + "learning_rate": 9.233594985906892e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78909224, + "step": 117065 + }, + { + "epoch": 2.8600395768695184, + "grad_norm": 0.017609449103474617, + "learning_rate": 9.232744720892006e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78912360, + "step": 117070 + }, + { + "epoch": 2.8601617277013656, + "grad_norm": 73.11463165283203, + "learning_rate": 9.23189446145681e-07, + "loss": 0.0495, + "num_input_tokens_seen": 78915688, + "step": 117075 + }, + { + "epoch": 2.8602838785332128, + "grad_norm": 0.369545578956604, + "learning_rate": 9.231044207607479e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78918888, + "step": 117080 + }, + { + "epoch": 2.86040602936506, + "grad_norm": 0.04580839350819588, + "learning_rate": 9.230193959350207e-07, + "loss": 0.0357, + "num_input_tokens_seen": 78922408, + "step": 117085 + }, + { + "epoch": 2.860528180196907, + "grad_norm": 0.0036676761228591204, + "learning_rate": 9.229343716691166e-07, + "loss": 0.1141, + "num_input_tokens_seen": 78926056, + "step": 117090 + }, + { + "epoch": 2.8606503310287543, + "grad_norm": 0.13639883697032928, + "learning_rate": 9.228493479636545e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78929192, + "step": 117095 + }, + { + "epoch": 2.8607724818606015, + "grad_norm": 0.010075625032186508, + "learning_rate": 9.227643248192532e-07, + "loss": 0.0, + "num_input_tokens_seen": 78932648, + "step": 117100 + }, + { + "epoch": 2.8608946326924487, + "grad_norm": 0.841788113117218, + "learning_rate": 9.226793022365299e-07, + "loss": 0.0474, + "num_input_tokens_seen": 78936104, + "step": 117105 + }, + { + "epoch": 2.861016783524296, + "grad_norm": 23.549686431884766, + "learning_rate": 9.225942802161041e-07, + "loss": 0.0352, + "num_input_tokens_seen": 78939176, + "step": 117110 + }, + { + "epoch": 2.861138934356143, + "grad_norm": 0.00849588867276907, + "learning_rate": 9.225092587585929e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78942248, + "step": 117115 + }, + { + "epoch": 2.8612610851879903, + "grad_norm": 0.36857789754867554, + "learning_rate": 9.224242378646156e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78945384, + "step": 117120 + }, + { + "epoch": 2.8613832360198375, + "grad_norm": 0.483141154050827, + "learning_rate": 9.223392175347903e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78949224, + "step": 117125 + }, + { + "epoch": 2.861505386851684, + "grad_norm": 0.04797397181391716, + "learning_rate": 9.222541977697346e-07, + "loss": 0.0003, + "num_input_tokens_seen": 78952424, + "step": 117130 + }, + { + "epoch": 2.861627537683532, + "grad_norm": 19.711732864379883, + "learning_rate": 9.221691785700679e-07, + "loss": 0.062, + "num_input_tokens_seen": 78955816, + "step": 117135 + }, + { + "epoch": 2.8617496885153786, + "grad_norm": 2.041687488555908, + "learning_rate": 9.220841599364073e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78959144, + "step": 117140 + }, + { + "epoch": 2.861871839347226, + "grad_norm": 0.006740696262568235, + "learning_rate": 9.219991418693721e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78962344, + "step": 117145 + }, + { + "epoch": 2.861993990179073, + "grad_norm": 0.08522861450910568, + "learning_rate": 9.219141243695796e-07, + "loss": 0.0002, + "num_input_tokens_seen": 78965608, + "step": 117150 + }, + { + "epoch": 2.86211614101092, + "grad_norm": 0.02093188278377056, + "learning_rate": 9.218291074376487e-07, + "loss": 0.0878, + "num_input_tokens_seen": 78968680, + "step": 117155 + }, + { + "epoch": 2.8622382918427673, + "grad_norm": 0.0043796938844025135, + "learning_rate": 9.217440910741979e-07, + "loss": 0.0415, + "num_input_tokens_seen": 78972136, + "step": 117160 + }, + { + "epoch": 2.8623604426746145, + "grad_norm": 0.1477183699607849, + "learning_rate": 9.21659075279845e-07, + "loss": 0.076, + "num_input_tokens_seen": 78975144, + "step": 117165 + }, + { + "epoch": 2.8624825935064617, + "grad_norm": 0.3976381719112396, + "learning_rate": 9.215740600552084e-07, + "loss": 0.0004, + "num_input_tokens_seen": 78978472, + "step": 117170 + }, + { + "epoch": 2.862604744338309, + "grad_norm": 0.030067600309848785, + "learning_rate": 9.214890454009062e-07, + "loss": 0.0381, + "num_input_tokens_seen": 78981928, + "step": 117175 + }, + { + "epoch": 2.862726895170156, + "grad_norm": 0.4479190707206726, + "learning_rate": 9.214040313175571e-07, + "loss": 0.0068, + "num_input_tokens_seen": 78985256, + "step": 117180 + }, + { + "epoch": 2.8628490460020033, + "grad_norm": 26.292327880859375, + "learning_rate": 9.213190178057784e-07, + "loss": 0.0431, + "num_input_tokens_seen": 78988520, + "step": 117185 + }, + { + "epoch": 2.8629711968338505, + "grad_norm": 0.019016873091459274, + "learning_rate": 9.212340048661892e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78991976, + "step": 117190 + }, + { + "epoch": 2.8630933476656977, + "grad_norm": 0.09147219359874725, + "learning_rate": 9.211489924994078e-07, + "loss": 0.0006, + "num_input_tokens_seen": 78994984, + "step": 117195 + }, + { + "epoch": 2.863215498497545, + "grad_norm": 0.020165953785181046, + "learning_rate": 9.210639807060518e-07, + "loss": 0.0001, + "num_input_tokens_seen": 78998568, + "step": 117200 + }, + { + "epoch": 2.863337649329392, + "grad_norm": 65.74693298339844, + "learning_rate": 9.209789694867401e-07, + "loss": 0.0717, + "num_input_tokens_seen": 79002344, + "step": 117205 + }, + { + "epoch": 2.8634598001612392, + "grad_norm": 1.2636717557907104, + "learning_rate": 9.208939588420902e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79005864, + "step": 117210 + }, + { + "epoch": 2.863581950993086, + "grad_norm": 0.0033105944748967886, + "learning_rate": 9.208089487727208e-07, + "loss": 0.1023, + "num_input_tokens_seen": 79009064, + "step": 117215 + }, + { + "epoch": 2.8637041018249336, + "grad_norm": 0.016109691932797432, + "learning_rate": 9.207239392792503e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79012200, + "step": 117220 + }, + { + "epoch": 2.8638262526567804, + "grad_norm": 0.7885407209396362, + "learning_rate": 9.206389303622964e-07, + "loss": 0.0346, + "num_input_tokens_seen": 79015464, + "step": 117225 + }, + { + "epoch": 2.863948403488628, + "grad_norm": 0.01864694058895111, + "learning_rate": 9.205539220224779e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79018920, + "step": 117230 + }, + { + "epoch": 2.8640705543204747, + "grad_norm": 1.6205692291259766, + "learning_rate": 9.20468914260412e-07, + "loss": 0.0442, + "num_input_tokens_seen": 79022504, + "step": 117235 + }, + { + "epoch": 2.864192705152322, + "grad_norm": 0.27415525913238525, + "learning_rate": 9.203839070767182e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79025704, + "step": 117240 + }, + { + "epoch": 2.864314855984169, + "grad_norm": 370.6185607910156, + "learning_rate": 9.202989004720136e-07, + "loss": 0.0865, + "num_input_tokens_seen": 79028712, + "step": 117245 + }, + { + "epoch": 2.8644370068160163, + "grad_norm": 0.01124818529933691, + "learning_rate": 9.202138944469168e-07, + "loss": 0.0, + "num_input_tokens_seen": 79031656, + "step": 117250 + }, + { + "epoch": 2.8645591576478635, + "grad_norm": 0.0069727166555821896, + "learning_rate": 9.201288890020464e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79035944, + "step": 117255 + }, + { + "epoch": 2.8646813084797107, + "grad_norm": 0.016167595982551575, + "learning_rate": 9.200438841380198e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79039080, + "step": 117260 + }, + { + "epoch": 2.864803459311558, + "grad_norm": 0.022563988342881203, + "learning_rate": 9.199588798554559e-07, + "loss": 0.0225, + "num_input_tokens_seen": 79043112, + "step": 117265 + }, + { + "epoch": 2.864925610143405, + "grad_norm": 0.009637587703764439, + "learning_rate": 9.198738761549724e-07, + "loss": 0.0026, + "num_input_tokens_seen": 79046568, + "step": 117270 + }, + { + "epoch": 2.8650477609752523, + "grad_norm": 0.02311187982559204, + "learning_rate": 9.197888730371875e-07, + "loss": 0.0501, + "num_input_tokens_seen": 79050152, + "step": 117275 + }, + { + "epoch": 2.8651699118070995, + "grad_norm": 0.1332298219203949, + "learning_rate": 9.1970387050272e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79053800, + "step": 117280 + }, + { + "epoch": 2.8652920626389466, + "grad_norm": 0.31413617730140686, + "learning_rate": 9.196188685521869e-07, + "loss": 0.0236, + "num_input_tokens_seen": 79057128, + "step": 117285 + }, + { + "epoch": 2.865414213470794, + "grad_norm": 0.05050954595208168, + "learning_rate": 9.195338671862077e-07, + "loss": 0.1096, + "num_input_tokens_seen": 79060776, + "step": 117290 + }, + { + "epoch": 2.865536364302641, + "grad_norm": 0.01581372506916523, + "learning_rate": 9.194488664053992e-07, + "loss": 0.0395, + "num_input_tokens_seen": 79063848, + "step": 117295 + }, + { + "epoch": 2.865658515134488, + "grad_norm": 0.056255050003528595, + "learning_rate": 9.193638662103807e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79066984, + "step": 117300 + }, + { + "epoch": 2.8657806659663354, + "grad_norm": 0.10820584744215012, + "learning_rate": 9.192788666017695e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79070184, + "step": 117305 + }, + { + "epoch": 2.865902816798182, + "grad_norm": 0.016701946035027504, + "learning_rate": 9.191938675801839e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79073512, + "step": 117310 + }, + { + "epoch": 2.86602496763003, + "grad_norm": 0.007976418361067772, + "learning_rate": 9.191088691462428e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79077160, + "step": 117315 + }, + { + "epoch": 2.8661471184618765, + "grad_norm": 0.008298970758914948, + "learning_rate": 9.190238713005636e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79080552, + "step": 117320 + }, + { + "epoch": 2.866269269293724, + "grad_norm": 0.07891908288002014, + "learning_rate": 9.189388740437645e-07, + "loss": 0.0422, + "num_input_tokens_seen": 79084328, + "step": 117325 + }, + { + "epoch": 2.866391420125571, + "grad_norm": 0.06407475471496582, + "learning_rate": 9.188538773764637e-07, + "loss": 0.1515, + "num_input_tokens_seen": 79088104, + "step": 117330 + }, + { + "epoch": 2.866513570957418, + "grad_norm": 0.08465859293937683, + "learning_rate": 9.187688812992796e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79091432, + "step": 117335 + }, + { + "epoch": 2.8666357217892653, + "grad_norm": 0.09898823499679565, + "learning_rate": 9.186838858128295e-07, + "loss": 0.0315, + "num_input_tokens_seen": 79095272, + "step": 117340 + }, + { + "epoch": 2.8667578726211125, + "grad_norm": 0.1045418381690979, + "learning_rate": 9.185988909177321e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79098408, + "step": 117345 + }, + { + "epoch": 2.8668800234529597, + "grad_norm": 40.67689895629883, + "learning_rate": 9.18513896614606e-07, + "loss": 0.1138, + "num_input_tokens_seen": 79101352, + "step": 117350 + }, + { + "epoch": 2.867002174284807, + "grad_norm": 0.007143858820199966, + "learning_rate": 9.184289029040683e-07, + "loss": 0.0741, + "num_input_tokens_seen": 79104424, + "step": 117355 + }, + { + "epoch": 2.867124325116654, + "grad_norm": 0.09244782477617264, + "learning_rate": 9.18343909786738e-07, + "loss": 0.0324, + "num_input_tokens_seen": 79107944, + "step": 117360 + }, + { + "epoch": 2.8672464759485012, + "grad_norm": 0.13260585069656372, + "learning_rate": 9.182589172632321e-07, + "loss": 0.0407, + "num_input_tokens_seen": 79111528, + "step": 117365 + }, + { + "epoch": 2.8673686267803484, + "grad_norm": 0.026284320279955864, + "learning_rate": 9.181739253341699e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79114984, + "step": 117370 + }, + { + "epoch": 2.8674907776121956, + "grad_norm": 1.0271484851837158, + "learning_rate": 9.180889340001686e-07, + "loss": 0.0382, + "num_input_tokens_seen": 79118632, + "step": 117375 + }, + { + "epoch": 2.867612928444043, + "grad_norm": 0.022432560101151466, + "learning_rate": 9.180039432618467e-07, + "loss": 0.0347, + "num_input_tokens_seen": 79121320, + "step": 117380 + }, + { + "epoch": 2.86773507927589, + "grad_norm": 0.019050292670726776, + "learning_rate": 9.179189531198225e-07, + "loss": 0.0292, + "num_input_tokens_seen": 79124456, + "step": 117385 + }, + { + "epoch": 2.867857230107737, + "grad_norm": 0.021058736369013786, + "learning_rate": 9.178339635747132e-07, + "loss": 0.1183, + "num_input_tokens_seen": 79127720, + "step": 117390 + }, + { + "epoch": 2.867979380939584, + "grad_norm": 0.026515983045101166, + "learning_rate": 9.17748974627138e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79131048, + "step": 117395 + }, + { + "epoch": 2.8681015317714316, + "grad_norm": 0.041551969945430756, + "learning_rate": 9.176639862777138e-07, + "loss": 0.0332, + "num_input_tokens_seen": 79134376, + "step": 117400 + }, + { + "epoch": 2.8682236826032783, + "grad_norm": 0.0533473864197731, + "learning_rate": 9.175789985270593e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79138024, + "step": 117405 + }, + { + "epoch": 2.868345833435126, + "grad_norm": 0.007953424006700516, + "learning_rate": 9.174940113757929e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79141736, + "step": 117410 + }, + { + "epoch": 2.8684679842669727, + "grad_norm": 0.015473191626369953, + "learning_rate": 9.174090248245318e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79145192, + "step": 117415 + }, + { + "epoch": 2.86859013509882, + "grad_norm": 0.059506792575120926, + "learning_rate": 9.17324038873895e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79148712, + "step": 117420 + }, + { + "epoch": 2.868712285930667, + "grad_norm": 0.01240159384906292, + "learning_rate": 9.172390535244996e-07, + "loss": 0.0569, + "num_input_tokens_seen": 79152424, + "step": 117425 + }, + { + "epoch": 2.8688344367625143, + "grad_norm": 0.003421264234930277, + "learning_rate": 9.171540687769641e-07, + "loss": 0.0572, + "num_input_tokens_seen": 79155688, + "step": 117430 + }, + { + "epoch": 2.8689565875943615, + "grad_norm": 0.014913155697286129, + "learning_rate": 9.170690846319069e-07, + "loss": 0.0388, + "num_input_tokens_seen": 79159080, + "step": 117435 + }, + { + "epoch": 2.8690787384262086, + "grad_norm": 0.07318806648254395, + "learning_rate": 9.16984101089945e-07, + "loss": 0.0622, + "num_input_tokens_seen": 79162280, + "step": 117440 + }, + { + "epoch": 2.869200889258056, + "grad_norm": 0.10887158662080765, + "learning_rate": 9.168991181516977e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79165416, + "step": 117445 + }, + { + "epoch": 2.869323040089903, + "grad_norm": 0.048394229263067245, + "learning_rate": 9.168141358177819e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79169000, + "step": 117450 + }, + { + "epoch": 2.86944519092175, + "grad_norm": 1.3721727132797241, + "learning_rate": 9.167291540888163e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79172392, + "step": 117455 + }, + { + "epoch": 2.8695673417535974, + "grad_norm": 0.49199050664901733, + "learning_rate": 9.166441729654184e-07, + "loss": 0.0382, + "num_input_tokens_seen": 79175592, + "step": 117460 + }, + { + "epoch": 2.8696894925854446, + "grad_norm": 0.014259264804422855, + "learning_rate": 9.165591924482066e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79178856, + "step": 117465 + }, + { + "epoch": 2.869811643417292, + "grad_norm": 0.004566266667097807, + "learning_rate": 9.164742125377991e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79182184, + "step": 117470 + }, + { + "epoch": 2.869933794249139, + "grad_norm": 0.147256538271904, + "learning_rate": 9.163892332348133e-07, + "loss": 0.0441, + "num_input_tokens_seen": 79185256, + "step": 117475 + }, + { + "epoch": 2.870055945080986, + "grad_norm": 0.05114344507455826, + "learning_rate": 9.163042545398676e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79188456, + "step": 117480 + }, + { + "epoch": 2.8701780959128333, + "grad_norm": 30.305294036865234, + "learning_rate": 9.162192764535798e-07, + "loss": 0.0422, + "num_input_tokens_seen": 79191592, + "step": 117485 + }, + { + "epoch": 2.87030024674468, + "grad_norm": 0.003184929024428129, + "learning_rate": 9.161342989765683e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79195240, + "step": 117490 + }, + { + "epoch": 2.8704223975765277, + "grad_norm": 0.006119478493928909, + "learning_rate": 9.160493221094502e-07, + "loss": 0.0, + "num_input_tokens_seen": 79198504, + "step": 117495 + }, + { + "epoch": 2.8705445484083745, + "grad_norm": 23.057008743286133, + "learning_rate": 9.159643458528441e-07, + "loss": 0.0476, + "num_input_tokens_seen": 79202152, + "step": 117500 + }, + { + "epoch": 2.8706666992402217, + "grad_norm": 0.008205844089388847, + "learning_rate": 9.158793702073682e-07, + "loss": 0.1373, + "num_input_tokens_seen": 79206056, + "step": 117505 + }, + { + "epoch": 2.870788850072069, + "grad_norm": 0.006589832715690136, + "learning_rate": 9.157943951736397e-07, + "loss": 0.0454, + "num_input_tokens_seen": 79209512, + "step": 117510 + }, + { + "epoch": 2.870911000903916, + "grad_norm": 0.15100379288196564, + "learning_rate": 9.157094207522775e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79212968, + "step": 117515 + }, + { + "epoch": 2.8710331517357632, + "grad_norm": 0.0029677124693989754, + "learning_rate": 9.156244469438987e-07, + "loss": 0.0, + "num_input_tokens_seen": 79216360, + "step": 117520 + }, + { + "epoch": 2.8711553025676104, + "grad_norm": 0.0012235000031068921, + "learning_rate": 9.155394737491218e-07, + "loss": 0.1394, + "num_input_tokens_seen": 79220008, + "step": 117525 + }, + { + "epoch": 2.8712774533994576, + "grad_norm": 0.00305823958478868, + "learning_rate": 9.154545011685645e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79223080, + "step": 117530 + }, + { + "epoch": 2.871399604231305, + "grad_norm": 0.002814261708408594, + "learning_rate": 9.153695292028449e-07, + "loss": 0.0538, + "num_input_tokens_seen": 79226408, + "step": 117535 + }, + { + "epoch": 2.871521755063152, + "grad_norm": 0.047743260860443115, + "learning_rate": 9.15284557852581e-07, + "loss": 0.0013, + "num_input_tokens_seen": 79229480, + "step": 117540 + }, + { + "epoch": 2.871643905894999, + "grad_norm": 0.0454338937997818, + "learning_rate": 9.1519958711839e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79233064, + "step": 117545 + }, + { + "epoch": 2.8717660567268464, + "grad_norm": 0.022779377177357674, + "learning_rate": 9.151146170008911e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79236328, + "step": 117550 + }, + { + "epoch": 2.8718882075586936, + "grad_norm": 0.0035186749882996082, + "learning_rate": 9.150296475007009e-07, + "loss": 0.0425, + "num_input_tokens_seen": 79240040, + "step": 117555 + }, + { + "epoch": 2.8720103583905408, + "grad_norm": 0.012661670334637165, + "learning_rate": 9.14944678618438e-07, + "loss": 0.0569, + "num_input_tokens_seen": 79242920, + "step": 117560 + }, + { + "epoch": 2.872132509222388, + "grad_norm": 0.06139669194817543, + "learning_rate": 9.148597103547209e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79246056, + "step": 117565 + }, + { + "epoch": 2.872254660054235, + "grad_norm": 0.054819829761981964, + "learning_rate": 9.147747427101663e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79249256, + "step": 117570 + }, + { + "epoch": 2.872376810886082, + "grad_norm": 21.28368377685547, + "learning_rate": 9.146897756853931e-07, + "loss": 0.0872, + "num_input_tokens_seen": 79252456, + "step": 117575 + }, + { + "epoch": 2.8724989617179295, + "grad_norm": 0.008261069655418396, + "learning_rate": 9.146048092810184e-07, + "loss": 0.0549, + "num_input_tokens_seen": 79255720, + "step": 117580 + }, + { + "epoch": 2.8726211125497763, + "grad_norm": 0.02081366255879402, + "learning_rate": 9.145198434976609e-07, + "loss": 0.0717, + "num_input_tokens_seen": 79258856, + "step": 117585 + }, + { + "epoch": 2.872743263381624, + "grad_norm": 28.763362884521484, + "learning_rate": 9.144348783359379e-07, + "loss": 0.1152, + "num_input_tokens_seen": 79262184, + "step": 117590 + }, + { + "epoch": 2.8728654142134706, + "grad_norm": 0.757977306842804, + "learning_rate": 9.143499137964673e-07, + "loss": 0.0368, + "num_input_tokens_seen": 79265320, + "step": 117595 + }, + { + "epoch": 2.872987565045318, + "grad_norm": 0.06304121017456055, + "learning_rate": 9.142649498798675e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79268456, + "step": 117600 + }, + { + "epoch": 2.873109715877165, + "grad_norm": 0.00835332740098238, + "learning_rate": 9.141799865867558e-07, + "loss": 0.0458, + "num_input_tokens_seen": 79271656, + "step": 117605 + }, + { + "epoch": 2.873231866709012, + "grad_norm": 0.022202685475349426, + "learning_rate": 9.140950239177505e-07, + "loss": 0.0651, + "num_input_tokens_seen": 79275304, + "step": 117610 + }, + { + "epoch": 2.8733540175408594, + "grad_norm": 0.315326988697052, + "learning_rate": 9.140100618734691e-07, + "loss": 0.0012, + "num_input_tokens_seen": 79278632, + "step": 117615 + }, + { + "epoch": 2.8734761683727066, + "grad_norm": 0.1667528599500656, + "learning_rate": 9.139251004545296e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79282728, + "step": 117620 + }, + { + "epoch": 2.8735983192045538, + "grad_norm": 0.04228285327553749, + "learning_rate": 9.138401396615502e-07, + "loss": 0.0611, + "num_input_tokens_seen": 79285928, + "step": 117625 + }, + { + "epoch": 2.873720470036401, + "grad_norm": 59.36479187011719, + "learning_rate": 9.137551794951483e-07, + "loss": 0.0664, + "num_input_tokens_seen": 79289128, + "step": 117630 + }, + { + "epoch": 2.873842620868248, + "grad_norm": 0.06058398261666298, + "learning_rate": 9.136702199559421e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79292520, + "step": 117635 + }, + { + "epoch": 2.8739647717000953, + "grad_norm": 0.024536775425076485, + "learning_rate": 9.135852610445491e-07, + "loss": 0.0246, + "num_input_tokens_seen": 79295656, + "step": 117640 + }, + { + "epoch": 2.8740869225319425, + "grad_norm": 0.6847139000892639, + "learning_rate": 9.135003027615876e-07, + "loss": 0.0599, + "num_input_tokens_seen": 79299112, + "step": 117645 + }, + { + "epoch": 2.8742090733637897, + "grad_norm": 0.026592666283249855, + "learning_rate": 9.134153451076748e-07, + "loss": 0.0503, + "num_input_tokens_seen": 79302504, + "step": 117650 + }, + { + "epoch": 2.874331224195637, + "grad_norm": 0.01792846992611885, + "learning_rate": 9.133303880834287e-07, + "loss": 0.0781, + "num_input_tokens_seen": 79305896, + "step": 117655 + }, + { + "epoch": 2.874453375027484, + "grad_norm": 14.320727348327637, + "learning_rate": 9.13245431689468e-07, + "loss": 0.1096, + "num_input_tokens_seen": 79309288, + "step": 117660 + }, + { + "epoch": 2.8745755258593313, + "grad_norm": 0.00515905162319541, + "learning_rate": 9.131604759264093e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79312616, + "step": 117665 + }, + { + "epoch": 2.874697676691178, + "grad_norm": 50.880348205566406, + "learning_rate": 9.130755207948715e-07, + "loss": 0.027, + "num_input_tokens_seen": 79315816, + "step": 117670 + }, + { + "epoch": 2.8748198275230257, + "grad_norm": 0.03606473654508591, + "learning_rate": 9.129905662954713e-07, + "loss": 0.0957, + "num_input_tokens_seen": 79319144, + "step": 117675 + }, + { + "epoch": 2.8749419783548724, + "grad_norm": 27.191478729248047, + "learning_rate": 9.129056124288275e-07, + "loss": 0.062, + "num_input_tokens_seen": 79322600, + "step": 117680 + }, + { + "epoch": 2.8750641291867196, + "grad_norm": 0.27269473671913147, + "learning_rate": 9.128206591955574e-07, + "loss": 0.0008, + "num_input_tokens_seen": 79326248, + "step": 117685 + }, + { + "epoch": 2.875186280018567, + "grad_norm": 0.03510739281773567, + "learning_rate": 9.12735706596279e-07, + "loss": 0.0772, + "num_input_tokens_seen": 79329576, + "step": 117690 + }, + { + "epoch": 2.875308430850414, + "grad_norm": 0.006244110409170389, + "learning_rate": 9.126507546316102e-07, + "loss": 0.0556, + "num_input_tokens_seen": 79332840, + "step": 117695 + }, + { + "epoch": 2.875430581682261, + "grad_norm": 0.2085716277360916, + "learning_rate": 9.125658033021682e-07, + "loss": 0.0282, + "num_input_tokens_seen": 79335976, + "step": 117700 + }, + { + "epoch": 2.8755527325141084, + "grad_norm": 51.214942932128906, + "learning_rate": 9.124808526085714e-07, + "loss": 0.146, + "num_input_tokens_seen": 79341288, + "step": 117705 + }, + { + "epoch": 2.8756748833459556, + "grad_norm": 1.7222996950149536, + "learning_rate": 9.123959025514372e-07, + "loss": 0.0616, + "num_input_tokens_seen": 79344424, + "step": 117710 + }, + { + "epoch": 2.8757970341778027, + "grad_norm": 0.03357335925102234, + "learning_rate": 9.123109531313836e-07, + "loss": 0.0348, + "num_input_tokens_seen": 79347816, + "step": 117715 + }, + { + "epoch": 2.87591918500965, + "grad_norm": 0.01639949530363083, + "learning_rate": 9.122260043490285e-07, + "loss": 0.026, + "num_input_tokens_seen": 79351528, + "step": 117720 + }, + { + "epoch": 2.876041335841497, + "grad_norm": 0.012518463656306267, + "learning_rate": 9.121410562049893e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79354984, + "step": 117725 + }, + { + "epoch": 2.8761634866733443, + "grad_norm": 0.1400122493505478, + "learning_rate": 9.120561086998842e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79358312, + "step": 117730 + }, + { + "epoch": 2.8762856375051915, + "grad_norm": 0.03491630777716637, + "learning_rate": 9.119711618343305e-07, + "loss": 0.0341, + "num_input_tokens_seen": 79361448, + "step": 117735 + }, + { + "epoch": 2.8764077883370387, + "grad_norm": 0.016988487914204597, + "learning_rate": 9.118862156089465e-07, + "loss": 0.0311, + "num_input_tokens_seen": 79364712, + "step": 117740 + }, + { + "epoch": 2.876529939168886, + "grad_norm": 0.16878642141819, + "learning_rate": 9.118012700243495e-07, + "loss": 0.0303, + "num_input_tokens_seen": 79368424, + "step": 117745 + }, + { + "epoch": 2.876652090000733, + "grad_norm": 0.16372260451316833, + "learning_rate": 9.117163250811571e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79371880, + "step": 117750 + }, + { + "epoch": 2.87677424083258, + "grad_norm": 0.026938119903206825, + "learning_rate": 9.116313807799878e-07, + "loss": 0.0165, + "num_input_tokens_seen": 79375144, + "step": 117755 + }, + { + "epoch": 2.8768963916644275, + "grad_norm": 36.589786529541016, + "learning_rate": 9.115464371214585e-07, + "loss": 0.0751, + "num_input_tokens_seen": 79378472, + "step": 117760 + }, + { + "epoch": 2.877018542496274, + "grad_norm": 0.034229863435029984, + "learning_rate": 9.114614941061877e-07, + "loss": 0.1053, + "num_input_tokens_seen": 79382184, + "step": 117765 + }, + { + "epoch": 2.877140693328122, + "grad_norm": 0.3387535512447357, + "learning_rate": 9.113765517347922e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79385896, + "step": 117770 + }, + { + "epoch": 2.8772628441599686, + "grad_norm": 0.028109047561883926, + "learning_rate": 9.112916100078903e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79389480, + "step": 117775 + }, + { + "epoch": 2.8773849949918158, + "grad_norm": 0.019770437851548195, + "learning_rate": 9.112066689261001e-07, + "loss": 0.141, + "num_input_tokens_seen": 79392808, + "step": 117780 + }, + { + "epoch": 2.877507145823663, + "grad_norm": 0.12967844307422638, + "learning_rate": 9.111217284900387e-07, + "loss": 0.051, + "num_input_tokens_seen": 79396328, + "step": 117785 + }, + { + "epoch": 2.87762929665551, + "grad_norm": 0.14182066917419434, + "learning_rate": 9.110367887003241e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79399848, + "step": 117790 + }, + { + "epoch": 2.8777514474873573, + "grad_norm": 0.10105834156274796, + "learning_rate": 9.109518495575736e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79403304, + "step": 117795 + }, + { + "epoch": 2.8778735983192045, + "grad_norm": 0.44391387701034546, + "learning_rate": 9.108669110624055e-07, + "loss": 0.0006, + "num_input_tokens_seen": 79406312, + "step": 117800 + }, + { + "epoch": 2.8779957491510517, + "grad_norm": 0.04133082181215286, + "learning_rate": 9.107819732154371e-07, + "loss": 0.0482, + "num_input_tokens_seen": 79409576, + "step": 117805 + }, + { + "epoch": 2.878117899982899, + "grad_norm": 0.005388464778661728, + "learning_rate": 9.10697036017286e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79413032, + "step": 117810 + }, + { + "epoch": 2.878240050814746, + "grad_norm": 0.03301483392715454, + "learning_rate": 9.106120994685704e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79416360, + "step": 117815 + }, + { + "epoch": 2.8783622016465933, + "grad_norm": 0.08911029249429703, + "learning_rate": 9.105271635699072e-07, + "loss": 0.0006, + "num_input_tokens_seen": 79419304, + "step": 117820 + }, + { + "epoch": 2.8784843524784405, + "grad_norm": 0.12972067296504974, + "learning_rate": 9.104422283219151e-07, + "loss": 0.0025, + "num_input_tokens_seen": 79422824, + "step": 117825 + }, + { + "epoch": 2.8786065033102877, + "grad_norm": 57.04539108276367, + "learning_rate": 9.103572937252107e-07, + "loss": 0.0386, + "num_input_tokens_seen": 79426152, + "step": 117830 + }, + { + "epoch": 2.878728654142135, + "grad_norm": 0.23031321167945862, + "learning_rate": 9.102723597804125e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79429544, + "step": 117835 + }, + { + "epoch": 2.8788508049739816, + "grad_norm": 0.05489039421081543, + "learning_rate": 9.101874264881377e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79432872, + "step": 117840 + }, + { + "epoch": 2.8789729558058292, + "grad_norm": 0.05241875350475311, + "learning_rate": 9.101024938490041e-07, + "loss": 0.04, + "num_input_tokens_seen": 79436264, + "step": 117845 + }, + { + "epoch": 2.879095106637676, + "grad_norm": 0.030297545716166496, + "learning_rate": 9.100175618636296e-07, + "loss": 0.0291, + "num_input_tokens_seen": 79440360, + "step": 117850 + }, + { + "epoch": 2.8792172574695236, + "grad_norm": 0.07322395592927933, + "learning_rate": 9.099326305326311e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79443880, + "step": 117855 + }, + { + "epoch": 2.8793394083013704, + "grad_norm": 0.1015520840883255, + "learning_rate": 9.098476998566274e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79447080, + "step": 117860 + }, + { + "epoch": 2.8794615591332176, + "grad_norm": 358.073974609375, + "learning_rate": 9.097627698362348e-07, + "loss": 0.0258, + "num_input_tokens_seen": 79450600, + "step": 117865 + }, + { + "epoch": 2.8795837099650647, + "grad_norm": 0.09080827236175537, + "learning_rate": 9.096778404720716e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79453864, + "step": 117870 + }, + { + "epoch": 2.879705860796912, + "grad_norm": 0.06684160232543945, + "learning_rate": 9.095929117647559e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79457768, + "step": 117875 + }, + { + "epoch": 2.879828011628759, + "grad_norm": 1036.3775634765625, + "learning_rate": 9.095079837149046e-07, + "loss": 0.1143, + "num_input_tokens_seen": 79461352, + "step": 117880 + }, + { + "epoch": 2.8799501624606063, + "grad_norm": 0.011043991893529892, + "learning_rate": 9.094230563231359e-07, + "loss": 0.0, + "num_input_tokens_seen": 79464744, + "step": 117885 + }, + { + "epoch": 2.8800723132924535, + "grad_norm": 54.542789459228516, + "learning_rate": 9.093381295900666e-07, + "loss": 0.0752, + "num_input_tokens_seen": 79468072, + "step": 117890 + }, + { + "epoch": 2.8801944641243007, + "grad_norm": 0.03255674988031387, + "learning_rate": 9.092532035163153e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79471976, + "step": 117895 + }, + { + "epoch": 2.880316614956148, + "grad_norm": 0.1379069983959198, + "learning_rate": 9.091682781024989e-07, + "loss": 0.061, + "num_input_tokens_seen": 79475176, + "step": 117900 + }, + { + "epoch": 2.880438765787995, + "grad_norm": 0.8194472193717957, + "learning_rate": 9.090833533492349e-07, + "loss": 0.0468, + "num_input_tokens_seen": 79478888, + "step": 117905 + }, + { + "epoch": 2.8805609166198423, + "grad_norm": 0.4301382005214691, + "learning_rate": 9.089984292571418e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79482792, + "step": 117910 + }, + { + "epoch": 2.8806830674516894, + "grad_norm": 0.04192114621400833, + "learning_rate": 9.08913505826836e-07, + "loss": 0.1394, + "num_input_tokens_seen": 79486120, + "step": 117915 + }, + { + "epoch": 2.8808052182835366, + "grad_norm": 0.9268858432769775, + "learning_rate": 9.088285830589362e-07, + "loss": 0.031, + "num_input_tokens_seen": 79489064, + "step": 117920 + }, + { + "epoch": 2.880927369115384, + "grad_norm": 0.06339632719755173, + "learning_rate": 9.087436609540591e-07, + "loss": 0.1359, + "num_input_tokens_seen": 79492456, + "step": 117925 + }, + { + "epoch": 2.881049519947231, + "grad_norm": 0.1005081757903099, + "learning_rate": 9.086587395128226e-07, + "loss": 0.0421, + "num_input_tokens_seen": 79495848, + "step": 117930 + }, + { + "epoch": 2.8811716707790778, + "grad_norm": 0.023637497797608376, + "learning_rate": 9.085738187358447e-07, + "loss": 0.0386, + "num_input_tokens_seen": 79498856, + "step": 117935 + }, + { + "epoch": 2.8812938216109254, + "grad_norm": 0.43018922209739685, + "learning_rate": 9.084888986237425e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79502184, + "step": 117940 + }, + { + "epoch": 2.881415972442772, + "grad_norm": 0.006060573272407055, + "learning_rate": 9.084039791771334e-07, + "loss": 0.163, + "num_input_tokens_seen": 79505128, + "step": 117945 + }, + { + "epoch": 2.8815381232746193, + "grad_norm": 0.023106878623366356, + "learning_rate": 9.083190603966354e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79508520, + "step": 117950 + }, + { + "epoch": 2.8816602741064665, + "grad_norm": 0.2827344834804535, + "learning_rate": 9.082341422828657e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79511656, + "step": 117955 + }, + { + "epoch": 2.8817824249383137, + "grad_norm": 0.024535616859793663, + "learning_rate": 9.081492248364422e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79515816, + "step": 117960 + }, + { + "epoch": 2.881904575770161, + "grad_norm": 0.018632639199495316, + "learning_rate": 9.080643080579818e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79519016, + "step": 117965 + }, + { + "epoch": 2.882026726602008, + "grad_norm": 0.2095278650522232, + "learning_rate": 9.079793919481032e-07, + "loss": 0.0845, + "num_input_tokens_seen": 79522280, + "step": 117970 + }, + { + "epoch": 2.8821488774338553, + "grad_norm": 25.330570220947266, + "learning_rate": 9.078944765074225e-07, + "loss": 0.0587, + "num_input_tokens_seen": 79525992, + "step": 117975 + }, + { + "epoch": 2.8822710282657025, + "grad_norm": 0.003723717760294676, + "learning_rate": 9.078095617365584e-07, + "loss": 0.0467, + "num_input_tokens_seen": 79529128, + "step": 117980 + }, + { + "epoch": 2.8823931790975497, + "grad_norm": 0.011801057495176792, + "learning_rate": 9.077246476361276e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79532648, + "step": 117985 + }, + { + "epoch": 2.882515329929397, + "grad_norm": 17.591293334960938, + "learning_rate": 9.076397342067483e-07, + "loss": 0.1119, + "num_input_tokens_seen": 79535720, + "step": 117990 + }, + { + "epoch": 2.882637480761244, + "grad_norm": 1.953616976737976, + "learning_rate": 9.075548214490376e-07, + "loss": 0.0794, + "num_input_tokens_seen": 79539496, + "step": 117995 + }, + { + "epoch": 2.8827596315930912, + "grad_norm": 0.05649435520172119, + "learning_rate": 9.074699093636131e-07, + "loss": 0.0648, + "num_input_tokens_seen": 79542568, + "step": 118000 + }, + { + "epoch": 2.8828817824249384, + "grad_norm": 1.586169719696045, + "learning_rate": 9.073849979510926e-07, + "loss": 0.1219, + "num_input_tokens_seen": 79545832, + "step": 118005 + }, + { + "epoch": 2.8830039332567856, + "grad_norm": 0.477380633354187, + "learning_rate": 9.073000872120927e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79548968, + "step": 118010 + }, + { + "epoch": 2.883126084088633, + "grad_norm": 46.87863540649414, + "learning_rate": 9.072151771472321e-07, + "loss": 0.0312, + "num_input_tokens_seen": 79552552, + "step": 118015 + }, + { + "epoch": 2.8832482349204795, + "grad_norm": 0.1196833923459053, + "learning_rate": 9.071302677571272e-07, + "loss": 0.0009, + "num_input_tokens_seen": 79555752, + "step": 118020 + }, + { + "epoch": 2.883370385752327, + "grad_norm": 1.0895923376083374, + "learning_rate": 9.070453590423959e-07, + "loss": 0.022, + "num_input_tokens_seen": 79558888, + "step": 118025 + }, + { + "epoch": 2.883492536584174, + "grad_norm": 39.65727615356445, + "learning_rate": 9.069604510036563e-07, + "loss": 0.0734, + "num_input_tokens_seen": 79562472, + "step": 118030 + }, + { + "epoch": 2.8836146874160216, + "grad_norm": 69.60074615478516, + "learning_rate": 9.068755436415247e-07, + "loss": 0.0395, + "num_input_tokens_seen": 79565480, + "step": 118035 + }, + { + "epoch": 2.8837368382478683, + "grad_norm": 0.02860451489686966, + "learning_rate": 9.067906369566198e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79568616, + "step": 118040 + }, + { + "epoch": 2.8838589890797155, + "grad_norm": 0.06782763451337814, + "learning_rate": 9.06705730949558e-07, + "loss": 0.0279, + "num_input_tokens_seen": 79571688, + "step": 118045 + }, + { + "epoch": 2.8839811399115627, + "grad_norm": 0.3268910348415375, + "learning_rate": 9.066208256209576e-07, + "loss": 0.0527, + "num_input_tokens_seen": 79574952, + "step": 118050 + }, + { + "epoch": 2.88410329074341, + "grad_norm": 15.585838317871094, + "learning_rate": 9.065359209714356e-07, + "loss": 0.1484, + "num_input_tokens_seen": 79578088, + "step": 118055 + }, + { + "epoch": 2.884225441575257, + "grad_norm": 0.0012059342116117477, + "learning_rate": 9.064510170016092e-07, + "loss": 0.0526, + "num_input_tokens_seen": 79581544, + "step": 118060 + }, + { + "epoch": 2.8843475924071043, + "grad_norm": 0.18633608520030975, + "learning_rate": 9.063661137120966e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79584616, + "step": 118065 + }, + { + "epoch": 2.8844697432389514, + "grad_norm": 0.010403754189610481, + "learning_rate": 9.062812111035143e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79588456, + "step": 118070 + }, + { + "epoch": 2.8845918940707986, + "grad_norm": 0.013725988566875458, + "learning_rate": 9.061963091764809e-07, + "loss": 0.048, + "num_input_tokens_seen": 79591400, + "step": 118075 + }, + { + "epoch": 2.884714044902646, + "grad_norm": 0.07384747266769409, + "learning_rate": 9.061114079316124e-07, + "loss": 0.0317, + "num_input_tokens_seen": 79595112, + "step": 118080 + }, + { + "epoch": 2.884836195734493, + "grad_norm": 0.027986222878098488, + "learning_rate": 9.060265073695272e-07, + "loss": 0.0711, + "num_input_tokens_seen": 79598120, + "step": 118085 + }, + { + "epoch": 2.88495834656634, + "grad_norm": 0.24027803540229797, + "learning_rate": 9.059416074908429e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79601256, + "step": 118090 + }, + { + "epoch": 2.8850804973981874, + "grad_norm": 0.13268686830997467, + "learning_rate": 9.058567082961764e-07, + "loss": 0.0311, + "num_input_tokens_seen": 79604328, + "step": 118095 + }, + { + "epoch": 2.8852026482300346, + "grad_norm": 0.018090954050421715, + "learning_rate": 9.057718097861452e-07, + "loss": 0.0378, + "num_input_tokens_seen": 79607720, + "step": 118100 + }, + { + "epoch": 2.8853247990618818, + "grad_norm": 0.011310219764709473, + "learning_rate": 9.056869119613667e-07, + "loss": 0.0387, + "num_input_tokens_seen": 79611048, + "step": 118105 + }, + { + "epoch": 2.885446949893729, + "grad_norm": 0.02010614424943924, + "learning_rate": 9.056020148224584e-07, + "loss": 0.0, + "num_input_tokens_seen": 79614632, + "step": 118110 + }, + { + "epoch": 2.8855691007255757, + "grad_norm": 0.027373293414711952, + "learning_rate": 9.055171183700376e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79617960, + "step": 118115 + }, + { + "epoch": 2.8856912515574233, + "grad_norm": 20.399688720703125, + "learning_rate": 9.054322226047214e-07, + "loss": 0.0904, + "num_input_tokens_seen": 79621480, + "step": 118120 + }, + { + "epoch": 2.88581340238927, + "grad_norm": 0.01832777075469494, + "learning_rate": 9.053473275271281e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79624872, + "step": 118125 + }, + { + "epoch": 2.8859355532211173, + "grad_norm": 0.04512257128953934, + "learning_rate": 9.05262433137874e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79628200, + "step": 118130 + }, + { + "epoch": 2.8860577040529645, + "grad_norm": 0.10394764691591263, + "learning_rate": 9.051775394375775e-07, + "loss": 0.0235, + "num_input_tokens_seen": 79631656, + "step": 118135 + }, + { + "epoch": 2.8861798548848117, + "grad_norm": 0.45173031091690063, + "learning_rate": 9.050926464268549e-07, + "loss": 0.0429, + "num_input_tokens_seen": 79635112, + "step": 118140 + }, + { + "epoch": 2.886302005716659, + "grad_norm": 0.007458243519067764, + "learning_rate": 9.050077541063243e-07, + "loss": 0.0283, + "num_input_tokens_seen": 79638248, + "step": 118145 + }, + { + "epoch": 2.886424156548506, + "grad_norm": 0.00957757979631424, + "learning_rate": 9.049228624766029e-07, + "loss": 0.0365, + "num_input_tokens_seen": 79641256, + "step": 118150 + }, + { + "epoch": 2.8865463073803532, + "grad_norm": 10.149345397949219, + "learning_rate": 9.04837971538308e-07, + "loss": 0.0593, + "num_input_tokens_seen": 79644776, + "step": 118155 + }, + { + "epoch": 2.8866684582122004, + "grad_norm": 0.0181287694722414, + "learning_rate": 9.047530812920572e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79648680, + "step": 118160 + }, + { + "epoch": 2.8867906090440476, + "grad_norm": 0.01915971376001835, + "learning_rate": 9.046681917384672e-07, + "loss": 0.0596, + "num_input_tokens_seen": 79652008, + "step": 118165 + }, + { + "epoch": 2.886912759875895, + "grad_norm": 25.121864318847656, + "learning_rate": 9.045833028781562e-07, + "loss": 0.0422, + "num_input_tokens_seen": 79655336, + "step": 118170 + }, + { + "epoch": 2.887034910707742, + "grad_norm": 0.002604476409032941, + "learning_rate": 9.044984147117406e-07, + "loss": 0.0441, + "num_input_tokens_seen": 79658408, + "step": 118175 + }, + { + "epoch": 2.887157061539589, + "grad_norm": 0.1552521288394928, + "learning_rate": 9.044135272398382e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79661480, + "step": 118180 + }, + { + "epoch": 2.8872792123714364, + "grad_norm": 0.04718397930264473, + "learning_rate": 9.043286404630668e-07, + "loss": 0.0311, + "num_input_tokens_seen": 79664808, + "step": 118185 + }, + { + "epoch": 2.8874013632032836, + "grad_norm": 22.923688888549805, + "learning_rate": 9.042437543820428e-07, + "loss": 0.1177, + "num_input_tokens_seen": 79668072, + "step": 118190 + }, + { + "epoch": 2.8875235140351307, + "grad_norm": 0.15967994928359985, + "learning_rate": 9.041588689973845e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79671336, + "step": 118195 + }, + { + "epoch": 2.8876456648669775, + "grad_norm": 0.009539058431982994, + "learning_rate": 9.040739843097082e-07, + "loss": 0.0513, + "num_input_tokens_seen": 79674856, + "step": 118200 + }, + { + "epoch": 2.887767815698825, + "grad_norm": 0.019401682540774345, + "learning_rate": 9.03989100319632e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79678120, + "step": 118205 + }, + { + "epoch": 2.887889966530672, + "grad_norm": 61.033382415771484, + "learning_rate": 9.039042170277728e-07, + "loss": 0.0283, + "num_input_tokens_seen": 79681384, + "step": 118210 + }, + { + "epoch": 2.8880121173625195, + "grad_norm": 0.18923260271549225, + "learning_rate": 9.038193344347478e-07, + "loss": 0.002, + "num_input_tokens_seen": 79684712, + "step": 118215 + }, + { + "epoch": 2.8881342681943662, + "grad_norm": 1.0554591417312622, + "learning_rate": 9.037344525411747e-07, + "loss": 0.0011, + "num_input_tokens_seen": 79688168, + "step": 118220 + }, + { + "epoch": 2.8882564190262134, + "grad_norm": 29.14522933959961, + "learning_rate": 9.036495713476704e-07, + "loss": 0.1608, + "num_input_tokens_seen": 79691624, + "step": 118225 + }, + { + "epoch": 2.8883785698580606, + "grad_norm": 0.2183917909860611, + "learning_rate": 9.035646908548527e-07, + "loss": 0.0006, + "num_input_tokens_seen": 79694696, + "step": 118230 + }, + { + "epoch": 2.888500720689908, + "grad_norm": 37.8150634765625, + "learning_rate": 9.034798110633379e-07, + "loss": 0.1012, + "num_input_tokens_seen": 79698088, + "step": 118235 + }, + { + "epoch": 2.888622871521755, + "grad_norm": 28.24630355834961, + "learning_rate": 9.033949319737439e-07, + "loss": 0.0404, + "num_input_tokens_seen": 79701736, + "step": 118240 + }, + { + "epoch": 2.888745022353602, + "grad_norm": 0.0458686463534832, + "learning_rate": 9.033100535866885e-07, + "loss": 0.0011, + "num_input_tokens_seen": 79704808, + "step": 118245 + }, + { + "epoch": 2.8888671731854494, + "grad_norm": 0.4126882553100586, + "learning_rate": 9.032251759027881e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79708136, + "step": 118250 + }, + { + "epoch": 2.8889893240172966, + "grad_norm": 0.047328535467386246, + "learning_rate": 9.031402989226603e-07, + "loss": 0.0314, + "num_input_tokens_seen": 79711464, + "step": 118255 + }, + { + "epoch": 2.8891114748491438, + "grad_norm": 0.03057565726339817, + "learning_rate": 9.030554226469222e-07, + "loss": 0.0678, + "num_input_tokens_seen": 79714984, + "step": 118260 + }, + { + "epoch": 2.889233625680991, + "grad_norm": 0.32294678688049316, + "learning_rate": 9.029705470761913e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79718376, + "step": 118265 + }, + { + "epoch": 2.889355776512838, + "grad_norm": 0.33089131116867065, + "learning_rate": 9.028856722110846e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79721576, + "step": 118270 + }, + { + "epoch": 2.8894779273446853, + "grad_norm": 0.025815755128860474, + "learning_rate": 9.028007980522192e-07, + "loss": 0.0543, + "num_input_tokens_seen": 79724712, + "step": 118275 + }, + { + "epoch": 2.8896000781765325, + "grad_norm": 0.16206568479537964, + "learning_rate": 9.02715924600213e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79727848, + "step": 118280 + }, + { + "epoch": 2.8897222290083793, + "grad_norm": 0.056277234107255936, + "learning_rate": 9.026310518556822e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79730920, + "step": 118285 + }, + { + "epoch": 2.889844379840227, + "grad_norm": 0.020027903839945793, + "learning_rate": 9.025461798192452e-07, + "loss": 0.1201, + "num_input_tokens_seen": 79734376, + "step": 118290 + }, + { + "epoch": 2.8899665306720737, + "grad_norm": 0.054904062300920486, + "learning_rate": 9.024613084915181e-07, + "loss": 0.0417, + "num_input_tokens_seen": 79737448, + "step": 118295 + }, + { + "epoch": 2.8900886815039213, + "grad_norm": 0.0019138812785968184, + "learning_rate": 9.023764378731189e-07, + "loss": 0.0603, + "num_input_tokens_seen": 79740392, + "step": 118300 + }, + { + "epoch": 2.890210832335768, + "grad_norm": 0.16153118014335632, + "learning_rate": 9.022915679646643e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79743848, + "step": 118305 + }, + { + "epoch": 2.890332983167615, + "grad_norm": 0.005311083514243364, + "learning_rate": 9.022066987667717e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79746920, + "step": 118310 + }, + { + "epoch": 2.8904551339994624, + "grad_norm": 13.794023513793945, + "learning_rate": 9.021218302800586e-07, + "loss": 0.0396, + "num_input_tokens_seen": 79750184, + "step": 118315 + }, + { + "epoch": 2.8905772848313096, + "grad_norm": 0.019158679991960526, + "learning_rate": 9.020369625051414e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79753640, + "step": 118320 + }, + { + "epoch": 2.890699435663157, + "grad_norm": 0.02267547883093357, + "learning_rate": 9.019520954426383e-07, + "loss": 0.0379, + "num_input_tokens_seen": 79756904, + "step": 118325 + }, + { + "epoch": 2.890821586495004, + "grad_norm": 0.3091530203819275, + "learning_rate": 9.018672290931654e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79759720, + "step": 118330 + }, + { + "epoch": 2.890943737326851, + "grad_norm": 0.02154541015625, + "learning_rate": 9.017823634573404e-07, + "loss": 0.0473, + "num_input_tokens_seen": 79762856, + "step": 118335 + }, + { + "epoch": 2.8910658881586984, + "grad_norm": 15.876699447631836, + "learning_rate": 9.01697498535781e-07, + "loss": 0.0804, + "num_input_tokens_seen": 79765864, + "step": 118340 + }, + { + "epoch": 2.8911880389905456, + "grad_norm": 0.03007902391254902, + "learning_rate": 9.016126343291033e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79769384, + "step": 118345 + }, + { + "epoch": 2.8913101898223927, + "grad_norm": 0.01837383396923542, + "learning_rate": 9.015277708379254e-07, + "loss": 0.0477, + "num_input_tokens_seen": 79772840, + "step": 118350 + }, + { + "epoch": 2.89143234065424, + "grad_norm": 31.521644592285156, + "learning_rate": 9.014429080628636e-07, + "loss": 0.0381, + "num_input_tokens_seen": 79775912, + "step": 118355 + }, + { + "epoch": 2.891554491486087, + "grad_norm": 0.011032157577574253, + "learning_rate": 9.01358046004536e-07, + "loss": 0.0549, + "num_input_tokens_seen": 79779112, + "step": 118360 + }, + { + "epoch": 2.8916766423179343, + "grad_norm": 32.51725769042969, + "learning_rate": 9.012731846635589e-07, + "loss": 0.0525, + "num_input_tokens_seen": 79782248, + "step": 118365 + }, + { + "epoch": 2.8917987931497815, + "grad_norm": 0.015014342032372952, + "learning_rate": 9.011883240405496e-07, + "loss": 0.0164, + "num_input_tokens_seen": 79785512, + "step": 118370 + }, + { + "epoch": 2.8919209439816287, + "grad_norm": 0.032123107463121414, + "learning_rate": 9.011034641361259e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79788712, + "step": 118375 + }, + { + "epoch": 2.8920430948134754, + "grad_norm": 0.1224132776260376, + "learning_rate": 9.010186049509038e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79792424, + "step": 118380 + }, + { + "epoch": 2.892165245645323, + "grad_norm": 0.01400213222950697, + "learning_rate": 9.009337464855016e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79795560, + "step": 118385 + }, + { + "epoch": 2.89228739647717, + "grad_norm": 1.1057496070861816, + "learning_rate": 9.008488887405354e-07, + "loss": 0.0077, + "num_input_tokens_seen": 79798824, + "step": 118390 + }, + { + "epoch": 2.8924095473090174, + "grad_norm": 0.032255593687295914, + "learning_rate": 9.007640317166228e-07, + "loss": 0.0389, + "num_input_tokens_seen": 79802664, + "step": 118395 + }, + { + "epoch": 2.892531698140864, + "grad_norm": 0.11206360161304474, + "learning_rate": 9.006791754143812e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79805928, + "step": 118400 + }, + { + "epoch": 2.8926538489727114, + "grad_norm": 0.29293307662010193, + "learning_rate": 9.005943198344271e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79808936, + "step": 118405 + }, + { + "epoch": 2.8927759998045586, + "grad_norm": 0.0059355502016842365, + "learning_rate": 9.005094649773779e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79812520, + "step": 118410 + }, + { + "epoch": 2.8928981506364058, + "grad_norm": 0.06507638841867447, + "learning_rate": 9.004246108438505e-07, + "loss": 0.066, + "num_input_tokens_seen": 79815720, + "step": 118415 + }, + { + "epoch": 2.893020301468253, + "grad_norm": 0.008765455335378647, + "learning_rate": 9.003397574344624e-07, + "loss": 0.1074, + "num_input_tokens_seen": 79819368, + "step": 118420 + }, + { + "epoch": 2.8931424523001, + "grad_norm": 0.025619253516197205, + "learning_rate": 9.002549047498301e-07, + "loss": 0.0001, + "num_input_tokens_seen": 79822632, + "step": 118425 + }, + { + "epoch": 2.8932646031319473, + "grad_norm": 0.018892310559749603, + "learning_rate": 9.001700527905709e-07, + "loss": 0.0616, + "num_input_tokens_seen": 79826152, + "step": 118430 + }, + { + "epoch": 2.8933867539637945, + "grad_norm": 0.1085629016160965, + "learning_rate": 9.000852015573024e-07, + "loss": 0.0312, + "num_input_tokens_seen": 79829288, + "step": 118435 + }, + { + "epoch": 2.8935089047956417, + "grad_norm": 22.9990234375, + "learning_rate": 9.000003510506407e-07, + "loss": 0.0863, + "num_input_tokens_seen": 79832680, + "step": 118440 + }, + { + "epoch": 2.893631055627489, + "grad_norm": 0.1559326946735382, + "learning_rate": 8.999155012712036e-07, + "loss": 0.0971, + "num_input_tokens_seen": 79836200, + "step": 118445 + }, + { + "epoch": 2.893753206459336, + "grad_norm": 0.017499227076768875, + "learning_rate": 8.998306522196077e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79839592, + "step": 118450 + }, + { + "epoch": 2.8938753572911833, + "grad_norm": 0.3136901259422302, + "learning_rate": 8.997458038964706e-07, + "loss": 0.0369, + "num_input_tokens_seen": 79842984, + "step": 118455 + }, + { + "epoch": 2.8939975081230305, + "grad_norm": 54.96400451660156, + "learning_rate": 8.996609563024084e-07, + "loss": 0.0433, + "num_input_tokens_seen": 79846184, + "step": 118460 + }, + { + "epoch": 2.894119658954877, + "grad_norm": 27.167482376098633, + "learning_rate": 8.995761094380392e-07, + "loss": 0.0678, + "num_input_tokens_seen": 79850024, + "step": 118465 + }, + { + "epoch": 2.894241809786725, + "grad_norm": 0.40186038613319397, + "learning_rate": 8.994912633039796e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79853416, + "step": 118470 + }, + { + "epoch": 2.8943639606185716, + "grad_norm": 0.07050628960132599, + "learning_rate": 8.994064179008461e-07, + "loss": 0.1456, + "num_input_tokens_seen": 79856680, + "step": 118475 + }, + { + "epoch": 2.8944861114504192, + "grad_norm": 0.1932118982076645, + "learning_rate": 8.993215732292567e-07, + "loss": 0.0005, + "num_input_tokens_seen": 79860072, + "step": 118480 + }, + { + "epoch": 2.894608262282266, + "grad_norm": 30.953916549682617, + "learning_rate": 8.992367292898274e-07, + "loss": 0.069, + "num_input_tokens_seen": 79863528, + "step": 118485 + }, + { + "epoch": 2.894730413114113, + "grad_norm": 0.2753456234931946, + "learning_rate": 8.991518860831758e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79866600, + "step": 118490 + }, + { + "epoch": 2.8948525639459604, + "grad_norm": 0.0773712545633316, + "learning_rate": 8.990670436099192e-07, + "loss": 0.002, + "num_input_tokens_seen": 79869800, + "step": 118495 + }, + { + "epoch": 2.8949747147778075, + "grad_norm": 0.15468727052211761, + "learning_rate": 8.989822018706738e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79873064, + "step": 118500 + }, + { + "epoch": 2.8950968656096547, + "grad_norm": 0.02235586568713188, + "learning_rate": 8.988973608660572e-07, + "loss": 0.0997, + "num_input_tokens_seen": 79876776, + "step": 118505 + }, + { + "epoch": 2.895219016441502, + "grad_norm": 44.47627639770508, + "learning_rate": 8.988125205966861e-07, + "loss": 0.102, + "num_input_tokens_seen": 79880232, + "step": 118510 + }, + { + "epoch": 2.895341167273349, + "grad_norm": 0.013052013702690601, + "learning_rate": 8.987276810631779e-07, + "loss": 0.0003, + "num_input_tokens_seen": 79883624, + "step": 118515 + }, + { + "epoch": 2.8954633181051963, + "grad_norm": 0.09096262603998184, + "learning_rate": 8.986428422661489e-07, + "loss": 0.113, + "num_input_tokens_seen": 79886760, + "step": 118520 + }, + { + "epoch": 2.8955854689370435, + "grad_norm": 0.007621260825544596, + "learning_rate": 8.985580042062163e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79890408, + "step": 118525 + }, + { + "epoch": 2.8957076197688907, + "grad_norm": 22.843814849853516, + "learning_rate": 8.984731668839976e-07, + "loss": 0.117, + "num_input_tokens_seen": 79893800, + "step": 118530 + }, + { + "epoch": 2.895829770600738, + "grad_norm": 19.730802536010742, + "learning_rate": 8.983883303001088e-07, + "loss": 0.0008, + "num_input_tokens_seen": 79897000, + "step": 118535 + }, + { + "epoch": 2.895951921432585, + "grad_norm": 0.05201861262321472, + "learning_rate": 8.98303494455168e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79900328, + "step": 118540 + }, + { + "epoch": 2.8960740722644323, + "grad_norm": 0.8403574824333191, + "learning_rate": 8.982186593497909e-07, + "loss": 0.0257, + "num_input_tokens_seen": 79903272, + "step": 118545 + }, + { + "epoch": 2.8961962230962794, + "grad_norm": 0.0860314890742302, + "learning_rate": 8.981338249845952e-07, + "loss": 0.0007, + "num_input_tokens_seen": 79907240, + "step": 118550 + }, + { + "epoch": 2.8963183739281266, + "grad_norm": 0.011775447055697441, + "learning_rate": 8.980489913601982e-07, + "loss": 0.046, + "num_input_tokens_seen": 79910248, + "step": 118555 + }, + { + "epoch": 2.8964405247599734, + "grad_norm": 11.870306968688965, + "learning_rate": 8.979641584772161e-07, + "loss": 0.0285, + "num_input_tokens_seen": 79913832, + "step": 118560 + }, + { + "epoch": 2.896562675591821, + "grad_norm": 0.008414355106651783, + "learning_rate": 8.97879326336266e-07, + "loss": 0.03, + "num_input_tokens_seen": 79917608, + "step": 118565 + }, + { + "epoch": 2.8966848264236678, + "grad_norm": 0.4362105131149292, + "learning_rate": 8.977944949379652e-07, + "loss": 0.038, + "num_input_tokens_seen": 79921192, + "step": 118570 + }, + { + "epoch": 2.896806977255515, + "grad_norm": 0.07210730016231537, + "learning_rate": 8.977096642829301e-07, + "loss": 0.0002, + "num_input_tokens_seen": 79924264, + "step": 118575 + }, + { + "epoch": 2.896929128087362, + "grad_norm": 25.157201766967773, + "learning_rate": 8.976248343717778e-07, + "loss": 0.101, + "num_input_tokens_seen": 79927656, + "step": 118580 + }, + { + "epoch": 2.8970512789192093, + "grad_norm": 0.6024566292762756, + "learning_rate": 8.97540005205125e-07, + "loss": 0.0175, + "num_input_tokens_seen": 79931112, + "step": 118585 + }, + { + "epoch": 2.8971734297510565, + "grad_norm": 0.12825407087802887, + "learning_rate": 8.974551767835893e-07, + "loss": 0.052, + "num_input_tokens_seen": 79934184, + "step": 118590 + }, + { + "epoch": 2.8972955805829037, + "grad_norm": 22.66798210144043, + "learning_rate": 8.973703491077867e-07, + "loss": 0.076, + "num_input_tokens_seen": 79938152, + "step": 118595 + }, + { + "epoch": 2.897417731414751, + "grad_norm": 0.0005638687289319932, + "learning_rate": 8.972855221783351e-07, + "loss": 0.001, + "num_input_tokens_seen": 79941992, + "step": 118600 + }, + { + "epoch": 2.897539882246598, + "grad_norm": 0.007802937179803848, + "learning_rate": 8.972006959958502e-07, + "loss": 0.108, + "num_input_tokens_seen": 79945512, + "step": 118605 + }, + { + "epoch": 2.8976620330784453, + "grad_norm": 27.239303588867188, + "learning_rate": 8.9711587056095e-07, + "loss": 0.1229, + "num_input_tokens_seen": 79948520, + "step": 118610 + }, + { + "epoch": 2.8977841839102925, + "grad_norm": 0.05133926868438721, + "learning_rate": 8.970310458742505e-07, + "loss": 0.0696, + "num_input_tokens_seen": 79951976, + "step": 118615 + }, + { + "epoch": 2.8979063347421397, + "grad_norm": 0.03441181033849716, + "learning_rate": 8.969462219363691e-07, + "loss": 0.0124, + "num_input_tokens_seen": 79955304, + "step": 118620 + }, + { + "epoch": 2.898028485573987, + "grad_norm": 0.005173355340957642, + "learning_rate": 8.968613987479227e-07, + "loss": 0.0007, + "num_input_tokens_seen": 79958632, + "step": 118625 + }, + { + "epoch": 2.898150636405834, + "grad_norm": 33.71710205078125, + "learning_rate": 8.967765763095274e-07, + "loss": 0.1746, + "num_input_tokens_seen": 79961960, + "step": 118630 + }, + { + "epoch": 2.8982727872376812, + "grad_norm": 0.1187821552157402, + "learning_rate": 8.966917546218012e-07, + "loss": 0.0957, + "num_input_tokens_seen": 79965672, + "step": 118635 + }, + { + "epoch": 2.8983949380695284, + "grad_norm": 0.04378324747085571, + "learning_rate": 8.966069336853598e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79969128, + "step": 118640 + }, + { + "epoch": 2.898517088901375, + "grad_norm": 0.30410701036453247, + "learning_rate": 8.965221135008207e-07, + "loss": 0.0006, + "num_input_tokens_seen": 79973096, + "step": 118645 + }, + { + "epoch": 2.898639239733223, + "grad_norm": 0.022590233013033867, + "learning_rate": 8.96437294068801e-07, + "loss": 0.0502, + "num_input_tokens_seen": 79976936, + "step": 118650 + }, + { + "epoch": 2.8987613905650695, + "grad_norm": 0.03725120425224304, + "learning_rate": 8.963524753899167e-07, + "loss": 0.0165, + "num_input_tokens_seen": 79980264, + "step": 118655 + }, + { + "epoch": 2.898883541396917, + "grad_norm": 0.4708126187324524, + "learning_rate": 8.962676574647855e-07, + "loss": 0.0442, + "num_input_tokens_seen": 79984040, + "step": 118660 + }, + { + "epoch": 2.899005692228764, + "grad_norm": 0.024750245735049248, + "learning_rate": 8.961828402940233e-07, + "loss": 0.0008, + "num_input_tokens_seen": 79987240, + "step": 118665 + }, + { + "epoch": 2.899127843060611, + "grad_norm": 0.057560380548238754, + "learning_rate": 8.96098023878248e-07, + "loss": 0.0009, + "num_input_tokens_seen": 79990568, + "step": 118670 + }, + { + "epoch": 2.8992499938924583, + "grad_norm": 2.3662123680114746, + "learning_rate": 8.960132082180755e-07, + "loss": 0.0004, + "num_input_tokens_seen": 79994664, + "step": 118675 + }, + { + "epoch": 2.8993721447243055, + "grad_norm": 0.03252064064145088, + "learning_rate": 8.959283933141227e-07, + "loss": 0.0007, + "num_input_tokens_seen": 79998312, + "step": 118680 + }, + { + "epoch": 2.8994942955561527, + "grad_norm": 29.49829864501953, + "learning_rate": 8.958435791670071e-07, + "loss": 0.0703, + "num_input_tokens_seen": 80001960, + "step": 118685 + }, + { + "epoch": 2.899616446388, + "grad_norm": 0.45613086223602295, + "learning_rate": 8.957587657773447e-07, + "loss": 0.0645, + "num_input_tokens_seen": 80005544, + "step": 118690 + }, + { + "epoch": 2.899738597219847, + "grad_norm": 0.009808986447751522, + "learning_rate": 8.956739531457528e-07, + "loss": 0.0504, + "num_input_tokens_seen": 80009128, + "step": 118695 + }, + { + "epoch": 2.8998607480516942, + "grad_norm": 0.34509333968162537, + "learning_rate": 8.955891412728476e-07, + "loss": 0.0348, + "num_input_tokens_seen": 80012136, + "step": 118700 + }, + { + "epoch": 2.8999828988835414, + "grad_norm": 0.0034415319096297026, + "learning_rate": 8.955043301592463e-07, + "loss": 0.0459, + "num_input_tokens_seen": 80016296, + "step": 118705 + }, + { + "epoch": 2.9001050497153886, + "grad_norm": 0.04839680716395378, + "learning_rate": 8.954195198055659e-07, + "loss": 0.1135, + "num_input_tokens_seen": 80019368, + "step": 118710 + }, + { + "epoch": 2.900227200547236, + "grad_norm": 0.041439514607191086, + "learning_rate": 8.953347102124229e-07, + "loss": 0.0673, + "num_input_tokens_seen": 80022440, + "step": 118715 + }, + { + "epoch": 2.900349351379083, + "grad_norm": 0.20260454714298248, + "learning_rate": 8.952499013804339e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80025960, + "step": 118720 + }, + { + "epoch": 2.90047150221093, + "grad_norm": 0.02303224429488182, + "learning_rate": 8.951650933102158e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80029224, + "step": 118725 + }, + { + "epoch": 2.9005936530427774, + "grad_norm": 0.21073459088802338, + "learning_rate": 8.950802860023854e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80032296, + "step": 118730 + }, + { + "epoch": 2.9007158038746246, + "grad_norm": 0.17983902990818024, + "learning_rate": 8.949954794575593e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80035560, + "step": 118735 + }, + { + "epoch": 2.9008379547064713, + "grad_norm": 0.009431470185518265, + "learning_rate": 8.949106736763541e-07, + "loss": 0.033, + "num_input_tokens_seen": 80038760, + "step": 118740 + }, + { + "epoch": 2.900960105538319, + "grad_norm": 0.48249107599258423, + "learning_rate": 8.948258686593872e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80042024, + "step": 118745 + }, + { + "epoch": 2.9010822563701657, + "grad_norm": 0.07081861048936844, + "learning_rate": 8.947410644072745e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80045288, + "step": 118750 + }, + { + "epoch": 2.901204407202013, + "grad_norm": 0.12552383542060852, + "learning_rate": 8.946562609206334e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80048488, + "step": 118755 + }, + { + "epoch": 2.90132655803386, + "grad_norm": 0.033153653144836426, + "learning_rate": 8.9457145820008e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80051752, + "step": 118760 + }, + { + "epoch": 2.9014487088657073, + "grad_norm": 0.006722915451973677, + "learning_rate": 8.944866562462317e-07, + "loss": 0.0, + "num_input_tokens_seen": 80055144, + "step": 118765 + }, + { + "epoch": 2.9015708596975545, + "grad_norm": 0.16946537792682648, + "learning_rate": 8.944018550597043e-07, + "loss": 0.0548, + "num_input_tokens_seen": 80058664, + "step": 118770 + }, + { + "epoch": 2.9016930105294017, + "grad_norm": 0.028818394988775253, + "learning_rate": 8.943170546411153e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80061672, + "step": 118775 + }, + { + "epoch": 2.901815161361249, + "grad_norm": 0.01083712000399828, + "learning_rate": 8.942322549910813e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80064744, + "step": 118780 + }, + { + "epoch": 2.901937312193096, + "grad_norm": 0.0124315544962883, + "learning_rate": 8.941474561102185e-07, + "loss": 0.0701, + "num_input_tokens_seen": 80068200, + "step": 118785 + }, + { + "epoch": 2.902059463024943, + "grad_norm": 0.02468065172433853, + "learning_rate": 8.940626579991442e-07, + "loss": 0.0, + "num_input_tokens_seen": 80071464, + "step": 118790 + }, + { + "epoch": 2.9021816138567904, + "grad_norm": 36.39851379394531, + "learning_rate": 8.939778606584743e-07, + "loss": 0.0936, + "num_input_tokens_seen": 80074728, + "step": 118795 + }, + { + "epoch": 2.9023037646886376, + "grad_norm": 0.20101211965084076, + "learning_rate": 8.938930640888258e-07, + "loss": 0.035, + "num_input_tokens_seen": 80077928, + "step": 118800 + }, + { + "epoch": 2.902425915520485, + "grad_norm": 25.310131072998047, + "learning_rate": 8.93808268290816e-07, + "loss": 0.1283, + "num_input_tokens_seen": 80080936, + "step": 118805 + }, + { + "epoch": 2.902548066352332, + "grad_norm": 4.265520095825195, + "learning_rate": 8.937234732650606e-07, + "loss": 0.0009, + "num_input_tokens_seen": 80084648, + "step": 118810 + }, + { + "epoch": 2.902670217184179, + "grad_norm": 0.2513897120952606, + "learning_rate": 8.936386790121772e-07, + "loss": 0.0339, + "num_input_tokens_seen": 80088168, + "step": 118815 + }, + { + "epoch": 2.9027923680160264, + "grad_norm": 0.02958909422159195, + "learning_rate": 8.935538855327814e-07, + "loss": 0.1282, + "num_input_tokens_seen": 80091432, + "step": 118820 + }, + { + "epoch": 2.902914518847873, + "grad_norm": 0.1474994271993637, + "learning_rate": 8.934690928274908e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80094824, + "step": 118825 + }, + { + "epoch": 2.9030366696797207, + "grad_norm": 1.460278868675232, + "learning_rate": 8.933843008969215e-07, + "loss": 0.0645, + "num_input_tokens_seen": 80098344, + "step": 118830 + }, + { + "epoch": 2.9031588205115675, + "grad_norm": 0.08092187345027924, + "learning_rate": 8.9329950974169e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80102312, + "step": 118835 + }, + { + "epoch": 2.903280971343415, + "grad_norm": 0.02012321911752224, + "learning_rate": 8.932147193624135e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80105640, + "step": 118840 + }, + { + "epoch": 2.903403122175262, + "grad_norm": 0.08701890707015991, + "learning_rate": 8.931299297597079e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80108968, + "step": 118845 + }, + { + "epoch": 2.903525273007109, + "grad_norm": 0.006065746303647757, + "learning_rate": 8.930451409341908e-07, + "loss": 0.0633, + "num_input_tokens_seen": 80112232, + "step": 118850 + }, + { + "epoch": 2.9036474238389562, + "grad_norm": 0.07881610095500946, + "learning_rate": 8.929603528864775e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80115176, + "step": 118855 + }, + { + "epoch": 2.9037695746708034, + "grad_norm": 0.11350667476654053, + "learning_rate": 8.928755656171853e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80118632, + "step": 118860 + }, + { + "epoch": 2.9038917255026506, + "grad_norm": 0.05210879072546959, + "learning_rate": 8.927907791269314e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80122152, + "step": 118865 + }, + { + "epoch": 2.904013876334498, + "grad_norm": 0.02516857162117958, + "learning_rate": 8.927059934163316e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80125672, + "step": 118870 + }, + { + "epoch": 2.904136027166345, + "grad_norm": 0.021097134798765182, + "learning_rate": 8.926212084860025e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80129064, + "step": 118875 + }, + { + "epoch": 2.904258177998192, + "grad_norm": 0.27393844723701477, + "learning_rate": 8.925364243365609e-07, + "loss": 0.0452, + "num_input_tokens_seen": 80132520, + "step": 118880 + }, + { + "epoch": 2.9043803288300394, + "grad_norm": 0.19845488667488098, + "learning_rate": 8.924516409686235e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80136104, + "step": 118885 + }, + { + "epoch": 2.9045024796618866, + "grad_norm": 0.014890705235302448, + "learning_rate": 8.923668583828066e-07, + "loss": 0.0, + "num_input_tokens_seen": 80139304, + "step": 118890 + }, + { + "epoch": 2.9046246304937338, + "grad_norm": 0.07981786876916885, + "learning_rate": 8.922820765797265e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80142760, + "step": 118895 + }, + { + "epoch": 2.904746781325581, + "grad_norm": 0.09087047725915909, + "learning_rate": 8.921972955600006e-07, + "loss": 0.0583, + "num_input_tokens_seen": 80146664, + "step": 118900 + }, + { + "epoch": 2.904868932157428, + "grad_norm": 0.19944734871387482, + "learning_rate": 8.921125153242447e-07, + "loss": 0.0017, + "num_input_tokens_seen": 80150184, + "step": 118905 + }, + { + "epoch": 2.904991082989275, + "grad_norm": 0.46191540360450745, + "learning_rate": 8.920277358730759e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80153448, + "step": 118910 + }, + { + "epoch": 2.9051132338211225, + "grad_norm": 0.025700677186250687, + "learning_rate": 8.9194295720711e-07, + "loss": 0.0466, + "num_input_tokens_seen": 80156904, + "step": 118915 + }, + { + "epoch": 2.9052353846529693, + "grad_norm": 32.34309005737305, + "learning_rate": 8.918581793269645e-07, + "loss": 0.0635, + "num_input_tokens_seen": 80159848, + "step": 118920 + }, + { + "epoch": 2.905357535484817, + "grad_norm": 0.011389784514904022, + "learning_rate": 8.917734022332549e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80163304, + "step": 118925 + }, + { + "epoch": 2.9054796863166636, + "grad_norm": 0.04762762784957886, + "learning_rate": 8.916886259265985e-07, + "loss": 0.0615, + "num_input_tokens_seen": 80166632, + "step": 118930 + }, + { + "epoch": 2.905601837148511, + "grad_norm": 0.018525706604123116, + "learning_rate": 8.916038504076117e-07, + "loss": 0.0403, + "num_input_tokens_seen": 80169832, + "step": 118935 + }, + { + "epoch": 2.905723987980358, + "grad_norm": 0.010757447220385075, + "learning_rate": 8.915190756769104e-07, + "loss": 0.0435, + "num_input_tokens_seen": 80173224, + "step": 118940 + }, + { + "epoch": 2.905846138812205, + "grad_norm": 0.012967376969754696, + "learning_rate": 8.91434301735112e-07, + "loss": 0.0, + "num_input_tokens_seen": 80176680, + "step": 118945 + }, + { + "epoch": 2.9059682896440524, + "grad_norm": 0.03979997709393501, + "learning_rate": 8.913495285828323e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80179688, + "step": 118950 + }, + { + "epoch": 2.9060904404758996, + "grad_norm": 0.012289268895983696, + "learning_rate": 8.912647562206879e-07, + "loss": 0.0277, + "num_input_tokens_seen": 80183208, + "step": 118955 + }, + { + "epoch": 2.906212591307747, + "grad_norm": 2.190854072570801, + "learning_rate": 8.911799846492959e-07, + "loss": 0.0351, + "num_input_tokens_seen": 80186280, + "step": 118960 + }, + { + "epoch": 2.906334742139594, + "grad_norm": 0.36408019065856934, + "learning_rate": 8.910952138692718e-07, + "loss": 0.0683, + "num_input_tokens_seen": 80189224, + "step": 118965 + }, + { + "epoch": 2.906456892971441, + "grad_norm": 13.64661693572998, + "learning_rate": 8.910104438812332e-07, + "loss": 0.0866, + "num_input_tokens_seen": 80192488, + "step": 118970 + }, + { + "epoch": 2.9065790438032884, + "grad_norm": 0.03847251832485199, + "learning_rate": 8.909256746857953e-07, + "loss": 0.1192, + "num_input_tokens_seen": 80196008, + "step": 118975 + }, + { + "epoch": 2.9067011946351355, + "grad_norm": 0.044732265174388885, + "learning_rate": 8.908409062835759e-07, + "loss": 0.0297, + "num_input_tokens_seen": 80199976, + "step": 118980 + }, + { + "epoch": 2.9068233454669827, + "grad_norm": 0.18884044885635376, + "learning_rate": 8.907561386751905e-07, + "loss": 0.001, + "num_input_tokens_seen": 80203048, + "step": 118985 + }, + { + "epoch": 2.90694549629883, + "grad_norm": 0.014213848859071732, + "learning_rate": 8.906713718612555e-07, + "loss": 0.0578, + "num_input_tokens_seen": 80206248, + "step": 118990 + }, + { + "epoch": 2.907067647130677, + "grad_norm": 0.019253773614764214, + "learning_rate": 8.905866058423884e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80209384, + "step": 118995 + }, + { + "epoch": 2.9071897979625243, + "grad_norm": 0.015433445572853088, + "learning_rate": 8.905018406192042e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80212776, + "step": 119000 + }, + { + "epoch": 2.907311948794371, + "grad_norm": 0.019289663061499596, + "learning_rate": 8.904170761923206e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80216040, + "step": 119005 + }, + { + "epoch": 2.9074340996262187, + "grad_norm": 19.523340225219727, + "learning_rate": 8.903323125623531e-07, + "loss": 0.0406, + "num_input_tokens_seen": 80219624, + "step": 119010 + }, + { + "epoch": 2.9075562504580654, + "grad_norm": 0.09069832414388657, + "learning_rate": 8.90247549729919e-07, + "loss": 0.0843, + "num_input_tokens_seen": 80223144, + "step": 119015 + }, + { + "epoch": 2.907678401289913, + "grad_norm": 12.428088188171387, + "learning_rate": 8.901627876956337e-07, + "loss": 0.1005, + "num_input_tokens_seen": 80226856, + "step": 119020 + }, + { + "epoch": 2.90780055212176, + "grad_norm": 0.13833267986774445, + "learning_rate": 8.900780264601144e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80230120, + "step": 119025 + }, + { + "epoch": 2.907922702953607, + "grad_norm": 0.11480916291475296, + "learning_rate": 8.899932660239773e-07, + "loss": 0.0362, + "num_input_tokens_seen": 80233512, + "step": 119030 + }, + { + "epoch": 2.908044853785454, + "grad_norm": 0.019525211304426193, + "learning_rate": 8.899085063878387e-07, + "loss": 0.0922, + "num_input_tokens_seen": 80236648, + "step": 119035 + }, + { + "epoch": 2.9081670046173014, + "grad_norm": 0.29217401146888733, + "learning_rate": 8.89823747552315e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80239656, + "step": 119040 + }, + { + "epoch": 2.9082891554491486, + "grad_norm": 0.0739276260137558, + "learning_rate": 8.897389895180228e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80243176, + "step": 119045 + }, + { + "epoch": 2.9084113062809958, + "grad_norm": 0.012618579901754856, + "learning_rate": 8.89654232285578e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80246440, + "step": 119050 + }, + { + "epoch": 2.908533457112843, + "grad_norm": 0.032436829060316086, + "learning_rate": 8.895694758555979e-07, + "loss": 0.1446, + "num_input_tokens_seen": 80249832, + "step": 119055 + }, + { + "epoch": 2.90865560794469, + "grad_norm": 0.4129319489002228, + "learning_rate": 8.894847202286976e-07, + "loss": 0.0979, + "num_input_tokens_seen": 80253864, + "step": 119060 + }, + { + "epoch": 2.9087777587765373, + "grad_norm": 0.024983467534184456, + "learning_rate": 8.893999654054947e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80257064, + "step": 119065 + }, + { + "epoch": 2.9088999096083845, + "grad_norm": 0.00642590643838048, + "learning_rate": 8.893152113866045e-07, + "loss": 0.0008, + "num_input_tokens_seen": 80260456, + "step": 119070 + }, + { + "epoch": 2.9090220604402317, + "grad_norm": 0.025908183306455612, + "learning_rate": 8.892304581726444e-07, + "loss": 0.0608, + "num_input_tokens_seen": 80263912, + "step": 119075 + }, + { + "epoch": 2.909144211272079, + "grad_norm": 0.08259736746549606, + "learning_rate": 8.891457057642296e-07, + "loss": 0.0639, + "num_input_tokens_seen": 80267240, + "step": 119080 + }, + { + "epoch": 2.909266362103926, + "grad_norm": 14.999557495117188, + "learning_rate": 8.890609541619775e-07, + "loss": 0.0015, + "num_input_tokens_seen": 80270504, + "step": 119085 + }, + { + "epoch": 2.909388512935773, + "grad_norm": 0.385803759098053, + "learning_rate": 8.88976203366504e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80273960, + "step": 119090 + }, + { + "epoch": 2.9095106637676205, + "grad_norm": 13.95687198638916, + "learning_rate": 8.88891453378425e-07, + "loss": 0.0896, + "num_input_tokens_seen": 80277160, + "step": 119095 + }, + { + "epoch": 2.909632814599467, + "grad_norm": 0.04457539692521095, + "learning_rate": 8.888067041983577e-07, + "loss": 0.082, + "num_input_tokens_seen": 80280168, + "step": 119100 + }, + { + "epoch": 2.909754965431315, + "grad_norm": 0.0129321264103055, + "learning_rate": 8.887219558269176e-07, + "loss": 0.0943, + "num_input_tokens_seen": 80283496, + "step": 119105 + }, + { + "epoch": 2.9098771162631616, + "grad_norm": 0.028965329751372337, + "learning_rate": 8.886372082647212e-07, + "loss": 0.0014, + "num_input_tokens_seen": 80286952, + "step": 119110 + }, + { + "epoch": 2.909999267095009, + "grad_norm": 0.08315041661262512, + "learning_rate": 8.885524615123855e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80290216, + "step": 119115 + }, + { + "epoch": 2.910121417926856, + "grad_norm": 0.0037849927321076393, + "learning_rate": 8.88467715570526e-07, + "loss": 0.0558, + "num_input_tokens_seen": 80293608, + "step": 119120 + }, + { + "epoch": 2.910243568758703, + "grad_norm": 0.017975879833102226, + "learning_rate": 8.883829704397594e-07, + "loss": 0.0561, + "num_input_tokens_seen": 80297064, + "step": 119125 + }, + { + "epoch": 2.9103657195905503, + "grad_norm": 43.18073272705078, + "learning_rate": 8.882982261207016e-07, + "loss": 0.135, + "num_input_tokens_seen": 80300136, + "step": 119130 + }, + { + "epoch": 2.9104878704223975, + "grad_norm": 3.4434423446655273, + "learning_rate": 8.882134826139695e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80303720, + "step": 119135 + }, + { + "epoch": 2.9106100212542447, + "grad_norm": 0.0011810072464868426, + "learning_rate": 8.881287399201789e-07, + "loss": 0.029, + "num_input_tokens_seen": 80306728, + "step": 119140 + }, + { + "epoch": 2.910732172086092, + "grad_norm": 0.07076526433229446, + "learning_rate": 8.880439980399459e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80309864, + "step": 119145 + }, + { + "epoch": 2.910854322917939, + "grad_norm": 0.020179910585284233, + "learning_rate": 8.879592569738875e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80313320, + "step": 119150 + }, + { + "epoch": 2.9109764737497863, + "grad_norm": 0.06361079961061478, + "learning_rate": 8.878745167226192e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80316456, + "step": 119155 + }, + { + "epoch": 2.9110986245816335, + "grad_norm": 18.111434936523438, + "learning_rate": 8.877897772867579e-07, + "loss": 0.0462, + "num_input_tokens_seen": 80319720, + "step": 119160 + }, + { + "epoch": 2.9112207754134807, + "grad_norm": 0.03135787695646286, + "learning_rate": 8.877050386669191e-07, + "loss": 0.069, + "num_input_tokens_seen": 80322920, + "step": 119165 + }, + { + "epoch": 2.911342926245328, + "grad_norm": 0.07542268186807632, + "learning_rate": 8.876203008637198e-07, + "loss": 0.0012, + "num_input_tokens_seen": 80325864, + "step": 119170 + }, + { + "epoch": 2.911465077077175, + "grad_norm": 0.04872267693281174, + "learning_rate": 8.875355638777756e-07, + "loss": 0.0837, + "num_input_tokens_seen": 80328808, + "step": 119175 + }, + { + "epoch": 2.9115872279090222, + "grad_norm": 0.04887533187866211, + "learning_rate": 8.874508277097033e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80332200, + "step": 119180 + }, + { + "epoch": 2.911709378740869, + "grad_norm": 0.006453742738813162, + "learning_rate": 8.873660923601187e-07, + "loss": 0.0373, + "num_input_tokens_seen": 80335528, + "step": 119185 + }, + { + "epoch": 2.9118315295727166, + "grad_norm": 0.6347325444221497, + "learning_rate": 8.872813578296382e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80339176, + "step": 119190 + }, + { + "epoch": 2.9119536804045634, + "grad_norm": 0.0005772336153313518, + "learning_rate": 8.871966241188781e-07, + "loss": 0.0008, + "num_input_tokens_seen": 80342440, + "step": 119195 + }, + { + "epoch": 2.9120758312364106, + "grad_norm": 0.1035570576786995, + "learning_rate": 8.871118912284543e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80345896, + "step": 119200 + }, + { + "epoch": 2.9121979820682578, + "grad_norm": 0.29160940647125244, + "learning_rate": 8.870271591589831e-07, + "loss": 0.0496, + "num_input_tokens_seen": 80349032, + "step": 119205 + }, + { + "epoch": 2.912320132900105, + "grad_norm": 16.458744049072266, + "learning_rate": 8.869424279110812e-07, + "loss": 0.0837, + "num_input_tokens_seen": 80352424, + "step": 119210 + }, + { + "epoch": 2.912442283731952, + "grad_norm": 0.21430669724941254, + "learning_rate": 8.86857697485364e-07, + "loss": 0.0317, + "num_input_tokens_seen": 80355560, + "step": 119215 + }, + { + "epoch": 2.9125644345637993, + "grad_norm": 17.024089813232422, + "learning_rate": 8.867729678824484e-07, + "loss": 0.0856, + "num_input_tokens_seen": 80359272, + "step": 119220 + }, + { + "epoch": 2.9126865853956465, + "grad_norm": 45.32529067993164, + "learning_rate": 8.866882391029498e-07, + "loss": 0.1324, + "num_input_tokens_seen": 80362472, + "step": 119225 + }, + { + "epoch": 2.9128087362274937, + "grad_norm": 0.015839653089642525, + "learning_rate": 8.866035111474853e-07, + "loss": 0.0467, + "num_input_tokens_seen": 80365672, + "step": 119230 + }, + { + "epoch": 2.912930887059341, + "grad_norm": 0.04247782379388809, + "learning_rate": 8.865187840166701e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80368872, + "step": 119235 + }, + { + "epoch": 2.913053037891188, + "grad_norm": 0.02549228072166443, + "learning_rate": 8.86434057711121e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80371816, + "step": 119240 + }, + { + "epoch": 2.9131751887230353, + "grad_norm": 0.02016402594745159, + "learning_rate": 8.863493322314543e-07, + "loss": 0.1107, + "num_input_tokens_seen": 80375144, + "step": 119245 + }, + { + "epoch": 2.9132973395548825, + "grad_norm": 0.2670440971851349, + "learning_rate": 8.862646075782852e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80378216, + "step": 119250 + }, + { + "epoch": 2.9134194903867296, + "grad_norm": 0.09133932739496231, + "learning_rate": 8.861798837522311e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80381288, + "step": 119255 + }, + { + "epoch": 2.913541641218577, + "grad_norm": 0.1471731960773468, + "learning_rate": 8.86095160753907e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80384488, + "step": 119260 + }, + { + "epoch": 2.913663792050424, + "grad_norm": 0.08419156074523926, + "learning_rate": 8.860104385839295e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80388072, + "step": 119265 + }, + { + "epoch": 2.9137859428822708, + "grad_norm": 0.18315629661083221, + "learning_rate": 8.859257172429153e-07, + "loss": 0.0408, + "num_input_tokens_seen": 80391400, + "step": 119270 + }, + { + "epoch": 2.9139080937141184, + "grad_norm": 0.09528139978647232, + "learning_rate": 8.858409967314792e-07, + "loss": 0.001, + "num_input_tokens_seen": 80394472, + "step": 119275 + }, + { + "epoch": 2.914030244545965, + "grad_norm": 0.030033722519874573, + "learning_rate": 8.857562770502389e-07, + "loss": 0.0334, + "num_input_tokens_seen": 80398568, + "step": 119280 + }, + { + "epoch": 2.914152395377813, + "grad_norm": 0.01004182081669569, + "learning_rate": 8.856715581998091e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80401640, + "step": 119285 + }, + { + "epoch": 2.9142745462096595, + "grad_norm": 0.031097983941435814, + "learning_rate": 8.855868401808069e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80405224, + "step": 119290 + }, + { + "epoch": 2.9143966970415067, + "grad_norm": 0.029696395620703697, + "learning_rate": 8.855021229938478e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80408808, + "step": 119295 + }, + { + "epoch": 2.914518847873354, + "grad_norm": 0.38885918259620667, + "learning_rate": 8.854174066395476e-07, + "loss": 0.0503, + "num_input_tokens_seen": 80412776, + "step": 119300 + }, + { + "epoch": 2.914640998705201, + "grad_norm": 19.236915588378906, + "learning_rate": 8.853326911185236e-07, + "loss": 0.043, + "num_input_tokens_seen": 80416488, + "step": 119305 + }, + { + "epoch": 2.9147631495370483, + "grad_norm": 30.828025817871094, + "learning_rate": 8.852479764313905e-07, + "loss": 0.1288, + "num_input_tokens_seen": 80419752, + "step": 119310 + }, + { + "epoch": 2.9148853003688955, + "grad_norm": 0.02660001441836357, + "learning_rate": 8.851632625787655e-07, + "loss": 0.0579, + "num_input_tokens_seen": 80422952, + "step": 119315 + }, + { + "epoch": 2.9150074512007427, + "grad_norm": 93.14715576171875, + "learning_rate": 8.850785495612636e-07, + "loss": 0.031, + "num_input_tokens_seen": 80426216, + "step": 119320 + }, + { + "epoch": 2.91512960203259, + "grad_norm": 0.06403271853923798, + "learning_rate": 8.84993837379502e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80429544, + "step": 119325 + }, + { + "epoch": 2.915251752864437, + "grad_norm": 185.7920684814453, + "learning_rate": 8.849091260340955e-07, + "loss": 0.1621, + "num_input_tokens_seen": 80432936, + "step": 119330 + }, + { + "epoch": 2.9153739036962842, + "grad_norm": 142.79888916015625, + "learning_rate": 8.848244155256613e-07, + "loss": 0.0385, + "num_input_tokens_seen": 80436456, + "step": 119335 + }, + { + "epoch": 2.9154960545281314, + "grad_norm": 0.07249239832162857, + "learning_rate": 8.847397058548146e-07, + "loss": 0.0395, + "num_input_tokens_seen": 80440296, + "step": 119340 + }, + { + "epoch": 2.9156182053599786, + "grad_norm": 0.09680524468421936, + "learning_rate": 8.846549970221719e-07, + "loss": 0.0423, + "num_input_tokens_seen": 80443880, + "step": 119345 + }, + { + "epoch": 2.915740356191826, + "grad_norm": 0.011335140094161034, + "learning_rate": 8.845702890283492e-07, + "loss": 0.0439, + "num_input_tokens_seen": 80447464, + "step": 119350 + }, + { + "epoch": 2.9158625070236726, + "grad_norm": 0.022819865494966507, + "learning_rate": 8.844855818739623e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80451496, + "step": 119355 + }, + { + "epoch": 2.91598465785552, + "grad_norm": 0.002536680083721876, + "learning_rate": 8.844008755596271e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80454888, + "step": 119360 + }, + { + "epoch": 2.916106808687367, + "grad_norm": 0.019164428114891052, + "learning_rate": 8.843161700859602e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80458280, + "step": 119365 + }, + { + "epoch": 2.9162289595192146, + "grad_norm": 0.04097279906272888, + "learning_rate": 8.842314654535769e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80461352, + "step": 119370 + }, + { + "epoch": 2.9163511103510613, + "grad_norm": 0.015141436830163002, + "learning_rate": 8.841467616630939e-07, + "loss": 0.0729, + "num_input_tokens_seen": 80464936, + "step": 119375 + }, + { + "epoch": 2.9164732611829085, + "grad_norm": 0.08130350708961487, + "learning_rate": 8.840620587151264e-07, + "loss": 0.0541, + "num_input_tokens_seen": 80468200, + "step": 119380 + }, + { + "epoch": 2.9165954120147557, + "grad_norm": 19.866153717041016, + "learning_rate": 8.839773566102912e-07, + "loss": 0.0778, + "num_input_tokens_seen": 80471784, + "step": 119385 + }, + { + "epoch": 2.916717562846603, + "grad_norm": 0.0013215701328590512, + "learning_rate": 8.838926553492035e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80475560, + "step": 119390 + }, + { + "epoch": 2.91683971367845, + "grad_norm": 0.014515292830765247, + "learning_rate": 8.838079549324797e-07, + "loss": 0.0434, + "num_input_tokens_seen": 80478440, + "step": 119395 + }, + { + "epoch": 2.9169618645102973, + "grad_norm": 0.00209752912633121, + "learning_rate": 8.837232553607361e-07, + "loss": 0.1949, + "num_input_tokens_seen": 80482152, + "step": 119400 + }, + { + "epoch": 2.9170840153421445, + "grad_norm": 0.349534273147583, + "learning_rate": 8.836385566345878e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80485288, + "step": 119405 + }, + { + "epoch": 2.9172061661739916, + "grad_norm": 0.04242877662181854, + "learning_rate": 8.835538587546515e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80488424, + "step": 119410 + }, + { + "epoch": 2.917328317005839, + "grad_norm": 0.23315057158470154, + "learning_rate": 8.834691617215425e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80491880, + "step": 119415 + }, + { + "epoch": 2.917450467837686, + "grad_norm": 0.08955276757478714, + "learning_rate": 8.833844655358772e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80495464, + "step": 119420 + }, + { + "epoch": 2.917572618669533, + "grad_norm": 0.05814218521118164, + "learning_rate": 8.832997701982718e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80499176, + "step": 119425 + }, + { + "epoch": 2.9176947695013804, + "grad_norm": 0.07658043503761292, + "learning_rate": 8.832150757093414e-07, + "loss": 0.038, + "num_input_tokens_seen": 80502440, + "step": 119430 + }, + { + "epoch": 2.9178169203332276, + "grad_norm": 0.33819350600242615, + "learning_rate": 8.831303820697028e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80505896, + "step": 119435 + }, + { + "epoch": 2.917939071165075, + "grad_norm": 0.06955944001674652, + "learning_rate": 8.830456892799712e-07, + "loss": 0.0727, + "num_input_tokens_seen": 80509608, + "step": 119440 + }, + { + "epoch": 2.918061221996922, + "grad_norm": 0.09534032642841339, + "learning_rate": 8.82960997340763e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80513064, + "step": 119445 + }, + { + "epoch": 2.9181833728287687, + "grad_norm": 0.06615091860294342, + "learning_rate": 8.828763062526938e-07, + "loss": 0.0613, + "num_input_tokens_seen": 80516200, + "step": 119450 + }, + { + "epoch": 2.9183055236606164, + "grad_norm": 0.04451657086610794, + "learning_rate": 8.827916160163794e-07, + "loss": 0.0504, + "num_input_tokens_seen": 80519464, + "step": 119455 + }, + { + "epoch": 2.918427674492463, + "grad_norm": 0.133866086602211, + "learning_rate": 8.827069266324364e-07, + "loss": 0.0493, + "num_input_tokens_seen": 80522856, + "step": 119460 + }, + { + "epoch": 2.9185498253243107, + "grad_norm": 26.166975021362305, + "learning_rate": 8.826222381014796e-07, + "loss": 0.0707, + "num_input_tokens_seen": 80526184, + "step": 119465 + }, + { + "epoch": 2.9186719761561575, + "grad_norm": 0.007928439415991306, + "learning_rate": 8.82537550424126e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80529384, + "step": 119470 + }, + { + "epoch": 2.9187941269880047, + "grad_norm": 0.13122475147247314, + "learning_rate": 8.824528636009904e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80532904, + "step": 119475 + }, + { + "epoch": 2.918916277819852, + "grad_norm": 0.03710798919200897, + "learning_rate": 8.823681776326898e-07, + "loss": 0.0424, + "num_input_tokens_seen": 80536232, + "step": 119480 + }, + { + "epoch": 2.919038428651699, + "grad_norm": 0.006326170172542334, + "learning_rate": 8.822834925198389e-07, + "loss": 0.0442, + "num_input_tokens_seen": 80539432, + "step": 119485 + }, + { + "epoch": 2.9191605794835462, + "grad_norm": 0.0011891796020790935, + "learning_rate": 8.82198808263054e-07, + "loss": 0.0366, + "num_input_tokens_seen": 80542952, + "step": 119490 + }, + { + "epoch": 2.9192827303153934, + "grad_norm": 13.90089225769043, + "learning_rate": 8.821141248629516e-07, + "loss": 0.1097, + "num_input_tokens_seen": 80546280, + "step": 119495 + }, + { + "epoch": 2.9194048811472406, + "grad_norm": 0.0893154889345169, + "learning_rate": 8.820294423201469e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80549608, + "step": 119500 + }, + { + "epoch": 2.919527031979088, + "grad_norm": 0.6648651361465454, + "learning_rate": 8.819447606352557e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80552488, + "step": 119505 + }, + { + "epoch": 2.919649182810935, + "grad_norm": 0.14524568617343903, + "learning_rate": 8.818600798088939e-07, + "loss": 0.0443, + "num_input_tokens_seen": 80555880, + "step": 119510 + }, + { + "epoch": 2.919771333642782, + "grad_norm": 0.07950985431671143, + "learning_rate": 8.817753998416772e-07, + "loss": 0.0484, + "num_input_tokens_seen": 80559208, + "step": 119515 + }, + { + "epoch": 2.9198934844746294, + "grad_norm": 0.06964538246393204, + "learning_rate": 8.81690720734222e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80562472, + "step": 119520 + }, + { + "epoch": 2.9200156353064766, + "grad_norm": 0.13740044832229614, + "learning_rate": 8.816060424871433e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80565864, + "step": 119525 + }, + { + "epoch": 2.9201377861383238, + "grad_norm": 0.11162356287240982, + "learning_rate": 8.815213651010578e-07, + "loss": 0.0344, + "num_input_tokens_seen": 80569896, + "step": 119530 + }, + { + "epoch": 2.9202599369701705, + "grad_norm": 0.15913273394107819, + "learning_rate": 8.814366885765802e-07, + "loss": 0.0379, + "num_input_tokens_seen": 80573160, + "step": 119535 + }, + { + "epoch": 2.920382087802018, + "grad_norm": 0.013078569434583187, + "learning_rate": 8.813520129143275e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80576360, + "step": 119540 + }, + { + "epoch": 2.920504238633865, + "grad_norm": 0.2981469929218292, + "learning_rate": 8.812673381149143e-07, + "loss": 0.1346, + "num_input_tokens_seen": 80579560, + "step": 119545 + }, + { + "epoch": 2.9206263894657125, + "grad_norm": 0.20622022449970245, + "learning_rate": 8.811826641789572e-07, + "loss": 0.0011, + "num_input_tokens_seen": 80582824, + "step": 119550 + }, + { + "epoch": 2.9207485402975593, + "grad_norm": 26.90651512145996, + "learning_rate": 8.810979911070721e-07, + "loss": 0.0475, + "num_input_tokens_seen": 80586088, + "step": 119555 + }, + { + "epoch": 2.9208706911294064, + "grad_norm": 14.657966613769531, + "learning_rate": 8.810133188998739e-07, + "loss": 0.0415, + "num_input_tokens_seen": 80589352, + "step": 119560 + }, + { + "epoch": 2.9209928419612536, + "grad_norm": 0.016939891502261162, + "learning_rate": 8.809286475579791e-07, + "loss": 0.0768, + "num_input_tokens_seen": 80592424, + "step": 119565 + }, + { + "epoch": 2.921114992793101, + "grad_norm": 0.1867605596780777, + "learning_rate": 8.808439770820028e-07, + "loss": 0.0411, + "num_input_tokens_seen": 80595560, + "step": 119570 + }, + { + "epoch": 2.921237143624948, + "grad_norm": 59.19410705566406, + "learning_rate": 8.807593074725618e-07, + "loss": 0.038, + "num_input_tokens_seen": 80598760, + "step": 119575 + }, + { + "epoch": 2.921359294456795, + "grad_norm": 0.22791574895381927, + "learning_rate": 8.806746387302706e-07, + "loss": 0.0347, + "num_input_tokens_seen": 80602088, + "step": 119580 + }, + { + "epoch": 2.9214814452886424, + "grad_norm": 22.170124053955078, + "learning_rate": 8.805899708557454e-07, + "loss": 0.0412, + "num_input_tokens_seen": 80605608, + "step": 119585 + }, + { + "epoch": 2.9216035961204896, + "grad_norm": 224.7540283203125, + "learning_rate": 8.805053038496028e-07, + "loss": 0.017, + "num_input_tokens_seen": 80608680, + "step": 119590 + }, + { + "epoch": 2.9217257469523368, + "grad_norm": 0.733668863773346, + "learning_rate": 8.804206377124571e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80612264, + "step": 119595 + }, + { + "epoch": 2.921847897784184, + "grad_norm": 0.04018937796354294, + "learning_rate": 8.803359724449252e-07, + "loss": 0.0524, + "num_input_tokens_seen": 80615784, + "step": 119600 + }, + { + "epoch": 2.921970048616031, + "grad_norm": 0.4537980556488037, + "learning_rate": 8.80251308047622e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80618664, + "step": 119605 + }, + { + "epoch": 2.9220921994478783, + "grad_norm": 0.2348211258649826, + "learning_rate": 8.801666445211634e-07, + "loss": 0.0706, + "num_input_tokens_seen": 80621480, + "step": 119610 + }, + { + "epoch": 2.9222143502797255, + "grad_norm": 0.01142832450568676, + "learning_rate": 8.800819818661655e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80625000, + "step": 119615 + }, + { + "epoch": 2.9223365011115727, + "grad_norm": 0.31771257519721985, + "learning_rate": 8.799973200832434e-07, + "loss": 0.0479, + "num_input_tokens_seen": 80627944, + "step": 119620 + }, + { + "epoch": 2.92245865194342, + "grad_norm": 0.05220457911491394, + "learning_rate": 8.799126591730134e-07, + "loss": 0.0536, + "num_input_tokens_seen": 80631528, + "step": 119625 + }, + { + "epoch": 2.9225808027752667, + "grad_norm": 0.051249198615550995, + "learning_rate": 8.798279991360904e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80634600, + "step": 119630 + }, + { + "epoch": 2.9227029536071143, + "grad_norm": 0.6016253232955933, + "learning_rate": 8.797433399730909e-07, + "loss": 0.0009, + "num_input_tokens_seen": 80637736, + "step": 119635 + }, + { + "epoch": 2.922825104438961, + "grad_norm": 0.05893409997224808, + "learning_rate": 8.796586816846299e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80640936, + "step": 119640 + }, + { + "epoch": 2.9229472552708082, + "grad_norm": 0.14922615885734558, + "learning_rate": 8.795740242713232e-07, + "loss": 0.0851, + "num_input_tokens_seen": 80644520, + "step": 119645 + }, + { + "epoch": 2.9230694061026554, + "grad_norm": 0.3645789921283722, + "learning_rate": 8.794893677337872e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80647848, + "step": 119650 + }, + { + "epoch": 2.9231915569345026, + "grad_norm": 0.025446340441703796, + "learning_rate": 8.794047120726364e-07, + "loss": 0.085, + "num_input_tokens_seen": 80651048, + "step": 119655 + }, + { + "epoch": 2.92331370776635, + "grad_norm": 0.23881550133228302, + "learning_rate": 8.793200572884873e-07, + "loss": 0.0325, + "num_input_tokens_seen": 80654696, + "step": 119660 + }, + { + "epoch": 2.923435858598197, + "grad_norm": 0.03703530132770538, + "learning_rate": 8.792354033819549e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80657832, + "step": 119665 + }, + { + "epoch": 2.923558009430044, + "grad_norm": 0.11316487938165665, + "learning_rate": 8.79150750353655e-07, + "loss": 0.0542, + "num_input_tokens_seen": 80661032, + "step": 119670 + }, + { + "epoch": 2.9236801602618914, + "grad_norm": 0.0426037423312664, + "learning_rate": 8.79066098204204e-07, + "loss": 0.0402, + "num_input_tokens_seen": 80664872, + "step": 119675 + }, + { + "epoch": 2.9238023110937386, + "grad_norm": 0.36164647340774536, + "learning_rate": 8.789814469342161e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80668136, + "step": 119680 + }, + { + "epoch": 2.9239244619255857, + "grad_norm": 1.571184515953064, + "learning_rate": 8.788967965443083e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80671272, + "step": 119685 + }, + { + "epoch": 2.924046612757433, + "grad_norm": 28.044828414916992, + "learning_rate": 8.788121470350952e-07, + "loss": 0.0599, + "num_input_tokens_seen": 80674472, + "step": 119690 + }, + { + "epoch": 2.92416876358928, + "grad_norm": 0.016553180292248726, + "learning_rate": 8.78727498407193e-07, + "loss": 0.0005, + "num_input_tokens_seen": 80677864, + "step": 119695 + }, + { + "epoch": 2.9242909144211273, + "grad_norm": 0.01405636128038168, + "learning_rate": 8.786428506612168e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80681320, + "step": 119700 + }, + { + "epoch": 2.9244130652529745, + "grad_norm": 0.007759598549455404, + "learning_rate": 8.785582037977826e-07, + "loss": 0.0456, + "num_input_tokens_seen": 80684136, + "step": 119705 + }, + { + "epoch": 2.9245352160848217, + "grad_norm": 46.18053436279297, + "learning_rate": 8.78473557817506e-07, + "loss": 0.079, + "num_input_tokens_seen": 80687848, + "step": 119710 + }, + { + "epoch": 2.9246573669166684, + "grad_norm": 0.003461407730355859, + "learning_rate": 8.783889127210019e-07, + "loss": 0.0381, + "num_input_tokens_seen": 80692008, + "step": 119715 + }, + { + "epoch": 2.924779517748516, + "grad_norm": 36.74126052856445, + "learning_rate": 8.783042685088869e-07, + "loss": 0.0368, + "num_input_tokens_seen": 80695464, + "step": 119720 + }, + { + "epoch": 2.924901668580363, + "grad_norm": 0.4897412955760956, + "learning_rate": 8.782196251817754e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80698856, + "step": 119725 + }, + { + "epoch": 2.9250238194122105, + "grad_norm": 0.06261391192674637, + "learning_rate": 8.78134982740284e-07, + "loss": 0.0534, + "num_input_tokens_seen": 80702440, + "step": 119730 + }, + { + "epoch": 2.925145970244057, + "grad_norm": 0.012732506729662418, + "learning_rate": 8.780503411850273e-07, + "loss": 0.0969, + "num_input_tokens_seen": 80705832, + "step": 119735 + }, + { + "epoch": 2.9252681210759044, + "grad_norm": 0.03934415802359581, + "learning_rate": 8.779657005166215e-07, + "loss": 0.0409, + "num_input_tokens_seen": 80709096, + "step": 119740 + }, + { + "epoch": 2.9253902719077516, + "grad_norm": 0.05687893554568291, + "learning_rate": 8.778810607356822e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80712552, + "step": 119745 + }, + { + "epoch": 2.9255124227395988, + "grad_norm": 0.0480547770857811, + "learning_rate": 8.777964218428243e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80716200, + "step": 119750 + }, + { + "epoch": 2.925634573571446, + "grad_norm": 0.0338495597243309, + "learning_rate": 8.777117838386642e-07, + "loss": 0.0347, + "num_input_tokens_seen": 80719208, + "step": 119755 + }, + { + "epoch": 2.925756724403293, + "grad_norm": 0.23030585050582886, + "learning_rate": 8.776271467238166e-07, + "loss": 0.0806, + "num_input_tokens_seen": 80722344, + "step": 119760 + }, + { + "epoch": 2.9258788752351403, + "grad_norm": 0.025462327525019646, + "learning_rate": 8.775425104988971e-07, + "loss": 0.0008, + "num_input_tokens_seen": 80726184, + "step": 119765 + }, + { + "epoch": 2.9260010260669875, + "grad_norm": 0.3015349805355072, + "learning_rate": 8.774578751645219e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80729384, + "step": 119770 + }, + { + "epoch": 2.9261231768988347, + "grad_norm": 18.960887908935547, + "learning_rate": 8.773732407213055e-07, + "loss": 0.0429, + "num_input_tokens_seen": 80732712, + "step": 119775 + }, + { + "epoch": 2.926245327730682, + "grad_norm": 0.013392589055001736, + "learning_rate": 8.772886071698643e-07, + "loss": 0.1408, + "num_input_tokens_seen": 80735848, + "step": 119780 + }, + { + "epoch": 2.926367478562529, + "grad_norm": 0.004601533990353346, + "learning_rate": 8.772039745108129e-07, + "loss": 0.1689, + "num_input_tokens_seen": 80739112, + "step": 119785 + }, + { + "epoch": 2.9264896293943763, + "grad_norm": 0.014628436416387558, + "learning_rate": 8.771193427447677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80742184, + "step": 119790 + }, + { + "epoch": 2.9266117802262235, + "grad_norm": 63.195289611816406, + "learning_rate": 8.770347118723433e-07, + "loss": 0.0205, + "num_input_tokens_seen": 80745704, + "step": 119795 + }, + { + "epoch": 2.9267339310580707, + "grad_norm": 0.010293773375451565, + "learning_rate": 8.769500818941555e-07, + "loss": 0.0481, + "num_input_tokens_seen": 80749096, + "step": 119800 + }, + { + "epoch": 2.926856081889918, + "grad_norm": 0.019898995757102966, + "learning_rate": 8.768654528108202e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80753064, + "step": 119805 + }, + { + "epoch": 2.9269782327217646, + "grad_norm": 0.0067049735225737095, + "learning_rate": 8.767808246229523e-07, + "loss": 0.0007, + "num_input_tokens_seen": 80756328, + "step": 119810 + }, + { + "epoch": 2.9271003835536122, + "grad_norm": 0.0841495618224144, + "learning_rate": 8.766961973311674e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80759592, + "step": 119815 + }, + { + "epoch": 2.927222534385459, + "grad_norm": 0.12519319355487823, + "learning_rate": 8.766115709360808e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80762664, + "step": 119820 + }, + { + "epoch": 2.927344685217306, + "grad_norm": 21.410520553588867, + "learning_rate": 8.76526945438308e-07, + "loss": 0.0503, + "num_input_tokens_seen": 80766056, + "step": 119825 + }, + { + "epoch": 2.9274668360491534, + "grad_norm": 0.0299088042229414, + "learning_rate": 8.764423208384647e-07, + "loss": 0.0013, + "num_input_tokens_seen": 80769256, + "step": 119830 + }, + { + "epoch": 2.9275889868810006, + "grad_norm": 0.353015273809433, + "learning_rate": 8.763576971371658e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80772712, + "step": 119835 + }, + { + "epoch": 2.9277111377128477, + "grad_norm": 0.7852094769477844, + "learning_rate": 8.762730743350273e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80776232, + "step": 119840 + }, + { + "epoch": 2.927833288544695, + "grad_norm": 0.002658928046002984, + "learning_rate": 8.761884524326639e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80779432, + "step": 119845 + }, + { + "epoch": 2.927955439376542, + "grad_norm": 0.039048757404088974, + "learning_rate": 8.761038314306918e-07, + "loss": 0.0, + "num_input_tokens_seen": 80782696, + "step": 119850 + }, + { + "epoch": 2.9280775902083893, + "grad_norm": 23.21377182006836, + "learning_rate": 8.760192113297255e-07, + "loss": 0.0748, + "num_input_tokens_seen": 80786408, + "step": 119855 + }, + { + "epoch": 2.9281997410402365, + "grad_norm": 0.22337211668491364, + "learning_rate": 8.759345921303811e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80789416, + "step": 119860 + }, + { + "epoch": 2.9283218918720837, + "grad_norm": 0.001950685866177082, + "learning_rate": 8.75849973833274e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80792424, + "step": 119865 + }, + { + "epoch": 2.928444042703931, + "grad_norm": 0.011079180054366589, + "learning_rate": 8.757653564390187e-07, + "loss": 0.0918, + "num_input_tokens_seen": 80795496, + "step": 119870 + }, + { + "epoch": 2.928566193535778, + "grad_norm": 0.01707855612039566, + "learning_rate": 8.756807399482316e-07, + "loss": 0.0604, + "num_input_tokens_seen": 80798952, + "step": 119875 + }, + { + "epoch": 2.9286883443676253, + "grad_norm": 0.010524839162826538, + "learning_rate": 8.755961243615273e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80802408, + "step": 119880 + }, + { + "epoch": 2.9288104951994725, + "grad_norm": 0.025105202570557594, + "learning_rate": 8.755115096795218e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80805736, + "step": 119885 + }, + { + "epoch": 2.9289326460313196, + "grad_norm": 0.08292187005281448, + "learning_rate": 8.754268959028297e-07, + "loss": 0.0742, + "num_input_tokens_seen": 80809064, + "step": 119890 + }, + { + "epoch": 2.9290547968631664, + "grad_norm": 0.06168559938669205, + "learning_rate": 8.753422830320666e-07, + "loss": 0.0888, + "num_input_tokens_seen": 80812200, + "step": 119895 + }, + { + "epoch": 2.929176947695014, + "grad_norm": 0.013893499039113522, + "learning_rate": 8.752576710678484e-07, + "loss": 0.0339, + "num_input_tokens_seen": 80815592, + "step": 119900 + }, + { + "epoch": 2.9292990985268608, + "grad_norm": 0.019977891817688942, + "learning_rate": 8.751730600107896e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80818792, + "step": 119905 + }, + { + "epoch": 2.9294212493587084, + "grad_norm": 296.9643859863281, + "learning_rate": 8.750884498615063e-07, + "loss": 0.1176, + "num_input_tokens_seen": 80822248, + "step": 119910 + }, + { + "epoch": 2.929543400190555, + "grad_norm": 0.3068140149116516, + "learning_rate": 8.750038406206131e-07, + "loss": 0.0195, + "num_input_tokens_seen": 80825512, + "step": 119915 + }, + { + "epoch": 2.9296655510224023, + "grad_norm": 0.07228963822126389, + "learning_rate": 8.749192322887255e-07, + "loss": 0.0833, + "num_input_tokens_seen": 80828392, + "step": 119920 + }, + { + "epoch": 2.9297877018542495, + "grad_norm": 0.2793293595314026, + "learning_rate": 8.748346248664593e-07, + "loss": 0.064, + "num_input_tokens_seen": 80831656, + "step": 119925 + }, + { + "epoch": 2.9299098526860967, + "grad_norm": 0.023717185482382774, + "learning_rate": 8.74750018354429e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80835048, + "step": 119930 + }, + { + "epoch": 2.930032003517944, + "grad_norm": 1083.84228515625, + "learning_rate": 8.746654127532505e-07, + "loss": 0.0185, + "num_input_tokens_seen": 80838184, + "step": 119935 + }, + { + "epoch": 2.930154154349791, + "grad_norm": 0.176435187458992, + "learning_rate": 8.745808080635385e-07, + "loss": 0.0455, + "num_input_tokens_seen": 80841128, + "step": 119940 + }, + { + "epoch": 2.9302763051816383, + "grad_norm": 0.1183871254324913, + "learning_rate": 8.744962042859089e-07, + "loss": 0.0456, + "num_input_tokens_seen": 80844456, + "step": 119945 + }, + { + "epoch": 2.9303984560134855, + "grad_norm": 0.006031445227563381, + "learning_rate": 8.744116014209763e-07, + "loss": 0.0452, + "num_input_tokens_seen": 80847848, + "step": 119950 + }, + { + "epoch": 2.9305206068453327, + "grad_norm": 0.01688563823699951, + "learning_rate": 8.743269994693565e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80851048, + "step": 119955 + }, + { + "epoch": 2.93064275767718, + "grad_norm": 0.010305249132215977, + "learning_rate": 8.742423984316648e-07, + "loss": 0.0404, + "num_input_tokens_seen": 80853992, + "step": 119960 + }, + { + "epoch": 2.930764908509027, + "grad_norm": 0.006608907133340836, + "learning_rate": 8.741577983085161e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80858088, + "step": 119965 + }, + { + "epoch": 2.9308870593408742, + "grad_norm": 0.041544102132320404, + "learning_rate": 8.740731991005257e-07, + "loss": 0.0668, + "num_input_tokens_seen": 80861480, + "step": 119970 + }, + { + "epoch": 2.9310092101727214, + "grad_norm": 0.04633386433124542, + "learning_rate": 8.739886008083088e-07, + "loss": 0.041, + "num_input_tokens_seen": 80864808, + "step": 119975 + }, + { + "epoch": 2.931131361004568, + "grad_norm": 0.048316024243831635, + "learning_rate": 8.739040034324805e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80868328, + "step": 119980 + }, + { + "epoch": 2.931253511836416, + "grad_norm": 0.004913606680929661, + "learning_rate": 8.738194069736566e-07, + "loss": 0.0014, + "num_input_tokens_seen": 80871656, + "step": 119985 + }, + { + "epoch": 2.9313756626682625, + "grad_norm": 0.6599655151367188, + "learning_rate": 8.737348114324516e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80874984, + "step": 119990 + }, + { + "epoch": 2.93149781350011, + "grad_norm": 0.007945040240883827, + "learning_rate": 8.736502168094814e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80877864, + "step": 119995 + }, + { + "epoch": 2.931619964331957, + "grad_norm": 0.010767627507448196, + "learning_rate": 8.735656231053603e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80881576, + "step": 120000 + }, + { + "epoch": 2.931742115163804, + "grad_norm": 0.12437177449464798, + "learning_rate": 8.734810303207046e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80885160, + "step": 120005 + }, + { + "epoch": 2.9318642659956513, + "grad_norm": 0.032584793865680695, + "learning_rate": 8.733964384561282e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80888616, + "step": 120010 + }, + { + "epoch": 2.9319864168274985, + "grad_norm": 16.456106185913086, + "learning_rate": 8.733118475122473e-07, + "loss": 0.0528, + "num_input_tokens_seen": 80892200, + "step": 120015 + }, + { + "epoch": 2.9321085676593457, + "grad_norm": 0.02670753188431263, + "learning_rate": 8.732272574896769e-07, + "loss": 0.0002, + "num_input_tokens_seen": 80895400, + "step": 120020 + }, + { + "epoch": 2.932230718491193, + "grad_norm": 0.4635147154331207, + "learning_rate": 8.731426683890315e-07, + "loss": 0.0595, + "num_input_tokens_seen": 80898344, + "step": 120025 + }, + { + "epoch": 2.93235286932304, + "grad_norm": 0.0504097044467926, + "learning_rate": 8.730580802109273e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80901992, + "step": 120030 + }, + { + "epoch": 2.9324750201548873, + "grad_norm": 18.078235626220703, + "learning_rate": 8.729734929559785e-07, + "loss": 0.081, + "num_input_tokens_seen": 80905576, + "step": 120035 + }, + { + "epoch": 2.9325971709867344, + "grad_norm": 0.055162858217954636, + "learning_rate": 8.728889066248009e-07, + "loss": 0.0757, + "num_input_tokens_seen": 80908904, + "step": 120040 + }, + { + "epoch": 2.9327193218185816, + "grad_norm": 0.0030497191473841667, + "learning_rate": 8.72804321218009e-07, + "loss": 0.0523, + "num_input_tokens_seen": 80912040, + "step": 120045 + }, + { + "epoch": 2.932841472650429, + "grad_norm": 0.20927371084690094, + "learning_rate": 8.727197367362182e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80915816, + "step": 120050 + }, + { + "epoch": 2.932963623482276, + "grad_norm": 21.464340209960938, + "learning_rate": 8.726351531800442e-07, + "loss": 0.0552, + "num_input_tokens_seen": 80919592, + "step": 120055 + }, + { + "epoch": 2.933085774314123, + "grad_norm": 14.764019966125488, + "learning_rate": 8.725505705501012e-07, + "loss": 0.0407, + "num_input_tokens_seen": 80923112, + "step": 120060 + }, + { + "epoch": 2.9332079251459704, + "grad_norm": 0.012927616015076637, + "learning_rate": 8.724659888470052e-07, + "loss": 0.1252, + "num_input_tokens_seen": 80926696, + "step": 120065 + }, + { + "epoch": 2.9333300759778176, + "grad_norm": 65.81498718261719, + "learning_rate": 8.723814080713705e-07, + "loss": 0.1547, + "num_input_tokens_seen": 80929896, + "step": 120070 + }, + { + "epoch": 2.9334522268096643, + "grad_norm": 0.012749478220939636, + "learning_rate": 8.722968282238124e-07, + "loss": 0.0726, + "num_input_tokens_seen": 80933608, + "step": 120075 + }, + { + "epoch": 2.933574377641512, + "grad_norm": 9.03498363494873, + "learning_rate": 8.722122493049465e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80937320, + "step": 120080 + }, + { + "epoch": 2.9336965284733587, + "grad_norm": 0.04102368652820587, + "learning_rate": 8.721276713153871e-07, + "loss": 0.0139, + "num_input_tokens_seen": 80940264, + "step": 120085 + }, + { + "epoch": 2.9338186793052063, + "grad_norm": 0.007833786308765411, + "learning_rate": 8.720430942557502e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80943784, + "step": 120090 + }, + { + "epoch": 2.933940830137053, + "grad_norm": 0.055653203278779984, + "learning_rate": 8.719585181266498e-07, + "loss": 0.0395, + "num_input_tokens_seen": 80946728, + "step": 120095 + }, + { + "epoch": 2.9340629809689003, + "grad_norm": 0.4759756326675415, + "learning_rate": 8.718739429287018e-07, + "loss": 0.0009, + "num_input_tokens_seen": 80950312, + "step": 120100 + }, + { + "epoch": 2.9341851318007475, + "grad_norm": 0.05956454947590828, + "learning_rate": 8.717893686625206e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80953256, + "step": 120105 + }, + { + "epoch": 2.9343072826325947, + "grad_norm": 0.041279036551713943, + "learning_rate": 8.717047953287217e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80956392, + "step": 120110 + }, + { + "epoch": 2.934429433464442, + "grad_norm": 0.16914759576320648, + "learning_rate": 8.716202229279204e-07, + "loss": 0.0006, + "num_input_tokens_seen": 80959464, + "step": 120115 + }, + { + "epoch": 2.934551584296289, + "grad_norm": 33.73630905151367, + "learning_rate": 8.715356514607312e-07, + "loss": 0.1412, + "num_input_tokens_seen": 80962984, + "step": 120120 + }, + { + "epoch": 2.9346737351281362, + "grad_norm": 30.533674240112305, + "learning_rate": 8.714510809277692e-07, + "loss": 0.0863, + "num_input_tokens_seen": 80966568, + "step": 120125 + }, + { + "epoch": 2.9347958859599834, + "grad_norm": 0.09490475803613663, + "learning_rate": 8.713665113296495e-07, + "loss": 0.0521, + "num_input_tokens_seen": 80969960, + "step": 120130 + }, + { + "epoch": 2.9349180367918306, + "grad_norm": 0.041476789861917496, + "learning_rate": 8.71281942666987e-07, + "loss": 0.0003, + "num_input_tokens_seen": 80973544, + "step": 120135 + }, + { + "epoch": 2.935040187623678, + "grad_norm": 0.12009302526712418, + "learning_rate": 8.711973749403974e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80976936, + "step": 120140 + }, + { + "epoch": 2.935162338455525, + "grad_norm": 0.02471102401614189, + "learning_rate": 8.711128081504945e-07, + "loss": 0.0008, + "num_input_tokens_seen": 80980392, + "step": 120145 + }, + { + "epoch": 2.935284489287372, + "grad_norm": 0.21336673200130463, + "learning_rate": 8.710282422978942e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80983464, + "step": 120150 + }, + { + "epoch": 2.9354066401192194, + "grad_norm": 0.035361818969249725, + "learning_rate": 8.709436773832111e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80987112, + "step": 120155 + }, + { + "epoch": 2.935528790951066, + "grad_norm": 0.33538514375686646, + "learning_rate": 8.708591134070607e-07, + "loss": 0.0004, + "num_input_tokens_seen": 80990440, + "step": 120160 + }, + { + "epoch": 2.9356509417829137, + "grad_norm": 0.006556864362210035, + "learning_rate": 8.707745503700569e-07, + "loss": 0.0001, + "num_input_tokens_seen": 80993768, + "step": 120165 + }, + { + "epoch": 2.9357730926147605, + "grad_norm": 0.0718025341629982, + "learning_rate": 8.706899882728157e-07, + "loss": 0.0545, + "num_input_tokens_seen": 80997032, + "step": 120170 + }, + { + "epoch": 2.935895243446608, + "grad_norm": 0.013668089173734188, + "learning_rate": 8.70605427115952e-07, + "loss": 0.0672, + "num_input_tokens_seen": 81000744, + "step": 120175 + }, + { + "epoch": 2.936017394278455, + "grad_norm": 0.03084360808134079, + "learning_rate": 8.705208669000798e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81003624, + "step": 120180 + }, + { + "epoch": 2.936139545110302, + "grad_norm": 0.06931090354919434, + "learning_rate": 8.704363076258152e-07, + "loss": 0.0415, + "num_input_tokens_seen": 81007144, + "step": 120185 + }, + { + "epoch": 2.9362616959421493, + "grad_norm": 0.23784157633781433, + "learning_rate": 8.703517492937721e-07, + "loss": 0.0488, + "num_input_tokens_seen": 81010536, + "step": 120190 + }, + { + "epoch": 2.9363838467739964, + "grad_norm": 0.10758914798498154, + "learning_rate": 8.702671919045665e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81014120, + "step": 120195 + }, + { + "epoch": 2.9365059976058436, + "grad_norm": 0.03329499065876007, + "learning_rate": 8.701826354588123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81017128, + "step": 120200 + }, + { + "epoch": 2.936628148437691, + "grad_norm": 0.023509986698627472, + "learning_rate": 8.70098079957125e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81020392, + "step": 120205 + }, + { + "epoch": 2.936750299269538, + "grad_norm": 0.06350888311862946, + "learning_rate": 8.700135254001197e-07, + "loss": 0.0936, + "num_input_tokens_seen": 81023400, + "step": 120210 + }, + { + "epoch": 2.936872450101385, + "grad_norm": 0.16623754799365997, + "learning_rate": 8.699289717884106e-07, + "loss": 0.0829, + "num_input_tokens_seen": 81026920, + "step": 120215 + }, + { + "epoch": 2.9369946009332324, + "grad_norm": 0.2236860990524292, + "learning_rate": 8.698444191226134e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81030504, + "step": 120220 + }, + { + "epoch": 2.9371167517650796, + "grad_norm": 0.21353180706501007, + "learning_rate": 8.697598674033424e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81033832, + "step": 120225 + }, + { + "epoch": 2.9372389025969268, + "grad_norm": 0.1755683422088623, + "learning_rate": 8.696753166312125e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81037160, + "step": 120230 + }, + { + "epoch": 2.937361053428774, + "grad_norm": 1.234663486480713, + "learning_rate": 8.695907668068392e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81040680, + "step": 120235 + }, + { + "epoch": 2.937483204260621, + "grad_norm": 43.08966827392578, + "learning_rate": 8.695062179308365e-07, + "loss": 0.0676, + "num_input_tokens_seen": 81044264, + "step": 120240 + }, + { + "epoch": 2.9376053550924683, + "grad_norm": 0.07131534814834595, + "learning_rate": 8.694216700038199e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81047144, + "step": 120245 + }, + { + "epoch": 2.9377275059243155, + "grad_norm": 85.09132385253906, + "learning_rate": 8.693371230264038e-07, + "loss": 0.0415, + "num_input_tokens_seen": 81050792, + "step": 120250 + }, + { + "epoch": 2.9378496567561623, + "grad_norm": 0.09336002171039581, + "learning_rate": 8.692525769992037e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81054056, + "step": 120255 + }, + { + "epoch": 2.93797180758801, + "grad_norm": 0.011122871190309525, + "learning_rate": 8.691680319228337e-07, + "loss": 0.0015, + "num_input_tokens_seen": 81057704, + "step": 120260 + }, + { + "epoch": 2.9380939584198567, + "grad_norm": 28.55598258972168, + "learning_rate": 8.690834877979087e-07, + "loss": 0.0378, + "num_input_tokens_seen": 81061096, + "step": 120265 + }, + { + "epoch": 2.938216109251704, + "grad_norm": 0.23469318449497223, + "learning_rate": 8.689989446250444e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81064360, + "step": 120270 + }, + { + "epoch": 2.938338260083551, + "grad_norm": 17.726032257080078, + "learning_rate": 8.689144024048549e-07, + "loss": 0.0424, + "num_input_tokens_seen": 81067880, + "step": 120275 + }, + { + "epoch": 2.9384604109153982, + "grad_norm": 0.0037503752391785383, + "learning_rate": 8.688298611379548e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81071400, + "step": 120280 + }, + { + "epoch": 2.9385825617472454, + "grad_norm": 0.04066101089119911, + "learning_rate": 8.687453208249594e-07, + "loss": 0.0571, + "num_input_tokens_seen": 81075240, + "step": 120285 + }, + { + "epoch": 2.9387047125790926, + "grad_norm": 0.7134239077568054, + "learning_rate": 8.686607814664836e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81079016, + "step": 120290 + }, + { + "epoch": 2.93882686341094, + "grad_norm": 0.0060598282143473625, + "learning_rate": 8.685762430631415e-07, + "loss": 0.0893, + "num_input_tokens_seen": 81082600, + "step": 120295 + }, + { + "epoch": 2.938949014242787, + "grad_norm": 0.23264743387699127, + "learning_rate": 8.684917056155482e-07, + "loss": 0.0513, + "num_input_tokens_seen": 81086568, + "step": 120300 + }, + { + "epoch": 2.939071165074634, + "grad_norm": 0.006453213281929493, + "learning_rate": 8.684071691243191e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81090600, + "step": 120305 + }, + { + "epoch": 2.9391933159064814, + "grad_norm": 0.01573999412357807, + "learning_rate": 8.68322633590068e-07, + "loss": 0.0467, + "num_input_tokens_seen": 81093928, + "step": 120310 + }, + { + "epoch": 2.9393154667383286, + "grad_norm": 0.05469103530049324, + "learning_rate": 8.682380990134106e-07, + "loss": 0.0514, + "num_input_tokens_seen": 81097192, + "step": 120315 + }, + { + "epoch": 2.9394376175701757, + "grad_norm": 274.07611083984375, + "learning_rate": 8.681535653949607e-07, + "loss": 0.0329, + "num_input_tokens_seen": 81100840, + "step": 120320 + }, + { + "epoch": 2.939559768402023, + "grad_norm": 0.011432591825723648, + "learning_rate": 8.680690327353338e-07, + "loss": 0.043, + "num_input_tokens_seen": 81104104, + "step": 120325 + }, + { + "epoch": 2.93968191923387, + "grad_norm": 0.18098084628582, + "learning_rate": 8.679845010351446e-07, + "loss": 0.0461, + "num_input_tokens_seen": 81107368, + "step": 120330 + }, + { + "epoch": 2.9398040700657173, + "grad_norm": 0.02071589045226574, + "learning_rate": 8.678999702950069e-07, + "loss": 0.0743, + "num_input_tokens_seen": 81110504, + "step": 120335 + }, + { + "epoch": 2.939926220897564, + "grad_norm": 0.03295678645372391, + "learning_rate": 8.678154405155369e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81114024, + "step": 120340 + }, + { + "epoch": 2.9400483717294117, + "grad_norm": 0.027914108708500862, + "learning_rate": 8.677309116973481e-07, + "loss": 0.0901, + "num_input_tokens_seen": 81117352, + "step": 120345 + }, + { + "epoch": 2.9401705225612584, + "grad_norm": 0.03139597550034523, + "learning_rate": 8.67646383841056e-07, + "loss": 0.0569, + "num_input_tokens_seen": 81120616, + "step": 120350 + }, + { + "epoch": 2.940292673393106, + "grad_norm": 0.025647137314081192, + "learning_rate": 8.675618569472747e-07, + "loss": 0.0745, + "num_input_tokens_seen": 81123560, + "step": 120355 + }, + { + "epoch": 2.940414824224953, + "grad_norm": 18.67249298095703, + "learning_rate": 8.674773310166191e-07, + "loss": 0.049, + "num_input_tokens_seen": 81127080, + "step": 120360 + }, + { + "epoch": 2.9405369750568, + "grad_norm": 1.2997747659683228, + "learning_rate": 8.673928060497045e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81130920, + "step": 120365 + }, + { + "epoch": 2.940659125888647, + "grad_norm": 0.03595520183444023, + "learning_rate": 8.673082820471447e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81133992, + "step": 120370 + }, + { + "epoch": 2.9407812767204944, + "grad_norm": 0.2902233898639679, + "learning_rate": 8.67223759009555e-07, + "loss": 0.0162, + "num_input_tokens_seen": 81137448, + "step": 120375 + }, + { + "epoch": 2.9409034275523416, + "grad_norm": 0.024199893698096275, + "learning_rate": 8.671392369375498e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81140904, + "step": 120380 + }, + { + "epoch": 2.9410255783841888, + "grad_norm": 0.0339377336204052, + "learning_rate": 8.670547158317434e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81144552, + "step": 120385 + }, + { + "epoch": 2.941147729216036, + "grad_norm": 0.008640998043119907, + "learning_rate": 8.669701956927515e-07, + "loss": 0.0562, + "num_input_tokens_seen": 81148072, + "step": 120390 + }, + { + "epoch": 2.941269880047883, + "grad_norm": 0.28882527351379395, + "learning_rate": 8.668856765211876e-07, + "loss": 0.1314, + "num_input_tokens_seen": 81151208, + "step": 120395 + }, + { + "epoch": 2.9413920308797303, + "grad_norm": 0.2839096784591675, + "learning_rate": 8.668011583176673e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81154792, + "step": 120400 + }, + { + "epoch": 2.9415141817115775, + "grad_norm": 0.021755626425147057, + "learning_rate": 8.667166410828044e-07, + "loss": 0.0786, + "num_input_tokens_seen": 81158056, + "step": 120405 + }, + { + "epoch": 2.9416363325434247, + "grad_norm": 0.060528360307216644, + "learning_rate": 8.666321248172143e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81161768, + "step": 120410 + }, + { + "epoch": 2.941758483375272, + "grad_norm": 0.2548329532146454, + "learning_rate": 8.665476095215109e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81165288, + "step": 120415 + }, + { + "epoch": 2.941880634207119, + "grad_norm": 0.022125931456685066, + "learning_rate": 8.664630951963091e-07, + "loss": 0.0666, + "num_input_tokens_seen": 81168680, + "step": 120420 + }, + { + "epoch": 2.942002785038966, + "grad_norm": 0.15540054440498352, + "learning_rate": 8.66378581842224e-07, + "loss": 0.0008, + "num_input_tokens_seen": 81172584, + "step": 120425 + }, + { + "epoch": 2.9421249358708135, + "grad_norm": 3.678313078125939e-05, + "learning_rate": 8.662940694598697e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81175784, + "step": 120430 + }, + { + "epoch": 2.94224708670266, + "grad_norm": 0.26085910201072693, + "learning_rate": 8.662095580498607e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81179496, + "step": 120435 + }, + { + "epoch": 2.942369237534508, + "grad_norm": 0.03058680146932602, + "learning_rate": 8.66125047612812e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81182888, + "step": 120440 + }, + { + "epoch": 2.9424913883663546, + "grad_norm": 0.19108319282531738, + "learning_rate": 8.660405381493381e-07, + "loss": 0.0444, + "num_input_tokens_seen": 81186472, + "step": 120445 + }, + { + "epoch": 2.942613539198202, + "grad_norm": 0.0032404169905930758, + "learning_rate": 8.65956029660053e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81189928, + "step": 120450 + }, + { + "epoch": 2.942735690030049, + "grad_norm": 0.5902910828590393, + "learning_rate": 8.658715221455717e-07, + "loss": 0.0373, + "num_input_tokens_seen": 81193000, + "step": 120455 + }, + { + "epoch": 2.942857840861896, + "grad_norm": 11.769871711730957, + "learning_rate": 8.657870156065091e-07, + "loss": 0.0634, + "num_input_tokens_seen": 81196136, + "step": 120460 + }, + { + "epoch": 2.9429799916937434, + "grad_norm": 0.14178988337516785, + "learning_rate": 8.657025100434792e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81199144, + "step": 120465 + }, + { + "epoch": 2.9431021425255905, + "grad_norm": 0.004357943311333656, + "learning_rate": 8.65618005457097e-07, + "loss": 0.0535, + "num_input_tokens_seen": 81202728, + "step": 120470 + }, + { + "epoch": 2.9432242933574377, + "grad_norm": 0.005334364715963602, + "learning_rate": 8.655335018479764e-07, + "loss": 0.0379, + "num_input_tokens_seen": 81205864, + "step": 120475 + }, + { + "epoch": 2.943346444189285, + "grad_norm": 0.10710705816745758, + "learning_rate": 8.654489992167326e-07, + "loss": 0.0515, + "num_input_tokens_seen": 81209192, + "step": 120480 + }, + { + "epoch": 2.943468595021132, + "grad_norm": 0.02609104849398136, + "learning_rate": 8.653644975639802e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81212904, + "step": 120485 + }, + { + "epoch": 2.9435907458529793, + "grad_norm": 0.015001550316810608, + "learning_rate": 8.652799968903328e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81216360, + "step": 120490 + }, + { + "epoch": 2.9437128966848265, + "grad_norm": 0.004880082793533802, + "learning_rate": 8.65195497196406e-07, + "loss": 0.0606, + "num_input_tokens_seen": 81218984, + "step": 120495 + }, + { + "epoch": 2.9438350475166737, + "grad_norm": 0.02117271162569523, + "learning_rate": 8.651109984828133e-07, + "loss": 0.0446, + "num_input_tokens_seen": 81222184, + "step": 120500 + }, + { + "epoch": 2.943957198348521, + "grad_norm": 0.14116646349430084, + "learning_rate": 8.650265007501702e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81225192, + "step": 120505 + }, + { + "epoch": 2.944079349180368, + "grad_norm": 0.031078439205884933, + "learning_rate": 8.649420039990904e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81229224, + "step": 120510 + }, + { + "epoch": 2.9442015000122153, + "grad_norm": 0.1605730801820755, + "learning_rate": 8.648575082301884e-07, + "loss": 0.0379, + "num_input_tokens_seen": 81232552, + "step": 120515 + }, + { + "epoch": 2.944323650844062, + "grad_norm": 0.08591590076684952, + "learning_rate": 8.647730134440796e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81236264, + "step": 120520 + }, + { + "epoch": 2.9444458016759096, + "grad_norm": 2.821213960647583, + "learning_rate": 8.646885196413772e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81239720, + "step": 120525 + }, + { + "epoch": 2.9445679525077564, + "grad_norm": 0.0029579047113656998, + "learning_rate": 8.646040268226969e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81244200, + "step": 120530 + }, + { + "epoch": 2.944690103339604, + "grad_norm": 0.012535029090940952, + "learning_rate": 8.645195349886522e-07, + "loss": 0.0627, + "num_input_tokens_seen": 81247336, + "step": 120535 + }, + { + "epoch": 2.9448122541714508, + "grad_norm": 0.010929737240076065, + "learning_rate": 8.644350441398575e-07, + "loss": 0.0279, + "num_input_tokens_seen": 81250792, + "step": 120540 + }, + { + "epoch": 2.944934405003298, + "grad_norm": 0.29619693756103516, + "learning_rate": 8.643505542769283e-07, + "loss": 0.0466, + "num_input_tokens_seen": 81253992, + "step": 120545 + }, + { + "epoch": 2.945056555835145, + "grad_norm": 0.08591552823781967, + "learning_rate": 8.642660654004779e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81257448, + "step": 120550 + }, + { + "epoch": 2.9451787066669923, + "grad_norm": 0.09558862447738647, + "learning_rate": 8.641815775111216e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81260584, + "step": 120555 + }, + { + "epoch": 2.9453008574988395, + "grad_norm": 27.3420352935791, + "learning_rate": 8.640970906094729e-07, + "loss": 0.0765, + "num_input_tokens_seen": 81263848, + "step": 120560 + }, + { + "epoch": 2.9454230083306867, + "grad_norm": 0.0957900658249855, + "learning_rate": 8.640126046961473e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81267688, + "step": 120565 + }, + { + "epoch": 2.945545159162534, + "grad_norm": 0.054024793207645416, + "learning_rate": 8.639281197717579e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81271016, + "step": 120570 + }, + { + "epoch": 2.945667309994381, + "grad_norm": 0.016774863004684448, + "learning_rate": 8.6384363583692e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81274408, + "step": 120575 + }, + { + "epoch": 2.9457894608262283, + "grad_norm": 0.029413677752017975, + "learning_rate": 8.637591528922482e-07, + "loss": 0.0, + "num_input_tokens_seen": 81277544, + "step": 120580 + }, + { + "epoch": 2.9459116116580755, + "grad_norm": 0.0019817219581454992, + "learning_rate": 8.636746709383563e-07, + "loss": 0.0515, + "num_input_tokens_seen": 81281000, + "step": 120585 + }, + { + "epoch": 2.9460337624899227, + "grad_norm": 266.28082275390625, + "learning_rate": 8.635901899758589e-07, + "loss": 0.0343, + "num_input_tokens_seen": 81284264, + "step": 120590 + }, + { + "epoch": 2.94615591332177, + "grad_norm": 0.043926727026700974, + "learning_rate": 8.635057100053702e-07, + "loss": 0.0548, + "num_input_tokens_seen": 81287528, + "step": 120595 + }, + { + "epoch": 2.946278064153617, + "grad_norm": 0.010100879706442356, + "learning_rate": 8.63421231027505e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81290792, + "step": 120600 + }, + { + "epoch": 2.946400214985464, + "grad_norm": 0.10049595683813095, + "learning_rate": 8.633367530428769e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81294056, + "step": 120605 + }, + { + "epoch": 2.9465223658173114, + "grad_norm": 0.026556089520454407, + "learning_rate": 8.632522760521006e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81297704, + "step": 120610 + }, + { + "epoch": 2.946644516649158, + "grad_norm": 0.14986656606197357, + "learning_rate": 8.63167800055791e-07, + "loss": 0.0008, + "num_input_tokens_seen": 81300904, + "step": 120615 + }, + { + "epoch": 2.946766667481006, + "grad_norm": 0.001587200560607016, + "learning_rate": 8.630833250545616e-07, + "loss": 0.059, + "num_input_tokens_seen": 81304360, + "step": 120620 + }, + { + "epoch": 2.9468888183128525, + "grad_norm": 0.03377887234091759, + "learning_rate": 8.629988510490274e-07, + "loss": 0.0009, + "num_input_tokens_seen": 81307880, + "step": 120625 + }, + { + "epoch": 2.9470109691446997, + "grad_norm": 0.13490785658359528, + "learning_rate": 8.629143780398022e-07, + "loss": 0.0728, + "num_input_tokens_seen": 81311144, + "step": 120630 + }, + { + "epoch": 2.947133119976547, + "grad_norm": 0.0219797994941473, + "learning_rate": 8.628299060275006e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81314408, + "step": 120635 + }, + { + "epoch": 2.947255270808394, + "grad_norm": 0.031006071716547012, + "learning_rate": 8.62745435012737e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81317736, + "step": 120640 + }, + { + "epoch": 2.9473774216402413, + "grad_norm": 0.01624114066362381, + "learning_rate": 8.62660964996125e-07, + "loss": 0.0788, + "num_input_tokens_seen": 81321064, + "step": 120645 + }, + { + "epoch": 2.9474995724720885, + "grad_norm": 0.4532836377620697, + "learning_rate": 8.625764959782799e-07, + "loss": 0.0423, + "num_input_tokens_seen": 81324200, + "step": 120650 + }, + { + "epoch": 2.9476217233039357, + "grad_norm": 26.661325454711914, + "learning_rate": 8.624920279598152e-07, + "loss": 0.1261, + "num_input_tokens_seen": 81327784, + "step": 120655 + }, + { + "epoch": 2.947743874135783, + "grad_norm": 0.06300724297761917, + "learning_rate": 8.624075609413457e-07, + "loss": 0.0719, + "num_input_tokens_seen": 81330984, + "step": 120660 + }, + { + "epoch": 2.94786602496763, + "grad_norm": 0.014966240152716637, + "learning_rate": 8.623230949234851e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81334184, + "step": 120665 + }, + { + "epoch": 2.9479881757994773, + "grad_norm": 24.074207305908203, + "learning_rate": 8.62238629906848e-07, + "loss": 0.1365, + "num_input_tokens_seen": 81337256, + "step": 120670 + }, + { + "epoch": 2.9481103266313244, + "grad_norm": 0.0069862534292042255, + "learning_rate": 8.62154165892049e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81340456, + "step": 120675 + }, + { + "epoch": 2.9482324774631716, + "grad_norm": 0.008710438385605812, + "learning_rate": 8.620697028797016e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81343720, + "step": 120680 + }, + { + "epoch": 2.948354628295019, + "grad_norm": 0.13439220190048218, + "learning_rate": 8.619852408704208e-07, + "loss": 0.0586, + "num_input_tokens_seen": 81348008, + "step": 120685 + }, + { + "epoch": 2.948476779126866, + "grad_norm": 0.017029277980327606, + "learning_rate": 8.619007798648202e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81351592, + "step": 120690 + }, + { + "epoch": 2.948598929958713, + "grad_norm": 0.018834611400961876, + "learning_rate": 8.618163198635142e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81355048, + "step": 120695 + }, + { + "epoch": 2.94872108079056, + "grad_norm": 0.03875388205051422, + "learning_rate": 8.617318608671174e-07, + "loss": 0.0334, + "num_input_tokens_seen": 81358440, + "step": 120700 + }, + { + "epoch": 2.9488432316224076, + "grad_norm": 14.059005737304688, + "learning_rate": 8.616474028762432e-07, + "loss": 0.0654, + "num_input_tokens_seen": 81361704, + "step": 120705 + }, + { + "epoch": 2.9489653824542543, + "grad_norm": 0.020959537476301193, + "learning_rate": 8.615629458915069e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81365224, + "step": 120710 + }, + { + "epoch": 2.9490875332861015, + "grad_norm": 0.1255888193845749, + "learning_rate": 8.614784899135216e-07, + "loss": 0.0775, + "num_input_tokens_seen": 81368680, + "step": 120715 + }, + { + "epoch": 2.9492096841179487, + "grad_norm": 0.014902031049132347, + "learning_rate": 8.613940349429024e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81371560, + "step": 120720 + }, + { + "epoch": 2.949331834949796, + "grad_norm": 0.1372278332710266, + "learning_rate": 8.613095809802626e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81375016, + "step": 120725 + }, + { + "epoch": 2.949453985781643, + "grad_norm": 0.001658071530982852, + "learning_rate": 8.612251280262168e-07, + "loss": 0.0294, + "num_input_tokens_seen": 81378664, + "step": 120730 + }, + { + "epoch": 2.9495761366134903, + "grad_norm": 0.059731800109148026, + "learning_rate": 8.611406760813797e-07, + "loss": 0.1169, + "num_input_tokens_seen": 81381544, + "step": 120735 + }, + { + "epoch": 2.9496982874453375, + "grad_norm": 0.007839690893888474, + "learning_rate": 8.610562251463648e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81384936, + "step": 120740 + }, + { + "epoch": 2.9498204382771847, + "grad_norm": 22.30178451538086, + "learning_rate": 8.609717752217864e-07, + "loss": 0.1131, + "num_input_tokens_seen": 81388392, + "step": 120745 + }, + { + "epoch": 2.949942589109032, + "grad_norm": 0.015643972903490067, + "learning_rate": 8.608873263082584e-07, + "loss": 0.058, + "num_input_tokens_seen": 81391656, + "step": 120750 + }, + { + "epoch": 2.950064739940879, + "grad_norm": 0.017678532749414444, + "learning_rate": 8.608028784063957e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81394792, + "step": 120755 + }, + { + "epoch": 2.9501868907727262, + "grad_norm": 9.996286392211914, + "learning_rate": 8.607184315168112e-07, + "loss": 0.1291, + "num_input_tokens_seen": 81397928, + "step": 120760 + }, + { + "epoch": 2.9503090416045734, + "grad_norm": 9.89952278137207, + "learning_rate": 8.6063398564012e-07, + "loss": 0.0884, + "num_input_tokens_seen": 81401512, + "step": 120765 + }, + { + "epoch": 2.9504311924364206, + "grad_norm": 0.048212021589279175, + "learning_rate": 8.605495407769362e-07, + "loss": 0.0323, + "num_input_tokens_seen": 81404520, + "step": 120770 + }, + { + "epoch": 2.950553343268268, + "grad_norm": 0.2680891752243042, + "learning_rate": 8.604650969278733e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81408104, + "step": 120775 + }, + { + "epoch": 2.950675494100115, + "grad_norm": 0.014988810755312443, + "learning_rate": 8.603806540935461e-07, + "loss": 0.0481, + "num_input_tokens_seen": 81411880, + "step": 120780 + }, + { + "epoch": 2.9507976449319617, + "grad_norm": 0.09640608727931976, + "learning_rate": 8.602962122745679e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81415528, + "step": 120785 + }, + { + "epoch": 2.9509197957638094, + "grad_norm": 0.10828623175621033, + "learning_rate": 8.602117714715536e-07, + "loss": 0.0011, + "num_input_tokens_seen": 81418984, + "step": 120790 + }, + { + "epoch": 2.951041946595656, + "grad_norm": 0.13522803783416748, + "learning_rate": 8.601273316851168e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81422248, + "step": 120795 + }, + { + "epoch": 2.9511640974275037, + "grad_norm": 0.036873117089271545, + "learning_rate": 8.600428929158715e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81425320, + "step": 120800 + }, + { + "epoch": 2.9512862482593505, + "grad_norm": 0.15291425585746765, + "learning_rate": 8.599584551644324e-07, + "loss": 0.1369, + "num_input_tokens_seen": 81428712, + "step": 120805 + }, + { + "epoch": 2.9514083990911977, + "grad_norm": 0.17997407913208008, + "learning_rate": 8.598740184314124e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81431784, + "step": 120810 + }, + { + "epoch": 2.951530549923045, + "grad_norm": 0.15903061628341675, + "learning_rate": 8.597895827174269e-07, + "loss": 0.0376, + "num_input_tokens_seen": 81435176, + "step": 120815 + }, + { + "epoch": 2.951652700754892, + "grad_norm": 0.06580569595098495, + "learning_rate": 8.597051480230886e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81438760, + "step": 120820 + }, + { + "epoch": 2.9517748515867392, + "grad_norm": 0.052439238876104355, + "learning_rate": 8.596207143490123e-07, + "loss": 0.0537, + "num_input_tokens_seen": 81441960, + "step": 120825 + }, + { + "epoch": 2.9518970024185864, + "grad_norm": 0.025906480848789215, + "learning_rate": 8.595362816958124e-07, + "loss": 0.001, + "num_input_tokens_seen": 81445288, + "step": 120830 + }, + { + "epoch": 2.9520191532504336, + "grad_norm": 0.01771625503897667, + "learning_rate": 8.594518500641019e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81448424, + "step": 120835 + }, + { + "epoch": 2.952141304082281, + "grad_norm": 0.2788228392601013, + "learning_rate": 8.59367419454496e-07, + "loss": 0.0427, + "num_input_tokens_seen": 81451368, + "step": 120840 + }, + { + "epoch": 2.952263454914128, + "grad_norm": 0.26164335012435913, + "learning_rate": 8.592829898676076e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81454760, + "step": 120845 + }, + { + "epoch": 2.952385605745975, + "grad_norm": 12.802289962768555, + "learning_rate": 8.591985613040511e-07, + "loss": 0.1223, + "num_input_tokens_seen": 81457960, + "step": 120850 + }, + { + "epoch": 2.9525077565778224, + "grad_norm": 0.7145296931266785, + "learning_rate": 8.591141337644409e-07, + "loss": 0.0372, + "num_input_tokens_seen": 81461352, + "step": 120855 + }, + { + "epoch": 2.9526299074096696, + "grad_norm": 0.05967799574136734, + "learning_rate": 8.590297072493901e-07, + "loss": 0.0711, + "num_input_tokens_seen": 81464552, + "step": 120860 + }, + { + "epoch": 2.9527520582415168, + "grad_norm": 0.08640944957733154, + "learning_rate": 8.589452817595138e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81467752, + "step": 120865 + }, + { + "epoch": 2.952874209073364, + "grad_norm": 0.059327781200408936, + "learning_rate": 8.588608572954248e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81471208, + "step": 120870 + }, + { + "epoch": 2.952996359905211, + "grad_norm": 8.785379031905904e-05, + "learning_rate": 8.587764338577381e-07, + "loss": 0.0384, + "num_input_tokens_seen": 81474664, + "step": 120875 + }, + { + "epoch": 2.953118510737058, + "grad_norm": 0.17147451639175415, + "learning_rate": 8.586920114470666e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81478696, + "step": 120880 + }, + { + "epoch": 2.9532406615689055, + "grad_norm": 0.017553070560097694, + "learning_rate": 8.586075900640248e-07, + "loss": 0.117, + "num_input_tokens_seen": 81481640, + "step": 120885 + }, + { + "epoch": 2.9533628124007523, + "grad_norm": 0.023216333240270615, + "learning_rate": 8.585231697092272e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81485416, + "step": 120890 + }, + { + "epoch": 2.9534849632325995, + "grad_norm": 65.75865936279297, + "learning_rate": 8.584387503832868e-07, + "loss": 0.0558, + "num_input_tokens_seen": 81488616, + "step": 120895 + }, + { + "epoch": 2.9536071140644466, + "grad_norm": 0.9762405753135681, + "learning_rate": 8.583543320868181e-07, + "loss": 0.0993, + "num_input_tokens_seen": 81492008, + "step": 120900 + }, + { + "epoch": 2.953729264896294, + "grad_norm": 0.018305018544197083, + "learning_rate": 8.582699148204347e-07, + "loss": 0.0326, + "num_input_tokens_seen": 81495336, + "step": 120905 + }, + { + "epoch": 2.953851415728141, + "grad_norm": 0.005096158478409052, + "learning_rate": 8.581854985847508e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81498536, + "step": 120910 + }, + { + "epoch": 2.953973566559988, + "grad_norm": 0.06799504905939102, + "learning_rate": 8.581010833803795e-07, + "loss": 0.0576, + "num_input_tokens_seen": 81502184, + "step": 120915 + }, + { + "epoch": 2.9540957173918354, + "grad_norm": 0.09852556139230728, + "learning_rate": 8.580166692079355e-07, + "loss": 0.0441, + "num_input_tokens_seen": 81505384, + "step": 120920 + }, + { + "epoch": 2.9542178682236826, + "grad_norm": 69.60293579101562, + "learning_rate": 8.579322560680329e-07, + "loss": 0.0665, + "num_input_tokens_seen": 81508456, + "step": 120925 + }, + { + "epoch": 2.95434001905553, + "grad_norm": 0.3892046809196472, + "learning_rate": 8.578478439612846e-07, + "loss": 0.0293, + "num_input_tokens_seen": 81511464, + "step": 120930 + }, + { + "epoch": 2.954462169887377, + "grad_norm": 0.019820692017674446, + "learning_rate": 8.577634328883055e-07, + "loss": 0.0624, + "num_input_tokens_seen": 81514792, + "step": 120935 + }, + { + "epoch": 2.954584320719224, + "grad_norm": 0.05053357407450676, + "learning_rate": 8.576790228497085e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81518440, + "step": 120940 + }, + { + "epoch": 2.9547064715510714, + "grad_norm": 0.2523508667945862, + "learning_rate": 8.575946138461082e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81522408, + "step": 120945 + }, + { + "epoch": 2.9548286223829185, + "grad_norm": 0.08181318640708923, + "learning_rate": 8.575102058781181e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81525800, + "step": 120950 + }, + { + "epoch": 2.9549507732147657, + "grad_norm": 32.51930618286133, + "learning_rate": 8.574257989463522e-07, + "loss": 0.0014, + "num_input_tokens_seen": 81529064, + "step": 120955 + }, + { + "epoch": 2.955072924046613, + "grad_norm": 0.07995011657476425, + "learning_rate": 8.573413930514244e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81532584, + "step": 120960 + }, + { + "epoch": 2.9551950748784597, + "grad_norm": 0.014723896980285645, + "learning_rate": 8.57256988193948e-07, + "loss": 0.0609, + "num_input_tokens_seen": 81535912, + "step": 120965 + }, + { + "epoch": 2.9553172257103073, + "grad_norm": 42.25886917114258, + "learning_rate": 8.571725843745374e-07, + "loss": 0.1942, + "num_input_tokens_seen": 81539304, + "step": 120970 + }, + { + "epoch": 2.955439376542154, + "grad_norm": 0.14818468689918518, + "learning_rate": 8.57088181593806e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81542632, + "step": 120975 + }, + { + "epoch": 2.9555615273740017, + "grad_norm": 0.336948424577713, + "learning_rate": 8.570037798523677e-07, + "loss": 0.0085, + "num_input_tokens_seen": 81545832, + "step": 120980 + }, + { + "epoch": 2.9556836782058484, + "grad_norm": 0.010888462886214256, + "learning_rate": 8.569193791508368e-07, + "loss": 0.0031, + "num_input_tokens_seen": 81548840, + "step": 120985 + }, + { + "epoch": 2.9558058290376956, + "grad_norm": 0.026835812255740166, + "learning_rate": 8.568349794898262e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81552040, + "step": 120990 + }, + { + "epoch": 2.955927979869543, + "grad_norm": 0.006106141954660416, + "learning_rate": 8.567505808699506e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81555240, + "step": 120995 + }, + { + "epoch": 2.95605013070139, + "grad_norm": 0.024814531207084656, + "learning_rate": 8.566661832918231e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81558760, + "step": 121000 + }, + { + "epoch": 2.956172281533237, + "grad_norm": 0.035326164215803146, + "learning_rate": 8.565817867560576e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81561640, + "step": 121005 + }, + { + "epoch": 2.9562944323650844, + "grad_norm": 49.551002502441406, + "learning_rate": 8.564973912632679e-07, + "loss": 0.0845, + "num_input_tokens_seen": 81564776, + "step": 121010 + }, + { + "epoch": 2.9564165831969316, + "grad_norm": 0.08276200294494629, + "learning_rate": 8.564129968140677e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81567848, + "step": 121015 + }, + { + "epoch": 2.9565387340287788, + "grad_norm": 0.03254903480410576, + "learning_rate": 8.563286034090711e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81571368, + "step": 121020 + }, + { + "epoch": 2.956660884860626, + "grad_norm": 0.025688692927360535, + "learning_rate": 8.562442110488911e-07, + "loss": 0.049, + "num_input_tokens_seen": 81574824, + "step": 121025 + }, + { + "epoch": 2.956783035692473, + "grad_norm": 0.43042516708374023, + "learning_rate": 8.561598197341424e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81578024, + "step": 121030 + }, + { + "epoch": 2.9569051865243203, + "grad_norm": 0.02726716548204422, + "learning_rate": 8.560754294654377e-07, + "loss": 0.048, + "num_input_tokens_seen": 81582120, + "step": 121035 + }, + { + "epoch": 2.9570273373561675, + "grad_norm": 0.0018384922295808792, + "learning_rate": 8.559910402433912e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81585640, + "step": 121040 + }, + { + "epoch": 2.9571494881880147, + "grad_norm": 15.961065292358398, + "learning_rate": 8.55906652068617e-07, + "loss": 0.0613, + "num_input_tokens_seen": 81588520, + "step": 121045 + }, + { + "epoch": 2.9572716390198615, + "grad_norm": 0.10140183568000793, + "learning_rate": 8.558222649417282e-07, + "loss": 0.0588, + "num_input_tokens_seen": 81592296, + "step": 121050 + }, + { + "epoch": 2.957393789851709, + "grad_norm": 0.019582247361540794, + "learning_rate": 8.557378788633386e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81595688, + "step": 121055 + }, + { + "epoch": 2.957515940683556, + "grad_norm": 236.25596618652344, + "learning_rate": 8.55653493834062e-07, + "loss": 0.0867, + "num_input_tokens_seen": 81599208, + "step": 121060 + }, + { + "epoch": 2.9576380915154035, + "grad_norm": 0.0025045794900506735, + "learning_rate": 8.555691098545122e-07, + "loss": 0.0317, + "num_input_tokens_seen": 81602984, + "step": 121065 + }, + { + "epoch": 2.95776024234725, + "grad_norm": 0.029240965843200684, + "learning_rate": 8.554847269253023e-07, + "loss": 0.0933, + "num_input_tokens_seen": 81606312, + "step": 121070 + }, + { + "epoch": 2.9578823931790974, + "grad_norm": 55.595611572265625, + "learning_rate": 8.554003450470463e-07, + "loss": 0.0954, + "num_input_tokens_seen": 81609832, + "step": 121075 + }, + { + "epoch": 2.9580045440109446, + "grad_norm": 0.018441449850797653, + "learning_rate": 8.553159642203584e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81613096, + "step": 121080 + }, + { + "epoch": 2.958126694842792, + "grad_norm": 0.039422810077667236, + "learning_rate": 8.552315844458511e-07, + "loss": 0.049, + "num_input_tokens_seen": 81616552, + "step": 121085 + }, + { + "epoch": 2.958248845674639, + "grad_norm": 0.2605257034301758, + "learning_rate": 8.551472057241393e-07, + "loss": 0.0008, + "num_input_tokens_seen": 81619560, + "step": 121090 + }, + { + "epoch": 2.958370996506486, + "grad_norm": 0.006907718721777201, + "learning_rate": 8.550628280558354e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81622824, + "step": 121095 + }, + { + "epoch": 2.9584931473383334, + "grad_norm": 0.018841054290533066, + "learning_rate": 8.549784514415539e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81625896, + "step": 121100 + }, + { + "epoch": 2.9586152981701805, + "grad_norm": 0.058261509984731674, + "learning_rate": 8.548940758819081e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81629224, + "step": 121105 + }, + { + "epoch": 2.9587374490020277, + "grad_norm": 18.50106430053711, + "learning_rate": 8.548097013775116e-07, + "loss": 0.0641, + "num_input_tokens_seen": 81632680, + "step": 121110 + }, + { + "epoch": 2.958859599833875, + "grad_norm": 0.10784649848937988, + "learning_rate": 8.547253279289781e-07, + "loss": 0.0467, + "num_input_tokens_seen": 81635880, + "step": 121115 + }, + { + "epoch": 2.958981750665722, + "grad_norm": 0.014018794521689415, + "learning_rate": 8.546409555369207e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81638888, + "step": 121120 + }, + { + "epoch": 2.9591039014975693, + "grad_norm": 0.02228376641869545, + "learning_rate": 8.545565842019539e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81643944, + "step": 121125 + }, + { + "epoch": 2.9592260523294165, + "grad_norm": 39.539794921875, + "learning_rate": 8.544722139246902e-07, + "loss": 0.1017, + "num_input_tokens_seen": 81647016, + "step": 121130 + }, + { + "epoch": 2.9593482031612637, + "grad_norm": 0.007597757503390312, + "learning_rate": 8.543878447057439e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81650792, + "step": 121135 + }, + { + "epoch": 2.959470353993111, + "grad_norm": 0.02928669936954975, + "learning_rate": 8.543034765457286e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81654184, + "step": 121140 + }, + { + "epoch": 2.9595925048249576, + "grad_norm": 0.054370008409023285, + "learning_rate": 8.542191094452574e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81657384, + "step": 121145 + }, + { + "epoch": 2.9597146556568052, + "grad_norm": 0.011456976644694805, + "learning_rate": 8.541347434049442e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81661032, + "step": 121150 + }, + { + "epoch": 2.959836806488652, + "grad_norm": 0.005163121968507767, + "learning_rate": 8.540503784254023e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81664488, + "step": 121155 + }, + { + "epoch": 2.9599589573204996, + "grad_norm": 0.3065810203552246, + "learning_rate": 8.539660145072452e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81667688, + "step": 121160 + }, + { + "epoch": 2.9600811081523464, + "grad_norm": 0.016518019139766693, + "learning_rate": 8.538816516510866e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81671016, + "step": 121165 + }, + { + "epoch": 2.9602032589841936, + "grad_norm": 54.666160583496094, + "learning_rate": 8.537972898575398e-07, + "loss": 0.0225, + "num_input_tokens_seen": 81674728, + "step": 121170 + }, + { + "epoch": 2.9603254098160408, + "grad_norm": 0.020525958389043808, + "learning_rate": 8.537129291272187e-07, + "loss": 0.073, + "num_input_tokens_seen": 81678312, + "step": 121175 + }, + { + "epoch": 2.960447560647888, + "grad_norm": 0.07339372485876083, + "learning_rate": 8.536285694607361e-07, + "loss": 0.0501, + "num_input_tokens_seen": 81681448, + "step": 121180 + }, + { + "epoch": 2.960569711479735, + "grad_norm": 0.045110974460840225, + "learning_rate": 8.535442108587066e-07, + "loss": 0.1142, + "num_input_tokens_seen": 81685416, + "step": 121185 + }, + { + "epoch": 2.9606918623115823, + "grad_norm": 0.035503778606653214, + "learning_rate": 8.534598533217423e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81688872, + "step": 121190 + }, + { + "epoch": 2.9608140131434295, + "grad_norm": 0.04830501601099968, + "learning_rate": 8.533754968504574e-07, + "loss": 0.0535, + "num_input_tokens_seen": 81692008, + "step": 121195 + }, + { + "epoch": 2.9609361639752767, + "grad_norm": 0.006708557717502117, + "learning_rate": 8.532911414454657e-07, + "loss": 0.0858, + "num_input_tokens_seen": 81695272, + "step": 121200 + }, + { + "epoch": 2.961058314807124, + "grad_norm": 0.021845147013664246, + "learning_rate": 8.532067871073803e-07, + "loss": 0.0493, + "num_input_tokens_seen": 81698536, + "step": 121205 + }, + { + "epoch": 2.961180465638971, + "grad_norm": 15.514656066894531, + "learning_rate": 8.531224338368144e-07, + "loss": 0.1358, + "num_input_tokens_seen": 81701736, + "step": 121210 + }, + { + "epoch": 2.9613026164708183, + "grad_norm": 0.1647643893957138, + "learning_rate": 8.530380816343818e-07, + "loss": 0.0583, + "num_input_tokens_seen": 81705128, + "step": 121215 + }, + { + "epoch": 2.9614247673026655, + "grad_norm": 0.048709526658058167, + "learning_rate": 8.52953730500696e-07, + "loss": 0.1345, + "num_input_tokens_seen": 81708200, + "step": 121220 + }, + { + "epoch": 2.9615469181345127, + "grad_norm": 0.07313637435436249, + "learning_rate": 8.528693804363697e-07, + "loss": 0.0674, + "num_input_tokens_seen": 81711784, + "step": 121225 + }, + { + "epoch": 2.9616690689663594, + "grad_norm": 0.0985637828707695, + "learning_rate": 8.527850314420169e-07, + "loss": 0.0849, + "num_input_tokens_seen": 81714600, + "step": 121230 + }, + { + "epoch": 2.961791219798207, + "grad_norm": 0.11501231044530869, + "learning_rate": 8.527006835182514e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81717736, + "step": 121235 + }, + { + "epoch": 2.9619133706300538, + "grad_norm": 0.049504294991493225, + "learning_rate": 8.526163366656857e-07, + "loss": 0.0429, + "num_input_tokens_seen": 81721128, + "step": 121240 + }, + { + "epoch": 2.9620355214619014, + "grad_norm": 0.17213276028633118, + "learning_rate": 8.52531990884934e-07, + "loss": 0.0115, + "num_input_tokens_seen": 81724200, + "step": 121245 + }, + { + "epoch": 2.962157672293748, + "grad_norm": 0.20793768763542175, + "learning_rate": 8.52447646176609e-07, + "loss": 0.0013, + "num_input_tokens_seen": 81727464, + "step": 121250 + }, + { + "epoch": 2.9622798231255953, + "grad_norm": 0.01568690501153469, + "learning_rate": 8.523633025413246e-07, + "loss": 0.0742, + "num_input_tokens_seen": 81730536, + "step": 121255 + }, + { + "epoch": 2.9624019739574425, + "grad_norm": 0.1056603267788887, + "learning_rate": 8.522789599796939e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81733544, + "step": 121260 + }, + { + "epoch": 2.9625241247892897, + "grad_norm": 0.031303521245718, + "learning_rate": 8.521946184923304e-07, + "loss": 0.1397, + "num_input_tokens_seen": 81736744, + "step": 121265 + }, + { + "epoch": 2.962646275621137, + "grad_norm": 0.49149009585380554, + "learning_rate": 8.521102780798475e-07, + "loss": 0.0798, + "num_input_tokens_seen": 81740136, + "step": 121270 + }, + { + "epoch": 2.962768426452984, + "grad_norm": 0.1194659098982811, + "learning_rate": 8.520259387428582e-07, + "loss": 0.0639, + "num_input_tokens_seen": 81743336, + "step": 121275 + }, + { + "epoch": 2.9628905772848313, + "grad_norm": 0.09376633167266846, + "learning_rate": 8.519416004819764e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81746344, + "step": 121280 + }, + { + "epoch": 2.9630127281166785, + "grad_norm": 0.6304414868354797, + "learning_rate": 8.518572632978147e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81749608, + "step": 121285 + }, + { + "epoch": 2.9631348789485257, + "grad_norm": 0.13901102542877197, + "learning_rate": 8.517729271909869e-07, + "loss": 0.1305, + "num_input_tokens_seen": 81752872, + "step": 121290 + }, + { + "epoch": 2.963257029780373, + "grad_norm": 0.006146088242530823, + "learning_rate": 8.516885921621064e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81755944, + "step": 121295 + }, + { + "epoch": 2.96337918061222, + "grad_norm": 0.11365926265716553, + "learning_rate": 8.516042582117862e-07, + "loss": 0.0011, + "num_input_tokens_seen": 81759720, + "step": 121300 + }, + { + "epoch": 2.9635013314440672, + "grad_norm": 0.007527015637606382, + "learning_rate": 8.5151992534064e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81763368, + "step": 121305 + }, + { + "epoch": 2.9636234822759144, + "grad_norm": 0.020057743415236473, + "learning_rate": 8.514355935492806e-07, + "loss": 0.1187, + "num_input_tokens_seen": 81766824, + "step": 121310 + }, + { + "epoch": 2.9637456331077616, + "grad_norm": 0.029346054419875145, + "learning_rate": 8.513512628383217e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81769896, + "step": 121315 + }, + { + "epoch": 2.963867783939609, + "grad_norm": 0.07407413423061371, + "learning_rate": 8.512669332083763e-07, + "loss": 0.0732, + "num_input_tokens_seen": 81773608, + "step": 121320 + }, + { + "epoch": 2.9639899347714556, + "grad_norm": 0.2536911368370056, + "learning_rate": 8.511826046600575e-07, + "loss": 0.0336, + "num_input_tokens_seen": 81777000, + "step": 121325 + }, + { + "epoch": 2.964112085603303, + "grad_norm": 0.06144652143120766, + "learning_rate": 8.510982771939794e-07, + "loss": 0.035, + "num_input_tokens_seen": 81780712, + "step": 121330 + }, + { + "epoch": 2.96423423643515, + "grad_norm": 0.08548964560031891, + "learning_rate": 8.510139508107541e-07, + "loss": 0.0515, + "num_input_tokens_seen": 81783720, + "step": 121335 + }, + { + "epoch": 2.964356387266997, + "grad_norm": 0.010217788629233837, + "learning_rate": 8.509296255109959e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81787112, + "step": 121340 + }, + { + "epoch": 2.9644785380988443, + "grad_norm": 0.0944608524441719, + "learning_rate": 8.508453012953172e-07, + "loss": 0.0445, + "num_input_tokens_seen": 81790568, + "step": 121345 + }, + { + "epoch": 2.9646006889306915, + "grad_norm": 0.12509916722774506, + "learning_rate": 8.507609781643316e-07, + "loss": 0.0408, + "num_input_tokens_seen": 81793576, + "step": 121350 + }, + { + "epoch": 2.9647228397625387, + "grad_norm": 0.06585719436407089, + "learning_rate": 8.506766561186526e-07, + "loss": 0.0401, + "num_input_tokens_seen": 81797096, + "step": 121355 + }, + { + "epoch": 2.964844990594386, + "grad_norm": 1.7288832664489746, + "learning_rate": 8.505923351588931e-07, + "loss": 0.0665, + "num_input_tokens_seen": 81800296, + "step": 121360 + }, + { + "epoch": 2.964967141426233, + "grad_norm": 0.03702880069613457, + "learning_rate": 8.505080152856661e-07, + "loss": 0.0237, + "num_input_tokens_seen": 81803432, + "step": 121365 + }, + { + "epoch": 2.9650892922580803, + "grad_norm": 0.8792369365692139, + "learning_rate": 8.504236964995851e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81806568, + "step": 121370 + }, + { + "epoch": 2.9652114430899275, + "grad_norm": 0.18109741806983948, + "learning_rate": 8.503393788012635e-07, + "loss": 0.0384, + "num_input_tokens_seen": 81810152, + "step": 121375 + }, + { + "epoch": 2.9653335939217746, + "grad_norm": 60.54440689086914, + "learning_rate": 8.502550621913137e-07, + "loss": 0.0011, + "num_input_tokens_seen": 81813800, + "step": 121380 + }, + { + "epoch": 2.965455744753622, + "grad_norm": 0.0288938470184803, + "learning_rate": 8.501707466703494e-07, + "loss": 0.128, + "num_input_tokens_seen": 81816616, + "step": 121385 + }, + { + "epoch": 2.965577895585469, + "grad_norm": 0.09552355855703354, + "learning_rate": 8.50086432238984e-07, + "loss": 0.0599, + "num_input_tokens_seen": 81819560, + "step": 121390 + }, + { + "epoch": 2.965700046417316, + "grad_norm": 0.2099023461341858, + "learning_rate": 8.500021188978301e-07, + "loss": 0.0009, + "num_input_tokens_seen": 81822760, + "step": 121395 + }, + { + "epoch": 2.9658221972491634, + "grad_norm": 0.0012449991190806031, + "learning_rate": 8.499178066475016e-07, + "loss": 0.049, + "num_input_tokens_seen": 81826024, + "step": 121400 + }, + { + "epoch": 2.9659443480810106, + "grad_norm": 0.37859848141670227, + "learning_rate": 8.498334954886107e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81829224, + "step": 121405 + }, + { + "epoch": 2.9660664989128573, + "grad_norm": 0.6261986494064331, + "learning_rate": 8.497491854217713e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81832680, + "step": 121410 + }, + { + "epoch": 2.966188649744705, + "grad_norm": 0.20188039541244507, + "learning_rate": 8.496648764475961e-07, + "loss": 0.0006, + "num_input_tokens_seen": 81836072, + "step": 121415 + }, + { + "epoch": 2.9663108005765517, + "grad_norm": 0.12402575463056564, + "learning_rate": 8.495805685666985e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81839592, + "step": 121420 + }, + { + "epoch": 2.9664329514083994, + "grad_norm": 0.037398580461740494, + "learning_rate": 8.494962617796915e-07, + "loss": 0.0445, + "num_input_tokens_seen": 81843048, + "step": 121425 + }, + { + "epoch": 2.966555102240246, + "grad_norm": 0.1763017773628235, + "learning_rate": 8.494119560871879e-07, + "loss": 0.0362, + "num_input_tokens_seen": 81845928, + "step": 121430 + }, + { + "epoch": 2.9666772530720933, + "grad_norm": 0.7172082662582397, + "learning_rate": 8.493276514898014e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81849704, + "step": 121435 + }, + { + "epoch": 2.9667994039039405, + "grad_norm": 0.0016529584536328912, + "learning_rate": 8.492433479881444e-07, + "loss": 0.001, + "num_input_tokens_seen": 81853032, + "step": 121440 + }, + { + "epoch": 2.9669215547357877, + "grad_norm": 0.0239529088139534, + "learning_rate": 8.491590455828302e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81856360, + "step": 121445 + }, + { + "epoch": 2.967043705567635, + "grad_norm": 0.011942592449486256, + "learning_rate": 8.490747442744725e-07, + "loss": 0.0014, + "num_input_tokens_seen": 81859624, + "step": 121450 + }, + { + "epoch": 2.967165856399482, + "grad_norm": 0.047506652772426605, + "learning_rate": 8.489904440636833e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81863144, + "step": 121455 + }, + { + "epoch": 2.9672880072313292, + "grad_norm": 0.04296977072954178, + "learning_rate": 8.489061449510768e-07, + "loss": 0.0291, + "num_input_tokens_seen": 81866408, + "step": 121460 + }, + { + "epoch": 2.9674101580631764, + "grad_norm": 0.010503513738512993, + "learning_rate": 8.488218469372652e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81869480, + "step": 121465 + }, + { + "epoch": 2.9675323088950236, + "grad_norm": 0.11669360101222992, + "learning_rate": 8.487375500228617e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81873192, + "step": 121470 + }, + { + "epoch": 2.967654459726871, + "grad_norm": 0.25940388441085815, + "learning_rate": 8.486532542084795e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81876584, + "step": 121475 + }, + { + "epoch": 2.967776610558718, + "grad_norm": 0.13429482281208038, + "learning_rate": 8.485689594947314e-07, + "loss": 0.0008, + "num_input_tokens_seen": 81880040, + "step": 121480 + }, + { + "epoch": 2.967898761390565, + "grad_norm": 23.280900955200195, + "learning_rate": 8.484846658822308e-07, + "loss": 0.1177, + "num_input_tokens_seen": 81883560, + "step": 121485 + }, + { + "epoch": 2.9680209122224124, + "grad_norm": 0.06801927834749222, + "learning_rate": 8.484003733715902e-07, + "loss": 0.027, + "num_input_tokens_seen": 81887016, + "step": 121490 + }, + { + "epoch": 2.968143063054259, + "grad_norm": 0.002891592215746641, + "learning_rate": 8.483160819634232e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81890792, + "step": 121495 + }, + { + "epoch": 2.9682652138861068, + "grad_norm": 0.0072302971966564655, + "learning_rate": 8.482317916583422e-07, + "loss": 0.0389, + "num_input_tokens_seen": 81894056, + "step": 121500 + }, + { + "epoch": 2.9683873647179535, + "grad_norm": 0.05241973325610161, + "learning_rate": 8.481475024569602e-07, + "loss": 0.0513, + "num_input_tokens_seen": 81897320, + "step": 121505 + }, + { + "epoch": 2.968509515549801, + "grad_norm": 0.029552537947893143, + "learning_rate": 8.480632143598909e-07, + "loss": 0.1138, + "num_input_tokens_seen": 81900392, + "step": 121510 + }, + { + "epoch": 2.968631666381648, + "grad_norm": 0.042538851499557495, + "learning_rate": 8.479789273677465e-07, + "loss": 0.0574, + "num_input_tokens_seen": 81903656, + "step": 121515 + }, + { + "epoch": 2.968753817213495, + "grad_norm": 0.05005302652716637, + "learning_rate": 8.478946414811403e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81906856, + "step": 121520 + }, + { + "epoch": 2.9688759680453423, + "grad_norm": 0.1344650685787201, + "learning_rate": 8.478103567006853e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81910312, + "step": 121525 + }, + { + "epoch": 2.9689981188771895, + "grad_norm": 0.02481931447982788, + "learning_rate": 8.477260730269944e-07, + "loss": 0.0725, + "num_input_tokens_seen": 81913576, + "step": 121530 + }, + { + "epoch": 2.9691202697090366, + "grad_norm": 0.001746109570376575, + "learning_rate": 8.4764179046068e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81917224, + "step": 121535 + }, + { + "epoch": 2.969242420540884, + "grad_norm": 0.00772502226755023, + "learning_rate": 8.475575090023555e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81920744, + "step": 121540 + }, + { + "epoch": 2.969364571372731, + "grad_norm": 0.010284801945090294, + "learning_rate": 8.474732286526342e-07, + "loss": 0.0429, + "num_input_tokens_seen": 81924072, + "step": 121545 + }, + { + "epoch": 2.969486722204578, + "grad_norm": 17.817644119262695, + "learning_rate": 8.473889494121282e-07, + "loss": 0.0468, + "num_input_tokens_seen": 81927336, + "step": 121550 + }, + { + "epoch": 2.9696088730364254, + "grad_norm": 0.07845073193311691, + "learning_rate": 8.473046712814513e-07, + "loss": 0.0852, + "num_input_tokens_seen": 81930664, + "step": 121555 + }, + { + "epoch": 2.9697310238682726, + "grad_norm": 0.22656197845935822, + "learning_rate": 8.472203942612154e-07, + "loss": 0.0011, + "num_input_tokens_seen": 81933736, + "step": 121560 + }, + { + "epoch": 2.96985317470012, + "grad_norm": 0.041642528027296066, + "learning_rate": 8.471361183520341e-07, + "loss": 0.042, + "num_input_tokens_seen": 81937384, + "step": 121565 + }, + { + "epoch": 2.969975325531967, + "grad_norm": 0.018616296350955963, + "learning_rate": 8.470518435545202e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81940776, + "step": 121570 + }, + { + "epoch": 2.970097476363814, + "grad_norm": 0.033992212265729904, + "learning_rate": 8.469675698692862e-07, + "loss": 0.0001, + "num_input_tokens_seen": 81944168, + "step": 121575 + }, + { + "epoch": 2.9702196271956613, + "grad_norm": 0.1472531408071518, + "learning_rate": 8.468832972969457e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81947752, + "step": 121580 + }, + { + "epoch": 2.9703417780275085, + "grad_norm": 33.03204345703125, + "learning_rate": 8.467990258381104e-07, + "loss": 0.0919, + "num_input_tokens_seen": 81950952, + "step": 121585 + }, + { + "epoch": 2.9704639288593553, + "grad_norm": 0.008867146447300911, + "learning_rate": 8.467147554933942e-07, + "loss": 0.0725, + "num_input_tokens_seen": 81954216, + "step": 121590 + }, + { + "epoch": 2.970586079691203, + "grad_norm": 0.011977802962064743, + "learning_rate": 8.466304862634092e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81957480, + "step": 121595 + }, + { + "epoch": 2.9707082305230497, + "grad_norm": 0.12904588878154755, + "learning_rate": 8.465462181487684e-07, + "loss": 0.0345, + "num_input_tokens_seen": 81960808, + "step": 121600 + }, + { + "epoch": 2.9708303813548973, + "grad_norm": 0.07471118867397308, + "learning_rate": 8.464619511500855e-07, + "loss": 0.0002, + "num_input_tokens_seen": 81964328, + "step": 121605 + }, + { + "epoch": 2.970952532186744, + "grad_norm": 0.11465713381767273, + "learning_rate": 8.463776852679718e-07, + "loss": 0.0376, + "num_input_tokens_seen": 81968296, + "step": 121610 + }, + { + "epoch": 2.9710746830185912, + "grad_norm": 0.15388906002044678, + "learning_rate": 8.462934205030417e-07, + "loss": 0.0004, + "num_input_tokens_seen": 81971624, + "step": 121615 + }, + { + "epoch": 2.9711968338504384, + "grad_norm": 0.013246401213109493, + "learning_rate": 8.462091568559067e-07, + "loss": 0.0007, + "num_input_tokens_seen": 81974952, + "step": 121620 + }, + { + "epoch": 2.9713189846822856, + "grad_norm": 0.03343416750431061, + "learning_rate": 8.461248943271802e-07, + "loss": 0.0005, + "num_input_tokens_seen": 81978600, + "step": 121625 + }, + { + "epoch": 2.971441135514133, + "grad_norm": 0.03627277538180351, + "learning_rate": 8.460406329174748e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81981608, + "step": 121630 + }, + { + "epoch": 2.97156328634598, + "grad_norm": 0.10073908418416977, + "learning_rate": 8.459563726274031e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81984872, + "step": 121635 + }, + { + "epoch": 2.971685437177827, + "grad_norm": 0.1816432625055313, + "learning_rate": 8.458721134575785e-07, + "loss": 0.0568, + "num_input_tokens_seen": 81988008, + "step": 121640 + }, + { + "epoch": 2.9718075880096744, + "grad_norm": 0.016053643077611923, + "learning_rate": 8.457878554086129e-07, + "loss": 0.0342, + "num_input_tokens_seen": 81990952, + "step": 121645 + }, + { + "epoch": 2.9719297388415216, + "grad_norm": 0.09785997867584229, + "learning_rate": 8.4570359848112e-07, + "loss": 0.0345, + "num_input_tokens_seen": 81994344, + "step": 121650 + }, + { + "epoch": 2.9720518896733688, + "grad_norm": 0.00311757018789649, + "learning_rate": 8.456193426757117e-07, + "loss": 0.0003, + "num_input_tokens_seen": 81997672, + "step": 121655 + }, + { + "epoch": 2.972174040505216, + "grad_norm": 0.08872219175100327, + "learning_rate": 8.455350879930009e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82000680, + "step": 121660 + }, + { + "epoch": 2.972296191337063, + "grad_norm": 0.017512673512101173, + "learning_rate": 8.454508344336009e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82003560, + "step": 121665 + }, + { + "epoch": 2.9724183421689103, + "grad_norm": 0.03185400366783142, + "learning_rate": 8.453665819981239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82006824, + "step": 121670 + }, + { + "epoch": 2.972540493000757, + "grad_norm": 18.32367706298828, + "learning_rate": 8.452823306871826e-07, + "loss": 0.0457, + "num_input_tokens_seen": 82010024, + "step": 121675 + }, + { + "epoch": 2.9726626438326047, + "grad_norm": 0.19949117302894592, + "learning_rate": 8.451980805013898e-07, + "loss": 0.1149, + "num_input_tokens_seen": 82013224, + "step": 121680 + }, + { + "epoch": 2.9727847946644514, + "grad_norm": 0.14638371765613556, + "learning_rate": 8.451138314413586e-07, + "loss": 0.0007, + "num_input_tokens_seen": 82016680, + "step": 121685 + }, + { + "epoch": 2.972906945496299, + "grad_norm": 12.467910766601562, + "learning_rate": 8.450295835077007e-07, + "loss": 0.071, + "num_input_tokens_seen": 82020072, + "step": 121690 + }, + { + "epoch": 2.973029096328146, + "grad_norm": 0.11415675282478333, + "learning_rate": 8.449453367010293e-07, + "loss": 0.0457, + "num_input_tokens_seen": 82023400, + "step": 121695 + }, + { + "epoch": 2.973151247159993, + "grad_norm": 0.0506068654358387, + "learning_rate": 8.448610910219577e-07, + "loss": 0.0517, + "num_input_tokens_seen": 82026728, + "step": 121700 + }, + { + "epoch": 2.97327339799184, + "grad_norm": 0.025008076801896095, + "learning_rate": 8.447768464710974e-07, + "loss": 0.0089, + "num_input_tokens_seen": 82030120, + "step": 121705 + }, + { + "epoch": 2.9733955488236874, + "grad_norm": 0.03348412737250328, + "learning_rate": 8.446926030490622e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82033768, + "step": 121710 + }, + { + "epoch": 2.9735176996555346, + "grad_norm": 0.518605649471283, + "learning_rate": 8.446083607564636e-07, + "loss": 0.0642, + "num_input_tokens_seen": 82037224, + "step": 121715 + }, + { + "epoch": 2.9736398504873818, + "grad_norm": 0.0018251192523166537, + "learning_rate": 8.445241195939152e-07, + "loss": 0.1574, + "num_input_tokens_seen": 82040616, + "step": 121720 + }, + { + "epoch": 2.973762001319229, + "grad_norm": 14.896915435791016, + "learning_rate": 8.444398795620289e-07, + "loss": 0.0398, + "num_input_tokens_seen": 82043688, + "step": 121725 + }, + { + "epoch": 2.973884152151076, + "grad_norm": 1260.587158203125, + "learning_rate": 8.443556406614179e-07, + "loss": 0.0905, + "num_input_tokens_seen": 82047528, + "step": 121730 + }, + { + "epoch": 2.9740063029829233, + "grad_norm": 0.005130738485604525, + "learning_rate": 8.442714028926946e-07, + "loss": 0.0304, + "num_input_tokens_seen": 82050920, + "step": 121735 + }, + { + "epoch": 2.9741284538147705, + "grad_norm": 42.79816818237305, + "learning_rate": 8.441871662564712e-07, + "loss": 0.0853, + "num_input_tokens_seen": 82054120, + "step": 121740 + }, + { + "epoch": 2.9742506046466177, + "grad_norm": 0.3749528229236603, + "learning_rate": 8.44102930753361e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82057384, + "step": 121745 + }, + { + "epoch": 2.974372755478465, + "grad_norm": 0.13021938502788544, + "learning_rate": 8.44018696383976e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82060648, + "step": 121750 + }, + { + "epoch": 2.974494906310312, + "grad_norm": 0.08072230964899063, + "learning_rate": 8.439344631489287e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82064104, + "step": 121755 + }, + { + "epoch": 2.9746170571421593, + "grad_norm": 0.3794505000114441, + "learning_rate": 8.438502310488326e-07, + "loss": 0.0518, + "num_input_tokens_seen": 82067112, + "step": 121760 + }, + { + "epoch": 2.9747392079740065, + "grad_norm": 0.29038816690444946, + "learning_rate": 8.437660000842991e-07, + "loss": 0.0008, + "num_input_tokens_seen": 82070312, + "step": 121765 + }, + { + "epoch": 2.9748613588058532, + "grad_norm": 0.034577976912260056, + "learning_rate": 8.436817702559417e-07, + "loss": 0.1004, + "num_input_tokens_seen": 82073384, + "step": 121770 + }, + { + "epoch": 2.974983509637701, + "grad_norm": 33.719852447509766, + "learning_rate": 8.435975415643724e-07, + "loss": 0.1113, + "num_input_tokens_seen": 82077032, + "step": 121775 + }, + { + "epoch": 2.9751056604695476, + "grad_norm": 0.037223588675260544, + "learning_rate": 8.435133140102036e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82080040, + "step": 121780 + }, + { + "epoch": 2.975227811301395, + "grad_norm": 0.01888175681233406, + "learning_rate": 8.434290875940483e-07, + "loss": 0.0452, + "num_input_tokens_seen": 82083112, + "step": 121785 + }, + { + "epoch": 2.975349962133242, + "grad_norm": 0.0968722254037857, + "learning_rate": 8.433448623165185e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82086440, + "step": 121790 + }, + { + "epoch": 2.975472112965089, + "grad_norm": 0.010138538666069508, + "learning_rate": 8.432606381782275e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82089896, + "step": 121795 + }, + { + "epoch": 2.9755942637969364, + "grad_norm": 0.039139967411756516, + "learning_rate": 8.431764151797867e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82092712, + "step": 121800 + }, + { + "epoch": 2.9757164146287836, + "grad_norm": 0.06282107532024384, + "learning_rate": 8.430921933218097e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82095592, + "step": 121805 + }, + { + "epoch": 2.9758385654606307, + "grad_norm": 0.027126094326376915, + "learning_rate": 8.430079726049081e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82098984, + "step": 121810 + }, + { + "epoch": 2.975960716292478, + "grad_norm": 0.1266213208436966, + "learning_rate": 8.429237530296946e-07, + "loss": 0.0666, + "num_input_tokens_seen": 82102184, + "step": 121815 + }, + { + "epoch": 2.976082867124325, + "grad_norm": 0.0039200736209750175, + "learning_rate": 8.428395345967825e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82105192, + "step": 121820 + }, + { + "epoch": 2.9762050179561723, + "grad_norm": 0.11012466996908188, + "learning_rate": 8.427553173067832e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82108392, + "step": 121825 + }, + { + "epoch": 2.9763271687880195, + "grad_norm": 28.55767250061035, + "learning_rate": 8.426711011603094e-07, + "loss": 0.0345, + "num_input_tokens_seen": 82112040, + "step": 121830 + }, + { + "epoch": 2.9764493196198667, + "grad_norm": 0.008977274410426617, + "learning_rate": 8.425868861579739e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82115624, + "step": 121835 + }, + { + "epoch": 2.976571470451714, + "grad_norm": 0.036314770579338074, + "learning_rate": 8.425026723003889e-07, + "loss": 0.0657, + "num_input_tokens_seen": 82118824, + "step": 121840 + }, + { + "epoch": 2.976693621283561, + "grad_norm": 0.13658422231674194, + "learning_rate": 8.424184595881666e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82122472, + "step": 121845 + }, + { + "epoch": 2.9768157721154083, + "grad_norm": 0.05026088282465935, + "learning_rate": 8.423342480219195e-07, + "loss": 0.0441, + "num_input_tokens_seen": 82126248, + "step": 121850 + }, + { + "epoch": 2.976937922947255, + "grad_norm": 14.070030212402344, + "learning_rate": 8.422500376022607e-07, + "loss": 0.0734, + "num_input_tokens_seen": 82129704, + "step": 121855 + }, + { + "epoch": 2.9770600737791026, + "grad_norm": 0.08053138852119446, + "learning_rate": 8.421658283298017e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82133224, + "step": 121860 + }, + { + "epoch": 2.9771822246109494, + "grad_norm": 0.018369318917393684, + "learning_rate": 8.420816202051555e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82136616, + "step": 121865 + }, + { + "epoch": 2.977304375442797, + "grad_norm": 0.20027559995651245, + "learning_rate": 8.419974132289338e-07, + "loss": 0.0505, + "num_input_tokens_seen": 82140136, + "step": 121870 + }, + { + "epoch": 2.9774265262746438, + "grad_norm": 0.044966600835323334, + "learning_rate": 8.419132074017499e-07, + "loss": 0.0423, + "num_input_tokens_seen": 82143144, + "step": 121875 + }, + { + "epoch": 2.977548677106491, + "grad_norm": 6.6325201988220215, + "learning_rate": 8.418290027242153e-07, + "loss": 0.0182, + "num_input_tokens_seen": 82146408, + "step": 121880 + }, + { + "epoch": 2.977670827938338, + "grad_norm": 0.023646734654903412, + "learning_rate": 8.417447991969429e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82149864, + "step": 121885 + }, + { + "epoch": 2.9777929787701853, + "grad_norm": 0.043020982295274734, + "learning_rate": 8.41660596820545e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82153512, + "step": 121890 + }, + { + "epoch": 2.9779151296020325, + "grad_norm": 9.485835075378418, + "learning_rate": 8.415763955956336e-07, + "loss": 0.0526, + "num_input_tokens_seen": 82156648, + "step": 121895 + }, + { + "epoch": 2.9780372804338797, + "grad_norm": 21.41651153564453, + "learning_rate": 8.414921955228216e-07, + "loss": 0.0731, + "num_input_tokens_seen": 82159528, + "step": 121900 + }, + { + "epoch": 2.978159431265727, + "grad_norm": 0.009164262562990189, + "learning_rate": 8.414079966027206e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82162984, + "step": 121905 + }, + { + "epoch": 2.978281582097574, + "grad_norm": 0.1760968714952469, + "learning_rate": 8.413237988359432e-07, + "loss": 0.0691, + "num_input_tokens_seen": 82166440, + "step": 121910 + }, + { + "epoch": 2.9784037329294213, + "grad_norm": 0.04876111447811127, + "learning_rate": 8.412396022231023e-07, + "loss": 0.049, + "num_input_tokens_seen": 82170472, + "step": 121915 + }, + { + "epoch": 2.9785258837612685, + "grad_norm": 277.4755859375, + "learning_rate": 8.411554067648092e-07, + "loss": 0.1157, + "num_input_tokens_seen": 82173544, + "step": 121920 + }, + { + "epoch": 2.9786480345931157, + "grad_norm": 0.037194229662418365, + "learning_rate": 8.410712124616773e-07, + "loss": 0.0765, + "num_input_tokens_seen": 82177384, + "step": 121925 + }, + { + "epoch": 2.978770185424963, + "grad_norm": 0.0221418384462595, + "learning_rate": 8.409870193143179e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82180776, + "step": 121930 + }, + { + "epoch": 2.97889233625681, + "grad_norm": 0.011147744953632355, + "learning_rate": 8.409028273233439e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82184168, + "step": 121935 + }, + { + "epoch": 2.9790144870886572, + "grad_norm": 0.011297612451016903, + "learning_rate": 8.40818636489367e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82187944, + "step": 121940 + }, + { + "epoch": 2.9791366379205044, + "grad_norm": 0.02041480876505375, + "learning_rate": 8.407344468129998e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82191528, + "step": 121945 + }, + { + "epoch": 2.979258788752351, + "grad_norm": 0.01931638829410076, + "learning_rate": 8.40650258294855e-07, + "loss": 0.0849, + "num_input_tokens_seen": 82195048, + "step": 121950 + }, + { + "epoch": 2.979380939584199, + "grad_norm": 0.007796446327120066, + "learning_rate": 8.405660709355439e-07, + "loss": 0.0, + "num_input_tokens_seen": 82198248, + "step": 121955 + }, + { + "epoch": 2.9795030904160456, + "grad_norm": 0.012662877328693867, + "learning_rate": 8.404818847356796e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82201704, + "step": 121960 + }, + { + "epoch": 2.9796252412478927, + "grad_norm": 0.1940654069185257, + "learning_rate": 8.403976996958735e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82204968, + "step": 121965 + }, + { + "epoch": 2.97974739207974, + "grad_norm": 0.0024405743461102247, + "learning_rate": 8.403135158167382e-07, + "loss": 0.0454, + "num_input_tokens_seen": 82208104, + "step": 121970 + }, + { + "epoch": 2.979869542911587, + "grad_norm": 0.010657384060323238, + "learning_rate": 8.402293330988866e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82211368, + "step": 121975 + }, + { + "epoch": 2.9799916937434343, + "grad_norm": 0.06140420585870743, + "learning_rate": 8.401451515429299e-07, + "loss": 0.1058, + "num_input_tokens_seen": 82214440, + "step": 121980 + }, + { + "epoch": 2.9801138445752815, + "grad_norm": 0.022319672629237175, + "learning_rate": 8.400609711494807e-07, + "loss": 0.0618, + "num_input_tokens_seen": 82217832, + "step": 121985 + }, + { + "epoch": 2.9802359954071287, + "grad_norm": 0.05925625190138817, + "learning_rate": 8.399767919191511e-07, + "loss": 0.0467, + "num_input_tokens_seen": 82221416, + "step": 121990 + }, + { + "epoch": 2.980358146238976, + "grad_norm": 0.07047688215970993, + "learning_rate": 8.398926138525536e-07, + "loss": 0.0016, + "num_input_tokens_seen": 82224360, + "step": 121995 + }, + { + "epoch": 2.980480297070823, + "grad_norm": 25.92572021484375, + "learning_rate": 8.398084369502996e-07, + "loss": 0.0424, + "num_input_tokens_seen": 82227560, + "step": 122000 + }, + { + "epoch": 2.9806024479026703, + "grad_norm": 0.0666838213801384, + "learning_rate": 8.397242612130017e-07, + "loss": 0.0332, + "num_input_tokens_seen": 82230632, + "step": 122005 + }, + { + "epoch": 2.9807245987345174, + "grad_norm": 0.7408774495124817, + "learning_rate": 8.396400866412725e-07, + "loss": 0.0683, + "num_input_tokens_seen": 82234088, + "step": 122010 + }, + { + "epoch": 2.9808467495663646, + "grad_norm": 0.01501146424561739, + "learning_rate": 8.395559132357234e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82237416, + "step": 122015 + }, + { + "epoch": 2.980968900398212, + "grad_norm": 0.35236650705337524, + "learning_rate": 8.394717409969671e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82240872, + "step": 122020 + }, + { + "epoch": 2.981091051230059, + "grad_norm": 0.029871536418795586, + "learning_rate": 8.393875699256152e-07, + "loss": 0.1045, + "num_input_tokens_seen": 82244776, + "step": 122025 + }, + { + "epoch": 2.981213202061906, + "grad_norm": 0.04119180887937546, + "learning_rate": 8.393034000222805e-07, + "loss": 0.056, + "num_input_tokens_seen": 82247912, + "step": 122030 + }, + { + "epoch": 2.981335352893753, + "grad_norm": 0.037397608160972595, + "learning_rate": 8.392192312875742e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82251624, + "step": 122035 + }, + { + "epoch": 2.9814575037256006, + "grad_norm": 0.009144660085439682, + "learning_rate": 8.391350637221092e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82254696, + "step": 122040 + }, + { + "epoch": 2.9815796545574473, + "grad_norm": 0.04723630100488663, + "learning_rate": 8.390508973264974e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82258216, + "step": 122045 + }, + { + "epoch": 2.981701805389295, + "grad_norm": 26.548973083496094, + "learning_rate": 8.389667321013505e-07, + "loss": 0.0971, + "num_input_tokens_seen": 82261544, + "step": 122050 + }, + { + "epoch": 2.9818239562211417, + "grad_norm": 0.1542336344718933, + "learning_rate": 8.388825680472811e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82264808, + "step": 122055 + }, + { + "epoch": 2.981946107052989, + "grad_norm": 0.018932661041617393, + "learning_rate": 8.387984051649006e-07, + "loss": 0.0009, + "num_input_tokens_seen": 82268072, + "step": 122060 + }, + { + "epoch": 2.982068257884836, + "grad_norm": 0.2454744130373001, + "learning_rate": 8.387142434548216e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82271592, + "step": 122065 + }, + { + "epoch": 2.9821904087166833, + "grad_norm": 23.384599685668945, + "learning_rate": 8.386300829176563e-07, + "loss": 0.0491, + "num_input_tokens_seen": 82274408, + "step": 122070 + }, + { + "epoch": 2.9823125595485305, + "grad_norm": 0.22480256855487823, + "learning_rate": 8.38545923554016e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82277672, + "step": 122075 + }, + { + "epoch": 2.9824347103803777, + "grad_norm": 0.08951090276241302, + "learning_rate": 8.384617653645136e-07, + "loss": 0.0442, + "num_input_tokens_seen": 82281064, + "step": 122080 + }, + { + "epoch": 2.982556861212225, + "grad_norm": 8.396158218383789, + "learning_rate": 8.383776083497604e-07, + "loss": 0.0342, + "num_input_tokens_seen": 82285096, + "step": 122085 + }, + { + "epoch": 2.982679012044072, + "grad_norm": 0.02514975517988205, + "learning_rate": 8.382934525103688e-07, + "loss": 0.1432, + "num_input_tokens_seen": 82288296, + "step": 122090 + }, + { + "epoch": 2.9828011628759192, + "grad_norm": 0.1363772749900818, + "learning_rate": 8.382092978469508e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82291880, + "step": 122095 + }, + { + "epoch": 2.9829233137077664, + "grad_norm": 0.01438989583402872, + "learning_rate": 8.381251443601181e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82295208, + "step": 122100 + }, + { + "epoch": 2.9830454645396136, + "grad_norm": 0.04141591861844063, + "learning_rate": 8.380409920504832e-07, + "loss": 0.062, + "num_input_tokens_seen": 82298664, + "step": 122105 + }, + { + "epoch": 2.983167615371461, + "grad_norm": 0.08502558618783951, + "learning_rate": 8.379568409186573e-07, + "loss": 0.0502, + "num_input_tokens_seen": 82301992, + "step": 122110 + }, + { + "epoch": 2.983289766203308, + "grad_norm": 0.022353440523147583, + "learning_rate": 8.378726909652533e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82305448, + "step": 122115 + }, + { + "epoch": 2.9834119170351547, + "grad_norm": 0.013993551954627037, + "learning_rate": 8.377885421908824e-07, + "loss": 0.0539, + "num_input_tokens_seen": 82308392, + "step": 122120 + }, + { + "epoch": 2.9835340678670024, + "grad_norm": 0.23815952241420746, + "learning_rate": 8.377043945961566e-07, + "loss": 0.034, + "num_input_tokens_seen": 82311848, + "step": 122125 + }, + { + "epoch": 2.983656218698849, + "grad_norm": 0.05029185861349106, + "learning_rate": 8.376202481816888e-07, + "loss": 0.0556, + "num_input_tokens_seen": 82315048, + "step": 122130 + }, + { + "epoch": 2.9837783695306968, + "grad_norm": 0.1486971527338028, + "learning_rate": 8.375361029480898e-07, + "loss": 0.0474, + "num_input_tokens_seen": 82318312, + "step": 122135 + }, + { + "epoch": 2.9839005203625435, + "grad_norm": 0.01855803281068802, + "learning_rate": 8.374519588959721e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82322024, + "step": 122140 + }, + { + "epoch": 2.9840226711943907, + "grad_norm": 0.12863339483737946, + "learning_rate": 8.373678160259474e-07, + "loss": 0.0384, + "num_input_tokens_seen": 82325672, + "step": 122145 + }, + { + "epoch": 2.984144822026238, + "grad_norm": 0.30401062965393066, + "learning_rate": 8.372836743386279e-07, + "loss": 0.0317, + "num_input_tokens_seen": 82329128, + "step": 122150 + }, + { + "epoch": 2.984266972858085, + "grad_norm": 0.0029473446775227785, + "learning_rate": 8.371995338346249e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82332264, + "step": 122155 + }, + { + "epoch": 2.9843891236899323, + "grad_norm": 0.019141053780913353, + "learning_rate": 8.371153945145506e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82335848, + "step": 122160 + }, + { + "epoch": 2.9845112745217794, + "grad_norm": 0.013371425680816174, + "learning_rate": 8.370312563790174e-07, + "loss": 0.1012, + "num_input_tokens_seen": 82339176, + "step": 122165 + }, + { + "epoch": 2.9846334253536266, + "grad_norm": 0.045894015580415726, + "learning_rate": 8.369471194286364e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82342824, + "step": 122170 + }, + { + "epoch": 2.984755576185474, + "grad_norm": 0.1080714762210846, + "learning_rate": 8.368629836640202e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82346024, + "step": 122175 + }, + { + "epoch": 2.984877727017321, + "grad_norm": 479.940673828125, + "learning_rate": 8.367788490857798e-07, + "loss": 0.0257, + "num_input_tokens_seen": 82349672, + "step": 122180 + }, + { + "epoch": 2.984999877849168, + "grad_norm": 0.02565705217421055, + "learning_rate": 8.366947156945279e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82352872, + "step": 122185 + }, + { + "epoch": 2.9851220286810154, + "grad_norm": 0.020736481994390488, + "learning_rate": 8.366105834908756e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82356200, + "step": 122190 + }, + { + "epoch": 2.9852441795128626, + "grad_norm": 0.01151752658188343, + "learning_rate": 8.365264524754353e-07, + "loss": 0.0564, + "num_input_tokens_seen": 82359848, + "step": 122195 + }, + { + "epoch": 2.9853663303447098, + "grad_norm": 0.0341997891664505, + "learning_rate": 8.364423226488187e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82363176, + "step": 122200 + }, + { + "epoch": 2.985488481176557, + "grad_norm": 0.0014073860365897417, + "learning_rate": 8.363581940116373e-07, + "loss": 0.0343, + "num_input_tokens_seen": 82366632, + "step": 122205 + }, + { + "epoch": 2.985610632008404, + "grad_norm": 0.041219040751457214, + "learning_rate": 8.362740665645034e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82370216, + "step": 122210 + }, + { + "epoch": 2.985732782840251, + "grad_norm": 78.13631439208984, + "learning_rate": 8.361899403080282e-07, + "loss": 0.062, + "num_input_tokens_seen": 82373864, + "step": 122215 + }, + { + "epoch": 2.9858549336720985, + "grad_norm": 0.0018047604244202375, + "learning_rate": 8.361058152428238e-07, + "loss": 0.0334, + "num_input_tokens_seen": 82377384, + "step": 122220 + }, + { + "epoch": 2.9859770845039453, + "grad_norm": 0.004586088005453348, + "learning_rate": 8.360216913695023e-07, + "loss": 0.0373, + "num_input_tokens_seen": 82380328, + "step": 122225 + }, + { + "epoch": 2.986099235335793, + "grad_norm": 32.516361236572266, + "learning_rate": 8.359375686886748e-07, + "loss": 0.1049, + "num_input_tokens_seen": 82383784, + "step": 122230 + }, + { + "epoch": 2.9862213861676397, + "grad_norm": 0.005690659396350384, + "learning_rate": 8.358534472009538e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82387624, + "step": 122235 + }, + { + "epoch": 2.986343536999487, + "grad_norm": 30.37091636657715, + "learning_rate": 8.357693269069506e-07, + "loss": 0.0445, + "num_input_tokens_seen": 82391144, + "step": 122240 + }, + { + "epoch": 2.986465687831334, + "grad_norm": 0.008414661511778831, + "learning_rate": 8.356852078072769e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82394280, + "step": 122245 + }, + { + "epoch": 2.9865878386631812, + "grad_norm": 0.019386325031518936, + "learning_rate": 8.356010899025448e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82397608, + "step": 122250 + }, + { + "epoch": 2.9867099894950284, + "grad_norm": 0.02001720480620861, + "learning_rate": 8.355169731933654e-07, + "loss": 0.0866, + "num_input_tokens_seen": 82400936, + "step": 122255 + }, + { + "epoch": 2.9868321403268756, + "grad_norm": 0.12658660113811493, + "learning_rate": 8.354328576803511e-07, + "loss": 0.1082, + "num_input_tokens_seen": 82404072, + "step": 122260 + }, + { + "epoch": 2.986954291158723, + "grad_norm": 2.110907554626465, + "learning_rate": 8.353487433641131e-07, + "loss": 0.001, + "num_input_tokens_seen": 82407912, + "step": 122265 + }, + { + "epoch": 2.98707644199057, + "grad_norm": 0.002116686664521694, + "learning_rate": 8.352646302452637e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82411112, + "step": 122270 + }, + { + "epoch": 2.987198592822417, + "grad_norm": 0.05068863555788994, + "learning_rate": 8.351805183244137e-07, + "loss": 0.0992, + "num_input_tokens_seen": 82414696, + "step": 122275 + }, + { + "epoch": 2.9873207436542644, + "grad_norm": 0.03697684034705162, + "learning_rate": 8.350964076021754e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82418216, + "step": 122280 + }, + { + "epoch": 2.9874428944861116, + "grad_norm": 0.3347267210483551, + "learning_rate": 8.350122980791608e-07, + "loss": 0.0007, + "num_input_tokens_seen": 82421352, + "step": 122285 + }, + { + "epoch": 2.9875650453179587, + "grad_norm": 0.14682719111442566, + "learning_rate": 8.349281897559808e-07, + "loss": 0.0323, + "num_input_tokens_seen": 82424680, + "step": 122290 + }, + { + "epoch": 2.987687196149806, + "grad_norm": 0.3761098384857178, + "learning_rate": 8.348440826332477e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82427816, + "step": 122295 + }, + { + "epoch": 2.9878093469816527, + "grad_norm": 0.006452401168644428, + "learning_rate": 8.347599767115726e-07, + "loss": 0.0663, + "num_input_tokens_seen": 82431464, + "step": 122300 + }, + { + "epoch": 2.9879314978135003, + "grad_norm": 0.0023743468336760998, + "learning_rate": 8.346758719915677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82434664, + "step": 122305 + }, + { + "epoch": 2.988053648645347, + "grad_norm": 0.5880225896835327, + "learning_rate": 8.345917684738439e-07, + "loss": 0.0007, + "num_input_tokens_seen": 82438056, + "step": 122310 + }, + { + "epoch": 2.9881757994771947, + "grad_norm": 0.16489212214946747, + "learning_rate": 8.345076661590133e-07, + "loss": 0.0277, + "num_input_tokens_seen": 82441448, + "step": 122315 + }, + { + "epoch": 2.9882979503090414, + "grad_norm": 0.000908148183953017, + "learning_rate": 8.344235650476878e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82447208, + "step": 122320 + }, + { + "epoch": 2.9884201011408886, + "grad_norm": 0.015482887625694275, + "learning_rate": 8.343394651404783e-07, + "loss": 0.0547, + "num_input_tokens_seen": 82450472, + "step": 122325 + }, + { + "epoch": 2.988542251972736, + "grad_norm": 0.1507757008075714, + "learning_rate": 8.342553664379971e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82454184, + "step": 122330 + }, + { + "epoch": 2.988664402804583, + "grad_norm": 0.09877260029315948, + "learning_rate": 8.341712689408551e-07, + "loss": 0.1056, + "num_input_tokens_seen": 82457640, + "step": 122335 + }, + { + "epoch": 2.98878655363643, + "grad_norm": 0.24767570197582245, + "learning_rate": 8.340871726496646e-07, + "loss": 0.1443, + "num_input_tokens_seen": 82460904, + "step": 122340 + }, + { + "epoch": 2.9889087044682774, + "grad_norm": 0.012325254268944263, + "learning_rate": 8.340030775650364e-07, + "loss": 0.0491, + "num_input_tokens_seen": 82464808, + "step": 122345 + }, + { + "epoch": 2.9890308553001246, + "grad_norm": 0.045981768518686295, + "learning_rate": 8.339189836875827e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82468136, + "step": 122350 + }, + { + "epoch": 2.9891530061319718, + "grad_norm": 0.023901034146547318, + "learning_rate": 8.338348910179151e-07, + "loss": 0.0624, + "num_input_tokens_seen": 82471400, + "step": 122355 + }, + { + "epoch": 2.989275156963819, + "grad_norm": 121.82840728759766, + "learning_rate": 8.337507995566444e-07, + "loss": 0.0363, + "num_input_tokens_seen": 82474792, + "step": 122360 + }, + { + "epoch": 2.989397307795666, + "grad_norm": 0.009395765140652657, + "learning_rate": 8.33666709304383e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82478248, + "step": 122365 + }, + { + "epoch": 2.9895194586275133, + "grad_norm": 0.12960787117481232, + "learning_rate": 8.335826202617416e-07, + "loss": 0.0351, + "num_input_tokens_seen": 82481960, + "step": 122370 + }, + { + "epoch": 2.9896416094593605, + "grad_norm": 0.004650300834327936, + "learning_rate": 8.334985324293321e-07, + "loss": 0.0016, + "num_input_tokens_seen": 82484904, + "step": 122375 + }, + { + "epoch": 2.9897637602912077, + "grad_norm": 0.07671540230512619, + "learning_rate": 8.334144458077665e-07, + "loss": 0.0666, + "num_input_tokens_seen": 82488488, + "step": 122380 + }, + { + "epoch": 2.989885911123055, + "grad_norm": 0.06331785023212433, + "learning_rate": 8.333303603976555e-07, + "loss": 0.0276, + "num_input_tokens_seen": 82491432, + "step": 122385 + }, + { + "epoch": 2.990008061954902, + "grad_norm": 0.011702315881848335, + "learning_rate": 8.332462761996114e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82494568, + "step": 122390 + }, + { + "epoch": 2.990130212786749, + "grad_norm": 0.0312877893447876, + "learning_rate": 8.33162193214245e-07, + "loss": 0.0772, + "num_input_tokens_seen": 82497640, + "step": 122395 + }, + { + "epoch": 2.9902523636185965, + "grad_norm": 29.36800193786621, + "learning_rate": 8.330781114421678e-07, + "loss": 0.0973, + "num_input_tokens_seen": 82501352, + "step": 122400 + }, + { + "epoch": 2.990374514450443, + "grad_norm": 0.15311099588871002, + "learning_rate": 8.329940308839918e-07, + "loss": 0.0007, + "num_input_tokens_seen": 82504424, + "step": 122405 + }, + { + "epoch": 2.9904966652822904, + "grad_norm": 0.008352646604180336, + "learning_rate": 8.329099515403277e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82507752, + "step": 122410 + }, + { + "epoch": 2.9906188161141376, + "grad_norm": 0.03414013236761093, + "learning_rate": 8.328258734117879e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82510952, + "step": 122415 + }, + { + "epoch": 2.990740966945985, + "grad_norm": 0.03758617490530014, + "learning_rate": 8.327417964989827e-07, + "loss": 0.0692, + "num_input_tokens_seen": 82513896, + "step": 122420 + }, + { + "epoch": 2.990863117777832, + "grad_norm": 0.05610216408967972, + "learning_rate": 8.326577208025247e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82517160, + "step": 122425 + }, + { + "epoch": 2.990985268609679, + "grad_norm": 0.025218749418854713, + "learning_rate": 8.325736463230244e-07, + "loss": 0.0538, + "num_input_tokens_seen": 82521064, + "step": 122430 + }, + { + "epoch": 2.9911074194415264, + "grad_norm": 0.19947555661201477, + "learning_rate": 8.324895730610939e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82524264, + "step": 122435 + }, + { + "epoch": 2.9912295702733736, + "grad_norm": 0.022488972172141075, + "learning_rate": 8.32405501017344e-07, + "loss": 0.0017, + "num_input_tokens_seen": 82527208, + "step": 122440 + }, + { + "epoch": 2.9913517211052207, + "grad_norm": 0.1366833597421646, + "learning_rate": 8.323214301923865e-07, + "loss": 0.0107, + "num_input_tokens_seen": 82530472, + "step": 122445 + }, + { + "epoch": 2.991473871937068, + "grad_norm": 0.5030769109725952, + "learning_rate": 8.322373605868326e-07, + "loss": 0.0723, + "num_input_tokens_seen": 82534056, + "step": 122450 + }, + { + "epoch": 2.991596022768915, + "grad_norm": 0.016812171787023544, + "learning_rate": 8.321532922012937e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82537832, + "step": 122455 + }, + { + "epoch": 2.9917181736007623, + "grad_norm": 0.004857075400650501, + "learning_rate": 8.320692250363816e-07, + "loss": 0.0827, + "num_input_tokens_seen": 82541352, + "step": 122460 + }, + { + "epoch": 2.9918403244326095, + "grad_norm": 0.17521388828754425, + "learning_rate": 8.319851590927067e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82544744, + "step": 122465 + }, + { + "epoch": 2.9919624752644567, + "grad_norm": 0.08017278462648392, + "learning_rate": 8.31901094370881e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82548584, + "step": 122470 + }, + { + "epoch": 2.992084626096304, + "grad_norm": 0.027329741045832634, + "learning_rate": 8.318170308715161e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82551976, + "step": 122475 + }, + { + "epoch": 2.9922067769281506, + "grad_norm": 0.011738654226064682, + "learning_rate": 8.317329685952226e-07, + "loss": 0.1283, + "num_input_tokens_seen": 82555304, + "step": 122480 + }, + { + "epoch": 2.9923289277599983, + "grad_norm": 0.06388230621814728, + "learning_rate": 8.316489075426127e-07, + "loss": 0.0862, + "num_input_tokens_seen": 82558312, + "step": 122485 + }, + { + "epoch": 2.992451078591845, + "grad_norm": 0.16729193925857544, + "learning_rate": 8.315648477142967e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82561320, + "step": 122490 + }, + { + "epoch": 2.9925732294236926, + "grad_norm": 0.012937177903950214, + "learning_rate": 8.314807891108869e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82564776, + "step": 122495 + }, + { + "epoch": 2.9926953802555394, + "grad_norm": 0.0036519125569611788, + "learning_rate": 8.313967317329936e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82568296, + "step": 122500 + }, + { + "epoch": 2.9928175310873866, + "grad_norm": 0.005260920617729425, + "learning_rate": 8.313126755812289e-07, + "loss": 0.0401, + "num_input_tokens_seen": 82571304, + "step": 122505 + }, + { + "epoch": 2.9929396819192338, + "grad_norm": 0.007121355272829533, + "learning_rate": 8.31228620656204e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82574312, + "step": 122510 + }, + { + "epoch": 2.993061832751081, + "grad_norm": 0.03562138229608536, + "learning_rate": 8.311445669585297e-07, + "loss": 0.0303, + "num_input_tokens_seen": 82578536, + "step": 122515 + }, + { + "epoch": 2.993183983582928, + "grad_norm": 0.07383058965206146, + "learning_rate": 8.310605144888177e-07, + "loss": 0.0379, + "num_input_tokens_seen": 82581736, + "step": 122520 + }, + { + "epoch": 2.9933061344147753, + "grad_norm": 37.444820404052734, + "learning_rate": 8.309764632476788e-07, + "loss": 0.0499, + "num_input_tokens_seen": 82584872, + "step": 122525 + }, + { + "epoch": 2.9934282852466225, + "grad_norm": 16.04447364807129, + "learning_rate": 8.308924132357245e-07, + "loss": 0.0547, + "num_input_tokens_seen": 82587752, + "step": 122530 + }, + { + "epoch": 2.9935504360784697, + "grad_norm": 0.05529216304421425, + "learning_rate": 8.308083644535665e-07, + "loss": 0.0685, + "num_input_tokens_seen": 82590952, + "step": 122535 + }, + { + "epoch": 2.993672586910317, + "grad_norm": 0.19819395244121552, + "learning_rate": 8.307243169018151e-07, + "loss": 0.1417, + "num_input_tokens_seen": 82594344, + "step": 122540 + }, + { + "epoch": 2.993794737742164, + "grad_norm": 0.3649430274963379, + "learning_rate": 8.306402705810824e-07, + "loss": 0.1196, + "num_input_tokens_seen": 82597864, + "step": 122545 + }, + { + "epoch": 2.9939168885740113, + "grad_norm": 1411.5084228515625, + "learning_rate": 8.305562254919791e-07, + "loss": 0.035, + "num_input_tokens_seen": 82601064, + "step": 122550 + }, + { + "epoch": 2.9940390394058585, + "grad_norm": 0.06733270734548569, + "learning_rate": 8.304721816351164e-07, + "loss": 0.0011, + "num_input_tokens_seen": 82604520, + "step": 122555 + }, + { + "epoch": 2.9941611902377057, + "grad_norm": 0.03253524750471115, + "learning_rate": 8.303881390111056e-07, + "loss": 0.0345, + "num_input_tokens_seen": 82607720, + "step": 122560 + }, + { + "epoch": 2.9942833410695524, + "grad_norm": 0.035487256944179535, + "learning_rate": 8.303040976205578e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82610920, + "step": 122565 + }, + { + "epoch": 2.9944054919014, + "grad_norm": 0.019332388415932655, + "learning_rate": 8.302200574640845e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82614632, + "step": 122570 + }, + { + "epoch": 2.994527642733247, + "grad_norm": 0.35883399844169617, + "learning_rate": 8.301360185422963e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82617896, + "step": 122575 + }, + { + "epoch": 2.9946497935650944, + "grad_norm": 0.93114173412323, + "learning_rate": 8.30051980855805e-07, + "loss": 0.1424, + "num_input_tokens_seen": 82621608, + "step": 122580 + }, + { + "epoch": 2.994771944396941, + "grad_norm": 0.0395393930375576, + "learning_rate": 8.29967944405221e-07, + "loss": 0.0424, + "num_input_tokens_seen": 82624680, + "step": 122585 + }, + { + "epoch": 2.9948940952287884, + "grad_norm": 0.058739159256219864, + "learning_rate": 8.298839091911562e-07, + "loss": 0.0853, + "num_input_tokens_seen": 82628200, + "step": 122590 + }, + { + "epoch": 2.9950162460606355, + "grad_norm": 0.02455005794763565, + "learning_rate": 8.297998752142211e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82631720, + "step": 122595 + }, + { + "epoch": 2.9951383968924827, + "grad_norm": 0.5319987535476685, + "learning_rate": 8.297158424750272e-07, + "loss": 0.0006, + "num_input_tokens_seen": 82635112, + "step": 122600 + }, + { + "epoch": 2.99526054772433, + "grad_norm": 0.1334734559059143, + "learning_rate": 8.296318109741856e-07, + "loss": 0.1043, + "num_input_tokens_seen": 82638632, + "step": 122605 + }, + { + "epoch": 2.995382698556177, + "grad_norm": 0.023001959547400475, + "learning_rate": 8.295477807123071e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82642152, + "step": 122610 + }, + { + "epoch": 2.9955048493880243, + "grad_norm": 10.496086120605469, + "learning_rate": 8.294637516900034e-07, + "loss": 0.1018, + "num_input_tokens_seen": 82645544, + "step": 122615 + }, + { + "epoch": 2.9956270002198715, + "grad_norm": 0.02662235125899315, + "learning_rate": 8.293797239078846e-07, + "loss": 0.0574, + "num_input_tokens_seen": 82648936, + "step": 122620 + }, + { + "epoch": 2.9957491510517187, + "grad_norm": 0.2781931459903717, + "learning_rate": 8.292956973665624e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82652008, + "step": 122625 + }, + { + "epoch": 2.995871301883566, + "grad_norm": 0.027957888320088387, + "learning_rate": 8.292116720666482e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82655336, + "step": 122630 + }, + { + "epoch": 2.995993452715413, + "grad_norm": 0.04008051007986069, + "learning_rate": 8.291276480087522e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82658280, + "step": 122635 + }, + { + "epoch": 2.9961156035472603, + "grad_norm": 0.09257989376783371, + "learning_rate": 8.290436251934865e-07, + "loss": 0.0589, + "num_input_tokens_seen": 82661864, + "step": 122640 + }, + { + "epoch": 2.9962377543791074, + "grad_norm": 0.09206127375364304, + "learning_rate": 8.289596036214609e-07, + "loss": 0.0005, + "num_input_tokens_seen": 82664936, + "step": 122645 + }, + { + "epoch": 2.9963599052109546, + "grad_norm": 0.2243671715259552, + "learning_rate": 8.288755832932877e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82668584, + "step": 122650 + }, + { + "epoch": 2.996482056042802, + "grad_norm": 0.06528321653604507, + "learning_rate": 8.287915642095766e-07, + "loss": 0.0285, + "num_input_tokens_seen": 82671528, + "step": 122655 + }, + { + "epoch": 2.9966042068746486, + "grad_norm": 0.01274157129228115, + "learning_rate": 8.287075463709396e-07, + "loss": 0.0394, + "num_input_tokens_seen": 82674600, + "step": 122660 + }, + { + "epoch": 2.996726357706496, + "grad_norm": 0.09428822249174118, + "learning_rate": 8.286235297779878e-07, + "loss": 0.0007, + "num_input_tokens_seen": 82677800, + "step": 122665 + }, + { + "epoch": 2.996848508538343, + "grad_norm": 0.008868207223713398, + "learning_rate": 8.285395144313312e-07, + "loss": 0.002, + "num_input_tokens_seen": 82681320, + "step": 122670 + }, + { + "epoch": 2.9969706593701906, + "grad_norm": 0.005609673913568258, + "learning_rate": 8.284555003315819e-07, + "loss": 0.0726, + "num_input_tokens_seen": 82684072, + "step": 122675 + }, + { + "epoch": 2.9970928102020373, + "grad_norm": 0.2403886914253235, + "learning_rate": 8.283714874793497e-07, + "loss": 0.0866, + "num_input_tokens_seen": 82687528, + "step": 122680 + }, + { + "epoch": 2.9972149610338845, + "grad_norm": 224.93780517578125, + "learning_rate": 8.282874758752464e-07, + "loss": 0.0152, + "num_input_tokens_seen": 82691048, + "step": 122685 + }, + { + "epoch": 2.9973371118657317, + "grad_norm": 0.33052942156791687, + "learning_rate": 8.28203465519883e-07, + "loss": 0.0684, + "num_input_tokens_seen": 82694120, + "step": 122690 + }, + { + "epoch": 2.997459262697579, + "grad_norm": 0.11541890352964401, + "learning_rate": 8.2811945641387e-07, + "loss": 0.0517, + "num_input_tokens_seen": 82697512, + "step": 122695 + }, + { + "epoch": 2.997581413529426, + "grad_norm": 0.03359320014715195, + "learning_rate": 8.280354485578188e-07, + "loss": 0.0149, + "num_input_tokens_seen": 82700648, + "step": 122700 + }, + { + "epoch": 2.9977035643612733, + "grad_norm": 0.10430304706096649, + "learning_rate": 8.2795144195234e-07, + "loss": 0.0664, + "num_input_tokens_seen": 82704232, + "step": 122705 + }, + { + "epoch": 2.9978257151931205, + "grad_norm": 0.04724357649683952, + "learning_rate": 8.278674365980445e-07, + "loss": 0.0004, + "num_input_tokens_seen": 82707688, + "step": 122710 + }, + { + "epoch": 2.9979478660249677, + "grad_norm": 0.059274137020111084, + "learning_rate": 8.277834324955433e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82710760, + "step": 122715 + }, + { + "epoch": 2.998070016856815, + "grad_norm": 0.0280942153185606, + "learning_rate": 8.276994296454471e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82714024, + "step": 122720 + }, + { + "epoch": 2.998192167688662, + "grad_norm": 0.48792269825935364, + "learning_rate": 8.276154280483674e-07, + "loss": 0.0016, + "num_input_tokens_seen": 82717352, + "step": 122725 + }, + { + "epoch": 2.9983143185205092, + "grad_norm": 0.20319870114326477, + "learning_rate": 8.275314277049144e-07, + "loss": 0.038, + "num_input_tokens_seen": 82720616, + "step": 122730 + }, + { + "epoch": 2.9984364693523564, + "grad_norm": 0.09327713400125504, + "learning_rate": 8.274474286156994e-07, + "loss": 0.0933, + "num_input_tokens_seen": 82723688, + "step": 122735 + }, + { + "epoch": 2.9985586201842036, + "grad_norm": 0.06357068568468094, + "learning_rate": 8.273634307813329e-07, + "loss": 0.0555, + "num_input_tokens_seen": 82726888, + "step": 122740 + }, + { + "epoch": 2.9986807710160504, + "grad_norm": 0.10009672492742538, + "learning_rate": 8.272794342024263e-07, + "loss": 0.0492, + "num_input_tokens_seen": 82730216, + "step": 122745 + }, + { + "epoch": 2.998802921847898, + "grad_norm": 0.1424786001443863, + "learning_rate": 8.271954388795897e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82733480, + "step": 122750 + }, + { + "epoch": 2.9989250726797447, + "grad_norm": 0.007452080957591534, + "learning_rate": 8.271114448134345e-07, + "loss": 0.0561, + "num_input_tokens_seen": 82736680, + "step": 122755 + }, + { + "epoch": 2.9990472235115924, + "grad_norm": 0.02739526517689228, + "learning_rate": 8.270274520045715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82740072, + "step": 122760 + }, + { + "epoch": 2.999169374343439, + "grad_norm": 0.11577534675598145, + "learning_rate": 8.269434604536113e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82743272, + "step": 122765 + }, + { + "epoch": 2.9992915251752863, + "grad_norm": 0.02286659926176071, + "learning_rate": 8.268594701611651e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82746600, + "step": 122770 + }, + { + "epoch": 2.9994136760071335, + "grad_norm": 33.681095123291016, + "learning_rate": 8.267754811278429e-07, + "loss": 0.0518, + "num_input_tokens_seen": 82750248, + "step": 122775 + }, + { + "epoch": 2.9995358268389807, + "grad_norm": 0.03711327537894249, + "learning_rate": 8.266914933542559e-07, + "loss": 0.0044, + "num_input_tokens_seen": 82753768, + "step": 122780 + }, + { + "epoch": 2.999657977670828, + "grad_norm": 54.62702941894531, + "learning_rate": 8.266075068410156e-07, + "loss": 0.1181, + "num_input_tokens_seen": 82757416, + "step": 122785 + }, + { + "epoch": 2.999780128502675, + "grad_norm": 0.03556351736187935, + "learning_rate": 8.265235215887317e-07, + "loss": 0.0627, + "num_input_tokens_seen": 82760936, + "step": 122790 + }, + { + "epoch": 2.9999022793345222, + "grad_norm": 0.003964877687394619, + "learning_rate": 8.264395375980156e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82763880, + "step": 122795 + }, + { + "epoch": 3.0000244301663694, + "grad_norm": 0.03248461335897446, + "learning_rate": 8.263555548694777e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82767184, + "step": 122800 + }, + { + "epoch": 3.0001465809982166, + "grad_norm": 0.0407659187912941, + "learning_rate": 8.262715734037292e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82770064, + "step": 122805 + }, + { + "epoch": 3.000219871497325, + "eval_loss": 0.19360828399658203, + "eval_runtime": 47.722, + "eval_samples_per_second": 762.437, + "eval_steps_per_second": 95.323, + "num_input_tokens_seen": 82772304, + "step": 122808 + }, + { + "epoch": 3.000268731830064, + "grad_norm": 0.08304096758365631, + "learning_rate": 8.261875932013802e-07, + "loss": 0.0303, + "num_input_tokens_seen": 82774032, + "step": 122810 + }, + { + "epoch": 3.000390882661911, + "grad_norm": 0.012755513191223145, + "learning_rate": 8.26103614263042e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82777744, + "step": 122815 + }, + { + "epoch": 3.000513033493758, + "grad_norm": 16.014375686645508, + "learning_rate": 8.260196365893252e-07, + "loss": 0.0634, + "num_input_tokens_seen": 82781072, + "step": 122820 + }, + { + "epoch": 3.0006351843256054, + "grad_norm": 0.07999828457832336, + "learning_rate": 8.2593566018084e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82784400, + "step": 122825 + }, + { + "epoch": 3.0007573351574526, + "grad_norm": 0.029006067663431168, + "learning_rate": 8.25851685038198e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82787472, + "step": 122830 + }, + { + "epoch": 3.0008794859892998, + "grad_norm": 0.030935434624552727, + "learning_rate": 8.257677111620089e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82790864, + "step": 122835 + }, + { + "epoch": 3.001001636821147, + "grad_norm": 60.06044006347656, + "learning_rate": 8.256837385528839e-07, + "loss": 0.0135, + "num_input_tokens_seen": 82794384, + "step": 122840 + }, + { + "epoch": 3.0011237876529937, + "grad_norm": 0.09562061727046967, + "learning_rate": 8.25599767211434e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82798288, + "step": 122845 + }, + { + "epoch": 3.001245938484841, + "grad_norm": 0.004097535274922848, + "learning_rate": 8.255157971382691e-07, + "loss": 0.0, + "num_input_tokens_seen": 82801232, + "step": 122850 + }, + { + "epoch": 3.001368089316688, + "grad_norm": 0.052044086158275604, + "learning_rate": 8.254318283340007e-07, + "loss": 0.0683, + "num_input_tokens_seen": 82804304, + "step": 122855 + }, + { + "epoch": 3.0014902401485353, + "grad_norm": 0.09695622324943542, + "learning_rate": 8.253478607992388e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82807568, + "step": 122860 + }, + { + "epoch": 3.0016123909803825, + "grad_norm": 0.013213934376835823, + "learning_rate": 8.252638945345942e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82810768, + "step": 122865 + }, + { + "epoch": 3.0017345418122297, + "grad_norm": 0.07854120433330536, + "learning_rate": 8.251799295406776e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82814096, + "step": 122870 + }, + { + "epoch": 3.001856692644077, + "grad_norm": 0.04113243520259857, + "learning_rate": 8.250959658180993e-07, + "loss": 0.0, + "num_input_tokens_seen": 82817872, + "step": 122875 + }, + { + "epoch": 3.001978843475924, + "grad_norm": 0.014978439547121525, + "learning_rate": 8.250120033674706e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82821456, + "step": 122880 + }, + { + "epoch": 3.002100994307771, + "grad_norm": 0.09572244435548782, + "learning_rate": 8.249280421894012e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82824720, + "step": 122885 + }, + { + "epoch": 3.0022231451396184, + "grad_norm": 0.00526698911562562, + "learning_rate": 8.248440822845028e-07, + "loss": 0.0, + "num_input_tokens_seen": 82828432, + "step": 122890 + }, + { + "epoch": 3.0023452959714656, + "grad_norm": 0.03695838525891304, + "learning_rate": 8.247601236533848e-07, + "loss": 0.001, + "num_input_tokens_seen": 82831440, + "step": 122895 + }, + { + "epoch": 3.002467446803313, + "grad_norm": 0.04748551920056343, + "learning_rate": 8.246761662966587e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82835152, + "step": 122900 + }, + { + "epoch": 3.00258959763516, + "grad_norm": 0.005985293071717024, + "learning_rate": 8.245922102149343e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82838800, + "step": 122905 + }, + { + "epoch": 3.002711748467007, + "grad_norm": 0.02861677296459675, + "learning_rate": 8.245082554088228e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82841936, + "step": 122910 + }, + { + "epoch": 3.0028338992988544, + "grad_norm": 0.03024284727871418, + "learning_rate": 8.244243018789343e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82845328, + "step": 122915 + }, + { + "epoch": 3.0029560501307015, + "grad_norm": 0.010547067038714886, + "learning_rate": 8.243403496258797e-07, + "loss": 0.0391, + "num_input_tokens_seen": 82848592, + "step": 122920 + }, + { + "epoch": 3.0030782009625487, + "grad_norm": 0.02874787710607052, + "learning_rate": 8.242563986502693e-07, + "loss": 0.0016, + "num_input_tokens_seen": 82852368, + "step": 122925 + }, + { + "epoch": 3.003200351794396, + "grad_norm": 0.0258303415030241, + "learning_rate": 8.241724489527134e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82856016, + "step": 122930 + }, + { + "epoch": 3.0033225026262427, + "grad_norm": 0.012022108770906925, + "learning_rate": 8.240885005338227e-07, + "loss": 0.0094, + "num_input_tokens_seen": 82859344, + "step": 122935 + }, + { + "epoch": 3.00344465345809, + "grad_norm": 0.28652423620224, + "learning_rate": 8.240045533942081e-07, + "loss": 0.0081, + "num_input_tokens_seen": 82862288, + "step": 122940 + }, + { + "epoch": 3.003566804289937, + "grad_norm": 0.0074856579303741455, + "learning_rate": 8.239206075344793e-07, + "loss": 0.0, + "num_input_tokens_seen": 82865296, + "step": 122945 + }, + { + "epoch": 3.0036889551217842, + "grad_norm": 0.003348552156239748, + "learning_rate": 8.238366629552478e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82868880, + "step": 122950 + }, + { + "epoch": 3.0038111059536314, + "grad_norm": 0.008183478377759457, + "learning_rate": 8.237527196571229e-07, + "loss": 0.0431, + "num_input_tokens_seen": 82872144, + "step": 122955 + }, + { + "epoch": 3.0039332567854786, + "grad_norm": 0.15718647837638855, + "learning_rate": 8.23668777640716e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82875216, + "step": 122960 + }, + { + "epoch": 3.004055407617326, + "grad_norm": 236.15402221679688, + "learning_rate": 8.23584836906637e-07, + "loss": 0.0294, + "num_input_tokens_seen": 82878736, + "step": 122965 + }, + { + "epoch": 3.004177558449173, + "grad_norm": 0.007492092438042164, + "learning_rate": 8.235008974554964e-07, + "loss": 0.0, + "num_input_tokens_seen": 82882384, + "step": 122970 + }, + { + "epoch": 3.00429970928102, + "grad_norm": 0.00372646888718009, + "learning_rate": 8.234169592879053e-07, + "loss": 0.0, + "num_input_tokens_seen": 82885648, + "step": 122975 + }, + { + "epoch": 3.0044218601128674, + "grad_norm": 0.006116523407399654, + "learning_rate": 8.233330224044728e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82889104, + "step": 122980 + }, + { + "epoch": 3.0045440109447146, + "grad_norm": 0.0030098408460617065, + "learning_rate": 8.232490868058106e-07, + "loss": 0.0, + "num_input_tokens_seen": 82892688, + "step": 122985 + }, + { + "epoch": 3.0046661617765618, + "grad_norm": 0.1593664586544037, + "learning_rate": 8.231651524925283e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82895888, + "step": 122990 + }, + { + "epoch": 3.004788312608409, + "grad_norm": 0.022927800193428993, + "learning_rate": 8.230812194652369e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82899024, + "step": 122995 + }, + { + "epoch": 3.004910463440256, + "grad_norm": 0.007119093555957079, + "learning_rate": 8.229972877245461e-07, + "loss": 0.0, + "num_input_tokens_seen": 82902352, + "step": 123000 + }, + { + "epoch": 3.0050326142721033, + "grad_norm": 0.002705874852836132, + "learning_rate": 8.229133572710665e-07, + "loss": 0.0, + "num_input_tokens_seen": 82905424, + "step": 123005 + }, + { + "epoch": 3.0051547651039505, + "grad_norm": 0.041886989027261734, + "learning_rate": 8.228294281054091e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82908432, + "step": 123010 + }, + { + "epoch": 3.0052769159357977, + "grad_norm": 0.10048436373472214, + "learning_rate": 8.227455002281835e-07, + "loss": 0.0002, + "num_input_tokens_seen": 82911888, + "step": 123015 + }, + { + "epoch": 3.005399066767645, + "grad_norm": 0.006059782113879919, + "learning_rate": 8.226615736400004e-07, + "loss": 0.0, + "num_input_tokens_seen": 82915152, + "step": 123020 + }, + { + "epoch": 3.0055212175994916, + "grad_norm": 0.03320590406656265, + "learning_rate": 8.225776483414699e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82918416, + "step": 123025 + }, + { + "epoch": 3.005643368431339, + "grad_norm": 0.0008360522333532572, + "learning_rate": 8.224937243332024e-07, + "loss": 0.0, + "num_input_tokens_seen": 82922128, + "step": 123030 + }, + { + "epoch": 3.005765519263186, + "grad_norm": 0.0301240012049675, + "learning_rate": 8.224098016158087e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82925456, + "step": 123035 + }, + { + "epoch": 3.005887670095033, + "grad_norm": 0.00424900371581316, + "learning_rate": 8.223258801898981e-07, + "loss": 0.0476, + "num_input_tokens_seen": 82928528, + "step": 123040 + }, + { + "epoch": 3.0060098209268804, + "grad_norm": 0.012241334654390812, + "learning_rate": 8.22241960056082e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82932112, + "step": 123045 + }, + { + "epoch": 3.0061319717587276, + "grad_norm": 0.006143426522612572, + "learning_rate": 8.221580412149697e-07, + "loss": 0.0307, + "num_input_tokens_seen": 82935184, + "step": 123050 + }, + { + "epoch": 3.006254122590575, + "grad_norm": 0.009116101078689098, + "learning_rate": 8.220741236671726e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82938704, + "step": 123055 + }, + { + "epoch": 3.006376273422422, + "grad_norm": 0.02133636362850666, + "learning_rate": 8.219902074132996e-07, + "loss": 0.0, + "num_input_tokens_seen": 82941840, + "step": 123060 + }, + { + "epoch": 3.006498424254269, + "grad_norm": 0.26472222805023193, + "learning_rate": 8.219062924539621e-07, + "loss": 0.0003, + "num_input_tokens_seen": 82944912, + "step": 123065 + }, + { + "epoch": 3.0066205750861164, + "grad_norm": 0.041839998215436935, + "learning_rate": 8.218223787897699e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82947600, + "step": 123070 + }, + { + "epoch": 3.0067427259179635, + "grad_norm": 0.02455715648829937, + "learning_rate": 8.217384664213332e-07, + "loss": 0.0, + "num_input_tokens_seen": 82950928, + "step": 123075 + }, + { + "epoch": 3.0068648767498107, + "grad_norm": 0.25165486335754395, + "learning_rate": 8.216545553492626e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82954256, + "step": 123080 + }, + { + "epoch": 3.006987027581658, + "grad_norm": 0.0012642904184758663, + "learning_rate": 8.215706455741677e-07, + "loss": 0.0, + "num_input_tokens_seen": 82957904, + "step": 123085 + }, + { + "epoch": 3.007109178413505, + "grad_norm": 0.0021325668785721064, + "learning_rate": 8.214867370966589e-07, + "loss": 0.0, + "num_input_tokens_seen": 82961616, + "step": 123090 + }, + { + "epoch": 3.0072313292453523, + "grad_norm": 0.013873937539756298, + "learning_rate": 8.214028299173471e-07, + "loss": 0.0, + "num_input_tokens_seen": 82966032, + "step": 123095 + }, + { + "epoch": 3.0073534800771995, + "grad_norm": 0.007746911607682705, + "learning_rate": 8.213189240368416e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82969424, + "step": 123100 + }, + { + "epoch": 3.0074756309090467, + "grad_norm": 27.39247703552246, + "learning_rate": 8.212350194557532e-07, + "loss": 0.0028, + "num_input_tokens_seen": 82973264, + "step": 123105 + }, + { + "epoch": 3.007597781740894, + "grad_norm": 0.000948448374401778, + "learning_rate": 8.211511161746914e-07, + "loss": 0.0, + "num_input_tokens_seen": 82976592, + "step": 123110 + }, + { + "epoch": 3.0077199325727406, + "grad_norm": 0.16879381239414215, + "learning_rate": 8.210672141942674e-07, + "loss": 0.0028, + "num_input_tokens_seen": 82979856, + "step": 123115 + }, + { + "epoch": 3.007842083404588, + "grad_norm": 0.0199663657695055, + "learning_rate": 8.209833135150901e-07, + "loss": 0.1153, + "num_input_tokens_seen": 82983184, + "step": 123120 + }, + { + "epoch": 3.007964234236435, + "grad_norm": 0.0003089674864895642, + "learning_rate": 8.208994141377706e-07, + "loss": 0.0, + "num_input_tokens_seen": 82986256, + "step": 123125 + }, + { + "epoch": 3.008086385068282, + "grad_norm": 0.013704835437238216, + "learning_rate": 8.20815516062919e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82990032, + "step": 123130 + }, + { + "epoch": 3.0082085359001294, + "grad_norm": 0.0007210209732875228, + "learning_rate": 8.207316192911447e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82993936, + "step": 123135 + }, + { + "epoch": 3.0083306867319766, + "grad_norm": 0.00036419389653019607, + "learning_rate": 8.206477238230587e-07, + "loss": 0.0001, + "num_input_tokens_seen": 82997072, + "step": 123140 + }, + { + "epoch": 3.0084528375638238, + "grad_norm": 0.001539978664368391, + "learning_rate": 8.205638296592703e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83000528, + "step": 123145 + }, + { + "epoch": 3.008574988395671, + "grad_norm": 0.1176891177892685, + "learning_rate": 8.204799368003903e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83003728, + "step": 123150 + }, + { + "epoch": 3.008697139227518, + "grad_norm": 0.004794170614331961, + "learning_rate": 8.203960452470282e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83006928, + "step": 123155 + }, + { + "epoch": 3.0088192900593653, + "grad_norm": 0.0019680424593389034, + "learning_rate": 8.203121549997942e-07, + "loss": 0.0764, + "num_input_tokens_seen": 83010064, + "step": 123160 + }, + { + "epoch": 3.0089414408912125, + "grad_norm": 0.005374094936996698, + "learning_rate": 8.202282660592992e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83013520, + "step": 123165 + }, + { + "epoch": 3.0090635917230597, + "grad_norm": 206.52078247070312, + "learning_rate": 8.201443784261522e-07, + "loss": 0.0589, + "num_input_tokens_seen": 83016912, + "step": 123170 + }, + { + "epoch": 3.009185742554907, + "grad_norm": 0.041995588690042496, + "learning_rate": 8.200604921009637e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83020368, + "step": 123175 + }, + { + "epoch": 3.009307893386754, + "grad_norm": 0.002659060060977936, + "learning_rate": 8.199766070843437e-07, + "loss": 0.0, + "num_input_tokens_seen": 83023760, + "step": 123180 + }, + { + "epoch": 3.0094300442186013, + "grad_norm": 0.0031865271739661694, + "learning_rate": 8.198927233769021e-07, + "loss": 0.0, + "num_input_tokens_seen": 83027088, + "step": 123185 + }, + { + "epoch": 3.0095521950504485, + "grad_norm": 0.006657351739704609, + "learning_rate": 8.198088409792495e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83031056, + "step": 123190 + }, + { + "epoch": 3.0096743458822957, + "grad_norm": 0.021974647417664528, + "learning_rate": 8.197249598919949e-07, + "loss": 0.0, + "num_input_tokens_seen": 83034192, + "step": 123195 + }, + { + "epoch": 3.009796496714143, + "grad_norm": 0.2845902144908905, + "learning_rate": 8.196410801157494e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83037904, + "step": 123200 + }, + { + "epoch": 3.0099186475459896, + "grad_norm": 0.0019516038009896874, + "learning_rate": 8.19557201651122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83042000, + "step": 123205 + }, + { + "epoch": 3.010040798377837, + "grad_norm": 0.002426578663289547, + "learning_rate": 8.194733244987235e-07, + "loss": 0.0, + "num_input_tokens_seen": 83045456, + "step": 123210 + }, + { + "epoch": 3.010162949209684, + "grad_norm": 0.002065720036625862, + "learning_rate": 8.193894486591633e-07, + "loss": 0.0, + "num_input_tokens_seen": 83048528, + "step": 123215 + }, + { + "epoch": 3.010285100041531, + "grad_norm": 0.0328923799097538, + "learning_rate": 8.193055741330517e-07, + "loss": 0.0, + "num_input_tokens_seen": 83051472, + "step": 123220 + }, + { + "epoch": 3.0104072508733783, + "grad_norm": 0.016800448298454285, + "learning_rate": 8.192217009209986e-07, + "loss": 0.0637, + "num_input_tokens_seen": 83054672, + "step": 123225 + }, + { + "epoch": 3.0105294017052255, + "grad_norm": 0.0432097427546978, + "learning_rate": 8.191378290236139e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83058192, + "step": 123230 + }, + { + "epoch": 3.0106515525370727, + "grad_norm": 0.001382253016345203, + "learning_rate": 8.19053958441508e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83061392, + "step": 123235 + }, + { + "epoch": 3.01077370336892, + "grad_norm": 110.7890396118164, + "learning_rate": 8.189700891752897e-07, + "loss": 0.0532, + "num_input_tokens_seen": 83064592, + "step": 123240 + }, + { + "epoch": 3.010895854200767, + "grad_norm": 0.01439904235303402, + "learning_rate": 8.188862212255696e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83067728, + "step": 123245 + }, + { + "epoch": 3.0110180050326143, + "grad_norm": 0.012585053220391273, + "learning_rate": 8.188023545929581e-07, + "loss": 0.0, + "num_input_tokens_seen": 83070736, + "step": 123250 + }, + { + "epoch": 3.0111401558644615, + "grad_norm": 0.02939104288816452, + "learning_rate": 8.187184892780641e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83074384, + "step": 123255 + }, + { + "epoch": 3.0112623066963087, + "grad_norm": 0.0023191035725176334, + "learning_rate": 8.186346252814986e-07, + "loss": 0.0, + "num_input_tokens_seen": 83077456, + "step": 123260 + }, + { + "epoch": 3.011384457528156, + "grad_norm": 0.001554151182062924, + "learning_rate": 8.185507626038703e-07, + "loss": 0.0, + "num_input_tokens_seen": 83080976, + "step": 123265 + }, + { + "epoch": 3.011506608360003, + "grad_norm": 0.0005073735374026, + "learning_rate": 8.184669012457902e-07, + "loss": 0.0, + "num_input_tokens_seen": 83084944, + "step": 123270 + }, + { + "epoch": 3.0116287591918502, + "grad_norm": 0.013489312492311, + "learning_rate": 8.183830412078671e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83088144, + "step": 123275 + }, + { + "epoch": 3.0117509100236974, + "grad_norm": 0.0010727399494498968, + "learning_rate": 8.182991824907118e-07, + "loss": 0.0, + "num_input_tokens_seen": 83091472, + "step": 123280 + }, + { + "epoch": 3.0118730608555446, + "grad_norm": 0.007046072278171778, + "learning_rate": 8.182153250949336e-07, + "loss": 0.0, + "num_input_tokens_seen": 83095056, + "step": 123285 + }, + { + "epoch": 3.0119952116873914, + "grad_norm": 0.0008582680602557957, + "learning_rate": 8.181314690211422e-07, + "loss": 0.0, + "num_input_tokens_seen": 83098384, + "step": 123290 + }, + { + "epoch": 3.0121173625192386, + "grad_norm": 0.0054124463349580765, + "learning_rate": 8.180476142699482e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83101648, + "step": 123295 + }, + { + "epoch": 3.0122395133510858, + "grad_norm": 0.20177757740020752, + "learning_rate": 8.179637608419603e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83105360, + "step": 123300 + }, + { + "epoch": 3.012361664182933, + "grad_norm": 0.07805618643760681, + "learning_rate": 8.178799087377893e-07, + "loss": 0.0017, + "num_input_tokens_seen": 83108368, + "step": 123305 + }, + { + "epoch": 3.01248381501478, + "grad_norm": 0.07807561010122299, + "learning_rate": 8.177960579580443e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83111440, + "step": 123310 + }, + { + "epoch": 3.0126059658466273, + "grad_norm": 0.001814626739360392, + "learning_rate": 8.177122085033352e-07, + "loss": 0.0, + "num_input_tokens_seen": 83114896, + "step": 123315 + }, + { + "epoch": 3.0127281166784745, + "grad_norm": 0.0006838240078650415, + "learning_rate": 8.176283603742726e-07, + "loss": 0.0179, + "num_input_tokens_seen": 83117968, + "step": 123320 + }, + { + "epoch": 3.0128502675103217, + "grad_norm": 0.002631385810673237, + "learning_rate": 8.175445135714653e-07, + "loss": 0.0, + "num_input_tokens_seen": 83121104, + "step": 123325 + }, + { + "epoch": 3.012972418342169, + "grad_norm": 0.005652591120451689, + "learning_rate": 8.174606680955232e-07, + "loss": 0.0, + "num_input_tokens_seen": 83124432, + "step": 123330 + }, + { + "epoch": 3.013094569174016, + "grad_norm": 412.9010925292969, + "learning_rate": 8.173768239470564e-07, + "loss": 0.0534, + "num_input_tokens_seen": 83127504, + "step": 123335 + }, + { + "epoch": 3.0132167200058633, + "grad_norm": 0.0015622521750628948, + "learning_rate": 8.172929811266744e-07, + "loss": 0.0, + "num_input_tokens_seen": 83131280, + "step": 123340 + }, + { + "epoch": 3.0133388708377105, + "grad_norm": 0.001484617474488914, + "learning_rate": 8.172091396349871e-07, + "loss": 0.0, + "num_input_tokens_seen": 83134544, + "step": 123345 + }, + { + "epoch": 3.0134610216695576, + "grad_norm": 0.0010365790221840143, + "learning_rate": 8.171252994726039e-07, + "loss": 0.0, + "num_input_tokens_seen": 83137616, + "step": 123350 + }, + { + "epoch": 3.013583172501405, + "grad_norm": 0.013608659617602825, + "learning_rate": 8.17041460640135e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83141008, + "step": 123355 + }, + { + "epoch": 3.013705323333252, + "grad_norm": 0.03848070278763771, + "learning_rate": 8.169576231381894e-07, + "loss": 0.0, + "num_input_tokens_seen": 83144912, + "step": 123360 + }, + { + "epoch": 3.013827474165099, + "grad_norm": 0.034808073192834854, + "learning_rate": 8.168737869673776e-07, + "loss": 0.0, + "num_input_tokens_seen": 83148560, + "step": 123365 + }, + { + "epoch": 3.0139496249969464, + "grad_norm": 0.1787676066160202, + "learning_rate": 8.167899521283086e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83151824, + "step": 123370 + }, + { + "epoch": 3.0140717758287936, + "grad_norm": 0.08518391847610474, + "learning_rate": 8.167061186215925e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83155472, + "step": 123375 + }, + { + "epoch": 3.0141939266606403, + "grad_norm": 0.0024606117513030767, + "learning_rate": 8.166222864478387e-07, + "loss": 0.0, + "num_input_tokens_seen": 83158864, + "step": 123380 + }, + { + "epoch": 3.0143160774924875, + "grad_norm": 0.0019267149036750197, + "learning_rate": 8.16538455607657e-07, + "loss": 0.0, + "num_input_tokens_seen": 83161936, + "step": 123385 + }, + { + "epoch": 3.0144382283243347, + "grad_norm": 35.64485549926758, + "learning_rate": 8.164546261016572e-07, + "loss": 0.0565, + "num_input_tokens_seen": 83165008, + "step": 123390 + }, + { + "epoch": 3.014560379156182, + "grad_norm": 24.512760162353516, + "learning_rate": 8.163707979304483e-07, + "loss": 0.0688, + "num_input_tokens_seen": 83168976, + "step": 123395 + }, + { + "epoch": 3.014682529988029, + "grad_norm": 0.02174725942313671, + "learning_rate": 8.162869710946404e-07, + "loss": 0.0, + "num_input_tokens_seen": 83172368, + "step": 123400 + }, + { + "epoch": 3.0148046808198763, + "grad_norm": 0.002265357645228505, + "learning_rate": 8.162031455948435e-07, + "loss": 0.0, + "num_input_tokens_seen": 83175824, + "step": 123405 + }, + { + "epoch": 3.0149268316517235, + "grad_norm": 0.0016550914151594043, + "learning_rate": 8.161193214316662e-07, + "loss": 0.0, + "num_input_tokens_seen": 83178896, + "step": 123410 + }, + { + "epoch": 3.0150489824835707, + "grad_norm": 0.004780837334692478, + "learning_rate": 8.160354986057192e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83181968, + "step": 123415 + }, + { + "epoch": 3.015171133315418, + "grad_norm": 0.002558564767241478, + "learning_rate": 8.15951677117611e-07, + "loss": 0.0, + "num_input_tokens_seen": 83185680, + "step": 123420 + }, + { + "epoch": 3.015293284147265, + "grad_norm": 0.00885737594217062, + "learning_rate": 8.158678569679523e-07, + "loss": 0.0, + "num_input_tokens_seen": 83188944, + "step": 123425 + }, + { + "epoch": 3.0154154349791122, + "grad_norm": 0.00036788417492061853, + "learning_rate": 8.157840381573515e-07, + "loss": 0.0, + "num_input_tokens_seen": 83192080, + "step": 123430 + }, + { + "epoch": 3.0155375858109594, + "grad_norm": 0.0042150323279201984, + "learning_rate": 8.15700220686419e-07, + "loss": 0.0, + "num_input_tokens_seen": 83195152, + "step": 123435 + }, + { + "epoch": 3.0156597366428066, + "grad_norm": 0.04398500174283981, + "learning_rate": 8.156164045557643e-07, + "loss": 0.0, + "num_input_tokens_seen": 83198544, + "step": 123440 + }, + { + "epoch": 3.015781887474654, + "grad_norm": 0.002602602820843458, + "learning_rate": 8.155325897659963e-07, + "loss": 0.0, + "num_input_tokens_seen": 83202192, + "step": 123445 + }, + { + "epoch": 3.015904038306501, + "grad_norm": 0.002433629473671317, + "learning_rate": 8.154487763177252e-07, + "loss": 0.0003, + "num_input_tokens_seen": 83205648, + "step": 123450 + }, + { + "epoch": 3.016026189138348, + "grad_norm": 0.0015204442897811532, + "learning_rate": 8.1536496421156e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83209296, + "step": 123455 + }, + { + "epoch": 3.0161483399701954, + "grad_norm": 0.05968998745083809, + "learning_rate": 8.152811534481108e-07, + "loss": 0.0551, + "num_input_tokens_seen": 83212560, + "step": 123460 + }, + { + "epoch": 3.0162704908020426, + "grad_norm": 0.0054749189876019955, + "learning_rate": 8.151973440279862e-07, + "loss": 0.0, + "num_input_tokens_seen": 83216016, + "step": 123465 + }, + { + "epoch": 3.0163926416338893, + "grad_norm": 0.0608709380030632, + "learning_rate": 8.151135359517963e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83219152, + "step": 123470 + }, + { + "epoch": 3.0165147924657365, + "grad_norm": 0.13947315514087677, + "learning_rate": 8.150297292201509e-07, + "loss": 0.0183, + "num_input_tokens_seen": 83222608, + "step": 123475 + }, + { + "epoch": 3.0166369432975837, + "grad_norm": 0.0014263535849750042, + "learning_rate": 8.149459238336589e-07, + "loss": 0.0, + "num_input_tokens_seen": 83226000, + "step": 123480 + }, + { + "epoch": 3.016759094129431, + "grad_norm": 0.27640414237976074, + "learning_rate": 8.148621197929298e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83228880, + "step": 123485 + }, + { + "epoch": 3.016881244961278, + "grad_norm": 0.0012199397897347808, + "learning_rate": 8.147783170985734e-07, + "loss": 0.0, + "num_input_tokens_seen": 83232272, + "step": 123490 + }, + { + "epoch": 3.0170033957931253, + "grad_norm": 0.026773726567626, + "learning_rate": 8.146945157511984e-07, + "loss": 0.0, + "num_input_tokens_seen": 83235536, + "step": 123495 + }, + { + "epoch": 3.0171255466249725, + "grad_norm": 0.019472522661089897, + "learning_rate": 8.146107157514152e-07, + "loss": 0.0, + "num_input_tokens_seen": 83239056, + "step": 123500 + }, + { + "epoch": 3.0172476974568196, + "grad_norm": 0.017728516831994057, + "learning_rate": 8.145269170998326e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83242320, + "step": 123505 + }, + { + "epoch": 3.017369848288667, + "grad_norm": 0.0009785944130271673, + "learning_rate": 8.144431197970602e-07, + "loss": 0.0, + "num_input_tokens_seen": 83245648, + "step": 123510 + }, + { + "epoch": 3.017491999120514, + "grad_norm": 0.008329205214977264, + "learning_rate": 8.143593238437072e-07, + "loss": 0.0, + "num_input_tokens_seen": 83249744, + "step": 123515 + }, + { + "epoch": 3.017614149952361, + "grad_norm": 0.0062654935754835606, + "learning_rate": 8.142755292403833e-07, + "loss": 0.0, + "num_input_tokens_seen": 83253072, + "step": 123520 + }, + { + "epoch": 3.0177363007842084, + "grad_norm": 0.002115017967298627, + "learning_rate": 8.141917359876975e-07, + "loss": 0.0, + "num_input_tokens_seen": 83256848, + "step": 123525 + }, + { + "epoch": 3.0178584516160556, + "grad_norm": 0.0019589269068092108, + "learning_rate": 8.141079440862595e-07, + "loss": 0.0, + "num_input_tokens_seen": 83260496, + "step": 123530 + }, + { + "epoch": 3.017980602447903, + "grad_norm": 0.003976363688707352, + "learning_rate": 8.140241535366785e-07, + "loss": 0.0, + "num_input_tokens_seen": 83263824, + "step": 123535 + }, + { + "epoch": 3.01810275327975, + "grad_norm": 0.0012698272475972772, + "learning_rate": 8.139403643395639e-07, + "loss": 0.0004, + "num_input_tokens_seen": 83267152, + "step": 123540 + }, + { + "epoch": 3.018224904111597, + "grad_norm": 0.00013846179354004562, + "learning_rate": 8.138565764955252e-07, + "loss": 0.0, + "num_input_tokens_seen": 83270544, + "step": 123545 + }, + { + "epoch": 3.0183470549434444, + "grad_norm": 0.002126675797626376, + "learning_rate": 8.137727900051712e-07, + "loss": 0.0, + "num_input_tokens_seen": 83273424, + "step": 123550 + }, + { + "epoch": 3.0184692057752915, + "grad_norm": 0.014070043340325356, + "learning_rate": 8.136890048691116e-07, + "loss": 0.0, + "num_input_tokens_seen": 83276944, + "step": 123555 + }, + { + "epoch": 3.0185913566071383, + "grad_norm": 0.3048165440559387, + "learning_rate": 8.136052210879559e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83280144, + "step": 123560 + }, + { + "epoch": 3.0187135074389855, + "grad_norm": 0.00013424389180727303, + "learning_rate": 8.135214386623128e-07, + "loss": 0.0, + "num_input_tokens_seen": 83283728, + "step": 123565 + }, + { + "epoch": 3.0188356582708327, + "grad_norm": 0.0036720738280564547, + "learning_rate": 8.134376575927924e-07, + "loss": 0.0667, + "num_input_tokens_seen": 83286928, + "step": 123570 + }, + { + "epoch": 3.01895780910268, + "grad_norm": 0.001070171594619751, + "learning_rate": 8.133538778800032e-07, + "loss": 0.0, + "num_input_tokens_seen": 83290576, + "step": 123575 + }, + { + "epoch": 3.019079959934527, + "grad_norm": 0.001411976758390665, + "learning_rate": 8.132700995245552e-07, + "loss": 0.0, + "num_input_tokens_seen": 83293776, + "step": 123580 + }, + { + "epoch": 3.0192021107663742, + "grad_norm": 0.0027057963889092207, + "learning_rate": 8.131863225270568e-07, + "loss": 0.0, + "num_input_tokens_seen": 83297040, + "step": 123585 + }, + { + "epoch": 3.0193242615982214, + "grad_norm": 0.006092796102166176, + "learning_rate": 8.131025468881179e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83300816, + "step": 123590 + }, + { + "epoch": 3.0194464124300686, + "grad_norm": 0.10362302511930466, + "learning_rate": 8.130187726083477e-07, + "loss": 0.0533, + "num_input_tokens_seen": 83304336, + "step": 123595 + }, + { + "epoch": 3.019568563261916, + "grad_norm": 0.005874832160770893, + "learning_rate": 8.12934999688355e-07, + "loss": 0.0, + "num_input_tokens_seen": 83307600, + "step": 123600 + }, + { + "epoch": 3.019690714093763, + "grad_norm": 13.92029094696045, + "learning_rate": 8.128512281287496e-07, + "loss": 0.0414, + "num_input_tokens_seen": 83311184, + "step": 123605 + }, + { + "epoch": 3.01981286492561, + "grad_norm": 0.002932375529780984, + "learning_rate": 8.1276745793014e-07, + "loss": 0.0006, + "num_input_tokens_seen": 83314384, + "step": 123610 + }, + { + "epoch": 3.0199350157574574, + "grad_norm": 0.3603280484676361, + "learning_rate": 8.126836890931363e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83317328, + "step": 123615 + }, + { + "epoch": 3.0200571665893046, + "grad_norm": 0.04042335972189903, + "learning_rate": 8.125999216183466e-07, + "loss": 0.0, + "num_input_tokens_seen": 83321040, + "step": 123620 + }, + { + "epoch": 3.0201793174211518, + "grad_norm": 0.0001353430125163868, + "learning_rate": 8.125161555063809e-07, + "loss": 0.0, + "num_input_tokens_seen": 83324368, + "step": 123625 + }, + { + "epoch": 3.020301468252999, + "grad_norm": 0.0132133224979043, + "learning_rate": 8.124323907578485e-07, + "loss": 0.0203, + "num_input_tokens_seen": 83328016, + "step": 123630 + }, + { + "epoch": 3.020423619084846, + "grad_norm": 0.004359879996627569, + "learning_rate": 8.12348627373358e-07, + "loss": 0.0, + "num_input_tokens_seen": 83331792, + "step": 123635 + }, + { + "epoch": 3.0205457699166933, + "grad_norm": 0.013602170161902905, + "learning_rate": 8.122648653535187e-07, + "loss": 0.0, + "num_input_tokens_seen": 83335248, + "step": 123640 + }, + { + "epoch": 3.0206679207485405, + "grad_norm": 0.004602179396897554, + "learning_rate": 8.121811046989397e-07, + "loss": 0.0739, + "num_input_tokens_seen": 83338448, + "step": 123645 + }, + { + "epoch": 3.0207900715803873, + "grad_norm": 0.004291081335395575, + "learning_rate": 8.120973454102303e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83341712, + "step": 123650 + }, + { + "epoch": 3.0209122224122344, + "grad_norm": 0.0004687369801104069, + "learning_rate": 8.120135874879998e-07, + "loss": 0.0501, + "num_input_tokens_seen": 83345296, + "step": 123655 + }, + { + "epoch": 3.0210343732440816, + "grad_norm": 0.003266794141381979, + "learning_rate": 8.119298309328565e-07, + "loss": 0.0, + "num_input_tokens_seen": 83348496, + "step": 123660 + }, + { + "epoch": 3.021156524075929, + "grad_norm": 0.0014626506017521024, + "learning_rate": 8.118460757454107e-07, + "loss": 0.0, + "num_input_tokens_seen": 83351824, + "step": 123665 + }, + { + "epoch": 3.021278674907776, + "grad_norm": 0.06713787466287613, + "learning_rate": 8.117623219262702e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83355216, + "step": 123670 + }, + { + "epoch": 3.021400825739623, + "grad_norm": 0.0004317023267503828, + "learning_rate": 8.116785694760453e-07, + "loss": 0.0, + "num_input_tokens_seen": 83358928, + "step": 123675 + }, + { + "epoch": 3.0215229765714704, + "grad_norm": 0.0015613315626978874, + "learning_rate": 8.115948183953441e-07, + "loss": 0.0, + "num_input_tokens_seen": 83362128, + "step": 123680 + }, + { + "epoch": 3.0216451274033176, + "grad_norm": 20.940481185913086, + "learning_rate": 8.115110686847762e-07, + "loss": 0.0907, + "num_input_tokens_seen": 83365328, + "step": 123685 + }, + { + "epoch": 3.0217672782351648, + "grad_norm": 0.0010938859777525067, + "learning_rate": 8.114273203449504e-07, + "loss": 0.0, + "num_input_tokens_seen": 83368592, + "step": 123690 + }, + { + "epoch": 3.021889429067012, + "grad_norm": 0.0003897510759998113, + "learning_rate": 8.11343573376476e-07, + "loss": 0.0377, + "num_input_tokens_seen": 83371920, + "step": 123695 + }, + { + "epoch": 3.022011579898859, + "grad_norm": 0.018642354756593704, + "learning_rate": 8.112598277799621e-07, + "loss": 0.0, + "num_input_tokens_seen": 83375312, + "step": 123700 + }, + { + "epoch": 3.0221337307307063, + "grad_norm": 0.005920675583183765, + "learning_rate": 8.111760835560171e-07, + "loss": 0.0, + "num_input_tokens_seen": 83378896, + "step": 123705 + }, + { + "epoch": 3.0222558815625535, + "grad_norm": 0.009021357633173466, + "learning_rate": 8.110923407052507e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83381904, + "step": 123710 + }, + { + "epoch": 3.0223780323944007, + "grad_norm": 0.00507028354331851, + "learning_rate": 8.110085992282713e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83385360, + "step": 123715 + }, + { + "epoch": 3.022500183226248, + "grad_norm": 0.6077379584312439, + "learning_rate": 8.10924859125688e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83389264, + "step": 123720 + }, + { + "epoch": 3.022622334058095, + "grad_norm": 10.27883529663086, + "learning_rate": 8.108411203981106e-07, + "loss": 0.0444, + "num_input_tokens_seen": 83392784, + "step": 123725 + }, + { + "epoch": 3.0227444848899423, + "grad_norm": 0.01629662699997425, + "learning_rate": 8.107573830461469e-07, + "loss": 0.0, + "num_input_tokens_seen": 83396240, + "step": 123730 + }, + { + "epoch": 3.0228666357217895, + "grad_norm": 0.06298413127660751, + "learning_rate": 8.10673647070407e-07, + "loss": 0.0, + "num_input_tokens_seen": 83399696, + "step": 123735 + }, + { + "epoch": 3.0229887865536362, + "grad_norm": 268.79547119140625, + "learning_rate": 8.105899124714987e-07, + "loss": 0.0022, + "num_input_tokens_seen": 83403408, + "step": 123740 + }, + { + "epoch": 3.0231109373854834, + "grad_norm": 0.002456031972542405, + "learning_rate": 8.105061792500317e-07, + "loss": 0.0, + "num_input_tokens_seen": 83406608, + "step": 123745 + }, + { + "epoch": 3.0232330882173306, + "grad_norm": 0.028353121131658554, + "learning_rate": 8.10422447406615e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83410128, + "step": 123750 + }, + { + "epoch": 3.023355239049178, + "grad_norm": 0.052956756204366684, + "learning_rate": 8.10338716941857e-07, + "loss": 0.0, + "num_input_tokens_seen": 83413840, + "step": 123755 + }, + { + "epoch": 3.023477389881025, + "grad_norm": 0.0012758343946188688, + "learning_rate": 8.10254987856367e-07, + "loss": 0.0, + "num_input_tokens_seen": 83416848, + "step": 123760 + }, + { + "epoch": 3.023599540712872, + "grad_norm": 0.0033717567566782236, + "learning_rate": 8.101712601507535e-07, + "loss": 0.0688, + "num_input_tokens_seen": 83420368, + "step": 123765 + }, + { + "epoch": 3.0237216915447194, + "grad_norm": 0.025035880506038666, + "learning_rate": 8.10087533825626e-07, + "loss": 0.0312, + "num_input_tokens_seen": 83423632, + "step": 123770 + }, + { + "epoch": 3.0238438423765666, + "grad_norm": 0.025489607825875282, + "learning_rate": 8.100038088815925e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83426896, + "step": 123775 + }, + { + "epoch": 3.0239659932084137, + "grad_norm": 0.012176979333162308, + "learning_rate": 8.099200853192627e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83430352, + "step": 123780 + }, + { + "epoch": 3.024088144040261, + "grad_norm": 7.685334276175126e-05, + "learning_rate": 8.098363631392454e-07, + "loss": 0.0, + "num_input_tokens_seen": 83433744, + "step": 123785 + }, + { + "epoch": 3.024210294872108, + "grad_norm": 0.0011124322190880775, + "learning_rate": 8.09752642342149e-07, + "loss": 0.0, + "num_input_tokens_seen": 83437328, + "step": 123790 + }, + { + "epoch": 3.0243324457039553, + "grad_norm": 0.0012952962424606085, + "learning_rate": 8.096689229285827e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83440656, + "step": 123795 + }, + { + "epoch": 3.0244545965358025, + "grad_norm": 0.3104526698589325, + "learning_rate": 8.095852048991551e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83443856, + "step": 123800 + }, + { + "epoch": 3.0245767473676497, + "grad_norm": 0.0013774075778201222, + "learning_rate": 8.095014882544749e-07, + "loss": 0.0, + "num_input_tokens_seen": 83446864, + "step": 123805 + }, + { + "epoch": 3.024698898199497, + "grad_norm": 0.0012292321771383286, + "learning_rate": 8.094177729951515e-07, + "loss": 0.0, + "num_input_tokens_seen": 83450000, + "step": 123810 + }, + { + "epoch": 3.024821049031344, + "grad_norm": 0.009159058332443237, + "learning_rate": 8.093340591217928e-07, + "loss": 0.0, + "num_input_tokens_seen": 83453712, + "step": 123815 + }, + { + "epoch": 3.0249431998631913, + "grad_norm": 0.006682706065475941, + "learning_rate": 8.092503466350086e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83457040, + "step": 123820 + }, + { + "epoch": 3.025065350695038, + "grad_norm": 0.007138276472687721, + "learning_rate": 8.091666355354069e-07, + "loss": 0.0, + "num_input_tokens_seen": 83460496, + "step": 123825 + }, + { + "epoch": 3.025187501526885, + "grad_norm": 0.0004544088733382523, + "learning_rate": 8.09082925823597e-07, + "loss": 0.0, + "num_input_tokens_seen": 83463376, + "step": 123830 + }, + { + "epoch": 3.0253096523587324, + "grad_norm": 0.04789057374000549, + "learning_rate": 8.089992175001871e-07, + "loss": 0.0659, + "num_input_tokens_seen": 83466448, + "step": 123835 + }, + { + "epoch": 3.0254318031905796, + "grad_norm": 0.007176238112151623, + "learning_rate": 8.089155105657864e-07, + "loss": 0.0, + "num_input_tokens_seen": 83469712, + "step": 123840 + }, + { + "epoch": 3.0255539540224268, + "grad_norm": 17.64391326904297, + "learning_rate": 8.088318050210036e-07, + "loss": 0.0427, + "num_input_tokens_seen": 83472784, + "step": 123845 + }, + { + "epoch": 3.025676104854274, + "grad_norm": 0.09517499804496765, + "learning_rate": 8.087481008664471e-07, + "loss": 0.0, + "num_input_tokens_seen": 83476304, + "step": 123850 + }, + { + "epoch": 3.025798255686121, + "grad_norm": 0.03949768841266632, + "learning_rate": 8.086643981027264e-07, + "loss": 0.0, + "num_input_tokens_seen": 83479824, + "step": 123855 + }, + { + "epoch": 3.0259204065179683, + "grad_norm": 0.01607385464012623, + "learning_rate": 8.085806967304491e-07, + "loss": 0.0, + "num_input_tokens_seen": 83483792, + "step": 123860 + }, + { + "epoch": 3.0260425573498155, + "grad_norm": 0.0017718808958306909, + "learning_rate": 8.084969967502248e-07, + "loss": 0.0, + "num_input_tokens_seen": 83487184, + "step": 123865 + }, + { + "epoch": 3.0261647081816627, + "grad_norm": 0.06724121421575546, + "learning_rate": 8.084132981626615e-07, + "loss": 0.0, + "num_input_tokens_seen": 83490448, + "step": 123870 + }, + { + "epoch": 3.02628685901351, + "grad_norm": 0.004997440613806248, + "learning_rate": 8.083296009683683e-07, + "loss": 0.0, + "num_input_tokens_seen": 83493840, + "step": 123875 + }, + { + "epoch": 3.026409009845357, + "grad_norm": 0.0055533465929329395, + "learning_rate": 8.08245905167954e-07, + "loss": 0.0, + "num_input_tokens_seen": 83497424, + "step": 123880 + }, + { + "epoch": 3.0265311606772043, + "grad_norm": 0.01228258665651083, + "learning_rate": 8.081622107620267e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83500688, + "step": 123885 + }, + { + "epoch": 3.0266533115090515, + "grad_norm": 0.00043016637209802866, + "learning_rate": 8.08078517751196e-07, + "loss": 0.0, + "num_input_tokens_seen": 83503696, + "step": 123890 + }, + { + "epoch": 3.0267754623408987, + "grad_norm": 0.00011228966468479484, + "learning_rate": 8.079948261360693e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83507088, + "step": 123895 + }, + { + "epoch": 3.026897613172746, + "grad_norm": 0.0008924832800403237, + "learning_rate": 8.079111359172561e-07, + "loss": 0.0, + "num_input_tokens_seen": 83510224, + "step": 123900 + }, + { + "epoch": 3.027019764004593, + "grad_norm": 0.06737366318702698, + "learning_rate": 8.078274470953652e-07, + "loss": 0.0, + "num_input_tokens_seen": 83513232, + "step": 123905 + }, + { + "epoch": 3.0271419148364402, + "grad_norm": 0.00100752804428339, + "learning_rate": 8.077437596710042e-07, + "loss": 0.0, + "num_input_tokens_seen": 83516240, + "step": 123910 + }, + { + "epoch": 3.027264065668287, + "grad_norm": 40.753448486328125, + "learning_rate": 8.076600736447827e-07, + "loss": 0.068, + "num_input_tokens_seen": 83519824, + "step": 123915 + }, + { + "epoch": 3.027386216500134, + "grad_norm": 0.00984196923673153, + "learning_rate": 8.075763890173086e-07, + "loss": 0.0, + "num_input_tokens_seen": 83523408, + "step": 123920 + }, + { + "epoch": 3.0275083673319814, + "grad_norm": 0.010168238542973995, + "learning_rate": 8.074927057891911e-07, + "loss": 0.0, + "num_input_tokens_seen": 83526416, + "step": 123925 + }, + { + "epoch": 3.0276305181638286, + "grad_norm": 69.27855682373047, + "learning_rate": 8.07409023961038e-07, + "loss": 0.074, + "num_input_tokens_seen": 83529552, + "step": 123930 + }, + { + "epoch": 3.0277526689956757, + "grad_norm": 0.0016448360402137041, + "learning_rate": 8.073253435334582e-07, + "loss": 0.0, + "num_input_tokens_seen": 83532880, + "step": 123935 + }, + { + "epoch": 3.027874819827523, + "grad_norm": 0.010067627765238285, + "learning_rate": 8.072416645070607e-07, + "loss": 0.0, + "num_input_tokens_seen": 83536400, + "step": 123940 + }, + { + "epoch": 3.02799697065937, + "grad_norm": 0.0036460966803133488, + "learning_rate": 8.071579868824536e-07, + "loss": 0.0, + "num_input_tokens_seen": 83539728, + "step": 123945 + }, + { + "epoch": 3.0281191214912173, + "grad_norm": 0.0032593992073088884, + "learning_rate": 8.070743106602455e-07, + "loss": 0.0, + "num_input_tokens_seen": 83542992, + "step": 123950 + }, + { + "epoch": 3.0282412723230645, + "grad_norm": 0.0018469596980139613, + "learning_rate": 8.069906358410448e-07, + "loss": 0.0, + "num_input_tokens_seen": 83546384, + "step": 123955 + }, + { + "epoch": 3.0283634231549117, + "grad_norm": 0.004029898438602686, + "learning_rate": 8.0690696242546e-07, + "loss": 0.0, + "num_input_tokens_seen": 83549520, + "step": 123960 + }, + { + "epoch": 3.028485573986759, + "grad_norm": 0.020568979904055595, + "learning_rate": 8.068232904141002e-07, + "loss": 0.0, + "num_input_tokens_seen": 83553552, + "step": 123965 + }, + { + "epoch": 3.028607724818606, + "grad_norm": 0.0037417744752019644, + "learning_rate": 8.067396198075727e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83556816, + "step": 123970 + }, + { + "epoch": 3.0287298756504533, + "grad_norm": 0.18165583908557892, + "learning_rate": 8.066559506064873e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83559824, + "step": 123975 + }, + { + "epoch": 3.0288520264823005, + "grad_norm": 0.011169048957526684, + "learning_rate": 8.065722828114513e-07, + "loss": 0.0, + "num_input_tokens_seen": 83563088, + "step": 123980 + }, + { + "epoch": 3.0289741773141476, + "grad_norm": 0.014875008724629879, + "learning_rate": 8.064886164230742e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83566736, + "step": 123985 + }, + { + "epoch": 3.029096328145995, + "grad_norm": 0.0020445336122065783, + "learning_rate": 8.064049514419635e-07, + "loss": 0.0, + "num_input_tokens_seen": 83570704, + "step": 123990 + }, + { + "epoch": 3.029218478977842, + "grad_norm": 0.01352640800178051, + "learning_rate": 8.063212878687282e-07, + "loss": 0.0, + "num_input_tokens_seen": 83574096, + "step": 123995 + }, + { + "epoch": 3.029340629809689, + "grad_norm": 0.0033647569362074137, + "learning_rate": 8.062376257039766e-07, + "loss": 0.0, + "num_input_tokens_seen": 83577296, + "step": 124000 + }, + { + "epoch": 3.029462780641536, + "grad_norm": 0.0048996442928910255, + "learning_rate": 8.061539649483171e-07, + "loss": 0.0003, + "num_input_tokens_seen": 83580560, + "step": 124005 + }, + { + "epoch": 3.029584931473383, + "grad_norm": 0.0022092899307608604, + "learning_rate": 8.060703056023583e-07, + "loss": 0.0, + "num_input_tokens_seen": 83583696, + "step": 124010 + }, + { + "epoch": 3.0297070823052303, + "grad_norm": 38.923072814941406, + "learning_rate": 8.059866476667081e-07, + "loss": 0.0359, + "num_input_tokens_seen": 83586704, + "step": 124015 + }, + { + "epoch": 3.0298292331370775, + "grad_norm": 0.006466974038630724, + "learning_rate": 8.059029911419755e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83590160, + "step": 124020 + }, + { + "epoch": 3.0299513839689247, + "grad_norm": 0.017574403434991837, + "learning_rate": 8.058193360287681e-07, + "loss": 0.0, + "num_input_tokens_seen": 83593296, + "step": 124025 + }, + { + "epoch": 3.030073534800772, + "grad_norm": 0.0008997087134048343, + "learning_rate": 8.057356823276947e-07, + "loss": 0.0, + "num_input_tokens_seen": 83596944, + "step": 124030 + }, + { + "epoch": 3.030195685632619, + "grad_norm": 32.1270637512207, + "learning_rate": 8.056520300393642e-07, + "loss": 0.0513, + "num_input_tokens_seen": 83600144, + "step": 124035 + }, + { + "epoch": 3.0303178364644663, + "grad_norm": 0.007690535392612219, + "learning_rate": 8.055683791643839e-07, + "loss": 0.0348, + "num_input_tokens_seen": 83603152, + "step": 124040 + }, + { + "epoch": 3.0304399872963135, + "grad_norm": 0.1448715180158615, + "learning_rate": 8.054847297033633e-07, + "loss": 0.0882, + "num_input_tokens_seen": 83606288, + "step": 124045 + }, + { + "epoch": 3.0305621381281607, + "grad_norm": 0.009605436585843563, + "learning_rate": 8.054010816569094e-07, + "loss": 0.0, + "num_input_tokens_seen": 83609872, + "step": 124050 + }, + { + "epoch": 3.030684288960008, + "grad_norm": 0.18111363053321838, + "learning_rate": 8.053174350256313e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83613520, + "step": 124055 + }, + { + "epoch": 3.030806439791855, + "grad_norm": 0.0024255227763205767, + "learning_rate": 8.052337898101376e-07, + "loss": 0.0, + "num_input_tokens_seen": 83616720, + "step": 124060 + }, + { + "epoch": 3.0309285906237022, + "grad_norm": 0.09568436443805695, + "learning_rate": 8.051501460110357e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83619984, + "step": 124065 + }, + { + "epoch": 3.0310507414555494, + "grad_norm": 0.0018284890102222562, + "learning_rate": 8.050665036289347e-07, + "loss": 0.0, + "num_input_tokens_seen": 83622992, + "step": 124070 + }, + { + "epoch": 3.0311728922873966, + "grad_norm": 0.016492174938321114, + "learning_rate": 8.049828626644422e-07, + "loss": 0.0, + "num_input_tokens_seen": 83626128, + "step": 124075 + }, + { + "epoch": 3.031295043119244, + "grad_norm": 0.0006634180899709463, + "learning_rate": 8.048992231181671e-07, + "loss": 0.0098, + "num_input_tokens_seen": 83630352, + "step": 124080 + }, + { + "epoch": 3.031417193951091, + "grad_norm": 0.042172130197286606, + "learning_rate": 8.048155849907168e-07, + "loss": 0.0, + "num_input_tokens_seen": 83633616, + "step": 124085 + }, + { + "epoch": 3.031539344782938, + "grad_norm": 0.0017520836554467678, + "learning_rate": 8.047319482827003e-07, + "loss": 0.0571, + "num_input_tokens_seen": 83636688, + "step": 124090 + }, + { + "epoch": 3.031661495614785, + "grad_norm": 0.010810492560267448, + "learning_rate": 8.046483129947259e-07, + "loss": 0.0, + "num_input_tokens_seen": 83640336, + "step": 124095 + }, + { + "epoch": 3.031783646446632, + "grad_norm": 0.002398022450506687, + "learning_rate": 8.045646791274011e-07, + "loss": 0.0, + "num_input_tokens_seen": 83643600, + "step": 124100 + }, + { + "epoch": 3.0319057972784793, + "grad_norm": 0.03453930467367172, + "learning_rate": 8.04481046681335e-07, + "loss": 0.0, + "num_input_tokens_seen": 83646992, + "step": 124105 + }, + { + "epoch": 3.0320279481103265, + "grad_norm": 0.0012411042116582394, + "learning_rate": 8.043974156571351e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83650256, + "step": 124110 + }, + { + "epoch": 3.0321500989421737, + "grad_norm": 0.0029082736000418663, + "learning_rate": 8.043137860554094e-07, + "loss": 0.0325, + "num_input_tokens_seen": 83653584, + "step": 124115 + }, + { + "epoch": 3.032272249774021, + "grad_norm": 50.47825622558594, + "learning_rate": 8.042301578767671e-07, + "loss": 0.0514, + "num_input_tokens_seen": 83656592, + "step": 124120 + }, + { + "epoch": 3.032394400605868, + "grad_norm": 0.06554097682237625, + "learning_rate": 8.041465311218153e-07, + "loss": 0.0, + "num_input_tokens_seen": 83659920, + "step": 124125 + }, + { + "epoch": 3.0325165514377153, + "grad_norm": 0.03542274609208107, + "learning_rate": 8.040629057911629e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83663376, + "step": 124130 + }, + { + "epoch": 3.0326387022695624, + "grad_norm": 0.08334421366453171, + "learning_rate": 8.039792818854175e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83666576, + "step": 124135 + }, + { + "epoch": 3.0327608531014096, + "grad_norm": 0.002318961312994361, + "learning_rate": 8.038956594051878e-07, + "loss": 0.0, + "num_input_tokens_seen": 83670352, + "step": 124140 + }, + { + "epoch": 3.032883003933257, + "grad_norm": 0.0056270817294716835, + "learning_rate": 8.038120383510813e-07, + "loss": 0.0501, + "num_input_tokens_seen": 83673744, + "step": 124145 + }, + { + "epoch": 3.033005154765104, + "grad_norm": 0.04084935039281845, + "learning_rate": 8.037284187237065e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83676816, + "step": 124150 + }, + { + "epoch": 3.033127305596951, + "grad_norm": 0.028429778292775154, + "learning_rate": 8.036448005236715e-07, + "loss": 0.0, + "num_input_tokens_seen": 83680464, + "step": 124155 + }, + { + "epoch": 3.0332494564287984, + "grad_norm": 0.009558050893247128, + "learning_rate": 8.035611837515843e-07, + "loss": 0.0, + "num_input_tokens_seen": 83683856, + "step": 124160 + }, + { + "epoch": 3.0333716072606456, + "grad_norm": 0.009309230372309685, + "learning_rate": 8.034775684080532e-07, + "loss": 0.085, + "num_input_tokens_seen": 83686864, + "step": 124165 + }, + { + "epoch": 3.0334937580924928, + "grad_norm": 0.08569590002298355, + "learning_rate": 8.033939544936857e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83690384, + "step": 124170 + }, + { + "epoch": 3.03361590892434, + "grad_norm": 0.47182491421699524, + "learning_rate": 8.033103420090906e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83693968, + "step": 124175 + }, + { + "epoch": 3.033738059756187, + "grad_norm": 0.005380032118409872, + "learning_rate": 8.032267309548752e-07, + "loss": 0.0, + "num_input_tokens_seen": 83697104, + "step": 124180 + }, + { + "epoch": 3.033860210588034, + "grad_norm": 0.0026039707008749247, + "learning_rate": 8.03143121331648e-07, + "loss": 0.0, + "num_input_tokens_seen": 83700688, + "step": 124185 + }, + { + "epoch": 3.033982361419881, + "grad_norm": 0.14887675642967224, + "learning_rate": 8.030595131400174e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83704016, + "step": 124190 + }, + { + "epoch": 3.0341045122517283, + "grad_norm": 0.0026839664205908775, + "learning_rate": 8.029759063805906e-07, + "loss": 0.0, + "num_input_tokens_seen": 83708240, + "step": 124195 + }, + { + "epoch": 3.0342266630835755, + "grad_norm": 0.01625916361808777, + "learning_rate": 8.028923010539763e-07, + "loss": 0.0, + "num_input_tokens_seen": 83711568, + "step": 124200 + }, + { + "epoch": 3.0343488139154227, + "grad_norm": 0.01792566291987896, + "learning_rate": 8.028086971607818e-07, + "loss": 0.0439, + "num_input_tokens_seen": 83714896, + "step": 124205 + }, + { + "epoch": 3.03447096474727, + "grad_norm": 0.001258393982425332, + "learning_rate": 8.027250947016157e-07, + "loss": 0.0, + "num_input_tokens_seen": 83718160, + "step": 124210 + }, + { + "epoch": 3.034593115579117, + "grad_norm": 0.5693371891975403, + "learning_rate": 8.026414936770861e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83721488, + "step": 124215 + }, + { + "epoch": 3.0347152664109642, + "grad_norm": 0.017172690480947495, + "learning_rate": 8.025578940878001e-07, + "loss": 0.0751, + "num_input_tokens_seen": 83724432, + "step": 124220 + }, + { + "epoch": 3.0348374172428114, + "grad_norm": 0.009208094328641891, + "learning_rate": 8.024742959343667e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83728144, + "step": 124225 + }, + { + "epoch": 3.0349595680746586, + "grad_norm": 0.008969802409410477, + "learning_rate": 8.023906992173929e-07, + "loss": 0.041, + "num_input_tokens_seen": 83731280, + "step": 124230 + }, + { + "epoch": 3.035081718906506, + "grad_norm": 0.015506453812122345, + "learning_rate": 8.023071039374875e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83734608, + "step": 124235 + }, + { + "epoch": 3.035203869738353, + "grad_norm": 0.023344608023762703, + "learning_rate": 8.022235100952576e-07, + "loss": 0.0, + "num_input_tokens_seen": 83737936, + "step": 124240 + }, + { + "epoch": 3.0353260205702, + "grad_norm": 0.0039008702151477337, + "learning_rate": 8.021399176913115e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83741456, + "step": 124245 + }, + { + "epoch": 3.0354481714020474, + "grad_norm": 0.03828645497560501, + "learning_rate": 8.020563267262576e-07, + "loss": 0.0, + "num_input_tokens_seen": 83744720, + "step": 124250 + }, + { + "epoch": 3.0355703222338946, + "grad_norm": 0.0011847520945593715, + "learning_rate": 8.019727372007028e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83748368, + "step": 124255 + }, + { + "epoch": 3.0356924730657417, + "grad_norm": 0.00173528294544667, + "learning_rate": 8.01889149115256e-07, + "loss": 0.006, + "num_input_tokens_seen": 83751376, + "step": 124260 + }, + { + "epoch": 3.035814623897589, + "grad_norm": 0.0002933296491391957, + "learning_rate": 8.018055624705244e-07, + "loss": 0.0, + "num_input_tokens_seen": 83755152, + "step": 124265 + }, + { + "epoch": 3.0359367747294357, + "grad_norm": 0.25443074107170105, + "learning_rate": 8.017219772671158e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83758352, + "step": 124270 + }, + { + "epoch": 3.036058925561283, + "grad_norm": 0.0008950105402618647, + "learning_rate": 8.016383935056389e-07, + "loss": 0.0, + "num_input_tokens_seen": 83761744, + "step": 124275 + }, + { + "epoch": 3.03618107639313, + "grad_norm": 0.006972425617277622, + "learning_rate": 8.015548111867003e-07, + "loss": 0.0, + "num_input_tokens_seen": 83764816, + "step": 124280 + }, + { + "epoch": 3.0363032272249773, + "grad_norm": 0.0014357100008055568, + "learning_rate": 8.014712303109092e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83768528, + "step": 124285 + }, + { + "epoch": 3.0364253780568244, + "grad_norm": 110.88409423828125, + "learning_rate": 8.01387650878872e-07, + "loss": 0.0751, + "num_input_tokens_seen": 83771728, + "step": 124290 + }, + { + "epoch": 3.0365475288886716, + "grad_norm": 0.001392767415381968, + "learning_rate": 8.013040728911977e-07, + "loss": 0.0673, + "num_input_tokens_seen": 83774928, + "step": 124295 + }, + { + "epoch": 3.036669679720519, + "grad_norm": 0.0041596959345042706, + "learning_rate": 8.012204963484934e-07, + "loss": 0.0, + "num_input_tokens_seen": 83778512, + "step": 124300 + }, + { + "epoch": 3.036791830552366, + "grad_norm": 0.0020632941741496325, + "learning_rate": 8.011369212513671e-07, + "loss": 0.0, + "num_input_tokens_seen": 83781520, + "step": 124305 + }, + { + "epoch": 3.036913981384213, + "grad_norm": 0.006585871335119009, + "learning_rate": 8.010533476004267e-07, + "loss": 0.0, + "num_input_tokens_seen": 83784784, + "step": 124310 + }, + { + "epoch": 3.0370361322160604, + "grad_norm": 0.0029705450870096684, + "learning_rate": 8.009697753962799e-07, + "loss": 0.0, + "num_input_tokens_seen": 83788112, + "step": 124315 + }, + { + "epoch": 3.0371582830479076, + "grad_norm": 0.006383042316883802, + "learning_rate": 8.008862046395346e-07, + "loss": 0.0, + "num_input_tokens_seen": 83791376, + "step": 124320 + }, + { + "epoch": 3.0372804338797548, + "grad_norm": 0.15197601914405823, + "learning_rate": 8.00802635330798e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83795536, + "step": 124325 + }, + { + "epoch": 3.037402584711602, + "grad_norm": 0.023821311071515083, + "learning_rate": 8.007190674706786e-07, + "loss": 0.0, + "num_input_tokens_seen": 83798480, + "step": 124330 + }, + { + "epoch": 3.037524735543449, + "grad_norm": 0.0059493849985301495, + "learning_rate": 8.006355010597832e-07, + "loss": 0.0, + "num_input_tokens_seen": 83801680, + "step": 124335 + }, + { + "epoch": 3.0376468863752963, + "grad_norm": 0.002639399142935872, + "learning_rate": 8.005519360987201e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83805008, + "step": 124340 + }, + { + "epoch": 3.0377690372071435, + "grad_norm": 0.00016760479775257409, + "learning_rate": 8.004683725880976e-07, + "loss": 0.0, + "num_input_tokens_seen": 83808464, + "step": 124345 + }, + { + "epoch": 3.0378911880389907, + "grad_norm": 0.046830035746097565, + "learning_rate": 8.00384810528522e-07, + "loss": 0.0, + "num_input_tokens_seen": 83811728, + "step": 124350 + }, + { + "epoch": 3.038013338870838, + "grad_norm": 0.0029815714806318283, + "learning_rate": 8.003012499206025e-07, + "loss": 0.0, + "num_input_tokens_seen": 83814992, + "step": 124355 + }, + { + "epoch": 3.038135489702685, + "grad_norm": 0.0005295452428981662, + "learning_rate": 8.002176907649454e-07, + "loss": 0.0, + "num_input_tokens_seen": 83818256, + "step": 124360 + }, + { + "epoch": 3.038257640534532, + "grad_norm": 0.0025638635270297527, + "learning_rate": 8.001341330621593e-07, + "loss": 0.0, + "num_input_tokens_seen": 83821520, + "step": 124365 + }, + { + "epoch": 3.038379791366379, + "grad_norm": 0.009190984070301056, + "learning_rate": 8.000505768128517e-07, + "loss": 0.0, + "num_input_tokens_seen": 83824912, + "step": 124370 + }, + { + "epoch": 3.0385019421982262, + "grad_norm": 0.030233608558773994, + "learning_rate": 7.999670220176297e-07, + "loss": 0.0, + "num_input_tokens_seen": 83828304, + "step": 124375 + }, + { + "epoch": 3.0386240930300734, + "grad_norm": 0.002085483865812421, + "learning_rate": 7.998834686771016e-07, + "loss": 0.0, + "num_input_tokens_seen": 83832528, + "step": 124380 + }, + { + "epoch": 3.0387462438619206, + "grad_norm": 27.48931312561035, + "learning_rate": 7.997999167918745e-07, + "loss": 0.0572, + "num_input_tokens_seen": 83835856, + "step": 124385 + }, + { + "epoch": 3.038868394693768, + "grad_norm": 0.0012941723689436913, + "learning_rate": 7.997163663625566e-07, + "loss": 0.0, + "num_input_tokens_seen": 83839184, + "step": 124390 + }, + { + "epoch": 3.038990545525615, + "grad_norm": 0.04093189164996147, + "learning_rate": 7.996328173897548e-07, + "loss": 0.0, + "num_input_tokens_seen": 83842448, + "step": 124395 + }, + { + "epoch": 3.039112696357462, + "grad_norm": 0.0002823161776177585, + "learning_rate": 7.995492698740769e-07, + "loss": 0.0, + "num_input_tokens_seen": 83846288, + "step": 124400 + }, + { + "epoch": 3.0392348471893094, + "grad_norm": 0.004458566661924124, + "learning_rate": 7.994657238161311e-07, + "loss": 0.0, + "num_input_tokens_seen": 83849872, + "step": 124405 + }, + { + "epoch": 3.0393569980211566, + "grad_norm": 0.0033806823194026947, + "learning_rate": 7.99382179216524e-07, + "loss": 0.0, + "num_input_tokens_seen": 83853136, + "step": 124410 + }, + { + "epoch": 3.0394791488530037, + "grad_norm": 0.01399586908519268, + "learning_rate": 7.992986360758643e-07, + "loss": 0.0302, + "num_input_tokens_seen": 83856464, + "step": 124415 + }, + { + "epoch": 3.039601299684851, + "grad_norm": 0.008270226418972015, + "learning_rate": 7.992150943947586e-07, + "loss": 0.0, + "num_input_tokens_seen": 83859664, + "step": 124420 + }, + { + "epoch": 3.039723450516698, + "grad_norm": 0.00548640126362443, + "learning_rate": 7.991315541738147e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83862928, + "step": 124425 + }, + { + "epoch": 3.0398456013485453, + "grad_norm": 0.00551306689158082, + "learning_rate": 7.990480154136401e-07, + "loss": 0.0, + "num_input_tokens_seen": 83865808, + "step": 124430 + }, + { + "epoch": 3.0399677521803925, + "grad_norm": 0.004245460033416748, + "learning_rate": 7.989644781148422e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83869072, + "step": 124435 + }, + { + "epoch": 3.0400899030122397, + "grad_norm": 0.008234397508203983, + "learning_rate": 7.988809422780292e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83872592, + "step": 124440 + }, + { + "epoch": 3.040212053844087, + "grad_norm": 0.008850080892443657, + "learning_rate": 7.987974079038076e-07, + "loss": 0.0, + "num_input_tokens_seen": 83875728, + "step": 124445 + }, + { + "epoch": 3.0403342046759336, + "grad_norm": 170.21939086914062, + "learning_rate": 7.987138749927858e-07, + "loss": 0.1003, + "num_input_tokens_seen": 83878992, + "step": 124450 + }, + { + "epoch": 3.040456355507781, + "grad_norm": 0.014337592758238316, + "learning_rate": 7.986303435455705e-07, + "loss": 0.0, + "num_input_tokens_seen": 83881936, + "step": 124455 + }, + { + "epoch": 3.040578506339628, + "grad_norm": 0.005087694618850946, + "learning_rate": 7.985468135627696e-07, + "loss": 0.0005, + "num_input_tokens_seen": 83885648, + "step": 124460 + }, + { + "epoch": 3.040700657171475, + "grad_norm": 0.0010110632283613086, + "learning_rate": 7.984632850449903e-07, + "loss": 0.0, + "num_input_tokens_seen": 83889232, + "step": 124465 + }, + { + "epoch": 3.0408228080033224, + "grad_norm": 0.009701843373477459, + "learning_rate": 7.983797579928406e-07, + "loss": 0.0, + "num_input_tokens_seen": 83892944, + "step": 124470 + }, + { + "epoch": 3.0409449588351696, + "grad_norm": 0.08975927531719208, + "learning_rate": 7.982962324069275e-07, + "loss": 0.0, + "num_input_tokens_seen": 83895888, + "step": 124475 + }, + { + "epoch": 3.0410671096670168, + "grad_norm": 0.010530831292271614, + "learning_rate": 7.98212708287858e-07, + "loss": 0.0, + "num_input_tokens_seen": 83899856, + "step": 124480 + }, + { + "epoch": 3.041189260498864, + "grad_norm": 0.0028245660942047834, + "learning_rate": 7.981291856362405e-07, + "loss": 0.0, + "num_input_tokens_seen": 83903120, + "step": 124485 + }, + { + "epoch": 3.041311411330711, + "grad_norm": 0.000383858714485541, + "learning_rate": 7.980456644526813e-07, + "loss": 0.0, + "num_input_tokens_seen": 83906384, + "step": 124490 + }, + { + "epoch": 3.0414335621625583, + "grad_norm": 0.001335768261924386, + "learning_rate": 7.979621447377885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83909648, + "step": 124495 + }, + { + "epoch": 3.0415557129944055, + "grad_norm": 0.0008865381241776049, + "learning_rate": 7.978786264921695e-07, + "loss": 0.0372, + "num_input_tokens_seen": 83913040, + "step": 124500 + }, + { + "epoch": 3.0416778638262527, + "grad_norm": 9.840297570917755e-05, + "learning_rate": 7.977951097164312e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83916944, + "step": 124505 + }, + { + "epoch": 3.0418000146581, + "grad_norm": 0.012626949697732925, + "learning_rate": 7.977115944111819e-07, + "loss": 0.0002, + "num_input_tokens_seen": 83920016, + "step": 124510 + }, + { + "epoch": 3.041922165489947, + "grad_norm": 0.0019248174503445625, + "learning_rate": 7.976280805770275e-07, + "loss": 0.0, + "num_input_tokens_seen": 83923600, + "step": 124515 + }, + { + "epoch": 3.0420443163217943, + "grad_norm": 0.0012115843128412962, + "learning_rate": 7.975445682145766e-07, + "loss": 0.0, + "num_input_tokens_seen": 83926992, + "step": 124520 + }, + { + "epoch": 3.0421664671536415, + "grad_norm": 0.00032460439251735806, + "learning_rate": 7.974610573244362e-07, + "loss": 0.0, + "num_input_tokens_seen": 83930576, + "step": 124525 + }, + { + "epoch": 3.0422886179854887, + "grad_norm": 0.009787623770534992, + "learning_rate": 7.97377547907213e-07, + "loss": 0.0, + "num_input_tokens_seen": 83933648, + "step": 124530 + }, + { + "epoch": 3.042410768817336, + "grad_norm": 0.0008228658698499203, + "learning_rate": 7.972940399635153e-07, + "loss": 0.0, + "num_input_tokens_seen": 83937040, + "step": 124535 + }, + { + "epoch": 3.0425329196491826, + "grad_norm": 0.003583153011277318, + "learning_rate": 7.972105334939493e-07, + "loss": 0.0465, + "num_input_tokens_seen": 83940240, + "step": 124540 + }, + { + "epoch": 3.04265507048103, + "grad_norm": 0.0009180400520563126, + "learning_rate": 7.971270284991234e-07, + "loss": 0.0, + "num_input_tokens_seen": 83943376, + "step": 124545 + }, + { + "epoch": 3.042777221312877, + "grad_norm": 0.04430723562836647, + "learning_rate": 7.970435249796438e-07, + "loss": 0.0, + "num_input_tokens_seen": 83946512, + "step": 124550 + }, + { + "epoch": 3.042899372144724, + "grad_norm": 0.00021339183149393648, + "learning_rate": 7.969600229361181e-07, + "loss": 0.0, + "num_input_tokens_seen": 83950160, + "step": 124555 + }, + { + "epoch": 3.0430215229765714, + "grad_norm": 0.001275393646210432, + "learning_rate": 7.968765223691544e-07, + "loss": 0.0, + "num_input_tokens_seen": 83953168, + "step": 124560 + }, + { + "epoch": 3.0431436738084185, + "grad_norm": 0.20841944217681885, + "learning_rate": 7.967930232793589e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83956176, + "step": 124565 + }, + { + "epoch": 3.0432658246402657, + "grad_norm": 0.02498796209692955, + "learning_rate": 7.967095256673395e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83959632, + "step": 124570 + }, + { + "epoch": 3.043387975472113, + "grad_norm": 0.01822241209447384, + "learning_rate": 7.966260295337029e-07, + "loss": 0.0, + "num_input_tokens_seen": 83963280, + "step": 124575 + }, + { + "epoch": 3.04351012630396, + "grad_norm": 0.010968970134854317, + "learning_rate": 7.965425348790564e-07, + "loss": 0.0, + "num_input_tokens_seen": 83966352, + "step": 124580 + }, + { + "epoch": 3.0436322771358073, + "grad_norm": 50.25225830078125, + "learning_rate": 7.964590417040075e-07, + "loss": 0.0751, + "num_input_tokens_seen": 83969744, + "step": 124585 + }, + { + "epoch": 3.0437544279676545, + "grad_norm": 0.0015823058784008026, + "learning_rate": 7.963755500091629e-07, + "loss": 0.0, + "num_input_tokens_seen": 83972944, + "step": 124590 + }, + { + "epoch": 3.0438765787995017, + "grad_norm": 0.0021922453306615353, + "learning_rate": 7.962920597951305e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83976720, + "step": 124595 + }, + { + "epoch": 3.043998729631349, + "grad_norm": 0.000679630262311548, + "learning_rate": 7.962085710625166e-07, + "loss": 0.0, + "num_input_tokens_seen": 83979984, + "step": 124600 + }, + { + "epoch": 3.044120880463196, + "grad_norm": 0.0018834839574992657, + "learning_rate": 7.961250838119292e-07, + "loss": 0.0, + "num_input_tokens_seen": 83983184, + "step": 124605 + }, + { + "epoch": 3.0442430312950433, + "grad_norm": 0.010868406854569912, + "learning_rate": 7.960415980439747e-07, + "loss": 0.0727, + "num_input_tokens_seen": 83986576, + "step": 124610 + }, + { + "epoch": 3.0443651821268904, + "grad_norm": 0.029458479955792427, + "learning_rate": 7.959581137592606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83989776, + "step": 124615 + }, + { + "epoch": 3.0444873329587376, + "grad_norm": 0.002476650755852461, + "learning_rate": 7.95874630958394e-07, + "loss": 0.0001, + "num_input_tokens_seen": 83993360, + "step": 124620 + }, + { + "epoch": 3.044609483790585, + "grad_norm": 0.02898203395307064, + "learning_rate": 7.957911496419821e-07, + "loss": 0.0, + "num_input_tokens_seen": 83996752, + "step": 124625 + }, + { + "epoch": 3.0447316346224316, + "grad_norm": 3.074690539506264e-05, + "learning_rate": 7.95707669810632e-07, + "loss": 0.0, + "num_input_tokens_seen": 84000272, + "step": 124630 + }, + { + "epoch": 3.0448537854542788, + "grad_norm": 0.00030377108487300575, + "learning_rate": 7.956241914649503e-07, + "loss": 0.0, + "num_input_tokens_seen": 84003536, + "step": 124635 + }, + { + "epoch": 3.044975936286126, + "grad_norm": 0.005053306929767132, + "learning_rate": 7.955407146055448e-07, + "loss": 0.0, + "num_input_tokens_seen": 84007056, + "step": 124640 + }, + { + "epoch": 3.045098087117973, + "grad_norm": 0.0005974622326903045, + "learning_rate": 7.954572392330219e-07, + "loss": 0.0, + "num_input_tokens_seen": 84010064, + "step": 124645 + }, + { + "epoch": 3.0452202379498203, + "grad_norm": 30.14084243774414, + "learning_rate": 7.953737653479889e-07, + "loss": 0.0814, + "num_input_tokens_seen": 84013584, + "step": 124650 + }, + { + "epoch": 3.0453423887816675, + "grad_norm": 0.0002505712036509067, + "learning_rate": 7.952902929510534e-07, + "loss": 0.0625, + "num_input_tokens_seen": 84017168, + "step": 124655 + }, + { + "epoch": 3.0454645396135147, + "grad_norm": 0.0006067760987207294, + "learning_rate": 7.952068220428215e-07, + "loss": 0.0, + "num_input_tokens_seen": 84020944, + "step": 124660 + }, + { + "epoch": 3.045586690445362, + "grad_norm": 0.017085615545511246, + "learning_rate": 7.951233526239012e-07, + "loss": 0.0225, + "num_input_tokens_seen": 84024336, + "step": 124665 + }, + { + "epoch": 3.045708841277209, + "grad_norm": 0.003791298484429717, + "learning_rate": 7.950398846948984e-07, + "loss": 0.0, + "num_input_tokens_seen": 84027728, + "step": 124670 + }, + { + "epoch": 3.0458309921090563, + "grad_norm": 0.012227640487253666, + "learning_rate": 7.949564182564209e-07, + "loss": 0.0, + "num_input_tokens_seen": 84030928, + "step": 124675 + }, + { + "epoch": 3.0459531429409035, + "grad_norm": 0.001645068172365427, + "learning_rate": 7.948729533090758e-07, + "loss": 0.0, + "num_input_tokens_seen": 84034448, + "step": 124680 + }, + { + "epoch": 3.0460752937727507, + "grad_norm": 0.0035828205291181803, + "learning_rate": 7.947894898534693e-07, + "loss": 0.0378, + "num_input_tokens_seen": 84037520, + "step": 124685 + }, + { + "epoch": 3.046197444604598, + "grad_norm": 0.0012684467947110534, + "learning_rate": 7.947060278902091e-07, + "loss": 0.0, + "num_input_tokens_seen": 84041488, + "step": 124690 + }, + { + "epoch": 3.046319595436445, + "grad_norm": 0.00011037226795451716, + "learning_rate": 7.946225674199017e-07, + "loss": 0.0411, + "num_input_tokens_seen": 84044624, + "step": 124695 + }, + { + "epoch": 3.0464417462682922, + "grad_norm": 0.0008721097256056964, + "learning_rate": 7.945391084431546e-07, + "loss": 0.0, + "num_input_tokens_seen": 84047888, + "step": 124700 + }, + { + "epoch": 3.0465638971001394, + "grad_norm": 0.012080611661076546, + "learning_rate": 7.944556509605737e-07, + "loss": 0.0, + "num_input_tokens_seen": 84051344, + "step": 124705 + }, + { + "epoch": 3.0466860479319866, + "grad_norm": 0.0017130423802882433, + "learning_rate": 7.943721949727668e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84054416, + "step": 124710 + }, + { + "epoch": 3.046808198763834, + "grad_norm": 0.0068734074011445045, + "learning_rate": 7.94288740480341e-07, + "loss": 0.0, + "num_input_tokens_seen": 84057872, + "step": 124715 + }, + { + "epoch": 3.0469303495956805, + "grad_norm": 0.02271157130599022, + "learning_rate": 7.942052874839024e-07, + "loss": 0.0, + "num_input_tokens_seen": 84060880, + "step": 124720 + }, + { + "epoch": 3.0470525004275277, + "grad_norm": 0.009061945602297783, + "learning_rate": 7.941218359840587e-07, + "loss": 0.0, + "num_input_tokens_seen": 84063952, + "step": 124725 + }, + { + "epoch": 3.047174651259375, + "grad_norm": 22.329303741455078, + "learning_rate": 7.940383859814162e-07, + "loss": 0.0725, + "num_input_tokens_seen": 84067472, + "step": 124730 + }, + { + "epoch": 3.047296802091222, + "grad_norm": 0.033782485872507095, + "learning_rate": 7.93954937476582e-07, + "loss": 0.0, + "num_input_tokens_seen": 84071312, + "step": 124735 + }, + { + "epoch": 3.0474189529230693, + "grad_norm": 0.001782033359631896, + "learning_rate": 7.938714904701627e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84074960, + "step": 124740 + }, + { + "epoch": 3.0475411037549165, + "grad_norm": 0.001810292131267488, + "learning_rate": 7.937880449627655e-07, + "loss": 0.0365, + "num_input_tokens_seen": 84078224, + "step": 124745 + }, + { + "epoch": 3.0476632545867637, + "grad_norm": 0.0007144034607335925, + "learning_rate": 7.937046009549971e-07, + "loss": 0.012, + "num_input_tokens_seen": 84081680, + "step": 124750 + }, + { + "epoch": 3.047785405418611, + "grad_norm": 0.020684758201241493, + "learning_rate": 7.936211584474641e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84085008, + "step": 124755 + }, + { + "epoch": 3.047907556250458, + "grad_norm": 0.0005914786597713828, + "learning_rate": 7.935377174407742e-07, + "loss": 0.1145, + "num_input_tokens_seen": 84088080, + "step": 124760 + }, + { + "epoch": 3.0480297070823053, + "grad_norm": 0.0006892129895277321, + "learning_rate": 7.934542779355329e-07, + "loss": 0.0, + "num_input_tokens_seen": 84091472, + "step": 124765 + }, + { + "epoch": 3.0481518579141524, + "grad_norm": 194.6983184814453, + "learning_rate": 7.933708399323478e-07, + "loss": 0.035, + "num_input_tokens_seen": 84094736, + "step": 124770 + }, + { + "epoch": 3.0482740087459996, + "grad_norm": 0.06471144407987595, + "learning_rate": 7.932874034318256e-07, + "loss": 0.0, + "num_input_tokens_seen": 84098000, + "step": 124775 + }, + { + "epoch": 3.048396159577847, + "grad_norm": 0.000554750207811594, + "learning_rate": 7.932039684345731e-07, + "loss": 0.0738, + "num_input_tokens_seen": 84101008, + "step": 124780 + }, + { + "epoch": 3.048518310409694, + "grad_norm": 0.002123037585988641, + "learning_rate": 7.93120534941197e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84104080, + "step": 124785 + }, + { + "epoch": 3.048640461241541, + "grad_norm": 0.002121489029377699, + "learning_rate": 7.930371029523037e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84107792, + "step": 124790 + }, + { + "epoch": 3.0487626120733884, + "grad_norm": 0.0026022300589829683, + "learning_rate": 7.929536724685006e-07, + "loss": 0.0004, + "num_input_tokens_seen": 84111504, + "step": 124795 + }, + { + "epoch": 3.0488847629052356, + "grad_norm": 0.006417084950953722, + "learning_rate": 7.928702434903938e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84114832, + "step": 124800 + }, + { + "epoch": 3.0490069137370828, + "grad_norm": 0.001482818159274757, + "learning_rate": 7.927868160185901e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84118224, + "step": 124805 + }, + { + "epoch": 3.0491290645689295, + "grad_norm": 0.02109672501683235, + "learning_rate": 7.92703390053697e-07, + "loss": 0.0, + "num_input_tokens_seen": 84121360, + "step": 124810 + }, + { + "epoch": 3.0492512154007767, + "grad_norm": 0.017897358164191246, + "learning_rate": 7.926199655963201e-07, + "loss": 0.0004, + "num_input_tokens_seen": 84124432, + "step": 124815 + }, + { + "epoch": 3.049373366232624, + "grad_norm": 0.01913967914879322, + "learning_rate": 7.92536542647067e-07, + "loss": 0.0, + "num_input_tokens_seen": 84127376, + "step": 124820 + }, + { + "epoch": 3.049495517064471, + "grad_norm": 0.12012538313865662, + "learning_rate": 7.924531212065436e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84130384, + "step": 124825 + }, + { + "epoch": 3.0496176678963183, + "grad_norm": 0.01247513946145773, + "learning_rate": 7.923697012753572e-07, + "loss": 0.0, + "num_input_tokens_seen": 84133328, + "step": 124830 + }, + { + "epoch": 3.0497398187281655, + "grad_norm": 0.000963643251452595, + "learning_rate": 7.922862828541143e-07, + "loss": 0.0, + "num_input_tokens_seen": 84136592, + "step": 124835 + }, + { + "epoch": 3.0498619695600127, + "grad_norm": 0.01684432663023472, + "learning_rate": 7.922028659434209e-07, + "loss": 0.0, + "num_input_tokens_seen": 84140112, + "step": 124840 + }, + { + "epoch": 3.04998412039186, + "grad_norm": 0.007283932529389858, + "learning_rate": 7.921194505438848e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84143760, + "step": 124845 + }, + { + "epoch": 3.050106271223707, + "grad_norm": 0.004185799043625593, + "learning_rate": 7.920360366561113e-07, + "loss": 0.0, + "num_input_tokens_seen": 84147152, + "step": 124850 + }, + { + "epoch": 3.0502284220555542, + "grad_norm": 0.002467324025928974, + "learning_rate": 7.919526242807082e-07, + "loss": 0.0, + "num_input_tokens_seen": 84150288, + "step": 124855 + }, + { + "epoch": 3.0503505728874014, + "grad_norm": 0.007444701623171568, + "learning_rate": 7.918692134182812e-07, + "loss": 0.0, + "num_input_tokens_seen": 84153424, + "step": 124860 + }, + { + "epoch": 3.0504727237192486, + "grad_norm": 0.006697532255202532, + "learning_rate": 7.917858040694372e-07, + "loss": 0.0, + "num_input_tokens_seen": 84156880, + "step": 124865 + }, + { + "epoch": 3.050594874551096, + "grad_norm": 0.003599474672228098, + "learning_rate": 7.917023962347833e-07, + "loss": 0.0, + "num_input_tokens_seen": 84160336, + "step": 124870 + }, + { + "epoch": 3.050717025382943, + "grad_norm": 0.0016461735358461738, + "learning_rate": 7.916189899149251e-07, + "loss": 0.0, + "num_input_tokens_seen": 84164048, + "step": 124875 + }, + { + "epoch": 3.05083917621479, + "grad_norm": 0.0001344871852779761, + "learning_rate": 7.915355851104701e-07, + "loss": 0.0, + "num_input_tokens_seen": 84167376, + "step": 124880 + }, + { + "epoch": 3.0509613270466374, + "grad_norm": 0.192990243434906, + "learning_rate": 7.914521818220243e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84170640, + "step": 124885 + }, + { + "epoch": 3.0510834778784846, + "grad_norm": 0.006739953067153692, + "learning_rate": 7.913687800501942e-07, + "loss": 0.0679, + "num_input_tokens_seen": 84174352, + "step": 124890 + }, + { + "epoch": 3.0512056287103313, + "grad_norm": 0.0011157767148688436, + "learning_rate": 7.912853797955866e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84177616, + "step": 124895 + }, + { + "epoch": 3.0513277795421785, + "grad_norm": 0.0006541142356581986, + "learning_rate": 7.912019810588075e-07, + "loss": 0.0, + "num_input_tokens_seen": 84180752, + "step": 124900 + }, + { + "epoch": 3.0514499303740257, + "grad_norm": 0.013899991288781166, + "learning_rate": 7.911185838404642e-07, + "loss": 0.0, + "num_input_tokens_seen": 84183824, + "step": 124905 + }, + { + "epoch": 3.051572081205873, + "grad_norm": 0.11817273497581482, + "learning_rate": 7.910351881411624e-07, + "loss": 0.0, + "num_input_tokens_seen": 84187280, + "step": 124910 + }, + { + "epoch": 3.05169423203772, + "grad_norm": 0.020235439762473106, + "learning_rate": 7.909517939615092e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84190480, + "step": 124915 + }, + { + "epoch": 3.0518163828695672, + "grad_norm": 0.003976785112172365, + "learning_rate": 7.908684013021106e-07, + "loss": 0.0, + "num_input_tokens_seen": 84194448, + "step": 124920 + }, + { + "epoch": 3.0519385337014144, + "grad_norm": 0.014983579516410828, + "learning_rate": 7.907850101635731e-07, + "loss": 0.0, + "num_input_tokens_seen": 84198224, + "step": 124925 + }, + { + "epoch": 3.0520606845332616, + "grad_norm": 0.00013353618851397187, + "learning_rate": 7.907016205465035e-07, + "loss": 0.0, + "num_input_tokens_seen": 84201488, + "step": 124930 + }, + { + "epoch": 3.052182835365109, + "grad_norm": 0.005031253211200237, + "learning_rate": 7.906182324515079e-07, + "loss": 0.0648, + "num_input_tokens_seen": 84205072, + "step": 124935 + }, + { + "epoch": 3.052304986196956, + "grad_norm": 8.619089203421026e-05, + "learning_rate": 7.905348458791932e-07, + "loss": 0.0, + "num_input_tokens_seen": 84209040, + "step": 124940 + }, + { + "epoch": 3.052427137028803, + "grad_norm": 0.0009526963112875819, + "learning_rate": 7.904514608301649e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84212368, + "step": 124945 + }, + { + "epoch": 3.0525492878606504, + "grad_norm": 0.0012412937358021736, + "learning_rate": 7.903680773050304e-07, + "loss": 0.0, + "num_input_tokens_seen": 84215568, + "step": 124950 + }, + { + "epoch": 3.0526714386924976, + "grad_norm": 0.020034687593579292, + "learning_rate": 7.902846953043951e-07, + "loss": 0.0, + "num_input_tokens_seen": 84219024, + "step": 124955 + }, + { + "epoch": 3.0527935895243448, + "grad_norm": 0.0002694456488825381, + "learning_rate": 7.902013148288661e-07, + "loss": 0.0245, + "num_input_tokens_seen": 84222544, + "step": 124960 + }, + { + "epoch": 3.052915740356192, + "grad_norm": 0.10132147371768951, + "learning_rate": 7.901179358790499e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84225680, + "step": 124965 + }, + { + "epoch": 3.053037891188039, + "grad_norm": 0.009896229021251202, + "learning_rate": 7.90034558455552e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84229392, + "step": 124970 + }, + { + "epoch": 3.0531600420198863, + "grad_norm": 0.004478112794458866, + "learning_rate": 7.899511825589798e-07, + "loss": 0.0, + "num_input_tokens_seen": 84232528, + "step": 124975 + }, + { + "epoch": 3.0532821928517335, + "grad_norm": 0.044881828129291534, + "learning_rate": 7.898678081899386e-07, + "loss": 0.0, + "num_input_tokens_seen": 84235600, + "step": 124980 + }, + { + "epoch": 3.0534043436835803, + "grad_norm": 0.007525295484811068, + "learning_rate": 7.897844353490355e-07, + "loss": 0.0, + "num_input_tokens_seen": 84239440, + "step": 124985 + }, + { + "epoch": 3.0535264945154275, + "grad_norm": 0.03217082843184471, + "learning_rate": 7.897010640368767e-07, + "loss": 0.0, + "num_input_tokens_seen": 84242896, + "step": 124990 + }, + { + "epoch": 3.0536486453472746, + "grad_norm": 0.005499767605215311, + "learning_rate": 7.89617694254068e-07, + "loss": 0.004, + "num_input_tokens_seen": 84246352, + "step": 124995 + }, + { + "epoch": 3.053770796179122, + "grad_norm": 0.011192183010280132, + "learning_rate": 7.895343260012163e-07, + "loss": 0.0328, + "num_input_tokens_seen": 84249488, + "step": 125000 + }, + { + "epoch": 3.053892947010969, + "grad_norm": 0.0028834501281380653, + "learning_rate": 7.894509592789272e-07, + "loss": 0.0572, + "num_input_tokens_seen": 84252688, + "step": 125005 + }, + { + "epoch": 3.054015097842816, + "grad_norm": 0.001355986576527357, + "learning_rate": 7.893675940878079e-07, + "loss": 0.0455, + "num_input_tokens_seen": 84256336, + "step": 125010 + }, + { + "epoch": 3.0541372486746634, + "grad_norm": 0.007849025540053844, + "learning_rate": 7.892842304284634e-07, + "loss": 0.0, + "num_input_tokens_seen": 84259920, + "step": 125015 + }, + { + "epoch": 3.0542593995065106, + "grad_norm": 0.002404062310233712, + "learning_rate": 7.892008683015009e-07, + "loss": 0.0291, + "num_input_tokens_seen": 84262992, + "step": 125020 + }, + { + "epoch": 3.054381550338358, + "grad_norm": 0.0010292022489011288, + "learning_rate": 7.891175077075267e-07, + "loss": 0.0408, + "num_input_tokens_seen": 84266640, + "step": 125025 + }, + { + "epoch": 3.054503701170205, + "grad_norm": 0.0029042158275842667, + "learning_rate": 7.890341486471464e-07, + "loss": 0.0, + "num_input_tokens_seen": 84270544, + "step": 125030 + }, + { + "epoch": 3.054625852002052, + "grad_norm": 0.008272890001535416, + "learning_rate": 7.889507911209669e-07, + "loss": 0.0, + "num_input_tokens_seen": 84274000, + "step": 125035 + }, + { + "epoch": 3.0547480028338994, + "grad_norm": 0.0023922626860439777, + "learning_rate": 7.888674351295937e-07, + "loss": 0.0, + "num_input_tokens_seen": 84277008, + "step": 125040 + }, + { + "epoch": 3.0548701536657465, + "grad_norm": 0.10242076218128204, + "learning_rate": 7.887840806736335e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84280912, + "step": 125045 + }, + { + "epoch": 3.0549923044975937, + "grad_norm": 0.002840844914317131, + "learning_rate": 7.88700727753692e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84283984, + "step": 125050 + }, + { + "epoch": 3.055114455329441, + "grad_norm": 0.0061529530212283134, + "learning_rate": 7.886173763703756e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84287056, + "step": 125055 + }, + { + "epoch": 3.055236606161288, + "grad_norm": 0.0021191227715462446, + "learning_rate": 7.885340265242909e-07, + "loss": 0.0, + "num_input_tokens_seen": 84291088, + "step": 125060 + }, + { + "epoch": 3.0553587569931353, + "grad_norm": 0.0029413315933197737, + "learning_rate": 7.884506782160431e-07, + "loss": 0.0, + "num_input_tokens_seen": 84294352, + "step": 125065 + }, + { + "epoch": 3.0554809078249825, + "grad_norm": 0.04265553131699562, + "learning_rate": 7.883673314462394e-07, + "loss": 0.0572, + "num_input_tokens_seen": 84297680, + "step": 125070 + }, + { + "epoch": 3.0556030586568292, + "grad_norm": 0.002523405710235238, + "learning_rate": 7.882839862154849e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84300880, + "step": 125075 + }, + { + "epoch": 3.0557252094886764, + "grad_norm": 0.004484863486140966, + "learning_rate": 7.882006425243866e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84304272, + "step": 125080 + }, + { + "epoch": 3.0558473603205236, + "grad_norm": 0.05211414024233818, + "learning_rate": 7.8811730037355e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84307472, + "step": 125085 + }, + { + "epoch": 3.055969511152371, + "grad_norm": 0.0016424644272774458, + "learning_rate": 7.880339597635814e-07, + "loss": 0.0, + "num_input_tokens_seen": 84310736, + "step": 125090 + }, + { + "epoch": 3.056091661984218, + "grad_norm": 0.0005251190159469843, + "learning_rate": 7.879506206950872e-07, + "loss": 0.0, + "num_input_tokens_seen": 84314000, + "step": 125095 + }, + { + "epoch": 3.056213812816065, + "grad_norm": 0.0010125606786459684, + "learning_rate": 7.878672831686725e-07, + "loss": 0.0, + "num_input_tokens_seen": 84317200, + "step": 125100 + }, + { + "epoch": 3.0563359636479124, + "grad_norm": 0.006290761288255453, + "learning_rate": 7.877839471849446e-07, + "loss": 0.0, + "num_input_tokens_seen": 84321232, + "step": 125105 + }, + { + "epoch": 3.0564581144797596, + "grad_norm": 0.0041360314935445786, + "learning_rate": 7.877006127445084e-07, + "loss": 0.0, + "num_input_tokens_seen": 84325264, + "step": 125110 + }, + { + "epoch": 3.0565802653116068, + "grad_norm": 0.0011427260469645262, + "learning_rate": 7.876172798479703e-07, + "loss": 0.0034, + "num_input_tokens_seen": 84328592, + "step": 125115 + }, + { + "epoch": 3.056702416143454, + "grad_norm": 0.004253493621945381, + "learning_rate": 7.875339484959371e-07, + "loss": 0.0, + "num_input_tokens_seen": 84331728, + "step": 125120 + }, + { + "epoch": 3.056824566975301, + "grad_norm": 0.038184262812137604, + "learning_rate": 7.874506186890138e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84335248, + "step": 125125 + }, + { + "epoch": 3.0569467178071483, + "grad_norm": 0.0009650553110986948, + "learning_rate": 7.873672904278069e-07, + "loss": 0.0, + "num_input_tokens_seen": 84338384, + "step": 125130 + }, + { + "epoch": 3.0570688686389955, + "grad_norm": 0.0030284132808446884, + "learning_rate": 7.87283963712922e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84342032, + "step": 125135 + }, + { + "epoch": 3.0571910194708427, + "grad_norm": 15.960134506225586, + "learning_rate": 7.872006385449658e-07, + "loss": 0.035, + "num_input_tokens_seen": 84345232, + "step": 125140 + }, + { + "epoch": 3.05731317030269, + "grad_norm": 0.0062057022005319595, + "learning_rate": 7.871173149245436e-07, + "loss": 0.0, + "num_input_tokens_seen": 84348880, + "step": 125145 + }, + { + "epoch": 3.057435321134537, + "grad_norm": 0.0008573241648264229, + "learning_rate": 7.870339928522613e-07, + "loss": 0.0, + "num_input_tokens_seen": 84352400, + "step": 125150 + }, + { + "epoch": 3.0575574719663843, + "grad_norm": 0.0005532007780857384, + "learning_rate": 7.869506723287254e-07, + "loss": 0.0, + "num_input_tokens_seen": 84355728, + "step": 125155 + }, + { + "epoch": 3.0576796227982315, + "grad_norm": 0.022176533937454224, + "learning_rate": 7.868673533545411e-07, + "loss": 0.0, + "num_input_tokens_seen": 84359568, + "step": 125160 + }, + { + "epoch": 3.057801773630078, + "grad_norm": 0.01318781916052103, + "learning_rate": 7.867840359303153e-07, + "loss": 0.0, + "num_input_tokens_seen": 84362768, + "step": 125165 + }, + { + "epoch": 3.0579239244619254, + "grad_norm": 0.00014520602417178452, + "learning_rate": 7.867007200566527e-07, + "loss": 0.0465, + "num_input_tokens_seen": 84366480, + "step": 125170 + }, + { + "epoch": 3.0580460752937726, + "grad_norm": 0.04776505380868912, + "learning_rate": 7.866174057341601e-07, + "loss": 0.0788, + "num_input_tokens_seen": 84369744, + "step": 125175 + }, + { + "epoch": 3.05816822612562, + "grad_norm": 5.101992792333476e-05, + "learning_rate": 7.865340929634434e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84372816, + "step": 125180 + }, + { + "epoch": 3.058290376957467, + "grad_norm": 0.004976267460733652, + "learning_rate": 7.864507817451079e-07, + "loss": 0.031, + "num_input_tokens_seen": 84376208, + "step": 125185 + }, + { + "epoch": 3.058412527789314, + "grad_norm": 0.013150850310921669, + "learning_rate": 7.8636747207976e-07, + "loss": 0.0, + "num_input_tokens_seen": 84379856, + "step": 125190 + }, + { + "epoch": 3.0585346786211614, + "grad_norm": 0.0034766318276524544, + "learning_rate": 7.862841639680052e-07, + "loss": 0.0, + "num_input_tokens_seen": 84383248, + "step": 125195 + }, + { + "epoch": 3.0586568294530085, + "grad_norm": 0.002520632930099964, + "learning_rate": 7.862008574104493e-07, + "loss": 0.0, + "num_input_tokens_seen": 84386448, + "step": 125200 + }, + { + "epoch": 3.0587789802848557, + "grad_norm": 0.012875786982476711, + "learning_rate": 7.861175524076984e-07, + "loss": 0.0, + "num_input_tokens_seen": 84389840, + "step": 125205 + }, + { + "epoch": 3.058901131116703, + "grad_norm": 0.009824838489294052, + "learning_rate": 7.860342489603578e-07, + "loss": 0.0, + "num_input_tokens_seen": 84392976, + "step": 125210 + }, + { + "epoch": 3.05902328194855, + "grad_norm": 0.0018767307046800852, + "learning_rate": 7.859509470690343e-07, + "loss": 0.0383, + "num_input_tokens_seen": 84396368, + "step": 125215 + }, + { + "epoch": 3.0591454327803973, + "grad_norm": 0.001162689528428018, + "learning_rate": 7.858676467343326e-07, + "loss": 0.0, + "num_input_tokens_seen": 84399376, + "step": 125220 + }, + { + "epoch": 3.0592675836122445, + "grad_norm": 0.0010780501179397106, + "learning_rate": 7.857843479568595e-07, + "loss": 0.0008, + "num_input_tokens_seen": 84402576, + "step": 125225 + }, + { + "epoch": 3.0593897344440917, + "grad_norm": 0.0029626989271491766, + "learning_rate": 7.857010507372197e-07, + "loss": 0.0, + "num_input_tokens_seen": 84405904, + "step": 125230 + }, + { + "epoch": 3.059511885275939, + "grad_norm": 120.70935821533203, + "learning_rate": 7.856177550760197e-07, + "loss": 0.0201, + "num_input_tokens_seen": 84409104, + "step": 125235 + }, + { + "epoch": 3.059634036107786, + "grad_norm": 0.0020443014800548553, + "learning_rate": 7.85534460973865e-07, + "loss": 0.0, + "num_input_tokens_seen": 84412752, + "step": 125240 + }, + { + "epoch": 3.0597561869396332, + "grad_norm": 0.00010733292583609, + "learning_rate": 7.854511684313615e-07, + "loss": 0.0, + "num_input_tokens_seen": 84415952, + "step": 125245 + }, + { + "epoch": 3.0598783377714804, + "grad_norm": 0.00694976095110178, + "learning_rate": 7.85367877449115e-07, + "loss": 0.0523, + "num_input_tokens_seen": 84419216, + "step": 125250 + }, + { + "epoch": 3.060000488603327, + "grad_norm": 0.0008842891547828913, + "learning_rate": 7.852845880277306e-07, + "loss": 0.0, + "num_input_tokens_seen": 84422928, + "step": 125255 + }, + { + "epoch": 3.0601226394351744, + "grad_norm": 0.0027291791047900915, + "learning_rate": 7.852013001678149e-07, + "loss": 0.0871, + "num_input_tokens_seen": 84426384, + "step": 125260 + }, + { + "epoch": 3.0602447902670216, + "grad_norm": 0.032263994216918945, + "learning_rate": 7.851180138699725e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84429776, + "step": 125265 + }, + { + "epoch": 3.0603669410988688, + "grad_norm": 0.035991132259368896, + "learning_rate": 7.850347291348098e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84433424, + "step": 125270 + }, + { + "epoch": 3.060489091930716, + "grad_norm": 0.09364461898803711, + "learning_rate": 7.849514459629329e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84436432, + "step": 125275 + }, + { + "epoch": 3.060611242762563, + "grad_norm": 0.009549515321850777, + "learning_rate": 7.848681643549464e-07, + "loss": 0.0, + "num_input_tokens_seen": 84440016, + "step": 125280 + }, + { + "epoch": 3.0607333935944103, + "grad_norm": 0.009542601183056831, + "learning_rate": 7.84784884311457e-07, + "loss": 0.0, + "num_input_tokens_seen": 84443024, + "step": 125285 + }, + { + "epoch": 3.0608555444262575, + "grad_norm": 0.0017092217458412051, + "learning_rate": 7.847016058330692e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84446416, + "step": 125290 + }, + { + "epoch": 3.0609776952581047, + "grad_norm": 0.0002163954050047323, + "learning_rate": 7.846183289203898e-07, + "loss": 0.0325, + "num_input_tokens_seen": 84449488, + "step": 125295 + }, + { + "epoch": 3.061099846089952, + "grad_norm": 0.0009846043540164828, + "learning_rate": 7.845350535740236e-07, + "loss": 0.0, + "num_input_tokens_seen": 84453328, + "step": 125300 + }, + { + "epoch": 3.061221996921799, + "grad_norm": 0.002706692088395357, + "learning_rate": 7.844517797945763e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84456656, + "step": 125305 + }, + { + "epoch": 3.0613441477536463, + "grad_norm": 0.0011171639198437333, + "learning_rate": 7.843685075826538e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84459728, + "step": 125310 + }, + { + "epoch": 3.0614662985854935, + "grad_norm": 0.1492723524570465, + "learning_rate": 7.842852369388612e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84462800, + "step": 125315 + }, + { + "epoch": 3.0615884494173407, + "grad_norm": 0.0004917579353787005, + "learning_rate": 7.84201967863805e-07, + "loss": 0.0, + "num_input_tokens_seen": 84465872, + "step": 125320 + }, + { + "epoch": 3.061710600249188, + "grad_norm": 0.0017128594918176532, + "learning_rate": 7.841187003580895e-07, + "loss": 0.0, + "num_input_tokens_seen": 84470032, + "step": 125325 + }, + { + "epoch": 3.061832751081035, + "grad_norm": 0.00024660531198605895, + "learning_rate": 7.84035434422321e-07, + "loss": 0.0, + "num_input_tokens_seen": 84473296, + "step": 125330 + }, + { + "epoch": 3.061954901912882, + "grad_norm": 0.0023638547863811255, + "learning_rate": 7.839521700571053e-07, + "loss": 0.0453, + "num_input_tokens_seen": 84476816, + "step": 125335 + }, + { + "epoch": 3.062077052744729, + "grad_norm": 0.0002841923851519823, + "learning_rate": 7.838689072630471e-07, + "loss": 0.0, + "num_input_tokens_seen": 84480528, + "step": 125340 + }, + { + "epoch": 3.062199203576576, + "grad_norm": 0.0006632845615968108, + "learning_rate": 7.837856460407527e-07, + "loss": 0.0, + "num_input_tokens_seen": 84484112, + "step": 125345 + }, + { + "epoch": 3.0623213544084233, + "grad_norm": 0.12877187132835388, + "learning_rate": 7.837023863908271e-07, + "loss": 0.0, + "num_input_tokens_seen": 84487696, + "step": 125350 + }, + { + "epoch": 3.0624435052402705, + "grad_norm": 0.008744907565414906, + "learning_rate": 7.836191283138759e-07, + "loss": 0.0, + "num_input_tokens_seen": 84491280, + "step": 125355 + }, + { + "epoch": 3.0625656560721177, + "grad_norm": 0.0188330989331007, + "learning_rate": 7.835358718105046e-07, + "loss": 0.0, + "num_input_tokens_seen": 84494416, + "step": 125360 + }, + { + "epoch": 3.062687806903965, + "grad_norm": 0.014633757062256336, + "learning_rate": 7.834526168813185e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84497360, + "step": 125365 + }, + { + "epoch": 3.062809957735812, + "grad_norm": 0.0016165561974048615, + "learning_rate": 7.833693635269236e-07, + "loss": 0.0, + "num_input_tokens_seen": 84500624, + "step": 125370 + }, + { + "epoch": 3.0629321085676593, + "grad_norm": 0.000309811148326844, + "learning_rate": 7.832861117479245e-07, + "loss": 0.0, + "num_input_tokens_seen": 84503952, + "step": 125375 + }, + { + "epoch": 3.0630542593995065, + "grad_norm": 0.017329243943095207, + "learning_rate": 7.832028615449275e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84507152, + "step": 125380 + }, + { + "epoch": 3.0631764102313537, + "grad_norm": 0.0019927939865738153, + "learning_rate": 7.831196129185371e-07, + "loss": 0.0, + "num_input_tokens_seen": 84510416, + "step": 125385 + }, + { + "epoch": 3.063298561063201, + "grad_norm": 0.0007572838803753257, + "learning_rate": 7.830363658693596e-07, + "loss": 0.0, + "num_input_tokens_seen": 84513936, + "step": 125390 + }, + { + "epoch": 3.063420711895048, + "grad_norm": 0.003489340888336301, + "learning_rate": 7.82953120398e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84517392, + "step": 125395 + }, + { + "epoch": 3.0635428627268952, + "grad_norm": 0.05194596201181412, + "learning_rate": 7.828698765050636e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84520784, + "step": 125400 + }, + { + "epoch": 3.0636650135587424, + "grad_norm": 0.01658715307712555, + "learning_rate": 7.827866341911558e-07, + "loss": 0.0, + "num_input_tokens_seen": 84524048, + "step": 125405 + }, + { + "epoch": 3.0637871643905896, + "grad_norm": 0.00041749561205506325, + "learning_rate": 7.827033934568821e-07, + "loss": 0.0, + "num_input_tokens_seen": 84527248, + "step": 125410 + }, + { + "epoch": 3.063909315222437, + "grad_norm": 0.001444365014322102, + "learning_rate": 7.82620154302848e-07, + "loss": 0.0, + "num_input_tokens_seen": 84530576, + "step": 125415 + }, + { + "epoch": 3.064031466054284, + "grad_norm": 0.0007564245024695992, + "learning_rate": 7.825369167296581e-07, + "loss": 0.0, + "num_input_tokens_seen": 84533776, + "step": 125420 + }, + { + "epoch": 3.064153616886131, + "grad_norm": 0.0001561980025144294, + "learning_rate": 7.824536807379183e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84537424, + "step": 125425 + }, + { + "epoch": 3.0642757677179784, + "grad_norm": 0.002753429114818573, + "learning_rate": 7.823704463282342e-07, + "loss": 0.0, + "num_input_tokens_seen": 84540688, + "step": 125430 + }, + { + "epoch": 3.064397918549825, + "grad_norm": 0.0050643994472920895, + "learning_rate": 7.822872135012104e-07, + "loss": 0.0, + "num_input_tokens_seen": 84544144, + "step": 125435 + }, + { + "epoch": 3.0645200693816723, + "grad_norm": 0.0004163897247053683, + "learning_rate": 7.82203982257453e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84547536, + "step": 125440 + }, + { + "epoch": 3.0646422202135195, + "grad_norm": 0.004049566108733416, + "learning_rate": 7.821207525975664e-07, + "loss": 0.0, + "num_input_tokens_seen": 84551056, + "step": 125445 + }, + { + "epoch": 3.0647643710453667, + "grad_norm": 0.0017293097916990519, + "learning_rate": 7.820375245221567e-07, + "loss": 0.0, + "num_input_tokens_seen": 84554448, + "step": 125450 + }, + { + "epoch": 3.064886521877214, + "grad_norm": 0.0007199915708042681, + "learning_rate": 7.819542980318283e-07, + "loss": 0.0, + "num_input_tokens_seen": 84557840, + "step": 125455 + }, + { + "epoch": 3.065008672709061, + "grad_norm": 0.007718597538769245, + "learning_rate": 7.81871073127187e-07, + "loss": 0.0, + "num_input_tokens_seen": 84561424, + "step": 125460 + }, + { + "epoch": 3.0651308235409083, + "grad_norm": 0.005239197518676519, + "learning_rate": 7.817878498088382e-07, + "loss": 0.0, + "num_input_tokens_seen": 84565136, + "step": 125465 + }, + { + "epoch": 3.0652529743727555, + "grad_norm": 0.0002189621445722878, + "learning_rate": 7.817046280773864e-07, + "loss": 0.0, + "num_input_tokens_seen": 84568592, + "step": 125470 + }, + { + "epoch": 3.0653751252046026, + "grad_norm": 0.0008673128322698176, + "learning_rate": 7.816214079334378e-07, + "loss": 0.0, + "num_input_tokens_seen": 84571920, + "step": 125475 + }, + { + "epoch": 3.06549727603645, + "grad_norm": 7.186421134974808e-05, + "learning_rate": 7.815381893775965e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84574736, + "step": 125480 + }, + { + "epoch": 3.065619426868297, + "grad_norm": 0.0008313908474519849, + "learning_rate": 7.814549724104683e-07, + "loss": 0.0, + "num_input_tokens_seen": 84578064, + "step": 125485 + }, + { + "epoch": 3.065741577700144, + "grad_norm": 0.00026990409241989255, + "learning_rate": 7.813717570326588e-07, + "loss": 0.0, + "num_input_tokens_seen": 84581200, + "step": 125490 + }, + { + "epoch": 3.0658637285319914, + "grad_norm": 0.02783309668302536, + "learning_rate": 7.812885432447722e-07, + "loss": 0.0, + "num_input_tokens_seen": 84584464, + "step": 125495 + }, + { + "epoch": 3.0659858793638386, + "grad_norm": 0.0011254458222538233, + "learning_rate": 7.812053310474146e-07, + "loss": 0.0016, + "num_input_tokens_seen": 84587984, + "step": 125500 + }, + { + "epoch": 3.066108030195686, + "grad_norm": 23.020633697509766, + "learning_rate": 7.811221204411905e-07, + "loss": 0.0716, + "num_input_tokens_seen": 84591376, + "step": 125505 + }, + { + "epoch": 3.066230181027533, + "grad_norm": 0.0007930602878332138, + "learning_rate": 7.810389114267051e-07, + "loss": 0.0, + "num_input_tokens_seen": 84594512, + "step": 125510 + }, + { + "epoch": 3.06635233185938, + "grad_norm": 0.0006310031749308109, + "learning_rate": 7.809557040045637e-07, + "loss": 0.0, + "num_input_tokens_seen": 84597712, + "step": 125515 + }, + { + "epoch": 3.066474482691227, + "grad_norm": 0.015794072300195694, + "learning_rate": 7.808724981753712e-07, + "loss": 0.0, + "num_input_tokens_seen": 84601040, + "step": 125520 + }, + { + "epoch": 3.066596633523074, + "grad_norm": 0.00023171577777247876, + "learning_rate": 7.807892939397331e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84604176, + "step": 125525 + }, + { + "epoch": 3.0667187843549213, + "grad_norm": 0.012235916219651699, + "learning_rate": 7.807060912982538e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84607632, + "step": 125530 + }, + { + "epoch": 3.0668409351867685, + "grad_norm": 0.0012531871907413006, + "learning_rate": 7.806228902515393e-07, + "loss": 0.0, + "num_input_tokens_seen": 84611344, + "step": 125535 + }, + { + "epoch": 3.0669630860186157, + "grad_norm": 0.0702088475227356, + "learning_rate": 7.805396908001938e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84615568, + "step": 125540 + }, + { + "epoch": 3.067085236850463, + "grad_norm": 0.0007449003169313073, + "learning_rate": 7.804564929448227e-07, + "loss": 0.054, + "num_input_tokens_seen": 84618896, + "step": 125545 + }, + { + "epoch": 3.06720738768231, + "grad_norm": 0.010736105963587761, + "learning_rate": 7.803732966860311e-07, + "loss": 0.0, + "num_input_tokens_seen": 84622096, + "step": 125550 + }, + { + "epoch": 3.0673295385141572, + "grad_norm": 0.05345999822020531, + "learning_rate": 7.80290102024424e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84625680, + "step": 125555 + }, + { + "epoch": 3.0674516893460044, + "grad_norm": 0.0007100952789187431, + "learning_rate": 7.802069089606064e-07, + "loss": 0.0, + "num_input_tokens_seen": 84628944, + "step": 125560 + }, + { + "epoch": 3.0675738401778516, + "grad_norm": 0.06198933348059654, + "learning_rate": 7.801237174951833e-07, + "loss": 0.0, + "num_input_tokens_seen": 84632528, + "step": 125565 + }, + { + "epoch": 3.067695991009699, + "grad_norm": 0.00022255294607020915, + "learning_rate": 7.800405276287599e-07, + "loss": 0.0, + "num_input_tokens_seen": 84635664, + "step": 125570 + }, + { + "epoch": 3.067818141841546, + "grad_norm": 0.0003733917837962508, + "learning_rate": 7.799573393619403e-07, + "loss": 0.0, + "num_input_tokens_seen": 84638928, + "step": 125575 + }, + { + "epoch": 3.067940292673393, + "grad_norm": 0.0009603972430340946, + "learning_rate": 7.798741526953303e-07, + "loss": 0.0, + "num_input_tokens_seen": 84642448, + "step": 125580 + }, + { + "epoch": 3.0680624435052404, + "grad_norm": 0.0003000438155140728, + "learning_rate": 7.797909676295351e-07, + "loss": 0.0, + "num_input_tokens_seen": 84645840, + "step": 125585 + }, + { + "epoch": 3.0681845943370876, + "grad_norm": 0.0011194392573088408, + "learning_rate": 7.797077841651587e-07, + "loss": 0.0308, + "num_input_tokens_seen": 84648592, + "step": 125590 + }, + { + "epoch": 3.0683067451689348, + "grad_norm": 0.009211283177137375, + "learning_rate": 7.79624602302807e-07, + "loss": 0.0, + "num_input_tokens_seen": 84652112, + "step": 125595 + }, + { + "epoch": 3.068428896000782, + "grad_norm": 0.0015620322665199637, + "learning_rate": 7.79541422043084e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84655376, + "step": 125600 + }, + { + "epoch": 3.068551046832629, + "grad_norm": 0.0010103716049343348, + "learning_rate": 7.794582433865956e-07, + "loss": 0.0, + "num_input_tokens_seen": 84658640, + "step": 125605 + }, + { + "epoch": 3.068673197664476, + "grad_norm": 0.03660206124186516, + "learning_rate": 7.793750663339459e-07, + "loss": 0.0, + "num_input_tokens_seen": 84661456, + "step": 125610 + }, + { + "epoch": 3.068795348496323, + "grad_norm": 0.0035117187071591616, + "learning_rate": 7.792918908857399e-07, + "loss": 0.0, + "num_input_tokens_seen": 84664784, + "step": 125615 + }, + { + "epoch": 3.0689174993281703, + "grad_norm": 0.0003971233672928065, + "learning_rate": 7.792087170425829e-07, + "loss": 0.0, + "num_input_tokens_seen": 84668176, + "step": 125620 + }, + { + "epoch": 3.0690396501600175, + "grad_norm": 0.0019440487958490849, + "learning_rate": 7.791255448050793e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84671248, + "step": 125625 + }, + { + "epoch": 3.0691618009918646, + "grad_norm": 0.002487177960574627, + "learning_rate": 7.790423741738344e-07, + "loss": 0.0, + "num_input_tokens_seen": 84674576, + "step": 125630 + }, + { + "epoch": 3.069283951823712, + "grad_norm": 0.001050000311806798, + "learning_rate": 7.789592051494524e-07, + "loss": 0.0, + "num_input_tokens_seen": 84677904, + "step": 125635 + }, + { + "epoch": 3.069406102655559, + "grad_norm": 0.1311432123184204, + "learning_rate": 7.788760377325385e-07, + "loss": 0.0, + "num_input_tokens_seen": 84681360, + "step": 125640 + }, + { + "epoch": 3.069528253487406, + "grad_norm": 0.0007992003229446709, + "learning_rate": 7.78792871923698e-07, + "loss": 0.0, + "num_input_tokens_seen": 84684752, + "step": 125645 + }, + { + "epoch": 3.0696504043192534, + "grad_norm": 0.000563696725293994, + "learning_rate": 7.787097077235348e-07, + "loss": 0.0, + "num_input_tokens_seen": 84688464, + "step": 125650 + }, + { + "epoch": 3.0697725551511006, + "grad_norm": 0.00010724652383942157, + "learning_rate": 7.786265451326546e-07, + "loss": 0.0, + "num_input_tokens_seen": 84692368, + "step": 125655 + }, + { + "epoch": 3.069894705982948, + "grad_norm": 0.0006794543587602675, + "learning_rate": 7.785433841516614e-07, + "loss": 0.0, + "num_input_tokens_seen": 84695824, + "step": 125660 + }, + { + "epoch": 3.070016856814795, + "grad_norm": 0.010498473420739174, + "learning_rate": 7.784602247811603e-07, + "loss": 0.0, + "num_input_tokens_seen": 84699472, + "step": 125665 + }, + { + "epoch": 3.070139007646642, + "grad_norm": 0.003232223680242896, + "learning_rate": 7.783770670217561e-07, + "loss": 0.041, + "num_input_tokens_seen": 84703184, + "step": 125670 + }, + { + "epoch": 3.0702611584784893, + "grad_norm": 0.06351174414157867, + "learning_rate": 7.782939108740532e-07, + "loss": 0.0, + "num_input_tokens_seen": 84706768, + "step": 125675 + }, + { + "epoch": 3.0703833093103365, + "grad_norm": 0.0006841022404842079, + "learning_rate": 7.782107563386571e-07, + "loss": 0.0, + "num_input_tokens_seen": 84709776, + "step": 125680 + }, + { + "epoch": 3.0705054601421837, + "grad_norm": 0.006533041596412659, + "learning_rate": 7.781276034161714e-07, + "loss": 0.0284, + "num_input_tokens_seen": 84713552, + "step": 125685 + }, + { + "epoch": 3.070627610974031, + "grad_norm": 0.01073833741247654, + "learning_rate": 7.780444521072021e-07, + "loss": 0.0, + "num_input_tokens_seen": 84716816, + "step": 125690 + }, + { + "epoch": 3.070749761805878, + "grad_norm": 0.00148698256816715, + "learning_rate": 7.779613024123528e-07, + "loss": 0.0, + "num_input_tokens_seen": 84720592, + "step": 125695 + }, + { + "epoch": 3.070871912637725, + "grad_norm": 0.0003934977576136589, + "learning_rate": 7.778781543322289e-07, + "loss": 0.0, + "num_input_tokens_seen": 84723984, + "step": 125700 + }, + { + "epoch": 3.070994063469572, + "grad_norm": 0.0019493974978104234, + "learning_rate": 7.777950078674345e-07, + "loss": 0.0, + "num_input_tokens_seen": 84727312, + "step": 125705 + }, + { + "epoch": 3.0711162143014192, + "grad_norm": 0.0042640152387320995, + "learning_rate": 7.777118630185748e-07, + "loss": 0.0, + "num_input_tokens_seen": 84730768, + "step": 125710 + }, + { + "epoch": 3.0712383651332664, + "grad_norm": 0.0034844614565372467, + "learning_rate": 7.776287197862541e-07, + "loss": 0.0, + "num_input_tokens_seen": 84734032, + "step": 125715 + }, + { + "epoch": 3.0713605159651136, + "grad_norm": 0.000586310459766537, + "learning_rate": 7.775455781710774e-07, + "loss": 0.0, + "num_input_tokens_seen": 84737296, + "step": 125720 + }, + { + "epoch": 3.071482666796961, + "grad_norm": 0.0002898842503782362, + "learning_rate": 7.774624381736489e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84740752, + "step": 125725 + }, + { + "epoch": 3.071604817628808, + "grad_norm": 1.425169825553894, + "learning_rate": 7.773792997945734e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84744528, + "step": 125730 + }, + { + "epoch": 3.071726968460655, + "grad_norm": 0.000697058392688632, + "learning_rate": 7.772961630344552e-07, + "loss": 0.0, + "num_input_tokens_seen": 84747984, + "step": 125735 + }, + { + "epoch": 3.0718491192925024, + "grad_norm": 0.0004514774482231587, + "learning_rate": 7.772130278938999e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84751184, + "step": 125740 + }, + { + "epoch": 3.0719712701243496, + "grad_norm": 0.00027701619546860456, + "learning_rate": 7.771298943735108e-07, + "loss": 0.0, + "num_input_tokens_seen": 84755088, + "step": 125745 + }, + { + "epoch": 3.0720934209561968, + "grad_norm": 0.0006469250656664371, + "learning_rate": 7.770467624738935e-07, + "loss": 0.0, + "num_input_tokens_seen": 84758544, + "step": 125750 + }, + { + "epoch": 3.072215571788044, + "grad_norm": 0.05088665336370468, + "learning_rate": 7.769636321956516e-07, + "loss": 0.0, + "num_input_tokens_seen": 84762192, + "step": 125755 + }, + { + "epoch": 3.072337722619891, + "grad_norm": 0.01325522642582655, + "learning_rate": 7.768805035393907e-07, + "loss": 0.0, + "num_input_tokens_seen": 84765328, + "step": 125760 + }, + { + "epoch": 3.0724598734517383, + "grad_norm": 28.06966209411621, + "learning_rate": 7.767973765057145e-07, + "loss": 0.1304, + "num_input_tokens_seen": 84768912, + "step": 125765 + }, + { + "epoch": 3.0725820242835855, + "grad_norm": 0.0073534781113266945, + "learning_rate": 7.767142510952277e-07, + "loss": 0.0, + "num_input_tokens_seen": 84772112, + "step": 125770 + }, + { + "epoch": 3.0727041751154327, + "grad_norm": 0.001848505693487823, + "learning_rate": 7.766311273085353e-07, + "loss": 0.0072, + "num_input_tokens_seen": 84774992, + "step": 125775 + }, + { + "epoch": 3.07282632594728, + "grad_norm": 0.031989775598049164, + "learning_rate": 7.765480051462409e-07, + "loss": 0.0, + "num_input_tokens_seen": 84777936, + "step": 125780 + }, + { + "epoch": 3.072948476779127, + "grad_norm": 0.00015862514555919915, + "learning_rate": 7.764648846089501e-07, + "loss": 0.0318, + "num_input_tokens_seen": 84781456, + "step": 125785 + }, + { + "epoch": 3.073070627610974, + "grad_norm": 0.0009046062477864325, + "learning_rate": 7.763817656972662e-07, + "loss": 0.0418, + "num_input_tokens_seen": 84784656, + "step": 125790 + }, + { + "epoch": 3.073192778442821, + "grad_norm": 0.002990515436977148, + "learning_rate": 7.762986484117943e-07, + "loss": 0.0, + "num_input_tokens_seen": 84787600, + "step": 125795 + }, + { + "epoch": 3.073314929274668, + "grad_norm": 0.0013474200386554003, + "learning_rate": 7.762155327531392e-07, + "loss": 0.0307, + "num_input_tokens_seen": 84790736, + "step": 125800 + }, + { + "epoch": 3.0734370801065154, + "grad_norm": 0.005178503692150116, + "learning_rate": 7.761324187219046e-07, + "loss": 0.0, + "num_input_tokens_seen": 84794192, + "step": 125805 + }, + { + "epoch": 3.0735592309383626, + "grad_norm": 0.010057356208562851, + "learning_rate": 7.760493063186957e-07, + "loss": 0.0, + "num_input_tokens_seen": 84797456, + "step": 125810 + }, + { + "epoch": 3.0736813817702098, + "grad_norm": 0.0014329110272228718, + "learning_rate": 7.759661955441161e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84801104, + "step": 125815 + }, + { + "epoch": 3.073803532602057, + "grad_norm": 0.005202101077884436, + "learning_rate": 7.758830863987707e-07, + "loss": 0.0, + "num_input_tokens_seen": 84804496, + "step": 125820 + }, + { + "epoch": 3.073925683433904, + "grad_norm": 0.0016744352178648114, + "learning_rate": 7.757999788832639e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84807952, + "step": 125825 + }, + { + "epoch": 3.0740478342657513, + "grad_norm": 0.00039133901009336114, + "learning_rate": 7.757168729981996e-07, + "loss": 0.0, + "num_input_tokens_seen": 84811088, + "step": 125830 + }, + { + "epoch": 3.0741699850975985, + "grad_norm": 0.009436092339456081, + "learning_rate": 7.75633768744183e-07, + "loss": 0.0489, + "num_input_tokens_seen": 84814608, + "step": 125835 + }, + { + "epoch": 3.0742921359294457, + "grad_norm": 0.04576566070318222, + "learning_rate": 7.755506661218174e-07, + "loss": 0.0, + "num_input_tokens_seen": 84817680, + "step": 125840 + }, + { + "epoch": 3.074414286761293, + "grad_norm": 0.0022139260545372963, + "learning_rate": 7.754675651317083e-07, + "loss": 0.0005, + "num_input_tokens_seen": 84821328, + "step": 125845 + }, + { + "epoch": 3.07453643759314, + "grad_norm": 0.00011124507727799937, + "learning_rate": 7.753844657744591e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84824848, + "step": 125850 + }, + { + "epoch": 3.0746585884249873, + "grad_norm": 0.0007051240536384284, + "learning_rate": 7.753013680506747e-07, + "loss": 0.0, + "num_input_tokens_seen": 84827792, + "step": 125855 + }, + { + "epoch": 3.0747807392568345, + "grad_norm": 0.0007837665034458041, + "learning_rate": 7.752182719609589e-07, + "loss": 0.0, + "num_input_tokens_seen": 84831248, + "step": 125860 + }, + { + "epoch": 3.0749028900886817, + "grad_norm": 0.0006996446172706783, + "learning_rate": 7.751351775059165e-07, + "loss": 0.0, + "num_input_tokens_seen": 84834448, + "step": 125865 + }, + { + "epoch": 3.075025040920529, + "grad_norm": 0.02283223159611225, + "learning_rate": 7.750520846861515e-07, + "loss": 0.0, + "num_input_tokens_seen": 84837968, + "step": 125870 + }, + { + "epoch": 3.075147191752376, + "grad_norm": 0.0009633800364099443, + "learning_rate": 7.749689935022683e-07, + "loss": 0.0, + "num_input_tokens_seen": 84840976, + "step": 125875 + }, + { + "epoch": 3.075269342584223, + "grad_norm": 0.0018521981546655297, + "learning_rate": 7.748859039548713e-07, + "loss": 0.0, + "num_input_tokens_seen": 84844240, + "step": 125880 + }, + { + "epoch": 3.07539149341607, + "grad_norm": 0.0005762922228313982, + "learning_rate": 7.748028160445641e-07, + "loss": 0.05, + "num_input_tokens_seen": 84848656, + "step": 125885 + }, + { + "epoch": 3.075513644247917, + "grad_norm": 0.005822031758725643, + "learning_rate": 7.747197297719515e-07, + "loss": 0.0, + "num_input_tokens_seen": 84851856, + "step": 125890 + }, + { + "epoch": 3.0756357950797644, + "grad_norm": 0.011522366665303707, + "learning_rate": 7.746366451376379e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84855056, + "step": 125895 + }, + { + "epoch": 3.0757579459116116, + "grad_norm": 0.16439086198806763, + "learning_rate": 7.745535621422269e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84858704, + "step": 125900 + }, + { + "epoch": 3.0758800967434587, + "grad_norm": 0.00033246594830416143, + "learning_rate": 7.744704807863234e-07, + "loss": 0.0761, + "num_input_tokens_seen": 84862480, + "step": 125905 + }, + { + "epoch": 3.076002247575306, + "grad_norm": 0.1619579792022705, + "learning_rate": 7.743874010705308e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84866256, + "step": 125910 + }, + { + "epoch": 3.076124398407153, + "grad_norm": 0.0010160075034946203, + "learning_rate": 7.743043229954542e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84869776, + "step": 125915 + }, + { + "epoch": 3.0762465492390003, + "grad_norm": 0.000158069291501306, + "learning_rate": 7.74221246561697e-07, + "loss": 0.0444, + "num_input_tokens_seen": 84873488, + "step": 125920 + }, + { + "epoch": 3.0763687000708475, + "grad_norm": 0.0011093869106844068, + "learning_rate": 7.741381717698634e-07, + "loss": 0.0, + "num_input_tokens_seen": 84876880, + "step": 125925 + }, + { + "epoch": 3.0764908509026947, + "grad_norm": 0.0034708997700363398, + "learning_rate": 7.740550986205582e-07, + "loss": 0.0, + "num_input_tokens_seen": 84880208, + "step": 125930 + }, + { + "epoch": 3.076613001734542, + "grad_norm": 0.006403934210538864, + "learning_rate": 7.739720271143847e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84883088, + "step": 125935 + }, + { + "epoch": 3.076735152566389, + "grad_norm": 0.0007148389122448862, + "learning_rate": 7.738889572519478e-07, + "loss": 0.0, + "num_input_tokens_seen": 84886800, + "step": 125940 + }, + { + "epoch": 3.0768573033982363, + "grad_norm": 0.02925863303244114, + "learning_rate": 7.738058890338508e-07, + "loss": 0.0013, + "num_input_tokens_seen": 84890256, + "step": 125945 + }, + { + "epoch": 3.0769794542300835, + "grad_norm": 0.001407115370966494, + "learning_rate": 7.737228224606982e-07, + "loss": 0.0, + "num_input_tokens_seen": 84893520, + "step": 125950 + }, + { + "epoch": 3.0771016050619306, + "grad_norm": 0.0008443163824267685, + "learning_rate": 7.736397575330946e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84897232, + "step": 125955 + }, + { + "epoch": 3.077223755893778, + "grad_norm": 0.003405147697776556, + "learning_rate": 7.735566942516431e-07, + "loss": 0.0, + "num_input_tokens_seen": 84900688, + "step": 125960 + }, + { + "epoch": 3.0773459067256246, + "grad_norm": 0.005903087090700865, + "learning_rate": 7.734736326169487e-07, + "loss": 0.0, + "num_input_tokens_seen": 84904208, + "step": 125965 + }, + { + "epoch": 3.0774680575574718, + "grad_norm": 0.011653275229036808, + "learning_rate": 7.733905726296146e-07, + "loss": 0.0, + "num_input_tokens_seen": 84907280, + "step": 125970 + }, + { + "epoch": 3.077590208389319, + "grad_norm": 0.0014293314889073372, + "learning_rate": 7.733075142902453e-07, + "loss": 0.0, + "num_input_tokens_seen": 84910416, + "step": 125975 + }, + { + "epoch": 3.077712359221166, + "grad_norm": 0.0037776813842356205, + "learning_rate": 7.732244575994447e-07, + "loss": 0.0, + "num_input_tokens_seen": 84913808, + "step": 125980 + }, + { + "epoch": 3.0778345100530133, + "grad_norm": 16.149124145507812, + "learning_rate": 7.731414025578168e-07, + "loss": 0.0553, + "num_input_tokens_seen": 84917136, + "step": 125985 + }, + { + "epoch": 3.0779566608848605, + "grad_norm": 0.0021692479494959116, + "learning_rate": 7.730583491659659e-07, + "loss": 0.0, + "num_input_tokens_seen": 84920784, + "step": 125990 + }, + { + "epoch": 3.0780788117167077, + "grad_norm": 0.003136912826448679, + "learning_rate": 7.729752974244953e-07, + "loss": 0.0, + "num_input_tokens_seen": 84924368, + "step": 125995 + }, + { + "epoch": 3.078200962548555, + "grad_norm": 0.0010444693034514785, + "learning_rate": 7.728922473340098e-07, + "loss": 0.0663, + "num_input_tokens_seen": 84928080, + "step": 126000 + }, + { + "epoch": 3.078323113380402, + "grad_norm": 0.0024431466590613127, + "learning_rate": 7.728091988951126e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84931280, + "step": 126005 + }, + { + "epoch": 3.0784452642122493, + "grad_norm": 0.011616510339081287, + "learning_rate": 7.727261521084085e-07, + "loss": 0.0, + "num_input_tokens_seen": 84934672, + "step": 126010 + }, + { + "epoch": 3.0785674150440965, + "grad_norm": 0.0012070763623341918, + "learning_rate": 7.726431069745004e-07, + "loss": 0.0, + "num_input_tokens_seen": 84938256, + "step": 126015 + }, + { + "epoch": 3.0786895658759437, + "grad_norm": 0.002955326810479164, + "learning_rate": 7.725600634939931e-07, + "loss": 0.0, + "num_input_tokens_seen": 84941328, + "step": 126020 + }, + { + "epoch": 3.078811716707791, + "grad_norm": 0.001535121351480484, + "learning_rate": 7.724770216674901e-07, + "loss": 0.0, + "num_input_tokens_seen": 84944720, + "step": 126025 + }, + { + "epoch": 3.078933867539638, + "grad_norm": 0.004278361797332764, + "learning_rate": 7.723939814955955e-07, + "loss": 0.0, + "num_input_tokens_seen": 84947792, + "step": 126030 + }, + { + "epoch": 3.0790560183714852, + "grad_norm": 0.018353991210460663, + "learning_rate": 7.723109429789132e-07, + "loss": 0.0, + "num_input_tokens_seen": 84951248, + "step": 126035 + }, + { + "epoch": 3.0791781692033324, + "grad_norm": 0.08312410116195679, + "learning_rate": 7.722279061180465e-07, + "loss": 0.0, + "num_input_tokens_seen": 84954128, + "step": 126040 + }, + { + "epoch": 3.0793003200351796, + "grad_norm": 0.007280240301042795, + "learning_rate": 7.721448709135998e-07, + "loss": 0.0, + "num_input_tokens_seen": 84957648, + "step": 126045 + }, + { + "epoch": 3.079422470867027, + "grad_norm": 0.0015203187940642238, + "learning_rate": 7.720618373661774e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84960912, + "step": 126050 + }, + { + "epoch": 3.079544621698874, + "grad_norm": 0.001035342924296856, + "learning_rate": 7.719788054763821e-07, + "loss": 0.0, + "num_input_tokens_seen": 84964048, + "step": 126055 + }, + { + "epoch": 3.0796667725307207, + "grad_norm": 0.001273818896152079, + "learning_rate": 7.718957752448187e-07, + "loss": 0.0, + "num_input_tokens_seen": 84967696, + "step": 126060 + }, + { + "epoch": 3.079788923362568, + "grad_norm": 23.178403854370117, + "learning_rate": 7.718127466720901e-07, + "loss": 0.0857, + "num_input_tokens_seen": 84971088, + "step": 126065 + }, + { + "epoch": 3.079911074194415, + "grad_norm": 0.0031698124948889017, + "learning_rate": 7.71729719758801e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84974608, + "step": 126070 + }, + { + "epoch": 3.0800332250262623, + "grad_norm": 0.00352927902713418, + "learning_rate": 7.716466945055546e-07, + "loss": 0.0838, + "num_input_tokens_seen": 84977936, + "step": 126075 + }, + { + "epoch": 3.0801553758581095, + "grad_norm": 0.0046735224314033985, + "learning_rate": 7.715636709129547e-07, + "loss": 0.0003, + "num_input_tokens_seen": 84980880, + "step": 126080 + }, + { + "epoch": 3.0802775266899567, + "grad_norm": 0.21390970051288605, + "learning_rate": 7.714806489816056e-07, + "loss": 0.0002, + "num_input_tokens_seen": 84983696, + "step": 126085 + }, + { + "epoch": 3.080399677521804, + "grad_norm": 0.0023563490249216557, + "learning_rate": 7.713976287121102e-07, + "loss": 0.0, + "num_input_tokens_seen": 84987344, + "step": 126090 + }, + { + "epoch": 3.080521828353651, + "grad_norm": 0.004802169743925333, + "learning_rate": 7.713146101050733e-07, + "loss": 0.0, + "num_input_tokens_seen": 84990736, + "step": 126095 + }, + { + "epoch": 3.0806439791854983, + "grad_norm": 0.14076021313667297, + "learning_rate": 7.712315931610976e-07, + "loss": 0.0001, + "num_input_tokens_seen": 84994064, + "step": 126100 + }, + { + "epoch": 3.0807661300173454, + "grad_norm": 23.542325973510742, + "learning_rate": 7.711485778807872e-07, + "loss": 0.0533, + "num_input_tokens_seen": 84997264, + "step": 126105 + }, + { + "epoch": 3.0808882808491926, + "grad_norm": 0.0003235576150473207, + "learning_rate": 7.710655642647463e-07, + "loss": 0.0, + "num_input_tokens_seen": 85000656, + "step": 126110 + }, + { + "epoch": 3.08101043168104, + "grad_norm": 0.0005096203531138599, + "learning_rate": 7.709825523135778e-07, + "loss": 0.0003, + "num_input_tokens_seen": 85004048, + "step": 126115 + }, + { + "epoch": 3.081132582512887, + "grad_norm": 0.00011765821545850486, + "learning_rate": 7.708995420278864e-07, + "loss": 0.0, + "num_input_tokens_seen": 85007568, + "step": 126120 + }, + { + "epoch": 3.081254733344734, + "grad_norm": 0.0091170035302639, + "learning_rate": 7.708165334082746e-07, + "loss": 0.0, + "num_input_tokens_seen": 85011344, + "step": 126125 + }, + { + "epoch": 3.0813768841765814, + "grad_norm": 0.004456630442291498, + "learning_rate": 7.707335264553469e-07, + "loss": 0.0, + "num_input_tokens_seen": 85014544, + "step": 126130 + }, + { + "epoch": 3.0814990350084286, + "grad_norm": 0.004846032243221998, + "learning_rate": 7.706505211697065e-07, + "loss": 0.0, + "num_input_tokens_seen": 85018064, + "step": 126135 + }, + { + "epoch": 3.081621185840276, + "grad_norm": 0.005253411363810301, + "learning_rate": 7.70567517551957e-07, + "loss": 0.0, + "num_input_tokens_seen": 85021392, + "step": 126140 + }, + { + "epoch": 3.0817433366721225, + "grad_norm": 0.0008864422561600804, + "learning_rate": 7.704845156027027e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85024528, + "step": 126145 + }, + { + "epoch": 3.0818654875039697, + "grad_norm": 101.36573028564453, + "learning_rate": 7.704015153225463e-07, + "loss": 0.0418, + "num_input_tokens_seen": 85027728, + "step": 126150 + }, + { + "epoch": 3.081987638335817, + "grad_norm": 0.0012882634764537215, + "learning_rate": 7.703185167120922e-07, + "loss": 0.0, + "num_input_tokens_seen": 85030864, + "step": 126155 + }, + { + "epoch": 3.082109789167664, + "grad_norm": 0.0049063474871218204, + "learning_rate": 7.702355197719432e-07, + "loss": 0.0, + "num_input_tokens_seen": 85034512, + "step": 126160 + }, + { + "epoch": 3.0822319399995113, + "grad_norm": 0.007059808354824781, + "learning_rate": 7.701525245027039e-07, + "loss": 0.067, + "num_input_tokens_seen": 85038160, + "step": 126165 + }, + { + "epoch": 3.0823540908313585, + "grad_norm": 0.007309371139854193, + "learning_rate": 7.700695309049766e-07, + "loss": 0.0, + "num_input_tokens_seen": 85041616, + "step": 126170 + }, + { + "epoch": 3.0824762416632057, + "grad_norm": 0.0031766737811267376, + "learning_rate": 7.699865389793659e-07, + "loss": 0.0, + "num_input_tokens_seen": 85044880, + "step": 126175 + }, + { + "epoch": 3.082598392495053, + "grad_norm": 0.0005614809924736619, + "learning_rate": 7.699035487264749e-07, + "loss": 0.0, + "num_input_tokens_seen": 85047760, + "step": 126180 + }, + { + "epoch": 3.0827205433269, + "grad_norm": 0.008097624406218529, + "learning_rate": 7.698205601469072e-07, + "loss": 0.0, + "num_input_tokens_seen": 85051216, + "step": 126185 + }, + { + "epoch": 3.0828426941587472, + "grad_norm": 0.002300823340192437, + "learning_rate": 7.697375732412665e-07, + "loss": 0.0, + "num_input_tokens_seen": 85054736, + "step": 126190 + }, + { + "epoch": 3.0829648449905944, + "grad_norm": 0.00025516541791148484, + "learning_rate": 7.696545880101556e-07, + "loss": 0.0, + "num_input_tokens_seen": 85058192, + "step": 126195 + }, + { + "epoch": 3.0830869958224416, + "grad_norm": 0.9779638648033142, + "learning_rate": 7.695716044541786e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85061456, + "step": 126200 + }, + { + "epoch": 3.083209146654289, + "grad_norm": 0.002006381982937455, + "learning_rate": 7.694886225739392e-07, + "loss": 0.0, + "num_input_tokens_seen": 85064656, + "step": 126205 + }, + { + "epoch": 3.083331297486136, + "grad_norm": 0.024691827595233917, + "learning_rate": 7.694056423700401e-07, + "loss": 0.0, + "num_input_tokens_seen": 85067856, + "step": 126210 + }, + { + "epoch": 3.083453448317983, + "grad_norm": 0.008902209810912609, + "learning_rate": 7.693226638430857e-07, + "loss": 0.0433, + "num_input_tokens_seen": 85071120, + "step": 126215 + }, + { + "epoch": 3.0835755991498304, + "grad_norm": 0.0031682380940765142, + "learning_rate": 7.692396869936784e-07, + "loss": 0.0, + "num_input_tokens_seen": 85074384, + "step": 126220 + }, + { + "epoch": 3.0836977499816776, + "grad_norm": 0.0010243651922792196, + "learning_rate": 7.691567118224228e-07, + "loss": 0.0, + "num_input_tokens_seen": 85078224, + "step": 126225 + }, + { + "epoch": 3.0838199008135248, + "grad_norm": 0.03558716922998428, + "learning_rate": 7.690737383299212e-07, + "loss": 0.0, + "num_input_tokens_seen": 85081296, + "step": 126230 + }, + { + "epoch": 3.0839420516453715, + "grad_norm": 0.007264365907758474, + "learning_rate": 7.689907665167775e-07, + "loss": 0.0, + "num_input_tokens_seen": 85084816, + "step": 126235 + }, + { + "epoch": 3.0840642024772187, + "grad_norm": 0.002083815401419997, + "learning_rate": 7.689077963835955e-07, + "loss": 0.0, + "num_input_tokens_seen": 85088272, + "step": 126240 + }, + { + "epoch": 3.084186353309066, + "grad_norm": 0.00029359650216065347, + "learning_rate": 7.688248279309776e-07, + "loss": 0.0, + "num_input_tokens_seen": 85091792, + "step": 126245 + }, + { + "epoch": 3.084308504140913, + "grad_norm": 0.001116403378546238, + "learning_rate": 7.687418611595282e-07, + "loss": 0.0541, + "num_input_tokens_seen": 85094992, + "step": 126250 + }, + { + "epoch": 3.0844306549727603, + "grad_norm": 0.002682020887732506, + "learning_rate": 7.686588960698497e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85098512, + "step": 126255 + }, + { + "epoch": 3.0845528058046074, + "grad_norm": 0.0009902494493871927, + "learning_rate": 7.685759326625461e-07, + "loss": 0.0, + "num_input_tokens_seen": 85101712, + "step": 126260 + }, + { + "epoch": 3.0846749566364546, + "grad_norm": 0.05289221554994583, + "learning_rate": 7.684929709382209e-07, + "loss": 0.0, + "num_input_tokens_seen": 85105040, + "step": 126265 + }, + { + "epoch": 3.084797107468302, + "grad_norm": 0.00030390024767257273, + "learning_rate": 7.684100108974766e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85108304, + "step": 126270 + }, + { + "epoch": 3.084919258300149, + "grad_norm": 0.0018810959300026298, + "learning_rate": 7.683270525409175e-07, + "loss": 0.0, + "num_input_tokens_seen": 85111632, + "step": 126275 + }, + { + "epoch": 3.085041409131996, + "grad_norm": 0.025849755853414536, + "learning_rate": 7.682440958691461e-07, + "loss": 0.0003, + "num_input_tokens_seen": 85115408, + "step": 126280 + }, + { + "epoch": 3.0851635599638434, + "grad_norm": 0.0540730357170105, + "learning_rate": 7.681611408827661e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85119056, + "step": 126285 + }, + { + "epoch": 3.0852857107956906, + "grad_norm": 0.038840051740407944, + "learning_rate": 7.680781875823805e-07, + "loss": 0.0, + "num_input_tokens_seen": 85122384, + "step": 126290 + }, + { + "epoch": 3.0854078616275378, + "grad_norm": 0.0032664036843925714, + "learning_rate": 7.679952359685925e-07, + "loss": 0.0, + "num_input_tokens_seen": 85125584, + "step": 126295 + }, + { + "epoch": 3.085530012459385, + "grad_norm": 0.00981723889708519, + "learning_rate": 7.679122860420059e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85129808, + "step": 126300 + }, + { + "epoch": 3.085652163291232, + "grad_norm": 0.00037645341944880784, + "learning_rate": 7.678293378032234e-07, + "loss": 0.0, + "num_input_tokens_seen": 85132752, + "step": 126305 + }, + { + "epoch": 3.0857743141230793, + "grad_norm": 0.0018895752727985382, + "learning_rate": 7.677463912528487e-07, + "loss": 0.0, + "num_input_tokens_seen": 85135888, + "step": 126310 + }, + { + "epoch": 3.0858964649549265, + "grad_norm": 0.059392791241407394, + "learning_rate": 7.676634463914843e-07, + "loss": 0.0, + "num_input_tokens_seen": 85139728, + "step": 126315 + }, + { + "epoch": 3.0860186157867737, + "grad_norm": 0.0005743264337070286, + "learning_rate": 7.675805032197342e-07, + "loss": 0.0, + "num_input_tokens_seen": 85143248, + "step": 126320 + }, + { + "epoch": 3.0861407666186205, + "grad_norm": 0.6246962547302246, + "learning_rate": 7.674975617382007e-07, + "loss": 0.0003, + "num_input_tokens_seen": 85146576, + "step": 126325 + }, + { + "epoch": 3.0862629174504677, + "grad_norm": 0.016369406133890152, + "learning_rate": 7.674146219474877e-07, + "loss": 0.0, + "num_input_tokens_seen": 85149904, + "step": 126330 + }, + { + "epoch": 3.086385068282315, + "grad_norm": 0.004355208016932011, + "learning_rate": 7.673316838481982e-07, + "loss": 0.0, + "num_input_tokens_seen": 85153232, + "step": 126335 + }, + { + "epoch": 3.086507219114162, + "grad_norm": 0.034227363765239716, + "learning_rate": 7.672487474409353e-07, + "loss": 0.0, + "num_input_tokens_seen": 85156304, + "step": 126340 + }, + { + "epoch": 3.0866293699460092, + "grad_norm": 0.00046993931755423546, + "learning_rate": 7.671658127263023e-07, + "loss": 0.0, + "num_input_tokens_seen": 85159824, + "step": 126345 + }, + { + "epoch": 3.0867515207778564, + "grad_norm": 0.009331612847745419, + "learning_rate": 7.670828797049017e-07, + "loss": 0.0535, + "num_input_tokens_seen": 85163088, + "step": 126350 + }, + { + "epoch": 3.0868736716097036, + "grad_norm": 0.0005309986299835145, + "learning_rate": 7.66999948377337e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85166928, + "step": 126355 + }, + { + "epoch": 3.086995822441551, + "grad_norm": 0.0022755253594368696, + "learning_rate": 7.669170187442119e-07, + "loss": 0.0, + "num_input_tokens_seen": 85170064, + "step": 126360 + }, + { + "epoch": 3.087117973273398, + "grad_norm": 0.006053520832210779, + "learning_rate": 7.668340908061284e-07, + "loss": 0.0, + "num_input_tokens_seen": 85173712, + "step": 126365 + }, + { + "epoch": 3.087240124105245, + "grad_norm": 0.0020175171084702015, + "learning_rate": 7.667511645636905e-07, + "loss": 0.0, + "num_input_tokens_seen": 85176848, + "step": 126370 + }, + { + "epoch": 3.0873622749370924, + "grad_norm": 0.0023011830635368824, + "learning_rate": 7.666682400175005e-07, + "loss": 0.0, + "num_input_tokens_seen": 85179920, + "step": 126375 + }, + { + "epoch": 3.0874844257689396, + "grad_norm": 0.002503541298210621, + "learning_rate": 7.665853171681622e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85182928, + "step": 126380 + }, + { + "epoch": 3.0876065766007867, + "grad_norm": 0.000122290279250592, + "learning_rate": 7.665023960162781e-07, + "loss": 0.0, + "num_input_tokens_seen": 85186064, + "step": 126385 + }, + { + "epoch": 3.087728727432634, + "grad_norm": 0.0057792747393250465, + "learning_rate": 7.664194765624512e-07, + "loss": 0.0, + "num_input_tokens_seen": 85189072, + "step": 126390 + }, + { + "epoch": 3.087850878264481, + "grad_norm": 32.82571792602539, + "learning_rate": 7.66336558807285e-07, + "loss": 0.0546, + "num_input_tokens_seen": 85192528, + "step": 126395 + }, + { + "epoch": 3.0879730290963283, + "grad_norm": 0.002696740673854947, + "learning_rate": 7.662536427513818e-07, + "loss": 0.0, + "num_input_tokens_seen": 85196240, + "step": 126400 + }, + { + "epoch": 3.0880951799281755, + "grad_norm": 0.005557489115744829, + "learning_rate": 7.661707283953455e-07, + "loss": 0.0, + "num_input_tokens_seen": 85199632, + "step": 126405 + }, + { + "epoch": 3.0882173307600223, + "grad_norm": 0.0012418734841048717, + "learning_rate": 7.660878157397779e-07, + "loss": 0.0, + "num_input_tokens_seen": 85203280, + "step": 126410 + }, + { + "epoch": 3.0883394815918694, + "grad_norm": 0.010740534402430058, + "learning_rate": 7.660049047852834e-07, + "loss": 0.0, + "num_input_tokens_seen": 85206800, + "step": 126415 + }, + { + "epoch": 3.0884616324237166, + "grad_norm": 0.0020704532507807016, + "learning_rate": 7.659219955324635e-07, + "loss": 0.0, + "num_input_tokens_seen": 85209808, + "step": 126420 + }, + { + "epoch": 3.088583783255564, + "grad_norm": 0.0006153500871732831, + "learning_rate": 7.658390879819218e-07, + "loss": 0.0868, + "num_input_tokens_seen": 85213328, + "step": 126425 + }, + { + "epoch": 3.088705934087411, + "grad_norm": 0.00045507983304560184, + "learning_rate": 7.657561821342617e-07, + "loss": 0.0, + "num_input_tokens_seen": 85216400, + "step": 126430 + }, + { + "epoch": 3.088828084919258, + "grad_norm": 0.02990969642996788, + "learning_rate": 7.656732779900856e-07, + "loss": 0.0, + "num_input_tokens_seen": 85219600, + "step": 126435 + }, + { + "epoch": 3.0889502357511054, + "grad_norm": 0.0035446241963654757, + "learning_rate": 7.655903755499961e-07, + "loss": 0.0, + "num_input_tokens_seen": 85222736, + "step": 126440 + }, + { + "epoch": 3.0890723865829526, + "grad_norm": 0.000591681688092649, + "learning_rate": 7.655074748145968e-07, + "loss": 0.0, + "num_input_tokens_seen": 85225872, + "step": 126445 + }, + { + "epoch": 3.0891945374147998, + "grad_norm": 0.050365399569272995, + "learning_rate": 7.654245757844897e-07, + "loss": 0.0, + "num_input_tokens_seen": 85229584, + "step": 126450 + }, + { + "epoch": 3.089316688246647, + "grad_norm": 0.00013743084855377674, + "learning_rate": 7.653416784602789e-07, + "loss": 0.0, + "num_input_tokens_seen": 85233232, + "step": 126455 + }, + { + "epoch": 3.089438839078494, + "grad_norm": 0.001835143892094493, + "learning_rate": 7.652587828425659e-07, + "loss": 0.0526, + "num_input_tokens_seen": 85236688, + "step": 126460 + }, + { + "epoch": 3.0895609899103413, + "grad_norm": 0.012828472070395947, + "learning_rate": 7.651758889319547e-07, + "loss": 0.0, + "num_input_tokens_seen": 85239888, + "step": 126465 + }, + { + "epoch": 3.0896831407421885, + "grad_norm": 0.007912257686257362, + "learning_rate": 7.650929967290471e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85243024, + "step": 126470 + }, + { + "epoch": 3.0898052915740357, + "grad_norm": 0.0017077409429475665, + "learning_rate": 7.650101062344468e-07, + "loss": 0.0, + "num_input_tokens_seen": 85246288, + "step": 126475 + }, + { + "epoch": 3.089927442405883, + "grad_norm": 0.0008549513877369463, + "learning_rate": 7.649272174487558e-07, + "loss": 0.0, + "num_input_tokens_seen": 85249104, + "step": 126480 + }, + { + "epoch": 3.09004959323773, + "grad_norm": 0.00041189262992702425, + "learning_rate": 7.648443303725775e-07, + "loss": 0.0, + "num_input_tokens_seen": 85252880, + "step": 126485 + }, + { + "epoch": 3.0901717440695773, + "grad_norm": 0.04326731711626053, + "learning_rate": 7.647614450065145e-07, + "loss": 0.0, + "num_input_tokens_seen": 85256400, + "step": 126490 + }, + { + "epoch": 3.0902938949014245, + "grad_norm": 0.03192279860377312, + "learning_rate": 7.646785613511696e-07, + "loss": 0.0004, + "num_input_tokens_seen": 85259600, + "step": 126495 + }, + { + "epoch": 3.0904160457332717, + "grad_norm": 0.0032559670507907867, + "learning_rate": 7.645956794071457e-07, + "loss": 0.0, + "num_input_tokens_seen": 85262928, + "step": 126500 + }, + { + "epoch": 3.0905381965651184, + "grad_norm": 0.002402453450486064, + "learning_rate": 7.645127991750449e-07, + "loss": 0.0, + "num_input_tokens_seen": 85266384, + "step": 126505 + }, + { + "epoch": 3.0906603473969656, + "grad_norm": 0.20489975810050964, + "learning_rate": 7.644299206554702e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85269456, + "step": 126510 + }, + { + "epoch": 3.090782498228813, + "grad_norm": 0.0044357432052493095, + "learning_rate": 7.643470438490252e-07, + "loss": 0.0, + "num_input_tokens_seen": 85272528, + "step": 126515 + }, + { + "epoch": 3.09090464906066, + "grad_norm": 0.38196611404418945, + "learning_rate": 7.642641687563112e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85275920, + "step": 126520 + }, + { + "epoch": 3.091026799892507, + "grad_norm": 0.002942395396530628, + "learning_rate": 7.641812953779322e-07, + "loss": 0.0, + "num_input_tokens_seen": 85279632, + "step": 126525 + }, + { + "epoch": 3.0911489507243544, + "grad_norm": 0.001338522182777524, + "learning_rate": 7.640984237144898e-07, + "loss": 0.0, + "num_input_tokens_seen": 85282768, + "step": 126530 + }, + { + "epoch": 3.0912711015562016, + "grad_norm": 0.004309714771807194, + "learning_rate": 7.640155537665875e-07, + "loss": 0.0, + "num_input_tokens_seen": 85285840, + "step": 126535 + }, + { + "epoch": 3.0913932523880487, + "grad_norm": 0.0006489718798547983, + "learning_rate": 7.639326855348273e-07, + "loss": 0.0, + "num_input_tokens_seen": 85289360, + "step": 126540 + }, + { + "epoch": 3.091515403219896, + "grad_norm": 0.001993774203583598, + "learning_rate": 7.638498190198119e-07, + "loss": 0.0, + "num_input_tokens_seen": 85292368, + "step": 126545 + }, + { + "epoch": 3.091637554051743, + "grad_norm": 8.232580148614943e-05, + "learning_rate": 7.637669542221445e-07, + "loss": 0.0, + "num_input_tokens_seen": 85296144, + "step": 126550 + }, + { + "epoch": 3.0917597048835903, + "grad_norm": 0.014385731890797615, + "learning_rate": 7.63684091142427e-07, + "loss": 0.0242, + "num_input_tokens_seen": 85299408, + "step": 126555 + }, + { + "epoch": 3.0918818557154375, + "grad_norm": 0.0003742116969078779, + "learning_rate": 7.636012297812627e-07, + "loss": 0.0812, + "num_input_tokens_seen": 85302992, + "step": 126560 + }, + { + "epoch": 3.0920040065472847, + "grad_norm": 0.004770467523485422, + "learning_rate": 7.635183701392536e-07, + "loss": 0.0, + "num_input_tokens_seen": 85306320, + "step": 126565 + }, + { + "epoch": 3.092126157379132, + "grad_norm": 0.0011331579880788922, + "learning_rate": 7.634355122170028e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85309264, + "step": 126570 + }, + { + "epoch": 3.092248308210979, + "grad_norm": 0.0013692784123122692, + "learning_rate": 7.633526560151121e-07, + "loss": 0.0, + "num_input_tokens_seen": 85312656, + "step": 126575 + }, + { + "epoch": 3.0923704590428263, + "grad_norm": 0.0005857199430465698, + "learning_rate": 7.632698015341846e-07, + "loss": 0.0279, + "num_input_tokens_seen": 85315792, + "step": 126580 + }, + { + "epoch": 3.0924926098746734, + "grad_norm": 0.0007058735354803503, + "learning_rate": 7.631869487748232e-07, + "loss": 0.0, + "num_input_tokens_seen": 85318928, + "step": 126585 + }, + { + "epoch": 3.09261476070652, + "grad_norm": 0.0006951163522899151, + "learning_rate": 7.631040977376299e-07, + "loss": 0.0, + "num_input_tokens_seen": 85321936, + "step": 126590 + }, + { + "epoch": 3.0927369115383674, + "grad_norm": 0.0006128123495727777, + "learning_rate": 7.630212484232072e-07, + "loss": 0.0, + "num_input_tokens_seen": 85325520, + "step": 126595 + }, + { + "epoch": 3.0928590623702146, + "grad_norm": 55.12501907348633, + "learning_rate": 7.629384008321578e-07, + "loss": 0.0346, + "num_input_tokens_seen": 85328464, + "step": 126600 + }, + { + "epoch": 3.0929812132020618, + "grad_norm": 0.05353361740708351, + "learning_rate": 7.628555549650838e-07, + "loss": 0.0, + "num_input_tokens_seen": 85331536, + "step": 126605 + }, + { + "epoch": 3.093103364033909, + "grad_norm": 0.0017707470105960965, + "learning_rate": 7.627727108225886e-07, + "loss": 0.0, + "num_input_tokens_seen": 85334800, + "step": 126610 + }, + { + "epoch": 3.093225514865756, + "grad_norm": 0.003513862146064639, + "learning_rate": 7.626898684052734e-07, + "loss": 0.0, + "num_input_tokens_seen": 85338064, + "step": 126615 + }, + { + "epoch": 3.0933476656976033, + "grad_norm": 0.010240820236504078, + "learning_rate": 7.626070277137417e-07, + "loss": 0.0, + "num_input_tokens_seen": 85341520, + "step": 126620 + }, + { + "epoch": 3.0934698165294505, + "grad_norm": 0.0003203249361831695, + "learning_rate": 7.625241887485954e-07, + "loss": 0.0453, + "num_input_tokens_seen": 85344400, + "step": 126625 + }, + { + "epoch": 3.0935919673612977, + "grad_norm": 0.002799415495246649, + "learning_rate": 7.624413515104373e-07, + "loss": 0.0, + "num_input_tokens_seen": 85347664, + "step": 126630 + }, + { + "epoch": 3.093714118193145, + "grad_norm": 9.273582691093907e-05, + "learning_rate": 7.623585159998692e-07, + "loss": 0.0, + "num_input_tokens_seen": 85350992, + "step": 126635 + }, + { + "epoch": 3.093836269024992, + "grad_norm": 0.0013542450033128262, + "learning_rate": 7.622756822174941e-07, + "loss": 0.0, + "num_input_tokens_seen": 85354384, + "step": 126640 + }, + { + "epoch": 3.0939584198568393, + "grad_norm": 0.0002636424032971263, + "learning_rate": 7.621928501639142e-07, + "loss": 0.0, + "num_input_tokens_seen": 85357328, + "step": 126645 + }, + { + "epoch": 3.0940805706886865, + "grad_norm": 0.00017485875287093222, + "learning_rate": 7.621100198397318e-07, + "loss": 0.0, + "num_input_tokens_seen": 85360912, + "step": 126650 + }, + { + "epoch": 3.0942027215205337, + "grad_norm": 0.012742831371724606, + "learning_rate": 7.620271912455496e-07, + "loss": 0.0, + "num_input_tokens_seen": 85364112, + "step": 126655 + }, + { + "epoch": 3.094324872352381, + "grad_norm": 0.06087416782975197, + "learning_rate": 7.619443643819691e-07, + "loss": 0.0, + "num_input_tokens_seen": 85367440, + "step": 126660 + }, + { + "epoch": 3.094447023184228, + "grad_norm": 0.003833112772554159, + "learning_rate": 7.618615392495935e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85371024, + "step": 126665 + }, + { + "epoch": 3.0945691740160752, + "grad_norm": 0.001838882453739643, + "learning_rate": 7.61778715849025e-07, + "loss": 0.0, + "num_input_tokens_seen": 85374992, + "step": 126670 + }, + { + "epoch": 3.0946913248479224, + "grad_norm": 0.0028312301728874445, + "learning_rate": 7.616958941808654e-07, + "loss": 0.0, + "num_input_tokens_seen": 85378000, + "step": 126675 + }, + { + "epoch": 3.094813475679769, + "grad_norm": 0.0475040078163147, + "learning_rate": 7.616130742457178e-07, + "loss": 0.0, + "num_input_tokens_seen": 85380944, + "step": 126680 + }, + { + "epoch": 3.0949356265116164, + "grad_norm": 5.351084837457165e-05, + "learning_rate": 7.615302560441837e-07, + "loss": 0.0, + "num_input_tokens_seen": 85384272, + "step": 126685 + }, + { + "epoch": 3.0950577773434635, + "grad_norm": 0.005748182535171509, + "learning_rate": 7.614474395768661e-07, + "loss": 0.0, + "num_input_tokens_seen": 85387664, + "step": 126690 + }, + { + "epoch": 3.0951799281753107, + "grad_norm": 0.011121712625026703, + "learning_rate": 7.613646248443666e-07, + "loss": 0.0317, + "num_input_tokens_seen": 85390992, + "step": 126695 + }, + { + "epoch": 3.095302079007158, + "grad_norm": 0.022904004901647568, + "learning_rate": 7.612818118472875e-07, + "loss": 0.0, + "num_input_tokens_seen": 85394000, + "step": 126700 + }, + { + "epoch": 3.095424229839005, + "grad_norm": 0.005358532536774874, + "learning_rate": 7.611990005862318e-07, + "loss": 0.0, + "num_input_tokens_seen": 85397584, + "step": 126705 + }, + { + "epoch": 3.0955463806708523, + "grad_norm": 0.0005007089930586517, + "learning_rate": 7.611161910618007e-07, + "loss": 0.0716, + "num_input_tokens_seen": 85401616, + "step": 126710 + }, + { + "epoch": 3.0956685315026995, + "grad_norm": 0.002198169706389308, + "learning_rate": 7.610333832745974e-07, + "loss": 0.0, + "num_input_tokens_seen": 85405392, + "step": 126715 + }, + { + "epoch": 3.0957906823345467, + "grad_norm": 0.001933772349730134, + "learning_rate": 7.609505772252232e-07, + "loss": 0.0, + "num_input_tokens_seen": 85408656, + "step": 126720 + }, + { + "epoch": 3.095912833166394, + "grad_norm": 0.0009322002879343927, + "learning_rate": 7.60867772914281e-07, + "loss": 0.0, + "num_input_tokens_seen": 85411920, + "step": 126725 + }, + { + "epoch": 3.096034983998241, + "grad_norm": 0.0036974812392145395, + "learning_rate": 7.607849703423723e-07, + "loss": 0.0, + "num_input_tokens_seen": 85415376, + "step": 126730 + }, + { + "epoch": 3.0961571348300883, + "grad_norm": 0.007073775865137577, + "learning_rate": 7.607021695100997e-07, + "loss": 0.0, + "num_input_tokens_seen": 85418448, + "step": 126735 + }, + { + "epoch": 3.0962792856619354, + "grad_norm": 207.88877868652344, + "learning_rate": 7.606193704180655e-07, + "loss": 0.0533, + "num_input_tokens_seen": 85421648, + "step": 126740 + }, + { + "epoch": 3.0964014364937826, + "grad_norm": 8.36336039355956e-05, + "learning_rate": 7.605365730668717e-07, + "loss": 0.0, + "num_input_tokens_seen": 85425168, + "step": 126745 + }, + { + "epoch": 3.09652358732563, + "grad_norm": 0.31576985120773315, + "learning_rate": 7.604537774571203e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85428496, + "step": 126750 + }, + { + "epoch": 3.096645738157477, + "grad_norm": 0.03226271644234657, + "learning_rate": 7.603709835894133e-07, + "loss": 0.0, + "num_input_tokens_seen": 85432272, + "step": 126755 + }, + { + "epoch": 3.096767888989324, + "grad_norm": 0.0003956420405302197, + "learning_rate": 7.602881914643529e-07, + "loss": 0.0, + "num_input_tokens_seen": 85435664, + "step": 126760 + }, + { + "epoch": 3.0968900398211714, + "grad_norm": 0.0004236078239046037, + "learning_rate": 7.602054010825415e-07, + "loss": 0.0, + "num_input_tokens_seen": 85439248, + "step": 126765 + }, + { + "epoch": 3.097012190653018, + "grad_norm": 0.00017692515393719077, + "learning_rate": 7.601226124445806e-07, + "loss": 0.0204, + "num_input_tokens_seen": 85442640, + "step": 126770 + }, + { + "epoch": 3.0971343414848653, + "grad_norm": 0.05441778898239136, + "learning_rate": 7.60039825551073e-07, + "loss": 0.0, + "num_input_tokens_seen": 85446032, + "step": 126775 + }, + { + "epoch": 3.0972564923167125, + "grad_norm": 0.0025518974289298058, + "learning_rate": 7.599570404026199e-07, + "loss": 0.0, + "num_input_tokens_seen": 85449168, + "step": 126780 + }, + { + "epoch": 3.0973786431485597, + "grad_norm": 0.000867149792611599, + "learning_rate": 7.598742569998243e-07, + "loss": 0.0, + "num_input_tokens_seen": 85452496, + "step": 126785 + }, + { + "epoch": 3.097500793980407, + "grad_norm": 0.007959526032209396, + "learning_rate": 7.597914753432871e-07, + "loss": 0.0359, + "num_input_tokens_seen": 85455824, + "step": 126790 + }, + { + "epoch": 3.097622944812254, + "grad_norm": 0.00035928128636442125, + "learning_rate": 7.597086954336112e-07, + "loss": 0.0, + "num_input_tokens_seen": 85459920, + "step": 126795 + }, + { + "epoch": 3.0977450956441013, + "grad_norm": 0.0035206761676818132, + "learning_rate": 7.596259172713982e-07, + "loss": 0.0383, + "num_input_tokens_seen": 85463312, + "step": 126800 + }, + { + "epoch": 3.0978672464759485, + "grad_norm": 0.00011418592475820333, + "learning_rate": 7.595431408572504e-07, + "loss": 0.0, + "num_input_tokens_seen": 85466256, + "step": 126805 + }, + { + "epoch": 3.0979893973077957, + "grad_norm": 0.0019963637460023165, + "learning_rate": 7.594603661917695e-07, + "loss": 0.0, + "num_input_tokens_seen": 85470096, + "step": 126810 + }, + { + "epoch": 3.098111548139643, + "grad_norm": 0.0009545327047817409, + "learning_rate": 7.593775932755573e-07, + "loss": 0.0726, + "num_input_tokens_seen": 85473232, + "step": 126815 + }, + { + "epoch": 3.09823369897149, + "grad_norm": 0.002073055598884821, + "learning_rate": 7.592948221092158e-07, + "loss": 0.0366, + "num_input_tokens_seen": 85476560, + "step": 126820 + }, + { + "epoch": 3.0983558498033372, + "grad_norm": 0.0013438011519610882, + "learning_rate": 7.592120526933477e-07, + "loss": 0.0, + "num_input_tokens_seen": 85479632, + "step": 126825 + }, + { + "epoch": 3.0984780006351844, + "grad_norm": 0.005760578904300928, + "learning_rate": 7.591292850285538e-07, + "loss": 0.0, + "num_input_tokens_seen": 85482704, + "step": 126830 + }, + { + "epoch": 3.0986001514670316, + "grad_norm": 0.00027516690897755325, + "learning_rate": 7.59046519115437e-07, + "loss": 0.0, + "num_input_tokens_seen": 85486288, + "step": 126835 + }, + { + "epoch": 3.098722302298879, + "grad_norm": 0.002451132982969284, + "learning_rate": 7.589637549545981e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85489680, + "step": 126840 + }, + { + "epoch": 3.098844453130726, + "grad_norm": 0.0006566059309989214, + "learning_rate": 7.588809925466402e-07, + "loss": 0.0, + "num_input_tokens_seen": 85493008, + "step": 126845 + }, + { + "epoch": 3.098966603962573, + "grad_norm": 0.0007958338828757405, + "learning_rate": 7.587982318921643e-07, + "loss": 0.0, + "num_input_tokens_seen": 85496208, + "step": 126850 + }, + { + "epoch": 3.0990887547944204, + "grad_norm": 0.43890684843063354, + "learning_rate": 7.587154729917724e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85499280, + "step": 126855 + }, + { + "epoch": 3.099210905626267, + "grad_norm": 0.047244809567928314, + "learning_rate": 7.586327158460668e-07, + "loss": 0.0966, + "num_input_tokens_seen": 85502608, + "step": 126860 + }, + { + "epoch": 3.0993330564581143, + "grad_norm": 0.0005038433591835201, + "learning_rate": 7.585499604556487e-07, + "loss": 0.0, + "num_input_tokens_seen": 85505424, + "step": 126865 + }, + { + "epoch": 3.0994552072899615, + "grad_norm": 0.011972165666520596, + "learning_rate": 7.584672068211205e-07, + "loss": 0.0, + "num_input_tokens_seen": 85508816, + "step": 126870 + }, + { + "epoch": 3.0995773581218087, + "grad_norm": 0.00880894623696804, + "learning_rate": 7.583844549430835e-07, + "loss": 0.0, + "num_input_tokens_seen": 85512272, + "step": 126875 + }, + { + "epoch": 3.099699508953656, + "grad_norm": 0.0007148732547648251, + "learning_rate": 7.583017048221401e-07, + "loss": 0.0, + "num_input_tokens_seen": 85515472, + "step": 126880 + }, + { + "epoch": 3.099821659785503, + "grad_norm": 0.0030713542364537716, + "learning_rate": 7.582189564588912e-07, + "loss": 0.0, + "num_input_tokens_seen": 85518544, + "step": 126885 + }, + { + "epoch": 3.0999438106173502, + "grad_norm": 0.001756560057401657, + "learning_rate": 7.581362098539391e-07, + "loss": 0.0, + "num_input_tokens_seen": 85521616, + "step": 126890 + }, + { + "epoch": 3.1000659614491974, + "grad_norm": 0.005060962401330471, + "learning_rate": 7.58053465007886e-07, + "loss": 0.0, + "num_input_tokens_seen": 85524368, + "step": 126895 + }, + { + "epoch": 3.1001881122810446, + "grad_norm": 0.0010907781543210149, + "learning_rate": 7.57970721921333e-07, + "loss": 0.0, + "num_input_tokens_seen": 85527376, + "step": 126900 + }, + { + "epoch": 3.100310263112892, + "grad_norm": 0.3052005171775818, + "learning_rate": 7.578879805948819e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85531472, + "step": 126905 + }, + { + "epoch": 3.100432413944739, + "grad_norm": 0.00023940950632095337, + "learning_rate": 7.578052410291346e-07, + "loss": 0.0, + "num_input_tokens_seen": 85534672, + "step": 126910 + }, + { + "epoch": 3.100554564776586, + "grad_norm": 0.002871927572414279, + "learning_rate": 7.577225032246925e-07, + "loss": 0.0, + "num_input_tokens_seen": 85537872, + "step": 126915 + }, + { + "epoch": 3.1006767156084334, + "grad_norm": 0.0004271389334462583, + "learning_rate": 7.576397671821579e-07, + "loss": 0.0, + "num_input_tokens_seen": 85541584, + "step": 126920 + }, + { + "epoch": 3.1007988664402806, + "grad_norm": 0.0003819416160695255, + "learning_rate": 7.575570329021316e-07, + "loss": 0.0, + "num_input_tokens_seen": 85544720, + "step": 126925 + }, + { + "epoch": 3.1009210172721278, + "grad_norm": 0.0009660868090577424, + "learning_rate": 7.574743003852164e-07, + "loss": 0.0, + "num_input_tokens_seen": 85547984, + "step": 126930 + }, + { + "epoch": 3.101043168103975, + "grad_norm": 0.13878706097602844, + "learning_rate": 7.573915696320128e-07, + "loss": 0.0, + "num_input_tokens_seen": 85550928, + "step": 126935 + }, + { + "epoch": 3.101165318935822, + "grad_norm": 0.6104390025138855, + "learning_rate": 7.573088406431236e-07, + "loss": 0.0005, + "num_input_tokens_seen": 85554512, + "step": 126940 + }, + { + "epoch": 3.1012874697676693, + "grad_norm": 0.001662836642935872, + "learning_rate": 7.57226113419149e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85557648, + "step": 126945 + }, + { + "epoch": 3.101409620599516, + "grad_norm": 0.003861561883240938, + "learning_rate": 7.571433879606918e-07, + "loss": 0.0, + "num_input_tokens_seen": 85561936, + "step": 126950 + }, + { + "epoch": 3.1015317714313633, + "grad_norm": 0.00627047149464488, + "learning_rate": 7.570606642683531e-07, + "loss": 0.0, + "num_input_tokens_seen": 85565264, + "step": 126955 + }, + { + "epoch": 3.1016539222632105, + "grad_norm": 28.827089309692383, + "learning_rate": 7.569779423427347e-07, + "loss": 0.075, + "num_input_tokens_seen": 85568400, + "step": 126960 + }, + { + "epoch": 3.1017760730950577, + "grad_norm": 0.002582937479019165, + "learning_rate": 7.568952221844383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85571984, + "step": 126965 + }, + { + "epoch": 3.101898223926905, + "grad_norm": 0.005004982929676771, + "learning_rate": 7.568125037940647e-07, + "loss": 0.0, + "num_input_tokens_seen": 85575312, + "step": 126970 + }, + { + "epoch": 3.102020374758752, + "grad_norm": 0.0010750489309430122, + "learning_rate": 7.56729787172216e-07, + "loss": 0.0, + "num_input_tokens_seen": 85579152, + "step": 126975 + }, + { + "epoch": 3.102142525590599, + "grad_norm": 0.00830749236047268, + "learning_rate": 7.566470723194942e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85582672, + "step": 126980 + }, + { + "epoch": 3.1022646764224464, + "grad_norm": 0.0012467604828998446, + "learning_rate": 7.565643592364999e-07, + "loss": 0.0003, + "num_input_tokens_seen": 85585872, + "step": 126985 + }, + { + "epoch": 3.1023868272542936, + "grad_norm": 0.0003762088599614799, + "learning_rate": 7.564816479238355e-07, + "loss": 0.0425, + "num_input_tokens_seen": 85589456, + "step": 126990 + }, + { + "epoch": 3.102508978086141, + "grad_norm": 0.0005477772792801261, + "learning_rate": 7.563989383821017e-07, + "loss": 0.0, + "num_input_tokens_seen": 85592784, + "step": 126995 + }, + { + "epoch": 3.102631128917988, + "grad_norm": 0.008774727582931519, + "learning_rate": 7.563162306119006e-07, + "loss": 0.0, + "num_input_tokens_seen": 85595920, + "step": 127000 + }, + { + "epoch": 3.102753279749835, + "grad_norm": 817.6456909179688, + "learning_rate": 7.562335246138333e-07, + "loss": 0.029, + "num_input_tokens_seen": 85599184, + "step": 127005 + }, + { + "epoch": 3.1028754305816824, + "grad_norm": 0.0020112013444304466, + "learning_rate": 7.561508203885012e-07, + "loss": 0.0005, + "num_input_tokens_seen": 85602384, + "step": 127010 + }, + { + "epoch": 3.1029975814135295, + "grad_norm": 0.00014069992175791413, + "learning_rate": 7.560681179365062e-07, + "loss": 0.0, + "num_input_tokens_seen": 85605648, + "step": 127015 + }, + { + "epoch": 3.1031197322453767, + "grad_norm": 0.003962227143347263, + "learning_rate": 7.559854172584491e-07, + "loss": 0.0, + "num_input_tokens_seen": 85609424, + "step": 127020 + }, + { + "epoch": 3.103241883077224, + "grad_norm": 4.573463593260385e-05, + "learning_rate": 7.559027183549322e-07, + "loss": 0.0, + "num_input_tokens_seen": 85612752, + "step": 127025 + }, + { + "epoch": 3.103364033909071, + "grad_norm": 0.0379859022796154, + "learning_rate": 7.558200212265558e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85616336, + "step": 127030 + }, + { + "epoch": 3.103486184740918, + "grad_norm": 0.0006606020033359528, + "learning_rate": 7.557373258739224e-07, + "loss": 0.1186, + "num_input_tokens_seen": 85619792, + "step": 127035 + }, + { + "epoch": 3.103608335572765, + "grad_norm": 0.01268491055816412, + "learning_rate": 7.556546322976324e-07, + "loss": 0.0, + "num_input_tokens_seen": 85623248, + "step": 127040 + }, + { + "epoch": 3.1037304864046122, + "grad_norm": 0.0007136997883208096, + "learning_rate": 7.555719404982878e-07, + "loss": 0.0, + "num_input_tokens_seen": 85626896, + "step": 127045 + }, + { + "epoch": 3.1038526372364594, + "grad_norm": 0.009720143862068653, + "learning_rate": 7.5548925047649e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85630160, + "step": 127050 + }, + { + "epoch": 3.1039747880683066, + "grad_norm": 0.006915161851793528, + "learning_rate": 7.5540656223284e-07, + "loss": 0.0, + "num_input_tokens_seen": 85633040, + "step": 127055 + }, + { + "epoch": 3.104096938900154, + "grad_norm": 0.00606177793815732, + "learning_rate": 7.553238757679393e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85636176, + "step": 127060 + }, + { + "epoch": 3.104219089732001, + "grad_norm": 0.0016445768997073174, + "learning_rate": 7.552411910823891e-07, + "loss": 0.076, + "num_input_tokens_seen": 85639568, + "step": 127065 + }, + { + "epoch": 3.104341240563848, + "grad_norm": 0.002737719565629959, + "learning_rate": 7.551585081767906e-07, + "loss": 0.0, + "num_input_tokens_seen": 85642896, + "step": 127070 + }, + { + "epoch": 3.1044633913956954, + "grad_norm": 0.0015813398640602827, + "learning_rate": 7.550758270517458e-07, + "loss": 0.0, + "num_input_tokens_seen": 85645968, + "step": 127075 + }, + { + "epoch": 3.1045855422275426, + "grad_norm": 0.01730138435959816, + "learning_rate": 7.54993147707855e-07, + "loss": 0.0, + "num_input_tokens_seen": 85649424, + "step": 127080 + }, + { + "epoch": 3.1047076930593898, + "grad_norm": 0.0005342924268916249, + "learning_rate": 7.549104701457203e-07, + "loss": 0.0, + "num_input_tokens_seen": 85652688, + "step": 127085 + }, + { + "epoch": 3.104829843891237, + "grad_norm": 1.5160987377166748, + "learning_rate": 7.548277943659422e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85655952, + "step": 127090 + }, + { + "epoch": 3.104951994723084, + "grad_norm": 0.013315865769982338, + "learning_rate": 7.547451203691227e-07, + "loss": 0.0, + "num_input_tokens_seen": 85659600, + "step": 127095 + }, + { + "epoch": 3.1050741455549313, + "grad_norm": 0.00107246870175004, + "learning_rate": 7.546624481558623e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85662864, + "step": 127100 + }, + { + "epoch": 3.1051962963867785, + "grad_norm": 0.006762396544218063, + "learning_rate": 7.545797777267627e-07, + "loss": 0.075, + "num_input_tokens_seen": 85666448, + "step": 127105 + }, + { + "epoch": 3.1053184472186257, + "grad_norm": 0.01796438731253147, + "learning_rate": 7.54497109082425e-07, + "loss": 0.0, + "num_input_tokens_seen": 85669840, + "step": 127110 + }, + { + "epoch": 3.105440598050473, + "grad_norm": 0.0015615341253578663, + "learning_rate": 7.544144422234504e-07, + "loss": 0.0, + "num_input_tokens_seen": 85673296, + "step": 127115 + }, + { + "epoch": 3.10556274888232, + "grad_norm": 0.00022422667825594544, + "learning_rate": 7.543317771504402e-07, + "loss": 0.0, + "num_input_tokens_seen": 85676624, + "step": 127120 + }, + { + "epoch": 3.1056848997141673, + "grad_norm": 0.00045156272244639695, + "learning_rate": 7.542491138639951e-07, + "loss": 0.0, + "num_input_tokens_seen": 85679888, + "step": 127125 + }, + { + "epoch": 3.105807050546014, + "grad_norm": 0.05712326988577843, + "learning_rate": 7.541664523647168e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85683664, + "step": 127130 + }, + { + "epoch": 3.105929201377861, + "grad_norm": 0.04789616912603378, + "learning_rate": 7.540837926532057e-07, + "loss": 0.0, + "num_input_tokens_seen": 85687312, + "step": 127135 + }, + { + "epoch": 3.1060513522097084, + "grad_norm": 0.0007048913976177573, + "learning_rate": 7.540011347300635e-07, + "loss": 0.0, + "num_input_tokens_seen": 85690512, + "step": 127140 + }, + { + "epoch": 3.1061735030415556, + "grad_norm": 0.0033927359618246555, + "learning_rate": 7.539184785958916e-07, + "loss": 0.0, + "num_input_tokens_seen": 85693648, + "step": 127145 + }, + { + "epoch": 3.106295653873403, + "grad_norm": 0.00035177572863176465, + "learning_rate": 7.538358242512905e-07, + "loss": 0.0003, + "num_input_tokens_seen": 85696848, + "step": 127150 + }, + { + "epoch": 3.10641780470525, + "grad_norm": 0.0007704797317273915, + "learning_rate": 7.537531716968617e-07, + "loss": 0.0, + "num_input_tokens_seen": 85700368, + "step": 127155 + }, + { + "epoch": 3.106539955537097, + "grad_norm": 0.0019401259487494826, + "learning_rate": 7.536705209332059e-07, + "loss": 0.0, + "num_input_tokens_seen": 85703568, + "step": 127160 + }, + { + "epoch": 3.1066621063689444, + "grad_norm": 0.0008472330518998206, + "learning_rate": 7.535878719609241e-07, + "loss": 0.0, + "num_input_tokens_seen": 85707216, + "step": 127165 + }, + { + "epoch": 3.1067842572007915, + "grad_norm": 0.0016105916583910584, + "learning_rate": 7.535052247806179e-07, + "loss": 0.0, + "num_input_tokens_seen": 85710736, + "step": 127170 + }, + { + "epoch": 3.1069064080326387, + "grad_norm": 0.00024303798272740096, + "learning_rate": 7.534225793928878e-07, + "loss": 0.0, + "num_input_tokens_seen": 85714064, + "step": 127175 + }, + { + "epoch": 3.107028558864486, + "grad_norm": 0.00046464145998470485, + "learning_rate": 7.533399357983353e-07, + "loss": 0.0, + "num_input_tokens_seen": 85717328, + "step": 127180 + }, + { + "epoch": 3.107150709696333, + "grad_norm": 0.011482232250273228, + "learning_rate": 7.532572939975608e-07, + "loss": 0.0, + "num_input_tokens_seen": 85721040, + "step": 127185 + }, + { + "epoch": 3.1072728605281803, + "grad_norm": 0.002534418599680066, + "learning_rate": 7.53174653991166e-07, + "loss": 0.0, + "num_input_tokens_seen": 85724112, + "step": 127190 + }, + { + "epoch": 3.1073950113600275, + "grad_norm": 0.00020533621136564761, + "learning_rate": 7.530920157797511e-07, + "loss": 0.0, + "num_input_tokens_seen": 85728336, + "step": 127195 + }, + { + "epoch": 3.1075171621918747, + "grad_norm": 0.0007511446019634604, + "learning_rate": 7.530093793639174e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85731920, + "step": 127200 + }, + { + "epoch": 3.107639313023722, + "grad_norm": 0.01170251052826643, + "learning_rate": 7.529267447442664e-07, + "loss": 0.0, + "num_input_tokens_seen": 85735696, + "step": 127205 + }, + { + "epoch": 3.107761463855569, + "grad_norm": 0.000938538636546582, + "learning_rate": 7.528441119213984e-07, + "loss": 0.0365, + "num_input_tokens_seen": 85739152, + "step": 127210 + }, + { + "epoch": 3.107883614687416, + "grad_norm": 0.05814709886908531, + "learning_rate": 7.527614808959144e-07, + "loss": 0.0, + "num_input_tokens_seen": 85742416, + "step": 127215 + }, + { + "epoch": 3.108005765519263, + "grad_norm": 0.0004410638939589262, + "learning_rate": 7.526788516684155e-07, + "loss": 0.0, + "num_input_tokens_seen": 85745488, + "step": 127220 + }, + { + "epoch": 3.10812791635111, + "grad_norm": 0.00028008874505758286, + "learning_rate": 7.525962242395022e-07, + "loss": 0.0583, + "num_input_tokens_seen": 85748560, + "step": 127225 + }, + { + "epoch": 3.1082500671829574, + "grad_norm": 0.03178201988339424, + "learning_rate": 7.525135986097762e-07, + "loss": 0.0, + "num_input_tokens_seen": 85752272, + "step": 127230 + }, + { + "epoch": 3.1083722180148046, + "grad_norm": 0.0007714095409028232, + "learning_rate": 7.524309747798374e-07, + "loss": 0.0774, + "num_input_tokens_seen": 85755472, + "step": 127235 + }, + { + "epoch": 3.1084943688466518, + "grad_norm": 0.004785490222275257, + "learning_rate": 7.523483527502876e-07, + "loss": 0.0, + "num_input_tokens_seen": 85758480, + "step": 127240 + }, + { + "epoch": 3.108616519678499, + "grad_norm": 0.002524032723158598, + "learning_rate": 7.522657325217267e-07, + "loss": 0.0, + "num_input_tokens_seen": 85761424, + "step": 127245 + }, + { + "epoch": 3.108738670510346, + "grad_norm": 0.11010199785232544, + "learning_rate": 7.521831140947566e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85764688, + "step": 127250 + }, + { + "epoch": 3.1088608213421933, + "grad_norm": 0.16963838040828705, + "learning_rate": 7.52100497469977e-07, + "loss": 0.0, + "num_input_tokens_seen": 85767760, + "step": 127255 + }, + { + "epoch": 3.1089829721740405, + "grad_norm": 0.0005663972115144134, + "learning_rate": 7.520178826479895e-07, + "loss": 0.0, + "num_input_tokens_seen": 85770832, + "step": 127260 + }, + { + "epoch": 3.1091051230058877, + "grad_norm": 0.0009025745093822479, + "learning_rate": 7.519352696293948e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85774544, + "step": 127265 + }, + { + "epoch": 3.109227273837735, + "grad_norm": 0.004693191032856703, + "learning_rate": 7.518526584147934e-07, + "loss": 0.0325, + "num_input_tokens_seen": 85778064, + "step": 127270 + }, + { + "epoch": 3.109349424669582, + "grad_norm": 0.0005293733556754887, + "learning_rate": 7.517700490047864e-07, + "loss": 0.0, + "num_input_tokens_seen": 85781328, + "step": 127275 + }, + { + "epoch": 3.1094715755014293, + "grad_norm": 0.3933495879173279, + "learning_rate": 7.516874413999739e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85784400, + "step": 127280 + }, + { + "epoch": 3.1095937263332765, + "grad_norm": 0.0027819527313113213, + "learning_rate": 7.516048356009577e-07, + "loss": 0.0, + "num_input_tokens_seen": 85787600, + "step": 127285 + }, + { + "epoch": 3.1097158771651237, + "grad_norm": 0.002181408926844597, + "learning_rate": 7.515222316083374e-07, + "loss": 0.0, + "num_input_tokens_seen": 85790864, + "step": 127290 + }, + { + "epoch": 3.109838027996971, + "grad_norm": 0.00033735146280378103, + "learning_rate": 7.514396294227143e-07, + "loss": 0.0, + "num_input_tokens_seen": 85794320, + "step": 127295 + }, + { + "epoch": 3.109960178828818, + "grad_norm": 0.0002902150445152074, + "learning_rate": 7.513570290446896e-07, + "loss": 0.0, + "num_input_tokens_seen": 85797584, + "step": 127300 + }, + { + "epoch": 3.110082329660665, + "grad_norm": 0.0033535470720380545, + "learning_rate": 7.512744304748629e-07, + "loss": 0.0, + "num_input_tokens_seen": 85801104, + "step": 127305 + }, + { + "epoch": 3.110204480492512, + "grad_norm": 0.00014512380585074425, + "learning_rate": 7.511918337138359e-07, + "loss": 0.0, + "num_input_tokens_seen": 85804368, + "step": 127310 + }, + { + "epoch": 3.110326631324359, + "grad_norm": 0.004475784488022327, + "learning_rate": 7.511092387622086e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85807696, + "step": 127315 + }, + { + "epoch": 3.1104487821562063, + "grad_norm": 0.013879061676561832, + "learning_rate": 7.510266456205816e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85810512, + "step": 127320 + }, + { + "epoch": 3.1105709329880535, + "grad_norm": 0.004589783027768135, + "learning_rate": 7.509440542895562e-07, + "loss": 0.0, + "num_input_tokens_seen": 85813520, + "step": 127325 + }, + { + "epoch": 3.1106930838199007, + "grad_norm": 0.00585208460688591, + "learning_rate": 7.508614647697324e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85816784, + "step": 127330 + }, + { + "epoch": 3.110815234651748, + "grad_norm": 0.0008580581634305418, + "learning_rate": 7.507788770617111e-07, + "loss": 0.0, + "num_input_tokens_seen": 85820112, + "step": 127335 + }, + { + "epoch": 3.110937385483595, + "grad_norm": 0.0012955941492691636, + "learning_rate": 7.506962911660927e-07, + "loss": 0.0, + "num_input_tokens_seen": 85823056, + "step": 127340 + }, + { + "epoch": 3.1110595363154423, + "grad_norm": 0.0007464304217137396, + "learning_rate": 7.506137070834784e-07, + "loss": 0.0, + "num_input_tokens_seen": 85826448, + "step": 127345 + }, + { + "epoch": 3.1111816871472895, + "grad_norm": 0.002111968584358692, + "learning_rate": 7.505311248144677e-07, + "loss": 0.0, + "num_input_tokens_seen": 85829456, + "step": 127350 + }, + { + "epoch": 3.1113038379791367, + "grad_norm": 0.0009733681799843907, + "learning_rate": 7.504485443596619e-07, + "loss": 0.0, + "num_input_tokens_seen": 85832720, + "step": 127355 + }, + { + "epoch": 3.111425988810984, + "grad_norm": 0.003962097689509392, + "learning_rate": 7.503659657196617e-07, + "loss": 0.0, + "num_input_tokens_seen": 85836048, + "step": 127360 + }, + { + "epoch": 3.111548139642831, + "grad_norm": 0.0002371447772020474, + "learning_rate": 7.502833888950672e-07, + "loss": 0.0, + "num_input_tokens_seen": 85839376, + "step": 127365 + }, + { + "epoch": 3.1116702904746782, + "grad_norm": 0.003351636463776231, + "learning_rate": 7.502008138864791e-07, + "loss": 0.0, + "num_input_tokens_seen": 85842448, + "step": 127370 + }, + { + "epoch": 3.1117924413065254, + "grad_norm": 3.950299742427887e-06, + "learning_rate": 7.501182406944977e-07, + "loss": 0.0442, + "num_input_tokens_seen": 85845648, + "step": 127375 + }, + { + "epoch": 3.1119145921383726, + "grad_norm": 0.00042691812268458307, + "learning_rate": 7.500356693197236e-07, + "loss": 0.0, + "num_input_tokens_seen": 85848784, + "step": 127380 + }, + { + "epoch": 3.11203674297022, + "grad_norm": 13.983359336853027, + "learning_rate": 7.499530997627576e-07, + "loss": 0.0551, + "num_input_tokens_seen": 85851920, + "step": 127385 + }, + { + "epoch": 3.112158893802067, + "grad_norm": 28.192888259887695, + "learning_rate": 7.498705320241998e-07, + "loss": 0.0619, + "num_input_tokens_seen": 85855056, + "step": 127390 + }, + { + "epoch": 3.1122810446339138, + "grad_norm": 0.020545123144984245, + "learning_rate": 7.49787966104651e-07, + "loss": 0.0, + "num_input_tokens_seen": 85858256, + "step": 127395 + }, + { + "epoch": 3.112403195465761, + "grad_norm": 0.25158601999282837, + "learning_rate": 7.49705402004711e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85861712, + "step": 127400 + }, + { + "epoch": 3.112525346297608, + "grad_norm": 0.047194406390190125, + "learning_rate": 7.49622839724981e-07, + "loss": 0.0, + "num_input_tokens_seen": 85865104, + "step": 127405 + }, + { + "epoch": 3.1126474971294553, + "grad_norm": 0.0534670390188694, + "learning_rate": 7.495402792660608e-07, + "loss": 0.0002, + "num_input_tokens_seen": 85868368, + "step": 127410 + }, + { + "epoch": 3.1127696479613025, + "grad_norm": 0.015193069353699684, + "learning_rate": 7.494577206285511e-07, + "loss": 0.0, + "num_input_tokens_seen": 85871440, + "step": 127415 + }, + { + "epoch": 3.1128917987931497, + "grad_norm": 0.005533255636692047, + "learning_rate": 7.493751638130523e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85875472, + "step": 127420 + }, + { + "epoch": 3.113013949624997, + "grad_norm": 0.0008053280180320144, + "learning_rate": 7.492926088201648e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85878672, + "step": 127425 + }, + { + "epoch": 3.113136100456844, + "grad_norm": 0.008561119437217712, + "learning_rate": 7.49210055650489e-07, + "loss": 0.0, + "num_input_tokens_seen": 85881936, + "step": 127430 + }, + { + "epoch": 3.1132582512886913, + "grad_norm": 0.0030915874522179365, + "learning_rate": 7.491275043046246e-07, + "loss": 0.0, + "num_input_tokens_seen": 85885264, + "step": 127435 + }, + { + "epoch": 3.1133804021205385, + "grad_norm": 32.49300765991211, + "learning_rate": 7.49044954783173e-07, + "loss": 0.0663, + "num_input_tokens_seen": 85888400, + "step": 127440 + }, + { + "epoch": 3.1135025529523856, + "grad_norm": 0.000495272921398282, + "learning_rate": 7.489624070867337e-07, + "loss": 0.0, + "num_input_tokens_seen": 85892048, + "step": 127445 + }, + { + "epoch": 3.113624703784233, + "grad_norm": 0.0019321962026879191, + "learning_rate": 7.48879861215907e-07, + "loss": 0.0, + "num_input_tokens_seen": 85895056, + "step": 127450 + }, + { + "epoch": 3.11374685461608, + "grad_norm": 0.6780271530151367, + "learning_rate": 7.487973171712942e-07, + "loss": 0.0006, + "num_input_tokens_seen": 85898000, + "step": 127455 + }, + { + "epoch": 3.113869005447927, + "grad_norm": 0.0012302626855671406, + "learning_rate": 7.487147749534943e-07, + "loss": 0.0, + "num_input_tokens_seen": 85901584, + "step": 127460 + }, + { + "epoch": 3.1139911562797744, + "grad_norm": 0.00070422631688416, + "learning_rate": 7.486322345631086e-07, + "loss": 0.0, + "num_input_tokens_seen": 85904784, + "step": 127465 + }, + { + "epoch": 3.1141133071116216, + "grad_norm": 0.00759016303345561, + "learning_rate": 7.485496960007367e-07, + "loss": 0.0, + "num_input_tokens_seen": 85908048, + "step": 127470 + }, + { + "epoch": 3.114235457943469, + "grad_norm": 0.009760981425642967, + "learning_rate": 7.484671592669789e-07, + "loss": 0.0, + "num_input_tokens_seen": 85911248, + "step": 127475 + }, + { + "epoch": 3.1143576087753155, + "grad_norm": 0.16156858205795288, + "learning_rate": 7.483846243624359e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85914768, + "step": 127480 + }, + { + "epoch": 3.1144797596071627, + "grad_norm": 26.11109733581543, + "learning_rate": 7.483020912877072e-07, + "loss": 0.0927, + "num_input_tokens_seen": 85918032, + "step": 127485 + }, + { + "epoch": 3.11460191043901, + "grad_norm": 0.019733689725399017, + "learning_rate": 7.482195600433938e-07, + "loss": 0.0, + "num_input_tokens_seen": 85921552, + "step": 127490 + }, + { + "epoch": 3.114724061270857, + "grad_norm": 6.98584844940342e-05, + "learning_rate": 7.481370306300949e-07, + "loss": 0.0, + "num_input_tokens_seen": 85925200, + "step": 127495 + }, + { + "epoch": 3.1148462121027043, + "grad_norm": 0.327136367559433, + "learning_rate": 7.48054503048412e-07, + "loss": 0.0001, + "num_input_tokens_seen": 85928528, + "step": 127500 + }, + { + "epoch": 3.1149683629345515, + "grad_norm": 0.014904793351888657, + "learning_rate": 7.479719772989439e-07, + "loss": 0.0, + "num_input_tokens_seen": 85932112, + "step": 127505 + }, + { + "epoch": 3.1150905137663987, + "grad_norm": 0.12061361223459244, + "learning_rate": 7.478894533822914e-07, + "loss": 0.0004, + "num_input_tokens_seen": 85935120, + "step": 127510 + }, + { + "epoch": 3.115212664598246, + "grad_norm": 0.002666124375537038, + "learning_rate": 7.478069312990549e-07, + "loss": 0.0, + "num_input_tokens_seen": 85938320, + "step": 127515 + }, + { + "epoch": 3.115334815430093, + "grad_norm": 0.02227841690182686, + "learning_rate": 7.477244110498342e-07, + "loss": 0.0313, + "num_input_tokens_seen": 85941456, + "step": 127520 + }, + { + "epoch": 3.1154569662619402, + "grad_norm": 0.15282024443149567, + "learning_rate": 7.476418926352295e-07, + "loss": 0.0676, + "num_input_tokens_seen": 85944656, + "step": 127525 + }, + { + "epoch": 3.1155791170937874, + "grad_norm": 0.0020133310463279486, + "learning_rate": 7.475593760558406e-07, + "loss": 0.0, + "num_input_tokens_seen": 85947728, + "step": 127530 + }, + { + "epoch": 3.1157012679256346, + "grad_norm": 0.027102556079626083, + "learning_rate": 7.474768613122678e-07, + "loss": 0.0, + "num_input_tokens_seen": 85950672, + "step": 127535 + }, + { + "epoch": 3.115823418757482, + "grad_norm": 47.2922248840332, + "learning_rate": 7.473943484051115e-07, + "loss": 0.0224, + "num_input_tokens_seen": 85954384, + "step": 127540 + }, + { + "epoch": 3.115945569589329, + "grad_norm": 1599.6219482421875, + "learning_rate": 7.473118373349709e-07, + "loss": 0.0348, + "num_input_tokens_seen": 85957520, + "step": 127545 + }, + { + "epoch": 3.116067720421176, + "grad_norm": 0.002141246572136879, + "learning_rate": 7.472293281024474e-07, + "loss": 0.0, + "num_input_tokens_seen": 85960720, + "step": 127550 + }, + { + "epoch": 3.1161898712530234, + "grad_norm": 0.02709318697452545, + "learning_rate": 7.471468207081394e-07, + "loss": 0.0, + "num_input_tokens_seen": 85963984, + "step": 127555 + }, + { + "epoch": 3.1163120220848706, + "grad_norm": 0.005332686472684145, + "learning_rate": 7.470643151526483e-07, + "loss": 0.041, + "num_input_tokens_seen": 85966992, + "step": 127560 + }, + { + "epoch": 3.1164341729167178, + "grad_norm": 0.0005027592997066677, + "learning_rate": 7.469818114365732e-07, + "loss": 0.0, + "num_input_tokens_seen": 85970128, + "step": 127565 + }, + { + "epoch": 3.116556323748565, + "grad_norm": 0.0007410330581478775, + "learning_rate": 7.468993095605143e-07, + "loss": 0.0, + "num_input_tokens_seen": 85973392, + "step": 127570 + }, + { + "epoch": 3.1166784745804117, + "grad_norm": 0.010723591782152653, + "learning_rate": 7.46816809525072e-07, + "loss": 0.0, + "num_input_tokens_seen": 85976784, + "step": 127575 + }, + { + "epoch": 3.116800625412259, + "grad_norm": 0.020599083974957466, + "learning_rate": 7.467343113308459e-07, + "loss": 0.0, + "num_input_tokens_seen": 85980432, + "step": 127580 + }, + { + "epoch": 3.116922776244106, + "grad_norm": 6.975384894758463e-05, + "learning_rate": 7.466518149784362e-07, + "loss": 0.0788, + "num_input_tokens_seen": 85984144, + "step": 127585 + }, + { + "epoch": 3.1170449270759533, + "grad_norm": 0.0009970880346372724, + "learning_rate": 7.465693204684422e-07, + "loss": 0.0, + "num_input_tokens_seen": 85987280, + "step": 127590 + }, + { + "epoch": 3.1171670779078005, + "grad_norm": 0.0002569013158790767, + "learning_rate": 7.464868278014647e-07, + "loss": 0.0, + "num_input_tokens_seen": 85990544, + "step": 127595 + }, + { + "epoch": 3.1172892287396476, + "grad_norm": 0.00037578216870315373, + "learning_rate": 7.464043369781027e-07, + "loss": 0.0, + "num_input_tokens_seen": 85994064, + "step": 127600 + }, + { + "epoch": 3.117411379571495, + "grad_norm": 0.0027975961565971375, + "learning_rate": 7.463218479989568e-07, + "loss": 0.0, + "num_input_tokens_seen": 85997712, + "step": 127605 + }, + { + "epoch": 3.117533530403342, + "grad_norm": 0.0024667629040777683, + "learning_rate": 7.462393608646269e-07, + "loss": 0.0, + "num_input_tokens_seen": 86001488, + "step": 127610 + }, + { + "epoch": 3.117655681235189, + "grad_norm": 0.0030389088205993176, + "learning_rate": 7.461568755757122e-07, + "loss": 0.0, + "num_input_tokens_seen": 86004880, + "step": 127615 + }, + { + "epoch": 3.1177778320670364, + "grad_norm": 0.001127371215261519, + "learning_rate": 7.460743921328134e-07, + "loss": 0.0, + "num_input_tokens_seen": 86008272, + "step": 127620 + }, + { + "epoch": 3.1178999828988836, + "grad_norm": 0.0004868946853093803, + "learning_rate": 7.459919105365297e-07, + "loss": 0.0864, + "num_input_tokens_seen": 86011408, + "step": 127625 + }, + { + "epoch": 3.118022133730731, + "grad_norm": 0.0030997958965599537, + "learning_rate": 7.459094307874609e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86015184, + "step": 127630 + }, + { + "epoch": 3.118144284562578, + "grad_norm": 0.0009643174707889557, + "learning_rate": 7.458269528862075e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86018256, + "step": 127635 + }, + { + "epoch": 3.118266435394425, + "grad_norm": 0.0002588916686363518, + "learning_rate": 7.457444768333686e-07, + "loss": 0.058, + "num_input_tokens_seen": 86021392, + "step": 127640 + }, + { + "epoch": 3.1183885862262724, + "grad_norm": 0.006056373938918114, + "learning_rate": 7.456620026295446e-07, + "loss": 0.0, + "num_input_tokens_seen": 86025296, + "step": 127645 + }, + { + "epoch": 3.1185107370581195, + "grad_norm": 0.0017953312490135431, + "learning_rate": 7.455795302753345e-07, + "loss": 0.0, + "num_input_tokens_seen": 86028880, + "step": 127650 + }, + { + "epoch": 3.1186328878899667, + "grad_norm": 0.001279030810110271, + "learning_rate": 7.454970597713388e-07, + "loss": 0.0, + "num_input_tokens_seen": 86032272, + "step": 127655 + }, + { + "epoch": 3.1187550387218135, + "grad_norm": 0.009960848838090897, + "learning_rate": 7.454145911181566e-07, + "loss": 0.0, + "num_input_tokens_seen": 86035280, + "step": 127660 + }, + { + "epoch": 3.1188771895536607, + "grad_norm": 0.003310409840196371, + "learning_rate": 7.453321243163879e-07, + "loss": 0.0, + "num_input_tokens_seen": 86038416, + "step": 127665 + }, + { + "epoch": 3.118999340385508, + "grad_norm": 0.0014001899398863316, + "learning_rate": 7.452496593666329e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86041488, + "step": 127670 + }, + { + "epoch": 3.119121491217355, + "grad_norm": 0.006665100809186697, + "learning_rate": 7.451671962694907e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86044880, + "step": 127675 + }, + { + "epoch": 3.1192436420492022, + "grad_norm": 0.00016490106645505875, + "learning_rate": 7.450847350255613e-07, + "loss": 0.0, + "num_input_tokens_seen": 86048400, + "step": 127680 + }, + { + "epoch": 3.1193657928810494, + "grad_norm": 0.0031723338179290295, + "learning_rate": 7.450022756354443e-07, + "loss": 0.0, + "num_input_tokens_seen": 86051728, + "step": 127685 + }, + { + "epoch": 3.1194879437128966, + "grad_norm": 0.0014128751354292035, + "learning_rate": 7.449198180997389e-07, + "loss": 0.0, + "num_input_tokens_seen": 86054992, + "step": 127690 + }, + { + "epoch": 3.119610094544744, + "grad_norm": 0.006229817401617765, + "learning_rate": 7.448373624190458e-07, + "loss": 0.0, + "num_input_tokens_seen": 86057936, + "step": 127695 + }, + { + "epoch": 3.119732245376591, + "grad_norm": 0.05980667844414711, + "learning_rate": 7.447549085939636e-07, + "loss": 0.0609, + "num_input_tokens_seen": 86061200, + "step": 127700 + }, + { + "epoch": 3.119854396208438, + "grad_norm": 6.3800071075093e-06, + "learning_rate": 7.446724566250927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86064528, + "step": 127705 + }, + { + "epoch": 3.1199765470402854, + "grad_norm": 0.07691963762044907, + "learning_rate": 7.44590006513032e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86067728, + "step": 127710 + }, + { + "epoch": 3.1200986978721326, + "grad_norm": 0.0028570157010108232, + "learning_rate": 7.445075582583819e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86070928, + "step": 127715 + }, + { + "epoch": 3.1202208487039798, + "grad_norm": 0.07462915778160095, + "learning_rate": 7.444251118617411e-07, + "loss": 0.0, + "num_input_tokens_seen": 86074256, + "step": 127720 + }, + { + "epoch": 3.120342999535827, + "grad_norm": 0.008847139775753021, + "learning_rate": 7.443426673237098e-07, + "loss": 0.0, + "num_input_tokens_seen": 86077520, + "step": 127725 + }, + { + "epoch": 3.120465150367674, + "grad_norm": 0.04461780562996864, + "learning_rate": 7.442602246448875e-07, + "loss": 0.0, + "num_input_tokens_seen": 86080720, + "step": 127730 + }, + { + "epoch": 3.1205873011995213, + "grad_norm": 4.41354131908156e-05, + "learning_rate": 7.441777838258736e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86084304, + "step": 127735 + }, + { + "epoch": 3.1207094520313685, + "grad_norm": 0.013822735287249088, + "learning_rate": 7.440953448672678e-07, + "loss": 0.0, + "num_input_tokens_seen": 86087504, + "step": 127740 + }, + { + "epoch": 3.1208316028632157, + "grad_norm": 0.0070150066167116165, + "learning_rate": 7.440129077696691e-07, + "loss": 0.0, + "num_input_tokens_seen": 86090832, + "step": 127745 + }, + { + "epoch": 3.1209537536950624, + "grad_norm": 0.006862407084554434, + "learning_rate": 7.439304725336778e-07, + "loss": 0.0, + "num_input_tokens_seen": 86094352, + "step": 127750 + }, + { + "epoch": 3.1210759045269096, + "grad_norm": 0.0013574488693848252, + "learning_rate": 7.438480391598925e-07, + "loss": 0.0836, + "num_input_tokens_seen": 86097488, + "step": 127755 + }, + { + "epoch": 3.121198055358757, + "grad_norm": 0.0021971026435494423, + "learning_rate": 7.437656076489133e-07, + "loss": 0.0, + "num_input_tokens_seen": 86100624, + "step": 127760 + }, + { + "epoch": 3.121320206190604, + "grad_norm": 0.0034022280015051365, + "learning_rate": 7.436831780013398e-07, + "loss": 0.0052, + "num_input_tokens_seen": 86104336, + "step": 127765 + }, + { + "epoch": 3.121442357022451, + "grad_norm": 0.09795635938644409, + "learning_rate": 7.436007502177708e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86108240, + "step": 127770 + }, + { + "epoch": 3.1215645078542984, + "grad_norm": 0.0012237750925123692, + "learning_rate": 7.435183242988066e-07, + "loss": 0.0, + "num_input_tokens_seen": 86111440, + "step": 127775 + }, + { + "epoch": 3.1216866586861456, + "grad_norm": 0.0012300205416977406, + "learning_rate": 7.434359002450458e-07, + "loss": 0.0, + "num_input_tokens_seen": 86115216, + "step": 127780 + }, + { + "epoch": 3.1218088095179928, + "grad_norm": 0.37210115790367126, + "learning_rate": 7.433534780570881e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86118672, + "step": 127785 + }, + { + "epoch": 3.12193096034984, + "grad_norm": 0.0007529134163632989, + "learning_rate": 7.432710577355332e-07, + "loss": 0.0, + "num_input_tokens_seen": 86121552, + "step": 127790 + }, + { + "epoch": 3.122053111181687, + "grad_norm": 0.0013815355487167835, + "learning_rate": 7.431886392809799e-07, + "loss": 0.131, + "num_input_tokens_seen": 86125136, + "step": 127795 + }, + { + "epoch": 3.1221752620135343, + "grad_norm": 0.03114251233637333, + "learning_rate": 7.431062226940281e-07, + "loss": 0.0, + "num_input_tokens_seen": 86128336, + "step": 127800 + }, + { + "epoch": 3.1222974128453815, + "grad_norm": 0.0010919078486040235, + "learning_rate": 7.430238079752768e-07, + "loss": 0.0, + "num_input_tokens_seen": 86131536, + "step": 127805 + }, + { + "epoch": 3.1224195636772287, + "grad_norm": 0.0005009145243093371, + "learning_rate": 7.429413951253259e-07, + "loss": 0.0, + "num_input_tokens_seen": 86134672, + "step": 127810 + }, + { + "epoch": 3.122541714509076, + "grad_norm": 0.0005763991503044963, + "learning_rate": 7.428589841447737e-07, + "loss": 0.0, + "num_input_tokens_seen": 86138512, + "step": 127815 + }, + { + "epoch": 3.122663865340923, + "grad_norm": 0.0008991776849143207, + "learning_rate": 7.427765750342201e-07, + "loss": 0.0, + "num_input_tokens_seen": 86141648, + "step": 127820 + }, + { + "epoch": 3.1227860161727703, + "grad_norm": 0.0005382252857089043, + "learning_rate": 7.42694167794265e-07, + "loss": 0.0, + "num_input_tokens_seen": 86145168, + "step": 127825 + }, + { + "epoch": 3.1229081670046175, + "grad_norm": 0.000699529075063765, + "learning_rate": 7.426117624255068e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86148304, + "step": 127830 + }, + { + "epoch": 3.1230303178364647, + "grad_norm": 0.0004691076755989343, + "learning_rate": 7.425293589285453e-07, + "loss": 0.0, + "num_input_tokens_seen": 86151440, + "step": 127835 + }, + { + "epoch": 3.1231524686683114, + "grad_norm": 0.0002849614538718015, + "learning_rate": 7.424469573039793e-07, + "loss": 0.0, + "num_input_tokens_seen": 86154832, + "step": 127840 + }, + { + "epoch": 3.1232746195001586, + "grad_norm": 0.001343605574220419, + "learning_rate": 7.423645575524087e-07, + "loss": 0.1155, + "num_input_tokens_seen": 86158480, + "step": 127845 + }, + { + "epoch": 3.123396770332006, + "grad_norm": 0.0011115703964605927, + "learning_rate": 7.422821596744318e-07, + "loss": 0.0, + "num_input_tokens_seen": 86162128, + "step": 127850 + }, + { + "epoch": 3.123518921163853, + "grad_norm": 0.0004212648200336844, + "learning_rate": 7.421997636706486e-07, + "loss": 0.0, + "num_input_tokens_seen": 86165456, + "step": 127855 + }, + { + "epoch": 3.1236410719957, + "grad_norm": 0.0056180949322879314, + "learning_rate": 7.421173695416582e-07, + "loss": 0.0978, + "num_input_tokens_seen": 86168912, + "step": 127860 + }, + { + "epoch": 3.1237632228275474, + "grad_norm": 0.00836129393428564, + "learning_rate": 7.420349772880592e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86172432, + "step": 127865 + }, + { + "epoch": 3.1238853736593946, + "grad_norm": 0.0003254195617046207, + "learning_rate": 7.419525869104518e-07, + "loss": 0.0, + "num_input_tokens_seen": 86175568, + "step": 127870 + }, + { + "epoch": 3.1240075244912417, + "grad_norm": 0.00044932254240848124, + "learning_rate": 7.418701984094341e-07, + "loss": 0.0, + "num_input_tokens_seen": 86178832, + "step": 127875 + }, + { + "epoch": 3.124129675323089, + "grad_norm": 0.006995463743805885, + "learning_rate": 7.41787811785606e-07, + "loss": 0.0, + "num_input_tokens_seen": 86181968, + "step": 127880 + }, + { + "epoch": 3.124251826154936, + "grad_norm": 0.00013451272388920188, + "learning_rate": 7.417054270395664e-07, + "loss": 0.0, + "num_input_tokens_seen": 86185360, + "step": 127885 + }, + { + "epoch": 3.1243739769867833, + "grad_norm": 0.016113661229610443, + "learning_rate": 7.416230441719143e-07, + "loss": 0.0, + "num_input_tokens_seen": 86188944, + "step": 127890 + }, + { + "epoch": 3.1244961278186305, + "grad_norm": 0.0011379237985238433, + "learning_rate": 7.415406631832493e-07, + "loss": 0.0, + "num_input_tokens_seen": 86192336, + "step": 127895 + }, + { + "epoch": 3.1246182786504777, + "grad_norm": 0.012022904120385647, + "learning_rate": 7.414582840741696e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86196368, + "step": 127900 + }, + { + "epoch": 3.124740429482325, + "grad_norm": 0.01843303069472313, + "learning_rate": 7.413759068452752e-07, + "loss": 0.0, + "num_input_tokens_seen": 86199760, + "step": 127905 + }, + { + "epoch": 3.124862580314172, + "grad_norm": 0.0009940828895196319, + "learning_rate": 7.412935314971643e-07, + "loss": 0.0339, + "num_input_tokens_seen": 86203216, + "step": 127910 + }, + { + "epoch": 3.1249847311460193, + "grad_norm": 0.0009316856157965958, + "learning_rate": 7.412111580304366e-07, + "loss": 0.0, + "num_input_tokens_seen": 86206224, + "step": 127915 + }, + { + "epoch": 3.1251068819778665, + "grad_norm": 0.013916014693677425, + "learning_rate": 7.411287864456912e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86209616, + "step": 127920 + }, + { + "epoch": 3.125229032809713, + "grad_norm": 0.0019733826629817486, + "learning_rate": 7.410464167435265e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86212880, + "step": 127925 + }, + { + "epoch": 3.1253511836415604, + "grad_norm": 0.0007011366542428732, + "learning_rate": 7.409640489245423e-07, + "loss": 0.0, + "num_input_tokens_seen": 86216400, + "step": 127930 + }, + { + "epoch": 3.1254733344734076, + "grad_norm": 0.0009087013895623386, + "learning_rate": 7.408816829893371e-07, + "loss": 0.0, + "num_input_tokens_seen": 86219472, + "step": 127935 + }, + { + "epoch": 3.1255954853052548, + "grad_norm": 0.008903411217033863, + "learning_rate": 7.407993189385098e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86223184, + "step": 127940 + }, + { + "epoch": 3.125717636137102, + "grad_norm": 0.004272781778126955, + "learning_rate": 7.407169567726599e-07, + "loss": 0.0588, + "num_input_tokens_seen": 86226320, + "step": 127945 + }, + { + "epoch": 3.125839786968949, + "grad_norm": 0.006159925367683172, + "learning_rate": 7.406345964923857e-07, + "loss": 0.0, + "num_input_tokens_seen": 86229840, + "step": 127950 + }, + { + "epoch": 3.1259619378007963, + "grad_norm": 0.0068336776457726955, + "learning_rate": 7.40552238098287e-07, + "loss": 0.0, + "num_input_tokens_seen": 86233744, + "step": 127955 + }, + { + "epoch": 3.1260840886326435, + "grad_norm": 0.022573009133338928, + "learning_rate": 7.404698815909616e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86237264, + "step": 127960 + }, + { + "epoch": 3.1262062394644907, + "grad_norm": 0.005695146508514881, + "learning_rate": 7.403875269710093e-07, + "loss": 0.0024, + "num_input_tokens_seen": 86240720, + "step": 127965 + }, + { + "epoch": 3.126328390296338, + "grad_norm": 0.06871867179870605, + "learning_rate": 7.403051742390285e-07, + "loss": 0.0, + "num_input_tokens_seen": 86243856, + "step": 127970 + }, + { + "epoch": 3.126450541128185, + "grad_norm": 0.015363491140305996, + "learning_rate": 7.402228233956184e-07, + "loss": 0.0345, + "num_input_tokens_seen": 86247248, + "step": 127975 + }, + { + "epoch": 3.1265726919600323, + "grad_norm": 0.03830622509121895, + "learning_rate": 7.401404744413782e-07, + "loss": 0.0, + "num_input_tokens_seen": 86250832, + "step": 127980 + }, + { + "epoch": 3.1266948427918795, + "grad_norm": 0.0008308925316669047, + "learning_rate": 7.40058127376906e-07, + "loss": 0.0, + "num_input_tokens_seen": 86254800, + "step": 127985 + }, + { + "epoch": 3.1268169936237267, + "grad_norm": 0.0017075296491384506, + "learning_rate": 7.399757822028011e-07, + "loss": 0.0, + "num_input_tokens_seen": 86258256, + "step": 127990 + }, + { + "epoch": 3.126939144455574, + "grad_norm": 0.007749969605356455, + "learning_rate": 7.398934389196622e-07, + "loss": 0.0, + "num_input_tokens_seen": 86261456, + "step": 127995 + }, + { + "epoch": 3.127061295287421, + "grad_norm": 0.0004765443445648998, + "learning_rate": 7.398110975280884e-07, + "loss": 0.0, + "num_input_tokens_seen": 86265424, + "step": 128000 + }, + { + "epoch": 3.1271834461192682, + "grad_norm": 0.0021843453869223595, + "learning_rate": 7.39728758028678e-07, + "loss": 0.0, + "num_input_tokens_seen": 86269008, + "step": 128005 + }, + { + "epoch": 3.1273055969511154, + "grad_norm": 0.0013905063970014453, + "learning_rate": 7.3964642042203e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86272464, + "step": 128010 + }, + { + "epoch": 3.1274277477829626, + "grad_norm": 0.03797272965312004, + "learning_rate": 7.395640847087436e-07, + "loss": 0.0, + "num_input_tokens_seen": 86275856, + "step": 128015 + }, + { + "epoch": 3.1275498986148094, + "grad_norm": 0.09020286798477173, + "learning_rate": 7.394817508894169e-07, + "loss": 0.0447, + "num_input_tokens_seen": 86279184, + "step": 128020 + }, + { + "epoch": 3.1276720494466566, + "grad_norm": 0.0005714365397579968, + "learning_rate": 7.393994189646493e-07, + "loss": 0.0, + "num_input_tokens_seen": 86282320, + "step": 128025 + }, + { + "epoch": 3.1277942002785037, + "grad_norm": 0.004440112039446831, + "learning_rate": 7.393170889350388e-07, + "loss": 0.0, + "num_input_tokens_seen": 86285712, + "step": 128030 + }, + { + "epoch": 3.127916351110351, + "grad_norm": 0.0017021347302943468, + "learning_rate": 7.392347608011849e-07, + "loss": 0.0, + "num_input_tokens_seen": 86289616, + "step": 128035 + }, + { + "epoch": 3.128038501942198, + "grad_norm": 0.0017139667179435492, + "learning_rate": 7.391524345636859e-07, + "loss": 0.0, + "num_input_tokens_seen": 86293072, + "step": 128040 + }, + { + "epoch": 3.1281606527740453, + "grad_norm": 0.02372741885483265, + "learning_rate": 7.390701102231405e-07, + "loss": 0.0, + "num_input_tokens_seen": 86296592, + "step": 128045 + }, + { + "epoch": 3.1282828036058925, + "grad_norm": 0.0010716107208281755, + "learning_rate": 7.389877877801476e-07, + "loss": 0.0, + "num_input_tokens_seen": 86299792, + "step": 128050 + }, + { + "epoch": 3.1284049544377397, + "grad_norm": 0.002866087481379509, + "learning_rate": 7.389054672353054e-07, + "loss": 0.0, + "num_input_tokens_seen": 86303568, + "step": 128055 + }, + { + "epoch": 3.128527105269587, + "grad_norm": 0.002422518329694867, + "learning_rate": 7.388231485892132e-07, + "loss": 0.0, + "num_input_tokens_seen": 86306960, + "step": 128060 + }, + { + "epoch": 3.128649256101434, + "grad_norm": 0.002960509154945612, + "learning_rate": 7.38740831842469e-07, + "loss": 0.0, + "num_input_tokens_seen": 86310544, + "step": 128065 + }, + { + "epoch": 3.1287714069332813, + "grad_norm": 0.013696005567908287, + "learning_rate": 7.386585169956717e-07, + "loss": 0.0553, + "num_input_tokens_seen": 86313808, + "step": 128070 + }, + { + "epoch": 3.1288935577651285, + "grad_norm": 0.005466190166771412, + "learning_rate": 7.385762040494203e-07, + "loss": 0.0, + "num_input_tokens_seen": 86317200, + "step": 128075 + }, + { + "epoch": 3.1290157085969756, + "grad_norm": 0.00015159814211074263, + "learning_rate": 7.384938930043128e-07, + "loss": 0.0, + "num_input_tokens_seen": 86320784, + "step": 128080 + }, + { + "epoch": 3.129137859428823, + "grad_norm": 0.03894127905368805, + "learning_rate": 7.384115838609483e-07, + "loss": 0.0, + "num_input_tokens_seen": 86324240, + "step": 128085 + }, + { + "epoch": 3.12926001026067, + "grad_norm": 0.0035981256514787674, + "learning_rate": 7.38329276619925e-07, + "loss": 0.0, + "num_input_tokens_seen": 86327632, + "step": 128090 + }, + { + "epoch": 3.129382161092517, + "grad_norm": 43.11925506591797, + "learning_rate": 7.382469712818413e-07, + "loss": 0.0439, + "num_input_tokens_seen": 86330960, + "step": 128095 + }, + { + "epoch": 3.1295043119243644, + "grad_norm": 11.641146659851074, + "learning_rate": 7.381646678472965e-07, + "loss": 0.0949, + "num_input_tokens_seen": 86334352, + "step": 128100 + }, + { + "epoch": 3.129626462756211, + "grad_norm": 0.0009966957150027156, + "learning_rate": 7.380823663168882e-07, + "loss": 0.0, + "num_input_tokens_seen": 86338000, + "step": 128105 + }, + { + "epoch": 3.1297486135880583, + "grad_norm": 0.0005179878207854927, + "learning_rate": 7.380000666912158e-07, + "loss": 0.0439, + "num_input_tokens_seen": 86341840, + "step": 128110 + }, + { + "epoch": 3.1298707644199055, + "grad_norm": 0.000786024727858603, + "learning_rate": 7.379177689708771e-07, + "loss": 0.0, + "num_input_tokens_seen": 86345040, + "step": 128115 + }, + { + "epoch": 3.1299929152517527, + "grad_norm": 1.1530879735946655, + "learning_rate": 7.378354731564711e-07, + "loss": 0.0408, + "num_input_tokens_seen": 86348688, + "step": 128120 + }, + { + "epoch": 3.1301150660836, + "grad_norm": 16.093496322631836, + "learning_rate": 7.377531792485958e-07, + "loss": 0.0332, + "num_input_tokens_seen": 86351760, + "step": 128125 + }, + { + "epoch": 3.130237216915447, + "grad_norm": 0.03726727515459061, + "learning_rate": 7.376708872478499e-07, + "loss": 0.0, + "num_input_tokens_seen": 86354832, + "step": 128130 + }, + { + "epoch": 3.1303593677472943, + "grad_norm": 0.0029666442424058914, + "learning_rate": 7.375885971548321e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86358416, + "step": 128135 + }, + { + "epoch": 3.1304815185791415, + "grad_norm": 0.002378805074840784, + "learning_rate": 7.375063089701405e-07, + "loss": 0.0, + "num_input_tokens_seen": 86361680, + "step": 128140 + }, + { + "epoch": 3.1306036694109887, + "grad_norm": 0.12748949229717255, + "learning_rate": 7.374240226943737e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86364880, + "step": 128145 + }, + { + "epoch": 3.130725820242836, + "grad_norm": 20.794109344482422, + "learning_rate": 7.3734173832813e-07, + "loss": 0.0008, + "num_input_tokens_seen": 86368016, + "step": 128150 + }, + { + "epoch": 3.130847971074683, + "grad_norm": 0.0004399059107527137, + "learning_rate": 7.37259455872008e-07, + "loss": 0.0, + "num_input_tokens_seen": 86371088, + "step": 128155 + }, + { + "epoch": 3.1309701219065302, + "grad_norm": 0.00037931985571049154, + "learning_rate": 7.371771753266055e-07, + "loss": 0.0, + "num_input_tokens_seen": 86374608, + "step": 128160 + }, + { + "epoch": 3.1310922727383774, + "grad_norm": 0.00013678277900908142, + "learning_rate": 7.370948966925212e-07, + "loss": 0.0, + "num_input_tokens_seen": 86378384, + "step": 128165 + }, + { + "epoch": 3.1312144235702246, + "grad_norm": 0.0024177455343306065, + "learning_rate": 7.370126199703541e-07, + "loss": 0.0621, + "num_input_tokens_seen": 86381264, + "step": 128170 + }, + { + "epoch": 3.131336574402072, + "grad_norm": 769.744873046875, + "learning_rate": 7.369303451607014e-07, + "loss": 0.0168, + "num_input_tokens_seen": 86384336, + "step": 128175 + }, + { + "epoch": 3.131458725233919, + "grad_norm": 0.003459363942965865, + "learning_rate": 7.368480722641626e-07, + "loss": 0.0, + "num_input_tokens_seen": 86388112, + "step": 128180 + }, + { + "epoch": 3.131580876065766, + "grad_norm": 0.0007123707910068333, + "learning_rate": 7.367658012813347e-07, + "loss": 0.0, + "num_input_tokens_seen": 86392016, + "step": 128185 + }, + { + "epoch": 3.1317030268976134, + "grad_norm": 0.0074523743242025375, + "learning_rate": 7.366835322128171e-07, + "loss": 0.0, + "num_input_tokens_seen": 86395344, + "step": 128190 + }, + { + "epoch": 3.1318251777294606, + "grad_norm": 0.008593794889748096, + "learning_rate": 7.366012650592076e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86398736, + "step": 128195 + }, + { + "epoch": 3.1319473285613073, + "grad_norm": 0.07377134263515472, + "learning_rate": 7.365189998211046e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86402384, + "step": 128200 + }, + { + "epoch": 3.1320694793931545, + "grad_norm": 0.000612644711509347, + "learning_rate": 7.364367364991064e-07, + "loss": 0.0, + "num_input_tokens_seen": 86406032, + "step": 128205 + }, + { + "epoch": 3.1321916302250017, + "grad_norm": 0.00483801169320941, + "learning_rate": 7.363544750938109e-07, + "loss": 0.0, + "num_input_tokens_seen": 86410128, + "step": 128210 + }, + { + "epoch": 3.132313781056849, + "grad_norm": 0.00010632204066496342, + "learning_rate": 7.362722156058169e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86413520, + "step": 128215 + }, + { + "epoch": 3.132435931888696, + "grad_norm": 0.0007703229202888906, + "learning_rate": 7.361899580357219e-07, + "loss": 0.0, + "num_input_tokens_seen": 86416592, + "step": 128220 + }, + { + "epoch": 3.1325580827205433, + "grad_norm": 0.0008033441845327616, + "learning_rate": 7.361077023841244e-07, + "loss": 0.0, + "num_input_tokens_seen": 86419984, + "step": 128225 + }, + { + "epoch": 3.1326802335523904, + "grad_norm": 0.0024972441606223583, + "learning_rate": 7.360254486516231e-07, + "loss": 0.0, + "num_input_tokens_seen": 86423568, + "step": 128230 + }, + { + "epoch": 3.1328023843842376, + "grad_norm": 0.0006891600205563009, + "learning_rate": 7.359431968388153e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86427088, + "step": 128235 + }, + { + "epoch": 3.132924535216085, + "grad_norm": 0.0033626670483499765, + "learning_rate": 7.358609469463e-07, + "loss": 0.0, + "num_input_tokens_seen": 86430160, + "step": 128240 + }, + { + "epoch": 3.133046686047932, + "grad_norm": 0.49775242805480957, + "learning_rate": 7.357786989746748e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86433232, + "step": 128245 + }, + { + "epoch": 3.133168836879779, + "grad_norm": 0.00190130271948874, + "learning_rate": 7.356964529245378e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86436432, + "step": 128250 + }, + { + "epoch": 3.1332909877116264, + "grad_norm": 0.0011852835305035114, + "learning_rate": 7.356142087964876e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86439888, + "step": 128255 + }, + { + "epoch": 3.1334131385434736, + "grad_norm": 6.330687756417319e-05, + "learning_rate": 7.355319665911217e-07, + "loss": 0.0, + "num_input_tokens_seen": 86443408, + "step": 128260 + }, + { + "epoch": 3.1335352893753208, + "grad_norm": 0.00018317383364774287, + "learning_rate": 7.354497263090386e-07, + "loss": 0.0, + "num_input_tokens_seen": 86446608, + "step": 128265 + }, + { + "epoch": 3.133657440207168, + "grad_norm": 0.001978239743039012, + "learning_rate": 7.353674879508363e-07, + "loss": 0.0, + "num_input_tokens_seen": 86450448, + "step": 128270 + }, + { + "epoch": 3.133779591039015, + "grad_norm": 0.003964691422879696, + "learning_rate": 7.352852515171128e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86453648, + "step": 128275 + }, + { + "epoch": 3.1339017418708623, + "grad_norm": 0.015365658327937126, + "learning_rate": 7.35203017008466e-07, + "loss": 0.0, + "num_input_tokens_seen": 86457104, + "step": 128280 + }, + { + "epoch": 3.134023892702709, + "grad_norm": 0.002852953039109707, + "learning_rate": 7.351207844254938e-07, + "loss": 0.0, + "num_input_tokens_seen": 86460560, + "step": 128285 + }, + { + "epoch": 3.1341460435345563, + "grad_norm": 0.2430233508348465, + "learning_rate": 7.350385537687951e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86463824, + "step": 128290 + }, + { + "epoch": 3.1342681943664035, + "grad_norm": 0.0005990845966152847, + "learning_rate": 7.349563250389672e-07, + "loss": 0.0, + "num_input_tokens_seen": 86467088, + "step": 128295 + }, + { + "epoch": 3.1343903451982507, + "grad_norm": 0.0022348244674503803, + "learning_rate": 7.34874098236608e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86470736, + "step": 128300 + }, + { + "epoch": 3.134512496030098, + "grad_norm": 0.0038873967714607716, + "learning_rate": 7.347918733623157e-07, + "loss": 0.0, + "num_input_tokens_seen": 86474576, + "step": 128305 + }, + { + "epoch": 3.134634646861945, + "grad_norm": 0.0013605405110865831, + "learning_rate": 7.347096504166885e-07, + "loss": 0.0, + "num_input_tokens_seen": 86478160, + "step": 128310 + }, + { + "epoch": 3.1347567976937922, + "grad_norm": 0.02028125710785389, + "learning_rate": 7.346274294003237e-07, + "loss": 0.0, + "num_input_tokens_seen": 86481552, + "step": 128315 + }, + { + "epoch": 3.1348789485256394, + "grad_norm": 0.00044287380296736956, + "learning_rate": 7.345452103138195e-07, + "loss": 0.0, + "num_input_tokens_seen": 86485200, + "step": 128320 + }, + { + "epoch": 3.1350010993574866, + "grad_norm": 0.008367608301341534, + "learning_rate": 7.344629931577744e-07, + "loss": 0.0, + "num_input_tokens_seen": 86488592, + "step": 128325 + }, + { + "epoch": 3.135123250189334, + "grad_norm": 0.044535327702760696, + "learning_rate": 7.343807779327855e-07, + "loss": 0.0, + "num_input_tokens_seen": 86491792, + "step": 128330 + }, + { + "epoch": 3.135245401021181, + "grad_norm": 0.0006210493156686425, + "learning_rate": 7.342985646394513e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86495888, + "step": 128335 + }, + { + "epoch": 3.135367551853028, + "grad_norm": 0.0006383709842339158, + "learning_rate": 7.342163532783689e-07, + "loss": 0.0, + "num_input_tokens_seen": 86499024, + "step": 128340 + }, + { + "epoch": 3.1354897026848754, + "grad_norm": 1.4991077478043735e-05, + "learning_rate": 7.341341438501372e-07, + "loss": 0.0, + "num_input_tokens_seen": 86502608, + "step": 128345 + }, + { + "epoch": 3.1356118535167226, + "grad_norm": 0.001342342933639884, + "learning_rate": 7.340519363553532e-07, + "loss": 0.0, + "num_input_tokens_seen": 86506128, + "step": 128350 + }, + { + "epoch": 3.1357340043485697, + "grad_norm": 4.490639184950851e-05, + "learning_rate": 7.339697307946152e-07, + "loss": 0.0, + "num_input_tokens_seen": 86509776, + "step": 128355 + }, + { + "epoch": 3.135856155180417, + "grad_norm": 0.0005294650327414274, + "learning_rate": 7.338875271685211e-07, + "loss": 0.0, + "num_input_tokens_seen": 86513168, + "step": 128360 + }, + { + "epoch": 3.135978306012264, + "grad_norm": 0.0015506184427067637, + "learning_rate": 7.33805325477668e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86516560, + "step": 128365 + }, + { + "epoch": 3.136100456844111, + "grad_norm": 0.0003006265906151384, + "learning_rate": 7.337231257226546e-07, + "loss": 0.0005, + "num_input_tokens_seen": 86520080, + "step": 128370 + }, + { + "epoch": 3.1362226076759585, + "grad_norm": 0.026055965572595596, + "learning_rate": 7.336409279040778e-07, + "loss": 0.0, + "num_input_tokens_seen": 86523536, + "step": 128375 + }, + { + "epoch": 3.1363447585078053, + "grad_norm": 0.000478060421301052, + "learning_rate": 7.335587320225359e-07, + "loss": 0.0, + "num_input_tokens_seen": 86526864, + "step": 128380 + }, + { + "epoch": 3.1364669093396524, + "grad_norm": 0.5377205014228821, + "learning_rate": 7.33476538078627e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86530384, + "step": 128385 + }, + { + "epoch": 3.1365890601714996, + "grad_norm": 0.00033786255517043173, + "learning_rate": 7.333943460729479e-07, + "loss": 0.0, + "num_input_tokens_seen": 86533776, + "step": 128390 + }, + { + "epoch": 3.136711211003347, + "grad_norm": 0.011143106035888195, + "learning_rate": 7.333121560060973e-07, + "loss": 0.0, + "num_input_tokens_seen": 86537424, + "step": 128395 + }, + { + "epoch": 3.136833361835194, + "grad_norm": 0.00022679645917378366, + "learning_rate": 7.332299678786722e-07, + "loss": 0.0, + "num_input_tokens_seen": 86541520, + "step": 128400 + }, + { + "epoch": 3.136955512667041, + "grad_norm": 0.00040304975118488073, + "learning_rate": 7.331477816912703e-07, + "loss": 0.0329, + "num_input_tokens_seen": 86544464, + "step": 128405 + }, + { + "epoch": 3.1370776634988884, + "grad_norm": 0.0011125191813334823, + "learning_rate": 7.330655974444899e-07, + "loss": 0.0, + "num_input_tokens_seen": 86547664, + "step": 128410 + }, + { + "epoch": 3.1371998143307356, + "grad_norm": 0.0013933493755757809, + "learning_rate": 7.329834151389278e-07, + "loss": 0.0, + "num_input_tokens_seen": 86550544, + "step": 128415 + }, + { + "epoch": 3.1373219651625828, + "grad_norm": 0.0029185281600803137, + "learning_rate": 7.329012347751827e-07, + "loss": 0.0406, + "num_input_tokens_seen": 86553872, + "step": 128420 + }, + { + "epoch": 3.13744411599443, + "grad_norm": 0.00033394224010407925, + "learning_rate": 7.328190563538512e-07, + "loss": 0.0, + "num_input_tokens_seen": 86557072, + "step": 128425 + }, + { + "epoch": 3.137566266826277, + "grad_norm": 0.004945480264723301, + "learning_rate": 7.327368798755318e-07, + "loss": 0.0, + "num_input_tokens_seen": 86560656, + "step": 128430 + }, + { + "epoch": 3.1376884176581243, + "grad_norm": 0.033799320459365845, + "learning_rate": 7.326547053408212e-07, + "loss": 0.0, + "num_input_tokens_seen": 86564368, + "step": 128435 + }, + { + "epoch": 3.1378105684899715, + "grad_norm": 0.008554290048778057, + "learning_rate": 7.325725327503175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86567312, + "step": 128440 + }, + { + "epoch": 3.1379327193218187, + "grad_norm": 0.0007735177641734481, + "learning_rate": 7.324903621046188e-07, + "loss": 0.0, + "num_input_tokens_seen": 86570512, + "step": 128445 + }, + { + "epoch": 3.138054870153666, + "grad_norm": 0.0013030408881604671, + "learning_rate": 7.324081934043218e-07, + "loss": 0.0, + "num_input_tokens_seen": 86574992, + "step": 128450 + }, + { + "epoch": 3.138177020985513, + "grad_norm": 0.0024377258960157633, + "learning_rate": 7.323260266500244e-07, + "loss": 0.0479, + "num_input_tokens_seen": 86578384, + "step": 128455 + }, + { + "epoch": 3.1382991718173603, + "grad_norm": 0.005287248641252518, + "learning_rate": 7.322438618423241e-07, + "loss": 0.0, + "num_input_tokens_seen": 86583760, + "step": 128460 + }, + { + "epoch": 3.138421322649207, + "grad_norm": 10.693768501281738, + "learning_rate": 7.321616989818189e-07, + "loss": 0.0339, + "num_input_tokens_seen": 86587600, + "step": 128465 + }, + { + "epoch": 3.1385434734810542, + "grad_norm": 0.0022159749642014503, + "learning_rate": 7.320795380691051e-07, + "loss": 0.0587, + "num_input_tokens_seen": 86590992, + "step": 128470 + }, + { + "epoch": 3.1386656243129014, + "grad_norm": 0.04777996987104416, + "learning_rate": 7.319973791047813e-07, + "loss": 0.0, + "num_input_tokens_seen": 86594320, + "step": 128475 + }, + { + "epoch": 3.1387877751447486, + "grad_norm": 0.12414655089378357, + "learning_rate": 7.319152220894449e-07, + "loss": 0.0, + "num_input_tokens_seen": 86597584, + "step": 128480 + }, + { + "epoch": 3.138909925976596, + "grad_norm": 0.0007596567156724632, + "learning_rate": 7.318330670236927e-07, + "loss": 0.0255, + "num_input_tokens_seen": 86601488, + "step": 128485 + }, + { + "epoch": 3.139032076808443, + "grad_norm": 0.7451971173286438, + "learning_rate": 7.31750913908123e-07, + "loss": 0.0696, + "num_input_tokens_seen": 86604880, + "step": 128490 + }, + { + "epoch": 3.13915422764029, + "grad_norm": 717.7139282226562, + "learning_rate": 7.316687627433323e-07, + "loss": 0.0703, + "num_input_tokens_seen": 86608592, + "step": 128495 + }, + { + "epoch": 3.1392763784721374, + "grad_norm": 0.001274026115424931, + "learning_rate": 7.315866135299189e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86612112, + "step": 128500 + }, + { + "epoch": 3.1393985293039846, + "grad_norm": 0.005512653850018978, + "learning_rate": 7.315044662684797e-07, + "loss": 0.0, + "num_input_tokens_seen": 86615184, + "step": 128505 + }, + { + "epoch": 3.1395206801358317, + "grad_norm": 0.45431196689605713, + "learning_rate": 7.314223209596122e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86618384, + "step": 128510 + }, + { + "epoch": 3.139642830967679, + "grad_norm": 0.3715175688266754, + "learning_rate": 7.313401776039142e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86621648, + "step": 128515 + }, + { + "epoch": 3.139764981799526, + "grad_norm": 1.0766215324401855, + "learning_rate": 7.312580362019822e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86624912, + "step": 128520 + }, + { + "epoch": 3.1398871326313733, + "grad_norm": 0.00018668769916985184, + "learning_rate": 7.311758967544143e-07, + "loss": 0.0, + "num_input_tokens_seen": 86628112, + "step": 128525 + }, + { + "epoch": 3.1400092834632205, + "grad_norm": 1.5411447748192586e-05, + "learning_rate": 7.310937592618074e-07, + "loss": 0.0716, + "num_input_tokens_seen": 86631440, + "step": 128530 + }, + { + "epoch": 3.1401314342950677, + "grad_norm": 0.04406026378273964, + "learning_rate": 7.31011623724759e-07, + "loss": 0.0, + "num_input_tokens_seen": 86635088, + "step": 128535 + }, + { + "epoch": 3.140253585126915, + "grad_norm": 0.25144264101982117, + "learning_rate": 7.309294901438667e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86638480, + "step": 128540 + }, + { + "epoch": 3.140375735958762, + "grad_norm": 0.003910064697265625, + "learning_rate": 7.308473585197272e-07, + "loss": 0.0, + "num_input_tokens_seen": 86641936, + "step": 128545 + }, + { + "epoch": 3.140497886790609, + "grad_norm": 0.008174985647201538, + "learning_rate": 7.307652288529385e-07, + "loss": 0.0, + "num_input_tokens_seen": 86644880, + "step": 128550 + }, + { + "epoch": 3.140620037622456, + "grad_norm": 0.0032011514995247126, + "learning_rate": 7.306831011440971e-07, + "loss": 0.0, + "num_input_tokens_seen": 86649168, + "step": 128555 + }, + { + "epoch": 3.140742188454303, + "grad_norm": 0.0005409402656368911, + "learning_rate": 7.30600975393801e-07, + "loss": 0.0, + "num_input_tokens_seen": 86652432, + "step": 128560 + }, + { + "epoch": 3.1408643392861504, + "grad_norm": 0.0008832211024127901, + "learning_rate": 7.305188516026468e-07, + "loss": 0.0822, + "num_input_tokens_seen": 86655696, + "step": 128565 + }, + { + "epoch": 3.1409864901179976, + "grad_norm": 0.004884419031441212, + "learning_rate": 7.304367297712318e-07, + "loss": 0.0048, + "num_input_tokens_seen": 86659088, + "step": 128570 + }, + { + "epoch": 3.1411086409498448, + "grad_norm": 0.004036621190607548, + "learning_rate": 7.303546099001539e-07, + "loss": 0.0, + "num_input_tokens_seen": 86663056, + "step": 128575 + }, + { + "epoch": 3.141230791781692, + "grad_norm": 0.0024037614930421114, + "learning_rate": 7.302724919900093e-07, + "loss": 0.0, + "num_input_tokens_seen": 86666128, + "step": 128580 + }, + { + "epoch": 3.141352942613539, + "grad_norm": 0.022274015471339226, + "learning_rate": 7.301903760413961e-07, + "loss": 0.036, + "num_input_tokens_seen": 86669776, + "step": 128585 + }, + { + "epoch": 3.1414750934453863, + "grad_norm": 0.001364888739772141, + "learning_rate": 7.301082620549107e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86672656, + "step": 128590 + }, + { + "epoch": 3.1415972442772335, + "grad_norm": 0.00043467117939144373, + "learning_rate": 7.300261500311507e-07, + "loss": 0.0541, + "num_input_tokens_seen": 86675984, + "step": 128595 + }, + { + "epoch": 3.1417193951090807, + "grad_norm": 0.003563940990716219, + "learning_rate": 7.299440399707133e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86679376, + "step": 128600 + }, + { + "epoch": 3.141841545940928, + "grad_norm": 0.04951038211584091, + "learning_rate": 7.298619318741955e-07, + "loss": 0.0, + "num_input_tokens_seen": 86682576, + "step": 128605 + }, + { + "epoch": 3.141963696772775, + "grad_norm": 0.00022587290732190013, + "learning_rate": 7.297798257421944e-07, + "loss": 0.0, + "num_input_tokens_seen": 86685712, + "step": 128610 + }, + { + "epoch": 3.1420858476046223, + "grad_norm": 0.003272018162533641, + "learning_rate": 7.296977215753069e-07, + "loss": 0.0, + "num_input_tokens_seen": 86688528, + "step": 128615 + }, + { + "epoch": 3.1422079984364695, + "grad_norm": 0.005255506839603186, + "learning_rate": 7.296156193741305e-07, + "loss": 0.0, + "num_input_tokens_seen": 86691856, + "step": 128620 + }, + { + "epoch": 3.1423301492683167, + "grad_norm": 3.095484134973958e-05, + "learning_rate": 7.295335191392617e-07, + "loss": 0.0, + "num_input_tokens_seen": 86694928, + "step": 128625 + }, + { + "epoch": 3.142452300100164, + "grad_norm": 0.05205165222287178, + "learning_rate": 7.294514208712979e-07, + "loss": 0.0004, + "num_input_tokens_seen": 86698320, + "step": 128630 + }, + { + "epoch": 3.142574450932011, + "grad_norm": 0.08933182805776596, + "learning_rate": 7.293693245708365e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86701904, + "step": 128635 + }, + { + "epoch": 3.1426966017638582, + "grad_norm": 0.0016661847475916147, + "learning_rate": 7.29287230238474e-07, + "loss": 0.0546, + "num_input_tokens_seen": 86705680, + "step": 128640 + }, + { + "epoch": 3.142818752595705, + "grad_norm": 0.0012546000070869923, + "learning_rate": 7.292051378748076e-07, + "loss": 0.0, + "num_input_tokens_seen": 86709072, + "step": 128645 + }, + { + "epoch": 3.142940903427552, + "grad_norm": 0.0005776460748165846, + "learning_rate": 7.291230474804342e-07, + "loss": 0.0, + "num_input_tokens_seen": 86712336, + "step": 128650 + }, + { + "epoch": 3.1430630542593994, + "grad_norm": 0.11688629537820816, + "learning_rate": 7.290409590559508e-07, + "loss": 0.0458, + "num_input_tokens_seen": 86715216, + "step": 128655 + }, + { + "epoch": 3.1431852050912465, + "grad_norm": 0.00021849303448107094, + "learning_rate": 7.289588726019547e-07, + "loss": 0.0, + "num_input_tokens_seen": 86718672, + "step": 128660 + }, + { + "epoch": 3.1433073559230937, + "grad_norm": 0.006894854828715324, + "learning_rate": 7.288767881190423e-07, + "loss": 0.0, + "num_input_tokens_seen": 86721872, + "step": 128665 + }, + { + "epoch": 3.143429506754941, + "grad_norm": 0.0018162491032853723, + "learning_rate": 7.287947056078112e-07, + "loss": 0.1059, + "num_input_tokens_seen": 86725392, + "step": 128670 + }, + { + "epoch": 3.143551657586788, + "grad_norm": 0.010568572208285332, + "learning_rate": 7.287126250688575e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86728464, + "step": 128675 + }, + { + "epoch": 3.1436738084186353, + "grad_norm": 0.0048435525968670845, + "learning_rate": 7.286305465027789e-07, + "loss": 0.0, + "num_input_tokens_seen": 86731536, + "step": 128680 + }, + { + "epoch": 3.1437959592504825, + "grad_norm": 0.0015782549744471908, + "learning_rate": 7.285484699101716e-07, + "loss": 0.0, + "num_input_tokens_seen": 86734736, + "step": 128685 + }, + { + "epoch": 3.1439181100823297, + "grad_norm": 2663.33740234375, + "learning_rate": 7.284663952916328e-07, + "loss": 0.0305, + "num_input_tokens_seen": 86738128, + "step": 128690 + }, + { + "epoch": 3.144040260914177, + "grad_norm": 0.007821955718100071, + "learning_rate": 7.283843226477598e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86741328, + "step": 128695 + }, + { + "epoch": 3.144162411746024, + "grad_norm": 0.030453966930508614, + "learning_rate": 7.283022519791487e-07, + "loss": 0.0, + "num_input_tokens_seen": 86744720, + "step": 128700 + }, + { + "epoch": 3.1442845625778713, + "grad_norm": 0.015875792130827904, + "learning_rate": 7.282201832863972e-07, + "loss": 0.0, + "num_input_tokens_seen": 86748688, + "step": 128705 + }, + { + "epoch": 3.1444067134097184, + "grad_norm": 0.20353585481643677, + "learning_rate": 7.281381165701011e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86752144, + "step": 128710 + }, + { + "epoch": 3.1445288642415656, + "grad_norm": 0.0022369814105331898, + "learning_rate": 7.280560518308582e-07, + "loss": 0.0, + "num_input_tokens_seen": 86755600, + "step": 128715 + }, + { + "epoch": 3.144651015073413, + "grad_norm": 0.0004540746449492872, + "learning_rate": 7.279739890692646e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86758864, + "step": 128720 + }, + { + "epoch": 3.14477316590526, + "grad_norm": 0.007710547186434269, + "learning_rate": 7.27891928285917e-07, + "loss": 0.0, + "num_input_tokens_seen": 86762448, + "step": 128725 + }, + { + "epoch": 3.1448953167371068, + "grad_norm": 0.00036782852839678526, + "learning_rate": 7.278098694814131e-07, + "loss": 0.0343, + "num_input_tokens_seen": 86765840, + "step": 128730 + }, + { + "epoch": 3.145017467568954, + "grad_norm": 0.3705562949180603, + "learning_rate": 7.277278126563485e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86769296, + "step": 128735 + }, + { + "epoch": 3.145139618400801, + "grad_norm": 0.0008896345389075577, + "learning_rate": 7.27645757811321e-07, + "loss": 0.0, + "num_input_tokens_seen": 86772560, + "step": 128740 + }, + { + "epoch": 3.1452617692326483, + "grad_norm": 0.005860117729753256, + "learning_rate": 7.275637049469263e-07, + "loss": 0.0, + "num_input_tokens_seen": 86776080, + "step": 128745 + }, + { + "epoch": 3.1453839200644955, + "grad_norm": 9.167318057734519e-05, + "learning_rate": 7.274816540637616e-07, + "loss": 0.0, + "num_input_tokens_seen": 86779088, + "step": 128750 + }, + { + "epoch": 3.1455060708963427, + "grad_norm": 0.00035717521677725017, + "learning_rate": 7.27399605162424e-07, + "loss": 0.036, + "num_input_tokens_seen": 86782544, + "step": 128755 + }, + { + "epoch": 3.14562822172819, + "grad_norm": 0.003054143860936165, + "learning_rate": 7.273175582435098e-07, + "loss": 0.0, + "num_input_tokens_seen": 86785680, + "step": 128760 + }, + { + "epoch": 3.145750372560037, + "grad_norm": 0.004068409558385611, + "learning_rate": 7.272355133076154e-07, + "loss": 0.0, + "num_input_tokens_seen": 86789136, + "step": 128765 + }, + { + "epoch": 3.1458725233918843, + "grad_norm": 0.008344685658812523, + "learning_rate": 7.271534703553379e-07, + "loss": 0.0, + "num_input_tokens_seen": 86792656, + "step": 128770 + }, + { + "epoch": 3.1459946742237315, + "grad_norm": 0.00015507386706303805, + "learning_rate": 7.270714293872738e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86795792, + "step": 128775 + }, + { + "epoch": 3.1461168250555787, + "grad_norm": 0.024332698434591293, + "learning_rate": 7.269893904040194e-07, + "loss": 0.0, + "num_input_tokens_seen": 86799696, + "step": 128780 + }, + { + "epoch": 3.146238975887426, + "grad_norm": 0.000505484058521688, + "learning_rate": 7.269073534061715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86803216, + "step": 128785 + }, + { + "epoch": 3.146361126719273, + "grad_norm": 0.005547806154936552, + "learning_rate": 7.268253183943271e-07, + "loss": 0.0, + "num_input_tokens_seen": 86806224, + "step": 128790 + }, + { + "epoch": 3.1464832775511202, + "grad_norm": 0.001450459472835064, + "learning_rate": 7.267432853690823e-07, + "loss": 0.0477, + "num_input_tokens_seen": 86809168, + "step": 128795 + }, + { + "epoch": 3.1466054283829674, + "grad_norm": 0.0012143428903073072, + "learning_rate": 7.266612543310339e-07, + "loss": 0.0, + "num_input_tokens_seen": 86812688, + "step": 128800 + }, + { + "epoch": 3.1467275792148146, + "grad_norm": 382.2807312011719, + "learning_rate": 7.265792252807783e-07, + "loss": 0.0772, + "num_input_tokens_seen": 86815696, + "step": 128805 + }, + { + "epoch": 3.146849730046662, + "grad_norm": 2.9038077627774328e-05, + "learning_rate": 7.264971982189122e-07, + "loss": 0.0288, + "num_input_tokens_seen": 86819344, + "step": 128810 + }, + { + "epoch": 3.146971880878509, + "grad_norm": 8.047770825214684e-05, + "learning_rate": 7.26415173146032e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86822928, + "step": 128815 + }, + { + "epoch": 3.147094031710356, + "grad_norm": 0.0018185937078669667, + "learning_rate": 7.263331500627343e-07, + "loss": 0.0, + "num_input_tokens_seen": 86826192, + "step": 128820 + }, + { + "epoch": 3.147216182542203, + "grad_norm": 0.0006772976485081017, + "learning_rate": 7.262511289696158e-07, + "loss": 0.0, + "num_input_tokens_seen": 86829264, + "step": 128825 + }, + { + "epoch": 3.14733833337405, + "grad_norm": 0.001433266093954444, + "learning_rate": 7.261691098672722e-07, + "loss": 0.0, + "num_input_tokens_seen": 86833424, + "step": 128830 + }, + { + "epoch": 3.1474604842058973, + "grad_norm": 0.000816820771433413, + "learning_rate": 7.260870927563009e-07, + "loss": 0.0, + "num_input_tokens_seen": 86836432, + "step": 128835 + }, + { + "epoch": 3.1475826350377445, + "grad_norm": 0.000719138071872294, + "learning_rate": 7.260050776372974e-07, + "loss": 0.0, + "num_input_tokens_seen": 86839952, + "step": 128840 + }, + { + "epoch": 3.1477047858695917, + "grad_norm": 0.0024840179830789566, + "learning_rate": 7.259230645108589e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86843344, + "step": 128845 + }, + { + "epoch": 3.147826936701439, + "grad_norm": 0.04995972290635109, + "learning_rate": 7.258410533775819e-07, + "loss": 0.0, + "num_input_tokens_seen": 86846608, + "step": 128850 + }, + { + "epoch": 3.147949087533286, + "grad_norm": 0.0003871772496495396, + "learning_rate": 7.257590442380621e-07, + "loss": 0.0, + "num_input_tokens_seen": 86849808, + "step": 128855 + }, + { + "epoch": 3.1480712383651333, + "grad_norm": 0.0005167672061361372, + "learning_rate": 7.256770370928968e-07, + "loss": 0.0, + "num_input_tokens_seen": 86853264, + "step": 128860 + }, + { + "epoch": 3.1481933891969804, + "grad_norm": 0.002442282158881426, + "learning_rate": 7.255950319426814e-07, + "loss": 0.0, + "num_input_tokens_seen": 86857040, + "step": 128865 + }, + { + "epoch": 3.1483155400288276, + "grad_norm": 0.00010521677177166566, + "learning_rate": 7.25513028788013e-07, + "loss": 0.0, + "num_input_tokens_seen": 86860752, + "step": 128870 + }, + { + "epoch": 3.148437690860675, + "grad_norm": 27.28692626953125, + "learning_rate": 7.254310276294876e-07, + "loss": 0.0538, + "num_input_tokens_seen": 86863824, + "step": 128875 + }, + { + "epoch": 3.148559841692522, + "grad_norm": 0.005157478619366884, + "learning_rate": 7.253490284677015e-07, + "loss": 0.0, + "num_input_tokens_seen": 86866896, + "step": 128880 + }, + { + "epoch": 3.148681992524369, + "grad_norm": 0.1106048971414566, + "learning_rate": 7.252670313032514e-07, + "loss": 0.0003, + "num_input_tokens_seen": 86870224, + "step": 128885 + }, + { + "epoch": 3.1488041433562164, + "grad_norm": 0.0023445556871593, + "learning_rate": 7.251850361367329e-07, + "loss": 0.0, + "num_input_tokens_seen": 86873552, + "step": 128890 + }, + { + "epoch": 3.1489262941880636, + "grad_norm": 0.004360957071185112, + "learning_rate": 7.251030429687433e-07, + "loss": 0.0, + "num_input_tokens_seen": 86876816, + "step": 128895 + }, + { + "epoch": 3.1490484450199108, + "grad_norm": 0.010129369795322418, + "learning_rate": 7.250210517998778e-07, + "loss": 0.0, + "num_input_tokens_seen": 86880144, + "step": 128900 + }, + { + "epoch": 3.149170595851758, + "grad_norm": 0.0009373872308060527, + "learning_rate": 7.249390626307332e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86883664, + "step": 128905 + }, + { + "epoch": 3.1492927466836047, + "grad_norm": 0.0001356559369014576, + "learning_rate": 7.248570754619061e-07, + "loss": 0.0013, + "num_input_tokens_seen": 86886736, + "step": 128910 + }, + { + "epoch": 3.149414897515452, + "grad_norm": 0.00227438029833138, + "learning_rate": 7.247750902939922e-07, + "loss": 0.0, + "num_input_tokens_seen": 86889808, + "step": 128915 + }, + { + "epoch": 3.149537048347299, + "grad_norm": 0.0019287688191980124, + "learning_rate": 7.246931071275879e-07, + "loss": 0.0, + "num_input_tokens_seen": 86893072, + "step": 128920 + }, + { + "epoch": 3.1496591991791463, + "grad_norm": 0.000745182391256094, + "learning_rate": 7.246111259632892e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86896208, + "step": 128925 + }, + { + "epoch": 3.1497813500109935, + "grad_norm": 0.0013668572064489126, + "learning_rate": 7.245291468016928e-07, + "loss": 0.0, + "num_input_tokens_seen": 86899536, + "step": 128930 + }, + { + "epoch": 3.1499035008428407, + "grad_norm": 0.0015289505245164037, + "learning_rate": 7.244471696433943e-07, + "loss": 0.0, + "num_input_tokens_seen": 86902672, + "step": 128935 + }, + { + "epoch": 3.150025651674688, + "grad_norm": 0.005079837050288916, + "learning_rate": 7.243651944889897e-07, + "loss": 0.0, + "num_input_tokens_seen": 86905808, + "step": 128940 + }, + { + "epoch": 3.150147802506535, + "grad_norm": 0.0121584078297019, + "learning_rate": 7.242832213390763e-07, + "loss": 0.0, + "num_input_tokens_seen": 86909072, + "step": 128945 + }, + { + "epoch": 3.1502699533383822, + "grad_norm": 4.289217758923769e-05, + "learning_rate": 7.24201250194249e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86912208, + "step": 128950 + }, + { + "epoch": 3.1503921041702294, + "grad_norm": 0.0025455676950514317, + "learning_rate": 7.241192810551047e-07, + "loss": 0.0574, + "num_input_tokens_seen": 86915280, + "step": 128955 + }, + { + "epoch": 3.1505142550020766, + "grad_norm": 0.000352776434738189, + "learning_rate": 7.240373139222387e-07, + "loss": 0.0, + "num_input_tokens_seen": 86918864, + "step": 128960 + }, + { + "epoch": 3.150636405833924, + "grad_norm": 0.5395686030387878, + "learning_rate": 7.239553487962479e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86922000, + "step": 128965 + }, + { + "epoch": 3.150758556665771, + "grad_norm": 0.0004787775978911668, + "learning_rate": 7.238733856777281e-07, + "loss": 0.0005, + "num_input_tokens_seen": 86925264, + "step": 128970 + }, + { + "epoch": 3.150880707497618, + "grad_norm": 0.0027044913731515408, + "learning_rate": 7.237914245672752e-07, + "loss": 0.0, + "num_input_tokens_seen": 86928784, + "step": 128975 + }, + { + "epoch": 3.1510028583294654, + "grad_norm": 0.001796882483176887, + "learning_rate": 7.237094654654857e-07, + "loss": 0.0, + "num_input_tokens_seen": 86932240, + "step": 128980 + }, + { + "epoch": 3.1511250091613126, + "grad_norm": 0.0006195952300913632, + "learning_rate": 7.236275083729546e-07, + "loss": 0.0, + "num_input_tokens_seen": 86935440, + "step": 128985 + }, + { + "epoch": 3.1512471599931597, + "grad_norm": 0.004765995312482119, + "learning_rate": 7.235455532902793e-07, + "loss": 0.0, + "num_input_tokens_seen": 86938704, + "step": 128990 + }, + { + "epoch": 3.1513693108250065, + "grad_norm": 0.01532856933772564, + "learning_rate": 7.234636002180545e-07, + "loss": 0.0, + "num_input_tokens_seen": 86942160, + "step": 128995 + }, + { + "epoch": 3.1514914616568537, + "grad_norm": 0.0055182985961437225, + "learning_rate": 7.233816491568768e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86945872, + "step": 129000 + }, + { + "epoch": 3.151613612488701, + "grad_norm": 0.003385876538231969, + "learning_rate": 7.232997001073427e-07, + "loss": 0.0, + "num_input_tokens_seen": 86948944, + "step": 129005 + }, + { + "epoch": 3.151735763320548, + "grad_norm": 0.07067646086215973, + "learning_rate": 7.23217753070047e-07, + "loss": 0.0, + "num_input_tokens_seen": 86952080, + "step": 129010 + }, + { + "epoch": 3.1518579141523952, + "grad_norm": 0.0036404673010110855, + "learning_rate": 7.231358080455868e-07, + "loss": 0.0002, + "num_input_tokens_seen": 86955088, + "step": 129015 + }, + { + "epoch": 3.1519800649842424, + "grad_norm": 0.002148119965568185, + "learning_rate": 7.23053865034557e-07, + "loss": 0.0348, + "num_input_tokens_seen": 86958288, + "step": 129020 + }, + { + "epoch": 3.1521022158160896, + "grad_norm": 0.0029477067291736603, + "learning_rate": 7.229719240375545e-07, + "loss": 0.0, + "num_input_tokens_seen": 86961744, + "step": 129025 + }, + { + "epoch": 3.152224366647937, + "grad_norm": 0.0029479314107447863, + "learning_rate": 7.228899850551743e-07, + "loss": 0.0, + "num_input_tokens_seen": 86965264, + "step": 129030 + }, + { + "epoch": 3.152346517479784, + "grad_norm": 0.0006279372028075159, + "learning_rate": 7.228080480880125e-07, + "loss": 0.0, + "num_input_tokens_seen": 86968336, + "step": 129035 + }, + { + "epoch": 3.152468668311631, + "grad_norm": 0.0009111549588851631, + "learning_rate": 7.227261131366655e-07, + "loss": 0.0, + "num_input_tokens_seen": 86971984, + "step": 129040 + }, + { + "epoch": 3.1525908191434784, + "grad_norm": 0.011269640177488327, + "learning_rate": 7.226441802017286e-07, + "loss": 0.0, + "num_input_tokens_seen": 86975120, + "step": 129045 + }, + { + "epoch": 3.1527129699753256, + "grad_norm": 0.01214879471808672, + "learning_rate": 7.22562249283798e-07, + "loss": 0.0, + "num_input_tokens_seen": 86978576, + "step": 129050 + }, + { + "epoch": 3.1528351208071728, + "grad_norm": 0.20093731582164764, + "learning_rate": 7.224803203834691e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86981648, + "step": 129055 + }, + { + "epoch": 3.15295727163902, + "grad_norm": 0.0008670971728861332, + "learning_rate": 7.223983935013378e-07, + "loss": 0.0416, + "num_input_tokens_seen": 86984912, + "step": 129060 + }, + { + "epoch": 3.153079422470867, + "grad_norm": 0.017721112817525864, + "learning_rate": 7.223164686380005e-07, + "loss": 0.0001, + "num_input_tokens_seen": 86988240, + "step": 129065 + }, + { + "epoch": 3.1532015733027143, + "grad_norm": 0.13180363178253174, + "learning_rate": 7.222345457940524e-07, + "loss": 0.1514, + "num_input_tokens_seen": 86991248, + "step": 129070 + }, + { + "epoch": 3.1533237241345615, + "grad_norm": 0.03970232978463173, + "learning_rate": 7.221526249700893e-07, + "loss": 0.0, + "num_input_tokens_seen": 86994896, + "step": 129075 + }, + { + "epoch": 3.1534458749664087, + "grad_norm": 0.1309601068496704, + "learning_rate": 7.220707061667072e-07, + "loss": 0.0464, + "num_input_tokens_seen": 86998160, + "step": 129080 + }, + { + "epoch": 3.153568025798256, + "grad_norm": 39.45444869995117, + "learning_rate": 7.219887893845018e-07, + "loss": 0.0489, + "num_input_tokens_seen": 87001680, + "step": 129085 + }, + { + "epoch": 3.1536901766301026, + "grad_norm": 0.00731156300753355, + "learning_rate": 7.219068746240682e-07, + "loss": 0.0688, + "num_input_tokens_seen": 87005200, + "step": 129090 + }, + { + "epoch": 3.15381232746195, + "grad_norm": 0.02709462121129036, + "learning_rate": 7.218249618860026e-07, + "loss": 0.0005, + "num_input_tokens_seen": 87008720, + "step": 129095 + }, + { + "epoch": 3.153934478293797, + "grad_norm": 0.0009111324907280505, + "learning_rate": 7.217430511709013e-07, + "loss": 0.0, + "num_input_tokens_seen": 87012560, + "step": 129100 + }, + { + "epoch": 3.154056629125644, + "grad_norm": 0.0053654201328754425, + "learning_rate": 7.216611424793588e-07, + "loss": 0.0, + "num_input_tokens_seen": 87015504, + "step": 129105 + }, + { + "epoch": 3.1541787799574914, + "grad_norm": 8.82080930750817e-05, + "learning_rate": 7.215792358119718e-07, + "loss": 0.0, + "num_input_tokens_seen": 87018704, + "step": 129110 + }, + { + "epoch": 3.1543009307893386, + "grad_norm": 0.002231209073215723, + "learning_rate": 7.21497331169335e-07, + "loss": 0.0, + "num_input_tokens_seen": 87022288, + "step": 129115 + }, + { + "epoch": 3.154423081621186, + "grad_norm": 0.00520332483574748, + "learning_rate": 7.214154285520451e-07, + "loss": 0.0, + "num_input_tokens_seen": 87025872, + "step": 129120 + }, + { + "epoch": 3.154545232453033, + "grad_norm": 497.58929443359375, + "learning_rate": 7.213335279606965e-07, + "loss": 0.0131, + "num_input_tokens_seen": 87029456, + "step": 129125 + }, + { + "epoch": 3.15466738328488, + "grad_norm": 0.0042228191159665585, + "learning_rate": 7.212516293958857e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87033104, + "step": 129130 + }, + { + "epoch": 3.1547895341167274, + "grad_norm": 0.000230711157200858, + "learning_rate": 7.211697328582082e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87036368, + "step": 129135 + }, + { + "epoch": 3.1549116849485745, + "grad_norm": 0.05502479523420334, + "learning_rate": 7.210878383482593e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87039568, + "step": 129140 + }, + { + "epoch": 3.1550338357804217, + "grad_norm": 0.11637943238019943, + "learning_rate": 7.210059458666348e-07, + "loss": 0.0, + "num_input_tokens_seen": 87042704, + "step": 129145 + }, + { + "epoch": 3.155155986612269, + "grad_norm": 0.002879110863432288, + "learning_rate": 7.209240554139296e-07, + "loss": 0.1131, + "num_input_tokens_seen": 87046416, + "step": 129150 + }, + { + "epoch": 3.155278137444116, + "grad_norm": 0.0007435963489115238, + "learning_rate": 7.208421669907398e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87049872, + "step": 129155 + }, + { + "epoch": 3.1554002882759633, + "grad_norm": 0.23474089801311493, + "learning_rate": 7.207602805976613e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87053200, + "step": 129160 + }, + { + "epoch": 3.1555224391078105, + "grad_norm": 0.09376419335603714, + "learning_rate": 7.206783962352889e-07, + "loss": 0.0, + "num_input_tokens_seen": 87057936, + "step": 129165 + }, + { + "epoch": 3.1556445899396577, + "grad_norm": 0.0002012960467254743, + "learning_rate": 7.205965139042186e-07, + "loss": 0.0, + "num_input_tokens_seen": 87061520, + "step": 129170 + }, + { + "epoch": 3.1557667407715044, + "grad_norm": 0.00446065841242671, + "learning_rate": 7.205146336050451e-07, + "loss": 0.0, + "num_input_tokens_seen": 87064848, + "step": 129175 + }, + { + "epoch": 3.1558888916033516, + "grad_norm": 0.0006037917919456959, + "learning_rate": 7.204327553383649e-07, + "loss": 0.0501, + "num_input_tokens_seen": 87067984, + "step": 129180 + }, + { + "epoch": 3.156011042435199, + "grad_norm": 0.0025712205097079277, + "learning_rate": 7.203508791047727e-07, + "loss": 0.0055, + "num_input_tokens_seen": 87070928, + "step": 129185 + }, + { + "epoch": 3.156133193267046, + "grad_norm": 0.0012938089203089476, + "learning_rate": 7.202690049048638e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87074128, + "step": 129190 + }, + { + "epoch": 3.156255344098893, + "grad_norm": 0.010054078884422779, + "learning_rate": 7.201871327392344e-07, + "loss": 0.0, + "num_input_tokens_seen": 87077392, + "step": 129195 + }, + { + "epoch": 3.1563774949307404, + "grad_norm": 0.0007030934211798012, + "learning_rate": 7.201052626084792e-07, + "loss": 0.0004, + "num_input_tokens_seen": 87080592, + "step": 129200 + }, + { + "epoch": 3.1564996457625876, + "grad_norm": 8.724342478672042e-05, + "learning_rate": 7.200233945131939e-07, + "loss": 0.0, + "num_input_tokens_seen": 87083856, + "step": 129205 + }, + { + "epoch": 3.1566217965944348, + "grad_norm": 0.0009425784228369594, + "learning_rate": 7.199415284539736e-07, + "loss": 0.0, + "num_input_tokens_seen": 87087056, + "step": 129210 + }, + { + "epoch": 3.156743947426282, + "grad_norm": 0.00022201616957318038, + "learning_rate": 7.198596644314137e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87090256, + "step": 129215 + }, + { + "epoch": 3.156866098258129, + "grad_norm": 299.904052734375, + "learning_rate": 7.1977780244611e-07, + "loss": 0.043, + "num_input_tokens_seen": 87093520, + "step": 129220 + }, + { + "epoch": 3.1569882490899763, + "grad_norm": 0.398426353931427, + "learning_rate": 7.196959424986575e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87096656, + "step": 129225 + }, + { + "epoch": 3.1571103999218235, + "grad_norm": 0.00035814818693324924, + "learning_rate": 7.196140845896514e-07, + "loss": 0.0, + "num_input_tokens_seen": 87100048, + "step": 129230 + }, + { + "epoch": 3.1572325507536707, + "grad_norm": 0.0038671051152050495, + "learning_rate": 7.195322287196872e-07, + "loss": 0.0, + "num_input_tokens_seen": 87103824, + "step": 129235 + }, + { + "epoch": 3.157354701585518, + "grad_norm": 4.015643571619876e-05, + "learning_rate": 7.194503748893601e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87107344, + "step": 129240 + }, + { + "epoch": 3.157476852417365, + "grad_norm": 6.89021180733107e-06, + "learning_rate": 7.193685230992651e-07, + "loss": 0.0, + "num_input_tokens_seen": 87110736, + "step": 129245 + }, + { + "epoch": 3.1575990032492123, + "grad_norm": 0.0034669388551265, + "learning_rate": 7.192866733499976e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87113936, + "step": 129250 + }, + { + "epoch": 3.1577211540810595, + "grad_norm": 0.00022201616957318038, + "learning_rate": 7.192048256421532e-07, + "loss": 0.0, + "num_input_tokens_seen": 87117328, + "step": 129255 + }, + { + "epoch": 3.1578433049129067, + "grad_norm": 0.16952480375766754, + "learning_rate": 7.191229799763265e-07, + "loss": 0.0, + "num_input_tokens_seen": 87120592, + "step": 129260 + }, + { + "epoch": 3.157965455744754, + "grad_norm": 0.003934331238269806, + "learning_rate": 7.190411363531136e-07, + "loss": 0.0, + "num_input_tokens_seen": 87123856, + "step": 129265 + }, + { + "epoch": 3.1580876065766006, + "grad_norm": 9.973491978598759e-05, + "learning_rate": 7.189592947731085e-07, + "loss": 0.0489, + "num_input_tokens_seen": 87127184, + "step": 129270 + }, + { + "epoch": 3.158209757408448, + "grad_norm": 0.0008483565761707723, + "learning_rate": 7.188774552369077e-07, + "loss": 0.0, + "num_input_tokens_seen": 87130512, + "step": 129275 + }, + { + "epoch": 3.158331908240295, + "grad_norm": 0.0038336871657520533, + "learning_rate": 7.187956177451049e-07, + "loss": 0.0, + "num_input_tokens_seen": 87133904, + "step": 129280 + }, + { + "epoch": 3.158454059072142, + "grad_norm": 0.0014283207710832357, + "learning_rate": 7.187137822982965e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87137168, + "step": 129285 + }, + { + "epoch": 3.1585762099039894, + "grad_norm": 0.017337150871753693, + "learning_rate": 7.186319488970771e-07, + "loss": 0.0, + "num_input_tokens_seen": 87140560, + "step": 129290 + }, + { + "epoch": 3.1586983607358365, + "grad_norm": 0.00010832039697561413, + "learning_rate": 7.185501175420416e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87144208, + "step": 129295 + }, + { + "epoch": 3.1588205115676837, + "grad_norm": 0.0023732376284897327, + "learning_rate": 7.184682882337856e-07, + "loss": 0.0, + "num_input_tokens_seen": 87147472, + "step": 129300 + }, + { + "epoch": 3.158942662399531, + "grad_norm": 0.0010419761529192328, + "learning_rate": 7.183864609729037e-07, + "loss": 0.0, + "num_input_tokens_seen": 87150672, + "step": 129305 + }, + { + "epoch": 3.159064813231378, + "grad_norm": 0.0005405220435932279, + "learning_rate": 7.183046357599912e-07, + "loss": 0.0, + "num_input_tokens_seen": 87153744, + "step": 129310 + }, + { + "epoch": 3.1591869640632253, + "grad_norm": 3.672197999549098e-05, + "learning_rate": 7.182228125956433e-07, + "loss": 0.0291, + "num_input_tokens_seen": 87156944, + "step": 129315 + }, + { + "epoch": 3.1593091148950725, + "grad_norm": 0.0006655405159108341, + "learning_rate": 7.181409914804547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87160528, + "step": 129320 + }, + { + "epoch": 3.1594312657269197, + "grad_norm": 0.0002669768873602152, + "learning_rate": 7.18059172415021e-07, + "loss": 0.0591, + "num_input_tokens_seen": 87164176, + "step": 129325 + }, + { + "epoch": 3.159553416558767, + "grad_norm": 0.0010073547018691897, + "learning_rate": 7.179773553999364e-07, + "loss": 0.0, + "num_input_tokens_seen": 87167696, + "step": 129330 + }, + { + "epoch": 3.159675567390614, + "grad_norm": 0.015864400193095207, + "learning_rate": 7.178955404357967e-07, + "loss": 0.0, + "num_input_tokens_seen": 87170896, + "step": 129335 + }, + { + "epoch": 3.1597977182224612, + "grad_norm": 0.05096196010708809, + "learning_rate": 7.178137275231963e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87174032, + "step": 129340 + }, + { + "epoch": 3.1599198690543084, + "grad_norm": 3.69091285392642e-05, + "learning_rate": 7.177319166627304e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87177296, + "step": 129345 + }, + { + "epoch": 3.1600420198861556, + "grad_norm": 0.00034703267738223076, + "learning_rate": 7.176501078549941e-07, + "loss": 0.0, + "num_input_tokens_seen": 87181200, + "step": 129350 + }, + { + "epoch": 3.1601641707180024, + "grad_norm": 1.9219897985458374, + "learning_rate": 7.175683011005818e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87184656, + "step": 129355 + }, + { + "epoch": 3.1602863215498496, + "grad_norm": 0.007372839376330376, + "learning_rate": 7.174864964000893e-07, + "loss": 0.0, + "num_input_tokens_seen": 87187728, + "step": 129360 + }, + { + "epoch": 3.1604084723816968, + "grad_norm": 0.2552824318408966, + "learning_rate": 7.174046937541103e-07, + "loss": 0.0443, + "num_input_tokens_seen": 87191184, + "step": 129365 + }, + { + "epoch": 3.160530623213544, + "grad_norm": 0.007546910550445318, + "learning_rate": 7.173228931632406e-07, + "loss": 0.0, + "num_input_tokens_seen": 87194128, + "step": 129370 + }, + { + "epoch": 3.160652774045391, + "grad_norm": 0.00017476998618803918, + "learning_rate": 7.172410946280752e-07, + "loss": 0.0, + "num_input_tokens_seen": 87197712, + "step": 129375 + }, + { + "epoch": 3.1607749248772383, + "grad_norm": 0.0020659775473177433, + "learning_rate": 7.171592981492085e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87201040, + "step": 129380 + }, + { + "epoch": 3.1608970757090855, + "grad_norm": 0.002920554019510746, + "learning_rate": 7.170775037272353e-07, + "loss": 0.0, + "num_input_tokens_seen": 87204432, + "step": 129385 + }, + { + "epoch": 3.1610192265409327, + "grad_norm": 0.00036389954038895667, + "learning_rate": 7.169957113627507e-07, + "loss": 0.0626, + "num_input_tokens_seen": 87207504, + "step": 129390 + }, + { + "epoch": 3.16114137737278, + "grad_norm": 0.00018603085482027382, + "learning_rate": 7.169139210563495e-07, + "loss": 0.0, + "num_input_tokens_seen": 87211024, + "step": 129395 + }, + { + "epoch": 3.161263528204627, + "grad_norm": 0.0009080006857402623, + "learning_rate": 7.168321328086262e-07, + "loss": 0.0269, + "num_input_tokens_seen": 87214288, + "step": 129400 + }, + { + "epoch": 3.1613856790364743, + "grad_norm": 0.0012143454514443874, + "learning_rate": 7.167503466201757e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87218000, + "step": 129405 + }, + { + "epoch": 3.1615078298683215, + "grad_norm": 0.0028844240587204695, + "learning_rate": 7.166685624915931e-07, + "loss": 0.0688, + "num_input_tokens_seen": 87221072, + "step": 129410 + }, + { + "epoch": 3.1616299807001687, + "grad_norm": 0.03779517486691475, + "learning_rate": 7.165867804234727e-07, + "loss": 0.0, + "num_input_tokens_seen": 87224720, + "step": 129415 + }, + { + "epoch": 3.161752131532016, + "grad_norm": 0.01557319238781929, + "learning_rate": 7.165050004164098e-07, + "loss": 0.0, + "num_input_tokens_seen": 87228368, + "step": 129420 + }, + { + "epoch": 3.161874282363863, + "grad_norm": 0.0016049350379034877, + "learning_rate": 7.164232224709984e-07, + "loss": 0.0, + "num_input_tokens_seen": 87231696, + "step": 129425 + }, + { + "epoch": 3.16199643319571, + "grad_norm": 0.0009750159224495292, + "learning_rate": 7.16341446587834e-07, + "loss": 0.0489, + "num_input_tokens_seen": 87235024, + "step": 129430 + }, + { + "epoch": 3.1621185840275574, + "grad_norm": 0.004246531054377556, + "learning_rate": 7.162596727675105e-07, + "loss": 0.0, + "num_input_tokens_seen": 87238800, + "step": 129435 + }, + { + "epoch": 3.1622407348594046, + "grad_norm": 0.00011263210762990639, + "learning_rate": 7.161779010106233e-07, + "loss": 0.0642, + "num_input_tokens_seen": 87242064, + "step": 129440 + }, + { + "epoch": 3.162362885691252, + "grad_norm": 0.0018355419160798192, + "learning_rate": 7.160961313177667e-07, + "loss": 0.0, + "num_input_tokens_seen": 87244944, + "step": 129445 + }, + { + "epoch": 3.1624850365230985, + "grad_norm": 0.047277096658945084, + "learning_rate": 7.16014363689535e-07, + "loss": 0.0, + "num_input_tokens_seen": 87248400, + "step": 129450 + }, + { + "epoch": 3.1626071873549457, + "grad_norm": 0.004752216394990683, + "learning_rate": 7.159325981265238e-07, + "loss": 0.0, + "num_input_tokens_seen": 87251920, + "step": 129455 + }, + { + "epoch": 3.162729338186793, + "grad_norm": 0.00715651735663414, + "learning_rate": 7.158508346293268e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87255824, + "step": 129460 + }, + { + "epoch": 3.16285148901864, + "grad_norm": 0.0004549931618385017, + "learning_rate": 7.157690731985388e-07, + "loss": 0.0477, + "num_input_tokens_seen": 87259408, + "step": 129465 + }, + { + "epoch": 3.1629736398504873, + "grad_norm": 39.06941223144531, + "learning_rate": 7.15687313834755e-07, + "loss": 0.0637, + "num_input_tokens_seen": 87262672, + "step": 129470 + }, + { + "epoch": 3.1630957906823345, + "grad_norm": 0.14959043264389038, + "learning_rate": 7.156055565385692e-07, + "loss": 0.0, + "num_input_tokens_seen": 87265744, + "step": 129475 + }, + { + "epoch": 3.1632179415141817, + "grad_norm": 0.00015907990746200085, + "learning_rate": 7.155238013105765e-07, + "loss": 0.0, + "num_input_tokens_seen": 87269072, + "step": 129480 + }, + { + "epoch": 3.163340092346029, + "grad_norm": 0.001588508952409029, + "learning_rate": 7.15442048151371e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87272144, + "step": 129485 + }, + { + "epoch": 3.163462243177876, + "grad_norm": 3.941430350096198e-06, + "learning_rate": 7.153602970615478e-07, + "loss": 0.0, + "num_input_tokens_seen": 87275984, + "step": 129490 + }, + { + "epoch": 3.1635843940097232, + "grad_norm": 0.0015727278077974916, + "learning_rate": 7.152785480417009e-07, + "loss": 0.0, + "num_input_tokens_seen": 87279312, + "step": 129495 + }, + { + "epoch": 3.1637065448415704, + "grad_norm": 0.030083678662776947, + "learning_rate": 7.151968010924247e-07, + "loss": 0.0718, + "num_input_tokens_seen": 87282512, + "step": 129500 + }, + { + "epoch": 3.1638286956734176, + "grad_norm": 0.0015011405339464545, + "learning_rate": 7.151150562143145e-07, + "loss": 0.0, + "num_input_tokens_seen": 87285968, + "step": 129505 + }, + { + "epoch": 3.163950846505265, + "grad_norm": 0.003872362896800041, + "learning_rate": 7.150333134079636e-07, + "loss": 0.0007, + "num_input_tokens_seen": 87289296, + "step": 129510 + }, + { + "epoch": 3.164072997337112, + "grad_norm": 0.0008269310346804559, + "learning_rate": 7.149515726739677e-07, + "loss": 0.0, + "num_input_tokens_seen": 87292752, + "step": 129515 + }, + { + "epoch": 3.164195148168959, + "grad_norm": 0.00023637239064555615, + "learning_rate": 7.148698340129202e-07, + "loss": 0.0, + "num_input_tokens_seen": 87296464, + "step": 129520 + }, + { + "epoch": 3.1643172990008064, + "grad_norm": 0.017714515328407288, + "learning_rate": 7.14788097425416e-07, + "loss": 0.0, + "num_input_tokens_seen": 87299536, + "step": 129525 + }, + { + "epoch": 3.1644394498326536, + "grad_norm": 0.04738396778702736, + "learning_rate": 7.147063629120497e-07, + "loss": 0.0, + "num_input_tokens_seen": 87302736, + "step": 129530 + }, + { + "epoch": 3.1645616006645003, + "grad_norm": 0.0005483126733452082, + "learning_rate": 7.146246304734155e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87305744, + "step": 129535 + }, + { + "epoch": 3.1646837514963475, + "grad_norm": 0.06283750385046005, + "learning_rate": 7.145429001101077e-07, + "loss": 0.0, + "num_input_tokens_seen": 87308752, + "step": 129540 + }, + { + "epoch": 3.1648059023281947, + "grad_norm": 4.195101428194903e-05, + "learning_rate": 7.144611718227206e-07, + "loss": 0.0, + "num_input_tokens_seen": 87312144, + "step": 129545 + }, + { + "epoch": 3.164928053160042, + "grad_norm": 0.04716356843709946, + "learning_rate": 7.143794456118488e-07, + "loss": 0.0, + "num_input_tokens_seen": 87315344, + "step": 129550 + }, + { + "epoch": 3.165050203991889, + "grad_norm": 0.00015722461102996022, + "learning_rate": 7.142977214780863e-07, + "loss": 0.0, + "num_input_tokens_seen": 87318736, + "step": 129555 + }, + { + "epoch": 3.1651723548237363, + "grad_norm": 0.00816582702100277, + "learning_rate": 7.142159994220274e-07, + "loss": 0.0, + "num_input_tokens_seen": 87322384, + "step": 129560 + }, + { + "epoch": 3.1652945056555835, + "grad_norm": 0.0011006930144503713, + "learning_rate": 7.141342794442671e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87325648, + "step": 129565 + }, + { + "epoch": 3.1654166564874306, + "grad_norm": 0.09024383872747421, + "learning_rate": 7.140525615453989e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87329488, + "step": 129570 + }, + { + "epoch": 3.165538807319278, + "grad_norm": 0.007687109988182783, + "learning_rate": 7.139708457260176e-07, + "loss": 0.0, + "num_input_tokens_seen": 87332560, + "step": 129575 + }, + { + "epoch": 3.165660958151125, + "grad_norm": 0.02901403419673443, + "learning_rate": 7.138891319867171e-07, + "loss": 0.0685, + "num_input_tokens_seen": 87335440, + "step": 129580 + }, + { + "epoch": 3.165783108982972, + "grad_norm": 0.0005344308447092772, + "learning_rate": 7.138074203280919e-07, + "loss": 0.0402, + "num_input_tokens_seen": 87338448, + "step": 129585 + }, + { + "epoch": 3.1659052598148194, + "grad_norm": 0.0023302987683564425, + "learning_rate": 7.137257107507359e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87342736, + "step": 129590 + }, + { + "epoch": 3.1660274106466666, + "grad_norm": 0.0006107247318141162, + "learning_rate": 7.136440032552437e-07, + "loss": 0.0, + "num_input_tokens_seen": 87345872, + "step": 129595 + }, + { + "epoch": 3.166149561478514, + "grad_norm": 0.0016856311121955514, + "learning_rate": 7.135622978422096e-07, + "loss": 0.0, + "num_input_tokens_seen": 87349392, + "step": 129600 + }, + { + "epoch": 3.166271712310361, + "grad_norm": 0.022728141397237778, + "learning_rate": 7.13480594512227e-07, + "loss": 0.0, + "num_input_tokens_seen": 87352784, + "step": 129605 + }, + { + "epoch": 3.166393863142208, + "grad_norm": 0.001035742461681366, + "learning_rate": 7.13398893265891e-07, + "loss": 0.0, + "num_input_tokens_seen": 87356304, + "step": 129610 + }, + { + "epoch": 3.1665160139740554, + "grad_norm": 0.015423404984176159, + "learning_rate": 7.13317194103795e-07, + "loss": 0.0, + "num_input_tokens_seen": 87359568, + "step": 129615 + }, + { + "epoch": 3.166638164805902, + "grad_norm": 8.489882748108357e-05, + "learning_rate": 7.132354970265333e-07, + "loss": 0.0, + "num_input_tokens_seen": 87363472, + "step": 129620 + }, + { + "epoch": 3.1667603156377493, + "grad_norm": 0.005331840366125107, + "learning_rate": 7.131538020347007e-07, + "loss": 0.0, + "num_input_tokens_seen": 87366608, + "step": 129625 + }, + { + "epoch": 3.1668824664695965, + "grad_norm": 0.0009954558918252587, + "learning_rate": 7.130721091288905e-07, + "loss": 0.0543, + "num_input_tokens_seen": 87369808, + "step": 129630 + }, + { + "epoch": 3.1670046173014437, + "grad_norm": 0.004612566903233528, + "learning_rate": 7.129904183096973e-07, + "loss": 0.0, + "num_input_tokens_seen": 87373584, + "step": 129635 + }, + { + "epoch": 3.167126768133291, + "grad_norm": 0.007426253519952297, + "learning_rate": 7.129087295777148e-07, + "loss": 0.0441, + "num_input_tokens_seen": 87377296, + "step": 129640 + }, + { + "epoch": 3.167248918965138, + "grad_norm": 0.000458728609373793, + "learning_rate": 7.128270429335375e-07, + "loss": 0.1569, + "num_input_tokens_seen": 87380560, + "step": 129645 + }, + { + "epoch": 3.1673710697969852, + "grad_norm": 0.014502973295748234, + "learning_rate": 7.12745358377759e-07, + "loss": 0.0, + "num_input_tokens_seen": 87383888, + "step": 129650 + }, + { + "epoch": 3.1674932206288324, + "grad_norm": 0.0007057562470436096, + "learning_rate": 7.126636759109732e-07, + "loss": 0.0, + "num_input_tokens_seen": 87387152, + "step": 129655 + }, + { + "epoch": 3.1676153714606796, + "grad_norm": 0.004495688248425722, + "learning_rate": 7.12581995533775e-07, + "loss": 0.0, + "num_input_tokens_seen": 87390928, + "step": 129660 + }, + { + "epoch": 3.167737522292527, + "grad_norm": 0.006966794840991497, + "learning_rate": 7.125003172467574e-07, + "loss": 0.0, + "num_input_tokens_seen": 87394384, + "step": 129665 + }, + { + "epoch": 3.167859673124374, + "grad_norm": 0.006203863769769669, + "learning_rate": 7.124186410505153e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87397584, + "step": 129670 + }, + { + "epoch": 3.167981823956221, + "grad_norm": 0.0003877569397445768, + "learning_rate": 7.123369669456417e-07, + "loss": 0.0455, + "num_input_tokens_seen": 87400976, + "step": 129675 + }, + { + "epoch": 3.1681039747880684, + "grad_norm": 0.009295029565691948, + "learning_rate": 7.12255294932731e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87404240, + "step": 129680 + }, + { + "epoch": 3.1682261256199156, + "grad_norm": 0.002795857610180974, + "learning_rate": 7.121736250123777e-07, + "loss": 0.0, + "num_input_tokens_seen": 87407376, + "step": 129685 + }, + { + "epoch": 3.1683482764517628, + "grad_norm": 0.0059104301035404205, + "learning_rate": 7.120919571851749e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87410832, + "step": 129690 + }, + { + "epoch": 3.16847042728361, + "grad_norm": 0.02798088826239109, + "learning_rate": 7.120102914517168e-07, + "loss": 0.0, + "num_input_tokens_seen": 87414416, + "step": 129695 + }, + { + "epoch": 3.168592578115457, + "grad_norm": 0.001736186444759369, + "learning_rate": 7.119286278125976e-07, + "loss": 0.0, + "num_input_tokens_seen": 87417488, + "step": 129700 + }, + { + "epoch": 3.1687147289473043, + "grad_norm": 0.03984348848462105, + "learning_rate": 7.118469662684108e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87420752, + "step": 129705 + }, + { + "epoch": 3.1688368797791515, + "grad_norm": 0.0017900230595842004, + "learning_rate": 7.1176530681975e-07, + "loss": 0.0875, + "num_input_tokens_seen": 87424144, + "step": 129710 + }, + { + "epoch": 3.1689590306109983, + "grad_norm": 0.0016544137615710497, + "learning_rate": 7.116836494672096e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87427792, + "step": 129715 + }, + { + "epoch": 3.1690811814428455, + "grad_norm": 0.00012129753304179758, + "learning_rate": 7.116019942113835e-07, + "loss": 0.0, + "num_input_tokens_seen": 87432336, + "step": 129720 + }, + { + "epoch": 3.1692033322746926, + "grad_norm": 19.250612258911133, + "learning_rate": 7.115203410528649e-07, + "loss": 0.0255, + "num_input_tokens_seen": 87435536, + "step": 129725 + }, + { + "epoch": 3.16932548310654, + "grad_norm": 0.006029466167092323, + "learning_rate": 7.114386899922483e-07, + "loss": 0.0, + "num_input_tokens_seen": 87438800, + "step": 129730 + }, + { + "epoch": 3.169447633938387, + "grad_norm": 0.032365381717681885, + "learning_rate": 7.113570410301268e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87442576, + "step": 129735 + }, + { + "epoch": 3.169569784770234, + "grad_norm": 0.007656916975975037, + "learning_rate": 7.11275394167095e-07, + "loss": 0.0, + "num_input_tokens_seen": 87446288, + "step": 129740 + }, + { + "epoch": 3.1696919356020814, + "grad_norm": 0.09838418662548065, + "learning_rate": 7.111937494037457e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87449552, + "step": 129745 + }, + { + "epoch": 3.1698140864339286, + "grad_norm": 0.004372446797788143, + "learning_rate": 7.111121067406735e-07, + "loss": 0.0, + "num_input_tokens_seen": 87452496, + "step": 129750 + }, + { + "epoch": 3.169936237265776, + "grad_norm": 0.006624558009207249, + "learning_rate": 7.110304661784719e-07, + "loss": 0.0, + "num_input_tokens_seen": 87456272, + "step": 129755 + }, + { + "epoch": 3.170058388097623, + "grad_norm": 0.10447652637958527, + "learning_rate": 7.10948827717734e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87459984, + "step": 129760 + }, + { + "epoch": 3.17018053892947, + "grad_norm": 0.0034557366743683815, + "learning_rate": 7.108671913590543e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87463632, + "step": 129765 + }, + { + "epoch": 3.1703026897613173, + "grad_norm": 0.001495639211498201, + "learning_rate": 7.107855571030259e-07, + "loss": 0.0, + "num_input_tokens_seen": 87467088, + "step": 129770 + }, + { + "epoch": 3.1704248405931645, + "grad_norm": 0.013662347570061684, + "learning_rate": 7.107039249502427e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87470288, + "step": 129775 + }, + { + "epoch": 3.1705469914250117, + "grad_norm": 0.00015722191892564297, + "learning_rate": 7.106222949012988e-07, + "loss": 0.0, + "num_input_tokens_seen": 87473424, + "step": 129780 + }, + { + "epoch": 3.170669142256859, + "grad_norm": 0.012680304236710072, + "learning_rate": 7.105406669567869e-07, + "loss": 0.0087, + "num_input_tokens_seen": 87476688, + "step": 129785 + }, + { + "epoch": 3.170791293088706, + "grad_norm": 0.0014011618914082646, + "learning_rate": 7.104590411173014e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87479952, + "step": 129790 + }, + { + "epoch": 3.1709134439205533, + "grad_norm": 0.03720277547836304, + "learning_rate": 7.103774173834354e-07, + "loss": 0.0, + "num_input_tokens_seen": 87483536, + "step": 129795 + }, + { + "epoch": 3.1710355947524, + "grad_norm": 0.013112449087202549, + "learning_rate": 7.102957957557831e-07, + "loss": 0.0, + "num_input_tokens_seen": 87487184, + "step": 129800 + }, + { + "epoch": 3.1711577455842472, + "grad_norm": 0.001320056733675301, + "learning_rate": 7.102141762349376e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87490768, + "step": 129805 + }, + { + "epoch": 3.1712798964160944, + "grad_norm": 0.00020275999850127846, + "learning_rate": 7.101325588214923e-07, + "loss": 0.0, + "num_input_tokens_seen": 87494096, + "step": 129810 + }, + { + "epoch": 3.1714020472479416, + "grad_norm": 0.012677210383117199, + "learning_rate": 7.100509435160413e-07, + "loss": 0.0, + "num_input_tokens_seen": 87497168, + "step": 129815 + }, + { + "epoch": 3.171524198079789, + "grad_norm": 0.0008101258426904678, + "learning_rate": 7.099693303191775e-07, + "loss": 0.0, + "num_input_tokens_seen": 87500624, + "step": 129820 + }, + { + "epoch": 3.171646348911636, + "grad_norm": 0.0013864908833056688, + "learning_rate": 7.098877192314952e-07, + "loss": 0.0, + "num_input_tokens_seen": 87503952, + "step": 129825 + }, + { + "epoch": 3.171768499743483, + "grad_norm": 0.007696992717683315, + "learning_rate": 7.098061102535868e-07, + "loss": 0.0, + "num_input_tokens_seen": 87507408, + "step": 129830 + }, + { + "epoch": 3.1718906505753304, + "grad_norm": 0.0013759899884462357, + "learning_rate": 7.09724503386047e-07, + "loss": 0.0501, + "num_input_tokens_seen": 87510480, + "step": 129835 + }, + { + "epoch": 3.1720128014071776, + "grad_norm": 0.0023257506545633078, + "learning_rate": 7.096428986294682e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87513936, + "step": 129840 + }, + { + "epoch": 3.1721349522390248, + "grad_norm": 0.0013343783793970942, + "learning_rate": 7.095612959844447e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87517904, + "step": 129845 + }, + { + "epoch": 3.172257103070872, + "grad_norm": 0.028019046410918236, + "learning_rate": 7.094796954515695e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87520976, + "step": 129850 + }, + { + "epoch": 3.172379253902719, + "grad_norm": 0.00030671211425215006, + "learning_rate": 7.093980970314361e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87523984, + "step": 129855 + }, + { + "epoch": 3.1725014047345663, + "grad_norm": 0.007012230344116688, + "learning_rate": 7.09316500724638e-07, + "loss": 0.0251, + "num_input_tokens_seen": 87527120, + "step": 129860 + }, + { + "epoch": 3.1726235555664135, + "grad_norm": 0.0017949631437659264, + "learning_rate": 7.092349065317683e-07, + "loss": 0.0688, + "num_input_tokens_seen": 87530576, + "step": 129865 + }, + { + "epoch": 3.1727457063982607, + "grad_norm": 0.008416776545345783, + "learning_rate": 7.091533144534203e-07, + "loss": 0.0, + "num_input_tokens_seen": 87533968, + "step": 129870 + }, + { + "epoch": 3.172867857230108, + "grad_norm": 0.02502255141735077, + "learning_rate": 7.090717244901883e-07, + "loss": 0.0, + "num_input_tokens_seen": 87537744, + "step": 129875 + }, + { + "epoch": 3.172990008061955, + "grad_norm": 0.0020456870552152395, + "learning_rate": 7.089901366426642e-07, + "loss": 0.0, + "num_input_tokens_seen": 87541200, + "step": 129880 + }, + { + "epoch": 3.1731121588938023, + "grad_norm": 0.014949319884181023, + "learning_rate": 7.089085509114428e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87544272, + "step": 129885 + }, + { + "epoch": 3.1732343097256495, + "grad_norm": 0.005121736787259579, + "learning_rate": 7.088269672971164e-07, + "loss": 0.0, + "num_input_tokens_seen": 87547216, + "step": 129890 + }, + { + "epoch": 3.173356460557496, + "grad_norm": 0.03437983617186546, + "learning_rate": 7.087453858002787e-07, + "loss": 0.0, + "num_input_tokens_seen": 87550608, + "step": 129895 + }, + { + "epoch": 3.1734786113893434, + "grad_norm": 0.005664190277457237, + "learning_rate": 7.086638064215226e-07, + "loss": 0.0, + "num_input_tokens_seen": 87553680, + "step": 129900 + }, + { + "epoch": 3.1736007622211906, + "grad_norm": 0.00024182397464755923, + "learning_rate": 7.085822291614419e-07, + "loss": 0.0, + "num_input_tokens_seen": 87556816, + "step": 129905 + }, + { + "epoch": 3.1737229130530378, + "grad_norm": 0.03858715668320656, + "learning_rate": 7.085006540206298e-07, + "loss": 0.0, + "num_input_tokens_seen": 87560592, + "step": 129910 + }, + { + "epoch": 3.173845063884885, + "grad_norm": 0.0002090064954245463, + "learning_rate": 7.08419080999679e-07, + "loss": 0.0, + "num_input_tokens_seen": 87563856, + "step": 129915 + }, + { + "epoch": 3.173967214716732, + "grad_norm": 0.026446470990777016, + "learning_rate": 7.083375100991835e-07, + "loss": 0.0, + "num_input_tokens_seen": 87567248, + "step": 129920 + }, + { + "epoch": 3.1740893655485793, + "grad_norm": 0.0023130772169679403, + "learning_rate": 7.082559413197356e-07, + "loss": 0.0, + "num_input_tokens_seen": 87570576, + "step": 129925 + }, + { + "epoch": 3.1742115163804265, + "grad_norm": 0.0021503111347556114, + "learning_rate": 7.081743746619289e-07, + "loss": 0.0427, + "num_input_tokens_seen": 87573776, + "step": 129930 + }, + { + "epoch": 3.1743336672122737, + "grad_norm": 0.002262742491438985, + "learning_rate": 7.080928101263571e-07, + "loss": 0.0, + "num_input_tokens_seen": 87576912, + "step": 129935 + }, + { + "epoch": 3.174455818044121, + "grad_norm": 0.00038311193929985166, + "learning_rate": 7.080112477136124e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87580240, + "step": 129940 + }, + { + "epoch": 3.174577968875968, + "grad_norm": 0.0581546351313591, + "learning_rate": 7.07929687424289e-07, + "loss": 0.0, + "num_input_tokens_seen": 87583824, + "step": 129945 + }, + { + "epoch": 3.1747001197078153, + "grad_norm": 0.017676610499620438, + "learning_rate": 7.078481292589791e-07, + "loss": 0.0679, + "num_input_tokens_seen": 87587280, + "step": 129950 + }, + { + "epoch": 3.1748222705396625, + "grad_norm": 0.09328219294548035, + "learning_rate": 7.077665732182765e-07, + "loss": 0.0637, + "num_input_tokens_seen": 87590480, + "step": 129955 + }, + { + "epoch": 3.1749444213715097, + "grad_norm": 0.0017453327309340239, + "learning_rate": 7.076850193027737e-07, + "loss": 0.0388, + "num_input_tokens_seen": 87593936, + "step": 129960 + }, + { + "epoch": 3.175066572203357, + "grad_norm": 0.005118220578879118, + "learning_rate": 7.07603467513064e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87597392, + "step": 129965 + }, + { + "epoch": 3.175188723035204, + "grad_norm": 0.0016354554099962115, + "learning_rate": 7.075219178497409e-07, + "loss": 0.0, + "num_input_tokens_seen": 87601872, + "step": 129970 + }, + { + "epoch": 3.1753108738670512, + "grad_norm": 0.012382320128381252, + "learning_rate": 7.074403703133967e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87605264, + "step": 129975 + }, + { + "epoch": 3.175433024698898, + "grad_norm": 0.00264959828928113, + "learning_rate": 7.073588249046252e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87609168, + "step": 129980 + }, + { + "epoch": 3.175555175530745, + "grad_norm": 0.09764406085014343, + "learning_rate": 7.072772816240184e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87612304, + "step": 129985 + }, + { + "epoch": 3.1756773263625924, + "grad_norm": 0.023879224434494972, + "learning_rate": 7.071957404721707e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87615312, + "step": 129990 + }, + { + "epoch": 3.1757994771944396, + "grad_norm": 0.00015118512965273112, + "learning_rate": 7.071142014496737e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87618960, + "step": 129995 + }, + { + "epoch": 3.1759216280262867, + "grad_norm": 0.0017053249757736921, + "learning_rate": 7.070326645571213e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87622096, + "step": 130000 + }, + { + "epoch": 3.176043778858134, + "grad_norm": 44.842586517333984, + "learning_rate": 7.06951129795106e-07, + "loss": 0.062, + "num_input_tokens_seen": 87625040, + "step": 130005 + }, + { + "epoch": 3.176165929689981, + "grad_norm": 0.005887690931558609, + "learning_rate": 7.068695971642212e-07, + "loss": 0.0, + "num_input_tokens_seen": 87628624, + "step": 130010 + }, + { + "epoch": 3.1762880805218283, + "grad_norm": 0.0014447735156863928, + "learning_rate": 7.067880666650594e-07, + "loss": 0.0835, + "num_input_tokens_seen": 87631888, + "step": 130015 + }, + { + "epoch": 3.1764102313536755, + "grad_norm": 0.0010694863740354776, + "learning_rate": 7.067065382982136e-07, + "loss": 0.0, + "num_input_tokens_seen": 87635472, + "step": 130020 + }, + { + "epoch": 3.1765323821855227, + "grad_norm": 0.006981464568525553, + "learning_rate": 7.066250120642765e-07, + "loss": 0.0, + "num_input_tokens_seen": 87638992, + "step": 130025 + }, + { + "epoch": 3.17665453301737, + "grad_norm": 0.17222914099693298, + "learning_rate": 7.065434879638417e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87642192, + "step": 130030 + }, + { + "epoch": 3.176776683849217, + "grad_norm": 0.0022465409711003304, + "learning_rate": 7.064619659975012e-07, + "loss": 0.0, + "num_input_tokens_seen": 87645392, + "step": 130035 + }, + { + "epoch": 3.1768988346810643, + "grad_norm": 0.04304928332567215, + "learning_rate": 7.063804461658486e-07, + "loss": 0.0, + "num_input_tokens_seen": 87648912, + "step": 130040 + }, + { + "epoch": 3.1770209855129115, + "grad_norm": 0.002591552911326289, + "learning_rate": 7.06298928469476e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87652560, + "step": 130045 + }, + { + "epoch": 3.1771431363447586, + "grad_norm": 0.0006279898807406425, + "learning_rate": 7.06217412908977e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87656144, + "step": 130050 + }, + { + "epoch": 3.177265287176606, + "grad_norm": 0.03918922692537308, + "learning_rate": 7.061358994849434e-07, + "loss": 0.0, + "num_input_tokens_seen": 87659344, + "step": 130055 + }, + { + "epoch": 3.177387438008453, + "grad_norm": 0.015605071559548378, + "learning_rate": 7.06054388197969e-07, + "loss": 0.0, + "num_input_tokens_seen": 87662800, + "step": 130060 + }, + { + "epoch": 3.1775095888402998, + "grad_norm": 0.0005128192133270204, + "learning_rate": 7.059728790486463e-07, + "loss": 0.0162, + "num_input_tokens_seen": 87666128, + "step": 130065 + }, + { + "epoch": 3.177631739672147, + "grad_norm": 0.005295256618410349, + "learning_rate": 7.058913720375674e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87669072, + "step": 130070 + }, + { + "epoch": 3.177753890503994, + "grad_norm": 0.02350236475467682, + "learning_rate": 7.058098671653261e-07, + "loss": 0.0, + "num_input_tokens_seen": 87672208, + "step": 130075 + }, + { + "epoch": 3.1778760413358413, + "grad_norm": 0.0005129770725034177, + "learning_rate": 7.057283644325141e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87676112, + "step": 130080 + }, + { + "epoch": 3.1779981921676885, + "grad_norm": 0.009734749794006348, + "learning_rate": 7.056468638397246e-07, + "loss": 0.0, + "num_input_tokens_seen": 87679376, + "step": 130085 + }, + { + "epoch": 3.1781203429995357, + "grad_norm": 0.0009069366496987641, + "learning_rate": 7.055653653875507e-07, + "loss": 0.0655, + "num_input_tokens_seen": 87682576, + "step": 130090 + }, + { + "epoch": 3.178242493831383, + "grad_norm": 0.002525150077417493, + "learning_rate": 7.054838690765843e-07, + "loss": 0.0, + "num_input_tokens_seen": 87685968, + "step": 130095 + }, + { + "epoch": 3.17836464466323, + "grad_norm": 0.0003225205873604864, + "learning_rate": 7.054023749074188e-07, + "loss": 0.0, + "num_input_tokens_seen": 87689168, + "step": 130100 + }, + { + "epoch": 3.1784867954950773, + "grad_norm": 0.0007413416169583797, + "learning_rate": 7.053208828806459e-07, + "loss": 0.0, + "num_input_tokens_seen": 87692752, + "step": 130105 + }, + { + "epoch": 3.1786089463269245, + "grad_norm": 0.0032514086924493313, + "learning_rate": 7.052393929968593e-07, + "loss": 0.0007, + "num_input_tokens_seen": 87695632, + "step": 130110 + }, + { + "epoch": 3.1787310971587717, + "grad_norm": 0.0037922996561974287, + "learning_rate": 7.05157905256651e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87698768, + "step": 130115 + }, + { + "epoch": 3.178853247990619, + "grad_norm": 0.0006531036924570799, + "learning_rate": 7.050764196606134e-07, + "loss": 0.0, + "num_input_tokens_seen": 87702288, + "step": 130120 + }, + { + "epoch": 3.178975398822466, + "grad_norm": 0.00046883506001904607, + "learning_rate": 7.049949362093399e-07, + "loss": 0.0, + "num_input_tokens_seen": 87705232, + "step": 130125 + }, + { + "epoch": 3.1790975496543132, + "grad_norm": 0.0008409220608882606, + "learning_rate": 7.049134549034222e-07, + "loss": 0.0511, + "num_input_tokens_seen": 87709136, + "step": 130130 + }, + { + "epoch": 3.1792197004861604, + "grad_norm": 0.0014450735179707408, + "learning_rate": 7.048319757434535e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87712400, + "step": 130135 + }, + { + "epoch": 3.1793418513180076, + "grad_norm": 0.0005200757295824587, + "learning_rate": 7.047504987300256e-07, + "loss": 0.0, + "num_input_tokens_seen": 87715600, + "step": 130140 + }, + { + "epoch": 3.179464002149855, + "grad_norm": 0.013696745969355106, + "learning_rate": 7.046690238637321e-07, + "loss": 0.0, + "num_input_tokens_seen": 87719184, + "step": 130145 + }, + { + "epoch": 3.179586152981702, + "grad_norm": 0.0015738914953544736, + "learning_rate": 7.045875511451642e-07, + "loss": 0.0, + "num_input_tokens_seen": 87722512, + "step": 130150 + }, + { + "epoch": 3.179708303813549, + "grad_norm": 0.00783845316618681, + "learning_rate": 7.045060805749156e-07, + "loss": 0.0, + "num_input_tokens_seen": 87725648, + "step": 130155 + }, + { + "epoch": 3.179830454645396, + "grad_norm": 0.0014518832322210073, + "learning_rate": 7.044246121535781e-07, + "loss": 0.0613, + "num_input_tokens_seen": 87728848, + "step": 130160 + }, + { + "epoch": 3.179952605477243, + "grad_norm": 0.15988725423812866, + "learning_rate": 7.043431458817444e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87731920, + "step": 130165 + }, + { + "epoch": 3.1800747563090903, + "grad_norm": 0.0022228602319955826, + "learning_rate": 7.042616817600067e-07, + "loss": 0.0, + "num_input_tokens_seen": 87735184, + "step": 130170 + }, + { + "epoch": 3.1801969071409375, + "grad_norm": 0.029602840542793274, + "learning_rate": 7.041802197889577e-07, + "loss": 0.0366, + "num_input_tokens_seen": 87738704, + "step": 130175 + }, + { + "epoch": 3.1803190579727847, + "grad_norm": 51.33023452758789, + "learning_rate": 7.040987599691895e-07, + "loss": 0.0402, + "num_input_tokens_seen": 87741840, + "step": 130180 + }, + { + "epoch": 3.180441208804632, + "grad_norm": 0.00034280677209608257, + "learning_rate": 7.040173023012952e-07, + "loss": 0.0, + "num_input_tokens_seen": 87745104, + "step": 130185 + }, + { + "epoch": 3.180563359636479, + "grad_norm": 0.017405470833182335, + "learning_rate": 7.039358467858662e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87749008, + "step": 130190 + }, + { + "epoch": 3.1806855104683263, + "grad_norm": 0.008447892032563686, + "learning_rate": 7.038543934234957e-07, + "loss": 0.0, + "num_input_tokens_seen": 87752848, + "step": 130195 + }, + { + "epoch": 3.1808076613001735, + "grad_norm": 0.04451584443449974, + "learning_rate": 7.037729422147754e-07, + "loss": 0.0, + "num_input_tokens_seen": 87756048, + "step": 130200 + }, + { + "epoch": 3.1809298121320206, + "grad_norm": 0.015546695329248905, + "learning_rate": 7.036914931602984e-07, + "loss": 0.0, + "num_input_tokens_seen": 87759504, + "step": 130205 + }, + { + "epoch": 3.181051962963868, + "grad_norm": 0.0012945537455379963, + "learning_rate": 7.03610046260656e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87762576, + "step": 130210 + }, + { + "epoch": 3.181174113795715, + "grad_norm": 0.00038110712193883955, + "learning_rate": 7.035286015164413e-07, + "loss": 0.0, + "num_input_tokens_seen": 87765712, + "step": 130215 + }, + { + "epoch": 3.181296264627562, + "grad_norm": 0.0018213241128250957, + "learning_rate": 7.034471589282467e-07, + "loss": 0.0003, + "num_input_tokens_seen": 87769168, + "step": 130220 + }, + { + "epoch": 3.1814184154594094, + "grad_norm": 13.02833366394043, + "learning_rate": 7.033657184966634e-07, + "loss": 0.0399, + "num_input_tokens_seen": 87772624, + "step": 130225 + }, + { + "epoch": 3.1815405662912566, + "grad_norm": 0.023209894075989723, + "learning_rate": 7.032842802222851e-07, + "loss": 0.0, + "num_input_tokens_seen": 87776080, + "step": 130230 + }, + { + "epoch": 3.181662717123104, + "grad_norm": 0.07949075847864151, + "learning_rate": 7.032028441057028e-07, + "loss": 0.0672, + "num_input_tokens_seen": 87779472, + "step": 130235 + }, + { + "epoch": 3.181784867954951, + "grad_norm": 0.016844136640429497, + "learning_rate": 7.031214101475092e-07, + "loss": 0.0, + "num_input_tokens_seen": 87782800, + "step": 130240 + }, + { + "epoch": 3.1819070187867977, + "grad_norm": 0.0032533081248402596, + "learning_rate": 7.030399783482971e-07, + "loss": 0.0, + "num_input_tokens_seen": 87786064, + "step": 130245 + }, + { + "epoch": 3.182029169618645, + "grad_norm": 0.0273179579526186, + "learning_rate": 7.029585487086576e-07, + "loss": 0.0, + "num_input_tokens_seen": 87789072, + "step": 130250 + }, + { + "epoch": 3.182151320450492, + "grad_norm": 0.000918433303013444, + "learning_rate": 7.028771212291839e-07, + "loss": 0.0, + "num_input_tokens_seen": 87792272, + "step": 130255 + }, + { + "epoch": 3.1822734712823393, + "grad_norm": 0.02920876070857048, + "learning_rate": 7.027956959104673e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87795472, + "step": 130260 + }, + { + "epoch": 3.1823956221141865, + "grad_norm": 6.971739639993757e-05, + "learning_rate": 7.027142727531008e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87798800, + "step": 130265 + }, + { + "epoch": 3.1825177729460337, + "grad_norm": 0.05342421680688858, + "learning_rate": 7.026328517576757e-07, + "loss": 0.0, + "num_input_tokens_seen": 87801872, + "step": 130270 + }, + { + "epoch": 3.182639923777881, + "grad_norm": 0.0004939790815114975, + "learning_rate": 7.025514329247844e-07, + "loss": 0.0004, + "num_input_tokens_seen": 87804880, + "step": 130275 + }, + { + "epoch": 3.182762074609728, + "grad_norm": 0.18766658008098602, + "learning_rate": 7.024700162550194e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87808272, + "step": 130280 + }, + { + "epoch": 3.1828842254415752, + "grad_norm": 0.0023981656413525343, + "learning_rate": 7.023886017489721e-07, + "loss": 0.0, + "num_input_tokens_seen": 87811856, + "step": 130285 + }, + { + "epoch": 3.1830063762734224, + "grad_norm": 0.00666560186073184, + "learning_rate": 7.023071894072354e-07, + "loss": 0.0317, + "num_input_tokens_seen": 87815120, + "step": 130290 + }, + { + "epoch": 3.1831285271052696, + "grad_norm": 0.004666912369430065, + "learning_rate": 7.022257792304005e-07, + "loss": 0.0, + "num_input_tokens_seen": 87818832, + "step": 130295 + }, + { + "epoch": 3.183250677937117, + "grad_norm": 0.005564829334616661, + "learning_rate": 7.021443712190601e-07, + "loss": 0.0005, + "num_input_tokens_seen": 87822544, + "step": 130300 + }, + { + "epoch": 3.183372828768964, + "grad_norm": 0.09069501608610153, + "learning_rate": 7.020629653738056e-07, + "loss": 0.0, + "num_input_tokens_seen": 87826064, + "step": 130305 + }, + { + "epoch": 3.183494979600811, + "grad_norm": 6.888069765409455e-05, + "learning_rate": 7.019815616952295e-07, + "loss": 0.0, + "num_input_tokens_seen": 87829520, + "step": 130310 + }, + { + "epoch": 3.1836171304326584, + "grad_norm": 0.005070709623396397, + "learning_rate": 7.019001601839238e-07, + "loss": 0.0, + "num_input_tokens_seen": 87832976, + "step": 130315 + }, + { + "epoch": 3.1837392812645056, + "grad_norm": 0.00020985525043215603, + "learning_rate": 7.018187608404802e-07, + "loss": 0.0674, + "num_input_tokens_seen": 87836112, + "step": 130320 + }, + { + "epoch": 3.1838614320963528, + "grad_norm": 7.797882426530123e-05, + "learning_rate": 7.017373636654908e-07, + "loss": 0.0703, + "num_input_tokens_seen": 87839056, + "step": 130325 + }, + { + "epoch": 3.1839835829282, + "grad_norm": 0.006128346081823111, + "learning_rate": 7.016559686595475e-07, + "loss": 0.0, + "num_input_tokens_seen": 87842320, + "step": 130330 + }, + { + "epoch": 3.184105733760047, + "grad_norm": 0.15260010957717896, + "learning_rate": 7.015745758232421e-07, + "loss": 0.0, + "num_input_tokens_seen": 87845776, + "step": 130335 + }, + { + "epoch": 3.184227884591894, + "grad_norm": 0.15395669639110565, + "learning_rate": 7.01493185157167e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87848848, + "step": 130340 + }, + { + "epoch": 3.184350035423741, + "grad_norm": 4.5210264943307266e-05, + "learning_rate": 7.014117966619133e-07, + "loss": 0.0, + "num_input_tokens_seen": 87852880, + "step": 130345 + }, + { + "epoch": 3.1844721862555883, + "grad_norm": 0.06586100906133652, + "learning_rate": 7.013304103380738e-07, + "loss": 0.0, + "num_input_tokens_seen": 87855952, + "step": 130350 + }, + { + "epoch": 3.1845943370874354, + "grad_norm": 0.24961978197097778, + "learning_rate": 7.012490261862394e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87859472, + "step": 130355 + }, + { + "epoch": 3.1847164879192826, + "grad_norm": 12.334030151367188, + "learning_rate": 7.011676442070029e-07, + "loss": 0.031, + "num_input_tokens_seen": 87862608, + "step": 130360 + }, + { + "epoch": 3.18483863875113, + "grad_norm": 0.00022853047994431108, + "learning_rate": 7.010862644009553e-07, + "loss": 0.0, + "num_input_tokens_seen": 87866320, + "step": 130365 + }, + { + "epoch": 3.184960789582977, + "grad_norm": 0.06349430978298187, + "learning_rate": 7.010048867686889e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87869712, + "step": 130370 + }, + { + "epoch": 3.185082940414824, + "grad_norm": 0.009467333555221558, + "learning_rate": 7.009235113107956e-07, + "loss": 0.0, + "num_input_tokens_seen": 87873424, + "step": 130375 + }, + { + "epoch": 3.1852050912466714, + "grad_norm": 0.00011473613267298788, + "learning_rate": 7.008421380278666e-07, + "loss": 0.0311, + "num_input_tokens_seen": 87876496, + "step": 130380 + }, + { + "epoch": 3.1853272420785186, + "grad_norm": 0.03573605790734291, + "learning_rate": 7.007607669204944e-07, + "loss": 0.0317, + "num_input_tokens_seen": 87879824, + "step": 130385 + }, + { + "epoch": 3.1854493929103658, + "grad_norm": 0.007226187270134687, + "learning_rate": 7.006793979892702e-07, + "loss": 0.0418, + "num_input_tokens_seen": 87883152, + "step": 130390 + }, + { + "epoch": 3.185571543742213, + "grad_norm": 0.00042921036947518587, + "learning_rate": 7.005980312347856e-07, + "loss": 0.0, + "num_input_tokens_seen": 87886736, + "step": 130395 + }, + { + "epoch": 3.18569369457406, + "grad_norm": 17.04238510131836, + "learning_rate": 7.005166666576333e-07, + "loss": 0.033, + "num_input_tokens_seen": 87890320, + "step": 130400 + }, + { + "epoch": 3.1858158454059073, + "grad_norm": 0.07590338587760925, + "learning_rate": 7.004353042584038e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87894288, + "step": 130405 + }, + { + "epoch": 3.1859379962377545, + "grad_norm": 0.0023550644982606173, + "learning_rate": 7.003539440376898e-07, + "loss": 0.0, + "num_input_tokens_seen": 87897872, + "step": 130410 + }, + { + "epoch": 3.1860601470696017, + "grad_norm": 0.02361527644097805, + "learning_rate": 7.002725859960821e-07, + "loss": 0.0, + "num_input_tokens_seen": 87902224, + "step": 130415 + }, + { + "epoch": 3.186182297901449, + "grad_norm": 0.05243421345949173, + "learning_rate": 7.001912301341732e-07, + "loss": 0.0, + "num_input_tokens_seen": 87905872, + "step": 130420 + }, + { + "epoch": 3.1863044487332957, + "grad_norm": 0.00032037662458606064, + "learning_rate": 7.001098764525542e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87909072, + "step": 130425 + }, + { + "epoch": 3.186426599565143, + "grad_norm": 0.043554555624723434, + "learning_rate": 7.000285249518164e-07, + "loss": 0.0, + "num_input_tokens_seen": 87912336, + "step": 130430 + }, + { + "epoch": 3.18654875039699, + "grad_norm": 0.04560072347521782, + "learning_rate": 6.999471756325523e-07, + "loss": 0.0, + "num_input_tokens_seen": 87915792, + "step": 130435 + }, + { + "epoch": 3.1866709012288372, + "grad_norm": 0.24238801002502441, + "learning_rate": 6.998658284953528e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87919056, + "step": 130440 + }, + { + "epoch": 3.1867930520606844, + "grad_norm": 0.11142058670520782, + "learning_rate": 6.9978448354081e-07, + "loss": 0.0572, + "num_input_tokens_seen": 87922448, + "step": 130445 + }, + { + "epoch": 3.1869152028925316, + "grad_norm": 0.050994254648685455, + "learning_rate": 6.997031407695148e-07, + "loss": 0.0, + "num_input_tokens_seen": 87926032, + "step": 130450 + }, + { + "epoch": 3.187037353724379, + "grad_norm": 0.012314151972532272, + "learning_rate": 6.996218001820596e-07, + "loss": 0.0555, + "num_input_tokens_seen": 87929296, + "step": 130455 + }, + { + "epoch": 3.187159504556226, + "grad_norm": 0.0014616765547543764, + "learning_rate": 6.99540461779035e-07, + "loss": 0.0, + "num_input_tokens_seen": 87932944, + "step": 130460 + }, + { + "epoch": 3.187281655388073, + "grad_norm": 0.006061887834221125, + "learning_rate": 6.994591255610331e-07, + "loss": 0.0, + "num_input_tokens_seen": 87936016, + "step": 130465 + }, + { + "epoch": 3.1874038062199204, + "grad_norm": 0.00010483522055437788, + "learning_rate": 6.993777915286455e-07, + "loss": 0.0, + "num_input_tokens_seen": 87939600, + "step": 130470 + }, + { + "epoch": 3.1875259570517676, + "grad_norm": 0.001160849235020578, + "learning_rate": 6.992964596824633e-07, + "loss": 0.0, + "num_input_tokens_seen": 87942608, + "step": 130475 + }, + { + "epoch": 3.1876481078836147, + "grad_norm": 2.6711461544036865, + "learning_rate": 6.992151300230782e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87946000, + "step": 130480 + }, + { + "epoch": 3.187770258715462, + "grad_norm": 0.000411342567531392, + "learning_rate": 6.991338025510816e-07, + "loss": 0.0, + "num_input_tokens_seen": 87949520, + "step": 130485 + }, + { + "epoch": 3.187892409547309, + "grad_norm": 0.004299001768231392, + "learning_rate": 6.990524772670645e-07, + "loss": 0.0, + "num_input_tokens_seen": 87953104, + "step": 130490 + }, + { + "epoch": 3.1880145603791563, + "grad_norm": 0.0024759077932685614, + "learning_rate": 6.989711541716192e-07, + "loss": 0.0, + "num_input_tokens_seen": 87956624, + "step": 130495 + }, + { + "epoch": 3.1881367112110035, + "grad_norm": 0.002334713703021407, + "learning_rate": 6.988898332653363e-07, + "loss": 0.0, + "num_input_tokens_seen": 87960016, + "step": 130500 + }, + { + "epoch": 3.1882588620428507, + "grad_norm": 0.0018174410797655582, + "learning_rate": 6.988085145488081e-07, + "loss": 0.0, + "num_input_tokens_seen": 87963152, + "step": 130505 + }, + { + "epoch": 3.188381012874698, + "grad_norm": 0.00905153714120388, + "learning_rate": 6.987271980226247e-07, + "loss": 0.0, + "num_input_tokens_seen": 87966544, + "step": 130510 + }, + { + "epoch": 3.188503163706545, + "grad_norm": 0.0009720994858071208, + "learning_rate": 6.986458836873787e-07, + "loss": 0.0, + "num_input_tokens_seen": 87969616, + "step": 130515 + }, + { + "epoch": 3.188625314538392, + "grad_norm": 0.0017524209106341004, + "learning_rate": 6.985645715436605e-07, + "loss": 0.0, + "num_input_tokens_seen": 87973264, + "step": 130520 + }, + { + "epoch": 3.188747465370239, + "grad_norm": 0.0002401109377387911, + "learning_rate": 6.98483261592062e-07, + "loss": 0.0365, + "num_input_tokens_seen": 87976464, + "step": 130525 + }, + { + "epoch": 3.188869616202086, + "grad_norm": 0.5011038184165955, + "learning_rate": 6.984019538331745e-07, + "loss": 0.0001, + "num_input_tokens_seen": 87979600, + "step": 130530 + }, + { + "epoch": 3.1889917670339334, + "grad_norm": 0.014322505332529545, + "learning_rate": 6.983206482675885e-07, + "loss": 0.0, + "num_input_tokens_seen": 87982864, + "step": 130535 + }, + { + "epoch": 3.1891139178657806, + "grad_norm": 0.01264337170869112, + "learning_rate": 6.982393448958965e-07, + "loss": 0.0002, + "num_input_tokens_seen": 87986448, + "step": 130540 + }, + { + "epoch": 3.1892360686976278, + "grad_norm": 0.0003539649769663811, + "learning_rate": 6.981580437186887e-07, + "loss": 0.0, + "num_input_tokens_seen": 87989904, + "step": 130545 + }, + { + "epoch": 3.189358219529475, + "grad_norm": 0.004508215002715588, + "learning_rate": 6.980767447365574e-07, + "loss": 0.0, + "num_input_tokens_seen": 87993168, + "step": 130550 + }, + { + "epoch": 3.189480370361322, + "grad_norm": 0.001551062916405499, + "learning_rate": 6.979954479500924e-07, + "loss": 0.0, + "num_input_tokens_seen": 87996240, + "step": 130555 + }, + { + "epoch": 3.1896025211931693, + "grad_norm": 0.00028249152819626033, + "learning_rate": 6.979141533598861e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88000080, + "step": 130560 + }, + { + "epoch": 3.1897246720250165, + "grad_norm": 0.0010627760784700513, + "learning_rate": 6.978328609665296e-07, + "loss": 0.0, + "num_input_tokens_seen": 88003408, + "step": 130565 + }, + { + "epoch": 3.1898468228568637, + "grad_norm": 0.0011315912706777453, + "learning_rate": 6.977515707706134e-07, + "loss": 0.0498, + "num_input_tokens_seen": 88006480, + "step": 130570 + }, + { + "epoch": 3.189968973688711, + "grad_norm": 0.03361840918660164, + "learning_rate": 6.976702827727294e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88009872, + "step": 130575 + }, + { + "epoch": 3.190091124520558, + "grad_norm": 0.0004706278850790113, + "learning_rate": 6.975889969734684e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88013328, + "step": 130580 + }, + { + "epoch": 3.1902132753524053, + "grad_norm": 0.004012112505733967, + "learning_rate": 6.975077133734213e-07, + "loss": 0.0, + "num_input_tokens_seen": 88017232, + "step": 130585 + }, + { + "epoch": 3.1903354261842525, + "grad_norm": 0.0007510602008551359, + "learning_rate": 6.974264319731797e-07, + "loss": 0.0, + "num_input_tokens_seen": 88020688, + "step": 130590 + }, + { + "epoch": 3.1904575770160997, + "grad_norm": 0.0016171341994777322, + "learning_rate": 6.973451527733343e-07, + "loss": 0.0, + "num_input_tokens_seen": 88023568, + "step": 130595 + }, + { + "epoch": 3.190579727847947, + "grad_norm": 0.0002590622170828283, + "learning_rate": 6.972638757744766e-07, + "loss": 0.0, + "num_input_tokens_seen": 88027024, + "step": 130600 + }, + { + "epoch": 3.1907018786797936, + "grad_norm": 0.0003849154163617641, + "learning_rate": 6.971826009771971e-07, + "loss": 0.0, + "num_input_tokens_seen": 88030544, + "step": 130605 + }, + { + "epoch": 3.190824029511641, + "grad_norm": 0.00034108353429473937, + "learning_rate": 6.971013283820876e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88034064, + "step": 130610 + }, + { + "epoch": 3.190946180343488, + "grad_norm": 0.002630336210131645, + "learning_rate": 6.970200579897382e-07, + "loss": 0.0, + "num_input_tokens_seen": 88037328, + "step": 130615 + }, + { + "epoch": 3.191068331175335, + "grad_norm": 0.0024332611355930567, + "learning_rate": 6.969387898007406e-07, + "loss": 0.0, + "num_input_tokens_seen": 88041040, + "step": 130620 + }, + { + "epoch": 3.1911904820071824, + "grad_norm": 0.00011417076166253537, + "learning_rate": 6.968575238156858e-07, + "loss": 0.0, + "num_input_tokens_seen": 88044624, + "step": 130625 + }, + { + "epoch": 3.1913126328390296, + "grad_norm": 0.0007042978540994227, + "learning_rate": 6.967762600351646e-07, + "loss": 0.0, + "num_input_tokens_seen": 88047952, + "step": 130630 + }, + { + "epoch": 3.1914347836708767, + "grad_norm": 0.00012770794273819774, + "learning_rate": 6.966949984597679e-07, + "loss": 0.0, + "num_input_tokens_seen": 88051216, + "step": 130635 + }, + { + "epoch": 3.191556934502724, + "grad_norm": 0.027314137667417526, + "learning_rate": 6.966137390900868e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88054352, + "step": 130640 + }, + { + "epoch": 3.191679085334571, + "grad_norm": 52.50052261352539, + "learning_rate": 6.96532481926712e-07, + "loss": 0.0467, + "num_input_tokens_seen": 88057872, + "step": 130645 + }, + { + "epoch": 3.1918012361664183, + "grad_norm": 0.00997094251215458, + "learning_rate": 6.96451226970235e-07, + "loss": 0.0, + "num_input_tokens_seen": 88060880, + "step": 130650 + }, + { + "epoch": 3.1919233869982655, + "grad_norm": 0.012899408116936684, + "learning_rate": 6.963699742212459e-07, + "loss": 0.0, + "num_input_tokens_seen": 88064272, + "step": 130655 + }, + { + "epoch": 3.1920455378301127, + "grad_norm": 0.00294029014185071, + "learning_rate": 6.962887236803363e-07, + "loss": 0.0418, + "num_input_tokens_seen": 88067920, + "step": 130660 + }, + { + "epoch": 3.19216768866196, + "grad_norm": 0.0002910926123149693, + "learning_rate": 6.962074753480966e-07, + "loss": 0.0, + "num_input_tokens_seen": 88071504, + "step": 130665 + }, + { + "epoch": 3.192289839493807, + "grad_norm": 0.017607729882001877, + "learning_rate": 6.961262292251182e-07, + "loss": 0.0, + "num_input_tokens_seen": 88074576, + "step": 130670 + }, + { + "epoch": 3.1924119903256543, + "grad_norm": 9.501243039267138e-05, + "learning_rate": 6.96044985311991e-07, + "loss": 0.0, + "num_input_tokens_seen": 88078032, + "step": 130675 + }, + { + "epoch": 3.1925341411575014, + "grad_norm": 0.001667727599851787, + "learning_rate": 6.959637436093069e-07, + "loss": 0.0, + "num_input_tokens_seen": 88081488, + "step": 130680 + }, + { + "epoch": 3.1926562919893486, + "grad_norm": 0.00019331845396663994, + "learning_rate": 6.958825041176564e-07, + "loss": 0.0, + "num_input_tokens_seen": 88084752, + "step": 130685 + }, + { + "epoch": 3.1927784428211954, + "grad_norm": 0.000298037106404081, + "learning_rate": 6.958012668376295e-07, + "loss": 0.0, + "num_input_tokens_seen": 88087952, + "step": 130690 + }, + { + "epoch": 3.1929005936530426, + "grad_norm": 0.0023864801041781902, + "learning_rate": 6.957200317698182e-07, + "loss": 0.0, + "num_input_tokens_seen": 88091280, + "step": 130695 + }, + { + "epoch": 3.1930227444848898, + "grad_norm": 0.13202454149723053, + "learning_rate": 6.956387989148123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88094800, + "step": 130700 + }, + { + "epoch": 3.193144895316737, + "grad_norm": 0.018603404983878136, + "learning_rate": 6.955575682732032e-07, + "loss": 0.0, + "num_input_tokens_seen": 88097872, + "step": 130705 + }, + { + "epoch": 3.193267046148584, + "grad_norm": 0.00045014871284365654, + "learning_rate": 6.95476339845581e-07, + "loss": 0.0, + "num_input_tokens_seen": 88101648, + "step": 130710 + }, + { + "epoch": 3.1933891969804313, + "grad_norm": 3.039315743080806e-05, + "learning_rate": 6.953951136325367e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88105296, + "step": 130715 + }, + { + "epoch": 3.1935113478122785, + "grad_norm": 0.00015498968423344195, + "learning_rate": 6.953138896346615e-07, + "loss": 0.0, + "num_input_tokens_seen": 88108688, + "step": 130720 + }, + { + "epoch": 3.1936334986441257, + "grad_norm": 21.384185791015625, + "learning_rate": 6.952326678525452e-07, + "loss": 0.0903, + "num_input_tokens_seen": 88112400, + "step": 130725 + }, + { + "epoch": 3.193755649475973, + "grad_norm": 0.01024781446903944, + "learning_rate": 6.951514482867794e-07, + "loss": 0.0, + "num_input_tokens_seen": 88115728, + "step": 130730 + }, + { + "epoch": 3.19387780030782, + "grad_norm": 0.0065758079290390015, + "learning_rate": 6.95070230937954e-07, + "loss": 0.0005, + "num_input_tokens_seen": 88118672, + "step": 130735 + }, + { + "epoch": 3.1939999511396673, + "grad_norm": 0.0006454504327848554, + "learning_rate": 6.949890158066598e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88122064, + "step": 130740 + }, + { + "epoch": 3.1941221019715145, + "grad_norm": 0.005921836942434311, + "learning_rate": 6.949078028934879e-07, + "loss": 0.0, + "num_input_tokens_seen": 88125136, + "step": 130745 + }, + { + "epoch": 3.1942442528033617, + "grad_norm": 0.00011554537923075259, + "learning_rate": 6.94826592199028e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88128208, + "step": 130750 + }, + { + "epoch": 3.194366403635209, + "grad_norm": 0.007586855906993151, + "learning_rate": 6.947453837238716e-07, + "loss": 0.0, + "num_input_tokens_seen": 88131984, + "step": 130755 + }, + { + "epoch": 3.194488554467056, + "grad_norm": 0.0022421926259994507, + "learning_rate": 6.946641774686085e-07, + "loss": 0.1728, + "num_input_tokens_seen": 88135696, + "step": 130760 + }, + { + "epoch": 3.1946107052989032, + "grad_norm": 0.05754800885915756, + "learning_rate": 6.945829734338301e-07, + "loss": 0.0, + "num_input_tokens_seen": 88139088, + "step": 130765 + }, + { + "epoch": 3.1947328561307504, + "grad_norm": 104.725341796875, + "learning_rate": 6.94501771620126e-07, + "loss": 0.0379, + "num_input_tokens_seen": 88142224, + "step": 130770 + }, + { + "epoch": 3.1948550069625976, + "grad_norm": 0.0007363682962022722, + "learning_rate": 6.944205720280875e-07, + "loss": 0.0019, + "num_input_tokens_seen": 88145488, + "step": 130775 + }, + { + "epoch": 3.194977157794445, + "grad_norm": 0.06700855493545532, + "learning_rate": 6.943393746583047e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88148624, + "step": 130780 + }, + { + "epoch": 3.1950993086262915, + "grad_norm": 0.00593652855604887, + "learning_rate": 6.942581795113681e-07, + "loss": 0.0, + "num_input_tokens_seen": 88152336, + "step": 130785 + }, + { + "epoch": 3.1952214594581387, + "grad_norm": 0.0031424618791788816, + "learning_rate": 6.941769865878684e-07, + "loss": 0.0, + "num_input_tokens_seen": 88155600, + "step": 130790 + }, + { + "epoch": 3.195343610289986, + "grad_norm": 0.00047509727301076055, + "learning_rate": 6.940957958883957e-07, + "loss": 0.0003, + "num_input_tokens_seen": 88159248, + "step": 130795 + }, + { + "epoch": 3.195465761121833, + "grad_norm": 0.00025798825663514435, + "learning_rate": 6.940146074135406e-07, + "loss": 0.0683, + "num_input_tokens_seen": 88162832, + "step": 130800 + }, + { + "epoch": 3.1955879119536803, + "grad_norm": 0.0051554907113313675, + "learning_rate": 6.93933421163894e-07, + "loss": 0.0465, + "num_input_tokens_seen": 88166096, + "step": 130805 + }, + { + "epoch": 3.1957100627855275, + "grad_norm": 0.001389906508848071, + "learning_rate": 6.938522371400455e-07, + "loss": 0.0, + "num_input_tokens_seen": 88169552, + "step": 130810 + }, + { + "epoch": 3.1958322136173747, + "grad_norm": 0.01061465684324503, + "learning_rate": 6.937710553425862e-07, + "loss": 0.0, + "num_input_tokens_seen": 88172688, + "step": 130815 + }, + { + "epoch": 3.195954364449222, + "grad_norm": 0.0011689724633470178, + "learning_rate": 6.936898757721059e-07, + "loss": 0.0, + "num_input_tokens_seen": 88175952, + "step": 130820 + }, + { + "epoch": 3.196076515281069, + "grad_norm": 0.01699061505496502, + "learning_rate": 6.936086984291954e-07, + "loss": 0.0, + "num_input_tokens_seen": 88179472, + "step": 130825 + }, + { + "epoch": 3.1961986661129163, + "grad_norm": 0.0058431727811694145, + "learning_rate": 6.935275233144447e-07, + "loss": 0.0407, + "num_input_tokens_seen": 88182928, + "step": 130830 + }, + { + "epoch": 3.1963208169447634, + "grad_norm": 0.0075033195316791534, + "learning_rate": 6.934463504284442e-07, + "loss": 0.0, + "num_input_tokens_seen": 88186064, + "step": 130835 + }, + { + "epoch": 3.1964429677766106, + "grad_norm": 0.002028069458901882, + "learning_rate": 6.933651797717847e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88189072, + "step": 130840 + }, + { + "epoch": 3.196565118608458, + "grad_norm": 0.0006016287952661514, + "learning_rate": 6.932840113450555e-07, + "loss": 0.0, + "num_input_tokens_seen": 88192336, + "step": 130845 + }, + { + "epoch": 3.196687269440305, + "grad_norm": 0.0034821913577616215, + "learning_rate": 6.932028451488481e-07, + "loss": 0.0, + "num_input_tokens_seen": 88195728, + "step": 130850 + }, + { + "epoch": 3.196809420272152, + "grad_norm": 8.157228876370937e-05, + "learning_rate": 6.931216811837515e-07, + "loss": 0.0, + "num_input_tokens_seen": 88199312, + "step": 130855 + }, + { + "epoch": 3.1969315711039994, + "grad_norm": 0.0009059004369191825, + "learning_rate": 6.930405194503571e-07, + "loss": 0.0, + "num_input_tokens_seen": 88202576, + "step": 130860 + }, + { + "epoch": 3.1970537219358466, + "grad_norm": 0.006109096109867096, + "learning_rate": 6.929593599492543e-07, + "loss": 0.0, + "num_input_tokens_seen": 88205648, + "step": 130865 + }, + { + "epoch": 3.1971758727676933, + "grad_norm": 0.0026130101177841425, + "learning_rate": 6.928782026810333e-07, + "loss": 0.0558, + "num_input_tokens_seen": 88208784, + "step": 130870 + }, + { + "epoch": 3.1972980235995405, + "grad_norm": 2.651002250786405e-05, + "learning_rate": 6.927970476462853e-07, + "loss": 0.0, + "num_input_tokens_seen": 88211920, + "step": 130875 + }, + { + "epoch": 3.1974201744313877, + "grad_norm": 0.016481151804327965, + "learning_rate": 6.927158948455994e-07, + "loss": 0.0, + "num_input_tokens_seen": 88215376, + "step": 130880 + }, + { + "epoch": 3.197542325263235, + "grad_norm": 0.15974406898021698, + "learning_rate": 6.926347442795664e-07, + "loss": 0.0006, + "num_input_tokens_seen": 88219152, + "step": 130885 + }, + { + "epoch": 3.197664476095082, + "grad_norm": 0.08364619314670563, + "learning_rate": 6.925535959487761e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88222992, + "step": 130890 + }, + { + "epoch": 3.1977866269269293, + "grad_norm": 0.0003432470257394016, + "learning_rate": 6.924724498538186e-07, + "loss": 0.0, + "num_input_tokens_seen": 88226320, + "step": 130895 + }, + { + "epoch": 3.1979087777587765, + "grad_norm": 0.0074369474314153194, + "learning_rate": 6.923913059952844e-07, + "loss": 0.0, + "num_input_tokens_seen": 88230032, + "step": 130900 + }, + { + "epoch": 3.1980309285906237, + "grad_norm": 0.0005242819897830486, + "learning_rate": 6.92310164373763e-07, + "loss": 0.0, + "num_input_tokens_seen": 88232976, + "step": 130905 + }, + { + "epoch": 3.198153079422471, + "grad_norm": 0.004553093109279871, + "learning_rate": 6.922290249898454e-07, + "loss": 0.0, + "num_input_tokens_seen": 88236432, + "step": 130910 + }, + { + "epoch": 3.198275230254318, + "grad_norm": 0.0007548279245384037, + "learning_rate": 6.921478878441206e-07, + "loss": 0.0, + "num_input_tokens_seen": 88240336, + "step": 130915 + }, + { + "epoch": 3.1983973810861652, + "grad_norm": 3.5369539546081796e-05, + "learning_rate": 6.920667529371795e-07, + "loss": 0.0, + "num_input_tokens_seen": 88243536, + "step": 130920 + }, + { + "epoch": 3.1985195319180124, + "grad_norm": 0.005292957182973623, + "learning_rate": 6.919856202696115e-07, + "loss": 0.0, + "num_input_tokens_seen": 88247440, + "step": 130925 + }, + { + "epoch": 3.1986416827498596, + "grad_norm": 0.0005814823089167476, + "learning_rate": 6.919044898420072e-07, + "loss": 0.0, + "num_input_tokens_seen": 88251472, + "step": 130930 + }, + { + "epoch": 3.198763833581707, + "grad_norm": 0.01645071804523468, + "learning_rate": 6.918233616549561e-07, + "loss": 0.0, + "num_input_tokens_seen": 88254864, + "step": 130935 + }, + { + "epoch": 3.198885984413554, + "grad_norm": 0.0033709302078932524, + "learning_rate": 6.917422357090485e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88258128, + "step": 130940 + }, + { + "epoch": 3.199008135245401, + "grad_norm": 0.04572080448269844, + "learning_rate": 6.916611120048743e-07, + "loss": 0.0, + "num_input_tokens_seen": 88261584, + "step": 130945 + }, + { + "epoch": 3.1991302860772484, + "grad_norm": 0.022833306342363358, + "learning_rate": 6.915799905430233e-07, + "loss": 0.0, + "num_input_tokens_seen": 88264976, + "step": 130950 + }, + { + "epoch": 3.1992524369090956, + "grad_norm": 0.0016878378810361028, + "learning_rate": 6.914988713240855e-07, + "loss": 0.0, + "num_input_tokens_seen": 88268048, + "step": 130955 + }, + { + "epoch": 3.1993745877409427, + "grad_norm": 0.07270152121782303, + "learning_rate": 6.914177543486512e-07, + "loss": 0.0, + "num_input_tokens_seen": 88271568, + "step": 130960 + }, + { + "epoch": 3.1994967385727895, + "grad_norm": 0.0013511275174096227, + "learning_rate": 6.913366396173097e-07, + "loss": 0.0555, + "num_input_tokens_seen": 88275088, + "step": 130965 + }, + { + "epoch": 3.1996188894046367, + "grad_norm": 0.002923275576904416, + "learning_rate": 6.912555271306515e-07, + "loss": 0.0, + "num_input_tokens_seen": 88278352, + "step": 130970 + }, + { + "epoch": 3.199741040236484, + "grad_norm": 0.00026840600185096264, + "learning_rate": 6.911744168892657e-07, + "loss": 0.0, + "num_input_tokens_seen": 88281680, + "step": 130975 + }, + { + "epoch": 3.199863191068331, + "grad_norm": 0.0009354413487017155, + "learning_rate": 6.910933088937432e-07, + "loss": 0.0619, + "num_input_tokens_seen": 88284752, + "step": 130980 + }, + { + "epoch": 3.1999853419001782, + "grad_norm": 0.0007869818946346641, + "learning_rate": 6.910122031446726e-07, + "loss": 0.0, + "num_input_tokens_seen": 88287760, + "step": 130985 + }, + { + "epoch": 3.2001074927320254, + "grad_norm": 0.00020354626758489758, + "learning_rate": 6.909310996426447e-07, + "loss": 0.0, + "num_input_tokens_seen": 88291024, + "step": 130990 + }, + { + "epoch": 3.2002296435638726, + "grad_norm": 0.0014506744919344783, + "learning_rate": 6.908499983882492e-07, + "loss": 0.0, + "num_input_tokens_seen": 88294672, + "step": 130995 + }, + { + "epoch": 3.20035179439572, + "grad_norm": 0.0020283234771341085, + "learning_rate": 6.907688993820752e-07, + "loss": 0.0, + "num_input_tokens_seen": 88297936, + "step": 131000 + }, + { + "epoch": 3.200473945227567, + "grad_norm": 0.013011399656534195, + "learning_rate": 6.906878026247134e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88301072, + "step": 131005 + }, + { + "epoch": 3.200596096059414, + "grad_norm": 0.011319443583488464, + "learning_rate": 6.906067081167526e-07, + "loss": 0.0, + "num_input_tokens_seen": 88304336, + "step": 131010 + }, + { + "epoch": 3.2007182468912614, + "grad_norm": 0.04771203547716141, + "learning_rate": 6.905256158587836e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88307664, + "step": 131015 + }, + { + "epoch": 3.2008403977231086, + "grad_norm": 5.148741245269775, + "learning_rate": 6.904445258513949e-07, + "loss": 0.0003, + "num_input_tokens_seen": 88311056, + "step": 131020 + }, + { + "epoch": 3.2009625485549558, + "grad_norm": 93.61454772949219, + "learning_rate": 6.903634380951771e-07, + "loss": 0.0625, + "num_input_tokens_seen": 88314320, + "step": 131025 + }, + { + "epoch": 3.201084699386803, + "grad_norm": 0.0014936138177290559, + "learning_rate": 6.9028235259072e-07, + "loss": 0.0, + "num_input_tokens_seen": 88317712, + "step": 131030 + }, + { + "epoch": 3.20120685021865, + "grad_norm": 0.0007606488652527332, + "learning_rate": 6.902012693386123e-07, + "loss": 0.0, + "num_input_tokens_seen": 88321104, + "step": 131035 + }, + { + "epoch": 3.2013290010504973, + "grad_norm": 0.003329877508804202, + "learning_rate": 6.90120188339445e-07, + "loss": 0.0, + "num_input_tokens_seen": 88324368, + "step": 131040 + }, + { + "epoch": 3.2014511518823445, + "grad_norm": 0.0009622600628063083, + "learning_rate": 6.900391095938068e-07, + "loss": 0.0, + "num_input_tokens_seen": 88328208, + "step": 131045 + }, + { + "epoch": 3.2015733027141913, + "grad_norm": 0.001263272948563099, + "learning_rate": 6.899580331022873e-07, + "loss": 0.0, + "num_input_tokens_seen": 88331152, + "step": 131050 + }, + { + "epoch": 3.2016954535460385, + "grad_norm": 0.018948759883642197, + "learning_rate": 6.898769588654767e-07, + "loss": 0.0, + "num_input_tokens_seen": 88333904, + "step": 131055 + }, + { + "epoch": 3.2018176043778857, + "grad_norm": 0.014238434843719006, + "learning_rate": 6.89795886883964e-07, + "loss": 0.0, + "num_input_tokens_seen": 88336784, + "step": 131060 + }, + { + "epoch": 3.201939755209733, + "grad_norm": 0.34706351161003113, + "learning_rate": 6.897148171583394e-07, + "loss": 0.034, + "num_input_tokens_seen": 88340112, + "step": 131065 + }, + { + "epoch": 3.20206190604158, + "grad_norm": 0.0014285926008597016, + "learning_rate": 6.896337496891917e-07, + "loss": 0.0, + "num_input_tokens_seen": 88343312, + "step": 131070 + }, + { + "epoch": 3.202184056873427, + "grad_norm": 0.008070665411651134, + "learning_rate": 6.895526844771112e-07, + "loss": 0.0, + "num_input_tokens_seen": 88346896, + "step": 131075 + }, + { + "epoch": 3.2023062077052744, + "grad_norm": 0.0005574771203100681, + "learning_rate": 6.894716215226868e-07, + "loss": 0.0, + "num_input_tokens_seen": 88350480, + "step": 131080 + }, + { + "epoch": 3.2024283585371216, + "grad_norm": 0.000559286039788276, + "learning_rate": 6.893905608265084e-07, + "loss": 0.0, + "num_input_tokens_seen": 88354256, + "step": 131085 + }, + { + "epoch": 3.202550509368969, + "grad_norm": 0.0003937402507290244, + "learning_rate": 6.893095023891653e-07, + "loss": 0.0, + "num_input_tokens_seen": 88357584, + "step": 131090 + }, + { + "epoch": 3.202672660200816, + "grad_norm": 0.09179575741291046, + "learning_rate": 6.892284462112472e-07, + "loss": 0.0, + "num_input_tokens_seen": 88360976, + "step": 131095 + }, + { + "epoch": 3.202794811032663, + "grad_norm": 0.0010902261128649116, + "learning_rate": 6.891473922933435e-07, + "loss": 0.0, + "num_input_tokens_seen": 88364368, + "step": 131100 + }, + { + "epoch": 3.2029169618645104, + "grad_norm": 0.0002610184019431472, + "learning_rate": 6.890663406360434e-07, + "loss": 0.0, + "num_input_tokens_seen": 88367696, + "step": 131105 + }, + { + "epoch": 3.2030391126963575, + "grad_norm": 0.0027821410913020372, + "learning_rate": 6.889852912399363e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88370896, + "step": 131110 + }, + { + "epoch": 3.2031612635282047, + "grad_norm": 0.0007937629707157612, + "learning_rate": 6.889042441056123e-07, + "loss": 0.0, + "num_input_tokens_seen": 88374544, + "step": 131115 + }, + { + "epoch": 3.203283414360052, + "grad_norm": 0.0003279813681729138, + "learning_rate": 6.888231992336599e-07, + "loss": 0.0, + "num_input_tokens_seen": 88378128, + "step": 131120 + }, + { + "epoch": 3.203405565191899, + "grad_norm": 0.00046456785639747977, + "learning_rate": 6.887421566246694e-07, + "loss": 0.0305, + "num_input_tokens_seen": 88381392, + "step": 131125 + }, + { + "epoch": 3.2035277160237463, + "grad_norm": 0.08387045562267303, + "learning_rate": 6.886611162792291e-07, + "loss": 0.0, + "num_input_tokens_seen": 88384592, + "step": 131130 + }, + { + "epoch": 3.203649866855593, + "grad_norm": 0.01048725750297308, + "learning_rate": 6.885800781979294e-07, + "loss": 0.0, + "num_input_tokens_seen": 88387984, + "step": 131135 + }, + { + "epoch": 3.2037720176874402, + "grad_norm": 0.0010767376516014338, + "learning_rate": 6.884990423813586e-07, + "loss": 0.0, + "num_input_tokens_seen": 88391632, + "step": 131140 + }, + { + "epoch": 3.2038941685192874, + "grad_norm": 0.002124819438904524, + "learning_rate": 6.884180088301068e-07, + "loss": 0.0, + "num_input_tokens_seen": 88394576, + "step": 131145 + }, + { + "epoch": 3.2040163193511346, + "grad_norm": 0.002192183630540967, + "learning_rate": 6.883369775447633e-07, + "loss": 0.0616, + "num_input_tokens_seen": 88397840, + "step": 131150 + }, + { + "epoch": 3.204138470182982, + "grad_norm": 0.0002685803337953985, + "learning_rate": 6.882559485259167e-07, + "loss": 0.0, + "num_input_tokens_seen": 88401296, + "step": 131155 + }, + { + "epoch": 3.204260621014829, + "grad_norm": 0.001198447891511023, + "learning_rate": 6.88174921774157e-07, + "loss": 0.0, + "num_input_tokens_seen": 88404880, + "step": 131160 + }, + { + "epoch": 3.204382771846676, + "grad_norm": 0.001283994410187006, + "learning_rate": 6.880938972900729e-07, + "loss": 0.0332, + "num_input_tokens_seen": 88408656, + "step": 131165 + }, + { + "epoch": 3.2045049226785234, + "grad_norm": 0.003258587559685111, + "learning_rate": 6.880128750742542e-07, + "loss": 0.0, + "num_input_tokens_seen": 88412432, + "step": 131170 + }, + { + "epoch": 3.2046270735103706, + "grad_norm": 0.004086503759026527, + "learning_rate": 6.879318551272894e-07, + "loss": 0.0, + "num_input_tokens_seen": 88415440, + "step": 131175 + }, + { + "epoch": 3.2047492243422178, + "grad_norm": 0.00019456451991572976, + "learning_rate": 6.87850837449768e-07, + "loss": 0.0, + "num_input_tokens_seen": 88418960, + "step": 131180 + }, + { + "epoch": 3.204871375174065, + "grad_norm": 0.003474778262898326, + "learning_rate": 6.877698220422799e-07, + "loss": 0.0546, + "num_input_tokens_seen": 88422352, + "step": 131185 + }, + { + "epoch": 3.204993526005912, + "grad_norm": 0.012960633262991905, + "learning_rate": 6.87688808905413e-07, + "loss": 0.0, + "num_input_tokens_seen": 88425424, + "step": 131190 + }, + { + "epoch": 3.2051156768377593, + "grad_norm": 0.02482810989022255, + "learning_rate": 6.876077980397575e-07, + "loss": 0.0, + "num_input_tokens_seen": 88428816, + "step": 131195 + }, + { + "epoch": 3.2052378276696065, + "grad_norm": 0.05032927915453911, + "learning_rate": 6.87526789445902e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88432208, + "step": 131200 + }, + { + "epoch": 3.2053599785014537, + "grad_norm": 0.11770392954349518, + "learning_rate": 6.874457831244355e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88435920, + "step": 131205 + }, + { + "epoch": 3.205482129333301, + "grad_norm": 0.03212438151240349, + "learning_rate": 6.873647790759478e-07, + "loss": 0.0, + "num_input_tokens_seen": 88440080, + "step": 131210 + }, + { + "epoch": 3.205604280165148, + "grad_norm": 0.0017605361063033342, + "learning_rate": 6.87283777301027e-07, + "loss": 0.0, + "num_input_tokens_seen": 88443536, + "step": 131215 + }, + { + "epoch": 3.2057264309969953, + "grad_norm": 0.0023195864632725716, + "learning_rate": 6.872027778002632e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88447056, + "step": 131220 + }, + { + "epoch": 3.2058485818288425, + "grad_norm": 0.3160790205001831, + "learning_rate": 6.871217805742444e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88450640, + "step": 131225 + }, + { + "epoch": 3.205970732660689, + "grad_norm": 0.13889338076114655, + "learning_rate": 6.870407856235608e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88454352, + "step": 131230 + }, + { + "epoch": 3.2060928834925364, + "grad_norm": 0.0015086831990629435, + "learning_rate": 6.869597929488e-07, + "loss": 0.0, + "num_input_tokens_seen": 88458192, + "step": 131235 + }, + { + "epoch": 3.2062150343243836, + "grad_norm": 0.017581794410943985, + "learning_rate": 6.868788025505523e-07, + "loss": 0.0, + "num_input_tokens_seen": 88461648, + "step": 131240 + }, + { + "epoch": 3.206337185156231, + "grad_norm": 0.002975418232381344, + "learning_rate": 6.86797814429406e-07, + "loss": 0.0, + "num_input_tokens_seen": 88465616, + "step": 131245 + }, + { + "epoch": 3.206459335988078, + "grad_norm": 0.0019408295629546046, + "learning_rate": 6.867168285859504e-07, + "loss": 0.0, + "num_input_tokens_seen": 88469136, + "step": 131250 + }, + { + "epoch": 3.206581486819925, + "grad_norm": 0.0003184815577697009, + "learning_rate": 6.866358450207741e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88473040, + "step": 131255 + }, + { + "epoch": 3.2067036376517724, + "grad_norm": 0.004589362069964409, + "learning_rate": 6.865548637344664e-07, + "loss": 0.0, + "num_input_tokens_seen": 88475984, + "step": 131260 + }, + { + "epoch": 3.2068257884836195, + "grad_norm": 3.0053229331970215, + "learning_rate": 6.864738847276162e-07, + "loss": 0.0, + "num_input_tokens_seen": 88479568, + "step": 131265 + }, + { + "epoch": 3.2069479393154667, + "grad_norm": 21.975263595581055, + "learning_rate": 6.863929080008118e-07, + "loss": 0.0501, + "num_input_tokens_seen": 88482640, + "step": 131270 + }, + { + "epoch": 3.207070090147314, + "grad_norm": 0.0006934599950909615, + "learning_rate": 6.863119335546423e-07, + "loss": 0.0, + "num_input_tokens_seen": 88486032, + "step": 131275 + }, + { + "epoch": 3.207192240979161, + "grad_norm": 0.006196063477545977, + "learning_rate": 6.862309613896975e-07, + "loss": 0.0, + "num_input_tokens_seen": 88490000, + "step": 131280 + }, + { + "epoch": 3.2073143918110083, + "grad_norm": 0.000710879685357213, + "learning_rate": 6.86149991506565e-07, + "loss": 0.0, + "num_input_tokens_seen": 88493264, + "step": 131285 + }, + { + "epoch": 3.2074365426428555, + "grad_norm": 0.0015197633765637875, + "learning_rate": 6.860690239058347e-07, + "loss": 0.0, + "num_input_tokens_seen": 88496976, + "step": 131290 + }, + { + "epoch": 3.2075586934747027, + "grad_norm": 0.005204588640481234, + "learning_rate": 6.859880585880944e-07, + "loss": 0.0553, + "num_input_tokens_seen": 88500432, + "step": 131295 + }, + { + "epoch": 3.20768084430655, + "grad_norm": 0.0007456667954102159, + "learning_rate": 6.859070955539337e-07, + "loss": 0.0, + "num_input_tokens_seen": 88503952, + "step": 131300 + }, + { + "epoch": 3.207802995138397, + "grad_norm": 0.00033635873114690185, + "learning_rate": 6.858261348039411e-07, + "loss": 0.0377, + "num_input_tokens_seen": 88507280, + "step": 131305 + }, + { + "epoch": 3.2079251459702443, + "grad_norm": 0.003113040467724204, + "learning_rate": 6.85745176338705e-07, + "loss": 0.0, + "num_input_tokens_seen": 88510672, + "step": 131310 + }, + { + "epoch": 3.208047296802091, + "grad_norm": 0.00037906321813352406, + "learning_rate": 6.856642201588149e-07, + "loss": 0.0, + "num_input_tokens_seen": 88514576, + "step": 131315 + }, + { + "epoch": 3.208169447633938, + "grad_norm": 0.0035938401706516743, + "learning_rate": 6.855832662648589e-07, + "loss": 0.0, + "num_input_tokens_seen": 88518224, + "step": 131320 + }, + { + "epoch": 3.2082915984657854, + "grad_norm": 0.002629627473652363, + "learning_rate": 6.855023146574262e-07, + "loss": 0.0, + "num_input_tokens_seen": 88521872, + "step": 131325 + }, + { + "epoch": 3.2084137492976326, + "grad_norm": 67.88191986083984, + "learning_rate": 6.854213653371049e-07, + "loss": 0.0526, + "num_input_tokens_seen": 88525264, + "step": 131330 + }, + { + "epoch": 3.2085359001294798, + "grad_norm": 0.011571920476853848, + "learning_rate": 6.85340418304484e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88528272, + "step": 131335 + }, + { + "epoch": 3.208658050961327, + "grad_norm": 0.0011708816746249795, + "learning_rate": 6.852594735601527e-07, + "loss": 0.0, + "num_input_tokens_seen": 88531728, + "step": 131340 + }, + { + "epoch": 3.208780201793174, + "grad_norm": 0.0015514298574998975, + "learning_rate": 6.851785311046987e-07, + "loss": 0.0154, + "num_input_tokens_seen": 88534992, + "step": 131345 + }, + { + "epoch": 3.2089023526250213, + "grad_norm": 0.03095664456486702, + "learning_rate": 6.850975909387115e-07, + "loss": 0.0543, + "num_input_tokens_seen": 88538384, + "step": 131350 + }, + { + "epoch": 3.2090245034568685, + "grad_norm": 0.2210804671049118, + "learning_rate": 6.850166530627791e-07, + "loss": 0.0552, + "num_input_tokens_seen": 88541392, + "step": 131355 + }, + { + "epoch": 3.2091466542887157, + "grad_norm": 1.6713513135910034, + "learning_rate": 6.849357174774901e-07, + "loss": 0.0504, + "num_input_tokens_seen": 88544656, + "step": 131360 + }, + { + "epoch": 3.209268805120563, + "grad_norm": 0.00557379936799407, + "learning_rate": 6.848547841834338e-07, + "loss": 0.0004, + "num_input_tokens_seen": 88548176, + "step": 131365 + }, + { + "epoch": 3.20939095595241, + "grad_norm": 0.004929456394165754, + "learning_rate": 6.847738531811978e-07, + "loss": 0.0, + "num_input_tokens_seen": 88551312, + "step": 131370 + }, + { + "epoch": 3.2095131067842573, + "grad_norm": 0.22072072327136993, + "learning_rate": 6.846929244713717e-07, + "loss": 0.0, + "num_input_tokens_seen": 88554832, + "step": 131375 + }, + { + "epoch": 3.2096352576161045, + "grad_norm": 0.002108390908688307, + "learning_rate": 6.846119980545429e-07, + "loss": 0.0, + "num_input_tokens_seen": 88557712, + "step": 131380 + }, + { + "epoch": 3.2097574084479517, + "grad_norm": 0.004526549018919468, + "learning_rate": 6.845310739313008e-07, + "loss": 0.0, + "num_input_tokens_seen": 88560912, + "step": 131385 + }, + { + "epoch": 3.209879559279799, + "grad_norm": 0.0006033982499502599, + "learning_rate": 6.844501521022333e-07, + "loss": 0.0003, + "num_input_tokens_seen": 88564176, + "step": 131390 + }, + { + "epoch": 3.210001710111646, + "grad_norm": 0.0009200237691402435, + "learning_rate": 6.843692325679293e-07, + "loss": 0.0052, + "num_input_tokens_seen": 88567440, + "step": 131395 + }, + { + "epoch": 3.2101238609434932, + "grad_norm": 0.00029823146178387105, + "learning_rate": 6.842883153289771e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88571152, + "step": 131400 + }, + { + "epoch": 3.2102460117753404, + "grad_norm": 0.004292737692594528, + "learning_rate": 6.842074003859654e-07, + "loss": 0.0, + "num_input_tokens_seen": 88574800, + "step": 131405 + }, + { + "epoch": 3.210368162607187, + "grad_norm": 0.016947563737630844, + "learning_rate": 6.841264877394822e-07, + "loss": 0.0, + "num_input_tokens_seen": 88578640, + "step": 131410 + }, + { + "epoch": 3.2104903134390343, + "grad_norm": 0.17108048498630524, + "learning_rate": 6.840455773901163e-07, + "loss": 0.0, + "num_input_tokens_seen": 88582032, + "step": 131415 + }, + { + "epoch": 3.2106124642708815, + "grad_norm": 0.00048168751527555287, + "learning_rate": 6.839646693384559e-07, + "loss": 0.0, + "num_input_tokens_seen": 88585488, + "step": 131420 + }, + { + "epoch": 3.2107346151027287, + "grad_norm": 0.0014778217300772667, + "learning_rate": 6.838837635850894e-07, + "loss": 0.0, + "num_input_tokens_seen": 88588688, + "step": 131425 + }, + { + "epoch": 3.210856765934576, + "grad_norm": 3.849087443086319e-05, + "learning_rate": 6.83802860130605e-07, + "loss": 0.0, + "num_input_tokens_seen": 88592400, + "step": 131430 + }, + { + "epoch": 3.210978916766423, + "grad_norm": 0.00019800262816715986, + "learning_rate": 6.837219589755916e-07, + "loss": 0.1358, + "num_input_tokens_seen": 88595536, + "step": 131435 + }, + { + "epoch": 3.2111010675982703, + "grad_norm": 0.00025983096566051245, + "learning_rate": 6.836410601206368e-07, + "loss": 0.0, + "num_input_tokens_seen": 88598800, + "step": 131440 + }, + { + "epoch": 3.2112232184301175, + "grad_norm": 0.0001302832388319075, + "learning_rate": 6.835601635663297e-07, + "loss": 0.0003, + "num_input_tokens_seen": 88602128, + "step": 131445 + }, + { + "epoch": 3.2113453692619647, + "grad_norm": 0.16828882694244385, + "learning_rate": 6.834792693132578e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88605456, + "step": 131450 + }, + { + "epoch": 3.211467520093812, + "grad_norm": 0.1079067662358284, + "learning_rate": 6.8339837736201e-07, + "loss": 0.0, + "num_input_tokens_seen": 88608656, + "step": 131455 + }, + { + "epoch": 3.211589670925659, + "grad_norm": 0.0008097724057734013, + "learning_rate": 6.833174877131746e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88611728, + "step": 131460 + }, + { + "epoch": 3.2117118217575062, + "grad_norm": 0.00048446975415572524, + "learning_rate": 6.832366003673392e-07, + "loss": 0.1079, + "num_input_tokens_seen": 88615056, + "step": 131465 + }, + { + "epoch": 3.2118339725893534, + "grad_norm": 0.0007245682063512504, + "learning_rate": 6.831557153250929e-07, + "loss": 0.0, + "num_input_tokens_seen": 88618128, + "step": 131470 + }, + { + "epoch": 3.2119561234212006, + "grad_norm": 0.0002163596946047619, + "learning_rate": 6.830748325870229e-07, + "loss": 0.0006, + "num_input_tokens_seen": 88621136, + "step": 131475 + }, + { + "epoch": 3.212078274253048, + "grad_norm": 34.87563705444336, + "learning_rate": 6.829939521537184e-07, + "loss": 0.0466, + "num_input_tokens_seen": 88624784, + "step": 131480 + }, + { + "epoch": 3.212200425084895, + "grad_norm": 34.09080123901367, + "learning_rate": 6.829130740257668e-07, + "loss": 0.0501, + "num_input_tokens_seen": 88627792, + "step": 131485 + }, + { + "epoch": 3.212322575916742, + "grad_norm": 0.01155630499124527, + "learning_rate": 6.828321982037566e-07, + "loss": 0.0589, + "num_input_tokens_seen": 88631376, + "step": 131490 + }, + { + "epoch": 3.212444726748589, + "grad_norm": 0.004634066019207239, + "learning_rate": 6.827513246882763e-07, + "loss": 0.0, + "num_input_tokens_seen": 88634576, + "step": 131495 + }, + { + "epoch": 3.212566877580436, + "grad_norm": 0.00048246190999634564, + "learning_rate": 6.826704534799135e-07, + "loss": 0.0917, + "num_input_tokens_seen": 88637584, + "step": 131500 + }, + { + "epoch": 3.2126890284122833, + "grad_norm": 0.009547368623316288, + "learning_rate": 6.825895845792567e-07, + "loss": 0.0, + "num_input_tokens_seen": 88641296, + "step": 131505 + }, + { + "epoch": 3.2128111792441305, + "grad_norm": 0.030916303396224976, + "learning_rate": 6.825087179868935e-07, + "loss": 0.0, + "num_input_tokens_seen": 88644624, + "step": 131510 + }, + { + "epoch": 3.2129333300759777, + "grad_norm": 0.012111157178878784, + "learning_rate": 6.824278537034125e-07, + "loss": 0.0, + "num_input_tokens_seen": 88647696, + "step": 131515 + }, + { + "epoch": 3.213055480907825, + "grad_norm": 0.009429936297237873, + "learning_rate": 6.823469917294017e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88651088, + "step": 131520 + }, + { + "epoch": 3.213177631739672, + "grad_norm": 0.0030681893695145845, + "learning_rate": 6.822661320654486e-07, + "loss": 0.0, + "num_input_tokens_seen": 88654160, + "step": 131525 + }, + { + "epoch": 3.2132997825715193, + "grad_norm": 0.0012531710090115666, + "learning_rate": 6.821852747121422e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88658000, + "step": 131530 + }, + { + "epoch": 3.2134219334033665, + "grad_norm": 0.003988673444837332, + "learning_rate": 6.821044196700694e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88661392, + "step": 131535 + }, + { + "epoch": 3.2135440842352136, + "grad_norm": 0.0003183768130838871, + "learning_rate": 6.820235669398192e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88665488, + "step": 131540 + }, + { + "epoch": 3.213666235067061, + "grad_norm": 0.0008758779149502516, + "learning_rate": 6.819427165219789e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88668880, + "step": 131545 + }, + { + "epoch": 3.213788385898908, + "grad_norm": 0.0005794035969302058, + "learning_rate": 6.818618684171367e-07, + "loss": 0.0, + "num_input_tokens_seen": 88672528, + "step": 131550 + }, + { + "epoch": 3.213910536730755, + "grad_norm": 0.00545384269207716, + "learning_rate": 6.817810226258806e-07, + "loss": 0.0348, + "num_input_tokens_seen": 88675536, + "step": 131555 + }, + { + "epoch": 3.2140326875626024, + "grad_norm": 0.0026415474712848663, + "learning_rate": 6.817001791487986e-07, + "loss": 0.0, + "num_input_tokens_seen": 88678800, + "step": 131560 + }, + { + "epoch": 3.2141548383944496, + "grad_norm": 0.024035947397351265, + "learning_rate": 6.816193379864785e-07, + "loss": 0.0525, + "num_input_tokens_seen": 88682320, + "step": 131565 + }, + { + "epoch": 3.214276989226297, + "grad_norm": 0.0018660565838217735, + "learning_rate": 6.815384991395081e-07, + "loss": 0.0, + "num_input_tokens_seen": 88686096, + "step": 131570 + }, + { + "epoch": 3.214399140058144, + "grad_norm": 0.0012469434877857566, + "learning_rate": 6.814576626084757e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88689296, + "step": 131575 + }, + { + "epoch": 3.214521290889991, + "grad_norm": 0.0005144423921592534, + "learning_rate": 6.813768283939683e-07, + "loss": 0.0, + "num_input_tokens_seen": 88692368, + "step": 131580 + }, + { + "epoch": 3.2146434417218384, + "grad_norm": 0.007941600866615772, + "learning_rate": 6.812959964965746e-07, + "loss": 0.0, + "num_input_tokens_seen": 88695632, + "step": 131585 + }, + { + "epoch": 3.214765592553685, + "grad_norm": 0.002308615716174245, + "learning_rate": 6.812151669168821e-07, + "loss": 0.0, + "num_input_tokens_seen": 88699088, + "step": 131590 + }, + { + "epoch": 3.2148877433855323, + "grad_norm": 0.0004510280559770763, + "learning_rate": 6.811343396554786e-07, + "loss": 0.0838, + "num_input_tokens_seen": 88702096, + "step": 131595 + }, + { + "epoch": 3.2150098942173795, + "grad_norm": 0.4567067325115204, + "learning_rate": 6.810535147129524e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88704848, + "step": 131600 + }, + { + "epoch": 3.2151320450492267, + "grad_norm": 0.00045220693573355675, + "learning_rate": 6.809726920898902e-07, + "loss": 0.0513, + "num_input_tokens_seen": 88708432, + "step": 131605 + }, + { + "epoch": 3.215254195881074, + "grad_norm": 0.003778433194383979, + "learning_rate": 6.808918717868805e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88711696, + "step": 131610 + }, + { + "epoch": 3.215376346712921, + "grad_norm": 0.01336110569536686, + "learning_rate": 6.808110538045114e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88714960, + "step": 131615 + }, + { + "epoch": 3.2154984975447682, + "grad_norm": 0.008328008465468884, + "learning_rate": 6.807302381433697e-07, + "loss": 0.0, + "num_input_tokens_seen": 88718672, + "step": 131620 + }, + { + "epoch": 3.2156206483766154, + "grad_norm": 0.0011965453159064054, + "learning_rate": 6.806494248040439e-07, + "loss": 0.0688, + "num_input_tokens_seen": 88722128, + "step": 131625 + }, + { + "epoch": 3.2157427992084626, + "grad_norm": 0.0028489800170063972, + "learning_rate": 6.805686137871211e-07, + "loss": 0.0371, + "num_input_tokens_seen": 88725200, + "step": 131630 + }, + { + "epoch": 3.21586495004031, + "grad_norm": 0.0015178770991042256, + "learning_rate": 6.804878050931895e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88728144, + "step": 131635 + }, + { + "epoch": 3.215987100872157, + "grad_norm": 0.025252878665924072, + "learning_rate": 6.804069987228363e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88731536, + "step": 131640 + }, + { + "epoch": 3.216109251704004, + "grad_norm": 0.0018437153194099665, + "learning_rate": 6.803261946766492e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88734864, + "step": 131645 + }, + { + "epoch": 3.2162314025358514, + "grad_norm": 0.0007309846114367247, + "learning_rate": 6.802453929552165e-07, + "loss": 0.0551, + "num_input_tokens_seen": 88738256, + "step": 131650 + }, + { + "epoch": 3.2163535533676986, + "grad_norm": 0.04808569699525833, + "learning_rate": 6.801645935591249e-07, + "loss": 0.0355, + "num_input_tokens_seen": 88741392, + "step": 131655 + }, + { + "epoch": 3.2164757041995458, + "grad_norm": 0.001859356532804668, + "learning_rate": 6.800837964889627e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88744976, + "step": 131660 + }, + { + "epoch": 3.216597855031393, + "grad_norm": 0.012184222228825092, + "learning_rate": 6.800030017453171e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88748304, + "step": 131665 + }, + { + "epoch": 3.21672000586324, + "grad_norm": 0.0017921392573043704, + "learning_rate": 6.799222093287756e-07, + "loss": 0.0, + "num_input_tokens_seen": 88751504, + "step": 131670 + }, + { + "epoch": 3.216842156695087, + "grad_norm": 0.0525604784488678, + "learning_rate": 6.79841419239926e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88754256, + "step": 131675 + }, + { + "epoch": 3.216964307526934, + "grad_norm": 0.008412518538534641, + "learning_rate": 6.797606314793556e-07, + "loss": 0.0, + "num_input_tokens_seen": 88757264, + "step": 131680 + }, + { + "epoch": 3.2170864583587813, + "grad_norm": 0.006365551147609949, + "learning_rate": 6.796798460476523e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88760400, + "step": 131685 + }, + { + "epoch": 3.2172086091906285, + "grad_norm": 0.039969027042388916, + "learning_rate": 6.795990629454029e-07, + "loss": 0.0004, + "num_input_tokens_seen": 88763920, + "step": 131690 + }, + { + "epoch": 3.2173307600224756, + "grad_norm": 0.00221992377191782, + "learning_rate": 6.795182821731957e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88767056, + "step": 131695 + }, + { + "epoch": 3.217452910854323, + "grad_norm": 0.002486801240593195, + "learning_rate": 6.794375037316173e-07, + "loss": 0.0, + "num_input_tokens_seen": 88770064, + "step": 131700 + }, + { + "epoch": 3.21757506168617, + "grad_norm": 0.003136269748210907, + "learning_rate": 6.793567276212557e-07, + "loss": 0.0, + "num_input_tokens_seen": 88773136, + "step": 131705 + }, + { + "epoch": 3.217697212518017, + "grad_norm": 0.0005143572925589979, + "learning_rate": 6.792759538426987e-07, + "loss": 0.0, + "num_input_tokens_seen": 88776208, + "step": 131710 + }, + { + "epoch": 3.2178193633498644, + "grad_norm": 0.00440189940854907, + "learning_rate": 6.791951823965327e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88779664, + "step": 131715 + }, + { + "epoch": 3.2179415141817116, + "grad_norm": 0.000603349064476788, + "learning_rate": 6.791144132833459e-07, + "loss": 0.0, + "num_input_tokens_seen": 88783056, + "step": 131720 + }, + { + "epoch": 3.218063665013559, + "grad_norm": 0.0004256120591890067, + "learning_rate": 6.790336465037254e-07, + "loss": 0.0, + "num_input_tokens_seen": 88786128, + "step": 131725 + }, + { + "epoch": 3.218185815845406, + "grad_norm": 0.00017496588407084346, + "learning_rate": 6.789528820582587e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88789456, + "step": 131730 + }, + { + "epoch": 3.218307966677253, + "grad_norm": 0.04625757038593292, + "learning_rate": 6.788721199475325e-07, + "loss": 0.0, + "num_input_tokens_seen": 88793168, + "step": 131735 + }, + { + "epoch": 3.2184301175091004, + "grad_norm": 0.011211957782506943, + "learning_rate": 6.787913601721346e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88796368, + "step": 131740 + }, + { + "epoch": 3.2185522683409475, + "grad_norm": 0.00312986271455884, + "learning_rate": 6.787106027326529e-07, + "loss": 0.0, + "num_input_tokens_seen": 88799824, + "step": 131745 + }, + { + "epoch": 3.2186744191727947, + "grad_norm": 0.020161407068371773, + "learning_rate": 6.786298476296737e-07, + "loss": 0.0, + "num_input_tokens_seen": 88803280, + "step": 131750 + }, + { + "epoch": 3.218796570004642, + "grad_norm": 0.00044350314419716597, + "learning_rate": 6.785490948637849e-07, + "loss": 0.0595, + "num_input_tokens_seen": 88806928, + "step": 131755 + }, + { + "epoch": 3.2189187208364887, + "grad_norm": 0.0013805481139570475, + "learning_rate": 6.784683444355732e-07, + "loss": 0.105, + "num_input_tokens_seen": 88810192, + "step": 131760 + }, + { + "epoch": 3.219040871668336, + "grad_norm": 0.01782987453043461, + "learning_rate": 6.783875963456264e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88813456, + "step": 131765 + }, + { + "epoch": 3.219163022500183, + "grad_norm": 0.0005228363443166018, + "learning_rate": 6.783068505945319e-07, + "loss": 0.0, + "num_input_tokens_seen": 88817360, + "step": 131770 + }, + { + "epoch": 3.2192851733320302, + "grad_norm": 0.12143167853355408, + "learning_rate": 6.782261071828759e-07, + "loss": 0.0382, + "num_input_tokens_seen": 88820560, + "step": 131775 + }, + { + "epoch": 3.2194073241638774, + "grad_norm": 0.09187977761030197, + "learning_rate": 6.781453661112466e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88823824, + "step": 131780 + }, + { + "epoch": 3.2195294749957246, + "grad_norm": 0.004339349456131458, + "learning_rate": 6.780646273802304e-07, + "loss": 0.0, + "num_input_tokens_seen": 88827408, + "step": 131785 + }, + { + "epoch": 3.219651625827572, + "grad_norm": 0.0006389893242157996, + "learning_rate": 6.779838909904152e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88830480, + "step": 131790 + }, + { + "epoch": 3.219773776659419, + "grad_norm": 0.0051666791550815105, + "learning_rate": 6.779031569423874e-07, + "loss": 0.0, + "num_input_tokens_seen": 88834000, + "step": 131795 + }, + { + "epoch": 3.219895927491266, + "grad_norm": 0.004778078757226467, + "learning_rate": 6.778224252367344e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88837072, + "step": 131800 + }, + { + "epoch": 3.2200180783231134, + "grad_norm": 0.008272204548120499, + "learning_rate": 6.777416958740438e-07, + "loss": 0.0, + "num_input_tokens_seen": 88840272, + "step": 131805 + }, + { + "epoch": 3.2201402291549606, + "grad_norm": 0.008195118978619576, + "learning_rate": 6.77660968854902e-07, + "loss": 0.0, + "num_input_tokens_seen": 88843664, + "step": 131810 + }, + { + "epoch": 3.2202623799868078, + "grad_norm": 0.016739945858716965, + "learning_rate": 6.775802441798966e-07, + "loss": 0.0, + "num_input_tokens_seen": 88847056, + "step": 131815 + }, + { + "epoch": 3.220384530818655, + "grad_norm": 0.010644571855664253, + "learning_rate": 6.774995218496142e-07, + "loss": 0.0, + "num_input_tokens_seen": 88850384, + "step": 131820 + }, + { + "epoch": 3.220506681650502, + "grad_norm": 0.00225659366697073, + "learning_rate": 6.774188018646421e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88853392, + "step": 131825 + }, + { + "epoch": 3.2206288324823493, + "grad_norm": 0.0010362331522628665, + "learning_rate": 6.773380842255671e-07, + "loss": 0.0, + "num_input_tokens_seen": 88856976, + "step": 131830 + }, + { + "epoch": 3.2207509833141965, + "grad_norm": 0.03388144075870514, + "learning_rate": 6.772573689329763e-07, + "loss": 0.0, + "num_input_tokens_seen": 88860496, + "step": 131835 + }, + { + "epoch": 3.2208731341460437, + "grad_norm": 0.014827468432486057, + "learning_rate": 6.77176655987457e-07, + "loss": 0.0, + "num_input_tokens_seen": 88864336, + "step": 131840 + }, + { + "epoch": 3.220995284977891, + "grad_norm": 0.009838799946010113, + "learning_rate": 6.770959453895955e-07, + "loss": 0.0, + "num_input_tokens_seen": 88867856, + "step": 131845 + }, + { + "epoch": 3.221117435809738, + "grad_norm": 0.005385571159422398, + "learning_rate": 6.770152371399795e-07, + "loss": 0.0, + "num_input_tokens_seen": 88871632, + "step": 131850 + }, + { + "epoch": 3.221239586641585, + "grad_norm": 0.09350186586380005, + "learning_rate": 6.769345312391952e-07, + "loss": 0.0, + "num_input_tokens_seen": 88874960, + "step": 131855 + }, + { + "epoch": 3.221361737473432, + "grad_norm": 0.0004697141412179917, + "learning_rate": 6.7685382768783e-07, + "loss": 0.0, + "num_input_tokens_seen": 88878416, + "step": 131860 + }, + { + "epoch": 3.221483888305279, + "grad_norm": 35.65409851074219, + "learning_rate": 6.767731264864709e-07, + "loss": 0.0563, + "num_input_tokens_seen": 88881744, + "step": 131865 + }, + { + "epoch": 3.2216060391371264, + "grad_norm": 0.0021280786022543907, + "learning_rate": 6.766924276357044e-07, + "loss": 0.0, + "num_input_tokens_seen": 88884944, + "step": 131870 + }, + { + "epoch": 3.2217281899689736, + "grad_norm": 0.001438329927623272, + "learning_rate": 6.766117311361177e-07, + "loss": 0.0, + "num_input_tokens_seen": 88888016, + "step": 131875 + }, + { + "epoch": 3.2218503408008208, + "grad_norm": 0.0007603879203088582, + "learning_rate": 6.765310369882973e-07, + "loss": 0.0, + "num_input_tokens_seen": 88891728, + "step": 131880 + }, + { + "epoch": 3.221972491632668, + "grad_norm": 0.0012879845453426242, + "learning_rate": 6.764503451928305e-07, + "loss": 0.0, + "num_input_tokens_seen": 88894736, + "step": 131885 + }, + { + "epoch": 3.222094642464515, + "grad_norm": 0.001136791193857789, + "learning_rate": 6.763696557503034e-07, + "loss": 0.0181, + "num_input_tokens_seen": 88898064, + "step": 131890 + }, + { + "epoch": 3.2222167932963623, + "grad_norm": 0.083034448325634, + "learning_rate": 6.762889686613032e-07, + "loss": 0.0378, + "num_input_tokens_seen": 88901456, + "step": 131895 + }, + { + "epoch": 3.2223389441282095, + "grad_norm": 0.0010081586660817266, + "learning_rate": 6.762082839264169e-07, + "loss": 0.0, + "num_input_tokens_seen": 88904912, + "step": 131900 + }, + { + "epoch": 3.2224610949600567, + "grad_norm": 0.0007607207517139614, + "learning_rate": 6.761276015462309e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88907984, + "step": 131905 + }, + { + "epoch": 3.222583245791904, + "grad_norm": 0.0006288554286584258, + "learning_rate": 6.760469215213324e-07, + "loss": 0.0, + "num_input_tokens_seen": 88911632, + "step": 131910 + }, + { + "epoch": 3.222705396623751, + "grad_norm": 0.004640562925487757, + "learning_rate": 6.759662438523074e-07, + "loss": 0.0317, + "num_input_tokens_seen": 88915152, + "step": 131915 + }, + { + "epoch": 3.2228275474555983, + "grad_norm": 0.002924085594713688, + "learning_rate": 6.758855685397432e-07, + "loss": 0.0, + "num_input_tokens_seen": 88918416, + "step": 131920 + }, + { + "epoch": 3.2229496982874455, + "grad_norm": 0.0007594149792566895, + "learning_rate": 6.758048955842266e-07, + "loss": 0.0, + "num_input_tokens_seen": 88921488, + "step": 131925 + }, + { + "epoch": 3.2230718491192927, + "grad_norm": 219.20193481445312, + "learning_rate": 6.757242249863435e-07, + "loss": 0.0663, + "num_input_tokens_seen": 88924944, + "step": 131930 + }, + { + "epoch": 3.22319399995114, + "grad_norm": 0.006914704106748104, + "learning_rate": 6.756435567466816e-07, + "loss": 0.0397, + "num_input_tokens_seen": 88928144, + "step": 131935 + }, + { + "epoch": 3.2233161507829866, + "grad_norm": 0.003912906628102064, + "learning_rate": 6.755628908658265e-07, + "loss": 0.0, + "num_input_tokens_seen": 88931472, + "step": 131940 + }, + { + "epoch": 3.223438301614834, + "grad_norm": 0.0022785658948123455, + "learning_rate": 6.754822273443657e-07, + "loss": 0.0, + "num_input_tokens_seen": 88935248, + "step": 131945 + }, + { + "epoch": 3.223560452446681, + "grad_norm": 0.0023476288188248873, + "learning_rate": 6.754015661828851e-07, + "loss": 0.0, + "num_input_tokens_seen": 88938576, + "step": 131950 + }, + { + "epoch": 3.223682603278528, + "grad_norm": 0.06342912465333939, + "learning_rate": 6.753209073819717e-07, + "loss": 0.0, + "num_input_tokens_seen": 88942032, + "step": 131955 + }, + { + "epoch": 3.2238047541103754, + "grad_norm": 0.0013723451411351562, + "learning_rate": 6.752402509422121e-07, + "loss": 0.0, + "num_input_tokens_seen": 88945104, + "step": 131960 + }, + { + "epoch": 3.2239269049422226, + "grad_norm": 0.002334070857614279, + "learning_rate": 6.751595968641925e-07, + "loss": 0.0, + "num_input_tokens_seen": 88949072, + "step": 131965 + }, + { + "epoch": 3.2240490557740697, + "grad_norm": 0.005523113068193197, + "learning_rate": 6.750789451485002e-07, + "loss": 0.0728, + "num_input_tokens_seen": 88952976, + "step": 131970 + }, + { + "epoch": 3.224171206605917, + "grad_norm": 0.08320626616477966, + "learning_rate": 6.74998295795721e-07, + "loss": 0.0, + "num_input_tokens_seen": 88956304, + "step": 131975 + }, + { + "epoch": 3.224293357437764, + "grad_norm": 0.0005697169108316302, + "learning_rate": 6.749176488064414e-07, + "loss": 0.0, + "num_input_tokens_seen": 88959696, + "step": 131980 + }, + { + "epoch": 3.2244155082696113, + "grad_norm": 0.002113824477419257, + "learning_rate": 6.748370041812482e-07, + "loss": 0.0, + "num_input_tokens_seen": 88963088, + "step": 131985 + }, + { + "epoch": 3.2245376591014585, + "grad_norm": 0.0005294289439916611, + "learning_rate": 6.747563619207276e-07, + "loss": 0.0006, + "num_input_tokens_seen": 88966288, + "step": 131990 + }, + { + "epoch": 3.2246598099333057, + "grad_norm": 0.003573896363377571, + "learning_rate": 6.746757220254667e-07, + "loss": 0.0, + "num_input_tokens_seen": 88970256, + "step": 131995 + }, + { + "epoch": 3.224781960765153, + "grad_norm": 0.0011918267700821161, + "learning_rate": 6.745950844960509e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88973520, + "step": 132000 + }, + { + "epoch": 3.224904111597, + "grad_norm": 0.033025145530700684, + "learning_rate": 6.745144493330676e-07, + "loss": 0.0, + "num_input_tokens_seen": 88977104, + "step": 132005 + }, + { + "epoch": 3.2250262624288473, + "grad_norm": 0.000612385047134012, + "learning_rate": 6.744338165371023e-07, + "loss": 0.0002, + "num_input_tokens_seen": 88980560, + "step": 132010 + }, + { + "epoch": 3.2251484132606945, + "grad_norm": 0.001855800743214786, + "learning_rate": 6.74353186108742e-07, + "loss": 0.0, + "num_input_tokens_seen": 88983696, + "step": 132015 + }, + { + "epoch": 3.2252705640925416, + "grad_norm": 0.01201531384140253, + "learning_rate": 6.742725580485732e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88986704, + "step": 132020 + }, + { + "epoch": 3.225392714924389, + "grad_norm": 0.002281307941302657, + "learning_rate": 6.741919323571818e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88989584, + "step": 132025 + }, + { + "epoch": 3.225514865756236, + "grad_norm": 0.008396490477025509, + "learning_rate": 6.741113090351544e-07, + "loss": 0.0636, + "num_input_tokens_seen": 88992976, + "step": 132030 + }, + { + "epoch": 3.2256370165880828, + "grad_norm": 0.28072237968444824, + "learning_rate": 6.740306880830771e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88996368, + "step": 132035 + }, + { + "epoch": 3.22575916741993, + "grad_norm": 0.46522748470306396, + "learning_rate": 6.739500695015365e-07, + "loss": 0.0001, + "num_input_tokens_seen": 88999568, + "step": 132040 + }, + { + "epoch": 3.225881318251777, + "grad_norm": 2.098423480987549, + "learning_rate": 6.738694532911184e-07, + "loss": 0.0004, + "num_input_tokens_seen": 89002896, + "step": 132045 + }, + { + "epoch": 3.2260034690836243, + "grad_norm": 0.030650269240140915, + "learning_rate": 6.737888394524092e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89006480, + "step": 132050 + }, + { + "epoch": 3.2261256199154715, + "grad_norm": 0.007767067290842533, + "learning_rate": 6.737082279859958e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89009936, + "step": 132055 + }, + { + "epoch": 3.2262477707473187, + "grad_norm": 0.019312912598252296, + "learning_rate": 6.736276188924636e-07, + "loss": 0.0, + "num_input_tokens_seen": 89013136, + "step": 132060 + }, + { + "epoch": 3.226369921579166, + "grad_norm": 0.0337306447327137, + "learning_rate": 6.735470121723994e-07, + "loss": 0.0, + "num_input_tokens_seen": 89016528, + "step": 132065 + }, + { + "epoch": 3.226492072411013, + "grad_norm": 0.00048753563896752894, + "learning_rate": 6.734664078263887e-07, + "loss": 0.0, + "num_input_tokens_seen": 89020368, + "step": 132070 + }, + { + "epoch": 3.2266142232428603, + "grad_norm": 0.0010278359986841679, + "learning_rate": 6.733858058550185e-07, + "loss": 0.0, + "num_input_tokens_seen": 89023568, + "step": 132075 + }, + { + "epoch": 3.2267363740747075, + "grad_norm": 0.00571131706237793, + "learning_rate": 6.733052062588747e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89026960, + "step": 132080 + }, + { + "epoch": 3.2268585249065547, + "grad_norm": 0.0007071031141094863, + "learning_rate": 6.732246090385428e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89030480, + "step": 132085 + }, + { + "epoch": 3.226980675738402, + "grad_norm": 0.0007928020786494017, + "learning_rate": 6.7314401419461e-07, + "loss": 0.0, + "num_input_tokens_seen": 89034064, + "step": 132090 + }, + { + "epoch": 3.227102826570249, + "grad_norm": 0.012956708669662476, + "learning_rate": 6.730634217276614e-07, + "loss": 0.0, + "num_input_tokens_seen": 89037584, + "step": 132095 + }, + { + "epoch": 3.2272249774020962, + "grad_norm": 0.016126209869980812, + "learning_rate": 6.729828316382837e-07, + "loss": 0.0, + "num_input_tokens_seen": 89041040, + "step": 132100 + }, + { + "epoch": 3.2273471282339434, + "grad_norm": 0.0004620710969902575, + "learning_rate": 6.729022439270627e-07, + "loss": 0.0, + "num_input_tokens_seen": 89044048, + "step": 132105 + }, + { + "epoch": 3.2274692790657906, + "grad_norm": 0.012120189145207405, + "learning_rate": 6.728216585945845e-07, + "loss": 0.0, + "num_input_tokens_seen": 89047376, + "step": 132110 + }, + { + "epoch": 3.227591429897638, + "grad_norm": 0.019521376118063927, + "learning_rate": 6.727410756414356e-07, + "loss": 0.0, + "num_input_tokens_seen": 89050768, + "step": 132115 + }, + { + "epoch": 3.2277135807294846, + "grad_norm": 0.0017054117051884532, + "learning_rate": 6.726604950682011e-07, + "loss": 0.0, + "num_input_tokens_seen": 89053968, + "step": 132120 + }, + { + "epoch": 3.2278357315613317, + "grad_norm": 0.2144937962293625, + "learning_rate": 6.72579916875468e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89057488, + "step": 132125 + }, + { + "epoch": 3.227957882393179, + "grad_norm": 0.00031233998015522957, + "learning_rate": 6.724993410638216e-07, + "loss": 0.0, + "num_input_tokens_seen": 89060880, + "step": 132130 + }, + { + "epoch": 3.228080033225026, + "grad_norm": 0.0021750705782324076, + "learning_rate": 6.724187676338481e-07, + "loss": 0.0, + "num_input_tokens_seen": 89064016, + "step": 132135 + }, + { + "epoch": 3.2282021840568733, + "grad_norm": 0.00044330034870654345, + "learning_rate": 6.723381965861334e-07, + "loss": 0.0, + "num_input_tokens_seen": 89067216, + "step": 132140 + }, + { + "epoch": 3.2283243348887205, + "grad_norm": 6.203007797012106e-05, + "learning_rate": 6.722576279212632e-07, + "loss": 0.0, + "num_input_tokens_seen": 89070544, + "step": 132145 + }, + { + "epoch": 3.2284464857205677, + "grad_norm": 0.002357953228056431, + "learning_rate": 6.721770616398242e-07, + "loss": 0.0, + "num_input_tokens_seen": 89074320, + "step": 132150 + }, + { + "epoch": 3.228568636552415, + "grad_norm": 0.0007738954154774547, + "learning_rate": 6.720964977424013e-07, + "loss": 0.0, + "num_input_tokens_seen": 89077776, + "step": 132155 + }, + { + "epoch": 3.228690787384262, + "grad_norm": 0.001500914222560823, + "learning_rate": 6.720159362295814e-07, + "loss": 0.0, + "num_input_tokens_seen": 89080912, + "step": 132160 + }, + { + "epoch": 3.2288129382161093, + "grad_norm": 0.0029234839603304863, + "learning_rate": 6.719353771019494e-07, + "loss": 0.0, + "num_input_tokens_seen": 89084560, + "step": 132165 + }, + { + "epoch": 3.2289350890479565, + "grad_norm": 0.0030644559301435947, + "learning_rate": 6.718548203600915e-07, + "loss": 0.0023, + "num_input_tokens_seen": 89087888, + "step": 132170 + }, + { + "epoch": 3.2290572398798036, + "grad_norm": 0.00027050048811361194, + "learning_rate": 6.71774266004594e-07, + "loss": 0.0488, + "num_input_tokens_seen": 89091024, + "step": 132175 + }, + { + "epoch": 3.229179390711651, + "grad_norm": 0.0002629015943966806, + "learning_rate": 6.716937140360421e-07, + "loss": 0.0, + "num_input_tokens_seen": 89094160, + "step": 132180 + }, + { + "epoch": 3.229301541543498, + "grad_norm": 0.0009435914689674973, + "learning_rate": 6.716131644550218e-07, + "loss": 0.0875, + "num_input_tokens_seen": 89097552, + "step": 132185 + }, + { + "epoch": 3.229423692375345, + "grad_norm": 0.021992305293679237, + "learning_rate": 6.71532617262119e-07, + "loss": 0.0, + "num_input_tokens_seen": 89100816, + "step": 132190 + }, + { + "epoch": 3.2295458432071924, + "grad_norm": 0.00021161261247470975, + "learning_rate": 6.714520724579196e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89103888, + "step": 132195 + }, + { + "epoch": 3.2296679940390396, + "grad_norm": 0.002651083981618285, + "learning_rate": 6.713715300430085e-07, + "loss": 0.0, + "num_input_tokens_seen": 89107024, + "step": 132200 + }, + { + "epoch": 3.2297901448708863, + "grad_norm": 0.0004908027476631105, + "learning_rate": 6.712909900179722e-07, + "loss": 0.0, + "num_input_tokens_seen": 89110288, + "step": 132205 + }, + { + "epoch": 3.229912295702734, + "grad_norm": 0.0019102852093055844, + "learning_rate": 6.712104523833965e-07, + "loss": 0.0, + "num_input_tokens_seen": 89113552, + "step": 132210 + }, + { + "epoch": 3.2300344465345807, + "grad_norm": 23.559524536132812, + "learning_rate": 6.711299171398664e-07, + "loss": 0.0579, + "num_input_tokens_seen": 89117136, + "step": 132215 + }, + { + "epoch": 3.230156597366428, + "grad_norm": 0.053920187056064606, + "learning_rate": 6.710493842879685e-07, + "loss": 0.0, + "num_input_tokens_seen": 89120016, + "step": 132220 + }, + { + "epoch": 3.230278748198275, + "grad_norm": 0.001154446741566062, + "learning_rate": 6.709688538282876e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89122896, + "step": 132225 + }, + { + "epoch": 3.2304008990301223, + "grad_norm": 0.0008815564215183258, + "learning_rate": 6.708883257614098e-07, + "loss": 0.0, + "num_input_tokens_seen": 89126544, + "step": 132230 + }, + { + "epoch": 3.2305230498619695, + "grad_norm": 0.0008381134830415249, + "learning_rate": 6.708078000879209e-07, + "loss": 0.0, + "num_input_tokens_seen": 89129744, + "step": 132235 + }, + { + "epoch": 3.2306452006938167, + "grad_norm": 0.010292504914104939, + "learning_rate": 6.707272768084057e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89132816, + "step": 132240 + }, + { + "epoch": 3.230767351525664, + "grad_norm": 0.004390857182443142, + "learning_rate": 6.706467559234507e-07, + "loss": 0.0, + "num_input_tokens_seen": 89135760, + "step": 132245 + }, + { + "epoch": 3.230889502357511, + "grad_norm": 0.01249841507524252, + "learning_rate": 6.705662374336409e-07, + "loss": 0.0, + "num_input_tokens_seen": 89139088, + "step": 132250 + }, + { + "epoch": 3.2310116531893582, + "grad_norm": 6.614337326027453e-05, + "learning_rate": 6.704857213395622e-07, + "loss": 0.0, + "num_input_tokens_seen": 89142288, + "step": 132255 + }, + { + "epoch": 3.2311338040212054, + "grad_norm": 0.0024311612360179424, + "learning_rate": 6.704052076417996e-07, + "loss": 0.0, + "num_input_tokens_seen": 89145552, + "step": 132260 + }, + { + "epoch": 3.2312559548530526, + "grad_norm": 0.0019033217104151845, + "learning_rate": 6.70324696340939e-07, + "loss": 0.0, + "num_input_tokens_seen": 89148880, + "step": 132265 + }, + { + "epoch": 3.2313781056849, + "grad_norm": 0.0026095532812178135, + "learning_rate": 6.702441874375664e-07, + "loss": 0.0, + "num_input_tokens_seen": 89152400, + "step": 132270 + }, + { + "epoch": 3.231500256516747, + "grad_norm": 0.02475583739578724, + "learning_rate": 6.701636809322662e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89155408, + "step": 132275 + }, + { + "epoch": 3.231622407348594, + "grad_norm": 0.0015918344724923372, + "learning_rate": 6.700831768256249e-07, + "loss": 0.0, + "num_input_tokens_seen": 89158800, + "step": 132280 + }, + { + "epoch": 3.2317445581804414, + "grad_norm": 0.0030924296006560326, + "learning_rate": 6.700026751182273e-07, + "loss": 0.0, + "num_input_tokens_seen": 89162192, + "step": 132285 + }, + { + "epoch": 3.2318667090122886, + "grad_norm": 0.00015647780674044043, + "learning_rate": 6.69922175810659e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89165136, + "step": 132290 + }, + { + "epoch": 3.2319888598441358, + "grad_norm": 0.0005808421992696822, + "learning_rate": 6.698416789035053e-07, + "loss": 0.0, + "num_input_tokens_seen": 89168336, + "step": 132295 + }, + { + "epoch": 3.2321110106759825, + "grad_norm": 0.0008498340612277389, + "learning_rate": 6.697611843973517e-07, + "loss": 0.0266, + "num_input_tokens_seen": 89171472, + "step": 132300 + }, + { + "epoch": 3.2322331615078297, + "grad_norm": 0.001342020696029067, + "learning_rate": 6.696806922927838e-07, + "loss": 0.0, + "num_input_tokens_seen": 89174864, + "step": 132305 + }, + { + "epoch": 3.232355312339677, + "grad_norm": 8.064936264418066e-05, + "learning_rate": 6.696002025903864e-07, + "loss": 0.0, + "num_input_tokens_seen": 89178064, + "step": 132310 + }, + { + "epoch": 3.232477463171524, + "grad_norm": 0.0025094160810112953, + "learning_rate": 6.695197152907457e-07, + "loss": 0.0, + "num_input_tokens_seen": 89181008, + "step": 132315 + }, + { + "epoch": 3.2325996140033713, + "grad_norm": 0.0005574112292379141, + "learning_rate": 6.694392303944461e-07, + "loss": 0.0, + "num_input_tokens_seen": 89184336, + "step": 132320 + }, + { + "epoch": 3.2327217648352184, + "grad_norm": 0.019411450251936913, + "learning_rate": 6.693587479020732e-07, + "loss": 0.0, + "num_input_tokens_seen": 89187792, + "step": 132325 + }, + { + "epoch": 3.2328439156670656, + "grad_norm": 0.05258939042687416, + "learning_rate": 6.692782678142129e-07, + "loss": 0.0, + "num_input_tokens_seen": 89191184, + "step": 132330 + }, + { + "epoch": 3.232966066498913, + "grad_norm": 0.002094942843541503, + "learning_rate": 6.691977901314498e-07, + "loss": 0.0462, + "num_input_tokens_seen": 89194384, + "step": 132335 + }, + { + "epoch": 3.23308821733076, + "grad_norm": 0.0006676348857581615, + "learning_rate": 6.691173148543694e-07, + "loss": 0.0, + "num_input_tokens_seen": 89198224, + "step": 132340 + }, + { + "epoch": 3.233210368162607, + "grad_norm": 0.002458383562043309, + "learning_rate": 6.690368419835569e-07, + "loss": 0.0, + "num_input_tokens_seen": 89201360, + "step": 132345 + }, + { + "epoch": 3.2333325189944544, + "grad_norm": 0.0006629899726249278, + "learning_rate": 6.689563715195975e-07, + "loss": 0.0, + "num_input_tokens_seen": 89204560, + "step": 132350 + }, + { + "epoch": 3.2334546698263016, + "grad_norm": 6.684491381747648e-05, + "learning_rate": 6.688759034630761e-07, + "loss": 0.0, + "num_input_tokens_seen": 89207952, + "step": 132355 + }, + { + "epoch": 3.2335768206581488, + "grad_norm": 0.0046476987190544605, + "learning_rate": 6.687954378145782e-07, + "loss": 0.0, + "num_input_tokens_seen": 89211024, + "step": 132360 + }, + { + "epoch": 3.233698971489996, + "grad_norm": 0.0023857601918280125, + "learning_rate": 6.687149745746894e-07, + "loss": 0.1191, + "num_input_tokens_seen": 89214608, + "step": 132365 + }, + { + "epoch": 3.233821122321843, + "grad_norm": 82.63002014160156, + "learning_rate": 6.686345137439939e-07, + "loss": 0.1076, + "num_input_tokens_seen": 89218384, + "step": 132370 + }, + { + "epoch": 3.2339432731536903, + "grad_norm": 0.015677260234951973, + "learning_rate": 6.68554055323078e-07, + "loss": 0.0, + "num_input_tokens_seen": 89221456, + "step": 132375 + }, + { + "epoch": 3.2340654239855375, + "grad_norm": 0.0005482754786498845, + "learning_rate": 6.684735993125255e-07, + "loss": 0.0043, + "num_input_tokens_seen": 89225040, + "step": 132380 + }, + { + "epoch": 3.2341875748173843, + "grad_norm": 0.0008770662243478, + "learning_rate": 6.683931457129224e-07, + "loss": 0.0, + "num_input_tokens_seen": 89228240, + "step": 132385 + }, + { + "epoch": 3.2343097256492315, + "grad_norm": 0.022349568083882332, + "learning_rate": 6.683126945248538e-07, + "loss": 0.0, + "num_input_tokens_seen": 89231376, + "step": 132390 + }, + { + "epoch": 3.2344318764810787, + "grad_norm": 0.00225584814324975, + "learning_rate": 6.682322457489042e-07, + "loss": 0.0, + "num_input_tokens_seen": 89234576, + "step": 132395 + }, + { + "epoch": 3.234554027312926, + "grad_norm": 0.01475659478455782, + "learning_rate": 6.681517993856592e-07, + "loss": 0.0, + "num_input_tokens_seen": 89237840, + "step": 132400 + }, + { + "epoch": 3.234676178144773, + "grad_norm": 0.000728962360881269, + "learning_rate": 6.680713554357031e-07, + "loss": 0.0354, + "num_input_tokens_seen": 89241360, + "step": 132405 + }, + { + "epoch": 3.2347983289766202, + "grad_norm": 0.00011187187919858843, + "learning_rate": 6.679909138996219e-07, + "loss": 0.0, + "num_input_tokens_seen": 89244880, + "step": 132410 + }, + { + "epoch": 3.2349204798084674, + "grad_norm": 0.0008701798506081104, + "learning_rate": 6.679104747779996e-07, + "loss": 0.0, + "num_input_tokens_seen": 89247952, + "step": 132415 + }, + { + "epoch": 3.2350426306403146, + "grad_norm": 0.0016978960484266281, + "learning_rate": 6.678300380714217e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89251472, + "step": 132420 + }, + { + "epoch": 3.235164781472162, + "grad_norm": 2.5931992530822754, + "learning_rate": 6.677496037804735e-07, + "loss": 0.0239, + "num_input_tokens_seen": 89255120, + "step": 132425 + }, + { + "epoch": 3.235286932304009, + "grad_norm": 0.025977689772844315, + "learning_rate": 6.676691719057393e-07, + "loss": 0.0, + "num_input_tokens_seen": 89258640, + "step": 132430 + }, + { + "epoch": 3.235409083135856, + "grad_norm": 9.993995627155527e-05, + "learning_rate": 6.675887424478044e-07, + "loss": 0.0, + "num_input_tokens_seen": 89262224, + "step": 132435 + }, + { + "epoch": 3.2355312339677034, + "grad_norm": 0.03302092105150223, + "learning_rate": 6.675083154072535e-07, + "loss": 0.0, + "num_input_tokens_seen": 89265808, + "step": 132440 + }, + { + "epoch": 3.2356533847995506, + "grad_norm": 0.006590451113879681, + "learning_rate": 6.674278907846715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89269072, + "step": 132445 + }, + { + "epoch": 3.2357755356313977, + "grad_norm": 0.02588418684899807, + "learning_rate": 6.673474685806435e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89272336, + "step": 132450 + }, + { + "epoch": 3.235897686463245, + "grad_norm": 0.01116380374878645, + "learning_rate": 6.672670487957538e-07, + "loss": 0.0, + "num_input_tokens_seen": 89275600, + "step": 132455 + }, + { + "epoch": 3.236019837295092, + "grad_norm": 0.015107502229511738, + "learning_rate": 6.671866314305881e-07, + "loss": 0.0, + "num_input_tokens_seen": 89278736, + "step": 132460 + }, + { + "epoch": 3.2361419881269393, + "grad_norm": 0.0018150904215872288, + "learning_rate": 6.671062164857303e-07, + "loss": 0.0, + "num_input_tokens_seen": 89282192, + "step": 132465 + }, + { + "epoch": 3.2362641389587865, + "grad_norm": 0.0016039460897445679, + "learning_rate": 6.67025803961766e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89285072, + "step": 132470 + }, + { + "epoch": 3.2363862897906337, + "grad_norm": 0.007005858235061169, + "learning_rate": 6.669453938592793e-07, + "loss": 0.0, + "num_input_tokens_seen": 89288400, + "step": 132475 + }, + { + "epoch": 3.2365084406224804, + "grad_norm": 0.0012618335895240307, + "learning_rate": 6.668649861788551e-07, + "loss": 0.0325, + "num_input_tokens_seen": 89291344, + "step": 132480 + }, + { + "epoch": 3.2366305914543276, + "grad_norm": 0.004066959489136934, + "learning_rate": 6.66784580921079e-07, + "loss": 0.0663, + "num_input_tokens_seen": 89295184, + "step": 132485 + }, + { + "epoch": 3.236752742286175, + "grad_norm": 0.024879004806280136, + "learning_rate": 6.667041780865347e-07, + "loss": 0.0439, + "num_input_tokens_seen": 89298192, + "step": 132490 + }, + { + "epoch": 3.236874893118022, + "grad_norm": 0.00970000121742487, + "learning_rate": 6.666237776758072e-07, + "loss": 0.0, + "num_input_tokens_seen": 89301392, + "step": 132495 + }, + { + "epoch": 3.236997043949869, + "grad_norm": 0.02402373217046261, + "learning_rate": 6.665433796894812e-07, + "loss": 0.0, + "num_input_tokens_seen": 89304784, + "step": 132500 + }, + { + "epoch": 3.2371191947817164, + "grad_norm": 0.012411472387611866, + "learning_rate": 6.66462984128142e-07, + "loss": 0.0, + "num_input_tokens_seen": 89308048, + "step": 132505 + }, + { + "epoch": 3.2372413456135636, + "grad_norm": 0.013225167989730835, + "learning_rate": 6.663825909923729e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89311376, + "step": 132510 + }, + { + "epoch": 3.2373634964454108, + "grad_norm": 0.004108354914933443, + "learning_rate": 6.663022002827595e-07, + "loss": 0.0, + "num_input_tokens_seen": 89314448, + "step": 132515 + }, + { + "epoch": 3.237485647277258, + "grad_norm": 0.34476473927497864, + "learning_rate": 6.662218119998866e-07, + "loss": 0.001, + "num_input_tokens_seen": 89317520, + "step": 132520 + }, + { + "epoch": 3.237607798109105, + "grad_norm": 0.010773766785860062, + "learning_rate": 6.661414261443381e-07, + "loss": 0.0, + "num_input_tokens_seen": 89320720, + "step": 132525 + }, + { + "epoch": 3.2377299489409523, + "grad_norm": 0.02545316517353058, + "learning_rate": 6.660610427166993e-07, + "loss": 0.0, + "num_input_tokens_seen": 89323856, + "step": 132530 + }, + { + "epoch": 3.2378520997727995, + "grad_norm": 0.014912369661033154, + "learning_rate": 6.659806617175541e-07, + "loss": 0.0, + "num_input_tokens_seen": 89328016, + "step": 132535 + }, + { + "epoch": 3.2379742506046467, + "grad_norm": 0.10480382293462753, + "learning_rate": 6.659002831474878e-07, + "loss": 0.0, + "num_input_tokens_seen": 89331344, + "step": 132540 + }, + { + "epoch": 3.238096401436494, + "grad_norm": 0.008487637154757977, + "learning_rate": 6.658199070070842e-07, + "loss": 0.0, + "num_input_tokens_seen": 89334992, + "step": 132545 + }, + { + "epoch": 3.238218552268341, + "grad_norm": 8.822243398753926e-05, + "learning_rate": 6.657395332969279e-07, + "loss": 0.0, + "num_input_tokens_seen": 89338128, + "step": 132550 + }, + { + "epoch": 3.2383407031001883, + "grad_norm": 0.0040891217067837715, + "learning_rate": 6.656591620176041e-07, + "loss": 0.0, + "num_input_tokens_seen": 89341648, + "step": 132555 + }, + { + "epoch": 3.2384628539320355, + "grad_norm": 0.0024746954441070557, + "learning_rate": 6.655787931696964e-07, + "loss": 0.0, + "num_input_tokens_seen": 89345104, + "step": 132560 + }, + { + "epoch": 3.2385850047638822, + "grad_norm": 0.1467559039592743, + "learning_rate": 6.6549842675379e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89347856, + "step": 132565 + }, + { + "epoch": 3.2387071555957294, + "grad_norm": 0.0009347347659058869, + "learning_rate": 6.654180627704687e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89350992, + "step": 132570 + }, + { + "epoch": 3.2388293064275766, + "grad_norm": 0.0004353010153863579, + "learning_rate": 6.653377012203171e-07, + "loss": 0.0, + "num_input_tokens_seen": 89354000, + "step": 132575 + }, + { + "epoch": 3.238951457259424, + "grad_norm": 0.007408044300973415, + "learning_rate": 6.652573421039203e-07, + "loss": 0.1238, + "num_input_tokens_seen": 89357904, + "step": 132580 + }, + { + "epoch": 3.239073608091271, + "grad_norm": 0.0021132344845682383, + "learning_rate": 6.651769854218615e-07, + "loss": 0.0, + "num_input_tokens_seen": 89361616, + "step": 132585 + }, + { + "epoch": 3.239195758923118, + "grad_norm": 0.0005457843071781099, + "learning_rate": 6.650966311747263e-07, + "loss": 0.0, + "num_input_tokens_seen": 89365200, + "step": 132590 + }, + { + "epoch": 3.2393179097549654, + "grad_norm": 0.0004142549878451973, + "learning_rate": 6.650162793630982e-07, + "loss": 0.0, + "num_input_tokens_seen": 89369168, + "step": 132595 + }, + { + "epoch": 3.2394400605868126, + "grad_norm": 0.00022182843531481922, + "learning_rate": 6.649359299875619e-07, + "loss": 0.0, + "num_input_tokens_seen": 89372624, + "step": 132600 + }, + { + "epoch": 3.2395622114186597, + "grad_norm": 0.032848212867975235, + "learning_rate": 6.648555830487018e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89375888, + "step": 132605 + }, + { + "epoch": 3.239684362250507, + "grad_norm": 3.7265719583956525e-05, + "learning_rate": 6.647752385471015e-07, + "loss": 0.0, + "num_input_tokens_seen": 89379280, + "step": 132610 + }, + { + "epoch": 3.239806513082354, + "grad_norm": 0.005500871688127518, + "learning_rate": 6.646948964833465e-07, + "loss": 0.0003, + "num_input_tokens_seen": 89382416, + "step": 132615 + }, + { + "epoch": 3.2399286639142013, + "grad_norm": 0.006964322179555893, + "learning_rate": 6.646145568580198e-07, + "loss": 0.0, + "num_input_tokens_seen": 89385872, + "step": 132620 + }, + { + "epoch": 3.2400508147460485, + "grad_norm": 0.004247676581144333, + "learning_rate": 6.645342196717067e-07, + "loss": 0.0, + "num_input_tokens_seen": 89389136, + "step": 132625 + }, + { + "epoch": 3.2401729655778957, + "grad_norm": 0.033477533608675, + "learning_rate": 6.644538849249907e-07, + "loss": 0.0, + "num_input_tokens_seen": 89394768, + "step": 132630 + }, + { + "epoch": 3.240295116409743, + "grad_norm": 0.0007979911752045155, + "learning_rate": 6.643735526184562e-07, + "loss": 0.0, + "num_input_tokens_seen": 89398608, + "step": 132635 + }, + { + "epoch": 3.24041726724159, + "grad_norm": 9.7431001663208, + "learning_rate": 6.64293222752688e-07, + "loss": 0.0285, + "num_input_tokens_seen": 89402128, + "step": 132640 + }, + { + "epoch": 3.2405394180734373, + "grad_norm": 0.006626779213547707, + "learning_rate": 6.642128953282695e-07, + "loss": 0.0087, + "num_input_tokens_seen": 89405456, + "step": 132645 + }, + { + "epoch": 3.2406615689052845, + "grad_norm": 4.8968089686241e-05, + "learning_rate": 6.641325703457852e-07, + "loss": 0.0684, + "num_input_tokens_seen": 89409680, + "step": 132650 + }, + { + "epoch": 3.2407837197371316, + "grad_norm": 0.00018853062647394836, + "learning_rate": 6.64052247805819e-07, + "loss": 0.0, + "num_input_tokens_seen": 89413264, + "step": 132655 + }, + { + "epoch": 3.2409058705689784, + "grad_norm": 0.042871687561273575, + "learning_rate": 6.639719277089556e-07, + "loss": 0.0, + "num_input_tokens_seen": 89416592, + "step": 132660 + }, + { + "epoch": 3.2410280214008256, + "grad_norm": 0.00719387736171484, + "learning_rate": 6.638916100557782e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89420048, + "step": 132665 + }, + { + "epoch": 3.2411501722326728, + "grad_norm": 0.0006991358241066337, + "learning_rate": 6.638112948468715e-07, + "loss": 0.0, + "num_input_tokens_seen": 89423056, + "step": 132670 + }, + { + "epoch": 3.24127232306452, + "grad_norm": 0.004646055866032839, + "learning_rate": 6.637309820828199e-07, + "loss": 0.0, + "num_input_tokens_seen": 89426768, + "step": 132675 + }, + { + "epoch": 3.241394473896367, + "grad_norm": 0.00044314860133454204, + "learning_rate": 6.636506717642066e-07, + "loss": 0.0, + "num_input_tokens_seen": 89430032, + "step": 132680 + }, + { + "epoch": 3.2415166247282143, + "grad_norm": 0.00037013122346252203, + "learning_rate": 6.635703638916165e-07, + "loss": 0.0203, + "num_input_tokens_seen": 89433424, + "step": 132685 + }, + { + "epoch": 3.2416387755600615, + "grad_norm": 0.0005003222031518817, + "learning_rate": 6.634900584656328e-07, + "loss": 0.0, + "num_input_tokens_seen": 89437264, + "step": 132690 + }, + { + "epoch": 3.2417609263919087, + "grad_norm": 0.0033043583389371634, + "learning_rate": 6.634097554868403e-07, + "loss": 0.0, + "num_input_tokens_seen": 89440464, + "step": 132695 + }, + { + "epoch": 3.241883077223756, + "grad_norm": 0.0024806768633425236, + "learning_rate": 6.633294549558223e-07, + "loss": 0.0, + "num_input_tokens_seen": 89443792, + "step": 132700 + }, + { + "epoch": 3.242005228055603, + "grad_norm": 0.000671973277349025, + "learning_rate": 6.63249156873163e-07, + "loss": 0.0158, + "num_input_tokens_seen": 89447184, + "step": 132705 + }, + { + "epoch": 3.2421273788874503, + "grad_norm": 0.07534290105104446, + "learning_rate": 6.631688612394469e-07, + "loss": 0.0, + "num_input_tokens_seen": 89450960, + "step": 132710 + }, + { + "epoch": 3.2422495297192975, + "grad_norm": 0.013921644538640976, + "learning_rate": 6.630885680552567e-07, + "loss": 0.0, + "num_input_tokens_seen": 89454224, + "step": 132715 + }, + { + "epoch": 3.2423716805511447, + "grad_norm": 0.006058692466467619, + "learning_rate": 6.630082773211777e-07, + "loss": 0.0, + "num_input_tokens_seen": 89457808, + "step": 132720 + }, + { + "epoch": 3.242493831382992, + "grad_norm": 0.003040890209376812, + "learning_rate": 6.629279890377926e-07, + "loss": 0.0519, + "num_input_tokens_seen": 89461520, + "step": 132725 + }, + { + "epoch": 3.242615982214839, + "grad_norm": 0.0009857604745775461, + "learning_rate": 6.62847703205686e-07, + "loss": 0.0, + "num_input_tokens_seen": 89464912, + "step": 132730 + }, + { + "epoch": 3.2427381330466862, + "grad_norm": 0.035643454641103745, + "learning_rate": 6.627674198254419e-07, + "loss": 0.0961, + "num_input_tokens_seen": 89467984, + "step": 132735 + }, + { + "epoch": 3.2428602838785334, + "grad_norm": 0.00444948673248291, + "learning_rate": 6.626871388976433e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89470992, + "step": 132740 + }, + { + "epoch": 3.24298243471038, + "grad_norm": 0.0002501757408026606, + "learning_rate": 6.626068604228752e-07, + "loss": 0.0, + "num_input_tokens_seen": 89474256, + "step": 132745 + }, + { + "epoch": 3.2431045855422274, + "grad_norm": 0.0025310420896857977, + "learning_rate": 6.625265844017205e-07, + "loss": 0.0, + "num_input_tokens_seen": 89477328, + "step": 132750 + }, + { + "epoch": 3.2432267363740745, + "grad_norm": 0.0023761270567774773, + "learning_rate": 6.624463108347631e-07, + "loss": 0.0, + "num_input_tokens_seen": 89481040, + "step": 132755 + }, + { + "epoch": 3.2433488872059217, + "grad_norm": 0.001543680438771844, + "learning_rate": 6.62366039722587e-07, + "loss": 0.0, + "num_input_tokens_seen": 89484560, + "step": 132760 + }, + { + "epoch": 3.243471038037769, + "grad_norm": 0.0036118794232606888, + "learning_rate": 6.622857710657757e-07, + "loss": 0.0, + "num_input_tokens_seen": 89488016, + "step": 132765 + }, + { + "epoch": 3.243593188869616, + "grad_norm": 0.00880009587854147, + "learning_rate": 6.622055048649135e-07, + "loss": 0.0, + "num_input_tokens_seen": 89491344, + "step": 132770 + }, + { + "epoch": 3.2437153397014633, + "grad_norm": 0.00033158602309413254, + "learning_rate": 6.621252411205834e-07, + "loss": 0.0, + "num_input_tokens_seen": 89494864, + "step": 132775 + }, + { + "epoch": 3.2438374905333105, + "grad_norm": 0.01662488840520382, + "learning_rate": 6.620449798333696e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89498384, + "step": 132780 + }, + { + "epoch": 3.2439596413651577, + "grad_norm": 0.0022721965797245502, + "learning_rate": 6.619647210038554e-07, + "loss": 0.0, + "num_input_tokens_seen": 89501584, + "step": 132785 + }, + { + "epoch": 3.244081792197005, + "grad_norm": 0.003767449175938964, + "learning_rate": 6.618844646326245e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89504656, + "step": 132790 + }, + { + "epoch": 3.244203943028852, + "grad_norm": 0.0007159715751186013, + "learning_rate": 6.618042107202613e-07, + "loss": 0.0066, + "num_input_tokens_seen": 89507984, + "step": 132795 + }, + { + "epoch": 3.2443260938606993, + "grad_norm": 0.007665713783353567, + "learning_rate": 6.617239592673485e-07, + "loss": 0.0, + "num_input_tokens_seen": 89511440, + "step": 132800 + }, + { + "epoch": 3.2444482446925464, + "grad_norm": 7.725616887910292e-05, + "learning_rate": 6.616437102744701e-07, + "loss": 0.0, + "num_input_tokens_seen": 89514704, + "step": 132805 + }, + { + "epoch": 3.2445703955243936, + "grad_norm": 0.004263072274625301, + "learning_rate": 6.615634637422097e-07, + "loss": 0.0, + "num_input_tokens_seen": 89517712, + "step": 132810 + }, + { + "epoch": 3.244692546356241, + "grad_norm": 0.00012406962923705578, + "learning_rate": 6.61483219671151e-07, + "loss": 0.0, + "num_input_tokens_seen": 89521232, + "step": 132815 + }, + { + "epoch": 3.244814697188088, + "grad_norm": 9.752336336532608e-05, + "learning_rate": 6.61402978061877e-07, + "loss": 0.0468, + "num_input_tokens_seen": 89525008, + "step": 132820 + }, + { + "epoch": 3.244936848019935, + "grad_norm": 0.16568030416965485, + "learning_rate": 6.613227389149716e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89527824, + "step": 132825 + }, + { + "epoch": 3.245058998851782, + "grad_norm": 0.002355449367314577, + "learning_rate": 6.612425022310186e-07, + "loss": 0.0, + "num_input_tokens_seen": 89531280, + "step": 132830 + }, + { + "epoch": 3.245181149683629, + "grad_norm": 0.01688223145902157, + "learning_rate": 6.611622680106011e-07, + "loss": 0.0, + "num_input_tokens_seen": 89534992, + "step": 132835 + }, + { + "epoch": 3.2453033005154763, + "grad_norm": 0.0006499082082882524, + "learning_rate": 6.610820362543028e-07, + "loss": 0.0543, + "num_input_tokens_seen": 89538640, + "step": 132840 + }, + { + "epoch": 3.2454254513473235, + "grad_norm": 0.8445472121238708, + "learning_rate": 6.61001806962707e-07, + "loss": 0.0003, + "num_input_tokens_seen": 89541904, + "step": 132845 + }, + { + "epoch": 3.2455476021791707, + "grad_norm": 0.0038771627005189657, + "learning_rate": 6.609215801363974e-07, + "loss": 0.0, + "num_input_tokens_seen": 89545040, + "step": 132850 + }, + { + "epoch": 3.245669753011018, + "grad_norm": 3.451886004768312e-05, + "learning_rate": 6.608413557759572e-07, + "loss": 0.0003, + "num_input_tokens_seen": 89548432, + "step": 132855 + }, + { + "epoch": 3.245791903842865, + "grad_norm": 0.03163387253880501, + "learning_rate": 6.607611338819696e-07, + "loss": 0.0, + "num_input_tokens_seen": 89551696, + "step": 132860 + }, + { + "epoch": 3.2459140546747123, + "grad_norm": 0.0013465473894029856, + "learning_rate": 6.606809144550188e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89554960, + "step": 132865 + }, + { + "epoch": 3.2460362055065595, + "grad_norm": 0.28843867778778076, + "learning_rate": 6.606006974956872e-07, + "loss": 0.0831, + "num_input_tokens_seen": 89558544, + "step": 132870 + }, + { + "epoch": 3.2461583563384067, + "grad_norm": 0.0004849781689699739, + "learning_rate": 6.60520483004559e-07, + "loss": 0.0, + "num_input_tokens_seen": 89561872, + "step": 132875 + }, + { + "epoch": 3.246280507170254, + "grad_norm": 0.0038369568064808846, + "learning_rate": 6.604402709822168e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89565648, + "step": 132880 + }, + { + "epoch": 3.246402658002101, + "grad_norm": 0.0021499425638467073, + "learning_rate": 6.603600614292441e-07, + "loss": 0.0, + "num_input_tokens_seen": 89569424, + "step": 132885 + }, + { + "epoch": 3.2465248088339482, + "grad_norm": 0.001384978648275137, + "learning_rate": 6.602798543462252e-07, + "loss": 0.0, + "num_input_tokens_seen": 89572752, + "step": 132890 + }, + { + "epoch": 3.2466469596657954, + "grad_norm": 0.0010479361517354846, + "learning_rate": 6.601996497337418e-07, + "loss": 0.0, + "num_input_tokens_seen": 89576272, + "step": 132895 + }, + { + "epoch": 3.2467691104976426, + "grad_norm": 0.054091040045022964, + "learning_rate": 6.601194475923784e-07, + "loss": 0.0, + "num_input_tokens_seen": 89578960, + "step": 132900 + }, + { + "epoch": 3.24689126132949, + "grad_norm": 0.002501105424016714, + "learning_rate": 6.600392479227177e-07, + "loss": 0.0, + "num_input_tokens_seen": 89582288, + "step": 132905 + }, + { + "epoch": 3.247013412161337, + "grad_norm": 0.0004881916393060237, + "learning_rate": 6.599590507253429e-07, + "loss": 0.0625, + "num_input_tokens_seen": 89586320, + "step": 132910 + }, + { + "epoch": 3.247135562993184, + "grad_norm": 0.00018641616043169051, + "learning_rate": 6.598788560008375e-07, + "loss": 0.0, + "num_input_tokens_seen": 89590096, + "step": 132915 + }, + { + "epoch": 3.2472577138250314, + "grad_norm": 0.00022535701282322407, + "learning_rate": 6.597986637497841e-07, + "loss": 0.0, + "num_input_tokens_seen": 89593360, + "step": 132920 + }, + { + "epoch": 3.247379864656878, + "grad_norm": 0.0004712426452897489, + "learning_rate": 6.597184739727669e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89596944, + "step": 132925 + }, + { + "epoch": 3.2475020154887253, + "grad_norm": 0.0014859620714560151, + "learning_rate": 6.59638286670368e-07, + "loss": 0.0, + "num_input_tokens_seen": 89600272, + "step": 132930 + }, + { + "epoch": 3.2476241663205725, + "grad_norm": 0.7832088470458984, + "learning_rate": 6.595581018431715e-07, + "loss": 0.0206, + "num_input_tokens_seen": 89603472, + "step": 132935 + }, + { + "epoch": 3.2477463171524197, + "grad_norm": 0.0012433022493496537, + "learning_rate": 6.594779194917596e-07, + "loss": 0.0, + "num_input_tokens_seen": 89607056, + "step": 132940 + }, + { + "epoch": 3.247868467984267, + "grad_norm": 0.0006725243292748928, + "learning_rate": 6.593977396167159e-07, + "loss": 0.0, + "num_input_tokens_seen": 89610704, + "step": 132945 + }, + { + "epoch": 3.247990618816114, + "grad_norm": 0.006654083263128996, + "learning_rate": 6.59317562218624e-07, + "loss": 0.0433, + "num_input_tokens_seen": 89614096, + "step": 132950 + }, + { + "epoch": 3.2481127696479613, + "grad_norm": 0.0048959460109472275, + "learning_rate": 6.59237387298066e-07, + "loss": 0.0, + "num_input_tokens_seen": 89617424, + "step": 132955 + }, + { + "epoch": 3.2482349204798084, + "grad_norm": 0.0009554842254146934, + "learning_rate": 6.591572148556254e-07, + "loss": 0.0, + "num_input_tokens_seen": 89620560, + "step": 132960 + }, + { + "epoch": 3.2483570713116556, + "grad_norm": 0.00032741823815740645, + "learning_rate": 6.590770448918852e-07, + "loss": 0.0, + "num_input_tokens_seen": 89624144, + "step": 132965 + }, + { + "epoch": 3.248479222143503, + "grad_norm": 0.007848912850022316, + "learning_rate": 6.589968774074287e-07, + "loss": 0.0, + "num_input_tokens_seen": 89627856, + "step": 132970 + }, + { + "epoch": 3.24860137297535, + "grad_norm": 0.0036281703505665064, + "learning_rate": 6.589167124028382e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89631312, + "step": 132975 + }, + { + "epoch": 3.248723523807197, + "grad_norm": 0.003891072468832135, + "learning_rate": 6.588365498786972e-07, + "loss": 0.0, + "num_input_tokens_seen": 89634384, + "step": 132980 + }, + { + "epoch": 3.2488456746390444, + "grad_norm": 0.0014572052750736475, + "learning_rate": 6.587563898355888e-07, + "loss": 0.0, + "num_input_tokens_seen": 89638032, + "step": 132985 + }, + { + "epoch": 3.2489678254708916, + "grad_norm": 0.0013541283551603556, + "learning_rate": 6.586762322740953e-07, + "loss": 0.0, + "num_input_tokens_seen": 89642000, + "step": 132990 + }, + { + "epoch": 3.2490899763027388, + "grad_norm": 0.0017062696861103177, + "learning_rate": 6.585960771948006e-07, + "loss": 0.0, + "num_input_tokens_seen": 89645392, + "step": 132995 + }, + { + "epoch": 3.249212127134586, + "grad_norm": 0.000382839934900403, + "learning_rate": 6.585159245982866e-07, + "loss": 0.0, + "num_input_tokens_seen": 89648720, + "step": 133000 + }, + { + "epoch": 3.249334277966433, + "grad_norm": 0.0014269673265516758, + "learning_rate": 6.584357744851369e-07, + "loss": 0.0005, + "num_input_tokens_seen": 89651920, + "step": 133005 + }, + { + "epoch": 3.24945642879828, + "grad_norm": 0.009629074484109879, + "learning_rate": 6.583556268559343e-07, + "loss": 0.0, + "num_input_tokens_seen": 89654864, + "step": 133010 + }, + { + "epoch": 3.249578579630127, + "grad_norm": 0.014059150591492653, + "learning_rate": 6.582754817112609e-07, + "loss": 0.0, + "num_input_tokens_seen": 89658384, + "step": 133015 + }, + { + "epoch": 3.2497007304619743, + "grad_norm": 0.00020276792929507792, + "learning_rate": 6.581953390517007e-07, + "loss": 0.0572, + "num_input_tokens_seen": 89661392, + "step": 133020 + }, + { + "epoch": 3.2498228812938215, + "grad_norm": 3.3943728340091184e-05, + "learning_rate": 6.581151988778354e-07, + "loss": 0.0, + "num_input_tokens_seen": 89664592, + "step": 133025 + }, + { + "epoch": 3.2499450321256687, + "grad_norm": 0.00014127862232271582, + "learning_rate": 6.580350611902488e-07, + "loss": 0.0, + "num_input_tokens_seen": 89668176, + "step": 133030 + }, + { + "epoch": 3.250067182957516, + "grad_norm": 0.0010063429363071918, + "learning_rate": 6.579549259895227e-07, + "loss": 0.0, + "num_input_tokens_seen": 89671440, + "step": 133035 + }, + { + "epoch": 3.250189333789363, + "grad_norm": 0.014567175880074501, + "learning_rate": 6.578747932762405e-07, + "loss": 0.0, + "num_input_tokens_seen": 89674576, + "step": 133040 + }, + { + "epoch": 3.250238194122102, + "eval_loss": 0.26102444529533386, + "eval_runtime": 47.8303, + "eval_samples_per_second": 760.71, + "eval_steps_per_second": 95.107, + "num_input_tokens_seen": 89675984, + "step": 133042 + }, + { + "epoch": 3.2503114846212102, + "grad_norm": 0.0018988935044035316, + "learning_rate": 6.577946630509852e-07, + "loss": 0.0, + "num_input_tokens_seen": 89678096, + "step": 133045 + }, + { + "epoch": 3.2504336354530574, + "grad_norm": 0.03488035500049591, + "learning_rate": 6.577145353143388e-07, + "loss": 0.1774, + "num_input_tokens_seen": 89681424, + "step": 133050 + }, + { + "epoch": 3.2505557862849046, + "grad_norm": 0.0004958320059813559, + "learning_rate": 6.576344100668847e-07, + "loss": 0.0, + "num_input_tokens_seen": 89684240, + "step": 133055 + }, + { + "epoch": 3.250677937116752, + "grad_norm": 0.08107688277959824, + "learning_rate": 6.575542873092051e-07, + "loss": 0.0, + "num_input_tokens_seen": 89688208, + "step": 133060 + }, + { + "epoch": 3.250800087948599, + "grad_norm": 0.04220094159245491, + "learning_rate": 6.574741670418829e-07, + "loss": 0.0943, + "num_input_tokens_seen": 89691536, + "step": 133065 + }, + { + "epoch": 3.250922238780446, + "grad_norm": 0.026985283941030502, + "learning_rate": 6.573940492655005e-07, + "loss": 0.0, + "num_input_tokens_seen": 89694864, + "step": 133070 + }, + { + "epoch": 3.2510443896122934, + "grad_norm": 0.004324234090745449, + "learning_rate": 6.573139339806406e-07, + "loss": 0.0, + "num_input_tokens_seen": 89698064, + "step": 133075 + }, + { + "epoch": 3.2511665404441406, + "grad_norm": 0.0011103182332590222, + "learning_rate": 6.572338211878864e-07, + "loss": 0.0, + "num_input_tokens_seen": 89701328, + "step": 133080 + }, + { + "epoch": 3.2512886912759877, + "grad_norm": 0.00011528417962836102, + "learning_rate": 6.571537108878195e-07, + "loss": 0.0313, + "num_input_tokens_seen": 89705040, + "step": 133085 + }, + { + "epoch": 3.251410842107835, + "grad_norm": 0.008831367827951908, + "learning_rate": 6.570736030810236e-07, + "loss": 0.0, + "num_input_tokens_seen": 89708880, + "step": 133090 + }, + { + "epoch": 3.2515329929396817, + "grad_norm": 0.015606212429702282, + "learning_rate": 6.569934977680802e-07, + "loss": 0.0, + "num_input_tokens_seen": 89712464, + "step": 133095 + }, + { + "epoch": 3.2516551437715293, + "grad_norm": 0.183375284075737, + "learning_rate": 6.569133949495724e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89716176, + "step": 133100 + }, + { + "epoch": 3.251777294603376, + "grad_norm": 0.00039575432310812175, + "learning_rate": 6.568332946260831e-07, + "loss": 0.0, + "num_input_tokens_seen": 89719056, + "step": 133105 + }, + { + "epoch": 3.2518994454352232, + "grad_norm": 0.005938323680311441, + "learning_rate": 6.56753196798194e-07, + "loss": 0.0, + "num_input_tokens_seen": 89722448, + "step": 133110 + }, + { + "epoch": 3.2520215962670704, + "grad_norm": 0.14990012347698212, + "learning_rate": 6.566731014664881e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89725648, + "step": 133115 + }, + { + "epoch": 3.2521437470989176, + "grad_norm": 0.009630163200199604, + "learning_rate": 6.565930086315479e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89728592, + "step": 133120 + }, + { + "epoch": 3.252265897930765, + "grad_norm": 0.0037281611002981663, + "learning_rate": 6.565129182939557e-07, + "loss": 0.0, + "num_input_tokens_seen": 89731792, + "step": 133125 + }, + { + "epoch": 3.252388048762612, + "grad_norm": 0.023433174937963486, + "learning_rate": 6.564328304542936e-07, + "loss": 0.0, + "num_input_tokens_seen": 89735504, + "step": 133130 + }, + { + "epoch": 3.252510199594459, + "grad_norm": 0.17902742326259613, + "learning_rate": 6.563527451131443e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89739280, + "step": 133135 + }, + { + "epoch": 3.2526323504263064, + "grad_norm": 0.0012498385040089488, + "learning_rate": 6.562726622710908e-07, + "loss": 0.0, + "num_input_tokens_seen": 89743248, + "step": 133140 + }, + { + "epoch": 3.2527545012581536, + "grad_norm": 0.002557901432737708, + "learning_rate": 6.561925819287144e-07, + "loss": 0.0, + "num_input_tokens_seen": 89746384, + "step": 133145 + }, + { + "epoch": 3.2528766520900008, + "grad_norm": 0.0017938632518053055, + "learning_rate": 6.561125040865984e-07, + "loss": 0.0838, + "num_input_tokens_seen": 89749968, + "step": 133150 + }, + { + "epoch": 3.252998802921848, + "grad_norm": 0.017057769000530243, + "learning_rate": 6.560324287453246e-07, + "loss": 0.0, + "num_input_tokens_seen": 89753040, + "step": 133155 + }, + { + "epoch": 3.253120953753695, + "grad_norm": 0.0012168893590569496, + "learning_rate": 6.559523559054758e-07, + "loss": 0.0778, + "num_input_tokens_seen": 89756496, + "step": 133160 + }, + { + "epoch": 3.2532431045855423, + "grad_norm": 0.0019093046430498362, + "learning_rate": 6.558722855676335e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89759440, + "step": 133165 + }, + { + "epoch": 3.2533652554173895, + "grad_norm": 0.001984116854146123, + "learning_rate": 6.557922177323807e-07, + "loss": 0.0611, + "num_input_tokens_seen": 89762960, + "step": 133170 + }, + { + "epoch": 3.2534874062492367, + "grad_norm": 0.00024715482140891254, + "learning_rate": 6.557121524002998e-07, + "loss": 0.0609, + "num_input_tokens_seen": 89766544, + "step": 133175 + }, + { + "epoch": 3.253609557081084, + "grad_norm": 0.007147402036935091, + "learning_rate": 6.556320895719723e-07, + "loss": 0.0, + "num_input_tokens_seen": 89769744, + "step": 133180 + }, + { + "epoch": 3.253731707912931, + "grad_norm": 0.01977328583598137, + "learning_rate": 6.555520292479812e-07, + "loss": 0.0, + "num_input_tokens_seen": 89773584, + "step": 133185 + }, + { + "epoch": 3.253853858744778, + "grad_norm": 0.07471085339784622, + "learning_rate": 6.554719714289081e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89776656, + "step": 133190 + }, + { + "epoch": 3.253976009576625, + "grad_norm": 0.014332527294754982, + "learning_rate": 6.553919161153354e-07, + "loss": 0.0557, + "num_input_tokens_seen": 89779984, + "step": 133195 + }, + { + "epoch": 3.254098160408472, + "grad_norm": 0.001091811340302229, + "learning_rate": 6.553118633078457e-07, + "loss": 0.0, + "num_input_tokens_seen": 89783568, + "step": 133200 + }, + { + "epoch": 3.2542203112403194, + "grad_norm": 0.016809549182653427, + "learning_rate": 6.552318130070206e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89786896, + "step": 133205 + }, + { + "epoch": 3.2543424620721666, + "grad_norm": 0.09317447245121002, + "learning_rate": 6.551517652134428e-07, + "loss": 0.0513, + "num_input_tokens_seen": 89789904, + "step": 133210 + }, + { + "epoch": 3.254464612904014, + "grad_norm": 0.025950223207473755, + "learning_rate": 6.550717199276939e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89793104, + "step": 133215 + }, + { + "epoch": 3.254586763735861, + "grad_norm": 1.1682426929473877, + "learning_rate": 6.549916771503564e-07, + "loss": 0.0344, + "num_input_tokens_seen": 89796304, + "step": 133220 + }, + { + "epoch": 3.254708914567708, + "grad_norm": 0.012416906654834747, + "learning_rate": 6.549116368820121e-07, + "loss": 0.0058, + "num_input_tokens_seen": 89799184, + "step": 133225 + }, + { + "epoch": 3.2548310653995554, + "grad_norm": 0.010654439218342304, + "learning_rate": 6.548315991232428e-07, + "loss": 0.0377, + "num_input_tokens_seen": 89802384, + "step": 133230 + }, + { + "epoch": 3.2549532162314025, + "grad_norm": 0.0026855203323066235, + "learning_rate": 6.547515638746315e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89806032, + "step": 133235 + }, + { + "epoch": 3.2550753670632497, + "grad_norm": 0.004891504999250174, + "learning_rate": 6.546715311367593e-07, + "loss": 0.0137, + "num_input_tokens_seen": 89808976, + "step": 133240 + }, + { + "epoch": 3.255197517895097, + "grad_norm": 1.8451565504074097, + "learning_rate": 6.545915009102091e-07, + "loss": 0.0284, + "num_input_tokens_seen": 89813200, + "step": 133245 + }, + { + "epoch": 3.255319668726944, + "grad_norm": 0.2361879199743271, + "learning_rate": 6.545114731955619e-07, + "loss": 0.0005, + "num_input_tokens_seen": 89816400, + "step": 133250 + }, + { + "epoch": 3.2554418195587913, + "grad_norm": 0.019942492246627808, + "learning_rate": 6.544314479934005e-07, + "loss": 0.0, + "num_input_tokens_seen": 89819728, + "step": 133255 + }, + { + "epoch": 3.2555639703906385, + "grad_norm": 0.0032596394885331392, + "learning_rate": 6.543514253043063e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89823312, + "step": 133260 + }, + { + "epoch": 3.2556861212224857, + "grad_norm": 0.0006665443652309477, + "learning_rate": 6.542714051288618e-07, + "loss": 0.0, + "num_input_tokens_seen": 89826832, + "step": 133265 + }, + { + "epoch": 3.255808272054333, + "grad_norm": 0.024253949522972107, + "learning_rate": 6.541913874676486e-07, + "loss": 0.0, + "num_input_tokens_seen": 89829968, + "step": 133270 + }, + { + "epoch": 3.2559304228861796, + "grad_norm": 0.01717350445687771, + "learning_rate": 6.541113723212484e-07, + "loss": 0.0465, + "num_input_tokens_seen": 89833168, + "step": 133275 + }, + { + "epoch": 3.2560525737180273, + "grad_norm": 0.34456226229667664, + "learning_rate": 6.540313596902438e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89836752, + "step": 133280 + }, + { + "epoch": 3.256174724549874, + "grad_norm": 0.0388353168964386, + "learning_rate": 6.539513495752155e-07, + "loss": 0.0, + "num_input_tokens_seen": 89840208, + "step": 133285 + }, + { + "epoch": 3.256296875381721, + "grad_norm": 0.0007972258608788252, + "learning_rate": 6.538713419767463e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89843088, + "step": 133290 + }, + { + "epoch": 3.2564190262135684, + "grad_norm": 0.0009614526061341166, + "learning_rate": 6.537913368954182e-07, + "loss": 0.0, + "num_input_tokens_seen": 89846416, + "step": 133295 + }, + { + "epoch": 3.2565411770454156, + "grad_norm": 0.0011288266396149993, + "learning_rate": 6.537113343318122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89850000, + "step": 133300 + }, + { + "epoch": 3.2566633278772628, + "grad_norm": 0.0012508789077401161, + "learning_rate": 6.536313342865109e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89853008, + "step": 133305 + }, + { + "epoch": 3.25678547870911, + "grad_norm": 0.015493168495595455, + "learning_rate": 6.535513367600953e-07, + "loss": 0.0, + "num_input_tokens_seen": 89856464, + "step": 133310 + }, + { + "epoch": 3.256907629540957, + "grad_norm": 21.916187286376953, + "learning_rate": 6.534713417531479e-07, + "loss": 0.0343, + "num_input_tokens_seen": 89859216, + "step": 133315 + }, + { + "epoch": 3.2570297803728043, + "grad_norm": 0.0007719859713688493, + "learning_rate": 6.533913492662497e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89862992, + "step": 133320 + }, + { + "epoch": 3.2571519312046515, + "grad_norm": 0.027441492304205894, + "learning_rate": 6.533113592999833e-07, + "loss": 0.0, + "num_input_tokens_seen": 89866000, + "step": 133325 + }, + { + "epoch": 3.2572740820364987, + "grad_norm": 0.038071949034929276, + "learning_rate": 6.532313718549299e-07, + "loss": 0.0247, + "num_input_tokens_seen": 89869200, + "step": 133330 + }, + { + "epoch": 3.257396232868346, + "grad_norm": 0.0006605487433262169, + "learning_rate": 6.531513869316707e-07, + "loss": 0.0, + "num_input_tokens_seen": 89872528, + "step": 133335 + }, + { + "epoch": 3.257518383700193, + "grad_norm": 0.052122194319963455, + "learning_rate": 6.530714045307886e-07, + "loss": 0.0, + "num_input_tokens_seen": 89875920, + "step": 133340 + }, + { + "epoch": 3.2576405345320403, + "grad_norm": 0.00033679555053822696, + "learning_rate": 6.52991424652864e-07, + "loss": 0.0, + "num_input_tokens_seen": 89879568, + "step": 133345 + }, + { + "epoch": 3.2577626853638875, + "grad_norm": 0.2269451916217804, + "learning_rate": 6.529114472984791e-07, + "loss": 0.0418, + "num_input_tokens_seen": 89882640, + "step": 133350 + }, + { + "epoch": 3.2578848361957347, + "grad_norm": 0.00019456748850643635, + "learning_rate": 6.52831472468216e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89886160, + "step": 133355 + }, + { + "epoch": 3.258006987027582, + "grad_norm": 0.0026399213820695877, + "learning_rate": 6.527515001626554e-07, + "loss": 0.0004, + "num_input_tokens_seen": 89889680, + "step": 133360 + }, + { + "epoch": 3.258129137859429, + "grad_norm": 0.0004979751538485289, + "learning_rate": 6.526715303823795e-07, + "loss": 0.0, + "num_input_tokens_seen": 89893264, + "step": 133365 + }, + { + "epoch": 3.258251288691276, + "grad_norm": 0.0011222100583836436, + "learning_rate": 6.525915631279697e-07, + "loss": 0.0, + "num_input_tokens_seen": 89896592, + "step": 133370 + }, + { + "epoch": 3.258373439523123, + "grad_norm": 0.008360485546290874, + "learning_rate": 6.525115984000073e-07, + "loss": 0.0, + "num_input_tokens_seen": 89900048, + "step": 133375 + }, + { + "epoch": 3.25849559035497, + "grad_norm": 0.09326830506324768, + "learning_rate": 6.524316361990741e-07, + "loss": 0.036, + "num_input_tokens_seen": 89903760, + "step": 133380 + }, + { + "epoch": 3.2586177411868174, + "grad_norm": 0.08574612438678741, + "learning_rate": 6.523516765257513e-07, + "loss": 0.0, + "num_input_tokens_seen": 89907344, + "step": 133385 + }, + { + "epoch": 3.2587398920186645, + "grad_norm": 0.006928337272256613, + "learning_rate": 6.522717193806211e-07, + "loss": 0.0, + "num_input_tokens_seen": 89910416, + "step": 133390 + }, + { + "epoch": 3.2588620428505117, + "grad_norm": 0.0026560239493846893, + "learning_rate": 6.52191764764264e-07, + "loss": 0.0, + "num_input_tokens_seen": 89914064, + "step": 133395 + }, + { + "epoch": 3.258984193682359, + "grad_norm": 0.005287462379783392, + "learning_rate": 6.521118126772625e-07, + "loss": 0.0, + "num_input_tokens_seen": 89917520, + "step": 133400 + }, + { + "epoch": 3.259106344514206, + "grad_norm": 0.0896107479929924, + "learning_rate": 6.520318631201969e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89920464, + "step": 133405 + }, + { + "epoch": 3.2592284953460533, + "grad_norm": 0.0007962300442159176, + "learning_rate": 6.519519160936495e-07, + "loss": 0.0, + "num_input_tokens_seen": 89923536, + "step": 133410 + }, + { + "epoch": 3.2593506461779005, + "grad_norm": 0.0007463557994924486, + "learning_rate": 6.518719715982011e-07, + "loss": 0.0, + "num_input_tokens_seen": 89927184, + "step": 133415 + }, + { + "epoch": 3.2594727970097477, + "grad_norm": 0.07562704384326935, + "learning_rate": 6.517920296344335e-07, + "loss": 0.0001, + "num_input_tokens_seen": 89930512, + "step": 133420 + }, + { + "epoch": 3.259594947841595, + "grad_norm": 0.5774443745613098, + "learning_rate": 6.517120902029281e-07, + "loss": 0.0003, + "num_input_tokens_seen": 89933776, + "step": 133425 + }, + { + "epoch": 3.259717098673442, + "grad_norm": 0.0017166045727208257, + "learning_rate": 6.516321533042659e-07, + "loss": 0.0, + "num_input_tokens_seen": 89936976, + "step": 133430 + }, + { + "epoch": 3.2598392495052892, + "grad_norm": 0.013317457400262356, + "learning_rate": 6.515522189390286e-07, + "loss": 0.0309, + "num_input_tokens_seen": 89940368, + "step": 133435 + }, + { + "epoch": 3.2599614003371364, + "grad_norm": 0.0011091303313151002, + "learning_rate": 6.514722871077969e-07, + "loss": 0.0, + "num_input_tokens_seen": 89943376, + "step": 133440 + }, + { + "epoch": 3.2600835511689836, + "grad_norm": 0.002225500764325261, + "learning_rate": 6.513923578111525e-07, + "loss": 0.0, + "num_input_tokens_seen": 89946320, + "step": 133445 + }, + { + "epoch": 3.260205702000831, + "grad_norm": 0.01922401413321495, + "learning_rate": 6.513124310496769e-07, + "loss": 0.0, + "num_input_tokens_seen": 89949648, + "step": 133450 + }, + { + "epoch": 3.2603278528326776, + "grad_norm": 0.00025927339447662234, + "learning_rate": 6.512325068239508e-07, + "loss": 0.0, + "num_input_tokens_seen": 89952912, + "step": 133455 + }, + { + "epoch": 3.260450003664525, + "grad_norm": 0.0014124533627182245, + "learning_rate": 6.511525851345562e-07, + "loss": 0.0, + "num_input_tokens_seen": 89956560, + "step": 133460 + }, + { + "epoch": 3.260572154496372, + "grad_norm": 0.027523819357156754, + "learning_rate": 6.510726659820733e-07, + "loss": 0.0265, + "num_input_tokens_seen": 89959632, + "step": 133465 + }, + { + "epoch": 3.260694305328219, + "grad_norm": 0.0057300967164337635, + "learning_rate": 6.509927493670842e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89962704, + "step": 133470 + }, + { + "epoch": 3.2608164561600663, + "grad_norm": 0.004024703986942768, + "learning_rate": 6.509128352901694e-07, + "loss": 0.0, + "num_input_tokens_seen": 89965712, + "step": 133475 + }, + { + "epoch": 3.2609386069919135, + "grad_norm": 0.000581703323405236, + "learning_rate": 6.508329237519106e-07, + "loss": 0.0, + "num_input_tokens_seen": 89968976, + "step": 133480 + }, + { + "epoch": 3.2610607578237607, + "grad_norm": 0.00022578010975848883, + "learning_rate": 6.507530147528888e-07, + "loss": 0.0, + "num_input_tokens_seen": 89972176, + "step": 133485 + }, + { + "epoch": 3.261182908655608, + "grad_norm": 3.6321653169579804e-05, + "learning_rate": 6.506731082936845e-07, + "loss": 0.0, + "num_input_tokens_seen": 89975056, + "step": 133490 + }, + { + "epoch": 3.261305059487455, + "grad_norm": 0.0028527742251753807, + "learning_rate": 6.505932043748798e-07, + "loss": 0.0, + "num_input_tokens_seen": 89978640, + "step": 133495 + }, + { + "epoch": 3.2614272103193023, + "grad_norm": 2.6254167556762695, + "learning_rate": 6.505133029970551e-07, + "loss": 0.0002, + "num_input_tokens_seen": 89982288, + "step": 133500 + }, + { + "epoch": 3.2615493611511495, + "grad_norm": 0.0006126816151663661, + "learning_rate": 6.504334041607914e-07, + "loss": 0.0, + "num_input_tokens_seen": 89986064, + "step": 133505 + }, + { + "epoch": 3.2616715119829967, + "grad_norm": 0.0002679953468032181, + "learning_rate": 6.503535078666705e-07, + "loss": 0.0, + "num_input_tokens_seen": 89989328, + "step": 133510 + }, + { + "epoch": 3.261793662814844, + "grad_norm": 42.283355712890625, + "learning_rate": 6.502736141152724e-07, + "loss": 0.0527, + "num_input_tokens_seen": 89992592, + "step": 133515 + }, + { + "epoch": 3.261915813646691, + "grad_norm": 74.90733337402344, + "learning_rate": 6.501937229071793e-07, + "loss": 0.0548, + "num_input_tokens_seen": 89995856, + "step": 133520 + }, + { + "epoch": 3.262037964478538, + "grad_norm": 0.010414835065603256, + "learning_rate": 6.501138342429713e-07, + "loss": 0.0, + "num_input_tokens_seen": 89999312, + "step": 133525 + }, + { + "epoch": 3.2621601153103854, + "grad_norm": 0.002175811445340514, + "learning_rate": 6.500339481232296e-07, + "loss": 0.0, + "num_input_tokens_seen": 90002512, + "step": 133530 + }, + { + "epoch": 3.2622822661422326, + "grad_norm": 0.003502947511151433, + "learning_rate": 6.49954064548535e-07, + "loss": 0.0, + "num_input_tokens_seen": 90005520, + "step": 133535 + }, + { + "epoch": 3.2624044169740793, + "grad_norm": 0.00042188982479274273, + "learning_rate": 6.498741835194684e-07, + "loss": 0.0, + "num_input_tokens_seen": 90008656, + "step": 133540 + }, + { + "epoch": 3.262526567805927, + "grad_norm": 0.005062382202595472, + "learning_rate": 6.497943050366115e-07, + "loss": 0.0, + "num_input_tokens_seen": 90012624, + "step": 133545 + }, + { + "epoch": 3.2626487186377737, + "grad_norm": 0.0034066669177263975, + "learning_rate": 6.49714429100544e-07, + "loss": 0.0, + "num_input_tokens_seen": 90015632, + "step": 133550 + }, + { + "epoch": 3.262770869469621, + "grad_norm": 0.0008335746242664754, + "learning_rate": 6.496345557118478e-07, + "loss": 0.0441, + "num_input_tokens_seen": 90019344, + "step": 133555 + }, + { + "epoch": 3.262893020301468, + "grad_norm": 0.0030794290360063314, + "learning_rate": 6.495546848711031e-07, + "loss": 0.075, + "num_input_tokens_seen": 90022672, + "step": 133560 + }, + { + "epoch": 3.2630151711333153, + "grad_norm": 0.003941728733479977, + "learning_rate": 6.494748165788912e-07, + "loss": 0.0, + "num_input_tokens_seen": 90026128, + "step": 133565 + }, + { + "epoch": 3.2631373219651625, + "grad_norm": 0.0013275842647999525, + "learning_rate": 6.493949508357926e-07, + "loss": 0.0004, + "num_input_tokens_seen": 90029456, + "step": 133570 + }, + { + "epoch": 3.2632594727970097, + "grad_norm": 0.003424836788326502, + "learning_rate": 6.493150876423882e-07, + "loss": 0.0, + "num_input_tokens_seen": 90032592, + "step": 133575 + }, + { + "epoch": 3.263381623628857, + "grad_norm": 0.0038157880771905184, + "learning_rate": 6.492352269992588e-07, + "loss": 0.0, + "num_input_tokens_seen": 90036240, + "step": 133580 + }, + { + "epoch": 3.263503774460704, + "grad_norm": 0.00199596188031137, + "learning_rate": 6.491553689069853e-07, + "loss": 0.0, + "num_input_tokens_seen": 90039376, + "step": 133585 + }, + { + "epoch": 3.2636259252925512, + "grad_norm": 0.03811373934149742, + "learning_rate": 6.490755133661484e-07, + "loss": 0.1105, + "num_input_tokens_seen": 90042448, + "step": 133590 + }, + { + "epoch": 3.2637480761243984, + "grad_norm": 0.0061827911995351315, + "learning_rate": 6.489956603773284e-07, + "loss": 0.0406, + "num_input_tokens_seen": 90045648, + "step": 133595 + }, + { + "epoch": 3.2638702269562456, + "grad_norm": 0.0010410583345219493, + "learning_rate": 6.489158099411062e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90049104, + "step": 133600 + }, + { + "epoch": 3.263992377788093, + "grad_norm": 0.0003505503118503839, + "learning_rate": 6.488359620580634e-07, + "loss": 0.056, + "num_input_tokens_seen": 90052560, + "step": 133605 + }, + { + "epoch": 3.26411452861994, + "grad_norm": 0.0007625527214258909, + "learning_rate": 6.487561167287794e-07, + "loss": 0.0, + "num_input_tokens_seen": 90055760, + "step": 133610 + }, + { + "epoch": 3.264236679451787, + "grad_norm": 0.003039886010810733, + "learning_rate": 6.486762739538356e-07, + "loss": 0.0, + "num_input_tokens_seen": 90059216, + "step": 133615 + }, + { + "epoch": 3.2643588302836344, + "grad_norm": 0.00022629981685895473, + "learning_rate": 6.485964337338124e-07, + "loss": 0.0348, + "num_input_tokens_seen": 90062672, + "step": 133620 + }, + { + "epoch": 3.2644809811154816, + "grad_norm": 0.0004150950408075005, + "learning_rate": 6.485165960692906e-07, + "loss": 0.0686, + "num_input_tokens_seen": 90065808, + "step": 133625 + }, + { + "epoch": 3.2646031319473288, + "grad_norm": 0.008487959392368793, + "learning_rate": 6.484367609608502e-07, + "loss": 0.0004, + "num_input_tokens_seen": 90069392, + "step": 133630 + }, + { + "epoch": 3.2647252827791755, + "grad_norm": 0.00031680116080679, + "learning_rate": 6.483569284090725e-07, + "loss": 0.0, + "num_input_tokens_seen": 90072400, + "step": 133635 + }, + { + "epoch": 3.2648474336110227, + "grad_norm": 0.0003479302395135164, + "learning_rate": 6.482770984145381e-07, + "loss": 0.0, + "num_input_tokens_seen": 90076048, + "step": 133640 + }, + { + "epoch": 3.26496958444287, + "grad_norm": 0.03978421166539192, + "learning_rate": 6.481972709778267e-07, + "loss": 0.0, + "num_input_tokens_seen": 90078992, + "step": 133645 + }, + { + "epoch": 3.265091735274717, + "grad_norm": 0.0023317488376051188, + "learning_rate": 6.481174460995198e-07, + "loss": 0.0, + "num_input_tokens_seen": 90082576, + "step": 133650 + }, + { + "epoch": 3.2652138861065643, + "grad_norm": 0.0005702111520804465, + "learning_rate": 6.480376237801973e-07, + "loss": 0.0, + "num_input_tokens_seen": 90085712, + "step": 133655 + }, + { + "epoch": 3.2653360369384115, + "grad_norm": 0.0005525056039914489, + "learning_rate": 6.479578040204396e-07, + "loss": 0.0, + "num_input_tokens_seen": 90088848, + "step": 133660 + }, + { + "epoch": 3.2654581877702586, + "grad_norm": 0.0002772066800389439, + "learning_rate": 6.478779868208278e-07, + "loss": 0.0, + "num_input_tokens_seen": 90091920, + "step": 133665 + }, + { + "epoch": 3.265580338602106, + "grad_norm": 0.003932084422558546, + "learning_rate": 6.477981721819416e-07, + "loss": 0.0, + "num_input_tokens_seen": 90095120, + "step": 133670 + }, + { + "epoch": 3.265702489433953, + "grad_norm": 0.0019422955811023712, + "learning_rate": 6.477183601043625e-07, + "loss": 0.0, + "num_input_tokens_seen": 90098192, + "step": 133675 + }, + { + "epoch": 3.2658246402658, + "grad_norm": 0.010535065084695816, + "learning_rate": 6.476385505886698e-07, + "loss": 0.0, + "num_input_tokens_seen": 90101264, + "step": 133680 + }, + { + "epoch": 3.2659467910976474, + "grad_norm": 0.0017384805250912905, + "learning_rate": 6.475587436354443e-07, + "loss": 0.0, + "num_input_tokens_seen": 90104400, + "step": 133685 + }, + { + "epoch": 3.2660689419294946, + "grad_norm": 0.0010419674217700958, + "learning_rate": 6.474789392452666e-07, + "loss": 0.0, + "num_input_tokens_seen": 90108112, + "step": 133690 + }, + { + "epoch": 3.266191092761342, + "grad_norm": 0.016605474054813385, + "learning_rate": 6.473991374187166e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90111056, + "step": 133695 + }, + { + "epoch": 3.266313243593189, + "grad_norm": 0.07536473870277405, + "learning_rate": 6.473193381563753e-07, + "loss": 0.0, + "num_input_tokens_seen": 90114512, + "step": 133700 + }, + { + "epoch": 3.266435394425036, + "grad_norm": 0.0019029227551072836, + "learning_rate": 6.472395414588222e-07, + "loss": 0.0, + "num_input_tokens_seen": 90117840, + "step": 133705 + }, + { + "epoch": 3.2665575452568834, + "grad_norm": 0.00034090832923538983, + "learning_rate": 6.471597473266385e-07, + "loss": 0.0359, + "num_input_tokens_seen": 90121424, + "step": 133710 + }, + { + "epoch": 3.2666796960887305, + "grad_norm": 0.027212005108594894, + "learning_rate": 6.470799557604035e-07, + "loss": 0.0, + "num_input_tokens_seen": 90124880, + "step": 133715 + }, + { + "epoch": 3.2668018469205773, + "grad_norm": 0.0004326793714426458, + "learning_rate": 6.470001667606986e-07, + "loss": 0.0, + "num_input_tokens_seen": 90127888, + "step": 133720 + }, + { + "epoch": 3.266923997752425, + "grad_norm": 0.0014250059612095356, + "learning_rate": 6.469203803281027e-07, + "loss": 0.0, + "num_input_tokens_seen": 90131344, + "step": 133725 + }, + { + "epoch": 3.2670461485842717, + "grad_norm": 0.03089824691414833, + "learning_rate": 6.468405964631972e-07, + "loss": 0.0453, + "num_input_tokens_seen": 90134288, + "step": 133730 + }, + { + "epoch": 3.267168299416119, + "grad_norm": 0.0011771543649956584, + "learning_rate": 6.467608151665618e-07, + "loss": 0.0, + "num_input_tokens_seen": 90137616, + "step": 133735 + }, + { + "epoch": 3.267290450247966, + "grad_norm": 0.0012687522685155272, + "learning_rate": 6.46681036438777e-07, + "loss": 0.0, + "num_input_tokens_seen": 90141264, + "step": 133740 + }, + { + "epoch": 3.2674126010798132, + "grad_norm": 0.0001758733851602301, + "learning_rate": 6.466012602804225e-07, + "loss": 0.0, + "num_input_tokens_seen": 90144400, + "step": 133745 + }, + { + "epoch": 3.2675347519116604, + "grad_norm": 0.0003743020643014461, + "learning_rate": 6.465214866920785e-07, + "loss": 0.0, + "num_input_tokens_seen": 90147856, + "step": 133750 + }, + { + "epoch": 3.2676569027435076, + "grad_norm": 0.024355005472898483, + "learning_rate": 6.464417156743253e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90151504, + "step": 133755 + }, + { + "epoch": 3.267779053575355, + "grad_norm": 0.020076554268598557, + "learning_rate": 6.463619472277436e-07, + "loss": 0.0, + "num_input_tokens_seen": 90155088, + "step": 133760 + }, + { + "epoch": 3.267901204407202, + "grad_norm": 9.398308611707762e-05, + "learning_rate": 6.462821813529125e-07, + "loss": 0.0, + "num_input_tokens_seen": 90158032, + "step": 133765 + }, + { + "epoch": 3.268023355239049, + "grad_norm": 0.00041264158789999783, + "learning_rate": 6.462024180504128e-07, + "loss": 0.0043, + "num_input_tokens_seen": 90161040, + "step": 133770 + }, + { + "epoch": 3.2681455060708964, + "grad_norm": 0.00012087931827409193, + "learning_rate": 6.461226573208239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90164368, + "step": 133775 + }, + { + "epoch": 3.2682676569027436, + "grad_norm": 7.979868678376079e-05, + "learning_rate": 6.460428991647265e-07, + "loss": 0.0, + "num_input_tokens_seen": 90167504, + "step": 133780 + }, + { + "epoch": 3.2683898077345908, + "grad_norm": 0.008352191187441349, + "learning_rate": 6.459631435827001e-07, + "loss": 0.0, + "num_input_tokens_seen": 90170960, + "step": 133785 + }, + { + "epoch": 3.268511958566438, + "grad_norm": 0.0003512290713842958, + "learning_rate": 6.458833905753251e-07, + "loss": 0.0, + "num_input_tokens_seen": 90174160, + "step": 133790 + }, + { + "epoch": 3.268634109398285, + "grad_norm": 8.37286570458673e-05, + "learning_rate": 6.458036401431816e-07, + "loss": 0.0, + "num_input_tokens_seen": 90177616, + "step": 133795 + }, + { + "epoch": 3.2687562602301323, + "grad_norm": 0.00048232992412522435, + "learning_rate": 6.457238922868487e-07, + "loss": 0.0, + "num_input_tokens_seen": 90181200, + "step": 133800 + }, + { + "epoch": 3.2688784110619795, + "grad_norm": 0.0005159936263225973, + "learning_rate": 6.456441470069076e-07, + "loss": 0.0, + "num_input_tokens_seen": 90184656, + "step": 133805 + }, + { + "epoch": 3.2690005618938267, + "grad_norm": 0.0002815297630149871, + "learning_rate": 6.45564404303937e-07, + "loss": 0.0, + "num_input_tokens_seen": 90187920, + "step": 133810 + }, + { + "epoch": 3.2691227127256735, + "grad_norm": 0.0007183253183029592, + "learning_rate": 6.454846641785174e-07, + "loss": 0.0, + "num_input_tokens_seen": 90191184, + "step": 133815 + }, + { + "epoch": 3.2692448635575206, + "grad_norm": 0.007655336521565914, + "learning_rate": 6.454049266312291e-07, + "loss": 0.0, + "num_input_tokens_seen": 90194448, + "step": 133820 + }, + { + "epoch": 3.269367014389368, + "grad_norm": 0.015166401863098145, + "learning_rate": 6.453251916626512e-07, + "loss": 0.0, + "num_input_tokens_seen": 90197584, + "step": 133825 + }, + { + "epoch": 3.269489165221215, + "grad_norm": 0.00043149187695235014, + "learning_rate": 6.452454592733642e-07, + "loss": 0.0, + "num_input_tokens_seen": 90200720, + "step": 133830 + }, + { + "epoch": 3.269611316053062, + "grad_norm": 93.59136199951172, + "learning_rate": 6.451657294639475e-07, + "loss": 0.0838, + "num_input_tokens_seen": 90204496, + "step": 133835 + }, + { + "epoch": 3.2697334668849094, + "grad_norm": 0.06872415542602539, + "learning_rate": 6.450860022349811e-07, + "loss": 0.0318, + "num_input_tokens_seen": 90208144, + "step": 133840 + }, + { + "epoch": 3.2698556177167566, + "grad_norm": 0.00017835324979387224, + "learning_rate": 6.450062775870446e-07, + "loss": 0.0204, + "num_input_tokens_seen": 90211600, + "step": 133845 + }, + { + "epoch": 3.269977768548604, + "grad_norm": 0.0010258352849632502, + "learning_rate": 6.44926555520718e-07, + "loss": 0.0, + "num_input_tokens_seen": 90214928, + "step": 133850 + }, + { + "epoch": 3.270099919380451, + "grad_norm": 0.0022669630125164986, + "learning_rate": 6.44846836036581e-07, + "loss": 0.0451, + "num_input_tokens_seen": 90218640, + "step": 133855 + }, + { + "epoch": 3.270222070212298, + "grad_norm": 1563.13525390625, + "learning_rate": 6.447671191352134e-07, + "loss": 0.105, + "num_input_tokens_seen": 90221840, + "step": 133860 + }, + { + "epoch": 3.2703442210441453, + "grad_norm": 0.008289139717817307, + "learning_rate": 6.446874048171948e-07, + "loss": 0.0, + "num_input_tokens_seen": 90225168, + "step": 133865 + }, + { + "epoch": 3.2704663718759925, + "grad_norm": 0.003361885203048587, + "learning_rate": 6.446076930831049e-07, + "loss": 0.0, + "num_input_tokens_seen": 90228368, + "step": 133870 + }, + { + "epoch": 3.2705885227078397, + "grad_norm": 0.01708057150244713, + "learning_rate": 6.445279839335237e-07, + "loss": 0.0, + "num_input_tokens_seen": 90231504, + "step": 133875 + }, + { + "epoch": 3.270710673539687, + "grad_norm": 0.010200517252087593, + "learning_rate": 6.444482773690303e-07, + "loss": 0.0, + "num_input_tokens_seen": 90234832, + "step": 133880 + }, + { + "epoch": 3.270832824371534, + "grad_norm": 0.0008809025166556239, + "learning_rate": 6.443685733902046e-07, + "loss": 0.0451, + "num_input_tokens_seen": 90238032, + "step": 133885 + }, + { + "epoch": 3.2709549752033813, + "grad_norm": 0.018013739958405495, + "learning_rate": 6.442888719976266e-07, + "loss": 0.0, + "num_input_tokens_seen": 90241488, + "step": 133890 + }, + { + "epoch": 3.2710771260352285, + "grad_norm": 0.00027410429902374744, + "learning_rate": 6.442091731918756e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90244944, + "step": 133895 + }, + { + "epoch": 3.2711992768670752, + "grad_norm": 0.09898103773593903, + "learning_rate": 6.441294769735312e-07, + "loss": 0.0, + "num_input_tokens_seen": 90248656, + "step": 133900 + }, + { + "epoch": 3.271321427698923, + "grad_norm": 0.0009784942958503962, + "learning_rate": 6.440497833431726e-07, + "loss": 0.0203, + "num_input_tokens_seen": 90251984, + "step": 133905 + }, + { + "epoch": 3.2714435785307696, + "grad_norm": 0.003985030576586723, + "learning_rate": 6.439700923013798e-07, + "loss": 0.0, + "num_input_tokens_seen": 90254992, + "step": 133910 + }, + { + "epoch": 3.271565729362617, + "grad_norm": 0.02826862223446369, + "learning_rate": 6.438904038487325e-07, + "loss": 0.0, + "num_input_tokens_seen": 90258256, + "step": 133915 + }, + { + "epoch": 3.271687880194464, + "grad_norm": 0.00011547313624760136, + "learning_rate": 6.438107179858097e-07, + "loss": 0.0, + "num_input_tokens_seen": 90261520, + "step": 133920 + }, + { + "epoch": 3.271810031026311, + "grad_norm": 73.17760467529297, + "learning_rate": 6.437310347131915e-07, + "loss": 0.0688, + "num_input_tokens_seen": 90264848, + "step": 133925 + }, + { + "epoch": 3.2719321818581584, + "grad_norm": 0.0009607910178601742, + "learning_rate": 6.436513540314566e-07, + "loss": 0.0, + "num_input_tokens_seen": 90268304, + "step": 133930 + }, + { + "epoch": 3.2720543326900056, + "grad_norm": 0.014877952635288239, + "learning_rate": 6.435716759411853e-07, + "loss": 0.0, + "num_input_tokens_seen": 90271632, + "step": 133935 + }, + { + "epoch": 3.2721764835218528, + "grad_norm": 97.39641571044922, + "learning_rate": 6.434920004429564e-07, + "loss": 0.0115, + "num_input_tokens_seen": 90275280, + "step": 133940 + }, + { + "epoch": 3.2722986343537, + "grad_norm": 0.002261359943076968, + "learning_rate": 6.434123275373496e-07, + "loss": 0.0, + "num_input_tokens_seen": 90278544, + "step": 133945 + }, + { + "epoch": 3.272420785185547, + "grad_norm": 15.63397216796875, + "learning_rate": 6.433326572249446e-07, + "loss": 0.0225, + "num_input_tokens_seen": 90281552, + "step": 133950 + }, + { + "epoch": 3.2725429360173943, + "grad_norm": 0.01223745010793209, + "learning_rate": 6.432529895063199e-07, + "loss": 0.0, + "num_input_tokens_seen": 90285200, + "step": 133955 + }, + { + "epoch": 3.2726650868492415, + "grad_norm": 0.0026963527780026197, + "learning_rate": 6.431733243820558e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90289040, + "step": 133960 + }, + { + "epoch": 3.2727872376810887, + "grad_norm": 0.005582587327808142, + "learning_rate": 6.430936618527311e-07, + "loss": 0.0, + "num_input_tokens_seen": 90292752, + "step": 133965 + }, + { + "epoch": 3.272909388512936, + "grad_norm": 0.0012265495024621487, + "learning_rate": 6.430140019189254e-07, + "loss": 0.0, + "num_input_tokens_seen": 90296336, + "step": 133970 + }, + { + "epoch": 3.273031539344783, + "grad_norm": 0.01831926219165325, + "learning_rate": 6.429343445812177e-07, + "loss": 0.0, + "num_input_tokens_seen": 90299280, + "step": 133975 + }, + { + "epoch": 3.2731536901766303, + "grad_norm": 0.00041980401147156954, + "learning_rate": 6.428546898401874e-07, + "loss": 0.0, + "num_input_tokens_seen": 90302416, + "step": 133980 + }, + { + "epoch": 3.2732758410084775, + "grad_norm": 0.0010928019182756543, + "learning_rate": 6.427750376964143e-07, + "loss": 0.0, + "num_input_tokens_seen": 90305616, + "step": 133985 + }, + { + "epoch": 3.2733979918403247, + "grad_norm": 0.0043005309998989105, + "learning_rate": 6.42695388150477e-07, + "loss": 0.0, + "num_input_tokens_seen": 90309456, + "step": 133990 + }, + { + "epoch": 3.2735201426721714, + "grad_norm": 0.01648613251745701, + "learning_rate": 6.426157412029549e-07, + "loss": 0.0027, + "num_input_tokens_seen": 90312720, + "step": 133995 + }, + { + "epoch": 3.2736422935040186, + "grad_norm": 0.001249257242307067, + "learning_rate": 6.425360968544272e-07, + "loss": 0.0, + "num_input_tokens_seen": 90316304, + "step": 134000 + }, + { + "epoch": 3.2737644443358658, + "grad_norm": 0.0011085321893915534, + "learning_rate": 6.42456455105473e-07, + "loss": 0.0021, + "num_input_tokens_seen": 90319376, + "step": 134005 + }, + { + "epoch": 3.273886595167713, + "grad_norm": 0.0005377625930123031, + "learning_rate": 6.42376815956672e-07, + "loss": 0.0, + "num_input_tokens_seen": 90322832, + "step": 134010 + }, + { + "epoch": 3.27400874599956, + "grad_norm": 0.05542025715112686, + "learning_rate": 6.422971794086028e-07, + "loss": 0.0, + "num_input_tokens_seen": 90325840, + "step": 134015 + }, + { + "epoch": 3.2741308968314073, + "grad_norm": 23.629776000976562, + "learning_rate": 6.422175454618448e-07, + "loss": 0.0455, + "num_input_tokens_seen": 90329488, + "step": 134020 + }, + { + "epoch": 3.2742530476632545, + "grad_norm": 0.024139046669006348, + "learning_rate": 6.421379141169769e-07, + "loss": 0.095, + "num_input_tokens_seen": 90333072, + "step": 134025 + }, + { + "epoch": 3.2743751984951017, + "grad_norm": 0.005243145395070314, + "learning_rate": 6.420582853745787e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90336144, + "step": 134030 + }, + { + "epoch": 3.274497349326949, + "grad_norm": 0.0035617812536656857, + "learning_rate": 6.419786592352283e-07, + "loss": 0.0591, + "num_input_tokens_seen": 90339664, + "step": 134035 + }, + { + "epoch": 3.274619500158796, + "grad_norm": 0.011242561042308807, + "learning_rate": 6.418990356995058e-07, + "loss": 0.0, + "num_input_tokens_seen": 90342736, + "step": 134040 + }, + { + "epoch": 3.2747416509906433, + "grad_norm": 0.01859310083091259, + "learning_rate": 6.418194147679898e-07, + "loss": 0.0, + "num_input_tokens_seen": 90346128, + "step": 134045 + }, + { + "epoch": 3.2748638018224905, + "grad_norm": 0.05717771500349045, + "learning_rate": 6.417397964412594e-07, + "loss": 0.0, + "num_input_tokens_seen": 90349520, + "step": 134050 + }, + { + "epoch": 3.2749859526543377, + "grad_norm": 0.004490395076572895, + "learning_rate": 6.416601807198936e-07, + "loss": 0.0214, + "num_input_tokens_seen": 90352848, + "step": 134055 + }, + { + "epoch": 3.275108103486185, + "grad_norm": 224.87542724609375, + "learning_rate": 6.41580567604471e-07, + "loss": 0.0247, + "num_input_tokens_seen": 90356432, + "step": 134060 + }, + { + "epoch": 3.275230254318032, + "grad_norm": 0.0020325894001871347, + "learning_rate": 6.415009570955709e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90359632, + "step": 134065 + }, + { + "epoch": 3.2753524051498792, + "grad_norm": 0.008936960250139236, + "learning_rate": 6.414213491937728e-07, + "loss": 0.0348, + "num_input_tokens_seen": 90363536, + "step": 134070 + }, + { + "epoch": 3.2754745559817264, + "grad_norm": 0.0028429508674889803, + "learning_rate": 6.413417438996547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90366928, + "step": 134075 + }, + { + "epoch": 3.275596706813573, + "grad_norm": 0.08826093375682831, + "learning_rate": 6.412621412137962e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90370256, + "step": 134080 + }, + { + "epoch": 3.275718857645421, + "grad_norm": 0.000634827243629843, + "learning_rate": 6.411825411367755e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90373456, + "step": 134085 + }, + { + "epoch": 3.2758410084772676, + "grad_norm": 0.002248347969725728, + "learning_rate": 6.411029436691723e-07, + "loss": 0.0, + "num_input_tokens_seen": 90376592, + "step": 134090 + }, + { + "epoch": 3.2759631593091147, + "grad_norm": 0.04211549088358879, + "learning_rate": 6.410233488115646e-07, + "loss": 0.0, + "num_input_tokens_seen": 90380112, + "step": 134095 + }, + { + "epoch": 3.276085310140962, + "grad_norm": 2.0768942704307847e-05, + "learning_rate": 6.409437565645319e-07, + "loss": 0.0, + "num_input_tokens_seen": 90383568, + "step": 134100 + }, + { + "epoch": 3.276207460972809, + "grad_norm": 0.011450367048382759, + "learning_rate": 6.408641669286529e-07, + "loss": 0.0, + "num_input_tokens_seen": 90386960, + "step": 134105 + }, + { + "epoch": 3.2763296118046563, + "grad_norm": 0.007889004424214363, + "learning_rate": 6.40784579904506e-07, + "loss": 0.0024, + "num_input_tokens_seen": 90390096, + "step": 134110 + }, + { + "epoch": 3.2764517626365035, + "grad_norm": 0.0003262287937104702, + "learning_rate": 6.407049954926705e-07, + "loss": 0.0653, + "num_input_tokens_seen": 90393424, + "step": 134115 + }, + { + "epoch": 3.2765739134683507, + "grad_norm": 0.008806253783404827, + "learning_rate": 6.406254136937246e-07, + "loss": 0.0, + "num_input_tokens_seen": 90396624, + "step": 134120 + }, + { + "epoch": 3.276696064300198, + "grad_norm": 9.64621503953822e-05, + "learning_rate": 6.405458345082477e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90399824, + "step": 134125 + }, + { + "epoch": 3.276818215132045, + "grad_norm": 0.010716472752392292, + "learning_rate": 6.404662579368178e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90403344, + "step": 134130 + }, + { + "epoch": 3.2769403659638923, + "grad_norm": 88.94921112060547, + "learning_rate": 6.403866839800141e-07, + "loss": 0.1229, + "num_input_tokens_seen": 90406416, + "step": 134135 + }, + { + "epoch": 3.2770625167957395, + "grad_norm": 0.004718456882983446, + "learning_rate": 6.403071126384154e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90410064, + "step": 134140 + }, + { + "epoch": 3.2771846676275866, + "grad_norm": 0.0020348127000033855, + "learning_rate": 6.402275439126e-07, + "loss": 0.0, + "num_input_tokens_seen": 90413328, + "step": 134145 + }, + { + "epoch": 3.277306818459434, + "grad_norm": 0.0047275470569729805, + "learning_rate": 6.401479778031467e-07, + "loss": 0.0, + "num_input_tokens_seen": 90416592, + "step": 134150 + }, + { + "epoch": 3.277428969291281, + "grad_norm": 0.002068186178803444, + "learning_rate": 6.40068414310634e-07, + "loss": 0.0, + "num_input_tokens_seen": 90419664, + "step": 134155 + }, + { + "epoch": 3.277551120123128, + "grad_norm": 0.002157870912924409, + "learning_rate": 6.399888534356404e-07, + "loss": 0.0, + "num_input_tokens_seen": 90422928, + "step": 134160 + }, + { + "epoch": 3.277673270954975, + "grad_norm": 0.006738911848515272, + "learning_rate": 6.399092951787451e-07, + "loss": 0.0, + "num_input_tokens_seen": 90426256, + "step": 134165 + }, + { + "epoch": 3.2777954217868226, + "grad_norm": 0.01319417729973793, + "learning_rate": 6.398297395405259e-07, + "loss": 0.0, + "num_input_tokens_seen": 90429712, + "step": 134170 + }, + { + "epoch": 3.2779175726186693, + "grad_norm": 298.2821350097656, + "learning_rate": 6.39750186521562e-07, + "loss": 0.0214, + "num_input_tokens_seen": 90432912, + "step": 134175 + }, + { + "epoch": 3.2780397234505165, + "grad_norm": 0.002581668319180608, + "learning_rate": 6.396706361224313e-07, + "loss": 0.0, + "num_input_tokens_seen": 90436624, + "step": 134180 + }, + { + "epoch": 3.2781618742823637, + "grad_norm": 0.00018515576084610075, + "learning_rate": 6.395910883437132e-07, + "loss": 0.0, + "num_input_tokens_seen": 90440016, + "step": 134185 + }, + { + "epoch": 3.278284025114211, + "grad_norm": 0.0025104996748268604, + "learning_rate": 6.395115431859851e-07, + "loss": 0.0, + "num_input_tokens_seen": 90443344, + "step": 134190 + }, + { + "epoch": 3.278406175946058, + "grad_norm": 0.01018584705889225, + "learning_rate": 6.394320006498262e-07, + "loss": 0.0, + "num_input_tokens_seen": 90446608, + "step": 134195 + }, + { + "epoch": 3.2785283267779053, + "grad_norm": 0.48450037837028503, + "learning_rate": 6.393524607358149e-07, + "loss": 0.006, + "num_input_tokens_seen": 90449808, + "step": 134200 + }, + { + "epoch": 3.2786504776097525, + "grad_norm": 0.00024042440054472536, + "learning_rate": 6.392729234445293e-07, + "loss": 0.0, + "num_input_tokens_seen": 90454352, + "step": 134205 + }, + { + "epoch": 3.2787726284415997, + "grad_norm": 0.01791437901556492, + "learning_rate": 6.391933887765484e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90458128, + "step": 134210 + }, + { + "epoch": 3.278894779273447, + "grad_norm": 0.034773245453834534, + "learning_rate": 6.391138567324497e-07, + "loss": 0.0, + "num_input_tokens_seen": 90461776, + "step": 134215 + }, + { + "epoch": 3.279016930105294, + "grad_norm": 0.0008298733155243099, + "learning_rate": 6.390343273128123e-07, + "loss": 0.0, + "num_input_tokens_seen": 90465168, + "step": 134220 + }, + { + "epoch": 3.2791390809371412, + "grad_norm": 0.00022249763424042612, + "learning_rate": 6.389548005182146e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90468304, + "step": 134225 + }, + { + "epoch": 3.2792612317689884, + "grad_norm": 0.0016737374244257808, + "learning_rate": 6.388752763492344e-07, + "loss": 0.0544, + "num_input_tokens_seen": 90471760, + "step": 134230 + }, + { + "epoch": 3.2793833826008356, + "grad_norm": 0.0006355304503813386, + "learning_rate": 6.387957548064505e-07, + "loss": 0.129, + "num_input_tokens_seen": 90474896, + "step": 134235 + }, + { + "epoch": 3.279505533432683, + "grad_norm": 0.00043898748117499053, + "learning_rate": 6.387162358904408e-07, + "loss": 0.0568, + "num_input_tokens_seen": 90478096, + "step": 134240 + }, + { + "epoch": 3.27962768426453, + "grad_norm": 0.002303024521097541, + "learning_rate": 6.386367196017842e-07, + "loss": 0.0546, + "num_input_tokens_seen": 90482064, + "step": 134245 + }, + { + "epoch": 3.279749835096377, + "grad_norm": 0.11519376933574677, + "learning_rate": 6.385572059410583e-07, + "loss": 0.0775, + "num_input_tokens_seen": 90485456, + "step": 134250 + }, + { + "epoch": 3.2798719859282244, + "grad_norm": 0.0054914443753659725, + "learning_rate": 6.384776949088416e-07, + "loss": 0.0155, + "num_input_tokens_seen": 90488464, + "step": 134255 + }, + { + "epoch": 3.279994136760071, + "grad_norm": 0.024513721466064453, + "learning_rate": 6.383981865057125e-07, + "loss": 0.0, + "num_input_tokens_seen": 90491920, + "step": 134260 + }, + { + "epoch": 3.2801162875919183, + "grad_norm": 0.07861457020044327, + "learning_rate": 6.383186807322488e-07, + "loss": 0.0, + "num_input_tokens_seen": 90495504, + "step": 134265 + }, + { + "epoch": 3.2802384384237655, + "grad_norm": 0.002974571194499731, + "learning_rate": 6.382391775890293e-07, + "loss": 0.1083, + "num_input_tokens_seen": 90498896, + "step": 134270 + }, + { + "epoch": 3.2803605892556127, + "grad_norm": 0.009940390475094318, + "learning_rate": 6.381596770766313e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90502480, + "step": 134275 + }, + { + "epoch": 3.28048274008746, + "grad_norm": 0.8798280954360962, + "learning_rate": 6.380801791956341e-07, + "loss": 0.0007, + "num_input_tokens_seen": 90505552, + "step": 134280 + }, + { + "epoch": 3.280604890919307, + "grad_norm": 0.007819697260856628, + "learning_rate": 6.380006839466146e-07, + "loss": 0.0008, + "num_input_tokens_seen": 90509008, + "step": 134285 + }, + { + "epoch": 3.2807270417511543, + "grad_norm": 0.001056055654771626, + "learning_rate": 6.379211913301514e-07, + "loss": 0.0192, + "num_input_tokens_seen": 90512592, + "step": 134290 + }, + { + "epoch": 3.2808491925830015, + "grad_norm": 0.015260012820363045, + "learning_rate": 6.378417013468233e-07, + "loss": 0.0, + "num_input_tokens_seen": 90515536, + "step": 134295 + }, + { + "epoch": 3.2809713434148486, + "grad_norm": 0.0004757667484227568, + "learning_rate": 6.377622139972074e-07, + "loss": 0.0, + "num_input_tokens_seen": 90518800, + "step": 134300 + }, + { + "epoch": 3.281093494246696, + "grad_norm": 0.014724750071763992, + "learning_rate": 6.376827292818822e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90522000, + "step": 134305 + }, + { + "epoch": 3.281215645078543, + "grad_norm": 0.001603720709681511, + "learning_rate": 6.376032472014256e-07, + "loss": 0.0, + "num_input_tokens_seen": 90525200, + "step": 134310 + }, + { + "epoch": 3.28133779591039, + "grad_norm": 13.405024528503418, + "learning_rate": 6.375237677564154e-07, + "loss": 0.029, + "num_input_tokens_seen": 90528272, + "step": 134315 + }, + { + "epoch": 3.2814599467422374, + "grad_norm": 0.00246247136965394, + "learning_rate": 6.374442909474304e-07, + "loss": 0.0, + "num_input_tokens_seen": 90531216, + "step": 134320 + }, + { + "epoch": 3.2815820975740846, + "grad_norm": 0.002576568629592657, + "learning_rate": 6.373648167750475e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90534672, + "step": 134325 + }, + { + "epoch": 3.281704248405932, + "grad_norm": 0.004765174351632595, + "learning_rate": 6.372853452398457e-07, + "loss": 0.0, + "num_input_tokens_seen": 90538000, + "step": 134330 + }, + { + "epoch": 3.281826399237779, + "grad_norm": 0.07081875205039978, + "learning_rate": 6.37205876342402e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90541392, + "step": 134335 + }, + { + "epoch": 3.281948550069626, + "grad_norm": 0.35364988446235657, + "learning_rate": 6.371264100832951e-07, + "loss": 0.057, + "num_input_tokens_seen": 90544976, + "step": 134340 + }, + { + "epoch": 3.282070700901473, + "grad_norm": 0.006533215753734112, + "learning_rate": 6.370469464631021e-07, + "loss": 0.0, + "num_input_tokens_seen": 90547920, + "step": 134345 + }, + { + "epoch": 3.2821928517333205, + "grad_norm": 0.04874301329255104, + "learning_rate": 6.369674854824017e-07, + "loss": 0.0, + "num_input_tokens_seen": 90550992, + "step": 134350 + }, + { + "epoch": 3.2823150025651673, + "grad_norm": 0.0069482699036598206, + "learning_rate": 6.368880271417712e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90553872, + "step": 134355 + }, + { + "epoch": 3.2824371533970145, + "grad_norm": 0.0007645434234291315, + "learning_rate": 6.368085714417888e-07, + "loss": 0.0, + "num_input_tokens_seen": 90557584, + "step": 134360 + }, + { + "epoch": 3.2825593042288617, + "grad_norm": 0.0026880523655563593, + "learning_rate": 6.367291183830322e-07, + "loss": 0.0, + "num_input_tokens_seen": 90561104, + "step": 134365 + }, + { + "epoch": 3.282681455060709, + "grad_norm": 40.059661865234375, + "learning_rate": 6.366496679660789e-07, + "loss": 0.0144, + "num_input_tokens_seen": 90564560, + "step": 134370 + }, + { + "epoch": 3.282803605892556, + "grad_norm": 0.848136305809021, + "learning_rate": 6.365702201915069e-07, + "loss": 0.0507, + "num_input_tokens_seen": 90567632, + "step": 134375 + }, + { + "epoch": 3.2829257567244032, + "grad_norm": 0.01674298197031021, + "learning_rate": 6.364907750598942e-07, + "loss": 0.0, + "num_input_tokens_seen": 90571088, + "step": 134380 + }, + { + "epoch": 3.2830479075562504, + "grad_norm": 0.0930301696062088, + "learning_rate": 6.364113325718183e-07, + "loss": 0.0, + "num_input_tokens_seen": 90574416, + "step": 134385 + }, + { + "epoch": 3.2831700583880976, + "grad_norm": 0.024816663935780525, + "learning_rate": 6.363318927278571e-07, + "loss": 0.0, + "num_input_tokens_seen": 90577744, + "step": 134390 + }, + { + "epoch": 3.283292209219945, + "grad_norm": 0.002348346170037985, + "learning_rate": 6.36252455528588e-07, + "loss": 0.0395, + "num_input_tokens_seen": 90580752, + "step": 134395 + }, + { + "epoch": 3.283414360051792, + "grad_norm": 0.0015244438545778394, + "learning_rate": 6.361730209745891e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90583888, + "step": 134400 + }, + { + "epoch": 3.283536510883639, + "grad_norm": 0.005531086586415768, + "learning_rate": 6.360935890664376e-07, + "loss": 0.0, + "num_input_tokens_seen": 90587408, + "step": 134405 + }, + { + "epoch": 3.2836586617154864, + "grad_norm": 0.030012428760528564, + "learning_rate": 6.360141598047115e-07, + "loss": 0.0, + "num_input_tokens_seen": 90591248, + "step": 134410 + }, + { + "epoch": 3.2837808125473336, + "grad_norm": 0.40428632497787476, + "learning_rate": 6.359347331899887e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90595280, + "step": 134415 + }, + { + "epoch": 3.2839029633791808, + "grad_norm": 0.03745681792497635, + "learning_rate": 6.358553092228458e-07, + "loss": 0.0, + "num_input_tokens_seen": 90598288, + "step": 134420 + }, + { + "epoch": 3.284025114211028, + "grad_norm": 0.01733500324189663, + "learning_rate": 6.357758879038617e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90601488, + "step": 134425 + }, + { + "epoch": 3.284147265042875, + "grad_norm": 0.0021801122929900885, + "learning_rate": 6.356964692336127e-07, + "loss": 0.0, + "num_input_tokens_seen": 90604944, + "step": 134430 + }, + { + "epoch": 3.2842694158747223, + "grad_norm": 0.00015866855392232537, + "learning_rate": 6.356170532126774e-07, + "loss": 0.0, + "num_input_tokens_seen": 90608272, + "step": 134435 + }, + { + "epoch": 3.284391566706569, + "grad_norm": 86.30461120605469, + "learning_rate": 6.355376398416325e-07, + "loss": 0.0718, + "num_input_tokens_seen": 90611408, + "step": 134440 + }, + { + "epoch": 3.2845137175384163, + "grad_norm": 0.0021867312025278807, + "learning_rate": 6.354582291210559e-07, + "loss": 0.0, + "num_input_tokens_seen": 90614288, + "step": 134445 + }, + { + "epoch": 3.2846358683702634, + "grad_norm": 0.0006792885833419859, + "learning_rate": 6.353788210515255e-07, + "loss": 0.0, + "num_input_tokens_seen": 90617488, + "step": 134450 + }, + { + "epoch": 3.2847580192021106, + "grad_norm": 0.01702149212360382, + "learning_rate": 6.352994156336182e-07, + "loss": 0.0, + "num_input_tokens_seen": 90620688, + "step": 134455 + }, + { + "epoch": 3.284880170033958, + "grad_norm": 0.0014157212572172284, + "learning_rate": 6.352200128679117e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90623888, + "step": 134460 + }, + { + "epoch": 3.285002320865805, + "grad_norm": 0.00022495303710456938, + "learning_rate": 6.351406127549834e-07, + "loss": 0.0, + "num_input_tokens_seen": 90627088, + "step": 134465 + }, + { + "epoch": 3.285124471697652, + "grad_norm": 159.6038360595703, + "learning_rate": 6.350612152954105e-07, + "loss": 0.0512, + "num_input_tokens_seen": 90631184, + "step": 134470 + }, + { + "epoch": 3.2852466225294994, + "grad_norm": 19.899839401245117, + "learning_rate": 6.349818204897708e-07, + "loss": 0.0382, + "num_input_tokens_seen": 90634576, + "step": 134475 + }, + { + "epoch": 3.2853687733613466, + "grad_norm": 0.0002656186406966299, + "learning_rate": 6.349024283386413e-07, + "loss": 0.0, + "num_input_tokens_seen": 90638032, + "step": 134480 + }, + { + "epoch": 3.2854909241931938, + "grad_norm": 0.0022826252970844507, + "learning_rate": 6.348230388425999e-07, + "loss": 0.0, + "num_input_tokens_seen": 90641424, + "step": 134485 + }, + { + "epoch": 3.285613075025041, + "grad_norm": 0.0040170783177018166, + "learning_rate": 6.347436520022231e-07, + "loss": 0.0, + "num_input_tokens_seen": 90644688, + "step": 134490 + }, + { + "epoch": 3.285735225856888, + "grad_norm": 0.02008282206952572, + "learning_rate": 6.346642678180891e-07, + "loss": 0.0134, + "num_input_tokens_seen": 90648016, + "step": 134495 + }, + { + "epoch": 3.2858573766887353, + "grad_norm": 0.001128724543377757, + "learning_rate": 6.345848862907746e-07, + "loss": 0.0007, + "num_input_tokens_seen": 90651344, + "step": 134500 + }, + { + "epoch": 3.2859795275205825, + "grad_norm": 0.0005452800542116165, + "learning_rate": 6.34505507420857e-07, + "loss": 0.0, + "num_input_tokens_seen": 90654864, + "step": 134505 + }, + { + "epoch": 3.2861016783524297, + "grad_norm": 0.026881614699959755, + "learning_rate": 6.344261312089138e-07, + "loss": 0.0, + "num_input_tokens_seen": 90658576, + "step": 134510 + }, + { + "epoch": 3.286223829184277, + "grad_norm": 0.003952720668166876, + "learning_rate": 6.343467576555222e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90661648, + "step": 134515 + }, + { + "epoch": 3.286345980016124, + "grad_norm": 0.23101937770843506, + "learning_rate": 6.342673867612594e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90665424, + "step": 134520 + }, + { + "epoch": 3.286468130847971, + "grad_norm": 0.10918232798576355, + "learning_rate": 6.341880185267021e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90668624, + "step": 134525 + }, + { + "epoch": 3.2865902816798185, + "grad_norm": 0.0025677045341581106, + "learning_rate": 6.34108652952428e-07, + "loss": 0.0, + "num_input_tokens_seen": 90671952, + "step": 134530 + }, + { + "epoch": 3.2867124325116652, + "grad_norm": 0.04821763187646866, + "learning_rate": 6.340292900390146e-07, + "loss": 0.0, + "num_input_tokens_seen": 90675536, + "step": 134535 + }, + { + "epoch": 3.2868345833435124, + "grad_norm": 0.0005546039319597185, + "learning_rate": 6.339499297870382e-07, + "loss": 0.0, + "num_input_tokens_seen": 90678416, + "step": 134540 + }, + { + "epoch": 3.2869567341753596, + "grad_norm": 0.0003405036695767194, + "learning_rate": 6.338705721970768e-07, + "loss": 0.0, + "num_input_tokens_seen": 90681616, + "step": 134545 + }, + { + "epoch": 3.287078885007207, + "grad_norm": 0.0017196012195199728, + "learning_rate": 6.337912172697066e-07, + "loss": 0.0, + "num_input_tokens_seen": 90685072, + "step": 134550 + }, + { + "epoch": 3.287201035839054, + "grad_norm": 6.0141754150390625, + "learning_rate": 6.337118650055056e-07, + "loss": 0.0003, + "num_input_tokens_seen": 90688080, + "step": 134555 + }, + { + "epoch": 3.287323186670901, + "grad_norm": 0.010194681584835052, + "learning_rate": 6.336325154050502e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90691280, + "step": 134560 + }, + { + "epoch": 3.2874453375027484, + "grad_norm": 0.0034128446131944656, + "learning_rate": 6.335531684689177e-07, + "loss": 0.0, + "num_input_tokens_seen": 90694928, + "step": 134565 + }, + { + "epoch": 3.2875674883345956, + "grad_norm": 0.004803723655641079, + "learning_rate": 6.334738241976854e-07, + "loss": 0.0319, + "num_input_tokens_seen": 90698448, + "step": 134570 + }, + { + "epoch": 3.2876896391664427, + "grad_norm": 0.00041958095971494913, + "learning_rate": 6.333944825919295e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90701648, + "step": 134575 + }, + { + "epoch": 3.28781178999829, + "grad_norm": 0.0028518533799797297, + "learning_rate": 6.333151436522282e-07, + "loss": 0.0, + "num_input_tokens_seen": 90705424, + "step": 134580 + }, + { + "epoch": 3.287933940830137, + "grad_norm": 0.0027906023897230625, + "learning_rate": 6.33235807379157e-07, + "loss": 0.0, + "num_input_tokens_seen": 90708880, + "step": 134585 + }, + { + "epoch": 3.2880560916619843, + "grad_norm": 31.440628051757812, + "learning_rate": 6.331564737732944e-07, + "loss": 0.1364, + "num_input_tokens_seen": 90711888, + "step": 134590 + }, + { + "epoch": 3.2881782424938315, + "grad_norm": 49.43909454345703, + "learning_rate": 6.330771428352161e-07, + "loss": 0.0446, + "num_input_tokens_seen": 90714768, + "step": 134595 + }, + { + "epoch": 3.2883003933256787, + "grad_norm": 0.00022165839618537575, + "learning_rate": 6.329978145654994e-07, + "loss": 0.0, + "num_input_tokens_seen": 90717904, + "step": 134600 + }, + { + "epoch": 3.288422544157526, + "grad_norm": 0.0024501916486769915, + "learning_rate": 6.329184889647219e-07, + "loss": 0.0, + "num_input_tokens_seen": 90722000, + "step": 134605 + }, + { + "epoch": 3.2885446949893726, + "grad_norm": 0.1524658054113388, + "learning_rate": 6.328391660334596e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90725008, + "step": 134610 + }, + { + "epoch": 3.2886668458212203, + "grad_norm": 569.6676635742188, + "learning_rate": 6.327598457722896e-07, + "loss": 0.0378, + "num_input_tokens_seen": 90728336, + "step": 134615 + }, + { + "epoch": 3.288788996653067, + "grad_norm": 0.019205080345273018, + "learning_rate": 6.326805281817887e-07, + "loss": 0.0, + "num_input_tokens_seen": 90731792, + "step": 134620 + }, + { + "epoch": 3.288911147484914, + "grad_norm": 0.0007060529896989465, + "learning_rate": 6.326012132625338e-07, + "loss": 0.0, + "num_input_tokens_seen": 90735504, + "step": 134625 + }, + { + "epoch": 3.2890332983167614, + "grad_norm": 0.0064339907839894295, + "learning_rate": 6.32521901015102e-07, + "loss": 0.0, + "num_input_tokens_seen": 90738960, + "step": 134630 + }, + { + "epoch": 3.2891554491486086, + "grad_norm": 0.0008209880325011909, + "learning_rate": 6.324425914400693e-07, + "loss": 0.0, + "num_input_tokens_seen": 90741840, + "step": 134635 + }, + { + "epoch": 3.2892775999804558, + "grad_norm": 0.0007558853249065578, + "learning_rate": 6.323632845380134e-07, + "loss": 0.0028, + "num_input_tokens_seen": 90745104, + "step": 134640 + }, + { + "epoch": 3.289399750812303, + "grad_norm": 0.0004715778341051191, + "learning_rate": 6.322839803095102e-07, + "loss": 0.0, + "num_input_tokens_seen": 90748432, + "step": 134645 + }, + { + "epoch": 3.28952190164415, + "grad_norm": 0.0012815663358196616, + "learning_rate": 6.322046787551372e-07, + "loss": 0.0, + "num_input_tokens_seen": 90751440, + "step": 134650 + }, + { + "epoch": 3.2896440524759973, + "grad_norm": 0.0019647274166345596, + "learning_rate": 6.321253798754702e-07, + "loss": 0.0, + "num_input_tokens_seen": 90755024, + "step": 134655 + }, + { + "epoch": 3.2897662033078445, + "grad_norm": 0.9728825688362122, + "learning_rate": 6.320460836710866e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90758544, + "step": 134660 + }, + { + "epoch": 3.2898883541396917, + "grad_norm": 0.014007066376507282, + "learning_rate": 6.319667901425629e-07, + "loss": 0.0, + "num_input_tokens_seen": 90761936, + "step": 134665 + }, + { + "epoch": 3.290010504971539, + "grad_norm": 0.001597664668224752, + "learning_rate": 6.318874992904757e-07, + "loss": 0.0, + "num_input_tokens_seen": 90765008, + "step": 134670 + }, + { + "epoch": 3.290132655803386, + "grad_norm": 0.13420917093753815, + "learning_rate": 6.318082111154018e-07, + "loss": 0.0, + "num_input_tokens_seen": 90768336, + "step": 134675 + }, + { + "epoch": 3.2902548066352333, + "grad_norm": 0.0011054584756493568, + "learning_rate": 6.317289256179172e-07, + "loss": 0.0, + "num_input_tokens_seen": 90771536, + "step": 134680 + }, + { + "epoch": 3.2903769574670805, + "grad_norm": 0.014804008416831493, + "learning_rate": 6.316496427985995e-07, + "loss": 0.0004, + "num_input_tokens_seen": 90774928, + "step": 134685 + }, + { + "epoch": 3.2904991082989277, + "grad_norm": 0.0023647164925932884, + "learning_rate": 6.31570362658024e-07, + "loss": 0.0083, + "num_input_tokens_seen": 90778320, + "step": 134690 + }, + { + "epoch": 3.290621259130775, + "grad_norm": 0.001251070643775165, + "learning_rate": 6.31491085196768e-07, + "loss": 0.0, + "num_input_tokens_seen": 90781520, + "step": 134695 + }, + { + "epoch": 3.290743409962622, + "grad_norm": 0.0034466327633708715, + "learning_rate": 6.314118104154084e-07, + "loss": 0.0, + "num_input_tokens_seen": 90784784, + "step": 134700 + }, + { + "epoch": 3.290865560794469, + "grad_norm": 0.00014235633716452867, + "learning_rate": 6.313325383145208e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90787664, + "step": 134705 + }, + { + "epoch": 3.290987711626316, + "grad_norm": 0.03204859048128128, + "learning_rate": 6.312532688946826e-07, + "loss": 0.0, + "num_input_tokens_seen": 90790800, + "step": 134710 + }, + { + "epoch": 3.291109862458163, + "grad_norm": 0.05829733610153198, + "learning_rate": 6.311740021564693e-07, + "loss": 0.0, + "num_input_tokens_seen": 90793872, + "step": 134715 + }, + { + "epoch": 3.2912320132900104, + "grad_norm": 0.0008750380948185921, + "learning_rate": 6.310947381004582e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90797136, + "step": 134720 + }, + { + "epoch": 3.2913541641218576, + "grad_norm": 0.0014805016107857227, + "learning_rate": 6.310154767272255e-07, + "loss": 0.0, + "num_input_tokens_seen": 90800528, + "step": 134725 + }, + { + "epoch": 3.2914763149537047, + "grad_norm": 0.008990327827632427, + "learning_rate": 6.309362180373472e-07, + "loss": 0.0609, + "num_input_tokens_seen": 90806800, + "step": 134730 + }, + { + "epoch": 3.291598465785552, + "grad_norm": 0.0007601430988870561, + "learning_rate": 6.308569620314003e-07, + "loss": 0.0, + "num_input_tokens_seen": 90810128, + "step": 134735 + }, + { + "epoch": 3.291720616617399, + "grad_norm": 0.0013207076117396355, + "learning_rate": 6.307777087099603e-07, + "loss": 0.0, + "num_input_tokens_seen": 90813008, + "step": 134740 + }, + { + "epoch": 3.2918427674492463, + "grad_norm": 0.0012732624309137464, + "learning_rate": 6.306984580736048e-07, + "loss": 0.0299, + "num_input_tokens_seen": 90816208, + "step": 134745 + }, + { + "epoch": 3.2919649182810935, + "grad_norm": 0.0010163038969039917, + "learning_rate": 6.306192101229089e-07, + "loss": 0.0011, + "num_input_tokens_seen": 90819664, + "step": 134750 + }, + { + "epoch": 3.2920870691129407, + "grad_norm": 0.00017418651259504259, + "learning_rate": 6.305399648584495e-07, + "loss": 0.0, + "num_input_tokens_seen": 90822864, + "step": 134755 + }, + { + "epoch": 3.292209219944788, + "grad_norm": 0.00439677806571126, + "learning_rate": 6.304607222808032e-07, + "loss": 0.0, + "num_input_tokens_seen": 90826064, + "step": 134760 + }, + { + "epoch": 3.292331370776635, + "grad_norm": 0.005836864467710257, + "learning_rate": 6.303814823905458e-07, + "loss": 0.0, + "num_input_tokens_seen": 90829072, + "step": 134765 + }, + { + "epoch": 3.2924535216084823, + "grad_norm": 0.0002580733271315694, + "learning_rate": 6.303022451882536e-07, + "loss": 0.0679, + "num_input_tokens_seen": 90832592, + "step": 134770 + }, + { + "epoch": 3.2925756724403294, + "grad_norm": 0.0020530004985630512, + "learning_rate": 6.30223010674503e-07, + "loss": 0.0, + "num_input_tokens_seen": 90836048, + "step": 134775 + }, + { + "epoch": 3.2926978232721766, + "grad_norm": 0.0024392507039010525, + "learning_rate": 6.301437788498698e-07, + "loss": 0.0, + "num_input_tokens_seen": 90839312, + "step": 134780 + }, + { + "epoch": 3.292819974104024, + "grad_norm": 0.7272111177444458, + "learning_rate": 6.30064549714931e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90842576, + "step": 134785 + }, + { + "epoch": 3.2929421249358706, + "grad_norm": 0.02128804847598076, + "learning_rate": 6.299853232702619e-07, + "loss": 0.0, + "num_input_tokens_seen": 90845648, + "step": 134790 + }, + { + "epoch": 3.293064275767718, + "grad_norm": 0.0028478854801505804, + "learning_rate": 6.299060995164394e-07, + "loss": 0.056, + "num_input_tokens_seen": 90848784, + "step": 134795 + }, + { + "epoch": 3.293186426599565, + "grad_norm": 0.0012133775744587183, + "learning_rate": 6.298268784540389e-07, + "loss": 0.0, + "num_input_tokens_seen": 90852304, + "step": 134800 + }, + { + "epoch": 3.293308577431412, + "grad_norm": 0.030529484152793884, + "learning_rate": 6.297476600836374e-07, + "loss": 0.0, + "num_input_tokens_seen": 90855312, + "step": 134805 + }, + { + "epoch": 3.2934307282632593, + "grad_norm": 1.027057409286499, + "learning_rate": 6.2966844440581e-07, + "loss": 0.0003, + "num_input_tokens_seen": 90858768, + "step": 134810 + }, + { + "epoch": 3.2935528790951065, + "grad_norm": 0.0038657994009554386, + "learning_rate": 6.295892314211334e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90862032, + "step": 134815 + }, + { + "epoch": 3.2936750299269537, + "grad_norm": 0.0011094954097643495, + "learning_rate": 6.295100211301836e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90865552, + "step": 134820 + }, + { + "epoch": 3.293797180758801, + "grad_norm": 0.00212948489934206, + "learning_rate": 6.294308135335367e-07, + "loss": 0.0, + "num_input_tokens_seen": 90868688, + "step": 134825 + }, + { + "epoch": 3.293919331590648, + "grad_norm": 0.011899611912667751, + "learning_rate": 6.293516086317687e-07, + "loss": 0.0, + "num_input_tokens_seen": 90872016, + "step": 134830 + }, + { + "epoch": 3.2940414824224953, + "grad_norm": 0.005786948837339878, + "learning_rate": 6.292724064254551e-07, + "loss": 0.0, + "num_input_tokens_seen": 90875216, + "step": 134835 + }, + { + "epoch": 3.2941636332543425, + "grad_norm": 0.00014639328583143651, + "learning_rate": 6.291932069151726e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90878288, + "step": 134840 + }, + { + "epoch": 3.2942857840861897, + "grad_norm": 0.0010571659076958895, + "learning_rate": 6.291140101014966e-07, + "loss": 0.0, + "num_input_tokens_seen": 90881616, + "step": 134845 + }, + { + "epoch": 3.294407934918037, + "grad_norm": 0.00961493793874979, + "learning_rate": 6.290348159850032e-07, + "loss": 0.0536, + "num_input_tokens_seen": 90884688, + "step": 134850 + }, + { + "epoch": 3.294530085749884, + "grad_norm": 0.0011426964774727821, + "learning_rate": 6.289556245662687e-07, + "loss": 0.0, + "num_input_tokens_seen": 90888720, + "step": 134855 + }, + { + "epoch": 3.2946522365817312, + "grad_norm": 0.060139358043670654, + "learning_rate": 6.288764358458685e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90891728, + "step": 134860 + }, + { + "epoch": 3.2947743874135784, + "grad_norm": 0.004513459745794535, + "learning_rate": 6.287972498243788e-07, + "loss": 0.0, + "num_input_tokens_seen": 90895056, + "step": 134865 + }, + { + "epoch": 3.2948965382454256, + "grad_norm": 0.007571101188659668, + "learning_rate": 6.287180665023751e-07, + "loss": 0.0, + "num_input_tokens_seen": 90898320, + "step": 134870 + }, + { + "epoch": 3.295018689077273, + "grad_norm": 0.003237637458369136, + "learning_rate": 6.286388858804337e-07, + "loss": 0.0, + "num_input_tokens_seen": 90901840, + "step": 134875 + }, + { + "epoch": 3.29514083990912, + "grad_norm": 0.001679329783655703, + "learning_rate": 6.285597079591305e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90905104, + "step": 134880 + }, + { + "epoch": 3.2952629907409667, + "grad_norm": 0.00029259934672154486, + "learning_rate": 6.284805327390404e-07, + "loss": 0.0, + "num_input_tokens_seen": 90908496, + "step": 134885 + }, + { + "epoch": 3.295385141572814, + "grad_norm": 0.003385191550478339, + "learning_rate": 6.284013602207403e-07, + "loss": 0.0, + "num_input_tokens_seen": 90911760, + "step": 134890 + }, + { + "epoch": 3.295507292404661, + "grad_norm": 0.0001985864364542067, + "learning_rate": 6.283221904048051e-07, + "loss": 0.0, + "num_input_tokens_seen": 90915024, + "step": 134895 + }, + { + "epoch": 3.2956294432365083, + "grad_norm": 0.004313347861170769, + "learning_rate": 6.282430232918112e-07, + "loss": 0.0773, + "num_input_tokens_seen": 90918288, + "step": 134900 + }, + { + "epoch": 3.2957515940683555, + "grad_norm": 0.0002821721136569977, + "learning_rate": 6.281638588823337e-07, + "loss": 0.0365, + "num_input_tokens_seen": 90921360, + "step": 134905 + }, + { + "epoch": 3.2958737449002027, + "grad_norm": 0.002907287096604705, + "learning_rate": 6.280846971769486e-07, + "loss": 0.0978, + "num_input_tokens_seen": 90924432, + "step": 134910 + }, + { + "epoch": 3.29599589573205, + "grad_norm": 0.0004542749666143209, + "learning_rate": 6.280055381762319e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90928144, + "step": 134915 + }, + { + "epoch": 3.296118046563897, + "grad_norm": 0.004509879741817713, + "learning_rate": 6.27926381880759e-07, + "loss": 0.0002, + "num_input_tokens_seen": 90931600, + "step": 134920 + }, + { + "epoch": 3.2962401973957443, + "grad_norm": 0.012882355600595474, + "learning_rate": 6.278472282911054e-07, + "loss": 0.0003, + "num_input_tokens_seen": 90934672, + "step": 134925 + }, + { + "epoch": 3.2963623482275914, + "grad_norm": 0.000582054490223527, + "learning_rate": 6.277680774078469e-07, + "loss": 0.0, + "num_input_tokens_seen": 90938256, + "step": 134930 + }, + { + "epoch": 3.2964844990594386, + "grad_norm": 0.0010208197636529803, + "learning_rate": 6.276889292315588e-07, + "loss": 0.0643, + "num_input_tokens_seen": 90941456, + "step": 134935 + }, + { + "epoch": 3.296606649891286, + "grad_norm": 0.004477804992347956, + "learning_rate": 6.276097837628174e-07, + "loss": 0.0, + "num_input_tokens_seen": 90944656, + "step": 134940 + }, + { + "epoch": 3.296728800723133, + "grad_norm": 0.007399188820272684, + "learning_rate": 6.275306410021974e-07, + "loss": 0.0565, + "num_input_tokens_seen": 90947856, + "step": 134945 + }, + { + "epoch": 3.29685095155498, + "grad_norm": 0.05458146706223488, + "learning_rate": 6.274515009502751e-07, + "loss": 0.0555, + "num_input_tokens_seen": 90951376, + "step": 134950 + }, + { + "epoch": 3.2969731023868274, + "grad_norm": 0.0012545207282528281, + "learning_rate": 6.273723636076254e-07, + "loss": 0.0, + "num_input_tokens_seen": 90954832, + "step": 134955 + }, + { + "epoch": 3.2970952532186746, + "grad_norm": 0.007827935740351677, + "learning_rate": 6.272932289748244e-07, + "loss": 0.0005, + "num_input_tokens_seen": 90958224, + "step": 134960 + }, + { + "epoch": 3.2972174040505218, + "grad_norm": 0.0029065613634884357, + "learning_rate": 6.272140970524469e-07, + "loss": 0.0, + "num_input_tokens_seen": 90961552, + "step": 134965 + }, + { + "epoch": 3.2973395548823685, + "grad_norm": 0.002743184333667159, + "learning_rate": 6.27134967841069e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90964880, + "step": 134970 + }, + { + "epoch": 3.297461705714216, + "grad_norm": 0.0002498124958947301, + "learning_rate": 6.270558413412659e-07, + "loss": 0.0, + "num_input_tokens_seen": 90968208, + "step": 134975 + }, + { + "epoch": 3.297583856546063, + "grad_norm": 0.0028212815523147583, + "learning_rate": 6.26976717553613e-07, + "loss": 0.0001, + "num_input_tokens_seen": 90971536, + "step": 134980 + }, + { + "epoch": 3.29770600737791, + "grad_norm": 0.014758952893316746, + "learning_rate": 6.26897596478686e-07, + "loss": 0.0489, + "num_input_tokens_seen": 90974672, + "step": 134985 + }, + { + "epoch": 3.2978281582097573, + "grad_norm": 0.004514573607593775, + "learning_rate": 6.268184781170596e-07, + "loss": 0.0003, + "num_input_tokens_seen": 90978320, + "step": 134990 + }, + { + "epoch": 3.2979503090416045, + "grad_norm": 0.0010999558726325631, + "learning_rate": 6.2673936246931e-07, + "loss": 0.0, + "num_input_tokens_seen": 90981968, + "step": 134995 + }, + { + "epoch": 3.2980724598734517, + "grad_norm": 0.004744267091155052, + "learning_rate": 6.266602495360116e-07, + "loss": 0.0, + "num_input_tokens_seen": 90985040, + "step": 135000 + }, + { + "epoch": 3.298194610705299, + "grad_norm": 0.0059790643863379955, + "learning_rate": 6.265811393177405e-07, + "loss": 0.0, + "num_input_tokens_seen": 90988240, + "step": 135005 + }, + { + "epoch": 3.298316761537146, + "grad_norm": 0.022460024803876877, + "learning_rate": 6.265020318150721e-07, + "loss": 0.0, + "num_input_tokens_seen": 90991696, + "step": 135010 + }, + { + "epoch": 3.2984389123689932, + "grad_norm": 0.003150185802951455, + "learning_rate": 6.26422927028581e-07, + "loss": 0.0, + "num_input_tokens_seen": 90995024, + "step": 135015 + }, + { + "epoch": 3.2985610632008404, + "grad_norm": 0.001140235923230648, + "learning_rate": 6.263438249588433e-07, + "loss": 0.0, + "num_input_tokens_seen": 90998672, + "step": 135020 + }, + { + "epoch": 3.2986832140326876, + "grad_norm": 4.278022606740706e-05, + "learning_rate": 6.262647256064333e-07, + "loss": 0.0, + "num_input_tokens_seen": 91002384, + "step": 135025 + }, + { + "epoch": 3.298805364864535, + "grad_norm": 0.010379807092249393, + "learning_rate": 6.26185628971927e-07, + "loss": 0.0, + "num_input_tokens_seen": 91005648, + "step": 135030 + }, + { + "epoch": 3.298927515696382, + "grad_norm": 0.007365286350250244, + "learning_rate": 6.261065350558996e-07, + "loss": 0.0, + "num_input_tokens_seen": 91009040, + "step": 135035 + }, + { + "epoch": 3.299049666528229, + "grad_norm": 0.05299066752195358, + "learning_rate": 6.260274438589254e-07, + "loss": 0.0, + "num_input_tokens_seen": 91012688, + "step": 135040 + }, + { + "epoch": 3.2991718173600764, + "grad_norm": 0.002590995281934738, + "learning_rate": 6.25948355381581e-07, + "loss": 0.0, + "num_input_tokens_seen": 91016272, + "step": 135045 + }, + { + "epoch": 3.2992939681919236, + "grad_norm": 0.019199777394533157, + "learning_rate": 6.258692696244401e-07, + "loss": 0.0, + "num_input_tokens_seen": 91019792, + "step": 135050 + }, + { + "epoch": 3.2994161190237707, + "grad_norm": 0.009994552470743656, + "learning_rate": 6.257901865880791e-07, + "loss": 0.0, + "num_input_tokens_seen": 91022992, + "step": 135055 + }, + { + "epoch": 3.299538269855618, + "grad_norm": 0.006808954291045666, + "learning_rate": 6.257111062730718e-07, + "loss": 0.0, + "num_input_tokens_seen": 91026192, + "step": 135060 + }, + { + "epoch": 3.2996604206874647, + "grad_norm": 0.01960168033838272, + "learning_rate": 6.256320286799944e-07, + "loss": 0.0685, + "num_input_tokens_seen": 91029200, + "step": 135065 + }, + { + "epoch": 3.299782571519312, + "grad_norm": 0.0008817182388156652, + "learning_rate": 6.255529538094216e-07, + "loss": 0.0, + "num_input_tokens_seen": 91032272, + "step": 135070 + }, + { + "epoch": 3.299904722351159, + "grad_norm": 0.00037948612589389086, + "learning_rate": 6.254738816619285e-07, + "loss": 0.0, + "num_input_tokens_seen": 91035664, + "step": 135075 + }, + { + "epoch": 3.3000268731830062, + "grad_norm": 0.0006895409314893186, + "learning_rate": 6.253948122380898e-07, + "loss": 0.0, + "num_input_tokens_seen": 91039248, + "step": 135080 + }, + { + "epoch": 3.3001490240148534, + "grad_norm": 0.0011778641492128372, + "learning_rate": 6.25315745538481e-07, + "loss": 0.0, + "num_input_tokens_seen": 91043216, + "step": 135085 + }, + { + "epoch": 3.3002711748467006, + "grad_norm": 0.0011863255640491843, + "learning_rate": 6.252366815636767e-07, + "loss": 0.0, + "num_input_tokens_seen": 91046992, + "step": 135090 + }, + { + "epoch": 3.300393325678548, + "grad_norm": 0.0013244269648566842, + "learning_rate": 6.251576203142524e-07, + "loss": 0.0, + "num_input_tokens_seen": 91050192, + "step": 135095 + }, + { + "epoch": 3.300515476510395, + "grad_norm": 0.04680801182985306, + "learning_rate": 6.250785617907822e-07, + "loss": 0.0, + "num_input_tokens_seen": 91053456, + "step": 135100 + }, + { + "epoch": 3.300637627342242, + "grad_norm": 0.0024712015874683857, + "learning_rate": 6.249995059938421e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91056784, + "step": 135105 + }, + { + "epoch": 3.3007597781740894, + "grad_norm": 0.001630530459806323, + "learning_rate": 6.24920452924006e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91060240, + "step": 135110 + }, + { + "epoch": 3.3008819290059366, + "grad_norm": 159.94313049316406, + "learning_rate": 6.248414025818496e-07, + "loss": 0.096, + "num_input_tokens_seen": 91063504, + "step": 135115 + }, + { + "epoch": 3.3010040798377838, + "grad_norm": 0.02005532756447792, + "learning_rate": 6.247623549679471e-07, + "loss": 0.0, + "num_input_tokens_seen": 91066768, + "step": 135120 + }, + { + "epoch": 3.301126230669631, + "grad_norm": 0.0057156141847372055, + "learning_rate": 6.246833100828738e-07, + "loss": 0.0, + "num_input_tokens_seen": 91070032, + "step": 135125 + }, + { + "epoch": 3.301248381501478, + "grad_norm": 0.001090741716325283, + "learning_rate": 6.246042679272044e-07, + "loss": 0.0, + "num_input_tokens_seen": 91073552, + "step": 135130 + }, + { + "epoch": 3.3013705323333253, + "grad_norm": 17.12499237060547, + "learning_rate": 6.245252285015139e-07, + "loss": 0.0696, + "num_input_tokens_seen": 91077392, + "step": 135135 + }, + { + "epoch": 3.3014926831651725, + "grad_norm": 0.0025136242620646954, + "learning_rate": 6.24446191806377e-07, + "loss": 0.0, + "num_input_tokens_seen": 91080592, + "step": 135140 + }, + { + "epoch": 3.3016148339970197, + "grad_norm": 0.017409253865480423, + "learning_rate": 6.243671578423679e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91083920, + "step": 135145 + }, + { + "epoch": 3.3017369848288665, + "grad_norm": 0.001015423913486302, + "learning_rate": 6.242881266100625e-07, + "loss": 0.0, + "num_input_tokens_seen": 91087056, + "step": 135150 + }, + { + "epoch": 3.301859135660714, + "grad_norm": 0.013307973742485046, + "learning_rate": 6.242090981100343e-07, + "loss": 0.0, + "num_input_tokens_seen": 91090192, + "step": 135155 + }, + { + "epoch": 3.301981286492561, + "grad_norm": 0.02078000269830227, + "learning_rate": 6.241300723428587e-07, + "loss": 0.0, + "num_input_tokens_seen": 91093328, + "step": 135160 + }, + { + "epoch": 3.302103437324408, + "grad_norm": 0.013913290575146675, + "learning_rate": 6.240510493091108e-07, + "loss": 0.0, + "num_input_tokens_seen": 91096464, + "step": 135165 + }, + { + "epoch": 3.302225588156255, + "grad_norm": 20.877248764038086, + "learning_rate": 6.239720290093642e-07, + "loss": 0.0362, + "num_input_tokens_seen": 91099664, + "step": 135170 + }, + { + "epoch": 3.3023477389881024, + "grad_norm": 0.00022836009156890213, + "learning_rate": 6.238930114441947e-07, + "loss": 0.0, + "num_input_tokens_seen": 91102864, + "step": 135175 + }, + { + "epoch": 3.3024698898199496, + "grad_norm": 0.007412285078316927, + "learning_rate": 6.23813996614176e-07, + "loss": 0.0, + "num_input_tokens_seen": 91106320, + "step": 135180 + }, + { + "epoch": 3.302592040651797, + "grad_norm": 0.0035116132348775864, + "learning_rate": 6.237349845198831e-07, + "loss": 0.0, + "num_input_tokens_seen": 91109648, + "step": 135185 + }, + { + "epoch": 3.302714191483644, + "grad_norm": 0.21899212896823883, + "learning_rate": 6.23655975161891e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91112848, + "step": 135190 + }, + { + "epoch": 3.302836342315491, + "grad_norm": 0.04682852700352669, + "learning_rate": 6.235769685407734e-07, + "loss": 0.0875, + "num_input_tokens_seen": 91116048, + "step": 135195 + }, + { + "epoch": 3.3029584931473384, + "grad_norm": 0.005642554722726345, + "learning_rate": 6.234979646571057e-07, + "loss": 0.0, + "num_input_tokens_seen": 91119568, + "step": 135200 + }, + { + "epoch": 3.3030806439791855, + "grad_norm": 0.010612928308546543, + "learning_rate": 6.234189635114617e-07, + "loss": 0.0, + "num_input_tokens_seen": 91122960, + "step": 135205 + }, + { + "epoch": 3.3032027948110327, + "grad_norm": 0.033904921263456345, + "learning_rate": 6.233399651044167e-07, + "loss": 0.0, + "num_input_tokens_seen": 91125840, + "step": 135210 + }, + { + "epoch": 3.30332494564288, + "grad_norm": 0.014243927784264088, + "learning_rate": 6.232609694365443e-07, + "loss": 0.0, + "num_input_tokens_seen": 91129168, + "step": 135215 + }, + { + "epoch": 3.303447096474727, + "grad_norm": 0.0019423479679971933, + "learning_rate": 6.231819765084195e-07, + "loss": 0.0, + "num_input_tokens_seen": 91132688, + "step": 135220 + }, + { + "epoch": 3.3035692473065743, + "grad_norm": 221.44570922851562, + "learning_rate": 6.231029863206172e-07, + "loss": 0.005, + "num_input_tokens_seen": 91136208, + "step": 135225 + }, + { + "epoch": 3.3036913981384215, + "grad_norm": 0.008687580935657024, + "learning_rate": 6.23023998873711e-07, + "loss": 0.0, + "num_input_tokens_seen": 91139408, + "step": 135230 + }, + { + "epoch": 3.3038135489702682, + "grad_norm": 0.005624264944344759, + "learning_rate": 6.229450141682758e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91142480, + "step": 135235 + }, + { + "epoch": 3.303935699802116, + "grad_norm": 0.001273568719625473, + "learning_rate": 6.228660322048858e-07, + "loss": 0.0004, + "num_input_tokens_seen": 91146064, + "step": 135240 + }, + { + "epoch": 3.3040578506339626, + "grad_norm": 0.00161461450625211, + "learning_rate": 6.227870529841155e-07, + "loss": 0.029, + "num_input_tokens_seen": 91149776, + "step": 135245 + }, + { + "epoch": 3.30418000146581, + "grad_norm": 0.002101946622133255, + "learning_rate": 6.227080765065392e-07, + "loss": 0.0, + "num_input_tokens_seen": 91152976, + "step": 135250 + }, + { + "epoch": 3.304302152297657, + "grad_norm": 0.0007854495197534561, + "learning_rate": 6.226291027727311e-07, + "loss": 0.0, + "num_input_tokens_seen": 91156240, + "step": 135255 + }, + { + "epoch": 3.304424303129504, + "grad_norm": 0.0015872808871790767, + "learning_rate": 6.22550131783266e-07, + "loss": 0.0, + "num_input_tokens_seen": 91159632, + "step": 135260 + }, + { + "epoch": 3.3045464539613514, + "grad_norm": 0.0007271585636772215, + "learning_rate": 6.224711635387174e-07, + "loss": 0.0, + "num_input_tokens_seen": 91162704, + "step": 135265 + }, + { + "epoch": 3.3046686047931986, + "grad_norm": 0.4559313654899597, + "learning_rate": 6.223921980396606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91166160, + "step": 135270 + }, + { + "epoch": 3.3047907556250458, + "grad_norm": 0.0029029580764472485, + "learning_rate": 6.223132352866688e-07, + "loss": 0.0, + "num_input_tokens_seen": 91169424, + "step": 135275 + }, + { + "epoch": 3.304912906456893, + "grad_norm": 0.07211734354496002, + "learning_rate": 6.22234275280317e-07, + "loss": 0.0, + "num_input_tokens_seen": 91172432, + "step": 135280 + }, + { + "epoch": 3.30503505728874, + "grad_norm": 0.002586309565231204, + "learning_rate": 6.221553180211791e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91175504, + "step": 135285 + }, + { + "epoch": 3.3051572081205873, + "grad_norm": 16.60679054260254, + "learning_rate": 6.220763635098294e-07, + "loss": 0.0339, + "num_input_tokens_seen": 91178768, + "step": 135290 + }, + { + "epoch": 3.3052793589524345, + "grad_norm": 0.001326601137407124, + "learning_rate": 6.21997411746842e-07, + "loss": 0.0, + "num_input_tokens_seen": 91182032, + "step": 135295 + }, + { + "epoch": 3.3054015097842817, + "grad_norm": 0.011996055953204632, + "learning_rate": 6.21918462732791e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91185616, + "step": 135300 + }, + { + "epoch": 3.305523660616129, + "grad_norm": 6.76007111906074e-05, + "learning_rate": 6.218395164682509e-07, + "loss": 0.0, + "num_input_tokens_seen": 91188624, + "step": 135305 + }, + { + "epoch": 3.305645811447976, + "grad_norm": 0.0028651319444179535, + "learning_rate": 6.217605729537952e-07, + "loss": 0.1288, + "num_input_tokens_seen": 91191632, + "step": 135310 + }, + { + "epoch": 3.3057679622798233, + "grad_norm": 0.0010443766368553042, + "learning_rate": 6.216816321899984e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91194960, + "step": 135315 + }, + { + "epoch": 3.3058901131116705, + "grad_norm": 0.0022625199053436518, + "learning_rate": 6.216026941774348e-07, + "loss": 0.0034, + "num_input_tokens_seen": 91198608, + "step": 135320 + }, + { + "epoch": 3.3060122639435177, + "grad_norm": 0.0015713103348389268, + "learning_rate": 6.215237589166778e-07, + "loss": 0.0, + "num_input_tokens_seen": 91201488, + "step": 135325 + }, + { + "epoch": 3.3061344147753644, + "grad_norm": 0.0026938188821077347, + "learning_rate": 6.214448264083024e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91204816, + "step": 135330 + }, + { + "epoch": 3.3062565656072116, + "grad_norm": 0.0012552423868328333, + "learning_rate": 6.213658966528814e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91208400, + "step": 135335 + }, + { + "epoch": 3.306378716439059, + "grad_norm": 0.0023763503413647413, + "learning_rate": 6.212869696509896e-07, + "loss": 0.0, + "num_input_tokens_seen": 91211280, + "step": 135340 + }, + { + "epoch": 3.306500867270906, + "grad_norm": 0.0035993000492453575, + "learning_rate": 6.212080454032013e-07, + "loss": 0.0, + "num_input_tokens_seen": 91214672, + "step": 135345 + }, + { + "epoch": 3.306623018102753, + "grad_norm": 0.0008662957116030157, + "learning_rate": 6.211291239100893e-07, + "loss": 0.0, + "num_input_tokens_seen": 91218512, + "step": 135350 + }, + { + "epoch": 3.3067451689346004, + "grad_norm": 0.010020371526479721, + "learning_rate": 6.210502051722289e-07, + "loss": 0.0576, + "num_input_tokens_seen": 91222288, + "step": 135355 + }, + { + "epoch": 3.3068673197664475, + "grad_norm": 0.005591185763478279, + "learning_rate": 6.209712891901927e-07, + "loss": 0.0, + "num_input_tokens_seen": 91225616, + "step": 135360 + }, + { + "epoch": 3.3069894705982947, + "grad_norm": 0.008373158052563667, + "learning_rate": 6.208923759645557e-07, + "loss": 0.0, + "num_input_tokens_seen": 91229072, + "step": 135365 + }, + { + "epoch": 3.307111621430142, + "grad_norm": 0.0013248298782855272, + "learning_rate": 6.20813465495891e-07, + "loss": 0.0, + "num_input_tokens_seen": 91232400, + "step": 135370 + }, + { + "epoch": 3.307233772261989, + "grad_norm": 0.0008385555702261627, + "learning_rate": 6.207345577847727e-07, + "loss": 0.0, + "num_input_tokens_seen": 91235536, + "step": 135375 + }, + { + "epoch": 3.3073559230938363, + "grad_norm": 23.962797164916992, + "learning_rate": 6.20655652831775e-07, + "loss": 0.0256, + "num_input_tokens_seen": 91238928, + "step": 135380 + }, + { + "epoch": 3.3074780739256835, + "grad_norm": 0.016070673242211342, + "learning_rate": 6.205767506374713e-07, + "loss": 0.0, + "num_input_tokens_seen": 91242128, + "step": 135385 + }, + { + "epoch": 3.3076002247575307, + "grad_norm": 0.011566529050469398, + "learning_rate": 6.204978512024355e-07, + "loss": 0.0, + "num_input_tokens_seen": 91245520, + "step": 135390 + }, + { + "epoch": 3.307722375589378, + "grad_norm": 0.27769777178764343, + "learning_rate": 6.204189545272415e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91248848, + "step": 135395 + }, + { + "epoch": 3.307844526421225, + "grad_norm": 0.0017242628382518888, + "learning_rate": 6.203400606124629e-07, + "loss": 0.0418, + "num_input_tokens_seen": 91251792, + "step": 135400 + }, + { + "epoch": 3.3079666772530723, + "grad_norm": 0.011997255496680737, + "learning_rate": 6.202611694586735e-07, + "loss": 0.0334, + "num_input_tokens_seen": 91255376, + "step": 135405 + }, + { + "epoch": 3.3080888280849194, + "grad_norm": 12.103500366210938, + "learning_rate": 6.201822810664468e-07, + "loss": 0.0397, + "num_input_tokens_seen": 91258448, + "step": 135410 + }, + { + "epoch": 3.308210978916766, + "grad_norm": 0.012106460519134998, + "learning_rate": 6.201033954363571e-07, + "loss": 0.0214, + "num_input_tokens_seen": 91262096, + "step": 135415 + }, + { + "epoch": 3.308333129748614, + "grad_norm": 0.021580029278993607, + "learning_rate": 6.200245125689774e-07, + "loss": 0.0, + "num_input_tokens_seen": 91265552, + "step": 135420 + }, + { + "epoch": 3.3084552805804606, + "grad_norm": 0.001080291927792132, + "learning_rate": 6.19945632464882e-07, + "loss": 0.0, + "num_input_tokens_seen": 91268816, + "step": 135425 + }, + { + "epoch": 3.3085774314123078, + "grad_norm": 0.0014836577465757728, + "learning_rate": 6.198667551246437e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91272208, + "step": 135430 + }, + { + "epoch": 3.308699582244155, + "grad_norm": 0.0157320536673069, + "learning_rate": 6.197878805488368e-07, + "loss": 0.0, + "num_input_tokens_seen": 91275280, + "step": 135435 + }, + { + "epoch": 3.308821733076002, + "grad_norm": 0.017448775470256805, + "learning_rate": 6.197090087380348e-07, + "loss": 0.0004, + "num_input_tokens_seen": 91278736, + "step": 135440 + }, + { + "epoch": 3.3089438839078493, + "grad_norm": 0.004054314456880093, + "learning_rate": 6.196301396928109e-07, + "loss": 0.0, + "num_input_tokens_seen": 91282512, + "step": 135445 + }, + { + "epoch": 3.3090660347396965, + "grad_norm": 0.0010950237046927214, + "learning_rate": 6.195512734137395e-07, + "loss": 0.0726, + "num_input_tokens_seen": 91285200, + "step": 135450 + }, + { + "epoch": 3.3091881855715437, + "grad_norm": 0.027147632092237473, + "learning_rate": 6.194724099013929e-07, + "loss": 0.0016, + "num_input_tokens_seen": 91288208, + "step": 135455 + }, + { + "epoch": 3.309310336403391, + "grad_norm": 0.0017834958853200078, + "learning_rate": 6.193935491563458e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91292112, + "step": 135460 + }, + { + "epoch": 3.309432487235238, + "grad_norm": 0.0024778808001428843, + "learning_rate": 6.19314691179171e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91295376, + "step": 135465 + }, + { + "epoch": 3.3095546380670853, + "grad_norm": 11.526365280151367, + "learning_rate": 6.192358359704417e-07, + "loss": 0.0011, + "num_input_tokens_seen": 91298832, + "step": 135470 + }, + { + "epoch": 3.3096767888989325, + "grad_norm": 0.007461852394044399, + "learning_rate": 6.191569835307324e-07, + "loss": 0.0, + "num_input_tokens_seen": 91302672, + "step": 135475 + }, + { + "epoch": 3.3097989397307797, + "grad_norm": 0.0007520471699535847, + "learning_rate": 6.190781338606157e-07, + "loss": 0.0, + "num_input_tokens_seen": 91306000, + "step": 135480 + }, + { + "epoch": 3.309921090562627, + "grad_norm": 0.0005934469518251717, + "learning_rate": 6.189992869606655e-07, + "loss": 0.0, + "num_input_tokens_seen": 91309392, + "step": 135485 + }, + { + "epoch": 3.310043241394474, + "grad_norm": 0.0005032624467276037, + "learning_rate": 6.189204428314547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91312592, + "step": 135490 + }, + { + "epoch": 3.3101653922263212, + "grad_norm": 0.0002889156749006361, + "learning_rate": 6.18841601473557e-07, + "loss": 0.0, + "num_input_tokens_seen": 91315920, + "step": 135495 + }, + { + "epoch": 3.3102875430581684, + "grad_norm": 0.0017207504715770483, + "learning_rate": 6.18762762887546e-07, + "loss": 0.0027, + "num_input_tokens_seen": 91319056, + "step": 135500 + }, + { + "epoch": 3.3104096938900156, + "grad_norm": 0.00313011952675879, + "learning_rate": 6.186839270739943e-07, + "loss": 0.0, + "num_input_tokens_seen": 91322000, + "step": 135505 + }, + { + "epoch": 3.3105318447218623, + "grad_norm": 0.007349675986915827, + "learning_rate": 6.18605094033476e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91325136, + "step": 135510 + }, + { + "epoch": 3.3106539955537095, + "grad_norm": 0.0009442095761187375, + "learning_rate": 6.185262637665636e-07, + "loss": 0.0, + "num_input_tokens_seen": 91328016, + "step": 135515 + }, + { + "epoch": 3.3107761463855567, + "grad_norm": 0.00473749591037631, + "learning_rate": 6.184474362738314e-07, + "loss": 0.0311, + "num_input_tokens_seen": 91331472, + "step": 135520 + }, + { + "epoch": 3.310898297217404, + "grad_norm": 0.00017679596203379333, + "learning_rate": 6.183686115558515e-07, + "loss": 0.1495, + "num_input_tokens_seen": 91334864, + "step": 135525 + }, + { + "epoch": 3.311020448049251, + "grad_norm": 0.022237082943320274, + "learning_rate": 6.182897896131977e-07, + "loss": 0.0895, + "num_input_tokens_seen": 91338000, + "step": 135530 + }, + { + "epoch": 3.3111425988810983, + "grad_norm": 0.0017483303090557456, + "learning_rate": 6.182109704464438e-07, + "loss": 0.0, + "num_input_tokens_seen": 91341456, + "step": 135535 + }, + { + "epoch": 3.3112647497129455, + "grad_norm": 0.009556563571095467, + "learning_rate": 6.181321540561619e-07, + "loss": 0.0, + "num_input_tokens_seen": 91344656, + "step": 135540 + }, + { + "epoch": 3.3113869005447927, + "grad_norm": 0.013166706077754498, + "learning_rate": 6.18053340442926e-07, + "loss": 0.0, + "num_input_tokens_seen": 91347792, + "step": 135545 + }, + { + "epoch": 3.31150905137664, + "grad_norm": 0.00014834990724921227, + "learning_rate": 6.179745296073087e-07, + "loss": 0.0, + "num_input_tokens_seen": 91351440, + "step": 135550 + }, + { + "epoch": 3.311631202208487, + "grad_norm": 0.020284490659832954, + "learning_rate": 6.178957215498836e-07, + "loss": 0.0, + "num_input_tokens_seen": 91355152, + "step": 135555 + }, + { + "epoch": 3.3117533530403342, + "grad_norm": 0.03961563855409622, + "learning_rate": 6.178169162712234e-07, + "loss": 0.0366, + "num_input_tokens_seen": 91358160, + "step": 135560 + }, + { + "epoch": 3.3118755038721814, + "grad_norm": 0.029587585479021072, + "learning_rate": 6.177381137719013e-07, + "loss": 0.0, + "num_input_tokens_seen": 91361360, + "step": 135565 + }, + { + "epoch": 3.3119976547040286, + "grad_norm": 0.07171474397182465, + "learning_rate": 6.176593140524909e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91365072, + "step": 135570 + }, + { + "epoch": 3.312119805535876, + "grad_norm": 0.0006190555286593735, + "learning_rate": 6.175805171135642e-07, + "loss": 0.0, + "num_input_tokens_seen": 91368784, + "step": 135575 + }, + { + "epoch": 3.312241956367723, + "grad_norm": 0.001581455348059535, + "learning_rate": 6.175017229556953e-07, + "loss": 0.0, + "num_input_tokens_seen": 91371984, + "step": 135580 + }, + { + "epoch": 3.31236410719957, + "grad_norm": 0.027697941288352013, + "learning_rate": 6.174229315794564e-07, + "loss": 0.0658, + "num_input_tokens_seen": 91374736, + "step": 135585 + }, + { + "epoch": 3.3124862580314174, + "grad_norm": 0.009913300164043903, + "learning_rate": 6.17344142985421e-07, + "loss": 0.0009, + "num_input_tokens_seen": 91377936, + "step": 135590 + }, + { + "epoch": 3.312608408863264, + "grad_norm": 0.02108718268573284, + "learning_rate": 6.17265357174162e-07, + "loss": 0.0, + "num_input_tokens_seen": 91381200, + "step": 135595 + }, + { + "epoch": 3.3127305596951118, + "grad_norm": 0.0025244238786399364, + "learning_rate": 6.171865741462522e-07, + "loss": 0.0123, + "num_input_tokens_seen": 91384208, + "step": 135600 + }, + { + "epoch": 3.3128527105269585, + "grad_norm": 0.02910471521317959, + "learning_rate": 6.171077939022649e-07, + "loss": 0.0, + "num_input_tokens_seen": 91387728, + "step": 135605 + }, + { + "epoch": 3.3129748613588057, + "grad_norm": 0.003600621595978737, + "learning_rate": 6.170290164427721e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91390736, + "step": 135610 + }, + { + "epoch": 3.313097012190653, + "grad_norm": 0.0006075279670767486, + "learning_rate": 6.169502417683478e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91394000, + "step": 135615 + }, + { + "epoch": 3.3132191630225, + "grad_norm": 0.027739422395825386, + "learning_rate": 6.16871469879564e-07, + "loss": 0.0, + "num_input_tokens_seen": 91397392, + "step": 135620 + }, + { + "epoch": 3.3133413138543473, + "grad_norm": 0.013399646617472172, + "learning_rate": 6.16792700776994e-07, + "loss": 0.0403, + "num_input_tokens_seen": 91400656, + "step": 135625 + }, + { + "epoch": 3.3134634646861945, + "grad_norm": 0.003509106580168009, + "learning_rate": 6.167139344612108e-07, + "loss": 0.0, + "num_input_tokens_seen": 91403984, + "step": 135630 + }, + { + "epoch": 3.3135856155180416, + "grad_norm": 0.0002290195261593908, + "learning_rate": 6.166351709327866e-07, + "loss": 0.0, + "num_input_tokens_seen": 91407120, + "step": 135635 + }, + { + "epoch": 3.313707766349889, + "grad_norm": 0.0010968875139951706, + "learning_rate": 6.16556410192295e-07, + "loss": 0.0, + "num_input_tokens_seen": 91410640, + "step": 135640 + }, + { + "epoch": 3.313829917181736, + "grad_norm": 57.31441879272461, + "learning_rate": 6.164776522403079e-07, + "loss": 0.0575, + "num_input_tokens_seen": 91413712, + "step": 135645 + }, + { + "epoch": 3.313952068013583, + "grad_norm": 0.0006980210309848189, + "learning_rate": 6.163988970773985e-07, + "loss": 0.041, + "num_input_tokens_seen": 91417680, + "step": 135650 + }, + { + "epoch": 3.3140742188454304, + "grad_norm": 0.00029445570544339716, + "learning_rate": 6.163201447041399e-07, + "loss": 0.0, + "num_input_tokens_seen": 91420752, + "step": 135655 + }, + { + "epoch": 3.3141963696772776, + "grad_norm": 0.0027940631844103336, + "learning_rate": 6.162413951211041e-07, + "loss": 0.0003, + "num_input_tokens_seen": 91424336, + "step": 135660 + }, + { + "epoch": 3.314318520509125, + "grad_norm": 0.02510049007833004, + "learning_rate": 6.161626483288643e-07, + "loss": 0.0, + "num_input_tokens_seen": 91427536, + "step": 135665 + }, + { + "epoch": 3.314440671340972, + "grad_norm": 0.007939993403851986, + "learning_rate": 6.160839043279926e-07, + "loss": 0.0, + "num_input_tokens_seen": 91430544, + "step": 135670 + }, + { + "epoch": 3.314562822172819, + "grad_norm": 0.0004061466024722904, + "learning_rate": 6.160051631190623e-07, + "loss": 0.0, + "num_input_tokens_seen": 91433872, + "step": 135675 + }, + { + "epoch": 3.314684973004666, + "grad_norm": 0.010502016171813011, + "learning_rate": 6.159264247026456e-07, + "loss": 0.0, + "num_input_tokens_seen": 91437200, + "step": 135680 + }, + { + "epoch": 3.3148071238365135, + "grad_norm": 0.0011987907346338034, + "learning_rate": 6.158476890793152e-07, + "loss": 0.0, + "num_input_tokens_seen": 91440528, + "step": 135685 + }, + { + "epoch": 3.3149292746683603, + "grad_norm": 0.000779199821408838, + "learning_rate": 6.157689562496439e-07, + "loss": 0.0709, + "num_input_tokens_seen": 91443984, + "step": 135690 + }, + { + "epoch": 3.3150514255002075, + "grad_norm": 0.0010632823687046766, + "learning_rate": 6.15690226214204e-07, + "loss": 0.0, + "num_input_tokens_seen": 91447248, + "step": 135695 + }, + { + "epoch": 3.3151735763320547, + "grad_norm": 0.005994278471916914, + "learning_rate": 6.156114989735682e-07, + "loss": 0.0, + "num_input_tokens_seen": 91450448, + "step": 135700 + }, + { + "epoch": 3.315295727163902, + "grad_norm": 0.00121176743414253, + "learning_rate": 6.15532774528309e-07, + "loss": 0.0, + "num_input_tokens_seen": 91454096, + "step": 135705 + }, + { + "epoch": 3.315417877995749, + "grad_norm": 0.0034481610637158155, + "learning_rate": 6.154540528789988e-07, + "loss": 0.0, + "num_input_tokens_seen": 91457552, + "step": 135710 + }, + { + "epoch": 3.3155400288275962, + "grad_norm": 0.0012256080517545342, + "learning_rate": 6.153753340262101e-07, + "loss": 0.0, + "num_input_tokens_seen": 91461264, + "step": 135715 + }, + { + "epoch": 3.3156621796594434, + "grad_norm": 0.003028532722964883, + "learning_rate": 6.152966179705154e-07, + "loss": 0.0, + "num_input_tokens_seen": 91464656, + "step": 135720 + }, + { + "epoch": 3.3157843304912906, + "grad_norm": 35.18305587768555, + "learning_rate": 6.152179047124875e-07, + "loss": 0.2296, + "num_input_tokens_seen": 91467728, + "step": 135725 + }, + { + "epoch": 3.315906481323138, + "grad_norm": 0.003076448105275631, + "learning_rate": 6.15139194252698e-07, + "loss": 0.0, + "num_input_tokens_seen": 91470928, + "step": 135730 + }, + { + "epoch": 3.316028632154985, + "grad_norm": 0.0004749661311507225, + "learning_rate": 6.150604865917201e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91474448, + "step": 135735 + }, + { + "epoch": 3.316150782986832, + "grad_norm": 0.006054687313735485, + "learning_rate": 6.149817817301257e-07, + "loss": 0.0, + "num_input_tokens_seen": 91478032, + "step": 135740 + }, + { + "epoch": 3.3162729338186794, + "grad_norm": 0.003576445858925581, + "learning_rate": 6.149030796684875e-07, + "loss": 0.0, + "num_input_tokens_seen": 91481744, + "step": 135745 + }, + { + "epoch": 3.3163950846505266, + "grad_norm": 0.0043333168141543865, + "learning_rate": 6.148243804073776e-07, + "loss": 0.0, + "num_input_tokens_seen": 91485072, + "step": 135750 + }, + { + "epoch": 3.3165172354823738, + "grad_norm": 0.016035636886954308, + "learning_rate": 6.147456839473684e-07, + "loss": 0.0, + "num_input_tokens_seen": 91488464, + "step": 135755 + }, + { + "epoch": 3.316639386314221, + "grad_norm": 0.06646835058927536, + "learning_rate": 6.146669902890324e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91491792, + "step": 135760 + }, + { + "epoch": 3.316761537146068, + "grad_norm": 0.02964874729514122, + "learning_rate": 6.145882994329414e-07, + "loss": 0.0, + "num_input_tokens_seen": 91495056, + "step": 135765 + }, + { + "epoch": 3.3168836879779153, + "grad_norm": 0.06201769784092903, + "learning_rate": 6.145096113796684e-07, + "loss": 0.0, + "num_input_tokens_seen": 91498256, + "step": 135770 + }, + { + "epoch": 3.317005838809762, + "grad_norm": 52.88552474975586, + "learning_rate": 6.144309261297847e-07, + "loss": 0.0546, + "num_input_tokens_seen": 91501776, + "step": 135775 + }, + { + "epoch": 3.3171279896416097, + "grad_norm": 0.17194008827209473, + "learning_rate": 6.143522436838628e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91504656, + "step": 135780 + }, + { + "epoch": 3.3172501404734565, + "grad_norm": 0.00464673014357686, + "learning_rate": 6.142735640424759e-07, + "loss": 0.0027, + "num_input_tokens_seen": 91507792, + "step": 135785 + }, + { + "epoch": 3.3173722913053036, + "grad_norm": 0.009759887121617794, + "learning_rate": 6.141948872061947e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91511184, + "step": 135790 + }, + { + "epoch": 3.317494442137151, + "grad_norm": 0.19676682353019714, + "learning_rate": 6.141162131755926e-07, + "loss": 0.0, + "num_input_tokens_seen": 91514576, + "step": 135795 + }, + { + "epoch": 3.317616592968998, + "grad_norm": 0.15152515470981598, + "learning_rate": 6.140375419512406e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91517840, + "step": 135800 + }, + { + "epoch": 3.317738743800845, + "grad_norm": 0.014471475966274738, + "learning_rate": 6.139588735337118e-07, + "loss": 0.0, + "num_input_tokens_seen": 91521232, + "step": 135805 + }, + { + "epoch": 3.3178608946326924, + "grad_norm": 0.001642052666284144, + "learning_rate": 6.138802079235781e-07, + "loss": 0.0, + "num_input_tokens_seen": 91524240, + "step": 135810 + }, + { + "epoch": 3.3179830454645396, + "grad_norm": 0.013218896463513374, + "learning_rate": 6.138015451214109e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91527696, + "step": 135815 + }, + { + "epoch": 3.318105196296387, + "grad_norm": 0.0030538730788975954, + "learning_rate": 6.137228851277831e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91531920, + "step": 135820 + }, + { + "epoch": 3.318227347128234, + "grad_norm": 0.006154006812721491, + "learning_rate": 6.136442279432661e-07, + "loss": 0.058, + "num_input_tokens_seen": 91535824, + "step": 135825 + }, + { + "epoch": 3.318349497960081, + "grad_norm": 0.008251064456999302, + "learning_rate": 6.135655735684327e-07, + "loss": 0.0468, + "num_input_tokens_seen": 91539216, + "step": 135830 + }, + { + "epoch": 3.3184716487919284, + "grad_norm": 28.197277069091797, + "learning_rate": 6.134869220038537e-07, + "loss": 0.0526, + "num_input_tokens_seen": 91542864, + "step": 135835 + }, + { + "epoch": 3.3185937996237755, + "grad_norm": 0.0020110972691327333, + "learning_rate": 6.134082732501018e-07, + "loss": 0.0, + "num_input_tokens_seen": 91546000, + "step": 135840 + }, + { + "epoch": 3.3187159504556227, + "grad_norm": 0.0014587831683456898, + "learning_rate": 6.133296273077495e-07, + "loss": 0.0, + "num_input_tokens_seen": 91549392, + "step": 135845 + }, + { + "epoch": 3.31883810128747, + "grad_norm": 0.003994397819042206, + "learning_rate": 6.132509841773678e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91552912, + "step": 135850 + }, + { + "epoch": 3.318960252119317, + "grad_norm": 0.42959192395210266, + "learning_rate": 6.131723438595291e-07, + "loss": 0.0, + "num_input_tokens_seen": 91555984, + "step": 135855 + }, + { + "epoch": 3.319082402951164, + "grad_norm": 0.0016079478664323688, + "learning_rate": 6.13093706354805e-07, + "loss": 0.0, + "num_input_tokens_seen": 91559312, + "step": 135860 + }, + { + "epoch": 3.3192045537830115, + "grad_norm": 0.10521599650382996, + "learning_rate": 6.130150716637677e-07, + "loss": 0.0, + "num_input_tokens_seen": 91563280, + "step": 135865 + }, + { + "epoch": 3.3193267046148582, + "grad_norm": 0.006913943216204643, + "learning_rate": 6.129364397869887e-07, + "loss": 0.0, + "num_input_tokens_seen": 91566736, + "step": 135870 + }, + { + "epoch": 3.3194488554467054, + "grad_norm": 0.0004993074107915163, + "learning_rate": 6.128578107250399e-07, + "loss": 0.0, + "num_input_tokens_seen": 91570256, + "step": 135875 + }, + { + "epoch": 3.3195710062785526, + "grad_norm": 0.0032401448115706444, + "learning_rate": 6.127791844784937e-07, + "loss": 0.0, + "num_input_tokens_seen": 91573968, + "step": 135880 + }, + { + "epoch": 3.3196931571104, + "grad_norm": 0.0026708885561674833, + "learning_rate": 6.12700561047921e-07, + "loss": 0.0, + "num_input_tokens_seen": 91577424, + "step": 135885 + }, + { + "epoch": 3.319815307942247, + "grad_norm": 0.41900894045829773, + "learning_rate": 6.126219404338944e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91581072, + "step": 135890 + }, + { + "epoch": 3.319937458774094, + "grad_norm": 0.002558025298640132, + "learning_rate": 6.125433226369847e-07, + "loss": 0.0095, + "num_input_tokens_seen": 91584848, + "step": 135895 + }, + { + "epoch": 3.3200596096059414, + "grad_norm": 0.00013529101852327585, + "learning_rate": 6.124647076577644e-07, + "loss": 0.0, + "num_input_tokens_seen": 91588240, + "step": 135900 + }, + { + "epoch": 3.3201817604377886, + "grad_norm": 0.002773279557004571, + "learning_rate": 6.123860954968051e-07, + "loss": 0.0, + "num_input_tokens_seen": 91591184, + "step": 135905 + }, + { + "epoch": 3.3203039112696358, + "grad_norm": 0.00456498796120286, + "learning_rate": 6.123074861546783e-07, + "loss": 0.0477, + "num_input_tokens_seen": 91594576, + "step": 135910 + }, + { + "epoch": 3.320426062101483, + "grad_norm": 0.010695680975914001, + "learning_rate": 6.122288796319559e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91597776, + "step": 135915 + }, + { + "epoch": 3.32054821293333, + "grad_norm": 0.007244298234581947, + "learning_rate": 6.121502759292091e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91601168, + "step": 135920 + }, + { + "epoch": 3.3206703637651773, + "grad_norm": 0.0032030600123107433, + "learning_rate": 6.120716750470102e-07, + "loss": 0.0, + "num_input_tokens_seen": 91604496, + "step": 135925 + }, + { + "epoch": 3.3207925145970245, + "grad_norm": 0.00389327434822917, + "learning_rate": 6.119930769859299e-07, + "loss": 0.0, + "num_input_tokens_seen": 91607696, + "step": 135930 + }, + { + "epoch": 3.3209146654288717, + "grad_norm": 0.025663211941719055, + "learning_rate": 6.119144817465405e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91611216, + "step": 135935 + }, + { + "epoch": 3.321036816260719, + "grad_norm": 0.0016724220477044582, + "learning_rate": 6.118358893294135e-07, + "loss": 0.0, + "num_input_tokens_seen": 91614608, + "step": 135940 + }, + { + "epoch": 3.321158967092566, + "grad_norm": 0.0005501621635630727, + "learning_rate": 6.117572997351201e-07, + "loss": 0.0, + "num_input_tokens_seen": 91617680, + "step": 135945 + }, + { + "epoch": 3.3212811179244133, + "grad_norm": 0.003155820071697235, + "learning_rate": 6.116787129642324e-07, + "loss": 0.0, + "num_input_tokens_seen": 91620944, + "step": 135950 + }, + { + "epoch": 3.32140326875626, + "grad_norm": 0.004578068852424622, + "learning_rate": 6.116001290173211e-07, + "loss": 0.0, + "num_input_tokens_seen": 91624464, + "step": 135955 + }, + { + "epoch": 3.321525419588107, + "grad_norm": 0.0008005460840649903, + "learning_rate": 6.115215478949587e-07, + "loss": 0.0, + "num_input_tokens_seen": 91627728, + "step": 135960 + }, + { + "epoch": 3.3216475704199544, + "grad_norm": 0.009103553369641304, + "learning_rate": 6.114429695977157e-07, + "loss": 0.0, + "num_input_tokens_seen": 91631184, + "step": 135965 + }, + { + "epoch": 3.3217697212518016, + "grad_norm": 0.0013579517835751176, + "learning_rate": 6.113643941261639e-07, + "loss": 0.0, + "num_input_tokens_seen": 91634384, + "step": 135970 + }, + { + "epoch": 3.3218918720836488, + "grad_norm": 9.428247722098604e-05, + "learning_rate": 6.112858214808749e-07, + "loss": 0.0, + "num_input_tokens_seen": 91638032, + "step": 135975 + }, + { + "epoch": 3.322014022915496, + "grad_norm": 0.004081009421497583, + "learning_rate": 6.112072516624198e-07, + "loss": 0.0, + "num_input_tokens_seen": 91641552, + "step": 135980 + }, + { + "epoch": 3.322136173747343, + "grad_norm": 0.011382028460502625, + "learning_rate": 6.111286846713704e-07, + "loss": 0.0, + "num_input_tokens_seen": 91644816, + "step": 135985 + }, + { + "epoch": 3.3222583245791903, + "grad_norm": 0.07787194848060608, + "learning_rate": 6.110501205082976e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91648016, + "step": 135990 + }, + { + "epoch": 3.3223804754110375, + "grad_norm": 0.006711010821163654, + "learning_rate": 6.109715591737727e-07, + "loss": 0.0512, + "num_input_tokens_seen": 91651088, + "step": 135995 + }, + { + "epoch": 3.3225026262428847, + "grad_norm": 0.0014324527001008391, + "learning_rate": 6.108930006683678e-07, + "loss": 0.0356, + "num_input_tokens_seen": 91654352, + "step": 136000 + }, + { + "epoch": 3.322624777074732, + "grad_norm": 0.0011398588540032506, + "learning_rate": 6.108144449926533e-07, + "loss": 0.0, + "num_input_tokens_seen": 91657360, + "step": 136005 + }, + { + "epoch": 3.322746927906579, + "grad_norm": 0.02468794398009777, + "learning_rate": 6.10735892147201e-07, + "loss": 0.0, + "num_input_tokens_seen": 91660560, + "step": 136010 + }, + { + "epoch": 3.3228690787384263, + "grad_norm": 0.0005330094136297703, + "learning_rate": 6.10657342132582e-07, + "loss": 0.0407, + "num_input_tokens_seen": 91663824, + "step": 136015 + }, + { + "epoch": 3.3229912295702735, + "grad_norm": 0.0012963847257196903, + "learning_rate": 6.105787949493675e-07, + "loss": 0.0, + "num_input_tokens_seen": 91666896, + "step": 136020 + }, + { + "epoch": 3.3231133804021207, + "grad_norm": 0.0028438479639589787, + "learning_rate": 6.105002505981287e-07, + "loss": 0.0, + "num_input_tokens_seen": 91670224, + "step": 136025 + }, + { + "epoch": 3.323235531233968, + "grad_norm": 0.033919740468263626, + "learning_rate": 6.104217090794365e-07, + "loss": 0.0, + "num_input_tokens_seen": 91673616, + "step": 136030 + }, + { + "epoch": 3.323357682065815, + "grad_norm": 0.015069283545017242, + "learning_rate": 6.10343170393863e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91677136, + "step": 136035 + }, + { + "epoch": 3.323479832897662, + "grad_norm": 0.009964990429580212, + "learning_rate": 6.102646345419784e-07, + "loss": 0.0, + "num_input_tokens_seen": 91680208, + "step": 136040 + }, + { + "epoch": 3.3236019837295094, + "grad_norm": 0.0020420521032065153, + "learning_rate": 6.101861015243546e-07, + "loss": 0.0, + "num_input_tokens_seen": 91683984, + "step": 136045 + }, + { + "epoch": 3.323724134561356, + "grad_norm": 0.0036547784693539143, + "learning_rate": 6.101075713415617e-07, + "loss": 0.0256, + "num_input_tokens_seen": 91687120, + "step": 136050 + }, + { + "epoch": 3.3238462853932034, + "grad_norm": 0.0009308999869972467, + "learning_rate": 6.100290439941718e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91690576, + "step": 136055 + }, + { + "epoch": 3.3239684362250506, + "grad_norm": 18.14400291442871, + "learning_rate": 6.099505194827557e-07, + "loss": 0.0653, + "num_input_tokens_seen": 91693648, + "step": 136060 + }, + { + "epoch": 3.3240905870568977, + "grad_norm": 0.033947087824344635, + "learning_rate": 6.098719978078841e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91696976, + "step": 136065 + }, + { + "epoch": 3.324212737888745, + "grad_norm": 0.06831333786249161, + "learning_rate": 6.097934789701285e-07, + "loss": 0.0, + "num_input_tokens_seen": 91700816, + "step": 136070 + }, + { + "epoch": 3.324334888720592, + "grad_norm": 0.023533586412668228, + "learning_rate": 6.097149629700593e-07, + "loss": 0.0, + "num_input_tokens_seen": 91704336, + "step": 136075 + }, + { + "epoch": 3.3244570395524393, + "grad_norm": 126.68274688720703, + "learning_rate": 6.096364498082483e-07, + "loss": 0.0022, + "num_input_tokens_seen": 91707536, + "step": 136080 + }, + { + "epoch": 3.3245791903842865, + "grad_norm": 0.0028027426451444626, + "learning_rate": 6.095579394852657e-07, + "loss": 0.0, + "num_input_tokens_seen": 91710800, + "step": 136085 + }, + { + "epoch": 3.3247013412161337, + "grad_norm": 8.272482872009277, + "learning_rate": 6.094794320016826e-07, + "loss": 0.0003, + "num_input_tokens_seen": 91713872, + "step": 136090 + }, + { + "epoch": 3.324823492047981, + "grad_norm": 0.0009194356389343739, + "learning_rate": 6.094009273580707e-07, + "loss": 0.0, + "num_input_tokens_seen": 91717520, + "step": 136095 + }, + { + "epoch": 3.324945642879828, + "grad_norm": 0.0003622818912845105, + "learning_rate": 6.093224255549998e-07, + "loss": 0.0, + "num_input_tokens_seen": 91720912, + "step": 136100 + }, + { + "epoch": 3.3250677937116753, + "grad_norm": 0.0019771524239331484, + "learning_rate": 6.092439265930416e-07, + "loss": 0.0385, + "num_input_tokens_seen": 91724240, + "step": 136105 + }, + { + "epoch": 3.3251899445435225, + "grad_norm": 0.0015008923364803195, + "learning_rate": 6.091654304727665e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91727504, + "step": 136110 + }, + { + "epoch": 3.3253120953753696, + "grad_norm": 0.0024910790380090475, + "learning_rate": 6.090869371947458e-07, + "loss": 0.0, + "num_input_tokens_seen": 91730704, + "step": 136115 + }, + { + "epoch": 3.325434246207217, + "grad_norm": 0.0058991131372749805, + "learning_rate": 6.090084467595497e-07, + "loss": 0.0, + "num_input_tokens_seen": 91734224, + "step": 136120 + }, + { + "epoch": 3.325556397039064, + "grad_norm": 0.002062541898339987, + "learning_rate": 6.089299591677492e-07, + "loss": 0.0, + "num_input_tokens_seen": 91737360, + "step": 136125 + }, + { + "epoch": 3.325678547870911, + "grad_norm": 0.001725882524624467, + "learning_rate": 6.088514744199158e-07, + "loss": 0.0, + "num_input_tokens_seen": 91740432, + "step": 136130 + }, + { + "epoch": 3.325800698702758, + "grad_norm": 0.004223139490932226, + "learning_rate": 6.087729925166191e-07, + "loss": 0.0005, + "num_input_tokens_seen": 91743952, + "step": 136135 + }, + { + "epoch": 3.325922849534605, + "grad_norm": 0.00504926685243845, + "learning_rate": 6.086945134584311e-07, + "loss": 0.0, + "num_input_tokens_seen": 91747792, + "step": 136140 + }, + { + "epoch": 3.3260450003664523, + "grad_norm": 0.001159909414127469, + "learning_rate": 6.086160372459211e-07, + "loss": 0.0, + "num_input_tokens_seen": 91751312, + "step": 136145 + }, + { + "epoch": 3.3261671511982995, + "grad_norm": 0.005098773166537285, + "learning_rate": 6.085375638796608e-07, + "loss": 0.0, + "num_input_tokens_seen": 91754576, + "step": 136150 + }, + { + "epoch": 3.3262893020301467, + "grad_norm": 0.10678897053003311, + "learning_rate": 6.084590933602209e-07, + "loss": 0.0, + "num_input_tokens_seen": 91757968, + "step": 136155 + }, + { + "epoch": 3.326411452861994, + "grad_norm": 0.009015164338052273, + "learning_rate": 6.083806256881716e-07, + "loss": 0.0, + "num_input_tokens_seen": 91761360, + "step": 136160 + }, + { + "epoch": 3.326533603693841, + "grad_norm": 0.0019480243790894747, + "learning_rate": 6.083021608640837e-07, + "loss": 0.0, + "num_input_tokens_seen": 91764944, + "step": 136165 + }, + { + "epoch": 3.3266557545256883, + "grad_norm": 0.00024862895952537656, + "learning_rate": 6.082236988885279e-07, + "loss": 0.0, + "num_input_tokens_seen": 91768016, + "step": 136170 + }, + { + "epoch": 3.3267779053575355, + "grad_norm": 0.0010257846442982554, + "learning_rate": 6.081452397620747e-07, + "loss": 0.0414, + "num_input_tokens_seen": 91770960, + "step": 136175 + }, + { + "epoch": 3.3269000561893827, + "grad_norm": 0.0008336947648786008, + "learning_rate": 6.080667834852948e-07, + "loss": 0.0, + "num_input_tokens_seen": 91774160, + "step": 136180 + }, + { + "epoch": 3.32702220702123, + "grad_norm": 0.04634743928909302, + "learning_rate": 6.079883300587583e-07, + "loss": 0.0, + "num_input_tokens_seen": 91776912, + "step": 136185 + }, + { + "epoch": 3.327144357853077, + "grad_norm": 0.004024902358651161, + "learning_rate": 6.079098794830366e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91780176, + "step": 136190 + }, + { + "epoch": 3.3272665086849242, + "grad_norm": 0.018328385427594185, + "learning_rate": 6.078314317586992e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91783120, + "step": 136195 + }, + { + "epoch": 3.3273886595167714, + "grad_norm": 0.013721533119678497, + "learning_rate": 6.077529868863178e-07, + "loss": 0.0, + "num_input_tokens_seen": 91786512, + "step": 136200 + }, + { + "epoch": 3.3275108103486186, + "grad_norm": 0.002645154483616352, + "learning_rate": 6.076745448664616e-07, + "loss": 0.0, + "num_input_tokens_seen": 91789648, + "step": 136205 + }, + { + "epoch": 3.327632961180466, + "grad_norm": 0.0104436706751585, + "learning_rate": 6.075961056997017e-07, + "loss": 0.0, + "num_input_tokens_seen": 91792848, + "step": 136210 + }, + { + "epoch": 3.327755112012313, + "grad_norm": 0.036987703293561935, + "learning_rate": 6.075176693866086e-07, + "loss": 0.0433, + "num_input_tokens_seen": 91796176, + "step": 136215 + }, + { + "epoch": 3.3278772628441597, + "grad_norm": 0.004183395765721798, + "learning_rate": 6.074392359277526e-07, + "loss": 0.0353, + "num_input_tokens_seen": 91799568, + "step": 136220 + }, + { + "epoch": 3.3279994136760074, + "grad_norm": 0.003504760330542922, + "learning_rate": 6.073608053237042e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91802960, + "step": 136225 + }, + { + "epoch": 3.328121564507854, + "grad_norm": 0.0017839574720710516, + "learning_rate": 6.072823775750333e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91805904, + "step": 136230 + }, + { + "epoch": 3.3282437153397013, + "grad_norm": 0.00135110376868397, + "learning_rate": 6.072039526823109e-07, + "loss": 0.0, + "num_input_tokens_seen": 91809232, + "step": 136235 + }, + { + "epoch": 3.3283658661715485, + "grad_norm": 0.0043913801200687885, + "learning_rate": 6.071255306461067e-07, + "loss": 0.0, + "num_input_tokens_seen": 91812368, + "step": 136240 + }, + { + "epoch": 3.3284880170033957, + "grad_norm": 0.0013979102950543165, + "learning_rate": 6.070471114669913e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91815632, + "step": 136245 + }, + { + "epoch": 3.328610167835243, + "grad_norm": 0.013382709585130215, + "learning_rate": 6.069686951455353e-07, + "loss": 0.0, + "num_input_tokens_seen": 91818768, + "step": 136250 + }, + { + "epoch": 3.32873231866709, + "grad_norm": 0.0029500352684408426, + "learning_rate": 6.068902816823083e-07, + "loss": 0.0, + "num_input_tokens_seen": 91822480, + "step": 136255 + }, + { + "epoch": 3.3288544694989373, + "grad_norm": 0.0005986875621601939, + "learning_rate": 6.068118710778813e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91825680, + "step": 136260 + }, + { + "epoch": 3.3289766203307845, + "grad_norm": 0.0019035233417525887, + "learning_rate": 6.067334633328237e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91829392, + "step": 136265 + }, + { + "epoch": 3.3290987711626316, + "grad_norm": 0.0014049690216779709, + "learning_rate": 6.066550584477065e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91832784, + "step": 136270 + }, + { + "epoch": 3.329220921994479, + "grad_norm": 0.002635091543197632, + "learning_rate": 6.065766564230995e-07, + "loss": 0.0, + "num_input_tokens_seen": 91836176, + "step": 136275 + }, + { + "epoch": 3.329343072826326, + "grad_norm": 0.0010414626449346542, + "learning_rate": 6.064982572595725e-07, + "loss": 0.0607, + "num_input_tokens_seen": 91839312, + "step": 136280 + }, + { + "epoch": 3.329465223658173, + "grad_norm": 0.0004415341536514461, + "learning_rate": 6.064198609576965e-07, + "loss": 0.0, + "num_input_tokens_seen": 91843216, + "step": 136285 + }, + { + "epoch": 3.3295873744900204, + "grad_norm": 0.02237209863960743, + "learning_rate": 6.063414675180407e-07, + "loss": 0.0, + "num_input_tokens_seen": 91846480, + "step": 136290 + }, + { + "epoch": 3.3297095253218676, + "grad_norm": 0.008092211559414864, + "learning_rate": 6.06263076941176e-07, + "loss": 0.0, + "num_input_tokens_seen": 91850320, + "step": 136295 + }, + { + "epoch": 3.329831676153715, + "grad_norm": 0.0001304069155594334, + "learning_rate": 6.061846892276718e-07, + "loss": 0.0, + "num_input_tokens_seen": 91853712, + "step": 136300 + }, + { + "epoch": 3.3299538269855615, + "grad_norm": 0.0009956281865015626, + "learning_rate": 6.061063043780985e-07, + "loss": 0.0, + "num_input_tokens_seen": 91857040, + "step": 136305 + }, + { + "epoch": 3.330075977817409, + "grad_norm": 0.0025123651139438152, + "learning_rate": 6.060279223930263e-07, + "loss": 0.0343, + "num_input_tokens_seen": 91860368, + "step": 136310 + }, + { + "epoch": 3.330198128649256, + "grad_norm": 0.001795079791918397, + "learning_rate": 6.059495432730248e-07, + "loss": 0.0, + "num_input_tokens_seen": 91863632, + "step": 136315 + }, + { + "epoch": 3.330320279481103, + "grad_norm": 0.0031533746514469385, + "learning_rate": 6.058711670186645e-07, + "loss": 0.0, + "num_input_tokens_seen": 91867024, + "step": 136320 + }, + { + "epoch": 3.3304424303129503, + "grad_norm": 0.0018401495181024075, + "learning_rate": 6.057927936305149e-07, + "loss": 0.0006, + "num_input_tokens_seen": 91870160, + "step": 136325 + }, + { + "epoch": 3.3305645811447975, + "grad_norm": 0.019284943118691444, + "learning_rate": 6.057144231091461e-07, + "loss": 0.0, + "num_input_tokens_seen": 91873232, + "step": 136330 + }, + { + "epoch": 3.3306867319766447, + "grad_norm": 0.008021565154194832, + "learning_rate": 6.056360554551281e-07, + "loss": 0.0, + "num_input_tokens_seen": 91876048, + "step": 136335 + }, + { + "epoch": 3.330808882808492, + "grad_norm": 0.014595117419958115, + "learning_rate": 6.055576906690306e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91879120, + "step": 136340 + }, + { + "epoch": 3.330931033640339, + "grad_norm": 0.07980555295944214, + "learning_rate": 6.054793287514241e-07, + "loss": 0.0, + "num_input_tokens_seen": 91882320, + "step": 136345 + }, + { + "epoch": 3.3310531844721862, + "grad_norm": 0.1366054266691208, + "learning_rate": 6.054009697028776e-07, + "loss": 0.0, + "num_input_tokens_seen": 91885776, + "step": 136350 + }, + { + "epoch": 3.3311753353040334, + "grad_norm": 0.05009021982550621, + "learning_rate": 6.053226135239618e-07, + "loss": 0.0, + "num_input_tokens_seen": 91889040, + "step": 136355 + }, + { + "epoch": 3.3312974861358806, + "grad_norm": 0.00617753341794014, + "learning_rate": 6.052442602152457e-07, + "loss": 0.0002, + "num_input_tokens_seen": 91892496, + "step": 136360 + }, + { + "epoch": 3.331419636967728, + "grad_norm": 0.007933614775538445, + "learning_rate": 6.051659097772996e-07, + "loss": 0.0, + "num_input_tokens_seen": 91895760, + "step": 136365 + }, + { + "epoch": 3.331541787799575, + "grad_norm": 0.09009955078363419, + "learning_rate": 6.050875622106932e-07, + "loss": 0.0, + "num_input_tokens_seen": 91898704, + "step": 136370 + }, + { + "epoch": 3.331663938631422, + "grad_norm": 0.00013575205230154097, + "learning_rate": 6.050092175159964e-07, + "loss": 0.0667, + "num_input_tokens_seen": 91901968, + "step": 136375 + }, + { + "epoch": 3.3317860894632694, + "grad_norm": 0.00041842387872748077, + "learning_rate": 6.04930875693779e-07, + "loss": 0.0, + "num_input_tokens_seen": 91904976, + "step": 136380 + }, + { + "epoch": 3.3319082402951166, + "grad_norm": 0.0005398113862611353, + "learning_rate": 6.048525367446102e-07, + "loss": 0.0, + "num_input_tokens_seen": 91908368, + "step": 136385 + }, + { + "epoch": 3.3320303911269638, + "grad_norm": 0.019100764766335487, + "learning_rate": 6.047742006690602e-07, + "loss": 0.0527, + "num_input_tokens_seen": 91911312, + "step": 136390 + }, + { + "epoch": 3.332152541958811, + "grad_norm": 0.0005279332399368286, + "learning_rate": 6.046958674676983e-07, + "loss": 0.0, + "num_input_tokens_seen": 91914512, + "step": 136395 + }, + { + "epoch": 3.3322746927906577, + "grad_norm": 0.00022604659898206592, + "learning_rate": 6.046175371410944e-07, + "loss": 0.0, + "num_input_tokens_seen": 91917968, + "step": 136400 + }, + { + "epoch": 3.332396843622505, + "grad_norm": 0.00010783207835629582, + "learning_rate": 6.045392096898184e-07, + "loss": 0.0, + "num_input_tokens_seen": 91921488, + "step": 136405 + }, + { + "epoch": 3.332518994454352, + "grad_norm": 0.011618511751294136, + "learning_rate": 6.044608851144392e-07, + "loss": 0.0, + "num_input_tokens_seen": 91924368, + "step": 136410 + }, + { + "epoch": 3.3326411452861993, + "grad_norm": 0.03842709958553314, + "learning_rate": 6.043825634155274e-07, + "loss": 0.0453, + "num_input_tokens_seen": 91927568, + "step": 136415 + }, + { + "epoch": 3.3327632961180464, + "grad_norm": 0.001610856968909502, + "learning_rate": 6.043042445936515e-07, + "loss": 0.0, + "num_input_tokens_seen": 91930832, + "step": 136420 + }, + { + "epoch": 3.3328854469498936, + "grad_norm": 0.008025610819458961, + "learning_rate": 6.04225928649382e-07, + "loss": 0.0, + "num_input_tokens_seen": 91934608, + "step": 136425 + }, + { + "epoch": 3.333007597781741, + "grad_norm": 0.000821794499643147, + "learning_rate": 6.041476155832877e-07, + "loss": 0.0, + "num_input_tokens_seen": 91937936, + "step": 136430 + }, + { + "epoch": 3.333129748613588, + "grad_norm": 0.007527175359427929, + "learning_rate": 6.040693053959384e-07, + "loss": 0.0, + "num_input_tokens_seen": 91941200, + "step": 136435 + }, + { + "epoch": 3.333251899445435, + "grad_norm": 0.01938558928668499, + "learning_rate": 6.039909980879039e-07, + "loss": 0.0, + "num_input_tokens_seen": 91944400, + "step": 136440 + }, + { + "epoch": 3.3333740502772824, + "grad_norm": 0.031942564994096756, + "learning_rate": 6.039126936597529e-07, + "loss": 0.0, + "num_input_tokens_seen": 91947920, + "step": 136445 + }, + { + "epoch": 3.3334962011091296, + "grad_norm": 0.00023766096273902804, + "learning_rate": 6.038343921120558e-07, + "loss": 0.0, + "num_input_tokens_seen": 91951120, + "step": 136450 + }, + { + "epoch": 3.3336183519409768, + "grad_norm": 0.0017525376752018929, + "learning_rate": 6.037560934453812e-07, + "loss": 0.0, + "num_input_tokens_seen": 91954192, + "step": 136455 + }, + { + "epoch": 3.333740502772824, + "grad_norm": 0.7847245335578918, + "learning_rate": 6.036777976602987e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91957392, + "step": 136460 + }, + { + "epoch": 3.333862653604671, + "grad_norm": 0.006502887699753046, + "learning_rate": 6.035995047573785e-07, + "loss": 0.0, + "num_input_tokens_seen": 91960464, + "step": 136465 + }, + { + "epoch": 3.3339848044365183, + "grad_norm": 0.004712434485554695, + "learning_rate": 6.035212147371887e-07, + "loss": 0.0, + "num_input_tokens_seen": 91964112, + "step": 136470 + }, + { + "epoch": 3.3341069552683655, + "grad_norm": 0.05862710252404213, + "learning_rate": 6.034429276002996e-07, + "loss": 0.0, + "num_input_tokens_seen": 91967184, + "step": 136475 + }, + { + "epoch": 3.3342291061002127, + "grad_norm": 0.002594059333205223, + "learning_rate": 6.033646433472803e-07, + "loss": 0.0, + "num_input_tokens_seen": 91970704, + "step": 136480 + }, + { + "epoch": 3.3343512569320595, + "grad_norm": 3.1144535541534424, + "learning_rate": 6.032863619786999e-07, + "loss": 0.0005, + "num_input_tokens_seen": 91974224, + "step": 136485 + }, + { + "epoch": 3.334473407763907, + "grad_norm": 0.008164148777723312, + "learning_rate": 6.032080834951276e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91977488, + "step": 136490 + }, + { + "epoch": 3.334595558595754, + "grad_norm": 0.0005573926609940827, + "learning_rate": 6.031298078971328e-07, + "loss": 0.0, + "num_input_tokens_seen": 91981136, + "step": 136495 + }, + { + "epoch": 3.334717709427601, + "grad_norm": 0.0009567593806423247, + "learning_rate": 6.030515351852852e-07, + "loss": 0.0, + "num_input_tokens_seen": 91984656, + "step": 136500 + }, + { + "epoch": 3.3348398602594482, + "grad_norm": 0.00125235749874264, + "learning_rate": 6.029732653601531e-07, + "loss": 0.055, + "num_input_tokens_seen": 91987664, + "step": 136505 + }, + { + "epoch": 3.3349620110912954, + "grad_norm": 0.22148816287517548, + "learning_rate": 6.028949984223066e-07, + "loss": 0.0001, + "num_input_tokens_seen": 91991120, + "step": 136510 + }, + { + "epoch": 3.3350841619231426, + "grad_norm": 0.009758691303431988, + "learning_rate": 6.028167343723142e-07, + "loss": 0.0645, + "num_input_tokens_seen": 91994064, + "step": 136515 + }, + { + "epoch": 3.33520631275499, + "grad_norm": 0.0007590046152472496, + "learning_rate": 6.027384732107455e-07, + "loss": 0.0, + "num_input_tokens_seen": 91997072, + "step": 136520 + }, + { + "epoch": 3.335328463586837, + "grad_norm": 0.01003759540617466, + "learning_rate": 6.026602149381694e-07, + "loss": 0.0, + "num_input_tokens_seen": 91999952, + "step": 136525 + }, + { + "epoch": 3.335450614418684, + "grad_norm": 0.0017680259188637137, + "learning_rate": 6.025819595551551e-07, + "loss": 0.0, + "num_input_tokens_seen": 92003664, + "step": 136530 + }, + { + "epoch": 3.3355727652505314, + "grad_norm": 16.769784927368164, + "learning_rate": 6.025037070622719e-07, + "loss": 0.0406, + "num_input_tokens_seen": 92006928, + "step": 136535 + }, + { + "epoch": 3.3356949160823786, + "grad_norm": 0.0016546935075893998, + "learning_rate": 6.024254574600884e-07, + "loss": 0.0, + "num_input_tokens_seen": 92010320, + "step": 136540 + }, + { + "epoch": 3.3358170669142257, + "grad_norm": 0.001963790040463209, + "learning_rate": 6.02347210749174e-07, + "loss": 0.0, + "num_input_tokens_seen": 92013456, + "step": 136545 + }, + { + "epoch": 3.335939217746073, + "grad_norm": 0.0012200935743749142, + "learning_rate": 6.022689669300976e-07, + "loss": 0.0662, + "num_input_tokens_seen": 92016976, + "step": 136550 + }, + { + "epoch": 3.33606136857792, + "grad_norm": 0.0235135480761528, + "learning_rate": 6.02190726003428e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92020368, + "step": 136555 + }, + { + "epoch": 3.3361835194097673, + "grad_norm": 0.003793837269768119, + "learning_rate": 6.021124879697348e-07, + "loss": 0.0, + "num_input_tokens_seen": 92023376, + "step": 136560 + }, + { + "epoch": 3.3363056702416145, + "grad_norm": 0.0012269438011571765, + "learning_rate": 6.020342528295863e-07, + "loss": 0.0, + "num_input_tokens_seen": 92026512, + "step": 136565 + }, + { + "epoch": 3.3364278210734617, + "grad_norm": 0.00037712950143031776, + "learning_rate": 6.019560205835522e-07, + "loss": 0.0, + "num_input_tokens_seen": 92029520, + "step": 136570 + }, + { + "epoch": 3.336549971905309, + "grad_norm": 0.0009124455391429365, + "learning_rate": 6.018777912322007e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92032784, + "step": 136575 + }, + { + "epoch": 3.3366721227371556, + "grad_norm": 0.03195099160075188, + "learning_rate": 6.01799564776101e-07, + "loss": 0.0, + "num_input_tokens_seen": 92036432, + "step": 136580 + }, + { + "epoch": 3.336794273569003, + "grad_norm": 0.0006369482143782079, + "learning_rate": 6.017213412158221e-07, + "loss": 0.0, + "num_input_tokens_seen": 92040080, + "step": 136585 + }, + { + "epoch": 3.33691642440085, + "grad_norm": 0.25829169154167175, + "learning_rate": 6.016431205519323e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92044112, + "step": 136590 + }, + { + "epoch": 3.337038575232697, + "grad_norm": 0.0028545642271637917, + "learning_rate": 6.015649027850015e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92047504, + "step": 136595 + }, + { + "epoch": 3.3371607260645444, + "grad_norm": 0.0025095988530665636, + "learning_rate": 6.014866879155973e-07, + "loss": 0.0, + "num_input_tokens_seen": 92050704, + "step": 136600 + }, + { + "epoch": 3.3372828768963916, + "grad_norm": 0.000693855807185173, + "learning_rate": 6.014084759442897e-07, + "loss": 0.0, + "num_input_tokens_seen": 92054416, + "step": 136605 + }, + { + "epoch": 3.3374050277282388, + "grad_norm": 0.03811664879322052, + "learning_rate": 6.013302668716462e-07, + "loss": 0.0, + "num_input_tokens_seen": 92057808, + "step": 136610 + }, + { + "epoch": 3.337527178560086, + "grad_norm": 138.32643127441406, + "learning_rate": 6.012520606982365e-07, + "loss": 0.0131, + "num_input_tokens_seen": 92061328, + "step": 136615 + }, + { + "epoch": 3.337649329391933, + "grad_norm": 0.006058790720999241, + "learning_rate": 6.011738574246294e-07, + "loss": 0.0003, + "num_input_tokens_seen": 92064912, + "step": 136620 + }, + { + "epoch": 3.3377714802237803, + "grad_norm": 0.0011405334807932377, + "learning_rate": 6.010956570513927e-07, + "loss": 0.0, + "num_input_tokens_seen": 92068112, + "step": 136625 + }, + { + "epoch": 3.3378936310556275, + "grad_norm": 0.0002621239691507071, + "learning_rate": 6.010174595790963e-07, + "loss": 0.0, + "num_input_tokens_seen": 92071440, + "step": 136630 + }, + { + "epoch": 3.3380157818874747, + "grad_norm": 0.0019386817002668977, + "learning_rate": 6.009392650083079e-07, + "loss": 0.2199, + "num_input_tokens_seen": 92075024, + "step": 136635 + }, + { + "epoch": 3.338137932719322, + "grad_norm": 0.0012935078702867031, + "learning_rate": 6.008610733395965e-07, + "loss": 0.0284, + "num_input_tokens_seen": 92078352, + "step": 136640 + }, + { + "epoch": 3.338260083551169, + "grad_norm": 0.007016130723059177, + "learning_rate": 6.007828845735308e-07, + "loss": 0.0, + "num_input_tokens_seen": 92082192, + "step": 136645 + }, + { + "epoch": 3.3383822343830163, + "grad_norm": 0.0018813019851222634, + "learning_rate": 6.007046987106792e-07, + "loss": 0.0, + "num_input_tokens_seen": 92085136, + "step": 136650 + }, + { + "epoch": 3.3385043852148635, + "grad_norm": 0.0037900370080024004, + "learning_rate": 6.006265157516106e-07, + "loss": 0.0, + "num_input_tokens_seen": 92088400, + "step": 136655 + }, + { + "epoch": 3.3386265360467107, + "grad_norm": 0.004031932447105646, + "learning_rate": 6.005483356968932e-07, + "loss": 0.0774, + "num_input_tokens_seen": 92091216, + "step": 136660 + }, + { + "epoch": 3.3387486868785574, + "grad_norm": 0.0031838095746934414, + "learning_rate": 6.004701585470961e-07, + "loss": 0.0, + "num_input_tokens_seen": 92094544, + "step": 136665 + }, + { + "epoch": 3.338870837710405, + "grad_norm": 0.0348113477230072, + "learning_rate": 6.00391984302787e-07, + "loss": 0.0, + "num_input_tokens_seen": 92098000, + "step": 136670 + }, + { + "epoch": 3.338992988542252, + "grad_norm": 0.0009348982712253928, + "learning_rate": 6.003138129645353e-07, + "loss": 0.0, + "num_input_tokens_seen": 92101200, + "step": 136675 + }, + { + "epoch": 3.339115139374099, + "grad_norm": 0.003419789019972086, + "learning_rate": 6.002356445329088e-07, + "loss": 0.0, + "num_input_tokens_seen": 92104848, + "step": 136680 + }, + { + "epoch": 3.339237290205946, + "grad_norm": 0.007083706092089415, + "learning_rate": 6.001574790084763e-07, + "loss": 0.0, + "num_input_tokens_seen": 92108240, + "step": 136685 + }, + { + "epoch": 3.3393594410377934, + "grad_norm": 0.030707471072673798, + "learning_rate": 6.000793163918063e-07, + "loss": 0.0, + "num_input_tokens_seen": 92111696, + "step": 136690 + }, + { + "epoch": 3.3394815918696406, + "grad_norm": 0.0045353625901043415, + "learning_rate": 6.000011566834667e-07, + "loss": 0.0, + "num_input_tokens_seen": 92115152, + "step": 136695 + }, + { + "epoch": 3.3396037427014877, + "grad_norm": 0.002015952952206135, + "learning_rate": 5.999229998840268e-07, + "loss": 0.0, + "num_input_tokens_seen": 92118352, + "step": 136700 + }, + { + "epoch": 3.339725893533335, + "grad_norm": 7.489074050681666e-05, + "learning_rate": 5.998448459940539e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92121744, + "step": 136705 + }, + { + "epoch": 3.339848044365182, + "grad_norm": 0.057606253772974014, + "learning_rate": 5.997666950141169e-07, + "loss": 0.0, + "num_input_tokens_seen": 92124752, + "step": 136710 + }, + { + "epoch": 3.3399701951970293, + "grad_norm": 0.0022049969993531704, + "learning_rate": 5.996885469447847e-07, + "loss": 0.0, + "num_input_tokens_seen": 92128336, + "step": 136715 + }, + { + "epoch": 3.3400923460288765, + "grad_norm": 89.3311538696289, + "learning_rate": 5.996104017866245e-07, + "loss": 0.0918, + "num_input_tokens_seen": 92131792, + "step": 136720 + }, + { + "epoch": 3.3402144968607237, + "grad_norm": 0.000725537771359086, + "learning_rate": 5.995322595402057e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92135056, + "step": 136725 + }, + { + "epoch": 3.340336647692571, + "grad_norm": 0.0006241207593120635, + "learning_rate": 5.994541202060955e-07, + "loss": 0.0, + "num_input_tokens_seen": 92138384, + "step": 136730 + }, + { + "epoch": 3.340458798524418, + "grad_norm": 0.003467158181592822, + "learning_rate": 5.993759837848631e-07, + "loss": 0.0, + "num_input_tokens_seen": 92142480, + "step": 136735 + }, + { + "epoch": 3.3405809493562653, + "grad_norm": 0.0008007539436221123, + "learning_rate": 5.99297850277076e-07, + "loss": 0.0, + "num_input_tokens_seen": 92145744, + "step": 136740 + }, + { + "epoch": 3.3407031001881125, + "grad_norm": 0.008967030793428421, + "learning_rate": 5.992197196833026e-07, + "loss": 0.0, + "num_input_tokens_seen": 92149264, + "step": 136745 + }, + { + "epoch": 3.340825251019959, + "grad_norm": 0.00047545693814754486, + "learning_rate": 5.991415920041117e-07, + "loss": 0.0009, + "num_input_tokens_seen": 92152656, + "step": 136750 + }, + { + "epoch": 3.340947401851807, + "grad_norm": 28.655147552490234, + "learning_rate": 5.990634672400705e-07, + "loss": 0.0902, + "num_input_tokens_seen": 92155920, + "step": 136755 + }, + { + "epoch": 3.3410695526836536, + "grad_norm": 0.0035769431851804256, + "learning_rate": 5.98985345391748e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92158928, + "step": 136760 + }, + { + "epoch": 3.3411917035155008, + "grad_norm": 0.027868378907442093, + "learning_rate": 5.989072264597115e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92162000, + "step": 136765 + }, + { + "epoch": 3.341313854347348, + "grad_norm": 0.0004942073719576001, + "learning_rate": 5.988291104445296e-07, + "loss": 0.0, + "num_input_tokens_seen": 92165136, + "step": 136770 + }, + { + "epoch": 3.341436005179195, + "grad_norm": 0.0015912620583549142, + "learning_rate": 5.987509973467706e-07, + "loss": 0.0, + "num_input_tokens_seen": 92168976, + "step": 136775 + }, + { + "epoch": 3.3415581560110423, + "grad_norm": 86.06957244873047, + "learning_rate": 5.98672887167002e-07, + "loss": 0.0235, + "num_input_tokens_seen": 92172176, + "step": 136780 + }, + { + "epoch": 3.3416803068428895, + "grad_norm": 0.0011086321901530027, + "learning_rate": 5.985947799057924e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92175504, + "step": 136785 + }, + { + "epoch": 3.3418024576747367, + "grad_norm": 0.007813183590769768, + "learning_rate": 5.985166755637092e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92178768, + "step": 136790 + }, + { + "epoch": 3.341924608506584, + "grad_norm": 0.00011821733642136678, + "learning_rate": 5.984385741413209e-07, + "loss": 0.0414, + "num_input_tokens_seen": 92181904, + "step": 136795 + }, + { + "epoch": 3.342046759338431, + "grad_norm": 37.56241989135742, + "learning_rate": 5.983604756391954e-07, + "loss": 0.1131, + "num_input_tokens_seen": 92185488, + "step": 136800 + }, + { + "epoch": 3.3421689101702783, + "grad_norm": 0.0004566130228340626, + "learning_rate": 5.982823800579002e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92189584, + "step": 136805 + }, + { + "epoch": 3.3422910610021255, + "grad_norm": 0.03750479221343994, + "learning_rate": 5.98204287398004e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92193808, + "step": 136810 + }, + { + "epoch": 3.3424132118339727, + "grad_norm": 0.008922645822167397, + "learning_rate": 5.981261976600738e-07, + "loss": 0.0, + "num_input_tokens_seen": 92197072, + "step": 136815 + }, + { + "epoch": 3.34253536266582, + "grad_norm": 0.010072392411530018, + "learning_rate": 5.980481108446786e-07, + "loss": 0.0, + "num_input_tokens_seen": 92201296, + "step": 136820 + }, + { + "epoch": 3.342657513497667, + "grad_norm": 0.015576275065541267, + "learning_rate": 5.97970026952385e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92204496, + "step": 136825 + }, + { + "epoch": 3.3427796643295142, + "grad_norm": 0.16503624618053436, + "learning_rate": 5.978919459837621e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92207888, + "step": 136830 + }, + { + "epoch": 3.3429018151613614, + "grad_norm": 0.004576961509883404, + "learning_rate": 5.978138679393766e-07, + "loss": 0.0, + "num_input_tokens_seen": 92210896, + "step": 136835 + }, + { + "epoch": 3.3430239659932086, + "grad_norm": 0.002583190565928817, + "learning_rate": 5.977357928197971e-07, + "loss": 0.0, + "num_input_tokens_seen": 92213904, + "step": 136840 + }, + { + "epoch": 3.3431461168250554, + "grad_norm": 0.002497687004506588, + "learning_rate": 5.976577206255913e-07, + "loss": 0.0, + "num_input_tokens_seen": 92217232, + "step": 136845 + }, + { + "epoch": 3.343268267656903, + "grad_norm": 0.0009140447364188731, + "learning_rate": 5.975796513573263e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92220240, + "step": 136850 + }, + { + "epoch": 3.3433904184887497, + "grad_norm": 0.04105198755860329, + "learning_rate": 5.975015850155708e-07, + "loss": 0.0, + "num_input_tokens_seen": 92224016, + "step": 136855 + }, + { + "epoch": 3.343512569320597, + "grad_norm": 0.015119964256882668, + "learning_rate": 5.974235216008916e-07, + "loss": 0.0, + "num_input_tokens_seen": 92227408, + "step": 136860 + }, + { + "epoch": 3.343634720152444, + "grad_norm": 0.0003713774203788489, + "learning_rate": 5.973454611138568e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92230480, + "step": 136865 + }, + { + "epoch": 3.3437568709842913, + "grad_norm": 0.010512808337807655, + "learning_rate": 5.972674035550345e-07, + "loss": 0.0921, + "num_input_tokens_seen": 92234832, + "step": 136870 + }, + { + "epoch": 3.3438790218161385, + "grad_norm": 0.0023981162812560797, + "learning_rate": 5.971893489249917e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92238224, + "step": 136875 + }, + { + "epoch": 3.3440011726479857, + "grad_norm": 0.003602446988224983, + "learning_rate": 5.971112972242966e-07, + "loss": 0.0, + "num_input_tokens_seen": 92241104, + "step": 136880 + }, + { + "epoch": 3.344123323479833, + "grad_norm": 0.009283242747187614, + "learning_rate": 5.970332484535161e-07, + "loss": 0.0, + "num_input_tokens_seen": 92244112, + "step": 136885 + }, + { + "epoch": 3.34424547431168, + "grad_norm": 0.0038792439736425877, + "learning_rate": 5.969552026132186e-07, + "loss": 0.0, + "num_input_tokens_seen": 92247376, + "step": 136890 + }, + { + "epoch": 3.3443676251435273, + "grad_norm": 0.001365432282909751, + "learning_rate": 5.968771597039711e-07, + "loss": 0.0, + "num_input_tokens_seen": 92250960, + "step": 136895 + }, + { + "epoch": 3.3444897759753744, + "grad_norm": 0.0032672842498868704, + "learning_rate": 5.967991197263412e-07, + "loss": 0.0, + "num_input_tokens_seen": 92254096, + "step": 136900 + }, + { + "epoch": 3.3446119268072216, + "grad_norm": 0.05888065695762634, + "learning_rate": 5.967210826808968e-07, + "loss": 0.0, + "num_input_tokens_seen": 92257680, + "step": 136905 + }, + { + "epoch": 3.344734077639069, + "grad_norm": 0.006529807113111019, + "learning_rate": 5.966430485682048e-07, + "loss": 0.0, + "num_input_tokens_seen": 92260688, + "step": 136910 + }, + { + "epoch": 3.344856228470916, + "grad_norm": 0.011194230057299137, + "learning_rate": 5.965650173888334e-07, + "loss": 0.0, + "num_input_tokens_seen": 92264784, + "step": 136915 + }, + { + "epoch": 3.344978379302763, + "grad_norm": 0.00022088276455178857, + "learning_rate": 5.964869891433494e-07, + "loss": 0.0, + "num_input_tokens_seen": 92268240, + "step": 136920 + }, + { + "epoch": 3.3451005301346104, + "grad_norm": 0.0009188788244500756, + "learning_rate": 5.964089638323204e-07, + "loss": 0.0, + "num_input_tokens_seen": 92271632, + "step": 136925 + }, + { + "epoch": 3.345222680966457, + "grad_norm": 0.016729068011045456, + "learning_rate": 5.963309414563146e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92275280, + "step": 136930 + }, + { + "epoch": 3.3453448317983048, + "grad_norm": 0.0008369608549401164, + "learning_rate": 5.962529220158983e-07, + "loss": 0.0, + "num_input_tokens_seen": 92278672, + "step": 136935 + }, + { + "epoch": 3.3454669826301515, + "grad_norm": 0.0001761860039550811, + "learning_rate": 5.961749055116396e-07, + "loss": 0.0, + "num_input_tokens_seen": 92282192, + "step": 136940 + }, + { + "epoch": 3.3455891334619987, + "grad_norm": 0.001143355155363679, + "learning_rate": 5.960968919441055e-07, + "loss": 0.0, + "num_input_tokens_seen": 92285904, + "step": 136945 + }, + { + "epoch": 3.345711284293846, + "grad_norm": 0.010505764745175838, + "learning_rate": 5.960188813138634e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92289104, + "step": 136950 + }, + { + "epoch": 3.345833435125693, + "grad_norm": 0.02035011164844036, + "learning_rate": 5.959408736214807e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92292368, + "step": 136955 + }, + { + "epoch": 3.3459555859575403, + "grad_norm": 0.0037497461307793856, + "learning_rate": 5.958628688675244e-07, + "loss": 0.0, + "num_input_tokens_seen": 92295632, + "step": 136960 + }, + { + "epoch": 3.3460777367893875, + "grad_norm": 0.002685282379388809, + "learning_rate": 5.957848670525624e-07, + "loss": 0.0, + "num_input_tokens_seen": 92299536, + "step": 136965 + }, + { + "epoch": 3.3461998876212347, + "grad_norm": 0.004526887554675341, + "learning_rate": 5.957068681771613e-07, + "loss": 0.0006, + "num_input_tokens_seen": 92302608, + "step": 136970 + }, + { + "epoch": 3.346322038453082, + "grad_norm": 0.002396609168499708, + "learning_rate": 5.95628872241889e-07, + "loss": 0.0, + "num_input_tokens_seen": 92306000, + "step": 136975 + }, + { + "epoch": 3.346444189284929, + "grad_norm": 0.0007503124652430415, + "learning_rate": 5.955508792473118e-07, + "loss": 0.0, + "num_input_tokens_seen": 92309456, + "step": 136980 + }, + { + "epoch": 3.3465663401167762, + "grad_norm": 0.0008637371938675642, + "learning_rate": 5.954728891939977e-07, + "loss": 0.0, + "num_input_tokens_seen": 92313488, + "step": 136985 + }, + { + "epoch": 3.3466884909486234, + "grad_norm": 0.0006992123671807349, + "learning_rate": 5.953949020825133e-07, + "loss": 0.0, + "num_input_tokens_seen": 92316688, + "step": 136990 + }, + { + "epoch": 3.3468106417804706, + "grad_norm": 0.004576565232127905, + "learning_rate": 5.95316917913426e-07, + "loss": 0.0, + "num_input_tokens_seen": 92320080, + "step": 136995 + }, + { + "epoch": 3.346932792612318, + "grad_norm": 0.010585155338048935, + "learning_rate": 5.952389366873034e-07, + "loss": 0.0, + "num_input_tokens_seen": 92323664, + "step": 137000 + }, + { + "epoch": 3.347054943444165, + "grad_norm": 0.0007783559849485755, + "learning_rate": 5.951609584047117e-07, + "loss": 0.0, + "num_input_tokens_seen": 92327376, + "step": 137005 + }, + { + "epoch": 3.347177094276012, + "grad_norm": 0.001543479971587658, + "learning_rate": 5.950829830662186e-07, + "loss": 0.0, + "num_input_tokens_seen": 92330448, + "step": 137010 + }, + { + "epoch": 3.3472992451078594, + "grad_norm": 0.0005223507178016007, + "learning_rate": 5.950050106723907e-07, + "loss": 0.0, + "num_input_tokens_seen": 92334096, + "step": 137015 + }, + { + "epoch": 3.3474213959397066, + "grad_norm": 19.34946060180664, + "learning_rate": 5.949270412237953e-07, + "loss": 0.039, + "num_input_tokens_seen": 92337488, + "step": 137020 + }, + { + "epoch": 3.3475435467715533, + "grad_norm": 0.0002101163554470986, + "learning_rate": 5.948490747209997e-07, + "loss": 0.043, + "num_input_tokens_seen": 92341008, + "step": 137025 + }, + { + "epoch": 3.3476656976034005, + "grad_norm": 0.10596171766519547, + "learning_rate": 5.947711111645703e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92344016, + "step": 137030 + }, + { + "epoch": 3.3477878484352477, + "grad_norm": 0.021134018898010254, + "learning_rate": 5.946931505550746e-07, + "loss": 0.0, + "num_input_tokens_seen": 92347408, + "step": 137035 + }, + { + "epoch": 3.347909999267095, + "grad_norm": 0.011571722105145454, + "learning_rate": 5.946151928930792e-07, + "loss": 0.0, + "num_input_tokens_seen": 92351120, + "step": 137040 + }, + { + "epoch": 3.348032150098942, + "grad_norm": 0.07473000884056091, + "learning_rate": 5.945372381791513e-07, + "loss": 0.0, + "num_input_tokens_seen": 92355216, + "step": 137045 + }, + { + "epoch": 3.3481543009307893, + "grad_norm": 0.00010492785804672167, + "learning_rate": 5.944592864138575e-07, + "loss": 0.0, + "num_input_tokens_seen": 92358288, + "step": 137050 + }, + { + "epoch": 3.3482764517626364, + "grad_norm": 0.0009476658888161182, + "learning_rate": 5.943813375977647e-07, + "loss": 0.0, + "num_input_tokens_seen": 92361296, + "step": 137055 + }, + { + "epoch": 3.3483986025944836, + "grad_norm": 0.0033659671898931265, + "learning_rate": 5.943033917314404e-07, + "loss": 0.0, + "num_input_tokens_seen": 92364560, + "step": 137060 + }, + { + "epoch": 3.348520753426331, + "grad_norm": 0.00013656694500241429, + "learning_rate": 5.942254488154504e-07, + "loss": 0.0, + "num_input_tokens_seen": 92368080, + "step": 137065 + }, + { + "epoch": 3.348642904258178, + "grad_norm": 0.002999598393216729, + "learning_rate": 5.941475088503627e-07, + "loss": 0.0, + "num_input_tokens_seen": 92371408, + "step": 137070 + }, + { + "epoch": 3.348765055090025, + "grad_norm": 0.0001747731730574742, + "learning_rate": 5.940695718367428e-07, + "loss": 0.0, + "num_input_tokens_seen": 92374416, + "step": 137075 + }, + { + "epoch": 3.3488872059218724, + "grad_norm": 0.0003013430687133223, + "learning_rate": 5.939916377751584e-07, + "loss": 0.0, + "num_input_tokens_seen": 92378256, + "step": 137080 + }, + { + "epoch": 3.3490093567537196, + "grad_norm": 0.005943240597844124, + "learning_rate": 5.939137066661763e-07, + "loss": 0.0418, + "num_input_tokens_seen": 92381584, + "step": 137085 + }, + { + "epoch": 3.3491315075855668, + "grad_norm": 0.009656358510255814, + "learning_rate": 5.938357785103625e-07, + "loss": 0.0, + "num_input_tokens_seen": 92384912, + "step": 137090 + }, + { + "epoch": 3.349253658417414, + "grad_norm": 0.00010450145782670006, + "learning_rate": 5.937578533082846e-07, + "loss": 0.0, + "num_input_tokens_seen": 92388752, + "step": 137095 + }, + { + "epoch": 3.349375809249261, + "grad_norm": 0.0005017042858526111, + "learning_rate": 5.936799310605087e-07, + "loss": 0.0, + "num_input_tokens_seen": 92391696, + "step": 137100 + }, + { + "epoch": 3.3494979600811083, + "grad_norm": 0.10877280682325363, + "learning_rate": 5.936020117676015e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92395088, + "step": 137105 + }, + { + "epoch": 3.349620110912955, + "grad_norm": 0.003869707463309169, + "learning_rate": 5.9352409543013e-07, + "loss": 0.0, + "num_input_tokens_seen": 92398608, + "step": 137110 + }, + { + "epoch": 3.3497422617448027, + "grad_norm": 0.02804212085902691, + "learning_rate": 5.934461820486603e-07, + "loss": 0.0, + "num_input_tokens_seen": 92402064, + "step": 137115 + }, + { + "epoch": 3.3498644125766495, + "grad_norm": 0.0017672295216470957, + "learning_rate": 5.933682716237596e-07, + "loss": 0.0, + "num_input_tokens_seen": 92405520, + "step": 137120 + }, + { + "epoch": 3.3499865634084967, + "grad_norm": 0.6739102005958557, + "learning_rate": 5.932903641559939e-07, + "loss": 0.0007, + "num_input_tokens_seen": 92409104, + "step": 137125 + }, + { + "epoch": 3.350108714240344, + "grad_norm": 0.00022642611293122172, + "learning_rate": 5.932124596459305e-07, + "loss": 0.0, + "num_input_tokens_seen": 92412432, + "step": 137130 + }, + { + "epoch": 3.350230865072191, + "grad_norm": 0.0005862355465069413, + "learning_rate": 5.93134558094135e-07, + "loss": 0.0, + "num_input_tokens_seen": 92416016, + "step": 137135 + }, + { + "epoch": 3.3503530159040382, + "grad_norm": 0.0008308440446853638, + "learning_rate": 5.930566595011749e-07, + "loss": 0.0, + "num_input_tokens_seen": 92419152, + "step": 137140 + }, + { + "epoch": 3.3504751667358854, + "grad_norm": 0.030860837548971176, + "learning_rate": 5.929787638676158e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92422224, + "step": 137145 + }, + { + "epoch": 3.3505973175677326, + "grad_norm": 0.0021630236878991127, + "learning_rate": 5.929008711940249e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92425872, + "step": 137150 + }, + { + "epoch": 3.35071946839958, + "grad_norm": 0.13139772415161133, + "learning_rate": 5.928229814809684e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92429584, + "step": 137155 + }, + { + "epoch": 3.350841619231427, + "grad_norm": 0.02592483162879944, + "learning_rate": 5.927450947290125e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92432592, + "step": 137160 + }, + { + "epoch": 3.350963770063274, + "grad_norm": 447.85107421875, + "learning_rate": 5.926672109387241e-07, + "loss": 0.0204, + "num_input_tokens_seen": 92436304, + "step": 137165 + }, + { + "epoch": 3.3510859208951214, + "grad_norm": 0.0019791650120168924, + "learning_rate": 5.925893301106688e-07, + "loss": 0.0, + "num_input_tokens_seen": 92439760, + "step": 137170 + }, + { + "epoch": 3.3512080717269686, + "grad_norm": 8.97948193596676e-05, + "learning_rate": 5.925114522454136e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92442896, + "step": 137175 + }, + { + "epoch": 3.3513302225588157, + "grad_norm": 0.0074377055279910564, + "learning_rate": 5.924335773435251e-07, + "loss": 0.0, + "num_input_tokens_seen": 92445712, + "step": 137180 + }, + { + "epoch": 3.351452373390663, + "grad_norm": 0.0001644604344619438, + "learning_rate": 5.923557054055688e-07, + "loss": 0.0, + "num_input_tokens_seen": 92448848, + "step": 137185 + }, + { + "epoch": 3.35157452422251, + "grad_norm": 0.0013965462567284703, + "learning_rate": 5.922778364321119e-07, + "loss": 0.0, + "num_input_tokens_seen": 92452048, + "step": 137190 + }, + { + "epoch": 3.3516966750543573, + "grad_norm": 0.0002730031847022474, + "learning_rate": 5.921999704237197e-07, + "loss": 0.0, + "num_input_tokens_seen": 92455056, + "step": 137195 + }, + { + "epoch": 3.3518188258862045, + "grad_norm": 0.004419253673404455, + "learning_rate": 5.921221073809596e-07, + "loss": 0.0, + "num_input_tokens_seen": 92458704, + "step": 137200 + }, + { + "epoch": 3.3519409767180512, + "grad_norm": 0.025197016075253487, + "learning_rate": 5.92044247304397e-07, + "loss": 0.0004, + "num_input_tokens_seen": 92461712, + "step": 137205 + }, + { + "epoch": 3.3520631275498984, + "grad_norm": 5.4128915508044884e-05, + "learning_rate": 5.919663901945982e-07, + "loss": 0.0003, + "num_input_tokens_seen": 92464720, + "step": 137210 + }, + { + "epoch": 3.3521852783817456, + "grad_norm": 0.0008574188686907291, + "learning_rate": 5.918885360521297e-07, + "loss": 0.0, + "num_input_tokens_seen": 92467728, + "step": 137215 + }, + { + "epoch": 3.352307429213593, + "grad_norm": 0.0039085992611944675, + "learning_rate": 5.918106848775574e-07, + "loss": 0.0, + "num_input_tokens_seen": 92470928, + "step": 137220 + }, + { + "epoch": 3.35242958004544, + "grad_norm": 0.0001256961259059608, + "learning_rate": 5.917328366714479e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92474384, + "step": 137225 + }, + { + "epoch": 3.352551730877287, + "grad_norm": 0.0015291773015633225, + "learning_rate": 5.916549914343667e-07, + "loss": 0.0, + "num_input_tokens_seen": 92477776, + "step": 137230 + }, + { + "epoch": 3.3526738817091344, + "grad_norm": 70.6291732788086, + "learning_rate": 5.915771491668801e-07, + "loss": 0.0838, + "num_input_tokens_seen": 92481296, + "step": 137235 + }, + { + "epoch": 3.3527960325409816, + "grad_norm": 0.07619927823543549, + "learning_rate": 5.914993098695548e-07, + "loss": 0.0, + "num_input_tokens_seen": 92484560, + "step": 137240 + }, + { + "epoch": 3.3529181833728288, + "grad_norm": 2.3242442694026977e-05, + "learning_rate": 5.914214735429559e-07, + "loss": 0.0, + "num_input_tokens_seen": 92488592, + "step": 137245 + }, + { + "epoch": 3.353040334204676, + "grad_norm": 0.002132971538230777, + "learning_rate": 5.913436401876505e-07, + "loss": 0.0, + "num_input_tokens_seen": 92491984, + "step": 137250 + }, + { + "epoch": 3.353162485036523, + "grad_norm": 7.056258618831635e-05, + "learning_rate": 5.912658098042038e-07, + "loss": 0.0, + "num_input_tokens_seen": 92495376, + "step": 137255 + }, + { + "epoch": 3.3532846358683703, + "grad_norm": 0.0008625510963611305, + "learning_rate": 5.91187982393182e-07, + "loss": 0.0, + "num_input_tokens_seen": 92498256, + "step": 137260 + }, + { + "epoch": 3.3534067867002175, + "grad_norm": 0.0012392710195854306, + "learning_rate": 5.911101579551511e-07, + "loss": 0.0, + "num_input_tokens_seen": 92501776, + "step": 137265 + }, + { + "epoch": 3.3535289375320647, + "grad_norm": 0.0004566275456454605, + "learning_rate": 5.910323364906771e-07, + "loss": 0.0, + "num_input_tokens_seen": 92504784, + "step": 137270 + }, + { + "epoch": 3.353651088363912, + "grad_norm": 0.0005576558178290725, + "learning_rate": 5.909545180003262e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92507984, + "step": 137275 + }, + { + "epoch": 3.353773239195759, + "grad_norm": 0.00011330114648444578, + "learning_rate": 5.908767024846637e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92511248, + "step": 137280 + }, + { + "epoch": 3.3538953900276063, + "grad_norm": 0.0003647850244306028, + "learning_rate": 5.907988899442565e-07, + "loss": 0.0, + "num_input_tokens_seen": 92514512, + "step": 137285 + }, + { + "epoch": 3.354017540859453, + "grad_norm": 5.280201730784029e-05, + "learning_rate": 5.90721080379669e-07, + "loss": 0.0, + "num_input_tokens_seen": 92517456, + "step": 137290 + }, + { + "epoch": 3.3541396916913007, + "grad_norm": 0.7083552479743958, + "learning_rate": 5.906432737914686e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92520912, + "step": 137295 + }, + { + "epoch": 3.3542618425231474, + "grad_norm": 0.009471097961068153, + "learning_rate": 5.905654701802198e-07, + "loss": 0.0, + "num_input_tokens_seen": 92524368, + "step": 137300 + }, + { + "epoch": 3.3543839933549946, + "grad_norm": 1.3663839126820676e-05, + "learning_rate": 5.904876695464894e-07, + "loss": 0.0, + "num_input_tokens_seen": 92527696, + "step": 137305 + }, + { + "epoch": 3.354506144186842, + "grad_norm": 0.000423252786276862, + "learning_rate": 5.90409871890843e-07, + "loss": 0.0761, + "num_input_tokens_seen": 92531088, + "step": 137310 + }, + { + "epoch": 3.354628295018689, + "grad_norm": 0.0005392113816924393, + "learning_rate": 5.903320772138458e-07, + "loss": 0.0, + "num_input_tokens_seen": 92534480, + "step": 137315 + }, + { + "epoch": 3.354750445850536, + "grad_norm": 0.002171800471842289, + "learning_rate": 5.902542855160641e-07, + "loss": 0.0, + "num_input_tokens_seen": 92537872, + "step": 137320 + }, + { + "epoch": 3.3548725966823834, + "grad_norm": 0.00034207970020361245, + "learning_rate": 5.901764967980634e-07, + "loss": 0.0, + "num_input_tokens_seen": 92541904, + "step": 137325 + }, + { + "epoch": 3.3549947475142305, + "grad_norm": 0.0010893391445279121, + "learning_rate": 5.900987110604092e-07, + "loss": 0.0, + "num_input_tokens_seen": 92544976, + "step": 137330 + }, + { + "epoch": 3.3551168983460777, + "grad_norm": 0.001828734646551311, + "learning_rate": 5.900209283036677e-07, + "loss": 0.0, + "num_input_tokens_seen": 92548240, + "step": 137335 + }, + { + "epoch": 3.355239049177925, + "grad_norm": 0.0322355292737484, + "learning_rate": 5.899431485284041e-07, + "loss": 0.0, + "num_input_tokens_seen": 92551568, + "step": 137340 + }, + { + "epoch": 3.355361200009772, + "grad_norm": 0.11325082927942276, + "learning_rate": 5.898653717351847e-07, + "loss": 0.0003, + "num_input_tokens_seen": 92554960, + "step": 137345 + }, + { + "epoch": 3.3554833508416193, + "grad_norm": 3.2338393793907017e-05, + "learning_rate": 5.89787597924574e-07, + "loss": 0.0, + "num_input_tokens_seen": 92558608, + "step": 137350 + }, + { + "epoch": 3.3556055016734665, + "grad_norm": 0.00022079696645960212, + "learning_rate": 5.897098270971388e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92561808, + "step": 137355 + }, + { + "epoch": 3.3557276525053137, + "grad_norm": 0.001337683410383761, + "learning_rate": 5.896320592534438e-07, + "loss": 0.0, + "num_input_tokens_seen": 92564880, + "step": 137360 + }, + { + "epoch": 3.355849803337161, + "grad_norm": 28.633352279663086, + "learning_rate": 5.895542943940546e-07, + "loss": 0.081, + "num_input_tokens_seen": 92567888, + "step": 137365 + }, + { + "epoch": 3.355971954169008, + "grad_norm": 0.00016493708244524896, + "learning_rate": 5.894765325195374e-07, + "loss": 0.0, + "num_input_tokens_seen": 92571280, + "step": 137370 + }, + { + "epoch": 3.356094105000855, + "grad_norm": 0.6020975708961487, + "learning_rate": 5.893987736304569e-07, + "loss": 0.0, + "num_input_tokens_seen": 92574864, + "step": 137375 + }, + { + "epoch": 3.3562162558327024, + "grad_norm": 0.0003073678817600012, + "learning_rate": 5.893210177273793e-07, + "loss": 0.0, + "num_input_tokens_seen": 92578640, + "step": 137380 + }, + { + "epoch": 3.356338406664549, + "grad_norm": 0.014538398012518883, + "learning_rate": 5.892432648108694e-07, + "loss": 0.0, + "num_input_tokens_seen": 92581584, + "step": 137385 + }, + { + "epoch": 3.3564605574963964, + "grad_norm": 9.626116661820561e-05, + "learning_rate": 5.891655148814934e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92585872, + "step": 137390 + }, + { + "epoch": 3.3565827083282436, + "grad_norm": 0.0016042754286900163, + "learning_rate": 5.890877679398158e-07, + "loss": 0.0, + "num_input_tokens_seen": 92588944, + "step": 137395 + }, + { + "epoch": 3.3567048591600908, + "grad_norm": 0.0007302347803488374, + "learning_rate": 5.890100239864024e-07, + "loss": 0.0879, + "num_input_tokens_seen": 92592400, + "step": 137400 + }, + { + "epoch": 3.356827009991938, + "grad_norm": 0.00015298521611839533, + "learning_rate": 5.88932283021819e-07, + "loss": 0.0, + "num_input_tokens_seen": 92595920, + "step": 137405 + }, + { + "epoch": 3.356949160823785, + "grad_norm": 0.004179175477474928, + "learning_rate": 5.888545450466307e-07, + "loss": 0.0, + "num_input_tokens_seen": 92599504, + "step": 137410 + }, + { + "epoch": 3.3570713116556323, + "grad_norm": 8.79567232914269e-05, + "learning_rate": 5.887768100614026e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92602704, + "step": 137415 + }, + { + "epoch": 3.3571934624874795, + "grad_norm": 104.23986053466797, + "learning_rate": 5.886990780667e-07, + "loss": 0.0007, + "num_input_tokens_seen": 92605776, + "step": 137420 + }, + { + "epoch": 3.3573156133193267, + "grad_norm": 0.005066916812211275, + "learning_rate": 5.886213490630883e-07, + "loss": 0.0, + "num_input_tokens_seen": 92609232, + "step": 137425 + }, + { + "epoch": 3.357437764151174, + "grad_norm": 20.911270141601562, + "learning_rate": 5.885436230511332e-07, + "loss": 0.0012, + "num_input_tokens_seen": 92612304, + "step": 137430 + }, + { + "epoch": 3.357559914983021, + "grad_norm": 0.0022453595884144306, + "learning_rate": 5.884659000313989e-07, + "loss": 0.0, + "num_input_tokens_seen": 92615568, + "step": 137435 + }, + { + "epoch": 3.3576820658148683, + "grad_norm": 0.0018182910280302167, + "learning_rate": 5.883881800044519e-07, + "loss": 0.0, + "num_input_tokens_seen": 92619472, + "step": 137440 + }, + { + "epoch": 3.3578042166467155, + "grad_norm": 0.015961581841111183, + "learning_rate": 5.883104629708563e-07, + "loss": 0.0, + "num_input_tokens_seen": 92623376, + "step": 137445 + }, + { + "epoch": 3.3579263674785627, + "grad_norm": 0.00026710316888056695, + "learning_rate": 5.882327489311781e-07, + "loss": 0.043, + "num_input_tokens_seen": 92626768, + "step": 137450 + }, + { + "epoch": 3.35804851831041, + "grad_norm": 0.00041441788198426366, + "learning_rate": 5.881550378859817e-07, + "loss": 0.0, + "num_input_tokens_seen": 92630224, + "step": 137455 + }, + { + "epoch": 3.358170669142257, + "grad_norm": 0.00719722593203187, + "learning_rate": 5.88077329835833e-07, + "loss": 0.0003, + "num_input_tokens_seen": 92633552, + "step": 137460 + }, + { + "epoch": 3.3582928199741042, + "grad_norm": 0.0013678967952728271, + "learning_rate": 5.879996247812969e-07, + "loss": 0.039, + "num_input_tokens_seen": 92636560, + "step": 137465 + }, + { + "epoch": 3.358414970805951, + "grad_norm": 0.000781642273068428, + "learning_rate": 5.879219227229378e-07, + "loss": 0.0353, + "num_input_tokens_seen": 92639888, + "step": 137470 + }, + { + "epoch": 3.358537121637798, + "grad_norm": 0.0018686053808778524, + "learning_rate": 5.878442236613217e-07, + "loss": 0.0, + "num_input_tokens_seen": 92643408, + "step": 137475 + }, + { + "epoch": 3.3586592724696454, + "grad_norm": 0.004237237386405468, + "learning_rate": 5.87766527597013e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92646864, + "step": 137480 + }, + { + "epoch": 3.3587814233014925, + "grad_norm": 0.0021779872477054596, + "learning_rate": 5.876888345305769e-07, + "loss": 0.0, + "num_input_tokens_seen": 92650320, + "step": 137485 + }, + { + "epoch": 3.3589035741333397, + "grad_norm": 0.0003856025286950171, + "learning_rate": 5.87611144462579e-07, + "loss": 0.0, + "num_input_tokens_seen": 92653264, + "step": 137490 + }, + { + "epoch": 3.359025724965187, + "grad_norm": 0.002911294810473919, + "learning_rate": 5.875334573935833e-07, + "loss": 0.0346, + "num_input_tokens_seen": 92656592, + "step": 137495 + }, + { + "epoch": 3.359147875797034, + "grad_norm": 0.026074426248669624, + "learning_rate": 5.874557733241557e-07, + "loss": 0.0, + "num_input_tokens_seen": 92660240, + "step": 137500 + }, + { + "epoch": 3.3592700266288813, + "grad_norm": 0.0025548043195158243, + "learning_rate": 5.873780922548602e-07, + "loss": 0.0, + "num_input_tokens_seen": 92663632, + "step": 137505 + }, + { + "epoch": 3.3593921774607285, + "grad_norm": 0.00036749220453202724, + "learning_rate": 5.873004141862626e-07, + "loss": 0.0, + "num_input_tokens_seen": 92667088, + "step": 137510 + }, + { + "epoch": 3.3595143282925757, + "grad_norm": 33.46885299682617, + "learning_rate": 5.872227391189273e-07, + "loss": 0.0619, + "num_input_tokens_seen": 92670160, + "step": 137515 + }, + { + "epoch": 3.359636479124423, + "grad_norm": 54.26581954956055, + "learning_rate": 5.871450670534189e-07, + "loss": 0.0596, + "num_input_tokens_seen": 92673488, + "step": 137520 + }, + { + "epoch": 3.35975862995627, + "grad_norm": 0.0010688966140151024, + "learning_rate": 5.870673979903031e-07, + "loss": 0.0, + "num_input_tokens_seen": 92676496, + "step": 137525 + }, + { + "epoch": 3.3598807807881172, + "grad_norm": 0.0011966531164944172, + "learning_rate": 5.869897319301438e-07, + "loss": 0.0, + "num_input_tokens_seen": 92679760, + "step": 137530 + }, + { + "epoch": 3.3600029316199644, + "grad_norm": 0.001972771715372801, + "learning_rate": 5.869120688735067e-07, + "loss": 0.0, + "num_input_tokens_seen": 92683728, + "step": 137535 + }, + { + "epoch": 3.3601250824518116, + "grad_norm": 0.0003887968778144568, + "learning_rate": 5.868344088209558e-07, + "loss": 0.0414, + "num_input_tokens_seen": 92686864, + "step": 137540 + }, + { + "epoch": 3.360247233283659, + "grad_norm": 0.0006644058739766479, + "learning_rate": 5.867567517730565e-07, + "loss": 0.0477, + "num_input_tokens_seen": 92690256, + "step": 137545 + }, + { + "epoch": 3.360369384115506, + "grad_norm": 0.00020107610907871276, + "learning_rate": 5.866790977303729e-07, + "loss": 0.0, + "num_input_tokens_seen": 92693584, + "step": 137550 + }, + { + "epoch": 3.3604915349473528, + "grad_norm": 0.008693316951394081, + "learning_rate": 5.866014466934701e-07, + "loss": 0.0325, + "num_input_tokens_seen": 92697104, + "step": 137555 + }, + { + "epoch": 3.3606136857792004, + "grad_norm": 13.368460655212402, + "learning_rate": 5.865237986629132e-07, + "loss": 0.065, + "num_input_tokens_seen": 92700624, + "step": 137560 + }, + { + "epoch": 3.360735836611047, + "grad_norm": 0.0017884562257677317, + "learning_rate": 5.864461536392662e-07, + "loss": 0.0, + "num_input_tokens_seen": 92704080, + "step": 137565 + }, + { + "epoch": 3.3608579874428943, + "grad_norm": 0.002366987755522132, + "learning_rate": 5.863685116230939e-07, + "loss": 0.0406, + "num_input_tokens_seen": 92707024, + "step": 137570 + }, + { + "epoch": 3.3609801382747415, + "grad_norm": 0.003429947653785348, + "learning_rate": 5.862908726149611e-07, + "loss": 0.0, + "num_input_tokens_seen": 92710224, + "step": 137575 + }, + { + "epoch": 3.3611022891065887, + "grad_norm": 0.0011162642622366548, + "learning_rate": 5.862132366154322e-07, + "loss": 0.0, + "num_input_tokens_seen": 92713744, + "step": 137580 + }, + { + "epoch": 3.361224439938436, + "grad_norm": 0.01790856011211872, + "learning_rate": 5.861356036250724e-07, + "loss": 0.0, + "num_input_tokens_seen": 92716816, + "step": 137585 + }, + { + "epoch": 3.361346590770283, + "grad_norm": 0.003260530298575759, + "learning_rate": 5.860579736444453e-07, + "loss": 0.0, + "num_input_tokens_seen": 92720272, + "step": 137590 + }, + { + "epoch": 3.3614687416021303, + "grad_norm": 0.537703812122345, + "learning_rate": 5.859803466741164e-07, + "loss": 0.0007, + "num_input_tokens_seen": 92723280, + "step": 137595 + }, + { + "epoch": 3.3615908924339775, + "grad_norm": 0.003660767339169979, + "learning_rate": 5.859027227146493e-07, + "loss": 0.0, + "num_input_tokens_seen": 92727120, + "step": 137600 + }, + { + "epoch": 3.3617130432658247, + "grad_norm": 0.25912341475486755, + "learning_rate": 5.858251017666095e-07, + "loss": 0.0005, + "num_input_tokens_seen": 92730576, + "step": 137605 + }, + { + "epoch": 3.361835194097672, + "grad_norm": 0.0004168798914179206, + "learning_rate": 5.857474838305605e-07, + "loss": 0.0, + "num_input_tokens_seen": 92734416, + "step": 137610 + }, + { + "epoch": 3.361957344929519, + "grad_norm": 0.005119622685015202, + "learning_rate": 5.856698689070674e-07, + "loss": 0.0, + "num_input_tokens_seen": 92738064, + "step": 137615 + }, + { + "epoch": 3.362079495761366, + "grad_norm": 0.006182185839861631, + "learning_rate": 5.855922569966945e-07, + "loss": 0.0, + "num_input_tokens_seen": 92741520, + "step": 137620 + }, + { + "epoch": 3.3622016465932134, + "grad_norm": 0.0035552720073610544, + "learning_rate": 5.85514648100006e-07, + "loss": 0.0551, + "num_input_tokens_seen": 92745168, + "step": 137625 + }, + { + "epoch": 3.3623237974250606, + "grad_norm": 0.0037220127414911985, + "learning_rate": 5.854370422175668e-07, + "loss": 0.0, + "num_input_tokens_seen": 92748432, + "step": 137630 + }, + { + "epoch": 3.362445948256908, + "grad_norm": 19.34136199951172, + "learning_rate": 5.853594393499406e-07, + "loss": 0.039, + "num_input_tokens_seen": 92751440, + "step": 137635 + }, + { + "epoch": 3.362568099088755, + "grad_norm": 0.000591250485740602, + "learning_rate": 5.852818394976919e-07, + "loss": 0.0, + "num_input_tokens_seen": 92754640, + "step": 137640 + }, + { + "epoch": 3.362690249920602, + "grad_norm": 0.0014338805340230465, + "learning_rate": 5.852042426613858e-07, + "loss": 0.0542, + "num_input_tokens_seen": 92757776, + "step": 137645 + }, + { + "epoch": 3.362812400752449, + "grad_norm": 0.020083287730813026, + "learning_rate": 5.851266488415856e-07, + "loss": 0.0, + "num_input_tokens_seen": 92760912, + "step": 137650 + }, + { + "epoch": 3.362934551584296, + "grad_norm": 0.0002926739689428359, + "learning_rate": 5.850490580388562e-07, + "loss": 0.0, + "num_input_tokens_seen": 92764240, + "step": 137655 + }, + { + "epoch": 3.3630567024161433, + "grad_norm": 0.0002616309793666005, + "learning_rate": 5.849714702537615e-07, + "loss": 0.0, + "num_input_tokens_seen": 92767568, + "step": 137660 + }, + { + "epoch": 3.3631788532479905, + "grad_norm": 0.0019339133286848664, + "learning_rate": 5.848938854868661e-07, + "loss": 0.0, + "num_input_tokens_seen": 92770704, + "step": 137665 + }, + { + "epoch": 3.3633010040798377, + "grad_norm": 0.004293452017009258, + "learning_rate": 5.848163037387339e-07, + "loss": 0.0, + "num_input_tokens_seen": 92773776, + "step": 137670 + }, + { + "epoch": 3.363423154911685, + "grad_norm": 0.012178168632090092, + "learning_rate": 5.847387250099292e-07, + "loss": 0.0, + "num_input_tokens_seen": 92777296, + "step": 137675 + }, + { + "epoch": 3.363545305743532, + "grad_norm": 0.12999624013900757, + "learning_rate": 5.846611493010163e-07, + "loss": 0.0475, + "num_input_tokens_seen": 92780688, + "step": 137680 + }, + { + "epoch": 3.3636674565753792, + "grad_norm": 0.008777649141848087, + "learning_rate": 5.845835766125589e-07, + "loss": 0.0, + "num_input_tokens_seen": 92784144, + "step": 137685 + }, + { + "epoch": 3.3637896074072264, + "grad_norm": 0.0008219497394748032, + "learning_rate": 5.84506006945122e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92787664, + "step": 137690 + }, + { + "epoch": 3.3639117582390736, + "grad_norm": 0.004280789755284786, + "learning_rate": 5.844284402992685e-07, + "loss": 0.0, + "num_input_tokens_seen": 92790736, + "step": 137695 + }, + { + "epoch": 3.364033909070921, + "grad_norm": 20.881895065307617, + "learning_rate": 5.843508766755638e-07, + "loss": 0.0418, + "num_input_tokens_seen": 92793808, + "step": 137700 + }, + { + "epoch": 3.364156059902768, + "grad_norm": 0.0034292656928300858, + "learning_rate": 5.842733160745709e-07, + "loss": 0.0, + "num_input_tokens_seen": 92797712, + "step": 137705 + }, + { + "epoch": 3.364278210734615, + "grad_norm": 0.0009706453420221806, + "learning_rate": 5.841957584968542e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92801296, + "step": 137710 + }, + { + "epoch": 3.3644003615664624, + "grad_norm": 0.02814302034676075, + "learning_rate": 5.841182039429782e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92804880, + "step": 137715 + }, + { + "epoch": 3.3645225123983096, + "grad_norm": 0.001046680612489581, + "learning_rate": 5.840406524135061e-07, + "loss": 0.0, + "num_input_tokens_seen": 92808272, + "step": 137720 + }, + { + "epoch": 3.3646446632301568, + "grad_norm": 0.0003182195359840989, + "learning_rate": 5.839631039090025e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92811408, + "step": 137725 + }, + { + "epoch": 3.364766814062004, + "grad_norm": 0.0013458180474117398, + "learning_rate": 5.838855584300311e-07, + "loss": 0.0, + "num_input_tokens_seen": 92815120, + "step": 137730 + }, + { + "epoch": 3.3648889648938507, + "grad_norm": 0.011692610569298267, + "learning_rate": 5.838080159771556e-07, + "loss": 0.0, + "num_input_tokens_seen": 92818448, + "step": 137735 + }, + { + "epoch": 3.3650111157256983, + "grad_norm": 0.0017371824942529202, + "learning_rate": 5.837304765509405e-07, + "loss": 0.0, + "num_input_tokens_seen": 92821648, + "step": 137740 + }, + { + "epoch": 3.365133266557545, + "grad_norm": 0.006624247413128614, + "learning_rate": 5.83652940151949e-07, + "loss": 0.0, + "num_input_tokens_seen": 92824848, + "step": 137745 + }, + { + "epoch": 3.3652554173893923, + "grad_norm": 0.004637475591152906, + "learning_rate": 5.835754067807457e-07, + "loss": 0.0, + "num_input_tokens_seen": 92827664, + "step": 137750 + }, + { + "epoch": 3.3653775682212395, + "grad_norm": 0.009000650607049465, + "learning_rate": 5.834978764378935e-07, + "loss": 0.0, + "num_input_tokens_seen": 92831056, + "step": 137755 + }, + { + "epoch": 3.3654997190530866, + "grad_norm": 0.00272104749456048, + "learning_rate": 5.834203491239574e-07, + "loss": 0.0, + "num_input_tokens_seen": 92834448, + "step": 137760 + }, + { + "epoch": 3.365621869884934, + "grad_norm": 0.0005968745681457222, + "learning_rate": 5.833428248395e-07, + "loss": 0.0, + "num_input_tokens_seen": 92838032, + "step": 137765 + }, + { + "epoch": 3.365744020716781, + "grad_norm": 0.10684335976839066, + "learning_rate": 5.832653035850856e-07, + "loss": 0.005, + "num_input_tokens_seen": 92841168, + "step": 137770 + }, + { + "epoch": 3.365866171548628, + "grad_norm": 0.011952157132327557, + "learning_rate": 5.831877853612785e-07, + "loss": 0.0, + "num_input_tokens_seen": 92844624, + "step": 137775 + }, + { + "epoch": 3.3659883223804754, + "grad_norm": 0.0029711390379816294, + "learning_rate": 5.831102701686416e-07, + "loss": 0.0257, + "num_input_tokens_seen": 92848080, + "step": 137780 + }, + { + "epoch": 3.3661104732123226, + "grad_norm": 0.006985391024500132, + "learning_rate": 5.830327580077392e-07, + "loss": 0.0, + "num_input_tokens_seen": 92851088, + "step": 137785 + }, + { + "epoch": 3.36623262404417, + "grad_norm": 0.00044560953392647207, + "learning_rate": 5.829552488791345e-07, + "loss": 0.0, + "num_input_tokens_seen": 92854800, + "step": 137790 + }, + { + "epoch": 3.366354774876017, + "grad_norm": 0.003403823124244809, + "learning_rate": 5.828777427833917e-07, + "loss": 0.0478, + "num_input_tokens_seen": 92858000, + "step": 137795 + }, + { + "epoch": 3.366476925707864, + "grad_norm": 0.003880531992763281, + "learning_rate": 5.82800239721074e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92861776, + "step": 137800 + }, + { + "epoch": 3.3665990765397114, + "grad_norm": 0.0010169809684157372, + "learning_rate": 5.82722739692745e-07, + "loss": 0.0676, + "num_input_tokens_seen": 92864592, + "step": 137805 + }, + { + "epoch": 3.3667212273715585, + "grad_norm": 0.001201188424602151, + "learning_rate": 5.826452426989688e-07, + "loss": 0.0, + "num_input_tokens_seen": 92867792, + "step": 137810 + }, + { + "epoch": 3.3668433782034057, + "grad_norm": 0.000344475352903828, + "learning_rate": 5.825677487403082e-07, + "loss": 0.0846, + "num_input_tokens_seen": 92870928, + "step": 137815 + }, + { + "epoch": 3.366965529035253, + "grad_norm": 0.00039618040318600833, + "learning_rate": 5.824902578173278e-07, + "loss": 0.0, + "num_input_tokens_seen": 92874576, + "step": 137820 + }, + { + "epoch": 3.3670876798671, + "grad_norm": 0.0006792770582251251, + "learning_rate": 5.824127699305899e-07, + "loss": 0.0, + "num_input_tokens_seen": 92878672, + "step": 137825 + }, + { + "epoch": 3.367209830698947, + "grad_norm": 0.012354973703622818, + "learning_rate": 5.823352850806587e-07, + "loss": 0.0, + "num_input_tokens_seen": 92882000, + "step": 137830 + }, + { + "epoch": 3.367331981530794, + "grad_norm": 0.027326742187142372, + "learning_rate": 5.822578032680983e-07, + "loss": 0.0, + "num_input_tokens_seen": 92885392, + "step": 137835 + }, + { + "epoch": 3.3674541323626412, + "grad_norm": 0.0008394501637667418, + "learning_rate": 5.821803244934708e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92888976, + "step": 137840 + }, + { + "epoch": 3.3675762831944884, + "grad_norm": 0.00104551634285599, + "learning_rate": 5.821028487573408e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92892560, + "step": 137845 + }, + { + "epoch": 3.3676984340263356, + "grad_norm": 0.0046212682500481606, + "learning_rate": 5.82025376060271e-07, + "loss": 0.0027, + "num_input_tokens_seen": 92895696, + "step": 137850 + }, + { + "epoch": 3.367820584858183, + "grad_norm": 0.20989792048931122, + "learning_rate": 5.819479064028254e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92898960, + "step": 137855 + }, + { + "epoch": 3.36794273569003, + "grad_norm": 0.00013297107943799347, + "learning_rate": 5.818704397855667e-07, + "loss": 0.0237, + "num_input_tokens_seen": 92902224, + "step": 137860 + }, + { + "epoch": 3.368064886521877, + "grad_norm": 0.0011130105704069138, + "learning_rate": 5.817929762090588e-07, + "loss": 0.0, + "num_input_tokens_seen": 92905744, + "step": 137865 + }, + { + "epoch": 3.3681870373537244, + "grad_norm": 0.0032320625614374876, + "learning_rate": 5.81715515673865e-07, + "loss": 0.0, + "num_input_tokens_seen": 92909136, + "step": 137870 + }, + { + "epoch": 3.3683091881855716, + "grad_norm": 0.0019834605045616627, + "learning_rate": 5.816380581805482e-07, + "loss": 0.0, + "num_input_tokens_seen": 92912528, + "step": 137875 + }, + { + "epoch": 3.3684313390174188, + "grad_norm": 0.0003283233963884413, + "learning_rate": 5.815606037296723e-07, + "loss": 0.0, + "num_input_tokens_seen": 92915856, + "step": 137880 + }, + { + "epoch": 3.368553489849266, + "grad_norm": 0.030905848369002342, + "learning_rate": 5.814831523217998e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92919184, + "step": 137885 + }, + { + "epoch": 3.368675640681113, + "grad_norm": 0.0006857877597212791, + "learning_rate": 5.814057039574944e-07, + "loss": 0.0, + "num_input_tokens_seen": 92922064, + "step": 137890 + }, + { + "epoch": 3.3687977915129603, + "grad_norm": 0.8784730434417725, + "learning_rate": 5.813282586373198e-07, + "loss": 0.0002, + "num_input_tokens_seen": 92925392, + "step": 137895 + }, + { + "epoch": 3.3689199423448075, + "grad_norm": 0.010333950631320477, + "learning_rate": 5.812508163618389e-07, + "loss": 0.0, + "num_input_tokens_seen": 92928848, + "step": 137900 + }, + { + "epoch": 3.3690420931766547, + "grad_norm": 0.0007572348113171756, + "learning_rate": 5.811733771316139e-07, + "loss": 0.0, + "num_input_tokens_seen": 92931984, + "step": 137905 + }, + { + "epoch": 3.369164244008502, + "grad_norm": 0.006056161597371101, + "learning_rate": 5.810959409472093e-07, + "loss": 0.0, + "num_input_tokens_seen": 92935440, + "step": 137910 + }, + { + "epoch": 3.3692863948403486, + "grad_norm": 0.0007624893332831562, + "learning_rate": 5.810185078091879e-07, + "loss": 0.0, + "num_input_tokens_seen": 92939088, + "step": 137915 + }, + { + "epoch": 3.3694085456721963, + "grad_norm": 0.00043983652722090483, + "learning_rate": 5.809410777181118e-07, + "loss": 0.0, + "num_input_tokens_seen": 92942544, + "step": 137920 + }, + { + "epoch": 3.369530696504043, + "grad_norm": 0.3516944646835327, + "learning_rate": 5.808636506745453e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92945360, + "step": 137925 + }, + { + "epoch": 3.36965284733589, + "grad_norm": 0.03775249049067497, + "learning_rate": 5.807862266790512e-07, + "loss": 0.0, + "num_input_tokens_seen": 92948944, + "step": 137930 + }, + { + "epoch": 3.3697749981677374, + "grad_norm": 0.00020264858903829008, + "learning_rate": 5.807088057321921e-07, + "loss": 0.068, + "num_input_tokens_seen": 92952400, + "step": 137935 + }, + { + "epoch": 3.3698971489995846, + "grad_norm": 0.003877782030031085, + "learning_rate": 5.806313878345317e-07, + "loss": 0.0, + "num_input_tokens_seen": 92955664, + "step": 137940 + }, + { + "epoch": 3.370019299831432, + "grad_norm": 0.1308281123638153, + "learning_rate": 5.805539729866322e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92959376, + "step": 137945 + }, + { + "epoch": 3.370141450663279, + "grad_norm": 0.009636446833610535, + "learning_rate": 5.804765611890576e-07, + "loss": 0.0, + "num_input_tokens_seen": 92962768, + "step": 137950 + }, + { + "epoch": 3.370263601495126, + "grad_norm": 0.003599069779738784, + "learning_rate": 5.803991524423698e-07, + "loss": 0.0, + "num_input_tokens_seen": 92965776, + "step": 137955 + }, + { + "epoch": 3.3703857523269733, + "grad_norm": 21.893583297729492, + "learning_rate": 5.803217467471322e-07, + "loss": 0.1704, + "num_input_tokens_seen": 92968848, + "step": 137960 + }, + { + "epoch": 3.3705079031588205, + "grad_norm": 0.07198493182659149, + "learning_rate": 5.802443441039082e-07, + "loss": 0.0, + "num_input_tokens_seen": 92971728, + "step": 137965 + }, + { + "epoch": 3.3706300539906677, + "grad_norm": 0.003822456346824765, + "learning_rate": 5.801669445132597e-07, + "loss": 0.0007, + "num_input_tokens_seen": 92975440, + "step": 137970 + }, + { + "epoch": 3.370752204822515, + "grad_norm": 0.0025848846416920424, + "learning_rate": 5.800895479757506e-07, + "loss": 0.0, + "num_input_tokens_seen": 92978832, + "step": 137975 + }, + { + "epoch": 3.370874355654362, + "grad_norm": 0.0029444515239447355, + "learning_rate": 5.800121544919429e-07, + "loss": 0.0, + "num_input_tokens_seen": 92982288, + "step": 137980 + }, + { + "epoch": 3.3709965064862093, + "grad_norm": 0.006925372406840324, + "learning_rate": 5.799347640623997e-07, + "loss": 0.0001, + "num_input_tokens_seen": 92985744, + "step": 137985 + }, + { + "epoch": 3.3711186573180565, + "grad_norm": 0.0038767820224165916, + "learning_rate": 5.798573766876841e-07, + "loss": 0.0, + "num_input_tokens_seen": 92989328, + "step": 137990 + }, + { + "epoch": 3.3712408081499037, + "grad_norm": 0.06253549456596375, + "learning_rate": 5.797799923683586e-07, + "loss": 0.0, + "num_input_tokens_seen": 92992720, + "step": 137995 + }, + { + "epoch": 3.3713629589817504, + "grad_norm": 0.00161257095169276, + "learning_rate": 5.797026111049863e-07, + "loss": 0.0, + "num_input_tokens_seen": 92995984, + "step": 138000 + }, + { + "epoch": 3.371485109813598, + "grad_norm": 0.0027689035050570965, + "learning_rate": 5.796252328981295e-07, + "loss": 0.0, + "num_input_tokens_seen": 93000208, + "step": 138005 + }, + { + "epoch": 3.371607260645445, + "grad_norm": 0.00044218875700607896, + "learning_rate": 5.795478577483508e-07, + "loss": 0.0003, + "num_input_tokens_seen": 93003920, + "step": 138010 + }, + { + "epoch": 3.371729411477292, + "grad_norm": 0.004747296683490276, + "learning_rate": 5.794704856562136e-07, + "loss": 0.0096, + "num_input_tokens_seen": 93007504, + "step": 138015 + }, + { + "epoch": 3.371851562309139, + "grad_norm": 0.00363075640052557, + "learning_rate": 5.793931166222798e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93010896, + "step": 138020 + }, + { + "epoch": 3.3719737131409864, + "grad_norm": 0.00033181399339810014, + "learning_rate": 5.793157506471127e-07, + "loss": 0.0, + "num_input_tokens_seen": 93014160, + "step": 138025 + }, + { + "epoch": 3.3720958639728336, + "grad_norm": 5.022310506319627e-05, + "learning_rate": 5.79238387731274e-07, + "loss": 0.0, + "num_input_tokens_seen": 93017424, + "step": 138030 + }, + { + "epoch": 3.3722180148046808, + "grad_norm": 0.0004518816713243723, + "learning_rate": 5.791610278753276e-07, + "loss": 0.0, + "num_input_tokens_seen": 93021456, + "step": 138035 + }, + { + "epoch": 3.372340165636528, + "grad_norm": 0.08886852115392685, + "learning_rate": 5.79083671079835e-07, + "loss": 0.0334, + "num_input_tokens_seen": 93024464, + "step": 138040 + }, + { + "epoch": 3.372462316468375, + "grad_norm": 0.0003890712687280029, + "learning_rate": 5.79006317345359e-07, + "loss": 0.0, + "num_input_tokens_seen": 93027600, + "step": 138045 + }, + { + "epoch": 3.3725844673002223, + "grad_norm": 0.1624067723751068, + "learning_rate": 5.789289666724629e-07, + "loss": 0.0, + "num_input_tokens_seen": 93031248, + "step": 138050 + }, + { + "epoch": 3.3727066181320695, + "grad_norm": 0.0007669181213714182, + "learning_rate": 5.78851619061708e-07, + "loss": 0.0, + "num_input_tokens_seen": 93034576, + "step": 138055 + }, + { + "epoch": 3.3728287689639167, + "grad_norm": 0.009815610013902187, + "learning_rate": 5.787742745136579e-07, + "loss": 0.0, + "num_input_tokens_seen": 93037648, + "step": 138060 + }, + { + "epoch": 3.372950919795764, + "grad_norm": 0.00123111205175519, + "learning_rate": 5.786969330288741e-07, + "loss": 0.0, + "num_input_tokens_seen": 93040720, + "step": 138065 + }, + { + "epoch": 3.373073070627611, + "grad_norm": 0.004505062475800514, + "learning_rate": 5.7861959460792e-07, + "loss": 0.0726, + "num_input_tokens_seen": 93043984, + "step": 138070 + }, + { + "epoch": 3.3731952214594583, + "grad_norm": 0.005152316763997078, + "learning_rate": 5.785422592513572e-07, + "loss": 0.0, + "num_input_tokens_seen": 93046992, + "step": 138075 + }, + { + "epoch": 3.3733173722913055, + "grad_norm": 0.0056071956641972065, + "learning_rate": 5.784649269597482e-07, + "loss": 0.0, + "num_input_tokens_seen": 93050192, + "step": 138080 + }, + { + "epoch": 3.3734395231231527, + "grad_norm": 0.011997929774224758, + "learning_rate": 5.783875977336563e-07, + "loss": 0.0, + "num_input_tokens_seen": 93053456, + "step": 138085 + }, + { + "epoch": 3.373561673955, + "grad_norm": 0.0025390381924808025, + "learning_rate": 5.783102715736426e-07, + "loss": 0.0377, + "num_input_tokens_seen": 93057040, + "step": 138090 + }, + { + "epoch": 3.3736838247868466, + "grad_norm": 0.007399492897093296, + "learning_rate": 5.782329484802706e-07, + "loss": 0.0, + "num_input_tokens_seen": 93060240, + "step": 138095 + }, + { + "epoch": 3.3738059756186938, + "grad_norm": 0.09200409799814224, + "learning_rate": 5.781556284541015e-07, + "loss": 0.0, + "num_input_tokens_seen": 93064016, + "step": 138100 + }, + { + "epoch": 3.373928126450541, + "grad_norm": 0.0009583776700310409, + "learning_rate": 5.780783114956986e-07, + "loss": 0.0, + "num_input_tokens_seen": 93066896, + "step": 138105 + }, + { + "epoch": 3.374050277282388, + "grad_norm": 21.754066467285156, + "learning_rate": 5.780009976056237e-07, + "loss": 0.056, + "num_input_tokens_seen": 93070480, + "step": 138110 + }, + { + "epoch": 3.3741724281142353, + "grad_norm": 0.0002358252095291391, + "learning_rate": 5.779236867844385e-07, + "loss": 0.0, + "num_input_tokens_seen": 93074128, + "step": 138115 + }, + { + "epoch": 3.3742945789460825, + "grad_norm": 0.001320650801062584, + "learning_rate": 5.778463790327064e-07, + "loss": 0.0, + "num_input_tokens_seen": 93077456, + "step": 138120 + }, + { + "epoch": 3.3744167297779297, + "grad_norm": 0.005088005214929581, + "learning_rate": 5.777690743509885e-07, + "loss": 0.0, + "num_input_tokens_seen": 93080976, + "step": 138125 + }, + { + "epoch": 3.374538880609777, + "grad_norm": 0.00016588116704951972, + "learning_rate": 5.776917727398478e-07, + "loss": 0.0, + "num_input_tokens_seen": 93084560, + "step": 138130 + }, + { + "epoch": 3.374661031441624, + "grad_norm": 0.023031558841466904, + "learning_rate": 5.776144741998457e-07, + "loss": 0.0, + "num_input_tokens_seen": 93088208, + "step": 138135 + }, + { + "epoch": 3.3747831822734713, + "grad_norm": 0.0005032282206229866, + "learning_rate": 5.775371787315448e-07, + "loss": 0.0, + "num_input_tokens_seen": 93091792, + "step": 138140 + }, + { + "epoch": 3.3749053331053185, + "grad_norm": 0.06005355343222618, + "learning_rate": 5.774598863355077e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93095440, + "step": 138145 + }, + { + "epoch": 3.3750274839371657, + "grad_norm": 0.0024213064461946487, + "learning_rate": 5.773825970122954e-07, + "loss": 0.0454, + "num_input_tokens_seen": 93098768, + "step": 138150 + }, + { + "epoch": 3.375149634769013, + "grad_norm": 0.0022115923929959536, + "learning_rate": 5.773053107624711e-07, + "loss": 0.0569, + "num_input_tokens_seen": 93102096, + "step": 138155 + }, + { + "epoch": 3.37527178560086, + "grad_norm": 0.0012570025864988565, + "learning_rate": 5.772280275865955e-07, + "loss": 0.0, + "num_input_tokens_seen": 93105616, + "step": 138160 + }, + { + "epoch": 3.3753939364327072, + "grad_norm": 0.0030597366858273745, + "learning_rate": 5.771507474852322e-07, + "loss": 0.0, + "num_input_tokens_seen": 93109200, + "step": 138165 + }, + { + "epoch": 3.3755160872645544, + "grad_norm": 0.0005051979096606374, + "learning_rate": 5.770734704589417e-07, + "loss": 0.0643, + "num_input_tokens_seen": 93112208, + "step": 138170 + }, + { + "epoch": 3.3756382380964016, + "grad_norm": 0.00468916492536664, + "learning_rate": 5.769961965082868e-07, + "loss": 0.0, + "num_input_tokens_seen": 93115856, + "step": 138175 + }, + { + "epoch": 3.3757603889282484, + "grad_norm": 0.019855257123708725, + "learning_rate": 5.769189256338299e-07, + "loss": 0.0, + "num_input_tokens_seen": 93119056, + "step": 138180 + }, + { + "epoch": 3.375882539760096, + "grad_norm": 0.014095536433160305, + "learning_rate": 5.768416578361317e-07, + "loss": 0.0, + "num_input_tokens_seen": 93122704, + "step": 138185 + }, + { + "epoch": 3.3760046905919427, + "grad_norm": 0.045923784375190735, + "learning_rate": 5.767643931157552e-07, + "loss": 0.0, + "num_input_tokens_seen": 93125904, + "step": 138190 + }, + { + "epoch": 3.37612684142379, + "grad_norm": 0.20710352063179016, + "learning_rate": 5.766871314732616e-07, + "loss": 0.0, + "num_input_tokens_seen": 93129168, + "step": 138195 + }, + { + "epoch": 3.376248992255637, + "grad_norm": 0.0003032416570931673, + "learning_rate": 5.76609872909213e-07, + "loss": 0.0464, + "num_input_tokens_seen": 93132496, + "step": 138200 + }, + { + "epoch": 3.3763711430874843, + "grad_norm": 0.0032075492199510336, + "learning_rate": 5.765326174241716e-07, + "loss": 0.0588, + "num_input_tokens_seen": 93135888, + "step": 138205 + }, + { + "epoch": 3.3764932939193315, + "grad_norm": 0.02502131648361683, + "learning_rate": 5.76455365018699e-07, + "loss": 0.0, + "num_input_tokens_seen": 93139088, + "step": 138210 + }, + { + "epoch": 3.3766154447511787, + "grad_norm": 0.0029747423250228167, + "learning_rate": 5.763781156933565e-07, + "loss": 0.0, + "num_input_tokens_seen": 93142864, + "step": 138215 + }, + { + "epoch": 3.376737595583026, + "grad_norm": 0.11040252447128296, + "learning_rate": 5.763008694487066e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93146256, + "step": 138220 + }, + { + "epoch": 3.376859746414873, + "grad_norm": 0.004152897745370865, + "learning_rate": 5.762236262853108e-07, + "loss": 0.0, + "num_input_tokens_seen": 93149584, + "step": 138225 + }, + { + "epoch": 3.3769818972467203, + "grad_norm": 0.007863405160605907, + "learning_rate": 5.761463862037304e-07, + "loss": 0.0, + "num_input_tokens_seen": 93152976, + "step": 138230 + }, + { + "epoch": 3.3771040480785675, + "grad_norm": 0.005199017468839884, + "learning_rate": 5.760691492045275e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93156048, + "step": 138235 + }, + { + "epoch": 3.3772261989104146, + "grad_norm": 0.0022649518214166164, + "learning_rate": 5.75991915288264e-07, + "loss": 0.0, + "num_input_tokens_seen": 93159184, + "step": 138240 + }, + { + "epoch": 3.377348349742262, + "grad_norm": 0.024286113679409027, + "learning_rate": 5.759146844555011e-07, + "loss": 0.0, + "num_input_tokens_seen": 93162960, + "step": 138245 + }, + { + "epoch": 3.377470500574109, + "grad_norm": 0.00025870741228573024, + "learning_rate": 5.758374567068011e-07, + "loss": 0.0, + "num_input_tokens_seen": 93166544, + "step": 138250 + }, + { + "epoch": 3.377592651405956, + "grad_norm": 0.0024284597020596266, + "learning_rate": 5.757602320427248e-07, + "loss": 0.0013, + "num_input_tokens_seen": 93169616, + "step": 138255 + }, + { + "epoch": 3.3777148022378034, + "grad_norm": 0.00405424740165472, + "learning_rate": 5.756830104638345e-07, + "loss": 0.0, + "num_input_tokens_seen": 93173200, + "step": 138260 + }, + { + "epoch": 3.3778369530696506, + "grad_norm": 0.004090134520083666, + "learning_rate": 5.756057919706912e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93176272, + "step": 138265 + }, + { + "epoch": 3.377959103901498, + "grad_norm": 0.07391510158777237, + "learning_rate": 5.755285765638565e-07, + "loss": 0.0, + "num_input_tokens_seen": 93179536, + "step": 138270 + }, + { + "epoch": 3.3780812547333445, + "grad_norm": 0.01007003989070654, + "learning_rate": 5.754513642438928e-07, + "loss": 0.0, + "num_input_tokens_seen": 93182928, + "step": 138275 + }, + { + "epoch": 3.3782034055651917, + "grad_norm": 0.01484597846865654, + "learning_rate": 5.753741550113605e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93186128, + "step": 138280 + }, + { + "epoch": 3.378325556397039, + "grad_norm": 0.0003579423646442592, + "learning_rate": 5.752969488668218e-07, + "loss": 0.0246, + "num_input_tokens_seen": 93189776, + "step": 138285 + }, + { + "epoch": 3.378447707228886, + "grad_norm": 0.003087420715019107, + "learning_rate": 5.752197458108376e-07, + "loss": 0.0512, + "num_input_tokens_seen": 93193424, + "step": 138290 + }, + { + "epoch": 3.3785698580607333, + "grad_norm": 0.005263794679194689, + "learning_rate": 5.751425458439698e-07, + "loss": 0.0, + "num_input_tokens_seen": 93196688, + "step": 138295 + }, + { + "epoch": 3.3786920088925805, + "grad_norm": 0.011670518666505814, + "learning_rate": 5.750653489667801e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93200080, + "step": 138300 + }, + { + "epoch": 3.3788141597244277, + "grad_norm": 23.023923873901367, + "learning_rate": 5.749881551798288e-07, + "loss": 0.0213, + "num_input_tokens_seen": 93203152, + "step": 138305 + }, + { + "epoch": 3.378936310556275, + "grad_norm": 0.0015072767855599523, + "learning_rate": 5.749109644836786e-07, + "loss": 0.0, + "num_input_tokens_seen": 93206032, + "step": 138310 + }, + { + "epoch": 3.379058461388122, + "grad_norm": 0.0019943653605878353, + "learning_rate": 5.748337768788901e-07, + "loss": 0.0, + "num_input_tokens_seen": 93209040, + "step": 138315 + }, + { + "epoch": 3.3791806122199692, + "grad_norm": 0.03423043712973595, + "learning_rate": 5.747565923660244e-07, + "loss": 0.0, + "num_input_tokens_seen": 93212816, + "step": 138320 + }, + { + "epoch": 3.3793027630518164, + "grad_norm": 0.007128616329282522, + "learning_rate": 5.746794109456434e-07, + "loss": 0.0, + "num_input_tokens_seen": 93216272, + "step": 138325 + }, + { + "epoch": 3.3794249138836636, + "grad_norm": 0.0021170915570110083, + "learning_rate": 5.746022326183079e-07, + "loss": 0.0, + "num_input_tokens_seen": 93219664, + "step": 138330 + }, + { + "epoch": 3.379547064715511, + "grad_norm": 0.0022564528044313192, + "learning_rate": 5.745250573845797e-07, + "loss": 0.0246, + "num_input_tokens_seen": 93222864, + "step": 138335 + }, + { + "epoch": 3.379669215547358, + "grad_norm": 0.005194546654820442, + "learning_rate": 5.744478852450192e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93226192, + "step": 138340 + }, + { + "epoch": 3.379791366379205, + "grad_norm": 0.040994592010974884, + "learning_rate": 5.743707162001888e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93229392, + "step": 138345 + }, + { + "epoch": 3.3799135172110524, + "grad_norm": 0.004443665035068989, + "learning_rate": 5.742935502506484e-07, + "loss": 0.0, + "num_input_tokens_seen": 93232784, + "step": 138350 + }, + { + "epoch": 3.3800356680428996, + "grad_norm": 0.003939436282962561, + "learning_rate": 5.742163873969599e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93235984, + "step": 138355 + }, + { + "epoch": 3.3801578188747463, + "grad_norm": 0.0042626261711120605, + "learning_rate": 5.741392276396847e-07, + "loss": 0.0, + "num_input_tokens_seen": 93239248, + "step": 138360 + }, + { + "epoch": 3.380279969706594, + "grad_norm": 0.0003855399845633656, + "learning_rate": 5.740620709793832e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93242448, + "step": 138365 + }, + { + "epoch": 3.3804021205384407, + "grad_norm": 0.052950721234083176, + "learning_rate": 5.739849174166173e-07, + "loss": 0.0, + "num_input_tokens_seen": 93245456, + "step": 138370 + }, + { + "epoch": 3.380524271370288, + "grad_norm": 0.00018201395869255066, + "learning_rate": 5.739077669519473e-07, + "loss": 0.0, + "num_input_tokens_seen": 93248336, + "step": 138375 + }, + { + "epoch": 3.380646422202135, + "grad_norm": 0.017797749489545822, + "learning_rate": 5.738306195859351e-07, + "loss": 0.0, + "num_input_tokens_seen": 93251472, + "step": 138380 + }, + { + "epoch": 3.3807685730339823, + "grad_norm": 0.002218404784798622, + "learning_rate": 5.737534753191406e-07, + "loss": 0.0, + "num_input_tokens_seen": 93254672, + "step": 138385 + }, + { + "epoch": 3.3808907238658295, + "grad_norm": 0.013328985311090946, + "learning_rate": 5.736763341521256e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93258064, + "step": 138390 + }, + { + "epoch": 3.3810128746976766, + "grad_norm": 37.32630157470703, + "learning_rate": 5.735991960854514e-07, + "loss": 0.0696, + "num_input_tokens_seen": 93261648, + "step": 138395 + }, + { + "epoch": 3.381135025529524, + "grad_norm": 0.030253412202000618, + "learning_rate": 5.735220611196781e-07, + "loss": 0.0008, + "num_input_tokens_seen": 93264912, + "step": 138400 + }, + { + "epoch": 3.381257176361371, + "grad_norm": 0.00933878030627966, + "learning_rate": 5.734449292553675e-07, + "loss": 0.0, + "num_input_tokens_seen": 93268368, + "step": 138405 + }, + { + "epoch": 3.381379327193218, + "grad_norm": 8.085768786258996e-05, + "learning_rate": 5.733678004930798e-07, + "loss": 0.0, + "num_input_tokens_seen": 93271824, + "step": 138410 + }, + { + "epoch": 3.3815014780250654, + "grad_norm": 0.006211719010025263, + "learning_rate": 5.732906748333766e-07, + "loss": 0.0, + "num_input_tokens_seen": 93275344, + "step": 138415 + }, + { + "epoch": 3.3816236288569126, + "grad_norm": 0.0006274699117057025, + "learning_rate": 5.732135522768182e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93278992, + "step": 138420 + }, + { + "epoch": 3.38174577968876, + "grad_norm": 0.0006471577798947692, + "learning_rate": 5.731364328239654e-07, + "loss": 0.0, + "num_input_tokens_seen": 93282000, + "step": 138425 + }, + { + "epoch": 3.381867930520607, + "grad_norm": 0.030883746221661568, + "learning_rate": 5.730593164753795e-07, + "loss": 0.1003, + "num_input_tokens_seen": 93285584, + "step": 138430 + }, + { + "epoch": 3.381990081352454, + "grad_norm": 0.008337740786373615, + "learning_rate": 5.729822032316208e-07, + "loss": 0.0, + "num_input_tokens_seen": 93288912, + "step": 138435 + }, + { + "epoch": 3.3821122321843013, + "grad_norm": 279.927978515625, + "learning_rate": 5.729050930932508e-07, + "loss": 0.0022, + "num_input_tokens_seen": 93292112, + "step": 138440 + }, + { + "epoch": 3.382234383016148, + "grad_norm": 0.00017066705913748592, + "learning_rate": 5.728279860608294e-07, + "loss": 0.0, + "num_input_tokens_seen": 93295184, + "step": 138445 + }, + { + "epoch": 3.3823565338479957, + "grad_norm": 0.0031765324529260397, + "learning_rate": 5.727508821349178e-07, + "loss": 0.0, + "num_input_tokens_seen": 93298896, + "step": 138450 + }, + { + "epoch": 3.3824786846798425, + "grad_norm": 0.007076940033584833, + "learning_rate": 5.726737813160771e-07, + "loss": 0.0, + "num_input_tokens_seen": 93302288, + "step": 138455 + }, + { + "epoch": 3.3826008355116897, + "grad_norm": 0.022491326555609703, + "learning_rate": 5.725966836048671e-07, + "loss": 0.0, + "num_input_tokens_seen": 93305552, + "step": 138460 + }, + { + "epoch": 3.382722986343537, + "grad_norm": 0.005003460217267275, + "learning_rate": 5.725195890018495e-07, + "loss": 0.0, + "num_input_tokens_seen": 93308880, + "step": 138465 + }, + { + "epoch": 3.382845137175384, + "grad_norm": 0.0008077129605226219, + "learning_rate": 5.72442497507584e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93312272, + "step": 138470 + }, + { + "epoch": 3.3829672880072312, + "grad_norm": 0.0005891541368328035, + "learning_rate": 5.72365409122632e-07, + "loss": 0.0, + "num_input_tokens_seen": 93315408, + "step": 138475 + }, + { + "epoch": 3.3830894388390784, + "grad_norm": 1.0880800485610962, + "learning_rate": 5.722883238475535e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93319120, + "step": 138480 + }, + { + "epoch": 3.3832115896709256, + "grad_norm": 0.027302728965878487, + "learning_rate": 5.722112416829092e-07, + "loss": 0.0, + "num_input_tokens_seen": 93322640, + "step": 138485 + }, + { + "epoch": 3.383333740502773, + "grad_norm": 0.0027143717743456364, + "learning_rate": 5.721341626292603e-07, + "loss": 0.0, + "num_input_tokens_seen": 93326224, + "step": 138490 + }, + { + "epoch": 3.38345589133462, + "grad_norm": 0.00450406176969409, + "learning_rate": 5.720570866871664e-07, + "loss": 0.0452, + "num_input_tokens_seen": 93330256, + "step": 138495 + }, + { + "epoch": 3.383578042166467, + "grad_norm": 0.0004445763770490885, + "learning_rate": 5.719800138571889e-07, + "loss": 0.0, + "num_input_tokens_seen": 93333520, + "step": 138500 + }, + { + "epoch": 3.3837001929983144, + "grad_norm": 0.007731878198683262, + "learning_rate": 5.719029441398875e-07, + "loss": 0.0, + "num_input_tokens_seen": 93336848, + "step": 138505 + }, + { + "epoch": 3.3838223438301616, + "grad_norm": 0.03202767297625542, + "learning_rate": 5.718258775358229e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93340048, + "step": 138510 + }, + { + "epoch": 3.3839444946620088, + "grad_norm": 0.0012034822721034288, + "learning_rate": 5.717488140455562e-07, + "loss": 0.0, + "num_input_tokens_seen": 93343504, + "step": 138515 + }, + { + "epoch": 3.384066645493856, + "grad_norm": 0.0018715094774961472, + "learning_rate": 5.716717536696473e-07, + "loss": 0.0, + "num_input_tokens_seen": 93347024, + "step": 138520 + }, + { + "epoch": 3.384188796325703, + "grad_norm": 0.001975530991330743, + "learning_rate": 5.715946964086562e-07, + "loss": 0.0638, + "num_input_tokens_seen": 93350224, + "step": 138525 + }, + { + "epoch": 3.3843109471575503, + "grad_norm": 0.002123733516782522, + "learning_rate": 5.71517642263144e-07, + "loss": 0.0, + "num_input_tokens_seen": 93354192, + "step": 138530 + }, + { + "epoch": 3.3844330979893975, + "grad_norm": 0.0031818312127143145, + "learning_rate": 5.714405912336708e-07, + "loss": 0.0, + "num_input_tokens_seen": 93357712, + "step": 138535 + }, + { + "epoch": 3.3845552488212443, + "grad_norm": 0.2843506634235382, + "learning_rate": 5.713635433207966e-07, + "loss": 0.0489, + "num_input_tokens_seen": 93361040, + "step": 138540 + }, + { + "epoch": 3.3846773996530914, + "grad_norm": 0.0008113362709991634, + "learning_rate": 5.71286498525082e-07, + "loss": 0.0, + "num_input_tokens_seen": 93364560, + "step": 138545 + }, + { + "epoch": 3.3847995504849386, + "grad_norm": 0.010063768364489079, + "learning_rate": 5.712094568470875e-07, + "loss": 0.0, + "num_input_tokens_seen": 93368336, + "step": 138550 + }, + { + "epoch": 3.384921701316786, + "grad_norm": 0.006297845859080553, + "learning_rate": 5.711324182873729e-07, + "loss": 0.0, + "num_input_tokens_seen": 93371472, + "step": 138555 + }, + { + "epoch": 3.385043852148633, + "grad_norm": 96.34769439697266, + "learning_rate": 5.710553828464993e-07, + "loss": 0.0546, + "num_input_tokens_seen": 93374480, + "step": 138560 + }, + { + "epoch": 3.38516600298048, + "grad_norm": 0.003234319156035781, + "learning_rate": 5.709783505250256e-07, + "loss": 0.0, + "num_input_tokens_seen": 93378000, + "step": 138565 + }, + { + "epoch": 3.3852881538123274, + "grad_norm": 0.007625074591487646, + "learning_rate": 5.709013213235133e-07, + "loss": 0.0, + "num_input_tokens_seen": 93381200, + "step": 138570 + }, + { + "epoch": 3.3854103046441746, + "grad_norm": 0.020981401205062866, + "learning_rate": 5.708242952425216e-07, + "loss": 0.0, + "num_input_tokens_seen": 93384592, + "step": 138575 + }, + { + "epoch": 3.3855324554760218, + "grad_norm": 0.0004391383845359087, + "learning_rate": 5.707472722826109e-07, + "loss": 0.0, + "num_input_tokens_seen": 93389264, + "step": 138580 + }, + { + "epoch": 3.385654606307869, + "grad_norm": 0.03177093714475632, + "learning_rate": 5.706702524443419e-07, + "loss": 0.0, + "num_input_tokens_seen": 93392784, + "step": 138585 + }, + { + "epoch": 3.385776757139716, + "grad_norm": 0.00039413681952282786, + "learning_rate": 5.705932357282741e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93396176, + "step": 138590 + }, + { + "epoch": 3.3858989079715633, + "grad_norm": 0.00042011006735265255, + "learning_rate": 5.705162221349681e-07, + "loss": 0.0, + "num_input_tokens_seen": 93399376, + "step": 138595 + }, + { + "epoch": 3.3860210588034105, + "grad_norm": 0.00041277159471064806, + "learning_rate": 5.704392116649832e-07, + "loss": 0.0, + "num_input_tokens_seen": 93403216, + "step": 138600 + }, + { + "epoch": 3.3861432096352577, + "grad_norm": 0.002446555532515049, + "learning_rate": 5.703622043188799e-07, + "loss": 0.0684, + "num_input_tokens_seen": 93406416, + "step": 138605 + }, + { + "epoch": 3.386265360467105, + "grad_norm": 0.0015684259124100208, + "learning_rate": 5.702852000972187e-07, + "loss": 0.0371, + "num_input_tokens_seen": 93409360, + "step": 138610 + }, + { + "epoch": 3.386387511298952, + "grad_norm": 0.005386325065046549, + "learning_rate": 5.702081990005587e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93412240, + "step": 138615 + }, + { + "epoch": 3.3865096621307993, + "grad_norm": 0.004681466147303581, + "learning_rate": 5.701312010294606e-07, + "loss": 0.0, + "num_input_tokens_seen": 93415760, + "step": 138620 + }, + { + "epoch": 3.386631812962646, + "grad_norm": 0.4638597369194031, + "learning_rate": 5.700542061844839e-07, + "loss": 0.0003, + "num_input_tokens_seen": 93418768, + "step": 138625 + }, + { + "epoch": 3.3867539637944937, + "grad_norm": 0.00034154325840063393, + "learning_rate": 5.699772144661885e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93422096, + "step": 138630 + }, + { + "epoch": 3.3868761146263404, + "grad_norm": 1.7362202405929565, + "learning_rate": 5.699002258751348e-07, + "loss": 0.0419, + "num_input_tokens_seen": 93425488, + "step": 138635 + }, + { + "epoch": 3.3869982654581876, + "grad_norm": 0.0004423426289577037, + "learning_rate": 5.698232404118819e-07, + "loss": 0.0, + "num_input_tokens_seen": 93429136, + "step": 138640 + }, + { + "epoch": 3.387120416290035, + "grad_norm": 0.004544767551124096, + "learning_rate": 5.697462580769905e-07, + "loss": 0.0, + "num_input_tokens_seen": 93432464, + "step": 138645 + }, + { + "epoch": 3.387242567121882, + "grad_norm": 0.001746030175127089, + "learning_rate": 5.696692788710196e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93435792, + "step": 138650 + }, + { + "epoch": 3.387364717953729, + "grad_norm": 0.00814051739871502, + "learning_rate": 5.6959230279453e-07, + "loss": 0.0, + "num_input_tokens_seen": 93439568, + "step": 138655 + }, + { + "epoch": 3.3874868687855764, + "grad_norm": 0.0016798563301563263, + "learning_rate": 5.695153298480803e-07, + "loss": 0.0, + "num_input_tokens_seen": 93443152, + "step": 138660 + }, + { + "epoch": 3.3876090196174236, + "grad_norm": 25.824024200439453, + "learning_rate": 5.694383600322314e-07, + "loss": 0.0431, + "num_input_tokens_seen": 93446736, + "step": 138665 + }, + { + "epoch": 3.3877311704492707, + "grad_norm": 0.0016559103969484568, + "learning_rate": 5.693613933475423e-07, + "loss": 0.0, + "num_input_tokens_seen": 93449872, + "step": 138670 + }, + { + "epoch": 3.387853321281118, + "grad_norm": 0.02711871638894081, + "learning_rate": 5.692844297945728e-07, + "loss": 0.0155, + "num_input_tokens_seen": 93453200, + "step": 138675 + }, + { + "epoch": 3.387975472112965, + "grad_norm": 0.0018949408549815416, + "learning_rate": 5.692074693738833e-07, + "loss": 0.0, + "num_input_tokens_seen": 93456464, + "step": 138680 + }, + { + "epoch": 3.3880976229448123, + "grad_norm": 0.0003768306924030185, + "learning_rate": 5.691305120860323e-07, + "loss": 0.1295, + "num_input_tokens_seen": 93459664, + "step": 138685 + }, + { + "epoch": 3.3882197737766595, + "grad_norm": 0.018024956807494164, + "learning_rate": 5.690535579315809e-07, + "loss": 0.0, + "num_input_tokens_seen": 93463056, + "step": 138690 + }, + { + "epoch": 3.3883419246085067, + "grad_norm": 0.004028045106679201, + "learning_rate": 5.689766069110873e-07, + "loss": 0.0, + "num_input_tokens_seen": 93466192, + "step": 138695 + }, + { + "epoch": 3.388464075440354, + "grad_norm": 0.004355765879154205, + "learning_rate": 5.688996590251118e-07, + "loss": 0.0, + "num_input_tokens_seen": 93469392, + "step": 138700 + }, + { + "epoch": 3.388586226272201, + "grad_norm": 0.0004680180863942951, + "learning_rate": 5.688227142742143e-07, + "loss": 0.0, + "num_input_tokens_seen": 93472592, + "step": 138705 + }, + { + "epoch": 3.3887083771040483, + "grad_norm": 0.01364127453416586, + "learning_rate": 5.687457726589535e-07, + "loss": 0.0, + "num_input_tokens_seen": 93475792, + "step": 138710 + }, + { + "epoch": 3.3888305279358955, + "grad_norm": 0.0096084950491786, + "learning_rate": 5.6866883417989e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93478864, + "step": 138715 + }, + { + "epoch": 3.388952678767742, + "grad_norm": 7.81436829129234e-05, + "learning_rate": 5.685918988375823e-07, + "loss": 0.0722, + "num_input_tokens_seen": 93482640, + "step": 138720 + }, + { + "epoch": 3.3890748295995894, + "grad_norm": 0.0009174890001304448, + "learning_rate": 5.685149666325907e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93486160, + "step": 138725 + }, + { + "epoch": 3.3891969804314366, + "grad_norm": 0.5889875888824463, + "learning_rate": 5.684380375654744e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93489488, + "step": 138730 + }, + { + "epoch": 3.3893191312632838, + "grad_norm": 0.00953624863177538, + "learning_rate": 5.683611116367924e-07, + "loss": 0.0, + "num_input_tokens_seen": 93492880, + "step": 138735 + }, + { + "epoch": 3.389441282095131, + "grad_norm": 0.0017142931465059519, + "learning_rate": 5.682841888471047e-07, + "loss": 0.0005, + "num_input_tokens_seen": 93496144, + "step": 138740 + }, + { + "epoch": 3.389563432926978, + "grad_norm": 0.001251032343134284, + "learning_rate": 5.682072691969701e-07, + "loss": 0.0, + "num_input_tokens_seen": 93499408, + "step": 138745 + }, + { + "epoch": 3.3896855837588253, + "grad_norm": 0.00032911982270888984, + "learning_rate": 5.68130352686949e-07, + "loss": 0.0, + "num_input_tokens_seen": 93503056, + "step": 138750 + }, + { + "epoch": 3.3898077345906725, + "grad_norm": 0.0022663853596895933, + "learning_rate": 5.680534393175997e-07, + "loss": 0.0, + "num_input_tokens_seen": 93506384, + "step": 138755 + }, + { + "epoch": 3.3899298854225197, + "grad_norm": 9.262767707696185e-05, + "learning_rate": 5.679765290894818e-07, + "loss": 0.0, + "num_input_tokens_seen": 93509776, + "step": 138760 + }, + { + "epoch": 3.390052036254367, + "grad_norm": 0.007468585856258869, + "learning_rate": 5.678996220031553e-07, + "loss": 0.0, + "num_input_tokens_seen": 93513296, + "step": 138765 + }, + { + "epoch": 3.390174187086214, + "grad_norm": 0.023306336253881454, + "learning_rate": 5.678227180591786e-07, + "loss": 0.0, + "num_input_tokens_seen": 93516624, + "step": 138770 + }, + { + "epoch": 3.3902963379180613, + "grad_norm": 0.0007523863459937274, + "learning_rate": 5.677458172581115e-07, + "loss": 0.0, + "num_input_tokens_seen": 93520144, + "step": 138775 + }, + { + "epoch": 3.3904184887499085, + "grad_norm": 0.01631038449704647, + "learning_rate": 5.676689196005129e-07, + "loss": 0.0, + "num_input_tokens_seen": 93523408, + "step": 138780 + }, + { + "epoch": 3.3905406395817557, + "grad_norm": 0.0019541881047189236, + "learning_rate": 5.675920250869426e-07, + "loss": 0.0, + "num_input_tokens_seen": 93526928, + "step": 138785 + }, + { + "epoch": 3.390662790413603, + "grad_norm": 0.0017687291838228703, + "learning_rate": 5.67515133717959e-07, + "loss": 0.0, + "num_input_tokens_seen": 93530192, + "step": 138790 + }, + { + "epoch": 3.39078494124545, + "grad_norm": 0.0010739255230873823, + "learning_rate": 5.674382454941215e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93533584, + "step": 138795 + }, + { + "epoch": 3.3909070920772972, + "grad_norm": 0.0009468430071137846, + "learning_rate": 5.6736136041599e-07, + "loss": 0.0, + "num_input_tokens_seen": 93536848, + "step": 138800 + }, + { + "epoch": 3.391029242909144, + "grad_norm": 0.001576863694936037, + "learning_rate": 5.672844784841226e-07, + "loss": 0.0, + "num_input_tokens_seen": 93540240, + "step": 138805 + }, + { + "epoch": 3.3911513937409916, + "grad_norm": 0.0003011640510521829, + "learning_rate": 5.672075996990792e-07, + "loss": 0.0, + "num_input_tokens_seen": 93543760, + "step": 138810 + }, + { + "epoch": 3.3912735445728384, + "grad_norm": 0.0003061066963709891, + "learning_rate": 5.671307240614183e-07, + "loss": 0.0, + "num_input_tokens_seen": 93546704, + "step": 138815 + }, + { + "epoch": 3.3913956954046856, + "grad_norm": 0.0020857774652540684, + "learning_rate": 5.670538515716996e-07, + "loss": 0.0, + "num_input_tokens_seen": 93549904, + "step": 138820 + }, + { + "epoch": 3.3915178462365327, + "grad_norm": 714.5460815429688, + "learning_rate": 5.669769822304812e-07, + "loss": 0.0227, + "num_input_tokens_seen": 93553552, + "step": 138825 + }, + { + "epoch": 3.39163999706838, + "grad_norm": 0.0003291108296252787, + "learning_rate": 5.669001160383231e-07, + "loss": 0.0, + "num_input_tokens_seen": 93557072, + "step": 138830 + }, + { + "epoch": 3.391762147900227, + "grad_norm": 1.0141626596450806, + "learning_rate": 5.668232529957835e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93560208, + "step": 138835 + }, + { + "epoch": 3.3918842987320743, + "grad_norm": 0.016728295013308525, + "learning_rate": 5.667463931034219e-07, + "loss": 0.0, + "num_input_tokens_seen": 93563344, + "step": 138840 + }, + { + "epoch": 3.3920064495639215, + "grad_norm": 0.40863490104675293, + "learning_rate": 5.666695363617972e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93566480, + "step": 138845 + }, + { + "epoch": 3.3921286003957687, + "grad_norm": 0.0022530490532517433, + "learning_rate": 5.66592682771468e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93569808, + "step": 138850 + }, + { + "epoch": 3.392250751227616, + "grad_norm": 0.0001802055921871215, + "learning_rate": 5.66515832332993e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93573264, + "step": 138855 + }, + { + "epoch": 3.392372902059463, + "grad_norm": 1311.0792236328125, + "learning_rate": 5.664389850469322e-07, + "loss": 0.0144, + "num_input_tokens_seen": 93576400, + "step": 138860 + }, + { + "epoch": 3.3924950528913103, + "grad_norm": 0.0003263753023929894, + "learning_rate": 5.663621409138431e-07, + "loss": 0.0, + "num_input_tokens_seen": 93579728, + "step": 138865 + }, + { + "epoch": 3.3926172037231574, + "grad_norm": 0.0007934704190120101, + "learning_rate": 5.662852999342856e-07, + "loss": 0.0, + "num_input_tokens_seen": 93583184, + "step": 138870 + }, + { + "epoch": 3.3927393545550046, + "grad_norm": 0.000460325856693089, + "learning_rate": 5.662084621088177e-07, + "loss": 0.0, + "num_input_tokens_seen": 93586256, + "step": 138875 + }, + { + "epoch": 3.392861505386852, + "grad_norm": 0.012204419821500778, + "learning_rate": 5.66131627437999e-07, + "loss": 0.0395, + "num_input_tokens_seen": 93589776, + "step": 138880 + }, + { + "epoch": 3.392983656218699, + "grad_norm": 0.0014695656718686223, + "learning_rate": 5.660547959223871e-07, + "loss": 0.0, + "num_input_tokens_seen": 93593360, + "step": 138885 + }, + { + "epoch": 3.393105807050546, + "grad_norm": 0.027506841346621513, + "learning_rate": 5.659779675625418e-07, + "loss": 0.0, + "num_input_tokens_seen": 93596624, + "step": 138890 + }, + { + "epoch": 3.3932279578823934, + "grad_norm": 0.0004509424907155335, + "learning_rate": 5.659011423590217e-07, + "loss": 0.0004, + "num_input_tokens_seen": 93599952, + "step": 138895 + }, + { + "epoch": 3.39335010871424, + "grad_norm": 0.004649386275559664, + "learning_rate": 5.658243203123848e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93603088, + "step": 138900 + }, + { + "epoch": 3.3934722595460873, + "grad_norm": 0.018392464146018028, + "learning_rate": 5.657475014231908e-07, + "loss": 0.0, + "num_input_tokens_seen": 93606928, + "step": 138905 + }, + { + "epoch": 3.3935944103779345, + "grad_norm": 0.008321767672896385, + "learning_rate": 5.656706856919971e-07, + "loss": 0.0, + "num_input_tokens_seen": 93610768, + "step": 138910 + }, + { + "epoch": 3.3937165612097817, + "grad_norm": 0.03646848350763321, + "learning_rate": 5.655938731193633e-07, + "loss": 0.0, + "num_input_tokens_seen": 93614032, + "step": 138915 + }, + { + "epoch": 3.393838712041629, + "grad_norm": 0.000169451828696765, + "learning_rate": 5.655170637058479e-07, + "loss": 0.0, + "num_input_tokens_seen": 93617232, + "step": 138920 + }, + { + "epoch": 3.393960862873476, + "grad_norm": 0.0003246811975259334, + "learning_rate": 5.654402574520088e-07, + "loss": 0.0, + "num_input_tokens_seen": 93620880, + "step": 138925 + }, + { + "epoch": 3.3940830137053233, + "grad_norm": 0.028859952464699745, + "learning_rate": 5.653634543584056e-07, + "loss": 0.0, + "num_input_tokens_seen": 93624016, + "step": 138930 + }, + { + "epoch": 3.3942051645371705, + "grad_norm": 0.0009226136025972664, + "learning_rate": 5.652866544255962e-07, + "loss": 0.0, + "num_input_tokens_seen": 93627216, + "step": 138935 + }, + { + "epoch": 3.3943273153690177, + "grad_norm": 0.0003126481897197664, + "learning_rate": 5.652098576541387e-07, + "loss": 0.0739, + "num_input_tokens_seen": 93630288, + "step": 138940 + }, + { + "epoch": 3.394449466200865, + "grad_norm": 0.001821165787987411, + "learning_rate": 5.651330640445926e-07, + "loss": 0.0, + "num_input_tokens_seen": 93633296, + "step": 138945 + }, + { + "epoch": 3.394571617032712, + "grad_norm": 0.0009765501017682254, + "learning_rate": 5.650562735975152e-07, + "loss": 0.0, + "num_input_tokens_seen": 93636816, + "step": 138950 + }, + { + "epoch": 3.3946937678645592, + "grad_norm": 8.84903347468935e-05, + "learning_rate": 5.649794863134663e-07, + "loss": 0.1083, + "num_input_tokens_seen": 93640144, + "step": 138955 + }, + { + "epoch": 3.3948159186964064, + "grad_norm": 0.00939985178411007, + "learning_rate": 5.649027021930031e-07, + "loss": 0.0, + "num_input_tokens_seen": 93643472, + "step": 138960 + }, + { + "epoch": 3.3949380695282536, + "grad_norm": 0.0015276978956535459, + "learning_rate": 5.648259212366847e-07, + "loss": 0.0, + "num_input_tokens_seen": 93646928, + "step": 138965 + }, + { + "epoch": 3.395060220360101, + "grad_norm": 0.002236619358882308, + "learning_rate": 5.647491434450688e-07, + "loss": 0.0, + "num_input_tokens_seen": 93650512, + "step": 138970 + }, + { + "epoch": 3.395182371191948, + "grad_norm": 0.03943828493356705, + "learning_rate": 5.646723688187148e-07, + "loss": 0.0, + "num_input_tokens_seen": 93653712, + "step": 138975 + }, + { + "epoch": 3.395304522023795, + "grad_norm": 0.015553759410977364, + "learning_rate": 5.645955973581799e-07, + "loss": 0.0005, + "num_input_tokens_seen": 93656976, + "step": 138980 + }, + { + "epoch": 3.395426672855642, + "grad_norm": 0.0021713352762162685, + "learning_rate": 5.645188290640231e-07, + "loss": 0.0, + "num_input_tokens_seen": 93660432, + "step": 138985 + }, + { + "epoch": 3.3955488236874896, + "grad_norm": 0.00021980589372105896, + "learning_rate": 5.644420639368028e-07, + "loss": 0.0, + "num_input_tokens_seen": 93663696, + "step": 138990 + }, + { + "epoch": 3.3956709745193363, + "grad_norm": 0.027932588011026382, + "learning_rate": 5.643653019770764e-07, + "loss": 0.0355, + "num_input_tokens_seen": 93666896, + "step": 138995 + }, + { + "epoch": 3.3957931253511835, + "grad_norm": 0.014813977293670177, + "learning_rate": 5.642885431854034e-07, + "loss": 0.0, + "num_input_tokens_seen": 93669904, + "step": 139000 + }, + { + "epoch": 3.3959152761830307, + "grad_norm": 0.00031996812322176993, + "learning_rate": 5.642117875623406e-07, + "loss": 0.0, + "num_input_tokens_seen": 93673424, + "step": 139005 + }, + { + "epoch": 3.396037427014878, + "grad_norm": 0.00020197322010062635, + "learning_rate": 5.641350351084471e-07, + "loss": 0.0, + "num_input_tokens_seen": 93676432, + "step": 139010 + }, + { + "epoch": 3.396159577846725, + "grad_norm": 99.5778579711914, + "learning_rate": 5.640582858242812e-07, + "loss": 0.0412, + "num_input_tokens_seen": 93680272, + "step": 139015 + }, + { + "epoch": 3.3962817286785723, + "grad_norm": 0.0011479872046038508, + "learning_rate": 5.639815397104004e-07, + "loss": 0.0, + "num_input_tokens_seen": 93683664, + "step": 139020 + }, + { + "epoch": 3.3964038795104194, + "grad_norm": 68.51597595214844, + "learning_rate": 5.639047967673634e-07, + "loss": 0.0011, + "num_input_tokens_seen": 93687120, + "step": 139025 + }, + { + "epoch": 3.3965260303422666, + "grad_norm": 0.0023358529433608055, + "learning_rate": 5.638280569957277e-07, + "loss": 0.0, + "num_input_tokens_seen": 93690384, + "step": 139030 + }, + { + "epoch": 3.396648181174114, + "grad_norm": 0.0002345311950193718, + "learning_rate": 5.637513203960519e-07, + "loss": 0.0, + "num_input_tokens_seen": 93693328, + "step": 139035 + }, + { + "epoch": 3.396770332005961, + "grad_norm": 0.004020425956696272, + "learning_rate": 5.636745869688939e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93696592, + "step": 139040 + }, + { + "epoch": 3.396892482837808, + "grad_norm": 0.0003916302521247417, + "learning_rate": 5.635978567148114e-07, + "loss": 0.0, + "num_input_tokens_seen": 93699856, + "step": 139045 + }, + { + "epoch": 3.3970146336696554, + "grad_norm": 0.001366269774734974, + "learning_rate": 5.63521129634363e-07, + "loss": 0.0, + "num_input_tokens_seen": 93702800, + "step": 139050 + }, + { + "epoch": 3.3971367845015026, + "grad_norm": 0.002023897599428892, + "learning_rate": 5.634444057281058e-07, + "loss": 0.0, + "num_input_tokens_seen": 93706256, + "step": 139055 + }, + { + "epoch": 3.3972589353333498, + "grad_norm": 75.8708267211914, + "learning_rate": 5.633676849965989e-07, + "loss": 0.0868, + "num_input_tokens_seen": 93710032, + "step": 139060 + }, + { + "epoch": 3.397381086165197, + "grad_norm": 0.001355248736217618, + "learning_rate": 5.632909674403991e-07, + "loss": 0.0, + "num_input_tokens_seen": 93713104, + "step": 139065 + }, + { + "epoch": 3.3975032369970437, + "grad_norm": 0.0011240398744121194, + "learning_rate": 5.63214253060065e-07, + "loss": 0.0, + "num_input_tokens_seen": 93716752, + "step": 139070 + }, + { + "epoch": 3.3976253878288913, + "grad_norm": 0.00047678040573373437, + "learning_rate": 5.631375418561546e-07, + "loss": 0.0, + "num_input_tokens_seen": 93720400, + "step": 139075 + }, + { + "epoch": 3.397747538660738, + "grad_norm": 0.0010390763636678457, + "learning_rate": 5.630608338292251e-07, + "loss": 0.0909, + "num_input_tokens_seen": 93723792, + "step": 139080 + }, + { + "epoch": 3.3978696894925853, + "grad_norm": 0.00013290536298882216, + "learning_rate": 5.629841289798352e-07, + "loss": 0.0, + "num_input_tokens_seen": 93727184, + "step": 139085 + }, + { + "epoch": 3.3979918403244325, + "grad_norm": 0.0025743693113327026, + "learning_rate": 5.629074273085419e-07, + "loss": 0.0, + "num_input_tokens_seen": 93731024, + "step": 139090 + }, + { + "epoch": 3.3981139911562797, + "grad_norm": 0.0016093073645606637, + "learning_rate": 5.628307288159035e-07, + "loss": 0.0, + "num_input_tokens_seen": 93734224, + "step": 139095 + }, + { + "epoch": 3.398236141988127, + "grad_norm": 0.001873745582997799, + "learning_rate": 5.627540335024776e-07, + "loss": 0.0, + "num_input_tokens_seen": 93737168, + "step": 139100 + }, + { + "epoch": 3.398358292819974, + "grad_norm": 0.0914987251162529, + "learning_rate": 5.626773413688218e-07, + "loss": 0.0, + "num_input_tokens_seen": 93740368, + "step": 139105 + }, + { + "epoch": 3.3984804436518212, + "grad_norm": 0.0004763550532516092, + "learning_rate": 5.626006524154943e-07, + "loss": 0.0448, + "num_input_tokens_seen": 93743568, + "step": 139110 + }, + { + "epoch": 3.3986025944836684, + "grad_norm": 0.0016596581554040313, + "learning_rate": 5.625239666430521e-07, + "loss": 0.0365, + "num_input_tokens_seen": 93747088, + "step": 139115 + }, + { + "epoch": 3.3987247453155156, + "grad_norm": 0.012288597412407398, + "learning_rate": 5.624472840520538e-07, + "loss": 0.0, + "num_input_tokens_seen": 93751568, + "step": 139120 + }, + { + "epoch": 3.398846896147363, + "grad_norm": 0.0013532297452911735, + "learning_rate": 5.623706046430561e-07, + "loss": 0.0, + "num_input_tokens_seen": 93754704, + "step": 139125 + }, + { + "epoch": 3.39896904697921, + "grad_norm": 0.0021572858095169067, + "learning_rate": 5.622939284166175e-07, + "loss": 0.0, + "num_input_tokens_seen": 93758288, + "step": 139130 + }, + { + "epoch": 3.399091197811057, + "grad_norm": 0.00035751090035773814, + "learning_rate": 5.622172553732946e-07, + "loss": 0.0, + "num_input_tokens_seen": 93763856, + "step": 139135 + }, + { + "epoch": 3.3992133486429044, + "grad_norm": 0.009177071042358875, + "learning_rate": 5.621405855136463e-07, + "loss": 0.0, + "num_input_tokens_seen": 93766864, + "step": 139140 + }, + { + "epoch": 3.3993354994747516, + "grad_norm": 0.03870631381869316, + "learning_rate": 5.620639188382287e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93770064, + "step": 139145 + }, + { + "epoch": 3.3994576503065987, + "grad_norm": 0.003173073288053274, + "learning_rate": 5.619872553476007e-07, + "loss": 0.0, + "num_input_tokens_seen": 93773584, + "step": 139150 + }, + { + "epoch": 3.399579801138446, + "grad_norm": 0.000830693868920207, + "learning_rate": 5.619105950423191e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93777104, + "step": 139155 + }, + { + "epoch": 3.399701951970293, + "grad_norm": 0.004456925205886364, + "learning_rate": 5.618339379229411e-07, + "loss": 0.0, + "num_input_tokens_seen": 93780176, + "step": 139160 + }, + { + "epoch": 3.39982410280214, + "grad_norm": 0.037957482039928436, + "learning_rate": 5.617572839900246e-07, + "loss": 0.0, + "num_input_tokens_seen": 93783888, + "step": 139165 + }, + { + "epoch": 3.399946253633987, + "grad_norm": 0.0009529789094813168, + "learning_rate": 5.616806332441274e-07, + "loss": 0.0, + "num_input_tokens_seen": 93787472, + "step": 139170 + }, + { + "epoch": 3.4000684044658342, + "grad_norm": 0.43681278824806213, + "learning_rate": 5.616039856858062e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93790864, + "step": 139175 + }, + { + "epoch": 3.4001905552976814, + "grad_norm": 0.0007161188987083733, + "learning_rate": 5.61527341315619e-07, + "loss": 0.0, + "num_input_tokens_seen": 93794064, + "step": 139180 + }, + { + "epoch": 3.4003127061295286, + "grad_norm": 0.0001278212876059115, + "learning_rate": 5.614507001341224e-07, + "loss": 0.0, + "num_input_tokens_seen": 93797264, + "step": 139185 + }, + { + "epoch": 3.400434856961376, + "grad_norm": 0.00162774499040097, + "learning_rate": 5.613740621418748e-07, + "loss": 0.1035, + "num_input_tokens_seen": 93800592, + "step": 139190 + }, + { + "epoch": 3.400557007793223, + "grad_norm": 0.01283679436892271, + "learning_rate": 5.612974273394327e-07, + "loss": 0.0, + "num_input_tokens_seen": 93803920, + "step": 139195 + }, + { + "epoch": 3.40067915862507, + "grad_norm": 0.5930418372154236, + "learning_rate": 5.612207957273535e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93807248, + "step": 139200 + }, + { + "epoch": 3.4008013094569174, + "grad_norm": 0.0012030928628519177, + "learning_rate": 5.611441673061951e-07, + "loss": 0.0, + "num_input_tokens_seen": 93810704, + "step": 139205 + }, + { + "epoch": 3.4009234602887646, + "grad_norm": 0.0014273381093516946, + "learning_rate": 5.610675420765141e-07, + "loss": 0.0, + "num_input_tokens_seen": 93813968, + "step": 139210 + }, + { + "epoch": 3.4010456111206118, + "grad_norm": 0.0039851912297308445, + "learning_rate": 5.609909200388683e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93817104, + "step": 139215 + }, + { + "epoch": 3.401167761952459, + "grad_norm": 3.220369035261683e-05, + "learning_rate": 5.609143011938143e-07, + "loss": 0.0, + "num_input_tokens_seen": 93822544, + "step": 139220 + }, + { + "epoch": 3.401289912784306, + "grad_norm": 0.0013037320459261537, + "learning_rate": 5.608376855419094e-07, + "loss": 0.0, + "num_input_tokens_seen": 93825872, + "step": 139225 + }, + { + "epoch": 3.4014120636161533, + "grad_norm": 0.00025415164418518543, + "learning_rate": 5.607610730837116e-07, + "loss": 0.0, + "num_input_tokens_seen": 93828752, + "step": 139230 + }, + { + "epoch": 3.4015342144480005, + "grad_norm": 0.0004327208735048771, + "learning_rate": 5.60684463819777e-07, + "loss": 0.0003, + "num_input_tokens_seen": 93832272, + "step": 139235 + }, + { + "epoch": 3.4016563652798477, + "grad_norm": 5.300765405991115e-05, + "learning_rate": 5.606078577506635e-07, + "loss": 0.0, + "num_input_tokens_seen": 93835664, + "step": 139240 + }, + { + "epoch": 3.401778516111695, + "grad_norm": 0.003021878655999899, + "learning_rate": 5.605312548769278e-07, + "loss": 0.0, + "num_input_tokens_seen": 93839312, + "step": 139245 + }, + { + "epoch": 3.4019006669435417, + "grad_norm": 0.00036664100480265915, + "learning_rate": 5.604546551991266e-07, + "loss": 0.0, + "num_input_tokens_seen": 93842512, + "step": 139250 + }, + { + "epoch": 3.4020228177753893, + "grad_norm": 0.0010006529046222568, + "learning_rate": 5.603780587178177e-07, + "loss": 0.0399, + "num_input_tokens_seen": 93845776, + "step": 139255 + }, + { + "epoch": 3.402144968607236, + "grad_norm": 0.0046476093120872974, + "learning_rate": 5.603014654335576e-07, + "loss": 0.0, + "num_input_tokens_seen": 93848784, + "step": 139260 + }, + { + "epoch": 3.402267119439083, + "grad_norm": 0.004209481179714203, + "learning_rate": 5.602248753469039e-07, + "loss": 0.0, + "num_input_tokens_seen": 93852752, + "step": 139265 + }, + { + "epoch": 3.4023892702709304, + "grad_norm": 0.0006373856449499726, + "learning_rate": 5.601482884584125e-07, + "loss": 0.0, + "num_input_tokens_seen": 93856016, + "step": 139270 + }, + { + "epoch": 3.4025114211027776, + "grad_norm": 0.020606614649295807, + "learning_rate": 5.600717047686417e-07, + "loss": 0.0425, + "num_input_tokens_seen": 93859728, + "step": 139275 + }, + { + "epoch": 3.402633571934625, + "grad_norm": 0.00027014079387299716, + "learning_rate": 5.599951242781473e-07, + "loss": 0.135, + "num_input_tokens_seen": 93862544, + "step": 139280 + }, + { + "epoch": 3.402755722766472, + "grad_norm": 0.00013495008170139045, + "learning_rate": 5.599185469874872e-07, + "loss": 0.0, + "num_input_tokens_seen": 93866000, + "step": 139285 + }, + { + "epoch": 3.402877873598319, + "grad_norm": 2.1405741790658794e-05, + "learning_rate": 5.598419728972174e-07, + "loss": 0.0763, + "num_input_tokens_seen": 93869456, + "step": 139290 + }, + { + "epoch": 3.4030000244301664, + "grad_norm": 138.9090576171875, + "learning_rate": 5.59765402007895e-07, + "loss": 0.0026, + "num_input_tokens_seen": 93872784, + "step": 139295 + }, + { + "epoch": 3.4031221752620135, + "grad_norm": 0.0009671871666796505, + "learning_rate": 5.596888343200776e-07, + "loss": 0.0, + "num_input_tokens_seen": 93876304, + "step": 139300 + }, + { + "epoch": 3.4032443260938607, + "grad_norm": 0.003403621492907405, + "learning_rate": 5.596122698343208e-07, + "loss": 0.0, + "num_input_tokens_seen": 93879760, + "step": 139305 + }, + { + "epoch": 3.403366476925708, + "grad_norm": 0.02258618362247944, + "learning_rate": 5.595357085511827e-07, + "loss": 0.0004, + "num_input_tokens_seen": 93883024, + "step": 139310 + }, + { + "epoch": 3.403488627757555, + "grad_norm": 0.46484342217445374, + "learning_rate": 5.594591504712189e-07, + "loss": 0.0003, + "num_input_tokens_seen": 93886224, + "step": 139315 + }, + { + "epoch": 3.4036107785894023, + "grad_norm": 0.005803953390568495, + "learning_rate": 5.593825955949865e-07, + "loss": 0.0279, + "num_input_tokens_seen": 93889552, + "step": 139320 + }, + { + "epoch": 3.4037329294212495, + "grad_norm": 0.024452105164527893, + "learning_rate": 5.593060439230429e-07, + "loss": 0.0, + "num_input_tokens_seen": 93893072, + "step": 139325 + }, + { + "epoch": 3.4038550802530967, + "grad_norm": 0.029246680438518524, + "learning_rate": 5.592294954559439e-07, + "loss": 0.0, + "num_input_tokens_seen": 93896656, + "step": 139330 + }, + { + "epoch": 3.403977231084944, + "grad_norm": 0.009791702963411808, + "learning_rate": 5.591529501942469e-07, + "loss": 0.0, + "num_input_tokens_seen": 93900304, + "step": 139335 + }, + { + "epoch": 3.404099381916791, + "grad_norm": 0.009990615770220757, + "learning_rate": 5.590764081385078e-07, + "loss": 0.0, + "num_input_tokens_seen": 93903760, + "step": 139340 + }, + { + "epoch": 3.404221532748638, + "grad_norm": 0.0006494000554084778, + "learning_rate": 5.589998692892841e-07, + "loss": 0.0626, + "num_input_tokens_seen": 93907216, + "step": 139345 + }, + { + "epoch": 3.404343683580485, + "grad_norm": 0.004100476857274771, + "learning_rate": 5.589233336471316e-07, + "loss": 0.0524, + "num_input_tokens_seen": 93910544, + "step": 139350 + }, + { + "epoch": 3.404465834412332, + "grad_norm": 0.01545916311442852, + "learning_rate": 5.588468012126076e-07, + "loss": 0.0, + "num_input_tokens_seen": 93914000, + "step": 139355 + }, + { + "epoch": 3.4045879852441794, + "grad_norm": 0.012155533768236637, + "learning_rate": 5.587702719862683e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93917584, + "step": 139360 + }, + { + "epoch": 3.4047101360760266, + "grad_norm": 193.21961975097656, + "learning_rate": 5.586937459686701e-07, + "loss": 0.0134, + "num_input_tokens_seen": 93920720, + "step": 139365 + }, + { + "epoch": 3.4048322869078738, + "grad_norm": 0.7182050943374634, + "learning_rate": 5.586172231603697e-07, + "loss": 0.0004, + "num_input_tokens_seen": 93923856, + "step": 139370 + }, + { + "epoch": 3.404954437739721, + "grad_norm": 0.00684003159403801, + "learning_rate": 5.585407035619234e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93927056, + "step": 139375 + }, + { + "epoch": 3.405076588571568, + "grad_norm": 0.012966359034180641, + "learning_rate": 5.584641871738882e-07, + "loss": 0.0, + "num_input_tokens_seen": 93930640, + "step": 139380 + }, + { + "epoch": 3.4051987394034153, + "grad_norm": 0.0029723867774009705, + "learning_rate": 5.583876739968197e-07, + "loss": 0.0, + "num_input_tokens_seen": 93934096, + "step": 139385 + }, + { + "epoch": 3.4053208902352625, + "grad_norm": 0.03468893840909004, + "learning_rate": 5.58311164031275e-07, + "loss": 0.0005, + "num_input_tokens_seen": 93937168, + "step": 139390 + }, + { + "epoch": 3.4054430410671097, + "grad_norm": 28.802310943603516, + "learning_rate": 5.582346572778104e-07, + "loss": 0.0956, + "num_input_tokens_seen": 93940496, + "step": 139395 + }, + { + "epoch": 3.405565191898957, + "grad_norm": 0.0013457908062264323, + "learning_rate": 5.581581537369821e-07, + "loss": 0.0378, + "num_input_tokens_seen": 93943696, + "step": 139400 + }, + { + "epoch": 3.405687342730804, + "grad_norm": 0.0074604772962629795, + "learning_rate": 5.580816534093468e-07, + "loss": 0.0002, + "num_input_tokens_seen": 93947088, + "step": 139405 + }, + { + "epoch": 3.4058094935626513, + "grad_norm": 0.00541321886703372, + "learning_rate": 5.580051562954602e-07, + "loss": 0.0, + "num_input_tokens_seen": 93950224, + "step": 139410 + }, + { + "epoch": 3.4059316443944985, + "grad_norm": 0.010462961159646511, + "learning_rate": 5.57928662395879e-07, + "loss": 0.0001, + "num_input_tokens_seen": 93954000, + "step": 139415 + }, + { + "epoch": 3.4060537952263457, + "grad_norm": 0.000587663846090436, + "learning_rate": 5.5785217171116e-07, + "loss": 0.0, + "num_input_tokens_seen": 93957712, + "step": 139420 + }, + { + "epoch": 3.406175946058193, + "grad_norm": 0.10726846754550934, + "learning_rate": 5.577756842418584e-07, + "loss": 0.0, + "num_input_tokens_seen": 93961360, + "step": 139425 + }, + { + "epoch": 3.4062980968900396, + "grad_norm": 0.0019450212130323052, + "learning_rate": 5.576991999885313e-07, + "loss": 0.0, + "num_input_tokens_seen": 93964496, + "step": 139430 + }, + { + "epoch": 3.4064202477218872, + "grad_norm": 0.009344382211565971, + "learning_rate": 5.576227189517343e-07, + "loss": 0.0, + "num_input_tokens_seen": 93968016, + "step": 139435 + }, + { + "epoch": 3.406542398553734, + "grad_norm": 0.012868695892393589, + "learning_rate": 5.575462411320241e-07, + "loss": 0.0, + "num_input_tokens_seen": 93971664, + "step": 139440 + }, + { + "epoch": 3.406664549385581, + "grad_norm": 0.001059131813235581, + "learning_rate": 5.574697665299565e-07, + "loss": 0.0, + "num_input_tokens_seen": 93974992, + "step": 139445 + }, + { + "epoch": 3.4067867002174284, + "grad_norm": 0.0033619788009673357, + "learning_rate": 5.573932951460881e-07, + "loss": 0.0514, + "num_input_tokens_seen": 93978064, + "step": 139450 + }, + { + "epoch": 3.4069088510492755, + "grad_norm": 0.0006246289703994989, + "learning_rate": 5.573168269809743e-07, + "loss": 0.0, + "num_input_tokens_seen": 93981520, + "step": 139455 + }, + { + "epoch": 3.4070310018811227, + "grad_norm": 0.0036987571511417627, + "learning_rate": 5.572403620351718e-07, + "loss": 0.0, + "num_input_tokens_seen": 93984848, + "step": 139460 + }, + { + "epoch": 3.40715315271297, + "grad_norm": 0.0031094530131667852, + "learning_rate": 5.571639003092368e-07, + "loss": 0.0, + "num_input_tokens_seen": 93988496, + "step": 139465 + }, + { + "epoch": 3.407275303544817, + "grad_norm": 0.0013259407132863998, + "learning_rate": 5.570874418037244e-07, + "loss": 0.0, + "num_input_tokens_seen": 93991440, + "step": 139470 + }, + { + "epoch": 3.4073974543766643, + "grad_norm": 0.04478773847222328, + "learning_rate": 5.570109865191912e-07, + "loss": 0.0, + "num_input_tokens_seen": 93994896, + "step": 139475 + }, + { + "epoch": 3.4075196052085115, + "grad_norm": 0.014282858930528164, + "learning_rate": 5.569345344561938e-07, + "loss": 0.0, + "num_input_tokens_seen": 93998032, + "step": 139480 + }, + { + "epoch": 3.4076417560403587, + "grad_norm": 0.004201257135719061, + "learning_rate": 5.568580856152873e-07, + "loss": 0.0, + "num_input_tokens_seen": 94000784, + "step": 139485 + }, + { + "epoch": 3.407763906872206, + "grad_norm": 0.006269012577831745, + "learning_rate": 5.567816399970282e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94004048, + "step": 139490 + }, + { + "epoch": 3.407886057704053, + "grad_norm": 0.27850908041000366, + "learning_rate": 5.567051976019719e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94006928, + "step": 139495 + }, + { + "epoch": 3.4080082085359003, + "grad_norm": 0.00017745274817571044, + "learning_rate": 5.566287584306751e-07, + "loss": 0.0569, + "num_input_tokens_seen": 94010128, + "step": 139500 + }, + { + "epoch": 3.4081303593677474, + "grad_norm": 0.0006029402720741928, + "learning_rate": 5.565523224836928e-07, + "loss": 0.0003, + "num_input_tokens_seen": 94013200, + "step": 139505 + }, + { + "epoch": 3.4082525101995946, + "grad_norm": 0.0006123344646766782, + "learning_rate": 5.564758897615813e-07, + "loss": 0.0003, + "num_input_tokens_seen": 94016528, + "step": 139510 + }, + { + "epoch": 3.4083746610314414, + "grad_norm": 0.0006965179345570505, + "learning_rate": 5.563994602648967e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94019728, + "step": 139515 + }, + { + "epoch": 3.408496811863289, + "grad_norm": 0.010701261460781097, + "learning_rate": 5.563230339941942e-07, + "loss": 0.0, + "num_input_tokens_seen": 94023184, + "step": 139520 + }, + { + "epoch": 3.4086189626951358, + "grad_norm": 0.006104510743170977, + "learning_rate": 5.562466109500304e-07, + "loss": 0.0, + "num_input_tokens_seen": 94026320, + "step": 139525 + }, + { + "epoch": 3.408741113526983, + "grad_norm": 0.0008365048561245203, + "learning_rate": 5.561701911329602e-07, + "loss": 0.0, + "num_input_tokens_seen": 94029456, + "step": 139530 + }, + { + "epoch": 3.40886326435883, + "grad_norm": 0.042535971850156784, + "learning_rate": 5.560937745435401e-07, + "loss": 0.05, + "num_input_tokens_seen": 94033168, + "step": 139535 + }, + { + "epoch": 3.4089854151906773, + "grad_norm": 0.00022195244673639536, + "learning_rate": 5.560173611823251e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94036624, + "step": 139540 + }, + { + "epoch": 3.4091075660225245, + "grad_norm": 0.12215565890073776, + "learning_rate": 5.559409510498711e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94041040, + "step": 139545 + }, + { + "epoch": 3.4092297168543717, + "grad_norm": 0.0006495245615951717, + "learning_rate": 5.558645441467346e-07, + "loss": 0.0, + "num_input_tokens_seen": 94044240, + "step": 139550 + }, + { + "epoch": 3.409351867686219, + "grad_norm": 0.011606546118855476, + "learning_rate": 5.557881404734705e-07, + "loss": 0.0, + "num_input_tokens_seen": 94047184, + "step": 139555 + }, + { + "epoch": 3.409474018518066, + "grad_norm": 0.0022992929443717003, + "learning_rate": 5.557117400306341e-07, + "loss": 0.0, + "num_input_tokens_seen": 94050448, + "step": 139560 + }, + { + "epoch": 3.4095961693499133, + "grad_norm": 9.92760033113882e-05, + "learning_rate": 5.556353428187818e-07, + "loss": 0.0, + "num_input_tokens_seen": 94054160, + "step": 139565 + }, + { + "epoch": 3.4097183201817605, + "grad_norm": 0.0028340895660221577, + "learning_rate": 5.555589488384685e-07, + "loss": 0.0, + "num_input_tokens_seen": 94057616, + "step": 139570 + }, + { + "epoch": 3.4098404710136077, + "grad_norm": 0.0007459719781763852, + "learning_rate": 5.554825580902503e-07, + "loss": 0.0, + "num_input_tokens_seen": 94060752, + "step": 139575 + }, + { + "epoch": 3.409962621845455, + "grad_norm": 0.0012573867570608854, + "learning_rate": 5.554061705746822e-07, + "loss": 0.0, + "num_input_tokens_seen": 94064656, + "step": 139580 + }, + { + "epoch": 3.410084772677302, + "grad_norm": 0.10932556539773941, + "learning_rate": 5.553297862923203e-07, + "loss": 0.0, + "num_input_tokens_seen": 94068368, + "step": 139585 + }, + { + "epoch": 3.4102069235091492, + "grad_norm": 0.0009823766304180026, + "learning_rate": 5.552534052437195e-07, + "loss": 0.0, + "num_input_tokens_seen": 94072144, + "step": 139590 + }, + { + "epoch": 3.4103290743409964, + "grad_norm": 0.006408262066543102, + "learning_rate": 5.551770274294361e-07, + "loss": 0.0, + "num_input_tokens_seen": 94075472, + "step": 139595 + }, + { + "epoch": 3.4104512251728436, + "grad_norm": 0.0034182979725301266, + "learning_rate": 5.551006528500244e-07, + "loss": 0.0804, + "num_input_tokens_seen": 94078800, + "step": 139600 + }, + { + "epoch": 3.410573376004691, + "grad_norm": 0.2944185435771942, + "learning_rate": 5.550242815060404e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94081936, + "step": 139605 + }, + { + "epoch": 3.4106955268365375, + "grad_norm": 0.0003999386972282082, + "learning_rate": 5.5494791339804e-07, + "loss": 0.0, + "num_input_tokens_seen": 94085584, + "step": 139610 + }, + { + "epoch": 3.4108176776683847, + "grad_norm": 0.047348976135253906, + "learning_rate": 5.548715485265776e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94089040, + "step": 139615 + }, + { + "epoch": 3.410939828500232, + "grad_norm": 0.012279681861400604, + "learning_rate": 5.547951868922096e-07, + "loss": 0.0, + "num_input_tokens_seen": 94092048, + "step": 139620 + }, + { + "epoch": 3.411061979332079, + "grad_norm": 0.006022414658218622, + "learning_rate": 5.547188284954902e-07, + "loss": 0.0, + "num_input_tokens_seen": 94095120, + "step": 139625 + }, + { + "epoch": 3.4111841301639263, + "grad_norm": 0.0033210585825145245, + "learning_rate": 5.546424733369752e-07, + "loss": 0.0, + "num_input_tokens_seen": 94098768, + "step": 139630 + }, + { + "epoch": 3.4113062809957735, + "grad_norm": 0.0012283911928534508, + "learning_rate": 5.545661214172204e-07, + "loss": 0.0, + "num_input_tokens_seen": 94101520, + "step": 139635 + }, + { + "epoch": 3.4114284318276207, + "grad_norm": 0.0016081221401691437, + "learning_rate": 5.544897727367802e-07, + "loss": 0.0694, + "num_input_tokens_seen": 94104528, + "step": 139640 + }, + { + "epoch": 3.411550582659468, + "grad_norm": 0.02599150314927101, + "learning_rate": 5.544134272962105e-07, + "loss": 0.0, + "num_input_tokens_seen": 94107856, + "step": 139645 + }, + { + "epoch": 3.411672733491315, + "grad_norm": 0.0009995142463594675, + "learning_rate": 5.543370850960659e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94110928, + "step": 139650 + }, + { + "epoch": 3.4117948843231622, + "grad_norm": 0.0031279893592000008, + "learning_rate": 5.542607461369022e-07, + "loss": 0.0, + "num_input_tokens_seen": 94114256, + "step": 139655 + }, + { + "epoch": 3.4119170351550094, + "grad_norm": 0.0002375991316512227, + "learning_rate": 5.54184410419274e-07, + "loss": 0.0, + "num_input_tokens_seen": 94117776, + "step": 139660 + }, + { + "epoch": 3.4120391859868566, + "grad_norm": 1.0746980905532837, + "learning_rate": 5.54108077943737e-07, + "loss": 0.0004, + "num_input_tokens_seen": 94121232, + "step": 139665 + }, + { + "epoch": 3.412161336818704, + "grad_norm": 0.000253146281465888, + "learning_rate": 5.540317487108459e-07, + "loss": 0.0, + "num_input_tokens_seen": 94124560, + "step": 139670 + }, + { + "epoch": 3.412283487650551, + "grad_norm": 0.0034022240433841944, + "learning_rate": 5.539554227211556e-07, + "loss": 0.0, + "num_input_tokens_seen": 94127952, + "step": 139675 + }, + { + "epoch": 3.412405638482398, + "grad_norm": 0.004152646288275719, + "learning_rate": 5.538790999752217e-07, + "loss": 0.0, + "num_input_tokens_seen": 94130960, + "step": 139680 + }, + { + "epoch": 3.4125277893142454, + "grad_norm": 0.0016099411295726895, + "learning_rate": 5.538027804735987e-07, + "loss": 0.0, + "num_input_tokens_seen": 94134288, + "step": 139685 + }, + { + "epoch": 3.4126499401460926, + "grad_norm": 0.003590294159948826, + "learning_rate": 5.537264642168422e-07, + "loss": 0.0, + "num_input_tokens_seen": 94137424, + "step": 139690 + }, + { + "epoch": 3.4127720909779393, + "grad_norm": 0.010240147821605206, + "learning_rate": 5.536501512055065e-07, + "loss": 0.0, + "num_input_tokens_seen": 94140944, + "step": 139695 + }, + { + "epoch": 3.412894241809787, + "grad_norm": 0.0274630356580019, + "learning_rate": 5.535738414401471e-07, + "loss": 0.065, + "num_input_tokens_seen": 94143824, + "step": 139700 + }, + { + "epoch": 3.4130163926416337, + "grad_norm": 0.0014623597962781787, + "learning_rate": 5.534975349213191e-07, + "loss": 0.0376, + "num_input_tokens_seen": 94147024, + "step": 139705 + }, + { + "epoch": 3.413138543473481, + "grad_norm": 0.0355786494910717, + "learning_rate": 5.534212316495767e-07, + "loss": 0.0, + "num_input_tokens_seen": 94150864, + "step": 139710 + }, + { + "epoch": 3.413260694305328, + "grad_norm": 0.0009701123344711959, + "learning_rate": 5.533449316254756e-07, + "loss": 0.0, + "num_input_tokens_seen": 94154256, + "step": 139715 + }, + { + "epoch": 3.4133828451371753, + "grad_norm": 0.0032924406696110964, + "learning_rate": 5.532686348495699e-07, + "loss": 0.0, + "num_input_tokens_seen": 94157904, + "step": 139720 + }, + { + "epoch": 3.4135049959690225, + "grad_norm": 0.004160938318818808, + "learning_rate": 5.531923413224148e-07, + "loss": 0.0, + "num_input_tokens_seen": 94161168, + "step": 139725 + }, + { + "epoch": 3.4136271468008696, + "grad_norm": 0.0015338974772021174, + "learning_rate": 5.531160510445657e-07, + "loss": 0.0, + "num_input_tokens_seen": 94164432, + "step": 139730 + }, + { + "epoch": 3.413749297632717, + "grad_norm": 0.02621505782008171, + "learning_rate": 5.530397640165765e-07, + "loss": 0.0004, + "num_input_tokens_seen": 94168528, + "step": 139735 + }, + { + "epoch": 3.413871448464564, + "grad_norm": 0.004979231394827366, + "learning_rate": 5.529634802390026e-07, + "loss": 0.0256, + "num_input_tokens_seen": 94171536, + "step": 139740 + }, + { + "epoch": 3.413993599296411, + "grad_norm": 0.005769277922809124, + "learning_rate": 5.528871997123981e-07, + "loss": 0.0, + "num_input_tokens_seen": 94174608, + "step": 139745 + }, + { + "epoch": 3.4141157501282584, + "grad_norm": 0.0031776968389749527, + "learning_rate": 5.528109224373186e-07, + "loss": 0.0501, + "num_input_tokens_seen": 94177744, + "step": 139750 + }, + { + "epoch": 3.4142379009601056, + "grad_norm": 0.0034165666438639164, + "learning_rate": 5.52734648414318e-07, + "loss": 0.0, + "num_input_tokens_seen": 94181072, + "step": 139755 + }, + { + "epoch": 3.414360051791953, + "grad_norm": 0.0006008553318679333, + "learning_rate": 5.526583776439517e-07, + "loss": 0.0, + "num_input_tokens_seen": 94184656, + "step": 139760 + }, + { + "epoch": 3.4144822026238, + "grad_norm": 0.022039469331502914, + "learning_rate": 5.525821101267735e-07, + "loss": 0.0, + "num_input_tokens_seen": 94188496, + "step": 139765 + }, + { + "epoch": 3.414604353455647, + "grad_norm": 0.0023359765764325857, + "learning_rate": 5.525058458633391e-07, + "loss": 0.0, + "num_input_tokens_seen": 94191696, + "step": 139770 + }, + { + "epoch": 3.4147265042874944, + "grad_norm": 6.296150240814313e-05, + "learning_rate": 5.524295848542025e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94195344, + "step": 139775 + }, + { + "epoch": 3.4148486551193415, + "grad_norm": 0.0014544177101925015, + "learning_rate": 5.523533270999179e-07, + "loss": 0.0, + "num_input_tokens_seen": 94198800, + "step": 139780 + }, + { + "epoch": 3.4149708059511887, + "grad_norm": 0.019036872312426567, + "learning_rate": 5.522770726010404e-07, + "loss": 0.0, + "num_input_tokens_seen": 94202576, + "step": 139785 + }, + { + "epoch": 3.4150929567830355, + "grad_norm": 0.0024766286369413137, + "learning_rate": 5.522008213581249e-07, + "loss": 0.0, + "num_input_tokens_seen": 94205968, + "step": 139790 + }, + { + "epoch": 3.4152151076148827, + "grad_norm": 0.029975347220897675, + "learning_rate": 5.521245733717248e-07, + "loss": 0.0716, + "num_input_tokens_seen": 94209168, + "step": 139795 + }, + { + "epoch": 3.41533725844673, + "grad_norm": 0.0008781835786066949, + "learning_rate": 5.520483286423958e-07, + "loss": 0.0, + "num_input_tokens_seen": 94212688, + "step": 139800 + }, + { + "epoch": 3.415459409278577, + "grad_norm": 0.0005944212316535413, + "learning_rate": 5.519720871706916e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94215696, + "step": 139805 + }, + { + "epoch": 3.4155815601104242, + "grad_norm": 0.024179916828870773, + "learning_rate": 5.51895848957167e-07, + "loss": 0.0, + "num_input_tokens_seen": 94219280, + "step": 139810 + }, + { + "epoch": 3.4157037109422714, + "grad_norm": 0.0005148306954652071, + "learning_rate": 5.518196140023761e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94222928, + "step": 139815 + }, + { + "epoch": 3.4158258617741186, + "grad_norm": 0.017895027995109558, + "learning_rate": 5.517433823068736e-07, + "loss": 0.0, + "num_input_tokens_seen": 94226128, + "step": 139820 + }, + { + "epoch": 3.415948012605966, + "grad_norm": 0.00047818353050388396, + "learning_rate": 5.51667153871214e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94229200, + "step": 139825 + }, + { + "epoch": 3.416070163437813, + "grad_norm": 0.0006948122172616422, + "learning_rate": 5.51590928695951e-07, + "loss": 0.0, + "num_input_tokens_seen": 94232400, + "step": 139830 + }, + { + "epoch": 3.41619231426966, + "grad_norm": 0.06057262420654297, + "learning_rate": 5.515147067816399e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94235920, + "step": 139835 + }, + { + "epoch": 3.4163144651015074, + "grad_norm": 0.0007530879229307175, + "learning_rate": 5.514384881288341e-07, + "loss": 0.0, + "num_input_tokens_seen": 94239568, + "step": 139840 + }, + { + "epoch": 3.4164366159333546, + "grad_norm": 0.003584317397326231, + "learning_rate": 5.513622727380887e-07, + "loss": 0.0, + "num_input_tokens_seen": 94242960, + "step": 139845 + }, + { + "epoch": 3.4165587667652018, + "grad_norm": 0.0016264979494735599, + "learning_rate": 5.51286060609957e-07, + "loss": 0.0, + "num_input_tokens_seen": 94245776, + "step": 139850 + }, + { + "epoch": 3.416680917597049, + "grad_norm": 0.00211779261007905, + "learning_rate": 5.512098517449938e-07, + "loss": 0.0, + "num_input_tokens_seen": 94249168, + "step": 139855 + }, + { + "epoch": 3.416803068428896, + "grad_norm": 0.8464714884757996, + "learning_rate": 5.511336461437536e-07, + "loss": 0.0003, + "num_input_tokens_seen": 94252496, + "step": 139860 + }, + { + "epoch": 3.4169252192607433, + "grad_norm": 0.00245960452593863, + "learning_rate": 5.510574438067904e-07, + "loss": 0.0, + "num_input_tokens_seen": 94256080, + "step": 139865 + }, + { + "epoch": 3.4170473700925905, + "grad_norm": 0.04314202442765236, + "learning_rate": 5.509812447346578e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94259792, + "step": 139870 + }, + { + "epoch": 3.4171695209244373, + "grad_norm": 0.060364704579114914, + "learning_rate": 5.509050489279107e-07, + "loss": 0.0, + "num_input_tokens_seen": 94262800, + "step": 139875 + }, + { + "epoch": 3.417291671756285, + "grad_norm": 0.0005380921647883952, + "learning_rate": 5.508288563871024e-07, + "loss": 0.0, + "num_input_tokens_seen": 94266064, + "step": 139880 + }, + { + "epoch": 3.4174138225881316, + "grad_norm": 0.0008233811822719872, + "learning_rate": 5.50752667112788e-07, + "loss": 0.0, + "num_input_tokens_seen": 94270288, + "step": 139885 + }, + { + "epoch": 3.417535973419979, + "grad_norm": 0.0003207654517609626, + "learning_rate": 5.506764811055206e-07, + "loss": 0.0, + "num_input_tokens_seen": 94273680, + "step": 139890 + }, + { + "epoch": 3.417658124251826, + "grad_norm": 0.004454872105270624, + "learning_rate": 5.506002983658551e-07, + "loss": 0.0, + "num_input_tokens_seen": 94276880, + "step": 139895 + }, + { + "epoch": 3.417780275083673, + "grad_norm": 0.0006709819426760077, + "learning_rate": 5.505241188943446e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94280464, + "step": 139900 + }, + { + "epoch": 3.4179024259155204, + "grad_norm": 0.0037196956109255552, + "learning_rate": 5.504479426915441e-07, + "loss": 0.0402, + "num_input_tokens_seen": 94283600, + "step": 139905 + }, + { + "epoch": 3.4180245767473676, + "grad_norm": 0.005503419786691666, + "learning_rate": 5.503717697580067e-07, + "loss": 0.0, + "num_input_tokens_seen": 94287248, + "step": 139910 + }, + { + "epoch": 3.418146727579215, + "grad_norm": 0.09051766246557236, + "learning_rate": 5.502956000942867e-07, + "loss": 0.0, + "num_input_tokens_seen": 94290320, + "step": 139915 + }, + { + "epoch": 3.418268878411062, + "grad_norm": 0.013865584507584572, + "learning_rate": 5.502194337009384e-07, + "loss": 0.1, + "num_input_tokens_seen": 94293328, + "step": 139920 + }, + { + "epoch": 3.418391029242909, + "grad_norm": 0.00011315821029711515, + "learning_rate": 5.501432705785149e-07, + "loss": 0.0, + "num_input_tokens_seen": 94298704, + "step": 139925 + }, + { + "epoch": 3.4185131800747564, + "grad_norm": 0.0008986890898086131, + "learning_rate": 5.500671107275712e-07, + "loss": 0.0, + "num_input_tokens_seen": 94301840, + "step": 139930 + }, + { + "epoch": 3.4186353309066035, + "grad_norm": 0.00023561430862173438, + "learning_rate": 5.499909541486598e-07, + "loss": 0.0, + "num_input_tokens_seen": 94304976, + "step": 139935 + }, + { + "epoch": 3.4187574817384507, + "grad_norm": 0.013146194629371166, + "learning_rate": 5.499148008423353e-07, + "loss": 0.0, + "num_input_tokens_seen": 94308368, + "step": 139940 + }, + { + "epoch": 3.418879632570298, + "grad_norm": 0.0015010988572612405, + "learning_rate": 5.49838650809152e-07, + "loss": 0.0, + "num_input_tokens_seen": 94311760, + "step": 139945 + }, + { + "epoch": 3.419001783402145, + "grad_norm": 0.012417850084602833, + "learning_rate": 5.497625040496625e-07, + "loss": 0.0, + "num_input_tokens_seen": 94314960, + "step": 139950 + }, + { + "epoch": 3.4191239342339923, + "grad_norm": 0.0008566909236833453, + "learning_rate": 5.496863605644215e-07, + "loss": 0.0, + "num_input_tokens_seen": 94318224, + "step": 139955 + }, + { + "epoch": 3.4192460850658395, + "grad_norm": 0.02170843631029129, + "learning_rate": 5.496102203539823e-07, + "loss": 0.0, + "num_input_tokens_seen": 94321872, + "step": 139960 + }, + { + "epoch": 3.4193682358976867, + "grad_norm": 0.006882964167743921, + "learning_rate": 5.495340834188988e-07, + "loss": 0.0475, + "num_input_tokens_seen": 94325200, + "step": 139965 + }, + { + "epoch": 3.4194903867295334, + "grad_norm": 0.002628026297315955, + "learning_rate": 5.494579497597245e-07, + "loss": 0.0, + "num_input_tokens_seen": 94328528, + "step": 139970 + }, + { + "epoch": 3.4196125375613806, + "grad_norm": 0.0007482045330107212, + "learning_rate": 5.493818193770134e-07, + "loss": 0.0, + "num_input_tokens_seen": 94331792, + "step": 139975 + }, + { + "epoch": 3.419734688393228, + "grad_norm": 0.014378773048520088, + "learning_rate": 5.493056922713189e-07, + "loss": 0.0365, + "num_input_tokens_seen": 94335760, + "step": 139980 + }, + { + "epoch": 3.419856839225075, + "grad_norm": 0.00155660230666399, + "learning_rate": 5.492295684431942e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94339088, + "step": 139985 + }, + { + "epoch": 3.419978990056922, + "grad_norm": 0.0016220946563407779, + "learning_rate": 5.491534478931939e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94342352, + "step": 139990 + }, + { + "epoch": 3.4201011408887694, + "grad_norm": 0.0007400620961561799, + "learning_rate": 5.490773306218705e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94345488, + "step": 139995 + }, + { + "epoch": 3.4202232917206166, + "grad_norm": 0.007975745014846325, + "learning_rate": 5.490012166297783e-07, + "loss": 0.0, + "num_input_tokens_seen": 94349136, + "step": 140000 + }, + { + "epoch": 3.4203454425524638, + "grad_norm": 0.016189241781830788, + "learning_rate": 5.489251059174705e-07, + "loss": 0.0, + "num_input_tokens_seen": 94352272, + "step": 140005 + }, + { + "epoch": 3.420467593384311, + "grad_norm": 0.00473784189671278, + "learning_rate": 5.488489984855002e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94355728, + "step": 140010 + }, + { + "epoch": 3.420589744216158, + "grad_norm": 0.004506468307226896, + "learning_rate": 5.487728943344221e-07, + "loss": 0.0, + "num_input_tokens_seen": 94359248, + "step": 140015 + }, + { + "epoch": 3.4207118950480053, + "grad_norm": 27.068483352661133, + "learning_rate": 5.486967934647884e-07, + "loss": 0.0799, + "num_input_tokens_seen": 94362256, + "step": 140020 + }, + { + "epoch": 3.4208340458798525, + "grad_norm": 0.00046828857739456, + "learning_rate": 5.486206958771534e-07, + "loss": 0.0, + "num_input_tokens_seen": 94366096, + "step": 140025 + }, + { + "epoch": 3.4209561967116997, + "grad_norm": 144.9590606689453, + "learning_rate": 5.485446015720695e-07, + "loss": 0.002, + "num_input_tokens_seen": 94369104, + "step": 140030 + }, + { + "epoch": 3.421078347543547, + "grad_norm": 0.0013556088088080287, + "learning_rate": 5.484685105500908e-07, + "loss": 0.0955, + "num_input_tokens_seen": 94372496, + "step": 140035 + }, + { + "epoch": 3.421200498375394, + "grad_norm": 0.009067345410585403, + "learning_rate": 5.48392422811771e-07, + "loss": 0.0, + "num_input_tokens_seen": 94375568, + "step": 140040 + }, + { + "epoch": 3.4213226492072413, + "grad_norm": 0.001369845005683601, + "learning_rate": 5.483163383576626e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94379216, + "step": 140045 + }, + { + "epoch": 3.4214448000390885, + "grad_norm": 0.003838911419734359, + "learning_rate": 5.482402571883196e-07, + "loss": 0.0, + "num_input_tokens_seen": 94382736, + "step": 140050 + }, + { + "epoch": 3.421566950870935, + "grad_norm": 0.02284904755651951, + "learning_rate": 5.481641793042945e-07, + "loss": 0.0, + "num_input_tokens_seen": 94386576, + "step": 140055 + }, + { + "epoch": 3.421689101702783, + "grad_norm": 0.004888457246124744, + "learning_rate": 5.480881047061415e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94390288, + "step": 140060 + }, + { + "epoch": 3.4218112525346296, + "grad_norm": 0.004341055639088154, + "learning_rate": 5.48012033394413e-07, + "loss": 0.0, + "num_input_tokens_seen": 94393680, + "step": 140065 + }, + { + "epoch": 3.421933403366477, + "grad_norm": 0.0021836755331605673, + "learning_rate": 5.47935965369663e-07, + "loss": 0.0, + "num_input_tokens_seen": 94397392, + "step": 140070 + }, + { + "epoch": 3.422055554198324, + "grad_norm": 0.0013760802103206515, + "learning_rate": 5.478599006324436e-07, + "loss": 0.0, + "num_input_tokens_seen": 94400720, + "step": 140075 + }, + { + "epoch": 3.422177705030171, + "grad_norm": 0.011365426704287529, + "learning_rate": 5.477838391833092e-07, + "loss": 0.0, + "num_input_tokens_seen": 94404048, + "step": 140080 + }, + { + "epoch": 3.4222998558620183, + "grad_norm": 0.002810131059959531, + "learning_rate": 5.477077810228123e-07, + "loss": 0.0, + "num_input_tokens_seen": 94407248, + "step": 140085 + }, + { + "epoch": 3.4224220066938655, + "grad_norm": 0.19500964879989624, + "learning_rate": 5.476317261515058e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94410576, + "step": 140090 + }, + { + "epoch": 3.4225441575257127, + "grad_norm": 0.019051941111683846, + "learning_rate": 5.475556745699433e-07, + "loss": 0.0007, + "num_input_tokens_seen": 94413776, + "step": 140095 + }, + { + "epoch": 3.42266630835756, + "grad_norm": 0.040146760642528534, + "learning_rate": 5.474796262786773e-07, + "loss": 0.0, + "num_input_tokens_seen": 94417232, + "step": 140100 + }, + { + "epoch": 3.422788459189407, + "grad_norm": 0.003175535937771201, + "learning_rate": 5.474035812782611e-07, + "loss": 0.0, + "num_input_tokens_seen": 94420176, + "step": 140105 + }, + { + "epoch": 3.4229106100212543, + "grad_norm": 0.003321741707623005, + "learning_rate": 5.473275395692483e-07, + "loss": 0.0, + "num_input_tokens_seen": 94423312, + "step": 140110 + }, + { + "epoch": 3.4230327608531015, + "grad_norm": 0.0006850691861473024, + "learning_rate": 5.472515011521908e-07, + "loss": 0.083, + "num_input_tokens_seen": 94426896, + "step": 140115 + }, + { + "epoch": 3.4231549116849487, + "grad_norm": 0.04458747059106827, + "learning_rate": 5.471754660276428e-07, + "loss": 0.0, + "num_input_tokens_seen": 94430352, + "step": 140120 + }, + { + "epoch": 3.423277062516796, + "grad_norm": 0.009945902973413467, + "learning_rate": 5.470994341961561e-07, + "loss": 0.0, + "num_input_tokens_seen": 94434128, + "step": 140125 + }, + { + "epoch": 3.423399213348643, + "grad_norm": 0.0006958251469768584, + "learning_rate": 5.47023405658284e-07, + "loss": 0.0548, + "num_input_tokens_seen": 94437200, + "step": 140130 + }, + { + "epoch": 3.4235213641804902, + "grad_norm": 0.006527234800159931, + "learning_rate": 5.469473804145801e-07, + "loss": 0.0, + "num_input_tokens_seen": 94440528, + "step": 140135 + }, + { + "epoch": 3.423643515012337, + "grad_norm": 0.0007295972318388522, + "learning_rate": 5.468713584655963e-07, + "loss": 0.0, + "num_input_tokens_seen": 94443792, + "step": 140140 + }, + { + "epoch": 3.4237656658441846, + "grad_norm": 0.0014944429276511073, + "learning_rate": 5.467953398118863e-07, + "loss": 0.0489, + "num_input_tokens_seen": 94446864, + "step": 140145 + }, + { + "epoch": 3.4238878166760314, + "grad_norm": 0.05061039701104164, + "learning_rate": 5.467193244540019e-07, + "loss": 0.031, + "num_input_tokens_seen": 94450192, + "step": 140150 + }, + { + "epoch": 3.4240099675078786, + "grad_norm": 0.20036552846431732, + "learning_rate": 5.466433123924969e-07, + "loss": 0.0366, + "num_input_tokens_seen": 94453968, + "step": 140155 + }, + { + "epoch": 3.4241321183397257, + "grad_norm": 0.0480385348200798, + "learning_rate": 5.465673036279235e-07, + "loss": 0.0, + "num_input_tokens_seen": 94457296, + "step": 140160 + }, + { + "epoch": 3.424254269171573, + "grad_norm": 0.008224071003496647, + "learning_rate": 5.464912981608345e-07, + "loss": 0.0, + "num_input_tokens_seen": 94460176, + "step": 140165 + }, + { + "epoch": 3.42437642000342, + "grad_norm": 0.00030735330074094236, + "learning_rate": 5.464152959917831e-07, + "loss": 0.0, + "num_input_tokens_seen": 94463184, + "step": 140170 + }, + { + "epoch": 3.4244985708352673, + "grad_norm": 0.1763540804386139, + "learning_rate": 5.463392971213218e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94466320, + "step": 140175 + }, + { + "epoch": 3.4246207216671145, + "grad_norm": 0.007921542041003704, + "learning_rate": 5.462633015500027e-07, + "loss": 0.0, + "num_input_tokens_seen": 94469392, + "step": 140180 + }, + { + "epoch": 3.4247428724989617, + "grad_norm": 0.0009218865307047963, + "learning_rate": 5.461873092783792e-07, + "loss": 0.027, + "num_input_tokens_seen": 94472656, + "step": 140185 + }, + { + "epoch": 3.424865023330809, + "grad_norm": 0.0015647134277969599, + "learning_rate": 5.461113203070033e-07, + "loss": 0.0, + "num_input_tokens_seen": 94476688, + "step": 140190 + }, + { + "epoch": 3.424987174162656, + "grad_norm": 2.1352298259735107, + "learning_rate": 5.460353346364284e-07, + "loss": 0.001, + "num_input_tokens_seen": 94479888, + "step": 140195 + }, + { + "epoch": 3.4251093249945033, + "grad_norm": 0.002515707165002823, + "learning_rate": 5.459593522672063e-07, + "loss": 0.0, + "num_input_tokens_seen": 94483152, + "step": 140200 + }, + { + "epoch": 3.4252314758263505, + "grad_norm": 0.002265633549541235, + "learning_rate": 5.458833731998902e-07, + "loss": 0.0, + "num_input_tokens_seen": 94486480, + "step": 140205 + }, + { + "epoch": 3.4253536266581976, + "grad_norm": 0.07212600111961365, + "learning_rate": 5.45807397435032e-07, + "loss": 0.0388, + "num_input_tokens_seen": 94489744, + "step": 140210 + }, + { + "epoch": 3.425475777490045, + "grad_norm": 0.00012162972416263074, + "learning_rate": 5.45731424973185e-07, + "loss": 0.0, + "num_input_tokens_seen": 94493072, + "step": 140215 + }, + { + "epoch": 3.425597928321892, + "grad_norm": 0.038783859461545944, + "learning_rate": 5.456554558149008e-07, + "loss": 0.0, + "num_input_tokens_seen": 94496400, + "step": 140220 + }, + { + "epoch": 3.425720079153739, + "grad_norm": 5.232390685705468e-05, + "learning_rate": 5.455794899607324e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94499920, + "step": 140225 + }, + { + "epoch": 3.4258422299855864, + "grad_norm": 8.634371624793857e-05, + "learning_rate": 5.455035274112325e-07, + "loss": 0.0644, + "num_input_tokens_seen": 94503888, + "step": 140230 + }, + { + "epoch": 3.425964380817433, + "grad_norm": 0.0008194191614165902, + "learning_rate": 5.454275681669529e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94507728, + "step": 140235 + }, + { + "epoch": 3.4260865316492803, + "grad_norm": 0.07005437463521957, + "learning_rate": 5.453516122284465e-07, + "loss": 0.0, + "num_input_tokens_seen": 94511184, + "step": 140240 + }, + { + "epoch": 3.4262086824811275, + "grad_norm": 0.011823393404483795, + "learning_rate": 5.452756595962653e-07, + "loss": 0.0, + "num_input_tokens_seen": 94514576, + "step": 140245 + }, + { + "epoch": 3.4263308333129747, + "grad_norm": 0.0013865167275071144, + "learning_rate": 5.45199710270962e-07, + "loss": 0.0, + "num_input_tokens_seen": 94517840, + "step": 140250 + }, + { + "epoch": 3.426452984144822, + "grad_norm": 0.0023665507324039936, + "learning_rate": 5.451237642530884e-07, + "loss": 0.0, + "num_input_tokens_seen": 94521040, + "step": 140255 + }, + { + "epoch": 3.426575134976669, + "grad_norm": 0.0009652891894802451, + "learning_rate": 5.450478215431973e-07, + "loss": 0.0, + "num_input_tokens_seen": 94524368, + "step": 140260 + }, + { + "epoch": 3.4266972858085163, + "grad_norm": 0.00044628497562371194, + "learning_rate": 5.44971882141841e-07, + "loss": 0.0576, + "num_input_tokens_seen": 94527632, + "step": 140265 + }, + { + "epoch": 3.4268194366403635, + "grad_norm": 0.022095222026109695, + "learning_rate": 5.448959460495712e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94531024, + "step": 140270 + }, + { + "epoch": 3.4269415874722107, + "grad_norm": 0.0030793990008533, + "learning_rate": 5.44820013266941e-07, + "loss": 0.0, + "num_input_tokens_seen": 94534032, + "step": 140275 + }, + { + "epoch": 3.427063738304058, + "grad_norm": 0.00028188584838062525, + "learning_rate": 5.447440837945015e-07, + "loss": 0.0, + "num_input_tokens_seen": 94537680, + "step": 140280 + }, + { + "epoch": 3.427185889135905, + "grad_norm": 0.0007988949655555189, + "learning_rate": 5.446681576328061e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94540624, + "step": 140285 + }, + { + "epoch": 3.4273080399677522, + "grad_norm": 0.0007177051738835871, + "learning_rate": 5.445922347824062e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94543952, + "step": 140290 + }, + { + "epoch": 3.4274301907995994, + "grad_norm": 0.001201879233121872, + "learning_rate": 5.445163152438535e-07, + "loss": 0.0, + "num_input_tokens_seen": 94547344, + "step": 140295 + }, + { + "epoch": 3.4275523416314466, + "grad_norm": 0.008907120674848557, + "learning_rate": 5.444403990177013e-07, + "loss": 0.047, + "num_input_tokens_seen": 94550928, + "step": 140300 + }, + { + "epoch": 3.427674492463294, + "grad_norm": 0.001095519750379026, + "learning_rate": 5.443644861045006e-07, + "loss": 0.0, + "num_input_tokens_seen": 94554128, + "step": 140305 + }, + { + "epoch": 3.427796643295141, + "grad_norm": 0.1418331414461136, + "learning_rate": 5.442885765048042e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94557584, + "step": 140310 + }, + { + "epoch": 3.427918794126988, + "grad_norm": 0.022201886400580406, + "learning_rate": 5.442126702191637e-07, + "loss": 0.0, + "num_input_tokens_seen": 94560720, + "step": 140315 + }, + { + "epoch": 3.428040944958835, + "grad_norm": 0.0010307944612577558, + "learning_rate": 5.44136767248131e-07, + "loss": 0.0325, + "num_input_tokens_seen": 94564176, + "step": 140320 + }, + { + "epoch": 3.4281630957906826, + "grad_norm": 0.00235103745944798, + "learning_rate": 5.440608675922589e-07, + "loss": 0.0366, + "num_input_tokens_seen": 94567568, + "step": 140325 + }, + { + "epoch": 3.4282852466225293, + "grad_norm": 0.019659819081425667, + "learning_rate": 5.439849712520983e-07, + "loss": 0.0, + "num_input_tokens_seen": 94570896, + "step": 140330 + }, + { + "epoch": 3.4284073974543765, + "grad_norm": 0.002396960277110338, + "learning_rate": 5.439090782282021e-07, + "loss": 0.0, + "num_input_tokens_seen": 94574736, + "step": 140335 + }, + { + "epoch": 3.4285295482862237, + "grad_norm": 0.003646480618044734, + "learning_rate": 5.438331885211212e-07, + "loss": 0.0, + "num_input_tokens_seen": 94577744, + "step": 140340 + }, + { + "epoch": 3.428651699118071, + "grad_norm": 0.0033332430757582188, + "learning_rate": 5.437573021314083e-07, + "loss": 0.0569, + "num_input_tokens_seen": 94580880, + "step": 140345 + }, + { + "epoch": 3.428773849949918, + "grad_norm": 0.0023963816929608583, + "learning_rate": 5.436814190596153e-07, + "loss": 0.0248, + "num_input_tokens_seen": 94584080, + "step": 140350 + }, + { + "epoch": 3.4288960007817653, + "grad_norm": 0.0009214163874275982, + "learning_rate": 5.436055393062933e-07, + "loss": 0.0, + "num_input_tokens_seen": 94587536, + "step": 140355 + }, + { + "epoch": 3.4290181516136125, + "grad_norm": 37.51756286621094, + "learning_rate": 5.43529662871995e-07, + "loss": 0.0017, + "num_input_tokens_seen": 94590928, + "step": 140360 + }, + { + "epoch": 3.4291403024454596, + "grad_norm": 0.0013852888951078057, + "learning_rate": 5.434537897572713e-07, + "loss": 0.0, + "num_input_tokens_seen": 94594320, + "step": 140365 + }, + { + "epoch": 3.429262453277307, + "grad_norm": 0.010237696580588818, + "learning_rate": 5.43377919962675e-07, + "loss": 0.0, + "num_input_tokens_seen": 94597776, + "step": 140370 + }, + { + "epoch": 3.429384604109154, + "grad_norm": 0.0037443661130964756, + "learning_rate": 5.433020534887568e-07, + "loss": 0.043, + "num_input_tokens_seen": 94601424, + "step": 140375 + }, + { + "epoch": 3.429506754941001, + "grad_norm": 0.07455271482467651, + "learning_rate": 5.432261903360693e-07, + "loss": 0.0, + "num_input_tokens_seen": 94604624, + "step": 140380 + }, + { + "epoch": 3.4296289057728484, + "grad_norm": 0.002680698409676552, + "learning_rate": 5.431503305051634e-07, + "loss": 0.0, + "num_input_tokens_seen": 94607760, + "step": 140385 + }, + { + "epoch": 3.4297510566046956, + "grad_norm": 0.06336840987205505, + "learning_rate": 5.430744739965915e-07, + "loss": 0.0572, + "num_input_tokens_seen": 94611152, + "step": 140390 + }, + { + "epoch": 3.429873207436543, + "grad_norm": 0.0008052958874031901, + "learning_rate": 5.429986208109052e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94614544, + "step": 140395 + }, + { + "epoch": 3.42999535826839, + "grad_norm": 0.02269018441438675, + "learning_rate": 5.429227709486552e-07, + "loss": 0.0, + "num_input_tokens_seen": 94618448, + "step": 140400 + }, + { + "epoch": 3.430117509100237, + "grad_norm": 0.00405275821685791, + "learning_rate": 5.428469244103941e-07, + "loss": 0.0549, + "num_input_tokens_seen": 94621840, + "step": 140405 + }, + { + "epoch": 3.4302396599320844, + "grad_norm": 0.6425538659095764, + "learning_rate": 5.427710811966729e-07, + "loss": 0.0004, + "num_input_tokens_seen": 94625488, + "step": 140410 + }, + { + "epoch": 3.430361810763931, + "grad_norm": 0.001985025592148304, + "learning_rate": 5.426952413080433e-07, + "loss": 0.0058, + "num_input_tokens_seen": 94629264, + "step": 140415 + }, + { + "epoch": 3.4304839615957783, + "grad_norm": 16.643945693969727, + "learning_rate": 5.426194047450574e-07, + "loss": 0.0299, + "num_input_tokens_seen": 94633104, + "step": 140420 + }, + { + "epoch": 3.4306061124276255, + "grad_norm": 0.11192969977855682, + "learning_rate": 5.425435715082657e-07, + "loss": 0.0656, + "num_input_tokens_seen": 94636176, + "step": 140425 + }, + { + "epoch": 3.4307282632594727, + "grad_norm": 0.03374035283923149, + "learning_rate": 5.424677415982206e-07, + "loss": 0.0, + "num_input_tokens_seen": 94639632, + "step": 140430 + }, + { + "epoch": 3.43085041409132, + "grad_norm": 0.011221355758607388, + "learning_rate": 5.423919150154727e-07, + "loss": 0.0444, + "num_input_tokens_seen": 94642896, + "step": 140435 + }, + { + "epoch": 3.430972564923167, + "grad_norm": 0.000732203247025609, + "learning_rate": 5.42316091760574e-07, + "loss": 0.0422, + "num_input_tokens_seen": 94646480, + "step": 140440 + }, + { + "epoch": 3.4310947157550142, + "grad_norm": 0.1238001212477684, + "learning_rate": 5.42240271834076e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94650128, + "step": 140445 + }, + { + "epoch": 3.4312168665868614, + "grad_norm": 0.01326525118201971, + "learning_rate": 5.421644552365296e-07, + "loss": 0.0, + "num_input_tokens_seen": 94653776, + "step": 140450 + }, + { + "epoch": 3.4313390174187086, + "grad_norm": 0.011391034349799156, + "learning_rate": 5.420886419684869e-07, + "loss": 0.0, + "num_input_tokens_seen": 94656976, + "step": 140455 + }, + { + "epoch": 3.431461168250556, + "grad_norm": 0.00035396870225667953, + "learning_rate": 5.420128320304982e-07, + "loss": 0.0, + "num_input_tokens_seen": 94660304, + "step": 140460 + }, + { + "epoch": 3.431583319082403, + "grad_norm": 0.0002599718573037535, + "learning_rate": 5.419370254231159e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94663632, + "step": 140465 + }, + { + "epoch": 3.43170546991425, + "grad_norm": 0.07461328059434891, + "learning_rate": 5.418612221468902e-07, + "loss": 0.0, + "num_input_tokens_seen": 94667152, + "step": 140470 + }, + { + "epoch": 3.4318276207460974, + "grad_norm": 0.12314751744270325, + "learning_rate": 5.41785422202373e-07, + "loss": 0.0, + "num_input_tokens_seen": 94670544, + "step": 140475 + }, + { + "epoch": 3.4319497715779446, + "grad_norm": 0.004574818070977926, + "learning_rate": 5.417096255901159e-07, + "loss": 0.1119, + "num_input_tokens_seen": 94674064, + "step": 140480 + }, + { + "epoch": 3.4320719224097918, + "grad_norm": 0.005468493793159723, + "learning_rate": 5.416338323106697e-07, + "loss": 0.0, + "num_input_tokens_seen": 94677584, + "step": 140485 + }, + { + "epoch": 3.432194073241639, + "grad_norm": 0.02408468723297119, + "learning_rate": 5.41558042364585e-07, + "loss": 0.0004, + "num_input_tokens_seen": 94680784, + "step": 140490 + }, + { + "epoch": 3.432316224073486, + "grad_norm": 0.009744217619299889, + "learning_rate": 5.41482255752414e-07, + "loss": 0.1002, + "num_input_tokens_seen": 94683920, + "step": 140495 + }, + { + "epoch": 3.432438374905333, + "grad_norm": 0.6703230738639832, + "learning_rate": 5.414064724747069e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94686800, + "step": 140500 + }, + { + "epoch": 3.4325605257371805, + "grad_norm": 0.005938725546002388, + "learning_rate": 5.413306925320158e-07, + "loss": 0.0, + "num_input_tokens_seen": 94690768, + "step": 140505 + }, + { + "epoch": 3.4326826765690273, + "grad_norm": 0.09531354159116745, + "learning_rate": 5.412549159248909e-07, + "loss": 0.0006, + "num_input_tokens_seen": 94693776, + "step": 140510 + }, + { + "epoch": 3.4328048274008744, + "grad_norm": 0.0047434065490961075, + "learning_rate": 5.411791426538839e-07, + "loss": 0.0, + "num_input_tokens_seen": 94697488, + "step": 140515 + }, + { + "epoch": 3.4329269782327216, + "grad_norm": 0.0113271065056324, + "learning_rate": 5.411033727195453e-07, + "loss": 0.0203, + "num_input_tokens_seen": 94700688, + "step": 140520 + }, + { + "epoch": 3.433049129064569, + "grad_norm": 0.0006195983733050525, + "learning_rate": 5.410276061224267e-07, + "loss": 0.0, + "num_input_tokens_seen": 94704336, + "step": 140525 + }, + { + "epoch": 3.433171279896416, + "grad_norm": 0.05286652222275734, + "learning_rate": 5.409518428630785e-07, + "loss": 0.0, + "num_input_tokens_seen": 94707600, + "step": 140530 + }, + { + "epoch": 3.433293430728263, + "grad_norm": 0.00045306552783586085, + "learning_rate": 5.408760829420519e-07, + "loss": 0.0, + "num_input_tokens_seen": 94710864, + "step": 140535 + }, + { + "epoch": 3.4334155815601104, + "grad_norm": 0.06464538723230362, + "learning_rate": 5.408003263598984e-07, + "loss": 0.0, + "num_input_tokens_seen": 94714128, + "step": 140540 + }, + { + "epoch": 3.4335377323919576, + "grad_norm": 0.0013601994141936302, + "learning_rate": 5.407245731171679e-07, + "loss": 0.0, + "num_input_tokens_seen": 94717264, + "step": 140545 + }, + { + "epoch": 3.4336598832238048, + "grad_norm": 0.006994623225182295, + "learning_rate": 5.406488232144122e-07, + "loss": 0.0, + "num_input_tokens_seen": 94720848, + "step": 140550 + }, + { + "epoch": 3.433782034055652, + "grad_norm": 26.440235137939453, + "learning_rate": 5.405730766521815e-07, + "loss": 0.0694, + "num_input_tokens_seen": 94724368, + "step": 140555 + }, + { + "epoch": 3.433904184887499, + "grad_norm": 0.23649762570858002, + "learning_rate": 5.404973334310274e-07, + "loss": 0.0606, + "num_input_tokens_seen": 94727824, + "step": 140560 + }, + { + "epoch": 3.4340263357193463, + "grad_norm": 0.01221111137419939, + "learning_rate": 5.404215935514999e-07, + "loss": 0.0, + "num_input_tokens_seen": 94730960, + "step": 140565 + }, + { + "epoch": 3.4341484865511935, + "grad_norm": 0.0008569937199354172, + "learning_rate": 5.403458570141502e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94734224, + "step": 140570 + }, + { + "epoch": 3.4342706373830407, + "grad_norm": 0.002068551490083337, + "learning_rate": 5.402701238195293e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94737488, + "step": 140575 + }, + { + "epoch": 3.434392788214888, + "grad_norm": 0.018063923344016075, + "learning_rate": 5.401943939681875e-07, + "loss": 0.0, + "num_input_tokens_seen": 94741008, + "step": 140580 + }, + { + "epoch": 3.4345149390467347, + "grad_norm": 0.019212810322642326, + "learning_rate": 5.401186674606759e-07, + "loss": 0.0004, + "num_input_tokens_seen": 94744400, + "step": 140585 + }, + { + "epoch": 3.4346370898785823, + "grad_norm": 0.002542370930314064, + "learning_rate": 5.400429442975448e-07, + "loss": 0.0186, + "num_input_tokens_seen": 94747984, + "step": 140590 + }, + { + "epoch": 3.434759240710429, + "grad_norm": 0.016300778836011887, + "learning_rate": 5.399672244793455e-07, + "loss": 0.0, + "num_input_tokens_seen": 94751568, + "step": 140595 + }, + { + "epoch": 3.4348813915422762, + "grad_norm": 0.019658824428915977, + "learning_rate": 5.398915080066283e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94754832, + "step": 140600 + }, + { + "epoch": 3.4350035423741234, + "grad_norm": 0.014098125509917736, + "learning_rate": 5.398157948799434e-07, + "loss": 0.0, + "num_input_tokens_seen": 94758032, + "step": 140605 + }, + { + "epoch": 3.4351256932059706, + "grad_norm": 0.007338009774684906, + "learning_rate": 5.397400850998421e-07, + "loss": 0.0187, + "num_input_tokens_seen": 94761552, + "step": 140610 + }, + { + "epoch": 3.435247844037818, + "grad_norm": 0.0005641011521220207, + "learning_rate": 5.396643786668744e-07, + "loss": 0.0, + "num_input_tokens_seen": 94764752, + "step": 140615 + }, + { + "epoch": 3.435369994869665, + "grad_norm": 0.03765435144305229, + "learning_rate": 5.395886755815918e-07, + "loss": 0.0, + "num_input_tokens_seen": 94768272, + "step": 140620 + }, + { + "epoch": 3.435492145701512, + "grad_norm": 0.013260685838758945, + "learning_rate": 5.395129758445433e-07, + "loss": 0.0, + "num_input_tokens_seen": 94771664, + "step": 140625 + }, + { + "epoch": 3.4356142965333594, + "grad_norm": 0.004746972117573023, + "learning_rate": 5.394372794562805e-07, + "loss": 0.0005, + "num_input_tokens_seen": 94774928, + "step": 140630 + }, + { + "epoch": 3.4357364473652066, + "grad_norm": 0.0005900778924115002, + "learning_rate": 5.393615864173542e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94778384, + "step": 140635 + }, + { + "epoch": 3.4358585981970537, + "grad_norm": 0.0015556630678474903, + "learning_rate": 5.392858967283138e-07, + "loss": 0.0, + "num_input_tokens_seen": 94782032, + "step": 140640 + }, + { + "epoch": 3.435980749028901, + "grad_norm": 0.00017046624270733446, + "learning_rate": 5.392102103897108e-07, + "loss": 0.0006, + "num_input_tokens_seen": 94785488, + "step": 140645 + }, + { + "epoch": 3.436102899860748, + "grad_norm": 0.0002703327627386898, + "learning_rate": 5.391345274020946e-07, + "loss": 0.0, + "num_input_tokens_seen": 94788752, + "step": 140650 + }, + { + "epoch": 3.4362250506925953, + "grad_norm": 0.002478924347087741, + "learning_rate": 5.390588477660161e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94792016, + "step": 140655 + }, + { + "epoch": 3.4363472015244425, + "grad_norm": 0.01701575517654419, + "learning_rate": 5.389831714820261e-07, + "loss": 0.0, + "num_input_tokens_seen": 94794960, + "step": 140660 + }, + { + "epoch": 3.4364693523562897, + "grad_norm": 0.021169807761907578, + "learning_rate": 5.389074985506739e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94798032, + "step": 140665 + }, + { + "epoch": 3.436591503188137, + "grad_norm": 0.001484877779148519, + "learning_rate": 5.38831828972511e-07, + "loss": 0.0, + "num_input_tokens_seen": 94801232, + "step": 140670 + }, + { + "epoch": 3.436713654019984, + "grad_norm": 0.6949237585067749, + "learning_rate": 5.387561627480865e-07, + "loss": 0.0003, + "num_input_tokens_seen": 94804880, + "step": 140675 + }, + { + "epoch": 3.436835804851831, + "grad_norm": 0.0015133284032344818, + "learning_rate": 5.386804998779517e-07, + "loss": 0.0421, + "num_input_tokens_seen": 94808336, + "step": 140680 + }, + { + "epoch": 3.436957955683678, + "grad_norm": 0.25915995240211487, + "learning_rate": 5.386048403626561e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94811600, + "step": 140685 + }, + { + "epoch": 3.437080106515525, + "grad_norm": 0.009517625905573368, + "learning_rate": 5.385291842027505e-07, + "loss": 0.0, + "num_input_tokens_seen": 94815248, + "step": 140690 + }, + { + "epoch": 3.4372022573473724, + "grad_norm": 0.011960485950112343, + "learning_rate": 5.384535313987844e-07, + "loss": 0.0, + "num_input_tokens_seen": 94818704, + "step": 140695 + }, + { + "epoch": 3.4373244081792196, + "grad_norm": 0.0006022618617862463, + "learning_rate": 5.383778819513088e-07, + "loss": 0.0, + "num_input_tokens_seen": 94821776, + "step": 140700 + }, + { + "epoch": 3.4374465590110668, + "grad_norm": 5.928589962422848e-05, + "learning_rate": 5.383022358608733e-07, + "loss": 0.0, + "num_input_tokens_seen": 94825680, + "step": 140705 + }, + { + "epoch": 3.437568709842914, + "grad_norm": 0.0023734017740935087, + "learning_rate": 5.382265931280279e-07, + "loss": 0.0003, + "num_input_tokens_seen": 94829328, + "step": 140710 + }, + { + "epoch": 3.437690860674761, + "grad_norm": 0.00043151769204996526, + "learning_rate": 5.381509537533231e-07, + "loss": 0.0, + "num_input_tokens_seen": 94832464, + "step": 140715 + }, + { + "epoch": 3.4378130115066083, + "grad_norm": 0.0004412507696542889, + "learning_rate": 5.380753177373085e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94835408, + "step": 140720 + }, + { + "epoch": 3.4379351623384555, + "grad_norm": 0.002872526179999113, + "learning_rate": 5.379996850805344e-07, + "loss": 0.0, + "num_input_tokens_seen": 94838416, + "step": 140725 + }, + { + "epoch": 3.4380573131703027, + "grad_norm": 0.022230220958590508, + "learning_rate": 5.379240557835514e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94842000, + "step": 140730 + }, + { + "epoch": 3.43817946400215, + "grad_norm": 0.003806622000411153, + "learning_rate": 5.378484298469084e-07, + "loss": 0.0, + "num_input_tokens_seen": 94845008, + "step": 140735 + }, + { + "epoch": 3.438301614833997, + "grad_norm": 0.0002141565055353567, + "learning_rate": 5.377728072711563e-07, + "loss": 0.0, + "num_input_tokens_seen": 94848464, + "step": 140740 + }, + { + "epoch": 3.4384237656658443, + "grad_norm": 0.0003093667037319392, + "learning_rate": 5.376971880568444e-07, + "loss": 0.0371, + "num_input_tokens_seen": 94851984, + "step": 140745 + }, + { + "epoch": 3.4385459164976915, + "grad_norm": 0.04661043733358383, + "learning_rate": 5.376215722045227e-07, + "loss": 0.0, + "num_input_tokens_seen": 94855120, + "step": 140750 + }, + { + "epoch": 3.4386680673295387, + "grad_norm": 0.0001445067609893158, + "learning_rate": 5.375459597147419e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94858704, + "step": 140755 + }, + { + "epoch": 3.438790218161386, + "grad_norm": 0.006317912135273218, + "learning_rate": 5.374703505880507e-07, + "loss": 0.0, + "num_input_tokens_seen": 94861904, + "step": 140760 + }, + { + "epoch": 3.4389123689932326, + "grad_norm": 0.009212841279804707, + "learning_rate": 5.373947448250001e-07, + "loss": 0.039, + "num_input_tokens_seen": 94865296, + "step": 140765 + }, + { + "epoch": 3.4390345198250802, + "grad_norm": 0.02927383780479431, + "learning_rate": 5.373191424261388e-07, + "loss": 0.0, + "num_input_tokens_seen": 94868624, + "step": 140770 + }, + { + "epoch": 3.439156670656927, + "grad_norm": 0.009841068647801876, + "learning_rate": 5.372435433920175e-07, + "loss": 0.0, + "num_input_tokens_seen": 94871824, + "step": 140775 + }, + { + "epoch": 3.439278821488774, + "grad_norm": 0.0017802307847887278, + "learning_rate": 5.371679477231852e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94875344, + "step": 140780 + }, + { + "epoch": 3.4394009723206214, + "grad_norm": 0.0039372132159769535, + "learning_rate": 5.370923554201923e-07, + "loss": 0.0, + "num_input_tokens_seen": 94879120, + "step": 140785 + }, + { + "epoch": 3.4395231231524686, + "grad_norm": 0.00011069678294006735, + "learning_rate": 5.370167664835885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94882000, + "step": 140790 + }, + { + "epoch": 3.4396452739843157, + "grad_norm": 0.018852299079298973, + "learning_rate": 5.369411809139232e-07, + "loss": 0.005, + "num_input_tokens_seen": 94886160, + "step": 140795 + }, + { + "epoch": 3.439767424816163, + "grad_norm": 0.0011196270352229476, + "learning_rate": 5.36865598711746e-07, + "loss": 0.0, + "num_input_tokens_seen": 94889552, + "step": 140800 + }, + { + "epoch": 3.43988957564801, + "grad_norm": 8.786971739027649e-05, + "learning_rate": 5.367900198776072e-07, + "loss": 0.0011, + "num_input_tokens_seen": 94893328, + "step": 140805 + }, + { + "epoch": 3.4400117264798573, + "grad_norm": 0.0004634481738321483, + "learning_rate": 5.367144444120553e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94896528, + "step": 140810 + }, + { + "epoch": 3.4401338773117045, + "grad_norm": 0.0002953216608148068, + "learning_rate": 5.366388723156412e-07, + "loss": 0.0, + "num_input_tokens_seen": 94900048, + "step": 140815 + }, + { + "epoch": 3.4402560281435517, + "grad_norm": 0.004183363169431686, + "learning_rate": 5.365633035889135e-07, + "loss": 0.0685, + "num_input_tokens_seen": 94903056, + "step": 140820 + }, + { + "epoch": 3.440378178975399, + "grad_norm": 0.011689604260027409, + "learning_rate": 5.364877382324222e-07, + "loss": 0.0276, + "num_input_tokens_seen": 94906320, + "step": 140825 + }, + { + "epoch": 3.440500329807246, + "grad_norm": 0.005056587513536215, + "learning_rate": 5.364121762467165e-07, + "loss": 0.0, + "num_input_tokens_seen": 94909840, + "step": 140830 + }, + { + "epoch": 3.4406224806390933, + "grad_norm": 347.3509216308594, + "learning_rate": 5.363366176323465e-07, + "loss": 0.0737, + "num_input_tokens_seen": 94913360, + "step": 140835 + }, + { + "epoch": 3.4407446314709405, + "grad_norm": 0.0028221230022609234, + "learning_rate": 5.362610623898612e-07, + "loss": 0.0, + "num_input_tokens_seen": 94916816, + "step": 140840 + }, + { + "epoch": 3.4408667823027876, + "grad_norm": 0.004818981513381004, + "learning_rate": 5.361855105198098e-07, + "loss": 0.0, + "num_input_tokens_seen": 94920336, + "step": 140845 + }, + { + "epoch": 3.440988933134635, + "grad_norm": 0.000316784338792786, + "learning_rate": 5.361099620227427e-07, + "loss": 0.0279, + "num_input_tokens_seen": 94923408, + "step": 140850 + }, + { + "epoch": 3.441111083966482, + "grad_norm": 0.0016813983675092459, + "learning_rate": 5.360344168992083e-07, + "loss": 0.0, + "num_input_tokens_seen": 94926416, + "step": 140855 + }, + { + "epoch": 3.4412332347983288, + "grad_norm": 27.457794189453125, + "learning_rate": 5.359588751497568e-07, + "loss": 0.095, + "num_input_tokens_seen": 94929744, + "step": 140860 + }, + { + "epoch": 3.441355385630176, + "grad_norm": 0.001600230229087174, + "learning_rate": 5.358833367749368e-07, + "loss": 0.0, + "num_input_tokens_seen": 94933072, + "step": 140865 + }, + { + "epoch": 3.441477536462023, + "grad_norm": 0.035153310745954514, + "learning_rate": 5.358078017752984e-07, + "loss": 0.0, + "num_input_tokens_seen": 94936912, + "step": 140870 + }, + { + "epoch": 3.4415996872938703, + "grad_norm": 0.0017619016580283642, + "learning_rate": 5.357322701513901e-07, + "loss": 0.0, + "num_input_tokens_seen": 94940752, + "step": 140875 + }, + { + "epoch": 3.4417218381257175, + "grad_norm": 0.002070831134915352, + "learning_rate": 5.356567419037616e-07, + "loss": 0.0002, + "num_input_tokens_seen": 94944080, + "step": 140880 + }, + { + "epoch": 3.4418439889575647, + "grad_norm": 0.0022519580088555813, + "learning_rate": 5.355812170329626e-07, + "loss": 0.0, + "num_input_tokens_seen": 94947344, + "step": 140885 + }, + { + "epoch": 3.441966139789412, + "grad_norm": 1.9227795600891113, + "learning_rate": 5.355056955395415e-07, + "loss": 0.0006, + "num_input_tokens_seen": 94950544, + "step": 140890 + }, + { + "epoch": 3.442088290621259, + "grad_norm": 0.0021642029751092196, + "learning_rate": 5.354301774240483e-07, + "loss": 0.0, + "num_input_tokens_seen": 94953488, + "step": 140895 + }, + { + "epoch": 3.4422104414531063, + "grad_norm": 0.000459485687315464, + "learning_rate": 5.353546626870313e-07, + "loss": 0.0, + "num_input_tokens_seen": 94956496, + "step": 140900 + }, + { + "epoch": 3.4423325922849535, + "grad_norm": 0.0010341558372601867, + "learning_rate": 5.352791513290408e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94959952, + "step": 140905 + }, + { + "epoch": 3.4424547431168007, + "grad_norm": 0.0008501124102622271, + "learning_rate": 5.352036433506251e-07, + "loss": 0.0, + "num_input_tokens_seen": 94963600, + "step": 140910 + }, + { + "epoch": 3.442576893948648, + "grad_norm": 0.0004239061672706157, + "learning_rate": 5.351281387523332e-07, + "loss": 0.0, + "num_input_tokens_seen": 94967056, + "step": 140915 + }, + { + "epoch": 3.442699044780495, + "grad_norm": 0.0022695395164191723, + "learning_rate": 5.35052637534715e-07, + "loss": 0.0, + "num_input_tokens_seen": 94970064, + "step": 140920 + }, + { + "epoch": 3.4428211956123422, + "grad_norm": 5.309248808771372e-05, + "learning_rate": 5.349771396983185e-07, + "loss": 0.0, + "num_input_tokens_seen": 94973392, + "step": 140925 + }, + { + "epoch": 3.4429433464441894, + "grad_norm": 0.004750723950564861, + "learning_rate": 5.349016452436938e-07, + "loss": 0.0001, + "num_input_tokens_seen": 94976656, + "step": 140930 + }, + { + "epoch": 3.4430654972760366, + "grad_norm": 0.0031600017100572586, + "learning_rate": 5.348261541713891e-07, + "loss": 0.0, + "num_input_tokens_seen": 94979728, + "step": 140935 + }, + { + "epoch": 3.443187648107884, + "grad_norm": 0.000454644177807495, + "learning_rate": 5.347506664819536e-07, + "loss": 0.0, + "num_input_tokens_seen": 94983056, + "step": 140940 + }, + { + "epoch": 3.4433097989397305, + "grad_norm": 166.5770263671875, + "learning_rate": 5.346751821759369e-07, + "loss": 0.043, + "num_input_tokens_seen": 94986832, + "step": 140945 + }, + { + "epoch": 3.443431949771578, + "grad_norm": 0.05186281353235245, + "learning_rate": 5.34599701253887e-07, + "loss": 0.0, + "num_input_tokens_seen": 94990096, + "step": 140950 + }, + { + "epoch": 3.443554100603425, + "grad_norm": 0.004760414827615023, + "learning_rate": 5.345242237163537e-07, + "loss": 0.0, + "num_input_tokens_seen": 94993552, + "step": 140955 + }, + { + "epoch": 3.443676251435272, + "grad_norm": 0.002913939068093896, + "learning_rate": 5.34448749563885e-07, + "loss": 0.0, + "num_input_tokens_seen": 94997584, + "step": 140960 + }, + { + "epoch": 3.4437984022671193, + "grad_norm": 0.001271998044103384, + "learning_rate": 5.343732787970305e-07, + "loss": 0.0, + "num_input_tokens_seen": 95000912, + "step": 140965 + }, + { + "epoch": 3.4439205530989665, + "grad_norm": 0.001133499899879098, + "learning_rate": 5.342978114163384e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95004240, + "step": 140970 + }, + { + "epoch": 3.4440427039308137, + "grad_norm": 130.31053161621094, + "learning_rate": 5.342223474223579e-07, + "loss": 0.0204, + "num_input_tokens_seen": 95007952, + "step": 140975 + }, + { + "epoch": 3.444164854762661, + "grad_norm": 0.009950289502739906, + "learning_rate": 5.341468868156382e-07, + "loss": 0.081, + "num_input_tokens_seen": 95011152, + "step": 140980 + }, + { + "epoch": 3.444287005594508, + "grad_norm": 0.006339088547974825, + "learning_rate": 5.340714295967272e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95014480, + "step": 140985 + }, + { + "epoch": 3.4444091564263553, + "grad_norm": 45.47343826293945, + "learning_rate": 5.339959757661745e-07, + "loss": 0.0439, + "num_input_tokens_seen": 95018000, + "step": 140990 + }, + { + "epoch": 3.4445313072582024, + "grad_norm": 0.000821881287265569, + "learning_rate": 5.339205253245281e-07, + "loss": 0.0, + "num_input_tokens_seen": 95021264, + "step": 140995 + }, + { + "epoch": 3.4446534580900496, + "grad_norm": 0.2967153787612915, + "learning_rate": 5.338450782723373e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95024528, + "step": 141000 + }, + { + "epoch": 3.444775608921897, + "grad_norm": 0.0012332163751125336, + "learning_rate": 5.337696346101502e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95028176, + "step": 141005 + }, + { + "epoch": 3.444897759753744, + "grad_norm": 0.08027886599302292, + "learning_rate": 5.336941943385158e-07, + "loss": 0.0, + "num_input_tokens_seen": 95031312, + "step": 141010 + }, + { + "epoch": 3.445019910585591, + "grad_norm": 0.032805878669023514, + "learning_rate": 5.336187574579829e-07, + "loss": 0.0, + "num_input_tokens_seen": 95034640, + "step": 141015 + }, + { + "epoch": 3.4451420614174384, + "grad_norm": 0.011173062957823277, + "learning_rate": 5.335433239690994e-07, + "loss": 0.0, + "num_input_tokens_seen": 95037968, + "step": 141020 + }, + { + "epoch": 3.4452642122492856, + "grad_norm": 0.006212008185684681, + "learning_rate": 5.334678938724147e-07, + "loss": 0.0, + "num_input_tokens_seen": 95041296, + "step": 141025 + }, + { + "epoch": 3.4453863630811328, + "grad_norm": 0.0015581079060211778, + "learning_rate": 5.333924671684765e-07, + "loss": 0.0, + "num_input_tokens_seen": 95044624, + "step": 141030 + }, + { + "epoch": 3.44550851391298, + "grad_norm": 2.255311965942383, + "learning_rate": 5.333170438578338e-07, + "loss": 0.0016, + "num_input_tokens_seen": 95047824, + "step": 141035 + }, + { + "epoch": 3.4456306647448267, + "grad_norm": 0.002275418723002076, + "learning_rate": 5.332416239410355e-07, + "loss": 0.0, + "num_input_tokens_seen": 95051472, + "step": 141040 + }, + { + "epoch": 3.445752815576674, + "grad_norm": 1.5443556308746338, + "learning_rate": 5.331662074186294e-07, + "loss": 0.0011, + "num_input_tokens_seen": 95054544, + "step": 141045 + }, + { + "epoch": 3.445874966408521, + "grad_norm": 0.0001924506650539115, + "learning_rate": 5.330907942911644e-07, + "loss": 0.0, + "num_input_tokens_seen": 95058000, + "step": 141050 + }, + { + "epoch": 3.4459971172403683, + "grad_norm": 0.003207719186320901, + "learning_rate": 5.330153845591884e-07, + "loss": 0.0, + "num_input_tokens_seen": 95061520, + "step": 141055 + }, + { + "epoch": 3.4461192680722155, + "grad_norm": 0.00038838028558529913, + "learning_rate": 5.329399782232501e-07, + "loss": 0.0, + "num_input_tokens_seen": 95065104, + "step": 141060 + }, + { + "epoch": 3.4462414189040627, + "grad_norm": 0.016583800315856934, + "learning_rate": 5.328645752838983e-07, + "loss": 0.0, + "num_input_tokens_seen": 95068496, + "step": 141065 + }, + { + "epoch": 3.44636356973591, + "grad_norm": 0.0007583460537716746, + "learning_rate": 5.327891757416806e-07, + "loss": 0.0, + "num_input_tokens_seen": 95071760, + "step": 141070 + }, + { + "epoch": 3.446485720567757, + "grad_norm": 0.00037626130506396294, + "learning_rate": 5.327137795971461e-07, + "loss": 0.0, + "num_input_tokens_seen": 95075024, + "step": 141075 + }, + { + "epoch": 3.4466078713996042, + "grad_norm": 0.0008241772884503007, + "learning_rate": 5.326383868508422e-07, + "loss": 0.0, + "num_input_tokens_seen": 95078032, + "step": 141080 + }, + { + "epoch": 3.4467300222314514, + "grad_norm": 0.00028776045655831695, + "learning_rate": 5.325629975033181e-07, + "loss": 0.0024, + "num_input_tokens_seen": 95082320, + "step": 141085 + }, + { + "epoch": 3.4468521730632986, + "grad_norm": 0.006339185405522585, + "learning_rate": 5.324876115551211e-07, + "loss": 0.0, + "num_input_tokens_seen": 95085584, + "step": 141090 + }, + { + "epoch": 3.446974323895146, + "grad_norm": 0.0002104215818690136, + "learning_rate": 5.324122290068001e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95088912, + "step": 141095 + }, + { + "epoch": 3.447096474726993, + "grad_norm": 0.0013865433866158128, + "learning_rate": 5.323368498589035e-07, + "loss": 0.0, + "num_input_tokens_seen": 95091664, + "step": 141100 + }, + { + "epoch": 3.44721862555884, + "grad_norm": 0.07491642981767654, + "learning_rate": 5.322614741119791e-07, + "loss": 0.0, + "num_input_tokens_seen": 95094928, + "step": 141105 + }, + { + "epoch": 3.4473407763906874, + "grad_norm": 0.0023299320600926876, + "learning_rate": 5.321861017665745e-07, + "loss": 0.0, + "num_input_tokens_seen": 95098256, + "step": 141110 + }, + { + "epoch": 3.4474629272225346, + "grad_norm": 0.01721266657114029, + "learning_rate": 5.321107328232391e-07, + "loss": 0.0, + "num_input_tokens_seen": 95101328, + "step": 141115 + }, + { + "epoch": 3.4475850780543817, + "grad_norm": 0.0050105806440114975, + "learning_rate": 5.320353672825197e-07, + "loss": 0.0256, + "num_input_tokens_seen": 95104656, + "step": 141120 + }, + { + "epoch": 3.4477072288862285, + "grad_norm": 0.00042763014789670706, + "learning_rate": 5.319600051449653e-07, + "loss": 0.0, + "num_input_tokens_seen": 95108432, + "step": 141125 + }, + { + "epoch": 3.447829379718076, + "grad_norm": 0.0009441131260246038, + "learning_rate": 5.318846464111235e-07, + "loss": 0.0, + "num_input_tokens_seen": 95111760, + "step": 141130 + }, + { + "epoch": 3.447951530549923, + "grad_norm": 0.0003792133938986808, + "learning_rate": 5.318092910815426e-07, + "loss": 0.0, + "num_input_tokens_seen": 95114896, + "step": 141135 + }, + { + "epoch": 3.44807368138177, + "grad_norm": 0.001232424401678145, + "learning_rate": 5.317339391567702e-07, + "loss": 0.0, + "num_input_tokens_seen": 95118160, + "step": 141140 + }, + { + "epoch": 3.4481958322136173, + "grad_norm": 0.0013748533092439175, + "learning_rate": 5.31658590637355e-07, + "loss": 0.0, + "num_input_tokens_seen": 95121808, + "step": 141145 + }, + { + "epoch": 3.4483179830454644, + "grad_norm": 0.00029413148877210915, + "learning_rate": 5.315832455238439e-07, + "loss": 0.0, + "num_input_tokens_seen": 95125328, + "step": 141150 + }, + { + "epoch": 3.4484401338773116, + "grad_norm": 0.014625326730310917, + "learning_rate": 5.315079038167856e-07, + "loss": 0.0, + "num_input_tokens_seen": 95128656, + "step": 141155 + }, + { + "epoch": 3.448562284709159, + "grad_norm": 0.001389988581649959, + "learning_rate": 5.314325655167282e-07, + "loss": 0.0, + "num_input_tokens_seen": 95131920, + "step": 141160 + }, + { + "epoch": 3.448684435541006, + "grad_norm": 0.004245539661496878, + "learning_rate": 5.313572306242187e-07, + "loss": 0.0, + "num_input_tokens_seen": 95135568, + "step": 141165 + }, + { + "epoch": 3.448806586372853, + "grad_norm": 0.0002586462942417711, + "learning_rate": 5.312818991398061e-07, + "loss": 0.143, + "num_input_tokens_seen": 95138768, + "step": 141170 + }, + { + "epoch": 3.4489287372047004, + "grad_norm": 4.186246223980561e-05, + "learning_rate": 5.31206571064037e-07, + "loss": 0.0, + "num_input_tokens_seen": 95142352, + "step": 141175 + }, + { + "epoch": 3.4490508880365476, + "grad_norm": 0.0017056518699973822, + "learning_rate": 5.311312463974602e-07, + "loss": 0.0266, + "num_input_tokens_seen": 95145680, + "step": 141180 + }, + { + "epoch": 3.4491730388683948, + "grad_norm": 0.015404156409204006, + "learning_rate": 5.310559251406228e-07, + "loss": 0.0, + "num_input_tokens_seen": 95149456, + "step": 141185 + }, + { + "epoch": 3.449295189700242, + "grad_norm": 0.14585693180561066, + "learning_rate": 5.309806072940728e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95152592, + "step": 141190 + }, + { + "epoch": 3.449417340532089, + "grad_norm": 0.0002526379539631307, + "learning_rate": 5.309052928583583e-07, + "loss": 0.0804, + "num_input_tokens_seen": 95156304, + "step": 141195 + }, + { + "epoch": 3.4495394913639363, + "grad_norm": 0.004309144802391529, + "learning_rate": 5.308299818340263e-07, + "loss": 0.0568, + "num_input_tokens_seen": 95159568, + "step": 141200 + }, + { + "epoch": 3.4496616421957835, + "grad_norm": 0.00161842186935246, + "learning_rate": 5.307546742216253e-07, + "loss": 0.0, + "num_input_tokens_seen": 95162832, + "step": 141205 + }, + { + "epoch": 3.4497837930276303, + "grad_norm": 0.0006941687897779047, + "learning_rate": 5.306793700217021e-07, + "loss": 0.0, + "num_input_tokens_seen": 95166352, + "step": 141210 + }, + { + "epoch": 3.449905943859478, + "grad_norm": 0.005880511831492186, + "learning_rate": 5.306040692348053e-07, + "loss": 0.0625, + "num_input_tokens_seen": 95169936, + "step": 141215 + }, + { + "epoch": 3.4500280946913247, + "grad_norm": 0.001196865108795464, + "learning_rate": 5.305287718614818e-07, + "loss": 0.0, + "num_input_tokens_seen": 95173008, + "step": 141220 + }, + { + "epoch": 3.450150245523172, + "grad_norm": 0.00487381499260664, + "learning_rate": 5.304534779022789e-07, + "loss": 0.0888, + "num_input_tokens_seen": 95176080, + "step": 141225 + }, + { + "epoch": 3.450272396355019, + "grad_norm": 0.0032687492202967405, + "learning_rate": 5.303781873577451e-07, + "loss": 0.0227, + "num_input_tokens_seen": 95179152, + "step": 141230 + }, + { + "epoch": 3.4503945471868662, + "grad_norm": 0.00017934656352736056, + "learning_rate": 5.303029002284271e-07, + "loss": 0.0, + "num_input_tokens_seen": 95182352, + "step": 141235 + }, + { + "epoch": 3.4505166980187134, + "grad_norm": 0.6623572111129761, + "learning_rate": 5.30227616514873e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95185808, + "step": 141240 + }, + { + "epoch": 3.4506388488505606, + "grad_norm": 0.006883205845952034, + "learning_rate": 5.301523362176297e-07, + "loss": 0.0, + "num_input_tokens_seen": 95189136, + "step": 141245 + }, + { + "epoch": 3.450760999682408, + "grad_norm": 0.0020480111707001925, + "learning_rate": 5.30077059337245e-07, + "loss": 0.0, + "num_input_tokens_seen": 95192528, + "step": 141250 + }, + { + "epoch": 3.450883150514255, + "grad_norm": 0.10637383162975311, + "learning_rate": 5.300017858742667e-07, + "loss": 0.028, + "num_input_tokens_seen": 95195728, + "step": 141255 + }, + { + "epoch": 3.451005301346102, + "grad_norm": 0.41171160340309143, + "learning_rate": 5.299265158292414e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95198992, + "step": 141260 + }, + { + "epoch": 3.4511274521779494, + "grad_norm": 0.001396168489009142, + "learning_rate": 5.298512492027174e-07, + "loss": 0.0266, + "num_input_tokens_seen": 95202256, + "step": 141265 + }, + { + "epoch": 3.4512496030097966, + "grad_norm": 0.2164297252893448, + "learning_rate": 5.297759859952411e-07, + "loss": 0.0006, + "num_input_tokens_seen": 95205264, + "step": 141270 + }, + { + "epoch": 3.4513717538416437, + "grad_norm": 0.008003800176084042, + "learning_rate": 5.297007262073607e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95208208, + "step": 141275 + }, + { + "epoch": 3.451493904673491, + "grad_norm": 0.0007602174300700426, + "learning_rate": 5.296254698396227e-07, + "loss": 0.1171, + "num_input_tokens_seen": 95211600, + "step": 141280 + }, + { + "epoch": 3.451616055505338, + "grad_norm": 0.007521396037191153, + "learning_rate": 5.295502168925749e-07, + "loss": 0.0, + "num_input_tokens_seen": 95214608, + "step": 141285 + }, + { + "epoch": 3.4517382063371853, + "grad_norm": 0.09310326725244522, + "learning_rate": 5.294749673667646e-07, + "loss": 0.0, + "num_input_tokens_seen": 95217616, + "step": 141290 + }, + { + "epoch": 3.4518603571690325, + "grad_norm": 0.012504089623689651, + "learning_rate": 5.293997212627388e-07, + "loss": 0.0, + "num_input_tokens_seen": 95221072, + "step": 141295 + }, + { + "epoch": 3.4519825080008797, + "grad_norm": 0.03356561064720154, + "learning_rate": 5.293244785810451e-07, + "loss": 0.0, + "num_input_tokens_seen": 95224208, + "step": 141300 + }, + { + "epoch": 3.4521046588327264, + "grad_norm": 0.0009436097461730242, + "learning_rate": 5.292492393222299e-07, + "loss": 0.0, + "num_input_tokens_seen": 95227536, + "step": 141305 + }, + { + "epoch": 3.4522268096645736, + "grad_norm": 0.029604079201817513, + "learning_rate": 5.291740034868413e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95230736, + "step": 141310 + }, + { + "epoch": 3.452348960496421, + "grad_norm": 0.0026405539829283953, + "learning_rate": 5.290987710754256e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95234384, + "step": 141315 + }, + { + "epoch": 3.452471111328268, + "grad_norm": 0.020678304135799408, + "learning_rate": 5.290235420885307e-07, + "loss": 0.0, + "num_input_tokens_seen": 95237904, + "step": 141320 + }, + { + "epoch": 3.452593262160115, + "grad_norm": 0.002151084365323186, + "learning_rate": 5.289483165267033e-07, + "loss": 0.0, + "num_input_tokens_seen": 95240848, + "step": 141325 + }, + { + "epoch": 3.4527154129919624, + "grad_norm": 0.006780423689633608, + "learning_rate": 5.2887309439049e-07, + "loss": 0.0348, + "num_input_tokens_seen": 95243984, + "step": 141330 + }, + { + "epoch": 3.4528375638238096, + "grad_norm": 0.002874805126339197, + "learning_rate": 5.287978756804387e-07, + "loss": 0.0439, + "num_input_tokens_seen": 95247376, + "step": 141335 + }, + { + "epoch": 3.4529597146556568, + "grad_norm": 0.008217284455895424, + "learning_rate": 5.287226603970956e-07, + "loss": 0.0, + "num_input_tokens_seen": 95250576, + "step": 141340 + }, + { + "epoch": 3.453081865487504, + "grad_norm": 0.00020205184409860522, + "learning_rate": 5.28647448541008e-07, + "loss": 0.0685, + "num_input_tokens_seen": 95253904, + "step": 141345 + }, + { + "epoch": 3.453204016319351, + "grad_norm": 0.011114844121038914, + "learning_rate": 5.285722401127234e-07, + "loss": 0.0, + "num_input_tokens_seen": 95256592, + "step": 141350 + }, + { + "epoch": 3.4533261671511983, + "grad_norm": 0.005883699748665094, + "learning_rate": 5.284970351127878e-07, + "loss": 0.0, + "num_input_tokens_seen": 95260112, + "step": 141355 + }, + { + "epoch": 3.4534483179830455, + "grad_norm": 0.004646258894354105, + "learning_rate": 5.284218335417489e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95263312, + "step": 141360 + }, + { + "epoch": 3.4535704688148927, + "grad_norm": 0.010385840199887753, + "learning_rate": 5.283466354001531e-07, + "loss": 0.0, + "num_input_tokens_seen": 95266832, + "step": 141365 + }, + { + "epoch": 3.45369261964674, + "grad_norm": 0.0005899182870052755, + "learning_rate": 5.282714406885478e-07, + "loss": 0.0, + "num_input_tokens_seen": 95270160, + "step": 141370 + }, + { + "epoch": 3.453814770478587, + "grad_norm": 0.036543406546115875, + "learning_rate": 5.28196249407479e-07, + "loss": 0.044, + "num_input_tokens_seen": 95273168, + "step": 141375 + }, + { + "epoch": 3.4539369213104343, + "grad_norm": 0.015833524987101555, + "learning_rate": 5.281210615574939e-07, + "loss": 0.0, + "num_input_tokens_seen": 95276496, + "step": 141380 + }, + { + "epoch": 3.4540590721422815, + "grad_norm": 0.012564883567392826, + "learning_rate": 5.280458771391398e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95279760, + "step": 141385 + }, + { + "epoch": 3.454181222974128, + "grad_norm": 0.0949430912733078, + "learning_rate": 5.279706961529627e-07, + "loss": 0.0006, + "num_input_tokens_seen": 95283728, + "step": 141390 + }, + { + "epoch": 3.454303373805976, + "grad_norm": 53.003692626953125, + "learning_rate": 5.2789551859951e-07, + "loss": 0.108, + "num_input_tokens_seen": 95286992, + "step": 141395 + }, + { + "epoch": 3.4544255246378226, + "grad_norm": 0.007030909415334463, + "learning_rate": 5.278203444793276e-07, + "loss": 0.0003, + "num_input_tokens_seen": 95290512, + "step": 141400 + }, + { + "epoch": 3.45454767546967, + "grad_norm": 0.00644741440191865, + "learning_rate": 5.277451737929628e-07, + "loss": 0.0, + "num_input_tokens_seen": 95293904, + "step": 141405 + }, + { + "epoch": 3.454669826301517, + "grad_norm": 0.01929423399269581, + "learning_rate": 5.276700065409623e-07, + "loss": 0.0, + "num_input_tokens_seen": 95297552, + "step": 141410 + }, + { + "epoch": 3.454791977133364, + "grad_norm": 0.0031475010327994823, + "learning_rate": 5.275948427238727e-07, + "loss": 0.0, + "num_input_tokens_seen": 95301200, + "step": 141415 + }, + { + "epoch": 3.4549141279652114, + "grad_norm": 0.011686863377690315, + "learning_rate": 5.275196823422401e-07, + "loss": 0.0, + "num_input_tokens_seen": 95304976, + "step": 141420 + }, + { + "epoch": 3.4550362787970585, + "grad_norm": 0.0013666352024301887, + "learning_rate": 5.274445253966118e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95308304, + "step": 141425 + }, + { + "epoch": 3.4551584296289057, + "grad_norm": 0.0014562977012246847, + "learning_rate": 5.273693718875336e-07, + "loss": 0.0, + "num_input_tokens_seen": 95311696, + "step": 141430 + }, + { + "epoch": 3.455280580460753, + "grad_norm": 0.005508221220225096, + "learning_rate": 5.27294221815553e-07, + "loss": 0.0, + "num_input_tokens_seen": 95314960, + "step": 141435 + }, + { + "epoch": 3.4554027312926, + "grad_norm": 0.0002619369188323617, + "learning_rate": 5.272190751812154e-07, + "loss": 0.0129, + "num_input_tokens_seen": 95318672, + "step": 141440 + }, + { + "epoch": 3.4555248821244473, + "grad_norm": 0.0007996332133188844, + "learning_rate": 5.271439319850682e-07, + "loss": 0.0, + "num_input_tokens_seen": 95322128, + "step": 141445 + }, + { + "epoch": 3.4556470329562945, + "grad_norm": 0.1051895022392273, + "learning_rate": 5.270687922276573e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95325264, + "step": 141450 + }, + { + "epoch": 3.4557691837881417, + "grad_norm": 0.002971762791275978, + "learning_rate": 5.269936559095296e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95328336, + "step": 141455 + }, + { + "epoch": 3.455891334619989, + "grad_norm": 0.0001414813450537622, + "learning_rate": 5.269185230312309e-07, + "loss": 0.0, + "num_input_tokens_seen": 95331664, + "step": 141460 + }, + { + "epoch": 3.456013485451836, + "grad_norm": 0.0005533331423066556, + "learning_rate": 5.268433935933079e-07, + "loss": 0.0, + "num_input_tokens_seen": 95334608, + "step": 141465 + }, + { + "epoch": 3.4561356362836833, + "grad_norm": 0.005247303284704685, + "learning_rate": 5.267682675963074e-07, + "loss": 0.0, + "num_input_tokens_seen": 95338256, + "step": 141470 + }, + { + "epoch": 3.4562577871155304, + "grad_norm": 0.0007101392257027328, + "learning_rate": 5.26693145040775e-07, + "loss": 0.0064, + "num_input_tokens_seen": 95341584, + "step": 141475 + }, + { + "epoch": 3.4563799379473776, + "grad_norm": 0.000519787659868598, + "learning_rate": 5.266180259272578e-07, + "loss": 0.0, + "num_input_tokens_seen": 95344720, + "step": 141480 + }, + { + "epoch": 3.4565020887792244, + "grad_norm": 0.5096776485443115, + "learning_rate": 5.265429102563012e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95347728, + "step": 141485 + }, + { + "epoch": 3.4566242396110716, + "grad_norm": 0.0044595482759177685, + "learning_rate": 5.264677980284522e-07, + "loss": 0.0, + "num_input_tokens_seen": 95350928, + "step": 141490 + }, + { + "epoch": 3.4567463904429188, + "grad_norm": 0.0008935718215070665, + "learning_rate": 5.263926892442565e-07, + "loss": 0.0, + "num_input_tokens_seen": 95354000, + "step": 141495 + }, + { + "epoch": 3.456868541274766, + "grad_norm": 0.0032971524633467197, + "learning_rate": 5.263175839042604e-07, + "loss": 0.0, + "num_input_tokens_seen": 95357328, + "step": 141500 + }, + { + "epoch": 3.456990692106613, + "grad_norm": 0.07441108673810959, + "learning_rate": 5.262424820090108e-07, + "loss": 0.0, + "num_input_tokens_seen": 95360592, + "step": 141505 + }, + { + "epoch": 3.4571128429384603, + "grad_norm": 0.0006337398081086576, + "learning_rate": 5.261673835590527e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95363664, + "step": 141510 + }, + { + "epoch": 3.4572349937703075, + "grad_norm": 0.005729390308260918, + "learning_rate": 5.260922885549333e-07, + "loss": 0.0, + "num_input_tokens_seen": 95366736, + "step": 141515 + }, + { + "epoch": 3.4573571446021547, + "grad_norm": 0.00012647430412471294, + "learning_rate": 5.260171969971981e-07, + "loss": 0.0004, + "num_input_tokens_seen": 95370256, + "step": 141520 + }, + { + "epoch": 3.457479295434002, + "grad_norm": 0.001564518897794187, + "learning_rate": 5.259421088863934e-07, + "loss": 0.0, + "num_input_tokens_seen": 95373392, + "step": 141525 + }, + { + "epoch": 3.457601446265849, + "grad_norm": 0.01004080194979906, + "learning_rate": 5.258670242230653e-07, + "loss": 0.0, + "num_input_tokens_seen": 95376784, + "step": 141530 + }, + { + "epoch": 3.4577235970976963, + "grad_norm": 0.0010182727128267288, + "learning_rate": 5.257919430077594e-07, + "loss": 0.0, + "num_input_tokens_seen": 95380048, + "step": 141535 + }, + { + "epoch": 3.4578457479295435, + "grad_norm": 0.010390793904662132, + "learning_rate": 5.257168652410223e-07, + "loss": 0.0, + "num_input_tokens_seen": 95383440, + "step": 141540 + }, + { + "epoch": 3.4579678987613907, + "grad_norm": 0.22023968398571014, + "learning_rate": 5.256417909233993e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95387088, + "step": 141545 + }, + { + "epoch": 3.458090049593238, + "grad_norm": 0.003728673327714205, + "learning_rate": 5.255667200554372e-07, + "loss": 0.0, + "num_input_tokens_seen": 95390544, + "step": 141550 + }, + { + "epoch": 3.458212200425085, + "grad_norm": 0.04721221327781677, + "learning_rate": 5.254916526376813e-07, + "loss": 0.0, + "num_input_tokens_seen": 95394256, + "step": 141555 + }, + { + "epoch": 3.4583343512569322, + "grad_norm": 0.05539275333285332, + "learning_rate": 5.254165886706776e-07, + "loss": 0.0, + "num_input_tokens_seen": 95397584, + "step": 141560 + }, + { + "epoch": 3.4584565020887794, + "grad_norm": 0.0006743664271198213, + "learning_rate": 5.253415281549726e-07, + "loss": 0.0, + "num_input_tokens_seen": 95401232, + "step": 141565 + }, + { + "epoch": 3.458578652920626, + "grad_norm": 0.2516691982746124, + "learning_rate": 5.252664710911112e-07, + "loss": 0.0395, + "num_input_tokens_seen": 95404176, + "step": 141570 + }, + { + "epoch": 3.458700803752474, + "grad_norm": 0.0004871607816312462, + "learning_rate": 5.251914174796401e-07, + "loss": 0.1123, + "num_input_tokens_seen": 95407952, + "step": 141575 + }, + { + "epoch": 3.4588229545843205, + "grad_norm": 0.004576869774609804, + "learning_rate": 5.251163673211043e-07, + "loss": 0.0645, + "num_input_tokens_seen": 95411216, + "step": 141580 + }, + { + "epoch": 3.4589451054161677, + "grad_norm": 0.05273722484707832, + "learning_rate": 5.250413206160505e-07, + "loss": 0.0, + "num_input_tokens_seen": 95414480, + "step": 141585 + }, + { + "epoch": 3.459067256248015, + "grad_norm": 0.0011433085892349482, + "learning_rate": 5.249662773650235e-07, + "loss": 0.0, + "num_input_tokens_seen": 95417808, + "step": 141590 + }, + { + "epoch": 3.459189407079862, + "grad_norm": 0.0031519471667706966, + "learning_rate": 5.248912375685694e-07, + "loss": 0.0, + "num_input_tokens_seen": 95421264, + "step": 141595 + }, + { + "epoch": 3.4593115579117093, + "grad_norm": 0.004848890472203493, + "learning_rate": 5.248162012272345e-07, + "loss": 0.0, + "num_input_tokens_seen": 95424272, + "step": 141600 + }, + { + "epoch": 3.4594337087435565, + "grad_norm": 0.009992300532758236, + "learning_rate": 5.247411683415637e-07, + "loss": 0.0, + "num_input_tokens_seen": 95427728, + "step": 141605 + }, + { + "epoch": 3.4595558595754037, + "grad_norm": 7.762262976029888e-05, + "learning_rate": 5.246661389121032e-07, + "loss": 0.0, + "num_input_tokens_seen": 95430992, + "step": 141610 + }, + { + "epoch": 3.459678010407251, + "grad_norm": 0.006131031550467014, + "learning_rate": 5.24591112939398e-07, + "loss": 0.0, + "num_input_tokens_seen": 95434448, + "step": 141615 + }, + { + "epoch": 3.459800161239098, + "grad_norm": 0.00018021617142949253, + "learning_rate": 5.245160904239947e-07, + "loss": 0.0, + "num_input_tokens_seen": 95438032, + "step": 141620 + }, + { + "epoch": 3.4599223120709452, + "grad_norm": 0.0003632458974607289, + "learning_rate": 5.244410713664377e-07, + "loss": 0.0, + "num_input_tokens_seen": 95441040, + "step": 141625 + }, + { + "epoch": 3.4600444629027924, + "grad_norm": 0.02345282770693302, + "learning_rate": 5.243660557672736e-07, + "loss": 0.0, + "num_input_tokens_seen": 95444816, + "step": 141630 + }, + { + "epoch": 3.4601666137346396, + "grad_norm": 0.004612599965184927, + "learning_rate": 5.242910436270474e-07, + "loss": 0.0, + "num_input_tokens_seen": 95447696, + "step": 141635 + }, + { + "epoch": 3.460288764566487, + "grad_norm": 2.76730479527032e-05, + "learning_rate": 5.242160349463043e-07, + "loss": 0.0, + "num_input_tokens_seen": 95450896, + "step": 141640 + }, + { + "epoch": 3.460410915398334, + "grad_norm": 106.46083068847656, + "learning_rate": 5.241410297255906e-07, + "loss": 0.0738, + "num_input_tokens_seen": 95454096, + "step": 141645 + }, + { + "epoch": 3.460533066230181, + "grad_norm": 0.0009458428248763084, + "learning_rate": 5.240660279654509e-07, + "loss": 0.0, + "num_input_tokens_seen": 95457360, + "step": 141650 + }, + { + "epoch": 3.460655217062028, + "grad_norm": 884.1512451171875, + "learning_rate": 5.239910296664311e-07, + "loss": 0.0108, + "num_input_tokens_seen": 95460560, + "step": 141655 + }, + { + "epoch": 3.4607773678938756, + "grad_norm": 0.18908710777759552, + "learning_rate": 5.239160348290769e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95464400, + "step": 141660 + }, + { + "epoch": 3.4608995187257223, + "grad_norm": 0.019987985491752625, + "learning_rate": 5.238410434539329e-07, + "loss": 0.0395, + "num_input_tokens_seen": 95467792, + "step": 141665 + }, + { + "epoch": 3.4610216695575695, + "grad_norm": 0.002345374319702387, + "learning_rate": 5.237660555415454e-07, + "loss": 0.0, + "num_input_tokens_seen": 95470928, + "step": 141670 + }, + { + "epoch": 3.4611438203894167, + "grad_norm": 0.030504705384373665, + "learning_rate": 5.236910710924588e-07, + "loss": 0.0003, + "num_input_tokens_seen": 95474128, + "step": 141675 + }, + { + "epoch": 3.461265971221264, + "grad_norm": 0.0014592435909435153, + "learning_rate": 5.236160901072193e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95477712, + "step": 141680 + }, + { + "epoch": 3.461388122053111, + "grad_norm": 0.0011629685759544373, + "learning_rate": 5.235411125863713e-07, + "loss": 0.0, + "num_input_tokens_seen": 95480912, + "step": 141685 + }, + { + "epoch": 3.4615102728849583, + "grad_norm": 0.0006028559291735291, + "learning_rate": 5.234661385304603e-07, + "loss": 0.0, + "num_input_tokens_seen": 95483920, + "step": 141690 + }, + { + "epoch": 3.4616324237168055, + "grad_norm": 0.000889910152181983, + "learning_rate": 5.233911679400324e-07, + "loss": 0.0, + "num_input_tokens_seen": 95486992, + "step": 141695 + }, + { + "epoch": 3.4617545745486527, + "grad_norm": 0.0008395583718083799, + "learning_rate": 5.233162008156316e-07, + "loss": 0.0, + "num_input_tokens_seen": 95490448, + "step": 141700 + }, + { + "epoch": 3.4618767253805, + "grad_norm": 0.06065589189529419, + "learning_rate": 5.232412371578038e-07, + "loss": 0.0, + "num_input_tokens_seen": 95494032, + "step": 141705 + }, + { + "epoch": 3.461998876212347, + "grad_norm": 0.009806448593735695, + "learning_rate": 5.231662769670939e-07, + "loss": 0.0, + "num_input_tokens_seen": 95497616, + "step": 141710 + }, + { + "epoch": 3.462121027044194, + "grad_norm": 0.0029943319968879223, + "learning_rate": 5.230913202440469e-07, + "loss": 0.0917, + "num_input_tokens_seen": 95501328, + "step": 141715 + }, + { + "epoch": 3.4622431778760414, + "grad_norm": 0.023564515635371208, + "learning_rate": 5.230163669892085e-07, + "loss": 0.0172, + "num_input_tokens_seen": 95504784, + "step": 141720 + }, + { + "epoch": 3.4623653287078886, + "grad_norm": 0.0008361052605323493, + "learning_rate": 5.229414172031235e-07, + "loss": 0.0, + "num_input_tokens_seen": 95507920, + "step": 141725 + }, + { + "epoch": 3.462487479539736, + "grad_norm": 0.0017536578234285116, + "learning_rate": 5.228664708863362e-07, + "loss": 0.0, + "num_input_tokens_seen": 95511248, + "step": 141730 + }, + { + "epoch": 3.462609630371583, + "grad_norm": 0.7590802907943726, + "learning_rate": 5.227915280393928e-07, + "loss": 0.0004, + "num_input_tokens_seen": 95514256, + "step": 141735 + }, + { + "epoch": 3.46273178120343, + "grad_norm": 0.007260952610522509, + "learning_rate": 5.227165886628373e-07, + "loss": 0.0, + "num_input_tokens_seen": 95517392, + "step": 141740 + }, + { + "epoch": 3.4628539320352774, + "grad_norm": 0.014699455350637436, + "learning_rate": 5.226416527572157e-07, + "loss": 0.0, + "num_input_tokens_seen": 95520976, + "step": 141745 + }, + { + "epoch": 3.462976082867124, + "grad_norm": 0.0004249770427122712, + "learning_rate": 5.225667203230719e-07, + "loss": 0.0, + "num_input_tokens_seen": 95524496, + "step": 141750 + }, + { + "epoch": 3.4630982336989717, + "grad_norm": 0.011417384259402752, + "learning_rate": 5.224917913609517e-07, + "loss": 0.0, + "num_input_tokens_seen": 95528208, + "step": 141755 + }, + { + "epoch": 3.4632203845308185, + "grad_norm": 0.022379083558917046, + "learning_rate": 5.224168658713992e-07, + "loss": 0.0, + "num_input_tokens_seen": 95531856, + "step": 141760 + }, + { + "epoch": 3.4633425353626657, + "grad_norm": 0.00799702201038599, + "learning_rate": 5.223419438549601e-07, + "loss": 0.0, + "num_input_tokens_seen": 95535120, + "step": 141765 + }, + { + "epoch": 3.463464686194513, + "grad_norm": 0.0009740145178511739, + "learning_rate": 5.222670253121786e-07, + "loss": 0.0, + "num_input_tokens_seen": 95538192, + "step": 141770 + }, + { + "epoch": 3.46358683702636, + "grad_norm": 0.000678222393617034, + "learning_rate": 5.221921102435995e-07, + "loss": 0.0, + "num_input_tokens_seen": 95541200, + "step": 141775 + }, + { + "epoch": 3.4637089878582072, + "grad_norm": 0.008515672758221626, + "learning_rate": 5.221171986497686e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95544784, + "step": 141780 + }, + { + "epoch": 3.4638311386900544, + "grad_norm": 0.000750542851164937, + "learning_rate": 5.220422905312293e-07, + "loss": 0.0406, + "num_input_tokens_seen": 95548304, + "step": 141785 + }, + { + "epoch": 3.4639532895219016, + "grad_norm": 0.00016197515651583672, + "learning_rate": 5.219673858885276e-07, + "loss": 0.0, + "num_input_tokens_seen": 95551312, + "step": 141790 + }, + { + "epoch": 3.464075440353749, + "grad_norm": 0.004807916935533285, + "learning_rate": 5.218924847222073e-07, + "loss": 0.0, + "num_input_tokens_seen": 95554640, + "step": 141795 + }, + { + "epoch": 3.464197591185596, + "grad_norm": 0.01425571646541357, + "learning_rate": 5.218175870328136e-07, + "loss": 0.0, + "num_input_tokens_seen": 95557520, + "step": 141800 + }, + { + "epoch": 3.464319742017443, + "grad_norm": 0.03530505299568176, + "learning_rate": 5.217426928208908e-07, + "loss": 0.0, + "num_input_tokens_seen": 95560592, + "step": 141805 + }, + { + "epoch": 3.4644418928492904, + "grad_norm": 0.0017172127263620496, + "learning_rate": 5.216678020869838e-07, + "loss": 0.0, + "num_input_tokens_seen": 95563408, + "step": 141810 + }, + { + "epoch": 3.4645640436811376, + "grad_norm": 0.290860652923584, + "learning_rate": 5.215929148316376e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95566992, + "step": 141815 + }, + { + "epoch": 3.4646861945129848, + "grad_norm": 0.04317009821534157, + "learning_rate": 5.215180310553959e-07, + "loss": 0.0, + "num_input_tokens_seen": 95570384, + "step": 141820 + }, + { + "epoch": 3.464808345344832, + "grad_norm": 0.027695301920175552, + "learning_rate": 5.214431507588042e-07, + "loss": 0.0781, + "num_input_tokens_seen": 95573648, + "step": 141825 + }, + { + "epoch": 3.464930496176679, + "grad_norm": 3.70340421795845e-05, + "learning_rate": 5.213682739424063e-07, + "loss": 0.0, + "num_input_tokens_seen": 95577232, + "step": 141830 + }, + { + "epoch": 3.465052647008526, + "grad_norm": 0.0006201051292009652, + "learning_rate": 5.212934006067474e-07, + "loss": 0.0, + "num_input_tokens_seen": 95580752, + "step": 141835 + }, + { + "epoch": 3.4651747978403735, + "grad_norm": 0.028088655322790146, + "learning_rate": 5.212185307523716e-07, + "loss": 0.0, + "num_input_tokens_seen": 95583888, + "step": 141840 + }, + { + "epoch": 3.4652969486722203, + "grad_norm": 0.025718344375491142, + "learning_rate": 5.211436643798231e-07, + "loss": 0.0, + "num_input_tokens_seen": 95587536, + "step": 141845 + }, + { + "epoch": 3.4654190995040675, + "grad_norm": 0.006959665101021528, + "learning_rate": 5.21068801489647e-07, + "loss": 0.0236, + "num_input_tokens_seen": 95590992, + "step": 141850 + }, + { + "epoch": 3.4655412503359146, + "grad_norm": 0.0032126219011843204, + "learning_rate": 5.20993942082387e-07, + "loss": 0.0, + "num_input_tokens_seen": 95594832, + "step": 141855 + }, + { + "epoch": 3.465663401167762, + "grad_norm": 0.0005849121371284127, + "learning_rate": 5.209190861585883e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95598352, + "step": 141860 + }, + { + "epoch": 3.465785551999609, + "grad_norm": 0.09262830764055252, + "learning_rate": 5.208442337187945e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95601616, + "step": 141865 + }, + { + "epoch": 3.465907702831456, + "grad_norm": 0.004266612231731415, + "learning_rate": 5.207693847635503e-07, + "loss": 0.0, + "num_input_tokens_seen": 95605072, + "step": 141870 + }, + { + "epoch": 3.4660298536633034, + "grad_norm": 0.0007697442197240889, + "learning_rate": 5.206945392934004e-07, + "loss": 0.0, + "num_input_tokens_seen": 95608912, + "step": 141875 + }, + { + "epoch": 3.4661520044951506, + "grad_norm": 0.00639304518699646, + "learning_rate": 5.206196973088884e-07, + "loss": 0.0096, + "num_input_tokens_seen": 95614480, + "step": 141880 + }, + { + "epoch": 3.466274155326998, + "grad_norm": 0.0009124985663220286, + "learning_rate": 5.205448588105592e-07, + "loss": 0.0477, + "num_input_tokens_seen": 95618192, + "step": 141885 + }, + { + "epoch": 3.466396306158845, + "grad_norm": 0.048047225922346115, + "learning_rate": 5.204700237989563e-07, + "loss": 0.0, + "num_input_tokens_seen": 95620944, + "step": 141890 + }, + { + "epoch": 3.466518456990692, + "grad_norm": 0.007053458597511053, + "learning_rate": 5.203951922746249e-07, + "loss": 0.0, + "num_input_tokens_seen": 95624400, + "step": 141895 + }, + { + "epoch": 3.4666406078225394, + "grad_norm": 0.009079281240701675, + "learning_rate": 5.203203642381083e-07, + "loss": 0.0, + "num_input_tokens_seen": 95627664, + "step": 141900 + }, + { + "epoch": 3.4667627586543865, + "grad_norm": 0.0011568315094336867, + "learning_rate": 5.202455396899508e-07, + "loss": 0.0, + "num_input_tokens_seen": 95630928, + "step": 141905 + }, + { + "epoch": 3.4668849094862337, + "grad_norm": 8.400007209274918e-05, + "learning_rate": 5.201707186306974e-07, + "loss": 0.0, + "num_input_tokens_seen": 95633808, + "step": 141910 + }, + { + "epoch": 3.467007060318081, + "grad_norm": 0.004403356928378344, + "learning_rate": 5.20095901060891e-07, + "loss": 0.0, + "num_input_tokens_seen": 95637648, + "step": 141915 + }, + { + "epoch": 3.467129211149928, + "grad_norm": 0.0006242926465347409, + "learning_rate": 5.200210869810768e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95641296, + "step": 141920 + }, + { + "epoch": 3.4672513619817753, + "grad_norm": 0.40456339716911316, + "learning_rate": 5.19946276391798e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95644624, + "step": 141925 + }, + { + "epoch": 3.467373512813622, + "grad_norm": 0.0003231973387300968, + "learning_rate": 5.198714692935993e-07, + "loss": 0.0, + "num_input_tokens_seen": 95647888, + "step": 141930 + }, + { + "epoch": 3.4674956636454692, + "grad_norm": 0.020619744434952736, + "learning_rate": 5.19796665687024e-07, + "loss": 0.0, + "num_input_tokens_seen": 95651216, + "step": 141935 + }, + { + "epoch": 3.4676178144773164, + "grad_norm": 0.011790621094405651, + "learning_rate": 5.197218655726171e-07, + "loss": 0.0, + "num_input_tokens_seen": 95654544, + "step": 141940 + }, + { + "epoch": 3.4677399653091636, + "grad_norm": 0.0007443840149790049, + "learning_rate": 5.196470689509218e-07, + "loss": 0.0, + "num_input_tokens_seen": 95658192, + "step": 141945 + }, + { + "epoch": 3.467862116141011, + "grad_norm": 0.01050039753317833, + "learning_rate": 5.195722758224819e-07, + "loss": 0.0, + "num_input_tokens_seen": 95662352, + "step": 141950 + }, + { + "epoch": 3.467984266972858, + "grad_norm": 0.005819221027195454, + "learning_rate": 5.19497486187842e-07, + "loss": 0.0, + "num_input_tokens_seen": 95665744, + "step": 141955 + }, + { + "epoch": 3.468106417804705, + "grad_norm": 0.0011456216452643275, + "learning_rate": 5.19422700047545e-07, + "loss": 0.0, + "num_input_tokens_seen": 95669200, + "step": 141960 + }, + { + "epoch": 3.4682285686365524, + "grad_norm": 0.15970462560653687, + "learning_rate": 5.193479174021358e-07, + "loss": 0.0, + "num_input_tokens_seen": 95672592, + "step": 141965 + }, + { + "epoch": 3.4683507194683996, + "grad_norm": 0.0024676870089024305, + "learning_rate": 5.19273138252158e-07, + "loss": 0.0675, + "num_input_tokens_seen": 95676368, + "step": 141970 + }, + { + "epoch": 3.4684728703002468, + "grad_norm": 0.002797144465148449, + "learning_rate": 5.191983625981549e-07, + "loss": 0.0, + "num_input_tokens_seen": 95679632, + "step": 141975 + }, + { + "epoch": 3.468595021132094, + "grad_norm": 0.05274736136198044, + "learning_rate": 5.191235904406709e-07, + "loss": 0.0039, + "num_input_tokens_seen": 95683088, + "step": 141980 + }, + { + "epoch": 3.468717171963941, + "grad_norm": 0.011775636114180088, + "learning_rate": 5.190488217802492e-07, + "loss": 0.0, + "num_input_tokens_seen": 95686672, + "step": 141985 + }, + { + "epoch": 3.4688393227957883, + "grad_norm": 0.0019723197910934687, + "learning_rate": 5.189740566174341e-07, + "loss": 0.0, + "num_input_tokens_seen": 95690512, + "step": 141990 + }, + { + "epoch": 3.4689614736276355, + "grad_norm": 0.0013073725858703256, + "learning_rate": 5.188992949527688e-07, + "loss": 0.0399, + "num_input_tokens_seen": 95694096, + "step": 141995 + }, + { + "epoch": 3.4690836244594827, + "grad_norm": 0.009154350496828556, + "learning_rate": 5.188245367867971e-07, + "loss": 0.0009, + "num_input_tokens_seen": 95697296, + "step": 142000 + }, + { + "epoch": 3.46920577529133, + "grad_norm": 19.28621482849121, + "learning_rate": 5.187497821200633e-07, + "loss": 0.0556, + "num_input_tokens_seen": 95700688, + "step": 142005 + }, + { + "epoch": 3.469327926123177, + "grad_norm": 0.016265923157334328, + "learning_rate": 5.1867503095311e-07, + "loss": 0.0, + "num_input_tokens_seen": 95704080, + "step": 142010 + }, + { + "epoch": 3.469450076955024, + "grad_norm": 0.0036928714253008366, + "learning_rate": 5.186002832864819e-07, + "loss": 0.0399, + "num_input_tokens_seen": 95707408, + "step": 142015 + }, + { + "epoch": 3.4695722277868715, + "grad_norm": 3.3985830668825656e-05, + "learning_rate": 5.185255391207215e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95710800, + "step": 142020 + }, + { + "epoch": 3.469694378618718, + "grad_norm": 0.009393775835633278, + "learning_rate": 5.18450798456373e-07, + "loss": 0.0, + "num_input_tokens_seen": 95714192, + "step": 142025 + }, + { + "epoch": 3.4698165294505654, + "grad_norm": 0.001170952571555972, + "learning_rate": 5.1837606129398e-07, + "loss": 0.0, + "num_input_tokens_seen": 95717456, + "step": 142030 + }, + { + "epoch": 3.4699386802824126, + "grad_norm": 0.0034615262411534786, + "learning_rate": 5.183013276340859e-07, + "loss": 0.0, + "num_input_tokens_seen": 95720528, + "step": 142035 + }, + { + "epoch": 3.47006083111426, + "grad_norm": 0.005982245784252882, + "learning_rate": 5.182265974772339e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95724048, + "step": 142040 + }, + { + "epoch": 3.470182981946107, + "grad_norm": 0.001264628954231739, + "learning_rate": 5.181518708239679e-07, + "loss": 0.0444, + "num_input_tokens_seen": 95727312, + "step": 142045 + }, + { + "epoch": 3.470305132777954, + "grad_norm": 0.0009982496267184615, + "learning_rate": 5.180771476748307e-07, + "loss": 0.0, + "num_input_tokens_seen": 95730640, + "step": 142050 + }, + { + "epoch": 3.4704272836098013, + "grad_norm": 0.0025757618714123964, + "learning_rate": 5.180024280303665e-07, + "loss": 0.0, + "num_input_tokens_seen": 95733648, + "step": 142055 + }, + { + "epoch": 3.4705494344416485, + "grad_norm": 0.00035017167101614177, + "learning_rate": 5.17927711891118e-07, + "loss": 0.0, + "num_input_tokens_seen": 95737232, + "step": 142060 + }, + { + "epoch": 3.4706715852734957, + "grad_norm": 2.5826502678683028e-05, + "learning_rate": 5.178529992576291e-07, + "loss": 0.0079, + "num_input_tokens_seen": 95740368, + "step": 142065 + }, + { + "epoch": 3.470793736105343, + "grad_norm": 3.1048228265717626e-05, + "learning_rate": 5.177782901304426e-07, + "loss": 0.0, + "num_input_tokens_seen": 95743568, + "step": 142070 + }, + { + "epoch": 3.47091588693719, + "grad_norm": 15.827798843383789, + "learning_rate": 5.177035845101023e-07, + "loss": 0.0422, + "num_input_tokens_seen": 95747088, + "step": 142075 + }, + { + "epoch": 3.4710380377690373, + "grad_norm": 0.003537968033924699, + "learning_rate": 5.176288823971511e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95750480, + "step": 142080 + }, + { + "epoch": 3.4711601886008845, + "grad_norm": 0.0037160757929086685, + "learning_rate": 5.175541837921326e-07, + "loss": 0.0, + "num_input_tokens_seen": 95753488, + "step": 142085 + }, + { + "epoch": 3.4712823394327317, + "grad_norm": 0.0007948851562105119, + "learning_rate": 5.174794886955895e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95756880, + "step": 142090 + }, + { + "epoch": 3.471404490264579, + "grad_norm": 0.0015356955118477345, + "learning_rate": 5.174047971080653e-07, + "loss": 0.0, + "num_input_tokens_seen": 95760144, + "step": 142095 + }, + { + "epoch": 3.471526641096426, + "grad_norm": 0.0012752095935866237, + "learning_rate": 5.173301090301036e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95764112, + "step": 142100 + }, + { + "epoch": 3.4716487919282732, + "grad_norm": 0.071896031498909, + "learning_rate": 5.172554244622469e-07, + "loss": 0.0, + "num_input_tokens_seen": 95767568, + "step": 142105 + }, + { + "epoch": 3.47177094276012, + "grad_norm": 0.0005444808048196137, + "learning_rate": 5.171807434050389e-07, + "loss": 0.0, + "num_input_tokens_seen": 95771216, + "step": 142110 + }, + { + "epoch": 3.471893093591967, + "grad_norm": 0.01946081407368183, + "learning_rate": 5.17106065859022e-07, + "loss": 0.012, + "num_input_tokens_seen": 95774352, + "step": 142115 + }, + { + "epoch": 3.4720152444238144, + "grad_norm": 0.021580923348665237, + "learning_rate": 5.170313918247397e-07, + "loss": 0.0, + "num_input_tokens_seen": 95778000, + "step": 142120 + }, + { + "epoch": 3.4721373952556616, + "grad_norm": 0.005905607715249062, + "learning_rate": 5.169567213027355e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95781456, + "step": 142125 + }, + { + "epoch": 3.4722595460875088, + "grad_norm": 0.003950274549424648, + "learning_rate": 5.168820542935514e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95785168, + "step": 142130 + }, + { + "epoch": 3.472381696919356, + "grad_norm": 0.0005195545381866395, + "learning_rate": 5.168073907977315e-07, + "loss": 0.0, + "num_input_tokens_seen": 95790480, + "step": 142135 + }, + { + "epoch": 3.472503847751203, + "grad_norm": 0.10520616173744202, + "learning_rate": 5.167327308158177e-07, + "loss": 0.0, + "num_input_tokens_seen": 95793744, + "step": 142140 + }, + { + "epoch": 3.4726259985830503, + "grad_norm": 0.0003425517934374511, + "learning_rate": 5.166580743483539e-07, + "loss": 0.0, + "num_input_tokens_seen": 95797200, + "step": 142145 + }, + { + "epoch": 3.4727481494148975, + "grad_norm": 0.0060327257961034775, + "learning_rate": 5.165834213958825e-07, + "loss": 0.0, + "num_input_tokens_seen": 95800528, + "step": 142150 + }, + { + "epoch": 3.4728703002467447, + "grad_norm": 0.019918235018849373, + "learning_rate": 5.165087719589462e-07, + "loss": 0.0938, + "num_input_tokens_seen": 95803728, + "step": 142155 + }, + { + "epoch": 3.472992451078592, + "grad_norm": 0.49219003319740295, + "learning_rate": 5.164341260380885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95807248, + "step": 142160 + }, + { + "epoch": 3.473114601910439, + "grad_norm": 0.008874580264091492, + "learning_rate": 5.163594836338515e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95810320, + "step": 142165 + }, + { + "epoch": 3.4732367527422863, + "grad_norm": 0.0008690126123838127, + "learning_rate": 5.162848447467789e-07, + "loss": 0.0524, + "num_input_tokens_seen": 95813712, + "step": 142170 + }, + { + "epoch": 3.4733589035741335, + "grad_norm": 0.0009398649563081563, + "learning_rate": 5.162102093774126e-07, + "loss": 0.0382, + "num_input_tokens_seen": 95817232, + "step": 142175 + }, + { + "epoch": 3.4734810544059807, + "grad_norm": 111.28203582763672, + "learning_rate": 5.161355775262957e-07, + "loss": 0.0341, + "num_input_tokens_seen": 95820432, + "step": 142180 + }, + { + "epoch": 3.473603205237828, + "grad_norm": 0.008379044011235237, + "learning_rate": 5.160609491939713e-07, + "loss": 0.0, + "num_input_tokens_seen": 95824272, + "step": 142185 + }, + { + "epoch": 3.473725356069675, + "grad_norm": 0.050469011068344116, + "learning_rate": 5.159863243809816e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95828112, + "step": 142190 + }, + { + "epoch": 3.4738475069015218, + "grad_norm": 0.006850233767181635, + "learning_rate": 5.159117030878699e-07, + "loss": 0.0, + "num_input_tokens_seen": 95831376, + "step": 142195 + }, + { + "epoch": 3.4739696577333694, + "grad_norm": 0.0007184845162555575, + "learning_rate": 5.158370853151783e-07, + "loss": 0.0, + "num_input_tokens_seen": 95834640, + "step": 142200 + }, + { + "epoch": 3.474091808565216, + "grad_norm": 0.0031210724264383316, + "learning_rate": 5.157624710634499e-07, + "loss": 0.0005, + "num_input_tokens_seen": 95838416, + "step": 142205 + }, + { + "epoch": 3.4742139593970633, + "grad_norm": 0.0003850772918667644, + "learning_rate": 5.156878603332265e-07, + "loss": 0.0, + "num_input_tokens_seen": 95841296, + "step": 142210 + }, + { + "epoch": 3.4743361102289105, + "grad_norm": 0.13583695888519287, + "learning_rate": 5.156132531250515e-07, + "loss": 0.0, + "num_input_tokens_seen": 95844880, + "step": 142215 + }, + { + "epoch": 3.4744582610607577, + "grad_norm": 0.0015774507774040103, + "learning_rate": 5.155386494394674e-07, + "loss": 0.0, + "num_input_tokens_seen": 95848528, + "step": 142220 + }, + { + "epoch": 3.474580411892605, + "grad_norm": 0.023258036002516747, + "learning_rate": 5.154640492770161e-07, + "loss": 0.0, + "num_input_tokens_seen": 95851920, + "step": 142225 + }, + { + "epoch": 3.474702562724452, + "grad_norm": 0.0010493238223716617, + "learning_rate": 5.153894526382412e-07, + "loss": 0.0, + "num_input_tokens_seen": 95855440, + "step": 142230 + }, + { + "epoch": 3.4748247135562993, + "grad_norm": 0.001293657929636538, + "learning_rate": 5.153148595236839e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95859088, + "step": 142235 + }, + { + "epoch": 3.4749468643881465, + "grad_norm": 0.11125042289495468, + "learning_rate": 5.152402699338878e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95862032, + "step": 142240 + }, + { + "epoch": 3.4750690152199937, + "grad_norm": 0.00025333481607958674, + "learning_rate": 5.151656838693945e-07, + "loss": 0.0, + "num_input_tokens_seen": 95865616, + "step": 142245 + }, + { + "epoch": 3.475191166051841, + "grad_norm": 0.0008209923398680985, + "learning_rate": 5.150911013307471e-07, + "loss": 0.0, + "num_input_tokens_seen": 95869264, + "step": 142250 + }, + { + "epoch": 3.475313316883688, + "grad_norm": 0.00028186949202790856, + "learning_rate": 5.150165223184877e-07, + "loss": 0.0, + "num_input_tokens_seen": 95873488, + "step": 142255 + }, + { + "epoch": 3.4754354677155352, + "grad_norm": 0.001031126594170928, + "learning_rate": 5.149419468331582e-07, + "loss": 0.0754, + "num_input_tokens_seen": 95876944, + "step": 142260 + }, + { + "epoch": 3.4755576185473824, + "grad_norm": 41.8818244934082, + "learning_rate": 5.148673748753017e-07, + "loss": 0.0454, + "num_input_tokens_seen": 95880080, + "step": 142265 + }, + { + "epoch": 3.4756797693792296, + "grad_norm": 22.44657325744629, + "learning_rate": 5.147928064454597e-07, + "loss": 0.1032, + "num_input_tokens_seen": 95883472, + "step": 142270 + }, + { + "epoch": 3.475801920211077, + "grad_norm": 0.0021455264650285244, + "learning_rate": 5.147182415441749e-07, + "loss": 0.0, + "num_input_tokens_seen": 95886992, + "step": 142275 + }, + { + "epoch": 3.4759240710429236, + "grad_norm": 0.0005806908593513072, + "learning_rate": 5.1464368017199e-07, + "loss": 0.0422, + "num_input_tokens_seen": 95890384, + "step": 142280 + }, + { + "epoch": 3.476046221874771, + "grad_norm": 0.0041892873123288155, + "learning_rate": 5.145691223294464e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95893712, + "step": 142285 + }, + { + "epoch": 3.476168372706618, + "grad_norm": 0.005277763120830059, + "learning_rate": 5.144945680170871e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95897168, + "step": 142290 + }, + { + "epoch": 3.476290523538465, + "grad_norm": 0.009297845885157585, + "learning_rate": 5.144200172354534e-07, + "loss": 0.0, + "num_input_tokens_seen": 95900624, + "step": 142295 + }, + { + "epoch": 3.4764126743703123, + "grad_norm": 0.01249323133379221, + "learning_rate": 5.143454699850884e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95904016, + "step": 142300 + }, + { + "epoch": 3.4765348252021595, + "grad_norm": 0.002345341257750988, + "learning_rate": 5.142709262665334e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95907600, + "step": 142305 + }, + { + "epoch": 3.4766569760340067, + "grad_norm": 0.008824708871543407, + "learning_rate": 5.14196386080331e-07, + "loss": 0.0, + "num_input_tokens_seen": 95910736, + "step": 142310 + }, + { + "epoch": 3.476779126865854, + "grad_norm": 0.009206004440784454, + "learning_rate": 5.141218494270234e-07, + "loss": 0.0, + "num_input_tokens_seen": 95914640, + "step": 142315 + }, + { + "epoch": 3.476901277697701, + "grad_norm": 0.09007237106561661, + "learning_rate": 5.14047316307152e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95918032, + "step": 142320 + }, + { + "epoch": 3.4770234285295483, + "grad_norm": 0.0011511669727042317, + "learning_rate": 5.139727867212596e-07, + "loss": 0.0, + "num_input_tokens_seen": 95921808, + "step": 142325 + }, + { + "epoch": 3.4771455793613955, + "grad_norm": 0.014105385169386864, + "learning_rate": 5.138982606698876e-07, + "loss": 0.0026, + "num_input_tokens_seen": 95925328, + "step": 142330 + }, + { + "epoch": 3.4772677301932426, + "grad_norm": 0.00038664639578200877, + "learning_rate": 5.13823738153578e-07, + "loss": 0.0, + "num_input_tokens_seen": 95928528, + "step": 142335 + }, + { + "epoch": 3.47738988102509, + "grad_norm": 0.0021773031912744045, + "learning_rate": 5.137492191728734e-07, + "loss": 0.0695, + "num_input_tokens_seen": 95931408, + "step": 142340 + }, + { + "epoch": 3.477512031856937, + "grad_norm": 0.0014116725651547313, + "learning_rate": 5.136747037283149e-07, + "loss": 0.0, + "num_input_tokens_seen": 95935120, + "step": 142345 + }, + { + "epoch": 3.477634182688784, + "grad_norm": 0.030428191646933556, + "learning_rate": 5.136001918204451e-07, + "loss": 0.0439, + "num_input_tokens_seen": 95938576, + "step": 142350 + }, + { + "epoch": 3.4777563335206314, + "grad_norm": 0.0033477810211479664, + "learning_rate": 5.135256834498054e-07, + "loss": 0.0, + "num_input_tokens_seen": 95942096, + "step": 142355 + }, + { + "epoch": 3.4778784843524786, + "grad_norm": 8.172683010343462e-05, + "learning_rate": 5.134511786169376e-07, + "loss": 0.0, + "num_input_tokens_seen": 95945360, + "step": 142360 + }, + { + "epoch": 3.478000635184326, + "grad_norm": 0.006972254253923893, + "learning_rate": 5.133766773223839e-07, + "loss": 0.0, + "num_input_tokens_seen": 95948688, + "step": 142365 + }, + { + "epoch": 3.478122786016173, + "grad_norm": 0.0022087888792157173, + "learning_rate": 5.133021795666858e-07, + "loss": 0.0002, + "num_input_tokens_seen": 95952144, + "step": 142370 + }, + { + "epoch": 3.4782449368480197, + "grad_norm": 0.001995604485273361, + "learning_rate": 5.132276853503853e-07, + "loss": 0.0, + "num_input_tokens_seen": 95955472, + "step": 142375 + }, + { + "epoch": 3.478367087679867, + "grad_norm": 0.009456205181777477, + "learning_rate": 5.131531946740238e-07, + "loss": 0.0, + "num_input_tokens_seen": 95958736, + "step": 142380 + }, + { + "epoch": 3.478489238511714, + "grad_norm": 3.635671964730136e-05, + "learning_rate": 5.130787075381433e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95962256, + "step": 142385 + }, + { + "epoch": 3.4786113893435613, + "grad_norm": 0.0007822351763024926, + "learning_rate": 5.130042239432853e-07, + "loss": 0.0, + "num_input_tokens_seen": 95965328, + "step": 142390 + }, + { + "epoch": 3.4787335401754085, + "grad_norm": 0.0020497075747698545, + "learning_rate": 5.129297438899918e-07, + "loss": 0.0, + "num_input_tokens_seen": 95968848, + "step": 142395 + }, + { + "epoch": 3.4788556910072557, + "grad_norm": 0.0031505257356911898, + "learning_rate": 5.128552673788038e-07, + "loss": 0.0, + "num_input_tokens_seen": 95972176, + "step": 142400 + }, + { + "epoch": 3.478977841839103, + "grad_norm": 0.006290497723966837, + "learning_rate": 5.127807944102634e-07, + "loss": 0.0, + "num_input_tokens_seen": 95975056, + "step": 142405 + }, + { + "epoch": 3.47909999267095, + "grad_norm": 0.004650265444070101, + "learning_rate": 5.127063249849125e-07, + "loss": 0.0, + "num_input_tokens_seen": 95978704, + "step": 142410 + }, + { + "epoch": 3.4792221435027972, + "grad_norm": 0.0017632690723985434, + "learning_rate": 5.126318591032919e-07, + "loss": 0.0166, + "num_input_tokens_seen": 95982608, + "step": 142415 + }, + { + "epoch": 3.4793442943346444, + "grad_norm": 0.0033946416806429625, + "learning_rate": 5.125573967659437e-07, + "loss": 0.0, + "num_input_tokens_seen": 95986640, + "step": 142420 + }, + { + "epoch": 3.4794664451664916, + "grad_norm": 0.003886127145960927, + "learning_rate": 5.124829379734091e-07, + "loss": 0.0001, + "num_input_tokens_seen": 95990096, + "step": 142425 + }, + { + "epoch": 3.479588595998339, + "grad_norm": 0.004299870692193508, + "learning_rate": 5.124084827262297e-07, + "loss": 0.0, + "num_input_tokens_seen": 95993168, + "step": 142430 + }, + { + "epoch": 3.479710746830186, + "grad_norm": 0.0008910160977393389, + "learning_rate": 5.123340310249471e-07, + "loss": 0.0, + "num_input_tokens_seen": 95996816, + "step": 142435 + }, + { + "epoch": 3.479832897662033, + "grad_norm": 0.00021660601487383246, + "learning_rate": 5.122595828701024e-07, + "loss": 0.0, + "num_input_tokens_seen": 95999952, + "step": 142440 + }, + { + "epoch": 3.4799550484938804, + "grad_norm": 0.0003907645004801452, + "learning_rate": 5.121851382622375e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96003408, + "step": 142445 + }, + { + "epoch": 3.4800771993257276, + "grad_norm": 0.0013325160834938288, + "learning_rate": 5.121106972018931e-07, + "loss": 0.0, + "num_input_tokens_seen": 96006800, + "step": 142450 + }, + { + "epoch": 3.4801993501575748, + "grad_norm": 0.0025354530662298203, + "learning_rate": 5.120362596896115e-07, + "loss": 0.0, + "num_input_tokens_seen": 96010384, + "step": 142455 + }, + { + "epoch": 3.4803215009894215, + "grad_norm": 0.0005585558828897774, + "learning_rate": 5.119618257259333e-07, + "loss": 0.0, + "num_input_tokens_seen": 96013712, + "step": 142460 + }, + { + "epoch": 3.480443651821269, + "grad_norm": 0.0003892200766131282, + "learning_rate": 5.118873953113995e-07, + "loss": 0.0192, + "num_input_tokens_seen": 96017168, + "step": 142465 + }, + { + "epoch": 3.480565802653116, + "grad_norm": 0.0006179861375130713, + "learning_rate": 5.118129684465524e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96020688, + "step": 142470 + }, + { + "epoch": 3.480687953484963, + "grad_norm": 0.0033679294865578413, + "learning_rate": 5.117385451319322e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96023632, + "step": 142475 + }, + { + "epoch": 3.4808101043168103, + "grad_norm": 0.004632589407265186, + "learning_rate": 5.116641253680811e-07, + "loss": 0.0, + "num_input_tokens_seen": 96027024, + "step": 142480 + }, + { + "epoch": 3.4809322551486575, + "grad_norm": 0.0003384651499800384, + "learning_rate": 5.115897091555394e-07, + "loss": 0.0343, + "num_input_tokens_seen": 96030224, + "step": 142485 + }, + { + "epoch": 3.4810544059805046, + "grad_norm": 0.0016163821564987302, + "learning_rate": 5.115152964948487e-07, + "loss": 0.0, + "num_input_tokens_seen": 96033616, + "step": 142490 + }, + { + "epoch": 3.481176556812352, + "grad_norm": 0.002763167954981327, + "learning_rate": 5.114408873865505e-07, + "loss": 0.0, + "num_input_tokens_seen": 96036944, + "step": 142495 + }, + { + "epoch": 3.481298707644199, + "grad_norm": 0.17083217203617096, + "learning_rate": 5.113664818311852e-07, + "loss": 0.0, + "num_input_tokens_seen": 96040272, + "step": 142500 + }, + { + "epoch": 3.481420858476046, + "grad_norm": 0.03899205103516579, + "learning_rate": 5.112920798292947e-07, + "loss": 0.0, + "num_input_tokens_seen": 96043792, + "step": 142505 + }, + { + "epoch": 3.4815430093078934, + "grad_norm": 0.03535054251551628, + "learning_rate": 5.112176813814193e-07, + "loss": 0.0, + "num_input_tokens_seen": 96046928, + "step": 142510 + }, + { + "epoch": 3.4816651601397406, + "grad_norm": 0.00012026626791339368, + "learning_rate": 5.111432864881007e-07, + "loss": 0.0305, + "num_input_tokens_seen": 96049936, + "step": 142515 + }, + { + "epoch": 3.481787310971588, + "grad_norm": 0.005253299605101347, + "learning_rate": 5.110688951498792e-07, + "loss": 0.0, + "num_input_tokens_seen": 96053520, + "step": 142520 + }, + { + "epoch": 3.481909461803435, + "grad_norm": 0.15855756402015686, + "learning_rate": 5.109945073672963e-07, + "loss": 0.0, + "num_input_tokens_seen": 96056528, + "step": 142525 + }, + { + "epoch": 3.482031612635282, + "grad_norm": 4.489319690037519e-05, + "learning_rate": 5.109201231408931e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96060240, + "step": 142530 + }, + { + "epoch": 3.4821537634671293, + "grad_norm": 0.0007414538995362818, + "learning_rate": 5.1084574247121e-07, + "loss": 0.0, + "num_input_tokens_seen": 96064016, + "step": 142535 + }, + { + "epoch": 3.4822759142989765, + "grad_norm": 0.005368458107113838, + "learning_rate": 5.107713653587886e-07, + "loss": 0.0, + "num_input_tokens_seen": 96067472, + "step": 142540 + }, + { + "epoch": 3.4823980651308237, + "grad_norm": 0.49003827571868896, + "learning_rate": 5.106969918041692e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96070736, + "step": 142545 + }, + { + "epoch": 3.482520215962671, + "grad_norm": 0.009398811496794224, + "learning_rate": 5.106226218078931e-07, + "loss": 0.0, + "num_input_tokens_seen": 96074192, + "step": 142550 + }, + { + "epoch": 3.4826423667945177, + "grad_norm": 0.0036050749476999044, + "learning_rate": 5.105482553705005e-07, + "loss": 0.0321, + "num_input_tokens_seen": 96077328, + "step": 142555 + }, + { + "epoch": 3.482764517626365, + "grad_norm": 0.0015523568727076054, + "learning_rate": 5.104738924925331e-07, + "loss": 0.0, + "num_input_tokens_seen": 96080464, + "step": 142560 + }, + { + "epoch": 3.482886668458212, + "grad_norm": 0.0035881816875189543, + "learning_rate": 5.103995331745313e-07, + "loss": 0.0, + "num_input_tokens_seen": 96083792, + "step": 142565 + }, + { + "epoch": 3.4830088192900592, + "grad_norm": 0.004073690623044968, + "learning_rate": 5.103251774170352e-07, + "loss": 0.0, + "num_input_tokens_seen": 96087376, + "step": 142570 + }, + { + "epoch": 3.4831309701219064, + "grad_norm": 0.0024040727876126766, + "learning_rate": 5.102508252205866e-07, + "loss": 0.0, + "num_input_tokens_seen": 96090448, + "step": 142575 + }, + { + "epoch": 3.4832531209537536, + "grad_norm": 0.001509516965597868, + "learning_rate": 5.101764765857254e-07, + "loss": 0.0291, + "num_input_tokens_seen": 96093584, + "step": 142580 + }, + { + "epoch": 3.483375271785601, + "grad_norm": 0.054433681070804596, + "learning_rate": 5.101021315129925e-07, + "loss": 0.0, + "num_input_tokens_seen": 96096976, + "step": 142585 + }, + { + "epoch": 3.483497422617448, + "grad_norm": 0.00018331894534640014, + "learning_rate": 5.10027790002929e-07, + "loss": 0.0, + "num_input_tokens_seen": 96100880, + "step": 142590 + }, + { + "epoch": 3.483619573449295, + "grad_norm": 0.003632371546700597, + "learning_rate": 5.099534520560751e-07, + "loss": 0.0, + "num_input_tokens_seen": 96104272, + "step": 142595 + }, + { + "epoch": 3.4837417242811424, + "grad_norm": 0.00011348898988217115, + "learning_rate": 5.098791176729716e-07, + "loss": 0.0524, + "num_input_tokens_seen": 96107216, + "step": 142600 + }, + { + "epoch": 3.4838638751129896, + "grad_norm": 0.0023140916600823402, + "learning_rate": 5.098047868541587e-07, + "loss": 0.0284, + "num_input_tokens_seen": 96110480, + "step": 142605 + }, + { + "epoch": 3.4839860259448368, + "grad_norm": 0.00012285502452868968, + "learning_rate": 5.097304596001777e-07, + "loss": 0.0339, + "num_input_tokens_seen": 96113936, + "step": 142610 + }, + { + "epoch": 3.484108176776684, + "grad_norm": 0.001920671435073018, + "learning_rate": 5.096561359115682e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96117072, + "step": 142615 + }, + { + "epoch": 3.484230327608531, + "grad_norm": 0.013230564072728157, + "learning_rate": 5.095818157888712e-07, + "loss": 0.0, + "num_input_tokens_seen": 96120272, + "step": 142620 + }, + { + "epoch": 3.4843524784403783, + "grad_norm": 2.9572129249572754, + "learning_rate": 5.095074992326274e-07, + "loss": 0.0531, + "num_input_tokens_seen": 96123600, + "step": 142625 + }, + { + "epoch": 3.4844746292722255, + "grad_norm": 0.03145867958664894, + "learning_rate": 5.094331862433768e-07, + "loss": 0.0, + "num_input_tokens_seen": 96127376, + "step": 142630 + }, + { + "epoch": 3.4845967801040727, + "grad_norm": 0.01226538885384798, + "learning_rate": 5.093588768216602e-07, + "loss": 0.0546, + "num_input_tokens_seen": 96131088, + "step": 142635 + }, + { + "epoch": 3.4847189309359194, + "grad_norm": 0.17331823706626892, + "learning_rate": 5.092845709680176e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96134224, + "step": 142640 + }, + { + "epoch": 3.484841081767767, + "grad_norm": 0.005320474039763212, + "learning_rate": 5.092102686829896e-07, + "loss": 0.0, + "num_input_tokens_seen": 96137296, + "step": 142645 + }, + { + "epoch": 3.484963232599614, + "grad_norm": 0.0061260429210960865, + "learning_rate": 5.091359699671168e-07, + "loss": 0.0, + "num_input_tokens_seen": 96140944, + "step": 142650 + }, + { + "epoch": 3.485085383431461, + "grad_norm": 0.0017569613410159945, + "learning_rate": 5.090616748209388e-07, + "loss": 0.0, + "num_input_tokens_seen": 96144464, + "step": 142655 + }, + { + "epoch": 3.485207534263308, + "grad_norm": 0.000533196609467268, + "learning_rate": 5.089873832449969e-07, + "loss": 0.0, + "num_input_tokens_seen": 96147792, + "step": 142660 + }, + { + "epoch": 3.4853296850951554, + "grad_norm": 0.01179375872015953, + "learning_rate": 5.089130952398308e-07, + "loss": 0.0003, + "num_input_tokens_seen": 96151312, + "step": 142665 + }, + { + "epoch": 3.4854518359270026, + "grad_norm": 0.0049562300555408, + "learning_rate": 5.088388108059802e-07, + "loss": 0.0849, + "num_input_tokens_seen": 96154640, + "step": 142670 + }, + { + "epoch": 3.4855739867588498, + "grad_norm": 0.0013419613242149353, + "learning_rate": 5.087645299439864e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96158032, + "step": 142675 + }, + { + "epoch": 3.485696137590697, + "grad_norm": 0.32624852657318115, + "learning_rate": 5.086902526543889e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96160976, + "step": 142680 + }, + { + "epoch": 3.485818288422544, + "grad_norm": 0.00047253796947188675, + "learning_rate": 5.08615978937728e-07, + "loss": 0.0, + "num_input_tokens_seen": 96164944, + "step": 142685 + }, + { + "epoch": 3.4859404392543913, + "grad_norm": 0.22956159710884094, + "learning_rate": 5.085417087945436e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96168272, + "step": 142690 + }, + { + "epoch": 3.4860625900862385, + "grad_norm": 0.2009653002023697, + "learning_rate": 5.084674422253767e-07, + "loss": 0.0, + "num_input_tokens_seen": 96171984, + "step": 142695 + }, + { + "epoch": 3.4861847409180857, + "grad_norm": 0.001368940225802362, + "learning_rate": 5.083931792307661e-07, + "loss": 0.0, + "num_input_tokens_seen": 96175312, + "step": 142700 + }, + { + "epoch": 3.486306891749933, + "grad_norm": 0.012675776146352291, + "learning_rate": 5.08318919811253e-07, + "loss": 0.049, + "num_input_tokens_seen": 96178768, + "step": 142705 + }, + { + "epoch": 3.48642904258178, + "grad_norm": 0.0061261216178536415, + "learning_rate": 5.082446639673766e-07, + "loss": 0.0, + "num_input_tokens_seen": 96182224, + "step": 142710 + }, + { + "epoch": 3.4865511934136273, + "grad_norm": 0.002247494412586093, + "learning_rate": 5.081704116996773e-07, + "loss": 0.0005, + "num_input_tokens_seen": 96185808, + "step": 142715 + }, + { + "epoch": 3.4866733442454745, + "grad_norm": 0.005125206429511309, + "learning_rate": 5.080961630086954e-07, + "loss": 0.001, + "num_input_tokens_seen": 96189008, + "step": 142720 + }, + { + "epoch": 3.4867954950773212, + "grad_norm": 0.011667453683912754, + "learning_rate": 5.080219178949701e-07, + "loss": 0.0157, + "num_input_tokens_seen": 96192272, + "step": 142725 + }, + { + "epoch": 3.486917645909169, + "grad_norm": 0.01347960066050291, + "learning_rate": 5.079476763590422e-07, + "loss": 0.0, + "num_input_tokens_seen": 96195856, + "step": 142730 + }, + { + "epoch": 3.4870397967410156, + "grad_norm": 0.008868924342095852, + "learning_rate": 5.078734384014507e-07, + "loss": 0.0663, + "num_input_tokens_seen": 96199184, + "step": 142735 + }, + { + "epoch": 3.487161947572863, + "grad_norm": 0.15017327666282654, + "learning_rate": 5.07799204022736e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96202320, + "step": 142740 + }, + { + "epoch": 3.48728409840471, + "grad_norm": 0.0016383582260459661, + "learning_rate": 5.077249732234381e-07, + "loss": 0.0, + "num_input_tokens_seen": 96205776, + "step": 142745 + }, + { + "epoch": 3.487406249236557, + "grad_norm": 0.00033586935023777187, + "learning_rate": 5.076507460040964e-07, + "loss": 0.0, + "num_input_tokens_seen": 96208912, + "step": 142750 + }, + { + "epoch": 3.4875284000684044, + "grad_norm": 0.0009348663734272122, + "learning_rate": 5.075765223652511e-07, + "loss": 0.0, + "num_input_tokens_seen": 96212048, + "step": 142755 + }, + { + "epoch": 3.4876505509002516, + "grad_norm": 0.043417029082775116, + "learning_rate": 5.075023023074415e-07, + "loss": 0.0, + "num_input_tokens_seen": 96216016, + "step": 142760 + }, + { + "epoch": 3.4877727017320987, + "grad_norm": 0.0003841420984826982, + "learning_rate": 5.07428085831208e-07, + "loss": 0.0, + "num_input_tokens_seen": 96219536, + "step": 142765 + }, + { + "epoch": 3.487894852563946, + "grad_norm": 0.00028211314929649234, + "learning_rate": 5.0735387293709e-07, + "loss": 0.0, + "num_input_tokens_seen": 96223056, + "step": 142770 + }, + { + "epoch": 3.488017003395793, + "grad_norm": 0.0009631828288547695, + "learning_rate": 5.072796636256267e-07, + "loss": 0.0693, + "num_input_tokens_seen": 96226128, + "step": 142775 + }, + { + "epoch": 3.4881391542276403, + "grad_norm": 0.01604449190199375, + "learning_rate": 5.072054578973585e-07, + "loss": 0.0005, + "num_input_tokens_seen": 96229392, + "step": 142780 + }, + { + "epoch": 3.4882613050594875, + "grad_norm": 0.0003695189079735428, + "learning_rate": 5.071312557528244e-07, + "loss": 0.0003, + "num_input_tokens_seen": 96232336, + "step": 142785 + }, + { + "epoch": 3.4883834558913347, + "grad_norm": 0.0021992342080920935, + "learning_rate": 5.07057057192565e-07, + "loss": 0.0, + "num_input_tokens_seen": 96235792, + "step": 142790 + }, + { + "epoch": 3.488505606723182, + "grad_norm": 0.22512054443359375, + "learning_rate": 5.069828622171186e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96238992, + "step": 142795 + }, + { + "epoch": 3.488627757555029, + "grad_norm": 0.02057253010571003, + "learning_rate": 5.06908670827026e-07, + "loss": 0.0, + "num_input_tokens_seen": 96242768, + "step": 142800 + }, + { + "epoch": 3.4887499083868763, + "grad_norm": 0.006819578818976879, + "learning_rate": 5.068344830228257e-07, + "loss": 0.0, + "num_input_tokens_seen": 96245968, + "step": 142805 + }, + { + "epoch": 3.4888720592187235, + "grad_norm": 0.05661904439330101, + "learning_rate": 5.067602988050576e-07, + "loss": 0.0, + "num_input_tokens_seen": 96249360, + "step": 142810 + }, + { + "epoch": 3.4889942100505706, + "grad_norm": 0.0019289409974589944, + "learning_rate": 5.066861181742619e-07, + "loss": 0.0, + "num_input_tokens_seen": 96252432, + "step": 142815 + }, + { + "epoch": 3.4891163608824174, + "grad_norm": 0.001519133453257382, + "learning_rate": 5.066119411309769e-07, + "loss": 0.0, + "num_input_tokens_seen": 96255824, + "step": 142820 + }, + { + "epoch": 3.489238511714265, + "grad_norm": 0.0009457020205445588, + "learning_rate": 5.065377676757428e-07, + "loss": 0.0, + "num_input_tokens_seen": 96258960, + "step": 142825 + }, + { + "epoch": 3.4893606625461118, + "grad_norm": 0.015754206106066704, + "learning_rate": 5.064635978090986e-07, + "loss": 0.0, + "num_input_tokens_seen": 96262160, + "step": 142830 + }, + { + "epoch": 3.489482813377959, + "grad_norm": 0.005095080006867647, + "learning_rate": 5.063894315315837e-07, + "loss": 0.0245, + "num_input_tokens_seen": 96265424, + "step": 142835 + }, + { + "epoch": 3.489604964209806, + "grad_norm": 0.004009610507637262, + "learning_rate": 5.063152688437382e-07, + "loss": 0.0, + "num_input_tokens_seen": 96268752, + "step": 142840 + }, + { + "epoch": 3.4897271150416533, + "grad_norm": 0.004639514721930027, + "learning_rate": 5.062411097461004e-07, + "loss": 0.0, + "num_input_tokens_seen": 96272208, + "step": 142845 + }, + { + "epoch": 3.4898492658735005, + "grad_norm": 0.0006230053259059787, + "learning_rate": 5.061669542392104e-07, + "loss": 0.0, + "num_input_tokens_seen": 96276048, + "step": 142850 + }, + { + "epoch": 3.4899714167053477, + "grad_norm": 2334.111328125, + "learning_rate": 5.060928023236069e-07, + "loss": 0.0918, + "num_input_tokens_seen": 96279312, + "step": 142855 + }, + { + "epoch": 3.490093567537195, + "grad_norm": 0.009122136048972607, + "learning_rate": 5.060186539998295e-07, + "loss": 0.0, + "num_input_tokens_seen": 96282256, + "step": 142860 + }, + { + "epoch": 3.490215718369042, + "grad_norm": 0.004901864565908909, + "learning_rate": 5.059445092684171e-07, + "loss": 0.0, + "num_input_tokens_seen": 96285136, + "step": 142865 + }, + { + "epoch": 3.4903378692008893, + "grad_norm": 33.27792739868164, + "learning_rate": 5.058703681299094e-07, + "loss": 0.0548, + "num_input_tokens_seen": 96288336, + "step": 142870 + }, + { + "epoch": 3.4904600200327365, + "grad_norm": 0.06755754351615906, + "learning_rate": 5.057962305848454e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96291280, + "step": 142875 + }, + { + "epoch": 3.4905821708645837, + "grad_norm": 0.007401663344353437, + "learning_rate": 5.057220966337638e-07, + "loss": 0.0, + "num_input_tokens_seen": 96294800, + "step": 142880 + }, + { + "epoch": 3.490704321696431, + "grad_norm": 0.0013410568935796618, + "learning_rate": 5.056479662772042e-07, + "loss": 0.0008, + "num_input_tokens_seen": 96297808, + "step": 142885 + }, + { + "epoch": 3.490826472528278, + "grad_norm": 0.06911446154117584, + "learning_rate": 5.055738395157055e-07, + "loss": 0.0511, + "num_input_tokens_seen": 96301136, + "step": 142890 + }, + { + "epoch": 3.4909486233601252, + "grad_norm": 0.0003165986272506416, + "learning_rate": 5.054997163498065e-07, + "loss": 0.0, + "num_input_tokens_seen": 96304528, + "step": 142895 + }, + { + "epoch": 3.4910707741919724, + "grad_norm": 0.050810977816581726, + "learning_rate": 5.054255967800471e-07, + "loss": 0.0, + "num_input_tokens_seen": 96307600, + "step": 142900 + }, + { + "epoch": 3.491192925023819, + "grad_norm": 0.009707154706120491, + "learning_rate": 5.053514808069655e-07, + "loss": 0.0, + "num_input_tokens_seen": 96310928, + "step": 142905 + }, + { + "epoch": 3.491315075855667, + "grad_norm": 0.001085141790099442, + "learning_rate": 5.052773684311011e-07, + "loss": 0.0, + "num_input_tokens_seen": 96314512, + "step": 142910 + }, + { + "epoch": 3.4914372266875136, + "grad_norm": 0.002273534657433629, + "learning_rate": 5.052032596529926e-07, + "loss": 0.0, + "num_input_tokens_seen": 96318096, + "step": 142915 + }, + { + "epoch": 3.4915593775193607, + "grad_norm": 0.001621173694729805, + "learning_rate": 5.051291544731794e-07, + "loss": 0.0268, + "num_input_tokens_seen": 96321296, + "step": 142920 + }, + { + "epoch": 3.491681528351208, + "grad_norm": 0.0009241417865268886, + "learning_rate": 5.050550528921998e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96324496, + "step": 142925 + }, + { + "epoch": 3.491803679183055, + "grad_norm": 0.0004545733390841633, + "learning_rate": 5.049809549105928e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96327760, + "step": 142930 + }, + { + "epoch": 3.4919258300149023, + "grad_norm": 0.0037344826851040125, + "learning_rate": 5.049068605288978e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96330640, + "step": 142935 + }, + { + "epoch": 3.4920479808467495, + "grad_norm": 0.0003664505493361503, + "learning_rate": 5.04832769747653e-07, + "loss": 0.0, + "num_input_tokens_seen": 96333584, + "step": 142940 + }, + { + "epoch": 3.4921701316785967, + "grad_norm": 0.07739343494176865, + "learning_rate": 5.047586825673978e-07, + "loss": 0.0016, + "num_input_tokens_seen": 96336848, + "step": 142945 + }, + { + "epoch": 3.492292282510444, + "grad_norm": 0.00023452962341252714, + "learning_rate": 5.046845989886703e-07, + "loss": 0.0, + "num_input_tokens_seen": 96340496, + "step": 142950 + }, + { + "epoch": 3.492414433342291, + "grad_norm": 0.0009708444122225046, + "learning_rate": 5.0461051901201e-07, + "loss": 0.0, + "num_input_tokens_seen": 96343568, + "step": 142955 + }, + { + "epoch": 3.4925365841741383, + "grad_norm": 0.0010330878430977464, + "learning_rate": 5.04536442637955e-07, + "loss": 0.0, + "num_input_tokens_seen": 96347216, + "step": 142960 + }, + { + "epoch": 3.4926587350059854, + "grad_norm": 32.308738708496094, + "learning_rate": 5.044623698670441e-07, + "loss": 0.0489, + "num_input_tokens_seen": 96349968, + "step": 142965 + }, + { + "epoch": 3.4927808858378326, + "grad_norm": 0.007837752811610699, + "learning_rate": 5.043883006998166e-07, + "loss": 0.0, + "num_input_tokens_seen": 96353744, + "step": 142970 + }, + { + "epoch": 3.49290303666968, + "grad_norm": 0.002741006202995777, + "learning_rate": 5.043142351368106e-07, + "loss": 0.0609, + "num_input_tokens_seen": 96356816, + "step": 142975 + }, + { + "epoch": 3.493025187501527, + "grad_norm": 0.0050845337100327015, + "learning_rate": 5.042401731785645e-07, + "loss": 0.0, + "num_input_tokens_seen": 96360400, + "step": 142980 + }, + { + "epoch": 3.493147338333374, + "grad_norm": 0.022080646827816963, + "learning_rate": 5.041661148256175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96363600, + "step": 142985 + }, + { + "epoch": 3.4932694891652214, + "grad_norm": 0.0163921769708395, + "learning_rate": 5.040920600785075e-07, + "loss": 0.0, + "num_input_tokens_seen": 96367120, + "step": 142990 + }, + { + "epoch": 3.4933916399970686, + "grad_norm": 0.00010943515371764079, + "learning_rate": 5.04018008937774e-07, + "loss": 0.0006, + "num_input_tokens_seen": 96370256, + "step": 142995 + }, + { + "epoch": 3.4935137908289153, + "grad_norm": 0.012624138034880161, + "learning_rate": 5.039439614039543e-07, + "loss": 0.0, + "num_input_tokens_seen": 96373648, + "step": 143000 + }, + { + "epoch": 3.4936359416607625, + "grad_norm": 0.005008554086089134, + "learning_rate": 5.03869917477588e-07, + "loss": 0.0, + "num_input_tokens_seen": 96377232, + "step": 143005 + }, + { + "epoch": 3.4937580924926097, + "grad_norm": 0.021310361102223396, + "learning_rate": 5.037958771592128e-07, + "loss": 0.0, + "num_input_tokens_seen": 96380368, + "step": 143010 + }, + { + "epoch": 3.493880243324457, + "grad_norm": 0.011983363889157772, + "learning_rate": 5.037218404493677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96384080, + "step": 143015 + }, + { + "epoch": 3.494002394156304, + "grad_norm": 0.022541791200637817, + "learning_rate": 5.036478073485906e-07, + "loss": 0.0, + "num_input_tokens_seen": 96387728, + "step": 143020 + }, + { + "epoch": 3.4941245449881513, + "grad_norm": 0.0031625598203390837, + "learning_rate": 5.035737778574202e-07, + "loss": 0.0, + "num_input_tokens_seen": 96391184, + "step": 143025 + }, + { + "epoch": 3.4942466958199985, + "grad_norm": 21.59922218322754, + "learning_rate": 5.034997519763951e-07, + "loss": 0.0355, + "num_input_tokens_seen": 96394448, + "step": 143030 + }, + { + "epoch": 3.4943688466518457, + "grad_norm": 0.0019981954246759415, + "learning_rate": 5.034257297060529e-07, + "loss": 0.0, + "num_input_tokens_seen": 96398032, + "step": 143035 + }, + { + "epoch": 3.494490997483693, + "grad_norm": 11.361743927001953, + "learning_rate": 5.033517110469327e-07, + "loss": 0.001, + "num_input_tokens_seen": 96401168, + "step": 143040 + }, + { + "epoch": 3.49461314831554, + "grad_norm": 6.632530130445957e-05, + "learning_rate": 5.032776959995721e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96404688, + "step": 143045 + }, + { + "epoch": 3.4947352991473872, + "grad_norm": 0.00037045919452793896, + "learning_rate": 5.032036845645099e-07, + "loss": 0.0, + "num_input_tokens_seen": 96407824, + "step": 143050 + }, + { + "epoch": 3.4948574499792344, + "grad_norm": 0.0032580657862126827, + "learning_rate": 5.031296767422844e-07, + "loss": 0.0842, + "num_input_tokens_seen": 96411088, + "step": 143055 + }, + { + "epoch": 3.4949796008110816, + "grad_norm": 0.013785989955067635, + "learning_rate": 5.030556725334331e-07, + "loss": 0.0, + "num_input_tokens_seen": 96414672, + "step": 143060 + }, + { + "epoch": 3.495101751642929, + "grad_norm": 0.00632657203823328, + "learning_rate": 5.029816719384949e-07, + "loss": 0.0, + "num_input_tokens_seen": 96417808, + "step": 143065 + }, + { + "epoch": 3.495223902474776, + "grad_norm": 0.00011165008618263528, + "learning_rate": 5.029076749580075e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96421264, + "step": 143070 + }, + { + "epoch": 3.495346053306623, + "grad_norm": 0.0034451729152351618, + "learning_rate": 5.028336815925094e-07, + "loss": 0.0, + "num_input_tokens_seen": 96424848, + "step": 143075 + }, + { + "epoch": 3.4954682041384704, + "grad_norm": 0.05073514208197594, + "learning_rate": 5.027596918425386e-07, + "loss": 0.0172, + "num_input_tokens_seen": 96429072, + "step": 143080 + }, + { + "epoch": 3.495590354970317, + "grad_norm": 0.0018720118096098304, + "learning_rate": 5.026857057086325e-07, + "loss": 0.0, + "num_input_tokens_seen": 96432400, + "step": 143085 + }, + { + "epoch": 3.4957125058021647, + "grad_norm": 0.00102695869281888, + "learning_rate": 5.026117231913303e-07, + "loss": 0.0, + "num_input_tokens_seen": 96435600, + "step": 143090 + }, + { + "epoch": 3.4958346566340115, + "grad_norm": 0.002525731222704053, + "learning_rate": 5.025377442911689e-07, + "loss": 0.0, + "num_input_tokens_seen": 96439056, + "step": 143095 + }, + { + "epoch": 3.4959568074658587, + "grad_norm": 176.42254638671875, + "learning_rate": 5.024637690086873e-07, + "loss": 0.1003, + "num_input_tokens_seen": 96442192, + "step": 143100 + }, + { + "epoch": 3.496078958297706, + "grad_norm": 0.001519545796327293, + "learning_rate": 5.023897973444226e-07, + "loss": 0.0, + "num_input_tokens_seen": 96445776, + "step": 143105 + }, + { + "epoch": 3.496201109129553, + "grad_norm": 0.0004945505643263459, + "learning_rate": 5.023158292989135e-07, + "loss": 0.0, + "num_input_tokens_seen": 96449104, + "step": 143110 + }, + { + "epoch": 3.4963232599614003, + "grad_norm": 0.04455680400133133, + "learning_rate": 5.022418648726972e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96451920, + "step": 143115 + }, + { + "epoch": 3.4964454107932474, + "grad_norm": 0.006775304209440947, + "learning_rate": 5.021679040663118e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96454864, + "step": 143120 + }, + { + "epoch": 3.4965675616250946, + "grad_norm": 0.0007572094327770174, + "learning_rate": 5.020939468802958e-07, + "loss": 0.0, + "num_input_tokens_seen": 96458768, + "step": 143125 + }, + { + "epoch": 3.496689712456942, + "grad_norm": 0.000804522424004972, + "learning_rate": 5.020199933151862e-07, + "loss": 0.0, + "num_input_tokens_seen": 96462160, + "step": 143130 + }, + { + "epoch": 3.496811863288789, + "grad_norm": 0.006477277260273695, + "learning_rate": 5.019460433715214e-07, + "loss": 0.0, + "num_input_tokens_seen": 96465296, + "step": 143135 + }, + { + "epoch": 3.496934014120636, + "grad_norm": 0.002193046035245061, + "learning_rate": 5.018720970498387e-07, + "loss": 0.0, + "num_input_tokens_seen": 96468880, + "step": 143140 + }, + { + "epoch": 3.4970561649524834, + "grad_norm": 48.42259216308594, + "learning_rate": 5.01798154350676e-07, + "loss": 0.0269, + "num_input_tokens_seen": 96472208, + "step": 143145 + }, + { + "epoch": 3.4971783157843306, + "grad_norm": 0.009474172256886959, + "learning_rate": 5.017242152745715e-07, + "loss": 0.0, + "num_input_tokens_seen": 96475728, + "step": 143150 + }, + { + "epoch": 3.4973004666161778, + "grad_norm": 0.0001851016713771969, + "learning_rate": 5.016502798220622e-07, + "loss": 0.0, + "num_input_tokens_seen": 96479184, + "step": 143155 + }, + { + "epoch": 3.497422617448025, + "grad_norm": 0.0032584320288151503, + "learning_rate": 5.015763479936865e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96482320, + "step": 143160 + }, + { + "epoch": 3.497544768279872, + "grad_norm": 0.0007037792238406837, + "learning_rate": 5.015024197899812e-07, + "loss": 0.0, + "num_input_tokens_seen": 96485264, + "step": 143165 + }, + { + "epoch": 3.4976669191117193, + "grad_norm": 0.31896787881851196, + "learning_rate": 5.014284952114848e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96488848, + "step": 143170 + }, + { + "epoch": 3.4977890699435665, + "grad_norm": 0.0003134756116196513, + "learning_rate": 5.013545742587341e-07, + "loss": 0.0, + "num_input_tokens_seen": 96492496, + "step": 143175 + }, + { + "epoch": 3.4979112207754133, + "grad_norm": 0.011219010688364506, + "learning_rate": 5.012806569322674e-07, + "loss": 0.0344, + "num_input_tokens_seen": 96496208, + "step": 143180 + }, + { + "epoch": 3.4980333716072605, + "grad_norm": 0.12939979135990143, + "learning_rate": 5.012067432326219e-07, + "loss": 0.0642, + "num_input_tokens_seen": 96499408, + "step": 143185 + }, + { + "epoch": 3.4981555224391077, + "grad_norm": 0.18904073536396027, + "learning_rate": 5.011328331603348e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96502672, + "step": 143190 + }, + { + "epoch": 3.498277673270955, + "grad_norm": 0.0017373156733810902, + "learning_rate": 5.010589267159443e-07, + "loss": 0.0, + "num_input_tokens_seen": 96506000, + "step": 143195 + }, + { + "epoch": 3.498399824102802, + "grad_norm": 0.0005279670003801584, + "learning_rate": 5.00985023899987e-07, + "loss": 0.0, + "num_input_tokens_seen": 96509584, + "step": 143200 + }, + { + "epoch": 3.4985219749346492, + "grad_norm": 0.005878926254808903, + "learning_rate": 5.00911124713001e-07, + "loss": 0.0, + "num_input_tokens_seen": 96512592, + "step": 143205 + }, + { + "epoch": 3.4986441257664964, + "grad_norm": 0.00023229350335896015, + "learning_rate": 5.008372291555238e-07, + "loss": 0.0, + "num_input_tokens_seen": 96516176, + "step": 143210 + }, + { + "epoch": 3.4987662765983436, + "grad_norm": 0.001065135351382196, + "learning_rate": 5.007633372280921e-07, + "loss": 0.0, + "num_input_tokens_seen": 96519440, + "step": 143215 + }, + { + "epoch": 3.498888427430191, + "grad_norm": 0.02967376634478569, + "learning_rate": 5.006894489312442e-07, + "loss": 0.0, + "num_input_tokens_seen": 96522512, + "step": 143220 + }, + { + "epoch": 3.499010578262038, + "grad_norm": 0.0024210901465266943, + "learning_rate": 5.006155642655165e-07, + "loss": 0.0, + "num_input_tokens_seen": 96525840, + "step": 143225 + }, + { + "epoch": 3.499132729093885, + "grad_norm": 0.0014005607226863503, + "learning_rate": 5.005416832314471e-07, + "loss": 0.0, + "num_input_tokens_seen": 96529424, + "step": 143230 + }, + { + "epoch": 3.4992548799257324, + "grad_norm": 0.009456724859774113, + "learning_rate": 5.004678058295726e-07, + "loss": 0.0, + "num_input_tokens_seen": 96532560, + "step": 143235 + }, + { + "epoch": 3.4993770307575796, + "grad_norm": 0.002857289044186473, + "learning_rate": 5.003939320604304e-07, + "loss": 0.0, + "num_input_tokens_seen": 96535760, + "step": 143240 + }, + { + "epoch": 3.4994991815894267, + "grad_norm": 0.0014447863213717937, + "learning_rate": 5.003200619245584e-07, + "loss": 0.0, + "num_input_tokens_seen": 96539152, + "step": 143245 + }, + { + "epoch": 3.499621332421274, + "grad_norm": 0.0642884150147438, + "learning_rate": 5.00246195422493e-07, + "loss": 0.0, + "num_input_tokens_seen": 96542672, + "step": 143250 + }, + { + "epoch": 3.499743483253121, + "grad_norm": 0.0013544763205572963, + "learning_rate": 5.00172332554772e-07, + "loss": 0.0421, + "num_input_tokens_seen": 96546128, + "step": 143255 + }, + { + "epoch": 3.4998656340849683, + "grad_norm": 0.02764141745865345, + "learning_rate": 5.000984733219318e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96549840, + "step": 143260 + }, + { + "epoch": 3.499987784916815, + "grad_norm": 0.001067040953785181, + "learning_rate": 5.000246177245104e-07, + "loss": 0.0, + "num_input_tokens_seen": 96553104, + "step": 143265 + }, + { + "epoch": 3.5001099357486627, + "grad_norm": 0.006503286771476269, + "learning_rate": 4.999507657630441e-07, + "loss": 0.1349, + "num_input_tokens_seen": 96557008, + "step": 143270 + }, + { + "epoch": 3.5002320865805094, + "grad_norm": 7.654829823877662e-05, + "learning_rate": 4.998769174380703e-07, + "loss": 0.0332, + "num_input_tokens_seen": 96560080, + "step": 143275 + }, + { + "epoch": 3.5002565167468793, + "eval_loss": 0.24939614534378052, + "eval_runtime": 47.9318, + "eval_samples_per_second": 759.099, + "eval_steps_per_second": 94.906, + "num_input_tokens_seen": 96560720, + "step": 143276 + }, + { + "epoch": 3.5003542374123566, + "grad_norm": 0.0012859876733273268, + "learning_rate": 4.998030727501263e-07, + "loss": 0.0, + "num_input_tokens_seen": 96563536, + "step": 143280 + }, + { + "epoch": 3.500476388244204, + "grad_norm": 0.002032454591244459, + "learning_rate": 4.997292316997492e-07, + "loss": 0.0563, + "num_input_tokens_seen": 96566736, + "step": 143285 + }, + { + "epoch": 3.500598539076051, + "grad_norm": 0.002231521299108863, + "learning_rate": 4.996553942874751e-07, + "loss": 0.0, + "num_input_tokens_seen": 96570192, + "step": 143290 + }, + { + "epoch": 3.500720689907898, + "grad_norm": 66.57640838623047, + "learning_rate": 4.995815605138419e-07, + "loss": 0.0762, + "num_input_tokens_seen": 96573392, + "step": 143295 + }, + { + "epoch": 3.5008428407397454, + "grad_norm": 0.0034801210276782513, + "learning_rate": 4.995077303793859e-07, + "loss": 0.0, + "num_input_tokens_seen": 96576848, + "step": 143300 + }, + { + "epoch": 3.5009649915715926, + "grad_norm": 0.0004456123278941959, + "learning_rate": 4.994339038846447e-07, + "loss": 0.0, + "num_input_tokens_seen": 96580112, + "step": 143305 + }, + { + "epoch": 3.5010871424034398, + "grad_norm": 0.0003277628275100142, + "learning_rate": 4.993600810301543e-07, + "loss": 0.0, + "num_input_tokens_seen": 96583568, + "step": 143310 + }, + { + "epoch": 3.501209293235287, + "grad_norm": 0.00047519730287604034, + "learning_rate": 4.992862618164525e-07, + "loss": 0.0, + "num_input_tokens_seen": 96586960, + "step": 143315 + }, + { + "epoch": 3.501331444067134, + "grad_norm": 0.001118892221711576, + "learning_rate": 4.992124462440754e-07, + "loss": 0.0016, + "num_input_tokens_seen": 96590224, + "step": 143320 + }, + { + "epoch": 3.5014535948989813, + "grad_norm": 0.0010593585902824998, + "learning_rate": 4.991386343135602e-07, + "loss": 0.0, + "num_input_tokens_seen": 96593680, + "step": 143325 + }, + { + "epoch": 3.5015757457308285, + "grad_norm": 0.00285542756319046, + "learning_rate": 4.990648260254434e-07, + "loss": 0.0, + "num_input_tokens_seen": 96596688, + "step": 143330 + }, + { + "epoch": 3.5016978965626757, + "grad_norm": 0.030459964647889137, + "learning_rate": 4.989910213802618e-07, + "loss": 0.0, + "num_input_tokens_seen": 96599824, + "step": 143335 + }, + { + "epoch": 3.501820047394523, + "grad_norm": 0.0019312178483232856, + "learning_rate": 4.989172203785528e-07, + "loss": 0.0526, + "num_input_tokens_seen": 96603216, + "step": 143340 + }, + { + "epoch": 3.50194219822637, + "grad_norm": 0.0007082682568579912, + "learning_rate": 4.98843423020852e-07, + "loss": 0.0, + "num_input_tokens_seen": 96607248, + "step": 143345 + }, + { + "epoch": 3.502064349058217, + "grad_norm": 0.0170864537358284, + "learning_rate": 4.98769629307697e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96610704, + "step": 143350 + }, + { + "epoch": 3.5021864998900645, + "grad_norm": 0.006353064905852079, + "learning_rate": 4.986958392396239e-07, + "loss": 0.0377, + "num_input_tokens_seen": 96613904, + "step": 143355 + }, + { + "epoch": 3.502308650721911, + "grad_norm": 0.00010134075273526832, + "learning_rate": 4.986220528171692e-07, + "loss": 0.0, + "num_input_tokens_seen": 96617360, + "step": 143360 + }, + { + "epoch": 3.502430801553759, + "grad_norm": 0.0008548396872356534, + "learning_rate": 4.985482700408704e-07, + "loss": 0.0, + "num_input_tokens_seen": 96621008, + "step": 143365 + }, + { + "epoch": 3.5025529523856056, + "grad_norm": 0.073929063975811, + "learning_rate": 4.98474490911263e-07, + "loss": 0.0, + "num_input_tokens_seen": 96624208, + "step": 143370 + }, + { + "epoch": 3.502675103217453, + "grad_norm": 0.0011899123201146722, + "learning_rate": 4.984007154288843e-07, + "loss": 0.0, + "num_input_tokens_seen": 96627920, + "step": 143375 + }, + { + "epoch": 3.5027972540493, + "grad_norm": 0.010921421460807323, + "learning_rate": 4.983269435942702e-07, + "loss": 0.0, + "num_input_tokens_seen": 96631120, + "step": 143380 + }, + { + "epoch": 3.502919404881147, + "grad_norm": 0.0010460818884894252, + "learning_rate": 4.98253175407958e-07, + "loss": 0.0626, + "num_input_tokens_seen": 96634704, + "step": 143385 + }, + { + "epoch": 3.5030415557129944, + "grad_norm": 0.0009477305575273931, + "learning_rate": 4.981794108704834e-07, + "loss": 0.087, + "num_input_tokens_seen": 96637968, + "step": 143390 + }, + { + "epoch": 3.5031637065448415, + "grad_norm": 0.14386871457099915, + "learning_rate": 4.981056499823829e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96641552, + "step": 143395 + }, + { + "epoch": 3.5032858573766887, + "grad_norm": 0.0003375703818164766, + "learning_rate": 4.980318927441934e-07, + "loss": 0.0491, + "num_input_tokens_seen": 96645008, + "step": 143400 + }, + { + "epoch": 3.503408008208536, + "grad_norm": 0.0025346807669848204, + "learning_rate": 4.979581391564507e-07, + "loss": 0.0, + "num_input_tokens_seen": 96648400, + "step": 143405 + }, + { + "epoch": 3.503530159040383, + "grad_norm": 0.009452375583350658, + "learning_rate": 4.978843892196918e-07, + "loss": 0.0, + "num_input_tokens_seen": 96651792, + "step": 143410 + }, + { + "epoch": 3.5036523098722303, + "grad_norm": 0.07168054580688477, + "learning_rate": 4.978106429344523e-07, + "loss": 0.0548, + "num_input_tokens_seen": 96655248, + "step": 143415 + }, + { + "epoch": 3.5037744607040775, + "grad_norm": 0.011760186403989792, + "learning_rate": 4.977369003012691e-07, + "loss": 0.0, + "num_input_tokens_seen": 96658192, + "step": 143420 + }, + { + "epoch": 3.5038966115359247, + "grad_norm": 0.0027387761510908604, + "learning_rate": 4.976631613206781e-07, + "loss": 0.0, + "num_input_tokens_seen": 96661456, + "step": 143425 + }, + { + "epoch": 3.504018762367772, + "grad_norm": 0.0015713631873950362, + "learning_rate": 4.975894259932156e-07, + "loss": 0.0, + "num_input_tokens_seen": 96664912, + "step": 143430 + }, + { + "epoch": 3.5041409131996186, + "grad_norm": 0.0005512706702575088, + "learning_rate": 4.975156943194183e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96667856, + "step": 143435 + }, + { + "epoch": 3.5042630640314663, + "grad_norm": 0.0021763816475868225, + "learning_rate": 4.974419662998216e-07, + "loss": 0.0, + "num_input_tokens_seen": 96671312, + "step": 143440 + }, + { + "epoch": 3.504385214863313, + "grad_norm": 0.009386450052261353, + "learning_rate": 4.973682419349625e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96674832, + "step": 143445 + }, + { + "epoch": 3.5045073656951606, + "grad_norm": 0.00046236676280386746, + "learning_rate": 4.972945212253764e-07, + "loss": 0.031, + "num_input_tokens_seen": 96678032, + "step": 143450 + }, + { + "epoch": 3.5046295165270074, + "grad_norm": 0.003409997094422579, + "learning_rate": 4.972208041715997e-07, + "loss": 0.0, + "num_input_tokens_seen": 96680976, + "step": 143455 + }, + { + "epoch": 3.5047516673588546, + "grad_norm": 0.00525322463363409, + "learning_rate": 4.971470907741691e-07, + "loss": 0.0, + "num_input_tokens_seen": 96684560, + "step": 143460 + }, + { + "epoch": 3.5048738181907018, + "grad_norm": 0.001424750778824091, + "learning_rate": 4.970733810336196e-07, + "loss": 0.0, + "num_input_tokens_seen": 96687888, + "step": 143465 + }, + { + "epoch": 3.504995969022549, + "grad_norm": 0.0075294580310583115, + "learning_rate": 4.96999674950488e-07, + "loss": 0.0, + "num_input_tokens_seen": 96691408, + "step": 143470 + }, + { + "epoch": 3.505118119854396, + "grad_norm": 0.0034216614440083504, + "learning_rate": 4.969259725253098e-07, + "loss": 0.0, + "num_input_tokens_seen": 96694736, + "step": 143475 + }, + { + "epoch": 3.5052402706862433, + "grad_norm": 0.013915733434259892, + "learning_rate": 4.968522737586216e-07, + "loss": 0.0, + "num_input_tokens_seen": 96698192, + "step": 143480 + }, + { + "epoch": 3.5053624215180905, + "grad_norm": 0.000553418998606503, + "learning_rate": 4.967785786509586e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96701904, + "step": 143485 + }, + { + "epoch": 3.5054845723499377, + "grad_norm": 0.08207833021879196, + "learning_rate": 4.967048872028575e-07, + "loss": 0.0, + "num_input_tokens_seen": 96705168, + "step": 143490 + }, + { + "epoch": 3.505606723181785, + "grad_norm": 0.0022202276159077883, + "learning_rate": 4.966311994148539e-07, + "loss": 0.0, + "num_input_tokens_seen": 96708560, + "step": 143495 + }, + { + "epoch": 3.505728874013632, + "grad_norm": 0.04817364364862442, + "learning_rate": 4.965575152874833e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96711632, + "step": 143500 + }, + { + "epoch": 3.5058510248454793, + "grad_norm": 0.0006875473191030324, + "learning_rate": 4.96483834821282e-07, + "loss": 0.0307, + "num_input_tokens_seen": 96715280, + "step": 143505 + }, + { + "epoch": 3.5059731756773265, + "grad_norm": 0.0027317036874592304, + "learning_rate": 4.964101580167855e-07, + "loss": 0.0, + "num_input_tokens_seen": 96718352, + "step": 143510 + }, + { + "epoch": 3.5060953265091737, + "grad_norm": 0.15406769514083862, + "learning_rate": 4.963364848745301e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96721744, + "step": 143515 + }, + { + "epoch": 3.506217477341021, + "grad_norm": 0.001995170721784234, + "learning_rate": 4.962628153950508e-07, + "loss": 0.092, + "num_input_tokens_seen": 96724816, + "step": 143520 + }, + { + "epoch": 3.506339628172868, + "grad_norm": 0.08986975252628326, + "learning_rate": 4.961891495788838e-07, + "loss": 0.0, + "num_input_tokens_seen": 96728272, + "step": 143525 + }, + { + "epoch": 3.506461779004715, + "grad_norm": 0.00025660096434876323, + "learning_rate": 4.961154874265653e-07, + "loss": 0.0, + "num_input_tokens_seen": 96731600, + "step": 143530 + }, + { + "epoch": 3.5065839298365624, + "grad_norm": 0.027546176686882973, + "learning_rate": 4.9604182893863e-07, + "loss": 0.0, + "num_input_tokens_seen": 96735312, + "step": 143535 + }, + { + "epoch": 3.506706080668409, + "grad_norm": 0.0038960163947194815, + "learning_rate": 4.959681741156146e-07, + "loss": 0.0671, + "num_input_tokens_seen": 96738128, + "step": 143540 + }, + { + "epoch": 3.5068282315002564, + "grad_norm": 0.00791318528354168, + "learning_rate": 4.958945229580537e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96741520, + "step": 143545 + }, + { + "epoch": 3.5069503823321035, + "grad_norm": 0.014546308666467667, + "learning_rate": 4.958208754664834e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96744784, + "step": 143550 + }, + { + "epoch": 3.5070725331639507, + "grad_norm": 0.0013046764070168138, + "learning_rate": 4.957472316414398e-07, + "loss": 0.0341, + "num_input_tokens_seen": 96748560, + "step": 143555 + }, + { + "epoch": 3.507194683995798, + "grad_norm": 0.002422966528683901, + "learning_rate": 4.956735914834576e-07, + "loss": 0.0359, + "num_input_tokens_seen": 96752144, + "step": 143560 + }, + { + "epoch": 3.507316834827645, + "grad_norm": 0.002931036287918687, + "learning_rate": 4.95599954993073e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96755536, + "step": 143565 + }, + { + "epoch": 3.5074389856594923, + "grad_norm": 0.004579018801450729, + "learning_rate": 4.955263221708206e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96758864, + "step": 143570 + }, + { + "epoch": 3.5075611364913395, + "grad_norm": 0.0005623329780064523, + "learning_rate": 4.954526930172371e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96762640, + "step": 143575 + }, + { + "epoch": 3.5076832873231867, + "grad_norm": 0.20815621316432953, + "learning_rate": 4.953790675328569e-07, + "loss": 0.0271, + "num_input_tokens_seen": 96766160, + "step": 143580 + }, + { + "epoch": 3.507805438155034, + "grad_norm": 0.0022279792465269566, + "learning_rate": 4.95305445718216e-07, + "loss": 0.0414, + "num_input_tokens_seen": 96769744, + "step": 143585 + }, + { + "epoch": 3.507927588986881, + "grad_norm": 0.01307701226323843, + "learning_rate": 4.952318275738499e-07, + "loss": 0.0, + "num_input_tokens_seen": 96773392, + "step": 143590 + }, + { + "epoch": 3.5080497398187283, + "grad_norm": 0.09603697061538696, + "learning_rate": 4.951582131002936e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96776592, + "step": 143595 + }, + { + "epoch": 3.5081718906505754, + "grad_norm": 0.0005847231950610876, + "learning_rate": 4.950846022980822e-07, + "loss": 0.0, + "num_input_tokens_seen": 96779856, + "step": 143600 + }, + { + "epoch": 3.5082940414824226, + "grad_norm": 0.001085648313164711, + "learning_rate": 4.950109951677519e-07, + "loss": 0.0011, + "num_input_tokens_seen": 96783056, + "step": 143605 + }, + { + "epoch": 3.50841619231427, + "grad_norm": 0.0025777278933674097, + "learning_rate": 4.94937391709837e-07, + "loss": 0.0, + "num_input_tokens_seen": 96787088, + "step": 143610 + }, + { + "epoch": 3.5085383431461166, + "grad_norm": 0.0006910903030075133, + "learning_rate": 4.948637919248736e-07, + "loss": 0.0, + "num_input_tokens_seen": 96790544, + "step": 143615 + }, + { + "epoch": 3.508660493977964, + "grad_norm": 0.0012758640805259347, + "learning_rate": 4.947901958133962e-07, + "loss": 0.0, + "num_input_tokens_seen": 96794384, + "step": 143620 + }, + { + "epoch": 3.508782644809811, + "grad_norm": 0.0014997717225924134, + "learning_rate": 4.947166033759408e-07, + "loss": 0.0, + "num_input_tokens_seen": 96797840, + "step": 143625 + }, + { + "epoch": 3.5089047956416586, + "grad_norm": 0.0014699621824547648, + "learning_rate": 4.946430146130419e-07, + "loss": 0.0, + "num_input_tokens_seen": 96801232, + "step": 143630 + }, + { + "epoch": 3.5090269464735053, + "grad_norm": 0.035319749265909195, + "learning_rate": 4.94569429525235e-07, + "loss": 0.0, + "num_input_tokens_seen": 96804688, + "step": 143635 + }, + { + "epoch": 3.5091490973053525, + "grad_norm": 0.001275359420105815, + "learning_rate": 4.94495848113055e-07, + "loss": 0.0, + "num_input_tokens_seen": 96808528, + "step": 143640 + }, + { + "epoch": 3.5092712481371997, + "grad_norm": 0.003758045844733715, + "learning_rate": 4.944222703770371e-07, + "loss": 0.0, + "num_input_tokens_seen": 96811792, + "step": 143645 + }, + { + "epoch": 3.509393398969047, + "grad_norm": 0.0023038359358906746, + "learning_rate": 4.943486963177168e-07, + "loss": 0.0, + "num_input_tokens_seen": 96814992, + "step": 143650 + }, + { + "epoch": 3.509515549800894, + "grad_norm": 0.00477098673582077, + "learning_rate": 4.942751259356285e-07, + "loss": 0.0, + "num_input_tokens_seen": 96818896, + "step": 143655 + }, + { + "epoch": 3.5096377006327413, + "grad_norm": 0.06560824066400528, + "learning_rate": 4.942015592313078e-07, + "loss": 0.0213, + "num_input_tokens_seen": 96821840, + "step": 143660 + }, + { + "epoch": 3.5097598514645885, + "grad_norm": 0.0005781487561762333, + "learning_rate": 4.94127996205289e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96825232, + "step": 143665 + }, + { + "epoch": 3.5098820022964357, + "grad_norm": 0.05620527267456055, + "learning_rate": 4.940544368581079e-07, + "loss": 0.0, + "num_input_tokens_seen": 96829136, + "step": 143670 + }, + { + "epoch": 3.510004153128283, + "grad_norm": 0.11132854223251343, + "learning_rate": 4.939808811902986e-07, + "loss": 0.0003, + "num_input_tokens_seen": 96832784, + "step": 143675 + }, + { + "epoch": 3.51012630396013, + "grad_norm": 0.003071943297982216, + "learning_rate": 4.939073292023965e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96835792, + "step": 143680 + }, + { + "epoch": 3.5102484547919772, + "grad_norm": 0.017345771193504333, + "learning_rate": 4.938337808949368e-07, + "loss": 0.0, + "num_input_tokens_seen": 96838672, + "step": 143685 + }, + { + "epoch": 3.5103706056238244, + "grad_norm": 3.049597978591919, + "learning_rate": 4.937602362684535e-07, + "loss": 0.0007, + "num_input_tokens_seen": 96841744, + "step": 143690 + }, + { + "epoch": 3.5104927564556716, + "grad_norm": 0.23833641409873962, + "learning_rate": 4.936866953234824e-07, + "loss": 0.0, + "num_input_tokens_seen": 96845264, + "step": 143695 + }, + { + "epoch": 3.510614907287519, + "grad_norm": 0.012993131764233112, + "learning_rate": 4.936131580605578e-07, + "loss": 0.0, + "num_input_tokens_seen": 96848336, + "step": 143700 + }, + { + "epoch": 3.510737058119366, + "grad_norm": 0.002314453711733222, + "learning_rate": 4.935396244802142e-07, + "loss": 0.0691, + "num_input_tokens_seen": 96851472, + "step": 143705 + }, + { + "epoch": 3.5108592089512127, + "grad_norm": 0.015659581869840622, + "learning_rate": 4.934660945829869e-07, + "loss": 0.0, + "num_input_tokens_seen": 96854992, + "step": 143710 + }, + { + "epoch": 3.5109813597830604, + "grad_norm": 0.0017577859107404947, + "learning_rate": 4.933925683694101e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96857872, + "step": 143715 + }, + { + "epoch": 3.511103510614907, + "grad_norm": 0.053707629442214966, + "learning_rate": 4.933190458400193e-07, + "loss": 0.0, + "num_input_tokens_seen": 96861008, + "step": 143720 + }, + { + "epoch": 3.5112256614467543, + "grad_norm": 0.00028163212118670344, + "learning_rate": 4.932455269953482e-07, + "loss": 0.0, + "num_input_tokens_seen": 96864208, + "step": 143725 + }, + { + "epoch": 3.5113478122786015, + "grad_norm": 1.548169493675232, + "learning_rate": 4.931720118359323e-07, + "loss": 0.0005, + "num_input_tokens_seen": 96867536, + "step": 143730 + }, + { + "epoch": 3.5114699631104487, + "grad_norm": 0.03325950354337692, + "learning_rate": 4.930985003623054e-07, + "loss": 0.0004, + "num_input_tokens_seen": 96871440, + "step": 143735 + }, + { + "epoch": 3.511592113942296, + "grad_norm": 4.091006278991699, + "learning_rate": 4.930249925750026e-07, + "loss": 0.0906, + "num_input_tokens_seen": 96874768, + "step": 143740 + }, + { + "epoch": 3.511714264774143, + "grad_norm": 0.054243385791778564, + "learning_rate": 4.929514884745588e-07, + "loss": 0.0, + "num_input_tokens_seen": 96878672, + "step": 143745 + }, + { + "epoch": 3.5118364156059902, + "grad_norm": 0.0010780526790767908, + "learning_rate": 4.928779880615078e-07, + "loss": 0.0, + "num_input_tokens_seen": 96881808, + "step": 143750 + }, + { + "epoch": 3.5119585664378374, + "grad_norm": 0.0017064374405890703, + "learning_rate": 4.928044913363849e-07, + "loss": 0.0421, + "num_input_tokens_seen": 96885264, + "step": 143755 + }, + { + "epoch": 3.5120807172696846, + "grad_norm": 0.002845099428668618, + "learning_rate": 4.927309982997237e-07, + "loss": 0.0, + "num_input_tokens_seen": 96888912, + "step": 143760 + }, + { + "epoch": 3.512202868101532, + "grad_norm": 0.0018212158465757966, + "learning_rate": 4.926575089520592e-07, + "loss": 0.0, + "num_input_tokens_seen": 96892304, + "step": 143765 + }, + { + "epoch": 3.512325018933379, + "grad_norm": 0.0034192639868706465, + "learning_rate": 4.925840232939261e-07, + "loss": 0.0975, + "num_input_tokens_seen": 96895632, + "step": 143770 + }, + { + "epoch": 3.512447169765226, + "grad_norm": 0.000690255023073405, + "learning_rate": 4.92510541325858e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96899152, + "step": 143775 + }, + { + "epoch": 3.5125693205970734, + "grad_norm": 0.023969320580363274, + "learning_rate": 4.924370630483902e-07, + "loss": 0.0, + "num_input_tokens_seen": 96902736, + "step": 143780 + }, + { + "epoch": 3.5126914714289206, + "grad_norm": 0.4486314356327057, + "learning_rate": 4.923635884620561e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96906000, + "step": 143785 + }, + { + "epoch": 3.5128136222607678, + "grad_norm": 0.0013574823969975114, + "learning_rate": 4.92290117567391e-07, + "loss": 0.0, + "num_input_tokens_seen": 96909584, + "step": 143790 + }, + { + "epoch": 3.5129357730926145, + "grad_norm": 0.3799872100353241, + "learning_rate": 4.922166503649284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96912912, + "step": 143795 + }, + { + "epoch": 3.513057923924462, + "grad_norm": 0.0018316078931093216, + "learning_rate": 4.921431868552032e-07, + "loss": 0.0, + "num_input_tokens_seen": 96916176, + "step": 143800 + }, + { + "epoch": 3.513180074756309, + "grad_norm": 0.0247394610196352, + "learning_rate": 4.920697270387489e-07, + "loss": 0.0, + "num_input_tokens_seen": 96919760, + "step": 143805 + }, + { + "epoch": 3.5133022255881565, + "grad_norm": 0.003527220571413636, + "learning_rate": 4.919962709161008e-07, + "loss": 0.0, + "num_input_tokens_seen": 96923344, + "step": 143810 + }, + { + "epoch": 3.5134243764200033, + "grad_norm": 0.014180098660290241, + "learning_rate": 4.919228184877922e-07, + "loss": 0.0, + "num_input_tokens_seen": 96926352, + "step": 143815 + }, + { + "epoch": 3.5135465272518505, + "grad_norm": 0.0015301750972867012, + "learning_rate": 4.918493697543572e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96930000, + "step": 143820 + }, + { + "epoch": 3.5136686780836976, + "grad_norm": 0.059794213622808456, + "learning_rate": 4.917759247163307e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96933520, + "step": 143825 + }, + { + "epoch": 3.513790828915545, + "grad_norm": 0.0013804766349494457, + "learning_rate": 4.917024833742459e-07, + "loss": 0.0, + "num_input_tokens_seen": 96936528, + "step": 143830 + }, + { + "epoch": 3.513912979747392, + "grad_norm": 0.007687632460147142, + "learning_rate": 4.916290457286374e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96939792, + "step": 143835 + }, + { + "epoch": 3.514035130579239, + "grad_norm": 0.0029032202437520027, + "learning_rate": 4.915556117800395e-07, + "loss": 0.0, + "num_input_tokens_seen": 96942928, + "step": 143840 + }, + { + "epoch": 3.5141572814110864, + "grad_norm": 0.007129203528165817, + "learning_rate": 4.914821815289858e-07, + "loss": 0.0, + "num_input_tokens_seen": 96946064, + "step": 143845 + }, + { + "epoch": 3.5142794322429336, + "grad_norm": 0.0005458451923914254, + "learning_rate": 4.914087549760106e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96949136, + "step": 143850 + }, + { + "epoch": 3.514401583074781, + "grad_norm": 0.0006653392338193953, + "learning_rate": 4.913353321216475e-07, + "loss": 0.0, + "num_input_tokens_seen": 96952464, + "step": 143855 + }, + { + "epoch": 3.514523733906628, + "grad_norm": 6.379980087280273, + "learning_rate": 4.912619129664306e-07, + "loss": 0.0008, + "num_input_tokens_seen": 96955792, + "step": 143860 + }, + { + "epoch": 3.514645884738475, + "grad_norm": 0.0027823809068650007, + "learning_rate": 4.911884975108943e-07, + "loss": 0.0001, + "num_input_tokens_seen": 96959696, + "step": 143865 + }, + { + "epoch": 3.5147680355703224, + "grad_norm": 0.0003252919123042375, + "learning_rate": 4.911150857555717e-07, + "loss": 0.0, + "num_input_tokens_seen": 96963280, + "step": 143870 + }, + { + "epoch": 3.5148901864021695, + "grad_norm": 0.004755501169711351, + "learning_rate": 4.910416777009975e-07, + "loss": 0.0, + "num_input_tokens_seen": 96966544, + "step": 143875 + }, + { + "epoch": 3.5150123372340163, + "grad_norm": 0.0003794467484112829, + "learning_rate": 4.909682733477047e-07, + "loss": 0.0613, + "num_input_tokens_seen": 96970064, + "step": 143880 + }, + { + "epoch": 3.515134488065864, + "grad_norm": 0.0023390930145978928, + "learning_rate": 4.90894872696228e-07, + "loss": 0.0046, + "num_input_tokens_seen": 96973456, + "step": 143885 + }, + { + "epoch": 3.5152566388977107, + "grad_norm": 0.021191345527768135, + "learning_rate": 4.908214757471002e-07, + "loss": 0.0, + "num_input_tokens_seen": 96977168, + "step": 143890 + }, + { + "epoch": 3.5153787897295583, + "grad_norm": 0.0015332361217588186, + "learning_rate": 4.907480825008556e-07, + "loss": 0.0, + "num_input_tokens_seen": 96980496, + "step": 143895 + }, + { + "epoch": 3.515500940561405, + "grad_norm": 0.00018658730550669134, + "learning_rate": 4.906746929580284e-07, + "loss": 0.0, + "num_input_tokens_seen": 96984208, + "step": 143900 + }, + { + "epoch": 3.5156230913932522, + "grad_norm": 0.0002967533946502954, + "learning_rate": 4.906013071191517e-07, + "loss": 0.0003, + "num_input_tokens_seen": 96987088, + "step": 143905 + }, + { + "epoch": 3.5157452422250994, + "grad_norm": 0.004914917983114719, + "learning_rate": 4.90527924984759e-07, + "loss": 0.0002, + "num_input_tokens_seen": 96990352, + "step": 143910 + }, + { + "epoch": 3.5158673930569466, + "grad_norm": 0.003742861095815897, + "learning_rate": 4.904545465553847e-07, + "loss": 0.0, + "num_input_tokens_seen": 96993552, + "step": 143915 + }, + { + "epoch": 3.515989543888794, + "grad_norm": 0.0010810464154928923, + "learning_rate": 4.903811718315615e-07, + "loss": 0.0, + "num_input_tokens_seen": 96996880, + "step": 143920 + }, + { + "epoch": 3.516111694720641, + "grad_norm": 0.1657690852880478, + "learning_rate": 4.903078008138239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97000272, + "step": 143925 + }, + { + "epoch": 3.516233845552488, + "grad_norm": 0.0005674972198903561, + "learning_rate": 4.902344335027047e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97003792, + "step": 143930 + }, + { + "epoch": 3.5163559963843354, + "grad_norm": 0.0016297880792990327, + "learning_rate": 4.901610698987381e-07, + "loss": 0.0, + "num_input_tokens_seen": 97006672, + "step": 143935 + }, + { + "epoch": 3.5164781472161826, + "grad_norm": 0.013815726153552532, + "learning_rate": 4.900877100024571e-07, + "loss": 0.0, + "num_input_tokens_seen": 97009936, + "step": 143940 + }, + { + "epoch": 3.5166002980480298, + "grad_norm": 0.0040595149621367455, + "learning_rate": 4.900143538143958e-07, + "loss": 0.0, + "num_input_tokens_seen": 97013328, + "step": 143945 + }, + { + "epoch": 3.516722448879877, + "grad_norm": 0.00040780974086374044, + "learning_rate": 4.899410013350867e-07, + "loss": 0.0009, + "num_input_tokens_seen": 97016656, + "step": 143950 + }, + { + "epoch": 3.516844599711724, + "grad_norm": 0.00041566326399333775, + "learning_rate": 4.898676525650639e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97019536, + "step": 143955 + }, + { + "epoch": 3.5169667505435713, + "grad_norm": 0.026442712172865868, + "learning_rate": 4.897943075048612e-07, + "loss": 0.0, + "num_input_tokens_seen": 97022736, + "step": 143960 + }, + { + "epoch": 3.5170889013754185, + "grad_norm": 0.12575189769268036, + "learning_rate": 4.897209661550111e-07, + "loss": 0.0, + "num_input_tokens_seen": 97026960, + "step": 143965 + }, + { + "epoch": 3.5172110522072657, + "grad_norm": 0.0001432756835129112, + "learning_rate": 4.896476285160479e-07, + "loss": 0.0, + "num_input_tokens_seen": 97030544, + "step": 143970 + }, + { + "epoch": 3.5173332030391125, + "grad_norm": 17.486061096191406, + "learning_rate": 4.895742945885038e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97036240, + "step": 143975 + }, + { + "epoch": 3.51745535387096, + "grad_norm": 0.0007020349148660898, + "learning_rate": 4.895009643729133e-07, + "loss": 0.0, + "num_input_tokens_seen": 97039632, + "step": 143980 + }, + { + "epoch": 3.517577504702807, + "grad_norm": 0.001066066906787455, + "learning_rate": 4.894276378698087e-07, + "loss": 0.0, + "num_input_tokens_seen": 97043152, + "step": 143985 + }, + { + "epoch": 3.5176996555346545, + "grad_norm": 0.0017511459300294518, + "learning_rate": 4.893543150797236e-07, + "loss": 0.0, + "num_input_tokens_seen": 97046928, + "step": 143990 + }, + { + "epoch": 3.517821806366501, + "grad_norm": 0.004090828821063042, + "learning_rate": 4.892809960031916e-07, + "loss": 0.0, + "num_input_tokens_seen": 97050384, + "step": 143995 + }, + { + "epoch": 3.5179439571983484, + "grad_norm": 0.0008125362219288945, + "learning_rate": 4.892076806407451e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97053648, + "step": 144000 + }, + { + "epoch": 3.5180661080301956, + "grad_norm": 0.001185380737297237, + "learning_rate": 4.891343689929182e-07, + "loss": 0.0, + "num_input_tokens_seen": 97056912, + "step": 144005 + }, + { + "epoch": 3.518188258862043, + "grad_norm": 4.846961975097656, + "learning_rate": 4.890610610602437e-07, + "loss": 0.0017, + "num_input_tokens_seen": 97060496, + "step": 144010 + }, + { + "epoch": 3.51831040969389, + "grad_norm": 0.00037370057543739676, + "learning_rate": 4.889877568432541e-07, + "loss": 0.0, + "num_input_tokens_seen": 97064272, + "step": 144015 + }, + { + "epoch": 3.518432560525737, + "grad_norm": 0.02818647213280201, + "learning_rate": 4.889144563424834e-07, + "loss": 0.0, + "num_input_tokens_seen": 97067408, + "step": 144020 + }, + { + "epoch": 3.5185547113575844, + "grad_norm": 0.010698727332055569, + "learning_rate": 4.888411595584639e-07, + "loss": 0.0, + "num_input_tokens_seen": 97070736, + "step": 144025 + }, + { + "epoch": 3.5186768621894315, + "grad_norm": 0.0008168493513949215, + "learning_rate": 4.887678664917292e-07, + "loss": 0.0, + "num_input_tokens_seen": 97074192, + "step": 144030 + }, + { + "epoch": 3.5187990130212787, + "grad_norm": 0.057550206780433655, + "learning_rate": 4.886945771428118e-07, + "loss": 0.0, + "num_input_tokens_seen": 97077840, + "step": 144035 + }, + { + "epoch": 3.518921163853126, + "grad_norm": 1.4165632724761963, + "learning_rate": 4.886212915122453e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97080912, + "step": 144040 + }, + { + "epoch": 3.519043314684973, + "grad_norm": 0.0002670617832336575, + "learning_rate": 4.88548009600562e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97084240, + "step": 144045 + }, + { + "epoch": 3.5191654655168203, + "grad_norm": 0.010491433553397655, + "learning_rate": 4.884747314082951e-07, + "loss": 0.0, + "num_input_tokens_seen": 97087696, + "step": 144050 + }, + { + "epoch": 3.5192876163486675, + "grad_norm": 0.0011365021346136928, + "learning_rate": 4.884014569359779e-07, + "loss": 0.0, + "num_input_tokens_seen": 97091280, + "step": 144055 + }, + { + "epoch": 3.5194097671805142, + "grad_norm": 0.009114922024309635, + "learning_rate": 4.883281861841425e-07, + "loss": 0.0, + "num_input_tokens_seen": 97094544, + "step": 144060 + }, + { + "epoch": 3.519531918012362, + "grad_norm": 0.004652893636375666, + "learning_rate": 4.882549191533226e-07, + "loss": 0.0, + "num_input_tokens_seen": 97097808, + "step": 144065 + }, + { + "epoch": 3.5196540688442086, + "grad_norm": 0.0016463432693853974, + "learning_rate": 4.881816558440501e-07, + "loss": 0.0, + "num_input_tokens_seen": 97101072, + "step": 144070 + }, + { + "epoch": 3.5197762196760563, + "grad_norm": 0.0022587061394006014, + "learning_rate": 4.881083962568587e-07, + "loss": 0.0, + "num_input_tokens_seen": 97104144, + "step": 144075 + }, + { + "epoch": 3.519898370507903, + "grad_norm": 0.0006902991444803774, + "learning_rate": 4.880351403922804e-07, + "loss": 0.0224, + "num_input_tokens_seen": 97107280, + "step": 144080 + }, + { + "epoch": 3.52002052133975, + "grad_norm": 0.0041884733363986015, + "learning_rate": 4.879618882508481e-07, + "loss": 0.0, + "num_input_tokens_seen": 97110480, + "step": 144085 + }, + { + "epoch": 3.5201426721715974, + "grad_norm": 75.95166778564453, + "learning_rate": 4.878886398330952e-07, + "loss": 0.0922, + "num_input_tokens_seen": 97114000, + "step": 144090 + }, + { + "epoch": 3.5202648230034446, + "grad_norm": 0.332274466753006, + "learning_rate": 4.878153951395535e-07, + "loss": 0.0005, + "num_input_tokens_seen": 97117840, + "step": 144095 + }, + { + "epoch": 3.5203869738352918, + "grad_norm": 0.0002197538415202871, + "learning_rate": 4.877421541707563e-07, + "loss": 0.0, + "num_input_tokens_seen": 97120912, + "step": 144100 + }, + { + "epoch": 3.520509124667139, + "grad_norm": 0.0009739008964970708, + "learning_rate": 4.876689169272355e-07, + "loss": 0.0, + "num_input_tokens_seen": 97124048, + "step": 144105 + }, + { + "epoch": 3.520631275498986, + "grad_norm": 0.002452496439218521, + "learning_rate": 4.875956834095247e-07, + "loss": 0.0, + "num_input_tokens_seen": 97127760, + "step": 144110 + }, + { + "epoch": 3.5207534263308333, + "grad_norm": 0.01981990784406662, + "learning_rate": 4.875224536181553e-07, + "loss": 0.0, + "num_input_tokens_seen": 97131536, + "step": 144115 + }, + { + "epoch": 3.5208755771626805, + "grad_norm": 0.0002594464167486876, + "learning_rate": 4.87449227553661e-07, + "loss": 0.0, + "num_input_tokens_seen": 97134864, + "step": 144120 + }, + { + "epoch": 3.5209977279945277, + "grad_norm": 0.0004595739010255784, + "learning_rate": 4.873760052165737e-07, + "loss": 0.0, + "num_input_tokens_seen": 97138512, + "step": 144125 + }, + { + "epoch": 3.521119878826375, + "grad_norm": 0.012149565853178501, + "learning_rate": 4.873027866074258e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97142288, + "step": 144130 + }, + { + "epoch": 3.521242029658222, + "grad_norm": 0.007893732748925686, + "learning_rate": 4.8722957172675e-07, + "loss": 0.0, + "num_input_tokens_seen": 97145616, + "step": 144135 + }, + { + "epoch": 3.5213641804900693, + "grad_norm": 0.0037115265149623156, + "learning_rate": 4.871563605750785e-07, + "loss": 0.0, + "num_input_tokens_seen": 97149136, + "step": 144140 + }, + { + "epoch": 3.5214863313219165, + "grad_norm": 0.0001909531856654212, + "learning_rate": 4.870831531529438e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97152656, + "step": 144145 + }, + { + "epoch": 3.5216084821537637, + "grad_norm": 0.013603954575955868, + "learning_rate": 4.870099494608788e-07, + "loss": 0.0, + "num_input_tokens_seen": 97156112, + "step": 144150 + }, + { + "epoch": 3.5217306329856104, + "grad_norm": 0.0094887875020504, + "learning_rate": 4.86936749499415e-07, + "loss": 0.0, + "num_input_tokens_seen": 97159632, + "step": 144155 + }, + { + "epoch": 3.521852783817458, + "grad_norm": 0.0001825742656365037, + "learning_rate": 4.868635532690856e-07, + "loss": 0.0, + "num_input_tokens_seen": 97163408, + "step": 144160 + }, + { + "epoch": 3.521974934649305, + "grad_norm": 0.0007969773141667247, + "learning_rate": 4.867903607704219e-07, + "loss": 0.0, + "num_input_tokens_seen": 97166864, + "step": 144165 + }, + { + "epoch": 3.522097085481152, + "grad_norm": 0.00028032498084940016, + "learning_rate": 4.867171720039569e-07, + "loss": 0.0, + "num_input_tokens_seen": 97170064, + "step": 144170 + }, + { + "epoch": 3.522219236312999, + "grad_norm": 0.0015606768429279327, + "learning_rate": 4.86643986970223e-07, + "loss": 0.0, + "num_input_tokens_seen": 97173712, + "step": 144175 + }, + { + "epoch": 3.5223413871448463, + "grad_norm": 0.0021179928444325924, + "learning_rate": 4.865708056697517e-07, + "loss": 0.0, + "num_input_tokens_seen": 97177424, + "step": 144180 + }, + { + "epoch": 3.5224635379766935, + "grad_norm": 0.0003003637830261141, + "learning_rate": 4.864976281030761e-07, + "loss": 0.0032, + "num_input_tokens_seen": 97180560, + "step": 144185 + }, + { + "epoch": 3.5225856888085407, + "grad_norm": 0.001165257883258164, + "learning_rate": 4.864244542707274e-07, + "loss": 0.0, + "num_input_tokens_seen": 97183504, + "step": 144190 + }, + { + "epoch": 3.522707839640388, + "grad_norm": 0.00015066112973727286, + "learning_rate": 4.863512841732386e-07, + "loss": 0.0, + "num_input_tokens_seen": 97186640, + "step": 144195 + }, + { + "epoch": 3.522829990472235, + "grad_norm": 0.001419951906427741, + "learning_rate": 4.86278117811141e-07, + "loss": 0.0924, + "num_input_tokens_seen": 97189968, + "step": 144200 + }, + { + "epoch": 3.5229521413040823, + "grad_norm": 7.131059828680009e-05, + "learning_rate": 4.862049551849671e-07, + "loss": 0.1, + "num_input_tokens_seen": 97193104, + "step": 144205 + }, + { + "epoch": 3.5230742921359295, + "grad_norm": 0.00625317357480526, + "learning_rate": 4.861317962952494e-07, + "loss": 0.0, + "num_input_tokens_seen": 97196752, + "step": 144210 + }, + { + "epoch": 3.5231964429677767, + "grad_norm": 0.00016985490219667554, + "learning_rate": 4.860586411425195e-07, + "loss": 0.0, + "num_input_tokens_seen": 97200208, + "step": 144215 + }, + { + "epoch": 3.523318593799624, + "grad_norm": 0.0004274469683878124, + "learning_rate": 4.859854897273089e-07, + "loss": 0.0, + "num_input_tokens_seen": 97203472, + "step": 144220 + }, + { + "epoch": 3.523440744631471, + "grad_norm": 0.00014868633297737688, + "learning_rate": 4.859123420501506e-07, + "loss": 0.0308, + "num_input_tokens_seen": 97207248, + "step": 144225 + }, + { + "epoch": 3.5235628954633182, + "grad_norm": 0.000561856955755502, + "learning_rate": 4.858391981115759e-07, + "loss": 0.0, + "num_input_tokens_seen": 97211152, + "step": 144230 + }, + { + "epoch": 3.5236850462951654, + "grad_norm": 0.511713445186615, + "learning_rate": 4.857660579121164e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97214544, + "step": 144235 + }, + { + "epoch": 3.523807197127012, + "grad_norm": 0.0024884133599698544, + "learning_rate": 4.856929214523047e-07, + "loss": 0.0, + "num_input_tokens_seen": 97218256, + "step": 144240 + }, + { + "epoch": 3.52392934795886, + "grad_norm": 0.0004282156878616661, + "learning_rate": 4.856197887326726e-07, + "loss": 0.0671, + "num_input_tokens_seen": 97221456, + "step": 144245 + }, + { + "epoch": 3.5240514987907066, + "grad_norm": 0.00036971422377973795, + "learning_rate": 4.855466597537514e-07, + "loss": 0.0, + "num_input_tokens_seen": 97224656, + "step": 144250 + }, + { + "epoch": 3.524173649622554, + "grad_norm": 0.003901298623532057, + "learning_rate": 4.854735345160736e-07, + "loss": 0.0, + "num_input_tokens_seen": 97227984, + "step": 144255 + }, + { + "epoch": 3.524295800454401, + "grad_norm": 0.0001063659947249107, + "learning_rate": 4.854004130201704e-07, + "loss": 0.0965, + "num_input_tokens_seen": 97230992, + "step": 144260 + }, + { + "epoch": 3.524417951286248, + "grad_norm": 0.0028174621984362602, + "learning_rate": 4.853272952665737e-07, + "loss": 0.0, + "num_input_tokens_seen": 97234064, + "step": 144265 + }, + { + "epoch": 3.5245401021180953, + "grad_norm": 0.054041311144828796, + "learning_rate": 4.852541812558158e-07, + "loss": 0.0, + "num_input_tokens_seen": 97237328, + "step": 144270 + }, + { + "epoch": 3.5246622529499425, + "grad_norm": 0.0009716590284369886, + "learning_rate": 4.851810709884274e-07, + "loss": 0.0739, + "num_input_tokens_seen": 97240592, + "step": 144275 + }, + { + "epoch": 3.5247844037817897, + "grad_norm": 0.005417170003056526, + "learning_rate": 4.851079644649412e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97243856, + "step": 144280 + }, + { + "epoch": 3.524906554613637, + "grad_norm": 0.0002540026616770774, + "learning_rate": 4.850348616858881e-07, + "loss": 0.0, + "num_input_tokens_seen": 97247312, + "step": 144285 + }, + { + "epoch": 3.525028705445484, + "grad_norm": 0.010472937487065792, + "learning_rate": 4.849617626518002e-07, + "loss": 0.0653, + "num_input_tokens_seen": 97251216, + "step": 144290 + }, + { + "epoch": 3.5251508562773313, + "grad_norm": 0.0028327424079179764, + "learning_rate": 4.848886673632086e-07, + "loss": 0.0, + "num_input_tokens_seen": 97254416, + "step": 144295 + }, + { + "epoch": 3.5252730071091785, + "grad_norm": 0.010983242653310299, + "learning_rate": 4.848155758206452e-07, + "loss": 0.0, + "num_input_tokens_seen": 97257872, + "step": 144300 + }, + { + "epoch": 3.5253951579410256, + "grad_norm": 0.013581220991909504, + "learning_rate": 4.847424880246417e-07, + "loss": 0.0738, + "num_input_tokens_seen": 97261072, + "step": 144305 + }, + { + "epoch": 3.525517308772873, + "grad_norm": 0.0014813852030783892, + "learning_rate": 4.846694039757292e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97264464, + "step": 144310 + }, + { + "epoch": 3.52563945960472, + "grad_norm": 0.0008495126385241747, + "learning_rate": 4.845963236744397e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97267728, + "step": 144315 + }, + { + "epoch": 3.525761610436567, + "grad_norm": 0.0009643576922826469, + "learning_rate": 4.845232471213045e-07, + "loss": 0.0, + "num_input_tokens_seen": 97270800, + "step": 144320 + }, + { + "epoch": 3.525883761268414, + "grad_norm": 0.24121108651161194, + "learning_rate": 4.844501743168543e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97274768, + "step": 144325 + }, + { + "epoch": 3.5260059121002616, + "grad_norm": 0.00016662731650285423, + "learning_rate": 4.843771052616216e-07, + "loss": 0.0625, + "num_input_tokens_seen": 97278608, + "step": 144330 + }, + { + "epoch": 3.5261280629321083, + "grad_norm": 0.01039132196456194, + "learning_rate": 4.843040399561369e-07, + "loss": 0.0, + "num_input_tokens_seen": 97282960, + "step": 144335 + }, + { + "epoch": 3.526250213763956, + "grad_norm": 23.242433547973633, + "learning_rate": 4.842309784009323e-07, + "loss": 0.0389, + "num_input_tokens_seen": 97285840, + "step": 144340 + }, + { + "epoch": 3.5263723645958027, + "grad_norm": 0.00015703374810982496, + "learning_rate": 4.841579205965384e-07, + "loss": 0.0, + "num_input_tokens_seen": 97289168, + "step": 144345 + }, + { + "epoch": 3.52649451542765, + "grad_norm": 0.035092271864414215, + "learning_rate": 4.840848665434872e-07, + "loss": 0.0, + "num_input_tokens_seen": 97292240, + "step": 144350 + }, + { + "epoch": 3.526616666259497, + "grad_norm": 0.004903553519397974, + "learning_rate": 4.840118162423092e-07, + "loss": 0.0, + "num_input_tokens_seen": 97295632, + "step": 144355 + }, + { + "epoch": 3.5267388170913443, + "grad_norm": 0.0005345535464584827, + "learning_rate": 4.839387696935361e-07, + "loss": 0.0, + "num_input_tokens_seen": 97298832, + "step": 144360 + }, + { + "epoch": 3.5268609679231915, + "grad_norm": 0.023823775351047516, + "learning_rate": 4.838657268976994e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97301968, + "step": 144365 + }, + { + "epoch": 3.5269831187550387, + "grad_norm": 0.00848571676760912, + "learning_rate": 4.837926878553296e-07, + "loss": 0.0, + "num_input_tokens_seen": 97305040, + "step": 144370 + }, + { + "epoch": 3.527105269586886, + "grad_norm": 0.00041069305734708905, + "learning_rate": 4.837196525669586e-07, + "loss": 0.0, + "num_input_tokens_seen": 97309008, + "step": 144375 + }, + { + "epoch": 3.527227420418733, + "grad_norm": 0.024357300251722336, + "learning_rate": 4.836466210331168e-07, + "loss": 0.0, + "num_input_tokens_seen": 97312656, + "step": 144380 + }, + { + "epoch": 3.5273495712505802, + "grad_norm": 36.91389465332031, + "learning_rate": 4.83573593254336e-07, + "loss": 0.0573, + "num_input_tokens_seen": 97315792, + "step": 144385 + }, + { + "epoch": 3.5274717220824274, + "grad_norm": 0.003210177179425955, + "learning_rate": 4.835005692311466e-07, + "loss": 0.0, + "num_input_tokens_seen": 97319952, + "step": 144390 + }, + { + "epoch": 3.5275938729142746, + "grad_norm": 6.834761734353378e-05, + "learning_rate": 4.834275489640799e-07, + "loss": 0.0852, + "num_input_tokens_seen": 97323856, + "step": 144395 + }, + { + "epoch": 3.527716023746122, + "grad_norm": 0.001592350541613996, + "learning_rate": 4.833545324536674e-07, + "loss": 0.0, + "num_input_tokens_seen": 97327696, + "step": 144400 + }, + { + "epoch": 3.527838174577969, + "grad_norm": 0.0010718260891735554, + "learning_rate": 4.832815197004394e-07, + "loss": 0.0004, + "num_input_tokens_seen": 97330640, + "step": 144405 + }, + { + "epoch": 3.527960325409816, + "grad_norm": 0.09385642409324646, + "learning_rate": 4.832085107049275e-07, + "loss": 0.0, + "num_input_tokens_seen": 97333840, + "step": 144410 + }, + { + "epoch": 3.5280824762416634, + "grad_norm": 0.003379811067134142, + "learning_rate": 4.83135505467662e-07, + "loss": 0.0, + "num_input_tokens_seen": 97337168, + "step": 144415 + }, + { + "epoch": 3.52820462707351, + "grad_norm": 0.08003751188516617, + "learning_rate": 4.830625039891744e-07, + "loss": 0.0, + "num_input_tokens_seen": 97340560, + "step": 144420 + }, + { + "epoch": 3.5283267779053578, + "grad_norm": 0.0010598560329526663, + "learning_rate": 4.82989506269995e-07, + "loss": 0.0, + "num_input_tokens_seen": 97344016, + "step": 144425 + }, + { + "epoch": 3.5284489287372045, + "grad_norm": 0.000324626627843827, + "learning_rate": 4.829165123106552e-07, + "loss": 0.0348, + "num_input_tokens_seen": 97347280, + "step": 144430 + }, + { + "epoch": 3.528571079569052, + "grad_norm": 0.009058714844286442, + "learning_rate": 4.828435221116858e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97350160, + "step": 144435 + }, + { + "epoch": 3.528693230400899, + "grad_norm": 0.03952280059456825, + "learning_rate": 4.827705356736169e-07, + "loss": 0.0, + "num_input_tokens_seen": 97353296, + "step": 144440 + }, + { + "epoch": 3.528815381232746, + "grad_norm": 0.005792593117803335, + "learning_rate": 4.826975529969802e-07, + "loss": 0.0, + "num_input_tokens_seen": 97356432, + "step": 144445 + }, + { + "epoch": 3.5289375320645933, + "grad_norm": 0.0001956968626473099, + "learning_rate": 4.826245740823056e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97359824, + "step": 144450 + }, + { + "epoch": 3.5290596828964405, + "grad_norm": 0.00083877460565418, + "learning_rate": 4.825515989301244e-07, + "loss": 0.0, + "num_input_tokens_seen": 97363536, + "step": 144455 + }, + { + "epoch": 3.5291818337282876, + "grad_norm": 0.00042945987661369145, + "learning_rate": 4.824786275409675e-07, + "loss": 0.0, + "num_input_tokens_seen": 97366800, + "step": 144460 + }, + { + "epoch": 3.529303984560135, + "grad_norm": 0.013885509222745895, + "learning_rate": 4.824056599153646e-07, + "loss": 0.0, + "num_input_tokens_seen": 97370256, + "step": 144465 + }, + { + "epoch": 3.529426135391982, + "grad_norm": 0.9156922101974487, + "learning_rate": 4.823326960538476e-07, + "loss": 0.0, + "num_input_tokens_seen": 97373392, + "step": 144470 + }, + { + "epoch": 3.529548286223829, + "grad_norm": 0.004656149540096521, + "learning_rate": 4.82259735956946e-07, + "loss": 0.0, + "num_input_tokens_seen": 97377104, + "step": 144475 + }, + { + "epoch": 3.5296704370556764, + "grad_norm": 0.0002942743303719908, + "learning_rate": 4.821867796251908e-07, + "loss": 0.0, + "num_input_tokens_seen": 97380368, + "step": 144480 + }, + { + "epoch": 3.5297925878875236, + "grad_norm": 0.004940942395478487, + "learning_rate": 4.82113827059113e-07, + "loss": 0.0, + "num_input_tokens_seen": 97383568, + "step": 144485 + }, + { + "epoch": 3.529914738719371, + "grad_norm": 0.0011132443323731422, + "learning_rate": 4.820408782592425e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97387024, + "step": 144490 + }, + { + "epoch": 3.530036889551218, + "grad_norm": 0.001001048949547112, + "learning_rate": 4.819679332261104e-07, + "loss": 0.0, + "num_input_tokens_seen": 97390224, + "step": 144495 + }, + { + "epoch": 3.530159040383065, + "grad_norm": 0.00019185699056833982, + "learning_rate": 4.818949919602465e-07, + "loss": 0.0, + "num_input_tokens_seen": 97393296, + "step": 144500 + }, + { + "epoch": 3.530281191214912, + "grad_norm": 0.0046676155179739, + "learning_rate": 4.818220544621817e-07, + "loss": 0.0, + "num_input_tokens_seen": 97396496, + "step": 144505 + }, + { + "epoch": 3.5304033420467595, + "grad_norm": 0.26163390278816223, + "learning_rate": 4.817491207324461e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97399760, + "step": 144510 + }, + { + "epoch": 3.5305254928786063, + "grad_norm": 7.951348379720002e-05, + "learning_rate": 4.816761907715702e-07, + "loss": 0.0, + "num_input_tokens_seen": 97403152, + "step": 144515 + }, + { + "epoch": 3.530647643710454, + "grad_norm": 0.0023116308730095625, + "learning_rate": 4.81603264580085e-07, + "loss": 0.0, + "num_input_tokens_seen": 97406736, + "step": 144520 + }, + { + "epoch": 3.5307697945423007, + "grad_norm": 0.0011757491156458855, + "learning_rate": 4.8153034215852e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97409744, + "step": 144525 + }, + { + "epoch": 3.530891945374148, + "grad_norm": 0.0014412114396691322, + "learning_rate": 4.814574235074056e-07, + "loss": 0.0, + "num_input_tokens_seen": 97413328, + "step": 144530 + }, + { + "epoch": 3.531014096205995, + "grad_norm": 0.0006095138960517943, + "learning_rate": 4.813845086272727e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97416720, + "step": 144535 + }, + { + "epoch": 3.5311362470378422, + "grad_norm": 0.0010900633642449975, + "learning_rate": 4.813115975186512e-07, + "loss": 0.0, + "num_input_tokens_seen": 97420176, + "step": 144540 + }, + { + "epoch": 3.5312583978696894, + "grad_norm": 0.0006364626460708678, + "learning_rate": 4.812386901820708e-07, + "loss": 0.0, + "num_input_tokens_seen": 97423312, + "step": 144545 + }, + { + "epoch": 3.5313805487015366, + "grad_norm": 0.004302667919546366, + "learning_rate": 4.811657866180621e-07, + "loss": 0.0, + "num_input_tokens_seen": 97426960, + "step": 144550 + }, + { + "epoch": 3.531502699533384, + "grad_norm": 0.0002200361923314631, + "learning_rate": 4.810928868271558e-07, + "loss": 0.0, + "num_input_tokens_seen": 97429968, + "step": 144555 + }, + { + "epoch": 3.531624850365231, + "grad_norm": 0.00010742614540504292, + "learning_rate": 4.810199908098813e-07, + "loss": 0.0, + "num_input_tokens_seen": 97433616, + "step": 144560 + }, + { + "epoch": 3.531747001197078, + "grad_norm": 0.0015959254233166575, + "learning_rate": 4.809470985667692e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97436624, + "step": 144565 + }, + { + "epoch": 3.5318691520289254, + "grad_norm": 0.00047675054520368576, + "learning_rate": 4.808742100983492e-07, + "loss": 0.0, + "num_input_tokens_seen": 97440144, + "step": 144570 + }, + { + "epoch": 3.5319913028607726, + "grad_norm": 0.002196983899921179, + "learning_rate": 4.808013254051514e-07, + "loss": 0.0947, + "num_input_tokens_seen": 97443856, + "step": 144575 + }, + { + "epoch": 3.5321134536926198, + "grad_norm": 0.0001492752635385841, + "learning_rate": 4.807284444877066e-07, + "loss": 0.0, + "num_input_tokens_seen": 97447184, + "step": 144580 + }, + { + "epoch": 3.532235604524467, + "grad_norm": 0.0891515240073204, + "learning_rate": 4.806555673465437e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97450448, + "step": 144585 + }, + { + "epoch": 3.532357755356314, + "grad_norm": 0.002806537551805377, + "learning_rate": 4.805826939821937e-07, + "loss": 0.0, + "num_input_tokens_seen": 97453520, + "step": 144590 + }, + { + "epoch": 3.5324799061881613, + "grad_norm": 0.0002000067033804953, + "learning_rate": 4.805098243951855e-07, + "loss": 0.0, + "num_input_tokens_seen": 97456656, + "step": 144595 + }, + { + "epoch": 3.532602057020008, + "grad_norm": 0.0005195115809328854, + "learning_rate": 4.8043695858605e-07, + "loss": 0.0, + "num_input_tokens_seen": 97459728, + "step": 144600 + }, + { + "epoch": 3.5327242078518557, + "grad_norm": 0.025334253907203674, + "learning_rate": 4.803640965553164e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97462736, + "step": 144605 + }, + { + "epoch": 3.5328463586837024, + "grad_norm": 0.00040619299397803843, + "learning_rate": 4.802912383035148e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97465808, + "step": 144610 + }, + { + "epoch": 3.5329685095155496, + "grad_norm": 0.00015731289749965072, + "learning_rate": 4.802183838311755e-07, + "loss": 0.0, + "num_input_tokens_seen": 97469456, + "step": 144615 + }, + { + "epoch": 3.533090660347397, + "grad_norm": 0.0008111689821816981, + "learning_rate": 4.801455331388275e-07, + "loss": 0.0, + "num_input_tokens_seen": 97472720, + "step": 144620 + }, + { + "epoch": 3.533212811179244, + "grad_norm": 0.013854150660336018, + "learning_rate": 4.800726862270014e-07, + "loss": 0.0, + "num_input_tokens_seen": 97475920, + "step": 144625 + }, + { + "epoch": 3.533334962011091, + "grad_norm": 0.0011478314409032464, + "learning_rate": 4.799998430962267e-07, + "loss": 0.0316, + "num_input_tokens_seen": 97479248, + "step": 144630 + }, + { + "epoch": 3.5334571128429384, + "grad_norm": 0.0010483600199222565, + "learning_rate": 4.799270037470324e-07, + "loss": 0.0, + "num_input_tokens_seen": 97482768, + "step": 144635 + }, + { + "epoch": 3.5335792636747856, + "grad_norm": 0.013378694653511047, + "learning_rate": 4.798541681799494e-07, + "loss": 0.0, + "num_input_tokens_seen": 97485968, + "step": 144640 + }, + { + "epoch": 3.5337014145066328, + "grad_norm": 0.006969807669520378, + "learning_rate": 4.797813363955064e-07, + "loss": 0.0, + "num_input_tokens_seen": 97489296, + "step": 144645 + }, + { + "epoch": 3.53382356533848, + "grad_norm": 0.00013974278408568352, + "learning_rate": 4.797085083942336e-07, + "loss": 0.0, + "num_input_tokens_seen": 97492816, + "step": 144650 + }, + { + "epoch": 3.533945716170327, + "grad_norm": 0.24057985842227936, + "learning_rate": 4.796356841766602e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97496272, + "step": 144655 + }, + { + "epoch": 3.5340678670021743, + "grad_norm": 0.005007751751691103, + "learning_rate": 4.795628637433165e-07, + "loss": 0.0, + "num_input_tokens_seen": 97499792, + "step": 144660 + }, + { + "epoch": 3.5341900178340215, + "grad_norm": 0.002173068467527628, + "learning_rate": 4.794900470947312e-07, + "loss": 0.0, + "num_input_tokens_seen": 97503504, + "step": 144665 + }, + { + "epoch": 3.5343121686658687, + "grad_norm": 0.00048343787784688175, + "learning_rate": 4.794172342314345e-07, + "loss": 0.0, + "num_input_tokens_seen": 97506768, + "step": 144670 + }, + { + "epoch": 3.534434319497716, + "grad_norm": 35.95384979248047, + "learning_rate": 4.793444251539558e-07, + "loss": 0.1554, + "num_input_tokens_seen": 97509968, + "step": 144675 + }, + { + "epoch": 3.534556470329563, + "grad_norm": 0.0020706241484731436, + "learning_rate": 4.792716198628242e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97513616, + "step": 144680 + }, + { + "epoch": 3.53467862116141, + "grad_norm": 0.0008316741441376507, + "learning_rate": 4.791988183585697e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97517392, + "step": 144685 + }, + { + "epoch": 3.5348007719932575, + "grad_norm": 0.0002605855988804251, + "learning_rate": 4.791260206417212e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97520912, + "step": 144690 + }, + { + "epoch": 3.5349229228251042, + "grad_norm": 0.046921804547309875, + "learning_rate": 4.790532267128088e-07, + "loss": 0.0, + "num_input_tokens_seen": 97524304, + "step": 144695 + }, + { + "epoch": 3.535045073656952, + "grad_norm": 0.00059171934844926, + "learning_rate": 4.78980436572361e-07, + "loss": 0.0, + "num_input_tokens_seen": 97528592, + "step": 144700 + }, + { + "epoch": 3.5351672244887986, + "grad_norm": 4.206080120638944e-05, + "learning_rate": 4.789076502209077e-07, + "loss": 0.0, + "num_input_tokens_seen": 97532624, + "step": 144705 + }, + { + "epoch": 3.535289375320646, + "grad_norm": 0.004249978344887495, + "learning_rate": 4.788348676589784e-07, + "loss": 0.0, + "num_input_tokens_seen": 97535760, + "step": 144710 + }, + { + "epoch": 3.535411526152493, + "grad_norm": 0.0190388523042202, + "learning_rate": 4.787620888871018e-07, + "loss": 0.0815, + "num_input_tokens_seen": 97539216, + "step": 144715 + }, + { + "epoch": 3.53553367698434, + "grad_norm": 0.0013153469190001488, + "learning_rate": 4.786893139058078e-07, + "loss": 0.0, + "num_input_tokens_seen": 97542736, + "step": 144720 + }, + { + "epoch": 3.5356558278161874, + "grad_norm": 0.0047118207439780235, + "learning_rate": 4.78616542715625e-07, + "loss": 0.0, + "num_input_tokens_seen": 97545936, + "step": 144725 + }, + { + "epoch": 3.5357779786480346, + "grad_norm": 0.0007281777216121554, + "learning_rate": 4.785437753170832e-07, + "loss": 0.0, + "num_input_tokens_seen": 97549136, + "step": 144730 + }, + { + "epoch": 3.5359001294798817, + "grad_norm": 0.20080366730690002, + "learning_rate": 4.784710117107112e-07, + "loss": 0.0, + "num_input_tokens_seen": 97552464, + "step": 144735 + }, + { + "epoch": 3.536022280311729, + "grad_norm": 0.0007743925671093166, + "learning_rate": 4.783982518970384e-07, + "loss": 0.0, + "num_input_tokens_seen": 97556112, + "step": 144740 + }, + { + "epoch": 3.536144431143576, + "grad_norm": 0.16999106109142303, + "learning_rate": 4.783254958765939e-07, + "loss": 0.0, + "num_input_tokens_seen": 97559440, + "step": 144745 + }, + { + "epoch": 3.5362665819754233, + "grad_norm": 0.0008060486288741231, + "learning_rate": 4.782527436499063e-07, + "loss": 0.0, + "num_input_tokens_seen": 97562576, + "step": 144750 + }, + { + "epoch": 3.5363887328072705, + "grad_norm": 0.02622039243578911, + "learning_rate": 4.781799952175056e-07, + "loss": 0.0, + "num_input_tokens_seen": 97566160, + "step": 144755 + }, + { + "epoch": 3.5365108836391177, + "grad_norm": 8.060546679189429e-05, + "learning_rate": 4.781072505799197e-07, + "loss": 0.0, + "num_input_tokens_seen": 97570064, + "step": 144760 + }, + { + "epoch": 3.536633034470965, + "grad_norm": 0.007018039468675852, + "learning_rate": 4.780345097376784e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97573200, + "step": 144765 + }, + { + "epoch": 3.536755185302812, + "grad_norm": 0.008267217315733433, + "learning_rate": 4.779617726913109e-07, + "loss": 0.0829, + "num_input_tokens_seen": 97576528, + "step": 144770 + }, + { + "epoch": 3.5368773361346593, + "grad_norm": 0.001004086690954864, + "learning_rate": 4.778890394413454e-07, + "loss": 0.0, + "num_input_tokens_seen": 97580176, + "step": 144775 + }, + { + "epoch": 3.536999486966506, + "grad_norm": 0.0035942229442298412, + "learning_rate": 4.778163099883117e-07, + "loss": 0.0003, + "num_input_tokens_seen": 97583440, + "step": 144780 + }, + { + "epoch": 3.5371216377983536, + "grad_norm": 0.002467522630468011, + "learning_rate": 4.777435843327377e-07, + "loss": 0.0, + "num_input_tokens_seen": 97586896, + "step": 144785 + }, + { + "epoch": 3.5372437886302004, + "grad_norm": 0.00476666959002614, + "learning_rate": 4.776708624751535e-07, + "loss": 0.0, + "num_input_tokens_seen": 97590160, + "step": 144790 + }, + { + "epoch": 3.5373659394620476, + "grad_norm": 4.979291043127887e-05, + "learning_rate": 4.775981444160865e-07, + "loss": 0.0846, + "num_input_tokens_seen": 97593552, + "step": 144795 + }, + { + "epoch": 3.5374880902938948, + "grad_norm": 0.0007779115112498403, + "learning_rate": 4.775254301560666e-07, + "loss": 0.0, + "num_input_tokens_seen": 97596816, + "step": 144800 + }, + { + "epoch": 3.537610241125742, + "grad_norm": 0.6420580148696899, + "learning_rate": 4.774527196956226e-07, + "loss": 0.0005, + "num_input_tokens_seen": 97600080, + "step": 144805 + }, + { + "epoch": 3.537732391957589, + "grad_norm": 0.0033624963834881783, + "learning_rate": 4.773800130352825e-07, + "loss": 0.0, + "num_input_tokens_seen": 97603152, + "step": 144810 + }, + { + "epoch": 3.5378545427894363, + "grad_norm": 0.002699097152799368, + "learning_rate": 4.77307310175576e-07, + "loss": 0.0, + "num_input_tokens_seen": 97606608, + "step": 144815 + }, + { + "epoch": 3.5379766936212835, + "grad_norm": 0.00019098362827207893, + "learning_rate": 4.772346111170309e-07, + "loss": 0.0, + "num_input_tokens_seen": 97609872, + "step": 144820 + }, + { + "epoch": 3.5380988444531307, + "grad_norm": 0.028840744867920876, + "learning_rate": 4.771619158601764e-07, + "loss": 0.0, + "num_input_tokens_seen": 97613328, + "step": 144825 + }, + { + "epoch": 3.538220995284978, + "grad_norm": 0.011287868022918701, + "learning_rate": 4.770892244055413e-07, + "loss": 0.0, + "num_input_tokens_seen": 97616592, + "step": 144830 + }, + { + "epoch": 3.538343146116825, + "grad_norm": 0.02073443867266178, + "learning_rate": 4.770165367536541e-07, + "loss": 0.0, + "num_input_tokens_seen": 97620240, + "step": 144835 + }, + { + "epoch": 3.5384652969486723, + "grad_norm": 0.0004936923505738378, + "learning_rate": 4.76943852905043e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97623824, + "step": 144840 + }, + { + "epoch": 3.5385874477805195, + "grad_norm": 0.0005841117817908525, + "learning_rate": 4.768711728602371e-07, + "loss": 0.0, + "num_input_tokens_seen": 97626896, + "step": 144845 + }, + { + "epoch": 3.5387095986123667, + "grad_norm": 0.0016818649601191282, + "learning_rate": 4.767984966197649e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97630224, + "step": 144850 + }, + { + "epoch": 3.538831749444214, + "grad_norm": 0.007975401356816292, + "learning_rate": 4.767258241841543e-07, + "loss": 0.0, + "num_input_tokens_seen": 97633552, + "step": 144855 + }, + { + "epoch": 3.538953900276061, + "grad_norm": 0.03557858616113663, + "learning_rate": 4.766531555539343e-07, + "loss": 0.0, + "num_input_tokens_seen": 97637648, + "step": 144860 + }, + { + "epoch": 3.539076051107908, + "grad_norm": 0.0010234951041638851, + "learning_rate": 4.7658049072963357e-07, + "loss": 0.0, + "num_input_tokens_seen": 97641488, + "step": 144865 + }, + { + "epoch": 3.5391982019397554, + "grad_norm": 0.3270286023616791, + "learning_rate": 4.7650782971178003e-07, + "loss": 0.0, + "num_input_tokens_seen": 97644432, + "step": 144870 + }, + { + "epoch": 3.539320352771602, + "grad_norm": 0.0560191385447979, + "learning_rate": 4.764351725009027e-07, + "loss": 0.0698, + "num_input_tokens_seen": 97647632, + "step": 144875 + }, + { + "epoch": 3.53944250360345, + "grad_norm": 0.004076603800058365, + "learning_rate": 4.763625190975292e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97650896, + "step": 144880 + }, + { + "epoch": 3.5395646544352966, + "grad_norm": 0.0007098827045410872, + "learning_rate": 4.762898695021884e-07, + "loss": 0.0501, + "num_input_tokens_seen": 97653776, + "step": 144885 + }, + { + "epoch": 3.5396868052671437, + "grad_norm": 0.009917641058564186, + "learning_rate": 4.7621722371540884e-07, + "loss": 0.0325, + "num_input_tokens_seen": 97656976, + "step": 144890 + }, + { + "epoch": 3.539808956098991, + "grad_norm": 0.010203611105680466, + "learning_rate": 4.7614458173771807e-07, + "loss": 0.0, + "num_input_tokens_seen": 97660560, + "step": 144895 + }, + { + "epoch": 3.539931106930838, + "grad_norm": 0.004964059218764305, + "learning_rate": 4.760719435696453e-07, + "loss": 0.0, + "num_input_tokens_seen": 97664080, + "step": 144900 + }, + { + "epoch": 3.5400532577626853, + "grad_norm": 0.0006427750922739506, + "learning_rate": 4.759993092117178e-07, + "loss": 0.0, + "num_input_tokens_seen": 97667472, + "step": 144905 + }, + { + "epoch": 3.5401754085945325, + "grad_norm": 0.004524386487901211, + "learning_rate": 4.759266786644648e-07, + "loss": 0.0371, + "num_input_tokens_seen": 97670672, + "step": 144910 + }, + { + "epoch": 3.5402975594263797, + "grad_norm": 8.596708357799798e-05, + "learning_rate": 4.7585405192841343e-07, + "loss": 0.0, + "num_input_tokens_seen": 97674000, + "step": 144915 + }, + { + "epoch": 3.540419710258227, + "grad_norm": 0.01520247757434845, + "learning_rate": 4.7578142900409237e-07, + "loss": 0.0, + "num_input_tokens_seen": 97677392, + "step": 144920 + }, + { + "epoch": 3.540541861090074, + "grad_norm": 0.0006964769563637674, + "learning_rate": 4.7570880989203023e-07, + "loss": 0.1114, + "num_input_tokens_seen": 97680784, + "step": 144925 + }, + { + "epoch": 3.5406640119219213, + "grad_norm": 0.0032320874743163586, + "learning_rate": 4.756361945927542e-07, + "loss": 0.0, + "num_input_tokens_seen": 97684048, + "step": 144930 + }, + { + "epoch": 3.5407861627537685, + "grad_norm": 0.0014720155159011483, + "learning_rate": 4.755635831067931e-07, + "loss": 0.0224, + "num_input_tokens_seen": 97687376, + "step": 144935 + }, + { + "epoch": 3.5409083135856156, + "grad_norm": 0.00014576480316463858, + "learning_rate": 4.754909754346748e-07, + "loss": 0.0, + "num_input_tokens_seen": 97691088, + "step": 144940 + }, + { + "epoch": 3.541030464417463, + "grad_norm": 0.0016071463469415903, + "learning_rate": 4.7541837157692676e-07, + "loss": 0.0, + "num_input_tokens_seen": 97694736, + "step": 144945 + }, + { + "epoch": 3.5411526152493096, + "grad_norm": 0.027005070820450783, + "learning_rate": 4.753457715340778e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97698064, + "step": 144950 + }, + { + "epoch": 3.541274766081157, + "grad_norm": 0.018544316291809082, + "learning_rate": 4.752731753066552e-07, + "loss": 0.0, + "num_input_tokens_seen": 97701392, + "step": 144955 + }, + { + "epoch": 3.541396916913004, + "grad_norm": 0.025899503380060196, + "learning_rate": 4.7520058289518747e-07, + "loss": 0.0, + "num_input_tokens_seen": 97704720, + "step": 144960 + }, + { + "epoch": 3.5415190677448516, + "grad_norm": 0.0339164175093174, + "learning_rate": 4.75127994300202e-07, + "loss": 0.0, + "num_input_tokens_seen": 97707856, + "step": 144965 + }, + { + "epoch": 3.5416412185766983, + "grad_norm": 0.01129910722374916, + "learning_rate": 4.7505540952222725e-07, + "loss": 0.0, + "num_input_tokens_seen": 97711312, + "step": 144970 + }, + { + "epoch": 3.5417633694085455, + "grad_norm": 0.001934434287250042, + "learning_rate": 4.749828285617904e-07, + "loss": 0.0, + "num_input_tokens_seen": 97714704, + "step": 144975 + }, + { + "epoch": 3.5418855202403927, + "grad_norm": 0.0007302694721147418, + "learning_rate": 4.7491025141941955e-07, + "loss": 0.0348, + "num_input_tokens_seen": 97718032, + "step": 144980 + }, + { + "epoch": 3.54200767107224, + "grad_norm": 0.0018314080080017447, + "learning_rate": 4.74837678095643e-07, + "loss": 0.0, + "num_input_tokens_seen": 97721360, + "step": 144985 + }, + { + "epoch": 3.542129821904087, + "grad_norm": 0.0010636234655976295, + "learning_rate": 4.7476510859098775e-07, + "loss": 0.0, + "num_input_tokens_seen": 97724560, + "step": 144990 + }, + { + "epoch": 3.5422519727359343, + "grad_norm": 0.01839928887784481, + "learning_rate": 4.7469254290598224e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97728272, + "step": 144995 + }, + { + "epoch": 3.5423741235677815, + "grad_norm": 0.00023793902073521167, + "learning_rate": 4.7461998104115355e-07, + "loss": 0.0, + "num_input_tokens_seen": 97731920, + "step": 145000 + }, + { + "epoch": 3.5424962743996287, + "grad_norm": 0.0005881048273295164, + "learning_rate": 4.7454742299703e-07, + "loss": 0.0, + "num_input_tokens_seen": 97734928, + "step": 145005 + }, + { + "epoch": 3.542618425231476, + "grad_norm": 0.0031892170663923025, + "learning_rate": 4.744748687741386e-07, + "loss": 0.0, + "num_input_tokens_seen": 97738000, + "step": 145010 + }, + { + "epoch": 3.542740576063323, + "grad_norm": 0.0010000730399042368, + "learning_rate": 4.7440231837300716e-07, + "loss": 0.0, + "num_input_tokens_seen": 97741904, + "step": 145015 + }, + { + "epoch": 3.5428627268951702, + "grad_norm": 0.0004860509361606091, + "learning_rate": 4.743297717941639e-07, + "loss": 0.0, + "num_input_tokens_seen": 97744912, + "step": 145020 + }, + { + "epoch": 3.5429848777270174, + "grad_norm": 0.0049315025098621845, + "learning_rate": 4.7425722903813556e-07, + "loss": 0.0, + "num_input_tokens_seen": 97748112, + "step": 145025 + }, + { + "epoch": 3.5431070285588646, + "grad_norm": 0.0005576725816354156, + "learning_rate": 4.7418469010545036e-07, + "loss": 0.0, + "num_input_tokens_seen": 97751440, + "step": 145030 + }, + { + "epoch": 3.543229179390712, + "grad_norm": 0.01474781148135662, + "learning_rate": 4.7411215499663525e-07, + "loss": 0.0, + "num_input_tokens_seen": 97754640, + "step": 145035 + }, + { + "epoch": 3.543351330222559, + "grad_norm": 0.0008148238994181156, + "learning_rate": 4.7403962371221837e-07, + "loss": 0.0, + "num_input_tokens_seen": 97758032, + "step": 145040 + }, + { + "epoch": 3.5434734810544057, + "grad_norm": 0.002185927936807275, + "learning_rate": 4.7396709625272636e-07, + "loss": 0.0, + "num_input_tokens_seen": 97761680, + "step": 145045 + }, + { + "epoch": 3.5435956318862534, + "grad_norm": 0.08207400143146515, + "learning_rate": 4.738945726186875e-07, + "loss": 0.0, + "num_input_tokens_seen": 97765264, + "step": 145050 + }, + { + "epoch": 3.5437177827181, + "grad_norm": 0.0013452256098389626, + "learning_rate": 4.738220528106288e-07, + "loss": 0.0, + "num_input_tokens_seen": 97768336, + "step": 145055 + }, + { + "epoch": 3.5438399335499478, + "grad_norm": 0.0007345146732404828, + "learning_rate": 4.7374953682907736e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97771536, + "step": 145060 + }, + { + "epoch": 3.5439620843817945, + "grad_norm": 0.002797122113406658, + "learning_rate": 4.7367702467456115e-07, + "loss": 0.0, + "num_input_tokens_seen": 97774544, + "step": 145065 + }, + { + "epoch": 3.5440842352136417, + "grad_norm": 0.00030433552456088364, + "learning_rate": 4.736045163476068e-07, + "loss": 0.0, + "num_input_tokens_seen": 97777808, + "step": 145070 + }, + { + "epoch": 3.544206386045489, + "grad_norm": 0.0008012225735001266, + "learning_rate": 4.73532011848742e-07, + "loss": 0.0, + "num_input_tokens_seen": 97781392, + "step": 145075 + }, + { + "epoch": 3.544328536877336, + "grad_norm": 0.0005928860628046095, + "learning_rate": 4.734595111784945e-07, + "loss": 0.0663, + "num_input_tokens_seen": 97784528, + "step": 145080 + }, + { + "epoch": 3.5444506877091833, + "grad_norm": 0.0003491929383017123, + "learning_rate": 4.733870143373905e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97787664, + "step": 145085 + }, + { + "epoch": 3.5445728385410304, + "grad_norm": 0.016333796083927155, + "learning_rate": 4.7331452132595827e-07, + "loss": 0.0, + "num_input_tokens_seen": 97790544, + "step": 145090 + }, + { + "epoch": 3.5446949893728776, + "grad_norm": 0.001521604834124446, + "learning_rate": 4.7324203214472403e-07, + "loss": 0.0, + "num_input_tokens_seen": 97794000, + "step": 145095 + }, + { + "epoch": 3.544817140204725, + "grad_norm": 0.13567009568214417, + "learning_rate": 4.731695467942158e-07, + "loss": 0.0, + "num_input_tokens_seen": 97797392, + "step": 145100 + }, + { + "epoch": 3.544939291036572, + "grad_norm": 0.00036377584910951555, + "learning_rate": 4.730970652749601e-07, + "loss": 0.0, + "num_input_tokens_seen": 97800400, + "step": 145105 + }, + { + "epoch": 3.545061441868419, + "grad_norm": 0.00028549678972922266, + "learning_rate": 4.730245875874841e-07, + "loss": 0.0, + "num_input_tokens_seen": 97803536, + "step": 145110 + }, + { + "epoch": 3.5451835927002664, + "grad_norm": 0.002109118504449725, + "learning_rate": 4.7295211373231546e-07, + "loss": 0.0, + "num_input_tokens_seen": 97806672, + "step": 145115 + }, + { + "epoch": 3.5453057435321136, + "grad_norm": 0.012760130688548088, + "learning_rate": 4.7287964370998043e-07, + "loss": 0.0, + "num_input_tokens_seen": 97810064, + "step": 145120 + }, + { + "epoch": 3.5454278943639608, + "grad_norm": 0.0016017990419641137, + "learning_rate": 4.7280717752100683e-07, + "loss": 0.0, + "num_input_tokens_seen": 97813584, + "step": 145125 + }, + { + "epoch": 3.5455500451958075, + "grad_norm": 0.00019777823763433844, + "learning_rate": 4.7273471516592076e-07, + "loss": 0.0, + "num_input_tokens_seen": 97816848, + "step": 145130 + }, + { + "epoch": 3.545672196027655, + "grad_norm": 0.00033244179212488234, + "learning_rate": 4.726622566452497e-07, + "loss": 0.0, + "num_input_tokens_seen": 97820240, + "step": 145135 + }, + { + "epoch": 3.545794346859502, + "grad_norm": 0.006279574707150459, + "learning_rate": 4.7258980195952103e-07, + "loss": 0.0435, + "num_input_tokens_seen": 97823184, + "step": 145140 + }, + { + "epoch": 3.5459164976913495, + "grad_norm": 0.00035050706355832517, + "learning_rate": 4.7251735110926103e-07, + "loss": 0.0, + "num_input_tokens_seen": 97826512, + "step": 145145 + }, + { + "epoch": 3.5460386485231963, + "grad_norm": 0.013322637416422367, + "learning_rate": 4.724449040949965e-07, + "loss": 0.0, + "num_input_tokens_seen": 97829584, + "step": 145150 + }, + { + "epoch": 3.5461607993550435, + "grad_norm": 0.0018419913249090314, + "learning_rate": 4.723724609172548e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97832464, + "step": 145155 + }, + { + "epoch": 3.5462829501868907, + "grad_norm": 0.0012325738789513707, + "learning_rate": 4.7230002157656245e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97835600, + "step": 145160 + }, + { + "epoch": 3.546405101018738, + "grad_norm": 0.00032701349118724465, + "learning_rate": 4.7222758607344593e-07, + "loss": 0.1, + "num_input_tokens_seen": 97838928, + "step": 145165 + }, + { + "epoch": 3.546527251850585, + "grad_norm": 0.0011002069804817438, + "learning_rate": 4.7215515440843236e-07, + "loss": 0.0841, + "num_input_tokens_seen": 97842384, + "step": 145170 + }, + { + "epoch": 3.5466494026824322, + "grad_norm": 0.02300787903368473, + "learning_rate": 4.720827265820489e-07, + "loss": 0.0, + "num_input_tokens_seen": 97845584, + "step": 145175 + }, + { + "epoch": 3.5467715535142794, + "grad_norm": 0.03971827030181885, + "learning_rate": 4.7201030259482146e-07, + "loss": 0.0, + "num_input_tokens_seen": 97849168, + "step": 145180 + }, + { + "epoch": 3.5468937043461266, + "grad_norm": 0.08529645949602127, + "learning_rate": 4.719378824472774e-07, + "loss": 0.0, + "num_input_tokens_seen": 97852688, + "step": 145185 + }, + { + "epoch": 3.547015855177974, + "grad_norm": 0.00528777576982975, + "learning_rate": 4.7186546613994283e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97855888, + "step": 145190 + }, + { + "epoch": 3.547138006009821, + "grad_norm": 0.002800324000418186, + "learning_rate": 4.7179305367334453e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97859216, + "step": 145195 + }, + { + "epoch": 3.547260156841668, + "grad_norm": 0.00263691577129066, + "learning_rate": 4.7172064504800967e-07, + "loss": 0.0, + "num_input_tokens_seen": 97862416, + "step": 145200 + }, + { + "epoch": 3.5473823076735154, + "grad_norm": 0.03326334431767464, + "learning_rate": 4.7164824026446405e-07, + "loss": 0.0332, + "num_input_tokens_seen": 97865488, + "step": 145205 + }, + { + "epoch": 3.5475044585053626, + "grad_norm": 0.004230371210724115, + "learning_rate": 4.7157583932323475e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97868624, + "step": 145210 + }, + { + "epoch": 3.5476266093372097, + "grad_norm": 0.0032722600735723972, + "learning_rate": 4.7150344222484786e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97871568, + "step": 145215 + }, + { + "epoch": 3.547748760169057, + "grad_norm": 0.014357690699398518, + "learning_rate": 4.714310489698303e-07, + "loss": 0.0, + "num_input_tokens_seen": 97874832, + "step": 145220 + }, + { + "epoch": 3.5478709110009037, + "grad_norm": 0.003965605981647968, + "learning_rate": 4.7135865955870803e-07, + "loss": 0.062, + "num_input_tokens_seen": 97877968, + "step": 145225 + }, + { + "epoch": 3.5479930618327513, + "grad_norm": 0.0004163807607255876, + "learning_rate": 4.7128627399200784e-07, + "loss": 0.0, + "num_input_tokens_seen": 97881296, + "step": 145230 + }, + { + "epoch": 3.548115212664598, + "grad_norm": 0.0007733021629974246, + "learning_rate": 4.712138922702563e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97885456, + "step": 145235 + }, + { + "epoch": 3.5482373634964453, + "grad_norm": 0.01566707342863083, + "learning_rate": 4.7114151439397933e-07, + "loss": 0.0194, + "num_input_tokens_seen": 97888592, + "step": 145240 + }, + { + "epoch": 3.5483595143282924, + "grad_norm": 0.07242065668106079, + "learning_rate": 4.710691403637038e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97891728, + "step": 145245 + }, + { + "epoch": 3.5484816651601396, + "grad_norm": 0.005784235429018736, + "learning_rate": 4.7099677017995575e-07, + "loss": 0.0393, + "num_input_tokens_seen": 97894992, + "step": 145250 + }, + { + "epoch": 3.548603815991987, + "grad_norm": 0.002436217153444886, + "learning_rate": 4.7092440384326113e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97898064, + "step": 145255 + }, + { + "epoch": 3.548725966823834, + "grad_norm": 0.005221229046583176, + "learning_rate": 4.708520413541469e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97901328, + "step": 145260 + }, + { + "epoch": 3.548848117655681, + "grad_norm": 0.01007154956459999, + "learning_rate": 4.7077968271313863e-07, + "loss": 0.0477, + "num_input_tokens_seen": 97904464, + "step": 145265 + }, + { + "epoch": 3.5489702684875284, + "grad_norm": 0.07373490929603577, + "learning_rate": 4.707073279207632e-07, + "loss": 0.0002, + "num_input_tokens_seen": 97907664, + "step": 145270 + }, + { + "epoch": 3.5490924193193756, + "grad_norm": 0.046758461743593216, + "learning_rate": 4.706349769775461e-07, + "loss": 0.0536, + "num_input_tokens_seen": 97910864, + "step": 145275 + }, + { + "epoch": 3.5492145701512228, + "grad_norm": 0.0076475548557937145, + "learning_rate": 4.705626298840141e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97914256, + "step": 145280 + }, + { + "epoch": 3.54933672098307, + "grad_norm": 0.016452349722385406, + "learning_rate": 4.7049028664069266e-07, + "loss": 0.0004, + "num_input_tokens_seen": 97917328, + "step": 145285 + }, + { + "epoch": 3.549458871814917, + "grad_norm": 0.04465107619762421, + "learning_rate": 4.7041794724810846e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97920464, + "step": 145290 + }, + { + "epoch": 3.5495810226467643, + "grad_norm": 0.26384222507476807, + "learning_rate": 4.703456117067877e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97923600, + "step": 145295 + }, + { + "epoch": 3.5497031734786115, + "grad_norm": 0.028273768723011017, + "learning_rate": 4.702732800172556e-07, + "loss": 0.0, + "num_input_tokens_seen": 97926928, + "step": 145300 + }, + { + "epoch": 3.5498253243104587, + "grad_norm": 0.043451856821775436, + "learning_rate": 4.702009521800392e-07, + "loss": 0.0365, + "num_input_tokens_seen": 97930192, + "step": 145305 + }, + { + "epoch": 3.5499474751423055, + "grad_norm": 0.002773558022454381, + "learning_rate": 4.701286281956636e-07, + "loss": 0.0, + "num_input_tokens_seen": 97933456, + "step": 145310 + }, + { + "epoch": 3.550069625974153, + "grad_norm": 0.017502794042229652, + "learning_rate": 4.7005630806465547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97936592, + "step": 145315 + }, + { + "epoch": 3.550191776806, + "grad_norm": 0.0033604062628000975, + "learning_rate": 4.6998399178754e-07, + "loss": 0.0, + "num_input_tokens_seen": 97940240, + "step": 145320 + }, + { + "epoch": 3.5503139276378475, + "grad_norm": 0.007517929654568434, + "learning_rate": 4.6991167936484346e-07, + "loss": 0.0, + "num_input_tokens_seen": 97943632, + "step": 145325 + }, + { + "epoch": 3.5504360784696942, + "grad_norm": 0.49256661534309387, + "learning_rate": 4.698393707970922e-07, + "loss": 0.0006, + "num_input_tokens_seen": 97947088, + "step": 145330 + }, + { + "epoch": 3.5505582293015414, + "grad_norm": 0.000612253847066313, + "learning_rate": 4.697670660848113e-07, + "loss": 0.0, + "num_input_tokens_seen": 97950864, + "step": 145335 + }, + { + "epoch": 3.5506803801333886, + "grad_norm": 0.024863019585609436, + "learning_rate": 4.6969476522852726e-07, + "loss": 0.0001, + "num_input_tokens_seen": 97954384, + "step": 145340 + }, + { + "epoch": 3.550802530965236, + "grad_norm": 0.017794562503695488, + "learning_rate": 4.696224682287652e-07, + "loss": 0.0, + "num_input_tokens_seen": 97957712, + "step": 145345 + }, + { + "epoch": 3.550924681797083, + "grad_norm": 0.037794046103954315, + "learning_rate": 4.695501750860514e-07, + "loss": 0.0, + "num_input_tokens_seen": 97960848, + "step": 145350 + }, + { + "epoch": 3.55104683262893, + "grad_norm": 0.012041272595524788, + "learning_rate": 4.694778858009112e-07, + "loss": 0.0, + "num_input_tokens_seen": 97964560, + "step": 145355 + }, + { + "epoch": 3.5511689834607774, + "grad_norm": 0.022189682349562645, + "learning_rate": 4.694056003738708e-07, + "loss": 0.0, + "num_input_tokens_seen": 97967184, + "step": 145360 + }, + { + "epoch": 3.5512911342926246, + "grad_norm": 0.0005078144022263587, + "learning_rate": 4.693333188054556e-07, + "loss": 0.0, + "num_input_tokens_seen": 97970064, + "step": 145365 + }, + { + "epoch": 3.5514132851244717, + "grad_norm": 73.60726928710938, + "learning_rate": 4.692610410961909e-07, + "loss": 0.1659, + "num_input_tokens_seen": 97973264, + "step": 145370 + }, + { + "epoch": 3.551535435956319, + "grad_norm": 0.003191084135323763, + "learning_rate": 4.6918876724660296e-07, + "loss": 0.0, + "num_input_tokens_seen": 97976784, + "step": 145375 + }, + { + "epoch": 3.551657586788166, + "grad_norm": 0.011187938041985035, + "learning_rate": 4.691164972572168e-07, + "loss": 0.0, + "num_input_tokens_seen": 97980048, + "step": 145380 + }, + { + "epoch": 3.5517797376200133, + "grad_norm": 0.0007194733479991555, + "learning_rate": 4.690442311285582e-07, + "loss": 0.0, + "num_input_tokens_seen": 97983504, + "step": 145385 + }, + { + "epoch": 3.5519018884518605, + "grad_norm": 0.0015291129238903522, + "learning_rate": 4.689719688611532e-07, + "loss": 0.0235, + "num_input_tokens_seen": 97987216, + "step": 145390 + }, + { + "epoch": 3.5520240392837072, + "grad_norm": 0.05800214782357216, + "learning_rate": 4.6889971045552636e-07, + "loss": 0.0, + "num_input_tokens_seen": 97990480, + "step": 145395 + }, + { + "epoch": 3.552146190115555, + "grad_norm": 0.00901162251830101, + "learning_rate": 4.6882745591220417e-07, + "loss": 0.0, + "num_input_tokens_seen": 97993680, + "step": 145400 + }, + { + "epoch": 3.5522683409474016, + "grad_norm": 0.006305050104856491, + "learning_rate": 4.687552052317112e-07, + "loss": 0.0, + "num_input_tokens_seen": 97997008, + "step": 145405 + }, + { + "epoch": 3.5523904917792493, + "grad_norm": 0.007316413801163435, + "learning_rate": 4.6868295841457363e-07, + "loss": 0.0, + "num_input_tokens_seen": 98000272, + "step": 145410 + }, + { + "epoch": 3.552512642611096, + "grad_norm": 0.014707071706652641, + "learning_rate": 4.68610715461316e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98003600, + "step": 145415 + }, + { + "epoch": 3.552634793442943, + "grad_norm": 0.0018783895066007972, + "learning_rate": 4.6853847637246433e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98006800, + "step": 145420 + }, + { + "epoch": 3.5527569442747904, + "grad_norm": 0.017941100522875786, + "learning_rate": 4.6846624114854415e-07, + "loss": 0.06, + "num_input_tokens_seen": 98010064, + "step": 145425 + }, + { + "epoch": 3.5528790951066376, + "grad_norm": 0.001249508699402213, + "learning_rate": 4.6839400979008005e-07, + "loss": 0.0288, + "num_input_tokens_seen": 98013456, + "step": 145430 + }, + { + "epoch": 3.5530012459384848, + "grad_norm": 0.002429740270599723, + "learning_rate": 4.683217822975981e-07, + "loss": 0.0606, + "num_input_tokens_seen": 98016912, + "step": 145435 + }, + { + "epoch": 3.553123396770332, + "grad_norm": 0.0017890139715746045, + "learning_rate": 4.6824955867162276e-07, + "loss": 0.0246, + "num_input_tokens_seen": 98020496, + "step": 145440 + }, + { + "epoch": 3.553245547602179, + "grad_norm": 0.001725839334540069, + "learning_rate": 4.681773389126795e-07, + "loss": 0.0, + "num_input_tokens_seen": 98023632, + "step": 145445 + }, + { + "epoch": 3.5533676984340263, + "grad_norm": 0.0002874275960493833, + "learning_rate": 4.681051230212942e-07, + "loss": 0.0, + "num_input_tokens_seen": 98027664, + "step": 145450 + }, + { + "epoch": 3.5534898492658735, + "grad_norm": 0.0017251042881980538, + "learning_rate": 4.680329109979916e-07, + "loss": 0.0, + "num_input_tokens_seen": 98030800, + "step": 145455 + }, + { + "epoch": 3.5536120000977207, + "grad_norm": 0.004043647553771734, + "learning_rate": 4.679607028432961e-07, + "loss": 0.0, + "num_input_tokens_seen": 98034576, + "step": 145460 + }, + { + "epoch": 3.553734150929568, + "grad_norm": 0.0002633123949635774, + "learning_rate": 4.6788849855773413e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98037840, + "step": 145465 + }, + { + "epoch": 3.553856301761415, + "grad_norm": 22.955411911010742, + "learning_rate": 4.6781629814183e-07, + "loss": 0.0667, + "num_input_tokens_seen": 98040976, + "step": 145470 + }, + { + "epoch": 3.5539784525932623, + "grad_norm": 0.020732754841446877, + "learning_rate": 4.6774410159610847e-07, + "loss": 0.0, + "num_input_tokens_seen": 98043984, + "step": 145475 + }, + { + "epoch": 3.5541006034251095, + "grad_norm": 0.10230601578950882, + "learning_rate": 4.676719089210951e-07, + "loss": 0.0344, + "num_input_tokens_seen": 98047504, + "step": 145480 + }, + { + "epoch": 3.5542227542569567, + "grad_norm": 0.0032723443582654, + "learning_rate": 4.675997201173151e-07, + "loss": 0.0, + "num_input_tokens_seen": 98050448, + "step": 145485 + }, + { + "epoch": 3.5543449050888034, + "grad_norm": 0.013427302241325378, + "learning_rate": 4.6752753518529276e-07, + "loss": 0.0601, + "num_input_tokens_seen": 98053648, + "step": 145490 + }, + { + "epoch": 3.554467055920651, + "grad_norm": 0.0018329521408304572, + "learning_rate": 4.674553541255537e-07, + "loss": 0.0024, + "num_input_tokens_seen": 98056912, + "step": 145495 + }, + { + "epoch": 3.554589206752498, + "grad_norm": 0.004841540474444628, + "learning_rate": 4.673831769386223e-07, + "loss": 0.0, + "num_input_tokens_seen": 98060304, + "step": 145500 + }, + { + "epoch": 3.5547113575843454, + "grad_norm": 0.0034890659153461456, + "learning_rate": 4.67311003625024e-07, + "loss": 0.0, + "num_input_tokens_seen": 98063440, + "step": 145505 + }, + { + "epoch": 3.554833508416192, + "grad_norm": 0.9182288646697998, + "learning_rate": 4.67238834185283e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98066832, + "step": 145510 + }, + { + "epoch": 3.5549556592480394, + "grad_norm": 0.012411080300807953, + "learning_rate": 4.6716666861992447e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98069904, + "step": 145515 + }, + { + "epoch": 3.5550778100798865, + "grad_norm": 0.019958067685365677, + "learning_rate": 4.6709450692947363e-07, + "loss": 0.0474, + "num_input_tokens_seen": 98073680, + "step": 145520 + }, + { + "epoch": 3.5551999609117337, + "grad_norm": 0.0059996098279953, + "learning_rate": 4.670223491144545e-07, + "loss": 0.0, + "num_input_tokens_seen": 98076752, + "step": 145525 + }, + { + "epoch": 3.555322111743581, + "grad_norm": 0.0008659661398269236, + "learning_rate": 4.6695019517539257e-07, + "loss": 0.0501, + "num_input_tokens_seen": 98080144, + "step": 145530 + }, + { + "epoch": 3.555444262575428, + "grad_norm": 0.0021337266080081463, + "learning_rate": 4.6687804511281183e-07, + "loss": 0.0, + "num_input_tokens_seen": 98083344, + "step": 145535 + }, + { + "epoch": 3.5555664134072753, + "grad_norm": 0.008302521891891956, + "learning_rate": 4.668058989272373e-07, + "loss": 0.0667, + "num_input_tokens_seen": 98086672, + "step": 145540 + }, + { + "epoch": 3.5556885642391225, + "grad_norm": 0.0024677154142409563, + "learning_rate": 4.667337566191941e-07, + "loss": 0.0, + "num_input_tokens_seen": 98089872, + "step": 145545 + }, + { + "epoch": 3.5558107150709697, + "grad_norm": 0.05019146203994751, + "learning_rate": 4.666616181892061e-07, + "loss": 0.0, + "num_input_tokens_seen": 98092944, + "step": 145550 + }, + { + "epoch": 3.555932865902817, + "grad_norm": 0.0019049012335017323, + "learning_rate": 4.665894836377986e-07, + "loss": 0.0352, + "num_input_tokens_seen": 98096016, + "step": 145555 + }, + { + "epoch": 3.556055016734664, + "grad_norm": 0.002492862520739436, + "learning_rate": 4.665173529654959e-07, + "loss": 0.0, + "num_input_tokens_seen": 98099280, + "step": 145560 + }, + { + "epoch": 3.5561771675665113, + "grad_norm": 0.008258125744760036, + "learning_rate": 4.6644522617282203e-07, + "loss": 0.0727, + "num_input_tokens_seen": 98102864, + "step": 145565 + }, + { + "epoch": 3.5562993183983584, + "grad_norm": 0.027965500950813293, + "learning_rate": 4.6637310326030243e-07, + "loss": 0.0569, + "num_input_tokens_seen": 98106064, + "step": 145570 + }, + { + "epoch": 3.556421469230205, + "grad_norm": 0.006361914332956076, + "learning_rate": 4.663009842284608e-07, + "loss": 0.0, + "num_input_tokens_seen": 98109456, + "step": 145575 + }, + { + "epoch": 3.556543620062053, + "grad_norm": 0.002534546423703432, + "learning_rate": 4.662288690778222e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98112784, + "step": 145580 + }, + { + "epoch": 3.5566657708938996, + "grad_norm": 0.0019274181686341763, + "learning_rate": 4.661567578089105e-07, + "loss": 0.0, + "num_input_tokens_seen": 98116112, + "step": 145585 + }, + { + "epoch": 3.556787921725747, + "grad_norm": 0.0018334973137825727, + "learning_rate": 4.660846504222509e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98119312, + "step": 145590 + }, + { + "epoch": 3.556910072557594, + "grad_norm": 0.0011634392431005836, + "learning_rate": 4.660125469183669e-07, + "loss": 0.0, + "num_input_tokens_seen": 98122448, + "step": 145595 + }, + { + "epoch": 3.557032223389441, + "grad_norm": 0.0033267210237681866, + "learning_rate": 4.6594044729778336e-07, + "loss": 0.0259, + "num_input_tokens_seen": 98125904, + "step": 145600 + }, + { + "epoch": 3.5571543742212883, + "grad_norm": 0.004412208218127489, + "learning_rate": 4.658683515610248e-07, + "loss": 0.0, + "num_input_tokens_seen": 98129232, + "step": 145605 + }, + { + "epoch": 3.5572765250531355, + "grad_norm": 0.11257799714803696, + "learning_rate": 4.6579625970861494e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98132624, + "step": 145610 + }, + { + "epoch": 3.5573986758849827, + "grad_norm": 0.006835788954049349, + "learning_rate": 4.657241717410787e-07, + "loss": 0.0404, + "num_input_tokens_seen": 98135888, + "step": 145615 + }, + { + "epoch": 3.55752082671683, + "grad_norm": 0.0053612408228218555, + "learning_rate": 4.656520876589397e-07, + "loss": 0.0, + "num_input_tokens_seen": 98139088, + "step": 145620 + }, + { + "epoch": 3.557642977548677, + "grad_norm": 0.013923496007919312, + "learning_rate": 4.6558000746272276e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98142544, + "step": 145625 + }, + { + "epoch": 3.5577651283805243, + "grad_norm": 0.004267881624400616, + "learning_rate": 4.655079311529513e-07, + "loss": 0.0307, + "num_input_tokens_seen": 98145808, + "step": 145630 + }, + { + "epoch": 3.5578872792123715, + "grad_norm": 0.006597915198653936, + "learning_rate": 4.6543585873015e-07, + "loss": 0.0004, + "num_input_tokens_seen": 98149072, + "step": 145635 + }, + { + "epoch": 3.5580094300442187, + "grad_norm": 0.01169089786708355, + "learning_rate": 4.6536379019484327e-07, + "loss": 0.0, + "num_input_tokens_seen": 98152528, + "step": 145640 + }, + { + "epoch": 3.558131580876066, + "grad_norm": 0.0013577293138951063, + "learning_rate": 4.6529172554755446e-07, + "loss": 0.0048, + "num_input_tokens_seen": 98155728, + "step": 145645 + }, + { + "epoch": 3.558253731707913, + "grad_norm": 0.05119822919368744, + "learning_rate": 4.6521966478880846e-07, + "loss": 0.0006, + "num_input_tokens_seen": 98159696, + "step": 145650 + }, + { + "epoch": 3.5583758825397602, + "grad_norm": 0.0024902746081352234, + "learning_rate": 4.6514760791912853e-07, + "loss": 0.0, + "num_input_tokens_seen": 98163152, + "step": 145655 + }, + { + "epoch": 3.5584980333716074, + "grad_norm": 0.00010411084076622501, + "learning_rate": 4.6507555493903936e-07, + "loss": 0.0, + "num_input_tokens_seen": 98166992, + "step": 145660 + }, + { + "epoch": 3.5586201842034546, + "grad_norm": 0.00025045243091881275, + "learning_rate": 4.6500350584906435e-07, + "loss": 0.0, + "num_input_tokens_seen": 98170192, + "step": 145665 + }, + { + "epoch": 3.5587423350353014, + "grad_norm": 0.0014511343324556947, + "learning_rate": 4.649314606497281e-07, + "loss": 0.0488, + "num_input_tokens_seen": 98173264, + "step": 145670 + }, + { + "epoch": 3.558864485867149, + "grad_norm": 0.0007764685433357954, + "learning_rate": 4.6485941934155413e-07, + "loss": 0.0, + "num_input_tokens_seen": 98176464, + "step": 145675 + }, + { + "epoch": 3.5589866366989957, + "grad_norm": 0.0007026352686807513, + "learning_rate": 4.6478738192506607e-07, + "loss": 0.049, + "num_input_tokens_seen": 98179984, + "step": 145680 + }, + { + "epoch": 3.559108787530843, + "grad_norm": 0.002035627607256174, + "learning_rate": 4.647153484007884e-07, + "loss": 0.0008, + "num_input_tokens_seen": 98183376, + "step": 145685 + }, + { + "epoch": 3.55923093836269, + "grad_norm": 27.889453887939453, + "learning_rate": 4.6464331876924443e-07, + "loss": 0.0477, + "num_input_tokens_seen": 98187216, + "step": 145690 + }, + { + "epoch": 3.5593530891945373, + "grad_norm": 0.02954605594277382, + "learning_rate": 4.645712930309582e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98190672, + "step": 145695 + }, + { + "epoch": 3.5594752400263845, + "grad_norm": 0.0005955526721663773, + "learning_rate": 4.6449927118645393e-07, + "loss": 0.0, + "num_input_tokens_seen": 98194384, + "step": 145700 + }, + { + "epoch": 3.5595973908582317, + "grad_norm": 0.005972879007458687, + "learning_rate": 4.6442725323625467e-07, + "loss": 0.0, + "num_input_tokens_seen": 98197904, + "step": 145705 + }, + { + "epoch": 3.559719541690079, + "grad_norm": 0.0004364507331047207, + "learning_rate": 4.6435523918088473e-07, + "loss": 0.0, + "num_input_tokens_seen": 98201808, + "step": 145710 + }, + { + "epoch": 3.559841692521926, + "grad_norm": 0.014401191845536232, + "learning_rate": 4.642832290208672e-07, + "loss": 0.0, + "num_input_tokens_seen": 98205328, + "step": 145715 + }, + { + "epoch": 3.5599638433537732, + "grad_norm": 0.0015330385649576783, + "learning_rate": 4.642112227567265e-07, + "loss": 0.0, + "num_input_tokens_seen": 98208656, + "step": 145720 + }, + { + "epoch": 3.5600859941856204, + "grad_norm": 0.0011512466007843614, + "learning_rate": 4.641392203889857e-07, + "loss": 0.0, + "num_input_tokens_seen": 98211792, + "step": 145725 + }, + { + "epoch": 3.5602081450174676, + "grad_norm": 0.0002796232874970883, + "learning_rate": 4.640672219181684e-07, + "loss": 0.0, + "num_input_tokens_seen": 98214800, + "step": 145730 + }, + { + "epoch": 3.560330295849315, + "grad_norm": 0.011278784833848476, + "learning_rate": 4.639952273447989e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98218256, + "step": 145735 + }, + { + "epoch": 3.560452446681162, + "grad_norm": 0.004169910680502653, + "learning_rate": 4.639232366693998e-07, + "loss": 0.0, + "num_input_tokens_seen": 98221648, + "step": 145740 + }, + { + "epoch": 3.560574597513009, + "grad_norm": 0.004272952675819397, + "learning_rate": 4.638512498924956e-07, + "loss": 0.0, + "num_input_tokens_seen": 98224784, + "step": 145745 + }, + { + "epoch": 3.5606967483448564, + "grad_norm": 8.410751615883783e-05, + "learning_rate": 4.637792670146089e-07, + "loss": 0.0, + "num_input_tokens_seen": 98228368, + "step": 145750 + }, + { + "epoch": 3.560818899176703, + "grad_norm": 0.004245266318321228, + "learning_rate": 4.6370728803626357e-07, + "loss": 0.0399, + "num_input_tokens_seen": 98231568, + "step": 145755 + }, + { + "epoch": 3.5609410500085508, + "grad_norm": 0.00774649903178215, + "learning_rate": 4.6363531295798344e-07, + "loss": 0.0479, + "num_input_tokens_seen": 98234960, + "step": 145760 + }, + { + "epoch": 3.5610632008403975, + "grad_norm": 0.010200269520282745, + "learning_rate": 4.635633417802917e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98238672, + "step": 145765 + }, + { + "epoch": 3.561185351672245, + "grad_norm": 0.0071225594729185104, + "learning_rate": 4.634913745037111e-07, + "loss": 0.0, + "num_input_tokens_seen": 98242384, + "step": 145770 + }, + { + "epoch": 3.561307502504092, + "grad_norm": 0.007639218121767044, + "learning_rate": 4.6341941112876593e-07, + "loss": 0.0, + "num_input_tokens_seen": 98245520, + "step": 145775 + }, + { + "epoch": 3.561429653335939, + "grad_norm": 0.008603011257946491, + "learning_rate": 4.633474516559792e-07, + "loss": 0.0, + "num_input_tokens_seen": 98248656, + "step": 145780 + }, + { + "epoch": 3.5615518041677863, + "grad_norm": 0.010492679663002491, + "learning_rate": 4.632754960858738e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98251472, + "step": 145785 + }, + { + "epoch": 3.5616739549996335, + "grad_norm": 0.00014071117038838565, + "learning_rate": 4.6320354441897326e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98254928, + "step": 145790 + }, + { + "epoch": 3.5617961058314807, + "grad_norm": 0.003771584015339613, + "learning_rate": 4.6313159665580124e-07, + "loss": 0.0454, + "num_input_tokens_seen": 98258256, + "step": 145795 + }, + { + "epoch": 3.561918256663328, + "grad_norm": 0.007178634870797396, + "learning_rate": 4.630596527968804e-07, + "loss": 0.0, + "num_input_tokens_seen": 98261264, + "step": 145800 + }, + { + "epoch": 3.562040407495175, + "grad_norm": 0.006300019100308418, + "learning_rate": 4.629877128427345e-07, + "loss": 0.0, + "num_input_tokens_seen": 98264784, + "step": 145805 + }, + { + "epoch": 3.562162558327022, + "grad_norm": 0.00243266299366951, + "learning_rate": 4.6291577679388607e-07, + "loss": 0.0, + "num_input_tokens_seen": 98267920, + "step": 145810 + }, + { + "epoch": 3.5622847091588694, + "grad_norm": 0.0011975874658674002, + "learning_rate": 4.628438446508589e-07, + "loss": 0.0, + "num_input_tokens_seen": 98271440, + "step": 145815 + }, + { + "epoch": 3.5624068599907166, + "grad_norm": 0.0013561134692281485, + "learning_rate": 4.6277191641417547e-07, + "loss": 0.0, + "num_input_tokens_seen": 98275152, + "step": 145820 + }, + { + "epoch": 3.562529010822564, + "grad_norm": 0.006943685468286276, + "learning_rate": 4.6269999208435903e-07, + "loss": 0.0, + "num_input_tokens_seen": 98278480, + "step": 145825 + }, + { + "epoch": 3.562651161654411, + "grad_norm": 0.0019534756429493427, + "learning_rate": 4.6262807166193316e-07, + "loss": 0.0, + "num_input_tokens_seen": 98282256, + "step": 145830 + }, + { + "epoch": 3.562773312486258, + "grad_norm": 0.0007993488106876612, + "learning_rate": 4.6255615514742016e-07, + "loss": 0.0, + "num_input_tokens_seen": 98285392, + "step": 145835 + }, + { + "epoch": 3.5628954633181054, + "grad_norm": 0.008926840499043465, + "learning_rate": 4.6248424254134376e-07, + "loss": 0.0, + "num_input_tokens_seen": 98288528, + "step": 145840 + }, + { + "epoch": 3.5630176141499525, + "grad_norm": 0.005140448454767466, + "learning_rate": 4.6241233384422616e-07, + "loss": 0.0, + "num_input_tokens_seen": 98291792, + "step": 145845 + }, + { + "epoch": 3.5631397649817993, + "grad_norm": 0.017702613025903702, + "learning_rate": 4.6234042905659066e-07, + "loss": 0.0, + "num_input_tokens_seen": 98295696, + "step": 145850 + }, + { + "epoch": 3.563261915813647, + "grad_norm": 0.0010711681097745895, + "learning_rate": 4.6226852817896046e-07, + "loss": 0.0, + "num_input_tokens_seen": 98299216, + "step": 145855 + }, + { + "epoch": 3.5633840666454937, + "grad_norm": 0.0027515243273228407, + "learning_rate": 4.621966312118578e-07, + "loss": 0.0, + "num_input_tokens_seen": 98302800, + "step": 145860 + }, + { + "epoch": 3.563506217477341, + "grad_norm": 0.0022552255541086197, + "learning_rate": 4.621247381558063e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98306192, + "step": 145865 + }, + { + "epoch": 3.563628368309188, + "grad_norm": 0.0019252895144745708, + "learning_rate": 4.620528490113284e-07, + "loss": 0.0, + "num_input_tokens_seen": 98309136, + "step": 145870 + }, + { + "epoch": 3.5637505191410352, + "grad_norm": 0.009776824153959751, + "learning_rate": 4.6198096377894644e-07, + "loss": 0.0, + "num_input_tokens_seen": 98312144, + "step": 145875 + }, + { + "epoch": 3.5638726699728824, + "grad_norm": 0.009079672396183014, + "learning_rate": 4.61909082459184e-07, + "loss": 0.0, + "num_input_tokens_seen": 98315856, + "step": 145880 + }, + { + "epoch": 3.5639948208047296, + "grad_norm": 0.0010798972798511386, + "learning_rate": 4.618372050525631e-07, + "loss": 0.0682, + "num_input_tokens_seen": 98319440, + "step": 145885 + }, + { + "epoch": 3.564116971636577, + "grad_norm": 0.020890379324555397, + "learning_rate": 4.617653315596072e-07, + "loss": 0.0, + "num_input_tokens_seen": 98322512, + "step": 145890 + }, + { + "epoch": 3.564239122468424, + "grad_norm": 0.11921865493059158, + "learning_rate": 4.616934619808381e-07, + "loss": 0.0, + "num_input_tokens_seen": 98325776, + "step": 145895 + }, + { + "epoch": 3.564361273300271, + "grad_norm": 0.0025778845883905888, + "learning_rate": 4.6162159631677946e-07, + "loss": 0.0, + "num_input_tokens_seen": 98329040, + "step": 145900 + }, + { + "epoch": 3.5644834241321184, + "grad_norm": 0.00012149583199061453, + "learning_rate": 4.615497345679529e-07, + "loss": 0.0, + "num_input_tokens_seen": 98332496, + "step": 145905 + }, + { + "epoch": 3.5646055749639656, + "grad_norm": 33.14250183105469, + "learning_rate": 4.614778767348815e-07, + "loss": 0.0835, + "num_input_tokens_seen": 98335440, + "step": 145910 + }, + { + "epoch": 3.5647277257958128, + "grad_norm": 0.00033949079806916416, + "learning_rate": 4.6140602281808816e-07, + "loss": 0.0, + "num_input_tokens_seen": 98338768, + "step": 145915 + }, + { + "epoch": 3.56484987662766, + "grad_norm": 0.0017763259820640087, + "learning_rate": 4.613341728180947e-07, + "loss": 0.0225, + "num_input_tokens_seen": 98342352, + "step": 145920 + }, + { + "epoch": 3.564972027459507, + "grad_norm": 0.007347355131059885, + "learning_rate": 4.6126232673542456e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98346192, + "step": 145925 + }, + { + "epoch": 3.5650941782913543, + "grad_norm": 0.09457369893789291, + "learning_rate": 4.6119048457059916e-07, + "loss": 0.0, + "num_input_tokens_seen": 98349456, + "step": 145930 + }, + { + "epoch": 3.565216329123201, + "grad_norm": 0.021234316751360893, + "learning_rate": 4.611186463241419e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98352912, + "step": 145935 + }, + { + "epoch": 3.5653384799550487, + "grad_norm": 0.008176828734576702, + "learning_rate": 4.610468119965744e-07, + "loss": 0.0, + "num_input_tokens_seen": 98355856, + "step": 145940 + }, + { + "epoch": 3.5654606307868955, + "grad_norm": 0.0014455120544880629, + "learning_rate": 4.609749815884194e-07, + "loss": 0.0, + "num_input_tokens_seen": 98359376, + "step": 145945 + }, + { + "epoch": 3.565582781618743, + "grad_norm": 0.020845487713813782, + "learning_rate": 4.609031551001997e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98362448, + "step": 145950 + }, + { + "epoch": 3.56570493245059, + "grad_norm": 16.44266700744629, + "learning_rate": 4.608313325324369e-07, + "loss": 0.0235, + "num_input_tokens_seen": 98365968, + "step": 145955 + }, + { + "epoch": 3.565827083282437, + "grad_norm": 0.0004650065384339541, + "learning_rate": 4.6075951388565414e-07, + "loss": 0.0, + "num_input_tokens_seen": 98369104, + "step": 145960 + }, + { + "epoch": 3.565949234114284, + "grad_norm": 0.003706740215420723, + "learning_rate": 4.6068769916037277e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98372560, + "step": 145965 + }, + { + "epoch": 3.5660713849461314, + "grad_norm": 0.0027632254641503096, + "learning_rate": 4.6061588835711583e-07, + "loss": 0.0, + "num_input_tokens_seen": 98375824, + "step": 145970 + }, + { + "epoch": 3.5661935357779786, + "grad_norm": 0.012814885936677456, + "learning_rate": 4.605440814764049e-07, + "loss": 0.0, + "num_input_tokens_seen": 98379280, + "step": 145975 + }, + { + "epoch": 3.566315686609826, + "grad_norm": 0.040037527680397034, + "learning_rate": 4.604722785187629e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98382608, + "step": 145980 + }, + { + "epoch": 3.566437837441673, + "grad_norm": 0.0004106028936803341, + "learning_rate": 4.604004794847116e-07, + "loss": 0.0, + "num_input_tokens_seen": 98385872, + "step": 145985 + }, + { + "epoch": 3.56655998827352, + "grad_norm": 0.0010544214164838195, + "learning_rate": 4.603286843747728e-07, + "loss": 0.0, + "num_input_tokens_seen": 98389584, + "step": 145990 + }, + { + "epoch": 3.5666821391053674, + "grad_norm": 0.0008923725690692663, + "learning_rate": 4.602568931894694e-07, + "loss": 0.0, + "num_input_tokens_seen": 98393296, + "step": 145995 + }, + { + "epoch": 3.5668042899372145, + "grad_norm": 0.0021879971027374268, + "learning_rate": 4.601851059293225e-07, + "loss": 0.0, + "num_input_tokens_seen": 98396496, + "step": 146000 + }, + { + "epoch": 3.5669264407690617, + "grad_norm": 0.041268907487392426, + "learning_rate": 4.601133225948548e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98399632, + "step": 146005 + }, + { + "epoch": 3.567048591600909, + "grad_norm": 0.03337256982922554, + "learning_rate": 4.600415431865886e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98403024, + "step": 146010 + }, + { + "epoch": 3.567170742432756, + "grad_norm": 0.00034684070851653814, + "learning_rate": 4.5996976770504514e-07, + "loss": 0.0005, + "num_input_tokens_seen": 98406352, + "step": 146015 + }, + { + "epoch": 3.567292893264603, + "grad_norm": 0.0028094665613025427, + "learning_rate": 4.598979961507471e-07, + "loss": 0.0, + "num_input_tokens_seen": 98409488, + "step": 146020 + }, + { + "epoch": 3.5674150440964505, + "grad_norm": 0.058483824133872986, + "learning_rate": 4.598262285242158e-07, + "loss": 0.0, + "num_input_tokens_seen": 98412752, + "step": 146025 + }, + { + "epoch": 3.5675371949282972, + "grad_norm": 0.013209797441959381, + "learning_rate": 4.597544648259738e-07, + "loss": 0.0546, + "num_input_tokens_seen": 98416336, + "step": 146030 + }, + { + "epoch": 3.567659345760145, + "grad_norm": 0.006027190946042538, + "learning_rate": 4.5968270505654227e-07, + "loss": 0.062, + "num_input_tokens_seen": 98419664, + "step": 146035 + }, + { + "epoch": 3.5677814965919916, + "grad_norm": 0.05212077498435974, + "learning_rate": 4.596109492164435e-07, + "loss": 0.0, + "num_input_tokens_seen": 98422800, + "step": 146040 + }, + { + "epoch": 3.567903647423839, + "grad_norm": 0.013847493566572666, + "learning_rate": 4.595391973061995e-07, + "loss": 0.0, + "num_input_tokens_seen": 98425872, + "step": 146045 + }, + { + "epoch": 3.568025798255686, + "grad_norm": 0.0033159477170556784, + "learning_rate": 4.5946744932633155e-07, + "loss": 0.0, + "num_input_tokens_seen": 98429392, + "step": 146050 + }, + { + "epoch": 3.568147949087533, + "grad_norm": 0.03926025703549385, + "learning_rate": 4.5939570527736203e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98433040, + "step": 146055 + }, + { + "epoch": 3.5682700999193804, + "grad_norm": 0.002551204524934292, + "learning_rate": 4.59323965159812e-07, + "loss": 0.0563, + "num_input_tokens_seen": 98436176, + "step": 146060 + }, + { + "epoch": 3.5683922507512276, + "grad_norm": 0.10834618657827377, + "learning_rate": 4.592522289742036e-07, + "loss": 0.0308, + "num_input_tokens_seen": 98439568, + "step": 146065 + }, + { + "epoch": 3.5685144015830748, + "grad_norm": 0.0003172959550283849, + "learning_rate": 4.591804967210586e-07, + "loss": 0.0, + "num_input_tokens_seen": 98442640, + "step": 146070 + }, + { + "epoch": 3.568636552414922, + "grad_norm": 11.231805801391602, + "learning_rate": 4.5910876840089865e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98446096, + "step": 146075 + }, + { + "epoch": 3.568758703246769, + "grad_norm": 0.0025172054301947355, + "learning_rate": 4.590370440142448e-07, + "loss": 0.0, + "num_input_tokens_seen": 98449168, + "step": 146080 + }, + { + "epoch": 3.5688808540786163, + "grad_norm": 0.001789450878277421, + "learning_rate": 4.5896532356161944e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98452688, + "step": 146085 + }, + { + "epoch": 3.5690030049104635, + "grad_norm": 0.002166353864595294, + "learning_rate": 4.5889360704354375e-07, + "loss": 0.0, + "num_input_tokens_seen": 98455952, + "step": 146090 + }, + { + "epoch": 3.5691251557423107, + "grad_norm": 0.0020952769555151463, + "learning_rate": 4.58821894460539e-07, + "loss": 0.0, + "num_input_tokens_seen": 98459408, + "step": 146095 + }, + { + "epoch": 3.569247306574158, + "grad_norm": 0.12789571285247803, + "learning_rate": 4.5875018581312684e-07, + "loss": 0.0504, + "num_input_tokens_seen": 98462736, + "step": 146100 + }, + { + "epoch": 3.569369457406005, + "grad_norm": 0.06485660374164581, + "learning_rate": 4.5867848110182937e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98466192, + "step": 146105 + }, + { + "epoch": 3.5694916082378523, + "grad_norm": 0.02141660824418068, + "learning_rate": 4.5860678032716724e-07, + "loss": 0.0361, + "num_input_tokens_seen": 98469520, + "step": 146110 + }, + { + "epoch": 3.569613759069699, + "grad_norm": 0.0036332227755337954, + "learning_rate": 4.5853508348966253e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98472720, + "step": 146115 + }, + { + "epoch": 3.5697359099015467, + "grad_norm": 0.0006790324696339667, + "learning_rate": 4.5846339058983595e-07, + "loss": 0.0, + "num_input_tokens_seen": 98476304, + "step": 146120 + }, + { + "epoch": 3.5698580607333934, + "grad_norm": 0.007151363417506218, + "learning_rate": 4.583917016282097e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98479312, + "step": 146125 + }, + { + "epoch": 3.569980211565241, + "grad_norm": 0.05629653483629227, + "learning_rate": 4.583200166053043e-07, + "loss": 0.0, + "num_input_tokens_seen": 98482640, + "step": 146130 + }, + { + "epoch": 3.570102362397088, + "grad_norm": 0.0005009549204260111, + "learning_rate": 4.5824833552164134e-07, + "loss": 0.0625, + "num_input_tokens_seen": 98485904, + "step": 146135 + }, + { + "epoch": 3.570224513228935, + "grad_norm": 0.011462909169495106, + "learning_rate": 4.5817665837774265e-07, + "loss": 0.0, + "num_input_tokens_seen": 98489424, + "step": 146140 + }, + { + "epoch": 3.570346664060782, + "grad_norm": 0.011264095082879066, + "learning_rate": 4.581049851741287e-07, + "loss": 0.0, + "num_input_tokens_seen": 98492752, + "step": 146145 + }, + { + "epoch": 3.5704688148926294, + "grad_norm": 0.004408912267535925, + "learning_rate": 4.580333159113213e-07, + "loss": 0.0, + "num_input_tokens_seen": 98496336, + "step": 146150 + }, + { + "epoch": 3.5705909657244765, + "grad_norm": 0.001395556260831654, + "learning_rate": 4.5796165058984104e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98499536, + "step": 146155 + }, + { + "epoch": 3.5707131165563237, + "grad_norm": 0.010997572913765907, + "learning_rate": 4.578899892102095e-07, + "loss": 0.0, + "num_input_tokens_seen": 98502544, + "step": 146160 + }, + { + "epoch": 3.570835267388171, + "grad_norm": 0.0001377869484713301, + "learning_rate": 4.5781833177294815e-07, + "loss": 0.0, + "num_input_tokens_seen": 98505808, + "step": 146165 + }, + { + "epoch": 3.570957418220018, + "grad_norm": 0.0016566417180001736, + "learning_rate": 4.577466782785774e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98509392, + "step": 146170 + }, + { + "epoch": 3.5710795690518653, + "grad_norm": 0.0005025031859986484, + "learning_rate": 4.5767502872761885e-07, + "loss": 0.0009, + "num_input_tokens_seen": 98512656, + "step": 146175 + }, + { + "epoch": 3.5712017198837125, + "grad_norm": 0.001447174116037786, + "learning_rate": 4.576033831205935e-07, + "loss": 0.0, + "num_input_tokens_seen": 98515728, + "step": 146180 + }, + { + "epoch": 3.5713238707155597, + "grad_norm": 0.0032353862188756466, + "learning_rate": 4.5753174145802185e-07, + "loss": 0.0, + "num_input_tokens_seen": 98518800, + "step": 146185 + }, + { + "epoch": 3.571446021547407, + "grad_norm": 0.0006234863540157676, + "learning_rate": 4.5746010374042567e-07, + "loss": 0.0, + "num_input_tokens_seen": 98521936, + "step": 146190 + }, + { + "epoch": 3.571568172379254, + "grad_norm": 0.003277813782915473, + "learning_rate": 4.5738846996832505e-07, + "loss": 0.0306, + "num_input_tokens_seen": 98525072, + "step": 146195 + }, + { + "epoch": 3.571690323211101, + "grad_norm": 0.002012363402172923, + "learning_rate": 4.573168401422419e-07, + "loss": 0.041, + "num_input_tokens_seen": 98528592, + "step": 146200 + }, + { + "epoch": 3.5718124740429484, + "grad_norm": 0.000628719397354871, + "learning_rate": 4.5724521426269626e-07, + "loss": 0.0, + "num_input_tokens_seen": 98532112, + "step": 146205 + }, + { + "epoch": 3.571934624874795, + "grad_norm": 0.010826955549418926, + "learning_rate": 4.571735923302098e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98535568, + "step": 146210 + }, + { + "epoch": 3.572056775706643, + "grad_norm": 0.000483804993564263, + "learning_rate": 4.571019743453025e-07, + "loss": 0.0566, + "num_input_tokens_seen": 98539344, + "step": 146215 + }, + { + "epoch": 3.5721789265384896, + "grad_norm": 0.0004217389796394855, + "learning_rate": 4.5703036030849617e-07, + "loss": 0.0, + "num_input_tokens_seen": 98543056, + "step": 146220 + }, + { + "epoch": 3.5723010773703368, + "grad_norm": 0.02112424373626709, + "learning_rate": 4.5695875022031073e-07, + "loss": 0.0, + "num_input_tokens_seen": 98546640, + "step": 146225 + }, + { + "epoch": 3.572423228202184, + "grad_norm": 0.2892974019050598, + "learning_rate": 4.5688714408126717e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98549712, + "step": 146230 + }, + { + "epoch": 3.572545379034031, + "grad_norm": 0.008840722031891346, + "learning_rate": 4.5681554189188684e-07, + "loss": 0.0, + "num_input_tokens_seen": 98553104, + "step": 146235 + }, + { + "epoch": 3.5726675298658783, + "grad_norm": 0.023208221420645714, + "learning_rate": 4.5674394365268966e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98556496, + "step": 146240 + }, + { + "epoch": 3.5727896806977255, + "grad_norm": 0.1287384033203125, + "learning_rate": 4.56672349364197e-07, + "loss": 0.0007, + "num_input_tokens_seen": 98560208, + "step": 146245 + }, + { + "epoch": 3.5729118315295727, + "grad_norm": 0.0059753842651844025, + "learning_rate": 4.5660075902692877e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98563984, + "step": 146250 + }, + { + "epoch": 3.57303398236142, + "grad_norm": 0.00015051568334456533, + "learning_rate": 4.565291726414059e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98567696, + "step": 146255 + }, + { + "epoch": 3.573156133193267, + "grad_norm": 0.0029284560587257147, + "learning_rate": 4.5645759020814955e-07, + "loss": 0.0, + "num_input_tokens_seen": 98570704, + "step": 146260 + }, + { + "epoch": 3.5732782840251143, + "grad_norm": 0.1056944951415062, + "learning_rate": 4.5638601172767934e-07, + "loss": 0.0893, + "num_input_tokens_seen": 98573968, + "step": 146265 + }, + { + "epoch": 3.5734004348569615, + "grad_norm": 0.0052173323929309845, + "learning_rate": 4.5631443720051667e-07, + "loss": 0.0, + "num_input_tokens_seen": 98577040, + "step": 146270 + }, + { + "epoch": 3.5735225856888087, + "grad_norm": 0.0021510545630007982, + "learning_rate": 4.5624286662718124e-07, + "loss": 0.0225, + "num_input_tokens_seen": 98580176, + "step": 146275 + }, + { + "epoch": 3.573644736520656, + "grad_norm": 0.0036637140437960625, + "learning_rate": 4.5617130000819435e-07, + "loss": 0.0, + "num_input_tokens_seen": 98583184, + "step": 146280 + }, + { + "epoch": 3.573766887352503, + "grad_norm": 0.000627799890935421, + "learning_rate": 4.560997373440757e-07, + "loss": 0.0, + "num_input_tokens_seen": 98586640, + "step": 146285 + }, + { + "epoch": 3.57388903818435, + "grad_norm": 0.012774244882166386, + "learning_rate": 4.560281786353464e-07, + "loss": 0.0, + "num_input_tokens_seen": 98589776, + "step": 146290 + }, + { + "epoch": 3.574011189016197, + "grad_norm": 0.0006221451330929995, + "learning_rate": 4.5595662388252643e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98592912, + "step": 146295 + }, + { + "epoch": 3.5741333398480446, + "grad_norm": 0.00047458289191126823, + "learning_rate": 4.55885073086136e-07, + "loss": 0.0, + "num_input_tokens_seen": 98596176, + "step": 146300 + }, + { + "epoch": 3.5742554906798913, + "grad_norm": 0.0028228475712239742, + "learning_rate": 4.558135262466959e-07, + "loss": 0.0058, + "num_input_tokens_seen": 98599312, + "step": 146305 + }, + { + "epoch": 3.5743776415117385, + "grad_norm": 0.0960443764925003, + "learning_rate": 4.557419833647258e-07, + "loss": 0.0, + "num_input_tokens_seen": 98602512, + "step": 146310 + }, + { + "epoch": 3.5744997923435857, + "grad_norm": 0.013317099772393703, + "learning_rate": 4.556704444407465e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98605904, + "step": 146315 + }, + { + "epoch": 3.574621943175433, + "grad_norm": 0.0006582035566680133, + "learning_rate": 4.5559890947527843e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98609680, + "step": 146320 + }, + { + "epoch": 3.57474409400728, + "grad_norm": 0.13117016851902008, + "learning_rate": 4.555273784688413e-07, + "loss": 0.0657, + "num_input_tokens_seen": 98613072, + "step": 146325 + }, + { + "epoch": 3.5748662448391273, + "grad_norm": 0.043668705970048904, + "learning_rate": 4.554558514219557e-07, + "loss": 0.0, + "num_input_tokens_seen": 98616656, + "step": 146330 + }, + { + "epoch": 3.5749883956709745, + "grad_norm": 0.017411423847079277, + "learning_rate": 4.553843283351413e-07, + "loss": 0.1131, + "num_input_tokens_seen": 98619920, + "step": 146335 + }, + { + "epoch": 3.5751105465028217, + "grad_norm": 0.001685897121205926, + "learning_rate": 4.553128092089189e-07, + "loss": 0.0, + "num_input_tokens_seen": 98622864, + "step": 146340 + }, + { + "epoch": 3.575232697334669, + "grad_norm": 0.0014981109416112304, + "learning_rate": 4.5524129404380794e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98626448, + "step": 146345 + }, + { + "epoch": 3.575354848166516, + "grad_norm": 0.0020773394498974085, + "learning_rate": 4.551697828403288e-07, + "loss": 0.0, + "num_input_tokens_seen": 98629904, + "step": 146350 + }, + { + "epoch": 3.5754769989983632, + "grad_norm": 0.001966852927580476, + "learning_rate": 4.5509827559900194e-07, + "loss": 0.0, + "num_input_tokens_seen": 98633616, + "step": 146355 + }, + { + "epoch": 3.5755991498302104, + "grad_norm": 0.00928665604442358, + "learning_rate": 4.550267723203466e-07, + "loss": 0.0, + "num_input_tokens_seen": 98636816, + "step": 146360 + }, + { + "epoch": 3.5757213006620576, + "grad_norm": 0.003639796283096075, + "learning_rate": 4.5495527300488346e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98640720, + "step": 146365 + }, + { + "epoch": 3.575843451493905, + "grad_norm": 0.002453819615766406, + "learning_rate": 4.548837776531318e-07, + "loss": 0.0, + "num_input_tokens_seen": 98644432, + "step": 146370 + }, + { + "epoch": 3.575965602325752, + "grad_norm": 64.8224105834961, + "learning_rate": 4.548122862656124e-07, + "loss": 0.0023, + "num_input_tokens_seen": 98647824, + "step": 146375 + }, + { + "epoch": 3.5760877531575987, + "grad_norm": 0.0014054341008886695, + "learning_rate": 4.547407988428442e-07, + "loss": 0.0, + "num_input_tokens_seen": 98651024, + "step": 146380 + }, + { + "epoch": 3.5762099039894464, + "grad_norm": 0.005636067595332861, + "learning_rate": 4.5466931538534804e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98654608, + "step": 146385 + }, + { + "epoch": 3.576332054821293, + "grad_norm": 0.014609677717089653, + "learning_rate": 4.545978358936429e-07, + "loss": 0.0, + "num_input_tokens_seen": 98658320, + "step": 146390 + }, + { + "epoch": 3.5764542056531408, + "grad_norm": 0.011487624607980251, + "learning_rate": 4.5452636036824933e-07, + "loss": 0.0, + "num_input_tokens_seen": 98661712, + "step": 146395 + }, + { + "epoch": 3.5765763564849875, + "grad_norm": 0.18441209197044373, + "learning_rate": 4.5445488880968673e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98664720, + "step": 146400 + }, + { + "epoch": 3.5766985073168347, + "grad_norm": 0.029274463653564453, + "learning_rate": 4.543834212184746e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98667984, + "step": 146405 + }, + { + "epoch": 3.576820658148682, + "grad_norm": 0.02028658241033554, + "learning_rate": 4.543119575951331e-07, + "loss": 0.0, + "num_input_tokens_seen": 98671248, + "step": 146410 + }, + { + "epoch": 3.576942808980529, + "grad_norm": 0.049594081938266754, + "learning_rate": 4.5424049794018203e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98674896, + "step": 146415 + }, + { + "epoch": 3.5770649598123763, + "grad_norm": 0.052891720086336136, + "learning_rate": 4.5416904225414055e-07, + "loss": 0.0, + "num_input_tokens_seen": 98678288, + "step": 146420 + }, + { + "epoch": 3.5771871106442235, + "grad_norm": 0.04260937124490738, + "learning_rate": 4.540975905375289e-07, + "loss": 0.0, + "num_input_tokens_seen": 98682064, + "step": 146425 + }, + { + "epoch": 3.5773092614760706, + "grad_norm": 0.00021985071361996233, + "learning_rate": 4.5402614279086617e-07, + "loss": 0.0019, + "num_input_tokens_seen": 98685648, + "step": 146430 + }, + { + "epoch": 3.577431412307918, + "grad_norm": 0.000840392371173948, + "learning_rate": 4.539546990146724e-07, + "loss": 0.0334, + "num_input_tokens_seen": 98689488, + "step": 146435 + }, + { + "epoch": 3.577553563139765, + "grad_norm": 0.00027688511181622744, + "learning_rate": 4.538832592094666e-07, + "loss": 0.0, + "num_input_tokens_seen": 98693136, + "step": 146440 + }, + { + "epoch": 3.577675713971612, + "grad_norm": 0.012098210863769054, + "learning_rate": 4.538118233757686e-07, + "loss": 0.0, + "num_input_tokens_seen": 98696720, + "step": 146445 + }, + { + "epoch": 3.5777978648034594, + "grad_norm": 0.04021194577217102, + "learning_rate": 4.5374039151409836e-07, + "loss": 0.0, + "num_input_tokens_seen": 98700432, + "step": 146450 + }, + { + "epoch": 3.5779200156353066, + "grad_norm": 0.008958011865615845, + "learning_rate": 4.5366896362497464e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98703632, + "step": 146455 + }, + { + "epoch": 3.578042166467154, + "grad_norm": 0.003126812633126974, + "learning_rate": 4.5359753970891735e-07, + "loss": 0.0, + "num_input_tokens_seen": 98706832, + "step": 146460 + }, + { + "epoch": 3.5781643172990005, + "grad_norm": 0.0010996502824127674, + "learning_rate": 4.535261197664455e-07, + "loss": 0.111, + "num_input_tokens_seen": 98710160, + "step": 146465 + }, + { + "epoch": 3.578286468130848, + "grad_norm": 0.00027585600037127733, + "learning_rate": 4.534547037980786e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98713552, + "step": 146470 + }, + { + "epoch": 3.578408618962695, + "grad_norm": 0.001792923198081553, + "learning_rate": 4.533832918043364e-07, + "loss": 0.0, + "num_input_tokens_seen": 98716880, + "step": 146475 + }, + { + "epoch": 3.5785307697945425, + "grad_norm": 0.0010359457228332758, + "learning_rate": 4.533118837857377e-07, + "loss": 0.0, + "num_input_tokens_seen": 98720464, + "step": 146480 + }, + { + "epoch": 3.5786529206263893, + "grad_norm": 0.09417528659105301, + "learning_rate": 4.532404797428023e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98723472, + "step": 146485 + }, + { + "epoch": 3.5787750714582365, + "grad_norm": 0.006773589178919792, + "learning_rate": 4.531690796760492e-07, + "loss": 0.0, + "num_input_tokens_seen": 98727248, + "step": 146490 + }, + { + "epoch": 3.5788972222900837, + "grad_norm": 0.000443848519353196, + "learning_rate": 4.530976835859973e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98730832, + "step": 146495 + }, + { + "epoch": 3.579019373121931, + "grad_norm": 0.0023871995508670807, + "learning_rate": 4.530262914731665e-07, + "loss": 0.0268, + "num_input_tokens_seen": 98734160, + "step": 146500 + }, + { + "epoch": 3.579141523953778, + "grad_norm": 0.0007982885581441224, + "learning_rate": 4.529549033380753e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98737552, + "step": 146505 + }, + { + "epoch": 3.5792636747856252, + "grad_norm": 0.00018090193043462932, + "learning_rate": 4.528835191812435e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98740560, + "step": 146510 + }, + { + "epoch": 3.5793858256174724, + "grad_norm": 0.0023492518812417984, + "learning_rate": 4.5281213900318947e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98743568, + "step": 146515 + }, + { + "epoch": 3.5795079764493196, + "grad_norm": 0.003083728486672044, + "learning_rate": 4.527407628044332e-07, + "loss": 0.0, + "num_input_tokens_seen": 98746832, + "step": 146520 + }, + { + "epoch": 3.579630127281167, + "grad_norm": 0.002531050704419613, + "learning_rate": 4.526693905854929e-07, + "loss": 0.0, + "num_input_tokens_seen": 98750544, + "step": 146525 + }, + { + "epoch": 3.579752278113014, + "grad_norm": 0.00050026283133775, + "learning_rate": 4.5259802234688836e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98753808, + "step": 146530 + }, + { + "epoch": 3.579874428944861, + "grad_norm": 0.03973764181137085, + "learning_rate": 4.525266580891379e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98757008, + "step": 146535 + }, + { + "epoch": 3.5799965797767084, + "grad_norm": 0.00027027149917557836, + "learning_rate": 4.5245529781276083e-07, + "loss": 0.0, + "num_input_tokens_seen": 98760272, + "step": 146540 + }, + { + "epoch": 3.5801187306085556, + "grad_norm": 0.0016394499689340591, + "learning_rate": 4.523839415182765e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98763600, + "step": 146545 + }, + { + "epoch": 3.5802408814404028, + "grad_norm": 0.0012777202064171433, + "learning_rate": 4.5231258920620305e-07, + "loss": 0.0, + "num_input_tokens_seen": 98766992, + "step": 146550 + }, + { + "epoch": 3.58036303227225, + "grad_norm": 0.0009179083281196654, + "learning_rate": 4.522412408770602e-07, + "loss": 0.0, + "num_input_tokens_seen": 98770256, + "step": 146555 + }, + { + "epoch": 3.5804851831040967, + "grad_norm": 0.005039707757532597, + "learning_rate": 4.5216989653136584e-07, + "loss": 0.0, + "num_input_tokens_seen": 98773712, + "step": 146560 + }, + { + "epoch": 3.5806073339359443, + "grad_norm": 0.001076148939318955, + "learning_rate": 4.5209855616963945e-07, + "loss": 0.0, + "num_input_tokens_seen": 98777104, + "step": 146565 + }, + { + "epoch": 3.580729484767791, + "grad_norm": 0.000977373798377812, + "learning_rate": 4.520272197924001e-07, + "loss": 0.0631, + "num_input_tokens_seen": 98780432, + "step": 146570 + }, + { + "epoch": 3.5808516355996387, + "grad_norm": 0.0024073768872767687, + "learning_rate": 4.519558874001658e-07, + "loss": 0.0, + "num_input_tokens_seen": 98784080, + "step": 146575 + }, + { + "epoch": 3.5809737864314855, + "grad_norm": 0.06418831646442413, + "learning_rate": 4.51884558993456e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98786832, + "step": 146580 + }, + { + "epoch": 3.5810959372633326, + "grad_norm": 0.002536097541451454, + "learning_rate": 4.518132345727889e-07, + "loss": 0.0366, + "num_input_tokens_seen": 98790096, + "step": 146585 + }, + { + "epoch": 3.58121808809518, + "grad_norm": 0.0007362982723861933, + "learning_rate": 4.5174191413868354e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98793680, + "step": 146590 + }, + { + "epoch": 3.581340238927027, + "grad_norm": 0.0036098656710237265, + "learning_rate": 4.5167059769165827e-07, + "loss": 0.0, + "num_input_tokens_seen": 98796816, + "step": 146595 + }, + { + "epoch": 3.581462389758874, + "grad_norm": 0.0015288054710254073, + "learning_rate": 4.5159928523223224e-07, + "loss": 0.0, + "num_input_tokens_seen": 98800400, + "step": 146600 + }, + { + "epoch": 3.5815845405907214, + "grad_norm": 0.001292704837396741, + "learning_rate": 4.5152797676092367e-07, + "loss": 0.0, + "num_input_tokens_seen": 98803600, + "step": 146605 + }, + { + "epoch": 3.5817066914225686, + "grad_norm": 0.0005294968141242862, + "learning_rate": 4.514566722782508e-07, + "loss": 0.0, + "num_input_tokens_seen": 98806992, + "step": 146610 + }, + { + "epoch": 3.581828842254416, + "grad_norm": 0.04279835522174835, + "learning_rate": 4.513853717847329e-07, + "loss": 0.0, + "num_input_tokens_seen": 98810384, + "step": 146615 + }, + { + "epoch": 3.581950993086263, + "grad_norm": 0.0010195255745202303, + "learning_rate": 4.513140752808878e-07, + "loss": 0.0, + "num_input_tokens_seen": 98813136, + "step": 146620 + }, + { + "epoch": 3.58207314391811, + "grad_norm": 0.011961672455072403, + "learning_rate": 4.512427827672344e-07, + "loss": 0.0, + "num_input_tokens_seen": 98816976, + "step": 146625 + }, + { + "epoch": 3.5821952947499573, + "grad_norm": 0.0007193966303020716, + "learning_rate": 4.5117149424429135e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98819856, + "step": 146630 + }, + { + "epoch": 3.5823174455818045, + "grad_norm": 0.0016777556156739593, + "learning_rate": 4.5110020971257645e-07, + "loss": 0.0, + "num_input_tokens_seen": 98823696, + "step": 146635 + }, + { + "epoch": 3.5824395964136517, + "grad_norm": 0.0003486250643618405, + "learning_rate": 4.510289291726088e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98827280, + "step": 146640 + }, + { + "epoch": 3.5825617472454985, + "grad_norm": 0.00026854375028051436, + "learning_rate": 4.5095765262490614e-07, + "loss": 0.0, + "num_input_tokens_seen": 98830736, + "step": 146645 + }, + { + "epoch": 3.582683898077346, + "grad_norm": 0.00011141942377435043, + "learning_rate": 4.5088638006998745e-07, + "loss": 0.0, + "num_input_tokens_seen": 98833872, + "step": 146650 + }, + { + "epoch": 3.582806048909193, + "grad_norm": 0.013646208681166172, + "learning_rate": 4.508151115083703e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98837200, + "step": 146655 + }, + { + "epoch": 3.5829281997410405, + "grad_norm": 0.005915411747992039, + "learning_rate": 4.5074384694057334e-07, + "loss": 0.0, + "num_input_tokens_seen": 98840656, + "step": 146660 + }, + { + "epoch": 3.5830503505728872, + "grad_norm": 4.565409471979365e-05, + "learning_rate": 4.5067258636711536e-07, + "loss": 0.0, + "num_input_tokens_seen": 98843984, + "step": 146665 + }, + { + "epoch": 3.5831725014047344, + "grad_norm": 0.003964328207075596, + "learning_rate": 4.5060132978851364e-07, + "loss": 0.0313, + "num_input_tokens_seen": 98847248, + "step": 146670 + }, + { + "epoch": 3.5832946522365816, + "grad_norm": 0.0006252607563510537, + "learning_rate": 4.5053007720528713e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98850384, + "step": 146675 + }, + { + "epoch": 3.583416803068429, + "grad_norm": 0.016371555626392365, + "learning_rate": 4.5045882861795337e-07, + "loss": 0.0, + "num_input_tokens_seen": 98853776, + "step": 146680 + }, + { + "epoch": 3.583538953900276, + "grad_norm": 0.0004095694748684764, + "learning_rate": 4.503875840270311e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98857040, + "step": 146685 + }, + { + "epoch": 3.583661104732123, + "grad_norm": 0.01132937241345644, + "learning_rate": 4.503163434330379e-07, + "loss": 0.0, + "num_input_tokens_seen": 98860944, + "step": 146690 + }, + { + "epoch": 3.5837832555639704, + "grad_norm": 0.0022024072241038084, + "learning_rate": 4.502451068364925e-07, + "loss": 0.0, + "num_input_tokens_seen": 98864208, + "step": 146695 + }, + { + "epoch": 3.5839054063958176, + "grad_norm": 0.009176064282655716, + "learning_rate": 4.501738742379121e-07, + "loss": 0.0, + "num_input_tokens_seen": 98867344, + "step": 146700 + }, + { + "epoch": 3.5840275572276648, + "grad_norm": 0.07557198405265808, + "learning_rate": 4.5010264563781554e-07, + "loss": 0.0, + "num_input_tokens_seen": 98870224, + "step": 146705 + }, + { + "epoch": 3.584149708059512, + "grad_norm": 0.0012070549419149756, + "learning_rate": 4.5003142103672045e-07, + "loss": 0.0, + "num_input_tokens_seen": 98873232, + "step": 146710 + }, + { + "epoch": 3.584271858891359, + "grad_norm": 0.01205280888825655, + "learning_rate": 4.499602004351445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98876816, + "step": 146715 + }, + { + "epoch": 3.5843940097232063, + "grad_norm": 0.0006821723654866219, + "learning_rate": 4.4988898383360576e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98880208, + "step": 146720 + }, + { + "epoch": 3.5845161605550535, + "grad_norm": 0.005529592279344797, + "learning_rate": 4.498177712326228e-07, + "loss": 0.0, + "num_input_tokens_seen": 98883728, + "step": 146725 + }, + { + "epoch": 3.5846383113869007, + "grad_norm": 0.0008852336904965341, + "learning_rate": 4.4974656263271247e-07, + "loss": 0.0, + "num_input_tokens_seen": 98886864, + "step": 146730 + }, + { + "epoch": 3.584760462218748, + "grad_norm": 0.0005162259330973029, + "learning_rate": 4.496753580343937e-07, + "loss": 0.0, + "num_input_tokens_seen": 98890000, + "step": 146735 + }, + { + "epoch": 3.5848826130505946, + "grad_norm": 0.0826941654086113, + "learning_rate": 4.4960415743818327e-07, + "loss": 0.0, + "num_input_tokens_seen": 98893520, + "step": 146740 + }, + { + "epoch": 3.5850047638824423, + "grad_norm": 0.0002892829361371696, + "learning_rate": 4.495329608445998e-07, + "loss": 0.0, + "num_input_tokens_seen": 98896528, + "step": 146745 + }, + { + "epoch": 3.585126914714289, + "grad_norm": 0.06882300227880478, + "learning_rate": 4.494617682541604e-07, + "loss": 0.062, + "num_input_tokens_seen": 98899856, + "step": 146750 + }, + { + "epoch": 3.585249065546136, + "grad_norm": 0.0031155727338045835, + "learning_rate": 4.4939057966738304e-07, + "loss": 0.0, + "num_input_tokens_seen": 98903440, + "step": 146755 + }, + { + "epoch": 3.5853712163779834, + "grad_norm": 0.00041419637273065746, + "learning_rate": 4.4931939508478575e-07, + "loss": 0.0, + "num_input_tokens_seen": 98906512, + "step": 146760 + }, + { + "epoch": 3.5854933672098306, + "grad_norm": 0.0037223040126264095, + "learning_rate": 4.4924821450688575e-07, + "loss": 0.0, + "num_input_tokens_seen": 98909520, + "step": 146765 + }, + { + "epoch": 3.5856155180416778, + "grad_norm": 1.3689956665039062, + "learning_rate": 4.4917703793420116e-07, + "loss": 0.0003, + "num_input_tokens_seen": 98912400, + "step": 146770 + }, + { + "epoch": 3.585737668873525, + "grad_norm": 0.0005561459111049771, + "learning_rate": 4.4910586536724893e-07, + "loss": 0.0, + "num_input_tokens_seen": 98915920, + "step": 146775 + }, + { + "epoch": 3.585859819705372, + "grad_norm": 7.715138781350106e-05, + "learning_rate": 4.4903469680654703e-07, + "loss": 0.0, + "num_input_tokens_seen": 98919184, + "step": 146780 + }, + { + "epoch": 3.5859819705372193, + "grad_norm": 0.002350148744881153, + "learning_rate": 4.489635322526134e-07, + "loss": 0.0, + "num_input_tokens_seen": 98922640, + "step": 146785 + }, + { + "epoch": 3.5861041213690665, + "grad_norm": 0.013689137063920498, + "learning_rate": 4.488923717059647e-07, + "loss": 0.0, + "num_input_tokens_seen": 98925712, + "step": 146790 + }, + { + "epoch": 3.5862262722009137, + "grad_norm": 0.006791385821998119, + "learning_rate": 4.4882121516711937e-07, + "loss": 0.0, + "num_input_tokens_seen": 98929168, + "step": 146795 + }, + { + "epoch": 3.586348423032761, + "grad_norm": 0.15933595597743988, + "learning_rate": 4.4875006263659445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98932752, + "step": 146800 + }, + { + "epoch": 3.586470573864608, + "grad_norm": 60.48442077636719, + "learning_rate": 4.486789141149069e-07, + "loss": 0.083, + "num_input_tokens_seen": 98936208, + "step": 146805 + }, + { + "epoch": 3.5865927246964553, + "grad_norm": 0.0009146135998889804, + "learning_rate": 4.4860776960257495e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98939280, + "step": 146810 + }, + { + "epoch": 3.5867148755283025, + "grad_norm": 0.0010512598091736436, + "learning_rate": 4.4853662910011524e-07, + "loss": 0.0, + "num_input_tokens_seen": 98942352, + "step": 146815 + }, + { + "epoch": 3.5868370263601497, + "grad_norm": 0.008385025896131992, + "learning_rate": 4.484654926080459e-07, + "loss": 0.0002, + "num_input_tokens_seen": 98945680, + "step": 146820 + }, + { + "epoch": 3.5869591771919964, + "grad_norm": 0.00016320282884407789, + "learning_rate": 4.4839436012688336e-07, + "loss": 0.0, + "num_input_tokens_seen": 98949520, + "step": 146825 + }, + { + "epoch": 3.587081328023844, + "grad_norm": 0.009338638745248318, + "learning_rate": 4.483232316571459e-07, + "loss": 0.0, + "num_input_tokens_seen": 98952848, + "step": 146830 + }, + { + "epoch": 3.587203478855691, + "grad_norm": 0.15116487443447113, + "learning_rate": 4.482521071993498e-07, + "loss": 0.0001, + "num_input_tokens_seen": 98956112, + "step": 146835 + }, + { + "epoch": 3.5873256296875384, + "grad_norm": 0.0012497843708842993, + "learning_rate": 4.481809867540133e-07, + "loss": 0.0, + "num_input_tokens_seen": 98959376, + "step": 146840 + }, + { + "epoch": 3.587447780519385, + "grad_norm": 0.0009556132717989385, + "learning_rate": 4.4810987032165257e-07, + "loss": 0.0, + "num_input_tokens_seen": 98963536, + "step": 146845 + }, + { + "epoch": 3.5875699313512324, + "grad_norm": 0.00022231374168768525, + "learning_rate": 4.480387579027853e-07, + "loss": 0.0, + "num_input_tokens_seen": 98966800, + "step": 146850 + }, + { + "epoch": 3.5876920821830796, + "grad_norm": 0.0007160860113799572, + "learning_rate": 4.479676494979291e-07, + "loss": 0.0, + "num_input_tokens_seen": 98970192, + "step": 146855 + }, + { + "epoch": 3.5878142330149267, + "grad_norm": 0.003910457249730825, + "learning_rate": 4.4789654510760023e-07, + "loss": 0.0, + "num_input_tokens_seen": 98973072, + "step": 146860 + }, + { + "epoch": 3.587936383846774, + "grad_norm": 0.011815412901341915, + "learning_rate": 4.478254447323165e-07, + "loss": 0.0453, + "num_input_tokens_seen": 98976272, + "step": 146865 + }, + { + "epoch": 3.588058534678621, + "grad_norm": 7.326767081394792e-05, + "learning_rate": 4.477543483725944e-07, + "loss": 0.0, + "num_input_tokens_seen": 98979728, + "step": 146870 + }, + { + "epoch": 3.5881806855104683, + "grad_norm": 0.11559463292360306, + "learning_rate": 4.4768325602895116e-07, + "loss": 0.0, + "num_input_tokens_seen": 98983312, + "step": 146875 + }, + { + "epoch": 3.5883028363423155, + "grad_norm": 0.004063963890075684, + "learning_rate": 4.476121677019042e-07, + "loss": 0.0, + "num_input_tokens_seen": 98986512, + "step": 146880 + }, + { + "epoch": 3.5884249871741627, + "grad_norm": 0.007323488127440214, + "learning_rate": 4.4754108339196974e-07, + "loss": 0.0224, + "num_input_tokens_seen": 98989584, + "step": 146885 + }, + { + "epoch": 3.58854713800601, + "grad_norm": 0.0003849711793009192, + "learning_rate": 4.4747000309966553e-07, + "loss": 0.0, + "num_input_tokens_seen": 98993104, + "step": 146890 + }, + { + "epoch": 3.588669288837857, + "grad_norm": 0.00032672181259840727, + "learning_rate": 4.4739892682550763e-07, + "loss": 0.0, + "num_input_tokens_seen": 98996240, + "step": 146895 + }, + { + "epoch": 3.5887914396697043, + "grad_norm": 0.02428433485329151, + "learning_rate": 4.4732785457001375e-07, + "loss": 0.0, + "num_input_tokens_seen": 98999696, + "step": 146900 + }, + { + "epoch": 3.5889135905015515, + "grad_norm": 0.0003919892478734255, + "learning_rate": 4.472567863337001e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99003344, + "step": 146905 + }, + { + "epoch": 3.5890357413333986, + "grad_norm": 0.002559587825089693, + "learning_rate": 4.4718572211708406e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99007184, + "step": 146910 + }, + { + "epoch": 3.589157892165246, + "grad_norm": 0.0003037639253307134, + "learning_rate": 4.4711466192068215e-07, + "loss": 0.0, + "num_input_tokens_seen": 99010704, + "step": 146915 + }, + { + "epoch": 3.5892800429970926, + "grad_norm": 0.0018825943116098642, + "learning_rate": 4.4704360574501075e-07, + "loss": 0.0725, + "num_input_tokens_seen": 99014416, + "step": 146920 + }, + { + "epoch": 3.58940219382894, + "grad_norm": 0.00019972519658040255, + "learning_rate": 4.469725535905873e-07, + "loss": 0.0, + "num_input_tokens_seen": 99018384, + "step": 146925 + }, + { + "epoch": 3.589524344660787, + "grad_norm": 0.0024977638386189938, + "learning_rate": 4.4690150545792784e-07, + "loss": 0.0, + "num_input_tokens_seen": 99021392, + "step": 146930 + }, + { + "epoch": 3.589646495492634, + "grad_norm": 0.0011735991574823856, + "learning_rate": 4.4683046134754976e-07, + "loss": 0.0, + "num_input_tokens_seen": 99024528, + "step": 146935 + }, + { + "epoch": 3.5897686463244813, + "grad_norm": 0.0015995007706806064, + "learning_rate": 4.46759421259969e-07, + "loss": 0.0545, + "num_input_tokens_seen": 99027536, + "step": 146940 + }, + { + "epoch": 3.5898907971563285, + "grad_norm": 0.0017878710059449077, + "learning_rate": 4.466883851957026e-07, + "loss": 0.0, + "num_input_tokens_seen": 99033232, + "step": 146945 + }, + { + "epoch": 3.5900129479881757, + "grad_norm": 0.00105475343298167, + "learning_rate": 4.466173531552674e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99036368, + "step": 146950 + }, + { + "epoch": 3.590135098820023, + "grad_norm": 0.0045726606622338295, + "learning_rate": 4.465463251391792e-07, + "loss": 0.0, + "num_input_tokens_seen": 99039632, + "step": 146955 + }, + { + "epoch": 3.59025724965187, + "grad_norm": 0.0008261458133347332, + "learning_rate": 4.464753011479555e-07, + "loss": 0.0, + "num_input_tokens_seen": 99043152, + "step": 146960 + }, + { + "epoch": 3.5903794004837173, + "grad_norm": 0.35889339447021484, + "learning_rate": 4.464042811821118e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99048400, + "step": 146965 + }, + { + "epoch": 3.5905015513155645, + "grad_norm": 0.0002090990892611444, + "learning_rate": 4.4633326524216517e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99051664, + "step": 146970 + }, + { + "epoch": 3.5906237021474117, + "grad_norm": 0.001780422287993133, + "learning_rate": 4.462622533286322e-07, + "loss": 0.0, + "num_input_tokens_seen": 99054608, + "step": 146975 + }, + { + "epoch": 3.590745852979259, + "grad_norm": 0.0003043642791453749, + "learning_rate": 4.461912454420288e-07, + "loss": 0.0, + "num_input_tokens_seen": 99058320, + "step": 146980 + }, + { + "epoch": 3.590868003811106, + "grad_norm": 0.0010360745945945382, + "learning_rate": 4.4612024158287196e-07, + "loss": 0.0, + "num_input_tokens_seen": 99061648, + "step": 146985 + }, + { + "epoch": 3.5909901546429532, + "grad_norm": 0.013427109457552433, + "learning_rate": 4.4604924175167737e-07, + "loss": 0.0, + "num_input_tokens_seen": 99065104, + "step": 146990 + }, + { + "epoch": 3.5911123054748004, + "grad_norm": 0.0010030671255663037, + "learning_rate": 4.4597824594896194e-07, + "loss": 0.0, + "num_input_tokens_seen": 99068432, + "step": 146995 + }, + { + "epoch": 3.5912344563066476, + "grad_norm": 0.00030616443837061524, + "learning_rate": 4.459072541752414e-07, + "loss": 0.0, + "num_input_tokens_seen": 99071888, + "step": 147000 + }, + { + "epoch": 3.5913566071384944, + "grad_norm": 0.0007564072730019689, + "learning_rate": 4.4583626643103276e-07, + "loss": 0.1026, + "num_input_tokens_seen": 99075152, + "step": 147005 + }, + { + "epoch": 3.591478757970342, + "grad_norm": 0.003279258031398058, + "learning_rate": 4.4576528271685143e-07, + "loss": 0.0, + "num_input_tokens_seen": 99078608, + "step": 147010 + }, + { + "epoch": 3.5916009088021887, + "grad_norm": 0.0007738301646895707, + "learning_rate": 4.4569430303321445e-07, + "loss": 0.0, + "num_input_tokens_seen": 99082256, + "step": 147015 + }, + { + "epoch": 3.5917230596340364, + "grad_norm": 0.0013760982546955347, + "learning_rate": 4.456233273806376e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99085520, + "step": 147020 + }, + { + "epoch": 3.591845210465883, + "grad_norm": 0.012807670049369335, + "learning_rate": 4.4555235575963655e-07, + "loss": 0.0, + "num_input_tokens_seen": 99089040, + "step": 147025 + }, + { + "epoch": 3.5919673612977303, + "grad_norm": 0.00666635949164629, + "learning_rate": 4.45481388170728e-07, + "loss": 0.0, + "num_input_tokens_seen": 99092560, + "step": 147030 + }, + { + "epoch": 3.5920895121295775, + "grad_norm": 44.692710876464844, + "learning_rate": 4.4541042461442824e-07, + "loss": 0.0414, + "num_input_tokens_seen": 99095952, + "step": 147035 + }, + { + "epoch": 3.5922116629614247, + "grad_norm": 0.0031907649245113134, + "learning_rate": 4.4533946509125267e-07, + "loss": 0.0, + "num_input_tokens_seen": 99099536, + "step": 147040 + }, + { + "epoch": 3.592333813793272, + "grad_norm": 0.011112304404377937, + "learning_rate": 4.4526850960171813e-07, + "loss": 0.0, + "num_input_tokens_seen": 99102544, + "step": 147045 + }, + { + "epoch": 3.592455964625119, + "grad_norm": 0.0003050380910281092, + "learning_rate": 4.4519755814633974e-07, + "loss": 0.0, + "num_input_tokens_seen": 99105616, + "step": 147050 + }, + { + "epoch": 3.5925781154569663, + "grad_norm": 0.0003009406791534275, + "learning_rate": 4.451266107256344e-07, + "loss": 0.0, + "num_input_tokens_seen": 99109008, + "step": 147055 + }, + { + "epoch": 3.5927002662888134, + "grad_norm": 0.00098048010841012, + "learning_rate": 4.4505566734011713e-07, + "loss": 0.0304, + "num_input_tokens_seen": 99111888, + "step": 147060 + }, + { + "epoch": 3.5928224171206606, + "grad_norm": 0.0001502260274719447, + "learning_rate": 4.449847279903044e-07, + "loss": 0.0898, + "num_input_tokens_seen": 99115024, + "step": 147065 + }, + { + "epoch": 3.592944567952508, + "grad_norm": 0.008276687934994698, + "learning_rate": 4.4491379267671227e-07, + "loss": 0.0, + "num_input_tokens_seen": 99118288, + "step": 147070 + }, + { + "epoch": 3.593066718784355, + "grad_norm": 0.0011936317896470428, + "learning_rate": 4.4484286139985604e-07, + "loss": 0.0, + "num_input_tokens_seen": 99121744, + "step": 147075 + }, + { + "epoch": 3.593188869616202, + "grad_norm": 0.001968457130715251, + "learning_rate": 4.447719341602522e-07, + "loss": 0.0, + "num_input_tokens_seen": 99125136, + "step": 147080 + }, + { + "epoch": 3.5933110204480494, + "grad_norm": 0.0005487522576004267, + "learning_rate": 4.4470101095841584e-07, + "loss": 0.0, + "num_input_tokens_seen": 99128336, + "step": 147085 + }, + { + "epoch": 3.593433171279896, + "grad_norm": 0.001973625971004367, + "learning_rate": 4.4463009179486344e-07, + "loss": 0.0, + "num_input_tokens_seen": 99131728, + "step": 147090 + }, + { + "epoch": 3.5935553221117438, + "grad_norm": 0.0011422754032537341, + "learning_rate": 4.4455917667011e-07, + "loss": 0.0, + "num_input_tokens_seen": 99135568, + "step": 147095 + }, + { + "epoch": 3.5936774729435905, + "grad_norm": 0.33334842324256897, + "learning_rate": 4.444882655846717e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99138512, + "step": 147100 + }, + { + "epoch": 3.593799623775438, + "grad_norm": 0.0004663039289880544, + "learning_rate": 4.4441735853906437e-07, + "loss": 0.0, + "num_input_tokens_seen": 99141648, + "step": 147105 + }, + { + "epoch": 3.593921774607285, + "grad_norm": 0.01186708640307188, + "learning_rate": 4.443464555338031e-07, + "loss": 0.0524, + "num_input_tokens_seen": 99144592, + "step": 147110 + }, + { + "epoch": 3.594043925439132, + "grad_norm": 0.007880584336817265, + "learning_rate": 4.4427555656940443e-07, + "loss": 0.0, + "num_input_tokens_seen": 99147664, + "step": 147115 + }, + { + "epoch": 3.5941660762709793, + "grad_norm": 0.0002760709321592003, + "learning_rate": 4.4420466164638316e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99150736, + "step": 147120 + }, + { + "epoch": 3.5942882271028265, + "grad_norm": 0.06267506629228592, + "learning_rate": 4.4413377076525484e-07, + "loss": 0.0, + "num_input_tokens_seen": 99153936, + "step": 147125 + }, + { + "epoch": 3.5944103779346737, + "grad_norm": 0.0029153351206332445, + "learning_rate": 4.4406288392653556e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99157136, + "step": 147130 + }, + { + "epoch": 3.594532528766521, + "grad_norm": 0.0005900236428715289, + "learning_rate": 4.4399200113074006e-07, + "loss": 0.0, + "num_input_tokens_seen": 99160592, + "step": 147135 + }, + { + "epoch": 3.594654679598368, + "grad_norm": 0.005414668936282396, + "learning_rate": 4.439211223783849e-07, + "loss": 0.0286, + "num_input_tokens_seen": 99164112, + "step": 147140 + }, + { + "epoch": 3.5947768304302152, + "grad_norm": 19.3892822265625, + "learning_rate": 4.4385024766998426e-07, + "loss": 0.0962, + "num_input_tokens_seen": 99167120, + "step": 147145 + }, + { + "epoch": 3.5948989812620624, + "grad_norm": 0.0007410175749100745, + "learning_rate": 4.4377937700605473e-07, + "loss": 0.0, + "num_input_tokens_seen": 99170448, + "step": 147150 + }, + { + "epoch": 3.5950211320939096, + "grad_norm": 0.003837596857920289, + "learning_rate": 4.437085103871108e-07, + "loss": 0.0, + "num_input_tokens_seen": 99173776, + "step": 147155 + }, + { + "epoch": 3.595143282925757, + "grad_norm": 0.00045322536607272923, + "learning_rate": 4.4363764781366817e-07, + "loss": 0.0, + "num_input_tokens_seen": 99177360, + "step": 147160 + }, + { + "epoch": 3.595265433757604, + "grad_norm": 0.0013312663650140166, + "learning_rate": 4.435667892862426e-07, + "loss": 0.0, + "num_input_tokens_seen": 99180752, + "step": 147165 + }, + { + "epoch": 3.595387584589451, + "grad_norm": 0.006688072811812162, + "learning_rate": 4.4349593480534854e-07, + "loss": 0.0052, + "num_input_tokens_seen": 99184208, + "step": 147170 + }, + { + "epoch": 3.5955097354212984, + "grad_norm": 0.05550628900527954, + "learning_rate": 4.4342508437150214e-07, + "loss": 0.0, + "num_input_tokens_seen": 99187536, + "step": 147175 + }, + { + "epoch": 3.5956318862531456, + "grad_norm": 0.002747165272012353, + "learning_rate": 4.43354237985218e-07, + "loss": 0.0, + "num_input_tokens_seen": 99190736, + "step": 147180 + }, + { + "epoch": 3.5957540370849923, + "grad_norm": 0.00022270670160651207, + "learning_rate": 4.4328339564701143e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99194448, + "step": 147185 + }, + { + "epoch": 3.59587618791684, + "grad_norm": 0.016678297892212868, + "learning_rate": 4.4321255735739816e-07, + "loss": 0.0, + "num_input_tokens_seen": 99197584, + "step": 147190 + }, + { + "epoch": 3.5959983387486867, + "grad_norm": 0.007575098890811205, + "learning_rate": 4.4314172311689244e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99201616, + "step": 147195 + }, + { + "epoch": 3.5961204895805343, + "grad_norm": 0.027997225522994995, + "learning_rate": 4.430708929260104e-07, + "loss": 0.0, + "num_input_tokens_seen": 99204944, + "step": 147200 + }, + { + "epoch": 3.596242640412381, + "grad_norm": 0.0010609532473608851, + "learning_rate": 4.4300006678526615e-07, + "loss": 0.0, + "num_input_tokens_seen": 99208272, + "step": 147205 + }, + { + "epoch": 3.5963647912442283, + "grad_norm": 0.0009776917286217213, + "learning_rate": 4.429292446951757e-07, + "loss": 0.0, + "num_input_tokens_seen": 99211664, + "step": 147210 + }, + { + "epoch": 3.5964869420760754, + "grad_norm": 0.0003319795650895685, + "learning_rate": 4.4285842665625317e-07, + "loss": 0.0, + "num_input_tokens_seen": 99214992, + "step": 147215 + }, + { + "epoch": 3.5966090929079226, + "grad_norm": 0.00017459203081671149, + "learning_rate": 4.4278761266901444e-07, + "loss": 0.0, + "num_input_tokens_seen": 99218384, + "step": 147220 + }, + { + "epoch": 3.59673124373977, + "grad_norm": 0.09625773131847382, + "learning_rate": 4.4271680273397404e-07, + "loss": 0.0567, + "num_input_tokens_seen": 99221840, + "step": 147225 + }, + { + "epoch": 3.596853394571617, + "grad_norm": 0.0011675640707835555, + "learning_rate": 4.426459968516466e-07, + "loss": 0.0, + "num_input_tokens_seen": 99224784, + "step": 147230 + }, + { + "epoch": 3.596975545403464, + "grad_norm": 0.01471780426800251, + "learning_rate": 4.425751950225477e-07, + "loss": 0.0, + "num_input_tokens_seen": 99227536, + "step": 147235 + }, + { + "epoch": 3.5970976962353114, + "grad_norm": 0.001742966822348535, + "learning_rate": 4.425043972471916e-07, + "loss": 0.0, + "num_input_tokens_seen": 99231184, + "step": 147240 + }, + { + "epoch": 3.5972198470671586, + "grad_norm": 0.0022475863806903362, + "learning_rate": 4.424336035260937e-07, + "loss": 0.0, + "num_input_tokens_seen": 99234768, + "step": 147245 + }, + { + "epoch": 3.5973419978990058, + "grad_norm": 0.006544165778905153, + "learning_rate": 4.423628138597684e-07, + "loss": 0.0, + "num_input_tokens_seen": 99238032, + "step": 147250 + }, + { + "epoch": 3.597464148730853, + "grad_norm": 0.006454230286180973, + "learning_rate": 4.4229202824873056e-07, + "loss": 0.0, + "num_input_tokens_seen": 99241168, + "step": 147255 + }, + { + "epoch": 3.5975862995627, + "grad_norm": 0.002534851199015975, + "learning_rate": 4.422212466934956e-07, + "loss": 0.0035, + "num_input_tokens_seen": 99244560, + "step": 147260 + }, + { + "epoch": 3.5977084503945473, + "grad_norm": 0.0012128119124099612, + "learning_rate": 4.421504691945773e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99247824, + "step": 147265 + }, + { + "epoch": 3.597830601226394, + "grad_norm": 0.001539935707114637, + "learning_rate": 4.4207969575249126e-07, + "loss": 0.075, + "num_input_tokens_seen": 99251280, + "step": 147270 + }, + { + "epoch": 3.5979527520582417, + "grad_norm": 0.004851538222283125, + "learning_rate": 4.420089263677513e-07, + "loss": 0.0, + "num_input_tokens_seen": 99254800, + "step": 147275 + }, + { + "epoch": 3.5980749028900885, + "grad_norm": 0.0061553469859063625, + "learning_rate": 4.4193816104087257e-07, + "loss": 0.0, + "num_input_tokens_seen": 99258256, + "step": 147280 + }, + { + "epoch": 3.598197053721936, + "grad_norm": 0.0033780173398554325, + "learning_rate": 4.4186739977236997e-07, + "loss": 0.0, + "num_input_tokens_seen": 99261392, + "step": 147285 + }, + { + "epoch": 3.598319204553783, + "grad_norm": 0.011429624632000923, + "learning_rate": 4.417966425627574e-07, + "loss": 0.0, + "num_input_tokens_seen": 99264592, + "step": 147290 + }, + { + "epoch": 3.59844135538563, + "grad_norm": 16.78447723388672, + "learning_rate": 4.417258894125502e-07, + "loss": 0.0326, + "num_input_tokens_seen": 99268048, + "step": 147295 + }, + { + "epoch": 3.5985635062174772, + "grad_norm": 0.000813651888165623, + "learning_rate": 4.4165514032226205e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99271504, + "step": 147300 + }, + { + "epoch": 3.5986856570493244, + "grad_norm": 0.3993775546550751, + "learning_rate": 4.4158439529240834e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99274768, + "step": 147305 + }, + { + "epoch": 3.5988078078811716, + "grad_norm": 0.0026703726034611464, + "learning_rate": 4.4151365432350264e-07, + "loss": 0.0, + "num_input_tokens_seen": 99278416, + "step": 147310 + }, + { + "epoch": 3.598929958713019, + "grad_norm": 0.0016981259686872363, + "learning_rate": 4.414429174160603e-07, + "loss": 0.0, + "num_input_tokens_seen": 99281680, + "step": 147315 + }, + { + "epoch": 3.599052109544866, + "grad_norm": 0.00385949295014143, + "learning_rate": 4.4137218457059487e-07, + "loss": 0.0, + "num_input_tokens_seen": 99285072, + "step": 147320 + }, + { + "epoch": 3.599174260376713, + "grad_norm": 0.001972557744011283, + "learning_rate": 4.413014557876216e-07, + "loss": 0.0224, + "num_input_tokens_seen": 99288144, + "step": 147325 + }, + { + "epoch": 3.5992964112085604, + "grad_norm": 0.0009733116021379828, + "learning_rate": 4.412307310676544e-07, + "loss": 0.0, + "num_input_tokens_seen": 99291472, + "step": 147330 + }, + { + "epoch": 3.5994185620404076, + "grad_norm": 0.002177638001739979, + "learning_rate": 4.4116001041120723e-07, + "loss": 0.0, + "num_input_tokens_seen": 99294992, + "step": 147335 + }, + { + "epoch": 3.5995407128722547, + "grad_norm": 0.0006871279911138117, + "learning_rate": 4.410892938187948e-07, + "loss": 0.0, + "num_input_tokens_seen": 99298448, + "step": 147340 + }, + { + "epoch": 3.599662863704102, + "grad_norm": 0.007582599762827158, + "learning_rate": 4.410185812909316e-07, + "loss": 0.0, + "num_input_tokens_seen": 99301584, + "step": 147345 + }, + { + "epoch": 3.599785014535949, + "grad_norm": 0.006781270261853933, + "learning_rate": 4.409478728281314e-07, + "loss": 0.0, + "num_input_tokens_seen": 99305424, + "step": 147350 + }, + { + "epoch": 3.5999071653677963, + "grad_norm": 7.533630559919402e-05, + "learning_rate": 4.4087716843090897e-07, + "loss": 0.0, + "num_input_tokens_seen": 99308944, + "step": 147355 + }, + { + "epoch": 3.6000293161996435, + "grad_norm": 5.5826687457738444e-05, + "learning_rate": 4.408064680997777e-07, + "loss": 0.0, + "num_input_tokens_seen": 99313040, + "step": 147360 + }, + { + "epoch": 3.6001514670314902, + "grad_norm": 0.0023900719825178385, + "learning_rate": 4.407357718352527e-07, + "loss": 0.0, + "num_input_tokens_seen": 99316368, + "step": 147365 + }, + { + "epoch": 3.600273617863338, + "grad_norm": 0.0005167967756278813, + "learning_rate": 4.406650796378472e-07, + "loss": 0.0, + "num_input_tokens_seen": 99319440, + "step": 147370 + }, + { + "epoch": 3.6003957686951846, + "grad_norm": 0.0021112055983394384, + "learning_rate": 4.4059439150807566e-07, + "loss": 0.0, + "num_input_tokens_seen": 99322960, + "step": 147375 + }, + { + "epoch": 3.600517919527032, + "grad_norm": 0.00010200147517025471, + "learning_rate": 4.405237074464525e-07, + "loss": 0.1013, + "num_input_tokens_seen": 99326416, + "step": 147380 + }, + { + "epoch": 3.600640070358879, + "grad_norm": 3.1429230148205534e-05, + "learning_rate": 4.404530274534911e-07, + "loss": 0.0, + "num_input_tokens_seen": 99329744, + "step": 147385 + }, + { + "epoch": 3.600762221190726, + "grad_norm": 0.001426719594746828, + "learning_rate": 4.4038235152970606e-07, + "loss": 0.0, + "num_input_tokens_seen": 99333200, + "step": 147390 + }, + { + "epoch": 3.6008843720225734, + "grad_norm": 0.02611630968749523, + "learning_rate": 4.403116796756108e-07, + "loss": 0.0, + "num_input_tokens_seen": 99336784, + "step": 147395 + }, + { + "epoch": 3.6010065228544206, + "grad_norm": 0.0002963593287859112, + "learning_rate": 4.402410118917199e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99340048, + "step": 147400 + }, + { + "epoch": 3.6011286736862678, + "grad_norm": 0.010210043750703335, + "learning_rate": 4.401703481785466e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99343568, + "step": 147405 + }, + { + "epoch": 3.601250824518115, + "grad_norm": 0.6873293519020081, + "learning_rate": 4.4009968853660496e-07, + "loss": 0.059, + "num_input_tokens_seen": 99346576, + "step": 147410 + }, + { + "epoch": 3.601372975349962, + "grad_norm": 0.00032992829801514745, + "learning_rate": 4.4002903296640947e-07, + "loss": 0.0, + "num_input_tokens_seen": 99349840, + "step": 147415 + }, + { + "epoch": 3.6014951261818093, + "grad_norm": 0.003081189002841711, + "learning_rate": 4.399583814684731e-07, + "loss": 0.0, + "num_input_tokens_seen": 99353360, + "step": 147420 + }, + { + "epoch": 3.6016172770136565, + "grad_norm": 0.00017771427519619465, + "learning_rate": 4.3988773404331027e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99356880, + "step": 147425 + }, + { + "epoch": 3.6017394278455037, + "grad_norm": 0.001582012395374477, + "learning_rate": 4.398170906914346e-07, + "loss": 0.0, + "num_input_tokens_seen": 99359952, + "step": 147430 + }, + { + "epoch": 3.601861578677351, + "grad_norm": 0.0001949633879121393, + "learning_rate": 4.397464514133593e-07, + "loss": 0.0, + "num_input_tokens_seen": 99363408, + "step": 147435 + }, + { + "epoch": 3.601983729509198, + "grad_norm": 0.006579456850886345, + "learning_rate": 4.3967581620959893e-07, + "loss": 0.0349, + "num_input_tokens_seen": 99367184, + "step": 147440 + }, + { + "epoch": 3.6021058803410453, + "grad_norm": 0.0007167812436819077, + "learning_rate": 4.3960518508066624e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99370640, + "step": 147445 + }, + { + "epoch": 3.602228031172892, + "grad_norm": 371.5362548828125, + "learning_rate": 4.3953455802707587e-07, + "loss": 0.0122, + "num_input_tokens_seen": 99373904, + "step": 147450 + }, + { + "epoch": 3.6023501820047397, + "grad_norm": 0.09295155853033066, + "learning_rate": 4.3946393504934054e-07, + "loss": 0.0, + "num_input_tokens_seen": 99377744, + "step": 147455 + }, + { + "epoch": 3.6024723328365864, + "grad_norm": 0.008168951608240604, + "learning_rate": 4.3939331614797457e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99380752, + "step": 147460 + }, + { + "epoch": 3.602594483668434, + "grad_norm": 0.003420223481953144, + "learning_rate": 4.393227013234908e-07, + "loss": 0.0, + "num_input_tokens_seen": 99383568, + "step": 147465 + }, + { + "epoch": 3.602716634500281, + "grad_norm": 0.03465414419770241, + "learning_rate": 4.392520905764032e-07, + "loss": 0.0, + "num_input_tokens_seen": 99386704, + "step": 147470 + }, + { + "epoch": 3.602838785332128, + "grad_norm": 0.00030073043308220804, + "learning_rate": 4.391814839072255e-07, + "loss": 0.0, + "num_input_tokens_seen": 99390032, + "step": 147475 + }, + { + "epoch": 3.602960936163975, + "grad_norm": 0.002083902945742011, + "learning_rate": 4.3911088131647066e-07, + "loss": 0.0, + "num_input_tokens_seen": 99392976, + "step": 147480 + }, + { + "epoch": 3.6030830869958224, + "grad_norm": 0.005192081443965435, + "learning_rate": 4.3904028280465267e-07, + "loss": 0.0, + "num_input_tokens_seen": 99396496, + "step": 147485 + }, + { + "epoch": 3.6032052378276695, + "grad_norm": 0.0013596249045804143, + "learning_rate": 4.389696883722842e-07, + "loss": 0.0, + "num_input_tokens_seen": 99400400, + "step": 147490 + }, + { + "epoch": 3.6033273886595167, + "grad_norm": 0.0020743070635944605, + "learning_rate": 4.3889909801987955e-07, + "loss": 0.0, + "num_input_tokens_seen": 99404496, + "step": 147495 + }, + { + "epoch": 3.603449539491364, + "grad_norm": 0.027771538123488426, + "learning_rate": 4.3882851174795113e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99408208, + "step": 147500 + }, + { + "epoch": 3.603571690323211, + "grad_norm": 0.0006288870936259627, + "learning_rate": 4.387579295570126e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99411280, + "step": 147505 + }, + { + "epoch": 3.6036938411550583, + "grad_norm": 59.22822570800781, + "learning_rate": 4.3868735144757787e-07, + "loss": 0.0457, + "num_input_tokens_seen": 99414672, + "step": 147510 + }, + { + "epoch": 3.6038159919869055, + "grad_norm": 0.006329473108053207, + "learning_rate": 4.3861677742015937e-07, + "loss": 0.0, + "num_input_tokens_seen": 99417872, + "step": 147515 + }, + { + "epoch": 3.6039381428187527, + "grad_norm": 0.13014653325080872, + "learning_rate": 4.3854620747527095e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99421328, + "step": 147520 + }, + { + "epoch": 3.6040602936506, + "grad_norm": 0.0023727233055979013, + "learning_rate": 4.384756416134251e-07, + "loss": 0.0, + "num_input_tokens_seen": 99424528, + "step": 147525 + }, + { + "epoch": 3.604182444482447, + "grad_norm": 0.00016004152712412179, + "learning_rate": 4.3840507983513585e-07, + "loss": 0.0, + "num_input_tokens_seen": 99427664, + "step": 147530 + }, + { + "epoch": 3.604304595314294, + "grad_norm": 0.00026830736896954477, + "learning_rate": 4.383345221409159e-07, + "loss": 0.0, + "num_input_tokens_seen": 99430864, + "step": 147535 + }, + { + "epoch": 3.6044267461461414, + "grad_norm": 0.04292040690779686, + "learning_rate": 4.38263968531278e-07, + "loss": 0.1113, + "num_input_tokens_seen": 99434128, + "step": 147540 + }, + { + "epoch": 3.604548896977988, + "grad_norm": 0.06387756019830704, + "learning_rate": 4.38193419006736e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99437584, + "step": 147545 + }, + { + "epoch": 3.604671047809836, + "grad_norm": 0.006251856684684753, + "learning_rate": 4.381228735678022e-07, + "loss": 0.0, + "num_input_tokens_seen": 99440656, + "step": 147550 + }, + { + "epoch": 3.6047931986416826, + "grad_norm": 0.06982557475566864, + "learning_rate": 4.380523322149903e-07, + "loss": 0.0, + "num_input_tokens_seen": 99444560, + "step": 147555 + }, + { + "epoch": 3.6049153494735298, + "grad_norm": 0.004347453825175762, + "learning_rate": 4.379817949488127e-07, + "loss": 0.0, + "num_input_tokens_seen": 99447824, + "step": 147560 + }, + { + "epoch": 3.605037500305377, + "grad_norm": 0.00021586604998447, + "learning_rate": 4.3791126176978254e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99452304, + "step": 147565 + }, + { + "epoch": 3.605159651137224, + "grad_norm": 0.0011594532988965511, + "learning_rate": 4.378407326784134e-07, + "loss": 0.0, + "num_input_tokens_seen": 99455888, + "step": 147570 + }, + { + "epoch": 3.6052818019690713, + "grad_norm": 0.00046078336890786886, + "learning_rate": 4.377702076752171e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99459408, + "step": 147575 + }, + { + "epoch": 3.6054039528009185, + "grad_norm": 0.01156389620155096, + "learning_rate": 4.3769968676070757e-07, + "loss": 0.0, + "num_input_tokens_seen": 99462928, + "step": 147580 + }, + { + "epoch": 3.6055261036327657, + "grad_norm": 0.008867439813911915, + "learning_rate": 4.376291699353968e-07, + "loss": 0.0, + "num_input_tokens_seen": 99466128, + "step": 147585 + }, + { + "epoch": 3.605648254464613, + "grad_norm": 9.969405800802633e-05, + "learning_rate": 4.375586571997979e-07, + "loss": 0.0, + "num_input_tokens_seen": 99469328, + "step": 147590 + }, + { + "epoch": 3.60577040529646, + "grad_norm": 6.560365727636963e-05, + "learning_rate": 4.374881485544241e-07, + "loss": 0.0, + "num_input_tokens_seen": 99472848, + "step": 147595 + }, + { + "epoch": 3.6058925561283073, + "grad_norm": 0.0015670241555199027, + "learning_rate": 4.3741764399978743e-07, + "loss": 0.0, + "num_input_tokens_seen": 99476560, + "step": 147600 + }, + { + "epoch": 3.6060147069601545, + "grad_norm": 0.00015558060840703547, + "learning_rate": 4.3734714353640124e-07, + "loss": 0.0, + "num_input_tokens_seen": 99480016, + "step": 147605 + }, + { + "epoch": 3.6061368577920017, + "grad_norm": 0.001948792953044176, + "learning_rate": 4.3727664716477773e-07, + "loss": 0.0, + "num_input_tokens_seen": 99484304, + "step": 147610 + }, + { + "epoch": 3.606259008623849, + "grad_norm": 0.0182126946747303, + "learning_rate": 4.372061548854301e-07, + "loss": 0.0, + "num_input_tokens_seen": 99487440, + "step": 147615 + }, + { + "epoch": 3.606381159455696, + "grad_norm": 0.0016897412715479732, + "learning_rate": 4.371356666988704e-07, + "loss": 0.0258, + "num_input_tokens_seen": 99490640, + "step": 147620 + }, + { + "epoch": 3.6065033102875432, + "grad_norm": 0.010075540281832218, + "learning_rate": 4.3706518260561175e-07, + "loss": 0.0, + "num_input_tokens_seen": 99493776, + "step": 147625 + }, + { + "epoch": 3.60662546111939, + "grad_norm": 31.596620559692383, + "learning_rate": 4.369947026061662e-07, + "loss": 0.0891, + "num_input_tokens_seen": 99496912, + "step": 147630 + }, + { + "epoch": 3.6067476119512376, + "grad_norm": 0.0003606700920499861, + "learning_rate": 4.36924226701047e-07, + "loss": 0.0, + "num_input_tokens_seen": 99500304, + "step": 147635 + }, + { + "epoch": 3.6068697627830844, + "grad_norm": 0.004157507326453924, + "learning_rate": 4.368537548907663e-07, + "loss": 0.0, + "num_input_tokens_seen": 99503504, + "step": 147640 + }, + { + "epoch": 3.606991913614932, + "grad_norm": 0.0006897756829857826, + "learning_rate": 4.3678328717583614e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99507408, + "step": 147645 + }, + { + "epoch": 3.6071140644467787, + "grad_norm": 0.00255883508361876, + "learning_rate": 4.3671282355676976e-07, + "loss": 0.1092, + "num_input_tokens_seen": 99510928, + "step": 147650 + }, + { + "epoch": 3.607236215278626, + "grad_norm": 0.05700772628188133, + "learning_rate": 4.3664236403407886e-07, + "loss": 0.0476, + "num_input_tokens_seen": 99514192, + "step": 147655 + }, + { + "epoch": 3.607358366110473, + "grad_norm": 0.011682671494781971, + "learning_rate": 4.365719086082762e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99517392, + "step": 147660 + }, + { + "epoch": 3.6074805169423203, + "grad_norm": 0.0015562445623800159, + "learning_rate": 4.365014572798745e-07, + "loss": 0.0533, + "num_input_tokens_seen": 99520784, + "step": 147665 + }, + { + "epoch": 3.6076026677741675, + "grad_norm": 0.0011776462197303772, + "learning_rate": 4.364310100493855e-07, + "loss": 0.0, + "num_input_tokens_seen": 99524304, + "step": 147670 + }, + { + "epoch": 3.6077248186060147, + "grad_norm": 0.00629253638908267, + "learning_rate": 4.3636056691732214e-07, + "loss": 0.0, + "num_input_tokens_seen": 99527632, + "step": 147675 + }, + { + "epoch": 3.607846969437862, + "grad_norm": 0.08474673330783844, + "learning_rate": 4.3629012788419597e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99530896, + "step": 147680 + }, + { + "epoch": 3.607969120269709, + "grad_norm": 0.0008923867717385292, + "learning_rate": 4.3621969295051953e-07, + "loss": 0.0, + "num_input_tokens_seen": 99534032, + "step": 147685 + }, + { + "epoch": 3.6080912711015563, + "grad_norm": 0.007003965321928263, + "learning_rate": 4.361492621168056e-07, + "loss": 0.0, + "num_input_tokens_seen": 99537680, + "step": 147690 + }, + { + "epoch": 3.6082134219334034, + "grad_norm": 0.006107357330620289, + "learning_rate": 4.360788353835654e-07, + "loss": 0.0, + "num_input_tokens_seen": 99541136, + "step": 147695 + }, + { + "epoch": 3.6083355727652506, + "grad_norm": 0.003756742924451828, + "learning_rate": 4.3600841275131204e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99544336, + "step": 147700 + }, + { + "epoch": 3.608457723597098, + "grad_norm": 0.0005559317069128156, + "learning_rate": 4.359379942205568e-07, + "loss": 0.0, + "num_input_tokens_seen": 99548176, + "step": 147705 + }, + { + "epoch": 3.608579874428945, + "grad_norm": 0.004919090308248997, + "learning_rate": 4.3586757979181254e-07, + "loss": 0.0, + "num_input_tokens_seen": 99551376, + "step": 147710 + }, + { + "epoch": 3.6087020252607918, + "grad_norm": 0.003267875872552395, + "learning_rate": 4.3579716946559064e-07, + "loss": 0.0, + "num_input_tokens_seen": 99554704, + "step": 147715 + }, + { + "epoch": 3.6088241760926394, + "grad_norm": 0.005583520978689194, + "learning_rate": 4.357267632424034e-07, + "loss": 0.0, + "num_input_tokens_seen": 99558224, + "step": 147720 + }, + { + "epoch": 3.608946326924486, + "grad_norm": 0.003153529716655612, + "learning_rate": 4.356563611227634e-07, + "loss": 0.0451, + "num_input_tokens_seen": 99561168, + "step": 147725 + }, + { + "epoch": 3.6090684777563338, + "grad_norm": 0.001594212488271296, + "learning_rate": 4.3558596310718166e-07, + "loss": 0.0, + "num_input_tokens_seen": 99564560, + "step": 147730 + }, + { + "epoch": 3.6091906285881805, + "grad_norm": 0.0006904274341650307, + "learning_rate": 4.355155691961711e-07, + "loss": 0.0, + "num_input_tokens_seen": 99567696, + "step": 147735 + }, + { + "epoch": 3.6093127794200277, + "grad_norm": 0.0009084900375455618, + "learning_rate": 4.3544517939024304e-07, + "loss": 0.0, + "num_input_tokens_seen": 99570640, + "step": 147740 + }, + { + "epoch": 3.609434930251875, + "grad_norm": 0.4451712965965271, + "learning_rate": 4.3537479368990917e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99574096, + "step": 147745 + }, + { + "epoch": 3.609557081083722, + "grad_norm": 0.011974464170634747, + "learning_rate": 4.353044120956819e-07, + "loss": 0.0, + "num_input_tokens_seen": 99577616, + "step": 147750 + }, + { + "epoch": 3.6096792319155693, + "grad_norm": 0.0011010023299604654, + "learning_rate": 4.352340346080726e-07, + "loss": 0.0, + "num_input_tokens_seen": 99581456, + "step": 147755 + }, + { + "epoch": 3.6098013827474165, + "grad_norm": 0.0023438516072928905, + "learning_rate": 4.351636612275936e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99584912, + "step": 147760 + }, + { + "epoch": 3.6099235335792637, + "grad_norm": 0.027717119082808495, + "learning_rate": 4.350932919547561e-07, + "loss": 0.0, + "num_input_tokens_seen": 99588112, + "step": 147765 + }, + { + "epoch": 3.610045684411111, + "grad_norm": 0.0070284148678183556, + "learning_rate": 4.350229267900725e-07, + "loss": 0.0, + "num_input_tokens_seen": 99592400, + "step": 147770 + }, + { + "epoch": 3.610167835242958, + "grad_norm": 6.282122194534168e-05, + "learning_rate": 4.349525657340536e-07, + "loss": 0.0, + "num_input_tokens_seen": 99595664, + "step": 147775 + }, + { + "epoch": 3.6102899860748052, + "grad_norm": 0.028719637542963028, + "learning_rate": 4.348822087872117e-07, + "loss": 0.0005, + "num_input_tokens_seen": 99598608, + "step": 147780 + }, + { + "epoch": 3.6104121369066524, + "grad_norm": 0.004688702989369631, + "learning_rate": 4.3481185595005875e-07, + "loss": 0.0711, + "num_input_tokens_seen": 99601616, + "step": 147785 + }, + { + "epoch": 3.6105342877384996, + "grad_norm": 0.1381920576095581, + "learning_rate": 4.347415072231058e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99604944, + "step": 147790 + }, + { + "epoch": 3.610656438570347, + "grad_norm": 32.13233947753906, + "learning_rate": 4.3467116260686474e-07, + "loss": 0.0344, + "num_input_tokens_seen": 99608272, + "step": 147795 + }, + { + "epoch": 3.610778589402194, + "grad_norm": 0.002420686185359955, + "learning_rate": 4.3460082210184677e-07, + "loss": 0.0, + "num_input_tokens_seen": 99611664, + "step": 147800 + }, + { + "epoch": 3.610900740234041, + "grad_norm": 0.00014533349894918501, + "learning_rate": 4.345304857085642e-07, + "loss": 0.0, + "num_input_tokens_seen": 99614800, + "step": 147805 + }, + { + "epoch": 3.611022891065888, + "grad_norm": 0.00020457235223148018, + "learning_rate": 4.344601534275275e-07, + "loss": 0.0, + "num_input_tokens_seen": 99618000, + "step": 147810 + }, + { + "epoch": 3.6111450418977356, + "grad_norm": 0.008015124127268791, + "learning_rate": 4.3438982525924874e-07, + "loss": 0.0, + "num_input_tokens_seen": 99621456, + "step": 147815 + }, + { + "epoch": 3.6112671927295823, + "grad_norm": 0.0028267372399568558, + "learning_rate": 4.3431950120423963e-07, + "loss": 0.0, + "num_input_tokens_seen": 99625040, + "step": 147820 + }, + { + "epoch": 3.6113893435614295, + "grad_norm": 0.0005698919994756579, + "learning_rate": 4.342491812630109e-07, + "loss": 0.0, + "num_input_tokens_seen": 99628752, + "step": 147825 + }, + { + "epoch": 3.6115114943932767, + "grad_norm": 0.004629726056009531, + "learning_rate": 4.3417886543607474e-07, + "loss": 0.0, + "num_input_tokens_seen": 99632464, + "step": 147830 + }, + { + "epoch": 3.611633645225124, + "grad_norm": 0.0010731581132858992, + "learning_rate": 4.341085537239416e-07, + "loss": 0.0775, + "num_input_tokens_seen": 99635472, + "step": 147835 + }, + { + "epoch": 3.611755796056971, + "grad_norm": 0.0005755699821747839, + "learning_rate": 4.340382461271237e-07, + "loss": 0.0, + "num_input_tokens_seen": 99638928, + "step": 147840 + }, + { + "epoch": 3.6118779468888182, + "grad_norm": 0.029588622972369194, + "learning_rate": 4.339679426461319e-07, + "loss": 0.0, + "num_input_tokens_seen": 99642320, + "step": 147845 + }, + { + "epoch": 3.6120000977206654, + "grad_norm": 0.002909613074734807, + "learning_rate": 4.3389764328147706e-07, + "loss": 0.0, + "num_input_tokens_seen": 99646352, + "step": 147850 + }, + { + "epoch": 3.6121222485525126, + "grad_norm": 0.036447200924158096, + "learning_rate": 4.338273480336712e-07, + "loss": 0.0453, + "num_input_tokens_seen": 99649552, + "step": 147855 + }, + { + "epoch": 3.61224439938436, + "grad_norm": 0.00029940472450107336, + "learning_rate": 4.3375705690322474e-07, + "loss": 0.0, + "num_input_tokens_seen": 99653136, + "step": 147860 + }, + { + "epoch": 3.612366550216207, + "grad_norm": 0.00088671495905146, + "learning_rate": 4.336867698906497e-07, + "loss": 0.0, + "num_input_tokens_seen": 99655888, + "step": 147865 + }, + { + "epoch": 3.612488701048054, + "grad_norm": 0.01289328932762146, + "learning_rate": 4.3361648699645623e-07, + "loss": 0.0, + "num_input_tokens_seen": 99659408, + "step": 147870 + }, + { + "epoch": 3.6126108518799014, + "grad_norm": 0.0001773501862771809, + "learning_rate": 4.335462082211563e-07, + "loss": 0.0, + "num_input_tokens_seen": 99663184, + "step": 147875 + }, + { + "epoch": 3.6127330027117486, + "grad_norm": 0.46676599979400635, + "learning_rate": 4.3347593356526093e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99666512, + "step": 147880 + }, + { + "epoch": 3.6128551535435958, + "grad_norm": 0.0017477070214226842, + "learning_rate": 4.334056630292805e-07, + "loss": 0.0, + "num_input_tokens_seen": 99669840, + "step": 147885 + }, + { + "epoch": 3.612977304375443, + "grad_norm": 0.0036420223768800497, + "learning_rate": 4.33335396613727e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99673232, + "step": 147890 + }, + { + "epoch": 3.6130994552072897, + "grad_norm": 0.08501607924699783, + "learning_rate": 4.332651343191104e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99676304, + "step": 147895 + }, + { + "epoch": 3.6132216060391373, + "grad_norm": 0.0049306293949484825, + "learning_rate": 4.331948761459423e-07, + "loss": 0.0009, + "num_input_tokens_seen": 99679568, + "step": 147900 + }, + { + "epoch": 3.613343756870984, + "grad_norm": 0.014766248874366283, + "learning_rate": 4.331246220947338e-07, + "loss": 0.0, + "num_input_tokens_seen": 99682896, + "step": 147905 + }, + { + "epoch": 3.6134659077028317, + "grad_norm": 0.01937950775027275, + "learning_rate": 4.3305437216599517e-07, + "loss": 0.0, + "num_input_tokens_seen": 99686544, + "step": 147910 + }, + { + "epoch": 3.6135880585346785, + "grad_norm": 0.0013723402516916394, + "learning_rate": 4.3298412636023797e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99690192, + "step": 147915 + }, + { + "epoch": 3.6137102093665256, + "grad_norm": 0.002116596093401313, + "learning_rate": 4.329138846779724e-07, + "loss": 0.0, + "num_input_tokens_seen": 99693840, + "step": 147920 + }, + { + "epoch": 3.613832360198373, + "grad_norm": 0.0024759876541793346, + "learning_rate": 4.3284364711971e-07, + "loss": 0.0006, + "num_input_tokens_seen": 99697040, + "step": 147925 + }, + { + "epoch": 3.61395451103022, + "grad_norm": 0.0009556720615364611, + "learning_rate": 4.3277341368596066e-07, + "loss": 0.0406, + "num_input_tokens_seen": 99700304, + "step": 147930 + }, + { + "epoch": 3.614076661862067, + "grad_norm": 0.0030979400034993887, + "learning_rate": 4.327031843772361e-07, + "loss": 0.049, + "num_input_tokens_seen": 99703760, + "step": 147935 + }, + { + "epoch": 3.6141988126939144, + "grad_norm": 0.012933360412716866, + "learning_rate": 4.3263295919404605e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99707152, + "step": 147940 + }, + { + "epoch": 3.6143209635257616, + "grad_norm": 0.01721920073032379, + "learning_rate": 4.3256273813690227e-07, + "loss": 0.0, + "num_input_tokens_seen": 99710416, + "step": 147945 + }, + { + "epoch": 3.614443114357609, + "grad_norm": 0.00520155718550086, + "learning_rate": 4.3249252120631474e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99713872, + "step": 147950 + }, + { + "epoch": 3.614565265189456, + "grad_norm": 0.0013286650646477938, + "learning_rate": 4.3242230840279393e-07, + "loss": 0.0947, + "num_input_tokens_seen": 99717456, + "step": 147955 + }, + { + "epoch": 3.614687416021303, + "grad_norm": 0.005257413722574711, + "learning_rate": 4.3235209972685117e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99721168, + "step": 147960 + }, + { + "epoch": 3.6148095668531504, + "grad_norm": 0.03281658515334129, + "learning_rate": 4.3228189517899616e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99724880, + "step": 147965 + }, + { + "epoch": 3.6149317176849975, + "grad_norm": 0.14259420335292816, + "learning_rate": 4.3221169475973994e-07, + "loss": 0.0, + "num_input_tokens_seen": 99728144, + "step": 147970 + }, + { + "epoch": 3.6150538685168447, + "grad_norm": 0.00042377988575026393, + "learning_rate": 4.3214149846959336e-07, + "loss": 0.0, + "num_input_tokens_seen": 99731152, + "step": 147975 + }, + { + "epoch": 3.615176019348692, + "grad_norm": 0.008627206087112427, + "learning_rate": 4.320713063090662e-07, + "loss": 0.0, + "num_input_tokens_seen": 99734416, + "step": 147980 + }, + { + "epoch": 3.615298170180539, + "grad_norm": 104.67365264892578, + "learning_rate": 4.320011182786696e-07, + "loss": 0.0224, + "num_input_tokens_seen": 99737872, + "step": 147985 + }, + { + "epoch": 3.615420321012386, + "grad_norm": 0.0013059648917987943, + "learning_rate": 4.319309343789133e-07, + "loss": 0.0, + "num_input_tokens_seen": 99741136, + "step": 147990 + }, + { + "epoch": 3.6155424718442335, + "grad_norm": 0.0005610277294181287, + "learning_rate": 4.3186075461030803e-07, + "loss": 0.0, + "num_input_tokens_seen": 99744208, + "step": 147995 + }, + { + "epoch": 3.6156646226760802, + "grad_norm": 0.0007101591327227652, + "learning_rate": 4.317905789733645e-07, + "loss": 0.0441, + "num_input_tokens_seen": 99748624, + "step": 148000 + }, + { + "epoch": 3.6157867735079274, + "grad_norm": 0.0024499285500496626, + "learning_rate": 4.317204074685924e-07, + "loss": 0.0, + "num_input_tokens_seen": 99751632, + "step": 148005 + }, + { + "epoch": 3.6159089243397746, + "grad_norm": 0.00240133423358202, + "learning_rate": 4.316502400965026e-07, + "loss": 0.0, + "num_input_tokens_seen": 99754896, + "step": 148010 + }, + { + "epoch": 3.616031075171622, + "grad_norm": 0.0021777721121907234, + "learning_rate": 4.315800768576049e-07, + "loss": 0.0, + "num_input_tokens_seen": 99758288, + "step": 148015 + }, + { + "epoch": 3.616153226003469, + "grad_norm": 0.0013742581941187382, + "learning_rate": 4.3150991775241e-07, + "loss": 0.0, + "num_input_tokens_seen": 99761104, + "step": 148020 + }, + { + "epoch": 3.616275376835316, + "grad_norm": 0.0009071017266251147, + "learning_rate": 4.314397627814276e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99764240, + "step": 148025 + }, + { + "epoch": 3.6163975276671634, + "grad_norm": 0.013118072412908077, + "learning_rate": 4.3136961194516817e-07, + "loss": 0.0235, + "num_input_tokens_seen": 99767696, + "step": 148030 + }, + { + "epoch": 3.6165196784990106, + "grad_norm": 0.0009623137302696705, + "learning_rate": 4.312994652441422e-07, + "loss": 0.0382, + "num_input_tokens_seen": 99770704, + "step": 148035 + }, + { + "epoch": 3.6166418293308578, + "grad_norm": 0.0022995336912572384, + "learning_rate": 4.3122932267885917e-07, + "loss": 0.0, + "num_input_tokens_seen": 99774096, + "step": 148040 + }, + { + "epoch": 3.616763980162705, + "grad_norm": 0.0017281353939324617, + "learning_rate": 4.311591842498298e-07, + "loss": 0.0, + "num_input_tokens_seen": 99777616, + "step": 148045 + }, + { + "epoch": 3.616886130994552, + "grad_norm": 0.000500478723552078, + "learning_rate": 4.310890499575638e-07, + "loss": 0.0, + "num_input_tokens_seen": 99780816, + "step": 148050 + }, + { + "epoch": 3.6170082818263993, + "grad_norm": 0.0009686108096502721, + "learning_rate": 4.3101891980257086e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99783952, + "step": 148055 + }, + { + "epoch": 3.6171304326582465, + "grad_norm": 0.004932576324790716, + "learning_rate": 4.3094879378536185e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99787472, + "step": 148060 + }, + { + "epoch": 3.6172525834900937, + "grad_norm": 0.0024955112021416426, + "learning_rate": 4.308786719064459e-07, + "loss": 0.0, + "num_input_tokens_seen": 99790416, + "step": 148065 + }, + { + "epoch": 3.617374734321941, + "grad_norm": 0.000285703397821635, + "learning_rate": 4.3080855416633363e-07, + "loss": 0.0005, + "num_input_tokens_seen": 99793872, + "step": 148070 + }, + { + "epoch": 3.6174968851537876, + "grad_norm": 0.0008046041475608945, + "learning_rate": 4.307384405655343e-07, + "loss": 0.0, + "num_input_tokens_seen": 99796816, + "step": 148075 + }, + { + "epoch": 3.6176190359856353, + "grad_norm": 0.0003960691683460027, + "learning_rate": 4.3066833110455845e-07, + "loss": 0.0, + "num_input_tokens_seen": 99800080, + "step": 148080 + }, + { + "epoch": 3.617741186817482, + "grad_norm": 13.642656326293945, + "learning_rate": 4.3059822578391536e-07, + "loss": 0.0265, + "num_input_tokens_seen": 99803792, + "step": 148085 + }, + { + "epoch": 3.6178633376493297, + "grad_norm": 0.0023105733562260866, + "learning_rate": 4.305281246041151e-07, + "loss": 0.0, + "num_input_tokens_seen": 99807376, + "step": 148090 + }, + { + "epoch": 3.6179854884811764, + "grad_norm": 25.20553970336914, + "learning_rate": 4.3045802756566787e-07, + "loss": 0.0399, + "num_input_tokens_seen": 99810704, + "step": 148095 + }, + { + "epoch": 3.6181076393130236, + "grad_norm": 0.007407731376588345, + "learning_rate": 4.3038793466908266e-07, + "loss": 0.0, + "num_input_tokens_seen": 99814544, + "step": 148100 + }, + { + "epoch": 3.618229790144871, + "grad_norm": 0.1815802901983261, + "learning_rate": 4.303178459148699e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99818512, + "step": 148105 + }, + { + "epoch": 3.618351940976718, + "grad_norm": 0.0026310270186513662, + "learning_rate": 4.3024776130353866e-07, + "loss": 0.0, + "num_input_tokens_seen": 99822160, + "step": 148110 + }, + { + "epoch": 3.618474091808565, + "grad_norm": 0.0008320335182361305, + "learning_rate": 4.3017768083559933e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99825552, + "step": 148115 + }, + { + "epoch": 3.6185962426404124, + "grad_norm": 0.0030299080535769463, + "learning_rate": 4.301076045115608e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99828624, + "step": 148120 + }, + { + "epoch": 3.6187183934722595, + "grad_norm": 0.019902754575014114, + "learning_rate": 4.3003753233193305e-07, + "loss": 0.0, + "num_input_tokens_seen": 99831888, + "step": 148125 + }, + { + "epoch": 3.6188405443041067, + "grad_norm": 0.01636345684528351, + "learning_rate": 4.299674642972261e-07, + "loss": 0.0, + "num_input_tokens_seen": 99835280, + "step": 148130 + }, + { + "epoch": 3.618962695135954, + "grad_norm": 0.01897534541785717, + "learning_rate": 4.2989740040794864e-07, + "loss": 0.0, + "num_input_tokens_seen": 99838608, + "step": 148135 + }, + { + "epoch": 3.619084845967801, + "grad_norm": 0.0227005984634161, + "learning_rate": 4.29827340664611e-07, + "loss": 0.0, + "num_input_tokens_seen": 99841872, + "step": 148140 + }, + { + "epoch": 3.6192069967996483, + "grad_norm": 0.025067569687962532, + "learning_rate": 4.2975728506772193e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99845008, + "step": 148145 + }, + { + "epoch": 3.6193291476314955, + "grad_norm": 0.0004105870029889047, + "learning_rate": 4.296872336177916e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99848464, + "step": 148150 + }, + { + "epoch": 3.6194512984633427, + "grad_norm": 0.000708704290445894, + "learning_rate": 4.2961718631532905e-07, + "loss": 0.0, + "num_input_tokens_seen": 99851984, + "step": 148155 + }, + { + "epoch": 3.6195734492951894, + "grad_norm": 0.012791010551154613, + "learning_rate": 4.295471431608435e-07, + "loss": 0.0, + "num_input_tokens_seen": 99855312, + "step": 148160 + }, + { + "epoch": 3.619695600127037, + "grad_norm": 0.6077525019645691, + "learning_rate": 4.2947710415484486e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99858896, + "step": 148165 + }, + { + "epoch": 3.619817750958884, + "grad_norm": 0.007468364201486111, + "learning_rate": 4.2940706929784176e-07, + "loss": 0.0, + "num_input_tokens_seen": 99862160, + "step": 148170 + }, + { + "epoch": 3.6199399017907314, + "grad_norm": 0.00035050525912083685, + "learning_rate": 4.2933703859034444e-07, + "loss": 0.016, + "num_input_tokens_seen": 99865680, + "step": 148175 + }, + { + "epoch": 3.620062052622578, + "grad_norm": 0.004140862729400396, + "learning_rate": 4.292670120328612e-07, + "loss": 0.0, + "num_input_tokens_seen": 99868816, + "step": 148180 + }, + { + "epoch": 3.6201842034544254, + "grad_norm": 7.159739470807835e-05, + "learning_rate": 4.291969896259017e-07, + "loss": 0.0, + "num_input_tokens_seen": 99872144, + "step": 148185 + }, + { + "epoch": 3.6203063542862726, + "grad_norm": 0.00017383306112606078, + "learning_rate": 4.291269713699757e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99875664, + "step": 148190 + }, + { + "epoch": 3.6204285051181198, + "grad_norm": 0.0002737454778980464, + "learning_rate": 4.2905695726559145e-07, + "loss": 0.0, + "num_input_tokens_seen": 99879312, + "step": 148195 + }, + { + "epoch": 3.620550655949967, + "grad_norm": 0.0008277028682641685, + "learning_rate": 4.289869473132589e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99882640, + "step": 148200 + }, + { + "epoch": 3.620672806781814, + "grad_norm": 29.86298370361328, + "learning_rate": 4.2891694151348654e-07, + "loss": 0.0004, + "num_input_tokens_seen": 99885968, + "step": 148205 + }, + { + "epoch": 3.6207949576136613, + "grad_norm": 0.0006161229102872312, + "learning_rate": 4.288469398667842e-07, + "loss": 0.0, + "num_input_tokens_seen": 99889104, + "step": 148210 + }, + { + "epoch": 3.6209171084455085, + "grad_norm": 0.00639741774648428, + "learning_rate": 4.2877694237366014e-07, + "loss": 0.0, + "num_input_tokens_seen": 99892304, + "step": 148215 + }, + { + "epoch": 3.6210392592773557, + "grad_norm": 0.42625725269317627, + "learning_rate": 4.2870694903462377e-07, + "loss": 0.0002, + "num_input_tokens_seen": 99897680, + "step": 148220 + }, + { + "epoch": 3.621161410109203, + "grad_norm": 0.004575707484036684, + "learning_rate": 4.2863695985018453e-07, + "loss": 0.0004, + "num_input_tokens_seen": 99901328, + "step": 148225 + }, + { + "epoch": 3.62128356094105, + "grad_norm": 0.006831346545368433, + "learning_rate": 4.285669748208507e-07, + "loss": 0.0, + "num_input_tokens_seen": 99904336, + "step": 148230 + }, + { + "epoch": 3.6214057117728973, + "grad_norm": 0.00025160593213513494, + "learning_rate": 4.284969939471318e-07, + "loss": 0.0, + "num_input_tokens_seen": 99907472, + "step": 148235 + }, + { + "epoch": 3.6215278626047445, + "grad_norm": 0.4540623128414154, + "learning_rate": 4.284270172295361e-07, + "loss": 0.0003, + "num_input_tokens_seen": 99910800, + "step": 148240 + }, + { + "epoch": 3.6216500134365917, + "grad_norm": 0.019050270318984985, + "learning_rate": 4.2835704466857325e-07, + "loss": 0.0, + "num_input_tokens_seen": 99914512, + "step": 148245 + }, + { + "epoch": 3.621772164268439, + "grad_norm": 0.0001939299254445359, + "learning_rate": 4.2828707626475133e-07, + "loss": 0.0, + "num_input_tokens_seen": 99917712, + "step": 148250 + }, + { + "epoch": 3.6218943151002856, + "grad_norm": 0.00030984191107563674, + "learning_rate": 4.2821711201858e-07, + "loss": 0.0, + "num_input_tokens_seen": 99920848, + "step": 148255 + }, + { + "epoch": 3.6220164659321332, + "grad_norm": 0.00048598801367916167, + "learning_rate": 4.281471519305676e-07, + "loss": 0.0, + "num_input_tokens_seen": 99924304, + "step": 148260 + }, + { + "epoch": 3.62213861676398, + "grad_norm": 0.0002089657646138221, + "learning_rate": 4.280771960012225e-07, + "loss": 0.0, + "num_input_tokens_seen": 99927824, + "step": 148265 + }, + { + "epoch": 3.6222607675958276, + "grad_norm": 0.00018494176038075238, + "learning_rate": 4.2800724423105427e-07, + "loss": 0.0, + "num_input_tokens_seen": 99931344, + "step": 148270 + }, + { + "epoch": 3.6223829184276743, + "grad_norm": 0.00010118465434061363, + "learning_rate": 4.279372966205709e-07, + "loss": 0.0433, + "num_input_tokens_seen": 99935056, + "step": 148275 + }, + { + "epoch": 3.6225050692595215, + "grad_norm": 0.011214503087103367, + "learning_rate": 4.278673531702812e-07, + "loss": 0.0, + "num_input_tokens_seen": 99938512, + "step": 148280 + }, + { + "epoch": 3.6226272200913687, + "grad_norm": 0.00042954322998411953, + "learning_rate": 4.2779741388069445e-07, + "loss": 0.0, + "num_input_tokens_seen": 99941904, + "step": 148285 + }, + { + "epoch": 3.622749370923216, + "grad_norm": 0.0013367931824177504, + "learning_rate": 4.277274787523185e-07, + "loss": 0.0, + "num_input_tokens_seen": 99945232, + "step": 148290 + }, + { + "epoch": 3.622871521755063, + "grad_norm": 0.05535469576716423, + "learning_rate": 4.2765754778566255e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99948688, + "step": 148295 + }, + { + "epoch": 3.6229936725869103, + "grad_norm": 0.0002708320098463446, + "learning_rate": 4.275876209812346e-07, + "loss": 0.0, + "num_input_tokens_seen": 99952144, + "step": 148300 + }, + { + "epoch": 3.6231158234187575, + "grad_norm": 0.00510826800018549, + "learning_rate": 4.2751769833954334e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99955536, + "step": 148305 + }, + { + "epoch": 3.6232379742506047, + "grad_norm": 8.865822019288316e-05, + "learning_rate": 4.274477798610977e-07, + "loss": 0.0, + "num_input_tokens_seen": 99958928, + "step": 148310 + }, + { + "epoch": 3.623360125082452, + "grad_norm": 0.0006897286511957645, + "learning_rate": 4.2737786554640543e-07, + "loss": 0.0, + "num_input_tokens_seen": 99962256, + "step": 148315 + }, + { + "epoch": 3.623482275914299, + "grad_norm": 0.0003042963217012584, + "learning_rate": 4.2730795539597575e-07, + "loss": 0.0, + "num_input_tokens_seen": 99966608, + "step": 148320 + }, + { + "epoch": 3.6236044267461462, + "grad_norm": 34.62126159667969, + "learning_rate": 4.272380494103163e-07, + "loss": 0.0688, + "num_input_tokens_seen": 99969744, + "step": 148325 + }, + { + "epoch": 3.6237265775779934, + "grad_norm": 0.006418231409043074, + "learning_rate": 4.2716814758993614e-07, + "loss": 0.0, + "num_input_tokens_seen": 99973072, + "step": 148330 + }, + { + "epoch": 3.6238487284098406, + "grad_norm": 0.0013086996041238308, + "learning_rate": 4.270982499353429e-07, + "loss": 0.0, + "num_input_tokens_seen": 99976528, + "step": 148335 + }, + { + "epoch": 3.6239708792416874, + "grad_norm": 0.006796195171773434, + "learning_rate": 4.2702835644704535e-07, + "loss": 0.0, + "num_input_tokens_seen": 99979920, + "step": 148340 + }, + { + "epoch": 3.624093030073535, + "grad_norm": 0.002164926379919052, + "learning_rate": 4.2695846712555204e-07, + "loss": 0.0001, + "num_input_tokens_seen": 99983376, + "step": 148345 + }, + { + "epoch": 3.6242151809053818, + "grad_norm": 0.0018303889082744718, + "learning_rate": 4.2688858197137047e-07, + "loss": 0.0, + "num_input_tokens_seen": 99987216, + "step": 148350 + }, + { + "epoch": 3.6243373317372294, + "grad_norm": 85.07524108886719, + "learning_rate": 4.268187009850097e-07, + "loss": 0.0478, + "num_input_tokens_seen": 99990352, + "step": 148355 + }, + { + "epoch": 3.624459482569076, + "grad_norm": 0.00017215909610968083, + "learning_rate": 4.2674882416697746e-07, + "loss": 0.0891, + "num_input_tokens_seen": 99993680, + "step": 148360 + }, + { + "epoch": 3.6245816334009233, + "grad_norm": 0.1743038147687912, + "learning_rate": 4.2667895151778167e-07, + "loss": 0.0, + "num_input_tokens_seen": 99996560, + "step": 148365 + }, + { + "epoch": 3.6247037842327705, + "grad_norm": 0.0016746616456657648, + "learning_rate": 4.266090830379311e-07, + "loss": 0.0, + "num_input_tokens_seen": 99999696, + "step": 148370 + }, + { + "epoch": 3.6248259350646177, + "grad_norm": 0.00013440451584756374, + "learning_rate": 4.265392187279331e-07, + "loss": 0.0, + "num_input_tokens_seen": 100002832, + "step": 148375 + }, + { + "epoch": 3.624948085896465, + "grad_norm": 0.004280713852494955, + "learning_rate": 4.2646935858829644e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100006608, + "step": 148380 + }, + { + "epoch": 3.625070236728312, + "grad_norm": 0.00028567464323714375, + "learning_rate": 4.2639950261952863e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100010000, + "step": 148385 + }, + { + "epoch": 3.6251923875601593, + "grad_norm": 0.00025981743237935007, + "learning_rate": 4.263296508221381e-07, + "loss": 0.0, + "num_input_tokens_seen": 100013328, + "step": 148390 + }, + { + "epoch": 3.6253145383920065, + "grad_norm": 9.093299740925431e-05, + "learning_rate": 4.262598031966325e-07, + "loss": 0.0, + "num_input_tokens_seen": 100016528, + "step": 148395 + }, + { + "epoch": 3.6254366892238536, + "grad_norm": 0.0031217436771839857, + "learning_rate": 4.261899597435198e-07, + "loss": 0.0, + "num_input_tokens_seen": 100019728, + "step": 148400 + }, + { + "epoch": 3.625558840055701, + "grad_norm": 0.0002645206986926496, + "learning_rate": 4.2612012046330846e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100022864, + "step": 148405 + }, + { + "epoch": 3.625680990887548, + "grad_norm": 7.561508391518146e-05, + "learning_rate": 4.2605028535650553e-07, + "loss": 0.0313, + "num_input_tokens_seen": 100026256, + "step": 148410 + }, + { + "epoch": 3.625803141719395, + "grad_norm": 0.0027498200070112944, + "learning_rate": 4.259804544236197e-07, + "loss": 0.0, + "num_input_tokens_seen": 100029648, + "step": 148415 + }, + { + "epoch": 3.6259252925512424, + "grad_norm": 0.0022068258840590715, + "learning_rate": 4.2591062766515806e-07, + "loss": 0.0667, + "num_input_tokens_seen": 100032720, + "step": 148420 + }, + { + "epoch": 3.6260474433830896, + "grad_norm": 0.02030225098133087, + "learning_rate": 4.258408050816291e-07, + "loss": 0.0, + "num_input_tokens_seen": 100035856, + "step": 148425 + }, + { + "epoch": 3.626169594214937, + "grad_norm": 0.0027770877350121737, + "learning_rate": 4.2577098667353996e-07, + "loss": 0.0, + "num_input_tokens_seen": 100039248, + "step": 148430 + }, + { + "epoch": 3.6262917450467835, + "grad_norm": 0.7698575258255005, + "learning_rate": 4.257011724413985e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100042896, + "step": 148435 + }, + { + "epoch": 3.626413895878631, + "grad_norm": 0.007915056310594082, + "learning_rate": 4.2563136238571307e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100045904, + "step": 148440 + }, + { + "epoch": 3.626536046710478, + "grad_norm": 0.0012001445284113288, + "learning_rate": 4.2556155650699045e-07, + "loss": 0.0, + "num_input_tokens_seen": 100049296, + "step": 148445 + }, + { + "epoch": 3.626658197542325, + "grad_norm": 0.0002976596006192267, + "learning_rate": 4.2549175480573897e-07, + "loss": 0.0, + "num_input_tokens_seen": 100052624, + "step": 148450 + }, + { + "epoch": 3.6267803483741723, + "grad_norm": 0.1230919361114502, + "learning_rate": 4.2542195728246565e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100056016, + "step": 148455 + }, + { + "epoch": 3.6269024992060195, + "grad_norm": 0.00040737129165790975, + "learning_rate": 4.253521639376788e-07, + "loss": 0.0, + "num_input_tokens_seen": 100058960, + "step": 148460 + }, + { + "epoch": 3.6270246500378667, + "grad_norm": 0.00524023687466979, + "learning_rate": 4.2528237477188566e-07, + "loss": 0.0, + "num_input_tokens_seen": 100062672, + "step": 148465 + }, + { + "epoch": 3.627146800869714, + "grad_norm": 0.004324682056903839, + "learning_rate": 4.2521258978559315e-07, + "loss": 0.0, + "num_input_tokens_seen": 100066064, + "step": 148470 + }, + { + "epoch": 3.627268951701561, + "grad_norm": 0.10980729758739471, + "learning_rate": 4.2514280897930977e-07, + "loss": 0.0, + "num_input_tokens_seen": 100069648, + "step": 148475 + }, + { + "epoch": 3.6273911025334082, + "grad_norm": 0.31824973225593567, + "learning_rate": 4.250730323535421e-07, + "loss": 0.0, + "num_input_tokens_seen": 100072784, + "step": 148480 + }, + { + "epoch": 3.6275132533652554, + "grad_norm": 0.0008663343614898622, + "learning_rate": 4.2500325990879835e-07, + "loss": 0.0, + "num_input_tokens_seen": 100075920, + "step": 148485 + }, + { + "epoch": 3.6276354041971026, + "grad_norm": 36.30690002441406, + "learning_rate": 4.249334916455851e-07, + "loss": 0.0655, + "num_input_tokens_seen": 100079632, + "step": 148490 + }, + { + "epoch": 3.62775755502895, + "grad_norm": 0.0030439416877925396, + "learning_rate": 4.2486372756441027e-07, + "loss": 0.0257, + "num_input_tokens_seen": 100083024, + "step": 148495 + }, + { + "epoch": 3.627879705860797, + "grad_norm": 0.004145446699112654, + "learning_rate": 4.247939676657815e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100086160, + "step": 148500 + }, + { + "epoch": 3.628001856692644, + "grad_norm": 0.1289975494146347, + "learning_rate": 4.2472421195020525e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100089552, + "step": 148505 + }, + { + "epoch": 3.6281240075244914, + "grad_norm": 0.006557599641382694, + "learning_rate": 4.2465446041818966e-07, + "loss": 0.0, + "num_input_tokens_seen": 100092624, + "step": 148510 + }, + { + "epoch": 3.6282461583563386, + "grad_norm": 0.0008815338369458914, + "learning_rate": 4.245847130702412e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100096400, + "step": 148515 + }, + { + "epoch": 3.6283683091881853, + "grad_norm": 0.02695058286190033, + "learning_rate": 4.2451496990686784e-07, + "loss": 0.0, + "num_input_tokens_seen": 100100240, + "step": 148520 + }, + { + "epoch": 3.628490460020033, + "grad_norm": 0.298431932926178, + "learning_rate": 4.244452309285761e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100103440, + "step": 148525 + }, + { + "epoch": 3.6286126108518797, + "grad_norm": 0.005468748044222593, + "learning_rate": 4.243754961358733e-07, + "loss": 0.075, + "num_input_tokens_seen": 100107408, + "step": 148530 + }, + { + "epoch": 3.6287347616837273, + "grad_norm": 0.0004392905975691974, + "learning_rate": 4.243057655292672e-07, + "loss": 0.0, + "num_input_tokens_seen": 100110416, + "step": 148535 + }, + { + "epoch": 3.628856912515574, + "grad_norm": 0.0009532935218885541, + "learning_rate": 4.242360391092641e-07, + "loss": 0.0, + "num_input_tokens_seen": 100113808, + "step": 148540 + }, + { + "epoch": 3.6289790633474213, + "grad_norm": 0.0005135192768648267, + "learning_rate": 4.2416631687637173e-07, + "loss": 0.0738, + "num_input_tokens_seen": 100116880, + "step": 148545 + }, + { + "epoch": 3.6291012141792685, + "grad_norm": 0.0001638806570554152, + "learning_rate": 4.240965988310963e-07, + "loss": 0.0, + "num_input_tokens_seen": 100120272, + "step": 148550 + }, + { + "epoch": 3.6292233650111156, + "grad_norm": 0.0005596952978521585, + "learning_rate": 4.240268849739458e-07, + "loss": 0.0739, + "num_input_tokens_seen": 100123472, + "step": 148555 + }, + { + "epoch": 3.629345515842963, + "grad_norm": 0.06809617578983307, + "learning_rate": 4.239571753054263e-07, + "loss": 0.0881, + "num_input_tokens_seen": 100127120, + "step": 148560 + }, + { + "epoch": 3.62946766667481, + "grad_norm": 0.002564807888120413, + "learning_rate": 4.2388746982604553e-07, + "loss": 0.0, + "num_input_tokens_seen": 100130384, + "step": 148565 + }, + { + "epoch": 3.629589817506657, + "grad_norm": 0.0007430867408402264, + "learning_rate": 4.2381776853630955e-07, + "loss": 0.0, + "num_input_tokens_seen": 100133776, + "step": 148570 + }, + { + "epoch": 3.6297119683385044, + "grad_norm": 315.9742126464844, + "learning_rate": 4.237480714367262e-07, + "loss": 0.008, + "num_input_tokens_seen": 100137104, + "step": 148575 + }, + { + "epoch": 3.6298341191703516, + "grad_norm": 0.0024084101896733046, + "learning_rate": 4.236783785278019e-07, + "loss": 0.0, + "num_input_tokens_seen": 100140496, + "step": 148580 + }, + { + "epoch": 3.629956270002199, + "grad_norm": 0.007453474681824446, + "learning_rate": 4.2360868981004305e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100144208, + "step": 148585 + }, + { + "epoch": 3.630078420834046, + "grad_norm": 0.0015486471820622683, + "learning_rate": 4.235390052839568e-07, + "loss": 0.0, + "num_input_tokens_seen": 100147792, + "step": 148590 + }, + { + "epoch": 3.630200571665893, + "grad_norm": 0.005932774394750595, + "learning_rate": 4.2346932495005037e-07, + "loss": 0.0004, + "num_input_tokens_seen": 100150928, + "step": 148595 + }, + { + "epoch": 3.6303227224977404, + "grad_norm": 0.00043449728400446475, + "learning_rate": 4.2339964880882974e-07, + "loss": 0.0014, + "num_input_tokens_seen": 100154192, + "step": 148600 + }, + { + "epoch": 3.6304448733295875, + "grad_norm": 0.0025287808384746313, + "learning_rate": 4.233299768608022e-07, + "loss": 0.0, + "num_input_tokens_seen": 100157712, + "step": 148605 + }, + { + "epoch": 3.6305670241614347, + "grad_norm": 0.16306915879249573, + "learning_rate": 4.232603091064739e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100161232, + "step": 148610 + }, + { + "epoch": 3.6306891749932815, + "grad_norm": 0.0046179573982954025, + "learning_rate": 4.2319064554635174e-07, + "loss": 0.0, + "num_input_tokens_seen": 100165456, + "step": 148615 + }, + { + "epoch": 3.630811325825129, + "grad_norm": 0.0018630430568009615, + "learning_rate": 4.231209861809427e-07, + "loss": 0.0, + "num_input_tokens_seen": 100168464, + "step": 148620 + }, + { + "epoch": 3.630933476656976, + "grad_norm": 0.0014654771657660604, + "learning_rate": 4.2305133101075264e-07, + "loss": 0.0, + "num_input_tokens_seen": 100172880, + "step": 148625 + }, + { + "epoch": 3.631055627488823, + "grad_norm": 0.004783432465046644, + "learning_rate": 4.2298168003628885e-07, + "loss": 0.0, + "num_input_tokens_seen": 100176144, + "step": 148630 + }, + { + "epoch": 3.6311777783206702, + "grad_norm": 0.0012913690879940987, + "learning_rate": 4.2291203325805715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100179216, + "step": 148635 + }, + { + "epoch": 3.6312999291525174, + "grad_norm": 0.001476217177696526, + "learning_rate": 4.228423906765647e-07, + "loss": 0.0, + "num_input_tokens_seen": 100182608, + "step": 148640 + }, + { + "epoch": 3.6314220799843646, + "grad_norm": 0.000511297257617116, + "learning_rate": 4.2277275229231726e-07, + "loss": 0.0276, + "num_input_tokens_seen": 100186256, + "step": 148645 + }, + { + "epoch": 3.631544230816212, + "grad_norm": 0.16869373619556427, + "learning_rate": 4.227031181058216e-07, + "loss": 0.0007, + "num_input_tokens_seen": 100189328, + "step": 148650 + }, + { + "epoch": 3.631666381648059, + "grad_norm": 0.0005099636036902666, + "learning_rate": 4.226334881175846e-07, + "loss": 0.0, + "num_input_tokens_seen": 100192592, + "step": 148655 + }, + { + "epoch": 3.631788532479906, + "grad_norm": 0.9059097170829773, + "learning_rate": 4.225638623281117e-07, + "loss": 0.0007, + "num_input_tokens_seen": 100195856, + "step": 148660 + }, + { + "epoch": 3.6319106833117534, + "grad_norm": 0.003063419833779335, + "learning_rate": 4.2249424073791006e-07, + "loss": 0.0017, + "num_input_tokens_seen": 100199120, + "step": 148665 + }, + { + "epoch": 3.6320328341436006, + "grad_norm": 0.10458869487047195, + "learning_rate": 4.224246233474857e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100202448, + "step": 148670 + }, + { + "epoch": 3.6321549849754478, + "grad_norm": 0.018070032820105553, + "learning_rate": 4.2235501015734445e-07, + "loss": 0.0, + "num_input_tokens_seen": 100205904, + "step": 148675 + }, + { + "epoch": 3.632277135807295, + "grad_norm": 0.003459717845544219, + "learning_rate": 4.2228540116799326e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100209168, + "step": 148680 + }, + { + "epoch": 3.632399286639142, + "grad_norm": 0.0011072818888351321, + "learning_rate": 4.2221579637993766e-07, + "loss": 0.0, + "num_input_tokens_seen": 100212624, + "step": 148685 + }, + { + "epoch": 3.6325214374709893, + "grad_norm": 0.002741535659879446, + "learning_rate": 4.221461957936846e-07, + "loss": 0.0266, + "num_input_tokens_seen": 100215760, + "step": 148690 + }, + { + "epoch": 3.6326435883028365, + "grad_norm": 0.0002010358584811911, + "learning_rate": 4.220765994097395e-07, + "loss": 0.0, + "num_input_tokens_seen": 100219024, + "step": 148695 + }, + { + "epoch": 3.6327657391346833, + "grad_norm": 0.00028791220393031836, + "learning_rate": 4.2200700722860906e-07, + "loss": 0.0, + "num_input_tokens_seen": 100222224, + "step": 148700 + }, + { + "epoch": 3.632887889966531, + "grad_norm": 0.0003893005778081715, + "learning_rate": 4.219374192507988e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100225232, + "step": 148705 + }, + { + "epoch": 3.6330100407983776, + "grad_norm": 0.0009554345160722733, + "learning_rate": 4.2186783547681516e-07, + "loss": 0.0619, + "num_input_tokens_seen": 100228688, + "step": 148710 + }, + { + "epoch": 3.6331321916302253, + "grad_norm": 0.0010442471830174327, + "learning_rate": 4.2179825590716445e-07, + "loss": 0.0, + "num_input_tokens_seen": 100231888, + "step": 148715 + }, + { + "epoch": 3.633254342462072, + "grad_norm": 0.00042447238229215145, + "learning_rate": 4.21728680542352e-07, + "loss": 0.0, + "num_input_tokens_seen": 100235088, + "step": 148720 + }, + { + "epoch": 3.633376493293919, + "grad_norm": 0.0007365150959230959, + "learning_rate": 4.216591093828844e-07, + "loss": 0.0, + "num_input_tokens_seen": 100238352, + "step": 148725 + }, + { + "epoch": 3.6334986441257664, + "grad_norm": 0.002207621233537793, + "learning_rate": 4.21589542429267e-07, + "loss": 0.0, + "num_input_tokens_seen": 100242832, + "step": 148730 + }, + { + "epoch": 3.6336207949576136, + "grad_norm": 0.004213795997202396, + "learning_rate": 4.215199796820064e-07, + "loss": 0.0, + "num_input_tokens_seen": 100245968, + "step": 148735 + }, + { + "epoch": 3.6337429457894608, + "grad_norm": 0.13924796879291534, + "learning_rate": 4.2145042114160776e-07, + "loss": 0.0, + "num_input_tokens_seen": 100249232, + "step": 148740 + }, + { + "epoch": 3.633865096621308, + "grad_norm": 0.06136344373226166, + "learning_rate": 4.213808668085772e-07, + "loss": 0.0, + "num_input_tokens_seen": 100252688, + "step": 148745 + }, + { + "epoch": 3.633987247453155, + "grad_norm": 0.0009907983476296067, + "learning_rate": 4.2131131668342103e-07, + "loss": 0.0, + "num_input_tokens_seen": 100255952, + "step": 148750 + }, + { + "epoch": 3.6341093982850023, + "grad_norm": 0.0021776894573122263, + "learning_rate": 4.212417707666442e-07, + "loss": 0.0, + "num_input_tokens_seen": 100259088, + "step": 148755 + }, + { + "epoch": 3.6342315491168495, + "grad_norm": 0.0004358404839877039, + "learning_rate": 4.2117222905875327e-07, + "loss": 0.0, + "num_input_tokens_seen": 100262416, + "step": 148760 + }, + { + "epoch": 3.6343536999486967, + "grad_norm": 0.03200365602970123, + "learning_rate": 4.2110269156025327e-07, + "loss": 0.0, + "num_input_tokens_seen": 100265936, + "step": 148765 + }, + { + "epoch": 3.634475850780544, + "grad_norm": 0.002977890195325017, + "learning_rate": 4.2103315827165043e-07, + "loss": 0.0, + "num_input_tokens_seen": 100269776, + "step": 148770 + }, + { + "epoch": 3.634598001612391, + "grad_norm": 0.02720186486840248, + "learning_rate": 4.209636291934503e-07, + "loss": 0.0, + "num_input_tokens_seen": 100273616, + "step": 148775 + }, + { + "epoch": 3.6347201524442383, + "grad_norm": 0.0012489106738939881, + "learning_rate": 4.20894104326158e-07, + "loss": 0.0, + "num_input_tokens_seen": 100276880, + "step": 148780 + }, + { + "epoch": 3.634842303276085, + "grad_norm": 0.0008800218929536641, + "learning_rate": 4.2082458367027986e-07, + "loss": 0.0, + "num_input_tokens_seen": 100280208, + "step": 148785 + }, + { + "epoch": 3.6349644541079327, + "grad_norm": 0.0018722382374107838, + "learning_rate": 4.207550672263208e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100283792, + "step": 148790 + }, + { + "epoch": 3.6350866049397794, + "grad_norm": 0.010279770940542221, + "learning_rate": 4.206855549947871e-07, + "loss": 0.0, + "num_input_tokens_seen": 100287184, + "step": 148795 + }, + { + "epoch": 3.635208755771627, + "grad_norm": 0.006634359247982502, + "learning_rate": 4.2061604697618347e-07, + "loss": 0.0, + "num_input_tokens_seen": 100290448, + "step": 148800 + }, + { + "epoch": 3.635330906603474, + "grad_norm": 25.89348030090332, + "learning_rate": 4.205465431710158e-07, + "loss": 0.0433, + "num_input_tokens_seen": 100293520, + "step": 148805 + }, + { + "epoch": 3.635453057435321, + "grad_norm": 0.0019238588865846395, + "learning_rate": 4.2047704357978975e-07, + "loss": 0.0, + "num_input_tokens_seen": 100297168, + "step": 148810 + }, + { + "epoch": 3.635575208267168, + "grad_norm": 0.010663384571671486, + "learning_rate": 4.204075482030103e-07, + "loss": 0.0, + "num_input_tokens_seen": 100301072, + "step": 148815 + }, + { + "epoch": 3.6356973590990154, + "grad_norm": 0.0011268631787970662, + "learning_rate": 4.203380570411833e-07, + "loss": 0.0004, + "num_input_tokens_seen": 100304784, + "step": 148820 + }, + { + "epoch": 3.6358195099308626, + "grad_norm": 0.26588326692581177, + "learning_rate": 4.2026857009481363e-07, + "loss": 0.0922, + "num_input_tokens_seen": 100308112, + "step": 148825 + }, + { + "epoch": 3.6359416607627097, + "grad_norm": 0.0016133766621351242, + "learning_rate": 4.201990873644071e-07, + "loss": 0.0, + "num_input_tokens_seen": 100311248, + "step": 148830 + }, + { + "epoch": 3.636063811594557, + "grad_norm": 25.05052947998047, + "learning_rate": 4.2012960885046846e-07, + "loss": 0.0607, + "num_input_tokens_seen": 100314512, + "step": 148835 + }, + { + "epoch": 3.636185962426404, + "grad_norm": 31.458444595336914, + "learning_rate": 4.200601345535032e-07, + "loss": 0.0953, + "num_input_tokens_seen": 100317712, + "step": 148840 + }, + { + "epoch": 3.6363081132582513, + "grad_norm": 0.004293879494071007, + "learning_rate": 4.1999066447401707e-07, + "loss": 0.0, + "num_input_tokens_seen": 100321040, + "step": 148845 + }, + { + "epoch": 3.6364302640900985, + "grad_norm": 0.17925694584846497, + "learning_rate": 4.1992119861251443e-07, + "loss": 0.0567, + "num_input_tokens_seen": 100324112, + "step": 148850 + }, + { + "epoch": 3.6365524149219457, + "grad_norm": 0.024837162345647812, + "learning_rate": 4.1985173696950125e-07, + "loss": 0.0, + "num_input_tokens_seen": 100327504, + "step": 148855 + }, + { + "epoch": 3.636674565753793, + "grad_norm": 19.125436782836914, + "learning_rate": 4.1978227954548183e-07, + "loss": 0.0235, + "num_input_tokens_seen": 100331152, + "step": 148860 + }, + { + "epoch": 3.63679671658564, + "grad_norm": 0.007109674625098705, + "learning_rate": 4.197128263409622e-07, + "loss": 0.0591, + "num_input_tokens_seen": 100334288, + "step": 148865 + }, + { + "epoch": 3.6369188674174873, + "grad_norm": 0.002395701128989458, + "learning_rate": 4.196433773564465e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100337744, + "step": 148870 + }, + { + "epoch": 3.6370410182493345, + "grad_norm": 0.000553856254555285, + "learning_rate": 4.195739325924407e-07, + "loss": 0.0, + "num_input_tokens_seen": 100341008, + "step": 148875 + }, + { + "epoch": 3.637163169081181, + "grad_norm": 0.0023220237344503403, + "learning_rate": 4.1950449204944905e-07, + "loss": 0.0, + "num_input_tokens_seen": 100344528, + "step": 148880 + }, + { + "epoch": 3.637285319913029, + "grad_norm": 0.0013494978193193674, + "learning_rate": 4.1943505572797713e-07, + "loss": 0.0, + "num_input_tokens_seen": 100347920, + "step": 148885 + }, + { + "epoch": 3.6374074707448756, + "grad_norm": 0.0008220344316214323, + "learning_rate": 4.1936562362852966e-07, + "loss": 0.0, + "num_input_tokens_seen": 100351056, + "step": 148890 + }, + { + "epoch": 3.6375296215767228, + "grad_norm": 0.006795614026486874, + "learning_rate": 4.1929619575161126e-07, + "loss": 0.0, + "num_input_tokens_seen": 100354000, + "step": 148895 + }, + { + "epoch": 3.63765177240857, + "grad_norm": 0.006160368211567402, + "learning_rate": 4.192267720977271e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100357520, + "step": 148900 + }, + { + "epoch": 3.637773923240417, + "grad_norm": 0.0003869648789986968, + "learning_rate": 4.1915735266738237e-07, + "loss": 0.0, + "num_input_tokens_seen": 100360720, + "step": 148905 + }, + { + "epoch": 3.6378960740722643, + "grad_norm": 87.1167221069336, + "learning_rate": 4.190879374610813e-07, + "loss": 0.043, + "num_input_tokens_seen": 100363984, + "step": 148910 + }, + { + "epoch": 3.6380182249041115, + "grad_norm": 0.02374916709959507, + "learning_rate": 4.190185264793292e-07, + "loss": 0.0, + "num_input_tokens_seen": 100367056, + "step": 148915 + }, + { + "epoch": 3.6381403757359587, + "grad_norm": 0.0508439764380455, + "learning_rate": 4.189491197226305e-07, + "loss": 0.0, + "num_input_tokens_seen": 100370384, + "step": 148920 + }, + { + "epoch": 3.638262526567806, + "grad_norm": 0.0017856404883787036, + "learning_rate": 4.188797171914903e-07, + "loss": 0.0, + "num_input_tokens_seen": 100373968, + "step": 148925 + }, + { + "epoch": 3.638384677399653, + "grad_norm": 0.010784510523080826, + "learning_rate": 4.1881031888641285e-07, + "loss": 0.0, + "num_input_tokens_seen": 100377616, + "step": 148930 + }, + { + "epoch": 3.6385068282315003, + "grad_norm": 0.00013639629469253123, + "learning_rate": 4.18740924807903e-07, + "loss": 0.0, + "num_input_tokens_seen": 100381136, + "step": 148935 + }, + { + "epoch": 3.6386289790633475, + "grad_norm": 0.018810724839568138, + "learning_rate": 4.186715349564658e-07, + "loss": 0.0, + "num_input_tokens_seen": 100384592, + "step": 148940 + }, + { + "epoch": 3.6387511298951947, + "grad_norm": 0.6784608960151672, + "learning_rate": 4.186021493326053e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100387664, + "step": 148945 + }, + { + "epoch": 3.638873280727042, + "grad_norm": 0.0006472144741564989, + "learning_rate": 4.185327679368267e-07, + "loss": 0.0, + "num_input_tokens_seen": 100391568, + "step": 148950 + }, + { + "epoch": 3.638995431558889, + "grad_norm": 0.516774594783783, + "learning_rate": 4.184633907696338e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100394704, + "step": 148955 + }, + { + "epoch": 3.6391175823907362, + "grad_norm": 2.933089308498893e-05, + "learning_rate": 4.183940178315315e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100398032, + "step": 148960 + }, + { + "epoch": 3.639239733222583, + "grad_norm": 0.000815013307146728, + "learning_rate": 4.183246491230248e-07, + "loss": 0.0, + "num_input_tokens_seen": 100401296, + "step": 148965 + }, + { + "epoch": 3.6393618840544306, + "grad_norm": 0.021738985553383827, + "learning_rate": 4.1825528464461725e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100404816, + "step": 148970 + }, + { + "epoch": 3.6394840348862774, + "grad_norm": 0.015505907125771046, + "learning_rate": 4.1818592439681413e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100407824, + "step": 148975 + }, + { + "epoch": 3.639606185718125, + "grad_norm": 0.00117449217941612, + "learning_rate": 4.1811656838011946e-07, + "loss": 0.0, + "num_input_tokens_seen": 100410832, + "step": 148980 + }, + { + "epoch": 3.6397283365499717, + "grad_norm": 8.240255556302145e-05, + "learning_rate": 4.180472165950373e-07, + "loss": 0.0671, + "num_input_tokens_seen": 100413968, + "step": 148985 + }, + { + "epoch": 3.639850487381819, + "grad_norm": 0.0006316679064184427, + "learning_rate": 4.1797786904207254e-07, + "loss": 0.0833, + "num_input_tokens_seen": 100418256, + "step": 148990 + }, + { + "epoch": 3.639972638213666, + "grad_norm": 0.0015859379200264812, + "learning_rate": 4.17908525721729e-07, + "loss": 0.0, + "num_input_tokens_seen": 100422416, + "step": 148995 + }, + { + "epoch": 3.6400947890455133, + "grad_norm": 0.052952248603105545, + "learning_rate": 4.178391866345116e-07, + "loss": 0.0003, + "num_input_tokens_seen": 100426000, + "step": 149000 + }, + { + "epoch": 3.6402169398773605, + "grad_norm": 0.21487371623516083, + "learning_rate": 4.1776985178092383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100429200, + "step": 149005 + }, + { + "epoch": 3.6403390907092077, + "grad_norm": 0.991147518157959, + "learning_rate": 4.177005211614706e-07, + "loss": 0.001, + "num_input_tokens_seen": 100432144, + "step": 149010 + }, + { + "epoch": 3.640461241541055, + "grad_norm": 0.0008949601906351745, + "learning_rate": 4.176311947766555e-07, + "loss": 0.0, + "num_input_tokens_seen": 100435600, + "step": 149015 + }, + { + "epoch": 3.640583392372902, + "grad_norm": 0.000679291202686727, + "learning_rate": 4.1756187262698305e-07, + "loss": 0.0, + "num_input_tokens_seen": 100438800, + "step": 149020 + }, + { + "epoch": 3.6407055432047493, + "grad_norm": 0.003153888275846839, + "learning_rate": 4.1749255471295755e-07, + "loss": 0.0, + "num_input_tokens_seen": 100441680, + "step": 149025 + }, + { + "epoch": 3.6408276940365965, + "grad_norm": 0.00218926346860826, + "learning_rate": 4.174232410350826e-07, + "loss": 0.0, + "num_input_tokens_seen": 100444624, + "step": 149030 + }, + { + "epoch": 3.6409498448684436, + "grad_norm": 0.0038852146826684475, + "learning_rate": 4.173539315938629e-07, + "loss": 0.0, + "num_input_tokens_seen": 100447888, + "step": 149035 + }, + { + "epoch": 3.641071995700291, + "grad_norm": 27.48082733154297, + "learning_rate": 4.1728462638980164e-07, + "loss": 0.0989, + "num_input_tokens_seen": 100451152, + "step": 149040 + }, + { + "epoch": 3.641194146532138, + "grad_norm": 0.004733655601739883, + "learning_rate": 4.172153254234038e-07, + "loss": 0.0, + "num_input_tokens_seen": 100454608, + "step": 149045 + }, + { + "epoch": 3.641316297363985, + "grad_norm": 0.1282656043767929, + "learning_rate": 4.171460286951725e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100458000, + "step": 149050 + }, + { + "epoch": 3.6414384481958324, + "grad_norm": 50.21139907836914, + "learning_rate": 4.17076736205612e-07, + "loss": 0.1442, + "num_input_tokens_seen": 100461776, + "step": 149055 + }, + { + "epoch": 3.641560599027679, + "grad_norm": 0.001987928058952093, + "learning_rate": 4.170074479552266e-07, + "loss": 0.0476, + "num_input_tokens_seen": 100464848, + "step": 149060 + }, + { + "epoch": 3.641682749859527, + "grad_norm": 17.490524291992188, + "learning_rate": 4.1693816394451954e-07, + "loss": 0.0922, + "num_input_tokens_seen": 100468560, + "step": 149065 + }, + { + "epoch": 3.6418049006913735, + "grad_norm": 0.000808164884801954, + "learning_rate": 4.1686888417399537e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100471888, + "step": 149070 + }, + { + "epoch": 3.6419270515232207, + "grad_norm": 0.00014078160165809095, + "learning_rate": 4.167996086441571e-07, + "loss": 0.0, + "num_input_tokens_seen": 100475472, + "step": 149075 + }, + { + "epoch": 3.642049202355068, + "grad_norm": 0.011615934781730175, + "learning_rate": 4.167303373555092e-07, + "loss": 0.0, + "num_input_tokens_seen": 100478608, + "step": 149080 + }, + { + "epoch": 3.642171353186915, + "grad_norm": 0.0010337198618799448, + "learning_rate": 4.1666107030855535e-07, + "loss": 0.0, + "num_input_tokens_seen": 100482384, + "step": 149085 + }, + { + "epoch": 3.6422935040187623, + "grad_norm": 22.374544143676758, + "learning_rate": 4.165918075037986e-07, + "loss": 0.058, + "num_input_tokens_seen": 100485520, + "step": 149090 + }, + { + "epoch": 3.6424156548506095, + "grad_norm": 0.012283634394407272, + "learning_rate": 4.1652254894174357e-07, + "loss": 0.0, + "num_input_tokens_seen": 100488656, + "step": 149095 + }, + { + "epoch": 3.6425378056824567, + "grad_norm": 0.0002704682119656354, + "learning_rate": 4.1645329462289314e-07, + "loss": 0.0716, + "num_input_tokens_seen": 100492240, + "step": 149100 + }, + { + "epoch": 3.642659956514304, + "grad_norm": 0.46763280034065247, + "learning_rate": 4.163840445477517e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100496720, + "step": 149105 + }, + { + "epoch": 3.642782107346151, + "grad_norm": 0.01930251717567444, + "learning_rate": 4.1631479871682195e-07, + "loss": 0.0526, + "num_input_tokens_seen": 100500048, + "step": 149110 + }, + { + "epoch": 3.6429042581779982, + "grad_norm": 0.011264493688941002, + "learning_rate": 4.1624555713060815e-07, + "loss": 0.0, + "num_input_tokens_seen": 100503184, + "step": 149115 + }, + { + "epoch": 3.6430264090098454, + "grad_norm": 0.1804090440273285, + "learning_rate": 4.1617631978961396e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100506384, + "step": 149120 + }, + { + "epoch": 3.6431485598416926, + "grad_norm": 0.007700651418417692, + "learning_rate": 4.1610708669434224e-07, + "loss": 0.0, + "num_input_tokens_seen": 100509584, + "step": 149125 + }, + { + "epoch": 3.64327071067354, + "grad_norm": 0.008980309590697289, + "learning_rate": 4.1603785784529724e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100512592, + "step": 149130 + }, + { + "epoch": 3.643392861505387, + "grad_norm": 0.015934381633996964, + "learning_rate": 4.1596863324298157e-07, + "loss": 0.0, + "num_input_tokens_seen": 100515920, + "step": 149135 + }, + { + "epoch": 3.643515012337234, + "grad_norm": 0.020287154242396355, + "learning_rate": 4.158994128878994e-07, + "loss": 0.0557, + "num_input_tokens_seen": 100519184, + "step": 149140 + }, + { + "epoch": 3.643637163169081, + "grad_norm": 0.008383965119719505, + "learning_rate": 4.158301967805535e-07, + "loss": 0.0, + "num_input_tokens_seen": 100522640, + "step": 149145 + }, + { + "epoch": 3.6437593140009286, + "grad_norm": 0.4676218628883362, + "learning_rate": 4.1576098492144763e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100526672, + "step": 149150 + }, + { + "epoch": 3.6438814648327753, + "grad_norm": 0.09821810573339462, + "learning_rate": 4.1569177731108526e-07, + "loss": 0.0739, + "num_input_tokens_seen": 100530512, + "step": 149155 + }, + { + "epoch": 3.644003615664623, + "grad_norm": 0.16381299495697021, + "learning_rate": 4.1562257394996913e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100533584, + "step": 149160 + }, + { + "epoch": 3.6441257664964697, + "grad_norm": 0.0033917089458554983, + "learning_rate": 4.155533748386032e-07, + "loss": 0.0, + "num_input_tokens_seen": 100536976, + "step": 149165 + }, + { + "epoch": 3.644247917328317, + "grad_norm": 0.0065474179573357105, + "learning_rate": 4.1548417997749e-07, + "loss": 0.0, + "num_input_tokens_seen": 100540304, + "step": 149170 + }, + { + "epoch": 3.644370068160164, + "grad_norm": 0.001454808283597231, + "learning_rate": 4.154149893671334e-07, + "loss": 0.0, + "num_input_tokens_seen": 100543376, + "step": 149175 + }, + { + "epoch": 3.6444922189920113, + "grad_norm": 0.0016932397847995162, + "learning_rate": 4.153458030080358e-07, + "loss": 0.0, + "num_input_tokens_seen": 100546832, + "step": 149180 + }, + { + "epoch": 3.6446143698238584, + "grad_norm": 0.03147650137543678, + "learning_rate": 4.1527662090070113e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100550352, + "step": 149185 + }, + { + "epoch": 3.6447365206557056, + "grad_norm": 0.021766608580946922, + "learning_rate": 4.1520744304563185e-07, + "loss": 0.0, + "num_input_tokens_seen": 100553488, + "step": 149190 + }, + { + "epoch": 3.644858671487553, + "grad_norm": 0.0049246856942772865, + "learning_rate": 4.151382694433316e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100556688, + "step": 149195 + }, + { + "epoch": 3.6449808223194, + "grad_norm": 0.30315637588500977, + "learning_rate": 4.150691000943033e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100560208, + "step": 149200 + }, + { + "epoch": 3.645102973151247, + "grad_norm": 0.01731988787651062, + "learning_rate": 4.149999349990494e-07, + "loss": 0.0347, + "num_input_tokens_seen": 100563344, + "step": 149205 + }, + { + "epoch": 3.6452251239830944, + "grad_norm": 0.11529941856861115, + "learning_rate": 4.149307741580733e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100566224, + "step": 149210 + }, + { + "epoch": 3.6453472748149416, + "grad_norm": 0.08941885083913803, + "learning_rate": 4.148616175718783e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100569488, + "step": 149215 + }, + { + "epoch": 3.6454694256467888, + "grad_norm": 24.89301109313965, + "learning_rate": 4.1479246524096676e-07, + "loss": 0.0093, + "num_input_tokens_seen": 100572880, + "step": 149220 + }, + { + "epoch": 3.645591576478636, + "grad_norm": 0.0017372871516272426, + "learning_rate": 4.147233171658421e-07, + "loss": 0.0, + "num_input_tokens_seen": 100575824, + "step": 149225 + }, + { + "epoch": 3.6457137273104827, + "grad_norm": 0.0009719376685097814, + "learning_rate": 4.146541733470066e-07, + "loss": 0.0451, + "num_input_tokens_seen": 100579600, + "step": 149230 + }, + { + "epoch": 3.6458358781423303, + "grad_norm": 0.08995156735181808, + "learning_rate": 4.145850337849637e-07, + "loss": 0.0, + "num_input_tokens_seen": 100583376, + "step": 149235 + }, + { + "epoch": 3.645958028974177, + "grad_norm": 0.005203651264309883, + "learning_rate": 4.145158984802155e-07, + "loss": 0.0614, + "num_input_tokens_seen": 100586640, + "step": 149240 + }, + { + "epoch": 3.6460801798060247, + "grad_norm": 0.012616139836609364, + "learning_rate": 4.144467674332651e-07, + "loss": 0.0, + "num_input_tokens_seen": 100590160, + "step": 149245 + }, + { + "epoch": 3.6462023306378715, + "grad_norm": 0.008399611338973045, + "learning_rate": 4.143776406446158e-07, + "loss": 0.0, + "num_input_tokens_seen": 100593488, + "step": 149250 + }, + { + "epoch": 3.6463244814697187, + "grad_norm": 0.0006929049268364906, + "learning_rate": 4.143085181147694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100596432, + "step": 149255 + }, + { + "epoch": 3.646446632301566, + "grad_norm": 0.0006431890651583672, + "learning_rate": 4.142393998442294e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100599696, + "step": 149260 + }, + { + "epoch": 3.646568783133413, + "grad_norm": 0.0022328917402774096, + "learning_rate": 4.1417028583349766e-07, + "loss": 0.0, + "num_input_tokens_seen": 100603088, + "step": 149265 + }, + { + "epoch": 3.6466909339652602, + "grad_norm": 0.0004925087559968233, + "learning_rate": 4.1410117608307716e-07, + "loss": 0.0, + "num_input_tokens_seen": 100606352, + "step": 149270 + }, + { + "epoch": 3.6468130847971074, + "grad_norm": 0.0036744000390172005, + "learning_rate": 4.140320705934708e-07, + "loss": 0.0, + "num_input_tokens_seen": 100609808, + "step": 149275 + }, + { + "epoch": 3.6469352356289546, + "grad_norm": 0.013988408260047436, + "learning_rate": 4.1396296936518047e-07, + "loss": 0.0, + "num_input_tokens_seen": 100613072, + "step": 149280 + }, + { + "epoch": 3.647057386460802, + "grad_norm": 0.022471310570836067, + "learning_rate": 4.1389387239870945e-07, + "loss": 0.0648, + "num_input_tokens_seen": 100616336, + "step": 149285 + }, + { + "epoch": 3.647179537292649, + "grad_norm": 0.026236383244395256, + "learning_rate": 4.138247796945599e-07, + "loss": 0.0003, + "num_input_tokens_seen": 100619408, + "step": 149290 + }, + { + "epoch": 3.647301688124496, + "grad_norm": 0.01007220707833767, + "learning_rate": 4.1375569125323374e-07, + "loss": 0.0005, + "num_input_tokens_seen": 100622928, + "step": 149295 + }, + { + "epoch": 3.6474238389563434, + "grad_norm": 0.020251065492630005, + "learning_rate": 4.136866070752343e-07, + "loss": 0.0, + "num_input_tokens_seen": 100626128, + "step": 149300 + }, + { + "epoch": 3.6475459897881906, + "grad_norm": 0.007953628897666931, + "learning_rate": 4.1361752716106315e-07, + "loss": 0.0801, + "num_input_tokens_seen": 100629584, + "step": 149305 + }, + { + "epoch": 3.6476681406200377, + "grad_norm": 0.0007968613062985241, + "learning_rate": 4.1354845151122344e-07, + "loss": 0.0563, + "num_input_tokens_seen": 100632848, + "step": 149310 + }, + { + "epoch": 3.647790291451885, + "grad_norm": 0.023661449551582336, + "learning_rate": 4.1347938012621675e-07, + "loss": 0.0, + "num_input_tokens_seen": 100636432, + "step": 149315 + }, + { + "epoch": 3.647912442283732, + "grad_norm": 0.0034785429015755653, + "learning_rate": 4.1341031300654615e-07, + "loss": 0.0, + "num_input_tokens_seen": 100639952, + "step": 149320 + }, + { + "epoch": 3.648034593115579, + "grad_norm": 0.0030977909918874502, + "learning_rate": 4.1334125015271316e-07, + "loss": 0.0, + "num_input_tokens_seen": 100643344, + "step": 149325 + }, + { + "epoch": 3.6481567439474265, + "grad_norm": 0.00491897389292717, + "learning_rate": 4.1327219156522043e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100646608, + "step": 149330 + }, + { + "epoch": 3.6482788947792733, + "grad_norm": 0.005906306207180023, + "learning_rate": 4.1320313724457046e-07, + "loss": 0.0, + "num_input_tokens_seen": 100650320, + "step": 149335 + }, + { + "epoch": 3.648401045611121, + "grad_norm": 0.01143131498247385, + "learning_rate": 4.1313408719126475e-07, + "loss": 0.0, + "num_input_tokens_seen": 100653648, + "step": 149340 + }, + { + "epoch": 3.6485231964429676, + "grad_norm": 0.000916936609428376, + "learning_rate": 4.130650414058061e-07, + "loss": 0.0, + "num_input_tokens_seen": 100656784, + "step": 149345 + }, + { + "epoch": 3.648645347274815, + "grad_norm": 0.00964375026524067, + "learning_rate": 4.1299599988869606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100660304, + "step": 149350 + }, + { + "epoch": 3.648767498106662, + "grad_norm": 0.004201000090688467, + "learning_rate": 4.1292696264043724e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100663312, + "step": 149355 + }, + { + "epoch": 3.648889648938509, + "grad_norm": 0.0007439203909598291, + "learning_rate": 4.128579296615312e-07, + "loss": 0.0, + "num_input_tokens_seen": 100666448, + "step": 149360 + }, + { + "epoch": 3.6490117997703564, + "grad_norm": 0.005541152320802212, + "learning_rate": 4.127889009524802e-07, + "loss": 0.0, + "num_input_tokens_seen": 100669712, + "step": 149365 + }, + { + "epoch": 3.6491339506022036, + "grad_norm": 0.001362864044494927, + "learning_rate": 4.127198765137866e-07, + "loss": 0.0009, + "num_input_tokens_seen": 100672976, + "step": 149370 + }, + { + "epoch": 3.6492561014340508, + "grad_norm": 0.017730576917529106, + "learning_rate": 4.1265085634595167e-07, + "loss": 0.0, + "num_input_tokens_seen": 100676944, + "step": 149375 + }, + { + "epoch": 3.649378252265898, + "grad_norm": 0.029552794992923737, + "learning_rate": 4.125818404494781e-07, + "loss": 0.0, + "num_input_tokens_seen": 100680784, + "step": 149380 + }, + { + "epoch": 3.649500403097745, + "grad_norm": 0.01852906122803688, + "learning_rate": 4.125128288248669e-07, + "loss": 0.0009, + "num_input_tokens_seen": 100683856, + "step": 149385 + }, + { + "epoch": 3.6496225539295923, + "grad_norm": 0.460417240858078, + "learning_rate": 4.1244382147262093e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100687376, + "step": 149390 + }, + { + "epoch": 3.6497447047614395, + "grad_norm": 0.014944472350180149, + "learning_rate": 4.123748183932414e-07, + "loss": 0.0, + "num_input_tokens_seen": 100690832, + "step": 149395 + }, + { + "epoch": 3.6498668555932867, + "grad_norm": 0.006090358830988407, + "learning_rate": 4.1230581958723e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100694544, + "step": 149400 + }, + { + "epoch": 3.649989006425134, + "grad_norm": 0.022321002557873726, + "learning_rate": 4.122368250550889e-07, + "loss": 0.0003, + "num_input_tokens_seen": 100697616, + "step": 149405 + }, + { + "epoch": 3.6501111572569807, + "grad_norm": 0.0012846055906265974, + "learning_rate": 4.121678347973195e-07, + "loss": 0.0, + "num_input_tokens_seen": 100700816, + "step": 149410 + }, + { + "epoch": 3.6502333080888283, + "grad_norm": 0.0003858658601529896, + "learning_rate": 4.1209884881442413e-07, + "loss": 0.0162, + "num_input_tokens_seen": 100703824, + "step": 149415 + }, + { + "epoch": 3.650355458920675, + "grad_norm": 0.0029156107921153307, + "learning_rate": 4.1202986710690356e-07, + "loss": 0.0, + "num_input_tokens_seen": 100707408, + "step": 149420 + }, + { + "epoch": 3.6504776097525227, + "grad_norm": 18.649917602539062, + "learning_rate": 4.1196088967526e-07, + "loss": 0.0527, + "num_input_tokens_seen": 100711056, + "step": 149425 + }, + { + "epoch": 3.6505997605843694, + "grad_norm": 0.012175104580819607, + "learning_rate": 4.1189191651999546e-07, + "loss": 0.0, + "num_input_tokens_seen": 100714448, + "step": 149430 + }, + { + "epoch": 3.6507219114162166, + "grad_norm": 0.00016269886691588908, + "learning_rate": 4.118229476416106e-07, + "loss": 0.0348, + "num_input_tokens_seen": 100717840, + "step": 149435 + }, + { + "epoch": 3.650844062248064, + "grad_norm": 0.004647035151720047, + "learning_rate": 4.1175398304060783e-07, + "loss": 0.0, + "num_input_tokens_seen": 100721424, + "step": 149440 + }, + { + "epoch": 3.650966213079911, + "grad_norm": 0.0009960465831682086, + "learning_rate": 4.1168502271748794e-07, + "loss": 0.0, + "num_input_tokens_seen": 100725008, + "step": 149445 + }, + { + "epoch": 3.651088363911758, + "grad_norm": 0.0012097298167645931, + "learning_rate": 4.1161606667275327e-07, + "loss": 0.0, + "num_input_tokens_seen": 100728656, + "step": 149450 + }, + { + "epoch": 3.6512105147436054, + "grad_norm": 0.002824085298925638, + "learning_rate": 4.1154711490690443e-07, + "loss": 0.0, + "num_input_tokens_seen": 100731664, + "step": 149455 + }, + { + "epoch": 3.6513326655754526, + "grad_norm": 0.0038389968685805798, + "learning_rate": 4.1147816742044317e-07, + "loss": 0.0, + "num_input_tokens_seen": 100734800, + "step": 149460 + }, + { + "epoch": 3.6514548164072997, + "grad_norm": 0.0008183844038285315, + "learning_rate": 4.1140922421387137e-07, + "loss": 0.0, + "num_input_tokens_seen": 100738384, + "step": 149465 + }, + { + "epoch": 3.651576967239147, + "grad_norm": 0.08280424773693085, + "learning_rate": 4.113402852876897e-07, + "loss": 0.0, + "num_input_tokens_seen": 100741712, + "step": 149470 + }, + { + "epoch": 3.651699118070994, + "grad_norm": 0.0005090846680104733, + "learning_rate": 4.1127135064240003e-07, + "loss": 0.0, + "num_input_tokens_seen": 100744848, + "step": 149475 + }, + { + "epoch": 3.6518212689028413, + "grad_norm": 0.015799952670931816, + "learning_rate": 4.112024202785033e-07, + "loss": 0.0, + "num_input_tokens_seen": 100748176, + "step": 149480 + }, + { + "epoch": 3.6519434197346885, + "grad_norm": 0.0010082110529765487, + "learning_rate": 4.1113349419650113e-07, + "loss": 0.0, + "num_input_tokens_seen": 100751696, + "step": 149485 + }, + { + "epoch": 3.6520655705665357, + "grad_norm": 0.010985535569489002, + "learning_rate": 4.1106457239689417e-07, + "loss": 0.0, + "num_input_tokens_seen": 100755408, + "step": 149490 + }, + { + "epoch": 3.652187721398383, + "grad_norm": 0.0007222816930152476, + "learning_rate": 4.109956548801845e-07, + "loss": 0.0, + "num_input_tokens_seen": 100758416, + "step": 149495 + }, + { + "epoch": 3.65230987223023, + "grad_norm": 0.0009421857539564371, + "learning_rate": 4.1092674164687247e-07, + "loss": 0.0, + "num_input_tokens_seen": 100761296, + "step": 149500 + }, + { + "epoch": 3.652432023062077, + "grad_norm": 0.0004014758742414415, + "learning_rate": 4.1085783269746e-07, + "loss": 0.0, + "num_input_tokens_seen": 100764432, + "step": 149505 + }, + { + "epoch": 3.6525541738939244, + "grad_norm": 0.003766036592423916, + "learning_rate": 4.107889280324478e-07, + "loss": 0.0, + "num_input_tokens_seen": 100767824, + "step": 149510 + }, + { + "epoch": 3.652676324725771, + "grad_norm": 0.006593499332666397, + "learning_rate": 4.107200276523367e-07, + "loss": 0.0, + "num_input_tokens_seen": 100771344, + "step": 149515 + }, + { + "epoch": 3.6527984755576184, + "grad_norm": 0.26415514945983887, + "learning_rate": 4.10651131557628e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100774864, + "step": 149520 + }, + { + "epoch": 3.6529206263894656, + "grad_norm": 0.0013655334478244185, + "learning_rate": 4.105822397488231e-07, + "loss": 0.0313, + "num_input_tokens_seen": 100777808, + "step": 149525 + }, + { + "epoch": 3.6530427772213128, + "grad_norm": 0.01848735846579075, + "learning_rate": 4.1051335222642247e-07, + "loss": 0.0, + "num_input_tokens_seen": 100781712, + "step": 149530 + }, + { + "epoch": 3.65316492805316, + "grad_norm": 0.0029542753472924232, + "learning_rate": 4.1044446899092756e-07, + "loss": 0.0, + "num_input_tokens_seen": 100785360, + "step": 149535 + }, + { + "epoch": 3.653287078885007, + "grad_norm": 0.10529420524835587, + "learning_rate": 4.1037559004283863e-07, + "loss": 0.0672, + "num_input_tokens_seen": 100788368, + "step": 149540 + }, + { + "epoch": 3.6534092297168543, + "grad_norm": 0.3563624918460846, + "learning_rate": 4.103067153826575e-07, + "loss": 0.0004, + "num_input_tokens_seen": 100791824, + "step": 149545 + }, + { + "epoch": 3.6535313805487015, + "grad_norm": 0.046180255711078644, + "learning_rate": 4.1023784501088407e-07, + "loss": 0.0, + "num_input_tokens_seen": 100795344, + "step": 149550 + }, + { + "epoch": 3.6536535313805487, + "grad_norm": 0.0006740966928191483, + "learning_rate": 4.101689789280197e-07, + "loss": 0.0, + "num_input_tokens_seen": 100798800, + "step": 149555 + }, + { + "epoch": 3.653775682212396, + "grad_norm": 0.011169800534844398, + "learning_rate": 4.101001171345655e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100802448, + "step": 149560 + }, + { + "epoch": 3.653897833044243, + "grad_norm": 0.0009329328895546496, + "learning_rate": 4.100312596310216e-07, + "loss": 0.0, + "num_input_tokens_seen": 100806032, + "step": 149565 + }, + { + "epoch": 3.6540199838760903, + "grad_norm": 0.010429616086184978, + "learning_rate": 4.0996240641788936e-07, + "loss": 0.0, + "num_input_tokens_seen": 100809360, + "step": 149570 + }, + { + "epoch": 3.6541421347079375, + "grad_norm": 0.0010302531300112605, + "learning_rate": 4.0989355749566887e-07, + "loss": 0.0, + "num_input_tokens_seen": 100812880, + "step": 149575 + }, + { + "epoch": 3.6542642855397847, + "grad_norm": 42.75357437133789, + "learning_rate": 4.098247128648611e-07, + "loss": 0.0536, + "num_input_tokens_seen": 100816528, + "step": 149580 + }, + { + "epoch": 3.654386436371632, + "grad_norm": 0.0007744549075141549, + "learning_rate": 4.097558725259672e-07, + "loss": 0.0, + "num_input_tokens_seen": 100819664, + "step": 149585 + }, + { + "epoch": 3.6545085872034786, + "grad_norm": 0.002218458568677306, + "learning_rate": 4.09687036479487e-07, + "loss": 0.0, + "num_input_tokens_seen": 100823504, + "step": 149590 + }, + { + "epoch": 3.6546307380353262, + "grad_norm": 0.0005056411027908325, + "learning_rate": 4.0961820472592167e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100827024, + "step": 149595 + }, + { + "epoch": 3.654752888867173, + "grad_norm": 0.0013489199336618185, + "learning_rate": 4.0954937726577165e-07, + "loss": 0.0455, + "num_input_tokens_seen": 100830352, + "step": 149600 + }, + { + "epoch": 3.6548750396990206, + "grad_norm": 0.001258478732779622, + "learning_rate": 4.094805540995371e-07, + "loss": 0.0, + "num_input_tokens_seen": 100834000, + "step": 149605 + }, + { + "epoch": 3.6549971905308674, + "grad_norm": 0.00742685841396451, + "learning_rate": 4.09411735227719e-07, + "loss": 0.0474, + "num_input_tokens_seen": 100837008, + "step": 149610 + }, + { + "epoch": 3.6551193413627145, + "grad_norm": 0.016420450061559677, + "learning_rate": 4.0934292065081733e-07, + "loss": 0.0267, + "num_input_tokens_seen": 100840848, + "step": 149615 + }, + { + "epoch": 3.6552414921945617, + "grad_norm": 0.01596057042479515, + "learning_rate": 4.0927411036933314e-07, + "loss": 0.0, + "num_input_tokens_seen": 100843984, + "step": 149620 + }, + { + "epoch": 3.655363643026409, + "grad_norm": 0.006191764958202839, + "learning_rate": 4.092053043837661e-07, + "loss": 0.0, + "num_input_tokens_seen": 100847504, + "step": 149625 + }, + { + "epoch": 3.655485793858256, + "grad_norm": 0.01052805408835411, + "learning_rate": 4.091365026946174e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100850640, + "step": 149630 + }, + { + "epoch": 3.6556079446901033, + "grad_norm": 0.002658533863723278, + "learning_rate": 4.0906770530238667e-07, + "loss": 0.0, + "num_input_tokens_seen": 100854736, + "step": 149635 + }, + { + "epoch": 3.6557300955219505, + "grad_norm": 0.030460629612207413, + "learning_rate": 4.089989122075748e-07, + "loss": 0.0, + "num_input_tokens_seen": 100858000, + "step": 149640 + }, + { + "epoch": 3.6558522463537977, + "grad_norm": 0.021629929542541504, + "learning_rate": 4.0893012341068146e-07, + "loss": 0.1434, + "num_input_tokens_seen": 100861136, + "step": 149645 + }, + { + "epoch": 3.655974397185645, + "grad_norm": 0.0006607657414861023, + "learning_rate": 4.088613389122072e-07, + "loss": 0.0, + "num_input_tokens_seen": 100864848, + "step": 149650 + }, + { + "epoch": 3.656096548017492, + "grad_norm": 0.013342432677745819, + "learning_rate": 4.087925587126527e-07, + "loss": 0.0012, + "num_input_tokens_seen": 100868240, + "step": 149655 + }, + { + "epoch": 3.6562186988493393, + "grad_norm": 0.3374849855899811, + "learning_rate": 4.087237828125174e-07, + "loss": 0.0003, + "num_input_tokens_seen": 100871888, + "step": 149660 + }, + { + "epoch": 3.6563408496811864, + "grad_norm": 0.006206023972481489, + "learning_rate": 4.0865501121230205e-07, + "loss": 0.0, + "num_input_tokens_seen": 100875344, + "step": 149665 + }, + { + "epoch": 3.6564630005130336, + "grad_norm": 1.4731436967849731, + "learning_rate": 4.085862439125063e-07, + "loss": 0.0003, + "num_input_tokens_seen": 100878544, + "step": 149670 + }, + { + "epoch": 3.656585151344881, + "grad_norm": 0.0007857258315198123, + "learning_rate": 4.0851748091363036e-07, + "loss": 0.0, + "num_input_tokens_seen": 100881616, + "step": 149675 + }, + { + "epoch": 3.656707302176728, + "grad_norm": 0.0003224741667509079, + "learning_rate": 4.084487222161748e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100885072, + "step": 149680 + }, + { + "epoch": 3.6568294530085748, + "grad_norm": 0.008007667027413845, + "learning_rate": 4.0837996782063876e-07, + "loss": 0.0, + "num_input_tokens_seen": 100888592, + "step": 149685 + }, + { + "epoch": 3.6569516038404224, + "grad_norm": 0.15671700239181519, + "learning_rate": 4.083112177275232e-07, + "loss": 0.0005, + "num_input_tokens_seen": 100891984, + "step": 149690 + }, + { + "epoch": 3.657073754672269, + "grad_norm": 0.041414983570575714, + "learning_rate": 4.082424719373272e-07, + "loss": 0.0309, + "num_input_tokens_seen": 100895760, + "step": 149695 + }, + { + "epoch": 3.6571959055041163, + "grad_norm": 0.0028408959042280912, + "learning_rate": 4.0817373045055134e-07, + "loss": 0.0, + "num_input_tokens_seen": 100899600, + "step": 149700 + }, + { + "epoch": 3.6573180563359635, + "grad_norm": 0.017122983932495117, + "learning_rate": 4.0810499326769557e-07, + "loss": 0.0974, + "num_input_tokens_seen": 100902672, + "step": 149705 + }, + { + "epoch": 3.6574402071678107, + "grad_norm": 0.00024130615929607302, + "learning_rate": 4.080362603892589e-07, + "loss": 0.0, + "num_input_tokens_seen": 100905872, + "step": 149710 + }, + { + "epoch": 3.657562357999658, + "grad_norm": 0.015229341574013233, + "learning_rate": 4.079675318157423e-07, + "loss": 0.0, + "num_input_tokens_seen": 100909200, + "step": 149715 + }, + { + "epoch": 3.657684508831505, + "grad_norm": 0.051498375833034515, + "learning_rate": 4.078988075476445e-07, + "loss": 0.0, + "num_input_tokens_seen": 100912144, + "step": 149720 + }, + { + "epoch": 3.6578066596633523, + "grad_norm": 0.04249678924679756, + "learning_rate": 4.0783008758546633e-07, + "loss": 0.0, + "num_input_tokens_seen": 100915536, + "step": 149725 + }, + { + "epoch": 3.6579288104951995, + "grad_norm": 0.008442615158855915, + "learning_rate": 4.0776137192970664e-07, + "loss": 0.0, + "num_input_tokens_seen": 100918544, + "step": 149730 + }, + { + "epoch": 3.6580509613270467, + "grad_norm": 0.004928910173475742, + "learning_rate": 4.076926605808655e-07, + "loss": 0.0, + "num_input_tokens_seen": 100921744, + "step": 149735 + }, + { + "epoch": 3.658173112158894, + "grad_norm": 0.02472076192498207, + "learning_rate": 4.0762395353944303e-07, + "loss": 0.0, + "num_input_tokens_seen": 100924944, + "step": 149740 + }, + { + "epoch": 3.658295262990741, + "grad_norm": 0.22950170934200287, + "learning_rate": 4.075552508059382e-07, + "loss": 0.0002, + "num_input_tokens_seen": 100928656, + "step": 149745 + }, + { + "epoch": 3.6584174138225882, + "grad_norm": 0.0042465124279260635, + "learning_rate": 4.0748655238085115e-07, + "loss": 0.0, + "num_input_tokens_seen": 100931920, + "step": 149750 + }, + { + "epoch": 3.6585395646544354, + "grad_norm": 0.10099215805530548, + "learning_rate": 4.074178582646811e-07, + "loss": 0.0, + "num_input_tokens_seen": 100935312, + "step": 149755 + }, + { + "epoch": 3.6586617154862826, + "grad_norm": 0.1387917548418045, + "learning_rate": 4.07349168457928e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100938832, + "step": 149760 + }, + { + "epoch": 3.65878386631813, + "grad_norm": 0.001386315911076963, + "learning_rate": 4.0728048296109084e-07, + "loss": 0.0, + "num_input_tokens_seen": 100942096, + "step": 149765 + }, + { + "epoch": 3.6589060171499765, + "grad_norm": 0.005705169402062893, + "learning_rate": 4.072118017746694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100945424, + "step": 149770 + }, + { + "epoch": 3.659028167981824, + "grad_norm": 0.0016926834359765053, + "learning_rate": 4.0714312489916347e-07, + "loss": 0.0, + "num_input_tokens_seen": 100948880, + "step": 149775 + }, + { + "epoch": 3.659150318813671, + "grad_norm": 0.013110811822116375, + "learning_rate": 4.07074452335072e-07, + "loss": 0.0, + "num_input_tokens_seen": 100954448, + "step": 149780 + }, + { + "epoch": 3.6592724696455186, + "grad_norm": 0.030060919001698494, + "learning_rate": 4.0700578408289477e-07, + "loss": 0.0, + "num_input_tokens_seen": 100958288, + "step": 149785 + }, + { + "epoch": 3.6593946204773653, + "grad_norm": 0.0444689504802227, + "learning_rate": 4.069371201431308e-07, + "loss": 0.0514, + "num_input_tokens_seen": 100961296, + "step": 149790 + }, + { + "epoch": 3.6595167713092125, + "grad_norm": 0.0010941436048597097, + "learning_rate": 4.068684605162798e-07, + "loss": 0.0, + "num_input_tokens_seen": 100964432, + "step": 149795 + }, + { + "epoch": 3.6596389221410597, + "grad_norm": 0.01235596090555191, + "learning_rate": 4.067998052028406e-07, + "loss": 0.0, + "num_input_tokens_seen": 100967824, + "step": 149800 + }, + { + "epoch": 3.659761072972907, + "grad_norm": 0.11753348261117935, + "learning_rate": 4.0673115420331315e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100971472, + "step": 149805 + }, + { + "epoch": 3.659883223804754, + "grad_norm": 0.0018995238933712244, + "learning_rate": 4.0666250751819597e-07, + "loss": 0.0, + "num_input_tokens_seen": 100974864, + "step": 149810 + }, + { + "epoch": 3.6600053746366012, + "grad_norm": 0.00396025599911809, + "learning_rate": 4.0659386514798887e-07, + "loss": 0.0, + "num_input_tokens_seen": 100978576, + "step": 149815 + }, + { + "epoch": 3.6601275254684484, + "grad_norm": 0.0511053092777729, + "learning_rate": 4.065252270931909e-07, + "loss": 0.0, + "num_input_tokens_seen": 100982032, + "step": 149820 + }, + { + "epoch": 3.6602496763002956, + "grad_norm": 0.0021754498593509197, + "learning_rate": 4.0645659335430073e-07, + "loss": 0.0, + "num_input_tokens_seen": 100985296, + "step": 149825 + }, + { + "epoch": 3.660371827132143, + "grad_norm": 0.047641050070524216, + "learning_rate": 4.063879639318178e-07, + "loss": 0.0001, + "num_input_tokens_seen": 100988496, + "step": 149830 + }, + { + "epoch": 3.66049397796399, + "grad_norm": 0.001030646963045001, + "learning_rate": 4.063193388262417e-07, + "loss": 0.0, + "num_input_tokens_seen": 100991696, + "step": 149835 + }, + { + "epoch": 3.660616128795837, + "grad_norm": 0.0016205195570364594, + "learning_rate": 4.062507180380707e-07, + "loss": 0.0006, + "num_input_tokens_seen": 100995088, + "step": 149840 + }, + { + "epoch": 3.6607382796276844, + "grad_norm": 0.003273698966950178, + "learning_rate": 4.061821015678044e-07, + "loss": 0.0, + "num_input_tokens_seen": 100998224, + "step": 149845 + }, + { + "epoch": 3.6608604304595316, + "grad_norm": 0.00023401729413308203, + "learning_rate": 4.061134894159413e-07, + "loss": 0.0, + "num_input_tokens_seen": 101001872, + "step": 149850 + }, + { + "epoch": 3.6609825812913783, + "grad_norm": 0.006904236972332001, + "learning_rate": 4.06044881582981e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101005328, + "step": 149855 + }, + { + "epoch": 3.661104732123226, + "grad_norm": 0.0033744508400559425, + "learning_rate": 4.059762780694217e-07, + "loss": 0.0, + "num_input_tokens_seen": 101008720, + "step": 149860 + }, + { + "epoch": 3.6612268829550727, + "grad_norm": 0.0004270582285244018, + "learning_rate": 4.059076788757627e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101012112, + "step": 149865 + }, + { + "epoch": 3.6613490337869203, + "grad_norm": 0.0033920772839337587, + "learning_rate": 4.058390840025032e-07, + "loss": 0.0, + "num_input_tokens_seen": 101015248, + "step": 149870 + }, + { + "epoch": 3.661471184618767, + "grad_norm": 0.004283586982637644, + "learning_rate": 4.0577049345014137e-07, + "loss": 0.06, + "num_input_tokens_seen": 101018128, + "step": 149875 + }, + { + "epoch": 3.6615933354506143, + "grad_norm": 0.00031772934016771615, + "learning_rate": 4.057019072191766e-07, + "loss": 0.0, + "num_input_tokens_seen": 101021136, + "step": 149880 + }, + { + "epoch": 3.6617154862824615, + "grad_norm": 0.022333379834890366, + "learning_rate": 4.056333253101072e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101024848, + "step": 149885 + }, + { + "epoch": 3.6618376371143087, + "grad_norm": 0.3499573767185211, + "learning_rate": 4.0556474772343194e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101028176, + "step": 149890 + }, + { + "epoch": 3.661959787946156, + "grad_norm": 0.015845805406570435, + "learning_rate": 4.0549617445965023e-07, + "loss": 0.0, + "num_input_tokens_seen": 101031248, + "step": 149895 + }, + { + "epoch": 3.662081938778003, + "grad_norm": 0.0006837969995103776, + "learning_rate": 4.0542760551925983e-07, + "loss": 0.0366, + "num_input_tokens_seen": 101034960, + "step": 149900 + }, + { + "epoch": 3.66220408960985, + "grad_norm": 0.014118622057139874, + "learning_rate": 4.053590409027602e-07, + "loss": 0.0, + "num_input_tokens_seen": 101038224, + "step": 149905 + }, + { + "epoch": 3.6623262404416974, + "grad_norm": 0.0033027688041329384, + "learning_rate": 4.052904806106495e-07, + "loss": 0.0, + "num_input_tokens_seen": 101041360, + "step": 149910 + }, + { + "epoch": 3.6624483912735446, + "grad_norm": 0.18004614114761353, + "learning_rate": 4.052219246434261e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101044816, + "step": 149915 + }, + { + "epoch": 3.662570542105392, + "grad_norm": 0.002139299176633358, + "learning_rate": 4.0515337300158914e-07, + "loss": 0.0, + "num_input_tokens_seen": 101047824, + "step": 149920 + }, + { + "epoch": 3.662692692937239, + "grad_norm": 0.00020565226441249251, + "learning_rate": 4.050848256856365e-07, + "loss": 0.0975, + "num_input_tokens_seen": 101051280, + "step": 149925 + }, + { + "epoch": 3.662814843769086, + "grad_norm": 0.003223554929718375, + "learning_rate": 4.0501628269606735e-07, + "loss": 0.0, + "num_input_tokens_seen": 101055056, + "step": 149930 + }, + { + "epoch": 3.6629369946009334, + "grad_norm": 51.59830093383789, + "learning_rate": 4.049477440333795e-07, + "loss": 0.001, + "num_input_tokens_seen": 101058512, + "step": 149935 + }, + { + "epoch": 3.6630591454327806, + "grad_norm": 0.0013691794592887163, + "learning_rate": 4.04879209698072e-07, + "loss": 0.0299, + "num_input_tokens_seen": 101061648, + "step": 149940 + }, + { + "epoch": 3.6631812962646277, + "grad_norm": 0.0034709975589066744, + "learning_rate": 4.048106796906426e-07, + "loss": 0.0, + "num_input_tokens_seen": 101065360, + "step": 149945 + }, + { + "epoch": 3.6633034470964745, + "grad_norm": 0.0040048398077487946, + "learning_rate": 4.047421540115905e-07, + "loss": 0.0, + "num_input_tokens_seen": 101068880, + "step": 149950 + }, + { + "epoch": 3.663425597928322, + "grad_norm": 0.08709995448589325, + "learning_rate": 4.0467363266141317e-07, + "loss": 0.0, + "num_input_tokens_seen": 101071888, + "step": 149955 + }, + { + "epoch": 3.663547748760169, + "grad_norm": 0.004366215784102678, + "learning_rate": 4.046051156406093e-07, + "loss": 0.0, + "num_input_tokens_seen": 101075472, + "step": 149960 + }, + { + "epoch": 3.6636698995920165, + "grad_norm": 0.0002989302738569677, + "learning_rate": 4.045366029496774e-07, + "loss": 0.0, + "num_input_tokens_seen": 101078672, + "step": 149965 + }, + { + "epoch": 3.6637920504238632, + "grad_norm": 0.002677259035408497, + "learning_rate": 4.044680945891152e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101082064, + "step": 149970 + }, + { + "epoch": 3.6639142012557104, + "grad_norm": 0.011708454228937626, + "learning_rate": 4.0439959055942163e-07, + "loss": 0.0, + "num_input_tokens_seen": 101085520, + "step": 149975 + }, + { + "epoch": 3.6640363520875576, + "grad_norm": 0.00015541176253464073, + "learning_rate": 4.0433109086109407e-07, + "loss": 0.0875, + "num_input_tokens_seen": 101089168, + "step": 149980 + }, + { + "epoch": 3.664158502919405, + "grad_norm": 0.0014233732363209128, + "learning_rate": 4.042625954946309e-07, + "loss": 0.0, + "num_input_tokens_seen": 101092560, + "step": 149985 + }, + { + "epoch": 3.664280653751252, + "grad_norm": 0.013304512947797775, + "learning_rate": 4.0419410446053095e-07, + "loss": 0.0839, + "num_input_tokens_seen": 101096400, + "step": 149990 + }, + { + "epoch": 3.664402804583099, + "grad_norm": 0.0023726148065179586, + "learning_rate": 4.0412561775929123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101099856, + "step": 149995 + }, + { + "epoch": 3.6645249554149464, + "grad_norm": 0.0012767596635967493, + "learning_rate": 4.040571353914106e-07, + "loss": 0.0, + "num_input_tokens_seen": 101102992, + "step": 150000 + }, + { + "epoch": 3.6646471062467936, + "grad_norm": 0.002920180559158325, + "learning_rate": 4.039886573573864e-07, + "loss": 0.0, + "num_input_tokens_seen": 101106320, + "step": 150005 + }, + { + "epoch": 3.6647692570786408, + "grad_norm": 0.00018298810755368322, + "learning_rate": 4.039201836577175e-07, + "loss": 0.0, + "num_input_tokens_seen": 101109328, + "step": 150010 + }, + { + "epoch": 3.664891407910488, + "grad_norm": 0.01957639306783676, + "learning_rate": 4.038517142929012e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101112400, + "step": 150015 + }, + { + "epoch": 3.665013558742335, + "grad_norm": 0.0002295639569638297, + "learning_rate": 4.037832492634353e-07, + "loss": 0.0, + "num_input_tokens_seen": 101115408, + "step": 150020 + }, + { + "epoch": 3.6651357095741823, + "grad_norm": 0.00030080214492045343, + "learning_rate": 4.0371478856981834e-07, + "loss": 0.0, + "num_input_tokens_seen": 101118800, + "step": 150025 + }, + { + "epoch": 3.6652578604060295, + "grad_norm": 0.0009421844151802361, + "learning_rate": 4.036463322125474e-07, + "loss": 0.0, + "num_input_tokens_seen": 101122064, + "step": 150030 + }, + { + "epoch": 3.6653800112378763, + "grad_norm": 0.0010937204351648688, + "learning_rate": 4.0357788019212116e-07, + "loss": 0.0813, + "num_input_tokens_seen": 101125776, + "step": 150035 + }, + { + "epoch": 3.665502162069724, + "grad_norm": 0.005235959775745869, + "learning_rate": 4.0350943250903657e-07, + "loss": 0.0, + "num_input_tokens_seen": 101129168, + "step": 150040 + }, + { + "epoch": 3.6656243129015706, + "grad_norm": 0.0004736567207146436, + "learning_rate": 4.034409891637919e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101132880, + "step": 150045 + }, + { + "epoch": 3.6657464637334183, + "grad_norm": 0.00039471962372772396, + "learning_rate": 4.033725501568851e-07, + "loss": 0.0, + "num_input_tokens_seen": 101136272, + "step": 150050 + }, + { + "epoch": 3.665868614565265, + "grad_norm": 0.006417789030820131, + "learning_rate": 4.0330411548881325e-07, + "loss": 0.0, + "num_input_tokens_seen": 101139728, + "step": 150055 + }, + { + "epoch": 3.665990765397112, + "grad_norm": 0.006520419381558895, + "learning_rate": 4.032356851600748e-07, + "loss": 0.0, + "num_input_tokens_seen": 101142608, + "step": 150060 + }, + { + "epoch": 3.6661129162289594, + "grad_norm": 0.000228529519517906, + "learning_rate": 4.0316725917116645e-07, + "loss": 0.0004, + "num_input_tokens_seen": 101145936, + "step": 150065 + }, + { + "epoch": 3.6662350670608066, + "grad_norm": 0.00034973511355929077, + "learning_rate": 4.030988375225868e-07, + "loss": 0.0, + "num_input_tokens_seen": 101149840, + "step": 150070 + }, + { + "epoch": 3.666357217892654, + "grad_norm": 0.003269769949838519, + "learning_rate": 4.0303042021483256e-07, + "loss": 0.0, + "num_input_tokens_seen": 101153168, + "step": 150075 + }, + { + "epoch": 3.666479368724501, + "grad_norm": 0.00031599405338056386, + "learning_rate": 4.029620072484017e-07, + "loss": 0.0, + "num_input_tokens_seen": 101156560, + "step": 150080 + }, + { + "epoch": 3.666601519556348, + "grad_norm": 0.013983500190079212, + "learning_rate": 4.02893598623792e-07, + "loss": 0.0, + "num_input_tokens_seen": 101160208, + "step": 150085 + }, + { + "epoch": 3.6667236703881954, + "grad_norm": 0.0007696465472690761, + "learning_rate": 4.028251943415003e-07, + "loss": 0.0, + "num_input_tokens_seen": 101163792, + "step": 150090 + }, + { + "epoch": 3.6668458212200425, + "grad_norm": 0.012345520779490471, + "learning_rate": 4.027567944020248e-07, + "loss": 0.0, + "num_input_tokens_seen": 101167184, + "step": 150095 + }, + { + "epoch": 3.6669679720518897, + "grad_norm": 0.0010798462899401784, + "learning_rate": 4.0268839880586214e-07, + "loss": 0.0269, + "num_input_tokens_seen": 101170448, + "step": 150100 + }, + { + "epoch": 3.667090122883737, + "grad_norm": 0.004565829876810312, + "learning_rate": 4.026200075535104e-07, + "loss": 0.0, + "num_input_tokens_seen": 101173712, + "step": 150105 + }, + { + "epoch": 3.667212273715584, + "grad_norm": 0.0010166483698412776, + "learning_rate": 4.0255162064546644e-07, + "loss": 0.0, + "num_input_tokens_seen": 101177296, + "step": 150110 + }, + { + "epoch": 3.6673344245474313, + "grad_norm": 0.9598413705825806, + "learning_rate": 4.0248323808222803e-07, + "loss": 0.0008, + "num_input_tokens_seen": 101181072, + "step": 150115 + }, + { + "epoch": 3.6674565753792785, + "grad_norm": 0.0005369943683035672, + "learning_rate": 4.024148598642919e-07, + "loss": 0.0, + "num_input_tokens_seen": 101184016, + "step": 150120 + }, + { + "epoch": 3.6675787262111257, + "grad_norm": 0.01920846663415432, + "learning_rate": 4.0234648599215606e-07, + "loss": 0.0, + "num_input_tokens_seen": 101186832, + "step": 150125 + }, + { + "epoch": 3.6677008770429724, + "grad_norm": 0.002244969131425023, + "learning_rate": 4.022781164663173e-07, + "loss": 0.0321, + "num_input_tokens_seen": 101189968, + "step": 150130 + }, + { + "epoch": 3.66782302787482, + "grad_norm": 0.1661972850561142, + "learning_rate": 4.0220975128727244e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101193232, + "step": 150135 + }, + { + "epoch": 3.667945178706667, + "grad_norm": 0.00025612005265429616, + "learning_rate": 4.02141390455519e-07, + "loss": 0.0334, + "num_input_tokens_seen": 101196368, + "step": 150140 + }, + { + "epoch": 3.668067329538514, + "grad_norm": 0.004735205322504044, + "learning_rate": 4.0207303397155467e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101199824, + "step": 150145 + }, + { + "epoch": 3.668189480370361, + "grad_norm": 0.0003075416316278279, + "learning_rate": 4.020046818358755e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101203088, + "step": 150150 + }, + { + "epoch": 3.6683116312022084, + "grad_norm": 47.425045013427734, + "learning_rate": 4.0193633404897973e-07, + "loss": 0.1163, + "num_input_tokens_seen": 101206800, + "step": 150155 + }, + { + "epoch": 3.6684337820340556, + "grad_norm": 0.0007772819953970611, + "learning_rate": 4.0186799061136334e-07, + "loss": 0.1, + "num_input_tokens_seen": 101209872, + "step": 150160 + }, + { + "epoch": 3.6685559328659028, + "grad_norm": 0.001310055493377149, + "learning_rate": 4.0179965152352413e-07, + "loss": 0.0268, + "num_input_tokens_seen": 101213648, + "step": 150165 + }, + { + "epoch": 3.66867808369775, + "grad_norm": 0.0007551017333753407, + "learning_rate": 4.0173131678595837e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101216912, + "step": 150170 + }, + { + "epoch": 3.668800234529597, + "grad_norm": 0.018435023725032806, + "learning_rate": 4.016629863991634e-07, + "loss": 0.0371, + "num_input_tokens_seen": 101221136, + "step": 150175 + }, + { + "epoch": 3.6689223853614443, + "grad_norm": 49.14809036254883, + "learning_rate": 4.0159466036363654e-07, + "loss": 0.0526, + "num_input_tokens_seen": 101224720, + "step": 150180 + }, + { + "epoch": 3.6690445361932915, + "grad_norm": 0.005340251140296459, + "learning_rate": 4.0152633867987383e-07, + "loss": 0.0, + "num_input_tokens_seen": 101228368, + "step": 150185 + }, + { + "epoch": 3.6691666870251387, + "grad_norm": 1.001558780670166, + "learning_rate": 4.01458021348373e-07, + "loss": 0.0, + "num_input_tokens_seen": 101231888, + "step": 150190 + }, + { + "epoch": 3.669288837856986, + "grad_norm": 0.00035684643080458045, + "learning_rate": 4.0138970836963006e-07, + "loss": 0.0383, + "num_input_tokens_seen": 101235088, + "step": 150195 + }, + { + "epoch": 3.669410988688833, + "grad_norm": 0.0006180881755426526, + "learning_rate": 4.0132139974414247e-07, + "loss": 0.0201, + "num_input_tokens_seen": 101238352, + "step": 150200 + }, + { + "epoch": 3.6695331395206803, + "grad_norm": 0.002350478433072567, + "learning_rate": 4.012530954724064e-07, + "loss": 0.0, + "num_input_tokens_seen": 101241552, + "step": 150205 + }, + { + "epoch": 3.6696552903525275, + "grad_norm": 0.0037568961270153522, + "learning_rate": 4.011847955549188e-07, + "loss": 0.075, + "num_input_tokens_seen": 101244816, + "step": 150210 + }, + { + "epoch": 3.669777441184374, + "grad_norm": 0.0006394311785697937, + "learning_rate": 4.011164999921768e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101248080, + "step": 150215 + }, + { + "epoch": 3.669899592016222, + "grad_norm": 0.0004662454593926668, + "learning_rate": 4.010482087846766e-07, + "loss": 0.0, + "num_input_tokens_seen": 101251856, + "step": 150220 + }, + { + "epoch": 3.6700217428480686, + "grad_norm": 0.004368456080555916, + "learning_rate": 4.0097992193291474e-07, + "loss": 0.0, + "num_input_tokens_seen": 101255376, + "step": 150225 + }, + { + "epoch": 3.6701438936799162, + "grad_norm": 0.009613312780857086, + "learning_rate": 4.0091163943738825e-07, + "loss": 0.0, + "num_input_tokens_seen": 101258640, + "step": 150230 + }, + { + "epoch": 3.670266044511763, + "grad_norm": 0.0005940728588029742, + "learning_rate": 4.008433612985931e-07, + "loss": 0.0, + "num_input_tokens_seen": 101261648, + "step": 150235 + }, + { + "epoch": 3.67038819534361, + "grad_norm": 16.313629150390625, + "learning_rate": 4.007750875170266e-07, + "loss": 0.0373, + "num_input_tokens_seen": 101264976, + "step": 150240 + }, + { + "epoch": 3.6705103461754574, + "grad_norm": 0.3082471787929535, + "learning_rate": 4.007068180931844e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101268176, + "step": 150245 + }, + { + "epoch": 3.6706324970073045, + "grad_norm": 0.004585791379213333, + "learning_rate": 4.006385530275638e-07, + "loss": 0.0457, + "num_input_tokens_seen": 101271120, + "step": 150250 + }, + { + "epoch": 3.6707546478391517, + "grad_norm": 0.000260774337220937, + "learning_rate": 4.005702923206605e-07, + "loss": 0.0, + "num_input_tokens_seen": 101274320, + "step": 150255 + }, + { + "epoch": 3.670876798670999, + "grad_norm": 9.134371794061735e-05, + "learning_rate": 4.005020359729716e-07, + "loss": 0.0, + "num_input_tokens_seen": 101277840, + "step": 150260 + }, + { + "epoch": 3.670998949502846, + "grad_norm": 0.00024886432220228016, + "learning_rate": 4.0043378398499286e-07, + "loss": 0.0, + "num_input_tokens_seen": 101280784, + "step": 150265 + }, + { + "epoch": 3.6711211003346933, + "grad_norm": 0.4519248604774475, + "learning_rate": 4.0036553635722083e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101284176, + "step": 150270 + }, + { + "epoch": 3.6712432511665405, + "grad_norm": 0.00011098060349468142, + "learning_rate": 4.0029729309015224e-07, + "loss": 0.0, + "num_input_tokens_seen": 101287184, + "step": 150275 + }, + { + "epoch": 3.6713654019983877, + "grad_norm": 0.0005101999267935753, + "learning_rate": 4.0022905418428275e-07, + "loss": 0.0, + "num_input_tokens_seen": 101290512, + "step": 150280 + }, + { + "epoch": 3.671487552830235, + "grad_norm": 0.00131460081320256, + "learning_rate": 4.0016081964010927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101294608, + "step": 150285 + }, + { + "epoch": 3.671609703662082, + "grad_norm": 0.005014736205339432, + "learning_rate": 4.000925894581272e-07, + "loss": 0.0, + "num_input_tokens_seen": 101298128, + "step": 150290 + }, + { + "epoch": 3.6717318544939292, + "grad_norm": 0.005378643982112408, + "learning_rate": 4.000243636388332e-07, + "loss": 0.0, + "num_input_tokens_seen": 101301456, + "step": 150295 + }, + { + "epoch": 3.671854005325776, + "grad_norm": 0.0007074220338836312, + "learning_rate": 3.9995614218272377e-07, + "loss": 0.0, + "num_input_tokens_seen": 101305040, + "step": 150300 + }, + { + "epoch": 3.6719761561576236, + "grad_norm": 0.34038740396499634, + "learning_rate": 3.9988792509029435e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101308240, + "step": 150305 + }, + { + "epoch": 3.6720983069894704, + "grad_norm": 0.044417813420295715, + "learning_rate": 3.998197123620417e-07, + "loss": 0.0, + "num_input_tokens_seen": 101311504, + "step": 150310 + }, + { + "epoch": 3.672220457821318, + "grad_norm": 0.0010615488281473517, + "learning_rate": 3.997515039984611e-07, + "loss": 0.0, + "num_input_tokens_seen": 101314896, + "step": 150315 + }, + { + "epoch": 3.6723426086531648, + "grad_norm": 0.02319425158202648, + "learning_rate": 3.9968330000004944e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101318032, + "step": 150320 + }, + { + "epoch": 3.672464759485012, + "grad_norm": 0.0005021728575229645, + "learning_rate": 3.9961510036730227e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101321808, + "step": 150325 + }, + { + "epoch": 3.672586910316859, + "grad_norm": 0.0013200391549617052, + "learning_rate": 3.995469051007152e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101325008, + "step": 150330 + }, + { + "epoch": 3.6727090611487063, + "grad_norm": 0.000464181590359658, + "learning_rate": 3.9947871420078495e-07, + "loss": 0.0, + "num_input_tokens_seen": 101328144, + "step": 150335 + }, + { + "epoch": 3.6728312119805535, + "grad_norm": 0.0006909414078108966, + "learning_rate": 3.9941052766800656e-07, + "loss": 0.0, + "num_input_tokens_seen": 101331536, + "step": 150340 + }, + { + "epoch": 3.6729533628124007, + "grad_norm": 0.0009814815130084753, + "learning_rate": 3.993423455028767e-07, + "loss": 0.0, + "num_input_tokens_seen": 101335120, + "step": 150345 + }, + { + "epoch": 3.673075513644248, + "grad_norm": 0.0008539154659956694, + "learning_rate": 3.992741677058906e-07, + "loss": 0.0451, + "num_input_tokens_seen": 101338320, + "step": 150350 + }, + { + "epoch": 3.673197664476095, + "grad_norm": 0.0025035631842911243, + "learning_rate": 3.9920599427754465e-07, + "loss": 0.0, + "num_input_tokens_seen": 101341776, + "step": 150355 + }, + { + "epoch": 3.6733198153079423, + "grad_norm": 0.00038199685513973236, + "learning_rate": 3.991378252183339e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101345680, + "step": 150360 + }, + { + "epoch": 3.6734419661397895, + "grad_norm": 0.0012773294001817703, + "learning_rate": 3.9906966052875457e-07, + "loss": 0.0, + "num_input_tokens_seen": 101349008, + "step": 150365 + }, + { + "epoch": 3.6735641169716367, + "grad_norm": 0.00031368996133096516, + "learning_rate": 3.990015002093027e-07, + "loss": 0.0, + "num_input_tokens_seen": 101352336, + "step": 150370 + }, + { + "epoch": 3.673686267803484, + "grad_norm": 0.0028718202374875546, + "learning_rate": 3.989333442604731e-07, + "loss": 0.0, + "num_input_tokens_seen": 101355536, + "step": 150375 + }, + { + "epoch": 3.673808418635331, + "grad_norm": 0.014222218655049801, + "learning_rate": 3.988651926827623e-07, + "loss": 0.0466, + "num_input_tokens_seen": 101358416, + "step": 150380 + }, + { + "epoch": 3.673930569467178, + "grad_norm": 0.00805890653282404, + "learning_rate": 3.9879704547666517e-07, + "loss": 0.0488, + "num_input_tokens_seen": 101361424, + "step": 150385 + }, + { + "epoch": 3.6740527202990254, + "grad_norm": 0.0034299599938094616, + "learning_rate": 3.987289026426776e-07, + "loss": 0.0, + "num_input_tokens_seen": 101364432, + "step": 150390 + }, + { + "epoch": 3.674174871130872, + "grad_norm": 0.011425524950027466, + "learning_rate": 3.9866076418129545e-07, + "loss": 0.0, + "num_input_tokens_seen": 101368272, + "step": 150395 + }, + { + "epoch": 3.67429702196272, + "grad_norm": 0.011008653789758682, + "learning_rate": 3.985926300930137e-07, + "loss": 0.0297, + "num_input_tokens_seen": 101371536, + "step": 150400 + }, + { + "epoch": 3.6744191727945665, + "grad_norm": 0.0011469714809209108, + "learning_rate": 3.985245003783284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101375568, + "step": 150405 + }, + { + "epoch": 3.674541323626414, + "grad_norm": 0.0007156103383749723, + "learning_rate": 3.9845637503773443e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101378768, + "step": 150410 + }, + { + "epoch": 3.674663474458261, + "grad_norm": 0.0012320448877289891, + "learning_rate": 3.9838825407172784e-07, + "loss": 0.062, + "num_input_tokens_seen": 101381776, + "step": 150415 + }, + { + "epoch": 3.674785625290108, + "grad_norm": 0.0015208465047180653, + "learning_rate": 3.983201374808033e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101384656, + "step": 150420 + }, + { + "epoch": 3.6749077761219553, + "grad_norm": 0.04762888699769974, + "learning_rate": 3.982520252654569e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101388048, + "step": 150425 + }, + { + "epoch": 3.6750299269538025, + "grad_norm": 0.009869548492133617, + "learning_rate": 3.981839174261833e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101391184, + "step": 150430 + }, + { + "epoch": 3.6751520777856497, + "grad_norm": 0.0018234923481941223, + "learning_rate": 3.9811581396347835e-07, + "loss": 0.0388, + "num_input_tokens_seen": 101394512, + "step": 150435 + }, + { + "epoch": 3.675274228617497, + "grad_norm": 0.0007763886242173612, + "learning_rate": 3.9804771487783727e-07, + "loss": 0.0, + "num_input_tokens_seen": 101397904, + "step": 150440 + }, + { + "epoch": 3.675396379449344, + "grad_norm": 0.002413820242509246, + "learning_rate": 3.9797962016975463e-07, + "loss": 0.0, + "num_input_tokens_seen": 101401232, + "step": 150445 + }, + { + "epoch": 3.6755185302811912, + "grad_norm": 0.014111106283962727, + "learning_rate": 3.979115298397262e-07, + "loss": 0.0004, + "num_input_tokens_seen": 101404944, + "step": 150450 + }, + { + "epoch": 3.6756406811130384, + "grad_norm": 0.010848580859601498, + "learning_rate": 3.978434438882474e-07, + "loss": 0.0917, + "num_input_tokens_seen": 101408208, + "step": 150455 + }, + { + "epoch": 3.6757628319448856, + "grad_norm": 77.76248931884766, + "learning_rate": 3.9777536231581265e-07, + "loss": 0.0012, + "num_input_tokens_seen": 101411536, + "step": 150460 + }, + { + "epoch": 3.675884982776733, + "grad_norm": 0.002751800697296858, + "learning_rate": 3.9770728512291785e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101414736, + "step": 150465 + }, + { + "epoch": 3.67600713360858, + "grad_norm": 0.002995507325977087, + "learning_rate": 3.9763921231005726e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101418128, + "step": 150470 + }, + { + "epoch": 3.676129284440427, + "grad_norm": 0.031024383381009102, + "learning_rate": 3.975711438777267e-07, + "loss": 0.0, + "num_input_tokens_seen": 101421520, + "step": 150475 + }, + { + "epoch": 3.676251435272274, + "grad_norm": 0.17898677289485931, + "learning_rate": 3.975030798264205e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101425232, + "step": 150480 + }, + { + "epoch": 3.6763735861041216, + "grad_norm": 0.01304326206445694, + "learning_rate": 3.974350201566339e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101428304, + "step": 150485 + }, + { + "epoch": 3.6764957369359683, + "grad_norm": 0.0001925857359310612, + "learning_rate": 3.973669648688622e-07, + "loss": 0.0, + "num_input_tokens_seen": 101431568, + "step": 150490 + }, + { + "epoch": 3.676617887767816, + "grad_norm": 0.0064493948593735695, + "learning_rate": 3.9729891396359984e-07, + "loss": 0.0365, + "num_input_tokens_seen": 101435216, + "step": 150495 + }, + { + "epoch": 3.6767400385996627, + "grad_norm": 0.0003160308697260916, + "learning_rate": 3.9723086744134216e-07, + "loss": 0.0, + "num_input_tokens_seen": 101438480, + "step": 150500 + }, + { + "epoch": 3.67686218943151, + "grad_norm": 51.7661018371582, + "learning_rate": 3.971628253025834e-07, + "loss": 0.1552, + "num_input_tokens_seen": 101442064, + "step": 150505 + }, + { + "epoch": 3.676984340263357, + "grad_norm": 0.05616138502955437, + "learning_rate": 3.97094787547819e-07, + "loss": 0.0, + "num_input_tokens_seen": 101445328, + "step": 150510 + }, + { + "epoch": 3.6771064910952043, + "grad_norm": 0.0024035891983658075, + "learning_rate": 3.9702675417754317e-07, + "loss": 0.0, + "num_input_tokens_seen": 101448464, + "step": 150515 + }, + { + "epoch": 3.6772286419270515, + "grad_norm": 0.06979092210531235, + "learning_rate": 3.969587251922509e-07, + "loss": 0.0, + "num_input_tokens_seen": 101451984, + "step": 150520 + }, + { + "epoch": 3.6773507927588986, + "grad_norm": 0.0018006553873419762, + "learning_rate": 3.9689070059243745e-07, + "loss": 0.0, + "num_input_tokens_seen": 101455568, + "step": 150525 + }, + { + "epoch": 3.677472943590746, + "grad_norm": 0.004438464529812336, + "learning_rate": 3.96822680378597e-07, + "loss": 0.0, + "num_input_tokens_seen": 101458768, + "step": 150530 + }, + { + "epoch": 3.677595094422593, + "grad_norm": 0.004643600899726152, + "learning_rate": 3.967546645512239e-07, + "loss": 0.0, + "num_input_tokens_seen": 101462352, + "step": 150535 + }, + { + "epoch": 3.67771724525444, + "grad_norm": 0.004531925544142723, + "learning_rate": 3.9668665311081337e-07, + "loss": 0.0, + "num_input_tokens_seen": 101465424, + "step": 150540 + }, + { + "epoch": 3.6778393960862874, + "grad_norm": 0.001109713688492775, + "learning_rate": 3.966186460578596e-07, + "loss": 0.0, + "num_input_tokens_seen": 101468752, + "step": 150545 + }, + { + "epoch": 3.6779615469181346, + "grad_norm": 27.36225128173828, + "learning_rate": 3.965506433928576e-07, + "loss": 0.0332, + "num_input_tokens_seen": 101471824, + "step": 150550 + }, + { + "epoch": 3.678083697749982, + "grad_norm": 0.0016479621408507228, + "learning_rate": 3.9648264511630125e-07, + "loss": 0.0, + "num_input_tokens_seen": 101475344, + "step": 150555 + }, + { + "epoch": 3.678205848581829, + "grad_norm": 0.0013086326653137803, + "learning_rate": 3.964146512286858e-07, + "loss": 0.0, + "num_input_tokens_seen": 101478544, + "step": 150560 + }, + { + "epoch": 3.678327999413676, + "grad_norm": 0.006203626748174429, + "learning_rate": 3.96346661730505e-07, + "loss": 0.0, + "num_input_tokens_seen": 101482448, + "step": 150565 + }, + { + "epoch": 3.6784501502455234, + "grad_norm": 0.0003723462868947536, + "learning_rate": 3.9627867662225403e-07, + "loss": 0.0, + "num_input_tokens_seen": 101485776, + "step": 150570 + }, + { + "epoch": 3.67857230107737, + "grad_norm": 44.98166275024414, + "learning_rate": 3.962106959044265e-07, + "loss": 0.0854, + "num_input_tokens_seen": 101488784, + "step": 150575 + }, + { + "epoch": 3.6786944519092177, + "grad_norm": 0.0014090328477323055, + "learning_rate": 3.961427195775171e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101491984, + "step": 150580 + }, + { + "epoch": 3.6788166027410645, + "grad_norm": 0.010927320457994938, + "learning_rate": 3.9607474764202073e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101495632, + "step": 150585 + }, + { + "epoch": 3.6789387535729117, + "grad_norm": 0.007265997119247913, + "learning_rate": 3.960067800984309e-07, + "loss": 0.075, + "num_input_tokens_seen": 101499536, + "step": 150590 + }, + { + "epoch": 3.679060904404759, + "grad_norm": 0.4248805642127991, + "learning_rate": 3.9593881694724253e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101502800, + "step": 150595 + }, + { + "epoch": 3.679183055236606, + "grad_norm": 0.00709194503724575, + "learning_rate": 3.958708581889493e-07, + "loss": 0.0, + "num_input_tokens_seen": 101506384, + "step": 150600 + }, + { + "epoch": 3.6793052060684532, + "grad_norm": 0.005410181358456612, + "learning_rate": 3.9580290382404546e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101510160, + "step": 150605 + }, + { + "epoch": 3.6794273569003004, + "grad_norm": 0.004404694773256779, + "learning_rate": 3.957349538530259e-07, + "loss": 0.0004, + "num_input_tokens_seen": 101513872, + "step": 150610 + }, + { + "epoch": 3.6795495077321476, + "grad_norm": 0.004392651841044426, + "learning_rate": 3.95667008276384e-07, + "loss": 0.0, + "num_input_tokens_seen": 101516816, + "step": 150615 + }, + { + "epoch": 3.679671658563995, + "grad_norm": 0.0029403301887214184, + "learning_rate": 3.9559906709461445e-07, + "loss": 0.0, + "num_input_tokens_seen": 101520208, + "step": 150620 + }, + { + "epoch": 3.679793809395842, + "grad_norm": 0.010532950982451439, + "learning_rate": 3.9553113030821064e-07, + "loss": 0.0, + "num_input_tokens_seen": 101523792, + "step": 150625 + }, + { + "epoch": 3.679915960227689, + "grad_norm": 0.0017830339493229985, + "learning_rate": 3.954631979176675e-07, + "loss": 0.0, + "num_input_tokens_seen": 101526992, + "step": 150630 + }, + { + "epoch": 3.6800381110595364, + "grad_norm": 0.027786199003458023, + "learning_rate": 3.953952699234785e-07, + "loss": 0.0, + "num_input_tokens_seen": 101530320, + "step": 150635 + }, + { + "epoch": 3.6801602618913836, + "grad_norm": 0.07802188396453857, + "learning_rate": 3.953273463261374e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101533712, + "step": 150640 + }, + { + "epoch": 3.6802824127232308, + "grad_norm": 0.05071675032377243, + "learning_rate": 3.952594271261388e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101536976, + "step": 150645 + }, + { + "epoch": 3.680404563555078, + "grad_norm": 0.0006053565302863717, + "learning_rate": 3.95191512323976e-07, + "loss": 0.0, + "num_input_tokens_seen": 101540240, + "step": 150650 + }, + { + "epoch": 3.680526714386925, + "grad_norm": 0.0005544578889384866, + "learning_rate": 3.951236019201435e-07, + "loss": 0.0, + "num_input_tokens_seen": 101543696, + "step": 150655 + }, + { + "epoch": 3.680648865218772, + "grad_norm": 0.002754826098680496, + "learning_rate": 3.9505569591513444e-07, + "loss": 0.0, + "num_input_tokens_seen": 101546768, + "step": 150660 + }, + { + "epoch": 3.6807710160506195, + "grad_norm": 0.007763294503092766, + "learning_rate": 3.949877943094435e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101550032, + "step": 150665 + }, + { + "epoch": 3.6808931668824663, + "grad_norm": 0.05538984388113022, + "learning_rate": 3.949198971035638e-07, + "loss": 0.0, + "num_input_tokens_seen": 101553040, + "step": 150670 + }, + { + "epoch": 3.681015317714314, + "grad_norm": 0.0809285044670105, + "learning_rate": 3.9485200429798914e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101556560, + "step": 150675 + }, + { + "epoch": 3.6811374685461606, + "grad_norm": 0.0003654407919384539, + "learning_rate": 3.947841158932139e-07, + "loss": 0.0, + "num_input_tokens_seen": 101560400, + "step": 150680 + }, + { + "epoch": 3.681259619378008, + "grad_norm": 0.0008546562166884542, + "learning_rate": 3.9471623188973115e-07, + "loss": 0.0, + "num_input_tokens_seen": 101564432, + "step": 150685 + }, + { + "epoch": 3.681381770209855, + "grad_norm": 5.210339546203613, + "learning_rate": 3.9464835228803494e-07, + "loss": 0.0006, + "num_input_tokens_seen": 101567248, + "step": 150690 + }, + { + "epoch": 3.681503921041702, + "grad_norm": 0.0010287740733474493, + "learning_rate": 3.945804770886184e-07, + "loss": 0.0, + "num_input_tokens_seen": 101570512, + "step": 150695 + }, + { + "epoch": 3.6816260718735494, + "grad_norm": 0.003941098693758249, + "learning_rate": 3.9451260629197557e-07, + "loss": 0.0, + "num_input_tokens_seen": 101573520, + "step": 150700 + }, + { + "epoch": 3.6817482227053966, + "grad_norm": 0.005360405892133713, + "learning_rate": 3.9444473989860017e-07, + "loss": 0.0, + "num_input_tokens_seen": 101576720, + "step": 150705 + }, + { + "epoch": 3.681870373537244, + "grad_norm": 0.001114573678933084, + "learning_rate": 3.943768779089852e-07, + "loss": 0.0, + "num_input_tokens_seen": 101580368, + "step": 150710 + }, + { + "epoch": 3.681992524369091, + "grad_norm": 0.002144193509593606, + "learning_rate": 3.943090203236248e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101583696, + "step": 150715 + }, + { + "epoch": 3.682114675200938, + "grad_norm": 0.000986748025752604, + "learning_rate": 3.942411671430118e-07, + "loss": 0.0, + "num_input_tokens_seen": 101587600, + "step": 150720 + }, + { + "epoch": 3.6822368260327853, + "grad_norm": 0.006151706911623478, + "learning_rate": 3.941733183676402e-07, + "loss": 0.0, + "num_input_tokens_seen": 101590992, + "step": 150725 + }, + { + "epoch": 3.6823589768646325, + "grad_norm": 0.025515733286738396, + "learning_rate": 3.94105473998003e-07, + "loss": 0.0, + "num_input_tokens_seen": 101594512, + "step": 150730 + }, + { + "epoch": 3.6824811276964797, + "grad_norm": 0.15389606356620789, + "learning_rate": 3.94037634034594e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101598352, + "step": 150735 + }, + { + "epoch": 3.682603278528327, + "grad_norm": 0.0027224128134548664, + "learning_rate": 3.9396979847790603e-07, + "loss": 0.0, + "num_input_tokens_seen": 101601872, + "step": 150740 + }, + { + "epoch": 3.682725429360174, + "grad_norm": 0.014924484305083752, + "learning_rate": 3.9390196732843294e-07, + "loss": 0.0, + "num_input_tokens_seen": 101605456, + "step": 150745 + }, + { + "epoch": 3.6828475801920213, + "grad_norm": 0.015203488990664482, + "learning_rate": 3.9383414058666784e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101608912, + "step": 150750 + }, + { + "epoch": 3.682969731023868, + "grad_norm": 0.3671652674674988, + "learning_rate": 3.9376631825310345e-07, + "loss": 0.0692, + "num_input_tokens_seen": 101612240, + "step": 150755 + }, + { + "epoch": 3.6830918818557157, + "grad_norm": 0.01368219405412674, + "learning_rate": 3.936985003282336e-07, + "loss": 0.0, + "num_input_tokens_seen": 101615312, + "step": 150760 + }, + { + "epoch": 3.6832140326875624, + "grad_norm": 0.001394348219037056, + "learning_rate": 3.936306868125516e-07, + "loss": 0.0, + "num_input_tokens_seen": 101618896, + "step": 150765 + }, + { + "epoch": 3.6833361835194096, + "grad_norm": 0.007090140599757433, + "learning_rate": 3.9356287770654993e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101622096, + "step": 150770 + }, + { + "epoch": 3.683458334351257, + "grad_norm": 0.00044982563122175634, + "learning_rate": 3.934950730107226e-07, + "loss": 0.0349, + "num_input_tokens_seen": 101625680, + "step": 150775 + }, + { + "epoch": 3.683580485183104, + "grad_norm": 23.51737403869629, + "learning_rate": 3.9342727272556186e-07, + "loss": 0.0235, + "num_input_tokens_seen": 101628816, + "step": 150780 + }, + { + "epoch": 3.683702636014951, + "grad_norm": 0.005765157286077738, + "learning_rate": 3.933594768515615e-07, + "loss": 0.0, + "num_input_tokens_seen": 101632144, + "step": 150785 + }, + { + "epoch": 3.6838247868467984, + "grad_norm": 0.0034340538550168276, + "learning_rate": 3.932916853892138e-07, + "loss": 0.0, + "num_input_tokens_seen": 101636304, + "step": 150790 + }, + { + "epoch": 3.6839469376786456, + "grad_norm": 0.011951752938330173, + "learning_rate": 3.9322389833901205e-07, + "loss": 0.0, + "num_input_tokens_seen": 101640208, + "step": 150795 + }, + { + "epoch": 3.6840690885104928, + "grad_norm": 0.0030271399300545454, + "learning_rate": 3.931561157014498e-07, + "loss": 0.0, + "num_input_tokens_seen": 101643472, + "step": 150800 + }, + { + "epoch": 3.68419123934234, + "grad_norm": 0.002439115894958377, + "learning_rate": 3.930883374770191e-07, + "loss": 0.0512, + "num_input_tokens_seen": 101647696, + "step": 150805 + }, + { + "epoch": 3.684313390174187, + "grad_norm": 0.004594883415848017, + "learning_rate": 3.9302056366621363e-07, + "loss": 0.0, + "num_input_tokens_seen": 101651088, + "step": 150810 + }, + { + "epoch": 3.6844355410060343, + "grad_norm": 0.0011603219900280237, + "learning_rate": 3.929527942695254e-07, + "loss": 0.0938, + "num_input_tokens_seen": 101654480, + "step": 150815 + }, + { + "epoch": 3.6845576918378815, + "grad_norm": 0.0017992982175201178, + "learning_rate": 3.9288502928744824e-07, + "loss": 0.0, + "num_input_tokens_seen": 101657552, + "step": 150820 + }, + { + "epoch": 3.6846798426697287, + "grad_norm": 0.009408014826476574, + "learning_rate": 3.9281726872047403e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101660816, + "step": 150825 + }, + { + "epoch": 3.684801993501576, + "grad_norm": 0.003088391851633787, + "learning_rate": 3.927495125690959e-07, + "loss": 0.0728, + "num_input_tokens_seen": 101663952, + "step": 150830 + }, + { + "epoch": 3.684924144333423, + "grad_norm": 0.030103838071227074, + "learning_rate": 3.926817608338071e-07, + "loss": 0.0, + "num_input_tokens_seen": 101667408, + "step": 150835 + }, + { + "epoch": 3.68504629516527, + "grad_norm": 0.0010745770996436477, + "learning_rate": 3.926140135150998e-07, + "loss": 0.0, + "num_input_tokens_seen": 101670608, + "step": 150840 + }, + { + "epoch": 3.6851684459971175, + "grad_norm": 0.0004344276967458427, + "learning_rate": 3.9254627061346655e-07, + "loss": 0.0, + "num_input_tokens_seen": 101674128, + "step": 150845 + }, + { + "epoch": 3.685290596828964, + "grad_norm": 0.0007442793576046824, + "learning_rate": 3.9247853212940043e-07, + "loss": 0.1039, + "num_input_tokens_seen": 101677648, + "step": 150850 + }, + { + "epoch": 3.685412747660812, + "grad_norm": 1.0434240102767944, + "learning_rate": 3.924107980633935e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101680784, + "step": 150855 + }, + { + "epoch": 3.6855348984926586, + "grad_norm": 0.0017332849092781544, + "learning_rate": 3.92343068415939e-07, + "loss": 0.0, + "num_input_tokens_seen": 101684560, + "step": 150860 + }, + { + "epoch": 3.6856570493245058, + "grad_norm": 0.011752471327781677, + "learning_rate": 3.9227534318752887e-07, + "loss": 0.0004, + "num_input_tokens_seen": 101687824, + "step": 150865 + }, + { + "epoch": 3.685779200156353, + "grad_norm": 0.021650463342666626, + "learning_rate": 3.922076223786561e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101691536, + "step": 150870 + }, + { + "epoch": 3.6859013509882, + "grad_norm": 0.00036399438977241516, + "learning_rate": 3.9213990598981283e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101695184, + "step": 150875 + }, + { + "epoch": 3.6860235018200473, + "grad_norm": 0.0023778865579515696, + "learning_rate": 3.9207219402149183e-07, + "loss": 0.0548, + "num_input_tokens_seen": 101699152, + "step": 150880 + }, + { + "epoch": 3.6861456526518945, + "grad_norm": 0.0013532171724364161, + "learning_rate": 3.920044864741852e-07, + "loss": 0.0, + "num_input_tokens_seen": 101702288, + "step": 150885 + }, + { + "epoch": 3.6862678034837417, + "grad_norm": 0.005285841412842274, + "learning_rate": 3.919367833483852e-07, + "loss": 0.0, + "num_input_tokens_seen": 101705616, + "step": 150890 + }, + { + "epoch": 3.686389954315589, + "grad_norm": 0.01104725431650877, + "learning_rate": 3.91869084644585e-07, + "loss": 0.0, + "num_input_tokens_seen": 101708944, + "step": 150895 + }, + { + "epoch": 3.686512105147436, + "grad_norm": 0.0009050817461684346, + "learning_rate": 3.91801390363276e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101712528, + "step": 150900 + }, + { + "epoch": 3.6866342559792833, + "grad_norm": 0.0777590200304985, + "learning_rate": 3.9173370050495123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101715920, + "step": 150905 + }, + { + "epoch": 3.6867564068111305, + "grad_norm": 0.0008035873179323971, + "learning_rate": 3.916660150701022e-07, + "loss": 0.0225, + "num_input_tokens_seen": 101719184, + "step": 150910 + }, + { + "epoch": 3.6868785576429777, + "grad_norm": 0.00047678741975687444, + "learning_rate": 3.9159833405922193e-07, + "loss": 0.0, + "num_input_tokens_seen": 101722448, + "step": 150915 + }, + { + "epoch": 3.687000708474825, + "grad_norm": 0.0004096681368537247, + "learning_rate": 3.915306574728019e-07, + "loss": 0.0527, + "num_input_tokens_seen": 101725712, + "step": 150920 + }, + { + "epoch": 3.6871228593066716, + "grad_norm": 0.013166260905563831, + "learning_rate": 3.914629853113345e-07, + "loss": 0.0, + "num_input_tokens_seen": 101729232, + "step": 150925 + }, + { + "epoch": 3.6872450101385192, + "grad_norm": 128.74935913085938, + "learning_rate": 3.913953175753123e-07, + "loss": 0.0489, + "num_input_tokens_seen": 101733200, + "step": 150930 + }, + { + "epoch": 3.687367160970366, + "grad_norm": 0.008455926552414894, + "learning_rate": 3.913276542652267e-07, + "loss": 0.0, + "num_input_tokens_seen": 101736272, + "step": 150935 + }, + { + "epoch": 3.6874893118022136, + "grad_norm": 0.003050909610465169, + "learning_rate": 3.912599953815705e-07, + "loss": 0.0, + "num_input_tokens_seen": 101739472, + "step": 150940 + }, + { + "epoch": 3.6876114626340604, + "grad_norm": 0.0009746898431330919, + "learning_rate": 3.911923409248353e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101742800, + "step": 150945 + }, + { + "epoch": 3.6877336134659076, + "grad_norm": 0.006257293745875359, + "learning_rate": 3.911246908955129e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101745936, + "step": 150950 + }, + { + "epoch": 3.6878557642977547, + "grad_norm": 0.03148772194981575, + "learning_rate": 3.9105704529409575e-07, + "loss": 0.0, + "num_input_tokens_seen": 101749136, + "step": 150955 + }, + { + "epoch": 3.687977915129602, + "grad_norm": 0.17039455473423004, + "learning_rate": 3.909894041210753e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101752784, + "step": 150960 + }, + { + "epoch": 3.688100065961449, + "grad_norm": 0.009566457010805607, + "learning_rate": 3.90921767376944e-07, + "loss": 0.0, + "num_input_tokens_seen": 101755792, + "step": 150965 + }, + { + "epoch": 3.6882222167932963, + "grad_norm": 0.006611944641917944, + "learning_rate": 3.9085413506219313e-07, + "loss": 0.0465, + "num_input_tokens_seen": 101758992, + "step": 150970 + }, + { + "epoch": 3.6883443676251435, + "grad_norm": 0.0023192299995571375, + "learning_rate": 3.907865071773151e-07, + "loss": 0.0, + "num_input_tokens_seen": 101762640, + "step": 150975 + }, + { + "epoch": 3.6884665184569907, + "grad_norm": 0.0027550682425498962, + "learning_rate": 3.9071888372280113e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101765904, + "step": 150980 + }, + { + "epoch": 3.688588669288838, + "grad_norm": 15.158320426940918, + "learning_rate": 3.906512646991433e-07, + "loss": 0.0468, + "num_input_tokens_seen": 101769552, + "step": 150985 + }, + { + "epoch": 3.688710820120685, + "grad_norm": 0.00859321653842926, + "learning_rate": 3.9058365010683383e-07, + "loss": 0.0, + "num_input_tokens_seen": 101772880, + "step": 150990 + }, + { + "epoch": 3.6888329709525323, + "grad_norm": 0.0005637798458337784, + "learning_rate": 3.905160399463635e-07, + "loss": 0.0, + "num_input_tokens_seen": 101776400, + "step": 150995 + }, + { + "epoch": 3.6889551217843795, + "grad_norm": 0.004457912407815456, + "learning_rate": 3.9044843421822485e-07, + "loss": 0.0, + "num_input_tokens_seen": 101779664, + "step": 151000 + }, + { + "epoch": 3.6890772726162266, + "grad_norm": 0.0030633006244897842, + "learning_rate": 3.903808329229087e-07, + "loss": 0.0, + "num_input_tokens_seen": 101782800, + "step": 151005 + }, + { + "epoch": 3.689199423448074, + "grad_norm": 0.0062855626456439495, + "learning_rate": 3.9031323606090717e-07, + "loss": 0.0786, + "num_input_tokens_seen": 101786128, + "step": 151010 + }, + { + "epoch": 3.689321574279921, + "grad_norm": 0.005475311540067196, + "learning_rate": 3.902456436327122e-07, + "loss": 0.0, + "num_input_tokens_seen": 101789392, + "step": 151015 + }, + { + "epoch": 3.6894437251117678, + "grad_norm": 0.02409266121685505, + "learning_rate": 3.9017805563881446e-07, + "loss": 0.0, + "num_input_tokens_seen": 101792592, + "step": 151020 + }, + { + "epoch": 3.6895658759436154, + "grad_norm": 0.00048403823166154325, + "learning_rate": 3.901104720797063e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101795792, + "step": 151025 + }, + { + "epoch": 3.689688026775462, + "grad_norm": 0.0028270462062209845, + "learning_rate": 3.9004289295587845e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101799312, + "step": 151030 + }, + { + "epoch": 3.68981017760731, + "grad_norm": 0.02731265313923359, + "learning_rate": 3.8997531826782315e-07, + "loss": 0.0003, + "num_input_tokens_seen": 101802640, + "step": 151035 + }, + { + "epoch": 3.6899323284391565, + "grad_norm": 0.0027652860153466463, + "learning_rate": 3.89907748016031e-07, + "loss": 0.0, + "num_input_tokens_seen": 101806288, + "step": 151040 + }, + { + "epoch": 3.6900544792710037, + "grad_norm": 0.006285255309194326, + "learning_rate": 3.898401822009942e-07, + "loss": 0.0, + "num_input_tokens_seen": 101810896, + "step": 151045 + }, + { + "epoch": 3.690176630102851, + "grad_norm": 0.001907885423861444, + "learning_rate": 3.8977262082320337e-07, + "loss": 0.0, + "num_input_tokens_seen": 101813968, + "step": 151050 + }, + { + "epoch": 3.690298780934698, + "grad_norm": 18.86277198791504, + "learning_rate": 3.897050638831505e-07, + "loss": 0.0667, + "num_input_tokens_seen": 101817360, + "step": 151055 + }, + { + "epoch": 3.6904209317665453, + "grad_norm": 0.022184569388628006, + "learning_rate": 3.896375113813265e-07, + "loss": 0.0, + "num_input_tokens_seen": 101820304, + "step": 151060 + }, + { + "epoch": 3.6905430825983925, + "grad_norm": 0.0037373073864728212, + "learning_rate": 3.8956996331822243e-07, + "loss": 0.0, + "num_input_tokens_seen": 101824016, + "step": 151065 + }, + { + "epoch": 3.6906652334302397, + "grad_norm": 0.0009017616976052523, + "learning_rate": 3.895024196943301e-07, + "loss": 0.0, + "num_input_tokens_seen": 101827856, + "step": 151070 + }, + { + "epoch": 3.690787384262087, + "grad_norm": 0.009098973125219345, + "learning_rate": 3.8943488051013997e-07, + "loss": 0.0, + "num_input_tokens_seen": 101830928, + "step": 151075 + }, + { + "epoch": 3.690909535093934, + "grad_norm": 0.0026963918935507536, + "learning_rate": 3.8936734576614374e-07, + "loss": 0.0371, + "num_input_tokens_seen": 101835280, + "step": 151080 + }, + { + "epoch": 3.6910316859257812, + "grad_norm": 0.0007805816712789237, + "learning_rate": 3.8929981546283266e-07, + "loss": 0.0, + "num_input_tokens_seen": 101838352, + "step": 151085 + }, + { + "epoch": 3.6911538367576284, + "grad_norm": 0.004396933596581221, + "learning_rate": 3.8923228960069723e-07, + "loss": 0.0, + "num_input_tokens_seen": 101841552, + "step": 151090 + }, + { + "epoch": 3.6912759875894756, + "grad_norm": 0.002870592987164855, + "learning_rate": 3.8916476818022914e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101844688, + "step": 151095 + }, + { + "epoch": 3.691398138421323, + "grad_norm": 0.007208169437944889, + "learning_rate": 3.8909725120191893e-07, + "loss": 0.0515, + "num_input_tokens_seen": 101847952, + "step": 151100 + }, + { + "epoch": 3.6915202892531696, + "grad_norm": 0.008845353499054909, + "learning_rate": 3.890297386662578e-07, + "loss": 0.0, + "num_input_tokens_seen": 101851344, + "step": 151105 + }, + { + "epoch": 3.691642440085017, + "grad_norm": 0.014517847448587418, + "learning_rate": 3.88962230573737e-07, + "loss": 0.0, + "num_input_tokens_seen": 101854544, + "step": 151110 + }, + { + "epoch": 3.691764590916864, + "grad_norm": 0.0029305708594620228, + "learning_rate": 3.8889472692484703e-07, + "loss": 0.0002, + "num_input_tokens_seen": 101857744, + "step": 151115 + }, + { + "epoch": 3.6918867417487116, + "grad_norm": 0.039825934916734695, + "learning_rate": 3.8882722772007914e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101861136, + "step": 151120 + }, + { + "epoch": 3.6920088925805583, + "grad_norm": 0.0230876412242651, + "learning_rate": 3.8875973295992383e-07, + "loss": 0.0, + "num_input_tokens_seen": 101864400, + "step": 151125 + }, + { + "epoch": 3.6921310434124055, + "grad_norm": 0.00439818948507309, + "learning_rate": 3.8869224264487244e-07, + "loss": 0.0, + "num_input_tokens_seen": 101867984, + "step": 151130 + }, + { + "epoch": 3.6922531942442527, + "grad_norm": 0.009379718452692032, + "learning_rate": 3.886247567754151e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101871440, + "step": 151135 + }, + { + "epoch": 3.6923753450761, + "grad_norm": 0.001351098413579166, + "learning_rate": 3.88557275352043e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101875152, + "step": 151140 + }, + { + "epoch": 3.692497495907947, + "grad_norm": 0.0009771710028871894, + "learning_rate": 3.884897983752472e-07, + "loss": 0.0, + "num_input_tokens_seen": 101878800, + "step": 151145 + }, + { + "epoch": 3.6926196467397943, + "grad_norm": 0.0007362841279245913, + "learning_rate": 3.88422325845518e-07, + "loss": 0.0, + "num_input_tokens_seen": 101881872, + "step": 151150 + }, + { + "epoch": 3.6927417975716414, + "grad_norm": 0.003438524901866913, + "learning_rate": 3.88354857763346e-07, + "loss": 0.0, + "num_input_tokens_seen": 101885264, + "step": 151155 + }, + { + "epoch": 3.6928639484034886, + "grad_norm": 0.0023364173248410225, + "learning_rate": 3.882873941292221e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101888208, + "step": 151160 + }, + { + "epoch": 3.692986099235336, + "grad_norm": 0.0015807130839675665, + "learning_rate": 3.8821993494363657e-07, + "loss": 0.0, + "num_input_tokens_seen": 101891088, + "step": 151165 + }, + { + "epoch": 3.693108250067183, + "grad_norm": 0.0011053653433918953, + "learning_rate": 3.881524802070806e-07, + "loss": 0.0, + "num_input_tokens_seen": 101894352, + "step": 151170 + }, + { + "epoch": 3.69323040089903, + "grad_norm": 0.012983616441488266, + "learning_rate": 3.880850299200439e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101897232, + "step": 151175 + }, + { + "epoch": 3.6933525517308774, + "grad_norm": 0.0016408859519287944, + "learning_rate": 3.880175840830179e-07, + "loss": 0.0, + "num_input_tokens_seen": 101900368, + "step": 151180 + }, + { + "epoch": 3.6934747025627246, + "grad_norm": 0.0003170320705976337, + "learning_rate": 3.879501426964922e-07, + "loss": 0.0, + "num_input_tokens_seen": 101903504, + "step": 151185 + }, + { + "epoch": 3.6935968533945718, + "grad_norm": 0.1406192183494568, + "learning_rate": 3.8788270576095806e-07, + "loss": 0.0004, + "num_input_tokens_seen": 101906832, + "step": 151190 + }, + { + "epoch": 3.693719004226419, + "grad_norm": 0.16041308641433716, + "learning_rate": 3.8781527327690523e-07, + "loss": 0.0, + "num_input_tokens_seen": 101909968, + "step": 151195 + }, + { + "epoch": 3.6938411550582657, + "grad_norm": 0.04108577221632004, + "learning_rate": 3.8774784524482426e-07, + "loss": 0.0, + "num_input_tokens_seen": 101913296, + "step": 151200 + }, + { + "epoch": 3.6939633058901133, + "grad_norm": 0.10066086798906326, + "learning_rate": 3.876804216652061e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101916688, + "step": 151205 + }, + { + "epoch": 3.69408545672196, + "grad_norm": 0.015516743063926697, + "learning_rate": 3.876130025385402e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101920336, + "step": 151210 + }, + { + "epoch": 3.6942076075538073, + "grad_norm": 0.010554834268987179, + "learning_rate": 3.8754558786531767e-07, + "loss": 0.0, + "num_input_tokens_seen": 101923728, + "step": 151215 + }, + { + "epoch": 3.6943297583856545, + "grad_norm": 0.020320434123277664, + "learning_rate": 3.87478177646028e-07, + "loss": 0.0, + "num_input_tokens_seen": 101927312, + "step": 151220 + }, + { + "epoch": 3.6944519092175017, + "grad_norm": 0.020632604137063026, + "learning_rate": 3.87410771881162e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101931216, + "step": 151225 + }, + { + "epoch": 3.694574060049349, + "grad_norm": 0.0003973071579821408, + "learning_rate": 3.8734337057120945e-07, + "loss": 0.0245, + "num_input_tokens_seen": 101934416, + "step": 151230 + }, + { + "epoch": 3.694696210881196, + "grad_norm": 0.0013479648623615503, + "learning_rate": 3.8727597371666067e-07, + "loss": 0.0, + "num_input_tokens_seen": 101937424, + "step": 151235 + }, + { + "epoch": 3.6948183617130432, + "grad_norm": 0.1412498652935028, + "learning_rate": 3.8720858131800605e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101940816, + "step": 151240 + }, + { + "epoch": 3.6949405125448904, + "grad_norm": 0.012594731524586678, + "learning_rate": 3.8714119337573513e-07, + "loss": 0.0, + "num_input_tokens_seen": 101944464, + "step": 151245 + }, + { + "epoch": 3.6950626633767376, + "grad_norm": 0.42286956310272217, + "learning_rate": 3.8707380989033866e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101947984, + "step": 151250 + }, + { + "epoch": 3.695184814208585, + "grad_norm": 0.0012724720872938633, + "learning_rate": 3.870064308623063e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101951184, + "step": 151255 + }, + { + "epoch": 3.695306965040432, + "grad_norm": 0.0007571643218398094, + "learning_rate": 3.8693905629212775e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101955024, + "step": 151260 + }, + { + "epoch": 3.695429115872279, + "grad_norm": 0.002359704114496708, + "learning_rate": 3.8687168618029366e-07, + "loss": 0.0, + "num_input_tokens_seen": 101958736, + "step": 151265 + }, + { + "epoch": 3.6955512667041264, + "grad_norm": 0.0016779140569269657, + "learning_rate": 3.8680432052729304e-07, + "loss": 0.0, + "num_input_tokens_seen": 101962448, + "step": 151270 + }, + { + "epoch": 3.6956734175359736, + "grad_norm": 0.0044897920452058315, + "learning_rate": 3.867369593336168e-07, + "loss": 0.0, + "num_input_tokens_seen": 101966736, + "step": 151275 + }, + { + "epoch": 3.6957955683678207, + "grad_norm": 19.066856384277344, + "learning_rate": 3.86669602599754e-07, + "loss": 0.115, + "num_input_tokens_seen": 101969808, + "step": 151280 + }, + { + "epoch": 3.6959177191996675, + "grad_norm": 23.83672523498535, + "learning_rate": 3.866022503261952e-07, + "loss": 0.0538, + "num_input_tokens_seen": 101973008, + "step": 151285 + }, + { + "epoch": 3.696039870031515, + "grad_norm": 0.005208525341004133, + "learning_rate": 3.8653490251342945e-07, + "loss": 0.031, + "num_input_tokens_seen": 101976272, + "step": 151290 + }, + { + "epoch": 3.696162020863362, + "grad_norm": 0.011254152283072472, + "learning_rate": 3.8646755916194685e-07, + "loss": 0.0, + "num_input_tokens_seen": 101979344, + "step": 151295 + }, + { + "epoch": 3.6962841716952095, + "grad_norm": 0.007257348857820034, + "learning_rate": 3.864002202722375e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101982928, + "step": 151300 + }, + { + "epoch": 3.6964063225270563, + "grad_norm": 0.059719085693359375, + "learning_rate": 3.863328858447905e-07, + "loss": 0.0, + "num_input_tokens_seen": 101986064, + "step": 151305 + }, + { + "epoch": 3.6965284733589034, + "grad_norm": 0.0017241544555872679, + "learning_rate": 3.8626555588009614e-07, + "loss": 0.0075, + "num_input_tokens_seen": 101989136, + "step": 151310 + }, + { + "epoch": 3.6966506241907506, + "grad_norm": 0.17825163900852203, + "learning_rate": 3.861982303786434e-07, + "loss": 0.0001, + "num_input_tokens_seen": 101992272, + "step": 151315 + }, + { + "epoch": 3.696772775022598, + "grad_norm": 0.0026821442879736423, + "learning_rate": 3.861309093409222e-07, + "loss": 0.0, + "num_input_tokens_seen": 101995280, + "step": 151320 + }, + { + "epoch": 3.696894925854445, + "grad_norm": 0.004103350918740034, + "learning_rate": 3.860635927674225e-07, + "loss": 0.0, + "num_input_tokens_seen": 101998992, + "step": 151325 + }, + { + "epoch": 3.697017076686292, + "grad_norm": 0.0009725447162054479, + "learning_rate": 3.859962806586331e-07, + "loss": 0.0, + "num_input_tokens_seen": 102002448, + "step": 151330 + }, + { + "epoch": 3.6971392275181394, + "grad_norm": 0.0006192554719746113, + "learning_rate": 3.8592897301504436e-07, + "loss": 0.0287, + "num_input_tokens_seen": 102006032, + "step": 151335 + }, + { + "epoch": 3.6972613783499866, + "grad_norm": 0.0013876816956326365, + "learning_rate": 3.8586166983714475e-07, + "loss": 0.0, + "num_input_tokens_seen": 102009488, + "step": 151340 + }, + { + "epoch": 3.6973835291818338, + "grad_norm": 0.06822085380554199, + "learning_rate": 3.8579437112542476e-07, + "loss": 0.0, + "num_input_tokens_seen": 102012880, + "step": 151345 + }, + { + "epoch": 3.697505680013681, + "grad_norm": 0.055710822343826294, + "learning_rate": 3.857270768803729e-07, + "loss": 0.0, + "num_input_tokens_seen": 102016272, + "step": 151350 + }, + { + "epoch": 3.697627830845528, + "grad_norm": 0.06808667629957199, + "learning_rate": 3.856597871024794e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102019536, + "step": 151355 + }, + { + "epoch": 3.6977499816773753, + "grad_norm": 0.13779473304748535, + "learning_rate": 3.855925017922327e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102022800, + "step": 151360 + }, + { + "epoch": 3.6978721325092225, + "grad_norm": 0.0018543327460065484, + "learning_rate": 3.8552522095012296e-07, + "loss": 0.0, + "num_input_tokens_seen": 102026064, + "step": 151365 + }, + { + "epoch": 3.6979942833410693, + "grad_norm": 0.004081390332430601, + "learning_rate": 3.8545794457663903e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102029136, + "step": 151370 + }, + { + "epoch": 3.698116434172917, + "grad_norm": 0.0007932799635455012, + "learning_rate": 3.8539067267227e-07, + "loss": 0.0, + "num_input_tokens_seen": 102032464, + "step": 151375 + }, + { + "epoch": 3.6982385850047637, + "grad_norm": 0.06597576290369034, + "learning_rate": 3.853234052375055e-07, + "loss": 0.0, + "num_input_tokens_seen": 102035920, + "step": 151380 + }, + { + "epoch": 3.6983607358366113, + "grad_norm": 0.04877420514822006, + "learning_rate": 3.852561422728343e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102038992, + "step": 151385 + }, + { + "epoch": 3.698482886668458, + "grad_norm": 0.005778729449957609, + "learning_rate": 3.851888837787457e-07, + "loss": 0.0, + "num_input_tokens_seen": 102042064, + "step": 151390 + }, + { + "epoch": 3.6986050375003052, + "grad_norm": 0.000778566172812134, + "learning_rate": 3.851216297557294e-07, + "loss": 0.0, + "num_input_tokens_seen": 102046160, + "step": 151395 + }, + { + "epoch": 3.6987271883321524, + "grad_norm": 0.006441682111471891, + "learning_rate": 3.850543802042735e-07, + "loss": 0.0, + "num_input_tokens_seen": 102049680, + "step": 151400 + }, + { + "epoch": 3.6988493391639996, + "grad_norm": 0.037144798785448074, + "learning_rate": 3.849871351248679e-07, + "loss": 0.0, + "num_input_tokens_seen": 102053392, + "step": 151405 + }, + { + "epoch": 3.698971489995847, + "grad_norm": 0.00173321389593184, + "learning_rate": 3.84919894518001e-07, + "loss": 0.0, + "num_input_tokens_seen": 102056784, + "step": 151410 + }, + { + "epoch": 3.699093640827694, + "grad_norm": 0.09905073791742325, + "learning_rate": 3.84852658384162e-07, + "loss": 0.0008, + "num_input_tokens_seen": 102060048, + "step": 151415 + }, + { + "epoch": 3.699215791659541, + "grad_norm": 0.006596543826162815, + "learning_rate": 3.847854267238403e-07, + "loss": 0.0, + "num_input_tokens_seen": 102063696, + "step": 151420 + }, + { + "epoch": 3.6993379424913884, + "grad_norm": 0.01068484503775835, + "learning_rate": 3.8471819953752404e-07, + "loss": 0.0, + "num_input_tokens_seen": 102067408, + "step": 151425 + }, + { + "epoch": 3.6994600933232356, + "grad_norm": 23.218570709228516, + "learning_rate": 3.84650976825703e-07, + "loss": 0.0557, + "num_input_tokens_seen": 102070672, + "step": 151430 + }, + { + "epoch": 3.6995822441550827, + "grad_norm": 0.026982801035046577, + "learning_rate": 3.8458375858886513e-07, + "loss": 0.0, + "num_input_tokens_seen": 102073872, + "step": 151435 + }, + { + "epoch": 3.69970439498693, + "grad_norm": 0.0017854716861620545, + "learning_rate": 3.8451654482750006e-07, + "loss": 0.0, + "num_input_tokens_seen": 102077200, + "step": 151440 + }, + { + "epoch": 3.699826545818777, + "grad_norm": 0.0003598171751946211, + "learning_rate": 3.844493355420958e-07, + "loss": 0.0, + "num_input_tokens_seen": 102080400, + "step": 151445 + }, + { + "epoch": 3.6999486966506243, + "grad_norm": 0.0013809788506478071, + "learning_rate": 3.8438213073314164e-07, + "loss": 0.0, + "num_input_tokens_seen": 102083792, + "step": 151450 + }, + { + "epoch": 3.7000708474824715, + "grad_norm": 0.00033191696275025606, + "learning_rate": 3.843149304011265e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102087120, + "step": 151455 + }, + { + "epoch": 3.7001929983143187, + "grad_norm": 0.0005849574226886034, + "learning_rate": 3.842477345465388e-07, + "loss": 0.0, + "num_input_tokens_seen": 102090384, + "step": 151460 + }, + { + "epoch": 3.7003151491461654, + "grad_norm": 0.0002934445801656693, + "learning_rate": 3.841805431698669e-07, + "loss": 0.0, + "num_input_tokens_seen": 102093904, + "step": 151465 + }, + { + "epoch": 3.700437299978013, + "grad_norm": 0.0008289655088447034, + "learning_rate": 3.841133562716e-07, + "loss": 0.0, + "num_input_tokens_seen": 102097104, + "step": 151470 + }, + { + "epoch": 3.70055945080986, + "grad_norm": 0.031241346150636673, + "learning_rate": 3.8404617385222615e-07, + "loss": 0.0168, + "num_input_tokens_seen": 102100432, + "step": 151475 + }, + { + "epoch": 3.7006816016417075, + "grad_norm": 0.0025893929414451122, + "learning_rate": 3.839789959122345e-07, + "loss": 0.0, + "num_input_tokens_seen": 102104080, + "step": 151480 + }, + { + "epoch": 3.700803752473554, + "grad_norm": 0.0004473034350667149, + "learning_rate": 3.8391182245211283e-07, + "loss": 0.0, + "num_input_tokens_seen": 102107216, + "step": 151485 + }, + { + "epoch": 3.7009259033054014, + "grad_norm": 0.0008554542437195778, + "learning_rate": 3.8384465347235064e-07, + "loss": 0.0, + "num_input_tokens_seen": 102110608, + "step": 151490 + }, + { + "epoch": 3.7010480541372486, + "grad_norm": 0.01684235781431198, + "learning_rate": 3.837774889734353e-07, + "loss": 0.0, + "num_input_tokens_seen": 102114320, + "step": 151495 + }, + { + "epoch": 3.7011702049690958, + "grad_norm": 0.37367013096809387, + "learning_rate": 3.837103289558563e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102118352, + "step": 151500 + }, + { + "epoch": 3.701292355800943, + "grad_norm": 0.0028496081940829754, + "learning_rate": 3.836431734201012e-07, + "loss": 0.0, + "num_input_tokens_seen": 102121552, + "step": 151505 + }, + { + "epoch": 3.70141450663279, + "grad_norm": 0.4270290732383728, + "learning_rate": 3.8357602236665867e-07, + "loss": 0.0007, + "num_input_tokens_seen": 102124944, + "step": 151510 + }, + { + "epoch": 3.7015366574646373, + "grad_norm": 0.004349404014647007, + "learning_rate": 3.8350887579601744e-07, + "loss": 0.0, + "num_input_tokens_seen": 102128464, + "step": 151515 + }, + { + "epoch": 3.7016588082964845, + "grad_norm": 0.0028605188708752394, + "learning_rate": 3.834417337086652e-07, + "loss": 0.0, + "num_input_tokens_seen": 102131664, + "step": 151520 + }, + { + "epoch": 3.7017809591283317, + "grad_norm": 0.0024505862966179848, + "learning_rate": 3.833745961050908e-07, + "loss": 0.0, + "num_input_tokens_seen": 102135120, + "step": 151525 + }, + { + "epoch": 3.701903109960179, + "grad_norm": 0.0017270263051614165, + "learning_rate": 3.833074629857819e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102138768, + "step": 151530 + }, + { + "epoch": 3.702025260792026, + "grad_norm": 0.002492433413863182, + "learning_rate": 3.8324033435122727e-07, + "loss": 0.0003, + "num_input_tokens_seen": 102141968, + "step": 151535 + }, + { + "epoch": 3.7021474116238733, + "grad_norm": 0.005628492683172226, + "learning_rate": 3.831732102019145e-07, + "loss": 0.0282, + "num_input_tokens_seen": 102145616, + "step": 151540 + }, + { + "epoch": 3.7022695624557205, + "grad_norm": 0.0043680984526872635, + "learning_rate": 3.83106090538332e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102148944, + "step": 151545 + }, + { + "epoch": 3.702391713287567, + "grad_norm": 4.06487743020989e-05, + "learning_rate": 3.830389753609684e-07, + "loss": 0.0, + "num_input_tokens_seen": 102152336, + "step": 151550 + }, + { + "epoch": 3.702513864119415, + "grad_norm": 0.0013830072712153196, + "learning_rate": 3.8297186467031083e-07, + "loss": 0.0, + "num_input_tokens_seen": 102155600, + "step": 151555 + }, + { + "epoch": 3.7026360149512616, + "grad_norm": 0.0003124834911432117, + "learning_rate": 3.829047584668483e-07, + "loss": 0.0, + "num_input_tokens_seen": 102158928, + "step": 151560 + }, + { + "epoch": 3.7027581657831092, + "grad_norm": 0.006732202600687742, + "learning_rate": 3.8283765675106795e-07, + "loss": 0.0, + "num_input_tokens_seen": 102161808, + "step": 151565 + }, + { + "epoch": 3.702880316614956, + "grad_norm": 0.0012146084336563945, + "learning_rate": 3.8277055952345847e-07, + "loss": 0.0, + "num_input_tokens_seen": 102165008, + "step": 151570 + }, + { + "epoch": 3.703002467446803, + "grad_norm": 0.0025280537083745003, + "learning_rate": 3.827034667845075e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102168592, + "step": 151575 + }, + { + "epoch": 3.7031246182786504, + "grad_norm": 0.008870430290699005, + "learning_rate": 3.8263637853470266e-07, + "loss": 0.0, + "num_input_tokens_seen": 102172368, + "step": 151580 + }, + { + "epoch": 3.7032467691104975, + "grad_norm": 0.11202926933765411, + "learning_rate": 3.825692947745324e-07, + "loss": 0.0, + "num_input_tokens_seen": 102175632, + "step": 151585 + }, + { + "epoch": 3.7033689199423447, + "grad_norm": 0.0003817217075265944, + "learning_rate": 3.82502215504484e-07, + "loss": 0.0, + "num_input_tokens_seen": 102179024, + "step": 151590 + }, + { + "epoch": 3.703491070774192, + "grad_norm": 0.18531584739685059, + "learning_rate": 3.824351407250459e-07, + "loss": 0.0, + "num_input_tokens_seen": 102182352, + "step": 151595 + }, + { + "epoch": 3.703613221606039, + "grad_norm": 0.0019869126845151186, + "learning_rate": 3.823680704367053e-07, + "loss": 0.0, + "num_input_tokens_seen": 102185808, + "step": 151600 + }, + { + "epoch": 3.7037353724378863, + "grad_norm": 0.0009062121971510351, + "learning_rate": 3.823010046399501e-07, + "loss": 0.0, + "num_input_tokens_seen": 102189328, + "step": 151605 + }, + { + "epoch": 3.7038575232697335, + "grad_norm": 0.00044265558244660497, + "learning_rate": 3.8223394333526846e-07, + "loss": 0.0, + "num_input_tokens_seen": 102192784, + "step": 151610 + }, + { + "epoch": 3.7039796741015807, + "grad_norm": 0.10403814166784286, + "learning_rate": 3.8216688652314753e-07, + "loss": 0.0, + "num_input_tokens_seen": 102196240, + "step": 151615 + }, + { + "epoch": 3.704101824933428, + "grad_norm": 0.00029964270652271807, + "learning_rate": 3.8209983420407543e-07, + "loss": 0.0, + "num_input_tokens_seen": 102199312, + "step": 151620 + }, + { + "epoch": 3.704223975765275, + "grad_norm": 0.0005833769682794809, + "learning_rate": 3.820327863785392e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102202832, + "step": 151625 + }, + { + "epoch": 3.7043461265971223, + "grad_norm": 77.15211486816406, + "learning_rate": 3.819657430470271e-07, + "loss": 0.1065, + "num_input_tokens_seen": 102206032, + "step": 151630 + }, + { + "epoch": 3.7044682774289694, + "grad_norm": 0.0013740085996687412, + "learning_rate": 3.81898704210026e-07, + "loss": 0.0, + "num_input_tokens_seen": 102209296, + "step": 151635 + }, + { + "epoch": 3.7045904282608166, + "grad_norm": 0.002347599947825074, + "learning_rate": 3.8183166986802384e-07, + "loss": 0.0, + "num_input_tokens_seen": 102213008, + "step": 151640 + }, + { + "epoch": 3.7047125790926634, + "grad_norm": 0.0009971833787858486, + "learning_rate": 3.8176464002150833e-07, + "loss": 0.0, + "num_input_tokens_seen": 102216528, + "step": 151645 + }, + { + "epoch": 3.704834729924511, + "grad_norm": 0.0005341061041690409, + "learning_rate": 3.816976146709663e-07, + "loss": 0.0, + "num_input_tokens_seen": 102219920, + "step": 151650 + }, + { + "epoch": 3.7049568807563578, + "grad_norm": 0.00022218092635739595, + "learning_rate": 3.8163059381688587e-07, + "loss": 0.0, + "num_input_tokens_seen": 102222992, + "step": 151655 + }, + { + "epoch": 3.705079031588205, + "grad_norm": 0.00023510765458922833, + "learning_rate": 3.8156357745975375e-07, + "loss": 0.113, + "num_input_tokens_seen": 102226640, + "step": 151660 + }, + { + "epoch": 3.705201182420052, + "grad_norm": 0.0005348159465938807, + "learning_rate": 3.8149656560005794e-07, + "loss": 0.0202, + "num_input_tokens_seen": 102229840, + "step": 151665 + }, + { + "epoch": 3.7053233332518993, + "grad_norm": 0.00038441031938418746, + "learning_rate": 3.8142955823828517e-07, + "loss": 0.0, + "num_input_tokens_seen": 102233616, + "step": 151670 + }, + { + "epoch": 3.7054454840837465, + "grad_norm": 0.0006266786367632449, + "learning_rate": 3.8136255537492333e-07, + "loss": 0.0, + "num_input_tokens_seen": 102237008, + "step": 151675 + }, + { + "epoch": 3.7055676349155937, + "grad_norm": 0.0018625278025865555, + "learning_rate": 3.8129555701045936e-07, + "loss": 0.0, + "num_input_tokens_seen": 102240080, + "step": 151680 + }, + { + "epoch": 3.705689785747441, + "grad_norm": 0.0017500559333711863, + "learning_rate": 3.812285631453802e-07, + "loss": 0.0, + "num_input_tokens_seen": 102243664, + "step": 151685 + }, + { + "epoch": 3.705811936579288, + "grad_norm": 0.00026514322962611914, + "learning_rate": 3.8116157378017377e-07, + "loss": 0.0, + "num_input_tokens_seen": 102247184, + "step": 151690 + }, + { + "epoch": 3.7059340874111353, + "grad_norm": 50.65078353881836, + "learning_rate": 3.810945889153264e-07, + "loss": 0.0005, + "num_input_tokens_seen": 102250704, + "step": 151695 + }, + { + "epoch": 3.7060562382429825, + "grad_norm": 0.0005604383768513799, + "learning_rate": 3.8102760855132567e-07, + "loss": 0.0, + "num_input_tokens_seen": 102254352, + "step": 151700 + }, + { + "epoch": 3.7061783890748297, + "grad_norm": 0.0015173493884503841, + "learning_rate": 3.8096063268865897e-07, + "loss": 0.0696, + "num_input_tokens_seen": 102258192, + "step": 151705 + }, + { + "epoch": 3.706300539906677, + "grad_norm": 0.00023253409017343074, + "learning_rate": 3.8089366132781277e-07, + "loss": 0.0, + "num_input_tokens_seen": 102261648, + "step": 151710 + }, + { + "epoch": 3.706422690738524, + "grad_norm": 0.002127137267962098, + "learning_rate": 3.808266944692746e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102265296, + "step": 151715 + }, + { + "epoch": 3.7065448415703712, + "grad_norm": 0.002646263688802719, + "learning_rate": 3.8075973211353107e-07, + "loss": 0.085, + "num_input_tokens_seen": 102268624, + "step": 151720 + }, + { + "epoch": 3.7066669924022184, + "grad_norm": 0.003195775207132101, + "learning_rate": 3.8069277426106917e-07, + "loss": 0.0, + "num_input_tokens_seen": 102272208, + "step": 151725 + }, + { + "epoch": 3.706789143234065, + "grad_norm": 0.0419430248439312, + "learning_rate": 3.8062582091237637e-07, + "loss": 0.0, + "num_input_tokens_seen": 102275728, + "step": 151730 + }, + { + "epoch": 3.706911294065913, + "grad_norm": 68.80615234375, + "learning_rate": 3.805588720679389e-07, + "loss": 0.044, + "num_input_tokens_seen": 102279120, + "step": 151735 + }, + { + "epoch": 3.7070334448977595, + "grad_norm": 0.002701559802517295, + "learning_rate": 3.8049192772824435e-07, + "loss": 0.0, + "num_input_tokens_seen": 102282192, + "step": 151740 + }, + { + "epoch": 3.707155595729607, + "grad_norm": 0.0026187242474406958, + "learning_rate": 3.8042498789377863e-07, + "loss": 0.0, + "num_input_tokens_seen": 102285392, + "step": 151745 + }, + { + "epoch": 3.707277746561454, + "grad_norm": 0.0021011237986385822, + "learning_rate": 3.803580525650294e-07, + "loss": 0.0, + "num_input_tokens_seen": 102289168, + "step": 151750 + }, + { + "epoch": 3.707399897393301, + "grad_norm": 0.0002951928472612053, + "learning_rate": 3.802911217424828e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102292432, + "step": 151755 + }, + { + "epoch": 3.7075220482251483, + "grad_norm": 0.10593866556882858, + "learning_rate": 3.8022419542662585e-07, + "loss": 0.0, + "num_input_tokens_seen": 102295696, + "step": 151760 + }, + { + "epoch": 3.7076441990569955, + "grad_norm": 0.014959882013499737, + "learning_rate": 3.8015727361794547e-07, + "loss": 0.0, + "num_input_tokens_seen": 102299152, + "step": 151765 + }, + { + "epoch": 3.7077663498888427, + "grad_norm": 0.00046182944788597524, + "learning_rate": 3.800903563169283e-07, + "loss": 0.0, + "num_input_tokens_seen": 102302608, + "step": 151770 + }, + { + "epoch": 3.70788850072069, + "grad_norm": 0.38408032059669495, + "learning_rate": 3.800234435240604e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102305680, + "step": 151775 + }, + { + "epoch": 3.708010651552537, + "grad_norm": 0.0023694150149822235, + "learning_rate": 3.7995653523982896e-07, + "loss": 0.0, + "num_input_tokens_seen": 102308752, + "step": 151780 + }, + { + "epoch": 3.7081328023843843, + "grad_norm": 0.0032163665164262056, + "learning_rate": 3.7988963146472053e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102312080, + "step": 151785 + }, + { + "epoch": 3.7082549532162314, + "grad_norm": 0.0014809854328632355, + "learning_rate": 3.798227321992211e-07, + "loss": 0.0, + "num_input_tokens_seen": 102315472, + "step": 151790 + }, + { + "epoch": 3.7083771040480786, + "grad_norm": 0.002615902107208967, + "learning_rate": 3.7975583744381757e-07, + "loss": 0.0443, + "num_input_tokens_seen": 102318800, + "step": 151795 + }, + { + "epoch": 3.708499254879926, + "grad_norm": 82.20895385742188, + "learning_rate": 3.796889471989967e-07, + "loss": 0.0822, + "num_input_tokens_seen": 102321808, + "step": 151800 + }, + { + "epoch": 3.708621405711773, + "grad_norm": 6.49347566650249e-05, + "learning_rate": 3.7962206146524435e-07, + "loss": 0.0, + "num_input_tokens_seen": 102325200, + "step": 151805 + }, + { + "epoch": 3.70874355654362, + "grad_norm": 0.043628934770822525, + "learning_rate": 3.7955518024304756e-07, + "loss": 0.0, + "num_input_tokens_seen": 102328784, + "step": 151810 + }, + { + "epoch": 3.7088657073754674, + "grad_norm": 0.0645923987030983, + "learning_rate": 3.794883035328921e-07, + "loss": 0.0, + "num_input_tokens_seen": 102331984, + "step": 151815 + }, + { + "epoch": 3.7089878582073146, + "grad_norm": 0.0026498925872147083, + "learning_rate": 3.794214313352646e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102335248, + "step": 151820 + }, + { + "epoch": 3.7091100090391613, + "grad_norm": 0.000418938638176769, + "learning_rate": 3.7935456365065164e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102338512, + "step": 151825 + }, + { + "epoch": 3.709232159871009, + "grad_norm": 0.0551934652030468, + "learning_rate": 3.7928770047953883e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102342032, + "step": 151830 + }, + { + "epoch": 3.7093543107028557, + "grad_norm": 0.04365779459476471, + "learning_rate": 3.792208418224133e-07, + "loss": 0.0, + "num_input_tokens_seen": 102345744, + "step": 151835 + }, + { + "epoch": 3.709476461534703, + "grad_norm": 0.005952696315944195, + "learning_rate": 3.7915398767976037e-07, + "loss": 0.0, + "num_input_tokens_seen": 102348752, + "step": 151840 + }, + { + "epoch": 3.70959861236655, + "grad_norm": 39.83332061767578, + "learning_rate": 3.7908713805206694e-07, + "loss": 0.0631, + "num_input_tokens_seen": 102351888, + "step": 151845 + }, + { + "epoch": 3.7097207631983973, + "grad_norm": 0.0012893755920231342, + "learning_rate": 3.7902029293981854e-07, + "loss": 0.0, + "num_input_tokens_seen": 102355472, + "step": 151850 + }, + { + "epoch": 3.7098429140302445, + "grad_norm": 0.00010355439007980749, + "learning_rate": 3.7895345234350163e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102358800, + "step": 151855 + }, + { + "epoch": 3.7099650648620917, + "grad_norm": 0.024188674986362457, + "learning_rate": 3.788866162636025e-07, + "loss": 0.0, + "num_input_tokens_seen": 102361872, + "step": 151860 + }, + { + "epoch": 3.710087215693939, + "grad_norm": 0.0009943852201104164, + "learning_rate": 3.788197847006067e-07, + "loss": 0.0, + "num_input_tokens_seen": 102365136, + "step": 151865 + }, + { + "epoch": 3.710209366525786, + "grad_norm": 0.09669873118400574, + "learning_rate": 3.787529576550008e-07, + "loss": 0.0, + "num_input_tokens_seen": 102368400, + "step": 151870 + }, + { + "epoch": 3.7103315173576332, + "grad_norm": 0.012280414812266827, + "learning_rate": 3.786861351272702e-07, + "loss": 0.0, + "num_input_tokens_seen": 102371728, + "step": 151875 + }, + { + "epoch": 3.7104536681894804, + "grad_norm": 0.00038101509562693536, + "learning_rate": 3.786193171179014e-07, + "loss": 0.0, + "num_input_tokens_seen": 102374992, + "step": 151880 + }, + { + "epoch": 3.7105758190213276, + "grad_norm": 0.9798285961151123, + "learning_rate": 3.7855250362738014e-07, + "loss": 0.0004, + "num_input_tokens_seen": 102378576, + "step": 151885 + }, + { + "epoch": 3.710697969853175, + "grad_norm": 0.00015900557627901435, + "learning_rate": 3.7848569465619187e-07, + "loss": 0.0, + "num_input_tokens_seen": 102381840, + "step": 151890 + }, + { + "epoch": 3.710820120685022, + "grad_norm": 0.001267539686523378, + "learning_rate": 3.7841889020482307e-07, + "loss": 0.0, + "num_input_tokens_seen": 102385232, + "step": 151895 + }, + { + "epoch": 3.710942271516869, + "grad_norm": 0.0014602263690903783, + "learning_rate": 3.7835209027375894e-07, + "loss": 0.0, + "num_input_tokens_seen": 102388304, + "step": 151900 + }, + { + "epoch": 3.7110644223487164, + "grad_norm": 0.00064032559748739, + "learning_rate": 3.7828529486348604e-07, + "loss": 0.0384, + "num_input_tokens_seen": 102391632, + "step": 151905 + }, + { + "epoch": 3.711186573180563, + "grad_norm": 0.006628748960793018, + "learning_rate": 3.782185039744893e-07, + "loss": 0.0, + "num_input_tokens_seen": 102394768, + "step": 151910 + }, + { + "epoch": 3.7113087240124107, + "grad_norm": 0.0025127448607236147, + "learning_rate": 3.781517176072548e-07, + "loss": 0.0, + "num_input_tokens_seen": 102397840, + "step": 151915 + }, + { + "epoch": 3.7114308748442575, + "grad_norm": 0.0008406085544265807, + "learning_rate": 3.7808493576226863e-07, + "loss": 0.0, + "num_input_tokens_seen": 102401104, + "step": 151920 + }, + { + "epoch": 3.711553025676105, + "grad_norm": 0.0004238552937749773, + "learning_rate": 3.780181584400158e-07, + "loss": 0.0293, + "num_input_tokens_seen": 102404752, + "step": 151925 + }, + { + "epoch": 3.711675176507952, + "grad_norm": 0.22442388534545898, + "learning_rate": 3.779513856409825e-07, + "loss": 0.0, + "num_input_tokens_seen": 102407952, + "step": 151930 + }, + { + "epoch": 3.711797327339799, + "grad_norm": 0.034784946590662, + "learning_rate": 3.778846173656538e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102411088, + "step": 151935 + }, + { + "epoch": 3.7119194781716462, + "grad_norm": 0.0021735504269599915, + "learning_rate": 3.778178536145157e-07, + "loss": 0.0, + "num_input_tokens_seen": 102414096, + "step": 151940 + }, + { + "epoch": 3.7120416290034934, + "grad_norm": 0.004135854076594114, + "learning_rate": 3.777510943880532e-07, + "loss": 0.0009, + "num_input_tokens_seen": 102417168, + "step": 151945 + }, + { + "epoch": 3.7121637798353406, + "grad_norm": 0.00016273342771455646, + "learning_rate": 3.776843396867522e-07, + "loss": 0.0, + "num_input_tokens_seen": 102420752, + "step": 151950 + }, + { + "epoch": 3.712285930667188, + "grad_norm": 0.0016355100087821484, + "learning_rate": 3.7761758951109836e-07, + "loss": 0.0, + "num_input_tokens_seen": 102424144, + "step": 151955 + }, + { + "epoch": 3.712408081499035, + "grad_norm": 0.003920779097825289, + "learning_rate": 3.7755084386157643e-07, + "loss": 0.0, + "num_input_tokens_seen": 102427280, + "step": 151960 + }, + { + "epoch": 3.712530232330882, + "grad_norm": 0.0015011931536719203, + "learning_rate": 3.7748410273867247e-07, + "loss": 0.0, + "num_input_tokens_seen": 102431184, + "step": 151965 + }, + { + "epoch": 3.7126523831627294, + "grad_norm": 0.0028277873061597347, + "learning_rate": 3.7741736614287135e-07, + "loss": 0.0, + "num_input_tokens_seen": 102434448, + "step": 151970 + }, + { + "epoch": 3.7127745339945766, + "grad_norm": 0.00037950114347040653, + "learning_rate": 3.7735063407465886e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102437648, + "step": 151975 + }, + { + "epoch": 3.7128966848264238, + "grad_norm": 0.00014305794320534915, + "learning_rate": 3.772839065345197e-07, + "loss": 0.0, + "num_input_tokens_seen": 102440912, + "step": 151980 + }, + { + "epoch": 3.713018835658271, + "grad_norm": 0.00024351131287403405, + "learning_rate": 3.7721718352293976e-07, + "loss": 0.0004, + "num_input_tokens_seen": 102444048, + "step": 151985 + }, + { + "epoch": 3.713140986490118, + "grad_norm": 0.0007917290786281228, + "learning_rate": 3.7715046504040406e-07, + "loss": 0.0, + "num_input_tokens_seen": 102447568, + "step": 151990 + }, + { + "epoch": 3.713263137321965, + "grad_norm": 0.0008913589990697801, + "learning_rate": 3.770837510873972e-07, + "loss": 0.0, + "num_input_tokens_seen": 102450960, + "step": 151995 + }, + { + "epoch": 3.7133852881538125, + "grad_norm": 4.2157516872975975e-05, + "learning_rate": 3.770170416644054e-07, + "loss": 0.0, + "num_input_tokens_seen": 102454928, + "step": 152000 + }, + { + "epoch": 3.7135074389856593, + "grad_norm": 0.003875978058204055, + "learning_rate": 3.7695033677191277e-07, + "loss": 0.0, + "num_input_tokens_seen": 102458384, + "step": 152005 + }, + { + "epoch": 3.713629589817507, + "grad_norm": 27.588912963867188, + "learning_rate": 3.7688363641040486e-07, + "loss": 0.0359, + "num_input_tokens_seen": 102461648, + "step": 152010 + }, + { + "epoch": 3.7137517406493536, + "grad_norm": 0.00013868967653252184, + "learning_rate": 3.7681694058036715e-07, + "loss": 0.0, + "num_input_tokens_seen": 102465488, + "step": 152015 + }, + { + "epoch": 3.713873891481201, + "grad_norm": 0.001578146475367248, + "learning_rate": 3.7675024928228393e-07, + "loss": 0.0, + "num_input_tokens_seen": 102469008, + "step": 152020 + }, + { + "epoch": 3.713996042313048, + "grad_norm": 0.0010178780648857355, + "learning_rate": 3.7668356251664077e-07, + "loss": 0.0, + "num_input_tokens_seen": 102472656, + "step": 152025 + }, + { + "epoch": 3.714118193144895, + "grad_norm": 0.00019512797007337213, + "learning_rate": 3.766168802839221e-07, + "loss": 0.0, + "num_input_tokens_seen": 102476112, + "step": 152030 + }, + { + "epoch": 3.7142403439767424, + "grad_norm": 0.7737483382225037, + "learning_rate": 3.765502025846132e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102479440, + "step": 152035 + }, + { + "epoch": 3.7143624948085896, + "grad_norm": 0.0016303518787026405, + "learning_rate": 3.7648352941919924e-07, + "loss": 0.0, + "num_input_tokens_seen": 102482576, + "step": 152040 + }, + { + "epoch": 3.714484645640437, + "grad_norm": 0.0010339925065636635, + "learning_rate": 3.764168607881644e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102485584, + "step": 152045 + }, + { + "epoch": 3.714606796472284, + "grad_norm": 0.0014051726320758462, + "learning_rate": 3.763501966919942e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102488848, + "step": 152050 + }, + { + "epoch": 3.714728947304131, + "grad_norm": 0.0021041228901594877, + "learning_rate": 3.762835371311728e-07, + "loss": 0.1307, + "num_input_tokens_seen": 102492176, + "step": 152055 + }, + { + "epoch": 3.7148510981359784, + "grad_norm": 0.01120474748313427, + "learning_rate": 3.762168821061856e-07, + "loss": 0.0003, + "num_input_tokens_seen": 102495184, + "step": 152060 + }, + { + "epoch": 3.7149732489678255, + "grad_norm": 0.00018623789947014302, + "learning_rate": 3.761502316175167e-07, + "loss": 0.0, + "num_input_tokens_seen": 102498768, + "step": 152065 + }, + { + "epoch": 3.7150953997996727, + "grad_norm": 0.0017337838653475046, + "learning_rate": 3.760835856656511e-07, + "loss": 0.0, + "num_input_tokens_seen": 102501712, + "step": 152070 + }, + { + "epoch": 3.71521755063152, + "grad_norm": 0.0008111335337162018, + "learning_rate": 3.760169442510738e-07, + "loss": 0.0, + "num_input_tokens_seen": 102505360, + "step": 152075 + }, + { + "epoch": 3.715339701463367, + "grad_norm": 0.2772418260574341, + "learning_rate": 3.7595030737426916e-07, + "loss": 0.062, + "num_input_tokens_seen": 102508560, + "step": 152080 + }, + { + "epoch": 3.7154618522952143, + "grad_norm": 0.00469400966539979, + "learning_rate": 3.758836750357213e-07, + "loss": 0.0, + "num_input_tokens_seen": 102511824, + "step": 152085 + }, + { + "epoch": 3.715584003127061, + "grad_norm": 0.0017776049207895994, + "learning_rate": 3.758170472359156e-07, + "loss": 0.0, + "num_input_tokens_seen": 102515024, + "step": 152090 + }, + { + "epoch": 3.7157061539589087, + "grad_norm": 0.002591691678389907, + "learning_rate": 3.7575042397533627e-07, + "loss": 0.0008, + "num_input_tokens_seen": 102518096, + "step": 152095 + }, + { + "epoch": 3.7158283047907554, + "grad_norm": 0.00019121443619951606, + "learning_rate": 3.756838052544674e-07, + "loss": 0.0433, + "num_input_tokens_seen": 102521488, + "step": 152100 + }, + { + "epoch": 3.715950455622603, + "grad_norm": 0.0015384487342089415, + "learning_rate": 3.756171910737938e-07, + "loss": 0.0, + "num_input_tokens_seen": 102525008, + "step": 152105 + }, + { + "epoch": 3.71607260645445, + "grad_norm": 0.011667652055621147, + "learning_rate": 3.7555058143380024e-07, + "loss": 0.0, + "num_input_tokens_seen": 102528016, + "step": 152110 + }, + { + "epoch": 3.716194757286297, + "grad_norm": 0.00015528489893767983, + "learning_rate": 3.754839763349704e-07, + "loss": 0.0, + "num_input_tokens_seen": 102531536, + "step": 152115 + }, + { + "epoch": 3.716316908118144, + "grad_norm": 0.0011144173331558704, + "learning_rate": 3.7541737577778956e-07, + "loss": 0.0, + "num_input_tokens_seen": 102535120, + "step": 152120 + }, + { + "epoch": 3.7164390589499914, + "grad_norm": 0.00029413026641122997, + "learning_rate": 3.753507797627412e-07, + "loss": 0.0, + "num_input_tokens_seen": 102538640, + "step": 152125 + }, + { + "epoch": 3.7165612097818386, + "grad_norm": 0.00037484162021428347, + "learning_rate": 3.7528418829030986e-07, + "loss": 0.0, + "num_input_tokens_seen": 102541904, + "step": 152130 + }, + { + "epoch": 3.7166833606136858, + "grad_norm": 0.004265445750206709, + "learning_rate": 3.752176013609804e-07, + "loss": 0.0, + "num_input_tokens_seen": 102545360, + "step": 152135 + }, + { + "epoch": 3.716805511445533, + "grad_norm": 0.0016600849339738488, + "learning_rate": 3.7515101897523616e-07, + "loss": 0.0, + "num_input_tokens_seen": 102548944, + "step": 152140 + }, + { + "epoch": 3.71692766227738, + "grad_norm": 0.0005052868509665132, + "learning_rate": 3.750844411335622e-07, + "loss": 0.0, + "num_input_tokens_seen": 102552464, + "step": 152145 + }, + { + "epoch": 3.7170498131092273, + "grad_norm": 0.0002263938804389909, + "learning_rate": 3.7501786783644183e-07, + "loss": 0.0, + "num_input_tokens_seen": 102556048, + "step": 152150 + }, + { + "epoch": 3.7171719639410745, + "grad_norm": 0.0004822145856451243, + "learning_rate": 3.7495129908436e-07, + "loss": 0.0, + "num_input_tokens_seen": 102559504, + "step": 152155 + }, + { + "epoch": 3.7172941147729217, + "grad_norm": 0.01699310727417469, + "learning_rate": 3.7488473487780004e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102562768, + "step": 152160 + }, + { + "epoch": 3.717416265604769, + "grad_norm": 8.816047920845449e-05, + "learning_rate": 3.7481817521724655e-07, + "loss": 0.0, + "num_input_tokens_seen": 102565904, + "step": 152165 + }, + { + "epoch": 3.717538416436616, + "grad_norm": 0.003461300628259778, + "learning_rate": 3.7475162010318374e-07, + "loss": 0.0, + "num_input_tokens_seen": 102569232, + "step": 152170 + }, + { + "epoch": 3.717660567268463, + "grad_norm": 0.00039457378443330526, + "learning_rate": 3.746850695360949e-07, + "loss": 0.0, + "num_input_tokens_seen": 102572496, + "step": 152175 + }, + { + "epoch": 3.7177827181003105, + "grad_norm": 0.0001832624984672293, + "learning_rate": 3.7461852351646483e-07, + "loss": 0.0, + "num_input_tokens_seen": 102576016, + "step": 152180 + }, + { + "epoch": 3.717904868932157, + "grad_norm": 0.0015027448534965515, + "learning_rate": 3.745519820447768e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102579216, + "step": 152185 + }, + { + "epoch": 3.718027019764005, + "grad_norm": 0.0005207133945077658, + "learning_rate": 3.7448544512151514e-07, + "loss": 0.0234, + "num_input_tokens_seen": 102582480, + "step": 152190 + }, + { + "epoch": 3.7181491705958516, + "grad_norm": 0.0003728516458068043, + "learning_rate": 3.7441891274716375e-07, + "loss": 0.0, + "num_input_tokens_seen": 102586064, + "step": 152195 + }, + { + "epoch": 3.718271321427699, + "grad_norm": 0.00019803232862614095, + "learning_rate": 3.743523849222059e-07, + "loss": 0.0, + "num_input_tokens_seen": 102589456, + "step": 152200 + }, + { + "epoch": 3.718393472259546, + "grad_norm": 0.008787796832621098, + "learning_rate": 3.7428586164712604e-07, + "loss": 0.0, + "num_input_tokens_seen": 102592976, + "step": 152205 + }, + { + "epoch": 3.718515623091393, + "grad_norm": 0.00048126160982064903, + "learning_rate": 3.742193429224074e-07, + "loss": 0.0, + "num_input_tokens_seen": 102596112, + "step": 152210 + }, + { + "epoch": 3.7186377739232404, + "grad_norm": 0.001404481241479516, + "learning_rate": 3.741528287485344e-07, + "loss": 0.0, + "num_input_tokens_seen": 102599504, + "step": 152215 + }, + { + "epoch": 3.7187599247550875, + "grad_norm": 0.0026575650554150343, + "learning_rate": 3.7408631912599e-07, + "loss": 0.0, + "num_input_tokens_seen": 102602640, + "step": 152220 + }, + { + "epoch": 3.7188820755869347, + "grad_norm": 0.013400964438915253, + "learning_rate": 3.740198140552582e-07, + "loss": 0.0, + "num_input_tokens_seen": 102606544, + "step": 152225 + }, + { + "epoch": 3.719004226418782, + "grad_norm": 0.0002321565116290003, + "learning_rate": 3.7395331353682305e-07, + "loss": 0.0, + "num_input_tokens_seen": 102609488, + "step": 152230 + }, + { + "epoch": 3.719126377250629, + "grad_norm": 0.00038578774547204375, + "learning_rate": 3.7388681757116736e-07, + "loss": 0.0, + "num_input_tokens_seen": 102612688, + "step": 152235 + }, + { + "epoch": 3.7192485280824763, + "grad_norm": 51.554908752441406, + "learning_rate": 3.7382032615877554e-07, + "loss": 0.0631, + "num_input_tokens_seen": 102616528, + "step": 152240 + }, + { + "epoch": 3.7193706789143235, + "grad_norm": 0.002198802540078759, + "learning_rate": 3.7375383930013037e-07, + "loss": 0.0462, + "num_input_tokens_seen": 102619536, + "step": 152245 + }, + { + "epoch": 3.7194928297461707, + "grad_norm": 0.000260370085015893, + "learning_rate": 3.736873569957162e-07, + "loss": 0.0383, + "num_input_tokens_seen": 102622608, + "step": 152250 + }, + { + "epoch": 3.719614980578018, + "grad_norm": 0.010937336832284927, + "learning_rate": 3.736208792460156e-07, + "loss": 0.0, + "num_input_tokens_seen": 102626320, + "step": 152255 + }, + { + "epoch": 3.719737131409865, + "grad_norm": 0.00016129412688314915, + "learning_rate": 3.7355440605151236e-07, + "loss": 0.0414, + "num_input_tokens_seen": 102629968, + "step": 152260 + }, + { + "epoch": 3.7198592822417123, + "grad_norm": 0.0022476690355688334, + "learning_rate": 3.7348793741269036e-07, + "loss": 0.0, + "num_input_tokens_seen": 102633232, + "step": 152265 + }, + { + "epoch": 3.719981433073559, + "grad_norm": 0.04069744423031807, + "learning_rate": 3.7342147333003227e-07, + "loss": 0.0, + "num_input_tokens_seen": 102636432, + "step": 152270 + }, + { + "epoch": 3.7201035839054066, + "grad_norm": 30.509748458862305, + "learning_rate": 3.733550138040221e-07, + "loss": 0.081, + "num_input_tokens_seen": 102639696, + "step": 152275 + }, + { + "epoch": 3.7202257347372534, + "grad_norm": 0.00010525318066356704, + "learning_rate": 3.7328855883514244e-07, + "loss": 0.0, + "num_input_tokens_seen": 102642896, + "step": 152280 + }, + { + "epoch": 3.7203478855691006, + "grad_norm": 0.016809474676847458, + "learning_rate": 3.7322210842387734e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102645840, + "step": 152285 + }, + { + "epoch": 3.7204700364009478, + "grad_norm": 0.023841066285967827, + "learning_rate": 3.731556625707093e-07, + "loss": 0.0, + "num_input_tokens_seen": 102649104, + "step": 152290 + }, + { + "epoch": 3.720592187232795, + "grad_norm": 0.0014904678100720048, + "learning_rate": 3.730892212761222e-07, + "loss": 0.0, + "num_input_tokens_seen": 102652176, + "step": 152295 + }, + { + "epoch": 3.720714338064642, + "grad_norm": 0.003293322864919901, + "learning_rate": 3.730227845405989e-07, + "loss": 0.0003, + "num_input_tokens_seen": 102655120, + "step": 152300 + }, + { + "epoch": 3.7208364888964893, + "grad_norm": 0.007259721867740154, + "learning_rate": 3.729563523646222e-07, + "loss": 0.0, + "num_input_tokens_seen": 102658448, + "step": 152305 + }, + { + "epoch": 3.7209586397283365, + "grad_norm": 0.00021232757717370987, + "learning_rate": 3.72889924748676e-07, + "loss": 0.0, + "num_input_tokens_seen": 102661520, + "step": 152310 + }, + { + "epoch": 3.7210807905601837, + "grad_norm": 0.001801141886971891, + "learning_rate": 3.728235016932425e-07, + "loss": 0.0, + "num_input_tokens_seen": 102664592, + "step": 152315 + }, + { + "epoch": 3.721202941392031, + "grad_norm": 0.0009091881802305579, + "learning_rate": 3.7275708319880516e-07, + "loss": 0.0, + "num_input_tokens_seen": 102668048, + "step": 152320 + }, + { + "epoch": 3.721325092223878, + "grad_norm": 0.03039466217160225, + "learning_rate": 3.7269066926584746e-07, + "loss": 0.0, + "num_input_tokens_seen": 102671504, + "step": 152325 + }, + { + "epoch": 3.7214472430557253, + "grad_norm": 0.008603163994848728, + "learning_rate": 3.7262425989485145e-07, + "loss": 0.0, + "num_input_tokens_seen": 102675152, + "step": 152330 + }, + { + "epoch": 3.7215693938875725, + "grad_norm": 0.0013472189893946052, + "learning_rate": 3.725578550863011e-07, + "loss": 0.0402, + "num_input_tokens_seen": 102678544, + "step": 152335 + }, + { + "epoch": 3.7216915447194197, + "grad_norm": 0.00042434909846633673, + "learning_rate": 3.7249145484067835e-07, + "loss": 0.0, + "num_input_tokens_seen": 102681872, + "step": 152340 + }, + { + "epoch": 3.721813695551267, + "grad_norm": 0.004947059787809849, + "learning_rate": 3.7242505915846677e-07, + "loss": 0.0, + "num_input_tokens_seen": 102684880, + "step": 152345 + }, + { + "epoch": 3.721935846383114, + "grad_norm": 0.00031549998675473034, + "learning_rate": 3.723586680401487e-07, + "loss": 0.0, + "num_input_tokens_seen": 102687888, + "step": 152350 + }, + { + "epoch": 3.722057997214961, + "grad_norm": 0.0046067675575613976, + "learning_rate": 3.7229228148620726e-07, + "loss": 0.0, + "num_input_tokens_seen": 102691536, + "step": 152355 + }, + { + "epoch": 3.7221801480468084, + "grad_norm": 0.00043726101284846663, + "learning_rate": 3.722258994971255e-07, + "loss": 0.0, + "num_input_tokens_seen": 102694800, + "step": 152360 + }, + { + "epoch": 3.722302298878655, + "grad_norm": 0.0015163227217271924, + "learning_rate": 3.7215952207338543e-07, + "loss": 0.0, + "num_input_tokens_seen": 102698576, + "step": 152365 + }, + { + "epoch": 3.722424449710503, + "grad_norm": 0.07668344676494598, + "learning_rate": 3.7209314921547066e-07, + "loss": 0.0, + "num_input_tokens_seen": 102701840, + "step": 152370 + }, + { + "epoch": 3.7225466005423495, + "grad_norm": 0.0085446797311306, + "learning_rate": 3.7202678092386296e-07, + "loss": 0.0, + "num_input_tokens_seen": 102705296, + "step": 152375 + }, + { + "epoch": 3.7226687513741967, + "grad_norm": 0.0001068677956936881, + "learning_rate": 3.7196041719904536e-07, + "loss": 0.0, + "num_input_tokens_seen": 102709776, + "step": 152380 + }, + { + "epoch": 3.722790902206044, + "grad_norm": 0.0018057903507724404, + "learning_rate": 3.71894058041501e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102712912, + "step": 152385 + }, + { + "epoch": 3.722913053037891, + "grad_norm": 0.0006664296961389482, + "learning_rate": 3.7182770345171187e-07, + "loss": 0.0, + "num_input_tokens_seen": 102716240, + "step": 152390 + }, + { + "epoch": 3.7230352038697383, + "grad_norm": 0.010751817375421524, + "learning_rate": 3.7176135343016036e-07, + "loss": 0.0, + "num_input_tokens_seen": 102719248, + "step": 152395 + }, + { + "epoch": 3.7231573547015855, + "grad_norm": 0.002380897058174014, + "learning_rate": 3.7169500797732966e-07, + "loss": 0.0, + "num_input_tokens_seen": 102722384, + "step": 152400 + }, + { + "epoch": 3.7232795055334327, + "grad_norm": 0.00012997696467209607, + "learning_rate": 3.716286670937018e-07, + "loss": 0.0435, + "num_input_tokens_seen": 102725328, + "step": 152405 + }, + { + "epoch": 3.72340165636528, + "grad_norm": 0.0108709204941988, + "learning_rate": 3.7156233077975895e-07, + "loss": 0.0566, + "num_input_tokens_seen": 102728400, + "step": 152410 + }, + { + "epoch": 3.723523807197127, + "grad_norm": 77.4952163696289, + "learning_rate": 3.714959990359838e-07, + "loss": 0.0031, + "num_input_tokens_seen": 102731728, + "step": 152415 + }, + { + "epoch": 3.7236459580289742, + "grad_norm": 0.11548440158367157, + "learning_rate": 3.7142967186285924e-07, + "loss": 0.0, + "num_input_tokens_seen": 102735952, + "step": 152420 + }, + { + "epoch": 3.7237681088608214, + "grad_norm": 0.007672048639506102, + "learning_rate": 3.7136334926086676e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102739408, + "step": 152425 + }, + { + "epoch": 3.7238902596926686, + "grad_norm": 0.005651409272104502, + "learning_rate": 3.712970312304894e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102742800, + "step": 152430 + }, + { + "epoch": 3.724012410524516, + "grad_norm": 0.00011328620894346386, + "learning_rate": 3.7123071777220884e-07, + "loss": 0.0012, + "num_input_tokens_seen": 102746000, + "step": 152435 + }, + { + "epoch": 3.7241345613563626, + "grad_norm": 0.00017487969307694584, + "learning_rate": 3.711644088865076e-07, + "loss": 0.0, + "num_input_tokens_seen": 102749392, + "step": 152440 + }, + { + "epoch": 3.72425671218821, + "grad_norm": 5.3928306442685425e-05, + "learning_rate": 3.7109810457386825e-07, + "loss": 0.0, + "num_input_tokens_seen": 102752592, + "step": 152445 + }, + { + "epoch": 3.724378863020057, + "grad_norm": 0.00017148379993159324, + "learning_rate": 3.7103180483477234e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102755920, + "step": 152450 + }, + { + "epoch": 3.7245010138519046, + "grad_norm": 0.0005869636661373079, + "learning_rate": 3.7096550966970264e-07, + "loss": 0.0, + "num_input_tokens_seen": 102759504, + "step": 152455 + }, + { + "epoch": 3.7246231646837513, + "grad_norm": 0.0002772973384708166, + "learning_rate": 3.7089921907914056e-07, + "loss": 0.0, + "num_input_tokens_seen": 102762960, + "step": 152460 + }, + { + "epoch": 3.7247453155155985, + "grad_norm": 0.00029023404931649566, + "learning_rate": 3.70832933063569e-07, + "loss": 0.0882, + "num_input_tokens_seen": 102766224, + "step": 152465 + }, + { + "epoch": 3.7248674663474457, + "grad_norm": 0.0037888879887759686, + "learning_rate": 3.707666516234692e-07, + "loss": 0.0, + "num_input_tokens_seen": 102769424, + "step": 152470 + }, + { + "epoch": 3.724989617179293, + "grad_norm": 0.003013129811733961, + "learning_rate": 3.7070037475932346e-07, + "loss": 0.0, + "num_input_tokens_seen": 102772496, + "step": 152475 + }, + { + "epoch": 3.72511176801114, + "grad_norm": 0.0013928675325587392, + "learning_rate": 3.706341024716143e-07, + "loss": 0.0, + "num_input_tokens_seen": 102776080, + "step": 152480 + }, + { + "epoch": 3.7252339188429873, + "grad_norm": 0.012922985479235649, + "learning_rate": 3.705678347608229e-07, + "loss": 0.0, + "num_input_tokens_seen": 102779408, + "step": 152485 + }, + { + "epoch": 3.7253560696748345, + "grad_norm": 0.0011642073513939977, + "learning_rate": 3.705015716274318e-07, + "loss": 0.0, + "num_input_tokens_seen": 102782864, + "step": 152490 + }, + { + "epoch": 3.7254782205066816, + "grad_norm": 0.0006689711008220911, + "learning_rate": 3.704353130719222e-07, + "loss": 0.0, + "num_input_tokens_seen": 102786128, + "step": 152495 + }, + { + "epoch": 3.725600371338529, + "grad_norm": 47.64103698730469, + "learning_rate": 3.7036905909477666e-07, + "loss": 0.1, + "num_input_tokens_seen": 102789520, + "step": 152500 + }, + { + "epoch": 3.725722522170376, + "grad_norm": 0.0068311383947730064, + "learning_rate": 3.7030280969647676e-07, + "loss": 0.0, + "num_input_tokens_seen": 102792592, + "step": 152505 + }, + { + "epoch": 3.725844673002223, + "grad_norm": 0.044286441057920456, + "learning_rate": 3.702365648775039e-07, + "loss": 0.0383, + "num_input_tokens_seen": 102796048, + "step": 152510 + }, + { + "epoch": 3.7259668238340704, + "grad_norm": 0.002478921553120017, + "learning_rate": 3.701703246383403e-07, + "loss": 0.0, + "num_input_tokens_seen": 102799504, + "step": 152515 + }, + { + "epoch": 3.7260889746659176, + "grad_norm": 0.0020042003598064184, + "learning_rate": 3.701040889794673e-07, + "loss": 0.0728, + "num_input_tokens_seen": 102802384, + "step": 152520 + }, + { + "epoch": 3.726211125497765, + "grad_norm": 0.0006063705077394843, + "learning_rate": 3.700378579013671e-07, + "loss": 0.0, + "num_input_tokens_seen": 102805840, + "step": 152525 + }, + { + "epoch": 3.726333276329612, + "grad_norm": 0.00019261521811131388, + "learning_rate": 3.699716314045207e-07, + "loss": 0.0, + "num_input_tokens_seen": 102808976, + "step": 152530 + }, + { + "epoch": 3.7264554271614587, + "grad_norm": 0.00046472440590150654, + "learning_rate": 3.6990540948940995e-07, + "loss": 0.0, + "num_input_tokens_seen": 102812240, + "step": 152535 + }, + { + "epoch": 3.7265775779933064, + "grad_norm": 0.0002507887838874012, + "learning_rate": 3.6983919215651704e-07, + "loss": 0.0, + "num_input_tokens_seen": 102815504, + "step": 152540 + }, + { + "epoch": 3.726699728825153, + "grad_norm": 0.00016412966942880303, + "learning_rate": 3.6977297940632257e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102818512, + "step": 152545 + }, + { + "epoch": 3.7268218796570007, + "grad_norm": 0.0005654366686940193, + "learning_rate": 3.697067712393088e-07, + "loss": 0.0, + "num_input_tokens_seen": 102821584, + "step": 152550 + }, + { + "epoch": 3.7269440304888475, + "grad_norm": 0.0047607459127902985, + "learning_rate": 3.696405676559567e-07, + "loss": 0.0, + "num_input_tokens_seen": 102825040, + "step": 152555 + }, + { + "epoch": 3.7270661813206947, + "grad_norm": 0.0005605295300483704, + "learning_rate": 3.6957436865674817e-07, + "loss": 0.0, + "num_input_tokens_seen": 102828304, + "step": 152560 + }, + { + "epoch": 3.727188332152542, + "grad_norm": 0.0003506525536067784, + "learning_rate": 3.695081742421642e-07, + "loss": 0.0, + "num_input_tokens_seen": 102831824, + "step": 152565 + }, + { + "epoch": 3.727310482984389, + "grad_norm": 0.0026996464002877474, + "learning_rate": 3.6944198441268626e-07, + "loss": 0.0, + "num_input_tokens_seen": 102835024, + "step": 152570 + }, + { + "epoch": 3.7274326338162362, + "grad_norm": 0.0029354402795434, + "learning_rate": 3.6937579916879614e-07, + "loss": 0.0, + "num_input_tokens_seen": 102838288, + "step": 152575 + }, + { + "epoch": 3.7275547846480834, + "grad_norm": 0.0009059472940862179, + "learning_rate": 3.6930961851097454e-07, + "loss": 0.0, + "num_input_tokens_seen": 102841232, + "step": 152580 + }, + { + "epoch": 3.7276769354799306, + "grad_norm": 3.0146076824166812e-05, + "learning_rate": 3.692434424397033e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102844816, + "step": 152585 + }, + { + "epoch": 3.727799086311778, + "grad_norm": 0.012793498113751411, + "learning_rate": 3.6917727095546314e-07, + "loss": 0.0, + "num_input_tokens_seen": 102847888, + "step": 152590 + }, + { + "epoch": 3.727921237143625, + "grad_norm": 0.0031087123788893223, + "learning_rate": 3.691111040587358e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102850896, + "step": 152595 + }, + { + "epoch": 3.728043387975472, + "grad_norm": 0.0017248626099899411, + "learning_rate": 3.69044941750002e-07, + "loss": 0.0, + "num_input_tokens_seen": 102854032, + "step": 152600 + }, + { + "epoch": 3.7281655388073194, + "grad_norm": 0.02217789925634861, + "learning_rate": 3.6897878402974324e-07, + "loss": 0.0, + "num_input_tokens_seen": 102857296, + "step": 152605 + }, + { + "epoch": 3.7282876896391666, + "grad_norm": 0.0074479603208601475, + "learning_rate": 3.689126308984406e-07, + "loss": 0.0, + "num_input_tokens_seen": 102860624, + "step": 152610 + }, + { + "epoch": 3.7284098404710138, + "grad_norm": 0.04629134759306908, + "learning_rate": 3.688464823565747e-07, + "loss": 0.0335, + "num_input_tokens_seen": 102864528, + "step": 152615 + }, + { + "epoch": 3.7285319913028605, + "grad_norm": 0.0012515024282038212, + "learning_rate": 3.687803384046273e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102867728, + "step": 152620 + }, + { + "epoch": 3.728654142134708, + "grad_norm": 0.0010610457975417376, + "learning_rate": 3.687141990430787e-07, + "loss": 0.0, + "num_input_tokens_seen": 102871248, + "step": 152625 + }, + { + "epoch": 3.728776292966555, + "grad_norm": 0.017728229984641075, + "learning_rate": 3.686480642724102e-07, + "loss": 0.0, + "num_input_tokens_seen": 102874448, + "step": 152630 + }, + { + "epoch": 3.7288984437984025, + "grad_norm": 0.0009284796542488039, + "learning_rate": 3.685819340931031e-07, + "loss": 0.0, + "num_input_tokens_seen": 102877584, + "step": 152635 + }, + { + "epoch": 3.7290205946302493, + "grad_norm": 0.005502808839082718, + "learning_rate": 3.685158085056378e-07, + "loss": 0.0, + "num_input_tokens_seen": 102880976, + "step": 152640 + }, + { + "epoch": 3.7291427454620965, + "grad_norm": 0.003437718376517296, + "learning_rate": 3.6844968751049566e-07, + "loss": 0.0388, + "num_input_tokens_seen": 102884432, + "step": 152645 + }, + { + "epoch": 3.7292648962939436, + "grad_norm": 0.0014162580482661724, + "learning_rate": 3.683835711081569e-07, + "loss": 0.0063, + "num_input_tokens_seen": 102887760, + "step": 152650 + }, + { + "epoch": 3.729387047125791, + "grad_norm": 0.05147543549537659, + "learning_rate": 3.6831745929910306e-07, + "loss": 0.0, + "num_input_tokens_seen": 102890896, + "step": 152655 + }, + { + "epoch": 3.729509197957638, + "grad_norm": 24.238784790039062, + "learning_rate": 3.682513520838142e-07, + "loss": 0.0893, + "num_input_tokens_seen": 102894288, + "step": 152660 + }, + { + "epoch": 3.729631348789485, + "grad_norm": 0.0032417848706245422, + "learning_rate": 3.681852494627714e-07, + "loss": 0.0728, + "num_input_tokens_seen": 102897552, + "step": 152665 + }, + { + "epoch": 3.7297534996213324, + "grad_norm": 0.0010357864666730165, + "learning_rate": 3.681191514364558e-07, + "loss": 0.0, + "num_input_tokens_seen": 102901072, + "step": 152670 + }, + { + "epoch": 3.7298756504531796, + "grad_norm": 0.0016148955328390002, + "learning_rate": 3.6805305800534726e-07, + "loss": 0.0, + "num_input_tokens_seen": 102905488, + "step": 152675 + }, + { + "epoch": 3.729997801285027, + "grad_norm": 0.0070404973812401295, + "learning_rate": 3.679869691699273e-07, + "loss": 0.0, + "num_input_tokens_seen": 102908752, + "step": 152680 + }, + { + "epoch": 3.730119952116874, + "grad_norm": 0.00028679074603132904, + "learning_rate": 3.6792088493067576e-07, + "loss": 0.0, + "num_input_tokens_seen": 102911760, + "step": 152685 + }, + { + "epoch": 3.730242102948721, + "grad_norm": 0.0007746733026579022, + "learning_rate": 3.6785480528807343e-07, + "loss": 0.0, + "num_input_tokens_seen": 102914832, + "step": 152690 + }, + { + "epoch": 3.7303642537805684, + "grad_norm": 0.0019843345507979393, + "learning_rate": 3.677887302426014e-07, + "loss": 0.0598, + "num_input_tokens_seen": 102918096, + "step": 152695 + }, + { + "epoch": 3.7304864046124155, + "grad_norm": 0.9899024963378906, + "learning_rate": 3.677226597947398e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102921552, + "step": 152700 + }, + { + "epoch": 3.7306085554442627, + "grad_norm": 0.14979158341884613, + "learning_rate": 3.676565939449687e-07, + "loss": 0.0, + "num_input_tokens_seen": 102925072, + "step": 152705 + }, + { + "epoch": 3.73073070627611, + "grad_norm": 0.09974019974470139, + "learning_rate": 3.6759053269376927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102928848, + "step": 152710 + }, + { + "epoch": 3.7308528571079567, + "grad_norm": 0.0010769544169306755, + "learning_rate": 3.6752447604162165e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102932432, + "step": 152715 + }, + { + "epoch": 3.7309750079398043, + "grad_norm": 0.05534740537405014, + "learning_rate": 3.674584239890057e-07, + "loss": 0.0, + "num_input_tokens_seen": 102935568, + "step": 152720 + }, + { + "epoch": 3.731097158771651, + "grad_norm": 0.009643294848501682, + "learning_rate": 3.673923765364022e-07, + "loss": 0.0002, + "num_input_tokens_seen": 102938576, + "step": 152725 + }, + { + "epoch": 3.7312193096034982, + "grad_norm": 0.017214003950357437, + "learning_rate": 3.67326333684292e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102942160, + "step": 152730 + }, + { + "epoch": 3.7313414604353454, + "grad_norm": 0.00015420409908983856, + "learning_rate": 3.672602954331544e-07, + "loss": 0.0, + "num_input_tokens_seen": 102945296, + "step": 152735 + }, + { + "epoch": 3.7314636112671926, + "grad_norm": 0.19823694229125977, + "learning_rate": 3.671942617834705e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102948816, + "step": 152740 + }, + { + "epoch": 3.73158576209904, + "grad_norm": 0.0025073799770325422, + "learning_rate": 3.671282327357198e-07, + "loss": 0.0, + "num_input_tokens_seen": 102952336, + "step": 152745 + }, + { + "epoch": 3.731707912930887, + "grad_norm": 0.016093574464321136, + "learning_rate": 3.670622082903828e-07, + "loss": 0.0, + "num_input_tokens_seen": 102955536, + "step": 152750 + }, + { + "epoch": 3.731830063762734, + "grad_norm": 0.0363631546497345, + "learning_rate": 3.6699618844794e-07, + "loss": 0.0, + "num_input_tokens_seen": 102959632, + "step": 152755 + }, + { + "epoch": 3.7319522145945814, + "grad_norm": 0.0034554863814264536, + "learning_rate": 3.6693017320887076e-07, + "loss": 0.0, + "num_input_tokens_seen": 102963536, + "step": 152760 + }, + { + "epoch": 3.7320743654264286, + "grad_norm": 0.31483185291290283, + "learning_rate": 3.6686416257365603e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102966928, + "step": 152765 + }, + { + "epoch": 3.7321965162582758, + "grad_norm": 0.0005507257301360369, + "learning_rate": 3.66798156542775e-07, + "loss": 0.0, + "num_input_tokens_seen": 102970704, + "step": 152770 + }, + { + "epoch": 3.732318667090123, + "grad_norm": 0.0034630734007805586, + "learning_rate": 3.667321551167085e-07, + "loss": 0.0, + "num_input_tokens_seen": 102973648, + "step": 152775 + }, + { + "epoch": 3.73244081792197, + "grad_norm": 0.0002851722820196301, + "learning_rate": 3.666661582959357e-07, + "loss": 0.0, + "num_input_tokens_seen": 102977104, + "step": 152780 + }, + { + "epoch": 3.7325629687538173, + "grad_norm": 0.005233187228441238, + "learning_rate": 3.666001660809369e-07, + "loss": 0.0001, + "num_input_tokens_seen": 102980368, + "step": 152785 + }, + { + "epoch": 3.7326851195856645, + "grad_norm": 0.010846148245036602, + "learning_rate": 3.665341784721925e-07, + "loss": 0.0, + "num_input_tokens_seen": 102983632, + "step": 152790 + }, + { + "epoch": 3.7328072704175117, + "grad_norm": 0.00087640032870695, + "learning_rate": 3.6646819547018147e-07, + "loss": 0.0714, + "num_input_tokens_seen": 102986896, + "step": 152795 + }, + { + "epoch": 3.7329294212493584, + "grad_norm": 0.0027219816111028194, + "learning_rate": 3.6640221707538455e-07, + "loss": 0.0, + "num_input_tokens_seen": 102990224, + "step": 152800 + }, + { + "epoch": 3.733051572081206, + "grad_norm": 0.03295997157692909, + "learning_rate": 3.6633624328828085e-07, + "loss": 0.0, + "num_input_tokens_seen": 102993552, + "step": 152805 + }, + { + "epoch": 3.733173722913053, + "grad_norm": 0.01818210259079933, + "learning_rate": 3.6627027410935063e-07, + "loss": 0.0, + "num_input_tokens_seen": 102996944, + "step": 152810 + }, + { + "epoch": 3.7332958737449005, + "grad_norm": 0.008293227292597294, + "learning_rate": 3.662043095390736e-07, + "loss": 0.0, + "num_input_tokens_seen": 102999952, + "step": 152815 + }, + { + "epoch": 3.733418024576747, + "grad_norm": 0.0011386339319869876, + "learning_rate": 3.661383495779288e-07, + "loss": 0.0, + "num_input_tokens_seen": 103003280, + "step": 152820 + }, + { + "epoch": 3.7335401754085944, + "grad_norm": 0.017195656895637512, + "learning_rate": 3.660723942263967e-07, + "loss": 0.0, + "num_input_tokens_seen": 103006416, + "step": 152825 + }, + { + "epoch": 3.7336623262404416, + "grad_norm": 0.0023615004029124975, + "learning_rate": 3.660064434849565e-07, + "loss": 0.0, + "num_input_tokens_seen": 103009488, + "step": 152830 + }, + { + "epoch": 3.7337844770722888, + "grad_norm": 0.007179385516792536, + "learning_rate": 3.6594049735408816e-07, + "loss": 0.0845, + "num_input_tokens_seen": 103012496, + "step": 152835 + }, + { + "epoch": 3.733906627904136, + "grad_norm": 0.000365541287465021, + "learning_rate": 3.6587455583427074e-07, + "loss": 0.0, + "num_input_tokens_seen": 103016016, + "step": 152840 + }, + { + "epoch": 3.734028778735983, + "grad_norm": 0.004545575473457575, + "learning_rate": 3.6580861892598423e-07, + "loss": 0.0, + "num_input_tokens_seen": 103019088, + "step": 152845 + }, + { + "epoch": 3.7341509295678303, + "grad_norm": 0.1034526601433754, + "learning_rate": 3.657426866297082e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103022416, + "step": 152850 + }, + { + "epoch": 3.7342730803996775, + "grad_norm": 0.0006500378367491066, + "learning_rate": 3.6567675894592174e-07, + "loss": 0.0, + "num_input_tokens_seen": 103025936, + "step": 152855 + }, + { + "epoch": 3.7343952312315247, + "grad_norm": 0.003577360650524497, + "learning_rate": 3.656108358751048e-07, + "loss": 0.012, + "num_input_tokens_seen": 103028880, + "step": 152860 + }, + { + "epoch": 3.734517382063372, + "grad_norm": 0.0014856354100629687, + "learning_rate": 3.655449174177361e-07, + "loss": 0.0, + "num_input_tokens_seen": 103032336, + "step": 152865 + }, + { + "epoch": 3.734639532895219, + "grad_norm": 0.00800647959113121, + "learning_rate": 3.6547900357429585e-07, + "loss": 0.0, + "num_input_tokens_seen": 103035728, + "step": 152870 + }, + { + "epoch": 3.7347616837270663, + "grad_norm": 0.01941361092031002, + "learning_rate": 3.654130943452625e-07, + "loss": 0.0, + "num_input_tokens_seen": 103038800, + "step": 152875 + }, + { + "epoch": 3.7348838345589135, + "grad_norm": 0.18271203339099884, + "learning_rate": 3.653471897311159e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103042576, + "step": 152880 + }, + { + "epoch": 3.7350059853907607, + "grad_norm": 0.003417690983042121, + "learning_rate": 3.6528128973233554e-07, + "loss": 0.0, + "num_input_tokens_seen": 103045904, + "step": 152885 + }, + { + "epoch": 3.735128136222608, + "grad_norm": 0.0034222102258354425, + "learning_rate": 3.652153943494e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103049360, + "step": 152890 + }, + { + "epoch": 3.7352502870544546, + "grad_norm": 0.0035051971208304167, + "learning_rate": 3.6514950358278917e-07, + "loss": 0.0, + "num_input_tokens_seen": 103052752, + "step": 152895 + }, + { + "epoch": 3.7353724378863022, + "grad_norm": 0.05147337168455124, + "learning_rate": 3.6508361743298167e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103055568, + "step": 152900 + }, + { + "epoch": 3.735494588718149, + "grad_norm": 0.002973938826471567, + "learning_rate": 3.6501773590045713e-07, + "loss": 0.0643, + "num_input_tokens_seen": 103059088, + "step": 152905 + }, + { + "epoch": 3.735616739549996, + "grad_norm": 0.0009580811602063477, + "learning_rate": 3.6495185898569405e-07, + "loss": 0.0, + "num_input_tokens_seen": 103061840, + "step": 152910 + }, + { + "epoch": 3.7357388903818434, + "grad_norm": 0.1412133127450943, + "learning_rate": 3.6488598668917224e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103065360, + "step": 152915 + }, + { + "epoch": 3.7358610412136906, + "grad_norm": 0.005600213538855314, + "learning_rate": 3.648201190113703e-07, + "loss": 0.0, + "num_input_tokens_seen": 103068688, + "step": 152920 + }, + { + "epoch": 3.7359831920455377, + "grad_norm": 0.002254885621368885, + "learning_rate": 3.647542559527671e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103072208, + "step": 152925 + }, + { + "epoch": 3.736105342877385, + "grad_norm": 0.0008223111508414149, + "learning_rate": 3.646883975138421e-07, + "loss": 0.0, + "num_input_tokens_seen": 103075856, + "step": 152930 + }, + { + "epoch": 3.736227493709232, + "grad_norm": 0.03418153151869774, + "learning_rate": 3.646225436950735e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103079312, + "step": 152935 + }, + { + "epoch": 3.7363496445410793, + "grad_norm": 0.1998567283153534, + "learning_rate": 3.6455669449694073e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103082704, + "step": 152940 + }, + { + "epoch": 3.7364717953729265, + "grad_norm": 0.009525242261588573, + "learning_rate": 3.6449084991992295e-07, + "loss": 0.0, + "num_input_tokens_seen": 103086288, + "step": 152945 + }, + { + "epoch": 3.7365939462047737, + "grad_norm": 0.07257004827260971, + "learning_rate": 3.644250099644983e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103089424, + "step": 152950 + }, + { + "epoch": 3.736716097036621, + "grad_norm": 0.00019916010205633938, + "learning_rate": 3.643591746311462e-07, + "loss": 0.0, + "num_input_tokens_seen": 103093584, + "step": 152955 + }, + { + "epoch": 3.736838247868468, + "grad_norm": 0.012999081052839756, + "learning_rate": 3.642933439203448e-07, + "loss": 0.0, + "num_input_tokens_seen": 103096592, + "step": 152960 + }, + { + "epoch": 3.7369603987003153, + "grad_norm": 0.000976978917606175, + "learning_rate": 3.6422751783257364e-07, + "loss": 0.0, + "num_input_tokens_seen": 103100240, + "step": 152965 + }, + { + "epoch": 3.7370825495321625, + "grad_norm": 0.002696326235309243, + "learning_rate": 3.6416169636831064e-07, + "loss": 0.0, + "num_input_tokens_seen": 103103376, + "step": 152970 + }, + { + "epoch": 3.7372047003640096, + "grad_norm": 0.013375307433307171, + "learning_rate": 3.640958795280347e-07, + "loss": 0.0, + "num_input_tokens_seen": 103106448, + "step": 152975 + }, + { + "epoch": 3.7373268511958564, + "grad_norm": 0.00033097839332185686, + "learning_rate": 3.6403006731222496e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103110032, + "step": 152980 + }, + { + "epoch": 3.737449002027704, + "grad_norm": 0.00204194663092494, + "learning_rate": 3.6396425972135923e-07, + "loss": 0.0, + "num_input_tokens_seen": 103113360, + "step": 152985 + }, + { + "epoch": 3.7375711528595508, + "grad_norm": 0.03878295421600342, + "learning_rate": 3.638984567559169e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103116496, + "step": 152990 + }, + { + "epoch": 3.7376933036913984, + "grad_norm": 0.00504625029861927, + "learning_rate": 3.6383265841637567e-07, + "loss": 0.0, + "num_input_tokens_seen": 103119952, + "step": 152995 + }, + { + "epoch": 3.737815454523245, + "grad_norm": 0.592056930065155, + "learning_rate": 3.6376686470321447e-07, + "loss": 0.0, + "num_input_tokens_seen": 103123344, + "step": 153000 + }, + { + "epoch": 3.7379376053550923, + "grad_norm": 0.0003572091518435627, + "learning_rate": 3.637010756169121e-07, + "loss": 0.0, + "num_input_tokens_seen": 103126480, + "step": 153005 + }, + { + "epoch": 3.7380597561869395, + "grad_norm": 0.0050292848609387875, + "learning_rate": 3.6363529115794667e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103129936, + "step": 153010 + }, + { + "epoch": 3.7381819070187867, + "grad_norm": 0.0025767607148736715, + "learning_rate": 3.6356951132679626e-07, + "loss": 0.0, + "num_input_tokens_seen": 103133584, + "step": 153015 + }, + { + "epoch": 3.738304057850634, + "grad_norm": 0.0029860185459256172, + "learning_rate": 3.635037361239398e-07, + "loss": 0.0, + "num_input_tokens_seen": 103136848, + "step": 153020 + }, + { + "epoch": 3.738426208682481, + "grad_norm": 0.0033167738001793623, + "learning_rate": 3.6343796554985504e-07, + "loss": 0.0, + "num_input_tokens_seen": 103140048, + "step": 153025 + }, + { + "epoch": 3.7385483595143283, + "grad_norm": 0.02676066942512989, + "learning_rate": 3.6337219960502106e-07, + "loss": 0.0, + "num_input_tokens_seen": 103143376, + "step": 153030 + }, + { + "epoch": 3.7386705103461755, + "grad_norm": 0.0005148989730514586, + "learning_rate": 3.633064382899153e-07, + "loss": 0.0, + "num_input_tokens_seen": 103146640, + "step": 153035 + }, + { + "epoch": 3.7387926611780227, + "grad_norm": 0.01030823029577732, + "learning_rate": 3.632406816050166e-07, + "loss": 0.0, + "num_input_tokens_seen": 103149648, + "step": 153040 + }, + { + "epoch": 3.73891481200987, + "grad_norm": 0.012857099995017052, + "learning_rate": 3.6317492955080263e-07, + "loss": 0.0, + "num_input_tokens_seen": 103152656, + "step": 153045 + }, + { + "epoch": 3.739036962841717, + "grad_norm": 0.002937320852652192, + "learning_rate": 3.6310918212775223e-07, + "loss": 0.0, + "num_input_tokens_seen": 103155984, + "step": 153050 + }, + { + "epoch": 3.7391591136735642, + "grad_norm": 0.0008897144580259919, + "learning_rate": 3.6304343933634284e-07, + "loss": 0.0, + "num_input_tokens_seen": 103159568, + "step": 153055 + }, + { + "epoch": 3.7392812645054114, + "grad_norm": 0.0007698034751228988, + "learning_rate": 3.629777011770532e-07, + "loss": 0.0, + "num_input_tokens_seen": 103162896, + "step": 153060 + }, + { + "epoch": 3.739403415337258, + "grad_norm": 0.002032829448580742, + "learning_rate": 3.629119676503607e-07, + "loss": 0.0, + "num_input_tokens_seen": 103165840, + "step": 153065 + }, + { + "epoch": 3.739525566169106, + "grad_norm": 0.0007218793034553528, + "learning_rate": 3.628462387567437e-07, + "loss": 0.0, + "num_input_tokens_seen": 103169168, + "step": 153070 + }, + { + "epoch": 3.7396477170009526, + "grad_norm": 0.0006760008982382715, + "learning_rate": 3.6278051449668067e-07, + "loss": 0.0, + "num_input_tokens_seen": 103171792, + "step": 153075 + }, + { + "epoch": 3.7397698678328, + "grad_norm": 0.0005427119904197752, + "learning_rate": 3.627147948706487e-07, + "loss": 0.0, + "num_input_tokens_seen": 103175120, + "step": 153080 + }, + { + "epoch": 3.739892018664647, + "grad_norm": 0.23871496319770813, + "learning_rate": 3.626490798791265e-07, + "loss": 0.0182, + "num_input_tokens_seen": 103178000, + "step": 153085 + }, + { + "epoch": 3.740014169496494, + "grad_norm": 0.00962295476347208, + "learning_rate": 3.6258336952259127e-07, + "loss": 0.0, + "num_input_tokens_seen": 103181200, + "step": 153090 + }, + { + "epoch": 3.7401363203283413, + "grad_norm": 0.026946377009153366, + "learning_rate": 3.6251766380152127e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103185040, + "step": 153095 + }, + { + "epoch": 3.7402584711601885, + "grad_norm": 0.007624130696058273, + "learning_rate": 3.6245196271639457e-07, + "loss": 0.031, + "num_input_tokens_seen": 103188880, + "step": 153100 + }, + { + "epoch": 3.7403806219920357, + "grad_norm": 0.00016881691408343613, + "learning_rate": 3.623862662676884e-07, + "loss": 0.0, + "num_input_tokens_seen": 103192336, + "step": 153105 + }, + { + "epoch": 3.740502772823883, + "grad_norm": 0.0038916917983442545, + "learning_rate": 3.6232057445588107e-07, + "loss": 0.0955, + "num_input_tokens_seen": 103195984, + "step": 153110 + }, + { + "epoch": 3.74062492365573, + "grad_norm": 0.000500963709782809, + "learning_rate": 3.622548872814497e-07, + "loss": 0.0, + "num_input_tokens_seen": 103199120, + "step": 153115 + }, + { + "epoch": 3.7407470744875773, + "grad_norm": 0.0008566455217078328, + "learning_rate": 3.621892047448727e-07, + "loss": 0.0, + "num_input_tokens_seen": 103202000, + "step": 153120 + }, + { + "epoch": 3.7408692253194245, + "grad_norm": 0.0006452181842178106, + "learning_rate": 3.6212352684662737e-07, + "loss": 0.0, + "num_input_tokens_seen": 103205392, + "step": 153125 + }, + { + "epoch": 3.7409913761512716, + "grad_norm": 0.0011283294297754765, + "learning_rate": 3.62057853587191e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103208848, + "step": 153130 + }, + { + "epoch": 3.741113526983119, + "grad_norm": 21.12510108947754, + "learning_rate": 3.6199218496704175e-07, + "loss": 0.0969, + "num_input_tokens_seen": 103211984, + "step": 153135 + }, + { + "epoch": 3.741235677814966, + "grad_norm": 0.0006625877576880157, + "learning_rate": 3.619265209866567e-07, + "loss": 0.0, + "num_input_tokens_seen": 103215504, + "step": 153140 + }, + { + "epoch": 3.741357828646813, + "grad_norm": 0.0018214958254247904, + "learning_rate": 3.6186086164651387e-07, + "loss": 0.0, + "num_input_tokens_seen": 103218832, + "step": 153145 + }, + { + "epoch": 3.7414799794786604, + "grad_norm": 27.20746421813965, + "learning_rate": 3.617952069470902e-07, + "loss": 0.0788, + "num_input_tokens_seen": 103222288, + "step": 153150 + }, + { + "epoch": 3.7416021303105076, + "grad_norm": 0.011315912939608097, + "learning_rate": 3.6172955688886343e-07, + "loss": 0.031, + "num_input_tokens_seen": 103225552, + "step": 153155 + }, + { + "epoch": 3.7417242811423543, + "grad_norm": 0.00682856747880578, + "learning_rate": 3.6166391147231126e-07, + "loss": 0.0, + "num_input_tokens_seen": 103230352, + "step": 153160 + }, + { + "epoch": 3.741846431974202, + "grad_norm": 0.08006688207387924, + "learning_rate": 3.615982706979106e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103233488, + "step": 153165 + }, + { + "epoch": 3.7419685828060487, + "grad_norm": 0.004424131475389004, + "learning_rate": 3.6153263456613925e-07, + "loss": 0.0, + "num_input_tokens_seen": 103237264, + "step": 153170 + }, + { + "epoch": 3.7420907336378963, + "grad_norm": 0.0051756673492491245, + "learning_rate": 3.6146700307747403e-07, + "loss": 0.0, + "num_input_tokens_seen": 103240464, + "step": 153175 + }, + { + "epoch": 3.742212884469743, + "grad_norm": 0.11179034411907196, + "learning_rate": 3.6140137623239287e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103243600, + "step": 153180 + }, + { + "epoch": 3.7423350353015903, + "grad_norm": 0.001237615942955017, + "learning_rate": 3.613357540313723e-07, + "loss": 0.0007, + "num_input_tokens_seen": 103246672, + "step": 153185 + }, + { + "epoch": 3.7424571861334375, + "grad_norm": 0.006782029289752245, + "learning_rate": 3.612701364748899e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103250064, + "step": 153190 + }, + { + "epoch": 3.7425793369652847, + "grad_norm": 0.00018652534345164895, + "learning_rate": 3.612045235634232e-07, + "loss": 0.0, + "num_input_tokens_seen": 103253136, + "step": 153195 + }, + { + "epoch": 3.742701487797132, + "grad_norm": 0.001206182991154492, + "learning_rate": 3.6113891529744864e-07, + "loss": 0.0, + "num_input_tokens_seen": 103256592, + "step": 153200 + }, + { + "epoch": 3.742823638628979, + "grad_norm": 0.002523811301216483, + "learning_rate": 3.610733116774441e-07, + "loss": 0.0, + "num_input_tokens_seen": 103259728, + "step": 153205 + }, + { + "epoch": 3.7429457894608262, + "grad_norm": 0.005748201161623001, + "learning_rate": 3.6100771270388606e-07, + "loss": 0.0563, + "num_input_tokens_seen": 103262864, + "step": 153210 + }, + { + "epoch": 3.7430679402926734, + "grad_norm": 0.0019488586112856865, + "learning_rate": 3.6094211837725197e-07, + "loss": 0.0, + "num_input_tokens_seen": 103266192, + "step": 153215 + }, + { + "epoch": 3.7431900911245206, + "grad_norm": 0.0014601226430386305, + "learning_rate": 3.6087652869801846e-07, + "loss": 0.0, + "num_input_tokens_seen": 103269584, + "step": 153220 + }, + { + "epoch": 3.743312241956368, + "grad_norm": 0.0004819185414817184, + "learning_rate": 3.60810943666663e-07, + "loss": 0.0, + "num_input_tokens_seen": 103272784, + "step": 153225 + }, + { + "epoch": 3.743434392788215, + "grad_norm": 6.37445118627511e-05, + "learning_rate": 3.6074536328366235e-07, + "loss": 0.0, + "num_input_tokens_seen": 103276368, + "step": 153230 + }, + { + "epoch": 3.743556543620062, + "grad_norm": 0.004168359562754631, + "learning_rate": 3.606797875494929e-07, + "loss": 0.0, + "num_input_tokens_seen": 103279120, + "step": 153235 + }, + { + "epoch": 3.7436786944519094, + "grad_norm": 0.0013751707738265395, + "learning_rate": 3.606142164646324e-07, + "loss": 0.0, + "num_input_tokens_seen": 103282320, + "step": 153240 + }, + { + "epoch": 3.743800845283756, + "grad_norm": 0.009026645682752132, + "learning_rate": 3.60548650029557e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103285648, + "step": 153245 + }, + { + "epoch": 3.7439229961156038, + "grad_norm": 0.004914161749184132, + "learning_rate": 3.604830882447438e-07, + "loss": 0.0, + "num_input_tokens_seen": 103288592, + "step": 153250 + }, + { + "epoch": 3.7440451469474505, + "grad_norm": 0.07660754024982452, + "learning_rate": 3.6041753111066987e-07, + "loss": 0.0, + "num_input_tokens_seen": 103291792, + "step": 153255 + }, + { + "epoch": 3.744167297779298, + "grad_norm": 0.00439593568444252, + "learning_rate": 3.603519786278114e-07, + "loss": 0.0, + "num_input_tokens_seen": 103295184, + "step": 153260 + }, + { + "epoch": 3.744289448611145, + "grad_norm": 0.0007788151269778609, + "learning_rate": 3.602864307966457e-07, + "loss": 0.1206, + "num_input_tokens_seen": 103298320, + "step": 153265 + }, + { + "epoch": 3.744411599442992, + "grad_norm": 0.13851021230220795, + "learning_rate": 3.6022088761764877e-07, + "loss": 0.0388, + "num_input_tokens_seen": 103301584, + "step": 153270 + }, + { + "epoch": 3.7445337502748393, + "grad_norm": 0.00768559193238616, + "learning_rate": 3.6015534909129796e-07, + "loss": 0.0449, + "num_input_tokens_seen": 103304784, + "step": 153275 + }, + { + "epoch": 3.7446559011066864, + "grad_norm": 0.003182411892339587, + "learning_rate": 3.600898152180692e-07, + "loss": 0.0, + "num_input_tokens_seen": 103308304, + "step": 153280 + }, + { + "epoch": 3.7447780519385336, + "grad_norm": 0.0014601831790059805, + "learning_rate": 3.600242859984395e-07, + "loss": 0.0383, + "num_input_tokens_seen": 103311760, + "step": 153285 + }, + { + "epoch": 3.744900202770381, + "grad_norm": 0.17673958837985992, + "learning_rate": 3.599587614328856e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103314768, + "step": 153290 + }, + { + "epoch": 3.745022353602228, + "grad_norm": 0.007135581690818071, + "learning_rate": 3.598932415218835e-07, + "loss": 0.0, + "num_input_tokens_seen": 103318416, + "step": 153295 + }, + { + "epoch": 3.745144504434075, + "grad_norm": 0.010046053677797318, + "learning_rate": 3.598277262659102e-07, + "loss": 0.0, + "num_input_tokens_seen": 103321296, + "step": 153300 + }, + { + "epoch": 3.7452666552659224, + "grad_norm": 0.005226328037679195, + "learning_rate": 3.597622156654414e-07, + "loss": 0.0, + "num_input_tokens_seen": 103325264, + "step": 153305 + }, + { + "epoch": 3.7453888060977696, + "grad_norm": 0.03831545636057854, + "learning_rate": 3.596967097209541e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103328080, + "step": 153310 + }, + { + "epoch": 3.7455109569296168, + "grad_norm": 0.0007643011631444097, + "learning_rate": 3.596312084329248e-07, + "loss": 0.0, + "num_input_tokens_seen": 103331664, + "step": 153315 + }, + { + "epoch": 3.745633107761464, + "grad_norm": 0.0006596199818886817, + "learning_rate": 3.595657118018297e-07, + "loss": 0.0, + "num_input_tokens_seen": 103335184, + "step": 153320 + }, + { + "epoch": 3.745755258593311, + "grad_norm": 0.006933595985174179, + "learning_rate": 3.595002198281446e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103338704, + "step": 153325 + }, + { + "epoch": 3.7458774094251583, + "grad_norm": 35.37353515625, + "learning_rate": 3.5943473251234656e-07, + "loss": 0.0651, + "num_input_tokens_seen": 103341840, + "step": 153330 + }, + { + "epoch": 3.7459995602570055, + "grad_norm": 0.0031136884354054928, + "learning_rate": 3.5936924985491104e-07, + "loss": 0.0, + "num_input_tokens_seen": 103346000, + "step": 153335 + }, + { + "epoch": 3.7461217110888523, + "grad_norm": 0.0010474255541339517, + "learning_rate": 3.59303771856315e-07, + "loss": 0.0, + "num_input_tokens_seen": 103349136, + "step": 153340 + }, + { + "epoch": 3.7462438619207, + "grad_norm": 0.051398538053035736, + "learning_rate": 3.592382985170339e-07, + "loss": 0.0348, + "num_input_tokens_seen": 103352336, + "step": 153345 + }, + { + "epoch": 3.7463660127525467, + "grad_norm": 0.0012231292203068733, + "learning_rate": 3.591728298375446e-07, + "loss": 0.0, + "num_input_tokens_seen": 103355408, + "step": 153350 + }, + { + "epoch": 3.746488163584394, + "grad_norm": 0.01951722800731659, + "learning_rate": 3.5910736581832246e-07, + "loss": 0.0, + "num_input_tokens_seen": 103358352, + "step": 153355 + }, + { + "epoch": 3.746610314416241, + "grad_norm": 0.003924138844013214, + "learning_rate": 3.5904190645984434e-07, + "loss": 0.0, + "num_input_tokens_seen": 103361680, + "step": 153360 + }, + { + "epoch": 3.7467324652480882, + "grad_norm": 0.009085427969694138, + "learning_rate": 3.589764517625855e-07, + "loss": 0.0, + "num_input_tokens_seen": 103365456, + "step": 153365 + }, + { + "epoch": 3.7468546160799354, + "grad_norm": 0.010301058180630207, + "learning_rate": 3.5891100172702273e-07, + "loss": 0.0, + "num_input_tokens_seen": 103369232, + "step": 153370 + }, + { + "epoch": 3.7469767669117826, + "grad_norm": 0.050948645919561386, + "learning_rate": 3.588455563536311e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103372560, + "step": 153375 + }, + { + "epoch": 3.74709891774363, + "grad_norm": 0.004248334560543299, + "learning_rate": 3.5878011564288714e-07, + "loss": 0.0, + "num_input_tokens_seen": 103376016, + "step": 153380 + }, + { + "epoch": 3.747221068575477, + "grad_norm": 0.001246364787220955, + "learning_rate": 3.587146795952669e-07, + "loss": 0.0, + "num_input_tokens_seen": 103379344, + "step": 153385 + }, + { + "epoch": 3.747343219407324, + "grad_norm": 0.0011733114952221513, + "learning_rate": 3.5864924821124575e-07, + "loss": 0.0, + "num_input_tokens_seen": 103382480, + "step": 153390 + }, + { + "epoch": 3.7474653702391714, + "grad_norm": 465.39776611328125, + "learning_rate": 3.585838214913001e-07, + "loss": 0.0085, + "num_input_tokens_seen": 103385936, + "step": 153395 + }, + { + "epoch": 3.7475875210710186, + "grad_norm": 0.4323878586292267, + "learning_rate": 3.58518399435905e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103389392, + "step": 153400 + }, + { + "epoch": 3.7477096719028657, + "grad_norm": 0.012481575831770897, + "learning_rate": 3.5845298204553676e-07, + "loss": 0.0, + "num_input_tokens_seen": 103393680, + "step": 153405 + }, + { + "epoch": 3.747831822734713, + "grad_norm": 0.0003565066435839981, + "learning_rate": 3.5838756932067126e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103397008, + "step": 153410 + }, + { + "epoch": 3.74795397356656, + "grad_norm": 0.0006096562137827277, + "learning_rate": 3.5832216126178363e-07, + "loss": 0.0321, + "num_input_tokens_seen": 103400784, + "step": 153415 + }, + { + "epoch": 3.7480761243984073, + "grad_norm": 0.0034066636580973864, + "learning_rate": 3.5825675786935006e-07, + "loss": 0.0738, + "num_input_tokens_seen": 103404496, + "step": 153420 + }, + { + "epoch": 3.748198275230254, + "grad_norm": 0.0015200987691059709, + "learning_rate": 3.581913591438457e-07, + "loss": 0.0, + "num_input_tokens_seen": 103407696, + "step": 153425 + }, + { + "epoch": 3.7483204260621017, + "grad_norm": 0.0004193643108010292, + "learning_rate": 3.5812596508574675e-07, + "loss": 0.0, + "num_input_tokens_seen": 103411216, + "step": 153430 + }, + { + "epoch": 3.7484425768939484, + "grad_norm": 0.2604064643383026, + "learning_rate": 3.580605756955284e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103414160, + "step": 153435 + }, + { + "epoch": 3.748564727725796, + "grad_norm": 0.004687589127570391, + "learning_rate": 3.5799519097366593e-07, + "loss": 0.0, + "num_input_tokens_seen": 103417360, + "step": 153440 + }, + { + "epoch": 3.748686878557643, + "grad_norm": 49.20878219604492, + "learning_rate": 3.579298109206353e-07, + "loss": 0.0418, + "num_input_tokens_seen": 103421072, + "step": 153445 + }, + { + "epoch": 3.74880902938949, + "grad_norm": 0.0016561738448217511, + "learning_rate": 3.578644355369116e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103424080, + "step": 153450 + }, + { + "epoch": 3.748931180221337, + "grad_norm": 0.000623115396592766, + "learning_rate": 3.5779906482297073e-07, + "loss": 0.0, + "num_input_tokens_seen": 103427088, + "step": 153455 + }, + { + "epoch": 3.7490533310531844, + "grad_norm": 0.0064060743898153305, + "learning_rate": 3.577336987792874e-07, + "loss": 0.0, + "num_input_tokens_seen": 103430736, + "step": 153460 + }, + { + "epoch": 3.7491754818850316, + "grad_norm": 0.00033515694667585194, + "learning_rate": 3.576683374063374e-07, + "loss": 0.0418, + "num_input_tokens_seen": 103433744, + "step": 153465 + }, + { + "epoch": 3.7492976327168788, + "grad_norm": 0.10513308644294739, + "learning_rate": 3.576029807045964e-07, + "loss": 0.0, + "num_input_tokens_seen": 103438416, + "step": 153470 + }, + { + "epoch": 3.749419783548726, + "grad_norm": 0.008837641216814518, + "learning_rate": 3.5753762867453885e-07, + "loss": 0.075, + "num_input_tokens_seen": 103442128, + "step": 153475 + }, + { + "epoch": 3.749541934380573, + "grad_norm": 0.013438341207802296, + "learning_rate": 3.574722813166409e-07, + "loss": 0.0, + "num_input_tokens_seen": 103445776, + "step": 153480 + }, + { + "epoch": 3.7496640852124203, + "grad_norm": 0.001661556656472385, + "learning_rate": 3.5740693863137696e-07, + "loss": 0.0, + "num_input_tokens_seen": 103448784, + "step": 153485 + }, + { + "epoch": 3.7497862360442675, + "grad_norm": 0.0008842450333759189, + "learning_rate": 3.5734160061922304e-07, + "loss": 0.0, + "num_input_tokens_seen": 103452112, + "step": 153490 + }, + { + "epoch": 3.7499083868761147, + "grad_norm": 0.0011517205275595188, + "learning_rate": 3.572762672806534e-07, + "loss": 0.0354, + "num_input_tokens_seen": 103455696, + "step": 153495 + }, + { + "epoch": 3.750030537707962, + "grad_norm": 0.0016859594034031034, + "learning_rate": 3.572109386161436e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103458896, + "step": 153500 + }, + { + "epoch": 3.750152688539809, + "grad_norm": 0.0004544697585515678, + "learning_rate": 3.571456146261691e-07, + "loss": 0.0, + "num_input_tokens_seen": 103462352, + "step": 153505 + }, + { + "epoch": 3.750274839371656, + "grad_norm": 0.002033928642049432, + "learning_rate": 3.5708029531120433e-07, + "loss": 0.0, + "num_input_tokens_seen": 103465808, + "step": 153510 + }, + { + "epoch": 3.750274839371656, + "eval_loss": 0.24138930439949036, + "eval_runtime": 47.7875, + "eval_samples_per_second": 761.392, + "eval_steps_per_second": 95.192, + "num_input_tokens_seen": 103465808, + "step": 153510 + }, + { + "epoch": 3.7503969902035035, + "grad_norm": 0.0008828761638142169, + "learning_rate": 3.5701498067172487e-07, + "loss": 0.0, + "num_input_tokens_seen": 103468944, + "step": 153515 + }, + { + "epoch": 3.7505191410353502, + "grad_norm": 0.0001341873430646956, + "learning_rate": 3.5694967070820514e-07, + "loss": 0.0, + "num_input_tokens_seen": 103472464, + "step": 153520 + }, + { + "epoch": 3.750641291867198, + "grad_norm": 0.006082539912313223, + "learning_rate": 3.5688436542112054e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103476176, + "step": 153525 + }, + { + "epoch": 3.7507634426990446, + "grad_norm": 0.0005558205884881318, + "learning_rate": 3.5681906481094557e-07, + "loss": 0.0, + "num_input_tokens_seen": 103480336, + "step": 153530 + }, + { + "epoch": 3.750885593530892, + "grad_norm": 0.0016473763389512897, + "learning_rate": 3.5675376887815577e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103483728, + "step": 153535 + }, + { + "epoch": 3.751007744362739, + "grad_norm": 0.011921056546270847, + "learning_rate": 3.566884776232255e-07, + "loss": 0.0, + "num_input_tokens_seen": 103487248, + "step": 153540 + }, + { + "epoch": 3.751129895194586, + "grad_norm": 0.00046353053767234087, + "learning_rate": 3.566231910466293e-07, + "loss": 0.0, + "num_input_tokens_seen": 103490704, + "step": 153545 + }, + { + "epoch": 3.7512520460264334, + "grad_norm": 0.001445167581550777, + "learning_rate": 3.5655790914884264e-07, + "loss": 0.0, + "num_input_tokens_seen": 103494160, + "step": 153550 + }, + { + "epoch": 3.7513741968582806, + "grad_norm": 0.003341602860018611, + "learning_rate": 3.5649263193033964e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103497424, + "step": 153555 + }, + { + "epoch": 3.7514963476901277, + "grad_norm": 0.005969295743852854, + "learning_rate": 3.564273593915953e-07, + "loss": 0.0, + "num_input_tokens_seen": 103500944, + "step": 153560 + }, + { + "epoch": 3.751618498521975, + "grad_norm": 0.01053598988801241, + "learning_rate": 3.563620915330846e-07, + "loss": 0.0, + "num_input_tokens_seen": 103504720, + "step": 153565 + }, + { + "epoch": 3.751740649353822, + "grad_norm": 0.0017889856826514006, + "learning_rate": 3.5629682835528153e-07, + "loss": 0.0, + "num_input_tokens_seen": 103507664, + "step": 153570 + }, + { + "epoch": 3.7518628001856693, + "grad_norm": 39.256404876708984, + "learning_rate": 3.562315698586614e-07, + "loss": 0.0563, + "num_input_tokens_seen": 103510928, + "step": 153575 + }, + { + "epoch": 3.7519849510175165, + "grad_norm": 0.0006424240418709815, + "learning_rate": 3.561663160436982e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103514128, + "step": 153580 + }, + { + "epoch": 3.7521071018493637, + "grad_norm": 0.00029739251476712525, + "learning_rate": 3.5610106691086696e-07, + "loss": 0.0, + "num_input_tokens_seen": 103518096, + "step": 153585 + }, + { + "epoch": 3.752229252681211, + "grad_norm": 49.057334899902344, + "learning_rate": 3.5603582246064165e-07, + "loss": 0.0297, + "num_input_tokens_seen": 103521424, + "step": 153590 + }, + { + "epoch": 3.752351403513058, + "grad_norm": 0.0034984839148819447, + "learning_rate": 3.55970582693497e-07, + "loss": 0.0, + "num_input_tokens_seen": 103524880, + "step": 153595 + }, + { + "epoch": 3.7524735543449053, + "grad_norm": 0.00019109105051029474, + "learning_rate": 3.55905347609908e-07, + "loss": 0.075, + "num_input_tokens_seen": 103528784, + "step": 153600 + }, + { + "epoch": 3.752595705176752, + "grad_norm": 0.028764499351382256, + "learning_rate": 3.55840117210348e-07, + "loss": 0.0, + "num_input_tokens_seen": 103532560, + "step": 153605 + }, + { + "epoch": 3.7527178560085996, + "grad_norm": 0.05303001031279564, + "learning_rate": 3.557748914952924e-07, + "loss": 0.0, + "num_input_tokens_seen": 103535504, + "step": 153610 + }, + { + "epoch": 3.7528400068404464, + "grad_norm": 0.002212024061009288, + "learning_rate": 3.557096704652147e-07, + "loss": 0.0, + "num_input_tokens_seen": 103538448, + "step": 153615 + }, + { + "epoch": 3.752962157672294, + "grad_norm": 0.0033569305669516325, + "learning_rate": 3.5564445412058984e-07, + "loss": 0.0, + "num_input_tokens_seen": 103541584, + "step": 153620 + }, + { + "epoch": 3.7530843085041408, + "grad_norm": 0.021345417946577072, + "learning_rate": 3.5557924246189153e-07, + "loss": 0.0, + "num_input_tokens_seen": 103544528, + "step": 153625 + }, + { + "epoch": 3.753206459335988, + "grad_norm": 0.0057569025084376335, + "learning_rate": 3.555140354895947e-07, + "loss": 0.0, + "num_input_tokens_seen": 103547728, + "step": 153630 + }, + { + "epoch": 3.753328610167835, + "grad_norm": 9.103876072913408e-05, + "learning_rate": 3.5544883320417276e-07, + "loss": 0.0, + "num_input_tokens_seen": 103550928, + "step": 153635 + }, + { + "epoch": 3.7534507609996823, + "grad_norm": 0.0010332964593544602, + "learning_rate": 3.553836356061005e-07, + "loss": 0.0, + "num_input_tokens_seen": 103554064, + "step": 153640 + }, + { + "epoch": 3.7535729118315295, + "grad_norm": 0.3927779793739319, + "learning_rate": 3.5531844269585164e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103557200, + "step": 153645 + }, + { + "epoch": 3.7536950626633767, + "grad_norm": 0.0036919882986694574, + "learning_rate": 3.5525325447390075e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103560528, + "step": 153650 + }, + { + "epoch": 3.753817213495224, + "grad_norm": 0.003987174481153488, + "learning_rate": 3.5518807094072123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103563920, + "step": 153655 + }, + { + "epoch": 3.753939364327071, + "grad_norm": 65.58088684082031, + "learning_rate": 3.5512289209678794e-07, + "loss": 0.0536, + "num_input_tokens_seen": 103566864, + "step": 153660 + }, + { + "epoch": 3.7540615151589183, + "grad_norm": 0.0005408762372098863, + "learning_rate": 3.55057717942574e-07, + "loss": 0.0, + "num_input_tokens_seen": 103570960, + "step": 153665 + }, + { + "epoch": 3.7541836659907655, + "grad_norm": 0.0010324962204322219, + "learning_rate": 3.549925484785541e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103574608, + "step": 153670 + }, + { + "epoch": 3.7543058168226127, + "grad_norm": 0.002635074546560645, + "learning_rate": 3.5492738370520157e-07, + "loss": 0.0, + "num_input_tokens_seen": 103578832, + "step": 153675 + }, + { + "epoch": 3.75442796765446, + "grad_norm": 0.0025064474903047085, + "learning_rate": 3.54862223622991e-07, + "loss": 0.0, + "num_input_tokens_seen": 103582032, + "step": 153680 + }, + { + "epoch": 3.754550118486307, + "grad_norm": 0.0022307131439447403, + "learning_rate": 3.5479706823239554e-07, + "loss": 0.0, + "num_input_tokens_seen": 103585040, + "step": 153685 + }, + { + "epoch": 3.754672269318154, + "grad_norm": 0.00514277582988143, + "learning_rate": 3.5473191753388923e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103588240, + "step": 153690 + }, + { + "epoch": 3.7547944201500014, + "grad_norm": 0.008436059579253197, + "learning_rate": 3.5466677152794634e-07, + "loss": 0.0, + "num_input_tokens_seen": 103591632, + "step": 153695 + }, + { + "epoch": 3.754916570981848, + "grad_norm": 0.002367235254496336, + "learning_rate": 3.5460163021503996e-07, + "loss": 0.0, + "num_input_tokens_seen": 103594960, + "step": 153700 + }, + { + "epoch": 3.755038721813696, + "grad_norm": 2.3470609188079834, + "learning_rate": 3.545364935956445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103598544, + "step": 153705 + }, + { + "epoch": 3.7551608726455425, + "grad_norm": 0.0022390945814549923, + "learning_rate": 3.5447136167023286e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103602064, + "step": 153710 + }, + { + "epoch": 3.7552830234773897, + "grad_norm": 0.0007741377921774983, + "learning_rate": 3.544062344392791e-07, + "loss": 0.0, + "num_input_tokens_seen": 103605136, + "step": 153715 + }, + { + "epoch": 3.755405174309237, + "grad_norm": 0.0004159125383011997, + "learning_rate": 3.5434111190325724e-07, + "loss": 0.0, + "num_input_tokens_seen": 103608336, + "step": 153720 + }, + { + "epoch": 3.755527325141084, + "grad_norm": 0.0019826493225991726, + "learning_rate": 3.542759940626401e-07, + "loss": 0.0, + "num_input_tokens_seen": 103611600, + "step": 153725 + }, + { + "epoch": 3.7556494759729313, + "grad_norm": 0.08303279429674149, + "learning_rate": 3.54210880917902e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103614480, + "step": 153730 + }, + { + "epoch": 3.7557716268047785, + "grad_norm": 0.003127832431346178, + "learning_rate": 3.541457724695156e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103617872, + "step": 153735 + }, + { + "epoch": 3.7558937776366257, + "grad_norm": 0.0069281598553061485, + "learning_rate": 3.540806687179553e-07, + "loss": 0.002, + "num_input_tokens_seen": 103621008, + "step": 153740 + }, + { + "epoch": 3.756015928468473, + "grad_norm": 0.007652719039469957, + "learning_rate": 3.5401556966369405e-07, + "loss": 0.0, + "num_input_tokens_seen": 103624272, + "step": 153745 + }, + { + "epoch": 3.75613807930032, + "grad_norm": 0.001770931645296514, + "learning_rate": 3.5395047530720513e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103627216, + "step": 153750 + }, + { + "epoch": 3.7562602301321673, + "grad_norm": 36.390316009521484, + "learning_rate": 3.5388538564896233e-07, + "loss": 0.1025, + "num_input_tokens_seen": 103630544, + "step": 153755 + }, + { + "epoch": 3.7563823809640144, + "grad_norm": 0.03158455714583397, + "learning_rate": 3.5382030068943845e-07, + "loss": 0.0, + "num_input_tokens_seen": 103633680, + "step": 153760 + }, + { + "epoch": 3.7565045317958616, + "grad_norm": 0.0006475714035332203, + "learning_rate": 3.5375522042910756e-07, + "loss": 0.0, + "num_input_tokens_seen": 103636752, + "step": 153765 + }, + { + "epoch": 3.756626682627709, + "grad_norm": 0.00044167076703161, + "learning_rate": 3.5369014486844205e-07, + "loss": 0.0, + "num_input_tokens_seen": 103639888, + "step": 153770 + }, + { + "epoch": 3.756748833459556, + "grad_norm": 0.0006852021906524897, + "learning_rate": 3.536250740079161e-07, + "loss": 0.0, + "num_input_tokens_seen": 103643024, + "step": 153775 + }, + { + "epoch": 3.756870984291403, + "grad_norm": 0.0010416110744699836, + "learning_rate": 3.535600078480021e-07, + "loss": 0.0006, + "num_input_tokens_seen": 103646160, + "step": 153780 + }, + { + "epoch": 3.75699313512325, + "grad_norm": 53.415870666503906, + "learning_rate": 3.5349494638917354e-07, + "loss": 0.0691, + "num_input_tokens_seen": 103648976, + "step": 153785 + }, + { + "epoch": 3.7571152859550976, + "grad_norm": 0.0010279123671352863, + "learning_rate": 3.53429889631904e-07, + "loss": 0.0, + "num_input_tokens_seen": 103652112, + "step": 153790 + }, + { + "epoch": 3.7572374367869443, + "grad_norm": 0.0022128450218588114, + "learning_rate": 3.533648375766659e-07, + "loss": 0.0, + "num_input_tokens_seen": 103655056, + "step": 153795 + }, + { + "epoch": 3.7573595876187915, + "grad_norm": 0.0005283782375045121, + "learning_rate": 3.5329979022393296e-07, + "loss": 0.0007, + "num_input_tokens_seen": 103658448, + "step": 153800 + }, + { + "epoch": 3.7574817384506387, + "grad_norm": 0.03994615003466606, + "learning_rate": 3.532347475741776e-07, + "loss": 0.0, + "num_input_tokens_seen": 103661584, + "step": 153805 + }, + { + "epoch": 3.757603889282486, + "grad_norm": 28.16742515563965, + "learning_rate": 3.5316970962787295e-07, + "loss": 0.0566, + "num_input_tokens_seen": 103665296, + "step": 153810 + }, + { + "epoch": 3.757726040114333, + "grad_norm": 0.0012795196380466223, + "learning_rate": 3.5310467638549256e-07, + "loss": 0.0, + "num_input_tokens_seen": 103668240, + "step": 153815 + }, + { + "epoch": 3.7578481909461803, + "grad_norm": 0.0019318463746458292, + "learning_rate": 3.5303964784750875e-07, + "loss": 0.0, + "num_input_tokens_seen": 103671568, + "step": 153820 + }, + { + "epoch": 3.7579703417780275, + "grad_norm": 0.008436155505478382, + "learning_rate": 3.529746240143948e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103674832, + "step": 153825 + }, + { + "epoch": 3.7580924926098747, + "grad_norm": 0.0005548412445932627, + "learning_rate": 3.5290960488662316e-07, + "loss": 0.0, + "num_input_tokens_seen": 103679312, + "step": 153830 + }, + { + "epoch": 3.758214643441722, + "grad_norm": 0.0015689019346609712, + "learning_rate": 3.528445904646672e-07, + "loss": 0.0, + "num_input_tokens_seen": 103683088, + "step": 153835 + }, + { + "epoch": 3.758336794273569, + "grad_norm": 0.00017826675320975482, + "learning_rate": 3.527795807489992e-07, + "loss": 0.0751, + "num_input_tokens_seen": 103687312, + "step": 153840 + }, + { + "epoch": 3.7584589451054162, + "grad_norm": 0.058404359966516495, + "learning_rate": 3.5271457574009246e-07, + "loss": 0.0, + "num_input_tokens_seen": 103690832, + "step": 153845 + }, + { + "epoch": 3.7585810959372634, + "grad_norm": 0.0002906270674429834, + "learning_rate": 3.5264957543841935e-07, + "loss": 0.0, + "num_input_tokens_seen": 103694032, + "step": 153850 + }, + { + "epoch": 3.7587032467691106, + "grad_norm": 0.0008786988910287619, + "learning_rate": 3.5258457984445234e-07, + "loss": 0.0, + "num_input_tokens_seen": 103697488, + "step": 153855 + }, + { + "epoch": 3.758825397600958, + "grad_norm": 0.008816512301564217, + "learning_rate": 3.5251958895866487e-07, + "loss": 0.0, + "num_input_tokens_seen": 103701072, + "step": 153860 + }, + { + "epoch": 3.758947548432805, + "grad_norm": 0.0047731283120810986, + "learning_rate": 3.5245460278152863e-07, + "loss": 0.0, + "num_input_tokens_seen": 103704016, + "step": 153865 + }, + { + "epoch": 3.7590696992646517, + "grad_norm": 0.24734684824943542, + "learning_rate": 3.523896213135167e-07, + "loss": 0.0834, + "num_input_tokens_seen": 103707472, + "step": 153870 + }, + { + "epoch": 3.7591918500964994, + "grad_norm": 0.005535752046853304, + "learning_rate": 3.523246445551019e-07, + "loss": 0.0489, + "num_input_tokens_seen": 103711120, + "step": 153875 + }, + { + "epoch": 3.759314000928346, + "grad_norm": 0.026731478050351143, + "learning_rate": 3.5225967250675623e-07, + "loss": 0.0, + "num_input_tokens_seen": 103714192, + "step": 153880 + }, + { + "epoch": 3.7594361517601937, + "grad_norm": 0.0007929136045277119, + "learning_rate": 3.521947051689528e-07, + "loss": 0.0, + "num_input_tokens_seen": 103717648, + "step": 153885 + }, + { + "epoch": 3.7595583025920405, + "grad_norm": 0.01964680105447769, + "learning_rate": 3.5212974254216343e-07, + "loss": 0.0, + "num_input_tokens_seen": 103721104, + "step": 153890 + }, + { + "epoch": 3.7596804534238877, + "grad_norm": 0.072999969124794, + "learning_rate": 3.5206478462686106e-07, + "loss": 0.0, + "num_input_tokens_seen": 103724432, + "step": 153895 + }, + { + "epoch": 3.759802604255735, + "grad_norm": 0.005552014335989952, + "learning_rate": 3.5199983142351753e-07, + "loss": 0.0, + "num_input_tokens_seen": 103728016, + "step": 153900 + }, + { + "epoch": 3.759924755087582, + "grad_norm": 46.817413330078125, + "learning_rate": 3.5193488293260554e-07, + "loss": 0.0336, + "num_input_tokens_seen": 103731472, + "step": 153905 + }, + { + "epoch": 3.7600469059194292, + "grad_norm": 0.0019489700207486749, + "learning_rate": 3.5186993915459773e-07, + "loss": 0.104, + "num_input_tokens_seen": 103734864, + "step": 153910 + }, + { + "epoch": 3.7601690567512764, + "grad_norm": 0.019930588081479073, + "learning_rate": 3.5180500008996574e-07, + "loss": 0.0838, + "num_input_tokens_seen": 103738384, + "step": 153915 + }, + { + "epoch": 3.7602912075831236, + "grad_norm": 0.013438960537314415, + "learning_rate": 3.517400657391824e-07, + "loss": 0.0201, + "num_input_tokens_seen": 103741776, + "step": 153920 + }, + { + "epoch": 3.760413358414971, + "grad_norm": 0.00897922832518816, + "learning_rate": 3.516751361027194e-07, + "loss": 0.0004, + "num_input_tokens_seen": 103744848, + "step": 153925 + }, + { + "epoch": 3.760535509246818, + "grad_norm": 0.017759116366505623, + "learning_rate": 3.516102111810494e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103747920, + "step": 153930 + }, + { + "epoch": 3.760657660078665, + "grad_norm": 0.2966800332069397, + "learning_rate": 3.5154529097464413e-07, + "loss": 0.0715, + "num_input_tokens_seen": 103751056, + "step": 153935 + }, + { + "epoch": 3.7607798109105124, + "grad_norm": 0.0029404417145997286, + "learning_rate": 3.5148037548397616e-07, + "loss": 0.0, + "num_input_tokens_seen": 103754448, + "step": 153940 + }, + { + "epoch": 3.7609019617423596, + "grad_norm": 0.018912794068455696, + "learning_rate": 3.514154647095171e-07, + "loss": 0.0224, + "num_input_tokens_seen": 103757648, + "step": 153945 + }, + { + "epoch": 3.7610241125742068, + "grad_norm": 0.008374440483748913, + "learning_rate": 3.5135055865173943e-07, + "loss": 0.0, + "num_input_tokens_seen": 103760528, + "step": 153950 + }, + { + "epoch": 3.761146263406054, + "grad_norm": 0.05202620476484299, + "learning_rate": 3.512856573111147e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103763728, + "step": 153955 + }, + { + "epoch": 3.761268414237901, + "grad_norm": 0.008036044426262379, + "learning_rate": 3.512207606881156e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103766928, + "step": 153960 + }, + { + "epoch": 3.761390565069748, + "grad_norm": 26.79295539855957, + "learning_rate": 3.511558687832131e-07, + "loss": 0.0205, + "num_input_tokens_seen": 103771280, + "step": 153965 + }, + { + "epoch": 3.7615127159015955, + "grad_norm": 0.0021897167898714542, + "learning_rate": 3.510909815968801e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103774480, + "step": 153970 + }, + { + "epoch": 3.7616348667334423, + "grad_norm": 0.1314050406217575, + "learning_rate": 3.510260991295876e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103777552, + "step": 153975 + }, + { + "epoch": 3.7617570175652895, + "grad_norm": 0.0008685789653100073, + "learning_rate": 3.509612213818083e-07, + "loss": 0.0, + "num_input_tokens_seen": 103781264, + "step": 153980 + }, + { + "epoch": 3.7618791683971367, + "grad_norm": 0.0010705238673835993, + "learning_rate": 3.5089634835401317e-07, + "loss": 0.0, + "num_input_tokens_seen": 103785104, + "step": 153985 + }, + { + "epoch": 3.762001319228984, + "grad_norm": 0.0003547889646142721, + "learning_rate": 3.5083148004667474e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103788688, + "step": 153990 + }, + { + "epoch": 3.762123470060831, + "grad_norm": 0.003215220058336854, + "learning_rate": 3.5076661646026396e-07, + "loss": 0.0, + "num_input_tokens_seen": 103791760, + "step": 153995 + }, + { + "epoch": 3.762245620892678, + "grad_norm": 0.07013550400733948, + "learning_rate": 3.507017575952531e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103795024, + "step": 154000 + }, + { + "epoch": 3.7623677717245254, + "grad_norm": 0.0026873864699155092, + "learning_rate": 3.5063690345211396e-07, + "loss": 0.0, + "num_input_tokens_seen": 103798480, + "step": 154005 + }, + { + "epoch": 3.7624899225563726, + "grad_norm": 0.004696927964687347, + "learning_rate": 3.505720540313176e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103801616, + "step": 154010 + }, + { + "epoch": 3.76261207338822, + "grad_norm": 0.01624949276447296, + "learning_rate": 3.5050720933333634e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103804496, + "step": 154015 + }, + { + "epoch": 3.762734224220067, + "grad_norm": 0.003025223733857274, + "learning_rate": 3.504423693586409e-07, + "loss": 0.0, + "num_input_tokens_seen": 103808208, + "step": 154020 + }, + { + "epoch": 3.762856375051914, + "grad_norm": 83.87301635742188, + "learning_rate": 3.503775341077033e-07, + "loss": 0.0018, + "num_input_tokens_seen": 103811920, + "step": 154025 + }, + { + "epoch": 3.7629785258837614, + "grad_norm": 0.0039696102030575275, + "learning_rate": 3.503127035809953e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103815120, + "step": 154030 + }, + { + "epoch": 3.7631006767156086, + "grad_norm": 0.0053939297795295715, + "learning_rate": 3.502478777789878e-07, + "loss": 0.0, + "num_input_tokens_seen": 103818640, + "step": 154035 + }, + { + "epoch": 3.7632228275474557, + "grad_norm": 0.0019495252054184675, + "learning_rate": 3.501830567021528e-07, + "loss": 0.0, + "num_input_tokens_seen": 103822672, + "step": 154040 + }, + { + "epoch": 3.763344978379303, + "grad_norm": 0.024338144809007645, + "learning_rate": 3.5011824035096104e-07, + "loss": 0.0, + "num_input_tokens_seen": 103825680, + "step": 154045 + }, + { + "epoch": 3.7634671292111497, + "grad_norm": 0.026606090366840363, + "learning_rate": 3.500534287258846e-07, + "loss": 0.0, + "num_input_tokens_seen": 103828880, + "step": 154050 + }, + { + "epoch": 3.7635892800429973, + "grad_norm": 0.0008966674213297665, + "learning_rate": 3.4998862182739444e-07, + "loss": 0.0, + "num_input_tokens_seen": 103832336, + "step": 154055 + }, + { + "epoch": 3.763711430874844, + "grad_norm": 0.0002649786474648863, + "learning_rate": 3.499238196559615e-07, + "loss": 0.0, + "num_input_tokens_seen": 103836112, + "step": 154060 + }, + { + "epoch": 3.7638335817066917, + "grad_norm": 0.005120360292494297, + "learning_rate": 3.4985902221205775e-07, + "loss": 0.0, + "num_input_tokens_seen": 103839248, + "step": 154065 + }, + { + "epoch": 3.7639557325385384, + "grad_norm": 0.07683884352445602, + "learning_rate": 3.497942294961537e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103842512, + "step": 154070 + }, + { + "epoch": 3.7640778833703856, + "grad_norm": 0.016874030232429504, + "learning_rate": 3.497294415087212e-07, + "loss": 0.0, + "num_input_tokens_seen": 103846544, + "step": 154075 + }, + { + "epoch": 3.764200034202233, + "grad_norm": 0.004456295166164637, + "learning_rate": 3.496646582502308e-07, + "loss": 0.043, + "num_input_tokens_seen": 103850000, + "step": 154080 + }, + { + "epoch": 3.76432218503408, + "grad_norm": 0.0006547545199282467, + "learning_rate": 3.4959987972115437e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103853520, + "step": 154085 + }, + { + "epoch": 3.764444335865927, + "grad_norm": 0.011782302521169186, + "learning_rate": 3.49535105921962e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103856848, + "step": 154090 + }, + { + "epoch": 3.7645664866977744, + "grad_norm": 36.85483932495117, + "learning_rate": 3.494703368531254e-07, + "loss": 0.0337, + "num_input_tokens_seen": 103860112, + "step": 154095 + }, + { + "epoch": 3.7646886375296216, + "grad_norm": 0.0012486326741054654, + "learning_rate": 3.494055725151158e-07, + "loss": 0.0, + "num_input_tokens_seen": 103863312, + "step": 154100 + }, + { + "epoch": 3.7648107883614688, + "grad_norm": 0.0011928676394745708, + "learning_rate": 3.4934081290840367e-07, + "loss": 0.0, + "num_input_tokens_seen": 103866576, + "step": 154105 + }, + { + "epoch": 3.764932939193316, + "grad_norm": 0.0329124741256237, + "learning_rate": 3.492760580334603e-07, + "loss": 0.0004, + "num_input_tokens_seen": 103869968, + "step": 154110 + }, + { + "epoch": 3.765055090025163, + "grad_norm": 0.00763581832870841, + "learning_rate": 3.492113078907563e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103873296, + "step": 154115 + }, + { + "epoch": 3.7651772408570103, + "grad_norm": 0.12216723710298538, + "learning_rate": 3.4914656248076256e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103876560, + "step": 154120 + }, + { + "epoch": 3.7652993916888575, + "grad_norm": 7.318980351556093e-05, + "learning_rate": 3.490818218039504e-07, + "loss": 0.0, + "num_input_tokens_seen": 103879632, + "step": 154125 + }, + { + "epoch": 3.7654215425207047, + "grad_norm": 0.14317382872104645, + "learning_rate": 3.4901708586079003e-07, + "loss": 0.0, + "num_input_tokens_seen": 103883216, + "step": 154130 + }, + { + "epoch": 3.7655436933525515, + "grad_norm": 0.06965726613998413, + "learning_rate": 3.4895235465175286e-07, + "loss": 0.0, + "num_input_tokens_seen": 103886672, + "step": 154135 + }, + { + "epoch": 3.765665844184399, + "grad_norm": 0.005927124992012978, + "learning_rate": 3.488876281773089e-07, + "loss": 0.0, + "num_input_tokens_seen": 103889680, + "step": 154140 + }, + { + "epoch": 3.765787995016246, + "grad_norm": 0.0025228143204003572, + "learning_rate": 3.4882290643792967e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103893584, + "step": 154145 + }, + { + "epoch": 3.7659101458480935, + "grad_norm": 0.0008066084701567888, + "learning_rate": 3.4875818943408496e-07, + "loss": 0.0, + "num_input_tokens_seen": 103897168, + "step": 154150 + }, + { + "epoch": 3.76603229667994, + "grad_norm": 0.0005203133332543075, + "learning_rate": 3.486934771662462e-07, + "loss": 0.0, + "num_input_tokens_seen": 103900624, + "step": 154155 + }, + { + "epoch": 3.7661544475117874, + "grad_norm": 0.018812181428074837, + "learning_rate": 3.4862876963488375e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103903376, + "step": 154160 + }, + { + "epoch": 3.7662765983436346, + "grad_norm": 0.005005580838769674, + "learning_rate": 3.4856406684046767e-07, + "loss": 0.0143, + "num_input_tokens_seen": 103906704, + "step": 154165 + }, + { + "epoch": 3.766398749175482, + "grad_norm": 0.0023675684351474047, + "learning_rate": 3.484993687834693e-07, + "loss": 0.0, + "num_input_tokens_seen": 103910160, + "step": 154170 + }, + { + "epoch": 3.766520900007329, + "grad_norm": 0.5319631099700928, + "learning_rate": 3.4843467546435836e-07, + "loss": 0.0002, + "num_input_tokens_seen": 103913424, + "step": 154175 + }, + { + "epoch": 3.766643050839176, + "grad_norm": 0.001158917206339538, + "learning_rate": 3.4836998688360576e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103916688, + "step": 154180 + }, + { + "epoch": 3.7667652016710234, + "grad_norm": 0.0007587404688820243, + "learning_rate": 3.4830530304168216e-07, + "loss": 0.0, + "num_input_tokens_seen": 103919952, + "step": 154185 + }, + { + "epoch": 3.7668873525028705, + "grad_norm": 0.0037255992647260427, + "learning_rate": 3.482406239390574e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103923280, + "step": 154190 + }, + { + "epoch": 3.7670095033347177, + "grad_norm": 0.00011486539733596146, + "learning_rate": 3.4817594957620243e-07, + "loss": 0.0, + "num_input_tokens_seen": 103926544, + "step": 154195 + }, + { + "epoch": 3.767131654166565, + "grad_norm": 0.0011237571015954018, + "learning_rate": 3.4811127995358693e-07, + "loss": 0.0, + "num_input_tokens_seen": 103929936, + "step": 154200 + }, + { + "epoch": 3.767253804998412, + "grad_norm": 0.0001079161957022734, + "learning_rate": 3.4804661507168186e-07, + "loss": 0.0, + "num_input_tokens_seen": 103933456, + "step": 154205 + }, + { + "epoch": 3.7673759558302593, + "grad_norm": 0.0612591914832592, + "learning_rate": 3.4798195493095683e-07, + "loss": 0.075, + "num_input_tokens_seen": 103936656, + "step": 154210 + }, + { + "epoch": 3.7674981066621065, + "grad_norm": 0.0009961389005184174, + "learning_rate": 3.4791729953188243e-07, + "loss": 0.0, + "num_input_tokens_seen": 103940048, + "step": 154215 + }, + { + "epoch": 3.7676202574939537, + "grad_norm": 0.0005742756766267121, + "learning_rate": 3.4785264887492914e-07, + "loss": 0.0, + "num_input_tokens_seen": 103943376, + "step": 154220 + }, + { + "epoch": 3.767742408325801, + "grad_norm": 0.0006218006019480526, + "learning_rate": 3.477880029605665e-07, + "loss": 0.0004, + "num_input_tokens_seen": 103947216, + "step": 154225 + }, + { + "epoch": 3.7678645591576476, + "grad_norm": 0.020232796669006348, + "learning_rate": 3.477233617892652e-07, + "loss": 0.0004, + "num_input_tokens_seen": 103951120, + "step": 154230 + }, + { + "epoch": 3.7679867099894953, + "grad_norm": 0.003948678728193045, + "learning_rate": 3.476587253614948e-07, + "loss": 0.0, + "num_input_tokens_seen": 103954576, + "step": 154235 + }, + { + "epoch": 3.768108860821342, + "grad_norm": 0.001216245349496603, + "learning_rate": 3.4759409367772586e-07, + "loss": 0.0001, + "num_input_tokens_seen": 103957840, + "step": 154240 + }, + { + "epoch": 3.7682310116531896, + "grad_norm": 0.00024403842689935118, + "learning_rate": 3.475294667384279e-07, + "loss": 0.0, + "num_input_tokens_seen": 103961296, + "step": 154245 + }, + { + "epoch": 3.7683531624850364, + "grad_norm": 0.0011553462827578187, + "learning_rate": 3.4746484454407135e-07, + "loss": 0.0, + "num_input_tokens_seen": 103964752, + "step": 154250 + }, + { + "epoch": 3.7684753133168836, + "grad_norm": 0.003239996265619993, + "learning_rate": 3.4740022709512575e-07, + "loss": 0.0, + "num_input_tokens_seen": 103968592, + "step": 154255 + }, + { + "epoch": 3.7685974641487308, + "grad_norm": 0.0013838638551533222, + "learning_rate": 3.473356143920615e-07, + "loss": 0.0869, + "num_input_tokens_seen": 103971984, + "step": 154260 + }, + { + "epoch": 3.768719614980578, + "grad_norm": 0.0005694458959624171, + "learning_rate": 3.472710064353478e-07, + "loss": 0.0, + "num_input_tokens_seen": 103975120, + "step": 154265 + }, + { + "epoch": 3.768841765812425, + "grad_norm": 0.0004933263990096748, + "learning_rate": 3.4720640322545537e-07, + "loss": 0.0, + "num_input_tokens_seen": 103978512, + "step": 154270 + }, + { + "epoch": 3.7689639166442723, + "grad_norm": 0.0007346657221205533, + "learning_rate": 3.471418047628532e-07, + "loss": 0.0, + "num_input_tokens_seen": 103981584, + "step": 154275 + }, + { + "epoch": 3.7690860674761195, + "grad_norm": 0.6496771574020386, + "learning_rate": 3.470772110480117e-07, + "loss": 0.0003, + "num_input_tokens_seen": 103985040, + "step": 154280 + }, + { + "epoch": 3.7692082183079667, + "grad_norm": 0.0006116953445598483, + "learning_rate": 3.4701262208140004e-07, + "loss": 0.0, + "num_input_tokens_seen": 103988304, + "step": 154285 + }, + { + "epoch": 3.769330369139814, + "grad_norm": 0.005050478503108025, + "learning_rate": 3.4694803786348857e-07, + "loss": 0.0, + "num_input_tokens_seen": 103991696, + "step": 154290 + }, + { + "epoch": 3.769452519971661, + "grad_norm": 0.0015640381025150418, + "learning_rate": 3.468834583947462e-07, + "loss": 0.0, + "num_input_tokens_seen": 103995152, + "step": 154295 + }, + { + "epoch": 3.7695746708035083, + "grad_norm": 0.004787995480000973, + "learning_rate": 3.468188836756435e-07, + "loss": 0.0, + "num_input_tokens_seen": 103998736, + "step": 154300 + }, + { + "epoch": 3.7696968216353555, + "grad_norm": 0.0012390019837766886, + "learning_rate": 3.467543137066491e-07, + "loss": 0.0, + "num_input_tokens_seen": 104001872, + "step": 154305 + }, + { + "epoch": 3.7698189724672027, + "grad_norm": 0.003966461401432753, + "learning_rate": 3.4668974848823294e-07, + "loss": 0.0, + "num_input_tokens_seen": 104005392, + "step": 154310 + }, + { + "epoch": 3.7699411232990494, + "grad_norm": 0.009088732302188873, + "learning_rate": 3.4662518802086516e-07, + "loss": 0.0005, + "num_input_tokens_seen": 104008336, + "step": 154315 + }, + { + "epoch": 3.770063274130897, + "grad_norm": 0.0006300138775259256, + "learning_rate": 3.465606323050143e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104011408, + "step": 154320 + }, + { + "epoch": 3.770185424962744, + "grad_norm": 0.0006197233451530337, + "learning_rate": 3.4649608134115074e-07, + "loss": 0.0, + "num_input_tokens_seen": 104014544, + "step": 154325 + }, + { + "epoch": 3.7703075757945914, + "grad_norm": 0.0037844895850867033, + "learning_rate": 3.4643153512974297e-07, + "loss": 0.0, + "num_input_tokens_seen": 104017616, + "step": 154330 + }, + { + "epoch": 3.770429726626438, + "grad_norm": 0.0016215958166867495, + "learning_rate": 3.463669936712613e-07, + "loss": 0.0, + "num_input_tokens_seen": 104021008, + "step": 154335 + }, + { + "epoch": 3.7705518774582854, + "grad_norm": 0.008326614275574684, + "learning_rate": 3.463024569661743e-07, + "loss": 0.0, + "num_input_tokens_seen": 104024208, + "step": 154340 + }, + { + "epoch": 3.7706740282901325, + "grad_norm": 0.0009549881215207279, + "learning_rate": 3.462379250149516e-07, + "loss": 0.0, + "num_input_tokens_seen": 104027344, + "step": 154345 + }, + { + "epoch": 3.7707961791219797, + "grad_norm": 2.1092417227919213e-05, + "learning_rate": 3.4617339781806296e-07, + "loss": 0.0, + "num_input_tokens_seen": 104030544, + "step": 154350 + }, + { + "epoch": 3.770918329953827, + "grad_norm": 2.773488449747674e-05, + "learning_rate": 3.4610887537597687e-07, + "loss": 0.0, + "num_input_tokens_seen": 104034064, + "step": 154355 + }, + { + "epoch": 3.771040480785674, + "grad_norm": 0.0017581800930202007, + "learning_rate": 3.460443576891632e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104037328, + "step": 154360 + }, + { + "epoch": 3.7711626316175213, + "grad_norm": 0.0019256254890933633, + "learning_rate": 3.4597984475809094e-07, + "loss": 0.0, + "num_input_tokens_seen": 104040912, + "step": 154365 + }, + { + "epoch": 3.7712847824493685, + "grad_norm": 0.0006136553129181266, + "learning_rate": 3.459153365832288e-07, + "loss": 0.0, + "num_input_tokens_seen": 104044688, + "step": 154370 + }, + { + "epoch": 3.7714069332812157, + "grad_norm": 0.00688550528138876, + "learning_rate": 3.458508331650465e-07, + "loss": 0.0, + "num_input_tokens_seen": 104047824, + "step": 154375 + }, + { + "epoch": 3.771529084113063, + "grad_norm": 0.01938728056848049, + "learning_rate": 3.457863345040126e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104050768, + "step": 154380 + }, + { + "epoch": 3.77165123494491, + "grad_norm": 0.0019597031641751528, + "learning_rate": 3.457218406005968e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104054480, + "step": 154385 + }, + { + "epoch": 3.7717733857767572, + "grad_norm": 0.0019120193319395185, + "learning_rate": 3.456573514552675e-07, + "loss": 0.0, + "num_input_tokens_seen": 104057936, + "step": 154390 + }, + { + "epoch": 3.7718955366086044, + "grad_norm": 0.003979104571044445, + "learning_rate": 3.4559286706849424e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104060944, + "step": 154395 + }, + { + "epoch": 3.7720176874404516, + "grad_norm": 0.0009600624325685203, + "learning_rate": 3.455283874407452e-07, + "loss": 0.0, + "num_input_tokens_seen": 104064336, + "step": 154400 + }, + { + "epoch": 3.772139838272299, + "grad_norm": 0.0037294672802090645, + "learning_rate": 3.4546391257248985e-07, + "loss": 0.0234, + "num_input_tokens_seen": 104067600, + "step": 154405 + }, + { + "epoch": 3.7722619891041456, + "grad_norm": 0.00026753474958240986, + "learning_rate": 3.453994424641973e-07, + "loss": 0.0, + "num_input_tokens_seen": 104071120, + "step": 154410 + }, + { + "epoch": 3.772384139935993, + "grad_norm": 0.0002816287742462009, + "learning_rate": 3.453349771163357e-07, + "loss": 0.0274, + "num_input_tokens_seen": 104074384, + "step": 154415 + }, + { + "epoch": 3.77250629076784, + "grad_norm": 0.00024800936807878315, + "learning_rate": 3.4527051652937467e-07, + "loss": 0.0442, + "num_input_tokens_seen": 104077392, + "step": 154420 + }, + { + "epoch": 3.772628441599687, + "grad_norm": 0.004503862001001835, + "learning_rate": 3.452060607037821e-07, + "loss": 0.0, + "num_input_tokens_seen": 104080656, + "step": 154425 + }, + { + "epoch": 3.7727505924315343, + "grad_norm": 0.0010897937463596463, + "learning_rate": 3.4514160964002725e-07, + "loss": 0.0604, + "num_input_tokens_seen": 104083792, + "step": 154430 + }, + { + "epoch": 3.7728727432633815, + "grad_norm": 0.0008708562818355858, + "learning_rate": 3.450771633385791e-07, + "loss": 0.0, + "num_input_tokens_seen": 104086992, + "step": 154435 + }, + { + "epoch": 3.7729948940952287, + "grad_norm": 0.003654726780951023, + "learning_rate": 3.450127217999055e-07, + "loss": 0.0, + "num_input_tokens_seen": 104090512, + "step": 154440 + }, + { + "epoch": 3.773117044927076, + "grad_norm": 0.031255174428224564, + "learning_rate": 3.44948285024476e-07, + "loss": 0.0, + "num_input_tokens_seen": 104093776, + "step": 154445 + }, + { + "epoch": 3.773239195758923, + "grad_norm": 0.00021036301041021943, + "learning_rate": 3.4488385301275833e-07, + "loss": 0.0, + "num_input_tokens_seen": 104097680, + "step": 154450 + }, + { + "epoch": 3.7733613465907703, + "grad_norm": 0.0038913488388061523, + "learning_rate": 3.448194257652219e-07, + "loss": 0.0671, + "num_input_tokens_seen": 104100688, + "step": 154455 + }, + { + "epoch": 3.7734834974226175, + "grad_norm": 0.001282164128497243, + "learning_rate": 3.447550032823345e-07, + "loss": 0.0, + "num_input_tokens_seen": 104103824, + "step": 154460 + }, + { + "epoch": 3.7736056482544647, + "grad_norm": 0.007293464615941048, + "learning_rate": 3.446905855645653e-07, + "loss": 0.0, + "num_input_tokens_seen": 104107216, + "step": 154465 + }, + { + "epoch": 3.773727799086312, + "grad_norm": 0.0012252561282366514, + "learning_rate": 3.4462617261238245e-07, + "loss": 0.0787, + "num_input_tokens_seen": 104110608, + "step": 154470 + }, + { + "epoch": 3.773849949918159, + "grad_norm": 0.006050746422261, + "learning_rate": 3.4456176442625393e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104114000, + "step": 154475 + }, + { + "epoch": 3.773972100750006, + "grad_norm": 0.0004838865133933723, + "learning_rate": 3.4449736100664895e-07, + "loss": 0.0, + "num_input_tokens_seen": 104117648, + "step": 154480 + }, + { + "epoch": 3.7740942515818534, + "grad_norm": 0.004921168088912964, + "learning_rate": 3.4443296235403507e-07, + "loss": 0.0, + "num_input_tokens_seen": 104120784, + "step": 154485 + }, + { + "epoch": 3.7742164024137006, + "grad_norm": 0.0025191386230289936, + "learning_rate": 3.443685684688814e-07, + "loss": 0.0403, + "num_input_tokens_seen": 104124432, + "step": 154490 + }, + { + "epoch": 3.7743385532455473, + "grad_norm": 0.0045335073955357075, + "learning_rate": 3.4430417935165547e-07, + "loss": 0.0, + "num_input_tokens_seen": 104127568, + "step": 154495 + }, + { + "epoch": 3.774460704077395, + "grad_norm": 0.0010492325527593493, + "learning_rate": 3.44239795002826e-07, + "loss": 0.0, + "num_input_tokens_seen": 104130704, + "step": 154500 + }, + { + "epoch": 3.7745828549092417, + "grad_norm": 0.013704109005630016, + "learning_rate": 3.4417541542286134e-07, + "loss": 0.0, + "num_input_tokens_seen": 104133776, + "step": 154505 + }, + { + "epoch": 3.7747050057410894, + "grad_norm": 0.031134042888879776, + "learning_rate": 3.4411104061222916e-07, + "loss": 0.0, + "num_input_tokens_seen": 104137104, + "step": 154510 + }, + { + "epoch": 3.774827156572936, + "grad_norm": 0.024660363793373108, + "learning_rate": 3.4404667057139827e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104141776, + "step": 154515 + }, + { + "epoch": 3.7749493074047833, + "grad_norm": 0.002114421920850873, + "learning_rate": 3.4398230530083596e-07, + "loss": 0.0, + "num_input_tokens_seen": 104145680, + "step": 154520 + }, + { + "epoch": 3.7750714582366305, + "grad_norm": 0.000558870960958302, + "learning_rate": 3.4391794480101087e-07, + "loss": 0.0, + "num_input_tokens_seen": 104148880, + "step": 154525 + }, + { + "epoch": 3.7751936090684777, + "grad_norm": 0.0018979088636115193, + "learning_rate": 3.4385358907239135e-07, + "loss": 0.0, + "num_input_tokens_seen": 104151952, + "step": 154530 + }, + { + "epoch": 3.775315759900325, + "grad_norm": 0.019822288304567337, + "learning_rate": 3.437892381154446e-07, + "loss": 0.06, + "num_input_tokens_seen": 104155728, + "step": 154535 + }, + { + "epoch": 3.775437910732172, + "grad_norm": 0.0004573519399855286, + "learning_rate": 3.4372489193063935e-07, + "loss": 0.0, + "num_input_tokens_seen": 104158672, + "step": 154540 + }, + { + "epoch": 3.7755600615640192, + "grad_norm": 0.0019705379381775856, + "learning_rate": 3.436605505184429e-07, + "loss": 0.0, + "num_input_tokens_seen": 104162384, + "step": 154545 + }, + { + "epoch": 3.7756822123958664, + "grad_norm": 0.04014309123158455, + "learning_rate": 3.435962138793237e-07, + "loss": 0.0, + "num_input_tokens_seen": 104165584, + "step": 154550 + }, + { + "epoch": 3.7758043632277136, + "grad_norm": 0.0014087480958551168, + "learning_rate": 3.4353188201374915e-07, + "loss": 0.0, + "num_input_tokens_seen": 104168848, + "step": 154555 + }, + { + "epoch": 3.775926514059561, + "grad_norm": 30.96991729736328, + "learning_rate": 3.434675549221876e-07, + "loss": 0.0313, + "num_input_tokens_seen": 104171984, + "step": 154560 + }, + { + "epoch": 3.776048664891408, + "grad_norm": 0.01162485871464014, + "learning_rate": 3.434032326051063e-07, + "loss": 0.0, + "num_input_tokens_seen": 104174992, + "step": 154565 + }, + { + "epoch": 3.776170815723255, + "grad_norm": 0.0003187482070643455, + "learning_rate": 3.4333891506297365e-07, + "loss": 0.0, + "num_input_tokens_seen": 104178512, + "step": 154570 + }, + { + "epoch": 3.7762929665551024, + "grad_norm": 0.012058389373123646, + "learning_rate": 3.432746022962566e-07, + "loss": 0.0, + "num_input_tokens_seen": 104181904, + "step": 154575 + }, + { + "epoch": 3.7764151173869496, + "grad_norm": 0.019850589334964752, + "learning_rate": 3.432102943054237e-07, + "loss": 0.0, + "num_input_tokens_seen": 104185104, + "step": 154580 + }, + { + "epoch": 3.7765372682187968, + "grad_norm": 0.0027016245294362307, + "learning_rate": 3.4314599109094176e-07, + "loss": 0.0, + "num_input_tokens_seen": 104188304, + "step": 154585 + }, + { + "epoch": 3.7766594190506435, + "grad_norm": 0.0011109261540696025, + "learning_rate": 3.4308169265327926e-07, + "loss": 0.0, + "num_input_tokens_seen": 104191504, + "step": 154590 + }, + { + "epoch": 3.776781569882491, + "grad_norm": 29.25566864013672, + "learning_rate": 3.4301739899290303e-07, + "loss": 0.0255, + "num_input_tokens_seen": 104194960, + "step": 154595 + }, + { + "epoch": 3.776903720714338, + "grad_norm": 0.004831159487366676, + "learning_rate": 3.429531101102814e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104198864, + "step": 154600 + }, + { + "epoch": 3.777025871546185, + "grad_norm": 0.05014396086335182, + "learning_rate": 3.42888826005881e-07, + "loss": 0.0, + "num_input_tokens_seen": 104202448, + "step": 154605 + }, + { + "epoch": 3.7771480223780323, + "grad_norm": 0.001068123267032206, + "learning_rate": 3.428245466801701e-07, + "loss": 0.0, + "num_input_tokens_seen": 104205904, + "step": 154610 + }, + { + "epoch": 3.7772701732098795, + "grad_norm": 0.07515928149223328, + "learning_rate": 3.427602721336157e-07, + "loss": 0.0, + "num_input_tokens_seen": 104208976, + "step": 154615 + }, + { + "epoch": 3.7773923240417266, + "grad_norm": 0.008085944689810276, + "learning_rate": 3.426960023666853e-07, + "loss": 0.0538, + "num_input_tokens_seen": 104212240, + "step": 154620 + }, + { + "epoch": 3.777514474873574, + "grad_norm": 0.004900203552097082, + "learning_rate": 3.426317373798466e-07, + "loss": 0.0, + "num_input_tokens_seen": 104216080, + "step": 154625 + }, + { + "epoch": 3.777636625705421, + "grad_norm": 0.11813540011644363, + "learning_rate": 3.425674771735665e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104219536, + "step": 154630 + }, + { + "epoch": 3.777758776537268, + "grad_norm": 0.0009974022395908833, + "learning_rate": 3.4250322174831294e-07, + "loss": 0.0, + "num_input_tokens_seen": 104222864, + "step": 154635 + }, + { + "epoch": 3.7778809273691154, + "grad_norm": 0.00033019485999830067, + "learning_rate": 3.424389711045523e-07, + "loss": 0.0, + "num_input_tokens_seen": 104226448, + "step": 154640 + }, + { + "epoch": 3.7780030782009626, + "grad_norm": 0.1484830528497696, + "learning_rate": 3.4237472524275266e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104229904, + "step": 154645 + }, + { + "epoch": 3.77812522903281, + "grad_norm": 0.05555364117026329, + "learning_rate": 3.423104841633807e-07, + "loss": 0.0, + "num_input_tokens_seen": 104233488, + "step": 154650 + }, + { + "epoch": 3.778247379864657, + "grad_norm": 0.001281336648389697, + "learning_rate": 3.422462478669037e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104237264, + "step": 154655 + }, + { + "epoch": 3.778369530696504, + "grad_norm": 0.02976313605904579, + "learning_rate": 3.4218201635378927e-07, + "loss": 0.0515, + "num_input_tokens_seen": 104240336, + "step": 154660 + }, + { + "epoch": 3.7784916815283514, + "grad_norm": 0.000876471633091569, + "learning_rate": 3.4211778962450376e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104243984, + "step": 154665 + }, + { + "epoch": 3.7786138323601985, + "grad_norm": 0.0004510780854616314, + "learning_rate": 3.4205356767951497e-07, + "loss": 0.0, + "num_input_tokens_seen": 104246992, + "step": 154670 + }, + { + "epoch": 3.7787359831920453, + "grad_norm": 0.000282302382402122, + "learning_rate": 3.4198935051928967e-07, + "loss": 0.1016, + "num_input_tokens_seen": 104250320, + "step": 154675 + }, + { + "epoch": 3.778858134023893, + "grad_norm": 0.001659899833612144, + "learning_rate": 3.419251381442945e-07, + "loss": 0.0591, + "num_input_tokens_seen": 104253712, + "step": 154680 + }, + { + "epoch": 3.7789802848557397, + "grad_norm": 0.0009223993984051049, + "learning_rate": 3.41860930554997e-07, + "loss": 0.0, + "num_input_tokens_seen": 104257104, + "step": 154685 + }, + { + "epoch": 3.7791024356875873, + "grad_norm": 0.002383466577157378, + "learning_rate": 3.4179672775186344e-07, + "loss": 0.0341, + "num_input_tokens_seen": 104260368, + "step": 154690 + }, + { + "epoch": 3.779224586519434, + "grad_norm": 0.00289145833812654, + "learning_rate": 3.417325297353615e-07, + "loss": 0.0, + "num_input_tokens_seen": 104263696, + "step": 154695 + }, + { + "epoch": 3.7793467373512812, + "grad_norm": 0.004132087808102369, + "learning_rate": 3.4166833650595725e-07, + "loss": 0.0, + "num_input_tokens_seen": 104267088, + "step": 154700 + }, + { + "epoch": 3.7794688881831284, + "grad_norm": 0.0008576092659495771, + "learning_rate": 3.4160414806411844e-07, + "loss": 0.0, + "num_input_tokens_seen": 104269968, + "step": 154705 + }, + { + "epoch": 3.7795910390149756, + "grad_norm": 0.03509823605418205, + "learning_rate": 3.4153996441031086e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104272848, + "step": 154710 + }, + { + "epoch": 3.779713189846823, + "grad_norm": 0.010340539738535881, + "learning_rate": 3.4147578554500177e-07, + "loss": 0.0, + "num_input_tokens_seen": 104276176, + "step": 154715 + }, + { + "epoch": 3.77983534067867, + "grad_norm": 0.07541664689779282, + "learning_rate": 3.4141161146865825e-07, + "loss": 0.0, + "num_input_tokens_seen": 104279120, + "step": 154720 + }, + { + "epoch": 3.779957491510517, + "grad_norm": 0.07795143872499466, + "learning_rate": 3.413474421817464e-07, + "loss": 0.0003, + "num_input_tokens_seen": 104282704, + "step": 154725 + }, + { + "epoch": 3.7800796423423644, + "grad_norm": 0.003080329392105341, + "learning_rate": 3.412832776847333e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104285904, + "step": 154730 + }, + { + "epoch": 3.7802017931742116, + "grad_norm": 0.12578627467155457, + "learning_rate": 3.412191179780851e-07, + "loss": 0.0, + "num_input_tokens_seen": 104289040, + "step": 154735 + }, + { + "epoch": 3.7803239440060588, + "grad_norm": 0.0012306523276492953, + "learning_rate": 3.4115496306226863e-07, + "loss": 0.0, + "num_input_tokens_seen": 104292368, + "step": 154740 + }, + { + "epoch": 3.780446094837906, + "grad_norm": 0.002234894782304764, + "learning_rate": 3.410908129377509e-07, + "loss": 0.0, + "num_input_tokens_seen": 104296144, + "step": 154745 + }, + { + "epoch": 3.780568245669753, + "grad_norm": 0.003981317859143019, + "learning_rate": 3.4102666760499753e-07, + "loss": 0.0, + "num_input_tokens_seen": 104299600, + "step": 154750 + }, + { + "epoch": 3.7806903965016003, + "grad_norm": 0.007521201390773058, + "learning_rate": 3.4096252706447595e-07, + "loss": 0.0, + "num_input_tokens_seen": 104303120, + "step": 154755 + }, + { + "epoch": 3.780812547333447, + "grad_norm": 0.0024240443017333746, + "learning_rate": 3.4089839131665175e-07, + "loss": 0.0566, + "num_input_tokens_seen": 104306128, + "step": 154760 + }, + { + "epoch": 3.7809346981652947, + "grad_norm": 0.0009506293572485447, + "learning_rate": 3.4083426036199203e-07, + "loss": 0.0, + "num_input_tokens_seen": 104309328, + "step": 154765 + }, + { + "epoch": 3.7810568489971415, + "grad_norm": 0.0022911010310053825, + "learning_rate": 3.4077013420096255e-07, + "loss": 0.0006, + "num_input_tokens_seen": 104312400, + "step": 154770 + }, + { + "epoch": 3.781178999828989, + "grad_norm": 0.009430350735783577, + "learning_rate": 3.4070601283403033e-07, + "loss": 0.0, + "num_input_tokens_seen": 104315600, + "step": 154775 + }, + { + "epoch": 3.781301150660836, + "grad_norm": 0.002623270731419325, + "learning_rate": 3.406418962616612e-07, + "loss": 0.0, + "num_input_tokens_seen": 104318544, + "step": 154780 + }, + { + "epoch": 3.781423301492683, + "grad_norm": 0.014037542045116425, + "learning_rate": 3.4057778448432127e-07, + "loss": 0.0, + "num_input_tokens_seen": 104322064, + "step": 154785 + }, + { + "epoch": 3.78154545232453, + "grad_norm": 0.0013723873998969793, + "learning_rate": 3.405136775024775e-07, + "loss": 0.0, + "num_input_tokens_seen": 104325776, + "step": 154790 + }, + { + "epoch": 3.7816676031563774, + "grad_norm": 0.007315513212233782, + "learning_rate": 3.4044957531659514e-07, + "loss": 0.0453, + "num_input_tokens_seen": 104329360, + "step": 154795 + }, + { + "epoch": 3.7817897539882246, + "grad_norm": 0.005057999864220619, + "learning_rate": 3.4038547792714135e-07, + "loss": 0.0, + "num_input_tokens_seen": 104332880, + "step": 154800 + }, + { + "epoch": 3.781911904820072, + "grad_norm": 87.0716552734375, + "learning_rate": 3.403213853345813e-07, + "loss": 0.0607, + "num_input_tokens_seen": 104336528, + "step": 154805 + }, + { + "epoch": 3.782034055651919, + "grad_norm": 0.013916331343352795, + "learning_rate": 3.402572975393817e-07, + "loss": 0.0, + "num_input_tokens_seen": 104339600, + "step": 154810 + }, + { + "epoch": 3.782156206483766, + "grad_norm": 1.0355695486068726, + "learning_rate": 3.401932145420088e-07, + "loss": 0.0678, + "num_input_tokens_seen": 104343248, + "step": 154815 + }, + { + "epoch": 3.7822783573156133, + "grad_norm": 0.01644453965127468, + "learning_rate": 3.4012913634292796e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104346640, + "step": 154820 + }, + { + "epoch": 3.7824005081474605, + "grad_norm": 0.08392349630594254, + "learning_rate": 3.400650629426057e-07, + "loss": 0.0, + "num_input_tokens_seen": 104350352, + "step": 154825 + }, + { + "epoch": 3.7825226589793077, + "grad_norm": 0.0005958887049928308, + "learning_rate": 3.400009943415076e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104353424, + "step": 154830 + }, + { + "epoch": 3.782644809811155, + "grad_norm": 0.0016513823065906763, + "learning_rate": 3.3993693054009986e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104356944, + "step": 154835 + }, + { + "epoch": 3.782766960643002, + "grad_norm": 0.0036543281748890877, + "learning_rate": 3.3987287153884856e-07, + "loss": 0.0, + "num_input_tokens_seen": 104360400, + "step": 154840 + }, + { + "epoch": 3.7828891114748493, + "grad_norm": 0.00038208148907870054, + "learning_rate": 3.3980881733821895e-07, + "loss": 0.0, + "num_input_tokens_seen": 104363728, + "step": 154845 + }, + { + "epoch": 3.7830112623066965, + "grad_norm": 0.0020735624711960554, + "learning_rate": 3.3974476793867755e-07, + "loss": 0.0383, + "num_input_tokens_seen": 104366992, + "step": 154850 + }, + { + "epoch": 3.7831334131385432, + "grad_norm": 0.05217864736914635, + "learning_rate": 3.396807233406894e-07, + "loss": 0.0, + "num_input_tokens_seen": 104370128, + "step": 154855 + }, + { + "epoch": 3.783255563970391, + "grad_norm": 0.0008077344973571599, + "learning_rate": 3.3961668354472107e-07, + "loss": 0.0, + "num_input_tokens_seen": 104373264, + "step": 154860 + }, + { + "epoch": 3.7833777148022376, + "grad_norm": 0.0003835418028756976, + "learning_rate": 3.3955264855123747e-07, + "loss": 0.0, + "num_input_tokens_seen": 104376336, + "step": 154865 + }, + { + "epoch": 3.783499865634085, + "grad_norm": 0.005073685199022293, + "learning_rate": 3.3948861836070463e-07, + "loss": 0.0013, + "num_input_tokens_seen": 104379408, + "step": 154870 + }, + { + "epoch": 3.783622016465932, + "grad_norm": 0.001436670427210629, + "learning_rate": 3.394245929735885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104382864, + "step": 154875 + }, + { + "epoch": 3.783744167297779, + "grad_norm": 0.006241243798285723, + "learning_rate": 3.3936057239035445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104386448, + "step": 154880 + }, + { + "epoch": 3.7838663181296264, + "grad_norm": 0.00015113978588487953, + "learning_rate": 3.392965566114676e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104389968, + "step": 154885 + }, + { + "epoch": 3.7839884689614736, + "grad_norm": 0.0023052634205669165, + "learning_rate": 3.392325456373943e-07, + "loss": 0.0, + "num_input_tokens_seen": 104393424, + "step": 154890 + }, + { + "epoch": 3.7841106197933208, + "grad_norm": 0.005526892840862274, + "learning_rate": 3.3916853946859936e-07, + "loss": 0.0, + "num_input_tokens_seen": 104396304, + "step": 154895 + }, + { + "epoch": 3.784232770625168, + "grad_norm": 0.008645119145512581, + "learning_rate": 3.3910453810554884e-07, + "loss": 0.0, + "num_input_tokens_seen": 104399568, + "step": 154900 + }, + { + "epoch": 3.784354921457015, + "grad_norm": 2.139812204404734e-05, + "learning_rate": 3.390405415487075e-07, + "loss": 0.0, + "num_input_tokens_seen": 104402896, + "step": 154905 + }, + { + "epoch": 3.7844770722888623, + "grad_norm": 0.0003892584063578397, + "learning_rate": 3.389765497985415e-07, + "loss": 0.0, + "num_input_tokens_seen": 104405904, + "step": 154910 + }, + { + "epoch": 3.7845992231207095, + "grad_norm": 0.0018738510552793741, + "learning_rate": 3.389125628555155e-07, + "loss": 0.0, + "num_input_tokens_seen": 104409360, + "step": 154915 + }, + { + "epoch": 3.7847213739525567, + "grad_norm": 0.0837446004152298, + "learning_rate": 3.3884858072009546e-07, + "loss": 0.0, + "num_input_tokens_seen": 104412112, + "step": 154920 + }, + { + "epoch": 3.784843524784404, + "grad_norm": 0.001453855657018721, + "learning_rate": 3.387846033927461e-07, + "loss": 0.0, + "num_input_tokens_seen": 104415632, + "step": 154925 + }, + { + "epoch": 3.784965675616251, + "grad_norm": 0.0007801406900398433, + "learning_rate": 3.387206308739329e-07, + "loss": 0.0436, + "num_input_tokens_seen": 104418832, + "step": 154930 + }, + { + "epoch": 3.7850878264480983, + "grad_norm": 5.12362239533104e-05, + "learning_rate": 3.3865666316412143e-07, + "loss": 0.0, + "num_input_tokens_seen": 104422224, + "step": 154935 + }, + { + "epoch": 3.785209977279945, + "grad_norm": 0.0006093584233894944, + "learning_rate": 3.385927002637763e-07, + "loss": 0.0181, + "num_input_tokens_seen": 104425936, + "step": 154940 + }, + { + "epoch": 3.7853321281117926, + "grad_norm": 0.0009082345641218126, + "learning_rate": 3.3852874217336323e-07, + "loss": 0.0, + "num_input_tokens_seen": 104429328, + "step": 154945 + }, + { + "epoch": 3.7854542789436394, + "grad_norm": 0.0011150073260068893, + "learning_rate": 3.3846478889334673e-07, + "loss": 0.0, + "num_input_tokens_seen": 104433168, + "step": 154950 + }, + { + "epoch": 3.785576429775487, + "grad_norm": 0.001950499601662159, + "learning_rate": 3.384008404241926e-07, + "loss": 0.092, + "num_input_tokens_seen": 104436304, + "step": 154955 + }, + { + "epoch": 3.7856985806073338, + "grad_norm": 0.0005495021468959749, + "learning_rate": 3.3833689676636525e-07, + "loss": 0.0, + "num_input_tokens_seen": 104439760, + "step": 154960 + }, + { + "epoch": 3.785820731439181, + "grad_norm": 0.0005154844257049263, + "learning_rate": 3.3827295792032984e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104443088, + "step": 154965 + }, + { + "epoch": 3.785942882271028, + "grad_norm": 0.0010477579198777676, + "learning_rate": 3.382090238865518e-07, + "loss": 0.0, + "num_input_tokens_seen": 104446800, + "step": 154970 + }, + { + "epoch": 3.7860650331028753, + "grad_norm": 0.005031648091971874, + "learning_rate": 3.3814509466549545e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104450768, + "step": 154975 + }, + { + "epoch": 3.7861871839347225, + "grad_norm": 0.14880934357643127, + "learning_rate": 3.3808117025762626e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104454032, + "step": 154980 + }, + { + "epoch": 3.7863093347665697, + "grad_norm": 0.00034068856621161103, + "learning_rate": 3.380172506634089e-07, + "loss": 0.0, + "num_input_tokens_seen": 104457744, + "step": 154985 + }, + { + "epoch": 3.786431485598417, + "grad_norm": 0.009613665752112865, + "learning_rate": 3.379533358833078e-07, + "loss": 0.1035, + "num_input_tokens_seen": 104460880, + "step": 154990 + }, + { + "epoch": 3.786553636430264, + "grad_norm": 0.0013408566592261195, + "learning_rate": 3.3788942591778836e-07, + "loss": 0.0, + "num_input_tokens_seen": 104464208, + "step": 154995 + }, + { + "epoch": 3.7866757872621113, + "grad_norm": 0.00530999107286334, + "learning_rate": 3.3782552076731487e-07, + "loss": 0.0, + "num_input_tokens_seen": 104467280, + "step": 155000 + }, + { + "epoch": 3.7867979380939585, + "grad_norm": 0.00015352222544606775, + "learning_rate": 3.377616204323526e-07, + "loss": 0.0, + "num_input_tokens_seen": 104470800, + "step": 155005 + }, + { + "epoch": 3.7869200889258057, + "grad_norm": 0.008193439804017544, + "learning_rate": 3.3769772491336554e-07, + "loss": 0.0, + "num_input_tokens_seen": 104473872, + "step": 155010 + }, + { + "epoch": 3.787042239757653, + "grad_norm": 0.001924472744576633, + "learning_rate": 3.3763383421081927e-07, + "loss": 0.0, + "num_input_tokens_seen": 104477136, + "step": 155015 + }, + { + "epoch": 3.7871643905895, + "grad_norm": 0.006006884854286909, + "learning_rate": 3.3756994832517737e-07, + "loss": 0.0, + "num_input_tokens_seen": 104480592, + "step": 155020 + }, + { + "epoch": 3.7872865414213472, + "grad_norm": 0.0006222435622476041, + "learning_rate": 3.3750606725690513e-07, + "loss": 0.0, + "num_input_tokens_seen": 104483920, + "step": 155025 + }, + { + "epoch": 3.7874086922531944, + "grad_norm": 0.009881896898150444, + "learning_rate": 3.374421910064672e-07, + "loss": 0.0, + "num_input_tokens_seen": 104487056, + "step": 155030 + }, + { + "epoch": 3.787530843085041, + "grad_norm": 0.07544638216495514, + "learning_rate": 3.3737831957432763e-07, + "loss": 0.0, + "num_input_tokens_seen": 104490320, + "step": 155035 + }, + { + "epoch": 3.787652993916889, + "grad_norm": 0.0014049513265490532, + "learning_rate": 3.373144529609514e-07, + "loss": 0.0, + "num_input_tokens_seen": 104493328, + "step": 155040 + }, + { + "epoch": 3.7877751447487356, + "grad_norm": 0.002917036646977067, + "learning_rate": 3.3725059116680245e-07, + "loss": 0.0446, + "num_input_tokens_seen": 104496400, + "step": 155045 + }, + { + "epoch": 3.7878972955805827, + "grad_norm": 8.429832087131217e-05, + "learning_rate": 3.3718673419234565e-07, + "loss": 0.0003, + "num_input_tokens_seen": 104499856, + "step": 155050 + }, + { + "epoch": 3.78801944641243, + "grad_norm": 0.09984175115823746, + "learning_rate": 3.37122882038045e-07, + "loss": 0.0, + "num_input_tokens_seen": 104503696, + "step": 155055 + }, + { + "epoch": 3.788141597244277, + "grad_norm": 0.0014615656109526753, + "learning_rate": 3.3705903470436504e-07, + "loss": 0.0, + "num_input_tokens_seen": 104506768, + "step": 155060 + }, + { + "epoch": 3.7882637480761243, + "grad_norm": 0.005124914925545454, + "learning_rate": 3.369951921917703e-07, + "loss": 0.0, + "num_input_tokens_seen": 104510160, + "step": 155065 + }, + { + "epoch": 3.7883858989079715, + "grad_norm": 0.00016769897774793208, + "learning_rate": 3.369313545007246e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104513680, + "step": 155070 + }, + { + "epoch": 3.7885080497398187, + "grad_norm": 0.0012042616726830602, + "learning_rate": 3.3686752163169275e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104517200, + "step": 155075 + }, + { + "epoch": 3.788630200571666, + "grad_norm": 0.0004410717519931495, + "learning_rate": 3.368036935851384e-07, + "loss": 0.0, + "num_input_tokens_seen": 104520784, + "step": 155080 + }, + { + "epoch": 3.788752351403513, + "grad_norm": 0.00887715257704258, + "learning_rate": 3.367398703615262e-07, + "loss": 0.0714, + "num_input_tokens_seen": 104524496, + "step": 155085 + }, + { + "epoch": 3.7888745022353603, + "grad_norm": 0.0005934710497967899, + "learning_rate": 3.366760519613201e-07, + "loss": 0.0, + "num_input_tokens_seen": 104527888, + "step": 155090 + }, + { + "epoch": 3.7889966530672075, + "grad_norm": 0.000675864634104073, + "learning_rate": 3.3661223838498374e-07, + "loss": 0.0, + "num_input_tokens_seen": 104531408, + "step": 155095 + }, + { + "epoch": 3.7891188038990546, + "grad_norm": 0.005420152563601732, + "learning_rate": 3.36548429632982e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104535056, + "step": 155100 + }, + { + "epoch": 3.789240954730902, + "grad_norm": 0.003639468690380454, + "learning_rate": 3.364846257057783e-07, + "loss": 0.0, + "num_input_tokens_seen": 104538320, + "step": 155105 + }, + { + "epoch": 3.789363105562749, + "grad_norm": 0.005886682774871588, + "learning_rate": 3.364208266038371e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104541648, + "step": 155110 + }, + { + "epoch": 3.789485256394596, + "grad_norm": 0.0007768772193230689, + "learning_rate": 3.363570323276218e-07, + "loss": 0.0, + "num_input_tokens_seen": 104544784, + "step": 155115 + }, + { + "epoch": 3.789607407226443, + "grad_norm": 0.0006768378661945462, + "learning_rate": 3.3629324287759666e-07, + "loss": 0.0, + "num_input_tokens_seen": 104548304, + "step": 155120 + }, + { + "epoch": 3.7897295580582906, + "grad_norm": 0.0010963869281113148, + "learning_rate": 3.362294582542259e-07, + "loss": 0.0193, + "num_input_tokens_seen": 104551952, + "step": 155125 + }, + { + "epoch": 3.7898517088901373, + "grad_norm": 0.001188731868751347, + "learning_rate": 3.3616567845797273e-07, + "loss": 0.0, + "num_input_tokens_seen": 104555728, + "step": 155130 + }, + { + "epoch": 3.789973859721985, + "grad_norm": 0.0016654833452776074, + "learning_rate": 3.3610190348930157e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104558928, + "step": 155135 + }, + { + "epoch": 3.7900960105538317, + "grad_norm": 0.006672716233879328, + "learning_rate": 3.360381333486757e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104562512, + "step": 155140 + }, + { + "epoch": 3.790218161385679, + "grad_norm": 0.00013601829414255917, + "learning_rate": 3.359743680365591e-07, + "loss": 0.0, + "num_input_tokens_seen": 104566096, + "step": 155145 + }, + { + "epoch": 3.790340312217526, + "grad_norm": 0.001462005078792572, + "learning_rate": 3.3591060755341583e-07, + "loss": 0.0, + "num_input_tokens_seen": 104569872, + "step": 155150 + }, + { + "epoch": 3.7904624630493733, + "grad_norm": 0.005027350969612598, + "learning_rate": 3.3584685189970886e-07, + "loss": 0.0, + "num_input_tokens_seen": 104573648, + "step": 155155 + }, + { + "epoch": 3.7905846138812205, + "grad_norm": 0.0005566252511925995, + "learning_rate": 3.357831010759026e-07, + "loss": 0.2054, + "num_input_tokens_seen": 104577936, + "step": 155160 + }, + { + "epoch": 3.7907067647130677, + "grad_norm": 0.00025076669408008456, + "learning_rate": 3.3571935508245986e-07, + "loss": 0.0, + "num_input_tokens_seen": 104581392, + "step": 155165 + }, + { + "epoch": 3.790828915544915, + "grad_norm": 0.011523941531777382, + "learning_rate": 3.35655613919845e-07, + "loss": 0.0, + "num_input_tokens_seen": 104584784, + "step": 155170 + }, + { + "epoch": 3.790951066376762, + "grad_norm": 0.008374925702810287, + "learning_rate": 3.355918775885209e-07, + "loss": 0.0, + "num_input_tokens_seen": 104587856, + "step": 155175 + }, + { + "epoch": 3.7910732172086092, + "grad_norm": 0.005891414824873209, + "learning_rate": 3.355281460889514e-07, + "loss": 0.0, + "num_input_tokens_seen": 104591056, + "step": 155180 + }, + { + "epoch": 3.7911953680404564, + "grad_norm": 25.315025329589844, + "learning_rate": 3.3546441942160033e-07, + "loss": 0.1111, + "num_input_tokens_seen": 104594192, + "step": 155185 + }, + { + "epoch": 3.7913175188723036, + "grad_norm": 0.021479131653904915, + "learning_rate": 3.3540069758693056e-07, + "loss": 0.0, + "num_input_tokens_seen": 104597456, + "step": 155190 + }, + { + "epoch": 3.791439669704151, + "grad_norm": 0.0012764681596308947, + "learning_rate": 3.353369805854055e-07, + "loss": 0.0, + "num_input_tokens_seen": 104600720, + "step": 155195 + }, + { + "epoch": 3.791561820535998, + "grad_norm": 0.025395803153514862, + "learning_rate": 3.3527326841748894e-07, + "loss": 0.0, + "num_input_tokens_seen": 104603920, + "step": 155200 + }, + { + "epoch": 3.7916839713678447, + "grad_norm": 0.2535885274410248, + "learning_rate": 3.3520956108364397e-07, + "loss": 0.0004, + "num_input_tokens_seen": 104607056, + "step": 155205 + }, + { + "epoch": 3.7918061221996924, + "grad_norm": 0.0025765015743672848, + "learning_rate": 3.351458585843335e-07, + "loss": 0.0, + "num_input_tokens_seen": 104610192, + "step": 155210 + }, + { + "epoch": 3.791928273031539, + "grad_norm": 4.375946082291193e-05, + "learning_rate": 3.350821609200213e-07, + "loss": 0.0, + "num_input_tokens_seen": 104613584, + "step": 155215 + }, + { + "epoch": 3.7920504238633868, + "grad_norm": 0.0034888247027993202, + "learning_rate": 3.3501846809117075e-07, + "loss": 0.0, + "num_input_tokens_seen": 104617104, + "step": 155220 + }, + { + "epoch": 3.7921725746952335, + "grad_norm": 0.0013499618507921696, + "learning_rate": 3.349547800982444e-07, + "loss": 0.0235, + "num_input_tokens_seen": 104620304, + "step": 155225 + }, + { + "epoch": 3.7922947255270807, + "grad_norm": 0.01719053089618683, + "learning_rate": 3.3489109694170604e-07, + "loss": 0.0, + "num_input_tokens_seen": 104623888, + "step": 155230 + }, + { + "epoch": 3.792416876358928, + "grad_norm": 0.0007886372623033822, + "learning_rate": 3.3482741862201827e-07, + "loss": 0.0, + "num_input_tokens_seen": 104627600, + "step": 155235 + }, + { + "epoch": 3.792539027190775, + "grad_norm": 0.0006685940898023546, + "learning_rate": 3.3476374513964444e-07, + "loss": 0.0, + "num_input_tokens_seen": 104630928, + "step": 155240 + }, + { + "epoch": 3.7926611780226223, + "grad_norm": 0.004218700807541609, + "learning_rate": 3.3470007649504783e-07, + "loss": 0.0, + "num_input_tokens_seen": 104634128, + "step": 155245 + }, + { + "epoch": 3.7927833288544694, + "grad_norm": 0.0038865339010953903, + "learning_rate": 3.3463641268869093e-07, + "loss": 0.0, + "num_input_tokens_seen": 104637776, + "step": 155250 + }, + { + "epoch": 3.7929054796863166, + "grad_norm": 0.0005819426150992513, + "learning_rate": 3.345727537210373e-07, + "loss": 0.0, + "num_input_tokens_seen": 104640848, + "step": 155255 + }, + { + "epoch": 3.793027630518164, + "grad_norm": 0.00021406644373200834, + "learning_rate": 3.3450909959254937e-07, + "loss": 0.0005, + "num_input_tokens_seen": 104643984, + "step": 155260 + }, + { + "epoch": 3.793149781350011, + "grad_norm": 3.5937824577558786e-05, + "learning_rate": 3.344454503036904e-07, + "loss": 0.0006, + "num_input_tokens_seen": 104647120, + "step": 155265 + }, + { + "epoch": 3.793271932181858, + "grad_norm": 0.001609228434972465, + "learning_rate": 3.3438180585492294e-07, + "loss": 0.0, + "num_input_tokens_seen": 104650640, + "step": 155270 + }, + { + "epoch": 3.7933940830137054, + "grad_norm": 0.006711960770189762, + "learning_rate": 3.3431816624670995e-07, + "loss": 0.0, + "num_input_tokens_seen": 104653648, + "step": 155275 + }, + { + "epoch": 3.7935162338455526, + "grad_norm": 0.000300501094898209, + "learning_rate": 3.3425453147951466e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104656656, + "step": 155280 + }, + { + "epoch": 3.7936383846773998, + "grad_norm": 0.0032590716145932674, + "learning_rate": 3.3419090155379913e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104659792, + "step": 155285 + }, + { + "epoch": 3.793760535509247, + "grad_norm": 0.03171639144420624, + "learning_rate": 3.341272764700268e-07, + "loss": 0.0, + "num_input_tokens_seen": 104663248, + "step": 155290 + }, + { + "epoch": 3.793882686341094, + "grad_norm": 0.0020887863356620073, + "learning_rate": 3.340636562286601e-07, + "loss": 0.0, + "num_input_tokens_seen": 104666512, + "step": 155295 + }, + { + "epoch": 3.794004837172941, + "grad_norm": 0.0004597719816956669, + "learning_rate": 3.340000408301611e-07, + "loss": 0.0, + "num_input_tokens_seen": 104669648, + "step": 155300 + }, + { + "epoch": 3.7941269880047885, + "grad_norm": 0.001281373668462038, + "learning_rate": 3.339364302749933e-07, + "loss": 0.0, + "num_input_tokens_seen": 104672912, + "step": 155305 + }, + { + "epoch": 3.7942491388366353, + "grad_norm": 0.006146824918687344, + "learning_rate": 3.3387282456361867e-07, + "loss": 0.0, + "num_input_tokens_seen": 104676304, + "step": 155310 + }, + { + "epoch": 3.794371289668483, + "grad_norm": 0.0005169722135178745, + "learning_rate": 3.3380922369650035e-07, + "loss": 0.0952, + "num_input_tokens_seen": 104679632, + "step": 155315 + }, + { + "epoch": 3.7944934405003297, + "grad_norm": 0.01592920534312725, + "learning_rate": 3.337456276741002e-07, + "loss": 0.0, + "num_input_tokens_seen": 104682704, + "step": 155320 + }, + { + "epoch": 3.794615591332177, + "grad_norm": 0.002830099780112505, + "learning_rate": 3.336820364968813e-07, + "loss": 0.0, + "num_input_tokens_seen": 104686352, + "step": 155325 + }, + { + "epoch": 3.794737742164024, + "grad_norm": 0.00489602517336607, + "learning_rate": 3.3361845016530566e-07, + "loss": 0.05, + "num_input_tokens_seen": 104689872, + "step": 155330 + }, + { + "epoch": 3.7948598929958712, + "grad_norm": 0.029066437855362892, + "learning_rate": 3.3355486867983573e-07, + "loss": 0.0, + "num_input_tokens_seen": 104693520, + "step": 155335 + }, + { + "epoch": 3.7949820438277184, + "grad_norm": 0.038426849991083145, + "learning_rate": 3.334912920409345e-07, + "loss": 0.0, + "num_input_tokens_seen": 104697104, + "step": 155340 + }, + { + "epoch": 3.7951041946595656, + "grad_norm": 0.0005690338439308107, + "learning_rate": 3.334277202490635e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104700432, + "step": 155345 + }, + { + "epoch": 3.795226345491413, + "grad_norm": 0.0005368631100282073, + "learning_rate": 3.333641533046857e-07, + "loss": 0.0706, + "num_input_tokens_seen": 104703888, + "step": 155350 + }, + { + "epoch": 3.79534849632326, + "grad_norm": 0.09150570631027222, + "learning_rate": 3.333005912082628e-07, + "loss": 0.0, + "num_input_tokens_seen": 104707024, + "step": 155355 + }, + { + "epoch": 3.795470647155107, + "grad_norm": 0.005836548749357462, + "learning_rate": 3.332370339602576e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104710736, + "step": 155360 + }, + { + "epoch": 3.7955927979869544, + "grad_norm": 0.010113383643329144, + "learning_rate": 3.331734815611318e-07, + "loss": 0.0, + "num_input_tokens_seen": 104713744, + "step": 155365 + }, + { + "epoch": 3.7957149488188016, + "grad_norm": 0.6457812190055847, + "learning_rate": 3.3310993401134767e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104717200, + "step": 155370 + }, + { + "epoch": 3.7958370996506487, + "grad_norm": 0.0009153983555734158, + "learning_rate": 3.330463913113679e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104720656, + "step": 155375 + }, + { + "epoch": 3.795959250482496, + "grad_norm": 0.0020990949124097824, + "learning_rate": 3.329828534616538e-07, + "loss": 0.0, + "num_input_tokens_seen": 104723984, + "step": 155380 + }, + { + "epoch": 3.7960814013143427, + "grad_norm": 0.00040871353121474385, + "learning_rate": 3.3291932046266804e-07, + "loss": 0.0, + "num_input_tokens_seen": 104726800, + "step": 155385 + }, + { + "epoch": 3.7962035521461903, + "grad_norm": 0.000676620053127408, + "learning_rate": 3.328557923148722e-07, + "loss": 0.0, + "num_input_tokens_seen": 104730128, + "step": 155390 + }, + { + "epoch": 3.796325702978037, + "grad_norm": 0.00033176448778249323, + "learning_rate": 3.327922690187287e-07, + "loss": 0.0418, + "num_input_tokens_seen": 104733392, + "step": 155395 + }, + { + "epoch": 3.7964478538098847, + "grad_norm": 0.004591756500303745, + "learning_rate": 3.327287505746993e-07, + "loss": 0.0, + "num_input_tokens_seen": 104736656, + "step": 155400 + }, + { + "epoch": 3.7965700046417314, + "grad_norm": 0.026520751416683197, + "learning_rate": 3.3266523698324564e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104739984, + "step": 155405 + }, + { + "epoch": 3.7966921554735786, + "grad_norm": 0.0005124854505993426, + "learning_rate": 3.3260172824483013e-07, + "loss": 0.0004, + "num_input_tokens_seen": 104743504, + "step": 155410 + }, + { + "epoch": 3.796814306305426, + "grad_norm": 0.006375231314450502, + "learning_rate": 3.325382243599141e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104746576, + "step": 155415 + }, + { + "epoch": 3.796936457137273, + "grad_norm": 0.001261277706362307, + "learning_rate": 3.324747253289599e-07, + "loss": 0.0, + "num_input_tokens_seen": 104749648, + "step": 155420 + }, + { + "epoch": 3.79705860796912, + "grad_norm": 0.001110946643166244, + "learning_rate": 3.3241123115242873e-07, + "loss": 0.0, + "num_input_tokens_seen": 104753168, + "step": 155425 + }, + { + "epoch": 3.7971807588009674, + "grad_norm": 40.781349182128906, + "learning_rate": 3.323477418307826e-07, + "loss": 0.0296, + "num_input_tokens_seen": 104756688, + "step": 155430 + }, + { + "epoch": 3.7973029096328146, + "grad_norm": 0.00038077402859926224, + "learning_rate": 3.322842573644837e-07, + "loss": 0.0027, + "num_input_tokens_seen": 104760400, + "step": 155435 + }, + { + "epoch": 3.7974250604646618, + "grad_norm": 0.00400801794603467, + "learning_rate": 3.3222077775399295e-07, + "loss": 0.0256, + "num_input_tokens_seen": 104764176, + "step": 155440 + }, + { + "epoch": 3.797547211296509, + "grad_norm": 0.00038191594649106264, + "learning_rate": 3.321573029997725e-07, + "loss": 0.0, + "num_input_tokens_seen": 104767696, + "step": 155445 + }, + { + "epoch": 3.797669362128356, + "grad_norm": 0.0004798115696758032, + "learning_rate": 3.3209383310228355e-07, + "loss": 0.0, + "num_input_tokens_seen": 104771088, + "step": 155450 + }, + { + "epoch": 3.7977915129602033, + "grad_norm": 0.0025597454514354467, + "learning_rate": 3.3203036806198783e-07, + "loss": 0.0, + "num_input_tokens_seen": 104774160, + "step": 155455 + }, + { + "epoch": 3.7979136637920505, + "grad_norm": 0.005515729542821646, + "learning_rate": 3.3196690787934734e-07, + "loss": 0.0, + "num_input_tokens_seen": 104777424, + "step": 155460 + }, + { + "epoch": 3.7980358146238977, + "grad_norm": 0.036175571382045746, + "learning_rate": 3.3190345255482276e-07, + "loss": 0.0, + "num_input_tokens_seen": 104780752, + "step": 155465 + }, + { + "epoch": 3.798157965455745, + "grad_norm": 0.0009230665164068341, + "learning_rate": 3.318400020888764e-07, + "loss": 0.0, + "num_input_tokens_seen": 104784208, + "step": 155470 + }, + { + "epoch": 3.798280116287592, + "grad_norm": 22.15227699279785, + "learning_rate": 3.317765564819689e-07, + "loss": 0.0343, + "num_input_tokens_seen": 104787728, + "step": 155475 + }, + { + "epoch": 3.798402267119439, + "grad_norm": 0.0009698776993900537, + "learning_rate": 3.317131157345623e-07, + "loss": 0.0, + "num_input_tokens_seen": 104791248, + "step": 155480 + }, + { + "epoch": 3.7985244179512865, + "grad_norm": 0.011073552072048187, + "learning_rate": 3.316496798471173e-07, + "loss": 0.0299, + "num_input_tokens_seen": 104794768, + "step": 155485 + }, + { + "epoch": 3.7986465687831332, + "grad_norm": 0.0010699655395001173, + "learning_rate": 3.3158624882009567e-07, + "loss": 0.0, + "num_input_tokens_seen": 104798224, + "step": 155490 + }, + { + "epoch": 3.7987687196149804, + "grad_norm": 0.002896029269322753, + "learning_rate": 3.3152282265395895e-07, + "loss": 0.0, + "num_input_tokens_seen": 104802000, + "step": 155495 + }, + { + "epoch": 3.7988908704468276, + "grad_norm": 0.0017450011800974607, + "learning_rate": 3.314594013491681e-07, + "loss": 0.0, + "num_input_tokens_seen": 104805136, + "step": 155500 + }, + { + "epoch": 3.799013021278675, + "grad_norm": 0.002626369008794427, + "learning_rate": 3.313959849061838e-07, + "loss": 0.0006, + "num_input_tokens_seen": 104808336, + "step": 155505 + }, + { + "epoch": 3.799135172110522, + "grad_norm": 0.002531877951696515, + "learning_rate": 3.313325733254682e-07, + "loss": 0.0, + "num_input_tokens_seen": 104811472, + "step": 155510 + }, + { + "epoch": 3.799257322942369, + "grad_norm": 0.0009253830648958683, + "learning_rate": 3.3126916660748194e-07, + "loss": 0.0, + "num_input_tokens_seen": 104814928, + "step": 155515 + }, + { + "epoch": 3.7993794737742164, + "grad_norm": 0.0009583283099345863, + "learning_rate": 3.312057647526858e-07, + "loss": 0.0, + "num_input_tokens_seen": 104818000, + "step": 155520 + }, + { + "epoch": 3.7995016246060636, + "grad_norm": 0.0002628966176416725, + "learning_rate": 3.311423677615414e-07, + "loss": 0.0, + "num_input_tokens_seen": 104821072, + "step": 155525 + }, + { + "epoch": 3.7996237754379107, + "grad_norm": 0.0020152684301137924, + "learning_rate": 3.310789756345097e-07, + "loss": 0.0444, + "num_input_tokens_seen": 104824336, + "step": 155530 + }, + { + "epoch": 3.799745926269758, + "grad_norm": 0.0011446630815044045, + "learning_rate": 3.310155883720513e-07, + "loss": 0.0, + "num_input_tokens_seen": 104827472, + "step": 155535 + }, + { + "epoch": 3.799868077101605, + "grad_norm": 0.0017714033601805568, + "learning_rate": 3.309522059746279e-07, + "loss": 0.0, + "num_input_tokens_seen": 104830672, + "step": 155540 + }, + { + "epoch": 3.7999902279334523, + "grad_norm": 0.0008015253115445375, + "learning_rate": 3.308888284426997e-07, + "loss": 0.0, + "num_input_tokens_seen": 104834128, + "step": 155545 + }, + { + "epoch": 3.8001123787652995, + "grad_norm": 0.022300535812973976, + "learning_rate": 3.308254557767279e-07, + "loss": 0.0, + "num_input_tokens_seen": 104837584, + "step": 155550 + }, + { + "epoch": 3.8002345295971467, + "grad_norm": 0.00024351937463507056, + "learning_rate": 3.3076208797717366e-07, + "loss": 0.0, + "num_input_tokens_seen": 104841232, + "step": 155555 + }, + { + "epoch": 3.800356680428994, + "grad_norm": 68.36043548583984, + "learning_rate": 3.3069872504449723e-07, + "loss": 0.0213, + "num_input_tokens_seen": 104844752, + "step": 155560 + }, + { + "epoch": 3.8004788312608406, + "grad_norm": 0.00022004992933943868, + "learning_rate": 3.3063536697915995e-07, + "loss": 0.0, + "num_input_tokens_seen": 104848208, + "step": 155565 + }, + { + "epoch": 3.8006009820926883, + "grad_norm": 0.0009000792051665485, + "learning_rate": 3.30572013781622e-07, + "loss": 0.0, + "num_input_tokens_seen": 104851792, + "step": 155570 + }, + { + "epoch": 3.800723132924535, + "grad_norm": 0.012199978344142437, + "learning_rate": 3.305086654523449e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104855120, + "step": 155575 + }, + { + "epoch": 3.8008452837563826, + "grad_norm": 0.0024338264483958483, + "learning_rate": 3.304453219917883e-07, + "loss": 0.1219, + "num_input_tokens_seen": 104858192, + "step": 155580 + }, + { + "epoch": 3.8009674345882294, + "grad_norm": 0.00391667103394866, + "learning_rate": 3.3038198340041356e-07, + "loss": 0.0293, + "num_input_tokens_seen": 104862352, + "step": 155585 + }, + { + "epoch": 3.8010895854200766, + "grad_norm": 0.003353690728545189, + "learning_rate": 3.3031864967868153e-07, + "loss": 0.0, + "num_input_tokens_seen": 104865168, + "step": 155590 + }, + { + "epoch": 3.8012117362519238, + "grad_norm": 1647.1788330078125, + "learning_rate": 3.302553208270519e-07, + "loss": 0.016, + "num_input_tokens_seen": 104869008, + "step": 155595 + }, + { + "epoch": 3.801333887083771, + "grad_norm": 0.020366232842206955, + "learning_rate": 3.301919968459861e-07, + "loss": 0.0, + "num_input_tokens_seen": 104872144, + "step": 155600 + }, + { + "epoch": 3.801456037915618, + "grad_norm": 0.007908672094345093, + "learning_rate": 3.3012867773594434e-07, + "loss": 0.0, + "num_input_tokens_seen": 104875280, + "step": 155605 + }, + { + "epoch": 3.8015781887474653, + "grad_norm": 0.010846472345292568, + "learning_rate": 3.3006536349738654e-07, + "loss": 0.0, + "num_input_tokens_seen": 104878864, + "step": 155610 + }, + { + "epoch": 3.8017003395793125, + "grad_norm": 0.00580504909157753, + "learning_rate": 3.300020541307741e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104881680, + "step": 155615 + }, + { + "epoch": 3.8018224904111597, + "grad_norm": 0.005202096421271563, + "learning_rate": 3.2993874963656645e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104884496, + "step": 155620 + }, + { + "epoch": 3.801944641243007, + "grad_norm": 0.005184181034564972, + "learning_rate": 3.298754500152249e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104888016, + "step": 155625 + }, + { + "epoch": 3.802066792074854, + "grad_norm": 0.011404477059841156, + "learning_rate": 3.298121552672088e-07, + "loss": 0.0, + "num_input_tokens_seen": 104891216, + "step": 155630 + }, + { + "epoch": 3.8021889429067013, + "grad_norm": 0.00029271142557263374, + "learning_rate": 3.297488653929794e-07, + "loss": 0.0, + "num_input_tokens_seen": 104895184, + "step": 155635 + }, + { + "epoch": 3.8023110937385485, + "grad_norm": 0.0004036023165099323, + "learning_rate": 3.2968558039299633e-07, + "loss": 0.0, + "num_input_tokens_seen": 104899152, + "step": 155640 + }, + { + "epoch": 3.8024332445703957, + "grad_norm": 20.258140563964844, + "learning_rate": 3.296223002677199e-07, + "loss": 0.0869, + "num_input_tokens_seen": 104902416, + "step": 155645 + }, + { + "epoch": 3.802555395402243, + "grad_norm": 0.0014357427135109901, + "learning_rate": 3.2955902501761067e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104905808, + "step": 155650 + }, + { + "epoch": 3.80267754623409, + "grad_norm": 0.004626494366675615, + "learning_rate": 3.294957546431283e-07, + "loss": 0.0, + "num_input_tokens_seen": 104908752, + "step": 155655 + }, + { + "epoch": 3.802799697065937, + "grad_norm": 37.42518997192383, + "learning_rate": 3.294324891447334e-07, + "loss": 0.0559, + "num_input_tokens_seen": 104912208, + "step": 155660 + }, + { + "epoch": 3.8029218478977844, + "grad_norm": 0.0018757034558802843, + "learning_rate": 3.293692285228855e-07, + "loss": 0.0, + "num_input_tokens_seen": 104915920, + "step": 155665 + }, + { + "epoch": 3.803043998729631, + "grad_norm": 0.004554565064609051, + "learning_rate": 3.2930597277804537e-07, + "loss": 0.0, + "num_input_tokens_seen": 104919056, + "step": 155670 + }, + { + "epoch": 3.8031661495614784, + "grad_norm": 0.0016938750632107258, + "learning_rate": 3.2924272191067215e-07, + "loss": 0.0, + "num_input_tokens_seen": 104922384, + "step": 155675 + }, + { + "epoch": 3.8032883003933255, + "grad_norm": 0.00745719950646162, + "learning_rate": 3.291794759212263e-07, + "loss": 0.0336, + "num_input_tokens_seen": 104925840, + "step": 155680 + }, + { + "epoch": 3.8034104512251727, + "grad_norm": 0.001887062331661582, + "learning_rate": 3.2911623481016814e-07, + "loss": 0.0606, + "num_input_tokens_seen": 104929168, + "step": 155685 + }, + { + "epoch": 3.80353260205702, + "grad_norm": 0.047493480145931244, + "learning_rate": 3.2905299857795675e-07, + "loss": 0.0, + "num_input_tokens_seen": 104932752, + "step": 155690 + }, + { + "epoch": 3.803654752888867, + "grad_norm": 0.006993389688432217, + "learning_rate": 3.289897672250528e-07, + "loss": 0.0332, + "num_input_tokens_seen": 104935888, + "step": 155695 + }, + { + "epoch": 3.8037769037207143, + "grad_norm": 0.0030944752506911755, + "learning_rate": 3.289265407519154e-07, + "loss": 0.074, + "num_input_tokens_seen": 104939216, + "step": 155700 + }, + { + "epoch": 3.8038990545525615, + "grad_norm": 0.14342844486236572, + "learning_rate": 3.28863319159005e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104942800, + "step": 155705 + }, + { + "epoch": 3.8040212053844087, + "grad_norm": 0.007254133466631174, + "learning_rate": 3.28800102446781e-07, + "loss": 0.0, + "num_input_tokens_seen": 104946064, + "step": 155710 + }, + { + "epoch": 3.804143356216256, + "grad_norm": 0.0028453157283365726, + "learning_rate": 3.2873689061570297e-07, + "loss": 0.0, + "num_input_tokens_seen": 104949776, + "step": 155715 + }, + { + "epoch": 3.804265507048103, + "grad_norm": 0.0009071178501471877, + "learning_rate": 3.286736836662311e-07, + "loss": 0.0064, + "num_input_tokens_seen": 104953104, + "step": 155720 + }, + { + "epoch": 3.8043876578799503, + "grad_norm": 0.003251630812883377, + "learning_rate": 3.286104815988244e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104956240, + "step": 155725 + }, + { + "epoch": 3.8045098087117974, + "grad_norm": 0.012875410728156567, + "learning_rate": 3.285472844139432e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104959248, + "step": 155730 + }, + { + "epoch": 3.8046319595436446, + "grad_norm": 0.004795216489583254, + "learning_rate": 3.2848409211204653e-07, + "loss": 0.0, + "num_input_tokens_seen": 104962256, + "step": 155735 + }, + { + "epoch": 3.804754110375492, + "grad_norm": 0.0023328224197030067, + "learning_rate": 3.2842090469359406e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104965328, + "step": 155740 + }, + { + "epoch": 3.8048762612073386, + "grad_norm": 0.007147334516048431, + "learning_rate": 3.283577221590457e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104968592, + "step": 155745 + }, + { + "epoch": 3.804998412039186, + "grad_norm": 0.001518872333690524, + "learning_rate": 3.282945445088604e-07, + "loss": 0.0, + "num_input_tokens_seen": 104972368, + "step": 155750 + }, + { + "epoch": 3.805120562871033, + "grad_norm": 0.006218430120497942, + "learning_rate": 3.28231371743498e-07, + "loss": 0.0, + "num_input_tokens_seen": 104975696, + "step": 155755 + }, + { + "epoch": 3.8052427137028806, + "grad_norm": 0.2355884313583374, + "learning_rate": 3.281682038634176e-07, + "loss": 0.0002, + "num_input_tokens_seen": 104979792, + "step": 155760 + }, + { + "epoch": 3.8053648645347273, + "grad_norm": 0.0006400325219146907, + "learning_rate": 3.28105040869079e-07, + "loss": 0.0, + "num_input_tokens_seen": 104983376, + "step": 155765 + }, + { + "epoch": 3.8054870153665745, + "grad_norm": 0.0002564819878898561, + "learning_rate": 3.280418827609409e-07, + "loss": 0.0415, + "num_input_tokens_seen": 104986768, + "step": 155770 + }, + { + "epoch": 3.8056091661984217, + "grad_norm": 0.004700619261711836, + "learning_rate": 3.2797872953946305e-07, + "loss": 0.0, + "num_input_tokens_seen": 104990416, + "step": 155775 + }, + { + "epoch": 3.805731317030269, + "grad_norm": 0.0008923843270167708, + "learning_rate": 3.279155812051049e-07, + "loss": 0.0, + "num_input_tokens_seen": 104994064, + "step": 155780 + }, + { + "epoch": 3.805853467862116, + "grad_norm": 0.001144541660323739, + "learning_rate": 3.2785243775832505e-07, + "loss": 0.0001, + "num_input_tokens_seen": 104997520, + "step": 155785 + }, + { + "epoch": 3.8059756186939633, + "grad_norm": 0.00030647515086457133, + "learning_rate": 3.277892991995834e-07, + "loss": 0.0, + "num_input_tokens_seen": 105000720, + "step": 155790 + }, + { + "epoch": 3.8060977695258105, + "grad_norm": 0.010796795599162579, + "learning_rate": 3.277261655293384e-07, + "loss": 0.0, + "num_input_tokens_seen": 105004432, + "step": 155795 + }, + { + "epoch": 3.8062199203576577, + "grad_norm": 0.19322262704372406, + "learning_rate": 3.2766303674804964e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105007824, + "step": 155800 + }, + { + "epoch": 3.806342071189505, + "grad_norm": 18.42563247680664, + "learning_rate": 3.275999128561764e-07, + "loss": 0.0619, + "num_input_tokens_seen": 105010832, + "step": 155805 + }, + { + "epoch": 3.806464222021352, + "grad_norm": 21.573936462402344, + "learning_rate": 3.2753679385417745e-07, + "loss": 0.0279, + "num_input_tokens_seen": 105014160, + "step": 155810 + }, + { + "epoch": 3.8065863728531992, + "grad_norm": 0.001498056692071259, + "learning_rate": 3.274736797425115e-07, + "loss": 0.0, + "num_input_tokens_seen": 105017808, + "step": 155815 + }, + { + "epoch": 3.8067085236850464, + "grad_norm": 20.64427947998047, + "learning_rate": 3.2741057052163814e-07, + "loss": 0.0546, + "num_input_tokens_seen": 105021392, + "step": 155820 + }, + { + "epoch": 3.8068306745168936, + "grad_norm": 0.01792875863611698, + "learning_rate": 3.27347466192016e-07, + "loss": 0.0, + "num_input_tokens_seen": 105024848, + "step": 155825 + }, + { + "epoch": 3.8069528253487404, + "grad_norm": 0.0008755176095291972, + "learning_rate": 3.2728436675410376e-07, + "loss": 0.0, + "num_input_tokens_seen": 105027856, + "step": 155830 + }, + { + "epoch": 3.807074976180588, + "grad_norm": 0.10433609038591385, + "learning_rate": 3.2722127220836047e-07, + "loss": 0.0, + "num_input_tokens_seen": 105031120, + "step": 155835 + }, + { + "epoch": 3.8071971270124347, + "grad_norm": 0.00405006343498826, + "learning_rate": 3.271581825552454e-07, + "loss": 0.0012, + "num_input_tokens_seen": 105034320, + "step": 155840 + }, + { + "epoch": 3.8073192778442824, + "grad_norm": 0.004479088354855776, + "learning_rate": 3.270950977952166e-07, + "loss": 0.0, + "num_input_tokens_seen": 105037328, + "step": 155845 + }, + { + "epoch": 3.807441428676129, + "grad_norm": 0.025686856359243393, + "learning_rate": 3.270320179287337e-07, + "loss": 0.0, + "num_input_tokens_seen": 105040592, + "step": 155850 + }, + { + "epoch": 3.8075635795079763, + "grad_norm": 0.028088264167308807, + "learning_rate": 3.2696894295625456e-07, + "loss": 0.0353, + "num_input_tokens_seen": 105043856, + "step": 155855 + }, + { + "epoch": 3.8076857303398235, + "grad_norm": 0.0788717269897461, + "learning_rate": 3.2690587287823824e-07, + "loss": 0.0005, + "num_input_tokens_seen": 105047376, + "step": 155860 + }, + { + "epoch": 3.8078078811716707, + "grad_norm": 0.31897079944610596, + "learning_rate": 3.2684280769514384e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105050832, + "step": 155865 + }, + { + "epoch": 3.807930032003518, + "grad_norm": 0.003613928332924843, + "learning_rate": 3.267797474074293e-07, + "loss": 0.0467, + "num_input_tokens_seen": 105054032, + "step": 155870 + }, + { + "epoch": 3.808052182835365, + "grad_norm": 0.008457034826278687, + "learning_rate": 3.267166920155537e-07, + "loss": 0.0, + "num_input_tokens_seen": 105057616, + "step": 155875 + }, + { + "epoch": 3.8081743336672123, + "grad_norm": 21.948022842407227, + "learning_rate": 3.266536415199753e-07, + "loss": 0.0264, + "num_input_tokens_seen": 105060880, + "step": 155880 + }, + { + "epoch": 3.8082964844990594, + "grad_norm": 0.563244640827179, + "learning_rate": 3.265905959211529e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105064208, + "step": 155885 + }, + { + "epoch": 3.8084186353309066, + "grad_norm": 0.6338911652565002, + "learning_rate": 3.2652755521954456e-07, + "loss": 0.1409, + "num_input_tokens_seen": 105067280, + "step": 155890 + }, + { + "epoch": 3.808540786162754, + "grad_norm": 0.007176944054663181, + "learning_rate": 3.2646451941560895e-07, + "loss": 0.0, + "num_input_tokens_seen": 105070864, + "step": 155895 + }, + { + "epoch": 3.808662936994601, + "grad_norm": 0.03897714242339134, + "learning_rate": 3.264014885098049e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105074320, + "step": 155900 + }, + { + "epoch": 3.808785087826448, + "grad_norm": 0.021542305126786232, + "learning_rate": 3.2633846250259e-07, + "loss": 0.0, + "num_input_tokens_seen": 105077584, + "step": 155905 + }, + { + "epoch": 3.8089072386582954, + "grad_norm": 0.0006053220713511109, + "learning_rate": 3.262754413944233e-07, + "loss": 0.0, + "num_input_tokens_seen": 105080848, + "step": 155910 + }, + { + "epoch": 3.8090293894901426, + "grad_norm": 0.0019855385180562735, + "learning_rate": 3.2621242518576286e-07, + "loss": 0.0, + "num_input_tokens_seen": 105084176, + "step": 155915 + }, + { + "epoch": 3.8091515403219898, + "grad_norm": 0.01248567271977663, + "learning_rate": 3.261494138770665e-07, + "loss": 0.0, + "num_input_tokens_seen": 105087376, + "step": 155920 + }, + { + "epoch": 3.8092736911538365, + "grad_norm": 0.001591844018548727, + "learning_rate": 3.260864074687932e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105090640, + "step": 155925 + }, + { + "epoch": 3.809395841985684, + "grad_norm": 0.00416983338072896, + "learning_rate": 3.260234059614005e-07, + "loss": 0.0, + "num_input_tokens_seen": 105094288, + "step": 155930 + }, + { + "epoch": 3.809517992817531, + "grad_norm": 0.0006153019494377077, + "learning_rate": 3.259604093553472e-07, + "loss": 0.06, + "num_input_tokens_seen": 105097360, + "step": 155935 + }, + { + "epoch": 3.8096401436493785, + "grad_norm": 0.00553601048886776, + "learning_rate": 3.258974176510908e-07, + "loss": 0.0, + "num_input_tokens_seen": 105101392, + "step": 155940 + }, + { + "epoch": 3.8097622944812253, + "grad_norm": 88.26294708251953, + "learning_rate": 3.258344308490899e-07, + "loss": 0.0563, + "num_input_tokens_seen": 105104848, + "step": 155945 + }, + { + "epoch": 3.8098844453130725, + "grad_norm": 58.035316467285156, + "learning_rate": 3.2577144894980213e-07, + "loss": 0.0378, + "num_input_tokens_seen": 105108048, + "step": 155950 + }, + { + "epoch": 3.8100065961449197, + "grad_norm": 0.007737881503999233, + "learning_rate": 3.2570847195368565e-07, + "loss": 0.0, + "num_input_tokens_seen": 105110928, + "step": 155955 + }, + { + "epoch": 3.810128746976767, + "grad_norm": 0.03556019812822342, + "learning_rate": 3.256454998611989e-07, + "loss": 0.0, + "num_input_tokens_seen": 105114640, + "step": 155960 + }, + { + "epoch": 3.810250897808614, + "grad_norm": 0.0022949352860450745, + "learning_rate": 3.2558253267279923e-07, + "loss": 0.0614, + "num_input_tokens_seen": 105118224, + "step": 155965 + }, + { + "epoch": 3.8103730486404612, + "grad_norm": 0.011481580324470997, + "learning_rate": 3.25519570388945e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105121424, + "step": 155970 + }, + { + "epoch": 3.8104951994723084, + "grad_norm": 0.0012904921313747764, + "learning_rate": 3.254566130100935e-07, + "loss": 0.0, + "num_input_tokens_seen": 105125200, + "step": 155975 + }, + { + "epoch": 3.8106173503041556, + "grad_norm": 0.004640914965420961, + "learning_rate": 3.253936605367034e-07, + "loss": 0.0, + "num_input_tokens_seen": 105128272, + "step": 155980 + }, + { + "epoch": 3.810739501136003, + "grad_norm": 1.151449203491211, + "learning_rate": 3.2533071296923154e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105131408, + "step": 155985 + }, + { + "epoch": 3.81086165196785, + "grad_norm": 0.008894668892025948, + "learning_rate": 3.2526777030813636e-07, + "loss": 0.0, + "num_input_tokens_seen": 105134672, + "step": 155990 + }, + { + "epoch": 3.810983802799697, + "grad_norm": 0.0030117423739284277, + "learning_rate": 3.2520483255387567e-07, + "loss": 0.0569, + "num_input_tokens_seen": 105138000, + "step": 155995 + }, + { + "epoch": 3.8111059536315444, + "grad_norm": 0.0029887051787227392, + "learning_rate": 3.2514189970690666e-07, + "loss": 0.0, + "num_input_tokens_seen": 105141456, + "step": 156000 + }, + { + "epoch": 3.8112281044633916, + "grad_norm": 0.002914925804361701, + "learning_rate": 3.2507897176768753e-07, + "loss": 0.0, + "num_input_tokens_seen": 105144912, + "step": 156005 + }, + { + "epoch": 3.8113502552952383, + "grad_norm": 0.006630240008234978, + "learning_rate": 3.250160487366753e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105148240, + "step": 156010 + }, + { + "epoch": 3.811472406127086, + "grad_norm": 0.0067259520292282104, + "learning_rate": 3.2495313061432836e-07, + "loss": 0.0, + "num_input_tokens_seen": 105151184, + "step": 156015 + }, + { + "epoch": 3.8115945569589327, + "grad_norm": 0.0004644088912755251, + "learning_rate": 3.2489021740110344e-07, + "loss": 0.0, + "num_input_tokens_seen": 105154640, + "step": 156020 + }, + { + "epoch": 3.8117167077907803, + "grad_norm": 0.010894610546529293, + "learning_rate": 3.2482730909745883e-07, + "loss": 0.0, + "num_input_tokens_seen": 105157968, + "step": 156025 + }, + { + "epoch": 3.811838858622627, + "grad_norm": 0.384575754404068, + "learning_rate": 3.2476440570385155e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105161168, + "step": 156030 + }, + { + "epoch": 3.8119610094544742, + "grad_norm": 0.0025056188460439444, + "learning_rate": 3.2470150722073875e-07, + "loss": 0.0007, + "num_input_tokens_seen": 105164432, + "step": 156035 + }, + { + "epoch": 3.8120831602863214, + "grad_norm": 0.006241832859814167, + "learning_rate": 3.246386136485786e-07, + "loss": 0.0, + "num_input_tokens_seen": 105167888, + "step": 156040 + }, + { + "epoch": 3.8122053111181686, + "grad_norm": 0.002331847557798028, + "learning_rate": 3.2457572498782783e-07, + "loss": 0.0, + "num_input_tokens_seen": 105171472, + "step": 156045 + }, + { + "epoch": 3.812327461950016, + "grad_norm": 0.003435875289142132, + "learning_rate": 3.2451284123894394e-07, + "loss": 0.0, + "num_input_tokens_seen": 105174800, + "step": 156050 + }, + { + "epoch": 3.812449612781863, + "grad_norm": 0.00930994562804699, + "learning_rate": 3.2444996240238474e-07, + "loss": 0.0, + "num_input_tokens_seen": 105178128, + "step": 156055 + }, + { + "epoch": 3.81257176361371, + "grad_norm": 0.0016094446182250977, + "learning_rate": 3.2438708847860684e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105181904, + "step": 156060 + }, + { + "epoch": 3.8126939144455574, + "grad_norm": 0.001043045660480857, + "learning_rate": 3.243242194680681e-07, + "loss": 0.0879, + "num_input_tokens_seen": 105185744, + "step": 156065 + }, + { + "epoch": 3.8128160652774046, + "grad_norm": 0.021015044301748276, + "learning_rate": 3.242613553712249e-07, + "loss": 0.0, + "num_input_tokens_seen": 105189392, + "step": 156070 + }, + { + "epoch": 3.8129382161092518, + "grad_norm": 0.004099312704056501, + "learning_rate": 3.2419849618853535e-07, + "loss": 0.0671, + "num_input_tokens_seen": 105192720, + "step": 156075 + }, + { + "epoch": 3.813060366941099, + "grad_norm": 0.001958550186827779, + "learning_rate": 3.2413564192045573e-07, + "loss": 0.0, + "num_input_tokens_seen": 105195792, + "step": 156080 + }, + { + "epoch": 3.813182517772946, + "grad_norm": 0.09461662918329239, + "learning_rate": 3.2407279256744344e-07, + "loss": 0.0302, + "num_input_tokens_seen": 105198864, + "step": 156085 + }, + { + "epoch": 3.8133046686047933, + "grad_norm": 0.004242750816047192, + "learning_rate": 3.2400994812995595e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105202448, + "step": 156090 + }, + { + "epoch": 3.8134268194366405, + "grad_norm": 0.00951340515166521, + "learning_rate": 3.2394710860844963e-07, + "loss": 0.0, + "num_input_tokens_seen": 105205840, + "step": 156095 + }, + { + "epoch": 3.8135489702684877, + "grad_norm": 16.03184700012207, + "learning_rate": 3.2388427400338215e-07, + "loss": 0.0173, + "num_input_tokens_seen": 105209360, + "step": 156100 + }, + { + "epoch": 3.8136711211003345, + "grad_norm": 0.0026069078594446182, + "learning_rate": 3.2382144431520975e-07, + "loss": 0.0, + "num_input_tokens_seen": 105213008, + "step": 156105 + }, + { + "epoch": 3.813793271932182, + "grad_norm": 0.0012540466850623488, + "learning_rate": 3.2375861954438967e-07, + "loss": 0.0359, + "num_input_tokens_seen": 105216592, + "step": 156110 + }, + { + "epoch": 3.813915422764029, + "grad_norm": 0.006343018263578415, + "learning_rate": 3.236957996913791e-07, + "loss": 0.0, + "num_input_tokens_seen": 105220176, + "step": 156115 + }, + { + "epoch": 3.814037573595876, + "grad_norm": 0.6488041877746582, + "learning_rate": 3.236329847566346e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105223824, + "step": 156120 + }, + { + "epoch": 3.814159724427723, + "grad_norm": 0.011054744012653828, + "learning_rate": 3.2357017474061255e-07, + "loss": 0.0, + "num_input_tokens_seen": 105227024, + "step": 156125 + }, + { + "epoch": 3.8142818752595704, + "grad_norm": 0.008859804831445217, + "learning_rate": 3.2350736964377045e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105230224, + "step": 156130 + }, + { + "epoch": 3.8144040260914176, + "grad_norm": 0.0750259980559349, + "learning_rate": 3.2344456946656485e-07, + "loss": 0.0513, + "num_input_tokens_seen": 105233616, + "step": 156135 + }, + { + "epoch": 3.814526176923265, + "grad_norm": 0.02294854074716568, + "learning_rate": 3.233817742094519e-07, + "loss": 0.0, + "num_input_tokens_seen": 105236880, + "step": 156140 + }, + { + "epoch": 3.814648327755112, + "grad_norm": 0.0033728990238159895, + "learning_rate": 3.233189838728887e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105239952, + "step": 156145 + }, + { + "epoch": 3.814770478586959, + "grad_norm": 0.012777571566402912, + "learning_rate": 3.232561984573321e-07, + "loss": 0.0, + "num_input_tokens_seen": 105243216, + "step": 156150 + }, + { + "epoch": 3.8148926294188064, + "grad_norm": 0.09926612675189972, + "learning_rate": 3.2319341796323817e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105246416, + "step": 156155 + }, + { + "epoch": 3.8150147802506535, + "grad_norm": 0.002459646435454488, + "learning_rate": 3.231306423910641e-07, + "loss": 0.0, + "num_input_tokens_seen": 105249680, + "step": 156160 + }, + { + "epoch": 3.8151369310825007, + "grad_norm": 0.0014912053011357784, + "learning_rate": 3.230678717412657e-07, + "loss": 0.0314, + "num_input_tokens_seen": 105253008, + "step": 156165 + }, + { + "epoch": 3.815259081914348, + "grad_norm": 0.007945233955979347, + "learning_rate": 3.230051060142999e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105255888, + "step": 156170 + }, + { + "epoch": 3.815381232746195, + "grad_norm": 0.14787600934505463, + "learning_rate": 3.2294234521062325e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105259408, + "step": 156175 + }, + { + "epoch": 3.8155033835780423, + "grad_norm": 0.015462593175470829, + "learning_rate": 3.228795893306917e-07, + "loss": 0.0, + "num_input_tokens_seen": 105262352, + "step": 156180 + }, + { + "epoch": 3.8156255344098895, + "grad_norm": 0.0005006550345569849, + "learning_rate": 3.228168383749622e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105265680, + "step": 156185 + }, + { + "epoch": 3.8157476852417362, + "grad_norm": 0.3698059916496277, + "learning_rate": 3.2275409234389053e-07, + "loss": 0.0323, + "num_input_tokens_seen": 105269008, + "step": 156190 + }, + { + "epoch": 3.815869836073584, + "grad_norm": 0.020955270156264305, + "learning_rate": 3.226913512379336e-07, + "loss": 0.0, + "num_input_tokens_seen": 105272080, + "step": 156195 + }, + { + "epoch": 3.8159919869054306, + "grad_norm": 0.00198382674716413, + "learning_rate": 3.226286150575469e-07, + "loss": 0.0, + "num_input_tokens_seen": 105275216, + "step": 156200 + }, + { + "epoch": 3.8161141377372783, + "grad_norm": 0.008968944661319256, + "learning_rate": 3.225658838031872e-07, + "loss": 0.0, + "num_input_tokens_seen": 105278672, + "step": 156205 + }, + { + "epoch": 3.816236288569125, + "grad_norm": 0.0022881953045725822, + "learning_rate": 3.225031574753109e-07, + "loss": 0.0, + "num_input_tokens_seen": 105281744, + "step": 156210 + }, + { + "epoch": 3.816358439400972, + "grad_norm": 0.011253178119659424, + "learning_rate": 3.2244043607437353e-07, + "loss": 0.0, + "num_input_tokens_seen": 105284880, + "step": 156215 + }, + { + "epoch": 3.8164805902328194, + "grad_norm": 0.013502773828804493, + "learning_rate": 3.223777196008318e-07, + "loss": 0.0, + "num_input_tokens_seen": 105287952, + "step": 156220 + }, + { + "epoch": 3.8166027410646666, + "grad_norm": 26.68203353881836, + "learning_rate": 3.2231500805514167e-07, + "loss": 0.0277, + "num_input_tokens_seen": 105292048, + "step": 156225 + }, + { + "epoch": 3.8167248918965138, + "grad_norm": 0.027704458683729172, + "learning_rate": 3.222523014377587e-07, + "loss": 0.0, + "num_input_tokens_seen": 105295376, + "step": 156230 + }, + { + "epoch": 3.816847042728361, + "grad_norm": 0.011415623128414154, + "learning_rate": 3.2218959974913963e-07, + "loss": 0.0, + "num_input_tokens_seen": 105298960, + "step": 156235 + }, + { + "epoch": 3.816969193560208, + "grad_norm": 0.0035196489188820124, + "learning_rate": 3.2212690298973976e-07, + "loss": 0.0, + "num_input_tokens_seen": 105302096, + "step": 156240 + }, + { + "epoch": 3.8170913443920553, + "grad_norm": 0.0004433184221852571, + "learning_rate": 3.220642111600157e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105305104, + "step": 156245 + }, + { + "epoch": 3.8172134952239025, + "grad_norm": 0.0012700185179710388, + "learning_rate": 3.220015242604227e-07, + "loss": 0.0, + "num_input_tokens_seen": 105308176, + "step": 156250 + }, + { + "epoch": 3.8173356460557497, + "grad_norm": 0.007200855761766434, + "learning_rate": 3.219388422914173e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105311376, + "step": 156255 + }, + { + "epoch": 3.817457796887597, + "grad_norm": 0.004918563179671764, + "learning_rate": 3.2187616525345474e-07, + "loss": 0.0, + "num_input_tokens_seen": 105315152, + "step": 156260 + }, + { + "epoch": 3.817579947719444, + "grad_norm": 0.008288434706628323, + "learning_rate": 3.2181349314699115e-07, + "loss": 0.0, + "num_input_tokens_seen": 105318544, + "step": 156265 + }, + { + "epoch": 3.8177020985512913, + "grad_norm": 0.0018310670275241137, + "learning_rate": 3.217508259724825e-07, + "loss": 0.0, + "num_input_tokens_seen": 105321680, + "step": 156270 + }, + { + "epoch": 3.817824249383138, + "grad_norm": 0.017679639160633087, + "learning_rate": 3.216881637303839e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105324688, + "step": 156275 + }, + { + "epoch": 3.8179464002149857, + "grad_norm": 0.00957119558006525, + "learning_rate": 3.216255064211517e-07, + "loss": 0.0007, + "num_input_tokens_seen": 105328080, + "step": 156280 + }, + { + "epoch": 3.8180685510468324, + "grad_norm": 0.0067603024654090405, + "learning_rate": 3.215628540452411e-07, + "loss": 0.0, + "num_input_tokens_seen": 105331856, + "step": 156285 + }, + { + "epoch": 3.81819070187868, + "grad_norm": 0.005869430955499411, + "learning_rate": 3.215002066031082e-07, + "loss": 0.0, + "num_input_tokens_seen": 105335312, + "step": 156290 + }, + { + "epoch": 3.818312852710527, + "grad_norm": 0.04536915570497513, + "learning_rate": 3.2143756409520783e-07, + "loss": 0.0834, + "num_input_tokens_seen": 105338704, + "step": 156295 + }, + { + "epoch": 3.818435003542374, + "grad_norm": 0.004794863983988762, + "learning_rate": 3.213749265219962e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105341968, + "step": 156300 + }, + { + "epoch": 3.818557154374221, + "grad_norm": 0.009946302510797977, + "learning_rate": 3.2131229388392877e-07, + "loss": 0.0, + "num_input_tokens_seen": 105345360, + "step": 156305 + }, + { + "epoch": 3.8186793052060684, + "grad_norm": 0.016436627134680748, + "learning_rate": 3.2124966618146066e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105349264, + "step": 156310 + }, + { + "epoch": 3.8188014560379155, + "grad_norm": 0.0038346415385603905, + "learning_rate": 3.211870434150479e-07, + "loss": 0.0, + "num_input_tokens_seen": 105352528, + "step": 156315 + }, + { + "epoch": 3.8189236068697627, + "grad_norm": 0.0010770164662972093, + "learning_rate": 3.211244255851452e-07, + "loss": 0.0, + "num_input_tokens_seen": 105356112, + "step": 156320 + }, + { + "epoch": 3.81904575770161, + "grad_norm": 0.01691940799355507, + "learning_rate": 3.2106181269220856e-07, + "loss": 0.0, + "num_input_tokens_seen": 105359568, + "step": 156325 + }, + { + "epoch": 3.819167908533457, + "grad_norm": 0.11155705899000168, + "learning_rate": 3.209992047366927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105363152, + "step": 156330 + }, + { + "epoch": 3.8192900593653043, + "grad_norm": 0.004868871998041868, + "learning_rate": 3.209366017190536e-07, + "loss": 0.0, + "num_input_tokens_seen": 105366608, + "step": 156335 + }, + { + "epoch": 3.8194122101971515, + "grad_norm": 0.0012334700440987945, + "learning_rate": 3.2087400363974615e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105370000, + "step": 156340 + }, + { + "epoch": 3.8195343610289987, + "grad_norm": 0.002305307425558567, + "learning_rate": 3.2081141049922534e-07, + "loss": 0.0, + "num_input_tokens_seen": 105373456, + "step": 156345 + }, + { + "epoch": 3.819656511860846, + "grad_norm": 0.0002718440373428166, + "learning_rate": 3.207488222979469e-07, + "loss": 0.0, + "num_input_tokens_seen": 105376912, + "step": 156350 + }, + { + "epoch": 3.819778662692693, + "grad_norm": 0.0016530955908820033, + "learning_rate": 3.2068623903636536e-07, + "loss": 0.0, + "num_input_tokens_seen": 105380368, + "step": 156355 + }, + { + "epoch": 3.8199008135245403, + "grad_norm": 0.0009915755363181233, + "learning_rate": 3.206236607149363e-07, + "loss": 0.0, + "num_input_tokens_seen": 105384144, + "step": 156360 + }, + { + "epoch": 3.8200229643563874, + "grad_norm": 0.006974408403038979, + "learning_rate": 3.2056108733411504e-07, + "loss": 0.0, + "num_input_tokens_seen": 105387152, + "step": 156365 + }, + { + "epoch": 3.820145115188234, + "grad_norm": 0.0008302279748022556, + "learning_rate": 3.2049851889435585e-07, + "loss": 0.0, + "num_input_tokens_seen": 105390608, + "step": 156370 + }, + { + "epoch": 3.820267266020082, + "grad_norm": 0.0007241375278681517, + "learning_rate": 3.2043595539611455e-07, + "loss": 0.0, + "num_input_tokens_seen": 105394256, + "step": 156375 + }, + { + "epoch": 3.8203894168519286, + "grad_norm": 0.0036159860901534557, + "learning_rate": 3.2037339683984554e-07, + "loss": 0.0, + "num_input_tokens_seen": 105397648, + "step": 156380 + }, + { + "epoch": 3.820511567683776, + "grad_norm": 0.0054675801657140255, + "learning_rate": 3.203108432260042e-07, + "loss": 0.0479, + "num_input_tokens_seen": 105400848, + "step": 156385 + }, + { + "epoch": 3.820633718515623, + "grad_norm": 0.0014255117857828736, + "learning_rate": 3.20248294555045e-07, + "loss": 0.0, + "num_input_tokens_seen": 105404688, + "step": 156390 + }, + { + "epoch": 3.82075586934747, + "grad_norm": 0.0033376587089151144, + "learning_rate": 3.201857508274231e-07, + "loss": 0.0, + "num_input_tokens_seen": 105408272, + "step": 156395 + }, + { + "epoch": 3.8208780201793173, + "grad_norm": 135.96827697753906, + "learning_rate": 3.201232120435934e-07, + "loss": 0.0166, + "num_input_tokens_seen": 105411344, + "step": 156400 + }, + { + "epoch": 3.8210001710111645, + "grad_norm": 8.144104957580566, + "learning_rate": 3.2006067820401026e-07, + "loss": 0.1224, + "num_input_tokens_seen": 105414864, + "step": 156405 + }, + { + "epoch": 3.8211223218430117, + "grad_norm": 0.004194645211100578, + "learning_rate": 3.1999814930912914e-07, + "loss": 0.0, + "num_input_tokens_seen": 105418640, + "step": 156410 + }, + { + "epoch": 3.821244472674859, + "grad_norm": 0.0026404806412756443, + "learning_rate": 3.1993562535940413e-07, + "loss": 0.0, + "num_input_tokens_seen": 105422288, + "step": 156415 + }, + { + "epoch": 3.821366623506706, + "grad_norm": 0.02045687474310398, + "learning_rate": 3.198731063552901e-07, + "loss": 0.0, + "num_input_tokens_seen": 105425744, + "step": 156420 + }, + { + "epoch": 3.8214887743385533, + "grad_norm": 0.02419469691812992, + "learning_rate": 3.1981059229724205e-07, + "loss": 0.0, + "num_input_tokens_seen": 105428944, + "step": 156425 + }, + { + "epoch": 3.8216109251704005, + "grad_norm": 0.03584418073296547, + "learning_rate": 3.197480831857143e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105432400, + "step": 156430 + }, + { + "epoch": 3.8217330760022477, + "grad_norm": 0.05083722621202469, + "learning_rate": 3.1968557902116124e-07, + "loss": 0.0, + "num_input_tokens_seen": 105435664, + "step": 156435 + }, + { + "epoch": 3.821855226834095, + "grad_norm": 0.0034922566264867783, + "learning_rate": 3.196230798040379e-07, + "loss": 0.0, + "num_input_tokens_seen": 105439248, + "step": 156440 + }, + { + "epoch": 3.821977377665942, + "grad_norm": 0.011057974770665169, + "learning_rate": 3.195605855347985e-07, + "loss": 0.0, + "num_input_tokens_seen": 105442576, + "step": 156445 + }, + { + "epoch": 3.8220995284977892, + "grad_norm": 0.0003465529007371515, + "learning_rate": 3.194980962138972e-07, + "loss": 0.0442, + "num_input_tokens_seen": 105445776, + "step": 156450 + }, + { + "epoch": 3.822221679329636, + "grad_norm": 0.004655024968087673, + "learning_rate": 3.1943561184178893e-07, + "loss": 0.0698, + "num_input_tokens_seen": 105449744, + "step": 156455 + }, + { + "epoch": 3.8223438301614836, + "grad_norm": 55.83885192871094, + "learning_rate": 3.1937313241892806e-07, + "loss": 0.0703, + "num_input_tokens_seen": 105452880, + "step": 156460 + }, + { + "epoch": 3.8224659809933303, + "grad_norm": 0.009342874400317669, + "learning_rate": 3.1931065794576863e-07, + "loss": 0.0317, + "num_input_tokens_seen": 105456208, + "step": 156465 + }, + { + "epoch": 3.822588131825178, + "grad_norm": 0.0033157255966216326, + "learning_rate": 3.1924818842276547e-07, + "loss": 0.0, + "num_input_tokens_seen": 105459472, + "step": 156470 + }, + { + "epoch": 3.8227102826570247, + "grad_norm": 0.004607651382684708, + "learning_rate": 3.1918572385037225e-07, + "loss": 0.0, + "num_input_tokens_seen": 105462608, + "step": 156475 + }, + { + "epoch": 3.822832433488872, + "grad_norm": 0.0009573953575454652, + "learning_rate": 3.191232642290439e-07, + "loss": 0.0, + "num_input_tokens_seen": 105466640, + "step": 156480 + }, + { + "epoch": 3.822954584320719, + "grad_norm": 0.021095087751746178, + "learning_rate": 3.19060809559234e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105470224, + "step": 156485 + }, + { + "epoch": 3.8230767351525663, + "grad_norm": 0.0032122910488396883, + "learning_rate": 3.18998359841397e-07, + "loss": 0.0, + "num_input_tokens_seen": 105473360, + "step": 156490 + }, + { + "epoch": 3.8231988859844135, + "grad_norm": 0.19686217606067657, + "learning_rate": 3.189359150759875e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105476496, + "step": 156495 + }, + { + "epoch": 3.8233210368162607, + "grad_norm": 0.04594238102436066, + "learning_rate": 3.1887347526345885e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105479568, + "step": 156500 + }, + { + "epoch": 3.823443187648108, + "grad_norm": 0.0016377909341827035, + "learning_rate": 3.1881104040426574e-07, + "loss": 0.0, + "num_input_tokens_seen": 105482512, + "step": 156505 + }, + { + "epoch": 3.823565338479955, + "grad_norm": 0.003330622799694538, + "learning_rate": 3.187486104988617e-07, + "loss": 0.0, + "num_input_tokens_seen": 105485968, + "step": 156510 + }, + { + "epoch": 3.8236874893118022, + "grad_norm": 0.001057515270076692, + "learning_rate": 3.18686185547701e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105489680, + "step": 156515 + }, + { + "epoch": 3.8238096401436494, + "grad_norm": 0.00812963955104351, + "learning_rate": 3.1862376555123795e-07, + "loss": 0.0, + "num_input_tokens_seen": 105492880, + "step": 156520 + }, + { + "epoch": 3.8239317909754966, + "grad_norm": 0.004067501053214073, + "learning_rate": 3.1856135050992584e-07, + "loss": 0.0, + "num_input_tokens_seen": 105495952, + "step": 156525 + }, + { + "epoch": 3.824053941807344, + "grad_norm": 0.005482273641973734, + "learning_rate": 3.184989404242191e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105499472, + "step": 156530 + }, + { + "epoch": 3.824176092639191, + "grad_norm": 0.005263707600533962, + "learning_rate": 3.184365352945715e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105502672, + "step": 156535 + }, + { + "epoch": 3.824298243471038, + "grad_norm": 0.0015458049019798636, + "learning_rate": 3.183741351214363e-07, + "loss": 0.0, + "num_input_tokens_seen": 105505936, + "step": 156540 + }, + { + "epoch": 3.8244203943028854, + "grad_norm": 0.000650758680421859, + "learning_rate": 3.1831173990526806e-07, + "loss": 0.0, + "num_input_tokens_seen": 105509200, + "step": 156545 + }, + { + "epoch": 3.824542545134732, + "grad_norm": 0.0012706829002127051, + "learning_rate": 3.1824934964652e-07, + "loss": 0.0, + "num_input_tokens_seen": 105512528, + "step": 156550 + }, + { + "epoch": 3.8246646959665798, + "grad_norm": 0.4105299711227417, + "learning_rate": 3.1818696434564616e-07, + "loss": 0.0016, + "num_input_tokens_seen": 105515728, + "step": 156555 + }, + { + "epoch": 3.8247868467984265, + "grad_norm": 0.007774420082569122, + "learning_rate": 3.1812458400309993e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105518800, + "step": 156560 + }, + { + "epoch": 3.8249089976302737, + "grad_norm": 0.033843837678432465, + "learning_rate": 3.180622086193354e-07, + "loss": 0.0, + "num_input_tokens_seen": 105522832, + "step": 156565 + }, + { + "epoch": 3.825031148462121, + "grad_norm": 0.0010137784993276, + "learning_rate": 3.1799983819480557e-07, + "loss": 0.0, + "num_input_tokens_seen": 105526224, + "step": 156570 + }, + { + "epoch": 3.825153299293968, + "grad_norm": 0.000436730042565614, + "learning_rate": 3.179374727299644e-07, + "loss": 0.0, + "num_input_tokens_seen": 105529744, + "step": 156575 + }, + { + "epoch": 3.8252754501258153, + "grad_norm": 0.017465995624661446, + "learning_rate": 3.1787511222526565e-07, + "loss": 0.0, + "num_input_tokens_seen": 105533136, + "step": 156580 + }, + { + "epoch": 3.8253976009576625, + "grad_norm": 0.0004046593385282904, + "learning_rate": 3.1781275668116225e-07, + "loss": 0.0332, + "num_input_tokens_seen": 105536528, + "step": 156585 + }, + { + "epoch": 3.8255197517895096, + "grad_norm": 0.00676988298073411, + "learning_rate": 3.177504060981083e-07, + "loss": 0.0, + "num_input_tokens_seen": 105539984, + "step": 156590 + }, + { + "epoch": 3.825641902621357, + "grad_norm": 0.00046601903159171343, + "learning_rate": 3.176880604765565e-07, + "loss": 0.0, + "num_input_tokens_seen": 105543376, + "step": 156595 + }, + { + "epoch": 3.825764053453204, + "grad_norm": 0.0029104005079716444, + "learning_rate": 3.1762571981696106e-07, + "loss": 0.0, + "num_input_tokens_seen": 105547024, + "step": 156600 + }, + { + "epoch": 3.825886204285051, + "grad_norm": 0.00048557791160419583, + "learning_rate": 3.175633841197746e-07, + "loss": 0.0, + "num_input_tokens_seen": 105550160, + "step": 156605 + }, + { + "epoch": 3.8260083551168984, + "grad_norm": 0.0008252342231571674, + "learning_rate": 3.1750105338545075e-07, + "loss": 0.0, + "num_input_tokens_seen": 105553424, + "step": 156610 + }, + { + "epoch": 3.8261305059487456, + "grad_norm": 0.00999493058770895, + "learning_rate": 3.174387276144431e-07, + "loss": 0.0, + "num_input_tokens_seen": 105556880, + "step": 156615 + }, + { + "epoch": 3.826252656780593, + "grad_norm": 0.0012048380449414253, + "learning_rate": 3.1737640680720433e-07, + "loss": 0.0, + "num_input_tokens_seen": 105560208, + "step": 156620 + }, + { + "epoch": 3.82637480761244, + "grad_norm": 0.004898492246866226, + "learning_rate": 3.173140909641883e-07, + "loss": 0.0, + "num_input_tokens_seen": 105564176, + "step": 156625 + }, + { + "epoch": 3.826496958444287, + "grad_norm": 0.8940964937210083, + "learning_rate": 3.1725178008584743e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105567440, + "step": 156630 + }, + { + "epoch": 3.826619109276134, + "grad_norm": 0.0011455508647486567, + "learning_rate": 3.1718947417263553e-07, + "loss": 0.0, + "num_input_tokens_seen": 105570576, + "step": 156635 + }, + { + "epoch": 3.8267412601079815, + "grad_norm": 0.0053358012810349464, + "learning_rate": 3.1712717322500514e-07, + "loss": 0.0, + "num_input_tokens_seen": 105573520, + "step": 156640 + }, + { + "epoch": 3.8268634109398283, + "grad_norm": 0.002351941540837288, + "learning_rate": 3.1706487724341e-07, + "loss": 0.0, + "num_input_tokens_seen": 105576592, + "step": 156645 + }, + { + "epoch": 3.826985561771676, + "grad_norm": 0.06902336329221725, + "learning_rate": 3.1700258622830265e-07, + "loss": 0.0626, + "num_input_tokens_seen": 105580304, + "step": 156650 + }, + { + "epoch": 3.8271077126035227, + "grad_norm": 0.0015805003931745887, + "learning_rate": 3.1694030018013596e-07, + "loss": 0.0, + "num_input_tokens_seen": 105583888, + "step": 156655 + }, + { + "epoch": 3.82722986343537, + "grad_norm": 0.01736919768154621, + "learning_rate": 3.168780190993634e-07, + "loss": 0.0, + "num_input_tokens_seen": 105587024, + "step": 156660 + }, + { + "epoch": 3.827352014267217, + "grad_norm": 0.003065595170482993, + "learning_rate": 3.1681574298643743e-07, + "loss": 0.0, + "num_input_tokens_seen": 105590288, + "step": 156665 + }, + { + "epoch": 3.8274741650990642, + "grad_norm": 0.017361000180244446, + "learning_rate": 3.16753471841811e-07, + "loss": 0.0, + "num_input_tokens_seen": 105593232, + "step": 156670 + }, + { + "epoch": 3.8275963159309114, + "grad_norm": 0.001208508969284594, + "learning_rate": 3.1669120566593745e-07, + "loss": 0.0, + "num_input_tokens_seen": 105596368, + "step": 156675 + }, + { + "epoch": 3.8277184667627586, + "grad_norm": 0.16313768923282623, + "learning_rate": 3.166289444592689e-07, + "loss": 0.0, + "num_input_tokens_seen": 105599568, + "step": 156680 + }, + { + "epoch": 3.827840617594606, + "grad_norm": 0.0008120430866256356, + "learning_rate": 3.1656668822225884e-07, + "loss": 0.0, + "num_input_tokens_seen": 105603152, + "step": 156685 + }, + { + "epoch": 3.827962768426453, + "grad_norm": 0.5106744170188904, + "learning_rate": 3.165044369553592e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105606736, + "step": 156690 + }, + { + "epoch": 3.8280849192583, + "grad_norm": 0.0019487874815240502, + "learning_rate": 3.1644219065902366e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105610448, + "step": 156695 + }, + { + "epoch": 3.8282070700901474, + "grad_norm": 0.0008269055979326367, + "learning_rate": 3.1637994933370393e-07, + "loss": 0.0, + "num_input_tokens_seen": 105613456, + "step": 156700 + }, + { + "epoch": 3.8283292209219946, + "grad_norm": 0.0038787401281297207, + "learning_rate": 3.16317712979853e-07, + "loss": 0.0, + "num_input_tokens_seen": 105616784, + "step": 156705 + }, + { + "epoch": 3.8284513717538418, + "grad_norm": 0.00037652463652193546, + "learning_rate": 3.16255481597924e-07, + "loss": 0.0, + "num_input_tokens_seen": 105620624, + "step": 156710 + }, + { + "epoch": 3.828573522585689, + "grad_norm": 0.03159145638346672, + "learning_rate": 3.161932551883687e-07, + "loss": 0.0, + "num_input_tokens_seen": 105624080, + "step": 156715 + }, + { + "epoch": 3.828695673417536, + "grad_norm": 0.0006402541184797883, + "learning_rate": 3.161310337516402e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105627344, + "step": 156720 + }, + { + "epoch": 3.8288178242493833, + "grad_norm": 0.0021268008276820183, + "learning_rate": 3.1606881728819057e-07, + "loss": 0.0, + "num_input_tokens_seen": 105630416, + "step": 156725 + }, + { + "epoch": 3.82893997508123, + "grad_norm": 0.010209254920482635, + "learning_rate": 3.160066057984724e-07, + "loss": 0.0, + "num_input_tokens_seen": 105633680, + "step": 156730 + }, + { + "epoch": 3.8290621259130777, + "grad_norm": 0.007120275404304266, + "learning_rate": 3.1594439928293847e-07, + "loss": 0.0, + "num_input_tokens_seen": 105637136, + "step": 156735 + }, + { + "epoch": 3.8291842767449245, + "grad_norm": 0.00281558302231133, + "learning_rate": 3.1588219774204085e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105640080, + "step": 156740 + }, + { + "epoch": 3.8293064275767716, + "grad_norm": 0.0013388247461989522, + "learning_rate": 3.1582000117623154e-07, + "loss": 0.0, + "num_input_tokens_seen": 105643856, + "step": 156745 + }, + { + "epoch": 3.829428578408619, + "grad_norm": 0.0020310573745518923, + "learning_rate": 3.1575780958596353e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105646928, + "step": 156750 + }, + { + "epoch": 3.829550729240466, + "grad_norm": 0.0013612187467515469, + "learning_rate": 3.1569562297168883e-07, + "loss": 0.0, + "num_input_tokens_seen": 105650064, + "step": 156755 + }, + { + "epoch": 3.829672880072313, + "grad_norm": 0.0015260720392689109, + "learning_rate": 3.1563344133385927e-07, + "loss": 0.0, + "num_input_tokens_seen": 105653648, + "step": 156760 + }, + { + "epoch": 3.8297950309041604, + "grad_norm": 0.003066976321861148, + "learning_rate": 3.155712646729275e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105656528, + "step": 156765 + }, + { + "epoch": 3.8299171817360076, + "grad_norm": 0.0018471548100933433, + "learning_rate": 3.155090929893458e-07, + "loss": 0.0, + "num_input_tokens_seen": 105660112, + "step": 156770 + }, + { + "epoch": 3.830039332567855, + "grad_norm": 0.002350366208702326, + "learning_rate": 3.154469262835657e-07, + "loss": 0.0, + "num_input_tokens_seen": 105663504, + "step": 156775 + }, + { + "epoch": 3.830161483399702, + "grad_norm": 35.2591438293457, + "learning_rate": 3.153847645560401e-07, + "loss": 0.0762, + "num_input_tokens_seen": 105666960, + "step": 156780 + }, + { + "epoch": 3.830283634231549, + "grad_norm": 0.016773177310824394, + "learning_rate": 3.153226078072202e-07, + "loss": 0.0838, + "num_input_tokens_seen": 105670800, + "step": 156785 + }, + { + "epoch": 3.8304057850633964, + "grad_norm": 0.0027613737620413303, + "learning_rate": 3.152604560375589e-07, + "loss": 0.0, + "num_input_tokens_seen": 105673872, + "step": 156790 + }, + { + "epoch": 3.8305279358952435, + "grad_norm": 522.2001953125, + "learning_rate": 3.1519830924750734e-07, + "loss": 0.003, + "num_input_tokens_seen": 105677008, + "step": 156795 + }, + { + "epoch": 3.8306500867270907, + "grad_norm": 0.0006445105536840856, + "learning_rate": 3.151361674375179e-07, + "loss": 0.05, + "num_input_tokens_seen": 105680912, + "step": 156800 + }, + { + "epoch": 3.830772237558938, + "grad_norm": 0.0025355806574225426, + "learning_rate": 3.1507403060804274e-07, + "loss": 0.0, + "num_input_tokens_seen": 105684624, + "step": 156805 + }, + { + "epoch": 3.830894388390785, + "grad_norm": 0.007931312546133995, + "learning_rate": 3.1501189875953314e-07, + "loss": 0.0, + "num_input_tokens_seen": 105687696, + "step": 156810 + }, + { + "epoch": 3.831016539222632, + "grad_norm": 0.19625136256217957, + "learning_rate": 3.1494977189244166e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105690960, + "step": 156815 + }, + { + "epoch": 3.8311386900544795, + "grad_norm": 0.0006346600712276995, + "learning_rate": 3.148876500072193e-07, + "loss": 0.0, + "num_input_tokens_seen": 105694608, + "step": 156820 + }, + { + "epoch": 3.8312608408863262, + "grad_norm": 255.44143676757812, + "learning_rate": 3.1482553310431816e-07, + "loss": 0.0444, + "num_input_tokens_seen": 105698000, + "step": 156825 + }, + { + "epoch": 3.831382991718174, + "grad_norm": 0.001601660973392427, + "learning_rate": 3.147634211841904e-07, + "loss": 0.0, + "num_input_tokens_seen": 105701456, + "step": 156830 + }, + { + "epoch": 3.8315051425500206, + "grad_norm": 0.005953413899987936, + "learning_rate": 3.1470131424728707e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105705232, + "step": 156835 + }, + { + "epoch": 3.831627293381868, + "grad_norm": 0.0003653077292256057, + "learning_rate": 3.146392122940604e-07, + "loss": 0.0, + "num_input_tokens_seen": 105708176, + "step": 156840 + }, + { + "epoch": 3.831749444213715, + "grad_norm": 0.0009022181620821357, + "learning_rate": 3.145771153249618e-07, + "loss": 0.0, + "num_input_tokens_seen": 105711056, + "step": 156845 + }, + { + "epoch": 3.831871595045562, + "grad_norm": 0.00146490428596735, + "learning_rate": 3.145150233404423e-07, + "loss": 0.0, + "num_input_tokens_seen": 105714064, + "step": 156850 + }, + { + "epoch": 3.8319937458774094, + "grad_norm": 0.0021413289941847324, + "learning_rate": 3.144529363409544e-07, + "loss": 0.0, + "num_input_tokens_seen": 105717328, + "step": 156855 + }, + { + "epoch": 3.8321158967092566, + "grad_norm": 0.021374104544520378, + "learning_rate": 3.1439085432694866e-07, + "loss": 0.175, + "num_input_tokens_seen": 105720528, + "step": 156860 + }, + { + "epoch": 3.8322380475411038, + "grad_norm": 0.0032985706347972155, + "learning_rate": 3.143287772988774e-07, + "loss": 0.0, + "num_input_tokens_seen": 105723728, + "step": 156865 + }, + { + "epoch": 3.832360198372951, + "grad_norm": 0.006007062271237373, + "learning_rate": 3.142667052571915e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105726672, + "step": 156870 + }, + { + "epoch": 3.832482349204798, + "grad_norm": 0.004959517158567905, + "learning_rate": 3.1420463820234266e-07, + "loss": 0.0, + "num_input_tokens_seen": 105729936, + "step": 156875 + }, + { + "epoch": 3.8326045000366453, + "grad_norm": 0.009409020654857159, + "learning_rate": 3.14142576134782e-07, + "loss": 0.0544, + "num_input_tokens_seen": 105733072, + "step": 156880 + }, + { + "epoch": 3.8327266508684925, + "grad_norm": 65.75674438476562, + "learning_rate": 3.140805190549609e-07, + "loss": 0.0538, + "num_input_tokens_seen": 105736208, + "step": 156885 + }, + { + "epoch": 3.8328488017003397, + "grad_norm": 0.006501436233520508, + "learning_rate": 3.140184669633311e-07, + "loss": 0.0, + "num_input_tokens_seen": 105739408, + "step": 156890 + }, + { + "epoch": 3.832970952532187, + "grad_norm": 0.00046390751958824694, + "learning_rate": 3.1395641986034324e-07, + "loss": 0.0, + "num_input_tokens_seen": 105742992, + "step": 156895 + }, + { + "epoch": 3.8330931033640336, + "grad_norm": 0.006816296372562647, + "learning_rate": 3.138943777464491e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105746384, + "step": 156900 + }, + { + "epoch": 3.8332152541958813, + "grad_norm": 26.399097442626953, + "learning_rate": 3.138323406220993e-07, + "loss": 0.0245, + "num_input_tokens_seen": 105749904, + "step": 156905 + }, + { + "epoch": 3.833337405027728, + "grad_norm": 0.014151382260024548, + "learning_rate": 3.1377030848774565e-07, + "loss": 0.0, + "num_input_tokens_seen": 105753808, + "step": 156910 + }, + { + "epoch": 3.8334595558595757, + "grad_norm": 0.0056682913564145565, + "learning_rate": 3.1370828134383845e-07, + "loss": 0.0, + "num_input_tokens_seen": 105757136, + "step": 156915 + }, + { + "epoch": 3.8335817066914224, + "grad_norm": 0.017652269452810287, + "learning_rate": 3.136462591908293e-07, + "loss": 0.0, + "num_input_tokens_seen": 105759888, + "step": 156920 + }, + { + "epoch": 3.8337038575232696, + "grad_norm": 0.32824185490608215, + "learning_rate": 3.1358424202916945e-07, + "loss": 0.0361, + "num_input_tokens_seen": 105763280, + "step": 156925 + }, + { + "epoch": 3.8338260083551168, + "grad_norm": 0.0020293546840548515, + "learning_rate": 3.135222298593093e-07, + "loss": 0.0, + "num_input_tokens_seen": 105766288, + "step": 156930 + }, + { + "epoch": 3.833948159186964, + "grad_norm": 0.018652616068720818, + "learning_rate": 3.134602226817005e-07, + "loss": 0.0, + "num_input_tokens_seen": 105769680, + "step": 156935 + }, + { + "epoch": 3.834070310018811, + "grad_norm": 0.0029221416916698217, + "learning_rate": 3.1339822049679323e-07, + "loss": 0.0, + "num_input_tokens_seen": 105773584, + "step": 156940 + }, + { + "epoch": 3.8341924608506583, + "grad_norm": 0.004153568297624588, + "learning_rate": 3.1333622330503905e-07, + "loss": 0.0406, + "num_input_tokens_seen": 105776656, + "step": 156945 + }, + { + "epoch": 3.8343146116825055, + "grad_norm": 0.0019725682213902473, + "learning_rate": 3.1327423110688835e-07, + "loss": 0.0, + "num_input_tokens_seen": 105780240, + "step": 156950 + }, + { + "epoch": 3.8344367625143527, + "grad_norm": 0.006787466816604137, + "learning_rate": 3.1321224390279235e-07, + "loss": 0.0005, + "num_input_tokens_seen": 105784144, + "step": 156955 + }, + { + "epoch": 3.8345589133462, + "grad_norm": 0.0011359815252944827, + "learning_rate": 3.1315026169320167e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105787728, + "step": 156960 + }, + { + "epoch": 3.834681064178047, + "grad_norm": 0.004417893476784229, + "learning_rate": 3.130882844785667e-07, + "loss": 0.0, + "num_input_tokens_seen": 105791760, + "step": 156965 + }, + { + "epoch": 3.8348032150098943, + "grad_norm": 0.00016033170686569065, + "learning_rate": 3.1302631225933884e-07, + "loss": 0.0, + "num_input_tokens_seen": 105795216, + "step": 156970 + }, + { + "epoch": 3.8349253658417415, + "grad_norm": 0.01326667983084917, + "learning_rate": 3.1296434503596815e-07, + "loss": 0.0399, + "num_input_tokens_seen": 105798736, + "step": 156975 + }, + { + "epoch": 3.8350475166735887, + "grad_norm": 0.038337502628564835, + "learning_rate": 3.1290238280890547e-07, + "loss": 0.0, + "num_input_tokens_seen": 105802384, + "step": 156980 + }, + { + "epoch": 3.835169667505436, + "grad_norm": 0.0024482160806655884, + "learning_rate": 3.128404255786017e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105805776, + "step": 156985 + }, + { + "epoch": 3.835291818337283, + "grad_norm": 0.6643205881118774, + "learning_rate": 3.12778473345507e-07, + "loss": 0.0737, + "num_input_tokens_seen": 105809040, + "step": 156990 + }, + { + "epoch": 3.83541396916913, + "grad_norm": 0.050306521356105804, + "learning_rate": 3.1271652611007226e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105812304, + "step": 156995 + }, + { + "epoch": 3.8355361200009774, + "grad_norm": 0.00011481645924504846, + "learning_rate": 3.126545838727476e-07, + "loss": 0.0, + "num_input_tokens_seen": 105815824, + "step": 157000 + }, + { + "epoch": 3.835658270832824, + "grad_norm": 0.010054918937385082, + "learning_rate": 3.12592646633984e-07, + "loss": 0.0, + "num_input_tokens_seen": 105819280, + "step": 157005 + }, + { + "epoch": 3.835780421664672, + "grad_norm": 0.0001712543162284419, + "learning_rate": 3.1253071439423116e-07, + "loss": 0.0, + "num_input_tokens_seen": 105822992, + "step": 157010 + }, + { + "epoch": 3.8359025724965186, + "grad_norm": 0.0005007112631574273, + "learning_rate": 3.1246878715393996e-07, + "loss": 0.0, + "num_input_tokens_seen": 105826512, + "step": 157015 + }, + { + "epoch": 3.8360247233283657, + "grad_norm": 0.014416385442018509, + "learning_rate": 3.1240686491356096e-07, + "loss": 0.0, + "num_input_tokens_seen": 105829904, + "step": 157020 + }, + { + "epoch": 3.836146874160213, + "grad_norm": 0.006188525352627039, + "learning_rate": 3.123449476735438e-07, + "loss": 0.0, + "num_input_tokens_seen": 105832912, + "step": 157025 + }, + { + "epoch": 3.83626902499206, + "grad_norm": 0.0016473844880238175, + "learning_rate": 3.1228303543433953e-07, + "loss": 0.0, + "num_input_tokens_seen": 105836368, + "step": 157030 + }, + { + "epoch": 3.8363911758239073, + "grad_norm": 0.017037464305758476, + "learning_rate": 3.1222112819639755e-07, + "loss": 0.0, + "num_input_tokens_seen": 105839760, + "step": 157035 + }, + { + "epoch": 3.8365133266557545, + "grad_norm": 0.002478261012583971, + "learning_rate": 3.1215922596016897e-07, + "loss": 0.0, + "num_input_tokens_seen": 105843280, + "step": 157040 + }, + { + "epoch": 3.8366354774876017, + "grad_norm": 0.000509330362547189, + "learning_rate": 3.1209732872610316e-07, + "loss": 0.0, + "num_input_tokens_seen": 105846416, + "step": 157045 + }, + { + "epoch": 3.836757628319449, + "grad_norm": 200.7996063232422, + "learning_rate": 3.1203543649465093e-07, + "loss": 0.0174, + "num_input_tokens_seen": 105849680, + "step": 157050 + }, + { + "epoch": 3.836879779151296, + "grad_norm": 0.08813425898551941, + "learning_rate": 3.1197354926626175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105853328, + "step": 157055 + }, + { + "epoch": 3.8370019299831433, + "grad_norm": 0.00032203836599364877, + "learning_rate": 3.1191166704138614e-07, + "loss": 0.0, + "num_input_tokens_seen": 105856528, + "step": 157060 + }, + { + "epoch": 3.8371240808149905, + "grad_norm": 0.003999975975602865, + "learning_rate": 3.118497898204742e-07, + "loss": 0.0, + "num_input_tokens_seen": 105859792, + "step": 157065 + }, + { + "epoch": 3.8372462316468376, + "grad_norm": 0.02387079782783985, + "learning_rate": 3.117879176039752e-07, + "loss": 0.0, + "num_input_tokens_seen": 105863312, + "step": 157070 + }, + { + "epoch": 3.837368382478685, + "grad_norm": 0.0012991444673389196, + "learning_rate": 3.117260503923396e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105867024, + "step": 157075 + }, + { + "epoch": 3.8374905333105316, + "grad_norm": 0.002810559468343854, + "learning_rate": 3.1166418818601757e-07, + "loss": 0.1088, + "num_input_tokens_seen": 105870288, + "step": 157080 + }, + { + "epoch": 3.837612684142379, + "grad_norm": 0.009368712082505226, + "learning_rate": 3.116023309854584e-07, + "loss": 0.0, + "num_input_tokens_seen": 105873744, + "step": 157085 + }, + { + "epoch": 3.837734834974226, + "grad_norm": 0.01500675454735756, + "learning_rate": 3.1154047879111256e-07, + "loss": 0.0, + "num_input_tokens_seen": 105876880, + "step": 157090 + }, + { + "epoch": 3.8378569858060736, + "grad_norm": 0.15511620044708252, + "learning_rate": 3.1147863160342923e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105880336, + "step": 157095 + }, + { + "epoch": 3.8379791366379203, + "grad_norm": 0.0014603231102228165, + "learning_rate": 3.114167894228589e-07, + "loss": 0.0, + "num_input_tokens_seen": 105883792, + "step": 157100 + }, + { + "epoch": 3.8381012874697675, + "grad_norm": 0.0008880642708390951, + "learning_rate": 3.1135495224985043e-07, + "loss": 0.0, + "num_input_tokens_seen": 105887312, + "step": 157105 + }, + { + "epoch": 3.8382234383016147, + "grad_norm": 0.0009251810261048377, + "learning_rate": 3.11293120084854e-07, + "loss": 0.0, + "num_input_tokens_seen": 105890704, + "step": 157110 + }, + { + "epoch": 3.838345589133462, + "grad_norm": 0.006861668545752764, + "learning_rate": 3.112312929283195e-07, + "loss": 0.0, + "num_input_tokens_seen": 105893904, + "step": 157115 + }, + { + "epoch": 3.838467739965309, + "grad_norm": 0.031088093295693398, + "learning_rate": 3.111694707806961e-07, + "loss": 0.0325, + "num_input_tokens_seen": 105897424, + "step": 157120 + }, + { + "epoch": 3.8385898907971563, + "grad_norm": 0.0009894341928884387, + "learning_rate": 3.111076536424337e-07, + "loss": 0.0, + "num_input_tokens_seen": 105900496, + "step": 157125 + }, + { + "epoch": 3.8387120416290035, + "grad_norm": 0.0024139767047017813, + "learning_rate": 3.1104584151398163e-07, + "loss": 0.0, + "num_input_tokens_seen": 105904208, + "step": 157130 + }, + { + "epoch": 3.8388341924608507, + "grad_norm": 0.0013710018247365952, + "learning_rate": 3.1098403439578945e-07, + "loss": 0.0, + "num_input_tokens_seen": 105907728, + "step": 157135 + }, + { + "epoch": 3.838956343292698, + "grad_norm": 0.000259570952039212, + "learning_rate": 3.1092223228830696e-07, + "loss": 0.0002, + "num_input_tokens_seen": 105911056, + "step": 157140 + }, + { + "epoch": 3.839078494124545, + "grad_norm": 0.0033034791704267263, + "learning_rate": 3.1086043519198315e-07, + "loss": 0.0, + "num_input_tokens_seen": 105913936, + "step": 157145 + }, + { + "epoch": 3.8392006449563922, + "grad_norm": 11.38533878326416, + "learning_rate": 3.107986431072678e-07, + "loss": 0.0924, + "num_input_tokens_seen": 105917072, + "step": 157150 + }, + { + "epoch": 3.8393227957882394, + "grad_norm": 0.003908009268343449, + "learning_rate": 3.107368560346101e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105920272, + "step": 157155 + }, + { + "epoch": 3.8394449466200866, + "grad_norm": 0.007159855682402849, + "learning_rate": 3.1067507397445904e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105923344, + "step": 157160 + }, + { + "epoch": 3.839567097451934, + "grad_norm": 0.025989584624767303, + "learning_rate": 3.106132969272646e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105926480, + "step": 157165 + }, + { + "epoch": 3.839689248283781, + "grad_norm": 0.0034603348467499018, + "learning_rate": 3.1055152489347535e-07, + "loss": 0.0, + "num_input_tokens_seen": 105929872, + "step": 157170 + }, + { + "epoch": 3.8398113991156277, + "grad_norm": 0.0010983675019815564, + "learning_rate": 3.1048975787354126e-07, + "loss": 0.0, + "num_input_tokens_seen": 105933392, + "step": 157175 + }, + { + "epoch": 3.8399335499474754, + "grad_norm": 0.003446553135290742, + "learning_rate": 3.104279958679107e-07, + "loss": 0.0588, + "num_input_tokens_seen": 105936784, + "step": 157180 + }, + { + "epoch": 3.840055700779322, + "grad_norm": 0.006336711347103119, + "learning_rate": 3.103662388770335e-07, + "loss": 0.0, + "num_input_tokens_seen": 105939920, + "step": 157185 + }, + { + "epoch": 3.8401778516111693, + "grad_norm": 0.21213245391845703, + "learning_rate": 3.1030448690135824e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105943312, + "step": 157190 + }, + { + "epoch": 3.8403000024430165, + "grad_norm": 0.008288219571113586, + "learning_rate": 3.1024273994133465e-07, + "loss": 0.0, + "num_input_tokens_seen": 105946832, + "step": 157195 + }, + { + "epoch": 3.8404221532748637, + "grad_norm": 0.0031236589420586824, + "learning_rate": 3.1018099799741095e-07, + "loss": 0.0, + "num_input_tokens_seen": 105950352, + "step": 157200 + }, + { + "epoch": 3.840544304106711, + "grad_norm": 0.14038878679275513, + "learning_rate": 3.101192610700366e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105953680, + "step": 157205 + }, + { + "epoch": 3.840666454938558, + "grad_norm": 0.005273896735161543, + "learning_rate": 3.100575291596609e-07, + "loss": 0.0, + "num_input_tokens_seen": 105957072, + "step": 157210 + }, + { + "epoch": 3.8407886057704053, + "grad_norm": 0.007837706245481968, + "learning_rate": 3.0999580226673207e-07, + "loss": 0.0, + "num_input_tokens_seen": 105960656, + "step": 157215 + }, + { + "epoch": 3.8409107566022525, + "grad_norm": 0.0034366052132099867, + "learning_rate": 3.0993408039169964e-07, + "loss": 0.0, + "num_input_tokens_seen": 105964176, + "step": 157220 + }, + { + "epoch": 3.8410329074340996, + "grad_norm": 0.03968435898423195, + "learning_rate": 3.098723635350119e-07, + "loss": 0.0, + "num_input_tokens_seen": 105967952, + "step": 157225 + }, + { + "epoch": 3.841155058265947, + "grad_norm": 0.025878041982650757, + "learning_rate": 3.0981065169711793e-07, + "loss": 0.0, + "num_input_tokens_seen": 105971344, + "step": 157230 + }, + { + "epoch": 3.841277209097794, + "grad_norm": 0.00534423440694809, + "learning_rate": 3.0974894487846693e-07, + "loss": 0.0009, + "num_input_tokens_seen": 105974800, + "step": 157235 + }, + { + "epoch": 3.841399359929641, + "grad_norm": 0.01658131182193756, + "learning_rate": 3.096872430795069e-07, + "loss": 0.0, + "num_input_tokens_seen": 105978640, + "step": 157240 + }, + { + "epoch": 3.8415215107614884, + "grad_norm": 0.0042229569517076015, + "learning_rate": 3.0962554630068716e-07, + "loss": 0.0563, + "num_input_tokens_seen": 105981712, + "step": 157245 + }, + { + "epoch": 3.8416436615933356, + "grad_norm": 0.0016310889041051269, + "learning_rate": 3.095638545424559e-07, + "loss": 0.0003, + "num_input_tokens_seen": 105984656, + "step": 157250 + }, + { + "epoch": 3.841765812425183, + "grad_norm": 0.0044392179697752, + "learning_rate": 3.095021678052623e-07, + "loss": 0.036, + "num_input_tokens_seen": 105987984, + "step": 157255 + }, + { + "epoch": 3.8418879632570295, + "grad_norm": 0.0019932505674660206, + "learning_rate": 3.094404860895543e-07, + "loss": 0.0, + "num_input_tokens_seen": 105991376, + "step": 157260 + }, + { + "epoch": 3.842010114088877, + "grad_norm": 0.0009364528232254088, + "learning_rate": 3.0937880939578123e-07, + "loss": 0.0001, + "num_input_tokens_seen": 105994640, + "step": 157265 + }, + { + "epoch": 3.842132264920724, + "grad_norm": 0.021783795207738876, + "learning_rate": 3.0931713772439106e-07, + "loss": 0.0, + "num_input_tokens_seen": 105997648, + "step": 157270 + }, + { + "epoch": 3.8422544157525715, + "grad_norm": 0.00874258577823639, + "learning_rate": 3.0925547107583225e-07, + "loss": 0.0, + "num_input_tokens_seen": 106000848, + "step": 157275 + }, + { + "epoch": 3.8423765665844183, + "grad_norm": 0.0005422328831627965, + "learning_rate": 3.0919380945055374e-07, + "loss": 0.0, + "num_input_tokens_seen": 106004624, + "step": 157280 + }, + { + "epoch": 3.8424987174162655, + "grad_norm": 0.004375748336315155, + "learning_rate": 3.091321528490033e-07, + "loss": 0.0, + "num_input_tokens_seen": 106007568, + "step": 157285 + }, + { + "epoch": 3.8426208682481127, + "grad_norm": 0.011253989301621914, + "learning_rate": 3.090705012716297e-07, + "loss": 0.0, + "num_input_tokens_seen": 106010960, + "step": 157290 + }, + { + "epoch": 3.84274301907996, + "grad_norm": 0.004648137837648392, + "learning_rate": 3.090088547188815e-07, + "loss": 0.0, + "num_input_tokens_seen": 106014224, + "step": 157295 + }, + { + "epoch": 3.842865169911807, + "grad_norm": 0.0027260452043265104, + "learning_rate": 3.0894721319120654e-07, + "loss": 0.0, + "num_input_tokens_seen": 106017424, + "step": 157300 + }, + { + "epoch": 3.8429873207436542, + "grad_norm": 0.00022834166884422302, + "learning_rate": 3.088855766890536e-07, + "loss": 0.0004, + "num_input_tokens_seen": 106020816, + "step": 157305 + }, + { + "epoch": 3.8431094715755014, + "grad_norm": 0.025838159024715424, + "learning_rate": 3.0882394521287023e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106024208, + "step": 157310 + }, + { + "epoch": 3.8432316224073486, + "grad_norm": 0.0010606314754113555, + "learning_rate": 3.0876231876310544e-07, + "loss": 0.0466, + "num_input_tokens_seen": 106027344, + "step": 157315 + }, + { + "epoch": 3.843353773239196, + "grad_norm": 0.010034746490418911, + "learning_rate": 3.0870069734020665e-07, + "loss": 0.0625, + "num_input_tokens_seen": 106030480, + "step": 157320 + }, + { + "epoch": 3.843475924071043, + "grad_norm": 0.00046287436271086335, + "learning_rate": 3.086390809446223e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106033872, + "step": 157325 + }, + { + "epoch": 3.84359807490289, + "grad_norm": 0.004671595059335232, + "learning_rate": 3.0857746957680096e-07, + "loss": 0.0, + "num_input_tokens_seen": 106037328, + "step": 157330 + }, + { + "epoch": 3.8437202257347374, + "grad_norm": 0.012156388722360134, + "learning_rate": 3.085158632371898e-07, + "loss": 0.0, + "num_input_tokens_seen": 106040464, + "step": 157335 + }, + { + "epoch": 3.8438423765665846, + "grad_norm": 0.00037465174682438374, + "learning_rate": 3.084542619262376e-07, + "loss": 0.0, + "num_input_tokens_seen": 106043792, + "step": 157340 + }, + { + "epoch": 3.8439645273984313, + "grad_norm": 0.0014692572876811028, + "learning_rate": 3.083926656443917e-07, + "loss": 0.0272, + "num_input_tokens_seen": 106047184, + "step": 157345 + }, + { + "epoch": 3.844086678230279, + "grad_norm": 0.10350882261991501, + "learning_rate": 3.083310743921006e-07, + "loss": 0.0, + "num_input_tokens_seen": 106050640, + "step": 157350 + }, + { + "epoch": 3.8442088290621257, + "grad_norm": 0.018523210659623146, + "learning_rate": 3.082694881698118e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106054416, + "step": 157355 + }, + { + "epoch": 3.8443309798939733, + "grad_norm": 0.0012730018934234977, + "learning_rate": 3.082079069779735e-07, + "loss": 0.0, + "num_input_tokens_seen": 106057808, + "step": 157360 + }, + { + "epoch": 3.84445313072582, + "grad_norm": 0.0022517370525747538, + "learning_rate": 3.081463308170331e-07, + "loss": 0.0, + "num_input_tokens_seen": 106061136, + "step": 157365 + }, + { + "epoch": 3.8445752815576673, + "grad_norm": 0.0016175990458577871, + "learning_rate": 3.0808475968743907e-07, + "loss": 0.0, + "num_input_tokens_seen": 106064272, + "step": 157370 + }, + { + "epoch": 3.8446974323895144, + "grad_norm": 0.09517046064138412, + "learning_rate": 3.080231935896387e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106067792, + "step": 157375 + }, + { + "epoch": 3.8448195832213616, + "grad_norm": 27.080537796020508, + "learning_rate": 3.0796163252407946e-07, + "loss": 0.0924, + "num_input_tokens_seen": 106070928, + "step": 157380 + }, + { + "epoch": 3.844941734053209, + "grad_norm": 0.0014318153262138367, + "learning_rate": 3.079000764912093e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106074064, + "step": 157385 + }, + { + "epoch": 3.845063884885056, + "grad_norm": 75.27845764160156, + "learning_rate": 3.078385254914764e-07, + "loss": 0.0155, + "num_input_tokens_seen": 106077776, + "step": 157390 + }, + { + "epoch": 3.845186035716903, + "grad_norm": 0.06256076693534851, + "learning_rate": 3.077769795253276e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106081360, + "step": 157395 + }, + { + "epoch": 3.8453081865487504, + "grad_norm": 0.0019677793607115746, + "learning_rate": 3.077154385932109e-07, + "loss": 0.0, + "num_input_tokens_seen": 106084368, + "step": 157400 + }, + { + "epoch": 3.8454303373805976, + "grad_norm": 0.07321988046169281, + "learning_rate": 3.0765390269557356e-07, + "loss": 0.0, + "num_input_tokens_seen": 106087824, + "step": 157405 + }, + { + "epoch": 3.8455524882124448, + "grad_norm": 0.0027987107168883085, + "learning_rate": 3.0759237183286356e-07, + "loss": 0.0, + "num_input_tokens_seen": 106091088, + "step": 157410 + }, + { + "epoch": 3.845674639044292, + "grad_norm": 0.013440297916531563, + "learning_rate": 3.075308460055278e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106094224, + "step": 157415 + }, + { + "epoch": 3.845796789876139, + "grad_norm": 0.002260087290778756, + "learning_rate": 3.074693252140139e-07, + "loss": 0.0, + "num_input_tokens_seen": 106097424, + "step": 157420 + }, + { + "epoch": 3.8459189407079863, + "grad_norm": 0.017164267599582672, + "learning_rate": 3.0740780945876963e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106100816, + "step": 157425 + }, + { + "epoch": 3.8460410915398335, + "grad_norm": 0.09083389490842819, + "learning_rate": 3.0734629874024187e-07, + "loss": 0.0202, + "num_input_tokens_seen": 106104080, + "step": 157430 + }, + { + "epoch": 3.8461632423716807, + "grad_norm": 0.00015704776160418987, + "learning_rate": 3.072847930588783e-07, + "loss": 0.0, + "num_input_tokens_seen": 106107664, + "step": 157435 + }, + { + "epoch": 3.8462853932035275, + "grad_norm": 0.005764813628047705, + "learning_rate": 3.072232924151258e-07, + "loss": 0.0, + "num_input_tokens_seen": 106111760, + "step": 157440 + }, + { + "epoch": 3.846407544035375, + "grad_norm": 0.004832027480006218, + "learning_rate": 3.071617968094319e-07, + "loss": 0.0, + "num_input_tokens_seen": 106114704, + "step": 157445 + }, + { + "epoch": 3.846529694867222, + "grad_norm": 9.927536302711815e-05, + "learning_rate": 3.0710030624224405e-07, + "loss": 0.0, + "num_input_tokens_seen": 106117840, + "step": 157450 + }, + { + "epoch": 3.8466518456990695, + "grad_norm": 0.007467443123459816, + "learning_rate": 3.070388207140088e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106120912, + "step": 157455 + }, + { + "epoch": 3.8467739965309162, + "grad_norm": 0.05119911581277847, + "learning_rate": 3.0697734022517386e-07, + "loss": 0.0, + "num_input_tokens_seen": 106124624, + "step": 157460 + }, + { + "epoch": 3.8468961473627634, + "grad_norm": 0.0014485791325569153, + "learning_rate": 3.0691586477618614e-07, + "loss": 0.0, + "num_input_tokens_seen": 106128400, + "step": 157465 + }, + { + "epoch": 3.8470182981946106, + "grad_norm": 0.004423577804118395, + "learning_rate": 3.0685439436749237e-07, + "loss": 0.0614, + "num_input_tokens_seen": 106131792, + "step": 157470 + }, + { + "epoch": 3.847140449026458, + "grad_norm": 0.0003178124316036701, + "learning_rate": 3.067929289995402e-07, + "loss": 0.0, + "num_input_tokens_seen": 106135824, + "step": 157475 + }, + { + "epoch": 3.847262599858305, + "grad_norm": 0.0001393468992318958, + "learning_rate": 3.067314686727761e-07, + "loss": 0.0, + "num_input_tokens_seen": 106139152, + "step": 157480 + }, + { + "epoch": 3.847384750690152, + "grad_norm": 0.0022997669875621796, + "learning_rate": 3.066700133876474e-07, + "loss": 0.0, + "num_input_tokens_seen": 106142032, + "step": 157485 + }, + { + "epoch": 3.8475069015219994, + "grad_norm": 0.24477975070476532, + "learning_rate": 3.066085631446006e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106145616, + "step": 157490 + }, + { + "epoch": 3.8476290523538466, + "grad_norm": 0.0012179150944575667, + "learning_rate": 3.0654711794408304e-07, + "loss": 0.0, + "num_input_tokens_seen": 106148944, + "step": 157495 + }, + { + "epoch": 3.8477512031856937, + "grad_norm": 0.3981289267539978, + "learning_rate": 3.064856777865412e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106152528, + "step": 157500 + }, + { + "epoch": 3.847873354017541, + "grad_norm": 0.32446688413619995, + "learning_rate": 3.064242426724223e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106156048, + "step": 157505 + }, + { + "epoch": 3.847995504849388, + "grad_norm": 0.0024203043431043625, + "learning_rate": 3.0636281260217243e-07, + "loss": 0.0, + "num_input_tokens_seen": 106159184, + "step": 157510 + }, + { + "epoch": 3.8481176556812353, + "grad_norm": 0.005391005426645279, + "learning_rate": 3.0630138757623893e-07, + "loss": 0.0433, + "num_input_tokens_seen": 106162832, + "step": 157515 + }, + { + "epoch": 3.8482398065130825, + "grad_norm": 0.0027643253561109304, + "learning_rate": 3.0623996759506855e-07, + "loss": 0.0, + "num_input_tokens_seen": 106166096, + "step": 157520 + }, + { + "epoch": 3.8483619573449293, + "grad_norm": 16.738208770751953, + "learning_rate": 3.061785526591073e-07, + "loss": 0.1025, + "num_input_tokens_seen": 106169552, + "step": 157525 + }, + { + "epoch": 3.848484108176777, + "grad_norm": 0.0038885173853486776, + "learning_rate": 3.0611714276880273e-07, + "loss": 0.0, + "num_input_tokens_seen": 106173008, + "step": 157530 + }, + { + "epoch": 3.8486062590086236, + "grad_norm": 0.0036091655492782593, + "learning_rate": 3.060557379246005e-07, + "loss": 0.0, + "num_input_tokens_seen": 106176400, + "step": 157535 + }, + { + "epoch": 3.8487284098404713, + "grad_norm": 0.0007444451912306249, + "learning_rate": 3.0599433812694765e-07, + "loss": 0.0283, + "num_input_tokens_seen": 106179600, + "step": 157540 + }, + { + "epoch": 3.848850560672318, + "grad_norm": 0.0016477408353239298, + "learning_rate": 3.0593294337629097e-07, + "loss": 0.0, + "num_input_tokens_seen": 106183632, + "step": 157545 + }, + { + "epoch": 3.848972711504165, + "grad_norm": 0.025943255051970482, + "learning_rate": 3.0587155367307614e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106186960, + "step": 157550 + }, + { + "epoch": 3.8490948623360124, + "grad_norm": 0.0005402068491093814, + "learning_rate": 3.0581016901775057e-07, + "loss": 0.0713, + "num_input_tokens_seen": 106190288, + "step": 157555 + }, + { + "epoch": 3.8492170131678596, + "grad_norm": 0.7853077054023743, + "learning_rate": 3.057487894107598e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106193808, + "step": 157560 + }, + { + "epoch": 3.8493391639997068, + "grad_norm": 0.023170972242951393, + "learning_rate": 3.056874148525508e-07, + "loss": 0.0, + "num_input_tokens_seen": 106196944, + "step": 157565 + }, + { + "epoch": 3.849461314831554, + "grad_norm": 0.0006037909188307822, + "learning_rate": 3.056260453435694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106200208, + "step": 157570 + }, + { + "epoch": 3.849583465663401, + "grad_norm": 0.0042616198770701885, + "learning_rate": 3.0556468088426256e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106203728, + "step": 157575 + }, + { + "epoch": 3.8497056164952483, + "grad_norm": 0.0017427516868337989, + "learning_rate": 3.0550332147507606e-07, + "loss": 0.0, + "num_input_tokens_seen": 106206864, + "step": 157580 + }, + { + "epoch": 3.8498277673270955, + "grad_norm": 0.000949391454923898, + "learning_rate": 3.0544196711645586e-07, + "loss": 0.0, + "num_input_tokens_seen": 106210128, + "step": 157585 + }, + { + "epoch": 3.8499499181589427, + "grad_norm": 0.07061922550201416, + "learning_rate": 3.053806178088488e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106213456, + "step": 157590 + }, + { + "epoch": 3.85007206899079, + "grad_norm": 0.0007843165658414364, + "learning_rate": 3.053192735527005e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106216656, + "step": 157595 + }, + { + "epoch": 3.850194219822637, + "grad_norm": 0.011597550474107265, + "learning_rate": 3.052579343484573e-07, + "loss": 0.0619, + "num_input_tokens_seen": 106220048, + "step": 157600 + }, + { + "epoch": 3.8503163706544843, + "grad_norm": 0.0038043519016355276, + "learning_rate": 3.0519660019656544e-07, + "loss": 0.0005, + "num_input_tokens_seen": 106223312, + "step": 157605 + }, + { + "epoch": 3.8504385214863315, + "grad_norm": 0.12183055281639099, + "learning_rate": 3.051352710974706e-07, + "loss": 0.0, + "num_input_tokens_seen": 106226448, + "step": 157610 + }, + { + "epoch": 3.8505606723181787, + "grad_norm": 0.0008379321661777794, + "learning_rate": 3.0507394705161913e-07, + "loss": 0.0, + "num_input_tokens_seen": 106229776, + "step": 157615 + }, + { + "epoch": 3.8506828231500254, + "grad_norm": 0.0189727284014225, + "learning_rate": 3.050126280594567e-07, + "loss": 0.0, + "num_input_tokens_seen": 106232976, + "step": 157620 + }, + { + "epoch": 3.850804973981873, + "grad_norm": 0.007365493103861809, + "learning_rate": 3.0495131412142963e-07, + "loss": 0.0, + "num_input_tokens_seen": 106236624, + "step": 157625 + }, + { + "epoch": 3.85092712481372, + "grad_norm": 17.411718368530273, + "learning_rate": 3.0489000523798316e-07, + "loss": 0.1139, + "num_input_tokens_seen": 106240528, + "step": 157630 + }, + { + "epoch": 3.851049275645567, + "grad_norm": 0.0039583053439855576, + "learning_rate": 3.048287014095635e-07, + "loss": 0.0, + "num_input_tokens_seen": 106244304, + "step": 157635 + }, + { + "epoch": 3.851171426477414, + "grad_norm": 0.00854410044848919, + "learning_rate": 3.0476740263661693e-07, + "loss": 0.0, + "num_input_tokens_seen": 106248272, + "step": 157640 + }, + { + "epoch": 3.8512935773092614, + "grad_norm": 0.4559212923049927, + "learning_rate": 3.0470610891958836e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106251600, + "step": 157645 + }, + { + "epoch": 3.8514157281411086, + "grad_norm": 0.00413027498871088, + "learning_rate": 3.0464482025892444e-07, + "loss": 0.0, + "num_input_tokens_seen": 106254864, + "step": 157650 + }, + { + "epoch": 3.8515378789729557, + "grad_norm": 0.004023184534162283, + "learning_rate": 3.0458353665507e-07, + "loss": 0.0, + "num_input_tokens_seen": 106258064, + "step": 157655 + }, + { + "epoch": 3.851660029804803, + "grad_norm": 0.0012542590266093612, + "learning_rate": 3.0452225810847153e-07, + "loss": 0.0, + "num_input_tokens_seen": 106261584, + "step": 157660 + }, + { + "epoch": 3.85178218063665, + "grad_norm": 0.002548971213400364, + "learning_rate": 3.0446098461957383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106265168, + "step": 157665 + }, + { + "epoch": 3.8519043314684973, + "grad_norm": 0.001746556255966425, + "learning_rate": 3.043997161888233e-07, + "loss": 0.0, + "num_input_tokens_seen": 106268880, + "step": 157670 + }, + { + "epoch": 3.8520264823003445, + "grad_norm": 0.0014939203392714262, + "learning_rate": 3.0433845281666484e-07, + "loss": 0.0, + "num_input_tokens_seen": 106271824, + "step": 157675 + }, + { + "epoch": 3.8521486331321917, + "grad_norm": 0.02588808350265026, + "learning_rate": 3.0427719450354463e-07, + "loss": 0.0, + "num_input_tokens_seen": 106275472, + "step": 157680 + }, + { + "epoch": 3.852270783964039, + "grad_norm": 0.02764168195426464, + "learning_rate": 3.042159412499077e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106278992, + "step": 157685 + }, + { + "epoch": 3.852392934795886, + "grad_norm": 0.0657828077673912, + "learning_rate": 3.041546930561992e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106282320, + "step": 157690 + }, + { + "epoch": 3.8525150856277333, + "grad_norm": 0.029806675389409065, + "learning_rate": 3.04093449922865e-07, + "loss": 0.0, + "num_input_tokens_seen": 106285904, + "step": 157695 + }, + { + "epoch": 3.8526372364595804, + "grad_norm": 0.0013426138320937753, + "learning_rate": 3.0403221185035075e-07, + "loss": 0.0, + "num_input_tokens_seen": 106288912, + "step": 157700 + }, + { + "epoch": 3.852759387291427, + "grad_norm": 0.0034400273580104113, + "learning_rate": 3.0397097883910116e-07, + "loss": 0.0, + "num_input_tokens_seen": 106291664, + "step": 157705 + }, + { + "epoch": 3.852881538123275, + "grad_norm": 0.007743450812995434, + "learning_rate": 3.0390975088956207e-07, + "loss": 0.0, + "num_input_tokens_seen": 106294928, + "step": 157710 + }, + { + "epoch": 3.8530036889551216, + "grad_norm": 0.01713470369577408, + "learning_rate": 3.038485280021783e-07, + "loss": 0.0, + "num_input_tokens_seen": 106298448, + "step": 157715 + }, + { + "epoch": 3.853125839786969, + "grad_norm": 0.0026307678781449795, + "learning_rate": 3.0378731017739547e-07, + "loss": 0.0, + "num_input_tokens_seen": 106301968, + "step": 157720 + }, + { + "epoch": 3.853247990618816, + "grad_norm": 0.026001354679465294, + "learning_rate": 3.0372609741565824e-07, + "loss": 0.0, + "num_input_tokens_seen": 106305296, + "step": 157725 + }, + { + "epoch": 3.853370141450663, + "grad_norm": 0.013364420272409916, + "learning_rate": 3.0366488971741224e-07, + "loss": 0.0, + "num_input_tokens_seen": 106308304, + "step": 157730 + }, + { + "epoch": 3.8534922922825103, + "grad_norm": 2.170867919921875, + "learning_rate": 3.036036870831027e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106311824, + "step": 157735 + }, + { + "epoch": 3.8536144431143575, + "grad_norm": 0.0966757982969284, + "learning_rate": 3.0354248951317407e-07, + "loss": 0.022, + "num_input_tokens_seen": 106315216, + "step": 157740 + }, + { + "epoch": 3.8537365939462047, + "grad_norm": 0.018707867711782455, + "learning_rate": 3.034812970080721e-07, + "loss": 0.0, + "num_input_tokens_seen": 106318288, + "step": 157745 + }, + { + "epoch": 3.853858744778052, + "grad_norm": 0.011910786852240562, + "learning_rate": 3.034201095682413e-07, + "loss": 0.0, + "num_input_tokens_seen": 106321168, + "step": 157750 + }, + { + "epoch": 3.853980895609899, + "grad_norm": 0.0008571508224122226, + "learning_rate": 3.0335892719412704e-07, + "loss": 0.0, + "num_input_tokens_seen": 106325072, + "step": 157755 + }, + { + "epoch": 3.8541030464417463, + "grad_norm": 0.002870930125936866, + "learning_rate": 3.032977498861737e-07, + "loss": 0.0, + "num_input_tokens_seen": 106328464, + "step": 157760 + }, + { + "epoch": 3.8542251972735935, + "grad_norm": 0.013235303573310375, + "learning_rate": 3.032365776448266e-07, + "loss": 0.0, + "num_input_tokens_seen": 106331472, + "step": 157765 + }, + { + "epoch": 3.8543473481054407, + "grad_norm": 0.05652252584695816, + "learning_rate": 3.0317541047053074e-07, + "loss": 0.0003, + "num_input_tokens_seen": 106335120, + "step": 157770 + }, + { + "epoch": 3.854469498937288, + "grad_norm": 0.0021372237242758274, + "learning_rate": 3.031142483637308e-07, + "loss": 0.0, + "num_input_tokens_seen": 106338320, + "step": 157775 + }, + { + "epoch": 3.854591649769135, + "grad_norm": 0.00040053142583929, + "learning_rate": 3.030530913248711e-07, + "loss": 0.0, + "num_input_tokens_seen": 106341264, + "step": 157780 + }, + { + "epoch": 3.8547138006009822, + "grad_norm": 0.000844833324663341, + "learning_rate": 3.0299193935439714e-07, + "loss": 0.0, + "num_input_tokens_seen": 106344080, + "step": 157785 + }, + { + "epoch": 3.8548359514328294, + "grad_norm": 0.004650758113712072, + "learning_rate": 3.0293079245275297e-07, + "loss": 0.0, + "num_input_tokens_seen": 106347536, + "step": 157790 + }, + { + "epoch": 3.8549581022646766, + "grad_norm": 0.0012025663163512945, + "learning_rate": 3.0286965062038383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106350608, + "step": 157795 + }, + { + "epoch": 3.8550802530965234, + "grad_norm": 0.0009295342024415731, + "learning_rate": 3.028085138577338e-07, + "loss": 0.0, + "num_input_tokens_seen": 106353616, + "step": 157800 + }, + { + "epoch": 3.855202403928371, + "grad_norm": 0.005253209732472897, + "learning_rate": 3.027473821652481e-07, + "loss": 0.0, + "num_input_tokens_seen": 106357328, + "step": 157805 + }, + { + "epoch": 3.8553245547602177, + "grad_norm": 0.3880409002304077, + "learning_rate": 3.0268625554337067e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106360528, + "step": 157810 + }, + { + "epoch": 3.855446705592065, + "grad_norm": 0.00103185314219445, + "learning_rate": 3.026251339925466e-07, + "loss": 0.0, + "num_input_tokens_seen": 106364176, + "step": 157815 + }, + { + "epoch": 3.855568856423912, + "grad_norm": 0.0018289852887392044, + "learning_rate": 3.025640175132199e-07, + "loss": 0.0, + "num_input_tokens_seen": 106367696, + "step": 157820 + }, + { + "epoch": 3.8556910072557593, + "grad_norm": 0.010241532698273659, + "learning_rate": 3.025029061058352e-07, + "loss": 0.0, + "num_input_tokens_seen": 106370896, + "step": 157825 + }, + { + "epoch": 3.8558131580876065, + "grad_norm": 0.14833614230155945, + "learning_rate": 3.0244179977083727e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106374480, + "step": 157830 + }, + { + "epoch": 3.8559353089194537, + "grad_norm": 0.023858023807406425, + "learning_rate": 3.023806985086699e-07, + "loss": 0.0, + "num_input_tokens_seen": 106377872, + "step": 157835 + }, + { + "epoch": 3.856057459751301, + "grad_norm": 0.16358113288879395, + "learning_rate": 3.0231960231977803e-07, + "loss": 0.0, + "num_input_tokens_seen": 106381520, + "step": 157840 + }, + { + "epoch": 3.856179610583148, + "grad_norm": 0.00500052236020565, + "learning_rate": 3.022585112046053e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106384464, + "step": 157845 + }, + { + "epoch": 3.8563017614149953, + "grad_norm": 0.0005464443238452077, + "learning_rate": 3.021974251635965e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106388112, + "step": 157850 + }, + { + "epoch": 3.8564239122468424, + "grad_norm": 0.000844131747726351, + "learning_rate": 3.021363441971959e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106391312, + "step": 157855 + }, + { + "epoch": 3.8565460630786896, + "grad_norm": 0.0025310174096375704, + "learning_rate": 3.020752683058473e-07, + "loss": 0.0, + "num_input_tokens_seen": 106394832, + "step": 157860 + }, + { + "epoch": 3.856668213910537, + "grad_norm": 0.0003100551257375628, + "learning_rate": 3.0201419748999524e-07, + "loss": 0.0, + "num_input_tokens_seen": 106398160, + "step": 157865 + }, + { + "epoch": 3.856790364742384, + "grad_norm": 0.0006610352429561317, + "learning_rate": 3.019531317500834e-07, + "loss": 0.0, + "num_input_tokens_seen": 106401488, + "step": 157870 + }, + { + "epoch": 3.856912515574231, + "grad_norm": 0.0011790769640356302, + "learning_rate": 3.0189207108655656e-07, + "loss": 0.0, + "num_input_tokens_seen": 106404816, + "step": 157875 + }, + { + "epoch": 3.8570346664060784, + "grad_norm": 0.0006918759318068624, + "learning_rate": 3.018310154998579e-07, + "loss": 0.0465, + "num_input_tokens_seen": 106408080, + "step": 157880 + }, + { + "epoch": 3.857156817237925, + "grad_norm": 0.005680972710251808, + "learning_rate": 3.017699649904323e-07, + "loss": 0.0, + "num_input_tokens_seen": 106411664, + "step": 157885 + }, + { + "epoch": 3.8572789680697728, + "grad_norm": 0.002036899561062455, + "learning_rate": 3.017089195587232e-07, + "loss": 0.0, + "num_input_tokens_seen": 106414800, + "step": 157890 + }, + { + "epoch": 3.8574011189016195, + "grad_norm": 0.00011878184886882082, + "learning_rate": 3.0164787920517445e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106418448, + "step": 157895 + }, + { + "epoch": 3.857523269733467, + "grad_norm": 0.5946331024169922, + "learning_rate": 3.0158684393023035e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106421904, + "step": 157900 + }, + { + "epoch": 3.857645420565314, + "grad_norm": 0.00022538656776305288, + "learning_rate": 3.015258137343344e-07, + "loss": 0.0, + "num_input_tokens_seen": 106425104, + "step": 157905 + }, + { + "epoch": 3.857767571397161, + "grad_norm": 0.0005389982834458351, + "learning_rate": 3.0146478861793076e-07, + "loss": 0.0, + "num_input_tokens_seen": 106428752, + "step": 157910 + }, + { + "epoch": 3.8578897222290083, + "grad_norm": 0.0007590112509205937, + "learning_rate": 3.0140376858146286e-07, + "loss": 0.0, + "num_input_tokens_seen": 106432016, + "step": 157915 + }, + { + "epoch": 3.8580118730608555, + "grad_norm": 0.005567790009081364, + "learning_rate": 3.0134275362537465e-07, + "loss": 0.0, + "num_input_tokens_seen": 106435024, + "step": 157920 + }, + { + "epoch": 3.8581340238927027, + "grad_norm": 0.006411381531506777, + "learning_rate": 3.012817437501102e-07, + "loss": 0.0006, + "num_input_tokens_seen": 106438544, + "step": 157925 + }, + { + "epoch": 3.85825617472455, + "grad_norm": 0.5736486315727234, + "learning_rate": 3.0122073895611244e-07, + "loss": 0.0468, + "num_input_tokens_seen": 106441616, + "step": 157930 + }, + { + "epoch": 3.858378325556397, + "grad_norm": 0.002640968654304743, + "learning_rate": 3.011597392438258e-07, + "loss": 0.0, + "num_input_tokens_seen": 106444752, + "step": 157935 + }, + { + "epoch": 3.8585004763882442, + "grad_norm": 0.00707990862429142, + "learning_rate": 3.010987446136931e-07, + "loss": 0.0835, + "num_input_tokens_seen": 106448208, + "step": 157940 + }, + { + "epoch": 3.8586226272200914, + "grad_norm": 0.001580284209921956, + "learning_rate": 3.0103775506615837e-07, + "loss": 0.0, + "num_input_tokens_seen": 106452176, + "step": 157945 + }, + { + "epoch": 3.8587447780519386, + "grad_norm": 0.0004957346827723086, + "learning_rate": 3.0097677060166536e-07, + "loss": 0.0, + "num_input_tokens_seen": 106455952, + "step": 157950 + }, + { + "epoch": 3.858866928883786, + "grad_norm": 0.0006093760603107512, + "learning_rate": 3.00915791220657e-07, + "loss": 0.0, + "num_input_tokens_seen": 106459152, + "step": 157955 + }, + { + "epoch": 3.858989079715633, + "grad_norm": 0.005039932671934366, + "learning_rate": 3.008548169235774e-07, + "loss": 0.0, + "num_input_tokens_seen": 106462800, + "step": 157960 + }, + { + "epoch": 3.85911123054748, + "grad_norm": 0.005193840246647596, + "learning_rate": 3.0079384771086924e-07, + "loss": 0.0, + "num_input_tokens_seen": 106466320, + "step": 157965 + }, + { + "epoch": 3.859233381379327, + "grad_norm": 0.006895958911627531, + "learning_rate": 3.0073288358297656e-07, + "loss": 0.0, + "num_input_tokens_seen": 106469456, + "step": 157970 + }, + { + "epoch": 3.8593555322111746, + "grad_norm": 0.01828017272055149, + "learning_rate": 3.0067192454034217e-07, + "loss": 0.0, + "num_input_tokens_seen": 106472656, + "step": 157975 + }, + { + "epoch": 3.8594776830430213, + "grad_norm": 0.004485220182687044, + "learning_rate": 3.0061097058341e-07, + "loss": 0.0, + "num_input_tokens_seen": 106476240, + "step": 157980 + }, + { + "epoch": 3.859599833874869, + "grad_norm": 0.00388152408413589, + "learning_rate": 3.005500217126226e-07, + "loss": 0.0003, + "num_input_tokens_seen": 106479696, + "step": 157985 + }, + { + "epoch": 3.8597219847067157, + "grad_norm": 0.0043313889764249325, + "learning_rate": 3.004890779284239e-07, + "loss": 0.0796, + "num_input_tokens_seen": 106483216, + "step": 157990 + }, + { + "epoch": 3.859844135538563, + "grad_norm": 0.0008505122968927026, + "learning_rate": 3.0042813923125675e-07, + "loss": 0.0003, + "num_input_tokens_seen": 106486864, + "step": 157995 + }, + { + "epoch": 3.85996628637041, + "grad_norm": 0.005160463973879814, + "learning_rate": 3.0036720562156406e-07, + "loss": 0.0, + "num_input_tokens_seen": 106490512, + "step": 158000 + }, + { + "epoch": 3.8600884372022572, + "grad_norm": 0.0028276483062654734, + "learning_rate": 3.003062770997892e-07, + "loss": 0.0, + "num_input_tokens_seen": 106494032, + "step": 158005 + }, + { + "epoch": 3.8602105880341044, + "grad_norm": 0.008698842488229275, + "learning_rate": 3.002453536663756e-07, + "loss": 0.0385, + "num_input_tokens_seen": 106497552, + "step": 158010 + }, + { + "epoch": 3.8603327388659516, + "grad_norm": 0.0025066733360290527, + "learning_rate": 3.001844353217657e-07, + "loss": 0.0, + "num_input_tokens_seen": 106501136, + "step": 158015 + }, + { + "epoch": 3.860454889697799, + "grad_norm": 0.00978843867778778, + "learning_rate": 3.0012352206640313e-07, + "loss": 0.0, + "num_input_tokens_seen": 106504080, + "step": 158020 + }, + { + "epoch": 3.860577040529646, + "grad_norm": 0.016681527718901634, + "learning_rate": 3.000626139007302e-07, + "loss": 0.0, + "num_input_tokens_seen": 106507280, + "step": 158025 + }, + { + "epoch": 3.860699191361493, + "grad_norm": 0.03341963514685631, + "learning_rate": 3.0000171082519056e-07, + "loss": 0.0, + "num_input_tokens_seen": 106510288, + "step": 158030 + }, + { + "epoch": 3.8608213421933404, + "grad_norm": 0.002291738986968994, + "learning_rate": 2.999408128402264e-07, + "loss": 0.0, + "num_input_tokens_seen": 106513808, + "step": 158035 + }, + { + "epoch": 3.8609434930251876, + "grad_norm": 0.0023505198769271374, + "learning_rate": 2.9987991994628094e-07, + "loss": 0.0, + "num_input_tokens_seen": 106517008, + "step": 158040 + }, + { + "epoch": 3.8610656438570348, + "grad_norm": 0.0010157792130485177, + "learning_rate": 2.998190321437973e-07, + "loss": 0.0, + "num_input_tokens_seen": 106520144, + "step": 158045 + }, + { + "epoch": 3.861187794688882, + "grad_norm": 0.0016791699454188347, + "learning_rate": 2.9975814943321774e-07, + "loss": 0.0436, + "num_input_tokens_seen": 106523152, + "step": 158050 + }, + { + "epoch": 3.861309945520729, + "grad_norm": 0.0008609518990851939, + "learning_rate": 2.9969727181498563e-07, + "loss": 0.0, + "num_input_tokens_seen": 106526736, + "step": 158055 + }, + { + "epoch": 3.8614320963525763, + "grad_norm": 0.004202486481517553, + "learning_rate": 2.996363992895429e-07, + "loss": 0.0, + "num_input_tokens_seen": 106530256, + "step": 158060 + }, + { + "epoch": 3.861554247184423, + "grad_norm": 0.0281804371625185, + "learning_rate": 2.9957553185733295e-07, + "loss": 0.0, + "num_input_tokens_seen": 106533584, + "step": 158065 + }, + { + "epoch": 3.8616763980162707, + "grad_norm": 0.03088965080678463, + "learning_rate": 2.995146695187979e-07, + "loss": 0.0, + "num_input_tokens_seen": 106536976, + "step": 158070 + }, + { + "epoch": 3.8617985488481175, + "grad_norm": 0.0011702068150043488, + "learning_rate": 2.994538122743806e-07, + "loss": 0.0003, + "num_input_tokens_seen": 106540176, + "step": 158075 + }, + { + "epoch": 3.861920699679965, + "grad_norm": 0.002321459585800767, + "learning_rate": 2.993929601245239e-07, + "loss": 0.0, + "num_input_tokens_seen": 106543312, + "step": 158080 + }, + { + "epoch": 3.862042850511812, + "grad_norm": 0.0071487524546682835, + "learning_rate": 2.993321130696699e-07, + "loss": 0.0, + "num_input_tokens_seen": 106546576, + "step": 158085 + }, + { + "epoch": 3.862165001343659, + "grad_norm": 0.011662092991173267, + "learning_rate": 2.9927127111026094e-07, + "loss": 0.0, + "num_input_tokens_seen": 106549712, + "step": 158090 + }, + { + "epoch": 3.862287152175506, + "grad_norm": 0.029429420828819275, + "learning_rate": 2.992104342467402e-07, + "loss": 0.0, + "num_input_tokens_seen": 106552912, + "step": 158095 + }, + { + "epoch": 3.8624093030073534, + "grad_norm": 0.005462816916406155, + "learning_rate": 2.9914960247954936e-07, + "loss": 0.0, + "num_input_tokens_seen": 106556560, + "step": 158100 + }, + { + "epoch": 3.8625314538392006, + "grad_norm": 0.016585996374487877, + "learning_rate": 2.9908877580913126e-07, + "loss": 0.0, + "num_input_tokens_seen": 106559568, + "step": 158105 + }, + { + "epoch": 3.862653604671048, + "grad_norm": 0.001426746603101492, + "learning_rate": 2.99027954235928e-07, + "loss": 0.0693, + "num_input_tokens_seen": 106563088, + "step": 158110 + }, + { + "epoch": 3.862775755502895, + "grad_norm": 0.0006431220099329948, + "learning_rate": 2.989671377603822e-07, + "loss": 0.0, + "num_input_tokens_seen": 106566800, + "step": 158115 + }, + { + "epoch": 3.862897906334742, + "grad_norm": 0.007045199163258076, + "learning_rate": 2.989063263829357e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106570256, + "step": 158120 + }, + { + "epoch": 3.8630200571665894, + "grad_norm": 0.0010710848728194833, + "learning_rate": 2.9884552010403106e-07, + "loss": 0.0, + "num_input_tokens_seen": 106573520, + "step": 158125 + }, + { + "epoch": 3.8631422079984366, + "grad_norm": 0.008409908041357994, + "learning_rate": 2.987847189241103e-07, + "loss": 0.0, + "num_input_tokens_seen": 106576848, + "step": 158130 + }, + { + "epoch": 3.8632643588302837, + "grad_norm": 0.003941592760384083, + "learning_rate": 2.987239228436156e-07, + "loss": 0.0353, + "num_input_tokens_seen": 106580048, + "step": 158135 + }, + { + "epoch": 3.863386509662131, + "grad_norm": 0.0005290769040584564, + "learning_rate": 2.9866313186298944e-07, + "loss": 0.0775, + "num_input_tokens_seen": 106583632, + "step": 158140 + }, + { + "epoch": 3.863508660493978, + "grad_norm": 28.251291275024414, + "learning_rate": 2.9860234598267333e-07, + "loss": 0.0489, + "num_input_tokens_seen": 106586960, + "step": 158145 + }, + { + "epoch": 3.863630811325825, + "grad_norm": 0.01787254773080349, + "learning_rate": 2.985415652031099e-07, + "loss": 0.0, + "num_input_tokens_seen": 106590096, + "step": 158150 + }, + { + "epoch": 3.8637529621576725, + "grad_norm": 0.006238003261387348, + "learning_rate": 2.9848078952474063e-07, + "loss": 0.0, + "num_input_tokens_seen": 106593232, + "step": 158155 + }, + { + "epoch": 3.8638751129895192, + "grad_norm": 0.002003963105380535, + "learning_rate": 2.984200189480077e-07, + "loss": 0.0, + "num_input_tokens_seen": 106596688, + "step": 158160 + }, + { + "epoch": 3.863997263821367, + "grad_norm": 0.008238325826823711, + "learning_rate": 2.983592534733533e-07, + "loss": 0.0, + "num_input_tokens_seen": 106600272, + "step": 158165 + }, + { + "epoch": 3.8641194146532136, + "grad_norm": 0.007528550922870636, + "learning_rate": 2.98298493101219e-07, + "loss": 0.0258, + "num_input_tokens_seen": 106603472, + "step": 158170 + }, + { + "epoch": 3.864241565485061, + "grad_norm": 0.004292473196983337, + "learning_rate": 2.982377378320471e-07, + "loss": 0.0, + "num_input_tokens_seen": 106606480, + "step": 158175 + }, + { + "epoch": 3.864363716316908, + "grad_norm": 0.014148806221783161, + "learning_rate": 2.981769876662786e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106609936, + "step": 158180 + }, + { + "epoch": 3.864485867148755, + "grad_norm": 0.002944579115137458, + "learning_rate": 2.981162426043563e-07, + "loss": 0.0, + "num_input_tokens_seen": 106613264, + "step": 158185 + }, + { + "epoch": 3.8646080179806024, + "grad_norm": 0.07395872473716736, + "learning_rate": 2.980555026467212e-07, + "loss": 0.0004, + "num_input_tokens_seen": 106616272, + "step": 158190 + }, + { + "epoch": 3.8647301688124496, + "grad_norm": 0.005129363853484392, + "learning_rate": 2.9799476779381547e-07, + "loss": 0.0, + "num_input_tokens_seen": 106619728, + "step": 158195 + }, + { + "epoch": 3.8648523196442968, + "grad_norm": 0.0018878680421039462, + "learning_rate": 2.9793403804608066e-07, + "loss": 0.0, + "num_input_tokens_seen": 106623888, + "step": 158200 + }, + { + "epoch": 3.864974470476144, + "grad_norm": 0.03187033161520958, + "learning_rate": 2.9787331340395807e-07, + "loss": 0.0, + "num_input_tokens_seen": 106627664, + "step": 158205 + }, + { + "epoch": 3.865096621307991, + "grad_norm": 0.0018504534382373095, + "learning_rate": 2.9781259386788984e-07, + "loss": 0.0, + "num_input_tokens_seen": 106631248, + "step": 158210 + }, + { + "epoch": 3.8652187721398383, + "grad_norm": 0.0012075145496055484, + "learning_rate": 2.97751879438317e-07, + "loss": 0.0, + "num_input_tokens_seen": 106634704, + "step": 158215 + }, + { + "epoch": 3.8653409229716855, + "grad_norm": 0.023051664233207703, + "learning_rate": 2.976911701156818e-07, + "loss": 0.001, + "num_input_tokens_seen": 106638416, + "step": 158220 + }, + { + "epoch": 3.8654630738035327, + "grad_norm": 0.0033154578413814306, + "learning_rate": 2.9763046590042487e-07, + "loss": 0.0, + "num_input_tokens_seen": 106641616, + "step": 158225 + }, + { + "epoch": 3.86558522463538, + "grad_norm": 0.001119441818445921, + "learning_rate": 2.9756976679298805e-07, + "loss": 0.0536, + "num_input_tokens_seen": 106644688, + "step": 158230 + }, + { + "epoch": 3.865707375467227, + "grad_norm": 0.0025297042448073626, + "learning_rate": 2.9750907279381333e-07, + "loss": 0.0, + "num_input_tokens_seen": 106648464, + "step": 158235 + }, + { + "epoch": 3.8658295262990743, + "grad_norm": 0.0012945241760462523, + "learning_rate": 2.97448383903341e-07, + "loss": 0.0, + "num_input_tokens_seen": 106651792, + "step": 158240 + }, + { + "epoch": 3.865951677130921, + "grad_norm": 0.08160996437072754, + "learning_rate": 2.973877001220135e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106655440, + "step": 158245 + }, + { + "epoch": 3.8660738279627687, + "grad_norm": 0.0054352604784071445, + "learning_rate": 2.9732702145027136e-07, + "loss": 0.0, + "num_input_tokens_seen": 106658576, + "step": 158250 + }, + { + "epoch": 3.8661959787946154, + "grad_norm": 0.03615583851933479, + "learning_rate": 2.97266347888556e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106662288, + "step": 158255 + }, + { + "epoch": 3.8663181296264626, + "grad_norm": 0.007702614646404982, + "learning_rate": 2.9720567943730913e-07, + "loss": 0.0447, + "num_input_tokens_seen": 106665168, + "step": 158260 + }, + { + "epoch": 3.86644028045831, + "grad_norm": 0.013397863134741783, + "learning_rate": 2.971450160969712e-07, + "loss": 0.0, + "num_input_tokens_seen": 106668112, + "step": 158265 + }, + { + "epoch": 3.866562431290157, + "grad_norm": 0.0030918209813535213, + "learning_rate": 2.9708435786798414e-07, + "loss": 0.0, + "num_input_tokens_seen": 106672208, + "step": 158270 + }, + { + "epoch": 3.866684582122004, + "grad_norm": 0.0047109597362577915, + "learning_rate": 2.970237047507883e-07, + "loss": 0.0, + "num_input_tokens_seen": 106675408, + "step": 158275 + }, + { + "epoch": 3.8668067329538514, + "grad_norm": 0.0012370293261483312, + "learning_rate": 2.9696305674582553e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106678992, + "step": 158280 + }, + { + "epoch": 3.8669288837856985, + "grad_norm": 0.005313628353178501, + "learning_rate": 2.969024138535362e-07, + "loss": 0.0, + "num_input_tokens_seen": 106682384, + "step": 158285 + }, + { + "epoch": 3.8670510346175457, + "grad_norm": 0.000701016397215426, + "learning_rate": 2.96841776074362e-07, + "loss": 0.058, + "num_input_tokens_seen": 106685520, + "step": 158290 + }, + { + "epoch": 3.867173185449393, + "grad_norm": 0.0036516785621643066, + "learning_rate": 2.9678114340874317e-07, + "loss": 0.0, + "num_input_tokens_seen": 106689040, + "step": 158295 + }, + { + "epoch": 3.86729533628124, + "grad_norm": 0.2513877749443054, + "learning_rate": 2.967205158571212e-07, + "loss": 0.0204, + "num_input_tokens_seen": 106692880, + "step": 158300 + }, + { + "epoch": 3.8674174871130873, + "grad_norm": 0.00019543587404768914, + "learning_rate": 2.966598934199369e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106696528, + "step": 158305 + }, + { + "epoch": 3.8675396379449345, + "grad_norm": 0.012953980825841427, + "learning_rate": 2.965992760976308e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106699728, + "step": 158310 + }, + { + "epoch": 3.8676617887767817, + "grad_norm": 0.024555783718824387, + "learning_rate": 2.9653866389064387e-07, + "loss": 0.0, + "num_input_tokens_seen": 106702800, + "step": 158315 + }, + { + "epoch": 3.867783939608629, + "grad_norm": 0.06984036415815353, + "learning_rate": 2.9647805679941726e-07, + "loss": 0.0, + "num_input_tokens_seen": 106705808, + "step": 158320 + }, + { + "epoch": 3.867906090440476, + "grad_norm": 0.005320082418620586, + "learning_rate": 2.9641745482439115e-07, + "loss": 0.0004, + "num_input_tokens_seen": 106709264, + "step": 158325 + }, + { + "epoch": 3.868028241272323, + "grad_norm": 0.03669695928692818, + "learning_rate": 2.9635685796600695e-07, + "loss": 0.0, + "num_input_tokens_seen": 106712336, + "step": 158330 + }, + { + "epoch": 3.8681503921041704, + "grad_norm": 0.0015380196273326874, + "learning_rate": 2.962962662247045e-07, + "loss": 0.0, + "num_input_tokens_seen": 106715600, + "step": 158335 + }, + { + "epoch": 3.868272542936017, + "grad_norm": 0.0006915759877301753, + "learning_rate": 2.962356796009253e-07, + "loss": 0.0, + "num_input_tokens_seen": 106719184, + "step": 158340 + }, + { + "epoch": 3.868394693767865, + "grad_norm": 95.70181274414062, + "learning_rate": 2.961750980951091e-07, + "loss": 0.0771, + "num_input_tokens_seen": 106722512, + "step": 158345 + }, + { + "epoch": 3.8685168445997116, + "grad_norm": 49.37448501586914, + "learning_rate": 2.9611452170769704e-07, + "loss": 0.0329, + "num_input_tokens_seen": 106725584, + "step": 158350 + }, + { + "epoch": 3.8686389954315588, + "grad_norm": 0.009279985912144184, + "learning_rate": 2.960539504391297e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106728784, + "step": 158355 + }, + { + "epoch": 3.868761146263406, + "grad_norm": 0.06280479580163956, + "learning_rate": 2.959933842898471e-07, + "loss": 0.0, + "num_input_tokens_seen": 106731984, + "step": 158360 + }, + { + "epoch": 3.868883297095253, + "grad_norm": 0.0013443909119814634, + "learning_rate": 2.959328232602902e-07, + "loss": 0.0, + "num_input_tokens_seen": 106735312, + "step": 158365 + }, + { + "epoch": 3.8690054479271003, + "grad_norm": 0.0017101116245612502, + "learning_rate": 2.95872267350899e-07, + "loss": 0.0, + "num_input_tokens_seen": 106739024, + "step": 158370 + }, + { + "epoch": 3.8691275987589475, + "grad_norm": 0.0016042448114603758, + "learning_rate": 2.9581171656211423e-07, + "loss": 0.0439, + "num_input_tokens_seen": 106742224, + "step": 158375 + }, + { + "epoch": 3.8692497495907947, + "grad_norm": 0.03807985782623291, + "learning_rate": 2.9575117089437584e-07, + "loss": 0.0, + "num_input_tokens_seen": 106745360, + "step": 158380 + }, + { + "epoch": 3.869371900422642, + "grad_norm": 0.0057759396731853485, + "learning_rate": 2.956906303481244e-07, + "loss": 0.0, + "num_input_tokens_seen": 106748880, + "step": 158385 + }, + { + "epoch": 3.869494051254489, + "grad_norm": 0.003695416497066617, + "learning_rate": 2.956300949238003e-07, + "loss": 0.0, + "num_input_tokens_seen": 106752272, + "step": 158390 + }, + { + "epoch": 3.8696162020863363, + "grad_norm": 0.0005830421578139067, + "learning_rate": 2.955695646218437e-07, + "loss": 0.0, + "num_input_tokens_seen": 106755664, + "step": 158395 + }, + { + "epoch": 3.8697383529181835, + "grad_norm": 0.0017493544146418571, + "learning_rate": 2.9550903944269445e-07, + "loss": 0.0489, + "num_input_tokens_seen": 106758992, + "step": 158400 + }, + { + "epoch": 3.8698605037500307, + "grad_norm": 0.04775483161211014, + "learning_rate": 2.9544851938679314e-07, + "loss": 0.0544, + "num_input_tokens_seen": 106762448, + "step": 158405 + }, + { + "epoch": 3.869982654581878, + "grad_norm": 0.09650695323944092, + "learning_rate": 2.9538800445457946e-07, + "loss": 0.0005, + "num_input_tokens_seen": 106766352, + "step": 158410 + }, + { + "epoch": 3.8701048054137246, + "grad_norm": 0.010348550975322723, + "learning_rate": 2.95327494646494e-07, + "loss": 0.0, + "num_input_tokens_seen": 106769808, + "step": 158415 + }, + { + "epoch": 3.8702269562455722, + "grad_norm": 0.00027812286862172186, + "learning_rate": 2.9526698996297615e-07, + "loss": 0.0536, + "num_input_tokens_seen": 106773264, + "step": 158420 + }, + { + "epoch": 3.870349107077419, + "grad_norm": 0.001886223559267819, + "learning_rate": 2.952064904044668e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106776336, + "step": 158425 + }, + { + "epoch": 3.8704712579092666, + "grad_norm": 0.0028586441185325384, + "learning_rate": 2.951459959714049e-07, + "loss": 0.0, + "num_input_tokens_seen": 106779280, + "step": 158430 + }, + { + "epoch": 3.8705934087411134, + "grad_norm": 0.010470589622855186, + "learning_rate": 2.9508550666423136e-07, + "loss": 0.0, + "num_input_tokens_seen": 106782288, + "step": 158435 + }, + { + "epoch": 3.8707155595729605, + "grad_norm": 0.00042181566823273897, + "learning_rate": 2.9502502248338525e-07, + "loss": 0.0372, + "num_input_tokens_seen": 106785680, + "step": 158440 + }, + { + "epoch": 3.8708377104048077, + "grad_norm": 0.0008364489185623825, + "learning_rate": 2.9496454342930674e-07, + "loss": 0.0, + "num_input_tokens_seen": 106788944, + "step": 158445 + }, + { + "epoch": 3.870959861236655, + "grad_norm": 0.0018208818510174751, + "learning_rate": 2.949040695024361e-07, + "loss": 0.0, + "num_input_tokens_seen": 106792464, + "step": 158450 + }, + { + "epoch": 3.871082012068502, + "grad_norm": 3.7427899837493896, + "learning_rate": 2.9484360070321236e-07, + "loss": 0.0006, + "num_input_tokens_seen": 106795920, + "step": 158455 + }, + { + "epoch": 3.8712041629003493, + "grad_norm": 0.008290299214422703, + "learning_rate": 2.94783137032076e-07, + "loss": 0.0, + "num_input_tokens_seen": 106798928, + "step": 158460 + }, + { + "epoch": 3.8713263137321965, + "grad_norm": 0.0034195426851511, + "learning_rate": 2.94722678489466e-07, + "loss": 0.0427, + "num_input_tokens_seen": 106802256, + "step": 158465 + }, + { + "epoch": 3.8714484645640437, + "grad_norm": 0.00021721194207202643, + "learning_rate": 2.946622250758226e-07, + "loss": 0.0, + "num_input_tokens_seen": 106805584, + "step": 158470 + }, + { + "epoch": 3.871570615395891, + "grad_norm": 0.0020799501799046993, + "learning_rate": 2.9460177679158505e-07, + "loss": 0.0, + "num_input_tokens_seen": 106809232, + "step": 158475 + }, + { + "epoch": 3.871692766227738, + "grad_norm": 0.012277498841285706, + "learning_rate": 2.9454133363719304e-07, + "loss": 0.0, + "num_input_tokens_seen": 106812112, + "step": 158480 + }, + { + "epoch": 3.8718149170595852, + "grad_norm": 0.0012988023227080703, + "learning_rate": 2.944808956130864e-07, + "loss": 0.0489, + "num_input_tokens_seen": 106815952, + "step": 158485 + }, + { + "epoch": 3.8719370678914324, + "grad_norm": 0.005164467729628086, + "learning_rate": 2.944204627197042e-07, + "loss": 0.0003, + "num_input_tokens_seen": 106819088, + "step": 158490 + }, + { + "epoch": 3.8720592187232796, + "grad_norm": 0.003667938755825162, + "learning_rate": 2.9436003495748664e-07, + "loss": 0.0, + "num_input_tokens_seen": 106821968, + "step": 158495 + }, + { + "epoch": 3.872181369555127, + "grad_norm": 0.0016687294701114297, + "learning_rate": 2.942996123268722e-07, + "loss": 0.0, + "num_input_tokens_seen": 106825616, + "step": 158500 + }, + { + "epoch": 3.872303520386974, + "grad_norm": 0.0008459803066216409, + "learning_rate": 2.942391948283012e-07, + "loss": 0.0, + "num_input_tokens_seen": 106829072, + "step": 158505 + }, + { + "epoch": 3.8724256712188208, + "grad_norm": 0.0023603031877428293, + "learning_rate": 2.941787824622125e-07, + "loss": 0.0, + "num_input_tokens_seen": 106832336, + "step": 158510 + }, + { + "epoch": 3.8725478220506684, + "grad_norm": 0.015080592595040798, + "learning_rate": 2.9411837522904536e-07, + "loss": 0.0, + "num_input_tokens_seen": 106835920, + "step": 158515 + }, + { + "epoch": 3.872669972882515, + "grad_norm": 0.009865859523415565, + "learning_rate": 2.940579731292395e-07, + "loss": 0.0, + "num_input_tokens_seen": 106839504, + "step": 158520 + }, + { + "epoch": 3.8727921237143628, + "grad_norm": 248.03933715820312, + "learning_rate": 2.9399757616323363e-07, + "loss": 0.0327, + "num_input_tokens_seen": 106842832, + "step": 158525 + }, + { + "epoch": 3.8729142745462095, + "grad_norm": 0.005308913998305798, + "learning_rate": 2.9393718433146766e-07, + "loss": 0.0, + "num_input_tokens_seen": 106846160, + "step": 158530 + }, + { + "epoch": 3.8730364253780567, + "grad_norm": 0.002870805561542511, + "learning_rate": 2.938767976343799e-07, + "loss": 0.0, + "num_input_tokens_seen": 106849296, + "step": 158535 + }, + { + "epoch": 3.873158576209904, + "grad_norm": 0.0016712587093934417, + "learning_rate": 2.9381641607241014e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106852944, + "step": 158540 + }, + { + "epoch": 3.873280727041751, + "grad_norm": 0.006070550996810198, + "learning_rate": 2.937560396459976e-07, + "loss": 0.0, + "num_input_tokens_seen": 106856720, + "step": 158545 + }, + { + "epoch": 3.8734028778735983, + "grad_norm": 0.00014672131510451436, + "learning_rate": 2.936956683555808e-07, + "loss": 0.0004, + "num_input_tokens_seen": 106859728, + "step": 158550 + }, + { + "epoch": 3.8735250287054455, + "grad_norm": 0.001376955071464181, + "learning_rate": 2.936353022015994e-07, + "loss": 0.0, + "num_input_tokens_seen": 106863376, + "step": 158555 + }, + { + "epoch": 3.8736471795372927, + "grad_norm": 0.0015214919112622738, + "learning_rate": 2.935749411844918e-07, + "loss": 0.0, + "num_input_tokens_seen": 106866576, + "step": 158560 + }, + { + "epoch": 3.87376933036914, + "grad_norm": 0.0040117776952683926, + "learning_rate": 2.9351458530469707e-07, + "loss": 0.0, + "num_input_tokens_seen": 106869904, + "step": 158565 + }, + { + "epoch": 3.873891481200987, + "grad_norm": 0.0029242250602692366, + "learning_rate": 2.9345423456265474e-07, + "loss": 0.0325, + "num_input_tokens_seen": 106873232, + "step": 158570 + }, + { + "epoch": 3.874013632032834, + "grad_norm": 0.0031731261406093836, + "learning_rate": 2.933938889588029e-07, + "loss": 0.0, + "num_input_tokens_seen": 106876688, + "step": 158575 + }, + { + "epoch": 3.8741357828646814, + "grad_norm": 0.0031085580121725798, + "learning_rate": 2.933335484935812e-07, + "loss": 0.0, + "num_input_tokens_seen": 106879952, + "step": 158580 + }, + { + "epoch": 3.8742579336965286, + "grad_norm": 0.0030465181916952133, + "learning_rate": 2.932732131674275e-07, + "loss": 0.0953, + "num_input_tokens_seen": 106883600, + "step": 158585 + }, + { + "epoch": 3.874380084528376, + "grad_norm": 0.0008837351924739778, + "learning_rate": 2.932128829807815e-07, + "loss": 0.0, + "num_input_tokens_seen": 106886864, + "step": 158590 + }, + { + "epoch": 3.8745022353602225, + "grad_norm": 0.01298034843057394, + "learning_rate": 2.931525579340811e-07, + "loss": 0.0524, + "num_input_tokens_seen": 106890320, + "step": 158595 + }, + { + "epoch": 3.87462438619207, + "grad_norm": 0.0018864242592826486, + "learning_rate": 2.9309223802776585e-07, + "loss": 0.0, + "num_input_tokens_seen": 106894096, + "step": 158600 + }, + { + "epoch": 3.874746537023917, + "grad_norm": 0.0011013778857886791, + "learning_rate": 2.9303192326227365e-07, + "loss": 0.0, + "num_input_tokens_seen": 106898000, + "step": 158605 + }, + { + "epoch": 3.8748686878557645, + "grad_norm": 0.008215600624680519, + "learning_rate": 2.929716136380438e-07, + "loss": 0.0004, + "num_input_tokens_seen": 106901008, + "step": 158610 + }, + { + "epoch": 3.8749908386876113, + "grad_norm": 0.0011877411743625998, + "learning_rate": 2.9291130915551443e-07, + "loss": 0.0, + "num_input_tokens_seen": 106904144, + "step": 158615 + }, + { + "epoch": 3.8751129895194585, + "grad_norm": 0.004192714113742113, + "learning_rate": 2.928510098151239e-07, + "loss": 0.0, + "num_input_tokens_seen": 106907536, + "step": 158620 + }, + { + "epoch": 3.8752351403513057, + "grad_norm": 0.0015647343825548887, + "learning_rate": 2.927907156173114e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106911184, + "step": 158625 + }, + { + "epoch": 3.875357291183153, + "grad_norm": 0.005396246910095215, + "learning_rate": 2.927304265625148e-07, + "loss": 0.003, + "num_input_tokens_seen": 106914320, + "step": 158630 + }, + { + "epoch": 3.875479442015, + "grad_norm": 0.005837199278175831, + "learning_rate": 2.9267014265117264e-07, + "loss": 0.0, + "num_input_tokens_seen": 106917776, + "step": 158635 + }, + { + "epoch": 3.8756015928468472, + "grad_norm": 0.003539950354024768, + "learning_rate": 2.9260986388372377e-07, + "loss": 0.0, + "num_input_tokens_seen": 106921680, + "step": 158640 + }, + { + "epoch": 3.8757237436786944, + "grad_norm": 0.01194003690034151, + "learning_rate": 2.92549590260606e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106924816, + "step": 158645 + }, + { + "epoch": 3.8758458945105416, + "grad_norm": 0.0017153606750071049, + "learning_rate": 2.9248932178225813e-07, + "loss": 0.0, + "num_input_tokens_seen": 106927696, + "step": 158650 + }, + { + "epoch": 3.875968045342389, + "grad_norm": 0.00042412380571477115, + "learning_rate": 2.9242905844911794e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106931024, + "step": 158655 + }, + { + "epoch": 3.876090196174236, + "grad_norm": 0.03366420045495033, + "learning_rate": 2.923688002616239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106934160, + "step": 158660 + }, + { + "epoch": 3.876212347006083, + "grad_norm": 0.004773721564561129, + "learning_rate": 2.9230854722021456e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106937360, + "step": 158665 + }, + { + "epoch": 3.8763344978379304, + "grad_norm": 0.0011197905987501144, + "learning_rate": 2.922482993253277e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106940624, + "step": 158670 + }, + { + "epoch": 3.8764566486697776, + "grad_norm": 0.006437378935515881, + "learning_rate": 2.921880565774016e-07, + "loss": 0.0, + "num_input_tokens_seen": 106944080, + "step": 158675 + }, + { + "epoch": 3.8765787995016248, + "grad_norm": 0.0029959857929497957, + "learning_rate": 2.9212781897687424e-07, + "loss": 0.0, + "num_input_tokens_seen": 106947408, + "step": 158680 + }, + { + "epoch": 3.876700950333472, + "grad_norm": 0.03182037174701691, + "learning_rate": 2.920675865241841e-07, + "loss": 0.0, + "num_input_tokens_seen": 106951312, + "step": 158685 + }, + { + "epoch": 3.8768231011653187, + "grad_norm": 0.015112731605768204, + "learning_rate": 2.920073592197684e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106954576, + "step": 158690 + }, + { + "epoch": 3.8769452519971663, + "grad_norm": 0.0007206939626485109, + "learning_rate": 2.919471370640657e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106957776, + "step": 158695 + }, + { + "epoch": 3.877067402829013, + "grad_norm": 0.0029561310075223446, + "learning_rate": 2.918869200575141e-07, + "loss": 0.0444, + "num_input_tokens_seen": 106961488, + "step": 158700 + }, + { + "epoch": 3.8771895536608603, + "grad_norm": 0.0027475692331790924, + "learning_rate": 2.918267082005513e-07, + "loss": 0.0, + "num_input_tokens_seen": 106964944, + "step": 158705 + }, + { + "epoch": 3.8773117044927075, + "grad_norm": 0.0017425743862986565, + "learning_rate": 2.9176650149361495e-07, + "loss": 0.0, + "num_input_tokens_seen": 106968208, + "step": 158710 + }, + { + "epoch": 3.8774338553245546, + "grad_norm": 0.008587181568145752, + "learning_rate": 2.9170629993714336e-07, + "loss": 0.0, + "num_input_tokens_seen": 106971216, + "step": 158715 + }, + { + "epoch": 3.877556006156402, + "grad_norm": 0.21515829861164093, + "learning_rate": 2.9164610353157373e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106974672, + "step": 158720 + }, + { + "epoch": 3.877678156988249, + "grad_norm": 0.043689239770174026, + "learning_rate": 2.915859122773444e-07, + "loss": 0.0, + "num_input_tokens_seen": 106977744, + "step": 158725 + }, + { + "epoch": 3.877800307820096, + "grad_norm": 0.010470571927726269, + "learning_rate": 2.915257261748927e-07, + "loss": 0.017, + "num_input_tokens_seen": 106981392, + "step": 158730 + }, + { + "epoch": 3.8779224586519434, + "grad_norm": 0.002375473501160741, + "learning_rate": 2.9146554522465674e-07, + "loss": 0.0, + "num_input_tokens_seen": 106984784, + "step": 158735 + }, + { + "epoch": 3.8780446094837906, + "grad_norm": 0.12363884598016739, + "learning_rate": 2.914053694270735e-07, + "loss": 0.0002, + "num_input_tokens_seen": 106988432, + "step": 158740 + }, + { + "epoch": 3.878166760315638, + "grad_norm": 0.09494874626398087, + "learning_rate": 2.9134519878258133e-07, + "loss": 0.0001, + "num_input_tokens_seen": 106992080, + "step": 158745 + }, + { + "epoch": 3.878288911147485, + "grad_norm": 0.005724775139242411, + "learning_rate": 2.9128503329161724e-07, + "loss": 0.0, + "num_input_tokens_seen": 106995792, + "step": 158750 + }, + { + "epoch": 3.878411061979332, + "grad_norm": 0.018899552524089813, + "learning_rate": 2.912248729546191e-07, + "loss": 0.0, + "num_input_tokens_seen": 106999184, + "step": 158755 + }, + { + "epoch": 3.8785332128111794, + "grad_norm": 0.35931485891342163, + "learning_rate": 2.9116471777202445e-07, + "loss": 0.0623, + "num_input_tokens_seen": 107002128, + "step": 158760 + }, + { + "epoch": 3.8786553636430265, + "grad_norm": 0.3836727738380432, + "learning_rate": 2.911045677442704e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107005840, + "step": 158765 + }, + { + "epoch": 3.8787775144748737, + "grad_norm": 0.08484736829996109, + "learning_rate": 2.910444228717949e-07, + "loss": 0.0, + "num_input_tokens_seen": 107008784, + "step": 158770 + }, + { + "epoch": 3.8788996653067205, + "grad_norm": 0.02943931147456169, + "learning_rate": 2.9098428315503466e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107011728, + "step": 158775 + }, + { + "epoch": 3.879021816138568, + "grad_norm": 0.008347810246050358, + "learning_rate": 2.9092414859442784e-07, + "loss": 0.0, + "num_input_tokens_seen": 107015056, + "step": 158780 + }, + { + "epoch": 3.879143966970415, + "grad_norm": 0.003075164509937167, + "learning_rate": 2.908640191904109e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107018128, + "step": 158785 + }, + { + "epoch": 3.8792661178022625, + "grad_norm": 0.0002224208292318508, + "learning_rate": 2.908038949434216e-07, + "loss": 0.0, + "num_input_tokens_seen": 107021200, + "step": 158790 + }, + { + "epoch": 3.8793882686341092, + "grad_norm": 45.61159896850586, + "learning_rate": 2.907437758538975e-07, + "loss": 0.07, + "num_input_tokens_seen": 107024400, + "step": 158795 + }, + { + "epoch": 3.8795104194659564, + "grad_norm": 0.00022618890216108412, + "learning_rate": 2.906836619222751e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107028048, + "step": 158800 + }, + { + "epoch": 3.8796325702978036, + "grad_norm": 0.0012848754413425922, + "learning_rate": 2.906235531489921e-07, + "loss": 0.0, + "num_input_tokens_seen": 107031184, + "step": 158805 + }, + { + "epoch": 3.879754721129651, + "grad_norm": 0.0028317791875451803, + "learning_rate": 2.905634495344853e-07, + "loss": 0.0, + "num_input_tokens_seen": 107034640, + "step": 158810 + }, + { + "epoch": 3.879876871961498, + "grad_norm": 0.005218177102506161, + "learning_rate": 2.905033510791921e-07, + "loss": 0.0, + "num_input_tokens_seen": 107037776, + "step": 158815 + }, + { + "epoch": 3.879999022793345, + "grad_norm": 0.0034669225569814444, + "learning_rate": 2.9044325778354937e-07, + "loss": 0.0, + "num_input_tokens_seen": 107040976, + "step": 158820 + }, + { + "epoch": 3.8801211736251924, + "grad_norm": 0.018067175522446632, + "learning_rate": 2.903831696479938e-07, + "loss": 0.0, + "num_input_tokens_seen": 107044496, + "step": 158825 + }, + { + "epoch": 3.8802433244570396, + "grad_norm": 0.0030394871719181538, + "learning_rate": 2.90323086672963e-07, + "loss": 0.0, + "num_input_tokens_seen": 107047696, + "step": 158830 + }, + { + "epoch": 3.8803654752888868, + "grad_norm": 0.00258993124589324, + "learning_rate": 2.9026300885889333e-07, + "loss": 0.0, + "num_input_tokens_seen": 107050704, + "step": 158835 + }, + { + "epoch": 3.880487626120734, + "grad_norm": 0.0005346864345483482, + "learning_rate": 2.9020293620622214e-07, + "loss": 0.0631, + "num_input_tokens_seen": 107053968, + "step": 158840 + }, + { + "epoch": 3.880609776952581, + "grad_norm": 0.0007554969051852822, + "learning_rate": 2.90142868715386e-07, + "loss": 0.0, + "num_input_tokens_seen": 107057104, + "step": 158845 + }, + { + "epoch": 3.8807319277844283, + "grad_norm": 0.0022322291042655706, + "learning_rate": 2.900828063868216e-07, + "loss": 0.0325, + "num_input_tokens_seen": 107060368, + "step": 158850 + }, + { + "epoch": 3.8808540786162755, + "grad_norm": 6.525689968839288e-05, + "learning_rate": 2.9002274922096646e-07, + "loss": 0.0, + "num_input_tokens_seen": 107063888, + "step": 158855 + }, + { + "epoch": 3.8809762294481227, + "grad_norm": 0.00232454901561141, + "learning_rate": 2.899626972182565e-07, + "loss": 0.0, + "num_input_tokens_seen": 107067344, + "step": 158860 + }, + { + "epoch": 3.88109838027997, + "grad_norm": 0.01140713132917881, + "learning_rate": 2.899026503791291e-07, + "loss": 0.0, + "num_input_tokens_seen": 107070608, + "step": 158865 + }, + { + "epoch": 3.8812205311118166, + "grad_norm": 0.00026834936579689384, + "learning_rate": 2.898426087040203e-07, + "loss": 0.0, + "num_input_tokens_seen": 107073872, + "step": 158870 + }, + { + "epoch": 3.8813426819436643, + "grad_norm": 0.0015741335228085518, + "learning_rate": 2.89782572193367e-07, + "loss": 0.0, + "num_input_tokens_seen": 107077456, + "step": 158875 + }, + { + "epoch": 3.881464832775511, + "grad_norm": 0.02891607955098152, + "learning_rate": 2.8972254084760626e-07, + "loss": 0.0, + "num_input_tokens_seen": 107080784, + "step": 158880 + }, + { + "epoch": 3.881586983607358, + "grad_norm": 0.012207439169287682, + "learning_rate": 2.8966251466717395e-07, + "loss": 0.0366, + "num_input_tokens_seen": 107083728, + "step": 158885 + }, + { + "epoch": 3.8817091344392054, + "grad_norm": 0.6531633734703064, + "learning_rate": 2.896024936525071e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107086800, + "step": 158890 + }, + { + "epoch": 3.8818312852710526, + "grad_norm": 0.007391483057290316, + "learning_rate": 2.895424778040417e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107091344, + "step": 158895 + }, + { + "epoch": 3.8819534361029, + "grad_norm": 0.000366843567462638, + "learning_rate": 2.894824671222149e-07, + "loss": 0.0, + "num_input_tokens_seen": 107094864, + "step": 158900 + }, + { + "epoch": 3.882075586934747, + "grad_norm": 0.0009734971099533141, + "learning_rate": 2.894224616074623e-07, + "loss": 0.0, + "num_input_tokens_seen": 107098320, + "step": 158905 + }, + { + "epoch": 3.882197737766594, + "grad_norm": 0.019451703876256943, + "learning_rate": 2.89362461260221e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107101264, + "step": 158910 + }, + { + "epoch": 3.8823198885984413, + "grad_norm": 0.012309051118791103, + "learning_rate": 2.893024660809268e-07, + "loss": 0.0, + "num_input_tokens_seen": 107104720, + "step": 158915 + }, + { + "epoch": 3.8824420394302885, + "grad_norm": 0.0053732809610664845, + "learning_rate": 2.892424760700164e-07, + "loss": 0.0, + "num_input_tokens_seen": 107108112, + "step": 158920 + }, + { + "epoch": 3.8825641902621357, + "grad_norm": 0.0009634348680265248, + "learning_rate": 2.89182491227926e-07, + "loss": 0.0, + "num_input_tokens_seen": 107111312, + "step": 158925 + }, + { + "epoch": 3.882686341093983, + "grad_norm": 0.029144437983632088, + "learning_rate": 2.891225115550914e-07, + "loss": 0.0, + "num_input_tokens_seen": 107115280, + "step": 158930 + }, + { + "epoch": 3.88280849192583, + "grad_norm": 41.72419357299805, + "learning_rate": 2.890625370519493e-07, + "loss": 0.0838, + "num_input_tokens_seen": 107118608, + "step": 158935 + }, + { + "epoch": 3.8829306427576773, + "grad_norm": 0.0015868102200329304, + "learning_rate": 2.8900256771893536e-07, + "loss": 0.0, + "num_input_tokens_seen": 107121616, + "step": 158940 + }, + { + "epoch": 3.8830527935895245, + "grad_norm": 0.006282476708292961, + "learning_rate": 2.8894260355648605e-07, + "loss": 0.0378, + "num_input_tokens_seen": 107124624, + "step": 158945 + }, + { + "epoch": 3.8831749444213717, + "grad_norm": 0.0006471476517617702, + "learning_rate": 2.888826445650376e-07, + "loss": 0.0, + "num_input_tokens_seen": 107127760, + "step": 158950 + }, + { + "epoch": 3.8832970952532184, + "grad_norm": 0.005045460537075996, + "learning_rate": 2.8882269074502565e-07, + "loss": 0.0, + "num_input_tokens_seen": 107130960, + "step": 158955 + }, + { + "epoch": 3.883419246085066, + "grad_norm": 0.002895316807553172, + "learning_rate": 2.887627420968867e-07, + "loss": 0.0, + "num_input_tokens_seen": 107134096, + "step": 158960 + }, + { + "epoch": 3.883541396916913, + "grad_norm": 0.30166134238243103, + "learning_rate": 2.8870279862105596e-07, + "loss": 0.0, + "num_input_tokens_seen": 107137360, + "step": 158965 + }, + { + "epoch": 3.8836635477487604, + "grad_norm": 0.16179952025413513, + "learning_rate": 2.886428603179698e-07, + "loss": 0.0, + "num_input_tokens_seen": 107140496, + "step": 158970 + }, + { + "epoch": 3.883785698580607, + "grad_norm": 0.0012596636079251766, + "learning_rate": 2.8858292718806443e-07, + "loss": 0.0, + "num_input_tokens_seen": 107143952, + "step": 158975 + }, + { + "epoch": 3.8839078494124544, + "grad_norm": 0.03619730472564697, + "learning_rate": 2.88522999231775e-07, + "loss": 0.0, + "num_input_tokens_seen": 107147664, + "step": 158980 + }, + { + "epoch": 3.8840300002443016, + "grad_norm": 0.001989589538425207, + "learning_rate": 2.8846307644953803e-07, + "loss": 0.0, + "num_input_tokens_seen": 107151376, + "step": 158985 + }, + { + "epoch": 3.8841521510761488, + "grad_norm": 0.0026612537913024426, + "learning_rate": 2.884031588417887e-07, + "loss": 0.0, + "num_input_tokens_seen": 107154832, + "step": 158990 + }, + { + "epoch": 3.884274301907996, + "grad_norm": 0.0006597295869141817, + "learning_rate": 2.8834324640896325e-07, + "loss": 0.0, + "num_input_tokens_seen": 107157904, + "step": 158995 + }, + { + "epoch": 3.884396452739843, + "grad_norm": 0.0057218982838094234, + "learning_rate": 2.8828333915149674e-07, + "loss": 0.0, + "num_input_tokens_seen": 107161296, + "step": 159000 + }, + { + "epoch": 3.8845186035716903, + "grad_norm": 0.00010775305418064818, + "learning_rate": 2.882234370698253e-07, + "loss": 0.0, + "num_input_tokens_seen": 107164624, + "step": 159005 + }, + { + "epoch": 3.8846407544035375, + "grad_norm": 0.0007086143596097827, + "learning_rate": 2.8816354016438483e-07, + "loss": 0.0, + "num_input_tokens_seen": 107168528, + "step": 159010 + }, + { + "epoch": 3.8847629052353847, + "grad_norm": 0.007450388744473457, + "learning_rate": 2.881036484356104e-07, + "loss": 0.0, + "num_input_tokens_seen": 107171856, + "step": 159015 + }, + { + "epoch": 3.884885056067232, + "grad_norm": 0.220229834318161, + "learning_rate": 2.8804376188393755e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107174992, + "step": 159020 + }, + { + "epoch": 3.885007206899079, + "grad_norm": 0.000642230617813766, + "learning_rate": 2.8798388050980225e-07, + "loss": 0.0, + "num_input_tokens_seen": 107178384, + "step": 159025 + }, + { + "epoch": 3.8851293577309263, + "grad_norm": 0.03365645557641983, + "learning_rate": 2.8792400431363927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107181776, + "step": 159030 + }, + { + "epoch": 3.8852515085627735, + "grad_norm": 0.0017283897614106536, + "learning_rate": 2.8786413329588486e-07, + "loss": 0.0, + "num_input_tokens_seen": 107184976, + "step": 159035 + }, + { + "epoch": 3.88537365939462, + "grad_norm": 0.00508853467181325, + "learning_rate": 2.8780426745697374e-07, + "loss": 0.0, + "num_input_tokens_seen": 107188816, + "step": 159040 + }, + { + "epoch": 3.885495810226468, + "grad_norm": 0.003997340332716703, + "learning_rate": 2.877444067973418e-07, + "loss": 0.0, + "num_input_tokens_seen": 107192208, + "step": 159045 + }, + { + "epoch": 3.8856179610583146, + "grad_norm": 28.383333206176758, + "learning_rate": 2.8768455131742373e-07, + "loss": 0.0566, + "num_input_tokens_seen": 107195728, + "step": 159050 + }, + { + "epoch": 3.885740111890162, + "grad_norm": 0.01382320187985897, + "learning_rate": 2.876247010176556e-07, + "loss": 0.0353, + "num_input_tokens_seen": 107199120, + "step": 159055 + }, + { + "epoch": 3.885862262722009, + "grad_norm": 0.002845111768692732, + "learning_rate": 2.875648558984719e-07, + "loss": 0.0, + "num_input_tokens_seen": 107202256, + "step": 159060 + }, + { + "epoch": 3.885984413553856, + "grad_norm": 0.001575396629050374, + "learning_rate": 2.875050159603082e-07, + "loss": 0.0, + "num_input_tokens_seen": 107205520, + "step": 159065 + }, + { + "epoch": 3.8861065643857033, + "grad_norm": 90.33045959472656, + "learning_rate": 2.8744518120359997e-07, + "loss": 0.0253, + "num_input_tokens_seen": 107208848, + "step": 159070 + }, + { + "epoch": 3.8862287152175505, + "grad_norm": 0.0005721577326767147, + "learning_rate": 2.8738535162878173e-07, + "loss": 0.0, + "num_input_tokens_seen": 107211984, + "step": 159075 + }, + { + "epoch": 3.8863508660493977, + "grad_norm": 0.0007707910262979567, + "learning_rate": 2.873255272362891e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107215504, + "step": 159080 + }, + { + "epoch": 3.886473016881245, + "grad_norm": 0.0014013528125360608, + "learning_rate": 2.872657080265567e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107218576, + "step": 159085 + }, + { + "epoch": 3.886595167713092, + "grad_norm": 0.0005075766239315271, + "learning_rate": 2.8720589400002014e-07, + "loss": 0.0005, + "num_input_tokens_seen": 107221584, + "step": 159090 + }, + { + "epoch": 3.8867173185449393, + "grad_norm": 0.0056504616513848305, + "learning_rate": 2.871460851571137e-07, + "loss": 0.0, + "num_input_tokens_seen": 107225104, + "step": 159095 + }, + { + "epoch": 3.8868394693767865, + "grad_norm": 0.0012615727027878165, + "learning_rate": 2.870862814982726e-07, + "loss": 0.0, + "num_input_tokens_seen": 107228944, + "step": 159100 + }, + { + "epoch": 3.8869616202086337, + "grad_norm": 0.002773595042526722, + "learning_rate": 2.8702648302393217e-07, + "loss": 0.0, + "num_input_tokens_seen": 107232272, + "step": 159105 + }, + { + "epoch": 3.887083771040481, + "grad_norm": 0.013990364968776703, + "learning_rate": 2.869666897345265e-07, + "loss": 0.0, + "num_input_tokens_seen": 107235920, + "step": 159110 + }, + { + "epoch": 3.887205921872328, + "grad_norm": 0.002354390686377883, + "learning_rate": 2.8690690163049136e-07, + "loss": 0.0, + "num_input_tokens_seen": 107238992, + "step": 159115 + }, + { + "epoch": 3.8873280727041752, + "grad_norm": 0.0010478426702320576, + "learning_rate": 2.868471187122606e-07, + "loss": 0.0, + "num_input_tokens_seen": 107242832, + "step": 159120 + }, + { + "epoch": 3.8874502235360224, + "grad_norm": 0.0021144491620361805, + "learning_rate": 2.8678734098026967e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107245904, + "step": 159125 + }, + { + "epoch": 3.8875723743678696, + "grad_norm": 0.0012400305131450295, + "learning_rate": 2.8672756843495316e-07, + "loss": 0.0, + "num_input_tokens_seen": 107249104, + "step": 159130 + }, + { + "epoch": 3.8876945251997164, + "grad_norm": 0.0015789041062816978, + "learning_rate": 2.8666780107674524e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107252112, + "step": 159135 + }, + { + "epoch": 3.887816676031564, + "grad_norm": 0.0018158305902034044, + "learning_rate": 2.8660803890608123e-07, + "loss": 0.0, + "num_input_tokens_seen": 107255440, + "step": 159140 + }, + { + "epoch": 3.8879388268634107, + "grad_norm": 0.008747609332203865, + "learning_rate": 2.865482819233951e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107258448, + "step": 159145 + }, + { + "epoch": 3.8880609776952584, + "grad_norm": 0.0015083423350006342, + "learning_rate": 2.864885301291221e-07, + "loss": 0.0534, + "num_input_tokens_seen": 107261520, + "step": 159150 + }, + { + "epoch": 3.888183128527105, + "grad_norm": 0.00502125546336174, + "learning_rate": 2.8642878352369616e-07, + "loss": 0.0, + "num_input_tokens_seen": 107264784, + "step": 159155 + }, + { + "epoch": 3.8883052793589523, + "grad_norm": 0.002522894414141774, + "learning_rate": 2.8636904210755196e-07, + "loss": 0.1633, + "num_input_tokens_seen": 107267984, + "step": 159160 + }, + { + "epoch": 3.8884274301907995, + "grad_norm": 0.0046274131163954735, + "learning_rate": 2.8630930588112443e-07, + "loss": 0.0, + "num_input_tokens_seen": 107271312, + "step": 159165 + }, + { + "epoch": 3.8885495810226467, + "grad_norm": 0.884371817111969, + "learning_rate": 2.8624957484484723e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107274512, + "step": 159170 + }, + { + "epoch": 3.888671731854494, + "grad_norm": 0.0001581492106197402, + "learning_rate": 2.8618984899915533e-07, + "loss": 0.0, + "num_input_tokens_seen": 107278288, + "step": 159175 + }, + { + "epoch": 3.888793882686341, + "grad_norm": 0.0011975031811743975, + "learning_rate": 2.861301283444827e-07, + "loss": 0.0, + "num_input_tokens_seen": 107281808, + "step": 159180 + }, + { + "epoch": 3.8889160335181883, + "grad_norm": 0.002455994253978133, + "learning_rate": 2.8607041288126396e-07, + "loss": 0.0, + "num_input_tokens_seen": 107285200, + "step": 159185 + }, + { + "epoch": 3.8890381843500355, + "grad_norm": 0.04356267303228378, + "learning_rate": 2.8601070260993287e-07, + "loss": 0.0, + "num_input_tokens_seen": 107288272, + "step": 159190 + }, + { + "epoch": 3.8891603351818826, + "grad_norm": 0.01476934365928173, + "learning_rate": 2.859509975309241e-07, + "loss": 0.0, + "num_input_tokens_seen": 107291472, + "step": 159195 + }, + { + "epoch": 3.88928248601373, + "grad_norm": 0.0009058124851435423, + "learning_rate": 2.8589129764467203e-07, + "loss": 0.0, + "num_input_tokens_seen": 107294736, + "step": 159200 + }, + { + "epoch": 3.889404636845577, + "grad_norm": 0.0037373830564320087, + "learning_rate": 2.858316029516101e-07, + "loss": 0.0245, + "num_input_tokens_seen": 107298128, + "step": 159205 + }, + { + "epoch": 3.889526787677424, + "grad_norm": 0.11109024286270142, + "learning_rate": 2.8577191345217324e-07, + "loss": 0.0, + "num_input_tokens_seen": 107302096, + "step": 159210 + }, + { + "epoch": 3.8896489385092714, + "grad_norm": 0.017915405333042145, + "learning_rate": 2.857122291467948e-07, + "loss": 0.0, + "num_input_tokens_seen": 107305296, + "step": 159215 + }, + { + "epoch": 3.889771089341118, + "grad_norm": 0.0032476552296429873, + "learning_rate": 2.856525500359095e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107309136, + "step": 159220 + }, + { + "epoch": 3.889893240172966, + "grad_norm": 0.09382815659046173, + "learning_rate": 2.855928761199505e-07, + "loss": 0.0, + "num_input_tokens_seen": 107312272, + "step": 159225 + }, + { + "epoch": 3.8900153910048125, + "grad_norm": 0.025482621043920517, + "learning_rate": 2.855332073993528e-07, + "loss": 0.0, + "num_input_tokens_seen": 107315408, + "step": 159230 + }, + { + "epoch": 3.89013754183666, + "grad_norm": 0.028035694733262062, + "learning_rate": 2.854735438745497e-07, + "loss": 0.0, + "num_input_tokens_seen": 107318608, + "step": 159235 + }, + { + "epoch": 3.890259692668507, + "grad_norm": 0.016784196719527245, + "learning_rate": 2.854138855459748e-07, + "loss": 0.0, + "num_input_tokens_seen": 107321936, + "step": 159240 + }, + { + "epoch": 3.890381843500354, + "grad_norm": 0.030693599954247475, + "learning_rate": 2.8535423241406274e-07, + "loss": 0.0, + "num_input_tokens_seen": 107324880, + "step": 159245 + }, + { + "epoch": 3.8905039943322013, + "grad_norm": 0.0026803172659128904, + "learning_rate": 2.8529458447924646e-07, + "loss": 0.0, + "num_input_tokens_seen": 107328912, + "step": 159250 + }, + { + "epoch": 3.8906261451640485, + "grad_norm": 0.0023972035851329565, + "learning_rate": 2.852349417419604e-07, + "loss": 0.0204, + "num_input_tokens_seen": 107332048, + "step": 159255 + }, + { + "epoch": 3.8907482959958957, + "grad_norm": 0.0074956901371479034, + "learning_rate": 2.8517530420263826e-07, + "loss": 0.0, + "num_input_tokens_seen": 107335248, + "step": 159260 + }, + { + "epoch": 3.890870446827743, + "grad_norm": 0.001000861986540258, + "learning_rate": 2.8511567186171327e-07, + "loss": 0.0, + "num_input_tokens_seen": 107338256, + "step": 159265 + }, + { + "epoch": 3.89099259765959, + "grad_norm": 0.018955791369080544, + "learning_rate": 2.8505604471961975e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107341840, + "step": 159270 + }, + { + "epoch": 3.8911147484914372, + "grad_norm": 0.02058357745409012, + "learning_rate": 2.849964227767906e-07, + "loss": 0.0, + "num_input_tokens_seen": 107345424, + "step": 159275 + }, + { + "epoch": 3.8912368993232844, + "grad_norm": 0.004790999460965395, + "learning_rate": 2.849368060336599e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107348432, + "step": 159280 + }, + { + "epoch": 3.8913590501551316, + "grad_norm": 0.03545403480529785, + "learning_rate": 2.848771944906613e-07, + "loss": 0.1123, + "num_input_tokens_seen": 107352720, + "step": 159285 + }, + { + "epoch": 3.891481200986979, + "grad_norm": 0.0004981185775250196, + "learning_rate": 2.8481758814822777e-07, + "loss": 0.0, + "num_input_tokens_seen": 107355792, + "step": 159290 + }, + { + "epoch": 3.891603351818826, + "grad_norm": 0.0007688974146731198, + "learning_rate": 2.8475798700679344e-07, + "loss": 0.0, + "num_input_tokens_seen": 107359568, + "step": 159295 + }, + { + "epoch": 3.891725502650673, + "grad_norm": 0.00931483879685402, + "learning_rate": 2.846983910667911e-07, + "loss": 0.0, + "num_input_tokens_seen": 107362896, + "step": 159300 + }, + { + "epoch": 3.8918476534825204, + "grad_norm": 0.0016654456267133355, + "learning_rate": 2.8463880032865463e-07, + "loss": 0.0588, + "num_input_tokens_seen": 107366544, + "step": 159305 + }, + { + "epoch": 3.8919698043143676, + "grad_norm": 0.002245159586891532, + "learning_rate": 2.84579214792817e-07, + "loss": 0.0, + "num_input_tokens_seen": 107369616, + "step": 159310 + }, + { + "epoch": 3.8920919551462143, + "grad_norm": 0.0015120379393920302, + "learning_rate": 2.8451963445971184e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107373136, + "step": 159315 + }, + { + "epoch": 3.892214105978062, + "grad_norm": 0.008667359128594398, + "learning_rate": 2.844600593297726e-07, + "loss": 0.0, + "num_input_tokens_seen": 107376400, + "step": 159320 + }, + { + "epoch": 3.8923362568099087, + "grad_norm": 0.0029568129684776068, + "learning_rate": 2.8440048940343185e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107379792, + "step": 159325 + }, + { + "epoch": 3.892458407641756, + "grad_norm": 0.005628951825201511, + "learning_rate": 2.843409246811236e-07, + "loss": 0.0, + "num_input_tokens_seen": 107382800, + "step": 159330 + }, + { + "epoch": 3.892580558473603, + "grad_norm": 0.012718535959720612, + "learning_rate": 2.842813651632806e-07, + "loss": 0.0, + "num_input_tokens_seen": 107385936, + "step": 159335 + }, + { + "epoch": 3.8927027093054503, + "grad_norm": 16.744064331054688, + "learning_rate": 2.8422181085033583e-07, + "loss": 0.0256, + "num_input_tokens_seen": 107389264, + "step": 159340 + }, + { + "epoch": 3.8928248601372974, + "grad_norm": 0.0019720131531357765, + "learning_rate": 2.841622617427227e-07, + "loss": 0.0488, + "num_input_tokens_seen": 107392400, + "step": 159345 + }, + { + "epoch": 3.8929470109691446, + "grad_norm": 0.0032979014795273542, + "learning_rate": 2.841027178408739e-07, + "loss": 0.0488, + "num_input_tokens_seen": 107395600, + "step": 159350 + }, + { + "epoch": 3.893069161800992, + "grad_norm": 0.00013743384624831378, + "learning_rate": 2.8404317914522304e-07, + "loss": 0.0, + "num_input_tokens_seen": 107398864, + "step": 159355 + }, + { + "epoch": 3.893191312632839, + "grad_norm": 0.008313512429594994, + "learning_rate": 2.839836456562025e-07, + "loss": 0.0, + "num_input_tokens_seen": 107402960, + "step": 159360 + }, + { + "epoch": 3.893313463464686, + "grad_norm": 0.0017402205849066377, + "learning_rate": 2.839241173742456e-07, + "loss": 0.0, + "num_input_tokens_seen": 107406096, + "step": 159365 + }, + { + "epoch": 3.8934356142965334, + "grad_norm": 0.0016363713657483459, + "learning_rate": 2.838645942997849e-07, + "loss": 0.0005, + "num_input_tokens_seen": 107409296, + "step": 159370 + }, + { + "epoch": 3.8935577651283806, + "grad_norm": 0.0010019043693318963, + "learning_rate": 2.8380507643325357e-07, + "loss": 0.0, + "num_input_tokens_seen": 107412688, + "step": 159375 + }, + { + "epoch": 3.8936799159602278, + "grad_norm": 0.006046359892934561, + "learning_rate": 2.8374556377508463e-07, + "loss": 0.0441, + "num_input_tokens_seen": 107415952, + "step": 159380 + }, + { + "epoch": 3.893802066792075, + "grad_norm": 0.000366544903954491, + "learning_rate": 2.8368605632571017e-07, + "loss": 0.0048, + "num_input_tokens_seen": 107419472, + "step": 159385 + }, + { + "epoch": 3.893924217623922, + "grad_norm": 0.0009933729888871312, + "learning_rate": 2.836265540855638e-07, + "loss": 0.0, + "num_input_tokens_seen": 107422736, + "step": 159390 + }, + { + "epoch": 3.8940463684557693, + "grad_norm": 0.0023379067424684763, + "learning_rate": 2.835670570550774e-07, + "loss": 0.0, + "num_input_tokens_seen": 107426576, + "step": 159395 + }, + { + "epoch": 3.894168519287616, + "grad_norm": 0.0043589891865849495, + "learning_rate": 2.8350756523468454e-07, + "loss": 0.0, + "num_input_tokens_seen": 107430416, + "step": 159400 + }, + { + "epoch": 3.8942906701194637, + "grad_norm": 30.011795043945312, + "learning_rate": 2.834480786248169e-07, + "loss": 0.0489, + "num_input_tokens_seen": 107433488, + "step": 159405 + }, + { + "epoch": 3.8944128209513105, + "grad_norm": 17.220151901245117, + "learning_rate": 2.833885972259077e-07, + "loss": 0.0476, + "num_input_tokens_seen": 107436496, + "step": 159410 + }, + { + "epoch": 3.894534971783158, + "grad_norm": 0.0010251520434394479, + "learning_rate": 2.8332912103838957e-07, + "loss": 0.0, + "num_input_tokens_seen": 107440080, + "step": 159415 + }, + { + "epoch": 3.894657122615005, + "grad_norm": 0.00047361108590848744, + "learning_rate": 2.8326965006269454e-07, + "loss": 0.0, + "num_input_tokens_seen": 107443536, + "step": 159420 + }, + { + "epoch": 3.894779273446852, + "grad_norm": 0.029390431940555573, + "learning_rate": 2.832101842992558e-07, + "loss": 0.0, + "num_input_tokens_seen": 107446672, + "step": 159425 + }, + { + "epoch": 3.8949014242786992, + "grad_norm": 0.031576137989759445, + "learning_rate": 2.8315072374850504e-07, + "loss": 0.0, + "num_input_tokens_seen": 107449808, + "step": 159430 + }, + { + "epoch": 3.8950235751105464, + "grad_norm": 0.015351535752415657, + "learning_rate": 2.8309126841087527e-07, + "loss": 0.0, + "num_input_tokens_seen": 107453200, + "step": 159435 + }, + { + "epoch": 3.8951457259423936, + "grad_norm": 28.911039352416992, + "learning_rate": 2.8303181828679857e-07, + "loss": 0.0512, + "num_input_tokens_seen": 107456976, + "step": 159440 + }, + { + "epoch": 3.895267876774241, + "grad_norm": 0.012119249440729618, + "learning_rate": 2.829723733767071e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107460368, + "step": 159445 + }, + { + "epoch": 3.895390027606088, + "grad_norm": 0.0006879116408526897, + "learning_rate": 2.8291293368103374e-07, + "loss": 0.0, + "num_input_tokens_seen": 107463440, + "step": 159450 + }, + { + "epoch": 3.895512178437935, + "grad_norm": 0.0032404428347945213, + "learning_rate": 2.8285349920021006e-07, + "loss": 0.0006, + "num_input_tokens_seen": 107466960, + "step": 159455 + }, + { + "epoch": 3.8956343292697824, + "grad_norm": 0.017269376665353775, + "learning_rate": 2.8279406993466893e-07, + "loss": 0.0, + "num_input_tokens_seen": 107470224, + "step": 159460 + }, + { + "epoch": 3.8957564801016296, + "grad_norm": 0.0010835862485691905, + "learning_rate": 2.8273464588484186e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107474192, + "step": 159465 + }, + { + "epoch": 3.8958786309334767, + "grad_norm": 0.00023717455042060465, + "learning_rate": 2.826752270511614e-07, + "loss": 0.0, + "num_input_tokens_seen": 107477520, + "step": 159470 + }, + { + "epoch": 3.896000781765324, + "grad_norm": 0.0019396002171561122, + "learning_rate": 2.8261581343406005e-07, + "loss": 0.0, + "num_input_tokens_seen": 107480720, + "step": 159475 + }, + { + "epoch": 3.896122932597171, + "grad_norm": 0.0029893694445490837, + "learning_rate": 2.8255640503396903e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107484624, + "step": 159480 + }, + { + "epoch": 3.896245083429018, + "grad_norm": 0.005537273362278938, + "learning_rate": 2.8249700185132107e-07, + "loss": 0.0, + "num_input_tokens_seen": 107488080, + "step": 159485 + }, + { + "epoch": 3.8963672342608655, + "grad_norm": 0.00046166477841325104, + "learning_rate": 2.824376038865477e-07, + "loss": 0.0, + "num_input_tokens_seen": 107491408, + "step": 159490 + }, + { + "epoch": 3.8964893850927123, + "grad_norm": 0.0005455143982544541, + "learning_rate": 2.823782111400813e-07, + "loss": 0.0, + "num_input_tokens_seen": 107494992, + "step": 159495 + }, + { + "epoch": 3.89661153592456, + "grad_norm": 0.006175261456519365, + "learning_rate": 2.823188236123533e-07, + "loss": 0.0, + "num_input_tokens_seen": 107498320, + "step": 159500 + }, + { + "epoch": 3.8967336867564066, + "grad_norm": 0.0041683511808514595, + "learning_rate": 2.8225944130379586e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107501392, + "step": 159505 + }, + { + "epoch": 3.896855837588254, + "grad_norm": 0.003727443516254425, + "learning_rate": 2.822000642148411e-07, + "loss": 0.0, + "num_input_tokens_seen": 107504912, + "step": 159510 + }, + { + "epoch": 3.896977988420101, + "grad_norm": 0.010317213833332062, + "learning_rate": 2.821406923459202e-07, + "loss": 0.0, + "num_input_tokens_seen": 107508240, + "step": 159515 + }, + { + "epoch": 3.897100139251948, + "grad_norm": 0.001735912635922432, + "learning_rate": 2.8208132569746555e-07, + "loss": 0.0399, + "num_input_tokens_seen": 107511248, + "step": 159520 + }, + { + "epoch": 3.8972222900837954, + "grad_norm": 0.009436232969164848, + "learning_rate": 2.8202196426990844e-07, + "loss": 0.0, + "num_input_tokens_seen": 107514576, + "step": 159525 + }, + { + "epoch": 3.8973444409156426, + "grad_norm": 0.0007831714465282857, + "learning_rate": 2.819626080636809e-07, + "loss": 0.0, + "num_input_tokens_seen": 107518608, + "step": 159530 + }, + { + "epoch": 3.8974665917474898, + "grad_norm": 0.034116633236408234, + "learning_rate": 2.8190325707921416e-07, + "loss": 0.1042, + "num_input_tokens_seen": 107522128, + "step": 159535 + }, + { + "epoch": 3.897588742579337, + "grad_norm": 0.0010338969295844436, + "learning_rate": 2.818439113169403e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107525968, + "step": 159540 + }, + { + "epoch": 3.897710893411184, + "grad_norm": 0.002002190565690398, + "learning_rate": 2.817845707772908e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107528784, + "step": 159545 + }, + { + "epoch": 3.8978330442430313, + "grad_norm": 0.002047667745500803, + "learning_rate": 2.817252354606966e-07, + "loss": 0.0, + "num_input_tokens_seen": 107531856, + "step": 159550 + }, + { + "epoch": 3.8979551950748785, + "grad_norm": 0.03190112113952637, + "learning_rate": 2.8166590536759015e-07, + "loss": 0.0, + "num_input_tokens_seen": 107535056, + "step": 159555 + }, + { + "epoch": 3.8980773459067257, + "grad_norm": 0.005458956118673086, + "learning_rate": 2.8160658049840205e-07, + "loss": 0.1033, + "num_input_tokens_seen": 107538512, + "step": 159560 + }, + { + "epoch": 3.898199496738573, + "grad_norm": 0.0033516723196953535, + "learning_rate": 2.815472608535642e-07, + "loss": 0.0, + "num_input_tokens_seen": 107541968, + "step": 159565 + }, + { + "epoch": 3.89832164757042, + "grad_norm": 0.0010578599758446217, + "learning_rate": 2.8148794643350816e-07, + "loss": 0.0318, + "num_input_tokens_seen": 107545360, + "step": 159570 + }, + { + "epoch": 3.8984437984022673, + "grad_norm": 0.00019192988111171871, + "learning_rate": 2.8142863723866475e-07, + "loss": 0.0, + "num_input_tokens_seen": 107548368, + "step": 159575 + }, + { + "epoch": 3.898565949234114, + "grad_norm": 15.34018325805664, + "learning_rate": 2.8136933326946574e-07, + "loss": 0.058, + "num_input_tokens_seen": 107551824, + "step": 159580 + }, + { + "epoch": 3.8986881000659617, + "grad_norm": 0.009013521485030651, + "learning_rate": 2.813100345263421e-07, + "loss": 0.0, + "num_input_tokens_seen": 107555088, + "step": 159585 + }, + { + "epoch": 3.8988102508978084, + "grad_norm": 0.008238616399466991, + "learning_rate": 2.812507410097251e-07, + "loss": 0.0, + "num_input_tokens_seen": 107558608, + "step": 159590 + }, + { + "epoch": 3.898932401729656, + "grad_norm": 0.005177440121769905, + "learning_rate": 2.811914527200463e-07, + "loss": 0.0008, + "num_input_tokens_seen": 107562000, + "step": 159595 + }, + { + "epoch": 3.899054552561503, + "grad_norm": 0.0011088430183008313, + "learning_rate": 2.8113216965773634e-07, + "loss": 0.0, + "num_input_tokens_seen": 107565456, + "step": 159600 + }, + { + "epoch": 3.89917670339335, + "grad_norm": 0.024311434477567673, + "learning_rate": 2.810728918232269e-07, + "loss": 0.0, + "num_input_tokens_seen": 107568592, + "step": 159605 + }, + { + "epoch": 3.899298854225197, + "grad_norm": 0.006240412592887878, + "learning_rate": 2.8101361921694854e-07, + "loss": 0.0, + "num_input_tokens_seen": 107572432, + "step": 159610 + }, + { + "epoch": 3.8994210050570444, + "grad_norm": 0.008073143661022186, + "learning_rate": 2.8095435183933267e-07, + "loss": 0.0572, + "num_input_tokens_seen": 107575504, + "step": 159615 + }, + { + "epoch": 3.8995431558888916, + "grad_norm": 0.12511250376701355, + "learning_rate": 2.8089508969081e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107578896, + "step": 159620 + }, + { + "epoch": 3.8996653067207387, + "grad_norm": 0.0018284362740814686, + "learning_rate": 2.8083583277181154e-07, + "loss": 0.0433, + "num_input_tokens_seen": 107582096, + "step": 159625 + }, + { + "epoch": 3.899787457552586, + "grad_norm": 0.004669949412345886, + "learning_rate": 2.807765810827687e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107585616, + "step": 159630 + }, + { + "epoch": 3.899909608384433, + "grad_norm": 0.0008054210338741541, + "learning_rate": 2.807173346241116e-07, + "loss": 0.081, + "num_input_tokens_seen": 107588624, + "step": 159635 + }, + { + "epoch": 3.9000317592162803, + "grad_norm": 0.0017973963404074311, + "learning_rate": 2.80658093396272e-07, + "loss": 0.0003, + "num_input_tokens_seen": 107592144, + "step": 159640 + }, + { + "epoch": 3.9001539100481275, + "grad_norm": 0.053213659673929214, + "learning_rate": 2.8059885739968e-07, + "loss": 0.0, + "num_input_tokens_seen": 107595344, + "step": 159645 + }, + { + "epoch": 3.9002760608799747, + "grad_norm": 0.027758058160543442, + "learning_rate": 2.805396266347665e-07, + "loss": 0.0478, + "num_input_tokens_seen": 107598928, + "step": 159650 + }, + { + "epoch": 3.900398211711822, + "grad_norm": 0.003646234283223748, + "learning_rate": 2.804804011019626e-07, + "loss": 0.0, + "num_input_tokens_seen": 107601936, + "step": 159655 + }, + { + "epoch": 3.900520362543669, + "grad_norm": 0.006015193648636341, + "learning_rate": 2.8042118080169843e-07, + "loss": 0.0, + "num_input_tokens_seen": 107605456, + "step": 159660 + }, + { + "epoch": 3.900642513375516, + "grad_norm": 0.0008773684385232627, + "learning_rate": 2.803619657344053e-07, + "loss": 0.0, + "num_input_tokens_seen": 107608656, + "step": 159665 + }, + { + "epoch": 3.9007646642073635, + "grad_norm": 0.0009342681732960045, + "learning_rate": 2.803027559005131e-07, + "loss": 0.0, + "num_input_tokens_seen": 107611984, + "step": 159670 + }, + { + "epoch": 3.90088681503921, + "grad_norm": 0.008757350035011768, + "learning_rate": 2.8024355130045316e-07, + "loss": 0.0439, + "num_input_tokens_seen": 107615056, + "step": 159675 + }, + { + "epoch": 3.901008965871058, + "grad_norm": 0.0015640563797205687, + "learning_rate": 2.801843519346555e-07, + "loss": 0.0, + "num_input_tokens_seen": 107618512, + "step": 159680 + }, + { + "epoch": 3.9011311167029046, + "grad_norm": 0.02840656414628029, + "learning_rate": 2.8012515780355084e-07, + "loss": 0.0, + "num_input_tokens_seen": 107622160, + "step": 159685 + }, + { + "epoch": 3.9012532675347518, + "grad_norm": 0.0020951058249920607, + "learning_rate": 2.8006596890756995e-07, + "loss": 0.0, + "num_input_tokens_seen": 107625616, + "step": 159690 + }, + { + "epoch": 3.901375418366599, + "grad_norm": 0.0030957581475377083, + "learning_rate": 2.8000678524714263e-07, + "loss": 0.0, + "num_input_tokens_seen": 107629264, + "step": 159695 + }, + { + "epoch": 3.901497569198446, + "grad_norm": 0.04345306009054184, + "learning_rate": 2.7994760682269993e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107632528, + "step": 159700 + }, + { + "epoch": 3.9016197200302933, + "grad_norm": 0.0007010184344835579, + "learning_rate": 2.798884336346716e-07, + "loss": 0.0, + "num_input_tokens_seen": 107635984, + "step": 159705 + }, + { + "epoch": 3.9017418708621405, + "grad_norm": 0.0029301580507308245, + "learning_rate": 2.7982926568348853e-07, + "loss": 0.0, + "num_input_tokens_seen": 107639184, + "step": 159710 + }, + { + "epoch": 3.9018640216939877, + "grad_norm": 0.043662700802087784, + "learning_rate": 2.797701029695805e-07, + "loss": 0.0, + "num_input_tokens_seen": 107642768, + "step": 159715 + }, + { + "epoch": 3.901986172525835, + "grad_norm": 0.018145162612199783, + "learning_rate": 2.7971094549337805e-07, + "loss": 0.0, + "num_input_tokens_seen": 107646160, + "step": 159720 + }, + { + "epoch": 3.902108323357682, + "grad_norm": 0.0647394210100174, + "learning_rate": 2.7965179325531154e-07, + "loss": 0.0, + "num_input_tokens_seen": 107649872, + "step": 159725 + }, + { + "epoch": 3.9022304741895293, + "grad_norm": 0.014430363662540913, + "learning_rate": 2.7959264625581067e-07, + "loss": 0.0, + "num_input_tokens_seen": 107653648, + "step": 159730 + }, + { + "epoch": 3.9023526250213765, + "grad_norm": 0.015603567473590374, + "learning_rate": 2.795335044953061e-07, + "loss": 0.0, + "num_input_tokens_seen": 107657296, + "step": 159735 + }, + { + "epoch": 3.9024747758532237, + "grad_norm": 0.00026981995324604213, + "learning_rate": 2.794743679742274e-07, + "loss": 0.0, + "num_input_tokens_seen": 107660432, + "step": 159740 + }, + { + "epoch": 3.902596926685071, + "grad_norm": 0.004165019374340773, + "learning_rate": 2.7941523669300527e-07, + "loss": 0.0, + "num_input_tokens_seen": 107664144, + "step": 159745 + }, + { + "epoch": 3.902719077516918, + "grad_norm": 0.002123868092894554, + "learning_rate": 2.793561106520693e-07, + "loss": 0.0, + "num_input_tokens_seen": 107667344, + "step": 159750 + }, + { + "epoch": 3.9028412283487652, + "grad_norm": 0.005422023590654135, + "learning_rate": 2.7929698985184923e-07, + "loss": 0.0, + "num_input_tokens_seen": 107670544, + "step": 159755 + }, + { + "epoch": 3.902963379180612, + "grad_norm": 0.032854385673999786, + "learning_rate": 2.792378742927756e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107674320, + "step": 159760 + }, + { + "epoch": 3.9030855300124596, + "grad_norm": 0.5987409949302673, + "learning_rate": 2.791787639752776e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107677776, + "step": 159765 + }, + { + "epoch": 3.9032076808443064, + "grad_norm": 0.0016436355654150248, + "learning_rate": 2.7911965889978595e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107681488, + "step": 159770 + }, + { + "epoch": 3.9033298316761535, + "grad_norm": 0.00790480338037014, + "learning_rate": 2.7906055906672965e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107685328, + "step": 159775 + }, + { + "epoch": 3.9034519825080007, + "grad_norm": 0.10544174164533615, + "learning_rate": 2.7900146447653895e-07, + "loss": 0.0, + "num_input_tokens_seen": 107688336, + "step": 159780 + }, + { + "epoch": 3.903574133339848, + "grad_norm": 0.0011935977963730693, + "learning_rate": 2.789423751296438e-07, + "loss": 0.0, + "num_input_tokens_seen": 107691536, + "step": 159785 + }, + { + "epoch": 3.903696284171695, + "grad_norm": 0.002410582033917308, + "learning_rate": 2.788832910264732e-07, + "loss": 0.0, + "num_input_tokens_seen": 107695120, + "step": 159790 + }, + { + "epoch": 3.9038184350035423, + "grad_norm": 0.0012916321866214275, + "learning_rate": 2.7882421216745776e-07, + "loss": 0.0, + "num_input_tokens_seen": 107698768, + "step": 159795 + }, + { + "epoch": 3.9039405858353895, + "grad_norm": 0.002064318861812353, + "learning_rate": 2.787651385530263e-07, + "loss": 0.0, + "num_input_tokens_seen": 107702224, + "step": 159800 + }, + { + "epoch": 3.9040627366672367, + "grad_norm": 0.050407975912094116, + "learning_rate": 2.787060701836089e-07, + "loss": 0.0, + "num_input_tokens_seen": 107705424, + "step": 159805 + }, + { + "epoch": 3.904184887499084, + "grad_norm": 0.02390783280134201, + "learning_rate": 2.7864700705963484e-07, + "loss": 0.0, + "num_input_tokens_seen": 107708688, + "step": 159810 + }, + { + "epoch": 3.904307038330931, + "grad_norm": 0.005413845181465149, + "learning_rate": 2.785879491815336e-07, + "loss": 0.0, + "num_input_tokens_seen": 107712016, + "step": 159815 + }, + { + "epoch": 3.9044291891627783, + "grad_norm": 0.0004383553168736398, + "learning_rate": 2.785288965497352e-07, + "loss": 0.0, + "num_input_tokens_seen": 107715152, + "step": 159820 + }, + { + "epoch": 3.9045513399946254, + "grad_norm": 0.0010350412921980023, + "learning_rate": 2.784698491646684e-07, + "loss": 0.0, + "num_input_tokens_seen": 107718032, + "step": 159825 + }, + { + "epoch": 3.9046734908264726, + "grad_norm": 0.09186630696058273, + "learning_rate": 2.7841080702676336e-07, + "loss": 0.0, + "num_input_tokens_seen": 107721552, + "step": 159830 + }, + { + "epoch": 3.90479564165832, + "grad_norm": 0.0008800149080343544, + "learning_rate": 2.783517701364485e-07, + "loss": 0.0, + "num_input_tokens_seen": 107725328, + "step": 159835 + }, + { + "epoch": 3.904917792490167, + "grad_norm": 0.004721642006188631, + "learning_rate": 2.782927384941541e-07, + "loss": 0.0, + "num_input_tokens_seen": 107728336, + "step": 159840 + }, + { + "epoch": 3.9050399433220138, + "grad_norm": 0.0003880482690874487, + "learning_rate": 2.7823371210030865e-07, + "loss": 0.0, + "num_input_tokens_seen": 107731472, + "step": 159845 + }, + { + "epoch": 3.9051620941538614, + "grad_norm": 0.0011299933539703488, + "learning_rate": 2.781746909553422e-07, + "loss": 0.0, + "num_input_tokens_seen": 107734736, + "step": 159850 + }, + { + "epoch": 3.905284244985708, + "grad_norm": 0.01743919961154461, + "learning_rate": 2.7811567505968346e-07, + "loss": 0.0, + "num_input_tokens_seen": 107738832, + "step": 159855 + }, + { + "epoch": 3.9054063958175558, + "grad_norm": 0.002373091410845518, + "learning_rate": 2.780566644137614e-07, + "loss": 0.0, + "num_input_tokens_seen": 107742224, + "step": 159860 + }, + { + "epoch": 3.9055285466494025, + "grad_norm": 0.0019706422463059425, + "learning_rate": 2.7799765901800576e-07, + "loss": 0.0, + "num_input_tokens_seen": 107745424, + "step": 159865 + }, + { + "epoch": 3.9056506974812497, + "grad_norm": 0.041707828640937805, + "learning_rate": 2.779386588728451e-07, + "loss": 0.0, + "num_input_tokens_seen": 107748560, + "step": 159870 + }, + { + "epoch": 3.905772848313097, + "grad_norm": 0.002031422220170498, + "learning_rate": 2.7787966397870855e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107751952, + "step": 159875 + }, + { + "epoch": 3.905894999144944, + "grad_norm": 0.0063712685368955135, + "learning_rate": 2.7782067433602574e-07, + "loss": 0.0, + "num_input_tokens_seen": 107755216, + "step": 159880 + }, + { + "epoch": 3.9060171499767913, + "grad_norm": 0.0017904570559039712, + "learning_rate": 2.777616899452249e-07, + "loss": 0.0, + "num_input_tokens_seen": 107758480, + "step": 159885 + }, + { + "epoch": 3.9061393008086385, + "grad_norm": 0.46773773431777954, + "learning_rate": 2.7770271080673566e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107761872, + "step": 159890 + }, + { + "epoch": 3.9062614516404857, + "grad_norm": 0.004684579558670521, + "learning_rate": 2.776437369209862e-07, + "loss": 0.0, + "num_input_tokens_seen": 107765584, + "step": 159895 + }, + { + "epoch": 3.906383602472333, + "grad_norm": 0.5561292767524719, + "learning_rate": 2.7758476828840615e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107768592, + "step": 159900 + }, + { + "epoch": 3.90650575330418, + "grad_norm": 0.0021681429352611303, + "learning_rate": 2.775258049094236e-07, + "loss": 0.0, + "num_input_tokens_seen": 107771664, + "step": 159905 + }, + { + "epoch": 3.9066279041360272, + "grad_norm": 0.0008911622571758926, + "learning_rate": 2.7746684678446776e-07, + "loss": 0.0, + "num_input_tokens_seen": 107774800, + "step": 159910 + }, + { + "epoch": 3.9067500549678744, + "grad_norm": 0.0005695793661288917, + "learning_rate": 2.774078939139677e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107778000, + "step": 159915 + }, + { + "epoch": 3.9068722057997216, + "grad_norm": 0.0030290824361145496, + "learning_rate": 2.773489462983514e-07, + "loss": 0.0, + "num_input_tokens_seen": 107781264, + "step": 159920 + }, + { + "epoch": 3.906994356631569, + "grad_norm": 0.004985318053513765, + "learning_rate": 2.7729000393804825e-07, + "loss": 0.0, + "num_input_tokens_seen": 107785040, + "step": 159925 + }, + { + "epoch": 3.907116507463416, + "grad_norm": 0.0003536255971994251, + "learning_rate": 2.772310668334863e-07, + "loss": 0.0, + "num_input_tokens_seen": 107788560, + "step": 159930 + }, + { + "epoch": 3.907238658295263, + "grad_norm": 0.04345562309026718, + "learning_rate": 2.771721349850944e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107792080, + "step": 159935 + }, + { + "epoch": 3.90736080912711, + "grad_norm": 0.04325956106185913, + "learning_rate": 2.7711320839330155e-07, + "loss": 0.0, + "num_input_tokens_seen": 107795472, + "step": 159940 + }, + { + "epoch": 3.9074829599589576, + "grad_norm": 0.0022824618499726057, + "learning_rate": 2.7705428705853537e-07, + "loss": 0.0, + "num_input_tokens_seen": 107798928, + "step": 159945 + }, + { + "epoch": 3.9076051107908043, + "grad_norm": 0.3876858949661255, + "learning_rate": 2.769953709812254e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107802576, + "step": 159950 + }, + { + "epoch": 3.9077272616226515, + "grad_norm": 0.037881772965192795, + "learning_rate": 2.769364601617994e-07, + "loss": 0.0, + "num_input_tokens_seen": 107805776, + "step": 159955 + }, + { + "epoch": 3.9078494124544987, + "grad_norm": 0.0005450754542835057, + "learning_rate": 2.7687755460068575e-07, + "loss": 0.0, + "num_input_tokens_seen": 107809040, + "step": 159960 + }, + { + "epoch": 3.907971563286346, + "grad_norm": 0.0026407239492982626, + "learning_rate": 2.768186542983133e-07, + "loss": 0.0667, + "num_input_tokens_seen": 107812304, + "step": 159965 + }, + { + "epoch": 3.908093714118193, + "grad_norm": 0.0014854839537292719, + "learning_rate": 2.767597592551097e-07, + "loss": 0.0, + "num_input_tokens_seen": 107815696, + "step": 159970 + }, + { + "epoch": 3.9082158649500403, + "grad_norm": 45.480194091796875, + "learning_rate": 2.767008694715041e-07, + "loss": 0.0632, + "num_input_tokens_seen": 107819344, + "step": 159975 + }, + { + "epoch": 3.9083380157818874, + "grad_norm": 0.0038357700686901808, + "learning_rate": 2.766419849479239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107822544, + "step": 159980 + }, + { + "epoch": 3.9084601666137346, + "grad_norm": 0.01778177171945572, + "learning_rate": 2.765831056847981e-07, + "loss": 0.0, + "num_input_tokens_seen": 107825808, + "step": 159985 + }, + { + "epoch": 3.908582317445582, + "grad_norm": 0.009620671160519123, + "learning_rate": 2.765242316825542e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107829328, + "step": 159990 + }, + { + "epoch": 3.908704468277429, + "grad_norm": 0.0016746602486819029, + "learning_rate": 2.764653629416208e-07, + "loss": 0.0, + "num_input_tokens_seen": 107832720, + "step": 159995 + }, + { + "epoch": 3.908826619109276, + "grad_norm": 0.0006174160516820848, + "learning_rate": 2.7640649946242613e-07, + "loss": 0.0, + "num_input_tokens_seen": 107835920, + "step": 160000 + }, + { + "epoch": 3.9089487699411234, + "grad_norm": 0.0012764016864821315, + "learning_rate": 2.7634764124539765e-07, + "loss": 0.0, + "num_input_tokens_seen": 107839696, + "step": 160005 + }, + { + "epoch": 3.9090709207729706, + "grad_norm": 0.000690353917889297, + "learning_rate": 2.762887882909641e-07, + "loss": 0.0638, + "num_input_tokens_seen": 107843664, + "step": 160010 + }, + { + "epoch": 3.9091930716048178, + "grad_norm": 0.000673630100209266, + "learning_rate": 2.7622994059955287e-07, + "loss": 0.0, + "num_input_tokens_seen": 107847120, + "step": 160015 + }, + { + "epoch": 3.909315222436665, + "grad_norm": 0.0010738215642049909, + "learning_rate": 2.7617109817159244e-07, + "loss": 0.0, + "num_input_tokens_seen": 107850512, + "step": 160020 + }, + { + "epoch": 3.9094373732685117, + "grad_norm": 0.0006555470172315836, + "learning_rate": 2.7611226100751016e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107854352, + "step": 160025 + }, + { + "epoch": 3.9095595241003593, + "grad_norm": 0.0011639671865850687, + "learning_rate": 2.760534291077343e-07, + "loss": 0.0, + "num_input_tokens_seen": 107857744, + "step": 160030 + }, + { + "epoch": 3.909681674932206, + "grad_norm": 0.04372568055987358, + "learning_rate": 2.759946024726928e-07, + "loss": 0.0388, + "num_input_tokens_seen": 107861136, + "step": 160035 + }, + { + "epoch": 3.9098038257640537, + "grad_norm": 0.010761997662484646, + "learning_rate": 2.7593578110281314e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107864400, + "step": 160040 + }, + { + "epoch": 3.9099259765959005, + "grad_norm": 0.08728259056806564, + "learning_rate": 2.758769649985234e-07, + "loss": 0.0, + "num_input_tokens_seen": 107867920, + "step": 160045 + }, + { + "epoch": 3.9100481274277477, + "grad_norm": 0.0009333228808827698, + "learning_rate": 2.7581815416025087e-07, + "loss": 0.0, + "num_input_tokens_seen": 107871376, + "step": 160050 + }, + { + "epoch": 3.910170278259595, + "grad_norm": 0.0002366242988500744, + "learning_rate": 2.757593485884238e-07, + "loss": 0.0, + "num_input_tokens_seen": 107874576, + "step": 160055 + }, + { + "epoch": 3.910292429091442, + "grad_norm": 0.011959303170442581, + "learning_rate": 2.7570054828346957e-07, + "loss": 0.0, + "num_input_tokens_seen": 107878416, + "step": 160060 + }, + { + "epoch": 3.9104145799232892, + "grad_norm": 0.019165946170687675, + "learning_rate": 2.756417532458154e-07, + "loss": 0.0, + "num_input_tokens_seen": 107882256, + "step": 160065 + }, + { + "epoch": 3.9105367307551364, + "grad_norm": 0.0004379362508188933, + "learning_rate": 2.755829634758896e-07, + "loss": 0.0003, + "num_input_tokens_seen": 107885520, + "step": 160070 + }, + { + "epoch": 3.9106588815869836, + "grad_norm": 0.0002538120897952467, + "learning_rate": 2.7552417897411905e-07, + "loss": 0.0, + "num_input_tokens_seen": 107888976, + "step": 160075 + }, + { + "epoch": 3.910781032418831, + "grad_norm": 0.007831291295588017, + "learning_rate": 2.7546539974093175e-07, + "loss": 0.0, + "num_input_tokens_seen": 107892688, + "step": 160080 + }, + { + "epoch": 3.910903183250678, + "grad_norm": 0.0030617325101047754, + "learning_rate": 2.7540662577675477e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107895824, + "step": 160085 + }, + { + "epoch": 3.911025334082525, + "grad_norm": 0.00044064526446163654, + "learning_rate": 2.753478570820156e-07, + "loss": 0.0, + "num_input_tokens_seen": 107899024, + "step": 160090 + }, + { + "epoch": 3.9111474849143724, + "grad_norm": 0.0005494902725331485, + "learning_rate": 2.7528909365714205e-07, + "loss": 0.0002, + "num_input_tokens_seen": 107902096, + "step": 160095 + }, + { + "epoch": 3.9112696357462196, + "grad_norm": 0.0003827103937510401, + "learning_rate": 2.752303355025608e-07, + "loss": 0.0, + "num_input_tokens_seen": 107905872, + "step": 160100 + }, + { + "epoch": 3.9113917865780667, + "grad_norm": 0.00016441762272734195, + "learning_rate": 2.751715826186998e-07, + "loss": 0.0, + "num_input_tokens_seen": 107909008, + "step": 160105 + }, + { + "epoch": 3.9115139374099135, + "grad_norm": 0.003068072721362114, + "learning_rate": 2.751128350059857e-07, + "loss": 0.0, + "num_input_tokens_seen": 107912400, + "step": 160110 + }, + { + "epoch": 3.911636088241761, + "grad_norm": 0.045241840183734894, + "learning_rate": 2.7505409266484636e-07, + "loss": 0.0, + "num_input_tokens_seen": 107915856, + "step": 160115 + }, + { + "epoch": 3.911758239073608, + "grad_norm": 0.0033214271534234285, + "learning_rate": 2.749953555957083e-07, + "loss": 0.0, + "num_input_tokens_seen": 107919376, + "step": 160120 + }, + { + "epoch": 3.9118803899054555, + "grad_norm": 0.0077246990986168385, + "learning_rate": 2.7493662379899906e-07, + "loss": 0.0, + "num_input_tokens_seen": 107922704, + "step": 160125 + }, + { + "epoch": 3.9120025407373022, + "grad_norm": 0.0009203380905091763, + "learning_rate": 2.748778972751461e-07, + "loss": 0.0, + "num_input_tokens_seen": 107926160, + "step": 160130 + }, + { + "epoch": 3.9121246915691494, + "grad_norm": 0.02601289190351963, + "learning_rate": 2.748191760245756e-07, + "loss": 0.0003, + "num_input_tokens_seen": 107929360, + "step": 160135 + }, + { + "epoch": 3.9122468424009966, + "grad_norm": 0.010707485489547253, + "learning_rate": 2.7476046004771557e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107932688, + "step": 160140 + }, + { + "epoch": 3.912368993232844, + "grad_norm": 0.003142469795420766, + "learning_rate": 2.747017493449922e-07, + "loss": 0.0, + "num_input_tokens_seen": 107936144, + "step": 160145 + }, + { + "epoch": 3.912491144064691, + "grad_norm": 0.0026227415073662996, + "learning_rate": 2.74643043916833e-07, + "loss": 0.0, + "num_input_tokens_seen": 107939856, + "step": 160150 + }, + { + "epoch": 3.912613294896538, + "grad_norm": 0.0004527507408056408, + "learning_rate": 2.7458434376366457e-07, + "loss": 0.0, + "num_input_tokens_seen": 107943056, + "step": 160155 + }, + { + "epoch": 3.9127354457283854, + "grad_norm": 0.017700159922242165, + "learning_rate": 2.74525648885914e-07, + "loss": 0.0, + "num_input_tokens_seen": 107946512, + "step": 160160 + }, + { + "epoch": 3.9128575965602326, + "grad_norm": 0.004137058742344379, + "learning_rate": 2.744669592840082e-07, + "loss": 0.0684, + "num_input_tokens_seen": 107949904, + "step": 160165 + }, + { + "epoch": 3.9129797473920798, + "grad_norm": 0.0004117517964914441, + "learning_rate": 2.744082749583734e-07, + "loss": 0.0, + "num_input_tokens_seen": 107953168, + "step": 160170 + }, + { + "epoch": 3.913101898223927, + "grad_norm": 0.005193542223423719, + "learning_rate": 2.743495959094372e-07, + "loss": 0.0, + "num_input_tokens_seen": 107956176, + "step": 160175 + }, + { + "epoch": 3.913224049055774, + "grad_norm": 26.64436912536621, + "learning_rate": 2.742909221376255e-07, + "loss": 0.0633, + "num_input_tokens_seen": 107959696, + "step": 160180 + }, + { + "epoch": 3.9133461998876213, + "grad_norm": 0.0012446003966033459, + "learning_rate": 2.742322536433654e-07, + "loss": 0.0, + "num_input_tokens_seen": 107962960, + "step": 160185 + }, + { + "epoch": 3.9134683507194685, + "grad_norm": 0.0022602214012295008, + "learning_rate": 2.74173590427084e-07, + "loss": 0.0, + "num_input_tokens_seen": 107966352, + "step": 160190 + }, + { + "epoch": 3.9135905015513157, + "grad_norm": 0.006497112102806568, + "learning_rate": 2.74114932489207e-07, + "loss": 0.0005, + "num_input_tokens_seen": 107969552, + "step": 160195 + }, + { + "epoch": 3.913712652383163, + "grad_norm": 0.00037623572279699147, + "learning_rate": 2.7405627983016186e-07, + "loss": 0.0001, + "num_input_tokens_seen": 107972560, + "step": 160200 + }, + { + "epoch": 3.9138348032150097, + "grad_norm": 0.014156325720250607, + "learning_rate": 2.7399763245037444e-07, + "loss": 0.0, + "num_input_tokens_seen": 107975824, + "step": 160205 + }, + { + "epoch": 3.9139569540468573, + "grad_norm": 1.580858042871114e-05, + "learning_rate": 2.739389903502718e-07, + "loss": 0.0, + "num_input_tokens_seen": 107979280, + "step": 160210 + }, + { + "epoch": 3.914079104878704, + "grad_norm": 0.0027877134270966053, + "learning_rate": 2.738803535302797e-07, + "loss": 0.0, + "num_input_tokens_seen": 107982544, + "step": 160215 + }, + { + "epoch": 3.9142012557105517, + "grad_norm": 0.0003772991767618805, + "learning_rate": 2.738217219908251e-07, + "loss": 0.0, + "num_input_tokens_seen": 107985872, + "step": 160220 + }, + { + "epoch": 3.9143234065423984, + "grad_norm": 0.0005691954283975065, + "learning_rate": 2.7376309573233446e-07, + "loss": 0.0, + "num_input_tokens_seen": 107989776, + "step": 160225 + }, + { + "epoch": 3.9144455573742456, + "grad_norm": 0.04178770259022713, + "learning_rate": 2.7370447475523363e-07, + "loss": 0.0, + "num_input_tokens_seen": 107993104, + "step": 160230 + }, + { + "epoch": 3.914567708206093, + "grad_norm": 0.015368896536529064, + "learning_rate": 2.7364585905994953e-07, + "loss": 0.0003, + "num_input_tokens_seen": 107996624, + "step": 160235 + }, + { + "epoch": 3.91468985903794, + "grad_norm": 0.0012327000731602311, + "learning_rate": 2.7358724864690783e-07, + "loss": 0.0, + "num_input_tokens_seen": 108000656, + "step": 160240 + }, + { + "epoch": 3.914812009869787, + "grad_norm": 0.00057915726210922, + "learning_rate": 2.7352864351653503e-07, + "loss": 0.0146, + "num_input_tokens_seen": 108003728, + "step": 160245 + }, + { + "epoch": 3.9149341607016344, + "grad_norm": 0.007998412474989891, + "learning_rate": 2.7347004366925764e-07, + "loss": 0.0, + "num_input_tokens_seen": 108006800, + "step": 160250 + }, + { + "epoch": 3.9150563115334815, + "grad_norm": 0.007969462312757969, + "learning_rate": 2.7341144910550116e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108010576, + "step": 160255 + }, + { + "epoch": 3.9151784623653287, + "grad_norm": 0.0016169307054951787, + "learning_rate": 2.7335285982569247e-07, + "loss": 0.0, + "num_input_tokens_seen": 108013520, + "step": 160260 + }, + { + "epoch": 3.915300613197176, + "grad_norm": 0.0054115429520606995, + "learning_rate": 2.732942758302571e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108016528, + "step": 160265 + }, + { + "epoch": 3.915422764029023, + "grad_norm": 0.0002611346426419914, + "learning_rate": 2.732356971196209e-07, + "loss": 0.0, + "num_input_tokens_seen": 108019600, + "step": 160270 + }, + { + "epoch": 3.9155449148608703, + "grad_norm": 0.06928683817386627, + "learning_rate": 2.7317712369421053e-07, + "loss": 0.0, + "num_input_tokens_seen": 108022864, + "step": 160275 + }, + { + "epoch": 3.9156670656927175, + "grad_norm": 0.0011088034370914102, + "learning_rate": 2.731185555544514e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108026064, + "step": 160280 + }, + { + "epoch": 3.9157892165245647, + "grad_norm": 0.00044814738794229925, + "learning_rate": 2.7305999270076985e-07, + "loss": 0.0024, + "num_input_tokens_seen": 108029712, + "step": 160285 + }, + { + "epoch": 3.9159113673564114, + "grad_norm": 3.936755092581734e-05, + "learning_rate": 2.730014351335913e-07, + "loss": 0.0609, + "num_input_tokens_seen": 108032784, + "step": 160290 + }, + { + "epoch": 3.916033518188259, + "grad_norm": 0.0029017627239227295, + "learning_rate": 2.729428828533421e-07, + "loss": 0.0, + "num_input_tokens_seen": 108035728, + "step": 160295 + }, + { + "epoch": 3.916155669020106, + "grad_norm": 0.00020281197794247419, + "learning_rate": 2.7288433586044746e-07, + "loss": 0.0, + "num_input_tokens_seen": 108039760, + "step": 160300 + }, + { + "epoch": 3.9162778198519534, + "grad_norm": 0.0007818678277544677, + "learning_rate": 2.728257941553336e-07, + "loss": 0.0, + "num_input_tokens_seen": 108043088, + "step": 160305 + }, + { + "epoch": 3.9163999706838, + "grad_norm": 0.00039589000516571105, + "learning_rate": 2.7276725773842646e-07, + "loss": 0.0, + "num_input_tokens_seen": 108046864, + "step": 160310 + }, + { + "epoch": 3.9165221215156474, + "grad_norm": 0.0013458137400448322, + "learning_rate": 2.727087266101511e-07, + "loss": 0.0, + "num_input_tokens_seen": 108050960, + "step": 160315 + }, + { + "epoch": 3.9166442723474946, + "grad_norm": 0.002179963979870081, + "learning_rate": 2.726502007709338e-07, + "loss": 0.0, + "num_input_tokens_seen": 108053968, + "step": 160320 + }, + { + "epoch": 3.9167664231793418, + "grad_norm": 0.042245928198099136, + "learning_rate": 2.725916802211995e-07, + "loss": 0.0, + "num_input_tokens_seen": 108057232, + "step": 160325 + }, + { + "epoch": 3.916888574011189, + "grad_norm": 0.004011943470686674, + "learning_rate": 2.7253316496137457e-07, + "loss": 0.0464, + "num_input_tokens_seen": 108060496, + "step": 160330 + }, + { + "epoch": 3.917010724843036, + "grad_norm": 0.047628071159124374, + "learning_rate": 2.7247465499188373e-07, + "loss": 0.0774, + "num_input_tokens_seen": 108065744, + "step": 160335 + }, + { + "epoch": 3.9171328756748833, + "grad_norm": 0.0027071163058280945, + "learning_rate": 2.724161503131529e-07, + "loss": 0.0, + "num_input_tokens_seen": 108069136, + "step": 160340 + }, + { + "epoch": 3.9172550265067305, + "grad_norm": 0.000492277555167675, + "learning_rate": 2.7235765092560794e-07, + "loss": 0.0816, + "num_input_tokens_seen": 108072272, + "step": 160345 + }, + { + "epoch": 3.9173771773385777, + "grad_norm": 1.0742985068645794e-05, + "learning_rate": 2.722991568296734e-07, + "loss": 0.0, + "num_input_tokens_seen": 108075216, + "step": 160350 + }, + { + "epoch": 3.917499328170425, + "grad_norm": 0.00816552434116602, + "learning_rate": 2.7224066802577547e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108079120, + "step": 160355 + }, + { + "epoch": 3.917621479002272, + "grad_norm": 0.00256815692409873, + "learning_rate": 2.721821845143388e-07, + "loss": 0.019, + "num_input_tokens_seen": 108082512, + "step": 160360 + }, + { + "epoch": 3.9177436298341193, + "grad_norm": 0.002888076938688755, + "learning_rate": 2.721237062957894e-07, + "loss": 0.0007, + "num_input_tokens_seen": 108085648, + "step": 160365 + }, + { + "epoch": 3.9178657806659665, + "grad_norm": 0.0006103937048465014, + "learning_rate": 2.720652333705522e-07, + "loss": 0.0, + "num_input_tokens_seen": 108088976, + "step": 160370 + }, + { + "epoch": 3.9179879314978137, + "grad_norm": 0.0034813780803233385, + "learning_rate": 2.720067657390521e-07, + "loss": 0.0, + "num_input_tokens_seen": 108092432, + "step": 160375 + }, + { + "epoch": 3.918110082329661, + "grad_norm": 0.008327373303472996, + "learning_rate": 2.7194830340171494e-07, + "loss": 0.0, + "num_input_tokens_seen": 108095760, + "step": 160380 + }, + { + "epoch": 3.9182322331615076, + "grad_norm": 0.025573179125785828, + "learning_rate": 2.7188984635896516e-07, + "loss": 0.0, + "num_input_tokens_seen": 108099024, + "step": 160385 + }, + { + "epoch": 3.9183543839933552, + "grad_norm": 0.054493945091962814, + "learning_rate": 2.718313946112286e-07, + "loss": 0.0, + "num_input_tokens_seen": 108102480, + "step": 160390 + }, + { + "epoch": 3.918476534825202, + "grad_norm": 0.003784226719290018, + "learning_rate": 2.717729481589297e-07, + "loss": 0.0, + "num_input_tokens_seen": 108105872, + "step": 160395 + }, + { + "epoch": 3.918598685657049, + "grad_norm": 0.0017452125903218985, + "learning_rate": 2.7171450700249375e-07, + "loss": 0.0, + "num_input_tokens_seen": 108108880, + "step": 160400 + }, + { + "epoch": 3.9187208364888964, + "grad_norm": 0.0016395858256146312, + "learning_rate": 2.7165607114234614e-07, + "loss": 0.0, + "num_input_tokens_seen": 108112336, + "step": 160405 + }, + { + "epoch": 3.9188429873207435, + "grad_norm": 0.0008384010288864374, + "learning_rate": 2.715976405789111e-07, + "loss": 0.0, + "num_input_tokens_seen": 108115408, + "step": 160410 + }, + { + "epoch": 3.9189651381525907, + "grad_norm": 0.0010273084044456482, + "learning_rate": 2.7153921531261436e-07, + "loss": 0.0, + "num_input_tokens_seen": 108118608, + "step": 160415 + }, + { + "epoch": 3.919087288984438, + "grad_norm": 4.537554195849225e-05, + "learning_rate": 2.7148079534388004e-07, + "loss": 0.0, + "num_input_tokens_seen": 108122448, + "step": 160420 + }, + { + "epoch": 3.919209439816285, + "grad_norm": 0.0004955941112712026, + "learning_rate": 2.714223806731335e-07, + "loss": 0.0, + "num_input_tokens_seen": 108126352, + "step": 160425 + }, + { + "epoch": 3.9193315906481323, + "grad_norm": 0.002432050183415413, + "learning_rate": 2.7136397130079926e-07, + "loss": 0.0021, + "num_input_tokens_seen": 108129552, + "step": 160430 + }, + { + "epoch": 3.9194537414799795, + "grad_norm": 0.1448434293270111, + "learning_rate": 2.71305567227302e-07, + "loss": 0.0, + "num_input_tokens_seen": 108133008, + "step": 160435 + }, + { + "epoch": 3.9195758923118267, + "grad_norm": 0.054726745933294296, + "learning_rate": 2.7124716845306717e-07, + "loss": 0.0, + "num_input_tokens_seen": 108136400, + "step": 160440 + }, + { + "epoch": 3.919698043143674, + "grad_norm": 0.02143864706158638, + "learning_rate": 2.7118877497851844e-07, + "loss": 0.0, + "num_input_tokens_seen": 108139984, + "step": 160445 + }, + { + "epoch": 3.919820193975521, + "grad_norm": 0.10433755069971085, + "learning_rate": 2.711303868040814e-07, + "loss": 0.0, + "num_input_tokens_seen": 108143248, + "step": 160450 + }, + { + "epoch": 3.9199423448073683, + "grad_norm": 0.001276486786082387, + "learning_rate": 2.7107200393017994e-07, + "loss": 0.0, + "num_input_tokens_seen": 108146448, + "step": 160455 + }, + { + "epoch": 3.9200644956392154, + "grad_norm": 0.0015387848252430558, + "learning_rate": 2.710136263572391e-07, + "loss": 0.0, + "num_input_tokens_seen": 108149776, + "step": 160460 + }, + { + "epoch": 3.9201866464710626, + "grad_norm": 0.003469153307378292, + "learning_rate": 2.7095525408568297e-07, + "loss": 0.0, + "num_input_tokens_seen": 108152848, + "step": 160465 + }, + { + "epoch": 3.9203087973029094, + "grad_norm": 0.008718705736100674, + "learning_rate": 2.7089688711593674e-07, + "loss": 0.0, + "num_input_tokens_seen": 108155792, + "step": 160470 + }, + { + "epoch": 3.920430948134757, + "grad_norm": 0.002317639999091625, + "learning_rate": 2.7083852544842433e-07, + "loss": 0.0, + "num_input_tokens_seen": 108158736, + "step": 160475 + }, + { + "epoch": 3.9205530989666038, + "grad_norm": 0.004345919005572796, + "learning_rate": 2.7078016908357004e-07, + "loss": 0.0, + "num_input_tokens_seen": 108162000, + "step": 160480 + }, + { + "epoch": 3.9206752497984514, + "grad_norm": 0.08784374594688416, + "learning_rate": 2.707218180217988e-07, + "loss": 0.0, + "num_input_tokens_seen": 108165648, + "step": 160485 + }, + { + "epoch": 3.920797400630298, + "grad_norm": 0.0225373525172472, + "learning_rate": 2.7066347226353435e-07, + "loss": 0.0, + "num_input_tokens_seen": 108169296, + "step": 160490 + }, + { + "epoch": 3.9209195514621453, + "grad_norm": 0.0019363034516572952, + "learning_rate": 2.706051318092013e-07, + "loss": 0.0, + "num_input_tokens_seen": 108172368, + "step": 160495 + }, + { + "epoch": 3.9210417022939925, + "grad_norm": 0.0003040806914214045, + "learning_rate": 2.705467966592242e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108175568, + "step": 160500 + }, + { + "epoch": 3.9211638531258397, + "grad_norm": 6.52025337330997e-05, + "learning_rate": 2.704884668140267e-07, + "loss": 0.0, + "num_input_tokens_seen": 108179088, + "step": 160505 + }, + { + "epoch": 3.921286003957687, + "grad_norm": 0.00024085829500108957, + "learning_rate": 2.704301422740336e-07, + "loss": 0.0, + "num_input_tokens_seen": 108182096, + "step": 160510 + }, + { + "epoch": 3.921408154789534, + "grad_norm": 0.002301350235939026, + "learning_rate": 2.703718230396683e-07, + "loss": 0.0, + "num_input_tokens_seen": 108185744, + "step": 160515 + }, + { + "epoch": 3.9215303056213813, + "grad_norm": 0.0006811128696426749, + "learning_rate": 2.703135091113559e-07, + "loss": 0.0, + "num_input_tokens_seen": 108189328, + "step": 160520 + }, + { + "epoch": 3.9216524564532285, + "grad_norm": 0.0007640509284101427, + "learning_rate": 2.7025520048951944e-07, + "loss": 0.0, + "num_input_tokens_seen": 108192656, + "step": 160525 + }, + { + "epoch": 3.9217746072850757, + "grad_norm": 0.001994823105633259, + "learning_rate": 2.701968971745835e-07, + "loss": 0.0, + "num_input_tokens_seen": 108196240, + "step": 160530 + }, + { + "epoch": 3.921896758116923, + "grad_norm": 0.007130780257284641, + "learning_rate": 2.701385991669722e-07, + "loss": 0.074, + "num_input_tokens_seen": 108199376, + "step": 160535 + }, + { + "epoch": 3.92201890894877, + "grad_norm": 0.005872517358511686, + "learning_rate": 2.7008030646710923e-07, + "loss": 0.0, + "num_input_tokens_seen": 108202384, + "step": 160540 + }, + { + "epoch": 3.9221410597806172, + "grad_norm": 4.299524784088135, + "learning_rate": 2.7002201907541875e-07, + "loss": 0.0007, + "num_input_tokens_seen": 108205712, + "step": 160545 + }, + { + "epoch": 3.9222632106124644, + "grad_norm": 0.0014040902024134994, + "learning_rate": 2.699637369923242e-07, + "loss": 0.0069, + "num_input_tokens_seen": 108209168, + "step": 160550 + }, + { + "epoch": 3.9223853614443116, + "grad_norm": 7.01907993061468e-05, + "learning_rate": 2.699054602182498e-07, + "loss": 0.0, + "num_input_tokens_seen": 108212688, + "step": 160555 + }, + { + "epoch": 3.922507512276159, + "grad_norm": 0.0007399121532216668, + "learning_rate": 2.6984718875361947e-07, + "loss": 0.0, + "num_input_tokens_seen": 108216080, + "step": 160560 + }, + { + "epoch": 3.9226296631080055, + "grad_norm": 0.006925436668097973, + "learning_rate": 2.6978892259885657e-07, + "loss": 0.0321, + "num_input_tokens_seen": 108219472, + "step": 160565 + }, + { + "epoch": 3.922751813939853, + "grad_norm": 0.0036607375368475914, + "learning_rate": 2.697306617543852e-07, + "loss": 0.0, + "num_input_tokens_seen": 108222800, + "step": 160570 + }, + { + "epoch": 3.9228739647717, + "grad_norm": 0.001552755944430828, + "learning_rate": 2.6967240622062895e-07, + "loss": 0.0, + "num_input_tokens_seen": 108225936, + "step": 160575 + }, + { + "epoch": 3.922996115603547, + "grad_norm": 0.0007583214901387691, + "learning_rate": 2.696141559980111e-07, + "loss": 0.0, + "num_input_tokens_seen": 108229392, + "step": 160580 + }, + { + "epoch": 3.9231182664353943, + "grad_norm": 0.008268969133496284, + "learning_rate": 2.6955591108695585e-07, + "loss": 0.0, + "num_input_tokens_seen": 108232784, + "step": 160585 + }, + { + "epoch": 3.9232404172672415, + "grad_norm": 5.6169724302890245e-06, + "learning_rate": 2.6949767148788615e-07, + "loss": 0.0, + "num_input_tokens_seen": 108236496, + "step": 160590 + }, + { + "epoch": 3.9233625680990887, + "grad_norm": 0.0036725711543112993, + "learning_rate": 2.694394372012262e-07, + "loss": 0.0, + "num_input_tokens_seen": 108240400, + "step": 160595 + }, + { + "epoch": 3.923484718930936, + "grad_norm": 0.0005443750414997339, + "learning_rate": 2.6938120822739884e-07, + "loss": 0.0, + "num_input_tokens_seen": 108244240, + "step": 160600 + }, + { + "epoch": 3.923606869762783, + "grad_norm": 0.12630638480186462, + "learning_rate": 2.693229845668281e-07, + "loss": 0.0, + "num_input_tokens_seen": 108248976, + "step": 160605 + }, + { + "epoch": 3.9237290205946302, + "grad_norm": 0.9307985305786133, + "learning_rate": 2.6926476621993697e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108252432, + "step": 160610 + }, + { + "epoch": 3.9238511714264774, + "grad_norm": 0.0034127281978726387, + "learning_rate": 2.6920655318714923e-07, + "loss": 0.0, + "num_input_tokens_seen": 108255568, + "step": 160615 + }, + { + "epoch": 3.9239733222583246, + "grad_norm": 0.025321057066321373, + "learning_rate": 2.6914834546888766e-07, + "loss": 0.0, + "num_input_tokens_seen": 108259280, + "step": 160620 + }, + { + "epoch": 3.924095473090172, + "grad_norm": 0.00016328954370692372, + "learning_rate": 2.69090143065576e-07, + "loss": 0.0, + "num_input_tokens_seen": 108262608, + "step": 160625 + }, + { + "epoch": 3.924217623922019, + "grad_norm": 0.05200456827878952, + "learning_rate": 2.690319459776376e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108266000, + "step": 160630 + }, + { + "epoch": 3.924339774753866, + "grad_norm": 0.008335310034453869, + "learning_rate": 2.689737542054953e-07, + "loss": 0.0, + "num_input_tokens_seen": 108269136, + "step": 160635 + }, + { + "epoch": 3.9244619255857134, + "grad_norm": 0.00032875704346224666, + "learning_rate": 2.689155677495727e-07, + "loss": 0.0, + "num_input_tokens_seen": 108272144, + "step": 160640 + }, + { + "epoch": 3.9245840764175606, + "grad_norm": 0.021563809365034103, + "learning_rate": 2.6885738661029246e-07, + "loss": 0.0, + "num_input_tokens_seen": 108275152, + "step": 160645 + }, + { + "epoch": 3.9247062272494073, + "grad_norm": 0.0003353093343321234, + "learning_rate": 2.687992107880779e-07, + "loss": 0.0, + "num_input_tokens_seen": 108278608, + "step": 160650 + }, + { + "epoch": 3.924828378081255, + "grad_norm": 0.0016764416359364986, + "learning_rate": 2.6874104028335256e-07, + "loss": 0.0, + "num_input_tokens_seen": 108281808, + "step": 160655 + }, + { + "epoch": 3.9249505289131017, + "grad_norm": 0.0027599085588008165, + "learning_rate": 2.686828750965386e-07, + "loss": 0.0553, + "num_input_tokens_seen": 108285136, + "step": 160660 + }, + { + "epoch": 3.9250726797449493, + "grad_norm": 0.0004885609960183501, + "learning_rate": 2.6862471522805995e-07, + "loss": 0.0, + "num_input_tokens_seen": 108288400, + "step": 160665 + }, + { + "epoch": 3.925194830576796, + "grad_norm": 0.0022552688606083393, + "learning_rate": 2.685665606783387e-07, + "loss": 0.0822, + "num_input_tokens_seen": 108291792, + "step": 160670 + }, + { + "epoch": 3.9253169814086433, + "grad_norm": 0.0019291022326797247, + "learning_rate": 2.6850841144779844e-07, + "loss": 0.0, + "num_input_tokens_seen": 108295312, + "step": 160675 + }, + { + "epoch": 3.9254391322404905, + "grad_norm": 0.7367055416107178, + "learning_rate": 2.684502675368617e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108298768, + "step": 160680 + }, + { + "epoch": 3.9255612830723376, + "grad_norm": 0.0002957978576887399, + "learning_rate": 2.683921289459512e-07, + "loss": 0.0, + "num_input_tokens_seen": 108302096, + "step": 160685 + }, + { + "epoch": 3.925683433904185, + "grad_norm": 0.00023816576867830008, + "learning_rate": 2.683339956754902e-07, + "loss": 0.0, + "num_input_tokens_seen": 108305040, + "step": 160690 + }, + { + "epoch": 3.925805584736032, + "grad_norm": 0.001074330066330731, + "learning_rate": 2.6827586772590084e-07, + "loss": 0.0, + "num_input_tokens_seen": 108308624, + "step": 160695 + }, + { + "epoch": 3.925927735567879, + "grad_norm": 0.005222479347139597, + "learning_rate": 2.6821774509760655e-07, + "loss": 0.0, + "num_input_tokens_seen": 108311696, + "step": 160700 + }, + { + "epoch": 3.9260498863997264, + "grad_norm": 0.001443124609068036, + "learning_rate": 2.681596277910293e-07, + "loss": 0.0, + "num_input_tokens_seen": 108315088, + "step": 160705 + }, + { + "epoch": 3.9261720372315736, + "grad_norm": 0.001377643900923431, + "learning_rate": 2.68101515806592e-07, + "loss": 0.0, + "num_input_tokens_seen": 108318416, + "step": 160710 + }, + { + "epoch": 3.926294188063421, + "grad_norm": 0.000944781641010195, + "learning_rate": 2.680434091447177e-07, + "loss": 0.0, + "num_input_tokens_seen": 108321616, + "step": 160715 + }, + { + "epoch": 3.926416338895268, + "grad_norm": 0.0011502342531457543, + "learning_rate": 2.6798530780582826e-07, + "loss": 0.0353, + "num_input_tokens_seen": 108324688, + "step": 160720 + }, + { + "epoch": 3.926538489727115, + "grad_norm": 0.0038087046705186367, + "learning_rate": 2.6792721179034695e-07, + "loss": 0.0, + "num_input_tokens_seen": 108327824, + "step": 160725 + }, + { + "epoch": 3.9266606405589624, + "grad_norm": 0.0036730014253407717, + "learning_rate": 2.678691210986955e-07, + "loss": 0.0, + "num_input_tokens_seen": 108331472, + "step": 160730 + }, + { + "epoch": 3.926782791390809, + "grad_norm": 0.0008776450995355844, + "learning_rate": 2.67811035731297e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108334544, + "step": 160735 + }, + { + "epoch": 3.9269049422226567, + "grad_norm": 21.63974380493164, + "learning_rate": 2.677529556885734e-07, + "loss": 0.074, + "num_input_tokens_seen": 108337616, + "step": 160740 + }, + { + "epoch": 3.9270270930545035, + "grad_norm": 0.003887888975441456, + "learning_rate": 2.6769488097094704e-07, + "loss": 0.0, + "num_input_tokens_seen": 108342096, + "step": 160745 + }, + { + "epoch": 3.927149243886351, + "grad_norm": 0.0010327683994546533, + "learning_rate": 2.67636811578841e-07, + "loss": 0.0, + "num_input_tokens_seen": 108345360, + "step": 160750 + }, + { + "epoch": 3.927271394718198, + "grad_norm": 0.0014810446882620454, + "learning_rate": 2.675787475126766e-07, + "loss": 0.0, + "num_input_tokens_seen": 108348880, + "step": 160755 + }, + { + "epoch": 3.927393545550045, + "grad_norm": 0.002494688145816326, + "learning_rate": 2.675206887728769e-07, + "loss": 0.0, + "num_input_tokens_seen": 108351824, + "step": 160760 + }, + { + "epoch": 3.9275156963818922, + "grad_norm": 0.00014557481335941702, + "learning_rate": 2.6746263535986345e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108355216, + "step": 160765 + }, + { + "epoch": 3.9276378472137394, + "grad_norm": 0.0002793877793010324, + "learning_rate": 2.6740458727405903e-07, + "loss": 0.0, + "num_input_tokens_seen": 108358672, + "step": 160770 + }, + { + "epoch": 3.9277599980455866, + "grad_norm": 0.0017380811041221023, + "learning_rate": 2.6734654451588524e-07, + "loss": 0.0, + "num_input_tokens_seen": 108362064, + "step": 160775 + }, + { + "epoch": 3.927882148877434, + "grad_norm": 0.004759801551699638, + "learning_rate": 2.6728850708576467e-07, + "loss": 0.0, + "num_input_tokens_seen": 108365392, + "step": 160780 + }, + { + "epoch": 3.928004299709281, + "grad_norm": 0.001996064791455865, + "learning_rate": 2.672304749841189e-07, + "loss": 0.0, + "num_input_tokens_seen": 108368656, + "step": 160785 + }, + { + "epoch": 3.928126450541128, + "grad_norm": 0.0019026733934879303, + "learning_rate": 2.671724482113705e-07, + "loss": 0.0, + "num_input_tokens_seen": 108372496, + "step": 160790 + }, + { + "epoch": 3.9282486013729754, + "grad_norm": 0.0004214816144667566, + "learning_rate": 2.6711442676794117e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108375824, + "step": 160795 + }, + { + "epoch": 3.9283707522048226, + "grad_norm": 0.001756381243467331, + "learning_rate": 2.6705641065425255e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108378960, + "step": 160800 + }, + { + "epoch": 3.9284929030366698, + "grad_norm": 0.11266539990901947, + "learning_rate": 2.669983998707268e-07, + "loss": 0.0, + "num_input_tokens_seen": 108381712, + "step": 160805 + }, + { + "epoch": 3.928615053868517, + "grad_norm": 0.0004792583640664816, + "learning_rate": 2.669403944177863e-07, + "loss": 0.0, + "num_input_tokens_seen": 108385040, + "step": 160810 + }, + { + "epoch": 3.928737204700364, + "grad_norm": 0.0034123146906495094, + "learning_rate": 2.668823942958519e-07, + "loss": 0.0, + "num_input_tokens_seen": 108388176, + "step": 160815 + }, + { + "epoch": 3.9288593555322113, + "grad_norm": 0.002358450088649988, + "learning_rate": 2.668243995053464e-07, + "loss": 0.0, + "num_input_tokens_seen": 108391632, + "step": 160820 + }, + { + "epoch": 3.9289815063640585, + "grad_norm": 0.0016407499788329005, + "learning_rate": 2.667664100466906e-07, + "loss": 0.0, + "num_input_tokens_seen": 108395472, + "step": 160825 + }, + { + "epoch": 3.9291036571959053, + "grad_norm": 0.007201909553259611, + "learning_rate": 2.6670842592030706e-07, + "loss": 0.0, + "num_input_tokens_seen": 108398992, + "step": 160830 + }, + { + "epoch": 3.929225808027753, + "grad_norm": 0.00013028294779360294, + "learning_rate": 2.6665044712661687e-07, + "loss": 0.0, + "num_input_tokens_seen": 108402512, + "step": 160835 + }, + { + "epoch": 3.9293479588595996, + "grad_norm": 0.00041084669646807015, + "learning_rate": 2.665924736660418e-07, + "loss": 0.0402, + "num_input_tokens_seen": 108405776, + "step": 160840 + }, + { + "epoch": 3.929470109691447, + "grad_norm": 6.577320891665295e-05, + "learning_rate": 2.6653450553900383e-07, + "loss": 0.0, + "num_input_tokens_seen": 108408976, + "step": 160845 + }, + { + "epoch": 3.929592260523294, + "grad_norm": 0.02033839002251625, + "learning_rate": 2.664765427459239e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108412880, + "step": 160850 + }, + { + "epoch": 3.929714411355141, + "grad_norm": 7.121515955077484e-05, + "learning_rate": 2.6641858528722403e-07, + "loss": 0.0489, + "num_input_tokens_seen": 108416080, + "step": 160855 + }, + { + "epoch": 3.9298365621869884, + "grad_norm": 0.002095576375722885, + "learning_rate": 2.6636063316332535e-07, + "loss": 0.0, + "num_input_tokens_seen": 108419280, + "step": 160860 + }, + { + "epoch": 3.9299587130188356, + "grad_norm": 0.0001592604094184935, + "learning_rate": 2.663026863746495e-07, + "loss": 0.0, + "num_input_tokens_seen": 108422864, + "step": 160865 + }, + { + "epoch": 3.930080863850683, + "grad_norm": 0.0009298757067881525, + "learning_rate": 2.662447449216181e-07, + "loss": 0.0, + "num_input_tokens_seen": 108425936, + "step": 160870 + }, + { + "epoch": 3.93020301468253, + "grad_norm": 0.007275915704667568, + "learning_rate": 2.6618680880465207e-07, + "loss": 0.0882, + "num_input_tokens_seen": 108429456, + "step": 160875 + }, + { + "epoch": 3.930325165514377, + "grad_norm": 17.31068992614746, + "learning_rate": 2.6612887802417307e-07, + "loss": 0.0373, + "num_input_tokens_seen": 108432272, + "step": 160880 + }, + { + "epoch": 3.9304473163462244, + "grad_norm": 0.03703254088759422, + "learning_rate": 2.660709525806024e-07, + "loss": 0.035, + "num_input_tokens_seen": 108435408, + "step": 160885 + }, + { + "epoch": 3.9305694671780715, + "grad_norm": 0.0028196689672768116, + "learning_rate": 2.660130324743608e-07, + "loss": 0.0, + "num_input_tokens_seen": 108438736, + "step": 160890 + }, + { + "epoch": 3.9306916180099187, + "grad_norm": 0.0006376370438374579, + "learning_rate": 2.659551177058701e-07, + "loss": 0.0, + "num_input_tokens_seen": 108442000, + "step": 160895 + }, + { + "epoch": 3.930813768841766, + "grad_norm": 0.0008170445216819644, + "learning_rate": 2.6589720827555094e-07, + "loss": 0.0, + "num_input_tokens_seen": 108445264, + "step": 160900 + }, + { + "epoch": 3.930935919673613, + "grad_norm": 0.3517223298549652, + "learning_rate": 2.6583930418382507e-07, + "loss": 0.0729, + "num_input_tokens_seen": 108448656, + "step": 160905 + }, + { + "epoch": 3.9310580705054603, + "grad_norm": 0.0012302043614909053, + "learning_rate": 2.6578140543111293e-07, + "loss": 0.0, + "num_input_tokens_seen": 108452304, + "step": 160910 + }, + { + "epoch": 3.931180221337307, + "grad_norm": 0.002731229877099395, + "learning_rate": 2.6572351201783625e-07, + "loss": 0.0, + "num_input_tokens_seen": 108455632, + "step": 160915 + }, + { + "epoch": 3.9313023721691547, + "grad_norm": 2.0680634406744502e-05, + "learning_rate": 2.656656239444153e-07, + "loss": 0.0, + "num_input_tokens_seen": 108459216, + "step": 160920 + }, + { + "epoch": 3.9314245230010014, + "grad_norm": 0.0033918926492333412, + "learning_rate": 2.6560774121127185e-07, + "loss": 0.0869, + "num_input_tokens_seen": 108462352, + "step": 160925 + }, + { + "epoch": 3.931546673832849, + "grad_norm": 0.0004112799360882491, + "learning_rate": 2.6554986381882603e-07, + "loss": 0.0, + "num_input_tokens_seen": 108466000, + "step": 160930 + }, + { + "epoch": 3.931668824664696, + "grad_norm": 0.0012325807474553585, + "learning_rate": 2.6549199176749915e-07, + "loss": 0.0, + "num_input_tokens_seen": 108469264, + "step": 160935 + }, + { + "epoch": 3.931790975496543, + "grad_norm": 0.0005546318716369569, + "learning_rate": 2.654341250577125e-07, + "loss": 0.0, + "num_input_tokens_seen": 108472528, + "step": 160940 + }, + { + "epoch": 3.93191312632839, + "grad_norm": 0.0006047788774594665, + "learning_rate": 2.6537626368988595e-07, + "loss": 0.0, + "num_input_tokens_seen": 108476048, + "step": 160945 + }, + { + "epoch": 3.9320352771602374, + "grad_norm": 0.02619839832186699, + "learning_rate": 2.6531840766444127e-07, + "loss": 0.0, + "num_input_tokens_seen": 108479312, + "step": 160950 + }, + { + "epoch": 3.9321574279920846, + "grad_norm": 2.9269987862790003e-05, + "learning_rate": 2.6526055698179826e-07, + "loss": 0.0, + "num_input_tokens_seen": 108482448, + "step": 160955 + }, + { + "epoch": 3.9322795788239318, + "grad_norm": 0.0012526396894827485, + "learning_rate": 2.652027116423783e-07, + "loss": 0.0, + "num_input_tokens_seen": 108486288, + "step": 160960 + }, + { + "epoch": 3.932401729655779, + "grad_norm": 0.003693891456350684, + "learning_rate": 2.65144871646602e-07, + "loss": 0.0, + "num_input_tokens_seen": 108489936, + "step": 160965 + }, + { + "epoch": 3.932523880487626, + "grad_norm": 0.001101264264434576, + "learning_rate": 2.6508703699488964e-07, + "loss": 0.0, + "num_input_tokens_seen": 108493456, + "step": 160970 + }, + { + "epoch": 3.9326460313194733, + "grad_norm": 0.001372728613205254, + "learning_rate": 2.6502920768766234e-07, + "loss": 0.0, + "num_input_tokens_seen": 108496784, + "step": 160975 + }, + { + "epoch": 3.9327681821513205, + "grad_norm": 0.0008805957622826099, + "learning_rate": 2.6497138372534e-07, + "loss": 0.0, + "num_input_tokens_seen": 108500304, + "step": 160980 + }, + { + "epoch": 3.9328903329831677, + "grad_norm": 0.001661680988036096, + "learning_rate": 2.6491356510834374e-07, + "loss": 0.0, + "num_input_tokens_seen": 108503632, + "step": 160985 + }, + { + "epoch": 3.933012483815015, + "grad_norm": 0.003996791783720255, + "learning_rate": 2.6485575183709375e-07, + "loss": 0.0, + "num_input_tokens_seen": 108506832, + "step": 160990 + }, + { + "epoch": 3.933134634646862, + "grad_norm": 0.0058486550115048885, + "learning_rate": 2.6479794391201005e-07, + "loss": 0.0, + "num_input_tokens_seen": 108510352, + "step": 160995 + }, + { + "epoch": 3.9332567854787093, + "grad_norm": 0.00618613138794899, + "learning_rate": 2.6474014133351383e-07, + "loss": 0.0446, + "num_input_tokens_seen": 108514128, + "step": 161000 + }, + { + "epoch": 3.9333789363105565, + "grad_norm": 0.03912936523556709, + "learning_rate": 2.6468234410202484e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108517328, + "step": 161005 + }, + { + "epoch": 3.933501087142403, + "grad_norm": 0.0005721602938137949, + "learning_rate": 2.6462455221796386e-07, + "loss": 0.1235, + "num_input_tokens_seen": 108520848, + "step": 161010 + }, + { + "epoch": 3.933623237974251, + "grad_norm": 0.0008427056600339711, + "learning_rate": 2.645667656817506e-07, + "loss": 0.0, + "num_input_tokens_seen": 108523856, + "step": 161015 + }, + { + "epoch": 3.9337453888060976, + "grad_norm": 0.0006119231111370027, + "learning_rate": 2.6450898449380575e-07, + "loss": 0.0, + "num_input_tokens_seen": 108527184, + "step": 161020 + }, + { + "epoch": 3.9338675396379448, + "grad_norm": 0.005697476677596569, + "learning_rate": 2.6445120865454964e-07, + "loss": 0.0, + "num_input_tokens_seen": 108530576, + "step": 161025 + }, + { + "epoch": 3.933989690469792, + "grad_norm": 0.001607921440154314, + "learning_rate": 2.643934381644017e-07, + "loss": 0.0, + "num_input_tokens_seen": 108534672, + "step": 161030 + }, + { + "epoch": 3.934111841301639, + "grad_norm": 28.863571166992188, + "learning_rate": 2.64335673023783e-07, + "loss": 0.0524, + "num_input_tokens_seen": 108538384, + "step": 161035 + }, + { + "epoch": 3.9342339921334863, + "grad_norm": 0.0019764634780585766, + "learning_rate": 2.6427791323311287e-07, + "loss": 0.0, + "num_input_tokens_seen": 108542288, + "step": 161040 + }, + { + "epoch": 3.9343561429653335, + "grad_norm": 0.009618827141821384, + "learning_rate": 2.642201587928119e-07, + "loss": 0.0, + "num_input_tokens_seen": 108546128, + "step": 161045 + }, + { + "epoch": 3.9344782937971807, + "grad_norm": 0.0010088874259963632, + "learning_rate": 2.641624097032995e-07, + "loss": 0.0, + "num_input_tokens_seen": 108549520, + "step": 161050 + }, + { + "epoch": 3.934600444629028, + "grad_norm": 0.0008176874252967536, + "learning_rate": 2.64104665964996e-07, + "loss": 0.0, + "num_input_tokens_seen": 108553104, + "step": 161055 + }, + { + "epoch": 3.934722595460875, + "grad_norm": 0.00025323365116491914, + "learning_rate": 2.640469275783217e-07, + "loss": 0.0, + "num_input_tokens_seen": 108556304, + "step": 161060 + }, + { + "epoch": 3.9348447462927223, + "grad_norm": 0.005040105897933245, + "learning_rate": 2.6398919454369564e-07, + "loss": 0.0, + "num_input_tokens_seen": 108560016, + "step": 161065 + }, + { + "epoch": 3.9349668971245695, + "grad_norm": 0.0007410483667626977, + "learning_rate": 2.639314668615384e-07, + "loss": 0.0, + "num_input_tokens_seen": 108562960, + "step": 161070 + }, + { + "epoch": 3.9350890479564167, + "grad_norm": 0.015878252685070038, + "learning_rate": 2.638737445322694e-07, + "loss": 0.0, + "num_input_tokens_seen": 108566480, + "step": 161075 + }, + { + "epoch": 3.935211198788264, + "grad_norm": 0.014976591803133488, + "learning_rate": 2.638160275563087e-07, + "loss": 0.0, + "num_input_tokens_seen": 108569232, + "step": 161080 + }, + { + "epoch": 3.935333349620111, + "grad_norm": 0.007671588100492954, + "learning_rate": 2.637583159340756e-07, + "loss": 0.0479, + "num_input_tokens_seen": 108572368, + "step": 161085 + }, + { + "epoch": 3.9354555004519582, + "grad_norm": 0.03282986581325531, + "learning_rate": 2.637006096659903e-07, + "loss": 0.0, + "num_input_tokens_seen": 108575248, + "step": 161090 + }, + { + "epoch": 3.935577651283805, + "grad_norm": 0.006898272316902876, + "learning_rate": 2.6364290875247195e-07, + "loss": 0.0, + "num_input_tokens_seen": 108578640, + "step": 161095 + }, + { + "epoch": 3.9356998021156526, + "grad_norm": 0.003733513643965125, + "learning_rate": 2.635852131939407e-07, + "loss": 0.0, + "num_input_tokens_seen": 108582160, + "step": 161100 + }, + { + "epoch": 3.9358219529474994, + "grad_norm": 0.00028158118948340416, + "learning_rate": 2.635275229908158e-07, + "loss": 0.0, + "num_input_tokens_seen": 108585424, + "step": 161105 + }, + { + "epoch": 3.935944103779347, + "grad_norm": 0.002537839813157916, + "learning_rate": 2.6346983814351667e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108588432, + "step": 161110 + }, + { + "epoch": 3.9360662546111937, + "grad_norm": 0.000527587253600359, + "learning_rate": 2.634121586524629e-07, + "loss": 0.0, + "num_input_tokens_seen": 108591952, + "step": 161115 + }, + { + "epoch": 3.936188405443041, + "grad_norm": 0.015813853591680527, + "learning_rate": 2.633544845180743e-07, + "loss": 0.0, + "num_input_tokens_seen": 108595408, + "step": 161120 + }, + { + "epoch": 3.936310556274888, + "grad_norm": 0.0024629030376672745, + "learning_rate": 2.632968157407698e-07, + "loss": 0.0, + "num_input_tokens_seen": 108598608, + "step": 161125 + }, + { + "epoch": 3.9364327071067353, + "grad_norm": 67.15186309814453, + "learning_rate": 2.632391523209693e-07, + "loss": 0.0778, + "num_input_tokens_seen": 108601808, + "step": 161130 + }, + { + "epoch": 3.9365548579385825, + "grad_norm": 0.0006878372514620423, + "learning_rate": 2.631814942590914e-07, + "loss": 0.0, + "num_input_tokens_seen": 108604880, + "step": 161135 + }, + { + "epoch": 3.9366770087704297, + "grad_norm": 0.007203788496553898, + "learning_rate": 2.631238415555563e-07, + "loss": 0.0, + "num_input_tokens_seen": 108608016, + "step": 161140 + }, + { + "epoch": 3.936799159602277, + "grad_norm": 0.00016854728164616972, + "learning_rate": 2.6306619421078245e-07, + "loss": 0.1047, + "num_input_tokens_seen": 108611920, + "step": 161145 + }, + { + "epoch": 3.936921310434124, + "grad_norm": 0.0006344046560116112, + "learning_rate": 2.630085522251896e-07, + "loss": 0.0, + "num_input_tokens_seen": 108614736, + "step": 161150 + }, + { + "epoch": 3.9370434612659713, + "grad_norm": 0.007249165792018175, + "learning_rate": 2.629509155991969e-07, + "loss": 0.0, + "num_input_tokens_seen": 108617936, + "step": 161155 + }, + { + "epoch": 3.9371656120978185, + "grad_norm": 0.0009594596922397614, + "learning_rate": 2.6289328433322323e-07, + "loss": 0.0, + "num_input_tokens_seen": 108621072, + "step": 161160 + }, + { + "epoch": 3.9372877629296656, + "grad_norm": 0.0017339460318908095, + "learning_rate": 2.6283565842768807e-07, + "loss": 0.0, + "num_input_tokens_seen": 108624336, + "step": 161165 + }, + { + "epoch": 3.937409913761513, + "grad_norm": 0.000566888484172523, + "learning_rate": 2.627780378830099e-07, + "loss": 0.0, + "num_input_tokens_seen": 108627792, + "step": 161170 + }, + { + "epoch": 3.93753206459336, + "grad_norm": 0.0023493324406445026, + "learning_rate": 2.6272042269960856e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108631120, + "step": 161175 + }, + { + "epoch": 3.9376542154252068, + "grad_norm": 0.0009030869114212692, + "learning_rate": 2.6266281287790225e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108634384, + "step": 161180 + }, + { + "epoch": 3.9377763662570544, + "grad_norm": 0.0036799830850213766, + "learning_rate": 2.6260520841831037e-07, + "loss": 0.0631, + "num_input_tokens_seen": 108637584, + "step": 161185 + }, + { + "epoch": 3.937898517088901, + "grad_norm": 0.004490790888667107, + "learning_rate": 2.6254760932125184e-07, + "loss": 0.0, + "num_input_tokens_seen": 108641168, + "step": 161190 + }, + { + "epoch": 3.938020667920749, + "grad_norm": 0.0012281035305932164, + "learning_rate": 2.624900155871457e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108644560, + "step": 161195 + }, + { + "epoch": 3.9381428187525955, + "grad_norm": 0.009919474832713604, + "learning_rate": 2.624324272164101e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108648912, + "step": 161200 + }, + { + "epoch": 3.9382649695844427, + "grad_norm": 0.013267102651298046, + "learning_rate": 2.6237484420946456e-07, + "loss": 0.1652, + "num_input_tokens_seen": 108652240, + "step": 161205 + }, + { + "epoch": 3.93838712041629, + "grad_norm": 0.001139350119046867, + "learning_rate": 2.6231726656672726e-07, + "loss": 0.0, + "num_input_tokens_seen": 108655376, + "step": 161210 + }, + { + "epoch": 3.938509271248137, + "grad_norm": 0.023086732253432274, + "learning_rate": 2.622596942886175e-07, + "loss": 0.0, + "num_input_tokens_seen": 108658704, + "step": 161215 + }, + { + "epoch": 3.9386314220799843, + "grad_norm": 0.0004796923603862524, + "learning_rate": 2.622021273755535e-07, + "loss": 0.0, + "num_input_tokens_seen": 108661776, + "step": 161220 + }, + { + "epoch": 3.9387535729118315, + "grad_norm": 0.0027204875368624926, + "learning_rate": 2.621445658279542e-07, + "loss": 0.0, + "num_input_tokens_seen": 108665040, + "step": 161225 + }, + { + "epoch": 3.9388757237436787, + "grad_norm": 0.010596921667456627, + "learning_rate": 2.6208700964623785e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108668240, + "step": 161230 + }, + { + "epoch": 3.938997874575526, + "grad_norm": 0.022362949326634407, + "learning_rate": 2.620294588308235e-07, + "loss": 0.0, + "num_input_tokens_seen": 108671376, + "step": 161235 + }, + { + "epoch": 3.939120025407373, + "grad_norm": 0.04769204929471016, + "learning_rate": 2.619719133821292e-07, + "loss": 0.0, + "num_input_tokens_seen": 108674576, + "step": 161240 + }, + { + "epoch": 3.9392421762392202, + "grad_norm": 0.001487054629251361, + "learning_rate": 2.6191437330057364e-07, + "loss": 0.0, + "num_input_tokens_seen": 108678160, + "step": 161245 + }, + { + "epoch": 3.9393643270710674, + "grad_norm": 0.003391053294762969, + "learning_rate": 2.6185683858657546e-07, + "loss": 0.0, + "num_input_tokens_seen": 108681680, + "step": 161250 + }, + { + "epoch": 3.9394864779029146, + "grad_norm": 0.6954625248908997, + "learning_rate": 2.617993092405527e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108685008, + "step": 161255 + }, + { + "epoch": 3.939608628734762, + "grad_norm": 0.0019832707475870848, + "learning_rate": 2.6174178526292424e-07, + "loss": 0.0, + "num_input_tokens_seen": 108688528, + "step": 161260 + }, + { + "epoch": 3.939730779566609, + "grad_norm": 0.06212802603840828, + "learning_rate": 2.616842666541077e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108691984, + "step": 161265 + }, + { + "epoch": 3.939852930398456, + "grad_norm": 0.004584174137562513, + "learning_rate": 2.616267534145218e-07, + "loss": 0.0648, + "num_input_tokens_seen": 108695312, + "step": 161270 + }, + { + "epoch": 3.939975081230303, + "grad_norm": 0.006212098989635706, + "learning_rate": 2.6156924554458506e-07, + "loss": 0.0, + "num_input_tokens_seen": 108698576, + "step": 161275 + }, + { + "epoch": 3.9400972320621506, + "grad_norm": 0.000308558956021443, + "learning_rate": 2.61511743044715e-07, + "loss": 0.0, + "num_input_tokens_seen": 108701584, + "step": 161280 + }, + { + "epoch": 3.9402193828939973, + "grad_norm": 0.00427340529859066, + "learning_rate": 2.614542459153306e-07, + "loss": 0.0, + "num_input_tokens_seen": 108705040, + "step": 161285 + }, + { + "epoch": 3.940341533725845, + "grad_norm": 0.0022537033073604107, + "learning_rate": 2.6139675415684914e-07, + "loss": 0.0, + "num_input_tokens_seen": 108708176, + "step": 161290 + }, + { + "epoch": 3.9404636845576917, + "grad_norm": 0.007408153731375933, + "learning_rate": 2.613392677696895e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108711376, + "step": 161295 + }, + { + "epoch": 3.940585835389539, + "grad_norm": 0.036171622574329376, + "learning_rate": 2.612817867542694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108714512, + "step": 161300 + }, + { + "epoch": 3.940707986221386, + "grad_norm": 0.0014052970800548792, + "learning_rate": 2.612243111110065e-07, + "loss": 0.0, + "num_input_tokens_seen": 108717584, + "step": 161305 + }, + { + "epoch": 3.9408301370532333, + "grad_norm": 0.0031262929551303387, + "learning_rate": 2.611668408403195e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108720976, + "step": 161310 + }, + { + "epoch": 3.9409522878850805, + "grad_norm": 0.0026391837745904922, + "learning_rate": 2.611093759426256e-07, + "loss": 0.0, + "num_input_tokens_seen": 108724304, + "step": 161315 + }, + { + "epoch": 3.9410744387169276, + "grad_norm": 33.50833511352539, + "learning_rate": 2.6105191641834337e-07, + "loss": 0.0456, + "num_input_tokens_seen": 108727504, + "step": 161320 + }, + { + "epoch": 3.941196589548775, + "grad_norm": 0.007016018498688936, + "learning_rate": 2.6099446226789e-07, + "loss": 0.0, + "num_input_tokens_seen": 108730576, + "step": 161325 + }, + { + "epoch": 3.941318740380622, + "grad_norm": 0.03891550377011299, + "learning_rate": 2.6093701349168396e-07, + "loss": 0.0, + "num_input_tokens_seen": 108734096, + "step": 161330 + }, + { + "epoch": 3.941440891212469, + "grad_norm": 20.601884841918945, + "learning_rate": 2.608795700901425e-07, + "loss": 0.0457, + "num_input_tokens_seen": 108737552, + "step": 161335 + }, + { + "epoch": 3.9415630420443164, + "grad_norm": 0.001628112862817943, + "learning_rate": 2.608221320636836e-07, + "loss": 0.0, + "num_input_tokens_seen": 108740816, + "step": 161340 + }, + { + "epoch": 3.9416851928761636, + "grad_norm": 0.0007106566918082535, + "learning_rate": 2.607646994127253e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108744208, + "step": 161345 + }, + { + "epoch": 3.941807343708011, + "grad_norm": 19.420751571655273, + "learning_rate": 2.6070727213768464e-07, + "loss": 0.0418, + "num_input_tokens_seen": 108747408, + "step": 161350 + }, + { + "epoch": 3.941929494539858, + "grad_norm": 0.2629610002040863, + "learning_rate": 2.606498502389798e-07, + "loss": 0.0006, + "num_input_tokens_seen": 108750608, + "step": 161355 + }, + { + "epoch": 3.9420516453717047, + "grad_norm": 0.0015320018865168095, + "learning_rate": 2.6059243371702775e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108753808, + "step": 161360 + }, + { + "epoch": 3.9421737962035523, + "grad_norm": 0.002873330609872937, + "learning_rate": 2.605350225722465e-07, + "loss": 0.0, + "num_input_tokens_seen": 108756752, + "step": 161365 + }, + { + "epoch": 3.942295947035399, + "grad_norm": 0.007898210547864437, + "learning_rate": 2.6047761680505367e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108759632, + "step": 161370 + }, + { + "epoch": 3.9424180978672467, + "grad_norm": 0.0009628442348912358, + "learning_rate": 2.604202164158663e-07, + "loss": 0.0, + "num_input_tokens_seen": 108762960, + "step": 161375 + }, + { + "epoch": 3.9425402486990935, + "grad_norm": 0.28202518820762634, + "learning_rate": 2.6036282140510224e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108766352, + "step": 161380 + }, + { + "epoch": 3.9426623995309407, + "grad_norm": 0.0036664672661572695, + "learning_rate": 2.6030543177317853e-07, + "loss": 0.0652, + "num_input_tokens_seen": 108769872, + "step": 161385 + }, + { + "epoch": 3.942784550362788, + "grad_norm": 0.005653384141623974, + "learning_rate": 2.602480475205129e-07, + "loss": 0.0489, + "num_input_tokens_seen": 108773200, + "step": 161390 + }, + { + "epoch": 3.942906701194635, + "grad_norm": 0.001647413824684918, + "learning_rate": 2.6019066864752206e-07, + "loss": 0.0006, + "num_input_tokens_seen": 108777040, + "step": 161395 + }, + { + "epoch": 3.9430288520264822, + "grad_norm": 0.3349778950214386, + "learning_rate": 2.60133295154624e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108780368, + "step": 161400 + }, + { + "epoch": 3.9431510028583294, + "grad_norm": 0.6160761713981628, + "learning_rate": 2.600759270422355e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108783440, + "step": 161405 + }, + { + "epoch": 3.9432731536901766, + "grad_norm": 0.0059109157882630825, + "learning_rate": 2.6001856431077395e-07, + "loss": 0.0, + "num_input_tokens_seen": 108786512, + "step": 161410 + }, + { + "epoch": 3.943395304522024, + "grad_norm": 0.0066981189884245396, + "learning_rate": 2.599612069606565e-07, + "loss": 0.0, + "num_input_tokens_seen": 108790096, + "step": 161415 + }, + { + "epoch": 3.943517455353871, + "grad_norm": 0.011463329195976257, + "learning_rate": 2.5990385499229994e-07, + "loss": 0.011, + "num_input_tokens_seen": 108793360, + "step": 161420 + }, + { + "epoch": 3.943639606185718, + "grad_norm": 0.0017142114229500294, + "learning_rate": 2.5984650840612157e-07, + "loss": 0.0, + "num_input_tokens_seen": 108796688, + "step": 161425 + }, + { + "epoch": 3.9437617570175654, + "grad_norm": 0.055649321526288986, + "learning_rate": 2.5978916720253873e-07, + "loss": 0.0014, + "num_input_tokens_seen": 108799760, + "step": 161430 + }, + { + "epoch": 3.9438839078494126, + "grad_norm": 0.012390898540616035, + "learning_rate": 2.5973183138196785e-07, + "loss": 0.0, + "num_input_tokens_seen": 108803344, + "step": 161435 + }, + { + "epoch": 3.9440060586812598, + "grad_norm": 0.0008434390183538198, + "learning_rate": 2.5967450094482657e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108806480, + "step": 161440 + }, + { + "epoch": 3.944128209513107, + "grad_norm": 0.0012261479860171676, + "learning_rate": 2.596171758915312e-07, + "loss": 0.0, + "num_input_tokens_seen": 108810320, + "step": 161445 + }, + { + "epoch": 3.944250360344954, + "grad_norm": 0.0011466493597254157, + "learning_rate": 2.595598562224991e-07, + "loss": 0.0, + "num_input_tokens_seen": 108813712, + "step": 161450 + }, + { + "epoch": 3.944372511176801, + "grad_norm": 0.007299667224287987, + "learning_rate": 2.5950254193814655e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108816656, + "step": 161455 + }, + { + "epoch": 3.9444946620086485, + "grad_norm": 0.005376824643462896, + "learning_rate": 2.5944523303889065e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108820304, + "step": 161460 + }, + { + "epoch": 3.9446168128404953, + "grad_norm": 0.0009492832468822598, + "learning_rate": 2.593879295251485e-07, + "loss": 0.0, + "num_input_tokens_seen": 108823440, + "step": 161465 + }, + { + "epoch": 3.9447389636723424, + "grad_norm": 0.0012723127147182822, + "learning_rate": 2.5933063139733637e-07, + "loss": 0.0, + "num_input_tokens_seen": 108826704, + "step": 161470 + }, + { + "epoch": 3.9448611145041896, + "grad_norm": 0.0002114681847160682, + "learning_rate": 2.592733386558713e-07, + "loss": 0.0, + "num_input_tokens_seen": 108830864, + "step": 161475 + }, + { + "epoch": 3.944983265336037, + "grad_norm": 0.0071832574903965, + "learning_rate": 2.5921605130116954e-07, + "loss": 0.0, + "num_input_tokens_seen": 108834320, + "step": 161480 + }, + { + "epoch": 3.945105416167884, + "grad_norm": 0.00037165748653933406, + "learning_rate": 2.591587693336481e-07, + "loss": 0.0, + "num_input_tokens_seen": 108837712, + "step": 161485 + }, + { + "epoch": 3.945227566999731, + "grad_norm": 0.014342446811497211, + "learning_rate": 2.5910149275372305e-07, + "loss": 0.0, + "num_input_tokens_seen": 108840976, + "step": 161490 + }, + { + "epoch": 3.9453497178315784, + "grad_norm": 0.03518125042319298, + "learning_rate": 2.5904422156181126e-07, + "loss": 0.0, + "num_input_tokens_seen": 108844176, + "step": 161495 + }, + { + "epoch": 3.9454718686634256, + "grad_norm": 0.006159580312669277, + "learning_rate": 2.589869557583294e-07, + "loss": 0.0, + "num_input_tokens_seen": 108847504, + "step": 161500 + }, + { + "epoch": 3.9455940194952728, + "grad_norm": 0.0010740587022155523, + "learning_rate": 2.589296953436938e-07, + "loss": 0.0, + "num_input_tokens_seen": 108850576, + "step": 161505 + }, + { + "epoch": 3.94571617032712, + "grad_norm": 0.0005017686635255814, + "learning_rate": 2.5887244031832043e-07, + "loss": 0.0, + "num_input_tokens_seen": 108854032, + "step": 161510 + }, + { + "epoch": 3.945838321158967, + "grad_norm": 0.0033837794326245785, + "learning_rate": 2.5881519068262635e-07, + "loss": 0.0, + "num_input_tokens_seen": 108857104, + "step": 161515 + }, + { + "epoch": 3.9459604719908143, + "grad_norm": 0.03228937089443207, + "learning_rate": 2.587579464370273e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108860560, + "step": 161520 + }, + { + "epoch": 3.9460826228226615, + "grad_norm": 0.002921751234680414, + "learning_rate": 2.587007075819401e-07, + "loss": 0.0, + "num_input_tokens_seen": 108864080, + "step": 161525 + }, + { + "epoch": 3.9462047736545087, + "grad_norm": 0.047707222402095795, + "learning_rate": 2.586434741177804e-07, + "loss": 0.0, + "num_input_tokens_seen": 108867344, + "step": 161530 + }, + { + "epoch": 3.946326924486356, + "grad_norm": 0.008138305507600307, + "learning_rate": 2.5858624604496504e-07, + "loss": 0.0, + "num_input_tokens_seen": 108870544, + "step": 161535 + }, + { + "epoch": 3.9464490753182027, + "grad_norm": 0.3126929998397827, + "learning_rate": 2.585290233639097e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108873936, + "step": 161540 + }, + { + "epoch": 3.9465712261500503, + "grad_norm": 0.001759407576173544, + "learning_rate": 2.584718060750309e-07, + "loss": 0.0, + "num_input_tokens_seen": 108876944, + "step": 161545 + }, + { + "epoch": 3.946693376981897, + "grad_norm": 0.0005770392599515617, + "learning_rate": 2.584145941787444e-07, + "loss": 0.0153, + "num_input_tokens_seen": 108880144, + "step": 161550 + }, + { + "epoch": 3.9468155278137447, + "grad_norm": 0.0005854017799720168, + "learning_rate": 2.5835738767546647e-07, + "loss": 0.0, + "num_input_tokens_seen": 108883664, + "step": 161555 + }, + { + "epoch": 3.9469376786455914, + "grad_norm": 0.0027297306805849075, + "learning_rate": 2.5830018656561325e-07, + "loss": 0.0, + "num_input_tokens_seen": 108886992, + "step": 161560 + }, + { + "epoch": 3.9470598294774386, + "grad_norm": 0.00015110817912500352, + "learning_rate": 2.582429908496003e-07, + "loss": 0.0, + "num_input_tokens_seen": 108891024, + "step": 161565 + }, + { + "epoch": 3.947181980309286, + "grad_norm": 0.0010867511155083776, + "learning_rate": 2.581858005278442e-07, + "loss": 0.0, + "num_input_tokens_seen": 108894736, + "step": 161570 + }, + { + "epoch": 3.947304131141133, + "grad_norm": 0.02226412296295166, + "learning_rate": 2.581286156007602e-07, + "loss": 0.0, + "num_input_tokens_seen": 108898192, + "step": 161575 + }, + { + "epoch": 3.94742628197298, + "grad_norm": 6.201533687999472e-05, + "learning_rate": 2.5807143606876436e-07, + "loss": 0.0, + "num_input_tokens_seen": 108902032, + "step": 161580 + }, + { + "epoch": 3.9475484328048274, + "grad_norm": 0.0005853096372447908, + "learning_rate": 2.5801426193227296e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108905552, + "step": 161585 + }, + { + "epoch": 3.9476705836366746, + "grad_norm": 0.054668866097927094, + "learning_rate": 2.5795709319170114e-07, + "loss": 0.0, + "num_input_tokens_seen": 108908560, + "step": 161590 + }, + { + "epoch": 3.9477927344685217, + "grad_norm": 0.008940774016082287, + "learning_rate": 2.578999298474651e-07, + "loss": 0.0875, + "num_input_tokens_seen": 108911440, + "step": 161595 + }, + { + "epoch": 3.947914885300369, + "grad_norm": 0.002913819393143058, + "learning_rate": 2.5784277189998016e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108914960, + "step": 161600 + }, + { + "epoch": 3.948037036132216, + "grad_norm": 11.425198554992676, + "learning_rate": 2.577856193496625e-07, + "loss": 0.0019, + "num_input_tokens_seen": 108918288, + "step": 161605 + }, + { + "epoch": 3.9481591869640633, + "grad_norm": 0.004938581492751837, + "learning_rate": 2.577284721969274e-07, + "loss": 0.0, + "num_input_tokens_seen": 108921808, + "step": 161610 + }, + { + "epoch": 3.9482813377959105, + "grad_norm": 0.46004217863082886, + "learning_rate": 2.576713304421902e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108924752, + "step": 161615 + }, + { + "epoch": 3.9484034886277577, + "grad_norm": 0.008259247988462448, + "learning_rate": 2.57614194085867e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108928208, + "step": 161620 + }, + { + "epoch": 3.948525639459605, + "grad_norm": 0.00015825018635950983, + "learning_rate": 2.575570631283729e-07, + "loss": 0.0, + "num_input_tokens_seen": 108931472, + "step": 161625 + }, + { + "epoch": 3.948647790291452, + "grad_norm": 0.02835937775671482, + "learning_rate": 2.574999375701238e-07, + "loss": 0.0546, + "num_input_tokens_seen": 108934736, + "step": 161630 + }, + { + "epoch": 3.948769941123299, + "grad_norm": 45.61603546142578, + "learning_rate": 2.574428174115345e-07, + "loss": 0.0978, + "num_input_tokens_seen": 108938128, + "step": 161635 + }, + { + "epoch": 3.9488920919551465, + "grad_norm": 0.026648221537470818, + "learning_rate": 2.573857026530211e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108941712, + "step": 161640 + }, + { + "epoch": 3.949014242786993, + "grad_norm": 0.0013561674859374762, + "learning_rate": 2.5732859329499825e-07, + "loss": 0.0, + "num_input_tokens_seen": 108945808, + "step": 161645 + }, + { + "epoch": 3.9491363936188404, + "grad_norm": 0.003145823022350669, + "learning_rate": 2.572714893378817e-07, + "loss": 0.0, + "num_input_tokens_seen": 108950160, + "step": 161650 + }, + { + "epoch": 3.9492585444506876, + "grad_norm": 24.384469985961914, + "learning_rate": 2.5721439078208686e-07, + "loss": 0.0288, + "num_input_tokens_seen": 108953744, + "step": 161655 + }, + { + "epoch": 3.9493806952825348, + "grad_norm": 223.1640167236328, + "learning_rate": 2.571572976280285e-07, + "loss": 0.075, + "num_input_tokens_seen": 108956752, + "step": 161660 + }, + { + "epoch": 3.949502846114382, + "grad_norm": 0.299441397190094, + "learning_rate": 2.5710020987612234e-07, + "loss": 0.0466, + "num_input_tokens_seen": 108959760, + "step": 161665 + }, + { + "epoch": 3.949624996946229, + "grad_norm": 0.007449428550899029, + "learning_rate": 2.57043127526783e-07, + "loss": 0.0, + "num_input_tokens_seen": 108963088, + "step": 161670 + }, + { + "epoch": 3.9497471477780763, + "grad_norm": 0.3708611726760864, + "learning_rate": 2.569860505804259e-07, + "loss": 0.0002, + "num_input_tokens_seen": 108966544, + "step": 161675 + }, + { + "epoch": 3.9498692986099235, + "grad_norm": 0.01345105655491352, + "learning_rate": 2.5692897903746635e-07, + "loss": 0.0003, + "num_input_tokens_seen": 108969552, + "step": 161680 + }, + { + "epoch": 3.9499914494417707, + "grad_norm": 0.4047967791557312, + "learning_rate": 2.568719128983189e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108973008, + "step": 161685 + }, + { + "epoch": 3.950113600273618, + "grad_norm": 0.002885033143684268, + "learning_rate": 2.5681485216339907e-07, + "loss": 0.0, + "num_input_tokens_seen": 108976208, + "step": 161690 + }, + { + "epoch": 3.950235751105465, + "grad_norm": 0.0001398777967551723, + "learning_rate": 2.5675779683312115e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108979472, + "step": 161695 + }, + { + "epoch": 3.9503579019373123, + "grad_norm": 0.0028743662405759096, + "learning_rate": 2.5670074690790065e-07, + "loss": 0.0, + "num_input_tokens_seen": 108982800, + "step": 161700 + }, + { + "epoch": 3.9504800527691595, + "grad_norm": 0.0034815717954188585, + "learning_rate": 2.5664370238815214e-07, + "loss": 0.0, + "num_input_tokens_seen": 108986192, + "step": 161705 + }, + { + "epoch": 3.9506022036010067, + "grad_norm": 0.00020252345711924136, + "learning_rate": 2.565866632742908e-07, + "loss": 0.0, + "num_input_tokens_seen": 108989072, + "step": 161710 + }, + { + "epoch": 3.950724354432854, + "grad_norm": 0.002388434950262308, + "learning_rate": 2.5652962956673086e-07, + "loss": 0.0, + "num_input_tokens_seen": 108992656, + "step": 161715 + }, + { + "epoch": 3.9508465052647006, + "grad_norm": 0.15899832546710968, + "learning_rate": 2.5647260126588775e-07, + "loss": 0.0001, + "num_input_tokens_seen": 108995536, + "step": 161720 + }, + { + "epoch": 3.9509686560965482, + "grad_norm": 0.0023573962971568108, + "learning_rate": 2.5641557837217586e-07, + "loss": 0.0, + "num_input_tokens_seen": 108999312, + "step": 161725 + }, + { + "epoch": 3.951090806928395, + "grad_norm": 0.00032487220596522093, + "learning_rate": 2.563585608860096e-07, + "loss": 0.1403, + "num_input_tokens_seen": 109002576, + "step": 161730 + }, + { + "epoch": 3.9512129577602426, + "grad_norm": 32.855960845947266, + "learning_rate": 2.563015488078039e-07, + "loss": 0.0348, + "num_input_tokens_seen": 109005776, + "step": 161735 + }, + { + "epoch": 3.9513351085920894, + "grad_norm": 0.05380634963512421, + "learning_rate": 2.5624454213797366e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109008912, + "step": 161740 + }, + { + "epoch": 3.9514572594239366, + "grad_norm": 0.0002731550484895706, + "learning_rate": 2.5618754087693283e-07, + "loss": 0.0, + "num_input_tokens_seen": 109012240, + "step": 161745 + }, + { + "epoch": 3.9515794102557837, + "grad_norm": 0.08828431367874146, + "learning_rate": 2.5613054502509655e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109015504, + "step": 161750 + }, + { + "epoch": 3.951701561087631, + "grad_norm": 0.04316204786300659, + "learning_rate": 2.560735545828787e-07, + "loss": 0.0, + "num_input_tokens_seen": 109018704, + "step": 161755 + }, + { + "epoch": 3.951823711919478, + "grad_norm": 0.0059670270420610905, + "learning_rate": 2.560165695506945e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109022352, + "step": 161760 + }, + { + "epoch": 3.9519458627513253, + "grad_norm": 0.0006971214897930622, + "learning_rate": 2.559595899289575e-07, + "loss": 0.0005, + "num_input_tokens_seen": 109026192, + "step": 161765 + }, + { + "epoch": 3.9520680135831725, + "grad_norm": 0.00021715546608902514, + "learning_rate": 2.5590261571808247e-07, + "loss": 0.0, + "num_input_tokens_seen": 109029520, + "step": 161770 + }, + { + "epoch": 3.9521901644150197, + "grad_norm": 38.267738342285156, + "learning_rate": 2.558456469184841e-07, + "loss": 0.0542, + "num_input_tokens_seen": 109033168, + "step": 161775 + }, + { + "epoch": 3.952312315246867, + "grad_norm": 0.002327230293303728, + "learning_rate": 2.55788683530576e-07, + "loss": 0.0, + "num_input_tokens_seen": 109037200, + "step": 161780 + }, + { + "epoch": 3.952434466078714, + "grad_norm": 0.00969710759818554, + "learning_rate": 2.5573172555477316e-07, + "loss": 0.0392, + "num_input_tokens_seen": 109040400, + "step": 161785 + }, + { + "epoch": 3.9525566169105613, + "grad_norm": 0.023102683946490288, + "learning_rate": 2.55674772991489e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109043664, + "step": 161790 + }, + { + "epoch": 3.9526787677424084, + "grad_norm": 0.0002032756310654804, + "learning_rate": 2.5561782584113845e-07, + "loss": 0.0, + "num_input_tokens_seen": 109046928, + "step": 161795 + }, + { + "epoch": 3.9528009185742556, + "grad_norm": 0.40158918499946594, + "learning_rate": 2.55560884104135e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109049744, + "step": 161800 + }, + { + "epoch": 3.9529230694061024, + "grad_norm": 0.0035159934777766466, + "learning_rate": 2.555039477808929e-07, + "loss": 0.0, + "num_input_tokens_seen": 109053264, + "step": 161805 + }, + { + "epoch": 3.95304522023795, + "grad_norm": 0.0007293214439414442, + "learning_rate": 2.5544701687182677e-07, + "loss": 0.0, + "num_input_tokens_seen": 109056272, + "step": 161810 + }, + { + "epoch": 3.9531673710697968, + "grad_norm": 0.0006925832713022828, + "learning_rate": 2.5539009137735013e-07, + "loss": 0.0, + "num_input_tokens_seen": 109059536, + "step": 161815 + }, + { + "epoch": 3.9532895219016444, + "grad_norm": 0.48797106742858887, + "learning_rate": 2.553331712978768e-07, + "loss": 0.0003, + "num_input_tokens_seen": 109062800, + "step": 161820 + }, + { + "epoch": 3.953411672733491, + "grad_norm": 0.0024608115199953318, + "learning_rate": 2.552762566338211e-07, + "loss": 0.0, + "num_input_tokens_seen": 109066128, + "step": 161825 + }, + { + "epoch": 3.9535338235653383, + "grad_norm": 0.0036889470648020506, + "learning_rate": 2.552193473855966e-07, + "loss": 0.0, + "num_input_tokens_seen": 109069840, + "step": 161830 + }, + { + "epoch": 3.9536559743971855, + "grad_norm": 0.0005082987481728196, + "learning_rate": 2.551624435536176e-07, + "loss": 0.0, + "num_input_tokens_seen": 109073168, + "step": 161835 + }, + { + "epoch": 3.9537781252290327, + "grad_norm": 65.21703338623047, + "learning_rate": 2.551055451382973e-07, + "loss": 0.0184, + "num_input_tokens_seen": 109076240, + "step": 161840 + }, + { + "epoch": 3.95390027606088, + "grad_norm": 0.0011391066946089268, + "learning_rate": 2.550486521400501e-07, + "loss": 0.0, + "num_input_tokens_seen": 109080016, + "step": 161845 + }, + { + "epoch": 3.954022426892727, + "grad_norm": 0.009924840182065964, + "learning_rate": 2.5499176455928927e-07, + "loss": 0.0005, + "num_input_tokens_seen": 109083536, + "step": 161850 + }, + { + "epoch": 3.9541445777245743, + "grad_norm": 0.0007699128473177552, + "learning_rate": 2.5493488239642904e-07, + "loss": 0.0354, + "num_input_tokens_seen": 109087184, + "step": 161855 + }, + { + "epoch": 3.9542667285564215, + "grad_norm": 0.040427979081869125, + "learning_rate": 2.5487800565188236e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109090640, + "step": 161860 + }, + { + "epoch": 3.9543888793882687, + "grad_norm": 0.001991491997614503, + "learning_rate": 2.548211343260632e-07, + "loss": 0.0554, + "num_input_tokens_seen": 109093776, + "step": 161865 + }, + { + "epoch": 3.954511030220116, + "grad_norm": 0.001988684758543968, + "learning_rate": 2.5476426841938545e-07, + "loss": 0.0, + "num_input_tokens_seen": 109096976, + "step": 161870 + }, + { + "epoch": 3.954633181051963, + "grad_norm": 0.012328723445534706, + "learning_rate": 2.547074079322622e-07, + "loss": 0.0, + "num_input_tokens_seen": 109100560, + "step": 161875 + }, + { + "epoch": 3.9547553318838102, + "grad_norm": 0.00014424313849303871, + "learning_rate": 2.5465055286510737e-07, + "loss": 0.0, + "num_input_tokens_seen": 109103888, + "step": 161880 + }, + { + "epoch": 3.9548774827156574, + "grad_norm": 0.030226293951272964, + "learning_rate": 2.5459370321833396e-07, + "loss": 0.0, + "num_input_tokens_seen": 109107344, + "step": 161885 + }, + { + "epoch": 3.9549996335475046, + "grad_norm": 0.009276174008846283, + "learning_rate": 2.545368589923559e-07, + "loss": 0.0, + "num_input_tokens_seen": 109110544, + "step": 161890 + }, + { + "epoch": 3.955121784379352, + "grad_norm": 0.00403105653822422, + "learning_rate": 2.54480020187586e-07, + "loss": 0.0, + "num_input_tokens_seen": 109113680, + "step": 161895 + }, + { + "epoch": 3.9552439352111985, + "grad_norm": 0.0010097883641719818, + "learning_rate": 2.54423186804438e-07, + "loss": 0.0, + "num_input_tokens_seen": 109117008, + "step": 161900 + }, + { + "epoch": 3.955366086043046, + "grad_norm": 0.0010946433758363128, + "learning_rate": 2.5436635884332526e-07, + "loss": 0.0, + "num_input_tokens_seen": 109120336, + "step": 161905 + }, + { + "epoch": 3.955488236874893, + "grad_norm": 0.012915721163153648, + "learning_rate": 2.5430953630466067e-07, + "loss": 0.0, + "num_input_tokens_seen": 109123792, + "step": 161910 + }, + { + "epoch": 3.95561038770674, + "grad_norm": 0.003288612002506852, + "learning_rate": 2.54252719188858e-07, + "loss": 0.0, + "num_input_tokens_seen": 109127568, + "step": 161915 + }, + { + "epoch": 3.9557325385385873, + "grad_norm": 0.00014995834499131888, + "learning_rate": 2.5419590749633014e-07, + "loss": 0.0527, + "num_input_tokens_seen": 109130960, + "step": 161920 + }, + { + "epoch": 3.9558546893704345, + "grad_norm": 0.01613342948257923, + "learning_rate": 2.5413910122748996e-07, + "loss": 0.0, + "num_input_tokens_seen": 109134288, + "step": 161925 + }, + { + "epoch": 3.9559768402022817, + "grad_norm": 0.001566616352647543, + "learning_rate": 2.5408230038275115e-07, + "loss": 0.0, + "num_input_tokens_seen": 109137552, + "step": 161930 + }, + { + "epoch": 3.956098991034129, + "grad_norm": 0.0040521277114748955, + "learning_rate": 2.5402550496252616e-07, + "loss": 0.0, + "num_input_tokens_seen": 109141200, + "step": 161935 + }, + { + "epoch": 3.956221141865976, + "grad_norm": 0.007688730955123901, + "learning_rate": 2.539687149672287e-07, + "loss": 0.0843, + "num_input_tokens_seen": 109144272, + "step": 161940 + }, + { + "epoch": 3.9563432926978233, + "grad_norm": 0.005946619436144829, + "learning_rate": 2.53911930397271e-07, + "loss": 0.0, + "num_input_tokens_seen": 109147664, + "step": 161945 + }, + { + "epoch": 3.9564654435296704, + "grad_norm": 0.0005307059618644416, + "learning_rate": 2.538551512530668e-07, + "loss": 0.0, + "num_input_tokens_seen": 109151056, + "step": 161950 + }, + { + "epoch": 3.9565875943615176, + "grad_norm": 0.001883355900645256, + "learning_rate": 2.537983775350283e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109154576, + "step": 161955 + }, + { + "epoch": 3.956709745193365, + "grad_norm": 0.001277736620977521, + "learning_rate": 2.5374160924356867e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109157968, + "step": 161960 + }, + { + "epoch": 3.956831896025212, + "grad_norm": 6.472727545769885e-05, + "learning_rate": 2.5368484637910117e-07, + "loss": 0.0, + "num_input_tokens_seen": 109161360, + "step": 161965 + }, + { + "epoch": 3.956954046857059, + "grad_norm": 0.006716002244502306, + "learning_rate": 2.536280889420378e-07, + "loss": 0.0, + "num_input_tokens_seen": 109164944, + "step": 161970 + }, + { + "epoch": 3.9570761976889064, + "grad_norm": 0.06781076639890671, + "learning_rate": 2.535713369327921e-07, + "loss": 0.0, + "num_input_tokens_seen": 109168912, + "step": 161975 + }, + { + "epoch": 3.9571983485207536, + "grad_norm": 0.015026175417006016, + "learning_rate": 2.5351459035177604e-07, + "loss": 0.0224, + "num_input_tokens_seen": 109172048, + "step": 161980 + }, + { + "epoch": 3.9573204993526003, + "grad_norm": 0.0010686632012948394, + "learning_rate": 2.534578491994026e-07, + "loss": 0.0, + "num_input_tokens_seen": 109175184, + "step": 161985 + }, + { + "epoch": 3.957442650184448, + "grad_norm": 0.17090198397636414, + "learning_rate": 2.534011134760848e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109178384, + "step": 161990 + }, + { + "epoch": 3.9575648010162947, + "grad_norm": 0.01744556427001953, + "learning_rate": 2.533443831822347e-07, + "loss": 0.0, + "num_input_tokens_seen": 109181648, + "step": 161995 + }, + { + "epoch": 3.9576869518481423, + "grad_norm": 0.0013410678366199136, + "learning_rate": 2.5328765831826537e-07, + "loss": 0.0, + "num_input_tokens_seen": 109185040, + "step": 162000 + }, + { + "epoch": 3.957809102679989, + "grad_norm": 0.002465799218043685, + "learning_rate": 2.532309388845887e-07, + "loss": 0.0, + "num_input_tokens_seen": 109188752, + "step": 162005 + }, + { + "epoch": 3.9579312535118363, + "grad_norm": 0.0010966339614242315, + "learning_rate": 2.531742248816178e-07, + "loss": 0.0, + "num_input_tokens_seen": 109192656, + "step": 162010 + }, + { + "epoch": 3.9580534043436835, + "grad_norm": 0.06205965578556061, + "learning_rate": 2.531175163097645e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109196048, + "step": 162015 + }, + { + "epoch": 3.9581755551755307, + "grad_norm": 0.07128459960222244, + "learning_rate": 2.5306081316944185e-07, + "loss": 0.0, + "num_input_tokens_seen": 109199120, + "step": 162020 + }, + { + "epoch": 3.958297706007378, + "grad_norm": 0.0008230642415583134, + "learning_rate": 2.530041154610615e-07, + "loss": 0.0, + "num_input_tokens_seen": 109202448, + "step": 162025 + }, + { + "epoch": 3.958419856839225, + "grad_norm": 0.0010746166808530688, + "learning_rate": 2.529474231850365e-07, + "loss": 0.0, + "num_input_tokens_seen": 109205456, + "step": 162030 + }, + { + "epoch": 3.9585420076710722, + "grad_norm": 0.0005239786696620286, + "learning_rate": 2.528907363417787e-07, + "loss": 0.0237, + "num_input_tokens_seen": 109208208, + "step": 162035 + }, + { + "epoch": 3.9586641585029194, + "grad_norm": 0.0008065864094533026, + "learning_rate": 2.528340549317002e-07, + "loss": 0.0, + "num_input_tokens_seen": 109211792, + "step": 162040 + }, + { + "epoch": 3.9587863093347666, + "grad_norm": 0.0016470836708322167, + "learning_rate": 2.5277737895521365e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109214992, + "step": 162045 + }, + { + "epoch": 3.958908460166614, + "grad_norm": 8.174134563887492e-05, + "learning_rate": 2.5272070841273076e-07, + "loss": 0.0, + "num_input_tokens_seen": 109218192, + "step": 162050 + }, + { + "epoch": 3.959030610998461, + "grad_norm": 0.004978245124220848, + "learning_rate": 2.526640433046638e-07, + "loss": 0.0, + "num_input_tokens_seen": 109221328, + "step": 162055 + }, + { + "epoch": 3.959152761830308, + "grad_norm": 0.0015048523200675845, + "learning_rate": 2.526073836314252e-07, + "loss": 0.0, + "num_input_tokens_seen": 109224656, + "step": 162060 + }, + { + "epoch": 3.9592749126621554, + "grad_norm": 0.009316632524132729, + "learning_rate": 2.525507293934265e-07, + "loss": 0.0, + "num_input_tokens_seen": 109227920, + "step": 162065 + }, + { + "epoch": 3.9593970634940026, + "grad_norm": 0.0022315524984151125, + "learning_rate": 2.524940805910802e-07, + "loss": 0.0, + "num_input_tokens_seen": 109231632, + "step": 162070 + }, + { + "epoch": 3.9595192143258497, + "grad_norm": 0.0013435684377327561, + "learning_rate": 2.524374372247977e-07, + "loss": 0.0, + "num_input_tokens_seen": 109234832, + "step": 162075 + }, + { + "epoch": 3.9596413651576965, + "grad_norm": 0.0003065931668970734, + "learning_rate": 2.523807992949912e-07, + "loss": 0.0576, + "num_input_tokens_seen": 109237968, + "step": 162080 + }, + { + "epoch": 3.959763515989544, + "grad_norm": 0.06455646455287933, + "learning_rate": 2.52324166802073e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109241232, + "step": 162085 + }, + { + "epoch": 3.959885666821391, + "grad_norm": 0.0016405819915235043, + "learning_rate": 2.5226753974645423e-07, + "loss": 0.0824, + "num_input_tokens_seen": 109244496, + "step": 162090 + }, + { + "epoch": 3.960007817653238, + "grad_norm": 0.002664495026692748, + "learning_rate": 2.522109181285473e-07, + "loss": 0.0, + "num_input_tokens_seen": 109247696, + "step": 162095 + }, + { + "epoch": 3.9601299684850853, + "grad_norm": 0.0009869072819128633, + "learning_rate": 2.5215430194876343e-07, + "loss": 0.0, + "num_input_tokens_seen": 109251152, + "step": 162100 + }, + { + "epoch": 3.9602521193169324, + "grad_norm": 0.0019497988978400826, + "learning_rate": 2.520976912075149e-07, + "loss": 0.0, + "num_input_tokens_seen": 109254608, + "step": 162105 + }, + { + "epoch": 3.9603742701487796, + "grad_norm": 0.24836663901805878, + "learning_rate": 2.5204108590521277e-07, + "loss": 0.0003, + "num_input_tokens_seen": 109258000, + "step": 162110 + }, + { + "epoch": 3.960496420980627, + "grad_norm": 0.0007280535064637661, + "learning_rate": 2.519844860422692e-07, + "loss": 0.0513, + "num_input_tokens_seen": 109261264, + "step": 162115 + }, + { + "epoch": 3.960618571812474, + "grad_norm": 0.001604035496711731, + "learning_rate": 2.519278916190958e-07, + "loss": 0.0, + "num_input_tokens_seen": 109264848, + "step": 162120 + }, + { + "epoch": 3.960740722644321, + "grad_norm": 0.0037682605907320976, + "learning_rate": 2.51871302636104e-07, + "loss": 0.0, + "num_input_tokens_seen": 109268048, + "step": 162125 + }, + { + "epoch": 3.9608628734761684, + "grad_norm": 0.0007140697562135756, + "learning_rate": 2.51814719093705e-07, + "loss": 0.0, + "num_input_tokens_seen": 109271504, + "step": 162130 + }, + { + "epoch": 3.9609850243080156, + "grad_norm": 0.006422894541174173, + "learning_rate": 2.5175814099231096e-07, + "loss": 0.0, + "num_input_tokens_seen": 109274640, + "step": 162135 + }, + { + "epoch": 3.9611071751398628, + "grad_norm": 0.061705734580755234, + "learning_rate": 2.5170156833233256e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109278096, + "step": 162140 + }, + { + "epoch": 3.96122932597171, + "grad_norm": 0.0003613313310779631, + "learning_rate": 2.516450011141821e-07, + "loss": 0.0019, + "num_input_tokens_seen": 109282000, + "step": 162145 + }, + { + "epoch": 3.961351476803557, + "grad_norm": 0.0009361098054796457, + "learning_rate": 2.5158843933827e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109285648, + "step": 162150 + }, + { + "epoch": 3.9614736276354043, + "grad_norm": 0.18613314628601074, + "learning_rate": 2.515318830050085e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109289040, + "step": 162155 + }, + { + "epoch": 3.9615957784672515, + "grad_norm": 0.005279130302369595, + "learning_rate": 2.514753321148081e-07, + "loss": 0.0, + "num_input_tokens_seen": 109292432, + "step": 162160 + }, + { + "epoch": 3.9617179292990983, + "grad_norm": 0.0038733906112611294, + "learning_rate": 2.514187866680807e-07, + "loss": 0.0, + "num_input_tokens_seen": 109295824, + "step": 162165 + }, + { + "epoch": 3.961840080130946, + "grad_norm": 0.0029186783358454704, + "learning_rate": 2.5136224666523696e-07, + "loss": 0.0, + "num_input_tokens_seen": 109298832, + "step": 162170 + }, + { + "epoch": 3.9619622309627927, + "grad_norm": 0.0011153332889080048, + "learning_rate": 2.5130571210668825e-07, + "loss": 0.0571, + "num_input_tokens_seen": 109301968, + "step": 162175 + }, + { + "epoch": 3.9620843817946403, + "grad_norm": 0.0011105951853096485, + "learning_rate": 2.5124918299284615e-07, + "loss": 0.0, + "num_input_tokens_seen": 109305360, + "step": 162180 + }, + { + "epoch": 3.962206532626487, + "grad_norm": 0.0008032761397771537, + "learning_rate": 2.5119265932412105e-07, + "loss": 0.0, + "num_input_tokens_seen": 109308624, + "step": 162185 + }, + { + "epoch": 3.962328683458334, + "grad_norm": 0.0009347455343231559, + "learning_rate": 2.511361411009246e-07, + "loss": 0.0, + "num_input_tokens_seen": 109311824, + "step": 162190 + }, + { + "epoch": 3.9624508342901814, + "grad_norm": 0.0006021481240168214, + "learning_rate": 2.5107962832366735e-07, + "loss": 0.0, + "num_input_tokens_seen": 109315408, + "step": 162195 + }, + { + "epoch": 3.9625729851220286, + "grad_norm": 0.00013758940622210503, + "learning_rate": 2.510231209927608e-07, + "loss": 0.0, + "num_input_tokens_seen": 109318864, + "step": 162200 + }, + { + "epoch": 3.962695135953876, + "grad_norm": 5.944971561431885, + "learning_rate": 2.509666191086152e-07, + "loss": 0.0013, + "num_input_tokens_seen": 109322000, + "step": 162205 + }, + { + "epoch": 3.962817286785723, + "grad_norm": 0.09964948892593384, + "learning_rate": 2.509101226716418e-07, + "loss": 0.0598, + "num_input_tokens_seen": 109325200, + "step": 162210 + }, + { + "epoch": 3.96293943761757, + "grad_norm": 0.000213745137443766, + "learning_rate": 2.5085363168225173e-07, + "loss": 0.0, + "num_input_tokens_seen": 109329168, + "step": 162215 + }, + { + "epoch": 3.9630615884494174, + "grad_norm": 0.001257982337847352, + "learning_rate": 2.5079714614085535e-07, + "loss": 0.0583, + "num_input_tokens_seen": 109332880, + "step": 162220 + }, + { + "epoch": 3.9631837392812646, + "grad_norm": 0.0010682143038138747, + "learning_rate": 2.5074066604786383e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109335952, + "step": 162225 + }, + { + "epoch": 3.9633058901131117, + "grad_norm": 0.0014836126938462257, + "learning_rate": 2.506841914036878e-07, + "loss": 0.0, + "num_input_tokens_seen": 109338960, + "step": 162230 + }, + { + "epoch": 3.963428040944959, + "grad_norm": 0.004185094032436609, + "learning_rate": 2.506277222087375e-07, + "loss": 0.1063, + "num_input_tokens_seen": 109342288, + "step": 162235 + }, + { + "epoch": 3.963550191776806, + "grad_norm": 0.0005608157953247428, + "learning_rate": 2.505712584634243e-07, + "loss": 0.0, + "num_input_tokens_seen": 109345680, + "step": 162240 + }, + { + "epoch": 3.9636723426086533, + "grad_norm": 0.009780601598322392, + "learning_rate": 2.505148001681582e-07, + "loss": 0.0, + "num_input_tokens_seen": 109349648, + "step": 162245 + }, + { + "epoch": 3.9637944934405, + "grad_norm": 0.00024733677855692804, + "learning_rate": 2.5045834732335024e-07, + "loss": 0.0, + "num_input_tokens_seen": 109353360, + "step": 162250 + }, + { + "epoch": 3.9639166442723477, + "grad_norm": 0.1472015380859375, + "learning_rate": 2.5040189992941063e-07, + "loss": 0.0456, + "num_input_tokens_seen": 109356368, + "step": 162255 + }, + { + "epoch": 3.9640387951041944, + "grad_norm": 0.11164890229701996, + "learning_rate": 2.5034545798675024e-07, + "loss": 0.0, + "num_input_tokens_seen": 109359504, + "step": 162260 + }, + { + "epoch": 3.964160945936042, + "grad_norm": 0.004264294635504484, + "learning_rate": 2.50289021495779e-07, + "loss": 0.0, + "num_input_tokens_seen": 109362960, + "step": 162265 + }, + { + "epoch": 3.964283096767889, + "grad_norm": 0.005529699381440878, + "learning_rate": 2.502325904569077e-07, + "loss": 0.0, + "num_input_tokens_seen": 109366160, + "step": 162270 + }, + { + "epoch": 3.964405247599736, + "grad_norm": 0.09107805043458939, + "learning_rate": 2.5017616487054694e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109369488, + "step": 162275 + }, + { + "epoch": 3.964527398431583, + "grad_norm": 0.000815638224594295, + "learning_rate": 2.501197447371065e-07, + "loss": 0.0, + "num_input_tokens_seen": 109372624, + "step": 162280 + }, + { + "epoch": 3.9646495492634304, + "grad_norm": 0.0033097947016358376, + "learning_rate": 2.5006333005699734e-07, + "loss": 0.0, + "num_input_tokens_seen": 109375824, + "step": 162285 + }, + { + "epoch": 3.9647717000952776, + "grad_norm": 0.01763514243066311, + "learning_rate": 2.5000692083062893e-07, + "loss": 0.0849, + "num_input_tokens_seen": 109379280, + "step": 162290 + }, + { + "epoch": 3.9648938509271248, + "grad_norm": 0.00030953448731452227, + "learning_rate": 2.49950517058412e-07, + "loss": 0.0, + "num_input_tokens_seen": 109382608, + "step": 162295 + }, + { + "epoch": 3.965016001758972, + "grad_norm": 0.002793027088046074, + "learning_rate": 2.498941187407568e-07, + "loss": 0.0, + "num_input_tokens_seen": 109386128, + "step": 162300 + }, + { + "epoch": 3.965138152590819, + "grad_norm": 0.006183539051562548, + "learning_rate": 2.498377258780732e-07, + "loss": 0.0407, + "num_input_tokens_seen": 109389200, + "step": 162305 + }, + { + "epoch": 3.9652603034226663, + "grad_norm": 0.021969813853502274, + "learning_rate": 2.4978133847077163e-07, + "loss": 0.0353, + "num_input_tokens_seen": 109392272, + "step": 162310 + }, + { + "epoch": 3.9653824542545135, + "grad_norm": 0.306232213973999, + "learning_rate": 2.497249565192617e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109395792, + "step": 162315 + }, + { + "epoch": 3.9655046050863607, + "grad_norm": 0.004216625355184078, + "learning_rate": 2.4966858002395396e-07, + "loss": 0.0, + "num_input_tokens_seen": 109398672, + "step": 162320 + }, + { + "epoch": 3.965626755918208, + "grad_norm": 0.0067145368084311485, + "learning_rate": 2.496122089852578e-07, + "loss": 0.0, + "num_input_tokens_seen": 109402000, + "step": 162325 + }, + { + "epoch": 3.965748906750055, + "grad_norm": 0.028485514223575592, + "learning_rate": 2.495558434035838e-07, + "loss": 0.0, + "num_input_tokens_seen": 109405584, + "step": 162330 + }, + { + "epoch": 3.9658710575819023, + "grad_norm": 0.0013114007888361812, + "learning_rate": 2.4949948327934134e-07, + "loss": 0.0, + "num_input_tokens_seen": 109408592, + "step": 162335 + }, + { + "epoch": 3.9659932084137495, + "grad_norm": 0.0038664492312818766, + "learning_rate": 2.494431286129407e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109411984, + "step": 162340 + }, + { + "epoch": 3.966115359245596, + "grad_norm": 16.34613800048828, + "learning_rate": 2.493867794047916e-07, + "loss": 0.075, + "num_input_tokens_seen": 109415248, + "step": 162345 + }, + { + "epoch": 3.966237510077444, + "grad_norm": 0.0032380574848502874, + "learning_rate": 2.493304356553033e-07, + "loss": 0.0, + "num_input_tokens_seen": 109419280, + "step": 162350 + }, + { + "epoch": 3.9663596609092906, + "grad_norm": 0.002148458966985345, + "learning_rate": 2.492740973648864e-07, + "loss": 0.0, + "num_input_tokens_seen": 109422480, + "step": 162355 + }, + { + "epoch": 3.9664818117411382, + "grad_norm": 0.005396352615207434, + "learning_rate": 2.492177645339497e-07, + "loss": 0.0, + "num_input_tokens_seen": 109425808, + "step": 162360 + }, + { + "epoch": 3.966603962572985, + "grad_norm": 0.0016786479391157627, + "learning_rate": 2.491614371629035e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109428816, + "step": 162365 + }, + { + "epoch": 3.966726113404832, + "grad_norm": 0.0009584561921656132, + "learning_rate": 2.491051152521576e-07, + "loss": 0.0725, + "num_input_tokens_seen": 109431952, + "step": 162370 + }, + { + "epoch": 3.9668482642366794, + "grad_norm": 0.06761808693408966, + "learning_rate": 2.4904879880212094e-07, + "loss": 0.0, + "num_input_tokens_seen": 109435216, + "step": 162375 + }, + { + "epoch": 3.9669704150685265, + "grad_norm": 0.0004407799569889903, + "learning_rate": 2.489924878132036e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109438544, + "step": 162380 + }, + { + "epoch": 3.9670925659003737, + "grad_norm": 0.013496466912329197, + "learning_rate": 2.489361822858147e-07, + "loss": 0.0006, + "num_input_tokens_seen": 109442192, + "step": 162385 + }, + { + "epoch": 3.967214716732221, + "grad_norm": 0.01726139336824417, + "learning_rate": 2.488798822203638e-07, + "loss": 0.0008, + "num_input_tokens_seen": 109445456, + "step": 162390 + }, + { + "epoch": 3.967336867564068, + "grad_norm": 0.03320024162530899, + "learning_rate": 2.488235876172609e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109448656, + "step": 162395 + }, + { + "epoch": 3.9674590183959153, + "grad_norm": 0.0007737289415672421, + "learning_rate": 2.4876729847691445e-07, + "loss": 0.0, + "num_input_tokens_seen": 109452176, + "step": 162400 + }, + { + "epoch": 3.9675811692277625, + "grad_norm": 0.011182458139955997, + "learning_rate": 2.4871101479973456e-07, + "loss": 0.0, + "num_input_tokens_seen": 109455312, + "step": 162405 + }, + { + "epoch": 3.9677033200596097, + "grad_norm": 0.006535296328365803, + "learning_rate": 2.4865473658613e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109458832, + "step": 162410 + }, + { + "epoch": 3.967825470891457, + "grad_norm": 0.011599404737353325, + "learning_rate": 2.485984638365106e-07, + "loss": 0.0, + "num_input_tokens_seen": 109462096, + "step": 162415 + }, + { + "epoch": 3.967947621723304, + "grad_norm": 0.02636815793812275, + "learning_rate": 2.4854219655128493e-07, + "loss": 0.0, + "num_input_tokens_seen": 109465808, + "step": 162420 + }, + { + "epoch": 3.9680697725551513, + "grad_norm": 54.23196029663086, + "learning_rate": 2.4848593473086253e-07, + "loss": 0.0444, + "num_input_tokens_seen": 109469456, + "step": 162425 + }, + { + "epoch": 3.968191923386998, + "grad_norm": 0.002171638421714306, + "learning_rate": 2.4842967837565287e-07, + "loss": 0.0, + "num_input_tokens_seen": 109473488, + "step": 162430 + }, + { + "epoch": 3.9683140742188456, + "grad_norm": 0.011168444529175758, + "learning_rate": 2.483734274860647e-07, + "loss": 0.0, + "num_input_tokens_seen": 109476752, + "step": 162435 + }, + { + "epoch": 3.9684362250506924, + "grad_norm": 0.0005473028868436813, + "learning_rate": 2.4831718206250694e-07, + "loss": 0.0003, + "num_input_tokens_seen": 109480080, + "step": 162440 + }, + { + "epoch": 3.96855837588254, + "grad_norm": 0.00041105563286691904, + "learning_rate": 2.4826094210538895e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109483344, + "step": 162445 + }, + { + "epoch": 3.9686805267143868, + "grad_norm": 0.00028596349875442684, + "learning_rate": 2.482047076151197e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109486224, + "step": 162450 + }, + { + "epoch": 3.968802677546234, + "grad_norm": 0.12528088688850403, + "learning_rate": 2.4814847859210763e-07, + "loss": 0.0, + "num_input_tokens_seen": 109489232, + "step": 162455 + }, + { + "epoch": 3.968924828378081, + "grad_norm": 0.0037132389843463898, + "learning_rate": 2.480922550367621e-07, + "loss": 0.0, + "num_input_tokens_seen": 109492368, + "step": 162460 + }, + { + "epoch": 3.9690469792099283, + "grad_norm": 0.005314487498253584, + "learning_rate": 2.480360369494923e-07, + "loss": 0.0, + "num_input_tokens_seen": 109495440, + "step": 162465 + }, + { + "epoch": 3.9691691300417755, + "grad_norm": 0.02827450819313526, + "learning_rate": 2.479798243307063e-07, + "loss": 0.0, + "num_input_tokens_seen": 109498640, + "step": 162470 + }, + { + "epoch": 3.9692912808736227, + "grad_norm": 0.023080473765730858, + "learning_rate": 2.479236171808137e-07, + "loss": 0.0, + "num_input_tokens_seen": 109502288, + "step": 162475 + }, + { + "epoch": 3.96941343170547, + "grad_norm": 0.019256386905908585, + "learning_rate": 2.478674155002224e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109506128, + "step": 162480 + }, + { + "epoch": 3.969535582537317, + "grad_norm": 0.0003879503929056227, + "learning_rate": 2.4781121928934155e-07, + "loss": 0.0, + "num_input_tokens_seen": 109509968, + "step": 162485 + }, + { + "epoch": 3.9696577333691643, + "grad_norm": 0.002552243648096919, + "learning_rate": 2.477550285485802e-07, + "loss": 0.1131, + "num_input_tokens_seen": 109513488, + "step": 162490 + }, + { + "epoch": 3.9697798842010115, + "grad_norm": 0.09655870497226715, + "learning_rate": 2.476988432783463e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109517200, + "step": 162495 + }, + { + "epoch": 3.9699020350328587, + "grad_norm": 0.021147755905985832, + "learning_rate": 2.4764266347904905e-07, + "loss": 0.0, + "num_input_tokens_seen": 109520400, + "step": 162500 + }, + { + "epoch": 3.970024185864706, + "grad_norm": 0.0016340258298441768, + "learning_rate": 2.4758648915109636e-07, + "loss": 0.0393, + "num_input_tokens_seen": 109524048, + "step": 162505 + }, + { + "epoch": 3.970146336696553, + "grad_norm": 0.009415844455361366, + "learning_rate": 2.4753032029489753e-07, + "loss": 0.0, + "num_input_tokens_seen": 109527376, + "step": 162510 + }, + { + "epoch": 3.9702684875284002, + "grad_norm": 0.00024968807701952755, + "learning_rate": 2.4747415691086013e-07, + "loss": 0.0, + "num_input_tokens_seen": 109530768, + "step": 162515 + }, + { + "epoch": 3.9703906383602474, + "grad_norm": 0.009475484490394592, + "learning_rate": 2.474179989993932e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109534032, + "step": 162520 + }, + { + "epoch": 3.970512789192094, + "grad_norm": 0.011886761523783207, + "learning_rate": 2.473618465609053e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109537296, + "step": 162525 + }, + { + "epoch": 3.970634940023942, + "grad_norm": 0.011090297251939774, + "learning_rate": 2.4730569959580416e-07, + "loss": 0.0, + "num_input_tokens_seen": 109540304, + "step": 162530 + }, + { + "epoch": 3.9707570908557885, + "grad_norm": 0.00524521991610527, + "learning_rate": 2.4724955810449865e-07, + "loss": 0.0, + "num_input_tokens_seen": 109543440, + "step": 162535 + }, + { + "epoch": 3.9708792416876357, + "grad_norm": 0.05811067670583725, + "learning_rate": 2.471934220873969e-07, + "loss": 0.0, + "num_input_tokens_seen": 109547152, + "step": 162540 + }, + { + "epoch": 3.971001392519483, + "grad_norm": 0.0004150404129177332, + "learning_rate": 2.471372915449067e-07, + "loss": 0.0, + "num_input_tokens_seen": 109550160, + "step": 162545 + }, + { + "epoch": 3.97112354335133, + "grad_norm": 0.1513708084821701, + "learning_rate": 2.4708116647743696e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109553296, + "step": 162550 + }, + { + "epoch": 3.9712456941831773, + "grad_norm": 0.003029992338269949, + "learning_rate": 2.4702504688539516e-07, + "loss": 0.0893, + "num_input_tokens_seen": 109556688, + "step": 162555 + }, + { + "epoch": 3.9713678450150245, + "grad_norm": 0.11696822941303253, + "learning_rate": 2.469689327691901e-07, + "loss": 0.0817, + "num_input_tokens_seen": 109560336, + "step": 162560 + }, + { + "epoch": 3.9714899958468717, + "grad_norm": 0.016360236331820488, + "learning_rate": 2.4691282412922923e-07, + "loss": 0.0, + "num_input_tokens_seen": 109563792, + "step": 162565 + }, + { + "epoch": 3.971612146678719, + "grad_norm": 0.0006904311594553292, + "learning_rate": 2.4685672096592105e-07, + "loss": 0.0, + "num_input_tokens_seen": 109566992, + "step": 162570 + }, + { + "epoch": 3.971734297510566, + "grad_norm": 0.0017873753095045686, + "learning_rate": 2.468006232796731e-07, + "loss": 0.0, + "num_input_tokens_seen": 109570384, + "step": 162575 + }, + { + "epoch": 3.9718564483424132, + "grad_norm": 0.003968059550970793, + "learning_rate": 2.4674453107089356e-07, + "loss": 0.0, + "num_input_tokens_seen": 109573520, + "step": 162580 + }, + { + "epoch": 3.9719785991742604, + "grad_norm": 1.919407606124878, + "learning_rate": 2.4668844433999083e-07, + "loss": 0.0015, + "num_input_tokens_seen": 109576912, + "step": 162585 + }, + { + "epoch": 3.9721007500061076, + "grad_norm": 0.01713302545249462, + "learning_rate": 2.466323630873719e-07, + "loss": 0.0, + "num_input_tokens_seen": 109580240, + "step": 162590 + }, + { + "epoch": 3.972222900837955, + "grad_norm": 0.013763492926955223, + "learning_rate": 2.465762873134455e-07, + "loss": 0.0, + "num_input_tokens_seen": 109583440, + "step": 162595 + }, + { + "epoch": 3.972345051669802, + "grad_norm": 0.0006270576850511134, + "learning_rate": 2.465202170186186e-07, + "loss": 0.0, + "num_input_tokens_seen": 109587024, + "step": 162600 + }, + { + "epoch": 3.972467202501649, + "grad_norm": 0.12661677598953247, + "learning_rate": 2.4646415220329963e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109590224, + "step": 162605 + }, + { + "epoch": 3.972589353333496, + "grad_norm": 0.003163676941767335, + "learning_rate": 2.4640809286789575e-07, + "loss": 0.0, + "num_input_tokens_seen": 109593232, + "step": 162610 + }, + { + "epoch": 3.9727115041653436, + "grad_norm": 0.0038118986412882805, + "learning_rate": 2.463520390128149e-07, + "loss": 0.0, + "num_input_tokens_seen": 109596944, + "step": 162615 + }, + { + "epoch": 3.9728336549971903, + "grad_norm": 0.0014689149102196097, + "learning_rate": 2.4629599063846494e-07, + "loss": 0.0, + "num_input_tokens_seen": 109600080, + "step": 162620 + }, + { + "epoch": 3.972955805829038, + "grad_norm": 0.00022433280537370592, + "learning_rate": 2.4623994774525313e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109603216, + "step": 162625 + }, + { + "epoch": 3.9730779566608847, + "grad_norm": 0.004913068376481533, + "learning_rate": 2.461839103335873e-07, + "loss": 0.0377, + "num_input_tokens_seen": 109606352, + "step": 162630 + }, + { + "epoch": 3.973200107492732, + "grad_norm": 0.0011740082409232855, + "learning_rate": 2.461278784038747e-07, + "loss": 0.0, + "num_input_tokens_seen": 109609808, + "step": 162635 + }, + { + "epoch": 3.973322258324579, + "grad_norm": 0.002981620142236352, + "learning_rate": 2.4607185195652315e-07, + "loss": 0.0, + "num_input_tokens_seen": 109613200, + "step": 162640 + }, + { + "epoch": 3.9734444091564263, + "grad_norm": 0.016984274610877037, + "learning_rate": 2.460158309919396e-07, + "loss": 0.0, + "num_input_tokens_seen": 109616784, + "step": 162645 + }, + { + "epoch": 3.9735665599882735, + "grad_norm": 0.0023610140196979046, + "learning_rate": 2.4595981551053193e-07, + "loss": 0.0, + "num_input_tokens_seen": 109620240, + "step": 162650 + }, + { + "epoch": 3.9736887108201207, + "grad_norm": 0.07518626004457474, + "learning_rate": 2.459038055127073e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109623440, + "step": 162655 + }, + { + "epoch": 3.973810861651968, + "grad_norm": 0.0030677923932671547, + "learning_rate": 2.458478009988728e-07, + "loss": 0.0, + "num_input_tokens_seen": 109626704, + "step": 162660 + }, + { + "epoch": 3.973933012483815, + "grad_norm": 0.0016324043972417712, + "learning_rate": 2.4579180196943614e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109630416, + "step": 162665 + }, + { + "epoch": 3.974055163315662, + "grad_norm": 0.006567919161170721, + "learning_rate": 2.4573580842480424e-07, + "loss": 0.0, + "num_input_tokens_seen": 109634064, + "step": 162670 + }, + { + "epoch": 3.9741773141475094, + "grad_norm": 0.0009271908784285188, + "learning_rate": 2.456798203653843e-07, + "loss": 0.0, + "num_input_tokens_seen": 109637648, + "step": 162675 + }, + { + "epoch": 3.9742994649793566, + "grad_norm": 74.43541717529297, + "learning_rate": 2.456238377915839e-07, + "loss": 0.0523, + "num_input_tokens_seen": 109641104, + "step": 162680 + }, + { + "epoch": 3.974421615811204, + "grad_norm": 0.0008278028690256178, + "learning_rate": 2.4556786070380954e-07, + "loss": 0.0, + "num_input_tokens_seen": 109644944, + "step": 162685 + }, + { + "epoch": 3.974543766643051, + "grad_norm": 0.0014180800644680858, + "learning_rate": 2.455118891024689e-07, + "loss": 0.0, + "num_input_tokens_seen": 109648080, + "step": 162690 + }, + { + "epoch": 3.974665917474898, + "grad_norm": 0.001689577242359519, + "learning_rate": 2.454559229879685e-07, + "loss": 0.0, + "num_input_tokens_seen": 109651664, + "step": 162695 + }, + { + "epoch": 3.9747880683067454, + "grad_norm": 0.015290713869035244, + "learning_rate": 2.453999623607155e-07, + "loss": 0.0, + "num_input_tokens_seen": 109655120, + "step": 162700 + }, + { + "epoch": 3.974910219138592, + "grad_norm": 0.3379508852958679, + "learning_rate": 2.4534400722111724e-07, + "loss": 0.0, + "num_input_tokens_seen": 109658128, + "step": 162705 + }, + { + "epoch": 3.9750323699704397, + "grad_norm": 0.02128562517464161, + "learning_rate": 2.4528805756958004e-07, + "loss": 0.0, + "num_input_tokens_seen": 109661776, + "step": 162710 + }, + { + "epoch": 3.9751545208022865, + "grad_norm": 0.022409426048398018, + "learning_rate": 2.452321134065114e-07, + "loss": 0.0, + "num_input_tokens_seen": 109664976, + "step": 162715 + }, + { + "epoch": 3.9752766716341337, + "grad_norm": 0.0011433304753154516, + "learning_rate": 2.4517617473231755e-07, + "loss": 0.0, + "num_input_tokens_seen": 109668304, + "step": 162720 + }, + { + "epoch": 3.975398822465981, + "grad_norm": 0.0014628785429522395, + "learning_rate": 2.4512024154740594e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109671120, + "step": 162725 + }, + { + "epoch": 3.975520973297828, + "grad_norm": 0.0012373403878882527, + "learning_rate": 2.450643138521826e-07, + "loss": 0.0, + "num_input_tokens_seen": 109674320, + "step": 162730 + }, + { + "epoch": 3.9756431241296752, + "grad_norm": 0.010684294626116753, + "learning_rate": 2.4500839164705464e-07, + "loss": 0.0004, + "num_input_tokens_seen": 109677776, + "step": 162735 + }, + { + "epoch": 3.9757652749615224, + "grad_norm": 0.005231752991676331, + "learning_rate": 2.4495247493242896e-07, + "loss": 0.0, + "num_input_tokens_seen": 109680976, + "step": 162740 + }, + { + "epoch": 3.9758874257933696, + "grad_norm": 0.004223552066832781, + "learning_rate": 2.4489656370871205e-07, + "loss": 0.06, + "num_input_tokens_seen": 109683920, + "step": 162745 + }, + { + "epoch": 3.976009576625217, + "grad_norm": 0.004554894287139177, + "learning_rate": 2.4484065797631015e-07, + "loss": 0.0, + "num_input_tokens_seen": 109687312, + "step": 162750 + }, + { + "epoch": 3.976131727457064, + "grad_norm": 0.04216483607888222, + "learning_rate": 2.447847577356303e-07, + "loss": 0.0, + "num_input_tokens_seen": 109690576, + "step": 162755 + }, + { + "epoch": 3.976253878288911, + "grad_norm": 0.1104845255613327, + "learning_rate": 2.4472886298707883e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109693776, + "step": 162760 + }, + { + "epoch": 3.9763760291207584, + "grad_norm": 0.000709182524587959, + "learning_rate": 2.44672973731062e-07, + "loss": 0.0, + "num_input_tokens_seen": 109696912, + "step": 162765 + }, + { + "epoch": 3.9764981799526056, + "grad_norm": 0.0018730240408331156, + "learning_rate": 2.4461708996798634e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109700304, + "step": 162770 + }, + { + "epoch": 3.9766203307844528, + "grad_norm": 0.002656224649399519, + "learning_rate": 2.445612116982588e-07, + "loss": 0.0, + "num_input_tokens_seen": 109703248, + "step": 162775 + }, + { + "epoch": 3.9767424816163, + "grad_norm": 0.006554523948580027, + "learning_rate": 2.44505338922285e-07, + "loss": 0.0, + "num_input_tokens_seen": 109706576, + "step": 162780 + }, + { + "epoch": 3.976864632448147, + "grad_norm": 0.00015725352568551898, + "learning_rate": 2.444494716404718e-07, + "loss": 0.0004, + "num_input_tokens_seen": 109709648, + "step": 162785 + }, + { + "epoch": 3.976986783279994, + "grad_norm": 0.012902540154755116, + "learning_rate": 2.4439360985322497e-07, + "loss": 0.0, + "num_input_tokens_seen": 109713040, + "step": 162790 + }, + { + "epoch": 3.9771089341118415, + "grad_norm": 0.00813246052712202, + "learning_rate": 2.443377535609511e-07, + "loss": 0.0, + "num_input_tokens_seen": 109716240, + "step": 162795 + }, + { + "epoch": 3.9772310849436883, + "grad_norm": 0.00023855824838392437, + "learning_rate": 2.4428190276405657e-07, + "loss": 0.0, + "num_input_tokens_seen": 109719504, + "step": 162800 + }, + { + "epoch": 3.977353235775536, + "grad_norm": 0.002531233709305525, + "learning_rate": 2.4422605746294713e-07, + "loss": 0.0578, + "num_input_tokens_seen": 109722768, + "step": 162805 + }, + { + "epoch": 3.9774753866073826, + "grad_norm": 0.0015762082766741514, + "learning_rate": 2.4417021765802923e-07, + "loss": 0.0, + "num_input_tokens_seen": 109725904, + "step": 162810 + }, + { + "epoch": 3.97759753743923, + "grad_norm": 0.0006869949284009635, + "learning_rate": 2.4411438334970856e-07, + "loss": 0.0, + "num_input_tokens_seen": 109729808, + "step": 162815 + }, + { + "epoch": 3.977719688271077, + "grad_norm": 0.04206356033682823, + "learning_rate": 2.4405855453839174e-07, + "loss": 0.0, + "num_input_tokens_seen": 109732944, + "step": 162820 + }, + { + "epoch": 3.977841839102924, + "grad_norm": 0.00013251813652459532, + "learning_rate": 2.4400273122448413e-07, + "loss": 0.0, + "num_input_tokens_seen": 109736336, + "step": 162825 + }, + { + "epoch": 3.9779639899347714, + "grad_norm": 0.0011560230050235987, + "learning_rate": 2.43946913408392e-07, + "loss": 0.0453, + "num_input_tokens_seen": 109739600, + "step": 162830 + }, + { + "epoch": 3.9780861407666186, + "grad_norm": 0.0013253620127215981, + "learning_rate": 2.438911010905216e-07, + "loss": 0.0, + "num_input_tokens_seen": 109742864, + "step": 162835 + }, + { + "epoch": 3.978208291598466, + "grad_norm": 0.0038731468375772238, + "learning_rate": 2.4383529427127804e-07, + "loss": 0.0, + "num_input_tokens_seen": 109746448, + "step": 162840 + }, + { + "epoch": 3.978330442430313, + "grad_norm": 0.018492287024855614, + "learning_rate": 2.43779492951068e-07, + "loss": 0.0, + "num_input_tokens_seen": 109750032, + "step": 162845 + }, + { + "epoch": 3.97845259326216, + "grad_norm": 0.001245662453584373, + "learning_rate": 2.4372369713029683e-07, + "loss": 0.0, + "num_input_tokens_seen": 109753360, + "step": 162850 + }, + { + "epoch": 3.9785747440940074, + "grad_norm": 0.0058091082610189915, + "learning_rate": 2.436679068093701e-07, + "loss": 0.0, + "num_input_tokens_seen": 109756624, + "step": 162855 + }, + { + "epoch": 3.9786968949258545, + "grad_norm": 56.89339828491211, + "learning_rate": 2.436121219886941e-07, + "loss": 0.0489, + "num_input_tokens_seen": 109759760, + "step": 162860 + }, + { + "epoch": 3.9788190457577017, + "grad_norm": 0.0014791139401495457, + "learning_rate": 2.4355634266867387e-07, + "loss": 0.0, + "num_input_tokens_seen": 109763152, + "step": 162865 + }, + { + "epoch": 3.978941196589549, + "grad_norm": 0.0012810054467990994, + "learning_rate": 2.435005688497157e-07, + "loss": 0.0, + "num_input_tokens_seen": 109766288, + "step": 162870 + }, + { + "epoch": 3.9790633474213957, + "grad_norm": 0.0035496437922120094, + "learning_rate": 2.434448005322245e-07, + "loss": 0.0, + "num_input_tokens_seen": 109769360, + "step": 162875 + }, + { + "epoch": 3.9791854982532433, + "grad_norm": 0.0032136563677340746, + "learning_rate": 2.4338903771660656e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109772624, + "step": 162880 + }, + { + "epoch": 3.97930764908509, + "grad_norm": 0.0025195456109941006, + "learning_rate": 2.433332804032667e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109775568, + "step": 162885 + }, + { + "epoch": 3.9794297999169377, + "grad_norm": 0.009100309573113918, + "learning_rate": 2.4327752859261074e-07, + "loss": 0.0, + "num_input_tokens_seen": 109779088, + "step": 162890 + }, + { + "epoch": 3.9795519507487844, + "grad_norm": 0.002834005979821086, + "learning_rate": 2.432217822850445e-07, + "loss": 0.0, + "num_input_tokens_seen": 109782032, + "step": 162895 + }, + { + "epoch": 3.9796741015806316, + "grad_norm": 0.0029583594296127558, + "learning_rate": 2.4316604148097264e-07, + "loss": 0.0, + "num_input_tokens_seen": 109785296, + "step": 162900 + }, + { + "epoch": 3.979796252412479, + "grad_norm": 0.0010822449112311006, + "learning_rate": 2.431103061808012e-07, + "loss": 0.0479, + "num_input_tokens_seen": 109788816, + "step": 162905 + }, + { + "epoch": 3.979918403244326, + "grad_norm": 0.0011741408379748464, + "learning_rate": 2.43054576384935e-07, + "loss": 0.0004, + "num_input_tokens_seen": 109792080, + "step": 162910 + }, + { + "epoch": 3.980040554076173, + "grad_norm": 11.589085578918457, + "learning_rate": 2.429988520937797e-07, + "loss": 0.041, + "num_input_tokens_seen": 109795216, + "step": 162915 + }, + { + "epoch": 3.9801627049080204, + "grad_norm": 0.013919665478169918, + "learning_rate": 2.4294313330773995e-07, + "loss": 0.0004, + "num_input_tokens_seen": 109798800, + "step": 162920 + }, + { + "epoch": 3.9802848557398676, + "grad_norm": 0.002399963326752186, + "learning_rate": 2.428874200272215e-07, + "loss": 0.0, + "num_input_tokens_seen": 109802384, + "step": 162925 + }, + { + "epoch": 3.9804070065717148, + "grad_norm": 0.011189553886651993, + "learning_rate": 2.4283171225262967e-07, + "loss": 0.0, + "num_input_tokens_seen": 109806416, + "step": 162930 + }, + { + "epoch": 3.980529157403562, + "grad_norm": 0.0008779754862189293, + "learning_rate": 2.42776009984369e-07, + "loss": 0.0, + "num_input_tokens_seen": 109810192, + "step": 162935 + }, + { + "epoch": 3.980651308235409, + "grad_norm": 0.0033224131911993027, + "learning_rate": 2.427203132228451e-07, + "loss": 0.0, + "num_input_tokens_seen": 109813584, + "step": 162940 + }, + { + "epoch": 3.9807734590672563, + "grad_norm": 2.35109018831281e-05, + "learning_rate": 2.426646219684625e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109816592, + "step": 162945 + }, + { + "epoch": 3.9808956098991035, + "grad_norm": 0.06689867377281189, + "learning_rate": 2.426089362216267e-07, + "loss": 0.0, + "num_input_tokens_seen": 109819728, + "step": 162950 + }, + { + "epoch": 3.9810177607309507, + "grad_norm": 0.008919079788029194, + "learning_rate": 2.4255325598274225e-07, + "loss": 0.0, + "num_input_tokens_seen": 109822864, + "step": 162955 + }, + { + "epoch": 3.981139911562798, + "grad_norm": 0.01653454639017582, + "learning_rate": 2.424975812522144e-07, + "loss": 0.0, + "num_input_tokens_seen": 109826320, + "step": 162960 + }, + { + "epoch": 3.981262062394645, + "grad_norm": 0.016628887504339218, + "learning_rate": 2.424419120304481e-07, + "loss": 0.0, + "num_input_tokens_seen": 109829584, + "step": 162965 + }, + { + "epoch": 3.981384213226492, + "grad_norm": 0.0006351763149723411, + "learning_rate": 2.423862483178475e-07, + "loss": 0.0, + "num_input_tokens_seen": 109833040, + "step": 162970 + }, + { + "epoch": 3.9815063640583395, + "grad_norm": 0.004808998201042414, + "learning_rate": 2.4233059011481817e-07, + "loss": 0.0, + "num_input_tokens_seen": 109836176, + "step": 162975 + }, + { + "epoch": 3.981628514890186, + "grad_norm": 0.0008432915201410651, + "learning_rate": 2.422749374217643e-07, + "loss": 0.0, + "num_input_tokens_seen": 109839248, + "step": 162980 + }, + { + "epoch": 3.981750665722034, + "grad_norm": 0.0023574333172291517, + "learning_rate": 2.4221929023909096e-07, + "loss": 0.0, + "num_input_tokens_seen": 109842704, + "step": 162985 + }, + { + "epoch": 3.9818728165538806, + "grad_norm": 0.003182420041412115, + "learning_rate": 2.4216364856720295e-07, + "loss": 0.0, + "num_input_tokens_seen": 109846608, + "step": 162990 + }, + { + "epoch": 3.981994967385728, + "grad_norm": 0.039140839129686356, + "learning_rate": 2.421080124065045e-07, + "loss": 0.0093, + "num_input_tokens_seen": 109849616, + "step": 162995 + }, + { + "epoch": 3.982117118217575, + "grad_norm": 0.0021024311427026987, + "learning_rate": 2.4205238175740075e-07, + "loss": 0.0, + "num_input_tokens_seen": 109852880, + "step": 163000 + }, + { + "epoch": 3.982239269049422, + "grad_norm": 0.0015542428009212017, + "learning_rate": 2.4199675662029563e-07, + "loss": 0.0, + "num_input_tokens_seen": 109856336, + "step": 163005 + }, + { + "epoch": 3.9823614198812693, + "grad_norm": 6.775200017727911e-05, + "learning_rate": 2.4194113699559395e-07, + "loss": 0.0, + "num_input_tokens_seen": 109859600, + "step": 163010 + }, + { + "epoch": 3.9824835707131165, + "grad_norm": 0.0029845689423382282, + "learning_rate": 2.4188552288370043e-07, + "loss": 0.0523, + "num_input_tokens_seen": 109862928, + "step": 163015 + }, + { + "epoch": 3.9826057215449637, + "grad_norm": 0.1912604123353958, + "learning_rate": 2.4182991428501906e-07, + "loss": 0.0, + "num_input_tokens_seen": 109866256, + "step": 163020 + }, + { + "epoch": 3.982727872376811, + "grad_norm": 0.0005314030568115413, + "learning_rate": 2.4177431119995483e-07, + "loss": 0.0, + "num_input_tokens_seen": 109869584, + "step": 163025 + }, + { + "epoch": 3.982850023208658, + "grad_norm": 0.005421550944447517, + "learning_rate": 2.417187136289115e-07, + "loss": 0.0, + "num_input_tokens_seen": 109872912, + "step": 163030 + }, + { + "epoch": 3.9829721740405053, + "grad_norm": 0.0011578707490116358, + "learning_rate": 2.4166312157229384e-07, + "loss": 0.1016, + "num_input_tokens_seen": 109875984, + "step": 163035 + }, + { + "epoch": 3.9830943248723525, + "grad_norm": 0.11689208447933197, + "learning_rate": 2.416075350305056e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109879184, + "step": 163040 + }, + { + "epoch": 3.9832164757041997, + "grad_norm": 0.0014069858007133007, + "learning_rate": 2.4155195400395144e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109882704, + "step": 163045 + }, + { + "epoch": 3.983338626536047, + "grad_norm": 0.00013945819227956235, + "learning_rate": 2.414963784930357e-07, + "loss": 0.0, + "num_input_tokens_seen": 109886288, + "step": 163050 + }, + { + "epoch": 3.9834607773678936, + "grad_norm": 0.00327501748688519, + "learning_rate": 2.414408084981623e-07, + "loss": 0.0, + "num_input_tokens_seen": 109890064, + "step": 163055 + }, + { + "epoch": 3.9835829281997412, + "grad_norm": 0.00012703366519417614, + "learning_rate": 2.4138524401973515e-07, + "loss": 0.0, + "num_input_tokens_seen": 109893264, + "step": 163060 + }, + { + "epoch": 3.983705079031588, + "grad_norm": 0.012055044062435627, + "learning_rate": 2.4132968505815874e-07, + "loss": 0.0, + "num_input_tokens_seen": 109896208, + "step": 163065 + }, + { + "epoch": 3.9838272298634356, + "grad_norm": 0.3332982063293457, + "learning_rate": 2.4127413161383693e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109899472, + "step": 163070 + }, + { + "epoch": 3.9839493806952824, + "grad_norm": 0.00024611069238744676, + "learning_rate": 2.412185836871735e-07, + "loss": 0.0, + "num_input_tokens_seen": 109902480, + "step": 163075 + }, + { + "epoch": 3.9840715315271296, + "grad_norm": 0.00047673837980255485, + "learning_rate": 2.4116304127857256e-07, + "loss": 0.0, + "num_input_tokens_seen": 109905744, + "step": 163080 + }, + { + "epoch": 3.9841936823589768, + "grad_norm": 0.0008909995085559785, + "learning_rate": 2.411075043884384e-07, + "loss": 0.0534, + "num_input_tokens_seen": 109909264, + "step": 163085 + }, + { + "epoch": 3.984315833190824, + "grad_norm": 0.0021835891529917717, + "learning_rate": 2.410519730171743e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109912784, + "step": 163090 + }, + { + "epoch": 3.984437984022671, + "grad_norm": 0.0039231725968420506, + "learning_rate": 2.4099644716518464e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109915920, + "step": 163095 + }, + { + "epoch": 3.9845601348545183, + "grad_norm": 0.0007445600931532681, + "learning_rate": 2.409409268328727e-07, + "loss": 0.0114, + "num_input_tokens_seen": 109919760, + "step": 163100 + }, + { + "epoch": 3.9846822856863655, + "grad_norm": 0.012176787480711937, + "learning_rate": 2.4088541202064247e-07, + "loss": 0.0, + "num_input_tokens_seen": 109923536, + "step": 163105 + }, + { + "epoch": 3.9848044365182127, + "grad_norm": 0.004063439555466175, + "learning_rate": 2.4082990272889804e-07, + "loss": 0.0, + "num_input_tokens_seen": 109927184, + "step": 163110 + }, + { + "epoch": 3.98492658735006, + "grad_norm": 0.026905439794063568, + "learning_rate": 2.407743989580424e-07, + "loss": 0.0002, + "num_input_tokens_seen": 109930384, + "step": 163115 + }, + { + "epoch": 3.985048738181907, + "grad_norm": 0.029015744104981422, + "learning_rate": 2.407189007084799e-07, + "loss": 0.0, + "num_input_tokens_seen": 109934096, + "step": 163120 + }, + { + "epoch": 3.9851708890137543, + "grad_norm": 0.0023757724557071924, + "learning_rate": 2.4066340798061344e-07, + "loss": 0.0, + "num_input_tokens_seen": 109937744, + "step": 163125 + }, + { + "epoch": 3.9852930398456015, + "grad_norm": 0.09235114604234695, + "learning_rate": 2.4060792077484727e-07, + "loss": 0.0, + "num_input_tokens_seen": 109940944, + "step": 163130 + }, + { + "epoch": 3.9854151906774486, + "grad_norm": 0.0007887451793067157, + "learning_rate": 2.405524390915842e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109944336, + "step": 163135 + }, + { + "epoch": 3.985537341509296, + "grad_norm": 0.014184202067553997, + "learning_rate": 2.4049696293122803e-07, + "loss": 0.0004, + "num_input_tokens_seen": 109947856, + "step": 163140 + }, + { + "epoch": 3.985659492341143, + "grad_norm": 0.004972795955836773, + "learning_rate": 2.4044149229418255e-07, + "loss": 0.0, + "num_input_tokens_seen": 109951184, + "step": 163145 + }, + { + "epoch": 3.9857816431729898, + "grad_norm": 0.2864531874656677, + "learning_rate": 2.4038602718085057e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109954512, + "step": 163150 + }, + { + "epoch": 3.9859037940048374, + "grad_norm": 0.0016255693044513464, + "learning_rate": 2.4033056759163597e-07, + "loss": 0.0, + "num_input_tokens_seen": 109958096, + "step": 163155 + }, + { + "epoch": 3.986025944836684, + "grad_norm": 0.00540508795529604, + "learning_rate": 2.402751135269417e-07, + "loss": 0.0465, + "num_input_tokens_seen": 109961296, + "step": 163160 + }, + { + "epoch": 3.9861480956685313, + "grad_norm": 0.012475240975618362, + "learning_rate": 2.4021966498717107e-07, + "loss": 0.0, + "num_input_tokens_seen": 109965072, + "step": 163165 + }, + { + "epoch": 3.9862702465003785, + "grad_norm": 0.00033634447026997805, + "learning_rate": 2.4016422197272757e-07, + "loss": 0.0, + "num_input_tokens_seen": 109967952, + "step": 163170 + }, + { + "epoch": 3.9863923973322257, + "grad_norm": 0.0004531031008809805, + "learning_rate": 2.4010878448401393e-07, + "loss": 0.0, + "num_input_tokens_seen": 109971088, + "step": 163175 + }, + { + "epoch": 3.986514548164073, + "grad_norm": 0.000985053600743413, + "learning_rate": 2.4005335252143387e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109974672, + "step": 163180 + }, + { + "epoch": 3.98663669899592, + "grad_norm": 0.005329513922333717, + "learning_rate": 2.3999792608539005e-07, + "loss": 0.0, + "num_input_tokens_seen": 109977808, + "step": 163185 + }, + { + "epoch": 3.9867588498277673, + "grad_norm": 0.0018812778871506453, + "learning_rate": 2.3994250517628587e-07, + "loss": 0.0, + "num_input_tokens_seen": 109981328, + "step": 163190 + }, + { + "epoch": 3.9868810006596145, + "grad_norm": 0.017847029492259026, + "learning_rate": 2.398870897945241e-07, + "loss": 0.0, + "num_input_tokens_seen": 109985040, + "step": 163195 + }, + { + "epoch": 3.9870031514914617, + "grad_norm": 0.03550269454717636, + "learning_rate": 2.398316799405077e-07, + "loss": 0.0, + "num_input_tokens_seen": 109988752, + "step": 163200 + }, + { + "epoch": 3.987125302323309, + "grad_norm": 0.0018748401198536158, + "learning_rate": 2.397762756146402e-07, + "loss": 0.0, + "num_input_tokens_seen": 109992464, + "step": 163205 + }, + { + "epoch": 3.987247453155156, + "grad_norm": 0.0008736561867408454, + "learning_rate": 2.3972087681732367e-07, + "loss": 0.0, + "num_input_tokens_seen": 109995792, + "step": 163210 + }, + { + "epoch": 3.9873696039870032, + "grad_norm": 0.00046163939987309277, + "learning_rate": 2.396654835489618e-07, + "loss": 0.0001, + "num_input_tokens_seen": 109999888, + "step": 163215 + }, + { + "epoch": 3.9874917548188504, + "grad_norm": 0.0022144047543406487, + "learning_rate": 2.396100958099567e-07, + "loss": 0.0, + "num_input_tokens_seen": 110003024, + "step": 163220 + }, + { + "epoch": 3.9876139056506976, + "grad_norm": 0.004857294727116823, + "learning_rate": 2.395547136007119e-07, + "loss": 0.0, + "num_input_tokens_seen": 110006288, + "step": 163225 + }, + { + "epoch": 3.987736056482545, + "grad_norm": 0.0012780651450157166, + "learning_rate": 2.3949933692162936e-07, + "loss": 0.0, + "num_input_tokens_seen": 110010320, + "step": 163230 + }, + { + "epoch": 3.9878582073143916, + "grad_norm": 0.00010625671711750329, + "learning_rate": 2.394439657731122e-07, + "loss": 0.0, + "num_input_tokens_seen": 110013840, + "step": 163235 + }, + { + "epoch": 3.987980358146239, + "grad_norm": 0.0005658533773384988, + "learning_rate": 2.393886001555634e-07, + "loss": 0.0, + "num_input_tokens_seen": 110017232, + "step": 163240 + }, + { + "epoch": 3.988102508978086, + "grad_norm": 0.04258429631590843, + "learning_rate": 2.3933324006938503e-07, + "loss": 0.0, + "num_input_tokens_seen": 110020624, + "step": 163245 + }, + { + "epoch": 3.9882246598099336, + "grad_norm": 0.01064017042517662, + "learning_rate": 2.3927788551498016e-07, + "loss": 0.0, + "num_input_tokens_seen": 110023760, + "step": 163250 + }, + { + "epoch": 3.9883468106417803, + "grad_norm": 27.123470306396484, + "learning_rate": 2.392225364927508e-07, + "loss": 0.0371, + "num_input_tokens_seen": 110026832, + "step": 163255 + }, + { + "epoch": 3.9884689614736275, + "grad_norm": 0.00024280698562506586, + "learning_rate": 2.3916719300310017e-07, + "loss": 0.0371, + "num_input_tokens_seen": 110030224, + "step": 163260 + }, + { + "epoch": 3.9885911123054747, + "grad_norm": 0.0007595649221912026, + "learning_rate": 2.3911185504642993e-07, + "loss": 0.0679, + "num_input_tokens_seen": 110033616, + "step": 163265 + }, + { + "epoch": 3.988713263137322, + "grad_norm": 0.052835941314697266, + "learning_rate": 2.3905652262314335e-07, + "loss": 0.0, + "num_input_tokens_seen": 110037072, + "step": 163270 + }, + { + "epoch": 3.988835413969169, + "grad_norm": 0.0002746016252785921, + "learning_rate": 2.390011957336424e-07, + "loss": 0.0, + "num_input_tokens_seen": 110040400, + "step": 163275 + }, + { + "epoch": 3.9889575648010163, + "grad_norm": 0.010532891377806664, + "learning_rate": 2.3894587437832903e-07, + "loss": 0.0, + "num_input_tokens_seen": 110043280, + "step": 163280 + }, + { + "epoch": 3.9890797156328635, + "grad_norm": 0.02451830357313156, + "learning_rate": 2.388905585576063e-07, + "loss": 0.0, + "num_input_tokens_seen": 110046928, + "step": 163285 + }, + { + "epoch": 3.9892018664647106, + "grad_norm": 0.0006182059296406806, + "learning_rate": 2.3883524827187593e-07, + "loss": 0.0439, + "num_input_tokens_seen": 110050128, + "step": 163290 + }, + { + "epoch": 3.989324017296558, + "grad_norm": 0.034917838871479034, + "learning_rate": 2.387799435215403e-07, + "loss": 0.0626, + "num_input_tokens_seen": 110053712, + "step": 163295 + }, + { + "epoch": 3.989446168128405, + "grad_norm": 0.035469673573970795, + "learning_rate": 2.3872464430700203e-07, + "loss": 0.0667, + "num_input_tokens_seen": 110057232, + "step": 163300 + }, + { + "epoch": 3.989568318960252, + "grad_norm": 0.00020851861336268485, + "learning_rate": 2.3866935062866254e-07, + "loss": 0.0, + "num_input_tokens_seen": 110060432, + "step": 163305 + }, + { + "epoch": 3.9896904697920994, + "grad_norm": 0.003421532455831766, + "learning_rate": 2.3861406248692463e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110063824, + "step": 163310 + }, + { + "epoch": 3.9898126206239466, + "grad_norm": 0.004031584598124027, + "learning_rate": 2.3855877988218974e-07, + "loss": 0.0, + "num_input_tokens_seen": 110067216, + "step": 163315 + }, + { + "epoch": 3.9899347714557933, + "grad_norm": 0.001301066018640995, + "learning_rate": 2.3850350281486044e-07, + "loss": 0.0, + "num_input_tokens_seen": 110070544, + "step": 163320 + }, + { + "epoch": 3.990056922287641, + "grad_norm": 0.008654467761516571, + "learning_rate": 2.384482312853383e-07, + "loss": 0.0, + "num_input_tokens_seen": 110073360, + "step": 163325 + }, + { + "epoch": 3.9901790731194877, + "grad_norm": 0.008370326831936836, + "learning_rate": 2.383929652940253e-07, + "loss": 0.0565, + "num_input_tokens_seen": 110076688, + "step": 163330 + }, + { + "epoch": 3.9903012239513354, + "grad_norm": 0.006358446553349495, + "learning_rate": 2.3833770484132398e-07, + "loss": 0.0, + "num_input_tokens_seen": 110080272, + "step": 163335 + }, + { + "epoch": 3.990423374783182, + "grad_norm": 0.0030323562677949667, + "learning_rate": 2.3828244992763536e-07, + "loss": 0.0, + "num_input_tokens_seen": 110083664, + "step": 163340 + }, + { + "epoch": 3.9905455256150293, + "grad_norm": 0.04232315719127655, + "learning_rate": 2.3822720055336188e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110086928, + "step": 163345 + }, + { + "epoch": 3.9906676764468765, + "grad_norm": 0.0002105066378135234, + "learning_rate": 2.381719567189049e-07, + "loss": 0.0, + "num_input_tokens_seen": 110090192, + "step": 163350 + }, + { + "epoch": 3.9907898272787237, + "grad_norm": 0.015618708916008472, + "learning_rate": 2.381167184246663e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110093584, + "step": 163355 + }, + { + "epoch": 3.990911978110571, + "grad_norm": 0.03186903893947601, + "learning_rate": 2.380614856710481e-07, + "loss": 0.0, + "num_input_tokens_seen": 110096656, + "step": 163360 + }, + { + "epoch": 3.991034128942418, + "grad_norm": 0.027114881202578545, + "learning_rate": 2.380062584584518e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110100112, + "step": 163365 + }, + { + "epoch": 3.9911562797742652, + "grad_norm": 0.03611982986330986, + "learning_rate": 2.3795103678727857e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110103440, + "step": 163370 + }, + { + "epoch": 3.9912784306061124, + "grad_norm": 0.004180221818387508, + "learning_rate": 2.3789582065793068e-07, + "loss": 0.0607, + "num_input_tokens_seen": 110106640, + "step": 163375 + }, + { + "epoch": 3.9914005814379596, + "grad_norm": 0.001564105274155736, + "learning_rate": 2.3784061007080937e-07, + "loss": 0.0003, + "num_input_tokens_seen": 110110096, + "step": 163380 + }, + { + "epoch": 3.991522732269807, + "grad_norm": 0.01233486458659172, + "learning_rate": 2.3778540502631583e-07, + "loss": 0.0, + "num_input_tokens_seen": 110113744, + "step": 163385 + }, + { + "epoch": 3.991644883101654, + "grad_norm": 0.000720239186193794, + "learning_rate": 2.377302055248519e-07, + "loss": 0.0888, + "num_input_tokens_seen": 110117648, + "step": 163390 + }, + { + "epoch": 3.991767033933501, + "grad_norm": 0.0004685970488935709, + "learning_rate": 2.3767501156681923e-07, + "loss": 0.0, + "num_input_tokens_seen": 110120912, + "step": 163395 + }, + { + "epoch": 3.9918891847653484, + "grad_norm": 0.0007855355506762862, + "learning_rate": 2.3761982315261853e-07, + "loss": 0.0372, + "num_input_tokens_seen": 110124176, + "step": 163400 + }, + { + "epoch": 3.9920113355971956, + "grad_norm": 0.002032736549153924, + "learning_rate": 2.375646402826519e-07, + "loss": 0.0, + "num_input_tokens_seen": 110127184, + "step": 163405 + }, + { + "epoch": 3.9921334864290428, + "grad_norm": 0.0016364410985261202, + "learning_rate": 2.3750946295732e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110130512, + "step": 163410 + }, + { + "epoch": 3.9922556372608895, + "grad_norm": 0.00046291478793136775, + "learning_rate": 2.374542911770243e-07, + "loss": 0.039, + "num_input_tokens_seen": 110133584, + "step": 163415 + }, + { + "epoch": 3.992377788092737, + "grad_norm": 0.00025917578022927046, + "learning_rate": 2.3739912494216641e-07, + "loss": 0.0, + "num_input_tokens_seen": 110137360, + "step": 163420 + }, + { + "epoch": 3.992499938924584, + "grad_norm": 0.0023525061551481485, + "learning_rate": 2.3734396425314695e-07, + "loss": 0.0693, + "num_input_tokens_seen": 110140560, + "step": 163425 + }, + { + "epoch": 3.9926220897564315, + "grad_norm": 0.013277255930006504, + "learning_rate": 2.3728880911036752e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110143632, + "step": 163430 + }, + { + "epoch": 3.9927442405882783, + "grad_norm": 0.002585779642686248, + "learning_rate": 2.372336595142288e-07, + "loss": 0.0, + "num_input_tokens_seen": 110146896, + "step": 163435 + }, + { + "epoch": 3.9928663914201254, + "grad_norm": 0.00874535832554102, + "learning_rate": 2.3717851546513234e-07, + "loss": 0.0, + "num_input_tokens_seen": 110150864, + "step": 163440 + }, + { + "epoch": 3.9929885422519726, + "grad_norm": 0.002351542469114065, + "learning_rate": 2.3712337696347863e-07, + "loss": 0.0, + "num_input_tokens_seen": 110154384, + "step": 163445 + }, + { + "epoch": 3.99311069308382, + "grad_norm": 0.0005293526337482035, + "learning_rate": 2.3706824400966886e-07, + "loss": 0.0, + "num_input_tokens_seen": 110157904, + "step": 163450 + }, + { + "epoch": 3.993232843915667, + "grad_norm": 0.002115802839398384, + "learning_rate": 2.3701311660410438e-07, + "loss": 0.0446, + "num_input_tokens_seen": 110161104, + "step": 163455 + }, + { + "epoch": 3.993354994747514, + "grad_norm": 0.0006367531605064869, + "learning_rate": 2.3695799474718537e-07, + "loss": 0.0, + "num_input_tokens_seen": 110164688, + "step": 163460 + }, + { + "epoch": 3.9934771455793614, + "grad_norm": 0.030436921864748, + "learning_rate": 2.3690287843931334e-07, + "loss": 0.0317, + "num_input_tokens_seen": 110168720, + "step": 163465 + }, + { + "epoch": 3.9935992964112086, + "grad_norm": 0.007803088985383511, + "learning_rate": 2.3684776768088887e-07, + "loss": 0.0, + "num_input_tokens_seen": 110172048, + "step": 163470 + }, + { + "epoch": 3.9937214472430558, + "grad_norm": 0.003044381272047758, + "learning_rate": 2.3679266247231244e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110177424, + "step": 163475 + }, + { + "epoch": 3.993843598074903, + "grad_norm": 0.009955629706382751, + "learning_rate": 2.3673756281398528e-07, + "loss": 0.0, + "num_input_tokens_seen": 110181008, + "step": 163480 + }, + { + "epoch": 3.99396574890675, + "grad_norm": 0.0034395726397633553, + "learning_rate": 2.3668246870630759e-07, + "loss": 0.0003, + "num_input_tokens_seen": 110184336, + "step": 163485 + }, + { + "epoch": 3.9940878997385973, + "grad_norm": 0.03659478574991226, + "learning_rate": 2.3662738014968054e-07, + "loss": 0.0, + "num_input_tokens_seen": 110187856, + "step": 163490 + }, + { + "epoch": 3.9942100505704445, + "grad_norm": 0.0005764599773101509, + "learning_rate": 2.3657229714450422e-07, + "loss": 0.0, + "num_input_tokens_seen": 110190800, + "step": 163495 + }, + { + "epoch": 3.9943322014022913, + "grad_norm": 0.06950213015079498, + "learning_rate": 2.365172196911799e-07, + "loss": 0.0, + "num_input_tokens_seen": 110194384, + "step": 163500 + }, + { + "epoch": 3.994454352234139, + "grad_norm": 29.22477149963379, + "learning_rate": 2.3646214779010732e-07, + "loss": 0.1123, + "num_input_tokens_seen": 110197840, + "step": 163505 + }, + { + "epoch": 3.9945765030659857, + "grad_norm": 0.0926981195807457, + "learning_rate": 2.364070814416873e-07, + "loss": 0.0, + "num_input_tokens_seen": 110201872, + "step": 163510 + }, + { + "epoch": 3.9946986538978333, + "grad_norm": 28.436185836791992, + "learning_rate": 2.3635202064632075e-07, + "loss": 0.0336, + "num_input_tokens_seen": 110204752, + "step": 163515 + }, + { + "epoch": 3.99482080472968, + "grad_norm": 0.012042644433677197, + "learning_rate": 2.3629696540440735e-07, + "loss": 0.0, + "num_input_tokens_seen": 110208208, + "step": 163520 + }, + { + "epoch": 3.9949429555615272, + "grad_norm": 0.010948545299470425, + "learning_rate": 2.3624191571634822e-07, + "loss": 0.0, + "num_input_tokens_seen": 110211472, + "step": 163525 + }, + { + "epoch": 3.9950651063933744, + "grad_norm": 0.003581192810088396, + "learning_rate": 2.3618687158254292e-07, + "loss": 0.0, + "num_input_tokens_seen": 110214672, + "step": 163530 + }, + { + "epoch": 3.9951872572252216, + "grad_norm": 0.0026018167845904827, + "learning_rate": 2.3613183300339246e-07, + "loss": 0.0, + "num_input_tokens_seen": 110218128, + "step": 163535 + }, + { + "epoch": 3.995309408057069, + "grad_norm": 0.0834880992770195, + "learning_rate": 2.3607679997929652e-07, + "loss": 0.0, + "num_input_tokens_seen": 110221392, + "step": 163540 + }, + { + "epoch": 3.995431558888916, + "grad_norm": 0.00801916979253292, + "learning_rate": 2.3602177251065548e-07, + "loss": 0.0, + "num_input_tokens_seen": 110224464, + "step": 163545 + }, + { + "epoch": 3.995553709720763, + "grad_norm": 0.01131167821586132, + "learning_rate": 2.3596675059786998e-07, + "loss": 0.0, + "num_input_tokens_seen": 110227856, + "step": 163550 + }, + { + "epoch": 3.9956758605526104, + "grad_norm": 0.00454461295157671, + "learning_rate": 2.3591173424133937e-07, + "loss": 0.0, + "num_input_tokens_seen": 110231248, + "step": 163555 + }, + { + "epoch": 3.9957980113844576, + "grad_norm": 0.0033284425735473633, + "learning_rate": 2.3585672344146457e-07, + "loss": 0.0645, + "num_input_tokens_seen": 110234640, + "step": 163560 + }, + { + "epoch": 3.9959201622163047, + "grad_norm": 0.010862693190574646, + "learning_rate": 2.358017181986448e-07, + "loss": 0.0, + "num_input_tokens_seen": 110237712, + "step": 163565 + }, + { + "epoch": 3.996042313048152, + "grad_norm": 2.686147672648076e-05, + "learning_rate": 2.3574671851328077e-07, + "loss": 0.0, + "num_input_tokens_seen": 110240720, + "step": 163570 + }, + { + "epoch": 3.996164463879999, + "grad_norm": 0.020719388499855995, + "learning_rate": 2.3569172438577189e-07, + "loss": 0.0, + "num_input_tokens_seen": 110244368, + "step": 163575 + }, + { + "epoch": 3.9962866147118463, + "grad_norm": 0.03670404851436615, + "learning_rate": 2.3563673581651866e-07, + "loss": 0.0005, + "num_input_tokens_seen": 110247376, + "step": 163580 + }, + { + "epoch": 3.9964087655436935, + "grad_norm": 0.0005382307572290301, + "learning_rate": 2.3558175280592075e-07, + "loss": 0.0, + "num_input_tokens_seen": 110250576, + "step": 163585 + }, + { + "epoch": 3.9965309163755407, + "grad_norm": 0.16491441428661346, + "learning_rate": 2.355267753543776e-07, + "loss": 0.0, + "num_input_tokens_seen": 110253776, + "step": 163590 + }, + { + "epoch": 3.9966530672073874, + "grad_norm": 0.0002645579806994647, + "learning_rate": 2.3547180346228957e-07, + "loss": 0.0, + "num_input_tokens_seen": 110257296, + "step": 163595 + }, + { + "epoch": 3.996775218039235, + "grad_norm": 0.0007041199132800102, + "learning_rate": 2.35416837130056e-07, + "loss": 0.0, + "num_input_tokens_seen": 110260688, + "step": 163600 + }, + { + "epoch": 3.996897368871082, + "grad_norm": 0.00015905153122730553, + "learning_rate": 2.353618763580768e-07, + "loss": 0.0, + "num_input_tokens_seen": 110264656, + "step": 163605 + }, + { + "epoch": 3.997019519702929, + "grad_norm": 0.0004138974763918668, + "learning_rate": 2.353069211467521e-07, + "loss": 0.0, + "num_input_tokens_seen": 110267920, + "step": 163610 + }, + { + "epoch": 3.997141670534776, + "grad_norm": 0.021569538861513138, + "learning_rate": 2.352519714964808e-07, + "loss": 0.0, + "num_input_tokens_seen": 110271312, + "step": 163615 + }, + { + "epoch": 3.9972638213666234, + "grad_norm": 0.014961598441004753, + "learning_rate": 2.3519702740766312e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110274576, + "step": 163620 + }, + { + "epoch": 3.9973859721984706, + "grad_norm": 0.001369571196846664, + "learning_rate": 2.3514208888069798e-07, + "loss": 0.0, + "num_input_tokens_seen": 110277392, + "step": 163625 + }, + { + "epoch": 3.9975081230303178, + "grad_norm": 0.0006057513528503478, + "learning_rate": 2.3508715591598572e-07, + "loss": 0.0, + "num_input_tokens_seen": 110280592, + "step": 163630 + }, + { + "epoch": 3.997630273862165, + "grad_norm": 0.000672376248985529, + "learning_rate": 2.3503222851392513e-07, + "loss": 0.0, + "num_input_tokens_seen": 110283984, + "step": 163635 + }, + { + "epoch": 3.997752424694012, + "grad_norm": 0.0005224617780186236, + "learning_rate": 2.3497730667491577e-07, + "loss": 0.0663, + "num_input_tokens_seen": 110287376, + "step": 163640 + }, + { + "epoch": 3.9978745755258593, + "grad_norm": 0.38089480996131897, + "learning_rate": 2.3492239039935756e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110290960, + "step": 163645 + }, + { + "epoch": 3.9979967263577065, + "grad_norm": 0.0008336842292919755, + "learning_rate": 2.348674796876493e-07, + "loss": 0.0, + "num_input_tokens_seen": 110294416, + "step": 163650 + }, + { + "epoch": 3.9981188771895537, + "grad_norm": 0.0014327648095786572, + "learning_rate": 2.3481257454019078e-07, + "loss": 0.0, + "num_input_tokens_seen": 110298512, + "step": 163655 + }, + { + "epoch": 3.998241028021401, + "grad_norm": 115.09334564208984, + "learning_rate": 2.3475767495738075e-07, + "loss": 0.0535, + "num_input_tokens_seen": 110302160, + "step": 163660 + }, + { + "epoch": 3.998363178853248, + "grad_norm": 0.00022477505262941122, + "learning_rate": 2.347027809396186e-07, + "loss": 0.0, + "num_input_tokens_seen": 110305616, + "step": 163665 + }, + { + "epoch": 3.9984853296850953, + "grad_norm": 46.11375045776367, + "learning_rate": 2.346478924873041e-07, + "loss": 0.0135, + "num_input_tokens_seen": 110308752, + "step": 163670 + }, + { + "epoch": 3.9986074805169425, + "grad_norm": 0.012809050269424915, + "learning_rate": 2.3459300960083593e-07, + "loss": 0.0, + "num_input_tokens_seen": 110312080, + "step": 163675 + }, + { + "epoch": 3.9987296313487892, + "grad_norm": 0.0009718817891553044, + "learning_rate": 2.3453813228061302e-07, + "loss": 0.0975, + "num_input_tokens_seen": 110315280, + "step": 163680 + }, + { + "epoch": 3.998851782180637, + "grad_norm": 0.01494208537042141, + "learning_rate": 2.3448326052703492e-07, + "loss": 0.0191, + "num_input_tokens_seen": 110318544, + "step": 163685 + }, + { + "epoch": 3.9989739330124836, + "grad_norm": 0.0013464835938066244, + "learning_rate": 2.3442839434050043e-07, + "loss": 0.0003, + "num_input_tokens_seen": 110322256, + "step": 163690 + }, + { + "epoch": 3.9990960838443312, + "grad_norm": 0.10594768822193146, + "learning_rate": 2.3437353372140833e-07, + "loss": 0.0, + "num_input_tokens_seen": 110325520, + "step": 163695 + }, + { + "epoch": 3.999218234676178, + "grad_norm": 0.008476635441184044, + "learning_rate": 2.3431867867015788e-07, + "loss": 0.0, + "num_input_tokens_seen": 110328976, + "step": 163700 + }, + { + "epoch": 3.999340385508025, + "grad_norm": 0.03905835002660751, + "learning_rate": 2.3426382918714815e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110332048, + "step": 163705 + }, + { + "epoch": 3.9994625363398724, + "grad_norm": 0.0044195763766765594, + "learning_rate": 2.3420898527277754e-07, + "loss": 0.0, + "num_input_tokens_seen": 110335568, + "step": 163710 + }, + { + "epoch": 3.9995846871717196, + "grad_norm": 0.0023455540649592876, + "learning_rate": 2.341541469274454e-07, + "loss": 0.0, + "num_input_tokens_seen": 110339024, + "step": 163715 + }, + { + "epoch": 3.9997068380035667, + "grad_norm": 0.12929129600524902, + "learning_rate": 2.3409931415155003e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110342032, + "step": 163720 + }, + { + "epoch": 3.999828988835414, + "grad_norm": 0.013033738359808922, + "learning_rate": 2.340444869454905e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110345680, + "step": 163725 + }, + { + "epoch": 3.999951139667261, + "grad_norm": 0.004790800623595715, + "learning_rate": 2.339896653096658e-07, + "loss": 0.0, + "num_input_tokens_seen": 110348688, + "step": 163730 + }, + { + "epoch": 4.000073290499108, + "grad_norm": 0.0014749355614185333, + "learning_rate": 2.3393484924447392e-07, + "loss": 0.0, + "num_input_tokens_seen": 110351272, + "step": 163735 + }, + { + "epoch": 4.0001954413309555, + "grad_norm": 0.0026620272547006607, + "learning_rate": 2.3388003875031415e-07, + "loss": 0.0, + "num_input_tokens_seen": 110354664, + "step": 163740 + }, + { + "epoch": 4.000293161996433, + "eval_loss": 0.247334286570549, + "eval_runtime": 47.9251, + "eval_samples_per_second": 759.206, + "eval_steps_per_second": 94.919, + "num_input_tokens_seen": 110357352, + "step": 163744 + }, + { + "epoch": 4.000317592162802, + "grad_norm": 0.0002669579698704183, + "learning_rate": 2.3382523382758456e-07, + "loss": 0.0, + "num_input_tokens_seen": 110357928, + "step": 163745 + }, + { + "epoch": 4.00043974299465, + "grad_norm": 0.019051766023039818, + "learning_rate": 2.337704344766842e-07, + "loss": 0.0, + "num_input_tokens_seen": 110361320, + "step": 163750 + }, + { + "epoch": 4.000561893826497, + "grad_norm": 0.0015128724044188857, + "learning_rate": 2.337156406980111e-07, + "loss": 0.0, + "num_input_tokens_seen": 110364584, + "step": 163755 + }, + { + "epoch": 4.000684044658344, + "grad_norm": 0.1339402049779892, + "learning_rate": 2.3366085249196387e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110368168, + "step": 163760 + }, + { + "epoch": 4.000806195490191, + "grad_norm": 0.012656701728701591, + "learning_rate": 2.3360606985894138e-07, + "loss": 0.0, + "num_input_tokens_seen": 110371176, + "step": 163765 + }, + { + "epoch": 4.000928346322039, + "grad_norm": 0.0021968057844787836, + "learning_rate": 2.335512927993414e-07, + "loss": 0.0, + "num_input_tokens_seen": 110374312, + "step": 163770 + }, + { + "epoch": 4.001050497153885, + "grad_norm": 0.001350610051304102, + "learning_rate": 2.3349652131356278e-07, + "loss": 0.0, + "num_input_tokens_seen": 110377512, + "step": 163775 + }, + { + "epoch": 4.001172647985733, + "grad_norm": 0.0015590637922286987, + "learning_rate": 2.334417554020035e-07, + "loss": 0.0419, + "num_input_tokens_seen": 110381096, + "step": 163780 + }, + { + "epoch": 4.00129479881758, + "grad_norm": 0.0005102349678054452, + "learning_rate": 2.333869950650621e-07, + "loss": 0.0, + "num_input_tokens_seen": 110384296, + "step": 163785 + }, + { + "epoch": 4.001416949649427, + "grad_norm": 0.004541546106338501, + "learning_rate": 2.333322403031367e-07, + "loss": 0.0, + "num_input_tokens_seen": 110387496, + "step": 163790 + }, + { + "epoch": 4.001539100481274, + "grad_norm": 0.01784665323793888, + "learning_rate": 2.332774911166252e-07, + "loss": 0.0, + "num_input_tokens_seen": 110391208, + "step": 163795 + }, + { + "epoch": 4.001661251313122, + "grad_norm": 0.03898552432656288, + "learning_rate": 2.332227475059263e-07, + "loss": 0.0, + "num_input_tokens_seen": 110394536, + "step": 163800 + }, + { + "epoch": 4.0017834021449685, + "grad_norm": 0.003088279627263546, + "learning_rate": 2.3316800947143744e-07, + "loss": 0.0, + "num_input_tokens_seen": 110397672, + "step": 163805 + }, + { + "epoch": 4.001905552976816, + "grad_norm": 0.006609124131500721, + "learning_rate": 2.3311327701355743e-07, + "loss": 0.0, + "num_input_tokens_seen": 110401320, + "step": 163810 + }, + { + "epoch": 4.002027703808663, + "grad_norm": 0.008004154078662395, + "learning_rate": 2.3305855013268372e-07, + "loss": 0.0, + "num_input_tokens_seen": 110404776, + "step": 163815 + }, + { + "epoch": 4.0021498546405105, + "grad_norm": 0.007009640336036682, + "learning_rate": 2.3300382882921444e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110408296, + "step": 163820 + }, + { + "epoch": 4.002272005472357, + "grad_norm": 0.004657072480767965, + "learning_rate": 2.329491131035478e-07, + "loss": 0.0, + "num_input_tokens_seen": 110411240, + "step": 163825 + }, + { + "epoch": 4.002394156304204, + "grad_norm": 0.00040120657649822533, + "learning_rate": 2.3289440295608142e-07, + "loss": 0.0, + "num_input_tokens_seen": 110414888, + "step": 163830 + }, + { + "epoch": 4.002516307136052, + "grad_norm": 0.003007831983268261, + "learning_rate": 2.328396983872134e-07, + "loss": 0.0, + "num_input_tokens_seen": 110418216, + "step": 163835 + }, + { + "epoch": 4.002638457967898, + "grad_norm": 0.018329862505197525, + "learning_rate": 2.327849993973413e-07, + "loss": 0.0, + "num_input_tokens_seen": 110421544, + "step": 163840 + }, + { + "epoch": 4.002760608799746, + "grad_norm": 0.007019806187599897, + "learning_rate": 2.3273030598686317e-07, + "loss": 0.0, + "num_input_tokens_seen": 110424744, + "step": 163845 + }, + { + "epoch": 4.002882759631593, + "grad_norm": 0.0743463933467865, + "learning_rate": 2.3267561815617641e-07, + "loss": 0.0, + "num_input_tokens_seen": 110428648, + "step": 163850 + }, + { + "epoch": 4.00300491046344, + "grad_norm": 0.0006112426053732634, + "learning_rate": 2.32620935905679e-07, + "loss": 0.0, + "num_input_tokens_seen": 110432424, + "step": 163855 + }, + { + "epoch": 4.003127061295287, + "grad_norm": 0.002231811173260212, + "learning_rate": 2.3256625923576877e-07, + "loss": 0.0, + "num_input_tokens_seen": 110435752, + "step": 163860 + }, + { + "epoch": 4.003249212127135, + "grad_norm": 0.00027201403281651437, + "learning_rate": 2.325115881468428e-07, + "loss": 0.0, + "num_input_tokens_seen": 110438952, + "step": 163865 + }, + { + "epoch": 4.0033713629589815, + "grad_norm": 0.006251008715480566, + "learning_rate": 2.324569226392994e-07, + "loss": 0.0, + "num_input_tokens_seen": 110442024, + "step": 163870 + }, + { + "epoch": 4.003493513790829, + "grad_norm": 0.0033184706699103117, + "learning_rate": 2.3240226271353525e-07, + "loss": 0.0, + "num_input_tokens_seen": 110445928, + "step": 163875 + }, + { + "epoch": 4.003615664622676, + "grad_norm": 0.0009782484266906977, + "learning_rate": 2.323476083699487e-07, + "loss": 0.0, + "num_input_tokens_seen": 110448936, + "step": 163880 + }, + { + "epoch": 4.003737815454524, + "grad_norm": 0.0054573486559093, + "learning_rate": 2.3229295960893647e-07, + "loss": 0.0, + "num_input_tokens_seen": 110452648, + "step": 163885 + }, + { + "epoch": 4.00385996628637, + "grad_norm": 0.00012896816770080477, + "learning_rate": 2.3223831643089664e-07, + "loss": 0.0, + "num_input_tokens_seen": 110456040, + "step": 163890 + }, + { + "epoch": 4.003982117118218, + "grad_norm": 0.002094441093504429, + "learning_rate": 2.3218367883622635e-07, + "loss": 0.0, + "num_input_tokens_seen": 110459176, + "step": 163895 + }, + { + "epoch": 4.004104267950065, + "grad_norm": 0.0022213133051991463, + "learning_rate": 2.3212904682532242e-07, + "loss": 0.0, + "num_input_tokens_seen": 110462760, + "step": 163900 + }, + { + "epoch": 4.004226418781912, + "grad_norm": 0.008752007968723774, + "learning_rate": 2.3207442039858306e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110466152, + "step": 163905 + }, + { + "epoch": 4.004348569613759, + "grad_norm": 0.001377705717459321, + "learning_rate": 2.320197995564046e-07, + "loss": 0.0, + "num_input_tokens_seen": 110469544, + "step": 163910 + }, + { + "epoch": 4.004470720445606, + "grad_norm": 0.0014387565897777677, + "learning_rate": 2.3196518429918488e-07, + "loss": 0.0534, + "num_input_tokens_seen": 110473320, + "step": 163915 + }, + { + "epoch": 4.0045928712774534, + "grad_norm": 0.0043650115840137005, + "learning_rate": 2.319105746273211e-07, + "loss": 0.0, + "num_input_tokens_seen": 110476520, + "step": 163920 + }, + { + "epoch": 4.0047150221093, + "grad_norm": 0.0008779270574450493, + "learning_rate": 2.3185597054120999e-07, + "loss": 0.0, + "num_input_tokens_seen": 110479976, + "step": 163925 + }, + { + "epoch": 4.004837172941148, + "grad_norm": 0.02200518734753132, + "learning_rate": 2.3180137204124905e-07, + "loss": 0.0, + "num_input_tokens_seen": 110483496, + "step": 163930 + }, + { + "epoch": 4.004959323772995, + "grad_norm": 0.09678073972463608, + "learning_rate": 2.317467791278349e-07, + "loss": 0.0, + "num_input_tokens_seen": 110486632, + "step": 163935 + }, + { + "epoch": 4.005081474604842, + "grad_norm": 0.003165224101394415, + "learning_rate": 2.3169219180136513e-07, + "loss": 0.0, + "num_input_tokens_seen": 110490152, + "step": 163940 + }, + { + "epoch": 4.005203625436689, + "grad_norm": 0.0009228963754139841, + "learning_rate": 2.3163761006223616e-07, + "loss": 0.0, + "num_input_tokens_seen": 110494120, + "step": 163945 + }, + { + "epoch": 4.005325776268537, + "grad_norm": 0.0017742099007591605, + "learning_rate": 2.315830339108451e-07, + "loss": 0.0, + "num_input_tokens_seen": 110497896, + "step": 163950 + }, + { + "epoch": 4.005447927100383, + "grad_norm": 0.00030024818261153996, + "learning_rate": 2.3152846334758912e-07, + "loss": 0.0, + "num_input_tokens_seen": 110501032, + "step": 163955 + }, + { + "epoch": 4.005570077932231, + "grad_norm": 0.0026951045729219913, + "learning_rate": 2.314738983728647e-07, + "loss": 0.0, + "num_input_tokens_seen": 110504616, + "step": 163960 + }, + { + "epoch": 4.005692228764078, + "grad_norm": 0.0005750693962909281, + "learning_rate": 2.31419338987069e-07, + "loss": 0.0, + "num_input_tokens_seen": 110508200, + "step": 163965 + }, + { + "epoch": 4.005814379595925, + "grad_norm": 0.0014873980544507504, + "learning_rate": 2.3136478519059832e-07, + "loss": 0.0, + "num_input_tokens_seen": 110511720, + "step": 163970 + }, + { + "epoch": 4.005936530427772, + "grad_norm": 0.0025685285218060017, + "learning_rate": 2.3131023698384966e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110516008, + "step": 163975 + }, + { + "epoch": 4.00605868125962, + "grad_norm": 0.002139053773134947, + "learning_rate": 2.3125569436721993e-07, + "loss": 0.0, + "num_input_tokens_seen": 110519016, + "step": 163980 + }, + { + "epoch": 4.0061808320914665, + "grad_norm": 0.004334287252277136, + "learning_rate": 2.3120115734110556e-07, + "loss": 0.0, + "num_input_tokens_seen": 110522408, + "step": 163985 + }, + { + "epoch": 4.006302982923314, + "grad_norm": 0.0014206593623384833, + "learning_rate": 2.3114662590590294e-07, + "loss": 0.0, + "num_input_tokens_seen": 110526056, + "step": 163990 + }, + { + "epoch": 4.006425133755161, + "grad_norm": 0.0003952729166485369, + "learning_rate": 2.310921000620092e-07, + "loss": 0.0202, + "num_input_tokens_seen": 110528936, + "step": 163995 + }, + { + "epoch": 4.006547284587008, + "grad_norm": 0.020650498569011688, + "learning_rate": 2.3103757980982042e-07, + "loss": 0.0, + "num_input_tokens_seen": 110532264, + "step": 164000 + }, + { + "epoch": 4.006669435418855, + "grad_norm": 0.0008711630944162607, + "learning_rate": 2.3098306514973287e-07, + "loss": 0.0, + "num_input_tokens_seen": 110535400, + "step": 164005 + }, + { + "epoch": 4.006791586250702, + "grad_norm": 0.0007402771152555943, + "learning_rate": 2.3092855608214345e-07, + "loss": 0.0, + "num_input_tokens_seen": 110538600, + "step": 164010 + }, + { + "epoch": 4.00691373708255, + "grad_norm": 0.00012223249359522015, + "learning_rate": 2.3087405260744852e-07, + "loss": 0.0467, + "num_input_tokens_seen": 110541928, + "step": 164015 + }, + { + "epoch": 4.007035887914396, + "grad_norm": 0.0020083903800696135, + "learning_rate": 2.3081955472604419e-07, + "loss": 0.0, + "num_input_tokens_seen": 110545192, + "step": 164020 + }, + { + "epoch": 4.007158038746244, + "grad_norm": 0.006110670510679483, + "learning_rate": 2.3076506243832727e-07, + "loss": 0.0, + "num_input_tokens_seen": 110548456, + "step": 164025 + }, + { + "epoch": 4.007280189578091, + "grad_norm": 0.00020064758427906781, + "learning_rate": 2.3071057574469332e-07, + "loss": 0.0, + "num_input_tokens_seen": 110551592, + "step": 164030 + }, + { + "epoch": 4.007402340409938, + "grad_norm": 0.00031926928204484284, + "learning_rate": 2.3065609464553937e-07, + "loss": 0.0, + "num_input_tokens_seen": 110554664, + "step": 164035 + }, + { + "epoch": 4.007524491241785, + "grad_norm": 0.0028003801126033068, + "learning_rate": 2.3060161914126086e-07, + "loss": 0.0, + "num_input_tokens_seen": 110558056, + "step": 164040 + }, + { + "epoch": 4.007646642073633, + "grad_norm": 0.009324025362730026, + "learning_rate": 2.305471492322544e-07, + "loss": 0.0, + "num_input_tokens_seen": 110561192, + "step": 164045 + }, + { + "epoch": 4.0077687929054795, + "grad_norm": 0.002270081313326955, + "learning_rate": 2.3049268491891615e-07, + "loss": 0.0, + "num_input_tokens_seen": 110565160, + "step": 164050 + }, + { + "epoch": 4.007890943737327, + "grad_norm": 0.00206292187795043, + "learning_rate": 2.3043822620164187e-07, + "loss": 0.0003, + "num_input_tokens_seen": 110568616, + "step": 164055 + }, + { + "epoch": 4.008013094569174, + "grad_norm": 0.00012831814819946885, + "learning_rate": 2.3038377308082812e-07, + "loss": 0.0, + "num_input_tokens_seen": 110572712, + "step": 164060 + }, + { + "epoch": 4.0081352454010215, + "grad_norm": 0.0010039202170446515, + "learning_rate": 2.3032932555687033e-07, + "loss": 0.0003, + "num_input_tokens_seen": 110576552, + "step": 164065 + }, + { + "epoch": 4.008257396232868, + "grad_norm": 0.0008860706002451479, + "learning_rate": 2.3027488363016458e-07, + "loss": 0.0, + "num_input_tokens_seen": 110579752, + "step": 164070 + }, + { + "epoch": 4.008379547064716, + "grad_norm": 0.0006712638423778117, + "learning_rate": 2.3022044730110723e-07, + "loss": 0.0, + "num_input_tokens_seen": 110583144, + "step": 164075 + }, + { + "epoch": 4.008501697896563, + "grad_norm": 0.0006703397375531495, + "learning_rate": 2.301660165700936e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110586344, + "step": 164080 + }, + { + "epoch": 4.00862384872841, + "grad_norm": 0.0009707873105071485, + "learning_rate": 2.3011159143752e-07, + "loss": 0.0, + "num_input_tokens_seen": 110590120, + "step": 164085 + }, + { + "epoch": 4.008745999560257, + "grad_norm": 0.0013939151540398598, + "learning_rate": 2.300571719037817e-07, + "loss": 0.0, + "num_input_tokens_seen": 110593448, + "step": 164090 + }, + { + "epoch": 4.008868150392104, + "grad_norm": 0.0066462913528084755, + "learning_rate": 2.3000275796927504e-07, + "loss": 0.0, + "num_input_tokens_seen": 110597288, + "step": 164095 + }, + { + "epoch": 4.008990301223951, + "grad_norm": 0.0016602538526058197, + "learning_rate": 2.2994834963439547e-07, + "loss": 0.0, + "num_input_tokens_seen": 110600168, + "step": 164100 + }, + { + "epoch": 4.009112452055798, + "grad_norm": 0.00018916718545369804, + "learning_rate": 2.2989394689953824e-07, + "loss": 0.0, + "num_input_tokens_seen": 110603944, + "step": 164105 + }, + { + "epoch": 4.009234602887646, + "grad_norm": 0.0003112444537691772, + "learning_rate": 2.2983954976509967e-07, + "loss": 0.0, + "num_input_tokens_seen": 110606888, + "step": 164110 + }, + { + "epoch": 4.0093567537194925, + "grad_norm": 0.001570153865031898, + "learning_rate": 2.2978515823147481e-07, + "loss": 0.0, + "num_input_tokens_seen": 110610088, + "step": 164115 + }, + { + "epoch": 4.00947890455134, + "grad_norm": 0.0008236741996370256, + "learning_rate": 2.2973077229905967e-07, + "loss": 0.0, + "num_input_tokens_seen": 110613416, + "step": 164120 + }, + { + "epoch": 4.009601055383187, + "grad_norm": 0.0006083215121179819, + "learning_rate": 2.2967639196824928e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110616488, + "step": 164125 + }, + { + "epoch": 4.0097232062150345, + "grad_norm": 0.011240467429161072, + "learning_rate": 2.296220172394394e-07, + "loss": 0.0, + "num_input_tokens_seen": 110619624, + "step": 164130 + }, + { + "epoch": 4.009845357046881, + "grad_norm": 0.001044436008669436, + "learning_rate": 2.2956764811302564e-07, + "loss": 0.0, + "num_input_tokens_seen": 110622952, + "step": 164135 + }, + { + "epoch": 4.009967507878729, + "grad_norm": 0.0005551927606575191, + "learning_rate": 2.295132845894029e-07, + "loss": 0.058, + "num_input_tokens_seen": 110626280, + "step": 164140 + }, + { + "epoch": 4.010089658710576, + "grad_norm": 0.00011305078805889934, + "learning_rate": 2.2945892666896705e-07, + "loss": 0.0, + "num_input_tokens_seen": 110630056, + "step": 164145 + }, + { + "epoch": 4.010211809542423, + "grad_norm": 0.0020253423135727644, + "learning_rate": 2.2940457435211292e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110633320, + "step": 164150 + }, + { + "epoch": 4.01033396037427, + "grad_norm": 0.000794794992543757, + "learning_rate": 2.2935022763923618e-07, + "loss": 0.0, + "num_input_tokens_seen": 110636904, + "step": 164155 + }, + { + "epoch": 4.010456111206118, + "grad_norm": 0.003235079115256667, + "learning_rate": 2.2929588653073163e-07, + "loss": 0.0, + "num_input_tokens_seen": 110640104, + "step": 164160 + }, + { + "epoch": 4.010578262037964, + "grad_norm": 0.00310494820587337, + "learning_rate": 2.2924155102699472e-07, + "loss": 0.0, + "num_input_tokens_seen": 110643304, + "step": 164165 + }, + { + "epoch": 4.010700412869812, + "grad_norm": 0.01030244305729866, + "learning_rate": 2.2918722112842082e-07, + "loss": 0.0, + "num_input_tokens_seen": 110646568, + "step": 164170 + }, + { + "epoch": 4.010822563701659, + "grad_norm": 0.0007277664844878018, + "learning_rate": 2.291328968354045e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110649896, + "step": 164175 + }, + { + "epoch": 4.0109447145335055, + "grad_norm": 0.03205833584070206, + "learning_rate": 2.2907857814834132e-07, + "loss": 0.0, + "num_input_tokens_seen": 110653160, + "step": 164180 + }, + { + "epoch": 4.011066865365353, + "grad_norm": 0.01856006681919098, + "learning_rate": 2.2902426506762574e-07, + "loss": 0.0, + "num_input_tokens_seen": 110656552, + "step": 164185 + }, + { + "epoch": 4.0111890161972, + "grad_norm": 0.0006459152791649103, + "learning_rate": 2.2896995759365344e-07, + "loss": 0.0, + "num_input_tokens_seen": 110659688, + "step": 164190 + }, + { + "epoch": 4.0113111670290476, + "grad_norm": 0.00521416962146759, + "learning_rate": 2.289156557268187e-07, + "loss": 0.0, + "num_input_tokens_seen": 110663144, + "step": 164195 + }, + { + "epoch": 4.011433317860894, + "grad_norm": 0.0003529897076077759, + "learning_rate": 2.2886135946751706e-07, + "loss": 0.0, + "num_input_tokens_seen": 110666152, + "step": 164200 + }, + { + "epoch": 4.011555468692742, + "grad_norm": 0.016934869810938835, + "learning_rate": 2.2880706881614298e-07, + "loss": 0.0, + "num_input_tokens_seen": 110669352, + "step": 164205 + }, + { + "epoch": 4.011677619524589, + "grad_norm": 0.001921301824040711, + "learning_rate": 2.2875278377309093e-07, + "loss": 0.0, + "num_input_tokens_seen": 110673064, + "step": 164210 + }, + { + "epoch": 4.011799770356436, + "grad_norm": 0.005222341511398554, + "learning_rate": 2.2869850433875648e-07, + "loss": 0.0, + "num_input_tokens_seen": 110676328, + "step": 164215 + }, + { + "epoch": 4.011921921188283, + "grad_norm": 0.000702301156707108, + "learning_rate": 2.2864423051353366e-07, + "loss": 0.0, + "num_input_tokens_seen": 110679784, + "step": 164220 + }, + { + "epoch": 4.012044072020131, + "grad_norm": 0.0010769871296361089, + "learning_rate": 2.2858996229781745e-07, + "loss": 0.0, + "num_input_tokens_seen": 110682856, + "step": 164225 + }, + { + "epoch": 4.012166222851977, + "grad_norm": 0.011183716356754303, + "learning_rate": 2.285356996920028e-07, + "loss": 0.0, + "num_input_tokens_seen": 110686248, + "step": 164230 + }, + { + "epoch": 4.012288373683825, + "grad_norm": 0.0006547856028191745, + "learning_rate": 2.2848144269648373e-07, + "loss": 0.0, + "num_input_tokens_seen": 110689960, + "step": 164235 + }, + { + "epoch": 4.012410524515672, + "grad_norm": 0.03447788953781128, + "learning_rate": 2.2842719131165544e-07, + "loss": 0.0235, + "num_input_tokens_seen": 110693672, + "step": 164240 + }, + { + "epoch": 4.0125326753475195, + "grad_norm": 0.0019113948801532388, + "learning_rate": 2.2837294553791186e-07, + "loss": 0.0, + "num_input_tokens_seen": 110696936, + "step": 164245 + }, + { + "epoch": 4.012654826179366, + "grad_norm": 0.006821444723755121, + "learning_rate": 2.2831870537564814e-07, + "loss": 0.0, + "num_input_tokens_seen": 110700328, + "step": 164250 + }, + { + "epoch": 4.012776977011214, + "grad_norm": 0.00023176280956249684, + "learning_rate": 2.282644708252579e-07, + "loss": 0.0, + "num_input_tokens_seen": 110703720, + "step": 164255 + }, + { + "epoch": 4.012899127843061, + "grad_norm": 0.007748324424028397, + "learning_rate": 2.2821024188713612e-07, + "loss": 0.0, + "num_input_tokens_seen": 110707240, + "step": 164260 + }, + { + "epoch": 4.013021278674908, + "grad_norm": 0.00011586442997213453, + "learning_rate": 2.2815601856167722e-07, + "loss": 0.0, + "num_input_tokens_seen": 110710568, + "step": 164265 + }, + { + "epoch": 4.013143429506755, + "grad_norm": 0.0004410957044456154, + "learning_rate": 2.281018008492751e-07, + "loss": 0.0, + "num_input_tokens_seen": 110714280, + "step": 164270 + }, + { + "epoch": 4.013265580338602, + "grad_norm": 0.0011323723010718822, + "learning_rate": 2.280475887503247e-07, + "loss": 0.0, + "num_input_tokens_seen": 110717736, + "step": 164275 + }, + { + "epoch": 4.013387731170449, + "grad_norm": 0.018920201808214188, + "learning_rate": 2.2799338226521947e-07, + "loss": 0.0, + "num_input_tokens_seen": 110720872, + "step": 164280 + }, + { + "epoch": 4.013509882002296, + "grad_norm": 0.002477227710187435, + "learning_rate": 2.279391813943541e-07, + "loss": 0.0, + "num_input_tokens_seen": 110724264, + "step": 164285 + }, + { + "epoch": 4.013632032834144, + "grad_norm": 0.010635924525558949, + "learning_rate": 2.2788498613812279e-07, + "loss": 0.0, + "num_input_tokens_seen": 110728552, + "step": 164290 + }, + { + "epoch": 4.0137541836659905, + "grad_norm": 0.0006659305654466152, + "learning_rate": 2.278307964969196e-07, + "loss": 0.0, + "num_input_tokens_seen": 110732136, + "step": 164295 + }, + { + "epoch": 4.013876334497838, + "grad_norm": 8.711554983165115e-05, + "learning_rate": 2.2777661247113832e-07, + "loss": 0.0, + "num_input_tokens_seen": 110735400, + "step": 164300 + }, + { + "epoch": 4.013998485329685, + "grad_norm": 0.0004100829246453941, + "learning_rate": 2.2772243406117353e-07, + "loss": 0.0, + "num_input_tokens_seen": 110738920, + "step": 164305 + }, + { + "epoch": 4.0141206361615325, + "grad_norm": 0.001897158334031701, + "learning_rate": 2.2766826126741877e-07, + "loss": 0.0, + "num_input_tokens_seen": 110742376, + "step": 164310 + }, + { + "epoch": 4.014242786993379, + "grad_norm": 0.0074745300225913525, + "learning_rate": 2.27614094090268e-07, + "loss": 0.0, + "num_input_tokens_seen": 110745576, + "step": 164315 + }, + { + "epoch": 4.014364937825227, + "grad_norm": 0.0002957666292786598, + "learning_rate": 2.275599325301153e-07, + "loss": 0.0, + "num_input_tokens_seen": 110748968, + "step": 164320 + }, + { + "epoch": 4.014487088657074, + "grad_norm": 0.008016318082809448, + "learning_rate": 2.275057765873547e-07, + "loss": 0.0, + "num_input_tokens_seen": 110752232, + "step": 164325 + }, + { + "epoch": 4.014609239488921, + "grad_norm": 0.001003736280836165, + "learning_rate": 2.274516262623797e-07, + "loss": 0.0004, + "num_input_tokens_seen": 110755624, + "step": 164330 + }, + { + "epoch": 4.014731390320768, + "grad_norm": 0.00033409715979360044, + "learning_rate": 2.2739748155558448e-07, + "loss": 0.0, + "num_input_tokens_seen": 110758760, + "step": 164335 + }, + { + "epoch": 4.014853541152616, + "grad_norm": 0.0016860624309629202, + "learning_rate": 2.273433424673622e-07, + "loss": 0.0, + "num_input_tokens_seen": 110761960, + "step": 164340 + }, + { + "epoch": 4.014975691984462, + "grad_norm": 0.0037533806171268225, + "learning_rate": 2.2728920899810734e-07, + "loss": 0.0, + "num_input_tokens_seen": 110765160, + "step": 164345 + }, + { + "epoch": 4.01509784281631, + "grad_norm": 0.12728959321975708, + "learning_rate": 2.27235081148213e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110768616, + "step": 164350 + }, + { + "epoch": 4.015219993648157, + "grad_norm": 0.0001371447870042175, + "learning_rate": 2.2718095891807287e-07, + "loss": 0.0, + "num_input_tokens_seen": 110771880, + "step": 164355 + }, + { + "epoch": 4.0153421444800035, + "grad_norm": 0.00028730809572152793, + "learning_rate": 2.27126842308081e-07, + "loss": 0.0, + "num_input_tokens_seen": 110775272, + "step": 164360 + }, + { + "epoch": 4.015464295311851, + "grad_norm": 0.0014190376969054341, + "learning_rate": 2.2707273131863025e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110778216, + "step": 164365 + }, + { + "epoch": 4.015586446143698, + "grad_norm": 0.008561410009860992, + "learning_rate": 2.270186259501149e-07, + "loss": 0.0, + "num_input_tokens_seen": 110781608, + "step": 164370 + }, + { + "epoch": 4.0157085969755455, + "grad_norm": 0.0006715949275530875, + "learning_rate": 2.269645262029276e-07, + "loss": 0.0, + "num_input_tokens_seen": 110784872, + "step": 164375 + }, + { + "epoch": 4.015830747807392, + "grad_norm": 0.0041750771924853325, + "learning_rate": 2.269104320774623e-07, + "loss": 0.0, + "num_input_tokens_seen": 110788392, + "step": 164380 + }, + { + "epoch": 4.01595289863924, + "grad_norm": 0.007014765869826078, + "learning_rate": 2.2685634357411242e-07, + "loss": 0.0, + "num_input_tokens_seen": 110791720, + "step": 164385 + }, + { + "epoch": 4.016075049471087, + "grad_norm": 0.0028299784753471613, + "learning_rate": 2.2680226069327102e-07, + "loss": 0.0, + "num_input_tokens_seen": 110794792, + "step": 164390 + }, + { + "epoch": 4.016197200302934, + "grad_norm": 0.0004960571532137692, + "learning_rate": 2.2674818343533175e-07, + "loss": 0.0, + "num_input_tokens_seen": 110798184, + "step": 164395 + }, + { + "epoch": 4.016319351134781, + "grad_norm": 0.008951370604336262, + "learning_rate": 2.2669411180068742e-07, + "loss": 0.0, + "num_input_tokens_seen": 110801256, + "step": 164400 + }, + { + "epoch": 4.016441501966629, + "grad_norm": 0.004213410429656506, + "learning_rate": 2.2664004578973173e-07, + "loss": 0.0, + "num_input_tokens_seen": 110804712, + "step": 164405 + }, + { + "epoch": 4.016563652798475, + "grad_norm": 0.00019606153364293277, + "learning_rate": 2.2658598540285767e-07, + "loss": 0.0, + "num_input_tokens_seen": 110807720, + "step": 164410 + }, + { + "epoch": 4.016685803630323, + "grad_norm": 0.0010281888535246253, + "learning_rate": 2.2653193064045807e-07, + "loss": 0.0, + "num_input_tokens_seen": 110811048, + "step": 164415 + }, + { + "epoch": 4.01680795446217, + "grad_norm": 0.0025090575218200684, + "learning_rate": 2.2647788150292657e-07, + "loss": 0.0, + "num_input_tokens_seen": 110814440, + "step": 164420 + }, + { + "epoch": 4.016930105294017, + "grad_norm": 0.0008665485656820238, + "learning_rate": 2.2642383799065578e-07, + "loss": 0.0, + "num_input_tokens_seen": 110817640, + "step": 164425 + }, + { + "epoch": 4.017052256125864, + "grad_norm": 0.009751011617481709, + "learning_rate": 2.2636980010403904e-07, + "loss": 0.0, + "num_input_tokens_seen": 110821096, + "step": 164430 + }, + { + "epoch": 4.017174406957712, + "grad_norm": 0.005670327227562666, + "learning_rate": 2.2631576784346906e-07, + "loss": 0.0, + "num_input_tokens_seen": 110824616, + "step": 164435 + }, + { + "epoch": 4.0172965577895585, + "grad_norm": 0.0025142088998109102, + "learning_rate": 2.2626174120933882e-07, + "loss": 0.0, + "num_input_tokens_seen": 110828008, + "step": 164440 + }, + { + "epoch": 4.017418708621406, + "grad_norm": 0.0021428910549730062, + "learning_rate": 2.262077202020416e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110831144, + "step": 164445 + }, + { + "epoch": 4.017540859453253, + "grad_norm": 0.00027007071184925735, + "learning_rate": 2.261537048219697e-07, + "loss": 0.0, + "num_input_tokens_seen": 110834216, + "step": 164450 + }, + { + "epoch": 4.0176630102851, + "grad_norm": 0.011947103776037693, + "learning_rate": 2.2609969506951655e-07, + "loss": 0.0, + "num_input_tokens_seen": 110837352, + "step": 164455 + }, + { + "epoch": 4.017785161116947, + "grad_norm": 8.05276504252106e-05, + "learning_rate": 2.260456909450742e-07, + "loss": 0.0, + "num_input_tokens_seen": 110840808, + "step": 164460 + }, + { + "epoch": 4.017907311948794, + "grad_norm": 0.00022645792341791093, + "learning_rate": 2.2599169244903604e-07, + "loss": 0.0, + "num_input_tokens_seen": 110843752, + "step": 164465 + }, + { + "epoch": 4.018029462780642, + "grad_norm": 0.0010104464599862695, + "learning_rate": 2.259376995817942e-07, + "loss": 0.0, + "num_input_tokens_seen": 110847016, + "step": 164470 + }, + { + "epoch": 4.018151613612488, + "grad_norm": 0.0003966574731748551, + "learning_rate": 2.258837123437416e-07, + "loss": 0.0, + "num_input_tokens_seen": 110850280, + "step": 164475 + }, + { + "epoch": 4.018273764444336, + "grad_norm": 0.00997732114046812, + "learning_rate": 2.258297307352711e-07, + "loss": 0.0, + "num_input_tokens_seen": 110853224, + "step": 164480 + }, + { + "epoch": 4.018395915276183, + "grad_norm": 0.0007848578388802707, + "learning_rate": 2.257757547567748e-07, + "loss": 0.0, + "num_input_tokens_seen": 110857000, + "step": 164485 + }, + { + "epoch": 4.01851806610803, + "grad_norm": 0.000499740825034678, + "learning_rate": 2.2572178440864575e-07, + "loss": 0.0, + "num_input_tokens_seen": 110860136, + "step": 164490 + }, + { + "epoch": 4.018640216939877, + "grad_norm": 0.0006135724834166467, + "learning_rate": 2.256678196912758e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110863080, + "step": 164495 + }, + { + "epoch": 4.018762367771725, + "grad_norm": 0.00032030860893428326, + "learning_rate": 2.2561386060505805e-07, + "loss": 0.0, + "num_input_tokens_seen": 110866664, + "step": 164500 + }, + { + "epoch": 4.0188845186035715, + "grad_norm": 0.001445369329303503, + "learning_rate": 2.2555990715038432e-07, + "loss": 0.0, + "num_input_tokens_seen": 110869480, + "step": 164505 + }, + { + "epoch": 4.019006669435419, + "grad_norm": 0.004807790741324425, + "learning_rate": 2.255059593276476e-07, + "loss": 0.0, + "num_input_tokens_seen": 110872296, + "step": 164510 + }, + { + "epoch": 4.019128820267266, + "grad_norm": 0.0006840810528956354, + "learning_rate": 2.254520171372397e-07, + "loss": 0.0, + "num_input_tokens_seen": 110876072, + "step": 164515 + }, + { + "epoch": 4.019250971099114, + "grad_norm": 0.00278223748318851, + "learning_rate": 2.253980805795529e-07, + "loss": 0.0, + "num_input_tokens_seen": 110879720, + "step": 164520 + }, + { + "epoch": 4.01937312193096, + "grad_norm": 0.00030470662750303745, + "learning_rate": 2.2534414965497984e-07, + "loss": 0.0, + "num_input_tokens_seen": 110882920, + "step": 164525 + }, + { + "epoch": 4.019495272762808, + "grad_norm": 0.0013142710085958242, + "learning_rate": 2.2529022436391221e-07, + "loss": 0.0, + "num_input_tokens_seen": 110886568, + "step": 164530 + }, + { + "epoch": 4.019617423594655, + "grad_norm": 0.0029207654297351837, + "learning_rate": 2.2523630470674238e-07, + "loss": 0.0, + "num_input_tokens_seen": 110889832, + "step": 164535 + }, + { + "epoch": 4.019739574426501, + "grad_norm": 0.07291989028453827, + "learning_rate": 2.251823906838629e-07, + "loss": 0.0, + "num_input_tokens_seen": 110893672, + "step": 164540 + }, + { + "epoch": 4.019861725258349, + "grad_norm": 0.005791286937892437, + "learning_rate": 2.2512848229566517e-07, + "loss": 0.0, + "num_input_tokens_seen": 110897384, + "step": 164545 + }, + { + "epoch": 4.019983876090196, + "grad_norm": 0.005201366264373064, + "learning_rate": 2.2507457954254173e-07, + "loss": 0.0, + "num_input_tokens_seen": 110900584, + "step": 164550 + }, + { + "epoch": 4.020106026922043, + "grad_norm": 0.0014897704822942615, + "learning_rate": 2.2502068242488414e-07, + "loss": 0.0, + "num_input_tokens_seen": 110903656, + "step": 164555 + }, + { + "epoch": 4.02022817775389, + "grad_norm": 0.0009115237044170499, + "learning_rate": 2.249667909430849e-07, + "loss": 0.0, + "num_input_tokens_seen": 110906984, + "step": 164560 + }, + { + "epoch": 4.020350328585738, + "grad_norm": 0.0010413050185889006, + "learning_rate": 2.2491290509753536e-07, + "loss": 0.0, + "num_input_tokens_seen": 110910376, + "step": 164565 + }, + { + "epoch": 4.020472479417585, + "grad_norm": 0.00037109790719114244, + "learning_rate": 2.2485902488862763e-07, + "loss": 0.0, + "num_input_tokens_seen": 110913384, + "step": 164570 + }, + { + "epoch": 4.020594630249432, + "grad_norm": 0.0031259816605597734, + "learning_rate": 2.2480515031675384e-07, + "loss": 0.0, + "num_input_tokens_seen": 110917224, + "step": 164575 + }, + { + "epoch": 4.020716781081279, + "grad_norm": 0.00027198364841751754, + "learning_rate": 2.2475128138230516e-07, + "loss": 0.0, + "num_input_tokens_seen": 110920360, + "step": 164580 + }, + { + "epoch": 4.020838931913127, + "grad_norm": 0.25161880254745483, + "learning_rate": 2.24697418085674e-07, + "loss": 0.0, + "num_input_tokens_seen": 110923496, + "step": 164585 + }, + { + "epoch": 4.020961082744973, + "grad_norm": 0.015751512721180916, + "learning_rate": 2.2464356042725152e-07, + "loss": 0.0, + "num_input_tokens_seen": 110926888, + "step": 164590 + }, + { + "epoch": 4.021083233576821, + "grad_norm": 0.012762527912855148, + "learning_rate": 2.2458970840742976e-07, + "loss": 0.0, + "num_input_tokens_seen": 110930152, + "step": 164595 + }, + { + "epoch": 4.021205384408668, + "grad_norm": 0.020243745297193527, + "learning_rate": 2.2453586202660003e-07, + "loss": 0.0001, + "num_input_tokens_seen": 110933672, + "step": 164600 + }, + { + "epoch": 4.021327535240515, + "grad_norm": 0.0001934144675033167, + "learning_rate": 2.2448202128515436e-07, + "loss": 0.0, + "num_input_tokens_seen": 110936936, + "step": 164605 + }, + { + "epoch": 4.021449686072362, + "grad_norm": 0.0003170445270370692, + "learning_rate": 2.2442818618348368e-07, + "loss": 0.0, + "num_input_tokens_seen": 110939816, + "step": 164610 + }, + { + "epoch": 4.02157183690421, + "grad_norm": 0.00047657734830863774, + "learning_rate": 2.2437435672198014e-07, + "loss": 0.0, + "num_input_tokens_seen": 110943272, + "step": 164615 + }, + { + "epoch": 4.0216939877360565, + "grad_norm": 0.005242410581558943, + "learning_rate": 2.243205329010349e-07, + "loss": 0.0, + "num_input_tokens_seen": 110946408, + "step": 164620 + }, + { + "epoch": 4.021816138567903, + "grad_norm": 0.0006164037040434778, + "learning_rate": 2.242667147210392e-07, + "loss": 0.0002, + "num_input_tokens_seen": 110949864, + "step": 164625 + }, + { + "epoch": 4.021938289399751, + "grad_norm": 0.00393176032230258, + "learning_rate": 2.2421290218238442e-07, + "loss": 0.0, + "num_input_tokens_seen": 110953832, + "step": 164630 + }, + { + "epoch": 4.022060440231598, + "grad_norm": 0.00013939302880316973, + "learning_rate": 2.241590952854625e-07, + "loss": 0.0, + "num_input_tokens_seen": 110956968, + "step": 164635 + }, + { + "epoch": 4.022182591063445, + "grad_norm": 0.0005854598130099475, + "learning_rate": 2.241052940306639e-07, + "loss": 0.0, + "num_input_tokens_seen": 110960168, + "step": 164640 + }, + { + "epoch": 4.022304741895292, + "grad_norm": 0.00018355067004449666, + "learning_rate": 2.2405149841838068e-07, + "loss": 0.0, + "num_input_tokens_seen": 110963496, + "step": 164645 + }, + { + "epoch": 4.02242689272714, + "grad_norm": 0.0023532859049737453, + "learning_rate": 2.2399770844900334e-07, + "loss": 0.0, + "num_input_tokens_seen": 110966632, + "step": 164650 + }, + { + "epoch": 4.022549043558986, + "grad_norm": 0.0013373279944062233, + "learning_rate": 2.2394392412292372e-07, + "loss": 0.0, + "num_input_tokens_seen": 110969896, + "step": 164655 + }, + { + "epoch": 4.022671194390834, + "grad_norm": 0.8230629563331604, + "learning_rate": 2.238901454405323e-07, + "loss": 0.0005, + "num_input_tokens_seen": 110973352, + "step": 164660 + }, + { + "epoch": 4.022793345222681, + "grad_norm": 0.0009062674944289029, + "learning_rate": 2.2383637240222052e-07, + "loss": 0.0, + "num_input_tokens_seen": 110976680, + "step": 164665 + }, + { + "epoch": 4.022915496054528, + "grad_norm": 0.0002744555240496993, + "learning_rate": 2.2378260500837965e-07, + "loss": 0.0, + "num_input_tokens_seen": 110980136, + "step": 164670 + }, + { + "epoch": 4.023037646886375, + "grad_norm": 0.00032696948619559407, + "learning_rate": 2.2372884325940013e-07, + "loss": 0.0, + "num_input_tokens_seen": 110983400, + "step": 164675 + }, + { + "epoch": 4.023159797718223, + "grad_norm": 0.0033456883393228054, + "learning_rate": 2.2367508715567364e-07, + "loss": 0.0, + "num_input_tokens_seen": 110987240, + "step": 164680 + }, + { + "epoch": 4.0232819485500695, + "grad_norm": 0.002025763038545847, + "learning_rate": 2.236213366975903e-07, + "loss": 0.0, + "num_input_tokens_seen": 110990568, + "step": 164685 + }, + { + "epoch": 4.023404099381917, + "grad_norm": 0.029058869928121567, + "learning_rate": 2.2356759188554153e-07, + "loss": 0.0, + "num_input_tokens_seen": 110994024, + "step": 164690 + }, + { + "epoch": 4.023526250213764, + "grad_norm": 0.01186623889952898, + "learning_rate": 2.235138527199184e-07, + "loss": 0.0, + "num_input_tokens_seen": 110997096, + "step": 164695 + }, + { + "epoch": 4.0236484010456115, + "grad_norm": 0.007000217214226723, + "learning_rate": 2.2346011920111095e-07, + "loss": 0.0, + "num_input_tokens_seen": 111000744, + "step": 164700 + }, + { + "epoch": 4.023770551877458, + "grad_norm": 0.0017543296562507749, + "learning_rate": 2.2340639132951077e-07, + "loss": 0.0, + "num_input_tokens_seen": 111004008, + "step": 164705 + }, + { + "epoch": 4.023892702709306, + "grad_norm": 0.0025902055203914642, + "learning_rate": 2.2335266910550787e-07, + "loss": 0.0, + "num_input_tokens_seen": 111007528, + "step": 164710 + }, + { + "epoch": 4.024014853541153, + "grad_norm": 0.0019935499876737595, + "learning_rate": 2.2329895252949348e-07, + "loss": 0.0, + "num_input_tokens_seen": 111010600, + "step": 164715 + }, + { + "epoch": 4.024137004372999, + "grad_norm": 0.0051970952190458775, + "learning_rate": 2.2324524160185808e-07, + "loss": 0.0, + "num_input_tokens_seen": 111014696, + "step": 164720 + }, + { + "epoch": 4.024259155204847, + "grad_norm": 0.0010366010246798396, + "learning_rate": 2.2319153632299192e-07, + "loss": 0.0698, + "num_input_tokens_seen": 111017832, + "step": 164725 + }, + { + "epoch": 4.024381306036694, + "grad_norm": 0.0009999765316024423, + "learning_rate": 2.2313783669328613e-07, + "loss": 0.0, + "num_input_tokens_seen": 111020968, + "step": 164730 + }, + { + "epoch": 4.024503456868541, + "grad_norm": 0.00013882630446460098, + "learning_rate": 2.230841427131307e-07, + "loss": 0.0, + "num_input_tokens_seen": 111024232, + "step": 164735 + }, + { + "epoch": 4.024625607700388, + "grad_norm": 0.0006216170149855316, + "learning_rate": 2.2303045438291656e-07, + "loss": 0.0, + "num_input_tokens_seen": 111027816, + "step": 164740 + }, + { + "epoch": 4.024747758532236, + "grad_norm": 0.0018986144568771124, + "learning_rate": 2.2297677170303363e-07, + "loss": 0.0, + "num_input_tokens_seen": 111030568, + "step": 164745 + }, + { + "epoch": 4.0248699093640825, + "grad_norm": 0.0037460937164723873, + "learning_rate": 2.2292309467387293e-07, + "loss": 0.0, + "num_input_tokens_seen": 111033576, + "step": 164750 + }, + { + "epoch": 4.02499206019593, + "grad_norm": 0.002591182477772236, + "learning_rate": 2.2286942329582425e-07, + "loss": 0.0, + "num_input_tokens_seen": 111037352, + "step": 164755 + }, + { + "epoch": 4.025114211027777, + "grad_norm": 0.002688974840566516, + "learning_rate": 2.2281575756927818e-07, + "loss": 0.0, + "num_input_tokens_seen": 111040488, + "step": 164760 + }, + { + "epoch": 4.0252363618596245, + "grad_norm": 0.0012876774417236447, + "learning_rate": 2.2276209749462516e-07, + "loss": 0.0, + "num_input_tokens_seen": 111043752, + "step": 164765 + }, + { + "epoch": 4.025358512691471, + "grad_norm": 0.00019919118494726717, + "learning_rate": 2.227084430722549e-07, + "loss": 0.0, + "num_input_tokens_seen": 111047080, + "step": 164770 + }, + { + "epoch": 4.025480663523319, + "grad_norm": 0.00011809620627900586, + "learning_rate": 2.226547943025583e-07, + "loss": 0.0, + "num_input_tokens_seen": 111050216, + "step": 164775 + }, + { + "epoch": 4.025602814355166, + "grad_norm": 0.00037095736479386687, + "learning_rate": 2.226011511859247e-07, + "loss": 0.0, + "num_input_tokens_seen": 111053416, + "step": 164780 + }, + { + "epoch": 4.025724965187013, + "grad_norm": 0.0015142523916438222, + "learning_rate": 2.2254751372274473e-07, + "loss": 0.0, + "num_input_tokens_seen": 111057000, + "step": 164785 + }, + { + "epoch": 4.02584711601886, + "grad_norm": 0.005019500385969877, + "learning_rate": 2.2249388191340857e-07, + "loss": 0.0, + "num_input_tokens_seen": 111060264, + "step": 164790 + }, + { + "epoch": 4.025969266850708, + "grad_norm": 0.0001835352013586089, + "learning_rate": 2.2244025575830582e-07, + "loss": 0.0, + "num_input_tokens_seen": 111063336, + "step": 164795 + }, + { + "epoch": 4.026091417682554, + "grad_norm": 0.0005110033671371639, + "learning_rate": 2.2238663525782687e-07, + "loss": 0.0, + "num_input_tokens_seen": 111066728, + "step": 164800 + }, + { + "epoch": 4.026213568514401, + "grad_norm": 0.0037860642187297344, + "learning_rate": 2.2233302041236124e-07, + "loss": 0.0, + "num_input_tokens_seen": 111070248, + "step": 164805 + }, + { + "epoch": 4.026335719346249, + "grad_norm": 0.000749050872400403, + "learning_rate": 2.222794112222993e-07, + "loss": 0.0, + "num_input_tokens_seen": 111073256, + "step": 164810 + }, + { + "epoch": 4.0264578701780955, + "grad_norm": 0.008735577575862408, + "learning_rate": 2.2222580768803045e-07, + "loss": 0.0, + "num_input_tokens_seen": 111076264, + "step": 164815 + }, + { + "epoch": 4.026580021009943, + "grad_norm": 6.568283879460068e-06, + "learning_rate": 2.221722098099449e-07, + "loss": 0.0, + "num_input_tokens_seen": 111080040, + "step": 164820 + }, + { + "epoch": 4.02670217184179, + "grad_norm": 0.0029867016710340977, + "learning_rate": 2.221186175884323e-07, + "loss": 0.0, + "num_input_tokens_seen": 111083240, + "step": 164825 + }, + { + "epoch": 4.0268243226736375, + "grad_norm": 0.0010198794770985842, + "learning_rate": 2.2206503102388207e-07, + "loss": 0.0, + "num_input_tokens_seen": 111086184, + "step": 164830 + }, + { + "epoch": 4.026946473505484, + "grad_norm": 0.0011232220567762852, + "learning_rate": 2.2201145011668443e-07, + "loss": 0.0, + "num_input_tokens_seen": 111089320, + "step": 164835 + }, + { + "epoch": 4.027068624337332, + "grad_norm": 0.000603526714257896, + "learning_rate": 2.219578748672285e-07, + "loss": 0.0, + "num_input_tokens_seen": 111092648, + "step": 164840 + }, + { + "epoch": 4.027190775169179, + "grad_norm": 0.0009548215894028544, + "learning_rate": 2.219043052759042e-07, + "loss": 0.0, + "num_input_tokens_seen": 111096104, + "step": 164845 + }, + { + "epoch": 4.027312926001026, + "grad_norm": 0.049355845898389816, + "learning_rate": 2.2185074134310134e-07, + "loss": 0.0, + "num_input_tokens_seen": 111099304, + "step": 164850 + }, + { + "epoch": 4.027435076832873, + "grad_norm": 0.013851494528353214, + "learning_rate": 2.217971830692089e-07, + "loss": 0.0, + "num_input_tokens_seen": 111102824, + "step": 164855 + }, + { + "epoch": 4.027557227664721, + "grad_norm": 0.0009064696496352553, + "learning_rate": 2.2174363045461697e-07, + "loss": 0.0, + "num_input_tokens_seen": 111106280, + "step": 164860 + }, + { + "epoch": 4.027679378496567, + "grad_norm": 0.0007289585773833096, + "learning_rate": 2.216900834997143e-07, + "loss": 0.0, + "num_input_tokens_seen": 111109416, + "step": 164865 + }, + { + "epoch": 4.027801529328415, + "grad_norm": 0.001933575258590281, + "learning_rate": 2.2163654220489102e-07, + "loss": 0.0, + "num_input_tokens_seen": 111112744, + "step": 164870 + }, + { + "epoch": 4.027923680160262, + "grad_norm": 0.0033129544463008642, + "learning_rate": 2.2158300657053596e-07, + "loss": 0.0, + "num_input_tokens_seen": 111116328, + "step": 164875 + }, + { + "epoch": 4.028045830992109, + "grad_norm": 0.0009052801760844886, + "learning_rate": 2.215294765970386e-07, + "loss": 0.0, + "num_input_tokens_seen": 111119272, + "step": 164880 + }, + { + "epoch": 4.028167981823956, + "grad_norm": 5.468447125167586e-05, + "learning_rate": 2.2147595228478844e-07, + "loss": 0.0, + "num_input_tokens_seen": 111122600, + "step": 164885 + }, + { + "epoch": 4.028290132655804, + "grad_norm": 0.0022210185416042805, + "learning_rate": 2.2142243363417446e-07, + "loss": 0.0002, + "num_input_tokens_seen": 111126184, + "step": 164890 + }, + { + "epoch": 4.028412283487651, + "grad_norm": 0.0010133266914635897, + "learning_rate": 2.213689206455861e-07, + "loss": 0.0, + "num_input_tokens_seen": 111129256, + "step": 164895 + }, + { + "epoch": 4.028534434319497, + "grad_norm": 0.00019960623467341065, + "learning_rate": 2.2131541331941216e-07, + "loss": 0.0, + "num_input_tokens_seen": 111132136, + "step": 164900 + }, + { + "epoch": 4.028656585151345, + "grad_norm": 195.2945098876953, + "learning_rate": 2.2126191165604214e-07, + "loss": 0.0464, + "num_input_tokens_seen": 111135528, + "step": 164905 + }, + { + "epoch": 4.028778735983192, + "grad_norm": 4.072034789714962e-05, + "learning_rate": 2.2120841565586479e-07, + "loss": 0.0, + "num_input_tokens_seen": 111138792, + "step": 164910 + }, + { + "epoch": 4.028900886815039, + "grad_norm": 0.021570960059762, + "learning_rate": 2.211549253192696e-07, + "loss": 0.0, + "num_input_tokens_seen": 111141864, + "step": 164915 + }, + { + "epoch": 4.029023037646886, + "grad_norm": 0.002378236735239625, + "learning_rate": 2.2110144064664493e-07, + "loss": 0.0, + "num_input_tokens_seen": 111145320, + "step": 164920 + }, + { + "epoch": 4.029145188478734, + "grad_norm": 0.0004563156981021166, + "learning_rate": 2.2104796163838036e-07, + "loss": 0.0, + "num_input_tokens_seen": 111148264, + "step": 164925 + }, + { + "epoch": 4.0292673393105805, + "grad_norm": 0.008762065321207047, + "learning_rate": 2.2099448829486455e-07, + "loss": 0.0, + "num_input_tokens_seen": 111152168, + "step": 164930 + }, + { + "epoch": 4.029389490142428, + "grad_norm": 0.003576488932594657, + "learning_rate": 2.2094102061648613e-07, + "loss": 0.0, + "num_input_tokens_seen": 111155432, + "step": 164935 + }, + { + "epoch": 4.029511640974275, + "grad_norm": 0.003416407387703657, + "learning_rate": 2.2088755860363406e-07, + "loss": 0.0, + "num_input_tokens_seen": 111158952, + "step": 164940 + }, + { + "epoch": 4.0296337918061225, + "grad_norm": 0.01084967702627182, + "learning_rate": 2.2083410225669752e-07, + "loss": 0.0, + "num_input_tokens_seen": 111162472, + "step": 164945 + }, + { + "epoch": 4.029755942637969, + "grad_norm": 0.0006958711892366409, + "learning_rate": 2.2078065157606473e-07, + "loss": 0.0, + "num_input_tokens_seen": 111165928, + "step": 164950 + }, + { + "epoch": 4.029878093469817, + "grad_norm": 0.003058177651837468, + "learning_rate": 2.2072720656212483e-07, + "loss": 0.0, + "num_input_tokens_seen": 111168936, + "step": 164955 + }, + { + "epoch": 4.030000244301664, + "grad_norm": 0.0013698239345103502, + "learning_rate": 2.206737672152661e-07, + "loss": 0.0, + "num_input_tokens_seen": 111172456, + "step": 164960 + }, + { + "epoch": 4.030122395133511, + "grad_norm": 0.00022180116502568126, + "learning_rate": 2.206203335358776e-07, + "loss": 0.0, + "num_input_tokens_seen": 111176040, + "step": 164965 + }, + { + "epoch": 4.030244545965358, + "grad_norm": 0.0037178494967520237, + "learning_rate": 2.2056690552434732e-07, + "loss": 0.0, + "num_input_tokens_seen": 111179176, + "step": 164970 + }, + { + "epoch": 4.030366696797206, + "grad_norm": 0.0011469481978565454, + "learning_rate": 2.2051348318106421e-07, + "loss": 0.0, + "num_input_tokens_seen": 111182376, + "step": 164975 + }, + { + "epoch": 4.030488847629052, + "grad_norm": 0.007485619746148586, + "learning_rate": 2.2046006650641692e-07, + "loss": 0.0, + "num_input_tokens_seen": 111185448, + "step": 164980 + }, + { + "epoch": 4.030610998460899, + "grad_norm": 0.00013309967471286654, + "learning_rate": 2.204066555007935e-07, + "loss": 0.0, + "num_input_tokens_seen": 111188328, + "step": 164985 + }, + { + "epoch": 4.030733149292747, + "grad_norm": 0.0012170979753136635, + "learning_rate": 2.2035325016458273e-07, + "loss": 0.0, + "num_input_tokens_seen": 111191528, + "step": 164990 + }, + { + "epoch": 4.0308553001245935, + "grad_norm": 0.0009001771104522049, + "learning_rate": 2.2029985049817268e-07, + "loss": 0.0, + "num_input_tokens_seen": 111194984, + "step": 164995 + }, + { + "epoch": 4.030977450956441, + "grad_norm": 0.0003405268071219325, + "learning_rate": 2.2024645650195174e-07, + "loss": 0.0, + "num_input_tokens_seen": 111198440, + "step": 165000 + }, + { + "epoch": 4.031099601788288, + "grad_norm": 0.0001786830252967775, + "learning_rate": 2.2019306817630856e-07, + "loss": 0.0, + "num_input_tokens_seen": 111201704, + "step": 165005 + }, + { + "epoch": 4.0312217526201355, + "grad_norm": 0.00017067190492525697, + "learning_rate": 2.2013968552163098e-07, + "loss": 0.0, + "num_input_tokens_seen": 111205224, + "step": 165010 + }, + { + "epoch": 4.031343903451982, + "grad_norm": 0.0004245521849952638, + "learning_rate": 2.2008630853830755e-07, + "loss": 0.0, + "num_input_tokens_seen": 111208296, + "step": 165015 + }, + { + "epoch": 4.03146605428383, + "grad_norm": 0.248831644654274, + "learning_rate": 2.20032937226726e-07, + "loss": 0.0003, + "num_input_tokens_seen": 111211816, + "step": 165020 + }, + { + "epoch": 4.031588205115677, + "grad_norm": 0.000578263308852911, + "learning_rate": 2.19979571587275e-07, + "loss": 0.0, + "num_input_tokens_seen": 111215016, + "step": 165025 + }, + { + "epoch": 4.031710355947524, + "grad_norm": 0.00015196092135738581, + "learning_rate": 2.1992621162034232e-07, + "loss": 0.0, + "num_input_tokens_seen": 111218408, + "step": 165030 + }, + { + "epoch": 4.031832506779371, + "grad_norm": 0.005513553041964769, + "learning_rate": 2.1987285732631577e-07, + "loss": 0.0, + "num_input_tokens_seen": 111221544, + "step": 165035 + }, + { + "epoch": 4.031954657611219, + "grad_norm": 8.522966527380049e-05, + "learning_rate": 2.1981950870558385e-07, + "loss": 0.0, + "num_input_tokens_seen": 111226088, + "step": 165040 + }, + { + "epoch": 4.032076808443065, + "grad_norm": 0.00015558319864794612, + "learning_rate": 2.1976616575853412e-07, + "loss": 0.0, + "num_input_tokens_seen": 111229608, + "step": 165045 + }, + { + "epoch": 4.032198959274913, + "grad_norm": 0.03268592432141304, + "learning_rate": 2.1971282848555495e-07, + "loss": 0.0, + "num_input_tokens_seen": 111232552, + "step": 165050 + }, + { + "epoch": 4.03232111010676, + "grad_norm": 0.0004529871221166104, + "learning_rate": 2.1965949688703368e-07, + "loss": 0.0, + "num_input_tokens_seen": 111236200, + "step": 165055 + }, + { + "epoch": 4.032443260938607, + "grad_norm": 0.0002521305868867785, + "learning_rate": 2.1960617096335876e-07, + "loss": 0.0, + "num_input_tokens_seen": 111239464, + "step": 165060 + }, + { + "epoch": 4.032565411770454, + "grad_norm": 0.00017973648209590465, + "learning_rate": 2.1955285071491724e-07, + "loss": 0.0, + "num_input_tokens_seen": 111242600, + "step": 165065 + }, + { + "epoch": 4.032687562602302, + "grad_norm": 0.0015271722804754972, + "learning_rate": 2.194995361420975e-07, + "loss": 0.0, + "num_input_tokens_seen": 111246056, + "step": 165070 + }, + { + "epoch": 4.0328097134341485, + "grad_norm": 0.0016294847009703517, + "learning_rate": 2.1944622724528716e-07, + "loss": 0.0, + "num_input_tokens_seen": 111249320, + "step": 165075 + }, + { + "epoch": 4.032931864265995, + "grad_norm": 0.0002762658696155995, + "learning_rate": 2.1939292402487363e-07, + "loss": 0.0609, + "num_input_tokens_seen": 111252776, + "step": 165080 + }, + { + "epoch": 4.033054015097843, + "grad_norm": 0.020646601915359497, + "learning_rate": 2.1933962648124505e-07, + "loss": 0.0, + "num_input_tokens_seen": 111255912, + "step": 165085 + }, + { + "epoch": 4.03317616592969, + "grad_norm": 0.00040234896005131304, + "learning_rate": 2.1928633461478828e-07, + "loss": 0.062, + "num_input_tokens_seen": 111259240, + "step": 165090 + }, + { + "epoch": 4.033298316761537, + "grad_norm": 0.0001797089062165469, + "learning_rate": 2.192330484258913e-07, + "loss": 0.0, + "num_input_tokens_seen": 111262760, + "step": 165095 + }, + { + "epoch": 4.033420467593384, + "grad_norm": 0.002640556776896119, + "learning_rate": 2.1917976791494186e-07, + "loss": 0.0, + "num_input_tokens_seen": 111266088, + "step": 165100 + }, + { + "epoch": 4.033542618425232, + "grad_norm": 0.008661020547151566, + "learning_rate": 2.1912649308232688e-07, + "loss": 0.0, + "num_input_tokens_seen": 111269608, + "step": 165105 + }, + { + "epoch": 4.033664769257078, + "grad_norm": 0.0007709195488132536, + "learning_rate": 2.190732239284344e-07, + "loss": 0.0, + "num_input_tokens_seen": 111272488, + "step": 165110 + }, + { + "epoch": 4.033786920088926, + "grad_norm": 0.0018076790729537606, + "learning_rate": 2.1901996045365123e-07, + "loss": 0.0, + "num_input_tokens_seen": 111275944, + "step": 165115 + }, + { + "epoch": 4.033909070920773, + "grad_norm": 0.00034789761411957443, + "learning_rate": 2.1896670265836516e-07, + "loss": 0.0, + "num_input_tokens_seen": 111279336, + "step": 165120 + }, + { + "epoch": 4.03403122175262, + "grad_norm": 0.0001242852013092488, + "learning_rate": 2.1891345054296306e-07, + "loss": 0.0, + "num_input_tokens_seen": 111283048, + "step": 165125 + }, + { + "epoch": 4.034153372584467, + "grad_norm": 0.00019528514530975372, + "learning_rate": 2.188602041078328e-07, + "loss": 0.0, + "num_input_tokens_seen": 111285992, + "step": 165130 + }, + { + "epoch": 4.034275523416315, + "grad_norm": 0.00017401730292476714, + "learning_rate": 2.1880696335336114e-07, + "loss": 0.0, + "num_input_tokens_seen": 111289064, + "step": 165135 + }, + { + "epoch": 4.0343976742481615, + "grad_norm": 0.00035434920573607087, + "learning_rate": 2.1875372827993499e-07, + "loss": 0.0, + "num_input_tokens_seen": 111292712, + "step": 165140 + }, + { + "epoch": 4.034519825080009, + "grad_norm": 0.06561069935560226, + "learning_rate": 2.1870049888794228e-07, + "loss": 0.0, + "num_input_tokens_seen": 111296232, + "step": 165145 + }, + { + "epoch": 4.034641975911856, + "grad_norm": 0.0016677493695169687, + "learning_rate": 2.1864727517776938e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111299432, + "step": 165150 + }, + { + "epoch": 4.0347641267437035, + "grad_norm": 0.0006396759999915957, + "learning_rate": 2.1859405714980394e-07, + "loss": 0.0, + "num_input_tokens_seen": 111302440, + "step": 165155 + }, + { + "epoch": 4.03488627757555, + "grad_norm": 0.0030758215580135584, + "learning_rate": 2.1854084480443237e-07, + "loss": 0.0, + "num_input_tokens_seen": 111305768, + "step": 165160 + }, + { + "epoch": 4.035008428407397, + "grad_norm": 0.002016278449445963, + "learning_rate": 2.1848763814204197e-07, + "loss": 0.0, + "num_input_tokens_seen": 111309032, + "step": 165165 + }, + { + "epoch": 4.035130579239245, + "grad_norm": 0.0009641702054068446, + "learning_rate": 2.1843443716301991e-07, + "loss": 0.0, + "num_input_tokens_seen": 111312424, + "step": 165170 + }, + { + "epoch": 4.035252730071091, + "grad_norm": 0.00021530137746594846, + "learning_rate": 2.1838124186775265e-07, + "loss": 0.0, + "num_input_tokens_seen": 111318184, + "step": 165175 + }, + { + "epoch": 4.035374880902939, + "grad_norm": 0.0003201996732968837, + "learning_rate": 2.1832805225662742e-07, + "loss": 0.0, + "num_input_tokens_seen": 111321320, + "step": 165180 + }, + { + "epoch": 4.035497031734786, + "grad_norm": 0.002409678651019931, + "learning_rate": 2.1827486833003062e-07, + "loss": 0.0479, + "num_input_tokens_seen": 111324840, + "step": 165185 + }, + { + "epoch": 4.035619182566633, + "grad_norm": 0.00077003677142784, + "learning_rate": 2.1822169008834924e-07, + "loss": 0.0, + "num_input_tokens_seen": 111328616, + "step": 165190 + }, + { + "epoch": 4.03574133339848, + "grad_norm": 0.004224866628646851, + "learning_rate": 2.181685175319702e-07, + "loss": 0.0, + "num_input_tokens_seen": 111331752, + "step": 165195 + }, + { + "epoch": 4.035863484230328, + "grad_norm": 0.0008089193142950535, + "learning_rate": 2.1811535066127983e-07, + "loss": 0.0002, + "num_input_tokens_seen": 111335272, + "step": 165200 + }, + { + "epoch": 4.035985635062175, + "grad_norm": 0.0007582316757179797, + "learning_rate": 2.180621894766651e-07, + "loss": 0.0, + "num_input_tokens_seen": 111339112, + "step": 165205 + }, + { + "epoch": 4.036107785894022, + "grad_norm": 0.0345303900539875, + "learning_rate": 2.1800903397851222e-07, + "loss": 0.0, + "num_input_tokens_seen": 111342312, + "step": 165210 + }, + { + "epoch": 4.036229936725869, + "grad_norm": 0.016312891617417336, + "learning_rate": 2.1795588416720822e-07, + "loss": 0.0, + "num_input_tokens_seen": 111345832, + "step": 165215 + }, + { + "epoch": 4.036352087557717, + "grad_norm": 0.0017842523520812392, + "learning_rate": 2.1790274004313912e-07, + "loss": 0.0, + "num_input_tokens_seen": 111348776, + "step": 165220 + }, + { + "epoch": 4.036474238389563, + "grad_norm": 0.0002951300411950797, + "learning_rate": 2.1784960160669197e-07, + "loss": 0.0, + "num_input_tokens_seen": 111352552, + "step": 165225 + }, + { + "epoch": 4.036596389221411, + "grad_norm": 0.037422504276037216, + "learning_rate": 2.1779646885825264e-07, + "loss": 0.0, + "num_input_tokens_seen": 111355688, + "step": 165230 + }, + { + "epoch": 4.036718540053258, + "grad_norm": 0.002478367416188121, + "learning_rate": 2.1774334179820797e-07, + "loss": 0.0, + "num_input_tokens_seen": 111359208, + "step": 165235 + }, + { + "epoch": 4.036840690885105, + "grad_norm": 0.0008452267502434552, + "learning_rate": 2.1769022042694385e-07, + "loss": 0.0168, + "num_input_tokens_seen": 111362728, + "step": 165240 + }, + { + "epoch": 4.036962841716952, + "grad_norm": 0.004490249324589968, + "learning_rate": 2.176371047448472e-07, + "loss": 0.0, + "num_input_tokens_seen": 111366568, + "step": 165245 + }, + { + "epoch": 4.037084992548799, + "grad_norm": 0.0013267035828903317, + "learning_rate": 2.175839947523036e-07, + "loss": 0.0, + "num_input_tokens_seen": 111369768, + "step": 165250 + }, + { + "epoch": 4.0372071433806465, + "grad_norm": 0.007490311283618212, + "learning_rate": 2.1753089044969997e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111373288, + "step": 165255 + }, + { + "epoch": 4.037329294212493, + "grad_norm": 0.030202653259038925, + "learning_rate": 2.1747779183742187e-07, + "loss": 0.0004, + "num_input_tokens_seen": 111377128, + "step": 165260 + }, + { + "epoch": 4.037451445044341, + "grad_norm": 0.002074525225907564, + "learning_rate": 2.17424698915856e-07, + "loss": 0.0, + "num_input_tokens_seen": 111380968, + "step": 165265 + }, + { + "epoch": 4.037573595876188, + "grad_norm": 0.0004991142195649445, + "learning_rate": 2.1737161168538787e-07, + "loss": 0.0, + "num_input_tokens_seen": 111383912, + "step": 165270 + }, + { + "epoch": 4.037695746708035, + "grad_norm": 0.0005876885261386633, + "learning_rate": 2.1731853014640422e-07, + "loss": 0.0, + "num_input_tokens_seen": 111387688, + "step": 165275 + }, + { + "epoch": 4.037817897539882, + "grad_norm": 0.0024651093408465385, + "learning_rate": 2.1726545429929055e-07, + "loss": 0.0, + "num_input_tokens_seen": 111390632, + "step": 165280 + }, + { + "epoch": 4.03794004837173, + "grad_norm": 0.0009485721820965409, + "learning_rate": 2.1721238414443287e-07, + "loss": 0.0, + "num_input_tokens_seen": 111394728, + "step": 165285 + }, + { + "epoch": 4.038062199203576, + "grad_norm": 0.00034085451625287533, + "learning_rate": 2.1715931968221768e-07, + "loss": 0.0, + "num_input_tokens_seen": 111398504, + "step": 165290 + }, + { + "epoch": 4.038184350035424, + "grad_norm": 0.00034384272294119, + "learning_rate": 2.1710626091303008e-07, + "loss": 0.0, + "num_input_tokens_seen": 111401320, + "step": 165295 + }, + { + "epoch": 4.038306500867271, + "grad_norm": 0.00017364125233143568, + "learning_rate": 2.1705320783725667e-07, + "loss": 0.0, + "num_input_tokens_seen": 111404840, + "step": 165300 + }, + { + "epoch": 4.038428651699118, + "grad_norm": 0.0002893655910156667, + "learning_rate": 2.170001604552827e-07, + "loss": 0.0, + "num_input_tokens_seen": 111408168, + "step": 165305 + }, + { + "epoch": 4.038550802530965, + "grad_norm": 0.0010250789346173406, + "learning_rate": 2.1694711876749438e-07, + "loss": 0.0, + "num_input_tokens_seen": 111411688, + "step": 165310 + }, + { + "epoch": 4.038672953362813, + "grad_norm": 0.00014593149535357952, + "learning_rate": 2.168940827742769e-07, + "loss": 0.0, + "num_input_tokens_seen": 111415144, + "step": 165315 + }, + { + "epoch": 4.0387951041946595, + "grad_norm": 0.0019269100157544017, + "learning_rate": 2.1684105247601635e-07, + "loss": 0.0, + "num_input_tokens_seen": 111418664, + "step": 165320 + }, + { + "epoch": 4.038917255026507, + "grad_norm": 0.0007912858854979277, + "learning_rate": 2.1678802787309857e-07, + "loss": 0.0, + "num_input_tokens_seen": 111421608, + "step": 165325 + }, + { + "epoch": 4.039039405858354, + "grad_norm": 0.0006178620969876647, + "learning_rate": 2.167350089659087e-07, + "loss": 0.0, + "num_input_tokens_seen": 111424808, + "step": 165330 + }, + { + "epoch": 4.0391615566902015, + "grad_norm": 0.021620342507958412, + "learning_rate": 2.166819957548327e-07, + "loss": 0.0, + "num_input_tokens_seen": 111428456, + "step": 165335 + }, + { + "epoch": 4.039283707522048, + "grad_norm": 0.0001561202952871099, + "learning_rate": 2.1662898824025588e-07, + "loss": 0.0, + "num_input_tokens_seen": 111431656, + "step": 165340 + }, + { + "epoch": 4.039405858353895, + "grad_norm": 0.020325763151049614, + "learning_rate": 2.1657598642256358e-07, + "loss": 0.0, + "num_input_tokens_seen": 111434792, + "step": 165345 + }, + { + "epoch": 4.039528009185743, + "grad_norm": 0.0001341603638138622, + "learning_rate": 2.165229903021417e-07, + "loss": 0.0, + "num_input_tokens_seen": 111438120, + "step": 165350 + }, + { + "epoch": 4.039650160017589, + "grad_norm": 0.00033617831650190055, + "learning_rate": 2.1646999987937497e-07, + "loss": 0.0, + "num_input_tokens_seen": 111441448, + "step": 165355 + }, + { + "epoch": 4.039772310849437, + "grad_norm": 0.000377911317627877, + "learning_rate": 2.164170151546496e-07, + "loss": 0.0, + "num_input_tokens_seen": 111444520, + "step": 165360 + }, + { + "epoch": 4.039894461681284, + "grad_norm": 0.0014009472215548158, + "learning_rate": 2.1636403612835007e-07, + "loss": 0.0, + "num_input_tokens_seen": 111447848, + "step": 165365 + }, + { + "epoch": 4.040016612513131, + "grad_norm": 0.0025961874052882195, + "learning_rate": 2.1631106280086232e-07, + "loss": 0.0349, + "num_input_tokens_seen": 111451240, + "step": 165370 + }, + { + "epoch": 4.040138763344978, + "grad_norm": 0.00035863713128492236, + "learning_rate": 2.1625809517257098e-07, + "loss": 0.0, + "num_input_tokens_seen": 111454632, + "step": 165375 + }, + { + "epoch": 4.040260914176826, + "grad_norm": 0.0002058813552139327, + "learning_rate": 2.162051332438617e-07, + "loss": 0.0, + "num_input_tokens_seen": 111458152, + "step": 165380 + }, + { + "epoch": 4.0403830650086725, + "grad_norm": 0.0014873233158141375, + "learning_rate": 2.1615217701511967e-07, + "loss": 0.0, + "num_input_tokens_seen": 111461480, + "step": 165385 + }, + { + "epoch": 4.04050521584052, + "grad_norm": 0.0005732462159357965, + "learning_rate": 2.1609922648672962e-07, + "loss": 0.0, + "num_input_tokens_seen": 111464552, + "step": 165390 + }, + { + "epoch": 4.040627366672367, + "grad_norm": 0.0001214592921314761, + "learning_rate": 2.1604628165907712e-07, + "loss": 0.0, + "num_input_tokens_seen": 111468072, + "step": 165395 + }, + { + "epoch": 4.0407495175042145, + "grad_norm": 7.036358874756843e-05, + "learning_rate": 2.1599334253254665e-07, + "loss": 0.0, + "num_input_tokens_seen": 111471592, + "step": 165400 + }, + { + "epoch": 4.040871668336061, + "grad_norm": 1.940254878718406e-05, + "learning_rate": 2.1594040910752344e-07, + "loss": 0.0, + "num_input_tokens_seen": 111474984, + "step": 165405 + }, + { + "epoch": 4.040993819167909, + "grad_norm": 0.0024557760916650295, + "learning_rate": 2.1588748138439271e-07, + "loss": 0.0348, + "num_input_tokens_seen": 111478504, + "step": 165410 + }, + { + "epoch": 4.041115969999756, + "grad_norm": 0.00025112141156569123, + "learning_rate": 2.1583455936353888e-07, + "loss": 0.0, + "num_input_tokens_seen": 111481640, + "step": 165415 + }, + { + "epoch": 4.041238120831603, + "grad_norm": 0.0030566269997507334, + "learning_rate": 2.157816430453473e-07, + "loss": 0.0, + "num_input_tokens_seen": 111485480, + "step": 165420 + }, + { + "epoch": 4.04136027166345, + "grad_norm": 9.694881009636447e-05, + "learning_rate": 2.1572873243020228e-07, + "loss": 0.0003, + "num_input_tokens_seen": 111488616, + "step": 165425 + }, + { + "epoch": 4.041482422495297, + "grad_norm": 0.00042643630877137184, + "learning_rate": 2.1567582751848913e-07, + "loss": 0.0, + "num_input_tokens_seen": 111491624, + "step": 165430 + }, + { + "epoch": 4.041604573327144, + "grad_norm": 0.0017886931309476495, + "learning_rate": 2.1562292831059203e-07, + "loss": 0.0, + "num_input_tokens_seen": 111494952, + "step": 165435 + }, + { + "epoch": 4.041726724158991, + "grad_norm": 0.00014759103942196816, + "learning_rate": 2.1557003480689627e-07, + "loss": 0.0, + "num_input_tokens_seen": 111497896, + "step": 165440 + }, + { + "epoch": 4.041848874990839, + "grad_norm": 0.0017384887905791402, + "learning_rate": 2.1551714700778623e-07, + "loss": 0.0, + "num_input_tokens_seen": 111501032, + "step": 165445 + }, + { + "epoch": 4.0419710258226855, + "grad_norm": 0.00014177450793795288, + "learning_rate": 2.1546426491364622e-07, + "loss": 0.0, + "num_input_tokens_seen": 111504168, + "step": 165450 + }, + { + "epoch": 4.042093176654533, + "grad_norm": 0.007318509742617607, + "learning_rate": 2.154113885248613e-07, + "loss": 0.0882, + "num_input_tokens_seen": 111507688, + "step": 165455 + }, + { + "epoch": 4.04221532748638, + "grad_norm": 0.0010406904621049762, + "learning_rate": 2.1535851784181558e-07, + "loss": 0.0, + "num_input_tokens_seen": 111510760, + "step": 165460 + }, + { + "epoch": 4.0423374783182275, + "grad_norm": 0.002014661906287074, + "learning_rate": 2.15305652864894e-07, + "loss": 0.0, + "num_input_tokens_seen": 111513832, + "step": 165465 + }, + { + "epoch": 4.042459629150074, + "grad_norm": 0.10928776115179062, + "learning_rate": 2.1525279359448046e-07, + "loss": 0.0, + "num_input_tokens_seen": 111517224, + "step": 165470 + }, + { + "epoch": 4.042581779981922, + "grad_norm": 0.0005027904408052564, + "learning_rate": 2.1519994003095976e-07, + "loss": 0.0, + "num_input_tokens_seen": 111520680, + "step": 165475 + }, + { + "epoch": 4.042703930813769, + "grad_norm": 0.0004568375297822058, + "learning_rate": 2.1514709217471638e-07, + "loss": 0.0, + "num_input_tokens_seen": 111523688, + "step": 165480 + }, + { + "epoch": 4.042826081645616, + "grad_norm": 0.016826355829834938, + "learning_rate": 2.1509425002613424e-07, + "loss": 0.0, + "num_input_tokens_seen": 111526824, + "step": 165485 + }, + { + "epoch": 4.042948232477463, + "grad_norm": 6.418684642994776e-05, + "learning_rate": 2.1504141358559812e-07, + "loss": 0.0, + "num_input_tokens_seen": 111530216, + "step": 165490 + }, + { + "epoch": 4.043070383309311, + "grad_norm": 0.0003524493076838553, + "learning_rate": 2.1498858285349164e-07, + "loss": 0.0, + "num_input_tokens_seen": 111533224, + "step": 165495 + }, + { + "epoch": 4.043192534141157, + "grad_norm": 0.00021575384016614407, + "learning_rate": 2.1493575783019934e-07, + "loss": 0.0, + "num_input_tokens_seen": 111536872, + "step": 165500 + }, + { + "epoch": 4.043314684973005, + "grad_norm": 0.0024450430646538734, + "learning_rate": 2.148829385161056e-07, + "loss": 0.0, + "num_input_tokens_seen": 111540072, + "step": 165505 + }, + { + "epoch": 4.043436835804852, + "grad_norm": 4.44727556896396e-05, + "learning_rate": 2.1483012491159404e-07, + "loss": 0.0, + "num_input_tokens_seen": 111543656, + "step": 165510 + }, + { + "epoch": 4.0435589866366985, + "grad_norm": 0.001662745140492916, + "learning_rate": 2.1477731701704927e-07, + "loss": 0.0, + "num_input_tokens_seen": 111547560, + "step": 165515 + }, + { + "epoch": 4.043681137468546, + "grad_norm": 0.0012131177354604006, + "learning_rate": 2.147245148328548e-07, + "loss": 0.0, + "num_input_tokens_seen": 111550824, + "step": 165520 + }, + { + "epoch": 4.043803288300393, + "grad_norm": 0.005215724930167198, + "learning_rate": 2.1467171835939525e-07, + "loss": 0.0, + "num_input_tokens_seen": 111554344, + "step": 165525 + }, + { + "epoch": 4.043925439132241, + "grad_norm": 0.003541510319337249, + "learning_rate": 2.146189275970538e-07, + "loss": 0.0, + "num_input_tokens_seen": 111557608, + "step": 165530 + }, + { + "epoch": 4.044047589964087, + "grad_norm": 0.0022107716649770737, + "learning_rate": 2.1456614254621497e-07, + "loss": 0.0, + "num_input_tokens_seen": 111561640, + "step": 165535 + }, + { + "epoch": 4.044169740795935, + "grad_norm": 0.0003690843877848238, + "learning_rate": 2.1451336320726222e-07, + "loss": 0.0, + "num_input_tokens_seen": 111564776, + "step": 165540 + }, + { + "epoch": 4.044291891627782, + "grad_norm": 0.0020510528702288866, + "learning_rate": 2.1446058958057978e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111567912, + "step": 165545 + }, + { + "epoch": 4.044414042459629, + "grad_norm": 0.03945672884583473, + "learning_rate": 2.1440782166655101e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111571176, + "step": 165550 + }, + { + "epoch": 4.044536193291476, + "grad_norm": 0.0011049823369830847, + "learning_rate": 2.1435505946556008e-07, + "loss": 0.0, + "num_input_tokens_seen": 111574440, + "step": 165555 + }, + { + "epoch": 4.044658344123324, + "grad_norm": 0.003834398230537772, + "learning_rate": 2.1430230297799024e-07, + "loss": 0.0, + "num_input_tokens_seen": 111578088, + "step": 165560 + }, + { + "epoch": 4.0447804949551704, + "grad_norm": 0.00021740724332630634, + "learning_rate": 2.142495522042257e-07, + "loss": 0.0, + "num_input_tokens_seen": 111581544, + "step": 165565 + }, + { + "epoch": 4.044902645787018, + "grad_norm": 0.00013685625162906945, + "learning_rate": 2.141968071446494e-07, + "loss": 0.0, + "num_input_tokens_seen": 111585128, + "step": 165570 + }, + { + "epoch": 4.045024796618865, + "grad_norm": 0.0021345813293009996, + "learning_rate": 2.1414406779964555e-07, + "loss": 0.0, + "num_input_tokens_seen": 111588712, + "step": 165575 + }, + { + "epoch": 4.0451469474507125, + "grad_norm": 0.10980292409658432, + "learning_rate": 2.1409133416959712e-07, + "loss": 0.0, + "num_input_tokens_seen": 111592744, + "step": 165580 + }, + { + "epoch": 4.045269098282559, + "grad_norm": 0.00013364851474761963, + "learning_rate": 2.1403860625488823e-07, + "loss": 0.0, + "num_input_tokens_seen": 111596456, + "step": 165585 + }, + { + "epoch": 4.045391249114407, + "grad_norm": 0.002575997728854418, + "learning_rate": 2.1398588405590168e-07, + "loss": 0.0, + "num_input_tokens_seen": 111599720, + "step": 165590 + }, + { + "epoch": 4.045513399946254, + "grad_norm": 0.0008017385262064636, + "learning_rate": 2.1393316757302116e-07, + "loss": 0.0, + "num_input_tokens_seen": 111603496, + "step": 165595 + }, + { + "epoch": 4.045635550778101, + "grad_norm": 5.82604952796828e-05, + "learning_rate": 2.1388045680663047e-07, + "loss": 0.0, + "num_input_tokens_seen": 111606824, + "step": 165600 + }, + { + "epoch": 4.045757701609948, + "grad_norm": 0.0010002893395721912, + "learning_rate": 2.1382775175711222e-07, + "loss": 0.0, + "num_input_tokens_seen": 111612008, + "step": 165605 + }, + { + "epoch": 4.045879852441795, + "grad_norm": 0.0016825840575620532, + "learning_rate": 2.1377505242485018e-07, + "loss": 0.0, + "num_input_tokens_seen": 111615016, + "step": 165610 + }, + { + "epoch": 4.046002003273642, + "grad_norm": 0.0027573788538575172, + "learning_rate": 2.1372235881022726e-07, + "loss": 0.0, + "num_input_tokens_seen": 111618600, + "step": 165615 + }, + { + "epoch": 4.046124154105489, + "grad_norm": 0.0046699317172169685, + "learning_rate": 2.1366967091362708e-07, + "loss": 0.0, + "num_input_tokens_seen": 111622248, + "step": 165620 + }, + { + "epoch": 4.046246304937337, + "grad_norm": 0.00024322244280483574, + "learning_rate": 2.136169887354322e-07, + "loss": 0.0, + "num_input_tokens_seen": 111625576, + "step": 165625 + }, + { + "epoch": 4.0463684557691835, + "grad_norm": 0.0006262024398893118, + "learning_rate": 2.1356431227602624e-07, + "loss": 0.0, + "num_input_tokens_seen": 111629608, + "step": 165630 + }, + { + "epoch": 4.046490606601031, + "grad_norm": 0.0033578008878976107, + "learning_rate": 2.1351164153579226e-07, + "loss": 0.0, + "num_input_tokens_seen": 111633000, + "step": 165635 + }, + { + "epoch": 4.046612757432878, + "grad_norm": 0.0009691547602415085, + "learning_rate": 2.1345897651511292e-07, + "loss": 0.0, + "num_input_tokens_seen": 111636072, + "step": 165640 + }, + { + "epoch": 4.0467349082647255, + "grad_norm": 0.0001765676133800298, + "learning_rate": 2.1340631721437174e-07, + "loss": 0.0, + "num_input_tokens_seen": 111639592, + "step": 165645 + }, + { + "epoch": 4.046857059096572, + "grad_norm": 9.644962847232819e-05, + "learning_rate": 2.1335366363395147e-07, + "loss": 0.0, + "num_input_tokens_seen": 111643432, + "step": 165650 + }, + { + "epoch": 4.04697920992842, + "grad_norm": 0.004384058527648449, + "learning_rate": 2.1330101577423453e-07, + "loss": 0.0, + "num_input_tokens_seen": 111646696, + "step": 165655 + }, + { + "epoch": 4.047101360760267, + "grad_norm": 0.0009935772977769375, + "learning_rate": 2.1324837363560456e-07, + "loss": 0.0, + "num_input_tokens_seen": 111650152, + "step": 165660 + }, + { + "epoch": 4.047223511592114, + "grad_norm": 0.0007049691630527377, + "learning_rate": 2.1319573721844376e-07, + "loss": 0.0, + "num_input_tokens_seen": 111653416, + "step": 165665 + }, + { + "epoch": 4.047345662423961, + "grad_norm": 0.003179597668349743, + "learning_rate": 2.131431065231355e-07, + "loss": 0.0, + "num_input_tokens_seen": 111657064, + "step": 165670 + }, + { + "epoch": 4.047467813255809, + "grad_norm": 0.0008164419559761882, + "learning_rate": 2.1309048155006183e-07, + "loss": 0.0, + "num_input_tokens_seen": 111660200, + "step": 165675 + }, + { + "epoch": 4.047589964087655, + "grad_norm": 4.1886734834406525e-05, + "learning_rate": 2.1303786229960618e-07, + "loss": 0.0, + "num_input_tokens_seen": 111663592, + "step": 165680 + }, + { + "epoch": 4.047712114919503, + "grad_norm": 0.00021079998987261206, + "learning_rate": 2.1298524877215052e-07, + "loss": 0.0, + "num_input_tokens_seen": 111666664, + "step": 165685 + }, + { + "epoch": 4.04783426575135, + "grad_norm": 7.617528171977028e-05, + "learning_rate": 2.1293264096807783e-07, + "loss": 0.0, + "num_input_tokens_seen": 111669800, + "step": 165690 + }, + { + "epoch": 4.0479564165831965, + "grad_norm": 0.0013283933512866497, + "learning_rate": 2.1288003888777096e-07, + "loss": 0.0004, + "num_input_tokens_seen": 111672808, + "step": 165695 + }, + { + "epoch": 4.048078567415044, + "grad_norm": 0.0020888675935566425, + "learning_rate": 2.128274425316119e-07, + "loss": 0.0, + "num_input_tokens_seen": 111676520, + "step": 165700 + }, + { + "epoch": 4.048200718246891, + "grad_norm": 4.661543425754644e-05, + "learning_rate": 2.1277485189998366e-07, + "loss": 0.0, + "num_input_tokens_seen": 111679848, + "step": 165705 + }, + { + "epoch": 4.0483228690787385, + "grad_norm": 0.0011659510200843215, + "learning_rate": 2.127222669932681e-07, + "loss": 0.0, + "num_input_tokens_seen": 111683304, + "step": 165710 + }, + { + "epoch": 4.048445019910585, + "grad_norm": 0.0026223217137157917, + "learning_rate": 2.1266968781184814e-07, + "loss": 0.0, + "num_input_tokens_seen": 111686504, + "step": 165715 + }, + { + "epoch": 4.048567170742433, + "grad_norm": 0.0005963681032881141, + "learning_rate": 2.1261711435610607e-07, + "loss": 0.0, + "num_input_tokens_seen": 111690088, + "step": 165720 + }, + { + "epoch": 4.04868932157428, + "grad_norm": 0.00229801912792027, + "learning_rate": 2.1256454662642398e-07, + "loss": 0.0, + "num_input_tokens_seen": 111693544, + "step": 165725 + }, + { + "epoch": 4.048811472406127, + "grad_norm": 3.6713732697535306e-05, + "learning_rate": 2.1251198462318444e-07, + "loss": 0.0, + "num_input_tokens_seen": 111696680, + "step": 165730 + }, + { + "epoch": 4.048933623237974, + "grad_norm": 0.00027011564816348255, + "learning_rate": 2.1245942834676944e-07, + "loss": 0.0, + "num_input_tokens_seen": 111700328, + "step": 165735 + }, + { + "epoch": 4.049055774069822, + "grad_norm": 2.1074765754747204e-05, + "learning_rate": 2.124068777975615e-07, + "loss": 0.0, + "num_input_tokens_seen": 111703848, + "step": 165740 + }, + { + "epoch": 4.049177924901668, + "grad_norm": 0.002213638974353671, + "learning_rate": 2.123543329759423e-07, + "loss": 0.0, + "num_input_tokens_seen": 111707240, + "step": 165745 + }, + { + "epoch": 4.049300075733516, + "grad_norm": 0.0069284215569496155, + "learning_rate": 2.123017938822945e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111710056, + "step": 165750 + }, + { + "epoch": 4.049422226565363, + "grad_norm": 1.297972266911529e-05, + "learning_rate": 2.1224926051699987e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111713768, + "step": 165755 + }, + { + "epoch": 4.04954437739721, + "grad_norm": 0.0045747156254947186, + "learning_rate": 2.121967328804404e-07, + "loss": 0.0, + "num_input_tokens_seen": 111717096, + "step": 165760 + }, + { + "epoch": 4.049666528229057, + "grad_norm": 0.0001304529287153855, + "learning_rate": 2.1214421097299828e-07, + "loss": 0.0, + "num_input_tokens_seen": 111720232, + "step": 165765 + }, + { + "epoch": 4.049788679060905, + "grad_norm": 9.356778900837526e-05, + "learning_rate": 2.1209169479505519e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111723560, + "step": 165770 + }, + { + "epoch": 4.0499108298927515, + "grad_norm": 0.0004932557349093258, + "learning_rate": 2.1203918434699342e-07, + "loss": 0.0, + "num_input_tokens_seen": 111727336, + "step": 165775 + }, + { + "epoch": 4.050032980724599, + "grad_norm": 0.0008772346773184836, + "learning_rate": 2.1198667962919437e-07, + "loss": 0.0, + "num_input_tokens_seen": 111730536, + "step": 165780 + }, + { + "epoch": 4.050155131556446, + "grad_norm": 0.00020624265016522259, + "learning_rate": 2.1193418064204016e-07, + "loss": 0.0, + "num_input_tokens_seen": 111734440, + "step": 165785 + }, + { + "epoch": 4.050277282388293, + "grad_norm": 0.0005781511426903307, + "learning_rate": 2.1188168738591284e-07, + "loss": 0.0, + "num_input_tokens_seen": 111737832, + "step": 165790 + }, + { + "epoch": 4.05039943322014, + "grad_norm": 4.9924346967600286e-05, + "learning_rate": 2.1182919986119364e-07, + "loss": 0.0591, + "num_input_tokens_seen": 111741160, + "step": 165795 + }, + { + "epoch": 4.050521584051987, + "grad_norm": 0.008669133298099041, + "learning_rate": 2.117767180682647e-07, + "loss": 0.0, + "num_input_tokens_seen": 111744488, + "step": 165800 + }, + { + "epoch": 4.050643734883835, + "grad_norm": 8.618797437520698e-05, + "learning_rate": 2.1172424200750715e-07, + "loss": 0.0, + "num_input_tokens_seen": 111747880, + "step": 165805 + }, + { + "epoch": 4.050765885715681, + "grad_norm": 5.16844738740474e-05, + "learning_rate": 2.1167177167930307e-07, + "loss": 0.0, + "num_input_tokens_seen": 111751016, + "step": 165810 + }, + { + "epoch": 4.050888036547529, + "grad_norm": 0.00011682142212521285, + "learning_rate": 2.1161930708403407e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111754024, + "step": 165815 + }, + { + "epoch": 4.051010187379376, + "grad_norm": 0.0013719471171498299, + "learning_rate": 2.1156684822208127e-07, + "loss": 0.0, + "num_input_tokens_seen": 111758184, + "step": 165820 + }, + { + "epoch": 4.051132338211223, + "grad_norm": 0.0009899984579533339, + "learning_rate": 2.1151439509382674e-07, + "loss": 0.0, + "num_input_tokens_seen": 111761384, + "step": 165825 + }, + { + "epoch": 4.05125448904307, + "grad_norm": 0.00012213773152325302, + "learning_rate": 2.1146194769965132e-07, + "loss": 0.0, + "num_input_tokens_seen": 111764392, + "step": 165830 + }, + { + "epoch": 4.051376639874918, + "grad_norm": 0.006048544310033321, + "learning_rate": 2.114095060399369e-07, + "loss": 0.0, + "num_input_tokens_seen": 111767208, + "step": 165835 + }, + { + "epoch": 4.0514987907067646, + "grad_norm": 0.00441456213593483, + "learning_rate": 2.1135707011506442e-07, + "loss": 0.0, + "num_input_tokens_seen": 111770088, + "step": 165840 + }, + { + "epoch": 4.051620941538612, + "grad_norm": 0.008306225761771202, + "learning_rate": 2.113046399254157e-07, + "loss": 0.0, + "num_input_tokens_seen": 111773416, + "step": 165845 + }, + { + "epoch": 4.051743092370459, + "grad_norm": 0.00023795659944880754, + "learning_rate": 2.112522154713715e-07, + "loss": 0.0, + "num_input_tokens_seen": 111776488, + "step": 165850 + }, + { + "epoch": 4.051865243202307, + "grad_norm": 3.328266757307574e-05, + "learning_rate": 2.111997967533137e-07, + "loss": 0.0, + "num_input_tokens_seen": 111779624, + "step": 165855 + }, + { + "epoch": 4.051987394034153, + "grad_norm": 0.00015622578212060034, + "learning_rate": 2.1114738377162279e-07, + "loss": 0.0, + "num_input_tokens_seen": 111783208, + "step": 165860 + }, + { + "epoch": 4.052109544866001, + "grad_norm": 0.0018481943989172578, + "learning_rate": 2.1109497652668052e-07, + "loss": 0.0, + "num_input_tokens_seen": 111786728, + "step": 165865 + }, + { + "epoch": 4.052231695697848, + "grad_norm": 0.0011088012252002954, + "learning_rate": 2.110425750188679e-07, + "loss": 0.0, + "num_input_tokens_seen": 111789864, + "step": 165870 + }, + { + "epoch": 4.052353846529694, + "grad_norm": 0.0009757218649610877, + "learning_rate": 2.1099017924856544e-07, + "loss": 0.0, + "num_input_tokens_seen": 111793256, + "step": 165875 + }, + { + "epoch": 4.052475997361542, + "grad_norm": 3.3502219594083726e-05, + "learning_rate": 2.109377892161547e-07, + "loss": 0.0, + "num_input_tokens_seen": 111796840, + "step": 165880 + }, + { + "epoch": 4.052598148193389, + "grad_norm": 0.02216157130897045, + "learning_rate": 2.108854049220169e-07, + "loss": 0.0, + "num_input_tokens_seen": 111800104, + "step": 165885 + }, + { + "epoch": 4.0527202990252365, + "grad_norm": 0.001708241063170135, + "learning_rate": 2.1083302636653234e-07, + "loss": 0.0, + "num_input_tokens_seen": 111803112, + "step": 165890 + }, + { + "epoch": 4.052842449857083, + "grad_norm": 0.0003455929982010275, + "learning_rate": 2.1078065355008257e-07, + "loss": 0.0, + "num_input_tokens_seen": 111806888, + "step": 165895 + }, + { + "epoch": 4.052964600688931, + "grad_norm": 0.00027073745150119066, + "learning_rate": 2.1072828647304795e-07, + "loss": 0.0, + "num_input_tokens_seen": 111809896, + "step": 165900 + }, + { + "epoch": 4.053086751520778, + "grad_norm": 6.761745316907763e-05, + "learning_rate": 2.1067592513580944e-07, + "loss": 0.0001, + "num_input_tokens_seen": 111813352, + "step": 165905 + }, + { + "epoch": 4.053208902352625, + "grad_norm": 0.00017820534412749112, + "learning_rate": 2.1062356953874815e-07, + "loss": 0.0, + "num_input_tokens_seen": 111816360, + "step": 165910 + }, + { + "epoch": 4.053331053184472, + "grad_norm": 0.0018837273819372058, + "learning_rate": 2.1057121968224445e-07, + "loss": 0.0, + "num_input_tokens_seen": 111819688, + "step": 165915 + }, + { + "epoch": 4.05345320401632, + "grad_norm": 6.063660293875728e-06, + "learning_rate": 2.1051887556667937e-07, + "loss": 0.0224, + "num_input_tokens_seen": 111823336, + "step": 165920 + }, + { + "epoch": 4.053575354848166, + "grad_norm": 0.002080413280054927, + "learning_rate": 2.10466537192433e-07, + "loss": 0.0, + "num_input_tokens_seen": 111826344, + "step": 165925 + }, + { + "epoch": 4.053697505680014, + "grad_norm": 0.0037358372937887907, + "learning_rate": 2.1041420455988668e-07, + "loss": 0.0, + "num_input_tokens_seen": 111829992, + "step": 165930 + }, + { + "epoch": 4.053819656511861, + "grad_norm": 0.00466674380004406, + "learning_rate": 2.1036187766942037e-07, + "loss": 0.0, + "num_input_tokens_seen": 111833512, + "step": 165935 + }, + { + "epoch": 4.053941807343708, + "grad_norm": 0.0008783154771663249, + "learning_rate": 2.103095565214149e-07, + "loss": 0.0, + "num_input_tokens_seen": 111836904, + "step": 165940 + }, + { + "epoch": 4.054063958175555, + "grad_norm": 0.0030196059960871935, + "learning_rate": 2.1025724111625099e-07, + "loss": 0.0, + "num_input_tokens_seen": 111840616, + "step": 165945 + }, + { + "epoch": 4.054186109007403, + "grad_norm": 0.00019221876573283225, + "learning_rate": 2.1020493145430851e-07, + "loss": 0.0, + "num_input_tokens_seen": 111843752, + "step": 165950 + }, + { + "epoch": 4.0543082598392495, + "grad_norm": 0.0013134641340002418, + "learning_rate": 2.1015262753596853e-07, + "loss": 0.0, + "num_input_tokens_seen": 111847720, + "step": 165955 + }, + { + "epoch": 4.054430410671097, + "grad_norm": 0.0001785226777428761, + "learning_rate": 2.1010032936161103e-07, + "loss": 0.0007, + "num_input_tokens_seen": 111851176, + "step": 165960 + }, + { + "epoch": 4.054552561502944, + "grad_norm": 0.001006326638162136, + "learning_rate": 2.100480369316162e-07, + "loss": 0.0, + "num_input_tokens_seen": 111854376, + "step": 165965 + }, + { + "epoch": 4.054674712334791, + "grad_norm": 0.004637191072106361, + "learning_rate": 2.0999575024636474e-07, + "loss": 0.0, + "num_input_tokens_seen": 111857576, + "step": 165970 + }, + { + "epoch": 4.054796863166638, + "grad_norm": 0.00010858668974833563, + "learning_rate": 2.0994346930623642e-07, + "loss": 0.0, + "num_input_tokens_seen": 111861224, + "step": 165975 + }, + { + "epoch": 4.054919013998485, + "grad_norm": 7.806902431184426e-05, + "learning_rate": 2.0989119411161194e-07, + "loss": 0.0, + "num_input_tokens_seen": 111864424, + "step": 165980 + }, + { + "epoch": 4.055041164830333, + "grad_norm": 0.0072829751297831535, + "learning_rate": 2.09838924662871e-07, + "loss": 0.0, + "num_input_tokens_seen": 111867496, + "step": 165985 + }, + { + "epoch": 4.055163315662179, + "grad_norm": 0.004414117429405451, + "learning_rate": 2.097866609603941e-07, + "loss": 0.0, + "num_input_tokens_seen": 111870568, + "step": 165990 + }, + { + "epoch": 4.055285466494027, + "grad_norm": 0.004452398046851158, + "learning_rate": 2.097344030045609e-07, + "loss": 0.0, + "num_input_tokens_seen": 111873960, + "step": 165995 + }, + { + "epoch": 4.055407617325874, + "grad_norm": 0.00011855754564749077, + "learning_rate": 2.096821507957517e-07, + "loss": 0.0, + "num_input_tokens_seen": 111877416, + "step": 166000 + }, + { + "epoch": 4.055529768157721, + "grad_norm": 0.0006512808613479137, + "learning_rate": 2.096299043343468e-07, + "loss": 0.0, + "num_input_tokens_seen": 111880360, + "step": 166005 + }, + { + "epoch": 4.055651918989568, + "grad_norm": 0.001110451645217836, + "learning_rate": 2.0957766362072548e-07, + "loss": 0.0, + "num_input_tokens_seen": 111883432, + "step": 166010 + }, + { + "epoch": 4.055774069821416, + "grad_norm": 0.0010831262916326523, + "learning_rate": 2.0952542865526824e-07, + "loss": 0.0, + "num_input_tokens_seen": 111886632, + "step": 166015 + }, + { + "epoch": 4.0558962206532625, + "grad_norm": 0.0010897867614403367, + "learning_rate": 2.094731994383544e-07, + "loss": 0.0, + "num_input_tokens_seen": 111889832, + "step": 166020 + }, + { + "epoch": 4.05601837148511, + "grad_norm": 0.004145484417676926, + "learning_rate": 2.0942097597036446e-07, + "loss": 0.0, + "num_input_tokens_seen": 111893160, + "step": 166025 + }, + { + "epoch": 4.056140522316957, + "grad_norm": 3.431236109463498e-05, + "learning_rate": 2.0936875825167744e-07, + "loss": 0.0005, + "num_input_tokens_seen": 111896936, + "step": 166030 + }, + { + "epoch": 4.0562626731488045, + "grad_norm": 0.0006043767789378762, + "learning_rate": 2.093165462826736e-07, + "loss": 0.0, + "num_input_tokens_seen": 111900712, + "step": 166035 + }, + { + "epoch": 4.056384823980651, + "grad_norm": 0.002768630860373378, + "learning_rate": 2.0926434006373261e-07, + "loss": 0.0, + "num_input_tokens_seen": 111903976, + "step": 166040 + }, + { + "epoch": 4.056506974812499, + "grad_norm": 0.0006307873409241438, + "learning_rate": 2.0921213959523388e-07, + "loss": 0.0, + "num_input_tokens_seen": 111907176, + "step": 166045 + }, + { + "epoch": 4.056629125644346, + "grad_norm": 0.0002128913183696568, + "learning_rate": 2.091599448775574e-07, + "loss": 0.0, + "num_input_tokens_seen": 111910440, + "step": 166050 + }, + { + "epoch": 4.056751276476192, + "grad_norm": 0.001996406354010105, + "learning_rate": 2.091077559110822e-07, + "loss": 0.0246, + "num_input_tokens_seen": 111913512, + "step": 166055 + }, + { + "epoch": 4.05687342730804, + "grad_norm": 0.001469214097596705, + "learning_rate": 2.0905557269618845e-07, + "loss": 0.0, + "num_input_tokens_seen": 111916840, + "step": 166060 + }, + { + "epoch": 4.056995578139887, + "grad_norm": 0.0005221847095526755, + "learning_rate": 2.0900339523325528e-07, + "loss": 0.0, + "num_input_tokens_seen": 111920296, + "step": 166065 + }, + { + "epoch": 4.057117728971734, + "grad_norm": 0.0004611036856658757, + "learning_rate": 2.0895122352266194e-07, + "loss": 0.0, + "num_input_tokens_seen": 111923624, + "step": 166070 + }, + { + "epoch": 4.057239879803581, + "grad_norm": 0.004240771755576134, + "learning_rate": 2.0889905756478833e-07, + "loss": 0.0, + "num_input_tokens_seen": 111926888, + "step": 166075 + }, + { + "epoch": 4.057362030635429, + "grad_norm": 0.0005182889872230589, + "learning_rate": 2.0884689736001316e-07, + "loss": 0.0, + "num_input_tokens_seen": 111930152, + "step": 166080 + }, + { + "epoch": 4.0574841814672755, + "grad_norm": 0.00043798956903629005, + "learning_rate": 2.0879474290871656e-07, + "loss": 0.0, + "num_input_tokens_seen": 111933544, + "step": 166085 + }, + { + "epoch": 4.057606332299123, + "grad_norm": 0.0013579648220911622, + "learning_rate": 2.0874259421127706e-07, + "loss": 0.0, + "num_input_tokens_seen": 111936616, + "step": 166090 + }, + { + "epoch": 4.05772848313097, + "grad_norm": 0.0017074431525543332, + "learning_rate": 2.0869045126807427e-07, + "loss": 0.0, + "num_input_tokens_seen": 111939496, + "step": 166095 + }, + { + "epoch": 4.0578506339628175, + "grad_norm": 5.534076990443282e-05, + "learning_rate": 2.0863831407948763e-07, + "loss": 0.0, + "num_input_tokens_seen": 111942888, + "step": 166100 + }, + { + "epoch": 4.057972784794664, + "grad_norm": 0.0015134341083467007, + "learning_rate": 2.0858618264589577e-07, + "loss": 0.0, + "num_input_tokens_seen": 111946344, + "step": 166105 + }, + { + "epoch": 4.058094935626512, + "grad_norm": 38.33672332763672, + "learning_rate": 2.0853405696767823e-07, + "loss": 0.0293, + "num_input_tokens_seen": 111949288, + "step": 166110 + }, + { + "epoch": 4.058217086458359, + "grad_norm": 0.008847609162330627, + "learning_rate": 2.0848193704521378e-07, + "loss": 0.0, + "num_input_tokens_seen": 111952616, + "step": 166115 + }, + { + "epoch": 4.058339237290206, + "grad_norm": 5.5585911468369886e-05, + "learning_rate": 2.0842982287888145e-07, + "loss": 0.0, + "num_input_tokens_seen": 111956328, + "step": 166120 + }, + { + "epoch": 4.058461388122053, + "grad_norm": 0.0008487804443575442, + "learning_rate": 2.0837771446906073e-07, + "loss": 0.0, + "num_input_tokens_seen": 111959400, + "step": 166125 + }, + { + "epoch": 4.058583538953901, + "grad_norm": 0.00027049699565395713, + "learning_rate": 2.0832561181612985e-07, + "loss": 0.0, + "num_input_tokens_seen": 111962856, + "step": 166130 + }, + { + "epoch": 4.058705689785747, + "grad_norm": 0.002194001106545329, + "learning_rate": 2.082735149204683e-07, + "loss": 0.0, + "num_input_tokens_seen": 111966184, + "step": 166135 + }, + { + "epoch": 4.058827840617594, + "grad_norm": 0.0029670060612261295, + "learning_rate": 2.0822142378245444e-07, + "loss": 0.0, + "num_input_tokens_seen": 111969448, + "step": 166140 + }, + { + "epoch": 4.058949991449442, + "grad_norm": 0.0005519984406419098, + "learning_rate": 2.0816933840246776e-07, + "loss": 0.0418, + "num_input_tokens_seen": 111972904, + "step": 166145 + }, + { + "epoch": 4.0590721422812885, + "grad_norm": 0.0005892548360861838, + "learning_rate": 2.0811725878088615e-07, + "loss": 0.0, + "num_input_tokens_seen": 111975976, + "step": 166150 + }, + { + "epoch": 4.059194293113136, + "grad_norm": 0.0005409923614934087, + "learning_rate": 2.0806518491808923e-07, + "loss": 0.0004, + "num_input_tokens_seen": 111979624, + "step": 166155 + }, + { + "epoch": 4.059316443944983, + "grad_norm": 0.011323503218591213, + "learning_rate": 2.08013116814455e-07, + "loss": 0.0, + "num_input_tokens_seen": 111982952, + "step": 166160 + }, + { + "epoch": 4.0594385947768306, + "grad_norm": 0.03984846919775009, + "learning_rate": 2.079610544703626e-07, + "loss": 0.0, + "num_input_tokens_seen": 111986088, + "step": 166165 + }, + { + "epoch": 4.059560745608677, + "grad_norm": 0.01349696982651949, + "learning_rate": 2.0790899788619033e-07, + "loss": 0.0, + "num_input_tokens_seen": 111989416, + "step": 166170 + }, + { + "epoch": 4.059682896440525, + "grad_norm": 0.0021861952263861895, + "learning_rate": 2.0785694706231693e-07, + "loss": 0.0, + "num_input_tokens_seen": 111992360, + "step": 166175 + }, + { + "epoch": 4.059805047272372, + "grad_norm": 6.701894744765013e-05, + "learning_rate": 2.0780490199912103e-07, + "loss": 0.0, + "num_input_tokens_seen": 111996072, + "step": 166180 + }, + { + "epoch": 4.059927198104219, + "grad_norm": 0.014640755020081997, + "learning_rate": 2.0775286269698066e-07, + "loss": 0.0, + "num_input_tokens_seen": 111999272, + "step": 166185 + }, + { + "epoch": 4.060049348936066, + "grad_norm": 0.00035089225275442004, + "learning_rate": 2.077008291562745e-07, + "loss": 0.0, + "num_input_tokens_seen": 112002216, + "step": 166190 + }, + { + "epoch": 4.060171499767914, + "grad_norm": 4.5431013859342784e-05, + "learning_rate": 2.076488013773814e-07, + "loss": 0.0, + "num_input_tokens_seen": 112005288, + "step": 166195 + }, + { + "epoch": 4.06029365059976, + "grad_norm": 4.9018626668839715e-06, + "learning_rate": 2.0759677936067899e-07, + "loss": 0.0, + "num_input_tokens_seen": 112008808, + "step": 166200 + }, + { + "epoch": 4.060415801431608, + "grad_norm": 0.0014554978115484118, + "learning_rate": 2.0754476310654611e-07, + "loss": 0.0, + "num_input_tokens_seen": 112011880, + "step": 166205 + }, + { + "epoch": 4.060537952263455, + "grad_norm": 0.0021777262445539236, + "learning_rate": 2.074927526153607e-07, + "loss": 0.0, + "num_input_tokens_seen": 112015336, + "step": 166210 + }, + { + "epoch": 4.0606601030953025, + "grad_norm": 0.002060574246570468, + "learning_rate": 2.074407478875012e-07, + "loss": 0.0, + "num_input_tokens_seen": 112018536, + "step": 166215 + }, + { + "epoch": 4.060782253927149, + "grad_norm": 0.0008180902805179358, + "learning_rate": 2.073887489233459e-07, + "loss": 0.0, + "num_input_tokens_seen": 112021480, + "step": 166220 + }, + { + "epoch": 4.060904404758997, + "grad_norm": 0.00047382028424181044, + "learning_rate": 2.0733675572327258e-07, + "loss": 0.0, + "num_input_tokens_seen": 112024488, + "step": 166225 + }, + { + "epoch": 4.061026555590844, + "grad_norm": 0.004084159154444933, + "learning_rate": 2.0728476828765996e-07, + "loss": 0.0, + "num_input_tokens_seen": 112027688, + "step": 166230 + }, + { + "epoch": 4.06114870642269, + "grad_norm": 0.0006773903151042759, + "learning_rate": 2.0723278661688526e-07, + "loss": 0.0475, + "num_input_tokens_seen": 112031272, + "step": 166235 + }, + { + "epoch": 4.061270857254538, + "grad_norm": 0.0005431032041087747, + "learning_rate": 2.0718081071132732e-07, + "loss": 0.0, + "num_input_tokens_seen": 112034600, + "step": 166240 + }, + { + "epoch": 4.061393008086385, + "grad_norm": 5.953190702712163e-05, + "learning_rate": 2.0712884057136348e-07, + "loss": 0.0, + "num_input_tokens_seen": 112037672, + "step": 166245 + }, + { + "epoch": 4.061515158918232, + "grad_norm": 0.00018572367844171822, + "learning_rate": 2.07076876197372e-07, + "loss": 0.0, + "num_input_tokens_seen": 112041192, + "step": 166250 + }, + { + "epoch": 4.061637309750079, + "grad_norm": 0.0017650712979957461, + "learning_rate": 2.0702491758973105e-07, + "loss": 0.0, + "num_input_tokens_seen": 112044584, + "step": 166255 + }, + { + "epoch": 4.061759460581927, + "grad_norm": 0.00657630106434226, + "learning_rate": 2.0697296474881787e-07, + "loss": 0.0, + "num_input_tokens_seen": 112047784, + "step": 166260 + }, + { + "epoch": 4.0618816114137735, + "grad_norm": 0.00039903729339130223, + "learning_rate": 2.069210176750108e-07, + "loss": 0.0, + "num_input_tokens_seen": 112051112, + "step": 166265 + }, + { + "epoch": 4.062003762245621, + "grad_norm": 0.01160856056958437, + "learning_rate": 2.0686907636868746e-07, + "loss": 0.0, + "num_input_tokens_seen": 112054184, + "step": 166270 + }, + { + "epoch": 4.062125913077468, + "grad_norm": 0.0013919977936893702, + "learning_rate": 2.0681714083022527e-07, + "loss": 0.0, + "num_input_tokens_seen": 112057448, + "step": 166275 + }, + { + "epoch": 4.0622480639093155, + "grad_norm": 0.0015453466912731528, + "learning_rate": 2.0676521106000245e-07, + "loss": 0.0, + "num_input_tokens_seen": 112060520, + "step": 166280 + }, + { + "epoch": 4.062370214741162, + "grad_norm": 0.0008798211347311735, + "learning_rate": 2.0671328705839608e-07, + "loss": 0.0, + "num_input_tokens_seen": 112063848, + "step": 166285 + }, + { + "epoch": 4.06249236557301, + "grad_norm": 0.005919753108173609, + "learning_rate": 2.066613688257842e-07, + "loss": 0.0, + "num_input_tokens_seen": 112067240, + "step": 166290 + }, + { + "epoch": 4.062614516404857, + "grad_norm": 0.000499585410580039, + "learning_rate": 2.066094563625441e-07, + "loss": 0.0, + "num_input_tokens_seen": 112070248, + "step": 166295 + }, + { + "epoch": 4.062736667236704, + "grad_norm": 0.0014772225404158235, + "learning_rate": 2.065575496690537e-07, + "loss": 0.0, + "num_input_tokens_seen": 112073832, + "step": 166300 + }, + { + "epoch": 4.062858818068551, + "grad_norm": 9.564452193444595e-05, + "learning_rate": 2.0650564874568988e-07, + "loss": 0.0, + "num_input_tokens_seen": 112077096, + "step": 166305 + }, + { + "epoch": 4.062980968900399, + "grad_norm": 6.883578316774219e-05, + "learning_rate": 2.0645375359283045e-07, + "loss": 0.0, + "num_input_tokens_seen": 112080296, + "step": 166310 + }, + { + "epoch": 4.063103119732245, + "grad_norm": 0.037576161324977875, + "learning_rate": 2.0640186421085303e-07, + "loss": 0.0, + "num_input_tokens_seen": 112083880, + "step": 166315 + }, + { + "epoch": 4.063225270564092, + "grad_norm": 0.0019669902976602316, + "learning_rate": 2.063499806001344e-07, + "loss": 0.0, + "num_input_tokens_seen": 112086824, + "step": 166320 + }, + { + "epoch": 4.06334742139594, + "grad_norm": 0.00033250940032303333, + "learning_rate": 2.0629810276105252e-07, + "loss": 0.0, + "num_input_tokens_seen": 112090024, + "step": 166325 + }, + { + "epoch": 4.0634695722277865, + "grad_norm": 0.002947803121060133, + "learning_rate": 2.0624623069398407e-07, + "loss": 0.0, + "num_input_tokens_seen": 112093416, + "step": 166330 + }, + { + "epoch": 4.063591723059634, + "grad_norm": 0.0019779824651777744, + "learning_rate": 2.061943643993067e-07, + "loss": 0.0, + "num_input_tokens_seen": 112097000, + "step": 166335 + }, + { + "epoch": 4.063713873891481, + "grad_norm": 0.004801144357770681, + "learning_rate": 2.061425038773972e-07, + "loss": 0.0, + "num_input_tokens_seen": 112100456, + "step": 166340 + }, + { + "epoch": 4.0638360247233285, + "grad_norm": 0.002539390465244651, + "learning_rate": 2.0609064912863284e-07, + "loss": 0.0, + "num_input_tokens_seen": 112103656, + "step": 166345 + }, + { + "epoch": 4.063958175555175, + "grad_norm": 0.0009300903766416013, + "learning_rate": 2.0603880015339115e-07, + "loss": 0.0, + "num_input_tokens_seen": 112107048, + "step": 166350 + }, + { + "epoch": 4.064080326387023, + "grad_norm": 0.0006863299640826881, + "learning_rate": 2.059869569520486e-07, + "loss": 0.0, + "num_input_tokens_seen": 112110696, + "step": 166355 + }, + { + "epoch": 4.06420247721887, + "grad_norm": 0.0005132516380399466, + "learning_rate": 2.0593511952498277e-07, + "loss": 0.0, + "num_input_tokens_seen": 112113896, + "step": 166360 + }, + { + "epoch": 4.064324628050717, + "grad_norm": 0.00027452048379927874, + "learning_rate": 2.0588328787257004e-07, + "loss": 0.0, + "num_input_tokens_seen": 112117224, + "step": 166365 + }, + { + "epoch": 4.064446778882564, + "grad_norm": 0.00030005615553818643, + "learning_rate": 2.0583146199518787e-07, + "loss": 0.0, + "num_input_tokens_seen": 112120552, + "step": 166370 + }, + { + "epoch": 4.064568929714412, + "grad_norm": 0.01976819522678852, + "learning_rate": 2.0577964189321284e-07, + "loss": 0.0, + "num_input_tokens_seen": 112123880, + "step": 166375 + }, + { + "epoch": 4.064691080546258, + "grad_norm": 0.001043083262629807, + "learning_rate": 2.0572782756702168e-07, + "loss": 0.0, + "num_input_tokens_seen": 112127080, + "step": 166380 + }, + { + "epoch": 4.064813231378106, + "grad_norm": 0.0010621962137520313, + "learning_rate": 2.0567601901699173e-07, + "loss": 0.0, + "num_input_tokens_seen": 112130664, + "step": 166385 + }, + { + "epoch": 4.064935382209953, + "grad_norm": 0.0005313365836627781, + "learning_rate": 2.0562421624349903e-07, + "loss": 0.0, + "num_input_tokens_seen": 112133992, + "step": 166390 + }, + { + "epoch": 4.0650575330418, + "grad_norm": 3.122011185041629e-05, + "learning_rate": 2.0557241924692103e-07, + "loss": 0.0, + "num_input_tokens_seen": 112137448, + "step": 166395 + }, + { + "epoch": 4.065179683873647, + "grad_norm": 0.004067976027727127, + "learning_rate": 2.0552062802763382e-07, + "loss": 0.0, + "num_input_tokens_seen": 112140584, + "step": 166400 + }, + { + "epoch": 4.065301834705495, + "grad_norm": 0.0010395898716524243, + "learning_rate": 2.0546884258601427e-07, + "loss": 0.0001, + "num_input_tokens_seen": 112143976, + "step": 166405 + }, + { + "epoch": 4.0654239855373415, + "grad_norm": 0.00014114049554336816, + "learning_rate": 2.0541706292243921e-07, + "loss": 0.0, + "num_input_tokens_seen": 112147560, + "step": 166410 + }, + { + "epoch": 4.065546136369188, + "grad_norm": 0.001295996829867363, + "learning_rate": 2.0536528903728478e-07, + "loss": 0.0, + "num_input_tokens_seen": 112151016, + "step": 166415 + }, + { + "epoch": 4.065668287201036, + "grad_norm": 0.00015398616960737854, + "learning_rate": 2.053135209309279e-07, + "loss": 0.0, + "num_input_tokens_seen": 112154408, + "step": 166420 + }, + { + "epoch": 4.065790438032883, + "grad_norm": 5.002156103728339e-05, + "learning_rate": 2.0526175860374462e-07, + "loss": 0.0, + "num_input_tokens_seen": 112157864, + "step": 166425 + }, + { + "epoch": 4.06591258886473, + "grad_norm": 0.021989166736602783, + "learning_rate": 2.0521000205611162e-07, + "loss": 0.0, + "num_input_tokens_seen": 112161000, + "step": 166430 + }, + { + "epoch": 4.066034739696577, + "grad_norm": 0.00031291740015149117, + "learning_rate": 2.0515825128840548e-07, + "loss": 0.0, + "num_input_tokens_seen": 112164008, + "step": 166435 + }, + { + "epoch": 4.066156890528425, + "grad_norm": 0.00011651839304249734, + "learning_rate": 2.0510650630100212e-07, + "loss": 0.0, + "num_input_tokens_seen": 112167272, + "step": 166440 + }, + { + "epoch": 4.066279041360271, + "grad_norm": 0.00010879627370741218, + "learning_rate": 2.0505476709427827e-07, + "loss": 0.0, + "num_input_tokens_seen": 112170600, + "step": 166445 + }, + { + "epoch": 4.066401192192119, + "grad_norm": 0.0010120809311047196, + "learning_rate": 2.050030336686097e-07, + "loss": 0.0, + "num_input_tokens_seen": 112174056, + "step": 166450 + }, + { + "epoch": 4.066523343023966, + "grad_norm": 0.0003149090916849673, + "learning_rate": 2.0495130602437315e-07, + "loss": 0.0, + "num_input_tokens_seen": 112177192, + "step": 166455 + }, + { + "epoch": 4.066645493855813, + "grad_norm": 0.0001731503289192915, + "learning_rate": 2.048995841619443e-07, + "loss": 0.0, + "num_input_tokens_seen": 112180776, + "step": 166460 + }, + { + "epoch": 4.06676764468766, + "grad_norm": 0.00022798238205723464, + "learning_rate": 2.0484786808169975e-07, + "loss": 0.0, + "num_input_tokens_seen": 112184168, + "step": 166465 + }, + { + "epoch": 4.066889795519508, + "grad_norm": 0.002712154295295477, + "learning_rate": 2.0479615778401517e-07, + "loss": 0.0, + "num_input_tokens_seen": 112187880, + "step": 166470 + }, + { + "epoch": 4.0670119463513545, + "grad_norm": 0.00457811402156949, + "learning_rate": 2.0474445326926703e-07, + "loss": 0.0, + "num_input_tokens_seen": 112191592, + "step": 166475 + }, + { + "epoch": 4.067134097183202, + "grad_norm": 0.00020053704793099314, + "learning_rate": 2.0469275453783098e-07, + "loss": 0.0, + "num_input_tokens_seen": 112194792, + "step": 166480 + }, + { + "epoch": 4.067256248015049, + "grad_norm": 0.0011597948614507914, + "learning_rate": 2.046410615900832e-07, + "loss": 0.0, + "num_input_tokens_seen": 112198376, + "step": 166485 + }, + { + "epoch": 4.067378398846897, + "grad_norm": 0.0028763189911842346, + "learning_rate": 2.0458937442639968e-07, + "loss": 0.0, + "num_input_tokens_seen": 112201896, + "step": 166490 + }, + { + "epoch": 4.067500549678743, + "grad_norm": 0.008178922347724438, + "learning_rate": 2.0453769304715586e-07, + "loss": 0.0, + "num_input_tokens_seen": 112205160, + "step": 166495 + }, + { + "epoch": 4.06762270051059, + "grad_norm": 0.00021302708773873746, + "learning_rate": 2.0448601745272797e-07, + "loss": 0.0, + "num_input_tokens_seen": 112208232, + "step": 166500 + }, + { + "epoch": 4.067744851342438, + "grad_norm": 0.0008370587020181119, + "learning_rate": 2.044343476434919e-07, + "loss": 0.0, + "num_input_tokens_seen": 112212008, + "step": 166505 + }, + { + "epoch": 4.067867002174284, + "grad_norm": 0.001814281684346497, + "learning_rate": 2.0438268361982303e-07, + "loss": 0.0, + "num_input_tokens_seen": 112215464, + "step": 166510 + }, + { + "epoch": 4.067989153006132, + "grad_norm": 0.0003151059499941766, + "learning_rate": 2.0433102538209745e-07, + "loss": 0.0, + "num_input_tokens_seen": 112218856, + "step": 166515 + }, + { + "epoch": 4.068111303837979, + "grad_norm": 0.00011449768499005586, + "learning_rate": 2.0427937293069042e-07, + "loss": 0.0, + "num_input_tokens_seen": 112221992, + "step": 166520 + }, + { + "epoch": 4.068233454669826, + "grad_norm": 0.00038926705019548535, + "learning_rate": 2.0422772626597796e-07, + "loss": 0.0512, + "num_input_tokens_seen": 112225128, + "step": 166525 + }, + { + "epoch": 4.068355605501673, + "grad_norm": 19.979957580566406, + "learning_rate": 2.0417608538833563e-07, + "loss": 0.0204, + "num_input_tokens_seen": 112228328, + "step": 166530 + }, + { + "epoch": 4.068477756333521, + "grad_norm": 0.003946941811591387, + "learning_rate": 2.0412445029813863e-07, + "loss": 0.0, + "num_input_tokens_seen": 112231528, + "step": 166535 + }, + { + "epoch": 4.068599907165368, + "grad_norm": 0.04863838851451874, + "learning_rate": 2.0407282099576295e-07, + "loss": 0.0001, + "num_input_tokens_seen": 112234600, + "step": 166540 + }, + { + "epoch": 4.068722057997215, + "grad_norm": 0.00047266719047911465, + "learning_rate": 2.0402119748158352e-07, + "loss": 0.0, + "num_input_tokens_seen": 112237800, + "step": 166545 + }, + { + "epoch": 4.068844208829062, + "grad_norm": 0.0022663248237222433, + "learning_rate": 2.039695797559763e-07, + "loss": 0.0, + "num_input_tokens_seen": 112240872, + "step": 166550 + }, + { + "epoch": 4.06896635966091, + "grad_norm": 0.001182141830213368, + "learning_rate": 2.0391796781931615e-07, + "loss": 0.0, + "num_input_tokens_seen": 112244392, + "step": 166555 + }, + { + "epoch": 4.069088510492756, + "grad_norm": 0.0005252945702522993, + "learning_rate": 2.0386636167197868e-07, + "loss": 0.0, + "num_input_tokens_seen": 112247976, + "step": 166560 + }, + { + "epoch": 4.069210661324604, + "grad_norm": 0.00026759880711324513, + "learning_rate": 2.038147613143394e-07, + "loss": 0.0, + "num_input_tokens_seen": 112251112, + "step": 166565 + }, + { + "epoch": 4.069332812156451, + "grad_norm": 0.0017746612429618835, + "learning_rate": 2.0376316674677306e-07, + "loss": 0.0, + "num_input_tokens_seen": 112254184, + "step": 166570 + }, + { + "epoch": 4.069454962988298, + "grad_norm": 0.0013021057238802314, + "learning_rate": 2.0371157796965544e-07, + "loss": 0.0, + "num_input_tokens_seen": 112257576, + "step": 166575 + }, + { + "epoch": 4.069577113820145, + "grad_norm": 0.039894040673971176, + "learning_rate": 2.0365999498336138e-07, + "loss": 0.0, + "num_input_tokens_seen": 112260904, + "step": 166580 + }, + { + "epoch": 4.069699264651993, + "grad_norm": 0.0003660069196484983, + "learning_rate": 2.0360841778826576e-07, + "loss": 0.0, + "num_input_tokens_seen": 112264040, + "step": 166585 + }, + { + "epoch": 4.0698214154838395, + "grad_norm": 5.454904749058187e-05, + "learning_rate": 2.0355684638474412e-07, + "loss": 0.0, + "num_input_tokens_seen": 112267240, + "step": 166590 + }, + { + "epoch": 4.069943566315686, + "grad_norm": 0.0005420465604402125, + "learning_rate": 2.035052807731712e-07, + "loss": 0.0, + "num_input_tokens_seen": 112270696, + "step": 166595 + }, + { + "epoch": 4.070065717147534, + "grad_norm": 0.000395093928091228, + "learning_rate": 2.034537209539222e-07, + "loss": 0.0, + "num_input_tokens_seen": 112274152, + "step": 166600 + }, + { + "epoch": 4.070187867979381, + "grad_norm": 0.00028210054733790457, + "learning_rate": 2.0340216692737188e-07, + "loss": 0.0, + "num_input_tokens_seen": 112278056, + "step": 166605 + }, + { + "epoch": 4.070310018811228, + "grad_norm": 0.0013541615335270762, + "learning_rate": 2.0335061869389547e-07, + "loss": 0.0, + "num_input_tokens_seen": 112281448, + "step": 166610 + }, + { + "epoch": 4.070432169643075, + "grad_norm": 5.062710988568142e-05, + "learning_rate": 2.0329907625386733e-07, + "loss": 0.0, + "num_input_tokens_seen": 112284904, + "step": 166615 + }, + { + "epoch": 4.070554320474923, + "grad_norm": 0.0002720316406339407, + "learning_rate": 2.0324753960766262e-07, + "loss": 0.0, + "num_input_tokens_seen": 112288616, + "step": 166620 + }, + { + "epoch": 4.070676471306769, + "grad_norm": 0.039632584899663925, + "learning_rate": 2.0319600875565635e-07, + "loss": 0.0, + "num_input_tokens_seen": 112291752, + "step": 166625 + }, + { + "epoch": 4.070798622138617, + "grad_norm": 0.0007308509666472673, + "learning_rate": 2.031444836982228e-07, + "loss": 0.0, + "num_input_tokens_seen": 112295400, + "step": 166630 + }, + { + "epoch": 4.070920772970464, + "grad_norm": 0.0018601133488118649, + "learning_rate": 2.030929644357371e-07, + "loss": 0.0, + "num_input_tokens_seen": 112298664, + "step": 166635 + }, + { + "epoch": 4.071042923802311, + "grad_norm": 7.507337431889027e-05, + "learning_rate": 2.030414509685734e-07, + "loss": 0.0, + "num_input_tokens_seen": 112301736, + "step": 166640 + }, + { + "epoch": 4.071165074634158, + "grad_norm": 0.016003567725419998, + "learning_rate": 2.0298994329710694e-07, + "loss": 0.0, + "num_input_tokens_seen": 112305064, + "step": 166645 + }, + { + "epoch": 4.071287225466006, + "grad_norm": 0.0015702687669545412, + "learning_rate": 2.029384414217118e-07, + "loss": 0.0, + "num_input_tokens_seen": 112308328, + "step": 166650 + }, + { + "epoch": 4.0714093762978525, + "grad_norm": 0.00132664293050766, + "learning_rate": 2.0288694534276262e-07, + "loss": 0.0, + "num_input_tokens_seen": 112311592, + "step": 166655 + }, + { + "epoch": 4.0715315271297, + "grad_norm": 0.003941015340387821, + "learning_rate": 2.0283545506063426e-07, + "loss": 0.0, + "num_input_tokens_seen": 112314728, + "step": 166660 + }, + { + "epoch": 4.071653677961547, + "grad_norm": 0.0004780443850904703, + "learning_rate": 2.0278397057570063e-07, + "loss": 0.0005, + "num_input_tokens_seen": 112317672, + "step": 166665 + }, + { + "epoch": 4.0717758287933945, + "grad_norm": 0.026970183476805687, + "learning_rate": 2.0273249188833652e-07, + "loss": 0.0, + "num_input_tokens_seen": 112321128, + "step": 166670 + }, + { + "epoch": 4.071897979625241, + "grad_norm": 0.006872775498777628, + "learning_rate": 2.026810189989161e-07, + "loss": 0.0667, + "num_input_tokens_seen": 112324840, + "step": 166675 + }, + { + "epoch": 4.072020130457088, + "grad_norm": 0.0005837523494847119, + "learning_rate": 2.0262955190781393e-07, + "loss": 0.0, + "num_input_tokens_seen": 112327848, + "step": 166680 + }, + { + "epoch": 4.072142281288936, + "grad_norm": 0.001965815667062998, + "learning_rate": 2.025780906154041e-07, + "loss": 0.0, + "num_input_tokens_seen": 112331240, + "step": 166685 + }, + { + "epoch": 4.072264432120782, + "grad_norm": 0.00028454093262553215, + "learning_rate": 2.025266351220607e-07, + "loss": 0.0, + "num_input_tokens_seen": 112334312, + "step": 166690 + }, + { + "epoch": 4.07238658295263, + "grad_norm": 0.00013930317072663456, + "learning_rate": 2.0247518542815822e-07, + "loss": 0.0, + "num_input_tokens_seen": 112337256, + "step": 166695 + }, + { + "epoch": 4.072508733784477, + "grad_norm": 3.846707477350719e-05, + "learning_rate": 2.024237415340706e-07, + "loss": 0.0, + "num_input_tokens_seen": 112340648, + "step": 166700 + }, + { + "epoch": 4.072630884616324, + "grad_norm": 0.0019840323366224766, + "learning_rate": 2.023723034401722e-07, + "loss": 0.0, + "num_input_tokens_seen": 112343720, + "step": 166705 + }, + { + "epoch": 4.072753035448171, + "grad_norm": 0.0004893506411463022, + "learning_rate": 2.0232087114683672e-07, + "loss": 0.0, + "num_input_tokens_seen": 112346792, + "step": 166710 + }, + { + "epoch": 4.072875186280019, + "grad_norm": 0.0010090161813423038, + "learning_rate": 2.022694446544385e-07, + "loss": 0.0, + "num_input_tokens_seen": 112350888, + "step": 166715 + }, + { + "epoch": 4.0729973371118655, + "grad_norm": 0.00045400112867355347, + "learning_rate": 2.0221802396335164e-07, + "loss": 0.0003, + "num_input_tokens_seen": 112354408, + "step": 166720 + }, + { + "epoch": 4.073119487943713, + "grad_norm": 0.00026314205024391413, + "learning_rate": 2.0216660907394955e-07, + "loss": 0.0, + "num_input_tokens_seen": 112357544, + "step": 166725 + }, + { + "epoch": 4.07324163877556, + "grad_norm": 0.004530659411102533, + "learning_rate": 2.0211519998660687e-07, + "loss": 0.0001, + "num_input_tokens_seen": 112361000, + "step": 166730 + }, + { + "epoch": 4.0733637896074075, + "grad_norm": 0.0017997390823438764, + "learning_rate": 2.020637967016967e-07, + "loss": 0.0, + "num_input_tokens_seen": 112363944, + "step": 166735 + }, + { + "epoch": 4.073485940439254, + "grad_norm": 0.005322501994669437, + "learning_rate": 2.0201239921959346e-07, + "loss": 0.0, + "num_input_tokens_seen": 112367080, + "step": 166740 + }, + { + "epoch": 4.073608091271102, + "grad_norm": 0.0005989481578581035, + "learning_rate": 2.0196100754067046e-07, + "loss": 0.0, + "num_input_tokens_seen": 112370472, + "step": 166745 + }, + { + "epoch": 4.073730242102949, + "grad_norm": 0.003723002038896084, + "learning_rate": 2.0190962166530167e-07, + "loss": 0.0, + "num_input_tokens_seen": 112373800, + "step": 166750 + }, + { + "epoch": 4.073852392934796, + "grad_norm": 0.000128319050418213, + "learning_rate": 2.018582415938611e-07, + "loss": 0.0, + "num_input_tokens_seen": 112376872, + "step": 166755 + }, + { + "epoch": 4.073974543766643, + "grad_norm": 0.016991496086120605, + "learning_rate": 2.018068673267217e-07, + "loss": 0.0, + "num_input_tokens_seen": 112379816, + "step": 166760 + }, + { + "epoch": 4.07409669459849, + "grad_norm": 1.5704474208178e-05, + "learning_rate": 2.017554988642578e-07, + "loss": 0.0, + "num_input_tokens_seen": 112383144, + "step": 166765 + }, + { + "epoch": 4.074218845430337, + "grad_norm": 0.0006832077633589506, + "learning_rate": 2.0170413620684222e-07, + "loss": 0.0, + "num_input_tokens_seen": 112386344, + "step": 166770 + }, + { + "epoch": 4.074340996262184, + "grad_norm": 0.00029440299840644, + "learning_rate": 2.0165277935484926e-07, + "loss": 0.0, + "num_input_tokens_seen": 112389672, + "step": 166775 + }, + { + "epoch": 4.074463147094032, + "grad_norm": 4.601407272275537e-05, + "learning_rate": 2.016014283086518e-07, + "loss": 0.0, + "num_input_tokens_seen": 112392744, + "step": 166780 + }, + { + "epoch": 4.0745852979258785, + "grad_norm": 0.0020400311332195997, + "learning_rate": 2.0155008306862366e-07, + "loss": 0.0, + "num_input_tokens_seen": 112396136, + "step": 166785 + }, + { + "epoch": 4.074707448757726, + "grad_norm": 0.0016668371390551329, + "learning_rate": 2.0149874363513775e-07, + "loss": 0.0, + "num_input_tokens_seen": 112399720, + "step": 166790 + }, + { + "epoch": 4.074829599589573, + "grad_norm": 0.006800004281103611, + "learning_rate": 2.0144741000856813e-07, + "loss": 0.0, + "num_input_tokens_seen": 112402536, + "step": 166795 + }, + { + "epoch": 4.0749517504214205, + "grad_norm": 0.0035012788139283657, + "learning_rate": 2.0139608218928772e-07, + "loss": 0.0, + "num_input_tokens_seen": 112406120, + "step": 166800 + }, + { + "epoch": 4.075073901253267, + "grad_norm": 0.00147433637175709, + "learning_rate": 2.0134476017766943e-07, + "loss": 0.0, + "num_input_tokens_seen": 112409384, + "step": 166805 + }, + { + "epoch": 4.075196052085115, + "grad_norm": 0.020915621891617775, + "learning_rate": 2.0129344397408698e-07, + "loss": 0.0, + "num_input_tokens_seen": 112412648, + "step": 166810 + }, + { + "epoch": 4.075318202916962, + "grad_norm": 0.00011061589611927047, + "learning_rate": 2.0124213357891362e-07, + "loss": 0.0, + "num_input_tokens_seen": 112415720, + "step": 166815 + }, + { + "epoch": 4.075440353748809, + "grad_norm": 3.273623588029295e-05, + "learning_rate": 2.0119082899252216e-07, + "loss": 0.0, + "num_input_tokens_seen": 112419496, + "step": 166820 + }, + { + "epoch": 4.075562504580656, + "grad_norm": 0.00029083856497891247, + "learning_rate": 2.0113953021528595e-07, + "loss": 0.0, + "num_input_tokens_seen": 112422824, + "step": 166825 + }, + { + "epoch": 4.075684655412504, + "grad_norm": 0.00011449142039055005, + "learning_rate": 2.0108823724757772e-07, + "loss": 0.0, + "num_input_tokens_seen": 112426280, + "step": 166830 + }, + { + "epoch": 4.07580680624435, + "grad_norm": 0.0016644089482724667, + "learning_rate": 2.0103695008977083e-07, + "loss": 0.0, + "num_input_tokens_seen": 112429608, + "step": 166835 + }, + { + "epoch": 4.075928957076198, + "grad_norm": 0.0005640396266244352, + "learning_rate": 2.0098566874223833e-07, + "loss": 0.0, + "num_input_tokens_seen": 112432744, + "step": 166840 + }, + { + "epoch": 4.076051107908045, + "grad_norm": 0.0011225900379940867, + "learning_rate": 2.0093439320535267e-07, + "loss": 0.0, + "num_input_tokens_seen": 112436392, + "step": 166845 + }, + { + "epoch": 4.0761732587398924, + "grad_norm": 0.0030262626241892576, + "learning_rate": 2.008831234794872e-07, + "loss": 0.0, + "num_input_tokens_seen": 112440104, + "step": 166850 + }, + { + "epoch": 4.076295409571739, + "grad_norm": 0.0018415100639685988, + "learning_rate": 2.0083185956501447e-07, + "loss": 0.0, + "num_input_tokens_seen": 112443112, + "step": 166855 + }, + { + "epoch": 4.076417560403586, + "grad_norm": 0.0006190946442075074, + "learning_rate": 2.0078060146230758e-07, + "loss": 0.0, + "num_input_tokens_seen": 112446184, + "step": 166860 + }, + { + "epoch": 4.076539711235434, + "grad_norm": 1.6634949133731425e-05, + "learning_rate": 2.007293491717389e-07, + "loss": 0.0, + "num_input_tokens_seen": 112449640, + "step": 166865 + }, + { + "epoch": 4.07666186206728, + "grad_norm": 0.0008858484798111022, + "learning_rate": 2.0067810269368136e-07, + "loss": 0.0, + "num_input_tokens_seen": 112452904, + "step": 166870 + }, + { + "epoch": 4.076784012899128, + "grad_norm": 0.004251960664987564, + "learning_rate": 2.0062686202850797e-07, + "loss": 0.0, + "num_input_tokens_seen": 112456488, + "step": 166875 + }, + { + "epoch": 4.076906163730975, + "grad_norm": 5.69775584153831e-05, + "learning_rate": 2.005756271765907e-07, + "loss": 0.0, + "num_input_tokens_seen": 112459688, + "step": 166880 + }, + { + "epoch": 4.077028314562822, + "grad_norm": 0.0008310135453939438, + "learning_rate": 2.005243981383028e-07, + "loss": 0.0, + "num_input_tokens_seen": 112463400, + "step": 166885 + }, + { + "epoch": 4.077150465394669, + "grad_norm": 0.0076802265830338, + "learning_rate": 2.004731749140165e-07, + "loss": 0.0, + "num_input_tokens_seen": 112466600, + "step": 166890 + }, + { + "epoch": 4.077272616226517, + "grad_norm": 0.00616258243098855, + "learning_rate": 2.0042195750410406e-07, + "loss": 0.0, + "num_input_tokens_seen": 112470056, + "step": 166895 + }, + { + "epoch": 4.0773947670583635, + "grad_norm": 0.00012890664220321923, + "learning_rate": 2.0037074590893842e-07, + "loss": 0.0, + "num_input_tokens_seen": 112473576, + "step": 166900 + }, + { + "epoch": 4.077516917890211, + "grad_norm": 0.0006692970637232065, + "learning_rate": 2.0031954012889153e-07, + "loss": 0.0917, + "num_input_tokens_seen": 112476904, + "step": 166905 + }, + { + "epoch": 4.077639068722058, + "grad_norm": 0.000872828415594995, + "learning_rate": 2.0026834016433635e-07, + "loss": 0.0, + "num_input_tokens_seen": 112480488, + "step": 166910 + }, + { + "epoch": 4.0777612195539055, + "grad_norm": 0.0008254763088189065, + "learning_rate": 2.0021714601564464e-07, + "loss": 0.0, + "num_input_tokens_seen": 112484264, + "step": 166915 + }, + { + "epoch": 4.077883370385752, + "grad_norm": 0.0012822564458474517, + "learning_rate": 2.0016595768318922e-07, + "loss": 0.0, + "num_input_tokens_seen": 112487464, + "step": 166920 + }, + { + "epoch": 4.0780055212176, + "grad_norm": 0.0002115043462254107, + "learning_rate": 2.0011477516734175e-07, + "loss": 0.0, + "num_input_tokens_seen": 112491176, + "step": 166925 + }, + { + "epoch": 4.078127672049447, + "grad_norm": 0.0035075817722827196, + "learning_rate": 2.0006359846847487e-07, + "loss": 0.0, + "num_input_tokens_seen": 112493928, + "step": 166930 + }, + { + "epoch": 4.078249822881294, + "grad_norm": 0.00015296187484636903, + "learning_rate": 2.000124275869609e-07, + "loss": 0.0, + "num_input_tokens_seen": 112497576, + "step": 166935 + }, + { + "epoch": 4.078371973713141, + "grad_norm": 0.016465021297335625, + "learning_rate": 1.9996126252317146e-07, + "loss": 0.0, + "num_input_tokens_seen": 112500584, + "step": 166940 + }, + { + "epoch": 4.078494124544988, + "grad_norm": 0.002111559733748436, + "learning_rate": 1.9991010327747915e-07, + "loss": 0.0, + "num_input_tokens_seen": 112503784, + "step": 166945 + }, + { + "epoch": 4.078616275376835, + "grad_norm": 0.006358523387461901, + "learning_rate": 1.9985894985025542e-07, + "loss": 0.0, + "num_input_tokens_seen": 112507240, + "step": 166950 + }, + { + "epoch": 4.078738426208682, + "grad_norm": 0.000776168773882091, + "learning_rate": 1.99807802241873e-07, + "loss": 0.0, + "num_input_tokens_seen": 112510184, + "step": 166955 + }, + { + "epoch": 4.07886057704053, + "grad_norm": 2.3206490368465893e-05, + "learning_rate": 1.9975666045270323e-07, + "loss": 0.0, + "num_input_tokens_seen": 112514280, + "step": 166960 + }, + { + "epoch": 4.0789827278723765, + "grad_norm": 0.0020938734523952007, + "learning_rate": 1.9970552448311818e-07, + "loss": 0.0, + "num_input_tokens_seen": 112517608, + "step": 166965 + }, + { + "epoch": 4.079104878704224, + "grad_norm": 0.000599257240537554, + "learning_rate": 1.9965439433349008e-07, + "loss": 0.0, + "num_input_tokens_seen": 112520808, + "step": 166970 + }, + { + "epoch": 4.079227029536071, + "grad_norm": 0.0007910645217634737, + "learning_rate": 1.9960327000419032e-07, + "loss": 0.0, + "num_input_tokens_seen": 112524008, + "step": 166975 + }, + { + "epoch": 4.0793491803679185, + "grad_norm": 0.0006026038900017738, + "learning_rate": 1.9955215149559101e-07, + "loss": 0.0, + "num_input_tokens_seen": 112526952, + "step": 166980 + }, + { + "epoch": 4.079471331199765, + "grad_norm": 0.00035834472510032356, + "learning_rate": 1.9950103880806357e-07, + "loss": 0.0, + "num_input_tokens_seen": 112530664, + "step": 166985 + }, + { + "epoch": 4.079593482031613, + "grad_norm": 0.004703744780272245, + "learning_rate": 1.9944993194198012e-07, + "loss": 0.0018, + "num_input_tokens_seen": 112534184, + "step": 166990 + }, + { + "epoch": 4.07971563286346, + "grad_norm": 0.0003986228839494288, + "learning_rate": 1.9939883089771203e-07, + "loss": 0.0, + "num_input_tokens_seen": 112537576, + "step": 166995 + }, + { + "epoch": 4.079837783695307, + "grad_norm": 0.00031761577702127397, + "learning_rate": 1.9934773567563079e-07, + "loss": 0.0308, + "num_input_tokens_seen": 112540840, + "step": 167000 + }, + { + "epoch": 4.079959934527154, + "grad_norm": 0.0002825024421326816, + "learning_rate": 1.9929664627610842e-07, + "loss": 0.0392, + "num_input_tokens_seen": 112544232, + "step": 167005 + }, + { + "epoch": 4.080082085359002, + "grad_norm": 7.694314263062552e-05, + "learning_rate": 1.9924556269951587e-07, + "loss": 0.0359, + "num_input_tokens_seen": 112547624, + "step": 167010 + }, + { + "epoch": 4.080204236190848, + "grad_norm": 0.0013545994879677892, + "learning_rate": 1.9919448494622526e-07, + "loss": 0.0, + "num_input_tokens_seen": 112551272, + "step": 167015 + }, + { + "epoch": 4.080326387022696, + "grad_norm": 0.0005039193201810122, + "learning_rate": 1.9914341301660752e-07, + "loss": 0.0, + "num_input_tokens_seen": 112554344, + "step": 167020 + }, + { + "epoch": 4.080448537854543, + "grad_norm": 0.00019876591977663338, + "learning_rate": 1.9909234691103426e-07, + "loss": 0.0, + "num_input_tokens_seen": 112557736, + "step": 167025 + }, + { + "epoch": 4.08057068868639, + "grad_norm": 0.00017606680921744555, + "learning_rate": 1.9904128662987717e-07, + "loss": 0.0, + "num_input_tokens_seen": 112561128, + "step": 167030 + }, + { + "epoch": 4.080692839518237, + "grad_norm": 0.001958389300853014, + "learning_rate": 1.9899023217350697e-07, + "loss": 0.0, + "num_input_tokens_seen": 112564264, + "step": 167035 + }, + { + "epoch": 4.080814990350084, + "grad_norm": 0.0005842614336870611, + "learning_rate": 1.9893918354229554e-07, + "loss": 0.0, + "num_input_tokens_seen": 112567784, + "step": 167040 + }, + { + "epoch": 4.0809371411819315, + "grad_norm": 0.00013599508383776993, + "learning_rate": 1.9888814073661353e-07, + "loss": 0.0, + "num_input_tokens_seen": 112571112, + "step": 167045 + }, + { + "epoch": 4.081059292013778, + "grad_norm": 270.36590576171875, + "learning_rate": 1.9883710375683273e-07, + "loss": 0.054, + "num_input_tokens_seen": 112574760, + "step": 167050 + }, + { + "epoch": 4.081181442845626, + "grad_norm": 5.747499017161317e-05, + "learning_rate": 1.987860726033237e-07, + "loss": 0.0, + "num_input_tokens_seen": 112578152, + "step": 167055 + }, + { + "epoch": 4.081303593677473, + "grad_norm": 6.630839197896421e-05, + "learning_rate": 1.9873504727645784e-07, + "loss": 0.0, + "num_input_tokens_seen": 112581928, + "step": 167060 + }, + { + "epoch": 4.08142574450932, + "grad_norm": 0.0018387179588899016, + "learning_rate": 1.9868402777660652e-07, + "loss": 0.0, + "num_input_tokens_seen": 112585320, + "step": 167065 + }, + { + "epoch": 4.081547895341167, + "grad_norm": 0.0008636590791866183, + "learning_rate": 1.9863301410414024e-07, + "loss": 0.0, + "num_input_tokens_seen": 112588968, + "step": 167070 + }, + { + "epoch": 4.081670046173015, + "grad_norm": 0.006454425398260355, + "learning_rate": 1.9858200625943044e-07, + "loss": 0.0, + "num_input_tokens_seen": 112591976, + "step": 167075 + }, + { + "epoch": 4.081792197004861, + "grad_norm": 0.00018307649588678032, + "learning_rate": 1.9853100424284764e-07, + "loss": 0.0001, + "num_input_tokens_seen": 112595048, + "step": 167080 + }, + { + "epoch": 4.081914347836709, + "grad_norm": 0.0006819216068834066, + "learning_rate": 1.9848000805476284e-07, + "loss": 0.0, + "num_input_tokens_seen": 112598888, + "step": 167085 + }, + { + "epoch": 4.082036498668556, + "grad_norm": 0.005882292054593563, + "learning_rate": 1.9842901769554742e-07, + "loss": 0.0, + "num_input_tokens_seen": 112601768, + "step": 167090 + }, + { + "epoch": 4.082158649500403, + "grad_norm": 0.0009726961143314838, + "learning_rate": 1.9837803316557167e-07, + "loss": 0.0, + "num_input_tokens_seen": 112605096, + "step": 167095 + }, + { + "epoch": 4.08228080033225, + "grad_norm": 0.001027434947900474, + "learning_rate": 1.9832705446520625e-07, + "loss": 0.0, + "num_input_tokens_seen": 112608680, + "step": 167100 + }, + { + "epoch": 4.082402951164098, + "grad_norm": 0.00011184045433765277, + "learning_rate": 1.9827608159482235e-07, + "loss": 0.0, + "num_input_tokens_seen": 112612136, + "step": 167105 + }, + { + "epoch": 4.0825251019959445, + "grad_norm": 0.0006892028613947332, + "learning_rate": 1.9822511455479041e-07, + "loss": 0.0, + "num_input_tokens_seen": 112615656, + "step": 167110 + }, + { + "epoch": 4.082647252827792, + "grad_norm": 5.178718492970802e-05, + "learning_rate": 1.9817415334548093e-07, + "loss": 0.0, + "num_input_tokens_seen": 112618728, + "step": 167115 + }, + { + "epoch": 4.082769403659639, + "grad_norm": 0.0028919263277202845, + "learning_rate": 1.9812319796726452e-07, + "loss": 0.0, + "num_input_tokens_seen": 112621992, + "step": 167120 + }, + { + "epoch": 4.082891554491486, + "grad_norm": 0.00037826705374754965, + "learning_rate": 1.980722484205123e-07, + "loss": 0.0, + "num_input_tokens_seen": 112625192, + "step": 167125 + }, + { + "epoch": 4.083013705323333, + "grad_norm": 0.0008559504640288651, + "learning_rate": 1.9802130470559397e-07, + "loss": 0.0, + "num_input_tokens_seen": 112628392, + "step": 167130 + }, + { + "epoch": 4.08313585615518, + "grad_norm": 0.0019665653817355633, + "learning_rate": 1.9797036682288083e-07, + "loss": 0.0, + "num_input_tokens_seen": 112631976, + "step": 167135 + }, + { + "epoch": 4.083258006987028, + "grad_norm": 0.0028887365479022264, + "learning_rate": 1.9791943477274255e-07, + "loss": 0.0, + "num_input_tokens_seen": 112636136, + "step": 167140 + }, + { + "epoch": 4.083380157818874, + "grad_norm": 0.0007512049051001668, + "learning_rate": 1.9786850855554993e-07, + "loss": 0.0, + "num_input_tokens_seen": 112639656, + "step": 167145 + }, + { + "epoch": 4.083502308650722, + "grad_norm": 8.230555249610916e-05, + "learning_rate": 1.9781758817167348e-07, + "loss": 0.0, + "num_input_tokens_seen": 112642792, + "step": 167150 + }, + { + "epoch": 4.083624459482569, + "grad_norm": 0.0015477788401767612, + "learning_rate": 1.9776667362148303e-07, + "loss": 0.0, + "num_input_tokens_seen": 112646120, + "step": 167155 + }, + { + "epoch": 4.083746610314416, + "grad_norm": 0.0016721126157790422, + "learning_rate": 1.9771576490534935e-07, + "loss": 0.0, + "num_input_tokens_seen": 112649576, + "step": 167160 + }, + { + "epoch": 4.083868761146263, + "grad_norm": 0.000851157121360302, + "learning_rate": 1.976648620236422e-07, + "loss": 0.0, + "num_input_tokens_seen": 112652840, + "step": 167165 + }, + { + "epoch": 4.083990911978111, + "grad_norm": 0.00016223567945417017, + "learning_rate": 1.976139649767322e-07, + "loss": 0.0, + "num_input_tokens_seen": 112656040, + "step": 167170 + }, + { + "epoch": 4.084113062809958, + "grad_norm": 0.003818761557340622, + "learning_rate": 1.9756307376498905e-07, + "loss": 0.0, + "num_input_tokens_seen": 112659112, + "step": 167175 + }, + { + "epoch": 4.084235213641805, + "grad_norm": 0.0004354110569693148, + "learning_rate": 1.9751218838878304e-07, + "loss": 0.0, + "num_input_tokens_seen": 112662184, + "step": 167180 + }, + { + "epoch": 4.084357364473652, + "grad_norm": 0.025302061811089516, + "learning_rate": 1.9746130884848445e-07, + "loss": 0.0, + "num_input_tokens_seen": 112665512, + "step": 167185 + }, + { + "epoch": 4.0844795153055, + "grad_norm": 9.590527952241246e-06, + "learning_rate": 1.9741043514446288e-07, + "loss": 0.0, + "num_input_tokens_seen": 112668840, + "step": 167190 + }, + { + "epoch": 4.084601666137346, + "grad_norm": 0.01576736755669117, + "learning_rate": 1.973595672770887e-07, + "loss": 0.0, + "num_input_tokens_seen": 112672168, + "step": 167195 + }, + { + "epoch": 4.084723816969194, + "grad_norm": 0.00047771475510671735, + "learning_rate": 1.9730870524673172e-07, + "loss": 0.0, + "num_input_tokens_seen": 112675368, + "step": 167200 + }, + { + "epoch": 4.084845967801041, + "grad_norm": 8.826627890812233e-05, + "learning_rate": 1.972578490537614e-07, + "loss": 0.0, + "num_input_tokens_seen": 112678504, + "step": 167205 + }, + { + "epoch": 4.084968118632888, + "grad_norm": 0.00017720961477607489, + "learning_rate": 1.9720699869854817e-07, + "loss": 0.0, + "num_input_tokens_seen": 112681960, + "step": 167210 + }, + { + "epoch": 4.085090269464735, + "grad_norm": 0.0009463115711696446, + "learning_rate": 1.9715615418146138e-07, + "loss": 0.0, + "num_input_tokens_seen": 112685288, + "step": 167215 + }, + { + "epoch": 4.085212420296582, + "grad_norm": 1.9413335394347087e-05, + "learning_rate": 1.9710531550287112e-07, + "loss": 0.0, + "num_input_tokens_seen": 112688360, + "step": 167220 + }, + { + "epoch": 4.0853345711284295, + "grad_norm": 0.006827809847891331, + "learning_rate": 1.9705448266314685e-07, + "loss": 0.0, + "num_input_tokens_seen": 112692136, + "step": 167225 + }, + { + "epoch": 4.085456721960276, + "grad_norm": 6.61760859657079e-05, + "learning_rate": 1.9700365566265852e-07, + "loss": 0.0, + "num_input_tokens_seen": 112695336, + "step": 167230 + }, + { + "epoch": 4.085578872792124, + "grad_norm": 2.5695724616525695e-05, + "learning_rate": 1.9695283450177523e-07, + "loss": 0.0004, + "num_input_tokens_seen": 112698536, + "step": 167235 + }, + { + "epoch": 4.085701023623971, + "grad_norm": 0.002052685245871544, + "learning_rate": 1.9690201918086712e-07, + "loss": 0.0001, + "num_input_tokens_seen": 112701736, + "step": 167240 + }, + { + "epoch": 4.085823174455818, + "grad_norm": 0.004446287173777819, + "learning_rate": 1.9685120970030366e-07, + "loss": 0.0, + "num_input_tokens_seen": 112705064, + "step": 167245 + }, + { + "epoch": 4.085945325287665, + "grad_norm": 0.0018460979918017983, + "learning_rate": 1.9680040606045402e-07, + "loss": 0.0, + "num_input_tokens_seen": 112708520, + "step": 167250 + }, + { + "epoch": 4.086067476119513, + "grad_norm": 6.863919406896457e-05, + "learning_rate": 1.9674960826168807e-07, + "loss": 0.0, + "num_input_tokens_seen": 112711400, + "step": 167255 + }, + { + "epoch": 4.086189626951359, + "grad_norm": 0.00014254235429689288, + "learning_rate": 1.966988163043748e-07, + "loss": 0.0, + "num_input_tokens_seen": 112714472, + "step": 167260 + }, + { + "epoch": 4.086311777783207, + "grad_norm": 0.026957418769598007, + "learning_rate": 1.966480301888841e-07, + "loss": 0.0, + "num_input_tokens_seen": 112717864, + "step": 167265 + }, + { + "epoch": 4.086433928615054, + "grad_norm": 0.00010942335211439058, + "learning_rate": 1.9659724991558467e-07, + "loss": 0.0, + "num_input_tokens_seen": 112720872, + "step": 167270 + }, + { + "epoch": 4.086556079446901, + "grad_norm": 1.6432888514827937e-05, + "learning_rate": 1.9654647548484615e-07, + "loss": 0.0, + "num_input_tokens_seen": 112724648, + "step": 167275 + }, + { + "epoch": 4.086678230278748, + "grad_norm": 6.028258940204978e-05, + "learning_rate": 1.96495706897038e-07, + "loss": 0.0, + "num_input_tokens_seen": 112727848, + "step": 167280 + }, + { + "epoch": 4.086800381110596, + "grad_norm": 0.006534240674227476, + "learning_rate": 1.9644494415252887e-07, + "loss": 0.0, + "num_input_tokens_seen": 112731176, + "step": 167285 + }, + { + "epoch": 4.0869225319424425, + "grad_norm": 0.0022399064619094133, + "learning_rate": 1.9639418725168866e-07, + "loss": 0.0, + "num_input_tokens_seen": 112734312, + "step": 167290 + }, + { + "epoch": 4.08704468277429, + "grad_norm": 0.0006240478251129389, + "learning_rate": 1.963434361948857e-07, + "loss": 0.0, + "num_input_tokens_seen": 112737320, + "step": 167295 + }, + { + "epoch": 4.087166833606137, + "grad_norm": 0.0010657204547896981, + "learning_rate": 1.9629269098248967e-07, + "loss": 0.0, + "num_input_tokens_seen": 112740328, + "step": 167300 + }, + { + "epoch": 4.087288984437984, + "grad_norm": 0.00023298896849155426, + "learning_rate": 1.9624195161486945e-07, + "loss": 0.0, + "num_input_tokens_seen": 112743336, + "step": 167305 + }, + { + "epoch": 4.087411135269831, + "grad_norm": 2.725253943935968e-05, + "learning_rate": 1.961912180923936e-07, + "loss": 0.0, + "num_input_tokens_seen": 112746792, + "step": 167310 + }, + { + "epoch": 4.087533286101678, + "grad_norm": 0.0026628663763403893, + "learning_rate": 1.961404904154317e-07, + "loss": 0.0, + "num_input_tokens_seen": 112750312, + "step": 167315 + }, + { + "epoch": 4.087655436933526, + "grad_norm": 0.00037412403617054224, + "learning_rate": 1.960897685843521e-07, + "loss": 0.0, + "num_input_tokens_seen": 112753448, + "step": 167320 + }, + { + "epoch": 4.087777587765372, + "grad_norm": 0.0007068165577948093, + "learning_rate": 1.9603905259952426e-07, + "loss": 0.0, + "num_input_tokens_seen": 112756840, + "step": 167325 + }, + { + "epoch": 4.08789973859722, + "grad_norm": 7.83190771471709e-05, + "learning_rate": 1.9598834246131634e-07, + "loss": 0.0, + "num_input_tokens_seen": 112760296, + "step": 167330 + }, + { + "epoch": 4.088021889429067, + "grad_norm": 0.0009760848479345441, + "learning_rate": 1.9593763817009745e-07, + "loss": 0.0, + "num_input_tokens_seen": 112763496, + "step": 167335 + }, + { + "epoch": 4.088144040260914, + "grad_norm": 0.0001355950371362269, + "learning_rate": 1.958869397262366e-07, + "loss": 0.0, + "num_input_tokens_seen": 112767016, + "step": 167340 + }, + { + "epoch": 4.088266191092761, + "grad_norm": 0.0007737476262263954, + "learning_rate": 1.9583624713010183e-07, + "loss": 0.0, + "num_input_tokens_seen": 112769896, + "step": 167345 + }, + { + "epoch": 4.088388341924609, + "grad_norm": 0.000765020027756691, + "learning_rate": 1.9578556038206262e-07, + "loss": 0.0, + "num_input_tokens_seen": 112773032, + "step": 167350 + }, + { + "epoch": 4.0885104927564555, + "grad_norm": 0.00048364579561166465, + "learning_rate": 1.9573487948248668e-07, + "loss": 0.0, + "num_input_tokens_seen": 112776424, + "step": 167355 + }, + { + "epoch": 4.088632643588303, + "grad_norm": 0.002045189728960395, + "learning_rate": 1.9568420443174338e-07, + "loss": 0.0, + "num_input_tokens_seen": 112779496, + "step": 167360 + }, + { + "epoch": 4.08875479442015, + "grad_norm": 0.00012536265421658754, + "learning_rate": 1.9563353523020066e-07, + "loss": 0.0, + "num_input_tokens_seen": 112782504, + "step": 167365 + }, + { + "epoch": 4.0888769452519975, + "grad_norm": 0.0009548076777718961, + "learning_rate": 1.9558287187822707e-07, + "loss": 0.0, + "num_input_tokens_seen": 112786216, + "step": 167370 + }, + { + "epoch": 4.088999096083844, + "grad_norm": 6.926884816493839e-05, + "learning_rate": 1.955322143761916e-07, + "loss": 0.0, + "num_input_tokens_seen": 112789992, + "step": 167375 + }, + { + "epoch": 4.089121246915692, + "grad_norm": 0.00932652223855257, + "learning_rate": 1.9548156272446194e-07, + "loss": 0.0, + "num_input_tokens_seen": 112793192, + "step": 167380 + }, + { + "epoch": 4.089243397747539, + "grad_norm": 0.00042319862404838204, + "learning_rate": 1.954309169234071e-07, + "loss": 0.0, + "num_input_tokens_seen": 112796648, + "step": 167385 + }, + { + "epoch": 4.089365548579385, + "grad_norm": 5.152716039447114e-05, + "learning_rate": 1.9538027697339455e-07, + "loss": 0.0, + "num_input_tokens_seen": 112800104, + "step": 167390 + }, + { + "epoch": 4.089487699411233, + "grad_norm": 5.449349919217639e-05, + "learning_rate": 1.9532964287479325e-07, + "loss": 0.0, + "num_input_tokens_seen": 112803688, + "step": 167395 + }, + { + "epoch": 4.08960985024308, + "grad_norm": 0.00032072042813524604, + "learning_rate": 1.9527901462797136e-07, + "loss": 0.0, + "num_input_tokens_seen": 112807400, + "step": 167400 + }, + { + "epoch": 4.089732001074927, + "grad_norm": 0.0025550902355462313, + "learning_rate": 1.95228392233297e-07, + "loss": 0.0, + "num_input_tokens_seen": 112810792, + "step": 167405 + }, + { + "epoch": 4.089854151906774, + "grad_norm": 0.000510639336425811, + "learning_rate": 1.9517777569113792e-07, + "loss": 0.0, + "num_input_tokens_seen": 112814184, + "step": 167410 + }, + { + "epoch": 4.089976302738622, + "grad_norm": 2.887546543206554e-05, + "learning_rate": 1.9512716500186277e-07, + "loss": 0.0, + "num_input_tokens_seen": 112817896, + "step": 167415 + }, + { + "epoch": 4.0900984535704685, + "grad_norm": 0.00021238067711237818, + "learning_rate": 1.950765601658394e-07, + "loss": 0.0, + "num_input_tokens_seen": 112821800, + "step": 167420 + }, + { + "epoch": 4.090220604402316, + "grad_norm": 0.00016823512851260602, + "learning_rate": 1.950259611834355e-07, + "loss": 0.0, + "num_input_tokens_seen": 112825064, + "step": 167425 + }, + { + "epoch": 4.090342755234163, + "grad_norm": 0.005793475545942783, + "learning_rate": 1.9497536805501934e-07, + "loss": 0.0, + "num_input_tokens_seen": 112828200, + "step": 167430 + }, + { + "epoch": 4.0904649060660105, + "grad_norm": 0.00038167531602084637, + "learning_rate": 1.9492478078095909e-07, + "loss": 0.0, + "num_input_tokens_seen": 112832104, + "step": 167435 + }, + { + "epoch": 4.090587056897857, + "grad_norm": 0.00012043816968798637, + "learning_rate": 1.948741993616221e-07, + "loss": 0.0, + "num_input_tokens_seen": 112835496, + "step": 167440 + }, + { + "epoch": 4.090709207729705, + "grad_norm": 0.00020174685050733387, + "learning_rate": 1.948236237973767e-07, + "loss": 0.0, + "num_input_tokens_seen": 112838888, + "step": 167445 + }, + { + "epoch": 4.090831358561552, + "grad_norm": 0.001029736828058958, + "learning_rate": 1.9477305408859023e-07, + "loss": 0.0, + "num_input_tokens_seen": 112842856, + "step": 167450 + }, + { + "epoch": 4.090953509393399, + "grad_norm": 0.0009005250176414847, + "learning_rate": 1.9472249023563103e-07, + "loss": 0.0, + "num_input_tokens_seen": 112846184, + "step": 167455 + }, + { + "epoch": 4.091075660225246, + "grad_norm": 0.00013175183266866952, + "learning_rate": 1.9467193223886613e-07, + "loss": 0.0, + "num_input_tokens_seen": 112849512, + "step": 167460 + }, + { + "epoch": 4.091197811057094, + "grad_norm": 0.006021356210112572, + "learning_rate": 1.9462138009866357e-07, + "loss": 0.0, + "num_input_tokens_seen": 112853096, + "step": 167465 + }, + { + "epoch": 4.09131996188894, + "grad_norm": 2.9928800358902663e-05, + "learning_rate": 1.945708338153913e-07, + "loss": 0.0011, + "num_input_tokens_seen": 112855912, + "step": 167470 + }, + { + "epoch": 4.091442112720788, + "grad_norm": 2.2655483917333186e-05, + "learning_rate": 1.9452029338941623e-07, + "loss": 0.0, + "num_input_tokens_seen": 112859560, + "step": 167475 + }, + { + "epoch": 4.091564263552635, + "grad_norm": 0.0012740838574245572, + "learning_rate": 1.944697588211064e-07, + "loss": 0.0, + "num_input_tokens_seen": 112863080, + "step": 167480 + }, + { + "epoch": 4.0916864143844816, + "grad_norm": 0.020395029336214066, + "learning_rate": 1.9441923011082905e-07, + "loss": 0.0, + "num_input_tokens_seen": 112866408, + "step": 167485 + }, + { + "epoch": 4.091808565216329, + "grad_norm": 0.0010367101058363914, + "learning_rate": 1.943687072589516e-07, + "loss": 0.0, + "num_input_tokens_seen": 112869672, + "step": 167490 + }, + { + "epoch": 4.091930716048176, + "grad_norm": 0.0001547683059470728, + "learning_rate": 1.9431819026584196e-07, + "loss": 0.0, + "num_input_tokens_seen": 112872808, + "step": 167495 + }, + { + "epoch": 4.092052866880024, + "grad_norm": 0.0013382845791056752, + "learning_rate": 1.942676791318668e-07, + "loss": 0.0, + "num_input_tokens_seen": 112876072, + "step": 167500 + }, + { + "epoch": 4.09217501771187, + "grad_norm": 0.0001629707112442702, + "learning_rate": 1.942171738573941e-07, + "loss": 0.0, + "num_input_tokens_seen": 112879464, + "step": 167505 + }, + { + "epoch": 4.092297168543718, + "grad_norm": 7.951433872221969e-06, + "learning_rate": 1.941666744427909e-07, + "loss": 0.0, + "num_input_tokens_seen": 112882792, + "step": 167510 + }, + { + "epoch": 4.092419319375565, + "grad_norm": 0.0005250205867923796, + "learning_rate": 1.9411618088842396e-07, + "loss": 0.05, + "num_input_tokens_seen": 112886568, + "step": 167515 + }, + { + "epoch": 4.092541470207412, + "grad_norm": 0.004777814727276564, + "learning_rate": 1.9406569319466136e-07, + "loss": 0.0, + "num_input_tokens_seen": 112890088, + "step": 167520 + }, + { + "epoch": 4.092663621039259, + "grad_norm": 0.001972571946680546, + "learning_rate": 1.9401521136186937e-07, + "loss": 0.0, + "num_input_tokens_seen": 112893608, + "step": 167525 + }, + { + "epoch": 4.092785771871107, + "grad_norm": 0.00029019240173511207, + "learning_rate": 1.939647353904159e-07, + "loss": 0.0, + "num_input_tokens_seen": 112896552, + "step": 167530 + }, + { + "epoch": 4.0929079227029534, + "grad_norm": 0.01268207747489214, + "learning_rate": 1.9391426528066744e-07, + "loss": 0.0, + "num_input_tokens_seen": 112899880, + "step": 167535 + }, + { + "epoch": 4.093030073534801, + "grad_norm": 0.0008546076714992523, + "learning_rate": 1.938638010329915e-07, + "loss": 0.0, + "num_input_tokens_seen": 112902824, + "step": 167540 + }, + { + "epoch": 4.093152224366648, + "grad_norm": 0.001968494150787592, + "learning_rate": 1.9381334264775462e-07, + "loss": 0.0, + "num_input_tokens_seen": 112906344, + "step": 167545 + }, + { + "epoch": 4.0932743751984955, + "grad_norm": 0.001512790215201676, + "learning_rate": 1.9376289012532388e-07, + "loss": 0.0, + "num_input_tokens_seen": 112909928, + "step": 167550 + }, + { + "epoch": 4.093396526030342, + "grad_norm": 0.0003397958935238421, + "learning_rate": 1.937124434660664e-07, + "loss": 0.0, + "num_input_tokens_seen": 112913576, + "step": 167555 + }, + { + "epoch": 4.09351867686219, + "grad_norm": 0.00014833496243227273, + "learning_rate": 1.9366200267034882e-07, + "loss": 0.0, + "num_input_tokens_seen": 112916776, + "step": 167560 + }, + { + "epoch": 4.093640827694037, + "grad_norm": 2.349866190343164e-05, + "learning_rate": 1.9361156773853826e-07, + "loss": 0.0, + "num_input_tokens_seen": 112920104, + "step": 167565 + }, + { + "epoch": 4.093762978525883, + "grad_norm": 218.95103454589844, + "learning_rate": 1.9356113867100089e-07, + "loss": 0.0526, + "num_input_tokens_seen": 112923752, + "step": 167570 + }, + { + "epoch": 4.093885129357731, + "grad_norm": 0.00042152617243118584, + "learning_rate": 1.9351071546810428e-07, + "loss": 0.0, + "num_input_tokens_seen": 112926696, + "step": 167575 + }, + { + "epoch": 4.094007280189578, + "grad_norm": 0.0010712286457419395, + "learning_rate": 1.9346029813021425e-07, + "loss": 0.0, + "num_input_tokens_seen": 112929960, + "step": 167580 + }, + { + "epoch": 4.094129431021425, + "grad_norm": 6.560736892424757e-06, + "learning_rate": 1.9340988665769786e-07, + "loss": 0.0, + "num_input_tokens_seen": 112932904, + "step": 167585 + }, + { + "epoch": 4.094251581853272, + "grad_norm": 0.00016035842418204993, + "learning_rate": 1.9335948105092203e-07, + "loss": 0.0, + "num_input_tokens_seen": 112936168, + "step": 167590 + }, + { + "epoch": 4.09437373268512, + "grad_norm": 0.00047581008402630687, + "learning_rate": 1.9330908131025282e-07, + "loss": 0.0, + "num_input_tokens_seen": 112939624, + "step": 167595 + }, + { + "epoch": 4.0944958835169665, + "grad_norm": 0.006864218972623348, + "learning_rate": 1.9325868743605711e-07, + "loss": 0.0, + "num_input_tokens_seen": 112943016, + "step": 167600 + }, + { + "epoch": 4.094618034348814, + "grad_norm": 0.0001095055922633037, + "learning_rate": 1.93208299428701e-07, + "loss": 0.0, + "num_input_tokens_seen": 112946216, + "step": 167605 + }, + { + "epoch": 4.094740185180661, + "grad_norm": 0.00017072324408218265, + "learning_rate": 1.9315791728855136e-07, + "loss": 0.0, + "num_input_tokens_seen": 112949224, + "step": 167610 + }, + { + "epoch": 4.0948623360125085, + "grad_norm": 0.0022964330855757, + "learning_rate": 1.9310754101597437e-07, + "loss": 0.0, + "num_input_tokens_seen": 112952552, + "step": 167615 + }, + { + "epoch": 4.094984486844355, + "grad_norm": 0.0011054405476897955, + "learning_rate": 1.930571706113362e-07, + "loss": 0.0, + "num_input_tokens_seen": 112955880, + "step": 167620 + }, + { + "epoch": 4.095106637676203, + "grad_norm": 0.0005825211410410702, + "learning_rate": 1.9300680607500354e-07, + "loss": 0.0, + "num_input_tokens_seen": 112958760, + "step": 167625 + }, + { + "epoch": 4.09522878850805, + "grad_norm": 0.0003326810256112367, + "learning_rate": 1.9295644740734207e-07, + "loss": 0.0, + "num_input_tokens_seen": 112962216, + "step": 167630 + }, + { + "epoch": 4.095350939339897, + "grad_norm": 9.163653157884255e-05, + "learning_rate": 1.9290609460871876e-07, + "loss": 0.0, + "num_input_tokens_seen": 112966440, + "step": 167635 + }, + { + "epoch": 4.095473090171744, + "grad_norm": 0.0004953984171152115, + "learning_rate": 1.928557476794991e-07, + "loss": 0.0, + "num_input_tokens_seen": 112969448, + "step": 167640 + }, + { + "epoch": 4.095595241003592, + "grad_norm": 4.5307486288947985e-05, + "learning_rate": 1.928054066200495e-07, + "loss": 0.0698, + "num_input_tokens_seen": 112973288, + "step": 167645 + }, + { + "epoch": 4.095717391835438, + "grad_norm": 0.00015699061623308808, + "learning_rate": 1.9275507143073645e-07, + "loss": 0.0, + "num_input_tokens_seen": 112976744, + "step": 167650 + }, + { + "epoch": 4.095839542667285, + "grad_norm": 2.502801362425089e-05, + "learning_rate": 1.9270474211192534e-07, + "loss": 0.0, + "num_input_tokens_seen": 112979688, + "step": 167655 + }, + { + "epoch": 4.095961693499133, + "grad_norm": 0.0003214066382497549, + "learning_rate": 1.926544186639828e-07, + "loss": 0.0, + "num_input_tokens_seen": 112982888, + "step": 167660 + }, + { + "epoch": 4.0960838443309795, + "grad_norm": 0.0001289208885282278, + "learning_rate": 1.9260410108727408e-07, + "loss": 0.0, + "num_input_tokens_seen": 112986024, + "step": 167665 + }, + { + "epoch": 4.096205995162827, + "grad_norm": 0.0011240255553275347, + "learning_rate": 1.9255378938216583e-07, + "loss": 0.0, + "num_input_tokens_seen": 112989288, + "step": 167670 + }, + { + "epoch": 4.096328145994674, + "grad_norm": 0.00025505939265713096, + "learning_rate": 1.9250348354902335e-07, + "loss": 0.0, + "num_input_tokens_seen": 112992232, + "step": 167675 + }, + { + "epoch": 4.0964502968265215, + "grad_norm": 0.00030178253655321896, + "learning_rate": 1.9245318358821272e-07, + "loss": 0.0, + "num_input_tokens_seen": 112995688, + "step": 167680 + }, + { + "epoch": 4.096572447658368, + "grad_norm": 0.02885306626558304, + "learning_rate": 1.9240288950010008e-07, + "loss": 0.0, + "num_input_tokens_seen": 112998888, + "step": 167685 + }, + { + "epoch": 4.096694598490216, + "grad_norm": 0.0009121938492171466, + "learning_rate": 1.923526012850505e-07, + "loss": 0.0, + "num_input_tokens_seen": 113002600, + "step": 167690 + }, + { + "epoch": 4.096816749322063, + "grad_norm": 0.0020662290044128895, + "learning_rate": 1.9230231894343029e-07, + "loss": 0.0, + "num_input_tokens_seen": 113005672, + "step": 167695 + }, + { + "epoch": 4.09693890015391, + "grad_norm": 0.002329219365492463, + "learning_rate": 1.9225204247560467e-07, + "loss": 0.0, + "num_input_tokens_seen": 113009192, + "step": 167700 + }, + { + "epoch": 4.097061050985757, + "grad_norm": 0.002412031404674053, + "learning_rate": 1.9220177188193942e-07, + "loss": 0.0, + "num_input_tokens_seen": 113012264, + "step": 167705 + }, + { + "epoch": 4.097183201817605, + "grad_norm": 0.0004434922302607447, + "learning_rate": 1.9215150716280037e-07, + "loss": 0.0, + "num_input_tokens_seen": 113015336, + "step": 167710 + }, + { + "epoch": 4.097305352649451, + "grad_norm": 3.532558184815571e-05, + "learning_rate": 1.921012483185529e-07, + "loss": 0.0, + "num_input_tokens_seen": 113018984, + "step": 167715 + }, + { + "epoch": 4.097427503481299, + "grad_norm": 0.0013077000621706247, + "learning_rate": 1.9205099534956214e-07, + "loss": 0.0, + "num_input_tokens_seen": 113021928, + "step": 167720 + }, + { + "epoch": 4.097549654313146, + "grad_norm": 0.00023376866010949016, + "learning_rate": 1.9200074825619418e-07, + "loss": 0.0002, + "num_input_tokens_seen": 113025384, + "step": 167725 + }, + { + "epoch": 4.097671805144993, + "grad_norm": 0.00019923056242987514, + "learning_rate": 1.91950507038814e-07, + "loss": 0.0, + "num_input_tokens_seen": 113028904, + "step": 167730 + }, + { + "epoch": 4.09779395597684, + "grad_norm": 0.0014188940403982997, + "learning_rate": 1.9190027169778688e-07, + "loss": 0.0, + "num_input_tokens_seen": 113032168, + "step": 167735 + }, + { + "epoch": 4.097916106808688, + "grad_norm": 0.00020696816500276327, + "learning_rate": 1.9185004223347834e-07, + "loss": 0.0, + "num_input_tokens_seen": 113035240, + "step": 167740 + }, + { + "epoch": 4.0980382576405345, + "grad_norm": 0.0024865081068128347, + "learning_rate": 1.9179981864625394e-07, + "loss": 0.0, + "num_input_tokens_seen": 113038888, + "step": 167745 + }, + { + "epoch": 4.098160408472381, + "grad_norm": 0.0010457972530275583, + "learning_rate": 1.917496009364784e-07, + "loss": 0.0, + "num_input_tokens_seen": 113042408, + "step": 167750 + }, + { + "epoch": 4.098282559304229, + "grad_norm": 0.0010362849570810795, + "learning_rate": 1.9169938910451734e-07, + "loss": 0.0, + "num_input_tokens_seen": 113045672, + "step": 167755 + }, + { + "epoch": 4.098404710136076, + "grad_norm": 0.00017162924632430077, + "learning_rate": 1.9164918315073552e-07, + "loss": 0.0, + "num_input_tokens_seen": 113048872, + "step": 167760 + }, + { + "epoch": 4.098526860967923, + "grad_norm": 0.00047827913658693433, + "learning_rate": 1.915989830754985e-07, + "loss": 0.0002, + "num_input_tokens_seen": 113051944, + "step": 167765 + }, + { + "epoch": 4.09864901179977, + "grad_norm": 3.9480975829064846e-05, + "learning_rate": 1.915487888791708e-07, + "loss": 0.0, + "num_input_tokens_seen": 113054952, + "step": 167770 + }, + { + "epoch": 4.098771162631618, + "grad_norm": 0.003575035370886326, + "learning_rate": 1.9149860056211787e-07, + "loss": 0.0, + "num_input_tokens_seen": 113058024, + "step": 167775 + }, + { + "epoch": 4.098893313463464, + "grad_norm": 0.0023885369300842285, + "learning_rate": 1.9144841812470468e-07, + "loss": 0.0, + "num_input_tokens_seen": 113061288, + "step": 167780 + }, + { + "epoch": 4.099015464295312, + "grad_norm": 0.039635781198740005, + "learning_rate": 1.913982415672959e-07, + "loss": 0.0, + "num_input_tokens_seen": 113065448, + "step": 167785 + }, + { + "epoch": 4.099137615127159, + "grad_norm": 0.0010953181190416217, + "learning_rate": 1.9134807089025695e-07, + "loss": 0.0, + "num_input_tokens_seen": 113068712, + "step": 167790 + }, + { + "epoch": 4.099259765959006, + "grad_norm": 0.014914610423147678, + "learning_rate": 1.9129790609395192e-07, + "loss": 0.0, + "num_input_tokens_seen": 113071848, + "step": 167795 + }, + { + "epoch": 4.099381916790853, + "grad_norm": 0.0008530065533705056, + "learning_rate": 1.9124774717874603e-07, + "loss": 0.0, + "num_input_tokens_seen": 113074728, + "step": 167800 + }, + { + "epoch": 4.099504067622701, + "grad_norm": 0.00020005644182674587, + "learning_rate": 1.9119759414500447e-07, + "loss": 0.0, + "num_input_tokens_seen": 113078120, + "step": 167805 + }, + { + "epoch": 4.0996262184545476, + "grad_norm": 0.00026042881654575467, + "learning_rate": 1.9114744699309117e-07, + "loss": 0.0, + "num_input_tokens_seen": 113081832, + "step": 167810 + }, + { + "epoch": 4.099748369286395, + "grad_norm": 0.00039418303640559316, + "learning_rate": 1.9109730572337146e-07, + "loss": 0.058, + "num_input_tokens_seen": 113085032, + "step": 167815 + }, + { + "epoch": 4.099870520118242, + "grad_norm": 0.0019248060416430235, + "learning_rate": 1.9104717033620965e-07, + "loss": 0.0, + "num_input_tokens_seen": 113088296, + "step": 167820 + }, + { + "epoch": 4.09999267095009, + "grad_norm": 0.009397185407578945, + "learning_rate": 1.9099704083197023e-07, + "loss": 0.0, + "num_input_tokens_seen": 113091496, + "step": 167825 + }, + { + "epoch": 4.100114821781936, + "grad_norm": 0.001163032022304833, + "learning_rate": 1.9094691721101818e-07, + "loss": 0.0, + "num_input_tokens_seen": 113094824, + "step": 167830 + }, + { + "epoch": 4.100236972613784, + "grad_norm": 0.0006426633917726576, + "learning_rate": 1.908967994737175e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113098408, + "step": 167835 + }, + { + "epoch": 4.100359123445631, + "grad_norm": 0.0019152211025357246, + "learning_rate": 1.908466876204331e-07, + "loss": 0.0, + "num_input_tokens_seen": 113101672, + "step": 167840 + }, + { + "epoch": 4.100481274277477, + "grad_norm": 0.0011807752307504416, + "learning_rate": 1.90796581651529e-07, + "loss": 0.0, + "num_input_tokens_seen": 113104616, + "step": 167845 + }, + { + "epoch": 4.100603425109325, + "grad_norm": 0.0011360227363184094, + "learning_rate": 1.9074648156737017e-07, + "loss": 0.0, + "num_input_tokens_seen": 113108136, + "step": 167850 + }, + { + "epoch": 4.100725575941172, + "grad_norm": 5.969356789137237e-05, + "learning_rate": 1.9069638736832016e-07, + "loss": 0.0, + "num_input_tokens_seen": 113112168, + "step": 167855 + }, + { + "epoch": 4.1008477267730195, + "grad_norm": 0.02083604969084263, + "learning_rate": 1.9064629905474384e-07, + "loss": 0.0, + "num_input_tokens_seen": 113115112, + "step": 167860 + }, + { + "epoch": 4.100969877604866, + "grad_norm": 0.00034951046109199524, + "learning_rate": 1.9059621662700554e-07, + "loss": 0.0325, + "num_input_tokens_seen": 113118248, + "step": 167865 + }, + { + "epoch": 4.101092028436714, + "grad_norm": 0.0008968613692559302, + "learning_rate": 1.9054614008546888e-07, + "loss": 0.0, + "num_input_tokens_seen": 113121896, + "step": 167870 + }, + { + "epoch": 4.101214179268561, + "grad_norm": 0.0006697792559862137, + "learning_rate": 1.9049606943049878e-07, + "loss": 0.0, + "num_input_tokens_seen": 113125224, + "step": 167875 + }, + { + "epoch": 4.101336330100408, + "grad_norm": 0.0009942372562363744, + "learning_rate": 1.9044600466245875e-07, + "loss": 0.0, + "num_input_tokens_seen": 113128424, + "step": 167880 + }, + { + "epoch": 4.101458480932255, + "grad_norm": 6.113015842856839e-05, + "learning_rate": 1.9039594578171336e-07, + "loss": 0.0, + "num_input_tokens_seen": 113131752, + "step": 167885 + }, + { + "epoch": 4.101580631764103, + "grad_norm": 0.0002360446087550372, + "learning_rate": 1.9034589278862612e-07, + "loss": 0.0, + "num_input_tokens_seen": 113134888, + "step": 167890 + }, + { + "epoch": 4.101702782595949, + "grad_norm": 0.00013911745918449014, + "learning_rate": 1.9029584568356138e-07, + "loss": 0.0, + "num_input_tokens_seen": 113138024, + "step": 167895 + }, + { + "epoch": 4.101824933427797, + "grad_norm": 8.226054342230782e-05, + "learning_rate": 1.902458044668832e-07, + "loss": 0.0, + "num_input_tokens_seen": 113141288, + "step": 167900 + }, + { + "epoch": 4.101947084259644, + "grad_norm": 0.0007206815644167364, + "learning_rate": 1.901957691389552e-07, + "loss": 0.0, + "num_input_tokens_seen": 113144616, + "step": 167905 + }, + { + "epoch": 4.102069235091491, + "grad_norm": 0.00041117100045084953, + "learning_rate": 1.9014573970014147e-07, + "loss": 0.0, + "num_input_tokens_seen": 113148520, + "step": 167910 + }, + { + "epoch": 4.102191385923338, + "grad_norm": 3.6475325032370165e-05, + "learning_rate": 1.9009571615080555e-07, + "loss": 0.0, + "num_input_tokens_seen": 113151912, + "step": 167915 + }, + { + "epoch": 4.102313536755186, + "grad_norm": 0.00016955210594460368, + "learning_rate": 1.900456984913117e-07, + "loss": 0.0, + "num_input_tokens_seen": 113155240, + "step": 167920 + }, + { + "epoch": 4.1024356875870325, + "grad_norm": 0.0006664685788564384, + "learning_rate": 1.8999568672202338e-07, + "loss": 0.0, + "num_input_tokens_seen": 113158696, + "step": 167925 + }, + { + "epoch": 4.102557838418879, + "grad_norm": 0.000883599161170423, + "learning_rate": 1.899456808433041e-07, + "loss": 0.0, + "num_input_tokens_seen": 113161832, + "step": 167930 + }, + { + "epoch": 4.102679989250727, + "grad_norm": 0.0010841061593964696, + "learning_rate": 1.898956808555179e-07, + "loss": 0.0, + "num_input_tokens_seen": 113165096, + "step": 167935 + }, + { + "epoch": 4.102802140082574, + "grad_norm": 0.0007180902175605297, + "learning_rate": 1.898456867590279e-07, + "loss": 0.0, + "num_input_tokens_seen": 113168360, + "step": 167940 + }, + { + "epoch": 4.102924290914421, + "grad_norm": 0.0037924540229141712, + "learning_rate": 1.897956985541983e-07, + "loss": 0.0, + "num_input_tokens_seen": 113171880, + "step": 167945 + }, + { + "epoch": 4.103046441746268, + "grad_norm": 0.0029051261954009533, + "learning_rate": 1.8974571624139201e-07, + "loss": 0.0, + "num_input_tokens_seen": 113174952, + "step": 167950 + }, + { + "epoch": 4.103168592578116, + "grad_norm": 0.002228027442470193, + "learning_rate": 1.8969573982097288e-07, + "loss": 0.0, + "num_input_tokens_seen": 113178152, + "step": 167955 + }, + { + "epoch": 4.103290743409962, + "grad_norm": 0.0008051015320234001, + "learning_rate": 1.8964576929330444e-07, + "loss": 0.0, + "num_input_tokens_seen": 113181480, + "step": 167960 + }, + { + "epoch": 4.10341289424181, + "grad_norm": 0.00023929915914777666, + "learning_rate": 1.895958046587497e-07, + "loss": 0.0, + "num_input_tokens_seen": 113185576, + "step": 167965 + }, + { + "epoch": 4.103535045073657, + "grad_norm": 0.00011397549678804353, + "learning_rate": 1.8954584591767241e-07, + "loss": 0.0, + "num_input_tokens_seen": 113189096, + "step": 167970 + }, + { + "epoch": 4.103657195905504, + "grad_norm": 8.767266263021156e-05, + "learning_rate": 1.8949589307043555e-07, + "loss": 0.0, + "num_input_tokens_seen": 113192360, + "step": 167975 + }, + { + "epoch": 4.103779346737351, + "grad_norm": 0.0018837309908121824, + "learning_rate": 1.8944594611740282e-07, + "loss": 0.0, + "num_input_tokens_seen": 113195368, + "step": 167980 + }, + { + "epoch": 4.103901497569199, + "grad_norm": 0.005669779144227505, + "learning_rate": 1.8939600505893693e-07, + "loss": 0.0, + "num_input_tokens_seen": 113199080, + "step": 167985 + }, + { + "epoch": 4.1040236484010455, + "grad_norm": 0.0024404674768447876, + "learning_rate": 1.8934606989540125e-07, + "loss": 0.0, + "num_input_tokens_seen": 113202920, + "step": 167990 + }, + { + "epoch": 4.104145799232893, + "grad_norm": 0.0001181487605208531, + "learning_rate": 1.8929614062715927e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113206248, + "step": 167995 + }, + { + "epoch": 4.10426795006474, + "grad_norm": 0.0009015746763907373, + "learning_rate": 1.8924621725457357e-07, + "loss": 0.0, + "num_input_tokens_seen": 113210152, + "step": 168000 + }, + { + "epoch": 4.1043901008965875, + "grad_norm": 0.0009523396729491651, + "learning_rate": 1.8919629977800767e-07, + "loss": 0.0, + "num_input_tokens_seen": 113213416, + "step": 168005 + }, + { + "epoch": 4.104512251728434, + "grad_norm": 0.001039717230014503, + "learning_rate": 1.8914638819782414e-07, + "loss": 0.0, + "num_input_tokens_seen": 113216936, + "step": 168010 + }, + { + "epoch": 4.104634402560281, + "grad_norm": 9.482148925599176e-06, + "learning_rate": 1.8909648251438648e-07, + "loss": 0.0, + "num_input_tokens_seen": 113221288, + "step": 168015 + }, + { + "epoch": 4.104756553392129, + "grad_norm": 2.5004270355566405e-05, + "learning_rate": 1.8904658272805696e-07, + "loss": 0.0, + "num_input_tokens_seen": 113225128, + "step": 168020 + }, + { + "epoch": 4.104878704223975, + "grad_norm": 0.006195750553160906, + "learning_rate": 1.8899668883919907e-07, + "loss": 0.0, + "num_input_tokens_seen": 113228328, + "step": 168025 + }, + { + "epoch": 4.105000855055823, + "grad_norm": 0.0005033480701968074, + "learning_rate": 1.8894680084817516e-07, + "loss": 0.0, + "num_input_tokens_seen": 113231528, + "step": 168030 + }, + { + "epoch": 4.10512300588767, + "grad_norm": 0.001200584345497191, + "learning_rate": 1.8889691875534853e-07, + "loss": 0.0, + "num_input_tokens_seen": 113235048, + "step": 168035 + }, + { + "epoch": 4.105245156719517, + "grad_norm": 0.0023539774119853973, + "learning_rate": 1.8884704256108163e-07, + "loss": 0.0, + "num_input_tokens_seen": 113238504, + "step": 168040 + }, + { + "epoch": 4.105367307551364, + "grad_norm": 8.967569738160819e-05, + "learning_rate": 1.8879717226573698e-07, + "loss": 0.0, + "num_input_tokens_seen": 113241960, + "step": 168045 + }, + { + "epoch": 4.105489458383212, + "grad_norm": 0.0012145297368988395, + "learning_rate": 1.8874730786967752e-07, + "loss": 0.0, + "num_input_tokens_seen": 113245288, + "step": 168050 + }, + { + "epoch": 4.1056116092150585, + "grad_norm": 0.0001042317962856032, + "learning_rate": 1.8869744937326603e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113248616, + "step": 168055 + }, + { + "epoch": 4.105733760046906, + "grad_norm": 0.001420485437847674, + "learning_rate": 1.886475967768647e-07, + "loss": 0.0, + "num_input_tokens_seen": 113253160, + "step": 168060 + }, + { + "epoch": 4.105855910878753, + "grad_norm": 0.00025929929688572884, + "learning_rate": 1.8859775008083646e-07, + "loss": 0.0, + "num_input_tokens_seen": 113256296, + "step": 168065 + }, + { + "epoch": 4.1059780617106005, + "grad_norm": 0.0011569394264370203, + "learning_rate": 1.8854790928554343e-07, + "loss": 0.0, + "num_input_tokens_seen": 113259624, + "step": 168070 + }, + { + "epoch": 4.106100212542447, + "grad_norm": 0.00021515910339076072, + "learning_rate": 1.8849807439134847e-07, + "loss": 0.0, + "num_input_tokens_seen": 113262952, + "step": 168075 + }, + { + "epoch": 4.106222363374295, + "grad_norm": 0.0018044417956843972, + "learning_rate": 1.8844824539861348e-07, + "loss": 0.0, + "num_input_tokens_seen": 113266856, + "step": 168080 + }, + { + "epoch": 4.106344514206142, + "grad_norm": 0.00029858926427550614, + "learning_rate": 1.883984223077012e-07, + "loss": 0.0, + "num_input_tokens_seen": 113270376, + "step": 168085 + }, + { + "epoch": 4.106466665037989, + "grad_norm": 0.00017355407180730253, + "learning_rate": 1.883486051189742e-07, + "loss": 0.0466, + "num_input_tokens_seen": 113273512, + "step": 168090 + }, + { + "epoch": 4.106588815869836, + "grad_norm": 0.00020966355805285275, + "learning_rate": 1.882987938327941e-07, + "loss": 0.0, + "num_input_tokens_seen": 113277096, + "step": 168095 + }, + { + "epoch": 4.106710966701684, + "grad_norm": 3.090937389060855e-05, + "learning_rate": 1.8824898844952374e-07, + "loss": 0.0, + "num_input_tokens_seen": 113280296, + "step": 168100 + }, + { + "epoch": 4.10683311753353, + "grad_norm": 0.0008340792264789343, + "learning_rate": 1.8819918896952492e-07, + "loss": 0.0, + "num_input_tokens_seen": 113284072, + "step": 168105 + }, + { + "epoch": 4.106955268365377, + "grad_norm": 5.1596016419352964e-05, + "learning_rate": 1.8814939539315987e-07, + "loss": 0.0, + "num_input_tokens_seen": 113287400, + "step": 168110 + }, + { + "epoch": 4.107077419197225, + "grad_norm": 0.00011981278657913208, + "learning_rate": 1.8809960772079103e-07, + "loss": 0.0, + "num_input_tokens_seen": 113290600, + "step": 168115 + }, + { + "epoch": 4.1071995700290715, + "grad_norm": 0.006958621088415384, + "learning_rate": 1.8804982595277995e-07, + "loss": 0.0, + "num_input_tokens_seen": 113293672, + "step": 168120 + }, + { + "epoch": 4.107321720860919, + "grad_norm": 0.00043330652988515794, + "learning_rate": 1.8800005008948928e-07, + "loss": 0.0, + "num_input_tokens_seen": 113296488, + "step": 168125 + }, + { + "epoch": 4.107443871692766, + "grad_norm": 0.00015095339040271938, + "learning_rate": 1.8795028013128055e-07, + "loss": 0.0, + "num_input_tokens_seen": 113299816, + "step": 168130 + }, + { + "epoch": 4.107566022524614, + "grad_norm": 0.00016991773736663163, + "learning_rate": 1.8790051607851553e-07, + "loss": 0.0, + "num_input_tokens_seen": 113303208, + "step": 168135 + }, + { + "epoch": 4.10768817335646, + "grad_norm": 9.44330167840235e-06, + "learning_rate": 1.878507579315567e-07, + "loss": 0.0, + "num_input_tokens_seen": 113306600, + "step": 168140 + }, + { + "epoch": 4.107810324188308, + "grad_norm": 0.0006781097617931664, + "learning_rate": 1.878010056907653e-07, + "loss": 0.0, + "num_input_tokens_seen": 113310120, + "step": 168145 + }, + { + "epoch": 4.107932475020155, + "grad_norm": 0.00011159246059833094, + "learning_rate": 1.877512593565037e-07, + "loss": 0.0, + "num_input_tokens_seen": 113313768, + "step": 168150 + }, + { + "epoch": 4.108054625852002, + "grad_norm": 1.1732379789464176e-05, + "learning_rate": 1.8770151892913322e-07, + "loss": 0.0, + "num_input_tokens_seen": 113317352, + "step": 168155 + }, + { + "epoch": 4.108176776683849, + "grad_norm": 0.4344090223312378, + "learning_rate": 1.8765178440901596e-07, + "loss": 0.0003, + "num_input_tokens_seen": 113320808, + "step": 168160 + }, + { + "epoch": 4.108298927515697, + "grad_norm": 0.00014278020535130054, + "learning_rate": 1.8760205579651323e-07, + "loss": 0.0, + "num_input_tokens_seen": 113324328, + "step": 168165 + }, + { + "epoch": 4.108421078347543, + "grad_norm": 0.00021015606762375683, + "learning_rate": 1.8755233309198704e-07, + "loss": 0.0, + "num_input_tokens_seen": 113327720, + "step": 168170 + }, + { + "epoch": 4.108543229179391, + "grad_norm": 0.00013545910769607872, + "learning_rate": 1.8750261629579867e-07, + "loss": 0.0, + "num_input_tokens_seen": 113330792, + "step": 168175 + }, + { + "epoch": 4.108665380011238, + "grad_norm": 0.0018144716741517186, + "learning_rate": 1.8745290540830972e-07, + "loss": 0.0, + "num_input_tokens_seen": 113333992, + "step": 168180 + }, + { + "epoch": 4.1087875308430855, + "grad_norm": 0.00032248892239294946, + "learning_rate": 1.87403200429882e-07, + "loss": 0.0, + "num_input_tokens_seen": 113337320, + "step": 168185 + }, + { + "epoch": 4.108909681674932, + "grad_norm": 4.1384933865629137e-05, + "learning_rate": 1.8735350136087658e-07, + "loss": 0.0, + "num_input_tokens_seen": 113340776, + "step": 168190 + }, + { + "epoch": 4.109031832506779, + "grad_norm": 0.0021187819074839354, + "learning_rate": 1.8730380820165537e-07, + "loss": 0.0, + "num_input_tokens_seen": 113344168, + "step": 168195 + }, + { + "epoch": 4.109153983338627, + "grad_norm": 0.00021328141156118363, + "learning_rate": 1.8725412095257908e-07, + "loss": 0.0, + "num_input_tokens_seen": 113347432, + "step": 168200 + }, + { + "epoch": 4.109276134170473, + "grad_norm": 7.013930735411122e-05, + "learning_rate": 1.8720443961400944e-07, + "loss": 0.0, + "num_input_tokens_seen": 113350952, + "step": 168205 + }, + { + "epoch": 4.109398285002321, + "grad_norm": 0.0004714243405032903, + "learning_rate": 1.8715476418630805e-07, + "loss": 0.0, + "num_input_tokens_seen": 113354344, + "step": 168210 + }, + { + "epoch": 4.109520435834168, + "grad_norm": 1.5150175386224873e-05, + "learning_rate": 1.871050946698356e-07, + "loss": 0.0, + "num_input_tokens_seen": 113357800, + "step": 168215 + }, + { + "epoch": 4.109642586666015, + "grad_norm": 1.0064273737953044e-05, + "learning_rate": 1.870554310649538e-07, + "loss": 0.0, + "num_input_tokens_seen": 113361064, + "step": 168220 + }, + { + "epoch": 4.109764737497862, + "grad_norm": 0.002622292609885335, + "learning_rate": 1.8700577337202327e-07, + "loss": 0.0, + "num_input_tokens_seen": 113364200, + "step": 168225 + }, + { + "epoch": 4.10988688832971, + "grad_norm": 0.007907216437160969, + "learning_rate": 1.8695612159140572e-07, + "loss": 0.0, + "num_input_tokens_seen": 113367720, + "step": 168230 + }, + { + "epoch": 4.1100090391615565, + "grad_norm": 0.01983102411031723, + "learning_rate": 1.8690647572346185e-07, + "loss": 0.0, + "num_input_tokens_seen": 113370984, + "step": 168235 + }, + { + "epoch": 4.110131189993404, + "grad_norm": 0.00025274226209148765, + "learning_rate": 1.868568357685526e-07, + "loss": 0.0, + "num_input_tokens_seen": 113373864, + "step": 168240 + }, + { + "epoch": 4.110253340825251, + "grad_norm": 0.0006391624920070171, + "learning_rate": 1.8680720172703946e-07, + "loss": 0.0, + "num_input_tokens_seen": 113376872, + "step": 168245 + }, + { + "epoch": 4.1103754916570985, + "grad_norm": 7.626259321114048e-05, + "learning_rate": 1.867575735992827e-07, + "loss": 0.0, + "num_input_tokens_seen": 113380840, + "step": 168250 + }, + { + "epoch": 4.110497642488945, + "grad_norm": 0.0005927463644184172, + "learning_rate": 1.8670795138564387e-07, + "loss": 0.0, + "num_input_tokens_seen": 113384040, + "step": 168255 + }, + { + "epoch": 4.110619793320793, + "grad_norm": 1.2069443073414732e-05, + "learning_rate": 1.8665833508648344e-07, + "loss": 0.0, + "num_input_tokens_seen": 113387752, + "step": 168260 + }, + { + "epoch": 4.11074194415264, + "grad_norm": 0.00023556312953587621, + "learning_rate": 1.8660872470216215e-07, + "loss": 0.0, + "num_input_tokens_seen": 113391208, + "step": 168265 + }, + { + "epoch": 4.110864094984487, + "grad_norm": 0.0015209665289148688, + "learning_rate": 1.8655912023304143e-07, + "loss": 0.0, + "num_input_tokens_seen": 113395560, + "step": 168270 + }, + { + "epoch": 4.110986245816334, + "grad_norm": 0.00024280369689222425, + "learning_rate": 1.865095216794812e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113398888, + "step": 168275 + }, + { + "epoch": 4.111108396648181, + "grad_norm": 5.196717393118888e-05, + "learning_rate": 1.8645992904184282e-07, + "loss": 0.0, + "num_input_tokens_seen": 113401960, + "step": 168280 + }, + { + "epoch": 4.111230547480028, + "grad_norm": 0.00012093011901015416, + "learning_rate": 1.8641034232048647e-07, + "loss": 0.0, + "num_input_tokens_seen": 113405736, + "step": 168285 + }, + { + "epoch": 4.111352698311875, + "grad_norm": 0.00019129080465063453, + "learning_rate": 1.8636076151577317e-07, + "loss": 0.0, + "num_input_tokens_seen": 113409384, + "step": 168290 + }, + { + "epoch": 4.111474849143723, + "grad_norm": 5.972016879240982e-05, + "learning_rate": 1.8631118662806288e-07, + "loss": 0.0, + "num_input_tokens_seen": 113413224, + "step": 168295 + }, + { + "epoch": 4.1115969999755695, + "grad_norm": 6.202772783581167e-05, + "learning_rate": 1.8626161765771665e-07, + "loss": 0.0, + "num_input_tokens_seen": 113416488, + "step": 168300 + }, + { + "epoch": 4.111719150807417, + "grad_norm": 0.00035181641578674316, + "learning_rate": 1.8621205460509504e-07, + "loss": 0.0, + "num_input_tokens_seen": 113419688, + "step": 168305 + }, + { + "epoch": 4.111841301639264, + "grad_norm": 0.0006404395098797977, + "learning_rate": 1.861624974705579e-07, + "loss": 0.0, + "num_input_tokens_seen": 113423528, + "step": 168310 + }, + { + "epoch": 4.1119634524711115, + "grad_norm": 0.00018085418560076505, + "learning_rate": 1.8611294625446628e-07, + "loss": 0.0, + "num_input_tokens_seen": 113426792, + "step": 168315 + }, + { + "epoch": 4.112085603302958, + "grad_norm": 0.00031348931952379644, + "learning_rate": 1.8606340095717999e-07, + "loss": 0.0, + "num_input_tokens_seen": 113430312, + "step": 168320 + }, + { + "epoch": 4.112207754134806, + "grad_norm": 0.002586101181805134, + "learning_rate": 1.8601386157905974e-07, + "loss": 0.0, + "num_input_tokens_seen": 113434024, + "step": 168325 + }, + { + "epoch": 4.112329904966653, + "grad_norm": 0.000644766609184444, + "learning_rate": 1.8596432812046548e-07, + "loss": 0.0, + "num_input_tokens_seen": 113437736, + "step": 168330 + }, + { + "epoch": 4.1124520557985, + "grad_norm": 0.07879499346017838, + "learning_rate": 1.859148005817578e-07, + "loss": 0.0, + "num_input_tokens_seen": 113441000, + "step": 168335 + }, + { + "epoch": 4.112574206630347, + "grad_norm": 0.003919265698641539, + "learning_rate": 1.858652789632964e-07, + "loss": 0.0, + "num_input_tokens_seen": 113443816, + "step": 168340 + }, + { + "epoch": 4.112696357462195, + "grad_norm": 0.004266014322638512, + "learning_rate": 1.858157632654419e-07, + "loss": 0.0, + "num_input_tokens_seen": 113446952, + "step": 168345 + }, + { + "epoch": 4.112818508294041, + "grad_norm": 0.0012039493303745985, + "learning_rate": 1.8576625348855411e-07, + "loss": 0.0, + "num_input_tokens_seen": 113450088, + "step": 168350 + }, + { + "epoch": 4.112940659125889, + "grad_norm": 0.001107099698856473, + "learning_rate": 1.85716749632993e-07, + "loss": 0.0, + "num_input_tokens_seen": 113453416, + "step": 168355 + }, + { + "epoch": 4.113062809957736, + "grad_norm": 0.00014701222244184464, + "learning_rate": 1.8566725169911858e-07, + "loss": 0.0, + "num_input_tokens_seen": 113456552, + "step": 168360 + }, + { + "epoch": 4.113184960789583, + "grad_norm": 0.009403909556567669, + "learning_rate": 1.856177596872913e-07, + "loss": 0.0, + "num_input_tokens_seen": 113459880, + "step": 168365 + }, + { + "epoch": 4.11330711162143, + "grad_norm": 0.002776139182969928, + "learning_rate": 1.8556827359787042e-07, + "loss": 0.0, + "num_input_tokens_seen": 113463208, + "step": 168370 + }, + { + "epoch": 4.113429262453277, + "grad_norm": 0.0017793282167986035, + "learning_rate": 1.8551879343121635e-07, + "loss": 0.0, + "num_input_tokens_seen": 113466344, + "step": 168375 + }, + { + "epoch": 4.1135514132851245, + "grad_norm": 7.744233153061941e-05, + "learning_rate": 1.854693191876884e-07, + "loss": 0.0, + "num_input_tokens_seen": 113469928, + "step": 168380 + }, + { + "epoch": 4.113673564116971, + "grad_norm": 0.0009350012987852097, + "learning_rate": 1.8541985086764688e-07, + "loss": 0.0, + "num_input_tokens_seen": 113473384, + "step": 168385 + }, + { + "epoch": 4.113795714948819, + "grad_norm": 8.169966167770326e-05, + "learning_rate": 1.8537038847145116e-07, + "loss": 0.0, + "num_input_tokens_seen": 113476840, + "step": 168390 + }, + { + "epoch": 4.113917865780666, + "grad_norm": 0.014098173938691616, + "learning_rate": 1.8532093199946098e-07, + "loss": 0.0, + "num_input_tokens_seen": 113480232, + "step": 168395 + }, + { + "epoch": 4.114040016612513, + "grad_norm": 0.0029436154291033745, + "learning_rate": 1.852714814520364e-07, + "loss": 0.0, + "num_input_tokens_seen": 113483304, + "step": 168400 + }, + { + "epoch": 4.11416216744436, + "grad_norm": 0.08929237723350525, + "learning_rate": 1.852220368295364e-07, + "loss": 0.0, + "num_input_tokens_seen": 113486440, + "step": 168405 + }, + { + "epoch": 4.114284318276208, + "grad_norm": 0.0003501186438370496, + "learning_rate": 1.8517259813232122e-07, + "loss": 0.0, + "num_input_tokens_seen": 113489960, + "step": 168410 + }, + { + "epoch": 4.114406469108054, + "grad_norm": 0.022616511210799217, + "learning_rate": 1.851231653607499e-07, + "loss": 0.0, + "num_input_tokens_seen": 113493416, + "step": 168415 + }, + { + "epoch": 4.114528619939902, + "grad_norm": 0.00025067225215025246, + "learning_rate": 1.8507373851518204e-07, + "loss": 0.0, + "num_input_tokens_seen": 113496488, + "step": 168420 + }, + { + "epoch": 4.114650770771749, + "grad_norm": 0.437266081571579, + "learning_rate": 1.8502431759597737e-07, + "loss": 0.0002, + "num_input_tokens_seen": 113499560, + "step": 168425 + }, + { + "epoch": 4.114772921603596, + "grad_norm": 0.002507508499547839, + "learning_rate": 1.849749026034948e-07, + "loss": 0.0, + "num_input_tokens_seen": 113502696, + "step": 168430 + }, + { + "epoch": 4.114895072435443, + "grad_norm": 0.00028929513064213097, + "learning_rate": 1.8492549353809416e-07, + "loss": 0.0, + "num_input_tokens_seen": 113506216, + "step": 168435 + }, + { + "epoch": 4.115017223267291, + "grad_norm": 0.0007330722874030471, + "learning_rate": 1.8487609040013463e-07, + "loss": 0.0, + "num_input_tokens_seen": 113509736, + "step": 168440 + }, + { + "epoch": 4.1151393740991375, + "grad_norm": 0.000801653484813869, + "learning_rate": 1.8482669318997524e-07, + "loss": 0.0, + "num_input_tokens_seen": 113513512, + "step": 168445 + }, + { + "epoch": 4.115261524930985, + "grad_norm": 0.0018105350900441408, + "learning_rate": 1.8477730190797548e-07, + "loss": 0.0, + "num_input_tokens_seen": 113516648, + "step": 168450 + }, + { + "epoch": 4.115383675762832, + "grad_norm": 0.00022335609537549317, + "learning_rate": 1.8472791655449426e-07, + "loss": 0.0, + "num_input_tokens_seen": 113522280, + "step": 168455 + }, + { + "epoch": 4.115505826594679, + "grad_norm": 2.3306838556891307e-05, + "learning_rate": 1.8467853712989123e-07, + "loss": 0.0, + "num_input_tokens_seen": 113525160, + "step": 168460 + }, + { + "epoch": 4.115627977426526, + "grad_norm": 0.0005397560307756066, + "learning_rate": 1.8462916363452486e-07, + "loss": 0.0, + "num_input_tokens_seen": 113528936, + "step": 168465 + }, + { + "epoch": 4.115750128258373, + "grad_norm": 1.6428799426648766e-05, + "learning_rate": 1.8457979606875483e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113531880, + "step": 168470 + }, + { + "epoch": 4.115872279090221, + "grad_norm": 0.0009806773159652948, + "learning_rate": 1.8453043443293946e-07, + "loss": 0.0, + "num_input_tokens_seen": 113535272, + "step": 168475 + }, + { + "epoch": 4.115994429922067, + "grad_norm": 8.44751630211249e-05, + "learning_rate": 1.8448107872743855e-07, + "loss": 0.0, + "num_input_tokens_seen": 113538408, + "step": 168480 + }, + { + "epoch": 4.116116580753915, + "grad_norm": 0.00013254370423965156, + "learning_rate": 1.8443172895261016e-07, + "loss": 0.0, + "num_input_tokens_seen": 113541928, + "step": 168485 + }, + { + "epoch": 4.116238731585762, + "grad_norm": 0.16897857189178467, + "learning_rate": 1.8438238510881365e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113545000, + "step": 168490 + }, + { + "epoch": 4.1163608824176094, + "grad_norm": 0.0007797760772518814, + "learning_rate": 1.8433304719640807e-07, + "loss": 0.0, + "num_input_tokens_seen": 113548264, + "step": 168495 + }, + { + "epoch": 4.116483033249456, + "grad_norm": 0.0005675862776115537, + "learning_rate": 1.8428371521575182e-07, + "loss": 0.0, + "num_input_tokens_seen": 113552104, + "step": 168500 + }, + { + "epoch": 4.116605184081304, + "grad_norm": 0.002730336971580982, + "learning_rate": 1.842343891672039e-07, + "loss": 0.0, + "num_input_tokens_seen": 113555176, + "step": 168505 + }, + { + "epoch": 4.116727334913151, + "grad_norm": 2.090478847094346e-05, + "learning_rate": 1.8418506905112274e-07, + "loss": 0.0, + "num_input_tokens_seen": 113558824, + "step": 168510 + }, + { + "epoch": 4.116849485744998, + "grad_norm": 0.00023547621094621718, + "learning_rate": 1.8413575486786713e-07, + "loss": 0.0, + "num_input_tokens_seen": 113562024, + "step": 168515 + }, + { + "epoch": 4.116971636576845, + "grad_norm": 0.0004209127801004797, + "learning_rate": 1.8408644661779605e-07, + "loss": 0.0, + "num_input_tokens_seen": 113565544, + "step": 168520 + }, + { + "epoch": 4.117093787408693, + "grad_norm": 0.0011463210685178638, + "learning_rate": 1.8403714430126748e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113568936, + "step": 168525 + }, + { + "epoch": 4.117215938240539, + "grad_norm": 0.0006217953632585704, + "learning_rate": 1.8398784791864052e-07, + "loss": 0.0, + "num_input_tokens_seen": 113572584, + "step": 168530 + }, + { + "epoch": 4.117338089072387, + "grad_norm": 0.011418608017265797, + "learning_rate": 1.839385574702732e-07, + "loss": 0.0, + "num_input_tokens_seen": 113575720, + "step": 168535 + }, + { + "epoch": 4.117460239904234, + "grad_norm": 0.00016718072583898902, + "learning_rate": 1.8388927295652444e-07, + "loss": 0.0, + "num_input_tokens_seen": 113579368, + "step": 168540 + }, + { + "epoch": 4.117582390736081, + "grad_norm": 5.693855928257108e-05, + "learning_rate": 1.838399943777521e-07, + "loss": 0.0, + "num_input_tokens_seen": 113582824, + "step": 168545 + }, + { + "epoch": 4.117704541567928, + "grad_norm": 0.00029498033109121025, + "learning_rate": 1.837907217343151e-07, + "loss": 0.0, + "num_input_tokens_seen": 113586344, + "step": 168550 + }, + { + "epoch": 4.117826692399775, + "grad_norm": 0.000566729751881212, + "learning_rate": 1.8374145502657157e-07, + "loss": 0.0, + "num_input_tokens_seen": 113589608, + "step": 168555 + }, + { + "epoch": 4.1179488432316225, + "grad_norm": 0.0009026674670167267, + "learning_rate": 1.8369219425487935e-07, + "loss": 0.0, + "num_input_tokens_seen": 113593576, + "step": 168560 + }, + { + "epoch": 4.118070994063469, + "grad_norm": 0.0011875737691298127, + "learning_rate": 1.836429394195974e-07, + "loss": 0.0, + "num_input_tokens_seen": 113597160, + "step": 168565 + }, + { + "epoch": 4.118193144895317, + "grad_norm": 0.00010860639304155484, + "learning_rate": 1.8359369052108332e-07, + "loss": 0.0, + "num_input_tokens_seen": 113601128, + "step": 168570 + }, + { + "epoch": 4.118315295727164, + "grad_norm": 0.0002416908391751349, + "learning_rate": 1.8354444755969579e-07, + "loss": 0.0, + "num_input_tokens_seen": 113604456, + "step": 168575 + }, + { + "epoch": 4.118437446559011, + "grad_norm": 8.761444405536167e-06, + "learning_rate": 1.8349521053579232e-07, + "loss": 0.0, + "num_input_tokens_seen": 113607976, + "step": 168580 + }, + { + "epoch": 4.118559597390858, + "grad_norm": 0.0002650410169735551, + "learning_rate": 1.8344597944973129e-07, + "loss": 0.0, + "num_input_tokens_seen": 113612008, + "step": 168585 + }, + { + "epoch": 4.118681748222706, + "grad_norm": 0.0030864370055496693, + "learning_rate": 1.8339675430187097e-07, + "loss": 0.0, + "num_input_tokens_seen": 113615272, + "step": 168590 + }, + { + "epoch": 4.118803899054552, + "grad_norm": 5.8644753153203055e-05, + "learning_rate": 1.8334753509256883e-07, + "loss": 0.0, + "num_input_tokens_seen": 113618408, + "step": 168595 + }, + { + "epoch": 4.1189260498864, + "grad_norm": 0.0005382047384046018, + "learning_rate": 1.8329832182218341e-07, + "loss": 0.0, + "num_input_tokens_seen": 113621800, + "step": 168600 + }, + { + "epoch": 4.119048200718247, + "grad_norm": 0.00048400447121821344, + "learning_rate": 1.8324911449107195e-07, + "loss": 0.0, + "num_input_tokens_seen": 113625448, + "step": 168605 + }, + { + "epoch": 4.119170351550094, + "grad_norm": 0.0004027804243378341, + "learning_rate": 1.831999130995926e-07, + "loss": 0.0, + "num_input_tokens_seen": 113628840, + "step": 168610 + }, + { + "epoch": 4.119292502381941, + "grad_norm": 0.0004047253751195967, + "learning_rate": 1.8315071764810342e-07, + "loss": 0.0, + "num_input_tokens_seen": 113631720, + "step": 168615 + }, + { + "epoch": 4.119414653213789, + "grad_norm": 3.090100653935224e-05, + "learning_rate": 1.8310152813696166e-07, + "loss": 0.0, + "num_input_tokens_seen": 113634664, + "step": 168620 + }, + { + "epoch": 4.1195368040456355, + "grad_norm": 12.117880821228027, + "learning_rate": 1.8305234456652562e-07, + "loss": 0.0003, + "num_input_tokens_seen": 113638824, + "step": 168625 + }, + { + "epoch": 4.119658954877483, + "grad_norm": 0.002483411692082882, + "learning_rate": 1.8300316693715234e-07, + "loss": 0.0, + "num_input_tokens_seen": 113642216, + "step": 168630 + }, + { + "epoch": 4.11978110570933, + "grad_norm": 0.0002661602047737688, + "learning_rate": 1.8295399524920008e-07, + "loss": 0.0, + "num_input_tokens_seen": 113645480, + "step": 168635 + }, + { + "epoch": 4.119903256541177, + "grad_norm": 0.0006768704042769969, + "learning_rate": 1.829048295030259e-07, + "loss": 0.0, + "num_input_tokens_seen": 113649000, + "step": 168640 + }, + { + "epoch": 4.120025407373024, + "grad_norm": 5.930757833993994e-05, + "learning_rate": 1.828556696989878e-07, + "loss": 0.0, + "num_input_tokens_seen": 113652200, + "step": 168645 + }, + { + "epoch": 4.120147558204871, + "grad_norm": 0.007106540724635124, + "learning_rate": 1.8280651583744278e-07, + "loss": 0.0477, + "num_input_tokens_seen": 113655336, + "step": 168650 + }, + { + "epoch": 4.120269709036719, + "grad_norm": 0.00035729375667870045, + "learning_rate": 1.8275736791874885e-07, + "loss": 0.0, + "num_input_tokens_seen": 113658728, + "step": 168655 + }, + { + "epoch": 4.120391859868565, + "grad_norm": 0.00504821864888072, + "learning_rate": 1.8270822594326308e-07, + "loss": 0.0, + "num_input_tokens_seen": 113661928, + "step": 168660 + }, + { + "epoch": 4.120514010700413, + "grad_norm": 0.008736646734178066, + "learning_rate": 1.826590899113427e-07, + "loss": 0.0, + "num_input_tokens_seen": 113665128, + "step": 168665 + }, + { + "epoch": 4.12063616153226, + "grad_norm": 9.719293302623555e-05, + "learning_rate": 1.8260995982334538e-07, + "loss": 0.0, + "num_input_tokens_seen": 113668456, + "step": 168670 + }, + { + "epoch": 4.120758312364107, + "grad_norm": 0.000476063578389585, + "learning_rate": 1.8256083567962843e-07, + "loss": 0.0, + "num_input_tokens_seen": 113671272, + "step": 168675 + }, + { + "epoch": 4.120880463195954, + "grad_norm": 0.00019063352374359965, + "learning_rate": 1.825117174805486e-07, + "loss": 0.0, + "num_input_tokens_seen": 113674920, + "step": 168680 + }, + { + "epoch": 4.121002614027802, + "grad_norm": 0.00020761760242749006, + "learning_rate": 1.8246260522646385e-07, + "loss": 0.0, + "num_input_tokens_seen": 113677928, + "step": 168685 + }, + { + "epoch": 4.1211247648596485, + "grad_norm": 0.007399700582027435, + "learning_rate": 1.8241349891773062e-07, + "loss": 0.0, + "num_input_tokens_seen": 113681064, + "step": 168690 + }, + { + "epoch": 4.121246915691496, + "grad_norm": 0.0027653025463223457, + "learning_rate": 1.8236439855470654e-07, + "loss": 0.0, + "num_input_tokens_seen": 113684264, + "step": 168695 + }, + { + "epoch": 4.121369066523343, + "grad_norm": 0.00011870275920955464, + "learning_rate": 1.8231530413774833e-07, + "loss": 0.0, + "num_input_tokens_seen": 113687400, + "step": 168700 + }, + { + "epoch": 4.1214912173551905, + "grad_norm": 4.691482172347605e-05, + "learning_rate": 1.82266215667213e-07, + "loss": 0.0, + "num_input_tokens_seen": 113690920, + "step": 168705 + }, + { + "epoch": 4.121613368187037, + "grad_norm": 0.0061193970032036304, + "learning_rate": 1.822171331434581e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113694376, + "step": 168710 + }, + { + "epoch": 4.121735519018885, + "grad_norm": 6.885571201564744e-05, + "learning_rate": 1.8216805656683986e-07, + "loss": 0.0, + "num_input_tokens_seen": 113697576, + "step": 168715 + }, + { + "epoch": 4.121857669850732, + "grad_norm": 0.0019095209427177906, + "learning_rate": 1.8211898593771568e-07, + "loss": 0.0, + "num_input_tokens_seen": 113701352, + "step": 168720 + }, + { + "epoch": 4.121979820682579, + "grad_norm": 0.00321991671808064, + "learning_rate": 1.8206992125644205e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113704616, + "step": 168725 + }, + { + "epoch": 4.122101971514426, + "grad_norm": 0.004618339240550995, + "learning_rate": 1.8202086252337611e-07, + "loss": 0.0, + "num_input_tokens_seen": 113707752, + "step": 168730 + }, + { + "epoch": 4.122224122346273, + "grad_norm": 0.0008676843135617673, + "learning_rate": 1.8197180973887428e-07, + "loss": 0.0, + "num_input_tokens_seen": 113711080, + "step": 168735 + }, + { + "epoch": 4.12234627317812, + "grad_norm": 0.0010523677337914705, + "learning_rate": 1.8192276290329345e-07, + "loss": 0.0, + "num_input_tokens_seen": 113714344, + "step": 168740 + }, + { + "epoch": 4.122468424009967, + "grad_norm": 0.002487873425707221, + "learning_rate": 1.8187372201699058e-07, + "loss": 0.0, + "num_input_tokens_seen": 113717544, + "step": 168745 + }, + { + "epoch": 4.122590574841815, + "grad_norm": 0.0005908418097533286, + "learning_rate": 1.8182468708032205e-07, + "loss": 0.0, + "num_input_tokens_seen": 113721000, + "step": 168750 + }, + { + "epoch": 4.1227127256736615, + "grad_norm": 0.000158902897965163, + "learning_rate": 1.8177565809364426e-07, + "loss": 0.0, + "num_input_tokens_seen": 113724712, + "step": 168755 + }, + { + "epoch": 4.122834876505509, + "grad_norm": 5.1024628191953525e-05, + "learning_rate": 1.817266350573141e-07, + "loss": 0.0, + "num_input_tokens_seen": 113728552, + "step": 168760 + }, + { + "epoch": 4.122957027337356, + "grad_norm": 0.001737323822453618, + "learning_rate": 1.816776179716878e-07, + "loss": 0.0, + "num_input_tokens_seen": 113732136, + "step": 168765 + }, + { + "epoch": 4.1230791781692036, + "grad_norm": 7.92595892562531e-05, + "learning_rate": 1.8162860683712212e-07, + "loss": 0.0, + "num_input_tokens_seen": 113735272, + "step": 168770 + }, + { + "epoch": 4.12320132900105, + "grad_norm": 0.0005044394638389349, + "learning_rate": 1.8157960165397312e-07, + "loss": 0.0, + "num_input_tokens_seen": 113738728, + "step": 168775 + }, + { + "epoch": 4.123323479832898, + "grad_norm": 0.0007095523760654032, + "learning_rate": 1.8153060242259765e-07, + "loss": 0.0021, + "num_input_tokens_seen": 113742056, + "step": 168780 + }, + { + "epoch": 4.123445630664745, + "grad_norm": 0.00040243950206786394, + "learning_rate": 1.8148160914335153e-07, + "loss": 0.0, + "num_input_tokens_seen": 113745640, + "step": 168785 + }, + { + "epoch": 4.123567781496592, + "grad_norm": 8.54555910336785e-05, + "learning_rate": 1.8143262181659157e-07, + "loss": 0.0, + "num_input_tokens_seen": 113749160, + "step": 168790 + }, + { + "epoch": 4.123689932328439, + "grad_norm": 0.00021532121172640473, + "learning_rate": 1.813836404426734e-07, + "loss": 0.0, + "num_input_tokens_seen": 113752808, + "step": 168795 + }, + { + "epoch": 4.123812083160287, + "grad_norm": 7.620219548698515e-05, + "learning_rate": 1.8133466502195372e-07, + "loss": 0.0, + "num_input_tokens_seen": 113756008, + "step": 168800 + }, + { + "epoch": 4.123934233992133, + "grad_norm": 0.0002450960746500641, + "learning_rate": 1.8128569555478867e-07, + "loss": 0.0, + "num_input_tokens_seen": 113758952, + "step": 168805 + }, + { + "epoch": 4.124056384823981, + "grad_norm": 2.163955468859058e-05, + "learning_rate": 1.812367320415341e-07, + "loss": 0.0, + "num_input_tokens_seen": 113762280, + "step": 168810 + }, + { + "epoch": 4.124178535655828, + "grad_norm": 4.057705154991709e-05, + "learning_rate": 1.811877744825464e-07, + "loss": 0.0, + "num_input_tokens_seen": 113765736, + "step": 168815 + }, + { + "epoch": 4.124300686487675, + "grad_norm": 0.005065068136900663, + "learning_rate": 1.8113882287818127e-07, + "loss": 0.0, + "num_input_tokens_seen": 113769128, + "step": 168820 + }, + { + "epoch": 4.124422837319522, + "grad_norm": 0.0001583754929015413, + "learning_rate": 1.8108987722879487e-07, + "loss": 0.0, + "num_input_tokens_seen": 113773160, + "step": 168825 + }, + { + "epoch": 4.124544988151369, + "grad_norm": 0.00011476362851681188, + "learning_rate": 1.8104093753474336e-07, + "loss": 0.0, + "num_input_tokens_seen": 113776552, + "step": 168830 + }, + { + "epoch": 4.124667138983217, + "grad_norm": 9.519248851574957e-05, + "learning_rate": 1.8099200379638212e-07, + "loss": 0.0, + "num_input_tokens_seen": 113779944, + "step": 168835 + }, + { + "epoch": 4.124789289815063, + "grad_norm": 365.7969970703125, + "learning_rate": 1.8094307601406767e-07, + "loss": 0.0028, + "num_input_tokens_seen": 113783592, + "step": 168840 + }, + { + "epoch": 4.124911440646911, + "grad_norm": 0.008785773068666458, + "learning_rate": 1.8089415418815512e-07, + "loss": 0.0, + "num_input_tokens_seen": 113787240, + "step": 168845 + }, + { + "epoch": 4.125033591478758, + "grad_norm": 1.7524176655570045e-05, + "learning_rate": 1.8084523831900089e-07, + "loss": 0.0, + "num_input_tokens_seen": 113790888, + "step": 168850 + }, + { + "epoch": 4.125155742310605, + "grad_norm": 0.00023619976127520204, + "learning_rate": 1.8079632840696023e-07, + "loss": 0.0, + "num_input_tokens_seen": 113794280, + "step": 168855 + }, + { + "epoch": 4.125277893142452, + "grad_norm": 1.563422119943425e-05, + "learning_rate": 1.807474244523891e-07, + "loss": 0.0, + "num_input_tokens_seen": 113797480, + "step": 168860 + }, + { + "epoch": 4.1254000439743, + "grad_norm": 5.026695362175815e-05, + "learning_rate": 1.806985264556432e-07, + "loss": 0.0, + "num_input_tokens_seen": 113800744, + "step": 168865 + }, + { + "epoch": 4.1255221948061465, + "grad_norm": 0.1911337971687317, + "learning_rate": 1.806496344170777e-07, + "loss": 0.0001, + "num_input_tokens_seen": 113804008, + "step": 168870 + }, + { + "epoch": 4.125644345637994, + "grad_norm": 0.0031086925882846117, + "learning_rate": 1.8060074833704863e-07, + "loss": 0.0, + "num_input_tokens_seen": 113807784, + "step": 168875 + }, + { + "epoch": 4.125766496469841, + "grad_norm": 2.847278847184498e-05, + "learning_rate": 1.8055186821591107e-07, + "loss": 0.0, + "num_input_tokens_seen": 113811240, + "step": 168880 + }, + { + "epoch": 4.1258886473016885, + "grad_norm": 0.00010783431935124099, + "learning_rate": 1.8050299405402102e-07, + "loss": 0.0526, + "num_input_tokens_seen": 113814696, + "step": 168885 + }, + { + "epoch": 4.126010798133535, + "grad_norm": 9.459498687647283e-05, + "learning_rate": 1.8045412585173336e-07, + "loss": 0.0, + "num_input_tokens_seen": 113818024, + "step": 168890 + }, + { + "epoch": 4.126132948965383, + "grad_norm": 4.6049019147176296e-05, + "learning_rate": 1.804052636094038e-07, + "loss": 0.0, + "num_input_tokens_seen": 113821032, + "step": 168895 + }, + { + "epoch": 4.12625509979723, + "grad_norm": 0.0009420507121831179, + "learning_rate": 1.8035640732738766e-07, + "loss": 0.0, + "num_input_tokens_seen": 113824488, + "step": 168900 + }, + { + "epoch": 4.126377250629076, + "grad_norm": 4.2329302232246846e-05, + "learning_rate": 1.8030755700604007e-07, + "loss": 0.0, + "num_input_tokens_seen": 113827816, + "step": 168905 + }, + { + "epoch": 4.126499401460924, + "grad_norm": 4.134025584789924e-05, + "learning_rate": 1.8025871264571668e-07, + "loss": 0.0, + "num_input_tokens_seen": 113831592, + "step": 168910 + }, + { + "epoch": 4.126621552292771, + "grad_norm": 0.002498138230293989, + "learning_rate": 1.8020987424677203e-07, + "loss": 0.0, + "num_input_tokens_seen": 113834856, + "step": 168915 + }, + { + "epoch": 4.126743703124618, + "grad_norm": 0.005550534464418888, + "learning_rate": 1.801610418095618e-07, + "loss": 0.0, + "num_input_tokens_seen": 113838440, + "step": 168920 + }, + { + "epoch": 4.126865853956465, + "grad_norm": 0.00012591673294082284, + "learning_rate": 1.801122153344412e-07, + "loss": 0.0, + "num_input_tokens_seen": 113841512, + "step": 168925 + }, + { + "epoch": 4.126988004788313, + "grad_norm": 2.2473011995316483e-05, + "learning_rate": 1.800633948217648e-07, + "loss": 0.0, + "num_input_tokens_seen": 113845032, + "step": 168930 + }, + { + "epoch": 4.1271101556201595, + "grad_norm": 0.00018835488299373537, + "learning_rate": 1.800145802718882e-07, + "loss": 0.0, + "num_input_tokens_seen": 113848232, + "step": 168935 + }, + { + "epoch": 4.127232306452007, + "grad_norm": 0.00011231577082071453, + "learning_rate": 1.799657716851659e-07, + "loss": 0.0, + "num_input_tokens_seen": 113851304, + "step": 168940 + }, + { + "epoch": 4.127354457283854, + "grad_norm": 6.715896597597748e-05, + "learning_rate": 1.7991696906195332e-07, + "loss": 0.0, + "num_input_tokens_seen": 113854824, + "step": 168945 + }, + { + "epoch": 4.1274766081157015, + "grad_norm": 0.026994898915290833, + "learning_rate": 1.7986817240260487e-07, + "loss": 0.0, + "num_input_tokens_seen": 113858472, + "step": 168950 + }, + { + "epoch": 4.127598758947548, + "grad_norm": 2.0782459614565596e-05, + "learning_rate": 1.7981938170747591e-07, + "loss": 0.0, + "num_input_tokens_seen": 113861800, + "step": 168955 + }, + { + "epoch": 4.127720909779396, + "grad_norm": 0.0001835390430642292, + "learning_rate": 1.7977059697692065e-07, + "loss": 0.0, + "num_input_tokens_seen": 113865192, + "step": 168960 + }, + { + "epoch": 4.127843060611243, + "grad_norm": 0.0012272525345906615, + "learning_rate": 1.7972181821129462e-07, + "loss": 0.0, + "num_input_tokens_seen": 113868456, + "step": 168965 + }, + { + "epoch": 4.12796521144309, + "grad_norm": 0.00022447301307693124, + "learning_rate": 1.7967304541095206e-07, + "loss": 0.0, + "num_input_tokens_seen": 113871784, + "step": 168970 + }, + { + "epoch": 4.128087362274937, + "grad_norm": 0.00019253823847975582, + "learning_rate": 1.7962427857624752e-07, + "loss": 0.0, + "num_input_tokens_seen": 113874984, + "step": 168975 + }, + { + "epoch": 4.128209513106785, + "grad_norm": 1.2826269085053355e-05, + "learning_rate": 1.7957551770753598e-07, + "loss": 0.0, + "num_input_tokens_seen": 113878120, + "step": 168980 + }, + { + "epoch": 4.128331663938631, + "grad_norm": 0.0008162598242051899, + "learning_rate": 1.795267628051721e-07, + "loss": 0.0012, + "num_input_tokens_seen": 113881128, + "step": 168985 + }, + { + "epoch": 4.128453814770479, + "grad_norm": 0.00021831518097314984, + "learning_rate": 1.7947801386951e-07, + "loss": 0.0, + "num_input_tokens_seen": 113884200, + "step": 168990 + }, + { + "epoch": 4.128575965602326, + "grad_norm": 0.0020679819863289595, + "learning_rate": 1.7942927090090476e-07, + "loss": 0.0, + "num_input_tokens_seen": 113888232, + "step": 168995 + }, + { + "epoch": 4.1286981164341725, + "grad_norm": 0.00011742662900360301, + "learning_rate": 1.793805338997104e-07, + "loss": 0.0, + "num_input_tokens_seen": 113891304, + "step": 169000 + }, + { + "epoch": 4.12882026726602, + "grad_norm": 4.990437446394935e-06, + "learning_rate": 1.793318028662817e-07, + "loss": 0.0, + "num_input_tokens_seen": 113894568, + "step": 169005 + }, + { + "epoch": 4.128942418097867, + "grad_norm": 3.3043686471501132e-06, + "learning_rate": 1.7928307780097264e-07, + "loss": 0.0, + "num_input_tokens_seen": 113897768, + "step": 169010 + }, + { + "epoch": 4.1290645689297145, + "grad_norm": 5.002227408112958e-05, + "learning_rate": 1.7923435870413773e-07, + "loss": 0.0, + "num_input_tokens_seen": 113901096, + "step": 169015 + }, + { + "epoch": 4.129186719761561, + "grad_norm": 7.49848986743018e-05, + "learning_rate": 1.7918564557613157e-07, + "loss": 0.0, + "num_input_tokens_seen": 113904424, + "step": 169020 + }, + { + "epoch": 4.129308870593409, + "grad_norm": 3.239734360249713e-05, + "learning_rate": 1.7913693841730792e-07, + "loss": 0.0, + "num_input_tokens_seen": 113907304, + "step": 169025 + }, + { + "epoch": 4.129431021425256, + "grad_norm": 8.012790203792974e-05, + "learning_rate": 1.7908823722802157e-07, + "loss": 0.0, + "num_input_tokens_seen": 113910440, + "step": 169030 + }, + { + "epoch": 4.129553172257103, + "grad_norm": 2.7389121896703728e-05, + "learning_rate": 1.7903954200862602e-07, + "loss": 0.0, + "num_input_tokens_seen": 113913512, + "step": 169035 + }, + { + "epoch": 4.12967532308895, + "grad_norm": 0.0003001104050781578, + "learning_rate": 1.78990852759476e-07, + "loss": 0.0, + "num_input_tokens_seen": 113916456, + "step": 169040 + }, + { + "epoch": 4.129797473920798, + "grad_norm": 2.86813810816966e-05, + "learning_rate": 1.789421694809251e-07, + "loss": 0.0, + "num_input_tokens_seen": 113919656, + "step": 169045 + }, + { + "epoch": 4.129919624752644, + "grad_norm": 7.979344809427857e-05, + "learning_rate": 1.788934921733276e-07, + "loss": 0.0, + "num_input_tokens_seen": 113923112, + "step": 169050 + }, + { + "epoch": 4.130041775584492, + "grad_norm": 0.00016391415556427091, + "learning_rate": 1.7884482083703766e-07, + "loss": 0.0, + "num_input_tokens_seen": 113926568, + "step": 169055 + }, + { + "epoch": 4.130163926416339, + "grad_norm": 0.001250895787961781, + "learning_rate": 1.787961554724091e-07, + "loss": 0.0, + "num_input_tokens_seen": 113929640, + "step": 169060 + }, + { + "epoch": 4.130286077248186, + "grad_norm": 0.0009996866574510932, + "learning_rate": 1.7874749607979556e-07, + "loss": 0.0, + "num_input_tokens_seen": 113933288, + "step": 169065 + }, + { + "epoch": 4.130408228080033, + "grad_norm": 6.69181245029904e-05, + "learning_rate": 1.7869884265955127e-07, + "loss": 0.0, + "num_input_tokens_seen": 113936232, + "step": 169070 + }, + { + "epoch": 4.130530378911881, + "grad_norm": 0.008389724418520927, + "learning_rate": 1.7865019521202973e-07, + "loss": 0.0, + "num_input_tokens_seen": 113939560, + "step": 169075 + }, + { + "epoch": 4.1306525297437275, + "grad_norm": 0.0025512150023132563, + "learning_rate": 1.7860155373758511e-07, + "loss": 0.0, + "num_input_tokens_seen": 113942888, + "step": 169080 + }, + { + "epoch": 4.130774680575575, + "grad_norm": 9.098489681491628e-05, + "learning_rate": 1.785529182365707e-07, + "loss": 0.0, + "num_input_tokens_seen": 113945896, + "step": 169085 + }, + { + "epoch": 4.130896831407422, + "grad_norm": 0.0035372497513890266, + "learning_rate": 1.7850428870934052e-07, + "loss": 0.0, + "num_input_tokens_seen": 113949288, + "step": 169090 + }, + { + "epoch": 4.131018982239269, + "grad_norm": 0.00010230082989437506, + "learning_rate": 1.7845566515624798e-07, + "loss": 0.0, + "num_input_tokens_seen": 113952488, + "step": 169095 + }, + { + "epoch": 4.131141133071116, + "grad_norm": 0.0007599542150273919, + "learning_rate": 1.7840704757764712e-07, + "loss": 0.0, + "num_input_tokens_seen": 113955432, + "step": 169100 + }, + { + "epoch": 4.131263283902963, + "grad_norm": 6.62624233882525e-06, + "learning_rate": 1.7835843597389088e-07, + "loss": 0.0002, + "num_input_tokens_seen": 113959208, + "step": 169105 + }, + { + "epoch": 4.131385434734811, + "grad_norm": 0.010468224994838238, + "learning_rate": 1.783098303453331e-07, + "loss": 0.0, + "num_input_tokens_seen": 113962920, + "step": 169110 + }, + { + "epoch": 4.131507585566657, + "grad_norm": 0.00017902413674164563, + "learning_rate": 1.7826123069232746e-07, + "loss": 0.0, + "num_input_tokens_seen": 113966184, + "step": 169115 + }, + { + "epoch": 4.131629736398505, + "grad_norm": 7.583387923659757e-05, + "learning_rate": 1.7821263701522694e-07, + "loss": 0.0, + "num_input_tokens_seen": 113969320, + "step": 169120 + }, + { + "epoch": 4.131751887230352, + "grad_norm": 0.01053790282458067, + "learning_rate": 1.7816404931438533e-07, + "loss": 0.0, + "num_input_tokens_seen": 113972584, + "step": 169125 + }, + { + "epoch": 4.131874038062199, + "grad_norm": 0.0004985374980606139, + "learning_rate": 1.781154675901556e-07, + "loss": 0.0, + "num_input_tokens_seen": 113975656, + "step": 169130 + }, + { + "epoch": 4.131996188894046, + "grad_norm": 0.0007123488467186689, + "learning_rate": 1.7806689184289136e-07, + "loss": 0.0, + "num_input_tokens_seen": 113979432, + "step": 169135 + }, + { + "epoch": 4.132118339725894, + "grad_norm": 1.9404064005357213e-05, + "learning_rate": 1.7801832207294587e-07, + "loss": 0.0, + "num_input_tokens_seen": 113982760, + "step": 169140 + }, + { + "epoch": 4.132240490557741, + "grad_norm": 4.020150299766101e-06, + "learning_rate": 1.7796975828067206e-07, + "loss": 0.0, + "num_input_tokens_seen": 113986856, + "step": 169145 + }, + { + "epoch": 4.132362641389588, + "grad_norm": 0.00013273171498440206, + "learning_rate": 1.7792120046642344e-07, + "loss": 0.0, + "num_input_tokens_seen": 113990568, + "step": 169150 + }, + { + "epoch": 4.132484792221435, + "grad_norm": 0.0015003958251327276, + "learning_rate": 1.7787264863055273e-07, + "loss": 0.0, + "num_input_tokens_seen": 113994536, + "step": 169155 + }, + { + "epoch": 4.132606943053283, + "grad_norm": 7.169459422584623e-05, + "learning_rate": 1.7782410277341352e-07, + "loss": 0.0, + "num_input_tokens_seen": 113997672, + "step": 169160 + }, + { + "epoch": 4.132729093885129, + "grad_norm": 0.00012075284757884219, + "learning_rate": 1.7777556289535834e-07, + "loss": 0.0, + "num_input_tokens_seen": 114001064, + "step": 169165 + }, + { + "epoch": 4.132851244716976, + "grad_norm": 4.0698819248063955e-06, + "learning_rate": 1.7772702899674063e-07, + "loss": 0.0, + "num_input_tokens_seen": 114004392, + "step": 169170 + }, + { + "epoch": 4.132973395548824, + "grad_norm": 0.000144742734846659, + "learning_rate": 1.7767850107791316e-07, + "loss": 0.0, + "num_input_tokens_seen": 114008168, + "step": 169175 + }, + { + "epoch": 4.1330955463806704, + "grad_norm": 0.0016985450638458133, + "learning_rate": 1.7762997913922862e-07, + "loss": 0.0, + "num_input_tokens_seen": 114011752, + "step": 169180 + }, + { + "epoch": 4.133217697212518, + "grad_norm": 6.355442565109115e-06, + "learning_rate": 1.7758146318104018e-07, + "loss": 0.0, + "num_input_tokens_seen": 114015144, + "step": 169185 + }, + { + "epoch": 4.133339848044365, + "grad_norm": 0.001065228134393692, + "learning_rate": 1.7753295320370043e-07, + "loss": 0.0, + "num_input_tokens_seen": 114018152, + "step": 169190 + }, + { + "epoch": 4.1334619988762125, + "grad_norm": 0.873834490776062, + "learning_rate": 1.7748444920756245e-07, + "loss": 0.0003, + "num_input_tokens_seen": 114021416, + "step": 169195 + }, + { + "epoch": 4.133584149708059, + "grad_norm": 0.00012380085536278784, + "learning_rate": 1.774359511929785e-07, + "loss": 0.0, + "num_input_tokens_seen": 114024616, + "step": 169200 + }, + { + "epoch": 4.133706300539907, + "grad_norm": 0.0001145715796155855, + "learning_rate": 1.773874591603015e-07, + "loss": 0.0, + "num_input_tokens_seen": 114027944, + "step": 169205 + }, + { + "epoch": 4.133828451371754, + "grad_norm": 1.7328757166978903e-05, + "learning_rate": 1.7733897310988456e-07, + "loss": 0.0, + "num_input_tokens_seen": 114031400, + "step": 169210 + }, + { + "epoch": 4.133950602203601, + "grad_norm": 3.638927228166722e-05, + "learning_rate": 1.7729049304207955e-07, + "loss": 0.0, + "num_input_tokens_seen": 114034792, + "step": 169215 + }, + { + "epoch": 4.134072753035448, + "grad_norm": 0.00015678427007514983, + "learning_rate": 1.7724201895723956e-07, + "loss": 0.0, + "num_input_tokens_seen": 114038568, + "step": 169220 + }, + { + "epoch": 4.134194903867296, + "grad_norm": 0.00022379684378392994, + "learning_rate": 1.7719355085571676e-07, + "loss": 0.0, + "num_input_tokens_seen": 114041896, + "step": 169225 + }, + { + "epoch": 4.134317054699142, + "grad_norm": 0.000458498892839998, + "learning_rate": 1.771450887378637e-07, + "loss": 0.0, + "num_input_tokens_seen": 114045224, + "step": 169230 + }, + { + "epoch": 4.13443920553099, + "grad_norm": 5.889030217076652e-05, + "learning_rate": 1.7709663260403307e-07, + "loss": 0.0, + "num_input_tokens_seen": 114048552, + "step": 169235 + }, + { + "epoch": 4.134561356362837, + "grad_norm": 0.000463417760329321, + "learning_rate": 1.7704818245457686e-07, + "loss": 0.0, + "num_input_tokens_seen": 114051816, + "step": 169240 + }, + { + "epoch": 4.134683507194684, + "grad_norm": 0.00025956856552511454, + "learning_rate": 1.7699973828984794e-07, + "loss": 0.0, + "num_input_tokens_seen": 114054952, + "step": 169245 + }, + { + "epoch": 4.134805658026531, + "grad_norm": 0.0002999178832396865, + "learning_rate": 1.76951300110198e-07, + "loss": 0.0, + "num_input_tokens_seen": 114058024, + "step": 169250 + }, + { + "epoch": 4.134927808858379, + "grad_norm": 3.823560109594837e-05, + "learning_rate": 1.7690286791597973e-07, + "loss": 0.0, + "num_input_tokens_seen": 114061352, + "step": 169255 + }, + { + "epoch": 4.1350499596902255, + "grad_norm": 2.4604967620689422e-05, + "learning_rate": 1.7685444170754503e-07, + "loss": 0.0, + "num_input_tokens_seen": 114064552, + "step": 169260 + }, + { + "epoch": 4.135172110522072, + "grad_norm": 1.1779735359596089e-05, + "learning_rate": 1.7680602148524649e-07, + "loss": 0.0, + "num_input_tokens_seen": 114067624, + "step": 169265 + }, + { + "epoch": 4.13529426135392, + "grad_norm": 0.0051346332766115665, + "learning_rate": 1.7675760724943567e-07, + "loss": 0.0, + "num_input_tokens_seen": 114071144, + "step": 169270 + }, + { + "epoch": 4.135416412185767, + "grad_norm": 0.0003757499507628381, + "learning_rate": 1.767091990004652e-07, + "loss": 0.0, + "num_input_tokens_seen": 114074472, + "step": 169275 + }, + { + "epoch": 4.135538563017614, + "grad_norm": 9.444287570659071e-06, + "learning_rate": 1.7666079673868695e-07, + "loss": 0.0, + "num_input_tokens_seen": 114078952, + "step": 169280 + }, + { + "epoch": 4.135660713849461, + "grad_norm": 7.637072849320248e-05, + "learning_rate": 1.7661240046445259e-07, + "loss": 0.0, + "num_input_tokens_seen": 114082536, + "step": 169285 + }, + { + "epoch": 4.135782864681309, + "grad_norm": 0.002652720781043172, + "learning_rate": 1.7656401017811451e-07, + "loss": 0.0, + "num_input_tokens_seen": 114085992, + "step": 169290 + }, + { + "epoch": 4.135905015513155, + "grad_norm": 5.934476575930603e-05, + "learning_rate": 1.7651562588002412e-07, + "loss": 0.0, + "num_input_tokens_seen": 114089192, + "step": 169295 + }, + { + "epoch": 4.136027166345003, + "grad_norm": 2.6133950086659752e-05, + "learning_rate": 1.7646724757053366e-07, + "loss": 0.0, + "num_input_tokens_seen": 114092456, + "step": 169300 + }, + { + "epoch": 4.13614931717685, + "grad_norm": 0.00019350307411514223, + "learning_rate": 1.7641887524999511e-07, + "loss": 0.0, + "num_input_tokens_seen": 114095592, + "step": 169305 + }, + { + "epoch": 4.136271468008697, + "grad_norm": 0.0005644158809445798, + "learning_rate": 1.7637050891875983e-07, + "loss": 0.0, + "num_input_tokens_seen": 114099048, + "step": 169310 + }, + { + "epoch": 4.136393618840544, + "grad_norm": 0.00013100756041239947, + "learning_rate": 1.7632214857717997e-07, + "loss": 0.0, + "num_input_tokens_seen": 114102248, + "step": 169315 + }, + { + "epoch": 4.136515769672392, + "grad_norm": 9.739145752973855e-05, + "learning_rate": 1.7627379422560672e-07, + "loss": 0.0, + "num_input_tokens_seen": 114105960, + "step": 169320 + }, + { + "epoch": 4.1366379205042385, + "grad_norm": 1.3738956113229506e-05, + "learning_rate": 1.762254458643919e-07, + "loss": 0.0, + "num_input_tokens_seen": 114109352, + "step": 169325 + }, + { + "epoch": 4.136760071336086, + "grad_norm": 0.00037023035110905766, + "learning_rate": 1.7617710349388758e-07, + "loss": 0.0005, + "num_input_tokens_seen": 114113000, + "step": 169330 + }, + { + "epoch": 4.136882222167933, + "grad_norm": 9.684554242994636e-05, + "learning_rate": 1.761287671144447e-07, + "loss": 0.0, + "num_input_tokens_seen": 114116072, + "step": 169335 + }, + { + "epoch": 4.1370043729997805, + "grad_norm": 1.5194071238511242e-05, + "learning_rate": 1.7608043672641516e-07, + "loss": 0.0, + "num_input_tokens_seen": 114119272, + "step": 169340 + }, + { + "epoch": 4.137126523831627, + "grad_norm": 6.645849498454481e-05, + "learning_rate": 1.7603211233015013e-07, + "loss": 0.0, + "num_input_tokens_seen": 114122664, + "step": 169345 + }, + { + "epoch": 4.137248674663475, + "grad_norm": 0.0013586204731836915, + "learning_rate": 1.7598379392600137e-07, + "loss": 0.0, + "num_input_tokens_seen": 114126120, + "step": 169350 + }, + { + "epoch": 4.137370825495322, + "grad_norm": 5.228986992733553e-05, + "learning_rate": 1.759354815143199e-07, + "loss": 0.0, + "num_input_tokens_seen": 114129448, + "step": 169355 + }, + { + "epoch": 4.137492976327168, + "grad_norm": 0.0018259503412991762, + "learning_rate": 1.7588717509545738e-07, + "loss": 0.0, + "num_input_tokens_seen": 114132584, + "step": 169360 + }, + { + "epoch": 4.137615127159016, + "grad_norm": 0.0023885120172053576, + "learning_rate": 1.75838874669765e-07, + "loss": 0.0, + "num_input_tokens_seen": 114135912, + "step": 169365 + }, + { + "epoch": 4.137737277990863, + "grad_norm": 0.00011633247777353972, + "learning_rate": 1.757905802375942e-07, + "loss": 0.0, + "num_input_tokens_seen": 114139304, + "step": 169370 + }, + { + "epoch": 4.13785942882271, + "grad_norm": 0.00010378471779404208, + "learning_rate": 1.7574229179929556e-07, + "loss": 0.0, + "num_input_tokens_seen": 114142504, + "step": 169375 + }, + { + "epoch": 4.137981579654557, + "grad_norm": 0.0009791553020477295, + "learning_rate": 1.7569400935522105e-07, + "loss": 0.0, + "num_input_tokens_seen": 114146344, + "step": 169380 + }, + { + "epoch": 4.138103730486405, + "grad_norm": 0.0005662897019647062, + "learning_rate": 1.7564573290572115e-07, + "loss": 0.0, + "num_input_tokens_seen": 114149800, + "step": 169385 + }, + { + "epoch": 4.1382258813182515, + "grad_norm": 1.4119503248366527e-05, + "learning_rate": 1.7559746245114737e-07, + "loss": 0.0, + "num_input_tokens_seen": 114152872, + "step": 169390 + }, + { + "epoch": 4.138348032150099, + "grad_norm": 0.00040378831909038126, + "learning_rate": 1.7554919799185041e-07, + "loss": 0.0, + "num_input_tokens_seen": 114156136, + "step": 169395 + }, + { + "epoch": 4.138470182981946, + "grad_norm": 0.00027075386606156826, + "learning_rate": 1.7550093952818168e-07, + "loss": 0.0, + "num_input_tokens_seen": 114159272, + "step": 169400 + }, + { + "epoch": 4.1385923338137935, + "grad_norm": 0.00012230666470713913, + "learning_rate": 1.7545268706049155e-07, + "loss": 0.0, + "num_input_tokens_seen": 114162920, + "step": 169405 + }, + { + "epoch": 4.13871448464564, + "grad_norm": 1.6268022591248155e-05, + "learning_rate": 1.7540444058913162e-07, + "loss": 0.0, + "num_input_tokens_seen": 114166248, + "step": 169410 + }, + { + "epoch": 4.138836635477488, + "grad_norm": 0.0005964138545095921, + "learning_rate": 1.7535620011445208e-07, + "loss": 0.0419, + "num_input_tokens_seen": 114169640, + "step": 169415 + }, + { + "epoch": 4.138958786309335, + "grad_norm": 0.00023118879471439868, + "learning_rate": 1.7530796563680406e-07, + "loss": 0.0, + "num_input_tokens_seen": 114173224, + "step": 169420 + }, + { + "epoch": 4.139080937141182, + "grad_norm": 0.0001617353700567037, + "learning_rate": 1.752597371565385e-07, + "loss": 0.0, + "num_input_tokens_seen": 114176424, + "step": 169425 + }, + { + "epoch": 4.139203087973029, + "grad_norm": 0.0001720954751363024, + "learning_rate": 1.7521151467400585e-07, + "loss": 0.0, + "num_input_tokens_seen": 114179880, + "step": 169430 + }, + { + "epoch": 4.139325238804877, + "grad_norm": 0.00021559244487434626, + "learning_rate": 1.7516329818955712e-07, + "loss": 0.0, + "num_input_tokens_seen": 114183016, + "step": 169435 + }, + { + "epoch": 4.139447389636723, + "grad_norm": 4.029813680972438e-06, + "learning_rate": 1.7511508770354243e-07, + "loss": 0.0, + "num_input_tokens_seen": 114186152, + "step": 169440 + }, + { + "epoch": 4.13956954046857, + "grad_norm": 0.00014288983948063105, + "learning_rate": 1.75066883216313e-07, + "loss": 0.0, + "num_input_tokens_seen": 114189544, + "step": 169445 + }, + { + "epoch": 4.139691691300418, + "grad_norm": 6.273949838941917e-05, + "learning_rate": 1.750186847282188e-07, + "loss": 0.0, + "num_input_tokens_seen": 114193000, + "step": 169450 + }, + { + "epoch": 4.1398138421322646, + "grad_norm": 7.3529131441318896e-06, + "learning_rate": 1.7497049223961058e-07, + "loss": 0.0, + "num_input_tokens_seen": 114196648, + "step": 169455 + }, + { + "epoch": 4.139935992964112, + "grad_norm": 0.007190991658717394, + "learning_rate": 1.749223057508391e-07, + "loss": 0.0, + "num_input_tokens_seen": 114200104, + "step": 169460 + }, + { + "epoch": 4.140058143795959, + "grad_norm": 0.004899477120488882, + "learning_rate": 1.748741252622543e-07, + "loss": 0.0, + "num_input_tokens_seen": 114204008, + "step": 169465 + }, + { + "epoch": 4.140180294627807, + "grad_norm": 4.425868246471509e-05, + "learning_rate": 1.7482595077420713e-07, + "loss": 0.0, + "num_input_tokens_seen": 114206888, + "step": 169470 + }, + { + "epoch": 4.140302445459653, + "grad_norm": 0.0001238979893969372, + "learning_rate": 1.7477778228704732e-07, + "loss": 0.0, + "num_input_tokens_seen": 114210408, + "step": 169475 + }, + { + "epoch": 4.140424596291501, + "grad_norm": 0.0001803866762202233, + "learning_rate": 1.7472961980112556e-07, + "loss": 0.0, + "num_input_tokens_seen": 114213416, + "step": 169480 + }, + { + "epoch": 4.140546747123348, + "grad_norm": 5.6156062782974914e-05, + "learning_rate": 1.746814633167921e-07, + "loss": 0.0, + "num_input_tokens_seen": 114217000, + "step": 169485 + }, + { + "epoch": 4.140668897955195, + "grad_norm": 0.0022617189679294825, + "learning_rate": 1.7463331283439664e-07, + "loss": 0.0, + "num_input_tokens_seen": 114220072, + "step": 169490 + }, + { + "epoch": 4.140791048787042, + "grad_norm": 5.546949978452176e-05, + "learning_rate": 1.7458516835429016e-07, + "loss": 0.0, + "num_input_tokens_seen": 114223592, + "step": 169495 + }, + { + "epoch": 4.14091319961889, + "grad_norm": 0.00012014965614071116, + "learning_rate": 1.7453702987682195e-07, + "loss": 0.0, + "num_input_tokens_seen": 114227304, + "step": 169500 + }, + { + "epoch": 4.1410353504507365, + "grad_norm": 0.0006364333676174283, + "learning_rate": 1.7448889740234273e-07, + "loss": 0.0, + "num_input_tokens_seen": 114231208, + "step": 169505 + }, + { + "epoch": 4.141157501282584, + "grad_norm": 0.0007857916643843055, + "learning_rate": 1.7444077093120214e-07, + "loss": 0.0, + "num_input_tokens_seen": 114234408, + "step": 169510 + }, + { + "epoch": 4.141279652114431, + "grad_norm": 0.00018345196440350264, + "learning_rate": 1.743926504637503e-07, + "loss": 0.0, + "num_input_tokens_seen": 114237736, + "step": 169515 + }, + { + "epoch": 4.1414018029462785, + "grad_norm": 0.0016277572140097618, + "learning_rate": 1.7434453600033728e-07, + "loss": 0.0, + "num_input_tokens_seen": 114241128, + "step": 169520 + }, + { + "epoch": 4.141523953778125, + "grad_norm": 1.3486666830431204e-05, + "learning_rate": 1.742964275413128e-07, + "loss": 0.0, + "num_input_tokens_seen": 114244584, + "step": 169525 + }, + { + "epoch": 4.141646104609972, + "grad_norm": 1.944943687703926e-05, + "learning_rate": 1.7424832508702692e-07, + "loss": 0.0, + "num_input_tokens_seen": 114247912, + "step": 169530 + }, + { + "epoch": 4.14176825544182, + "grad_norm": 0.0033633532002568245, + "learning_rate": 1.74200228637829e-07, + "loss": 0.0, + "num_input_tokens_seen": 114250984, + "step": 169535 + }, + { + "epoch": 4.141890406273666, + "grad_norm": 0.0007912274450063705, + "learning_rate": 1.7415213819406926e-07, + "loss": 0.0, + "num_input_tokens_seen": 114254248, + "step": 169540 + }, + { + "epoch": 4.142012557105514, + "grad_norm": 0.00033459014957770705, + "learning_rate": 1.741040537560976e-07, + "loss": 0.0001, + "num_input_tokens_seen": 114257384, + "step": 169545 + }, + { + "epoch": 4.142134707937361, + "grad_norm": 4.042280852445401e-05, + "learning_rate": 1.7405597532426297e-07, + "loss": 0.0, + "num_input_tokens_seen": 114260392, + "step": 169550 + }, + { + "epoch": 4.142256858769208, + "grad_norm": 0.0008538342663086951, + "learning_rate": 1.7400790289891588e-07, + "loss": 0.0, + "num_input_tokens_seen": 114263848, + "step": 169555 + }, + { + "epoch": 4.142379009601055, + "grad_norm": 1.0966689842462074e-05, + "learning_rate": 1.7395983648040513e-07, + "loss": 0.0, + "num_input_tokens_seen": 114267432, + "step": 169560 + }, + { + "epoch": 4.142501160432903, + "grad_norm": 0.0021321589592844248, + "learning_rate": 1.7391177606908081e-07, + "loss": 0.0, + "num_input_tokens_seen": 114270824, + "step": 169565 + }, + { + "epoch": 4.1426233112647495, + "grad_norm": 9.802708518691361e-06, + "learning_rate": 1.7386372166529218e-07, + "loss": 0.0, + "num_input_tokens_seen": 114274280, + "step": 169570 + }, + { + "epoch": 4.142745462096597, + "grad_norm": 8.670361239637714e-06, + "learning_rate": 1.7381567326938883e-07, + "loss": 0.0, + "num_input_tokens_seen": 114277608, + "step": 169575 + }, + { + "epoch": 4.142867612928444, + "grad_norm": 7.37422305974178e-05, + "learning_rate": 1.7376763088171998e-07, + "loss": 0.0, + "num_input_tokens_seen": 114281064, + "step": 169580 + }, + { + "epoch": 4.1429897637602915, + "grad_norm": 0.0022815996780991554, + "learning_rate": 1.737195945026354e-07, + "loss": 0.0, + "num_input_tokens_seen": 114284200, + "step": 169585 + }, + { + "epoch": 4.143111914592138, + "grad_norm": 0.0026343739591538906, + "learning_rate": 1.7367156413248408e-07, + "loss": 0.0, + "num_input_tokens_seen": 114287400, + "step": 169590 + }, + { + "epoch": 4.143234065423986, + "grad_norm": 9.867559128906578e-06, + "learning_rate": 1.7362353977161527e-07, + "loss": 0.0, + "num_input_tokens_seen": 114291112, + "step": 169595 + }, + { + "epoch": 4.143356216255833, + "grad_norm": 0.002449099440127611, + "learning_rate": 1.7357552142037856e-07, + "loss": 0.0, + "num_input_tokens_seen": 114295144, + "step": 169600 + }, + { + "epoch": 4.14347836708768, + "grad_norm": 8.740804332774132e-05, + "learning_rate": 1.735275090791226e-07, + "loss": 0.0, + "num_input_tokens_seen": 114298856, + "step": 169605 + }, + { + "epoch": 4.143600517919527, + "grad_norm": 8.52627053973265e-05, + "learning_rate": 1.73479502748197e-07, + "loss": 0.0, + "num_input_tokens_seen": 114301992, + "step": 169610 + }, + { + "epoch": 4.143722668751375, + "grad_norm": 1.3284211490827147e-05, + "learning_rate": 1.7343150242795102e-07, + "loss": 0.0, + "num_input_tokens_seen": 114305064, + "step": 169615 + }, + { + "epoch": 4.143844819583221, + "grad_norm": 0.0002733416040427983, + "learning_rate": 1.7338350811873314e-07, + "loss": 0.0, + "num_input_tokens_seen": 114308200, + "step": 169620 + }, + { + "epoch": 4.143966970415068, + "grad_norm": 0.0004196310183033347, + "learning_rate": 1.73335519820893e-07, + "loss": 0.0, + "num_input_tokens_seen": 114310952, + "step": 169625 + }, + { + "epoch": 4.144089121246916, + "grad_norm": 1.7216229025507346e-05, + "learning_rate": 1.732875375347791e-07, + "loss": 0.0, + "num_input_tokens_seen": 114314408, + "step": 169630 + }, + { + "epoch": 4.1442112720787625, + "grad_norm": 0.00020091682381462306, + "learning_rate": 1.7323956126074057e-07, + "loss": 0.0, + "num_input_tokens_seen": 114317928, + "step": 169635 + }, + { + "epoch": 4.14433342291061, + "grad_norm": 5.992214209982194e-05, + "learning_rate": 1.731915909991265e-07, + "loss": 0.0, + "num_input_tokens_seen": 114320808, + "step": 169640 + }, + { + "epoch": 4.144455573742457, + "grad_norm": 0.0003436962724663317, + "learning_rate": 1.7314362675028537e-07, + "loss": 0.0, + "num_input_tokens_seen": 114324584, + "step": 169645 + }, + { + "epoch": 4.1445777245743045, + "grad_norm": 0.00014694590936414897, + "learning_rate": 1.7309566851456647e-07, + "loss": 0.0, + "num_input_tokens_seen": 114328296, + "step": 169650 + }, + { + "epoch": 4.144699875406151, + "grad_norm": 0.05490154027938843, + "learning_rate": 1.7304771629231796e-07, + "loss": 0.0, + "num_input_tokens_seen": 114331624, + "step": 169655 + }, + { + "epoch": 4.144822026237999, + "grad_norm": 0.0006583016365766525, + "learning_rate": 1.7299977008388923e-07, + "loss": 0.0, + "num_input_tokens_seen": 114334824, + "step": 169660 + }, + { + "epoch": 4.144944177069846, + "grad_norm": 0.00024803922860883176, + "learning_rate": 1.729518298896282e-07, + "loss": 0.0, + "num_input_tokens_seen": 114337768, + "step": 169665 + }, + { + "epoch": 4.145066327901693, + "grad_norm": 1.9043955035158433e-05, + "learning_rate": 1.7290389570988406e-07, + "loss": 0.0, + "num_input_tokens_seen": 114340712, + "step": 169670 + }, + { + "epoch": 4.14518847873354, + "grad_norm": 0.0005053699715062976, + "learning_rate": 1.728559675450054e-07, + "loss": 0.0, + "num_input_tokens_seen": 114344232, + "step": 169675 + }, + { + "epoch": 4.145310629565388, + "grad_norm": 0.00019391553360037506, + "learning_rate": 1.7280804539534066e-07, + "loss": 0.0, + "num_input_tokens_seen": 114347752, + "step": 169680 + }, + { + "epoch": 4.145432780397234, + "grad_norm": 0.01149059645831585, + "learning_rate": 1.7276012926123807e-07, + "loss": 0.0, + "num_input_tokens_seen": 114351144, + "step": 169685 + }, + { + "epoch": 4.145554931229082, + "grad_norm": 3.013821697095409e-06, + "learning_rate": 1.7271221914304657e-07, + "loss": 0.0, + "num_input_tokens_seen": 114354408, + "step": 169690 + }, + { + "epoch": 4.145677082060929, + "grad_norm": 0.00018444034503772855, + "learning_rate": 1.7266431504111413e-07, + "loss": 0.0, + "num_input_tokens_seen": 114357544, + "step": 169695 + }, + { + "epoch": 4.145799232892776, + "grad_norm": 6.898889751028037e-06, + "learning_rate": 1.7261641695578943e-07, + "loss": 0.0, + "num_input_tokens_seen": 114360872, + "step": 169700 + }, + { + "epoch": 4.145921383724623, + "grad_norm": 5.794294338556938e-06, + "learning_rate": 1.7256852488742057e-07, + "loss": 0.0, + "num_input_tokens_seen": 114363944, + "step": 169705 + }, + { + "epoch": 4.14604353455647, + "grad_norm": 0.00010420309990877286, + "learning_rate": 1.7252063883635604e-07, + "loss": 0.0, + "num_input_tokens_seen": 114367080, + "step": 169710 + }, + { + "epoch": 4.1461656853883175, + "grad_norm": 0.00022488638933282346, + "learning_rate": 1.7247275880294388e-07, + "loss": 0.0, + "num_input_tokens_seen": 114370152, + "step": 169715 + }, + { + "epoch": 4.146287836220164, + "grad_norm": 0.000128116414998658, + "learning_rate": 1.7242488478753258e-07, + "loss": 0.0, + "num_input_tokens_seen": 114373928, + "step": 169720 + }, + { + "epoch": 4.146409987052012, + "grad_norm": 0.0007236965466290712, + "learning_rate": 1.723770167904699e-07, + "loss": 0.0, + "num_input_tokens_seen": 114377000, + "step": 169725 + }, + { + "epoch": 4.146532137883859, + "grad_norm": 4.138689109822735e-05, + "learning_rate": 1.723291548121042e-07, + "loss": 0.0, + "num_input_tokens_seen": 114380584, + "step": 169730 + }, + { + "epoch": 4.146654288715706, + "grad_norm": 2.3288272132049315e-05, + "learning_rate": 1.7228129885278364e-07, + "loss": 0.0, + "num_input_tokens_seen": 114383976, + "step": 169735 + }, + { + "epoch": 4.146776439547553, + "grad_norm": 0.00036748574348166585, + "learning_rate": 1.7223344891285584e-07, + "loss": 0.0, + "num_input_tokens_seen": 114387112, + "step": 169740 + }, + { + "epoch": 4.146898590379401, + "grad_norm": 0.0008554519154131413, + "learning_rate": 1.7218560499266943e-07, + "loss": 0.0, + "num_input_tokens_seen": 114390248, + "step": 169745 + }, + { + "epoch": 4.147020741211247, + "grad_norm": 0.0006384583539329469, + "learning_rate": 1.7213776709257165e-07, + "loss": 0.0, + "num_input_tokens_seen": 114393704, + "step": 169750 + }, + { + "epoch": 4.147142892043095, + "grad_norm": 4.4409707697923295e-06, + "learning_rate": 1.7208993521291092e-07, + "loss": 0.0, + "num_input_tokens_seen": 114397224, + "step": 169755 + }, + { + "epoch": 4.147265042874942, + "grad_norm": 3.955944976041792e-06, + "learning_rate": 1.7204210935403462e-07, + "loss": 0.0, + "num_input_tokens_seen": 114400168, + "step": 169760 + }, + { + "epoch": 4.147387193706789, + "grad_norm": 2.9546312362072058e-05, + "learning_rate": 1.7199428951629082e-07, + "loss": 0.0, + "num_input_tokens_seen": 114403240, + "step": 169765 + }, + { + "epoch": 4.147509344538636, + "grad_norm": 0.00019152191816829145, + "learning_rate": 1.7194647570002741e-07, + "loss": 0.0, + "num_input_tokens_seen": 114406120, + "step": 169770 + }, + { + "epoch": 4.147631495370484, + "grad_norm": 0.0013317177072167397, + "learning_rate": 1.718986679055918e-07, + "loss": 0.0, + "num_input_tokens_seen": 114409512, + "step": 169775 + }, + { + "epoch": 4.147753646202331, + "grad_norm": 0.00021308651776053011, + "learning_rate": 1.71850866133332e-07, + "loss": 0.0, + "num_input_tokens_seen": 114412968, + "step": 169780 + }, + { + "epoch": 4.147875797034178, + "grad_norm": 3.8212063373066485e-05, + "learning_rate": 1.718030703835952e-07, + "loss": 0.0, + "num_input_tokens_seen": 114415784, + "step": 169785 + }, + { + "epoch": 4.147997947866025, + "grad_norm": 7.068268314469606e-05, + "learning_rate": 1.717552806567295e-07, + "loss": 0.0, + "num_input_tokens_seen": 114418920, + "step": 169790 + }, + { + "epoch": 4.148120098697872, + "grad_norm": 1.8192162315244786e-05, + "learning_rate": 1.7170749695308228e-07, + "loss": 0.0, + "num_input_tokens_seen": 114421992, + "step": 169795 + }, + { + "epoch": 4.148242249529719, + "grad_norm": 0.0006818032707087696, + "learning_rate": 1.716597192730005e-07, + "loss": 0.0, + "num_input_tokens_seen": 114425192, + "step": 169800 + }, + { + "epoch": 4.148364400361566, + "grad_norm": 0.000235591855016537, + "learning_rate": 1.716119476168324e-07, + "loss": 0.0, + "num_input_tokens_seen": 114428520, + "step": 169805 + }, + { + "epoch": 4.148486551193414, + "grad_norm": 6.817103439971106e-06, + "learning_rate": 1.7156418198492473e-07, + "loss": 0.0, + "num_input_tokens_seen": 114432296, + "step": 169810 + }, + { + "epoch": 4.14860870202526, + "grad_norm": 0.0003154922742396593, + "learning_rate": 1.7151642237762543e-07, + "loss": 0.0, + "num_input_tokens_seen": 114435880, + "step": 169815 + }, + { + "epoch": 4.148730852857108, + "grad_norm": 0.002449472900480032, + "learning_rate": 1.7146866879528122e-07, + "loss": 0.0, + "num_input_tokens_seen": 114439208, + "step": 169820 + }, + { + "epoch": 4.148853003688955, + "grad_norm": 0.00023228446661960334, + "learning_rate": 1.714209212382398e-07, + "loss": 0.0, + "num_input_tokens_seen": 114442984, + "step": 169825 + }, + { + "epoch": 4.1489751545208025, + "grad_norm": 1.0799426490848418e-05, + "learning_rate": 1.7137317970684851e-07, + "loss": 0.0, + "num_input_tokens_seen": 114445992, + "step": 169830 + }, + { + "epoch": 4.149097305352649, + "grad_norm": 0.005137881729751825, + "learning_rate": 1.71325444201454e-07, + "loss": 0.0, + "num_input_tokens_seen": 114449192, + "step": 169835 + }, + { + "epoch": 4.149219456184497, + "grad_norm": 0.00022352815722115338, + "learning_rate": 1.7127771472240404e-07, + "loss": 0.0, + "num_input_tokens_seen": 114452392, + "step": 169840 + }, + { + "epoch": 4.149341607016344, + "grad_norm": 0.00036647875094786286, + "learning_rate": 1.7122999127004522e-07, + "loss": 0.0, + "num_input_tokens_seen": 114456104, + "step": 169845 + }, + { + "epoch": 4.149463757848191, + "grad_norm": 6.18748672422953e-05, + "learning_rate": 1.7118227384472482e-07, + "loss": 0.0, + "num_input_tokens_seen": 114459112, + "step": 169850 + }, + { + "epoch": 4.149585908680038, + "grad_norm": 0.0069971526972949505, + "learning_rate": 1.7113456244679014e-07, + "loss": 0.0, + "num_input_tokens_seen": 114463016, + "step": 169855 + }, + { + "epoch": 4.149708059511886, + "grad_norm": 0.00015763036208227277, + "learning_rate": 1.7108685707658754e-07, + "loss": 0.0716, + "num_input_tokens_seen": 114466088, + "step": 169860 + }, + { + "epoch": 4.149830210343732, + "grad_norm": 0.00014392206503544003, + "learning_rate": 1.7103915773446453e-07, + "loss": 0.0, + "num_input_tokens_seen": 114469416, + "step": 169865 + }, + { + "epoch": 4.14995236117558, + "grad_norm": 2.112441507051699e-05, + "learning_rate": 1.709914644207675e-07, + "loss": 0.0, + "num_input_tokens_seen": 114472552, + "step": 169870 + }, + { + "epoch": 4.150074512007427, + "grad_norm": 0.00011392030137358233, + "learning_rate": 1.7094377713584374e-07, + "loss": 0.0, + "num_input_tokens_seen": 114475560, + "step": 169875 + }, + { + "epoch": 4.150196662839274, + "grad_norm": 1.2522552424343303e-05, + "learning_rate": 1.7089609588003962e-07, + "loss": 0.0, + "num_input_tokens_seen": 114478888, + "step": 169880 + }, + { + "epoch": 4.150318813671121, + "grad_norm": 0.00044914192403666675, + "learning_rate": 1.7084842065370232e-07, + "loss": 0.0, + "num_input_tokens_seen": 114482088, + "step": 169885 + }, + { + "epoch": 4.150440964502968, + "grad_norm": 0.001731325639411807, + "learning_rate": 1.7080075145717798e-07, + "loss": 0.0, + "num_input_tokens_seen": 114485992, + "step": 169890 + }, + { + "epoch": 4.1505631153348155, + "grad_norm": 1.6076908650575206e-05, + "learning_rate": 1.707530882908139e-07, + "loss": 0.0, + "num_input_tokens_seen": 114488936, + "step": 169895 + }, + { + "epoch": 4.150685266166662, + "grad_norm": 0.4748183786869049, + "learning_rate": 1.707054311549565e-07, + "loss": 0.0001, + "num_input_tokens_seen": 114492584, + "step": 169900 + }, + { + "epoch": 4.15080741699851, + "grad_norm": 0.0008500635158270597, + "learning_rate": 1.706577800499519e-07, + "loss": 0.0, + "num_input_tokens_seen": 114495912, + "step": 169905 + }, + { + "epoch": 4.150929567830357, + "grad_norm": 3.9711067074676976e-05, + "learning_rate": 1.706101349761473e-07, + "loss": 0.0, + "num_input_tokens_seen": 114499432, + "step": 169910 + }, + { + "epoch": 4.151051718662204, + "grad_norm": 0.00018771788745652884, + "learning_rate": 1.7056249593388862e-07, + "loss": 0.0, + "num_input_tokens_seen": 114502248, + "step": 169915 + }, + { + "epoch": 4.151173869494051, + "grad_norm": 0.002624510321766138, + "learning_rate": 1.7051486292352258e-07, + "loss": 0.0, + "num_input_tokens_seen": 114505448, + "step": 169920 + }, + { + "epoch": 4.151296020325899, + "grad_norm": 3.772515265154652e-05, + "learning_rate": 1.704672359453958e-07, + "loss": 0.0, + "num_input_tokens_seen": 114508520, + "step": 169925 + }, + { + "epoch": 4.151418171157745, + "grad_norm": 0.000177039866684936, + "learning_rate": 1.7041961499985414e-07, + "loss": 0.0, + "num_input_tokens_seen": 114511656, + "step": 169930 + }, + { + "epoch": 4.151540321989593, + "grad_norm": 0.00017326019587926567, + "learning_rate": 1.703720000872444e-07, + "loss": 0.0, + "num_input_tokens_seen": 114515560, + "step": 169935 + }, + { + "epoch": 4.15166247282144, + "grad_norm": 0.004151183646172285, + "learning_rate": 1.703243912079123e-07, + "loss": 0.0, + "num_input_tokens_seen": 114518760, + "step": 169940 + }, + { + "epoch": 4.151784623653287, + "grad_norm": 3.284201739006676e-05, + "learning_rate": 1.702767883622045e-07, + "loss": 0.0, + "num_input_tokens_seen": 114522472, + "step": 169945 + }, + { + "epoch": 4.151906774485134, + "grad_norm": 1.2182783393654972e-05, + "learning_rate": 1.7022919155046722e-07, + "loss": 0.0, + "num_input_tokens_seen": 114526184, + "step": 169950 + }, + { + "epoch": 4.152028925316982, + "grad_norm": 1.4844258657831233e-05, + "learning_rate": 1.7018160077304633e-07, + "loss": 0.0, + "num_input_tokens_seen": 114529576, + "step": 169955 + }, + { + "epoch": 4.1521510761488285, + "grad_norm": 5.435540060716448e-06, + "learning_rate": 1.7013401603028822e-07, + "loss": 0.0, + "num_input_tokens_seen": 114532584, + "step": 169960 + }, + { + "epoch": 4.152273226980676, + "grad_norm": 0.0001301129232160747, + "learning_rate": 1.7008643732253848e-07, + "loss": 0.0, + "num_input_tokens_seen": 114535592, + "step": 169965 + }, + { + "epoch": 4.152395377812523, + "grad_norm": 0.00018831691704690456, + "learning_rate": 1.7003886465014362e-07, + "loss": 0.0, + "num_input_tokens_seen": 114538728, + "step": 169970 + }, + { + "epoch": 4.1525175286443705, + "grad_norm": 0.0016461594495922327, + "learning_rate": 1.6999129801344914e-07, + "loss": 0.0, + "num_input_tokens_seen": 114541992, + "step": 169975 + }, + { + "epoch": 4.152639679476217, + "grad_norm": 0.00039509753696620464, + "learning_rate": 1.699437374128011e-07, + "loss": 0.0, + "num_input_tokens_seen": 114545768, + "step": 169980 + }, + { + "epoch": 4.152761830308064, + "grad_norm": 0.032739847898483276, + "learning_rate": 1.698961828485458e-07, + "loss": 0.0001, + "num_input_tokens_seen": 114549096, + "step": 169985 + }, + { + "epoch": 4.152883981139912, + "grad_norm": 0.0007005423540249467, + "learning_rate": 1.698486343210288e-07, + "loss": 0.0, + "num_input_tokens_seen": 114552424, + "step": 169990 + }, + { + "epoch": 4.153006131971758, + "grad_norm": 0.0006590968114323914, + "learning_rate": 1.6980109183059544e-07, + "loss": 0.0, + "num_input_tokens_seen": 114555304, + "step": 169995 + }, + { + "epoch": 4.153128282803606, + "grad_norm": 8.95965495146811e-05, + "learning_rate": 1.6975355537759217e-07, + "loss": 0.0, + "num_input_tokens_seen": 114558440, + "step": 170000 + }, + { + "epoch": 4.153250433635453, + "grad_norm": 3.4148648410337046e-05, + "learning_rate": 1.6970602496236409e-07, + "loss": 0.0, + "num_input_tokens_seen": 114562088, + "step": 170005 + }, + { + "epoch": 4.1533725844673, + "grad_norm": 3.2041094527812675e-05, + "learning_rate": 1.6965850058525732e-07, + "loss": 0.0, + "num_input_tokens_seen": 114565160, + "step": 170010 + }, + { + "epoch": 4.153494735299147, + "grad_norm": 0.0001849216059781611, + "learning_rate": 1.6961098224661707e-07, + "loss": 0.0, + "num_input_tokens_seen": 114568232, + "step": 170015 + }, + { + "epoch": 4.153616886130995, + "grad_norm": 9.298501936427783e-06, + "learning_rate": 1.6956346994678926e-07, + "loss": 0.0, + "num_input_tokens_seen": 114572072, + "step": 170020 + }, + { + "epoch": 4.1537390369628415, + "grad_norm": 0.00011811690637841821, + "learning_rate": 1.695159636861191e-07, + "loss": 0.0, + "num_input_tokens_seen": 114575208, + "step": 170025 + }, + { + "epoch": 4.153861187794689, + "grad_norm": 0.0005981141002848744, + "learning_rate": 1.6946846346495248e-07, + "loss": 0.0, + "num_input_tokens_seen": 114578728, + "step": 170030 + }, + { + "epoch": 4.153983338626536, + "grad_norm": 8.729274122742936e-05, + "learning_rate": 1.6942096928363426e-07, + "loss": 0.0, + "num_input_tokens_seen": 114582312, + "step": 170035 + }, + { + "epoch": 4.1541054894583835, + "grad_norm": 2.739803858275991e-05, + "learning_rate": 1.6937348114251026e-07, + "loss": 0.0, + "num_input_tokens_seen": 114585768, + "step": 170040 + }, + { + "epoch": 4.15422764029023, + "grad_norm": 0.00011632242240011692, + "learning_rate": 1.693259990419259e-07, + "loss": 0.0, + "num_input_tokens_seen": 114589224, + "step": 170045 + }, + { + "epoch": 4.154349791122078, + "grad_norm": 0.000368999462807551, + "learning_rate": 1.69278522982226e-07, + "loss": 0.0, + "num_input_tokens_seen": 114592616, + "step": 170050 + }, + { + "epoch": 4.154471941953925, + "grad_norm": 0.00010005565854953602, + "learning_rate": 1.6923105296375638e-07, + "loss": 0.0, + "num_input_tokens_seen": 114596136, + "step": 170055 + }, + { + "epoch": 4.154594092785772, + "grad_norm": 0.00019996245100628585, + "learning_rate": 1.691835889868618e-07, + "loss": 0.0, + "num_input_tokens_seen": 114600488, + "step": 170060 + }, + { + "epoch": 4.154716243617619, + "grad_norm": 7.243484287755564e-05, + "learning_rate": 1.6913613105188785e-07, + "loss": 0.0, + "num_input_tokens_seen": 114603816, + "step": 170065 + }, + { + "epoch": 4.154838394449466, + "grad_norm": 1.4196218216966372e-05, + "learning_rate": 1.6908867915917924e-07, + "loss": 0.0, + "num_input_tokens_seen": 114607528, + "step": 170070 + }, + { + "epoch": 4.154960545281313, + "grad_norm": 4.784507837030105e-05, + "learning_rate": 1.6904123330908117e-07, + "loss": 0.0, + "num_input_tokens_seen": 114610856, + "step": 170075 + }, + { + "epoch": 4.15508269611316, + "grad_norm": 7.885733793955296e-05, + "learning_rate": 1.68993793501939e-07, + "loss": 0.0, + "num_input_tokens_seen": 114614248, + "step": 170080 + }, + { + "epoch": 4.155204846945008, + "grad_norm": 0.0017045887652784586, + "learning_rate": 1.6894635973809725e-07, + "loss": 0.0, + "num_input_tokens_seen": 114617512, + "step": 170085 + }, + { + "epoch": 4.1553269977768545, + "grad_norm": 0.00018425517191644758, + "learning_rate": 1.688989320179014e-07, + "loss": 0.0, + "num_input_tokens_seen": 114620776, + "step": 170090 + }, + { + "epoch": 4.155449148608702, + "grad_norm": 2.087597022182308e-05, + "learning_rate": 1.6885151034169577e-07, + "loss": 0.0, + "num_input_tokens_seen": 114623848, + "step": 170095 + }, + { + "epoch": 4.155571299440549, + "grad_norm": 0.0001408069219905883, + "learning_rate": 1.688040947098257e-07, + "loss": 0.0, + "num_input_tokens_seen": 114627176, + "step": 170100 + }, + { + "epoch": 4.155693450272397, + "grad_norm": 0.00032099412055686116, + "learning_rate": 1.6875668512263587e-07, + "loss": 0.0, + "num_input_tokens_seen": 114630952, + "step": 170105 + }, + { + "epoch": 4.155815601104243, + "grad_norm": 5.4309595725499094e-05, + "learning_rate": 1.6870928158047072e-07, + "loss": 0.1071, + "num_input_tokens_seen": 114634024, + "step": 170110 + }, + { + "epoch": 4.155937751936091, + "grad_norm": 0.00032432761508971453, + "learning_rate": 1.6866188408367553e-07, + "loss": 0.0, + "num_input_tokens_seen": 114637288, + "step": 170115 + }, + { + "epoch": 4.156059902767938, + "grad_norm": 0.04092469438910484, + "learning_rate": 1.6861449263259453e-07, + "loss": 0.0, + "num_input_tokens_seen": 114640680, + "step": 170120 + }, + { + "epoch": 4.156182053599785, + "grad_norm": 0.03474748134613037, + "learning_rate": 1.6856710722757273e-07, + "loss": 0.0, + "num_input_tokens_seen": 114644072, + "step": 170125 + }, + { + "epoch": 4.156304204431632, + "grad_norm": 0.0026629886124283075, + "learning_rate": 1.685197278689543e-07, + "loss": 0.0, + "num_input_tokens_seen": 114647528, + "step": 170130 + }, + { + "epoch": 4.15642635526348, + "grad_norm": 0.00020655507978517562, + "learning_rate": 1.6847235455708408e-07, + "loss": 0.0, + "num_input_tokens_seen": 114650792, + "step": 170135 + }, + { + "epoch": 4.1565485060953264, + "grad_norm": 0.0001946508709806949, + "learning_rate": 1.6842498729230682e-07, + "loss": 0.0, + "num_input_tokens_seen": 114653928, + "step": 170140 + }, + { + "epoch": 4.156670656927174, + "grad_norm": 0.00028996478067710996, + "learning_rate": 1.6837762607496654e-07, + "loss": 0.0, + "num_input_tokens_seen": 114658088, + "step": 170145 + }, + { + "epoch": 4.156792807759021, + "grad_norm": 8.436523785348982e-05, + "learning_rate": 1.6833027090540797e-07, + "loss": 0.0, + "num_input_tokens_seen": 114661288, + "step": 170150 + }, + { + "epoch": 4.156914958590868, + "grad_norm": 0.00046180543722584844, + "learning_rate": 1.6828292178397508e-07, + "loss": 0.0, + "num_input_tokens_seen": 114664552, + "step": 170155 + }, + { + "epoch": 4.157037109422715, + "grad_norm": 5.629775841953233e-06, + "learning_rate": 1.682355787110128e-07, + "loss": 0.0, + "num_input_tokens_seen": 114668840, + "step": 170160 + }, + { + "epoch": 4.157159260254562, + "grad_norm": 0.023796724155545235, + "learning_rate": 1.6818824168686486e-07, + "loss": 0.0, + "num_input_tokens_seen": 114671848, + "step": 170165 + }, + { + "epoch": 4.15728141108641, + "grad_norm": 3.74505361833144e-05, + "learning_rate": 1.6814091071187586e-07, + "loss": 0.0, + "num_input_tokens_seen": 114675240, + "step": 170170 + }, + { + "epoch": 4.157403561918256, + "grad_norm": 0.0002260417240904644, + "learning_rate": 1.6809358578639e-07, + "loss": 0.0224, + "num_input_tokens_seen": 114679016, + "step": 170175 + }, + { + "epoch": 4.157525712750104, + "grad_norm": 0.00018604165234137326, + "learning_rate": 1.680462669107512e-07, + "loss": 0.0, + "num_input_tokens_seen": 114682344, + "step": 170180 + }, + { + "epoch": 4.157647863581951, + "grad_norm": 0.00011431350139901042, + "learning_rate": 1.6799895408530385e-07, + "loss": 0.0, + "num_input_tokens_seen": 114685672, + "step": 170185 + }, + { + "epoch": 4.157770014413798, + "grad_norm": 0.000264197209617123, + "learning_rate": 1.679516473103917e-07, + "loss": 0.0, + "num_input_tokens_seen": 114689128, + "step": 170190 + }, + { + "epoch": 4.157892165245645, + "grad_norm": 8.455871284240857e-05, + "learning_rate": 1.6790434658635922e-07, + "loss": 0.0, + "num_input_tokens_seen": 114692136, + "step": 170195 + }, + { + "epoch": 4.158014316077493, + "grad_norm": 1.8691958757699467e-05, + "learning_rate": 1.6785705191354983e-07, + "loss": 0.0, + "num_input_tokens_seen": 114695592, + "step": 170200 + }, + { + "epoch": 4.1581364669093395, + "grad_norm": 0.00012280538794584572, + "learning_rate": 1.678097632923081e-07, + "loss": 0.0, + "num_input_tokens_seen": 114698856, + "step": 170205 + }, + { + "epoch": 4.158258617741187, + "grad_norm": 0.00011621593876043335, + "learning_rate": 1.677624807229776e-07, + "loss": 0.0, + "num_input_tokens_seen": 114702248, + "step": 170210 + }, + { + "epoch": 4.158380768573034, + "grad_norm": 0.0011803361121565104, + "learning_rate": 1.677152042059019e-07, + "loss": 0.0, + "num_input_tokens_seen": 114705448, + "step": 170215 + }, + { + "epoch": 4.1585029194048815, + "grad_norm": 0.00014835498586762697, + "learning_rate": 1.676679337414254e-07, + "loss": 0.0, + "num_input_tokens_seen": 114709096, + "step": 170220 + }, + { + "epoch": 4.158625070236728, + "grad_norm": 6.039536674506962e-05, + "learning_rate": 1.6762066932989128e-07, + "loss": 0.0, + "num_input_tokens_seen": 114712488, + "step": 170225 + }, + { + "epoch": 4.158747221068576, + "grad_norm": 0.001145469374023378, + "learning_rate": 1.6757341097164345e-07, + "loss": 0.0, + "num_input_tokens_seen": 114715752, + "step": 170230 + }, + { + "epoch": 4.158869371900423, + "grad_norm": 8.06782190920785e-06, + "learning_rate": 1.67526158667026e-07, + "loss": 0.0, + "num_input_tokens_seen": 114718760, + "step": 170235 + }, + { + "epoch": 4.15899152273227, + "grad_norm": 2.5670822651591152e-05, + "learning_rate": 1.67478912416382e-07, + "loss": 0.0, + "num_input_tokens_seen": 114722472, + "step": 170240 + }, + { + "epoch": 4.159113673564117, + "grad_norm": 0.0001012508655549027, + "learning_rate": 1.674316722200555e-07, + "loss": 0.0, + "num_input_tokens_seen": 114725928, + "step": 170245 + }, + { + "epoch": 4.159235824395964, + "grad_norm": 0.0019791119266301394, + "learning_rate": 1.6738443807838952e-07, + "loss": 0.0, + "num_input_tokens_seen": 114729768, + "step": 170250 + }, + { + "epoch": 4.159357975227811, + "grad_norm": 4.3414085666881874e-05, + "learning_rate": 1.6733720999172786e-07, + "loss": 0.0, + "num_input_tokens_seen": 114733224, + "step": 170255 + }, + { + "epoch": 4.159480126059658, + "grad_norm": 0.00107354368083179, + "learning_rate": 1.6728998796041428e-07, + "loss": 0.0, + "num_input_tokens_seen": 114736424, + "step": 170260 + }, + { + "epoch": 4.159602276891506, + "grad_norm": 6.924165063537657e-05, + "learning_rate": 1.6724277198479163e-07, + "loss": 0.0, + "num_input_tokens_seen": 114739368, + "step": 170265 + }, + { + "epoch": 4.1597244277233525, + "grad_norm": 8.328318654093891e-05, + "learning_rate": 1.6719556206520368e-07, + "loss": 0.0, + "num_input_tokens_seen": 114742376, + "step": 170270 + }, + { + "epoch": 4.1598465785552, + "grad_norm": 0.002161898883059621, + "learning_rate": 1.6714835820199347e-07, + "loss": 0.0, + "num_input_tokens_seen": 114745576, + "step": 170275 + }, + { + "epoch": 4.159968729387047, + "grad_norm": 3.9121092413552105e-05, + "learning_rate": 1.671011603955046e-07, + "loss": 0.0, + "num_input_tokens_seen": 114749032, + "step": 170280 + }, + { + "epoch": 4.1600908802188945, + "grad_norm": 1.349920694337925e-05, + "learning_rate": 1.670539686460799e-07, + "loss": 0.0, + "num_input_tokens_seen": 114752360, + "step": 170285 + }, + { + "epoch": 4.160213031050741, + "grad_norm": 0.004583634901791811, + "learning_rate": 1.6700678295406267e-07, + "loss": 0.0, + "num_input_tokens_seen": 114755752, + "step": 170290 + }, + { + "epoch": 4.160335181882589, + "grad_norm": 0.00012262725795153528, + "learning_rate": 1.6695960331979652e-07, + "loss": 0.0, + "num_input_tokens_seen": 114758952, + "step": 170295 + }, + { + "epoch": 4.160457332714436, + "grad_norm": 1.902866097225342e-05, + "learning_rate": 1.6691242974362417e-07, + "loss": 0.0, + "num_input_tokens_seen": 114762152, + "step": 170300 + }, + { + "epoch": 4.160579483546283, + "grad_norm": 0.0009349206229671836, + "learning_rate": 1.6686526222588847e-07, + "loss": 0.0, + "num_input_tokens_seen": 114765480, + "step": 170305 + }, + { + "epoch": 4.16070163437813, + "grad_norm": 4.975054707756499e-06, + "learning_rate": 1.6681810076693282e-07, + "loss": 0.0, + "num_input_tokens_seen": 114769192, + "step": 170310 + }, + { + "epoch": 4.160823785209978, + "grad_norm": 2.176424823119305e-05, + "learning_rate": 1.6677094536709991e-07, + "loss": 0.0436, + "num_input_tokens_seen": 114772328, + "step": 170315 + }, + { + "epoch": 4.160945936041824, + "grad_norm": 0.0011520327534526587, + "learning_rate": 1.6672379602673303e-07, + "loss": 0.0, + "num_input_tokens_seen": 114775720, + "step": 170320 + }, + { + "epoch": 4.161068086873672, + "grad_norm": 0.0003704590199049562, + "learning_rate": 1.666766527461745e-07, + "loss": 0.0001, + "num_input_tokens_seen": 114778856, + "step": 170325 + }, + { + "epoch": 4.161190237705519, + "grad_norm": 0.001671323669143021, + "learning_rate": 1.6662951552576787e-07, + "loss": 0.0, + "num_input_tokens_seen": 114782504, + "step": 170330 + }, + { + "epoch": 4.1613123885373655, + "grad_norm": 0.00016249853069894016, + "learning_rate": 1.6658238436585515e-07, + "loss": 0.0, + "num_input_tokens_seen": 114785704, + "step": 170335 + }, + { + "epoch": 4.161434539369213, + "grad_norm": 0.0018108749063685536, + "learning_rate": 1.665352592667798e-07, + "loss": 0.0, + "num_input_tokens_seen": 114788648, + "step": 170340 + }, + { + "epoch": 4.16155669020106, + "grad_norm": 2.1458145056385547e-05, + "learning_rate": 1.6648814022888403e-07, + "loss": 0.0, + "num_input_tokens_seen": 114791912, + "step": 170345 + }, + { + "epoch": 4.1616788410329075, + "grad_norm": 1.716206497803796e-05, + "learning_rate": 1.6644102725251063e-07, + "loss": 0.0, + "num_input_tokens_seen": 114795368, + "step": 170350 + }, + { + "epoch": 4.161800991864754, + "grad_norm": 0.0001412465499015525, + "learning_rate": 1.663939203380026e-07, + "loss": 0.0, + "num_input_tokens_seen": 114798696, + "step": 170355 + }, + { + "epoch": 4.161923142696602, + "grad_norm": 0.00016346178017556667, + "learning_rate": 1.6634681948570183e-07, + "loss": 0.0, + "num_input_tokens_seen": 114801704, + "step": 170360 + }, + { + "epoch": 4.162045293528449, + "grad_norm": 0.011184005998075008, + "learning_rate": 1.6629972469595155e-07, + "loss": 0.0, + "num_input_tokens_seen": 114804904, + "step": 170365 + }, + { + "epoch": 4.162167444360296, + "grad_norm": 0.00012003963638562709, + "learning_rate": 1.6625263596909368e-07, + "loss": 0.0, + "num_input_tokens_seen": 114808168, + "step": 170370 + }, + { + "epoch": 4.162289595192143, + "grad_norm": 1.699275162536651e-05, + "learning_rate": 1.6620555330547104e-07, + "loss": 0.0, + "num_input_tokens_seen": 114811432, + "step": 170375 + }, + { + "epoch": 4.162411746023991, + "grad_norm": 0.004154087509959936, + "learning_rate": 1.6615847670542572e-07, + "loss": 0.0, + "num_input_tokens_seen": 114815016, + "step": 170380 + }, + { + "epoch": 4.162533896855837, + "grad_norm": 0.014836370013654232, + "learning_rate": 1.661114061693002e-07, + "loss": 0.0, + "num_input_tokens_seen": 114818408, + "step": 170385 + }, + { + "epoch": 4.162656047687685, + "grad_norm": 0.0008278019959107041, + "learning_rate": 1.660643416974371e-07, + "loss": 0.0, + "num_input_tokens_seen": 114821544, + "step": 170390 + }, + { + "epoch": 4.162778198519532, + "grad_norm": 6.109980404289672e-06, + "learning_rate": 1.6601728329017818e-07, + "loss": 0.0, + "num_input_tokens_seen": 114825192, + "step": 170395 + }, + { + "epoch": 4.162900349351379, + "grad_norm": 0.0004017290484625846, + "learning_rate": 1.6597023094786612e-07, + "loss": 0.0122, + "num_input_tokens_seen": 114828776, + "step": 170400 + }, + { + "epoch": 4.163022500183226, + "grad_norm": 9.658478666096926e-05, + "learning_rate": 1.6592318467084255e-07, + "loss": 0.0, + "num_input_tokens_seen": 114831848, + "step": 170405 + }, + { + "epoch": 4.163144651015074, + "grad_norm": 0.00031634565675631166, + "learning_rate": 1.658761444594502e-07, + "loss": 0.0, + "num_input_tokens_seen": 114835304, + "step": 170410 + }, + { + "epoch": 4.1632668018469206, + "grad_norm": 0.0033153025433421135, + "learning_rate": 1.658291103140309e-07, + "loss": 0.0, + "num_input_tokens_seen": 114838696, + "step": 170415 + }, + { + "epoch": 4.163388952678767, + "grad_norm": 4.208605605526827e-05, + "learning_rate": 1.657820822349264e-07, + "loss": 0.0, + "num_input_tokens_seen": 114841768, + "step": 170420 + }, + { + "epoch": 4.163511103510615, + "grad_norm": 0.005869393702596426, + "learning_rate": 1.657350602224793e-07, + "loss": 0.0794, + "num_input_tokens_seen": 114845032, + "step": 170425 + }, + { + "epoch": 4.163633254342462, + "grad_norm": 2.9876026019337587e-05, + "learning_rate": 1.6568804427703088e-07, + "loss": 0.0, + "num_input_tokens_seen": 114848424, + "step": 170430 + }, + { + "epoch": 4.163755405174309, + "grad_norm": 6.542223127325997e-05, + "learning_rate": 1.6564103439892373e-07, + "loss": 0.0, + "num_input_tokens_seen": 114851816, + "step": 170435 + }, + { + "epoch": 4.163877556006156, + "grad_norm": 4.480009010876529e-05, + "learning_rate": 1.6559403058849909e-07, + "loss": 0.0, + "num_input_tokens_seen": 114855464, + "step": 170440 + }, + { + "epoch": 4.163999706838004, + "grad_norm": 8.49221851240145e-06, + "learning_rate": 1.6554703284609918e-07, + "loss": 0.0, + "num_input_tokens_seen": 114858856, + "step": 170445 + }, + { + "epoch": 4.16412185766985, + "grad_norm": 0.0001956393534783274, + "learning_rate": 1.6550004117206583e-07, + "loss": 0.0, + "num_input_tokens_seen": 114862376, + "step": 170450 + }, + { + "epoch": 4.164244008501698, + "grad_norm": 0.0013749731006100774, + "learning_rate": 1.6545305556674038e-07, + "loss": 0.0, + "num_input_tokens_seen": 114865256, + "step": 170455 + }, + { + "epoch": 4.164366159333545, + "grad_norm": 0.0001273680099984631, + "learning_rate": 1.6540607603046508e-07, + "loss": 0.0, + "num_input_tokens_seen": 114868520, + "step": 170460 + }, + { + "epoch": 4.1644883101653924, + "grad_norm": 8.07244359748438e-05, + "learning_rate": 1.653591025635811e-07, + "loss": 0.0, + "num_input_tokens_seen": 114871592, + "step": 170465 + }, + { + "epoch": 4.164610460997239, + "grad_norm": 0.0012089071096852422, + "learning_rate": 1.6531213516643028e-07, + "loss": 0.0, + "num_input_tokens_seen": 114874856, + "step": 170470 + }, + { + "epoch": 4.164732611829087, + "grad_norm": 3.042860953428317e-05, + "learning_rate": 1.6526517383935402e-07, + "loss": 0.0, + "num_input_tokens_seen": 114878248, + "step": 170475 + }, + { + "epoch": 4.164854762660934, + "grad_norm": 6.760417454643175e-05, + "learning_rate": 1.652182185826939e-07, + "loss": 0.0, + "num_input_tokens_seen": 114881576, + "step": 170480 + }, + { + "epoch": 4.164976913492781, + "grad_norm": 1.345566943200538e-05, + "learning_rate": 1.651712693967916e-07, + "loss": 0.0, + "num_input_tokens_seen": 114885352, + "step": 170485 + }, + { + "epoch": 4.165099064324628, + "grad_norm": 0.00022467400413006544, + "learning_rate": 1.6512432628198823e-07, + "loss": 0.0, + "num_input_tokens_seen": 114888872, + "step": 170490 + }, + { + "epoch": 4.165221215156476, + "grad_norm": 6.48816603643354e-06, + "learning_rate": 1.6507738923862546e-07, + "loss": 0.0, + "num_input_tokens_seen": 114892072, + "step": 170495 + }, + { + "epoch": 4.165343365988322, + "grad_norm": 0.00019495896412990987, + "learning_rate": 1.6503045826704433e-07, + "loss": 0.0, + "num_input_tokens_seen": 114895528, + "step": 170500 + }, + { + "epoch": 4.16546551682017, + "grad_norm": 0.0014716989826411009, + "learning_rate": 1.6498353336758653e-07, + "loss": 0.0, + "num_input_tokens_seen": 114899432, + "step": 170505 + }, + { + "epoch": 4.165587667652017, + "grad_norm": 3.384602678124793e-05, + "learning_rate": 1.649366145405927e-07, + "loss": 0.0, + "num_input_tokens_seen": 114902888, + "step": 170510 + }, + { + "epoch": 4.1657098184838635, + "grad_norm": 0.00020877330098301172, + "learning_rate": 1.6488970178640483e-07, + "loss": 0.0, + "num_input_tokens_seen": 114906216, + "step": 170515 + }, + { + "epoch": 4.165831969315711, + "grad_norm": 4.302657544030808e-05, + "learning_rate": 1.6484279510536358e-07, + "loss": 0.0, + "num_input_tokens_seen": 114909480, + "step": 170520 + }, + { + "epoch": 4.165954120147558, + "grad_norm": 0.009582330472767353, + "learning_rate": 1.6479589449780984e-07, + "loss": 0.0, + "num_input_tokens_seen": 114912680, + "step": 170525 + }, + { + "epoch": 4.1660762709794055, + "grad_norm": 0.00048023121780715883, + "learning_rate": 1.6474899996408532e-07, + "loss": 0.0, + "num_input_tokens_seen": 114915752, + "step": 170530 + }, + { + "epoch": 4.166198421811252, + "grad_norm": 8.170756336767226e-05, + "learning_rate": 1.647021115045305e-07, + "loss": 0.0, + "num_input_tokens_seen": 114919784, + "step": 170535 + }, + { + "epoch": 4.1663205726431, + "grad_norm": 0.0010021156631410122, + "learning_rate": 1.646552291194866e-07, + "loss": 0.0, + "num_input_tokens_seen": 114923496, + "step": 170540 + }, + { + "epoch": 4.166442723474947, + "grad_norm": 0.00017045924323610961, + "learning_rate": 1.6460835280929474e-07, + "loss": 0.0, + "num_input_tokens_seen": 114926888, + "step": 170545 + }, + { + "epoch": 4.166564874306794, + "grad_norm": 0.005076151341199875, + "learning_rate": 1.6456148257429537e-07, + "loss": 0.0, + "num_input_tokens_seen": 114930024, + "step": 170550 + }, + { + "epoch": 4.166687025138641, + "grad_norm": 0.00014168965572025627, + "learning_rate": 1.6451461841482994e-07, + "loss": 0.0002, + "num_input_tokens_seen": 114933352, + "step": 170555 + }, + { + "epoch": 4.166809175970489, + "grad_norm": 8.979089216154534e-06, + "learning_rate": 1.6446776033123866e-07, + "loss": 0.0, + "num_input_tokens_seen": 114936424, + "step": 170560 + }, + { + "epoch": 4.166931326802335, + "grad_norm": 0.00012583896750584245, + "learning_rate": 1.6442090832386246e-07, + "loss": 0.0, + "num_input_tokens_seen": 114939688, + "step": 170565 + }, + { + "epoch": 4.167053477634183, + "grad_norm": 0.0013897777535021305, + "learning_rate": 1.6437406239304253e-07, + "loss": 0.0, + "num_input_tokens_seen": 114942824, + "step": 170570 + }, + { + "epoch": 4.16717562846603, + "grad_norm": 0.0002839156368281692, + "learning_rate": 1.643272225391188e-07, + "loss": 0.0, + "num_input_tokens_seen": 114946472, + "step": 170575 + }, + { + "epoch": 4.167297779297877, + "grad_norm": 0.0005325472448021173, + "learning_rate": 1.6428038876243266e-07, + "loss": 0.0, + "num_input_tokens_seen": 114949800, + "step": 170580 + }, + { + "epoch": 4.167419930129724, + "grad_norm": 0.00025493022985756397, + "learning_rate": 1.6423356106332398e-07, + "loss": 0.0, + "num_input_tokens_seen": 114953064, + "step": 170585 + }, + { + "epoch": 4.167542080961572, + "grad_norm": 4.253402494214242e-06, + "learning_rate": 1.641867394421339e-07, + "loss": 0.0, + "num_input_tokens_seen": 114956840, + "step": 170590 + }, + { + "epoch": 4.1676642317934185, + "grad_norm": 0.00014483621635008603, + "learning_rate": 1.641399238992024e-07, + "loss": 0.0, + "num_input_tokens_seen": 114959848, + "step": 170595 + }, + { + "epoch": 4.167786382625266, + "grad_norm": 1.984128357435111e-05, + "learning_rate": 1.640931144348703e-07, + "loss": 0.0, + "num_input_tokens_seen": 114963176, + "step": 170600 + }, + { + "epoch": 4.167908533457113, + "grad_norm": 0.008027193136513233, + "learning_rate": 1.6404631104947798e-07, + "loss": 0.0, + "num_input_tokens_seen": 114966888, + "step": 170605 + }, + { + "epoch": 4.16803068428896, + "grad_norm": 0.002142030978575349, + "learning_rate": 1.6399951374336585e-07, + "loss": 0.0, + "num_input_tokens_seen": 114970344, + "step": 170610 + }, + { + "epoch": 4.168152835120807, + "grad_norm": 3.685107230921858e-06, + "learning_rate": 1.6395272251687386e-07, + "loss": 0.0, + "num_input_tokens_seen": 114973864, + "step": 170615 + }, + { + "epoch": 4.168274985952654, + "grad_norm": 8.950922165240627e-06, + "learning_rate": 1.6390593737034276e-07, + "loss": 0.0, + "num_input_tokens_seen": 114976744, + "step": 170620 + }, + { + "epoch": 4.168397136784502, + "grad_norm": 0.00010310253128409386, + "learning_rate": 1.6385915830411223e-07, + "loss": 0.0, + "num_input_tokens_seen": 114979944, + "step": 170625 + }, + { + "epoch": 4.168519287616348, + "grad_norm": 3.5585057048592716e-05, + "learning_rate": 1.6381238531852314e-07, + "loss": 0.0, + "num_input_tokens_seen": 114983144, + "step": 170630 + }, + { + "epoch": 4.168641438448196, + "grad_norm": 7.778478175168857e-05, + "learning_rate": 1.6376561841391501e-07, + "loss": 0.0, + "num_input_tokens_seen": 114986472, + "step": 170635 + }, + { + "epoch": 4.168763589280043, + "grad_norm": 0.00018595813889987767, + "learning_rate": 1.6371885759062853e-07, + "loss": 0.0, + "num_input_tokens_seen": 114989608, + "step": 170640 + }, + { + "epoch": 4.16888574011189, + "grad_norm": 0.00012617114407476038, + "learning_rate": 1.6367210284900324e-07, + "loss": 0.0, + "num_input_tokens_seen": 114992808, + "step": 170645 + }, + { + "epoch": 4.169007890943737, + "grad_norm": 0.000265518989181146, + "learning_rate": 1.636253541893795e-07, + "loss": 0.0, + "num_input_tokens_seen": 114995880, + "step": 170650 + }, + { + "epoch": 4.169130041775585, + "grad_norm": 0.0011026192223653197, + "learning_rate": 1.6357861161209695e-07, + "loss": 0.0, + "num_input_tokens_seen": 114999272, + "step": 170655 + }, + { + "epoch": 4.1692521926074315, + "grad_norm": 2.0162886357866228e-05, + "learning_rate": 1.6353187511749565e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115002664, + "step": 170660 + }, + { + "epoch": 4.169374343439279, + "grad_norm": 4.777355206897482e-05, + "learning_rate": 1.6348514470591578e-07, + "loss": 0.0, + "num_input_tokens_seen": 115005800, + "step": 170665 + }, + { + "epoch": 4.169496494271126, + "grad_norm": 0.02091538906097412, + "learning_rate": 1.6343842037769673e-07, + "loss": 0.0, + "num_input_tokens_seen": 115009256, + "step": 170670 + }, + { + "epoch": 4.1696186451029735, + "grad_norm": 0.00016440707258880138, + "learning_rate": 1.6339170213317877e-07, + "loss": 0.0, + "num_input_tokens_seen": 115012776, + "step": 170675 + }, + { + "epoch": 4.16974079593482, + "grad_norm": 3.5134512472723145e-06, + "learning_rate": 1.6334498997270108e-07, + "loss": 0.0, + "num_input_tokens_seen": 115015912, + "step": 170680 + }, + { + "epoch": 4.169862946766668, + "grad_norm": 2.0491874238359742e-05, + "learning_rate": 1.6329828389660394e-07, + "loss": 0.0, + "num_input_tokens_seen": 115019432, + "step": 170685 + }, + { + "epoch": 4.169985097598515, + "grad_norm": 1.233548391610384e-05, + "learning_rate": 1.6325158390522642e-07, + "loss": 0.0, + "num_input_tokens_seen": 115022568, + "step": 170690 + }, + { + "epoch": 4.170107248430361, + "grad_norm": 0.00029678287683054805, + "learning_rate": 1.6320488999890847e-07, + "loss": 0.0, + "num_input_tokens_seen": 115026088, + "step": 170695 + }, + { + "epoch": 4.170229399262209, + "grad_norm": 0.0004376185534056276, + "learning_rate": 1.6315820217798992e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115029480, + "step": 170700 + }, + { + "epoch": 4.170351550094056, + "grad_norm": 0.000860581174492836, + "learning_rate": 1.6311152044280973e-07, + "loss": 0.0, + "num_input_tokens_seen": 115033128, + "step": 170705 + }, + { + "epoch": 4.170473700925903, + "grad_norm": 4.864812581217848e-05, + "learning_rate": 1.6306484479370786e-07, + "loss": 0.0, + "num_input_tokens_seen": 115036392, + "step": 170710 + }, + { + "epoch": 4.17059585175775, + "grad_norm": 0.00042654649587348104, + "learning_rate": 1.6301817523102335e-07, + "loss": 0.0, + "num_input_tokens_seen": 115040104, + "step": 170715 + }, + { + "epoch": 4.170718002589598, + "grad_norm": 2.155587571905926e-05, + "learning_rate": 1.6297151175509606e-07, + "loss": 0.0, + "num_input_tokens_seen": 115043560, + "step": 170720 + }, + { + "epoch": 4.1708401534214445, + "grad_norm": 1.0149953595828265e-05, + "learning_rate": 1.6292485436626502e-07, + "loss": 0.0, + "num_input_tokens_seen": 115046888, + "step": 170725 + }, + { + "epoch": 4.170962304253292, + "grad_norm": 6.887714698677883e-05, + "learning_rate": 1.6287820306486944e-07, + "loss": 0.0, + "num_input_tokens_seen": 115050024, + "step": 170730 + }, + { + "epoch": 4.171084455085139, + "grad_norm": 4.840194378630258e-05, + "learning_rate": 1.628315578512488e-07, + "loss": 0.0, + "num_input_tokens_seen": 115053224, + "step": 170735 + }, + { + "epoch": 4.1712066059169866, + "grad_norm": 7.268014087458141e-06, + "learning_rate": 1.6278491872574218e-07, + "loss": 0.0, + "num_input_tokens_seen": 115056424, + "step": 170740 + }, + { + "epoch": 4.171328756748833, + "grad_norm": 0.002297777682542801, + "learning_rate": 1.6273828568868886e-07, + "loss": 0.0, + "num_input_tokens_seen": 115059624, + "step": 170745 + }, + { + "epoch": 4.171450907580681, + "grad_norm": 0.09131614118814468, + "learning_rate": 1.6269165874042788e-07, + "loss": 0.0, + "num_input_tokens_seen": 115062888, + "step": 170750 + }, + { + "epoch": 4.171573058412528, + "grad_norm": 0.00038020877400413156, + "learning_rate": 1.6264503788129825e-07, + "loss": 0.0, + "num_input_tokens_seen": 115066152, + "step": 170755 + }, + { + "epoch": 4.171695209244375, + "grad_norm": 8.89160655788146e-05, + "learning_rate": 1.625984231116394e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115069864, + "step": 170760 + }, + { + "epoch": 4.171817360076222, + "grad_norm": 0.0017276030266657472, + "learning_rate": 1.625518144317898e-07, + "loss": 0.0, + "num_input_tokens_seen": 115073128, + "step": 170765 + }, + { + "epoch": 4.17193951090807, + "grad_norm": 0.00035557913361117244, + "learning_rate": 1.6250521184208888e-07, + "loss": 0.0, + "num_input_tokens_seen": 115076264, + "step": 170770 + }, + { + "epoch": 4.172061661739916, + "grad_norm": 6.348582974169403e-05, + "learning_rate": 1.624586153428751e-07, + "loss": 0.0397, + "num_input_tokens_seen": 115079976, + "step": 170775 + }, + { + "epoch": 4.172183812571763, + "grad_norm": 0.0005099988193251193, + "learning_rate": 1.624120249344878e-07, + "loss": 0.0, + "num_input_tokens_seen": 115083624, + "step": 170780 + }, + { + "epoch": 4.172305963403611, + "grad_norm": 0.0015803297283127904, + "learning_rate": 1.623654406172652e-07, + "loss": 0.0, + "num_input_tokens_seen": 115086824, + "step": 170785 + }, + { + "epoch": 4.172428114235458, + "grad_norm": 0.00015540645108558238, + "learning_rate": 1.6231886239154647e-07, + "loss": 0.0, + "num_input_tokens_seen": 115090536, + "step": 170790 + }, + { + "epoch": 4.172550265067305, + "grad_norm": 2.5943469154299237e-05, + "learning_rate": 1.6227229025767052e-07, + "loss": 0.0, + "num_input_tokens_seen": 115093480, + "step": 170795 + }, + { + "epoch": 4.172672415899152, + "grad_norm": 0.000604168395511806, + "learning_rate": 1.6222572421597558e-07, + "loss": 0.0, + "num_input_tokens_seen": 115096680, + "step": 170800 + }, + { + "epoch": 4.172794566731, + "grad_norm": 1.3740186659561004e-05, + "learning_rate": 1.621791642668008e-07, + "loss": 0.0, + "num_input_tokens_seen": 115100584, + "step": 170805 + }, + { + "epoch": 4.172916717562846, + "grad_norm": 8.320253073179629e-06, + "learning_rate": 1.621326104104842e-07, + "loss": 0.0, + "num_input_tokens_seen": 115104104, + "step": 170810 + }, + { + "epoch": 4.173038868394694, + "grad_norm": 7.703889423282817e-06, + "learning_rate": 1.620860626473648e-07, + "loss": 0.0, + "num_input_tokens_seen": 115107304, + "step": 170815 + }, + { + "epoch": 4.173161019226541, + "grad_norm": 2.276463965245057e-05, + "learning_rate": 1.6203952097778073e-07, + "loss": 0.0, + "num_input_tokens_seen": 115110696, + "step": 170820 + }, + { + "epoch": 4.173283170058388, + "grad_norm": 0.00026030722074210644, + "learning_rate": 1.6199298540207086e-07, + "loss": 0.0, + "num_input_tokens_seen": 115113896, + "step": 170825 + }, + { + "epoch": 4.173405320890235, + "grad_norm": 4.1503084503347054e-05, + "learning_rate": 1.6194645592057343e-07, + "loss": 0.0, + "num_input_tokens_seen": 115117288, + "step": 170830 + }, + { + "epoch": 4.173527471722083, + "grad_norm": 7.145016297727125e-06, + "learning_rate": 1.6189993253362655e-07, + "loss": 0.0, + "num_input_tokens_seen": 115120488, + "step": 170835 + }, + { + "epoch": 4.1736496225539295, + "grad_norm": 0.0001081978261936456, + "learning_rate": 1.6185341524156904e-07, + "loss": 0.0, + "num_input_tokens_seen": 115123880, + "step": 170840 + }, + { + "epoch": 4.173771773385777, + "grad_norm": 0.009872562251985073, + "learning_rate": 1.6180690404473862e-07, + "loss": 0.0, + "num_input_tokens_seen": 115127016, + "step": 170845 + }, + { + "epoch": 4.173893924217624, + "grad_norm": 0.001063234987668693, + "learning_rate": 1.6176039894347382e-07, + "loss": 0.0, + "num_input_tokens_seen": 115130408, + "step": 170850 + }, + { + "epoch": 4.1740160750494715, + "grad_norm": 9.49207696976373e-06, + "learning_rate": 1.6171389993811323e-07, + "loss": 0.0, + "num_input_tokens_seen": 115133928, + "step": 170855 + }, + { + "epoch": 4.174138225881318, + "grad_norm": 4.45809455413837e-05, + "learning_rate": 1.616674070289943e-07, + "loss": 0.0, + "num_input_tokens_seen": 115137704, + "step": 170860 + }, + { + "epoch": 4.174260376713166, + "grad_norm": 0.00028284022118896246, + "learning_rate": 1.6162092021645569e-07, + "loss": 0.0, + "num_input_tokens_seen": 115141288, + "step": 170865 + }, + { + "epoch": 4.174382527545013, + "grad_norm": 0.0019308615010231733, + "learning_rate": 1.6157443950083504e-07, + "loss": 0.0, + "num_input_tokens_seen": 115144616, + "step": 170870 + }, + { + "epoch": 4.174504678376859, + "grad_norm": 3.211089506294229e-06, + "learning_rate": 1.6152796488247078e-07, + "loss": 0.0, + "num_input_tokens_seen": 115148072, + "step": 170875 + }, + { + "epoch": 4.174626829208707, + "grad_norm": 7.433198788930895e-06, + "learning_rate": 1.614814963617005e-07, + "loss": 0.0, + "num_input_tokens_seen": 115151464, + "step": 170880 + }, + { + "epoch": 4.174748980040554, + "grad_norm": 0.00026477460050955415, + "learning_rate": 1.6143503393886225e-07, + "loss": 0.0, + "num_input_tokens_seen": 115154920, + "step": 170885 + }, + { + "epoch": 4.174871130872401, + "grad_norm": 0.0009165243827737868, + "learning_rate": 1.6138857761429436e-07, + "loss": 0.0, + "num_input_tokens_seen": 115158184, + "step": 170890 + }, + { + "epoch": 4.174993281704248, + "grad_norm": 3.91147805203218e-05, + "learning_rate": 1.6134212738833385e-07, + "loss": 0.0, + "num_input_tokens_seen": 115161256, + "step": 170895 + }, + { + "epoch": 4.175115432536096, + "grad_norm": 5.272724592941813e-05, + "learning_rate": 1.6129568326131936e-07, + "loss": 0.0, + "num_input_tokens_seen": 115164520, + "step": 170900 + }, + { + "epoch": 4.1752375833679425, + "grad_norm": 0.40251025557518005, + "learning_rate": 1.6124924523358795e-07, + "loss": 0.0002, + "num_input_tokens_seen": 115167848, + "step": 170905 + }, + { + "epoch": 4.17535973419979, + "grad_norm": 8.896431972971186e-05, + "learning_rate": 1.612028133054776e-07, + "loss": 0.0, + "num_input_tokens_seen": 115171624, + "step": 170910 + }, + { + "epoch": 4.175481885031637, + "grad_norm": 0.00010786348866531625, + "learning_rate": 1.611563874773263e-07, + "loss": 0.0, + "num_input_tokens_seen": 115175016, + "step": 170915 + }, + { + "epoch": 4.1756040358634845, + "grad_norm": 0.000168164013302885, + "learning_rate": 1.6110996774947127e-07, + "loss": 0.0, + "num_input_tokens_seen": 115178600, + "step": 170920 + }, + { + "epoch": 4.175726186695331, + "grad_norm": 0.0033566667698323727, + "learning_rate": 1.6106355412225003e-07, + "loss": 0.0, + "num_input_tokens_seen": 115181928, + "step": 170925 + }, + { + "epoch": 4.175848337527179, + "grad_norm": 2.072779716399964e-05, + "learning_rate": 1.610171465960005e-07, + "loss": 0.0, + "num_input_tokens_seen": 115185192, + "step": 170930 + }, + { + "epoch": 4.175970488359026, + "grad_norm": 0.003940373659133911, + "learning_rate": 1.6097074517105967e-07, + "loss": 0.0307, + "num_input_tokens_seen": 115188904, + "step": 170935 + }, + { + "epoch": 4.176092639190873, + "grad_norm": 8.632901881355792e-05, + "learning_rate": 1.609243498477656e-07, + "loss": 0.0, + "num_input_tokens_seen": 115192040, + "step": 170940 + }, + { + "epoch": 4.17621479002272, + "grad_norm": 0.002263226080685854, + "learning_rate": 1.6087796062645499e-07, + "loss": 0.0, + "num_input_tokens_seen": 115195240, + "step": 170945 + }, + { + "epoch": 4.176336940854568, + "grad_norm": 0.0014102370478212833, + "learning_rate": 1.608315775074658e-07, + "loss": 0.0, + "num_input_tokens_seen": 115198760, + "step": 170950 + }, + { + "epoch": 4.176459091686414, + "grad_norm": 8.044674177654088e-05, + "learning_rate": 1.6078520049113485e-07, + "loss": 0.0, + "num_input_tokens_seen": 115201960, + "step": 170955 + }, + { + "epoch": 4.176581242518261, + "grad_norm": 0.00010650189506122842, + "learning_rate": 1.6073882957779993e-07, + "loss": 0.0, + "num_input_tokens_seen": 115205096, + "step": 170960 + }, + { + "epoch": 4.176703393350109, + "grad_norm": 4.333862307248637e-05, + "learning_rate": 1.6069246476779774e-07, + "loss": 0.0, + "num_input_tokens_seen": 115208808, + "step": 170965 + }, + { + "epoch": 4.1768255441819555, + "grad_norm": 5.672447059623664e-06, + "learning_rate": 1.6064610606146567e-07, + "loss": 0.0, + "num_input_tokens_seen": 115211816, + "step": 170970 + }, + { + "epoch": 4.176947695013803, + "grad_norm": 2.8347905754344538e-05, + "learning_rate": 1.60599753459141e-07, + "loss": 0.0, + "num_input_tokens_seen": 115215912, + "step": 170975 + }, + { + "epoch": 4.17706984584565, + "grad_norm": 5.478051025420427e-05, + "learning_rate": 1.605534069611606e-07, + "loss": 0.0, + "num_input_tokens_seen": 115219240, + "step": 170980 + }, + { + "epoch": 4.1771919966774975, + "grad_norm": 8.117170182231348e-06, + "learning_rate": 1.6050706656786184e-07, + "loss": 0.0, + "num_input_tokens_seen": 115222632, + "step": 170985 + }, + { + "epoch": 4.177314147509344, + "grad_norm": 1.2525510101113468e-05, + "learning_rate": 1.6046073227958123e-07, + "loss": 0.0, + "num_input_tokens_seen": 115226088, + "step": 170990 + }, + { + "epoch": 4.177436298341192, + "grad_norm": 3.1837484129937366e-05, + "learning_rate": 1.6041440409665618e-07, + "loss": 0.0, + "num_input_tokens_seen": 115229736, + "step": 170995 + }, + { + "epoch": 4.177558449173039, + "grad_norm": 0.00021777056099381298, + "learning_rate": 1.603680820194232e-07, + "loss": 0.0, + "num_input_tokens_seen": 115233128, + "step": 171000 + }, + { + "epoch": 4.177680600004886, + "grad_norm": 0.00034911080729216337, + "learning_rate": 1.6032176604821933e-07, + "loss": 0.0, + "num_input_tokens_seen": 115236584, + "step": 171005 + }, + { + "epoch": 4.177802750836733, + "grad_norm": 3.63235485565383e-05, + "learning_rate": 1.6027545618338166e-07, + "loss": 0.0, + "num_input_tokens_seen": 115240296, + "step": 171010 + }, + { + "epoch": 4.177924901668581, + "grad_norm": 0.0006223563104867935, + "learning_rate": 1.6022915242524659e-07, + "loss": 0.0, + "num_input_tokens_seen": 115244648, + "step": 171015 + }, + { + "epoch": 4.178047052500427, + "grad_norm": 1.144667203334393e-05, + "learning_rate": 1.6018285477415116e-07, + "loss": 0.0, + "num_input_tokens_seen": 115248488, + "step": 171020 + }, + { + "epoch": 4.178169203332275, + "grad_norm": 4.863789945375174e-05, + "learning_rate": 1.6013656323043166e-07, + "loss": 0.0, + "num_input_tokens_seen": 115252072, + "step": 171025 + }, + { + "epoch": 4.178291354164122, + "grad_norm": 4.818748038815102e-06, + "learning_rate": 1.6009027779442519e-07, + "loss": 0.0, + "num_input_tokens_seen": 115255592, + "step": 171030 + }, + { + "epoch": 4.178413504995969, + "grad_norm": 0.0002015796781051904, + "learning_rate": 1.600439984664681e-07, + "loss": 0.0, + "num_input_tokens_seen": 115259112, + "step": 171035 + }, + { + "epoch": 4.178535655827816, + "grad_norm": 0.00025556082255207, + "learning_rate": 1.599977252468968e-07, + "loss": 0.0, + "num_input_tokens_seen": 115261864, + "step": 171040 + }, + { + "epoch": 4.178657806659663, + "grad_norm": 0.0024212037678807974, + "learning_rate": 1.5995145813604815e-07, + "loss": 0.0, + "num_input_tokens_seen": 115265000, + "step": 171045 + }, + { + "epoch": 4.1787799574915105, + "grad_norm": 4.863837602897547e-05, + "learning_rate": 1.5990519713425832e-07, + "loss": 0.0, + "num_input_tokens_seen": 115268520, + "step": 171050 + }, + { + "epoch": 4.178902108323357, + "grad_norm": 0.0003118966124020517, + "learning_rate": 1.5985894224186401e-07, + "loss": 0.0, + "num_input_tokens_seen": 115271656, + "step": 171055 + }, + { + "epoch": 4.179024259155205, + "grad_norm": 0.0437767393887043, + "learning_rate": 1.5981269345920123e-07, + "loss": 0.0, + "num_input_tokens_seen": 115274728, + "step": 171060 + }, + { + "epoch": 4.179146409987052, + "grad_norm": 1.679328124737367e-05, + "learning_rate": 1.5976645078660643e-07, + "loss": 0.0, + "num_input_tokens_seen": 115278440, + "step": 171065 + }, + { + "epoch": 4.179268560818899, + "grad_norm": 8.560314745409414e-05, + "learning_rate": 1.597202142244164e-07, + "loss": 0.0, + "num_input_tokens_seen": 115281832, + "step": 171070 + }, + { + "epoch": 4.179390711650746, + "grad_norm": 0.00014771960559301078, + "learning_rate": 1.5967398377296658e-07, + "loss": 0.0, + "num_input_tokens_seen": 115285160, + "step": 171075 + }, + { + "epoch": 4.179512862482594, + "grad_norm": 9.891157242236659e-05, + "learning_rate": 1.59627759432594e-07, + "loss": 0.0002, + "num_input_tokens_seen": 115288232, + "step": 171080 + }, + { + "epoch": 4.17963501331444, + "grad_norm": 3.114061109954491e-05, + "learning_rate": 1.5958154120363398e-07, + "loss": 0.0, + "num_input_tokens_seen": 115291304, + "step": 171085 + }, + { + "epoch": 4.179757164146288, + "grad_norm": 0.0009680325165390968, + "learning_rate": 1.595353290864233e-07, + "loss": 0.058, + "num_input_tokens_seen": 115294184, + "step": 171090 + }, + { + "epoch": 4.179879314978135, + "grad_norm": 1.1188540156581439e-05, + "learning_rate": 1.594891230812976e-07, + "loss": 0.0, + "num_input_tokens_seen": 115297512, + "step": 171095 + }, + { + "epoch": 4.180001465809982, + "grad_norm": 8.53309848025674e-06, + "learning_rate": 1.59442923188593e-07, + "loss": 0.0, + "num_input_tokens_seen": 115301032, + "step": 171100 + }, + { + "epoch": 4.180123616641829, + "grad_norm": 1.0541667506913655e-05, + "learning_rate": 1.5939672940864578e-07, + "loss": 0.0, + "num_input_tokens_seen": 115303912, + "step": 171105 + }, + { + "epoch": 4.180245767473677, + "grad_norm": 0.001356814056634903, + "learning_rate": 1.5935054174179142e-07, + "loss": 0.0, + "num_input_tokens_seen": 115306856, + "step": 171110 + }, + { + "epoch": 4.180367918305524, + "grad_norm": 1.8071999875246547e-05, + "learning_rate": 1.5930436018836635e-07, + "loss": 0.0, + "num_input_tokens_seen": 115310184, + "step": 171115 + }, + { + "epoch": 4.180490069137371, + "grad_norm": 0.00012888468336313963, + "learning_rate": 1.5925818474870578e-07, + "loss": 0.0693, + "num_input_tokens_seen": 115313064, + "step": 171120 + }, + { + "epoch": 4.180612219969218, + "grad_norm": 0.0025817484129220247, + "learning_rate": 1.59212015423146e-07, + "loss": 0.0, + "num_input_tokens_seen": 115316584, + "step": 171125 + }, + { + "epoch": 4.180734370801066, + "grad_norm": 0.00033562208409421146, + "learning_rate": 1.5916585221202238e-07, + "loss": 0.0, + "num_input_tokens_seen": 115319912, + "step": 171130 + }, + { + "epoch": 4.180856521632912, + "grad_norm": 1.0221515367447864e-05, + "learning_rate": 1.5911969511567113e-07, + "loss": 0.0, + "num_input_tokens_seen": 115323112, + "step": 171135 + }, + { + "epoch": 4.180978672464759, + "grad_norm": 0.000600189610850066, + "learning_rate": 1.5907354413442765e-07, + "loss": 0.0, + "num_input_tokens_seen": 115325992, + "step": 171140 + }, + { + "epoch": 4.181100823296607, + "grad_norm": 1.8580491087050177e-05, + "learning_rate": 1.590273992686273e-07, + "loss": 0.0, + "num_input_tokens_seen": 115329576, + "step": 171145 + }, + { + "epoch": 4.1812229741284535, + "grad_norm": 4.74736452815705e-06, + "learning_rate": 1.5898126051860606e-07, + "loss": 0.0, + "num_input_tokens_seen": 115332968, + "step": 171150 + }, + { + "epoch": 4.181345124960301, + "grad_norm": 7.126467153284466e-06, + "learning_rate": 1.589351278846991e-07, + "loss": 0.0, + "num_input_tokens_seen": 115336168, + "step": 171155 + }, + { + "epoch": 4.181467275792148, + "grad_norm": 5.6934957683552057e-05, + "learning_rate": 1.5888900136724203e-07, + "loss": 0.0, + "num_input_tokens_seen": 115339624, + "step": 171160 + }, + { + "epoch": 4.1815894266239955, + "grad_norm": 0.0008534068474546075, + "learning_rate": 1.5884288096657071e-07, + "loss": 0.0, + "num_input_tokens_seen": 115343208, + "step": 171165 + }, + { + "epoch": 4.181711577455842, + "grad_norm": 3.3917807741090655e-05, + "learning_rate": 1.5879676668302e-07, + "loss": 0.0, + "num_input_tokens_seen": 115346344, + "step": 171170 + }, + { + "epoch": 4.18183372828769, + "grad_norm": 0.00018031641957350075, + "learning_rate": 1.587506585169256e-07, + "loss": 0.0, + "num_input_tokens_seen": 115349608, + "step": 171175 + }, + { + "epoch": 4.181955879119537, + "grad_norm": 0.00013050250709056854, + "learning_rate": 1.5870455646862246e-07, + "loss": 0.0, + "num_input_tokens_seen": 115353192, + "step": 171180 + }, + { + "epoch": 4.182078029951384, + "grad_norm": 4.1380972106708214e-05, + "learning_rate": 1.5865846053844634e-07, + "loss": 0.0, + "num_input_tokens_seen": 115356328, + "step": 171185 + }, + { + "epoch": 4.182200180783231, + "grad_norm": 3.544270293787122e-05, + "learning_rate": 1.5861237072673194e-07, + "loss": 0.0, + "num_input_tokens_seen": 115359720, + "step": 171190 + }, + { + "epoch": 4.182322331615079, + "grad_norm": 4.615875513991341e-05, + "learning_rate": 1.585662870338147e-07, + "loss": 0.0, + "num_input_tokens_seen": 115363048, + "step": 171195 + }, + { + "epoch": 4.182444482446925, + "grad_norm": 0.00040074848220683634, + "learning_rate": 1.5852020946002998e-07, + "loss": 0.0, + "num_input_tokens_seen": 115366568, + "step": 171200 + }, + { + "epoch": 4.182566633278773, + "grad_norm": 4.493439064390259e-06, + "learning_rate": 1.584741380057123e-07, + "loss": 0.0, + "num_input_tokens_seen": 115369832, + "step": 171205 + }, + { + "epoch": 4.18268878411062, + "grad_norm": 0.004250842146575451, + "learning_rate": 1.584280726711974e-07, + "loss": 0.0, + "num_input_tokens_seen": 115373096, + "step": 171210 + }, + { + "epoch": 4.182810934942467, + "grad_norm": 8.616738341515884e-06, + "learning_rate": 1.5838201345681957e-07, + "loss": 0.0, + "num_input_tokens_seen": 115376040, + "step": 171215 + }, + { + "epoch": 4.182933085774314, + "grad_norm": 0.001658809371292591, + "learning_rate": 1.5833596036291408e-07, + "loss": 0.0, + "num_input_tokens_seen": 115380072, + "step": 171220 + }, + { + "epoch": 4.183055236606162, + "grad_norm": 0.002577235922217369, + "learning_rate": 1.5828991338981623e-07, + "loss": 0.0, + "num_input_tokens_seen": 115383464, + "step": 171225 + }, + { + "epoch": 4.1831773874380085, + "grad_norm": 0.0004686212632805109, + "learning_rate": 1.5824387253786043e-07, + "loss": 0.0, + "num_input_tokens_seen": 115386920, + "step": 171230 + }, + { + "epoch": 4.183299538269855, + "grad_norm": 0.002691245172172785, + "learning_rate": 1.581978378073814e-07, + "loss": 0.001, + "num_input_tokens_seen": 115390632, + "step": 171235 + }, + { + "epoch": 4.183421689101703, + "grad_norm": 2.6709578378358856e-05, + "learning_rate": 1.581518091987144e-07, + "loss": 0.0, + "num_input_tokens_seen": 115393512, + "step": 171240 + }, + { + "epoch": 4.18354383993355, + "grad_norm": 0.0002521930728107691, + "learning_rate": 1.5810578671219355e-07, + "loss": 0.0, + "num_input_tokens_seen": 115396776, + "step": 171245 + }, + { + "epoch": 4.183665990765397, + "grad_norm": 0.005020914599299431, + "learning_rate": 1.5805977034815409e-07, + "loss": 0.0, + "num_input_tokens_seen": 115399912, + "step": 171250 + }, + { + "epoch": 4.183788141597244, + "grad_norm": 0.000299194100080058, + "learning_rate": 1.5801376010693024e-07, + "loss": 0.0, + "num_input_tokens_seen": 115403048, + "step": 171255 + }, + { + "epoch": 4.183910292429092, + "grad_norm": 1.2573545973282307e-05, + "learning_rate": 1.5796775598885703e-07, + "loss": 0.0, + "num_input_tokens_seen": 115406248, + "step": 171260 + }, + { + "epoch": 4.184032443260938, + "grad_norm": 0.00023871743178460747, + "learning_rate": 1.5792175799426855e-07, + "loss": 0.0, + "num_input_tokens_seen": 115409512, + "step": 171265 + }, + { + "epoch": 4.184154594092786, + "grad_norm": 0.001035814406350255, + "learning_rate": 1.5787576612349973e-07, + "loss": 0.0, + "num_input_tokens_seen": 115412840, + "step": 171270 + }, + { + "epoch": 4.184276744924633, + "grad_norm": 0.0006627806578762829, + "learning_rate": 1.5782978037688466e-07, + "loss": 0.0, + "num_input_tokens_seen": 115415976, + "step": 171275 + }, + { + "epoch": 4.18439889575648, + "grad_norm": 1.587512633705046e-05, + "learning_rate": 1.5778380075475818e-07, + "loss": 0.0, + "num_input_tokens_seen": 115419432, + "step": 171280 + }, + { + "epoch": 4.184521046588327, + "grad_norm": 0.000416866154409945, + "learning_rate": 1.5773782725745412e-07, + "loss": 0.0, + "num_input_tokens_seen": 115422952, + "step": 171285 + }, + { + "epoch": 4.184643197420175, + "grad_norm": 0.024858905002474785, + "learning_rate": 1.576918598853072e-07, + "loss": 0.0, + "num_input_tokens_seen": 115426216, + "step": 171290 + }, + { + "epoch": 4.1847653482520215, + "grad_norm": 0.00018218440527562052, + "learning_rate": 1.5764589863865187e-07, + "loss": 0.0, + "num_input_tokens_seen": 115429864, + "step": 171295 + }, + { + "epoch": 4.184887499083869, + "grad_norm": 4.92211984237656e-06, + "learning_rate": 1.575999435178218e-07, + "loss": 0.0002, + "num_input_tokens_seen": 115433640, + "step": 171300 + }, + { + "epoch": 4.185009649915716, + "grad_norm": 0.00018195451411884278, + "learning_rate": 1.5755399452315176e-07, + "loss": 0.0, + "num_input_tokens_seen": 115436840, + "step": 171305 + }, + { + "epoch": 4.185131800747563, + "grad_norm": 1.3570603186963126e-05, + "learning_rate": 1.575080516549755e-07, + "loss": 0.0, + "num_input_tokens_seen": 115440360, + "step": 171310 + }, + { + "epoch": 4.18525395157941, + "grad_norm": 0.0008919899701140821, + "learning_rate": 1.5746211491362726e-07, + "loss": 0.0, + "num_input_tokens_seen": 115443880, + "step": 171315 + }, + { + "epoch": 4.185376102411257, + "grad_norm": 4.4831544073531404e-05, + "learning_rate": 1.5741618429944136e-07, + "loss": 0.0, + "num_input_tokens_seen": 115447144, + "step": 171320 + }, + { + "epoch": 4.185498253243105, + "grad_norm": 0.0010451078414916992, + "learning_rate": 1.5737025981275143e-07, + "loss": 0.0, + "num_input_tokens_seen": 115450600, + "step": 171325 + }, + { + "epoch": 4.185620404074951, + "grad_norm": 0.008530599996447563, + "learning_rate": 1.5732434145389185e-07, + "loss": 0.0, + "num_input_tokens_seen": 115453864, + "step": 171330 + }, + { + "epoch": 4.185742554906799, + "grad_norm": 7.52803825889714e-05, + "learning_rate": 1.57278429223196e-07, + "loss": 0.0, + "num_input_tokens_seen": 115457064, + "step": 171335 + }, + { + "epoch": 4.185864705738646, + "grad_norm": 0.00021106674103066325, + "learning_rate": 1.5723252312099832e-07, + "loss": 0.0, + "num_input_tokens_seen": 115460264, + "step": 171340 + }, + { + "epoch": 4.185986856570493, + "grad_norm": 0.000153837536345236, + "learning_rate": 1.5718662314763242e-07, + "loss": 0.0, + "num_input_tokens_seen": 115463528, + "step": 171345 + }, + { + "epoch": 4.18610900740234, + "grad_norm": 9.459636203246191e-06, + "learning_rate": 1.571407293034319e-07, + "loss": 0.0, + "num_input_tokens_seen": 115466600, + "step": 171350 + }, + { + "epoch": 4.186231158234188, + "grad_norm": 5.712966958526522e-05, + "learning_rate": 1.5709484158873088e-07, + "loss": 0.0, + "num_input_tokens_seen": 115470184, + "step": 171355 + }, + { + "epoch": 4.1863533090660345, + "grad_norm": 4.7370958782266825e-06, + "learning_rate": 1.570489600038627e-07, + "loss": 0.0, + "num_input_tokens_seen": 115473832, + "step": 171360 + }, + { + "epoch": 4.186475459897882, + "grad_norm": 1.8898492271546274e-05, + "learning_rate": 1.5700308454916132e-07, + "loss": 0.0, + "num_input_tokens_seen": 115477096, + "step": 171365 + }, + { + "epoch": 4.186597610729729, + "grad_norm": 0.0012110867537558079, + "learning_rate": 1.5695721522496007e-07, + "loss": 0.0, + "num_input_tokens_seen": 115480808, + "step": 171370 + }, + { + "epoch": 4.1867197615615765, + "grad_norm": 4.958015415468253e-05, + "learning_rate": 1.5691135203159277e-07, + "loss": 0.0, + "num_input_tokens_seen": 115484392, + "step": 171375 + }, + { + "epoch": 4.186841912393423, + "grad_norm": 0.000878807797562331, + "learning_rate": 1.5686549496939306e-07, + "loss": 0.0, + "num_input_tokens_seen": 115487912, + "step": 171380 + }, + { + "epoch": 4.186964063225271, + "grad_norm": 1.878697548818309e-05, + "learning_rate": 1.5681964403869408e-07, + "loss": 0.0, + "num_input_tokens_seen": 115490984, + "step": 171385 + }, + { + "epoch": 4.187086214057118, + "grad_norm": 51.99897003173828, + "learning_rate": 1.5677379923982958e-07, + "loss": 0.0162, + "num_input_tokens_seen": 115494376, + "step": 171390 + }, + { + "epoch": 4.187208364888965, + "grad_norm": 0.0018260630313307047, + "learning_rate": 1.5672796057313265e-07, + "loss": 0.0, + "num_input_tokens_seen": 115497576, + "step": 171395 + }, + { + "epoch": 4.187330515720812, + "grad_norm": 1.377374428557232e-05, + "learning_rate": 1.5668212803893698e-07, + "loss": 0.0, + "num_input_tokens_seen": 115500776, + "step": 171400 + }, + { + "epoch": 4.187452666552659, + "grad_norm": 0.00017338775796815753, + "learning_rate": 1.5663630163757558e-07, + "loss": 0.0, + "num_input_tokens_seen": 115504552, + "step": 171405 + }, + { + "epoch": 4.187574817384506, + "grad_norm": 1.9353059542481788e-05, + "learning_rate": 1.565904813693817e-07, + "loss": 0.0, + "num_input_tokens_seen": 115508136, + "step": 171410 + }, + { + "epoch": 4.187696968216353, + "grad_norm": 0.0007885847007855773, + "learning_rate": 1.5654466723468897e-07, + "loss": 0.0, + "num_input_tokens_seen": 115511400, + "step": 171415 + }, + { + "epoch": 4.187819119048201, + "grad_norm": 0.00039690814446657896, + "learning_rate": 1.5649885923383e-07, + "loss": 0.0, + "num_input_tokens_seen": 115514600, + "step": 171420 + }, + { + "epoch": 4.187941269880048, + "grad_norm": 0.0032051349990069866, + "learning_rate": 1.5645305736713854e-07, + "loss": 0.0, + "num_input_tokens_seen": 115517864, + "step": 171425 + }, + { + "epoch": 4.188063420711895, + "grad_norm": 7.87381868576631e-05, + "learning_rate": 1.56407261634947e-07, + "loss": 0.0, + "num_input_tokens_seen": 115521128, + "step": 171430 + }, + { + "epoch": 4.188185571543742, + "grad_norm": 2.181102172471583e-05, + "learning_rate": 1.563614720375891e-07, + "loss": 0.0, + "num_input_tokens_seen": 115525160, + "step": 171435 + }, + { + "epoch": 4.18830772237559, + "grad_norm": 0.00012077955761924386, + "learning_rate": 1.5631568857539712e-07, + "loss": 0.0, + "num_input_tokens_seen": 115528488, + "step": 171440 + }, + { + "epoch": 4.188429873207436, + "grad_norm": 0.00010481636854819953, + "learning_rate": 1.562699112487047e-07, + "loss": 0.0, + "num_input_tokens_seen": 115531688, + "step": 171445 + }, + { + "epoch": 4.188552024039284, + "grad_norm": 0.000120364515169058, + "learning_rate": 1.5622414005784434e-07, + "loss": 0.0, + "num_input_tokens_seen": 115535016, + "step": 171450 + }, + { + "epoch": 4.188674174871131, + "grad_norm": 0.0021958048455417156, + "learning_rate": 1.5617837500314879e-07, + "loss": 0.0, + "num_input_tokens_seen": 115538280, + "step": 171455 + }, + { + "epoch": 4.188796325702978, + "grad_norm": 4.293501842767e-05, + "learning_rate": 1.561326160849513e-07, + "loss": 0.0, + "num_input_tokens_seen": 115541352, + "step": 171460 + }, + { + "epoch": 4.188918476534825, + "grad_norm": 0.00024415075313299894, + "learning_rate": 1.5608686330358422e-07, + "loss": 0.0, + "num_input_tokens_seen": 115544744, + "step": 171465 + }, + { + "epoch": 4.189040627366673, + "grad_norm": 8.518228241882753e-06, + "learning_rate": 1.5604111665938035e-07, + "loss": 0.0, + "num_input_tokens_seen": 115547944, + "step": 171470 + }, + { + "epoch": 4.1891627781985195, + "grad_norm": 0.0003425255126785487, + "learning_rate": 1.5599537615267277e-07, + "loss": 0.0, + "num_input_tokens_seen": 115551016, + "step": 171475 + }, + { + "epoch": 4.189284929030367, + "grad_norm": 0.00011390036524971947, + "learning_rate": 1.5594964178379366e-07, + "loss": 0.0, + "num_input_tokens_seen": 115554472, + "step": 171480 + }, + { + "epoch": 4.189407079862214, + "grad_norm": 0.001087276148609817, + "learning_rate": 1.5590391355307587e-07, + "loss": 0.0, + "num_input_tokens_seen": 115557480, + "step": 171485 + }, + { + "epoch": 4.1895292306940615, + "grad_norm": 0.0009388430626131594, + "learning_rate": 1.5585819146085178e-07, + "loss": 0.0, + "num_input_tokens_seen": 115560424, + "step": 171490 + }, + { + "epoch": 4.189651381525908, + "grad_norm": 0.001759289763867855, + "learning_rate": 1.5581247550745402e-07, + "loss": 0.0, + "num_input_tokens_seen": 115564008, + "step": 171495 + }, + { + "epoch": 4.189773532357755, + "grad_norm": 1.796562173694838e-05, + "learning_rate": 1.557667656932149e-07, + "loss": 0.0, + "num_input_tokens_seen": 115567400, + "step": 171500 + }, + { + "epoch": 4.189895683189603, + "grad_norm": 7.069880666676909e-05, + "learning_rate": 1.5572106201846691e-07, + "loss": 0.0, + "num_input_tokens_seen": 115570792, + "step": 171505 + }, + { + "epoch": 4.190017834021449, + "grad_norm": 3.106020812992938e-05, + "learning_rate": 1.5567536448354257e-07, + "loss": 0.0, + "num_input_tokens_seen": 115573864, + "step": 171510 + }, + { + "epoch": 4.190139984853297, + "grad_norm": 0.0006465915939770639, + "learning_rate": 1.5562967308877395e-07, + "loss": 0.0, + "num_input_tokens_seen": 115577256, + "step": 171515 + }, + { + "epoch": 4.190262135685144, + "grad_norm": 5.1399223593762144e-05, + "learning_rate": 1.5558398783449366e-07, + "loss": 0.0501, + "num_input_tokens_seen": 115580776, + "step": 171520 + }, + { + "epoch": 4.190384286516991, + "grad_norm": 0.00033536626142449677, + "learning_rate": 1.5553830872103347e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115584872, + "step": 171525 + }, + { + "epoch": 4.190506437348838, + "grad_norm": 32.08860778808594, + "learning_rate": 1.5549263574872585e-07, + "loss": 0.0444, + "num_input_tokens_seen": 115587880, + "step": 171530 + }, + { + "epoch": 4.190628588180686, + "grad_norm": 0.00031860574381425977, + "learning_rate": 1.554469689179032e-07, + "loss": 0.0, + "num_input_tokens_seen": 115590952, + "step": 171535 + }, + { + "epoch": 4.1907507390125325, + "grad_norm": 0.0002085407468257472, + "learning_rate": 1.5540130822889708e-07, + "loss": 0.0, + "num_input_tokens_seen": 115594536, + "step": 171540 + }, + { + "epoch": 4.19087288984438, + "grad_norm": 0.00019028125097975135, + "learning_rate": 1.5535565368204008e-07, + "loss": 0.0, + "num_input_tokens_seen": 115598056, + "step": 171545 + }, + { + "epoch": 4.190995040676227, + "grad_norm": 6.620458862016676e-06, + "learning_rate": 1.553100052776639e-07, + "loss": 0.0, + "num_input_tokens_seen": 115601384, + "step": 171550 + }, + { + "epoch": 4.1911171915080745, + "grad_norm": 0.0010951223084703088, + "learning_rate": 1.5526436301610035e-07, + "loss": 0.0, + "num_input_tokens_seen": 115604392, + "step": 171555 + }, + { + "epoch": 4.191239342339921, + "grad_norm": 0.00011559041013242677, + "learning_rate": 1.5521872689768178e-07, + "loss": 0.0, + "num_input_tokens_seen": 115607592, + "step": 171560 + }, + { + "epoch": 4.191361493171769, + "grad_norm": 0.00027739559300243855, + "learning_rate": 1.551730969227396e-07, + "loss": 0.0, + "num_input_tokens_seen": 115611048, + "step": 171565 + }, + { + "epoch": 4.191483644003616, + "grad_norm": 0.0003272799076512456, + "learning_rate": 1.5512747309160622e-07, + "loss": 0.0, + "num_input_tokens_seen": 115614568, + "step": 171570 + }, + { + "epoch": 4.191605794835463, + "grad_norm": 0.00016274228983093053, + "learning_rate": 1.5508185540461283e-07, + "loss": 0.0, + "num_input_tokens_seen": 115618088, + "step": 171575 + }, + { + "epoch": 4.19172794566731, + "grad_norm": 0.0005291857523843646, + "learning_rate": 1.5503624386209157e-07, + "loss": 0.0, + "num_input_tokens_seen": 115621544, + "step": 171580 + }, + { + "epoch": 4.191850096499157, + "grad_norm": 5.590125965682091e-06, + "learning_rate": 1.5499063846437387e-07, + "loss": 0.0, + "num_input_tokens_seen": 115625192, + "step": 171585 + }, + { + "epoch": 4.191972247331004, + "grad_norm": 0.00010487588588148355, + "learning_rate": 1.549450392117917e-07, + "loss": 0.0, + "num_input_tokens_seen": 115628904, + "step": 171590 + }, + { + "epoch": 4.192094398162851, + "grad_norm": 0.0001090022487915121, + "learning_rate": 1.5489944610467632e-07, + "loss": 0.0, + "num_input_tokens_seen": 115632232, + "step": 171595 + }, + { + "epoch": 4.192216548994699, + "grad_norm": 0.0007822644547559321, + "learning_rate": 1.5485385914335946e-07, + "loss": 0.0, + "num_input_tokens_seen": 115635624, + "step": 171600 + }, + { + "epoch": 4.1923386998265455, + "grad_norm": 2.5184515834553167e-05, + "learning_rate": 1.548082783281729e-07, + "loss": 0.0, + "num_input_tokens_seen": 115638760, + "step": 171605 + }, + { + "epoch": 4.192460850658393, + "grad_norm": 5.217688521952368e-05, + "learning_rate": 1.5476270365944766e-07, + "loss": 0.0, + "num_input_tokens_seen": 115641512, + "step": 171610 + }, + { + "epoch": 4.19258300149024, + "grad_norm": 8.979284757515416e-06, + "learning_rate": 1.547171351375155e-07, + "loss": 0.0, + "num_input_tokens_seen": 115645096, + "step": 171615 + }, + { + "epoch": 4.1927051523220875, + "grad_norm": 0.00022331729996949434, + "learning_rate": 1.546715727627076e-07, + "loss": 0.0, + "num_input_tokens_seen": 115648872, + "step": 171620 + }, + { + "epoch": 4.192827303153934, + "grad_norm": 5.690431407856522e-06, + "learning_rate": 1.5462601653535524e-07, + "loss": 0.0, + "num_input_tokens_seen": 115652328, + "step": 171625 + }, + { + "epoch": 4.192949453985782, + "grad_norm": 9.095601126318797e-05, + "learning_rate": 1.5458046645579014e-07, + "loss": 0.0, + "num_input_tokens_seen": 115655464, + "step": 171630 + }, + { + "epoch": 4.193071604817629, + "grad_norm": 0.00010419006866868585, + "learning_rate": 1.5453492252434308e-07, + "loss": 0.0, + "num_input_tokens_seen": 115659304, + "step": 171635 + }, + { + "epoch": 4.193193755649476, + "grad_norm": 0.0002787335542961955, + "learning_rate": 1.5448938474134575e-07, + "loss": 0.0, + "num_input_tokens_seen": 115663016, + "step": 171640 + }, + { + "epoch": 4.193315906481323, + "grad_norm": 0.0004578603256959468, + "learning_rate": 1.544438531071287e-07, + "loss": 0.0, + "num_input_tokens_seen": 115666664, + "step": 171645 + }, + { + "epoch": 4.193438057313171, + "grad_norm": 0.0024992485996335745, + "learning_rate": 1.5439832762202375e-07, + "loss": 0.0607, + "num_input_tokens_seen": 115670440, + "step": 171650 + }, + { + "epoch": 4.193560208145017, + "grad_norm": 0.00013882042549084872, + "learning_rate": 1.5435280828636143e-07, + "loss": 0.0, + "num_input_tokens_seen": 115674024, + "step": 171655 + }, + { + "epoch": 4.193682358976865, + "grad_norm": 7.745954644633457e-05, + "learning_rate": 1.543072951004728e-07, + "loss": 0.0, + "num_input_tokens_seen": 115677032, + "step": 171660 + }, + { + "epoch": 4.193804509808712, + "grad_norm": 0.00017201209266204387, + "learning_rate": 1.5426178806468926e-07, + "loss": 0.0, + "num_input_tokens_seen": 115680104, + "step": 171665 + }, + { + "epoch": 4.1939266606405585, + "grad_norm": 0.00019128096755594015, + "learning_rate": 1.5421628717934109e-07, + "loss": 0.0, + "num_input_tokens_seen": 115683304, + "step": 171670 + }, + { + "epoch": 4.194048811472406, + "grad_norm": 0.00016167706053238362, + "learning_rate": 1.5417079244475995e-07, + "loss": 0.0, + "num_input_tokens_seen": 115686568, + "step": 171675 + }, + { + "epoch": 4.194170962304253, + "grad_norm": 5.020221942686476e-05, + "learning_rate": 1.54125303861276e-07, + "loss": 0.0, + "num_input_tokens_seen": 115689960, + "step": 171680 + }, + { + "epoch": 4.1942931131361005, + "grad_norm": 0.00040699797682464123, + "learning_rate": 1.540798214292204e-07, + "loss": 0.0, + "num_input_tokens_seen": 115693096, + "step": 171685 + }, + { + "epoch": 4.194415263967947, + "grad_norm": 0.0012270803563296795, + "learning_rate": 1.54034345148924e-07, + "loss": 0.0, + "num_input_tokens_seen": 115696232, + "step": 171690 + }, + { + "epoch": 4.194537414799795, + "grad_norm": 5.7696772273629904e-05, + "learning_rate": 1.5398887502071722e-07, + "loss": 0.0, + "num_input_tokens_seen": 115700072, + "step": 171695 + }, + { + "epoch": 4.194659565631642, + "grad_norm": 0.02207312174141407, + "learning_rate": 1.5394341104493113e-07, + "loss": 0.0, + "num_input_tokens_seen": 115703336, + "step": 171700 + }, + { + "epoch": 4.194781716463489, + "grad_norm": 3.2915835618041456e-05, + "learning_rate": 1.538979532218959e-07, + "loss": 0.0, + "num_input_tokens_seen": 115706792, + "step": 171705 + }, + { + "epoch": 4.194903867295336, + "grad_norm": 0.0008636588463559747, + "learning_rate": 1.538525015519425e-07, + "loss": 0.0477, + "num_input_tokens_seen": 115710248, + "step": 171710 + }, + { + "epoch": 4.195026018127184, + "grad_norm": 0.0001473453885409981, + "learning_rate": 1.5380705603540112e-07, + "loss": 0.0, + "num_input_tokens_seen": 115713704, + "step": 171715 + }, + { + "epoch": 4.19514816895903, + "grad_norm": 0.0003014703397639096, + "learning_rate": 1.5376161667260235e-07, + "loss": 0.0, + "num_input_tokens_seen": 115717480, + "step": 171720 + }, + { + "epoch": 4.195270319790878, + "grad_norm": 0.0003513983974698931, + "learning_rate": 1.5371618346387704e-07, + "loss": 0.0, + "num_input_tokens_seen": 115721256, + "step": 171725 + }, + { + "epoch": 4.195392470622725, + "grad_norm": 2.8909251341247e-05, + "learning_rate": 1.5367075640955495e-07, + "loss": 0.0, + "num_input_tokens_seen": 115724264, + "step": 171730 + }, + { + "epoch": 4.195514621454572, + "grad_norm": 2.142676748917438e-05, + "learning_rate": 1.5362533550996704e-07, + "loss": 0.0, + "num_input_tokens_seen": 115727656, + "step": 171735 + }, + { + "epoch": 4.195636772286419, + "grad_norm": 0.00011070889740949497, + "learning_rate": 1.5357992076544314e-07, + "loss": 0.0, + "num_input_tokens_seen": 115731048, + "step": 171740 + }, + { + "epoch": 4.195758923118267, + "grad_norm": 3.502647814457305e-05, + "learning_rate": 1.5353451217631386e-07, + "loss": 0.0, + "num_input_tokens_seen": 115734696, + "step": 171745 + }, + { + "epoch": 4.195881073950114, + "grad_norm": 5.859508382854983e-05, + "learning_rate": 1.5348910974290907e-07, + "loss": 0.0, + "num_input_tokens_seen": 115738216, + "step": 171750 + }, + { + "epoch": 4.196003224781961, + "grad_norm": 0.0011176398256793618, + "learning_rate": 1.534437134655595e-07, + "loss": 0.0002, + "num_input_tokens_seen": 115742184, + "step": 171755 + }, + { + "epoch": 4.196125375613808, + "grad_norm": 0.00032299006124958396, + "learning_rate": 1.533983233445948e-07, + "loss": 0.0, + "num_input_tokens_seen": 115745448, + "step": 171760 + }, + { + "epoch": 4.196247526445655, + "grad_norm": 5.946209421381354e-05, + "learning_rate": 1.53352939380345e-07, + "loss": 0.0, + "num_input_tokens_seen": 115748776, + "step": 171765 + }, + { + "epoch": 4.196369677277502, + "grad_norm": 0.00039252001442946494, + "learning_rate": 1.5330756157314062e-07, + "loss": 0.0, + "num_input_tokens_seen": 115751976, + "step": 171770 + }, + { + "epoch": 4.196491828109349, + "grad_norm": 4.479515700950287e-05, + "learning_rate": 1.5326218992331119e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115755432, + "step": 171775 + }, + { + "epoch": 4.196613978941197, + "grad_norm": 0.00012405213783495128, + "learning_rate": 1.5321682443118677e-07, + "loss": 0.0, + "num_input_tokens_seen": 115758952, + "step": 171780 + }, + { + "epoch": 4.1967361297730434, + "grad_norm": 0.012555737048387527, + "learning_rate": 1.5317146509709767e-07, + "loss": 0.0, + "num_input_tokens_seen": 115761960, + "step": 171785 + }, + { + "epoch": 4.196858280604891, + "grad_norm": 0.00039039074908941984, + "learning_rate": 1.5312611192137313e-07, + "loss": 0.0, + "num_input_tokens_seen": 115765480, + "step": 171790 + }, + { + "epoch": 4.196980431436738, + "grad_norm": 0.00015265199181158096, + "learning_rate": 1.5308076490434352e-07, + "loss": 0.0, + "num_input_tokens_seen": 115769256, + "step": 171795 + }, + { + "epoch": 4.1971025822685855, + "grad_norm": 0.025387544184923172, + "learning_rate": 1.5303542404633818e-07, + "loss": 0.0, + "num_input_tokens_seen": 115772392, + "step": 171800 + }, + { + "epoch": 4.197224733100432, + "grad_norm": 6.584433140233159e-05, + "learning_rate": 1.529900893476873e-07, + "loss": 0.0, + "num_input_tokens_seen": 115775592, + "step": 171805 + }, + { + "epoch": 4.19734688393228, + "grad_norm": 1.709511343506165e-05, + "learning_rate": 1.5294476080872009e-07, + "loss": 0.0, + "num_input_tokens_seen": 115778408, + "step": 171810 + }, + { + "epoch": 4.197469034764127, + "grad_norm": 3.4848755603889003e-05, + "learning_rate": 1.5289943842976638e-07, + "loss": 0.0, + "num_input_tokens_seen": 115781672, + "step": 171815 + }, + { + "epoch": 4.197591185595974, + "grad_norm": 2.3679769583395682e-05, + "learning_rate": 1.5285412221115602e-07, + "loss": 0.0, + "num_input_tokens_seen": 115785000, + "step": 171820 + }, + { + "epoch": 4.197713336427821, + "grad_norm": 0.00013830607349518687, + "learning_rate": 1.5280881215321805e-07, + "loss": 0.0, + "num_input_tokens_seen": 115788136, + "step": 171825 + }, + { + "epoch": 4.197835487259669, + "grad_norm": 0.005081809591501951, + "learning_rate": 1.527635082562826e-07, + "loss": 0.0, + "num_input_tokens_seen": 115791400, + "step": 171830 + }, + { + "epoch": 4.197957638091515, + "grad_norm": 0.0002572297817096114, + "learning_rate": 1.5271821052067846e-07, + "loss": 0.0, + "num_input_tokens_seen": 115794664, + "step": 171835 + }, + { + "epoch": 4.198079788923363, + "grad_norm": 8.138104021782055e-05, + "learning_rate": 1.526729189467355e-07, + "loss": 0.0009, + "num_input_tokens_seen": 115797992, + "step": 171840 + }, + { + "epoch": 4.19820193975521, + "grad_norm": 0.06563310325145721, + "learning_rate": 1.5262763353478315e-07, + "loss": 0.0, + "num_input_tokens_seen": 115801256, + "step": 171845 + }, + { + "epoch": 4.198324090587057, + "grad_norm": 8.636287384433672e-05, + "learning_rate": 1.5258235428515033e-07, + "loss": 0.0, + "num_input_tokens_seen": 115804520, + "step": 171850 + }, + { + "epoch": 4.198446241418904, + "grad_norm": 0.00015373101632576436, + "learning_rate": 1.5253708119816676e-07, + "loss": 0.0, + "num_input_tokens_seen": 115808040, + "step": 171855 + }, + { + "epoch": 4.198568392250751, + "grad_norm": 0.0005759844789281487, + "learning_rate": 1.524918142741616e-07, + "loss": 0.0, + "num_input_tokens_seen": 115811112, + "step": 171860 + }, + { + "epoch": 4.1986905430825985, + "grad_norm": 0.0023348061367869377, + "learning_rate": 1.5244655351346357e-07, + "loss": 0.0, + "num_input_tokens_seen": 115814440, + "step": 171865 + }, + { + "epoch": 4.198812693914445, + "grad_norm": 0.0023439086508005857, + "learning_rate": 1.5240129891640242e-07, + "loss": 0.0, + "num_input_tokens_seen": 115817640, + "step": 171870 + }, + { + "epoch": 4.198934844746293, + "grad_norm": 31.531675338745117, + "learning_rate": 1.523560504833068e-07, + "loss": 0.0524, + "num_input_tokens_seen": 115820712, + "step": 171875 + }, + { + "epoch": 4.19905699557814, + "grad_norm": 0.0009481237502768636, + "learning_rate": 1.5231080821450616e-07, + "loss": 0.0, + "num_input_tokens_seen": 115823656, + "step": 171880 + }, + { + "epoch": 4.199179146409987, + "grad_norm": 0.00029586380696855485, + "learning_rate": 1.522655721103291e-07, + "loss": 0.0, + "num_input_tokens_seen": 115827048, + "step": 171885 + }, + { + "epoch": 4.199301297241834, + "grad_norm": 0.0025091313291341066, + "learning_rate": 1.5222034217110502e-07, + "loss": 0.0, + "num_input_tokens_seen": 115830504, + "step": 171890 + }, + { + "epoch": 4.199423448073682, + "grad_norm": 0.00017374740855302662, + "learning_rate": 1.5217511839716245e-07, + "loss": 0.0, + "num_input_tokens_seen": 115833896, + "step": 171895 + }, + { + "epoch": 4.199545598905528, + "grad_norm": 0.0007191016338765621, + "learning_rate": 1.521299007888307e-07, + "loss": 0.125, + "num_input_tokens_seen": 115837672, + "step": 171900 + }, + { + "epoch": 4.199667749737376, + "grad_norm": 0.00038250116631388664, + "learning_rate": 1.5208468934643815e-07, + "loss": 0.0, + "num_input_tokens_seen": 115841256, + "step": 171905 + }, + { + "epoch": 4.199789900569223, + "grad_norm": 5.690229590982199e-05, + "learning_rate": 1.5203948407031375e-07, + "loss": 0.0, + "num_input_tokens_seen": 115844456, + "step": 171910 + }, + { + "epoch": 4.19991205140107, + "grad_norm": 0.00010644423309713602, + "learning_rate": 1.5199428496078648e-07, + "loss": 0.0, + "num_input_tokens_seen": 115847656, + "step": 171915 + }, + { + "epoch": 4.200034202232917, + "grad_norm": 1.8538225049269386e-05, + "learning_rate": 1.5194909201818473e-07, + "loss": 0.0, + "num_input_tokens_seen": 115850856, + "step": 171920 + }, + { + "epoch": 4.200156353064765, + "grad_norm": 0.009867326356470585, + "learning_rate": 1.5190390524283747e-07, + "loss": 0.0, + "num_input_tokens_seen": 115853928, + "step": 171925 + }, + { + "epoch": 4.2002785038966115, + "grad_norm": 0.00025367087800987065, + "learning_rate": 1.5185872463507287e-07, + "loss": 0.0, + "num_input_tokens_seen": 115857448, + "step": 171930 + }, + { + "epoch": 4.200400654728458, + "grad_norm": 6.811439379816875e-05, + "learning_rate": 1.518135501952198e-07, + "loss": 0.0, + "num_input_tokens_seen": 115860264, + "step": 171935 + }, + { + "epoch": 4.200522805560306, + "grad_norm": 0.00018472773081157357, + "learning_rate": 1.5176838192360686e-07, + "loss": 0.0, + "num_input_tokens_seen": 115863720, + "step": 171940 + }, + { + "epoch": 4.200644956392153, + "grad_norm": 0.00013160528033040464, + "learning_rate": 1.5172321982056223e-07, + "loss": 0.0, + "num_input_tokens_seen": 115866920, + "step": 171945 + }, + { + "epoch": 4.200767107224, + "grad_norm": 0.0074246665462851524, + "learning_rate": 1.516780638864148e-07, + "loss": 0.0, + "num_input_tokens_seen": 115870312, + "step": 171950 + }, + { + "epoch": 4.200889258055847, + "grad_norm": 0.00046449064393527806, + "learning_rate": 1.5163291412149226e-07, + "loss": 0.0, + "num_input_tokens_seen": 115873512, + "step": 171955 + }, + { + "epoch": 4.201011408887695, + "grad_norm": 9.282070823246613e-05, + "learning_rate": 1.515877705261237e-07, + "loss": 0.0, + "num_input_tokens_seen": 115877032, + "step": 171960 + }, + { + "epoch": 4.201133559719541, + "grad_norm": 2.614716322568711e-05, + "learning_rate": 1.5154263310063708e-07, + "loss": 0.0, + "num_input_tokens_seen": 115880360, + "step": 171965 + }, + { + "epoch": 4.201255710551389, + "grad_norm": 0.004874629434198141, + "learning_rate": 1.5149750184536036e-07, + "loss": 0.0, + "num_input_tokens_seen": 115883560, + "step": 171970 + }, + { + "epoch": 4.201377861383236, + "grad_norm": 1.403738042426994e-05, + "learning_rate": 1.5145237676062228e-07, + "loss": 0.0, + "num_input_tokens_seen": 115886824, + "step": 171975 + }, + { + "epoch": 4.201500012215083, + "grad_norm": 2.9235177862574346e-05, + "learning_rate": 1.5140725784675057e-07, + "loss": 0.0, + "num_input_tokens_seen": 115890216, + "step": 171980 + }, + { + "epoch": 4.20162216304693, + "grad_norm": 0.00021435305825434625, + "learning_rate": 1.5136214510407364e-07, + "loss": 0.0, + "num_input_tokens_seen": 115893736, + "step": 171985 + }, + { + "epoch": 4.201744313878778, + "grad_norm": 0.00011233131954213604, + "learning_rate": 1.5131703853291934e-07, + "loss": 0.0, + "num_input_tokens_seen": 115897448, + "step": 171990 + }, + { + "epoch": 4.2018664647106245, + "grad_norm": 0.0004522551316767931, + "learning_rate": 1.5127193813361595e-07, + "loss": 0.0, + "num_input_tokens_seen": 115900968, + "step": 171995 + }, + { + "epoch": 4.201988615542472, + "grad_norm": 1.637089553696569e-05, + "learning_rate": 1.512268439064911e-07, + "loss": 0.0, + "num_input_tokens_seen": 115904104, + "step": 172000 + }, + { + "epoch": 4.202110766374319, + "grad_norm": 0.0040295966900885105, + "learning_rate": 1.5118175585187286e-07, + "loss": 0.0, + "num_input_tokens_seen": 115907432, + "step": 172005 + }, + { + "epoch": 4.2022329172061665, + "grad_norm": 8.661628817208111e-05, + "learning_rate": 1.5113667397008957e-07, + "loss": 0.0, + "num_input_tokens_seen": 115910696, + "step": 172010 + }, + { + "epoch": 4.202355068038013, + "grad_norm": 1.427132428943878e-05, + "learning_rate": 1.5109159826146834e-07, + "loss": 0.0, + "num_input_tokens_seen": 115914408, + "step": 172015 + }, + { + "epoch": 4.202477218869861, + "grad_norm": 0.00014912939514033496, + "learning_rate": 1.510465287263376e-07, + "loss": 0.0, + "num_input_tokens_seen": 115917672, + "step": 172020 + }, + { + "epoch": 4.202599369701708, + "grad_norm": 4.3930493120569736e-05, + "learning_rate": 1.5100146536502468e-07, + "loss": 0.0001, + "num_input_tokens_seen": 115921064, + "step": 172025 + }, + { + "epoch": 4.202721520533554, + "grad_norm": 0.0004871827259194106, + "learning_rate": 1.5095640817785737e-07, + "loss": 0.0, + "num_input_tokens_seen": 115924264, + "step": 172030 + }, + { + "epoch": 4.202843671365402, + "grad_norm": 0.0010371499229222536, + "learning_rate": 1.509113571651638e-07, + "loss": 0.0, + "num_input_tokens_seen": 115927528, + "step": 172035 + }, + { + "epoch": 4.202965822197249, + "grad_norm": 0.00017510108591523021, + "learning_rate": 1.5086631232727086e-07, + "loss": 0.0, + "num_input_tokens_seen": 115930664, + "step": 172040 + }, + { + "epoch": 4.203087973029096, + "grad_norm": 4.109571091248654e-05, + "learning_rate": 1.508212736645067e-07, + "loss": 0.0, + "num_input_tokens_seen": 115933928, + "step": 172045 + }, + { + "epoch": 4.203210123860943, + "grad_norm": 0.00025954615557566285, + "learning_rate": 1.5077624117719845e-07, + "loss": 0.0, + "num_input_tokens_seen": 115937192, + "step": 172050 + }, + { + "epoch": 4.203332274692791, + "grad_norm": 3.810921043623239e-05, + "learning_rate": 1.507312148656741e-07, + "loss": 0.0, + "num_input_tokens_seen": 115940456, + "step": 172055 + }, + { + "epoch": 4.2034544255246375, + "grad_norm": 1.4034387277206406e-05, + "learning_rate": 1.5068619473026045e-07, + "loss": 0.0, + "num_input_tokens_seen": 115944040, + "step": 172060 + }, + { + "epoch": 4.203576576356485, + "grad_norm": 0.00018147245282307267, + "learning_rate": 1.506411807712854e-07, + "loss": 0.0, + "num_input_tokens_seen": 115947688, + "step": 172065 + }, + { + "epoch": 4.203698727188332, + "grad_norm": 1.4824206118646543e-05, + "learning_rate": 1.5059617298907624e-07, + "loss": 0.0, + "num_input_tokens_seen": 115951016, + "step": 172070 + }, + { + "epoch": 4.20382087802018, + "grad_norm": 0.00018080630979966372, + "learning_rate": 1.505511713839599e-07, + "loss": 0.0, + "num_input_tokens_seen": 115955048, + "step": 172075 + }, + { + "epoch": 4.203943028852026, + "grad_norm": 0.0012226704275235534, + "learning_rate": 1.5050617595626424e-07, + "loss": 0.0, + "num_input_tokens_seen": 115958248, + "step": 172080 + }, + { + "epoch": 4.204065179683874, + "grad_norm": 0.0011231348617002368, + "learning_rate": 1.5046118670631581e-07, + "loss": 0.0, + "num_input_tokens_seen": 115961320, + "step": 172085 + }, + { + "epoch": 4.204187330515721, + "grad_norm": 0.00010311927326256409, + "learning_rate": 1.504162036344422e-07, + "loss": 0.0004, + "num_input_tokens_seen": 115964584, + "step": 172090 + }, + { + "epoch": 4.204309481347568, + "grad_norm": 0.0007668191101402044, + "learning_rate": 1.503712267409707e-07, + "loss": 0.0, + "num_input_tokens_seen": 115967912, + "step": 172095 + }, + { + "epoch": 4.204431632179415, + "grad_norm": 0.00049975625006482, + "learning_rate": 1.5032625602622784e-07, + "loss": 0.0, + "num_input_tokens_seen": 115970984, + "step": 172100 + }, + { + "epoch": 4.204553783011263, + "grad_norm": 0.0009041104349307716, + "learning_rate": 1.5028129149054126e-07, + "loss": 0.0, + "num_input_tokens_seen": 115974248, + "step": 172105 + }, + { + "epoch": 4.2046759338431094, + "grad_norm": 3.793345968006179e-05, + "learning_rate": 1.5023633313423745e-07, + "loss": 0.0, + "num_input_tokens_seen": 115977512, + "step": 172110 + }, + { + "epoch": 4.204798084674957, + "grad_norm": 0.00024618953466415405, + "learning_rate": 1.5019138095764383e-07, + "loss": 0.0, + "num_input_tokens_seen": 115980648, + "step": 172115 + }, + { + "epoch": 4.204920235506804, + "grad_norm": 7.776911843393464e-06, + "learning_rate": 1.5014643496108682e-07, + "loss": 0.0, + "num_input_tokens_seen": 115983976, + "step": 172120 + }, + { + "epoch": 4.205042386338651, + "grad_norm": 0.00018268365238327533, + "learning_rate": 1.5010149514489356e-07, + "loss": 0.0, + "num_input_tokens_seen": 115987048, + "step": 172125 + }, + { + "epoch": 4.205164537170498, + "grad_norm": 0.0017578315455466509, + "learning_rate": 1.5005656150939095e-07, + "loss": 0.0, + "num_input_tokens_seen": 115990184, + "step": 172130 + }, + { + "epoch": 4.205286688002345, + "grad_norm": 0.00027843486168421805, + "learning_rate": 1.5001163405490547e-07, + "loss": 0.0, + "num_input_tokens_seen": 115993640, + "step": 172135 + }, + { + "epoch": 4.205408838834193, + "grad_norm": 0.00025260128313675523, + "learning_rate": 1.499667127817642e-07, + "loss": 0.0, + "num_input_tokens_seen": 115997096, + "step": 172140 + }, + { + "epoch": 4.205530989666039, + "grad_norm": 0.0006347659509629011, + "learning_rate": 1.4992179769029346e-07, + "loss": 0.0, + "num_input_tokens_seen": 116000296, + "step": 172145 + }, + { + "epoch": 4.205653140497887, + "grad_norm": 7.18262090231292e-05, + "learning_rate": 1.4987688878082028e-07, + "loss": 0.0, + "num_input_tokens_seen": 116003368, + "step": 172150 + }, + { + "epoch": 4.205775291329734, + "grad_norm": 0.000331896502757445, + "learning_rate": 1.4983198605367075e-07, + "loss": 0.0, + "num_input_tokens_seen": 116006248, + "step": 172155 + }, + { + "epoch": 4.205897442161581, + "grad_norm": 0.0006194012821651995, + "learning_rate": 1.4978708950917162e-07, + "loss": 0.0, + "num_input_tokens_seen": 116009448, + "step": 172160 + }, + { + "epoch": 4.206019592993428, + "grad_norm": 0.0014870319282636046, + "learning_rate": 1.4974219914764986e-07, + "loss": 0.0, + "num_input_tokens_seen": 116012712, + "step": 172165 + }, + { + "epoch": 4.206141743825276, + "grad_norm": 0.000584141060244292, + "learning_rate": 1.496973149694314e-07, + "loss": 0.0, + "num_input_tokens_seen": 116016424, + "step": 172170 + }, + { + "epoch": 4.2062638946571225, + "grad_norm": 0.0006933091790415347, + "learning_rate": 1.4965243697484253e-07, + "loss": 0.0, + "num_input_tokens_seen": 116019816, + "step": 172175 + }, + { + "epoch": 4.20638604548897, + "grad_norm": 0.00044879803317599, + "learning_rate": 1.4960756516421013e-07, + "loss": 0.0, + "num_input_tokens_seen": 116022888, + "step": 172180 + }, + { + "epoch": 4.206508196320817, + "grad_norm": 0.00214349920861423, + "learning_rate": 1.4956269953785993e-07, + "loss": 0.0, + "num_input_tokens_seen": 116026216, + "step": 172185 + }, + { + "epoch": 4.2066303471526645, + "grad_norm": 0.00808340311050415, + "learning_rate": 1.495178400961188e-07, + "loss": 0.0, + "num_input_tokens_seen": 116029480, + "step": 172190 + }, + { + "epoch": 4.206752497984511, + "grad_norm": 5.1568593335105106e-05, + "learning_rate": 1.4947298683931254e-07, + "loss": 0.0, + "num_input_tokens_seen": 116033000, + "step": 172195 + }, + { + "epoch": 4.206874648816359, + "grad_norm": 0.0003689782170113176, + "learning_rate": 1.4942813976776759e-07, + "loss": 0.0, + "num_input_tokens_seen": 116036392, + "step": 172200 + }, + { + "epoch": 4.206996799648206, + "grad_norm": 7.517338872276014e-06, + "learning_rate": 1.493832988818098e-07, + "loss": 0.0, + "num_input_tokens_seen": 116039976, + "step": 172205 + }, + { + "epoch": 4.207118950480052, + "grad_norm": 8.319402695633471e-05, + "learning_rate": 1.4933846418176578e-07, + "loss": 0.0, + "num_input_tokens_seen": 116043368, + "step": 172210 + }, + { + "epoch": 4.2072411013119, + "grad_norm": 0.0005808392306789756, + "learning_rate": 1.4929363566796082e-07, + "loss": 0.0, + "num_input_tokens_seen": 116046824, + "step": 172215 + }, + { + "epoch": 4.207363252143747, + "grad_norm": 0.0011009281734004617, + "learning_rate": 1.492488133407215e-07, + "loss": 0.0, + "num_input_tokens_seen": 116049896, + "step": 172220 + }, + { + "epoch": 4.207485402975594, + "grad_norm": 3.104091956629418e-05, + "learning_rate": 1.492039972003738e-07, + "loss": 0.0426, + "num_input_tokens_seen": 116053096, + "step": 172225 + }, + { + "epoch": 4.207607553807441, + "grad_norm": 0.00019769801292568445, + "learning_rate": 1.491591872472433e-07, + "loss": 0.0, + "num_input_tokens_seen": 116056168, + "step": 172230 + }, + { + "epoch": 4.207729704639289, + "grad_norm": 4.43961143901106e-05, + "learning_rate": 1.491143834816563e-07, + "loss": 0.0, + "num_input_tokens_seen": 116059560, + "step": 172235 + }, + { + "epoch": 4.2078518554711355, + "grad_norm": 0.0002230397949460894, + "learning_rate": 1.4906958590393802e-07, + "loss": 0.0, + "num_input_tokens_seen": 116062824, + "step": 172240 + }, + { + "epoch": 4.207974006302983, + "grad_norm": 4.6068620576988906e-05, + "learning_rate": 1.4902479451441464e-07, + "loss": 0.0, + "num_input_tokens_seen": 116066344, + "step": 172245 + }, + { + "epoch": 4.20809615713483, + "grad_norm": 1.1687574442476034e-05, + "learning_rate": 1.4898000931341204e-07, + "loss": 0.0, + "num_input_tokens_seen": 116069352, + "step": 172250 + }, + { + "epoch": 4.2082183079666775, + "grad_norm": 0.00023311935365200043, + "learning_rate": 1.4893523030125544e-07, + "loss": 0.0, + "num_input_tokens_seen": 116072872, + "step": 172255 + }, + { + "epoch": 4.208340458798524, + "grad_norm": 0.00037141842767596245, + "learning_rate": 1.4889045747827111e-07, + "loss": 0.0, + "num_input_tokens_seen": 116075880, + "step": 172260 + }, + { + "epoch": 4.208462609630372, + "grad_norm": 7.853261195123196e-05, + "learning_rate": 1.4884569084478394e-07, + "loss": 0.0, + "num_input_tokens_seen": 116079336, + "step": 172265 + }, + { + "epoch": 4.208584760462219, + "grad_norm": 5.044359568273649e-06, + "learning_rate": 1.4880093040112018e-07, + "loss": 0.0, + "num_input_tokens_seen": 116082280, + "step": 172270 + }, + { + "epoch": 4.208706911294066, + "grad_norm": 6.285287963692099e-05, + "learning_rate": 1.4875617614760493e-07, + "loss": 0.0, + "num_input_tokens_seen": 116085608, + "step": 172275 + }, + { + "epoch": 4.208829062125913, + "grad_norm": 0.0008630296215415001, + "learning_rate": 1.4871142808456349e-07, + "loss": 0.0, + "num_input_tokens_seen": 116088744, + "step": 172280 + }, + { + "epoch": 4.208951212957761, + "grad_norm": 0.001255490817129612, + "learning_rate": 1.4866668621232182e-07, + "loss": 0.0, + "num_input_tokens_seen": 116092008, + "step": 172285 + }, + { + "epoch": 4.209073363789607, + "grad_norm": 0.000520072877407074, + "learning_rate": 1.4862195053120464e-07, + "loss": 0.0, + "num_input_tokens_seen": 116095144, + "step": 172290 + }, + { + "epoch": 4.209195514621454, + "grad_norm": 0.00010253593791276217, + "learning_rate": 1.4857722104153792e-07, + "loss": 0.0, + "num_input_tokens_seen": 116099048, + "step": 172295 + }, + { + "epoch": 4.209317665453302, + "grad_norm": 0.0004774238623213023, + "learning_rate": 1.485324977436464e-07, + "loss": 0.0, + "num_input_tokens_seen": 116102504, + "step": 172300 + }, + { + "epoch": 4.2094398162851485, + "grad_norm": 0.000514692161232233, + "learning_rate": 1.4848778063785583e-07, + "loss": 0.0, + "num_input_tokens_seen": 116105512, + "step": 172305 + }, + { + "epoch": 4.209561967116996, + "grad_norm": 5.9682086430257186e-05, + "learning_rate": 1.4844306972449093e-07, + "loss": 0.0, + "num_input_tokens_seen": 116108648, + "step": 172310 + }, + { + "epoch": 4.209684117948843, + "grad_norm": 4.6664001274621114e-05, + "learning_rate": 1.4839836500387703e-07, + "loss": 0.0, + "num_input_tokens_seen": 116111912, + "step": 172315 + }, + { + "epoch": 4.2098062687806905, + "grad_norm": 0.0013582675019279122, + "learning_rate": 1.4835366647633963e-07, + "loss": 0.0, + "num_input_tokens_seen": 116115176, + "step": 172320 + }, + { + "epoch": 4.209928419612537, + "grad_norm": 4.392506070871605e-06, + "learning_rate": 1.48308974142203e-07, + "loss": 0.0002, + "num_input_tokens_seen": 116118312, + "step": 172325 + }, + { + "epoch": 4.210050570444385, + "grad_norm": 6.457888503064169e-06, + "learning_rate": 1.4826428800179303e-07, + "loss": 0.0, + "num_input_tokens_seen": 116121704, + "step": 172330 + }, + { + "epoch": 4.210172721276232, + "grad_norm": 0.0021146265789866447, + "learning_rate": 1.4821960805543388e-07, + "loss": 0.0, + "num_input_tokens_seen": 116125352, + "step": 172335 + }, + { + "epoch": 4.210294872108079, + "grad_norm": 1.908622834889684e-05, + "learning_rate": 1.4817493430345084e-07, + "loss": 0.0, + "num_input_tokens_seen": 116128296, + "step": 172340 + }, + { + "epoch": 4.210417022939926, + "grad_norm": 0.13494116067886353, + "learning_rate": 1.48130266746169e-07, + "loss": 0.0, + "num_input_tokens_seen": 116131944, + "step": 172345 + }, + { + "epoch": 4.210539173771774, + "grad_norm": 8.20185596239753e-06, + "learning_rate": 1.480856053839129e-07, + "loss": 0.0489, + "num_input_tokens_seen": 116135528, + "step": 172350 + }, + { + "epoch": 4.21066132460362, + "grad_norm": 7.075269240885973e-05, + "learning_rate": 1.4804095021700746e-07, + "loss": 0.0, + "num_input_tokens_seen": 116139304, + "step": 172355 + }, + { + "epoch": 4.210783475435468, + "grad_norm": 0.0006885943002998829, + "learning_rate": 1.4799630124577733e-07, + "loss": 0.0, + "num_input_tokens_seen": 116142760, + "step": 172360 + }, + { + "epoch": 4.210905626267315, + "grad_norm": 0.00013424194185063243, + "learning_rate": 1.4795165847054735e-07, + "loss": 0.0, + "num_input_tokens_seen": 116145832, + "step": 172365 + }, + { + "epoch": 4.211027777099162, + "grad_norm": 8.563858136767522e-05, + "learning_rate": 1.4790702189164194e-07, + "loss": 0.0, + "num_input_tokens_seen": 116148904, + "step": 172370 + }, + { + "epoch": 4.211149927931009, + "grad_norm": 0.00026543528656475246, + "learning_rate": 1.4786239150938594e-07, + "loss": 0.0, + "num_input_tokens_seen": 116152104, + "step": 172375 + }, + { + "epoch": 4.211272078762857, + "grad_norm": 0.0008876949432305992, + "learning_rate": 1.47817767324104e-07, + "loss": 0.0, + "num_input_tokens_seen": 116155816, + "step": 172380 + }, + { + "epoch": 4.2113942295947036, + "grad_norm": 9.343509009340778e-05, + "learning_rate": 1.4777314933612016e-07, + "loss": 0.0, + "num_input_tokens_seen": 116159016, + "step": 172385 + }, + { + "epoch": 4.21151638042655, + "grad_norm": 0.0004023423243779689, + "learning_rate": 1.4772853754575942e-07, + "loss": 0.0, + "num_input_tokens_seen": 116162344, + "step": 172390 + }, + { + "epoch": 4.211638531258398, + "grad_norm": 0.00024002035206649452, + "learning_rate": 1.4768393195334583e-07, + "loss": 0.0001, + "num_input_tokens_seen": 116165480, + "step": 172395 + }, + { + "epoch": 4.211760682090245, + "grad_norm": 0.0009489704389125109, + "learning_rate": 1.476393325592038e-07, + "loss": 0.0, + "num_input_tokens_seen": 116169000, + "step": 172400 + }, + { + "epoch": 4.211882832922092, + "grad_norm": 0.0010699955746531487, + "learning_rate": 1.475947393636582e-07, + "loss": 0.0, + "num_input_tokens_seen": 116172264, + "step": 172405 + }, + { + "epoch": 4.212004983753939, + "grad_norm": 2.9108750823070295e-05, + "learning_rate": 1.475501523670325e-07, + "loss": 0.0, + "num_input_tokens_seen": 116175848, + "step": 172410 + }, + { + "epoch": 4.212127134585787, + "grad_norm": 0.003317110938951373, + "learning_rate": 1.475055715696517e-07, + "loss": 0.0, + "num_input_tokens_seen": 116179560, + "step": 172415 + }, + { + "epoch": 4.212249285417633, + "grad_norm": 2.4914932510000654e-05, + "learning_rate": 1.4746099697183945e-07, + "loss": 0.0, + "num_input_tokens_seen": 116182824, + "step": 172420 + }, + { + "epoch": 4.212371436249481, + "grad_norm": 0.0004848266253247857, + "learning_rate": 1.4741642857392045e-07, + "loss": 0.0, + "num_input_tokens_seen": 116186280, + "step": 172425 + }, + { + "epoch": 4.212493587081328, + "grad_norm": 0.00012116871948819607, + "learning_rate": 1.4737186637621812e-07, + "loss": 0.0, + "num_input_tokens_seen": 116189736, + "step": 172430 + }, + { + "epoch": 4.2126157379131755, + "grad_norm": 0.019111763685941696, + "learning_rate": 1.4732731037905698e-07, + "loss": 0.0, + "num_input_tokens_seen": 116192872, + "step": 172435 + }, + { + "epoch": 4.212737888745022, + "grad_norm": 8.967510075308383e-05, + "learning_rate": 1.4728276058276122e-07, + "loss": 0.0, + "num_input_tokens_seen": 116195752, + "step": 172440 + }, + { + "epoch": 4.21286003957687, + "grad_norm": 0.0025152855087071657, + "learning_rate": 1.4723821698765437e-07, + "loss": 0.0, + "num_input_tokens_seen": 116198824, + "step": 172445 + }, + { + "epoch": 4.212982190408717, + "grad_norm": 0.0015792966587468982, + "learning_rate": 1.471936795940607e-07, + "loss": 0.0, + "num_input_tokens_seen": 116202024, + "step": 172450 + }, + { + "epoch": 4.213104341240564, + "grad_norm": 0.0015235176542773843, + "learning_rate": 1.4714914840230385e-07, + "loss": 0.0, + "num_input_tokens_seen": 116205032, + "step": 172455 + }, + { + "epoch": 4.213226492072411, + "grad_norm": 6.820956332376227e-05, + "learning_rate": 1.471046234127079e-07, + "loss": 0.0, + "num_input_tokens_seen": 116208296, + "step": 172460 + }, + { + "epoch": 4.213348642904259, + "grad_norm": 0.07334547489881516, + "learning_rate": 1.4706010462559638e-07, + "loss": 0.0, + "num_input_tokens_seen": 116211688, + "step": 172465 + }, + { + "epoch": 4.213470793736105, + "grad_norm": 0.0005423697293736041, + "learning_rate": 1.470155920412932e-07, + "loss": 0.0, + "num_input_tokens_seen": 116214824, + "step": 172470 + }, + { + "epoch": 4.213592944567952, + "grad_norm": 0.007120200432837009, + "learning_rate": 1.4697108566012228e-07, + "loss": 0.0, + "num_input_tokens_seen": 116218088, + "step": 172475 + }, + { + "epoch": 4.2137150953998, + "grad_norm": 0.01869882456958294, + "learning_rate": 1.46926585482407e-07, + "loss": 0.0, + "num_input_tokens_seen": 116221096, + "step": 172480 + }, + { + "epoch": 4.2138372462316465, + "grad_norm": 8.70914154802449e-05, + "learning_rate": 1.4688209150847085e-07, + "loss": 0.0, + "num_input_tokens_seen": 116224040, + "step": 172485 + }, + { + "epoch": 4.213959397063494, + "grad_norm": 0.0005563409649766982, + "learning_rate": 1.4683760373863785e-07, + "loss": 0.0, + "num_input_tokens_seen": 116227048, + "step": 172490 + }, + { + "epoch": 4.214081547895341, + "grad_norm": 5.803833482787013e-05, + "learning_rate": 1.4679312217323102e-07, + "loss": 0.0, + "num_input_tokens_seen": 116230824, + "step": 172495 + }, + { + "epoch": 4.2142036987271885, + "grad_norm": 1.2437372788554057e-05, + "learning_rate": 1.4674864681257438e-07, + "loss": 0.0, + "num_input_tokens_seen": 116233960, + "step": 172500 + }, + { + "epoch": 4.214325849559035, + "grad_norm": 0.004784159827977419, + "learning_rate": 1.4670417765699072e-07, + "loss": 0.0, + "num_input_tokens_seen": 116236904, + "step": 172505 + }, + { + "epoch": 4.214448000390883, + "grad_norm": 4.994161645299755e-05, + "learning_rate": 1.4665971470680417e-07, + "loss": 0.0, + "num_input_tokens_seen": 116240296, + "step": 172510 + }, + { + "epoch": 4.21457015122273, + "grad_norm": 0.0012720542727038264, + "learning_rate": 1.4661525796233732e-07, + "loss": 0.0, + "num_input_tokens_seen": 116244072, + "step": 172515 + }, + { + "epoch": 4.214692302054577, + "grad_norm": 0.0005264293286018074, + "learning_rate": 1.4657080742391414e-07, + "loss": 0.0, + "num_input_tokens_seen": 116247400, + "step": 172520 + }, + { + "epoch": 4.214814452886424, + "grad_norm": 0.010150831192731857, + "learning_rate": 1.465263630918574e-07, + "loss": 0.0, + "num_input_tokens_seen": 116250792, + "step": 172525 + }, + { + "epoch": 4.214936603718272, + "grad_norm": 0.0002481169649399817, + "learning_rate": 1.4648192496649047e-07, + "loss": 0.0, + "num_input_tokens_seen": 116253928, + "step": 172530 + }, + { + "epoch": 4.215058754550118, + "grad_norm": 4.520795846474357e-05, + "learning_rate": 1.464374930481368e-07, + "loss": 0.0, + "num_input_tokens_seen": 116257768, + "step": 172535 + }, + { + "epoch": 4.215180905381966, + "grad_norm": 8.352724398719147e-05, + "learning_rate": 1.46393067337119e-07, + "loss": 0.0, + "num_input_tokens_seen": 116261480, + "step": 172540 + }, + { + "epoch": 4.215303056213813, + "grad_norm": 4.275560422684066e-05, + "learning_rate": 1.4634864783376055e-07, + "loss": 0.0, + "num_input_tokens_seen": 116264808, + "step": 172545 + }, + { + "epoch": 4.21542520704566, + "grad_norm": 1.3493899132299703e-05, + "learning_rate": 1.4630423453838427e-07, + "loss": 0.0, + "num_input_tokens_seen": 116268520, + "step": 172550 + }, + { + "epoch": 4.215547357877507, + "grad_norm": 0.0005210431991145015, + "learning_rate": 1.4625982745131315e-07, + "loss": 0.0, + "num_input_tokens_seen": 116271720, + "step": 172555 + }, + { + "epoch": 4.215669508709354, + "grad_norm": 0.0029650393407791853, + "learning_rate": 1.4621542657287033e-07, + "loss": 0.0, + "num_input_tokens_seen": 116274984, + "step": 172560 + }, + { + "epoch": 4.2157916595412015, + "grad_norm": 6.22431471128948e-05, + "learning_rate": 1.4617103190337853e-07, + "loss": 0.0, + "num_input_tokens_seen": 116278504, + "step": 172565 + }, + { + "epoch": 4.215913810373048, + "grad_norm": 0.0007015995215624571, + "learning_rate": 1.4612664344316073e-07, + "loss": 0.0, + "num_input_tokens_seen": 116281896, + "step": 172570 + }, + { + "epoch": 4.216035961204896, + "grad_norm": 7.395140710286796e-05, + "learning_rate": 1.4608226119253942e-07, + "loss": 0.0, + "num_input_tokens_seen": 116285416, + "step": 172575 + }, + { + "epoch": 4.216158112036743, + "grad_norm": 0.0005374195170588791, + "learning_rate": 1.4603788515183792e-07, + "loss": 0.0, + "num_input_tokens_seen": 116288808, + "step": 172580 + }, + { + "epoch": 4.21628026286859, + "grad_norm": 2.6126805096282624e-05, + "learning_rate": 1.4599351532137848e-07, + "loss": 0.0, + "num_input_tokens_seen": 116292072, + "step": 172585 + }, + { + "epoch": 4.216402413700437, + "grad_norm": 0.0012434420641511679, + "learning_rate": 1.459491517014837e-07, + "loss": 0.0, + "num_input_tokens_seen": 116295784, + "step": 172590 + }, + { + "epoch": 4.216524564532285, + "grad_norm": 14.83011531829834, + "learning_rate": 1.4590479429247672e-07, + "loss": 0.1056, + "num_input_tokens_seen": 116299624, + "step": 172595 + }, + { + "epoch": 4.216646715364131, + "grad_norm": 3.7940932088531554e-05, + "learning_rate": 1.458604430946795e-07, + "loss": 0.0, + "num_input_tokens_seen": 116302696, + "step": 172600 + }, + { + "epoch": 4.216768866195979, + "grad_norm": 5.4114127124194056e-05, + "learning_rate": 1.45816098108415e-07, + "loss": 0.0, + "num_input_tokens_seen": 116306408, + "step": 172605 + }, + { + "epoch": 4.216891017027826, + "grad_norm": 1.4836638001725078e-05, + "learning_rate": 1.4577175933400554e-07, + "loss": 0.0, + "num_input_tokens_seen": 116309416, + "step": 172610 + }, + { + "epoch": 4.217013167859673, + "grad_norm": 0.0008437048527412117, + "learning_rate": 1.4572742677177375e-07, + "loss": 0.0, + "num_input_tokens_seen": 116312168, + "step": 172615 + }, + { + "epoch": 4.21713531869152, + "grad_norm": 3.5103963455185294e-05, + "learning_rate": 1.4568310042204156e-07, + "loss": 0.0, + "num_input_tokens_seen": 116315496, + "step": 172620 + }, + { + "epoch": 4.217257469523368, + "grad_norm": 0.005723453126847744, + "learning_rate": 1.4563878028513177e-07, + "loss": 0.0, + "num_input_tokens_seen": 116318888, + "step": 172625 + }, + { + "epoch": 4.2173796203552145, + "grad_norm": 0.001771946670487523, + "learning_rate": 1.4559446636136675e-07, + "loss": 0.0, + "num_input_tokens_seen": 116322088, + "step": 172630 + }, + { + "epoch": 4.217501771187062, + "grad_norm": 0.0040583363734185696, + "learning_rate": 1.4555015865106835e-07, + "loss": 0.0, + "num_input_tokens_seen": 116325288, + "step": 172635 + }, + { + "epoch": 4.217623922018909, + "grad_norm": 7.745297807559837e-06, + "learning_rate": 1.455058571545593e-07, + "loss": 0.0, + "num_input_tokens_seen": 116328744, + "step": 172640 + }, + { + "epoch": 4.2177460728507565, + "grad_norm": 0.0033273485023528337, + "learning_rate": 1.454615618721612e-07, + "loss": 0.0, + "num_input_tokens_seen": 116332584, + "step": 172645 + }, + { + "epoch": 4.217868223682603, + "grad_norm": 3.633387677837163e-05, + "learning_rate": 1.4541727280419647e-07, + "loss": 0.0, + "num_input_tokens_seen": 116336232, + "step": 172650 + }, + { + "epoch": 4.21799037451445, + "grad_norm": 0.00017991135246120393, + "learning_rate": 1.4537298995098745e-07, + "loss": 0.0, + "num_input_tokens_seen": 116339240, + "step": 172655 + }, + { + "epoch": 4.218112525346298, + "grad_norm": 3.644289608928375e-05, + "learning_rate": 1.4532871331285568e-07, + "loss": 0.0, + "num_input_tokens_seen": 116342760, + "step": 172660 + }, + { + "epoch": 4.218234676178144, + "grad_norm": 0.0003333684871904552, + "learning_rate": 1.4528444289012353e-07, + "loss": 0.0, + "num_input_tokens_seen": 116345896, + "step": 172665 + }, + { + "epoch": 4.218356827009992, + "grad_norm": 3.655048203654587e-05, + "learning_rate": 1.4524017868311268e-07, + "loss": 0.0001, + "num_input_tokens_seen": 116349160, + "step": 172670 + }, + { + "epoch": 4.218478977841839, + "grad_norm": 0.00015764005365781486, + "learning_rate": 1.4519592069214538e-07, + "loss": 0.0, + "num_input_tokens_seen": 116352296, + "step": 172675 + }, + { + "epoch": 4.218601128673686, + "grad_norm": 0.0005261016194708645, + "learning_rate": 1.4515166891754292e-07, + "loss": 0.0, + "num_input_tokens_seen": 116355304, + "step": 172680 + }, + { + "epoch": 4.218723279505533, + "grad_norm": 0.0004089116700924933, + "learning_rate": 1.4510742335962777e-07, + "loss": 0.0, + "num_input_tokens_seen": 116358568, + "step": 172685 + }, + { + "epoch": 4.218845430337381, + "grad_norm": 0.00021509006910491735, + "learning_rate": 1.4506318401872143e-07, + "loss": 0.0, + "num_input_tokens_seen": 116362408, + "step": 172690 + }, + { + "epoch": 4.2189675811692275, + "grad_norm": 0.00015573047858197242, + "learning_rate": 1.4501895089514525e-07, + "loss": 0.0, + "num_input_tokens_seen": 116366120, + "step": 172695 + }, + { + "epoch": 4.219089732001075, + "grad_norm": 4.082197392563103e-06, + "learning_rate": 1.449747239892215e-07, + "loss": 0.0, + "num_input_tokens_seen": 116369640, + "step": 172700 + }, + { + "epoch": 4.219211882832922, + "grad_norm": 8.524627628503367e-05, + "learning_rate": 1.449305033012712e-07, + "loss": 0.0, + "num_input_tokens_seen": 116372648, + "step": 172705 + }, + { + "epoch": 4.21933403366477, + "grad_norm": 0.0002137812552973628, + "learning_rate": 1.4488628883161658e-07, + "loss": 0.0, + "num_input_tokens_seen": 116376168, + "step": 172710 + }, + { + "epoch": 4.219456184496616, + "grad_norm": 0.00010211888002231717, + "learning_rate": 1.4484208058057866e-07, + "loss": 0.0, + "num_input_tokens_seen": 116379240, + "step": 172715 + }, + { + "epoch": 4.219578335328464, + "grad_norm": 0.0004470855346880853, + "learning_rate": 1.4479787854847904e-07, + "loss": 0.0002, + "num_input_tokens_seen": 116382440, + "step": 172720 + }, + { + "epoch": 4.219700486160311, + "grad_norm": 0.020762775093317032, + "learning_rate": 1.447536827356396e-07, + "loss": 0.0, + "num_input_tokens_seen": 116385640, + "step": 172725 + }, + { + "epoch": 4.219822636992158, + "grad_norm": 7.423523493343964e-05, + "learning_rate": 1.4470949314238112e-07, + "loss": 0.0, + "num_input_tokens_seen": 116388968, + "step": 172730 + }, + { + "epoch": 4.219944787824005, + "grad_norm": 0.0003863392921630293, + "learning_rate": 1.4466530976902557e-07, + "loss": 0.0, + "num_input_tokens_seen": 116392296, + "step": 172735 + }, + { + "epoch": 4.220066938655853, + "grad_norm": 1.6147825590451248e-05, + "learning_rate": 1.446211326158936e-07, + "loss": 0.0, + "num_input_tokens_seen": 116395368, + "step": 172740 + }, + { + "epoch": 4.220189089487699, + "grad_norm": 0.00059023208450526, + "learning_rate": 1.445769616833069e-07, + "loss": 0.0, + "num_input_tokens_seen": 116399080, + "step": 172745 + }, + { + "epoch": 4.220311240319546, + "grad_norm": 0.0002006474242080003, + "learning_rate": 1.4453279697158683e-07, + "loss": 0.0, + "num_input_tokens_seen": 116402152, + "step": 172750 + }, + { + "epoch": 4.220433391151394, + "grad_norm": 0.00967673771083355, + "learning_rate": 1.4448863848105407e-07, + "loss": 0.0, + "num_input_tokens_seen": 116405736, + "step": 172755 + }, + { + "epoch": 4.220555541983241, + "grad_norm": 0.00023713294649496675, + "learning_rate": 1.444444862120303e-07, + "loss": 0.0, + "num_input_tokens_seen": 116408488, + "step": 172760 + }, + { + "epoch": 4.220677692815088, + "grad_norm": 0.002359689911827445, + "learning_rate": 1.4440034016483614e-07, + "loss": 0.0, + "num_input_tokens_seen": 116412136, + "step": 172765 + }, + { + "epoch": 4.220799843646935, + "grad_norm": 0.0009306291467510164, + "learning_rate": 1.4435620033979302e-07, + "loss": 0.0, + "num_input_tokens_seen": 116415656, + "step": 172770 + }, + { + "epoch": 4.220921994478783, + "grad_norm": 4.442542831384344e-06, + "learning_rate": 1.443120667372215e-07, + "loss": 0.0, + "num_input_tokens_seen": 116419112, + "step": 172775 + }, + { + "epoch": 4.221044145310629, + "grad_norm": 0.0063672238029539585, + "learning_rate": 1.4426793935744287e-07, + "loss": 0.0, + "num_input_tokens_seen": 116422952, + "step": 172780 + }, + { + "epoch": 4.221166296142477, + "grad_norm": 4.459876072360203e-05, + "learning_rate": 1.442238182007781e-07, + "loss": 0.0, + "num_input_tokens_seen": 116426280, + "step": 172785 + }, + { + "epoch": 4.221288446974324, + "grad_norm": 5.368567872210406e-05, + "learning_rate": 1.4417970326754803e-07, + "loss": 0.0, + "num_input_tokens_seen": 116429352, + "step": 172790 + }, + { + "epoch": 4.221410597806171, + "grad_norm": 0.00011356353934388608, + "learning_rate": 1.44135594558073e-07, + "loss": 0.0305, + "num_input_tokens_seen": 116433000, + "step": 172795 + }, + { + "epoch": 4.221532748638018, + "grad_norm": 0.0008511125342920423, + "learning_rate": 1.4409149207267434e-07, + "loss": 0.0, + "num_input_tokens_seen": 116436392, + "step": 172800 + }, + { + "epoch": 4.221654899469866, + "grad_norm": 3.7000179872848094e-05, + "learning_rate": 1.4404739581167236e-07, + "loss": 0.0, + "num_input_tokens_seen": 116439720, + "step": 172805 + }, + { + "epoch": 4.2217770503017125, + "grad_norm": 3.221071165171452e-05, + "learning_rate": 1.4400330577538822e-07, + "loss": 0.0, + "num_input_tokens_seen": 116443240, + "step": 172810 + }, + { + "epoch": 4.22189920113356, + "grad_norm": 0.00017757921887096018, + "learning_rate": 1.43959221964142e-07, + "loss": 0.0, + "num_input_tokens_seen": 116446696, + "step": 172815 + }, + { + "epoch": 4.222021351965407, + "grad_norm": 0.00012855124077759683, + "learning_rate": 1.439151443782548e-07, + "loss": 0.0, + "num_input_tokens_seen": 116450792, + "step": 172820 + }, + { + "epoch": 4.2221435027972545, + "grad_norm": 2.032962402154226e-05, + "learning_rate": 1.4387107301804668e-07, + "loss": 0.0, + "num_input_tokens_seen": 116454184, + "step": 172825 + }, + { + "epoch": 4.222265653629101, + "grad_norm": 0.006621385924518108, + "learning_rate": 1.4382700788383873e-07, + "loss": 0.0, + "num_input_tokens_seen": 116457320, + "step": 172830 + }, + { + "epoch": 4.222387804460948, + "grad_norm": 8.379903192690108e-06, + "learning_rate": 1.4378294897595068e-07, + "loss": 0.0, + "num_input_tokens_seen": 116461480, + "step": 172835 + }, + { + "epoch": 4.222509955292796, + "grad_norm": 0.0001950185833266005, + "learning_rate": 1.4373889629470336e-07, + "loss": 0.0, + "num_input_tokens_seen": 116464872, + "step": 172840 + }, + { + "epoch": 4.222632106124642, + "grad_norm": 0.008499527350068092, + "learning_rate": 1.4369484984041735e-07, + "loss": 0.0, + "num_input_tokens_seen": 116468200, + "step": 172845 + }, + { + "epoch": 4.22275425695649, + "grad_norm": 0.0044451188296079636, + "learning_rate": 1.4365080961341246e-07, + "loss": 0.0, + "num_input_tokens_seen": 116471592, + "step": 172850 + }, + { + "epoch": 4.222876407788337, + "grad_norm": 7.82210463512456e-06, + "learning_rate": 1.4360677561400947e-07, + "loss": 0.0, + "num_input_tokens_seen": 116475240, + "step": 172855 + }, + { + "epoch": 4.222998558620184, + "grad_norm": 0.00022457803424913436, + "learning_rate": 1.435627478425282e-07, + "loss": 0.0, + "num_input_tokens_seen": 116479144, + "step": 172860 + }, + { + "epoch": 4.223120709452031, + "grad_norm": 0.00020594919624272734, + "learning_rate": 1.4351872629928907e-07, + "loss": 0.0, + "num_input_tokens_seen": 116482472, + "step": 172865 + }, + { + "epoch": 4.223242860283879, + "grad_norm": 0.0017935315845534205, + "learning_rate": 1.4347471098461194e-07, + "loss": 0.0, + "num_input_tokens_seen": 116485544, + "step": 172870 + }, + { + "epoch": 4.2233650111157255, + "grad_norm": 0.004511896055191755, + "learning_rate": 1.434307018988171e-07, + "loss": 0.0, + "num_input_tokens_seen": 116488936, + "step": 172875 + }, + { + "epoch": 4.223487161947573, + "grad_norm": 0.0023193045053631067, + "learning_rate": 1.4338669904222478e-07, + "loss": 0.0, + "num_input_tokens_seen": 116492008, + "step": 172880 + }, + { + "epoch": 4.22360931277942, + "grad_norm": 0.0012620068155229092, + "learning_rate": 1.4334270241515466e-07, + "loss": 0.0, + "num_input_tokens_seen": 116495400, + "step": 172885 + }, + { + "epoch": 4.2237314636112675, + "grad_norm": 0.0058755516074597836, + "learning_rate": 1.4329871201792698e-07, + "loss": 0.0, + "num_input_tokens_seen": 116498920, + "step": 172890 + }, + { + "epoch": 4.223853614443114, + "grad_norm": 0.00023124816652853042, + "learning_rate": 1.4325472785086147e-07, + "loss": 0.0, + "num_input_tokens_seen": 116502056, + "step": 172895 + }, + { + "epoch": 4.223975765274962, + "grad_norm": 0.009667105972766876, + "learning_rate": 1.4321074991427785e-07, + "loss": 0.0002, + "num_input_tokens_seen": 116505320, + "step": 172900 + }, + { + "epoch": 4.224097916106809, + "grad_norm": 0.005875737406313419, + "learning_rate": 1.431667782084962e-07, + "loss": 0.0, + "num_input_tokens_seen": 116508520, + "step": 172905 + }, + { + "epoch": 4.224220066938656, + "grad_norm": 21.689476013183594, + "learning_rate": 1.4312281273383608e-07, + "loss": 0.0572, + "num_input_tokens_seen": 116511720, + "step": 172910 + }, + { + "epoch": 4.224342217770503, + "grad_norm": 0.0006670998991467059, + "learning_rate": 1.4307885349061755e-07, + "loss": 0.0, + "num_input_tokens_seen": 116514792, + "step": 172915 + }, + { + "epoch": 4.22446436860235, + "grad_norm": 2.3548327590106055e-05, + "learning_rate": 1.4303490047915989e-07, + "loss": 0.0, + "num_input_tokens_seen": 116518056, + "step": 172920 + }, + { + "epoch": 4.224586519434197, + "grad_norm": 0.0005599820869974792, + "learning_rate": 1.429909536997831e-07, + "loss": 0.0, + "num_input_tokens_seen": 116521192, + "step": 172925 + }, + { + "epoch": 4.224708670266044, + "grad_norm": 0.07890927791595459, + "learning_rate": 1.4294701315280645e-07, + "loss": 0.0, + "num_input_tokens_seen": 116524264, + "step": 172930 + }, + { + "epoch": 4.224830821097892, + "grad_norm": 0.0008640710148029029, + "learning_rate": 1.4290307883854958e-07, + "loss": 0.0, + "num_input_tokens_seen": 116527784, + "step": 172935 + }, + { + "epoch": 4.2249529719297385, + "grad_norm": 7.143465336412191e-05, + "learning_rate": 1.4285915075733225e-07, + "loss": 0.0, + "num_input_tokens_seen": 116530920, + "step": 172940 + }, + { + "epoch": 4.225075122761586, + "grad_norm": 4.0271534089697525e-05, + "learning_rate": 1.428152289094735e-07, + "loss": 0.0, + "num_input_tokens_seen": 116534056, + "step": 172945 + }, + { + "epoch": 4.225197273593433, + "grad_norm": 8.894584607332945e-05, + "learning_rate": 1.4277131329529323e-07, + "loss": 0.0, + "num_input_tokens_seen": 116537384, + "step": 172950 + }, + { + "epoch": 4.2253194244252805, + "grad_norm": 1.7404921891284175e-05, + "learning_rate": 1.427274039151103e-07, + "loss": 0.0, + "num_input_tokens_seen": 116540968, + "step": 172955 + }, + { + "epoch": 4.225441575257127, + "grad_norm": 4.886610258836299e-05, + "learning_rate": 1.426835007692443e-07, + "loss": 0.0, + "num_input_tokens_seen": 116544168, + "step": 172960 + }, + { + "epoch": 4.225563726088975, + "grad_norm": 0.00024690740974619985, + "learning_rate": 1.4263960385801465e-07, + "loss": 0.0, + "num_input_tokens_seen": 116547112, + "step": 172965 + }, + { + "epoch": 4.225685876920822, + "grad_norm": 0.00024043295707087964, + "learning_rate": 1.4259571318174014e-07, + "loss": 0.0, + "num_input_tokens_seen": 116550120, + "step": 172970 + }, + { + "epoch": 4.225808027752669, + "grad_norm": 0.00130647758487612, + "learning_rate": 1.4255182874074045e-07, + "loss": 0.0, + "num_input_tokens_seen": 116553320, + "step": 172975 + }, + { + "epoch": 4.225930178584516, + "grad_norm": 0.0005790484719909728, + "learning_rate": 1.4250795053533438e-07, + "loss": 0.0, + "num_input_tokens_seen": 116556776, + "step": 172980 + }, + { + "epoch": 4.226052329416364, + "grad_norm": 0.0015895002288743854, + "learning_rate": 1.4246407856584132e-07, + "loss": 0.0, + "num_input_tokens_seen": 116559912, + "step": 172985 + }, + { + "epoch": 4.22617448024821, + "grad_norm": 0.0025169982109218836, + "learning_rate": 1.4242021283257976e-07, + "loss": 0.0, + "num_input_tokens_seen": 116563112, + "step": 172990 + }, + { + "epoch": 4.226296631080058, + "grad_norm": 1.5641830032109283e-05, + "learning_rate": 1.4237635333586938e-07, + "loss": 0.0, + "num_input_tokens_seen": 116566312, + "step": 172995 + }, + { + "epoch": 4.226418781911905, + "grad_norm": 0.011025205254554749, + "learning_rate": 1.423325000760287e-07, + "loss": 0.0529, + "num_input_tokens_seen": 116569768, + "step": 173000 + }, + { + "epoch": 4.226540932743752, + "grad_norm": 0.00022293122310657054, + "learning_rate": 1.422886530533769e-07, + "loss": 0.0, + "num_input_tokens_seen": 116573032, + "step": 173005 + }, + { + "epoch": 4.226663083575599, + "grad_norm": 3.6755925975739956e-05, + "learning_rate": 1.422448122682327e-07, + "loss": 0.0, + "num_input_tokens_seen": 116576296, + "step": 173010 + }, + { + "epoch": 4.226785234407446, + "grad_norm": 0.02970314212143421, + "learning_rate": 1.4220097772091478e-07, + "loss": 0.0002, + "num_input_tokens_seen": 116580008, + "step": 173015 + }, + { + "epoch": 4.2269073852392935, + "grad_norm": 0.0007604420534335077, + "learning_rate": 1.4215714941174227e-07, + "loss": 0.0, + "num_input_tokens_seen": 116583720, + "step": 173020 + }, + { + "epoch": 4.22702953607114, + "grad_norm": 0.0001692136429483071, + "learning_rate": 1.4211332734103343e-07, + "loss": 0.0, + "num_input_tokens_seen": 116586920, + "step": 173025 + }, + { + "epoch": 4.227151686902988, + "grad_norm": 2.0400722860358655e-05, + "learning_rate": 1.4206951150910727e-07, + "loss": 0.0, + "num_input_tokens_seen": 116590440, + "step": 173030 + }, + { + "epoch": 4.227273837734835, + "grad_norm": 5.274592331261374e-05, + "learning_rate": 1.420257019162826e-07, + "loss": 0.0, + "num_input_tokens_seen": 116593576, + "step": 173035 + }, + { + "epoch": 4.227395988566682, + "grad_norm": 0.00022966494725551456, + "learning_rate": 1.4198189856287746e-07, + "loss": 0.0, + "num_input_tokens_seen": 116597096, + "step": 173040 + }, + { + "epoch": 4.227518139398529, + "grad_norm": 0.001598756411112845, + "learning_rate": 1.4193810144921114e-07, + "loss": 0.0, + "num_input_tokens_seen": 116600488, + "step": 173045 + }, + { + "epoch": 4.227640290230377, + "grad_norm": 6.039086656528525e-05, + "learning_rate": 1.4189431057560142e-07, + "loss": 0.0, + "num_input_tokens_seen": 116604072, + "step": 173050 + }, + { + "epoch": 4.227762441062223, + "grad_norm": 3.7535726733040065e-05, + "learning_rate": 1.4185052594236702e-07, + "loss": 0.0, + "num_input_tokens_seen": 116607464, + "step": 173055 + }, + { + "epoch": 4.227884591894071, + "grad_norm": 0.00012207684630993754, + "learning_rate": 1.418067475498267e-07, + "loss": 0.0, + "num_input_tokens_seen": 116610856, + "step": 173060 + }, + { + "epoch": 4.228006742725918, + "grad_norm": 3.734017082024366e-05, + "learning_rate": 1.417629753982983e-07, + "loss": 0.0, + "num_input_tokens_seen": 116613864, + "step": 173065 + }, + { + "epoch": 4.2281288935577654, + "grad_norm": 0.0005823975661769509, + "learning_rate": 1.4171920948810056e-07, + "loss": 0.0, + "num_input_tokens_seen": 116616872, + "step": 173070 + }, + { + "epoch": 4.228251044389612, + "grad_norm": 0.00012418540427461267, + "learning_rate": 1.4167544981955148e-07, + "loss": 0.0, + "num_input_tokens_seen": 116620392, + "step": 173075 + }, + { + "epoch": 4.22837319522146, + "grad_norm": 3.818258846877143e-06, + "learning_rate": 1.4163169639296946e-07, + "loss": 0.0, + "num_input_tokens_seen": 116623976, + "step": 173080 + }, + { + "epoch": 4.228495346053307, + "grad_norm": 9.804531873669475e-05, + "learning_rate": 1.4158794920867245e-07, + "loss": 0.0, + "num_input_tokens_seen": 116627752, + "step": 173085 + }, + { + "epoch": 4.228617496885154, + "grad_norm": 0.0018733707256615162, + "learning_rate": 1.4154420826697888e-07, + "loss": 0.0, + "num_input_tokens_seen": 116631144, + "step": 173090 + }, + { + "epoch": 4.228739647717001, + "grad_norm": 0.00010759669385151938, + "learning_rate": 1.415004735682068e-07, + "loss": 0.0, + "num_input_tokens_seen": 116634792, + "step": 173095 + }, + { + "epoch": 4.228861798548848, + "grad_norm": 0.006699483375996351, + "learning_rate": 1.4145674511267425e-07, + "loss": 0.0, + "num_input_tokens_seen": 116638248, + "step": 173100 + }, + { + "epoch": 4.228983949380695, + "grad_norm": 4.7020821511978284e-05, + "learning_rate": 1.414130229006989e-07, + "loss": 0.0, + "num_input_tokens_seen": 116641448, + "step": 173105 + }, + { + "epoch": 4.229106100212542, + "grad_norm": 4.57140886283014e-05, + "learning_rate": 1.4136930693259918e-07, + "loss": 0.0, + "num_input_tokens_seen": 116645096, + "step": 173110 + }, + { + "epoch": 4.22922825104439, + "grad_norm": 0.0014901167014613748, + "learning_rate": 1.4132559720869264e-07, + "loss": 0.0, + "num_input_tokens_seen": 116648680, + "step": 173115 + }, + { + "epoch": 4.2293504018762365, + "grad_norm": 0.00018210777489002794, + "learning_rate": 1.4128189372929755e-07, + "loss": 0.0, + "num_input_tokens_seen": 116651880, + "step": 173120 + }, + { + "epoch": 4.229472552708084, + "grad_norm": 0.00023117540695238858, + "learning_rate": 1.4123819649473123e-07, + "loss": 0.0, + "num_input_tokens_seen": 116655208, + "step": 173125 + }, + { + "epoch": 4.229594703539931, + "grad_norm": 6.564979412360117e-05, + "learning_rate": 1.4119450550531198e-07, + "loss": 0.0, + "num_input_tokens_seen": 116658920, + "step": 173130 + }, + { + "epoch": 4.2297168543717785, + "grad_norm": 0.001030170125886798, + "learning_rate": 1.411508207613571e-07, + "loss": 0.0, + "num_input_tokens_seen": 116662120, + "step": 173135 + }, + { + "epoch": 4.229839005203625, + "grad_norm": 1.3802886314806528e-05, + "learning_rate": 1.4110714226318455e-07, + "loss": 0.0, + "num_input_tokens_seen": 116665512, + "step": 173140 + }, + { + "epoch": 4.229961156035473, + "grad_norm": 0.0002681001788005233, + "learning_rate": 1.4106347001111173e-07, + "loss": 0.0, + "num_input_tokens_seen": 116669160, + "step": 173145 + }, + { + "epoch": 4.23008330686732, + "grad_norm": 8.571257785661146e-05, + "learning_rate": 1.4101980400545643e-07, + "loss": 0.0, + "num_input_tokens_seen": 116672488, + "step": 173150 + }, + { + "epoch": 4.230205457699167, + "grad_norm": 0.0009145172662101686, + "learning_rate": 1.4097614424653624e-07, + "loss": 0.0, + "num_input_tokens_seen": 116675752, + "step": 173155 + }, + { + "epoch": 4.230327608531014, + "grad_norm": 1.8907823687186465e-05, + "learning_rate": 1.409324907346685e-07, + "loss": 0.0, + "num_input_tokens_seen": 116678952, + "step": 173160 + }, + { + "epoch": 4.230449759362862, + "grad_norm": 8.053556666709483e-05, + "learning_rate": 1.4088884347017094e-07, + "loss": 0.0, + "num_input_tokens_seen": 116682280, + "step": 173165 + }, + { + "epoch": 4.230571910194708, + "grad_norm": 0.005030336324125528, + "learning_rate": 1.4084520245336052e-07, + "loss": 0.0, + "num_input_tokens_seen": 116685480, + "step": 173170 + }, + { + "epoch": 4.230694061026556, + "grad_norm": 0.00014258605369832367, + "learning_rate": 1.408015676845551e-07, + "loss": 0.0, + "num_input_tokens_seen": 116688808, + "step": 173175 + }, + { + "epoch": 4.230816211858403, + "grad_norm": 0.001624719938263297, + "learning_rate": 1.4075793916407154e-07, + "loss": 0.0, + "num_input_tokens_seen": 116692008, + "step": 173180 + }, + { + "epoch": 4.2309383626902495, + "grad_norm": 0.00024306464183609933, + "learning_rate": 1.4071431689222735e-07, + "loss": 0.0, + "num_input_tokens_seen": 116695208, + "step": 173185 + }, + { + "epoch": 4.231060513522097, + "grad_norm": 0.0005330306012183428, + "learning_rate": 1.4067070086933996e-07, + "loss": 0.0, + "num_input_tokens_seen": 116698536, + "step": 173190 + }, + { + "epoch": 4.231182664353944, + "grad_norm": 1.0813733752002008e-05, + "learning_rate": 1.4062709109572623e-07, + "loss": 0.0, + "num_input_tokens_seen": 116702760, + "step": 173195 + }, + { + "epoch": 4.2313048151857915, + "grad_norm": 6.632073927903548e-05, + "learning_rate": 1.4058348757170367e-07, + "loss": 0.0, + "num_input_tokens_seen": 116706216, + "step": 173200 + }, + { + "epoch": 4.231426966017638, + "grad_norm": 0.02792537584900856, + "learning_rate": 1.4053989029758905e-07, + "loss": 0.0, + "num_input_tokens_seen": 116709416, + "step": 173205 + }, + { + "epoch": 4.231549116849486, + "grad_norm": 0.003411710960790515, + "learning_rate": 1.4049629927369934e-07, + "loss": 0.0, + "num_input_tokens_seen": 116713256, + "step": 173210 + }, + { + "epoch": 4.231671267681333, + "grad_norm": 0.002977790078148246, + "learning_rate": 1.40452714500352e-07, + "loss": 0.0, + "num_input_tokens_seen": 116716648, + "step": 173215 + }, + { + "epoch": 4.23179341851318, + "grad_norm": 0.034006424248218536, + "learning_rate": 1.4040913597786342e-07, + "loss": 0.0, + "num_input_tokens_seen": 116719592, + "step": 173220 + }, + { + "epoch": 4.231915569345027, + "grad_norm": 2.359913196414709e-05, + "learning_rate": 1.4036556370655105e-07, + "loss": 0.0, + "num_input_tokens_seen": 116722984, + "step": 173225 + }, + { + "epoch": 4.232037720176875, + "grad_norm": 1.3884271538699977e-05, + "learning_rate": 1.4032199768673124e-07, + "loss": 0.0, + "num_input_tokens_seen": 116726888, + "step": 173230 + }, + { + "epoch": 4.232159871008721, + "grad_norm": 0.0003605287929531187, + "learning_rate": 1.402784379187213e-07, + "loss": 0.0, + "num_input_tokens_seen": 116730536, + "step": 173235 + }, + { + "epoch": 4.232282021840569, + "grad_norm": 0.000187235651537776, + "learning_rate": 1.4023488440283771e-07, + "loss": 0.0001, + "num_input_tokens_seen": 116733800, + "step": 173240 + }, + { + "epoch": 4.232404172672416, + "grad_norm": 0.00020515448704827577, + "learning_rate": 1.4019133713939713e-07, + "loss": 0.0, + "num_input_tokens_seen": 116737448, + "step": 173245 + }, + { + "epoch": 4.232526323504263, + "grad_norm": 6.843862502137199e-05, + "learning_rate": 1.4014779612871673e-07, + "loss": 0.0, + "num_input_tokens_seen": 116740392, + "step": 173250 + }, + { + "epoch": 4.23264847433611, + "grad_norm": 0.00012958318984601647, + "learning_rate": 1.4010426137111265e-07, + "loss": 0.0451, + "num_input_tokens_seen": 116743464, + "step": 173255 + }, + { + "epoch": 4.232770625167958, + "grad_norm": 3.7418103602249175e-05, + "learning_rate": 1.4006073286690178e-07, + "loss": 0.0, + "num_input_tokens_seen": 116747240, + "step": 173260 + }, + { + "epoch": 4.2328927759998045, + "grad_norm": 3.6361041566124186e-05, + "learning_rate": 1.4001721061640038e-07, + "loss": 0.0, + "num_input_tokens_seen": 116750760, + "step": 173265 + }, + { + "epoch": 4.233014926831652, + "grad_norm": 0.00014582725998479873, + "learning_rate": 1.3997369461992513e-07, + "loss": 0.0, + "num_input_tokens_seen": 116753960, + "step": 173270 + }, + { + "epoch": 4.233137077663499, + "grad_norm": 1.2433220035745762e-05, + "learning_rate": 1.3993018487779262e-07, + "loss": 0.0, + "num_input_tokens_seen": 116757672, + "step": 173275 + }, + { + "epoch": 4.233259228495346, + "grad_norm": 0.0011232885299250484, + "learning_rate": 1.39886681390319e-07, + "loss": 0.0005, + "num_input_tokens_seen": 116760808, + "step": 173280 + }, + { + "epoch": 4.233381379327193, + "grad_norm": 8.350649295607582e-05, + "learning_rate": 1.3984318415782103e-07, + "loss": 0.0, + "num_input_tokens_seen": 116764008, + "step": 173285 + }, + { + "epoch": 4.23350353015904, + "grad_norm": 2.9423656087601557e-05, + "learning_rate": 1.3979969318061457e-07, + "loss": 0.0, + "num_input_tokens_seen": 116767528, + "step": 173290 + }, + { + "epoch": 4.233625680990888, + "grad_norm": 0.00022587507555726916, + "learning_rate": 1.3975620845901624e-07, + "loss": 0.0, + "num_input_tokens_seen": 116771176, + "step": 173295 + }, + { + "epoch": 4.233747831822734, + "grad_norm": 1.5688436178606935e-05, + "learning_rate": 1.3971272999334206e-07, + "loss": 0.0, + "num_input_tokens_seen": 116774568, + "step": 173300 + }, + { + "epoch": 4.233869982654582, + "grad_norm": 0.00046054826816543937, + "learning_rate": 1.3966925778390836e-07, + "loss": 0.0, + "num_input_tokens_seen": 116777768, + "step": 173305 + }, + { + "epoch": 4.233992133486429, + "grad_norm": 0.00010839592141564935, + "learning_rate": 1.3962579183103106e-07, + "loss": 0.0, + "num_input_tokens_seen": 116781352, + "step": 173310 + }, + { + "epoch": 4.234114284318276, + "grad_norm": 4.6599354391219094e-05, + "learning_rate": 1.3958233213502669e-07, + "loss": 0.0, + "num_input_tokens_seen": 116784680, + "step": 173315 + }, + { + "epoch": 4.234236435150123, + "grad_norm": 0.00041652043000794947, + "learning_rate": 1.3953887869621095e-07, + "loss": 0.0584, + "num_input_tokens_seen": 116787880, + "step": 173320 + }, + { + "epoch": 4.234358585981971, + "grad_norm": 0.0005544126615859568, + "learning_rate": 1.3949543151489973e-07, + "loss": 0.0, + "num_input_tokens_seen": 116790824, + "step": 173325 + }, + { + "epoch": 4.2344807368138175, + "grad_norm": 0.00018715529586188495, + "learning_rate": 1.3945199059140932e-07, + "loss": 0.0, + "num_input_tokens_seen": 116793896, + "step": 173330 + }, + { + "epoch": 4.234602887645665, + "grad_norm": 1.5036851436889265e-05, + "learning_rate": 1.3940855592605538e-07, + "loss": 0.0, + "num_input_tokens_seen": 116796840, + "step": 173335 + }, + { + "epoch": 4.234725038477512, + "grad_norm": 0.0010711431968957186, + "learning_rate": 1.3936512751915387e-07, + "loss": 0.0, + "num_input_tokens_seen": 116799976, + "step": 173340 + }, + { + "epoch": 4.2348471893093595, + "grad_norm": 0.000726178870536387, + "learning_rate": 1.3932170537102084e-07, + "loss": 0.0, + "num_input_tokens_seen": 116803432, + "step": 173345 + }, + { + "epoch": 4.234969340141206, + "grad_norm": 0.00036462812568061054, + "learning_rate": 1.3927828948197162e-07, + "loss": 0.0, + "num_input_tokens_seen": 116806504, + "step": 173350 + }, + { + "epoch": 4.235091490973054, + "grad_norm": 3.795464726863429e-05, + "learning_rate": 1.392348798523225e-07, + "loss": 0.0, + "num_input_tokens_seen": 116810152, + "step": 173355 + }, + { + "epoch": 4.235213641804901, + "grad_norm": 0.4274020493030548, + "learning_rate": 1.391914764823885e-07, + "loss": 0.0002, + "num_input_tokens_seen": 116813352, + "step": 173360 + }, + { + "epoch": 4.235335792636748, + "grad_norm": 5.8089312915399205e-06, + "learning_rate": 1.3914807937248575e-07, + "loss": 0.0, + "num_input_tokens_seen": 116816936, + "step": 173365 + }, + { + "epoch": 4.235457943468595, + "grad_norm": 7.85816228017211e-05, + "learning_rate": 1.3910468852292977e-07, + "loss": 0.0, + "num_input_tokens_seen": 116820072, + "step": 173370 + }, + { + "epoch": 4.235580094300442, + "grad_norm": 0.00032599607948213816, + "learning_rate": 1.3906130393403593e-07, + "loss": 0.0, + "num_input_tokens_seen": 116823592, + "step": 173375 + }, + { + "epoch": 4.235702245132289, + "grad_norm": 0.00011063476995332167, + "learning_rate": 1.3901792560612002e-07, + "loss": 0.0, + "num_input_tokens_seen": 116827240, + "step": 173380 + }, + { + "epoch": 4.235824395964136, + "grad_norm": 2.9659470328624593e-06, + "learning_rate": 1.3897455353949715e-07, + "loss": 0.0, + "num_input_tokens_seen": 116830632, + "step": 173385 + }, + { + "epoch": 4.235946546795984, + "grad_norm": 5.866341962246224e-05, + "learning_rate": 1.389311877344832e-07, + "loss": 0.0, + "num_input_tokens_seen": 116834152, + "step": 173390 + }, + { + "epoch": 4.236068697627831, + "grad_norm": 0.00014773337170481682, + "learning_rate": 1.38887828191393e-07, + "loss": 0.0, + "num_input_tokens_seen": 116837224, + "step": 173395 + }, + { + "epoch": 4.236190848459678, + "grad_norm": 0.00015105612692423165, + "learning_rate": 1.3884447491054207e-07, + "loss": 0.0, + "num_input_tokens_seen": 116840488, + "step": 173400 + }, + { + "epoch": 4.236312999291525, + "grad_norm": 8.750290726311505e-05, + "learning_rate": 1.3880112789224596e-07, + "loss": 0.0, + "num_input_tokens_seen": 116844392, + "step": 173405 + }, + { + "epoch": 4.236435150123373, + "grad_norm": 0.00471015740185976, + "learning_rate": 1.3875778713681975e-07, + "loss": 0.0, + "num_input_tokens_seen": 116847720, + "step": 173410 + }, + { + "epoch": 4.236557300955219, + "grad_norm": 4.11164146498777e-05, + "learning_rate": 1.3871445264457826e-07, + "loss": 0.0, + "num_input_tokens_seen": 116850984, + "step": 173415 + }, + { + "epoch": 4.236679451787067, + "grad_norm": 4.238776455167681e-05, + "learning_rate": 1.3867112441583718e-07, + "loss": 0.0536, + "num_input_tokens_seen": 116854120, + "step": 173420 + }, + { + "epoch": 4.236801602618914, + "grad_norm": 0.0008721463964320719, + "learning_rate": 1.3862780245091133e-07, + "loss": 0.0, + "num_input_tokens_seen": 116857448, + "step": 173425 + }, + { + "epoch": 4.236923753450761, + "grad_norm": 4.055196768604219e-05, + "learning_rate": 1.3858448675011558e-07, + "loss": 0.0, + "num_input_tokens_seen": 116860648, + "step": 173430 + }, + { + "epoch": 4.237045904282608, + "grad_norm": 0.0018774106865748763, + "learning_rate": 1.3854117731376515e-07, + "loss": 0.0, + "num_input_tokens_seen": 116864296, + "step": 173435 + }, + { + "epoch": 4.237168055114456, + "grad_norm": 0.00010131034650839865, + "learning_rate": 1.384978741421752e-07, + "loss": 0.0, + "num_input_tokens_seen": 116867496, + "step": 173440 + }, + { + "epoch": 4.2372902059463025, + "grad_norm": 0.0007216433878056705, + "learning_rate": 1.3845457723566024e-07, + "loss": 0.0563, + "num_input_tokens_seen": 116870632, + "step": 173445 + }, + { + "epoch": 4.237412356778149, + "grad_norm": 0.0008672663243487477, + "learning_rate": 1.3841128659453548e-07, + "loss": 0.0, + "num_input_tokens_seen": 116874024, + "step": 173450 + }, + { + "epoch": 4.237534507609997, + "grad_norm": 0.0017662736354395747, + "learning_rate": 1.3836800221911537e-07, + "loss": 0.0, + "num_input_tokens_seen": 116877544, + "step": 173455 + }, + { + "epoch": 4.237656658441844, + "grad_norm": 0.004523616284132004, + "learning_rate": 1.3832472410971485e-07, + "loss": 0.0, + "num_input_tokens_seen": 116880744, + "step": 173460 + }, + { + "epoch": 4.237778809273691, + "grad_norm": 0.00046189091517589986, + "learning_rate": 1.38281452266649e-07, + "loss": 0.0, + "num_input_tokens_seen": 116883816, + "step": 173465 + }, + { + "epoch": 4.237900960105538, + "grad_norm": 1.331346902588848e-05, + "learning_rate": 1.3823818669023202e-07, + "loss": 0.0, + "num_input_tokens_seen": 116887144, + "step": 173470 + }, + { + "epoch": 4.238023110937386, + "grad_norm": 5.324160156305879e-06, + "learning_rate": 1.3819492738077887e-07, + "loss": 0.0, + "num_input_tokens_seen": 116890472, + "step": 173475 + }, + { + "epoch": 4.238145261769232, + "grad_norm": 2.3222146034240723, + "learning_rate": 1.3815167433860387e-07, + "loss": 0.0004, + "num_input_tokens_seen": 116893480, + "step": 173480 + }, + { + "epoch": 4.23826741260108, + "grad_norm": 0.00231824885122478, + "learning_rate": 1.3810842756402184e-07, + "loss": 0.0, + "num_input_tokens_seen": 116896744, + "step": 173485 + }, + { + "epoch": 4.238389563432927, + "grad_norm": 0.00015672051813453436, + "learning_rate": 1.3806518705734694e-07, + "loss": 0.0, + "num_input_tokens_seen": 116900072, + "step": 173490 + }, + { + "epoch": 4.238511714264774, + "grad_norm": 0.0015891582006588578, + "learning_rate": 1.3802195281889383e-07, + "loss": 0.0, + "num_input_tokens_seen": 116903272, + "step": 173495 + }, + { + "epoch": 4.238633865096621, + "grad_norm": 0.0005230855895206332, + "learning_rate": 1.379787248489771e-07, + "loss": 0.0, + "num_input_tokens_seen": 116906728, + "step": 173500 + }, + { + "epoch": 4.238756015928469, + "grad_norm": 2.7853136998601258e-05, + "learning_rate": 1.379355031479108e-07, + "loss": 0.0, + "num_input_tokens_seen": 116910248, + "step": 173505 + }, + { + "epoch": 4.2388781667603155, + "grad_norm": 4.448258187039755e-05, + "learning_rate": 1.3789228771600959e-07, + "loss": 0.0, + "num_input_tokens_seen": 116913384, + "step": 173510 + }, + { + "epoch": 4.239000317592163, + "grad_norm": 0.00026565848384052515, + "learning_rate": 1.378490785535875e-07, + "loss": 0.0, + "num_input_tokens_seen": 116919144, + "step": 173515 + }, + { + "epoch": 4.23912246842401, + "grad_norm": 0.00017902874969877303, + "learning_rate": 1.378058756609587e-07, + "loss": 0.0, + "num_input_tokens_seen": 116922408, + "step": 173520 + }, + { + "epoch": 4.2392446192558575, + "grad_norm": 0.00046245678095147014, + "learning_rate": 1.3776267903843763e-07, + "loss": 0.0, + "num_input_tokens_seen": 116926056, + "step": 173525 + }, + { + "epoch": 4.239366770087704, + "grad_norm": 0.00010655471123754978, + "learning_rate": 1.3771948868633797e-07, + "loss": 0.0, + "num_input_tokens_seen": 116929448, + "step": 173530 + }, + { + "epoch": 4.239488920919552, + "grad_norm": 0.0017132902285084128, + "learning_rate": 1.3767630460497447e-07, + "loss": 0.0, + "num_input_tokens_seen": 116932648, + "step": 173535 + }, + { + "epoch": 4.239611071751399, + "grad_norm": 0.0010654488578438759, + "learning_rate": 1.3763312679466054e-07, + "loss": 0.0001, + "num_input_tokens_seen": 116936104, + "step": 173540 + }, + { + "epoch": 4.239733222583245, + "grad_norm": 6.303595000645146e-05, + "learning_rate": 1.375899552557106e-07, + "loss": 0.0, + "num_input_tokens_seen": 116939496, + "step": 173545 + }, + { + "epoch": 4.239855373415093, + "grad_norm": 3.1240680982591584e-05, + "learning_rate": 1.3754678998843838e-07, + "loss": 0.0, + "num_input_tokens_seen": 116942888, + "step": 173550 + }, + { + "epoch": 4.23997752424694, + "grad_norm": 9.1200927272439e-05, + "learning_rate": 1.3750363099315777e-07, + "loss": 0.0, + "num_input_tokens_seen": 116946216, + "step": 173555 + }, + { + "epoch": 4.240099675078787, + "grad_norm": 9.304591003456153e-06, + "learning_rate": 1.3746047827018302e-07, + "loss": 0.0, + "num_input_tokens_seen": 116949288, + "step": 173560 + }, + { + "epoch": 4.240221825910634, + "grad_norm": 1.8034832464763895e-05, + "learning_rate": 1.374173318198274e-07, + "loss": 0.0, + "num_input_tokens_seen": 116952872, + "step": 173565 + }, + { + "epoch": 4.240343976742482, + "grad_norm": 5.101427177578444e-06, + "learning_rate": 1.3737419164240527e-07, + "loss": 0.0, + "num_input_tokens_seen": 116956776, + "step": 173570 + }, + { + "epoch": 4.2404661275743285, + "grad_norm": 0.002323642373085022, + "learning_rate": 1.3733105773822973e-07, + "loss": 0.0, + "num_input_tokens_seen": 116960104, + "step": 173575 + }, + { + "epoch": 4.240588278406176, + "grad_norm": 127.75724792480469, + "learning_rate": 1.3728793010761497e-07, + "loss": 0.0139, + "num_input_tokens_seen": 116963240, + "step": 173580 + }, + { + "epoch": 4.240710429238023, + "grad_norm": 0.00025013191043399274, + "learning_rate": 1.372448087508742e-07, + "loss": 0.0, + "num_input_tokens_seen": 116966504, + "step": 173585 + }, + { + "epoch": 4.2408325800698705, + "grad_norm": 0.00014707444643136114, + "learning_rate": 1.3720169366832134e-07, + "loss": 0.0, + "num_input_tokens_seen": 116969512, + "step": 173590 + }, + { + "epoch": 4.240954730901717, + "grad_norm": 6.153630965854973e-05, + "learning_rate": 1.3715858486027e-07, + "loss": 0.0005, + "num_input_tokens_seen": 116972456, + "step": 173595 + }, + { + "epoch": 4.241076881733565, + "grad_norm": 7.514778189943172e-06, + "learning_rate": 1.371154823270332e-07, + "loss": 0.0, + "num_input_tokens_seen": 116975592, + "step": 173600 + }, + { + "epoch": 4.241199032565412, + "grad_norm": 4.237967004883103e-05, + "learning_rate": 1.3707238606892503e-07, + "loss": 0.0343, + "num_input_tokens_seen": 116978920, + "step": 173605 + }, + { + "epoch": 4.241321183397259, + "grad_norm": 0.000658139237202704, + "learning_rate": 1.3702929608625823e-07, + "loss": 0.0, + "num_input_tokens_seen": 116981928, + "step": 173610 + }, + { + "epoch": 4.241443334229106, + "grad_norm": 0.0018850330961868167, + "learning_rate": 1.369862123793468e-07, + "loss": 0.0, + "num_input_tokens_seen": 116985576, + "step": 173615 + }, + { + "epoch": 4.241565485060954, + "grad_norm": 0.00011951574560953304, + "learning_rate": 1.3694313494850362e-07, + "loss": 0.0, + "num_input_tokens_seen": 116988968, + "step": 173620 + }, + { + "epoch": 4.2416876358928, + "grad_norm": 0.0011580303544178605, + "learning_rate": 1.3690006379404217e-07, + "loss": 0.0, + "num_input_tokens_seen": 116992680, + "step": 173625 + }, + { + "epoch": 4.241809786724648, + "grad_norm": 0.0008965888991951942, + "learning_rate": 1.3685699891627568e-07, + "loss": 0.0003, + "num_input_tokens_seen": 116995752, + "step": 173630 + }, + { + "epoch": 4.241931937556495, + "grad_norm": 0.005259280558675528, + "learning_rate": 1.3681394031551706e-07, + "loss": 0.0, + "num_input_tokens_seen": 116999528, + "step": 173635 + }, + { + "epoch": 4.2420540883883415, + "grad_norm": 0.0002571164513938129, + "learning_rate": 1.367708879920798e-07, + "loss": 0.0, + "num_input_tokens_seen": 117002920, + "step": 173640 + }, + { + "epoch": 4.242176239220189, + "grad_norm": 0.00011767227988457307, + "learning_rate": 1.3672784194627663e-07, + "loss": 0.0, + "num_input_tokens_seen": 117006568, + "step": 173645 + }, + { + "epoch": 4.242298390052036, + "grad_norm": 0.0065977550111711025, + "learning_rate": 1.3668480217842072e-07, + "loss": 0.0, + "num_input_tokens_seen": 117009640, + "step": 173650 + }, + { + "epoch": 4.2424205408838835, + "grad_norm": 0.00029881237423978746, + "learning_rate": 1.3664176868882537e-07, + "loss": 0.0, + "num_input_tokens_seen": 117012840, + "step": 173655 + }, + { + "epoch": 4.24254269171573, + "grad_norm": 0.0006145496154204011, + "learning_rate": 1.3659874147780314e-07, + "loss": 0.0, + "num_input_tokens_seen": 117016296, + "step": 173660 + }, + { + "epoch": 4.242664842547578, + "grad_norm": 0.00022483298380393535, + "learning_rate": 1.365557205456672e-07, + "loss": 0.0, + "num_input_tokens_seen": 117019944, + "step": 173665 + }, + { + "epoch": 4.242786993379425, + "grad_norm": 0.0005453492049127817, + "learning_rate": 1.3651270589273023e-07, + "loss": 0.0, + "num_input_tokens_seen": 117023848, + "step": 173670 + }, + { + "epoch": 4.242909144211272, + "grad_norm": 0.0019332737429067492, + "learning_rate": 1.3646969751930504e-07, + "loss": 0.0, + "num_input_tokens_seen": 117027176, + "step": 173675 + }, + { + "epoch": 4.243031295043119, + "grad_norm": 0.0005564195453189313, + "learning_rate": 1.364266954257046e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117030440, + "step": 173680 + }, + { + "epoch": 4.243153445874967, + "grad_norm": 0.10380977392196655, + "learning_rate": 1.3638369961224138e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117033896, + "step": 173685 + }, + { + "epoch": 4.243275596706813, + "grad_norm": 0.00010214153735432774, + "learning_rate": 1.3634071007922841e-07, + "loss": 0.0, + "num_input_tokens_seen": 117037480, + "step": 173690 + }, + { + "epoch": 4.243397747538661, + "grad_norm": 0.00010061613284051418, + "learning_rate": 1.3629772682697794e-07, + "loss": 0.0, + "num_input_tokens_seen": 117040552, + "step": 173695 + }, + { + "epoch": 4.243519898370508, + "grad_norm": 0.0002196808491135016, + "learning_rate": 1.3625474985580277e-07, + "loss": 0.0, + "num_input_tokens_seen": 117044008, + "step": 173700 + }, + { + "epoch": 4.243642049202355, + "grad_norm": 1.6563162716920488e-05, + "learning_rate": 1.3621177916601522e-07, + "loss": 0.0, + "num_input_tokens_seen": 117047528, + "step": 173705 + }, + { + "epoch": 4.243764200034202, + "grad_norm": 0.0005563225131481886, + "learning_rate": 1.3616881475792796e-07, + "loss": 0.0, + "num_input_tokens_seen": 117051048, + "step": 173710 + }, + { + "epoch": 4.24388635086605, + "grad_norm": 0.0004958529025316238, + "learning_rate": 1.3612585663185372e-07, + "loss": 0.0, + "num_input_tokens_seen": 117054184, + "step": 173715 + }, + { + "epoch": 4.244008501697897, + "grad_norm": 0.0001543401594972238, + "learning_rate": 1.3608290478810448e-07, + "loss": 0.0, + "num_input_tokens_seen": 117057064, + "step": 173720 + }, + { + "epoch": 4.244130652529743, + "grad_norm": 0.0005324023077264428, + "learning_rate": 1.3603995922699252e-07, + "loss": 0.0, + "num_input_tokens_seen": 117060648, + "step": 173725 + }, + { + "epoch": 4.244252803361591, + "grad_norm": 0.0005624612094834447, + "learning_rate": 1.3599701994883062e-07, + "loss": 0.0, + "num_input_tokens_seen": 117063912, + "step": 173730 + }, + { + "epoch": 4.244374954193438, + "grad_norm": 0.005314116831868887, + "learning_rate": 1.3595408695393072e-07, + "loss": 0.0, + "num_input_tokens_seen": 117067240, + "step": 173735 + }, + { + "epoch": 4.244497105025285, + "grad_norm": 0.012981265783309937, + "learning_rate": 1.3591116024260496e-07, + "loss": 0.0, + "num_input_tokens_seen": 117070376, + "step": 173740 + }, + { + "epoch": 4.244619255857132, + "grad_norm": 0.0001835319126257673, + "learning_rate": 1.3586823981516559e-07, + "loss": 0.0, + "num_input_tokens_seen": 117073832, + "step": 173745 + }, + { + "epoch": 4.24474140668898, + "grad_norm": 0.00045120823779143393, + "learning_rate": 1.3582532567192506e-07, + "loss": 0.0, + "num_input_tokens_seen": 117077224, + "step": 173750 + }, + { + "epoch": 4.2448635575208264, + "grad_norm": 0.00046550267143175006, + "learning_rate": 1.3578241781319498e-07, + "loss": 0.0399, + "num_input_tokens_seen": 117080680, + "step": 173755 + }, + { + "epoch": 4.244985708352674, + "grad_norm": 2.941801540146116e-05, + "learning_rate": 1.357395162392878e-07, + "loss": 0.0, + "num_input_tokens_seen": 117083944, + "step": 173760 + }, + { + "epoch": 4.245107859184521, + "grad_norm": 0.00016879245231393725, + "learning_rate": 1.3569662095051504e-07, + "loss": 0.0, + "num_input_tokens_seen": 117087144, + "step": 173765 + }, + { + "epoch": 4.2452300100163685, + "grad_norm": 0.0005892272456549108, + "learning_rate": 1.35653731947189e-07, + "loss": 0.0, + "num_input_tokens_seen": 117091048, + "step": 173770 + }, + { + "epoch": 4.245352160848215, + "grad_norm": 7.808295777067542e-05, + "learning_rate": 1.3561084922962173e-07, + "loss": 0.0, + "num_input_tokens_seen": 117094312, + "step": 173775 + }, + { + "epoch": 4.245474311680063, + "grad_norm": 0.00033269840059801936, + "learning_rate": 1.355679727981246e-07, + "loss": 0.0, + "num_input_tokens_seen": 117097704, + "step": 173780 + }, + { + "epoch": 4.24559646251191, + "grad_norm": 9.472859528614208e-05, + "learning_rate": 1.3552510265300988e-07, + "loss": 0.0, + "num_input_tokens_seen": 117100904, + "step": 173785 + }, + { + "epoch": 4.245718613343757, + "grad_norm": 0.00011878240911755711, + "learning_rate": 1.3548223879458897e-07, + "loss": 0.0, + "num_input_tokens_seen": 117104488, + "step": 173790 + }, + { + "epoch": 4.245840764175604, + "grad_norm": 1.9101145880995318e-05, + "learning_rate": 1.35439381223174e-07, + "loss": 0.0, + "num_input_tokens_seen": 117107432, + "step": 173795 + }, + { + "epoch": 4.245962915007452, + "grad_norm": 0.0006267334683798254, + "learning_rate": 1.35396529939076e-07, + "loss": 0.0, + "num_input_tokens_seen": 117110696, + "step": 173800 + }, + { + "epoch": 4.246085065839298, + "grad_norm": 4.32271153840702e-05, + "learning_rate": 1.3535368494260712e-07, + "loss": 0.0, + "num_input_tokens_seen": 117114024, + "step": 173805 + }, + { + "epoch": 4.246207216671145, + "grad_norm": 0.00016103855159599334, + "learning_rate": 1.3531084623407897e-07, + "loss": 0.0, + "num_input_tokens_seen": 117117480, + "step": 173810 + }, + { + "epoch": 4.246329367502993, + "grad_norm": 0.00027206912636756897, + "learning_rate": 1.3526801381380272e-07, + "loss": 0.0, + "num_input_tokens_seen": 117120872, + "step": 173815 + }, + { + "epoch": 4.2464515183348395, + "grad_norm": 0.0004113983013667166, + "learning_rate": 1.3522518768209034e-07, + "loss": 0.0, + "num_input_tokens_seen": 117123880, + "step": 173820 + }, + { + "epoch": 4.246573669166687, + "grad_norm": 0.0003232089220546186, + "learning_rate": 1.3518236783925296e-07, + "loss": 0.0, + "num_input_tokens_seen": 117127144, + "step": 173825 + }, + { + "epoch": 4.246695819998534, + "grad_norm": 1.5161986993916798e-05, + "learning_rate": 1.3513955428560175e-07, + "loss": 0.0, + "num_input_tokens_seen": 117131048, + "step": 173830 + }, + { + "epoch": 4.2468179708303815, + "grad_norm": 4.181816620985046e-05, + "learning_rate": 1.3509674702144859e-07, + "loss": 0.0, + "num_input_tokens_seen": 117134376, + "step": 173835 + }, + { + "epoch": 4.246940121662228, + "grad_norm": 0.00016705627785995603, + "learning_rate": 1.350539460471042e-07, + "loss": 0.0, + "num_input_tokens_seen": 117137704, + "step": 173840 + }, + { + "epoch": 4.247062272494076, + "grad_norm": 3.5695707083505113e-06, + "learning_rate": 1.3501115136288044e-07, + "loss": 0.0, + "num_input_tokens_seen": 117141352, + "step": 173845 + }, + { + "epoch": 4.247184423325923, + "grad_norm": 0.0028615519404411316, + "learning_rate": 1.3496836296908797e-07, + "loss": 0.0, + "num_input_tokens_seen": 117144552, + "step": 173850 + }, + { + "epoch": 4.24730657415777, + "grad_norm": 0.0008766906685195863, + "learning_rate": 1.3492558086603855e-07, + "loss": 0.0, + "num_input_tokens_seen": 117147944, + "step": 173855 + }, + { + "epoch": 4.247428724989617, + "grad_norm": 0.0012991810217499733, + "learning_rate": 1.348828050540427e-07, + "loss": 0.0, + "num_input_tokens_seen": 117151272, + "step": 173860 + }, + { + "epoch": 4.247550875821465, + "grad_norm": 0.0005853850743733346, + "learning_rate": 1.3484003553341183e-07, + "loss": 0.0, + "num_input_tokens_seen": 117154536, + "step": 173865 + }, + { + "epoch": 4.247673026653311, + "grad_norm": 7.38587277737679e-06, + "learning_rate": 1.3479727230445704e-07, + "loss": 0.0, + "num_input_tokens_seen": 117158248, + "step": 173870 + }, + { + "epoch": 4.247795177485159, + "grad_norm": 0.010029040277004242, + "learning_rate": 1.3475451536748906e-07, + "loss": 0.0, + "num_input_tokens_seen": 117161320, + "step": 173875 + }, + { + "epoch": 4.247917328317006, + "grad_norm": 0.0016140680527314544, + "learning_rate": 1.347117647228192e-07, + "loss": 0.0513, + "num_input_tokens_seen": 117164840, + "step": 173880 + }, + { + "epoch": 4.248039479148853, + "grad_norm": 9.858083649305627e-05, + "learning_rate": 1.3466902037075788e-07, + "loss": 0.0, + "num_input_tokens_seen": 117168040, + "step": 173885 + }, + { + "epoch": 4.2481616299807, + "grad_norm": 0.0005197059363126755, + "learning_rate": 1.3462628231161632e-07, + "loss": 0.0, + "num_input_tokens_seen": 117171304, + "step": 173890 + }, + { + "epoch": 4.248283780812548, + "grad_norm": 0.0014987658942118287, + "learning_rate": 1.3458355054570515e-07, + "loss": 0.0, + "num_input_tokens_seen": 117175400, + "step": 173895 + }, + { + "epoch": 4.2484059316443945, + "grad_norm": 0.00018606704543344676, + "learning_rate": 1.3454082507333496e-07, + "loss": 0.0, + "num_input_tokens_seen": 117178344, + "step": 173900 + }, + { + "epoch": 4.248528082476241, + "grad_norm": 8.002785762073472e-05, + "learning_rate": 1.3449810589481702e-07, + "loss": 0.0, + "num_input_tokens_seen": 117181480, + "step": 173905 + }, + { + "epoch": 4.248650233308089, + "grad_norm": 4.034727317048237e-05, + "learning_rate": 1.3445539301046148e-07, + "loss": 0.0, + "num_input_tokens_seen": 117184872, + "step": 173910 + }, + { + "epoch": 4.248772384139936, + "grad_norm": 0.0015639587072655559, + "learning_rate": 1.3441268642057923e-07, + "loss": 0.0, + "num_input_tokens_seen": 117188200, + "step": 173915 + }, + { + "epoch": 4.248894534971783, + "grad_norm": 44.089595794677734, + "learning_rate": 1.3436998612548055e-07, + "loss": 0.0372, + "num_input_tokens_seen": 117191656, + "step": 173920 + }, + { + "epoch": 4.24901668580363, + "grad_norm": 0.004503658507019281, + "learning_rate": 1.3432729212547645e-07, + "loss": 0.0, + "num_input_tokens_seen": 117195176, + "step": 173925 + }, + { + "epoch": 4.249138836635478, + "grad_norm": 0.00021741162345279008, + "learning_rate": 1.3428460442087686e-07, + "loss": 0.0, + "num_input_tokens_seen": 117198312, + "step": 173930 + }, + { + "epoch": 4.249260987467324, + "grad_norm": 3.549739994923584e-05, + "learning_rate": 1.3424192301199267e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117201512, + "step": 173935 + }, + { + "epoch": 4.249383138299172, + "grad_norm": 0.00024185300571843982, + "learning_rate": 1.3419924789913407e-07, + "loss": 0.0, + "num_input_tokens_seen": 117205032, + "step": 173940 + }, + { + "epoch": 4.249505289131019, + "grad_norm": 0.0010530028957873583, + "learning_rate": 1.3415657908261113e-07, + "loss": 0.0, + "num_input_tokens_seen": 117208424, + "step": 173945 + }, + { + "epoch": 4.249627439962866, + "grad_norm": 7.1865242716739886e-06, + "learning_rate": 1.3411391656273475e-07, + "loss": 0.0, + "num_input_tokens_seen": 117211560, + "step": 173950 + }, + { + "epoch": 4.249749590794713, + "grad_norm": 3.061044117202982e-05, + "learning_rate": 1.3407126033981464e-07, + "loss": 0.0, + "num_input_tokens_seen": 117215336, + "step": 173955 + }, + { + "epoch": 4.249871741626561, + "grad_norm": 0.0009061378077603877, + "learning_rate": 1.3402861041416124e-07, + "loss": 0.0, + "num_input_tokens_seen": 117218344, + "step": 173960 + }, + { + "epoch": 4.2499938924584075, + "grad_norm": 0.00015108681691344827, + "learning_rate": 1.3398596678608488e-07, + "loss": 0.0, + "num_input_tokens_seen": 117221928, + "step": 173965 + }, + { + "epoch": 4.250116043290255, + "grad_norm": 7.658931281184778e-06, + "learning_rate": 1.3394332945589526e-07, + "loss": 0.0, + "num_input_tokens_seen": 117225384, + "step": 173970 + }, + { + "epoch": 4.250238194122102, + "grad_norm": 0.00028781109722331166, + "learning_rate": 1.3390069842390295e-07, + "loss": 0.0, + "num_input_tokens_seen": 117228520, + "step": 173975 + }, + { + "epoch": 4.250311484621211, + "eval_loss": 0.337473601102829, + "eval_runtime": 47.7909, + "eval_samples_per_second": 761.337, + "eval_steps_per_second": 95.185, + "num_input_tokens_seen": 117230952, + "step": 173978 + }, + { + "epoch": 4.2503603449539495, + "grad_norm": 3.5128767194692045e-05, + "learning_rate": 1.3385807369041746e-07, + "loss": 0.0, + "num_input_tokens_seen": 117232040, + "step": 173980 + }, + { + "epoch": 4.250482495785796, + "grad_norm": 0.0010884815128520131, + "learning_rate": 1.338154552557491e-07, + "loss": 0.0, + "num_input_tokens_seen": 117235048, + "step": 173985 + }, + { + "epoch": 4.250604646617644, + "grad_norm": 0.00018114039266947657, + "learning_rate": 1.3377284312020787e-07, + "loss": 0.05, + "num_input_tokens_seen": 117238376, + "step": 173990 + }, + { + "epoch": 4.250726797449491, + "grad_norm": 0.00021715753246098757, + "learning_rate": 1.3373023728410338e-07, + "loss": 0.0, + "num_input_tokens_seen": 117242408, + "step": 173995 + }, + { + "epoch": 4.250848948281337, + "grad_norm": 0.01458763051778078, + "learning_rate": 1.336876377477457e-07, + "loss": 0.0, + "num_input_tokens_seen": 117245672, + "step": 174000 + }, + { + "epoch": 4.250971099113185, + "grad_norm": 3.400916466489434e-05, + "learning_rate": 1.3364504451144443e-07, + "loss": 0.0, + "num_input_tokens_seen": 117248872, + "step": 174005 + }, + { + "epoch": 4.251093249945032, + "grad_norm": 0.0003229551366530359, + "learning_rate": 1.3360245757550947e-07, + "loss": 0.0, + "num_input_tokens_seen": 117251880, + "step": 174010 + }, + { + "epoch": 4.251215400776879, + "grad_norm": 4.918500781059265e-05, + "learning_rate": 1.335598769402504e-07, + "loss": 0.0, + "num_input_tokens_seen": 117255528, + "step": 174015 + }, + { + "epoch": 4.251337551608726, + "grad_norm": 1.871642416517716e-05, + "learning_rate": 1.3351730260597693e-07, + "loss": 0.0, + "num_input_tokens_seen": 117258408, + "step": 174020 + }, + { + "epoch": 4.251459702440574, + "grad_norm": 0.00018231081776320934, + "learning_rate": 1.3347473457299885e-07, + "loss": 0.0, + "num_input_tokens_seen": 117262056, + "step": 174025 + }, + { + "epoch": 4.2515818532724206, + "grad_norm": 0.00022168007853906602, + "learning_rate": 1.3343217284162566e-07, + "loss": 0.0, + "num_input_tokens_seen": 117265448, + "step": 174030 + }, + { + "epoch": 4.251704004104268, + "grad_norm": 0.0004635670338757336, + "learning_rate": 1.333896174121665e-07, + "loss": 0.0, + "num_input_tokens_seen": 117268392, + "step": 174035 + }, + { + "epoch": 4.251826154936115, + "grad_norm": 0.00015483734023291618, + "learning_rate": 1.3334706828493137e-07, + "loss": 0.0, + "num_input_tokens_seen": 117271720, + "step": 174040 + }, + { + "epoch": 4.251948305767963, + "grad_norm": 0.0002787092234939337, + "learning_rate": 1.333045254602294e-07, + "loss": 0.0, + "num_input_tokens_seen": 117274856, + "step": 174045 + }, + { + "epoch": 4.252070456599809, + "grad_norm": 0.0005139851709827781, + "learning_rate": 1.3326198893836994e-07, + "loss": 0.0, + "num_input_tokens_seen": 117278120, + "step": 174050 + }, + { + "epoch": 4.252192607431657, + "grad_norm": 2.0777453755727038e-05, + "learning_rate": 1.3321945871966234e-07, + "loss": 0.0, + "num_input_tokens_seen": 117281256, + "step": 174055 + }, + { + "epoch": 4.252314758263504, + "grad_norm": 3.087955337832682e-05, + "learning_rate": 1.3317693480441615e-07, + "loss": 0.0, + "num_input_tokens_seen": 117284648, + "step": 174060 + }, + { + "epoch": 4.252436909095351, + "grad_norm": 0.002935598837211728, + "learning_rate": 1.3313441719294027e-07, + "loss": 0.0, + "num_input_tokens_seen": 117288552, + "step": 174065 + }, + { + "epoch": 4.252559059927198, + "grad_norm": 8.68289134814404e-06, + "learning_rate": 1.3309190588554432e-07, + "loss": 0.0, + "num_input_tokens_seen": 117292200, + "step": 174070 + }, + { + "epoch": 4.252681210759045, + "grad_norm": 4.14956193708349e-05, + "learning_rate": 1.330494008825369e-07, + "loss": 0.0, + "num_input_tokens_seen": 117295528, + "step": 174075 + }, + { + "epoch": 4.2528033615908925, + "grad_norm": 0.00044843723298981786, + "learning_rate": 1.330069021842275e-07, + "loss": 0.0, + "num_input_tokens_seen": 117298984, + "step": 174080 + }, + { + "epoch": 4.252925512422739, + "grad_norm": 0.00042743535595946014, + "learning_rate": 1.3296440979092527e-07, + "loss": 0.0, + "num_input_tokens_seen": 117302760, + "step": 174085 + }, + { + "epoch": 4.253047663254587, + "grad_norm": 0.00012097960279788822, + "learning_rate": 1.3292192370293887e-07, + "loss": 0.0, + "num_input_tokens_seen": 117305768, + "step": 174090 + }, + { + "epoch": 4.253169814086434, + "grad_norm": 0.00042267670505680144, + "learning_rate": 1.328794439205777e-07, + "loss": 0.0, + "num_input_tokens_seen": 117309224, + "step": 174095 + }, + { + "epoch": 4.253291964918281, + "grad_norm": 8.828636782709509e-05, + "learning_rate": 1.328369704441501e-07, + "loss": 0.0, + "num_input_tokens_seen": 117312488, + "step": 174100 + }, + { + "epoch": 4.253414115750128, + "grad_norm": 0.00013019546167925, + "learning_rate": 1.3279450327396568e-07, + "loss": 0.0, + "num_input_tokens_seen": 117315688, + "step": 174105 + }, + { + "epoch": 4.253536266581976, + "grad_norm": 0.00131877395324409, + "learning_rate": 1.3275204241033255e-07, + "loss": 0.0, + "num_input_tokens_seen": 117319336, + "step": 174110 + }, + { + "epoch": 4.253658417413822, + "grad_norm": 8.842379611451179e-05, + "learning_rate": 1.327095878535598e-07, + "loss": 0.0, + "num_input_tokens_seen": 117322344, + "step": 174115 + }, + { + "epoch": 4.25378056824567, + "grad_norm": 0.00011061535042244941, + "learning_rate": 1.3266713960395647e-07, + "loss": 0.0, + "num_input_tokens_seen": 117325288, + "step": 174120 + }, + { + "epoch": 4.253902719077517, + "grad_norm": 0.0001622894051251933, + "learning_rate": 1.3262469766183083e-07, + "loss": 0.0, + "num_input_tokens_seen": 117328744, + "step": 174125 + }, + { + "epoch": 4.254024869909364, + "grad_norm": 0.0030617835000157356, + "learning_rate": 1.325822620274918e-07, + "loss": 0.0, + "num_input_tokens_seen": 117331880, + "step": 174130 + }, + { + "epoch": 4.254147020741211, + "grad_norm": 3.148586620227434e-05, + "learning_rate": 1.325398327012479e-07, + "loss": 0.0, + "num_input_tokens_seen": 117335208, + "step": 174135 + }, + { + "epoch": 4.254269171573059, + "grad_norm": 5.484212579176528e-06, + "learning_rate": 1.324974096834075e-07, + "loss": 0.0, + "num_input_tokens_seen": 117338344, + "step": 174140 + }, + { + "epoch": 4.2543913224049055, + "grad_norm": 9.148853132501245e-05, + "learning_rate": 1.3245499297427943e-07, + "loss": 0.0, + "num_input_tokens_seen": 117341800, + "step": 174145 + }, + { + "epoch": 4.254513473236753, + "grad_norm": 0.0003981620538979769, + "learning_rate": 1.3241258257417177e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117345064, + "step": 174150 + }, + { + "epoch": 4.2546356240686, + "grad_norm": 0.00012439371494110674, + "learning_rate": 1.323701784833934e-07, + "loss": 0.0, + "num_input_tokens_seen": 117348520, + "step": 174155 + }, + { + "epoch": 4.2547577749004475, + "grad_norm": 0.00014706332876812667, + "learning_rate": 1.3232778070225227e-07, + "loss": 0.0, + "num_input_tokens_seen": 117351976, + "step": 174160 + }, + { + "epoch": 4.254879925732294, + "grad_norm": 0.0016874118009582162, + "learning_rate": 1.3228538923105704e-07, + "loss": 0.0, + "num_input_tokens_seen": 117355432, + "step": 174165 + }, + { + "epoch": 4.255002076564141, + "grad_norm": 0.00014558476686943322, + "learning_rate": 1.3224300407011558e-07, + "loss": 0.0, + "num_input_tokens_seen": 117359016, + "step": 174170 + }, + { + "epoch": 4.255124227395989, + "grad_norm": 0.00010693442891351879, + "learning_rate": 1.3220062521973652e-07, + "loss": 0.0, + "num_input_tokens_seen": 117362728, + "step": 174175 + }, + { + "epoch": 4.255246378227835, + "grad_norm": 0.00010506354738026857, + "learning_rate": 1.3215825268022807e-07, + "loss": 0.0, + "num_input_tokens_seen": 117365928, + "step": 174180 + }, + { + "epoch": 4.255368529059683, + "grad_norm": 4.111505040782504e-05, + "learning_rate": 1.3211588645189809e-07, + "loss": 0.0, + "num_input_tokens_seen": 117369896, + "step": 174185 + }, + { + "epoch": 4.25549067989153, + "grad_norm": 0.0002986934850923717, + "learning_rate": 1.3207352653505488e-07, + "loss": 0.0, + "num_input_tokens_seen": 117373480, + "step": 174190 + }, + { + "epoch": 4.255612830723377, + "grad_norm": 0.0003481210151221603, + "learning_rate": 1.3203117293000632e-07, + "loss": 0.0, + "num_input_tokens_seen": 117376488, + "step": 174195 + }, + { + "epoch": 4.255734981555224, + "grad_norm": 6.517604924738407e-05, + "learning_rate": 1.3198882563706082e-07, + "loss": 0.0, + "num_input_tokens_seen": 117379560, + "step": 174200 + }, + { + "epoch": 4.255857132387072, + "grad_norm": 7.557028584415093e-05, + "learning_rate": 1.319464846565257e-07, + "loss": 0.0, + "num_input_tokens_seen": 117383016, + "step": 174205 + }, + { + "epoch": 4.2559792832189185, + "grad_norm": 3.513231058605015e-05, + "learning_rate": 1.3190414998870924e-07, + "loss": 0.0, + "num_input_tokens_seen": 117386792, + "step": 174210 + }, + { + "epoch": 4.256101434050766, + "grad_norm": 0.000536827661562711, + "learning_rate": 1.3186182163391957e-07, + "loss": 0.0, + "num_input_tokens_seen": 117390056, + "step": 174215 + }, + { + "epoch": 4.256223584882613, + "grad_norm": 0.0003994362778030336, + "learning_rate": 1.3181949959246398e-07, + "loss": 0.0, + "num_input_tokens_seen": 117393512, + "step": 174220 + }, + { + "epoch": 4.2563457357144605, + "grad_norm": 1.1261043255217373e-05, + "learning_rate": 1.3177718386465065e-07, + "loss": 0.0, + "num_input_tokens_seen": 117397224, + "step": 174225 + }, + { + "epoch": 4.256467886546307, + "grad_norm": 0.0023173654917627573, + "learning_rate": 1.3173487445078702e-07, + "loss": 0.0, + "num_input_tokens_seen": 117401064, + "step": 174230 + }, + { + "epoch": 4.256590037378155, + "grad_norm": 0.001880491035990417, + "learning_rate": 1.3169257135118118e-07, + "loss": 0.0, + "num_input_tokens_seen": 117404392, + "step": 174235 + }, + { + "epoch": 4.256712188210002, + "grad_norm": 0.000906631350517273, + "learning_rate": 1.316502745661402e-07, + "loss": 0.0, + "num_input_tokens_seen": 117407720, + "step": 174240 + }, + { + "epoch": 4.256834339041849, + "grad_norm": 5.04143608850427e-05, + "learning_rate": 1.316079840959723e-07, + "loss": 0.0, + "num_input_tokens_seen": 117411240, + "step": 174245 + }, + { + "epoch": 4.256956489873696, + "grad_norm": 0.01137853879481554, + "learning_rate": 1.3156569994098465e-07, + "loss": 0.0, + "num_input_tokens_seen": 117414504, + "step": 174250 + }, + { + "epoch": 4.257078640705544, + "grad_norm": 6.655412107647862e-06, + "learning_rate": 1.3152342210148447e-07, + "loss": 0.0, + "num_input_tokens_seen": 117418408, + "step": 174255 + }, + { + "epoch": 4.25720079153739, + "grad_norm": 0.0011601169826462865, + "learning_rate": 1.3148115057777997e-07, + "loss": 0.0, + "num_input_tokens_seen": 117422568, + "step": 174260 + }, + { + "epoch": 4.257322942369237, + "grad_norm": 0.0007189237512648106, + "learning_rate": 1.3143888537017788e-07, + "loss": 0.0, + "num_input_tokens_seen": 117425768, + "step": 174265 + }, + { + "epoch": 4.257445093201085, + "grad_norm": 8.994324161903933e-05, + "learning_rate": 1.3139662647898574e-07, + "loss": 0.0, + "num_input_tokens_seen": 117429096, + "step": 174270 + }, + { + "epoch": 4.2575672440329315, + "grad_norm": 0.00013263424625620246, + "learning_rate": 1.313543739045113e-07, + "loss": 0.0, + "num_input_tokens_seen": 117432168, + "step": 174275 + }, + { + "epoch": 4.257689394864779, + "grad_norm": 2.323191802133806e-05, + "learning_rate": 1.313121276470611e-07, + "loss": 0.0, + "num_input_tokens_seen": 117435688, + "step": 174280 + }, + { + "epoch": 4.257811545696626, + "grad_norm": 0.0006844392628408968, + "learning_rate": 1.3126988770694314e-07, + "loss": 0.0, + "num_input_tokens_seen": 117439016, + "step": 174285 + }, + { + "epoch": 4.2579336965284735, + "grad_norm": 6.209936691448092e-05, + "learning_rate": 1.31227654084464e-07, + "loss": 0.0, + "num_input_tokens_seen": 117442664, + "step": 174290 + }, + { + "epoch": 4.25805584736032, + "grad_norm": 4.204810466035269e-05, + "learning_rate": 1.3118542677993116e-07, + "loss": 0.0, + "num_input_tokens_seen": 117445992, + "step": 174295 + }, + { + "epoch": 4.258177998192168, + "grad_norm": 0.001022745156660676, + "learning_rate": 1.3114320579365134e-07, + "loss": 0.0, + "num_input_tokens_seen": 117449384, + "step": 174300 + }, + { + "epoch": 4.258300149024015, + "grad_norm": 0.0005625466583296657, + "learning_rate": 1.3110099112593199e-07, + "loss": 0.0, + "num_input_tokens_seen": 117452456, + "step": 174305 + }, + { + "epoch": 4.258422299855862, + "grad_norm": 7.460943379555829e-06, + "learning_rate": 1.3105878277707992e-07, + "loss": 0.0, + "num_input_tokens_seen": 117455784, + "step": 174310 + }, + { + "epoch": 4.258544450687709, + "grad_norm": 0.0010313538368791342, + "learning_rate": 1.3101658074740207e-07, + "loss": 0.0, + "num_input_tokens_seen": 117459048, + "step": 174315 + }, + { + "epoch": 4.258666601519557, + "grad_norm": 0.00015061290469020605, + "learning_rate": 1.3097438503720548e-07, + "loss": 0.0, + "num_input_tokens_seen": 117462568, + "step": 174320 + }, + { + "epoch": 4.258788752351403, + "grad_norm": 0.00010983618994941935, + "learning_rate": 1.309321956467968e-07, + "loss": 0.0, + "num_input_tokens_seen": 117465576, + "step": 174325 + }, + { + "epoch": 4.258910903183251, + "grad_norm": 7.537674537161365e-05, + "learning_rate": 1.308900125764828e-07, + "loss": 0.0, + "num_input_tokens_seen": 117469160, + "step": 174330 + }, + { + "epoch": 4.259033054015098, + "grad_norm": 0.001629429985769093, + "learning_rate": 1.3084783582657077e-07, + "loss": 0.0, + "num_input_tokens_seen": 117472744, + "step": 174335 + }, + { + "epoch": 4.2591552048469445, + "grad_norm": 0.00039150213706307113, + "learning_rate": 1.3080566539736691e-07, + "loss": 0.0, + "num_input_tokens_seen": 117476136, + "step": 174340 + }, + { + "epoch": 4.259277355678792, + "grad_norm": 2.7556236091186292e-05, + "learning_rate": 1.307635012891779e-07, + "loss": 0.0, + "num_input_tokens_seen": 117479528, + "step": 174345 + }, + { + "epoch": 4.259399506510639, + "grad_norm": 1.5702662494732067e-05, + "learning_rate": 1.3072134350231068e-07, + "loss": 0.0, + "num_input_tokens_seen": 117482920, + "step": 174350 + }, + { + "epoch": 4.2595216573424866, + "grad_norm": 0.012511259876191616, + "learning_rate": 1.3067919203707168e-07, + "loss": 0.0, + "num_input_tokens_seen": 117486248, + "step": 174355 + }, + { + "epoch": 4.259643808174333, + "grad_norm": 0.004491363186389208, + "learning_rate": 1.306370468937672e-07, + "loss": 0.0, + "num_input_tokens_seen": 117489192, + "step": 174360 + }, + { + "epoch": 4.259765959006181, + "grad_norm": 9.086837053473573e-06, + "learning_rate": 1.305949080727039e-07, + "loss": 0.0, + "num_input_tokens_seen": 117492264, + "step": 174365 + }, + { + "epoch": 4.259888109838028, + "grad_norm": 8.383391104871407e-05, + "learning_rate": 1.3055277557418854e-07, + "loss": 0.0, + "num_input_tokens_seen": 117495592, + "step": 174370 + }, + { + "epoch": 4.260010260669875, + "grad_norm": 0.0011324695078656077, + "learning_rate": 1.3051064939852706e-07, + "loss": 0.0, + "num_input_tokens_seen": 117498856, + "step": 174375 + }, + { + "epoch": 4.260132411501722, + "grad_norm": 8.879219967639074e-05, + "learning_rate": 1.3046852954602617e-07, + "loss": 0.0, + "num_input_tokens_seen": 117501736, + "step": 174380 + }, + { + "epoch": 4.26025456233357, + "grad_norm": 6.638761988142505e-06, + "learning_rate": 1.3042641601699178e-07, + "loss": 0.0, + "num_input_tokens_seen": 117505384, + "step": 174385 + }, + { + "epoch": 4.260376713165416, + "grad_norm": 0.00012476768461056054, + "learning_rate": 1.3038430881173035e-07, + "loss": 0.0, + "num_input_tokens_seen": 117508648, + "step": 174390 + }, + { + "epoch": 4.260498863997264, + "grad_norm": 5.976101601845585e-05, + "learning_rate": 1.303422079305484e-07, + "loss": 0.0, + "num_input_tokens_seen": 117511976, + "step": 174395 + }, + { + "epoch": 4.260621014829111, + "grad_norm": 1.2742106264340691e-05, + "learning_rate": 1.3030011337375158e-07, + "loss": 0.0, + "num_input_tokens_seen": 117515240, + "step": 174400 + }, + { + "epoch": 4.2607431656609585, + "grad_norm": 2.1465803001774475e-05, + "learning_rate": 1.3025802514164653e-07, + "loss": 0.0, + "num_input_tokens_seen": 117518376, + "step": 174405 + }, + { + "epoch": 4.260865316492805, + "grad_norm": 7.837494922569022e-05, + "learning_rate": 1.3021594323453878e-07, + "loss": 0.0, + "num_input_tokens_seen": 117521512, + "step": 174410 + }, + { + "epoch": 4.260987467324653, + "grad_norm": 0.0001668841578066349, + "learning_rate": 1.3017386765273487e-07, + "loss": 0.0, + "num_input_tokens_seen": 117524776, + "step": 174415 + }, + { + "epoch": 4.2611096181565, + "grad_norm": 0.0011746666859835386, + "learning_rate": 1.3013179839654033e-07, + "loss": 0.0, + "num_input_tokens_seen": 117527912, + "step": 174420 + }, + { + "epoch": 4.261231768988347, + "grad_norm": 7.951833140396047e-06, + "learning_rate": 1.3008973546626134e-07, + "loss": 0.0, + "num_input_tokens_seen": 117531240, + "step": 174425 + }, + { + "epoch": 4.261353919820194, + "grad_norm": 0.0004515565815381706, + "learning_rate": 1.3004767886220391e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117535016, + "step": 174430 + }, + { + "epoch": 4.261476070652041, + "grad_norm": 4.313921454013325e-05, + "learning_rate": 1.3000562858467368e-07, + "loss": 0.0, + "num_input_tokens_seen": 117538856, + "step": 174435 + }, + { + "epoch": 4.261598221483888, + "grad_norm": 0.00011880762758664787, + "learning_rate": 1.2996358463397662e-07, + "loss": 0.0, + "num_input_tokens_seen": 117542312, + "step": 174440 + }, + { + "epoch": 4.261720372315735, + "grad_norm": 0.002191459061577916, + "learning_rate": 1.2992154701041836e-07, + "loss": 0.0, + "num_input_tokens_seen": 117545512, + "step": 174445 + }, + { + "epoch": 4.261842523147583, + "grad_norm": 2.773893584162579e-06, + "learning_rate": 1.2987951571430456e-07, + "loss": 0.0522, + "num_input_tokens_seen": 117549160, + "step": 174450 + }, + { + "epoch": 4.2619646739794295, + "grad_norm": 0.0002300427295267582, + "learning_rate": 1.2983749074594097e-07, + "loss": 0.0, + "num_input_tokens_seen": 117552616, + "step": 174455 + }, + { + "epoch": 4.262086824811277, + "grad_norm": 4.851877747569233e-05, + "learning_rate": 1.2979547210563313e-07, + "loss": 0.0, + "num_input_tokens_seen": 117555752, + "step": 174460 + }, + { + "epoch": 4.262208975643124, + "grad_norm": 0.008494346402585506, + "learning_rate": 1.297534597936869e-07, + "loss": 0.0, + "num_input_tokens_seen": 117558824, + "step": 174465 + }, + { + "epoch": 4.2623311264749715, + "grad_norm": 0.00031238130759447813, + "learning_rate": 1.2971145381040726e-07, + "loss": 0.0, + "num_input_tokens_seen": 117562088, + "step": 174470 + }, + { + "epoch": 4.262453277306818, + "grad_norm": 0.00039827567525207996, + "learning_rate": 1.296694541561003e-07, + "loss": 0.0, + "num_input_tokens_seen": 117565416, + "step": 174475 + }, + { + "epoch": 4.262575428138666, + "grad_norm": 0.0004993233596906066, + "learning_rate": 1.296274608310709e-07, + "loss": 0.0, + "num_input_tokens_seen": 117568808, + "step": 174480 + }, + { + "epoch": 4.262697578970513, + "grad_norm": 0.00011720485053956509, + "learning_rate": 1.2958547383562468e-07, + "loss": 0.0, + "num_input_tokens_seen": 117572072, + "step": 174485 + }, + { + "epoch": 4.26281972980236, + "grad_norm": 1.069751124305185e-05, + "learning_rate": 1.295434931700673e-07, + "loss": 0.0, + "num_input_tokens_seen": 117575784, + "step": 174490 + }, + { + "epoch": 4.262941880634207, + "grad_norm": 0.00021572900004684925, + "learning_rate": 1.295015188347035e-07, + "loss": 0.0, + "num_input_tokens_seen": 117579432, + "step": 174495 + }, + { + "epoch": 4.263064031466055, + "grad_norm": 0.002782398136332631, + "learning_rate": 1.2945955082983906e-07, + "loss": 0.0, + "num_input_tokens_seen": 117583016, + "step": 174500 + }, + { + "epoch": 4.263186182297901, + "grad_norm": 6.141927588032559e-05, + "learning_rate": 1.2941758915577862e-07, + "loss": 0.0318, + "num_input_tokens_seen": 117586408, + "step": 174505 + }, + { + "epoch": 4.263308333129749, + "grad_norm": 0.0004919608472846448, + "learning_rate": 1.293756338128279e-07, + "loss": 0.0, + "num_input_tokens_seen": 117589736, + "step": 174510 + }, + { + "epoch": 4.263430483961596, + "grad_norm": 0.0031189259607344866, + "learning_rate": 1.2933368480129148e-07, + "loss": 0.0, + "num_input_tokens_seen": 117592872, + "step": 174515 + }, + { + "epoch": 4.263552634793443, + "grad_norm": 5.286826490191743e-05, + "learning_rate": 1.2929174212147475e-07, + "loss": 0.0, + "num_input_tokens_seen": 117596456, + "step": 174520 + }, + { + "epoch": 4.26367478562529, + "grad_norm": 0.00019402038014959544, + "learning_rate": 1.2924980577368284e-07, + "loss": 0.0, + "num_input_tokens_seen": 117599912, + "step": 174525 + }, + { + "epoch": 4.263796936457137, + "grad_norm": 0.0013834084384143353, + "learning_rate": 1.2920787575822035e-07, + "loss": 0.0, + "num_input_tokens_seen": 117603240, + "step": 174530 + }, + { + "epoch": 4.2639190872889845, + "grad_norm": 1.8565729988040403e-05, + "learning_rate": 1.291659520753926e-07, + "loss": 0.0, + "num_input_tokens_seen": 117606952, + "step": 174535 + }, + { + "epoch": 4.264041238120831, + "grad_norm": 0.0015397652750834823, + "learning_rate": 1.2912403472550405e-07, + "loss": 0.0, + "num_input_tokens_seen": 117610472, + "step": 174540 + }, + { + "epoch": 4.264163388952679, + "grad_norm": 0.002840655390173197, + "learning_rate": 1.2908212370885997e-07, + "loss": 0.0, + "num_input_tokens_seen": 117613928, + "step": 174545 + }, + { + "epoch": 4.264285539784526, + "grad_norm": 0.00011604101746343076, + "learning_rate": 1.2904021902576467e-07, + "loss": 0.0, + "num_input_tokens_seen": 117617256, + "step": 174550 + }, + { + "epoch": 4.264407690616373, + "grad_norm": 0.0003640939248725772, + "learning_rate": 1.289983206765235e-07, + "loss": 0.0, + "num_input_tokens_seen": 117620584, + "step": 174555 + }, + { + "epoch": 4.26452984144822, + "grad_norm": 0.006663096137344837, + "learning_rate": 1.2895642866144075e-07, + "loss": 0.0, + "num_input_tokens_seen": 117624360, + "step": 174560 + }, + { + "epoch": 4.264651992280068, + "grad_norm": 0.0002884053101297468, + "learning_rate": 1.2891454298082084e-07, + "loss": 0.0, + "num_input_tokens_seen": 117627688, + "step": 174565 + }, + { + "epoch": 4.264774143111914, + "grad_norm": 0.00045368506107479334, + "learning_rate": 1.2887266363496897e-07, + "loss": 0.0, + "num_input_tokens_seen": 117631208, + "step": 174570 + }, + { + "epoch": 4.264896293943762, + "grad_norm": 7.1562126322533e-06, + "learning_rate": 1.2883079062418922e-07, + "loss": 0.0, + "num_input_tokens_seen": 117634728, + "step": 174575 + }, + { + "epoch": 4.265018444775609, + "grad_norm": 0.00015068422362674028, + "learning_rate": 1.2878892394878616e-07, + "loss": 0.0, + "num_input_tokens_seen": 117638248, + "step": 174580 + }, + { + "epoch": 4.265140595607456, + "grad_norm": 7.73434730945155e-05, + "learning_rate": 1.2874706360906462e-07, + "loss": 0.0, + "num_input_tokens_seen": 117642216, + "step": 174585 + }, + { + "epoch": 4.265262746439303, + "grad_norm": 0.00013224811118561774, + "learning_rate": 1.287052096053286e-07, + "loss": 0.0, + "num_input_tokens_seen": 117645352, + "step": 174590 + }, + { + "epoch": 4.265384897271151, + "grad_norm": 0.0004519324575085193, + "learning_rate": 1.2866336193788285e-07, + "loss": 0.0, + "num_input_tokens_seen": 117648360, + "step": 174595 + }, + { + "epoch": 4.2655070481029975, + "grad_norm": 0.00014004706463310868, + "learning_rate": 1.2862152060703135e-07, + "loss": 0.0, + "num_input_tokens_seen": 117651304, + "step": 174600 + }, + { + "epoch": 4.265629198934845, + "grad_norm": 3.622357689891942e-05, + "learning_rate": 1.2857968561307864e-07, + "loss": 0.0105, + "num_input_tokens_seen": 117654632, + "step": 174605 + }, + { + "epoch": 4.265751349766692, + "grad_norm": 7.538398494943976e-05, + "learning_rate": 1.285378569563287e-07, + "loss": 0.0, + "num_input_tokens_seen": 117657768, + "step": 174610 + }, + { + "epoch": 4.2658735005985395, + "grad_norm": 9.134790161624551e-05, + "learning_rate": 1.284960346370858e-07, + "loss": 0.0, + "num_input_tokens_seen": 117660968, + "step": 174615 + }, + { + "epoch": 4.265995651430386, + "grad_norm": 0.00016150051669683307, + "learning_rate": 1.284542186556543e-07, + "loss": 0.0, + "num_input_tokens_seen": 117664360, + "step": 174620 + }, + { + "epoch": 4.266117802262233, + "grad_norm": 0.0005944840959273279, + "learning_rate": 1.2841240901233796e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117667880, + "step": 174625 + }, + { + "epoch": 4.266239953094081, + "grad_norm": 4.92603903694544e-05, + "learning_rate": 1.2837060570744128e-07, + "loss": 0.0, + "num_input_tokens_seen": 117671336, + "step": 174630 + }, + { + "epoch": 4.266362103925927, + "grad_norm": 0.00011865422129631042, + "learning_rate": 1.2832880874126784e-07, + "loss": 0.0, + "num_input_tokens_seen": 117674856, + "step": 174635 + }, + { + "epoch": 4.266484254757775, + "grad_norm": 0.0009908857755362988, + "learning_rate": 1.2828701811412168e-07, + "loss": 0.0, + "num_input_tokens_seen": 117678120, + "step": 174640 + }, + { + "epoch": 4.266606405589622, + "grad_norm": 2.8118483896832913e-05, + "learning_rate": 1.2824523382630692e-07, + "loss": 0.0, + "num_input_tokens_seen": 117681448, + "step": 174645 + }, + { + "epoch": 4.266728556421469, + "grad_norm": 0.00045708840480074286, + "learning_rate": 1.2820345587812743e-07, + "loss": 0.0, + "num_input_tokens_seen": 117684520, + "step": 174650 + }, + { + "epoch": 4.266850707253316, + "grad_norm": 9.125410542765167e-06, + "learning_rate": 1.281616842698866e-07, + "loss": 0.0, + "num_input_tokens_seen": 117688424, + "step": 174655 + }, + { + "epoch": 4.266972858085164, + "grad_norm": 7.427701348206028e-05, + "learning_rate": 1.2811991900188868e-07, + "loss": 0.0, + "num_input_tokens_seen": 117691432, + "step": 174660 + }, + { + "epoch": 4.2670950089170105, + "grad_norm": 4.319162599131232e-06, + "learning_rate": 1.2807816007443727e-07, + "loss": 0.0, + "num_input_tokens_seen": 117694760, + "step": 174665 + }, + { + "epoch": 4.267217159748858, + "grad_norm": 0.0005892039043828845, + "learning_rate": 1.2803640748783583e-07, + "loss": 0.0, + "num_input_tokens_seen": 117697896, + "step": 174670 + }, + { + "epoch": 4.267339310580705, + "grad_norm": 0.0002718200266826898, + "learning_rate": 1.279946612423881e-07, + "loss": 0.0, + "num_input_tokens_seen": 117702312, + "step": 174675 + }, + { + "epoch": 4.267461461412553, + "grad_norm": 8.382520900340751e-05, + "learning_rate": 1.2795292133839796e-07, + "loss": 0.0, + "num_input_tokens_seen": 117705576, + "step": 174680 + }, + { + "epoch": 4.267583612244399, + "grad_norm": 9.732329635880888e-05, + "learning_rate": 1.279111877761685e-07, + "loss": 0.0, + "num_input_tokens_seen": 117708904, + "step": 174685 + }, + { + "epoch": 4.267705763076247, + "grad_norm": 7.27354881746578e-06, + "learning_rate": 1.2786946055600367e-07, + "loss": 0.0, + "num_input_tokens_seen": 117712424, + "step": 174690 + }, + { + "epoch": 4.267827913908094, + "grad_norm": 0.002255277708172798, + "learning_rate": 1.2782773967820647e-07, + "loss": 0.0, + "num_input_tokens_seen": 117716328, + "step": 174695 + }, + { + "epoch": 4.26795006473994, + "grad_norm": 3.05923203995917e-05, + "learning_rate": 1.277860251430808e-07, + "loss": 0.0, + "num_input_tokens_seen": 117719528, + "step": 174700 + }, + { + "epoch": 4.268072215571788, + "grad_norm": 7.840585749363527e-05, + "learning_rate": 1.2774431695092958e-07, + "loss": 0.0, + "num_input_tokens_seen": 117722600, + "step": 174705 + }, + { + "epoch": 4.268194366403635, + "grad_norm": 3.6371238820720464e-05, + "learning_rate": 1.2770261510205616e-07, + "loss": 0.0, + "num_input_tokens_seen": 117726248, + "step": 174710 + }, + { + "epoch": 4.268316517235482, + "grad_norm": 0.0004068179405294359, + "learning_rate": 1.2766091959676427e-07, + "loss": 0.0, + "num_input_tokens_seen": 117730024, + "step": 174715 + }, + { + "epoch": 4.268438668067329, + "grad_norm": 0.000273067009402439, + "learning_rate": 1.276192304353565e-07, + "loss": 0.0, + "num_input_tokens_seen": 117733288, + "step": 174720 + }, + { + "epoch": 4.268560818899177, + "grad_norm": 3.6302051739767194e-05, + "learning_rate": 1.2757754761813667e-07, + "loss": 0.0, + "num_input_tokens_seen": 117736872, + "step": 174725 + }, + { + "epoch": 4.268682969731024, + "grad_norm": 2.0907571524730884e-05, + "learning_rate": 1.275358711454072e-07, + "loss": 0.0, + "num_input_tokens_seen": 117740392, + "step": 174730 + }, + { + "epoch": 4.268805120562871, + "grad_norm": 15.842265129089355, + "learning_rate": 1.2749420101747165e-07, + "loss": 0.0256, + "num_input_tokens_seen": 117743464, + "step": 174735 + }, + { + "epoch": 4.268927271394718, + "grad_norm": 0.00020856004266534, + "learning_rate": 1.2745253723463311e-07, + "loss": 0.0, + "num_input_tokens_seen": 117746984, + "step": 174740 + }, + { + "epoch": 4.269049422226566, + "grad_norm": 0.0001524622057331726, + "learning_rate": 1.2741087979719412e-07, + "loss": 0.0, + "num_input_tokens_seen": 117750952, + "step": 174745 + }, + { + "epoch": 4.269171573058412, + "grad_norm": 5.263303319225088e-05, + "learning_rate": 1.2736922870545829e-07, + "loss": 0.0, + "num_input_tokens_seen": 117754472, + "step": 174750 + }, + { + "epoch": 4.26929372389026, + "grad_norm": 0.0003677001514006406, + "learning_rate": 1.27327583959728e-07, + "loss": 0.0, + "num_input_tokens_seen": 117757608, + "step": 174755 + }, + { + "epoch": 4.269415874722107, + "grad_norm": 0.0001912468287628144, + "learning_rate": 1.2728594556030613e-07, + "loss": 0.0, + "num_input_tokens_seen": 117760808, + "step": 174760 + }, + { + "epoch": 4.269538025553954, + "grad_norm": 0.0002439589734422043, + "learning_rate": 1.2724431350749576e-07, + "loss": 0.0, + "num_input_tokens_seen": 117764072, + "step": 174765 + }, + { + "epoch": 4.269660176385801, + "grad_norm": 2.498049070709385e-05, + "learning_rate": 1.2720268780159927e-07, + "loss": 0.0, + "num_input_tokens_seen": 117767656, + "step": 174770 + }, + { + "epoch": 4.269782327217649, + "grad_norm": 7.077799818944186e-05, + "learning_rate": 1.2716106844291974e-07, + "loss": 0.0, + "num_input_tokens_seen": 117770792, + "step": 174775 + }, + { + "epoch": 4.2699044780494955, + "grad_norm": 5.7361143262824044e-05, + "learning_rate": 1.271194554317595e-07, + "loss": 0.0, + "num_input_tokens_seen": 117774056, + "step": 174780 + }, + { + "epoch": 4.270026628881343, + "grad_norm": 0.0009771619224920869, + "learning_rate": 1.2707784876842165e-07, + "loss": 0.0, + "num_input_tokens_seen": 117777320, + "step": 174785 + }, + { + "epoch": 4.27014877971319, + "grad_norm": 3.899524381267838e-05, + "learning_rate": 1.2703624845320826e-07, + "loss": 0.0, + "num_input_tokens_seen": 117780584, + "step": 174790 + }, + { + "epoch": 4.270270930545037, + "grad_norm": 0.0007512098527513444, + "learning_rate": 1.2699465448642198e-07, + "loss": 0.0, + "num_input_tokens_seen": 117783848, + "step": 174795 + }, + { + "epoch": 4.270393081376884, + "grad_norm": 3.748635572264902e-05, + "learning_rate": 1.269530668683656e-07, + "loss": 0.0, + "num_input_tokens_seen": 117787560, + "step": 174800 + }, + { + "epoch": 4.270515232208731, + "grad_norm": 0.003991847857832909, + "learning_rate": 1.2691148559934117e-07, + "loss": 0.0, + "num_input_tokens_seen": 117790760, + "step": 174805 + }, + { + "epoch": 4.270637383040579, + "grad_norm": 0.001877957722172141, + "learning_rate": 1.2686991067965147e-07, + "loss": 0.0, + "num_input_tokens_seen": 117793704, + "step": 174810 + }, + { + "epoch": 4.270759533872425, + "grad_norm": 0.0001509853609604761, + "learning_rate": 1.2682834210959847e-07, + "loss": 0.0, + "num_input_tokens_seen": 117796712, + "step": 174815 + }, + { + "epoch": 4.270881684704273, + "grad_norm": 1.75936638697749e-05, + "learning_rate": 1.2678677988948473e-07, + "loss": 0.0001, + "num_input_tokens_seen": 117799912, + "step": 174820 + }, + { + "epoch": 4.27100383553612, + "grad_norm": 0.000486937933601439, + "learning_rate": 1.2674522401961218e-07, + "loss": 0.0, + "num_input_tokens_seen": 117803112, + "step": 174825 + }, + { + "epoch": 4.271125986367967, + "grad_norm": 0.0001492535520810634, + "learning_rate": 1.2670367450028328e-07, + "loss": 0.0, + "num_input_tokens_seen": 117806376, + "step": 174830 + }, + { + "epoch": 4.271248137199814, + "grad_norm": 0.00012599291221704334, + "learning_rate": 1.2666213133180038e-07, + "loss": 0.0, + "num_input_tokens_seen": 117809896, + "step": 174835 + }, + { + "epoch": 4.271370288031662, + "grad_norm": 4.7090870793908834e-05, + "learning_rate": 1.2662059451446506e-07, + "loss": 0.0, + "num_input_tokens_seen": 117813224, + "step": 174840 + }, + { + "epoch": 4.2714924388635085, + "grad_norm": 0.00016614743799436837, + "learning_rate": 1.2657906404858e-07, + "loss": 0.0, + "num_input_tokens_seen": 117816488, + "step": 174845 + }, + { + "epoch": 4.271614589695356, + "grad_norm": 0.00242516677826643, + "learning_rate": 1.265375399344466e-07, + "loss": 0.0, + "num_input_tokens_seen": 117819368, + "step": 174850 + }, + { + "epoch": 4.271736740527203, + "grad_norm": 0.00045968478661961854, + "learning_rate": 1.2649602217236744e-07, + "loss": 0.0, + "num_input_tokens_seen": 117822312, + "step": 174855 + }, + { + "epoch": 4.2718588913590505, + "grad_norm": 0.00011602450103964657, + "learning_rate": 1.264545107626439e-07, + "loss": 0.0005, + "num_input_tokens_seen": 117825576, + "step": 174860 + }, + { + "epoch": 4.271981042190897, + "grad_norm": 3.8411395507864654e-05, + "learning_rate": 1.2641300570557834e-07, + "loss": 0.0, + "num_input_tokens_seen": 117828968, + "step": 174865 + }, + { + "epoch": 4.272103193022745, + "grad_norm": 5.47633899259381e-05, + "learning_rate": 1.2637150700147235e-07, + "loss": 0.0, + "num_input_tokens_seen": 117832808, + "step": 174870 + }, + { + "epoch": 4.272225343854592, + "grad_norm": 0.00022426110808737576, + "learning_rate": 1.2633001465062754e-07, + "loss": 0.0, + "num_input_tokens_seen": 117836136, + "step": 174875 + }, + { + "epoch": 4.272347494686439, + "grad_norm": 6.305616261670366e-05, + "learning_rate": 1.2628852865334606e-07, + "loss": 0.0, + "num_input_tokens_seen": 117839336, + "step": 174880 + }, + { + "epoch": 4.272469645518286, + "grad_norm": 0.00013498378393705934, + "learning_rate": 1.262470490099292e-07, + "loss": 0.0, + "num_input_tokens_seen": 117842536, + "step": 174885 + }, + { + "epoch": 4.272591796350133, + "grad_norm": 0.00016713308286853135, + "learning_rate": 1.262055757206788e-07, + "loss": 0.0, + "num_input_tokens_seen": 117845352, + "step": 174890 + }, + { + "epoch": 4.27271394718198, + "grad_norm": 1.3508079064195044e-05, + "learning_rate": 1.2616410878589666e-07, + "loss": 0.0, + "num_input_tokens_seen": 117848872, + "step": 174895 + }, + { + "epoch": 4.272836098013827, + "grad_norm": 0.00011501550761749968, + "learning_rate": 1.2612264820588403e-07, + "loss": 0.0, + "num_input_tokens_seen": 117852328, + "step": 174900 + }, + { + "epoch": 4.272958248845675, + "grad_norm": 0.0009163783397525549, + "learning_rate": 1.2608119398094276e-07, + "loss": 0.0, + "num_input_tokens_seen": 117855848, + "step": 174905 + }, + { + "epoch": 4.2730803996775215, + "grad_norm": 0.00014613720122724771, + "learning_rate": 1.260397461113738e-07, + "loss": 0.0, + "num_input_tokens_seen": 117859240, + "step": 174910 + }, + { + "epoch": 4.273202550509369, + "grad_norm": 9.100071474676952e-05, + "learning_rate": 1.2599830459747907e-07, + "loss": 0.0, + "num_input_tokens_seen": 117862632, + "step": 174915 + }, + { + "epoch": 4.273324701341216, + "grad_norm": 3.082215471295058e-06, + "learning_rate": 1.2595686943955964e-07, + "loss": 0.0, + "num_input_tokens_seen": 117866152, + "step": 174920 + }, + { + "epoch": 4.2734468521730635, + "grad_norm": 0.0011469591408967972, + "learning_rate": 1.2591544063791683e-07, + "loss": 0.0, + "num_input_tokens_seen": 117869544, + "step": 174925 + }, + { + "epoch": 4.27356900300491, + "grad_norm": 0.00022313492081593722, + "learning_rate": 1.2587401819285237e-07, + "loss": 0.0, + "num_input_tokens_seen": 117872488, + "step": 174930 + }, + { + "epoch": 4.273691153836758, + "grad_norm": 0.0006101438775658607, + "learning_rate": 1.2583260210466685e-07, + "loss": 0.0, + "num_input_tokens_seen": 117875880, + "step": 174935 + }, + { + "epoch": 4.273813304668605, + "grad_norm": 0.02029566280543804, + "learning_rate": 1.25791192373662e-07, + "loss": 0.0, + "num_input_tokens_seen": 117879144, + "step": 174940 + }, + { + "epoch": 4.273935455500452, + "grad_norm": 58.66333770751953, + "learning_rate": 1.2574978900013854e-07, + "loss": 0.0738, + "num_input_tokens_seen": 117882280, + "step": 174945 + }, + { + "epoch": 4.274057606332299, + "grad_norm": 8.697695011505857e-05, + "learning_rate": 1.2570839198439775e-07, + "loss": 0.0, + "num_input_tokens_seen": 117885608, + "step": 174950 + }, + { + "epoch": 4.274179757164147, + "grad_norm": 0.0003056648129131645, + "learning_rate": 1.256670013267409e-07, + "loss": 0.0, + "num_input_tokens_seen": 117888808, + "step": 174955 + }, + { + "epoch": 4.274301907995993, + "grad_norm": 0.0003528349625412375, + "learning_rate": 1.2562561702746888e-07, + "loss": 0.0, + "num_input_tokens_seen": 117891624, + "step": 174960 + }, + { + "epoch": 4.27442405882784, + "grad_norm": 0.0005818032077513635, + "learning_rate": 1.255842390868822e-07, + "loss": 0.0, + "num_input_tokens_seen": 117895080, + "step": 174965 + }, + { + "epoch": 4.274546209659688, + "grad_norm": 0.00101753999479115, + "learning_rate": 1.255428675052824e-07, + "loss": 0.0, + "num_input_tokens_seen": 117898280, + "step": 174970 + }, + { + "epoch": 4.2746683604915345, + "grad_norm": 0.00011096594971604645, + "learning_rate": 1.2550150228297007e-07, + "loss": 0.0, + "num_input_tokens_seen": 117901480, + "step": 174975 + }, + { + "epoch": 4.274790511323382, + "grad_norm": 3.41241029673256e-05, + "learning_rate": 1.254601434202458e-07, + "loss": 0.0, + "num_input_tokens_seen": 117904616, + "step": 174980 + }, + { + "epoch": 4.274912662155229, + "grad_norm": 0.000965735933277756, + "learning_rate": 1.2541879091741058e-07, + "loss": 0.0, + "num_input_tokens_seen": 117907752, + "step": 174985 + }, + { + "epoch": 4.2750348129870765, + "grad_norm": 9.309906999988016e-06, + "learning_rate": 1.253774447747653e-07, + "loss": 0.0, + "num_input_tokens_seen": 117910632, + "step": 174990 + }, + { + "epoch": 4.275156963818923, + "grad_norm": 0.002611256204545498, + "learning_rate": 1.253361049926104e-07, + "loss": 0.0, + "num_input_tokens_seen": 117914472, + "step": 174995 + }, + { + "epoch": 4.275279114650771, + "grad_norm": 3.897144051734358e-05, + "learning_rate": 1.252947715712468e-07, + "loss": 0.0, + "num_input_tokens_seen": 117917416, + "step": 175000 + }, + { + "epoch": 4.275401265482618, + "grad_norm": 2.1422400095616467e-05, + "learning_rate": 1.2525344451097465e-07, + "loss": 0.0, + "num_input_tokens_seen": 117920488, + "step": 175005 + }, + { + "epoch": 4.275523416314465, + "grad_norm": 0.0001733067911118269, + "learning_rate": 1.252121238120949e-07, + "loss": 0.0, + "num_input_tokens_seen": 117923944, + "step": 175010 + }, + { + "epoch": 4.275645567146312, + "grad_norm": 6.505786586785689e-05, + "learning_rate": 1.2517080947490765e-07, + "loss": 0.0, + "num_input_tokens_seen": 117927016, + "step": 175015 + }, + { + "epoch": 4.27576771797816, + "grad_norm": 0.0011223267065361142, + "learning_rate": 1.2512950149971357e-07, + "loss": 0.0174, + "num_input_tokens_seen": 117930152, + "step": 175020 + }, + { + "epoch": 4.275889868810006, + "grad_norm": 0.0003185166569892317, + "learning_rate": 1.250881998868134e-07, + "loss": 0.0, + "num_input_tokens_seen": 117933864, + "step": 175025 + }, + { + "epoch": 4.276012019641854, + "grad_norm": 0.0028908748645335436, + "learning_rate": 1.250469046365068e-07, + "loss": 0.0, + "num_input_tokens_seen": 117937320, + "step": 175030 + }, + { + "epoch": 4.276134170473701, + "grad_norm": 0.0006264683324843645, + "learning_rate": 1.2500561574909474e-07, + "loss": 0.0, + "num_input_tokens_seen": 117940520, + "step": 175035 + }, + { + "epoch": 4.2762563213055484, + "grad_norm": 7.769901276333258e-05, + "learning_rate": 1.2496433322487697e-07, + "loss": 0.0, + "num_input_tokens_seen": 117943976, + "step": 175040 + }, + { + "epoch": 4.276378472137395, + "grad_norm": 4.179087045486085e-05, + "learning_rate": 1.2492305706415397e-07, + "loss": 0.0, + "num_input_tokens_seen": 117947368, + "step": 175045 + }, + { + "epoch": 4.276500622969243, + "grad_norm": 0.00018700955843087286, + "learning_rate": 1.24881787267226e-07, + "loss": 0.0, + "num_input_tokens_seen": 117950376, + "step": 175050 + }, + { + "epoch": 4.27662277380109, + "grad_norm": 0.00018165944493375719, + "learning_rate": 1.2484052383439293e-07, + "loss": 0.0, + "num_input_tokens_seen": 117953704, + "step": 175055 + }, + { + "epoch": 4.276744924632936, + "grad_norm": 0.0002837112988345325, + "learning_rate": 1.247992667659551e-07, + "loss": 0.0, + "num_input_tokens_seen": 117956840, + "step": 175060 + }, + { + "epoch": 4.276867075464784, + "grad_norm": 0.0011048574233427644, + "learning_rate": 1.2475801606221236e-07, + "loss": 0.0, + "num_input_tokens_seen": 117960232, + "step": 175065 + }, + { + "epoch": 4.276989226296631, + "grad_norm": 0.00657190615311265, + "learning_rate": 1.247167717234646e-07, + "loss": 0.0, + "num_input_tokens_seen": 117963688, + "step": 175070 + }, + { + "epoch": 4.277111377128478, + "grad_norm": 0.0002077388489851728, + "learning_rate": 1.2467553375001204e-07, + "loss": 0.0, + "num_input_tokens_seen": 117966440, + "step": 175075 + }, + { + "epoch": 4.277233527960325, + "grad_norm": 4.731911394628696e-05, + "learning_rate": 1.2463430214215432e-07, + "loss": 0.0, + "num_input_tokens_seen": 117969448, + "step": 175080 + }, + { + "epoch": 4.277355678792173, + "grad_norm": 0.011321183294057846, + "learning_rate": 1.2459307690019162e-07, + "loss": 0.0, + "num_input_tokens_seen": 117972520, + "step": 175085 + }, + { + "epoch": 4.2774778296240195, + "grad_norm": 2.9748660381301306e-05, + "learning_rate": 1.2455185802442314e-07, + "loss": 0.0, + "num_input_tokens_seen": 117976040, + "step": 175090 + }, + { + "epoch": 4.277599980455867, + "grad_norm": 2.0650611986638978e-05, + "learning_rate": 1.2451064551514946e-07, + "loss": 0.0, + "num_input_tokens_seen": 117979048, + "step": 175095 + }, + { + "epoch": 4.277722131287714, + "grad_norm": 0.010599371045827866, + "learning_rate": 1.244694393726694e-07, + "loss": 0.0, + "num_input_tokens_seen": 117982248, + "step": 175100 + }, + { + "epoch": 4.2778442821195615, + "grad_norm": 6.258305802475661e-05, + "learning_rate": 1.2442823959728322e-07, + "loss": 0.0, + "num_input_tokens_seen": 117985832, + "step": 175105 + }, + { + "epoch": 4.277966432951408, + "grad_norm": 0.0001950976438820362, + "learning_rate": 1.2438704618929052e-07, + "loss": 0.0, + "num_input_tokens_seen": 117989224, + "step": 175110 + }, + { + "epoch": 4.278088583783256, + "grad_norm": 1.6232579582720064e-05, + "learning_rate": 1.2434585914899054e-07, + "loss": 0.0, + "num_input_tokens_seen": 117992552, + "step": 175115 + }, + { + "epoch": 4.278210734615103, + "grad_norm": 0.0004744456382468343, + "learning_rate": 1.2430467847668325e-07, + "loss": 0.0, + "num_input_tokens_seen": 117995880, + "step": 175120 + }, + { + "epoch": 4.27833288544695, + "grad_norm": 0.001792342634871602, + "learning_rate": 1.2426350417266762e-07, + "loss": 0.0, + "num_input_tokens_seen": 117998888, + "step": 175125 + }, + { + "epoch": 4.278455036278797, + "grad_norm": 0.00013714176020585, + "learning_rate": 1.2422233623724354e-07, + "loss": 0.0, + "num_input_tokens_seen": 118002088, + "step": 175130 + }, + { + "epoch": 4.278577187110645, + "grad_norm": 0.00023262936156243086, + "learning_rate": 1.2418117467070998e-07, + "loss": 0.0, + "num_input_tokens_seen": 118004904, + "step": 175135 + }, + { + "epoch": 4.278699337942491, + "grad_norm": 0.0003559202014002949, + "learning_rate": 1.241400194733665e-07, + "loss": 0.0, + "num_input_tokens_seen": 118007976, + "step": 175140 + }, + { + "epoch": 4.278821488774339, + "grad_norm": 6.383551226463169e-05, + "learning_rate": 1.2409887064551262e-07, + "loss": 0.0, + "num_input_tokens_seen": 118011112, + "step": 175145 + }, + { + "epoch": 4.278943639606186, + "grad_norm": 5.636045534629375e-05, + "learning_rate": 1.240577281874471e-07, + "loss": 0.0, + "num_input_tokens_seen": 118014504, + "step": 175150 + }, + { + "epoch": 4.2790657904380325, + "grad_norm": 0.0032909191213548183, + "learning_rate": 1.240165920994696e-07, + "loss": 0.0182, + "num_input_tokens_seen": 118017896, + "step": 175155 + }, + { + "epoch": 4.27918794126988, + "grad_norm": 2.825920546456473e-06, + "learning_rate": 1.2397546238187883e-07, + "loss": 0.0, + "num_input_tokens_seen": 118021416, + "step": 175160 + }, + { + "epoch": 4.279310092101727, + "grad_norm": 0.00020561976998578757, + "learning_rate": 1.239343390349743e-07, + "loss": 0.0, + "num_input_tokens_seen": 118025704, + "step": 175165 + }, + { + "epoch": 4.2794322429335745, + "grad_norm": 0.00010846881195902824, + "learning_rate": 1.2389322205905473e-07, + "loss": 0.0, + "num_input_tokens_seen": 118028968, + "step": 175170 + }, + { + "epoch": 4.279554393765421, + "grad_norm": 8.97041263669962e-06, + "learning_rate": 1.2385211145441943e-07, + "loss": 0.0, + "num_input_tokens_seen": 118031976, + "step": 175175 + }, + { + "epoch": 4.279676544597269, + "grad_norm": 0.00014840844960417598, + "learning_rate": 1.238110072213673e-07, + "loss": 0.0, + "num_input_tokens_seen": 118034792, + "step": 175180 + }, + { + "epoch": 4.279798695429116, + "grad_norm": 3.458599167061038e-05, + "learning_rate": 1.2376990936019694e-07, + "loss": 0.0, + "num_input_tokens_seen": 118037800, + "step": 175185 + }, + { + "epoch": 4.279920846260963, + "grad_norm": 0.0011223549954593182, + "learning_rate": 1.237288178712077e-07, + "loss": 0.0, + "num_input_tokens_seen": 118041320, + "step": 175190 + }, + { + "epoch": 4.28004299709281, + "grad_norm": 8.894487837096676e-05, + "learning_rate": 1.2368773275469801e-07, + "loss": 0.0, + "num_input_tokens_seen": 118044776, + "step": 175195 + }, + { + "epoch": 4.280165147924658, + "grad_norm": 0.00013673091598320752, + "learning_rate": 1.2364665401096686e-07, + "loss": 0.0, + "num_input_tokens_seen": 118048232, + "step": 175200 + }, + { + "epoch": 4.280287298756504, + "grad_norm": 0.00022629571321886033, + "learning_rate": 1.236055816403131e-07, + "loss": 0.0, + "num_input_tokens_seen": 118051944, + "step": 175205 + }, + { + "epoch": 4.280409449588352, + "grad_norm": 0.001033892622217536, + "learning_rate": 1.2356451564303504e-07, + "loss": 0.0, + "num_input_tokens_seen": 118054952, + "step": 175210 + }, + { + "epoch": 4.280531600420199, + "grad_norm": 0.0015593706630170345, + "learning_rate": 1.235234560194318e-07, + "loss": 0.0, + "num_input_tokens_seen": 118058152, + "step": 175215 + }, + { + "epoch": 4.280653751252046, + "grad_norm": 0.003671762067824602, + "learning_rate": 1.2348240276980148e-07, + "loss": 0.0305, + "num_input_tokens_seen": 118061160, + "step": 175220 + }, + { + "epoch": 4.280775902083893, + "grad_norm": 3.475360063021071e-05, + "learning_rate": 1.2344135589444315e-07, + "loss": 0.0, + "num_input_tokens_seen": 118064872, + "step": 175225 + }, + { + "epoch": 4.280898052915741, + "grad_norm": 5.875719580217265e-06, + "learning_rate": 1.2340031539365481e-07, + "loss": 0.0, + "num_input_tokens_seen": 118068328, + "step": 175230 + }, + { + "epoch": 4.2810202037475875, + "grad_norm": 0.002820430090650916, + "learning_rate": 1.233592812677352e-07, + "loss": 0.0, + "num_input_tokens_seen": 118071080, + "step": 175235 + }, + { + "epoch": 4.281142354579435, + "grad_norm": 0.002786210970953107, + "learning_rate": 1.2331825351698278e-07, + "loss": 0.0, + "num_input_tokens_seen": 118074472, + "step": 175240 + }, + { + "epoch": 4.281264505411282, + "grad_norm": 6.457543349824846e-05, + "learning_rate": 1.2327723214169572e-07, + "loss": 0.0, + "num_input_tokens_seen": 118077864, + "step": 175245 + }, + { + "epoch": 4.281386656243129, + "grad_norm": 9.612838039174676e-05, + "learning_rate": 1.2323621714217257e-07, + "loss": 0.0, + "num_input_tokens_seen": 118081192, + "step": 175250 + }, + { + "epoch": 4.281508807074976, + "grad_norm": 0.00032473396277055144, + "learning_rate": 1.2319520851871136e-07, + "loss": 0.0, + "num_input_tokens_seen": 118084712, + "step": 175255 + }, + { + "epoch": 4.281630957906823, + "grad_norm": 2.6731300749816e-05, + "learning_rate": 1.2315420627161032e-07, + "loss": 0.0, + "num_input_tokens_seen": 118087848, + "step": 175260 + }, + { + "epoch": 4.281753108738671, + "grad_norm": 0.0761445015668869, + "learning_rate": 1.2311321040116795e-07, + "loss": 0.0, + "num_input_tokens_seen": 118091112, + "step": 175265 + }, + { + "epoch": 4.281875259570517, + "grad_norm": 0.00014299452595878392, + "learning_rate": 1.230722209076822e-07, + "loss": 0.0, + "num_input_tokens_seen": 118094120, + "step": 175270 + }, + { + "epoch": 4.281997410402365, + "grad_norm": 7.086592086125165e-05, + "learning_rate": 1.2303123779145096e-07, + "loss": 0.0, + "num_input_tokens_seen": 118097384, + "step": 175275 + }, + { + "epoch": 4.282119561234212, + "grad_norm": 0.0003347352030687034, + "learning_rate": 1.2299026105277265e-07, + "loss": 0.0, + "num_input_tokens_seen": 118100776, + "step": 175280 + }, + { + "epoch": 4.282241712066059, + "grad_norm": 0.00018632085993885994, + "learning_rate": 1.2294929069194494e-07, + "loss": 0.0, + "num_input_tokens_seen": 118104168, + "step": 175285 + }, + { + "epoch": 4.282363862897906, + "grad_norm": 5.435491766547784e-05, + "learning_rate": 1.2290832670926576e-07, + "loss": 0.0, + "num_input_tokens_seen": 118107368, + "step": 175290 + }, + { + "epoch": 4.282486013729754, + "grad_norm": 6.169379048515111e-05, + "learning_rate": 1.2286736910503314e-07, + "loss": 0.0, + "num_input_tokens_seen": 118110440, + "step": 175295 + }, + { + "epoch": 4.2826081645616005, + "grad_norm": 0.006105211563408375, + "learning_rate": 1.2282641787954506e-07, + "loss": 0.0, + "num_input_tokens_seen": 118113448, + "step": 175300 + }, + { + "epoch": 4.282730315393448, + "grad_norm": 8.213877299567685e-05, + "learning_rate": 1.2278547303309905e-07, + "loss": 0.0, + "num_input_tokens_seen": 118116968, + "step": 175305 + }, + { + "epoch": 4.282852466225295, + "grad_norm": 0.00024381156254094094, + "learning_rate": 1.2274453456599333e-07, + "loss": 0.0, + "num_input_tokens_seen": 118120296, + "step": 175310 + }, + { + "epoch": 4.2829746170571426, + "grad_norm": 0.006086864974349737, + "learning_rate": 1.2270360247852496e-07, + "loss": 0.0, + "num_input_tokens_seen": 118123432, + "step": 175315 + }, + { + "epoch": 4.283096767888989, + "grad_norm": 0.0015530579257756472, + "learning_rate": 1.2266267677099219e-07, + "loss": 0.0, + "num_input_tokens_seen": 118127080, + "step": 175320 + }, + { + "epoch": 4.283218918720836, + "grad_norm": 0.04028932377696037, + "learning_rate": 1.2262175744369218e-07, + "loss": 0.0553, + "num_input_tokens_seen": 118130728, + "step": 175325 + }, + { + "epoch": 4.283341069552684, + "grad_norm": 0.0018163869390264153, + "learning_rate": 1.2258084449692286e-07, + "loss": 0.0, + "num_input_tokens_seen": 118134248, + "step": 175330 + }, + { + "epoch": 4.28346322038453, + "grad_norm": 0.00018046381592284888, + "learning_rate": 1.2253993793098171e-07, + "loss": 0.0, + "num_input_tokens_seen": 118137448, + "step": 175335 + }, + { + "epoch": 4.283585371216378, + "grad_norm": 7.681015267735347e-05, + "learning_rate": 1.2249903774616598e-07, + "loss": 0.0002, + "num_input_tokens_seen": 118140968, + "step": 175340 + }, + { + "epoch": 4.283707522048225, + "grad_norm": 0.0006761262775398791, + "learning_rate": 1.2245814394277354e-07, + "loss": 0.0, + "num_input_tokens_seen": 118144360, + "step": 175345 + }, + { + "epoch": 4.283829672880072, + "grad_norm": 0.0010945533867925406, + "learning_rate": 1.2241725652110124e-07, + "loss": 0.0, + "num_input_tokens_seen": 118147560, + "step": 175350 + }, + { + "epoch": 4.283951823711919, + "grad_norm": 0.0016349507495760918, + "learning_rate": 1.2237637548144664e-07, + "loss": 0.0, + "num_input_tokens_seen": 118151016, + "step": 175355 + }, + { + "epoch": 4.284073974543767, + "grad_norm": 0.0002284444635733962, + "learning_rate": 1.2233550082410737e-07, + "loss": 0.0, + "num_input_tokens_seen": 118154216, + "step": 175360 + }, + { + "epoch": 4.284196125375614, + "grad_norm": 0.00017028734146151692, + "learning_rate": 1.222946325493801e-07, + "loss": 0.0, + "num_input_tokens_seen": 118157800, + "step": 175365 + }, + { + "epoch": 4.284318276207461, + "grad_norm": 0.0015447017503902316, + "learning_rate": 1.222537706575627e-07, + "loss": 0.0, + "num_input_tokens_seen": 118160744, + "step": 175370 + }, + { + "epoch": 4.284440427039308, + "grad_norm": 0.0003525497450027615, + "learning_rate": 1.2221291514895182e-07, + "loss": 0.0, + "num_input_tokens_seen": 118164136, + "step": 175375 + }, + { + "epoch": 4.284562577871156, + "grad_norm": 1.5372748748632148e-05, + "learning_rate": 1.2217206602384455e-07, + "loss": 0.0, + "num_input_tokens_seen": 118167784, + "step": 175380 + }, + { + "epoch": 4.284684728703002, + "grad_norm": 0.0003744949062820524, + "learning_rate": 1.2213122328253833e-07, + "loss": 0.0, + "num_input_tokens_seen": 118171240, + "step": 175385 + }, + { + "epoch": 4.28480687953485, + "grad_norm": 7.806552457623184e-05, + "learning_rate": 1.2209038692532981e-07, + "loss": 0.0004, + "num_input_tokens_seen": 118174888, + "step": 175390 + }, + { + "epoch": 4.284929030366697, + "grad_norm": 0.00011152249498991296, + "learning_rate": 1.2204955695251628e-07, + "loss": 0.0, + "num_input_tokens_seen": 118177768, + "step": 175395 + }, + { + "epoch": 4.285051181198544, + "grad_norm": 0.00016263005090877414, + "learning_rate": 1.2200873336439442e-07, + "loss": 0.0, + "num_input_tokens_seen": 118180520, + "step": 175400 + }, + { + "epoch": 4.285173332030391, + "grad_norm": 9.192336437990889e-05, + "learning_rate": 1.2196791616126135e-07, + "loss": 0.0317, + "num_input_tokens_seen": 118183976, + "step": 175405 + }, + { + "epoch": 4.285295482862239, + "grad_norm": 0.0002289551921421662, + "learning_rate": 1.2192710534341343e-07, + "loss": 0.0, + "num_input_tokens_seen": 118186920, + "step": 175410 + }, + { + "epoch": 4.2854176336940855, + "grad_norm": 0.0002647794899530709, + "learning_rate": 1.2188630091114817e-07, + "loss": 0.0, + "num_input_tokens_seen": 118190440, + "step": 175415 + }, + { + "epoch": 4.285539784525932, + "grad_norm": 2.581018998171203e-05, + "learning_rate": 1.218455028647616e-07, + "loss": 0.0, + "num_input_tokens_seen": 118193448, + "step": 175420 + }, + { + "epoch": 4.28566193535778, + "grad_norm": 7.212234049802646e-05, + "learning_rate": 1.218047112045507e-07, + "loss": 0.0, + "num_input_tokens_seen": 118196520, + "step": 175425 + }, + { + "epoch": 4.285784086189627, + "grad_norm": 4.635051664081402e-05, + "learning_rate": 1.2176392593081242e-07, + "loss": 0.0, + "num_input_tokens_seen": 118199784, + "step": 175430 + }, + { + "epoch": 4.285906237021474, + "grad_norm": 8.119764243019745e-05, + "learning_rate": 1.2172314704384278e-07, + "loss": 0.0, + "num_input_tokens_seen": 118202984, + "step": 175435 + }, + { + "epoch": 4.286028387853321, + "grad_norm": 7.789898518240079e-05, + "learning_rate": 1.2168237454393893e-07, + "loss": 0.0, + "num_input_tokens_seen": 118206504, + "step": 175440 + }, + { + "epoch": 4.286150538685169, + "grad_norm": 2.6979070753441192e-05, + "learning_rate": 1.2164160843139693e-07, + "loss": 0.0, + "num_input_tokens_seen": 118209704, + "step": 175445 + }, + { + "epoch": 4.286272689517015, + "grad_norm": 1.372455335513223e-05, + "learning_rate": 1.2160084870651331e-07, + "loss": 0.0, + "num_input_tokens_seen": 118212776, + "step": 175450 + }, + { + "epoch": 4.286394840348863, + "grad_norm": 0.00017044198466464877, + "learning_rate": 1.2156009536958479e-07, + "loss": 0.0, + "num_input_tokens_seen": 118216552, + "step": 175455 + }, + { + "epoch": 4.28651699118071, + "grad_norm": 28.354991912841797, + "learning_rate": 1.2151934842090738e-07, + "loss": 0.0325, + "num_input_tokens_seen": 118219944, + "step": 175460 + }, + { + "epoch": 4.286639142012557, + "grad_norm": 1.995334059756715e-05, + "learning_rate": 1.2147860786077767e-07, + "loss": 0.0, + "num_input_tokens_seen": 118223784, + "step": 175465 + }, + { + "epoch": 4.286761292844404, + "grad_norm": 0.0001449656847398728, + "learning_rate": 1.2143787368949178e-07, + "loss": 0.0, + "num_input_tokens_seen": 118227048, + "step": 175470 + }, + { + "epoch": 4.286883443676252, + "grad_norm": 4.758282011607662e-05, + "learning_rate": 1.2139714590734607e-07, + "loss": 0.0, + "num_input_tokens_seen": 118230056, + "step": 175475 + }, + { + "epoch": 4.2870055945080985, + "grad_norm": 4.159102900302969e-06, + "learning_rate": 1.2135642451463635e-07, + "loss": 0.0, + "num_input_tokens_seen": 118233256, + "step": 175480 + }, + { + "epoch": 4.287127745339946, + "grad_norm": 0.0004711418878287077, + "learning_rate": 1.2131570951165936e-07, + "loss": 0.0, + "num_input_tokens_seen": 118236200, + "step": 175485 + }, + { + "epoch": 4.287249896171793, + "grad_norm": 0.008723760023713112, + "learning_rate": 1.212750008987109e-07, + "loss": 0.0, + "num_input_tokens_seen": 118239208, + "step": 175490 + }, + { + "epoch": 4.2873720470036405, + "grad_norm": 8.196967428375501e-06, + "learning_rate": 1.212342986760867e-07, + "loss": 0.0, + "num_input_tokens_seen": 118242920, + "step": 175495 + }, + { + "epoch": 4.287494197835487, + "grad_norm": 0.005483199842274189, + "learning_rate": 1.211936028440832e-07, + "loss": 0.0, + "num_input_tokens_seen": 118246632, + "step": 175500 + }, + { + "epoch": 4.287616348667335, + "grad_norm": 0.002007837640121579, + "learning_rate": 1.2115291340299604e-07, + "loss": 0.0, + "num_input_tokens_seen": 118249896, + "step": 175505 + }, + { + "epoch": 4.287738499499182, + "grad_norm": 0.00037572745350189507, + "learning_rate": 1.2111223035312136e-07, + "loss": 0.0, + "num_input_tokens_seen": 118253224, + "step": 175510 + }, + { + "epoch": 4.287860650331028, + "grad_norm": 0.00016909983241930604, + "learning_rate": 1.2107155369475496e-07, + "loss": 0.0, + "num_input_tokens_seen": 118256360, + "step": 175515 + }, + { + "epoch": 4.287982801162876, + "grad_norm": 0.00011647819337667897, + "learning_rate": 1.2103088342819256e-07, + "loss": 0.0, + "num_input_tokens_seen": 118259368, + "step": 175520 + }, + { + "epoch": 4.288104951994723, + "grad_norm": 0.0005521110142581165, + "learning_rate": 1.2099021955373013e-07, + "loss": 0.0, + "num_input_tokens_seen": 118262440, + "step": 175525 + }, + { + "epoch": 4.28822710282657, + "grad_norm": 0.001317873946391046, + "learning_rate": 1.2094956207166307e-07, + "loss": 0.0004, + "num_input_tokens_seen": 118265576, + "step": 175530 + }, + { + "epoch": 4.288349253658417, + "grad_norm": 0.0002650999231263995, + "learning_rate": 1.2090891098228739e-07, + "loss": 0.0, + "num_input_tokens_seen": 118269096, + "step": 175535 + }, + { + "epoch": 4.288471404490265, + "grad_norm": 0.0008484512218274176, + "learning_rate": 1.208682662858984e-07, + "loss": 0.0, + "num_input_tokens_seen": 118272616, + "step": 175540 + }, + { + "epoch": 4.2885935553221115, + "grad_norm": 7.143753464333713e-06, + "learning_rate": 1.208276279827919e-07, + "loss": 0.0, + "num_input_tokens_seen": 118276008, + "step": 175545 + }, + { + "epoch": 4.288715706153959, + "grad_norm": 9.948560909833759e-05, + "learning_rate": 1.2078699607326347e-07, + "loss": 0.0, + "num_input_tokens_seen": 118279144, + "step": 175550 + }, + { + "epoch": 4.288837856985806, + "grad_norm": 0.0017997337272390723, + "learning_rate": 1.2074637055760828e-07, + "loss": 0.0, + "num_input_tokens_seen": 118282152, + "step": 175555 + }, + { + "epoch": 4.2889600078176535, + "grad_norm": 4.9702899559633806e-05, + "learning_rate": 1.2070575143612217e-07, + "loss": 0.0, + "num_input_tokens_seen": 118284968, + "step": 175560 + }, + { + "epoch": 4.2890821586495, + "grad_norm": 4.725019607576542e-06, + "learning_rate": 1.2066513870910022e-07, + "loss": 0.0, + "num_input_tokens_seen": 118287912, + "step": 175565 + }, + { + "epoch": 4.289204309481348, + "grad_norm": 9.601834608474746e-05, + "learning_rate": 1.20624532376838e-07, + "loss": 0.0, + "num_input_tokens_seen": 118291112, + "step": 175570 + }, + { + "epoch": 4.289326460313195, + "grad_norm": 4.6068747906247154e-05, + "learning_rate": 1.205839324396305e-07, + "loss": 0.0, + "num_input_tokens_seen": 118294504, + "step": 175575 + }, + { + "epoch": 4.289448611145042, + "grad_norm": 6.677881174255162e-05, + "learning_rate": 1.2054333889777345e-07, + "loss": 0.0, + "num_input_tokens_seen": 118298024, + "step": 175580 + }, + { + "epoch": 4.289570761976889, + "grad_norm": 0.00011366893886588514, + "learning_rate": 1.2050275175156156e-07, + "loss": 0.0, + "num_input_tokens_seen": 118301352, + "step": 175585 + }, + { + "epoch": 4.289692912808736, + "grad_norm": 2.1709463908337057e-05, + "learning_rate": 1.2046217100129042e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118304808, + "step": 175590 + }, + { + "epoch": 4.289815063640583, + "grad_norm": 4.48407263320405e-06, + "learning_rate": 1.2042159664725494e-07, + "loss": 0.0, + "num_input_tokens_seen": 118308072, + "step": 175595 + }, + { + "epoch": 4.28993721447243, + "grad_norm": 0.0007157354848459363, + "learning_rate": 1.2038102868975e-07, + "loss": 0.0, + "num_input_tokens_seen": 118311720, + "step": 175600 + }, + { + "epoch": 4.290059365304278, + "grad_norm": 7.197898867161712e-06, + "learning_rate": 1.2034046712907075e-07, + "loss": 0.0, + "num_input_tokens_seen": 118314984, + "step": 175605 + }, + { + "epoch": 4.2901815161361245, + "grad_norm": 5.3880936320638284e-05, + "learning_rate": 1.2029991196551248e-07, + "loss": 0.0, + "num_input_tokens_seen": 118318056, + "step": 175610 + }, + { + "epoch": 4.290303666967972, + "grad_norm": 0.014211894944310188, + "learning_rate": 1.2025936319936957e-07, + "loss": 0.0, + "num_input_tokens_seen": 118321640, + "step": 175615 + }, + { + "epoch": 4.290425817799819, + "grad_norm": 1.3704505363421049e-05, + "learning_rate": 1.2021882083093748e-07, + "loss": 0.0, + "num_input_tokens_seen": 118324840, + "step": 175620 + }, + { + "epoch": 4.2905479686316665, + "grad_norm": 0.00019162753596901894, + "learning_rate": 1.2017828486051052e-07, + "loss": 0.0, + "num_input_tokens_seen": 118328104, + "step": 175625 + }, + { + "epoch": 4.290670119463513, + "grad_norm": 5.376626722863875e-05, + "learning_rate": 1.2013775528838399e-07, + "loss": 0.0, + "num_input_tokens_seen": 118331496, + "step": 175630 + }, + { + "epoch": 4.290792270295361, + "grad_norm": 5.275745934341103e-05, + "learning_rate": 1.2009723211485212e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118334696, + "step": 175635 + }, + { + "epoch": 4.290914421127208, + "grad_norm": 0.0004111059242859483, + "learning_rate": 1.2005671534020978e-07, + "loss": 0.0, + "num_input_tokens_seen": 118337832, + "step": 175640 + }, + { + "epoch": 4.291036571959055, + "grad_norm": 0.00015165227523539215, + "learning_rate": 1.2001620496475195e-07, + "loss": 0.0, + "num_input_tokens_seen": 118340776, + "step": 175645 + }, + { + "epoch": 4.291158722790902, + "grad_norm": 0.00014834836474619806, + "learning_rate": 1.1997570098877275e-07, + "loss": 0.0, + "num_input_tokens_seen": 118343912, + "step": 175650 + }, + { + "epoch": 4.29128087362275, + "grad_norm": 5.52527817490045e-05, + "learning_rate": 1.1993520341256713e-07, + "loss": 0.0, + "num_input_tokens_seen": 118347432, + "step": 175655 + }, + { + "epoch": 4.291403024454596, + "grad_norm": 0.0016621008981019258, + "learning_rate": 1.1989471223642923e-07, + "loss": 0.0, + "num_input_tokens_seen": 118350312, + "step": 175660 + }, + { + "epoch": 4.291525175286444, + "grad_norm": 0.0002743805234786123, + "learning_rate": 1.1985422746065367e-07, + "loss": 0.0, + "num_input_tokens_seen": 118353832, + "step": 175665 + }, + { + "epoch": 4.291647326118291, + "grad_norm": 1.5842942957533523e-05, + "learning_rate": 1.1981374908553522e-07, + "loss": 0.0, + "num_input_tokens_seen": 118357288, + "step": 175670 + }, + { + "epoch": 4.291769476950138, + "grad_norm": 0.004092568531632423, + "learning_rate": 1.1977327711136754e-07, + "loss": 0.0, + "num_input_tokens_seen": 118360488, + "step": 175675 + }, + { + "epoch": 4.291891627781985, + "grad_norm": 3.0272083677118644e-05, + "learning_rate": 1.1973281153844572e-07, + "loss": 0.0, + "num_input_tokens_seen": 118363816, + "step": 175680 + }, + { + "epoch": 4.292013778613832, + "grad_norm": 0.0012410653289407492, + "learning_rate": 1.1969235236706354e-07, + "loss": 0.0784, + "num_input_tokens_seen": 118367208, + "step": 175685 + }, + { + "epoch": 4.29213592944568, + "grad_norm": 1.507085016783094e-05, + "learning_rate": 1.196518995975152e-07, + "loss": 0.0, + "num_input_tokens_seen": 118370920, + "step": 175690 + }, + { + "epoch": 4.292258080277526, + "grad_norm": 0.0006038264837116003, + "learning_rate": 1.1961145323009526e-07, + "loss": 0.0, + "num_input_tokens_seen": 118374120, + "step": 175695 + }, + { + "epoch": 4.292380231109374, + "grad_norm": 2.109080924128648e-05, + "learning_rate": 1.1957101326509733e-07, + "loss": 0.0, + "num_input_tokens_seen": 118377256, + "step": 175700 + }, + { + "epoch": 4.292502381941221, + "grad_norm": 0.0002546267060097307, + "learning_rate": 1.195305797028161e-07, + "loss": 0.0, + "num_input_tokens_seen": 118380648, + "step": 175705 + }, + { + "epoch": 4.292624532773068, + "grad_norm": 6.967822992010042e-05, + "learning_rate": 1.194901525435451e-07, + "loss": 0.0, + "num_input_tokens_seen": 118383912, + "step": 175710 + }, + { + "epoch": 4.292746683604915, + "grad_norm": 5.2994387260696385e-06, + "learning_rate": 1.1944973178757868e-07, + "loss": 0.0, + "num_input_tokens_seen": 118387304, + "step": 175715 + }, + { + "epoch": 4.292868834436763, + "grad_norm": 2.5682649720693007e-05, + "learning_rate": 1.1940931743521044e-07, + "loss": 0.0, + "num_input_tokens_seen": 118390888, + "step": 175720 + }, + { + "epoch": 4.2929909852686094, + "grad_norm": 0.0005329764098860323, + "learning_rate": 1.1936890948673473e-07, + "loss": 0.0, + "num_input_tokens_seen": 118394536, + "step": 175725 + }, + { + "epoch": 4.293113136100457, + "grad_norm": 7.66331868362613e-05, + "learning_rate": 1.1932850794244497e-07, + "loss": 0.0, + "num_input_tokens_seen": 118397672, + "step": 175730 + }, + { + "epoch": 4.293235286932304, + "grad_norm": 0.00011966860620304942, + "learning_rate": 1.1928811280263517e-07, + "loss": 0.0, + "num_input_tokens_seen": 118401320, + "step": 175735 + }, + { + "epoch": 4.2933574377641515, + "grad_norm": 7.26673097233288e-05, + "learning_rate": 1.192477240675993e-07, + "loss": 0.0, + "num_input_tokens_seen": 118404840, + "step": 175740 + }, + { + "epoch": 4.293479588595998, + "grad_norm": 7.701585855102167e-05, + "learning_rate": 1.192073417376307e-07, + "loss": 0.0, + "num_input_tokens_seen": 118408232, + "step": 175745 + }, + { + "epoch": 4.293601739427846, + "grad_norm": 3.5217308322899044e-05, + "learning_rate": 1.1916696581302344e-07, + "loss": 0.0, + "num_input_tokens_seen": 118411432, + "step": 175750 + }, + { + "epoch": 4.293723890259693, + "grad_norm": 9.866907930700108e-05, + "learning_rate": 1.1912659629407063e-07, + "loss": 0.0, + "num_input_tokens_seen": 118415016, + "step": 175755 + }, + { + "epoch": 4.29384604109154, + "grad_norm": 0.0011521294945850968, + "learning_rate": 1.1908623318106626e-07, + "loss": 0.0, + "num_input_tokens_seen": 118418216, + "step": 175760 + }, + { + "epoch": 4.293968191923387, + "grad_norm": 0.003137110499665141, + "learning_rate": 1.1904587647430386e-07, + "loss": 0.0, + "num_input_tokens_seen": 118422056, + "step": 175765 + }, + { + "epoch": 4.294090342755235, + "grad_norm": 0.0001456877653254196, + "learning_rate": 1.1900552617407655e-07, + "loss": 0.0, + "num_input_tokens_seen": 118425512, + "step": 175770 + }, + { + "epoch": 4.294212493587081, + "grad_norm": 5.808260539197363e-06, + "learning_rate": 1.1896518228067831e-07, + "loss": 0.0, + "num_input_tokens_seen": 118428456, + "step": 175775 + }, + { + "epoch": 4.294334644418928, + "grad_norm": 7.820190148777328e-06, + "learning_rate": 1.189248447944019e-07, + "loss": 0.0, + "num_input_tokens_seen": 118431656, + "step": 175780 + }, + { + "epoch": 4.294456795250776, + "grad_norm": 1.0643769201124087e-05, + "learning_rate": 1.1888451371554132e-07, + "loss": 0.0, + "num_input_tokens_seen": 118435688, + "step": 175785 + }, + { + "epoch": 4.2945789460826225, + "grad_norm": 7.565869054815266e-06, + "learning_rate": 1.188441890443893e-07, + "loss": 0.0, + "num_input_tokens_seen": 118438824, + "step": 175790 + }, + { + "epoch": 4.29470109691447, + "grad_norm": 7.671902494621463e-06, + "learning_rate": 1.1880387078123955e-07, + "loss": 0.0, + "num_input_tokens_seen": 118442152, + "step": 175795 + }, + { + "epoch": 4.294823247746317, + "grad_norm": 0.0015023218002170324, + "learning_rate": 1.1876355892638513e-07, + "loss": 0.0, + "num_input_tokens_seen": 118445288, + "step": 175800 + }, + { + "epoch": 4.2949453985781645, + "grad_norm": 7.464071677532047e-05, + "learning_rate": 1.187232534801188e-07, + "loss": 0.0, + "num_input_tokens_seen": 118448552, + "step": 175805 + }, + { + "epoch": 4.295067549410011, + "grad_norm": 5.0487367843743414e-05, + "learning_rate": 1.1868295444273435e-07, + "loss": 0.0761, + "num_input_tokens_seen": 118451624, + "step": 175810 + }, + { + "epoch": 4.295189700241859, + "grad_norm": 4.657682802644558e-05, + "learning_rate": 1.1864266181452421e-07, + "loss": 0.0, + "num_input_tokens_seen": 118454952, + "step": 175815 + }, + { + "epoch": 4.295311851073706, + "grad_norm": 0.00018549639207776636, + "learning_rate": 1.1860237559578168e-07, + "loss": 0.0, + "num_input_tokens_seen": 118458536, + "step": 175820 + }, + { + "epoch": 4.295434001905553, + "grad_norm": 9.009638597490266e-05, + "learning_rate": 1.1856209578679998e-07, + "loss": 0.0, + "num_input_tokens_seen": 118461864, + "step": 175825 + }, + { + "epoch": 4.2955561527374, + "grad_norm": 0.01003610622137785, + "learning_rate": 1.1852182238787156e-07, + "loss": 0.0, + "num_input_tokens_seen": 118465192, + "step": 175830 + }, + { + "epoch": 4.295678303569248, + "grad_norm": 0.003888308070600033, + "learning_rate": 1.1848155539928972e-07, + "loss": 0.0, + "num_input_tokens_seen": 118468648, + "step": 175835 + }, + { + "epoch": 4.295800454401094, + "grad_norm": 0.00016373286780435592, + "learning_rate": 1.1844129482134702e-07, + "loss": 0.0, + "num_input_tokens_seen": 118472104, + "step": 175840 + }, + { + "epoch": 4.295922605232942, + "grad_norm": 5.8873829402728006e-05, + "learning_rate": 1.1840104065433642e-07, + "loss": 0.0, + "num_input_tokens_seen": 118475368, + "step": 175845 + }, + { + "epoch": 4.296044756064789, + "grad_norm": 0.0008571085636503994, + "learning_rate": 1.183607928985505e-07, + "loss": 0.0, + "num_input_tokens_seen": 118479144, + "step": 175850 + }, + { + "epoch": 4.2961669068966355, + "grad_norm": 1.4165329048410058e-05, + "learning_rate": 1.1832055155428189e-07, + "loss": 0.0, + "num_input_tokens_seen": 118482408, + "step": 175855 + }, + { + "epoch": 4.296289057728483, + "grad_norm": 5.2919556765118614e-05, + "learning_rate": 1.1828031662182358e-07, + "loss": 0.0, + "num_input_tokens_seen": 118485992, + "step": 175860 + }, + { + "epoch": 4.296411208560331, + "grad_norm": 6.386043878592318e-06, + "learning_rate": 1.1824008810146791e-07, + "loss": 0.0, + "num_input_tokens_seen": 118489128, + "step": 175865 + }, + { + "epoch": 4.2965333593921775, + "grad_norm": 2.0044681150466204e-05, + "learning_rate": 1.1819986599350751e-07, + "loss": 0.0, + "num_input_tokens_seen": 118492264, + "step": 175870 + }, + { + "epoch": 4.296655510224024, + "grad_norm": 0.012165850028395653, + "learning_rate": 1.1815965029823471e-07, + "loss": 0.0, + "num_input_tokens_seen": 118495848, + "step": 175875 + }, + { + "epoch": 4.296777661055872, + "grad_norm": 0.0007713138475082815, + "learning_rate": 1.181194410159424e-07, + "loss": 0.0, + "num_input_tokens_seen": 118499176, + "step": 175880 + }, + { + "epoch": 4.296899811887719, + "grad_norm": 6.350491457851604e-05, + "learning_rate": 1.1807923814692244e-07, + "loss": 0.0017, + "num_input_tokens_seen": 118502440, + "step": 175885 + }, + { + "epoch": 4.297021962719566, + "grad_norm": 0.00017253389523830265, + "learning_rate": 1.1803904169146773e-07, + "loss": 0.0, + "num_input_tokens_seen": 118506216, + "step": 175890 + }, + { + "epoch": 4.297144113551413, + "grad_norm": 6.567937816726044e-05, + "learning_rate": 1.179988516498701e-07, + "loss": 0.0, + "num_input_tokens_seen": 118509416, + "step": 175895 + }, + { + "epoch": 4.297266264383261, + "grad_norm": 0.00023607707407791167, + "learning_rate": 1.1795866802242216e-07, + "loss": 0.0346, + "num_input_tokens_seen": 118512488, + "step": 175900 + }, + { + "epoch": 4.297388415215107, + "grad_norm": 0.000546814757399261, + "learning_rate": 1.1791849080941618e-07, + "loss": 0.0, + "num_input_tokens_seen": 118515816, + "step": 175905 + }, + { + "epoch": 4.297510566046955, + "grad_norm": 0.0017294178251177073, + "learning_rate": 1.1787832001114384e-07, + "loss": 0.0, + "num_input_tokens_seen": 118519144, + "step": 175910 + }, + { + "epoch": 4.297632716878802, + "grad_norm": 9.488432988291606e-05, + "learning_rate": 1.1783815562789767e-07, + "loss": 0.0, + "num_input_tokens_seen": 118522792, + "step": 175915 + }, + { + "epoch": 4.297754867710649, + "grad_norm": 0.0001220635895151645, + "learning_rate": 1.1779799765997e-07, + "loss": 0.0, + "num_input_tokens_seen": 118525736, + "step": 175920 + }, + { + "epoch": 4.297877018542496, + "grad_norm": 0.001318514347076416, + "learning_rate": 1.1775784610765227e-07, + "loss": 0.0, + "num_input_tokens_seen": 118529064, + "step": 175925 + }, + { + "epoch": 4.297999169374344, + "grad_norm": 0.0556945838034153, + "learning_rate": 1.1771770097123701e-07, + "loss": 0.0, + "num_input_tokens_seen": 118532392, + "step": 175930 + }, + { + "epoch": 4.2981213202061905, + "grad_norm": 1.180052731797332e-05, + "learning_rate": 1.1767756225101566e-07, + "loss": 0.0, + "num_input_tokens_seen": 118535912, + "step": 175935 + }, + { + "epoch": 4.298243471038038, + "grad_norm": 0.0001700563880149275, + "learning_rate": 1.1763742994728077e-07, + "loss": 0.0, + "num_input_tokens_seen": 118539368, + "step": 175940 + }, + { + "epoch": 4.298365621869885, + "grad_norm": 1.8086886484525166e-05, + "learning_rate": 1.1759730406032342e-07, + "loss": 0.0, + "num_input_tokens_seen": 118542568, + "step": 175945 + }, + { + "epoch": 4.298487772701732, + "grad_norm": 6.41237638774328e-05, + "learning_rate": 1.1755718459043595e-07, + "loss": 0.0, + "num_input_tokens_seen": 118545512, + "step": 175950 + }, + { + "epoch": 4.298609923533579, + "grad_norm": 0.0019423235207796097, + "learning_rate": 1.1751707153791012e-07, + "loss": 0.0, + "num_input_tokens_seen": 118549160, + "step": 175955 + }, + { + "epoch": 4.298732074365426, + "grad_norm": 3.768239912460558e-05, + "learning_rate": 1.1747696490303727e-07, + "loss": 0.0, + "num_input_tokens_seen": 118553000, + "step": 175960 + }, + { + "epoch": 4.298854225197274, + "grad_norm": 0.00021600848413072526, + "learning_rate": 1.1743686468610958e-07, + "loss": 0.0, + "num_input_tokens_seen": 118556968, + "step": 175965 + }, + { + "epoch": 4.29897637602912, + "grad_norm": 0.0007384647615253925, + "learning_rate": 1.1739677088741817e-07, + "loss": 0.0, + "num_input_tokens_seen": 118559976, + "step": 175970 + }, + { + "epoch": 4.299098526860968, + "grad_norm": 0.0001238829572685063, + "learning_rate": 1.1735668350725481e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118563368, + "step": 175975 + }, + { + "epoch": 4.299220677692815, + "grad_norm": 0.00313155772164464, + "learning_rate": 1.1731660254591124e-07, + "loss": 0.0, + "num_input_tokens_seen": 118566824, + "step": 175980 + }, + { + "epoch": 4.299342828524662, + "grad_norm": 0.001018465030938387, + "learning_rate": 1.172765280036786e-07, + "loss": 0.0, + "num_input_tokens_seen": 118569832, + "step": 175985 + }, + { + "epoch": 4.299464979356509, + "grad_norm": 0.03963853418827057, + "learning_rate": 1.1723645988084862e-07, + "loss": 0.0, + "num_input_tokens_seen": 118572968, + "step": 175990 + }, + { + "epoch": 4.299587130188357, + "grad_norm": 1.9976530893472955e-05, + "learning_rate": 1.1719639817771244e-07, + "loss": 0.0, + "num_input_tokens_seen": 118576680, + "step": 175995 + }, + { + "epoch": 4.2997092810202036, + "grad_norm": 0.0008707842789590359, + "learning_rate": 1.1715634289456156e-07, + "loss": 0.0, + "num_input_tokens_seen": 118580456, + "step": 176000 + }, + { + "epoch": 4.299831431852051, + "grad_norm": 0.0005335027817636728, + "learning_rate": 1.1711629403168733e-07, + "loss": 0.0, + "num_input_tokens_seen": 118583272, + "step": 176005 + }, + { + "epoch": 4.299953582683898, + "grad_norm": 0.0005570012144744396, + "learning_rate": 1.1707625158938062e-07, + "loss": 0.0, + "num_input_tokens_seen": 118586984, + "step": 176010 + }, + { + "epoch": 4.300075733515746, + "grad_norm": 9.334905917057768e-05, + "learning_rate": 1.1703621556793308e-07, + "loss": 0.0, + "num_input_tokens_seen": 118590440, + "step": 176015 + }, + { + "epoch": 4.300197884347592, + "grad_norm": 0.00020337030582595617, + "learning_rate": 1.1699618596763549e-07, + "loss": 0.0, + "num_input_tokens_seen": 118593896, + "step": 176020 + }, + { + "epoch": 4.30032003517944, + "grad_norm": 1.1005053238477558e-05, + "learning_rate": 1.1695616278877929e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118597032, + "step": 176025 + }, + { + "epoch": 4.300442186011287, + "grad_norm": 1.5739868103992194e-05, + "learning_rate": 1.1691614603165522e-07, + "loss": 0.0, + "num_input_tokens_seen": 118600552, + "step": 176030 + }, + { + "epoch": 4.300564336843134, + "grad_norm": 7.899448974058032e-05, + "learning_rate": 1.1687613569655464e-07, + "loss": 0.0, + "num_input_tokens_seen": 118604072, + "step": 176035 + }, + { + "epoch": 4.300686487674981, + "grad_norm": 4.880525011685677e-05, + "learning_rate": 1.1683613178376816e-07, + "loss": 0.0, + "num_input_tokens_seen": 118607336, + "step": 176040 + }, + { + "epoch": 4.300808638506828, + "grad_norm": 0.00020250272064004093, + "learning_rate": 1.1679613429358681e-07, + "loss": 0.0, + "num_input_tokens_seen": 118610728, + "step": 176045 + }, + { + "epoch": 4.3009307893386755, + "grad_norm": 0.00022577719937544316, + "learning_rate": 1.1675614322630179e-07, + "loss": 0.0, + "num_input_tokens_seen": 118613736, + "step": 176050 + }, + { + "epoch": 4.301052940170522, + "grad_norm": 1.0512053449929226e-05, + "learning_rate": 1.1671615858220352e-07, + "loss": 0.0, + "num_input_tokens_seen": 118617000, + "step": 176055 + }, + { + "epoch": 4.30117509100237, + "grad_norm": 2.0839153876295313e-05, + "learning_rate": 1.16676180361583e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118620264, + "step": 176060 + }, + { + "epoch": 4.301297241834217, + "grad_norm": 9.713278996059671e-05, + "learning_rate": 1.1663620856473078e-07, + "loss": 0.0, + "num_input_tokens_seen": 118623464, + "step": 176065 + }, + { + "epoch": 4.301419392666064, + "grad_norm": 1.1436190106905997e-05, + "learning_rate": 1.1659624319193751e-07, + "loss": 0.0, + "num_input_tokens_seen": 118627240, + "step": 176070 + }, + { + "epoch": 4.301541543497911, + "grad_norm": 0.00018494624237064272, + "learning_rate": 1.1655628424349428e-07, + "loss": 0.0, + "num_input_tokens_seen": 118630376, + "step": 176075 + }, + { + "epoch": 4.301663694329759, + "grad_norm": 0.00010769705113489181, + "learning_rate": 1.165163317196911e-07, + "loss": 0.0852, + "num_input_tokens_seen": 118633704, + "step": 176080 + }, + { + "epoch": 4.301785845161605, + "grad_norm": 0.01379750669002533, + "learning_rate": 1.1647638562081907e-07, + "loss": 0.0, + "num_input_tokens_seen": 118637352, + "step": 176085 + }, + { + "epoch": 4.301907995993453, + "grad_norm": 0.0006048534414730966, + "learning_rate": 1.1643644594716817e-07, + "loss": 0.0, + "num_input_tokens_seen": 118640616, + "step": 176090 + }, + { + "epoch": 4.3020301468253, + "grad_norm": 0.0024736644700169563, + "learning_rate": 1.1639651269902928e-07, + "loss": 0.0, + "num_input_tokens_seen": 118643944, + "step": 176095 + }, + { + "epoch": 4.302152297657147, + "grad_norm": 8.007245196495205e-05, + "learning_rate": 1.1635658587669239e-07, + "loss": 0.0, + "num_input_tokens_seen": 118647400, + "step": 176100 + }, + { + "epoch": 4.302274448488994, + "grad_norm": 0.000884940498508513, + "learning_rate": 1.1631666548044827e-07, + "loss": 0.0, + "num_input_tokens_seen": 118650664, + "step": 176105 + }, + { + "epoch": 4.302396599320842, + "grad_norm": 9.08693255041726e-05, + "learning_rate": 1.1627675151058703e-07, + "loss": 0.0, + "num_input_tokens_seen": 118653800, + "step": 176110 + }, + { + "epoch": 4.3025187501526885, + "grad_norm": 3.714384729391895e-05, + "learning_rate": 1.1623684396739885e-07, + "loss": 0.0, + "num_input_tokens_seen": 118656872, + "step": 176115 + }, + { + "epoch": 4.302640900984536, + "grad_norm": 5.196634901949437e-06, + "learning_rate": 1.161969428511741e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118660392, + "step": 176120 + }, + { + "epoch": 4.302763051816383, + "grad_norm": 0.003202979452908039, + "learning_rate": 1.1615704816220284e-07, + "loss": 0.0, + "num_input_tokens_seen": 118663656, + "step": 176125 + }, + { + "epoch": 4.3028852026482305, + "grad_norm": 0.000134141169837676, + "learning_rate": 1.1611715990077531e-07, + "loss": 0.0, + "num_input_tokens_seen": 118666856, + "step": 176130 + }, + { + "epoch": 4.303007353480077, + "grad_norm": 0.0003105984069406986, + "learning_rate": 1.1607727806718138e-07, + "loss": 0.0, + "num_input_tokens_seen": 118670184, + "step": 176135 + }, + { + "epoch": 4.303129504311924, + "grad_norm": 0.00014293364074546844, + "learning_rate": 1.1603740266171124e-07, + "loss": 0.0, + "num_input_tokens_seen": 118673384, + "step": 176140 + }, + { + "epoch": 4.303251655143772, + "grad_norm": 0.00013589364243671298, + "learning_rate": 1.1599753368465515e-07, + "loss": 0.0, + "num_input_tokens_seen": 118676776, + "step": 176145 + }, + { + "epoch": 4.303373805975618, + "grad_norm": 0.002318460727110505, + "learning_rate": 1.159576711363025e-07, + "loss": 0.0, + "num_input_tokens_seen": 118679912, + "step": 176150 + }, + { + "epoch": 4.303495956807466, + "grad_norm": 0.000528425385709852, + "learning_rate": 1.1591781501694365e-07, + "loss": 0.0, + "num_input_tokens_seen": 118682984, + "step": 176155 + }, + { + "epoch": 4.303618107639313, + "grad_norm": 5.2127357776043937e-05, + "learning_rate": 1.15877965326868e-07, + "loss": 0.0, + "num_input_tokens_seen": 118686248, + "step": 176160 + }, + { + "epoch": 4.30374025847116, + "grad_norm": 0.00026971526676788926, + "learning_rate": 1.1583812206636556e-07, + "loss": 0.0, + "num_input_tokens_seen": 118689448, + "step": 176165 + }, + { + "epoch": 4.303862409303007, + "grad_norm": 0.0002909989270847291, + "learning_rate": 1.1579828523572632e-07, + "loss": 0.0, + "num_input_tokens_seen": 118692840, + "step": 176170 + }, + { + "epoch": 4.303984560134855, + "grad_norm": 0.0005220604944042861, + "learning_rate": 1.157584548352396e-07, + "loss": 0.0, + "num_input_tokens_seen": 118696040, + "step": 176175 + }, + { + "epoch": 4.3041067109667015, + "grad_norm": 2.770401624729857e-05, + "learning_rate": 1.157186308651955e-07, + "loss": 0.0, + "num_input_tokens_seen": 118699240, + "step": 176180 + }, + { + "epoch": 4.304228861798549, + "grad_norm": 0.010921383276581764, + "learning_rate": 1.1567881332588303e-07, + "loss": 0.0, + "num_input_tokens_seen": 118702376, + "step": 176185 + }, + { + "epoch": 4.304351012630396, + "grad_norm": 1.1668385923258029e-05, + "learning_rate": 1.1563900221759238e-07, + "loss": 0.0, + "num_input_tokens_seen": 118705704, + "step": 176190 + }, + { + "epoch": 4.3044731634622435, + "grad_norm": 7.894026202848181e-05, + "learning_rate": 1.1559919754061253e-07, + "loss": 0.0, + "num_input_tokens_seen": 118709032, + "step": 176195 + }, + { + "epoch": 4.30459531429409, + "grad_norm": 3.2366890081902966e-05, + "learning_rate": 1.155593992952334e-07, + "loss": 0.0, + "num_input_tokens_seen": 118712680, + "step": 176200 + }, + { + "epoch": 4.304717465125938, + "grad_norm": 3.0066912586335093e-05, + "learning_rate": 1.1551960748174405e-07, + "loss": 0.0, + "num_input_tokens_seen": 118716008, + "step": 176205 + }, + { + "epoch": 4.304839615957785, + "grad_norm": 0.00018369973986409605, + "learning_rate": 1.1547982210043417e-07, + "loss": 0.0, + "num_input_tokens_seen": 118719528, + "step": 176210 + }, + { + "epoch": 4.304961766789631, + "grad_norm": 0.0011245689820498228, + "learning_rate": 1.1544004315159284e-07, + "loss": 0.0, + "num_input_tokens_seen": 118723240, + "step": 176215 + }, + { + "epoch": 4.305083917621479, + "grad_norm": 2.8072752684238367e-05, + "learning_rate": 1.1540027063550939e-07, + "loss": 0.0, + "num_input_tokens_seen": 118726248, + "step": 176220 + }, + { + "epoch": 4.305206068453326, + "grad_norm": 7.902841753093526e-05, + "learning_rate": 1.1536050455247304e-07, + "loss": 0.0, + "num_input_tokens_seen": 118729512, + "step": 176225 + }, + { + "epoch": 4.305328219285173, + "grad_norm": 0.00010827074584085494, + "learning_rate": 1.1532074490277321e-07, + "loss": 0.0, + "num_input_tokens_seen": 118732584, + "step": 176230 + }, + { + "epoch": 4.30545037011702, + "grad_norm": 0.0008183018071576953, + "learning_rate": 1.152809916866987e-07, + "loss": 0.0, + "num_input_tokens_seen": 118735656, + "step": 176235 + }, + { + "epoch": 4.305572520948868, + "grad_norm": 0.0002630564267747104, + "learning_rate": 1.152412449045389e-07, + "loss": 0.0, + "num_input_tokens_seen": 118739368, + "step": 176240 + }, + { + "epoch": 4.3056946717807145, + "grad_norm": 0.00021008927433285862, + "learning_rate": 1.1520150455658261e-07, + "loss": 0.0794, + "num_input_tokens_seen": 118742632, + "step": 176245 + }, + { + "epoch": 4.305816822612562, + "grad_norm": 2.9066259230603464e-05, + "learning_rate": 1.1516177064311916e-07, + "loss": 0.0, + "num_input_tokens_seen": 118746280, + "step": 176250 + }, + { + "epoch": 4.305938973444409, + "grad_norm": 0.022015077993273735, + "learning_rate": 1.1512204316443719e-07, + "loss": 0.0, + "num_input_tokens_seen": 118749608, + "step": 176255 + }, + { + "epoch": 4.3060611242762565, + "grad_norm": 0.0004457909963093698, + "learning_rate": 1.1508232212082559e-07, + "loss": 0.0, + "num_input_tokens_seen": 118752936, + "step": 176260 + }, + { + "epoch": 4.306183275108103, + "grad_norm": 5.9687656175810844e-05, + "learning_rate": 1.1504260751257366e-07, + "loss": 0.0, + "num_input_tokens_seen": 118756648, + "step": 176265 + }, + { + "epoch": 4.306305425939951, + "grad_norm": 0.0004831032711081207, + "learning_rate": 1.1500289933996965e-07, + "loss": 0.0, + "num_input_tokens_seen": 118760488, + "step": 176270 + }, + { + "epoch": 4.306427576771798, + "grad_norm": 2.4277231204905547e-05, + "learning_rate": 1.1496319760330276e-07, + "loss": 0.0, + "num_input_tokens_seen": 118763944, + "step": 176275 + }, + { + "epoch": 4.306549727603645, + "grad_norm": 0.0004642781859729439, + "learning_rate": 1.149235023028614e-07, + "loss": 0.0, + "num_input_tokens_seen": 118767272, + "step": 176280 + }, + { + "epoch": 4.306671878435492, + "grad_norm": 0.0004993979819118977, + "learning_rate": 1.1488381343893461e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118771048, + "step": 176285 + }, + { + "epoch": 4.30679402926734, + "grad_norm": 0.0004221405542921275, + "learning_rate": 1.1484413101181057e-07, + "loss": 0.0, + "num_input_tokens_seen": 118774120, + "step": 176290 + }, + { + "epoch": 4.306916180099186, + "grad_norm": 4.720066954178037e-06, + "learning_rate": 1.1480445502177805e-07, + "loss": 0.0, + "num_input_tokens_seen": 118777576, + "step": 176295 + }, + { + "epoch": 4.307038330931034, + "grad_norm": 0.00036887277383357286, + "learning_rate": 1.1476478546912582e-07, + "loss": 0.0, + "num_input_tokens_seen": 118781864, + "step": 176300 + }, + { + "epoch": 4.307160481762881, + "grad_norm": 0.00020793842850252986, + "learning_rate": 1.14725122354142e-07, + "loss": 0.0, + "num_input_tokens_seen": 118785448, + "step": 176305 + }, + { + "epoch": 4.3072826325947275, + "grad_norm": 0.0008742841309867799, + "learning_rate": 1.1468546567711545e-07, + "loss": 0.0, + "num_input_tokens_seen": 118788392, + "step": 176310 + }, + { + "epoch": 4.307404783426575, + "grad_norm": 8.221582538681105e-05, + "learning_rate": 1.1464581543833429e-07, + "loss": 0.0, + "num_input_tokens_seen": 118791464, + "step": 176315 + }, + { + "epoch": 4.307526934258422, + "grad_norm": 0.00010175922216149047, + "learning_rate": 1.1460617163808661e-07, + "loss": 0.0, + "num_input_tokens_seen": 118795432, + "step": 176320 + }, + { + "epoch": 4.30764908509027, + "grad_norm": 0.008732876740396023, + "learning_rate": 1.145665342766613e-07, + "loss": 0.0465, + "num_input_tokens_seen": 118798952, + "step": 176325 + }, + { + "epoch": 4.307771235922116, + "grad_norm": 0.003801588201895356, + "learning_rate": 1.14526903354346e-07, + "loss": 0.0, + "num_input_tokens_seen": 118801960, + "step": 176330 + }, + { + "epoch": 4.307893386753964, + "grad_norm": 0.0003274581686127931, + "learning_rate": 1.1448727887142951e-07, + "loss": 0.0, + "num_input_tokens_seen": 118805288, + "step": 176335 + }, + { + "epoch": 4.308015537585811, + "grad_norm": 2.4809673050185665e-05, + "learning_rate": 1.1444766082819945e-07, + "loss": 0.0, + "num_input_tokens_seen": 118808488, + "step": 176340 + }, + { + "epoch": 4.308137688417658, + "grad_norm": 0.0007116202614270151, + "learning_rate": 1.144080492249444e-07, + "loss": 0.0, + "num_input_tokens_seen": 118811624, + "step": 176345 + }, + { + "epoch": 4.308259839249505, + "grad_norm": 6.148311513243243e-05, + "learning_rate": 1.1436844406195211e-07, + "loss": 0.0, + "num_input_tokens_seen": 118814824, + "step": 176350 + }, + { + "epoch": 4.308381990081353, + "grad_norm": 0.000994387548416853, + "learning_rate": 1.1432884533951059e-07, + "loss": 0.0213, + "num_input_tokens_seen": 118817896, + "step": 176355 + }, + { + "epoch": 4.308504140913199, + "grad_norm": 0.0012195755261927843, + "learning_rate": 1.1428925305790815e-07, + "loss": 0.0, + "num_input_tokens_seen": 118821416, + "step": 176360 + }, + { + "epoch": 4.308626291745047, + "grad_norm": 0.0007472949801012874, + "learning_rate": 1.1424966721743224e-07, + "loss": 0.0, + "num_input_tokens_seen": 118824808, + "step": 176365 + }, + { + "epoch": 4.308748442576894, + "grad_norm": 0.001678556320257485, + "learning_rate": 1.1421008781837127e-07, + "loss": 0.0, + "num_input_tokens_seen": 118828392, + "step": 176370 + }, + { + "epoch": 4.3088705934087415, + "grad_norm": 0.00041926439735107124, + "learning_rate": 1.1417051486101248e-07, + "loss": 0.0, + "num_input_tokens_seen": 118831656, + "step": 176375 + }, + { + "epoch": 4.308992744240588, + "grad_norm": 6.319615931715816e-05, + "learning_rate": 1.1413094834564408e-07, + "loss": 0.0, + "num_input_tokens_seen": 118835048, + "step": 176380 + }, + { + "epoch": 4.309114895072436, + "grad_norm": 8.652818360133097e-05, + "learning_rate": 1.1409138827255382e-07, + "loss": 0.0, + "num_input_tokens_seen": 118838376, + "step": 176385 + }, + { + "epoch": 4.309237045904283, + "grad_norm": 0.00018144700152333826, + "learning_rate": 1.1405183464202916e-07, + "loss": 0.0, + "num_input_tokens_seen": 118841512, + "step": 176390 + }, + { + "epoch": 4.30935919673613, + "grad_norm": 0.0005828774883411825, + "learning_rate": 1.1401228745435799e-07, + "loss": 0.0, + "num_input_tokens_seen": 118844648, + "step": 176395 + }, + { + "epoch": 4.309481347567977, + "grad_norm": 6.49345474812435e-06, + "learning_rate": 1.1397274670982748e-07, + "loss": 0.0, + "num_input_tokens_seen": 118847848, + "step": 176400 + }, + { + "epoch": 4.309603498399824, + "grad_norm": 0.004258220084011555, + "learning_rate": 1.1393321240872578e-07, + "loss": 0.0, + "num_input_tokens_seen": 118850984, + "step": 176405 + }, + { + "epoch": 4.309725649231671, + "grad_norm": 0.00043068305240012705, + "learning_rate": 1.1389368455133985e-07, + "loss": 0.0, + "num_input_tokens_seen": 118854568, + "step": 176410 + }, + { + "epoch": 4.309847800063518, + "grad_norm": 0.0021215358283370733, + "learning_rate": 1.138541631379576e-07, + "loss": 0.0, + "num_input_tokens_seen": 118858088, + "step": 176415 + }, + { + "epoch": 4.309969950895366, + "grad_norm": 1.3441069313557819e-05, + "learning_rate": 1.138146481688662e-07, + "loss": 0.0, + "num_input_tokens_seen": 118861416, + "step": 176420 + }, + { + "epoch": 4.3100921017272125, + "grad_norm": 4.286822149879299e-05, + "learning_rate": 1.1377513964435292e-07, + "loss": 0.0, + "num_input_tokens_seen": 118865064, + "step": 176425 + }, + { + "epoch": 4.31021425255906, + "grad_norm": 0.0008951426716521382, + "learning_rate": 1.1373563756470527e-07, + "loss": 0.0, + "num_input_tokens_seen": 118868520, + "step": 176430 + }, + { + "epoch": 4.310336403390907, + "grad_norm": 0.0001404430076945573, + "learning_rate": 1.1369614193021027e-07, + "loss": 0.0, + "num_input_tokens_seen": 118871720, + "step": 176435 + }, + { + "epoch": 4.3104585542227545, + "grad_norm": 0.00013012201816309243, + "learning_rate": 1.1365665274115554e-07, + "loss": 0.0, + "num_input_tokens_seen": 118874792, + "step": 176440 + }, + { + "epoch": 4.310580705054601, + "grad_norm": 0.00011794355668826029, + "learning_rate": 1.1361716999782778e-07, + "loss": 0.0, + "num_input_tokens_seen": 118878248, + "step": 176445 + }, + { + "epoch": 4.310702855886449, + "grad_norm": 0.0004219324328005314, + "learning_rate": 1.135776937005144e-07, + "loss": 0.0, + "num_input_tokens_seen": 118881448, + "step": 176450 + }, + { + "epoch": 4.310825006718296, + "grad_norm": 0.0328974686563015, + "learning_rate": 1.1353822384950263e-07, + "loss": 0.0, + "num_input_tokens_seen": 118885096, + "step": 176455 + }, + { + "epoch": 4.310947157550143, + "grad_norm": 4.000966146122664e-05, + "learning_rate": 1.1349876044507922e-07, + "loss": 0.0, + "num_input_tokens_seen": 118888616, + "step": 176460 + }, + { + "epoch": 4.31106930838199, + "grad_norm": 0.00020574162772390991, + "learning_rate": 1.134593034875313e-07, + "loss": 0.0, + "num_input_tokens_seen": 118892072, + "step": 176465 + }, + { + "epoch": 4.311191459213838, + "grad_norm": 0.0006144284270703793, + "learning_rate": 1.1341985297714573e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118895272, + "step": 176470 + }, + { + "epoch": 4.311313610045684, + "grad_norm": 5.5516093198093586e-06, + "learning_rate": 1.1338040891420941e-07, + "loss": 0.0, + "num_input_tokens_seen": 118898920, + "step": 176475 + }, + { + "epoch": 4.311435760877531, + "grad_norm": 3.77788492187392e-05, + "learning_rate": 1.1334097129900932e-07, + "loss": 0.0, + "num_input_tokens_seen": 118902568, + "step": 176480 + }, + { + "epoch": 4.311557911709379, + "grad_norm": 0.00012832036009058356, + "learning_rate": 1.1330154013183213e-07, + "loss": 0.0, + "num_input_tokens_seen": 118906024, + "step": 176485 + }, + { + "epoch": 4.311680062541226, + "grad_norm": 8.782917575445026e-05, + "learning_rate": 1.1326211541296471e-07, + "loss": 0.0, + "num_input_tokens_seen": 118909288, + "step": 176490 + }, + { + "epoch": 4.311802213373073, + "grad_norm": 5.389552461565472e-05, + "learning_rate": 1.1322269714269361e-07, + "loss": 0.0, + "num_input_tokens_seen": 118912360, + "step": 176495 + }, + { + "epoch": 4.31192436420492, + "grad_norm": 0.00011952647037105635, + "learning_rate": 1.1318328532130561e-07, + "loss": 0.0, + "num_input_tokens_seen": 118915496, + "step": 176500 + }, + { + "epoch": 4.3120465150367675, + "grad_norm": 0.0003824408631771803, + "learning_rate": 1.1314387994908726e-07, + "loss": 0.0, + "num_input_tokens_seen": 118918632, + "step": 176505 + }, + { + "epoch": 4.312168665868614, + "grad_norm": 4.39748982898891e-05, + "learning_rate": 1.1310448102632519e-07, + "loss": 0.0, + "num_input_tokens_seen": 118921640, + "step": 176510 + }, + { + "epoch": 4.312290816700462, + "grad_norm": 8.625312148069497e-06, + "learning_rate": 1.1306508855330576e-07, + "loss": 0.0, + "num_input_tokens_seen": 118925224, + "step": 176515 + }, + { + "epoch": 4.312412967532309, + "grad_norm": 2.4011906134546734e-05, + "learning_rate": 1.1302570253031573e-07, + "loss": 0.0, + "num_input_tokens_seen": 118928360, + "step": 176520 + }, + { + "epoch": 4.312535118364156, + "grad_norm": 0.0008898421074263752, + "learning_rate": 1.1298632295764143e-07, + "loss": 0.0, + "num_input_tokens_seen": 118931432, + "step": 176525 + }, + { + "epoch": 4.312657269196003, + "grad_norm": 6.401252176146954e-05, + "learning_rate": 1.1294694983556896e-07, + "loss": 0.0, + "num_input_tokens_seen": 118934824, + "step": 176530 + }, + { + "epoch": 4.312779420027851, + "grad_norm": 2.1623593056574464e-05, + "learning_rate": 1.1290758316438476e-07, + "loss": 0.0, + "num_input_tokens_seen": 118937832, + "step": 176535 + }, + { + "epoch": 4.312901570859697, + "grad_norm": 0.00032035564072430134, + "learning_rate": 1.1286822294437548e-07, + "loss": 0.0, + "num_input_tokens_seen": 118940904, + "step": 176540 + }, + { + "epoch": 4.313023721691545, + "grad_norm": 0.010348553769290447, + "learning_rate": 1.128288691758269e-07, + "loss": 0.0, + "num_input_tokens_seen": 118944552, + "step": 176545 + }, + { + "epoch": 4.313145872523392, + "grad_norm": 6.47408960503526e-05, + "learning_rate": 1.1278952185902557e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118948136, + "step": 176550 + }, + { + "epoch": 4.313268023355239, + "grad_norm": 0.08188897371292114, + "learning_rate": 1.1275018099425738e-07, + "loss": 0.0, + "num_input_tokens_seen": 118951656, + "step": 176555 + }, + { + "epoch": 4.313390174187086, + "grad_norm": 0.013791157864034176, + "learning_rate": 1.1271084658180862e-07, + "loss": 0.0203, + "num_input_tokens_seen": 118954856, + "step": 176560 + }, + { + "epoch": 4.313512325018934, + "grad_norm": 0.0006009989883750677, + "learning_rate": 1.1267151862196501e-07, + "loss": 0.0, + "num_input_tokens_seen": 118958056, + "step": 176565 + }, + { + "epoch": 4.3136344758507805, + "grad_norm": 0.0009822046849876642, + "learning_rate": 1.1263219711501282e-07, + "loss": 0.0002, + "num_input_tokens_seen": 118961448, + "step": 176570 + }, + { + "epoch": 4.313756626682627, + "grad_norm": 0.00011430822632974014, + "learning_rate": 1.1259288206123818e-07, + "loss": 0.0, + "num_input_tokens_seen": 118965416, + "step": 176575 + }, + { + "epoch": 4.313878777514475, + "grad_norm": 37.28138732910156, + "learning_rate": 1.1255357346092653e-07, + "loss": 0.0641, + "num_input_tokens_seen": 118968552, + "step": 176580 + }, + { + "epoch": 4.314000928346322, + "grad_norm": 0.00012667212286032736, + "learning_rate": 1.125142713143642e-07, + "loss": 0.0, + "num_input_tokens_seen": 118971880, + "step": 176585 + }, + { + "epoch": 4.314123079178169, + "grad_norm": 0.0009236466721631587, + "learning_rate": 1.1247497562183661e-07, + "loss": 0.0, + "num_input_tokens_seen": 118975592, + "step": 176590 + }, + { + "epoch": 4.314245230010016, + "grad_norm": 0.0015617778990417719, + "learning_rate": 1.1243568638362988e-07, + "loss": 0.0001, + "num_input_tokens_seen": 118979432, + "step": 176595 + }, + { + "epoch": 4.314367380841864, + "grad_norm": 0.00024131375539582223, + "learning_rate": 1.1239640360002945e-07, + "loss": 0.0, + "num_input_tokens_seen": 118982952, + "step": 176600 + }, + { + "epoch": 4.31448953167371, + "grad_norm": 0.0003638735506683588, + "learning_rate": 1.1235712727132107e-07, + "loss": 0.0, + "num_input_tokens_seen": 118986024, + "step": 176605 + }, + { + "epoch": 4.314611682505558, + "grad_norm": 0.00020771405252162367, + "learning_rate": 1.1231785739779065e-07, + "loss": 0.0, + "num_input_tokens_seen": 118988904, + "step": 176610 + }, + { + "epoch": 4.314733833337405, + "grad_norm": 5.820173100801185e-06, + "learning_rate": 1.1227859397972328e-07, + "loss": 0.0, + "num_input_tokens_seen": 118992232, + "step": 176615 + }, + { + "epoch": 4.314855984169252, + "grad_norm": 0.0002828043943736702, + "learning_rate": 1.1223933701740484e-07, + "loss": 0.0, + "num_input_tokens_seen": 118995432, + "step": 176620 + }, + { + "epoch": 4.314978135001099, + "grad_norm": 4.8877256631385535e-05, + "learning_rate": 1.1220008651112089e-07, + "loss": 0.0, + "num_input_tokens_seen": 118998760, + "step": 176625 + }, + { + "epoch": 4.315100285832947, + "grad_norm": 1.4099057807470672e-05, + "learning_rate": 1.1216084246115642e-07, + "loss": 0.0, + "num_input_tokens_seen": 119002216, + "step": 176630 + }, + { + "epoch": 4.3152224366647935, + "grad_norm": 0.0008657033322378993, + "learning_rate": 1.1212160486779732e-07, + "loss": 0.0002, + "num_input_tokens_seen": 119005288, + "step": 176635 + }, + { + "epoch": 4.315344587496641, + "grad_norm": 4.1478779166936874e-05, + "learning_rate": 1.1208237373132845e-07, + "loss": 0.0, + "num_input_tokens_seen": 119008552, + "step": 176640 + }, + { + "epoch": 4.315466738328488, + "grad_norm": 6.402322469512001e-05, + "learning_rate": 1.1204314905203571e-07, + "loss": 0.0, + "num_input_tokens_seen": 119011880, + "step": 176645 + }, + { + "epoch": 4.315588889160336, + "grad_norm": 8.852753671817482e-05, + "learning_rate": 1.1200393083020376e-07, + "loss": 0.0, + "num_input_tokens_seen": 119015336, + "step": 176650 + }, + { + "epoch": 4.315711039992182, + "grad_norm": 3.6001690659759333e-06, + "learning_rate": 1.1196471906611826e-07, + "loss": 0.0, + "num_input_tokens_seen": 119018728, + "step": 176655 + }, + { + "epoch": 4.31583319082403, + "grad_norm": 1.8246606487082317e-05, + "learning_rate": 1.1192551376006398e-07, + "loss": 0.0, + "num_input_tokens_seen": 119022056, + "step": 176660 + }, + { + "epoch": 4.315955341655877, + "grad_norm": 4.046333197038621e-05, + "learning_rate": 1.1188631491232626e-07, + "loss": 0.0, + "num_input_tokens_seen": 119025448, + "step": 176665 + }, + { + "epoch": 4.316077492487723, + "grad_norm": 0.0010642303386703134, + "learning_rate": 1.1184712252319028e-07, + "loss": 0.0, + "num_input_tokens_seen": 119029544, + "step": 176670 + }, + { + "epoch": 4.316199643319571, + "grad_norm": 0.0003580785123631358, + "learning_rate": 1.1180793659294074e-07, + "loss": 0.0, + "num_input_tokens_seen": 119032808, + "step": 176675 + }, + { + "epoch": 4.316321794151418, + "grad_norm": 0.00015057233395054936, + "learning_rate": 1.1176875712186295e-07, + "loss": 0.0, + "num_input_tokens_seen": 119036328, + "step": 176680 + }, + { + "epoch": 4.3164439449832654, + "grad_norm": 6.504006159957498e-05, + "learning_rate": 1.1172958411024147e-07, + "loss": 0.0029, + "num_input_tokens_seen": 119039848, + "step": 176685 + }, + { + "epoch": 4.316566095815112, + "grad_norm": 0.0016117613995447755, + "learning_rate": 1.116904175583614e-07, + "loss": 0.0, + "num_input_tokens_seen": 119042984, + "step": 176690 + }, + { + "epoch": 4.31668824664696, + "grad_norm": 6.902215318405069e-06, + "learning_rate": 1.1165125746650771e-07, + "loss": 0.0143, + "num_input_tokens_seen": 119046760, + "step": 176695 + }, + { + "epoch": 4.316810397478807, + "grad_norm": 0.3850659430027008, + "learning_rate": 1.1161210383496478e-07, + "loss": 0.0001, + "num_input_tokens_seen": 119049960, + "step": 176700 + }, + { + "epoch": 4.316932548310654, + "grad_norm": 0.012357803992927074, + "learning_rate": 1.1157295666401789e-07, + "loss": 0.0, + "num_input_tokens_seen": 119052968, + "step": 176705 + }, + { + "epoch": 4.317054699142501, + "grad_norm": 0.00010526581172598526, + "learning_rate": 1.1153381595395117e-07, + "loss": 0.0, + "num_input_tokens_seen": 119056232, + "step": 176710 + }, + { + "epoch": 4.317176849974349, + "grad_norm": 2.6836085453396663e-05, + "learning_rate": 1.114946817050496e-07, + "loss": 0.0169, + "num_input_tokens_seen": 119059880, + "step": 176715 + }, + { + "epoch": 4.317299000806195, + "grad_norm": 0.000445863523054868, + "learning_rate": 1.1145555391759764e-07, + "loss": 0.0, + "num_input_tokens_seen": 119062952, + "step": 176720 + }, + { + "epoch": 4.317421151638043, + "grad_norm": 0.0002292140416102484, + "learning_rate": 1.1141643259187994e-07, + "loss": 0.0, + "num_input_tokens_seen": 119066088, + "step": 176725 + }, + { + "epoch": 4.31754330246989, + "grad_norm": 0.000498281151521951, + "learning_rate": 1.1137731772818105e-07, + "loss": 0.0, + "num_input_tokens_seen": 119069480, + "step": 176730 + }, + { + "epoch": 4.317665453301737, + "grad_norm": 0.00032792615820653737, + "learning_rate": 1.1133820932678506e-07, + "loss": 0.0, + "num_input_tokens_seen": 119072680, + "step": 176735 + }, + { + "epoch": 4.317787604133584, + "grad_norm": 0.00145147112198174, + "learning_rate": 1.1129910738797688e-07, + "loss": 0.0, + "num_input_tokens_seen": 119076200, + "step": 176740 + }, + { + "epoch": 4.317909754965432, + "grad_norm": 6.6974112087336835e-06, + "learning_rate": 1.1126001191204038e-07, + "loss": 0.0, + "num_input_tokens_seen": 119079016, + "step": 176745 + }, + { + "epoch": 4.3180319057972785, + "grad_norm": 0.0011043348349630833, + "learning_rate": 1.1122092289926033e-07, + "loss": 0.0, + "num_input_tokens_seen": 119082280, + "step": 176750 + }, + { + "epoch": 4.318154056629126, + "grad_norm": 0.00018441988504491746, + "learning_rate": 1.1118184034992062e-07, + "loss": 0.0, + "num_input_tokens_seen": 119085544, + "step": 176755 + }, + { + "epoch": 4.318276207460973, + "grad_norm": 9.656700422056019e-05, + "learning_rate": 1.1114276426430558e-07, + "loss": 0.0, + "num_input_tokens_seen": 119088616, + "step": 176760 + }, + { + "epoch": 4.31839835829282, + "grad_norm": 0.004439180716872215, + "learning_rate": 1.1110369464269964e-07, + "loss": 0.0001, + "num_input_tokens_seen": 119092776, + "step": 176765 + }, + { + "epoch": 4.318520509124667, + "grad_norm": 1.4901905160513707e-05, + "learning_rate": 1.1106463148538659e-07, + "loss": 0.0, + "num_input_tokens_seen": 119096616, + "step": 176770 + }, + { + "epoch": 4.318642659956514, + "grad_norm": 0.001198622863739729, + "learning_rate": 1.1102557479265074e-07, + "loss": 0.0, + "num_input_tokens_seen": 119100264, + "step": 176775 + }, + { + "epoch": 4.318764810788362, + "grad_norm": 0.0009047950152307749, + "learning_rate": 1.1098652456477586e-07, + "loss": 0.0, + "num_input_tokens_seen": 119103464, + "step": 176780 + }, + { + "epoch": 4.318886961620208, + "grad_norm": 5.245099964668043e-05, + "learning_rate": 1.1094748080204608e-07, + "loss": 0.0, + "num_input_tokens_seen": 119106472, + "step": 176785 + }, + { + "epoch": 4.319009112452056, + "grad_norm": 3.678392749861814e-05, + "learning_rate": 1.1090844350474559e-07, + "loss": 0.0, + "num_input_tokens_seen": 119109736, + "step": 176790 + }, + { + "epoch": 4.319131263283903, + "grad_norm": 3.22956548188813e-05, + "learning_rate": 1.1086941267315775e-07, + "loss": 0.0, + "num_input_tokens_seen": 119112936, + "step": 176795 + }, + { + "epoch": 4.31925341411575, + "grad_norm": 39.47646713256836, + "learning_rate": 1.1083038830756697e-07, + "loss": 0.0632, + "num_input_tokens_seen": 119116264, + "step": 176800 + }, + { + "epoch": 4.319375564947597, + "grad_norm": 0.0001879289629869163, + "learning_rate": 1.1079137040825648e-07, + "loss": 0.0, + "num_input_tokens_seen": 119119528, + "step": 176805 + }, + { + "epoch": 4.319497715779445, + "grad_norm": 0.0004902026848867536, + "learning_rate": 1.107523589755105e-07, + "loss": 0.0595, + "num_input_tokens_seen": 119122920, + "step": 176810 + }, + { + "epoch": 4.3196198666112915, + "grad_norm": 0.000874120625667274, + "learning_rate": 1.1071335400961245e-07, + "loss": 0.0, + "num_input_tokens_seen": 119126376, + "step": 176815 + }, + { + "epoch": 4.319742017443139, + "grad_norm": 0.0103354062885046, + "learning_rate": 1.1067435551084625e-07, + "loss": 0.0, + "num_input_tokens_seen": 119129448, + "step": 176820 + }, + { + "epoch": 4.319864168274986, + "grad_norm": 0.00011349384294589981, + "learning_rate": 1.1063536347949509e-07, + "loss": 0.0, + "num_input_tokens_seen": 119133096, + "step": 176825 + }, + { + "epoch": 4.3199863191068335, + "grad_norm": 0.0028983631636947393, + "learning_rate": 1.1059637791584298e-07, + "loss": 0.0, + "num_input_tokens_seen": 119136104, + "step": 176830 + }, + { + "epoch": 4.32010846993868, + "grad_norm": 0.00027363133267499506, + "learning_rate": 1.1055739882017323e-07, + "loss": 0.0, + "num_input_tokens_seen": 119139432, + "step": 176835 + }, + { + "epoch": 4.320230620770527, + "grad_norm": 0.0002914174401666969, + "learning_rate": 1.1051842619276918e-07, + "loss": 0.0, + "num_input_tokens_seen": 119142760, + "step": 176840 + }, + { + "epoch": 4.320352771602375, + "grad_norm": 0.00014641509915236384, + "learning_rate": 1.104794600339145e-07, + "loss": 0.0, + "num_input_tokens_seen": 119145960, + "step": 176845 + }, + { + "epoch": 4.320474922434221, + "grad_norm": 62.26630783081055, + "learning_rate": 1.104405003438923e-07, + "loss": 0.0638, + "num_input_tokens_seen": 119149032, + "step": 176850 + }, + { + "epoch": 4.320597073266069, + "grad_norm": 3.0409164537559263e-05, + "learning_rate": 1.1040154712298599e-07, + "loss": 0.0, + "num_input_tokens_seen": 119152552, + "step": 176855 + }, + { + "epoch": 4.320719224097916, + "grad_norm": 0.00017086489242501557, + "learning_rate": 1.1036260037147915e-07, + "loss": 0.0, + "num_input_tokens_seen": 119155752, + "step": 176860 + }, + { + "epoch": 4.320841374929763, + "grad_norm": 0.0013268385082483292, + "learning_rate": 1.1032366008965455e-07, + "loss": 0.0, + "num_input_tokens_seen": 119158888, + "step": 176865 + }, + { + "epoch": 4.32096352576161, + "grad_norm": 0.0006828519399277866, + "learning_rate": 1.1028472627779573e-07, + "loss": 0.0, + "num_input_tokens_seen": 119162088, + "step": 176870 + }, + { + "epoch": 4.321085676593458, + "grad_norm": 0.0033956593833863735, + "learning_rate": 1.1024579893618547e-07, + "loss": 0.0, + "num_input_tokens_seen": 119165032, + "step": 176875 + }, + { + "epoch": 4.3212078274253045, + "grad_norm": 5.517356839845888e-05, + "learning_rate": 1.102068780651072e-07, + "loss": 0.0, + "num_input_tokens_seen": 119168424, + "step": 176880 + }, + { + "epoch": 4.321329978257152, + "grad_norm": 8.954934855864849e-06, + "learning_rate": 1.1016796366484394e-07, + "loss": 0.0, + "num_input_tokens_seen": 119171560, + "step": 176885 + }, + { + "epoch": 4.321452129088999, + "grad_norm": 5.516711826203391e-05, + "learning_rate": 1.1012905573567843e-07, + "loss": 0.0, + "num_input_tokens_seen": 119174888, + "step": 176890 + }, + { + "epoch": 4.3215742799208465, + "grad_norm": 0.00040915783029049635, + "learning_rate": 1.1009015427789393e-07, + "loss": 0.0, + "num_input_tokens_seen": 119178536, + "step": 176895 + }, + { + "epoch": 4.321696430752693, + "grad_norm": 2.283164212713018e-05, + "learning_rate": 1.1005125929177306e-07, + "loss": 0.0, + "num_input_tokens_seen": 119181800, + "step": 176900 + }, + { + "epoch": 4.321818581584541, + "grad_norm": 0.000558803731109947, + "learning_rate": 1.1001237077759895e-07, + "loss": 0.0, + "num_input_tokens_seen": 119184808, + "step": 176905 + }, + { + "epoch": 4.321940732416388, + "grad_norm": 6.331290933303535e-06, + "learning_rate": 1.0997348873565404e-07, + "loss": 0.0, + "num_input_tokens_seen": 119188264, + "step": 176910 + }, + { + "epoch": 4.322062883248235, + "grad_norm": 0.0001938117202371359, + "learning_rate": 1.0993461316622132e-07, + "loss": 0.0, + "num_input_tokens_seen": 119191656, + "step": 176915 + }, + { + "epoch": 4.322185034080082, + "grad_norm": 2.994046917592641e-05, + "learning_rate": 1.0989574406958368e-07, + "loss": 0.0, + "num_input_tokens_seen": 119194856, + "step": 176920 + }, + { + "epoch": 4.32230718491193, + "grad_norm": 8.466270628559869e-06, + "learning_rate": 1.0985688144602346e-07, + "loss": 0.0, + "num_input_tokens_seen": 119198760, + "step": 176925 + }, + { + "epoch": 4.322429335743776, + "grad_norm": 0.000782399030867964, + "learning_rate": 1.0981802529582362e-07, + "loss": 0.0, + "num_input_tokens_seen": 119202280, + "step": 176930 + }, + { + "epoch": 4.322551486575623, + "grad_norm": 0.00056214421056211, + "learning_rate": 1.0977917561926642e-07, + "loss": 0.0, + "num_input_tokens_seen": 119205544, + "step": 176935 + }, + { + "epoch": 4.322673637407471, + "grad_norm": 0.004536370746791363, + "learning_rate": 1.0974033241663439e-07, + "loss": 0.0, + "num_input_tokens_seen": 119208808, + "step": 176940 + }, + { + "epoch": 4.3227957882393175, + "grad_norm": 0.0001978017098736018, + "learning_rate": 1.097014956882103e-07, + "loss": 0.0, + "num_input_tokens_seen": 119212264, + "step": 176945 + }, + { + "epoch": 4.322917939071165, + "grad_norm": 0.0022211032919585705, + "learning_rate": 1.0966266543427616e-07, + "loss": 0.0, + "num_input_tokens_seen": 119215976, + "step": 176950 + }, + { + "epoch": 4.323040089903012, + "grad_norm": 0.00013491483696270734, + "learning_rate": 1.0962384165511485e-07, + "loss": 0.0, + "num_input_tokens_seen": 119219560, + "step": 176955 + }, + { + "epoch": 4.3231622407348596, + "grad_norm": 4.585308852256276e-05, + "learning_rate": 1.0958502435100814e-07, + "loss": 0.0, + "num_input_tokens_seen": 119222952, + "step": 176960 + }, + { + "epoch": 4.323284391566706, + "grad_norm": 5.633817636407912e-05, + "learning_rate": 1.0954621352223892e-07, + "loss": 0.0, + "num_input_tokens_seen": 119226472, + "step": 176965 + }, + { + "epoch": 4.323406542398554, + "grad_norm": 0.00012583695934154093, + "learning_rate": 1.0950740916908896e-07, + "loss": 0.0, + "num_input_tokens_seen": 119229544, + "step": 176970 + }, + { + "epoch": 4.323528693230401, + "grad_norm": 0.0007696277461946011, + "learning_rate": 1.0946861129184048e-07, + "loss": 0.0, + "num_input_tokens_seen": 119232808, + "step": 176975 + }, + { + "epoch": 4.323650844062248, + "grad_norm": 0.0008008855511434376, + "learning_rate": 1.0942981989077615e-07, + "loss": 0.0, + "num_input_tokens_seen": 119235880, + "step": 176980 + }, + { + "epoch": 4.323772994894095, + "grad_norm": 1.950888690771535e-05, + "learning_rate": 1.093910349661774e-07, + "loss": 0.0, + "num_input_tokens_seen": 119239336, + "step": 176985 + }, + { + "epoch": 4.323895145725943, + "grad_norm": 0.0029932560864835978, + "learning_rate": 1.0935225651832691e-07, + "loss": 0.0, + "num_input_tokens_seen": 119242984, + "step": 176990 + }, + { + "epoch": 4.324017296557789, + "grad_norm": 0.04415324330329895, + "learning_rate": 1.0931348454750599e-07, + "loss": 0.0, + "num_input_tokens_seen": 119245992, + "step": 176995 + }, + { + "epoch": 4.324139447389637, + "grad_norm": 0.0006158613250590861, + "learning_rate": 1.0927471905399732e-07, + "loss": 0.0, + "num_input_tokens_seen": 119249192, + "step": 177000 + }, + { + "epoch": 4.324261598221484, + "grad_norm": 0.0008558622212149203, + "learning_rate": 1.0923596003808222e-07, + "loss": 0.0, + "num_input_tokens_seen": 119252264, + "step": 177005 + }, + { + "epoch": 4.3243837490533314, + "grad_norm": 8.662411710247397e-05, + "learning_rate": 1.091972075000428e-07, + "loss": 0.0, + "num_input_tokens_seen": 119255464, + "step": 177010 + }, + { + "epoch": 4.324505899885178, + "grad_norm": 2.8647489671129733e-05, + "learning_rate": 1.0915846144016117e-07, + "loss": 0.0, + "num_input_tokens_seen": 119259432, + "step": 177015 + }, + { + "epoch": 4.324628050717026, + "grad_norm": 0.000773418927565217, + "learning_rate": 1.0911972185871842e-07, + "loss": 0.0, + "num_input_tokens_seen": 119262632, + "step": 177020 + }, + { + "epoch": 4.324750201548873, + "grad_norm": 0.00012162028724560514, + "learning_rate": 1.0908098875599703e-07, + "loss": 0.0, + "num_input_tokens_seen": 119266280, + "step": 177025 + }, + { + "epoch": 4.324872352380719, + "grad_norm": 0.0016422343906015158, + "learning_rate": 1.0904226213227807e-07, + "loss": 0.0, + "num_input_tokens_seen": 119269352, + "step": 177030 + }, + { + "epoch": 4.324994503212567, + "grad_norm": 0.0027196877636015415, + "learning_rate": 1.0900354198784367e-07, + "loss": 0.0, + "num_input_tokens_seen": 119272360, + "step": 177035 + }, + { + "epoch": 4.325116654044414, + "grad_norm": 7.278865814441815e-05, + "learning_rate": 1.0896482832297515e-07, + "loss": 0.0, + "num_input_tokens_seen": 119275368, + "step": 177040 + }, + { + "epoch": 4.325238804876261, + "grad_norm": 1.766349669196643e-05, + "learning_rate": 1.0892612113795374e-07, + "loss": 0.0, + "num_input_tokens_seen": 119278312, + "step": 177045 + }, + { + "epoch": 4.325360955708108, + "grad_norm": 1.544174665468745e-05, + "learning_rate": 1.0888742043306154e-07, + "loss": 0.0, + "num_input_tokens_seen": 119281704, + "step": 177050 + }, + { + "epoch": 4.325483106539956, + "grad_norm": 9.286731255997438e-06, + "learning_rate": 1.0884872620857954e-07, + "loss": 0.0, + "num_input_tokens_seen": 119284712, + "step": 177055 + }, + { + "epoch": 4.3256052573718025, + "grad_norm": 0.0032419469207525253, + "learning_rate": 1.0881003846478942e-07, + "loss": 0.1786, + "num_input_tokens_seen": 119287912, + "step": 177060 + }, + { + "epoch": 4.32572740820365, + "grad_norm": 2.6196485123364255e-05, + "learning_rate": 1.0877135720197228e-07, + "loss": 0.0, + "num_input_tokens_seen": 119291368, + "step": 177065 + }, + { + "epoch": 4.325849559035497, + "grad_norm": 0.0016508515691384673, + "learning_rate": 1.0873268242040945e-07, + "loss": 0.0, + "num_input_tokens_seen": 119294696, + "step": 177070 + }, + { + "epoch": 4.3259717098673445, + "grad_norm": 7.895252201706171e-05, + "learning_rate": 1.0869401412038248e-07, + "loss": 0.0, + "num_input_tokens_seen": 119297960, + "step": 177075 + }, + { + "epoch": 4.326093860699191, + "grad_norm": 0.011798272840678692, + "learning_rate": 1.0865535230217226e-07, + "loss": 0.0, + "num_input_tokens_seen": 119301416, + "step": 177080 + }, + { + "epoch": 4.326216011531039, + "grad_norm": 0.002923740306869149, + "learning_rate": 1.0861669696606024e-07, + "loss": 0.0, + "num_input_tokens_seen": 119305000, + "step": 177085 + }, + { + "epoch": 4.326338162362886, + "grad_norm": 0.0003772857889998704, + "learning_rate": 1.0857804811232707e-07, + "loss": 0.0, + "num_input_tokens_seen": 119308328, + "step": 177090 + }, + { + "epoch": 4.326460313194733, + "grad_norm": 0.001669778604991734, + "learning_rate": 1.0853940574125419e-07, + "loss": 0.0, + "num_input_tokens_seen": 119311208, + "step": 177095 + }, + { + "epoch": 4.32658246402658, + "grad_norm": 0.00031803478486835957, + "learning_rate": 1.0850076985312262e-07, + "loss": 0.0, + "num_input_tokens_seen": 119314152, + "step": 177100 + }, + { + "epoch": 4.326704614858427, + "grad_norm": 9.132656850852072e-05, + "learning_rate": 1.0846214044821311e-07, + "loss": 0.0, + "num_input_tokens_seen": 119317544, + "step": 177105 + }, + { + "epoch": 4.326826765690274, + "grad_norm": 0.00018010310304816812, + "learning_rate": 1.084235175268069e-07, + "loss": 0.0, + "num_input_tokens_seen": 119320616, + "step": 177110 + }, + { + "epoch": 4.326948916522121, + "grad_norm": 0.0012703038519248366, + "learning_rate": 1.0838490108918452e-07, + "loss": 0.0, + "num_input_tokens_seen": 119324136, + "step": 177115 + }, + { + "epoch": 4.327071067353969, + "grad_norm": 0.00039456746890209615, + "learning_rate": 1.083462911356271e-07, + "loss": 0.0, + "num_input_tokens_seen": 119327912, + "step": 177120 + }, + { + "epoch": 4.3271932181858155, + "grad_norm": 5.931454506935552e-05, + "learning_rate": 1.0830768766641507e-07, + "loss": 0.0, + "num_input_tokens_seen": 119331432, + "step": 177125 + }, + { + "epoch": 4.327315369017663, + "grad_norm": 1.6832897017593496e-05, + "learning_rate": 1.0826909068182954e-07, + "loss": 0.0, + "num_input_tokens_seen": 119334888, + "step": 177130 + }, + { + "epoch": 4.32743751984951, + "grad_norm": 1.2521577446023002e-05, + "learning_rate": 1.0823050018215097e-07, + "loss": 0.0, + "num_input_tokens_seen": 119338408, + "step": 177135 + }, + { + "epoch": 4.3275596706813575, + "grad_norm": 7.616833318024874e-05, + "learning_rate": 1.0819191616766011e-07, + "loss": 0.0, + "num_input_tokens_seen": 119342184, + "step": 177140 + }, + { + "epoch": 4.327681821513204, + "grad_norm": 0.0011563415173441172, + "learning_rate": 1.0815333863863763e-07, + "loss": 0.0, + "num_input_tokens_seen": 119345768, + "step": 177145 + }, + { + "epoch": 4.327803972345052, + "grad_norm": 7.111600280040875e-05, + "learning_rate": 1.0811476759536364e-07, + "loss": 0.0, + "num_input_tokens_seen": 119348968, + "step": 177150 + }, + { + "epoch": 4.327926123176899, + "grad_norm": 3.4444997254468035e-06, + "learning_rate": 1.0807620303811915e-07, + "loss": 0.0, + "num_input_tokens_seen": 119351912, + "step": 177155 + }, + { + "epoch": 4.328048274008746, + "grad_norm": 0.0074299597181379795, + "learning_rate": 1.0803764496718426e-07, + "loss": 0.0, + "num_input_tokens_seen": 119355176, + "step": 177160 + }, + { + "epoch": 4.328170424840593, + "grad_norm": 9.750135177455377e-06, + "learning_rate": 1.0799909338283952e-07, + "loss": 0.0, + "num_input_tokens_seen": 119358248, + "step": 177165 + }, + { + "epoch": 4.328292575672441, + "grad_norm": 0.0002162015880458057, + "learning_rate": 1.0796054828536549e-07, + "loss": 0.0, + "num_input_tokens_seen": 119361576, + "step": 177170 + }, + { + "epoch": 4.328414726504287, + "grad_norm": 7.276174437720329e-05, + "learning_rate": 1.0792200967504206e-07, + "loss": 0.0, + "num_input_tokens_seen": 119364648, + "step": 177175 + }, + { + "epoch": 4.328536877336135, + "grad_norm": 0.00020344149379525334, + "learning_rate": 1.0788347755214999e-07, + "loss": 0.0, + "num_input_tokens_seen": 119368040, + "step": 177180 + }, + { + "epoch": 4.328659028167982, + "grad_norm": 0.0005671592662110925, + "learning_rate": 1.0784495191696897e-07, + "loss": 0.0, + "num_input_tokens_seen": 119371624, + "step": 177185 + }, + { + "epoch": 4.328781178999829, + "grad_norm": 0.00015273607277777046, + "learning_rate": 1.0780643276977941e-07, + "loss": 0.0, + "num_input_tokens_seen": 119374888, + "step": 177190 + }, + { + "epoch": 4.328903329831676, + "grad_norm": 1.7507405573269352e-05, + "learning_rate": 1.0776792011086166e-07, + "loss": 0.0, + "num_input_tokens_seen": 119378088, + "step": 177195 + }, + { + "epoch": 4.329025480663523, + "grad_norm": 0.00021398518583737314, + "learning_rate": 1.0772941394049528e-07, + "loss": 0.0, + "num_input_tokens_seen": 119381480, + "step": 177200 + }, + { + "epoch": 4.3291476314953705, + "grad_norm": 4.134556002099998e-05, + "learning_rate": 1.0769091425896093e-07, + "loss": 0.0002, + "num_input_tokens_seen": 119384744, + "step": 177205 + }, + { + "epoch": 4.329269782327217, + "grad_norm": 0.000264695001533255, + "learning_rate": 1.0765242106653805e-07, + "loss": 0.0002, + "num_input_tokens_seen": 119387880, + "step": 177210 + }, + { + "epoch": 4.329391933159065, + "grad_norm": 2.9532797270803712e-05, + "learning_rate": 1.0761393436350685e-07, + "loss": 0.0, + "num_input_tokens_seen": 119391144, + "step": 177215 + }, + { + "epoch": 4.329514083990912, + "grad_norm": 4.648505637305789e-05, + "learning_rate": 1.0757545415014702e-07, + "loss": 0.0, + "num_input_tokens_seen": 119394216, + "step": 177220 + }, + { + "epoch": 4.329636234822759, + "grad_norm": 0.0005457483348436654, + "learning_rate": 1.0753698042673853e-07, + "loss": 0.0, + "num_input_tokens_seen": 119397480, + "step": 177225 + }, + { + "epoch": 4.329758385654606, + "grad_norm": 7.230892515508458e-05, + "learning_rate": 1.074985131935614e-07, + "loss": 0.0, + "num_input_tokens_seen": 119400680, + "step": 177230 + }, + { + "epoch": 4.329880536486454, + "grad_norm": 7.861643098294735e-06, + "learning_rate": 1.0746005245089484e-07, + "loss": 0.0, + "num_input_tokens_seen": 119404840, + "step": 177235 + }, + { + "epoch": 4.3300026873183, + "grad_norm": 0.00010462554928380996, + "learning_rate": 1.0742159819901908e-07, + "loss": 0.0, + "num_input_tokens_seen": 119407912, + "step": 177240 + }, + { + "epoch": 4.330124838150148, + "grad_norm": 0.0006451192311942577, + "learning_rate": 1.0738315043821356e-07, + "loss": 0.0, + "num_input_tokens_seen": 119411176, + "step": 177245 + }, + { + "epoch": 4.330246988981995, + "grad_norm": 0.000196227862033993, + "learning_rate": 1.0734470916875771e-07, + "loss": 0.0, + "num_input_tokens_seen": 119414184, + "step": 177250 + }, + { + "epoch": 4.330369139813842, + "grad_norm": 0.0009764119167812169, + "learning_rate": 1.0730627439093131e-07, + "loss": 0.0, + "num_input_tokens_seen": 119417512, + "step": 177255 + }, + { + "epoch": 4.330491290645689, + "grad_norm": 7.502334483433515e-05, + "learning_rate": 1.0726784610501372e-07, + "loss": 0.0, + "num_input_tokens_seen": 119420392, + "step": 177260 + }, + { + "epoch": 4.330613441477537, + "grad_norm": 0.0002198718866566196, + "learning_rate": 1.0722942431128457e-07, + "loss": 0.0, + "num_input_tokens_seen": 119423848, + "step": 177265 + }, + { + "epoch": 4.3307355923093835, + "grad_norm": 0.000420448457589373, + "learning_rate": 1.0719100901002298e-07, + "loss": 0.0, + "num_input_tokens_seen": 119427240, + "step": 177270 + }, + { + "epoch": 4.330857743141231, + "grad_norm": 1.2290911399759352e-05, + "learning_rate": 1.0715260020150874e-07, + "loss": 0.0, + "num_input_tokens_seen": 119430248, + "step": 177275 + }, + { + "epoch": 4.330979893973078, + "grad_norm": 0.00271340599283576, + "learning_rate": 1.0711419788602072e-07, + "loss": 0.0, + "num_input_tokens_seen": 119433448, + "step": 177280 + }, + { + "epoch": 4.3311020448049256, + "grad_norm": 3.577460302039981e-05, + "learning_rate": 1.0707580206383837e-07, + "loss": 0.0, + "num_input_tokens_seen": 119436840, + "step": 177285 + }, + { + "epoch": 4.331224195636772, + "grad_norm": 9.350366599392146e-05, + "learning_rate": 1.0703741273524125e-07, + "loss": 0.0, + "num_input_tokens_seen": 119440232, + "step": 177290 + }, + { + "epoch": 4.331346346468619, + "grad_norm": 3.744398782146163e-05, + "learning_rate": 1.0699902990050791e-07, + "loss": 0.0, + "num_input_tokens_seen": 119443880, + "step": 177295 + }, + { + "epoch": 4.331468497300467, + "grad_norm": 0.00029137422097846866, + "learning_rate": 1.0696065355991812e-07, + "loss": 0.0, + "num_input_tokens_seen": 119447528, + "step": 177300 + }, + { + "epoch": 4.331590648132313, + "grad_norm": 0.00011451731552369893, + "learning_rate": 1.0692228371375045e-07, + "loss": 0.0, + "num_input_tokens_seen": 119451176, + "step": 177305 + }, + { + "epoch": 4.331712798964161, + "grad_norm": 8.536599489161745e-05, + "learning_rate": 1.0688392036228434e-07, + "loss": 0.0, + "num_input_tokens_seen": 119454440, + "step": 177310 + }, + { + "epoch": 4.331834949796008, + "grad_norm": 0.029383648186922073, + "learning_rate": 1.0684556350579832e-07, + "loss": 0.0, + "num_input_tokens_seen": 119458152, + "step": 177315 + }, + { + "epoch": 4.331957100627855, + "grad_norm": 7.236431702040136e-05, + "learning_rate": 1.0680721314457164e-07, + "loss": 0.0, + "num_input_tokens_seen": 119462120, + "step": 177320 + }, + { + "epoch": 4.332079251459702, + "grad_norm": 0.005610376596450806, + "learning_rate": 1.0676886927888329e-07, + "loss": 0.0, + "num_input_tokens_seen": 119465320, + "step": 177325 + }, + { + "epoch": 4.33220140229155, + "grad_norm": 0.020806120708584785, + "learning_rate": 1.0673053190901183e-07, + "loss": 0.0, + "num_input_tokens_seen": 119468776, + "step": 177330 + }, + { + "epoch": 4.332323553123397, + "grad_norm": 0.0010466218227520585, + "learning_rate": 1.0669220103523647e-07, + "loss": 0.0, + "num_input_tokens_seen": 119472360, + "step": 177335 + }, + { + "epoch": 4.332445703955244, + "grad_norm": 0.000505940755829215, + "learning_rate": 1.0665387665783532e-07, + "loss": 0.0, + "num_input_tokens_seen": 119475496, + "step": 177340 + }, + { + "epoch": 4.332567854787091, + "grad_norm": 0.0004687255423050374, + "learning_rate": 1.0661555877708783e-07, + "loss": 0.0, + "num_input_tokens_seen": 119478952, + "step": 177345 + }, + { + "epoch": 4.332690005618939, + "grad_norm": 0.002022866625338793, + "learning_rate": 1.0657724739327223e-07, + "loss": 0.0, + "num_input_tokens_seen": 119482152, + "step": 177350 + }, + { + "epoch": 4.332812156450785, + "grad_norm": 0.00021063839085400105, + "learning_rate": 1.0653894250666695e-07, + "loss": 0.0, + "num_input_tokens_seen": 119486696, + "step": 177355 + }, + { + "epoch": 4.332934307282633, + "grad_norm": 0.0020078979432582855, + "learning_rate": 1.06500644117551e-07, + "loss": 0.0, + "num_input_tokens_seen": 119490216, + "step": 177360 + }, + { + "epoch": 4.33305645811448, + "grad_norm": 9.417716682946775e-06, + "learning_rate": 1.0646235222620247e-07, + "loss": 0.0002, + "num_input_tokens_seen": 119493800, + "step": 177365 + }, + { + "epoch": 4.333178608946327, + "grad_norm": 1.2056245395797305e-05, + "learning_rate": 1.0642406683290028e-07, + "loss": 0.0, + "num_input_tokens_seen": 119497192, + "step": 177370 + }, + { + "epoch": 4.333300759778174, + "grad_norm": 0.001364095718599856, + "learning_rate": 1.0638578793792253e-07, + "loss": 0.0929, + "num_input_tokens_seen": 119500584, + "step": 177375 + }, + { + "epoch": 4.333422910610022, + "grad_norm": 5.21705842402298e-05, + "learning_rate": 1.0634751554154753e-07, + "loss": 0.0, + "num_input_tokens_seen": 119503656, + "step": 177380 + }, + { + "epoch": 4.3335450614418685, + "grad_norm": 4.9862387641042005e-06, + "learning_rate": 1.0630924964405396e-07, + "loss": 0.0, + "num_input_tokens_seen": 119507240, + "step": 177385 + }, + { + "epoch": 4.333667212273715, + "grad_norm": 0.0001016928072203882, + "learning_rate": 1.0627099024571984e-07, + "loss": 0.0, + "num_input_tokens_seen": 119510568, + "step": 177390 + }, + { + "epoch": 4.333789363105563, + "grad_norm": 0.0002153657260350883, + "learning_rate": 1.0623273734682347e-07, + "loss": 0.0, + "num_input_tokens_seen": 119513768, + "step": 177395 + }, + { + "epoch": 4.33391151393741, + "grad_norm": 0.0023736932780593634, + "learning_rate": 1.0619449094764299e-07, + "loss": 0.0, + "num_input_tokens_seen": 119517224, + "step": 177400 + }, + { + "epoch": 4.334033664769257, + "grad_norm": 0.000669497880153358, + "learning_rate": 1.0615625104845672e-07, + "loss": 0.0, + "num_input_tokens_seen": 119520808, + "step": 177405 + }, + { + "epoch": 4.334155815601104, + "grad_norm": 0.0003056761051993817, + "learning_rate": 1.0611801764954242e-07, + "loss": 0.0, + "num_input_tokens_seen": 119524072, + "step": 177410 + }, + { + "epoch": 4.334277966432952, + "grad_norm": 0.0005257625016383827, + "learning_rate": 1.0607979075117824e-07, + "loss": 0.0354, + "num_input_tokens_seen": 119527336, + "step": 177415 + }, + { + "epoch": 4.334400117264798, + "grad_norm": 6.078456499380991e-05, + "learning_rate": 1.0604157035364259e-07, + "loss": 0.0, + "num_input_tokens_seen": 119530856, + "step": 177420 + }, + { + "epoch": 4.334522268096646, + "grad_norm": 0.0018427494214847684, + "learning_rate": 1.0600335645721281e-07, + "loss": 0.0, + "num_input_tokens_seen": 119533864, + "step": 177425 + }, + { + "epoch": 4.334644418928493, + "grad_norm": 0.0004773819528054446, + "learning_rate": 1.0596514906216725e-07, + "loss": 0.0, + "num_input_tokens_seen": 119536808, + "step": 177430 + }, + { + "epoch": 4.33476656976034, + "grad_norm": 0.0014973797369748354, + "learning_rate": 1.0592694816878345e-07, + "loss": 0.0, + "num_input_tokens_seen": 119539880, + "step": 177435 + }, + { + "epoch": 4.334888720592187, + "grad_norm": 0.00011810551950475201, + "learning_rate": 1.0588875377733952e-07, + "loss": 0.0, + "num_input_tokens_seen": 119543272, + "step": 177440 + }, + { + "epoch": 4.335010871424035, + "grad_norm": 0.00020126704475842416, + "learning_rate": 1.0585056588811292e-07, + "loss": 0.0, + "num_input_tokens_seen": 119546792, + "step": 177445 + }, + { + "epoch": 4.3351330222558815, + "grad_norm": 0.0002205111231887713, + "learning_rate": 1.0581238450138163e-07, + "loss": 0.0, + "num_input_tokens_seen": 119549800, + "step": 177450 + }, + { + "epoch": 4.335255173087729, + "grad_norm": 0.7495196461677551, + "learning_rate": 1.0577420961742301e-07, + "loss": 0.0004, + "num_input_tokens_seen": 119553192, + "step": 177455 + }, + { + "epoch": 4.335377323919576, + "grad_norm": 0.0011778388870880008, + "learning_rate": 1.0573604123651503e-07, + "loss": 0.0, + "num_input_tokens_seen": 119556712, + "step": 177460 + }, + { + "epoch": 4.335499474751423, + "grad_norm": 0.00013111214502714574, + "learning_rate": 1.0569787935893514e-07, + "loss": 0.0, + "num_input_tokens_seen": 119559912, + "step": 177465 + }, + { + "epoch": 4.33562162558327, + "grad_norm": 0.001341344672255218, + "learning_rate": 1.0565972398496059e-07, + "loss": 0.0, + "num_input_tokens_seen": 119563240, + "step": 177470 + }, + { + "epoch": 4.335743776415117, + "grad_norm": 0.002520339796319604, + "learning_rate": 1.0562157511486902e-07, + "loss": 0.003, + "num_input_tokens_seen": 119566440, + "step": 177475 + }, + { + "epoch": 4.335865927246965, + "grad_norm": 0.00025319313863292336, + "learning_rate": 1.0558343274893821e-07, + "loss": 0.0, + "num_input_tokens_seen": 119569768, + "step": 177480 + }, + { + "epoch": 4.335988078078811, + "grad_norm": 0.018951229751110077, + "learning_rate": 1.0554529688744507e-07, + "loss": 0.0, + "num_input_tokens_seen": 119573416, + "step": 177485 + }, + { + "epoch": 4.336110228910659, + "grad_norm": 0.001201994833536446, + "learning_rate": 1.0550716753066724e-07, + "loss": 0.0, + "num_input_tokens_seen": 119576872, + "step": 177490 + }, + { + "epoch": 4.336232379742506, + "grad_norm": 6.68705761199817e-05, + "learning_rate": 1.0546904467888162e-07, + "loss": 0.0, + "num_input_tokens_seen": 119580328, + "step": 177495 + }, + { + "epoch": 4.336354530574353, + "grad_norm": 0.00011509162141010165, + "learning_rate": 1.0543092833236578e-07, + "loss": 0.0, + "num_input_tokens_seen": 119583464, + "step": 177500 + }, + { + "epoch": 4.3364766814062, + "grad_norm": 0.0015443543670699, + "learning_rate": 1.0539281849139703e-07, + "loss": 0.0, + "num_input_tokens_seen": 119586664, + "step": 177505 + }, + { + "epoch": 4.336598832238048, + "grad_norm": 0.0019911492709070444, + "learning_rate": 1.0535471515625216e-07, + "loss": 0.0, + "num_input_tokens_seen": 119590184, + "step": 177510 + }, + { + "epoch": 4.3367209830698945, + "grad_norm": 2.6939822419080883e-05, + "learning_rate": 1.0531661832720862e-07, + "loss": 0.0, + "num_input_tokens_seen": 119593192, + "step": 177515 + }, + { + "epoch": 4.336843133901742, + "grad_norm": 2.7527998099685647e-05, + "learning_rate": 1.0527852800454295e-07, + "loss": 0.0, + "num_input_tokens_seen": 119596456, + "step": 177520 + }, + { + "epoch": 4.336965284733589, + "grad_norm": 3.5799188481178135e-05, + "learning_rate": 1.0524044418853284e-07, + "loss": 0.0, + "num_input_tokens_seen": 119600424, + "step": 177525 + }, + { + "epoch": 4.3370874355654365, + "grad_norm": 2.5521441784803756e-05, + "learning_rate": 1.0520236687945461e-07, + "loss": 0.0, + "num_input_tokens_seen": 119603752, + "step": 177530 + }, + { + "epoch": 4.337209586397283, + "grad_norm": 0.0010590673191472888, + "learning_rate": 1.0516429607758548e-07, + "loss": 0.0, + "num_input_tokens_seen": 119607016, + "step": 177535 + }, + { + "epoch": 4.337331737229131, + "grad_norm": 0.0003094057319685817, + "learning_rate": 1.0512623178320235e-07, + "loss": 0.0, + "num_input_tokens_seen": 119610408, + "step": 177540 + }, + { + "epoch": 4.337453888060978, + "grad_norm": 0.0007048142142593861, + "learning_rate": 1.0508817399658187e-07, + "loss": 0.0, + "num_input_tokens_seen": 119613800, + "step": 177545 + }, + { + "epoch": 4.337576038892825, + "grad_norm": 5.7931225455831736e-05, + "learning_rate": 1.0505012271800107e-07, + "loss": 0.0, + "num_input_tokens_seen": 119616936, + "step": 177550 + }, + { + "epoch": 4.337698189724672, + "grad_norm": 0.0013491696445271373, + "learning_rate": 1.0501207794773647e-07, + "loss": 0.0, + "num_input_tokens_seen": 119620520, + "step": 177555 + }, + { + "epoch": 4.337820340556519, + "grad_norm": 3.8408492400776595e-05, + "learning_rate": 1.0497403968606455e-07, + "loss": 0.0, + "num_input_tokens_seen": 119623720, + "step": 177560 + }, + { + "epoch": 4.337942491388366, + "grad_norm": 9.544839122099802e-05, + "learning_rate": 1.049360079332624e-07, + "loss": 0.0, + "num_input_tokens_seen": 119627816, + "step": 177565 + }, + { + "epoch": 4.338064642220213, + "grad_norm": 0.0001397064042976126, + "learning_rate": 1.0489798268960615e-07, + "loss": 0.0, + "num_input_tokens_seen": 119631016, + "step": 177570 + }, + { + "epoch": 4.338186793052061, + "grad_norm": 0.013521953485906124, + "learning_rate": 1.0485996395537267e-07, + "loss": 0.0, + "num_input_tokens_seen": 119634472, + "step": 177575 + }, + { + "epoch": 4.3383089438839075, + "grad_norm": 0.005240561906248331, + "learning_rate": 1.0482195173083807e-07, + "loss": 0.0, + "num_input_tokens_seen": 119637608, + "step": 177580 + }, + { + "epoch": 4.338431094715755, + "grad_norm": 0.0003331290790811181, + "learning_rate": 1.0478394601627927e-07, + "loss": 0.0, + "num_input_tokens_seen": 119641064, + "step": 177585 + }, + { + "epoch": 4.338553245547602, + "grad_norm": 1.179240189230768e-05, + "learning_rate": 1.0474594681197213e-07, + "loss": 0.0, + "num_input_tokens_seen": 119644520, + "step": 177590 + }, + { + "epoch": 4.3386753963794495, + "grad_norm": 0.00010941450454993173, + "learning_rate": 1.0470795411819333e-07, + "loss": 0.0, + "num_input_tokens_seen": 119648104, + "step": 177595 + }, + { + "epoch": 4.338797547211296, + "grad_norm": 0.00044772823457606137, + "learning_rate": 1.0466996793521932e-07, + "loss": 0.0, + "num_input_tokens_seen": 119651624, + "step": 177600 + }, + { + "epoch": 4.338919698043144, + "grad_norm": 3.42298299074173e-05, + "learning_rate": 1.0463198826332587e-07, + "loss": 0.0, + "num_input_tokens_seen": 119655080, + "step": 177605 + }, + { + "epoch": 4.339041848874991, + "grad_norm": 0.00016116289771161973, + "learning_rate": 1.0459401510278965e-07, + "loss": 0.0, + "num_input_tokens_seen": 119658408, + "step": 177610 + }, + { + "epoch": 4.339163999706838, + "grad_norm": 5.474098361446522e-05, + "learning_rate": 1.0455604845388633e-07, + "loss": 0.0, + "num_input_tokens_seen": 119661800, + "step": 177615 + }, + { + "epoch": 4.339286150538685, + "grad_norm": 4.148006337345578e-05, + "learning_rate": 1.0451808831689247e-07, + "loss": 0.0, + "num_input_tokens_seen": 119664872, + "step": 177620 + }, + { + "epoch": 4.339408301370533, + "grad_norm": 0.021591929718852043, + "learning_rate": 1.0448013469208384e-07, + "loss": 0.0, + "num_input_tokens_seen": 119667816, + "step": 177625 + }, + { + "epoch": 4.339530452202379, + "grad_norm": 0.0004449640109669417, + "learning_rate": 1.0444218757973643e-07, + "loss": 0.0, + "num_input_tokens_seen": 119671720, + "step": 177630 + }, + { + "epoch": 4.339652603034227, + "grad_norm": 0.0003688403812702745, + "learning_rate": 1.0440424698012651e-07, + "loss": 0.0, + "num_input_tokens_seen": 119675112, + "step": 177635 + }, + { + "epoch": 4.339774753866074, + "grad_norm": 0.23644912242889404, + "learning_rate": 1.0436631289352959e-07, + "loss": 0.0001, + "num_input_tokens_seen": 119678312, + "step": 177640 + }, + { + "epoch": 4.339896904697921, + "grad_norm": 8.655286364955828e-05, + "learning_rate": 1.0432838532022204e-07, + "loss": 0.0, + "num_input_tokens_seen": 119681832, + "step": 177645 + }, + { + "epoch": 4.340019055529768, + "grad_norm": 0.0001912415900733322, + "learning_rate": 1.0429046426047905e-07, + "loss": 0.0, + "num_input_tokens_seen": 119685544, + "step": 177650 + }, + { + "epoch": 4.340141206361615, + "grad_norm": 0.0002560184511821717, + "learning_rate": 1.0425254971457697e-07, + "loss": 0.0, + "num_input_tokens_seen": 119688616, + "step": 177655 + }, + { + "epoch": 4.340263357193463, + "grad_norm": 0.0001207870664075017, + "learning_rate": 1.0421464168279137e-07, + "loss": 0.0, + "num_input_tokens_seen": 119692072, + "step": 177660 + }, + { + "epoch": 4.340385508025309, + "grad_norm": 0.02083778753876686, + "learning_rate": 1.0417674016539745e-07, + "loss": 0.0, + "num_input_tokens_seen": 119695272, + "step": 177665 + }, + { + "epoch": 4.340507658857157, + "grad_norm": 0.15552714467048645, + "learning_rate": 1.0413884516267158e-07, + "loss": 0.0, + "num_input_tokens_seen": 119699048, + "step": 177670 + }, + { + "epoch": 4.340629809689004, + "grad_norm": 1.4773806469747797e-05, + "learning_rate": 1.0410095667488872e-07, + "loss": 0.0, + "num_input_tokens_seen": 119702504, + "step": 177675 + }, + { + "epoch": 4.340751960520851, + "grad_norm": 1.469433027523337e-05, + "learning_rate": 1.040630747023249e-07, + "loss": 0.0, + "num_input_tokens_seen": 119706344, + "step": 177680 + }, + { + "epoch": 4.340874111352698, + "grad_norm": 0.0002408750297036022, + "learning_rate": 1.0402519924525511e-07, + "loss": 0.0, + "num_input_tokens_seen": 119710376, + "step": 177685 + }, + { + "epoch": 4.340996262184546, + "grad_norm": 4.3600357457762584e-05, + "learning_rate": 1.0398733030395512e-07, + "loss": 0.0, + "num_input_tokens_seen": 119713896, + "step": 177690 + }, + { + "epoch": 4.3411184130163925, + "grad_norm": 7.78894464019686e-05, + "learning_rate": 1.0394946787870052e-07, + "loss": 0.0, + "num_input_tokens_seen": 119717096, + "step": 177695 + }, + { + "epoch": 4.34124056384824, + "grad_norm": 9.142693306785077e-05, + "learning_rate": 1.0391161196976605e-07, + "loss": 0.0, + "num_input_tokens_seen": 119720104, + "step": 177700 + }, + { + "epoch": 4.341362714680087, + "grad_norm": 0.004934506490826607, + "learning_rate": 1.0387376257742763e-07, + "loss": 0.0, + "num_input_tokens_seen": 119723688, + "step": 177705 + }, + { + "epoch": 4.3414848655119345, + "grad_norm": 0.0002775702450890094, + "learning_rate": 1.0383591970196004e-07, + "loss": 0.0, + "num_input_tokens_seen": 119726888, + "step": 177710 + }, + { + "epoch": 4.341607016343781, + "grad_norm": 0.001972169615328312, + "learning_rate": 1.0379808334363893e-07, + "loss": 0.0, + "num_input_tokens_seen": 119730408, + "step": 177715 + }, + { + "epoch": 4.341729167175629, + "grad_norm": 2.0951976694050245e-05, + "learning_rate": 1.0376025350273898e-07, + "loss": 0.0, + "num_input_tokens_seen": 119733608, + "step": 177720 + }, + { + "epoch": 4.341851318007476, + "grad_norm": 9.501501335762441e-05, + "learning_rate": 1.0372243017953541e-07, + "loss": 0.0, + "num_input_tokens_seen": 119736872, + "step": 177725 + }, + { + "epoch": 4.341973468839322, + "grad_norm": 3.5114768252242357e-05, + "learning_rate": 1.0368461337430378e-07, + "loss": 0.0, + "num_input_tokens_seen": 119739944, + "step": 177730 + }, + { + "epoch": 4.34209561967117, + "grad_norm": 2.0370420315884985e-05, + "learning_rate": 1.0364680308731843e-07, + "loss": 0.0, + "num_input_tokens_seen": 119743848, + "step": 177735 + }, + { + "epoch": 4.342217770503017, + "grad_norm": 0.00045057680108584464, + "learning_rate": 1.036089993188548e-07, + "loss": 0.0, + "num_input_tokens_seen": 119747752, + "step": 177740 + }, + { + "epoch": 4.342339921334864, + "grad_norm": 0.00036081598955206573, + "learning_rate": 1.0357120206918746e-07, + "loss": 0.0, + "num_input_tokens_seen": 119750824, + "step": 177745 + }, + { + "epoch": 4.342462072166711, + "grad_norm": 0.0006354292272590101, + "learning_rate": 1.0353341133859161e-07, + "loss": 0.0, + "num_input_tokens_seen": 119754280, + "step": 177750 + }, + { + "epoch": 4.342584222998559, + "grad_norm": 0.00011101896961918101, + "learning_rate": 1.0349562712734173e-07, + "loss": 0.0, + "num_input_tokens_seen": 119757544, + "step": 177755 + }, + { + "epoch": 4.3427063738304055, + "grad_norm": 3.6241308407625183e-05, + "learning_rate": 1.03457849435713e-07, + "loss": 0.0, + "num_input_tokens_seen": 119760552, + "step": 177760 + }, + { + "epoch": 4.342828524662253, + "grad_norm": 6.376237433869392e-05, + "learning_rate": 1.034200782639797e-07, + "loss": 0.0, + "num_input_tokens_seen": 119763944, + "step": 177765 + }, + { + "epoch": 4.3429506754941, + "grad_norm": 0.00014923287380952388, + "learning_rate": 1.03382313612417e-07, + "loss": 0.0, + "num_input_tokens_seen": 119767272, + "step": 177770 + }, + { + "epoch": 4.3430728263259475, + "grad_norm": 6.0516202211147174e-05, + "learning_rate": 1.0334455548129928e-07, + "loss": 0.0, + "num_input_tokens_seen": 119771560, + "step": 177775 + }, + { + "epoch": 4.343194977157794, + "grad_norm": 0.001674599014222622, + "learning_rate": 1.0330680387090085e-07, + "loss": 0.0, + "num_input_tokens_seen": 119774952, + "step": 177780 + }, + { + "epoch": 4.343317127989642, + "grad_norm": 0.00041652386425994337, + "learning_rate": 1.032690587814965e-07, + "loss": 0.0, + "num_input_tokens_seen": 119778280, + "step": 177785 + }, + { + "epoch": 4.343439278821489, + "grad_norm": 2.3158287149271928e-05, + "learning_rate": 1.0323132021336101e-07, + "loss": 0.0, + "num_input_tokens_seen": 119781544, + "step": 177790 + }, + { + "epoch": 4.343561429653336, + "grad_norm": 4.255375461070798e-05, + "learning_rate": 1.0319358816676838e-07, + "loss": 0.0, + "num_input_tokens_seen": 119784680, + "step": 177795 + }, + { + "epoch": 4.343683580485183, + "grad_norm": 0.0006458880961872637, + "learning_rate": 1.0315586264199338e-07, + "loss": 0.0, + "num_input_tokens_seen": 119787688, + "step": 177800 + }, + { + "epoch": 4.343805731317031, + "grad_norm": 0.0010130023583769798, + "learning_rate": 1.0311814363930994e-07, + "loss": 0.0, + "num_input_tokens_seen": 119791208, + "step": 177805 + }, + { + "epoch": 4.343927882148877, + "grad_norm": 0.0003084522904828191, + "learning_rate": 1.0308043115899257e-07, + "loss": 0.0, + "num_input_tokens_seen": 119794344, + "step": 177810 + }, + { + "epoch": 4.344050032980725, + "grad_norm": 7.93716826592572e-05, + "learning_rate": 1.0304272520131586e-07, + "loss": 0.0, + "num_input_tokens_seen": 119797736, + "step": 177815 + }, + { + "epoch": 4.344172183812572, + "grad_norm": 5.822316961712204e-05, + "learning_rate": 1.0300502576655334e-07, + "loss": 0.0, + "num_input_tokens_seen": 119801256, + "step": 177820 + }, + { + "epoch": 4.3442943346444185, + "grad_norm": 0.0020748593378812075, + "learning_rate": 1.0296733285497982e-07, + "loss": 0.0, + "num_input_tokens_seen": 119804520, + "step": 177825 + }, + { + "epoch": 4.344416485476266, + "grad_norm": 0.00011760352936107665, + "learning_rate": 1.0292964646686897e-07, + "loss": 0.0, + "num_input_tokens_seen": 119807848, + "step": 177830 + }, + { + "epoch": 4.344538636308113, + "grad_norm": 0.0002966334577649832, + "learning_rate": 1.0289196660249521e-07, + "loss": 0.0, + "num_input_tokens_seen": 119811048, + "step": 177835 + }, + { + "epoch": 4.3446607871399605, + "grad_norm": 0.0029755146242678165, + "learning_rate": 1.0285429326213213e-07, + "loss": 0.0, + "num_input_tokens_seen": 119814248, + "step": 177840 + }, + { + "epoch": 4.344782937971807, + "grad_norm": 0.005637416150420904, + "learning_rate": 1.0281662644605394e-07, + "loss": 0.0, + "num_input_tokens_seen": 119817512, + "step": 177845 + }, + { + "epoch": 4.344905088803655, + "grad_norm": 9.653414053900633e-06, + "learning_rate": 1.0277896615453473e-07, + "loss": 0.0, + "num_input_tokens_seen": 119820648, + "step": 177850 + }, + { + "epoch": 4.345027239635502, + "grad_norm": 0.011159008368849754, + "learning_rate": 1.02741312387848e-07, + "loss": 0.0, + "num_input_tokens_seen": 119823976, + "step": 177855 + }, + { + "epoch": 4.345149390467349, + "grad_norm": 0.002734529320150614, + "learning_rate": 1.0270366514626793e-07, + "loss": 0.0, + "num_input_tokens_seen": 119827944, + "step": 177860 + }, + { + "epoch": 4.345271541299196, + "grad_norm": 1.2335614883340895e-05, + "learning_rate": 1.0266602443006822e-07, + "loss": 0.0, + "num_input_tokens_seen": 119831336, + "step": 177865 + }, + { + "epoch": 4.345393692131044, + "grad_norm": 0.00011353209265507758, + "learning_rate": 1.0262839023952241e-07, + "loss": 0.0, + "num_input_tokens_seen": 119835112, + "step": 177870 + }, + { + "epoch": 4.34551584296289, + "grad_norm": 0.0011685584904626012, + "learning_rate": 1.025907625749044e-07, + "loss": 0.0, + "num_input_tokens_seen": 119838568, + "step": 177875 + }, + { + "epoch": 4.345637993794738, + "grad_norm": 6.6461670940043405e-06, + "learning_rate": 1.0255314143648753e-07, + "loss": 0.0, + "num_input_tokens_seen": 119841832, + "step": 177880 + }, + { + "epoch": 4.345760144626585, + "grad_norm": 0.0003076045250054449, + "learning_rate": 1.025155268245459e-07, + "loss": 0.0, + "num_input_tokens_seen": 119844840, + "step": 177885 + }, + { + "epoch": 4.345882295458432, + "grad_norm": 0.0029748149681836367, + "learning_rate": 1.0247791873935241e-07, + "loss": 0.0, + "num_input_tokens_seen": 119849192, + "step": 177890 + }, + { + "epoch": 4.346004446290279, + "grad_norm": 2.7037634936277755e-05, + "learning_rate": 1.0244031718118118e-07, + "loss": 0.0, + "num_input_tokens_seen": 119852456, + "step": 177895 + }, + { + "epoch": 4.346126597122127, + "grad_norm": 5.461079490487464e-05, + "learning_rate": 1.0240272215030521e-07, + "loss": 0.0816, + "num_input_tokens_seen": 119855400, + "step": 177900 + }, + { + "epoch": 4.3462487479539735, + "grad_norm": 0.0004930661525577307, + "learning_rate": 1.0236513364699805e-07, + "loss": 0.0, + "num_input_tokens_seen": 119858472, + "step": 177905 + }, + { + "epoch": 4.346370898785821, + "grad_norm": 0.00023198813141789287, + "learning_rate": 1.0232755167153328e-07, + "loss": 0.0, + "num_input_tokens_seen": 119861672, + "step": 177910 + }, + { + "epoch": 4.346493049617668, + "grad_norm": 0.0025924292858690023, + "learning_rate": 1.0228997622418378e-07, + "loss": 0.0, + "num_input_tokens_seen": 119865384, + "step": 177915 + }, + { + "epoch": 4.346615200449515, + "grad_norm": 0.006062481552362442, + "learning_rate": 1.0225240730522322e-07, + "loss": 0.0213, + "num_input_tokens_seen": 119868328, + "step": 177920 + }, + { + "epoch": 4.346737351281362, + "grad_norm": 0.00014382071094587445, + "learning_rate": 1.0221484491492438e-07, + "loss": 0.0, + "num_input_tokens_seen": 119872040, + "step": 177925 + }, + { + "epoch": 4.346859502113209, + "grad_norm": 2.9611932404804975e-05, + "learning_rate": 1.0217728905356093e-07, + "loss": 0.0, + "num_input_tokens_seen": 119875048, + "step": 177930 + }, + { + "epoch": 4.346981652945057, + "grad_norm": 0.0008013474871404469, + "learning_rate": 1.0213973972140555e-07, + "loss": 0.0, + "num_input_tokens_seen": 119878248, + "step": 177935 + }, + { + "epoch": 4.347103803776903, + "grad_norm": 0.0003407940675970167, + "learning_rate": 1.0210219691873145e-07, + "loss": 0.0, + "num_input_tokens_seen": 119881704, + "step": 177940 + }, + { + "epoch": 4.347225954608751, + "grad_norm": 0.0001222924911417067, + "learning_rate": 1.0206466064581177e-07, + "loss": 0.0, + "num_input_tokens_seen": 119885288, + "step": 177945 + }, + { + "epoch": 4.347348105440598, + "grad_norm": 4.897462076769443e-06, + "learning_rate": 1.0202713090291937e-07, + "loss": 0.0, + "num_input_tokens_seen": 119888744, + "step": 177950 + }, + { + "epoch": 4.347470256272445, + "grad_norm": 0.0013736019609495997, + "learning_rate": 1.0198960769032728e-07, + "loss": 0.0, + "num_input_tokens_seen": 119892072, + "step": 177955 + }, + { + "epoch": 4.347592407104292, + "grad_norm": 0.00012519631127361208, + "learning_rate": 1.0195209100830815e-07, + "loss": 0.0001, + "num_input_tokens_seen": 119895080, + "step": 177960 + }, + { + "epoch": 4.34771455793614, + "grad_norm": 0.00036417320370674133, + "learning_rate": 1.0191458085713511e-07, + "loss": 0.0, + "num_input_tokens_seen": 119898408, + "step": 177965 + }, + { + "epoch": 4.347836708767987, + "grad_norm": 0.0004924045060761273, + "learning_rate": 1.0187707723708084e-07, + "loss": 0.0, + "num_input_tokens_seen": 119901544, + "step": 177970 + }, + { + "epoch": 4.347958859599834, + "grad_norm": 2.2906911908648908e-05, + "learning_rate": 1.0183958014841776e-07, + "loss": 0.0, + "num_input_tokens_seen": 119905192, + "step": 177975 + }, + { + "epoch": 4.348081010431681, + "grad_norm": 0.38317111134529114, + "learning_rate": 1.0180208959141912e-07, + "loss": 0.0002, + "num_input_tokens_seen": 119908456, + "step": 177980 + }, + { + "epoch": 4.348203161263529, + "grad_norm": 0.000214638261240907, + "learning_rate": 1.0176460556635702e-07, + "loss": 0.0001, + "num_input_tokens_seen": 119911656, + "step": 177985 + }, + { + "epoch": 4.348325312095375, + "grad_norm": 0.0001367831719107926, + "learning_rate": 1.0172712807350447e-07, + "loss": 0.0536, + "num_input_tokens_seen": 119915304, + "step": 177990 + }, + { + "epoch": 4.348447462927223, + "grad_norm": 0.001143554924055934, + "learning_rate": 1.0168965711313371e-07, + "loss": 0.0, + "num_input_tokens_seen": 119918568, + "step": 177995 + }, + { + "epoch": 4.34856961375907, + "grad_norm": 0.00035983105772174895, + "learning_rate": 1.016521926855174e-07, + "loss": 0.0, + "num_input_tokens_seen": 119921832, + "step": 178000 + }, + { + "epoch": 4.348691764590917, + "grad_norm": 6.349872273858637e-05, + "learning_rate": 1.0161473479092819e-07, + "loss": 0.0, + "num_input_tokens_seen": 119924904, + "step": 178005 + }, + { + "epoch": 4.348813915422764, + "grad_norm": 7.528601418016478e-05, + "learning_rate": 1.0157728342963801e-07, + "loss": 0.0, + "num_input_tokens_seen": 119928744, + "step": 178010 + }, + { + "epoch": 4.348936066254611, + "grad_norm": 9.19345548027195e-05, + "learning_rate": 1.0153983860191961e-07, + "loss": 0.0, + "num_input_tokens_seen": 119932200, + "step": 178015 + }, + { + "epoch": 4.3490582170864585, + "grad_norm": 0.004338003229349852, + "learning_rate": 1.0150240030804502e-07, + "loss": 0.0, + "num_input_tokens_seen": 119935592, + "step": 178020 + }, + { + "epoch": 4.349180367918305, + "grad_norm": 7.14230554876849e-05, + "learning_rate": 1.014649685482869e-07, + "loss": 0.0, + "num_input_tokens_seen": 119938728, + "step": 178025 + }, + { + "epoch": 4.349302518750153, + "grad_norm": 0.00011825282854260877, + "learning_rate": 1.0142754332291692e-07, + "loss": 0.0, + "num_input_tokens_seen": 119942376, + "step": 178030 + }, + { + "epoch": 4.349424669582, + "grad_norm": 0.0034788944758474827, + "learning_rate": 1.0139012463220764e-07, + "loss": 0.0, + "num_input_tokens_seen": 119945704, + "step": 178035 + }, + { + "epoch": 4.349546820413847, + "grad_norm": 0.00012874802632723004, + "learning_rate": 1.0135271247643117e-07, + "loss": 0.0, + "num_input_tokens_seen": 119948968, + "step": 178040 + }, + { + "epoch": 4.349668971245694, + "grad_norm": 5.9843339840881526e-05, + "learning_rate": 1.0131530685585931e-07, + "loss": 0.0, + "num_input_tokens_seen": 119952232, + "step": 178045 + }, + { + "epoch": 4.349791122077542, + "grad_norm": 0.00011922222620341927, + "learning_rate": 1.0127790777076439e-07, + "loss": 0.0, + "num_input_tokens_seen": 119956136, + "step": 178050 + }, + { + "epoch": 4.349913272909388, + "grad_norm": 0.00035078474320471287, + "learning_rate": 1.0124051522141819e-07, + "loss": 0.0, + "num_input_tokens_seen": 119959656, + "step": 178055 + }, + { + "epoch": 4.350035423741236, + "grad_norm": 0.007354686502367258, + "learning_rate": 1.0120312920809282e-07, + "loss": 0.0, + "num_input_tokens_seen": 119962792, + "step": 178060 + }, + { + "epoch": 4.350157574573083, + "grad_norm": 0.0004929696442559361, + "learning_rate": 1.0116574973105984e-07, + "loss": 0.0, + "num_input_tokens_seen": 119965992, + "step": 178065 + }, + { + "epoch": 4.35027972540493, + "grad_norm": 7.778491635690443e-06, + "learning_rate": 1.011283767905915e-07, + "loss": 0.0, + "num_input_tokens_seen": 119969448, + "step": 178070 + }, + { + "epoch": 4.350401876236777, + "grad_norm": 0.0005125360912643373, + "learning_rate": 1.0109101038695911e-07, + "loss": 0.0, + "num_input_tokens_seen": 119972520, + "step": 178075 + }, + { + "epoch": 4.350524027068625, + "grad_norm": 5.3846797527512535e-05, + "learning_rate": 1.0105365052043491e-07, + "loss": 0.0, + "num_input_tokens_seen": 119976104, + "step": 178080 + }, + { + "epoch": 4.3506461779004715, + "grad_norm": 9.910299559123814e-05, + "learning_rate": 1.0101629719129045e-07, + "loss": 0.0, + "num_input_tokens_seen": 119978984, + "step": 178085 + }, + { + "epoch": 4.350768328732318, + "grad_norm": 1.2183804756205063e-05, + "learning_rate": 1.0097895039979698e-07, + "loss": 0.0, + "num_input_tokens_seen": 119982056, + "step": 178090 + }, + { + "epoch": 4.350890479564166, + "grad_norm": 0.00019707833416759968, + "learning_rate": 1.0094161014622637e-07, + "loss": 0.0, + "num_input_tokens_seen": 119985384, + "step": 178095 + }, + { + "epoch": 4.351012630396013, + "grad_norm": 5.629958468489349e-05, + "learning_rate": 1.0090427643085043e-07, + "loss": 0.0, + "num_input_tokens_seen": 119988584, + "step": 178100 + }, + { + "epoch": 4.35113478122786, + "grad_norm": 3.05861140077468e-05, + "learning_rate": 1.0086694925394024e-07, + "loss": 0.0019, + "num_input_tokens_seen": 119991720, + "step": 178105 + }, + { + "epoch": 4.351256932059707, + "grad_norm": 0.00027523620519787073, + "learning_rate": 1.008296286157676e-07, + "loss": 0.0, + "num_input_tokens_seen": 119995432, + "step": 178110 + }, + { + "epoch": 4.351379082891555, + "grad_norm": 8.419102960033342e-05, + "learning_rate": 1.0079231451660352e-07, + "loss": 0.0, + "num_input_tokens_seen": 119998952, + "step": 178115 + }, + { + "epoch": 4.351501233723401, + "grad_norm": 0.0008841792005114257, + "learning_rate": 1.007550069567198e-07, + "loss": 0.0259, + "num_input_tokens_seen": 120001960, + "step": 178120 + }, + { + "epoch": 4.351623384555249, + "grad_norm": 0.0002478567766956985, + "learning_rate": 1.007177059363874e-07, + "loss": 0.0, + "num_input_tokens_seen": 120005160, + "step": 178125 + }, + { + "epoch": 4.351745535387096, + "grad_norm": 30.11157989501953, + "learning_rate": 1.0068041145587769e-07, + "loss": 0.0403, + "num_input_tokens_seen": 120009064, + "step": 178130 + }, + { + "epoch": 4.351867686218943, + "grad_norm": 5.561210491578095e-05, + "learning_rate": 1.00643123515462e-07, + "loss": 0.0, + "num_input_tokens_seen": 120012392, + "step": 178135 + }, + { + "epoch": 4.35198983705079, + "grad_norm": 9.507766723632812, + "learning_rate": 1.0060584211541134e-07, + "loss": 0.0134, + "num_input_tokens_seen": 120015720, + "step": 178140 + }, + { + "epoch": 4.352111987882638, + "grad_norm": 5.897363371332176e-05, + "learning_rate": 1.0056856725599704e-07, + "loss": 0.0, + "num_input_tokens_seen": 120019304, + "step": 178145 + }, + { + "epoch": 4.3522341387144845, + "grad_norm": 0.002634249161928892, + "learning_rate": 1.0053129893748991e-07, + "loss": 0.0, + "num_input_tokens_seen": 120022248, + "step": 178150 + }, + { + "epoch": 4.352356289546332, + "grad_norm": 9.353709174320102e-05, + "learning_rate": 1.0049403716016113e-07, + "loss": 0.0336, + "num_input_tokens_seen": 120025384, + "step": 178155 + }, + { + "epoch": 4.352478440378179, + "grad_norm": 0.0002733849687501788, + "learning_rate": 1.0045678192428175e-07, + "loss": 0.0, + "num_input_tokens_seen": 120029032, + "step": 178160 + }, + { + "epoch": 4.3526005912100265, + "grad_norm": 0.0362461693584919, + "learning_rate": 1.0041953323012242e-07, + "loss": 0.0, + "num_input_tokens_seen": 120032232, + "step": 178165 + }, + { + "epoch": 4.352722742041873, + "grad_norm": 0.00040877415449358523, + "learning_rate": 1.0038229107795448e-07, + "loss": 0.0, + "num_input_tokens_seen": 120035368, + "step": 178170 + }, + { + "epoch": 4.352844892873721, + "grad_norm": 0.004401254002004862, + "learning_rate": 1.0034505546804839e-07, + "loss": 0.0, + "num_input_tokens_seen": 120038888, + "step": 178175 + }, + { + "epoch": 4.352967043705568, + "grad_norm": 0.012119931168854237, + "learning_rate": 1.003078264006748e-07, + "loss": 0.0, + "num_input_tokens_seen": 120042664, + "step": 178180 + }, + { + "epoch": 4.353089194537414, + "grad_norm": 0.0002318314218427986, + "learning_rate": 1.0027060387610497e-07, + "loss": 0.0, + "num_input_tokens_seen": 120045672, + "step": 178185 + }, + { + "epoch": 4.353211345369262, + "grad_norm": 0.00025992945302277803, + "learning_rate": 1.0023338789460912e-07, + "loss": 0.0, + "num_input_tokens_seen": 120048744, + "step": 178190 + }, + { + "epoch": 4.353333496201109, + "grad_norm": 0.0011936393566429615, + "learning_rate": 1.0019617845645822e-07, + "loss": 0.0, + "num_input_tokens_seen": 120052200, + "step": 178195 + }, + { + "epoch": 4.353455647032956, + "grad_norm": 0.00025509262923151255, + "learning_rate": 1.0015897556192266e-07, + "loss": 0.0, + "num_input_tokens_seen": 120054952, + "step": 178200 + }, + { + "epoch": 4.353577797864803, + "grad_norm": 4.1563100239727646e-05, + "learning_rate": 1.0012177921127307e-07, + "loss": 0.0, + "num_input_tokens_seen": 120058024, + "step": 178205 + }, + { + "epoch": 4.353699948696651, + "grad_norm": 0.00012275317567400634, + "learning_rate": 1.0008458940477992e-07, + "loss": 0.0, + "num_input_tokens_seen": 120061288, + "step": 178210 + }, + { + "epoch": 4.3538220995284975, + "grad_norm": 0.001947722746990621, + "learning_rate": 1.0004740614271356e-07, + "loss": 0.0, + "num_input_tokens_seen": 120064424, + "step": 178215 + }, + { + "epoch": 4.353944250360345, + "grad_norm": 8.509887993568555e-05, + "learning_rate": 1.0001022942534476e-07, + "loss": 0.0001, + "num_input_tokens_seen": 120067688, + "step": 178220 + }, + { + "epoch": 4.354066401192192, + "grad_norm": 9.287018656323198e-06, + "learning_rate": 9.997305925294342e-08, + "loss": 0.0, + "num_input_tokens_seen": 120071464, + "step": 178225 + }, + { + "epoch": 4.3541885520240395, + "grad_norm": 0.00255648884922266, + "learning_rate": 9.993589562578031e-08, + "loss": 0.0, + "num_input_tokens_seen": 120074856, + "step": 178230 + }, + { + "epoch": 4.354310702855886, + "grad_norm": 0.0007204010616987944, + "learning_rate": 9.989873854412523e-08, + "loss": 0.0, + "num_input_tokens_seen": 120078056, + "step": 178235 + }, + { + "epoch": 4.354432853687734, + "grad_norm": 0.002324800007045269, + "learning_rate": 9.986158800824884e-08, + "loss": 0.0, + "num_input_tokens_seen": 120081448, + "step": 178240 + }, + { + "epoch": 4.354555004519581, + "grad_norm": 0.0022073762957006693, + "learning_rate": 9.982444401842083e-08, + "loss": 0.0, + "num_input_tokens_seen": 120084648, + "step": 178245 + }, + { + "epoch": 4.354677155351428, + "grad_norm": 3.476447818684392e-05, + "learning_rate": 9.978730657491164e-08, + "loss": 0.0, + "num_input_tokens_seen": 120087912, + "step": 178250 + }, + { + "epoch": 4.354799306183275, + "grad_norm": 0.00024955617845989764, + "learning_rate": 9.975017567799148e-08, + "loss": 0.0, + "num_input_tokens_seen": 120090920, + "step": 178255 + }, + { + "epoch": 4.354921457015123, + "grad_norm": 0.004400161094963551, + "learning_rate": 9.971305132792996e-08, + "loss": 0.0, + "num_input_tokens_seen": 120094248, + "step": 178260 + }, + { + "epoch": 4.355043607846969, + "grad_norm": 4.038333281641826e-05, + "learning_rate": 9.967593352499747e-08, + "loss": 0.0, + "num_input_tokens_seen": 120097128, + "step": 178265 + }, + { + "epoch": 4.355165758678817, + "grad_norm": 0.0003064874326810241, + "learning_rate": 9.963882226946363e-08, + "loss": 0.0, + "num_input_tokens_seen": 120100328, + "step": 178270 + }, + { + "epoch": 4.355287909510664, + "grad_norm": 1.765959314070642e-05, + "learning_rate": 9.960171756159851e-08, + "loss": 0.0, + "num_input_tokens_seen": 120104232, + "step": 178275 + }, + { + "epoch": 4.3554100603425105, + "grad_norm": 4.4604366848943755e-05, + "learning_rate": 9.956461940167193e-08, + "loss": 0.0, + "num_input_tokens_seen": 120107880, + "step": 178280 + }, + { + "epoch": 4.355532211174358, + "grad_norm": 0.0005809883587062359, + "learning_rate": 9.952752778995343e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120111080, + "step": 178285 + }, + { + "epoch": 4.355654362006205, + "grad_norm": 0.0001975120831048116, + "learning_rate": 9.949044272671326e-08, + "loss": 0.0313, + "num_input_tokens_seen": 120114344, + "step": 178290 + }, + { + "epoch": 4.355776512838053, + "grad_norm": 0.0005515380762517452, + "learning_rate": 9.945336421222039e-08, + "loss": 0.0, + "num_input_tokens_seen": 120117736, + "step": 178295 + }, + { + "epoch": 4.355898663669899, + "grad_norm": 0.001049726502969861, + "learning_rate": 9.941629224674519e-08, + "loss": 0.0, + "num_input_tokens_seen": 120121064, + "step": 178300 + }, + { + "epoch": 4.356020814501747, + "grad_norm": 1.5405454178107902e-05, + "learning_rate": 9.937922683055677e-08, + "loss": 0.0, + "num_input_tokens_seen": 120124776, + "step": 178305 + }, + { + "epoch": 4.356142965333594, + "grad_norm": 8.174588583642617e-05, + "learning_rate": 9.93421679639248e-08, + "loss": 0.0, + "num_input_tokens_seen": 120128168, + "step": 178310 + }, + { + "epoch": 4.356265116165441, + "grad_norm": 8.779927156865597e-05, + "learning_rate": 9.930511564711907e-08, + "loss": 0.0, + "num_input_tokens_seen": 120131752, + "step": 178315 + }, + { + "epoch": 4.356387266997288, + "grad_norm": 0.0010885270312428474, + "learning_rate": 9.926806988040858e-08, + "loss": 0.0, + "num_input_tokens_seen": 120135272, + "step": 178320 + }, + { + "epoch": 4.356509417829136, + "grad_norm": 9.781593689695e-05, + "learning_rate": 9.923103066406314e-08, + "loss": 0.0, + "num_input_tokens_seen": 120138408, + "step": 178325 + }, + { + "epoch": 4.3566315686609824, + "grad_norm": 0.003006650833413005, + "learning_rate": 9.919399799835171e-08, + "loss": 0.0, + "num_input_tokens_seen": 120141928, + "step": 178330 + }, + { + "epoch": 4.35675371949283, + "grad_norm": 0.00023989882902242243, + "learning_rate": 9.915697188354399e-08, + "loss": 0.0, + "num_input_tokens_seen": 120145448, + "step": 178335 + }, + { + "epoch": 4.356875870324677, + "grad_norm": 0.00019699001859407872, + "learning_rate": 9.911995231990899e-08, + "loss": 0.0004, + "num_input_tokens_seen": 120148904, + "step": 178340 + }, + { + "epoch": 4.3569980211565245, + "grad_norm": 0.0009989457903429866, + "learning_rate": 9.908293930771594e-08, + "loss": 0.0, + "num_input_tokens_seen": 120152360, + "step": 178345 + }, + { + "epoch": 4.357120171988371, + "grad_norm": 0.001904218690469861, + "learning_rate": 9.904593284723417e-08, + "loss": 0.0, + "num_input_tokens_seen": 120155752, + "step": 178350 + }, + { + "epoch": 4.357242322820218, + "grad_norm": 0.000590673356782645, + "learning_rate": 9.90089329387327e-08, + "loss": 0.0, + "num_input_tokens_seen": 120159144, + "step": 178355 + }, + { + "epoch": 4.357364473652066, + "grad_norm": 0.0008590373327024281, + "learning_rate": 9.897193958248063e-08, + "loss": 0.0, + "num_input_tokens_seen": 120162536, + "step": 178360 + }, + { + "epoch": 4.357486624483912, + "grad_norm": 3.4686174331000075e-05, + "learning_rate": 9.893495277874686e-08, + "loss": 0.0, + "num_input_tokens_seen": 120165736, + "step": 178365 + }, + { + "epoch": 4.35760877531576, + "grad_norm": 7.88893667049706e-05, + "learning_rate": 9.889797252780064e-08, + "loss": 0.0, + "num_input_tokens_seen": 120168808, + "step": 178370 + }, + { + "epoch": 4.357730926147607, + "grad_norm": 0.0003943110932596028, + "learning_rate": 9.88609988299105e-08, + "loss": 0.0, + "num_input_tokens_seen": 120172200, + "step": 178375 + }, + { + "epoch": 4.357853076979454, + "grad_norm": 4.9077701987698674e-05, + "learning_rate": 9.882403168534581e-08, + "loss": 0.0, + "num_input_tokens_seen": 120175400, + "step": 178380 + }, + { + "epoch": 4.357975227811301, + "grad_norm": 1.5061576050356962e-05, + "learning_rate": 9.878707109437489e-08, + "loss": 0.0, + "num_input_tokens_seen": 120178856, + "step": 178385 + }, + { + "epoch": 4.358097378643149, + "grad_norm": 0.00023197197879198939, + "learning_rate": 9.875011705726699e-08, + "loss": 0.0, + "num_input_tokens_seen": 120181736, + "step": 178390 + }, + { + "epoch": 4.3582195294749955, + "grad_norm": 0.026410503312945366, + "learning_rate": 9.871316957429077e-08, + "loss": 0.0, + "num_input_tokens_seen": 120185000, + "step": 178395 + }, + { + "epoch": 4.358341680306843, + "grad_norm": 0.0004599327512551099, + "learning_rate": 9.867622864571445e-08, + "loss": 0.0, + "num_input_tokens_seen": 120188072, + "step": 178400 + }, + { + "epoch": 4.35846383113869, + "grad_norm": 0.006481677293777466, + "learning_rate": 9.863929427180706e-08, + "loss": 0.0, + "num_input_tokens_seen": 120191464, + "step": 178405 + }, + { + "epoch": 4.3585859819705375, + "grad_norm": 7.190836913650855e-05, + "learning_rate": 9.860236645283737e-08, + "loss": 0.0, + "num_input_tokens_seen": 120194920, + "step": 178410 + }, + { + "epoch": 4.358708132802384, + "grad_norm": 0.00027438238612376153, + "learning_rate": 9.856544518907362e-08, + "loss": 0.0536, + "num_input_tokens_seen": 120198376, + "step": 178415 + }, + { + "epoch": 4.358830283634232, + "grad_norm": 6.237047637114301e-05, + "learning_rate": 9.852853048078446e-08, + "loss": 0.0655, + "num_input_tokens_seen": 120201512, + "step": 178420 + }, + { + "epoch": 4.358952434466079, + "grad_norm": 0.00016815854178275913, + "learning_rate": 9.849162232823816e-08, + "loss": 0.0, + "num_input_tokens_seen": 120205608, + "step": 178425 + }, + { + "epoch": 4.359074585297926, + "grad_norm": 0.002402889309450984, + "learning_rate": 9.845472073170346e-08, + "loss": 0.0, + "num_input_tokens_seen": 120209128, + "step": 178430 + }, + { + "epoch": 4.359196736129773, + "grad_norm": 6.714063147228444e-06, + "learning_rate": 9.84178256914483e-08, + "loss": 0.0, + "num_input_tokens_seen": 120212520, + "step": 178435 + }, + { + "epoch": 4.359318886961621, + "grad_norm": 0.00024982349714264274, + "learning_rate": 9.83809372077412e-08, + "loss": 0.0, + "num_input_tokens_seen": 120215528, + "step": 178440 + }, + { + "epoch": 4.359441037793467, + "grad_norm": 0.00264274631626904, + "learning_rate": 9.834405528085066e-08, + "loss": 0.0, + "num_input_tokens_seen": 120219112, + "step": 178445 + }, + { + "epoch": 4.359563188625314, + "grad_norm": 3.579885378712788e-05, + "learning_rate": 9.830717991104443e-08, + "loss": 0.0, + "num_input_tokens_seen": 120222760, + "step": 178450 + }, + { + "epoch": 4.359685339457162, + "grad_norm": 0.0001969273725990206, + "learning_rate": 9.827031109859107e-08, + "loss": 0.0, + "num_input_tokens_seen": 120225832, + "step": 178455 + }, + { + "epoch": 4.3598074902890085, + "grad_norm": 0.0016006685327738523, + "learning_rate": 9.82334488437585e-08, + "loss": 0.0, + "num_input_tokens_seen": 120229096, + "step": 178460 + }, + { + "epoch": 4.359929641120856, + "grad_norm": 7.797037687851116e-05, + "learning_rate": 9.819659314681472e-08, + "loss": 0.0, + "num_input_tokens_seen": 120232552, + "step": 178465 + }, + { + "epoch": 4.360051791952703, + "grad_norm": 0.0005138301639817655, + "learning_rate": 9.815974400802807e-08, + "loss": 0.0, + "num_input_tokens_seen": 120236584, + "step": 178470 + }, + { + "epoch": 4.3601739427845505, + "grad_norm": 0.0052763535641133785, + "learning_rate": 9.812290142766622e-08, + "loss": 0.0, + "num_input_tokens_seen": 120239656, + "step": 178475 + }, + { + "epoch": 4.360296093616397, + "grad_norm": 0.0007992389146238565, + "learning_rate": 9.808606540599728e-08, + "loss": 0.0, + "num_input_tokens_seen": 120243112, + "step": 178480 + }, + { + "epoch": 4.360418244448245, + "grad_norm": 6.511791980301496e-06, + "learning_rate": 9.804923594328907e-08, + "loss": 0.0, + "num_input_tokens_seen": 120246568, + "step": 178485 + }, + { + "epoch": 4.360540395280092, + "grad_norm": 4.3418702261988074e-05, + "learning_rate": 9.801241303980934e-08, + "loss": 0.0011, + "num_input_tokens_seen": 120249960, + "step": 178490 + }, + { + "epoch": 4.360662546111939, + "grad_norm": 0.0003371757920831442, + "learning_rate": 9.7975596695826e-08, + "loss": 0.0, + "num_input_tokens_seen": 120253224, + "step": 178495 + }, + { + "epoch": 4.360784696943786, + "grad_norm": 0.0005492345080710948, + "learning_rate": 9.793878691160662e-08, + "loss": 0.0, + "num_input_tokens_seen": 120256360, + "step": 178500 + }, + { + "epoch": 4.360906847775634, + "grad_norm": 0.0005842145183123648, + "learning_rate": 9.79019836874192e-08, + "loss": 0.0, + "num_input_tokens_seen": 120259496, + "step": 178505 + }, + { + "epoch": 4.36102899860748, + "grad_norm": 8.086705202003941e-05, + "learning_rate": 9.786518702353097e-08, + "loss": 0.0, + "num_input_tokens_seen": 120263080, + "step": 178510 + }, + { + "epoch": 4.361151149439328, + "grad_norm": 4.444857040653005e-05, + "learning_rate": 9.782839692020994e-08, + "loss": 0.0, + "num_input_tokens_seen": 120267368, + "step": 178515 + }, + { + "epoch": 4.361273300271175, + "grad_norm": 0.09220793098211288, + "learning_rate": 9.779161337772323e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120271144, + "step": 178520 + }, + { + "epoch": 4.361395451103022, + "grad_norm": 4.644213277060771e-06, + "learning_rate": 9.775483639633863e-08, + "loss": 0.0, + "num_input_tokens_seen": 120274024, + "step": 178525 + }, + { + "epoch": 4.361517601934869, + "grad_norm": 1.780526690708939e-05, + "learning_rate": 9.771806597632382e-08, + "loss": 0.0, + "num_input_tokens_seen": 120277480, + "step": 178530 + }, + { + "epoch": 4.361639752766717, + "grad_norm": 5.388813224271871e-05, + "learning_rate": 9.768130211794556e-08, + "loss": 0.0, + "num_input_tokens_seen": 120280424, + "step": 178535 + }, + { + "epoch": 4.3617619035985635, + "grad_norm": 1.0598834705888294e-05, + "learning_rate": 9.764454482147189e-08, + "loss": 0.0, + "num_input_tokens_seen": 120283304, + "step": 178540 + }, + { + "epoch": 4.36188405443041, + "grad_norm": 0.0008127638720907271, + "learning_rate": 9.760779408716946e-08, + "loss": 0.0, + "num_input_tokens_seen": 120287080, + "step": 178545 + }, + { + "epoch": 4.362006205262258, + "grad_norm": 0.0010838373564183712, + "learning_rate": 9.757104991530618e-08, + "loss": 0.0, + "num_input_tokens_seen": 120290728, + "step": 178550 + }, + { + "epoch": 4.362128356094105, + "grad_norm": 0.0002147614723071456, + "learning_rate": 9.753431230614873e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120294504, + "step": 178555 + }, + { + "epoch": 4.362250506925952, + "grad_norm": 0.0016481346683576703, + "learning_rate": 9.749758125996444e-08, + "loss": 0.0435, + "num_input_tokens_seen": 120297832, + "step": 178560 + }, + { + "epoch": 4.362372657757799, + "grad_norm": 0.0002112577494699508, + "learning_rate": 9.746085677702065e-08, + "loss": 0.0, + "num_input_tokens_seen": 120301096, + "step": 178565 + }, + { + "epoch": 4.362494808589647, + "grad_norm": 0.000503468094393611, + "learning_rate": 9.742413885758416e-08, + "loss": 0.0, + "num_input_tokens_seen": 120304424, + "step": 178570 + }, + { + "epoch": 4.362616959421493, + "grad_norm": 0.0016552689485251904, + "learning_rate": 9.73874275019223e-08, + "loss": 0.0, + "num_input_tokens_seen": 120307688, + "step": 178575 + }, + { + "epoch": 4.362739110253341, + "grad_norm": 0.00026949195307679474, + "learning_rate": 9.735072271030165e-08, + "loss": 0.0, + "num_input_tokens_seen": 120311144, + "step": 178580 + }, + { + "epoch": 4.362861261085188, + "grad_norm": 0.00013682998542208225, + "learning_rate": 9.731402448298942e-08, + "loss": 0.0, + "num_input_tokens_seen": 120314216, + "step": 178585 + }, + { + "epoch": 4.362983411917035, + "grad_norm": 0.000252584955887869, + "learning_rate": 9.727733282025242e-08, + "loss": 0.0, + "num_input_tokens_seen": 120317800, + "step": 178590 + }, + { + "epoch": 4.363105562748882, + "grad_norm": 0.012169472873210907, + "learning_rate": 9.724064772235729e-08, + "loss": 0.0, + "num_input_tokens_seen": 120321576, + "step": 178595 + }, + { + "epoch": 4.36322771358073, + "grad_norm": 0.002420415636152029, + "learning_rate": 9.720396918957118e-08, + "loss": 0.0, + "num_input_tokens_seen": 120325096, + "step": 178600 + }, + { + "epoch": 4.3633498644125766, + "grad_norm": 4.463781078811735e-05, + "learning_rate": 9.716729722216055e-08, + "loss": 0.0, + "num_input_tokens_seen": 120328360, + "step": 178605 + }, + { + "epoch": 4.363472015244424, + "grad_norm": 0.00020588969346135855, + "learning_rate": 9.713063182039216e-08, + "loss": 0.0, + "num_input_tokens_seen": 120331496, + "step": 178610 + }, + { + "epoch": 4.363594166076271, + "grad_norm": 0.00028956696041859686, + "learning_rate": 9.709397298453259e-08, + "loss": 0.0, + "num_input_tokens_seen": 120334632, + "step": 178615 + }, + { + "epoch": 4.363716316908118, + "grad_norm": 0.0002490598999429494, + "learning_rate": 9.705732071484851e-08, + "loss": 0.0, + "num_input_tokens_seen": 120337832, + "step": 178620 + }, + { + "epoch": 4.363838467739965, + "grad_norm": 0.0003953216946683824, + "learning_rate": 9.70206750116066e-08, + "loss": 0.0, + "num_input_tokens_seen": 120341288, + "step": 178625 + }, + { + "epoch": 4.363960618571813, + "grad_norm": 0.0002987831539940089, + "learning_rate": 9.698403587507298e-08, + "loss": 0.0, + "num_input_tokens_seen": 120344616, + "step": 178630 + }, + { + "epoch": 4.36408276940366, + "grad_norm": 37.20766830444336, + "learning_rate": 9.694740330551465e-08, + "loss": 0.083, + "num_input_tokens_seen": 120347624, + "step": 178635 + }, + { + "epoch": 4.364204920235506, + "grad_norm": 1.8995802747667767e-05, + "learning_rate": 9.691077730319741e-08, + "loss": 0.0, + "num_input_tokens_seen": 120351336, + "step": 178640 + }, + { + "epoch": 4.364327071067354, + "grad_norm": 7.500431820517406e-06, + "learning_rate": 9.687415786838804e-08, + "loss": 0.0, + "num_input_tokens_seen": 120354664, + "step": 178645 + }, + { + "epoch": 4.364449221899201, + "grad_norm": 3.114115315838717e-05, + "learning_rate": 9.683754500135266e-08, + "loss": 0.0, + "num_input_tokens_seen": 120358184, + "step": 178650 + }, + { + "epoch": 4.3645713727310484, + "grad_norm": 0.0004918667254969478, + "learning_rate": 9.68009387023575e-08, + "loss": 0.0, + "num_input_tokens_seen": 120361192, + "step": 178655 + }, + { + "epoch": 4.364693523562895, + "grad_norm": 0.00686995405703783, + "learning_rate": 9.676433897166903e-08, + "loss": 0.0, + "num_input_tokens_seen": 120364328, + "step": 178660 + }, + { + "epoch": 4.364815674394743, + "grad_norm": 0.0003014057583641261, + "learning_rate": 9.6727745809553e-08, + "loss": 0.0, + "num_input_tokens_seen": 120367400, + "step": 178665 + }, + { + "epoch": 4.36493782522659, + "grad_norm": 4.572595935314894e-05, + "learning_rate": 9.669115921627602e-08, + "loss": 0.0, + "num_input_tokens_seen": 120370408, + "step": 178670 + }, + { + "epoch": 4.365059976058437, + "grad_norm": 2.4771761673036963e-05, + "learning_rate": 9.665457919210363e-08, + "loss": 0.0, + "num_input_tokens_seen": 120373672, + "step": 178675 + }, + { + "epoch": 4.365182126890284, + "grad_norm": 9.413428779225796e-05, + "learning_rate": 9.661800573730239e-08, + "loss": 0.0, + "num_input_tokens_seen": 120376744, + "step": 178680 + }, + { + "epoch": 4.365304277722132, + "grad_norm": 1.0507080332899932e-05, + "learning_rate": 9.658143885213776e-08, + "loss": 0.0, + "num_input_tokens_seen": 120380072, + "step": 178685 + }, + { + "epoch": 4.365426428553978, + "grad_norm": 0.00014622887829318643, + "learning_rate": 9.65448785368761e-08, + "loss": 0.0, + "num_input_tokens_seen": 120383272, + "step": 178690 + }, + { + "epoch": 4.365548579385826, + "grad_norm": 0.0007955287583172321, + "learning_rate": 9.650832479178283e-08, + "loss": 0.0, + "num_input_tokens_seen": 120386536, + "step": 178695 + }, + { + "epoch": 4.365670730217673, + "grad_norm": 0.00014913473569322377, + "learning_rate": 9.647177761712421e-08, + "loss": 0.0, + "num_input_tokens_seen": 120389928, + "step": 178700 + }, + { + "epoch": 4.36579288104952, + "grad_norm": 0.0032064183615148067, + "learning_rate": 9.643523701316591e-08, + "loss": 0.0, + "num_input_tokens_seen": 120393384, + "step": 178705 + }, + { + "epoch": 4.365915031881367, + "grad_norm": 0.00015113249537535012, + "learning_rate": 9.639870298017339e-08, + "loss": 0.0, + "num_input_tokens_seen": 120396456, + "step": 178710 + }, + { + "epoch": 4.366037182713214, + "grad_norm": 0.0001086229458451271, + "learning_rate": 9.636217551841253e-08, + "loss": 0.0, + "num_input_tokens_seen": 120399656, + "step": 178715 + }, + { + "epoch": 4.3661593335450615, + "grad_norm": 0.0002728144754655659, + "learning_rate": 9.632565462814923e-08, + "loss": 0.0, + "num_input_tokens_seen": 120402728, + "step": 178720 + }, + { + "epoch": 4.366281484376908, + "grad_norm": 8.51703662192449e-05, + "learning_rate": 9.628914030964863e-08, + "loss": 0.0, + "num_input_tokens_seen": 120405736, + "step": 178725 + }, + { + "epoch": 4.366403635208756, + "grad_norm": 0.003147940617054701, + "learning_rate": 9.625263256317661e-08, + "loss": 0.0, + "num_input_tokens_seen": 120409192, + "step": 178730 + }, + { + "epoch": 4.366525786040603, + "grad_norm": 0.0001306245248997584, + "learning_rate": 9.621613138899831e-08, + "loss": 0.0, + "num_input_tokens_seen": 120412520, + "step": 178735 + }, + { + "epoch": 4.36664793687245, + "grad_norm": 0.001566228223964572, + "learning_rate": 9.617963678737961e-08, + "loss": 0.0, + "num_input_tokens_seen": 120415656, + "step": 178740 + }, + { + "epoch": 4.366770087704297, + "grad_norm": 0.009327697567641735, + "learning_rate": 9.614314875858554e-08, + "loss": 0.0, + "num_input_tokens_seen": 120418856, + "step": 178745 + }, + { + "epoch": 4.366892238536145, + "grad_norm": 0.00037339047412388027, + "learning_rate": 9.610666730288152e-08, + "loss": 0.0, + "num_input_tokens_seen": 120422056, + "step": 178750 + }, + { + "epoch": 4.367014389367991, + "grad_norm": 0.00020661455346271396, + "learning_rate": 9.607019242053315e-08, + "loss": 0.0, + "num_input_tokens_seen": 120425384, + "step": 178755 + }, + { + "epoch": 4.367136540199839, + "grad_norm": 0.02003299444913864, + "learning_rate": 9.603372411180532e-08, + "loss": 0.0, + "num_input_tokens_seen": 120429160, + "step": 178760 + }, + { + "epoch": 4.367258691031686, + "grad_norm": 0.00031955703161656857, + "learning_rate": 9.599726237696359e-08, + "loss": 0.0, + "num_input_tokens_seen": 120432488, + "step": 178765 + }, + { + "epoch": 4.367380841863533, + "grad_norm": 4.561063906294294e-05, + "learning_rate": 9.596080721627264e-08, + "loss": 0.0, + "num_input_tokens_seen": 120435624, + "step": 178770 + }, + { + "epoch": 4.36750299269538, + "grad_norm": 8.527966201654635e-06, + "learning_rate": 9.592435862999793e-08, + "loss": 0.0, + "num_input_tokens_seen": 120439272, + "step": 178775 + }, + { + "epoch": 4.367625143527228, + "grad_norm": 0.00015384901780635118, + "learning_rate": 9.588791661840468e-08, + "loss": 0.0, + "num_input_tokens_seen": 120442280, + "step": 178780 + }, + { + "epoch": 4.3677472943590745, + "grad_norm": 0.00019443745259195566, + "learning_rate": 9.585148118175746e-08, + "loss": 0.0, + "num_input_tokens_seen": 120446248, + "step": 178785 + }, + { + "epoch": 4.367869445190922, + "grad_norm": 0.031488917768001556, + "learning_rate": 9.581505232032161e-08, + "loss": 0.0, + "num_input_tokens_seen": 120449064, + "step": 178790 + }, + { + "epoch": 4.367991596022769, + "grad_norm": 0.0008396423072554171, + "learning_rate": 9.577863003436182e-08, + "loss": 0.0, + "num_input_tokens_seen": 120452328, + "step": 178795 + }, + { + "epoch": 4.3681137468546165, + "grad_norm": 0.06349749118089676, + "learning_rate": 9.574221432414297e-08, + "loss": 0.0, + "num_input_tokens_seen": 120455208, + "step": 178800 + }, + { + "epoch": 4.368235897686463, + "grad_norm": 0.00028285704320296645, + "learning_rate": 9.57058051899301e-08, + "loss": 0.0, + "num_input_tokens_seen": 120458472, + "step": 178805 + }, + { + "epoch": 4.36835804851831, + "grad_norm": 0.003374557476490736, + "learning_rate": 9.566940263198764e-08, + "loss": 0.0, + "num_input_tokens_seen": 120462056, + "step": 178810 + }, + { + "epoch": 4.368480199350158, + "grad_norm": 0.00019568443531170487, + "learning_rate": 9.563300665058072e-08, + "loss": 0.0, + "num_input_tokens_seen": 120465640, + "step": 178815 + }, + { + "epoch": 4.368602350182004, + "grad_norm": 0.016990887001156807, + "learning_rate": 9.559661724597368e-08, + "loss": 0.0, + "num_input_tokens_seen": 120468520, + "step": 178820 + }, + { + "epoch": 4.368724501013852, + "grad_norm": 0.0021610939875245094, + "learning_rate": 9.556023441843142e-08, + "loss": 0.0, + "num_input_tokens_seen": 120471848, + "step": 178825 + }, + { + "epoch": 4.368846651845699, + "grad_norm": 0.0011525845620781183, + "learning_rate": 9.552385816821818e-08, + "loss": 0.0003, + "num_input_tokens_seen": 120475240, + "step": 178830 + }, + { + "epoch": 4.368968802677546, + "grad_norm": 0.0027562808245420456, + "learning_rate": 9.548748849559896e-08, + "loss": 0.0, + "num_input_tokens_seen": 120479208, + "step": 178835 + }, + { + "epoch": 4.369090953509393, + "grad_norm": 0.002062242478132248, + "learning_rate": 9.545112540083788e-08, + "loss": 0.0, + "num_input_tokens_seen": 120482152, + "step": 178840 + }, + { + "epoch": 4.369213104341241, + "grad_norm": 0.00020986916206311435, + "learning_rate": 9.541476888419942e-08, + "loss": 0.0, + "num_input_tokens_seen": 120485416, + "step": 178845 + }, + { + "epoch": 4.3693352551730875, + "grad_norm": 0.0021745937410742044, + "learning_rate": 9.537841894594823e-08, + "loss": 0.0, + "num_input_tokens_seen": 120488936, + "step": 178850 + }, + { + "epoch": 4.369457406004935, + "grad_norm": 0.00035431631840765476, + "learning_rate": 9.534207558634833e-08, + "loss": 0.0, + "num_input_tokens_seen": 120492584, + "step": 178855 + }, + { + "epoch": 4.369579556836782, + "grad_norm": 20.136585235595703, + "learning_rate": 9.53057388056644e-08, + "loss": 0.0931, + "num_input_tokens_seen": 120495272, + "step": 178860 + }, + { + "epoch": 4.3697017076686295, + "grad_norm": 0.00022699769760947675, + "learning_rate": 9.526940860416033e-08, + "loss": 0.0, + "num_input_tokens_seen": 120499560, + "step": 178865 + }, + { + "epoch": 4.369823858500476, + "grad_norm": 0.00014428774011321366, + "learning_rate": 9.523308498210036e-08, + "loss": 0.0, + "num_input_tokens_seen": 120502824, + "step": 178870 + }, + { + "epoch": 4.369946009332324, + "grad_norm": 0.0001790342212188989, + "learning_rate": 9.519676793974907e-08, + "loss": 0.0, + "num_input_tokens_seen": 120505896, + "step": 178875 + }, + { + "epoch": 4.370068160164171, + "grad_norm": 2.688972199393902e-05, + "learning_rate": 9.516045747736989e-08, + "loss": 0.0, + "num_input_tokens_seen": 120508840, + "step": 178880 + }, + { + "epoch": 4.370190310996018, + "grad_norm": 4.677814558817772e-06, + "learning_rate": 9.512415359522752e-08, + "loss": 0.0, + "num_input_tokens_seen": 120512040, + "step": 178885 + }, + { + "epoch": 4.370312461827865, + "grad_norm": 0.0043054185807704926, + "learning_rate": 9.508785629358552e-08, + "loss": 0.0, + "num_input_tokens_seen": 120515560, + "step": 178890 + }, + { + "epoch": 4.370434612659713, + "grad_norm": 3.436887345742434e-05, + "learning_rate": 9.50515655727081e-08, + "loss": 0.0, + "num_input_tokens_seen": 120519208, + "step": 178895 + }, + { + "epoch": 4.370556763491559, + "grad_norm": 0.0003368296311236918, + "learning_rate": 9.50152814328592e-08, + "loss": 0.0, + "num_input_tokens_seen": 120522344, + "step": 178900 + }, + { + "epoch": 4.370678914323406, + "grad_norm": 0.00037761012208648026, + "learning_rate": 9.497900387430236e-08, + "loss": 0.0, + "num_input_tokens_seen": 120525864, + "step": 178905 + }, + { + "epoch": 4.370801065155254, + "grad_norm": 0.001919149188324809, + "learning_rate": 9.494273289730181e-08, + "loss": 0.0, + "num_input_tokens_seen": 120529320, + "step": 178910 + }, + { + "epoch": 4.3709232159871005, + "grad_norm": 0.0013135827612131834, + "learning_rate": 9.490646850212103e-08, + "loss": 0.0, + "num_input_tokens_seen": 120532328, + "step": 178915 + }, + { + "epoch": 4.371045366818948, + "grad_norm": 1.2294826774450485e-05, + "learning_rate": 9.487021068902402e-08, + "loss": 0.0, + "num_input_tokens_seen": 120535976, + "step": 178920 + }, + { + "epoch": 4.371167517650795, + "grad_norm": 4.1211696952814236e-05, + "learning_rate": 9.483395945827399e-08, + "loss": 0.0, + "num_input_tokens_seen": 120539176, + "step": 178925 + }, + { + "epoch": 4.3712896684826426, + "grad_norm": 2.6847643312066793e-05, + "learning_rate": 9.479771481013488e-08, + "loss": 0.0, + "num_input_tokens_seen": 120542696, + "step": 178930 + }, + { + "epoch": 4.371411819314489, + "grad_norm": 0.0009733302285894752, + "learning_rate": 9.476147674487056e-08, + "loss": 0.0, + "num_input_tokens_seen": 120546152, + "step": 178935 + }, + { + "epoch": 4.371533970146337, + "grad_norm": 0.00021564547205343843, + "learning_rate": 9.472524526274394e-08, + "loss": 0.0, + "num_input_tokens_seen": 120549800, + "step": 178940 + }, + { + "epoch": 4.371656120978184, + "grad_norm": 0.0003852611407637596, + "learning_rate": 9.468902036401916e-08, + "loss": 0.0, + "num_input_tokens_seen": 120553448, + "step": 178945 + }, + { + "epoch": 4.371778271810031, + "grad_norm": 0.00020964071154594421, + "learning_rate": 9.465280204895909e-08, + "loss": 0.0, + "num_input_tokens_seen": 120556712, + "step": 178950 + }, + { + "epoch": 4.371900422641878, + "grad_norm": 0.00012947864888701588, + "learning_rate": 9.461659031782742e-08, + "loss": 0.0, + "num_input_tokens_seen": 120560168, + "step": 178955 + }, + { + "epoch": 4.372022573473726, + "grad_norm": 0.00023771810811012983, + "learning_rate": 9.45803851708874e-08, + "loss": 0.0, + "num_input_tokens_seen": 120563304, + "step": 178960 + }, + { + "epoch": 4.372144724305572, + "grad_norm": 0.0017591394716873765, + "learning_rate": 9.454418660840225e-08, + "loss": 0.0, + "num_input_tokens_seen": 120566504, + "step": 178965 + }, + { + "epoch": 4.37226687513742, + "grad_norm": 0.007710427511483431, + "learning_rate": 9.450799463063552e-08, + "loss": 0.0, + "num_input_tokens_seen": 120570152, + "step": 178970 + }, + { + "epoch": 4.372389025969267, + "grad_norm": 0.04205526411533356, + "learning_rate": 9.447180923785004e-08, + "loss": 0.0, + "num_input_tokens_seen": 120573800, + "step": 178975 + }, + { + "epoch": 4.372511176801114, + "grad_norm": 0.0001873615983640775, + "learning_rate": 9.443563043030922e-08, + "loss": 0.0, + "num_input_tokens_seen": 120577576, + "step": 178980 + }, + { + "epoch": 4.372633327632961, + "grad_norm": 2.1078389181639068e-05, + "learning_rate": 9.4399458208276e-08, + "loss": 0.0, + "num_input_tokens_seen": 120581288, + "step": 178985 + }, + { + "epoch": 4.372755478464808, + "grad_norm": 0.0005454168422147632, + "learning_rate": 9.436329257201359e-08, + "loss": 0.0, + "num_input_tokens_seen": 120584488, + "step": 178990 + }, + { + "epoch": 4.372877629296656, + "grad_norm": 0.0002987706393469125, + "learning_rate": 9.432713352178479e-08, + "loss": 0.0, + "num_input_tokens_seen": 120587688, + "step": 178995 + }, + { + "epoch": 4.372999780128502, + "grad_norm": 6.870987999718636e-05, + "learning_rate": 9.429098105785283e-08, + "loss": 0.0, + "num_input_tokens_seen": 120590888, + "step": 179000 + }, + { + "epoch": 4.37312193096035, + "grad_norm": 0.005530540365725756, + "learning_rate": 9.425483518048028e-08, + "loss": 0.0, + "num_input_tokens_seen": 120594152, + "step": 179005 + }, + { + "epoch": 4.373244081792197, + "grad_norm": 0.00047069930587895215, + "learning_rate": 9.421869588993025e-08, + "loss": 0.0, + "num_input_tokens_seen": 120597352, + "step": 179010 + }, + { + "epoch": 4.373366232624044, + "grad_norm": 0.00013837346341460943, + "learning_rate": 9.418256318646567e-08, + "loss": 0.0, + "num_input_tokens_seen": 120600616, + "step": 179015 + }, + { + "epoch": 4.373488383455891, + "grad_norm": 0.0004524173273239285, + "learning_rate": 9.414643707034886e-08, + "loss": 0.0, + "num_input_tokens_seen": 120603880, + "step": 179020 + }, + { + "epoch": 4.373610534287739, + "grad_norm": 0.0004959152429364622, + "learning_rate": 9.411031754184285e-08, + "loss": 0.0, + "num_input_tokens_seen": 120607656, + "step": 179025 + }, + { + "epoch": 4.3737326851195855, + "grad_norm": 0.00014898374502081424, + "learning_rate": 9.40742046012104e-08, + "loss": 0.0, + "num_input_tokens_seen": 120610920, + "step": 179030 + }, + { + "epoch": 4.373854835951433, + "grad_norm": 0.005233935080468655, + "learning_rate": 9.403809824871378e-08, + "loss": 0.0, + "num_input_tokens_seen": 120614440, + "step": 179035 + }, + { + "epoch": 4.37397698678328, + "grad_norm": 0.0002437441289657727, + "learning_rate": 9.400199848461598e-08, + "loss": 0.0, + "num_input_tokens_seen": 120617832, + "step": 179040 + }, + { + "epoch": 4.3740991376151275, + "grad_norm": 5.977055479888804e-05, + "learning_rate": 9.396590530917925e-08, + "loss": 0.0, + "num_input_tokens_seen": 120621416, + "step": 179045 + }, + { + "epoch": 4.374221288446974, + "grad_norm": 0.0006878579151816666, + "learning_rate": 9.392981872266626e-08, + "loss": 0.0, + "num_input_tokens_seen": 120625192, + "step": 179050 + }, + { + "epoch": 4.374343439278822, + "grad_norm": 1.9039856852032244e-05, + "learning_rate": 9.389373872533912e-08, + "loss": 0.0, + "num_input_tokens_seen": 120628328, + "step": 179055 + }, + { + "epoch": 4.374465590110669, + "grad_norm": 0.4868165850639343, + "learning_rate": 9.385766531746053e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120631528, + "step": 179060 + }, + { + "epoch": 4.374587740942516, + "grad_norm": 10.62925910949707, + "learning_rate": 9.382159849929284e-08, + "loss": 0.0302, + "num_input_tokens_seen": 120634856, + "step": 179065 + }, + { + "epoch": 4.374709891774363, + "grad_norm": 0.0009652128792367876, + "learning_rate": 9.378553827109803e-08, + "loss": 0.0, + "num_input_tokens_seen": 120637928, + "step": 179070 + }, + { + "epoch": 4.37483204260621, + "grad_norm": 7.944925891933963e-05, + "learning_rate": 9.37494846331387e-08, + "loss": 0.0, + "num_input_tokens_seen": 120641064, + "step": 179075 + }, + { + "epoch": 4.374954193438057, + "grad_norm": 3.24042011925485e-05, + "learning_rate": 9.371343758567663e-08, + "loss": 0.0, + "num_input_tokens_seen": 120644136, + "step": 179080 + }, + { + "epoch": 4.375076344269904, + "grad_norm": 0.000675307703204453, + "learning_rate": 9.367739712897426e-08, + "loss": 0.0, + "num_input_tokens_seen": 120647592, + "step": 179085 + }, + { + "epoch": 4.375198495101752, + "grad_norm": 0.0001943126117112115, + "learning_rate": 9.364136326329386e-08, + "loss": 0.0, + "num_input_tokens_seen": 120650792, + "step": 179090 + }, + { + "epoch": 4.3753206459335985, + "grad_norm": 63.127132415771484, + "learning_rate": 9.360533598889708e-08, + "loss": 0.0913, + "num_input_tokens_seen": 120654120, + "step": 179095 + }, + { + "epoch": 4.375442796765446, + "grad_norm": 2.2227972294786014e-05, + "learning_rate": 9.356931530604617e-08, + "loss": 0.0555, + "num_input_tokens_seen": 120657256, + "step": 179100 + }, + { + "epoch": 4.375564947597293, + "grad_norm": 5.614551264443435e-05, + "learning_rate": 9.353330121500302e-08, + "loss": 0.0, + "num_input_tokens_seen": 120660392, + "step": 179105 + }, + { + "epoch": 4.3756870984291405, + "grad_norm": 0.0001968853030120954, + "learning_rate": 9.349729371602944e-08, + "loss": 0.0, + "num_input_tokens_seen": 120663720, + "step": 179110 + }, + { + "epoch": 4.375809249260987, + "grad_norm": 0.00011514556535985321, + "learning_rate": 9.346129280938742e-08, + "loss": 0.0, + "num_input_tokens_seen": 120667176, + "step": 179115 + }, + { + "epoch": 4.375931400092835, + "grad_norm": 0.00011827262642327696, + "learning_rate": 9.342529849533853e-08, + "loss": 0.0, + "num_input_tokens_seen": 120670888, + "step": 179120 + }, + { + "epoch": 4.376053550924682, + "grad_norm": 0.002016911981627345, + "learning_rate": 9.338931077414492e-08, + "loss": 0.0, + "num_input_tokens_seen": 120674280, + "step": 179125 + }, + { + "epoch": 4.376175701756529, + "grad_norm": 0.0008704157080501318, + "learning_rate": 9.335332964606802e-08, + "loss": 0.0, + "num_input_tokens_seen": 120677608, + "step": 179130 + }, + { + "epoch": 4.376297852588376, + "grad_norm": 0.0019470416009426117, + "learning_rate": 9.331735511136962e-08, + "loss": 0.0, + "num_input_tokens_seen": 120681064, + "step": 179135 + }, + { + "epoch": 4.376420003420224, + "grad_norm": 2.9816957976436242e-05, + "learning_rate": 9.328138717031109e-08, + "loss": 0.0, + "num_input_tokens_seen": 120684328, + "step": 179140 + }, + { + "epoch": 4.37654215425207, + "grad_norm": 4.998807344236411e-05, + "learning_rate": 9.324542582315442e-08, + "loss": 0.0489, + "num_input_tokens_seen": 120687400, + "step": 179145 + }, + { + "epoch": 4.376664305083918, + "grad_norm": 0.007195464801043272, + "learning_rate": 9.320947107016074e-08, + "loss": 0.0, + "num_input_tokens_seen": 120690600, + "step": 179150 + }, + { + "epoch": 4.376786455915765, + "grad_norm": 0.0022986922413110733, + "learning_rate": 9.317352291159164e-08, + "loss": 0.0, + "num_input_tokens_seen": 120694312, + "step": 179155 + }, + { + "epoch": 4.376908606747612, + "grad_norm": 0.00016899174079298973, + "learning_rate": 9.313758134770877e-08, + "loss": 0.0, + "num_input_tokens_seen": 120697576, + "step": 179160 + }, + { + "epoch": 4.377030757579459, + "grad_norm": 0.02144761197268963, + "learning_rate": 9.310164637877316e-08, + "loss": 0.0, + "num_input_tokens_seen": 120701160, + "step": 179165 + }, + { + "epoch": 4.377152908411306, + "grad_norm": 0.0012190442066639662, + "learning_rate": 9.306571800504648e-08, + "loss": 0.0, + "num_input_tokens_seen": 120705000, + "step": 179170 + }, + { + "epoch": 4.3772750592431535, + "grad_norm": 0.2229158878326416, + "learning_rate": 9.302979622678964e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120708264, + "step": 179175 + }, + { + "epoch": 4.377397210075, + "grad_norm": 5.557996337302029e-05, + "learning_rate": 9.299388104426409e-08, + "loss": 0.0, + "num_input_tokens_seen": 120711400, + "step": 179180 + }, + { + "epoch": 4.377519360906848, + "grad_norm": 8.016775973374024e-05, + "learning_rate": 9.295797245773119e-08, + "loss": 0.0, + "num_input_tokens_seen": 120714344, + "step": 179185 + }, + { + "epoch": 4.377641511738695, + "grad_norm": 0.00011706854274962097, + "learning_rate": 9.29220704674516e-08, + "loss": 0.0, + "num_input_tokens_seen": 120717352, + "step": 179190 + }, + { + "epoch": 4.377763662570542, + "grad_norm": 0.004367952700704336, + "learning_rate": 9.288617507368701e-08, + "loss": 0.0, + "num_input_tokens_seen": 120720488, + "step": 179195 + }, + { + "epoch": 4.377885813402389, + "grad_norm": 0.0011716161388903856, + "learning_rate": 9.285028627669789e-08, + "loss": 0.0, + "num_input_tokens_seen": 120723752, + "step": 179200 + }, + { + "epoch": 4.378007964234237, + "grad_norm": 0.000715529196895659, + "learning_rate": 9.281440407674556e-08, + "loss": 0.0, + "num_input_tokens_seen": 120726888, + "step": 179205 + }, + { + "epoch": 4.378130115066083, + "grad_norm": 0.0004990265006199479, + "learning_rate": 9.277852847409107e-08, + "loss": 0.0, + "num_input_tokens_seen": 120730536, + "step": 179210 + }, + { + "epoch": 4.378252265897931, + "grad_norm": 3.416829349589534e-05, + "learning_rate": 9.274265946899484e-08, + "loss": 0.0, + "num_input_tokens_seen": 120734504, + "step": 179215 + }, + { + "epoch": 4.378374416729778, + "grad_norm": 0.00046856681001372635, + "learning_rate": 9.270679706171825e-08, + "loss": 0.0, + "num_input_tokens_seen": 120737448, + "step": 179220 + }, + { + "epoch": 4.378496567561625, + "grad_norm": 0.00010078339255414903, + "learning_rate": 9.267094125252161e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120740328, + "step": 179225 + }, + { + "epoch": 4.378618718393472, + "grad_norm": 2.9010152502451092e-05, + "learning_rate": 9.26350920416662e-08, + "loss": 0.0, + "num_input_tokens_seen": 120743592, + "step": 179230 + }, + { + "epoch": 4.37874086922532, + "grad_norm": 3.963453309552278e-06, + "learning_rate": 9.259924942941222e-08, + "loss": 0.0, + "num_input_tokens_seen": 120746856, + "step": 179235 + }, + { + "epoch": 4.3788630200571665, + "grad_norm": 0.0013897567987442017, + "learning_rate": 9.256341341602059e-08, + "loss": 0.0003, + "num_input_tokens_seen": 120750504, + "step": 179240 + }, + { + "epoch": 4.378985170889013, + "grad_norm": 0.00011837323108920828, + "learning_rate": 9.25275840017521e-08, + "loss": 0.0, + "num_input_tokens_seen": 120754344, + "step": 179245 + }, + { + "epoch": 4.379107321720861, + "grad_norm": 0.00936560146510601, + "learning_rate": 9.249176118686686e-08, + "loss": 0.0, + "num_input_tokens_seen": 120757736, + "step": 179250 + }, + { + "epoch": 4.379229472552708, + "grad_norm": 7.882525824243203e-05, + "learning_rate": 9.245594497162579e-08, + "loss": 0.0, + "num_input_tokens_seen": 120761064, + "step": 179255 + }, + { + "epoch": 4.379351623384555, + "grad_norm": 0.0018617367604747415, + "learning_rate": 9.242013535628901e-08, + "loss": 0.0, + "num_input_tokens_seen": 120764456, + "step": 179260 + }, + { + "epoch": 4.379473774216402, + "grad_norm": 0.00010539888171479106, + "learning_rate": 9.238433234111731e-08, + "loss": 0.0, + "num_input_tokens_seen": 120767656, + "step": 179265 + }, + { + "epoch": 4.37959592504825, + "grad_norm": 0.0016833314439281821, + "learning_rate": 9.234853592637071e-08, + "loss": 0.0, + "num_input_tokens_seen": 120770792, + "step": 179270 + }, + { + "epoch": 4.379718075880096, + "grad_norm": 0.000337698234943673, + "learning_rate": 9.231274611230965e-08, + "loss": 0.084, + "num_input_tokens_seen": 120773992, + "step": 179275 + }, + { + "epoch": 4.379840226711944, + "grad_norm": 0.0006232039304450154, + "learning_rate": 9.227696289919462e-08, + "loss": 0.0, + "num_input_tokens_seen": 120777128, + "step": 179280 + }, + { + "epoch": 4.379962377543791, + "grad_norm": 0.0005635755369439721, + "learning_rate": 9.22411862872855e-08, + "loss": 0.0, + "num_input_tokens_seen": 120780264, + "step": 179285 + }, + { + "epoch": 4.380084528375638, + "grad_norm": 0.0004072504525538534, + "learning_rate": 9.220541627684286e-08, + "loss": 0.0, + "num_input_tokens_seen": 120783848, + "step": 179290 + }, + { + "epoch": 4.380206679207485, + "grad_norm": 0.0003334445063956082, + "learning_rate": 9.216965286812628e-08, + "loss": 0.0, + "num_input_tokens_seen": 120787304, + "step": 179295 + }, + { + "epoch": 4.380328830039333, + "grad_norm": 0.0065982588566839695, + "learning_rate": 9.213389606139643e-08, + "loss": 0.0182, + "num_input_tokens_seen": 120791016, + "step": 179300 + }, + { + "epoch": 4.38045098087118, + "grad_norm": 0.00016611551109235734, + "learning_rate": 9.2098145856913e-08, + "loss": 0.0001, + "num_input_tokens_seen": 120794344, + "step": 179305 + }, + { + "epoch": 4.380573131703027, + "grad_norm": 5.830096051795408e-05, + "learning_rate": 9.206240225493611e-08, + "loss": 0.0, + "num_input_tokens_seen": 120797736, + "step": 179310 + }, + { + "epoch": 4.380695282534874, + "grad_norm": 0.00011630048538791016, + "learning_rate": 9.202666525572545e-08, + "loss": 0.0, + "num_input_tokens_seen": 120800936, + "step": 179315 + }, + { + "epoch": 4.380817433366722, + "grad_norm": 0.0007063053781166673, + "learning_rate": 9.199093485954123e-08, + "loss": 0.0, + "num_input_tokens_seen": 120804840, + "step": 179320 + }, + { + "epoch": 4.380939584198568, + "grad_norm": 3.338679744047113e-05, + "learning_rate": 9.195521106664328e-08, + "loss": 0.0, + "num_input_tokens_seen": 120808168, + "step": 179325 + }, + { + "epoch": 4.381061735030416, + "grad_norm": 0.0004898302140645683, + "learning_rate": 9.191949387729103e-08, + "loss": 0.0, + "num_input_tokens_seen": 120811496, + "step": 179330 + }, + { + "epoch": 4.381183885862263, + "grad_norm": 0.0033150261733680964, + "learning_rate": 9.188378329174451e-08, + "loss": 0.0, + "num_input_tokens_seen": 120814760, + "step": 179335 + }, + { + "epoch": 4.3813060366941095, + "grad_norm": 6.164521892060293e-06, + "learning_rate": 9.184807931026351e-08, + "loss": 0.0, + "num_input_tokens_seen": 120818472, + "step": 179340 + }, + { + "epoch": 4.381428187525957, + "grad_norm": 0.0019105395767837763, + "learning_rate": 9.181238193310736e-08, + "loss": 0.0, + "num_input_tokens_seen": 120821352, + "step": 179345 + }, + { + "epoch": 4.381550338357804, + "grad_norm": 0.0002401101082796231, + "learning_rate": 9.177669116053599e-08, + "loss": 0.0, + "num_input_tokens_seen": 120824488, + "step": 179350 + }, + { + "epoch": 4.3816724891896515, + "grad_norm": 0.00011806233669631183, + "learning_rate": 9.174100699280862e-08, + "loss": 0.0, + "num_input_tokens_seen": 120827752, + "step": 179355 + }, + { + "epoch": 4.381794640021498, + "grad_norm": 0.017286479473114014, + "learning_rate": 9.170532943018517e-08, + "loss": 0.0302, + "num_input_tokens_seen": 120831656, + "step": 179360 + }, + { + "epoch": 4.381916790853346, + "grad_norm": 0.0006702401442453265, + "learning_rate": 9.166965847292474e-08, + "loss": 0.0, + "num_input_tokens_seen": 120834792, + "step": 179365 + }, + { + "epoch": 4.382038941685193, + "grad_norm": 0.0017308936221525073, + "learning_rate": 9.163399412128681e-08, + "loss": 0.0, + "num_input_tokens_seen": 120839272, + "step": 179370 + }, + { + "epoch": 4.38216109251704, + "grad_norm": 0.00014859286602586508, + "learning_rate": 9.159833637553094e-08, + "loss": 0.0, + "num_input_tokens_seen": 120842600, + "step": 179375 + }, + { + "epoch": 4.382283243348887, + "grad_norm": 0.0006171285640448332, + "learning_rate": 9.156268523591615e-08, + "loss": 0.0, + "num_input_tokens_seen": 120845864, + "step": 179380 + }, + { + "epoch": 4.382405394180735, + "grad_norm": 0.0025407453067600727, + "learning_rate": 9.152704070270201e-08, + "loss": 0.0, + "num_input_tokens_seen": 120849192, + "step": 179385 + }, + { + "epoch": 4.382527545012581, + "grad_norm": 0.0007126021664589643, + "learning_rate": 9.149140277614742e-08, + "loss": 0.0, + "num_input_tokens_seen": 120853096, + "step": 179390 + }, + { + "epoch": 4.382649695844429, + "grad_norm": 0.0014084700960665941, + "learning_rate": 9.145577145651173e-08, + "loss": 0.0, + "num_input_tokens_seen": 120856360, + "step": 179395 + }, + { + "epoch": 4.382771846676276, + "grad_norm": 0.00025574007304385304, + "learning_rate": 9.142014674405418e-08, + "loss": 0.0, + "num_input_tokens_seen": 120860072, + "step": 179400 + }, + { + "epoch": 4.382893997508123, + "grad_norm": 0.041199006140232086, + "learning_rate": 9.138452863903357e-08, + "loss": 0.0, + "num_input_tokens_seen": 120863016, + "step": 179405 + }, + { + "epoch": 4.38301614833997, + "grad_norm": 0.0009356053778901696, + "learning_rate": 9.134891714170911e-08, + "loss": 0.0, + "num_input_tokens_seen": 120866280, + "step": 179410 + }, + { + "epoch": 4.383138299171818, + "grad_norm": 0.020888380706310272, + "learning_rate": 9.131331225233985e-08, + "loss": 0.0, + "num_input_tokens_seen": 120869800, + "step": 179415 + }, + { + "epoch": 4.3832604500036645, + "grad_norm": 0.00015980943862814456, + "learning_rate": 9.127771397118434e-08, + "loss": 0.0, + "num_input_tokens_seen": 120872552, + "step": 179420 + }, + { + "epoch": 4.383382600835512, + "grad_norm": 0.0007625825819559395, + "learning_rate": 9.124212229850192e-08, + "loss": 0.0, + "num_input_tokens_seen": 120876200, + "step": 179425 + }, + { + "epoch": 4.383504751667359, + "grad_norm": 0.032647211104631424, + "learning_rate": 9.120653723455108e-08, + "loss": 0.0, + "num_input_tokens_seen": 120879016, + "step": 179430 + }, + { + "epoch": 4.383626902499206, + "grad_norm": 6.470428343163803e-05, + "learning_rate": 9.117095877959091e-08, + "loss": 0.0, + "num_input_tokens_seen": 120882408, + "step": 179435 + }, + { + "epoch": 4.383749053331053, + "grad_norm": 8.53855672175996e-05, + "learning_rate": 9.113538693387967e-08, + "loss": 0.0, + "num_input_tokens_seen": 120885608, + "step": 179440 + }, + { + "epoch": 4.3838712041629, + "grad_norm": 0.00017426212434656918, + "learning_rate": 9.109982169767671e-08, + "loss": 0.0, + "num_input_tokens_seen": 120888680, + "step": 179445 + }, + { + "epoch": 4.383993354994748, + "grad_norm": 0.02529120445251465, + "learning_rate": 9.106426307124004e-08, + "loss": 0.0, + "num_input_tokens_seen": 120891944, + "step": 179450 + }, + { + "epoch": 4.384115505826594, + "grad_norm": 3.9343041862593964e-05, + "learning_rate": 9.102871105482868e-08, + "loss": 0.0, + "num_input_tokens_seen": 120895336, + "step": 179455 + }, + { + "epoch": 4.384237656658442, + "grad_norm": 0.00017238286091014743, + "learning_rate": 9.099316564870086e-08, + "loss": 0.0, + "num_input_tokens_seen": 120898664, + "step": 179460 + }, + { + "epoch": 4.384359807490289, + "grad_norm": 24.275854110717773, + "learning_rate": 9.095762685311526e-08, + "loss": 0.058, + "num_input_tokens_seen": 120901800, + "step": 179465 + }, + { + "epoch": 4.384481958322136, + "grad_norm": 0.018933162093162537, + "learning_rate": 9.092209466833046e-08, + "loss": 0.0, + "num_input_tokens_seen": 120904808, + "step": 179470 + }, + { + "epoch": 4.384604109153983, + "grad_norm": 4.2884777940344065e-05, + "learning_rate": 9.088656909460446e-08, + "loss": 0.0, + "num_input_tokens_seen": 120908264, + "step": 179475 + }, + { + "epoch": 4.384726259985831, + "grad_norm": 0.00044881939538754523, + "learning_rate": 9.085105013219607e-08, + "loss": 0.0, + "num_input_tokens_seen": 120911272, + "step": 179480 + }, + { + "epoch": 4.3848484108176775, + "grad_norm": 0.0136734489351511, + "learning_rate": 9.08155377813633e-08, + "loss": 0.0, + "num_input_tokens_seen": 120914664, + "step": 179485 + }, + { + "epoch": 4.384970561649525, + "grad_norm": 0.0005159003776498139, + "learning_rate": 9.07800320423644e-08, + "loss": 0.0, + "num_input_tokens_seen": 120917992, + "step": 179490 + }, + { + "epoch": 4.385092712481372, + "grad_norm": 8.528042963007465e-05, + "learning_rate": 9.074453291545781e-08, + "loss": 0.0, + "num_input_tokens_seen": 120921192, + "step": 179495 + }, + { + "epoch": 4.3852148633132195, + "grad_norm": 6.457211566157639e-05, + "learning_rate": 9.070904040090132e-08, + "loss": 0.0, + "num_input_tokens_seen": 120924328, + "step": 179500 + }, + { + "epoch": 4.385337014145066, + "grad_norm": 3.3956439438043162e-06, + "learning_rate": 9.067355449895352e-08, + "loss": 0.0, + "num_input_tokens_seen": 120928104, + "step": 179505 + }, + { + "epoch": 4.385459164976914, + "grad_norm": 0.0062750293873250484, + "learning_rate": 9.063807520987199e-08, + "loss": 0.0, + "num_input_tokens_seen": 120931304, + "step": 179510 + }, + { + "epoch": 4.385581315808761, + "grad_norm": 0.0009548944071866572, + "learning_rate": 9.060260253391517e-08, + "loss": 0.0, + "num_input_tokens_seen": 120934568, + "step": 179515 + }, + { + "epoch": 4.385703466640608, + "grad_norm": 0.00014670187374576926, + "learning_rate": 9.056713647134084e-08, + "loss": 0.0, + "num_input_tokens_seen": 120937832, + "step": 179520 + }, + { + "epoch": 4.385825617472455, + "grad_norm": 0.002957995282486081, + "learning_rate": 9.053167702240672e-08, + "loss": 0.0, + "num_input_tokens_seen": 120940904, + "step": 179525 + }, + { + "epoch": 4.385947768304302, + "grad_norm": 0.055948711931705475, + "learning_rate": 9.049622418737102e-08, + "loss": 0.0, + "num_input_tokens_seen": 120944808, + "step": 179530 + }, + { + "epoch": 4.386069919136149, + "grad_norm": 0.0007765132468193769, + "learning_rate": 9.046077796649121e-08, + "loss": 0.0, + "num_input_tokens_seen": 120948200, + "step": 179535 + }, + { + "epoch": 4.386192069967996, + "grad_norm": 0.0014981003478169441, + "learning_rate": 9.042533836002541e-08, + "loss": 0.0, + "num_input_tokens_seen": 120951912, + "step": 179540 + }, + { + "epoch": 4.386314220799844, + "grad_norm": 0.0016036713495850563, + "learning_rate": 9.03899053682311e-08, + "loss": 0.0, + "num_input_tokens_seen": 120955496, + "step": 179545 + }, + { + "epoch": 4.3864363716316905, + "grad_norm": 0.0004156556387897581, + "learning_rate": 9.035447899136617e-08, + "loss": 0.0, + "num_input_tokens_seen": 120959336, + "step": 179550 + }, + { + "epoch": 4.386558522463538, + "grad_norm": 0.00033517173142172396, + "learning_rate": 9.031905922968797e-08, + "loss": 0.0, + "num_input_tokens_seen": 120963112, + "step": 179555 + }, + { + "epoch": 4.386680673295385, + "grad_norm": 5.0665068556554615e-05, + "learning_rate": 9.02836460834543e-08, + "loss": 0.0, + "num_input_tokens_seen": 120966376, + "step": 179560 + }, + { + "epoch": 4.3868028241272325, + "grad_norm": 0.00043228318099863827, + "learning_rate": 9.024823955292271e-08, + "loss": 0.0, + "num_input_tokens_seen": 120969896, + "step": 179565 + }, + { + "epoch": 4.386924974959079, + "grad_norm": 0.0023719624150544405, + "learning_rate": 9.021283963835058e-08, + "loss": 0.0, + "num_input_tokens_seen": 120973608, + "step": 179570 + }, + { + "epoch": 4.387047125790927, + "grad_norm": 0.00037452831747941673, + "learning_rate": 9.017744633999547e-08, + "loss": 0.0, + "num_input_tokens_seen": 120977000, + "step": 179575 + }, + { + "epoch": 4.387169276622774, + "grad_norm": 0.0002062133135041222, + "learning_rate": 9.01420596581145e-08, + "loss": 0.0, + "num_input_tokens_seen": 120980584, + "step": 179580 + }, + { + "epoch": 4.387291427454621, + "grad_norm": 0.0007104614633135498, + "learning_rate": 9.010667959296526e-08, + "loss": 0.0, + "num_input_tokens_seen": 120984168, + "step": 179585 + }, + { + "epoch": 4.387413578286468, + "grad_norm": 0.00010111812298418954, + "learning_rate": 9.007130614480507e-08, + "loss": 0.0, + "num_input_tokens_seen": 120987560, + "step": 179590 + }, + { + "epoch": 4.387535729118316, + "grad_norm": 0.0004167421138845384, + "learning_rate": 9.003593931389087e-08, + "loss": 0.0, + "num_input_tokens_seen": 120990696, + "step": 179595 + }, + { + "epoch": 4.387657879950162, + "grad_norm": 0.0004182478878647089, + "learning_rate": 9.000057910048042e-08, + "loss": 0.0, + "num_input_tokens_seen": 120993960, + "step": 179600 + }, + { + "epoch": 4.387780030782009, + "grad_norm": 0.0013876301236450672, + "learning_rate": 8.996522550483021e-08, + "loss": 0.0, + "num_input_tokens_seen": 120997224, + "step": 179605 + }, + { + "epoch": 4.387902181613857, + "grad_norm": 4.902793807559647e-05, + "learning_rate": 8.99298785271978e-08, + "loss": 0.0, + "num_input_tokens_seen": 121000680, + "step": 179610 + }, + { + "epoch": 4.388024332445704, + "grad_norm": 0.00018190659466199577, + "learning_rate": 8.989453816783998e-08, + "loss": 0.0, + "num_input_tokens_seen": 121003752, + "step": 179615 + }, + { + "epoch": 4.388146483277551, + "grad_norm": 4.526587417785777e-06, + "learning_rate": 8.985920442701411e-08, + "loss": 0.0, + "num_input_tokens_seen": 121007144, + "step": 179620 + }, + { + "epoch": 4.388268634109398, + "grad_norm": 0.002543956506997347, + "learning_rate": 8.982387730497665e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121010152, + "step": 179625 + }, + { + "epoch": 4.388390784941246, + "grad_norm": 0.004739391151815653, + "learning_rate": 8.978855680198494e-08, + "loss": 0.0, + "num_input_tokens_seen": 121013288, + "step": 179630 + }, + { + "epoch": 4.388512935773092, + "grad_norm": 7.183388515841216e-05, + "learning_rate": 8.975324291829567e-08, + "loss": 0.0, + "num_input_tokens_seen": 121016680, + "step": 179635 + }, + { + "epoch": 4.38863508660494, + "grad_norm": 1.286260703636799e-05, + "learning_rate": 8.971793565416541e-08, + "loss": 0.0214, + "num_input_tokens_seen": 121020072, + "step": 179640 + }, + { + "epoch": 4.388757237436787, + "grad_norm": 0.0037381022702902555, + "learning_rate": 8.96826350098513e-08, + "loss": 0.0, + "num_input_tokens_seen": 121023336, + "step": 179645 + }, + { + "epoch": 4.388879388268634, + "grad_norm": 2.0466199202928692e-05, + "learning_rate": 8.964734098561e-08, + "loss": 0.0, + "num_input_tokens_seen": 121026664, + "step": 179650 + }, + { + "epoch": 4.389001539100481, + "grad_norm": 0.0005669654929079115, + "learning_rate": 8.961205358169788e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121029672, + "step": 179655 + }, + { + "epoch": 4.389123689932329, + "grad_norm": 21.659992218017578, + "learning_rate": 8.957677279837195e-08, + "loss": 0.0325, + "num_input_tokens_seen": 121032744, + "step": 179660 + }, + { + "epoch": 4.3892458407641755, + "grad_norm": 1.6105790564324707e-05, + "learning_rate": 8.954149863588844e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121036136, + "step": 179665 + }, + { + "epoch": 4.389367991596023, + "grad_norm": 4.675076706917025e-05, + "learning_rate": 8.950623109450428e-08, + "loss": 0.0, + "num_input_tokens_seen": 121039592, + "step": 179670 + }, + { + "epoch": 4.38949014242787, + "grad_norm": 0.006997786462306976, + "learning_rate": 8.947097017447546e-08, + "loss": 0.0002, + "num_input_tokens_seen": 121042920, + "step": 179675 + }, + { + "epoch": 4.3896122932597175, + "grad_norm": 0.0003096538130193949, + "learning_rate": 8.94357158760587e-08, + "loss": 0.0, + "num_input_tokens_seen": 121046568, + "step": 179680 + }, + { + "epoch": 4.389734444091564, + "grad_norm": 0.0009752101614139974, + "learning_rate": 8.940046819951052e-08, + "loss": 0.0, + "num_input_tokens_seen": 121050024, + "step": 179685 + }, + { + "epoch": 4.389856594923412, + "grad_norm": 0.0027850079350173473, + "learning_rate": 8.936522714508676e-08, + "loss": 0.0, + "num_input_tokens_seen": 121053096, + "step": 179690 + }, + { + "epoch": 4.389978745755259, + "grad_norm": 0.0012620454654097557, + "learning_rate": 8.932999271304432e-08, + "loss": 0.0, + "num_input_tokens_seen": 121056360, + "step": 179695 + }, + { + "epoch": 4.390100896587105, + "grad_norm": 0.0006452227826230228, + "learning_rate": 8.929476490363885e-08, + "loss": 0.0, + "num_input_tokens_seen": 121059688, + "step": 179700 + }, + { + "epoch": 4.390223047418953, + "grad_norm": 0.001190279726870358, + "learning_rate": 8.925954371712696e-08, + "loss": 0.0, + "num_input_tokens_seen": 121063720, + "step": 179705 + }, + { + "epoch": 4.3903451982508, + "grad_norm": 0.0012196673778817058, + "learning_rate": 8.922432915376443e-08, + "loss": 0.0, + "num_input_tokens_seen": 121067048, + "step": 179710 + }, + { + "epoch": 4.390467349082647, + "grad_norm": 0.01806124486029148, + "learning_rate": 8.918912121380761e-08, + "loss": 0.0, + "num_input_tokens_seen": 121070568, + "step": 179715 + }, + { + "epoch": 4.390589499914494, + "grad_norm": 0.001845500897616148, + "learning_rate": 8.915391989751265e-08, + "loss": 0.0, + "num_input_tokens_seen": 121073832, + "step": 179720 + }, + { + "epoch": 4.390711650746342, + "grad_norm": 0.0005148798227310181, + "learning_rate": 8.911872520513542e-08, + "loss": 0.0002, + "num_input_tokens_seen": 121076968, + "step": 179725 + }, + { + "epoch": 4.3908338015781885, + "grad_norm": 0.00020431082521099597, + "learning_rate": 8.908353713693162e-08, + "loss": 0.0014, + "num_input_tokens_seen": 121080552, + "step": 179730 + }, + { + "epoch": 4.390955952410036, + "grad_norm": 0.0008650731761008501, + "learning_rate": 8.90483556931575e-08, + "loss": 0.0, + "num_input_tokens_seen": 121084392, + "step": 179735 + }, + { + "epoch": 4.391078103241883, + "grad_norm": 0.0010449419496580958, + "learning_rate": 8.90131808740685e-08, + "loss": 0.0, + "num_input_tokens_seen": 121088040, + "step": 179740 + }, + { + "epoch": 4.3912002540737305, + "grad_norm": 0.0005244952626526356, + "learning_rate": 8.8978012679921e-08, + "loss": 0.0, + "num_input_tokens_seen": 121092008, + "step": 179745 + }, + { + "epoch": 4.391322404905577, + "grad_norm": 0.00010663815919542685, + "learning_rate": 8.894285111097011e-08, + "loss": 0.0318, + "num_input_tokens_seen": 121095400, + "step": 179750 + }, + { + "epoch": 4.391444555737425, + "grad_norm": 4.027450268040411e-05, + "learning_rate": 8.890769616747207e-08, + "loss": 0.0, + "num_input_tokens_seen": 121098344, + "step": 179755 + }, + { + "epoch": 4.391566706569272, + "grad_norm": 7.695380918448791e-05, + "learning_rate": 8.887254784968223e-08, + "loss": 0.0, + "num_input_tokens_seen": 121101736, + "step": 179760 + }, + { + "epoch": 4.391688857401119, + "grad_norm": 0.0006177126779220998, + "learning_rate": 8.88374061578564e-08, + "loss": 0.0, + "num_input_tokens_seen": 121104936, + "step": 179765 + }, + { + "epoch": 4.391811008232966, + "grad_norm": 0.22347557544708252, + "learning_rate": 8.880227109224981e-08, + "loss": 0.0, + "num_input_tokens_seen": 121108648, + "step": 179770 + }, + { + "epoch": 4.391933159064814, + "grad_norm": 3.539887256920338e-05, + "learning_rate": 8.876714265311824e-08, + "loss": 0.0, + "num_input_tokens_seen": 121111720, + "step": 179775 + }, + { + "epoch": 4.39205530989666, + "grad_norm": 0.0023835294414311647, + "learning_rate": 8.873202084071717e-08, + "loss": 0.0, + "num_input_tokens_seen": 121115368, + "step": 179780 + }, + { + "epoch": 4.392177460728508, + "grad_norm": 0.00017241346358787268, + "learning_rate": 8.869690565530185e-08, + "loss": 0.0, + "num_input_tokens_seen": 121118632, + "step": 179785 + }, + { + "epoch": 4.392299611560355, + "grad_norm": 0.00017596670659258962, + "learning_rate": 8.866179709712795e-08, + "loss": 0.0, + "num_input_tokens_seen": 121121512, + "step": 179790 + }, + { + "epoch": 4.3924217623922015, + "grad_norm": 0.0005292627029120922, + "learning_rate": 8.862669516645039e-08, + "loss": 0.0288, + "num_input_tokens_seen": 121125672, + "step": 179795 + }, + { + "epoch": 4.392543913224049, + "grad_norm": 0.00010504711826797575, + "learning_rate": 8.85915998635246e-08, + "loss": 0.0, + "num_input_tokens_seen": 121128680, + "step": 179800 + }, + { + "epoch": 4.392666064055896, + "grad_norm": 0.0018456984544172883, + "learning_rate": 8.855651118860608e-08, + "loss": 0.0299, + "num_input_tokens_seen": 121132264, + "step": 179805 + }, + { + "epoch": 4.3927882148877435, + "grad_norm": 0.0027914142701774836, + "learning_rate": 8.85214291419496e-08, + "loss": 0.0, + "num_input_tokens_seen": 121135528, + "step": 179810 + }, + { + "epoch": 4.39291036571959, + "grad_norm": 0.01401690673083067, + "learning_rate": 8.848635372381052e-08, + "loss": 0.0, + "num_input_tokens_seen": 121139304, + "step": 179815 + }, + { + "epoch": 4.393032516551438, + "grad_norm": 9.930646228895057e-06, + "learning_rate": 8.845128493444365e-08, + "loss": 0.0, + "num_input_tokens_seen": 121142568, + "step": 179820 + }, + { + "epoch": 4.393154667383285, + "grad_norm": 0.0008397593628615141, + "learning_rate": 8.841622277410455e-08, + "loss": 0.0, + "num_input_tokens_seen": 121146216, + "step": 179825 + }, + { + "epoch": 4.393276818215132, + "grad_norm": 0.0007986437994986773, + "learning_rate": 8.838116724304767e-08, + "loss": 0.0, + "num_input_tokens_seen": 121149480, + "step": 179830 + }, + { + "epoch": 4.393398969046979, + "grad_norm": 0.0004919094499200583, + "learning_rate": 8.834611834152806e-08, + "loss": 0.0, + "num_input_tokens_seen": 121152616, + "step": 179835 + }, + { + "epoch": 4.393521119878827, + "grad_norm": 0.00010752362140920013, + "learning_rate": 8.831107606980093e-08, + "loss": 0.0, + "num_input_tokens_seen": 121156392, + "step": 179840 + }, + { + "epoch": 4.393643270710673, + "grad_norm": 0.0016955259488895535, + "learning_rate": 8.827604042812054e-08, + "loss": 0.0, + "num_input_tokens_seen": 121159656, + "step": 179845 + }, + { + "epoch": 4.393765421542521, + "grad_norm": 0.010647977702319622, + "learning_rate": 8.824101141674234e-08, + "loss": 0.0, + "num_input_tokens_seen": 121162728, + "step": 179850 + }, + { + "epoch": 4.393887572374368, + "grad_norm": 0.0001815099676605314, + "learning_rate": 8.820598903592047e-08, + "loss": 0.0, + "num_input_tokens_seen": 121166248, + "step": 179855 + }, + { + "epoch": 4.394009723206215, + "grad_norm": 0.0004509559366852045, + "learning_rate": 8.817097328591005e-08, + "loss": 0.0, + "num_input_tokens_seen": 121169256, + "step": 179860 + }, + { + "epoch": 4.394131874038062, + "grad_norm": 6.720137119293213, + "learning_rate": 8.813596416696544e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121172584, + "step": 179865 + }, + { + "epoch": 4.394254024869909, + "grad_norm": 0.0016880101757124066, + "learning_rate": 8.810096167934133e-08, + "loss": 0.0, + "num_input_tokens_seen": 121175848, + "step": 179870 + }, + { + "epoch": 4.3943761757017565, + "grad_norm": 0.0022065758239477873, + "learning_rate": 8.806596582329251e-08, + "loss": 0.0, + "num_input_tokens_seen": 121178856, + "step": 179875 + }, + { + "epoch": 4.394498326533603, + "grad_norm": 0.0007906149257905781, + "learning_rate": 8.8030976599073e-08, + "loss": 0.0, + "num_input_tokens_seen": 121182056, + "step": 179880 + }, + { + "epoch": 4.394620477365451, + "grad_norm": 0.00020999391563236713, + "learning_rate": 8.79959940069378e-08, + "loss": 0.0, + "num_input_tokens_seen": 121185832, + "step": 179885 + }, + { + "epoch": 4.394742628197298, + "grad_norm": 0.0006979668978601694, + "learning_rate": 8.796101804714084e-08, + "loss": 0.0, + "num_input_tokens_seen": 121189288, + "step": 179890 + }, + { + "epoch": 4.394864779029145, + "grad_norm": 0.0028894988354295492, + "learning_rate": 8.792604871993658e-08, + "loss": 0.0, + "num_input_tokens_seen": 121192680, + "step": 179895 + }, + { + "epoch": 4.394986929860992, + "grad_norm": 0.06202784553170204, + "learning_rate": 8.78910860255797e-08, + "loss": 0.0003, + "num_input_tokens_seen": 121196200, + "step": 179900 + }, + { + "epoch": 4.39510908069284, + "grad_norm": 8.067709131864831e-05, + "learning_rate": 8.78561299643239e-08, + "loss": 0.0, + "num_input_tokens_seen": 121199336, + "step": 179905 + }, + { + "epoch": 4.395231231524686, + "grad_norm": 0.0005007724976167083, + "learning_rate": 8.782118053642384e-08, + "loss": 0.0, + "num_input_tokens_seen": 121202472, + "step": 179910 + }, + { + "epoch": 4.395353382356534, + "grad_norm": 2.4386856239289045e-05, + "learning_rate": 8.778623774213345e-08, + "loss": 0.0, + "num_input_tokens_seen": 121205864, + "step": 179915 + }, + { + "epoch": 4.395475533188381, + "grad_norm": 6.757559458492324e-05, + "learning_rate": 8.775130158170697e-08, + "loss": 0.0, + "num_input_tokens_seen": 121208936, + "step": 179920 + }, + { + "epoch": 4.395597684020228, + "grad_norm": 0.00019384313782211393, + "learning_rate": 8.771637205539817e-08, + "loss": 0.0, + "num_input_tokens_seen": 121212136, + "step": 179925 + }, + { + "epoch": 4.395719834852075, + "grad_norm": 0.0002847821160685271, + "learning_rate": 8.768144916346155e-08, + "loss": 0.0, + "num_input_tokens_seen": 121215784, + "step": 179930 + }, + { + "epoch": 4.395841985683923, + "grad_norm": 0.00039346402627415955, + "learning_rate": 8.764653290615065e-08, + "loss": 0.0, + "num_input_tokens_seen": 121218920, + "step": 179935 + }, + { + "epoch": 4.39596413651577, + "grad_norm": 9.05127526493743e-05, + "learning_rate": 8.761162328371963e-08, + "loss": 0.0, + "num_input_tokens_seen": 121222696, + "step": 179940 + }, + { + "epoch": 4.396086287347617, + "grad_norm": 0.0003977123415097594, + "learning_rate": 8.757672029642238e-08, + "loss": 0.0, + "num_input_tokens_seen": 121225576, + "step": 179945 + }, + { + "epoch": 4.396208438179464, + "grad_norm": 2.6523548513068818e-05, + "learning_rate": 8.754182394451249e-08, + "loss": 0.0, + "num_input_tokens_seen": 121229288, + "step": 179950 + }, + { + "epoch": 4.396330589011312, + "grad_norm": 0.02637898176908493, + "learning_rate": 8.750693422824374e-08, + "loss": 0.0336, + "num_input_tokens_seen": 121232488, + "step": 179955 + }, + { + "epoch": 4.396452739843158, + "grad_norm": 0.0001352629333268851, + "learning_rate": 8.747205114787026e-08, + "loss": 0.0, + "num_input_tokens_seen": 121236200, + "step": 179960 + }, + { + "epoch": 4.396574890675005, + "grad_norm": 0.0008541085990145802, + "learning_rate": 8.743717470364532e-08, + "loss": 0.0, + "num_input_tokens_seen": 121239592, + "step": 179965 + }, + { + "epoch": 4.396697041506853, + "grad_norm": 0.0006383212166838348, + "learning_rate": 8.740230489582278e-08, + "loss": 0.0, + "num_input_tokens_seen": 121243240, + "step": 179970 + }, + { + "epoch": 4.3968191923386994, + "grad_norm": 0.0004918515332974494, + "learning_rate": 8.736744172465604e-08, + "loss": 0.0, + "num_input_tokens_seen": 121246760, + "step": 179975 + }, + { + "epoch": 4.396941343170547, + "grad_norm": 0.004548352677375078, + "learning_rate": 8.733258519039888e-08, + "loss": 0.0, + "num_input_tokens_seen": 121250280, + "step": 179980 + }, + { + "epoch": 4.397063494002394, + "grad_norm": 0.0026537757366895676, + "learning_rate": 8.729773529330442e-08, + "loss": 0.0, + "num_input_tokens_seen": 121252968, + "step": 179985 + }, + { + "epoch": 4.3971856448342415, + "grad_norm": 0.01826804131269455, + "learning_rate": 8.726289203362636e-08, + "loss": 0.0, + "num_input_tokens_seen": 121255912, + "step": 179990 + }, + { + "epoch": 4.397307795666088, + "grad_norm": 0.0012325093848630786, + "learning_rate": 8.722805541161826e-08, + "loss": 0.0, + "num_input_tokens_seen": 121259176, + "step": 179995 + }, + { + "epoch": 4.397429946497936, + "grad_norm": 0.00019591573800425977, + "learning_rate": 8.719322542753305e-08, + "loss": 0.0, + "num_input_tokens_seen": 121262248, + "step": 180000 + }, + { + "epoch": 4.397552097329783, + "grad_norm": 0.0003988531534560025, + "learning_rate": 8.715840208162439e-08, + "loss": 0.0, + "num_input_tokens_seen": 121265576, + "step": 180005 + }, + { + "epoch": 4.39767424816163, + "grad_norm": 0.00022057151363696903, + "learning_rate": 8.71235853741451e-08, + "loss": 0.0, + "num_input_tokens_seen": 121269096, + "step": 180010 + }, + { + "epoch": 4.397796398993477, + "grad_norm": 4.593012636178173e-05, + "learning_rate": 8.708877530534897e-08, + "loss": 0.0, + "num_input_tokens_seen": 121272360, + "step": 180015 + }, + { + "epoch": 4.397918549825325, + "grad_norm": 0.0004291969817131758, + "learning_rate": 8.705397187548846e-08, + "loss": 0.0, + "num_input_tokens_seen": 121275496, + "step": 180020 + }, + { + "epoch": 4.398040700657171, + "grad_norm": 0.0010543052339926362, + "learning_rate": 8.701917508481715e-08, + "loss": 0.0, + "num_input_tokens_seen": 121279272, + "step": 180025 + }, + { + "epoch": 4.398162851489019, + "grad_norm": 5.976646571070887e-05, + "learning_rate": 8.698438493358806e-08, + "loss": 0.0, + "num_input_tokens_seen": 121282472, + "step": 180030 + }, + { + "epoch": 4.398285002320866, + "grad_norm": 0.001080619520507753, + "learning_rate": 8.69496014220541e-08, + "loss": 0.0, + "num_input_tokens_seen": 121286056, + "step": 180035 + }, + { + "epoch": 4.398407153152713, + "grad_norm": 0.0015729547012597322, + "learning_rate": 8.691482455046806e-08, + "loss": 0.0, + "num_input_tokens_seen": 121289448, + "step": 180040 + }, + { + "epoch": 4.39852930398456, + "grad_norm": 0.0009298583609052002, + "learning_rate": 8.688005431908318e-08, + "loss": 0.0, + "num_input_tokens_seen": 121292840, + "step": 180045 + }, + { + "epoch": 4.398651454816408, + "grad_norm": 0.0070594120770692825, + "learning_rate": 8.684529072815206e-08, + "loss": 0.0, + "num_input_tokens_seen": 121296360, + "step": 180050 + }, + { + "epoch": 4.3987736056482545, + "grad_norm": 0.0002929186448454857, + "learning_rate": 8.681053377792768e-08, + "loss": 0.0, + "num_input_tokens_seen": 121299752, + "step": 180055 + }, + { + "epoch": 4.398895756480101, + "grad_norm": 0.00671204412356019, + "learning_rate": 8.677578346866254e-08, + "loss": 0.0, + "num_input_tokens_seen": 121303144, + "step": 180060 + }, + { + "epoch": 4.399017907311949, + "grad_norm": 0.0009458751883357763, + "learning_rate": 8.674103980060964e-08, + "loss": 0.0, + "num_input_tokens_seen": 121306856, + "step": 180065 + }, + { + "epoch": 4.399140058143796, + "grad_norm": 0.002767004305496812, + "learning_rate": 8.670630277402147e-08, + "loss": 0.0, + "num_input_tokens_seen": 121310248, + "step": 180070 + }, + { + "epoch": 4.399262208975643, + "grad_norm": 0.0006484072655439377, + "learning_rate": 8.667157238915079e-08, + "loss": 0.0, + "num_input_tokens_seen": 121313768, + "step": 180075 + }, + { + "epoch": 4.39938435980749, + "grad_norm": 0.00034164972021244466, + "learning_rate": 8.663684864624998e-08, + "loss": 0.0, + "num_input_tokens_seen": 121317736, + "step": 180080 + }, + { + "epoch": 4.399506510639338, + "grad_norm": 0.00013225118163973093, + "learning_rate": 8.660213154557172e-08, + "loss": 0.0, + "num_input_tokens_seen": 121320616, + "step": 180085 + }, + { + "epoch": 4.399628661471184, + "grad_norm": 0.0022325459867715836, + "learning_rate": 8.656742108736859e-08, + "loss": 0.0, + "num_input_tokens_seen": 121324328, + "step": 180090 + }, + { + "epoch": 4.399750812303032, + "grad_norm": 0.03179163113236427, + "learning_rate": 8.653271727189259e-08, + "loss": 0.0, + "num_input_tokens_seen": 121328296, + "step": 180095 + }, + { + "epoch": 4.399872963134879, + "grad_norm": 0.0007358693983405828, + "learning_rate": 8.649802009939666e-08, + "loss": 0.0, + "num_input_tokens_seen": 121331752, + "step": 180100 + }, + { + "epoch": 4.399995113966726, + "grad_norm": 0.0012749300803989172, + "learning_rate": 8.646332957013258e-08, + "loss": 0.0, + "num_input_tokens_seen": 121334952, + "step": 180105 + }, + { + "epoch": 4.400117264798573, + "grad_norm": 0.00021154101705178618, + "learning_rate": 8.642864568435281e-08, + "loss": 0.0, + "num_input_tokens_seen": 121338792, + "step": 180110 + }, + { + "epoch": 4.400239415630421, + "grad_norm": 7.006477244431153e-05, + "learning_rate": 8.639396844230995e-08, + "loss": 0.09, + "num_input_tokens_seen": 121342120, + "step": 180115 + }, + { + "epoch": 4.4003615664622675, + "grad_norm": 0.002877721330150962, + "learning_rate": 8.635929784425566e-08, + "loss": 0.105, + "num_input_tokens_seen": 121345576, + "step": 180120 + }, + { + "epoch": 4.400483717294115, + "grad_norm": 0.0005133976228535175, + "learning_rate": 8.632463389044253e-08, + "loss": 0.0, + "num_input_tokens_seen": 121349288, + "step": 180125 + }, + { + "epoch": 4.400605868125962, + "grad_norm": 0.00018399501277599484, + "learning_rate": 8.628997658112214e-08, + "loss": 0.0, + "num_input_tokens_seen": 121353448, + "step": 180130 + }, + { + "epoch": 4.4007280189578095, + "grad_norm": 0.0013751068618148565, + "learning_rate": 8.625532591654705e-08, + "loss": 0.0, + "num_input_tokens_seen": 121356648, + "step": 180135 + }, + { + "epoch": 4.400850169789656, + "grad_norm": 8.092206553556025e-05, + "learning_rate": 8.622068189696896e-08, + "loss": 0.0, + "num_input_tokens_seen": 121359912, + "step": 180140 + }, + { + "epoch": 4.400972320621504, + "grad_norm": 0.00022807734785601497, + "learning_rate": 8.618604452263967e-08, + "loss": 0.0, + "num_input_tokens_seen": 121363176, + "step": 180145 + }, + { + "epoch": 4.401094471453351, + "grad_norm": 3.466855196165852e-05, + "learning_rate": 8.615141379381141e-08, + "loss": 0.0, + "num_input_tokens_seen": 121366504, + "step": 180150 + }, + { + "epoch": 4.401216622285197, + "grad_norm": 0.0007562345126643777, + "learning_rate": 8.611678971073577e-08, + "loss": 0.0, + "num_input_tokens_seen": 121370088, + "step": 180155 + }, + { + "epoch": 4.401338773117045, + "grad_norm": 0.005128931254148483, + "learning_rate": 8.608217227366465e-08, + "loss": 0.0, + "num_input_tokens_seen": 121373736, + "step": 180160 + }, + { + "epoch": 4.401460923948892, + "grad_norm": 8.437899668933824e-05, + "learning_rate": 8.604756148284975e-08, + "loss": 0.0, + "num_input_tokens_seen": 121376936, + "step": 180165 + }, + { + "epoch": 4.401583074780739, + "grad_norm": 0.0020770810078829527, + "learning_rate": 8.601295733854297e-08, + "loss": 0.0, + "num_input_tokens_seen": 121380264, + "step": 180170 + }, + { + "epoch": 4.401705225612586, + "grad_norm": 0.00014438113430514932, + "learning_rate": 8.597835984099566e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121383848, + "step": 180175 + }, + { + "epoch": 4.401827376444434, + "grad_norm": 8.756471288506873e-06, + "learning_rate": 8.59437689904594e-08, + "loss": 0.0, + "num_input_tokens_seen": 121387112, + "step": 180180 + }, + { + "epoch": 4.4019495272762805, + "grad_norm": 0.0019400314195081592, + "learning_rate": 8.590918478718623e-08, + "loss": 0.0, + "num_input_tokens_seen": 121390760, + "step": 180185 + }, + { + "epoch": 4.402071678108128, + "grad_norm": 0.0010442528873682022, + "learning_rate": 8.587460723142715e-08, + "loss": 0.0, + "num_input_tokens_seen": 121394088, + "step": 180190 + }, + { + "epoch": 4.402193828939975, + "grad_norm": 0.0024224664084613323, + "learning_rate": 8.584003632343384e-08, + "loss": 0.0, + "num_input_tokens_seen": 121397608, + "step": 180195 + }, + { + "epoch": 4.4023159797718225, + "grad_norm": 0.0025445320643484592, + "learning_rate": 8.580547206345767e-08, + "loss": 0.0, + "num_input_tokens_seen": 121401960, + "step": 180200 + }, + { + "epoch": 4.402438130603669, + "grad_norm": 0.0017734251450747252, + "learning_rate": 8.577091445174988e-08, + "loss": 0.0, + "num_input_tokens_seen": 121405544, + "step": 180205 + }, + { + "epoch": 4.402560281435517, + "grad_norm": 0.011874084360897541, + "learning_rate": 8.573636348856205e-08, + "loss": 0.0, + "num_input_tokens_seen": 121409128, + "step": 180210 + }, + { + "epoch": 4.402682432267364, + "grad_norm": 0.01571572758257389, + "learning_rate": 8.570181917414521e-08, + "loss": 0.0, + "num_input_tokens_seen": 121412008, + "step": 180215 + }, + { + "epoch": 4.402804583099211, + "grad_norm": 0.0001697017578408122, + "learning_rate": 8.56672815087508e-08, + "loss": 0.0, + "num_input_tokens_seen": 121415656, + "step": 180220 + }, + { + "epoch": 4.402926733931058, + "grad_norm": 0.00033848744351416826, + "learning_rate": 8.563275049262964e-08, + "loss": 0.0, + "num_input_tokens_seen": 121419240, + "step": 180225 + }, + { + "epoch": 4.403048884762905, + "grad_norm": 0.00022906869708094746, + "learning_rate": 8.559822612603318e-08, + "loss": 0.0, + "num_input_tokens_seen": 121422312, + "step": 180230 + }, + { + "epoch": 4.403171035594752, + "grad_norm": 0.005000482778996229, + "learning_rate": 8.556370840921235e-08, + "loss": 0.0, + "num_input_tokens_seen": 121425576, + "step": 180235 + }, + { + "epoch": 4.403293186426599, + "grad_norm": 0.0008071277989074588, + "learning_rate": 8.552919734241825e-08, + "loss": 0.0, + "num_input_tokens_seen": 121428776, + "step": 180240 + }, + { + "epoch": 4.403415337258447, + "grad_norm": 4.6842753363307565e-05, + "learning_rate": 8.549469292590161e-08, + "loss": 0.0, + "num_input_tokens_seen": 121431976, + "step": 180245 + }, + { + "epoch": 4.4035374880902936, + "grad_norm": 0.0004724858154077083, + "learning_rate": 8.546019515991376e-08, + "loss": 0.0, + "num_input_tokens_seen": 121434856, + "step": 180250 + }, + { + "epoch": 4.403659638922141, + "grad_norm": 8.139275450957939e-05, + "learning_rate": 8.542570404470529e-08, + "loss": 0.0, + "num_input_tokens_seen": 121438184, + "step": 180255 + }, + { + "epoch": 4.403781789753988, + "grad_norm": 0.0004135536146350205, + "learning_rate": 8.539121958052697e-08, + "loss": 0.0, + "num_input_tokens_seen": 121441768, + "step": 180260 + }, + { + "epoch": 4.403903940585836, + "grad_norm": 0.00012670079013332725, + "learning_rate": 8.535674176762986e-08, + "loss": 0.0, + "num_input_tokens_seen": 121444968, + "step": 180265 + }, + { + "epoch": 4.404026091417682, + "grad_norm": 0.0009698671055957675, + "learning_rate": 8.53222706062643e-08, + "loss": 0.0, + "num_input_tokens_seen": 121448296, + "step": 180270 + }, + { + "epoch": 4.40414824224953, + "grad_norm": 0.00019362542661838233, + "learning_rate": 8.52878060966813e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121452072, + "step": 180275 + }, + { + "epoch": 4.404270393081377, + "grad_norm": 0.000198545036255382, + "learning_rate": 8.525334823913156e-08, + "loss": 0.0003, + "num_input_tokens_seen": 121455272, + "step": 180280 + }, + { + "epoch": 4.404392543913224, + "grad_norm": 0.00477649737149477, + "learning_rate": 8.521889703386532e-08, + "loss": 0.0, + "num_input_tokens_seen": 121458408, + "step": 180285 + }, + { + "epoch": 4.404514694745071, + "grad_norm": 0.0031864261254668236, + "learning_rate": 8.518445248113338e-08, + "loss": 0.0, + "num_input_tokens_seen": 121461736, + "step": 180290 + }, + { + "epoch": 4.404636845576919, + "grad_norm": 0.0018196194432675838, + "learning_rate": 8.515001458118609e-08, + "loss": 0.0, + "num_input_tokens_seen": 121465640, + "step": 180295 + }, + { + "epoch": 4.4047589964087654, + "grad_norm": 0.006328812334686518, + "learning_rate": 8.511558333427393e-08, + "loss": 0.0, + "num_input_tokens_seen": 121468968, + "step": 180300 + }, + { + "epoch": 4.404881147240613, + "grad_norm": 0.0008431719616055489, + "learning_rate": 8.508115874064759e-08, + "loss": 0.0, + "num_input_tokens_seen": 121472232, + "step": 180305 + }, + { + "epoch": 4.40500329807246, + "grad_norm": 0.0002700486802496016, + "learning_rate": 8.504674080055685e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121475752, + "step": 180310 + }, + { + "epoch": 4.4051254489043075, + "grad_norm": 0.00010356915299780667, + "learning_rate": 8.501232951425252e-08, + "loss": 0.0, + "num_input_tokens_seen": 121479464, + "step": 180315 + }, + { + "epoch": 4.405247599736154, + "grad_norm": 0.0001315046101808548, + "learning_rate": 8.497792488198452e-08, + "loss": 0.0, + "num_input_tokens_seen": 121483048, + "step": 180320 + }, + { + "epoch": 4.405369750568001, + "grad_norm": 0.0014662779867649078, + "learning_rate": 8.494352690400319e-08, + "loss": 0.0, + "num_input_tokens_seen": 121486376, + "step": 180325 + }, + { + "epoch": 4.405491901399849, + "grad_norm": 0.0007219245890155435, + "learning_rate": 8.490913558055856e-08, + "loss": 0.0, + "num_input_tokens_seen": 121489448, + "step": 180330 + }, + { + "epoch": 4.405614052231695, + "grad_norm": 0.00012071897799614817, + "learning_rate": 8.487475091190088e-08, + "loss": 0.0, + "num_input_tokens_seen": 121492904, + "step": 180335 + }, + { + "epoch": 4.405736203063543, + "grad_norm": 4.9478978326078504e-05, + "learning_rate": 8.484037289828028e-08, + "loss": 0.0, + "num_input_tokens_seen": 121496424, + "step": 180340 + }, + { + "epoch": 4.40585835389539, + "grad_norm": 0.003782287472859025, + "learning_rate": 8.480600153994666e-08, + "loss": 0.0, + "num_input_tokens_seen": 121499496, + "step": 180345 + }, + { + "epoch": 4.405980504727237, + "grad_norm": 0.00012144062930019572, + "learning_rate": 8.477163683714972e-08, + "loss": 0.0, + "num_input_tokens_seen": 121502696, + "step": 180350 + }, + { + "epoch": 4.406102655559084, + "grad_norm": 0.0005082075367681682, + "learning_rate": 8.473727879013981e-08, + "loss": 0.0, + "num_input_tokens_seen": 121506088, + "step": 180355 + }, + { + "epoch": 4.406224806390932, + "grad_norm": 2.384355138929095e-05, + "learning_rate": 8.470292739916641e-08, + "loss": 0.0, + "num_input_tokens_seen": 121509672, + "step": 180360 + }, + { + "epoch": 4.4063469572227785, + "grad_norm": 0.0004144615086261183, + "learning_rate": 8.466858266447962e-08, + "loss": 0.0004, + "num_input_tokens_seen": 121513064, + "step": 180365 + }, + { + "epoch": 4.406469108054626, + "grad_norm": 0.0003986510564573109, + "learning_rate": 8.463424458632906e-08, + "loss": 0.0, + "num_input_tokens_seen": 121516456, + "step": 180370 + }, + { + "epoch": 4.406591258886473, + "grad_norm": 0.0025848003569990396, + "learning_rate": 8.459991316496452e-08, + "loss": 0.0, + "num_input_tokens_seen": 121519656, + "step": 180375 + }, + { + "epoch": 4.4067134097183205, + "grad_norm": 0.0050504072569310665, + "learning_rate": 8.456558840063555e-08, + "loss": 0.0, + "num_input_tokens_seen": 121522728, + "step": 180380 + }, + { + "epoch": 4.406835560550167, + "grad_norm": 0.00011555037781363353, + "learning_rate": 8.453127029359197e-08, + "loss": 0.0, + "num_input_tokens_seen": 121525608, + "step": 180385 + }, + { + "epoch": 4.406957711382015, + "grad_norm": 0.001160181942395866, + "learning_rate": 8.449695884408303e-08, + "loss": 0.0, + "num_input_tokens_seen": 121528936, + "step": 180390 + }, + { + "epoch": 4.407079862213862, + "grad_norm": 0.002147308550775051, + "learning_rate": 8.446265405235842e-08, + "loss": 0.0435, + "num_input_tokens_seen": 121532008, + "step": 180395 + }, + { + "epoch": 4.407202013045709, + "grad_norm": 15.700284957885742, + "learning_rate": 8.442835591866792e-08, + "loss": 0.0286, + "num_input_tokens_seen": 121535144, + "step": 180400 + }, + { + "epoch": 4.407324163877556, + "grad_norm": 8.615958358859643e-05, + "learning_rate": 8.439406444326047e-08, + "loss": 0.0, + "num_input_tokens_seen": 121538920, + "step": 180405 + }, + { + "epoch": 4.407446314709404, + "grad_norm": 0.00015841845015529543, + "learning_rate": 8.435977962638574e-08, + "loss": 0.0, + "num_input_tokens_seen": 121542120, + "step": 180410 + }, + { + "epoch": 4.40756846554125, + "grad_norm": 3.081785325775854e-05, + "learning_rate": 8.432550146829287e-08, + "loss": 0.0, + "num_input_tokens_seen": 121545512, + "step": 180415 + }, + { + "epoch": 4.407690616373097, + "grad_norm": 0.004024590831249952, + "learning_rate": 8.429122996923143e-08, + "loss": 0.0, + "num_input_tokens_seen": 121548776, + "step": 180420 + }, + { + "epoch": 4.407812767204945, + "grad_norm": 8.935786900110543e-05, + "learning_rate": 8.425696512945024e-08, + "loss": 0.0, + "num_input_tokens_seen": 121552232, + "step": 180425 + }, + { + "epoch": 4.4079349180367915, + "grad_norm": 0.0006876801489852369, + "learning_rate": 8.422270694919864e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121555432, + "step": 180430 + }, + { + "epoch": 4.408057068868639, + "grad_norm": 6.320657848846167e-05, + "learning_rate": 8.41884554287261e-08, + "loss": 0.0, + "num_input_tokens_seen": 121558696, + "step": 180435 + }, + { + "epoch": 4.408179219700486, + "grad_norm": 0.01520298607647419, + "learning_rate": 8.415421056828132e-08, + "loss": 0.0, + "num_input_tokens_seen": 121562088, + "step": 180440 + }, + { + "epoch": 4.4083013705323335, + "grad_norm": 0.00019980291835963726, + "learning_rate": 8.411997236811352e-08, + "loss": 0.0, + "num_input_tokens_seen": 121565416, + "step": 180445 + }, + { + "epoch": 4.40842352136418, + "grad_norm": 4.798701047548093e-05, + "learning_rate": 8.408574082847164e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121568616, + "step": 180450 + }, + { + "epoch": 4.408545672196028, + "grad_norm": 0.0002813969331327826, + "learning_rate": 8.405151594960447e-08, + "loss": 0.0, + "num_input_tokens_seen": 121572264, + "step": 180455 + }, + { + "epoch": 4.408667823027875, + "grad_norm": 0.0001570227468619123, + "learning_rate": 8.401729773176114e-08, + "loss": 0.0, + "num_input_tokens_seen": 121575528, + "step": 180460 + }, + { + "epoch": 4.408789973859722, + "grad_norm": 9.800500265555456e-05, + "learning_rate": 8.398308617519024e-08, + "loss": 0.0, + "num_input_tokens_seen": 121578536, + "step": 180465 + }, + { + "epoch": 4.408912124691569, + "grad_norm": 5.589954889728688e-05, + "learning_rate": 8.394888128014099e-08, + "loss": 0.0, + "num_input_tokens_seen": 121582184, + "step": 180470 + }, + { + "epoch": 4.409034275523417, + "grad_norm": 0.0007112363819032907, + "learning_rate": 8.391468304686155e-08, + "loss": 0.0, + "num_input_tokens_seen": 121585256, + "step": 180475 + }, + { + "epoch": 4.409156426355263, + "grad_norm": 0.0006815246306359768, + "learning_rate": 8.388049147560117e-08, + "loss": 0.0, + "num_input_tokens_seen": 121588712, + "step": 180480 + }, + { + "epoch": 4.409278577187111, + "grad_norm": 0.010045552626252174, + "learning_rate": 8.384630656660807e-08, + "loss": 0.0, + "num_input_tokens_seen": 121592104, + "step": 180485 + }, + { + "epoch": 4.409400728018958, + "grad_norm": 0.015227672643959522, + "learning_rate": 8.381212832013107e-08, + "loss": 0.0, + "num_input_tokens_seen": 121595240, + "step": 180490 + }, + { + "epoch": 4.4095228788508045, + "grad_norm": 0.0007747916970402002, + "learning_rate": 8.377795673641886e-08, + "loss": 0.0, + "num_input_tokens_seen": 121598440, + "step": 180495 + }, + { + "epoch": 4.409645029682652, + "grad_norm": 0.059335384517908096, + "learning_rate": 8.374379181571967e-08, + "loss": 0.0, + "num_input_tokens_seen": 121601704, + "step": 180500 + }, + { + "epoch": 4.409767180514499, + "grad_norm": 0.00015939258446451277, + "learning_rate": 8.37096335582822e-08, + "loss": 0.0, + "num_input_tokens_seen": 121604968, + "step": 180505 + }, + { + "epoch": 4.4098893313463465, + "grad_norm": 0.0002897983358707279, + "learning_rate": 8.367548196435447e-08, + "loss": 0.0, + "num_input_tokens_seen": 121608104, + "step": 180510 + }, + { + "epoch": 4.410011482178193, + "grad_norm": 0.00018529892258811742, + "learning_rate": 8.364133703418518e-08, + "loss": 0.0, + "num_input_tokens_seen": 121611112, + "step": 180515 + }, + { + "epoch": 4.410133633010041, + "grad_norm": 0.00022547252592630684, + "learning_rate": 8.360719876802269e-08, + "loss": 0.0388, + "num_input_tokens_seen": 121614504, + "step": 180520 + }, + { + "epoch": 4.410255783841888, + "grad_norm": 1.873561996035278e-05, + "learning_rate": 8.357306716611501e-08, + "loss": 0.0, + "num_input_tokens_seen": 121617896, + "step": 180525 + }, + { + "epoch": 4.410377934673735, + "grad_norm": 0.00014970744086895138, + "learning_rate": 8.35389422287106e-08, + "loss": 0.0, + "num_input_tokens_seen": 121621096, + "step": 180530 + }, + { + "epoch": 4.410500085505582, + "grad_norm": 0.0071835326962172985, + "learning_rate": 8.35048239560574e-08, + "loss": 0.0, + "num_input_tokens_seen": 121624616, + "step": 180535 + }, + { + "epoch": 4.41062223633743, + "grad_norm": 2.6335650545661338e-05, + "learning_rate": 8.347071234840374e-08, + "loss": 0.0, + "num_input_tokens_seen": 121627688, + "step": 180540 + }, + { + "epoch": 4.410744387169276, + "grad_norm": 0.00024163529451470822, + "learning_rate": 8.343660740599745e-08, + "loss": 0.0, + "num_input_tokens_seen": 121631144, + "step": 180545 + }, + { + "epoch": 4.410866538001124, + "grad_norm": 0.00012851272185798734, + "learning_rate": 8.340250912908675e-08, + "loss": 0.0, + "num_input_tokens_seen": 121634280, + "step": 180550 + }, + { + "epoch": 4.410988688832971, + "grad_norm": 3.004685277119279e-05, + "learning_rate": 8.336841751791946e-08, + "loss": 0.0, + "num_input_tokens_seen": 121637864, + "step": 180555 + }, + { + "epoch": 4.411110839664818, + "grad_norm": 0.000623641477432102, + "learning_rate": 8.333433257274369e-08, + "loss": 0.0, + "num_input_tokens_seen": 121641320, + "step": 180560 + }, + { + "epoch": 4.411232990496665, + "grad_norm": 6.0218488215468824e-05, + "learning_rate": 8.330025429380727e-08, + "loss": 0.0, + "num_input_tokens_seen": 121644584, + "step": 180565 + }, + { + "epoch": 4.411355141328513, + "grad_norm": 0.000500671798363328, + "learning_rate": 8.326618268135776e-08, + "loss": 0.0, + "num_input_tokens_seen": 121647592, + "step": 180570 + }, + { + "epoch": 4.4114772921603596, + "grad_norm": 0.00015049302601255476, + "learning_rate": 8.323211773564331e-08, + "loss": 0.0, + "num_input_tokens_seen": 121650664, + "step": 180575 + }, + { + "epoch": 4.411599442992207, + "grad_norm": 7.418448512908071e-05, + "learning_rate": 8.319805945691127e-08, + "loss": 0.0421, + "num_input_tokens_seen": 121653928, + "step": 180580 + }, + { + "epoch": 4.411721593824054, + "grad_norm": 7.855272997403517e-05, + "learning_rate": 8.316400784540966e-08, + "loss": 0.0, + "num_input_tokens_seen": 121657576, + "step": 180585 + }, + { + "epoch": 4.411843744655901, + "grad_norm": 0.00044542065006680787, + "learning_rate": 8.312996290138607e-08, + "loss": 0.0, + "num_input_tokens_seen": 121660904, + "step": 180590 + }, + { + "epoch": 4.411965895487748, + "grad_norm": 7.770089723635465e-05, + "learning_rate": 8.309592462508774e-08, + "loss": 0.0, + "num_input_tokens_seen": 121664232, + "step": 180595 + }, + { + "epoch": 4.412088046319595, + "grad_norm": 2.8739916615450056e-06, + "learning_rate": 8.306189301676281e-08, + "loss": 0.0, + "num_input_tokens_seen": 121667496, + "step": 180600 + }, + { + "epoch": 4.412210197151443, + "grad_norm": 0.0005004116101190448, + "learning_rate": 8.302786807665807e-08, + "loss": 0.0, + "num_input_tokens_seen": 121670440, + "step": 180605 + }, + { + "epoch": 4.412332347983289, + "grad_norm": 0.00010662328713806346, + "learning_rate": 8.299384980502144e-08, + "loss": 0.0003, + "num_input_tokens_seen": 121673576, + "step": 180610 + }, + { + "epoch": 4.412454498815137, + "grad_norm": 0.0006866508047096431, + "learning_rate": 8.295983820210028e-08, + "loss": 0.0, + "num_input_tokens_seen": 121676840, + "step": 180615 + }, + { + "epoch": 4.412576649646984, + "grad_norm": 0.014268654398620129, + "learning_rate": 8.292583326814173e-08, + "loss": 0.0003, + "num_input_tokens_seen": 121680104, + "step": 180620 + }, + { + "epoch": 4.4126988004788315, + "grad_norm": 0.0005359610659070313, + "learning_rate": 8.289183500339337e-08, + "loss": 0.0, + "num_input_tokens_seen": 121683496, + "step": 180625 + }, + { + "epoch": 4.412820951310678, + "grad_norm": 0.006035420577973127, + "learning_rate": 8.285784340810198e-08, + "loss": 0.0, + "num_input_tokens_seen": 121687016, + "step": 180630 + }, + { + "epoch": 4.412943102142526, + "grad_norm": 7.512760203098878e-05, + "learning_rate": 8.282385848251527e-08, + "loss": 0.0, + "num_input_tokens_seen": 121690152, + "step": 180635 + }, + { + "epoch": 4.413065252974373, + "grad_norm": 0.0006038388819433749, + "learning_rate": 8.278988022687994e-08, + "loss": 0.0, + "num_input_tokens_seen": 121693416, + "step": 180640 + }, + { + "epoch": 4.41318740380622, + "grad_norm": 0.10882148891687393, + "learning_rate": 8.275590864144333e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121696808, + "step": 180645 + }, + { + "epoch": 4.413309554638067, + "grad_norm": 6.010133438394405e-05, + "learning_rate": 8.27219437264527e-08, + "loss": 0.0, + "num_input_tokens_seen": 121700712, + "step": 180650 + }, + { + "epoch": 4.413431705469915, + "grad_norm": 0.00010217710223514587, + "learning_rate": 8.268798548215472e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121704808, + "step": 180655 + }, + { + "epoch": 4.413553856301761, + "grad_norm": 0.00018335915228817612, + "learning_rate": 8.265403390879633e-08, + "loss": 0.0, + "num_input_tokens_seen": 121707880, + "step": 180660 + }, + { + "epoch": 4.413676007133609, + "grad_norm": 0.00040197354974225163, + "learning_rate": 8.262008900662464e-08, + "loss": 0.0354, + "num_input_tokens_seen": 121711080, + "step": 180665 + }, + { + "epoch": 4.413798157965456, + "grad_norm": 0.002384263789281249, + "learning_rate": 8.258615077588627e-08, + "loss": 0.0, + "num_input_tokens_seen": 121714536, + "step": 180670 + }, + { + "epoch": 4.413920308797303, + "grad_norm": 0.0006682064849883318, + "learning_rate": 8.255221921682831e-08, + "loss": 0.0, + "num_input_tokens_seen": 121718120, + "step": 180675 + }, + { + "epoch": 4.41404245962915, + "grad_norm": 0.0005522278370335698, + "learning_rate": 8.251829432969726e-08, + "loss": 0.0, + "num_input_tokens_seen": 121721320, + "step": 180680 + }, + { + "epoch": 4.414164610460997, + "grad_norm": 0.0015256262850016356, + "learning_rate": 8.248437611474013e-08, + "loss": 0.0, + "num_input_tokens_seen": 121724776, + "step": 180685 + }, + { + "epoch": 4.4142867612928445, + "grad_norm": 7.958729838719591e-06, + "learning_rate": 8.245046457220317e-08, + "loss": 0.0, + "num_input_tokens_seen": 121728424, + "step": 180690 + }, + { + "epoch": 4.414408912124691, + "grad_norm": 0.00025719229597598314, + "learning_rate": 8.241655970233341e-08, + "loss": 0.0, + "num_input_tokens_seen": 121731496, + "step": 180695 + }, + { + "epoch": 4.414531062956539, + "grad_norm": 0.00022496600286103785, + "learning_rate": 8.238266150537699e-08, + "loss": 0.0, + "num_input_tokens_seen": 121734696, + "step": 180700 + }, + { + "epoch": 4.414653213788386, + "grad_norm": 0.0012882340233772993, + "learning_rate": 8.23487699815808e-08, + "loss": 0.0, + "num_input_tokens_seen": 121738664, + "step": 180705 + }, + { + "epoch": 4.414775364620233, + "grad_norm": 0.013423847034573555, + "learning_rate": 8.231488513119123e-08, + "loss": 0.0, + "num_input_tokens_seen": 121742120, + "step": 180710 + }, + { + "epoch": 4.41489751545208, + "grad_norm": 9.619421325623989e-05, + "learning_rate": 8.22810069544545e-08, + "loss": 0.0, + "num_input_tokens_seen": 121745960, + "step": 180715 + }, + { + "epoch": 4.415019666283928, + "grad_norm": 1.3759875400864985e-05, + "learning_rate": 8.224713545161732e-08, + "loss": 0.0, + "num_input_tokens_seen": 121749800, + "step": 180720 + }, + { + "epoch": 4.415141817115774, + "grad_norm": 9.072878310689703e-05, + "learning_rate": 8.221327062292571e-08, + "loss": 0.0, + "num_input_tokens_seen": 121753704, + "step": 180725 + }, + { + "epoch": 4.415263967947622, + "grad_norm": 4.327476199250668e-05, + "learning_rate": 8.217941246862614e-08, + "loss": 0.0, + "num_input_tokens_seen": 121756840, + "step": 180730 + }, + { + "epoch": 4.415386118779469, + "grad_norm": 0.00012532465916592628, + "learning_rate": 8.214556098896464e-08, + "loss": 0.0, + "num_input_tokens_seen": 121760296, + "step": 180735 + }, + { + "epoch": 4.415508269611316, + "grad_norm": 0.0005663962219841778, + "learning_rate": 8.211171618418744e-08, + "loss": 0.0, + "num_input_tokens_seen": 121764008, + "step": 180740 + }, + { + "epoch": 4.415630420443163, + "grad_norm": 0.0007085498655214906, + "learning_rate": 8.20778780545408e-08, + "loss": 0.0001, + "num_input_tokens_seen": 121767336, + "step": 180745 + }, + { + "epoch": 4.415752571275011, + "grad_norm": 0.0009212750592269003, + "learning_rate": 8.204404660027065e-08, + "loss": 0.0, + "num_input_tokens_seen": 121770600, + "step": 180750 + }, + { + "epoch": 4.4158747221068575, + "grad_norm": 2.3134165530791506e-05, + "learning_rate": 8.201022182162332e-08, + "loss": 0.0, + "num_input_tokens_seen": 121773864, + "step": 180755 + }, + { + "epoch": 4.415996872938704, + "grad_norm": 0.00025919542531482875, + "learning_rate": 8.197640371884429e-08, + "loss": 0.0, + "num_input_tokens_seen": 121777256, + "step": 180760 + }, + { + "epoch": 4.416119023770552, + "grad_norm": 0.00022408708173315972, + "learning_rate": 8.194259229218003e-08, + "loss": 0.0, + "num_input_tokens_seen": 121781096, + "step": 180765 + }, + { + "epoch": 4.4162411746023995, + "grad_norm": 0.0001324850891251117, + "learning_rate": 8.190878754187614e-08, + "loss": 0.0, + "num_input_tokens_seen": 121784616, + "step": 180770 + }, + { + "epoch": 4.416363325434246, + "grad_norm": 3.3088224881794304e-05, + "learning_rate": 8.187498946817828e-08, + "loss": 0.0, + "num_input_tokens_seen": 121787624, + "step": 180775 + }, + { + "epoch": 4.416485476266093, + "grad_norm": 4.933834497933276e-05, + "learning_rate": 8.18411980713326e-08, + "loss": 0.0002, + "num_input_tokens_seen": 121791272, + "step": 180780 + }, + { + "epoch": 4.416607627097941, + "grad_norm": 0.00011093864304712042, + "learning_rate": 8.180741335158458e-08, + "loss": 0.0, + "num_input_tokens_seen": 121794792, + "step": 180785 + }, + { + "epoch": 4.416729777929787, + "grad_norm": 0.006448894739151001, + "learning_rate": 8.177363530918013e-08, + "loss": 0.0, + "num_input_tokens_seen": 121798632, + "step": 180790 + }, + { + "epoch": 4.416851928761635, + "grad_norm": 4.5410546590574086e-05, + "learning_rate": 8.173986394436461e-08, + "loss": 0.0, + "num_input_tokens_seen": 121802024, + "step": 180795 + }, + { + "epoch": 4.416974079593482, + "grad_norm": 0.010403868742287159, + "learning_rate": 8.17060992573838e-08, + "loss": 0.0, + "num_input_tokens_seen": 121805736, + "step": 180800 + }, + { + "epoch": 4.417096230425329, + "grad_norm": 0.00030534606776200235, + "learning_rate": 8.167234124848344e-08, + "loss": 0.0, + "num_input_tokens_seen": 121808936, + "step": 180805 + }, + { + "epoch": 4.417218381257176, + "grad_norm": 0.0001068161946022883, + "learning_rate": 8.163858991790861e-08, + "loss": 0.0, + "num_input_tokens_seen": 121812008, + "step": 180810 + }, + { + "epoch": 4.417340532089024, + "grad_norm": 9.78023890638724e-05, + "learning_rate": 8.160484526590516e-08, + "loss": 0.0, + "num_input_tokens_seen": 121815592, + "step": 180815 + }, + { + "epoch": 4.4174626829208705, + "grad_norm": 0.0007254159427247941, + "learning_rate": 8.157110729271799e-08, + "loss": 0.0, + "num_input_tokens_seen": 121818600, + "step": 180820 + }, + { + "epoch": 4.417584833752718, + "grad_norm": 0.000565720722079277, + "learning_rate": 8.153737599859312e-08, + "loss": 0.0, + "num_input_tokens_seen": 121821672, + "step": 180825 + }, + { + "epoch": 4.417706984584565, + "grad_norm": 0.005168421193957329, + "learning_rate": 8.150365138377513e-08, + "loss": 0.0, + "num_input_tokens_seen": 121825512, + "step": 180830 + }, + { + "epoch": 4.4178291354164125, + "grad_norm": 9.393416985403746e-05, + "learning_rate": 8.146993344850973e-08, + "loss": 0.0, + "num_input_tokens_seen": 121828712, + "step": 180835 + }, + { + "epoch": 4.417951286248259, + "grad_norm": 0.0016235082875937223, + "learning_rate": 8.143622219304225e-08, + "loss": 0.0, + "num_input_tokens_seen": 121831784, + "step": 180840 + }, + { + "epoch": 4.418073437080107, + "grad_norm": 3.2696454582037404e-05, + "learning_rate": 8.140251761761741e-08, + "loss": 0.0, + "num_input_tokens_seen": 121835688, + "step": 180845 + }, + { + "epoch": 4.418195587911954, + "grad_norm": 0.0008309634868055582, + "learning_rate": 8.136881972248067e-08, + "loss": 0.0, + "num_input_tokens_seen": 121839016, + "step": 180850 + }, + { + "epoch": 4.4183177387438, + "grad_norm": 0.00010062227374874055, + "learning_rate": 8.133512850787682e-08, + "loss": 0.0, + "num_input_tokens_seen": 121842984, + "step": 180855 + }, + { + "epoch": 4.418439889575648, + "grad_norm": 0.000737546244636178, + "learning_rate": 8.130144397405114e-08, + "loss": 0.0392, + "num_input_tokens_seen": 121846504, + "step": 180860 + }, + { + "epoch": 4.418562040407495, + "grad_norm": 5.2958694141125306e-05, + "learning_rate": 8.12677661212483e-08, + "loss": 0.0, + "num_input_tokens_seen": 121849832, + "step": 180865 + }, + { + "epoch": 4.418684191239342, + "grad_norm": 0.004930529743432999, + "learning_rate": 8.123409494971356e-08, + "loss": 0.0, + "num_input_tokens_seen": 121853224, + "step": 180870 + }, + { + "epoch": 4.418806342071189, + "grad_norm": 0.012600020505487919, + "learning_rate": 8.120043045969161e-08, + "loss": 0.0, + "num_input_tokens_seen": 121856616, + "step": 180875 + }, + { + "epoch": 4.418928492903037, + "grad_norm": 2.7497879273141734e-05, + "learning_rate": 8.116677265142713e-08, + "loss": 0.0, + "num_input_tokens_seen": 121859752, + "step": 180880 + }, + { + "epoch": 4.4190506437348835, + "grad_norm": 0.00012461999722290784, + "learning_rate": 8.113312152516516e-08, + "loss": 0.0, + "num_input_tokens_seen": 121862888, + "step": 180885 + }, + { + "epoch": 4.419172794566731, + "grad_norm": 7.542503590229899e-05, + "learning_rate": 8.109947708115006e-08, + "loss": 0.0, + "num_input_tokens_seen": 121866344, + "step": 180890 + }, + { + "epoch": 4.419294945398578, + "grad_norm": 0.00015688169514760375, + "learning_rate": 8.106583931962674e-08, + "loss": 0.0, + "num_input_tokens_seen": 121869800, + "step": 180895 + }, + { + "epoch": 4.419417096230426, + "grad_norm": 3.380305861355737e-05, + "learning_rate": 8.103220824083989e-08, + "loss": 0.0, + "num_input_tokens_seen": 121873384, + "step": 180900 + }, + { + "epoch": 4.419539247062272, + "grad_norm": 0.0002742180950008333, + "learning_rate": 8.0998583845034e-08, + "loss": 0.0, + "num_input_tokens_seen": 121877224, + "step": 180905 + }, + { + "epoch": 4.41966139789412, + "grad_norm": 0.0028896431904286146, + "learning_rate": 8.096496613245363e-08, + "loss": 0.0, + "num_input_tokens_seen": 121880040, + "step": 180910 + }, + { + "epoch": 4.419783548725967, + "grad_norm": 6.235108594410121e-05, + "learning_rate": 8.093135510334304e-08, + "loss": 0.0, + "num_input_tokens_seen": 121883496, + "step": 180915 + }, + { + "epoch": 4.419905699557814, + "grad_norm": 0.004725324921309948, + "learning_rate": 8.089775075794691e-08, + "loss": 0.0, + "num_input_tokens_seen": 121886696, + "step": 180920 + }, + { + "epoch": 4.420027850389661, + "grad_norm": 0.001249430701136589, + "learning_rate": 8.086415309650962e-08, + "loss": 0.0, + "num_input_tokens_seen": 121890024, + "step": 180925 + }, + { + "epoch": 4.420150001221509, + "grad_norm": 0.00012448421330191195, + "learning_rate": 8.08305621192753e-08, + "loss": 0.0, + "num_input_tokens_seen": 121893288, + "step": 180930 + }, + { + "epoch": 4.420272152053355, + "grad_norm": 8.14038940006867e-05, + "learning_rate": 8.079697782648864e-08, + "loss": 0.0, + "num_input_tokens_seen": 121896680, + "step": 180935 + }, + { + "epoch": 4.420394302885203, + "grad_norm": 0.0013667421881109476, + "learning_rate": 8.076340021839323e-08, + "loss": 0.0, + "num_input_tokens_seen": 121899880, + "step": 180940 + }, + { + "epoch": 4.42051645371705, + "grad_norm": 0.003148171119391918, + "learning_rate": 8.072982929523398e-08, + "loss": 0.0, + "num_input_tokens_seen": 121903080, + "step": 180945 + }, + { + "epoch": 4.420638604548897, + "grad_norm": 0.014106903225183487, + "learning_rate": 8.069626505725435e-08, + "loss": 0.0, + "num_input_tokens_seen": 121906216, + "step": 180950 + }, + { + "epoch": 4.420760755380744, + "grad_norm": 0.00035607683821581304, + "learning_rate": 8.066270750469883e-08, + "loss": 0.0, + "num_input_tokens_seen": 121909160, + "step": 180955 + }, + { + "epoch": 4.420882906212591, + "grad_norm": 0.00014510061009787023, + "learning_rate": 8.062915663781145e-08, + "loss": 0.0, + "num_input_tokens_seen": 121912744, + "step": 180960 + }, + { + "epoch": 4.421005057044439, + "grad_norm": 0.006395295727998018, + "learning_rate": 8.059561245683622e-08, + "loss": 0.0, + "num_input_tokens_seen": 121916456, + "step": 180965 + }, + { + "epoch": 4.421127207876285, + "grad_norm": 0.0009115340071730316, + "learning_rate": 8.056207496201672e-08, + "loss": 0.0, + "num_input_tokens_seen": 121920424, + "step": 180970 + }, + { + "epoch": 4.421249358708133, + "grad_norm": 0.0064463368616998196, + "learning_rate": 8.052854415359744e-08, + "loss": 0.0, + "num_input_tokens_seen": 121923752, + "step": 180975 + }, + { + "epoch": 4.42137150953998, + "grad_norm": 8.19075430626981e-05, + "learning_rate": 8.049502003182173e-08, + "loss": 0.0, + "num_input_tokens_seen": 121926760, + "step": 180980 + }, + { + "epoch": 4.421493660371827, + "grad_norm": 7.664481381652877e-05, + "learning_rate": 8.046150259693341e-08, + "loss": 0.0, + "num_input_tokens_seen": 121930216, + "step": 180985 + }, + { + "epoch": 4.421615811203674, + "grad_norm": 2.986722029163502e-05, + "learning_rate": 8.042799184917647e-08, + "loss": 0.0, + "num_input_tokens_seen": 121933544, + "step": 180990 + }, + { + "epoch": 4.421737962035522, + "grad_norm": 0.00506412610411644, + "learning_rate": 8.039448778879465e-08, + "loss": 0.0, + "num_input_tokens_seen": 121937064, + "step": 180995 + }, + { + "epoch": 4.4218601128673685, + "grad_norm": 0.0002981481666211039, + "learning_rate": 8.036099041603117e-08, + "loss": 0.0, + "num_input_tokens_seen": 121940200, + "step": 181000 + }, + { + "epoch": 4.421982263699216, + "grad_norm": 0.0007899164338596165, + "learning_rate": 8.032749973113017e-08, + "loss": 0.0, + "num_input_tokens_seen": 121943784, + "step": 181005 + }, + { + "epoch": 4.422104414531063, + "grad_norm": 4.1584593418519944e-05, + "learning_rate": 8.029401573433481e-08, + "loss": 0.0, + "num_input_tokens_seen": 121946920, + "step": 181010 + }, + { + "epoch": 4.4222265653629105, + "grad_norm": 0.00011753295257221907, + "learning_rate": 8.026053842588876e-08, + "loss": 0.0, + "num_input_tokens_seen": 121950120, + "step": 181015 + }, + { + "epoch": 4.422348716194757, + "grad_norm": 0.00019384206098038703, + "learning_rate": 8.022706780603549e-08, + "loss": 0.0, + "num_input_tokens_seen": 121953768, + "step": 181020 + }, + { + "epoch": 4.422470867026605, + "grad_norm": 0.0009538192534819245, + "learning_rate": 8.019360387501839e-08, + "loss": 0.0, + "num_input_tokens_seen": 121957032, + "step": 181025 + }, + { + "epoch": 4.422593017858452, + "grad_norm": 0.0008440099190920591, + "learning_rate": 8.01601466330808e-08, + "loss": 0.0, + "num_input_tokens_seen": 121960872, + "step": 181030 + }, + { + "epoch": 4.422715168690299, + "grad_norm": 0.0011175043182447553, + "learning_rate": 8.012669608046596e-08, + "loss": 0.0, + "num_input_tokens_seen": 121964136, + "step": 181035 + }, + { + "epoch": 4.422837319522146, + "grad_norm": 0.02144869789481163, + "learning_rate": 8.009325221741736e-08, + "loss": 0.0, + "num_input_tokens_seen": 121967400, + "step": 181040 + }, + { + "epoch": 4.422959470353993, + "grad_norm": 0.015995550900697708, + "learning_rate": 8.00598150441778e-08, + "loss": 0.0, + "num_input_tokens_seen": 121970600, + "step": 181045 + }, + { + "epoch": 4.42308162118584, + "grad_norm": 0.0019130867440253496, + "learning_rate": 8.002638456099087e-08, + "loss": 0.0, + "num_input_tokens_seen": 121974120, + "step": 181050 + }, + { + "epoch": 4.423203772017687, + "grad_norm": 5.404177045420511e-06, + "learning_rate": 7.999296076809958e-08, + "loss": 0.0, + "num_input_tokens_seen": 121977640, + "step": 181055 + }, + { + "epoch": 4.423325922849535, + "grad_norm": 8.673696720506996e-05, + "learning_rate": 7.995954366574675e-08, + "loss": 0.0, + "num_input_tokens_seen": 121980776, + "step": 181060 + }, + { + "epoch": 4.4234480736813815, + "grad_norm": 4.539159999694675e-05, + "learning_rate": 7.992613325417574e-08, + "loss": 0.0, + "num_input_tokens_seen": 121984168, + "step": 181065 + }, + { + "epoch": 4.423570224513229, + "grad_norm": 3.427283445489593e-05, + "learning_rate": 7.989272953362924e-08, + "loss": 0.0, + "num_input_tokens_seen": 121987624, + "step": 181070 + }, + { + "epoch": 4.423692375345076, + "grad_norm": 0.0009406374301761389, + "learning_rate": 7.985933250435039e-08, + "loss": 0.0, + "num_input_tokens_seen": 121990952, + "step": 181075 + }, + { + "epoch": 4.4238145261769235, + "grad_norm": 6.120477337390184e-05, + "learning_rate": 7.9825942166582e-08, + "loss": 0.0, + "num_input_tokens_seen": 121994344, + "step": 181080 + }, + { + "epoch": 4.42393667700877, + "grad_norm": 0.0012227625120431185, + "learning_rate": 7.979255852056677e-08, + "loss": 0.0, + "num_input_tokens_seen": 121997416, + "step": 181085 + }, + { + "epoch": 4.424058827840618, + "grad_norm": 0.006647599395364523, + "learning_rate": 7.97591815665476e-08, + "loss": 0.0, + "num_input_tokens_seen": 122000552, + "step": 181090 + }, + { + "epoch": 4.424180978672465, + "grad_norm": 0.00015228458505589515, + "learning_rate": 7.972581130476707e-08, + "loss": 0.0, + "num_input_tokens_seen": 122003688, + "step": 181095 + }, + { + "epoch": 4.424303129504312, + "grad_norm": 0.00011203240865143016, + "learning_rate": 7.969244773546812e-08, + "loss": 0.0224, + "num_input_tokens_seen": 122006824, + "step": 181100 + }, + { + "epoch": 4.424425280336159, + "grad_norm": 4.6374192606890574e-05, + "learning_rate": 7.965909085889299e-08, + "loss": 0.0, + "num_input_tokens_seen": 122010344, + "step": 181105 + }, + { + "epoch": 4.424547431168007, + "grad_norm": 0.006114604417234659, + "learning_rate": 7.96257406752846e-08, + "loss": 0.0, + "num_input_tokens_seen": 122013352, + "step": 181110 + }, + { + "epoch": 4.424669581999853, + "grad_norm": 0.0006236334447748959, + "learning_rate": 7.959239718488553e-08, + "loss": 0.0, + "num_input_tokens_seen": 122017064, + "step": 181115 + }, + { + "epoch": 4.4247917328317, + "grad_norm": 0.00038256382686086, + "learning_rate": 7.955906038793791e-08, + "loss": 0.0, + "num_input_tokens_seen": 122020200, + "step": 181120 + }, + { + "epoch": 4.424913883663548, + "grad_norm": 0.000345156091498211, + "learning_rate": 7.952573028468456e-08, + "loss": 0.0, + "num_input_tokens_seen": 122023528, + "step": 181125 + }, + { + "epoch": 4.4250360344953945, + "grad_norm": 0.00016293360386043787, + "learning_rate": 7.949240687536751e-08, + "loss": 0.0001, + "num_input_tokens_seen": 122026600, + "step": 181130 + }, + { + "epoch": 4.425158185327242, + "grad_norm": 0.0001275143149541691, + "learning_rate": 7.945909016022934e-08, + "loss": 0.0, + "num_input_tokens_seen": 122029992, + "step": 181135 + }, + { + "epoch": 4.425280336159089, + "grad_norm": 0.00010861880582524464, + "learning_rate": 7.942578013951217e-08, + "loss": 0.0, + "num_input_tokens_seen": 122033000, + "step": 181140 + }, + { + "epoch": 4.4254024869909365, + "grad_norm": 0.00043809949420392513, + "learning_rate": 7.93924768134584e-08, + "loss": 0.0, + "num_input_tokens_seen": 122036200, + "step": 181145 + }, + { + "epoch": 4.425524637822783, + "grad_norm": 0.008996120654046535, + "learning_rate": 7.935918018231024e-08, + "loss": 0.0, + "num_input_tokens_seen": 122039848, + "step": 181150 + }, + { + "epoch": 4.425646788654631, + "grad_norm": 0.0006707753636874259, + "learning_rate": 7.932589024630953e-08, + "loss": 0.0, + "num_input_tokens_seen": 122043688, + "step": 181155 + }, + { + "epoch": 4.425768939486478, + "grad_norm": 0.004772162064909935, + "learning_rate": 7.929260700569884e-08, + "loss": 0.0, + "num_input_tokens_seen": 122047208, + "step": 181160 + }, + { + "epoch": 4.425891090318325, + "grad_norm": 0.0019871399272233248, + "learning_rate": 7.925933046071975e-08, + "loss": 0.0, + "num_input_tokens_seen": 122050600, + "step": 181165 + }, + { + "epoch": 4.426013241150172, + "grad_norm": 0.00029408876434899867, + "learning_rate": 7.922606061161464e-08, + "loss": 0.0, + "num_input_tokens_seen": 122053608, + "step": 181170 + }, + { + "epoch": 4.42613539198202, + "grad_norm": 0.10013458132743835, + "learning_rate": 7.919279745862505e-08, + "loss": 0.0, + "num_input_tokens_seen": 122056872, + "step": 181175 + }, + { + "epoch": 4.426257542813866, + "grad_norm": 3.233554161852226e-05, + "learning_rate": 7.915954100199328e-08, + "loss": 0.0, + "num_input_tokens_seen": 122060200, + "step": 181180 + }, + { + "epoch": 4.426379693645714, + "grad_norm": 0.0017619574209675193, + "learning_rate": 7.912629124196113e-08, + "loss": 0.0, + "num_input_tokens_seen": 122063464, + "step": 181185 + }, + { + "epoch": 4.426501844477561, + "grad_norm": 0.0008039613021537662, + "learning_rate": 7.909304817876994e-08, + "loss": 0.0, + "num_input_tokens_seen": 122066536, + "step": 181190 + }, + { + "epoch": 4.426623995309408, + "grad_norm": 0.0007726152543909848, + "learning_rate": 7.905981181266208e-08, + "loss": 0.0, + "num_input_tokens_seen": 122069736, + "step": 181195 + }, + { + "epoch": 4.426746146141255, + "grad_norm": 0.0002162769524147734, + "learning_rate": 7.90265821438788e-08, + "loss": 0.0, + "num_input_tokens_seen": 122073576, + "step": 181200 + }, + { + "epoch": 4.426868296973103, + "grad_norm": 7.136868953239173e-05, + "learning_rate": 7.89933591726618e-08, + "loss": 0.0, + "num_input_tokens_seen": 122076776, + "step": 181205 + }, + { + "epoch": 4.4269904478049495, + "grad_norm": 0.0007138107321225107, + "learning_rate": 7.896014289925312e-08, + "loss": 0.0, + "num_input_tokens_seen": 122079848, + "step": 181210 + }, + { + "epoch": 4.427112598636796, + "grad_norm": 2.201797724410426e-05, + "learning_rate": 7.892693332389378e-08, + "loss": 0.0, + "num_input_tokens_seen": 122083304, + "step": 181215 + }, + { + "epoch": 4.427234749468644, + "grad_norm": 1.4028590157977305e-05, + "learning_rate": 7.889373044682567e-08, + "loss": 0.0, + "num_input_tokens_seen": 122086504, + "step": 181220 + }, + { + "epoch": 4.427356900300491, + "grad_norm": 0.0016195435309782624, + "learning_rate": 7.886053426828998e-08, + "loss": 0.0, + "num_input_tokens_seen": 122089704, + "step": 181225 + }, + { + "epoch": 4.427479051132338, + "grad_norm": 0.00019918143516406417, + "learning_rate": 7.882734478852826e-08, + "loss": 0.0, + "num_input_tokens_seen": 122093224, + "step": 181230 + }, + { + "epoch": 4.427601201964185, + "grad_norm": 0.00010197334631811827, + "learning_rate": 7.8794162007782e-08, + "loss": 0.0, + "num_input_tokens_seen": 122096616, + "step": 181235 + }, + { + "epoch": 4.427723352796033, + "grad_norm": 0.002117699710652232, + "learning_rate": 7.876098592629221e-08, + "loss": 0.0, + "num_input_tokens_seen": 122100136, + "step": 181240 + }, + { + "epoch": 4.427845503627879, + "grad_norm": 4.120414814678952e-05, + "learning_rate": 7.872781654430039e-08, + "loss": 0.0, + "num_input_tokens_seen": 122103336, + "step": 181245 + }, + { + "epoch": 4.427967654459727, + "grad_norm": 1.0961198313452769e-05, + "learning_rate": 7.869465386204765e-08, + "loss": 0.0, + "num_input_tokens_seen": 122106792, + "step": 181250 + }, + { + "epoch": 4.428089805291574, + "grad_norm": 0.0006331568001769483, + "learning_rate": 7.866149787977528e-08, + "loss": 0.0, + "num_input_tokens_seen": 122109928, + "step": 181255 + }, + { + "epoch": 4.4282119561234214, + "grad_norm": 5.89434894209262e-05, + "learning_rate": 7.862834859772416e-08, + "loss": 0.0, + "num_input_tokens_seen": 122113128, + "step": 181260 + }, + { + "epoch": 4.428334106955268, + "grad_norm": 0.0022513335570693016, + "learning_rate": 7.859520601613545e-08, + "loss": 0.0, + "num_input_tokens_seen": 122116392, + "step": 181265 + }, + { + "epoch": 4.428456257787116, + "grad_norm": 0.0005072103813290596, + "learning_rate": 7.85620701352504e-08, + "loss": 0.0, + "num_input_tokens_seen": 122119848, + "step": 181270 + }, + { + "epoch": 4.428578408618963, + "grad_norm": 0.0003140374319627881, + "learning_rate": 7.852894095530993e-08, + "loss": 0.0, + "num_input_tokens_seen": 122123240, + "step": 181275 + }, + { + "epoch": 4.42870055945081, + "grad_norm": 3.0278168196673505e-05, + "learning_rate": 7.849581847655462e-08, + "loss": 0.0002, + "num_input_tokens_seen": 122126632, + "step": 181280 + }, + { + "epoch": 4.428822710282657, + "grad_norm": 0.00029192675719968975, + "learning_rate": 7.846270269922572e-08, + "loss": 0.0, + "num_input_tokens_seen": 122130600, + "step": 181285 + }, + { + "epoch": 4.428944861114505, + "grad_norm": 0.004352732561528683, + "learning_rate": 7.842959362356394e-08, + "loss": 0.0, + "num_input_tokens_seen": 122133736, + "step": 181290 + }, + { + "epoch": 4.429067011946351, + "grad_norm": 0.0026824241504073143, + "learning_rate": 7.839649124980985e-08, + "loss": 0.0, + "num_input_tokens_seen": 122136936, + "step": 181295 + }, + { + "epoch": 4.429189162778199, + "grad_norm": 0.0006657766061834991, + "learning_rate": 7.836339557820427e-08, + "loss": 0.0, + "num_input_tokens_seen": 122140648, + "step": 181300 + }, + { + "epoch": 4.429311313610046, + "grad_norm": 0.005454826634377241, + "learning_rate": 7.83303066089882e-08, + "loss": 0.0, + "num_input_tokens_seen": 122143976, + "step": 181305 + }, + { + "epoch": 4.4294334644418925, + "grad_norm": 0.00019215767679270357, + "learning_rate": 7.829722434240193e-08, + "loss": 0.0, + "num_input_tokens_seen": 122147496, + "step": 181310 + }, + { + "epoch": 4.42955561527374, + "grad_norm": 0.001787459827028215, + "learning_rate": 7.826414877868626e-08, + "loss": 0.0, + "num_input_tokens_seen": 122151016, + "step": 181315 + }, + { + "epoch": 4.429677766105587, + "grad_norm": 0.00047288238420151174, + "learning_rate": 7.823107991808143e-08, + "loss": 0.0, + "num_input_tokens_seen": 122154344, + "step": 181320 + }, + { + "epoch": 4.4297999169374345, + "grad_norm": 0.0002162763848900795, + "learning_rate": 7.819801776082813e-08, + "loss": 0.0, + "num_input_tokens_seen": 122157864, + "step": 181325 + }, + { + "epoch": 4.429922067769281, + "grad_norm": 0.0003336450026836246, + "learning_rate": 7.816496230716696e-08, + "loss": 0.0, + "num_input_tokens_seen": 122161448, + "step": 181330 + }, + { + "epoch": 4.430044218601129, + "grad_norm": 0.0003785964800044894, + "learning_rate": 7.813191355733806e-08, + "loss": 0.0, + "num_input_tokens_seen": 122165288, + "step": 181335 + }, + { + "epoch": 4.430166369432976, + "grad_norm": 0.0002000385575229302, + "learning_rate": 7.809887151158189e-08, + "loss": 0.0, + "num_input_tokens_seen": 122168360, + "step": 181340 + }, + { + "epoch": 4.430288520264823, + "grad_norm": 0.00092991505516693, + "learning_rate": 7.806583617013851e-08, + "loss": 0.0, + "num_input_tokens_seen": 122171368, + "step": 181345 + }, + { + "epoch": 4.43041067109667, + "grad_norm": 0.00020382586808409542, + "learning_rate": 7.80328075332486e-08, + "loss": 0.0, + "num_input_tokens_seen": 122175144, + "step": 181350 + }, + { + "epoch": 4.430532821928518, + "grad_norm": 0.0013917014002799988, + "learning_rate": 7.799978560115184e-08, + "loss": 0.0, + "num_input_tokens_seen": 122178344, + "step": 181355 + }, + { + "epoch": 4.430654972760364, + "grad_norm": 0.00021623856446240097, + "learning_rate": 7.79667703740886e-08, + "loss": 0.0, + "num_input_tokens_seen": 122181608, + "step": 181360 + }, + { + "epoch": 4.430777123592212, + "grad_norm": 0.00034316626260988414, + "learning_rate": 7.793376185229928e-08, + "loss": 0.0, + "num_input_tokens_seen": 122184872, + "step": 181365 + }, + { + "epoch": 4.430899274424059, + "grad_norm": 0.006357030943036079, + "learning_rate": 7.790076003602342e-08, + "loss": 0.0, + "num_input_tokens_seen": 122188136, + "step": 181370 + }, + { + "epoch": 4.431021425255906, + "grad_norm": 0.0008824463002383709, + "learning_rate": 7.78677649255014e-08, + "loss": 0.0, + "num_input_tokens_seen": 122191272, + "step": 181375 + }, + { + "epoch": 4.431143576087753, + "grad_norm": 0.00017288989329244941, + "learning_rate": 7.78347765209728e-08, + "loss": 0.0, + "num_input_tokens_seen": 122194408, + "step": 181380 + }, + { + "epoch": 4.4312657269196, + "grad_norm": 0.00015481859736610204, + "learning_rate": 7.780179482267802e-08, + "loss": 0.0, + "num_input_tokens_seen": 122198120, + "step": 181385 + }, + { + "epoch": 4.4313878777514475, + "grad_norm": 4.8036527005024254e-05, + "learning_rate": 7.77688198308566e-08, + "loss": 0.0, + "num_input_tokens_seen": 122201512, + "step": 181390 + }, + { + "epoch": 4.431510028583294, + "grad_norm": 5.7792276493273675e-05, + "learning_rate": 7.773585154574814e-08, + "loss": 0.0, + "num_input_tokens_seen": 122204904, + "step": 181395 + }, + { + "epoch": 4.431632179415142, + "grad_norm": 0.00042270554695278406, + "learning_rate": 7.770288996759289e-08, + "loss": 0.0, + "num_input_tokens_seen": 122208488, + "step": 181400 + }, + { + "epoch": 4.431754330246989, + "grad_norm": 0.0004007647221442312, + "learning_rate": 7.766993509663e-08, + "loss": 0.0, + "num_input_tokens_seen": 122211496, + "step": 181405 + }, + { + "epoch": 4.431876481078836, + "grad_norm": 0.00021725076658185571, + "learning_rate": 7.763698693309972e-08, + "loss": 0.0, + "num_input_tokens_seen": 122214696, + "step": 181410 + }, + { + "epoch": 4.431998631910683, + "grad_norm": 0.00569581426680088, + "learning_rate": 7.760404547724109e-08, + "loss": 0.0, + "num_input_tokens_seen": 122218024, + "step": 181415 + }, + { + "epoch": 4.432120782742531, + "grad_norm": 0.00361349293962121, + "learning_rate": 7.7571110729294e-08, + "loss": 0.0, + "num_input_tokens_seen": 122221864, + "step": 181420 + }, + { + "epoch": 4.432242933574377, + "grad_norm": 0.00037453541881404817, + "learning_rate": 7.753818268949808e-08, + "loss": 0.0, + "num_input_tokens_seen": 122225128, + "step": 181425 + }, + { + "epoch": 4.432365084406225, + "grad_norm": 7.587824075017124e-05, + "learning_rate": 7.750526135809232e-08, + "loss": 0.0, + "num_input_tokens_seen": 122228648, + "step": 181430 + }, + { + "epoch": 4.432487235238072, + "grad_norm": 0.0013005606597289443, + "learning_rate": 7.747234673531667e-08, + "loss": 0.0, + "num_input_tokens_seen": 122232040, + "step": 181435 + }, + { + "epoch": 4.432609386069919, + "grad_norm": 0.0004842895723413676, + "learning_rate": 7.743943882141013e-08, + "loss": 0.0, + "num_input_tokens_seen": 122235496, + "step": 181440 + }, + { + "epoch": 4.432731536901766, + "grad_norm": 0.00032378273317590356, + "learning_rate": 7.740653761661219e-08, + "loss": 0.0, + "num_input_tokens_seen": 122238696, + "step": 181445 + }, + { + "epoch": 4.432853687733614, + "grad_norm": 0.00021496588306035846, + "learning_rate": 7.737364312116202e-08, + "loss": 0.0, + "num_input_tokens_seen": 122242280, + "step": 181450 + }, + { + "epoch": 4.4329758385654605, + "grad_norm": 6.006818512105383e-05, + "learning_rate": 7.734075533529871e-08, + "loss": 0.0, + "num_input_tokens_seen": 122245480, + "step": 181455 + }, + { + "epoch": 4.433097989397308, + "grad_norm": 0.0027862994465976954, + "learning_rate": 7.730787425926188e-08, + "loss": 0.0, + "num_input_tokens_seen": 122249320, + "step": 181460 + }, + { + "epoch": 4.433220140229155, + "grad_norm": 0.00013754340761806816, + "learning_rate": 7.727499989329023e-08, + "loss": 0.0, + "num_input_tokens_seen": 122252776, + "step": 181465 + }, + { + "epoch": 4.4333422910610025, + "grad_norm": 0.0007522147498093545, + "learning_rate": 7.7242132237623e-08, + "loss": 0.0286, + "num_input_tokens_seen": 122256552, + "step": 181470 + }, + { + "epoch": 4.433464441892849, + "grad_norm": 0.00010326150368200615, + "learning_rate": 7.72092712924991e-08, + "loss": 0.0, + "num_input_tokens_seen": 122259880, + "step": 181475 + }, + { + "epoch": 4.433586592724696, + "grad_norm": 3.690972880576737e-05, + "learning_rate": 7.71764170581577e-08, + "loss": 0.0, + "num_input_tokens_seen": 122263208, + "step": 181480 + }, + { + "epoch": 4.433708743556544, + "grad_norm": 3.2292551622958854e-05, + "learning_rate": 7.714356953483747e-08, + "loss": 0.0, + "num_input_tokens_seen": 122266472, + "step": 181485 + }, + { + "epoch": 4.43383089438839, + "grad_norm": 0.0006843619630672038, + "learning_rate": 7.711072872277757e-08, + "loss": 0.0, + "num_input_tokens_seen": 122269800, + "step": 181490 + }, + { + "epoch": 4.433953045220238, + "grad_norm": 0.002597218146547675, + "learning_rate": 7.70778946222167e-08, + "loss": 0.0, + "num_input_tokens_seen": 122273128, + "step": 181495 + }, + { + "epoch": 4.434075196052085, + "grad_norm": 0.003175930818542838, + "learning_rate": 7.704506723339343e-08, + "loss": 0.0, + "num_input_tokens_seen": 122276200, + "step": 181500 + }, + { + "epoch": 4.434197346883932, + "grad_norm": 0.0005056135705672204, + "learning_rate": 7.701224655654682e-08, + "loss": 0.0, + "num_input_tokens_seen": 122279400, + "step": 181505 + }, + { + "epoch": 4.434319497715779, + "grad_norm": 4.015320428152336e-06, + "learning_rate": 7.69794325919153e-08, + "loss": 0.0, + "num_input_tokens_seen": 122282856, + "step": 181510 + }, + { + "epoch": 4.434441648547627, + "grad_norm": 0.08624907582998276, + "learning_rate": 7.694662533973762e-08, + "loss": 0.0, + "num_input_tokens_seen": 122286568, + "step": 181515 + }, + { + "epoch": 4.4345637993794735, + "grad_norm": 0.0014746385859325528, + "learning_rate": 7.691382480025244e-08, + "loss": 0.0, + "num_input_tokens_seen": 122289704, + "step": 181520 + }, + { + "epoch": 4.434685950211321, + "grad_norm": 0.002636983757838607, + "learning_rate": 7.688103097369803e-08, + "loss": 0.0002, + "num_input_tokens_seen": 122292904, + "step": 181525 + }, + { + "epoch": 4.434808101043168, + "grad_norm": 0.00015489933139178902, + "learning_rate": 7.68482438603133e-08, + "loss": 0.0, + "num_input_tokens_seen": 122296232, + "step": 181530 + }, + { + "epoch": 4.4349302518750155, + "grad_norm": 0.0012338929809629917, + "learning_rate": 7.681546346033618e-08, + "loss": 0.0, + "num_input_tokens_seen": 122299688, + "step": 181535 + }, + { + "epoch": 4.435052402706862, + "grad_norm": 0.0014644035836681724, + "learning_rate": 7.67826897740056e-08, + "loss": 0.0, + "num_input_tokens_seen": 122302696, + "step": 181540 + }, + { + "epoch": 4.43517455353871, + "grad_norm": 0.11627105623483658, + "learning_rate": 7.674992280155934e-08, + "loss": 0.0, + "num_input_tokens_seen": 122305768, + "step": 181545 + }, + { + "epoch": 4.435296704370557, + "grad_norm": 0.00040323357097804546, + "learning_rate": 7.671716254323601e-08, + "loss": 0.0, + "num_input_tokens_seen": 122308904, + "step": 181550 + }, + { + "epoch": 4.435418855202404, + "grad_norm": 0.00010230207408312708, + "learning_rate": 7.668440899927398e-08, + "loss": 0.0, + "num_input_tokens_seen": 122312168, + "step": 181555 + }, + { + "epoch": 4.435541006034251, + "grad_norm": 0.013337737880647182, + "learning_rate": 7.665166216991115e-08, + "loss": 0.0, + "num_input_tokens_seen": 122315816, + "step": 181560 + }, + { + "epoch": 4.435663156866099, + "grad_norm": 0.00021302149980328977, + "learning_rate": 7.66189220553859e-08, + "loss": 0.0, + "num_input_tokens_seen": 122318888, + "step": 181565 + }, + { + "epoch": 4.435785307697945, + "grad_norm": 0.00024847377790138125, + "learning_rate": 7.658618865593603e-08, + "loss": 0.0, + "num_input_tokens_seen": 122322088, + "step": 181570 + }, + { + "epoch": 4.435907458529792, + "grad_norm": 0.00015122335753403604, + "learning_rate": 7.655346197179979e-08, + "loss": 0.0, + "num_input_tokens_seen": 122325736, + "step": 181575 + }, + { + "epoch": 4.43602960936164, + "grad_norm": 0.0009328412124887109, + "learning_rate": 7.652074200321524e-08, + "loss": 0.0, + "num_input_tokens_seen": 122328936, + "step": 181580 + }, + { + "epoch": 4.436151760193487, + "grad_norm": 0.00010473511792952195, + "learning_rate": 7.648802875042038e-08, + "loss": 0.0, + "num_input_tokens_seen": 122332328, + "step": 181585 + }, + { + "epoch": 4.436273911025334, + "grad_norm": 0.00021845597075298429, + "learning_rate": 7.64553222136527e-08, + "loss": 0.0003, + "num_input_tokens_seen": 122335272, + "step": 181590 + }, + { + "epoch": 4.436396061857181, + "grad_norm": 0.00012159592733951285, + "learning_rate": 7.642262239315055e-08, + "loss": 0.0, + "num_input_tokens_seen": 122338792, + "step": 181595 + }, + { + "epoch": 4.436518212689029, + "grad_norm": 0.0010934515157714486, + "learning_rate": 7.638992928915144e-08, + "loss": 0.0, + "num_input_tokens_seen": 122341928, + "step": 181600 + }, + { + "epoch": 4.436640363520875, + "grad_norm": 0.020605893805623055, + "learning_rate": 7.635724290189305e-08, + "loss": 0.0, + "num_input_tokens_seen": 122345320, + "step": 181605 + }, + { + "epoch": 4.436762514352723, + "grad_norm": 0.013531841337680817, + "learning_rate": 7.632456323161319e-08, + "loss": 0.0, + "num_input_tokens_seen": 122349416, + "step": 181610 + }, + { + "epoch": 4.43688466518457, + "grad_norm": 0.0014700923347845674, + "learning_rate": 7.629189027854977e-08, + "loss": 0.0, + "num_input_tokens_seen": 122352616, + "step": 181615 + }, + { + "epoch": 4.437006816016417, + "grad_norm": 8.41475193738006e-05, + "learning_rate": 7.625922404293994e-08, + "loss": 0.0, + "num_input_tokens_seen": 122356264, + "step": 181620 + }, + { + "epoch": 4.437128966848264, + "grad_norm": 0.00010870520054595545, + "learning_rate": 7.622656452502174e-08, + "loss": 0.0663, + "num_input_tokens_seen": 122359656, + "step": 181625 + }, + { + "epoch": 4.437251117680112, + "grad_norm": 0.00017314977594651282, + "learning_rate": 7.61939117250322e-08, + "loss": 0.0, + "num_input_tokens_seen": 122362856, + "step": 181630 + }, + { + "epoch": 4.4373732685119585, + "grad_norm": 0.002724649151787162, + "learning_rate": 7.616126564320901e-08, + "loss": 0.0, + "num_input_tokens_seen": 122366120, + "step": 181635 + }, + { + "epoch": 4.437495419343806, + "grad_norm": 0.022800017148256302, + "learning_rate": 7.612862627978978e-08, + "loss": 0.0, + "num_input_tokens_seen": 122370216, + "step": 181640 + }, + { + "epoch": 4.437617570175653, + "grad_norm": 7.581769750686362e-05, + "learning_rate": 7.60959936350114e-08, + "loss": 0.0, + "num_input_tokens_seen": 122373672, + "step": 181645 + }, + { + "epoch": 4.4377397210075005, + "grad_norm": 0.0020787513349205256, + "learning_rate": 7.60633677091117e-08, + "loss": 0.0, + "num_input_tokens_seen": 122377704, + "step": 181650 + }, + { + "epoch": 4.437861871839347, + "grad_norm": 0.0002737126487772912, + "learning_rate": 7.60307485023276e-08, + "loss": 0.0, + "num_input_tokens_seen": 122380904, + "step": 181655 + }, + { + "epoch": 4.437984022671195, + "grad_norm": 1.887790858745575e-05, + "learning_rate": 7.599813601489646e-08, + "loss": 0.0, + "num_input_tokens_seen": 122384040, + "step": 181660 + }, + { + "epoch": 4.438106173503042, + "grad_norm": 0.0011298698373138905, + "learning_rate": 7.596553024705533e-08, + "loss": 0.0, + "num_input_tokens_seen": 122387176, + "step": 181665 + }, + { + "epoch": 4.438228324334888, + "grad_norm": 0.0046443212777376175, + "learning_rate": 7.593293119904132e-08, + "loss": 0.0, + "num_input_tokens_seen": 122390632, + "step": 181670 + }, + { + "epoch": 4.438350475166736, + "grad_norm": 0.00015271840675268322, + "learning_rate": 7.590033887109181e-08, + "loss": 0.0, + "num_input_tokens_seen": 122393960, + "step": 181675 + }, + { + "epoch": 4.438472625998583, + "grad_norm": 0.001776915742084384, + "learning_rate": 7.586775326344341e-08, + "loss": 0.0, + "num_input_tokens_seen": 122396648, + "step": 181680 + }, + { + "epoch": 4.43859477683043, + "grad_norm": 0.0008560987189412117, + "learning_rate": 7.583517437633335e-08, + "loss": 0.0, + "num_input_tokens_seen": 122400104, + "step": 181685 + }, + { + "epoch": 4.438716927662277, + "grad_norm": 5.613368557533249e-06, + "learning_rate": 7.580260220999845e-08, + "loss": 0.0, + "num_input_tokens_seen": 122403688, + "step": 181690 + }, + { + "epoch": 4.438839078494125, + "grad_norm": 0.00016497218166477978, + "learning_rate": 7.577003676467564e-08, + "loss": 0.0, + "num_input_tokens_seen": 122406888, + "step": 181695 + }, + { + "epoch": 4.4389612293259715, + "grad_norm": 0.0033229936379939318, + "learning_rate": 7.573747804060182e-08, + "loss": 0.0, + "num_input_tokens_seen": 122409832, + "step": 181700 + }, + { + "epoch": 4.439083380157819, + "grad_norm": 6.330916949082166e-05, + "learning_rate": 7.570492603801337e-08, + "loss": 0.0, + "num_input_tokens_seen": 122413224, + "step": 181705 + }, + { + "epoch": 4.439205530989666, + "grad_norm": 0.0011732260463759303, + "learning_rate": 7.567238075714755e-08, + "loss": 0.0, + "num_input_tokens_seen": 122416552, + "step": 181710 + }, + { + "epoch": 4.4393276818215135, + "grad_norm": 0.0009869820205494761, + "learning_rate": 7.56398421982406e-08, + "loss": 0.0, + "num_input_tokens_seen": 122419752, + "step": 181715 + }, + { + "epoch": 4.43944983265336, + "grad_norm": 0.0001454023295082152, + "learning_rate": 7.560731036152957e-08, + "loss": 0.0, + "num_input_tokens_seen": 122422824, + "step": 181720 + }, + { + "epoch": 4.439571983485208, + "grad_norm": 1.622208765184041e-05, + "learning_rate": 7.557478524725059e-08, + "loss": 0.0, + "num_input_tokens_seen": 122426024, + "step": 181725 + }, + { + "epoch": 4.439694134317055, + "grad_norm": 0.00026236390112899244, + "learning_rate": 7.554226685564047e-08, + "loss": 0.0, + "num_input_tokens_seen": 122429160, + "step": 181730 + }, + { + "epoch": 4.439816285148902, + "grad_norm": 10.125528335571289, + "learning_rate": 7.55097551869357e-08, + "loss": 0.0384, + "num_input_tokens_seen": 122432488, + "step": 181735 + }, + { + "epoch": 4.439938435980749, + "grad_norm": 0.0002785317483358085, + "learning_rate": 7.547725024137252e-08, + "loss": 0.0, + "num_input_tokens_seen": 122435624, + "step": 181740 + }, + { + "epoch": 4.440060586812596, + "grad_norm": 0.0014351366553455591, + "learning_rate": 7.544475201918765e-08, + "loss": 0.0, + "num_input_tokens_seen": 122438504, + "step": 181745 + }, + { + "epoch": 4.440182737644443, + "grad_norm": 0.001117666601203382, + "learning_rate": 7.5412260520617e-08, + "loss": 0.0, + "num_input_tokens_seen": 122441640, + "step": 181750 + }, + { + "epoch": 4.44030488847629, + "grad_norm": 0.0025107613764703274, + "learning_rate": 7.537977574589726e-08, + "loss": 0.0, + "num_input_tokens_seen": 122444776, + "step": 181755 + }, + { + "epoch": 4.440427039308138, + "grad_norm": 3.955272404709831e-05, + "learning_rate": 7.534729769526437e-08, + "loss": 0.0, + "num_input_tokens_seen": 122448040, + "step": 181760 + }, + { + "epoch": 4.4405491901399845, + "grad_norm": 0.0025206992868334055, + "learning_rate": 7.531482636895458e-08, + "loss": 0.0, + "num_input_tokens_seen": 122451496, + "step": 181765 + }, + { + "epoch": 4.440671340971832, + "grad_norm": 0.00026313986745662987, + "learning_rate": 7.528236176720426e-08, + "loss": 0.0, + "num_input_tokens_seen": 122454696, + "step": 181770 + }, + { + "epoch": 4.440793491803679, + "grad_norm": 0.023444993421435356, + "learning_rate": 7.52499038902491e-08, + "loss": 0.0, + "num_input_tokens_seen": 122458088, + "step": 181775 + }, + { + "epoch": 4.4409156426355265, + "grad_norm": 0.0010252405190840364, + "learning_rate": 7.521745273832558e-08, + "loss": 0.0, + "num_input_tokens_seen": 122461096, + "step": 181780 + }, + { + "epoch": 4.441037793467373, + "grad_norm": 0.0006436887779273093, + "learning_rate": 7.518500831166929e-08, + "loss": 0.0, + "num_input_tokens_seen": 122464168, + "step": 181785 + }, + { + "epoch": 4.441159944299221, + "grad_norm": 1.9545019313227385e-05, + "learning_rate": 7.515257061051661e-08, + "loss": 0.0, + "num_input_tokens_seen": 122467368, + "step": 181790 + }, + { + "epoch": 4.441282095131068, + "grad_norm": 9.000881982501596e-05, + "learning_rate": 7.5120139635103e-08, + "loss": 0.0, + "num_input_tokens_seen": 122470568, + "step": 181795 + }, + { + "epoch": 4.441404245962915, + "grad_norm": 8.998543489724398e-05, + "learning_rate": 7.508771538566461e-08, + "loss": 0.0, + "num_input_tokens_seen": 122473768, + "step": 181800 + }, + { + "epoch": 4.441526396794762, + "grad_norm": 0.0010170077439397573, + "learning_rate": 7.505529786243714e-08, + "loss": 0.0, + "num_input_tokens_seen": 122476776, + "step": 181805 + }, + { + "epoch": 4.44164854762661, + "grad_norm": 7.858948811190203e-05, + "learning_rate": 7.502288706565618e-08, + "loss": 0.0, + "num_input_tokens_seen": 122480296, + "step": 181810 + }, + { + "epoch": 4.441770698458456, + "grad_norm": 0.014259721152484417, + "learning_rate": 7.499048299555777e-08, + "loss": 0.0, + "num_input_tokens_seen": 122483432, + "step": 181815 + }, + { + "epoch": 4.441892849290304, + "grad_norm": 0.00014318434114102274, + "learning_rate": 7.495808565237716e-08, + "loss": 0.0308, + "num_input_tokens_seen": 122486888, + "step": 181820 + }, + { + "epoch": 4.442015000122151, + "grad_norm": 0.0010650715557858348, + "learning_rate": 7.492569503635015e-08, + "loss": 0.0, + "num_input_tokens_seen": 122490088, + "step": 181825 + }, + { + "epoch": 4.442137150953998, + "grad_norm": 0.00724154943600297, + "learning_rate": 7.489331114771247e-08, + "loss": 0.0, + "num_input_tokens_seen": 122493416, + "step": 181830 + }, + { + "epoch": 4.442259301785845, + "grad_norm": 4.4712283852277324e-05, + "learning_rate": 7.486093398669934e-08, + "loss": 0.0, + "num_input_tokens_seen": 122497576, + "step": 181835 + }, + { + "epoch": 4.442381452617692, + "grad_norm": 6.589079566765577e-05, + "learning_rate": 7.482856355354638e-08, + "loss": 0.0, + "num_input_tokens_seen": 122501480, + "step": 181840 + }, + { + "epoch": 4.4425036034495395, + "grad_norm": 0.015320822596549988, + "learning_rate": 7.479619984848884e-08, + "loss": 0.0, + "num_input_tokens_seen": 122504744, + "step": 181845 + }, + { + "epoch": 4.442625754281386, + "grad_norm": 5.367151970858686e-05, + "learning_rate": 7.476384287176241e-08, + "loss": 0.0, + "num_input_tokens_seen": 122507944, + "step": 181850 + }, + { + "epoch": 4.442747905113234, + "grad_norm": 0.014334838837385178, + "learning_rate": 7.473149262360201e-08, + "loss": 0.0, + "num_input_tokens_seen": 122511080, + "step": 181855 + }, + { + "epoch": 4.442870055945081, + "grad_norm": 0.0004377129953354597, + "learning_rate": 7.469914910424291e-08, + "loss": 0.0, + "num_input_tokens_seen": 122514600, + "step": 181860 + }, + { + "epoch": 4.442992206776928, + "grad_norm": 0.0004606062138918787, + "learning_rate": 7.46668123139208e-08, + "loss": 0.0, + "num_input_tokens_seen": 122517992, + "step": 181865 + }, + { + "epoch": 4.443114357608775, + "grad_norm": 0.00025587991694919765, + "learning_rate": 7.463448225287028e-08, + "loss": 0.0, + "num_input_tokens_seen": 122520872, + "step": 181870 + }, + { + "epoch": 4.443236508440623, + "grad_norm": 8.987699402496219e-05, + "learning_rate": 7.460215892132693e-08, + "loss": 0.0, + "num_input_tokens_seen": 122524008, + "step": 181875 + }, + { + "epoch": 4.443358659272469, + "grad_norm": 0.0005700733745470643, + "learning_rate": 7.456984231952535e-08, + "loss": 0.0, + "num_input_tokens_seen": 122527144, + "step": 181880 + }, + { + "epoch": 4.443480810104317, + "grad_norm": 0.0001441869098925963, + "learning_rate": 7.453753244770078e-08, + "loss": 0.0, + "num_input_tokens_seen": 122530856, + "step": 181885 + }, + { + "epoch": 4.443602960936164, + "grad_norm": 0.0013551083393394947, + "learning_rate": 7.450522930608838e-08, + "loss": 0.0, + "num_input_tokens_seen": 122534248, + "step": 181890 + }, + { + "epoch": 4.443725111768011, + "grad_norm": 4.1691670048749074e-05, + "learning_rate": 7.447293289492285e-08, + "loss": 0.0, + "num_input_tokens_seen": 122537192, + "step": 181895 + }, + { + "epoch": 4.443847262599858, + "grad_norm": 0.0007187697337940335, + "learning_rate": 7.444064321443899e-08, + "loss": 0.0, + "num_input_tokens_seen": 122540200, + "step": 181900 + }, + { + "epoch": 4.443969413431706, + "grad_norm": 0.005256319418549538, + "learning_rate": 7.440836026487184e-08, + "loss": 0.0, + "num_input_tokens_seen": 122543656, + "step": 181905 + }, + { + "epoch": 4.444091564263553, + "grad_norm": 0.001109532080590725, + "learning_rate": 7.43760840464559e-08, + "loss": 0.0, + "num_input_tokens_seen": 122546472, + "step": 181910 + }, + { + "epoch": 4.4442137150954, + "grad_norm": 0.006649449001997709, + "learning_rate": 7.434381455942617e-08, + "loss": 0.0, + "num_input_tokens_seen": 122550184, + "step": 181915 + }, + { + "epoch": 4.444335865927247, + "grad_norm": 0.00018596640438772738, + "learning_rate": 7.431155180401705e-08, + "loss": 0.0, + "num_input_tokens_seen": 122553320, + "step": 181920 + }, + { + "epoch": 4.444458016759095, + "grad_norm": 0.00020076056534890085, + "learning_rate": 7.427929578046354e-08, + "loss": 0.0, + "num_input_tokens_seen": 122556584, + "step": 181925 + }, + { + "epoch": 4.444580167590941, + "grad_norm": 5.85622874496039e-05, + "learning_rate": 7.424704648899972e-08, + "loss": 0.0, + "num_input_tokens_seen": 122559592, + "step": 181930 + }, + { + "epoch": 4.444702318422788, + "grad_norm": 0.00037898457958362997, + "learning_rate": 7.421480392986057e-08, + "loss": 0.0, + "num_input_tokens_seen": 122562664, + "step": 181935 + }, + { + "epoch": 4.444824469254636, + "grad_norm": 0.00010478322656126693, + "learning_rate": 7.418256810328016e-08, + "loss": 0.0, + "num_input_tokens_seen": 122565672, + "step": 181940 + }, + { + "epoch": 4.4449466200864824, + "grad_norm": 0.0002965153835248202, + "learning_rate": 7.415033900949319e-08, + "loss": 0.0, + "num_input_tokens_seen": 122569896, + "step": 181945 + }, + { + "epoch": 4.44506877091833, + "grad_norm": 0.0018205991946160793, + "learning_rate": 7.411811664873413e-08, + "loss": 0.0, + "num_input_tokens_seen": 122573480, + "step": 181950 + }, + { + "epoch": 4.445190921750177, + "grad_norm": 0.0012627599062398076, + "learning_rate": 7.408590102123701e-08, + "loss": 0.0, + "num_input_tokens_seen": 122576680, + "step": 181955 + }, + { + "epoch": 4.4453130725820245, + "grad_norm": 0.00049396394751966, + "learning_rate": 7.405369212723645e-08, + "loss": 0.0, + "num_input_tokens_seen": 122579944, + "step": 181960 + }, + { + "epoch": 4.445435223413871, + "grad_norm": 0.0006146155064925551, + "learning_rate": 7.402148996696622e-08, + "loss": 0.0, + "num_input_tokens_seen": 122583208, + "step": 181965 + }, + { + "epoch": 4.445557374245719, + "grad_norm": 0.0018971695099025965, + "learning_rate": 7.398929454066105e-08, + "loss": 0.0, + "num_input_tokens_seen": 122586472, + "step": 181970 + }, + { + "epoch": 4.445679525077566, + "grad_norm": 39.05138397216797, + "learning_rate": 7.395710584855452e-08, + "loss": 0.0631, + "num_input_tokens_seen": 122590312, + "step": 181975 + }, + { + "epoch": 4.445801675909413, + "grad_norm": 0.0018318736692890525, + "learning_rate": 7.392492389088112e-08, + "loss": 0.0, + "num_input_tokens_seen": 122593448, + "step": 181980 + }, + { + "epoch": 4.44592382674126, + "grad_norm": 0.00022619598894380033, + "learning_rate": 7.389274866787488e-08, + "loss": 0.0, + "num_input_tokens_seen": 122597352, + "step": 181985 + }, + { + "epoch": 4.446045977573108, + "grad_norm": 0.28438884019851685, + "learning_rate": 7.386058017976938e-08, + "loss": 0.0002, + "num_input_tokens_seen": 122600936, + "step": 181990 + }, + { + "epoch": 4.446168128404954, + "grad_norm": 0.0005298346513882279, + "learning_rate": 7.38284184267991e-08, + "loss": 0.0, + "num_input_tokens_seen": 122604520, + "step": 181995 + }, + { + "epoch": 4.446290279236802, + "grad_norm": 0.00031661076354794204, + "learning_rate": 7.379626340919754e-08, + "loss": 0.0, + "num_input_tokens_seen": 122607784, + "step": 182000 + }, + { + "epoch": 4.446412430068649, + "grad_norm": 0.0004995632916688919, + "learning_rate": 7.376411512719882e-08, + "loss": 0.0, + "num_input_tokens_seen": 122610536, + "step": 182005 + }, + { + "epoch": 4.4465345809004955, + "grad_norm": 0.00014909429592080414, + "learning_rate": 7.373197358103655e-08, + "loss": 0.0, + "num_input_tokens_seen": 122614568, + "step": 182010 + }, + { + "epoch": 4.446656731732343, + "grad_norm": 0.0005114256637170911, + "learning_rate": 7.369983877094432e-08, + "loss": 0.0, + "num_input_tokens_seen": 122617768, + "step": 182015 + }, + { + "epoch": 4.44677888256419, + "grad_norm": 0.0002833055623341352, + "learning_rate": 7.366771069715627e-08, + "loss": 0.0, + "num_input_tokens_seen": 122621096, + "step": 182020 + }, + { + "epoch": 4.4469010333960375, + "grad_norm": 0.00017942176782526076, + "learning_rate": 7.363558935990555e-08, + "loss": 0.0, + "num_input_tokens_seen": 122624616, + "step": 182025 + }, + { + "epoch": 4.447023184227884, + "grad_norm": 0.0020283800549805164, + "learning_rate": 7.360347475942618e-08, + "loss": 0.0, + "num_input_tokens_seen": 122627752, + "step": 182030 + }, + { + "epoch": 4.447145335059732, + "grad_norm": 0.00043666703277267516, + "learning_rate": 7.357136689595133e-08, + "loss": 0.0, + "num_input_tokens_seen": 122631400, + "step": 182035 + }, + { + "epoch": 4.447267485891579, + "grad_norm": 0.0004874668666161597, + "learning_rate": 7.35392657697147e-08, + "loss": 0.0, + "num_input_tokens_seen": 122634664, + "step": 182040 + }, + { + "epoch": 4.447389636723426, + "grad_norm": 0.0012796398950740695, + "learning_rate": 7.350717138094976e-08, + "loss": 0.0, + "num_input_tokens_seen": 122638056, + "step": 182045 + }, + { + "epoch": 4.447511787555273, + "grad_norm": 0.00035492394817993045, + "learning_rate": 7.347508372988986e-08, + "loss": 0.0, + "num_input_tokens_seen": 122641384, + "step": 182050 + }, + { + "epoch": 4.447633938387121, + "grad_norm": 0.00027042333385907114, + "learning_rate": 7.34430028167684e-08, + "loss": 0.0, + "num_input_tokens_seen": 122644520, + "step": 182055 + }, + { + "epoch": 4.447756089218967, + "grad_norm": 0.00047873600851744413, + "learning_rate": 7.341092864181853e-08, + "loss": 0.0, + "num_input_tokens_seen": 122648360, + "step": 182060 + }, + { + "epoch": 4.447878240050815, + "grad_norm": 0.0010061763459816575, + "learning_rate": 7.337886120527381e-08, + "loss": 0.0, + "num_input_tokens_seen": 122651560, + "step": 182065 + }, + { + "epoch": 4.448000390882662, + "grad_norm": 0.0006434638053178787, + "learning_rate": 7.334680050736707e-08, + "loss": 0.0, + "num_input_tokens_seen": 122654696, + "step": 182070 + }, + { + "epoch": 4.448122541714509, + "grad_norm": 0.00011786912364186719, + "learning_rate": 7.331474654833158e-08, + "loss": 0.0, + "num_input_tokens_seen": 122657832, + "step": 182075 + }, + { + "epoch": 4.448244692546356, + "grad_norm": 0.007387042045593262, + "learning_rate": 7.32826993284007e-08, + "loss": 0.0, + "num_input_tokens_seen": 122660840, + "step": 182080 + }, + { + "epoch": 4.448366843378204, + "grad_norm": 0.00154625263530761, + "learning_rate": 7.325065884780712e-08, + "loss": 0.0, + "num_input_tokens_seen": 122664168, + "step": 182085 + }, + { + "epoch": 4.4484889942100505, + "grad_norm": 0.0005491775227710605, + "learning_rate": 7.321862510678423e-08, + "loss": 0.0, + "num_input_tokens_seen": 122667880, + "step": 182090 + }, + { + "epoch": 4.448611145041898, + "grad_norm": 9.22619983612094e-06, + "learning_rate": 7.318659810556449e-08, + "loss": 0.0, + "num_input_tokens_seen": 122671720, + "step": 182095 + }, + { + "epoch": 4.448733295873745, + "grad_norm": 0.0003610023995861411, + "learning_rate": 7.31545778443814e-08, + "loss": 0.0, + "num_input_tokens_seen": 122675112, + "step": 182100 + }, + { + "epoch": 4.448855446705592, + "grad_norm": 5.4004536650609225e-05, + "learning_rate": 7.31225643234672e-08, + "loss": 0.0224, + "num_input_tokens_seen": 122678312, + "step": 182105 + }, + { + "epoch": 4.448977597537439, + "grad_norm": 0.0013011741684749722, + "learning_rate": 7.309055754305527e-08, + "loss": 0.0, + "num_input_tokens_seen": 122681320, + "step": 182110 + }, + { + "epoch": 4.449099748369286, + "grad_norm": 0.0007865259540267289, + "learning_rate": 7.305855750337809e-08, + "loss": 0.0, + "num_input_tokens_seen": 122684840, + "step": 182115 + }, + { + "epoch": 4.449221899201134, + "grad_norm": 0.0013168536825105548, + "learning_rate": 7.302656420466824e-08, + "loss": 0.0, + "num_input_tokens_seen": 122688552, + "step": 182120 + }, + { + "epoch": 4.44934405003298, + "grad_norm": 3.883744648192078e-05, + "learning_rate": 7.299457764715866e-08, + "loss": 0.0, + "num_input_tokens_seen": 122691880, + "step": 182125 + }, + { + "epoch": 4.449466200864828, + "grad_norm": 0.0002559190324973315, + "learning_rate": 7.296259783108171e-08, + "loss": 0.0, + "num_input_tokens_seen": 122695144, + "step": 182130 + }, + { + "epoch": 4.449588351696675, + "grad_norm": 0.0001097548010875471, + "learning_rate": 7.293062475667011e-08, + "loss": 0.0, + "num_input_tokens_seen": 122698536, + "step": 182135 + }, + { + "epoch": 4.449710502528522, + "grad_norm": 0.030722323805093765, + "learning_rate": 7.289865842415654e-08, + "loss": 0.0, + "num_input_tokens_seen": 122701736, + "step": 182140 + }, + { + "epoch": 4.449832653360369, + "grad_norm": 0.002170925959944725, + "learning_rate": 7.286669883377306e-08, + "loss": 0.0, + "num_input_tokens_seen": 122705128, + "step": 182145 + }, + { + "epoch": 4.449954804192217, + "grad_norm": 0.001756233279593289, + "learning_rate": 7.283474598575257e-08, + "loss": 0.0, + "num_input_tokens_seen": 122708072, + "step": 182150 + }, + { + "epoch": 4.4500769550240635, + "grad_norm": 2.314039011253044e-05, + "learning_rate": 7.280279988032689e-08, + "loss": 0.0, + "num_input_tokens_seen": 122711528, + "step": 182155 + }, + { + "epoch": 4.450199105855911, + "grad_norm": 0.00042975114774890244, + "learning_rate": 7.277086051772896e-08, + "loss": 0.0, + "num_input_tokens_seen": 122714536, + "step": 182160 + }, + { + "epoch": 4.450321256687758, + "grad_norm": 3.163126530125737e-05, + "learning_rate": 7.273892789819047e-08, + "loss": 0.0, + "num_input_tokens_seen": 122717864, + "step": 182165 + }, + { + "epoch": 4.4504434075196055, + "grad_norm": 0.003855722723528743, + "learning_rate": 7.270700202194391e-08, + "loss": 0.0, + "num_input_tokens_seen": 122721256, + "step": 182170 + }, + { + "epoch": 4.450565558351452, + "grad_norm": 0.000997714465484023, + "learning_rate": 7.267508288922153e-08, + "loss": 0.0, + "num_input_tokens_seen": 122724328, + "step": 182175 + }, + { + "epoch": 4.4506877091833, + "grad_norm": 0.00014899314555805176, + "learning_rate": 7.264317050025537e-08, + "loss": 0.0, + "num_input_tokens_seen": 122727656, + "step": 182180 + }, + { + "epoch": 4.450809860015147, + "grad_norm": 0.0003528865345288068, + "learning_rate": 7.261126485527757e-08, + "loss": 0.0, + "num_input_tokens_seen": 122731112, + "step": 182185 + }, + { + "epoch": 4.450932010846994, + "grad_norm": 0.00044632842764258385, + "learning_rate": 7.257936595451986e-08, + "loss": 0.0, + "num_input_tokens_seen": 122734312, + "step": 182190 + }, + { + "epoch": 4.451054161678841, + "grad_norm": 4.655039811041206e-05, + "learning_rate": 7.254747379821458e-08, + "loss": 0.0, + "num_input_tokens_seen": 122738472, + "step": 182195 + }, + { + "epoch": 4.451176312510688, + "grad_norm": 0.000397029856685549, + "learning_rate": 7.251558838659355e-08, + "loss": 0.0, + "num_input_tokens_seen": 122741608, + "step": 182200 + }, + { + "epoch": 4.451298463342535, + "grad_norm": 0.0004111557500436902, + "learning_rate": 7.24837097198887e-08, + "loss": 0.0, + "num_input_tokens_seen": 122744872, + "step": 182205 + }, + { + "epoch": 4.451420614174382, + "grad_norm": 5.328708721208386e-05, + "learning_rate": 7.245183779833163e-08, + "loss": 0.0, + "num_input_tokens_seen": 122748392, + "step": 182210 + }, + { + "epoch": 4.45154276500623, + "grad_norm": 0.00046480150194838643, + "learning_rate": 7.241997262215449e-08, + "loss": 0.0, + "num_input_tokens_seen": 122751912, + "step": 182215 + }, + { + "epoch": 4.4516649158380766, + "grad_norm": 0.00010292190563632175, + "learning_rate": 7.238811419158852e-08, + "loss": 0.0, + "num_input_tokens_seen": 122755240, + "step": 182220 + }, + { + "epoch": 4.451787066669924, + "grad_norm": 0.0004073938471265137, + "learning_rate": 7.2356262506866e-08, + "loss": 0.0, + "num_input_tokens_seen": 122758632, + "step": 182225 + }, + { + "epoch": 4.451909217501771, + "grad_norm": 0.01925656571984291, + "learning_rate": 7.232441756821794e-08, + "loss": 0.0, + "num_input_tokens_seen": 122761960, + "step": 182230 + }, + { + "epoch": 4.452031368333619, + "grad_norm": 0.011916612274944782, + "learning_rate": 7.229257937587641e-08, + "loss": 0.0, + "num_input_tokens_seen": 122765672, + "step": 182235 + }, + { + "epoch": 4.452153519165465, + "grad_norm": 1.697085281193722e-05, + "learning_rate": 7.226074793007264e-08, + "loss": 0.0, + "num_input_tokens_seen": 122768744, + "step": 182240 + }, + { + "epoch": 4.452275669997313, + "grad_norm": 1.4337666470964905e-05, + "learning_rate": 7.222892323103846e-08, + "loss": 0.0, + "num_input_tokens_seen": 122772136, + "step": 182245 + }, + { + "epoch": 4.45239782082916, + "grad_norm": 0.006274717394262552, + "learning_rate": 7.21971052790048e-08, + "loss": 0.0, + "num_input_tokens_seen": 122775528, + "step": 182250 + }, + { + "epoch": 4.452519971661007, + "grad_norm": 0.00027175049763172865, + "learning_rate": 7.216529407420357e-08, + "loss": 0.0, + "num_input_tokens_seen": 122779048, + "step": 182255 + }, + { + "epoch": 4.452642122492854, + "grad_norm": 0.00012441864237189293, + "learning_rate": 7.213348961686572e-08, + "loss": 0.0, + "num_input_tokens_seen": 122782568, + "step": 182260 + }, + { + "epoch": 4.452764273324702, + "grad_norm": 0.0012823011493310332, + "learning_rate": 7.210169190722271e-08, + "loss": 0.0523, + "num_input_tokens_seen": 122786088, + "step": 182265 + }, + { + "epoch": 4.4528864241565485, + "grad_norm": 7.73086940171197e-05, + "learning_rate": 7.206990094550592e-08, + "loss": 0.0002, + "num_input_tokens_seen": 122789608, + "step": 182270 + }, + { + "epoch": 4.453008574988396, + "grad_norm": 0.000146789156133309, + "learning_rate": 7.203811673194615e-08, + "loss": 0.0, + "num_input_tokens_seen": 122793000, + "step": 182275 + }, + { + "epoch": 4.453130725820243, + "grad_norm": 4.125279883737676e-05, + "learning_rate": 7.200633926677513e-08, + "loss": 0.0, + "num_input_tokens_seen": 122796456, + "step": 182280 + }, + { + "epoch": 4.4532528766520905, + "grad_norm": 0.002219364047050476, + "learning_rate": 7.197456855022333e-08, + "loss": 0.0, + "num_input_tokens_seen": 122799528, + "step": 182285 + }, + { + "epoch": 4.453375027483937, + "grad_norm": 0.0004296134866308421, + "learning_rate": 7.194280458252211e-08, + "loss": 0.0, + "num_input_tokens_seen": 122802472, + "step": 182290 + }, + { + "epoch": 4.453497178315784, + "grad_norm": 0.0001687946787569672, + "learning_rate": 7.191104736390252e-08, + "loss": 0.0, + "num_input_tokens_seen": 122805416, + "step": 182295 + }, + { + "epoch": 4.453619329147632, + "grad_norm": 0.00604760879650712, + "learning_rate": 7.187929689459527e-08, + "loss": 0.0005, + "num_input_tokens_seen": 122808424, + "step": 182300 + }, + { + "epoch": 4.453741479979478, + "grad_norm": 0.0005081015406176448, + "learning_rate": 7.18475531748317e-08, + "loss": 0.001, + "num_input_tokens_seen": 122811752, + "step": 182305 + }, + { + "epoch": 4.453863630811326, + "grad_norm": 0.0017769659170880914, + "learning_rate": 7.181581620484211e-08, + "loss": 0.0, + "num_input_tokens_seen": 122815080, + "step": 182310 + }, + { + "epoch": 4.453985781643173, + "grad_norm": 0.0003629335842560977, + "learning_rate": 7.178408598485775e-08, + "loss": 0.0, + "num_input_tokens_seen": 122818344, + "step": 182315 + }, + { + "epoch": 4.45410793247502, + "grad_norm": 0.0013918086187914014, + "learning_rate": 7.175236251510908e-08, + "loss": 0.0, + "num_input_tokens_seen": 122822312, + "step": 182320 + }, + { + "epoch": 4.454230083306867, + "grad_norm": 0.00013686173770111054, + "learning_rate": 7.172064579582682e-08, + "loss": 0.0, + "num_input_tokens_seen": 122825512, + "step": 182325 + }, + { + "epoch": 4.454352234138715, + "grad_norm": 0.0005737289902754128, + "learning_rate": 7.16889358272419e-08, + "loss": 0.0, + "num_input_tokens_seen": 122828904, + "step": 182330 + }, + { + "epoch": 4.4544743849705615, + "grad_norm": 0.0011010384187102318, + "learning_rate": 7.165723260958445e-08, + "loss": 0.1163, + "num_input_tokens_seen": 122832808, + "step": 182335 + }, + { + "epoch": 4.454596535802409, + "grad_norm": 0.0005671690450981259, + "learning_rate": 7.162553614308552e-08, + "loss": 0.0, + "num_input_tokens_seen": 122835880, + "step": 182340 + }, + { + "epoch": 4.454718686634256, + "grad_norm": 1.5716286725364625e-05, + "learning_rate": 7.159384642797528e-08, + "loss": 0.0, + "num_input_tokens_seen": 122839016, + "step": 182345 + }, + { + "epoch": 4.4548408374661035, + "grad_norm": 0.002666143700480461, + "learning_rate": 7.156216346448419e-08, + "loss": 0.0, + "num_input_tokens_seen": 122843048, + "step": 182350 + }, + { + "epoch": 4.45496298829795, + "grad_norm": 0.000196826717001386, + "learning_rate": 7.153048725284305e-08, + "loss": 0.0, + "num_input_tokens_seen": 122847016, + "step": 182355 + }, + { + "epoch": 4.455085139129798, + "grad_norm": 0.00038609313196502626, + "learning_rate": 7.14988177932817e-08, + "loss": 0.0, + "num_input_tokens_seen": 122850152, + "step": 182360 + }, + { + "epoch": 4.455207289961645, + "grad_norm": 0.0003739885869435966, + "learning_rate": 7.146715508603085e-08, + "loss": 0.0, + "num_input_tokens_seen": 122853096, + "step": 182365 + }, + { + "epoch": 4.455329440793491, + "grad_norm": 0.002237460808828473, + "learning_rate": 7.143549913132052e-08, + "loss": 0.0, + "num_input_tokens_seen": 122856488, + "step": 182370 + }, + { + "epoch": 4.455451591625339, + "grad_norm": 0.0001580318494234234, + "learning_rate": 7.140384992938108e-08, + "loss": 0.0, + "num_input_tokens_seen": 122859752, + "step": 182375 + }, + { + "epoch": 4.455573742457186, + "grad_norm": 0.0003379890986252576, + "learning_rate": 7.137220748044236e-08, + "loss": 0.0, + "num_input_tokens_seen": 122863336, + "step": 182380 + }, + { + "epoch": 4.455695893289033, + "grad_norm": 8.986893953988329e-05, + "learning_rate": 7.134057178473485e-08, + "loss": 0.0, + "num_input_tokens_seen": 122866472, + "step": 182385 + }, + { + "epoch": 4.45581804412088, + "grad_norm": 0.0007048301049508154, + "learning_rate": 7.130894284248856e-08, + "loss": 0.0, + "num_input_tokens_seen": 122870248, + "step": 182390 + }, + { + "epoch": 4.455940194952728, + "grad_norm": 32.200618743896484, + "learning_rate": 7.127732065393333e-08, + "loss": 0.0383, + "num_input_tokens_seen": 122873256, + "step": 182395 + }, + { + "epoch": 4.4560623457845745, + "grad_norm": 0.00011272566189290956, + "learning_rate": 7.12457052192994e-08, + "loss": 0.0, + "num_input_tokens_seen": 122876584, + "step": 182400 + }, + { + "epoch": 4.456184496616422, + "grad_norm": 0.00023590961063746363, + "learning_rate": 7.121409653881628e-08, + "loss": 0.0, + "num_input_tokens_seen": 122879976, + "step": 182405 + }, + { + "epoch": 4.456306647448269, + "grad_norm": 0.005138612352311611, + "learning_rate": 7.11824946127142e-08, + "loss": 0.0, + "num_input_tokens_seen": 122883240, + "step": 182410 + }, + { + "epoch": 4.4564287982801165, + "grad_norm": 0.0001687046606093645, + "learning_rate": 7.115089944122276e-08, + "loss": 0.0, + "num_input_tokens_seen": 122886312, + "step": 182415 + }, + { + "epoch": 4.456550949111963, + "grad_norm": 0.00014753674622625113, + "learning_rate": 7.111931102457192e-08, + "loss": 0.0, + "num_input_tokens_seen": 122889640, + "step": 182420 + }, + { + "epoch": 4.456673099943811, + "grad_norm": 0.00025895764701999724, + "learning_rate": 7.108772936299134e-08, + "loss": 0.0, + "num_input_tokens_seen": 122893288, + "step": 182425 + }, + { + "epoch": 4.456795250775658, + "grad_norm": 4.39085197285749e-05, + "learning_rate": 7.105615445671042e-08, + "loss": 0.0, + "num_input_tokens_seen": 122896552, + "step": 182430 + }, + { + "epoch": 4.456917401607505, + "grad_norm": 0.001786267152056098, + "learning_rate": 7.10245863059592e-08, + "loss": 0.0, + "num_input_tokens_seen": 122899880, + "step": 182435 + }, + { + "epoch": 4.457039552439352, + "grad_norm": 0.0021983960177749395, + "learning_rate": 7.099302491096681e-08, + "loss": 0.0, + "num_input_tokens_seen": 122903528, + "step": 182440 + }, + { + "epoch": 4.4571617032712, + "grad_norm": 0.00025607639690861106, + "learning_rate": 7.096147027196308e-08, + "loss": 0.0, + "num_input_tokens_seen": 122907176, + "step": 182445 + }, + { + "epoch": 4.457283854103046, + "grad_norm": 0.0005844164406880736, + "learning_rate": 7.092992238917761e-08, + "loss": 0.0, + "num_input_tokens_seen": 122910760, + "step": 182450 + }, + { + "epoch": 4.457406004934894, + "grad_norm": 0.0015613926807418466, + "learning_rate": 7.089838126283943e-08, + "loss": 0.0, + "num_input_tokens_seen": 122913960, + "step": 182455 + }, + { + "epoch": 4.457528155766741, + "grad_norm": 2.1202440620982088e-05, + "learning_rate": 7.086684689317834e-08, + "loss": 0.0, + "num_input_tokens_seen": 122916904, + "step": 182460 + }, + { + "epoch": 4.4576503065985875, + "grad_norm": 4.5927870814921334e-05, + "learning_rate": 7.083531928042319e-08, + "loss": 0.0, + "num_input_tokens_seen": 122920168, + "step": 182465 + }, + { + "epoch": 4.457772457430435, + "grad_norm": 0.004868772812187672, + "learning_rate": 7.080379842480378e-08, + "loss": 0.0, + "num_input_tokens_seen": 122923688, + "step": 182470 + }, + { + "epoch": 4.457894608262282, + "grad_norm": 8.599821740062907e-05, + "learning_rate": 7.077228432654881e-08, + "loss": 0.0, + "num_input_tokens_seen": 122926632, + "step": 182475 + }, + { + "epoch": 4.4580167590941295, + "grad_norm": 0.0005928333266638219, + "learning_rate": 7.074077698588777e-08, + "loss": 0.0, + "num_input_tokens_seen": 122930152, + "step": 182480 + }, + { + "epoch": 4.458138909925976, + "grad_norm": 0.00020691838290076703, + "learning_rate": 7.070927640304992e-08, + "loss": 0.0, + "num_input_tokens_seen": 122933288, + "step": 182485 + }, + { + "epoch": 4.458261060757824, + "grad_norm": 1.5695924957981333e-05, + "learning_rate": 7.067778257826395e-08, + "loss": 0.0, + "num_input_tokens_seen": 122936808, + "step": 182490 + }, + { + "epoch": 4.458383211589671, + "grad_norm": 0.0004510094877332449, + "learning_rate": 7.064629551175928e-08, + "loss": 0.0, + "num_input_tokens_seen": 122939880, + "step": 182495 + }, + { + "epoch": 4.458505362421518, + "grad_norm": 1.2007948160171509, + "learning_rate": 7.061481520376455e-08, + "loss": 0.0, + "num_input_tokens_seen": 122943080, + "step": 182500 + }, + { + "epoch": 4.458627513253365, + "grad_norm": 0.00013166008284315467, + "learning_rate": 7.058334165450885e-08, + "loss": 0.0, + "num_input_tokens_seen": 122946472, + "step": 182505 + }, + { + "epoch": 4.458749664085213, + "grad_norm": 0.00042926755850203335, + "learning_rate": 7.055187486422131e-08, + "loss": 0.0, + "num_input_tokens_seen": 122949672, + "step": 182510 + }, + { + "epoch": 4.458871814917059, + "grad_norm": 3.18528400384821e-05, + "learning_rate": 7.052041483313043e-08, + "loss": 0.0, + "num_input_tokens_seen": 122952936, + "step": 182515 + }, + { + "epoch": 4.458993965748907, + "grad_norm": 7.602172263432294e-05, + "learning_rate": 7.0488961561465e-08, + "loss": 0.0, + "num_input_tokens_seen": 122956136, + "step": 182520 + }, + { + "epoch": 4.459116116580754, + "grad_norm": 0.031247856095433235, + "learning_rate": 7.045751504945396e-08, + "loss": 0.0, + "num_input_tokens_seen": 122959336, + "step": 182525 + }, + { + "epoch": 4.459238267412601, + "grad_norm": 0.0004095268959645182, + "learning_rate": 7.04260752973258e-08, + "loss": 0.0, + "num_input_tokens_seen": 122962984, + "step": 182530 + }, + { + "epoch": 4.459360418244448, + "grad_norm": 0.0004039282212033868, + "learning_rate": 7.039464230530933e-08, + "loss": 0.0, + "num_input_tokens_seen": 122966056, + "step": 182535 + }, + { + "epoch": 4.459482569076296, + "grad_norm": 0.00013901021156925708, + "learning_rate": 7.036321607363294e-08, + "loss": 0.0286, + "num_input_tokens_seen": 122969128, + "step": 182540 + }, + { + "epoch": 4.459604719908143, + "grad_norm": 0.00012049246288370341, + "learning_rate": 7.033179660252541e-08, + "loss": 0.0, + "num_input_tokens_seen": 122972712, + "step": 182545 + }, + { + "epoch": 4.45972687073999, + "grad_norm": 9.773957572178915e-05, + "learning_rate": 7.030038389221493e-08, + "loss": 0.0, + "num_input_tokens_seen": 122976104, + "step": 182550 + }, + { + "epoch": 4.459849021571837, + "grad_norm": 0.0012148652458563447, + "learning_rate": 7.02689779429304e-08, + "loss": 0.0, + "num_input_tokens_seen": 122979304, + "step": 182555 + }, + { + "epoch": 4.459971172403684, + "grad_norm": 0.0008318567997775972, + "learning_rate": 7.023757875489967e-08, + "loss": 0.0, + "num_input_tokens_seen": 122982888, + "step": 182560 + }, + { + "epoch": 4.460093323235531, + "grad_norm": 0.00032979255774989724, + "learning_rate": 7.020618632835151e-08, + "loss": 0.0, + "num_input_tokens_seen": 122985896, + "step": 182565 + }, + { + "epoch": 4.460215474067378, + "grad_norm": 4.526316115516238e-05, + "learning_rate": 7.017480066351388e-08, + "loss": 0.0001, + "num_input_tokens_seen": 122989160, + "step": 182570 + }, + { + "epoch": 4.460337624899226, + "grad_norm": 0.005635230336338282, + "learning_rate": 7.014342176061517e-08, + "loss": 0.1477, + "num_input_tokens_seen": 122992424, + "step": 182575 + }, + { + "epoch": 4.460459775731072, + "grad_norm": 7.891910354373977e-05, + "learning_rate": 7.011204961988382e-08, + "loss": 0.0, + "num_input_tokens_seen": 122995880, + "step": 182580 + }, + { + "epoch": 4.46058192656292, + "grad_norm": 0.00021873510559089482, + "learning_rate": 7.008068424154756e-08, + "loss": 0.0, + "num_input_tokens_seen": 122998952, + "step": 182585 + }, + { + "epoch": 4.460704077394767, + "grad_norm": 2.3276723368326202e-05, + "learning_rate": 7.004932562583488e-08, + "loss": 0.0, + "num_input_tokens_seen": 123002024, + "step": 182590 + }, + { + "epoch": 4.4608262282266145, + "grad_norm": 0.00020752607088070363, + "learning_rate": 7.001797377297348e-08, + "loss": 0.0, + "num_input_tokens_seen": 123005480, + "step": 182595 + }, + { + "epoch": 4.460948379058461, + "grad_norm": 0.02167407050728798, + "learning_rate": 6.998662868319138e-08, + "loss": 0.0, + "num_input_tokens_seen": 123008808, + "step": 182600 + }, + { + "epoch": 4.461070529890309, + "grad_norm": 3.9221591578098014e-05, + "learning_rate": 6.9955290356717e-08, + "loss": 0.0, + "num_input_tokens_seen": 123012328, + "step": 182605 + }, + { + "epoch": 4.461192680722156, + "grad_norm": 0.0007778708823025227, + "learning_rate": 6.992395879377766e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123015720, + "step": 182610 + }, + { + "epoch": 4.461314831554003, + "grad_norm": 0.00041512076859362423, + "learning_rate": 6.989263399460155e-08, + "loss": 0.0, + "num_input_tokens_seen": 123019048, + "step": 182615 + }, + { + "epoch": 4.46143698238585, + "grad_norm": 0.00015520087617915124, + "learning_rate": 6.986131595941624e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123022568, + "step": 182620 + }, + { + "epoch": 4.461559133217698, + "grad_norm": 6.149549881229177e-05, + "learning_rate": 6.98300046884498e-08, + "loss": 0.0, + "num_input_tokens_seen": 123025640, + "step": 182625 + }, + { + "epoch": 4.461681284049544, + "grad_norm": 0.0043769595213234425, + "learning_rate": 6.97987001819298e-08, + "loss": 0.0, + "num_input_tokens_seen": 123029160, + "step": 182630 + }, + { + "epoch": 4.461803434881391, + "grad_norm": 0.0011604566825553775, + "learning_rate": 6.976740244008361e-08, + "loss": 0.0, + "num_input_tokens_seen": 123032360, + "step": 182635 + }, + { + "epoch": 4.461925585713239, + "grad_norm": 0.0051023769192397594, + "learning_rate": 6.973611146313929e-08, + "loss": 0.0, + "num_input_tokens_seen": 123035880, + "step": 182640 + }, + { + "epoch": 4.4620477365450855, + "grad_norm": 0.003443704219534993, + "learning_rate": 6.970482725132399e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123038888, + "step": 182645 + }, + { + "epoch": 4.462169887376933, + "grad_norm": 0.00011538012040546164, + "learning_rate": 6.967354980486562e-08, + "loss": 0.0, + "num_input_tokens_seen": 123042088, + "step": 182650 + }, + { + "epoch": 4.46229203820878, + "grad_norm": 0.0013685214798897505, + "learning_rate": 6.964227912399123e-08, + "loss": 0.0, + "num_input_tokens_seen": 123045416, + "step": 182655 + }, + { + "epoch": 4.4624141890406275, + "grad_norm": 0.00011652199464151636, + "learning_rate": 6.961101520892831e-08, + "loss": 0.0, + "num_input_tokens_seen": 123048616, + "step": 182660 + }, + { + "epoch": 4.462536339872474, + "grad_norm": 0.005106240976601839, + "learning_rate": 6.957975805990469e-08, + "loss": 0.0, + "num_input_tokens_seen": 123051560, + "step": 182665 + }, + { + "epoch": 4.462658490704322, + "grad_norm": 0.003788114059716463, + "learning_rate": 6.954850767714704e-08, + "loss": 0.0, + "num_input_tokens_seen": 123055144, + "step": 182670 + }, + { + "epoch": 4.462780641536169, + "grad_norm": 3.244485560571775e-05, + "learning_rate": 6.951726406088309e-08, + "loss": 0.0, + "num_input_tokens_seen": 123059624, + "step": 182675 + }, + { + "epoch": 4.462902792368016, + "grad_norm": 0.001383076305501163, + "learning_rate": 6.948602721133967e-08, + "loss": 0.0, + "num_input_tokens_seen": 123063336, + "step": 182680 + }, + { + "epoch": 4.463024943199863, + "grad_norm": 0.0005897828377783298, + "learning_rate": 6.945479712874436e-08, + "loss": 0.0, + "num_input_tokens_seen": 123066728, + "step": 182685 + }, + { + "epoch": 4.463147094031711, + "grad_norm": 2.9718587029492483e-05, + "learning_rate": 6.942357381332387e-08, + "loss": 0.0, + "num_input_tokens_seen": 123070376, + "step": 182690 + }, + { + "epoch": 4.463269244863557, + "grad_norm": 0.00011758630716940388, + "learning_rate": 6.939235726530535e-08, + "loss": 0.0, + "num_input_tokens_seen": 123073512, + "step": 182695 + }, + { + "epoch": 4.463391395695405, + "grad_norm": 0.7688287496566772, + "learning_rate": 6.936114748491617e-08, + "loss": 0.0005, + "num_input_tokens_seen": 123076520, + "step": 182700 + }, + { + "epoch": 4.463513546527252, + "grad_norm": 0.0001084234390873462, + "learning_rate": 6.932994447238294e-08, + "loss": 0.0, + "num_input_tokens_seen": 123080360, + "step": 182705 + }, + { + "epoch": 4.463635697359099, + "grad_norm": 0.0003334138309583068, + "learning_rate": 6.929874822793269e-08, + "loss": 0.0, + "num_input_tokens_seen": 123084072, + "step": 182710 + }, + { + "epoch": 4.463757848190946, + "grad_norm": 0.0001240150013472885, + "learning_rate": 6.926755875179224e-08, + "loss": 0.0, + "num_input_tokens_seen": 123087208, + "step": 182715 + }, + { + "epoch": 4.463879999022794, + "grad_norm": 0.0007824696367606521, + "learning_rate": 6.923637604418853e-08, + "loss": 0.0, + "num_input_tokens_seen": 123090344, + "step": 182720 + }, + { + "epoch": 4.4640021498546405, + "grad_norm": 7.241334969876334e-05, + "learning_rate": 6.920520010534803e-08, + "loss": 0.0, + "num_input_tokens_seen": 123093608, + "step": 182725 + }, + { + "epoch": 4.464124300686487, + "grad_norm": 0.0003914496919605881, + "learning_rate": 6.91740309354979e-08, + "loss": 0.0, + "num_input_tokens_seen": 123097448, + "step": 182730 + }, + { + "epoch": 4.464246451518335, + "grad_norm": 0.0005804976099170744, + "learning_rate": 6.914286853486462e-08, + "loss": 0.0, + "num_input_tokens_seen": 123101352, + "step": 182735 + }, + { + "epoch": 4.464368602350182, + "grad_norm": 0.0001754326221998781, + "learning_rate": 6.911171290367457e-08, + "loss": 0.0, + "num_input_tokens_seen": 123104488, + "step": 182740 + }, + { + "epoch": 4.464490753182029, + "grad_norm": 0.0002568564668763429, + "learning_rate": 6.908056404215467e-08, + "loss": 0.0, + "num_input_tokens_seen": 123107752, + "step": 182745 + }, + { + "epoch": 4.464612904013876, + "grad_norm": 7.831337279640138e-05, + "learning_rate": 6.90494219505311e-08, + "loss": 0.0, + "num_input_tokens_seen": 123111336, + "step": 182750 + }, + { + "epoch": 4.464735054845724, + "grad_norm": 0.0005972188664600253, + "learning_rate": 6.901828662903054e-08, + "loss": 0.0172, + "num_input_tokens_seen": 123114536, + "step": 182755 + }, + { + "epoch": 4.46485720567757, + "grad_norm": 0.000675778544973582, + "learning_rate": 6.898715807787958e-08, + "loss": 0.0003, + "num_input_tokens_seen": 123117864, + "step": 182760 + }, + { + "epoch": 4.464979356509418, + "grad_norm": 8.06990938144736e-05, + "learning_rate": 6.895603629730429e-08, + "loss": 0.0, + "num_input_tokens_seen": 123120680, + "step": 182765 + }, + { + "epoch": 4.465101507341265, + "grad_norm": 0.012151413597166538, + "learning_rate": 6.892492128753124e-08, + "loss": 0.0, + "num_input_tokens_seen": 123124136, + "step": 182770 + }, + { + "epoch": 4.465223658173112, + "grad_norm": 0.0002190818777307868, + "learning_rate": 6.88938130487865e-08, + "loss": 0.0, + "num_input_tokens_seen": 123127528, + "step": 182775 + }, + { + "epoch": 4.465345809004959, + "grad_norm": 0.000651566602755338, + "learning_rate": 6.886271158129642e-08, + "loss": 0.0, + "num_input_tokens_seen": 123130600, + "step": 182780 + }, + { + "epoch": 4.465467959836807, + "grad_norm": 0.00015405859448947012, + "learning_rate": 6.883161688528715e-08, + "loss": 0.0, + "num_input_tokens_seen": 123134888, + "step": 182785 + }, + { + "epoch": 4.4655901106686535, + "grad_norm": 0.0005990671343170106, + "learning_rate": 6.880052896098465e-08, + "loss": 0.0, + "num_input_tokens_seen": 123138344, + "step": 182790 + }, + { + "epoch": 4.465712261500501, + "grad_norm": 0.0010906461393460631, + "learning_rate": 6.876944780861548e-08, + "loss": 0.0, + "num_input_tokens_seen": 123142056, + "step": 182795 + }, + { + "epoch": 4.465834412332348, + "grad_norm": 0.00044133083429187536, + "learning_rate": 6.873837342840516e-08, + "loss": 0.0, + "num_input_tokens_seen": 123145384, + "step": 182800 + }, + { + "epoch": 4.4659565631641955, + "grad_norm": 0.0014121757121756673, + "learning_rate": 6.870730582057993e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123148712, + "step": 182805 + }, + { + "epoch": 4.466078713996042, + "grad_norm": 0.003218111116439104, + "learning_rate": 6.867624498536561e-08, + "loss": 0.0, + "num_input_tokens_seen": 123151848, + "step": 182810 + }, + { + "epoch": 4.46620086482789, + "grad_norm": 3.127128002233803e-05, + "learning_rate": 6.864519092298804e-08, + "loss": 0.0, + "num_input_tokens_seen": 123155112, + "step": 182815 + }, + { + "epoch": 4.466323015659737, + "grad_norm": 0.0003318258677609265, + "learning_rate": 6.861414363367335e-08, + "loss": 0.0, + "num_input_tokens_seen": 123158312, + "step": 182820 + }, + { + "epoch": 4.466445166491583, + "grad_norm": 1.8997519873664714e-05, + "learning_rate": 6.858310311764715e-08, + "loss": 0.0, + "num_input_tokens_seen": 123161640, + "step": 182825 + }, + { + "epoch": 4.466567317323431, + "grad_norm": 0.00020123532158322632, + "learning_rate": 6.855206937513491e-08, + "loss": 0.0, + "num_input_tokens_seen": 123165160, + "step": 182830 + }, + { + "epoch": 4.466689468155278, + "grad_norm": 5.2726703870575875e-05, + "learning_rate": 6.85210424063628e-08, + "loss": 0.0, + "num_input_tokens_seen": 123168808, + "step": 182835 + }, + { + "epoch": 4.466811618987125, + "grad_norm": 0.0004758377617690712, + "learning_rate": 6.849002221155598e-08, + "loss": 0.0, + "num_input_tokens_seen": 123172264, + "step": 182840 + }, + { + "epoch": 4.466933769818972, + "grad_norm": 0.0007463787333108485, + "learning_rate": 6.845900879094046e-08, + "loss": 0.0, + "num_input_tokens_seen": 123175528, + "step": 182845 + }, + { + "epoch": 4.46705592065082, + "grad_norm": 9.71447370829992e-05, + "learning_rate": 6.842800214474143e-08, + "loss": 0.0, + "num_input_tokens_seen": 123179176, + "step": 182850 + }, + { + "epoch": 4.4671780714826665, + "grad_norm": 0.0004629456379916519, + "learning_rate": 6.839700227318468e-08, + "loss": 0.0, + "num_input_tokens_seen": 123182376, + "step": 182855 + }, + { + "epoch": 4.467300222314514, + "grad_norm": 0.0009171870187856257, + "learning_rate": 6.836600917649538e-08, + "loss": 0.0, + "num_input_tokens_seen": 123185704, + "step": 182860 + }, + { + "epoch": 4.467422373146361, + "grad_norm": 7.736064617347438e-06, + "learning_rate": 6.833502285489911e-08, + "loss": 0.0, + "num_input_tokens_seen": 123189032, + "step": 182865 + }, + { + "epoch": 4.467544523978209, + "grad_norm": 0.0008894494967535138, + "learning_rate": 6.830404330862104e-08, + "loss": 0.0, + "num_input_tokens_seen": 123192808, + "step": 182870 + }, + { + "epoch": 4.467666674810055, + "grad_norm": 0.0012684657704085112, + "learning_rate": 6.827307053788667e-08, + "loss": 0.0, + "num_input_tokens_seen": 123196200, + "step": 182875 + }, + { + "epoch": 4.467788825641903, + "grad_norm": 0.004361152183264494, + "learning_rate": 6.8242104542921e-08, + "loss": 0.0, + "num_input_tokens_seen": 123199144, + "step": 182880 + }, + { + "epoch": 4.46791097647375, + "grad_norm": 0.00017511250916868448, + "learning_rate": 6.821114532394944e-08, + "loss": 0.0, + "num_input_tokens_seen": 123203048, + "step": 182885 + }, + { + "epoch": 4.468033127305597, + "grad_norm": 0.0025651047471910715, + "learning_rate": 6.818019288119714e-08, + "loss": 0.0, + "num_input_tokens_seen": 123205992, + "step": 182890 + }, + { + "epoch": 4.468155278137444, + "grad_norm": 0.00010991712042596191, + "learning_rate": 6.81492472148889e-08, + "loss": 0.0, + "num_input_tokens_seen": 123209256, + "step": 182895 + }, + { + "epoch": 4.468277428969291, + "grad_norm": 0.0003879454161506146, + "learning_rate": 6.811830832525023e-08, + "loss": 0.0, + "num_input_tokens_seen": 123212392, + "step": 182900 + }, + { + "epoch": 4.468399579801138, + "grad_norm": 0.00018364388961344957, + "learning_rate": 6.808737621250571e-08, + "loss": 0.0, + "num_input_tokens_seen": 123215784, + "step": 182905 + }, + { + "epoch": 4.468521730632986, + "grad_norm": 0.0015769852325320244, + "learning_rate": 6.805645087688039e-08, + "loss": 0.0, + "num_input_tokens_seen": 123219176, + "step": 182910 + }, + { + "epoch": 4.468643881464833, + "grad_norm": 0.024887003004550934, + "learning_rate": 6.80255323185993e-08, + "loss": 0.0, + "num_input_tokens_seen": 123222440, + "step": 182915 + }, + { + "epoch": 4.46876603229668, + "grad_norm": 1.2393422366585582e-05, + "learning_rate": 6.799462053788718e-08, + "loss": 0.0, + "num_input_tokens_seen": 123225448, + "step": 182920 + }, + { + "epoch": 4.468888183128527, + "grad_norm": 0.007184472866356373, + "learning_rate": 6.796371553496904e-08, + "loss": 0.0, + "num_input_tokens_seen": 123228456, + "step": 182925 + }, + { + "epoch": 4.469010333960374, + "grad_norm": 0.029829949140548706, + "learning_rate": 6.793281731006917e-08, + "loss": 0.0348, + "num_input_tokens_seen": 123231720, + "step": 182930 + }, + { + "epoch": 4.469132484792222, + "grad_norm": 0.0022584174294024706, + "learning_rate": 6.790192586341282e-08, + "loss": 0.0, + "num_input_tokens_seen": 123235432, + "step": 182935 + }, + { + "epoch": 4.469254635624068, + "grad_norm": 2.5439127057325095e-05, + "learning_rate": 6.787104119522425e-08, + "loss": 0.0, + "num_input_tokens_seen": 123239144, + "step": 182940 + }, + { + "epoch": 4.469376786455916, + "grad_norm": 0.00019265869923401624, + "learning_rate": 6.784016330572816e-08, + "loss": 0.0, + "num_input_tokens_seen": 123242472, + "step": 182945 + }, + { + "epoch": 4.469498937287763, + "grad_norm": 0.009062398225069046, + "learning_rate": 6.780929219514919e-08, + "loss": 0.0, + "num_input_tokens_seen": 123245608, + "step": 182950 + }, + { + "epoch": 4.46962108811961, + "grad_norm": 0.007672363892197609, + "learning_rate": 6.777842786371157e-08, + "loss": 0.0, + "num_input_tokens_seen": 123249064, + "step": 182955 + }, + { + "epoch": 4.469743238951457, + "grad_norm": 0.0006050781230442226, + "learning_rate": 6.774757031164025e-08, + "loss": 0.0, + "num_input_tokens_seen": 123252136, + "step": 182960 + }, + { + "epoch": 4.469865389783305, + "grad_norm": 4.5979195419931784e-05, + "learning_rate": 6.771671953915914e-08, + "loss": 0.0, + "num_input_tokens_seen": 123255784, + "step": 182965 + }, + { + "epoch": 4.4699875406151515, + "grad_norm": 9.688878344604746e-05, + "learning_rate": 6.768587554649286e-08, + "loss": 0.0, + "num_input_tokens_seen": 123259304, + "step": 182970 + }, + { + "epoch": 4.470109691446999, + "grad_norm": 0.0017482617404311895, + "learning_rate": 6.765503833386566e-08, + "loss": 0.0, + "num_input_tokens_seen": 123262376, + "step": 182975 + }, + { + "epoch": 4.470231842278846, + "grad_norm": 8.310518751386553e-05, + "learning_rate": 6.76242079015017e-08, + "loss": 0.0, + "num_input_tokens_seen": 123265384, + "step": 182980 + }, + { + "epoch": 4.4703539931106935, + "grad_norm": 2.6630152206053026e-05, + "learning_rate": 6.759338424962546e-08, + "loss": 0.0, + "num_input_tokens_seen": 123268904, + "step": 182985 + }, + { + "epoch": 4.47047614394254, + "grad_norm": 0.00017120575648732483, + "learning_rate": 6.756256737846067e-08, + "loss": 0.0, + "num_input_tokens_seen": 123272040, + "step": 182990 + }, + { + "epoch": 4.470598294774387, + "grad_norm": 0.0010852214181795716, + "learning_rate": 6.753175728823191e-08, + "loss": 0.0, + "num_input_tokens_seen": 123275624, + "step": 182995 + }, + { + "epoch": 4.470720445606235, + "grad_norm": 0.00042630190728232265, + "learning_rate": 6.750095397916289e-08, + "loss": 0.0, + "num_input_tokens_seen": 123279144, + "step": 183000 + }, + { + "epoch": 4.470842596438081, + "grad_norm": 0.000170299899764359, + "learning_rate": 6.747015745147777e-08, + "loss": 0.0, + "num_input_tokens_seen": 123282536, + "step": 183005 + }, + { + "epoch": 4.470964747269929, + "grad_norm": 0.0007768705254420638, + "learning_rate": 6.74393677054006e-08, + "loss": 0.0, + "num_input_tokens_seen": 123285928, + "step": 183010 + }, + { + "epoch": 4.471086898101776, + "grad_norm": 0.00016575297922827303, + "learning_rate": 6.740858474115496e-08, + "loss": 0.0, + "num_input_tokens_seen": 123289576, + "step": 183015 + }, + { + "epoch": 4.471209048933623, + "grad_norm": 0.0007921412470750511, + "learning_rate": 6.737780855896513e-08, + "loss": 0.0, + "num_input_tokens_seen": 123293160, + "step": 183020 + }, + { + "epoch": 4.47133119976547, + "grad_norm": 8.899014937924221e-05, + "learning_rate": 6.73470391590546e-08, + "loss": 0.0, + "num_input_tokens_seen": 123296808, + "step": 183025 + }, + { + "epoch": 4.471453350597318, + "grad_norm": 0.0014562405413016677, + "learning_rate": 6.73162765416474e-08, + "loss": 0.0, + "num_input_tokens_seen": 123299752, + "step": 183030 + }, + { + "epoch": 4.4715755014291645, + "grad_norm": 0.0005221511819399893, + "learning_rate": 6.728552070696691e-08, + "loss": 0.0, + "num_input_tokens_seen": 123303592, + "step": 183035 + }, + { + "epoch": 4.471697652261012, + "grad_norm": 0.0009294108022004366, + "learning_rate": 6.725477165523719e-08, + "loss": 0.0, + "num_input_tokens_seen": 123307304, + "step": 183040 + }, + { + "epoch": 4.471819803092859, + "grad_norm": 0.0023763030767440796, + "learning_rate": 6.722402938668158e-08, + "loss": 0.0, + "num_input_tokens_seen": 123310824, + "step": 183045 + }, + { + "epoch": 4.4719419539247065, + "grad_norm": 0.0003205655375495553, + "learning_rate": 6.719329390152361e-08, + "loss": 0.0, + "num_input_tokens_seen": 123314280, + "step": 183050 + }, + { + "epoch": 4.472064104756553, + "grad_norm": 3.7294415960786864e-05, + "learning_rate": 6.716256519998698e-08, + "loss": 0.0, + "num_input_tokens_seen": 123318056, + "step": 183055 + }, + { + "epoch": 4.472186255588401, + "grad_norm": 0.0005499294493347406, + "learning_rate": 6.713184328229505e-08, + "loss": 0.0, + "num_input_tokens_seen": 123321512, + "step": 183060 + }, + { + "epoch": 4.472308406420248, + "grad_norm": 0.0001556864590384066, + "learning_rate": 6.71011281486712e-08, + "loss": 0.0, + "num_input_tokens_seen": 123324840, + "step": 183065 + }, + { + "epoch": 4.472430557252095, + "grad_norm": 8.681939652888104e-05, + "learning_rate": 6.707041979933903e-08, + "loss": 0.0606, + "num_input_tokens_seen": 123327976, + "step": 183070 + }, + { + "epoch": 4.472552708083942, + "grad_norm": 0.006124210078269243, + "learning_rate": 6.703971823452149e-08, + "loss": 0.0, + "num_input_tokens_seen": 123331112, + "step": 183075 + }, + { + "epoch": 4.47267485891579, + "grad_norm": 0.0030316852498799562, + "learning_rate": 6.700902345444226e-08, + "loss": 0.0, + "num_input_tokens_seen": 123334440, + "step": 183080 + }, + { + "epoch": 4.472797009747636, + "grad_norm": 0.00030044937739148736, + "learning_rate": 6.697833545932419e-08, + "loss": 0.0, + "num_input_tokens_seen": 123339944, + "step": 183085 + }, + { + "epoch": 4.472919160579483, + "grad_norm": 0.0003357155655976385, + "learning_rate": 6.694765424939075e-08, + "loss": 0.0, + "num_input_tokens_seen": 123343208, + "step": 183090 + }, + { + "epoch": 4.473041311411331, + "grad_norm": 0.0010258235270157456, + "learning_rate": 6.691697982486478e-08, + "loss": 0.0, + "num_input_tokens_seen": 123346728, + "step": 183095 + }, + { + "epoch": 4.4731634622431775, + "grad_norm": 0.000255766324698925, + "learning_rate": 6.688631218596951e-08, + "loss": 0.0003, + "num_input_tokens_seen": 123349992, + "step": 183100 + }, + { + "epoch": 4.473285613075025, + "grad_norm": 0.00048594220425002277, + "learning_rate": 6.685565133292814e-08, + "loss": 0.0, + "num_input_tokens_seen": 123353064, + "step": 183105 + }, + { + "epoch": 4.473407763906872, + "grad_norm": 0.01167520321905613, + "learning_rate": 6.682499726596336e-08, + "loss": 0.0, + "num_input_tokens_seen": 123356072, + "step": 183110 + }, + { + "epoch": 4.4735299147387195, + "grad_norm": 0.009621109813451767, + "learning_rate": 6.67943499852982e-08, + "loss": 0.0, + "num_input_tokens_seen": 123359336, + "step": 183115 + }, + { + "epoch": 4.473652065570566, + "grad_norm": 0.0003300695971120149, + "learning_rate": 6.67637094911555e-08, + "loss": 0.0, + "num_input_tokens_seen": 123362664, + "step": 183120 + }, + { + "epoch": 4.473774216402414, + "grad_norm": 0.0014379281783476472, + "learning_rate": 6.67330757837582e-08, + "loss": 0.0193, + "num_input_tokens_seen": 123365992, + "step": 183125 + }, + { + "epoch": 4.473896367234261, + "grad_norm": 0.0006100056925788522, + "learning_rate": 6.670244886332888e-08, + "loss": 0.0, + "num_input_tokens_seen": 123369128, + "step": 183130 + }, + { + "epoch": 4.474018518066108, + "grad_norm": 0.00041016406612470746, + "learning_rate": 6.667182873009047e-08, + "loss": 0.0, + "num_input_tokens_seen": 123372648, + "step": 183135 + }, + { + "epoch": 4.474140668897955, + "grad_norm": 0.27162519097328186, + "learning_rate": 6.664121538426548e-08, + "loss": 0.0002, + "num_input_tokens_seen": 123376168, + "step": 183140 + }, + { + "epoch": 4.474262819729803, + "grad_norm": 0.00015165298827923834, + "learning_rate": 6.661060882607672e-08, + "loss": 0.0, + "num_input_tokens_seen": 123379880, + "step": 183145 + }, + { + "epoch": 4.474384970561649, + "grad_norm": 8.916952356230468e-05, + "learning_rate": 6.658000905574657e-08, + "loss": 0.0, + "num_input_tokens_seen": 123383208, + "step": 183150 + }, + { + "epoch": 4.474507121393497, + "grad_norm": 5.2944047638447955e-05, + "learning_rate": 6.654941607349773e-08, + "loss": 0.0, + "num_input_tokens_seen": 123386088, + "step": 183155 + }, + { + "epoch": 4.474629272225344, + "grad_norm": 0.004181950818747282, + "learning_rate": 6.651882987955249e-08, + "loss": 0.0, + "num_input_tokens_seen": 123389288, + "step": 183160 + }, + { + "epoch": 4.474751423057191, + "grad_norm": 0.0015895121032372117, + "learning_rate": 6.648825047413353e-08, + "loss": 0.0, + "num_input_tokens_seen": 123392488, + "step": 183165 + }, + { + "epoch": 4.474873573889038, + "grad_norm": 0.0012971092946827412, + "learning_rate": 6.645767785746292e-08, + "loss": 0.0, + "num_input_tokens_seen": 123395944, + "step": 183170 + }, + { + "epoch": 4.474995724720886, + "grad_norm": 0.0003645285905804485, + "learning_rate": 6.642711202976336e-08, + "loss": 0.0, + "num_input_tokens_seen": 123399400, + "step": 183175 + }, + { + "epoch": 4.4751178755527325, + "grad_norm": 0.0002894159115385264, + "learning_rate": 6.63965529912568e-08, + "loss": 0.0, + "num_input_tokens_seen": 123402920, + "step": 183180 + }, + { + "epoch": 4.475240026384579, + "grad_norm": 0.00044401903869584203, + "learning_rate": 6.636600074216569e-08, + "loss": 0.0, + "num_input_tokens_seen": 123406248, + "step": 183185 + }, + { + "epoch": 4.475362177216427, + "grad_norm": 0.001973674399778247, + "learning_rate": 6.633545528271211e-08, + "loss": 0.0, + "num_input_tokens_seen": 123410024, + "step": 183190 + }, + { + "epoch": 4.475484328048274, + "grad_norm": 0.0008897829684428871, + "learning_rate": 6.63049166131181e-08, + "loss": 0.0399, + "num_input_tokens_seen": 123413608, + "step": 183195 + }, + { + "epoch": 4.475606478880121, + "grad_norm": 0.00013381907774601132, + "learning_rate": 6.627438473360614e-08, + "loss": 0.0, + "num_input_tokens_seen": 123416552, + "step": 183200 + }, + { + "epoch": 4.475728629711968, + "grad_norm": 0.001655498635955155, + "learning_rate": 6.624385964439782e-08, + "loss": 0.0, + "num_input_tokens_seen": 123419752, + "step": 183205 + }, + { + "epoch": 4.475850780543816, + "grad_norm": 8.101667481241748e-05, + "learning_rate": 6.621334134571543e-08, + "loss": 0.0, + "num_input_tokens_seen": 123423144, + "step": 183210 + }, + { + "epoch": 4.475972931375662, + "grad_norm": 0.0003639574279077351, + "learning_rate": 6.618282983778056e-08, + "loss": 0.0, + "num_input_tokens_seen": 123426536, + "step": 183215 + }, + { + "epoch": 4.47609508220751, + "grad_norm": 0.00012090370728401467, + "learning_rate": 6.615232512081536e-08, + "loss": 0.0, + "num_input_tokens_seen": 123430184, + "step": 183220 + }, + { + "epoch": 4.476217233039357, + "grad_norm": 2.0068329831701703e-05, + "learning_rate": 6.612182719504189e-08, + "loss": 0.0, + "num_input_tokens_seen": 123433512, + "step": 183225 + }, + { + "epoch": 4.4763393838712044, + "grad_norm": 0.00010261212446494028, + "learning_rate": 6.609133606068141e-08, + "loss": 0.0, + "num_input_tokens_seen": 123436584, + "step": 183230 + }, + { + "epoch": 4.476461534703051, + "grad_norm": 0.0005591385997831821, + "learning_rate": 6.606085171795606e-08, + "loss": 0.0, + "num_input_tokens_seen": 123440040, + "step": 183235 + }, + { + "epoch": 4.476583685534899, + "grad_norm": 0.0006489804945886135, + "learning_rate": 6.603037416708734e-08, + "loss": 0.0007, + "num_input_tokens_seen": 123443368, + "step": 183240 + }, + { + "epoch": 4.476705836366746, + "grad_norm": 0.0007108654244802892, + "learning_rate": 6.599990340829697e-08, + "loss": 0.0, + "num_input_tokens_seen": 123446760, + "step": 183245 + }, + { + "epoch": 4.476827987198593, + "grad_norm": 0.0032975731883198023, + "learning_rate": 6.596943944180666e-08, + "loss": 0.0, + "num_input_tokens_seen": 123450408, + "step": 183250 + }, + { + "epoch": 4.47695013803044, + "grad_norm": 0.0004452765570022166, + "learning_rate": 6.593898226783757e-08, + "loss": 0.0, + "num_input_tokens_seen": 123453736, + "step": 183255 + }, + { + "epoch": 4.477072288862287, + "grad_norm": 2.435279020573944e-05, + "learning_rate": 6.590853188661161e-08, + "loss": 0.0, + "num_input_tokens_seen": 123457064, + "step": 183260 + }, + { + "epoch": 4.477194439694134, + "grad_norm": 0.03045794926583767, + "learning_rate": 6.587808829834984e-08, + "loss": 0.0, + "num_input_tokens_seen": 123460712, + "step": 183265 + }, + { + "epoch": 4.477316590525981, + "grad_norm": 0.00014943921996746212, + "learning_rate": 6.584765150327409e-08, + "loss": 0.0, + "num_input_tokens_seen": 123463848, + "step": 183270 + }, + { + "epoch": 4.477438741357829, + "grad_norm": 0.0033560895826667547, + "learning_rate": 6.58172215016053e-08, + "loss": 0.0, + "num_input_tokens_seen": 123467176, + "step": 183275 + }, + { + "epoch": 4.4775608921896755, + "grad_norm": 0.00017658942670095712, + "learning_rate": 6.578679829356514e-08, + "loss": 0.0, + "num_input_tokens_seen": 123470440, + "step": 183280 + }, + { + "epoch": 4.477683043021523, + "grad_norm": 0.00010721410217229277, + "learning_rate": 6.575638187937437e-08, + "loss": 0.0, + "num_input_tokens_seen": 123473832, + "step": 183285 + }, + { + "epoch": 4.47780519385337, + "grad_norm": 0.019060831516981125, + "learning_rate": 6.572597225925458e-08, + "loss": 0.0, + "num_input_tokens_seen": 123477928, + "step": 183290 + }, + { + "epoch": 4.4779273446852175, + "grad_norm": 0.0005212004762142897, + "learning_rate": 6.569556943342691e-08, + "loss": 0.0, + "num_input_tokens_seen": 123481512, + "step": 183295 + }, + { + "epoch": 4.478049495517064, + "grad_norm": 3.1600420475006104, + "learning_rate": 6.56651734021123e-08, + "loss": 0.0009, + "num_input_tokens_seen": 123485352, + "step": 183300 + }, + { + "epoch": 4.478171646348912, + "grad_norm": 0.0009134397841989994, + "learning_rate": 6.563478416553192e-08, + "loss": 0.0, + "num_input_tokens_seen": 123488232, + "step": 183305 + }, + { + "epoch": 4.478293797180759, + "grad_norm": 0.0004978424985893071, + "learning_rate": 6.560440172390658e-08, + "loss": 0.0, + "num_input_tokens_seen": 123491688, + "step": 183310 + }, + { + "epoch": 4.478415948012606, + "grad_norm": 0.00014363221998792142, + "learning_rate": 6.557402607745733e-08, + "loss": 0.0, + "num_input_tokens_seen": 123494888, + "step": 183315 + }, + { + "epoch": 4.478538098844453, + "grad_norm": 0.0022859908640384674, + "learning_rate": 6.55436572264052e-08, + "loss": 0.0, + "num_input_tokens_seen": 123498344, + "step": 183320 + }, + { + "epoch": 4.478660249676301, + "grad_norm": 0.000360700098099187, + "learning_rate": 6.551329517097092e-08, + "loss": 0.0, + "num_input_tokens_seen": 123501800, + "step": 183325 + }, + { + "epoch": 4.478782400508147, + "grad_norm": 0.0004340151499491185, + "learning_rate": 6.548293991137533e-08, + "loss": 0.0, + "num_input_tokens_seen": 123505448, + "step": 183330 + }, + { + "epoch": 4.478904551339995, + "grad_norm": 0.0074119605123996735, + "learning_rate": 6.54525914478391e-08, + "loss": 0.0, + "num_input_tokens_seen": 123508520, + "step": 183335 + }, + { + "epoch": 4.479026702171842, + "grad_norm": 0.00487268203869462, + "learning_rate": 6.542224978058309e-08, + "loss": 0.0, + "num_input_tokens_seen": 123511912, + "step": 183340 + }, + { + "epoch": 4.479148853003689, + "grad_norm": 0.0007920575444586575, + "learning_rate": 6.539191490982766e-08, + "loss": 0.0, + "num_input_tokens_seen": 123514984, + "step": 183345 + }, + { + "epoch": 4.479271003835536, + "grad_norm": 0.0023543655406683683, + "learning_rate": 6.536158683579374e-08, + "loss": 0.0, + "num_input_tokens_seen": 123517928, + "step": 183350 + }, + { + "epoch": 4.479393154667383, + "grad_norm": 0.00011763614020310342, + "learning_rate": 6.533126555870183e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123521192, + "step": 183355 + }, + { + "epoch": 4.4795153054992305, + "grad_norm": 3.650221333373338e-05, + "learning_rate": 6.53009510787722e-08, + "loss": 0.0, + "num_input_tokens_seen": 123524264, + "step": 183360 + }, + { + "epoch": 4.479637456331077, + "grad_norm": 0.07241086661815643, + "learning_rate": 6.527064339622557e-08, + "loss": 0.0, + "num_input_tokens_seen": 123527464, + "step": 183365 + }, + { + "epoch": 4.479759607162925, + "grad_norm": 2.090701127599459e-05, + "learning_rate": 6.524034251128207e-08, + "loss": 0.0, + "num_input_tokens_seen": 123530984, + "step": 183370 + }, + { + "epoch": 4.479881757994772, + "grad_norm": 0.047290850430727005, + "learning_rate": 6.521004842416222e-08, + "loss": 0.0, + "num_input_tokens_seen": 123534376, + "step": 183375 + }, + { + "epoch": 4.480003908826619, + "grad_norm": 0.007085281889885664, + "learning_rate": 6.51797611350865e-08, + "loss": 0.0, + "num_input_tokens_seen": 123538024, + "step": 183380 + }, + { + "epoch": 4.480126059658466, + "grad_norm": 0.00010538164497120306, + "learning_rate": 6.514948064427484e-08, + "loss": 0.0, + "num_input_tokens_seen": 123541352, + "step": 183385 + }, + { + "epoch": 4.480248210490314, + "grad_norm": 0.00016254704678431153, + "learning_rate": 6.511920695194783e-08, + "loss": 0.0, + "num_input_tokens_seen": 123544552, + "step": 183390 + }, + { + "epoch": 4.48037036132216, + "grad_norm": 4.432534842635505e-05, + "learning_rate": 6.50889400583251e-08, + "loss": 0.0, + "num_input_tokens_seen": 123548072, + "step": 183395 + }, + { + "epoch": 4.480492512154008, + "grad_norm": 9.711547318147495e-05, + "learning_rate": 6.505867996362735e-08, + "loss": 0.0, + "num_input_tokens_seen": 123551656, + "step": 183400 + }, + { + "epoch": 4.480614662985855, + "grad_norm": 0.0009047918720170856, + "learning_rate": 6.502842666807406e-08, + "loss": 0.0, + "num_input_tokens_seen": 123555048, + "step": 183405 + }, + { + "epoch": 4.480736813817702, + "grad_norm": 0.00018528725195210427, + "learning_rate": 6.499818017188563e-08, + "loss": 0.05, + "num_input_tokens_seen": 123558184, + "step": 183410 + }, + { + "epoch": 4.480858964649549, + "grad_norm": 7.544342952314764e-05, + "learning_rate": 6.49679404752821e-08, + "loss": 0.0, + "num_input_tokens_seen": 123561512, + "step": 183415 + }, + { + "epoch": 4.480981115481397, + "grad_norm": 0.0016681732377037406, + "learning_rate": 6.493770757848294e-08, + "loss": 0.0, + "num_input_tokens_seen": 123565288, + "step": 183420 + }, + { + "epoch": 4.4811032663132435, + "grad_norm": 0.0003314499044790864, + "learning_rate": 6.490748148170844e-08, + "loss": 0.0, + "num_input_tokens_seen": 123568552, + "step": 183425 + }, + { + "epoch": 4.481225417145091, + "grad_norm": 0.00010013279097620398, + "learning_rate": 6.48772621851782e-08, + "loss": 0.0, + "num_input_tokens_seen": 123572136, + "step": 183430 + }, + { + "epoch": 4.481347567976938, + "grad_norm": 0.001971852732822299, + "learning_rate": 6.484704968911204e-08, + "loss": 0.0002, + "num_input_tokens_seen": 123575464, + "step": 183435 + }, + { + "epoch": 4.4814697188087855, + "grad_norm": 0.0005752836004830897, + "learning_rate": 6.481684399372955e-08, + "loss": 0.0, + "num_input_tokens_seen": 123579176, + "step": 183440 + }, + { + "epoch": 4.481591869640632, + "grad_norm": 5.8873403759207577e-05, + "learning_rate": 6.47866450992507e-08, + "loss": 0.0, + "num_input_tokens_seen": 123582504, + "step": 183445 + }, + { + "epoch": 4.481714020472479, + "grad_norm": 0.0018522852333262563, + "learning_rate": 6.475645300589472e-08, + "loss": 0.0, + "num_input_tokens_seen": 123585832, + "step": 183450 + }, + { + "epoch": 4.481836171304327, + "grad_norm": 0.0020564633887261152, + "learning_rate": 6.472626771388156e-08, + "loss": 0.0, + "num_input_tokens_seen": 123589352, + "step": 183455 + }, + { + "epoch": 4.481958322136173, + "grad_norm": 0.003389239078387618, + "learning_rate": 6.46960892234304e-08, + "loss": 0.0005, + "num_input_tokens_seen": 123592808, + "step": 183460 + }, + { + "epoch": 4.482080472968021, + "grad_norm": 7.917655602796003e-05, + "learning_rate": 6.466591753476092e-08, + "loss": 0.0, + "num_input_tokens_seen": 123596008, + "step": 183465 + }, + { + "epoch": 4.482202623799868, + "grad_norm": 0.00023252793471328914, + "learning_rate": 6.463575264809229e-08, + "loss": 0.0, + "num_input_tokens_seen": 123599592, + "step": 183470 + }, + { + "epoch": 4.482324774631715, + "grad_norm": 6.5754255047068e-05, + "learning_rate": 6.460559456364434e-08, + "loss": 0.0, + "num_input_tokens_seen": 123603048, + "step": 183475 + }, + { + "epoch": 4.482446925463562, + "grad_norm": 0.00039355934131890535, + "learning_rate": 6.457544328163578e-08, + "loss": 0.0, + "num_input_tokens_seen": 123606120, + "step": 183480 + }, + { + "epoch": 4.48256907629541, + "grad_norm": 0.00019612463074736297, + "learning_rate": 6.454529880228643e-08, + "loss": 0.0, + "num_input_tokens_seen": 123609256, + "step": 183485 + }, + { + "epoch": 4.4826912271272565, + "grad_norm": 0.0003407395852264017, + "learning_rate": 6.451516112581512e-08, + "loss": 0.0, + "num_input_tokens_seen": 123612520, + "step": 183490 + }, + { + "epoch": 4.482813377959104, + "grad_norm": 2.2714593796990812e-05, + "learning_rate": 6.448503025244134e-08, + "loss": 0.0, + "num_input_tokens_seen": 123615912, + "step": 183495 + }, + { + "epoch": 4.482935528790951, + "grad_norm": 1.2123967280786019e-05, + "learning_rate": 6.445490618238392e-08, + "loss": 0.0, + "num_input_tokens_seen": 123619176, + "step": 183500 + }, + { + "epoch": 4.4830576796227986, + "grad_norm": 0.0008833124884404242, + "learning_rate": 6.4424788915862e-08, + "loss": 0.0, + "num_input_tokens_seen": 123622696, + "step": 183505 + }, + { + "epoch": 4.483179830454645, + "grad_norm": 0.0001925179094541818, + "learning_rate": 6.439467845309488e-08, + "loss": 0.0, + "num_input_tokens_seen": 123625960, + "step": 183510 + }, + { + "epoch": 4.483301981286493, + "grad_norm": 0.0018321550451219082, + "learning_rate": 6.436457479430101e-08, + "loss": 0.0, + "num_input_tokens_seen": 123629032, + "step": 183515 + }, + { + "epoch": 4.48342413211834, + "grad_norm": 0.0010063842637464404, + "learning_rate": 6.433447793969982e-08, + "loss": 0.0, + "num_input_tokens_seen": 123632488, + "step": 183520 + }, + { + "epoch": 4.483546282950186, + "grad_norm": 0.00020096627122256905, + "learning_rate": 6.430438788950987e-08, + "loss": 0.0, + "num_input_tokens_seen": 123635944, + "step": 183525 + }, + { + "epoch": 4.483668433782034, + "grad_norm": 0.015927936881780624, + "learning_rate": 6.427430464395022e-08, + "loss": 0.0, + "num_input_tokens_seen": 123639464, + "step": 183530 + }, + { + "epoch": 4.483790584613882, + "grad_norm": 0.0010257528629153967, + "learning_rate": 6.424422820323938e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123643304, + "step": 183535 + }, + { + "epoch": 4.483912735445728, + "grad_norm": 0.0001770484377630055, + "learning_rate": 6.421415856759616e-08, + "loss": 0.0, + "num_input_tokens_seen": 123646952, + "step": 183540 + }, + { + "epoch": 4.484034886277575, + "grad_norm": 0.0002756124595180154, + "learning_rate": 6.418409573723937e-08, + "loss": 0.0, + "num_input_tokens_seen": 123650280, + "step": 183545 + }, + { + "epoch": 4.484157037109423, + "grad_norm": 5.871909888810478e-05, + "learning_rate": 6.415403971238741e-08, + "loss": 0.0, + "num_input_tokens_seen": 123653352, + "step": 183550 + }, + { + "epoch": 4.48427918794127, + "grad_norm": 0.0005751706194132566, + "learning_rate": 6.412399049325922e-08, + "loss": 0.0005, + "num_input_tokens_seen": 123656552, + "step": 183555 + }, + { + "epoch": 4.484401338773117, + "grad_norm": 0.00021047874179203063, + "learning_rate": 6.409394808007307e-08, + "loss": 0.0, + "num_input_tokens_seen": 123659752, + "step": 183560 + }, + { + "epoch": 4.484523489604964, + "grad_norm": 7.142380491131917e-05, + "learning_rate": 6.406391247304732e-08, + "loss": 0.0, + "num_input_tokens_seen": 123663592, + "step": 183565 + }, + { + "epoch": 4.484645640436812, + "grad_norm": 4.248780169291422e-05, + "learning_rate": 6.403388367240059e-08, + "loss": 0.0, + "num_input_tokens_seen": 123666728, + "step": 183570 + }, + { + "epoch": 4.484767791268658, + "grad_norm": 0.001250158529728651, + "learning_rate": 6.400386167835115e-08, + "loss": 0.0, + "num_input_tokens_seen": 123669992, + "step": 183575 + }, + { + "epoch": 4.484889942100506, + "grad_norm": 0.001813249895349145, + "learning_rate": 6.397384649111748e-08, + "loss": 0.0, + "num_input_tokens_seen": 123673640, + "step": 183580 + }, + { + "epoch": 4.485012092932353, + "grad_norm": 0.0007400148897431791, + "learning_rate": 6.394383811091764e-08, + "loss": 0.0, + "num_input_tokens_seen": 123677032, + "step": 183585 + }, + { + "epoch": 4.4851342437642, + "grad_norm": 8.471470209769905e-05, + "learning_rate": 6.39138365379701e-08, + "loss": 0.0, + "num_input_tokens_seen": 123680168, + "step": 183590 + }, + { + "epoch": 4.485256394596047, + "grad_norm": 0.00019484623044263572, + "learning_rate": 6.388384177249273e-08, + "loss": 0.0, + "num_input_tokens_seen": 123683432, + "step": 183595 + }, + { + "epoch": 4.485378545427895, + "grad_norm": 7.977020868565887e-05, + "learning_rate": 6.385385381470388e-08, + "loss": 0.0, + "num_input_tokens_seen": 123686888, + "step": 183600 + }, + { + "epoch": 4.4855006962597415, + "grad_norm": 0.00033194624120369554, + "learning_rate": 6.382387266482182e-08, + "loss": 0.0002, + "num_input_tokens_seen": 123690280, + "step": 183605 + }, + { + "epoch": 4.485622847091589, + "grad_norm": 0.00013592727191280574, + "learning_rate": 6.379389832306415e-08, + "loss": 0.0, + "num_input_tokens_seen": 123693608, + "step": 183610 + }, + { + "epoch": 4.485744997923436, + "grad_norm": 0.23284316062927246, + "learning_rate": 6.376393078964915e-08, + "loss": 0.0, + "num_input_tokens_seen": 123696936, + "step": 183615 + }, + { + "epoch": 4.485867148755283, + "grad_norm": 0.00017789828416425735, + "learning_rate": 6.373397006479464e-08, + "loss": 0.0, + "num_input_tokens_seen": 123700072, + "step": 183620 + }, + { + "epoch": 4.48598929958713, + "grad_norm": 0.0004446000966709107, + "learning_rate": 6.370401614871845e-08, + "loss": 0.0, + "num_input_tokens_seen": 123703336, + "step": 183625 + }, + { + "epoch": 4.486111450418977, + "grad_norm": 6.071280949981883e-05, + "learning_rate": 6.367406904163863e-08, + "loss": 0.0, + "num_input_tokens_seen": 123706280, + "step": 183630 + }, + { + "epoch": 4.486233601250825, + "grad_norm": 0.000198695546714589, + "learning_rate": 6.364412874377267e-08, + "loss": 0.0, + "num_input_tokens_seen": 123709672, + "step": 183635 + }, + { + "epoch": 4.486355752082671, + "grad_norm": 0.00022004038328304887, + "learning_rate": 6.361419525533862e-08, + "loss": 0.0, + "num_input_tokens_seen": 123712680, + "step": 183640 + }, + { + "epoch": 4.486477902914519, + "grad_norm": 9.493048855802044e-05, + "learning_rate": 6.358426857655386e-08, + "loss": 0.0, + "num_input_tokens_seen": 123715752, + "step": 183645 + }, + { + "epoch": 4.486600053746366, + "grad_norm": 0.0011744924122467637, + "learning_rate": 6.355434870763632e-08, + "loss": 0.0, + "num_input_tokens_seen": 123719400, + "step": 183650 + }, + { + "epoch": 4.486722204578213, + "grad_norm": 0.002865522401407361, + "learning_rate": 6.352443564880328e-08, + "loss": 0.0, + "num_input_tokens_seen": 123722344, + "step": 183655 + }, + { + "epoch": 4.48684435541006, + "grad_norm": 0.0005735221202485263, + "learning_rate": 6.349452940027255e-08, + "loss": 0.0, + "num_input_tokens_seen": 123725864, + "step": 183660 + }, + { + "epoch": 4.486966506241908, + "grad_norm": 0.003035858040675521, + "learning_rate": 6.346462996226155e-08, + "loss": 0.0, + "num_input_tokens_seen": 123729512, + "step": 183665 + }, + { + "epoch": 4.4870886570737545, + "grad_norm": 1.2980992323718965e-05, + "learning_rate": 6.343473733498739e-08, + "loss": 0.0, + "num_input_tokens_seen": 123732904, + "step": 183670 + }, + { + "epoch": 4.487210807905602, + "grad_norm": 1.0696382560126949e-05, + "learning_rate": 6.340485151866793e-08, + "loss": 0.0, + "num_input_tokens_seen": 123736168, + "step": 183675 + }, + { + "epoch": 4.487332958737449, + "grad_norm": 0.0003565715451259166, + "learning_rate": 6.33749725135202e-08, + "loss": 0.0, + "num_input_tokens_seen": 123739240, + "step": 183680 + }, + { + "epoch": 4.4874551095692965, + "grad_norm": 0.0007983737159520388, + "learning_rate": 6.33451003197617e-08, + "loss": 0.0, + "num_input_tokens_seen": 123742504, + "step": 183685 + }, + { + "epoch": 4.487577260401143, + "grad_norm": 0.014135781675577164, + "learning_rate": 6.331523493760949e-08, + "loss": 0.0, + "num_input_tokens_seen": 123745896, + "step": 183690 + }, + { + "epoch": 4.487699411232991, + "grad_norm": 0.00018134378478862345, + "learning_rate": 6.328537636728071e-08, + "loss": 0.0, + "num_input_tokens_seen": 123749224, + "step": 183695 + }, + { + "epoch": 4.487821562064838, + "grad_norm": 0.00032579965773038566, + "learning_rate": 6.325552460899285e-08, + "loss": 0.0, + "num_input_tokens_seen": 123752360, + "step": 183700 + }, + { + "epoch": 4.487943712896685, + "grad_norm": 0.0001060232098097913, + "learning_rate": 6.322567966296255e-08, + "loss": 0.0, + "num_input_tokens_seen": 123755752, + "step": 183705 + }, + { + "epoch": 4.488065863728532, + "grad_norm": 0.0027526814956218004, + "learning_rate": 6.319584152940726e-08, + "loss": 0.0, + "num_input_tokens_seen": 123759528, + "step": 183710 + }, + { + "epoch": 4.488188014560379, + "grad_norm": 0.0005101757124066353, + "learning_rate": 6.316601020854362e-08, + "loss": 0.0, + "num_input_tokens_seen": 123762984, + "step": 183715 + }, + { + "epoch": 4.488310165392226, + "grad_norm": 8.717154560144991e-05, + "learning_rate": 6.313618570058876e-08, + "loss": 0.0, + "num_input_tokens_seen": 123766120, + "step": 183720 + }, + { + "epoch": 4.488432316224073, + "grad_norm": 0.00010378050501458347, + "learning_rate": 6.310636800575975e-08, + "loss": 0.0, + "num_input_tokens_seen": 123770152, + "step": 183725 + }, + { + "epoch": 4.488554467055921, + "grad_norm": 1.9653307390399277e-05, + "learning_rate": 6.307655712427295e-08, + "loss": 0.0, + "num_input_tokens_seen": 123773288, + "step": 183730 + }, + { + "epoch": 4.4886766178877675, + "grad_norm": 2.4535993361496367e-05, + "learning_rate": 6.304675305634566e-08, + "loss": 0.0, + "num_input_tokens_seen": 123776552, + "step": 183735 + }, + { + "epoch": 4.488798768719615, + "grad_norm": 0.0008791940053924918, + "learning_rate": 6.301695580219424e-08, + "loss": 0.0, + "num_input_tokens_seen": 123779688, + "step": 183740 + }, + { + "epoch": 4.488920919551462, + "grad_norm": 0.00033614938729442656, + "learning_rate": 6.298716536203563e-08, + "loss": 0.0, + "num_input_tokens_seen": 123783208, + "step": 183745 + }, + { + "epoch": 4.4890430703833095, + "grad_norm": 6.0099537222413346e-05, + "learning_rate": 6.295738173608622e-08, + "loss": 0.0, + "num_input_tokens_seen": 123786344, + "step": 183750 + }, + { + "epoch": 4.489165221215156, + "grad_norm": 0.00011174487008247524, + "learning_rate": 6.292760492456284e-08, + "loss": 0.0, + "num_input_tokens_seen": 123790184, + "step": 183755 + }, + { + "epoch": 4.489287372047004, + "grad_norm": 0.006128856446594, + "learning_rate": 6.289783492768208e-08, + "loss": 0.0, + "num_input_tokens_seen": 123793320, + "step": 183760 + }, + { + "epoch": 4.489409522878851, + "grad_norm": 0.004429202992469072, + "learning_rate": 6.286807174566033e-08, + "loss": 0.0, + "num_input_tokens_seen": 123796584, + "step": 183765 + }, + { + "epoch": 4.489531673710698, + "grad_norm": 0.0015801249537616968, + "learning_rate": 6.283831537871375e-08, + "loss": 0.0, + "num_input_tokens_seen": 123799528, + "step": 183770 + }, + { + "epoch": 4.489653824542545, + "grad_norm": 0.0002338515332667157, + "learning_rate": 6.280856582705929e-08, + "loss": 0.0, + "num_input_tokens_seen": 123802984, + "step": 183775 + }, + { + "epoch": 4.489775975374393, + "grad_norm": 0.00029317050939425826, + "learning_rate": 6.277882309091287e-08, + "loss": 0.0, + "num_input_tokens_seen": 123806184, + "step": 183780 + }, + { + "epoch": 4.489898126206239, + "grad_norm": 0.021793987601995468, + "learning_rate": 6.274908717049099e-08, + "loss": 0.0, + "num_input_tokens_seen": 123809384, + "step": 183785 + }, + { + "epoch": 4.490020277038087, + "grad_norm": 0.0002981332945637405, + "learning_rate": 6.27193580660098e-08, + "loss": 0.0, + "num_input_tokens_seen": 123812520, + "step": 183790 + }, + { + "epoch": 4.490142427869934, + "grad_norm": 0.000523839786183089, + "learning_rate": 6.26896357776856e-08, + "loss": 0.0, + "num_input_tokens_seen": 123815912, + "step": 183795 + }, + { + "epoch": 4.490264578701781, + "grad_norm": 0.0005471837939694524, + "learning_rate": 6.26599203057343e-08, + "loss": 0.0, + "num_input_tokens_seen": 123819240, + "step": 183800 + }, + { + "epoch": 4.490386729533628, + "grad_norm": 0.0008639732259325683, + "learning_rate": 6.263021165037241e-08, + "loss": 0.0, + "num_input_tokens_seen": 123822504, + "step": 183805 + }, + { + "epoch": 4.490508880365475, + "grad_norm": 0.00019813806284219027, + "learning_rate": 6.260050981181553e-08, + "loss": 0.0, + "num_input_tokens_seen": 123825832, + "step": 183810 + }, + { + "epoch": 4.4906310311973225, + "grad_norm": 8.793376036919653e-05, + "learning_rate": 6.257081479027993e-08, + "loss": 0.0, + "num_input_tokens_seen": 123829160, + "step": 183815 + }, + { + "epoch": 4.490753182029169, + "grad_norm": 0.00021990187815390527, + "learning_rate": 6.254112658598165e-08, + "loss": 0.0, + "num_input_tokens_seen": 123832296, + "step": 183820 + }, + { + "epoch": 4.490875332861017, + "grad_norm": 0.00017110860790126026, + "learning_rate": 6.25114451991362e-08, + "loss": 0.0, + "num_input_tokens_seen": 123836072, + "step": 183825 + }, + { + "epoch": 4.490997483692864, + "grad_norm": 0.00019226314907427877, + "learning_rate": 6.248177062995997e-08, + "loss": 0.0, + "num_input_tokens_seen": 123839144, + "step": 183830 + }, + { + "epoch": 4.491119634524711, + "grad_norm": 0.0013531792210415006, + "learning_rate": 6.245210287866821e-08, + "loss": 0.0, + "num_input_tokens_seen": 123842408, + "step": 183835 + }, + { + "epoch": 4.491241785356558, + "grad_norm": 0.00011543887376319617, + "learning_rate": 6.242244194547707e-08, + "loss": 0.0, + "num_input_tokens_seen": 123846440, + "step": 183840 + }, + { + "epoch": 4.491363936188406, + "grad_norm": 0.007675806060433388, + "learning_rate": 6.239278783060198e-08, + "loss": 0.0, + "num_input_tokens_seen": 123849448, + "step": 183845 + }, + { + "epoch": 4.491486087020252, + "grad_norm": 0.0008180846925824881, + "learning_rate": 6.236314053425873e-08, + "loss": 0.0, + "num_input_tokens_seen": 123853288, + "step": 183850 + }, + { + "epoch": 4.4916082378521, + "grad_norm": 0.00017951830523088574, + "learning_rate": 6.233350005666304e-08, + "loss": 0.0, + "num_input_tokens_seen": 123856872, + "step": 183855 + }, + { + "epoch": 4.491730388683947, + "grad_norm": 0.001047560479491949, + "learning_rate": 6.230386639803031e-08, + "loss": 0.0, + "num_input_tokens_seen": 123859816, + "step": 183860 + }, + { + "epoch": 4.491852539515794, + "grad_norm": 0.0005243326304480433, + "learning_rate": 6.227423955857614e-08, + "loss": 0.0, + "num_input_tokens_seen": 123863528, + "step": 183865 + }, + { + "epoch": 4.491974690347641, + "grad_norm": 27.46204376220703, + "learning_rate": 6.22446195385159e-08, + "loss": 0.0354, + "num_input_tokens_seen": 123867112, + "step": 183870 + }, + { + "epoch": 4.492096841179489, + "grad_norm": 0.002453556749969721, + "learning_rate": 6.221500633806487e-08, + "loss": 0.0, + "num_input_tokens_seen": 123870760, + "step": 183875 + }, + { + "epoch": 4.492218992011336, + "grad_norm": 3.372270384716103e-06, + "learning_rate": 6.218539995743865e-08, + "loss": 0.0, + "num_input_tokens_seen": 123873768, + "step": 183880 + }, + { + "epoch": 4.492341142843182, + "grad_norm": 0.04905255511403084, + "learning_rate": 6.215580039685243e-08, + "loss": 0.0, + "num_input_tokens_seen": 123876776, + "step": 183885 + }, + { + "epoch": 4.49246329367503, + "grad_norm": 0.00012224094825796783, + "learning_rate": 6.212620765652155e-08, + "loss": 0.0, + "num_input_tokens_seen": 123880040, + "step": 183890 + }, + { + "epoch": 4.492585444506877, + "grad_norm": 1.0237623428110965e-05, + "learning_rate": 6.209662173666097e-08, + "loss": 0.0, + "num_input_tokens_seen": 123883560, + "step": 183895 + }, + { + "epoch": 4.492707595338724, + "grad_norm": 1.5369896573247388e-05, + "learning_rate": 6.206704263748618e-08, + "loss": 0.0, + "num_input_tokens_seen": 123886952, + "step": 183900 + }, + { + "epoch": 4.492829746170571, + "grad_norm": 0.00015546464419458061, + "learning_rate": 6.203747035921192e-08, + "loss": 0.0, + "num_input_tokens_seen": 123889960, + "step": 183905 + }, + { + "epoch": 4.492951897002419, + "grad_norm": 0.0012587353121489286, + "learning_rate": 6.200790490205354e-08, + "loss": 0.0, + "num_input_tokens_seen": 123893480, + "step": 183910 + }, + { + "epoch": 4.4930740478342654, + "grad_norm": 0.0001071410661097616, + "learning_rate": 6.197834626622611e-08, + "loss": 0.0, + "num_input_tokens_seen": 123896616, + "step": 183915 + }, + { + "epoch": 4.493196198666113, + "grad_norm": 0.0003027370839845389, + "learning_rate": 6.194879445194434e-08, + "loss": 0.0, + "num_input_tokens_seen": 123899944, + "step": 183920 + }, + { + "epoch": 4.49331834949796, + "grad_norm": 1.3733631021750625e-05, + "learning_rate": 6.191924945942329e-08, + "loss": 0.0, + "num_input_tokens_seen": 123903400, + "step": 183925 + }, + { + "epoch": 4.4934405003298075, + "grad_norm": 0.0011689336970448494, + "learning_rate": 6.188971128887777e-08, + "loss": 0.0, + "num_input_tokens_seen": 123907304, + "step": 183930 + }, + { + "epoch": 4.493562651161654, + "grad_norm": 0.00024081453739199787, + "learning_rate": 6.18601799405224e-08, + "loss": 0.0, + "num_input_tokens_seen": 123910568, + "step": 183935 + }, + { + "epoch": 4.493684801993502, + "grad_norm": 0.0007175906794145703, + "learning_rate": 6.183065541457244e-08, + "loss": 0.0, + "num_input_tokens_seen": 123915112, + "step": 183940 + }, + { + "epoch": 4.493806952825349, + "grad_norm": 0.013144269585609436, + "learning_rate": 6.180113771124207e-08, + "loss": 0.0, + "num_input_tokens_seen": 123918632, + "step": 183945 + }, + { + "epoch": 4.493929103657196, + "grad_norm": 0.00019422074547037482, + "learning_rate": 6.177162683074633e-08, + "loss": 0.0, + "num_input_tokens_seen": 123921896, + "step": 183950 + }, + { + "epoch": 4.494051254489043, + "grad_norm": 2.715620576054789e-05, + "learning_rate": 6.174212277329949e-08, + "loss": 0.0, + "num_input_tokens_seen": 123925352, + "step": 183955 + }, + { + "epoch": 4.494173405320891, + "grad_norm": 0.00022808015637565404, + "learning_rate": 6.17126255391165e-08, + "loss": 0.0, + "num_input_tokens_seen": 123928872, + "step": 183960 + }, + { + "epoch": 4.494295556152737, + "grad_norm": 0.06994034349918365, + "learning_rate": 6.16831351284115e-08, + "loss": 0.0001, + "num_input_tokens_seen": 123932520, + "step": 183965 + }, + { + "epoch": 4.494417706984585, + "grad_norm": 0.030225621536374092, + "learning_rate": 6.165365154139924e-08, + "loss": 0.0394, + "num_input_tokens_seen": 123935848, + "step": 183970 + }, + { + "epoch": 4.494539857816432, + "grad_norm": 8.776846516411752e-05, + "learning_rate": 6.162417477829407e-08, + "loss": 0.0, + "num_input_tokens_seen": 123938984, + "step": 183975 + }, + { + "epoch": 4.4946620086482785, + "grad_norm": 7.617447408847511e-05, + "learning_rate": 6.159470483931006e-08, + "loss": 0.0863, + "num_input_tokens_seen": 123942632, + "step": 183980 + }, + { + "epoch": 4.494784159480126, + "grad_norm": 4.946303670294583e-05, + "learning_rate": 6.156524172466194e-08, + "loss": 0.0, + "num_input_tokens_seen": 123945832, + "step": 183985 + }, + { + "epoch": 4.494906310311973, + "grad_norm": 0.00012827737373299897, + "learning_rate": 6.153578543456361e-08, + "loss": 0.029, + "num_input_tokens_seen": 123949416, + "step": 183990 + }, + { + "epoch": 4.4950284611438205, + "grad_norm": 6.737570947734639e-05, + "learning_rate": 6.150633596922971e-08, + "loss": 0.0568, + "num_input_tokens_seen": 123952936, + "step": 183995 + }, + { + "epoch": 4.495150611975667, + "grad_norm": 0.001037980429828167, + "learning_rate": 6.147689332887396e-08, + "loss": 0.0, + "num_input_tokens_seen": 123956200, + "step": 184000 + }, + { + "epoch": 4.495272762807515, + "grad_norm": 0.0002838200598489493, + "learning_rate": 6.14474575137106e-08, + "loss": 0.0, + "num_input_tokens_seen": 123959400, + "step": 184005 + }, + { + "epoch": 4.495394913639362, + "grad_norm": 0.00040364160668104887, + "learning_rate": 6.141802852395406e-08, + "loss": 0.0, + "num_input_tokens_seen": 123962856, + "step": 184010 + }, + { + "epoch": 4.495517064471209, + "grad_norm": 0.005427889991551638, + "learning_rate": 6.138860635981779e-08, + "loss": 0.0, + "num_input_tokens_seen": 123966184, + "step": 184015 + }, + { + "epoch": 4.495639215303056, + "grad_norm": 0.007369643542915583, + "learning_rate": 6.135919102151632e-08, + "loss": 0.0, + "num_input_tokens_seen": 123969448, + "step": 184020 + }, + { + "epoch": 4.495761366134904, + "grad_norm": 2.715495793381706e-05, + "learning_rate": 6.132978250926302e-08, + "loss": 0.0, + "num_input_tokens_seen": 123973032, + "step": 184025 + }, + { + "epoch": 4.49588351696675, + "grad_norm": 0.008515347726643085, + "learning_rate": 6.130038082327205e-08, + "loss": 0.0, + "num_input_tokens_seen": 123976744, + "step": 184030 + }, + { + "epoch": 4.496005667798598, + "grad_norm": 0.0004892494180239737, + "learning_rate": 6.127098596375724e-08, + "loss": 0.0, + "num_input_tokens_seen": 123979880, + "step": 184035 + }, + { + "epoch": 4.496127818630445, + "grad_norm": 0.0005637376452796161, + "learning_rate": 6.124159793093231e-08, + "loss": 0.0, + "num_input_tokens_seen": 123983336, + "step": 184040 + }, + { + "epoch": 4.496249969462292, + "grad_norm": 3.558983053153497e-06, + "learning_rate": 6.121221672501108e-08, + "loss": 0.0, + "num_input_tokens_seen": 123986408, + "step": 184045 + }, + { + "epoch": 4.496372120294139, + "grad_norm": 10.293907165527344, + "learning_rate": 6.118284234620686e-08, + "loss": 0.0257, + "num_input_tokens_seen": 123990632, + "step": 184050 + }, + { + "epoch": 4.496494271125987, + "grad_norm": 0.0009266805136576295, + "learning_rate": 6.115347479473376e-08, + "loss": 0.0, + "num_input_tokens_seen": 123993704, + "step": 184055 + }, + { + "epoch": 4.4966164219578335, + "grad_norm": 2.9098644517944194e-05, + "learning_rate": 6.112411407080498e-08, + "loss": 0.0, + "num_input_tokens_seen": 123997224, + "step": 184060 + }, + { + "epoch": 4.496738572789681, + "grad_norm": 2.041774678218644e-05, + "learning_rate": 6.10947601746341e-08, + "loss": 0.0, + "num_input_tokens_seen": 124000872, + "step": 184065 + }, + { + "epoch": 4.496860723621528, + "grad_norm": 0.003889445448294282, + "learning_rate": 6.106541310643487e-08, + "loss": 0.0, + "num_input_tokens_seen": 124004200, + "step": 184070 + }, + { + "epoch": 4.496982874453375, + "grad_norm": 0.003433778416365385, + "learning_rate": 6.103607286642054e-08, + "loss": 0.0, + "num_input_tokens_seen": 124007656, + "step": 184075 + }, + { + "epoch": 4.497105025285222, + "grad_norm": 0.00015160914335865527, + "learning_rate": 6.100673945480417e-08, + "loss": 0.0, + "num_input_tokens_seen": 124011112, + "step": 184080 + }, + { + "epoch": 4.497227176117069, + "grad_norm": 0.0003066387725993991, + "learning_rate": 6.097741287179958e-08, + "loss": 0.0, + "num_input_tokens_seen": 124014440, + "step": 184085 + }, + { + "epoch": 4.497349326948917, + "grad_norm": 0.0002169539366150275, + "learning_rate": 6.094809311761961e-08, + "loss": 0.0099, + "num_input_tokens_seen": 124017832, + "step": 184090 + }, + { + "epoch": 4.497471477780763, + "grad_norm": 0.009181767702102661, + "learning_rate": 6.091878019247787e-08, + "loss": 0.0, + "num_input_tokens_seen": 124021160, + "step": 184095 + }, + { + "epoch": 4.497593628612611, + "grad_norm": 0.00010554036998655647, + "learning_rate": 6.088947409658718e-08, + "loss": 0.0, + "num_input_tokens_seen": 124024488, + "step": 184100 + }, + { + "epoch": 4.497715779444458, + "grad_norm": 0.00019654342031572014, + "learning_rate": 6.086017483016104e-08, + "loss": 0.0, + "num_input_tokens_seen": 124028072, + "step": 184105 + }, + { + "epoch": 4.497837930276305, + "grad_norm": 0.00013703553122468293, + "learning_rate": 6.083088239341206e-08, + "loss": 0.0, + "num_input_tokens_seen": 124031400, + "step": 184110 + }, + { + "epoch": 4.497960081108152, + "grad_norm": 2.7517999114934355e-05, + "learning_rate": 6.080159678655372e-08, + "loss": 0.0, + "num_input_tokens_seen": 124034600, + "step": 184115 + }, + { + "epoch": 4.49808223194, + "grad_norm": 9.830735507421196e-06, + "learning_rate": 6.077231800979865e-08, + "loss": 0.0, + "num_input_tokens_seen": 124037672, + "step": 184120 + }, + { + "epoch": 4.4982043827718465, + "grad_norm": 8.236825669882819e-05, + "learning_rate": 6.074304606335978e-08, + "loss": 0.0, + "num_input_tokens_seen": 124040616, + "step": 184125 + }, + { + "epoch": 4.498326533603694, + "grad_norm": 0.00047793277190066874, + "learning_rate": 6.071378094745039e-08, + "loss": 0.0, + "num_input_tokens_seen": 124043816, + "step": 184130 + }, + { + "epoch": 4.498448684435541, + "grad_norm": 4.4294542021816596e-05, + "learning_rate": 6.068452266228285e-08, + "loss": 0.0, + "num_input_tokens_seen": 124047272, + "step": 184135 + }, + { + "epoch": 4.4985708352673885, + "grad_norm": 5.49412434338592e-05, + "learning_rate": 6.065527120807024e-08, + "loss": 0.0, + "num_input_tokens_seen": 124050856, + "step": 184140 + }, + { + "epoch": 4.498692986099235, + "grad_norm": 8.162522863131016e-05, + "learning_rate": 6.062602658502491e-08, + "loss": 0.0, + "num_input_tokens_seen": 124054312, + "step": 184145 + }, + { + "epoch": 4.498815136931082, + "grad_norm": 0.033382777124643326, + "learning_rate": 6.059678879336005e-08, + "loss": 0.0, + "num_input_tokens_seen": 124057128, + "step": 184150 + }, + { + "epoch": 4.49893728776293, + "grad_norm": 0.004745765123516321, + "learning_rate": 6.056755783328782e-08, + "loss": 0.0, + "num_input_tokens_seen": 124060136, + "step": 184155 + }, + { + "epoch": 4.499059438594776, + "grad_norm": 2.5301449568360113e-05, + "learning_rate": 6.053833370502104e-08, + "loss": 0.0, + "num_input_tokens_seen": 124064040, + "step": 184160 + }, + { + "epoch": 4.499181589426624, + "grad_norm": 6.118205055827275e-05, + "learning_rate": 6.050911640877221e-08, + "loss": 0.0, + "num_input_tokens_seen": 124067880, + "step": 184165 + }, + { + "epoch": 4.499303740258471, + "grad_norm": 0.00027135369600728154, + "learning_rate": 6.047990594475372e-08, + "loss": 0.0, + "num_input_tokens_seen": 124070696, + "step": 184170 + }, + { + "epoch": 4.499425891090318, + "grad_norm": 3.677931090351194e-05, + "learning_rate": 6.045070231317817e-08, + "loss": 0.0, + "num_input_tokens_seen": 124074344, + "step": 184175 + }, + { + "epoch": 4.499548041922165, + "grad_norm": 0.006334454286843538, + "learning_rate": 6.042150551425784e-08, + "loss": 0.0, + "num_input_tokens_seen": 124077608, + "step": 184180 + }, + { + "epoch": 4.499670192754013, + "grad_norm": 0.0003608764673117548, + "learning_rate": 6.039231554820489e-08, + "loss": 0.0, + "num_input_tokens_seen": 124081384, + "step": 184185 + }, + { + "epoch": 4.4997923435858596, + "grad_norm": 3.4421696909703314e-05, + "learning_rate": 6.036313241523183e-08, + "loss": 0.0, + "num_input_tokens_seen": 124084968, + "step": 184190 + }, + { + "epoch": 4.499914494417707, + "grad_norm": 5.452797267935239e-06, + "learning_rate": 6.033395611555081e-08, + "loss": 0.0538, + "num_input_tokens_seen": 124088040, + "step": 184195 + }, + { + "epoch": 4.500036645249554, + "grad_norm": 2.9334744340303587e-06, + "learning_rate": 6.0304786649374e-08, + "loss": 0.0, + "num_input_tokens_seen": 124091240, + "step": 184200 + }, + { + "epoch": 4.500158796081402, + "grad_norm": 0.00040240155067294836, + "learning_rate": 6.027562401691344e-08, + "loss": 0.0, + "num_input_tokens_seen": 124095144, + "step": 184205 + }, + { + "epoch": 4.500280946913248, + "grad_norm": 0.000127659848658368, + "learning_rate": 6.024646821838142e-08, + "loss": 0.0, + "num_input_tokens_seen": 124098728, + "step": 184210 + }, + { + "epoch": 4.500329807245987, + "eval_loss": 0.3127802014350891, + "eval_runtime": 47.8244, + "eval_samples_per_second": 760.805, + "eval_steps_per_second": 95.119, + "num_input_tokens_seen": 124100264, + "step": 184212 + }, + { + "epoch": 4.500403097745096, + "grad_norm": 0.0016075852327048779, + "learning_rate": 6.021731925398976e-08, + "loss": 0.0, + "num_input_tokens_seen": 124102632, + "step": 184215 + }, + { + "epoch": 4.500525248576943, + "grad_norm": 0.0014234702102839947, + "learning_rate": 6.018817712395041e-08, + "loss": 0.0, + "num_input_tokens_seen": 124105832, + "step": 184220 + }, + { + "epoch": 4.50064739940879, + "grad_norm": 0.0013454908039420843, + "learning_rate": 6.015904182847564e-08, + "loss": 0.0, + "num_input_tokens_seen": 124109352, + "step": 184225 + }, + { + "epoch": 4.500769550240637, + "grad_norm": 0.0019803806208074093, + "learning_rate": 6.012991336777695e-08, + "loss": 0.0, + "num_input_tokens_seen": 124112552, + "step": 184230 + }, + { + "epoch": 4.500891701072485, + "grad_norm": 0.0009988030651584268, + "learning_rate": 6.01007917420665e-08, + "loss": 0.0, + "num_input_tokens_seen": 124115816, + "step": 184235 + }, + { + "epoch": 4.5010138519043315, + "grad_norm": 0.38556408882141113, + "learning_rate": 6.007167695155569e-08, + "loss": 0.0001, + "num_input_tokens_seen": 124119528, + "step": 184240 + }, + { + "epoch": 4.501136002736178, + "grad_norm": 0.01152084581553936, + "learning_rate": 6.004256899645665e-08, + "loss": 0.0, + "num_input_tokens_seen": 124122344, + "step": 184245 + }, + { + "epoch": 4.501258153568026, + "grad_norm": 0.00029359193285927176, + "learning_rate": 6.001346787698069e-08, + "loss": 0.0, + "num_input_tokens_seen": 124125416, + "step": 184250 + }, + { + "epoch": 4.501380304399873, + "grad_norm": 0.0015446188626810908, + "learning_rate": 5.998437359333964e-08, + "loss": 0.0, + "num_input_tokens_seen": 124129000, + "step": 184255 + }, + { + "epoch": 4.50150245523172, + "grad_norm": 0.0001862171193351969, + "learning_rate": 5.995528614574519e-08, + "loss": 0.0, + "num_input_tokens_seen": 124131880, + "step": 184260 + }, + { + "epoch": 4.501624606063567, + "grad_norm": 0.0001308717328356579, + "learning_rate": 5.992620553440863e-08, + "loss": 0.0, + "num_input_tokens_seen": 124134760, + "step": 184265 + }, + { + "epoch": 4.501746756895415, + "grad_norm": 4.519295907812193e-05, + "learning_rate": 5.989713175954169e-08, + "loss": 0.0, + "num_input_tokens_seen": 124138152, + "step": 184270 + }, + { + "epoch": 4.501868907727261, + "grad_norm": 2.6418254492455162e-05, + "learning_rate": 5.986806482135542e-08, + "loss": 0.0, + "num_input_tokens_seen": 124141352, + "step": 184275 + }, + { + "epoch": 4.501991058559109, + "grad_norm": 0.00016644655261188745, + "learning_rate": 5.983900472006175e-08, + "loss": 0.0, + "num_input_tokens_seen": 124144616, + "step": 184280 + }, + { + "epoch": 4.502113209390956, + "grad_norm": 0.02736012451350689, + "learning_rate": 5.980995145587165e-08, + "loss": 0.0, + "num_input_tokens_seen": 124147944, + "step": 184285 + }, + { + "epoch": 4.502235360222803, + "grad_norm": 0.0007747677154839039, + "learning_rate": 5.978090502899624e-08, + "loss": 0.0, + "num_input_tokens_seen": 124151400, + "step": 184290 + }, + { + "epoch": 4.50235751105465, + "grad_norm": 8.166118641383946e-05, + "learning_rate": 5.975186543964716e-08, + "loss": 0.0, + "num_input_tokens_seen": 124155368, + "step": 184295 + }, + { + "epoch": 4.502479661886498, + "grad_norm": 2.3084163331077434e-05, + "learning_rate": 5.972283268803536e-08, + "loss": 0.0, + "num_input_tokens_seen": 124158632, + "step": 184300 + }, + { + "epoch": 4.5026018127183445, + "grad_norm": 0.0006073784315958619, + "learning_rate": 5.969380677437208e-08, + "loss": 0.0, + "num_input_tokens_seen": 124162344, + "step": 184305 + }, + { + "epoch": 4.502723963550192, + "grad_norm": 8.503326534992084e-05, + "learning_rate": 5.966478769886818e-08, + "loss": 0.0, + "num_input_tokens_seen": 124165672, + "step": 184310 + }, + { + "epoch": 4.502846114382039, + "grad_norm": 0.00027897878317162395, + "learning_rate": 5.963577546173493e-08, + "loss": 0.0, + "num_input_tokens_seen": 124169320, + "step": 184315 + }, + { + "epoch": 4.5029682652138865, + "grad_norm": 0.0013202318223193288, + "learning_rate": 5.960677006318338e-08, + "loss": 0.0, + "num_input_tokens_seen": 124173608, + "step": 184320 + }, + { + "epoch": 4.503090416045733, + "grad_norm": 2.0044208213221282e-05, + "learning_rate": 5.9577771503424135e-08, + "loss": 0.0, + "num_input_tokens_seen": 124177576, + "step": 184325 + }, + { + "epoch": 4.503212566877581, + "grad_norm": 0.0001279815041925758, + "learning_rate": 5.954877978266848e-08, + "loss": 0.0, + "num_input_tokens_seen": 124181288, + "step": 184330 + }, + { + "epoch": 4.503334717709428, + "grad_norm": 0.0007320480654016137, + "learning_rate": 5.9519794901126907e-08, + "loss": 0.0, + "num_input_tokens_seen": 124184296, + "step": 184335 + }, + { + "epoch": 4.503456868541274, + "grad_norm": 0.0055288467556238174, + "learning_rate": 5.9490816859010364e-08, + "loss": 0.0, + "num_input_tokens_seen": 124187944, + "step": 184340 + }, + { + "epoch": 4.503579019373122, + "grad_norm": 0.002740490948781371, + "learning_rate": 5.946184565652967e-08, + "loss": 0.0, + "num_input_tokens_seen": 124191208, + "step": 184345 + }, + { + "epoch": 4.503701170204969, + "grad_norm": 2.137678711733315e-05, + "learning_rate": 5.943288129389523e-08, + "loss": 0.0, + "num_input_tokens_seen": 124194664, + "step": 184350 + }, + { + "epoch": 4.503823321036816, + "grad_norm": 0.009588432498276234, + "learning_rate": 5.940392377131809e-08, + "loss": 0.0, + "num_input_tokens_seen": 124198184, + "step": 184355 + }, + { + "epoch": 4.503945471868663, + "grad_norm": 0.0009689059224911034, + "learning_rate": 5.937497308900841e-08, + "loss": 0.0, + "num_input_tokens_seen": 124201768, + "step": 184360 + }, + { + "epoch": 4.504067622700511, + "grad_norm": 2.13335151784122e-05, + "learning_rate": 5.934602924717702e-08, + "loss": 0.0, + "num_input_tokens_seen": 124205096, + "step": 184365 + }, + { + "epoch": 4.5041897735323575, + "grad_norm": 0.00023117160890251398, + "learning_rate": 5.93170922460342e-08, + "loss": 0.0, + "num_input_tokens_seen": 124208104, + "step": 184370 + }, + { + "epoch": 4.504311924364205, + "grad_norm": 0.0012295347405597568, + "learning_rate": 5.9288162085790574e-08, + "loss": 0.0, + "num_input_tokens_seen": 124211112, + "step": 184375 + }, + { + "epoch": 4.504434075196052, + "grad_norm": 0.0013790687080472708, + "learning_rate": 5.9259238766656506e-08, + "loss": 0.0, + "num_input_tokens_seen": 124214568, + "step": 184380 + }, + { + "epoch": 4.5045562260278995, + "grad_norm": 0.0005128180491738021, + "learning_rate": 5.923032228884228e-08, + "loss": 0.0, + "num_input_tokens_seen": 124217704, + "step": 184385 + }, + { + "epoch": 4.504678376859746, + "grad_norm": 2.7716218028217554e-05, + "learning_rate": 5.920141265255818e-08, + "loss": 0.0, + "num_input_tokens_seen": 124220776, + "step": 184390 + }, + { + "epoch": 4.504800527691594, + "grad_norm": 0.002322013955563307, + "learning_rate": 5.9172509858014474e-08, + "loss": 0.0, + "num_input_tokens_seen": 124224168, + "step": 184395 + }, + { + "epoch": 4.504922678523441, + "grad_norm": 5.2793726354138926e-05, + "learning_rate": 5.914361390542133e-08, + "loss": 0.0, + "num_input_tokens_seen": 124227240, + "step": 184400 + }, + { + "epoch": 4.505044829355288, + "grad_norm": 7.494450255762786e-05, + "learning_rate": 5.91147247949888e-08, + "loss": 0.0, + "num_input_tokens_seen": 124230312, + "step": 184405 + }, + { + "epoch": 4.505166980187135, + "grad_norm": 0.006155565846711397, + "learning_rate": 5.908584252692694e-08, + "loss": 0.0, + "num_input_tokens_seen": 124233960, + "step": 184410 + }, + { + "epoch": 4.505289131018982, + "grad_norm": 0.0005912997294217348, + "learning_rate": 5.905696710144614e-08, + "loss": 0.0, + "num_input_tokens_seen": 124236968, + "step": 184415 + }, + { + "epoch": 4.505411281850829, + "grad_norm": 5.531529041036265e-06, + "learning_rate": 5.902809851875601e-08, + "loss": 0.0, + "num_input_tokens_seen": 124240232, + "step": 184420 + }, + { + "epoch": 4.505533432682677, + "grad_norm": 4.586255818139762e-05, + "learning_rate": 5.899923677906682e-08, + "loss": 0.0, + "num_input_tokens_seen": 124243432, + "step": 184425 + }, + { + "epoch": 4.505655583514524, + "grad_norm": 0.005010268650949001, + "learning_rate": 5.897038188258807e-08, + "loss": 0.0, + "num_input_tokens_seen": 124246696, + "step": 184430 + }, + { + "epoch": 4.5057777343463705, + "grad_norm": 7.66208268032642e-06, + "learning_rate": 5.894153382952993e-08, + "loss": 0.0, + "num_input_tokens_seen": 124249768, + "step": 184435 + }, + { + "epoch": 4.505899885178218, + "grad_norm": 2.6806714231497608e-05, + "learning_rate": 5.891269262010212e-08, + "loss": 0.0, + "num_input_tokens_seen": 124253224, + "step": 184440 + }, + { + "epoch": 4.506022036010065, + "grad_norm": 9.153199789579958e-05, + "learning_rate": 5.888385825451414e-08, + "loss": 0.0, + "num_input_tokens_seen": 124256168, + "step": 184445 + }, + { + "epoch": 4.5061441868419125, + "grad_norm": 0.04417189583182335, + "learning_rate": 5.885503073297604e-08, + "loss": 0.0, + "num_input_tokens_seen": 124260328, + "step": 184450 + }, + { + "epoch": 4.506266337673759, + "grad_norm": 0.000164502183906734, + "learning_rate": 5.8826210055697215e-08, + "loss": 0.0, + "num_input_tokens_seen": 124263912, + "step": 184455 + }, + { + "epoch": 4.506388488505607, + "grad_norm": 0.0004098447388969362, + "learning_rate": 5.879739622288748e-08, + "loss": 0.0117, + "num_input_tokens_seen": 124267240, + "step": 184460 + }, + { + "epoch": 4.506510639337454, + "grad_norm": 0.00029537943191826344, + "learning_rate": 5.876858923475603e-08, + "loss": 0.0, + "num_input_tokens_seen": 124270504, + "step": 184465 + }, + { + "epoch": 4.506632790169301, + "grad_norm": 4.8967107431963086e-05, + "learning_rate": 5.873978909151256e-08, + "loss": 0.0, + "num_input_tokens_seen": 124273512, + "step": 184470 + }, + { + "epoch": 4.506754941001148, + "grad_norm": 0.0012310813181102276, + "learning_rate": 5.871099579336669e-08, + "loss": 0.0, + "num_input_tokens_seen": 124277032, + "step": 184475 + }, + { + "epoch": 4.506877091832996, + "grad_norm": 0.0020224247127771378, + "learning_rate": 5.868220934052748e-08, + "loss": 0.0, + "num_input_tokens_seen": 124280808, + "step": 184480 + }, + { + "epoch": 4.506999242664842, + "grad_norm": 0.005152786150574684, + "learning_rate": 5.865342973320453e-08, + "loss": 0.0, + "num_input_tokens_seen": 124284328, + "step": 184485 + }, + { + "epoch": 4.50712139349669, + "grad_norm": 0.00016626408614683896, + "learning_rate": 5.862465697160712e-08, + "loss": 0.0, + "num_input_tokens_seen": 124287656, + "step": 184490 + }, + { + "epoch": 4.507243544328537, + "grad_norm": 0.0020967598538845778, + "learning_rate": 5.85958910559442e-08, + "loss": 0.0, + "num_input_tokens_seen": 124291432, + "step": 184495 + }, + { + "epoch": 4.507365695160384, + "grad_norm": 0.0016112698940560222, + "learning_rate": 5.8567131986425265e-08, + "loss": 0.0, + "num_input_tokens_seen": 124295272, + "step": 184500 + }, + { + "epoch": 4.507487845992231, + "grad_norm": 2.913714160968084e-05, + "learning_rate": 5.853837976325926e-08, + "loss": 0.0, + "num_input_tokens_seen": 124298536, + "step": 184505 + }, + { + "epoch": 4.507609996824078, + "grad_norm": 1.0782813660625834e-05, + "learning_rate": 5.8509634386655573e-08, + "loss": 0.0, + "num_input_tokens_seen": 124301992, + "step": 184510 + }, + { + "epoch": 4.507732147655926, + "grad_norm": 4.982833706890233e-05, + "learning_rate": 5.848089585682292e-08, + "loss": 0.0, + "num_input_tokens_seen": 124305384, + "step": 184515 + }, + { + "epoch": 4.507854298487773, + "grad_norm": 0.0008456270443275571, + "learning_rate": 5.8452164173970475e-08, + "loss": 0.0, + "num_input_tokens_seen": 124308840, + "step": 184520 + }, + { + "epoch": 4.50797644931962, + "grad_norm": 4.60719020338729e-05, + "learning_rate": 5.842343933830707e-08, + "loss": 0.0, + "num_input_tokens_seen": 124312552, + "step": 184525 + }, + { + "epoch": 4.508098600151467, + "grad_norm": 0.00022173790785018355, + "learning_rate": 5.839472135004164e-08, + "loss": 0.0, + "num_input_tokens_seen": 124315752, + "step": 184530 + }, + { + "epoch": 4.508220750983314, + "grad_norm": 0.00018909823847934604, + "learning_rate": 5.8366010209383254e-08, + "loss": 0.0, + "num_input_tokens_seen": 124319016, + "step": 184535 + }, + { + "epoch": 4.508342901815161, + "grad_norm": 0.011719591915607452, + "learning_rate": 5.833730591654029e-08, + "loss": 0.0, + "num_input_tokens_seen": 124322664, + "step": 184540 + }, + { + "epoch": 4.508465052647009, + "grad_norm": 0.00031967408722266555, + "learning_rate": 5.830860847172192e-08, + "loss": 0.0, + "num_input_tokens_seen": 124325608, + "step": 184545 + }, + { + "epoch": 4.508587203478855, + "grad_norm": 0.00010630823817336932, + "learning_rate": 5.827991787513642e-08, + "loss": 0.0014, + "num_input_tokens_seen": 124328744, + "step": 184550 + }, + { + "epoch": 4.508709354310703, + "grad_norm": 0.00011591524526011199, + "learning_rate": 5.825123412699284e-08, + "loss": 0.0, + "num_input_tokens_seen": 124332264, + "step": 184555 + }, + { + "epoch": 4.50883150514255, + "grad_norm": 0.00012368029274512082, + "learning_rate": 5.822255722749947e-08, + "loss": 0.0, + "num_input_tokens_seen": 124335784, + "step": 184560 + }, + { + "epoch": 4.5089536559743975, + "grad_norm": 0.000389666180126369, + "learning_rate": 5.819388717686491e-08, + "loss": 0.0, + "num_input_tokens_seen": 124339304, + "step": 184565 + }, + { + "epoch": 4.509075806806244, + "grad_norm": 6.443438905989751e-06, + "learning_rate": 5.816522397529788e-08, + "loss": 0.0, + "num_input_tokens_seen": 124342440, + "step": 184570 + }, + { + "epoch": 4.509197957638092, + "grad_norm": 9.82302753982367e-06, + "learning_rate": 5.813656762300656e-08, + "loss": 0.0, + "num_input_tokens_seen": 124345896, + "step": 184575 + }, + { + "epoch": 4.509320108469939, + "grad_norm": 0.00010355612903367728, + "learning_rate": 5.810791812019955e-08, + "loss": 0.0, + "num_input_tokens_seen": 124349352, + "step": 184580 + }, + { + "epoch": 4.509442259301786, + "grad_norm": 6.289218435995281e-05, + "learning_rate": 5.807927546708491e-08, + "loss": 0.0341, + "num_input_tokens_seen": 124352424, + "step": 184585 + }, + { + "epoch": 4.509564410133633, + "grad_norm": 0.0032984442077577114, + "learning_rate": 5.805063966387136e-08, + "loss": 0.0, + "num_input_tokens_seen": 124355688, + "step": 184590 + }, + { + "epoch": 4.509686560965481, + "grad_norm": 0.0005909492610953748, + "learning_rate": 5.8022010710766844e-08, + "loss": 0.0, + "num_input_tokens_seen": 124359016, + "step": 184595 + }, + { + "epoch": 4.509808711797327, + "grad_norm": 0.0003565122897271067, + "learning_rate": 5.799338860797953e-08, + "loss": 0.0, + "num_input_tokens_seen": 124362728, + "step": 184600 + }, + { + "epoch": 4.509930862629174, + "grad_norm": 0.00019694749789778143, + "learning_rate": 5.796477335571781e-08, + "loss": 0.0, + "num_input_tokens_seen": 124365864, + "step": 184605 + }, + { + "epoch": 4.510053013461022, + "grad_norm": 2.829977893270552e-05, + "learning_rate": 5.793616495418951e-08, + "loss": 0.0, + "num_input_tokens_seen": 124369320, + "step": 184610 + }, + { + "epoch": 4.5101751642928685, + "grad_norm": 0.0032207928597927094, + "learning_rate": 5.790756340360292e-08, + "loss": 0.0003, + "num_input_tokens_seen": 124372200, + "step": 184615 + }, + { + "epoch": 4.510297315124716, + "grad_norm": 0.00031161491642706096, + "learning_rate": 5.7878968704165754e-08, + "loss": 0.0, + "num_input_tokens_seen": 124375208, + "step": 184620 + }, + { + "epoch": 4.510419465956563, + "grad_norm": 0.0034799398854374886, + "learning_rate": 5.785038085608607e-08, + "loss": 0.0, + "num_input_tokens_seen": 124378728, + "step": 184625 + }, + { + "epoch": 4.5105416167884105, + "grad_norm": 0.0002824080584105104, + "learning_rate": 5.782179985957214e-08, + "loss": 0.0, + "num_input_tokens_seen": 124382568, + "step": 184630 + }, + { + "epoch": 4.510663767620257, + "grad_norm": 0.0002862545079551637, + "learning_rate": 5.779322571483125e-08, + "loss": 0.0, + "num_input_tokens_seen": 124385896, + "step": 184635 + }, + { + "epoch": 4.510785918452105, + "grad_norm": 0.03027668595314026, + "learning_rate": 5.7764658422071566e-08, + "loss": 0.0, + "num_input_tokens_seen": 124389032, + "step": 184640 + }, + { + "epoch": 4.510908069283952, + "grad_norm": 0.00020627176854759455, + "learning_rate": 5.773609798150059e-08, + "loss": 0.0, + "num_input_tokens_seen": 124392488, + "step": 184645 + }, + { + "epoch": 4.511030220115799, + "grad_norm": 2.99558541883016e-05, + "learning_rate": 5.7707544393326145e-08, + "loss": 0.0, + "num_input_tokens_seen": 124395816, + "step": 184650 + }, + { + "epoch": 4.511152370947646, + "grad_norm": 0.00017671234672889113, + "learning_rate": 5.767899765775608e-08, + "loss": 0.0414, + "num_input_tokens_seen": 124399272, + "step": 184655 + }, + { + "epoch": 4.511274521779494, + "grad_norm": 5.429242810350843e-05, + "learning_rate": 5.765045777499755e-08, + "loss": 0.0, + "num_input_tokens_seen": 124402600, + "step": 184660 + }, + { + "epoch": 4.51139667261134, + "grad_norm": 0.02834673970937729, + "learning_rate": 5.7621924745258624e-08, + "loss": 0.0, + "num_input_tokens_seen": 124405928, + "step": 184665 + }, + { + "epoch": 4.511518823443188, + "grad_norm": 0.0001638929679756984, + "learning_rate": 5.759339856874634e-08, + "loss": 0.0, + "num_input_tokens_seen": 124409128, + "step": 184670 + }, + { + "epoch": 4.511640974275035, + "grad_norm": 0.00017873983597382903, + "learning_rate": 5.7564879245668444e-08, + "loss": 0.0, + "num_input_tokens_seen": 124412904, + "step": 184675 + }, + { + "epoch": 4.5117631251068815, + "grad_norm": 0.009502957575023174, + "learning_rate": 5.753636677623208e-08, + "loss": 0.0, + "num_input_tokens_seen": 124416360, + "step": 184680 + }, + { + "epoch": 4.511885275938729, + "grad_norm": 0.00027789807063527405, + "learning_rate": 5.750786116064477e-08, + "loss": 0.0, + "num_input_tokens_seen": 124419688, + "step": 184685 + }, + { + "epoch": 4.512007426770577, + "grad_norm": 0.013086493127048016, + "learning_rate": 5.7479362399113995e-08, + "loss": 0.0, + "num_input_tokens_seen": 124423080, + "step": 184690 + }, + { + "epoch": 4.5121295776024235, + "grad_norm": 0.005364415235817432, + "learning_rate": 5.7450870491846715e-08, + "loss": 0.0, + "num_input_tokens_seen": 124426344, + "step": 184695 + }, + { + "epoch": 4.51225172843427, + "grad_norm": 1.4318278772407211e-05, + "learning_rate": 5.7422385439050095e-08, + "loss": 0.0, + "num_input_tokens_seen": 124429800, + "step": 184700 + }, + { + "epoch": 4.512373879266118, + "grad_norm": 8.754514419706538e-05, + "learning_rate": 5.7393907240931624e-08, + "loss": 0.0, + "num_input_tokens_seen": 124433000, + "step": 184705 + }, + { + "epoch": 4.512496030097965, + "grad_norm": 0.0005249512614682317, + "learning_rate": 5.736543589769816e-08, + "loss": 0.0, + "num_input_tokens_seen": 124436392, + "step": 184710 + }, + { + "epoch": 4.512618180929812, + "grad_norm": 0.00015164175420068204, + "learning_rate": 5.733697140955662e-08, + "loss": 0.0, + "num_input_tokens_seen": 124439656, + "step": 184715 + }, + { + "epoch": 4.512740331761659, + "grad_norm": 2.3307018636842258e-05, + "learning_rate": 5.73085137767142e-08, + "loss": 0.0003, + "num_input_tokens_seen": 124442984, + "step": 184720 + }, + { + "epoch": 4.512862482593507, + "grad_norm": 6.507845682790503e-05, + "learning_rate": 5.728006299937793e-08, + "loss": 0.0, + "num_input_tokens_seen": 124445992, + "step": 184725 + }, + { + "epoch": 4.512984633425353, + "grad_norm": 1.8531858586356975e-05, + "learning_rate": 5.7251619077754445e-08, + "loss": 0.0, + "num_input_tokens_seen": 124449256, + "step": 184730 + }, + { + "epoch": 4.513106784257201, + "grad_norm": 2.9950553653179668e-05, + "learning_rate": 5.72231820120509e-08, + "loss": 0.0, + "num_input_tokens_seen": 124452264, + "step": 184735 + }, + { + "epoch": 4.513228935089048, + "grad_norm": 0.00011571579670999199, + "learning_rate": 5.7194751802473793e-08, + "loss": 0.0, + "num_input_tokens_seen": 124455976, + "step": 184740 + }, + { + "epoch": 4.513351085920895, + "grad_norm": 1.489359692641301e-05, + "learning_rate": 5.716632844923008e-08, + "loss": 0.0, + "num_input_tokens_seen": 124458920, + "step": 184745 + }, + { + "epoch": 4.513473236752742, + "grad_norm": 0.0008117137476801872, + "learning_rate": 5.71379119525266e-08, + "loss": 0.0, + "num_input_tokens_seen": 124462504, + "step": 184750 + }, + { + "epoch": 4.51359538758459, + "grad_norm": 0.0002265637303935364, + "learning_rate": 5.710950231256961e-08, + "loss": 0.0, + "num_input_tokens_seen": 124465960, + "step": 184755 + }, + { + "epoch": 4.5137175384164365, + "grad_norm": 0.013958304189145565, + "learning_rate": 5.708109952956608e-08, + "loss": 0.0, + "num_input_tokens_seen": 124469672, + "step": 184760 + }, + { + "epoch": 4.513839689248284, + "grad_norm": 0.002560678403824568, + "learning_rate": 5.705270360372227e-08, + "loss": 0.0, + "num_input_tokens_seen": 124473000, + "step": 184765 + }, + { + "epoch": 4.513961840080131, + "grad_norm": 0.0003456149424891919, + "learning_rate": 5.702431453524503e-08, + "loss": 0.0402, + "num_input_tokens_seen": 124476328, + "step": 184770 + }, + { + "epoch": 4.514083990911978, + "grad_norm": 0.000688866013661027, + "learning_rate": 5.699593232434041e-08, + "loss": 0.0008, + "num_input_tokens_seen": 124479848, + "step": 184775 + }, + { + "epoch": 4.514206141743825, + "grad_norm": 1.5245567738020327e-05, + "learning_rate": 5.6967556971215027e-08, + "loss": 0.0, + "num_input_tokens_seen": 124483240, + "step": 184780 + }, + { + "epoch": 4.514328292575673, + "grad_norm": 3.2199197448790073e-05, + "learning_rate": 5.693918847607526e-08, + "loss": 0.0, + "num_input_tokens_seen": 124486696, + "step": 184785 + }, + { + "epoch": 4.51445044340752, + "grad_norm": 0.0001976760831894353, + "learning_rate": 5.691082683912729e-08, + "loss": 0.0, + "num_input_tokens_seen": 124490152, + "step": 184790 + }, + { + "epoch": 4.514572594239366, + "grad_norm": 0.004537543747574091, + "learning_rate": 5.688247206057761e-08, + "loss": 0.0, + "num_input_tokens_seen": 124494376, + "step": 184795 + }, + { + "epoch": 4.514694745071214, + "grad_norm": 0.00019870110554620624, + "learning_rate": 5.6854124140632285e-08, + "loss": 0.0, + "num_input_tokens_seen": 124497960, + "step": 184800 + }, + { + "epoch": 4.514816895903061, + "grad_norm": 0.00021802991977892816, + "learning_rate": 5.682578307949726e-08, + "loss": 0.0, + "num_input_tokens_seen": 124500904, + "step": 184805 + }, + { + "epoch": 4.514939046734908, + "grad_norm": 8.688728121342137e-05, + "learning_rate": 5.679744887737903e-08, + "loss": 0.0, + "num_input_tokens_seen": 124504360, + "step": 184810 + }, + { + "epoch": 4.515061197566755, + "grad_norm": 0.00013730123464483768, + "learning_rate": 5.676912153448321e-08, + "loss": 0.0, + "num_input_tokens_seen": 124508072, + "step": 184815 + }, + { + "epoch": 4.515183348398603, + "grad_norm": 0.00025287570315413177, + "learning_rate": 5.6740801051016197e-08, + "loss": 0.0, + "num_input_tokens_seen": 124511336, + "step": 184820 + }, + { + "epoch": 4.5153054992304495, + "grad_norm": 0.0004785667988471687, + "learning_rate": 5.671248742718371e-08, + "loss": 0.0, + "num_input_tokens_seen": 124515432, + "step": 184825 + }, + { + "epoch": 4.515427650062297, + "grad_norm": 0.002044425345957279, + "learning_rate": 5.66841806631918e-08, + "loss": 0.0, + "num_input_tokens_seen": 124519016, + "step": 184830 + }, + { + "epoch": 4.515549800894144, + "grad_norm": 0.00304840924218297, + "learning_rate": 5.66558807592461e-08, + "loss": 0.0, + "num_input_tokens_seen": 124522152, + "step": 184835 + }, + { + "epoch": 4.515671951725992, + "grad_norm": 0.00020235584815964103, + "learning_rate": 5.662758771555265e-08, + "loss": 0.0, + "num_input_tokens_seen": 124525480, + "step": 184840 + }, + { + "epoch": 4.515794102557838, + "grad_norm": 0.0005201268359087408, + "learning_rate": 5.659930153231718e-08, + "loss": 0.0, + "num_input_tokens_seen": 124528808, + "step": 184845 + }, + { + "epoch": 4.515916253389686, + "grad_norm": 0.0029112256597727537, + "learning_rate": 5.657102220974519e-08, + "loss": 0.0, + "num_input_tokens_seen": 124532456, + "step": 184850 + }, + { + "epoch": 4.516038404221533, + "grad_norm": 0.08507881313562393, + "learning_rate": 5.654274974804263e-08, + "loss": 0.0, + "num_input_tokens_seen": 124535976, + "step": 184855 + }, + { + "epoch": 4.51616055505338, + "grad_norm": 0.006632891483604908, + "learning_rate": 5.651448414741489e-08, + "loss": 0.0, + "num_input_tokens_seen": 124539048, + "step": 184860 + }, + { + "epoch": 4.516282705885227, + "grad_norm": 0.0012043018359690905, + "learning_rate": 5.648622540806758e-08, + "loss": 0.0, + "num_input_tokens_seen": 124542568, + "step": 184865 + }, + { + "epoch": 4.516404856717074, + "grad_norm": 0.0008988183690235019, + "learning_rate": 5.6457973530206206e-08, + "loss": 0.0, + "num_input_tokens_seen": 124545832, + "step": 184870 + }, + { + "epoch": 4.5165270075489214, + "grad_norm": 1.9462617274257354e-05, + "learning_rate": 5.6429728514036154e-08, + "loss": 0.0, + "num_input_tokens_seen": 124549032, + "step": 184875 + }, + { + "epoch": 4.516649158380768, + "grad_norm": 0.001904234173707664, + "learning_rate": 5.640149035976305e-08, + "loss": 0.0, + "num_input_tokens_seen": 124552616, + "step": 184880 + }, + { + "epoch": 4.516771309212616, + "grad_norm": 0.000173800130141899, + "learning_rate": 5.637325906759205e-08, + "loss": 0.0, + "num_input_tokens_seen": 124555624, + "step": 184885 + }, + { + "epoch": 4.516893460044463, + "grad_norm": 0.0005049865576438606, + "learning_rate": 5.634503463772855e-08, + "loss": 0.0, + "num_input_tokens_seen": 124558632, + "step": 184890 + }, + { + "epoch": 4.51701561087631, + "grad_norm": 0.001019941526465118, + "learning_rate": 5.631681707037772e-08, + "loss": 0.0, + "num_input_tokens_seen": 124561832, + "step": 184895 + }, + { + "epoch": 4.517137761708157, + "grad_norm": 5.146110197529197e-05, + "learning_rate": 5.628860636574495e-08, + "loss": 0.0, + "num_input_tokens_seen": 124565352, + "step": 184900 + }, + { + "epoch": 4.517259912540005, + "grad_norm": 2.4899445634218864e-05, + "learning_rate": 5.626040252403519e-08, + "loss": 0.0, + "num_input_tokens_seen": 124568680, + "step": 184905 + }, + { + "epoch": 4.517382063371851, + "grad_norm": 0.0002568171184975654, + "learning_rate": 5.623220554545349e-08, + "loss": 0.0, + "num_input_tokens_seen": 124571944, + "step": 184910 + }, + { + "epoch": 4.517504214203699, + "grad_norm": 0.00153777573723346, + "learning_rate": 5.6204015430205254e-08, + "loss": 0.0, + "num_input_tokens_seen": 124575848, + "step": 184915 + }, + { + "epoch": 4.517626365035546, + "grad_norm": 0.00015119351155590266, + "learning_rate": 5.6175832178495086e-08, + "loss": 0.0, + "num_input_tokens_seen": 124579048, + "step": 184920 + }, + { + "epoch": 4.517748515867393, + "grad_norm": 0.0012161422055214643, + "learning_rate": 5.614765579052827e-08, + "loss": 0.0, + "num_input_tokens_seen": 124582312, + "step": 184925 + }, + { + "epoch": 4.51787066669924, + "grad_norm": 0.000827375624794513, + "learning_rate": 5.6119486266509306e-08, + "loss": 0.0, + "num_input_tokens_seen": 124585896, + "step": 184930 + }, + { + "epoch": 4.517992817531088, + "grad_norm": 0.00010629823373164982, + "learning_rate": 5.6091323606643484e-08, + "loss": 0.0, + "num_input_tokens_seen": 124590248, + "step": 184935 + }, + { + "epoch": 4.5181149683629345, + "grad_norm": 0.0003775875666178763, + "learning_rate": 5.606316781113551e-08, + "loss": 0.0, + "num_input_tokens_seen": 124593960, + "step": 184940 + }, + { + "epoch": 4.518237119194782, + "grad_norm": 7.271223148563877e-05, + "learning_rate": 5.603501888018991e-08, + "loss": 0.0, + "num_input_tokens_seen": 124596968, + "step": 184945 + }, + { + "epoch": 4.518359270026629, + "grad_norm": 7.457828905899078e-05, + "learning_rate": 5.6006876814011725e-08, + "loss": 0.0, + "num_input_tokens_seen": 124600232, + "step": 184950 + }, + { + "epoch": 4.5184814208584765, + "grad_norm": 8.921558764996007e-05, + "learning_rate": 5.5978741612805244e-08, + "loss": 0.0, + "num_input_tokens_seen": 124603624, + "step": 184955 + }, + { + "epoch": 4.518603571690323, + "grad_norm": 0.005783631466329098, + "learning_rate": 5.5950613276775415e-08, + "loss": 0.0, + "num_input_tokens_seen": 124607016, + "step": 184960 + }, + { + "epoch": 4.51872572252217, + "grad_norm": 0.0010702416766434908, + "learning_rate": 5.5922491806126514e-08, + "loss": 0.0, + "num_input_tokens_seen": 124610344, + "step": 184965 + }, + { + "epoch": 4.518847873354018, + "grad_norm": 0.0003246046253480017, + "learning_rate": 5.589437720106327e-08, + "loss": 0.0, + "num_input_tokens_seen": 124613736, + "step": 184970 + }, + { + "epoch": 4.518970024185864, + "grad_norm": 3.850104258162901e-05, + "learning_rate": 5.586626946179007e-08, + "loss": 0.0, + "num_input_tokens_seen": 124617128, + "step": 184975 + }, + { + "epoch": 4.519092175017712, + "grad_norm": 0.00029245539917610586, + "learning_rate": 5.58381685885112e-08, + "loss": 0.0, + "num_input_tokens_seen": 124620392, + "step": 184980 + }, + { + "epoch": 4.519214325849559, + "grad_norm": 7.031484710751101e-05, + "learning_rate": 5.581007458143128e-08, + "loss": 0.0, + "num_input_tokens_seen": 124623784, + "step": 184985 + }, + { + "epoch": 4.519336476681406, + "grad_norm": 0.0002684597857296467, + "learning_rate": 5.578198744075424e-08, + "loss": 0.0, + "num_input_tokens_seen": 124627112, + "step": 184990 + }, + { + "epoch": 4.519458627513253, + "grad_norm": 0.00010617887164698914, + "learning_rate": 5.57539071666846e-08, + "loss": 0.0, + "num_input_tokens_seen": 124630376, + "step": 184995 + }, + { + "epoch": 4.519580778345101, + "grad_norm": 5.1525526941986755e-05, + "learning_rate": 5.572583375942675e-08, + "loss": 0.0, + "num_input_tokens_seen": 124633832, + "step": 185000 + }, + { + "epoch": 4.5197029291769475, + "grad_norm": 0.0009134263964369893, + "learning_rate": 5.569776721918451e-08, + "loss": 0.0, + "num_input_tokens_seen": 124637288, + "step": 185005 + }, + { + "epoch": 4.519825080008795, + "grad_norm": 0.0002533289953134954, + "learning_rate": 5.566970754616196e-08, + "loss": 0.0001, + "num_input_tokens_seen": 124640424, + "step": 185010 + }, + { + "epoch": 4.519947230840642, + "grad_norm": 0.01770016737282276, + "learning_rate": 5.564165474056337e-08, + "loss": 0.0, + "num_input_tokens_seen": 124643944, + "step": 185015 + }, + { + "epoch": 4.5200693816724895, + "grad_norm": 0.0004412032139953226, + "learning_rate": 5.5613608802592806e-08, + "loss": 0.0, + "num_input_tokens_seen": 124647528, + "step": 185020 + }, + { + "epoch": 4.520191532504336, + "grad_norm": 0.0002838908403646201, + "learning_rate": 5.558556973245387e-08, + "loss": 0.0, + "num_input_tokens_seen": 124651048, + "step": 185025 + }, + { + "epoch": 4.520313683336184, + "grad_norm": 5.34481341674109e-06, + "learning_rate": 5.555753753035064e-08, + "loss": 0.0, + "num_input_tokens_seen": 124654568, + "step": 185030 + }, + { + "epoch": 4.520435834168031, + "grad_norm": 0.0009813703363761306, + "learning_rate": 5.552951219648727e-08, + "loss": 0.0, + "num_input_tokens_seen": 124657704, + "step": 185035 + }, + { + "epoch": 4.520557984999877, + "grad_norm": 0.0004406738735269755, + "learning_rate": 5.550149373106716e-08, + "loss": 0.0, + "num_input_tokens_seen": 124661672, + "step": 185040 + }, + { + "epoch": 4.520680135831725, + "grad_norm": 0.00022535765310749412, + "learning_rate": 5.547348213429437e-08, + "loss": 0.0, + "num_input_tokens_seen": 124665128, + "step": 185045 + }, + { + "epoch": 4.520802286663573, + "grad_norm": 0.0007305620820261538, + "learning_rate": 5.544547740637229e-08, + "loss": 0.0, + "num_input_tokens_seen": 124668776, + "step": 185050 + }, + { + "epoch": 4.520924437495419, + "grad_norm": 2.776537621684838e-05, + "learning_rate": 5.5417479547504756e-08, + "loss": 0.0, + "num_input_tokens_seen": 124672488, + "step": 185055 + }, + { + "epoch": 4.521046588327266, + "grad_norm": 0.0010982746025547385, + "learning_rate": 5.538948855789549e-08, + "loss": 0.0, + "num_input_tokens_seen": 124675688, + "step": 185060 + }, + { + "epoch": 4.521168739159114, + "grad_norm": 3.9815080526750535e-05, + "learning_rate": 5.536150443774779e-08, + "loss": 0.0, + "num_input_tokens_seen": 124679016, + "step": 185065 + }, + { + "epoch": 4.5212908899909605, + "grad_norm": 0.0006533220293931663, + "learning_rate": 5.5333527187265474e-08, + "loss": 0.0, + "num_input_tokens_seen": 124682664, + "step": 185070 + }, + { + "epoch": 4.521413040822808, + "grad_norm": 0.0008570468053221703, + "learning_rate": 5.530555680665172e-08, + "loss": 0.0, + "num_input_tokens_seen": 124685544, + "step": 185075 + }, + { + "epoch": 4.521535191654655, + "grad_norm": 0.00027924918686039746, + "learning_rate": 5.5277593296110145e-08, + "loss": 0.0, + "num_input_tokens_seen": 124689384, + "step": 185080 + }, + { + "epoch": 4.5216573424865025, + "grad_norm": 1.934741339937318e-05, + "learning_rate": 5.5249636655843924e-08, + "loss": 0.0, + "num_input_tokens_seen": 124692456, + "step": 185085 + }, + { + "epoch": 4.521779493318349, + "grad_norm": 0.0005722602945752442, + "learning_rate": 5.5221686886056326e-08, + "loss": 0.0, + "num_input_tokens_seen": 124695784, + "step": 185090 + }, + { + "epoch": 4.521901644150197, + "grad_norm": 0.0003941961913369596, + "learning_rate": 5.519374398695098e-08, + "loss": 0.0, + "num_input_tokens_seen": 124699624, + "step": 185095 + }, + { + "epoch": 4.522023794982044, + "grad_norm": 4.406500738696195e-05, + "learning_rate": 5.516580795873071e-08, + "loss": 0.0713, + "num_input_tokens_seen": 124702888, + "step": 185100 + }, + { + "epoch": 4.522145945813891, + "grad_norm": 40.052284240722656, + "learning_rate": 5.513787880159892e-08, + "loss": 0.0463, + "num_input_tokens_seen": 124705960, + "step": 185105 + }, + { + "epoch": 4.522268096645738, + "grad_norm": 3.8516096537932754e-05, + "learning_rate": 5.5109956515758674e-08, + "loss": 0.0, + "num_input_tokens_seen": 124709160, + "step": 185110 + }, + { + "epoch": 4.522390247477586, + "grad_norm": 0.0001808811502996832, + "learning_rate": 5.508204110141279e-08, + "loss": 0.0, + "num_input_tokens_seen": 124712296, + "step": 185115 + }, + { + "epoch": 4.522512398309432, + "grad_norm": 0.0002272677666042, + "learning_rate": 5.505413255876457e-08, + "loss": 0.0, + "num_input_tokens_seen": 124716456, + "step": 185120 + }, + { + "epoch": 4.52263454914128, + "grad_norm": 0.0005010889144614339, + "learning_rate": 5.502623088801672e-08, + "loss": 0.0, + "num_input_tokens_seen": 124719784, + "step": 185125 + }, + { + "epoch": 4.522756699973127, + "grad_norm": 0.0003914596454706043, + "learning_rate": 5.4998336089372546e-08, + "loss": 0.0, + "num_input_tokens_seen": 124723368, + "step": 185130 + }, + { + "epoch": 4.5228788508049735, + "grad_norm": 8.751282621233258e-06, + "learning_rate": 5.497044816303442e-08, + "loss": 0.0, + "num_input_tokens_seen": 124726888, + "step": 185135 + }, + { + "epoch": 4.523001001636821, + "grad_norm": 0.00026843592058867216, + "learning_rate": 5.494256710920542e-08, + "loss": 0.0, + "num_input_tokens_seen": 124730216, + "step": 185140 + }, + { + "epoch": 4.523123152468669, + "grad_norm": 0.0016787907807156444, + "learning_rate": 5.4914692928088257e-08, + "loss": 0.0, + "num_input_tokens_seen": 124733544, + "step": 185145 + }, + { + "epoch": 4.5232453033005156, + "grad_norm": 0.000363263301551342, + "learning_rate": 5.488682561988556e-08, + "loss": 0.0, + "num_input_tokens_seen": 124736552, + "step": 185150 + }, + { + "epoch": 4.523367454132362, + "grad_norm": 1.498419078416191e-05, + "learning_rate": 5.485896518480026e-08, + "loss": 0.0, + "num_input_tokens_seen": 124739944, + "step": 185155 + }, + { + "epoch": 4.52348960496421, + "grad_norm": 0.0005063650896772742, + "learning_rate": 5.483111162303466e-08, + "loss": 0.0, + "num_input_tokens_seen": 124743144, + "step": 185160 + }, + { + "epoch": 4.523611755796057, + "grad_norm": 7.881235069362447e-05, + "learning_rate": 5.480326493479148e-08, + "loss": 0.0, + "num_input_tokens_seen": 124746856, + "step": 185165 + }, + { + "epoch": 4.523733906627904, + "grad_norm": 0.00028229813324287534, + "learning_rate": 5.477542512027311e-08, + "loss": 0.0, + "num_input_tokens_seen": 124749992, + "step": 185170 + }, + { + "epoch": 4.523856057459751, + "grad_norm": 0.0024047689512372017, + "learning_rate": 5.474759217968228e-08, + "loss": 0.0, + "num_input_tokens_seen": 124753512, + "step": 185175 + }, + { + "epoch": 4.523978208291599, + "grad_norm": 0.00021296401973813772, + "learning_rate": 5.4719766113220936e-08, + "loss": 0.0, + "num_input_tokens_seen": 124756904, + "step": 185180 + }, + { + "epoch": 4.524100359123445, + "grad_norm": 0.02283492125570774, + "learning_rate": 5.4691946921091804e-08, + "loss": 0.0, + "num_input_tokens_seen": 124759976, + "step": 185185 + }, + { + "epoch": 4.524222509955293, + "grad_norm": 0.0004252835351508111, + "learning_rate": 5.4664134603497166e-08, + "loss": 0.0, + "num_input_tokens_seen": 124763112, + "step": 185190 + }, + { + "epoch": 4.52434466078714, + "grad_norm": 3.6331872252048925e-05, + "learning_rate": 5.463632916063909e-08, + "loss": 0.0, + "num_input_tokens_seen": 124766568, + "step": 185195 + }, + { + "epoch": 4.5244668116189874, + "grad_norm": 4.691815047408454e-05, + "learning_rate": 5.460853059272008e-08, + "loss": 0.0, + "num_input_tokens_seen": 124769640, + "step": 185200 + }, + { + "epoch": 4.524588962450834, + "grad_norm": 4.6906861825846136e-05, + "learning_rate": 5.458073889994197e-08, + "loss": 0.0, + "num_input_tokens_seen": 124772904, + "step": 185205 + }, + { + "epoch": 4.524711113282682, + "grad_norm": 5.869155211257748e-05, + "learning_rate": 5.4552954082507154e-08, + "loss": 0.0, + "num_input_tokens_seen": 124776552, + "step": 185210 + }, + { + "epoch": 4.524833264114529, + "grad_norm": 0.04095650091767311, + "learning_rate": 5.452517614061736e-08, + "loss": 0.0, + "num_input_tokens_seen": 124780072, + "step": 185215 + }, + { + "epoch": 4.524955414946376, + "grad_norm": 0.08172313123941422, + "learning_rate": 5.4497405074474976e-08, + "loss": 0.0, + "num_input_tokens_seen": 124783208, + "step": 185220 + }, + { + "epoch": 4.525077565778223, + "grad_norm": 0.00012791799963451922, + "learning_rate": 5.446964088428174e-08, + "loss": 0.0, + "num_input_tokens_seen": 124786408, + "step": 185225 + }, + { + "epoch": 4.52519971661007, + "grad_norm": 0.0001636029628571123, + "learning_rate": 5.444188357023938e-08, + "loss": 0.0, + "num_input_tokens_seen": 124789800, + "step": 185230 + }, + { + "epoch": 4.525321867441917, + "grad_norm": 9.43487902986817e-05, + "learning_rate": 5.441413313255028e-08, + "loss": 0.0, + "num_input_tokens_seen": 124793192, + "step": 185235 + }, + { + "epoch": 4.525444018273764, + "grad_norm": 0.0008830385049805045, + "learning_rate": 5.4386389571415616e-08, + "loss": 0.0, + "num_input_tokens_seen": 124796392, + "step": 185240 + }, + { + "epoch": 4.525566169105612, + "grad_norm": 3.347361052874476e-05, + "learning_rate": 5.435865288703756e-08, + "loss": 0.0, + "num_input_tokens_seen": 124799592, + "step": 185245 + }, + { + "epoch": 4.5256883199374585, + "grad_norm": 1.570507993164938e-05, + "learning_rate": 5.433092307961784e-08, + "loss": 0.0, + "num_input_tokens_seen": 124802472, + "step": 185250 + }, + { + "epoch": 4.525810470769306, + "grad_norm": 0.000179529408342205, + "learning_rate": 5.4303200149357966e-08, + "loss": 0.0, + "num_input_tokens_seen": 124805480, + "step": 185255 + }, + { + "epoch": 4.525932621601153, + "grad_norm": 0.005788368638604879, + "learning_rate": 5.4275484096459546e-08, + "loss": 0.0, + "num_input_tokens_seen": 124809896, + "step": 185260 + }, + { + "epoch": 4.5260547724330005, + "grad_norm": 1.500210419180803e-05, + "learning_rate": 5.42477749211242e-08, + "loss": 0.0, + "num_input_tokens_seen": 124813672, + "step": 185265 + }, + { + "epoch": 4.526176923264847, + "grad_norm": 0.00018123764311894774, + "learning_rate": 5.422007262355344e-08, + "loss": 0.0, + "num_input_tokens_seen": 124816616, + "step": 185270 + }, + { + "epoch": 4.526299074096695, + "grad_norm": 0.0005241407197900116, + "learning_rate": 5.419237720394865e-08, + "loss": 0.0, + "num_input_tokens_seen": 124820456, + "step": 185275 + }, + { + "epoch": 4.526421224928542, + "grad_norm": 0.00012486950436141342, + "learning_rate": 5.416468866251123e-08, + "loss": 0.0, + "num_input_tokens_seen": 124823656, + "step": 185280 + }, + { + "epoch": 4.526543375760389, + "grad_norm": 0.0008724026847630739, + "learning_rate": 5.413700699944268e-08, + "loss": 0.0305, + "num_input_tokens_seen": 124826792, + "step": 185285 + }, + { + "epoch": 4.526665526592236, + "grad_norm": 0.10582155734300613, + "learning_rate": 5.4109332214944184e-08, + "loss": 0.0001, + "num_input_tokens_seen": 124829928, + "step": 185290 + }, + { + "epoch": 4.526787677424084, + "grad_norm": 0.00011101571726612747, + "learning_rate": 5.4081664309217126e-08, + "loss": 0.0, + "num_input_tokens_seen": 124832808, + "step": 185295 + }, + { + "epoch": 4.52690982825593, + "grad_norm": 6.935147393960506e-05, + "learning_rate": 5.405400328246246e-08, + "loss": 0.0, + "num_input_tokens_seen": 124835944, + "step": 185300 + }, + { + "epoch": 4.527031979087777, + "grad_norm": 0.0004977515200152993, + "learning_rate": 5.402634913488158e-08, + "loss": 0.0, + "num_input_tokens_seen": 124839656, + "step": 185305 + }, + { + "epoch": 4.527154129919625, + "grad_norm": 0.0001641338167246431, + "learning_rate": 5.399870186667554e-08, + "loss": 0.0, + "num_input_tokens_seen": 124842728, + "step": 185310 + }, + { + "epoch": 4.527276280751472, + "grad_norm": 0.00629672734066844, + "learning_rate": 5.3971061478045533e-08, + "loss": 0.0, + "num_input_tokens_seen": 124845864, + "step": 185315 + }, + { + "epoch": 4.527398431583319, + "grad_norm": 0.0005530017078854144, + "learning_rate": 5.3943427969192154e-08, + "loss": 0.0, + "num_input_tokens_seen": 124849512, + "step": 185320 + }, + { + "epoch": 4.527520582415166, + "grad_norm": 0.006828108802437782, + "learning_rate": 5.391580134031681e-08, + "loss": 0.0, + "num_input_tokens_seen": 124852904, + "step": 185325 + }, + { + "epoch": 4.5276427332470135, + "grad_norm": 0.033004239201545715, + "learning_rate": 5.388818159162034e-08, + "loss": 0.0, + "num_input_tokens_seen": 124856552, + "step": 185330 + }, + { + "epoch": 4.52776488407886, + "grad_norm": 0.00016071656136773527, + "learning_rate": 5.386056872330325e-08, + "loss": 0.0, + "num_input_tokens_seen": 124859880, + "step": 185335 + }, + { + "epoch": 4.527887034910708, + "grad_norm": 0.006571914069354534, + "learning_rate": 5.383296273556648e-08, + "loss": 0.0072, + "num_input_tokens_seen": 124862952, + "step": 185340 + }, + { + "epoch": 4.528009185742555, + "grad_norm": 7.050389103824273e-05, + "learning_rate": 5.380536362861121e-08, + "loss": 0.0, + "num_input_tokens_seen": 124866024, + "step": 185345 + }, + { + "epoch": 4.528131336574402, + "grad_norm": 2.7005646188626997e-05, + "learning_rate": 5.377777140263762e-08, + "loss": 0.0, + "num_input_tokens_seen": 124869672, + "step": 185350 + }, + { + "epoch": 4.528253487406249, + "grad_norm": 0.0005079191760160029, + "learning_rate": 5.375018605784665e-08, + "loss": 0.0, + "num_input_tokens_seen": 124873064, + "step": 185355 + }, + { + "epoch": 4.528375638238097, + "grad_norm": 0.00022228542366065085, + "learning_rate": 5.372260759443881e-08, + "loss": 0.0, + "num_input_tokens_seen": 124876136, + "step": 185360 + }, + { + "epoch": 4.528497789069943, + "grad_norm": 0.0002706579689402133, + "learning_rate": 5.36950360126146e-08, + "loss": 0.0, + "num_input_tokens_seen": 124879592, + "step": 185365 + }, + { + "epoch": 4.528619939901791, + "grad_norm": 2.9045746487099677e-05, + "learning_rate": 5.3667471312574766e-08, + "loss": 0.0, + "num_input_tokens_seen": 124883048, + "step": 185370 + }, + { + "epoch": 4.528742090733638, + "grad_norm": 0.00029516901122406125, + "learning_rate": 5.363991349451957e-08, + "loss": 0.0, + "num_input_tokens_seen": 124887080, + "step": 185375 + }, + { + "epoch": 4.528864241565485, + "grad_norm": 0.000799721572548151, + "learning_rate": 5.3612362558649536e-08, + "loss": 0.0, + "num_input_tokens_seen": 124890152, + "step": 185380 + }, + { + "epoch": 4.528986392397332, + "grad_norm": 0.000487153185531497, + "learning_rate": 5.358481850516483e-08, + "loss": 0.0, + "num_input_tokens_seen": 124893096, + "step": 185385 + }, + { + "epoch": 4.52910854322918, + "grad_norm": 0.0011373942252248526, + "learning_rate": 5.3557281334265957e-08, + "loss": 0.0, + "num_input_tokens_seen": 124896360, + "step": 185390 + }, + { + "epoch": 4.5292306940610265, + "grad_norm": 0.0003631273575592786, + "learning_rate": 5.352975104615298e-08, + "loss": 0.0489, + "num_input_tokens_seen": 124899560, + "step": 185395 + }, + { + "epoch": 4.529352844892873, + "grad_norm": 0.014406845904886723, + "learning_rate": 5.35022276410263e-08, + "loss": 0.0, + "num_input_tokens_seen": 124903144, + "step": 185400 + }, + { + "epoch": 4.529474995724721, + "grad_norm": 0.0010270047932863235, + "learning_rate": 5.347471111908608e-08, + "loss": 0.0, + "num_input_tokens_seen": 124906280, + "step": 185405 + }, + { + "epoch": 4.5295971465565685, + "grad_norm": 0.0006817655521444976, + "learning_rate": 5.3447201480532164e-08, + "loss": 0.0, + "num_input_tokens_seen": 124909800, + "step": 185410 + }, + { + "epoch": 4.529719297388415, + "grad_norm": 0.00252143875695765, + "learning_rate": 5.3419698725564956e-08, + "loss": 0.0, + "num_input_tokens_seen": 124912936, + "step": 185415 + }, + { + "epoch": 4.529841448220262, + "grad_norm": 7.715379615547135e-05, + "learning_rate": 5.3392202854384284e-08, + "loss": 0.0, + "num_input_tokens_seen": 124916264, + "step": 185420 + }, + { + "epoch": 4.52996359905211, + "grad_norm": 0.0008216107380576432, + "learning_rate": 5.3364713867189995e-08, + "loss": 0.0, + "num_input_tokens_seen": 124919208, + "step": 185425 + }, + { + "epoch": 4.530085749883956, + "grad_norm": 2.122437217622064e-05, + "learning_rate": 5.3337231764182366e-08, + "loss": 0.0, + "num_input_tokens_seen": 124922664, + "step": 185430 + }, + { + "epoch": 4.530207900715804, + "grad_norm": 0.00047716827248223126, + "learning_rate": 5.3309756545560694e-08, + "loss": 0.0, + "num_input_tokens_seen": 124926056, + "step": 185435 + }, + { + "epoch": 4.530330051547651, + "grad_norm": 0.00011559919221326709, + "learning_rate": 5.328228821152536e-08, + "loss": 0.0, + "num_input_tokens_seen": 124929576, + "step": 185440 + }, + { + "epoch": 4.530452202379498, + "grad_norm": 0.0006488763028755784, + "learning_rate": 5.325482676227566e-08, + "loss": 0.0, + "num_input_tokens_seen": 124932584, + "step": 185445 + }, + { + "epoch": 4.530574353211345, + "grad_norm": 0.00010975029726978391, + "learning_rate": 5.3227372198011657e-08, + "loss": 0.0, + "num_input_tokens_seen": 124935976, + "step": 185450 + }, + { + "epoch": 4.530696504043193, + "grad_norm": 7.937142072478309e-05, + "learning_rate": 5.319992451893274e-08, + "loss": 0.0, + "num_input_tokens_seen": 124939240, + "step": 185455 + }, + { + "epoch": 4.5308186548750395, + "grad_norm": 0.00013056962052360177, + "learning_rate": 5.3172483725238635e-08, + "loss": 0.0, + "num_input_tokens_seen": 124942312, + "step": 185460 + }, + { + "epoch": 4.530940805706887, + "grad_norm": 0.0003446988412179053, + "learning_rate": 5.3145049817128975e-08, + "loss": 0.0, + "num_input_tokens_seen": 124946600, + "step": 185465 + }, + { + "epoch": 4.531062956538734, + "grad_norm": 0.00012127536319894716, + "learning_rate": 5.311762279480314e-08, + "loss": 0.0, + "num_input_tokens_seen": 124949928, + "step": 185470 + }, + { + "epoch": 4.5311851073705816, + "grad_norm": 0.0005341236828826368, + "learning_rate": 5.309020265846076e-08, + "loss": 0.0, + "num_input_tokens_seen": 124953128, + "step": 185475 + }, + { + "epoch": 4.531307258202428, + "grad_norm": 0.0023311295080929995, + "learning_rate": 5.306278940830089e-08, + "loss": 0.0, + "num_input_tokens_seen": 124956712, + "step": 185480 + }, + { + "epoch": 4.531429409034276, + "grad_norm": 0.0015459018759429455, + "learning_rate": 5.3035383044523266e-08, + "loss": 0.0, + "num_input_tokens_seen": 124960552, + "step": 185485 + }, + { + "epoch": 4.531551559866123, + "grad_norm": 4.727648411062546e-05, + "learning_rate": 5.3007983567326943e-08, + "loss": 0.0, + "num_input_tokens_seen": 124963816, + "step": 185490 + }, + { + "epoch": 4.531673710697969, + "grad_norm": 0.0004621119878720492, + "learning_rate": 5.298059097691132e-08, + "loss": 0.0, + "num_input_tokens_seen": 124967080, + "step": 185495 + }, + { + "epoch": 4.531795861529817, + "grad_norm": 0.0013261305866762996, + "learning_rate": 5.295320527347558e-08, + "loss": 0.0, + "num_input_tokens_seen": 124970856, + "step": 185500 + }, + { + "epoch": 4.531918012361664, + "grad_norm": 6.2955332396086305e-06, + "learning_rate": 5.292582645721877e-08, + "loss": 0.0, + "num_input_tokens_seen": 124975208, + "step": 185505 + }, + { + "epoch": 4.532040163193511, + "grad_norm": 2.9312808692338876e-05, + "learning_rate": 5.2898454528340296e-08, + "loss": 0.0, + "num_input_tokens_seen": 124980648, + "step": 185510 + }, + { + "epoch": 4.532162314025358, + "grad_norm": 0.00013647187734022737, + "learning_rate": 5.287108948703878e-08, + "loss": 0.0254, + "num_input_tokens_seen": 124984168, + "step": 185515 + }, + { + "epoch": 4.532284464857206, + "grad_norm": 0.000533355341758579, + "learning_rate": 5.284373133351361e-08, + "loss": 0.0, + "num_input_tokens_seen": 124987816, + "step": 185520 + }, + { + "epoch": 4.532406615689053, + "grad_norm": 2.2797685232944787e-05, + "learning_rate": 5.2816380067963406e-08, + "loss": 0.0, + "num_input_tokens_seen": 124991144, + "step": 185525 + }, + { + "epoch": 4.5325287665209, + "grad_norm": 0.00010959566134260967, + "learning_rate": 5.278903569058735e-08, + "loss": 0.0, + "num_input_tokens_seen": 124994344, + "step": 185530 + }, + { + "epoch": 4.532650917352747, + "grad_norm": 0.0015156366862356663, + "learning_rate": 5.276169820158427e-08, + "loss": 0.0, + "num_input_tokens_seen": 124997288, + "step": 185535 + }, + { + "epoch": 4.532773068184595, + "grad_norm": 0.0022079667542129755, + "learning_rate": 5.27343676011528e-08, + "loss": 0.0, + "num_input_tokens_seen": 125001192, + "step": 185540 + }, + { + "epoch": 4.532895219016441, + "grad_norm": 0.022384779527783394, + "learning_rate": 5.270704388949188e-08, + "loss": 0.0, + "num_input_tokens_seen": 125004456, + "step": 185545 + }, + { + "epoch": 4.533017369848289, + "grad_norm": 0.0008203625911846757, + "learning_rate": 5.2679727066799905e-08, + "loss": 0.0, + "num_input_tokens_seen": 125008040, + "step": 185550 + }, + { + "epoch": 4.533139520680136, + "grad_norm": 6.327245500870049e-05, + "learning_rate": 5.265241713327584e-08, + "loss": 0.0, + "num_input_tokens_seen": 125011496, + "step": 185555 + }, + { + "epoch": 4.533261671511983, + "grad_norm": 0.00014428046415559947, + "learning_rate": 5.262511408911841e-08, + "loss": 0.0, + "num_input_tokens_seen": 125015016, + "step": 185560 + }, + { + "epoch": 4.53338382234383, + "grad_norm": 0.0006246797856874764, + "learning_rate": 5.2597817934525776e-08, + "loss": 0.0, + "num_input_tokens_seen": 125018408, + "step": 185565 + }, + { + "epoch": 4.533505973175677, + "grad_norm": 3.378166729817167e-05, + "learning_rate": 5.257052866969669e-08, + "loss": 0.0, + "num_input_tokens_seen": 125021992, + "step": 185570 + }, + { + "epoch": 4.5336281240075245, + "grad_norm": 0.004872548393905163, + "learning_rate": 5.2543246294829426e-08, + "loss": 0.0, + "num_input_tokens_seen": 125025384, + "step": 185575 + }, + { + "epoch": 4.533750274839372, + "grad_norm": 0.0018695322796702385, + "learning_rate": 5.2515970810122715e-08, + "loss": 0.0, + "num_input_tokens_seen": 125028712, + "step": 185580 + }, + { + "epoch": 4.533872425671219, + "grad_norm": 9.729518205858767e-05, + "learning_rate": 5.248870221577451e-08, + "loss": 0.0, + "num_input_tokens_seen": 125031848, + "step": 185585 + }, + { + "epoch": 4.533994576503066, + "grad_norm": 0.0005519227706827223, + "learning_rate": 5.2461440511983424e-08, + "loss": 0.0, + "num_input_tokens_seen": 125034984, + "step": 185590 + }, + { + "epoch": 4.534116727334913, + "grad_norm": 0.00024730851873755455, + "learning_rate": 5.243418569894764e-08, + "loss": 0.0, + "num_input_tokens_seen": 125038248, + "step": 185595 + }, + { + "epoch": 4.53423887816676, + "grad_norm": 0.0003327416779939085, + "learning_rate": 5.2406937776865225e-08, + "loss": 0.0, + "num_input_tokens_seen": 125041640, + "step": 185600 + }, + { + "epoch": 4.534361028998608, + "grad_norm": 0.00013707105244975537, + "learning_rate": 5.2379696745934455e-08, + "loss": 0.0, + "num_input_tokens_seen": 125044776, + "step": 185605 + }, + { + "epoch": 4.534483179830454, + "grad_norm": 0.0006666643312200904, + "learning_rate": 5.23524626063534e-08, + "loss": 0.0003, + "num_input_tokens_seen": 125047912, + "step": 185610 + }, + { + "epoch": 4.534605330662302, + "grad_norm": 0.0019504765514284372, + "learning_rate": 5.232523535832012e-08, + "loss": 0.0, + "num_input_tokens_seen": 125051496, + "step": 185615 + }, + { + "epoch": 4.534727481494149, + "grad_norm": 0.00022360155708156526, + "learning_rate": 5.229801500203268e-08, + "loss": 0.0, + "num_input_tokens_seen": 125054888, + "step": 185620 + }, + { + "epoch": 4.534849632325996, + "grad_norm": 0.00042555946856737137, + "learning_rate": 5.2270801537689035e-08, + "loss": 0.0, + "num_input_tokens_seen": 125058536, + "step": 185625 + }, + { + "epoch": 4.534971783157843, + "grad_norm": 0.004613831639289856, + "learning_rate": 5.2243594965486916e-08, + "loss": 0.0, + "num_input_tokens_seen": 125061928, + "step": 185630 + }, + { + "epoch": 4.535093933989691, + "grad_norm": 0.0004901207285001874, + "learning_rate": 5.221639528562438e-08, + "loss": 0.0, + "num_input_tokens_seen": 125065704, + "step": 185635 + }, + { + "epoch": 4.5352160848215375, + "grad_norm": 8.990954665932804e-05, + "learning_rate": 5.218920249829906e-08, + "loss": 0.0, + "num_input_tokens_seen": 125068968, + "step": 185640 + }, + { + "epoch": 4.535338235653385, + "grad_norm": 0.0013385992497205734, + "learning_rate": 5.216201660370878e-08, + "loss": 0.0, + "num_input_tokens_seen": 125072168, + "step": 185645 + }, + { + "epoch": 4.535460386485232, + "grad_norm": 0.0028935212176293135, + "learning_rate": 5.2134837602051174e-08, + "loss": 0.0, + "num_input_tokens_seen": 125076008, + "step": 185650 + }, + { + "epoch": 4.5355825373170795, + "grad_norm": 0.7927491068840027, + "learning_rate": 5.210766549352419e-08, + "loss": 0.0, + "num_input_tokens_seen": 125078888, + "step": 185655 + }, + { + "epoch": 4.535704688148926, + "grad_norm": 0.00037081216578371823, + "learning_rate": 5.2080500278325e-08, + "loss": 0.0, + "num_input_tokens_seen": 125082152, + "step": 185660 + }, + { + "epoch": 4.535826838980773, + "grad_norm": 0.00030138157308101654, + "learning_rate": 5.2053341956651566e-08, + "loss": 0.0, + "num_input_tokens_seen": 125085864, + "step": 185665 + }, + { + "epoch": 4.535948989812621, + "grad_norm": 0.0005100808339193463, + "learning_rate": 5.202619052870105e-08, + "loss": 0.0, + "num_input_tokens_seen": 125089256, + "step": 185670 + }, + { + "epoch": 4.536071140644468, + "grad_norm": 0.00035853905137628317, + "learning_rate": 5.19990459946712e-08, + "loss": 0.0, + "num_input_tokens_seen": 125092968, + "step": 185675 + }, + { + "epoch": 4.536193291476315, + "grad_norm": 2.2261907361098565e-05, + "learning_rate": 5.1971908354759065e-08, + "loss": 0.0, + "num_input_tokens_seen": 125096232, + "step": 185680 + }, + { + "epoch": 4.536315442308162, + "grad_norm": 0.0009718828368932009, + "learning_rate": 5.194477760916227e-08, + "loss": 0.0, + "num_input_tokens_seen": 125099880, + "step": 185685 + }, + { + "epoch": 4.536437593140009, + "grad_norm": 14.48056697845459, + "learning_rate": 5.1917653758078216e-08, + "loss": 0.0256, + "num_input_tokens_seen": 125103080, + "step": 185690 + }, + { + "epoch": 4.536559743971856, + "grad_norm": 0.00042473600478842854, + "learning_rate": 5.189053680170374e-08, + "loss": 0.0, + "num_input_tokens_seen": 125106856, + "step": 185695 + }, + { + "epoch": 4.536681894803704, + "grad_norm": 0.027816886082291603, + "learning_rate": 5.186342674023647e-08, + "loss": 0.0, + "num_input_tokens_seen": 125110248, + "step": 185700 + }, + { + "epoch": 4.5368040456355505, + "grad_norm": 0.0033893415238708258, + "learning_rate": 5.1836323573873354e-08, + "loss": 0.0, + "num_input_tokens_seen": 125113256, + "step": 185705 + }, + { + "epoch": 4.536926196467398, + "grad_norm": 1.5602147579193115, + "learning_rate": 5.180922730281134e-08, + "loss": 0.0006, + "num_input_tokens_seen": 125116392, + "step": 185710 + }, + { + "epoch": 4.537048347299245, + "grad_norm": 0.0005292331916280091, + "learning_rate": 5.178213792724795e-08, + "loss": 0.0, + "num_input_tokens_seen": 125119592, + "step": 185715 + }, + { + "epoch": 4.5371704981310925, + "grad_norm": 0.0013334914110600948, + "learning_rate": 5.175505544737968e-08, + "loss": 0.0, + "num_input_tokens_seen": 125122856, + "step": 185720 + }, + { + "epoch": 4.537292648962939, + "grad_norm": 0.0020089801400899887, + "learning_rate": 5.1727979863403826e-08, + "loss": 0.0, + "num_input_tokens_seen": 125126312, + "step": 185725 + }, + { + "epoch": 4.537414799794787, + "grad_norm": 0.00014795052993576974, + "learning_rate": 5.1700911175517114e-08, + "loss": 0.0474, + "num_input_tokens_seen": 125129448, + "step": 185730 + }, + { + "epoch": 4.537536950626634, + "grad_norm": 0.00015690297004766762, + "learning_rate": 5.167384938391639e-08, + "loss": 0.0, + "num_input_tokens_seen": 125133096, + "step": 185735 + }, + { + "epoch": 4.537659101458481, + "grad_norm": 0.005538203753530979, + "learning_rate": 5.1646794488798606e-08, + "loss": 0.0, + "num_input_tokens_seen": 125136424, + "step": 185740 + }, + { + "epoch": 4.537781252290328, + "grad_norm": 0.0001602565753273666, + "learning_rate": 5.161974649036027e-08, + "loss": 0.0, + "num_input_tokens_seen": 125139880, + "step": 185745 + }, + { + "epoch": 4.537903403122176, + "grad_norm": 0.00017262480105273426, + "learning_rate": 5.159270538879834e-08, + "loss": 0.0, + "num_input_tokens_seen": 125143400, + "step": 185750 + }, + { + "epoch": 4.538025553954022, + "grad_norm": 0.0003744983405340463, + "learning_rate": 5.156567118430921e-08, + "loss": 0.0, + "num_input_tokens_seen": 125146856, + "step": 185755 + }, + { + "epoch": 4.538147704785869, + "grad_norm": 5.3543702961178496e-05, + "learning_rate": 5.1538643877089724e-08, + "loss": 0.0, + "num_input_tokens_seen": 125150120, + "step": 185760 + }, + { + "epoch": 4.538269855617717, + "grad_norm": 0.00017326330998912454, + "learning_rate": 5.151162346733629e-08, + "loss": 0.0, + "num_input_tokens_seen": 125153000, + "step": 185765 + }, + { + "epoch": 4.538392006449564, + "grad_norm": 0.0007275182870216668, + "learning_rate": 5.1484609955245395e-08, + "loss": 0.0, + "num_input_tokens_seen": 125156584, + "step": 185770 + }, + { + "epoch": 4.538514157281411, + "grad_norm": 0.00010956819460261613, + "learning_rate": 5.145760334101368e-08, + "loss": 0.0, + "num_input_tokens_seen": 125159848, + "step": 185775 + }, + { + "epoch": 4.538636308113258, + "grad_norm": 0.00023068742302712053, + "learning_rate": 5.14306036248372e-08, + "loss": 0.0, + "num_input_tokens_seen": 125163240, + "step": 185780 + }, + { + "epoch": 4.5387584589451055, + "grad_norm": 0.0010408993111923337, + "learning_rate": 5.140361080691269e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125166440, + "step": 185785 + }, + { + "epoch": 4.538880609776952, + "grad_norm": 0.0009393574437126517, + "learning_rate": 5.1376624887436105e-08, + "loss": 0.0, + "num_input_tokens_seen": 125169576, + "step": 185790 + }, + { + "epoch": 4.5390027606088, + "grad_norm": 0.0018601644551381469, + "learning_rate": 5.134964586660406e-08, + "loss": 0.0, + "num_input_tokens_seen": 125173032, + "step": 185795 + }, + { + "epoch": 4.539124911440647, + "grad_norm": 0.0002813244063872844, + "learning_rate": 5.13226737446123e-08, + "loss": 0.0, + "num_input_tokens_seen": 125176872, + "step": 185800 + }, + { + "epoch": 4.539247062272494, + "grad_norm": 0.0007881836500018835, + "learning_rate": 5.129570852165732e-08, + "loss": 0.0, + "num_input_tokens_seen": 125179880, + "step": 185805 + }, + { + "epoch": 4.539369213104341, + "grad_norm": 0.007888463325798512, + "learning_rate": 5.1268750197935196e-08, + "loss": 0.0, + "num_input_tokens_seen": 125183848, + "step": 185810 + }, + { + "epoch": 4.539491363936189, + "grad_norm": 0.0018474315293133259, + "learning_rate": 5.124179877364176e-08, + "loss": 0.0, + "num_input_tokens_seen": 125187304, + "step": 185815 + }, + { + "epoch": 4.539613514768035, + "grad_norm": 8.314439764944836e-05, + "learning_rate": 5.1214854248973316e-08, + "loss": 0.0, + "num_input_tokens_seen": 125191400, + "step": 185820 + }, + { + "epoch": 4.539735665599883, + "grad_norm": 1.6251518900389783e-05, + "learning_rate": 5.118791662412558e-08, + "loss": 0.0, + "num_input_tokens_seen": 125195432, + "step": 185825 + }, + { + "epoch": 4.53985781643173, + "grad_norm": 0.004052783828228712, + "learning_rate": 5.116098589929452e-08, + "loss": 0.0, + "num_input_tokens_seen": 125198888, + "step": 185830 + }, + { + "epoch": 4.539979967263577, + "grad_norm": 0.00011836458725156263, + "learning_rate": 5.1134062074675966e-08, + "loss": 0.0, + "num_input_tokens_seen": 125202536, + "step": 185835 + }, + { + "epoch": 4.540102118095424, + "grad_norm": 3.898305294569582e-05, + "learning_rate": 5.110714515046577e-08, + "loss": 0.0, + "num_input_tokens_seen": 125205672, + "step": 185840 + }, + { + "epoch": 4.540224268927272, + "grad_norm": 0.00021784893760923296, + "learning_rate": 5.108023512685966e-08, + "loss": 0.0, + "num_input_tokens_seen": 125209128, + "step": 185845 + }, + { + "epoch": 4.540346419759119, + "grad_norm": 9.264857362722978e-05, + "learning_rate": 5.105333200405315e-08, + "loss": 0.0, + "num_input_tokens_seen": 125212200, + "step": 185850 + }, + { + "epoch": 4.540468570590965, + "grad_norm": 0.0009690878796391189, + "learning_rate": 5.102643578224219e-08, + "loss": 0.0, + "num_input_tokens_seen": 125216296, + "step": 185855 + }, + { + "epoch": 4.540590721422813, + "grad_norm": 0.01024213619530201, + "learning_rate": 5.099954646162208e-08, + "loss": 0.0, + "num_input_tokens_seen": 125219368, + "step": 185860 + }, + { + "epoch": 4.54071287225466, + "grad_norm": 0.000201819246285595, + "learning_rate": 5.0972664042388534e-08, + "loss": 0.0, + "num_input_tokens_seen": 125222696, + "step": 185865 + }, + { + "epoch": 4.540835023086507, + "grad_norm": 0.0005003840196877718, + "learning_rate": 5.0945788524737186e-08, + "loss": 0.0, + "num_input_tokens_seen": 125226024, + "step": 185870 + }, + { + "epoch": 4.540957173918354, + "grad_norm": 0.004579643253237009, + "learning_rate": 5.0918919908863214e-08, + "loss": 0.0, + "num_input_tokens_seen": 125229224, + "step": 185875 + }, + { + "epoch": 4.541079324750202, + "grad_norm": 0.002748160855844617, + "learning_rate": 5.089205819496223e-08, + "loss": 0.0, + "num_input_tokens_seen": 125232616, + "step": 185880 + }, + { + "epoch": 4.5412014755820485, + "grad_norm": 0.0002458605740685016, + "learning_rate": 5.0865203383229305e-08, + "loss": 0.0402, + "num_input_tokens_seen": 125235560, + "step": 185885 + }, + { + "epoch": 4.541323626413896, + "grad_norm": 9.4459333922714e-05, + "learning_rate": 5.0838355473860174e-08, + "loss": 0.0, + "num_input_tokens_seen": 125239272, + "step": 185890 + }, + { + "epoch": 4.541445777245743, + "grad_norm": 0.00036171413375996053, + "learning_rate": 5.081151446704956e-08, + "loss": 0.0, + "num_input_tokens_seen": 125242600, + "step": 185895 + }, + { + "epoch": 4.5415679280775905, + "grad_norm": 0.0018198946490883827, + "learning_rate": 5.0784680362992884e-08, + "loss": 0.0, + "num_input_tokens_seen": 125246056, + "step": 185900 + }, + { + "epoch": 4.541690078909437, + "grad_norm": 0.0005793093587271869, + "learning_rate": 5.075785316188552e-08, + "loss": 0.0, + "num_input_tokens_seen": 125249448, + "step": 185905 + }, + { + "epoch": 4.541812229741285, + "grad_norm": 5.1052174967480823e-05, + "learning_rate": 5.073103286392222e-08, + "loss": 0.0, + "num_input_tokens_seen": 125252776, + "step": 185910 + }, + { + "epoch": 4.541934380573132, + "grad_norm": 3.680026202346198e-05, + "learning_rate": 5.070421946929837e-08, + "loss": 0.0, + "num_input_tokens_seen": 125255976, + "step": 185915 + }, + { + "epoch": 4.542056531404979, + "grad_norm": 3.232800372643396e-05, + "learning_rate": 5.06774129782086e-08, + "loss": 0.0, + "num_input_tokens_seen": 125259240, + "step": 185920 + }, + { + "epoch": 4.542178682236826, + "grad_norm": 0.00017554397345520556, + "learning_rate": 5.0650613390847975e-08, + "loss": 0.0, + "num_input_tokens_seen": 125262632, + "step": 185925 + }, + { + "epoch": 4.542300833068673, + "grad_norm": 0.002945692278444767, + "learning_rate": 5.0623820707411556e-08, + "loss": 0.0, + "num_input_tokens_seen": 125265448, + "step": 185930 + }, + { + "epoch": 4.54242298390052, + "grad_norm": 0.002603281522169709, + "learning_rate": 5.0597034928094084e-08, + "loss": 0.0, + "num_input_tokens_seen": 125268904, + "step": 185935 + }, + { + "epoch": 4.542545134732368, + "grad_norm": 1.2701857485808432e-05, + "learning_rate": 5.057025605309029e-08, + "loss": 0.0, + "num_input_tokens_seen": 125272424, + "step": 185940 + }, + { + "epoch": 4.542667285564215, + "grad_norm": 8.991216600406915e-05, + "learning_rate": 5.054348408259501e-08, + "loss": 0.0, + "num_input_tokens_seen": 125275624, + "step": 185945 + }, + { + "epoch": 4.5427894363960615, + "grad_norm": 0.00012111781688872725, + "learning_rate": 5.051671901680288e-08, + "loss": 0.0, + "num_input_tokens_seen": 125279080, + "step": 185950 + }, + { + "epoch": 4.542911587227909, + "grad_norm": 0.003195281373336911, + "learning_rate": 5.0489960855908395e-08, + "loss": 0.0, + "num_input_tokens_seen": 125282344, + "step": 185955 + }, + { + "epoch": 4.543033738059756, + "grad_norm": 0.0038429235573858023, + "learning_rate": 5.04632096001063e-08, + "loss": 0.0, + "num_input_tokens_seen": 125285608, + "step": 185960 + }, + { + "epoch": 4.5431558888916035, + "grad_norm": 1.9571771190385334e-05, + "learning_rate": 5.043646524959133e-08, + "loss": 0.0, + "num_input_tokens_seen": 125289640, + "step": 185965 + }, + { + "epoch": 4.54327803972345, + "grad_norm": 0.00022896616428624839, + "learning_rate": 5.0409727804557655e-08, + "loss": 0.0, + "num_input_tokens_seen": 125292968, + "step": 185970 + }, + { + "epoch": 4.543400190555298, + "grad_norm": 0.00019275565864518285, + "learning_rate": 5.03829972651999e-08, + "loss": 0.0, + "num_input_tokens_seen": 125296360, + "step": 185975 + }, + { + "epoch": 4.543522341387145, + "grad_norm": 0.0001385089854011312, + "learning_rate": 5.0356273631712357e-08, + "loss": 0.0, + "num_input_tokens_seen": 125299624, + "step": 185980 + }, + { + "epoch": 4.543644492218992, + "grad_norm": 0.00014744758664164692, + "learning_rate": 5.032955690428953e-08, + "loss": 0.0, + "num_input_tokens_seen": 125302888, + "step": 185985 + }, + { + "epoch": 4.543766643050839, + "grad_norm": 0.0001173034543171525, + "learning_rate": 5.030284708312549e-08, + "loss": 0.0, + "num_input_tokens_seen": 125306024, + "step": 185990 + }, + { + "epoch": 4.543888793882687, + "grad_norm": 0.00016871007392182946, + "learning_rate": 5.027614416841453e-08, + "loss": 0.0, + "num_input_tokens_seen": 125309352, + "step": 185995 + }, + { + "epoch": 4.544010944714533, + "grad_norm": 0.0008024271228350699, + "learning_rate": 5.024944816035104e-08, + "loss": 0.0, + "num_input_tokens_seen": 125312680, + "step": 186000 + }, + { + "epoch": 4.544133095546381, + "grad_norm": 0.013525069691240788, + "learning_rate": 5.0222759059128874e-08, + "loss": 0.0, + "num_input_tokens_seen": 125315752, + "step": 186005 + }, + { + "epoch": 4.544255246378228, + "grad_norm": 0.0012057870626449585, + "learning_rate": 5.0196076864942426e-08, + "loss": 0.0, + "num_input_tokens_seen": 125318760, + "step": 186010 + }, + { + "epoch": 4.544377397210075, + "grad_norm": 0.00020089205645490438, + "learning_rate": 5.0169401577985435e-08, + "loss": 0.0, + "num_input_tokens_seen": 125321832, + "step": 186015 + }, + { + "epoch": 4.544499548041922, + "grad_norm": 0.0002571505028754473, + "learning_rate": 5.014273319845197e-08, + "loss": 0.0, + "num_input_tokens_seen": 125324968, + "step": 186020 + }, + { + "epoch": 4.544621698873769, + "grad_norm": 6.082994514144957e-05, + "learning_rate": 5.01160717265362e-08, + "loss": 0.0, + "num_input_tokens_seen": 125328488, + "step": 186025 + }, + { + "epoch": 4.5447438497056165, + "grad_norm": 0.009646477177739143, + "learning_rate": 5.008941716243176e-08, + "loss": 0.0, + "num_input_tokens_seen": 125332520, + "step": 186030 + }, + { + "epoch": 4.544866000537464, + "grad_norm": 0.00025338766863569617, + "learning_rate": 5.0062769506332704e-08, + "loss": 0.0, + "num_input_tokens_seen": 125335656, + "step": 186035 + }, + { + "epoch": 4.544988151369311, + "grad_norm": 2.7880143534275703e-05, + "learning_rate": 5.003612875843266e-08, + "loss": 0.0, + "num_input_tokens_seen": 125338920, + "step": 186040 + }, + { + "epoch": 4.545110302201158, + "grad_norm": 0.0014782889047637582, + "learning_rate": 5.000949491892525e-08, + "loss": 0.0, + "num_input_tokens_seen": 125342120, + "step": 186045 + }, + { + "epoch": 4.545232453033005, + "grad_norm": 9.009351197164506e-05, + "learning_rate": 4.998286798800444e-08, + "loss": 0.0, + "num_input_tokens_seen": 125345576, + "step": 186050 + }, + { + "epoch": 4.545354603864852, + "grad_norm": 0.0061968364752829075, + "learning_rate": 4.995624796586362e-08, + "loss": 0.0, + "num_input_tokens_seen": 125349096, + "step": 186055 + }, + { + "epoch": 4.5454767546967, + "grad_norm": 1.4608061974286102e-05, + "learning_rate": 4.992963485269663e-08, + "loss": 0.0, + "num_input_tokens_seen": 125352104, + "step": 186060 + }, + { + "epoch": 4.545598905528546, + "grad_norm": 0.0016588385915383697, + "learning_rate": 4.990302864869678e-08, + "loss": 0.0, + "num_input_tokens_seen": 125355752, + "step": 186065 + }, + { + "epoch": 4.545721056360394, + "grad_norm": 2.921727718785405e-05, + "learning_rate": 4.987642935405767e-08, + "loss": 0.0, + "num_input_tokens_seen": 125359144, + "step": 186070 + }, + { + "epoch": 4.545843207192241, + "grad_norm": 0.00036771618761122227, + "learning_rate": 4.984983696897271e-08, + "loss": 0.0, + "num_input_tokens_seen": 125362728, + "step": 186075 + }, + { + "epoch": 4.545965358024088, + "grad_norm": 0.001058812951669097, + "learning_rate": 4.98232514936352e-08, + "loss": 0.0, + "num_input_tokens_seen": 125365992, + "step": 186080 + }, + { + "epoch": 4.546087508855935, + "grad_norm": 0.0024411482736468315, + "learning_rate": 4.979667292823875e-08, + "loss": 0.0, + "num_input_tokens_seen": 125370088, + "step": 186085 + }, + { + "epoch": 4.546209659687783, + "grad_norm": 5.861128738615662e-05, + "learning_rate": 4.9770101272976316e-08, + "loss": 0.0, + "num_input_tokens_seen": 125373544, + "step": 186090 + }, + { + "epoch": 4.5463318105196295, + "grad_norm": 0.00014239706797525287, + "learning_rate": 4.974353652804142e-08, + "loss": 0.0, + "num_input_tokens_seen": 125376872, + "step": 186095 + }, + { + "epoch": 4.546453961351477, + "grad_norm": 0.010437625460326672, + "learning_rate": 4.971697869362701e-08, + "loss": 0.0, + "num_input_tokens_seen": 125380264, + "step": 186100 + }, + { + "epoch": 4.546576112183324, + "grad_norm": 3.1913328712107614e-05, + "learning_rate": 4.969042776992649e-08, + "loss": 0.0, + "num_input_tokens_seen": 125383400, + "step": 186105 + }, + { + "epoch": 4.5466982630151715, + "grad_norm": 1.1161178917973302e-05, + "learning_rate": 4.9663883757132596e-08, + "loss": 0.0512, + "num_input_tokens_seen": 125386728, + "step": 186110 + }, + { + "epoch": 4.546820413847018, + "grad_norm": 0.0003454769030213356, + "learning_rate": 4.96373466554385e-08, + "loss": 0.0, + "num_input_tokens_seen": 125390440, + "step": 186115 + }, + { + "epoch": 4.546942564678865, + "grad_norm": 0.00020126953313592821, + "learning_rate": 4.961081646503751e-08, + "loss": 0.0, + "num_input_tokens_seen": 125393960, + "step": 186120 + }, + { + "epoch": 4.547064715510713, + "grad_norm": 0.0006144981598481536, + "learning_rate": 4.9584293186122004e-08, + "loss": 0.0, + "num_input_tokens_seen": 125397800, + "step": 186125 + }, + { + "epoch": 4.547186866342559, + "grad_norm": 0.000722163007594645, + "learning_rate": 4.95577768188854e-08, + "loss": 0.0, + "num_input_tokens_seen": 125401512, + "step": 186130 + }, + { + "epoch": 4.547309017174407, + "grad_norm": 5.897651135455817e-05, + "learning_rate": 4.953126736352009e-08, + "loss": 0.0, + "num_input_tokens_seen": 125404968, + "step": 186135 + }, + { + "epoch": 4.547431168006254, + "grad_norm": 0.002091348869726062, + "learning_rate": 4.950476482021915e-08, + "loss": 0.0, + "num_input_tokens_seen": 125408296, + "step": 186140 + }, + { + "epoch": 4.547553318838101, + "grad_norm": 0.0008667752845212817, + "learning_rate": 4.947826918917519e-08, + "loss": 0.0, + "num_input_tokens_seen": 125411176, + "step": 186145 + }, + { + "epoch": 4.547675469669948, + "grad_norm": 0.0008085620356723666, + "learning_rate": 4.945178047058096e-08, + "loss": 0.0, + "num_input_tokens_seen": 125414760, + "step": 186150 + }, + { + "epoch": 4.547797620501796, + "grad_norm": 0.001199282007291913, + "learning_rate": 4.942529866462908e-08, + "loss": 0.0, + "num_input_tokens_seen": 125418152, + "step": 186155 + }, + { + "epoch": 4.547919771333643, + "grad_norm": 0.00020651493105106056, + "learning_rate": 4.9398823771511944e-08, + "loss": 0.0, + "num_input_tokens_seen": 125421224, + "step": 186160 + }, + { + "epoch": 4.54804192216549, + "grad_norm": 0.0018644839292392135, + "learning_rate": 4.9372355791422406e-08, + "loss": 0.0, + "num_input_tokens_seen": 125424808, + "step": 186165 + }, + { + "epoch": 4.548164072997337, + "grad_norm": 0.000575466372538358, + "learning_rate": 4.934589472455264e-08, + "loss": 0.0, + "num_input_tokens_seen": 125428392, + "step": 186170 + }, + { + "epoch": 4.548286223829185, + "grad_norm": 0.0004205135628581047, + "learning_rate": 4.9319440571095164e-08, + "loss": 0.0, + "num_input_tokens_seen": 125431784, + "step": 186175 + }, + { + "epoch": 4.548408374661031, + "grad_norm": 0.0006393736694008112, + "learning_rate": 4.9292993331242595e-08, + "loss": 0.0, + "num_input_tokens_seen": 125435048, + "step": 186180 + }, + { + "epoch": 4.548530525492879, + "grad_norm": 0.00011096692469436675, + "learning_rate": 4.9266553005187005e-08, + "loss": 0.0, + "num_input_tokens_seen": 125438504, + "step": 186185 + }, + { + "epoch": 4.548652676324726, + "grad_norm": 0.00046578276669606566, + "learning_rate": 4.924011959312091e-08, + "loss": 0.0, + "num_input_tokens_seen": 125441832, + "step": 186190 + }, + { + "epoch": 4.548774827156572, + "grad_norm": 7.459130574716255e-05, + "learning_rate": 4.9213693095236154e-08, + "loss": 0.0, + "num_input_tokens_seen": 125445288, + "step": 186195 + }, + { + "epoch": 4.54889697798842, + "grad_norm": 0.00038041500374674797, + "learning_rate": 4.918727351172536e-08, + "loss": 0.0359, + "num_input_tokens_seen": 125448168, + "step": 186200 + }, + { + "epoch": 4.549019128820268, + "grad_norm": 0.0028200701344758272, + "learning_rate": 4.916086084278026e-08, + "loss": 0.0, + "num_input_tokens_seen": 125451816, + "step": 186205 + }, + { + "epoch": 4.5491412796521145, + "grad_norm": 0.0003451250959187746, + "learning_rate": 4.913445508859315e-08, + "loss": 0.0, + "num_input_tokens_seen": 125455208, + "step": 186210 + }, + { + "epoch": 4.549263430483961, + "grad_norm": 0.00034329271875321865, + "learning_rate": 4.91080562493561e-08, + "loss": 0.0, + "num_input_tokens_seen": 125458664, + "step": 186215 + }, + { + "epoch": 4.549385581315809, + "grad_norm": 0.000254724029218778, + "learning_rate": 4.908166432526106e-08, + "loss": 0.0, + "num_input_tokens_seen": 125461672, + "step": 186220 + }, + { + "epoch": 4.549507732147656, + "grad_norm": 0.0013476322637870908, + "learning_rate": 4.905527931649989e-08, + "loss": 0.0, + "num_input_tokens_seen": 125465256, + "step": 186225 + }, + { + "epoch": 4.549629882979503, + "grad_norm": 7.785054913256317e-05, + "learning_rate": 4.902890122326442e-08, + "loss": 0.0, + "num_input_tokens_seen": 125469224, + "step": 186230 + }, + { + "epoch": 4.54975203381135, + "grad_norm": 0.00030382751720026135, + "learning_rate": 4.900253004574673e-08, + "loss": 0.0, + "num_input_tokens_seen": 125472808, + "step": 186235 + }, + { + "epoch": 4.549874184643198, + "grad_norm": 9.079613846552093e-06, + "learning_rate": 4.8976165784138327e-08, + "loss": 0.0, + "num_input_tokens_seen": 125475944, + "step": 186240 + }, + { + "epoch": 4.549996335475044, + "grad_norm": 0.00020586224854923785, + "learning_rate": 4.894980843863106e-08, + "loss": 0.0, + "num_input_tokens_seen": 125479336, + "step": 186245 + }, + { + "epoch": 4.550118486306892, + "grad_norm": 0.00014247857325244695, + "learning_rate": 4.892345800941655e-08, + "loss": 0.0, + "num_input_tokens_seen": 125482536, + "step": 186250 + }, + { + "epoch": 4.550240637138739, + "grad_norm": 0.00018053391249850392, + "learning_rate": 4.889711449668654e-08, + "loss": 0.0, + "num_input_tokens_seen": 125485608, + "step": 186255 + }, + { + "epoch": 4.550362787970586, + "grad_norm": 0.0022484755609184504, + "learning_rate": 4.8870777900632543e-08, + "loss": 0.0, + "num_input_tokens_seen": 125488808, + "step": 186260 + }, + { + "epoch": 4.550484938802433, + "grad_norm": 0.0017215277766808867, + "learning_rate": 4.884444822144595e-08, + "loss": 0.0, + "num_input_tokens_seen": 125491880, + "step": 186265 + }, + { + "epoch": 4.550607089634281, + "grad_norm": 0.000490458682179451, + "learning_rate": 4.88181254593184e-08, + "loss": 0.0, + "num_input_tokens_seen": 125494696, + "step": 186270 + }, + { + "epoch": 4.5507292404661275, + "grad_norm": 7.075289613567293e-05, + "learning_rate": 4.8791809614441405e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125498600, + "step": 186275 + }, + { + "epoch": 4.550851391297975, + "grad_norm": 7.022430509096012e-05, + "learning_rate": 4.8765500687006024e-08, + "loss": 0.0, + "num_input_tokens_seen": 125501800, + "step": 186280 + }, + { + "epoch": 4.550973542129822, + "grad_norm": 2.5926497983164154e-05, + "learning_rate": 4.873919867720389e-08, + "loss": 0.0, + "num_input_tokens_seen": 125505576, + "step": 186285 + }, + { + "epoch": 4.551095692961669, + "grad_norm": 0.0003630456340033561, + "learning_rate": 4.871290358522606e-08, + "loss": 0.0, + "num_input_tokens_seen": 125509480, + "step": 186290 + }, + { + "epoch": 4.551217843793516, + "grad_norm": 2.0731151380459778e-05, + "learning_rate": 4.868661541126407e-08, + "loss": 0.0, + "num_input_tokens_seen": 125512808, + "step": 186295 + }, + { + "epoch": 4.551339994625364, + "grad_norm": 0.004396742209792137, + "learning_rate": 4.866033415550863e-08, + "loss": 0.0, + "num_input_tokens_seen": 125515944, + "step": 186300 + }, + { + "epoch": 4.551462145457211, + "grad_norm": 0.004247918259352446, + "learning_rate": 4.863405981815116e-08, + "loss": 0.0, + "num_input_tokens_seen": 125519848, + "step": 186305 + }, + { + "epoch": 4.551584296289057, + "grad_norm": 0.008792792446911335, + "learning_rate": 4.860779239938284e-08, + "loss": 0.0, + "num_input_tokens_seen": 125523432, + "step": 186310 + }, + { + "epoch": 4.551706447120905, + "grad_norm": 0.0026869927532970905, + "learning_rate": 4.8581531899394404e-08, + "loss": 0.0, + "num_input_tokens_seen": 125527336, + "step": 186315 + }, + { + "epoch": 4.551828597952752, + "grad_norm": 2.581719309091568e-05, + "learning_rate": 4.8555278318377136e-08, + "loss": 0.0, + "num_input_tokens_seen": 125531048, + "step": 186320 + }, + { + "epoch": 4.551950748784599, + "grad_norm": 0.00021105869382154197, + "learning_rate": 4.852903165652167e-08, + "loss": 0.0, + "num_input_tokens_seen": 125534440, + "step": 186325 + }, + { + "epoch": 4.552072899616446, + "grad_norm": 0.0005000850069336593, + "learning_rate": 4.850279191401896e-08, + "loss": 0.0, + "num_input_tokens_seen": 125538024, + "step": 186330 + }, + { + "epoch": 4.552195050448294, + "grad_norm": 0.008285541087388992, + "learning_rate": 4.8476559091059966e-08, + "loss": 0.0, + "num_input_tokens_seen": 125541352, + "step": 186335 + }, + { + "epoch": 4.5523172012801405, + "grad_norm": 2.6498841180000454e-05, + "learning_rate": 4.845033318783531e-08, + "loss": 0.0, + "num_input_tokens_seen": 125545000, + "step": 186340 + }, + { + "epoch": 4.552439352111988, + "grad_norm": 0.00018164741049986333, + "learning_rate": 4.8424114204535846e-08, + "loss": 0.0, + "num_input_tokens_seen": 125548136, + "step": 186345 + }, + { + "epoch": 4.552561502943835, + "grad_norm": 1.322677871939959e-05, + "learning_rate": 4.83979021413522e-08, + "loss": 0.0, + "num_input_tokens_seen": 125551528, + "step": 186350 + }, + { + "epoch": 4.5526836537756825, + "grad_norm": 0.0001355686254100874, + "learning_rate": 4.837169699847476e-08, + "loss": 0.0, + "num_input_tokens_seen": 125554600, + "step": 186355 + }, + { + "epoch": 4.552805804607529, + "grad_norm": 0.00036094902316108346, + "learning_rate": 4.834549877609451e-08, + "loss": 0.0, + "num_input_tokens_seen": 125558056, + "step": 186360 + }, + { + "epoch": 4.552927955439377, + "grad_norm": 5.311130007612519e-05, + "learning_rate": 4.831930747440161e-08, + "loss": 0.0, + "num_input_tokens_seen": 125561320, + "step": 186365 + }, + { + "epoch": 4.553050106271224, + "grad_norm": 0.07028447091579437, + "learning_rate": 4.8293123093586795e-08, + "loss": 0.0, + "num_input_tokens_seen": 125564840, + "step": 186370 + }, + { + "epoch": 4.553172257103071, + "grad_norm": 0.00010695837409002706, + "learning_rate": 4.8266945633840264e-08, + "loss": 0.0, + "num_input_tokens_seen": 125568040, + "step": 186375 + }, + { + "epoch": 4.553294407934918, + "grad_norm": 1.5230105418595485e-05, + "learning_rate": 4.8240775095352517e-08, + "loss": 0.0, + "num_input_tokens_seen": 125570984, + "step": 186380 + }, + { + "epoch": 4.553416558766765, + "grad_norm": 0.00041354206041432917, + "learning_rate": 4.821461147831385e-08, + "loss": 0.0, + "num_input_tokens_seen": 125574440, + "step": 186385 + }, + { + "epoch": 4.553538709598612, + "grad_norm": 5.543339284486137e-05, + "learning_rate": 4.818845478291456e-08, + "loss": 0.0, + "num_input_tokens_seen": 125578152, + "step": 186390 + }, + { + "epoch": 4.553660860430459, + "grad_norm": 0.00212540989741683, + "learning_rate": 4.8162305009344705e-08, + "loss": 0.0, + "num_input_tokens_seen": 125581160, + "step": 186395 + }, + { + "epoch": 4.553783011262307, + "grad_norm": 0.00012192504800623283, + "learning_rate": 4.81361621577947e-08, + "loss": 0.0489, + "num_input_tokens_seen": 125584936, + "step": 186400 + }, + { + "epoch": 4.5539051620941535, + "grad_norm": 0.00018608868413139135, + "learning_rate": 4.81100262284545e-08, + "loss": 0.0, + "num_input_tokens_seen": 125588520, + "step": 186405 + }, + { + "epoch": 4.554027312926001, + "grad_norm": 0.000508465978782624, + "learning_rate": 4.808389722151418e-08, + "loss": 0.0, + "num_input_tokens_seen": 125592296, + "step": 186410 + }, + { + "epoch": 4.554149463757848, + "grad_norm": 8.924116991693154e-05, + "learning_rate": 4.8057775137163913e-08, + "loss": 0.0, + "num_input_tokens_seen": 125595624, + "step": 186415 + }, + { + "epoch": 4.5542716145896955, + "grad_norm": 2.7994163247058168e-05, + "learning_rate": 4.803165997559344e-08, + "loss": 0.0, + "num_input_tokens_seen": 125598952, + "step": 186420 + }, + { + "epoch": 4.554393765421542, + "grad_norm": 0.00013703010336030275, + "learning_rate": 4.800555173699283e-08, + "loss": 0.0, + "num_input_tokens_seen": 125601896, + "step": 186425 + }, + { + "epoch": 4.55451591625339, + "grad_norm": 0.0009500607848167419, + "learning_rate": 4.797945042155194e-08, + "loss": 0.0, + "num_input_tokens_seen": 125605224, + "step": 186430 + }, + { + "epoch": 4.554638067085237, + "grad_norm": 0.013281804509460926, + "learning_rate": 4.795335602946049e-08, + "loss": 0.0, + "num_input_tokens_seen": 125608552, + "step": 186435 + }, + { + "epoch": 4.554760217917084, + "grad_norm": 0.002333866897970438, + "learning_rate": 4.7927268560908343e-08, + "loss": 0.0, + "num_input_tokens_seen": 125612072, + "step": 186440 + }, + { + "epoch": 4.554882368748931, + "grad_norm": 9.072609827853739e-05, + "learning_rate": 4.7901188016085116e-08, + "loss": 0.0, + "num_input_tokens_seen": 125615464, + "step": 186445 + }, + { + "epoch": 4.555004519580779, + "grad_norm": 0.00028325567836873233, + "learning_rate": 4.787511439518066e-08, + "loss": 0.0, + "num_input_tokens_seen": 125618792, + "step": 186450 + }, + { + "epoch": 4.555126670412625, + "grad_norm": 7.057032053126022e-05, + "learning_rate": 4.784904769838427e-08, + "loss": 0.0, + "num_input_tokens_seen": 125622248, + "step": 186455 + }, + { + "epoch": 4.555248821244473, + "grad_norm": 4.032731521874666e-05, + "learning_rate": 4.782298792588591e-08, + "loss": 0.0686, + "num_input_tokens_seen": 125625896, + "step": 186460 + }, + { + "epoch": 4.55537097207632, + "grad_norm": 0.0002833160397130996, + "learning_rate": 4.7796935077874856e-08, + "loss": 0.0, + "num_input_tokens_seen": 125629288, + "step": 186465 + }, + { + "epoch": 4.555493122908167, + "grad_norm": 0.0002632912655826658, + "learning_rate": 4.7770889154540525e-08, + "loss": 0.0, + "num_input_tokens_seen": 125632488, + "step": 186470 + }, + { + "epoch": 4.555615273740014, + "grad_norm": 0.0001713380916044116, + "learning_rate": 4.774485015607244e-08, + "loss": 0.0, + "num_input_tokens_seen": 125635624, + "step": 186475 + }, + { + "epoch": 4.555737424571861, + "grad_norm": 2.570607466623187e-05, + "learning_rate": 4.7718818082659874e-08, + "loss": 0.0, + "num_input_tokens_seen": 125638696, + "step": 186480 + }, + { + "epoch": 4.555859575403709, + "grad_norm": 0.0002236905274912715, + "learning_rate": 4.769279293449213e-08, + "loss": 0.0, + "num_input_tokens_seen": 125642536, + "step": 186485 + }, + { + "epoch": 4.555981726235555, + "grad_norm": 0.00016828883963171393, + "learning_rate": 4.766677471175873e-08, + "loss": 0.0, + "num_input_tokens_seen": 125645608, + "step": 186490 + }, + { + "epoch": 4.556103877067403, + "grad_norm": 0.00016017680172808468, + "learning_rate": 4.7640763414648624e-08, + "loss": 0.0, + "num_input_tokens_seen": 125649000, + "step": 186495 + }, + { + "epoch": 4.55622602789925, + "grad_norm": 0.0006411911454051733, + "learning_rate": 4.761475904335099e-08, + "loss": 0.0, + "num_input_tokens_seen": 125652392, + "step": 186500 + }, + { + "epoch": 4.556348178731097, + "grad_norm": 0.0003673804458230734, + "learning_rate": 4.758876159805503e-08, + "loss": 0.0, + "num_input_tokens_seen": 125655720, + "step": 186505 + }, + { + "epoch": 4.556470329562944, + "grad_norm": 0.00026387552497908473, + "learning_rate": 4.7562771078949794e-08, + "loss": 0.0, + "num_input_tokens_seen": 125659048, + "step": 186510 + }, + { + "epoch": 4.556592480394792, + "grad_norm": 0.052026305347681046, + "learning_rate": 4.753678748622414e-08, + "loss": 0.0, + "num_input_tokens_seen": 125662824, + "step": 186515 + }, + { + "epoch": 4.5567146312266384, + "grad_norm": 0.0003993824648205191, + "learning_rate": 4.751081082006714e-08, + "loss": 0.0, + "num_input_tokens_seen": 125665832, + "step": 186520 + }, + { + "epoch": 4.556836782058486, + "grad_norm": 0.00013458718603942543, + "learning_rate": 4.748484108066786e-08, + "loss": 0.0, + "num_input_tokens_seen": 125669416, + "step": 186525 + }, + { + "epoch": 4.556958932890333, + "grad_norm": 0.0002635191776789725, + "learning_rate": 4.745887826821493e-08, + "loss": 0.0, + "num_input_tokens_seen": 125672424, + "step": 186530 + }, + { + "epoch": 4.5570810837221805, + "grad_norm": 5.497500751516782e-05, + "learning_rate": 4.743292238289731e-08, + "loss": 0.0305, + "num_input_tokens_seen": 125675688, + "step": 186535 + }, + { + "epoch": 4.557203234554027, + "grad_norm": 0.0003593292785808444, + "learning_rate": 4.7406973424903626e-08, + "loss": 0.0, + "num_input_tokens_seen": 125678696, + "step": 186540 + }, + { + "epoch": 4.557325385385875, + "grad_norm": 0.010443294420838356, + "learning_rate": 4.738103139442273e-08, + "loss": 0.0, + "num_input_tokens_seen": 125681768, + "step": 186545 + }, + { + "epoch": 4.557447536217722, + "grad_norm": 0.0002558843407314271, + "learning_rate": 4.7355096291643026e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125685608, + "step": 186550 + }, + { + "epoch": 4.557569687049568, + "grad_norm": 0.0004936489858664572, + "learning_rate": 4.7329168116753473e-08, + "loss": 0.0, + "num_input_tokens_seen": 125689000, + "step": 186555 + }, + { + "epoch": 4.557691837881416, + "grad_norm": 0.003704722970724106, + "learning_rate": 4.7303246869942246e-08, + "loss": 0.0, + "num_input_tokens_seen": 125692008, + "step": 186560 + }, + { + "epoch": 4.557813988713264, + "grad_norm": 0.00018676824402064085, + "learning_rate": 4.727733255139832e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125695464, + "step": 186565 + }, + { + "epoch": 4.55793613954511, + "grad_norm": 0.00029991735937073827, + "learning_rate": 4.725142516130975e-08, + "loss": 0.0, + "num_input_tokens_seen": 125698728, + "step": 186570 + }, + { + "epoch": 4.558058290376957, + "grad_norm": 0.0004272170190233737, + "learning_rate": 4.722552469986507e-08, + "loss": 0.0, + "num_input_tokens_seen": 125701992, + "step": 186575 + }, + { + "epoch": 4.558180441208805, + "grad_norm": 4.4216849346412346e-05, + "learning_rate": 4.719963116725256e-08, + "loss": 0.0, + "num_input_tokens_seen": 125705320, + "step": 186580 + }, + { + "epoch": 4.5583025920406515, + "grad_norm": 0.0008761108620092273, + "learning_rate": 4.717374456366074e-08, + "loss": 0.0, + "num_input_tokens_seen": 125708520, + "step": 186585 + }, + { + "epoch": 4.558424742872499, + "grad_norm": 0.0021364863496273756, + "learning_rate": 4.714786488927758e-08, + "loss": 0.0, + "num_input_tokens_seen": 125711912, + "step": 186590 + }, + { + "epoch": 4.558546893704346, + "grad_norm": 0.001226111315190792, + "learning_rate": 4.712199214429158e-08, + "loss": 0.0, + "num_input_tokens_seen": 125715176, + "step": 186595 + }, + { + "epoch": 4.5586690445361935, + "grad_norm": 0.0002580843574833125, + "learning_rate": 4.709612632889059e-08, + "loss": 0.0, + "num_input_tokens_seen": 125718248, + "step": 186600 + }, + { + "epoch": 4.55879119536804, + "grad_norm": 0.0003518314624670893, + "learning_rate": 4.7070267443263035e-08, + "loss": 0.0, + "num_input_tokens_seen": 125722088, + "step": 186605 + }, + { + "epoch": 4.558913346199888, + "grad_norm": 0.018274880945682526, + "learning_rate": 4.7044415487596744e-08, + "loss": 0.0, + "num_input_tokens_seen": 125725544, + "step": 186610 + }, + { + "epoch": 4.559035497031735, + "grad_norm": 1.0470458619238343e-05, + "learning_rate": 4.701857046207969e-08, + "loss": 0.0, + "num_input_tokens_seen": 125728872, + "step": 186615 + }, + { + "epoch": 4.559157647863582, + "grad_norm": 0.00013799651060253382, + "learning_rate": 4.699273236690005e-08, + "loss": 0.0, + "num_input_tokens_seen": 125732136, + "step": 186620 + }, + { + "epoch": 4.559279798695429, + "grad_norm": 1.64757548191119e-05, + "learning_rate": 4.6966901202245446e-08, + "loss": 0.0, + "num_input_tokens_seen": 125735400, + "step": 186625 + }, + { + "epoch": 4.559401949527277, + "grad_norm": 0.00010223350545857102, + "learning_rate": 4.694107696830407e-08, + "loss": 0.0, + "num_input_tokens_seen": 125739048, + "step": 186630 + }, + { + "epoch": 4.559524100359123, + "grad_norm": 0.000656140735372901, + "learning_rate": 4.691525966526333e-08, + "loss": 0.0, + "num_input_tokens_seen": 125742120, + "step": 186635 + }, + { + "epoch": 4.559646251190971, + "grad_norm": 0.0003749874304048717, + "learning_rate": 4.6889449293311176e-08, + "loss": 0.0, + "num_input_tokens_seen": 125745704, + "step": 186640 + }, + { + "epoch": 4.559768402022818, + "grad_norm": 0.0002589155628811568, + "learning_rate": 4.686364585263547e-08, + "loss": 0.0004, + "num_input_tokens_seen": 125748968, + "step": 186645 + }, + { + "epoch": 4.5598905528546645, + "grad_norm": 0.0012152070412412286, + "learning_rate": 4.6837849343423494e-08, + "loss": 0.0, + "num_input_tokens_seen": 125751976, + "step": 186650 + }, + { + "epoch": 4.560012703686512, + "grad_norm": 0.01056479662656784, + "learning_rate": 4.681205976586322e-08, + "loss": 0.0, + "num_input_tokens_seen": 125754920, + "step": 186655 + }, + { + "epoch": 4.56013485451836, + "grad_norm": 0.00018264618120156229, + "learning_rate": 4.6786277120142047e-08, + "loss": 0.0, + "num_input_tokens_seen": 125758504, + "step": 186660 + }, + { + "epoch": 4.5602570053502065, + "grad_norm": 0.02871021069586277, + "learning_rate": 4.676050140644727e-08, + "loss": 0.0, + "num_input_tokens_seen": 125761768, + "step": 186665 + }, + { + "epoch": 4.560379156182053, + "grad_norm": 3.788889080169611e-05, + "learning_rate": 4.673473262496663e-08, + "loss": 0.0, + "num_input_tokens_seen": 125765288, + "step": 186670 + }, + { + "epoch": 4.560501307013901, + "grad_norm": 0.04666898027062416, + "learning_rate": 4.670897077588731e-08, + "loss": 0.0, + "num_input_tokens_seen": 125769832, + "step": 186675 + }, + { + "epoch": 4.560623457845748, + "grad_norm": 0.0006879869033582509, + "learning_rate": 4.668321585939694e-08, + "loss": 0.0, + "num_input_tokens_seen": 125773032, + "step": 186680 + }, + { + "epoch": 4.560745608677595, + "grad_norm": 0.0001125606067944318, + "learning_rate": 4.665746787568248e-08, + "loss": 0.0, + "num_input_tokens_seen": 125776360, + "step": 186685 + }, + { + "epoch": 4.560867759509442, + "grad_norm": 0.02449299953877926, + "learning_rate": 4.663172682493144e-08, + "loss": 0.0, + "num_input_tokens_seen": 125779944, + "step": 186690 + }, + { + "epoch": 4.56098991034129, + "grad_norm": 0.00034439985756762326, + "learning_rate": 4.660599270733079e-08, + "loss": 0.0, + "num_input_tokens_seen": 125783464, + "step": 186695 + }, + { + "epoch": 4.561112061173136, + "grad_norm": 7.401494804071262e-05, + "learning_rate": 4.658026552306793e-08, + "loss": 0.0, + "num_input_tokens_seen": 125786792, + "step": 186700 + }, + { + "epoch": 4.561234212004984, + "grad_norm": 0.00045592195237986743, + "learning_rate": 4.6554545272329715e-08, + "loss": 0.0, + "num_input_tokens_seen": 125790056, + "step": 186705 + }, + { + "epoch": 4.561356362836831, + "grad_norm": 0.0002018619270529598, + "learning_rate": 4.6528831955303215e-08, + "loss": 0.0, + "num_input_tokens_seen": 125793320, + "step": 186710 + }, + { + "epoch": 4.561478513668678, + "grad_norm": 0.0051199872978031635, + "learning_rate": 4.6503125572175725e-08, + "loss": 0.0, + "num_input_tokens_seen": 125796648, + "step": 186715 + }, + { + "epoch": 4.561600664500525, + "grad_norm": 0.003007345600053668, + "learning_rate": 4.6477426123133765e-08, + "loss": 0.0, + "num_input_tokens_seen": 125799912, + "step": 186720 + }, + { + "epoch": 4.561722815332373, + "grad_norm": 0.00036833074409514666, + "learning_rate": 4.645173360836463e-08, + "loss": 0.0, + "num_input_tokens_seen": 125802920, + "step": 186725 + }, + { + "epoch": 4.5618449661642195, + "grad_norm": 0.0001462739601265639, + "learning_rate": 4.642604802805472e-08, + "loss": 0.0, + "num_input_tokens_seen": 125805928, + "step": 186730 + }, + { + "epoch": 4.561967116996067, + "grad_norm": 0.0020424015820026398, + "learning_rate": 4.640036938239111e-08, + "loss": 0.0, + "num_input_tokens_seen": 125808936, + "step": 186735 + }, + { + "epoch": 4.562089267827914, + "grad_norm": 0.0025019191671162844, + "learning_rate": 4.637469767156066e-08, + "loss": 0.0, + "num_input_tokens_seen": 125812264, + "step": 186740 + }, + { + "epoch": 4.562211418659761, + "grad_norm": 0.0013807759387418628, + "learning_rate": 4.634903289574976e-08, + "loss": 0.0, + "num_input_tokens_seen": 125816424, + "step": 186745 + }, + { + "epoch": 4.562333569491608, + "grad_norm": 0.0036531754303723574, + "learning_rate": 4.6323375055145386e-08, + "loss": 0.0, + "num_input_tokens_seen": 125819880, + "step": 186750 + }, + { + "epoch": 4.562455720323455, + "grad_norm": 0.00678299693390727, + "learning_rate": 4.629772414993371e-08, + "loss": 0.0, + "num_input_tokens_seen": 125823336, + "step": 186755 + }, + { + "epoch": 4.562577871155303, + "grad_norm": 0.0002474345965310931, + "learning_rate": 4.627208018030171e-08, + "loss": 0.0, + "num_input_tokens_seen": 125826536, + "step": 186760 + }, + { + "epoch": 4.562700021987149, + "grad_norm": 0.0001734690449666232, + "learning_rate": 4.6246443146435554e-08, + "loss": 0.0, + "num_input_tokens_seen": 125829352, + "step": 186765 + }, + { + "epoch": 4.562822172818997, + "grad_norm": 0.002393064321950078, + "learning_rate": 4.622081304852177e-08, + "loss": 0.0, + "num_input_tokens_seen": 125832872, + "step": 186770 + }, + { + "epoch": 4.562944323650844, + "grad_norm": 0.0003072379913646728, + "learning_rate": 4.619518988674686e-08, + "loss": 0.0, + "num_input_tokens_seen": 125836456, + "step": 186775 + }, + { + "epoch": 4.563066474482691, + "grad_norm": 0.00010231092164758593, + "learning_rate": 4.6169573661297034e-08, + "loss": 0.0, + "num_input_tokens_seen": 125839464, + "step": 186780 + }, + { + "epoch": 4.563188625314538, + "grad_norm": 0.01051098108291626, + "learning_rate": 4.6143964372358676e-08, + "loss": 0.0, + "num_input_tokens_seen": 125842920, + "step": 186785 + }, + { + "epoch": 4.563310776146386, + "grad_norm": 6.985938671277836e-05, + "learning_rate": 4.611836202011776e-08, + "loss": 0.0, + "num_input_tokens_seen": 125846056, + "step": 186790 + }, + { + "epoch": 4.5634329269782326, + "grad_norm": 0.001552744535729289, + "learning_rate": 4.609276660476069e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125849512, + "step": 186795 + }, + { + "epoch": 4.56355507781008, + "grad_norm": 0.0006545864744111896, + "learning_rate": 4.606717812647387e-08, + "loss": 0.0, + "num_input_tokens_seen": 125853224, + "step": 186800 + }, + { + "epoch": 4.563677228641927, + "grad_norm": 0.00017454169574193656, + "learning_rate": 4.604159658544282e-08, + "loss": 0.0, + "num_input_tokens_seen": 125856424, + "step": 186805 + }, + { + "epoch": 4.563799379473775, + "grad_norm": 0.0006190399872139096, + "learning_rate": 4.601602198185406e-08, + "loss": 0.0, + "num_input_tokens_seen": 125859816, + "step": 186810 + }, + { + "epoch": 4.563921530305621, + "grad_norm": 0.0008284965879283845, + "learning_rate": 4.599045431589321e-08, + "loss": 0.0, + "num_input_tokens_seen": 125863656, + "step": 186815 + }, + { + "epoch": 4.564043681137468, + "grad_norm": 0.0007577021024189889, + "learning_rate": 4.596489358774658e-08, + "loss": 0.0, + "num_input_tokens_seen": 125866984, + "step": 186820 + }, + { + "epoch": 4.564165831969316, + "grad_norm": 9.084967314265668e-05, + "learning_rate": 4.593933979759967e-08, + "loss": 0.0, + "num_input_tokens_seen": 125870376, + "step": 186825 + }, + { + "epoch": 4.564287982801163, + "grad_norm": 0.0006819769041612744, + "learning_rate": 4.5913792945638465e-08, + "loss": 0.0, + "num_input_tokens_seen": 125873832, + "step": 186830 + }, + { + "epoch": 4.56441013363301, + "grad_norm": 0.0003621177456807345, + "learning_rate": 4.5888253032048906e-08, + "loss": 0.0, + "num_input_tokens_seen": 125877352, + "step": 186835 + }, + { + "epoch": 4.564532284464857, + "grad_norm": 0.0003841822035610676, + "learning_rate": 4.586272005701652e-08, + "loss": 0.0, + "num_input_tokens_seen": 125880616, + "step": 186840 + }, + { + "epoch": 4.5646544352967044, + "grad_norm": 6.918048165971413e-05, + "learning_rate": 4.5837194020727165e-08, + "loss": 0.0, + "num_input_tokens_seen": 125884456, + "step": 186845 + }, + { + "epoch": 4.564776586128551, + "grad_norm": 0.00011394621833460405, + "learning_rate": 4.581167492336624e-08, + "loss": 0.0, + "num_input_tokens_seen": 125888104, + "step": 186850 + }, + { + "epoch": 4.564898736960399, + "grad_norm": 0.00013341843441594392, + "learning_rate": 4.5786162765119596e-08, + "loss": 0.0, + "num_input_tokens_seen": 125891816, + "step": 186855 + }, + { + "epoch": 4.565020887792246, + "grad_norm": 0.006895654369145632, + "learning_rate": 4.576065754617253e-08, + "loss": 0.0, + "num_input_tokens_seen": 125894760, + "step": 186860 + }, + { + "epoch": 4.565143038624093, + "grad_norm": 0.0022672039922326803, + "learning_rate": 4.573515926671079e-08, + "loss": 0.0, + "num_input_tokens_seen": 125897768, + "step": 186865 + }, + { + "epoch": 4.56526518945594, + "grad_norm": 4.803660704055801e-05, + "learning_rate": 4.570966792691944e-08, + "loss": 0.0, + "num_input_tokens_seen": 125900840, + "step": 186870 + }, + { + "epoch": 4.565387340287788, + "grad_norm": 0.00014346172974910587, + "learning_rate": 4.568418352698411e-08, + "loss": 0.0, + "num_input_tokens_seen": 125904040, + "step": 186875 + }, + { + "epoch": 4.565509491119634, + "grad_norm": 0.0002021217514993623, + "learning_rate": 4.5658706067090215e-08, + "loss": 0.0, + "num_input_tokens_seen": 125907752, + "step": 186880 + }, + { + "epoch": 4.565631641951482, + "grad_norm": 0.0008756064344197512, + "learning_rate": 4.563323554742271e-08, + "loss": 0.0, + "num_input_tokens_seen": 125911464, + "step": 186885 + }, + { + "epoch": 4.565753792783329, + "grad_norm": 0.000680445518810302, + "learning_rate": 4.560777196816701e-08, + "loss": 0.0, + "num_input_tokens_seen": 125914920, + "step": 186890 + }, + { + "epoch": 4.565875943615176, + "grad_norm": 0.006386886816471815, + "learning_rate": 4.5582315329508405e-08, + "loss": 0.0, + "num_input_tokens_seen": 125918376, + "step": 186895 + }, + { + "epoch": 4.565998094447023, + "grad_norm": 0.002658127574250102, + "learning_rate": 4.5556865631631856e-08, + "loss": 0.0001, + "num_input_tokens_seen": 125922088, + "step": 186900 + }, + { + "epoch": 4.566120245278871, + "grad_norm": 0.00010610414756229147, + "learning_rate": 4.5531422874722555e-08, + "loss": 0.0, + "num_input_tokens_seen": 125925224, + "step": 186905 + }, + { + "epoch": 4.5662423961107175, + "grad_norm": 0.00031076581217348576, + "learning_rate": 4.5505987058965355e-08, + "loss": 0.0, + "num_input_tokens_seen": 125928360, + "step": 186910 + }, + { + "epoch": 4.566364546942564, + "grad_norm": 0.00034491971018724144, + "learning_rate": 4.548055818454544e-08, + "loss": 0.0, + "num_input_tokens_seen": 125931304, + "step": 186915 + }, + { + "epoch": 4.566486697774412, + "grad_norm": 0.0020436709746718407, + "learning_rate": 4.545513625164754e-08, + "loss": 0.0, + "num_input_tokens_seen": 125934504, + "step": 186920 + }, + { + "epoch": 4.5666088486062595, + "grad_norm": 0.0015757272485643625, + "learning_rate": 4.5429721260456633e-08, + "loss": 0.0246, + "num_input_tokens_seen": 125938280, + "step": 186925 + }, + { + "epoch": 4.566730999438106, + "grad_norm": 4.1816121665760875e-05, + "learning_rate": 4.5404313211157675e-08, + "loss": 0.0, + "num_input_tokens_seen": 125941416, + "step": 186930 + }, + { + "epoch": 4.566853150269953, + "grad_norm": 0.002388280350714922, + "learning_rate": 4.537891210393519e-08, + "loss": 0.0, + "num_input_tokens_seen": 125945576, + "step": 186935 + }, + { + "epoch": 4.566975301101801, + "grad_norm": 1.306969079450937e-05, + "learning_rate": 4.535351793897413e-08, + "loss": 0.0, + "num_input_tokens_seen": 125948648, + "step": 186940 + }, + { + "epoch": 4.567097451933647, + "grad_norm": 4.432052810443565e-05, + "learning_rate": 4.532813071645891e-08, + "loss": 0.0, + "num_input_tokens_seen": 125952040, + "step": 186945 + }, + { + "epoch": 4.567219602765495, + "grad_norm": 8.9752342319116e-05, + "learning_rate": 4.53027504365745e-08, + "loss": 0.0, + "num_input_tokens_seen": 125955432, + "step": 186950 + }, + { + "epoch": 4.567341753597342, + "grad_norm": 0.003290035994723439, + "learning_rate": 4.5277377099505076e-08, + "loss": 0.0, + "num_input_tokens_seen": 125958824, + "step": 186955 + }, + { + "epoch": 4.567463904429189, + "grad_norm": 3.403526352485642e-05, + "learning_rate": 4.5252010705435386e-08, + "loss": 0.0, + "num_input_tokens_seen": 125961896, + "step": 186960 + }, + { + "epoch": 4.567586055261036, + "grad_norm": 0.00044939748477190733, + "learning_rate": 4.522665125454994e-08, + "loss": 0.0, + "num_input_tokens_seen": 125965288, + "step": 186965 + }, + { + "epoch": 4.567708206092884, + "grad_norm": 0.005186882801353931, + "learning_rate": 4.5201298747033155e-08, + "loss": 0.0, + "num_input_tokens_seen": 125968680, + "step": 186970 + }, + { + "epoch": 4.5678303569247305, + "grad_norm": 0.00021564609778579324, + "learning_rate": 4.517595318306911e-08, + "loss": 0.0, + "num_input_tokens_seen": 125971816, + "step": 186975 + }, + { + "epoch": 4.567952507756578, + "grad_norm": 0.0006758919917047024, + "learning_rate": 4.5150614562842635e-08, + "loss": 0.0, + "num_input_tokens_seen": 125975144, + "step": 186980 + }, + { + "epoch": 4.568074658588425, + "grad_norm": 0.00018626233213581145, + "learning_rate": 4.51252828865375e-08, + "loss": 0.0, + "num_input_tokens_seen": 125978408, + "step": 186985 + }, + { + "epoch": 4.5681968094202725, + "grad_norm": 5.7257089792983606e-05, + "learning_rate": 4.5099958154338204e-08, + "loss": 0.0, + "num_input_tokens_seen": 125981544, + "step": 186990 + }, + { + "epoch": 4.568318960252119, + "grad_norm": 0.00012096945283701643, + "learning_rate": 4.507464036642883e-08, + "loss": 0.0, + "num_input_tokens_seen": 125984872, + "step": 186995 + }, + { + "epoch": 4.568441111083967, + "grad_norm": 0.00010047034447779879, + "learning_rate": 4.504932952299356e-08, + "loss": 0.0, + "num_input_tokens_seen": 125988328, + "step": 187000 + }, + { + "epoch": 4.568563261915814, + "grad_norm": 0.00630926201120019, + "learning_rate": 4.502402562421637e-08, + "loss": 0.0, + "num_input_tokens_seen": 125991208, + "step": 187005 + }, + { + "epoch": 4.56868541274766, + "grad_norm": 8.817094203550369e-05, + "learning_rate": 4.499872867028143e-08, + "loss": 0.0, + "num_input_tokens_seen": 125995304, + "step": 187010 + }, + { + "epoch": 4.568807563579508, + "grad_norm": 0.0006666062981821597, + "learning_rate": 4.4973438661372374e-08, + "loss": 0.0, + "num_input_tokens_seen": 125998504, + "step": 187015 + }, + { + "epoch": 4.568929714411355, + "grad_norm": 1.3411827239906415e-05, + "learning_rate": 4.494815559767351e-08, + "loss": 0.0, + "num_input_tokens_seen": 126002024, + "step": 187020 + }, + { + "epoch": 4.569051865243202, + "grad_norm": 0.0007918593473732471, + "learning_rate": 4.492287947936857e-08, + "loss": 0.0, + "num_input_tokens_seen": 126005096, + "step": 187025 + }, + { + "epoch": 4.569174016075049, + "grad_norm": 0.0014626365154981613, + "learning_rate": 4.4897610306641184e-08, + "loss": 0.0, + "num_input_tokens_seen": 126008232, + "step": 187030 + }, + { + "epoch": 4.569296166906897, + "grad_norm": 5.475069701788016e-05, + "learning_rate": 4.487234807967544e-08, + "loss": 0.0, + "num_input_tokens_seen": 126012136, + "step": 187035 + }, + { + "epoch": 4.5694183177387435, + "grad_norm": 0.0029703148175030947, + "learning_rate": 4.484709279865473e-08, + "loss": 0.0, + "num_input_tokens_seen": 126015848, + "step": 187040 + }, + { + "epoch": 4.569540468570591, + "grad_norm": 0.00021349718736018986, + "learning_rate": 4.482184446376291e-08, + "loss": 0.0, + "num_input_tokens_seen": 126019048, + "step": 187045 + }, + { + "epoch": 4.569662619402438, + "grad_norm": 0.0002331474534003064, + "learning_rate": 4.479660307518363e-08, + "loss": 0.0, + "num_input_tokens_seen": 126022632, + "step": 187050 + }, + { + "epoch": 4.5697847702342855, + "grad_norm": 0.0002785135293379426, + "learning_rate": 4.477136863310016e-08, + "loss": 0.0, + "num_input_tokens_seen": 126026216, + "step": 187055 + }, + { + "epoch": 4.569906921066132, + "grad_norm": 0.0015631432179361582, + "learning_rate": 4.474614113769648e-08, + "loss": 0.0, + "num_input_tokens_seen": 126029352, + "step": 187060 + }, + { + "epoch": 4.57002907189798, + "grad_norm": 2.8294090952840634e-05, + "learning_rate": 4.472092058915567e-08, + "loss": 0.0, + "num_input_tokens_seen": 126032616, + "step": 187065 + }, + { + "epoch": 4.570151222729827, + "grad_norm": 0.00030958701972849667, + "learning_rate": 4.469570698766134e-08, + "loss": 0.0, + "num_input_tokens_seen": 126035880, + "step": 187070 + }, + { + "epoch": 4.570273373561674, + "grad_norm": 0.00970099214464426, + "learning_rate": 4.46705003333967e-08, + "loss": 0.0, + "num_input_tokens_seen": 126039080, + "step": 187075 + }, + { + "epoch": 4.570395524393521, + "grad_norm": 0.00010253264917992055, + "learning_rate": 4.4645300626545146e-08, + "loss": 0.0, + "num_input_tokens_seen": 126042600, + "step": 187080 + }, + { + "epoch": 4.570517675225369, + "grad_norm": 0.048178721219301224, + "learning_rate": 4.462010786728998e-08, + "loss": 0.0, + "num_input_tokens_seen": 126045736, + "step": 187085 + }, + { + "epoch": 4.570639826057215, + "grad_norm": 0.0006714654737152159, + "learning_rate": 4.4594922055814275e-08, + "loss": 0.0, + "num_input_tokens_seen": 126049000, + "step": 187090 + }, + { + "epoch": 4.570761976889063, + "grad_norm": 0.004424653016030788, + "learning_rate": 4.456974319230145e-08, + "loss": 0.0, + "num_input_tokens_seen": 126053288, + "step": 187095 + }, + { + "epoch": 4.57088412772091, + "grad_norm": 2.338566628168337e-05, + "learning_rate": 4.454457127693412e-08, + "loss": 0.0, + "num_input_tokens_seen": 126056424, + "step": 187100 + }, + { + "epoch": 4.5710062785527565, + "grad_norm": 0.012012263759970665, + "learning_rate": 4.4519406309895924e-08, + "loss": 0.0, + "num_input_tokens_seen": 126059944, + "step": 187105 + }, + { + "epoch": 4.571128429384604, + "grad_norm": 0.5830227136611938, + "learning_rate": 4.4494248291369495e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126063208, + "step": 187110 + }, + { + "epoch": 4.571250580216451, + "grad_norm": 0.003950295504182577, + "learning_rate": 4.44690972215378e-08, + "loss": 0.0, + "num_input_tokens_seen": 126066088, + "step": 187115 + }, + { + "epoch": 4.5713727310482986, + "grad_norm": 0.000485446973470971, + "learning_rate": 4.444395310058402e-08, + "loss": 0.0, + "num_input_tokens_seen": 126069288, + "step": 187120 + }, + { + "epoch": 4.571494881880145, + "grad_norm": 6.134338764240965e-05, + "learning_rate": 4.441881592869068e-08, + "loss": 0.0, + "num_input_tokens_seen": 126072680, + "step": 187125 + }, + { + "epoch": 4.571617032711993, + "grad_norm": 0.0011996165849268436, + "learning_rate": 4.439368570604085e-08, + "loss": 0.0, + "num_input_tokens_seen": 126076008, + "step": 187130 + }, + { + "epoch": 4.57173918354384, + "grad_norm": 0.00873605441302061, + "learning_rate": 4.436856243281706e-08, + "loss": 0.0, + "num_input_tokens_seen": 126079400, + "step": 187135 + }, + { + "epoch": 4.571861334375687, + "grad_norm": 0.000793572049587965, + "learning_rate": 4.434344610920204e-08, + "loss": 0.0, + "num_input_tokens_seen": 126082792, + "step": 187140 + }, + { + "epoch": 4.571983485207534, + "grad_norm": 2.3203620003187098e-05, + "learning_rate": 4.431833673537877e-08, + "loss": 0.0, + "num_input_tokens_seen": 126086184, + "step": 187145 + }, + { + "epoch": 4.572105636039382, + "grad_norm": 0.0007438582251779735, + "learning_rate": 4.4293234311529315e-08, + "loss": 0.0, + "num_input_tokens_seen": 126089576, + "step": 187150 + }, + { + "epoch": 4.572227786871228, + "grad_norm": 8.248721314885188e-06, + "learning_rate": 4.426813883783676e-08, + "loss": 0.0, + "num_input_tokens_seen": 126092776, + "step": 187155 + }, + { + "epoch": 4.572349937703076, + "grad_norm": 0.0004233851213939488, + "learning_rate": 4.424305031448328e-08, + "loss": 0.0, + "num_input_tokens_seen": 126096360, + "step": 187160 + }, + { + "epoch": 4.572472088534923, + "grad_norm": 0.0021329792216420174, + "learning_rate": 4.4217968741651403e-08, + "loss": 0.0, + "num_input_tokens_seen": 126099752, + "step": 187165 + }, + { + "epoch": 4.5725942393667705, + "grad_norm": 0.0002402208192506805, + "learning_rate": 4.419289411952354e-08, + "loss": 0.0, + "num_input_tokens_seen": 126102952, + "step": 187170 + }, + { + "epoch": 4.572716390198617, + "grad_norm": 0.0009508799994364381, + "learning_rate": 4.4167826448282095e-08, + "loss": 0.0, + "num_input_tokens_seen": 126106536, + "step": 187175 + }, + { + "epoch": 4.572838541030464, + "grad_norm": 0.00020646640041377395, + "learning_rate": 4.414276572810915e-08, + "loss": 0.0, + "num_input_tokens_seen": 126110376, + "step": 187180 + }, + { + "epoch": 4.572960691862312, + "grad_norm": 0.0008768560364842415, + "learning_rate": 4.411771195918723e-08, + "loss": 0.0, + "num_input_tokens_seen": 126113896, + "step": 187185 + }, + { + "epoch": 4.573082842694159, + "grad_norm": 3.4873282857006416e-05, + "learning_rate": 4.409266514169841e-08, + "loss": 0.0, + "num_input_tokens_seen": 126117224, + "step": 187190 + }, + { + "epoch": 4.573204993526006, + "grad_norm": 0.0007052597356960177, + "learning_rate": 4.406762527582475e-08, + "loss": 0.0017, + "num_input_tokens_seen": 126120552, + "step": 187195 + }, + { + "epoch": 4.573327144357853, + "grad_norm": 0.0013276163954287767, + "learning_rate": 4.404259236174846e-08, + "loss": 0.0, + "num_input_tokens_seen": 126124136, + "step": 187200 + }, + { + "epoch": 4.5734492951897, + "grad_norm": 0.0001082074231817387, + "learning_rate": 4.4017566399651596e-08, + "loss": 0.0, + "num_input_tokens_seen": 126127464, + "step": 187205 + }, + { + "epoch": 4.573571446021547, + "grad_norm": 1.8606428056955338e-05, + "learning_rate": 4.399254738971603e-08, + "loss": 0.0, + "num_input_tokens_seen": 126130728, + "step": 187210 + }, + { + "epoch": 4.573693596853395, + "grad_norm": 2.800193578877952e-05, + "learning_rate": 4.396753533212394e-08, + "loss": 0.0, + "num_input_tokens_seen": 126134376, + "step": 187215 + }, + { + "epoch": 4.5738157476852415, + "grad_norm": 0.0005040558171458542, + "learning_rate": 4.394253022705696e-08, + "loss": 0.0, + "num_input_tokens_seen": 126138152, + "step": 187220 + }, + { + "epoch": 4.573937898517089, + "grad_norm": 0.0006679038051515818, + "learning_rate": 4.3917532074697175e-08, + "loss": 0.0, + "num_input_tokens_seen": 126141224, + "step": 187225 + }, + { + "epoch": 4.574060049348936, + "grad_norm": 3.217871199012734e-05, + "learning_rate": 4.389254087522609e-08, + "loss": 0.0, + "num_input_tokens_seen": 126144552, + "step": 187230 + }, + { + "epoch": 4.5741822001807835, + "grad_norm": 0.00022618411458097398, + "learning_rate": 4.386755662882558e-08, + "loss": 0.0, + "num_input_tokens_seen": 126147880, + "step": 187235 + }, + { + "epoch": 4.57430435101263, + "grad_norm": 3.0717765184817836e-05, + "learning_rate": 4.384257933567759e-08, + "loss": 0.0, + "num_input_tokens_seen": 126151016, + "step": 187240 + }, + { + "epoch": 4.574426501844478, + "grad_norm": 0.0004580276436172426, + "learning_rate": 4.381760899596332e-08, + "loss": 0.0, + "num_input_tokens_seen": 126154984, + "step": 187245 + }, + { + "epoch": 4.574548652676325, + "grad_norm": 5.467039954965003e-05, + "learning_rate": 4.379264560986473e-08, + "loss": 0.0, + "num_input_tokens_seen": 126158120, + "step": 187250 + }, + { + "epoch": 4.574670803508172, + "grad_norm": 0.0004239397821947932, + "learning_rate": 4.376768917756313e-08, + "loss": 0.0, + "num_input_tokens_seen": 126161320, + "step": 187255 + }, + { + "epoch": 4.574792954340019, + "grad_norm": 0.029181107878684998, + "learning_rate": 4.374273969924014e-08, + "loss": 0.0, + "num_input_tokens_seen": 126164584, + "step": 187260 + }, + { + "epoch": 4.574915105171867, + "grad_norm": 1.1901041943929158e-05, + "learning_rate": 4.3717797175077064e-08, + "loss": 0.0, + "num_input_tokens_seen": 126167976, + "step": 187265 + }, + { + "epoch": 4.575037256003713, + "grad_norm": 0.00022315053502097726, + "learning_rate": 4.3692861605255424e-08, + "loss": 0.0, + "num_input_tokens_seen": 126171176, + "step": 187270 + }, + { + "epoch": 4.57515940683556, + "grad_norm": 0.00018542897305451334, + "learning_rate": 4.366793298995664e-08, + "loss": 0.0, + "num_input_tokens_seen": 126174696, + "step": 187275 + }, + { + "epoch": 4.575281557667408, + "grad_norm": 0.0003335888613946736, + "learning_rate": 4.364301132936177e-08, + "loss": 0.0, + "num_input_tokens_seen": 126178408, + "step": 187280 + }, + { + "epoch": 4.575403708499255, + "grad_norm": 4.691140566137619e-05, + "learning_rate": 4.3618096623652126e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126181672, + "step": 187285 + }, + { + "epoch": 4.575525859331102, + "grad_norm": 0.0005547900800593197, + "learning_rate": 4.3593188873009e-08, + "loss": 0.0, + "num_input_tokens_seen": 126184808, + "step": 187290 + }, + { + "epoch": 4.575648010162949, + "grad_norm": 0.00023010231961961836, + "learning_rate": 4.356828807761326e-08, + "loss": 0.0, + "num_input_tokens_seen": 126187880, + "step": 187295 + }, + { + "epoch": 4.5757701609947965, + "grad_norm": 96.00919342041016, + "learning_rate": 4.354339423764641e-08, + "loss": 0.0667, + "num_input_tokens_seen": 126191400, + "step": 187300 + }, + { + "epoch": 4.575892311826643, + "grad_norm": 2.1266638214001432e-05, + "learning_rate": 4.3518507353289103e-08, + "loss": 0.0, + "num_input_tokens_seen": 126194728, + "step": 187305 + }, + { + "epoch": 4.576014462658491, + "grad_norm": 0.04412275552749634, + "learning_rate": 4.349362742472251e-08, + "loss": 0.0, + "num_input_tokens_seen": 126198120, + "step": 187310 + }, + { + "epoch": 4.576136613490338, + "grad_norm": 1.4897872461006045e-05, + "learning_rate": 4.34687544521275e-08, + "loss": 0.0, + "num_input_tokens_seen": 126201192, + "step": 187315 + }, + { + "epoch": 4.576258764322185, + "grad_norm": 0.0005763565422967076, + "learning_rate": 4.344388843568503e-08, + "loss": 0.0, + "num_input_tokens_seen": 126204008, + "step": 187320 + }, + { + "epoch": 4.576380915154032, + "grad_norm": 0.0006947465590201318, + "learning_rate": 4.3419029375575844e-08, + "loss": 0.0, + "num_input_tokens_seen": 126207912, + "step": 187325 + }, + { + "epoch": 4.57650306598588, + "grad_norm": 8.477483788738027e-05, + "learning_rate": 4.339417727198069e-08, + "loss": 0.0, + "num_input_tokens_seen": 126210856, + "step": 187330 + }, + { + "epoch": 4.576625216817726, + "grad_norm": 3.6006738810101524e-05, + "learning_rate": 4.336933212508054e-08, + "loss": 0.0, + "num_input_tokens_seen": 126214632, + "step": 187335 + }, + { + "epoch": 4.576747367649574, + "grad_norm": 0.00023908380535431206, + "learning_rate": 4.334449393505579e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126219176, + "step": 187340 + }, + { + "epoch": 4.576869518481421, + "grad_norm": 0.04327535256743431, + "learning_rate": 4.331966270208731e-08, + "loss": 0.0, + "num_input_tokens_seen": 126222312, + "step": 187345 + }, + { + "epoch": 4.576991669313268, + "grad_norm": 0.0037969492841511965, + "learning_rate": 4.329483842635551e-08, + "loss": 0.0, + "num_input_tokens_seen": 126226600, + "step": 187350 + }, + { + "epoch": 4.577113820145115, + "grad_norm": 0.0007374720880761743, + "learning_rate": 4.3270021108040786e-08, + "loss": 0.0, + "num_input_tokens_seen": 126230248, + "step": 187355 + }, + { + "epoch": 4.577235970976963, + "grad_norm": 5.166658593225293e-05, + "learning_rate": 4.324521074732412e-08, + "loss": 0.0, + "num_input_tokens_seen": 126233704, + "step": 187360 + }, + { + "epoch": 4.5773581218088095, + "grad_norm": 9.717391367303208e-05, + "learning_rate": 4.3220407344385365e-08, + "loss": 0.0, + "num_input_tokens_seen": 126237096, + "step": 187365 + }, + { + "epoch": 4.577480272640656, + "grad_norm": 0.0003349487960804254, + "learning_rate": 4.3195610899405266e-08, + "loss": 0.0, + "num_input_tokens_seen": 126240552, + "step": 187370 + }, + { + "epoch": 4.577602423472504, + "grad_norm": 0.0029941475950181484, + "learning_rate": 4.317082141256401e-08, + "loss": 0.0, + "num_input_tokens_seen": 126243496, + "step": 187375 + }, + { + "epoch": 4.577724574304351, + "grad_norm": 6.836828106315807e-05, + "learning_rate": 4.314603888404189e-08, + "loss": 0.0, + "num_input_tokens_seen": 126246952, + "step": 187380 + }, + { + "epoch": 4.577846725136198, + "grad_norm": 0.0005417978391051292, + "learning_rate": 4.312126331401911e-08, + "loss": 0.0, + "num_input_tokens_seen": 126250344, + "step": 187385 + }, + { + "epoch": 4.577968875968045, + "grad_norm": 7.682857358304318e-06, + "learning_rate": 4.309649470267596e-08, + "loss": 0.0, + "num_input_tokens_seen": 126253608, + "step": 187390 + }, + { + "epoch": 4.578091026799893, + "grad_norm": 0.00015603537030983716, + "learning_rate": 4.3071733050192513e-08, + "loss": 0.0, + "num_input_tokens_seen": 126257064, + "step": 187395 + }, + { + "epoch": 4.578213177631739, + "grad_norm": 4.7801844630157575e-05, + "learning_rate": 4.304697835674864e-08, + "loss": 0.0, + "num_input_tokens_seen": 126260584, + "step": 187400 + }, + { + "epoch": 4.578335328463587, + "grad_norm": 0.001868409919552505, + "learning_rate": 4.302223062252475e-08, + "loss": 0.0, + "num_input_tokens_seen": 126263848, + "step": 187405 + }, + { + "epoch": 4.578457479295434, + "grad_norm": 0.02896163798868656, + "learning_rate": 4.2997489847700354e-08, + "loss": 0.0, + "num_input_tokens_seen": 126266984, + "step": 187410 + }, + { + "epoch": 4.578579630127281, + "grad_norm": 0.004452672321349382, + "learning_rate": 4.297275603245576e-08, + "loss": 0.0, + "num_input_tokens_seen": 126269800, + "step": 187415 + }, + { + "epoch": 4.578701780959128, + "grad_norm": 0.00024759912048466504, + "learning_rate": 4.29480291769706e-08, + "loss": 0.0, + "num_input_tokens_seen": 126273000, + "step": 187420 + }, + { + "epoch": 4.578823931790976, + "grad_norm": 0.0007450595730915666, + "learning_rate": 4.2923309281424734e-08, + "loss": 0.0, + "num_input_tokens_seen": 126276584, + "step": 187425 + }, + { + "epoch": 4.5789460826228225, + "grad_norm": 8.44811656861566e-05, + "learning_rate": 4.289859634599824e-08, + "loss": 0.0, + "num_input_tokens_seen": 126280488, + "step": 187430 + }, + { + "epoch": 4.57906823345467, + "grad_norm": 0.00011353510490152985, + "learning_rate": 4.28738903708703e-08, + "loss": 0.0, + "num_input_tokens_seen": 126284328, + "step": 187435 + }, + { + "epoch": 4.579190384286517, + "grad_norm": 0.0032558231614530087, + "learning_rate": 4.2849191356221116e-08, + "loss": 0.0, + "num_input_tokens_seen": 126287464, + "step": 187440 + }, + { + "epoch": 4.579312535118364, + "grad_norm": 0.00015557577717117965, + "learning_rate": 4.282449930222987e-08, + "loss": 0.0, + "num_input_tokens_seen": 126291112, + "step": 187445 + }, + { + "epoch": 4.579434685950211, + "grad_norm": 0.00022213808551896363, + "learning_rate": 4.27998142090763e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126294760, + "step": 187450 + }, + { + "epoch": 4.579556836782059, + "grad_norm": 0.04265875741839409, + "learning_rate": 4.2775136076940054e-08, + "loss": 0.0, + "num_input_tokens_seen": 126298408, + "step": 187455 + }, + { + "epoch": 4.579678987613906, + "grad_norm": 3.826288775599096e-06, + "learning_rate": 4.275046490600043e-08, + "loss": 0.0, + "num_input_tokens_seen": 126302184, + "step": 187460 + }, + { + "epoch": 4.579801138445752, + "grad_norm": 0.0010683677392080426, + "learning_rate": 4.2725800696436945e-08, + "loss": 0.0, + "num_input_tokens_seen": 126306152, + "step": 187465 + }, + { + "epoch": 4.5799232892776, + "grad_norm": 0.004442038480192423, + "learning_rate": 4.270114344842879e-08, + "loss": 0.0, + "num_input_tokens_seen": 126309736, + "step": 187470 + }, + { + "epoch": 4.580045440109447, + "grad_norm": 0.0018260219367220998, + "learning_rate": 4.26764931621556e-08, + "loss": 0.0, + "num_input_tokens_seen": 126313256, + "step": 187475 + }, + { + "epoch": 4.580167590941294, + "grad_norm": 0.00024842077982611954, + "learning_rate": 4.265184983779624e-08, + "loss": 0.0, + "num_input_tokens_seen": 126316456, + "step": 187480 + }, + { + "epoch": 4.580289741773141, + "grad_norm": 0.009610271081328392, + "learning_rate": 4.262721347553033e-08, + "loss": 0.0, + "num_input_tokens_seen": 126319976, + "step": 187485 + }, + { + "epoch": 4.580411892604989, + "grad_norm": 0.00014046119758859277, + "learning_rate": 4.260258407553663e-08, + "loss": 0.0, + "num_input_tokens_seen": 126323496, + "step": 187490 + }, + { + "epoch": 4.580534043436836, + "grad_norm": 4.547784919850528e-05, + "learning_rate": 4.257796163799454e-08, + "loss": 0.0, + "num_input_tokens_seen": 126326888, + "step": 187495 + }, + { + "epoch": 4.580656194268683, + "grad_norm": 0.00014775693125557154, + "learning_rate": 4.2553346163083146e-08, + "loss": 0.0, + "num_input_tokens_seen": 126330280, + "step": 187500 + }, + { + "epoch": 4.58077834510053, + "grad_norm": 2.9446891858242452e-05, + "learning_rate": 4.2528737650981086e-08, + "loss": 0.0, + "num_input_tokens_seen": 126333160, + "step": 187505 + }, + { + "epoch": 4.580900495932378, + "grad_norm": 0.00039331192965619266, + "learning_rate": 4.250413610186765e-08, + "loss": 0.0, + "num_input_tokens_seen": 126336488, + "step": 187510 + }, + { + "epoch": 4.581022646764224, + "grad_norm": 0.00026621881988830864, + "learning_rate": 4.2479541515921816e-08, + "loss": 0.0, + "num_input_tokens_seen": 126339368, + "step": 187515 + }, + { + "epoch": 4.581144797596072, + "grad_norm": 1.4885709788359236e-05, + "learning_rate": 4.24549538933221e-08, + "loss": 0.0, + "num_input_tokens_seen": 126342440, + "step": 187520 + }, + { + "epoch": 4.581266948427919, + "grad_norm": 5.755107486038469e-05, + "learning_rate": 4.2430373234247696e-08, + "loss": 0.0, + "num_input_tokens_seen": 126346536, + "step": 187525 + }, + { + "epoch": 4.581389099259766, + "grad_norm": 0.0004279443237464875, + "learning_rate": 4.2405799538877016e-08, + "loss": 0.0, + "num_input_tokens_seen": 126350056, + "step": 187530 + }, + { + "epoch": 4.581511250091613, + "grad_norm": 0.0019634836353361607, + "learning_rate": 4.2381232807389035e-08, + "loss": 0.0, + "num_input_tokens_seen": 126353768, + "step": 187535 + }, + { + "epoch": 4.58163340092346, + "grad_norm": 8.21991270640865e-05, + "learning_rate": 4.2356673039962265e-08, + "loss": 0.0, + "num_input_tokens_seen": 126357096, + "step": 187540 + }, + { + "epoch": 4.5817555517553075, + "grad_norm": 0.00014001928502693772, + "learning_rate": 4.233212023677524e-08, + "loss": 0.0, + "num_input_tokens_seen": 126360360, + "step": 187545 + }, + { + "epoch": 4.581877702587155, + "grad_norm": 1.1686901416396722e-05, + "learning_rate": 4.2307574398006806e-08, + "loss": 0.0, + "num_input_tokens_seen": 126364328, + "step": 187550 + }, + { + "epoch": 4.581999853419002, + "grad_norm": 0.00013509398559108377, + "learning_rate": 4.228303552383516e-08, + "loss": 0.0, + "num_input_tokens_seen": 126367464, + "step": 187555 + }, + { + "epoch": 4.582122004250849, + "grad_norm": 4.4286636693868786e-05, + "learning_rate": 4.225850361443894e-08, + "loss": 0.0, + "num_input_tokens_seen": 126370856, + "step": 187560 + }, + { + "epoch": 4.582244155082696, + "grad_norm": 2.9124090360710397e-05, + "learning_rate": 4.223397866999634e-08, + "loss": 0.0, + "num_input_tokens_seen": 126374248, + "step": 187565 + }, + { + "epoch": 4.582366305914543, + "grad_norm": 0.0018620576011016965, + "learning_rate": 4.2209460690686096e-08, + "loss": 0.0, + "num_input_tokens_seen": 126377832, + "step": 187570 + }, + { + "epoch": 4.582488456746391, + "grad_norm": 0.00014488592569250613, + "learning_rate": 4.218494967668607e-08, + "loss": 0.0, + "num_input_tokens_seen": 126381352, + "step": 187575 + }, + { + "epoch": 4.582610607578237, + "grad_norm": 9.13929907255806e-05, + "learning_rate": 4.216044562817467e-08, + "loss": 0.0, + "num_input_tokens_seen": 126384872, + "step": 187580 + }, + { + "epoch": 4.582732758410085, + "grad_norm": 0.0017216145060956478, + "learning_rate": 4.213594854533031e-08, + "loss": 0.029, + "num_input_tokens_seen": 126388264, + "step": 187585 + }, + { + "epoch": 4.582854909241932, + "grad_norm": 0.0009816106176003814, + "learning_rate": 4.211145842833097e-08, + "loss": 0.0, + "num_input_tokens_seen": 126391400, + "step": 187590 + }, + { + "epoch": 4.582977060073779, + "grad_norm": 9.432035585632548e-05, + "learning_rate": 4.2086975277354606e-08, + "loss": 0.0, + "num_input_tokens_seen": 126394344, + "step": 187595 + }, + { + "epoch": 4.583099210905626, + "grad_norm": 4.532306775217876e-05, + "learning_rate": 4.206249909257953e-08, + "loss": 0.0, + "num_input_tokens_seen": 126397544, + "step": 187600 + }, + { + "epoch": 4.583221361737474, + "grad_norm": 0.0012224335223436356, + "learning_rate": 4.203802987418348e-08, + "loss": 0.0, + "num_input_tokens_seen": 126401320, + "step": 187605 + }, + { + "epoch": 4.5833435125693205, + "grad_norm": 0.0014491344336420298, + "learning_rate": 4.201356762234476e-08, + "loss": 0.0, + "num_input_tokens_seen": 126404904, + "step": 187610 + }, + { + "epoch": 4.583465663401168, + "grad_norm": 0.0010459222830832005, + "learning_rate": 4.1989112337240784e-08, + "loss": 0.0005, + "num_input_tokens_seen": 126408360, + "step": 187615 + }, + { + "epoch": 4.583587814233015, + "grad_norm": 0.00420120544731617, + "learning_rate": 4.1964664019049855e-08, + "loss": 0.0, + "num_input_tokens_seen": 126411560, + "step": 187620 + }, + { + "epoch": 4.5837099650648625, + "grad_norm": 2.1079378711874597e-05, + "learning_rate": 4.1940222667949385e-08, + "loss": 0.0, + "num_input_tokens_seen": 126414888, + "step": 187625 + }, + { + "epoch": 4.583832115896709, + "grad_norm": 0.0007706163451075554, + "learning_rate": 4.191578828411746e-08, + "loss": 0.0, + "num_input_tokens_seen": 126418600, + "step": 187630 + }, + { + "epoch": 4.583954266728556, + "grad_norm": 0.0009319090167991817, + "learning_rate": 4.18913608677316e-08, + "loss": 0.0, + "num_input_tokens_seen": 126421736, + "step": 187635 + }, + { + "epoch": 4.584076417560404, + "grad_norm": 8.97532154340297e-05, + "learning_rate": 4.1866940418969324e-08, + "loss": 0.0, + "num_input_tokens_seen": 126425064, + "step": 187640 + }, + { + "epoch": 4.58419856839225, + "grad_norm": 0.0001152338954852894, + "learning_rate": 4.1842526938008495e-08, + "loss": 0.0, + "num_input_tokens_seen": 126428456, + "step": 187645 + }, + { + "epoch": 4.584320719224098, + "grad_norm": 0.0002968577609863132, + "learning_rate": 4.181812042502641e-08, + "loss": 0.0, + "num_input_tokens_seen": 126431848, + "step": 187650 + }, + { + "epoch": 4.584442870055945, + "grad_norm": 0.012818018905818462, + "learning_rate": 4.179372088020083e-08, + "loss": 0.0, + "num_input_tokens_seen": 126435496, + "step": 187655 + }, + { + "epoch": 4.584565020887792, + "grad_norm": 6.266115815378726e-05, + "learning_rate": 4.176932830370894e-08, + "loss": 0.0, + "num_input_tokens_seen": 126438952, + "step": 187660 + }, + { + "epoch": 4.584687171719639, + "grad_norm": 0.0003587511891964823, + "learning_rate": 4.174494269572837e-08, + "loss": 0.0, + "num_input_tokens_seen": 126442216, + "step": 187665 + }, + { + "epoch": 4.584809322551487, + "grad_norm": 2.6462874302524142e-05, + "learning_rate": 4.172056405643609e-08, + "loss": 0.0, + "num_input_tokens_seen": 126445736, + "step": 187670 + }, + { + "epoch": 4.5849314733833335, + "grad_norm": 0.002904921304434538, + "learning_rate": 4.169619238600963e-08, + "loss": 0.0, + "num_input_tokens_seen": 126449064, + "step": 187675 + }, + { + "epoch": 4.585053624215181, + "grad_norm": 9.107970981858671e-05, + "learning_rate": 4.16718276846264e-08, + "loss": 0.0, + "num_input_tokens_seen": 126452328, + "step": 187680 + }, + { + "epoch": 4.585175775047028, + "grad_norm": 1.338696984021226e-05, + "learning_rate": 4.164746995246327e-08, + "loss": 0.0, + "num_input_tokens_seen": 126455912, + "step": 187685 + }, + { + "epoch": 4.5852979258788755, + "grad_norm": 0.0004112111055292189, + "learning_rate": 4.162311918969763e-08, + "loss": 0.0, + "num_input_tokens_seen": 126459368, + "step": 187690 + }, + { + "epoch": 4.585420076710722, + "grad_norm": 1.6203335690079257e-05, + "learning_rate": 4.1598775396506246e-08, + "loss": 0.0, + "num_input_tokens_seen": 126462952, + "step": 187695 + }, + { + "epoch": 4.58554222754257, + "grad_norm": 0.004849761724472046, + "learning_rate": 4.1574438573066526e-08, + "loss": 0.0028, + "num_input_tokens_seen": 126466088, + "step": 187700 + }, + { + "epoch": 4.585664378374417, + "grad_norm": 7.962723611854017e-05, + "learning_rate": 4.155010871955522e-08, + "loss": 0.0, + "num_input_tokens_seen": 126469288, + "step": 187705 + }, + { + "epoch": 4.585786529206263, + "grad_norm": 0.00017755954468157142, + "learning_rate": 4.1525785836149294e-08, + "loss": 0.0, + "num_input_tokens_seen": 126472744, + "step": 187710 + }, + { + "epoch": 4.585908680038111, + "grad_norm": 3.693220060085878e-05, + "learning_rate": 4.150146992302572e-08, + "loss": 0.0, + "num_input_tokens_seen": 126476456, + "step": 187715 + }, + { + "epoch": 4.586030830869959, + "grad_norm": 0.0013728952035307884, + "learning_rate": 4.147716098036103e-08, + "loss": 0.0, + "num_input_tokens_seen": 126479976, + "step": 187720 + }, + { + "epoch": 4.586152981701805, + "grad_norm": 0.0031150600407272577, + "learning_rate": 4.145285900833251e-08, + "loss": 0.0, + "num_input_tokens_seen": 126482984, + "step": 187725 + }, + { + "epoch": 4.586275132533652, + "grad_norm": 0.0005813776515424252, + "learning_rate": 4.142856400711647e-08, + "loss": 0.0, + "num_input_tokens_seen": 126486504, + "step": 187730 + }, + { + "epoch": 4.5863972833655, + "grad_norm": 0.0002588435309007764, + "learning_rate": 4.1404275976889666e-08, + "loss": 0.0, + "num_input_tokens_seen": 126489704, + "step": 187735 + }, + { + "epoch": 4.5865194341973465, + "grad_norm": 3.303175981272943e-05, + "learning_rate": 4.1379994917828956e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126492712, + "step": 187740 + }, + { + "epoch": 4.586641585029194, + "grad_norm": 0.0034465112257748842, + "learning_rate": 4.135572083011074e-08, + "loss": 0.0, + "num_input_tokens_seen": 126495976, + "step": 187745 + }, + { + "epoch": 4.586763735861041, + "grad_norm": 2.2768550479668193e-05, + "learning_rate": 4.133145371391156e-08, + "loss": 0.0, + "num_input_tokens_seen": 126499304, + "step": 187750 + }, + { + "epoch": 4.5868858866928885, + "grad_norm": 0.00033273472217842937, + "learning_rate": 4.130719356940782e-08, + "loss": 0.0, + "num_input_tokens_seen": 126502760, + "step": 187755 + }, + { + "epoch": 4.587008037524735, + "grad_norm": 0.0005335175665095448, + "learning_rate": 4.128294039677605e-08, + "loss": 0.0, + "num_input_tokens_seen": 126506152, + "step": 187760 + }, + { + "epoch": 4.587130188356583, + "grad_norm": 0.00044096849160268903, + "learning_rate": 4.125869419619266e-08, + "loss": 0.0, + "num_input_tokens_seen": 126509608, + "step": 187765 + }, + { + "epoch": 4.58725233918843, + "grad_norm": 3.677398490253836e-05, + "learning_rate": 4.1234454967833844e-08, + "loss": 0.0, + "num_input_tokens_seen": 126512808, + "step": 187770 + }, + { + "epoch": 4.587374490020277, + "grad_norm": 8.349936251761392e-05, + "learning_rate": 4.121022271187602e-08, + "loss": 0.0, + "num_input_tokens_seen": 126516136, + "step": 187775 + }, + { + "epoch": 4.587496640852124, + "grad_norm": 0.005426215007901192, + "learning_rate": 4.1185997428495265e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126519336, + "step": 187780 + }, + { + "epoch": 4.587618791683972, + "grad_norm": 6.722656689817086e-05, + "learning_rate": 4.1161779117868004e-08, + "loss": 0.0, + "num_input_tokens_seen": 126522856, + "step": 187785 + }, + { + "epoch": 4.587740942515818, + "grad_norm": 0.00011253041884629056, + "learning_rate": 4.113756778016997e-08, + "loss": 0.0, + "num_input_tokens_seen": 126525928, + "step": 187790 + }, + { + "epoch": 4.587863093347666, + "grad_norm": 4.811697181139607e-06, + "learning_rate": 4.1113363415577583e-08, + "loss": 0.0, + "num_input_tokens_seen": 126529448, + "step": 187795 + }, + { + "epoch": 4.587985244179513, + "grad_norm": 0.005408538971096277, + "learning_rate": 4.10891660242666e-08, + "loss": 0.0, + "num_input_tokens_seen": 126532904, + "step": 187800 + }, + { + "epoch": 4.58810739501136, + "grad_norm": 0.038692690432071686, + "learning_rate": 4.10649756064132e-08, + "loss": 0.0, + "num_input_tokens_seen": 126536104, + "step": 187805 + }, + { + "epoch": 4.588229545843207, + "grad_norm": 0.000823112262878567, + "learning_rate": 4.104079216219336e-08, + "loss": 0.0, + "num_input_tokens_seen": 126539304, + "step": 187810 + }, + { + "epoch": 4.588351696675055, + "grad_norm": 0.0004939865320920944, + "learning_rate": 4.101661569178261e-08, + "loss": 0.0, + "num_input_tokens_seen": 126542504, + "step": 187815 + }, + { + "epoch": 4.588473847506902, + "grad_norm": 7.609864405822009e-05, + "learning_rate": 4.099244619535702e-08, + "loss": 0.0, + "num_input_tokens_seen": 126545576, + "step": 187820 + }, + { + "epoch": 4.588595998338748, + "grad_norm": 0.0031696229707449675, + "learning_rate": 4.0968283673092244e-08, + "loss": 0.0, + "num_input_tokens_seen": 126549224, + "step": 187825 + }, + { + "epoch": 4.588718149170596, + "grad_norm": 0.0009845405584201217, + "learning_rate": 4.0944128125164014e-08, + "loss": 0.0, + "num_input_tokens_seen": 126552744, + "step": 187830 + }, + { + "epoch": 4.588840300002443, + "grad_norm": 0.0037003783509135246, + "learning_rate": 4.091997955174831e-08, + "loss": 0.0, + "num_input_tokens_seen": 126556840, + "step": 187835 + }, + { + "epoch": 4.58896245083429, + "grad_norm": 0.002434109104797244, + "learning_rate": 4.089583795302021e-08, + "loss": 0.0, + "num_input_tokens_seen": 126560680, + "step": 187840 + }, + { + "epoch": 4.589084601666137, + "grad_norm": 0.0022978431079536676, + "learning_rate": 4.0871703329155685e-08, + "loss": 0.0, + "num_input_tokens_seen": 126563880, + "step": 187845 + }, + { + "epoch": 4.589206752497985, + "grad_norm": 8.7481748778373e-06, + "learning_rate": 4.084757568033004e-08, + "loss": 0.0, + "num_input_tokens_seen": 126567720, + "step": 187850 + }, + { + "epoch": 4.5893289033298315, + "grad_norm": 0.000550804368685931, + "learning_rate": 4.082345500671869e-08, + "loss": 0.0, + "num_input_tokens_seen": 126570856, + "step": 187855 + }, + { + "epoch": 4.589451054161679, + "grad_norm": 0.0002906092850025743, + "learning_rate": 4.079934130849738e-08, + "loss": 0.0, + "num_input_tokens_seen": 126573800, + "step": 187860 + }, + { + "epoch": 4.589573204993526, + "grad_norm": 0.0010242760181427002, + "learning_rate": 4.077523458584109e-08, + "loss": 0.0, + "num_input_tokens_seen": 126577960, + "step": 187865 + }, + { + "epoch": 4.5896953558253735, + "grad_norm": 0.00287572480738163, + "learning_rate": 4.075113483892545e-08, + "loss": 0.0436, + "num_input_tokens_seen": 126581032, + "step": 187870 + }, + { + "epoch": 4.58981750665722, + "grad_norm": 0.0011697241570800543, + "learning_rate": 4.072704206792543e-08, + "loss": 0.0, + "num_input_tokens_seen": 126584808, + "step": 187875 + }, + { + "epoch": 4.589939657489068, + "grad_norm": 0.00040922340122051537, + "learning_rate": 4.070295627301656e-08, + "loss": 0.0, + "num_input_tokens_seen": 126588840, + "step": 187880 + }, + { + "epoch": 4.590061808320915, + "grad_norm": 1.8101340174325742e-05, + "learning_rate": 4.067887745437359e-08, + "loss": 0.0, + "num_input_tokens_seen": 126592232, + "step": 187885 + }, + { + "epoch": 4.590183959152762, + "grad_norm": 0.0011178995482623577, + "learning_rate": 4.0654805612171936e-08, + "loss": 0.0, + "num_input_tokens_seen": 126595560, + "step": 187890 + }, + { + "epoch": 4.590306109984609, + "grad_norm": 5.905812940909527e-05, + "learning_rate": 4.0630740746586564e-08, + "loss": 0.0, + "num_input_tokens_seen": 126598952, + "step": 187895 + }, + { + "epoch": 4.590428260816456, + "grad_norm": 0.013189355842769146, + "learning_rate": 4.060668285779256e-08, + "loss": 0.0, + "num_input_tokens_seen": 126602344, + "step": 187900 + }, + { + "epoch": 4.590550411648303, + "grad_norm": 7.253669900819659e-05, + "learning_rate": 4.0582631945964786e-08, + "loss": 0.0, + "num_input_tokens_seen": 126605736, + "step": 187905 + }, + { + "epoch": 4.590672562480151, + "grad_norm": 0.00012215471360832453, + "learning_rate": 4.055858801127809e-08, + "loss": 0.0002, + "num_input_tokens_seen": 126609064, + "step": 187910 + }, + { + "epoch": 4.590794713311998, + "grad_norm": 3.525710053509101e-05, + "learning_rate": 4.0534551053907464e-08, + "loss": 0.0, + "num_input_tokens_seen": 126612392, + "step": 187915 + }, + { + "epoch": 4.5909168641438445, + "grad_norm": 0.0003965279902331531, + "learning_rate": 4.0510521074027636e-08, + "loss": 0.0, + "num_input_tokens_seen": 126615528, + "step": 187920 + }, + { + "epoch": 4.591039014975692, + "grad_norm": 0.0001456565223634243, + "learning_rate": 4.0486498071813256e-08, + "loss": 0.0, + "num_input_tokens_seen": 126618920, + "step": 187925 + }, + { + "epoch": 4.591161165807539, + "grad_norm": 0.00045940681593492627, + "learning_rate": 4.0462482047439295e-08, + "loss": 0.0, + "num_input_tokens_seen": 126622120, + "step": 187930 + }, + { + "epoch": 4.5912833166393865, + "grad_norm": 0.0001200491824420169, + "learning_rate": 4.043847300108016e-08, + "loss": 0.0, + "num_input_tokens_seen": 126625576, + "step": 187935 + }, + { + "epoch": 4.591405467471233, + "grad_norm": 0.0006971561233513057, + "learning_rate": 4.041447093291062e-08, + "loss": 0.0, + "num_input_tokens_seen": 126629096, + "step": 187940 + }, + { + "epoch": 4.591527618303081, + "grad_norm": 0.00014549454499501735, + "learning_rate": 4.0390475843105066e-08, + "loss": 0.0, + "num_input_tokens_seen": 126632424, + "step": 187945 + }, + { + "epoch": 4.591649769134928, + "grad_norm": 0.00033066223841160536, + "learning_rate": 4.036648773183804e-08, + "loss": 0.0, + "num_input_tokens_seen": 126635880, + "step": 187950 + }, + { + "epoch": 4.591771919966775, + "grad_norm": 5.113217775942758e-05, + "learning_rate": 4.0342506599284175e-08, + "loss": 0.0, + "num_input_tokens_seen": 126639208, + "step": 187955 + }, + { + "epoch": 4.591894070798622, + "grad_norm": 0.0005086977034807205, + "learning_rate": 4.0318532445617557e-08, + "loss": 0.0, + "num_input_tokens_seen": 126643112, + "step": 187960 + }, + { + "epoch": 4.59201622163047, + "grad_norm": 2.0289666281314567e-05, + "learning_rate": 4.0294565271012825e-08, + "loss": 0.0, + "num_input_tokens_seen": 126646440, + "step": 187965 + }, + { + "epoch": 4.592138372462316, + "grad_norm": 1.0712580660765525e-05, + "learning_rate": 4.027060507564406e-08, + "loss": 0.0, + "num_input_tokens_seen": 126650024, + "step": 187970 + }, + { + "epoch": 4.592260523294164, + "grad_norm": 0.003349565202370286, + "learning_rate": 4.0246651859685675e-08, + "loss": 0.0, + "num_input_tokens_seen": 126653352, + "step": 187975 + }, + { + "epoch": 4.592382674126011, + "grad_norm": 14.837358474731445, + "learning_rate": 4.0222705623311645e-08, + "loss": 0.0354, + "num_input_tokens_seen": 126656552, + "step": 187980 + }, + { + "epoch": 4.592504824957858, + "grad_norm": 1.570095082570333e-05, + "learning_rate": 4.019876636669628e-08, + "loss": 0.0, + "num_input_tokens_seen": 126660072, + "step": 187985 + }, + { + "epoch": 4.592626975789705, + "grad_norm": 9.002363367471844e-05, + "learning_rate": 4.017483409001376e-08, + "loss": 0.0, + "num_input_tokens_seen": 126663272, + "step": 187990 + }, + { + "epoch": 4.592749126621552, + "grad_norm": 0.000561710970941931, + "learning_rate": 4.0150908793437854e-08, + "loss": 0.0, + "num_input_tokens_seen": 126666984, + "step": 187995 + }, + { + "epoch": 4.5928712774533995, + "grad_norm": 0.00041520060040056705, + "learning_rate": 4.0126990477142854e-08, + "loss": 0.0, + "num_input_tokens_seen": 126670504, + "step": 188000 + }, + { + "epoch": 4.592993428285246, + "grad_norm": 0.001887407386675477, + "learning_rate": 4.0103079141302507e-08, + "loss": 0.0, + "num_input_tokens_seen": 126673896, + "step": 188005 + }, + { + "epoch": 4.593115579117094, + "grad_norm": 0.0003722521068993956, + "learning_rate": 4.00791747860908e-08, + "loss": 0.0, + "num_input_tokens_seen": 126677096, + "step": 188010 + }, + { + "epoch": 4.593237729948941, + "grad_norm": 0.0004346818896010518, + "learning_rate": 4.005527741168147e-08, + "loss": 0.0, + "num_input_tokens_seen": 126680616, + "step": 188015 + }, + { + "epoch": 4.593359880780788, + "grad_norm": 0.007946284487843513, + "learning_rate": 4.003138701824826e-08, + "loss": 0.0, + "num_input_tokens_seen": 126684264, + "step": 188020 + }, + { + "epoch": 4.593482031612635, + "grad_norm": 0.00010074281453853473, + "learning_rate": 4.000750360596517e-08, + "loss": 0.0, + "num_input_tokens_seen": 126687336, + "step": 188025 + }, + { + "epoch": 4.593604182444483, + "grad_norm": 0.0014380852226167917, + "learning_rate": 3.998362717500558e-08, + "loss": 0.0, + "num_input_tokens_seen": 126690856, + "step": 188030 + }, + { + "epoch": 4.593726333276329, + "grad_norm": 0.00030201810295693576, + "learning_rate": 3.995975772554339e-08, + "loss": 0.0394, + "num_input_tokens_seen": 126694632, + "step": 188035 + }, + { + "epoch": 4.593848484108177, + "grad_norm": 0.009521891362965107, + "learning_rate": 3.9935895257751984e-08, + "loss": 0.0, + "num_input_tokens_seen": 126698024, + "step": 188040 + }, + { + "epoch": 4.593970634940024, + "grad_norm": 0.0001786075154086575, + "learning_rate": 3.9912039771804903e-08, + "loss": 0.0, + "num_input_tokens_seen": 126701544, + "step": 188045 + }, + { + "epoch": 4.594092785771871, + "grad_norm": 0.00011570060451049358, + "learning_rate": 3.98881912678759e-08, + "loss": 0.0, + "num_input_tokens_seen": 126704936, + "step": 188050 + }, + { + "epoch": 4.594214936603718, + "grad_norm": 9.638779738452286e-05, + "learning_rate": 3.986434974613806e-08, + "loss": 0.0, + "num_input_tokens_seen": 126708264, + "step": 188055 + }, + { + "epoch": 4.594337087435566, + "grad_norm": 2.7568939913180657e-05, + "learning_rate": 3.984051520676501e-08, + "loss": 0.0, + "num_input_tokens_seen": 126711272, + "step": 188060 + }, + { + "epoch": 4.5944592382674125, + "grad_norm": 5.386893008108018e-06, + "learning_rate": 3.981668764992985e-08, + "loss": 0.0, + "num_input_tokens_seen": 126714728, + "step": 188065 + }, + { + "epoch": 4.594581389099259, + "grad_norm": 3.274362825322896e-05, + "learning_rate": 3.979286707580598e-08, + "loss": 0.0, + "num_input_tokens_seen": 126718376, + "step": 188070 + }, + { + "epoch": 4.594703539931107, + "grad_norm": 0.0004523637762758881, + "learning_rate": 3.976905348456683e-08, + "loss": 0.0, + "num_input_tokens_seen": 126721384, + "step": 188075 + }, + { + "epoch": 4.5948256907629546, + "grad_norm": 0.12980130314826965, + "learning_rate": 3.9745246876385255e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126724776, + "step": 188080 + }, + { + "epoch": 4.594947841594801, + "grad_norm": 0.0001941550726769492, + "learning_rate": 3.972144725143456e-08, + "loss": 0.0, + "num_input_tokens_seen": 126728168, + "step": 188085 + }, + { + "epoch": 4.595069992426648, + "grad_norm": 0.12191027402877808, + "learning_rate": 3.969765460988772e-08, + "loss": 0.0, + "num_input_tokens_seen": 126731176, + "step": 188090 + }, + { + "epoch": 4.595192143258496, + "grad_norm": 1.0134815056517255e-05, + "learning_rate": 3.9673868951918045e-08, + "loss": 0.0, + "num_input_tokens_seen": 126734504, + "step": 188095 + }, + { + "epoch": 4.595314294090342, + "grad_norm": 0.0001895191235234961, + "learning_rate": 3.9650090277698054e-08, + "loss": 0.0, + "num_input_tokens_seen": 126737512, + "step": 188100 + }, + { + "epoch": 4.59543644492219, + "grad_norm": 0.000430260319262743, + "learning_rate": 3.9626318587401066e-08, + "loss": 0.0, + "num_input_tokens_seen": 126740456, + "step": 188105 + }, + { + "epoch": 4.595558595754037, + "grad_norm": 3.98890369979199e-05, + "learning_rate": 3.960255388119971e-08, + "loss": 0.0, + "num_input_tokens_seen": 126743592, + "step": 188110 + }, + { + "epoch": 4.595680746585884, + "grad_norm": 7.241523417178541e-05, + "learning_rate": 3.957879615926696e-08, + "loss": 0.0, + "num_input_tokens_seen": 126747176, + "step": 188115 + }, + { + "epoch": 4.595802897417731, + "grad_norm": 0.00023456825874745846, + "learning_rate": 3.9555045421775566e-08, + "loss": 0.0, + "num_input_tokens_seen": 126751144, + "step": 188120 + }, + { + "epoch": 4.595925048249579, + "grad_norm": 7.77715613367036e-05, + "learning_rate": 3.9531301668898066e-08, + "loss": 0.0, + "num_input_tokens_seen": 126754664, + "step": 188125 + }, + { + "epoch": 4.596047199081426, + "grad_norm": 0.00020698907610494643, + "learning_rate": 3.950756490080742e-08, + "loss": 0.0, + "num_input_tokens_seen": 126757864, + "step": 188130 + }, + { + "epoch": 4.596169349913273, + "grad_norm": 0.0008199518779292703, + "learning_rate": 3.9483835117675947e-08, + "loss": 0.0, + "num_input_tokens_seen": 126761576, + "step": 188135 + }, + { + "epoch": 4.59629150074512, + "grad_norm": 0.00011563400767045096, + "learning_rate": 3.946011231967639e-08, + "loss": 0.0, + "num_input_tokens_seen": 126764968, + "step": 188140 + }, + { + "epoch": 4.596413651576968, + "grad_norm": 0.002665347419679165, + "learning_rate": 3.9436396506981383e-08, + "loss": 0.0, + "num_input_tokens_seen": 126768360, + "step": 188145 + }, + { + "epoch": 4.596535802408814, + "grad_norm": 4.5120101276552305e-05, + "learning_rate": 3.941268767976314e-08, + "loss": 0.0, + "num_input_tokens_seen": 126771944, + "step": 188150 + }, + { + "epoch": 4.596657953240662, + "grad_norm": 0.00019696805975399911, + "learning_rate": 3.938898583819428e-08, + "loss": 0.0, + "num_input_tokens_seen": 126775400, + "step": 188155 + }, + { + "epoch": 4.596780104072509, + "grad_norm": 0.0002855850907508284, + "learning_rate": 3.936529098244701e-08, + "loss": 0.0, + "num_input_tokens_seen": 126778856, + "step": 188160 + }, + { + "epoch": 4.5969022549043554, + "grad_norm": 0.0016741352155804634, + "learning_rate": 3.934160311269374e-08, + "loss": 0.0, + "num_input_tokens_seen": 126781864, + "step": 188165 + }, + { + "epoch": 4.597024405736203, + "grad_norm": 0.00040661616367287934, + "learning_rate": 3.931792222910679e-08, + "loss": 0.0, + "num_input_tokens_seen": 126785832, + "step": 188170 + }, + { + "epoch": 4.597146556568051, + "grad_norm": 1.6207712178584188e-05, + "learning_rate": 3.929424833185824e-08, + "loss": 0.0, + "num_input_tokens_seen": 126789352, + "step": 188175 + }, + { + "epoch": 4.5972687073998975, + "grad_norm": 6.939090962987393e-05, + "learning_rate": 3.9270581421120386e-08, + "loss": 0.0, + "num_input_tokens_seen": 126792424, + "step": 188180 + }, + { + "epoch": 4.597390858231744, + "grad_norm": 0.5053022503852844, + "learning_rate": 3.92469214970651e-08, + "loss": 0.0002, + "num_input_tokens_seen": 126795432, + "step": 188185 + }, + { + "epoch": 4.597513009063592, + "grad_norm": 0.0003090602986048907, + "learning_rate": 3.9223268559864796e-08, + "loss": 0.0, + "num_input_tokens_seen": 126798568, + "step": 188190 + }, + { + "epoch": 4.597635159895439, + "grad_norm": 0.00018028549675364047, + "learning_rate": 3.919962260969123e-08, + "loss": 0.0, + "num_input_tokens_seen": 126801832, + "step": 188195 + }, + { + "epoch": 4.597757310727286, + "grad_norm": 0.0002370340225752443, + "learning_rate": 3.917598364671637e-08, + "loss": 0.0, + "num_input_tokens_seen": 126805416, + "step": 188200 + }, + { + "epoch": 4.597879461559133, + "grad_norm": 0.0002550011849962175, + "learning_rate": 3.9152351671112305e-08, + "loss": 0.0, + "num_input_tokens_seen": 126808680, + "step": 188205 + }, + { + "epoch": 4.598001612390981, + "grad_norm": 1.735908335831482e-05, + "learning_rate": 3.9128726683050675e-08, + "loss": 0.0, + "num_input_tokens_seen": 126812072, + "step": 188210 + }, + { + "epoch": 4.598123763222827, + "grad_norm": 0.0027028501499444246, + "learning_rate": 3.9105108682703447e-08, + "loss": 0.0, + "num_input_tokens_seen": 126815528, + "step": 188215 + }, + { + "epoch": 4.598245914054675, + "grad_norm": 0.0037059872411191463, + "learning_rate": 3.908149767024238e-08, + "loss": 0.0, + "num_input_tokens_seen": 126818344, + "step": 188220 + }, + { + "epoch": 4.598368064886522, + "grad_norm": 9.531846046447754, + "learning_rate": 3.9057893645839005e-08, + "loss": 0.0488, + "num_input_tokens_seen": 126821800, + "step": 188225 + }, + { + "epoch": 4.598490215718369, + "grad_norm": 0.00041096939821727574, + "learning_rate": 3.903429660966517e-08, + "loss": 0.0001, + "num_input_tokens_seen": 126825384, + "step": 188230 + }, + { + "epoch": 4.598612366550216, + "grad_norm": 0.00016224953287746757, + "learning_rate": 3.901070656189231e-08, + "loss": 0.0, + "num_input_tokens_seen": 126828584, + "step": 188235 + }, + { + "epoch": 4.598734517382064, + "grad_norm": 0.0003495425044093281, + "learning_rate": 3.898712350269218e-08, + "loss": 0.0, + "num_input_tokens_seen": 126832232, + "step": 188240 + }, + { + "epoch": 4.5988566682139105, + "grad_norm": 0.00017650242079980671, + "learning_rate": 3.8963547432236064e-08, + "loss": 0.0, + "num_input_tokens_seen": 126835624, + "step": 188245 + }, + { + "epoch": 4.598978819045758, + "grad_norm": 0.00018021099094767123, + "learning_rate": 3.8939978350695625e-08, + "loss": 0.0, + "num_input_tokens_seen": 126839336, + "step": 188250 + }, + { + "epoch": 4.599100969877605, + "grad_norm": 2.55699414992705e-05, + "learning_rate": 3.8916416258242045e-08, + "loss": 0.0, + "num_input_tokens_seen": 126842600, + "step": 188255 + }, + { + "epoch": 4.599223120709452, + "grad_norm": 1.4513319911202416e-05, + "learning_rate": 3.889286115504686e-08, + "loss": 0.0, + "num_input_tokens_seen": 126845608, + "step": 188260 + }, + { + "epoch": 4.599345271541299, + "grad_norm": 8.852702740114182e-05, + "learning_rate": 3.886931304128127e-08, + "loss": 0.0, + "num_input_tokens_seen": 126848936, + "step": 188265 + }, + { + "epoch": 4.599467422373146, + "grad_norm": 0.0014767482643947005, + "learning_rate": 3.8845771917116466e-08, + "loss": 0.0, + "num_input_tokens_seen": 126851816, + "step": 188270 + }, + { + "epoch": 4.599589573204994, + "grad_norm": 0.0007634480716660619, + "learning_rate": 3.882223778272398e-08, + "loss": 0.0, + "num_input_tokens_seen": 126855272, + "step": 188275 + }, + { + "epoch": 4.59971172403684, + "grad_norm": 3.212130468455143e-05, + "learning_rate": 3.879871063827445e-08, + "loss": 0.0, + "num_input_tokens_seen": 126858792, + "step": 188280 + }, + { + "epoch": 4.599833874868688, + "grad_norm": 0.001050909049808979, + "learning_rate": 3.87751904839394e-08, + "loss": 0.0, + "num_input_tokens_seen": 126862504, + "step": 188285 + }, + { + "epoch": 4.599956025700535, + "grad_norm": 9.944814519258216e-05, + "learning_rate": 3.8751677319889485e-08, + "loss": 0.0, + "num_input_tokens_seen": 126865320, + "step": 188290 + }, + { + "epoch": 4.600078176532382, + "grad_norm": 0.000473526946734637, + "learning_rate": 3.872817114629601e-08, + "loss": 0.0, + "num_input_tokens_seen": 126868264, + "step": 188295 + }, + { + "epoch": 4.600200327364229, + "grad_norm": 5.8090405218536034e-05, + "learning_rate": 3.8704671963329935e-08, + "loss": 0.0, + "num_input_tokens_seen": 126871464, + "step": 188300 + }, + { + "epoch": 4.600322478196077, + "grad_norm": 0.00030619362951256335, + "learning_rate": 3.868117977116192e-08, + "loss": 0.0, + "num_input_tokens_seen": 126874728, + "step": 188305 + }, + { + "epoch": 4.6004446290279235, + "grad_norm": 0.0002061193372355774, + "learning_rate": 3.865769456996304e-08, + "loss": 0.0, + "num_input_tokens_seen": 126878120, + "step": 188310 + }, + { + "epoch": 4.600566779859771, + "grad_norm": 0.00028244717395864427, + "learning_rate": 3.863421635990394e-08, + "loss": 0.0682, + "num_input_tokens_seen": 126881448, + "step": 188315 + }, + { + "epoch": 4.600688930691618, + "grad_norm": 0.003838619915768504, + "learning_rate": 3.861074514115536e-08, + "loss": 0.0, + "num_input_tokens_seen": 126884584, + "step": 188320 + }, + { + "epoch": 4.6008110815234655, + "grad_norm": 4.583996633300558e-05, + "learning_rate": 3.858728091388819e-08, + "loss": 0.0, + "num_input_tokens_seen": 126887784, + "step": 188325 + }, + { + "epoch": 4.600933232355312, + "grad_norm": 0.00048598088324069977, + "learning_rate": 3.85638236782726e-08, + "loss": 0.0, + "num_input_tokens_seen": 126890920, + "step": 188330 + }, + { + "epoch": 4.601055383187159, + "grad_norm": 5.8064073527930304e-05, + "learning_rate": 3.854037343447969e-08, + "loss": 0.0, + "num_input_tokens_seen": 126893928, + "step": 188335 + }, + { + "epoch": 4.601177534019007, + "grad_norm": 0.00011497107334434986, + "learning_rate": 3.8516930182679765e-08, + "loss": 0.0, + "num_input_tokens_seen": 126897256, + "step": 188340 + }, + { + "epoch": 4.601299684850854, + "grad_norm": 2.2357677153195255e-05, + "learning_rate": 3.849349392304335e-08, + "loss": 0.0, + "num_input_tokens_seen": 126900392, + "step": 188345 + }, + { + "epoch": 4.601421835682701, + "grad_norm": 0.0005386814009398222, + "learning_rate": 3.8470064655740655e-08, + "loss": 0.0, + "num_input_tokens_seen": 126903656, + "step": 188350 + }, + { + "epoch": 4.601543986514548, + "grad_norm": 0.00010427878441987559, + "learning_rate": 3.844664238094242e-08, + "loss": 0.0, + "num_input_tokens_seen": 126906792, + "step": 188355 + }, + { + "epoch": 4.601666137346395, + "grad_norm": 0.00020836573094129562, + "learning_rate": 3.842322709881884e-08, + "loss": 0.0, + "num_input_tokens_seen": 126910440, + "step": 188360 + }, + { + "epoch": 4.601788288178242, + "grad_norm": 0.0036403266713023186, + "learning_rate": 3.839981880954013e-08, + "loss": 0.0, + "num_input_tokens_seen": 126913704, + "step": 188365 + }, + { + "epoch": 4.60191043901009, + "grad_norm": 2.5985518732341006e-05, + "learning_rate": 3.837641751327669e-08, + "loss": 0.0, + "num_input_tokens_seen": 126916904, + "step": 188370 + }, + { + "epoch": 4.6020325898419365, + "grad_norm": 2.1882120563532226e-05, + "learning_rate": 3.835302321019851e-08, + "loss": 0.0, + "num_input_tokens_seen": 126920296, + "step": 188375 + }, + { + "epoch": 4.602154740673784, + "grad_norm": 0.002450756961479783, + "learning_rate": 3.832963590047589e-08, + "loss": 0.0, + "num_input_tokens_seen": 126924456, + "step": 188380 + }, + { + "epoch": 4.602276891505631, + "grad_norm": 0.0028287044260650873, + "learning_rate": 3.830625558427869e-08, + "loss": 0.0005, + "num_input_tokens_seen": 126927656, + "step": 188385 + }, + { + "epoch": 4.6023990423374785, + "grad_norm": 0.00017941469559445977, + "learning_rate": 3.8282882261777e-08, + "loss": 0.0, + "num_input_tokens_seen": 126931240, + "step": 188390 + }, + { + "epoch": 4.602521193169325, + "grad_norm": 0.0003415448300074786, + "learning_rate": 3.825951593314103e-08, + "loss": 0.0, + "num_input_tokens_seen": 126934568, + "step": 188395 + }, + { + "epoch": 4.602643344001173, + "grad_norm": 0.0003005169564858079, + "learning_rate": 3.82361565985404e-08, + "loss": 0.0, + "num_input_tokens_seen": 126938024, + "step": 188400 + }, + { + "epoch": 4.60276549483302, + "grad_norm": 0.0007993972394615412, + "learning_rate": 3.8212804258145324e-08, + "loss": 0.0, + "num_input_tokens_seen": 126941544, + "step": 188405 + }, + { + "epoch": 4.602887645664867, + "grad_norm": 0.0006093989941291511, + "learning_rate": 3.818945891212522e-08, + "loss": 0.0, + "num_input_tokens_seen": 126944488, + "step": 188410 + }, + { + "epoch": 4.603009796496714, + "grad_norm": 0.00032665004255250096, + "learning_rate": 3.816612056065016e-08, + "loss": 0.0, + "num_input_tokens_seen": 126947816, + "step": 188415 + }, + { + "epoch": 4.603131947328562, + "grad_norm": 0.00045783931273035705, + "learning_rate": 3.814278920388969e-08, + "loss": 0.0, + "num_input_tokens_seen": 126951400, + "step": 188420 + }, + { + "epoch": 4.603254098160408, + "grad_norm": 0.03975209966301918, + "learning_rate": 3.811946484201378e-08, + "loss": 0.0764, + "num_input_tokens_seen": 126954536, + "step": 188425 + }, + { + "epoch": 4.603376248992255, + "grad_norm": 0.0011893432820215821, + "learning_rate": 3.809614747519174e-08, + "loss": 0.0, + "num_input_tokens_seen": 126957288, + "step": 188430 + }, + { + "epoch": 4.603498399824103, + "grad_norm": 9.247218258678913e-06, + "learning_rate": 3.8072837103593106e-08, + "loss": 0.0, + "num_input_tokens_seen": 126960744, + "step": 188435 + }, + { + "epoch": 4.60362055065595, + "grad_norm": 9.148565732175484e-05, + "learning_rate": 3.804953372738762e-08, + "loss": 0.0, + "num_input_tokens_seen": 126963688, + "step": 188440 + }, + { + "epoch": 4.603742701487797, + "grad_norm": 3.173185177729465e-05, + "learning_rate": 3.8026237346744596e-08, + "loss": 0.0, + "num_input_tokens_seen": 126967528, + "step": 188445 + }, + { + "epoch": 4.603864852319644, + "grad_norm": 0.00013901907368563116, + "learning_rate": 3.8002947961833565e-08, + "loss": 0.0, + "num_input_tokens_seen": 126970856, + "step": 188450 + }, + { + "epoch": 4.603987003151492, + "grad_norm": 0.0020034005865454674, + "learning_rate": 3.797966557282384e-08, + "loss": 0.062, + "num_input_tokens_seen": 126974056, + "step": 188455 + }, + { + "epoch": 4.604109153983338, + "grad_norm": 0.012726671993732452, + "learning_rate": 3.795639017988472e-08, + "loss": 0.0, + "num_input_tokens_seen": 126978408, + "step": 188460 + }, + { + "epoch": 4.604231304815186, + "grad_norm": 0.00035635806852951646, + "learning_rate": 3.793312178318553e-08, + "loss": 0.0, + "num_input_tokens_seen": 126981736, + "step": 188465 + }, + { + "epoch": 4.604353455647033, + "grad_norm": 6.302624387899414e-05, + "learning_rate": 3.7909860382895455e-08, + "loss": 0.0002, + "num_input_tokens_seen": 126984936, + "step": 188470 + }, + { + "epoch": 4.60447560647888, + "grad_norm": 1.5507221178268082e-05, + "learning_rate": 3.788660597918347e-08, + "loss": 0.0, + "num_input_tokens_seen": 126988072, + "step": 188475 + }, + { + "epoch": 4.604597757310727, + "grad_norm": 0.00047399455797858536, + "learning_rate": 3.786335857221912e-08, + "loss": 0.0, + "num_input_tokens_seen": 126991464, + "step": 188480 + }, + { + "epoch": 4.604719908142575, + "grad_norm": 3.249979636166245e-05, + "learning_rate": 3.7840118162171033e-08, + "loss": 0.0, + "num_input_tokens_seen": 126994792, + "step": 188485 + }, + { + "epoch": 4.6048420589744214, + "grad_norm": 0.014283166266977787, + "learning_rate": 3.781688474920852e-08, + "loss": 0.0, + "num_input_tokens_seen": 126997800, + "step": 188490 + }, + { + "epoch": 4.604964209806269, + "grad_norm": 9.774186037248e-05, + "learning_rate": 3.779365833350035e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127001000, + "step": 188495 + }, + { + "epoch": 4.605086360638116, + "grad_norm": 1.9654957213788293e-05, + "learning_rate": 3.7770438915215586e-08, + "loss": 0.0, + "num_input_tokens_seen": 127004648, + "step": 188500 + }, + { + "epoch": 4.6052085114699635, + "grad_norm": 0.0008767848485149443, + "learning_rate": 3.7747226494522775e-08, + "loss": 0.0, + "num_input_tokens_seen": 127007976, + "step": 188505 + }, + { + "epoch": 4.60533066230181, + "grad_norm": 0.0007073588203638792, + "learning_rate": 3.7724021071591116e-08, + "loss": 0.0, + "num_input_tokens_seen": 127011624, + "step": 188510 + }, + { + "epoch": 4.605452813133658, + "grad_norm": 0.0001672497164690867, + "learning_rate": 3.770082264658925e-08, + "loss": 0.0, + "num_input_tokens_seen": 127014760, + "step": 188515 + }, + { + "epoch": 4.605574963965505, + "grad_norm": 0.0004079336067661643, + "learning_rate": 3.7677631219685704e-08, + "loss": 0.0, + "num_input_tokens_seen": 127018152, + "step": 188520 + }, + { + "epoch": 4.605697114797351, + "grad_norm": 0.0006070801755413413, + "learning_rate": 3.765444679104934e-08, + "loss": 0.0, + "num_input_tokens_seen": 127021544, + "step": 188525 + }, + { + "epoch": 4.605819265629199, + "grad_norm": 7.90668127592653e-05, + "learning_rate": 3.7631269360848706e-08, + "loss": 0.0, + "num_input_tokens_seen": 127024936, + "step": 188530 + }, + { + "epoch": 4.605941416461046, + "grad_norm": 7.476261089323089e-05, + "learning_rate": 3.7608098929252205e-08, + "loss": 0.0, + "num_input_tokens_seen": 127028136, + "step": 188535 + }, + { + "epoch": 4.606063567292893, + "grad_norm": 0.0001280046271858737, + "learning_rate": 3.7584935496428604e-08, + "loss": 0.0, + "num_input_tokens_seen": 127031336, + "step": 188540 + }, + { + "epoch": 4.60618571812474, + "grad_norm": 0.023287976160645485, + "learning_rate": 3.756177906254609e-08, + "loss": 0.0, + "num_input_tokens_seen": 127035048, + "step": 188545 + }, + { + "epoch": 4.606307868956588, + "grad_norm": 0.003366853343322873, + "learning_rate": 3.753862962777321e-08, + "loss": 0.0, + "num_input_tokens_seen": 127038568, + "step": 188550 + }, + { + "epoch": 4.6064300197884345, + "grad_norm": 0.0007673184736631811, + "learning_rate": 3.751548719227826e-08, + "loss": 0.0, + "num_input_tokens_seen": 127041704, + "step": 188555 + }, + { + "epoch": 4.606552170620282, + "grad_norm": 0.0015113946283236146, + "learning_rate": 3.749235175622967e-08, + "loss": 0.0, + "num_input_tokens_seen": 127045416, + "step": 188560 + }, + { + "epoch": 4.606674321452129, + "grad_norm": 0.00047893551527522504, + "learning_rate": 3.746922331979552e-08, + "loss": 0.0, + "num_input_tokens_seen": 127048744, + "step": 188565 + }, + { + "epoch": 4.6067964722839765, + "grad_norm": 0.0014517490053549409, + "learning_rate": 3.744610188314401e-08, + "loss": 0.0, + "num_input_tokens_seen": 127052264, + "step": 188570 + }, + { + "epoch": 4.606918623115823, + "grad_norm": 0.00027874510851688683, + "learning_rate": 3.7422987446443455e-08, + "loss": 0.0005, + "num_input_tokens_seen": 127055528, + "step": 188575 + }, + { + "epoch": 4.607040773947671, + "grad_norm": 1.3206047697167378e-05, + "learning_rate": 3.739988000986172e-08, + "loss": 0.0, + "num_input_tokens_seen": 127059048, + "step": 188580 + }, + { + "epoch": 4.607162924779518, + "grad_norm": 0.0002179906005039811, + "learning_rate": 3.7376779573567106e-08, + "loss": 0.0, + "num_input_tokens_seen": 127062504, + "step": 188585 + }, + { + "epoch": 4.607285075611365, + "grad_norm": 0.0002854499325621873, + "learning_rate": 3.735368613772727e-08, + "loss": 0.0, + "num_input_tokens_seen": 127065896, + "step": 188590 + }, + { + "epoch": 4.607407226443212, + "grad_norm": 0.0003474639088381082, + "learning_rate": 3.733059970251051e-08, + "loss": 0.0, + "num_input_tokens_seen": 127069160, + "step": 188595 + }, + { + "epoch": 4.60752937727506, + "grad_norm": 0.0037290072068572044, + "learning_rate": 3.7307520268084483e-08, + "loss": 0.0, + "num_input_tokens_seen": 127072552, + "step": 188600 + }, + { + "epoch": 4.607651528106906, + "grad_norm": 0.002940698992460966, + "learning_rate": 3.728444783461704e-08, + "loss": 0.0, + "num_input_tokens_seen": 127076008, + "step": 188605 + }, + { + "epoch": 4.607773678938754, + "grad_norm": 0.00025564880343154073, + "learning_rate": 3.726138240227628e-08, + "loss": 0.0, + "num_input_tokens_seen": 127079336, + "step": 188610 + }, + { + "epoch": 4.607895829770601, + "grad_norm": 0.000302201573504135, + "learning_rate": 3.723832397122939e-08, + "loss": 0.0, + "num_input_tokens_seen": 127082664, + "step": 188615 + }, + { + "epoch": 4.6080179806024475, + "grad_norm": 0.0005223817424848676, + "learning_rate": 3.721527254164458e-08, + "loss": 0.0, + "num_input_tokens_seen": 127085672, + "step": 188620 + }, + { + "epoch": 4.608140131434295, + "grad_norm": 0.00040818838169798255, + "learning_rate": 3.719222811368916e-08, + "loss": 0.0, + "num_input_tokens_seen": 127088872, + "step": 188625 + }, + { + "epoch": 4.608262282266142, + "grad_norm": 0.0005449874443002045, + "learning_rate": 3.716919068753088e-08, + "loss": 0.0, + "num_input_tokens_seen": 127092328, + "step": 188630 + }, + { + "epoch": 4.6083844330979895, + "grad_norm": 3.8353104173438624e-05, + "learning_rate": 3.714616026333728e-08, + "loss": 0.0, + "num_input_tokens_seen": 127095656, + "step": 188635 + }, + { + "epoch": 4.608506583929836, + "grad_norm": 0.0029568178579211235, + "learning_rate": 3.712313684127566e-08, + "loss": 0.0446, + "num_input_tokens_seen": 127099176, + "step": 188640 + }, + { + "epoch": 4.608628734761684, + "grad_norm": 0.00016009059618227184, + "learning_rate": 3.710012042151367e-08, + "loss": 0.0, + "num_input_tokens_seen": 127102440, + "step": 188645 + }, + { + "epoch": 4.608750885593531, + "grad_norm": 9.877441334538162e-05, + "learning_rate": 3.70771110042184e-08, + "loss": 0.0, + "num_input_tokens_seen": 127106216, + "step": 188650 + }, + { + "epoch": 4.608873036425378, + "grad_norm": 0.00012277076893951744, + "learning_rate": 3.70541085895576e-08, + "loss": 0.0, + "num_input_tokens_seen": 127109672, + "step": 188655 + }, + { + "epoch": 4.608995187257225, + "grad_norm": 0.0019494864391162992, + "learning_rate": 3.703111317769814e-08, + "loss": 0.0329, + "num_input_tokens_seen": 127112424, + "step": 188660 + }, + { + "epoch": 4.609117338089073, + "grad_norm": 0.03183303773403168, + "learning_rate": 3.700812476880744e-08, + "loss": 0.0, + "num_input_tokens_seen": 127115624, + "step": 188665 + }, + { + "epoch": 4.609239488920919, + "grad_norm": 0.000619585276581347, + "learning_rate": 3.6985143363052806e-08, + "loss": 0.0, + "num_input_tokens_seen": 127119016, + "step": 188670 + }, + { + "epoch": 4.609361639752767, + "grad_norm": 0.0004196378868073225, + "learning_rate": 3.696216896060112e-08, + "loss": 0.0, + "num_input_tokens_seen": 127122600, + "step": 188675 + }, + { + "epoch": 4.609483790584614, + "grad_norm": 0.00010272400686517358, + "learning_rate": 3.693920156161967e-08, + "loss": 0.0, + "num_input_tokens_seen": 127126312, + "step": 188680 + }, + { + "epoch": 4.609605941416461, + "grad_norm": 0.06050526350736618, + "learning_rate": 3.691624116627523e-08, + "loss": 0.0, + "num_input_tokens_seen": 127130536, + "step": 188685 + }, + { + "epoch": 4.609728092248308, + "grad_norm": 0.0011279457248747349, + "learning_rate": 3.6893287774735106e-08, + "loss": 0.0512, + "num_input_tokens_seen": 127133800, + "step": 188690 + }, + { + "epoch": 4.609850243080155, + "grad_norm": 0.0002741161733865738, + "learning_rate": 3.687034138716594e-08, + "loss": 0.0, + "num_input_tokens_seen": 127137832, + "step": 188695 + }, + { + "epoch": 4.6099723939120025, + "grad_norm": 0.019624967128038406, + "learning_rate": 3.6847402003734596e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127141160, + "step": 188700 + }, + { + "epoch": 4.61009454474385, + "grad_norm": 1.8829721739166416e-05, + "learning_rate": 3.682446962460817e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127144552, + "step": 188705 + }, + { + "epoch": 4.610216695575697, + "grad_norm": 0.00046129096881486475, + "learning_rate": 3.680154424995319e-08, + "loss": 0.0, + "num_input_tokens_seen": 127148136, + "step": 188710 + }, + { + "epoch": 4.610338846407544, + "grad_norm": 6.336476508295164e-05, + "learning_rate": 3.677862587993652e-08, + "loss": 0.0, + "num_input_tokens_seen": 127151656, + "step": 188715 + }, + { + "epoch": 4.610460997239391, + "grad_norm": 1.540117955300957e-05, + "learning_rate": 3.675571451472459e-08, + "loss": 0.0, + "num_input_tokens_seen": 127155112, + "step": 188720 + }, + { + "epoch": 4.610583148071238, + "grad_norm": 4.7324389015557244e-05, + "learning_rate": 3.673281015448437e-08, + "loss": 0.0, + "num_input_tokens_seen": 127158632, + "step": 188725 + }, + { + "epoch": 4.610705298903086, + "grad_norm": 0.00022235990036278963, + "learning_rate": 3.670991279938218e-08, + "loss": 0.0, + "num_input_tokens_seen": 127162216, + "step": 188730 + }, + { + "epoch": 4.610827449734932, + "grad_norm": 8.080943371169269e-05, + "learning_rate": 3.668702244958466e-08, + "loss": 0.0, + "num_input_tokens_seen": 127165864, + "step": 188735 + }, + { + "epoch": 4.61094960056678, + "grad_norm": 0.0002779005444608629, + "learning_rate": 3.6664139105258115e-08, + "loss": 0.0, + "num_input_tokens_seen": 127169256, + "step": 188740 + }, + { + "epoch": 4.611071751398627, + "grad_norm": 0.00119435612577945, + "learning_rate": 3.664126276656909e-08, + "loss": 0.0, + "num_input_tokens_seen": 127172648, + "step": 188745 + }, + { + "epoch": 4.611193902230474, + "grad_norm": 0.04748645797371864, + "learning_rate": 3.6618393433684006e-08, + "loss": 0.0, + "num_input_tokens_seen": 127176040, + "step": 188750 + }, + { + "epoch": 4.611316053062321, + "grad_norm": 0.0005814972682856023, + "learning_rate": 3.6595531106768826e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127179176, + "step": 188755 + }, + { + "epoch": 4.611438203894169, + "grad_norm": 9.456976113142446e-05, + "learning_rate": 3.657267578599021e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127182184, + "step": 188760 + }, + { + "epoch": 4.6115603547260156, + "grad_norm": 0.0006560469628311694, + "learning_rate": 3.654982747151436e-08, + "loss": 0.0, + "num_input_tokens_seen": 127185512, + "step": 188765 + }, + { + "epoch": 4.611682505557863, + "grad_norm": 4.1001934732776135e-05, + "learning_rate": 3.652698616350713e-08, + "loss": 0.0002, + "num_input_tokens_seen": 127189096, + "step": 188770 + }, + { + "epoch": 4.61180465638971, + "grad_norm": 0.001413383288308978, + "learning_rate": 3.650415186213485e-08, + "loss": 0.0, + "num_input_tokens_seen": 127192744, + "step": 188775 + }, + { + "epoch": 4.611926807221558, + "grad_norm": 0.06415407359600067, + "learning_rate": 3.648132456756348e-08, + "loss": 0.0, + "num_input_tokens_seen": 127195944, + "step": 188780 + }, + { + "epoch": 4.612048958053404, + "grad_norm": 0.47165924310684204, + "learning_rate": 3.645850427995911e-08, + "loss": 0.0002, + "num_input_tokens_seen": 127199912, + "step": 188785 + }, + { + "epoch": 4.612171108885251, + "grad_norm": 0.0006177661707624793, + "learning_rate": 3.643569099948773e-08, + "loss": 0.0, + "num_input_tokens_seen": 127203560, + "step": 188790 + }, + { + "epoch": 4.612293259717099, + "grad_norm": 0.0027711568400263786, + "learning_rate": 3.6412884726315095e-08, + "loss": 0.0, + "num_input_tokens_seen": 127206760, + "step": 188795 + }, + { + "epoch": 4.612415410548946, + "grad_norm": 5.98339902353473e-05, + "learning_rate": 3.639008546060718e-08, + "loss": 0.0, + "num_input_tokens_seen": 127210344, + "step": 188800 + }, + { + "epoch": 4.612537561380793, + "grad_norm": 0.0010224354919046164, + "learning_rate": 3.636729320252962e-08, + "loss": 0.0, + "num_input_tokens_seen": 127214504, + "step": 188805 + }, + { + "epoch": 4.61265971221264, + "grad_norm": 2.919272446888499e-05, + "learning_rate": 3.6344507952248525e-08, + "loss": 0.0002, + "num_input_tokens_seen": 127217896, + "step": 188810 + }, + { + "epoch": 4.6127818630444875, + "grad_norm": 6.360562838381156e-05, + "learning_rate": 3.632172970992919e-08, + "loss": 0.0, + "num_input_tokens_seen": 127221288, + "step": 188815 + }, + { + "epoch": 4.612904013876334, + "grad_norm": 0.00672942353412509, + "learning_rate": 3.6298958475737384e-08, + "loss": 0.0, + "num_input_tokens_seen": 127224808, + "step": 188820 + }, + { + "epoch": 4.613026164708182, + "grad_norm": 0.0002779511851258576, + "learning_rate": 3.6276194249838855e-08, + "loss": 0.0, + "num_input_tokens_seen": 127228456, + "step": 188825 + }, + { + "epoch": 4.613148315540029, + "grad_norm": 0.0013309585629031062, + "learning_rate": 3.625343703239903e-08, + "loss": 0.0, + "num_input_tokens_seen": 127231976, + "step": 188830 + }, + { + "epoch": 4.613270466371876, + "grad_norm": 0.00028830976225435734, + "learning_rate": 3.623068682358354e-08, + "loss": 0.0, + "num_input_tokens_seen": 127235368, + "step": 188835 + }, + { + "epoch": 4.613392617203723, + "grad_norm": 0.0012144579086452723, + "learning_rate": 3.620794362355761e-08, + "loss": 0.0, + "num_input_tokens_seen": 127238568, + "step": 188840 + }, + { + "epoch": 4.613514768035571, + "grad_norm": 7.327213825192302e-05, + "learning_rate": 3.6185207432486764e-08, + "loss": 0.0, + "num_input_tokens_seen": 127241896, + "step": 188845 + }, + { + "epoch": 4.613636918867417, + "grad_norm": 0.0004144865379203111, + "learning_rate": 3.616247825053631e-08, + "loss": 0.0, + "num_input_tokens_seen": 127245224, + "step": 188850 + }, + { + "epoch": 4.613759069699265, + "grad_norm": 8.64536632434465e-05, + "learning_rate": 3.6139756077871563e-08, + "loss": 0.0164, + "num_input_tokens_seen": 127248488, + "step": 188855 + }, + { + "epoch": 4.613881220531112, + "grad_norm": 7.078130875015631e-05, + "learning_rate": 3.6117040914657726e-08, + "loss": 0.0, + "num_input_tokens_seen": 127251624, + "step": 188860 + }, + { + "epoch": 4.614003371362959, + "grad_norm": 0.00017707333608996123, + "learning_rate": 3.6094332761059995e-08, + "loss": 0.0, + "num_input_tokens_seen": 127254824, + "step": 188865 + }, + { + "epoch": 4.614125522194806, + "grad_norm": 0.00012071825767634436, + "learning_rate": 3.6071631617243694e-08, + "loss": 0.0, + "num_input_tokens_seen": 127258216, + "step": 188870 + }, + { + "epoch": 4.614247673026654, + "grad_norm": 0.006401208695024252, + "learning_rate": 3.604893748337356e-08, + "loss": 0.0, + "num_input_tokens_seen": 127261736, + "step": 188875 + }, + { + "epoch": 4.6143698238585005, + "grad_norm": 6.188951374497265e-05, + "learning_rate": 3.6026250359614926e-08, + "loss": 0.0, + "num_input_tokens_seen": 127265256, + "step": 188880 + }, + { + "epoch": 4.614491974690347, + "grad_norm": 3.674550316645764e-05, + "learning_rate": 3.600357024613265e-08, + "loss": 0.0, + "num_input_tokens_seen": 127269096, + "step": 188885 + }, + { + "epoch": 4.614614125522195, + "grad_norm": 0.0004047330003231764, + "learning_rate": 3.598089714309172e-08, + "loss": 0.0, + "num_input_tokens_seen": 127272744, + "step": 188890 + }, + { + "epoch": 4.614736276354042, + "grad_norm": 0.0006450513028539717, + "learning_rate": 3.5958231050656985e-08, + "loss": 0.0, + "num_input_tokens_seen": 127276136, + "step": 188895 + }, + { + "epoch": 4.614858427185889, + "grad_norm": 0.00035003412631340325, + "learning_rate": 3.5935571968993215e-08, + "loss": 0.0, + "num_input_tokens_seen": 127279400, + "step": 188900 + }, + { + "epoch": 4.614980578017736, + "grad_norm": 9.33985211304389e-05, + "learning_rate": 3.5912919898265394e-08, + "loss": 0.0, + "num_input_tokens_seen": 127283112, + "step": 188905 + }, + { + "epoch": 4.615102728849584, + "grad_norm": 1.889674058475066e-05, + "learning_rate": 3.5890274838638044e-08, + "loss": 0.0, + "num_input_tokens_seen": 127286632, + "step": 188910 + }, + { + "epoch": 4.61522487968143, + "grad_norm": 0.001484995591454208, + "learning_rate": 3.5867636790275933e-08, + "loss": 0.0, + "num_input_tokens_seen": 127289832, + "step": 188915 + }, + { + "epoch": 4.615347030513278, + "grad_norm": 0.002284928457811475, + "learning_rate": 3.5845005753343704e-08, + "loss": 0.0, + "num_input_tokens_seen": 127292840, + "step": 188920 + }, + { + "epoch": 4.615469181345125, + "grad_norm": 0.0001229102781508118, + "learning_rate": 3.582238172800589e-08, + "loss": 0.0, + "num_input_tokens_seen": 127296104, + "step": 188925 + }, + { + "epoch": 4.615591332176972, + "grad_norm": 2.021228283410892e-05, + "learning_rate": 3.579976471442714e-08, + "loss": 0.0, + "num_input_tokens_seen": 127299624, + "step": 188930 + }, + { + "epoch": 4.615713483008819, + "grad_norm": 0.0012440208811312914, + "learning_rate": 3.577715471277176e-08, + "loss": 0.0, + "num_input_tokens_seen": 127303272, + "step": 188935 + }, + { + "epoch": 4.615835633840667, + "grad_norm": 0.000826433242764324, + "learning_rate": 3.5754551723204404e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127307048, + "step": 188940 + }, + { + "epoch": 4.6159577846725135, + "grad_norm": 3.18732563755475e-05, + "learning_rate": 3.573195574588917e-08, + "loss": 0.0, + "num_input_tokens_seen": 127310568, + "step": 188945 + }, + { + "epoch": 4.616079935504361, + "grad_norm": 3.4316594792471733e-06, + "learning_rate": 3.570936678099046e-08, + "loss": 0.0, + "num_input_tokens_seen": 127314280, + "step": 188950 + }, + { + "epoch": 4.616202086336208, + "grad_norm": 0.0002651728573255241, + "learning_rate": 3.568678482867271e-08, + "loss": 0.0, + "num_input_tokens_seen": 127318056, + "step": 188955 + }, + { + "epoch": 4.616324237168055, + "grad_norm": 0.00045246246736496687, + "learning_rate": 3.5664209889099904e-08, + "loss": 0.0383, + "num_input_tokens_seen": 127321768, + "step": 188960 + }, + { + "epoch": 4.616446387999902, + "grad_norm": 0.0002699349424801767, + "learning_rate": 3.564164196243658e-08, + "loss": 0.0, + "num_input_tokens_seen": 127325352, + "step": 188965 + }, + { + "epoch": 4.61656853883175, + "grad_norm": 0.003011636668816209, + "learning_rate": 3.5619081048846364e-08, + "loss": 0.0, + "num_input_tokens_seen": 127328616, + "step": 188970 + }, + { + "epoch": 4.616690689663597, + "grad_norm": 0.00096094113541767, + "learning_rate": 3.559652714849359e-08, + "loss": 0.0, + "num_input_tokens_seen": 127331816, + "step": 188975 + }, + { + "epoch": 4.616812840495443, + "grad_norm": 6.9423522290890105e-06, + "learning_rate": 3.5573980261542345e-08, + "loss": 0.0, + "num_input_tokens_seen": 127334952, + "step": 188980 + }, + { + "epoch": 4.616934991327291, + "grad_norm": 0.000394241651520133, + "learning_rate": 3.5551440388156494e-08, + "loss": 0.0, + "num_input_tokens_seen": 127338344, + "step": 188985 + }, + { + "epoch": 4.617057142159138, + "grad_norm": 0.0008912490447983146, + "learning_rate": 3.552890752850002e-08, + "loss": 0.0, + "num_input_tokens_seen": 127341288, + "step": 188990 + }, + { + "epoch": 4.617179292990985, + "grad_norm": 0.0007487831171602011, + "learning_rate": 3.550638168273667e-08, + "loss": 0.0, + "num_input_tokens_seen": 127344744, + "step": 188995 + }, + { + "epoch": 4.617301443822832, + "grad_norm": 3.8883234083186835e-05, + "learning_rate": 3.5483862851030444e-08, + "loss": 0.0, + "num_input_tokens_seen": 127347944, + "step": 189000 + }, + { + "epoch": 4.61742359465468, + "grad_norm": 0.000263658759649843, + "learning_rate": 3.546135103354486e-08, + "loss": 0.0, + "num_input_tokens_seen": 127351208, + "step": 189005 + }, + { + "epoch": 4.6175457454865265, + "grad_norm": 0.002318422310054302, + "learning_rate": 3.54388462304438e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127354344, + "step": 189010 + }, + { + "epoch": 4.617667896318374, + "grad_norm": 5.154572136234492e-05, + "learning_rate": 3.54163484418909e-08, + "loss": 0.0, + "num_input_tokens_seen": 127357416, + "step": 189015 + }, + { + "epoch": 4.617790047150221, + "grad_norm": 0.00021221258793957531, + "learning_rate": 3.53938576680497e-08, + "loss": 0.0, + "num_input_tokens_seen": 127360488, + "step": 189020 + }, + { + "epoch": 4.6179121979820685, + "grad_norm": 0.00020391370344441384, + "learning_rate": 3.5371373909083956e-08, + "loss": 0.0, + "num_input_tokens_seen": 127363816, + "step": 189025 + }, + { + "epoch": 4.618034348813915, + "grad_norm": 2.5762474251678213e-05, + "learning_rate": 3.534889716515688e-08, + "loss": 0.0, + "num_input_tokens_seen": 127367144, + "step": 189030 + }, + { + "epoch": 4.618156499645763, + "grad_norm": 6.636412763327826e-06, + "learning_rate": 3.532642743643221e-08, + "loss": 0.0, + "num_input_tokens_seen": 127369960, + "step": 189035 + }, + { + "epoch": 4.61827865047761, + "grad_norm": 6.774356006644666e-05, + "learning_rate": 3.5303964723073174e-08, + "loss": 0.0, + "num_input_tokens_seen": 127374248, + "step": 189040 + }, + { + "epoch": 4.618400801309457, + "grad_norm": 0.00014490798639599234, + "learning_rate": 3.528150902524319e-08, + "loss": 0.0, + "num_input_tokens_seen": 127377256, + "step": 189045 + }, + { + "epoch": 4.618522952141304, + "grad_norm": 3.6679371987702325e-05, + "learning_rate": 3.525906034310555e-08, + "loss": 0.0, + "num_input_tokens_seen": 127380904, + "step": 189050 + }, + { + "epoch": 4.618645102973151, + "grad_norm": 9.35463685891591e-05, + "learning_rate": 3.523661867682348e-08, + "loss": 0.0, + "num_input_tokens_seen": 127384424, + "step": 189055 + }, + { + "epoch": 4.618767253804998, + "grad_norm": 0.002642608480527997, + "learning_rate": 3.521418402656029e-08, + "loss": 0.0, + "num_input_tokens_seen": 127387496, + "step": 189060 + }, + { + "epoch": 4.618889404636846, + "grad_norm": 1.5124430319701787e-05, + "learning_rate": 3.519175639247895e-08, + "loss": 0.0, + "num_input_tokens_seen": 127390632, + "step": 189065 + }, + { + "epoch": 4.619011555468693, + "grad_norm": 0.0014844568213447928, + "learning_rate": 3.516933577474257e-08, + "loss": 0.0, + "num_input_tokens_seen": 127393640, + "step": 189070 + }, + { + "epoch": 4.6191337063005395, + "grad_norm": 0.0023881462402641773, + "learning_rate": 3.514692217351456e-08, + "loss": 0.0008, + "num_input_tokens_seen": 127396840, + "step": 189075 + }, + { + "epoch": 4.619255857132387, + "grad_norm": 5.589219290413894e-05, + "learning_rate": 3.512451558895735e-08, + "loss": 0.0, + "num_input_tokens_seen": 127400616, + "step": 189080 + }, + { + "epoch": 4.619378007964234, + "grad_norm": 0.0317891500890255, + "learning_rate": 3.510211602123447e-08, + "loss": 0.0, + "num_input_tokens_seen": 127403880, + "step": 189085 + }, + { + "epoch": 4.619500158796082, + "grad_norm": 0.00031270348699763417, + "learning_rate": 3.507972347050825e-08, + "loss": 0.0, + "num_input_tokens_seen": 127407464, + "step": 189090 + }, + { + "epoch": 4.619622309627928, + "grad_norm": 0.12114350497722626, + "learning_rate": 3.50573379369421e-08, + "loss": 0.0, + "num_input_tokens_seen": 127410728, + "step": 189095 + }, + { + "epoch": 4.619744460459776, + "grad_norm": 0.0023343341890722513, + "learning_rate": 3.503495942069834e-08, + "loss": 0.0, + "num_input_tokens_seen": 127413992, + "step": 189100 + }, + { + "epoch": 4.619866611291623, + "grad_norm": 4.485997123993002e-05, + "learning_rate": 3.501258792193984e-08, + "loss": 0.0, + "num_input_tokens_seen": 127417064, + "step": 189105 + }, + { + "epoch": 4.61998876212347, + "grad_norm": 8.163870006683283e-06, + "learning_rate": 3.499022344082947e-08, + "loss": 0.0, + "num_input_tokens_seen": 127420392, + "step": 189110 + }, + { + "epoch": 4.620110912955317, + "grad_norm": 0.04615478590130806, + "learning_rate": 3.4967865977529655e-08, + "loss": 0.0, + "num_input_tokens_seen": 127423784, + "step": 189115 + }, + { + "epoch": 4.620233063787165, + "grad_norm": 0.0004512005252763629, + "learning_rate": 3.494551553220326e-08, + "loss": 0.0, + "num_input_tokens_seen": 127426600, + "step": 189120 + }, + { + "epoch": 4.620355214619011, + "grad_norm": 0.0012671244330704212, + "learning_rate": 3.49231721050125e-08, + "loss": 0.0, + "num_input_tokens_seen": 127430056, + "step": 189125 + }, + { + "epoch": 4.620477365450859, + "grad_norm": 8.45419563120231e-05, + "learning_rate": 3.490083569612001e-08, + "loss": 0.0, + "num_input_tokens_seen": 127433384, + "step": 189130 + }, + { + "epoch": 4.620599516282706, + "grad_norm": 2.610344745335169e-05, + "learning_rate": 3.487850630568834e-08, + "loss": 0.0, + "num_input_tokens_seen": 127436776, + "step": 189135 + }, + { + "epoch": 4.6207216671145535, + "grad_norm": 0.0002742982469499111, + "learning_rate": 3.4856183933879566e-08, + "loss": 0.0, + "num_input_tokens_seen": 127440488, + "step": 189140 + }, + { + "epoch": 4.6208438179464, + "grad_norm": 0.0003484401968307793, + "learning_rate": 3.483386858085646e-08, + "loss": 0.0, + "num_input_tokens_seen": 127443880, + "step": 189145 + }, + { + "epoch": 4.620965968778247, + "grad_norm": 0.0003409306809771806, + "learning_rate": 3.481156024678089e-08, + "loss": 0.0, + "num_input_tokens_seen": 127447016, + "step": 189150 + }, + { + "epoch": 4.621088119610095, + "grad_norm": 0.0019779300782829523, + "learning_rate": 3.478925893181528e-08, + "loss": 0.0, + "num_input_tokens_seen": 127450536, + "step": 189155 + }, + { + "epoch": 4.621210270441941, + "grad_norm": 0.002628277288749814, + "learning_rate": 3.4766964636121834e-08, + "loss": 0.0, + "num_input_tokens_seen": 127454824, + "step": 189160 + }, + { + "epoch": 4.621332421273789, + "grad_norm": 0.0024878974072635174, + "learning_rate": 3.474467735986264e-08, + "loss": 0.0, + "num_input_tokens_seen": 127458216, + "step": 189165 + }, + { + "epoch": 4.621454572105636, + "grad_norm": 0.0001902437797980383, + "learning_rate": 3.4722397103199797e-08, + "loss": 0.0, + "num_input_tokens_seen": 127461672, + "step": 189170 + }, + { + "epoch": 4.621576722937483, + "grad_norm": 0.001445115078240633, + "learning_rate": 3.4700123866295174e-08, + "loss": 0.0, + "num_input_tokens_seen": 127464872, + "step": 189175 + }, + { + "epoch": 4.62169887376933, + "grad_norm": 6.11757131991908e-05, + "learning_rate": 3.4677857649311084e-08, + "loss": 0.0, + "num_input_tokens_seen": 127469992, + "step": 189180 + }, + { + "epoch": 4.621821024601178, + "grad_norm": 0.0021974225528538227, + "learning_rate": 3.4655598452409066e-08, + "loss": 0.0, + "num_input_tokens_seen": 127473448, + "step": 189185 + }, + { + "epoch": 4.6219431754330245, + "grad_norm": 0.002855875762179494, + "learning_rate": 3.4633346275751206e-08, + "loss": 0.0, + "num_input_tokens_seen": 127476904, + "step": 189190 + }, + { + "epoch": 4.622065326264872, + "grad_norm": 9.0890156570822e-05, + "learning_rate": 3.461110111949939e-08, + "loss": 0.0, + "num_input_tokens_seen": 127480168, + "step": 189195 + }, + { + "epoch": 4.622187477096719, + "grad_norm": 0.000494190666358918, + "learning_rate": 3.458886298381525e-08, + "loss": 0.0, + "num_input_tokens_seen": 127483944, + "step": 189200 + }, + { + "epoch": 4.6223096279285665, + "grad_norm": 0.001285312115214765, + "learning_rate": 3.456663186886055e-08, + "loss": 0.0, + "num_input_tokens_seen": 127487080, + "step": 189205 + }, + { + "epoch": 4.622431778760413, + "grad_norm": 0.0009309606975875795, + "learning_rate": 3.454440777479695e-08, + "loss": 0.0, + "num_input_tokens_seen": 127489896, + "step": 189210 + }, + { + "epoch": 4.622553929592261, + "grad_norm": 0.02289321832358837, + "learning_rate": 3.45221907017863e-08, + "loss": 0.0, + "num_input_tokens_seen": 127492840, + "step": 189215 + }, + { + "epoch": 4.622676080424108, + "grad_norm": 5.375369073590264e-05, + "learning_rate": 3.4499980649989716e-08, + "loss": 0.0, + "num_input_tokens_seen": 127496168, + "step": 189220 + }, + { + "epoch": 4.622798231255955, + "grad_norm": 3.526311047608033e-06, + "learning_rate": 3.447777761956894e-08, + "loss": 0.0343, + "num_input_tokens_seen": 127499560, + "step": 189225 + }, + { + "epoch": 4.622920382087802, + "grad_norm": 7.114775507943705e-05, + "learning_rate": 3.445558161068574e-08, + "loss": 0.0, + "num_input_tokens_seen": 127502696, + "step": 189230 + }, + { + "epoch": 4.62304253291965, + "grad_norm": 0.0013356233248487115, + "learning_rate": 3.443339262350098e-08, + "loss": 0.0, + "num_input_tokens_seen": 127506216, + "step": 189235 + }, + { + "epoch": 4.623164683751496, + "grad_norm": 9.928596409736201e-05, + "learning_rate": 3.441121065817643e-08, + "loss": 0.0004, + "num_input_tokens_seen": 127510056, + "step": 189240 + }, + { + "epoch": 4.623286834583343, + "grad_norm": 0.004730502609163523, + "learning_rate": 3.438903571487317e-08, + "loss": 0.0, + "num_input_tokens_seen": 127514280, + "step": 189245 + }, + { + "epoch": 4.623408985415191, + "grad_norm": 0.039514582604169846, + "learning_rate": 3.4366867793752755e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127517864, + "step": 189250 + }, + { + "epoch": 4.6235311362470375, + "grad_norm": 0.00017600138380657881, + "learning_rate": 3.434470689497615e-08, + "loss": 0.0, + "num_input_tokens_seen": 127521256, + "step": 189255 + }, + { + "epoch": 4.623653287078885, + "grad_norm": 6.132919224910438e-05, + "learning_rate": 3.432255301870435e-08, + "loss": 0.0, + "num_input_tokens_seen": 127524584, + "step": 189260 + }, + { + "epoch": 4.623775437910732, + "grad_norm": 0.00012273050379008055, + "learning_rate": 3.4300406165098884e-08, + "loss": 0.0, + "num_input_tokens_seen": 127528040, + "step": 189265 + }, + { + "epoch": 4.6238975887425795, + "grad_norm": 0.012543193064630032, + "learning_rate": 3.427826633432051e-08, + "loss": 0.0, + "num_input_tokens_seen": 127531112, + "step": 189270 + }, + { + "epoch": 4.624019739574426, + "grad_norm": 0.0007053225999698043, + "learning_rate": 3.425613352653045e-08, + "loss": 0.0, + "num_input_tokens_seen": 127534440, + "step": 189275 + }, + { + "epoch": 4.624141890406274, + "grad_norm": 0.00041072419844567776, + "learning_rate": 3.423400774188945e-08, + "loss": 0.0, + "num_input_tokens_seen": 127538024, + "step": 189280 + }, + { + "epoch": 4.624264041238121, + "grad_norm": 0.0010990831069648266, + "learning_rate": 3.421188898055838e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127541224, + "step": 189285 + }, + { + "epoch": 4.624386192069968, + "grad_norm": 0.000811230216640979, + "learning_rate": 3.4189777242698447e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127544808, + "step": 189290 + }, + { + "epoch": 4.624508342901815, + "grad_norm": 0.00042687845416367054, + "learning_rate": 3.416767252847008e-08, + "loss": 0.0, + "num_input_tokens_seen": 127548328, + "step": 189295 + }, + { + "epoch": 4.624630493733663, + "grad_norm": 0.0013424467761069536, + "learning_rate": 3.4145574838034264e-08, + "loss": 0.0, + "num_input_tokens_seen": 127551464, + "step": 189300 + }, + { + "epoch": 4.624752644565509, + "grad_norm": 0.00041963253170251846, + "learning_rate": 3.412348417155153e-08, + "loss": 0.0, + "num_input_tokens_seen": 127554792, + "step": 189305 + }, + { + "epoch": 4.624874795397357, + "grad_norm": 3.8160214899107814e-05, + "learning_rate": 3.410140052918275e-08, + "loss": 0.0, + "num_input_tokens_seen": 127558056, + "step": 189310 + }, + { + "epoch": 4.624996946229204, + "grad_norm": 3.0968370992923155e-05, + "learning_rate": 3.4079323911088256e-08, + "loss": 0.0, + "num_input_tokens_seen": 127561064, + "step": 189315 + }, + { + "epoch": 4.6251190970610505, + "grad_norm": 2.0047538782819174e-05, + "learning_rate": 3.4057254317428674e-08, + "loss": 0.0, + "num_input_tokens_seen": 127564200, + "step": 189320 + }, + { + "epoch": 4.625241247892898, + "grad_norm": 0.0002797935449052602, + "learning_rate": 3.4035191748364665e-08, + "loss": 0.0, + "num_input_tokens_seen": 127567400, + "step": 189325 + }, + { + "epoch": 4.625363398724746, + "grad_norm": 0.0034103060606867075, + "learning_rate": 3.4013136204056436e-08, + "loss": 0.0, + "num_input_tokens_seen": 127570600, + "step": 189330 + }, + { + "epoch": 4.6254855495565925, + "grad_norm": 6.119706085883081e-05, + "learning_rate": 3.3991087684664634e-08, + "loss": 0.0, + "num_input_tokens_seen": 127574312, + "step": 189335 + }, + { + "epoch": 4.625607700388439, + "grad_norm": 0.0007159013766795397, + "learning_rate": 3.3969046190349346e-08, + "loss": 0.0, + "num_input_tokens_seen": 127577704, + "step": 189340 + }, + { + "epoch": 4.625729851220287, + "grad_norm": 0.00033408793387934566, + "learning_rate": 3.3947011721271126e-08, + "loss": 0.0, + "num_input_tokens_seen": 127581352, + "step": 189345 + }, + { + "epoch": 4.625852002052134, + "grad_norm": 2.0137898900429718e-05, + "learning_rate": 3.392498427758994e-08, + "loss": 0.0, + "num_input_tokens_seen": 127585000, + "step": 189350 + }, + { + "epoch": 4.625974152883981, + "grad_norm": 4.836769949179143e-05, + "learning_rate": 3.390296385946623e-08, + "loss": 0.0, + "num_input_tokens_seen": 127588136, + "step": 189355 + }, + { + "epoch": 4.626096303715828, + "grad_norm": 3.60465855919756e-05, + "learning_rate": 3.3880950467059964e-08, + "loss": 0.0, + "num_input_tokens_seen": 127591336, + "step": 189360 + }, + { + "epoch": 4.626218454547676, + "grad_norm": 0.00076403992716223, + "learning_rate": 3.385894410053125e-08, + "loss": 0.0, + "num_input_tokens_seen": 127594856, + "step": 189365 + }, + { + "epoch": 4.626340605379522, + "grad_norm": 0.00041453863377682865, + "learning_rate": 3.383694476004018e-08, + "loss": 0.0, + "num_input_tokens_seen": 127597992, + "step": 189370 + }, + { + "epoch": 4.62646275621137, + "grad_norm": 0.00014004397962708026, + "learning_rate": 3.381495244574673e-08, + "loss": 0.0, + "num_input_tokens_seen": 127601000, + "step": 189375 + }, + { + "epoch": 4.626584907043217, + "grad_norm": 0.0023981884587556124, + "learning_rate": 3.379296715781066e-08, + "loss": 0.0, + "num_input_tokens_seen": 127604328, + "step": 189380 + }, + { + "epoch": 4.626707057875064, + "grad_norm": 0.0008847627905197442, + "learning_rate": 3.377098889639229e-08, + "loss": 0.0, + "num_input_tokens_seen": 127607336, + "step": 189385 + }, + { + "epoch": 4.626829208706911, + "grad_norm": 0.00041764331399463117, + "learning_rate": 3.374901766165095e-08, + "loss": 0.0, + "num_input_tokens_seen": 127610728, + "step": 189390 + }, + { + "epoch": 4.626951359538759, + "grad_norm": 4.636571611627005e-05, + "learning_rate": 3.372705345374671e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127613928, + "step": 189395 + }, + { + "epoch": 4.6270735103706055, + "grad_norm": 0.00011620017903624102, + "learning_rate": 3.370509627283924e-08, + "loss": 0.0, + "num_input_tokens_seen": 127616936, + "step": 189400 + }, + { + "epoch": 4.627195661202453, + "grad_norm": 0.00018765301501844078, + "learning_rate": 3.368314611908829e-08, + "loss": 0.0, + "num_input_tokens_seen": 127620200, + "step": 189405 + }, + { + "epoch": 4.6273178120343, + "grad_norm": 0.0007750758086331189, + "learning_rate": 3.3661202992653294e-08, + "loss": 0.0, + "num_input_tokens_seen": 127623400, + "step": 189410 + }, + { + "epoch": 4.627439962866147, + "grad_norm": 3.3744108804967254e-05, + "learning_rate": 3.3639266893693894e-08, + "loss": 0.0, + "num_input_tokens_seen": 127626664, + "step": 189415 + }, + { + "epoch": 4.627562113697994, + "grad_norm": 0.0006838897825218737, + "learning_rate": 3.361733782236986e-08, + "loss": 0.0, + "num_input_tokens_seen": 127629864, + "step": 189420 + }, + { + "epoch": 4.627684264529842, + "grad_norm": 0.0018917579436674714, + "learning_rate": 3.3595415778840284e-08, + "loss": 0.0, + "num_input_tokens_seen": 127633512, + "step": 189425 + }, + { + "epoch": 4.627806415361689, + "grad_norm": 2.9165235901018605e-05, + "learning_rate": 3.357350076326493e-08, + "loss": 0.0, + "num_input_tokens_seen": 127636840, + "step": 189430 + }, + { + "epoch": 4.627928566193535, + "grad_norm": 0.0006560211186297238, + "learning_rate": 3.355159277580289e-08, + "loss": 0.0, + "num_input_tokens_seen": 127640680, + "step": 189435 + }, + { + "epoch": 4.628050717025383, + "grad_norm": 0.00021043684682808816, + "learning_rate": 3.352969181661358e-08, + "loss": 0.0, + "num_input_tokens_seen": 127644072, + "step": 189440 + }, + { + "epoch": 4.62817286785723, + "grad_norm": 0.00036051214556209743, + "learning_rate": 3.350779788585645e-08, + "loss": 0.0, + "num_input_tokens_seen": 127647336, + "step": 189445 + }, + { + "epoch": 4.6282950186890774, + "grad_norm": 0.0009198451298289001, + "learning_rate": 3.348591098369047e-08, + "loss": 0.0, + "num_input_tokens_seen": 127651048, + "step": 189450 + }, + { + "epoch": 4.628417169520924, + "grad_norm": 1.5009105482022278e-05, + "learning_rate": 3.346403111027507e-08, + "loss": 0.0, + "num_input_tokens_seen": 127654312, + "step": 189455 + }, + { + "epoch": 4.628539320352772, + "grad_norm": 0.00016384219634346664, + "learning_rate": 3.344215826576913e-08, + "loss": 0.0, + "num_input_tokens_seen": 127657320, + "step": 189460 + }, + { + "epoch": 4.628661471184619, + "grad_norm": 0.00019981464720331132, + "learning_rate": 3.342029245033162e-08, + "loss": 0.0, + "num_input_tokens_seen": 127660712, + "step": 189465 + }, + { + "epoch": 4.628783622016466, + "grad_norm": 2.0608813429134898e-05, + "learning_rate": 3.339843366412198e-08, + "loss": 0.0, + "num_input_tokens_seen": 127664296, + "step": 189470 + }, + { + "epoch": 4.628905772848313, + "grad_norm": 15.964238166809082, + "learning_rate": 3.337658190729864e-08, + "loss": 0.0313, + "num_input_tokens_seen": 127667432, + "step": 189475 + }, + { + "epoch": 4.629027923680161, + "grad_norm": 0.0016988354036584496, + "learning_rate": 3.335473718002102e-08, + "loss": 0.0, + "num_input_tokens_seen": 127670568, + "step": 189480 + }, + { + "epoch": 4.629150074512007, + "grad_norm": 2.7631498596747406e-05, + "learning_rate": 3.333289948244755e-08, + "loss": 0.0, + "num_input_tokens_seen": 127674472, + "step": 189485 + }, + { + "epoch": 4.629272225343855, + "grad_norm": 8.974929369287565e-05, + "learning_rate": 3.331106881473744e-08, + "loss": 0.0, + "num_input_tokens_seen": 127677736, + "step": 189490 + }, + { + "epoch": 4.629394376175702, + "grad_norm": 3.777238089242019e-05, + "learning_rate": 3.328924517704912e-08, + "loss": 0.0631, + "num_input_tokens_seen": 127681128, + "step": 189495 + }, + { + "epoch": 4.629516527007549, + "grad_norm": 0.0008741427445784211, + "learning_rate": 3.326742856954135e-08, + "loss": 0.0, + "num_input_tokens_seen": 127684072, + "step": 189500 + }, + { + "epoch": 4.629638677839396, + "grad_norm": 0.0001225506712216884, + "learning_rate": 3.324561899237299e-08, + "loss": 0.0, + "num_input_tokens_seen": 127687208, + "step": 189505 + }, + { + "epoch": 4.629760828671243, + "grad_norm": 0.0024218896869570017, + "learning_rate": 3.322381644570238e-08, + "loss": 0.0, + "num_input_tokens_seen": 127690600, + "step": 189510 + }, + { + "epoch": 4.6298829795030905, + "grad_norm": 0.0008322626817971468, + "learning_rate": 3.3202020929688376e-08, + "loss": 0.0, + "num_input_tokens_seen": 127694120, + "step": 189515 + }, + { + "epoch": 4.630005130334937, + "grad_norm": 6.55809126328677e-05, + "learning_rate": 3.31802324444892e-08, + "loss": 0.0, + "num_input_tokens_seen": 127697128, + "step": 189520 + }, + { + "epoch": 4.630127281166785, + "grad_norm": 0.0007595704519189894, + "learning_rate": 3.31584509902636e-08, + "loss": 0.0, + "num_input_tokens_seen": 127700456, + "step": 189525 + }, + { + "epoch": 4.630249431998632, + "grad_norm": 0.0009916751878336072, + "learning_rate": 3.3136676567169566e-08, + "loss": 0.0, + "num_input_tokens_seen": 127703848, + "step": 189530 + }, + { + "epoch": 4.630371582830479, + "grad_norm": 0.000650413625407964, + "learning_rate": 3.3114909175365635e-08, + "loss": 0.0, + "num_input_tokens_seen": 127707240, + "step": 189535 + }, + { + "epoch": 4.630493733662326, + "grad_norm": 0.00022649105812888592, + "learning_rate": 3.3093148815010355e-08, + "loss": 0.0, + "num_input_tokens_seen": 127711208, + "step": 189540 + }, + { + "epoch": 4.630615884494174, + "grad_norm": 0.004471167456358671, + "learning_rate": 3.30713954862617e-08, + "loss": 0.0, + "num_input_tokens_seen": 127714664, + "step": 189545 + }, + { + "epoch": 4.63073803532602, + "grad_norm": 31.14780616760254, + "learning_rate": 3.304964918927788e-08, + "loss": 0.0619, + "num_input_tokens_seen": 127718120, + "step": 189550 + }, + { + "epoch": 4.630860186157868, + "grad_norm": 0.03578144684433937, + "learning_rate": 3.3027909924217114e-08, + "loss": 0.0, + "num_input_tokens_seen": 127722088, + "step": 189555 + }, + { + "epoch": 4.630982336989715, + "grad_norm": 0.0008296905434690416, + "learning_rate": 3.300617769123748e-08, + "loss": 0.0, + "num_input_tokens_seen": 127725416, + "step": 189560 + }, + { + "epoch": 4.631104487821562, + "grad_norm": 0.00012225030513945967, + "learning_rate": 3.2984452490497084e-08, + "loss": 0.0, + "num_input_tokens_seen": 127728232, + "step": 189565 + }, + { + "epoch": 4.631226638653409, + "grad_norm": 0.0011644281912595034, + "learning_rate": 3.29627343221538e-08, + "loss": 0.0, + "num_input_tokens_seen": 127731496, + "step": 189570 + }, + { + "epoch": 4.631348789485257, + "grad_norm": 0.00025070918491110206, + "learning_rate": 3.29410231863656e-08, + "loss": 0.0, + "num_input_tokens_seen": 127734440, + "step": 189575 + }, + { + "epoch": 4.6314709403171035, + "grad_norm": 0.0001868014078354463, + "learning_rate": 3.291931908329026e-08, + "loss": 0.0, + "num_input_tokens_seen": 127737832, + "step": 189580 + }, + { + "epoch": 4.63159309114895, + "grad_norm": 3.0223734938772395e-05, + "learning_rate": 3.289762201308599e-08, + "loss": 0.0, + "num_input_tokens_seen": 127741480, + "step": 189585 + }, + { + "epoch": 4.631715241980798, + "grad_norm": 7.385831122519448e-05, + "learning_rate": 3.28759319759101e-08, + "loss": 0.0, + "num_input_tokens_seen": 127744424, + "step": 189590 + }, + { + "epoch": 4.6318373928126455, + "grad_norm": 25.03947639465332, + "learning_rate": 3.2854248971920574e-08, + "loss": 0.0584, + "num_input_tokens_seen": 127747304, + "step": 189595 + }, + { + "epoch": 4.631959543644492, + "grad_norm": 5.329222767613828e-05, + "learning_rate": 3.283257300127529e-08, + "loss": 0.0, + "num_input_tokens_seen": 127750376, + "step": 189600 + }, + { + "epoch": 4.632081694476339, + "grad_norm": 0.0017787275137379766, + "learning_rate": 3.281090406413145e-08, + "loss": 0.0, + "num_input_tokens_seen": 127754152, + "step": 189605 + }, + { + "epoch": 4.632203845308187, + "grad_norm": 0.00028210910386405885, + "learning_rate": 3.2789242160647046e-08, + "loss": 0.0, + "num_input_tokens_seen": 127757480, + "step": 189610 + }, + { + "epoch": 4.632325996140033, + "grad_norm": 2.0327272068243474e-05, + "learning_rate": 3.276758729097928e-08, + "loss": 0.0, + "num_input_tokens_seen": 127760680, + "step": 189615 + }, + { + "epoch": 4.632448146971881, + "grad_norm": 0.0010251597268506885, + "learning_rate": 3.274593945528581e-08, + "loss": 0.0, + "num_input_tokens_seen": 127763880, + "step": 189620 + }, + { + "epoch": 4.632570297803728, + "grad_norm": 3.72401082131546e-05, + "learning_rate": 3.272429865372406e-08, + "loss": 0.0, + "num_input_tokens_seen": 127767592, + "step": 189625 + }, + { + "epoch": 4.632692448635575, + "grad_norm": 4.352164251031354e-05, + "learning_rate": 3.270266488645124e-08, + "loss": 0.0, + "num_input_tokens_seen": 127770664, + "step": 189630 + }, + { + "epoch": 4.632814599467422, + "grad_norm": 0.002941213781014085, + "learning_rate": 3.2681038153624995e-08, + "loss": 0.0, + "num_input_tokens_seen": 127773736, + "step": 189635 + }, + { + "epoch": 4.63293675029927, + "grad_norm": 0.0002367892739130184, + "learning_rate": 3.265941845540232e-08, + "loss": 0.0, + "num_input_tokens_seen": 127777128, + "step": 189640 + }, + { + "epoch": 4.6330589011311165, + "grad_norm": 0.00020610762294381857, + "learning_rate": 3.2637805791940645e-08, + "loss": 0.0, + "num_input_tokens_seen": 127780648, + "step": 189645 + }, + { + "epoch": 4.633181051962964, + "grad_norm": 7.007142994552851e-05, + "learning_rate": 3.2616200163396834e-08, + "loss": 0.0, + "num_input_tokens_seen": 127784360, + "step": 189650 + }, + { + "epoch": 4.633303202794811, + "grad_norm": 0.025431230664253235, + "learning_rate": 3.259460156992844e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127787496, + "step": 189655 + }, + { + "epoch": 4.6334253536266585, + "grad_norm": 4.163160701864399e-05, + "learning_rate": 3.2573010011692105e-08, + "loss": 0.0003, + "num_input_tokens_seen": 127790504, + "step": 189660 + }, + { + "epoch": 4.633547504458505, + "grad_norm": 2.1552707039518282e-05, + "learning_rate": 3.2551425488845264e-08, + "loss": 0.0, + "num_input_tokens_seen": 127793960, + "step": 189665 + }, + { + "epoch": 4.633669655290353, + "grad_norm": 0.0002611360978335142, + "learning_rate": 3.252984800154446e-08, + "loss": 0.0, + "num_input_tokens_seen": 127797736, + "step": 189670 + }, + { + "epoch": 4.6337918061222, + "grad_norm": 1.6607929865131155e-05, + "learning_rate": 3.250827754994701e-08, + "loss": 0.0, + "num_input_tokens_seen": 127801320, + "step": 189675 + }, + { + "epoch": 4.633913956954046, + "grad_norm": 0.0010899596381932497, + "learning_rate": 3.248671413420956e-08, + "loss": 0.0, + "num_input_tokens_seen": 127804584, + "step": 189680 + }, + { + "epoch": 4.634036107785894, + "grad_norm": 3.3614189305808395e-05, + "learning_rate": 3.246515775448877e-08, + "loss": 0.0, + "num_input_tokens_seen": 127807784, + "step": 189685 + }, + { + "epoch": 4.634158258617742, + "grad_norm": 0.00045298977056518197, + "learning_rate": 3.2443608410941624e-08, + "loss": 0.0, + "num_input_tokens_seen": 127810920, + "step": 189690 + }, + { + "epoch": 4.634280409449588, + "grad_norm": 0.00048405147390440106, + "learning_rate": 3.2422066103725e-08, + "loss": 0.0, + "num_input_tokens_seen": 127814440, + "step": 189695 + }, + { + "epoch": 4.634402560281435, + "grad_norm": 0.0020768088288605213, + "learning_rate": 3.24005308329951e-08, + "loss": 0.0, + "num_input_tokens_seen": 127817832, + "step": 189700 + }, + { + "epoch": 4.634524711113283, + "grad_norm": 0.0004733783716801554, + "learning_rate": 3.237900259890902e-08, + "loss": 0.0183, + "num_input_tokens_seen": 127820968, + "step": 189705 + }, + { + "epoch": 4.6346468619451295, + "grad_norm": 0.03913038596510887, + "learning_rate": 3.2357481401622976e-08, + "loss": 0.0, + "num_input_tokens_seen": 127824232, + "step": 189710 + }, + { + "epoch": 4.634769012776977, + "grad_norm": 0.00033711790456436574, + "learning_rate": 3.233596724129361e-08, + "loss": 0.0, + "num_input_tokens_seen": 127827368, + "step": 189715 + }, + { + "epoch": 4.634891163608824, + "grad_norm": 0.0015374831855297089, + "learning_rate": 3.231446011807737e-08, + "loss": 0.0, + "num_input_tokens_seen": 127830760, + "step": 189720 + }, + { + "epoch": 4.6350133144406716, + "grad_norm": 0.00012308296572882682, + "learning_rate": 3.229296003213056e-08, + "loss": 0.0, + "num_input_tokens_seen": 127834024, + "step": 189725 + }, + { + "epoch": 4.635135465272518, + "grad_norm": 5.056528971181251e-05, + "learning_rate": 3.227146698360983e-08, + "loss": 0.0, + "num_input_tokens_seen": 127837352, + "step": 189730 + }, + { + "epoch": 4.635257616104366, + "grad_norm": 9.410569327883422e-05, + "learning_rate": 3.224998097267106e-08, + "loss": 0.0, + "num_input_tokens_seen": 127841064, + "step": 189735 + }, + { + "epoch": 4.635379766936213, + "grad_norm": 0.0031454835552722216, + "learning_rate": 3.222850199947091e-08, + "loss": 0.0, + "num_input_tokens_seen": 127844264, + "step": 189740 + }, + { + "epoch": 4.63550191776806, + "grad_norm": 0.018755445256829262, + "learning_rate": 3.2207030064165255e-08, + "loss": 0.0, + "num_input_tokens_seen": 127847656, + "step": 189745 + }, + { + "epoch": 4.635624068599907, + "grad_norm": 0.000981925637461245, + "learning_rate": 3.21855651669104e-08, + "loss": 0.0, + "num_input_tokens_seen": 127850920, + "step": 189750 + }, + { + "epoch": 4.635746219431755, + "grad_norm": 0.0001301287265960127, + "learning_rate": 3.2164107307862456e-08, + "loss": 0.0, + "num_input_tokens_seen": 127854632, + "step": 189755 + }, + { + "epoch": 4.635868370263601, + "grad_norm": 1.917783447424881e-05, + "learning_rate": 3.21426564871774e-08, + "loss": 0.0, + "num_input_tokens_seen": 127857896, + "step": 189760 + }, + { + "epoch": 4.635990521095449, + "grad_norm": 2.9853536034352146e-05, + "learning_rate": 3.212121270501134e-08, + "loss": 0.0, + "num_input_tokens_seen": 127861416, + "step": 189765 + }, + { + "epoch": 4.636112671927296, + "grad_norm": 0.0005437976797111332, + "learning_rate": 3.209977596152025e-08, + "loss": 0.0002, + "num_input_tokens_seen": 127864616, + "step": 189770 + }, + { + "epoch": 4.636234822759143, + "grad_norm": 3.973076672991738e-05, + "learning_rate": 3.207834625685968e-08, + "loss": 0.0, + "num_input_tokens_seen": 127867816, + "step": 189775 + }, + { + "epoch": 4.63635697359099, + "grad_norm": 0.00019530528516042978, + "learning_rate": 3.205692359118595e-08, + "loss": 0.0, + "num_input_tokens_seen": 127871464, + "step": 189780 + }, + { + "epoch": 4.636479124422837, + "grad_norm": 0.00014822077355347574, + "learning_rate": 3.203550796465437e-08, + "loss": 0.0, + "num_input_tokens_seen": 127874664, + "step": 189785 + }, + { + "epoch": 4.636601275254685, + "grad_norm": 9.349855099571869e-05, + "learning_rate": 3.2014099377421165e-08, + "loss": 0.0, + "num_input_tokens_seen": 127877864, + "step": 189790 + }, + { + "epoch": 4.636723426086531, + "grad_norm": 1.5588731912430376e-05, + "learning_rate": 3.199269782964165e-08, + "loss": 0.0, + "num_input_tokens_seen": 127880872, + "step": 189795 + }, + { + "epoch": 4.636845576918379, + "grad_norm": 0.0001132964389398694, + "learning_rate": 3.197130332147169e-08, + "loss": 0.0, + "num_input_tokens_seen": 127884328, + "step": 189800 + }, + { + "epoch": 4.636967727750226, + "grad_norm": 4.8849913582671434e-05, + "learning_rate": 3.194991585306661e-08, + "loss": 0.0, + "num_input_tokens_seen": 127887528, + "step": 189805 + }, + { + "epoch": 4.637089878582073, + "grad_norm": 5.665285061695613e-05, + "learning_rate": 3.1928535424582294e-08, + "loss": 0.0, + "num_input_tokens_seen": 127890408, + "step": 189810 + }, + { + "epoch": 4.63721202941392, + "grad_norm": 0.0005679251626133919, + "learning_rate": 3.190716203617394e-08, + "loss": 0.0, + "num_input_tokens_seen": 127893992, + "step": 189815 + }, + { + "epoch": 4.637334180245768, + "grad_norm": 0.00044258078560233116, + "learning_rate": 3.1885795687997094e-08, + "loss": 0.0675, + "num_input_tokens_seen": 127897192, + "step": 189820 + }, + { + "epoch": 4.6374563310776145, + "grad_norm": 0.00010819250019267201, + "learning_rate": 3.1864436380207196e-08, + "loss": 0.0, + "num_input_tokens_seen": 127900520, + "step": 189825 + }, + { + "epoch": 4.637578481909462, + "grad_norm": 0.0031544826924800873, + "learning_rate": 3.1843084112959437e-08, + "loss": 0.0, + "num_input_tokens_seen": 127904168, + "step": 189830 + }, + { + "epoch": 4.637700632741309, + "grad_norm": 0.00038662514998577535, + "learning_rate": 3.182173888640927e-08, + "loss": 0.0, + "num_input_tokens_seen": 127907560, + "step": 189835 + }, + { + "epoch": 4.6378227835731565, + "grad_norm": 0.0006948575028218329, + "learning_rate": 3.180040070071166e-08, + "loss": 0.0, + "num_input_tokens_seen": 127910824, + "step": 189840 + }, + { + "epoch": 4.637944934405003, + "grad_norm": 0.00039235371514223516, + "learning_rate": 3.1779069556022055e-08, + "loss": 0.0, + "num_input_tokens_seen": 127914152, + "step": 189845 + }, + { + "epoch": 4.638067085236851, + "grad_norm": 0.00019603042164817452, + "learning_rate": 3.175774545249554e-08, + "loss": 0.0, + "num_input_tokens_seen": 127917416, + "step": 189850 + }, + { + "epoch": 4.638189236068698, + "grad_norm": 7.47626181691885e-05, + "learning_rate": 3.1736428390287005e-08, + "loss": 0.0, + "num_input_tokens_seen": 127921256, + "step": 189855 + }, + { + "epoch": 4.638311386900545, + "grad_norm": 0.00021746759011875838, + "learning_rate": 3.171511836955176e-08, + "loss": 0.0, + "num_input_tokens_seen": 127924456, + "step": 189860 + }, + { + "epoch": 4.638433537732392, + "grad_norm": 0.0006375667289830744, + "learning_rate": 3.1693815390444466e-08, + "loss": 0.0, + "num_input_tokens_seen": 127927848, + "step": 189865 + }, + { + "epoch": 4.638555688564239, + "grad_norm": 0.001184274209663272, + "learning_rate": 3.1672519453120325e-08, + "loss": 0.0, + "num_input_tokens_seen": 127931368, + "step": 189870 + }, + { + "epoch": 4.638677839396086, + "grad_norm": 5.584588507190347e-05, + "learning_rate": 3.1651230557733996e-08, + "loss": 0.0, + "num_input_tokens_seen": 127934568, + "step": 189875 + }, + { + "epoch": 4.638799990227933, + "grad_norm": 0.001166929374448955, + "learning_rate": 3.162994870444036e-08, + "loss": 0.0, + "num_input_tokens_seen": 127937384, + "step": 189880 + }, + { + "epoch": 4.638922141059781, + "grad_norm": 0.00011990226630587131, + "learning_rate": 3.1608673893394164e-08, + "loss": 0.0, + "num_input_tokens_seen": 127941352, + "step": 189885 + }, + { + "epoch": 4.6390442918916275, + "grad_norm": 0.0003758897364605218, + "learning_rate": 3.158740612475019e-08, + "loss": 0.0, + "num_input_tokens_seen": 127944424, + "step": 189890 + }, + { + "epoch": 4.639166442723475, + "grad_norm": 0.0006660557119175792, + "learning_rate": 3.1566145398663094e-08, + "loss": 0.0, + "num_input_tokens_seen": 127948136, + "step": 189895 + }, + { + "epoch": 4.639288593555322, + "grad_norm": 1.3435801267623901, + "learning_rate": 3.1544891715287405e-08, + "loss": 0.0007, + "num_input_tokens_seen": 127951592, + "step": 189900 + }, + { + "epoch": 4.6394107443871695, + "grad_norm": 0.000565065536648035, + "learning_rate": 3.1523645074777783e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127954728, + "step": 189905 + }, + { + "epoch": 4.639532895219016, + "grad_norm": 2.8673955966951326e-05, + "learning_rate": 3.150240547728877e-08, + "loss": 0.0, + "num_input_tokens_seen": 127958696, + "step": 189910 + }, + { + "epoch": 4.639655046050864, + "grad_norm": 9.768320160219446e-05, + "learning_rate": 3.1481172922974584e-08, + "loss": 0.0, + "num_input_tokens_seen": 127962152, + "step": 189915 + }, + { + "epoch": 4.639777196882711, + "grad_norm": 0.00023989586043171585, + "learning_rate": 3.145994741198998e-08, + "loss": 0.0, + "num_input_tokens_seen": 127965864, + "step": 189920 + }, + { + "epoch": 4.639899347714558, + "grad_norm": 1.8430131603963673e-05, + "learning_rate": 3.143872894448907e-08, + "loss": 0.0, + "num_input_tokens_seen": 127969192, + "step": 189925 + }, + { + "epoch": 4.640021498546405, + "grad_norm": 0.001935247448273003, + "learning_rate": 3.141751752062627e-08, + "loss": 0.0537, + "num_input_tokens_seen": 127972328, + "step": 189930 + }, + { + "epoch": 4.640143649378253, + "grad_norm": 0.0016746302135288715, + "learning_rate": 3.1396313140555684e-08, + "loss": 0.0, + "num_input_tokens_seen": 127975592, + "step": 189935 + }, + { + "epoch": 4.640265800210099, + "grad_norm": 0.00011264430213486776, + "learning_rate": 3.137511580443175e-08, + "loss": 0.0164, + "num_input_tokens_seen": 127978984, + "step": 189940 + }, + { + "epoch": 4.640387951041946, + "grad_norm": 0.0004881844506599009, + "learning_rate": 3.135392551240856e-08, + "loss": 0.0, + "num_input_tokens_seen": 127982312, + "step": 189945 + }, + { + "epoch": 4.640510101873794, + "grad_norm": 1.6157357094925828e-05, + "learning_rate": 3.1332742264639996e-08, + "loss": 0.0, + "num_input_tokens_seen": 127985704, + "step": 189950 + }, + { + "epoch": 4.640632252705641, + "grad_norm": 0.0005269379471428692, + "learning_rate": 3.131156606128038e-08, + "loss": 0.0, + "num_input_tokens_seen": 127988712, + "step": 189955 + }, + { + "epoch": 4.640754403537488, + "grad_norm": 0.0007660355186089873, + "learning_rate": 3.129039690248359e-08, + "loss": 0.0, + "num_input_tokens_seen": 127992296, + "step": 189960 + }, + { + "epoch": 4.640876554369335, + "grad_norm": 0.00013214911450631917, + "learning_rate": 3.12692347884036e-08, + "loss": 0.0, + "num_input_tokens_seen": 127995496, + "step": 189965 + }, + { + "epoch": 4.6409987052011825, + "grad_norm": 0.03304639831185341, + "learning_rate": 3.124807971919419e-08, + "loss": 0.0001, + "num_input_tokens_seen": 127998824, + "step": 189970 + }, + { + "epoch": 4.641120856033029, + "grad_norm": 0.00015475453983526677, + "learning_rate": 3.122693169500945e-08, + "loss": 0.0, + "num_input_tokens_seen": 128002088, + "step": 189975 + }, + { + "epoch": 4.641243006864877, + "grad_norm": 0.0007058978080749512, + "learning_rate": 3.120579071600282e-08, + "loss": 0.0, + "num_input_tokens_seen": 128005032, + "step": 189980 + }, + { + "epoch": 4.641365157696724, + "grad_norm": 0.0006571342819370329, + "learning_rate": 3.118465678232851e-08, + "loss": 0.0, + "num_input_tokens_seen": 128008872, + "step": 189985 + }, + { + "epoch": 4.641487308528571, + "grad_norm": 7.548756457254058e-06, + "learning_rate": 3.1163529894139837e-08, + "loss": 0.0, + "num_input_tokens_seen": 128012200, + "step": 189990 + }, + { + "epoch": 4.641609459360418, + "grad_norm": 0.0006949577946215868, + "learning_rate": 3.1142410051590573e-08, + "loss": 0.0, + "num_input_tokens_seen": 128015400, + "step": 189995 + }, + { + "epoch": 4.641731610192266, + "grad_norm": 0.00013040592602919787, + "learning_rate": 3.112129725483425e-08, + "loss": 0.0, + "num_input_tokens_seen": 128018664, + "step": 190000 + }, + { + "epoch": 4.641853761024112, + "grad_norm": 0.00020232108363416046, + "learning_rate": 3.1100191504024545e-08, + "loss": 0.0, + "num_input_tokens_seen": 128021864, + "step": 190005 + }, + { + "epoch": 4.64197591185596, + "grad_norm": 2.4844073777785525e-05, + "learning_rate": 3.1079092799314757e-08, + "loss": 0.0, + "num_input_tokens_seen": 128025320, + "step": 190010 + }, + { + "epoch": 4.642098062687807, + "grad_norm": 0.00045661506010219455, + "learning_rate": 3.105800114085844e-08, + "loss": 0.0, + "num_input_tokens_seen": 128028840, + "step": 190015 + }, + { + "epoch": 4.642220213519654, + "grad_norm": 0.00022646358411293477, + "learning_rate": 3.103691652880891e-08, + "loss": 0.0, + "num_input_tokens_seen": 128032360, + "step": 190020 + }, + { + "epoch": 4.642342364351501, + "grad_norm": 0.00022188508592080325, + "learning_rate": 3.1015838963319605e-08, + "loss": 0.0, + "num_input_tokens_seen": 128035560, + "step": 190025 + }, + { + "epoch": 4.642464515183349, + "grad_norm": 0.0008343648514710367, + "learning_rate": 3.099476844454374e-08, + "loss": 0.0, + "num_input_tokens_seen": 128039208, + "step": 190030 + }, + { + "epoch": 4.6425866660151955, + "grad_norm": 1.0348808245908003e-05, + "learning_rate": 3.0973704972634515e-08, + "loss": 0.0, + "num_input_tokens_seen": 128042536, + "step": 190035 + }, + { + "epoch": 4.642708816847042, + "grad_norm": 0.0011595974210649729, + "learning_rate": 3.095264854774515e-08, + "loss": 0.0, + "num_input_tokens_seen": 128045672, + "step": 190040 + }, + { + "epoch": 4.64283096767889, + "grad_norm": 0.00014588657359126955, + "learning_rate": 3.0931599170028745e-08, + "loss": 0.0, + "num_input_tokens_seen": 128048872, + "step": 190045 + }, + { + "epoch": 4.6429531185107376, + "grad_norm": 0.0019392999820411205, + "learning_rate": 3.0910556839638504e-08, + "loss": 0.0, + "num_input_tokens_seen": 128052392, + "step": 190050 + }, + { + "epoch": 4.643075269342584, + "grad_norm": 0.0005591996596194804, + "learning_rate": 3.0889521556727304e-08, + "loss": 0.0, + "num_input_tokens_seen": 128055784, + "step": 190055 + }, + { + "epoch": 4.643197420174431, + "grad_norm": 0.0012844577431678772, + "learning_rate": 3.086849332144803e-08, + "loss": 0.0, + "num_input_tokens_seen": 128059432, + "step": 190060 + }, + { + "epoch": 4.643319571006279, + "grad_norm": 6.231262523215264e-05, + "learning_rate": 3.084747213395389e-08, + "loss": 0.0, + "num_input_tokens_seen": 128062312, + "step": 190065 + }, + { + "epoch": 4.643441721838125, + "grad_norm": 8.152231021085754e-05, + "learning_rate": 3.0826457994397533e-08, + "loss": 0.0, + "num_input_tokens_seen": 128065384, + "step": 190070 + }, + { + "epoch": 4.643563872669973, + "grad_norm": 0.00023000915825832635, + "learning_rate": 3.080545090293196e-08, + "loss": 0.0, + "num_input_tokens_seen": 128069096, + "step": 190075 + }, + { + "epoch": 4.64368602350182, + "grad_norm": 0.0004738394927699119, + "learning_rate": 3.078445085970982e-08, + "loss": 0.0, + "num_input_tokens_seen": 128072744, + "step": 190080 + }, + { + "epoch": 4.643808174333667, + "grad_norm": 0.00019146144040860236, + "learning_rate": 3.076345786488377e-08, + "loss": 0.0, + "num_input_tokens_seen": 128076200, + "step": 190085 + }, + { + "epoch": 4.643930325165514, + "grad_norm": 0.00027293289895169437, + "learning_rate": 3.074247191860657e-08, + "loss": 0.0, + "num_input_tokens_seen": 128079464, + "step": 190090 + }, + { + "epoch": 4.644052475997362, + "grad_norm": 1.609481478226371e-05, + "learning_rate": 3.072149302103078e-08, + "loss": 0.0, + "num_input_tokens_seen": 128083240, + "step": 190095 + }, + { + "epoch": 4.644174626829209, + "grad_norm": 93.06655883789062, + "learning_rate": 3.0700521172309035e-08, + "loss": 0.0042, + "num_input_tokens_seen": 128086824, + "step": 190100 + }, + { + "epoch": 4.644296777661056, + "grad_norm": 0.00014359374472405761, + "learning_rate": 3.067955637259367e-08, + "loss": 0.0, + "num_input_tokens_seen": 128090472, + "step": 190105 + }, + { + "epoch": 4.644418928492903, + "grad_norm": 3.9056434616213664e-05, + "learning_rate": 3.065859862203746e-08, + "loss": 0.0, + "num_input_tokens_seen": 128093736, + "step": 190110 + }, + { + "epoch": 4.644541079324751, + "grad_norm": 0.0011020988458767533, + "learning_rate": 3.06376479207926e-08, + "loss": 0.0, + "num_input_tokens_seen": 128096744, + "step": 190115 + }, + { + "epoch": 4.644663230156597, + "grad_norm": 0.002124036429449916, + "learning_rate": 3.061670426901153e-08, + "loss": 0.0, + "num_input_tokens_seen": 128099752, + "step": 190120 + }, + { + "epoch": 4.644785380988445, + "grad_norm": 0.00013197159569244832, + "learning_rate": 3.059576766684635e-08, + "loss": 0.0, + "num_input_tokens_seen": 128102568, + "step": 190125 + }, + { + "epoch": 4.644907531820292, + "grad_norm": 0.003396212123334408, + "learning_rate": 3.0574838114449605e-08, + "loss": 0.0, + "num_input_tokens_seen": 128105448, + "step": 190130 + }, + { + "epoch": 4.6450296826521384, + "grad_norm": 0.00010021981870522723, + "learning_rate": 3.05539156119734e-08, + "loss": 0.0, + "num_input_tokens_seen": 128108520, + "step": 190135 + }, + { + "epoch": 4.645151833483986, + "grad_norm": 0.0007454652222804725, + "learning_rate": 3.053300015956983e-08, + "loss": 0.0, + "num_input_tokens_seen": 128111912, + "step": 190140 + }, + { + "epoch": 4.645273984315833, + "grad_norm": 0.0032192934304475784, + "learning_rate": 3.0512091757391114e-08, + "loss": 0.0, + "num_input_tokens_seen": 128115240, + "step": 190145 + }, + { + "epoch": 4.6453961351476805, + "grad_norm": 7.996581553015858e-05, + "learning_rate": 3.049119040558912e-08, + "loss": 0.0, + "num_input_tokens_seen": 128118696, + "step": 190150 + }, + { + "epoch": 4.645518285979527, + "grad_norm": 0.0012409802293404937, + "learning_rate": 3.047029610431595e-08, + "loss": 0.0, + "num_input_tokens_seen": 128121640, + "step": 190155 + }, + { + "epoch": 4.645640436811375, + "grad_norm": 0.00021187012316659093, + "learning_rate": 3.0449408853723715e-08, + "loss": 0.0, + "num_input_tokens_seen": 128124904, + "step": 190160 + }, + { + "epoch": 4.645762587643222, + "grad_norm": 0.00025122863007709384, + "learning_rate": 3.0428528653963946e-08, + "loss": 0.0, + "num_input_tokens_seen": 128128232, + "step": 190165 + }, + { + "epoch": 4.645884738475069, + "grad_norm": 0.0025913571007549763, + "learning_rate": 3.040765550518887e-08, + "loss": 0.0, + "num_input_tokens_seen": 128131176, + "step": 190170 + }, + { + "epoch": 4.646006889306916, + "grad_norm": 0.006195464637130499, + "learning_rate": 3.0386789407550017e-08, + "loss": 0.0, + "num_input_tokens_seen": 128134568, + "step": 190175 + }, + { + "epoch": 4.646129040138764, + "grad_norm": 0.0006205164827406406, + "learning_rate": 3.036593036119928e-08, + "loss": 0.0, + "num_input_tokens_seen": 128137704, + "step": 190180 + }, + { + "epoch": 4.64625119097061, + "grad_norm": 0.0023309632670134306, + "learning_rate": 3.034507836628841e-08, + "loss": 0.0, + "num_input_tokens_seen": 128141160, + "step": 190185 + }, + { + "epoch": 4.646373341802458, + "grad_norm": 0.0002740486233960837, + "learning_rate": 3.0324233422968747e-08, + "loss": 0.0, + "num_input_tokens_seen": 128145128, + "step": 190190 + }, + { + "epoch": 4.646495492634305, + "grad_norm": 4.773643740918487e-05, + "learning_rate": 3.030339553139216e-08, + "loss": 0.0, + "num_input_tokens_seen": 128148648, + "step": 190195 + }, + { + "epoch": 4.646617643466152, + "grad_norm": 9.275714546674863e-05, + "learning_rate": 3.0282564691709975e-08, + "loss": 0.0, + "num_input_tokens_seen": 128152360, + "step": 190200 + }, + { + "epoch": 4.646739794297999, + "grad_norm": 0.035588447004556656, + "learning_rate": 3.0261740904073965e-08, + "loss": 0.0, + "num_input_tokens_seen": 128155688, + "step": 190205 + }, + { + "epoch": 4.646861945129846, + "grad_norm": 0.018034692853689194, + "learning_rate": 3.024092416863533e-08, + "loss": 0.0, + "num_input_tokens_seen": 128158888, + "step": 190210 + }, + { + "epoch": 4.6469840959616935, + "grad_norm": 0.00043136090971529484, + "learning_rate": 3.02201144855454e-08, + "loss": 0.0, + "num_input_tokens_seen": 128162216, + "step": 190215 + }, + { + "epoch": 4.647106246793541, + "grad_norm": 0.00020243579638190567, + "learning_rate": 3.0199311854955837e-08, + "loss": 0.0, + "num_input_tokens_seen": 128165736, + "step": 190220 + }, + { + "epoch": 4.647228397625388, + "grad_norm": 0.00023691370734013617, + "learning_rate": 3.017851627701762e-08, + "loss": 0.0, + "num_input_tokens_seen": 128168808, + "step": 190225 + }, + { + "epoch": 4.647350548457235, + "grad_norm": 0.0003029266663361341, + "learning_rate": 3.015772775188219e-08, + "loss": 0.0, + "num_input_tokens_seen": 128172584, + "step": 190230 + }, + { + "epoch": 4.647472699289082, + "grad_norm": 0.0029733150731772184, + "learning_rate": 3.013694627970054e-08, + "loss": 0.0, + "num_input_tokens_seen": 128175912, + "step": 190235 + }, + { + "epoch": 4.647594850120929, + "grad_norm": 7.283730519702658e-05, + "learning_rate": 3.011617186062387e-08, + "loss": 0.0, + "num_input_tokens_seen": 128179240, + "step": 190240 + }, + { + "epoch": 4.647717000952777, + "grad_norm": 0.000707432976923883, + "learning_rate": 3.00954044948033e-08, + "loss": 0.0, + "num_input_tokens_seen": 128182376, + "step": 190245 + }, + { + "epoch": 4.647839151784623, + "grad_norm": 9.027096530189738e-05, + "learning_rate": 3.0074644182389694e-08, + "loss": 0.065, + "num_input_tokens_seen": 128185640, + "step": 190250 + }, + { + "epoch": 4.647961302616471, + "grad_norm": 0.0001138881707447581, + "learning_rate": 3.0053890923534273e-08, + "loss": 0.0, + "num_input_tokens_seen": 128188904, + "step": 190255 + }, + { + "epoch": 4.648083453448318, + "grad_norm": 0.00011583354353206232, + "learning_rate": 3.00331447183878e-08, + "loss": 0.0, + "num_input_tokens_seen": 128192104, + "step": 190260 + }, + { + "epoch": 4.648205604280165, + "grad_norm": 0.0005509683978743851, + "learning_rate": 3.0012405567101275e-08, + "loss": 0.0, + "num_input_tokens_seen": 128195368, + "step": 190265 + }, + { + "epoch": 4.648327755112012, + "grad_norm": 0.022481124848127365, + "learning_rate": 2.999167346982534e-08, + "loss": 0.0, + "num_input_tokens_seen": 128198824, + "step": 190270 + }, + { + "epoch": 4.64844990594386, + "grad_norm": 0.0005666337092407048, + "learning_rate": 2.997094842671099e-08, + "loss": 0.0, + "num_input_tokens_seen": 128201832, + "step": 190275 + }, + { + "epoch": 4.6485720567757065, + "grad_norm": 0.00038574362406507134, + "learning_rate": 2.9950230437908676e-08, + "loss": 0.0, + "num_input_tokens_seen": 128204776, + "step": 190280 + }, + { + "epoch": 4.648694207607554, + "grad_norm": 0.00022460322361439466, + "learning_rate": 2.992951950356926e-08, + "loss": 0.0, + "num_input_tokens_seen": 128208488, + "step": 190285 + }, + { + "epoch": 4.648816358439401, + "grad_norm": 0.00033724631066434085, + "learning_rate": 2.990881562384318e-08, + "loss": 0.0563, + "num_input_tokens_seen": 128212136, + "step": 190290 + }, + { + "epoch": 4.6489385092712485, + "grad_norm": 0.00024160873726941645, + "learning_rate": 2.9888118798881315e-08, + "loss": 0.0, + "num_input_tokens_seen": 128215400, + "step": 190295 + }, + { + "epoch": 4.649060660103095, + "grad_norm": 6.861024303361773e-05, + "learning_rate": 2.986742902883388e-08, + "loss": 0.0, + "num_input_tokens_seen": 128218856, + "step": 190300 + }, + { + "epoch": 4.649182810934942, + "grad_norm": 0.00023723019694443792, + "learning_rate": 2.984674631385142e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128221736, + "step": 190305 + }, + { + "epoch": 4.64930496176679, + "grad_norm": 0.0017610428621992469, + "learning_rate": 2.982607065408427e-08, + "loss": 0.0, + "num_input_tokens_seen": 128225576, + "step": 190310 + }, + { + "epoch": 4.649427112598637, + "grad_norm": 0.046799734234809875, + "learning_rate": 2.980540204968307e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128228776, + "step": 190315 + }, + { + "epoch": 4.649549263430484, + "grad_norm": 0.0007966930279508233, + "learning_rate": 2.9784740500797822e-08, + "loss": 0.0, + "num_input_tokens_seen": 128232232, + "step": 190320 + }, + { + "epoch": 4.649671414262331, + "grad_norm": 5.0024358642986044e-05, + "learning_rate": 2.9764086007578958e-08, + "loss": 0.0, + "num_input_tokens_seen": 128235560, + "step": 190325 + }, + { + "epoch": 4.649793565094178, + "grad_norm": 0.0002488717727828771, + "learning_rate": 2.974343857017647e-08, + "loss": 0.0, + "num_input_tokens_seen": 128239080, + "step": 190330 + }, + { + "epoch": 4.649915715926025, + "grad_norm": 0.003739925567060709, + "learning_rate": 2.9722798188740907e-08, + "loss": 0.0, + "num_input_tokens_seen": 128242088, + "step": 190335 + }, + { + "epoch": 4.650037866757873, + "grad_norm": 6.226352707017213e-05, + "learning_rate": 2.970216486342192e-08, + "loss": 0.0, + "num_input_tokens_seen": 128245224, + "step": 190340 + }, + { + "epoch": 4.6501600175897195, + "grad_norm": 0.0009980950271710753, + "learning_rate": 2.9681538594369837e-08, + "loss": 0.0, + "num_input_tokens_seen": 128248552, + "step": 190345 + }, + { + "epoch": 4.650282168421567, + "grad_norm": 0.0005805629189126194, + "learning_rate": 2.9660919381734652e-08, + "loss": 0.0, + "num_input_tokens_seen": 128252072, + "step": 190350 + }, + { + "epoch": 4.650404319253414, + "grad_norm": 4.1041876102099195e-05, + "learning_rate": 2.964030722566613e-08, + "loss": 0.0, + "num_input_tokens_seen": 128255528, + "step": 190355 + }, + { + "epoch": 4.6505264700852615, + "grad_norm": 0.00015798966342117637, + "learning_rate": 2.961970212631437e-08, + "loss": 0.0317, + "num_input_tokens_seen": 128259176, + "step": 190360 + }, + { + "epoch": 4.650648620917108, + "grad_norm": 0.00017236039275303483, + "learning_rate": 2.9599104083829153e-08, + "loss": 0.0, + "num_input_tokens_seen": 128262504, + "step": 190365 + }, + { + "epoch": 4.650770771748956, + "grad_norm": 0.00020189836504869163, + "learning_rate": 2.9578513098360235e-08, + "loss": 0.0, + "num_input_tokens_seen": 128265512, + "step": 190370 + }, + { + "epoch": 4.650892922580803, + "grad_norm": 0.00678278086706996, + "learning_rate": 2.9557929170057282e-08, + "loss": 0.0, + "num_input_tokens_seen": 128268776, + "step": 190375 + }, + { + "epoch": 4.65101507341265, + "grad_norm": 0.0014909114688634872, + "learning_rate": 2.9537352299070173e-08, + "loss": 0.0, + "num_input_tokens_seen": 128272232, + "step": 190380 + }, + { + "epoch": 4.651137224244497, + "grad_norm": 0.00044912658631801605, + "learning_rate": 2.9516782485548563e-08, + "loss": 0.0, + "num_input_tokens_seen": 128275624, + "step": 190385 + }, + { + "epoch": 4.651259375076345, + "grad_norm": 0.0003641039947979152, + "learning_rate": 2.9496219729641892e-08, + "loss": 0.0, + "num_input_tokens_seen": 128278888, + "step": 190390 + }, + { + "epoch": 4.651381525908191, + "grad_norm": 0.0002309923293069005, + "learning_rate": 2.947566403149959e-08, + "loss": 0.1665, + "num_input_tokens_seen": 128282152, + "step": 190395 + }, + { + "epoch": 4.651503676740038, + "grad_norm": 0.00023649900685995817, + "learning_rate": 2.9455115391271546e-08, + "loss": 0.0, + "num_input_tokens_seen": 128285800, + "step": 190400 + }, + { + "epoch": 4.651625827571886, + "grad_norm": 0.0006368455942720175, + "learning_rate": 2.9434573809106744e-08, + "loss": 0.0, + "num_input_tokens_seen": 128288936, + "step": 190405 + }, + { + "epoch": 4.6517479784037326, + "grad_norm": 0.10722141712903976, + "learning_rate": 2.9414039285154846e-08, + "loss": 0.0, + "num_input_tokens_seen": 128292008, + "step": 190410 + }, + { + "epoch": 4.65187012923558, + "grad_norm": 0.002036165911704302, + "learning_rate": 2.9393511819565063e-08, + "loss": 0.0, + "num_input_tokens_seen": 128295592, + "step": 190415 + }, + { + "epoch": 4.651992280067427, + "grad_norm": 0.004228158388286829, + "learning_rate": 2.9372991412486836e-08, + "loss": 0.0, + "num_input_tokens_seen": 128299112, + "step": 190420 + }, + { + "epoch": 4.652114430899275, + "grad_norm": 0.00015035481192171574, + "learning_rate": 2.9352478064069152e-08, + "loss": 0.0, + "num_input_tokens_seen": 128302568, + "step": 190425 + }, + { + "epoch": 4.652236581731121, + "grad_norm": 8.828377031022683e-05, + "learning_rate": 2.933197177446145e-08, + "loss": 0.0, + "num_input_tokens_seen": 128306024, + "step": 190430 + }, + { + "epoch": 4.652358732562969, + "grad_norm": 5.080635673948564e-06, + "learning_rate": 2.931147254381261e-08, + "loss": 0.0, + "num_input_tokens_seen": 128309480, + "step": 190435 + }, + { + "epoch": 4.652480883394816, + "grad_norm": 0.0010717608965933323, + "learning_rate": 2.9290980372271736e-08, + "loss": 0.0006, + "num_input_tokens_seen": 128312808, + "step": 190440 + }, + { + "epoch": 4.652603034226663, + "grad_norm": 4.420124605530873e-05, + "learning_rate": 2.927049525998815e-08, + "loss": 0.0, + "num_input_tokens_seen": 128316072, + "step": 190445 + }, + { + "epoch": 4.65272518505851, + "grad_norm": 0.006245040334761143, + "learning_rate": 2.925001720711051e-08, + "loss": 0.0002, + "num_input_tokens_seen": 128319720, + "step": 190450 + }, + { + "epoch": 4.652847335890358, + "grad_norm": 0.00013759582361672074, + "learning_rate": 2.9229546213787925e-08, + "loss": 0.0, + "num_input_tokens_seen": 128322984, + "step": 190455 + }, + { + "epoch": 4.6529694867222045, + "grad_norm": 0.003004379104822874, + "learning_rate": 2.920908228016916e-08, + "loss": 0.0, + "num_input_tokens_seen": 128326824, + "step": 190460 + }, + { + "epoch": 4.653091637554052, + "grad_norm": 0.0024323337711393833, + "learning_rate": 2.918862540640299e-08, + "loss": 0.0, + "num_input_tokens_seen": 128329896, + "step": 190465 + }, + { + "epoch": 4.653213788385899, + "grad_norm": 6.900146399857476e-05, + "learning_rate": 2.9168175592638288e-08, + "loss": 0.0, + "num_input_tokens_seen": 128332776, + "step": 190470 + }, + { + "epoch": 4.653335939217746, + "grad_norm": 0.00020709547970909625, + "learning_rate": 2.914773283902372e-08, + "loss": 0.0, + "num_input_tokens_seen": 128336360, + "step": 190475 + }, + { + "epoch": 4.653458090049593, + "grad_norm": 0.0002024859859375283, + "learning_rate": 2.9127297145708052e-08, + "loss": 0.0, + "num_input_tokens_seen": 128339624, + "step": 190480 + }, + { + "epoch": 4.653580240881441, + "grad_norm": 0.002173555316403508, + "learning_rate": 2.9106868512839722e-08, + "loss": 0.0, + "num_input_tokens_seen": 128343080, + "step": 190485 + }, + { + "epoch": 4.653702391713288, + "grad_norm": 0.0013618976809084415, + "learning_rate": 2.90864469405675e-08, + "loss": 0.0, + "num_input_tokens_seen": 128346280, + "step": 190490 + }, + { + "epoch": 4.653824542545134, + "grad_norm": 0.00013016993761993945, + "learning_rate": 2.906603242903971e-08, + "loss": 0.0, + "num_input_tokens_seen": 128349096, + "step": 190495 + }, + { + "epoch": 4.653946693376982, + "grad_norm": 0.0004320065781939775, + "learning_rate": 2.9045624978404793e-08, + "loss": 0.0, + "num_input_tokens_seen": 128352168, + "step": 190500 + }, + { + "epoch": 4.654068844208829, + "grad_norm": 0.0005773974698968232, + "learning_rate": 2.9025224588811402e-08, + "loss": 0.0, + "num_input_tokens_seen": 128355560, + "step": 190505 + }, + { + "epoch": 4.654190995040676, + "grad_norm": 0.004524011164903641, + "learning_rate": 2.9004831260407647e-08, + "loss": 0.0, + "num_input_tokens_seen": 128359016, + "step": 190510 + }, + { + "epoch": 4.654313145872523, + "grad_norm": 0.011500171385705471, + "learning_rate": 2.898444499334196e-08, + "loss": 0.0, + "num_input_tokens_seen": 128362344, + "step": 190515 + }, + { + "epoch": 4.654435296704371, + "grad_norm": 0.0002538670087233186, + "learning_rate": 2.896406578776256e-08, + "loss": 0.0, + "num_input_tokens_seen": 128365416, + "step": 190520 + }, + { + "epoch": 4.6545574475362175, + "grad_norm": 9.319341188529506e-05, + "learning_rate": 2.8943693643817656e-08, + "loss": 0.0, + "num_input_tokens_seen": 128368872, + "step": 190525 + }, + { + "epoch": 4.654679598368065, + "grad_norm": 3.173381264787167e-05, + "learning_rate": 2.8923328561655357e-08, + "loss": 0.0, + "num_input_tokens_seen": 128372392, + "step": 190530 + }, + { + "epoch": 4.654801749199912, + "grad_norm": 0.0012093858094885945, + "learning_rate": 2.8902970541423765e-08, + "loss": 0.0467, + "num_input_tokens_seen": 128375720, + "step": 190535 + }, + { + "epoch": 4.6549239000317595, + "grad_norm": 1.1120274393761065e-05, + "learning_rate": 2.8882619583270983e-08, + "loss": 0.0, + "num_input_tokens_seen": 128379496, + "step": 190540 + }, + { + "epoch": 4.655046050863606, + "grad_norm": 0.14474591612815857, + "learning_rate": 2.8862275687345004e-08, + "loss": 0.0, + "num_input_tokens_seen": 128383400, + "step": 190545 + }, + { + "epoch": 4.655168201695454, + "grad_norm": 0.07663882523775101, + "learning_rate": 2.8841938853793823e-08, + "loss": 0.0, + "num_input_tokens_seen": 128386792, + "step": 190550 + }, + { + "epoch": 4.655290352527301, + "grad_norm": 0.0017596009420230985, + "learning_rate": 2.8821609082765207e-08, + "loss": 0.0, + "num_input_tokens_seen": 128390120, + "step": 190555 + }, + { + "epoch": 4.655412503359148, + "grad_norm": 0.00014885960263200104, + "learning_rate": 2.880128637440704e-08, + "loss": 0.0, + "num_input_tokens_seen": 128393576, + "step": 190560 + }, + { + "epoch": 4.655534654190995, + "grad_norm": 0.0004244176670908928, + "learning_rate": 2.8780970728867204e-08, + "loss": 0.0, + "num_input_tokens_seen": 128397032, + "step": 190565 + }, + { + "epoch": 4.655656805022842, + "grad_norm": 6.144320650491863e-05, + "learning_rate": 2.8760662146293357e-08, + "loss": 0.0, + "num_input_tokens_seen": 128400168, + "step": 190570 + }, + { + "epoch": 4.655778955854689, + "grad_norm": 0.0005951565690338612, + "learning_rate": 2.874036062683327e-08, + "loss": 0.0, + "num_input_tokens_seen": 128403560, + "step": 190575 + }, + { + "epoch": 4.655901106686537, + "grad_norm": 0.006229200400412083, + "learning_rate": 2.8720066170634383e-08, + "loss": 0.0625, + "num_input_tokens_seen": 128406632, + "step": 190580 + }, + { + "epoch": 4.656023257518384, + "grad_norm": 1.0892995305766817e-05, + "learning_rate": 2.8699778777844574e-08, + "loss": 0.0, + "num_input_tokens_seen": 128410280, + "step": 190585 + }, + { + "epoch": 4.6561454083502305, + "grad_norm": 0.0011337065370753407, + "learning_rate": 2.867949844861106e-08, + "loss": 0.0, + "num_input_tokens_seen": 128414568, + "step": 190590 + }, + { + "epoch": 4.656267559182078, + "grad_norm": 0.00012795043585356325, + "learning_rate": 2.8659225183081613e-08, + "loss": 0.0, + "num_input_tokens_seen": 128417768, + "step": 190595 + }, + { + "epoch": 4.656389710013925, + "grad_norm": 0.0025257400702685118, + "learning_rate": 2.863895898140345e-08, + "loss": 0.0, + "num_input_tokens_seen": 128421032, + "step": 190600 + }, + { + "epoch": 4.6565118608457725, + "grad_norm": 0.0013095020549371839, + "learning_rate": 2.8618699843724115e-08, + "loss": 0.0, + "num_input_tokens_seen": 128424168, + "step": 190605 + }, + { + "epoch": 4.656634011677619, + "grad_norm": 0.0003252495953347534, + "learning_rate": 2.8598447770190938e-08, + "loss": 0.0, + "num_input_tokens_seen": 128427496, + "step": 190610 + }, + { + "epoch": 4.656756162509467, + "grad_norm": 0.00011898396769538522, + "learning_rate": 2.857820276095091e-08, + "loss": 0.0, + "num_input_tokens_seen": 128430824, + "step": 190615 + }, + { + "epoch": 4.656878313341314, + "grad_norm": 0.0004511339357122779, + "learning_rate": 2.855796481615158e-08, + "loss": 0.0, + "num_input_tokens_seen": 128434088, + "step": 190620 + }, + { + "epoch": 4.657000464173161, + "grad_norm": 3.237876444472931e-05, + "learning_rate": 2.8537733935940055e-08, + "loss": 0.0, + "num_input_tokens_seen": 128437864, + "step": 190625 + }, + { + "epoch": 4.657122615005008, + "grad_norm": 0.000573989178519696, + "learning_rate": 2.851751012046333e-08, + "loss": 0.0, + "num_input_tokens_seen": 128441576, + "step": 190630 + }, + { + "epoch": 4.657244765836856, + "grad_norm": 0.0007632462657056749, + "learning_rate": 2.8497293369868723e-08, + "loss": 0.0, + "num_input_tokens_seen": 128444520, + "step": 190635 + }, + { + "epoch": 4.657366916668702, + "grad_norm": 0.003807037603110075, + "learning_rate": 2.8477083684302904e-08, + "loss": 0.0, + "num_input_tokens_seen": 128448296, + "step": 190640 + }, + { + "epoch": 4.65748906750055, + "grad_norm": 0.0006615742458961904, + "learning_rate": 2.8456881063913195e-08, + "loss": 0.0, + "num_input_tokens_seen": 128451240, + "step": 190645 + }, + { + "epoch": 4.657611218332397, + "grad_norm": 1.733975477691274e-05, + "learning_rate": 2.843668550884626e-08, + "loss": 0.0, + "num_input_tokens_seen": 128454504, + "step": 190650 + }, + { + "epoch": 4.657733369164244, + "grad_norm": 0.0003319148381706327, + "learning_rate": 2.8416497019249086e-08, + "loss": 0.0, + "num_input_tokens_seen": 128457640, + "step": 190655 + }, + { + "epoch": 4.657855519996091, + "grad_norm": 0.0028955182060599327, + "learning_rate": 2.839631559526856e-08, + "loss": 0.0, + "num_input_tokens_seen": 128461096, + "step": 190660 + }, + { + "epoch": 4.657977670827938, + "grad_norm": 6.624946399824694e-05, + "learning_rate": 2.8376141237051234e-08, + "loss": 0.0, + "num_input_tokens_seen": 128464424, + "step": 190665 + }, + { + "epoch": 4.6580998216597855, + "grad_norm": 0.01330376137048006, + "learning_rate": 2.8355973944743982e-08, + "loss": 0.0, + "num_input_tokens_seen": 128467944, + "step": 190670 + }, + { + "epoch": 4.658221972491633, + "grad_norm": 0.0015481158625334501, + "learning_rate": 2.8335813718493474e-08, + "loss": 0.0, + "num_input_tokens_seen": 128471016, + "step": 190675 + }, + { + "epoch": 4.65834412332348, + "grad_norm": 0.00015667296247556806, + "learning_rate": 2.8315660558446252e-08, + "loss": 0.0, + "num_input_tokens_seen": 128474600, + "step": 190680 + }, + { + "epoch": 4.658466274155327, + "grad_norm": 0.0011868600267916918, + "learning_rate": 2.829551446474887e-08, + "loss": 0.0, + "num_input_tokens_seen": 128478056, + "step": 190685 + }, + { + "epoch": 4.658588424987174, + "grad_norm": 0.18346768617630005, + "learning_rate": 2.8275375437547876e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128481256, + "step": 190690 + }, + { + "epoch": 4.658710575819021, + "grad_norm": 6.0428901633713394e-05, + "learning_rate": 2.825524347698971e-08, + "loss": 0.0, + "num_input_tokens_seen": 128484456, + "step": 190695 + }, + { + "epoch": 4.658832726650869, + "grad_norm": 0.0029399923514574766, + "learning_rate": 2.8235118583220918e-08, + "loss": 0.0, + "num_input_tokens_seen": 128487464, + "step": 190700 + }, + { + "epoch": 4.658954877482715, + "grad_norm": 0.0033414326608181, + "learning_rate": 2.8215000756387496e-08, + "loss": 0.0, + "num_input_tokens_seen": 128490728, + "step": 190705 + }, + { + "epoch": 4.659077028314563, + "grad_norm": 0.00032659669523127377, + "learning_rate": 2.8194889996636217e-08, + "loss": 0.0, + "num_input_tokens_seen": 128494120, + "step": 190710 + }, + { + "epoch": 4.65919917914641, + "grad_norm": 0.00039978005224838853, + "learning_rate": 2.8174786304112853e-08, + "loss": 0.0, + "num_input_tokens_seen": 128497192, + "step": 190715 + }, + { + "epoch": 4.659321329978257, + "grad_norm": 0.006133579649031162, + "learning_rate": 2.8154689678963948e-08, + "loss": 0.0002, + "num_input_tokens_seen": 128500456, + "step": 190720 + }, + { + "epoch": 4.659443480810104, + "grad_norm": 0.0007687496836297214, + "learning_rate": 2.8134600121335506e-08, + "loss": 0.0, + "num_input_tokens_seen": 128503528, + "step": 190725 + }, + { + "epoch": 4.659565631641952, + "grad_norm": 1.2737462520599365, + "learning_rate": 2.8114517631373623e-08, + "loss": 0.0002, + "num_input_tokens_seen": 128506536, + "step": 190730 + }, + { + "epoch": 4.659687782473799, + "grad_norm": 0.0026064272969961166, + "learning_rate": 2.8094442209224412e-08, + "loss": 0.0, + "num_input_tokens_seen": 128509992, + "step": 190735 + }, + { + "epoch": 4.659809933305646, + "grad_norm": 0.0006508814403787255, + "learning_rate": 2.8074373855033862e-08, + "loss": 0.0, + "num_input_tokens_seen": 128513576, + "step": 190740 + }, + { + "epoch": 4.659932084137493, + "grad_norm": 4.2336319893365726e-05, + "learning_rate": 2.8054312568947747e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128516968, + "step": 190745 + }, + { + "epoch": 4.660054234969341, + "grad_norm": 0.0007426925003528595, + "learning_rate": 2.803425835111217e-08, + "loss": 0.0, + "num_input_tokens_seen": 128520232, + "step": 190750 + }, + { + "epoch": 4.660176385801187, + "grad_norm": 0.000315658311592415, + "learning_rate": 2.801421120167291e-08, + "loss": 0.0, + "num_input_tokens_seen": 128523560, + "step": 190755 + }, + { + "epoch": 4.660298536633034, + "grad_norm": 0.0018822962883859873, + "learning_rate": 2.7994171120775732e-08, + "loss": 0.0, + "num_input_tokens_seen": 128526696, + "step": 190760 + }, + { + "epoch": 4.660420687464882, + "grad_norm": 8.385728506254964e-06, + "learning_rate": 2.7974138108566414e-08, + "loss": 0.0, + "num_input_tokens_seen": 128529832, + "step": 190765 + }, + { + "epoch": 4.660542838296728, + "grad_norm": 8.750159031478688e-05, + "learning_rate": 2.7954112165190502e-08, + "loss": 0.0, + "num_input_tokens_seen": 128533736, + "step": 190770 + }, + { + "epoch": 4.660664989128576, + "grad_norm": 0.00037626747507601976, + "learning_rate": 2.793409329079377e-08, + "loss": 0.0, + "num_input_tokens_seen": 128537448, + "step": 190775 + }, + { + "epoch": 4.660787139960423, + "grad_norm": 2.4590988687123172e-05, + "learning_rate": 2.791408148552188e-08, + "loss": 0.0, + "num_input_tokens_seen": 128540840, + "step": 190780 + }, + { + "epoch": 4.6609092907922705, + "grad_norm": 0.00022164350957609713, + "learning_rate": 2.7894076749520158e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128544360, + "step": 190785 + }, + { + "epoch": 4.661031441624117, + "grad_norm": 0.000510553945787251, + "learning_rate": 2.7874079082934155e-08, + "loss": 0.0, + "num_input_tokens_seen": 128548456, + "step": 190790 + }, + { + "epoch": 4.661153592455965, + "grad_norm": 0.0030823589768260717, + "learning_rate": 2.7854088485909312e-08, + "loss": 0.0, + "num_input_tokens_seen": 128551784, + "step": 190795 + }, + { + "epoch": 4.661275743287812, + "grad_norm": 0.00036780742811970413, + "learning_rate": 2.7834104958591176e-08, + "loss": 0.0, + "num_input_tokens_seen": 128554792, + "step": 190800 + }, + { + "epoch": 4.661397894119659, + "grad_norm": 7.580179226351902e-05, + "learning_rate": 2.7814128501124856e-08, + "loss": 0.0, + "num_input_tokens_seen": 128557736, + "step": 190805 + }, + { + "epoch": 4.661520044951506, + "grad_norm": 0.0011283807689324021, + "learning_rate": 2.7794159113655567e-08, + "loss": 0.0, + "num_input_tokens_seen": 128560808, + "step": 190810 + }, + { + "epoch": 4.661642195783354, + "grad_norm": 0.000357277225703001, + "learning_rate": 2.7774196796328752e-08, + "loss": 0.0, + "num_input_tokens_seen": 128563944, + "step": 190815 + }, + { + "epoch": 4.6617643466152, + "grad_norm": 0.0012690431904047728, + "learning_rate": 2.775424154928929e-08, + "loss": 0.0, + "num_input_tokens_seen": 128567976, + "step": 190820 + }, + { + "epoch": 4.661886497447048, + "grad_norm": 2.7468084226711653e-05, + "learning_rate": 2.7734293372682737e-08, + "loss": 0.0002, + "num_input_tokens_seen": 128571496, + "step": 190825 + }, + { + "epoch": 4.662008648278895, + "grad_norm": 0.00075659150024876, + "learning_rate": 2.771435226665364e-08, + "loss": 0.0, + "num_input_tokens_seen": 128574568, + "step": 190830 + }, + { + "epoch": 4.6621307991107415, + "grad_norm": 0.0006138571770861745, + "learning_rate": 2.769441823134755e-08, + "loss": 0.0, + "num_input_tokens_seen": 128577640, + "step": 190835 + }, + { + "epoch": 4.662252949942589, + "grad_norm": 0.004741390701383352, + "learning_rate": 2.7674491266909016e-08, + "loss": 0.0, + "num_input_tokens_seen": 128581160, + "step": 190840 + }, + { + "epoch": 4.662375100774437, + "grad_norm": 0.00347688514739275, + "learning_rate": 2.765457137348304e-08, + "loss": 0.0, + "num_input_tokens_seen": 128584104, + "step": 190845 + }, + { + "epoch": 4.6624972516062835, + "grad_norm": 7.782386092003435e-05, + "learning_rate": 2.7634658551214717e-08, + "loss": 0.0, + "num_input_tokens_seen": 128587368, + "step": 190850 + }, + { + "epoch": 4.66261940243813, + "grad_norm": 2.530582969484385e-05, + "learning_rate": 2.7614752800248608e-08, + "loss": 0.0, + "num_input_tokens_seen": 128590696, + "step": 190855 + }, + { + "epoch": 4.662741553269978, + "grad_norm": 0.00023780610354151577, + "learning_rate": 2.7594854120729594e-08, + "loss": 0.0, + "num_input_tokens_seen": 128594024, + "step": 190860 + }, + { + "epoch": 4.662863704101825, + "grad_norm": 5.801659062854014e-05, + "learning_rate": 2.7574962512802334e-08, + "loss": 0.0, + "num_input_tokens_seen": 128597544, + "step": 190865 + }, + { + "epoch": 4.662985854933672, + "grad_norm": 3.095933789154515e-05, + "learning_rate": 2.7555077976611385e-08, + "loss": 0.0, + "num_input_tokens_seen": 128603112, + "step": 190870 + }, + { + "epoch": 4.663108005765519, + "grad_norm": 0.0004294172103982419, + "learning_rate": 2.7535200512301626e-08, + "loss": 0.0, + "num_input_tokens_seen": 128606760, + "step": 190875 + }, + { + "epoch": 4.663230156597367, + "grad_norm": 9.709588630357757e-05, + "learning_rate": 2.7515330120017387e-08, + "loss": 0.0, + "num_input_tokens_seen": 128610024, + "step": 190880 + }, + { + "epoch": 4.663352307429213, + "grad_norm": 0.0002693548158276826, + "learning_rate": 2.7495466799903222e-08, + "loss": 0.0, + "num_input_tokens_seen": 128613480, + "step": 190885 + }, + { + "epoch": 4.663474458261061, + "grad_norm": 0.00010838372691068798, + "learning_rate": 2.7475610552103568e-08, + "loss": 0.0, + "num_input_tokens_seen": 128616808, + "step": 190890 + }, + { + "epoch": 4.663596609092908, + "grad_norm": 0.0002789755817502737, + "learning_rate": 2.7455761376762976e-08, + "loss": 0.0, + "num_input_tokens_seen": 128619944, + "step": 190895 + }, + { + "epoch": 4.663718759924755, + "grad_norm": 0.0007267400505952537, + "learning_rate": 2.7435919274025553e-08, + "loss": 0.0, + "num_input_tokens_seen": 128623016, + "step": 190900 + }, + { + "epoch": 4.663840910756602, + "grad_norm": 0.0002554529346525669, + "learning_rate": 2.7416084244035852e-08, + "loss": 0.0, + "num_input_tokens_seen": 128626152, + "step": 190905 + }, + { + "epoch": 4.66396306158845, + "grad_norm": 0.00016573209722992033, + "learning_rate": 2.739625628693776e-08, + "loss": 0.0, + "num_input_tokens_seen": 128629160, + "step": 190910 + }, + { + "epoch": 4.6640852124202965, + "grad_norm": 0.00022480067855212837, + "learning_rate": 2.737643540287593e-08, + "loss": 0.0, + "num_input_tokens_seen": 128632232, + "step": 190915 + }, + { + "epoch": 4.664207363252144, + "grad_norm": 0.001412676996551454, + "learning_rate": 2.7356621591994146e-08, + "loss": 0.0, + "num_input_tokens_seen": 128635688, + "step": 190920 + }, + { + "epoch": 4.664329514083991, + "grad_norm": 0.00020378133922349662, + "learning_rate": 2.733681485443662e-08, + "loss": 0.0, + "num_input_tokens_seen": 128639336, + "step": 190925 + }, + { + "epoch": 4.664451664915838, + "grad_norm": 5.148085983819328e-05, + "learning_rate": 2.731701519034735e-08, + "loss": 0.0, + "num_input_tokens_seen": 128642856, + "step": 190930 + }, + { + "epoch": 4.664573815747685, + "grad_norm": 4.189375613350421e-05, + "learning_rate": 2.729722259987044e-08, + "loss": 0.0, + "num_input_tokens_seen": 128646184, + "step": 190935 + }, + { + "epoch": 4.664695966579533, + "grad_norm": 0.00539232324808836, + "learning_rate": 2.7277437083149668e-08, + "loss": 0.0, + "num_input_tokens_seen": 128649576, + "step": 190940 + }, + { + "epoch": 4.66481811741138, + "grad_norm": 0.0003227377019356936, + "learning_rate": 2.725765864032914e-08, + "loss": 0.0, + "num_input_tokens_seen": 128653032, + "step": 190945 + }, + { + "epoch": 4.664940268243226, + "grad_norm": 0.0002500084519851953, + "learning_rate": 2.7237887271552406e-08, + "loss": 0.0, + "num_input_tokens_seen": 128656232, + "step": 190950 + }, + { + "epoch": 4.665062419075074, + "grad_norm": 0.0006451523513533175, + "learning_rate": 2.7218122976963465e-08, + "loss": 0.0, + "num_input_tokens_seen": 128659688, + "step": 190955 + }, + { + "epoch": 4.665184569906921, + "grad_norm": 0.0003521182225085795, + "learning_rate": 2.7198365756705976e-08, + "loss": 0.0, + "num_input_tokens_seen": 128663656, + "step": 190960 + }, + { + "epoch": 4.665306720738768, + "grad_norm": 0.0023154793307185173, + "learning_rate": 2.7178615610923606e-08, + "loss": 0.0, + "num_input_tokens_seen": 128666920, + "step": 190965 + }, + { + "epoch": 4.665428871570615, + "grad_norm": 4.0537401218898594e-05, + "learning_rate": 2.7158872539760014e-08, + "loss": 0.0, + "num_input_tokens_seen": 128670632, + "step": 190970 + }, + { + "epoch": 4.665551022402463, + "grad_norm": 0.00011217274004593492, + "learning_rate": 2.7139136543358754e-08, + "loss": 0.0, + "num_input_tokens_seen": 128673896, + "step": 190975 + }, + { + "epoch": 4.6656731732343095, + "grad_norm": 0.0001300013973377645, + "learning_rate": 2.711940762186349e-08, + "loss": 0.0, + "num_input_tokens_seen": 128677096, + "step": 190980 + }, + { + "epoch": 4.665795324066157, + "grad_norm": 0.0002665658830665052, + "learning_rate": 2.7099685775417324e-08, + "loss": 0.041, + "num_input_tokens_seen": 128680232, + "step": 190985 + }, + { + "epoch": 4.665917474898004, + "grad_norm": 0.0036078591365367174, + "learning_rate": 2.707997100416415e-08, + "loss": 0.0, + "num_input_tokens_seen": 128683752, + "step": 190990 + }, + { + "epoch": 4.6660396257298515, + "grad_norm": 0.00020211531955283135, + "learning_rate": 2.7060263308246956e-08, + "loss": 0.0, + "num_input_tokens_seen": 128687400, + "step": 190995 + }, + { + "epoch": 4.666161776561698, + "grad_norm": 0.0020623011514544487, + "learning_rate": 2.704056268780919e-08, + "loss": 0.0, + "num_input_tokens_seen": 128690472, + "step": 191000 + }, + { + "epoch": 4.666283927393546, + "grad_norm": 0.00035755732096731663, + "learning_rate": 2.7020869142994284e-08, + "loss": 0.0, + "num_input_tokens_seen": 128693992, + "step": 191005 + }, + { + "epoch": 4.666406078225393, + "grad_norm": 0.0050127855502069, + "learning_rate": 2.7001182673945354e-08, + "loss": 0.0, + "num_input_tokens_seen": 128697064, + "step": 191010 + }, + { + "epoch": 4.66652822905724, + "grad_norm": 0.0020575481466948986, + "learning_rate": 2.6981503280805395e-08, + "loss": 0.0, + "num_input_tokens_seen": 128700456, + "step": 191015 + }, + { + "epoch": 4.666650379889087, + "grad_norm": 5.921476986259222e-05, + "learning_rate": 2.6961830963717737e-08, + "loss": 0.0, + "num_input_tokens_seen": 128703464, + "step": 191020 + }, + { + "epoch": 4.666772530720934, + "grad_norm": 0.0003617874172050506, + "learning_rate": 2.694216572282526e-08, + "loss": 0.0, + "num_input_tokens_seen": 128706856, + "step": 191025 + }, + { + "epoch": 4.666894681552781, + "grad_norm": 0.015034444630146027, + "learning_rate": 2.692250755827119e-08, + "loss": 0.0, + "num_input_tokens_seen": 128710888, + "step": 191030 + }, + { + "epoch": 4.667016832384628, + "grad_norm": 0.0004685759777203202, + "learning_rate": 2.69028564701983e-08, + "loss": 0.0, + "num_input_tokens_seen": 128714408, + "step": 191035 + }, + { + "epoch": 4.667138983216476, + "grad_norm": 0.0005598337156698108, + "learning_rate": 2.6883212458749694e-08, + "loss": 0.0, + "num_input_tokens_seen": 128717544, + "step": 191040 + }, + { + "epoch": 4.6672611340483225, + "grad_norm": 1.4924536117177922e-05, + "learning_rate": 2.686357552406793e-08, + "loss": 0.0, + "num_input_tokens_seen": 128721000, + "step": 191045 + }, + { + "epoch": 4.66738328488017, + "grad_norm": 6.75343835609965e-05, + "learning_rate": 2.684394566629611e-08, + "loss": 0.0, + "num_input_tokens_seen": 128724200, + "step": 191050 + }, + { + "epoch": 4.667505435712017, + "grad_norm": 1.3195672181609552e-05, + "learning_rate": 2.682432288557679e-08, + "loss": 0.0, + "num_input_tokens_seen": 128727464, + "step": 191055 + }, + { + "epoch": 4.667627586543865, + "grad_norm": 0.0004628331807907671, + "learning_rate": 2.6804707182052633e-08, + "loss": 0.0, + "num_input_tokens_seen": 128730536, + "step": 191060 + }, + { + "epoch": 4.667749737375711, + "grad_norm": 0.005984678864479065, + "learning_rate": 2.6785098555866635e-08, + "loss": 0.0, + "num_input_tokens_seen": 128734120, + "step": 191065 + }, + { + "epoch": 4.667871888207559, + "grad_norm": 0.0022569738794118166, + "learning_rate": 2.676549700716102e-08, + "loss": 0.0, + "num_input_tokens_seen": 128737640, + "step": 191070 + }, + { + "epoch": 4.667994039039406, + "grad_norm": 0.00012861998402513564, + "learning_rate": 2.6745902536078558e-08, + "loss": 0.0, + "num_input_tokens_seen": 128740904, + "step": 191075 + }, + { + "epoch": 4.668116189871253, + "grad_norm": 3.7217909266473725e-05, + "learning_rate": 2.6726315142761578e-08, + "loss": 0.0, + "num_input_tokens_seen": 128744296, + "step": 191080 + }, + { + "epoch": 4.6682383407031, + "grad_norm": 0.018890826031565666, + "learning_rate": 2.670673482735275e-08, + "loss": 0.0, + "num_input_tokens_seen": 128747624, + "step": 191085 + }, + { + "epoch": 4.668360491534948, + "grad_norm": 2.0563811631291173e-05, + "learning_rate": 2.668716158999418e-08, + "loss": 0.0, + "num_input_tokens_seen": 128751208, + "step": 191090 + }, + { + "epoch": 4.668482642366794, + "grad_norm": 0.00013304762251209468, + "learning_rate": 2.6667595430828417e-08, + "loss": 0.0, + "num_input_tokens_seen": 128754600, + "step": 191095 + }, + { + "epoch": 4.668604793198641, + "grad_norm": 0.005235996562987566, + "learning_rate": 2.6648036349997792e-08, + "loss": 0.0, + "num_input_tokens_seen": 128757992, + "step": 191100 + }, + { + "epoch": 4.668726944030489, + "grad_norm": 0.022949010133743286, + "learning_rate": 2.662848434764431e-08, + "loss": 0.0, + "num_input_tokens_seen": 128761640, + "step": 191105 + }, + { + "epoch": 4.6688490948623365, + "grad_norm": 0.0007702650036662817, + "learning_rate": 2.6608939423910404e-08, + "loss": 0.0, + "num_input_tokens_seen": 128764584, + "step": 191110 + }, + { + "epoch": 4.668971245694183, + "grad_norm": 0.00018525759514886886, + "learning_rate": 2.6589401578938075e-08, + "loss": 0.0, + "num_input_tokens_seen": 128768040, + "step": 191115 + }, + { + "epoch": 4.66909339652603, + "grad_norm": 0.00010585349809844047, + "learning_rate": 2.6569870812869323e-08, + "loss": 0.0, + "num_input_tokens_seen": 128771368, + "step": 191120 + }, + { + "epoch": 4.669215547357878, + "grad_norm": 0.00026610007626004517, + "learning_rate": 2.6550347125846472e-08, + "loss": 0.0, + "num_input_tokens_seen": 128774632, + "step": 191125 + }, + { + "epoch": 4.669337698189724, + "grad_norm": 0.00032241616281680763, + "learning_rate": 2.6530830518011194e-08, + "loss": 0.0, + "num_input_tokens_seen": 128777704, + "step": 191130 + }, + { + "epoch": 4.669459849021572, + "grad_norm": 0.0034227308351546526, + "learning_rate": 2.651132098950559e-08, + "loss": 0.0, + "num_input_tokens_seen": 128781160, + "step": 191135 + }, + { + "epoch": 4.669581999853419, + "grad_norm": 0.1563401222229004, + "learning_rate": 2.6491818540471446e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128784104, + "step": 191140 + }, + { + "epoch": 4.669704150685266, + "grad_norm": 8.37320931168506e-06, + "learning_rate": 2.6472323171050747e-08, + "loss": 0.0, + "num_input_tokens_seen": 128787752, + "step": 191145 + }, + { + "epoch": 4.669826301517113, + "grad_norm": 0.0012280684895813465, + "learning_rate": 2.6452834881385055e-08, + "loss": 0.0, + "num_input_tokens_seen": 128790888, + "step": 191150 + }, + { + "epoch": 4.669948452348961, + "grad_norm": 3.109731187578291e-05, + "learning_rate": 2.6433353671616142e-08, + "loss": 0.0, + "num_input_tokens_seen": 128793960, + "step": 191155 + }, + { + "epoch": 4.6700706031808075, + "grad_norm": 0.0015018595149740577, + "learning_rate": 2.64138795418859e-08, + "loss": 0.0, + "num_input_tokens_seen": 128797416, + "step": 191160 + }, + { + "epoch": 4.670192754012655, + "grad_norm": 0.0013169918674975634, + "learning_rate": 2.6394412492335648e-08, + "loss": 0.0, + "num_input_tokens_seen": 128801128, + "step": 191165 + }, + { + "epoch": 4.670314904844502, + "grad_norm": 0.0002888040617108345, + "learning_rate": 2.6374952523107286e-08, + "loss": 0.0, + "num_input_tokens_seen": 128804136, + "step": 191170 + }, + { + "epoch": 4.6704370556763495, + "grad_norm": 3.728661977220327e-05, + "learning_rate": 2.6355499634341916e-08, + "loss": 0.0, + "num_input_tokens_seen": 128807400, + "step": 191175 + }, + { + "epoch": 4.670559206508196, + "grad_norm": 0.00021267979172989726, + "learning_rate": 2.6336053826181314e-08, + "loss": 0.0, + "num_input_tokens_seen": 128811240, + "step": 191180 + }, + { + "epoch": 4.670681357340044, + "grad_norm": 7.831936090951785e-05, + "learning_rate": 2.6316615098766927e-08, + "loss": 0.0, + "num_input_tokens_seen": 128814440, + "step": 191185 + }, + { + "epoch": 4.670803508171891, + "grad_norm": 0.0009277883800677955, + "learning_rate": 2.6297183452239856e-08, + "loss": 0.0, + "num_input_tokens_seen": 128817960, + "step": 191190 + }, + { + "epoch": 4.670925659003737, + "grad_norm": 0.002798869274556637, + "learning_rate": 2.6277758886741664e-08, + "loss": 0.0, + "num_input_tokens_seen": 128821416, + "step": 191195 + }, + { + "epoch": 4.671047809835585, + "grad_norm": 0.0005861495155841112, + "learning_rate": 2.6258341402413454e-08, + "loss": 0.0, + "num_input_tokens_seen": 128824872, + "step": 191200 + }, + { + "epoch": 4.671169960667433, + "grad_norm": 0.0006913174293003976, + "learning_rate": 2.6238930999396557e-08, + "loss": 0.0, + "num_input_tokens_seen": 128828136, + "step": 191205 + }, + { + "epoch": 4.671292111499279, + "grad_norm": 0.003023535944521427, + "learning_rate": 2.6219527677831976e-08, + "loss": 0.0, + "num_input_tokens_seen": 128831720, + "step": 191210 + }, + { + "epoch": 4.671414262331126, + "grad_norm": 9.19162903301185e-06, + "learning_rate": 2.6200131437861038e-08, + "loss": 0.0, + "num_input_tokens_seen": 128834792, + "step": 191215 + }, + { + "epoch": 4.671536413162974, + "grad_norm": 0.020835041999816895, + "learning_rate": 2.6180742279624523e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128837800, + "step": 191220 + }, + { + "epoch": 4.6716585639948205, + "grad_norm": 0.0006879670545458794, + "learning_rate": 2.616136020326365e-08, + "loss": 0.0, + "num_input_tokens_seen": 128840936, + "step": 191225 + }, + { + "epoch": 4.671780714826668, + "grad_norm": 0.0040020509622991085, + "learning_rate": 2.6141985208919305e-08, + "loss": 0.0, + "num_input_tokens_seen": 128844648, + "step": 191230 + }, + { + "epoch": 4.671902865658515, + "grad_norm": 0.02804373949766159, + "learning_rate": 2.6122617296732376e-08, + "loss": 0.0, + "num_input_tokens_seen": 128847976, + "step": 191235 + }, + { + "epoch": 4.6720250164903625, + "grad_norm": 0.0001148119117715396, + "learning_rate": 2.610325646684375e-08, + "loss": 0.0, + "num_input_tokens_seen": 128850984, + "step": 191240 + }, + { + "epoch": 4.672147167322209, + "grad_norm": 16.482566833496094, + "learning_rate": 2.6083902719393978e-08, + "loss": 0.1687, + "num_input_tokens_seen": 128854760, + "step": 191245 + }, + { + "epoch": 4.672269318154057, + "grad_norm": 7.6309333962854e-05, + "learning_rate": 2.606455605452418e-08, + "loss": 0.0, + "num_input_tokens_seen": 128858088, + "step": 191250 + }, + { + "epoch": 4.672391468985904, + "grad_norm": 0.00012644918751902878, + "learning_rate": 2.6045216472374898e-08, + "loss": 0.0, + "num_input_tokens_seen": 128861672, + "step": 191255 + }, + { + "epoch": 4.672513619817751, + "grad_norm": 0.0014578666305169463, + "learning_rate": 2.6025883973086693e-08, + "loss": 0.0, + "num_input_tokens_seen": 128865576, + "step": 191260 + }, + { + "epoch": 4.672635770649598, + "grad_norm": 5.851191599504091e-05, + "learning_rate": 2.600655855680034e-08, + "loss": 0.0, + "num_input_tokens_seen": 128868904, + "step": 191265 + }, + { + "epoch": 4.672757921481446, + "grad_norm": 0.0002196232817368582, + "learning_rate": 2.598724022365617e-08, + "loss": 0.0365, + "num_input_tokens_seen": 128872488, + "step": 191270 + }, + { + "epoch": 4.672880072313292, + "grad_norm": 4.39381183241494e-05, + "learning_rate": 2.5967928973794738e-08, + "loss": 0.0, + "num_input_tokens_seen": 128875944, + "step": 191275 + }, + { + "epoch": 4.67300222314514, + "grad_norm": 0.0006547580123879015, + "learning_rate": 2.59486248073566e-08, + "loss": 0.0, + "num_input_tokens_seen": 128879016, + "step": 191280 + }, + { + "epoch": 4.673124373976987, + "grad_norm": 0.00048621126916259527, + "learning_rate": 2.5929327724481976e-08, + "loss": 0.0, + "num_input_tokens_seen": 128882472, + "step": 191285 + }, + { + "epoch": 4.6732465248088335, + "grad_norm": 0.00038942944956943393, + "learning_rate": 2.5910037725311418e-08, + "loss": 0.0, + "num_input_tokens_seen": 128885352, + "step": 191290 + }, + { + "epoch": 4.673368675640681, + "grad_norm": 0.000668442458845675, + "learning_rate": 2.5890754809984928e-08, + "loss": 0.0, + "num_input_tokens_seen": 128889128, + "step": 191295 + }, + { + "epoch": 4.673490826472528, + "grad_norm": 0.00021932261006440967, + "learning_rate": 2.5871478978642945e-08, + "loss": 0.0, + "num_input_tokens_seen": 128892200, + "step": 191300 + }, + { + "epoch": 4.6736129773043755, + "grad_norm": 0.0009539284510537982, + "learning_rate": 2.5852210231425475e-08, + "loss": 0.0, + "num_input_tokens_seen": 128896104, + "step": 191305 + }, + { + "epoch": 4.673735128136222, + "grad_norm": 0.0003438375424593687, + "learning_rate": 2.5832948568472733e-08, + "loss": 0.0, + "num_input_tokens_seen": 128899304, + "step": 191310 + }, + { + "epoch": 4.67385727896807, + "grad_norm": 1.730235635477584e-05, + "learning_rate": 2.5813693989924944e-08, + "loss": 0.0, + "num_input_tokens_seen": 128902568, + "step": 191315 + }, + { + "epoch": 4.673979429799917, + "grad_norm": 0.00022493410506285727, + "learning_rate": 2.5794446495921994e-08, + "loss": 0.0, + "num_input_tokens_seen": 128906600, + "step": 191320 + }, + { + "epoch": 4.674101580631764, + "grad_norm": 0.0017360784113407135, + "learning_rate": 2.5775206086603772e-08, + "loss": 0.0, + "num_input_tokens_seen": 128910120, + "step": 191325 + }, + { + "epoch": 4.674223731463611, + "grad_norm": 2.6076373615069315e-05, + "learning_rate": 2.575597276211039e-08, + "loss": 0.0, + "num_input_tokens_seen": 128913064, + "step": 191330 + }, + { + "epoch": 4.674345882295459, + "grad_norm": 0.01238018274307251, + "learning_rate": 2.573674652258151e-08, + "loss": 0.0, + "num_input_tokens_seen": 128916456, + "step": 191335 + }, + { + "epoch": 4.674468033127305, + "grad_norm": 8.923443965613842e-05, + "learning_rate": 2.5717527368157134e-08, + "loss": 0.0, + "num_input_tokens_seen": 128919528, + "step": 191340 + }, + { + "epoch": 4.674590183959153, + "grad_norm": 0.00023435594630427659, + "learning_rate": 2.5698315298976813e-08, + "loss": 0.0, + "num_input_tokens_seen": 128923304, + "step": 191345 + }, + { + "epoch": 4.674712334791, + "grad_norm": 2.5178560463245958e-05, + "learning_rate": 2.5679110315180553e-08, + "loss": 0.0, + "num_input_tokens_seen": 128926696, + "step": 191350 + }, + { + "epoch": 4.674834485622847, + "grad_norm": 7.507026748498902e-05, + "learning_rate": 2.565991241690779e-08, + "loss": 0.0, + "num_input_tokens_seen": 128930472, + "step": 191355 + }, + { + "epoch": 4.674956636454694, + "grad_norm": 0.0008368153939954937, + "learning_rate": 2.564072160429831e-08, + "loss": 0.0, + "num_input_tokens_seen": 128933800, + "step": 191360 + }, + { + "epoch": 4.675078787286542, + "grad_norm": 0.0004949255962856114, + "learning_rate": 2.562153787749144e-08, + "loss": 0.0, + "num_input_tokens_seen": 128936936, + "step": 191365 + }, + { + "epoch": 4.6752009381183885, + "grad_norm": 0.011058218777179718, + "learning_rate": 2.5602361236626736e-08, + "loss": 0.0, + "num_input_tokens_seen": 128941032, + "step": 191370 + }, + { + "epoch": 4.675323088950236, + "grad_norm": 9.234155004378408e-05, + "learning_rate": 2.5583191681843973e-08, + "loss": 0.0, + "num_input_tokens_seen": 128944296, + "step": 191375 + }, + { + "epoch": 4.675445239782083, + "grad_norm": 0.0030573757831007242, + "learning_rate": 2.5564029213282157e-08, + "loss": 0.0, + "num_input_tokens_seen": 128947496, + "step": 191380 + }, + { + "epoch": 4.67556739061393, + "grad_norm": 0.0009128560195676982, + "learning_rate": 2.554487383108095e-08, + "loss": 0.0, + "num_input_tokens_seen": 128950568, + "step": 191385 + }, + { + "epoch": 4.675689541445777, + "grad_norm": 6.758284143870696e-05, + "learning_rate": 2.552572553537935e-08, + "loss": 0.0, + "num_input_tokens_seen": 128954408, + "step": 191390 + }, + { + "epoch": 4.675811692277624, + "grad_norm": 0.0006308990996330976, + "learning_rate": 2.5506584326316916e-08, + "loss": 0.0, + "num_input_tokens_seen": 128958248, + "step": 191395 + }, + { + "epoch": 4.675933843109472, + "grad_norm": 0.00012568126840051264, + "learning_rate": 2.5487450204032644e-08, + "loss": 0.0, + "num_input_tokens_seen": 128961320, + "step": 191400 + }, + { + "epoch": 4.676055993941318, + "grad_norm": 0.00015130749670788646, + "learning_rate": 2.546832316866576e-08, + "loss": 0.0, + "num_input_tokens_seen": 128964776, + "step": 191405 + }, + { + "epoch": 4.676178144773166, + "grad_norm": 0.0007944232784211636, + "learning_rate": 2.5449203220355377e-08, + "loss": 0.0, + "num_input_tokens_seen": 128968296, + "step": 191410 + }, + { + "epoch": 4.676300295605013, + "grad_norm": 2.92979439109331e-05, + "learning_rate": 2.5430090359240486e-08, + "loss": 0.0, + "num_input_tokens_seen": 128971752, + "step": 191415 + }, + { + "epoch": 4.6764224464368604, + "grad_norm": 0.00021781650139018893, + "learning_rate": 2.5410984585460203e-08, + "loss": 0.0, + "num_input_tokens_seen": 128974696, + "step": 191420 + }, + { + "epoch": 4.676544597268707, + "grad_norm": 0.0005129770725034177, + "learning_rate": 2.539188589915331e-08, + "loss": 0.0001, + "num_input_tokens_seen": 128978088, + "step": 191425 + }, + { + "epoch": 4.676666748100555, + "grad_norm": 0.0003045414632651955, + "learning_rate": 2.537279430045869e-08, + "loss": 0.0, + "num_input_tokens_seen": 128981288, + "step": 191430 + }, + { + "epoch": 4.676788898932402, + "grad_norm": 8.265865471912548e-05, + "learning_rate": 2.5353709789515344e-08, + "loss": 0.0, + "num_input_tokens_seen": 128985064, + "step": 191435 + }, + { + "epoch": 4.676911049764249, + "grad_norm": 9.866351319942623e-05, + "learning_rate": 2.5334632366461827e-08, + "loss": 0.0, + "num_input_tokens_seen": 128988584, + "step": 191440 + }, + { + "epoch": 4.677033200596096, + "grad_norm": 0.00010443492647027597, + "learning_rate": 2.5315562031437144e-08, + "loss": 0.0, + "num_input_tokens_seen": 128991784, + "step": 191445 + }, + { + "epoch": 4.677155351427944, + "grad_norm": 0.001498854486271739, + "learning_rate": 2.5296498784579845e-08, + "loss": 0.0, + "num_input_tokens_seen": 128995432, + "step": 191450 + }, + { + "epoch": 4.67727750225979, + "grad_norm": 3.8011914966773475e-06, + "learning_rate": 2.52774426260286e-08, + "loss": 0.0, + "num_input_tokens_seen": 128999016, + "step": 191455 + }, + { + "epoch": 4.677399653091637, + "grad_norm": 5.286881059873849e-05, + "learning_rate": 2.5258393555921855e-08, + "loss": 0.0, + "num_input_tokens_seen": 129002408, + "step": 191460 + }, + { + "epoch": 4.677521803923485, + "grad_norm": 1.5034872376418207e-05, + "learning_rate": 2.523935157439816e-08, + "loss": 0.0, + "num_input_tokens_seen": 129006248, + "step": 191465 + }, + { + "epoch": 4.677643954755332, + "grad_norm": 3.237137934775092e-05, + "learning_rate": 2.52203166815963e-08, + "loss": 0.0, + "num_input_tokens_seen": 129009640, + "step": 191470 + }, + { + "epoch": 4.677766105587179, + "grad_norm": 0.00026456735213287175, + "learning_rate": 2.520128887765438e-08, + "loss": 0.0, + "num_input_tokens_seen": 129012968, + "step": 191475 + }, + { + "epoch": 4.677888256419026, + "grad_norm": 0.00042985472828149796, + "learning_rate": 2.5182268162710962e-08, + "loss": 0.0, + "num_input_tokens_seen": 129016104, + "step": 191480 + }, + { + "epoch": 4.6780104072508735, + "grad_norm": 0.0021684430539608, + "learning_rate": 2.5163254536904155e-08, + "loss": 0.0, + "num_input_tokens_seen": 129019304, + "step": 191485 + }, + { + "epoch": 4.67813255808272, + "grad_norm": 0.0005304042715579271, + "learning_rate": 2.5144248000372403e-08, + "loss": 0.0, + "num_input_tokens_seen": 129022696, + "step": 191490 + }, + { + "epoch": 4.678254708914568, + "grad_norm": 0.0005591454682871699, + "learning_rate": 2.512524855325393e-08, + "loss": 0.0, + "num_input_tokens_seen": 129026152, + "step": 191495 + }, + { + "epoch": 4.678376859746415, + "grad_norm": 1.403722581017064e-05, + "learning_rate": 2.510625619568674e-08, + "loss": 0.0, + "num_input_tokens_seen": 129029608, + "step": 191500 + }, + { + "epoch": 4.678499010578262, + "grad_norm": 0.0003742658591363579, + "learning_rate": 2.5087270927809266e-08, + "loss": 0.0, + "num_input_tokens_seen": 129033576, + "step": 191505 + }, + { + "epoch": 4.678621161410109, + "grad_norm": 0.00023465380945708603, + "learning_rate": 2.506829274975919e-08, + "loss": 0.0852, + "num_input_tokens_seen": 129037672, + "step": 191510 + }, + { + "epoch": 4.678743312241957, + "grad_norm": 0.0006855735555291176, + "learning_rate": 2.504932166167484e-08, + "loss": 0.0, + "num_input_tokens_seen": 129041000, + "step": 191515 + }, + { + "epoch": 4.678865463073803, + "grad_norm": 0.006005747709423304, + "learning_rate": 2.503035766369399e-08, + "loss": 0.0, + "num_input_tokens_seen": 129044328, + "step": 191520 + }, + { + "epoch": 4.678987613905651, + "grad_norm": 1.0378267765045166, + "learning_rate": 2.5011400755954648e-08, + "loss": 0.0005, + "num_input_tokens_seen": 129047976, + "step": 191525 + }, + { + "epoch": 4.679109764737498, + "grad_norm": 0.0018501474987715483, + "learning_rate": 2.499245093859459e-08, + "loss": 0.0, + "num_input_tokens_seen": 129051304, + "step": 191530 + }, + { + "epoch": 4.679231915569345, + "grad_norm": 2.888561721192673e-05, + "learning_rate": 2.4973508211751816e-08, + "loss": 0.0, + "num_input_tokens_seen": 129054504, + "step": 191535 + }, + { + "epoch": 4.679354066401192, + "grad_norm": 0.0003150117408949882, + "learning_rate": 2.495457257556388e-08, + "loss": 0.0, + "num_input_tokens_seen": 129057512, + "step": 191540 + }, + { + "epoch": 4.67947621723304, + "grad_norm": 4.09619024139829e-05, + "learning_rate": 2.4935644030168456e-08, + "loss": 0.0, + "num_input_tokens_seen": 129061032, + "step": 191545 + }, + { + "epoch": 4.6795983680648865, + "grad_norm": 6.424232651625061e-06, + "learning_rate": 2.491672257570343e-08, + "loss": 0.0, + "num_input_tokens_seen": 129064296, + "step": 191550 + }, + { + "epoch": 4.679720518896733, + "grad_norm": 6.473718531196937e-05, + "learning_rate": 2.4897808212306026e-08, + "loss": 0.0, + "num_input_tokens_seen": 129067752, + "step": 191555 + }, + { + "epoch": 4.679842669728581, + "grad_norm": 4.298717976780608e-05, + "learning_rate": 2.4878900940114134e-08, + "loss": 0.0, + "num_input_tokens_seen": 129070952, + "step": 191560 + }, + { + "epoch": 4.6799648205604285, + "grad_norm": 0.00011635415285127237, + "learning_rate": 2.4860000759265308e-08, + "loss": 0.0501, + "num_input_tokens_seen": 129074280, + "step": 191565 + }, + { + "epoch": 4.680086971392275, + "grad_norm": 0.0006911892560310662, + "learning_rate": 2.4841107669896668e-08, + "loss": 0.0, + "num_input_tokens_seen": 129077672, + "step": 191570 + }, + { + "epoch": 4.680209122224122, + "grad_norm": 0.00032391652348451316, + "learning_rate": 2.4822221672145872e-08, + "loss": 0.0, + "num_input_tokens_seen": 129081384, + "step": 191575 + }, + { + "epoch": 4.68033127305597, + "grad_norm": 6.961155304452404e-05, + "learning_rate": 2.4803342766150036e-08, + "loss": 0.0, + "num_input_tokens_seen": 129084648, + "step": 191580 + }, + { + "epoch": 4.680453423887816, + "grad_norm": 0.00012113929551560432, + "learning_rate": 2.4784470952046722e-08, + "loss": 0.0, + "num_input_tokens_seen": 129087848, + "step": 191585 + }, + { + "epoch": 4.680575574719664, + "grad_norm": 0.0004131891764700413, + "learning_rate": 2.4765606229973034e-08, + "loss": 0.0, + "num_input_tokens_seen": 129091624, + "step": 191590 + }, + { + "epoch": 4.680697725551511, + "grad_norm": 0.0053316219709813595, + "learning_rate": 2.474674860006609e-08, + "loss": 0.0, + "num_input_tokens_seen": 129094504, + "step": 191595 + }, + { + "epoch": 4.680819876383358, + "grad_norm": 7.62042254791595e-05, + "learning_rate": 2.4727898062463226e-08, + "loss": 0.0, + "num_input_tokens_seen": 129098536, + "step": 191600 + }, + { + "epoch": 4.680942027215205, + "grad_norm": 0.002029527211561799, + "learning_rate": 2.4709054617301218e-08, + "loss": 0.0, + "num_input_tokens_seen": 129101800, + "step": 191605 + }, + { + "epoch": 4.681064178047053, + "grad_norm": 8.223400800488889e-05, + "learning_rate": 2.4690218264717398e-08, + "loss": 0.0, + "num_input_tokens_seen": 129105448, + "step": 191610 + }, + { + "epoch": 4.6811863288788995, + "grad_norm": 0.09763093292713165, + "learning_rate": 2.4671389004848663e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129108904, + "step": 191615 + }, + { + "epoch": 4.681308479710747, + "grad_norm": 0.0001553054607938975, + "learning_rate": 2.465256683783179e-08, + "loss": 0.0, + "num_input_tokens_seen": 129112616, + "step": 191620 + }, + { + "epoch": 4.681430630542594, + "grad_norm": 0.0007918269839137793, + "learning_rate": 2.4633751763804e-08, + "loss": 0.0, + "num_input_tokens_seen": 129116264, + "step": 191625 + }, + { + "epoch": 4.6815527813744415, + "grad_norm": 6.41044243820943e-05, + "learning_rate": 2.461494378290174e-08, + "loss": 0.0, + "num_input_tokens_seen": 129119336, + "step": 191630 + }, + { + "epoch": 4.681674932206288, + "grad_norm": 0.001178376143798232, + "learning_rate": 2.4596142895262017e-08, + "loss": 0.0488, + "num_input_tokens_seen": 129122728, + "step": 191635 + }, + { + "epoch": 4.681797083038136, + "grad_norm": 0.0006886956398375332, + "learning_rate": 2.4577349101021495e-08, + "loss": 0.0, + "num_input_tokens_seen": 129126376, + "step": 191640 + }, + { + "epoch": 4.681919233869983, + "grad_norm": 0.0001021117132040672, + "learning_rate": 2.455856240031684e-08, + "loss": 0.0, + "num_input_tokens_seen": 129129256, + "step": 191645 + }, + { + "epoch": 4.682041384701829, + "grad_norm": 0.0002298145991517231, + "learning_rate": 2.4539782793284723e-08, + "loss": 0.0, + "num_input_tokens_seen": 129132648, + "step": 191650 + }, + { + "epoch": 4.682163535533677, + "grad_norm": 2.295691410836298e-05, + "learning_rate": 2.4521010280061592e-08, + "loss": 0.0, + "num_input_tokens_seen": 129136168, + "step": 191655 + }, + { + "epoch": 4.682285686365524, + "grad_norm": 0.0005900713731534779, + "learning_rate": 2.4502244860784115e-08, + "loss": 0.0, + "num_input_tokens_seen": 129139240, + "step": 191660 + }, + { + "epoch": 4.682407837197371, + "grad_norm": 0.0011430694721639156, + "learning_rate": 2.4483486535588628e-08, + "loss": 0.0, + "num_input_tokens_seen": 129142696, + "step": 191665 + }, + { + "epoch": 4.682529988029218, + "grad_norm": 0.0018797010416164994, + "learning_rate": 2.4464735304611682e-08, + "loss": 0.0402, + "num_input_tokens_seen": 129145704, + "step": 191670 + }, + { + "epoch": 4.682652138861066, + "grad_norm": 0.009413032792508602, + "learning_rate": 2.444599116798951e-08, + "loss": 0.0, + "num_input_tokens_seen": 129148904, + "step": 191675 + }, + { + "epoch": 4.6827742896929125, + "grad_norm": 0.00027055476675741374, + "learning_rate": 2.4427254125858444e-08, + "loss": 0.0, + "num_input_tokens_seen": 129152744, + "step": 191680 + }, + { + "epoch": 4.68289644052476, + "grad_norm": 0.0002803192473948002, + "learning_rate": 2.440852417835482e-08, + "loss": 0.0, + "num_input_tokens_seen": 129156264, + "step": 191685 + }, + { + "epoch": 4.683018591356607, + "grad_norm": 0.00013676565140485764, + "learning_rate": 2.4389801325614855e-08, + "loss": 0.0, + "num_input_tokens_seen": 129159912, + "step": 191690 + }, + { + "epoch": 4.6831407421884546, + "grad_norm": 9.485345799475908e-05, + "learning_rate": 2.4371085567774676e-08, + "loss": 0.0, + "num_input_tokens_seen": 129163112, + "step": 191695 + }, + { + "epoch": 4.683262893020301, + "grad_norm": 0.0976140946149826, + "learning_rate": 2.4352376904970275e-08, + "loss": 0.0348, + "num_input_tokens_seen": 129166120, + "step": 191700 + }, + { + "epoch": 4.683385043852149, + "grad_norm": 0.00010639366519171745, + "learning_rate": 2.4333675337337876e-08, + "loss": 0.0, + "num_input_tokens_seen": 129169576, + "step": 191705 + }, + { + "epoch": 4.683507194683996, + "grad_norm": 0.33715274930000305, + "learning_rate": 2.431498086501338e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129173160, + "step": 191710 + }, + { + "epoch": 4.683629345515843, + "grad_norm": 0.0008040166576392949, + "learning_rate": 2.429629348813278e-08, + "loss": 0.0, + "num_input_tokens_seen": 129176424, + "step": 191715 + }, + { + "epoch": 4.68375149634769, + "grad_norm": 0.007210198789834976, + "learning_rate": 2.427761320683208e-08, + "loss": 0.0, + "num_input_tokens_seen": 129179880, + "step": 191720 + }, + { + "epoch": 4.683873647179537, + "grad_norm": 1.565586899232585e-05, + "learning_rate": 2.4258940021246842e-08, + "loss": 0.0, + "num_input_tokens_seen": 129183656, + "step": 191725 + }, + { + "epoch": 4.683995798011384, + "grad_norm": 0.0002123810991179198, + "learning_rate": 2.4240273931513176e-08, + "loss": 0.0, + "num_input_tokens_seen": 129187112, + "step": 191730 + }, + { + "epoch": 4.684117948843232, + "grad_norm": 0.0012347750598564744, + "learning_rate": 2.4221614937766643e-08, + "loss": 0.0, + "num_input_tokens_seen": 129190376, + "step": 191735 + }, + { + "epoch": 4.684240099675079, + "grad_norm": 0.0008376938058063388, + "learning_rate": 2.420296304014291e-08, + "loss": 0.0, + "num_input_tokens_seen": 129193576, + "step": 191740 + }, + { + "epoch": 4.684362250506926, + "grad_norm": 0.004786266479641199, + "learning_rate": 2.4184318238777756e-08, + "loss": 0.0, + "num_input_tokens_seen": 129196584, + "step": 191745 + }, + { + "epoch": 4.684484401338773, + "grad_norm": 0.005261608865112066, + "learning_rate": 2.4165680533806632e-08, + "loss": 0.0, + "num_input_tokens_seen": 129200104, + "step": 191750 + }, + { + "epoch": 4.68460655217062, + "grad_norm": 0.006853157188743353, + "learning_rate": 2.4147049925365314e-08, + "loss": 0.0, + "num_input_tokens_seen": 129203688, + "step": 191755 + }, + { + "epoch": 4.684728703002468, + "grad_norm": 0.0003582743520382792, + "learning_rate": 2.412842641358892e-08, + "loss": 0.0265, + "num_input_tokens_seen": 129207080, + "step": 191760 + }, + { + "epoch": 4.684850853834314, + "grad_norm": 0.0009606487583369017, + "learning_rate": 2.410980999861323e-08, + "loss": 0.0, + "num_input_tokens_seen": 129210152, + "step": 191765 + }, + { + "epoch": 4.684973004666162, + "grad_norm": 0.00018830588669516146, + "learning_rate": 2.4091200680573352e-08, + "loss": 0.0, + "num_input_tokens_seen": 129213608, + "step": 191770 + }, + { + "epoch": 4.685095155498009, + "grad_norm": 0.00023918184160720557, + "learning_rate": 2.4072598459604743e-08, + "loss": 0.0, + "num_input_tokens_seen": 129217384, + "step": 191775 + }, + { + "epoch": 4.685217306329856, + "grad_norm": 0.00038536760257557034, + "learning_rate": 2.4054003335842842e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129220840, + "step": 191780 + }, + { + "epoch": 4.685339457161703, + "grad_norm": 0.0007668939069844782, + "learning_rate": 2.4035415309422657e-08, + "loss": 0.0, + "num_input_tokens_seen": 129223912, + "step": 191785 + }, + { + "epoch": 4.685461607993551, + "grad_norm": 1.1869547961396165e-05, + "learning_rate": 2.401683438047941e-08, + "loss": 0.0, + "num_input_tokens_seen": 129227304, + "step": 191790 + }, + { + "epoch": 4.6855837588253975, + "grad_norm": 0.0011348174884915352, + "learning_rate": 2.399826054914822e-08, + "loss": 0.0, + "num_input_tokens_seen": 129230824, + "step": 191795 + }, + { + "epoch": 4.685705909657245, + "grad_norm": 0.001253793016076088, + "learning_rate": 2.3979693815564305e-08, + "loss": 0.0, + "num_input_tokens_seen": 129234024, + "step": 191800 + }, + { + "epoch": 4.685828060489092, + "grad_norm": 0.11175274103879929, + "learning_rate": 2.3961134179862564e-08, + "loss": 0.0, + "num_input_tokens_seen": 129237288, + "step": 191805 + }, + { + "epoch": 4.6859502113209395, + "grad_norm": 0.000316588586429134, + "learning_rate": 2.3942581642177884e-08, + "loss": 0.0, + "num_input_tokens_seen": 129240680, + "step": 191810 + }, + { + "epoch": 4.686072362152786, + "grad_norm": 5.7418455980950966e-05, + "learning_rate": 2.392403620264538e-08, + "loss": 0.0, + "num_input_tokens_seen": 129243880, + "step": 191815 + }, + { + "epoch": 4.686194512984633, + "grad_norm": 3.4552344914118294e-06, + "learning_rate": 2.3905497861399616e-08, + "loss": 0.0, + "num_input_tokens_seen": 129247592, + "step": 191820 + }, + { + "epoch": 4.686316663816481, + "grad_norm": 0.1387357860803604, + "learning_rate": 2.388696661857581e-08, + "loss": 0.0, + "num_input_tokens_seen": 129250600, + "step": 191825 + }, + { + "epoch": 4.686438814648328, + "grad_norm": 0.004403110593557358, + "learning_rate": 2.3868442474308524e-08, + "loss": 0.0, + "num_input_tokens_seen": 129253672, + "step": 191830 + }, + { + "epoch": 4.686560965480175, + "grad_norm": 9.900266013573855e-05, + "learning_rate": 2.384992542873243e-08, + "loss": 0.0, + "num_input_tokens_seen": 129256808, + "step": 191835 + }, + { + "epoch": 4.686683116312022, + "grad_norm": 7.538765203207731e-05, + "learning_rate": 2.3831415481982198e-08, + "loss": 0.0, + "num_input_tokens_seen": 129259944, + "step": 191840 + }, + { + "epoch": 4.686805267143869, + "grad_norm": 0.0011306103551760316, + "learning_rate": 2.3812912634192495e-08, + "loss": 0.0, + "num_input_tokens_seen": 129263336, + "step": 191845 + }, + { + "epoch": 4.686927417975716, + "grad_norm": 0.0007455127197317779, + "learning_rate": 2.379441688549788e-08, + "loss": 0.0246, + "num_input_tokens_seen": 129266408, + "step": 191850 + }, + { + "epoch": 4.687049568807564, + "grad_norm": 0.0007126157288439572, + "learning_rate": 2.3775928236032806e-08, + "loss": 0.0, + "num_input_tokens_seen": 129269928, + "step": 191855 + }, + { + "epoch": 4.6871717196394105, + "grad_norm": 0.00044742575846612453, + "learning_rate": 2.3757446685931826e-08, + "loss": 0.0, + "num_input_tokens_seen": 129273256, + "step": 191860 + }, + { + "epoch": 4.687293870471258, + "grad_norm": 6.619530176976696e-05, + "learning_rate": 2.3738972235329168e-08, + "loss": 0.0024, + "num_input_tokens_seen": 129276200, + "step": 191865 + }, + { + "epoch": 4.687416021303105, + "grad_norm": 1.003375382424565e-05, + "learning_rate": 2.3720504884359282e-08, + "loss": 0.0, + "num_input_tokens_seen": 129279400, + "step": 191870 + }, + { + "epoch": 4.6875381721349525, + "grad_norm": 0.0061368816532194614, + "learning_rate": 2.3702044633156503e-08, + "loss": 0.0, + "num_input_tokens_seen": 129282664, + "step": 191875 + }, + { + "epoch": 4.687660322966799, + "grad_norm": 0.00020990378106944263, + "learning_rate": 2.3683591481855058e-08, + "loss": 0.0, + "num_input_tokens_seen": 129285864, + "step": 191880 + }, + { + "epoch": 4.687782473798647, + "grad_norm": 0.0008555403328500688, + "learning_rate": 2.3665145430589173e-08, + "loss": 0.0, + "num_input_tokens_seen": 129289128, + "step": 191885 + }, + { + "epoch": 4.687904624630494, + "grad_norm": 0.00013554960605688393, + "learning_rate": 2.364670647949285e-08, + "loss": 0.0, + "num_input_tokens_seen": 129292008, + "step": 191890 + }, + { + "epoch": 4.688026775462341, + "grad_norm": 5.329174382495694e-05, + "learning_rate": 2.3628274628700318e-08, + "loss": 0.0, + "num_input_tokens_seen": 129295336, + "step": 191895 + }, + { + "epoch": 4.688148926294188, + "grad_norm": 21.01401710510254, + "learning_rate": 2.3609849878345577e-08, + "loss": 0.0185, + "num_input_tokens_seen": 129298984, + "step": 191900 + }, + { + "epoch": 4.688271077126036, + "grad_norm": 0.0005068847676739097, + "learning_rate": 2.3591432228562634e-08, + "loss": 0.0, + "num_input_tokens_seen": 129302824, + "step": 191905 + }, + { + "epoch": 4.688393227957882, + "grad_norm": 0.004574902355670929, + "learning_rate": 2.3573021679485495e-08, + "loss": 0.0, + "num_input_tokens_seen": 129306408, + "step": 191910 + }, + { + "epoch": 4.688515378789729, + "grad_norm": 0.0023982757702469826, + "learning_rate": 2.3554618231247934e-08, + "loss": 0.0, + "num_input_tokens_seen": 129309992, + "step": 191915 + }, + { + "epoch": 4.688637529621577, + "grad_norm": 0.001703580841422081, + "learning_rate": 2.3536221883983854e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129313576, + "step": 191920 + }, + { + "epoch": 4.6887596804534235, + "grad_norm": 0.000909437658265233, + "learning_rate": 2.3517832637826806e-08, + "loss": 0.0, + "num_input_tokens_seen": 129317160, + "step": 191925 + }, + { + "epoch": 4.688881831285271, + "grad_norm": 0.00022461153275799006, + "learning_rate": 2.349945049291091e-08, + "loss": 0.0, + "num_input_tokens_seen": 129320232, + "step": 191930 + }, + { + "epoch": 4.689003982117118, + "grad_norm": 1.9985896869911812e-05, + "learning_rate": 2.3481075449369614e-08, + "loss": 0.0, + "num_input_tokens_seen": 129323112, + "step": 191935 + }, + { + "epoch": 4.6891261329489655, + "grad_norm": 0.0005150790675543249, + "learning_rate": 2.34627075073367e-08, + "loss": 0.0022, + "num_input_tokens_seen": 129326504, + "step": 191940 + }, + { + "epoch": 4.689248283780812, + "grad_norm": 0.00022479926701635122, + "learning_rate": 2.3444346666945503e-08, + "loss": 0.0, + "num_input_tokens_seen": 129329512, + "step": 191945 + }, + { + "epoch": 4.68937043461266, + "grad_norm": 1.1208144314878155e-05, + "learning_rate": 2.3425992928329695e-08, + "loss": 0.0, + "num_input_tokens_seen": 129333480, + "step": 191950 + }, + { + "epoch": 4.689492585444507, + "grad_norm": 4.2835341446334496e-05, + "learning_rate": 2.340764629162284e-08, + "loss": 0.0, + "num_input_tokens_seen": 129336872, + "step": 191955 + }, + { + "epoch": 4.689614736276354, + "grad_norm": 0.0007139771478250623, + "learning_rate": 2.338930675695805e-08, + "loss": 0.0, + "num_input_tokens_seen": 129340072, + "step": 191960 + }, + { + "epoch": 4.689736887108201, + "grad_norm": 9.58927339524962e-05, + "learning_rate": 2.3370974324468997e-08, + "loss": 0.0, + "num_input_tokens_seen": 129343848, + "step": 191965 + }, + { + "epoch": 4.689859037940049, + "grad_norm": 7.770962110953405e-05, + "learning_rate": 2.3352648994288905e-08, + "loss": 0.0224, + "num_input_tokens_seen": 129347368, + "step": 191970 + }, + { + "epoch": 4.689981188771895, + "grad_norm": 0.00241994415409863, + "learning_rate": 2.3334330766551002e-08, + "loss": 0.0, + "num_input_tokens_seen": 129350888, + "step": 191975 + }, + { + "epoch": 4.690103339603743, + "grad_norm": 0.00035448124981485307, + "learning_rate": 2.331601964138863e-08, + "loss": 0.0, + "num_input_tokens_seen": 129354728, + "step": 191980 + }, + { + "epoch": 4.69022549043559, + "grad_norm": 0.0003735190839506686, + "learning_rate": 2.329771561893479e-08, + "loss": 0.0, + "num_input_tokens_seen": 129357864, + "step": 191985 + }, + { + "epoch": 4.690347641267437, + "grad_norm": 0.0018810693873092532, + "learning_rate": 2.3279418699322594e-08, + "loss": 0.0, + "num_input_tokens_seen": 129361832, + "step": 191990 + }, + { + "epoch": 4.690469792099284, + "grad_norm": 0.00018536254356149584, + "learning_rate": 2.3261128882685275e-08, + "loss": 0.0, + "num_input_tokens_seen": 129365096, + "step": 191995 + }, + { + "epoch": 4.690591942931132, + "grad_norm": 0.01687249168753624, + "learning_rate": 2.3242846169155728e-08, + "loss": 0.0, + "num_input_tokens_seen": 129368872, + "step": 192000 + }, + { + "epoch": 4.6907140937629785, + "grad_norm": 0.00037364265881478786, + "learning_rate": 2.3224570558866952e-08, + "loss": 0.0, + "num_input_tokens_seen": 129371944, + "step": 192005 + }, + { + "epoch": 4.690836244594825, + "grad_norm": 0.00013020077312830836, + "learning_rate": 2.320630205195173e-08, + "loss": 0.0, + "num_input_tokens_seen": 129375272, + "step": 192010 + }, + { + "epoch": 4.690958395426673, + "grad_norm": 0.00023853543098084629, + "learning_rate": 2.3188040648543073e-08, + "loss": 0.0, + "num_input_tokens_seen": 129378792, + "step": 192015 + }, + { + "epoch": 4.69108054625852, + "grad_norm": 0.01277604978531599, + "learning_rate": 2.3169786348773644e-08, + "loss": 0.0, + "num_input_tokens_seen": 129382568, + "step": 192020 + }, + { + "epoch": 4.691202697090367, + "grad_norm": 0.000663110229652375, + "learning_rate": 2.3151539152776345e-08, + "loss": 0.0, + "num_input_tokens_seen": 129385832, + "step": 192025 + }, + { + "epoch": 4.691324847922214, + "grad_norm": 0.0010855343425646424, + "learning_rate": 2.3133299060683732e-08, + "loss": 0.0, + "num_input_tokens_seen": 129389224, + "step": 192030 + }, + { + "epoch": 4.691446998754062, + "grad_norm": 0.00010959253268083557, + "learning_rate": 2.3115066072628585e-08, + "loss": 0.0, + "num_input_tokens_seen": 129392424, + "step": 192035 + }, + { + "epoch": 4.691569149585908, + "grad_norm": 0.0001751628442434594, + "learning_rate": 2.309684018874336e-08, + "loss": 0.0, + "num_input_tokens_seen": 129395880, + "step": 192040 + }, + { + "epoch": 4.691691300417756, + "grad_norm": 0.0004285105096641928, + "learning_rate": 2.3078621409160727e-08, + "loss": 0.0, + "num_input_tokens_seen": 129399464, + "step": 192045 + }, + { + "epoch": 4.691813451249603, + "grad_norm": 0.0003401923459023237, + "learning_rate": 2.306040973401313e-08, + "loss": 0.0, + "num_input_tokens_seen": 129402792, + "step": 192050 + }, + { + "epoch": 4.69193560208145, + "grad_norm": 0.021882670000195503, + "learning_rate": 2.3042205163432914e-08, + "loss": 0.0675, + "num_input_tokens_seen": 129406248, + "step": 192055 + }, + { + "epoch": 4.692057752913297, + "grad_norm": 0.0002050148177659139, + "learning_rate": 2.302400769755264e-08, + "loss": 0.0, + "num_input_tokens_seen": 129409640, + "step": 192060 + }, + { + "epoch": 4.692179903745145, + "grad_norm": 0.00010289058991475031, + "learning_rate": 2.300581733650453e-08, + "loss": 0.0, + "num_input_tokens_seen": 129413224, + "step": 192065 + }, + { + "epoch": 4.692302054576992, + "grad_norm": 0.00038634383236058056, + "learning_rate": 2.2987634080420815e-08, + "loss": 0.0, + "num_input_tokens_seen": 129416808, + "step": 192070 + }, + { + "epoch": 4.692424205408839, + "grad_norm": 0.00010601503163343295, + "learning_rate": 2.2969457929433946e-08, + "loss": 0.0, + "num_input_tokens_seen": 129420712, + "step": 192075 + }, + { + "epoch": 4.692546356240686, + "grad_norm": 0.001418996136635542, + "learning_rate": 2.295128888367581e-08, + "loss": 0.0, + "num_input_tokens_seen": 129424680, + "step": 192080 + }, + { + "epoch": 4.692668507072533, + "grad_norm": 0.00041618754039518535, + "learning_rate": 2.2933126943278758e-08, + "loss": 0.0, + "num_input_tokens_seen": 129428072, + "step": 192085 + }, + { + "epoch": 4.69279065790438, + "grad_norm": 3.8680613215547055e-05, + "learning_rate": 2.2914972108374896e-08, + "loss": 0.0, + "num_input_tokens_seen": 129431528, + "step": 192090 + }, + { + "epoch": 4.692912808736228, + "grad_norm": 0.000333454052451998, + "learning_rate": 2.2896824379096014e-08, + "loss": 0.0, + "num_input_tokens_seen": 129434984, + "step": 192095 + }, + { + "epoch": 4.693034959568075, + "grad_norm": 0.00013087606930639595, + "learning_rate": 2.2878683755574446e-08, + "loss": 0.0, + "num_input_tokens_seen": 129438568, + "step": 192100 + }, + { + "epoch": 4.6931571103999215, + "grad_norm": 0.002510525519028306, + "learning_rate": 2.2860550237941644e-08, + "loss": 0.0, + "num_input_tokens_seen": 129442408, + "step": 192105 + }, + { + "epoch": 4.693279261231769, + "grad_norm": 0.0001203200445161201, + "learning_rate": 2.284242382632995e-08, + "loss": 0.0, + "num_input_tokens_seen": 129445736, + "step": 192110 + }, + { + "epoch": 4.693401412063616, + "grad_norm": 0.0006897970451973379, + "learning_rate": 2.2824304520870808e-08, + "loss": 0.0, + "num_input_tokens_seen": 129448808, + "step": 192115 + }, + { + "epoch": 4.6935235628954635, + "grad_norm": 0.0007370402454398572, + "learning_rate": 2.2806192321696225e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129452136, + "step": 192120 + }, + { + "epoch": 4.69364571372731, + "grad_norm": 6.168057007016614e-05, + "learning_rate": 2.278808722893788e-08, + "loss": 0.0, + "num_input_tokens_seen": 129455464, + "step": 192125 + }, + { + "epoch": 4.693767864559158, + "grad_norm": 0.00012389298353809863, + "learning_rate": 2.2769989242727328e-08, + "loss": 0.0, + "num_input_tokens_seen": 129458472, + "step": 192130 + }, + { + "epoch": 4.693890015391005, + "grad_norm": 6.390651105903089e-05, + "learning_rate": 2.2751898363196354e-08, + "loss": 0.0, + "num_input_tokens_seen": 129462120, + "step": 192135 + }, + { + "epoch": 4.694012166222852, + "grad_norm": 0.07547641545534134, + "learning_rate": 2.273381459047641e-08, + "loss": 0.0, + "num_input_tokens_seen": 129465832, + "step": 192140 + }, + { + "epoch": 4.694134317054699, + "grad_norm": 0.9471267461776733, + "learning_rate": 2.271573792469905e-08, + "loss": 0.0707, + "num_input_tokens_seen": 129468968, + "step": 192145 + }, + { + "epoch": 4.694256467886547, + "grad_norm": 0.0007499286439269781, + "learning_rate": 2.2697668365995514e-08, + "loss": 0.0, + "num_input_tokens_seen": 129472296, + "step": 192150 + }, + { + "epoch": 4.694378618718393, + "grad_norm": 0.00041503115789964795, + "learning_rate": 2.2679605914497578e-08, + "loss": 0.0002, + "num_input_tokens_seen": 129475368, + "step": 192155 + }, + { + "epoch": 4.694500769550241, + "grad_norm": 0.00028109681443311274, + "learning_rate": 2.2661550570336473e-08, + "loss": 0.0, + "num_input_tokens_seen": 129478696, + "step": 192160 + }, + { + "epoch": 4.694622920382088, + "grad_norm": 5.857786527485587e-05, + "learning_rate": 2.2643502333643205e-08, + "loss": 0.0, + "num_input_tokens_seen": 129482088, + "step": 192165 + }, + { + "epoch": 4.694745071213935, + "grad_norm": 3.311472391942516e-05, + "learning_rate": 2.2625461204549444e-08, + "loss": 0.0, + "num_input_tokens_seen": 129485416, + "step": 192170 + }, + { + "epoch": 4.694867222045782, + "grad_norm": 0.0005333948647603393, + "learning_rate": 2.26074271831862e-08, + "loss": 0.0, + "num_input_tokens_seen": 129488808, + "step": 192175 + }, + { + "epoch": 4.694989372877629, + "grad_norm": 0.0011380622163414955, + "learning_rate": 2.2589400269684477e-08, + "loss": 0.0359, + "num_input_tokens_seen": 129492200, + "step": 192180 + }, + { + "epoch": 4.6951115237094765, + "grad_norm": 0.006969318725168705, + "learning_rate": 2.2571380464175725e-08, + "loss": 0.0, + "num_input_tokens_seen": 129495464, + "step": 192185 + }, + { + "epoch": 4.695233674541324, + "grad_norm": 0.0009833202930167317, + "learning_rate": 2.2553367766790622e-08, + "loss": 0.0, + "num_input_tokens_seen": 129499112, + "step": 192190 + }, + { + "epoch": 4.695355825373171, + "grad_norm": 0.004456230904906988, + "learning_rate": 2.25353621776605e-08, + "loss": 0.0, + "num_input_tokens_seen": 129502824, + "step": 192195 + }, + { + "epoch": 4.695477976205018, + "grad_norm": 3.732401455636136e-05, + "learning_rate": 2.2517363696916037e-08, + "loss": 0.0667, + "num_input_tokens_seen": 129506088, + "step": 192200 + }, + { + "epoch": 4.695600127036865, + "grad_norm": 0.000706669467035681, + "learning_rate": 2.2499372324688125e-08, + "loss": 0.0, + "num_input_tokens_seen": 129509352, + "step": 192205 + }, + { + "epoch": 4.695722277868712, + "grad_norm": 0.00011496250226628035, + "learning_rate": 2.2481388061107888e-08, + "loss": 0.0029, + "num_input_tokens_seen": 129513064, + "step": 192210 + }, + { + "epoch": 4.69584442870056, + "grad_norm": 20.1938533782959, + "learning_rate": 2.2463410906305768e-08, + "loss": 0.0354, + "num_input_tokens_seen": 129516264, + "step": 192215 + }, + { + "epoch": 4.695966579532406, + "grad_norm": 0.0046359761618077755, + "learning_rate": 2.2445440860412777e-08, + "loss": 0.0, + "num_input_tokens_seen": 129519976, + "step": 192220 + }, + { + "epoch": 4.696088730364254, + "grad_norm": 0.0020351947750896215, + "learning_rate": 2.242747792355937e-08, + "loss": 0.0002, + "num_input_tokens_seen": 129523816, + "step": 192225 + }, + { + "epoch": 4.696210881196101, + "grad_norm": 0.0008622038294561207, + "learning_rate": 2.240952209587632e-08, + "loss": 0.0, + "num_input_tokens_seen": 129527080, + "step": 192230 + }, + { + "epoch": 4.696333032027948, + "grad_norm": 4.840063866140554e-06, + "learning_rate": 2.239157337749409e-08, + "loss": 0.0, + "num_input_tokens_seen": 129530408, + "step": 192235 + }, + { + "epoch": 4.696455182859795, + "grad_norm": 0.001740322564728558, + "learning_rate": 2.2373631768543344e-08, + "loss": 0.0, + "num_input_tokens_seen": 129533416, + "step": 192240 + }, + { + "epoch": 4.696577333691643, + "grad_norm": 0.001596541260369122, + "learning_rate": 2.2355697269154537e-08, + "loss": 0.0, + "num_input_tokens_seen": 129537064, + "step": 192245 + }, + { + "epoch": 4.6966994845234895, + "grad_norm": 0.00013007610687054694, + "learning_rate": 2.2337769879458014e-08, + "loss": 0.0, + "num_input_tokens_seen": 129540264, + "step": 192250 + }, + { + "epoch": 4.696821635355337, + "grad_norm": 6.054230107110925e-05, + "learning_rate": 2.231984959958422e-08, + "loss": 0.0, + "num_input_tokens_seen": 129544104, + "step": 192255 + }, + { + "epoch": 4.696943786187184, + "grad_norm": 0.00020400869834702462, + "learning_rate": 2.230193642966338e-08, + "loss": 0.0, + "num_input_tokens_seen": 129547176, + "step": 192260 + }, + { + "epoch": 4.6970659370190315, + "grad_norm": 0.004991667345166206, + "learning_rate": 2.2284030369825956e-08, + "loss": 0.0, + "num_input_tokens_seen": 129550056, + "step": 192265 + }, + { + "epoch": 4.697188087850878, + "grad_norm": 0.0008958621765486896, + "learning_rate": 2.226613142020195e-08, + "loss": 0.0004, + "num_input_tokens_seen": 129553448, + "step": 192270 + }, + { + "epoch": 4.697310238682725, + "grad_norm": 0.0023038180079311132, + "learning_rate": 2.2248239580921478e-08, + "loss": 0.0, + "num_input_tokens_seen": 129556968, + "step": 192275 + }, + { + "epoch": 4.697432389514573, + "grad_norm": 0.0013945907121524215, + "learning_rate": 2.2230354852114998e-08, + "loss": 0.0, + "num_input_tokens_seen": 129560296, + "step": 192280 + }, + { + "epoch": 4.697554540346419, + "grad_norm": 0.004303370136767626, + "learning_rate": 2.2212477233912285e-08, + "loss": 0.0, + "num_input_tokens_seen": 129564136, + "step": 192285 + }, + { + "epoch": 4.697676691178267, + "grad_norm": 0.00010136087803402916, + "learning_rate": 2.2194606726443465e-08, + "loss": 0.0, + "num_input_tokens_seen": 129567528, + "step": 192290 + }, + { + "epoch": 4.697798842010114, + "grad_norm": 0.08520669490098953, + "learning_rate": 2.2176743329838433e-08, + "loss": 0.0, + "num_input_tokens_seen": 129570984, + "step": 192295 + }, + { + "epoch": 4.697920992841961, + "grad_norm": 0.00013460438640322536, + "learning_rate": 2.215888704422708e-08, + "loss": 0.0, + "num_input_tokens_seen": 129573992, + "step": 192300 + }, + { + "epoch": 4.698043143673808, + "grad_norm": 7.838014425942674e-05, + "learning_rate": 2.214103786973931e-08, + "loss": 0.0, + "num_input_tokens_seen": 129578472, + "step": 192305 + }, + { + "epoch": 4.698165294505656, + "grad_norm": 0.00031517792376689613, + "learning_rate": 2.2123195806505013e-08, + "loss": 0.0, + "num_input_tokens_seen": 129581544, + "step": 192310 + }, + { + "epoch": 4.6982874453375025, + "grad_norm": 0.001368136378005147, + "learning_rate": 2.2105360854653865e-08, + "loss": 0.0, + "num_input_tokens_seen": 129584744, + "step": 192315 + }, + { + "epoch": 4.69840959616935, + "grad_norm": 0.000132130560814403, + "learning_rate": 2.2087533014315428e-08, + "loss": 0.0, + "num_input_tokens_seen": 129588072, + "step": 192320 + }, + { + "epoch": 4.698531747001197, + "grad_norm": 0.011298295110464096, + "learning_rate": 2.2069712285619602e-08, + "loss": 0.0, + "num_input_tokens_seen": 129591016, + "step": 192325 + }, + { + "epoch": 4.6986538978330445, + "grad_norm": 0.00041692276136018336, + "learning_rate": 2.2051898668695724e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129594536, + "step": 192330 + }, + { + "epoch": 4.698776048664891, + "grad_norm": 4.01533288822975e-05, + "learning_rate": 2.203409216367358e-08, + "loss": 0.0, + "num_input_tokens_seen": 129597928, + "step": 192335 + }, + { + "epoch": 4.698898199496739, + "grad_norm": 0.0007659096154384315, + "learning_rate": 2.201629277068251e-08, + "loss": 0.0, + "num_input_tokens_seen": 129600872, + "step": 192340 + }, + { + "epoch": 4.699020350328586, + "grad_norm": 0.0030560491140931845, + "learning_rate": 2.1998500489852077e-08, + "loss": 0.0, + "num_input_tokens_seen": 129604776, + "step": 192345 + }, + { + "epoch": 4.699142501160432, + "grad_norm": 0.00016910966951400042, + "learning_rate": 2.1980715321311515e-08, + "loss": 0.0, + "num_input_tokens_seen": 129607848, + "step": 192350 + }, + { + "epoch": 4.69926465199228, + "grad_norm": 1.6891659470275044e-05, + "learning_rate": 2.1962937265190385e-08, + "loss": 0.0, + "num_input_tokens_seen": 129611240, + "step": 192355 + }, + { + "epoch": 4.699386802824128, + "grad_norm": 0.00044170417822897434, + "learning_rate": 2.194516632161769e-08, + "loss": 0.0, + "num_input_tokens_seen": 129614504, + "step": 192360 + }, + { + "epoch": 4.699508953655974, + "grad_norm": 0.0006980904727242887, + "learning_rate": 2.192740249072289e-08, + "loss": 0.0, + "num_input_tokens_seen": 129618088, + "step": 192365 + }, + { + "epoch": 4.699631104487821, + "grad_norm": 2.8264948923606426e-05, + "learning_rate": 2.1909645772634988e-08, + "loss": 0.0, + "num_input_tokens_seen": 129621608, + "step": 192370 + }, + { + "epoch": 4.699753255319669, + "grad_norm": 0.0015524571062996984, + "learning_rate": 2.1891896167483327e-08, + "loss": 0.0, + "num_input_tokens_seen": 129625192, + "step": 192375 + }, + { + "epoch": 4.6998754061515156, + "grad_norm": 0.0006360540282912552, + "learning_rate": 2.1874153675396802e-08, + "loss": 0.0, + "num_input_tokens_seen": 129628328, + "step": 192380 + }, + { + "epoch": 4.699997556983363, + "grad_norm": 0.00010307029151590541, + "learning_rate": 2.1856418296504642e-08, + "loss": 0.0, + "num_input_tokens_seen": 129631720, + "step": 192385 + }, + { + "epoch": 4.70011970781521, + "grad_norm": 0.00037360982969403267, + "learning_rate": 2.1838690030935524e-08, + "loss": 0.0, + "num_input_tokens_seen": 129635880, + "step": 192390 + }, + { + "epoch": 4.700241858647058, + "grad_norm": 0.001626503886654973, + "learning_rate": 2.1820968878818567e-08, + "loss": 0.0, + "num_input_tokens_seen": 129639144, + "step": 192395 + }, + { + "epoch": 4.700364009478904, + "grad_norm": 0.00037940763286314905, + "learning_rate": 2.180325484028278e-08, + "loss": 0.0, + "num_input_tokens_seen": 129642344, + "step": 192400 + }, + { + "epoch": 4.700486160310752, + "grad_norm": 0.0007786820060573518, + "learning_rate": 2.1785547915456727e-08, + "loss": 0.0, + "num_input_tokens_seen": 129645416, + "step": 192405 + }, + { + "epoch": 4.700608311142599, + "grad_norm": 3.812544309766963e-05, + "learning_rate": 2.17678481044693e-08, + "loss": 0.0, + "num_input_tokens_seen": 129649128, + "step": 192410 + }, + { + "epoch": 4.700730461974446, + "grad_norm": 5.064254582975991e-05, + "learning_rate": 2.1750155407449178e-08, + "loss": 0.0, + "num_input_tokens_seen": 129652456, + "step": 192415 + }, + { + "epoch": 4.700852612806293, + "grad_norm": 0.006544878240674734, + "learning_rate": 2.1732469824525035e-08, + "loss": 0.0, + "num_input_tokens_seen": 129655464, + "step": 192420 + }, + { + "epoch": 4.700974763638141, + "grad_norm": 0.0009501032182015479, + "learning_rate": 2.1714791355825434e-08, + "loss": 0.0, + "num_input_tokens_seen": 129659112, + "step": 192425 + }, + { + "epoch": 4.7010969144699875, + "grad_norm": 0.00031887463410384953, + "learning_rate": 2.1697120001479053e-08, + "loss": 0.0, + "num_input_tokens_seen": 129662888, + "step": 192430 + }, + { + "epoch": 4.701219065301835, + "grad_norm": 0.00013029456022195518, + "learning_rate": 2.167945576161434e-08, + "loss": 0.0, + "num_input_tokens_seen": 129666216, + "step": 192435 + }, + { + "epoch": 4.701341216133682, + "grad_norm": 7.15162095730193e-05, + "learning_rate": 2.166179863635975e-08, + "loss": 0.017, + "num_input_tokens_seen": 129669608, + "step": 192440 + }, + { + "epoch": 4.701463366965529, + "grad_norm": 0.021288808435201645, + "learning_rate": 2.164414862584385e-08, + "loss": 0.0, + "num_input_tokens_seen": 129672744, + "step": 192445 + }, + { + "epoch": 4.701585517797376, + "grad_norm": 0.00013626010331790894, + "learning_rate": 2.1626505730194645e-08, + "loss": 0.0, + "num_input_tokens_seen": 129676648, + "step": 192450 + }, + { + "epoch": 4.701707668629224, + "grad_norm": 0.0007354624103754759, + "learning_rate": 2.1608869949540808e-08, + "loss": 0.0, + "num_input_tokens_seen": 129680104, + "step": 192455 + }, + { + "epoch": 4.701829819461071, + "grad_norm": 7.168312731664628e-05, + "learning_rate": 2.1591241284010242e-08, + "loss": 0.0724, + "num_input_tokens_seen": 129683304, + "step": 192460 + }, + { + "epoch": 4.701951970292917, + "grad_norm": 4.4334596168482676e-05, + "learning_rate": 2.1573619733731507e-08, + "loss": 0.0002, + "num_input_tokens_seen": 129686312, + "step": 192465 + }, + { + "epoch": 4.702074121124765, + "grad_norm": 0.0033819645177572966, + "learning_rate": 2.1556005298832502e-08, + "loss": 0.0, + "num_input_tokens_seen": 129690408, + "step": 192470 + }, + { + "epoch": 4.702196271956612, + "grad_norm": 0.000799092638771981, + "learning_rate": 2.1538397979441348e-08, + "loss": 0.0, + "num_input_tokens_seen": 129693800, + "step": 192475 + }, + { + "epoch": 4.702318422788459, + "grad_norm": 2.3778502509230748e-05, + "learning_rate": 2.1520797775686273e-08, + "loss": 0.0, + "num_input_tokens_seen": 129697064, + "step": 192480 + }, + { + "epoch": 4.702440573620306, + "grad_norm": 0.0001850874105002731, + "learning_rate": 2.1503204687694952e-08, + "loss": 0.0, + "num_input_tokens_seen": 129700264, + "step": 192485 + }, + { + "epoch": 4.702562724452154, + "grad_norm": 0.00020812089496757835, + "learning_rate": 2.148561871559562e-08, + "loss": 0.0005, + "num_input_tokens_seen": 129703400, + "step": 192490 + }, + { + "epoch": 4.7026848752840005, + "grad_norm": 0.00045550725189968944, + "learning_rate": 2.1468039859516062e-08, + "loss": 0.0, + "num_input_tokens_seen": 129706600, + "step": 192495 + }, + { + "epoch": 4.702807026115848, + "grad_norm": 4.641317354980856e-05, + "learning_rate": 2.1450468119584066e-08, + "loss": 0.0, + "num_input_tokens_seen": 129710056, + "step": 192500 + }, + { + "epoch": 4.702929176947695, + "grad_norm": 0.00022677636297885329, + "learning_rate": 2.1432903495927523e-08, + "loss": 0.0, + "num_input_tokens_seen": 129713576, + "step": 192505 + }, + { + "epoch": 4.7030513277795425, + "grad_norm": 0.0009053811081685126, + "learning_rate": 2.1415345988674006e-08, + "loss": 0.0, + "num_input_tokens_seen": 129716840, + "step": 192510 + }, + { + "epoch": 4.703173478611389, + "grad_norm": 0.00039823848055675626, + "learning_rate": 2.1397795597951406e-08, + "loss": 0.0, + "num_input_tokens_seen": 129720040, + "step": 192515 + }, + { + "epoch": 4.703295629443237, + "grad_norm": 0.00016683421563357115, + "learning_rate": 2.1380252323887182e-08, + "loss": 0.0, + "num_input_tokens_seen": 129723048, + "step": 192520 + }, + { + "epoch": 4.703417780275084, + "grad_norm": 0.00031939937616698444, + "learning_rate": 2.1362716166609008e-08, + "loss": 0.0, + "num_input_tokens_seen": 129726760, + "step": 192525 + }, + { + "epoch": 4.703539931106931, + "grad_norm": 0.0009505918715149164, + "learning_rate": 2.1345187126244335e-08, + "loss": 0.0, + "num_input_tokens_seen": 129730088, + "step": 192530 + }, + { + "epoch": 4.703662081938778, + "grad_norm": 0.001424207934178412, + "learning_rate": 2.1327665202920732e-08, + "loss": 0.0, + "num_input_tokens_seen": 129733544, + "step": 192535 + }, + { + "epoch": 4.703784232770625, + "grad_norm": 0.0001100384906749241, + "learning_rate": 2.1310150396765646e-08, + "loss": 0.0, + "num_input_tokens_seen": 129736424, + "step": 192540 + }, + { + "epoch": 4.703906383602472, + "grad_norm": 0.00031706338631920516, + "learning_rate": 2.1292642707906316e-08, + "loss": 0.0, + "num_input_tokens_seen": 129739432, + "step": 192545 + }, + { + "epoch": 4.704028534434319, + "grad_norm": 0.01422285009175539, + "learning_rate": 2.127514213647008e-08, + "loss": 0.0, + "num_input_tokens_seen": 129743336, + "step": 192550 + }, + { + "epoch": 4.704150685266167, + "grad_norm": 3.073379775742069e-05, + "learning_rate": 2.1257648682584284e-08, + "loss": 0.0, + "num_input_tokens_seen": 129747112, + "step": 192555 + }, + { + "epoch": 4.7042728360980135, + "grad_norm": 0.00014186625776346773, + "learning_rate": 2.1240162346376266e-08, + "loss": 0.0, + "num_input_tokens_seen": 129750504, + "step": 192560 + }, + { + "epoch": 4.704394986929861, + "grad_norm": 4.058052581967786e-05, + "learning_rate": 2.1222683127972817e-08, + "loss": 0.0, + "num_input_tokens_seen": 129753640, + "step": 192565 + }, + { + "epoch": 4.704517137761708, + "grad_norm": 0.12598304450511932, + "learning_rate": 2.120521102750139e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129757160, + "step": 192570 + }, + { + "epoch": 4.7046392885935555, + "grad_norm": 0.0008097590180113912, + "learning_rate": 2.1187746045088996e-08, + "loss": 0.0, + "num_input_tokens_seen": 129760296, + "step": 192575 + }, + { + "epoch": 4.704761439425402, + "grad_norm": 0.00015910847287159413, + "learning_rate": 2.1170288180862528e-08, + "loss": 0.0, + "num_input_tokens_seen": 129763880, + "step": 192580 + }, + { + "epoch": 4.70488359025725, + "grad_norm": 2.650478927535005e-05, + "learning_rate": 2.115283743494889e-08, + "loss": 0.0, + "num_input_tokens_seen": 129766952, + "step": 192585 + }, + { + "epoch": 4.705005741089097, + "grad_norm": 7.849488611100242e-05, + "learning_rate": 2.1135393807475198e-08, + "loss": 0.0, + "num_input_tokens_seen": 129770152, + "step": 192590 + }, + { + "epoch": 4.705127891920944, + "grad_norm": 0.0001116606654250063, + "learning_rate": 2.1117957298568133e-08, + "loss": 0.0, + "num_input_tokens_seen": 129773800, + "step": 192595 + }, + { + "epoch": 4.705250042752791, + "grad_norm": 0.0020123757421970367, + "learning_rate": 2.1100527908354704e-08, + "loss": 0.0, + "num_input_tokens_seen": 129776936, + "step": 192600 + }, + { + "epoch": 4.705372193584639, + "grad_norm": 0.0008393031312152743, + "learning_rate": 2.1083105636961363e-08, + "loss": 0.0, + "num_input_tokens_seen": 129780328, + "step": 192605 + }, + { + "epoch": 4.705494344416485, + "grad_norm": 0.00021688619744963944, + "learning_rate": 2.1065690484515007e-08, + "loss": 0.0, + "num_input_tokens_seen": 129783400, + "step": 192610 + }, + { + "epoch": 4.705616495248332, + "grad_norm": 2.179694092774298e-05, + "learning_rate": 2.1048282451142428e-08, + "loss": 0.0, + "num_input_tokens_seen": 129786536, + "step": 192615 + }, + { + "epoch": 4.70573864608018, + "grad_norm": 0.003509758971631527, + "learning_rate": 2.1030881536969857e-08, + "loss": 0.0, + "num_input_tokens_seen": 129789736, + "step": 192620 + }, + { + "epoch": 4.705860796912027, + "grad_norm": 2.4690003556315787e-05, + "learning_rate": 2.1013487742124192e-08, + "loss": 0.0, + "num_input_tokens_seen": 129793000, + "step": 192625 + }, + { + "epoch": 4.705982947743874, + "grad_norm": 0.012998932972550392, + "learning_rate": 2.0996101066731552e-08, + "loss": 0.0, + "num_input_tokens_seen": 129796456, + "step": 192630 + }, + { + "epoch": 4.706105098575721, + "grad_norm": 0.0010782252065837383, + "learning_rate": 2.097872151091873e-08, + "loss": 0.0, + "num_input_tokens_seen": 129799784, + "step": 192635 + }, + { + "epoch": 4.7062272494075685, + "grad_norm": 0.0006848773919045925, + "learning_rate": 2.0961349074811952e-08, + "loss": 0.0, + "num_input_tokens_seen": 129802792, + "step": 192640 + }, + { + "epoch": 4.706349400239415, + "grad_norm": 0.04416637495160103, + "learning_rate": 2.0943983758537453e-08, + "loss": 0.0, + "num_input_tokens_seen": 129806184, + "step": 192645 + }, + { + "epoch": 4.706471551071263, + "grad_norm": 0.00011389933206373826, + "learning_rate": 2.09266255622218e-08, + "loss": 0.0, + "num_input_tokens_seen": 129809960, + "step": 192650 + }, + { + "epoch": 4.70659370190311, + "grad_norm": 8.055933722062036e-05, + "learning_rate": 2.0909274485991003e-08, + "loss": 0.0, + "num_input_tokens_seen": 129812904, + "step": 192655 + }, + { + "epoch": 4.706715852734957, + "grad_norm": 2.3937989681144245e-05, + "learning_rate": 2.089193052997129e-08, + "loss": 0.0, + "num_input_tokens_seen": 129816552, + "step": 192660 + }, + { + "epoch": 4.706838003566804, + "grad_norm": 6.111896800575778e-05, + "learning_rate": 2.08745936942889e-08, + "loss": 0.0, + "num_input_tokens_seen": 129819816, + "step": 192665 + }, + { + "epoch": 4.706960154398652, + "grad_norm": 0.00013432465493679047, + "learning_rate": 2.0857263979069727e-08, + "loss": 0.0, + "num_input_tokens_seen": 129823400, + "step": 192670 + }, + { + "epoch": 4.707082305230498, + "grad_norm": 0.0024881605058908463, + "learning_rate": 2.0839941384439897e-08, + "loss": 0.0043, + "num_input_tokens_seen": 129826856, + "step": 192675 + }, + { + "epoch": 4.707204456062346, + "grad_norm": 0.0003247729910071939, + "learning_rate": 2.0822625910525415e-08, + "loss": 0.0, + "num_input_tokens_seen": 129830440, + "step": 192680 + }, + { + "epoch": 4.707326606894193, + "grad_norm": 3.557054515113123e-05, + "learning_rate": 2.0805317557452184e-08, + "loss": 0.0, + "num_input_tokens_seen": 129834664, + "step": 192685 + }, + { + "epoch": 4.70744875772604, + "grad_norm": 3.953570194425993e-05, + "learning_rate": 2.078801632534588e-08, + "loss": 0.0, + "num_input_tokens_seen": 129837992, + "step": 192690 + }, + { + "epoch": 4.707570908557887, + "grad_norm": 0.027544084936380386, + "learning_rate": 2.0770722214332736e-08, + "loss": 0.0, + "num_input_tokens_seen": 129841192, + "step": 192695 + }, + { + "epoch": 4.707693059389735, + "grad_norm": 0.00014391849981620908, + "learning_rate": 2.0753435224538095e-08, + "loss": 0.0, + "num_input_tokens_seen": 129844648, + "step": 192700 + }, + { + "epoch": 4.707815210221582, + "grad_norm": 0.006761842407286167, + "learning_rate": 2.0736155356087858e-08, + "loss": 0.0, + "num_input_tokens_seen": 129848232, + "step": 192705 + }, + { + "epoch": 4.707937361053428, + "grad_norm": 0.0003056692366953939, + "learning_rate": 2.0718882609107812e-08, + "loss": 0.0, + "num_input_tokens_seen": 129851816, + "step": 192710 + }, + { + "epoch": 4.708059511885276, + "grad_norm": 0.004368090070784092, + "learning_rate": 2.0701616983723414e-08, + "loss": 0.0, + "num_input_tokens_seen": 129855144, + "step": 192715 + }, + { + "epoch": 4.708181662717124, + "grad_norm": 0.0005663208430632949, + "learning_rate": 2.0684358480060228e-08, + "loss": 0.0, + "num_input_tokens_seen": 129858408, + "step": 192720 + }, + { + "epoch": 4.70830381354897, + "grad_norm": 0.00015874313248787075, + "learning_rate": 2.0667107098243818e-08, + "loss": 0.0, + "num_input_tokens_seen": 129862248, + "step": 192725 + }, + { + "epoch": 4.708425964380817, + "grad_norm": 1.957891981874127e-05, + "learning_rate": 2.0649862838399645e-08, + "loss": 0.0, + "num_input_tokens_seen": 129865640, + "step": 192730 + }, + { + "epoch": 4.708548115212665, + "grad_norm": 0.0003272563626524061, + "learning_rate": 2.0632625700652938e-08, + "loss": 0.0, + "num_input_tokens_seen": 129869416, + "step": 192735 + }, + { + "epoch": 4.708670266044511, + "grad_norm": 9.015477553475648e-05, + "learning_rate": 2.0615395685129266e-08, + "loss": 0.0, + "num_input_tokens_seen": 129872680, + "step": 192740 + }, + { + "epoch": 4.708792416876359, + "grad_norm": 9.212747681885958e-05, + "learning_rate": 2.059817279195397e-08, + "loss": 0.0, + "num_input_tokens_seen": 129875944, + "step": 192745 + }, + { + "epoch": 4.708914567708206, + "grad_norm": 0.0008575996034778655, + "learning_rate": 2.0580957021252067e-08, + "loss": 0.0, + "num_input_tokens_seen": 129879336, + "step": 192750 + }, + { + "epoch": 4.7090367185400535, + "grad_norm": 0.000842539535369724, + "learning_rate": 2.0563748373148894e-08, + "loss": 0.0, + "num_input_tokens_seen": 129882728, + "step": 192755 + }, + { + "epoch": 4.7091588693719, + "grad_norm": 0.0002445719437673688, + "learning_rate": 2.0546546847769574e-08, + "loss": 0.0, + "num_input_tokens_seen": 129885800, + "step": 192760 + }, + { + "epoch": 4.709281020203748, + "grad_norm": 0.00025933721917681396, + "learning_rate": 2.0529352445239234e-08, + "loss": 0.0, + "num_input_tokens_seen": 129889128, + "step": 192765 + }, + { + "epoch": 4.709403171035595, + "grad_norm": 0.00010074125020764768, + "learning_rate": 2.0512165165682882e-08, + "loss": 0.0, + "num_input_tokens_seen": 129893160, + "step": 192770 + }, + { + "epoch": 4.709525321867442, + "grad_norm": 5.31175146534224e-06, + "learning_rate": 2.049498500922553e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129896168, + "step": 192775 + }, + { + "epoch": 4.709647472699289, + "grad_norm": 4.8221234465017915e-05, + "learning_rate": 2.0477811975992187e-08, + "loss": 0.0, + "num_input_tokens_seen": 129899176, + "step": 192780 + }, + { + "epoch": 4.709769623531137, + "grad_norm": 0.005339786410331726, + "learning_rate": 2.0460646066107533e-08, + "loss": 0.0245, + "num_input_tokens_seen": 129902888, + "step": 192785 + }, + { + "epoch": 4.709891774362983, + "grad_norm": 0.0022137362975627184, + "learning_rate": 2.0443487279696582e-08, + "loss": 0.0, + "num_input_tokens_seen": 129905896, + "step": 192790 + }, + { + "epoch": 4.710013925194831, + "grad_norm": 5.289226828608662e-05, + "learning_rate": 2.0426335616884005e-08, + "loss": 0.0, + "num_input_tokens_seen": 129909352, + "step": 192795 + }, + { + "epoch": 4.710136076026678, + "grad_norm": 0.0035220349673181772, + "learning_rate": 2.0409191077794595e-08, + "loss": 0.0, + "num_input_tokens_seen": 129912424, + "step": 192800 + }, + { + "epoch": 4.7102582268585245, + "grad_norm": 0.0060235122218728065, + "learning_rate": 2.039205366255303e-08, + "loss": 0.0, + "num_input_tokens_seen": 129915752, + "step": 192805 + }, + { + "epoch": 4.710380377690372, + "grad_norm": 0.00028744732844643295, + "learning_rate": 2.0374923371283992e-08, + "loss": 0.0, + "num_input_tokens_seen": 129919208, + "step": 192810 + }, + { + "epoch": 4.71050252852222, + "grad_norm": 0.0006107626832090318, + "learning_rate": 2.035780020411193e-08, + "loss": 0.0001, + "num_input_tokens_seen": 129922792, + "step": 192815 + }, + { + "epoch": 4.7106246793540665, + "grad_norm": 0.000291914155241102, + "learning_rate": 2.0340684161161414e-08, + "loss": 0.0, + "num_input_tokens_seen": 129926312, + "step": 192820 + }, + { + "epoch": 4.710746830185913, + "grad_norm": 1.9453102140687406e-05, + "learning_rate": 2.0323575242557123e-08, + "loss": 0.0, + "num_input_tokens_seen": 129929896, + "step": 192825 + }, + { + "epoch": 4.710868981017761, + "grad_norm": 0.00029763614293187857, + "learning_rate": 2.0306473448423066e-08, + "loss": 0.0, + "num_input_tokens_seen": 129933096, + "step": 192830 + }, + { + "epoch": 4.710991131849608, + "grad_norm": 0.001083080773241818, + "learning_rate": 2.0289378778883924e-08, + "loss": 0.0, + "num_input_tokens_seen": 129936488, + "step": 192835 + }, + { + "epoch": 4.711113282681455, + "grad_norm": 5.25053619639948e-05, + "learning_rate": 2.027229123406393e-08, + "loss": 0.0, + "num_input_tokens_seen": 129939944, + "step": 192840 + }, + { + "epoch": 4.711235433513302, + "grad_norm": 4.293541132938117e-05, + "learning_rate": 2.025521081408732e-08, + "loss": 0.0, + "num_input_tokens_seen": 129943400, + "step": 192845 + }, + { + "epoch": 4.71135758434515, + "grad_norm": 0.000599803461227566, + "learning_rate": 2.0238137519078436e-08, + "loss": 0.0, + "num_input_tokens_seen": 129946728, + "step": 192850 + }, + { + "epoch": 4.711479735176996, + "grad_norm": 0.00016343145398423076, + "learning_rate": 2.022107134916129e-08, + "loss": 0.0, + "num_input_tokens_seen": 129949736, + "step": 192855 + }, + { + "epoch": 4.711601886008844, + "grad_norm": 0.005201476626098156, + "learning_rate": 2.0204012304460005e-08, + "loss": 0.0, + "num_input_tokens_seen": 129952936, + "step": 192860 + }, + { + "epoch": 4.711724036840691, + "grad_norm": 0.026774518191814423, + "learning_rate": 2.0186960385098707e-08, + "loss": 0.0, + "num_input_tokens_seen": 129956136, + "step": 192865 + }, + { + "epoch": 4.711846187672538, + "grad_norm": 0.0007024348597042263, + "learning_rate": 2.0169915591201403e-08, + "loss": 0.0, + "num_input_tokens_seen": 129959528, + "step": 192870 + }, + { + "epoch": 4.711968338504385, + "grad_norm": 0.00012115760910091922, + "learning_rate": 2.0152877922891996e-08, + "loss": 0.0, + "num_input_tokens_seen": 129962728, + "step": 192875 + }, + { + "epoch": 4.712090489336233, + "grad_norm": 3.893441680702381e-05, + "learning_rate": 2.013584738029439e-08, + "loss": 0.0, + "num_input_tokens_seen": 129965928, + "step": 192880 + }, + { + "epoch": 4.7122126401680795, + "grad_norm": 1.953154060174711e-05, + "learning_rate": 2.0118823963532482e-08, + "loss": 0.0, + "num_input_tokens_seen": 129969704, + "step": 192885 + }, + { + "epoch": 4.712334790999927, + "grad_norm": 0.018396366387605667, + "learning_rate": 2.0101807672729953e-08, + "loss": 0.0, + "num_input_tokens_seen": 129972968, + "step": 192890 + }, + { + "epoch": 4.712456941831774, + "grad_norm": 5.830708323628642e-05, + "learning_rate": 2.0084798508010703e-08, + "loss": 0.0, + "num_input_tokens_seen": 129976168, + "step": 192895 + }, + { + "epoch": 4.712579092663621, + "grad_norm": 0.0005415156483650208, + "learning_rate": 2.006779646949841e-08, + "loss": 0.0, + "num_input_tokens_seen": 129979752, + "step": 192900 + }, + { + "epoch": 4.712701243495468, + "grad_norm": 5.077142486697994e-05, + "learning_rate": 2.0050801557316532e-08, + "loss": 0.0, + "num_input_tokens_seen": 129983016, + "step": 192905 + }, + { + "epoch": 4.712823394327315, + "grad_norm": 0.00036389665910974145, + "learning_rate": 2.003381377158897e-08, + "loss": 0.0, + "num_input_tokens_seen": 129986472, + "step": 192910 + }, + { + "epoch": 4.712945545159163, + "grad_norm": 0.002304884372279048, + "learning_rate": 2.0016833112438958e-08, + "loss": 0.0321, + "num_input_tokens_seen": 129989672, + "step": 192915 + }, + { + "epoch": 4.713067695991009, + "grad_norm": 1.98540001292713e-05, + "learning_rate": 1.9999859579990175e-08, + "loss": 0.0, + "num_input_tokens_seen": 129993000, + "step": 192920 + }, + { + "epoch": 4.713189846822857, + "grad_norm": 0.0007984357071109116, + "learning_rate": 1.9982893174366077e-08, + "loss": 0.0, + "num_input_tokens_seen": 129996136, + "step": 192925 + }, + { + "epoch": 4.713311997654704, + "grad_norm": 0.0018656195607036352, + "learning_rate": 1.996593389568979e-08, + "loss": 0.0, + "num_input_tokens_seen": 129999784, + "step": 192930 + }, + { + "epoch": 4.713434148486551, + "grad_norm": 0.0021639051847159863, + "learning_rate": 1.9948981744084992e-08, + "loss": 0.0, + "num_input_tokens_seen": 130003368, + "step": 192935 + }, + { + "epoch": 4.713556299318398, + "grad_norm": 0.0019073068397119641, + "learning_rate": 1.9932036719674696e-08, + "loss": 0.0, + "num_input_tokens_seen": 130006632, + "step": 192940 + }, + { + "epoch": 4.713678450150246, + "grad_norm": 0.0027616843581199646, + "learning_rate": 1.9915098822582353e-08, + "loss": 0.0, + "num_input_tokens_seen": 130009704, + "step": 192945 + }, + { + "epoch": 4.7138006009820925, + "grad_norm": 0.00016160483937710524, + "learning_rate": 1.9898168052930987e-08, + "loss": 0.0, + "num_input_tokens_seen": 130013352, + "step": 192950 + }, + { + "epoch": 4.71392275181394, + "grad_norm": 2.910471084760502e-05, + "learning_rate": 1.98812444108436e-08, + "loss": 0.0, + "num_input_tokens_seen": 130016552, + "step": 192955 + }, + { + "epoch": 4.714044902645787, + "grad_norm": 0.09401872009038925, + "learning_rate": 1.9864327896443655e-08, + "loss": 0.0, + "num_input_tokens_seen": 130019752, + "step": 192960 + }, + { + "epoch": 4.7141670534776345, + "grad_norm": 0.01751399040222168, + "learning_rate": 1.984741850985383e-08, + "loss": 0.0, + "num_input_tokens_seen": 130023720, + "step": 192965 + }, + { + "epoch": 4.714289204309481, + "grad_norm": 0.0027101587038487196, + "learning_rate": 1.9830516251197247e-08, + "loss": 0.0, + "num_input_tokens_seen": 130026728, + "step": 192970 + }, + { + "epoch": 4.714411355141328, + "grad_norm": 0.0004698086704593152, + "learning_rate": 1.9813621120596703e-08, + "loss": 0.0, + "num_input_tokens_seen": 130029992, + "step": 192975 + }, + { + "epoch": 4.714533505973176, + "grad_norm": 0.00010237730748485774, + "learning_rate": 1.97967331181752e-08, + "loss": 0.0, + "num_input_tokens_seen": 130033192, + "step": 192980 + }, + { + "epoch": 4.714655656805023, + "grad_norm": 3.8691145164193586e-05, + "learning_rate": 1.977985224405554e-08, + "loss": 0.0, + "num_input_tokens_seen": 130036968, + "step": 192985 + }, + { + "epoch": 4.71477780763687, + "grad_norm": 0.00014511265908367932, + "learning_rate": 1.9762978498360393e-08, + "loss": 0.0, + "num_input_tokens_seen": 130039912, + "step": 192990 + }, + { + "epoch": 4.714899958468717, + "grad_norm": 0.0002013398625422269, + "learning_rate": 1.9746111881212556e-08, + "loss": 0.0, + "num_input_tokens_seen": 130043368, + "step": 192995 + }, + { + "epoch": 4.715022109300564, + "grad_norm": 0.0014783921651542187, + "learning_rate": 1.9729252392734597e-08, + "loss": 0.0, + "num_input_tokens_seen": 130046568, + "step": 193000 + }, + { + "epoch": 4.715144260132411, + "grad_norm": 0.0002856815990526229, + "learning_rate": 1.9712400033049194e-08, + "loss": 0.0, + "num_input_tokens_seen": 130049640, + "step": 193005 + }, + { + "epoch": 4.715266410964259, + "grad_norm": 0.0011368409032002091, + "learning_rate": 1.9695554802278803e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130053096, + "step": 193010 + }, + { + "epoch": 4.7153885617961055, + "grad_norm": 0.0006178556359373033, + "learning_rate": 1.9678716700546106e-08, + "loss": 0.0, + "num_input_tokens_seen": 130056424, + "step": 193015 + }, + { + "epoch": 4.715510712627953, + "grad_norm": 3.2776610169094056e-05, + "learning_rate": 1.9661885727973448e-08, + "loss": 0.0, + "num_input_tokens_seen": 130059816, + "step": 193020 + }, + { + "epoch": 4.7156328634598, + "grad_norm": 0.00012467149645090103, + "learning_rate": 1.964506188468318e-08, + "loss": 0.0, + "num_input_tokens_seen": 130062888, + "step": 193025 + }, + { + "epoch": 4.715755014291648, + "grad_norm": 0.00035838209441863, + "learning_rate": 1.9628245170797865e-08, + "loss": 0.0, + "num_input_tokens_seen": 130065960, + "step": 193030 + }, + { + "epoch": 4.715877165123494, + "grad_norm": 5.7584791647968814e-05, + "learning_rate": 1.9611435586439405e-08, + "loss": 0.0005, + "num_input_tokens_seen": 130069416, + "step": 193035 + }, + { + "epoch": 4.715999315955342, + "grad_norm": 7.922954559326172, + "learning_rate": 1.9594633131730488e-08, + "loss": 0.0224, + "num_input_tokens_seen": 130073320, + "step": 193040 + }, + { + "epoch": 4.716121466787189, + "grad_norm": 0.001085901283659041, + "learning_rate": 1.957783780679301e-08, + "loss": 0.0, + "num_input_tokens_seen": 130076456, + "step": 193045 + }, + { + "epoch": 4.716243617619036, + "grad_norm": 0.0024623856879770756, + "learning_rate": 1.9561049611749093e-08, + "loss": 0.0, + "num_input_tokens_seen": 130079528, + "step": 193050 + }, + { + "epoch": 4.716365768450883, + "grad_norm": 0.006301338318735361, + "learning_rate": 1.9544268546721087e-08, + "loss": 0.0, + "num_input_tokens_seen": 130083048, + "step": 193055 + }, + { + "epoch": 4.716487919282731, + "grad_norm": 4.267549957148731e-05, + "learning_rate": 1.9527494611830786e-08, + "loss": 0.0004, + "num_input_tokens_seen": 130086184, + "step": 193060 + }, + { + "epoch": 4.7166100701145774, + "grad_norm": 0.0012560115428641438, + "learning_rate": 1.951072780720031e-08, + "loss": 0.001, + "num_input_tokens_seen": 130089512, + "step": 193065 + }, + { + "epoch": 4.716732220946424, + "grad_norm": 0.0003190060087945312, + "learning_rate": 1.9493968132951455e-08, + "loss": 0.0, + "num_input_tokens_seen": 130092520, + "step": 193070 + }, + { + "epoch": 4.716854371778272, + "grad_norm": 0.00017790061247069389, + "learning_rate": 1.947721558920634e-08, + "loss": 0.0, + "num_input_tokens_seen": 130095912, + "step": 193075 + }, + { + "epoch": 4.7169765226101195, + "grad_norm": 5.468689778354019e-05, + "learning_rate": 1.946047017608654e-08, + "loss": 0.0, + "num_input_tokens_seen": 130099304, + "step": 193080 + }, + { + "epoch": 4.717098673441966, + "grad_norm": 0.00034660325036384165, + "learning_rate": 1.9443731893713954e-08, + "loss": 0.0, + "num_input_tokens_seen": 130102568, + "step": 193085 + }, + { + "epoch": 4.717220824273813, + "grad_norm": 4.338144208304584e-05, + "learning_rate": 1.9427000742210376e-08, + "loss": 0.0, + "num_input_tokens_seen": 130106216, + "step": 193090 + }, + { + "epoch": 4.717342975105661, + "grad_norm": 0.0010805005440488458, + "learning_rate": 1.9410276721697262e-08, + "loss": 0.0, + "num_input_tokens_seen": 130109544, + "step": 193095 + }, + { + "epoch": 4.717465125937507, + "grad_norm": 3.018993629666511e-05, + "learning_rate": 1.9393559832296404e-08, + "loss": 0.0, + "num_input_tokens_seen": 130112744, + "step": 193100 + }, + { + "epoch": 4.717587276769355, + "grad_norm": 0.0006496264250017703, + "learning_rate": 1.9376850074129257e-08, + "loss": 0.0, + "num_input_tokens_seen": 130116264, + "step": 193105 + }, + { + "epoch": 4.717709427601202, + "grad_norm": 3.441320586716756e-05, + "learning_rate": 1.9360147447317398e-08, + "loss": 0.0, + "num_input_tokens_seen": 130119720, + "step": 193110 + }, + { + "epoch": 4.717831578433049, + "grad_norm": 0.00019770261133089662, + "learning_rate": 1.9343451951982505e-08, + "loss": 0.0, + "num_input_tokens_seen": 130123240, + "step": 193115 + }, + { + "epoch": 4.717953729264896, + "grad_norm": 4.3698764784494415e-05, + "learning_rate": 1.9326763588245587e-08, + "loss": 0.0, + "num_input_tokens_seen": 130127080, + "step": 193120 + }, + { + "epoch": 4.718075880096744, + "grad_norm": 0.00031875044805929065, + "learning_rate": 1.931008235622844e-08, + "loss": 0.0, + "num_input_tokens_seen": 130130792, + "step": 193125 + }, + { + "epoch": 4.7181980309285905, + "grad_norm": 0.002110627479851246, + "learning_rate": 1.929340825605197e-08, + "loss": 0.0, + "num_input_tokens_seen": 130134184, + "step": 193130 + }, + { + "epoch": 4.718320181760438, + "grad_norm": 0.00013501415378414094, + "learning_rate": 1.927674128783763e-08, + "loss": 0.0, + "num_input_tokens_seen": 130137448, + "step": 193135 + }, + { + "epoch": 4.718442332592285, + "grad_norm": 0.0004318559367675334, + "learning_rate": 1.9260081451706654e-08, + "loss": 0.0, + "num_input_tokens_seen": 130140776, + "step": 193140 + }, + { + "epoch": 4.7185644834241325, + "grad_norm": 0.00030089804204180837, + "learning_rate": 1.9243428747780065e-08, + "loss": 0.0, + "num_input_tokens_seen": 130144744, + "step": 193145 + }, + { + "epoch": 4.718686634255979, + "grad_norm": 0.000204432217287831, + "learning_rate": 1.9226783176179208e-08, + "loss": 0.0, + "num_input_tokens_seen": 130148200, + "step": 193150 + }, + { + "epoch": 4.718808785087827, + "grad_norm": 0.00013242612476460636, + "learning_rate": 1.921014473702476e-08, + "loss": 0.0002, + "num_input_tokens_seen": 130151464, + "step": 193155 + }, + { + "epoch": 4.718930935919674, + "grad_norm": 0.0012007177574560046, + "learning_rate": 1.919351343043818e-08, + "loss": 0.0, + "num_input_tokens_seen": 130154664, + "step": 193160 + }, + { + "epoch": 4.71905308675152, + "grad_norm": 0.010337266139686108, + "learning_rate": 1.917688925654004e-08, + "loss": 0.0, + "num_input_tokens_seen": 130158184, + "step": 193165 + }, + { + "epoch": 4.719175237583368, + "grad_norm": 0.00024405837757512927, + "learning_rate": 1.9160272215451355e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130161704, + "step": 193170 + }, + { + "epoch": 4.719297388415215, + "grad_norm": 0.002331462921574712, + "learning_rate": 1.914366230729303e-08, + "loss": 0.0, + "num_input_tokens_seen": 130165736, + "step": 193175 + }, + { + "epoch": 4.719419539247062, + "grad_norm": 0.00015924693434499204, + "learning_rate": 1.9127059532185853e-08, + "loss": 0.0, + "num_input_tokens_seen": 130169000, + "step": 193180 + }, + { + "epoch": 4.719541690078909, + "grad_norm": 0.0002346932451473549, + "learning_rate": 1.9110463890250506e-08, + "loss": 0.0, + "num_input_tokens_seen": 130172712, + "step": 193185 + }, + { + "epoch": 4.719663840910757, + "grad_norm": 0.00011881341197295114, + "learning_rate": 1.909387538160767e-08, + "loss": 0.0, + "num_input_tokens_seen": 130176872, + "step": 193190 + }, + { + "epoch": 4.7197859917426035, + "grad_norm": 0.0005221093888394535, + "learning_rate": 1.907729400637803e-08, + "loss": 0.0002, + "num_input_tokens_seen": 130180136, + "step": 193195 + }, + { + "epoch": 4.719908142574451, + "grad_norm": 0.00241106073372066, + "learning_rate": 1.9060719764682155e-08, + "loss": 0.0, + "num_input_tokens_seen": 130183656, + "step": 193200 + }, + { + "epoch": 4.720030293406298, + "grad_norm": 0.0005604327307082713, + "learning_rate": 1.9044152656640498e-08, + "loss": 0.0, + "num_input_tokens_seen": 130186728, + "step": 193205 + }, + { + "epoch": 4.7201524442381455, + "grad_norm": 0.002126067876815796, + "learning_rate": 1.902759268237364e-08, + "loss": 0.0, + "num_input_tokens_seen": 130189992, + "step": 193210 + }, + { + "epoch": 4.720274595069992, + "grad_norm": 0.0030112469103187323, + "learning_rate": 1.901103984200192e-08, + "loss": 0.0, + "num_input_tokens_seen": 130193192, + "step": 193215 + }, + { + "epoch": 4.72039674590184, + "grad_norm": 0.000552595010958612, + "learning_rate": 1.899449413564591e-08, + "loss": 0.0, + "num_input_tokens_seen": 130196712, + "step": 193220 + }, + { + "epoch": 4.720518896733687, + "grad_norm": 0.000356059375917539, + "learning_rate": 1.897795556342563e-08, + "loss": 0.0, + "num_input_tokens_seen": 130200104, + "step": 193225 + }, + { + "epoch": 4.720641047565534, + "grad_norm": 0.006424812134355307, + "learning_rate": 1.8961424125461756e-08, + "loss": 0.0, + "num_input_tokens_seen": 130203496, + "step": 193230 + }, + { + "epoch": 4.720763198397381, + "grad_norm": 0.002036610385403037, + "learning_rate": 1.8944899821874083e-08, + "loss": 0.0, + "num_input_tokens_seen": 130206952, + "step": 193235 + }, + { + "epoch": 4.720885349229228, + "grad_norm": 9.202076762448996e-05, + "learning_rate": 1.892838265278296e-08, + "loss": 0.0, + "num_input_tokens_seen": 130210088, + "step": 193240 + }, + { + "epoch": 4.721007500061075, + "grad_norm": 0.00014109651965554804, + "learning_rate": 1.891187261830862e-08, + "loss": 0.0, + "num_input_tokens_seen": 130213608, + "step": 193245 + }, + { + "epoch": 4.721129650892923, + "grad_norm": 0.0001980575470952317, + "learning_rate": 1.8895369718570865e-08, + "loss": 0.0, + "num_input_tokens_seen": 130217128, + "step": 193250 + }, + { + "epoch": 4.72125180172477, + "grad_norm": 0.0011710544349625707, + "learning_rate": 1.8878873953690035e-08, + "loss": 0.0, + "num_input_tokens_seen": 130220456, + "step": 193255 + }, + { + "epoch": 4.7213739525566165, + "grad_norm": 0.00030315376352518797, + "learning_rate": 1.8862385323785813e-08, + "loss": 0.0, + "num_input_tokens_seen": 130223784, + "step": 193260 + }, + { + "epoch": 4.721496103388464, + "grad_norm": 0.002116880612447858, + "learning_rate": 1.8845903828978216e-08, + "loss": 0.0, + "num_input_tokens_seen": 130227304, + "step": 193265 + }, + { + "epoch": 4.721618254220311, + "grad_norm": 4.5842771214665845e-06, + "learning_rate": 1.882942946938726e-08, + "loss": 0.0, + "num_input_tokens_seen": 130230632, + "step": 193270 + }, + { + "epoch": 4.7217404050521585, + "grad_norm": 0.00034518842585384846, + "learning_rate": 1.8812962245132403e-08, + "loss": 0.0, + "num_input_tokens_seen": 130234280, + "step": 193275 + }, + { + "epoch": 4.721862555884005, + "grad_norm": 0.00042717086034826934, + "learning_rate": 1.879650215633377e-08, + "loss": 0.0, + "num_input_tokens_seen": 130237800, + "step": 193280 + }, + { + "epoch": 4.721984706715853, + "grad_norm": 0.0005576208932325244, + "learning_rate": 1.8780049203110714e-08, + "loss": 0.0002, + "num_input_tokens_seen": 130241320, + "step": 193285 + }, + { + "epoch": 4.7221068575477, + "grad_norm": 0.0001096838095691055, + "learning_rate": 1.876360338558325e-08, + "loss": 0.0, + "num_input_tokens_seen": 130244712, + "step": 193290 + }, + { + "epoch": 4.722229008379547, + "grad_norm": 0.0005637186113744974, + "learning_rate": 1.8747164703870722e-08, + "loss": 0.0, + "num_input_tokens_seen": 130247848, + "step": 193295 + }, + { + "epoch": 4.722351159211394, + "grad_norm": 0.0004345967317931354, + "learning_rate": 1.8730733158092593e-08, + "loss": 0.0, + "num_input_tokens_seen": 130251432, + "step": 193300 + }, + { + "epoch": 4.722473310043242, + "grad_norm": 0.004918577615171671, + "learning_rate": 1.8714308748368657e-08, + "loss": 0.0, + "num_input_tokens_seen": 130254696, + "step": 193305 + }, + { + "epoch": 4.722595460875088, + "grad_norm": 0.00018379326502326876, + "learning_rate": 1.869789147481815e-08, + "loss": 0.0, + "num_input_tokens_seen": 130257896, + "step": 193310 + }, + { + "epoch": 4.722717611706936, + "grad_norm": 0.006261991336941719, + "learning_rate": 1.8681481337560534e-08, + "loss": 0.0, + "num_input_tokens_seen": 130261224, + "step": 193315 + }, + { + "epoch": 4.722839762538783, + "grad_norm": 0.0014488224405795336, + "learning_rate": 1.8665078336715046e-08, + "loss": 0.0, + "num_input_tokens_seen": 130264424, + "step": 193320 + }, + { + "epoch": 4.72296191337063, + "grad_norm": 0.000362872495315969, + "learning_rate": 1.8648682472401033e-08, + "loss": 0.0, + "num_input_tokens_seen": 130267752, + "step": 193325 + }, + { + "epoch": 4.723084064202477, + "grad_norm": 0.0027441405691206455, + "learning_rate": 1.8632293744737958e-08, + "loss": 0.0, + "num_input_tokens_seen": 130270952, + "step": 193330 + }, + { + "epoch": 4.723206215034324, + "grad_norm": 3.456345075392164e-05, + "learning_rate": 1.8615912153844617e-08, + "loss": 0.0, + "num_input_tokens_seen": 130274024, + "step": 193335 + }, + { + "epoch": 4.7233283658661716, + "grad_norm": 5.745082671637647e-05, + "learning_rate": 1.859953769984046e-08, + "loss": 0.0, + "num_input_tokens_seen": 130277672, + "step": 193340 + }, + { + "epoch": 4.723450516698019, + "grad_norm": 5.0481285143177956e-05, + "learning_rate": 1.8583170382844294e-08, + "loss": 0.0, + "num_input_tokens_seen": 130281064, + "step": 193345 + }, + { + "epoch": 4.723572667529866, + "grad_norm": 0.006787308491766453, + "learning_rate": 1.8566810202975348e-08, + "loss": 0.0, + "num_input_tokens_seen": 130284584, + "step": 193350 + }, + { + "epoch": 4.723694818361713, + "grad_norm": 0.0006456426926888525, + "learning_rate": 1.855045716035253e-08, + "loss": 0.0, + "num_input_tokens_seen": 130288232, + "step": 193355 + }, + { + "epoch": 4.72381696919356, + "grad_norm": 0.00033384087146259844, + "learning_rate": 1.853411125509463e-08, + "loss": 0.0, + "num_input_tokens_seen": 130291688, + "step": 193360 + }, + { + "epoch": 4.723939120025407, + "grad_norm": 0.00017671861860435456, + "learning_rate": 1.8517772487320892e-08, + "loss": 0.0, + "num_input_tokens_seen": 130294888, + "step": 193365 + }, + { + "epoch": 4.724061270857255, + "grad_norm": 0.00026331390836276114, + "learning_rate": 1.8501440857149776e-08, + "loss": 0.0, + "num_input_tokens_seen": 130298216, + "step": 193370 + }, + { + "epoch": 4.724183421689101, + "grad_norm": 0.00023717127623967826, + "learning_rate": 1.848511636470018e-08, + "loss": 0.0, + "num_input_tokens_seen": 130301608, + "step": 193375 + }, + { + "epoch": 4.724305572520949, + "grad_norm": 1.0042748726846185e-05, + "learning_rate": 1.8468799010090796e-08, + "loss": 0.0, + "num_input_tokens_seen": 130305064, + "step": 193380 + }, + { + "epoch": 4.724427723352796, + "grad_norm": 0.01970692165195942, + "learning_rate": 1.8452488793440414e-08, + "loss": 0.0, + "num_input_tokens_seen": 130308328, + "step": 193385 + }, + { + "epoch": 4.7245498741846434, + "grad_norm": 0.000939359306357801, + "learning_rate": 1.8436185714867382e-08, + "loss": 0.0, + "num_input_tokens_seen": 130312104, + "step": 193390 + }, + { + "epoch": 4.72467202501649, + "grad_norm": 0.000279458035947755, + "learning_rate": 1.8419889774490494e-08, + "loss": 0.0, + "num_input_tokens_seen": 130315432, + "step": 193395 + }, + { + "epoch": 4.724794175848338, + "grad_norm": 0.0003981810586992651, + "learning_rate": 1.8403600972428322e-08, + "loss": 0.0, + "num_input_tokens_seen": 130319080, + "step": 193400 + }, + { + "epoch": 4.724916326680185, + "grad_norm": 8.307830285048112e-05, + "learning_rate": 1.8387319308799e-08, + "loss": 0.0, + "num_input_tokens_seen": 130322216, + "step": 193405 + }, + { + "epoch": 4.725038477512032, + "grad_norm": 0.00042934861266985536, + "learning_rate": 1.837104478372109e-08, + "loss": 0.0, + "num_input_tokens_seen": 130325480, + "step": 193410 + }, + { + "epoch": 4.725160628343879, + "grad_norm": 3.530388858052902e-05, + "learning_rate": 1.835477739731306e-08, + "loss": 0.0, + "num_input_tokens_seen": 130329128, + "step": 193415 + }, + { + "epoch": 4.725282779175727, + "grad_norm": 7.023687794571742e-05, + "learning_rate": 1.8338517149693034e-08, + "loss": 0.0, + "num_input_tokens_seen": 130332584, + "step": 193420 + }, + { + "epoch": 4.725404930007573, + "grad_norm": 0.0024503623135387897, + "learning_rate": 1.8322264040979472e-08, + "loss": 0.0, + "num_input_tokens_seen": 130335848, + "step": 193425 + }, + { + "epoch": 4.72552708083942, + "grad_norm": 16.8475284576416, + "learning_rate": 1.8306018071290284e-08, + "loss": 0.0184, + "num_input_tokens_seen": 130339112, + "step": 193430 + }, + { + "epoch": 4.725649231671268, + "grad_norm": 4.4047746996511705e-06, + "learning_rate": 1.828977924074393e-08, + "loss": 0.0, + "num_input_tokens_seen": 130342568, + "step": 193435 + }, + { + "epoch": 4.7257713825031145, + "grad_norm": 0.0008143739541992545, + "learning_rate": 1.8273547549458203e-08, + "loss": 0.0002, + "num_input_tokens_seen": 130346152, + "step": 193440 + }, + { + "epoch": 4.725893533334962, + "grad_norm": 0.001741119078360498, + "learning_rate": 1.825732299755145e-08, + "loss": 0.0, + "num_input_tokens_seen": 130349224, + "step": 193445 + }, + { + "epoch": 4.726015684166809, + "grad_norm": 2.8913536880281754e-05, + "learning_rate": 1.824110558514136e-08, + "loss": 0.0, + "num_input_tokens_seen": 130352488, + "step": 193450 + }, + { + "epoch": 4.7261378349986565, + "grad_norm": 5.65302834729664e-05, + "learning_rate": 1.8224895312346055e-08, + "loss": 0.0, + "num_input_tokens_seen": 130355816, + "step": 193455 + }, + { + "epoch": 4.726259985830503, + "grad_norm": 0.0007669543847441673, + "learning_rate": 1.8208692179283446e-08, + "loss": 0.0, + "num_input_tokens_seen": 130359144, + "step": 193460 + }, + { + "epoch": 4.726382136662351, + "grad_norm": 7.570572779513896e-05, + "learning_rate": 1.8192496186071216e-08, + "loss": 0.0, + "num_input_tokens_seen": 130362216, + "step": 193465 + }, + { + "epoch": 4.726504287494198, + "grad_norm": 0.0005270456313155591, + "learning_rate": 1.8176307332827378e-08, + "loss": 0.0, + "num_input_tokens_seen": 130365032, + "step": 193470 + }, + { + "epoch": 4.726626438326045, + "grad_norm": 0.0011675796704366803, + "learning_rate": 1.8160125619669285e-08, + "loss": 0.0, + "num_input_tokens_seen": 130368680, + "step": 193475 + }, + { + "epoch": 4.726748589157892, + "grad_norm": 0.00010108354763360694, + "learning_rate": 1.8143951046714957e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130372008, + "step": 193480 + }, + { + "epoch": 4.72687073998974, + "grad_norm": 0.00028355169342830777, + "learning_rate": 1.8127783614081958e-08, + "loss": 0.0, + "num_input_tokens_seen": 130375784, + "step": 193485 + }, + { + "epoch": 4.726992890821586, + "grad_norm": 0.00012076576967956498, + "learning_rate": 1.811162332188776e-08, + "loss": 0.0, + "num_input_tokens_seen": 130379816, + "step": 193490 + }, + { + "epoch": 4.727115041653434, + "grad_norm": 0.00043065642239525914, + "learning_rate": 1.809547017024993e-08, + "loss": 0.0, + "num_input_tokens_seen": 130383208, + "step": 193495 + }, + { + "epoch": 4.727237192485281, + "grad_norm": 0.00017358400509692729, + "learning_rate": 1.8079324159285925e-08, + "loss": 0.0, + "num_input_tokens_seen": 130386920, + "step": 193500 + }, + { + "epoch": 4.727359343317128, + "grad_norm": 0.0008371736039407551, + "learning_rate": 1.8063185289113326e-08, + "loss": 0.0, + "num_input_tokens_seen": 130390824, + "step": 193505 + }, + { + "epoch": 4.727481494148975, + "grad_norm": 0.1857682764530182, + "learning_rate": 1.8047053559849146e-08, + "loss": 0.0, + "num_input_tokens_seen": 130393960, + "step": 193510 + }, + { + "epoch": 4.727603644980823, + "grad_norm": 0.0001310663647018373, + "learning_rate": 1.8030928971610958e-08, + "loss": 0.0305, + "num_input_tokens_seen": 130397160, + "step": 193515 + }, + { + "epoch": 4.7277257958126695, + "grad_norm": 0.0009740307577885687, + "learning_rate": 1.8014811524516006e-08, + "loss": 0.0, + "num_input_tokens_seen": 130400552, + "step": 193520 + }, + { + "epoch": 4.727847946644516, + "grad_norm": 3.696549538290128e-05, + "learning_rate": 1.7998701218681413e-08, + "loss": 0.0, + "num_input_tokens_seen": 130403816, + "step": 193525 + }, + { + "epoch": 4.727970097476364, + "grad_norm": 6.525940261781216e-05, + "learning_rate": 1.798259805422453e-08, + "loss": 0.0, + "num_input_tokens_seen": 130407080, + "step": 193530 + }, + { + "epoch": 4.728092248308211, + "grad_norm": 1.0025220944953617e-05, + "learning_rate": 1.7966502031262154e-08, + "loss": 0.0, + "num_input_tokens_seen": 130410088, + "step": 193535 + }, + { + "epoch": 4.728214399140058, + "grad_norm": 0.0003596026508603245, + "learning_rate": 1.7950413149911638e-08, + "loss": 0.0, + "num_input_tokens_seen": 130413992, + "step": 193540 + }, + { + "epoch": 4.728336549971905, + "grad_norm": 8.720139157958329e-05, + "learning_rate": 1.7934331410289773e-08, + "loss": 0.0, + "num_input_tokens_seen": 130417576, + "step": 193545 + }, + { + "epoch": 4.728458700803753, + "grad_norm": 0.001967525575309992, + "learning_rate": 1.7918256812513576e-08, + "loss": 0.0, + "num_input_tokens_seen": 130420968, + "step": 193550 + }, + { + "epoch": 4.728580851635599, + "grad_norm": 0.002015329897403717, + "learning_rate": 1.790218935670007e-08, + "loss": 0.0, + "num_input_tokens_seen": 130424424, + "step": 193555 + }, + { + "epoch": 4.728703002467447, + "grad_norm": 0.0013729424681514502, + "learning_rate": 1.7886129042965826e-08, + "loss": 0.0, + "num_input_tokens_seen": 130428008, + "step": 193560 + }, + { + "epoch": 4.728825153299294, + "grad_norm": 0.0003414524544496089, + "learning_rate": 1.787007587142797e-08, + "loss": 0.0, + "num_input_tokens_seen": 130431080, + "step": 193565 + }, + { + "epoch": 4.728947304131141, + "grad_norm": 0.002865726361051202, + "learning_rate": 1.7854029842203078e-08, + "loss": 0.0, + "num_input_tokens_seen": 130434536, + "step": 193570 + }, + { + "epoch": 4.729069454962988, + "grad_norm": 0.00024214327277150005, + "learning_rate": 1.7837990955407723e-08, + "loss": 0.0, + "num_input_tokens_seen": 130437864, + "step": 193575 + }, + { + "epoch": 4.729191605794836, + "grad_norm": 0.001680655055679381, + "learning_rate": 1.782195921115881e-08, + "loss": 0.0, + "num_input_tokens_seen": 130440872, + "step": 193580 + }, + { + "epoch": 4.7293137566266825, + "grad_norm": 0.0011174753308296204, + "learning_rate": 1.7805934609572693e-08, + "loss": 0.0, + "num_input_tokens_seen": 130443880, + "step": 193585 + }, + { + "epoch": 4.72943590745853, + "grad_norm": 3.276481220382266e-05, + "learning_rate": 1.7789917150766054e-08, + "loss": 0.0, + "num_input_tokens_seen": 130447400, + "step": 193590 + }, + { + "epoch": 4.729558058290377, + "grad_norm": 0.005894262343645096, + "learning_rate": 1.7773906834855245e-08, + "loss": 0.0, + "num_input_tokens_seen": 130451240, + "step": 193595 + }, + { + "epoch": 4.729680209122224, + "grad_norm": 6.7249638959765434e-06, + "learning_rate": 1.7757903661956842e-08, + "loss": 0.0, + "num_input_tokens_seen": 130454760, + "step": 193600 + }, + { + "epoch": 4.729802359954071, + "grad_norm": 0.00039404755807481706, + "learning_rate": 1.774190763218719e-08, + "loss": 0.0, + "num_input_tokens_seen": 130458728, + "step": 193605 + }, + { + "epoch": 4.729924510785919, + "grad_norm": 3.2486277632415295e-05, + "learning_rate": 1.7725918745662426e-08, + "loss": 0.0, + "num_input_tokens_seen": 130462120, + "step": 193610 + }, + { + "epoch": 4.730046661617766, + "grad_norm": 0.00042969462811015546, + "learning_rate": 1.770993700249912e-08, + "loss": 0.0, + "num_input_tokens_seen": 130465768, + "step": 193615 + }, + { + "epoch": 4.730168812449612, + "grad_norm": 3.5601688068709336e-06, + "learning_rate": 1.7693962402813288e-08, + "loss": 0.0, + "num_input_tokens_seen": 130469288, + "step": 193620 + }, + { + "epoch": 4.73029096328146, + "grad_norm": 2.8810822186642326e-05, + "learning_rate": 1.7677994946721286e-08, + "loss": 0.0, + "num_input_tokens_seen": 130472808, + "step": 193625 + }, + { + "epoch": 4.730413114113307, + "grad_norm": 0.001526939682662487, + "learning_rate": 1.7662034634339017e-08, + "loss": 0.0, + "num_input_tokens_seen": 130476264, + "step": 193630 + }, + { + "epoch": 4.730535264945154, + "grad_norm": 0.00026932620676234365, + "learning_rate": 1.7646081465782614e-08, + "loss": 0.0, + "num_input_tokens_seen": 130479272, + "step": 193635 + }, + { + "epoch": 4.730657415777001, + "grad_norm": 0.0005830815061926842, + "learning_rate": 1.7630135441168203e-08, + "loss": 0.0, + "num_input_tokens_seen": 130482728, + "step": 193640 + }, + { + "epoch": 4.730779566608849, + "grad_norm": 0.00040932599222287536, + "learning_rate": 1.7614196560611583e-08, + "loss": 0.0, + "num_input_tokens_seen": 130486056, + "step": 193645 + }, + { + "epoch": 4.7309017174406955, + "grad_norm": 0.0009243565145879984, + "learning_rate": 1.7598264824228883e-08, + "loss": 0.0, + "num_input_tokens_seen": 130489128, + "step": 193650 + }, + { + "epoch": 4.731023868272543, + "grad_norm": 0.00019320863066241145, + "learning_rate": 1.7582340232135782e-08, + "loss": 0.0, + "num_input_tokens_seen": 130492456, + "step": 193655 + }, + { + "epoch": 4.73114601910439, + "grad_norm": 0.0003500950988382101, + "learning_rate": 1.7566422784448087e-08, + "loss": 0.0, + "num_input_tokens_seen": 130495912, + "step": 193660 + }, + { + "epoch": 4.7312681699362376, + "grad_norm": 0.0006958039593882859, + "learning_rate": 1.7550512481281698e-08, + "loss": 0.0, + "num_input_tokens_seen": 130499368, + "step": 193665 + }, + { + "epoch": 4.731390320768084, + "grad_norm": 2.7768734071287327e-05, + "learning_rate": 1.753460932275208e-08, + "loss": 0.0, + "num_input_tokens_seen": 130503016, + "step": 193670 + }, + { + "epoch": 4.731512471599932, + "grad_norm": 0.0002878334780689329, + "learning_rate": 1.751871330897514e-08, + "loss": 0.0, + "num_input_tokens_seen": 130506216, + "step": 193675 + }, + { + "epoch": 4.731634622431779, + "grad_norm": 6.573292921530083e-05, + "learning_rate": 1.7502824440066344e-08, + "loss": 0.0, + "num_input_tokens_seen": 130509544, + "step": 193680 + }, + { + "epoch": 4.731756773263626, + "grad_norm": 0.0005611705128103495, + "learning_rate": 1.7486942716141374e-08, + "loss": 0.0, + "num_input_tokens_seen": 130512552, + "step": 193685 + }, + { + "epoch": 4.731878924095473, + "grad_norm": 0.002522786846384406, + "learning_rate": 1.747106813731547e-08, + "loss": 0.0825, + "num_input_tokens_seen": 130515688, + "step": 193690 + }, + { + "epoch": 4.73200107492732, + "grad_norm": 0.0021701049990952015, + "learning_rate": 1.7455200703704432e-08, + "loss": 0.0, + "num_input_tokens_seen": 130518888, + "step": 193695 + }, + { + "epoch": 4.732123225759167, + "grad_norm": 0.00020151086209807545, + "learning_rate": 1.7439340415423164e-08, + "loss": 0.0, + "num_input_tokens_seen": 130522216, + "step": 193700 + }, + { + "epoch": 4.732245376591015, + "grad_norm": 0.0006844014278613031, + "learning_rate": 1.7423487272587577e-08, + "loss": 0.0, + "num_input_tokens_seen": 130525352, + "step": 193705 + }, + { + "epoch": 4.732367527422862, + "grad_norm": 0.003490231931209564, + "learning_rate": 1.740764127531258e-08, + "loss": 0.0, + "num_input_tokens_seen": 130529128, + "step": 193710 + }, + { + "epoch": 4.732489678254709, + "grad_norm": 0.010199329815804958, + "learning_rate": 1.739180242371341e-08, + "loss": 0.0, + "num_input_tokens_seen": 130532008, + "step": 193715 + }, + { + "epoch": 4.732611829086556, + "grad_norm": 0.0008412694442085922, + "learning_rate": 1.7375970717905418e-08, + "loss": 0.0, + "num_input_tokens_seen": 130535208, + "step": 193720 + }, + { + "epoch": 4.732733979918403, + "grad_norm": 0.00025614985497668386, + "learning_rate": 1.736014615800352e-08, + "loss": 0.0, + "num_input_tokens_seen": 130538344, + "step": 193725 + }, + { + "epoch": 4.732856130750251, + "grad_norm": 0.00017684178601484746, + "learning_rate": 1.734432874412306e-08, + "loss": 0.0, + "num_input_tokens_seen": 130541416, + "step": 193730 + }, + { + "epoch": 4.732978281582097, + "grad_norm": 9.338807285530493e-05, + "learning_rate": 1.732851847637895e-08, + "loss": 0.0, + "num_input_tokens_seen": 130544552, + "step": 193735 + }, + { + "epoch": 4.733100432413945, + "grad_norm": 0.0050024231895804405, + "learning_rate": 1.7312715354886098e-08, + "loss": 0.0, + "num_input_tokens_seen": 130548008, + "step": 193740 + }, + { + "epoch": 4.733222583245792, + "grad_norm": 0.00018156580335926265, + "learning_rate": 1.7296919379759635e-08, + "loss": 0.0, + "num_input_tokens_seen": 130551208, + "step": 193745 + }, + { + "epoch": 4.733344734077639, + "grad_norm": 0.00015616827295161784, + "learning_rate": 1.7281130551114132e-08, + "loss": 0.0, + "num_input_tokens_seen": 130554792, + "step": 193750 + }, + { + "epoch": 4.733466884909486, + "grad_norm": 2.225218850071542e-05, + "learning_rate": 1.726534886906461e-08, + "loss": 0.0, + "num_input_tokens_seen": 130558376, + "step": 193755 + }, + { + "epoch": 4.733589035741334, + "grad_norm": 0.0001064696698449552, + "learning_rate": 1.7249574333725868e-08, + "loss": 0.0, + "num_input_tokens_seen": 130561896, + "step": 193760 + }, + { + "epoch": 4.7337111865731805, + "grad_norm": 0.00014043239934835583, + "learning_rate": 1.723380694521237e-08, + "loss": 0.0, + "num_input_tokens_seen": 130565480, + "step": 193765 + }, + { + "epoch": 4.733833337405028, + "grad_norm": 0.0004173256456851959, + "learning_rate": 1.7218046703639134e-08, + "loss": 0.0, + "num_input_tokens_seen": 130568616, + "step": 193770 + }, + { + "epoch": 4.733955488236875, + "grad_norm": 0.0012830361956730485, + "learning_rate": 1.7202293609120512e-08, + "loss": 0.0, + "num_input_tokens_seen": 130572264, + "step": 193775 + }, + { + "epoch": 4.7340776390687225, + "grad_norm": 9.681628580437973e-05, + "learning_rate": 1.71865476617713e-08, + "loss": 0.0, + "num_input_tokens_seen": 130575848, + "step": 193780 + }, + { + "epoch": 4.734199789900569, + "grad_norm": 0.0009466626797802746, + "learning_rate": 1.7170808861705633e-08, + "loss": 0.0, + "num_input_tokens_seen": 130579496, + "step": 193785 + }, + { + "epoch": 4.734321940732416, + "grad_norm": 2.105771091009956e-05, + "learning_rate": 1.7155077209038416e-08, + "loss": 0.0, + "num_input_tokens_seen": 130582568, + "step": 193790 + }, + { + "epoch": 4.734444091564264, + "grad_norm": 0.008503005839884281, + "learning_rate": 1.713935270388367e-08, + "loss": 0.0, + "num_input_tokens_seen": 130585640, + "step": 193795 + }, + { + "epoch": 4.73456624239611, + "grad_norm": 7.453518628608435e-05, + "learning_rate": 1.712363534635597e-08, + "loss": 0.0, + "num_input_tokens_seen": 130588904, + "step": 193800 + }, + { + "epoch": 4.734688393227958, + "grad_norm": 0.00011131344945169985, + "learning_rate": 1.7107925136569557e-08, + "loss": 0.0, + "num_input_tokens_seen": 130592296, + "step": 193805 + }, + { + "epoch": 4.734810544059805, + "grad_norm": 4.882766370428726e-05, + "learning_rate": 1.7092222074638674e-08, + "loss": 0.0, + "num_input_tokens_seen": 130595432, + "step": 193810 + }, + { + "epoch": 4.734932694891652, + "grad_norm": 2.8152720915386453e-05, + "learning_rate": 1.7076526160677563e-08, + "loss": 0.0, + "num_input_tokens_seen": 130598568, + "step": 193815 + }, + { + "epoch": 4.735054845723499, + "grad_norm": 0.0005087078316137195, + "learning_rate": 1.7060837394800244e-08, + "loss": 0.0, + "num_input_tokens_seen": 130602856, + "step": 193820 + }, + { + "epoch": 4.735176996555347, + "grad_norm": 2.070388654829003e-05, + "learning_rate": 1.7045155777120844e-08, + "loss": 0.0, + "num_input_tokens_seen": 130606248, + "step": 193825 + }, + { + "epoch": 4.7352991473871935, + "grad_norm": 0.0008681362960487604, + "learning_rate": 1.7029481307753613e-08, + "loss": 0.0, + "num_input_tokens_seen": 130610408, + "step": 193830 + }, + { + "epoch": 4.735421298219041, + "grad_norm": 0.00028922062483616173, + "learning_rate": 1.7013813986812233e-08, + "loss": 0.0, + "num_input_tokens_seen": 130615080, + "step": 193835 + }, + { + "epoch": 4.735543449050888, + "grad_norm": 0.003609132720157504, + "learning_rate": 1.6998153814410943e-08, + "loss": 0.0, + "num_input_tokens_seen": 130618152, + "step": 193840 + }, + { + "epoch": 4.7356655998827355, + "grad_norm": 2.6319756216253154e-05, + "learning_rate": 1.6982500790663325e-08, + "loss": 0.0, + "num_input_tokens_seen": 130621672, + "step": 193845 + }, + { + "epoch": 4.735787750714582, + "grad_norm": 0.0005618033464998007, + "learning_rate": 1.6966854915683504e-08, + "loss": 0.0, + "num_input_tokens_seen": 130625064, + "step": 193850 + }, + { + "epoch": 4.73590990154643, + "grad_norm": 0.0007557669887319207, + "learning_rate": 1.6951216189585062e-08, + "loss": 0.0, + "num_input_tokens_seen": 130628456, + "step": 193855 + }, + { + "epoch": 4.736032052378277, + "grad_norm": 7.4730646701937076e-06, + "learning_rate": 1.693558461248168e-08, + "loss": 0.0, + "num_input_tokens_seen": 130631848, + "step": 193860 + }, + { + "epoch": 4.736154203210123, + "grad_norm": 0.00011738949979189783, + "learning_rate": 1.691996018448727e-08, + "loss": 0.0, + "num_input_tokens_seen": 130634920, + "step": 193865 + }, + { + "epoch": 4.736276354041971, + "grad_norm": 0.0007808083901181817, + "learning_rate": 1.6904342905715297e-08, + "loss": 0.0, + "num_input_tokens_seen": 130638120, + "step": 193870 + }, + { + "epoch": 4.736398504873819, + "grad_norm": 0.003294591326266527, + "learning_rate": 1.6888732776279336e-08, + "loss": 0.0, + "num_input_tokens_seen": 130641448, + "step": 193875 + }, + { + "epoch": 4.736520655705665, + "grad_norm": 7.634532084921375e-05, + "learning_rate": 1.6873129796292964e-08, + "loss": 0.0, + "num_input_tokens_seen": 130645096, + "step": 193880 + }, + { + "epoch": 4.736642806537512, + "grad_norm": 0.0010532429441809654, + "learning_rate": 1.685753396586953e-08, + "loss": 0.0, + "num_input_tokens_seen": 130648744, + "step": 193885 + }, + { + "epoch": 4.73676495736936, + "grad_norm": 0.00015523310867138207, + "learning_rate": 1.6841945285122727e-08, + "loss": 0.0, + "num_input_tokens_seen": 130652200, + "step": 193890 + }, + { + "epoch": 4.7368871082012065, + "grad_norm": 0.0006120402249507606, + "learning_rate": 1.6826363754165573e-08, + "loss": 0.0, + "num_input_tokens_seen": 130655528, + "step": 193895 + }, + { + "epoch": 4.737009259033054, + "grad_norm": 0.015912186354398727, + "learning_rate": 1.6810789373111644e-08, + "loss": 0.0, + "num_input_tokens_seen": 130658472, + "step": 193900 + }, + { + "epoch": 4.737131409864901, + "grad_norm": 0.0005159526481293142, + "learning_rate": 1.6795222142073962e-08, + "loss": 0.0, + "num_input_tokens_seen": 130661928, + "step": 193905 + }, + { + "epoch": 4.7372535606967485, + "grad_norm": 0.00624240655452013, + "learning_rate": 1.677966206116599e-08, + "loss": 0.0, + "num_input_tokens_seen": 130665448, + "step": 193910 + }, + { + "epoch": 4.737375711528595, + "grad_norm": 6.810051127104089e-05, + "learning_rate": 1.676410913050086e-08, + "loss": 0.0, + "num_input_tokens_seen": 130668712, + "step": 193915 + }, + { + "epoch": 4.737497862360443, + "grad_norm": 0.01036494504660368, + "learning_rate": 1.674856335019137e-08, + "loss": 0.0, + "num_input_tokens_seen": 130671976, + "step": 193920 + }, + { + "epoch": 4.73762001319229, + "grad_norm": 0.0002825463598128408, + "learning_rate": 1.6733024720350987e-08, + "loss": 0.0, + "num_input_tokens_seen": 130675240, + "step": 193925 + }, + { + "epoch": 4.737742164024137, + "grad_norm": 0.00041512216557748616, + "learning_rate": 1.6717493241092396e-08, + "loss": 0.0, + "num_input_tokens_seen": 130678376, + "step": 193930 + }, + { + "epoch": 4.737864314855984, + "grad_norm": 0.002276372630149126, + "learning_rate": 1.670196891252873e-08, + "loss": 0.0, + "num_input_tokens_seen": 130681768, + "step": 193935 + }, + { + "epoch": 4.737986465687832, + "grad_norm": 0.0005208499496802688, + "learning_rate": 1.668645173477279e-08, + "loss": 0.0, + "num_input_tokens_seen": 130684968, + "step": 193940 + }, + { + "epoch": 4.738108616519678, + "grad_norm": 0.0005167789640836418, + "learning_rate": 1.667094170793748e-08, + "loss": 0.0, + "num_input_tokens_seen": 130688744, + "step": 193945 + }, + { + "epoch": 4.738230767351526, + "grad_norm": 0.0034957481548190117, + "learning_rate": 1.6655438832135494e-08, + "loss": 0.0, + "num_input_tokens_seen": 130692392, + "step": 193950 + }, + { + "epoch": 4.738352918183373, + "grad_norm": 0.0005267342203296721, + "learning_rate": 1.6639943107479627e-08, + "loss": 0.0, + "num_input_tokens_seen": 130695976, + "step": 193955 + }, + { + "epoch": 4.7384750690152195, + "grad_norm": 0.0006037270068190992, + "learning_rate": 1.6624454534082678e-08, + "loss": 0.0, + "num_input_tokens_seen": 130699560, + "step": 193960 + }, + { + "epoch": 4.738597219847067, + "grad_norm": 0.00028693300555460155, + "learning_rate": 1.6608973112057113e-08, + "loss": 0.0, + "num_input_tokens_seen": 130703528, + "step": 193965 + }, + { + "epoch": 4.738719370678915, + "grad_norm": 0.01057329960167408, + "learning_rate": 1.659349884151573e-08, + "loss": 0.0, + "num_input_tokens_seen": 130706920, + "step": 193970 + }, + { + "epoch": 4.7388415215107615, + "grad_norm": 0.715209424495697, + "learning_rate": 1.6578031722570774e-08, + "loss": 0.0004, + "num_input_tokens_seen": 130710376, + "step": 193975 + }, + { + "epoch": 4.738963672342608, + "grad_norm": 1.2409607734298334e-05, + "learning_rate": 1.656257175533493e-08, + "loss": 0.0, + "num_input_tokens_seen": 130713704, + "step": 193980 + }, + { + "epoch": 4.739085823174456, + "grad_norm": 3.912892134394497e-05, + "learning_rate": 1.6547118939920556e-08, + "loss": 0.0, + "num_input_tokens_seen": 130717032, + "step": 193985 + }, + { + "epoch": 4.739207974006303, + "grad_norm": 0.0005390921141952276, + "learning_rate": 1.6531673276440118e-08, + "loss": 0.0, + "num_input_tokens_seen": 130720936, + "step": 193990 + }, + { + "epoch": 4.73933012483815, + "grad_norm": 0.00012799096293747425, + "learning_rate": 1.6516234765005855e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130724136, + "step": 193995 + }, + { + "epoch": 4.739452275669997, + "grad_norm": 0.0004156142531428486, + "learning_rate": 1.6500803405730013e-08, + "loss": 0.0, + "num_input_tokens_seen": 130727784, + "step": 194000 + }, + { + "epoch": 4.739574426501845, + "grad_norm": 3.3402313420083374e-05, + "learning_rate": 1.6485379198724948e-08, + "loss": 0.0, + "num_input_tokens_seen": 130730856, + "step": 194005 + }, + { + "epoch": 4.739696577333691, + "grad_norm": 8.93677570275031e-05, + "learning_rate": 1.6469962144102568e-08, + "loss": 0.0, + "num_input_tokens_seen": 130734312, + "step": 194010 + }, + { + "epoch": 4.739818728165539, + "grad_norm": 1.1265015018580016e-05, + "learning_rate": 1.645455224197534e-08, + "loss": 0.0, + "num_input_tokens_seen": 130737256, + "step": 194015 + }, + { + "epoch": 4.739940878997386, + "grad_norm": 0.00013452736311592162, + "learning_rate": 1.6439149492455172e-08, + "loss": 0.0, + "num_input_tokens_seen": 130740456, + "step": 194020 + }, + { + "epoch": 4.740063029829233, + "grad_norm": 0.00022618900402449071, + "learning_rate": 1.642375389565387e-08, + "loss": 0.0, + "num_input_tokens_seen": 130743528, + "step": 194025 + }, + { + "epoch": 4.74018518066108, + "grad_norm": 0.003155721351504326, + "learning_rate": 1.640836545168378e-08, + "loss": 0.0, + "num_input_tokens_seen": 130746792, + "step": 194030 + }, + { + "epoch": 4.740307331492928, + "grad_norm": 0.0001418334140907973, + "learning_rate": 1.6392984160656486e-08, + "loss": 0.0, + "num_input_tokens_seen": 130750056, + "step": 194035 + }, + { + "epoch": 4.740429482324775, + "grad_norm": 0.00012222133227624, + "learning_rate": 1.6377610022683897e-08, + "loss": 0.0, + "num_input_tokens_seen": 130752744, + "step": 194040 + }, + { + "epoch": 4.740551633156622, + "grad_norm": 6.382190622389317e-05, + "learning_rate": 1.6362243037878032e-08, + "loss": 0.0, + "num_input_tokens_seen": 130755944, + "step": 194045 + }, + { + "epoch": 4.740673783988469, + "grad_norm": 16.01801300048828, + "learning_rate": 1.634688320635047e-08, + "loss": 0.0403, + "num_input_tokens_seen": 130759464, + "step": 194050 + }, + { + "epoch": 4.740795934820316, + "grad_norm": 9.348007733933628e-05, + "learning_rate": 1.6331530528212902e-08, + "loss": 0.0, + "num_input_tokens_seen": 130762984, + "step": 194055 + }, + { + "epoch": 4.740918085652163, + "grad_norm": 0.00011072187771787867, + "learning_rate": 1.6316185003577008e-08, + "loss": 0.0, + "num_input_tokens_seen": 130766248, + "step": 194060 + }, + { + "epoch": 4.74104023648401, + "grad_norm": 0.09227142482995987, + "learning_rate": 1.630084663255449e-08, + "loss": 0.0, + "num_input_tokens_seen": 130769448, + "step": 194065 + }, + { + "epoch": 4.741162387315858, + "grad_norm": 5.0311602535657585e-05, + "learning_rate": 1.628551541525669e-08, + "loss": 0.0, + "num_input_tokens_seen": 130773032, + "step": 194070 + }, + { + "epoch": 4.7412845381477045, + "grad_norm": 0.009662486612796783, + "learning_rate": 1.6270191351795194e-08, + "loss": 0.0, + "num_input_tokens_seen": 130776424, + "step": 194075 + }, + { + "epoch": 4.741406688979552, + "grad_norm": 0.00038499830407090485, + "learning_rate": 1.6254874442281574e-08, + "loss": 0.0, + "num_input_tokens_seen": 130779944, + "step": 194080 + }, + { + "epoch": 4.741528839811399, + "grad_norm": 0.01203479990363121, + "learning_rate": 1.623956468682708e-08, + "loss": 0.0, + "num_input_tokens_seen": 130783144, + "step": 194085 + }, + { + "epoch": 4.7416509906432465, + "grad_norm": 0.0028149220161139965, + "learning_rate": 1.6224262085543063e-08, + "loss": 0.0, + "num_input_tokens_seen": 130786216, + "step": 194090 + }, + { + "epoch": 4.741773141475093, + "grad_norm": 0.00014468298468273133, + "learning_rate": 1.6208966638540766e-08, + "loss": 0.0, + "num_input_tokens_seen": 130789416, + "step": 194095 + }, + { + "epoch": 4.741895292306941, + "grad_norm": 0.0025662481784820557, + "learning_rate": 1.619367834593155e-08, + "loss": 0.0, + "num_input_tokens_seen": 130792680, + "step": 194100 + }, + { + "epoch": 4.742017443138788, + "grad_norm": 0.0013403160264715552, + "learning_rate": 1.6178397207826434e-08, + "loss": 0.0, + "num_input_tokens_seen": 130795944, + "step": 194105 + }, + { + "epoch": 4.742139593970635, + "grad_norm": 0.00011099289258709177, + "learning_rate": 1.616312322433666e-08, + "loss": 0.0, + "num_input_tokens_seen": 130799400, + "step": 194110 + }, + { + "epoch": 4.742261744802482, + "grad_norm": 0.0006016991101205349, + "learning_rate": 1.6147856395573258e-08, + "loss": 0.0, + "num_input_tokens_seen": 130802984, + "step": 194115 + }, + { + "epoch": 4.74238389563433, + "grad_norm": 0.04025644809007645, + "learning_rate": 1.613259672164735e-08, + "loss": 0.0, + "num_input_tokens_seen": 130807016, + "step": 194120 + }, + { + "epoch": 4.742506046466176, + "grad_norm": 0.0006900572334416211, + "learning_rate": 1.611734420266986e-08, + "loss": 0.0, + "num_input_tokens_seen": 130810344, + "step": 194125 + }, + { + "epoch": 4.742628197298024, + "grad_norm": 0.026209458708763123, + "learning_rate": 1.6102098838751465e-08, + "loss": 0.0477, + "num_input_tokens_seen": 130813928, + "step": 194130 + }, + { + "epoch": 4.742750348129871, + "grad_norm": 0.002796885557472706, + "learning_rate": 1.6086860630003418e-08, + "loss": 0.0, + "num_input_tokens_seen": 130817064, + "step": 194135 + }, + { + "epoch": 4.742872498961718, + "grad_norm": 0.0003261294914409518, + "learning_rate": 1.6071629576536295e-08, + "loss": 0.0, + "num_input_tokens_seen": 130820456, + "step": 194140 + }, + { + "epoch": 4.742994649793565, + "grad_norm": 0.0002879606618080288, + "learning_rate": 1.6056405678460892e-08, + "loss": 0.0, + "num_input_tokens_seen": 130823848, + "step": 194145 + }, + { + "epoch": 4.743116800625412, + "grad_norm": 0.08680712431669235, + "learning_rate": 1.604118893588802e-08, + "loss": 0.0, + "num_input_tokens_seen": 130827240, + "step": 194150 + }, + { + "epoch": 4.7432389514572595, + "grad_norm": 4.954704490955919e-05, + "learning_rate": 1.6025979348928242e-08, + "loss": 0.0, + "num_input_tokens_seen": 130830952, + "step": 194155 + }, + { + "epoch": 4.743361102289106, + "grad_norm": 0.0006787815364077687, + "learning_rate": 1.601077691769226e-08, + "loss": 0.0, + "num_input_tokens_seen": 130834088, + "step": 194160 + }, + { + "epoch": 4.743483253120954, + "grad_norm": 0.00010076801117975265, + "learning_rate": 1.5995581642290534e-08, + "loss": 0.0, + "num_input_tokens_seen": 130837352, + "step": 194165 + }, + { + "epoch": 4.743605403952801, + "grad_norm": 1.2453545423340984e-05, + "learning_rate": 1.5980393522833536e-08, + "loss": 0.0, + "num_input_tokens_seen": 130840296, + "step": 194170 + }, + { + "epoch": 4.743727554784648, + "grad_norm": 0.0006471985834650695, + "learning_rate": 1.596521255943184e-08, + "loss": 0.0, + "num_input_tokens_seen": 130844200, + "step": 194175 + }, + { + "epoch": 4.743849705616495, + "grad_norm": 0.000188805308425799, + "learning_rate": 1.5950038752195806e-08, + "loss": 0.0, + "num_input_tokens_seen": 130847208, + "step": 194180 + }, + { + "epoch": 4.743971856448343, + "grad_norm": 0.0007714568055234849, + "learning_rate": 1.5934872101235785e-08, + "loss": 0.0, + "num_input_tokens_seen": 130851368, + "step": 194185 + }, + { + "epoch": 4.744094007280189, + "grad_norm": 4.891970456810668e-05, + "learning_rate": 1.5919712606662027e-08, + "loss": 0.0, + "num_input_tokens_seen": 130854568, + "step": 194190 + }, + { + "epoch": 4.744216158112037, + "grad_norm": 0.00020988327742088586, + "learning_rate": 1.590456026858478e-08, + "loss": 0.0, + "num_input_tokens_seen": 130858600, + "step": 194195 + }, + { + "epoch": 4.744338308943884, + "grad_norm": 0.00017921268590725958, + "learning_rate": 1.588941508711428e-08, + "loss": 0.0, + "num_input_tokens_seen": 130861864, + "step": 194200 + }, + { + "epoch": 4.744460459775731, + "grad_norm": 0.0008576527470722795, + "learning_rate": 1.5874277062360663e-08, + "loss": 0.0, + "num_input_tokens_seen": 130865192, + "step": 194205 + }, + { + "epoch": 4.744582610607578, + "grad_norm": 0.0070258015766739845, + "learning_rate": 1.5859146194433958e-08, + "loss": 0.0, + "num_input_tokens_seen": 130868392, + "step": 194210 + }, + { + "epoch": 4.744704761439426, + "grad_norm": 9.018840501084924e-05, + "learning_rate": 1.5844022483444296e-08, + "loss": 0.0, + "num_input_tokens_seen": 130871592, + "step": 194215 + }, + { + "epoch": 4.7448269122712725, + "grad_norm": 0.0017428853316232562, + "learning_rate": 1.582890592950159e-08, + "loss": 0.0, + "num_input_tokens_seen": 130874664, + "step": 194220 + }, + { + "epoch": 4.744949063103119, + "grad_norm": 0.007411367725580931, + "learning_rate": 1.581379653271586e-08, + "loss": 0.0, + "num_input_tokens_seen": 130878056, + "step": 194225 + }, + { + "epoch": 4.745071213934967, + "grad_norm": 0.001655631116591394, + "learning_rate": 1.57986942931968e-08, + "loss": 0.0, + "num_input_tokens_seen": 130881768, + "step": 194230 + }, + { + "epoch": 4.7451933647668145, + "grad_norm": 0.0011050160974264145, + "learning_rate": 1.5783599211054434e-08, + "loss": 0.0, + "num_input_tokens_seen": 130885224, + "step": 194235 + }, + { + "epoch": 4.745315515598661, + "grad_norm": 0.0002407036372460425, + "learning_rate": 1.5768511286398446e-08, + "loss": 0.0, + "num_input_tokens_seen": 130888424, + "step": 194240 + }, + { + "epoch": 4.745437666430508, + "grad_norm": 0.0037414035759866238, + "learning_rate": 1.575343051933853e-08, + "loss": 0.0, + "num_input_tokens_seen": 130891624, + "step": 194245 + }, + { + "epoch": 4.745559817262356, + "grad_norm": 0.00037726483424194157, + "learning_rate": 1.5738356909984372e-08, + "loss": 0.0, + "num_input_tokens_seen": 130895016, + "step": 194250 + }, + { + "epoch": 4.745681968094202, + "grad_norm": 0.0006118064629845321, + "learning_rate": 1.572329045844578e-08, + "loss": 0.0, + "num_input_tokens_seen": 130898856, + "step": 194255 + }, + { + "epoch": 4.74580411892605, + "grad_norm": 1.8054362953989767e-05, + "learning_rate": 1.5708231164831998e-08, + "loss": 0.0, + "num_input_tokens_seen": 130902056, + "step": 194260 + }, + { + "epoch": 4.745926269757897, + "grad_norm": 0.00450882688164711, + "learning_rate": 1.569317902925271e-08, + "loss": 0.0, + "num_input_tokens_seen": 130905384, + "step": 194265 + }, + { + "epoch": 4.746048420589744, + "grad_norm": 0.0017727892845869064, + "learning_rate": 1.5678134051817392e-08, + "loss": 0.0, + "num_input_tokens_seen": 130908712, + "step": 194270 + }, + { + "epoch": 4.746170571421591, + "grad_norm": 0.002703641774132848, + "learning_rate": 1.56630962326354e-08, + "loss": 0.0, + "num_input_tokens_seen": 130911976, + "step": 194275 + }, + { + "epoch": 4.746292722253439, + "grad_norm": 0.002856372855603695, + "learning_rate": 1.5648065571816193e-08, + "loss": 0.0, + "num_input_tokens_seen": 130914728, + "step": 194280 + }, + { + "epoch": 4.7464148730852855, + "grad_norm": 0.00012437388068065047, + "learning_rate": 1.5633042069469025e-08, + "loss": 0.0, + "num_input_tokens_seen": 130917992, + "step": 194285 + }, + { + "epoch": 4.746537023917133, + "grad_norm": 0.0003391630307305604, + "learning_rate": 1.561802572570303e-08, + "loss": 0.0, + "num_input_tokens_seen": 130921512, + "step": 194290 + }, + { + "epoch": 4.74665917474898, + "grad_norm": 4.5554650569101796e-05, + "learning_rate": 1.560301654062768e-08, + "loss": 0.0, + "num_input_tokens_seen": 130925224, + "step": 194295 + }, + { + "epoch": 4.7467813255808275, + "grad_norm": 2.3404105377267115e-05, + "learning_rate": 1.5588014514351766e-08, + "loss": 0.0, + "num_input_tokens_seen": 130928424, + "step": 194300 + }, + { + "epoch": 4.746903476412674, + "grad_norm": 0.0001046815377776511, + "learning_rate": 1.5573019646984765e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130932520, + "step": 194305 + }, + { + "epoch": 4.747025627244522, + "grad_norm": 0.0001049903585226275, + "learning_rate": 1.5558031938635474e-08, + "loss": 0.0, + "num_input_tokens_seen": 130936168, + "step": 194310 + }, + { + "epoch": 4.747147778076369, + "grad_norm": 0.000262533692875877, + "learning_rate": 1.554305138941292e-08, + "loss": 0.0002, + "num_input_tokens_seen": 130939240, + "step": 194315 + }, + { + "epoch": 4.747269928908215, + "grad_norm": 0.000540116336196661, + "learning_rate": 1.5528077999426125e-08, + "loss": 0.0, + "num_input_tokens_seen": 130943080, + "step": 194320 + }, + { + "epoch": 4.747392079740063, + "grad_norm": 0.0008610247168689966, + "learning_rate": 1.5513111768784004e-08, + "loss": 0.0, + "num_input_tokens_seen": 130946536, + "step": 194325 + }, + { + "epoch": 4.747514230571911, + "grad_norm": 0.000537598563823849, + "learning_rate": 1.5498152697595245e-08, + "loss": 0.0, + "num_input_tokens_seen": 130950312, + "step": 194330 + }, + { + "epoch": 4.747636381403757, + "grad_norm": 0.00010188839951297268, + "learning_rate": 1.5483200785968765e-08, + "loss": 0.0, + "num_input_tokens_seen": 130954088, + "step": 194335 + }, + { + "epoch": 4.747758532235604, + "grad_norm": 1.9151226297253743e-05, + "learning_rate": 1.546825603401325e-08, + "loss": 0.0, + "num_input_tokens_seen": 130957608, + "step": 194340 + }, + { + "epoch": 4.747880683067452, + "grad_norm": 0.0001951848535099998, + "learning_rate": 1.5453318441837282e-08, + "loss": 0.0, + "num_input_tokens_seen": 130961384, + "step": 194345 + }, + { + "epoch": 4.748002833899299, + "grad_norm": 0.00010421617480460554, + "learning_rate": 1.5438388009549665e-08, + "loss": 0.0, + "num_input_tokens_seen": 130965096, + "step": 194350 + }, + { + "epoch": 4.748124984731146, + "grad_norm": 1.662445538386237e-05, + "learning_rate": 1.5423464737258974e-08, + "loss": 0.0, + "num_input_tokens_seen": 130968488, + "step": 194355 + }, + { + "epoch": 4.748247135562993, + "grad_norm": 0.0002314087760169059, + "learning_rate": 1.540854862507357e-08, + "loss": 0.0, + "num_input_tokens_seen": 130972520, + "step": 194360 + }, + { + "epoch": 4.748369286394841, + "grad_norm": 0.18845084309577942, + "learning_rate": 1.5393639673102033e-08, + "loss": 0.0001, + "num_input_tokens_seen": 130975720, + "step": 194365 + }, + { + "epoch": 4.748491437226687, + "grad_norm": 0.0035586815793067217, + "learning_rate": 1.537873788145283e-08, + "loss": 0.0, + "num_input_tokens_seen": 130978600, + "step": 194370 + }, + { + "epoch": 4.748613588058535, + "grad_norm": 0.0011052679037675261, + "learning_rate": 1.536384325023421e-08, + "loss": 0.0, + "num_input_tokens_seen": 130982120, + "step": 194375 + }, + { + "epoch": 4.748735738890382, + "grad_norm": 0.0009073576657101512, + "learning_rate": 1.534895577955464e-08, + "loss": 0.0, + "num_input_tokens_seen": 130985384, + "step": 194380 + }, + { + "epoch": 4.748857889722229, + "grad_norm": 0.004358983598649502, + "learning_rate": 1.5334075469522146e-08, + "loss": 0.0, + "num_input_tokens_seen": 130988968, + "step": 194385 + }, + { + "epoch": 4.748980040554076, + "grad_norm": 8.133276423905045e-05, + "learning_rate": 1.5319202320245305e-08, + "loss": 0.0, + "num_input_tokens_seen": 130992616, + "step": 194390 + }, + { + "epoch": 4.749102191385924, + "grad_norm": 0.029877588152885437, + "learning_rate": 1.5304336331831924e-08, + "loss": 0.0, + "num_input_tokens_seen": 130996072, + "step": 194395 + }, + { + "epoch": 4.7492243422177705, + "grad_norm": 0.02860754169523716, + "learning_rate": 1.5289477504390358e-08, + "loss": 0.0, + "num_input_tokens_seen": 130999144, + "step": 194400 + }, + { + "epoch": 4.749346493049618, + "grad_norm": 0.00028780565480701625, + "learning_rate": 1.527462583802852e-08, + "loss": 0.0, + "num_input_tokens_seen": 131002408, + "step": 194405 + }, + { + "epoch": 4.749468643881465, + "grad_norm": 0.0009346139850094914, + "learning_rate": 1.5259781332854436e-08, + "loss": 0.0, + "num_input_tokens_seen": 131005864, + "step": 194410 + }, + { + "epoch": 4.749590794713312, + "grad_norm": 0.0005739172920584679, + "learning_rate": 1.5244943988976135e-08, + "loss": 0.0, + "num_input_tokens_seen": 131009256, + "step": 194415 + }, + { + "epoch": 4.749712945545159, + "grad_norm": 0.00032403404475189745, + "learning_rate": 1.523011380650141e-08, + "loss": 0.0, + "num_input_tokens_seen": 131012904, + "step": 194420 + }, + { + "epoch": 4.749835096377006, + "grad_norm": 0.00260849273763597, + "learning_rate": 1.521529078553818e-08, + "loss": 0.0, + "num_input_tokens_seen": 131016232, + "step": 194425 + }, + { + "epoch": 4.749957247208854, + "grad_norm": 8.902177796699107e-06, + "learning_rate": 1.5200474926194363e-08, + "loss": 0.0006, + "num_input_tokens_seen": 131019624, + "step": 194430 + }, + { + "epoch": 4.7500793980407, + "grad_norm": 0.0015526276547461748, + "learning_rate": 1.518566622857742e-08, + "loss": 0.0058, + "num_input_tokens_seen": 131022824, + "step": 194435 + }, + { + "epoch": 4.750201548872548, + "grad_norm": 0.0007619461975991726, + "learning_rate": 1.5170864692795272e-08, + "loss": 0.0, + "num_input_tokens_seen": 131026792, + "step": 194440 + }, + { + "epoch": 4.750323699704395, + "grad_norm": 1.0548461432335898e-05, + "learning_rate": 1.5156070318955384e-08, + "loss": 0.0727, + "num_input_tokens_seen": 131029672, + "step": 194445 + }, + { + "epoch": 4.750348129870765, + "eval_loss": 0.3178049325942993, + "eval_runtime": 47.5919, + "eval_samples_per_second": 764.521, + "eval_steps_per_second": 95.584, + "num_input_tokens_seen": 131030440, + "step": 194446 + }, + { + "epoch": 4.750445850536242, + "grad_norm": 0.0001051591825671494, + "learning_rate": 1.514128310716556e-08, + "loss": 0.0, + "num_input_tokens_seen": 131033192, + "step": 194450 + }, + { + "epoch": 4.750568001368089, + "grad_norm": 7.242747233249247e-05, + "learning_rate": 1.512650305753316e-08, + "loss": 0.0, + "num_input_tokens_seen": 131036264, + "step": 194455 + }, + { + "epoch": 4.750690152199937, + "grad_norm": 0.00018513934628572315, + "learning_rate": 1.511173017016576e-08, + "loss": 0.0, + "num_input_tokens_seen": 131039144, + "step": 194460 + }, + { + "epoch": 4.7508123030317835, + "grad_norm": 0.0014160667778924108, + "learning_rate": 1.5096964445170723e-08, + "loss": 0.0, + "num_input_tokens_seen": 131042984, + "step": 194465 + }, + { + "epoch": 4.750934453863631, + "grad_norm": 0.0006216327892616391, + "learning_rate": 1.5082205882655518e-08, + "loss": 0.0, + "num_input_tokens_seen": 131046504, + "step": 194470 + }, + { + "epoch": 4.751056604695478, + "grad_norm": 7.211205956991762e-05, + "learning_rate": 1.506745448272728e-08, + "loss": 0.0, + "num_input_tokens_seen": 131049896, + "step": 194475 + }, + { + "epoch": 4.7511787555273255, + "grad_norm": 0.0027999139856547117, + "learning_rate": 1.5052710245493593e-08, + "loss": 0.0, + "num_input_tokens_seen": 131053288, + "step": 194480 + }, + { + "epoch": 4.751300906359172, + "grad_norm": 0.0013571522431448102, + "learning_rate": 1.503797317106148e-08, + "loss": 0.0, + "num_input_tokens_seen": 131056680, + "step": 194485 + }, + { + "epoch": 4.751423057191019, + "grad_norm": 0.0003400477871764451, + "learning_rate": 1.5023243259538078e-08, + "loss": 0.0, + "num_input_tokens_seen": 131060840, + "step": 194490 + }, + { + "epoch": 4.751545208022867, + "grad_norm": 0.004371670540422201, + "learning_rate": 1.5008520511030632e-08, + "loss": 0.0, + "num_input_tokens_seen": 131064104, + "step": 194495 + }, + { + "epoch": 4.751667358854714, + "grad_norm": 0.0003847415209747851, + "learning_rate": 1.499380492564617e-08, + "loss": 0.0, + "num_input_tokens_seen": 131067624, + "step": 194500 + }, + { + "epoch": 4.751789509686561, + "grad_norm": 0.0026928375009447336, + "learning_rate": 1.497909650349172e-08, + "loss": 0.0, + "num_input_tokens_seen": 131071144, + "step": 194505 + }, + { + "epoch": 4.751911660518408, + "grad_norm": 0.00046413298696279526, + "learning_rate": 1.4964395244674077e-08, + "loss": 0.0489, + "num_input_tokens_seen": 131074728, + "step": 194510 + }, + { + "epoch": 4.752033811350255, + "grad_norm": 0.0003477961290627718, + "learning_rate": 1.4949701149300385e-08, + "loss": 0.0, + "num_input_tokens_seen": 131077992, + "step": 194515 + }, + { + "epoch": 4.752155962182102, + "grad_norm": 2.7544947442947887e-05, + "learning_rate": 1.493501421747745e-08, + "loss": 0.0, + "num_input_tokens_seen": 131081192, + "step": 194520 + }, + { + "epoch": 4.75227811301395, + "grad_norm": 0.008760979399085045, + "learning_rate": 1.4920334449311957e-08, + "loss": 0.0, + "num_input_tokens_seen": 131084328, + "step": 194525 + }, + { + "epoch": 4.7524002638457965, + "grad_norm": 6.047858278179774e-06, + "learning_rate": 1.4905661844910934e-08, + "loss": 0.0, + "num_input_tokens_seen": 131087784, + "step": 194530 + }, + { + "epoch": 4.752522414677644, + "grad_norm": 0.00041074713226407766, + "learning_rate": 1.489099640438074e-08, + "loss": 0.0, + "num_input_tokens_seen": 131091112, + "step": 194535 + }, + { + "epoch": 4.752644565509491, + "grad_norm": 0.00014632332022301853, + "learning_rate": 1.4876338127828181e-08, + "loss": 0.0, + "num_input_tokens_seen": 131094248, + "step": 194540 + }, + { + "epoch": 4.7527667163413385, + "grad_norm": 0.0059760636650025845, + "learning_rate": 1.4861687015359947e-08, + "loss": 0.0, + "num_input_tokens_seen": 131097192, + "step": 194545 + }, + { + "epoch": 4.752888867173185, + "grad_norm": 7.05088023096323e-05, + "learning_rate": 1.4847043067082398e-08, + "loss": 0.0266, + "num_input_tokens_seen": 131100456, + "step": 194550 + }, + { + "epoch": 4.753011018005033, + "grad_norm": 3.3506712497910485e-06, + "learning_rate": 1.4832406283102228e-08, + "loss": 0.0313, + "num_input_tokens_seen": 131103720, + "step": 194555 + }, + { + "epoch": 4.75313316883688, + "grad_norm": 0.0002796973567456007, + "learning_rate": 1.4817776663525683e-08, + "loss": 0.0, + "num_input_tokens_seen": 131106920, + "step": 194560 + }, + { + "epoch": 4.753255319668727, + "grad_norm": 2.2053438442526385e-05, + "learning_rate": 1.4803154208459233e-08, + "loss": 0.0, + "num_input_tokens_seen": 131109992, + "step": 194565 + }, + { + "epoch": 4.753377470500574, + "grad_norm": 0.005914537236094475, + "learning_rate": 1.4788538918009242e-08, + "loss": 0.0, + "num_input_tokens_seen": 131113640, + "step": 194570 + }, + { + "epoch": 4.753499621332422, + "grad_norm": 0.0007829848327673972, + "learning_rate": 1.4773930792282064e-08, + "loss": 0.0, + "num_input_tokens_seen": 131116776, + "step": 194575 + }, + { + "epoch": 4.753621772164268, + "grad_norm": 0.00043016657582484186, + "learning_rate": 1.4759329831383837e-08, + "loss": 0.0, + "num_input_tokens_seen": 131120424, + "step": 194580 + }, + { + "epoch": 4.753743922996115, + "grad_norm": 0.0001220947888214141, + "learning_rate": 1.4744736035420702e-08, + "loss": 0.0, + "num_input_tokens_seen": 131123752, + "step": 194585 + }, + { + "epoch": 4.753866073827963, + "grad_norm": 6.0042086261091754e-05, + "learning_rate": 1.4730149404498905e-08, + "loss": 0.0, + "num_input_tokens_seen": 131126888, + "step": 194590 + }, + { + "epoch": 4.75398822465981, + "grad_norm": 0.0038243704475462437, + "learning_rate": 1.4715569938724359e-08, + "loss": 0.0, + "num_input_tokens_seen": 131130344, + "step": 194595 + }, + { + "epoch": 4.754110375491657, + "grad_norm": 7.801556785125285e-05, + "learning_rate": 1.4700997638203316e-08, + "loss": 0.0, + "num_input_tokens_seen": 131133800, + "step": 194600 + }, + { + "epoch": 4.754232526323504, + "grad_norm": 0.00026956311194226146, + "learning_rate": 1.468643250304158e-08, + "loss": 0.0, + "num_input_tokens_seen": 131136872, + "step": 194605 + }, + { + "epoch": 4.7543546771553515, + "grad_norm": 0.0015906771877780557, + "learning_rate": 1.4671874533345064e-08, + "loss": 0.0, + "num_input_tokens_seen": 131140200, + "step": 194610 + }, + { + "epoch": 4.754476827987198, + "grad_norm": 0.0002600103907752782, + "learning_rate": 1.4657323729219906e-08, + "loss": 0.0, + "num_input_tokens_seen": 131143912, + "step": 194615 + }, + { + "epoch": 4.754598978819046, + "grad_norm": 0.013368427753448486, + "learning_rate": 1.4642780090771467e-08, + "loss": 0.0, + "num_input_tokens_seen": 131147944, + "step": 194620 + }, + { + "epoch": 4.754721129650893, + "grad_norm": 0.00011110393825219944, + "learning_rate": 1.4628243618105996e-08, + "loss": 0.0, + "num_input_tokens_seen": 131151272, + "step": 194625 + }, + { + "epoch": 4.75484328048274, + "grad_norm": 0.001707794377580285, + "learning_rate": 1.4613714311328739e-08, + "loss": 0.0, + "num_input_tokens_seen": 131155112, + "step": 194630 + }, + { + "epoch": 4.754965431314587, + "grad_norm": 0.008413401432335377, + "learning_rate": 1.4599192170545838e-08, + "loss": 0.0, + "num_input_tokens_seen": 131158504, + "step": 194635 + }, + { + "epoch": 4.755087582146435, + "grad_norm": 0.0002867542498279363, + "learning_rate": 1.4584677195862538e-08, + "loss": 0.0, + "num_input_tokens_seen": 131161832, + "step": 194640 + }, + { + "epoch": 4.755209732978281, + "grad_norm": 0.004074745811522007, + "learning_rate": 1.4570169387384424e-08, + "loss": 0.0, + "num_input_tokens_seen": 131165864, + "step": 194645 + }, + { + "epoch": 4.755331883810129, + "grad_norm": 0.02270219847559929, + "learning_rate": 1.4555668745217186e-08, + "loss": 0.0, + "num_input_tokens_seen": 131169448, + "step": 194650 + }, + { + "epoch": 4.755454034641976, + "grad_norm": 4.968816938344389e-05, + "learning_rate": 1.4541175269466078e-08, + "loss": 0.0, + "num_input_tokens_seen": 131173096, + "step": 194655 + }, + { + "epoch": 4.755576185473823, + "grad_norm": 0.0008052657940424979, + "learning_rate": 1.4526688960236788e-08, + "loss": 0.0, + "num_input_tokens_seen": 131176232, + "step": 194660 + }, + { + "epoch": 4.75569833630567, + "grad_norm": 0.008516672067344189, + "learning_rate": 1.4512209817634235e-08, + "loss": 0.0, + "num_input_tokens_seen": 131179560, + "step": 194665 + }, + { + "epoch": 4.755820487137518, + "grad_norm": 0.0010770387016236782, + "learning_rate": 1.4497737841764114e-08, + "loss": 0.0, + "num_input_tokens_seen": 131182696, + "step": 194670 + }, + { + "epoch": 4.755942637969365, + "grad_norm": 0.0012830148916691542, + "learning_rate": 1.4483273032731447e-08, + "loss": 0.0, + "num_input_tokens_seen": 131186024, + "step": 194675 + }, + { + "epoch": 4.756064788801211, + "grad_norm": 5.277749369270168e-05, + "learning_rate": 1.4468815390641486e-08, + "loss": 0.0, + "num_input_tokens_seen": 131189416, + "step": 194680 + }, + { + "epoch": 4.756186939633059, + "grad_norm": 2.9087057555443607e-05, + "learning_rate": 1.4454364915599482e-08, + "loss": 0.0, + "num_input_tokens_seen": 131193064, + "step": 194685 + }, + { + "epoch": 4.756309090464906, + "grad_norm": 0.00029995886143296957, + "learning_rate": 1.4439921607710348e-08, + "loss": 0.0, + "num_input_tokens_seen": 131196392, + "step": 194690 + }, + { + "epoch": 4.756431241296753, + "grad_norm": 0.04478108510375023, + "learning_rate": 1.4425485467079113e-08, + "loss": 0.0, + "num_input_tokens_seen": 131199528, + "step": 194695 + }, + { + "epoch": 4.7565533921286, + "grad_norm": 0.00012941205932293087, + "learning_rate": 1.4411056493810913e-08, + "loss": 0.0, + "num_input_tokens_seen": 131203176, + "step": 194700 + }, + { + "epoch": 4.756675542960448, + "grad_norm": 0.010789011605083942, + "learning_rate": 1.4396634688010556e-08, + "loss": 0.0, + "num_input_tokens_seen": 131206248, + "step": 194705 + }, + { + "epoch": 4.7567976937922944, + "grad_norm": 0.001222478342242539, + "learning_rate": 1.4382220049783068e-08, + "loss": 0.0, + "num_input_tokens_seen": 131210088, + "step": 194710 + }, + { + "epoch": 4.756919844624142, + "grad_norm": 0.00024161135661415756, + "learning_rate": 1.436781257923303e-08, + "loss": 0.0, + "num_input_tokens_seen": 131213224, + "step": 194715 + }, + { + "epoch": 4.757041995455989, + "grad_norm": 0.03183262422680855, + "learning_rate": 1.4353412276465471e-08, + "loss": 0.0, + "num_input_tokens_seen": 131216488, + "step": 194720 + }, + { + "epoch": 4.7571641462878365, + "grad_norm": 0.0014592782827094197, + "learning_rate": 1.4339019141584973e-08, + "loss": 0.0, + "num_input_tokens_seen": 131219944, + "step": 194725 + }, + { + "epoch": 4.757286297119683, + "grad_norm": 0.00017539637337904423, + "learning_rate": 1.4324633174696343e-08, + "loss": 0.0, + "num_input_tokens_seen": 131225256, + "step": 194730 + }, + { + "epoch": 4.757408447951531, + "grad_norm": 2.7212019631406292e-05, + "learning_rate": 1.4310254375903941e-08, + "loss": 0.0, + "num_input_tokens_seen": 131228712, + "step": 194735 + }, + { + "epoch": 4.757530598783378, + "grad_norm": 0.0010347808711230755, + "learning_rate": 1.429588274531257e-08, + "loss": 0.0, + "num_input_tokens_seen": 131231912, + "step": 194740 + }, + { + "epoch": 4.757652749615225, + "grad_norm": 0.9352474212646484, + "learning_rate": 1.4281518283026595e-08, + "loss": 0.0004, + "num_input_tokens_seen": 131234984, + "step": 194745 + }, + { + "epoch": 4.757774900447072, + "grad_norm": 5.460303509607911e-05, + "learning_rate": 1.4267160989150595e-08, + "loss": 0.0, + "num_input_tokens_seen": 131237992, + "step": 194750 + }, + { + "epoch": 4.75789705127892, + "grad_norm": 0.0440170057117939, + "learning_rate": 1.4252810863788932e-08, + "loss": 0.0, + "num_input_tokens_seen": 131240744, + "step": 194755 + }, + { + "epoch": 4.758019202110766, + "grad_norm": 0.0032277258578687906, + "learning_rate": 1.4238467907045971e-08, + "loss": 0.0, + "num_input_tokens_seen": 131243752, + "step": 194760 + }, + { + "epoch": 4.758141352942614, + "grad_norm": 8.80181833053939e-05, + "learning_rate": 1.4224132119025956e-08, + "loss": 0.0, + "num_input_tokens_seen": 131246952, + "step": 194765 + }, + { + "epoch": 4.758263503774461, + "grad_norm": 8.081180567387491e-05, + "learning_rate": 1.420980349983325e-08, + "loss": 0.0, + "num_input_tokens_seen": 131250024, + "step": 194770 + }, + { + "epoch": 4.7583856546063075, + "grad_norm": 2.834494262060616e-05, + "learning_rate": 1.4195482049571993e-08, + "loss": 0.0, + "num_input_tokens_seen": 131253032, + "step": 194775 + }, + { + "epoch": 4.758507805438155, + "grad_norm": 4.941626684740186e-05, + "learning_rate": 1.4181167768346324e-08, + "loss": 0.0, + "num_input_tokens_seen": 131256360, + "step": 194780 + }, + { + "epoch": 4.758629956270002, + "grad_norm": 8.536856330465525e-05, + "learning_rate": 1.4166860656260271e-08, + "loss": 0.0, + "num_input_tokens_seen": 131259880, + "step": 194785 + }, + { + "epoch": 4.7587521071018495, + "grad_norm": 0.00026171712670475245, + "learning_rate": 1.4152560713418082e-08, + "loss": 0.0, + "num_input_tokens_seen": 131263464, + "step": 194790 + }, + { + "epoch": 4.758874257933696, + "grad_norm": 0.007480157073587179, + "learning_rate": 1.4138267939923565e-08, + "loss": 0.0, + "num_input_tokens_seen": 131266984, + "step": 194795 + }, + { + "epoch": 4.758996408765544, + "grad_norm": 5.9159250668017194e-05, + "learning_rate": 1.4123982335880746e-08, + "loss": 0.0, + "num_input_tokens_seen": 131270184, + "step": 194800 + }, + { + "epoch": 4.759118559597391, + "grad_norm": 0.004247388802468777, + "learning_rate": 1.4109703901393543e-08, + "loss": 0.0, + "num_input_tokens_seen": 131274536, + "step": 194805 + }, + { + "epoch": 4.759240710429238, + "grad_norm": 0.006303729489445686, + "learning_rate": 1.4095432636565763e-08, + "loss": 0.0, + "num_input_tokens_seen": 131278056, + "step": 194810 + }, + { + "epoch": 4.759362861261085, + "grad_norm": 2.7018289983971044e-05, + "learning_rate": 1.4081168541501099e-08, + "loss": 0.0, + "num_input_tokens_seen": 131281512, + "step": 194815 + }, + { + "epoch": 4.759485012092933, + "grad_norm": 9.589582623448223e-05, + "learning_rate": 1.4066911616303357e-08, + "loss": 0.0, + "num_input_tokens_seen": 131285096, + "step": 194820 + }, + { + "epoch": 4.759607162924779, + "grad_norm": 0.0005603454192169011, + "learning_rate": 1.4052661861076232e-08, + "loss": 0.0, + "num_input_tokens_seen": 131288680, + "step": 194825 + }, + { + "epoch": 4.759729313756627, + "grad_norm": 0.000639497535303235, + "learning_rate": 1.4038419275923419e-08, + "loss": 0.0, + "num_input_tokens_seen": 131291496, + "step": 194830 + }, + { + "epoch": 4.759851464588474, + "grad_norm": 0.00018495744734536856, + "learning_rate": 1.402418386094828e-08, + "loss": 0.0, + "num_input_tokens_seen": 131294440, + "step": 194835 + }, + { + "epoch": 4.759973615420321, + "grad_norm": 0.0032376840244978666, + "learning_rate": 1.400995561625451e-08, + "loss": 0.0, + "num_input_tokens_seen": 131297960, + "step": 194840 + }, + { + "epoch": 4.760095766252168, + "grad_norm": 4.197334055788815e-05, + "learning_rate": 1.3995734541945692e-08, + "loss": 0.0, + "num_input_tokens_seen": 131301480, + "step": 194845 + }, + { + "epoch": 4.760217917084015, + "grad_norm": 0.0002773664309643209, + "learning_rate": 1.3981520638124855e-08, + "loss": 0.0, + "num_input_tokens_seen": 131304872, + "step": 194850 + }, + { + "epoch": 4.7603400679158625, + "grad_norm": 0.0001326696656178683, + "learning_rate": 1.3967313904895805e-08, + "loss": 0.0, + "num_input_tokens_seen": 131308392, + "step": 194855 + }, + { + "epoch": 4.76046221874771, + "grad_norm": 2.312642573087942e-05, + "learning_rate": 1.3953114342361571e-08, + "loss": 0.001, + "num_input_tokens_seen": 131311912, + "step": 194860 + }, + { + "epoch": 4.760584369579557, + "grad_norm": 0.02015681564807892, + "learning_rate": 1.3938921950625515e-08, + "loss": 0.0, + "num_input_tokens_seen": 131315368, + "step": 194865 + }, + { + "epoch": 4.760706520411404, + "grad_norm": 0.0017596333054825664, + "learning_rate": 1.3924736729790775e-08, + "loss": 0.0, + "num_input_tokens_seen": 131319592, + "step": 194870 + }, + { + "epoch": 4.760828671243251, + "grad_norm": 0.00010401655890746042, + "learning_rate": 1.3910558679960715e-08, + "loss": 0.0, + "num_input_tokens_seen": 131323624, + "step": 194875 + }, + { + "epoch": 4.760950822075098, + "grad_norm": 0.0013329308712854981, + "learning_rate": 1.3896387801238141e-08, + "loss": 0.0087, + "num_input_tokens_seen": 131327400, + "step": 194880 + }, + { + "epoch": 4.761072972906946, + "grad_norm": 0.0004021950880996883, + "learning_rate": 1.3882224093726302e-08, + "loss": 0.0, + "num_input_tokens_seen": 131330600, + "step": 194885 + }, + { + "epoch": 4.761195123738792, + "grad_norm": 0.025195332244038582, + "learning_rate": 1.3868067557528228e-08, + "loss": 0.0, + "num_input_tokens_seen": 131333800, + "step": 194890 + }, + { + "epoch": 4.76131727457064, + "grad_norm": 0.00035623961593955755, + "learning_rate": 1.3853918192746839e-08, + "loss": 0.0, + "num_input_tokens_seen": 131336936, + "step": 194895 + }, + { + "epoch": 4.761439425402487, + "grad_norm": 7.331221422646195e-05, + "learning_rate": 1.3839775999484938e-08, + "loss": 0.0, + "num_input_tokens_seen": 131340456, + "step": 194900 + }, + { + "epoch": 4.761561576234334, + "grad_norm": 0.008651269599795341, + "learning_rate": 1.3825640977845333e-08, + "loss": 0.0, + "num_input_tokens_seen": 131344168, + "step": 194905 + }, + { + "epoch": 4.761683727066181, + "grad_norm": 0.00015639110642950982, + "learning_rate": 1.3811513127931052e-08, + "loss": 0.0, + "num_input_tokens_seen": 131347752, + "step": 194910 + }, + { + "epoch": 4.761805877898029, + "grad_norm": 0.39226239919662476, + "learning_rate": 1.379739244984468e-08, + "loss": 0.0002, + "num_input_tokens_seen": 131350696, + "step": 194915 + }, + { + "epoch": 4.7619280287298755, + "grad_norm": 5.454105485114269e-05, + "learning_rate": 1.3783278943688914e-08, + "loss": 0.0, + "num_input_tokens_seen": 131354216, + "step": 194920 + }, + { + "epoch": 4.762050179561723, + "grad_norm": 0.006486842874437571, + "learning_rate": 1.3769172609566337e-08, + "loss": 0.0, + "num_input_tokens_seen": 131357352, + "step": 194925 + }, + { + "epoch": 4.76217233039357, + "grad_norm": 0.00033026799792423844, + "learning_rate": 1.3755073447579646e-08, + "loss": 0.0, + "num_input_tokens_seen": 131360360, + "step": 194930 + }, + { + "epoch": 4.7622944812254175, + "grad_norm": 8.527412137482315e-05, + "learning_rate": 1.3740981457831424e-08, + "loss": 0.0, + "num_input_tokens_seen": 131363688, + "step": 194935 + }, + { + "epoch": 4.762416632057264, + "grad_norm": 0.00025074477889575064, + "learning_rate": 1.3726896640423924e-08, + "loss": 0.0, + "num_input_tokens_seen": 131367016, + "step": 194940 + }, + { + "epoch": 4.762538782889111, + "grad_norm": 0.0016728693153709173, + "learning_rate": 1.3712818995459841e-08, + "loss": 0.0, + "num_input_tokens_seen": 131370600, + "step": 194945 + }, + { + "epoch": 4.762660933720959, + "grad_norm": 0.0001442194334231317, + "learning_rate": 1.3698748523041314e-08, + "loss": 0.0, + "num_input_tokens_seen": 131373864, + "step": 194950 + }, + { + "epoch": 4.762783084552806, + "grad_norm": 4.660491686081514e-05, + "learning_rate": 1.368468522327082e-08, + "loss": 0.0, + "num_input_tokens_seen": 131377384, + "step": 194955 + }, + { + "epoch": 4.762905235384653, + "grad_norm": 9.689110811450519e-06, + "learning_rate": 1.3670629096250496e-08, + "loss": 0.0, + "num_input_tokens_seen": 131380520, + "step": 194960 + }, + { + "epoch": 4.7630273862165, + "grad_norm": 0.0014319338370114565, + "learning_rate": 1.3656580142082707e-08, + "loss": 0.0, + "num_input_tokens_seen": 131383912, + "step": 194965 + }, + { + "epoch": 4.763149537048347, + "grad_norm": 0.00041627997416071594, + "learning_rate": 1.3642538360869593e-08, + "loss": 0.0, + "num_input_tokens_seen": 131387240, + "step": 194970 + }, + { + "epoch": 4.763271687880194, + "grad_norm": 0.007283204235136509, + "learning_rate": 1.3628503752713183e-08, + "loss": 0.0, + "num_input_tokens_seen": 131390312, + "step": 194975 + }, + { + "epoch": 4.763393838712042, + "grad_norm": 0.0030106627382338047, + "learning_rate": 1.3614476317715618e-08, + "loss": 0.0, + "num_input_tokens_seen": 131393704, + "step": 194980 + }, + { + "epoch": 4.7635159895438886, + "grad_norm": 0.0012613601284101605, + "learning_rate": 1.3600456055978926e-08, + "loss": 0.0, + "num_input_tokens_seen": 131397160, + "step": 194985 + }, + { + "epoch": 4.763638140375736, + "grad_norm": 8.995272219181061e-05, + "learning_rate": 1.3586442967604916e-08, + "loss": 0.0, + "num_input_tokens_seen": 131400872, + "step": 194990 + }, + { + "epoch": 4.763760291207583, + "grad_norm": 0.00010999714868376032, + "learning_rate": 1.3572437052695729e-08, + "loss": 0.0, + "num_input_tokens_seen": 131403944, + "step": 194995 + }, + { + "epoch": 4.763882442039431, + "grad_norm": 0.001251238165423274, + "learning_rate": 1.355843831135306e-08, + "loss": 0.0, + "num_input_tokens_seen": 131407144, + "step": 195000 + }, + { + "epoch": 4.764004592871277, + "grad_norm": 4.152482870267704e-05, + "learning_rate": 1.3544446743678717e-08, + "loss": 0.0, + "num_input_tokens_seen": 131410728, + "step": 195005 + }, + { + "epoch": 4.764126743703125, + "grad_norm": 0.0001953756291186437, + "learning_rate": 1.3530462349774508e-08, + "loss": 0.0, + "num_input_tokens_seen": 131414440, + "step": 195010 + }, + { + "epoch": 4.764248894534972, + "grad_norm": 0.0006240036454983056, + "learning_rate": 1.3516485129742018e-08, + "loss": 0.0, + "num_input_tokens_seen": 131417896, + "step": 195015 + }, + { + "epoch": 4.764371045366819, + "grad_norm": 0.16515390574932098, + "learning_rate": 1.3502515083683164e-08, + "loss": 0.0, + "num_input_tokens_seen": 131420968, + "step": 195020 + }, + { + "epoch": 4.764493196198666, + "grad_norm": 0.0005905067082494497, + "learning_rate": 1.34885522116992e-08, + "loss": 0.0, + "num_input_tokens_seen": 131424104, + "step": 195025 + }, + { + "epoch": 4.764615347030514, + "grad_norm": 0.0006180317723192275, + "learning_rate": 1.3474596513891935e-08, + "loss": 0.0, + "num_input_tokens_seen": 131427304, + "step": 195030 + }, + { + "epoch": 4.7647374978623604, + "grad_norm": 0.008717228658497334, + "learning_rate": 1.3460647990362617e-08, + "loss": 0.0, + "num_input_tokens_seen": 131430760, + "step": 195035 + }, + { + "epoch": 4.764859648694207, + "grad_norm": 0.0009299147059209645, + "learning_rate": 1.3446706641212946e-08, + "loss": 0.0, + "num_input_tokens_seen": 131433768, + "step": 195040 + }, + { + "epoch": 4.764981799526055, + "grad_norm": 0.0001631972409086302, + "learning_rate": 1.3432772466544062e-08, + "loss": 0.0, + "num_input_tokens_seen": 131437096, + "step": 195045 + }, + { + "epoch": 4.765103950357902, + "grad_norm": 8.587415504734963e-05, + "learning_rate": 1.341884546645744e-08, + "loss": 0.0, + "num_input_tokens_seen": 131440296, + "step": 195050 + }, + { + "epoch": 4.765226101189749, + "grad_norm": 5.65875307074748e-05, + "learning_rate": 1.3404925641054331e-08, + "loss": 0.0, + "num_input_tokens_seen": 131443752, + "step": 195055 + }, + { + "epoch": 4.765348252021596, + "grad_norm": 4.379829624667764e-05, + "learning_rate": 1.3391012990436101e-08, + "loss": 0.0039, + "num_input_tokens_seen": 131446952, + "step": 195060 + }, + { + "epoch": 4.765470402853444, + "grad_norm": 0.0006057017599232495, + "learning_rate": 1.3377107514703667e-08, + "loss": 0.0, + "num_input_tokens_seen": 131450536, + "step": 195065 + }, + { + "epoch": 4.76559255368529, + "grad_norm": 6.288501026574522e-05, + "learning_rate": 1.3363209213958282e-08, + "loss": 0.0, + "num_input_tokens_seen": 131454184, + "step": 195070 + }, + { + "epoch": 4.765714704517138, + "grad_norm": 0.024181053042411804, + "learning_rate": 1.3349318088300976e-08, + "loss": 0.0, + "num_input_tokens_seen": 131457320, + "step": 195075 + }, + { + "epoch": 4.765836855348985, + "grad_norm": 0.0012301692040637136, + "learning_rate": 1.333543413783289e-08, + "loss": 0.0, + "num_input_tokens_seen": 131460520, + "step": 195080 + }, + { + "epoch": 4.765959006180832, + "grad_norm": 0.00588964531198144, + "learning_rate": 1.3321557362654833e-08, + "loss": 0.0, + "num_input_tokens_seen": 131463912, + "step": 195085 + }, + { + "epoch": 4.766081157012679, + "grad_norm": 0.0011522286804392934, + "learning_rate": 1.3307687762867836e-08, + "loss": 0.0, + "num_input_tokens_seen": 131467304, + "step": 195090 + }, + { + "epoch": 4.766203307844527, + "grad_norm": 0.026447875425219536, + "learning_rate": 1.3293825338572706e-08, + "loss": 0.0, + "num_input_tokens_seen": 131470888, + "step": 195095 + }, + { + "epoch": 4.7663254586763735, + "grad_norm": 0.00015927547065075487, + "learning_rate": 1.3279970089870251e-08, + "loss": 0.0, + "num_input_tokens_seen": 131474472, + "step": 195100 + }, + { + "epoch": 4.766447609508221, + "grad_norm": 0.0005589300417341292, + "learning_rate": 1.3266122016861392e-08, + "loss": 0.0, + "num_input_tokens_seen": 131477736, + "step": 195105 + }, + { + "epoch": 4.766569760340068, + "grad_norm": 0.0003874331305269152, + "learning_rate": 1.3252281119646491e-08, + "loss": 0.0, + "num_input_tokens_seen": 131481256, + "step": 195110 + }, + { + "epoch": 4.766691911171915, + "grad_norm": 0.00016673367645125836, + "learning_rate": 1.323844739832658e-08, + "loss": 0.0, + "num_input_tokens_seen": 131484072, + "step": 195115 + }, + { + "epoch": 4.766814062003762, + "grad_norm": 0.042626719921827316, + "learning_rate": 1.3224620853001911e-08, + "loss": 0.0, + "num_input_tokens_seen": 131486952, + "step": 195120 + }, + { + "epoch": 4.76693621283561, + "grad_norm": 0.000655999465379864, + "learning_rate": 1.3210801483773404e-08, + "loss": 0.0, + "num_input_tokens_seen": 131490344, + "step": 195125 + }, + { + "epoch": 4.767058363667457, + "grad_norm": 0.0005754061858169734, + "learning_rate": 1.3196989290741201e-08, + "loss": 0.0, + "num_input_tokens_seen": 131493864, + "step": 195130 + }, + { + "epoch": 4.767180514499303, + "grad_norm": 0.00032430619467049837, + "learning_rate": 1.3183184274005888e-08, + "loss": 0.0, + "num_input_tokens_seen": 131497576, + "step": 195135 + }, + { + "epoch": 4.767302665331151, + "grad_norm": 0.0011158830020576715, + "learning_rate": 1.316938643366805e-08, + "loss": 0.0, + "num_input_tokens_seen": 131500840, + "step": 195140 + }, + { + "epoch": 4.767424816162998, + "grad_norm": 3.952207407564856e-05, + "learning_rate": 1.3155595769827721e-08, + "loss": 0.0, + "num_input_tokens_seen": 131504168, + "step": 195145 + }, + { + "epoch": 4.767546966994845, + "grad_norm": 0.00012400586274452507, + "learning_rate": 1.3141812282585374e-08, + "loss": 0.0, + "num_input_tokens_seen": 131507432, + "step": 195150 + }, + { + "epoch": 4.767669117826692, + "grad_norm": 1.1847202586068306e-05, + "learning_rate": 1.3128035972041262e-08, + "loss": 0.0, + "num_input_tokens_seen": 131510888, + "step": 195155 + }, + { + "epoch": 4.76779126865854, + "grad_norm": 0.0014117275131866336, + "learning_rate": 1.3114266838295418e-08, + "loss": 0.0, + "num_input_tokens_seen": 131514408, + "step": 195160 + }, + { + "epoch": 4.7679134194903865, + "grad_norm": 0.0005267454544082284, + "learning_rate": 1.3100504881448093e-08, + "loss": 0.0, + "num_input_tokens_seen": 131517800, + "step": 195165 + }, + { + "epoch": 4.768035570322234, + "grad_norm": 0.0010505430400371552, + "learning_rate": 1.308675010159932e-08, + "loss": 0.0, + "num_input_tokens_seen": 131521384, + "step": 195170 + }, + { + "epoch": 4.768157721154081, + "grad_norm": 0.00037277350202202797, + "learning_rate": 1.307300249884924e-08, + "loss": 0.0, + "num_input_tokens_seen": 131524392, + "step": 195175 + }, + { + "epoch": 4.7682798719859285, + "grad_norm": 0.029164448380470276, + "learning_rate": 1.305926207329766e-08, + "loss": 0.0, + "num_input_tokens_seen": 131528168, + "step": 195180 + }, + { + "epoch": 4.768402022817775, + "grad_norm": 4.427357271197252e-05, + "learning_rate": 1.3045528825044615e-08, + "loss": 0.0, + "num_input_tokens_seen": 131531368, + "step": 195185 + }, + { + "epoch": 4.768524173649623, + "grad_norm": 2.8566098990268074e-05, + "learning_rate": 1.3031802754189913e-08, + "loss": 0.0, + "num_input_tokens_seen": 131534376, + "step": 195190 + }, + { + "epoch": 4.76864632448147, + "grad_norm": 3.755874058697373e-05, + "learning_rate": 1.3018083860833362e-08, + "loss": 0.0004, + "num_input_tokens_seen": 131537448, + "step": 195195 + }, + { + "epoch": 4.768768475313317, + "grad_norm": 2.5464059945079498e-05, + "learning_rate": 1.3004372145074883e-08, + "loss": 0.0, + "num_input_tokens_seen": 131540584, + "step": 195200 + }, + { + "epoch": 4.768890626145164, + "grad_norm": 0.0004309054056648165, + "learning_rate": 1.299066760701395e-08, + "loss": 0.0, + "num_input_tokens_seen": 131544360, + "step": 195205 + }, + { + "epoch": 4.769012776977011, + "grad_norm": 0.0006671411683782935, + "learning_rate": 1.2976970246750484e-08, + "loss": 0.0, + "num_input_tokens_seen": 131547816, + "step": 195210 + }, + { + "epoch": 4.769134927808858, + "grad_norm": 0.00020214584947098047, + "learning_rate": 1.2963280064383853e-08, + "loss": 0.0, + "num_input_tokens_seen": 131550824, + "step": 195215 + }, + { + "epoch": 4.769257078640706, + "grad_norm": 0.0008767805411480367, + "learning_rate": 1.2949597060013862e-08, + "loss": 0.0, + "num_input_tokens_seen": 131554280, + "step": 195220 + }, + { + "epoch": 4.769379229472553, + "grad_norm": 0.0031209036242216825, + "learning_rate": 1.2935921233739766e-08, + "loss": 0.0, + "num_input_tokens_seen": 131557480, + "step": 195225 + }, + { + "epoch": 4.7695013803043995, + "grad_norm": 0.0023440788500010967, + "learning_rate": 1.2922252585661153e-08, + "loss": 0.0, + "num_input_tokens_seen": 131560872, + "step": 195230 + }, + { + "epoch": 4.769623531136247, + "grad_norm": 4.385735155665316e-05, + "learning_rate": 1.2908591115877499e-08, + "loss": 0.0, + "num_input_tokens_seen": 131563944, + "step": 195235 + }, + { + "epoch": 4.769745681968094, + "grad_norm": 0.002352142706513405, + "learning_rate": 1.2894936824487945e-08, + "loss": 0.0, + "num_input_tokens_seen": 131567080, + "step": 195240 + }, + { + "epoch": 4.7698678327999415, + "grad_norm": 3.444106914685108e-05, + "learning_rate": 1.288128971159197e-08, + "loss": 0.0, + "num_input_tokens_seen": 131570408, + "step": 195245 + }, + { + "epoch": 4.769989983631788, + "grad_norm": 0.00010280736751155928, + "learning_rate": 1.2867649777288826e-08, + "loss": 0.0, + "num_input_tokens_seen": 131573608, + "step": 195250 + }, + { + "epoch": 4.770112134463636, + "grad_norm": 0.01051387283951044, + "learning_rate": 1.2854017021677543e-08, + "loss": 0.0, + "num_input_tokens_seen": 131578984, + "step": 195255 + }, + { + "epoch": 4.770234285295483, + "grad_norm": 0.00026178904226981103, + "learning_rate": 1.284039144485749e-08, + "loss": 0.0, + "num_input_tokens_seen": 131582120, + "step": 195260 + }, + { + "epoch": 4.77035643612733, + "grad_norm": 0.0002798176428768784, + "learning_rate": 1.2826773046927475e-08, + "loss": 0.0, + "num_input_tokens_seen": 131585448, + "step": 195265 + }, + { + "epoch": 4.770478586959177, + "grad_norm": 4.130896195420064e-05, + "learning_rate": 1.2813161827986752e-08, + "loss": 0.0, + "num_input_tokens_seen": 131589160, + "step": 195270 + }, + { + "epoch": 4.770600737791025, + "grad_norm": 0.0005915540968999267, + "learning_rate": 1.2799557788134241e-08, + "loss": 0.0, + "num_input_tokens_seen": 131592168, + "step": 195275 + }, + { + "epoch": 4.770722888622871, + "grad_norm": 0.00042665572254918516, + "learning_rate": 1.2785960927468863e-08, + "loss": 0.0, + "num_input_tokens_seen": 131595176, + "step": 195280 + }, + { + "epoch": 4.770845039454719, + "grad_norm": 7.571290916530415e-05, + "learning_rate": 1.2772371246089431e-08, + "loss": 0.0, + "num_input_tokens_seen": 131598376, + "step": 195285 + }, + { + "epoch": 4.770967190286566, + "grad_norm": 0.0002932634379249066, + "learning_rate": 1.2758788744094862e-08, + "loss": 0.0, + "num_input_tokens_seen": 131602984, + "step": 195290 + }, + { + "epoch": 4.771089341118413, + "grad_norm": 0.0005674429121427238, + "learning_rate": 1.2745213421583967e-08, + "loss": 0.0, + "num_input_tokens_seen": 131606248, + "step": 195295 + }, + { + "epoch": 4.77121149195026, + "grad_norm": 0.0014069292228668928, + "learning_rate": 1.2731645278655445e-08, + "loss": 0.0, + "num_input_tokens_seen": 131609512, + "step": 195300 + }, + { + "epoch": 4.771333642782107, + "grad_norm": 0.023213129490613937, + "learning_rate": 1.2718084315407995e-08, + "loss": 0.0, + "num_input_tokens_seen": 131612968, + "step": 195305 + }, + { + "epoch": 4.7714557936139546, + "grad_norm": 7.675900997128338e-05, + "learning_rate": 1.2704530531939982e-08, + "loss": 0.0, + "num_input_tokens_seen": 131616808, + "step": 195310 + }, + { + "epoch": 4.771577944445801, + "grad_norm": 0.00011649368389043957, + "learning_rate": 1.269098392835033e-08, + "loss": 0.0, + "num_input_tokens_seen": 131620008, + "step": 195315 + }, + { + "epoch": 4.771700095277649, + "grad_norm": 0.0007854030118323863, + "learning_rate": 1.26774445047374e-08, + "loss": 0.0, + "num_input_tokens_seen": 131623720, + "step": 195320 + }, + { + "epoch": 4.771822246109496, + "grad_norm": 0.05870318040251732, + "learning_rate": 1.266391226119956e-08, + "loss": 0.0, + "num_input_tokens_seen": 131627176, + "step": 195325 + }, + { + "epoch": 4.771944396941343, + "grad_norm": 6.638450577156618e-05, + "learning_rate": 1.2650387197835399e-08, + "loss": 0.0, + "num_input_tokens_seen": 131630888, + "step": 195330 + }, + { + "epoch": 4.77206654777319, + "grad_norm": 0.0003788423491641879, + "learning_rate": 1.2636869314743059e-08, + "loss": 0.0, + "num_input_tokens_seen": 131634536, + "step": 195335 + }, + { + "epoch": 4.772188698605038, + "grad_norm": 0.0008416434284299612, + "learning_rate": 1.2623358612021128e-08, + "loss": 0.0001, + "num_input_tokens_seen": 131637736, + "step": 195340 + }, + { + "epoch": 4.772310849436884, + "grad_norm": 0.0003930965031031519, + "learning_rate": 1.260985508976764e-08, + "loss": 0.0, + "num_input_tokens_seen": 131641000, + "step": 195345 + }, + { + "epoch": 4.772433000268732, + "grad_norm": 0.0031450181268155575, + "learning_rate": 1.2596358748080848e-08, + "loss": 0.0, + "num_input_tokens_seen": 131644712, + "step": 195350 + }, + { + "epoch": 4.772555151100579, + "grad_norm": 1.319029524893267e-05, + "learning_rate": 1.2582869587059008e-08, + "loss": 0.0, + "num_input_tokens_seen": 131648104, + "step": 195355 + }, + { + "epoch": 4.7726773019324265, + "grad_norm": 0.0007021019700914621, + "learning_rate": 1.2569387606800041e-08, + "loss": 0.0, + "num_input_tokens_seen": 131651368, + "step": 195360 + }, + { + "epoch": 4.772799452764273, + "grad_norm": 0.0006127048982307315, + "learning_rate": 1.2555912807402092e-08, + "loss": 0.0, + "num_input_tokens_seen": 131654632, + "step": 195365 + }, + { + "epoch": 4.772921603596121, + "grad_norm": 0.0014195807743817568, + "learning_rate": 1.254244518896308e-08, + "loss": 0.0, + "num_input_tokens_seen": 131657768, + "step": 195370 + }, + { + "epoch": 4.773043754427968, + "grad_norm": 4.549880395643413e-05, + "learning_rate": 1.2528984751581151e-08, + "loss": 0.0, + "num_input_tokens_seen": 131661160, + "step": 195375 + }, + { + "epoch": 4.773165905259814, + "grad_norm": 0.00033199405879713595, + "learning_rate": 1.2515531495353893e-08, + "loss": 0.0, + "num_input_tokens_seen": 131664808, + "step": 195380 + }, + { + "epoch": 4.773288056091662, + "grad_norm": 0.00022149684082251042, + "learning_rate": 1.2502085420379337e-08, + "loss": 0.0, + "num_input_tokens_seen": 131667688, + "step": 195385 + }, + { + "epoch": 4.77341020692351, + "grad_norm": 7.733409438515082e-05, + "learning_rate": 1.2488646526755187e-08, + "loss": 0.0, + "num_input_tokens_seen": 131671080, + "step": 195390 + }, + { + "epoch": 4.773532357755356, + "grad_norm": 0.0018248482374474406, + "learning_rate": 1.2475214814579248e-08, + "loss": 0.0, + "num_input_tokens_seen": 131674152, + "step": 195395 + }, + { + "epoch": 4.773654508587203, + "grad_norm": 8.610795339336619e-05, + "learning_rate": 1.2461790283949115e-08, + "loss": 0.0, + "num_input_tokens_seen": 131677416, + "step": 195400 + }, + { + "epoch": 4.773776659419051, + "grad_norm": 0.06222959980368614, + "learning_rate": 1.244837293496248e-08, + "loss": 0.0, + "num_input_tokens_seen": 131680616, + "step": 195405 + }, + { + "epoch": 4.7738988102508975, + "grad_norm": 0.003180848667398095, + "learning_rate": 1.2434962767716939e-08, + "loss": 0.0, + "num_input_tokens_seen": 131683944, + "step": 195410 + }, + { + "epoch": 4.774020961082745, + "grad_norm": 0.00010856594599317759, + "learning_rate": 1.2421559782309966e-08, + "loss": 0.0, + "num_input_tokens_seen": 131686952, + "step": 195415 + }, + { + "epoch": 4.774143111914592, + "grad_norm": 0.00012096684804419056, + "learning_rate": 1.2408163978839036e-08, + "loss": 0.0, + "num_input_tokens_seen": 131689896, + "step": 195420 + }, + { + "epoch": 4.7742652627464395, + "grad_norm": 0.000282303401036188, + "learning_rate": 1.2394775357401521e-08, + "loss": 0.0, + "num_input_tokens_seen": 131693608, + "step": 195425 + }, + { + "epoch": 4.774387413578286, + "grad_norm": 0.0012512014945968986, + "learning_rate": 1.2381393918094784e-08, + "loss": 0.0348, + "num_input_tokens_seen": 131697064, + "step": 195430 + }, + { + "epoch": 4.774509564410134, + "grad_norm": 0.0003305374411866069, + "learning_rate": 1.2368019661016304e-08, + "loss": 0.0, + "num_input_tokens_seen": 131700072, + "step": 195435 + }, + { + "epoch": 4.774631715241981, + "grad_norm": 0.0003950181417167187, + "learning_rate": 1.2354652586263226e-08, + "loss": 0.0, + "num_input_tokens_seen": 131703144, + "step": 195440 + }, + { + "epoch": 4.774753866073828, + "grad_norm": 0.0002270180848427117, + "learning_rate": 1.2341292693932692e-08, + "loss": 0.0, + "num_input_tokens_seen": 131706664, + "step": 195445 + }, + { + "epoch": 4.774876016905675, + "grad_norm": 0.0008159330463968217, + "learning_rate": 1.232793998412196e-08, + "loss": 0.0, + "num_input_tokens_seen": 131709992, + "step": 195450 + }, + { + "epoch": 4.774998167737523, + "grad_norm": 0.0028800824657082558, + "learning_rate": 1.2314594456928063e-08, + "loss": 0.0, + "num_input_tokens_seen": 131713576, + "step": 195455 + }, + { + "epoch": 4.775120318569369, + "grad_norm": 0.14265161752700806, + "learning_rate": 1.2301256112448144e-08, + "loss": 0.0, + "num_input_tokens_seen": 131716584, + "step": 195460 + }, + { + "epoch": 4.775242469401217, + "grad_norm": 6.738114461768419e-05, + "learning_rate": 1.2287924950779238e-08, + "loss": 0.0, + "num_input_tokens_seen": 131719912, + "step": 195465 + }, + { + "epoch": 4.775364620233064, + "grad_norm": 0.0014600591966882348, + "learning_rate": 1.2274600972018046e-08, + "loss": 0.0, + "num_input_tokens_seen": 131723688, + "step": 195470 + }, + { + "epoch": 4.7754867710649105, + "grad_norm": 0.08191387355327606, + "learning_rate": 1.226128417626171e-08, + "loss": 0.0, + "num_input_tokens_seen": 131726888, + "step": 195475 + }, + { + "epoch": 4.775608921896758, + "grad_norm": 0.00014762043429072946, + "learning_rate": 1.2247974563606823e-08, + "loss": 0.0, + "num_input_tokens_seen": 131730024, + "step": 195480 + }, + { + "epoch": 4.775731072728606, + "grad_norm": 0.0026744746137410402, + "learning_rate": 1.2234672134150525e-08, + "loss": 0.0, + "num_input_tokens_seen": 131733032, + "step": 195485 + }, + { + "epoch": 4.7758532235604525, + "grad_norm": 0.02477974258363247, + "learning_rate": 1.2221376887989298e-08, + "loss": 0.0, + "num_input_tokens_seen": 131736808, + "step": 195490 + }, + { + "epoch": 4.775975374392299, + "grad_norm": 7.066877151373774e-05, + "learning_rate": 1.2208088825219954e-08, + "loss": 0.0, + "num_input_tokens_seen": 131740328, + "step": 195495 + }, + { + "epoch": 4.776097525224147, + "grad_norm": 0.006642943248152733, + "learning_rate": 1.2194807945938967e-08, + "loss": 0.0, + "num_input_tokens_seen": 131743592, + "step": 195500 + }, + { + "epoch": 4.776219676055994, + "grad_norm": 0.0001629340258659795, + "learning_rate": 1.2181534250243041e-08, + "loss": 0.0, + "num_input_tokens_seen": 131746984, + "step": 195505 + }, + { + "epoch": 4.776341826887841, + "grad_norm": 0.0013602145481854677, + "learning_rate": 1.2168267738228765e-08, + "loss": 0.0, + "num_input_tokens_seen": 131750248, + "step": 195510 + }, + { + "epoch": 4.776463977719688, + "grad_norm": 0.04610900953412056, + "learning_rate": 1.2155008409992507e-08, + "loss": 0.0, + "num_input_tokens_seen": 131753768, + "step": 195515 + }, + { + "epoch": 4.776586128551536, + "grad_norm": 0.0009138291934505105, + "learning_rate": 1.2141756265630742e-08, + "loss": 0.0, + "num_input_tokens_seen": 131756904, + "step": 195520 + }, + { + "epoch": 4.776708279383382, + "grad_norm": 0.004156212322413921, + "learning_rate": 1.212851130523973e-08, + "loss": 0.0, + "num_input_tokens_seen": 131760296, + "step": 195525 + }, + { + "epoch": 4.77683043021523, + "grad_norm": 0.0008036352810449898, + "learning_rate": 1.211527352891606e-08, + "loss": 0.0, + "num_input_tokens_seen": 131763688, + "step": 195530 + }, + { + "epoch": 4.776952581047077, + "grad_norm": 0.0006484703626483679, + "learning_rate": 1.2102042936755652e-08, + "loss": 0.0, + "num_input_tokens_seen": 131767400, + "step": 195535 + }, + { + "epoch": 4.777074731878924, + "grad_norm": 3.612769432947971e-05, + "learning_rate": 1.2088819528854988e-08, + "loss": 0.0, + "num_input_tokens_seen": 131770920, + "step": 195540 + }, + { + "epoch": 4.777196882710771, + "grad_norm": 5.983481969451532e-05, + "learning_rate": 1.2075603305310211e-08, + "loss": 0.0, + "num_input_tokens_seen": 131774056, + "step": 195545 + }, + { + "epoch": 4.777319033542619, + "grad_norm": 0.006965314038097858, + "learning_rate": 1.2062394266217246e-08, + "loss": 0.0001, + "num_input_tokens_seen": 131777640, + "step": 195550 + }, + { + "epoch": 4.7774411843744655, + "grad_norm": 0.0002887119189836085, + "learning_rate": 1.2049192411672348e-08, + "loss": 0.0, + "num_input_tokens_seen": 131781032, + "step": 195555 + }, + { + "epoch": 4.777563335206313, + "grad_norm": 5.628536200674716e-06, + "learning_rate": 1.2035997741771442e-08, + "loss": 0.0, + "num_input_tokens_seen": 131784168, + "step": 195560 + }, + { + "epoch": 4.77768548603816, + "grad_norm": 6.626116373809054e-05, + "learning_rate": 1.202281025661045e-08, + "loss": 0.0, + "num_input_tokens_seen": 131787496, + "step": 195565 + }, + { + "epoch": 4.777807636870007, + "grad_norm": 0.0005148024065420032, + "learning_rate": 1.2009629956285405e-08, + "loss": 0.0, + "num_input_tokens_seen": 131790888, + "step": 195570 + }, + { + "epoch": 4.777929787701854, + "grad_norm": 0.00042017377563752234, + "learning_rate": 1.1996456840892011e-08, + "loss": 0.0, + "num_input_tokens_seen": 131793768, + "step": 195575 + }, + { + "epoch": 4.778051938533701, + "grad_norm": 0.002864888869225979, + "learning_rate": 1.1983290910526079e-08, + "loss": 0.0, + "num_input_tokens_seen": 131797416, + "step": 195580 + }, + { + "epoch": 4.778174089365549, + "grad_norm": 0.0014279955066740513, + "learning_rate": 1.197013216528342e-08, + "loss": 0.0, + "num_input_tokens_seen": 131800744, + "step": 195585 + }, + { + "epoch": 4.778296240197395, + "grad_norm": 8.794532732281368e-06, + "learning_rate": 1.1956980605259737e-08, + "loss": 0.0, + "num_input_tokens_seen": 131803944, + "step": 195590 + }, + { + "epoch": 4.778418391029243, + "grad_norm": 0.0006799935363233089, + "learning_rate": 1.1943836230550619e-08, + "loss": 0.0, + "num_input_tokens_seen": 131807208, + "step": 195595 + }, + { + "epoch": 4.77854054186109, + "grad_norm": 6.255536573007703e-05, + "learning_rate": 1.1930699041251657e-08, + "loss": 0.0, + "num_input_tokens_seen": 131810856, + "step": 195600 + }, + { + "epoch": 4.778662692692937, + "grad_norm": 0.0006646077963523567, + "learning_rate": 1.1917569037458553e-08, + "loss": 0.0007, + "num_input_tokens_seen": 131813800, + "step": 195605 + }, + { + "epoch": 4.778784843524784, + "grad_norm": 0.00029163400176912546, + "learning_rate": 1.1904446219266451e-08, + "loss": 0.0, + "num_input_tokens_seen": 131817384, + "step": 195610 + }, + { + "epoch": 4.778906994356632, + "grad_norm": 0.00019608518050517887, + "learning_rate": 1.1891330586771165e-08, + "loss": 0.0, + "num_input_tokens_seen": 131820648, + "step": 195615 + }, + { + "epoch": 4.7790291451884785, + "grad_norm": 4.607576920534484e-05, + "learning_rate": 1.1878222140067729e-08, + "loss": 0.0, + "num_input_tokens_seen": 131824168, + "step": 195620 + }, + { + "epoch": 4.779151296020326, + "grad_norm": 0.00015413403161801398, + "learning_rate": 1.1865120879251734e-08, + "loss": 0.0, + "num_input_tokens_seen": 131827240, + "step": 195625 + }, + { + "epoch": 4.779273446852173, + "grad_norm": 0.0007240389240905643, + "learning_rate": 1.1852026804418325e-08, + "loss": 0.0, + "num_input_tokens_seen": 131830184, + "step": 195630 + }, + { + "epoch": 4.779395597684021, + "grad_norm": 1.851908746175468e-05, + "learning_rate": 1.1838939915662761e-08, + "loss": 0.0, + "num_input_tokens_seen": 131833384, + "step": 195635 + }, + { + "epoch": 4.779517748515867, + "grad_norm": 0.005462095607072115, + "learning_rate": 1.1825860213080185e-08, + "loss": 0.0, + "num_input_tokens_seen": 131836968, + "step": 195640 + }, + { + "epoch": 4.779639899347715, + "grad_norm": 0.000327201618347317, + "learning_rate": 1.1812787696765747e-08, + "loss": 0.0, + "num_input_tokens_seen": 131840872, + "step": 195645 + }, + { + "epoch": 4.779762050179562, + "grad_norm": 9.929361112881452e-05, + "learning_rate": 1.1799722366814591e-08, + "loss": 0.0, + "num_input_tokens_seen": 131844008, + "step": 195650 + }, + { + "epoch": 4.779884201011409, + "grad_norm": 7.03208424965851e-05, + "learning_rate": 1.1786664223321529e-08, + "loss": 0.0004, + "num_input_tokens_seen": 131847336, + "step": 195655 + }, + { + "epoch": 4.780006351843256, + "grad_norm": 3.4296645026188344e-05, + "learning_rate": 1.177361326638171e-08, + "loss": 0.0, + "num_input_tokens_seen": 131850344, + "step": 195660 + }, + { + "epoch": 4.780128502675103, + "grad_norm": 0.00012804471771232784, + "learning_rate": 1.1760569496089946e-08, + "loss": 0.0, + "num_input_tokens_seen": 131853224, + "step": 195665 + }, + { + "epoch": 4.78025065350695, + "grad_norm": 0.0007066622492857277, + "learning_rate": 1.1747532912541159e-08, + "loss": 0.0, + "num_input_tokens_seen": 131856168, + "step": 195670 + }, + { + "epoch": 4.780372804338797, + "grad_norm": 0.0008149920031428337, + "learning_rate": 1.1734503515830053e-08, + "loss": 0.0, + "num_input_tokens_seen": 131859368, + "step": 195675 + }, + { + "epoch": 4.780494955170645, + "grad_norm": 0.0002996523689944297, + "learning_rate": 1.172148130605155e-08, + "loss": 0.0, + "num_input_tokens_seen": 131862760, + "step": 195680 + }, + { + "epoch": 4.780617106002492, + "grad_norm": 0.001203904626891017, + "learning_rate": 1.1708466283300245e-08, + "loss": 0.0, + "num_input_tokens_seen": 131866280, + "step": 195685 + }, + { + "epoch": 4.780739256834339, + "grad_norm": 6.254316394915804e-05, + "learning_rate": 1.1695458447670725e-08, + "loss": 0.0, + "num_input_tokens_seen": 131869992, + "step": 195690 + }, + { + "epoch": 4.780861407666186, + "grad_norm": 0.0009527397342026234, + "learning_rate": 1.1682457799257584e-08, + "loss": 0.0336, + "num_input_tokens_seen": 131873768, + "step": 195695 + }, + { + "epoch": 4.780983558498034, + "grad_norm": 0.003038577502593398, + "learning_rate": 1.1669464338155632e-08, + "loss": 0.0, + "num_input_tokens_seen": 131877672, + "step": 195700 + }, + { + "epoch": 4.78110570932988, + "grad_norm": 0.0008341276552528143, + "learning_rate": 1.1656478064459019e-08, + "loss": 0.0, + "num_input_tokens_seen": 131880616, + "step": 195705 + }, + { + "epoch": 4.781227860161728, + "grad_norm": 0.0013110843719914556, + "learning_rate": 1.1643498978262334e-08, + "loss": 0.0, + "num_input_tokens_seen": 131884072, + "step": 195710 + }, + { + "epoch": 4.781350010993575, + "grad_norm": 5.7564688177080825e-05, + "learning_rate": 1.1630527079660057e-08, + "loss": 0.0, + "num_input_tokens_seen": 131887464, + "step": 195715 + }, + { + "epoch": 4.781472161825422, + "grad_norm": 0.0017062549013644457, + "learning_rate": 1.1617562368746226e-08, + "loss": 0.0, + "num_input_tokens_seen": 131891048, + "step": 195720 + }, + { + "epoch": 4.781594312657269, + "grad_norm": 5.923534627072513e-05, + "learning_rate": 1.160460484561554e-08, + "loss": 0.0, + "num_input_tokens_seen": 131894312, + "step": 195725 + }, + { + "epoch": 4.781716463489117, + "grad_norm": 0.08431998640298843, + "learning_rate": 1.1591654510361926e-08, + "loss": 0.0, + "num_input_tokens_seen": 131897960, + "step": 195730 + }, + { + "epoch": 4.7818386143209635, + "grad_norm": 0.00012925357441417873, + "learning_rate": 1.157871136307964e-08, + "loss": 0.0001, + "num_input_tokens_seen": 131901544, + "step": 195735 + }, + { + "epoch": 4.78196076515281, + "grad_norm": 0.00012562771735247225, + "learning_rate": 1.1565775403862831e-08, + "loss": 0.0, + "num_input_tokens_seen": 131904680, + "step": 195740 + }, + { + "epoch": 4.782082915984658, + "grad_norm": 1.1264551176282112e-05, + "learning_rate": 1.1552846632805646e-08, + "loss": 0.0, + "num_input_tokens_seen": 131908008, + "step": 195745 + }, + { + "epoch": 4.7822050668165055, + "grad_norm": 0.00039110734360292554, + "learning_rate": 1.1539925050001897e-08, + "loss": 0.0, + "num_input_tokens_seen": 131911400, + "step": 195750 + }, + { + "epoch": 4.782327217648352, + "grad_norm": 4.613706187228672e-05, + "learning_rate": 1.1527010655545621e-08, + "loss": 0.0436, + "num_input_tokens_seen": 131915304, + "step": 195755 + }, + { + "epoch": 4.782449368480199, + "grad_norm": 0.0015119805466383696, + "learning_rate": 1.1514103449530966e-08, + "loss": 0.0, + "num_input_tokens_seen": 131918312, + "step": 195760 + }, + { + "epoch": 4.782571519312047, + "grad_norm": 0.0010038913460448384, + "learning_rate": 1.150120343205152e-08, + "loss": 0.0, + "num_input_tokens_seen": 131922280, + "step": 195765 + }, + { + "epoch": 4.782693670143893, + "grad_norm": 34.064022064208984, + "learning_rate": 1.1488310603201323e-08, + "loss": 0.0538, + "num_input_tokens_seen": 131925352, + "step": 195770 + }, + { + "epoch": 4.782815820975741, + "grad_norm": 0.00014960873522795737, + "learning_rate": 1.1475424963073853e-08, + "loss": 0.0, + "num_input_tokens_seen": 131928488, + "step": 195775 + }, + { + "epoch": 4.782937971807588, + "grad_norm": 0.004268994554877281, + "learning_rate": 1.1462546511763039e-08, + "loss": 0.0, + "num_input_tokens_seen": 131931560, + "step": 195780 + }, + { + "epoch": 4.783060122639435, + "grad_norm": 0.0008855354390107095, + "learning_rate": 1.1449675249362467e-08, + "loss": 0.0, + "num_input_tokens_seen": 131935400, + "step": 195785 + }, + { + "epoch": 4.783182273471282, + "grad_norm": 6.182612560223788e-05, + "learning_rate": 1.1436811175965732e-08, + "loss": 0.0, + "num_input_tokens_seen": 131938920, + "step": 195790 + }, + { + "epoch": 4.78330442430313, + "grad_norm": 1.5357125448645093e-05, + "learning_rate": 1.1423954291666427e-08, + "loss": 0.0, + "num_input_tokens_seen": 131942120, + "step": 195795 + }, + { + "epoch": 4.7834265751349765, + "grad_norm": 1.5569354218314402e-05, + "learning_rate": 1.141110459655803e-08, + "loss": 0.0, + "num_input_tokens_seen": 131945064, + "step": 195800 + }, + { + "epoch": 4.783548725966824, + "grad_norm": 0.0012987203663215041, + "learning_rate": 1.1398262090733913e-08, + "loss": 0.0, + "num_input_tokens_seen": 131948776, + "step": 195805 + }, + { + "epoch": 4.783670876798671, + "grad_norm": 0.0008184673497453332, + "learning_rate": 1.1385426774287555e-08, + "loss": 0.0, + "num_input_tokens_seen": 131951976, + "step": 195810 + }, + { + "epoch": 4.7837930276305185, + "grad_norm": 1.7329102774965577e-05, + "learning_rate": 1.1372598647312325e-08, + "loss": 0.0, + "num_input_tokens_seen": 131955560, + "step": 195815 + }, + { + "epoch": 4.783915178462365, + "grad_norm": 0.00024611057597212493, + "learning_rate": 1.1359777709901374e-08, + "loss": 0.0, + "num_input_tokens_seen": 131958888, + "step": 195820 + }, + { + "epoch": 4.784037329294213, + "grad_norm": 0.0007355216657742858, + "learning_rate": 1.134696396214807e-08, + "loss": 0.0, + "num_input_tokens_seen": 131962088, + "step": 195825 + }, + { + "epoch": 4.78415948012606, + "grad_norm": 0.001096261665225029, + "learning_rate": 1.1334157404145672e-08, + "loss": 0.0, + "num_input_tokens_seen": 131965352, + "step": 195830 + }, + { + "epoch": 4.784281630957906, + "grad_norm": 7.370772800641134e-05, + "learning_rate": 1.1321358035987106e-08, + "loss": 0.0, + "num_input_tokens_seen": 131968808, + "step": 195835 + }, + { + "epoch": 4.784403781789754, + "grad_norm": 0.0006600241176784039, + "learning_rate": 1.1308565857765517e-08, + "loss": 0.0, + "num_input_tokens_seen": 131972520, + "step": 195840 + }, + { + "epoch": 4.784525932621602, + "grad_norm": 0.0007484956295229495, + "learning_rate": 1.1295780869574056e-08, + "loss": 0.0, + "num_input_tokens_seen": 131975528, + "step": 195845 + }, + { + "epoch": 4.784648083453448, + "grad_norm": 0.00033427434391342103, + "learning_rate": 1.1283003071505426e-08, + "loss": 0.0, + "num_input_tokens_seen": 131978408, + "step": 195850 + }, + { + "epoch": 4.784770234285295, + "grad_norm": 0.00029287466895766556, + "learning_rate": 1.1270232463652884e-08, + "loss": 0.0, + "num_input_tokens_seen": 131981736, + "step": 195855 + }, + { + "epoch": 4.784892385117143, + "grad_norm": 2.4387059966102242e-05, + "learning_rate": 1.1257469046109135e-08, + "loss": 0.0, + "num_input_tokens_seen": 131985192, + "step": 195860 + }, + { + "epoch": 4.7850145359489895, + "grad_norm": 0.025505151599645615, + "learning_rate": 1.1244712818966995e-08, + "loss": 0.0, + "num_input_tokens_seen": 131988264, + "step": 195865 + }, + { + "epoch": 4.785136686780837, + "grad_norm": 9.816163219511509e-05, + "learning_rate": 1.1231963782319275e-08, + "loss": 0.0527, + "num_input_tokens_seen": 131992424, + "step": 195870 + }, + { + "epoch": 4.785258837612684, + "grad_norm": 4.145934508414939e-05, + "learning_rate": 1.1219221936258682e-08, + "loss": 0.0, + "num_input_tokens_seen": 131995560, + "step": 195875 + }, + { + "epoch": 4.7853809884445315, + "grad_norm": 0.00012785769649781287, + "learning_rate": 1.1206487280877807e-08, + "loss": 0.0, + "num_input_tokens_seen": 131998504, + "step": 195880 + }, + { + "epoch": 4.785503139276378, + "grad_norm": 0.0005638871225528419, + "learning_rate": 1.1193759816269243e-08, + "loss": 0.0, + "num_input_tokens_seen": 132001704, + "step": 195885 + }, + { + "epoch": 4.785625290108226, + "grad_norm": 0.000946306565310806, + "learning_rate": 1.1181039542525806e-08, + "loss": 0.0, + "num_input_tokens_seen": 132005416, + "step": 195890 + }, + { + "epoch": 4.785747440940073, + "grad_norm": 0.000836696068290621, + "learning_rate": 1.1168326459739642e-08, + "loss": 0.0, + "num_input_tokens_seen": 132008808, + "step": 195895 + }, + { + "epoch": 4.78586959177192, + "grad_norm": 0.00010829546954482794, + "learning_rate": 1.1155620568003455e-08, + "loss": 0.0, + "num_input_tokens_seen": 132011880, + "step": 195900 + }, + { + "epoch": 4.785991742603767, + "grad_norm": 0.00018229872512165457, + "learning_rate": 1.1142921867409505e-08, + "loss": 0.0, + "num_input_tokens_seen": 132015080, + "step": 195905 + }, + { + "epoch": 4.786113893435615, + "grad_norm": 0.003096227301284671, + "learning_rate": 1.1130230358050164e-08, + "loss": 0.0, + "num_input_tokens_seen": 132018344, + "step": 195910 + }, + { + "epoch": 4.786236044267461, + "grad_norm": 6.02893705945462e-05, + "learning_rate": 1.11175460400178e-08, + "loss": 0.0, + "num_input_tokens_seen": 132021352, + "step": 195915 + }, + { + "epoch": 4.786358195099309, + "grad_norm": 0.00012660080392379314, + "learning_rate": 1.1104868913404563e-08, + "loss": 0.0, + "num_input_tokens_seen": 132025064, + "step": 195920 + }, + { + "epoch": 4.786480345931156, + "grad_norm": 5.217445868765935e-05, + "learning_rate": 1.1092198978302824e-08, + "loss": 0.0, + "num_input_tokens_seen": 132028520, + "step": 195925 + }, + { + "epoch": 4.7866024967630025, + "grad_norm": 2.1660089259967208e-05, + "learning_rate": 1.107953623480451e-08, + "loss": 0.0, + "num_input_tokens_seen": 132032232, + "step": 195930 + }, + { + "epoch": 4.78672464759485, + "grad_norm": 7.575411291327327e-05, + "learning_rate": 1.1066880683001878e-08, + "loss": 0.0, + "num_input_tokens_seen": 132035496, + "step": 195935 + }, + { + "epoch": 4.786846798426697, + "grad_norm": 0.02140646055340767, + "learning_rate": 1.1054232322986857e-08, + "loss": 0.0001, + "num_input_tokens_seen": 132038760, + "step": 195940 + }, + { + "epoch": 4.7869689492585445, + "grad_norm": 2.3463653633370996e-05, + "learning_rate": 1.1041591154851371e-08, + "loss": 0.0, + "num_input_tokens_seen": 132041768, + "step": 195945 + }, + { + "epoch": 4.787091100090391, + "grad_norm": 0.00011079600517405197, + "learning_rate": 1.102895717868757e-08, + "loss": 0.0, + "num_input_tokens_seen": 132045416, + "step": 195950 + }, + { + "epoch": 4.787213250922239, + "grad_norm": 0.00012676662299782038, + "learning_rate": 1.1016330394587048e-08, + "loss": 0.0, + "num_input_tokens_seen": 132048680, + "step": 195955 + }, + { + "epoch": 4.787335401754086, + "grad_norm": 0.1253872811794281, + "learning_rate": 1.1003710802641842e-08, + "loss": 0.0, + "num_input_tokens_seen": 132052136, + "step": 195960 + }, + { + "epoch": 4.787457552585933, + "grad_norm": 0.0003003769088536501, + "learning_rate": 1.0991098402943655e-08, + "loss": 0.0, + "num_input_tokens_seen": 132056040, + "step": 195965 + }, + { + "epoch": 4.78757970341778, + "grad_norm": 0.0003315807261969894, + "learning_rate": 1.0978493195584193e-08, + "loss": 0.0, + "num_input_tokens_seen": 132060008, + "step": 195970 + }, + { + "epoch": 4.787701854249628, + "grad_norm": 0.0008391228620894253, + "learning_rate": 1.096589518065516e-08, + "loss": 0.0, + "num_input_tokens_seen": 132063528, + "step": 195975 + }, + { + "epoch": 4.787824005081474, + "grad_norm": 0.004577086307108402, + "learning_rate": 1.095330435824826e-08, + "loss": 0.0, + "num_input_tokens_seen": 132066856, + "step": 195980 + }, + { + "epoch": 4.787946155913322, + "grad_norm": 0.0018010676139965653, + "learning_rate": 1.0940720728454755e-08, + "loss": 0.0, + "num_input_tokens_seen": 132070120, + "step": 195985 + }, + { + "epoch": 4.788068306745169, + "grad_norm": 0.00010236025264021009, + "learning_rate": 1.092814429136646e-08, + "loss": 0.0, + "num_input_tokens_seen": 132073896, + "step": 195990 + }, + { + "epoch": 4.7881904575770164, + "grad_norm": 0.0017252841498702765, + "learning_rate": 1.0915575047074854e-08, + "loss": 0.0, + "num_input_tokens_seen": 132077096, + "step": 195995 + }, + { + "epoch": 4.788312608408863, + "grad_norm": 0.00012550843530334532, + "learning_rate": 1.090301299567098e-08, + "loss": 0.0, + "num_input_tokens_seen": 132080680, + "step": 196000 + }, + { + "epoch": 4.78843475924071, + "grad_norm": 0.0007126462296582758, + "learning_rate": 1.0890458137246539e-08, + "loss": 0.0, + "num_input_tokens_seen": 132083816, + "step": 196005 + }, + { + "epoch": 4.788556910072558, + "grad_norm": 0.0004484814126044512, + "learning_rate": 1.0877910471892793e-08, + "loss": 0.0, + "num_input_tokens_seen": 132087208, + "step": 196010 + }, + { + "epoch": 4.788679060904405, + "grad_norm": 0.0004872412246186286, + "learning_rate": 1.086536999970078e-08, + "loss": 0.0, + "num_input_tokens_seen": 132090472, + "step": 196015 + }, + { + "epoch": 4.788801211736252, + "grad_norm": 1.7085814761230722e-05, + "learning_rate": 1.0852836720761982e-08, + "loss": 0.0, + "num_input_tokens_seen": 132093928, + "step": 196020 + }, + { + "epoch": 4.788923362568099, + "grad_norm": 0.000753185071516782, + "learning_rate": 1.0840310635167216e-08, + "loss": 0.0, + "num_input_tokens_seen": 132097320, + "step": 196025 + }, + { + "epoch": 4.789045513399946, + "grad_norm": 0.0021108719520270824, + "learning_rate": 1.0827791743007852e-08, + "loss": 0.0, + "num_input_tokens_seen": 132100264, + "step": 196030 + }, + { + "epoch": 4.789167664231793, + "grad_norm": 5.6025430239969864e-05, + "learning_rate": 1.081528004437493e-08, + "loss": 0.0, + "num_input_tokens_seen": 132103848, + "step": 196035 + }, + { + "epoch": 4.789289815063641, + "grad_norm": 0.0003986261726822704, + "learning_rate": 1.0802775539359266e-08, + "loss": 0.0, + "num_input_tokens_seen": 132107176, + "step": 196040 + }, + { + "epoch": 4.7894119658954875, + "grad_norm": 4.800490569323301e-05, + "learning_rate": 1.0790278228051897e-08, + "loss": 0.0, + "num_input_tokens_seen": 132110376, + "step": 196045 + }, + { + "epoch": 4.789534116727335, + "grad_norm": 9.925442282110453e-05, + "learning_rate": 1.0777788110543751e-08, + "loss": 0.0, + "num_input_tokens_seen": 132113704, + "step": 196050 + }, + { + "epoch": 4.789656267559182, + "grad_norm": 2.5734510927577503e-05, + "learning_rate": 1.0765305186925532e-08, + "loss": 0.0, + "num_input_tokens_seen": 132117288, + "step": 196055 + }, + { + "epoch": 4.7897784183910295, + "grad_norm": 0.0013512138975784183, + "learning_rate": 1.075282945728806e-08, + "loss": 0.0, + "num_input_tokens_seen": 132120488, + "step": 196060 + }, + { + "epoch": 4.789900569222876, + "grad_norm": 0.00018341124814469367, + "learning_rate": 1.0740360921722146e-08, + "loss": 0.0, + "num_input_tokens_seen": 132123816, + "step": 196065 + }, + { + "epoch": 4.790022720054724, + "grad_norm": 0.0020230484660714865, + "learning_rate": 1.0727899580318388e-08, + "loss": 0.0, + "num_input_tokens_seen": 132126952, + "step": 196070 + }, + { + "epoch": 4.790144870886571, + "grad_norm": 1.877875729405787e-05, + "learning_rate": 1.071544543316738e-08, + "loss": 0.0, + "num_input_tokens_seen": 132130536, + "step": 196075 + }, + { + "epoch": 4.790267021718418, + "grad_norm": 0.00011027592699974775, + "learning_rate": 1.0702998480359827e-08, + "loss": 0.0, + "num_input_tokens_seen": 132133800, + "step": 196080 + }, + { + "epoch": 4.790389172550265, + "grad_norm": 0.0023058492224663496, + "learning_rate": 1.0690558721986209e-08, + "loss": 0.0, + "num_input_tokens_seen": 132136872, + "step": 196085 + }, + { + "epoch": 4.790511323382113, + "grad_norm": 0.0016999959480017424, + "learning_rate": 1.0678126158136791e-08, + "loss": 0.0001, + "num_input_tokens_seen": 132140392, + "step": 196090 + }, + { + "epoch": 4.790633474213959, + "grad_norm": 0.00752806244418025, + "learning_rate": 1.0665700788902277e-08, + "loss": 0.0, + "num_input_tokens_seen": 132143336, + "step": 196095 + }, + { + "epoch": 4.790755625045806, + "grad_norm": 1.3469006262312178e-05, + "learning_rate": 1.0653282614372705e-08, + "loss": 0.0, + "num_input_tokens_seen": 132146664, + "step": 196100 + }, + { + "epoch": 4.790877775877654, + "grad_norm": 0.0008614024263806641, + "learning_rate": 1.064087163463867e-08, + "loss": 0.0, + "num_input_tokens_seen": 132149928, + "step": 196105 + }, + { + "epoch": 4.790999926709501, + "grad_norm": 0.01275652926415205, + "learning_rate": 1.0628467849790323e-08, + "loss": 0.0, + "num_input_tokens_seen": 132153320, + "step": 196110 + }, + { + "epoch": 4.791122077541348, + "grad_norm": 0.0005827039130963385, + "learning_rate": 1.0616071259917925e-08, + "loss": 0.0, + "num_input_tokens_seen": 132156328, + "step": 196115 + }, + { + "epoch": 4.791244228373195, + "grad_norm": 0.00018269375141244382, + "learning_rate": 1.0603681865111402e-08, + "loss": 0.0, + "num_input_tokens_seen": 132159272, + "step": 196120 + }, + { + "epoch": 4.7913663792050425, + "grad_norm": 9.967612277250737e-05, + "learning_rate": 1.0591299665461128e-08, + "loss": 0.0, + "num_input_tokens_seen": 132162600, + "step": 196125 + }, + { + "epoch": 4.791488530036889, + "grad_norm": 0.0005747093237005174, + "learning_rate": 1.057892466105703e-08, + "loss": 0.0001, + "num_input_tokens_seen": 132165864, + "step": 196130 + }, + { + "epoch": 4.791610680868737, + "grad_norm": 0.000769525533542037, + "learning_rate": 1.056655685198915e-08, + "loss": 0.0, + "num_input_tokens_seen": 132169512, + "step": 196135 + }, + { + "epoch": 4.791732831700584, + "grad_norm": 0.00016920336929615587, + "learning_rate": 1.0554196238347302e-08, + "loss": 0.0, + "num_input_tokens_seen": 132172520, + "step": 196140 + }, + { + "epoch": 4.791854982532431, + "grad_norm": 0.00163973867893219, + "learning_rate": 1.0541842820221524e-08, + "loss": 0.0, + "num_input_tokens_seen": 132175656, + "step": 196145 + }, + { + "epoch": 4.791977133364278, + "grad_norm": 0.00014396561891771853, + "learning_rate": 1.0529496597701636e-08, + "loss": 0.0, + "num_input_tokens_seen": 132180008, + "step": 196150 + }, + { + "epoch": 4.792099284196126, + "grad_norm": 0.0024907325860112906, + "learning_rate": 1.0517157570877344e-08, + "loss": 0.0, + "num_input_tokens_seen": 132183400, + "step": 196155 + }, + { + "epoch": 4.792221435027972, + "grad_norm": 0.006735434755682945, + "learning_rate": 1.0504825739838353e-08, + "loss": 0.0, + "num_input_tokens_seen": 132186856, + "step": 196160 + }, + { + "epoch": 4.79234358585982, + "grad_norm": 1.2725016858894378e-05, + "learning_rate": 1.049250110467459e-08, + "loss": 0.0, + "num_input_tokens_seen": 132190248, + "step": 196165 + }, + { + "epoch": 4.792465736691667, + "grad_norm": 1.25408132589655e-05, + "learning_rate": 1.0480183665475317e-08, + "loss": 0.0, + "num_input_tokens_seen": 132193640, + "step": 196170 + }, + { + "epoch": 4.792587887523514, + "grad_norm": 0.00020013254834339023, + "learning_rate": 1.0467873422330464e-08, + "loss": 0.0, + "num_input_tokens_seen": 132196712, + "step": 196175 + }, + { + "epoch": 4.792710038355361, + "grad_norm": 0.000348270230460912, + "learning_rate": 1.0455570375329181e-08, + "loss": 0.0, + "num_input_tokens_seen": 132200232, + "step": 196180 + }, + { + "epoch": 4.792832189187209, + "grad_norm": 0.0007520915823988616, + "learning_rate": 1.0443274524561396e-08, + "loss": 0.0, + "num_input_tokens_seen": 132203432, + "step": 196185 + }, + { + "epoch": 4.7929543400190555, + "grad_norm": 9.265771950595081e-05, + "learning_rate": 1.043098587011615e-08, + "loss": 0.0, + "num_input_tokens_seen": 132206824, + "step": 196190 + }, + { + "epoch": 4.793076490850902, + "grad_norm": 5.9360292652854696e-05, + "learning_rate": 1.0418704412082924e-08, + "loss": 0.0001, + "num_input_tokens_seen": 132210088, + "step": 196195 + }, + { + "epoch": 4.79319864168275, + "grad_norm": 0.0014555418165400624, + "learning_rate": 1.0406430150551094e-08, + "loss": 0.0003, + "num_input_tokens_seen": 132213480, + "step": 196200 + }, + { + "epoch": 4.793320792514597, + "grad_norm": 0.0003992864803876728, + "learning_rate": 1.0394163085609808e-08, + "loss": 0.0, + "num_input_tokens_seen": 132216616, + "step": 196205 + }, + { + "epoch": 4.793442943346444, + "grad_norm": 4.887767499894835e-05, + "learning_rate": 1.038190321734833e-08, + "loss": 0.0, + "num_input_tokens_seen": 132220008, + "step": 196210 + }, + { + "epoch": 4.793565094178291, + "grad_norm": 0.0018320534145459533, + "learning_rate": 1.0369650545855813e-08, + "loss": 0.0003, + "num_input_tokens_seen": 132222952, + "step": 196215 + }, + { + "epoch": 4.793687245010139, + "grad_norm": 0.003153629833832383, + "learning_rate": 1.0357405071221404e-08, + "loss": 0.0, + "num_input_tokens_seen": 132226216, + "step": 196220 + }, + { + "epoch": 4.793809395841985, + "grad_norm": 0.00641963304951787, + "learning_rate": 1.0345166793534255e-08, + "loss": 0.0, + "num_input_tokens_seen": 132229224, + "step": 196225 + }, + { + "epoch": 4.793931546673833, + "grad_norm": 0.009071718901395798, + "learning_rate": 1.0332935712883073e-08, + "loss": 0.0, + "num_input_tokens_seen": 132232488, + "step": 196230 + }, + { + "epoch": 4.79405369750568, + "grad_norm": 8.992061339085922e-05, + "learning_rate": 1.032071182935701e-08, + "loss": 0.0, + "num_input_tokens_seen": 132235304, + "step": 196235 + }, + { + "epoch": 4.794175848337527, + "grad_norm": 0.00019319630519021302, + "learning_rate": 1.0308495143044993e-08, + "loss": 0.0, + "num_input_tokens_seen": 132238696, + "step": 196240 + }, + { + "epoch": 4.794297999169374, + "grad_norm": 5.871194298379123e-05, + "learning_rate": 1.029628565403573e-08, + "loss": 0.0001, + "num_input_tokens_seen": 132242024, + "step": 196245 + }, + { + "epoch": 4.794420150001222, + "grad_norm": 0.0004964267718605697, + "learning_rate": 1.028408336241804e-08, + "loss": 0.0, + "num_input_tokens_seen": 132245480, + "step": 196250 + }, + { + "epoch": 4.7945423008330685, + "grad_norm": 0.0026195256505161524, + "learning_rate": 1.0271888268280737e-08, + "loss": 0.0, + "num_input_tokens_seen": 132248936, + "step": 196255 + }, + { + "epoch": 4.794664451664916, + "grad_norm": 0.0009615510352887213, + "learning_rate": 1.0259700371712532e-08, + "loss": 0.0, + "num_input_tokens_seen": 132252584, + "step": 196260 + }, + { + "epoch": 4.794786602496763, + "grad_norm": 0.00019157327187713236, + "learning_rate": 1.0247519672801907e-08, + "loss": 0.0, + "num_input_tokens_seen": 132255784, + "step": 196265 + }, + { + "epoch": 4.7949087533286106, + "grad_norm": 0.001955286832526326, + "learning_rate": 1.0235346171637571e-08, + "loss": 0.0, + "num_input_tokens_seen": 132259368, + "step": 196270 + }, + { + "epoch": 4.795030904160457, + "grad_norm": 0.0010139269288629293, + "learning_rate": 1.0223179868308007e-08, + "loss": 0.0, + "num_input_tokens_seen": 132262824, + "step": 196275 + }, + { + "epoch": 4.795153054992305, + "grad_norm": 1.6927018805290572e-05, + "learning_rate": 1.02110207629017e-08, + "loss": 0.0, + "num_input_tokens_seen": 132266024, + "step": 196280 + }, + { + "epoch": 4.795275205824152, + "grad_norm": 3.738186933333054e-05, + "learning_rate": 1.0198868855507026e-08, + "loss": 0.0, + "num_input_tokens_seen": 132269672, + "step": 196285 + }, + { + "epoch": 4.795397356655998, + "grad_norm": 0.0002817772619891912, + "learning_rate": 1.0186724146212467e-08, + "loss": 0.0, + "num_input_tokens_seen": 132272872, + "step": 196290 + }, + { + "epoch": 4.795519507487846, + "grad_norm": 0.001034017652273178, + "learning_rate": 1.0174586635106285e-08, + "loss": 0.0, + "num_input_tokens_seen": 132275944, + "step": 196295 + }, + { + "epoch": 4.795641658319693, + "grad_norm": 0.00042883388232439756, + "learning_rate": 1.0162456322276747e-08, + "loss": 0.0, + "num_input_tokens_seen": 132279400, + "step": 196300 + }, + { + "epoch": 4.79576380915154, + "grad_norm": 0.00022619598894380033, + "learning_rate": 1.0150333207812001e-08, + "loss": 0.0, + "num_input_tokens_seen": 132283240, + "step": 196305 + }, + { + "epoch": 4.795885959983387, + "grad_norm": 1.1148900739499368e-05, + "learning_rate": 1.013821729180031e-08, + "loss": 0.0, + "num_input_tokens_seen": 132286248, + "step": 196310 + }, + { + "epoch": 4.796008110815235, + "grad_norm": 9.672602573118638e-06, + "learning_rate": 1.0126108574329718e-08, + "loss": 0.0, + "num_input_tokens_seen": 132289768, + "step": 196315 + }, + { + "epoch": 4.796130261647082, + "grad_norm": 0.0005740622291341424, + "learning_rate": 1.0114007055488261e-08, + "loss": 0.0453, + "num_input_tokens_seen": 132292840, + "step": 196320 + }, + { + "epoch": 4.796252412478929, + "grad_norm": 3.958161687478423e-05, + "learning_rate": 1.0101912735364092e-08, + "loss": 0.0275, + "num_input_tokens_seen": 132296168, + "step": 196325 + }, + { + "epoch": 4.796374563310776, + "grad_norm": 0.0006076816935092211, + "learning_rate": 1.0089825614045032e-08, + "loss": 0.0, + "num_input_tokens_seen": 132299688, + "step": 196330 + }, + { + "epoch": 4.796496714142624, + "grad_norm": 0.00510576693341136, + "learning_rate": 1.00777456916189e-08, + "loss": 0.0, + "num_input_tokens_seen": 132302952, + "step": 196335 + }, + { + "epoch": 4.79661886497447, + "grad_norm": 0.0004893884179182351, + "learning_rate": 1.0065672968173734e-08, + "loss": 0.0, + "num_input_tokens_seen": 132305960, + "step": 196340 + }, + { + "epoch": 4.796741015806318, + "grad_norm": 0.0009445503819733858, + "learning_rate": 1.0053607443797351e-08, + "loss": 0.0, + "num_input_tokens_seen": 132309096, + "step": 196345 + }, + { + "epoch": 4.796863166638165, + "grad_norm": 0.0003557786112651229, + "learning_rate": 1.0041549118577353e-08, + "loss": 0.0, + "num_input_tokens_seen": 132312552, + "step": 196350 + }, + { + "epoch": 4.796985317470012, + "grad_norm": 8.319457265315577e-05, + "learning_rate": 1.0029497992601443e-08, + "loss": 0.0003, + "num_input_tokens_seen": 132315624, + "step": 196355 + }, + { + "epoch": 4.797107468301859, + "grad_norm": 0.0003622810763772577, + "learning_rate": 1.001745406595722e-08, + "loss": 0.0, + "num_input_tokens_seen": 132319144, + "step": 196360 + }, + { + "epoch": 4.797229619133706, + "grad_norm": 0.0001322765019722283, + "learning_rate": 1.0005417338732502e-08, + "loss": 0.0, + "num_input_tokens_seen": 132322472, + "step": 196365 + }, + { + "epoch": 4.7973517699655535, + "grad_norm": 0.00015713920583948493, + "learning_rate": 9.993387811014553e-09, + "loss": 0.0, + "num_input_tokens_seen": 132325672, + "step": 196370 + }, + { + "epoch": 4.797473920797401, + "grad_norm": 0.001111492863856256, + "learning_rate": 9.98136548289097e-09, + "loss": 0.0, + "num_input_tokens_seen": 132328744, + "step": 196375 + }, + { + "epoch": 4.797596071629248, + "grad_norm": 8.227213402278721e-05, + "learning_rate": 9.969350354449236e-09, + "loss": 0.0, + "num_input_tokens_seen": 132331880, + "step": 196380 + }, + { + "epoch": 4.797718222461095, + "grad_norm": 0.0006079964805394411, + "learning_rate": 9.957342425776617e-09, + "loss": 0.0, + "num_input_tokens_seen": 132335208, + "step": 196385 + }, + { + "epoch": 4.797840373292942, + "grad_norm": 5.064927245257422e-05, + "learning_rate": 9.945341696960596e-09, + "loss": 0.0, + "num_input_tokens_seen": 132338536, + "step": 196390 + }, + { + "epoch": 4.797962524124789, + "grad_norm": 3.677351196529344e-05, + "learning_rate": 9.933348168088329e-09, + "loss": 0.0, + "num_input_tokens_seen": 132341928, + "step": 196395 + }, + { + "epoch": 4.798084674956637, + "grad_norm": 0.001421840162947774, + "learning_rate": 9.921361839246967e-09, + "loss": 0.0, + "num_input_tokens_seen": 132345256, + "step": 196400 + }, + { + "epoch": 4.798206825788483, + "grad_norm": 0.0006268465076573193, + "learning_rate": 9.909382710523773e-09, + "loss": 0.0, + "num_input_tokens_seen": 132348456, + "step": 196405 + }, + { + "epoch": 4.798328976620331, + "grad_norm": 0.00036963573074899614, + "learning_rate": 9.897410782005789e-09, + "loss": 0.0, + "num_input_tokens_seen": 132351464, + "step": 196410 + }, + { + "epoch": 4.798451127452178, + "grad_norm": 0.00029923420515842736, + "learning_rate": 9.885446053780278e-09, + "loss": 0.0, + "num_input_tokens_seen": 132354728, + "step": 196415 + }, + { + "epoch": 4.798573278284025, + "grad_norm": 5.575628892984241e-05, + "learning_rate": 9.87348852593406e-09, + "loss": 0.0, + "num_input_tokens_seen": 132358376, + "step": 196420 + }, + { + "epoch": 4.798695429115872, + "grad_norm": 3.5729637602344155e-05, + "learning_rate": 9.861538198554175e-09, + "loss": 0.0, + "num_input_tokens_seen": 132361448, + "step": 196425 + }, + { + "epoch": 4.79881757994772, + "grad_norm": 0.0001392970298184082, + "learning_rate": 9.849595071727445e-09, + "loss": 0.0, + "num_input_tokens_seen": 132364712, + "step": 196430 + }, + { + "epoch": 4.7989397307795665, + "grad_norm": 0.004172740038484335, + "learning_rate": 9.837659145540689e-09, + "loss": 0.0, + "num_input_tokens_seen": 132367656, + "step": 196435 + }, + { + "epoch": 4.799061881611414, + "grad_norm": 0.003073500469326973, + "learning_rate": 9.825730420080946e-09, + "loss": 0.0, + "num_input_tokens_seen": 132370920, + "step": 196440 + }, + { + "epoch": 4.799184032443261, + "grad_norm": 0.0002630621602293104, + "learning_rate": 9.813808895434706e-09, + "loss": 0.0, + "num_input_tokens_seen": 132374440, + "step": 196445 + }, + { + "epoch": 4.7993061832751085, + "grad_norm": 3.272489266237244e-05, + "learning_rate": 9.801894571688895e-09, + "loss": 0.0001, + "num_input_tokens_seen": 132377832, + "step": 196450 + }, + { + "epoch": 4.799428334106955, + "grad_norm": 0.0007254548254422843, + "learning_rate": 9.789987448930004e-09, + "loss": 0.0, + "num_input_tokens_seen": 132380904, + "step": 196455 + }, + { + "epoch": 4.799550484938802, + "grad_norm": 0.00751190772280097, + "learning_rate": 9.778087527244628e-09, + "loss": 0.0, + "num_input_tokens_seen": 132384104, + "step": 196460 + }, + { + "epoch": 4.79967263577065, + "grad_norm": 0.0009871211368590593, + "learning_rate": 9.766194806719364e-09, + "loss": 0.0, + "num_input_tokens_seen": 132387624, + "step": 196465 + }, + { + "epoch": 4.799794786602497, + "grad_norm": 0.0007149404264055192, + "learning_rate": 9.754309287440588e-09, + "loss": 0.0, + "num_input_tokens_seen": 132390568, + "step": 196470 + }, + { + "epoch": 4.799916937434344, + "grad_norm": 0.0004417687305249274, + "learning_rate": 9.742430969494896e-09, + "loss": 0.0, + "num_input_tokens_seen": 132393704, + "step": 196475 + }, + { + "epoch": 4.800039088266191, + "grad_norm": 0.0023151689674705267, + "learning_rate": 9.730559852968557e-09, + "loss": 0.0, + "num_input_tokens_seen": 132396968, + "step": 196480 + }, + { + "epoch": 4.800161239098038, + "grad_norm": 0.03932206705212593, + "learning_rate": 9.718695937948052e-09, + "loss": 0.0, + "num_input_tokens_seen": 132400488, + "step": 196485 + }, + { + "epoch": 4.800283389929885, + "grad_norm": 0.000937451608479023, + "learning_rate": 9.706839224519426e-09, + "loss": 0.0, + "num_input_tokens_seen": 132403368, + "step": 196490 + }, + { + "epoch": 4.800405540761733, + "grad_norm": 0.00011623586033238098, + "learning_rate": 9.694989712769053e-09, + "loss": 0.0, + "num_input_tokens_seen": 132406632, + "step": 196495 + }, + { + "epoch": 4.8005276915935795, + "grad_norm": 0.0004854142025578767, + "learning_rate": 9.683147402783088e-09, + "loss": 0.0, + "num_input_tokens_seen": 132409640, + "step": 196500 + }, + { + "epoch": 4.800649842425427, + "grad_norm": 0.0037219214718788862, + "learning_rate": 9.671312294647683e-09, + "loss": 0.0, + "num_input_tokens_seen": 132412840, + "step": 196505 + }, + { + "epoch": 4.800771993257274, + "grad_norm": 0.00020332216809038073, + "learning_rate": 9.659484388448768e-09, + "loss": 0.0001, + "num_input_tokens_seen": 132415976, + "step": 196510 + }, + { + "epoch": 4.8008941440891215, + "grad_norm": 0.00035678790300153196, + "learning_rate": 9.6476636842725e-09, + "loss": 0.0, + "num_input_tokens_seen": 132419688, + "step": 196515 + }, + { + "epoch": 4.801016294920968, + "grad_norm": 5.017863441025838e-05, + "learning_rate": 9.635850182204809e-09, + "loss": 0.0, + "num_input_tokens_seen": 132423272, + "step": 196520 + }, + { + "epoch": 4.801138445752816, + "grad_norm": 0.0006258144276216626, + "learning_rate": 9.624043882331511e-09, + "loss": 0.0, + "num_input_tokens_seen": 132426920, + "step": 196525 + }, + { + "epoch": 4.801260596584663, + "grad_norm": 0.0013677050592377782, + "learning_rate": 9.612244784738543e-09, + "loss": 0.0, + "num_input_tokens_seen": 132430056, + "step": 196530 + }, + { + "epoch": 4.80138274741651, + "grad_norm": 0.00019165143021382391, + "learning_rate": 9.600452889511835e-09, + "loss": 0.0, + "num_input_tokens_seen": 132433128, + "step": 196535 + }, + { + "epoch": 4.801504898248357, + "grad_norm": 0.004350326023995876, + "learning_rate": 9.588668196736871e-09, + "loss": 0.0, + "num_input_tokens_seen": 132436264, + "step": 196540 + }, + { + "epoch": 4.801627049080205, + "grad_norm": 0.0016057752072811127, + "learning_rate": 9.576890706499696e-09, + "loss": 0.0, + "num_input_tokens_seen": 132439720, + "step": 196545 + }, + { + "epoch": 4.801749199912051, + "grad_norm": 5.42028974450659e-05, + "learning_rate": 9.565120418885574e-09, + "loss": 0.0, + "num_input_tokens_seen": 132443176, + "step": 196550 + }, + { + "epoch": 4.801871350743898, + "grad_norm": 0.0002965881139971316, + "learning_rate": 9.553357333980438e-09, + "loss": 0.0, + "num_input_tokens_seen": 132446888, + "step": 196555 + }, + { + "epoch": 4.801993501575746, + "grad_norm": 11.83859920501709, + "learning_rate": 9.541601451869552e-09, + "loss": 0.0354, + "num_input_tokens_seen": 132450024, + "step": 196560 + }, + { + "epoch": 4.8021156524075925, + "grad_norm": 0.05751524493098259, + "learning_rate": 9.529852772638625e-09, + "loss": 0.0, + "num_input_tokens_seen": 132453800, + "step": 196565 + }, + { + "epoch": 4.80223780323944, + "grad_norm": 3.001950062753167e-06, + "learning_rate": 9.518111296372921e-09, + "loss": 0.0, + "num_input_tokens_seen": 132456936, + "step": 196570 + }, + { + "epoch": 4.802359954071287, + "grad_norm": 0.00021984051272738725, + "learning_rate": 9.506377023158042e-09, + "loss": 0.0, + "num_input_tokens_seen": 132459944, + "step": 196575 + }, + { + "epoch": 4.8024821049031345, + "grad_norm": 4.9385958845959976e-05, + "learning_rate": 9.494649953079137e-09, + "loss": 0.0, + "num_input_tokens_seen": 132463784, + "step": 196580 + }, + { + "epoch": 4.802604255734981, + "grad_norm": 0.0006307087605819106, + "learning_rate": 9.482930086221585e-09, + "loss": 0.0, + "num_input_tokens_seen": 132466792, + "step": 196585 + }, + { + "epoch": 4.802726406566829, + "grad_norm": 0.00309332855977118, + "learning_rate": 9.471217422670541e-09, + "loss": 0.0, + "num_input_tokens_seen": 132470696, + "step": 196590 + }, + { + "epoch": 4.802848557398676, + "grad_norm": 0.00015673524467274547, + "learning_rate": 9.459511962511268e-09, + "loss": 0.0, + "num_input_tokens_seen": 132474408, + "step": 196595 + }, + { + "epoch": 4.802970708230523, + "grad_norm": 0.000370059278793633, + "learning_rate": 9.44781370582881e-09, + "loss": 0.0, + "num_input_tokens_seen": 132477864, + "step": 196600 + }, + { + "epoch": 4.80309285906237, + "grad_norm": 0.009521500207483768, + "learning_rate": 9.436122652708212e-09, + "loss": 0.0, + "num_input_tokens_seen": 132480872, + "step": 196605 + }, + { + "epoch": 4.803215009894218, + "grad_norm": 0.00011173263192176819, + "learning_rate": 9.424438803234736e-09, + "loss": 0.0, + "num_input_tokens_seen": 132484264, + "step": 196610 + }, + { + "epoch": 4.803337160726064, + "grad_norm": 3.594495137804188e-05, + "learning_rate": 9.412762157493092e-09, + "loss": 0.0, + "num_input_tokens_seen": 132487848, + "step": 196615 + }, + { + "epoch": 4.803459311557912, + "grad_norm": 0.000330898241372779, + "learning_rate": 9.401092715568215e-09, + "loss": 0.0, + "num_input_tokens_seen": 132491304, + "step": 196620 + }, + { + "epoch": 4.803581462389759, + "grad_norm": 0.000155567133333534, + "learning_rate": 9.389430477545035e-09, + "loss": 0.0, + "num_input_tokens_seen": 132494824, + "step": 196625 + }, + { + "epoch": 4.8037036132216056, + "grad_norm": 0.00020720763131976128, + "learning_rate": 9.377775443508485e-09, + "loss": 0.0, + "num_input_tokens_seen": 132498280, + "step": 196630 + }, + { + "epoch": 4.803825764053453, + "grad_norm": 1.3492269317794126e-05, + "learning_rate": 9.366127613543051e-09, + "loss": 0.0, + "num_input_tokens_seen": 132501672, + "step": 196635 + }, + { + "epoch": 4.803947914885301, + "grad_norm": 1.2948934454470873e-05, + "learning_rate": 9.354486987733668e-09, + "loss": 0.0, + "num_input_tokens_seen": 132505000, + "step": 196640 + }, + { + "epoch": 4.804070065717148, + "grad_norm": 0.0005500533152371645, + "learning_rate": 9.342853566164932e-09, + "loss": 0.0, + "num_input_tokens_seen": 132508328, + "step": 196645 + }, + { + "epoch": 4.804192216548994, + "grad_norm": 7.395833381451666e-05, + "learning_rate": 9.331227348921333e-09, + "loss": 0.0, + "num_input_tokens_seen": 132511720, + "step": 196650 + }, + { + "epoch": 4.804314367380842, + "grad_norm": 6.505249621113762e-05, + "learning_rate": 9.319608336087582e-09, + "loss": 0.0, + "num_input_tokens_seen": 132515176, + "step": 196655 + }, + { + "epoch": 4.804436518212689, + "grad_norm": 0.00017917572404257953, + "learning_rate": 9.307996527747941e-09, + "loss": 0.0, + "num_input_tokens_seen": 132518760, + "step": 196660 + }, + { + "epoch": 4.804558669044536, + "grad_norm": 0.0001549017906654626, + "learning_rate": 9.296391923987235e-09, + "loss": 0.0, + "num_input_tokens_seen": 132521832, + "step": 196665 + }, + { + "epoch": 4.804680819876383, + "grad_norm": 0.004271478857845068, + "learning_rate": 9.284794524889505e-09, + "loss": 0.0, + "num_input_tokens_seen": 132524968, + "step": 196670 + }, + { + "epoch": 4.804802970708231, + "grad_norm": 0.00041260154102928936, + "learning_rate": 9.273204330539242e-09, + "loss": 0.0, + "num_input_tokens_seen": 132528616, + "step": 196675 + }, + { + "epoch": 4.8049251215400774, + "grad_norm": 0.00033596798311918974, + "learning_rate": 9.26162134102071e-09, + "loss": 0.0, + "num_input_tokens_seen": 132531880, + "step": 196680 + }, + { + "epoch": 4.805047272371925, + "grad_norm": 7.80812042648904e-05, + "learning_rate": 9.250045556418173e-09, + "loss": 0.0, + "num_input_tokens_seen": 132534888, + "step": 196685 + }, + { + "epoch": 4.805169423203772, + "grad_norm": 3.21592997352127e-05, + "learning_rate": 9.23847697681579e-09, + "loss": 0.0, + "num_input_tokens_seen": 132537960, + "step": 196690 + }, + { + "epoch": 4.8052915740356195, + "grad_norm": 0.00014071361511014402, + "learning_rate": 9.226915602297602e-09, + "loss": 0.0, + "num_input_tokens_seen": 132541096, + "step": 196695 + }, + { + "epoch": 4.805413724867466, + "grad_norm": 0.0008824639371596277, + "learning_rate": 9.215361432947877e-09, + "loss": 0.0, + "num_input_tokens_seen": 132544616, + "step": 196700 + }, + { + "epoch": 4.805535875699314, + "grad_norm": 3.565695442375727e-05, + "learning_rate": 9.203814468850547e-09, + "loss": 0.0, + "num_input_tokens_seen": 132548008, + "step": 196705 + }, + { + "epoch": 4.805658026531161, + "grad_norm": 0.00031765305902808905, + "learning_rate": 9.192274710089432e-09, + "loss": 0.0, + "num_input_tokens_seen": 132551784, + "step": 196710 + }, + { + "epoch": 4.805780177363008, + "grad_norm": 4.537498170975596e-05, + "learning_rate": 9.180742156748688e-09, + "loss": 0.0, + "num_input_tokens_seen": 132555112, + "step": 196715 + }, + { + "epoch": 4.805902328194855, + "grad_norm": 3.123639908153564e-05, + "learning_rate": 9.169216808912028e-09, + "loss": 0.0536, + "num_input_tokens_seen": 132558376, + "step": 196720 + }, + { + "epoch": 4.806024479026702, + "grad_norm": 2.5088884285651147e-05, + "learning_rate": 9.157698666663382e-09, + "loss": 0.0, + "num_input_tokens_seen": 132561896, + "step": 196725 + }, + { + "epoch": 4.806146629858549, + "grad_norm": 0.00033539420110173523, + "learning_rate": 9.146187730086463e-09, + "loss": 0.0, + "num_input_tokens_seen": 132564968, + "step": 196730 + }, + { + "epoch": 4.806268780690397, + "grad_norm": 0.0005353165324777365, + "learning_rate": 9.134683999264981e-09, + "loss": 0.0, + "num_input_tokens_seen": 132568168, + "step": 196735 + }, + { + "epoch": 4.806390931522244, + "grad_norm": 0.0009416808607056737, + "learning_rate": 9.123187474282535e-09, + "loss": 0.0, + "num_input_tokens_seen": 132571240, + "step": 196740 + }, + { + "epoch": 4.8065130823540905, + "grad_norm": 0.00034308669273741543, + "learning_rate": 9.111698155222724e-09, + "loss": 0.0, + "num_input_tokens_seen": 132575016, + "step": 196745 + }, + { + "epoch": 4.806635233185938, + "grad_norm": 0.00025203998666256666, + "learning_rate": 9.100216042169262e-09, + "loss": 0.0, + "num_input_tokens_seen": 132577832, + "step": 196750 + }, + { + "epoch": 4.806757384017785, + "grad_norm": 0.0003336328372824937, + "learning_rate": 9.088741135205525e-09, + "loss": 0.0, + "num_input_tokens_seen": 132581224, + "step": 196755 + }, + { + "epoch": 4.8068795348496325, + "grad_norm": 4.427826570463367e-05, + "learning_rate": 9.077273434415e-09, + "loss": 0.0001, + "num_input_tokens_seen": 132584424, + "step": 196760 + }, + { + "epoch": 4.807001685681479, + "grad_norm": 0.0014946991577744484, + "learning_rate": 9.065812939881067e-09, + "loss": 0.0, + "num_input_tokens_seen": 132587560, + "step": 196765 + }, + { + "epoch": 4.807123836513327, + "grad_norm": 0.0006305626593530178, + "learning_rate": 9.054359651687105e-09, + "loss": 0.0, + "num_input_tokens_seen": 132590568, + "step": 196770 + }, + { + "epoch": 4.807245987345174, + "grad_norm": 0.001171535113826394, + "learning_rate": 9.042913569916266e-09, + "loss": 0.0, + "num_input_tokens_seen": 132594024, + "step": 196775 + }, + { + "epoch": 4.807368138177021, + "grad_norm": 0.00044857600005343556, + "learning_rate": 9.03147469465193e-09, + "loss": 0.0, + "num_input_tokens_seen": 132597608, + "step": 196780 + }, + { + "epoch": 4.807490289008868, + "grad_norm": 0.001058773836120963, + "learning_rate": 9.020043025977253e-09, + "loss": 0.0, + "num_input_tokens_seen": 132601000, + "step": 196785 + }, + { + "epoch": 4.807612439840716, + "grad_norm": 0.0021331259049475193, + "learning_rate": 9.00861856397539e-09, + "loss": 0.0, + "num_input_tokens_seen": 132604584, + "step": 196790 + }, + { + "epoch": 4.807734590672562, + "grad_norm": 0.0007856449228711426, + "learning_rate": 8.997201308729385e-09, + "loss": 0.0, + "num_input_tokens_seen": 132607656, + "step": 196795 + }, + { + "epoch": 4.80785674150441, + "grad_norm": 2.964310442621354e-05, + "learning_rate": 8.985791260322283e-09, + "loss": 0.0, + "num_input_tokens_seen": 132611304, + "step": 196800 + }, + { + "epoch": 4.807978892336257, + "grad_norm": 0.001216082600876689, + "learning_rate": 8.97438841883713e-09, + "loss": 0.0, + "num_input_tokens_seen": 132614632, + "step": 196805 + }, + { + "epoch": 4.808101043168104, + "grad_norm": 5.0105814933776855, + "learning_rate": 8.962992784356749e-09, + "loss": 0.001, + "num_input_tokens_seen": 132618152, + "step": 196810 + }, + { + "epoch": 4.808223193999951, + "grad_norm": 0.0014316142769530416, + "learning_rate": 8.95160435696396e-09, + "loss": 0.0, + "num_input_tokens_seen": 132621544, + "step": 196815 + }, + { + "epoch": 4.808345344831798, + "grad_norm": 4.400141187943518e-05, + "learning_rate": 8.940223136741698e-09, + "loss": 0.0, + "num_input_tokens_seen": 132625128, + "step": 196820 + }, + { + "epoch": 4.8084674956636455, + "grad_norm": 8.361654181499034e-05, + "learning_rate": 8.928849123772674e-09, + "loss": 0.0, + "num_input_tokens_seen": 132628200, + "step": 196825 + }, + { + "epoch": 4.808589646495492, + "grad_norm": 0.0016611508326604962, + "learning_rate": 8.917482318139713e-09, + "loss": 0.0, + "num_input_tokens_seen": 132631848, + "step": 196830 + }, + { + "epoch": 4.80871179732734, + "grad_norm": 0.0007444024668075144, + "learning_rate": 8.906122719925302e-09, + "loss": 0.0, + "num_input_tokens_seen": 132634984, + "step": 196835 + }, + { + "epoch": 4.808833948159187, + "grad_norm": 0.000621675921138376, + "learning_rate": 8.894770329212154e-09, + "loss": 0.0, + "num_input_tokens_seen": 132638440, + "step": 196840 + }, + { + "epoch": 4.808956098991034, + "grad_norm": 0.005653452128171921, + "learning_rate": 8.883425146082868e-09, + "loss": 0.0, + "num_input_tokens_seen": 132641704, + "step": 196845 + }, + { + "epoch": 4.809078249822881, + "grad_norm": 0.00017273153935093433, + "learning_rate": 8.872087170619825e-09, + "loss": 0.0, + "num_input_tokens_seen": 132645288, + "step": 196850 + }, + { + "epoch": 4.809200400654729, + "grad_norm": 0.14124557375907898, + "learning_rate": 8.860756402905623e-09, + "loss": 0.0001, + "num_input_tokens_seen": 132648552, + "step": 196855 + }, + { + "epoch": 4.809322551486575, + "grad_norm": 0.00012055077240802348, + "learning_rate": 8.84943284302253e-09, + "loss": 0.0, + "num_input_tokens_seen": 132651368, + "step": 196860 + }, + { + "epoch": 4.809444702318423, + "grad_norm": 0.0024922320153564215, + "learning_rate": 8.838116491052927e-09, + "loss": 0.0, + "num_input_tokens_seen": 132654632, + "step": 196865 + }, + { + "epoch": 4.80956685315027, + "grad_norm": 0.0005170554504729807, + "learning_rate": 8.82680734707919e-09, + "loss": 0.0, + "num_input_tokens_seen": 132657960, + "step": 196870 + }, + { + "epoch": 4.809689003982117, + "grad_norm": 0.013109724968671799, + "learning_rate": 8.815505411183367e-09, + "loss": 0.0, + "num_input_tokens_seen": 132661288, + "step": 196875 + }, + { + "epoch": 4.809811154813964, + "grad_norm": 0.0007171641918830574, + "learning_rate": 8.804210683447944e-09, + "loss": 0.0, + "num_input_tokens_seen": 132664552, + "step": 196880 + }, + { + "epoch": 4.809933305645812, + "grad_norm": 2.0580151613103226e-05, + "learning_rate": 8.792923163954857e-09, + "loss": 0.0, + "num_input_tokens_seen": 132667624, + "step": 196885 + }, + { + "epoch": 4.8100554564776585, + "grad_norm": 0.0007847507367841899, + "learning_rate": 8.781642852786264e-09, + "loss": 0.0, + "num_input_tokens_seen": 132670888, + "step": 196890 + }, + { + "epoch": 4.810177607309506, + "grad_norm": 0.00033158838050439954, + "learning_rate": 8.770369750024099e-09, + "loss": 0.0, + "num_input_tokens_seen": 132674152, + "step": 196895 + }, + { + "epoch": 4.810299758141353, + "grad_norm": 0.00032024146639741957, + "learning_rate": 8.759103855750404e-09, + "loss": 0.0, + "num_input_tokens_seen": 132677160, + "step": 196900 + }, + { + "epoch": 4.8104219089732005, + "grad_norm": 0.00017308765382040292, + "learning_rate": 8.747845170047119e-09, + "loss": 0.0, + "num_input_tokens_seen": 132680360, + "step": 196905 + }, + { + "epoch": 4.810544059805047, + "grad_norm": 8.452088877675124e-06, + "learning_rate": 8.736593692996174e-09, + "loss": 0.0, + "num_input_tokens_seen": 132683944, + "step": 196910 + }, + { + "epoch": 4.810666210636894, + "grad_norm": 0.00010812583786901087, + "learning_rate": 8.725349424679396e-09, + "loss": 0.0, + "num_input_tokens_seen": 132687080, + "step": 196915 + }, + { + "epoch": 4.810788361468742, + "grad_norm": 0.0023433612659573555, + "learning_rate": 8.714112365178383e-09, + "loss": 0.0, + "num_input_tokens_seen": 132690344, + "step": 196920 + }, + { + "epoch": 4.810910512300588, + "grad_norm": 0.0006696759373880923, + "learning_rate": 8.702882514575072e-09, + "loss": 0.0, + "num_input_tokens_seen": 132693288, + "step": 196925 + }, + { + "epoch": 4.811032663132436, + "grad_norm": 2.063168358290568e-05, + "learning_rate": 8.691659872950951e-09, + "loss": 0.0, + "num_input_tokens_seen": 132696424, + "step": 196930 + }, + { + "epoch": 4.811154813964283, + "grad_norm": 0.0022210939787328243, + "learning_rate": 8.680444440387624e-09, + "loss": 0.0, + "num_input_tokens_seen": 132699304, + "step": 196935 + }, + { + "epoch": 4.81127696479613, + "grad_norm": 0.0010496609611436725, + "learning_rate": 8.669236216966913e-09, + "loss": 0.0, + "num_input_tokens_seen": 132702888, + "step": 196940 + }, + { + "epoch": 4.811399115627977, + "grad_norm": 0.00013030644913669676, + "learning_rate": 8.658035202770086e-09, + "loss": 0.0, + "num_input_tokens_seen": 132706728, + "step": 196945 + }, + { + "epoch": 4.811521266459825, + "grad_norm": 0.001109745935536921, + "learning_rate": 8.646841397878634e-09, + "loss": 0.0, + "num_input_tokens_seen": 132710248, + "step": 196950 + }, + { + "epoch": 4.8116434172916716, + "grad_norm": 0.00022087209799792618, + "learning_rate": 8.635654802374048e-09, + "loss": 0.0, + "num_input_tokens_seen": 132713640, + "step": 196955 + }, + { + "epoch": 4.811765568123519, + "grad_norm": 0.00040978117613121867, + "learning_rate": 8.624475416337596e-09, + "loss": 0.0, + "num_input_tokens_seen": 132717096, + "step": 196960 + }, + { + "epoch": 4.811887718955366, + "grad_norm": 0.0002552904188632965, + "learning_rate": 8.613303239850544e-09, + "loss": 0.0, + "num_input_tokens_seen": 132720552, + "step": 196965 + }, + { + "epoch": 4.812009869787214, + "grad_norm": 0.0022950470447540283, + "learning_rate": 8.602138272994274e-09, + "loss": 0.0, + "num_input_tokens_seen": 132723624, + "step": 196970 + }, + { + "epoch": 4.81213202061906, + "grad_norm": 0.0010927910916507244, + "learning_rate": 8.590980515849945e-09, + "loss": 0.0436, + "num_input_tokens_seen": 132727336, + "step": 196975 + }, + { + "epoch": 4.812254171450908, + "grad_norm": 8.401823288295418e-05, + "learning_rate": 8.579829968498486e-09, + "loss": 0.0, + "num_input_tokens_seen": 132730280, + "step": 196980 + }, + { + "epoch": 4.812376322282755, + "grad_norm": 0.0002500289410818368, + "learning_rate": 8.568686631021394e-09, + "loss": 0.0, + "num_input_tokens_seen": 132733288, + "step": 196985 + }, + { + "epoch": 4.812498473114601, + "grad_norm": 1.3336955817067064e-05, + "learning_rate": 8.557550503499378e-09, + "loss": 0.0, + "num_input_tokens_seen": 132736872, + "step": 196990 + }, + { + "epoch": 4.812620623946449, + "grad_norm": 7.999356967047788e-06, + "learning_rate": 8.546421586013486e-09, + "loss": 0.0, + "num_input_tokens_seen": 132740264, + "step": 196995 + }, + { + "epoch": 4.812742774778297, + "grad_norm": 0.000838687177747488, + "learning_rate": 8.535299878644653e-09, + "loss": 0.0, + "num_input_tokens_seen": 132743912, + "step": 197000 + }, + { + "epoch": 4.8128649256101435, + "grad_norm": 0.0011125325690954924, + "learning_rate": 8.524185381473815e-09, + "loss": 0.0, + "num_input_tokens_seen": 132747368, + "step": 197005 + }, + { + "epoch": 4.81298707644199, + "grad_norm": 0.0002906046574935317, + "learning_rate": 8.513078094581904e-09, + "loss": 0.0, + "num_input_tokens_seen": 132750376, + "step": 197010 + }, + { + "epoch": 4.813109227273838, + "grad_norm": 5.050322215538472e-05, + "learning_rate": 8.501978018049528e-09, + "loss": 0.0, + "num_input_tokens_seen": 132753640, + "step": 197015 + }, + { + "epoch": 4.813231378105685, + "grad_norm": 9.830085764406249e-05, + "learning_rate": 8.490885151957283e-09, + "loss": 0.0, + "num_input_tokens_seen": 132757736, + "step": 197020 + }, + { + "epoch": 4.813353528937532, + "grad_norm": 0.001108907745219767, + "learning_rate": 8.47979949638622e-09, + "loss": 0.0, + "num_input_tokens_seen": 132761000, + "step": 197025 + }, + { + "epoch": 4.813475679769379, + "grad_norm": 3.4584136301418766e-05, + "learning_rate": 8.468721051416606e-09, + "loss": 0.0, + "num_input_tokens_seen": 132764392, + "step": 197030 + }, + { + "epoch": 4.813597830601227, + "grad_norm": 0.001427344512194395, + "learning_rate": 8.457649817129153e-09, + "loss": 0.0, + "num_input_tokens_seen": 132767656, + "step": 197035 + }, + { + "epoch": 4.813719981433073, + "grad_norm": 0.00036199059104546905, + "learning_rate": 8.446585793604355e-09, + "loss": 0.0, + "num_input_tokens_seen": 132771048, + "step": 197040 + }, + { + "epoch": 4.813842132264921, + "grad_norm": 0.005828613881021738, + "learning_rate": 8.435528980922812e-09, + "loss": 0.0, + "num_input_tokens_seen": 132774312, + "step": 197045 + }, + { + "epoch": 4.813964283096768, + "grad_norm": 0.00045282530481927097, + "learning_rate": 8.424479379164684e-09, + "loss": 0.0, + "num_input_tokens_seen": 132777448, + "step": 197050 + }, + { + "epoch": 4.814086433928615, + "grad_norm": 0.00023736456932965666, + "learning_rate": 8.41343698841035e-09, + "loss": 0.0, + "num_input_tokens_seen": 132780584, + "step": 197055 + }, + { + "epoch": 4.814208584760462, + "grad_norm": 0.00018944533076137304, + "learning_rate": 8.402401808740411e-09, + "loss": 0.0, + "num_input_tokens_seen": 132784168, + "step": 197060 + }, + { + "epoch": 4.81433073559231, + "grad_norm": 0.0011267053196206689, + "learning_rate": 8.391373840234805e-09, + "loss": 0.0, + "num_input_tokens_seen": 132787624, + "step": 197065 + }, + { + "epoch": 4.8144528864241565, + "grad_norm": 0.00018865136371459812, + "learning_rate": 8.380353082973913e-09, + "loss": 0.0, + "num_input_tokens_seen": 132791144, + "step": 197070 + }, + { + "epoch": 4.814575037256004, + "grad_norm": 0.00015997131413314492, + "learning_rate": 8.369339537037668e-09, + "loss": 0.0, + "num_input_tokens_seen": 132794472, + "step": 197075 + }, + { + "epoch": 4.814697188087851, + "grad_norm": 0.012078426778316498, + "learning_rate": 8.358333202506451e-09, + "loss": 0.0, + "num_input_tokens_seen": 132797992, + "step": 197080 + }, + { + "epoch": 4.814819338919698, + "grad_norm": 0.00041595305083319545, + "learning_rate": 8.347334079459978e-09, + "loss": 0.0, + "num_input_tokens_seen": 132801512, + "step": 197085 + }, + { + "epoch": 4.814941489751545, + "grad_norm": 0.00453962991014123, + "learning_rate": 8.336342167978516e-09, + "loss": 0.0, + "num_input_tokens_seen": 132805032, + "step": 197090 + }, + { + "epoch": 4.815063640583393, + "grad_norm": 5.149428761797026e-05, + "learning_rate": 8.325357468142002e-09, + "loss": 0.0, + "num_input_tokens_seen": 132808744, + "step": 197095 + }, + { + "epoch": 4.81518579141524, + "grad_norm": 8.893240192264784e-06, + "learning_rate": 8.31437998003004e-09, + "loss": 0.0012, + "num_input_tokens_seen": 132812136, + "step": 197100 + }, + { + "epoch": 4.815307942247086, + "grad_norm": 0.001375921769067645, + "learning_rate": 8.303409703722786e-09, + "loss": 0.0, + "num_input_tokens_seen": 132815848, + "step": 197105 + }, + { + "epoch": 4.815430093078934, + "grad_norm": 5.135929677635431e-05, + "learning_rate": 8.292446639299732e-09, + "loss": 0.0, + "num_input_tokens_seen": 132818920, + "step": 197110 + }, + { + "epoch": 4.815552243910781, + "grad_norm": 0.10179103910923004, + "learning_rate": 8.281490786840927e-09, + "loss": 0.0006, + "num_input_tokens_seen": 132822312, + "step": 197115 + }, + { + "epoch": 4.815674394742628, + "grad_norm": 0.0015505808405578136, + "learning_rate": 8.270542146425751e-09, + "loss": 0.0, + "num_input_tokens_seen": 132826152, + "step": 197120 + }, + { + "epoch": 4.815796545574475, + "grad_norm": 2.3445218175766058e-05, + "learning_rate": 8.25960071813392e-09, + "loss": 0.0, + "num_input_tokens_seen": 132829672, + "step": 197125 + }, + { + "epoch": 4.815918696406323, + "grad_norm": 0.002743740798905492, + "learning_rate": 8.248666502045032e-09, + "loss": 0.0, + "num_input_tokens_seen": 132832552, + "step": 197130 + }, + { + "epoch": 4.8160408472381695, + "grad_norm": 3.8246111216722056e-05, + "learning_rate": 8.237739498238582e-09, + "loss": 0.0, + "num_input_tokens_seen": 132835880, + "step": 197135 + }, + { + "epoch": 4.816162998070017, + "grad_norm": 0.0019803382456302643, + "learning_rate": 8.226819706794063e-09, + "loss": 0.0, + "num_input_tokens_seen": 132839080, + "step": 197140 + }, + { + "epoch": 4.816285148901864, + "grad_norm": 0.0012779106618836522, + "learning_rate": 8.215907127790856e-09, + "loss": 0.0, + "num_input_tokens_seen": 132842536, + "step": 197145 + }, + { + "epoch": 4.8164072997337115, + "grad_norm": 5.748857438447885e-05, + "learning_rate": 8.205001761308228e-09, + "loss": 0.0, + "num_input_tokens_seen": 132845736, + "step": 197150 + }, + { + "epoch": 4.816529450565558, + "grad_norm": 2.8861688406323083e-05, + "learning_rate": 8.194103607425784e-09, + "loss": 0.0, + "num_input_tokens_seen": 132849192, + "step": 197155 + }, + { + "epoch": 4.816651601397406, + "grad_norm": 0.02581091783940792, + "learning_rate": 8.183212666222461e-09, + "loss": 0.0, + "num_input_tokens_seen": 132851944, + "step": 197160 + }, + { + "epoch": 4.816773752229253, + "grad_norm": 3.863290839944966e-05, + "learning_rate": 8.172328937777639e-09, + "loss": 0.0, + "num_input_tokens_seen": 132855272, + "step": 197165 + }, + { + "epoch": 4.8168959030611, + "grad_norm": 5.909180254093371e-05, + "learning_rate": 8.161452422170367e-09, + "loss": 0.0, + "num_input_tokens_seen": 132858408, + "step": 197170 + }, + { + "epoch": 4.817018053892947, + "grad_norm": 0.00025937153259292245, + "learning_rate": 8.150583119479803e-09, + "loss": 0.0, + "num_input_tokens_seen": 132861736, + "step": 197175 + }, + { + "epoch": 4.817140204724794, + "grad_norm": 0.0043001617304980755, + "learning_rate": 8.139721029784996e-09, + "loss": 0.0, + "num_input_tokens_seen": 132865256, + "step": 197180 + }, + { + "epoch": 4.817262355556641, + "grad_norm": 7.975584594532847e-05, + "learning_rate": 8.12886615316477e-09, + "loss": 0.0, + "num_input_tokens_seen": 132868712, + "step": 197185 + }, + { + "epoch": 4.817384506388488, + "grad_norm": 0.0008708810200914741, + "learning_rate": 8.118018489698396e-09, + "loss": 0.0, + "num_input_tokens_seen": 132871720, + "step": 197190 + }, + { + "epoch": 4.817506657220336, + "grad_norm": 0.0014090074691921473, + "learning_rate": 8.10717803946448e-09, + "loss": 0.0, + "num_input_tokens_seen": 132874856, + "step": 197195 + }, + { + "epoch": 4.8176288080521825, + "grad_norm": 0.012226647697389126, + "learning_rate": 8.096344802542066e-09, + "loss": 0.0, + "num_input_tokens_seen": 132878184, + "step": 197200 + }, + { + "epoch": 4.81775095888403, + "grad_norm": 8.560111382394098e-06, + "learning_rate": 8.085518779009648e-09, + "loss": 0.0, + "num_input_tokens_seen": 132881448, + "step": 197205 + }, + { + "epoch": 4.817873109715877, + "grad_norm": 0.0011228991206735373, + "learning_rate": 8.074699968946275e-09, + "loss": 0.0, + "num_input_tokens_seen": 132885288, + "step": 197210 + }, + { + "epoch": 4.8179952605477245, + "grad_norm": 0.0005283401696942747, + "learning_rate": 8.063888372430439e-09, + "loss": 0.0, + "num_input_tokens_seen": 132889256, + "step": 197215 + }, + { + "epoch": 4.818117411379571, + "grad_norm": 0.0003394389059394598, + "learning_rate": 8.053083989540743e-09, + "loss": 0.0, + "num_input_tokens_seen": 132892712, + "step": 197220 + }, + { + "epoch": 4.818239562211419, + "grad_norm": 1.3488976037479006e-05, + "learning_rate": 8.042286820355903e-09, + "loss": 0.0, + "num_input_tokens_seen": 132895976, + "step": 197225 + }, + { + "epoch": 4.818361713043266, + "grad_norm": 0.02327442169189453, + "learning_rate": 8.031496864954302e-09, + "loss": 0.0, + "num_input_tokens_seen": 132898792, + "step": 197230 + }, + { + "epoch": 4.818483863875113, + "grad_norm": 6.318982195807621e-05, + "learning_rate": 8.020714123414541e-09, + "loss": 0.0, + "num_input_tokens_seen": 132902248, + "step": 197235 + }, + { + "epoch": 4.81860601470696, + "grad_norm": 0.0002425254788249731, + "learning_rate": 8.009938595814892e-09, + "loss": 0.0, + "num_input_tokens_seen": 132905640, + "step": 197240 + }, + { + "epoch": 4.818728165538808, + "grad_norm": 0.00015229900600388646, + "learning_rate": 7.999170282233736e-09, + "loss": 0.0, + "num_input_tokens_seen": 132909032, + "step": 197245 + }, + { + "epoch": 4.818850316370654, + "grad_norm": 0.00026307988446205854, + "learning_rate": 7.988409182749567e-09, + "loss": 0.0, + "num_input_tokens_seen": 132912232, + "step": 197250 + }, + { + "epoch": 4.818972467202501, + "grad_norm": 6.763396231690422e-05, + "learning_rate": 7.977655297440433e-09, + "loss": 0.0, + "num_input_tokens_seen": 132915752, + "step": 197255 + }, + { + "epoch": 4.819094618034349, + "grad_norm": 0.00016935997700784355, + "learning_rate": 7.966908626384605e-09, + "loss": 0.0, + "num_input_tokens_seen": 132918824, + "step": 197260 + }, + { + "epoch": 4.819216768866196, + "grad_norm": 0.00012209487613290548, + "learning_rate": 7.956169169660242e-09, + "loss": 0.0, + "num_input_tokens_seen": 132922024, + "step": 197265 + }, + { + "epoch": 4.819338919698043, + "grad_norm": 4.2448813474038616e-05, + "learning_rate": 7.945436927345395e-09, + "loss": 0.0, + "num_input_tokens_seen": 132925736, + "step": 197270 + }, + { + "epoch": 4.81946107052989, + "grad_norm": 0.0004389749956317246, + "learning_rate": 7.93471189951822e-09, + "loss": 0.0, + "num_input_tokens_seen": 132929576, + "step": 197275 + }, + { + "epoch": 4.819583221361738, + "grad_norm": 0.0021197644528001547, + "learning_rate": 7.923994086256657e-09, + "loss": 0.0, + "num_input_tokens_seen": 132932776, + "step": 197280 + }, + { + "epoch": 4.819705372193584, + "grad_norm": 2.7764235710492358e-05, + "learning_rate": 7.913283487638645e-09, + "loss": 0.0, + "num_input_tokens_seen": 132936104, + "step": 197285 + }, + { + "epoch": 4.819827523025432, + "grad_norm": 6.35007891105488e-05, + "learning_rate": 7.902580103742008e-09, + "loss": 0.0, + "num_input_tokens_seen": 132939816, + "step": 197290 + }, + { + "epoch": 4.819949673857279, + "grad_norm": 0.0019520074129104614, + "learning_rate": 7.891883934644794e-09, + "loss": 0.0, + "num_input_tokens_seen": 132943336, + "step": 197295 + }, + { + "epoch": 4.820071824689126, + "grad_norm": 0.0001843513164203614, + "learning_rate": 7.8811949804245e-09, + "loss": 0.0, + "num_input_tokens_seen": 132946536, + "step": 197300 + }, + { + "epoch": 4.820193975520973, + "grad_norm": 0.0003039787115994841, + "learning_rate": 7.87051324115906e-09, + "loss": 0.0, + "num_input_tokens_seen": 132949864, + "step": 197305 + }, + { + "epoch": 4.820316126352821, + "grad_norm": 0.009050632826983929, + "learning_rate": 7.859838716926081e-09, + "loss": 0.0, + "num_input_tokens_seen": 132953064, + "step": 197310 + }, + { + "epoch": 4.820438277184667, + "grad_norm": 0.004172494634985924, + "learning_rate": 7.849171407803168e-09, + "loss": 0.0, + "num_input_tokens_seen": 132956008, + "step": 197315 + }, + { + "epoch": 4.820560428016515, + "grad_norm": 0.003063405863940716, + "learning_rate": 7.838511313868035e-09, + "loss": 0.0, + "num_input_tokens_seen": 132959400, + "step": 197320 + }, + { + "epoch": 4.820682578848362, + "grad_norm": 0.0004874825826846063, + "learning_rate": 7.827858435198176e-09, + "loss": 0.0, + "num_input_tokens_seen": 132962728, + "step": 197325 + }, + { + "epoch": 4.8208047296802095, + "grad_norm": 0.002249550772830844, + "learning_rate": 7.817212771870863e-09, + "loss": 0.0, + "num_input_tokens_seen": 132966312, + "step": 197330 + }, + { + "epoch": 4.820926880512056, + "grad_norm": 8.849135338095948e-05, + "learning_rate": 7.806574323963699e-09, + "loss": 0.0, + "num_input_tokens_seen": 132970024, + "step": 197335 + }, + { + "epoch": 4.821049031343904, + "grad_norm": 3.668137651402503e-05, + "learning_rate": 7.795943091553847e-09, + "loss": 0.0, + "num_input_tokens_seen": 132973224, + "step": 197340 + }, + { + "epoch": 4.821171182175751, + "grad_norm": 0.0004503615782596171, + "learning_rate": 7.78531907471891e-09, + "loss": 0.0, + "num_input_tokens_seen": 132976680, + "step": 197345 + }, + { + "epoch": 4.821293333007597, + "grad_norm": 2.815644438669551e-05, + "learning_rate": 7.774702273535937e-09, + "loss": 0.0, + "num_input_tokens_seen": 132980264, + "step": 197350 + }, + { + "epoch": 4.821415483839445, + "grad_norm": 0.0013015008298680186, + "learning_rate": 7.764092688082313e-09, + "loss": 0.0, + "num_input_tokens_seen": 132984040, + "step": 197355 + }, + { + "epoch": 4.821537634671293, + "grad_norm": 0.001261149882338941, + "learning_rate": 7.753490318434975e-09, + "loss": 0.0, + "num_input_tokens_seen": 132987560, + "step": 197360 + }, + { + "epoch": 4.821659785503139, + "grad_norm": 0.05001718923449516, + "learning_rate": 7.742895164671303e-09, + "loss": 0.0, + "num_input_tokens_seen": 132990952, + "step": 197365 + }, + { + "epoch": 4.821781936334986, + "grad_norm": 0.00034191334270872176, + "learning_rate": 7.732307226868017e-09, + "loss": 0.0, + "num_input_tokens_seen": 132993960, + "step": 197370 + }, + { + "epoch": 4.821904087166834, + "grad_norm": 0.0001335551933152601, + "learning_rate": 7.721726505102277e-09, + "loss": 0.0004, + "num_input_tokens_seen": 132997288, + "step": 197375 + }, + { + "epoch": 4.8220262379986805, + "grad_norm": 0.00773590337485075, + "learning_rate": 7.711152999451132e-09, + "loss": 0.0, + "num_input_tokens_seen": 133001192, + "step": 197380 + }, + { + "epoch": 4.822148388830528, + "grad_norm": 9.39232631935738e-05, + "learning_rate": 7.700586709991297e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133004520, + "step": 197385 + }, + { + "epoch": 4.822270539662375, + "grad_norm": 0.0005778432823717594, + "learning_rate": 7.690027636799712e-09, + "loss": 0.0, + "num_input_tokens_seen": 133008232, + "step": 197390 + }, + { + "epoch": 4.8223926904942225, + "grad_norm": 0.008273705840110779, + "learning_rate": 7.679475779953093e-09, + "loss": 0.0, + "num_input_tokens_seen": 133011432, + "step": 197395 + }, + { + "epoch": 4.822514841326069, + "grad_norm": 0.00024316016060765833, + "learning_rate": 7.668931139528267e-09, + "loss": 0.0, + "num_input_tokens_seen": 133015080, + "step": 197400 + }, + { + "epoch": 4.822636992157917, + "grad_norm": 0.0011960206320509315, + "learning_rate": 7.658393715601951e-09, + "loss": 0.0, + "num_input_tokens_seen": 133018792, + "step": 197405 + }, + { + "epoch": 4.822759142989764, + "grad_norm": 0.0014048997545614839, + "learning_rate": 7.64786350825064e-09, + "loss": 0.0, + "num_input_tokens_seen": 133022248, + "step": 197410 + }, + { + "epoch": 4.822881293821611, + "grad_norm": 2.924349610111676e-05, + "learning_rate": 7.637340517551049e-09, + "loss": 0.0, + "num_input_tokens_seen": 133025896, + "step": 197415 + }, + { + "epoch": 4.823003444653458, + "grad_norm": 3.692340760608204e-05, + "learning_rate": 7.626824743579564e-09, + "loss": 0.0, + "num_input_tokens_seen": 133029864, + "step": 197420 + }, + { + "epoch": 4.823125595485306, + "grad_norm": 5.631545354845002e-05, + "learning_rate": 7.616316186412675e-09, + "loss": 0.0, + "num_input_tokens_seen": 133033448, + "step": 197425 + }, + { + "epoch": 4.823247746317152, + "grad_norm": 0.00086938840104267, + "learning_rate": 7.60581484612699e-09, + "loss": 0.0, + "num_input_tokens_seen": 133037224, + "step": 197430 + }, + { + "epoch": 4.823369897149, + "grad_norm": 2.341336767130997e-05, + "learning_rate": 7.59532072279867e-09, + "loss": 0.0, + "num_input_tokens_seen": 133041000, + "step": 197435 + }, + { + "epoch": 4.823492047980847, + "grad_norm": 0.0006846496253274381, + "learning_rate": 7.5848338165041e-09, + "loss": 0.0, + "num_input_tokens_seen": 133044392, + "step": 197440 + }, + { + "epoch": 4.8236141988126935, + "grad_norm": 0.00022248385357670486, + "learning_rate": 7.574354127319548e-09, + "loss": 0.0, + "num_input_tokens_seen": 133047976, + "step": 197445 + }, + { + "epoch": 4.823736349644541, + "grad_norm": 7.169241143856198e-05, + "learning_rate": 7.56388165532118e-09, + "loss": 0.0, + "num_input_tokens_seen": 133051240, + "step": 197450 + }, + { + "epoch": 4.823858500476388, + "grad_norm": 0.0003167959803249687, + "learning_rate": 7.553416400585267e-09, + "loss": 0.0, + "num_input_tokens_seen": 133054504, + "step": 197455 + }, + { + "epoch": 4.8239806513082355, + "grad_norm": 3.2362113415729254e-05, + "learning_rate": 7.542958363187746e-09, + "loss": 0.0, + "num_input_tokens_seen": 133057896, + "step": 197460 + }, + { + "epoch": 4.824102802140082, + "grad_norm": 0.000935925985686481, + "learning_rate": 7.532507543204891e-09, + "loss": 0.0, + "num_input_tokens_seen": 133060904, + "step": 197465 + }, + { + "epoch": 4.82422495297193, + "grad_norm": 1.3999424481880851e-05, + "learning_rate": 7.522063940712531e-09, + "loss": 0.0, + "num_input_tokens_seen": 133064296, + "step": 197470 + }, + { + "epoch": 4.824347103803777, + "grad_norm": 0.00016260526899714023, + "learning_rate": 7.511627555786715e-09, + "loss": 0.0, + "num_input_tokens_seen": 133067624, + "step": 197475 + }, + { + "epoch": 4.824469254635624, + "grad_norm": 4.1454304664512165e-06, + "learning_rate": 7.50119838850316e-09, + "loss": 0.0, + "num_input_tokens_seen": 133071720, + "step": 197480 + }, + { + "epoch": 4.824591405467471, + "grad_norm": 4.6204342652345076e-05, + "learning_rate": 7.490776438937918e-09, + "loss": 0.0, + "num_input_tokens_seen": 133075304, + "step": 197485 + }, + { + "epoch": 4.824713556299319, + "grad_norm": 0.00016602440155111253, + "learning_rate": 7.480361707166705e-09, + "loss": 0.0, + "num_input_tokens_seen": 133078568, + "step": 197490 + }, + { + "epoch": 4.824835707131165, + "grad_norm": 0.00016575964400544763, + "learning_rate": 7.469954193265238e-09, + "loss": 0.0, + "num_input_tokens_seen": 133082024, + "step": 197495 + }, + { + "epoch": 4.824957857963013, + "grad_norm": 0.0069565181620419025, + "learning_rate": 7.459553897309346e-09, + "loss": 0.0, + "num_input_tokens_seen": 133085544, + "step": 197500 + }, + { + "epoch": 4.82508000879486, + "grad_norm": 0.0015840993728488684, + "learning_rate": 7.4491608193744115e-09, + "loss": 0.0, + "num_input_tokens_seen": 133088680, + "step": 197505 + }, + { + "epoch": 4.825202159626707, + "grad_norm": 8.865137351676822e-05, + "learning_rate": 7.438774959536154e-09, + "loss": 0.0, + "num_input_tokens_seen": 133091752, + "step": 197510 + }, + { + "epoch": 4.825324310458554, + "grad_norm": 0.0001753615797497332, + "learning_rate": 7.428396317870067e-09, + "loss": 0.0, + "num_input_tokens_seen": 133094888, + "step": 197515 + }, + { + "epoch": 4.825446461290401, + "grad_norm": 2.2867747247801162e-05, + "learning_rate": 7.4180248944517575e-09, + "loss": 0.0, + "num_input_tokens_seen": 133098152, + "step": 197520 + }, + { + "epoch": 4.8255686121222485, + "grad_norm": 0.0006337054655887187, + "learning_rate": 7.407660689356388e-09, + "loss": 0.0, + "num_input_tokens_seen": 133101224, + "step": 197525 + }, + { + "epoch": 4.825690762954096, + "grad_norm": 0.004857445135712624, + "learning_rate": 7.397303702659674e-09, + "loss": 0.0, + "num_input_tokens_seen": 133104744, + "step": 197530 + }, + { + "epoch": 4.825812913785943, + "grad_norm": 0.0017321788473054767, + "learning_rate": 7.3869539344365575e-09, + "loss": 0.0, + "num_input_tokens_seen": 133108072, + "step": 197535 + }, + { + "epoch": 4.82593506461779, + "grad_norm": 0.0005325916572473943, + "learning_rate": 7.376611384762643e-09, + "loss": 0.0, + "num_input_tokens_seen": 133111784, + "step": 197540 + }, + { + "epoch": 4.826057215449637, + "grad_norm": 5.844299084856175e-05, + "learning_rate": 7.366276053712983e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133115176, + "step": 197545 + }, + { + "epoch": 4.826179366281484, + "grad_norm": 0.0008696087752468884, + "learning_rate": 7.355947941362628e-09, + "loss": 0.0, + "num_input_tokens_seen": 133118312, + "step": 197550 + }, + { + "epoch": 4.826301517113332, + "grad_norm": 0.00013190713070798665, + "learning_rate": 7.345627047786851e-09, + "loss": 0.0, + "num_input_tokens_seen": 133121960, + "step": 197555 + }, + { + "epoch": 4.826423667945178, + "grad_norm": 0.0003237095370423049, + "learning_rate": 7.335313373060703e-09, + "loss": 0.0, + "num_input_tokens_seen": 133125096, + "step": 197560 + }, + { + "epoch": 4.826545818777026, + "grad_norm": 1.4504607861454133e-05, + "learning_rate": 7.325006917259124e-09, + "loss": 0.0, + "num_input_tokens_seen": 133128488, + "step": 197565 + }, + { + "epoch": 4.826667969608873, + "grad_norm": 0.00011755106970667839, + "learning_rate": 7.3147076804571665e-09, + "loss": 0.0, + "num_input_tokens_seen": 133132072, + "step": 197570 + }, + { + "epoch": 4.82679012044072, + "grad_norm": 0.0031064345967024565, + "learning_rate": 7.304415662729546e-09, + "loss": 0.0, + "num_input_tokens_seen": 133135400, + "step": 197575 + }, + { + "epoch": 4.826912271272567, + "grad_norm": 0.00012528127990663052, + "learning_rate": 7.294130864151315e-09, + "loss": 0.0, + "num_input_tokens_seen": 133138856, + "step": 197580 + }, + { + "epoch": 4.827034422104415, + "grad_norm": 0.0107027068734169, + "learning_rate": 7.2838532847971926e-09, + "loss": 0.0, + "num_input_tokens_seen": 133141992, + "step": 197585 + }, + { + "epoch": 4.8271565729362615, + "grad_norm": 0.018038859590888023, + "learning_rate": 7.273582924741783e-09, + "loss": 0.0, + "num_input_tokens_seen": 133145256, + "step": 197590 + }, + { + "epoch": 4.827278723768109, + "grad_norm": 0.000366037042113021, + "learning_rate": 7.263319784059918e-09, + "loss": 0.0, + "num_input_tokens_seen": 133148712, + "step": 197595 + }, + { + "epoch": 4.827400874599956, + "grad_norm": 0.01911904290318489, + "learning_rate": 7.253063862826203e-09, + "loss": 0.0, + "num_input_tokens_seen": 133152104, + "step": 197600 + }, + { + "epoch": 4.827523025431804, + "grad_norm": 0.0010948879644274712, + "learning_rate": 7.242815161115246e-09, + "loss": 0.0, + "num_input_tokens_seen": 133155432, + "step": 197605 + }, + { + "epoch": 4.82764517626365, + "grad_norm": 0.001213407376781106, + "learning_rate": 7.232573679001541e-09, + "loss": 0.0256, + "num_input_tokens_seen": 133158760, + "step": 197610 + }, + { + "epoch": 4.827767327095497, + "grad_norm": 0.00012105743371648714, + "learning_rate": 7.222339416559587e-09, + "loss": 0.0762, + "num_input_tokens_seen": 133163176, + "step": 197615 + }, + { + "epoch": 4.827889477927345, + "grad_norm": 4.5139000576455146e-05, + "learning_rate": 7.212112373863877e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133166312, + "step": 197620 + }, + { + "epoch": 4.828011628759192, + "grad_norm": 0.00018658023327589035, + "learning_rate": 7.201892550988686e-09, + "loss": 0.0, + "num_input_tokens_seen": 133169640, + "step": 197625 + }, + { + "epoch": 4.828133779591039, + "grad_norm": 0.00011530852498253807, + "learning_rate": 7.191679948008289e-09, + "loss": 0.0, + "num_input_tokens_seen": 133173224, + "step": 197630 + }, + { + "epoch": 4.828255930422886, + "grad_norm": 7.312805246328935e-05, + "learning_rate": 7.1814745649971805e-09, + "loss": 0.0, + "num_input_tokens_seen": 133176680, + "step": 197635 + }, + { + "epoch": 4.8283780812547334, + "grad_norm": 0.00026635496760718524, + "learning_rate": 7.171276402029191e-09, + "loss": 0.0, + "num_input_tokens_seen": 133179496, + "step": 197640 + }, + { + "epoch": 4.82850023208658, + "grad_norm": 0.00010877988825086504, + "learning_rate": 7.161085459178928e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133182760, + "step": 197645 + }, + { + "epoch": 4.828622382918428, + "grad_norm": 6.915336416568607e-05, + "learning_rate": 7.150901736520221e-09, + "loss": 0.0, + "num_input_tokens_seen": 133186216, + "step": 197650 + }, + { + "epoch": 4.828744533750275, + "grad_norm": 0.0005507735768333077, + "learning_rate": 7.140725234127231e-09, + "loss": 0.0, + "num_input_tokens_seen": 133189416, + "step": 197655 + }, + { + "epoch": 4.828866684582122, + "grad_norm": 5.71970667806454e-05, + "learning_rate": 7.130555952073792e-09, + "loss": 0.0, + "num_input_tokens_seen": 133193704, + "step": 197660 + }, + { + "epoch": 4.828988835413969, + "grad_norm": 3.259177537984215e-05, + "learning_rate": 7.120393890434173e-09, + "loss": 0.0, + "num_input_tokens_seen": 133197992, + "step": 197665 + }, + { + "epoch": 4.829110986245817, + "grad_norm": 0.002456663642078638, + "learning_rate": 7.1102390492819855e-09, + "loss": 0.0, + "num_input_tokens_seen": 133201640, + "step": 197670 + }, + { + "epoch": 4.829233137077663, + "grad_norm": 0.0064817629754543304, + "learning_rate": 7.100091428691279e-09, + "loss": 0.0, + "num_input_tokens_seen": 133205544, + "step": 197675 + }, + { + "epoch": 4.829355287909511, + "grad_norm": 6.728667358402163e-05, + "learning_rate": 7.089951028735663e-09, + "loss": 0.0, + "num_input_tokens_seen": 133208872, + "step": 197680 + }, + { + "epoch": 4.829477438741358, + "grad_norm": 4.01813886128366e-05, + "learning_rate": 7.079817849489078e-09, + "loss": 0.0, + "num_input_tokens_seen": 133212008, + "step": 197685 + }, + { + "epoch": 4.829599589573205, + "grad_norm": 0.002144238678738475, + "learning_rate": 7.069691891025132e-09, + "loss": 0.0, + "num_input_tokens_seen": 133215464, + "step": 197690 + }, + { + "epoch": 4.829721740405052, + "grad_norm": 0.0005259595345705748, + "learning_rate": 7.05957315341732e-09, + "loss": 0.0, + "num_input_tokens_seen": 133218536, + "step": 197695 + }, + { + "epoch": 4.8298438912369, + "grad_norm": 0.031700070947408676, + "learning_rate": 7.049461636739473e-09, + "loss": 0.0, + "num_input_tokens_seen": 133221544, + "step": 197700 + }, + { + "epoch": 4.8299660420687465, + "grad_norm": 6.699377263430506e-05, + "learning_rate": 7.039357341064978e-09, + "loss": 0.0, + "num_input_tokens_seen": 133224744, + "step": 197705 + }, + { + "epoch": 4.830088192900593, + "grad_norm": 3.650726648629643e-05, + "learning_rate": 7.0292602664673295e-09, + "loss": 0.0, + "num_input_tokens_seen": 133227944, + "step": 197710 + }, + { + "epoch": 4.830210343732441, + "grad_norm": 0.0007697915425524116, + "learning_rate": 7.019170413020026e-09, + "loss": 0.0174, + "num_input_tokens_seen": 133231784, + "step": 197715 + }, + { + "epoch": 4.8303324945642885, + "grad_norm": 9.95201407931745e-05, + "learning_rate": 7.009087780796452e-09, + "loss": 0.0, + "num_input_tokens_seen": 133235112, + "step": 197720 + }, + { + "epoch": 4.830454645396135, + "grad_norm": 0.00124736491125077, + "learning_rate": 6.999012369869773e-09, + "loss": 0.0, + "num_input_tokens_seen": 133238632, + "step": 197725 + }, + { + "epoch": 4.830576796227982, + "grad_norm": 0.00044174346840009093, + "learning_rate": 6.988944180313372e-09, + "loss": 0.0, + "num_input_tokens_seen": 133241896, + "step": 197730 + }, + { + "epoch": 4.83069894705983, + "grad_norm": 0.0006960714235901833, + "learning_rate": 6.978883212200526e-09, + "loss": 0.0, + "num_input_tokens_seen": 133244904, + "step": 197735 + }, + { + "epoch": 4.830821097891676, + "grad_norm": 0.009272739291191101, + "learning_rate": 6.968829465604287e-09, + "loss": 0.0, + "num_input_tokens_seen": 133247656, + "step": 197740 + }, + { + "epoch": 4.830943248723524, + "grad_norm": 4.248324330546893e-05, + "learning_rate": 6.9587829405978184e-09, + "loss": 0.0, + "num_input_tokens_seen": 133250984, + "step": 197745 + }, + { + "epoch": 4.831065399555371, + "grad_norm": 0.0006277945940382779, + "learning_rate": 6.948743637254173e-09, + "loss": 0.0, + "num_input_tokens_seen": 133255208, + "step": 197750 + }, + { + "epoch": 4.831187550387218, + "grad_norm": 0.00044739150325767696, + "learning_rate": 6.938711555646293e-09, + "loss": 0.0, + "num_input_tokens_seen": 133258600, + "step": 197755 + }, + { + "epoch": 4.831309701219065, + "grad_norm": 4.2511983338044956e-05, + "learning_rate": 6.928686695847341e-09, + "loss": 0.0009, + "num_input_tokens_seen": 133262120, + "step": 197760 + }, + { + "epoch": 4.831431852050913, + "grad_norm": 3.650227881735191e-05, + "learning_rate": 6.918669057929927e-09, + "loss": 0.0, + "num_input_tokens_seen": 133265448, + "step": 197765 + }, + { + "epoch": 4.8315540028827595, + "grad_norm": 0.0002340917126275599, + "learning_rate": 6.908658641967102e-09, + "loss": 0.0, + "num_input_tokens_seen": 133268584, + "step": 197770 + }, + { + "epoch": 4.831676153714607, + "grad_norm": 0.003371995175257325, + "learning_rate": 6.8986554480316985e-09, + "loss": 0.0, + "num_input_tokens_seen": 133271848, + "step": 197775 + }, + { + "epoch": 4.831798304546454, + "grad_norm": 0.03181646019220352, + "learning_rate": 6.888659476196323e-09, + "loss": 0.0, + "num_input_tokens_seen": 133275880, + "step": 197780 + }, + { + "epoch": 4.8319204553783015, + "grad_norm": 0.000140202566399239, + "learning_rate": 6.878670726533808e-09, + "loss": 0.0, + "num_input_tokens_seen": 133279656, + "step": 197785 + }, + { + "epoch": 4.832042606210148, + "grad_norm": 0.00034241325920447707, + "learning_rate": 6.868689199116651e-09, + "loss": 0.0, + "num_input_tokens_seen": 133283112, + "step": 197790 + }, + { + "epoch": 4.832164757041996, + "grad_norm": 0.0015882498119026423, + "learning_rate": 6.85871489401757e-09, + "loss": 0.0, + "num_input_tokens_seen": 133285992, + "step": 197795 + }, + { + "epoch": 4.832286907873843, + "grad_norm": 0.00044084640103392303, + "learning_rate": 6.8487478113089524e-09, + "loss": 0.0, + "num_input_tokens_seen": 133289512, + "step": 197800 + }, + { + "epoch": 4.832409058705689, + "grad_norm": 5.1393995818216354e-05, + "learning_rate": 6.838787951063407e-09, + "loss": 0.0, + "num_input_tokens_seen": 133292712, + "step": 197805 + }, + { + "epoch": 4.832531209537537, + "grad_norm": 0.0006298979860730469, + "learning_rate": 6.8288353133533205e-09, + "loss": 0.0, + "num_input_tokens_seen": 133296360, + "step": 197810 + }, + { + "epoch": 4.832653360369384, + "grad_norm": 0.004993734881281853, + "learning_rate": 6.818889898250968e-09, + "loss": 0.0, + "num_input_tokens_seen": 133299752, + "step": 197815 + }, + { + "epoch": 4.832775511201231, + "grad_norm": 215.03285217285156, + "learning_rate": 6.8089517058289584e-09, + "loss": 0.0019, + "num_input_tokens_seen": 133303272, + "step": 197820 + }, + { + "epoch": 4.832897662033078, + "grad_norm": 2.1080935766804032e-05, + "learning_rate": 6.7990207361593445e-09, + "loss": 0.0, + "num_input_tokens_seen": 133306664, + "step": 197825 + }, + { + "epoch": 4.833019812864926, + "grad_norm": 0.0006089547532610595, + "learning_rate": 6.789096989314291e-09, + "loss": 0.0, + "num_input_tokens_seen": 133310312, + "step": 197830 + }, + { + "epoch": 4.8331419636967725, + "grad_norm": 0.003080077702179551, + "learning_rate": 6.7791804653661855e-09, + "loss": 0.0, + "num_input_tokens_seen": 133313960, + "step": 197835 + }, + { + "epoch": 4.83326411452862, + "grad_norm": 8.677435107529163e-05, + "learning_rate": 6.769271164386969e-09, + "loss": 0.0, + "num_input_tokens_seen": 133316968, + "step": 197840 + }, + { + "epoch": 4.833386265360467, + "grad_norm": 0.0014505936997011304, + "learning_rate": 6.759369086448696e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133319848, + "step": 197845 + }, + { + "epoch": 4.8335084161923145, + "grad_norm": 5.86692440265324e-05, + "learning_rate": 6.749474231623531e-09, + "loss": 0.0, + "num_input_tokens_seen": 133323048, + "step": 197850 + }, + { + "epoch": 4.833630567024161, + "grad_norm": 0.00010189796012127772, + "learning_rate": 6.739586599983416e-09, + "loss": 0.0, + "num_input_tokens_seen": 133326184, + "step": 197855 + }, + { + "epoch": 4.833752717856009, + "grad_norm": 6.377408226398984e-06, + "learning_rate": 6.7297061916000706e-09, + "loss": 0.0, + "num_input_tokens_seen": 133329512, + "step": 197860 + }, + { + "epoch": 4.833874868687856, + "grad_norm": 0.0003621731302700937, + "learning_rate": 6.719833006545439e-09, + "loss": 0.0, + "num_input_tokens_seen": 133332840, + "step": 197865 + }, + { + "epoch": 4.833997019519703, + "grad_norm": 0.031017715111374855, + "learning_rate": 6.709967044891351e-09, + "loss": 0.0, + "num_input_tokens_seen": 133336424, + "step": 197870 + }, + { + "epoch": 4.83411917035155, + "grad_norm": 0.0003073037078138441, + "learning_rate": 6.7001083067095285e-09, + "loss": 0.0, + "num_input_tokens_seen": 133339880, + "step": 197875 + }, + { + "epoch": 4.834241321183397, + "grad_norm": 4.218026151647791e-05, + "learning_rate": 6.690256792071802e-09, + "loss": 0.0, + "num_input_tokens_seen": 133343208, + "step": 197880 + }, + { + "epoch": 4.834363472015244, + "grad_norm": 0.0009448288474231958, + "learning_rate": 6.680412501049559e-09, + "loss": 0.0, + "num_input_tokens_seen": 133346664, + "step": 197885 + }, + { + "epoch": 4.834485622847092, + "grad_norm": 3.618423215812072e-05, + "learning_rate": 6.670575433714631e-09, + "loss": 0.0, + "num_input_tokens_seen": 133350440, + "step": 197890 + }, + { + "epoch": 4.834607773678939, + "grad_norm": 0.0009504237677901983, + "learning_rate": 6.660745590138406e-09, + "loss": 0.0, + "num_input_tokens_seen": 133353704, + "step": 197895 + }, + { + "epoch": 4.8347299245107855, + "grad_norm": 0.008153039962053299, + "learning_rate": 6.650922970392381e-09, + "loss": 0.0, + "num_input_tokens_seen": 133356648, + "step": 197900 + }, + { + "epoch": 4.834852075342633, + "grad_norm": 0.07207217812538147, + "learning_rate": 6.641107574548055e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133361064, + "step": 197905 + }, + { + "epoch": 4.83497422617448, + "grad_norm": 0.08829605579376221, + "learning_rate": 6.6312994026768155e-09, + "loss": 0.0, + "num_input_tokens_seen": 133365160, + "step": 197910 + }, + { + "epoch": 4.8350963770063276, + "grad_norm": 0.005756590981036425, + "learning_rate": 6.621498454849939e-09, + "loss": 0.0, + "num_input_tokens_seen": 133368296, + "step": 197915 + }, + { + "epoch": 4.835218527838174, + "grad_norm": 9.277733624912798e-05, + "learning_rate": 6.6117047311387006e-09, + "loss": 0.0, + "num_input_tokens_seen": 133371304, + "step": 197920 + }, + { + "epoch": 4.835340678670022, + "grad_norm": 0.00010662163549568504, + "learning_rate": 6.601918231614267e-09, + "loss": 0.0279, + "num_input_tokens_seen": 133374760, + "step": 197925 + }, + { + "epoch": 4.835462829501869, + "grad_norm": 0.29649490118026733, + "learning_rate": 6.592138956347915e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133378728, + "step": 197930 + }, + { + "epoch": 4.835584980333716, + "grad_norm": 0.0001025107194436714, + "learning_rate": 6.582366905410808e-09, + "loss": 0.0, + "num_input_tokens_seen": 133382376, + "step": 197935 + }, + { + "epoch": 4.835707131165563, + "grad_norm": 0.0010448351968079805, + "learning_rate": 6.57260207887389e-09, + "loss": 0.0, + "num_input_tokens_seen": 133385576, + "step": 197940 + }, + { + "epoch": 4.835829281997411, + "grad_norm": 0.009936603717505932, + "learning_rate": 6.562844476808216e-09, + "loss": 0.0, + "num_input_tokens_seen": 133388456, + "step": 197945 + }, + { + "epoch": 4.835951432829257, + "grad_norm": 9.89523614407517e-05, + "learning_rate": 6.553094099284617e-09, + "loss": 0.0, + "num_input_tokens_seen": 133393704, + "step": 197950 + }, + { + "epoch": 4.836073583661105, + "grad_norm": 0.00017748077516444027, + "learning_rate": 6.543350946374259e-09, + "loss": 0.0, + "num_input_tokens_seen": 133397096, + "step": 197955 + }, + { + "epoch": 4.836195734492952, + "grad_norm": 6.688635949103627e-06, + "learning_rate": 6.533615018147753e-09, + "loss": 0.0, + "num_input_tokens_seen": 133399976, + "step": 197960 + }, + { + "epoch": 4.8363178853247994, + "grad_norm": 0.0003063578624278307, + "learning_rate": 6.523886314676152e-09, + "loss": 0.0, + "num_input_tokens_seen": 133403688, + "step": 197965 + }, + { + "epoch": 4.836440036156646, + "grad_norm": 0.002320501022040844, + "learning_rate": 6.514164836029956e-09, + "loss": 0.0, + "num_input_tokens_seen": 133407336, + "step": 197970 + }, + { + "epoch": 4.836562186988493, + "grad_norm": 0.0012531977845355868, + "learning_rate": 6.504450582279997e-09, + "loss": 0.0002, + "num_input_tokens_seen": 133410536, + "step": 197975 + }, + { + "epoch": 4.836684337820341, + "grad_norm": 0.0012084580957889557, + "learning_rate": 6.494743553496884e-09, + "loss": 0.0, + "num_input_tokens_seen": 133413480, + "step": 197980 + }, + { + "epoch": 4.836806488652188, + "grad_norm": 9.320944809587672e-05, + "learning_rate": 6.485043749751229e-09, + "loss": 0.0, + "num_input_tokens_seen": 133417000, + "step": 197985 + }, + { + "epoch": 4.836928639484035, + "grad_norm": 0.005629129242151976, + "learning_rate": 6.47535117111353e-09, + "loss": 0.0, + "num_input_tokens_seen": 133420200, + "step": 197990 + }, + { + "epoch": 4.837050790315882, + "grad_norm": 1.2280200280656572e-05, + "learning_rate": 6.465665817654287e-09, + "loss": 0.0, + "num_input_tokens_seen": 133423528, + "step": 197995 + }, + { + "epoch": 4.837172941147729, + "grad_norm": 0.0018922107992693782, + "learning_rate": 6.455987689443998e-09, + "loss": 0.0, + "num_input_tokens_seen": 133426984, + "step": 198000 + }, + { + "epoch": 4.837295091979576, + "grad_norm": 0.008688423782587051, + "learning_rate": 6.446316786552941e-09, + "loss": 0.0, + "num_input_tokens_seen": 133430312, + "step": 198005 + }, + { + "epoch": 4.837417242811424, + "grad_norm": 6.29360947641544e-05, + "learning_rate": 6.436653109051615e-09, + "loss": 0.0, + "num_input_tokens_seen": 133434024, + "step": 198010 + }, + { + "epoch": 4.8375393936432705, + "grad_norm": 2.2677289962302893e-05, + "learning_rate": 6.426996657010075e-09, + "loss": 0.0, + "num_input_tokens_seen": 133437480, + "step": 198015 + }, + { + "epoch": 4.837661544475118, + "grad_norm": 0.011553088203072548, + "learning_rate": 6.4173474304987096e-09, + "loss": 0.0, + "num_input_tokens_seen": 133440936, + "step": 198020 + }, + { + "epoch": 4.837783695306965, + "grad_norm": 0.00016699406842235476, + "learning_rate": 6.407705429587573e-09, + "loss": 0.0, + "num_input_tokens_seen": 133444072, + "step": 198025 + }, + { + "epoch": 4.8379058461388125, + "grad_norm": 0.002082869876176119, + "learning_rate": 6.398070654346943e-09, + "loss": 0.0, + "num_input_tokens_seen": 133447848, + "step": 198030 + }, + { + "epoch": 4.838027996970659, + "grad_norm": 0.0001289423234993592, + "learning_rate": 6.3884431048467635e-09, + "loss": 0.0, + "num_input_tokens_seen": 133451432, + "step": 198035 + }, + { + "epoch": 4.838150147802507, + "grad_norm": 0.0005948066245764494, + "learning_rate": 6.378822781156978e-09, + "loss": 0.0, + "num_input_tokens_seen": 133454440, + "step": 198040 + }, + { + "epoch": 4.838272298634354, + "grad_norm": 0.0036121481098234653, + "learning_rate": 6.369209683347754e-09, + "loss": 0.0, + "num_input_tokens_seen": 133457704, + "step": 198045 + }, + { + "epoch": 4.838394449466201, + "grad_norm": 0.0004089911817573011, + "learning_rate": 6.3596038114888114e-09, + "loss": 0.0, + "num_input_tokens_seen": 133461096, + "step": 198050 + }, + { + "epoch": 4.838516600298048, + "grad_norm": 0.00018862300203181803, + "learning_rate": 6.350005165650207e-09, + "loss": 0.0, + "num_input_tokens_seen": 133464360, + "step": 198055 + }, + { + "epoch": 4.838638751129896, + "grad_norm": 0.004749796353280544, + "learning_rate": 6.340413745901551e-09, + "loss": 0.0, + "num_input_tokens_seen": 133467816, + "step": 198060 + }, + { + "epoch": 4.838760901961742, + "grad_norm": 2.8277538149268366e-05, + "learning_rate": 6.330829552312678e-09, + "loss": 0.0, + "num_input_tokens_seen": 133471080, + "step": 198065 + }, + { + "epoch": 4.838883052793589, + "grad_norm": 0.11721772700548172, + "learning_rate": 6.321252584953307e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133474408, + "step": 198070 + }, + { + "epoch": 4.839005203625437, + "grad_norm": 0.003499187296256423, + "learning_rate": 6.31168284389294e-09, + "loss": 0.0, + "num_input_tokens_seen": 133477800, + "step": 198075 + }, + { + "epoch": 4.8391273544572835, + "grad_norm": 0.00037740456173196435, + "learning_rate": 6.302120329201411e-09, + "loss": 0.0, + "num_input_tokens_seen": 133480936, + "step": 198080 + }, + { + "epoch": 4.839249505289131, + "grad_norm": 0.00026468883152119815, + "learning_rate": 6.292565040947995e-09, + "loss": 0.0, + "num_input_tokens_seen": 133484392, + "step": 198085 + }, + { + "epoch": 4.839371656120978, + "grad_norm": 0.0002155925176339224, + "learning_rate": 6.283016979202416e-09, + "loss": 0.0, + "num_input_tokens_seen": 133488040, + "step": 198090 + }, + { + "epoch": 4.8394938069528255, + "grad_norm": 7.394074145850027e-06, + "learning_rate": 6.273476144034062e-09, + "loss": 0.0, + "num_input_tokens_seen": 133491112, + "step": 198095 + }, + { + "epoch": 4.839615957784672, + "grad_norm": 0.0001947157143149525, + "learning_rate": 6.2639425355122126e-09, + "loss": 0.0, + "num_input_tokens_seen": 133494504, + "step": 198100 + }, + { + "epoch": 4.83973810861652, + "grad_norm": 0.00013583162217400968, + "learning_rate": 6.254416153706254e-09, + "loss": 0.0, + "num_input_tokens_seen": 133497640, + "step": 198105 + }, + { + "epoch": 4.839860259448367, + "grad_norm": 0.0065417601726949215, + "learning_rate": 6.244896998685467e-09, + "loss": 0.0, + "num_input_tokens_seen": 133500968, + "step": 198110 + }, + { + "epoch": 4.839982410280214, + "grad_norm": 3.4384058380965143e-05, + "learning_rate": 6.235385070519017e-09, + "loss": 0.0, + "num_input_tokens_seen": 133504488, + "step": 198115 + }, + { + "epoch": 4.840104561112061, + "grad_norm": 6.624732122872956e-06, + "learning_rate": 6.225880369276293e-09, + "loss": 0.0, + "num_input_tokens_seen": 133507560, + "step": 198120 + }, + { + "epoch": 4.840226711943909, + "grad_norm": 0.7948578596115112, + "learning_rate": 6.216382895026129e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133510952, + "step": 198125 + }, + { + "epoch": 4.840348862775755, + "grad_norm": 0.008793001994490623, + "learning_rate": 6.206892647837802e-09, + "loss": 0.0, + "num_input_tokens_seen": 133513768, + "step": 198130 + }, + { + "epoch": 4.840471013607603, + "grad_norm": 0.0005286111263558269, + "learning_rate": 6.197409627780148e-09, + "loss": 0.0, + "num_input_tokens_seen": 133517480, + "step": 198135 + }, + { + "epoch": 4.84059316443945, + "grad_norm": 7.7891701948829e-05, + "learning_rate": 6.187933834922332e-09, + "loss": 0.0, + "num_input_tokens_seen": 133520744, + "step": 198140 + }, + { + "epoch": 4.8407153152712965, + "grad_norm": 6.683762330794707e-05, + "learning_rate": 6.178465269333188e-09, + "loss": 0.0, + "num_input_tokens_seen": 133523944, + "step": 198145 + }, + { + "epoch": 4.840837466103144, + "grad_norm": 0.0002326093817828223, + "learning_rate": 6.16900393108144e-09, + "loss": 0.0, + "num_input_tokens_seen": 133527016, + "step": 198150 + }, + { + "epoch": 4.840959616934992, + "grad_norm": 5.887150109629147e-05, + "learning_rate": 6.159549820236032e-09, + "loss": 0.0, + "num_input_tokens_seen": 133530152, + "step": 198155 + }, + { + "epoch": 4.8410817677668385, + "grad_norm": 0.00042430704343132675, + "learning_rate": 6.150102936865797e-09, + "loss": 0.0, + "num_input_tokens_seen": 133533352, + "step": 198160 + }, + { + "epoch": 4.841203918598685, + "grad_norm": 0.0015933191170915961, + "learning_rate": 6.140663281039238e-09, + "loss": 0.0, + "num_input_tokens_seen": 133536488, + "step": 198165 + }, + { + "epoch": 4.841326069430533, + "grad_norm": 2.208988917118404e-05, + "learning_rate": 6.131230852825075e-09, + "loss": 0.0, + "num_input_tokens_seen": 133539944, + "step": 198170 + }, + { + "epoch": 4.84144822026238, + "grad_norm": 0.0014280682662501931, + "learning_rate": 6.1218056522919225e-09, + "loss": 0.0, + "num_input_tokens_seen": 133542952, + "step": 198175 + }, + { + "epoch": 4.841570371094227, + "grad_norm": 0.0014833662426099181, + "learning_rate": 6.11238767950839e-09, + "loss": 0.0, + "num_input_tokens_seen": 133546472, + "step": 198180 + }, + { + "epoch": 4.841692521926074, + "grad_norm": 3.1954394216882065e-05, + "learning_rate": 6.102976934542758e-09, + "loss": 0.0, + "num_input_tokens_seen": 133549608, + "step": 198185 + }, + { + "epoch": 4.841814672757922, + "grad_norm": 0.0003832554502878338, + "learning_rate": 6.0935734174637485e-09, + "loss": 0.0, + "num_input_tokens_seen": 133553064, + "step": 198190 + }, + { + "epoch": 4.841936823589768, + "grad_norm": 8.113325748126954e-05, + "learning_rate": 6.084177128339529e-09, + "loss": 0.0, + "num_input_tokens_seen": 133556776, + "step": 198195 + }, + { + "epoch": 4.842058974421616, + "grad_norm": 6.306503200903535e-05, + "learning_rate": 6.074788067238601e-09, + "loss": 0.0, + "num_input_tokens_seen": 133560488, + "step": 198200 + }, + { + "epoch": 4.842181125253463, + "grad_norm": 3.252805254305713e-05, + "learning_rate": 6.0654062342290204e-09, + "loss": 0.0, + "num_input_tokens_seen": 133563496, + "step": 198205 + }, + { + "epoch": 4.84230327608531, + "grad_norm": 0.0006236277404241264, + "learning_rate": 6.056031629379177e-09, + "loss": 0.0, + "num_input_tokens_seen": 133566504, + "step": 198210 + }, + { + "epoch": 4.842425426917157, + "grad_norm": 0.001088500372134149, + "learning_rate": 6.046664252757239e-09, + "loss": 0.0, + "num_input_tokens_seen": 133569832, + "step": 198215 + }, + { + "epoch": 4.842547577749005, + "grad_norm": 3.684737384901382e-05, + "learning_rate": 6.037304104431262e-09, + "loss": 0.0, + "num_input_tokens_seen": 133572904, + "step": 198220 + }, + { + "epoch": 4.8426697285808515, + "grad_norm": 4.16367947764229e-05, + "learning_rate": 6.027951184469416e-09, + "loss": 0.0, + "num_input_tokens_seen": 133576744, + "step": 198225 + }, + { + "epoch": 4.842791879412699, + "grad_norm": 0.00012082437751814723, + "learning_rate": 6.018605492939533e-09, + "loss": 0.0, + "num_input_tokens_seen": 133580648, + "step": 198230 + }, + { + "epoch": 4.842914030244546, + "grad_norm": 0.0002326148678548634, + "learning_rate": 6.009267029909892e-09, + "loss": 0.0, + "num_input_tokens_seen": 133584104, + "step": 198235 + }, + { + "epoch": 4.843036181076393, + "grad_norm": 0.0003760404360946268, + "learning_rate": 5.999935795447997e-09, + "loss": 0.0, + "num_input_tokens_seen": 133587496, + "step": 198240 + }, + { + "epoch": 4.84315833190824, + "grad_norm": 0.00015414123481605202, + "learning_rate": 5.990611789622013e-09, + "loss": 0.0, + "num_input_tokens_seen": 133590888, + "step": 198245 + }, + { + "epoch": 4.843280482740088, + "grad_norm": 1.6417672668467276e-05, + "learning_rate": 5.9812950124997765e-09, + "loss": 0.0, + "num_input_tokens_seen": 133594344, + "step": 198250 + }, + { + "epoch": 4.843402633571935, + "grad_norm": 1.9476810848573223e-05, + "learning_rate": 5.971985464148788e-09, + "loss": 0.0, + "num_input_tokens_seen": 133597608, + "step": 198255 + }, + { + "epoch": 4.843524784403781, + "grad_norm": 0.0008444825652986765, + "learning_rate": 5.962683144636882e-09, + "loss": 0.0, + "num_input_tokens_seen": 133600872, + "step": 198260 + }, + { + "epoch": 4.843646935235629, + "grad_norm": 0.00028568675043061376, + "learning_rate": 5.9533880540317826e-09, + "loss": 0.0, + "num_input_tokens_seen": 133604392, + "step": 198265 + }, + { + "epoch": 4.843769086067476, + "grad_norm": 0.002278596628457308, + "learning_rate": 5.944100192400992e-09, + "loss": 0.0, + "num_input_tokens_seen": 133607912, + "step": 198270 + }, + { + "epoch": 4.843891236899323, + "grad_norm": 0.01016619149595499, + "learning_rate": 5.93481955981201e-09, + "loss": 0.0, + "num_input_tokens_seen": 133610920, + "step": 198275 + }, + { + "epoch": 4.84401338773117, + "grad_norm": 0.00020297674927860498, + "learning_rate": 5.92554615633245e-09, + "loss": 0.0, + "num_input_tokens_seen": 133614760, + "step": 198280 + }, + { + "epoch": 4.844135538563018, + "grad_norm": 0.00021635602752212435, + "learning_rate": 5.916279982029704e-09, + "loss": 0.0, + "num_input_tokens_seen": 133618664, + "step": 198285 + }, + { + "epoch": 4.844257689394865, + "grad_norm": 1.835424336604774e-05, + "learning_rate": 5.90702103697105e-09, + "loss": 0.0, + "num_input_tokens_seen": 133621992, + "step": 198290 + }, + { + "epoch": 4.844379840226712, + "grad_norm": 0.00019787390192504972, + "learning_rate": 5.897769321223989e-09, + "loss": 0.0, + "num_input_tokens_seen": 133625192, + "step": 198295 + }, + { + "epoch": 4.844501991058559, + "grad_norm": 0.00028561081853695214, + "learning_rate": 5.888524834855802e-09, + "loss": 0.0, + "num_input_tokens_seen": 133628840, + "step": 198300 + }, + { + "epoch": 4.844624141890407, + "grad_norm": 4.879441257799044e-05, + "learning_rate": 5.879287577933545e-09, + "loss": 0.0, + "num_input_tokens_seen": 133632296, + "step": 198305 + }, + { + "epoch": 4.844746292722253, + "grad_norm": 7.078771886881441e-05, + "learning_rate": 5.870057550524499e-09, + "loss": 0.0, + "num_input_tokens_seen": 133635560, + "step": 198310 + }, + { + "epoch": 4.844868443554101, + "grad_norm": 0.00010384644701844081, + "learning_rate": 5.860834752695831e-09, + "loss": 0.0, + "num_input_tokens_seen": 133638568, + "step": 198315 + }, + { + "epoch": 4.844990594385948, + "grad_norm": 0.0012810814660042524, + "learning_rate": 5.851619184514489e-09, + "loss": 0.0, + "num_input_tokens_seen": 133642024, + "step": 198320 + }, + { + "epoch": 4.845112745217795, + "grad_norm": 0.00021210868726484478, + "learning_rate": 5.842410846047641e-09, + "loss": 0.0, + "num_input_tokens_seen": 133645544, + "step": 198325 + }, + { + "epoch": 4.845234896049642, + "grad_norm": 0.00038419957854785025, + "learning_rate": 5.833209737362121e-09, + "loss": 0.0, + "num_input_tokens_seen": 133648936, + "step": 198330 + }, + { + "epoch": 4.845357046881489, + "grad_norm": 0.00927771721035242, + "learning_rate": 5.8240158585249886e-09, + "loss": 0.0, + "num_input_tokens_seen": 133652584, + "step": 198335 + }, + { + "epoch": 4.8454791977133365, + "grad_norm": 6.750579632353038e-05, + "learning_rate": 5.814829209602856e-09, + "loss": 0.0, + "num_input_tokens_seen": 133656424, + "step": 198340 + }, + { + "epoch": 4.845601348545183, + "grad_norm": 0.0001237114774994552, + "learning_rate": 5.805649790662892e-09, + "loss": 0.0, + "num_input_tokens_seen": 133660328, + "step": 198345 + }, + { + "epoch": 4.845723499377031, + "grad_norm": 0.00010960324289044365, + "learning_rate": 5.796477601771488e-09, + "loss": 0.0, + "num_input_tokens_seen": 133664040, + "step": 198350 + }, + { + "epoch": 4.845845650208878, + "grad_norm": 0.0028228070586919785, + "learning_rate": 5.78731264299559e-09, + "loss": 0.0, + "num_input_tokens_seen": 133667496, + "step": 198355 + }, + { + "epoch": 4.845967801040725, + "grad_norm": 9.504000627202913e-05, + "learning_rate": 5.7781549144017e-09, + "loss": 0.0, + "num_input_tokens_seen": 133671464, + "step": 198360 + }, + { + "epoch": 4.846089951872572, + "grad_norm": 0.001479696249589324, + "learning_rate": 5.769004416056544e-09, + "loss": 0.0, + "num_input_tokens_seen": 133674920, + "step": 198365 + }, + { + "epoch": 4.84621210270442, + "grad_norm": 0.00032303182524628937, + "learning_rate": 5.759861148026624e-09, + "loss": 0.0, + "num_input_tokens_seen": 133678312, + "step": 198370 + }, + { + "epoch": 4.846334253536266, + "grad_norm": 0.003984017763286829, + "learning_rate": 5.75072511037833e-09, + "loss": 0.0, + "num_input_tokens_seen": 133681832, + "step": 198375 + }, + { + "epoch": 4.846456404368114, + "grad_norm": 6.143693462945521e-05, + "learning_rate": 5.741596303178276e-09, + "loss": 0.0, + "num_input_tokens_seen": 133685288, + "step": 198380 + }, + { + "epoch": 4.846578555199961, + "grad_norm": 0.00015785187133587897, + "learning_rate": 5.732474726492631e-09, + "loss": 0.0, + "num_input_tokens_seen": 133688616, + "step": 198385 + }, + { + "epoch": 4.846700706031808, + "grad_norm": 2.5543968149577267e-05, + "learning_rate": 5.723360380388009e-09, + "loss": 0.0, + "num_input_tokens_seen": 133691880, + "step": 198390 + }, + { + "epoch": 4.846822856863655, + "grad_norm": 0.000297121936455369, + "learning_rate": 5.714253264930357e-09, + "loss": 0.0, + "num_input_tokens_seen": 133695400, + "step": 198395 + }, + { + "epoch": 4.846945007695503, + "grad_norm": 3.050006489502266e-05, + "learning_rate": 5.7051533801861786e-09, + "loss": 0.0, + "num_input_tokens_seen": 133698664, + "step": 198400 + }, + { + "epoch": 4.8470671585273495, + "grad_norm": 0.000478239671792835, + "learning_rate": 5.696060726221641e-09, + "loss": 0.0, + "num_input_tokens_seen": 133702248, + "step": 198405 + }, + { + "epoch": 4.847189309359197, + "grad_norm": 0.00010278217087034136, + "learning_rate": 5.686975303102693e-09, + "loss": 0.0, + "num_input_tokens_seen": 133705832, + "step": 198410 + }, + { + "epoch": 4.847311460191044, + "grad_norm": 3.768475289689377e-05, + "learning_rate": 5.677897110895502e-09, + "loss": 0.0818, + "num_input_tokens_seen": 133709224, + "step": 198415 + }, + { + "epoch": 4.8474336110228915, + "grad_norm": 0.00011841404921142384, + "learning_rate": 5.6688261496661286e-09, + "loss": 0.0, + "num_input_tokens_seen": 133712488, + "step": 198420 + }, + { + "epoch": 4.847555761854738, + "grad_norm": 4.986863132216968e-06, + "learning_rate": 5.659762419480407e-09, + "loss": 0.0, + "num_input_tokens_seen": 133715880, + "step": 198425 + }, + { + "epoch": 4.847677912686585, + "grad_norm": 0.00031835006666369736, + "learning_rate": 5.650705920404397e-09, + "loss": 0.0, + "num_input_tokens_seen": 133719016, + "step": 198430 + }, + { + "epoch": 4.847800063518433, + "grad_norm": 6.428364486055216e-06, + "learning_rate": 5.641656652503934e-09, + "loss": 0.0, + "num_input_tokens_seen": 133722728, + "step": 198435 + }, + { + "epoch": 4.847922214350279, + "grad_norm": 0.0008753464790061116, + "learning_rate": 5.632614615844744e-09, + "loss": 0.0, + "num_input_tokens_seen": 133725864, + "step": 198440 + }, + { + "epoch": 4.848044365182127, + "grad_norm": 0.000149292332935147, + "learning_rate": 5.6235798104926625e-09, + "loss": 0.0, + "num_input_tokens_seen": 133729064, + "step": 198445 + }, + { + "epoch": 4.848166516013974, + "grad_norm": 4.988710497855209e-05, + "learning_rate": 5.614552236513304e-09, + "loss": 0.0, + "num_input_tokens_seen": 133732200, + "step": 198450 + }, + { + "epoch": 4.848288666845821, + "grad_norm": 0.0008496578666381538, + "learning_rate": 5.605531893972393e-09, + "loss": 0.0, + "num_input_tokens_seen": 133735784, + "step": 198455 + }, + { + "epoch": 4.848410817677668, + "grad_norm": 8.90137926035095e-06, + "learning_rate": 5.596518782935655e-09, + "loss": 0.0, + "num_input_tokens_seen": 133739304, + "step": 198460 + }, + { + "epoch": 4.848532968509516, + "grad_norm": 0.0003163012443110347, + "learning_rate": 5.587512903468372e-09, + "loss": 0.0, + "num_input_tokens_seen": 133742696, + "step": 198465 + }, + { + "epoch": 4.8486551193413625, + "grad_norm": 3.769802060560323e-05, + "learning_rate": 5.578514255636158e-09, + "loss": 0.0, + "num_input_tokens_seen": 133746472, + "step": 198470 + }, + { + "epoch": 4.84877727017321, + "grad_norm": 0.0006406178581528366, + "learning_rate": 5.5695228395045145e-09, + "loss": 0.0, + "num_input_tokens_seen": 133749416, + "step": 198475 + }, + { + "epoch": 4.848899421005057, + "grad_norm": 4.202740819891915e-05, + "learning_rate": 5.560538655138724e-09, + "loss": 0.0, + "num_input_tokens_seen": 133753000, + "step": 198480 + }, + { + "epoch": 4.8490215718369045, + "grad_norm": 0.00010983301035594195, + "learning_rate": 5.5515617026041796e-09, + "loss": 0.0, + "num_input_tokens_seen": 133756008, + "step": 198485 + }, + { + "epoch": 4.849143722668751, + "grad_norm": 0.0002412906615063548, + "learning_rate": 5.542591981966049e-09, + "loss": 0.0, + "num_input_tokens_seen": 133759528, + "step": 198490 + }, + { + "epoch": 4.849265873500599, + "grad_norm": 0.0010208688909187913, + "learning_rate": 5.5336294932898376e-09, + "loss": 0.0, + "num_input_tokens_seen": 133762920, + "step": 198495 + }, + { + "epoch": 4.849388024332446, + "grad_norm": 2.516420317988377e-05, + "learning_rate": 5.5246742366404915e-09, + "loss": 0.0, + "num_input_tokens_seen": 133766632, + "step": 198500 + }, + { + "epoch": 4.849510175164292, + "grad_norm": 0.0001550140732433647, + "learning_rate": 5.515726212083071e-09, + "loss": 0.0, + "num_input_tokens_seen": 133769832, + "step": 198505 + }, + { + "epoch": 4.84963232599614, + "grad_norm": 1.6459995094919577e-05, + "learning_rate": 5.506785419682969e-09, + "loss": 0.0, + "num_input_tokens_seen": 133773224, + "step": 198510 + }, + { + "epoch": 4.849754476827988, + "grad_norm": 0.00033926600008271635, + "learning_rate": 5.49785185950491e-09, + "loss": 0.0, + "num_input_tokens_seen": 133776616, + "step": 198515 + }, + { + "epoch": 4.849876627659834, + "grad_norm": 8.666397479828447e-05, + "learning_rate": 5.488925531613953e-09, + "loss": 0.0, + "num_input_tokens_seen": 133779752, + "step": 198520 + }, + { + "epoch": 4.849998778491681, + "grad_norm": 0.00028835254488512874, + "learning_rate": 5.480006436075046e-09, + "loss": 0.0, + "num_input_tokens_seen": 133783528, + "step": 198525 + }, + { + "epoch": 4.850120929323529, + "grad_norm": 0.0001398788153892383, + "learning_rate": 5.471094572953028e-09, + "loss": 0.0, + "num_input_tokens_seen": 133786920, + "step": 198530 + }, + { + "epoch": 4.8502430801553755, + "grad_norm": 0.00022068715770728886, + "learning_rate": 5.462189942312734e-09, + "loss": 0.0, + "num_input_tokens_seen": 133790248, + "step": 198535 + }, + { + "epoch": 4.850365230987223, + "grad_norm": 0.0027091875672340393, + "learning_rate": 5.453292544218779e-09, + "loss": 0.0, + "num_input_tokens_seen": 133793576, + "step": 198540 + }, + { + "epoch": 4.85048738181907, + "grad_norm": 0.0008957352256402373, + "learning_rate": 5.444402378736113e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133797224, + "step": 198545 + }, + { + "epoch": 4.8506095326509175, + "grad_norm": 0.0001257900585187599, + "learning_rate": 5.435519445929237e-09, + "loss": 0.1013, + "num_input_tokens_seen": 133800360, + "step": 198550 + }, + { + "epoch": 4.850731683482764, + "grad_norm": 0.00018330544116906822, + "learning_rate": 5.426643745862658e-09, + "loss": 0.0, + "num_input_tokens_seen": 133803880, + "step": 198555 + }, + { + "epoch": 4.850853834314612, + "grad_norm": 0.006341997068375349, + "learning_rate": 5.4177752786011e-09, + "loss": 0.0, + "num_input_tokens_seen": 133807400, + "step": 198560 + }, + { + "epoch": 4.850975985146459, + "grad_norm": 6.26806213404052e-05, + "learning_rate": 5.408914044209068e-09, + "loss": 0.0, + "num_input_tokens_seen": 133810984, + "step": 198565 + }, + { + "epoch": 4.851098135978306, + "grad_norm": 0.0001525198749732226, + "learning_rate": 5.400060042750843e-09, + "loss": 0.0, + "num_input_tokens_seen": 133814632, + "step": 198570 + }, + { + "epoch": 4.851220286810153, + "grad_norm": 0.00013128323189448565, + "learning_rate": 5.391213274290929e-09, + "loss": 0.0, + "num_input_tokens_seen": 133817896, + "step": 198575 + }, + { + "epoch": 4.851342437642001, + "grad_norm": 0.0014685146743431687, + "learning_rate": 5.382373738893609e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133820840, + "step": 198580 + }, + { + "epoch": 4.851464588473847, + "grad_norm": 8.187860657926649e-05, + "learning_rate": 5.3735414366232745e-09, + "loss": 0.0, + "num_input_tokens_seen": 133824232, + "step": 198585 + }, + { + "epoch": 4.851586739305695, + "grad_norm": 1.9650955437100492e-05, + "learning_rate": 5.3647163675439864e-09, + "loss": 0.0, + "num_input_tokens_seen": 133827688, + "step": 198590 + }, + { + "epoch": 4.851708890137542, + "grad_norm": 0.00018294328765477985, + "learning_rate": 5.3558985317200265e-09, + "loss": 0.0, + "num_input_tokens_seen": 133831272, + "step": 198595 + }, + { + "epoch": 4.8518310409693886, + "grad_norm": 0.00021778763039037585, + "learning_rate": 5.347087929215455e-09, + "loss": 0.0004, + "num_input_tokens_seen": 133835048, + "step": 198600 + }, + { + "epoch": 4.851953191801236, + "grad_norm": 0.0007337976712733507, + "learning_rate": 5.338284560094442e-09, + "loss": 0.0, + "num_input_tokens_seen": 133837992, + "step": 198605 + }, + { + "epoch": 4.852075342633084, + "grad_norm": 4.622937194653787e-05, + "learning_rate": 5.3294884244208246e-09, + "loss": 0.0, + "num_input_tokens_seen": 133841256, + "step": 198610 + }, + { + "epoch": 4.852197493464931, + "grad_norm": 2.2683816496282816e-05, + "learning_rate": 5.320699522258887e-09, + "loss": 0.0, + "num_input_tokens_seen": 133844520, + "step": 198615 + }, + { + "epoch": 4.852319644296777, + "grad_norm": 4.410344627103768e-05, + "learning_rate": 5.311917853672243e-09, + "loss": 0.0, + "num_input_tokens_seen": 133847848, + "step": 198620 + }, + { + "epoch": 4.852441795128625, + "grad_norm": 0.0010950923897325993, + "learning_rate": 5.303143418724843e-09, + "loss": 0.0, + "num_input_tokens_seen": 133851496, + "step": 198625 + }, + { + "epoch": 4.852563945960472, + "grad_norm": 0.0003131901612505317, + "learning_rate": 5.294376217480634e-09, + "loss": 0.0, + "num_input_tokens_seen": 133854568, + "step": 198630 + }, + { + "epoch": 4.852686096792319, + "grad_norm": 0.00022891550906933844, + "learning_rate": 5.285616250003233e-09, + "loss": 0.0001, + "num_input_tokens_seen": 133858024, + "step": 198635 + }, + { + "epoch": 4.852808247624166, + "grad_norm": 0.010083446279168129, + "learning_rate": 5.276863516356367e-09, + "loss": 0.0, + "num_input_tokens_seen": 133861224, + "step": 198640 + }, + { + "epoch": 4.852930398456014, + "grad_norm": 0.00024773701443336904, + "learning_rate": 5.268118016603651e-09, + "loss": 0.0, + "num_input_tokens_seen": 133864616, + "step": 198645 + }, + { + "epoch": 4.8530525492878605, + "grad_norm": 0.00037822252488695085, + "learning_rate": 5.259379750808812e-09, + "loss": 0.0, + "num_input_tokens_seen": 133867688, + "step": 198650 + }, + { + "epoch": 4.853174700119708, + "grad_norm": 0.0006301982211880386, + "learning_rate": 5.250648719035245e-09, + "loss": 0.0, + "num_input_tokens_seen": 133871336, + "step": 198655 + }, + { + "epoch": 4.853296850951555, + "grad_norm": 0.00013490190031006932, + "learning_rate": 5.241924921346564e-09, + "loss": 0.0, + "num_input_tokens_seen": 133875432, + "step": 198660 + }, + { + "epoch": 4.8534190017834025, + "grad_norm": 0.000527222000528127, + "learning_rate": 5.233208357806163e-09, + "loss": 0.0, + "num_input_tokens_seen": 133878760, + "step": 198665 + }, + { + "epoch": 4.853541152615249, + "grad_norm": 0.00030499850981868804, + "learning_rate": 5.224499028477436e-09, + "loss": 0.0, + "num_input_tokens_seen": 133882024, + "step": 198670 + }, + { + "epoch": 4.853663303447097, + "grad_norm": 0.0008157443953678012, + "learning_rate": 5.215796933423666e-09, + "loss": 0.0, + "num_input_tokens_seen": 133885544, + "step": 198675 + }, + { + "epoch": 4.853785454278944, + "grad_norm": 0.0001473993033869192, + "learning_rate": 5.207102072708247e-09, + "loss": 0.0, + "num_input_tokens_seen": 133888808, + "step": 198680 + }, + { + "epoch": 4.853907605110791, + "grad_norm": 0.0014335029991343617, + "learning_rate": 5.1984144463943505e-09, + "loss": 0.0, + "num_input_tokens_seen": 133892648, + "step": 198685 + }, + { + "epoch": 4.854029755942638, + "grad_norm": 0.03337578475475311, + "learning_rate": 5.1897340545451474e-09, + "loss": 0.0, + "num_input_tokens_seen": 133896360, + "step": 198690 + }, + { + "epoch": 4.854151906774485, + "grad_norm": 0.00011968166654696688, + "learning_rate": 5.181060897223699e-09, + "loss": 0.0, + "num_input_tokens_seen": 133899816, + "step": 198695 + }, + { + "epoch": 4.854274057606332, + "grad_norm": 0.00016990097356028855, + "learning_rate": 5.172394974493177e-09, + "loss": 0.0, + "num_input_tokens_seen": 133903144, + "step": 198700 + }, + { + "epoch": 4.854396208438179, + "grad_norm": 9.55967916524969e-05, + "learning_rate": 5.1637362864166424e-09, + "loss": 0.0, + "num_input_tokens_seen": 133906408, + "step": 198705 + }, + { + "epoch": 4.854518359270027, + "grad_norm": 0.0006632882286794484, + "learning_rate": 5.155084833056933e-09, + "loss": 0.0, + "num_input_tokens_seen": 133909672, + "step": 198710 + }, + { + "epoch": 4.8546405101018735, + "grad_norm": 0.0002882846456486732, + "learning_rate": 5.146440614476999e-09, + "loss": 0.0, + "num_input_tokens_seen": 133913192, + "step": 198715 + }, + { + "epoch": 4.854762660933721, + "grad_norm": 8.525528392056003e-05, + "learning_rate": 5.13780363073979e-09, + "loss": 0.0, + "num_input_tokens_seen": 133916584, + "step": 198720 + }, + { + "epoch": 4.854884811765568, + "grad_norm": 0.00025111655122600496, + "learning_rate": 5.129173881908033e-09, + "loss": 0.0, + "num_input_tokens_seen": 133919720, + "step": 198725 + }, + { + "epoch": 4.8550069625974155, + "grad_norm": 0.0003259789664298296, + "learning_rate": 5.120551368044568e-09, + "loss": 0.0, + "num_input_tokens_seen": 133922856, + "step": 198730 + }, + { + "epoch": 4.855129113429262, + "grad_norm": 3.2461808586958796e-05, + "learning_rate": 5.11193608921201e-09, + "loss": 0.0, + "num_input_tokens_seen": 133926312, + "step": 198735 + }, + { + "epoch": 4.85525126426111, + "grad_norm": 8.612728561274707e-05, + "learning_rate": 5.103328045472977e-09, + "loss": 0.0, + "num_input_tokens_seen": 133929512, + "step": 198740 + }, + { + "epoch": 4.855373415092957, + "grad_norm": 0.00030700431670993567, + "learning_rate": 5.094727236890195e-09, + "loss": 0.0, + "num_input_tokens_seen": 133932968, + "step": 198745 + }, + { + "epoch": 4.855495565924804, + "grad_norm": 0.0037068724632263184, + "learning_rate": 5.086133663526171e-09, + "loss": 0.0695, + "num_input_tokens_seen": 133936360, + "step": 198750 + }, + { + "epoch": 4.855617716756651, + "grad_norm": 1.6346239135600626e-05, + "learning_rate": 5.0775473254434094e-09, + "loss": 0.0, + "num_input_tokens_seen": 133939560, + "step": 198755 + }, + { + "epoch": 4.855739867588499, + "grad_norm": 0.0004788915684912354, + "learning_rate": 5.068968222704307e-09, + "loss": 0.0, + "num_input_tokens_seen": 133942952, + "step": 198760 + }, + { + "epoch": 4.855862018420345, + "grad_norm": 0.004332916811108589, + "learning_rate": 5.0603963553711435e-09, + "loss": 0.0, + "num_input_tokens_seen": 133945960, + "step": 198765 + }, + { + "epoch": 4.855984169252192, + "grad_norm": 6.013052552589215e-05, + "learning_rate": 5.051831723506539e-09, + "loss": 0.0, + "num_input_tokens_seen": 133949096, + "step": 198770 + }, + { + "epoch": 4.85610632008404, + "grad_norm": 0.0014203329337760806, + "learning_rate": 5.043274327172553e-09, + "loss": 0.0, + "num_input_tokens_seen": 133952680, + "step": 198775 + }, + { + "epoch": 4.856228470915887, + "grad_norm": 0.0002815816842485219, + "learning_rate": 5.034724166431581e-09, + "loss": 0.0, + "num_input_tokens_seen": 133955816, + "step": 198780 + }, + { + "epoch": 4.856350621747734, + "grad_norm": 0.008390041999518871, + "learning_rate": 5.026181241345573e-09, + "loss": 0.0, + "num_input_tokens_seen": 133959080, + "step": 198785 + }, + { + "epoch": 4.856472772579581, + "grad_norm": 4.61300733149983e-05, + "learning_rate": 5.017645551976812e-09, + "loss": 0.0, + "num_input_tokens_seen": 133962344, + "step": 198790 + }, + { + "epoch": 4.8565949234114285, + "grad_norm": 0.0011516953818500042, + "learning_rate": 5.009117098387472e-09, + "loss": 0.0, + "num_input_tokens_seen": 133965544, + "step": 198795 + }, + { + "epoch": 4.856717074243275, + "grad_norm": 1.3971000043966342e-05, + "learning_rate": 5.000595880639391e-09, + "loss": 0.0, + "num_input_tokens_seen": 133968680, + "step": 198800 + }, + { + "epoch": 4.856839225075123, + "grad_norm": 0.00022225634893402457, + "learning_rate": 4.9920818987945205e-09, + "loss": 0.0, + "num_input_tokens_seen": 133972072, + "step": 198805 + }, + { + "epoch": 4.85696137590697, + "grad_norm": 0.0014802911318838596, + "learning_rate": 4.98357515291492e-09, + "loss": 0.0, + "num_input_tokens_seen": 133975976, + "step": 198810 + }, + { + "epoch": 4.857083526738817, + "grad_norm": 0.0018839415861293674, + "learning_rate": 4.975075643062321e-09, + "loss": 0.0, + "num_input_tokens_seen": 133978920, + "step": 198815 + }, + { + "epoch": 4.857205677570664, + "grad_norm": 34.527950286865234, + "learning_rate": 4.966583369298782e-09, + "loss": 0.0706, + "num_input_tokens_seen": 133982312, + "step": 198820 + }, + { + "epoch": 4.857327828402512, + "grad_norm": 4.835088475374505e-05, + "learning_rate": 4.9580983316857005e-09, + "loss": 0.0, + "num_input_tokens_seen": 133986216, + "step": 198825 + }, + { + "epoch": 4.857449979234358, + "grad_norm": 9.118052548728883e-05, + "learning_rate": 4.9496205302850256e-09, + "loss": 0.0, + "num_input_tokens_seen": 133989288, + "step": 198830 + }, + { + "epoch": 4.857572130066206, + "grad_norm": 5.738848813052755e-06, + "learning_rate": 4.941149965158375e-09, + "loss": 0.0, + "num_input_tokens_seen": 133992680, + "step": 198835 + }, + { + "epoch": 4.857694280898053, + "grad_norm": 7.152935722842813e-05, + "learning_rate": 4.932686636367256e-09, + "loss": 0.0, + "num_input_tokens_seen": 133996008, + "step": 198840 + }, + { + "epoch": 4.8578164317299, + "grad_norm": 5.278225944493897e-05, + "learning_rate": 4.924230543973284e-09, + "loss": 0.0, + "num_input_tokens_seen": 133999528, + "step": 198845 + }, + { + "epoch": 4.857938582561747, + "grad_norm": 0.0007009048713371158, + "learning_rate": 4.915781688037967e-09, + "loss": 0.0, + "num_input_tokens_seen": 134002984, + "step": 198850 + }, + { + "epoch": 4.858060733393595, + "grad_norm": 0.00017325843509752303, + "learning_rate": 4.9073400686228115e-09, + "loss": 0.0, + "num_input_tokens_seen": 134007016, + "step": 198855 + }, + { + "epoch": 4.8581828842254415, + "grad_norm": 2.3501368559664115e-05, + "learning_rate": 4.89890568578899e-09, + "loss": 0.0, + "num_input_tokens_seen": 134010216, + "step": 198860 + }, + { + "epoch": 4.858305035057288, + "grad_norm": 0.0004582449037116021, + "learning_rate": 4.890478539598008e-09, + "loss": 0.0, + "num_input_tokens_seen": 134013416, + "step": 198865 + }, + { + "epoch": 4.858427185889136, + "grad_norm": 0.00030273612355813384, + "learning_rate": 4.8820586301112635e-09, + "loss": 0.0, + "num_input_tokens_seen": 134016680, + "step": 198870 + }, + { + "epoch": 4.8585493367209835, + "grad_norm": 8.884895942173898e-05, + "learning_rate": 4.873645957389705e-09, + "loss": 0.0, + "num_input_tokens_seen": 134019816, + "step": 198875 + }, + { + "epoch": 4.85867148755283, + "grad_norm": 8.290004916489124e-05, + "learning_rate": 4.865240521494729e-09, + "loss": 0.0, + "num_input_tokens_seen": 134023400, + "step": 198880 + }, + { + "epoch": 4.858793638384677, + "grad_norm": 0.0005973275983706117, + "learning_rate": 4.8568423224872866e-09, + "loss": 0.0134, + "num_input_tokens_seen": 134026664, + "step": 198885 + }, + { + "epoch": 4.858915789216525, + "grad_norm": 0.0023913774639368057, + "learning_rate": 4.848451360428551e-09, + "loss": 0.0, + "num_input_tokens_seen": 134029736, + "step": 198890 + }, + { + "epoch": 4.859037940048371, + "grad_norm": 0.0021553239785134792, + "learning_rate": 4.840067635379697e-09, + "loss": 0.0, + "num_input_tokens_seen": 134032808, + "step": 198895 + }, + { + "epoch": 4.859160090880219, + "grad_norm": 0.0007805172353982925, + "learning_rate": 4.83169114740134e-09, + "loss": 0.0, + "num_input_tokens_seen": 134036392, + "step": 198900 + }, + { + "epoch": 4.859282241712066, + "grad_norm": 0.00021608646784443408, + "learning_rate": 4.823321896554766e-09, + "loss": 0.0, + "num_input_tokens_seen": 134040168, + "step": 198905 + }, + { + "epoch": 4.859404392543913, + "grad_norm": 2.203617441409733e-05, + "learning_rate": 4.814959882900482e-09, + "loss": 0.0, + "num_input_tokens_seen": 134043624, + "step": 198910 + }, + { + "epoch": 4.85952654337576, + "grad_norm": 0.0034446585923433304, + "learning_rate": 4.806605106499661e-09, + "loss": 0.0, + "num_input_tokens_seen": 134046440, + "step": 198915 + }, + { + "epoch": 4.859648694207608, + "grad_norm": 0.32102784514427185, + "learning_rate": 4.7982575674128115e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134049384, + "step": 198920 + }, + { + "epoch": 4.859770845039455, + "grad_norm": 0.00032789475517347455, + "learning_rate": 4.78991726570066e-09, + "loss": 0.0, + "num_input_tokens_seen": 134052392, + "step": 198925 + }, + { + "epoch": 4.859892995871302, + "grad_norm": 0.00021464366000145674, + "learning_rate": 4.7815842014239385e-09, + "loss": 0.0, + "num_input_tokens_seen": 134055976, + "step": 198930 + }, + { + "epoch": 4.860015146703149, + "grad_norm": 4.051731139043113e-06, + "learning_rate": 4.7732583746432635e-09, + "loss": 0.0, + "num_input_tokens_seen": 134060008, + "step": 198935 + }, + { + "epoch": 4.860137297534997, + "grad_norm": 0.00031672645127400756, + "learning_rate": 4.764939785419031e-09, + "loss": 0.0, + "num_input_tokens_seen": 134063400, + "step": 198940 + }, + { + "epoch": 4.860259448366843, + "grad_norm": 0.00017728647799231112, + "learning_rate": 4.756628433811971e-09, + "loss": 0.0, + "num_input_tokens_seen": 134066920, + "step": 198945 + }, + { + "epoch": 4.860381599198691, + "grad_norm": 8.471917681163177e-06, + "learning_rate": 4.7483243198823685e-09, + "loss": 0.0, + "num_input_tokens_seen": 134070312, + "step": 198950 + }, + { + "epoch": 4.860503750030538, + "grad_norm": 0.00016513862647116184, + "learning_rate": 4.740027443690509e-09, + "loss": 0.0, + "num_input_tokens_seen": 134073576, + "step": 198955 + }, + { + "epoch": 4.860625900862384, + "grad_norm": 2.3647400666959584e-05, + "learning_rate": 4.731737805297009e-09, + "loss": 0.0, + "num_input_tokens_seen": 134076840, + "step": 198960 + }, + { + "epoch": 4.860748051694232, + "grad_norm": 0.00016174823394976556, + "learning_rate": 4.723455404761933e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134080040, + "step": 198965 + }, + { + "epoch": 4.860870202526079, + "grad_norm": 0.00017587091133464128, + "learning_rate": 4.715180242145678e-09, + "loss": 0.0, + "num_input_tokens_seen": 134082984, + "step": 198970 + }, + { + "epoch": 4.8609923533579265, + "grad_norm": 0.00035376264713704586, + "learning_rate": 4.706912317508305e-09, + "loss": 0.0, + "num_input_tokens_seen": 134086440, + "step": 198975 + }, + { + "epoch": 4.861114504189773, + "grad_norm": 0.0009537230944260955, + "learning_rate": 4.698651630909878e-09, + "loss": 0.0, + "num_input_tokens_seen": 134089768, + "step": 198980 + }, + { + "epoch": 4.861236655021621, + "grad_norm": 7.79301262809895e-05, + "learning_rate": 4.690398182410682e-09, + "loss": 0.0, + "num_input_tokens_seen": 134092968, + "step": 198985 + }, + { + "epoch": 4.861358805853468, + "grad_norm": 4.4560383685166016e-05, + "learning_rate": 4.682151972070558e-09, + "loss": 0.0, + "num_input_tokens_seen": 134096168, + "step": 198990 + }, + { + "epoch": 4.861480956685315, + "grad_norm": 3.626396573963575e-05, + "learning_rate": 4.673912999949459e-09, + "loss": 0.0, + "num_input_tokens_seen": 134099368, + "step": 198995 + }, + { + "epoch": 4.861603107517162, + "grad_norm": 0.0009027646156027913, + "learning_rate": 4.665681266107446e-09, + "loss": 0.0, + "num_input_tokens_seen": 134103528, + "step": 199000 + }, + { + "epoch": 4.86172525834901, + "grad_norm": 0.00018381788686383516, + "learning_rate": 4.657456770604362e-09, + "loss": 0.0, + "num_input_tokens_seen": 134106600, + "step": 199005 + }, + { + "epoch": 4.861847409180856, + "grad_norm": 0.0003359086695127189, + "learning_rate": 4.649239513499936e-09, + "loss": 0.0, + "num_input_tokens_seen": 134109864, + "step": 199010 + }, + { + "epoch": 4.861969560012704, + "grad_norm": 0.012157008051872253, + "learning_rate": 4.641029494853899e-09, + "loss": 0.0, + "num_input_tokens_seen": 134113448, + "step": 199015 + }, + { + "epoch": 4.862091710844551, + "grad_norm": 0.00029743279446847737, + "learning_rate": 4.632826714725979e-09, + "loss": 0.0, + "num_input_tokens_seen": 134116648, + "step": 199020 + }, + { + "epoch": 4.862213861676398, + "grad_norm": 0.03120971843600273, + "learning_rate": 4.624631173176019e-09, + "loss": 0.0, + "num_input_tokens_seen": 134120296, + "step": 199025 + }, + { + "epoch": 4.862336012508245, + "grad_norm": 0.0002550722274463624, + "learning_rate": 4.616442870263304e-09, + "loss": 0.0, + "num_input_tokens_seen": 134123624, + "step": 199030 + }, + { + "epoch": 4.862458163340093, + "grad_norm": 0.00016976814367808402, + "learning_rate": 4.608261806047675e-09, + "loss": 0.0, + "num_input_tokens_seen": 134126824, + "step": 199035 + }, + { + "epoch": 4.8625803141719395, + "grad_norm": 0.00021611245756503195, + "learning_rate": 4.600087980588418e-09, + "loss": 0.0, + "num_input_tokens_seen": 134130152, + "step": 199040 + }, + { + "epoch": 4.862702465003787, + "grad_norm": 0.00029622766305692494, + "learning_rate": 4.591921393945042e-09, + "loss": 0.0, + "num_input_tokens_seen": 134133800, + "step": 199045 + }, + { + "epoch": 4.862824615835634, + "grad_norm": 0.00036044183070771396, + "learning_rate": 4.583762046177053e-09, + "loss": 0.0, + "num_input_tokens_seen": 134137256, + "step": 199050 + }, + { + "epoch": 4.862946766667481, + "grad_norm": 0.006442390847951174, + "learning_rate": 4.575609937343517e-09, + "loss": 0.0, + "num_input_tokens_seen": 134140648, + "step": 199055 + }, + { + "epoch": 4.863068917499328, + "grad_norm": 0.0006749753374606371, + "learning_rate": 4.567465067504051e-09, + "loss": 0.0, + "num_input_tokens_seen": 134144040, + "step": 199060 + }, + { + "epoch": 4.863191068331175, + "grad_norm": 3.755068973987363e-05, + "learning_rate": 4.559327436717608e-09, + "loss": 0.0, + "num_input_tokens_seen": 134148200, + "step": 199065 + }, + { + "epoch": 4.863313219163023, + "grad_norm": 0.0005150276701897383, + "learning_rate": 4.5511970450434755e-09, + "loss": 0.0, + "num_input_tokens_seen": 134151528, + "step": 199070 + }, + { + "epoch": 4.863435369994869, + "grad_norm": 0.0005858474760316312, + "learning_rate": 4.543073892540828e-09, + "loss": 0.0, + "num_input_tokens_seen": 134154664, + "step": 199075 + }, + { + "epoch": 4.863557520826717, + "grad_norm": 0.00026716565480455756, + "learning_rate": 4.534957979268728e-09, + "loss": 0.0, + "num_input_tokens_seen": 134157672, + "step": 199080 + }, + { + "epoch": 4.863679671658564, + "grad_norm": 0.00024082417075987905, + "learning_rate": 4.526849305286129e-09, + "loss": 0.0, + "num_input_tokens_seen": 134160744, + "step": 199085 + }, + { + "epoch": 4.863801822490411, + "grad_norm": 0.0014010306913405657, + "learning_rate": 4.518747870651985e-09, + "loss": 0.0, + "num_input_tokens_seen": 134164392, + "step": 199090 + }, + { + "epoch": 4.863923973322258, + "grad_norm": 8.185812475858256e-06, + "learning_rate": 4.510653675425358e-09, + "loss": 0.0, + "num_input_tokens_seen": 134167208, + "step": 199095 + }, + { + "epoch": 4.864046124154106, + "grad_norm": 0.0003967114898841828, + "learning_rate": 4.502566719664869e-09, + "loss": 0.0, + "num_input_tokens_seen": 134170280, + "step": 199100 + }, + { + "epoch": 4.8641682749859525, + "grad_norm": 0.0006387066678144038, + "learning_rate": 4.494487003429581e-09, + "loss": 0.0, + "num_input_tokens_seen": 134173736, + "step": 199105 + }, + { + "epoch": 4.8642904258178, + "grad_norm": 5.824856089020614e-06, + "learning_rate": 4.486414526778115e-09, + "loss": 0.0, + "num_input_tokens_seen": 134177000, + "step": 199110 + }, + { + "epoch": 4.864412576649647, + "grad_norm": 0.00041367491940036416, + "learning_rate": 4.478349289769201e-09, + "loss": 0.0, + "num_input_tokens_seen": 134180904, + "step": 199115 + }, + { + "epoch": 4.8645347274814945, + "grad_norm": 0.00021077069686725736, + "learning_rate": 4.470291292461459e-09, + "loss": 0.0, + "num_input_tokens_seen": 134184168, + "step": 199120 + }, + { + "epoch": 4.864656878313341, + "grad_norm": 0.0019649332389235497, + "learning_rate": 4.462240534913508e-09, + "loss": 0.0, + "num_input_tokens_seen": 134187944, + "step": 199125 + }, + { + "epoch": 4.864779029145188, + "grad_norm": 3.090451718890108e-05, + "learning_rate": 4.45419701718397e-09, + "loss": 0.0, + "num_input_tokens_seen": 134191208, + "step": 199130 + }, + { + "epoch": 4.864901179977036, + "grad_norm": 0.00044984908890910447, + "learning_rate": 4.446160739331239e-09, + "loss": 0.0, + "num_input_tokens_seen": 134194472, + "step": 199135 + }, + { + "epoch": 4.865023330808883, + "grad_norm": 0.0001340339076705277, + "learning_rate": 4.4381317014138274e-09, + "loss": 0.0, + "num_input_tokens_seen": 134197480, + "step": 199140 + }, + { + "epoch": 4.86514548164073, + "grad_norm": 0.0010863590287044644, + "learning_rate": 4.4301099034901315e-09, + "loss": 0.0, + "num_input_tokens_seen": 134201128, + "step": 199145 + }, + { + "epoch": 4.865267632472577, + "grad_norm": 0.00024744641268625855, + "learning_rate": 4.422095345618437e-09, + "loss": 0.0, + "num_input_tokens_seen": 134204968, + "step": 199150 + }, + { + "epoch": 4.865389783304424, + "grad_norm": 3.0349398002726957e-05, + "learning_rate": 4.414088027857032e-09, + "loss": 0.0, + "num_input_tokens_seen": 134208616, + "step": 199155 + }, + { + "epoch": 4.865511934136271, + "grad_norm": 0.00012485562183428556, + "learning_rate": 4.406087950264092e-09, + "loss": 0.0, + "num_input_tokens_seen": 134211752, + "step": 199160 + }, + { + "epoch": 4.865634084968119, + "grad_norm": 2.2154621547088027e-05, + "learning_rate": 4.398095112898015e-09, + "loss": 0.0, + "num_input_tokens_seen": 134215144, + "step": 199165 + }, + { + "epoch": 4.8657562357999655, + "grad_norm": 0.0002064045111183077, + "learning_rate": 4.390109515816642e-09, + "loss": 0.0, + "num_input_tokens_seen": 134218728, + "step": 199170 + }, + { + "epoch": 4.865878386631813, + "grad_norm": 3.425988688832149e-05, + "learning_rate": 4.3821311590781505e-09, + "loss": 0.0, + "num_input_tokens_seen": 134222440, + "step": 199175 + }, + { + "epoch": 4.86600053746366, + "grad_norm": 8.955002704169601e-05, + "learning_rate": 4.374160042740716e-09, + "loss": 0.0, + "num_input_tokens_seen": 134225640, + "step": 199180 + }, + { + "epoch": 4.8661226882955075, + "grad_norm": 1.300713665841613e-05, + "learning_rate": 4.366196166862179e-09, + "loss": 0.0, + "num_input_tokens_seen": 134229096, + "step": 199185 + }, + { + "epoch": 4.866244839127354, + "grad_norm": 0.00013864053471479565, + "learning_rate": 4.358239531500385e-09, + "loss": 0.0, + "num_input_tokens_seen": 134232360, + "step": 199190 + }, + { + "epoch": 4.866366989959202, + "grad_norm": 0.0013332118978723884, + "learning_rate": 4.3502901367132864e-09, + "loss": 0.0, + "num_input_tokens_seen": 134235432, + "step": 199195 + }, + { + "epoch": 4.866489140791049, + "grad_norm": 6.434672104660422e-05, + "learning_rate": 4.342347982558614e-09, + "loss": 0.0, + "num_input_tokens_seen": 134239400, + "step": 199200 + }, + { + "epoch": 4.866611291622896, + "grad_norm": 0.00048381893429905176, + "learning_rate": 4.334413069094322e-09, + "loss": 0.0, + "num_input_tokens_seen": 134242984, + "step": 199205 + }, + { + "epoch": 4.866733442454743, + "grad_norm": 0.0002854143676813692, + "learning_rate": 4.326485396377921e-09, + "loss": 0.0, + "num_input_tokens_seen": 134246440, + "step": 199210 + }, + { + "epoch": 4.866855593286591, + "grad_norm": 5.3498759370995685e-05, + "learning_rate": 4.318564964467031e-09, + "loss": 0.0, + "num_input_tokens_seen": 134249896, + "step": 199215 + }, + { + "epoch": 4.866977744118437, + "grad_norm": 1.6442389096482657e-05, + "learning_rate": 4.3106517734194935e-09, + "loss": 0.0, + "num_input_tokens_seen": 134252968, + "step": 199220 + }, + { + "epoch": 4.867099894950284, + "grad_norm": 0.00855648797005415, + "learning_rate": 4.302745823292598e-09, + "loss": 0.0, + "num_input_tokens_seen": 134255976, + "step": 199225 + }, + { + "epoch": 4.867222045782132, + "grad_norm": 3.934272172045894e-05, + "learning_rate": 4.294847114143963e-09, + "loss": 0.0, + "num_input_tokens_seen": 134259944, + "step": 199230 + }, + { + "epoch": 4.867344196613979, + "grad_norm": 1.6429348761448637e-05, + "learning_rate": 4.286955646030988e-09, + "loss": 0.0, + "num_input_tokens_seen": 134263080, + "step": 199235 + }, + { + "epoch": 4.867466347445826, + "grad_norm": 0.009650076739490032, + "learning_rate": 4.279071419011182e-09, + "loss": 0.0, + "num_input_tokens_seen": 134266280, + "step": 199240 + }, + { + "epoch": 4.867588498277673, + "grad_norm": 0.00010051135905086994, + "learning_rate": 4.271194433141723e-09, + "loss": 0.0, + "num_input_tokens_seen": 134269864, + "step": 199245 + }, + { + "epoch": 4.867710649109521, + "grad_norm": 0.0030795312486588955, + "learning_rate": 4.263324688480008e-09, + "loss": 0.0, + "num_input_tokens_seen": 134273128, + "step": 199250 + }, + { + "epoch": 4.867832799941367, + "grad_norm": 2.8745831514243037e-05, + "learning_rate": 4.255462185083103e-09, + "loss": 0.0, + "num_input_tokens_seen": 134276456, + "step": 199255 + }, + { + "epoch": 4.867954950773215, + "grad_norm": 0.0016464096261188388, + "learning_rate": 4.2476069230084066e-09, + "loss": 0.0, + "num_input_tokens_seen": 134279592, + "step": 199260 + }, + { + "epoch": 4.868077101605062, + "grad_norm": 0.004813062958419323, + "learning_rate": 4.239758902312873e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134284840, + "step": 199265 + }, + { + "epoch": 4.868199252436909, + "grad_norm": 0.0006078015430830419, + "learning_rate": 4.231918123053679e-09, + "loss": 0.0, + "num_input_tokens_seen": 134288424, + "step": 199270 + }, + { + "epoch": 4.868321403268756, + "grad_norm": 0.0001032560394378379, + "learning_rate": 4.22408458528778e-09, + "loss": 0.0, + "num_input_tokens_seen": 134291560, + "step": 199275 + }, + { + "epoch": 4.868443554100604, + "grad_norm": 0.0002666850632522255, + "learning_rate": 4.216258289072128e-09, + "loss": 0.0, + "num_input_tokens_seen": 134294760, + "step": 199280 + }, + { + "epoch": 4.8685657049324504, + "grad_norm": 0.0008626289200037718, + "learning_rate": 4.20843923446379e-09, + "loss": 0.0, + "num_input_tokens_seen": 134298600, + "step": 199285 + }, + { + "epoch": 4.868687855764298, + "grad_norm": 0.00012125779176130891, + "learning_rate": 4.200627421519498e-09, + "loss": 0.0, + "num_input_tokens_seen": 134301864, + "step": 199290 + }, + { + "epoch": 4.868810006596145, + "grad_norm": 0.00019722123397514224, + "learning_rate": 4.192822850295985e-09, + "loss": 0.0, + "num_input_tokens_seen": 134305064, + "step": 199295 + }, + { + "epoch": 4.8689321574279925, + "grad_norm": 0.00021909379574935883, + "learning_rate": 4.185025520850205e-09, + "loss": 0.0, + "num_input_tokens_seen": 134309224, + "step": 199300 + }, + { + "epoch": 4.869054308259839, + "grad_norm": 0.0006481913733296096, + "learning_rate": 4.1772354332386686e-09, + "loss": 0.0, + "num_input_tokens_seen": 134312488, + "step": 199305 + }, + { + "epoch": 4.869176459091687, + "grad_norm": 6.552576087415218e-05, + "learning_rate": 4.169452587518219e-09, + "loss": 0.0, + "num_input_tokens_seen": 134316328, + "step": 199310 + }, + { + "epoch": 4.869298609923534, + "grad_norm": 0.028785167261958122, + "learning_rate": 4.161676983745255e-09, + "loss": 0.0, + "num_input_tokens_seen": 134319592, + "step": 199315 + }, + { + "epoch": 4.86942076075538, + "grad_norm": 0.000281141052255407, + "learning_rate": 4.15390862197651e-09, + "loss": 0.0, + "num_input_tokens_seen": 134322856, + "step": 199320 + }, + { + "epoch": 4.869542911587228, + "grad_norm": 0.00023606415197718889, + "learning_rate": 4.146147502268383e-09, + "loss": 0.0, + "num_input_tokens_seen": 134326056, + "step": 199325 + }, + { + "epoch": 4.869665062419075, + "grad_norm": 4.8425907152704895e-05, + "learning_rate": 4.138393624677272e-09, + "loss": 0.0, + "num_input_tokens_seen": 134329384, + "step": 199330 + }, + { + "epoch": 4.869787213250922, + "grad_norm": 2.464874523866456e-05, + "learning_rate": 4.13064698925969e-09, + "loss": 0.0, + "num_input_tokens_seen": 134332328, + "step": 199335 + }, + { + "epoch": 4.869909364082769, + "grad_norm": 0.0005067095626145601, + "learning_rate": 4.122907596071812e-09, + "loss": 0.0, + "num_input_tokens_seen": 134335720, + "step": 199340 + }, + { + "epoch": 4.870031514914617, + "grad_norm": 0.00011708718375302851, + "learning_rate": 4.115175445170038e-09, + "loss": 0.0, + "num_input_tokens_seen": 134339560, + "step": 199345 + }, + { + "epoch": 4.8701536657464635, + "grad_norm": 0.0003647690755315125, + "learning_rate": 4.107450536610657e-09, + "loss": 0.0, + "num_input_tokens_seen": 134342632, + "step": 199350 + }, + { + "epoch": 4.870275816578311, + "grad_norm": 0.0004586986906360835, + "learning_rate": 4.099732870449624e-09, + "loss": 0.0, + "num_input_tokens_seen": 134346088, + "step": 199355 + }, + { + "epoch": 4.870397967410158, + "grad_norm": 8.667811925988644e-05, + "learning_rate": 4.092022446743337e-09, + "loss": 0.0, + "num_input_tokens_seen": 134350056, + "step": 199360 + }, + { + "epoch": 4.8705201182420055, + "grad_norm": 6.58802455291152e-05, + "learning_rate": 4.084319265547531e-09, + "loss": 0.0, + "num_input_tokens_seen": 134353256, + "step": 199365 + }, + { + "epoch": 4.870642269073852, + "grad_norm": 3.016005030076485e-05, + "learning_rate": 4.076623326918604e-09, + "loss": 0.0, + "num_input_tokens_seen": 134356456, + "step": 199370 + }, + { + "epoch": 4.8707644199057, + "grad_norm": 0.0002504217263776809, + "learning_rate": 4.068934630912291e-09, + "loss": 0.0, + "num_input_tokens_seen": 134359464, + "step": 199375 + }, + { + "epoch": 4.870886570737547, + "grad_norm": 0.0016258807154372334, + "learning_rate": 4.061253177584545e-09, + "loss": 0.0, + "num_input_tokens_seen": 134362664, + "step": 199380 + }, + { + "epoch": 4.871008721569394, + "grad_norm": 9.487319039180875e-05, + "learning_rate": 4.053578966991211e-09, + "loss": 0.0, + "num_input_tokens_seen": 134365928, + "step": 199385 + }, + { + "epoch": 4.871130872401241, + "grad_norm": 7.63261632528156e-05, + "learning_rate": 4.045911999188245e-09, + "loss": 0.0, + "num_input_tokens_seen": 134369256, + "step": 199390 + }, + { + "epoch": 4.871253023233088, + "grad_norm": 0.0032971366308629513, + "learning_rate": 4.038252274231157e-09, + "loss": 0.0, + "num_input_tokens_seen": 134372968, + "step": 199395 + }, + { + "epoch": 4.871375174064935, + "grad_norm": 0.0007157826912589371, + "learning_rate": 4.030599792175904e-09, + "loss": 0.0, + "num_input_tokens_seen": 134376488, + "step": 199400 + }, + { + "epoch": 4.871497324896783, + "grad_norm": 0.00042528269113972783, + "learning_rate": 4.022954553077884e-09, + "loss": 0.0, + "num_input_tokens_seen": 134379560, + "step": 199405 + }, + { + "epoch": 4.87161947572863, + "grad_norm": 0.00018838932737708092, + "learning_rate": 4.015316556992943e-09, + "loss": 0.0, + "num_input_tokens_seen": 134383016, + "step": 199410 + }, + { + "epoch": 4.8717416265604765, + "grad_norm": 53.78935241699219, + "learning_rate": 4.007685803976479e-09, + "loss": 0.079, + "num_input_tokens_seen": 134386344, + "step": 199415 + }, + { + "epoch": 4.871863777392324, + "grad_norm": 0.00014842470409348607, + "learning_rate": 4.0000622940838945e-09, + "loss": 0.0, + "num_input_tokens_seen": 134390056, + "step": 199420 + }, + { + "epoch": 4.871985928224171, + "grad_norm": 0.0018565324135124683, + "learning_rate": 3.99244602737081e-09, + "loss": 0.0, + "num_input_tokens_seen": 134393448, + "step": 199425 + }, + { + "epoch": 4.8721080790560185, + "grad_norm": 0.0063219680450856686, + "learning_rate": 3.9848370038926275e-09, + "loss": 0.0002, + "num_input_tokens_seen": 134396712, + "step": 199430 + }, + { + "epoch": 4.872230229887865, + "grad_norm": 0.00012290282757021487, + "learning_rate": 3.977235223704523e-09, + "loss": 0.0, + "num_input_tokens_seen": 134399656, + "step": 199435 + }, + { + "epoch": 4.872352380719713, + "grad_norm": 0.0008801176445558667, + "learning_rate": 3.969640686861897e-09, + "loss": 0.0, + "num_input_tokens_seen": 134403304, + "step": 199440 + }, + { + "epoch": 4.87247453155156, + "grad_norm": 0.000574213161598891, + "learning_rate": 3.962053393419929e-09, + "loss": 0.0, + "num_input_tokens_seen": 134406184, + "step": 199445 + }, + { + "epoch": 4.872596682383407, + "grad_norm": 0.005858482327312231, + "learning_rate": 3.954473343433795e-09, + "loss": 0.0, + "num_input_tokens_seen": 134409960, + "step": 199450 + }, + { + "epoch": 4.872718833215254, + "grad_norm": 0.001610560342669487, + "learning_rate": 3.946900536958675e-09, + "loss": 0.0, + "num_input_tokens_seen": 134413480, + "step": 199455 + }, + { + "epoch": 4.872840984047102, + "grad_norm": 5.890335887670517e-05, + "learning_rate": 3.939334974049635e-09, + "loss": 0.0, + "num_input_tokens_seen": 134416872, + "step": 199460 + }, + { + "epoch": 4.872963134878948, + "grad_norm": 0.0007327334024012089, + "learning_rate": 3.931776654761631e-09, + "loss": 0.0, + "num_input_tokens_seen": 134420008, + "step": 199465 + }, + { + "epoch": 4.873085285710796, + "grad_norm": 0.0007441366324201226, + "learning_rate": 3.924225579149621e-09, + "loss": 0.0, + "num_input_tokens_seen": 134423272, + "step": 199470 + }, + { + "epoch": 4.873207436542643, + "grad_norm": 0.0051063066348433495, + "learning_rate": 3.916681747268558e-09, + "loss": 0.0, + "num_input_tokens_seen": 134426536, + "step": 199475 + }, + { + "epoch": 4.87332958737449, + "grad_norm": 0.0001381008914904669, + "learning_rate": 3.909145159173289e-09, + "loss": 0.0, + "num_input_tokens_seen": 134430568, + "step": 199480 + }, + { + "epoch": 4.873451738206337, + "grad_norm": 1.1536059901118279e-05, + "learning_rate": 3.901615814918657e-09, + "loss": 0.0, + "num_input_tokens_seen": 134433640, + "step": 199485 + }, + { + "epoch": 4.873573889038184, + "grad_norm": 0.0011014309711754322, + "learning_rate": 3.894093714559399e-09, + "loss": 0.0, + "num_input_tokens_seen": 134436904, + "step": 199490 + }, + { + "epoch": 4.8736960398700315, + "grad_norm": 0.0012070384109392762, + "learning_rate": 3.886578858150247e-09, + "loss": 0.0, + "num_input_tokens_seen": 134440168, + "step": 199495 + }, + { + "epoch": 4.873818190701879, + "grad_norm": 4.059781349496916e-05, + "learning_rate": 3.879071245745713e-09, + "loss": 0.0, + "num_input_tokens_seen": 134443496, + "step": 199500 + }, + { + "epoch": 4.873940341533726, + "grad_norm": 0.001782463165000081, + "learning_rate": 3.871570877400643e-09, + "loss": 0.0, + "num_input_tokens_seen": 134447208, + "step": 199505 + }, + { + "epoch": 4.874062492365573, + "grad_norm": 0.00033074626117013395, + "learning_rate": 3.864077753169326e-09, + "loss": 0.0, + "num_input_tokens_seen": 134450792, + "step": 199510 + }, + { + "epoch": 4.87418464319742, + "grad_norm": 0.00011855031334562227, + "learning_rate": 3.8565918731063855e-09, + "loss": 0.0, + "num_input_tokens_seen": 134454952, + "step": 199515 + }, + { + "epoch": 4.874306794029267, + "grad_norm": 0.009077893570065498, + "learning_rate": 3.849113237266222e-09, + "loss": 0.0, + "num_input_tokens_seen": 134457960, + "step": 199520 + }, + { + "epoch": 4.874428944861115, + "grad_norm": 0.019870661199092865, + "learning_rate": 3.8416418457032365e-09, + "loss": 0.0, + "num_input_tokens_seen": 134461608, + "step": 199525 + }, + { + "epoch": 4.874551095692961, + "grad_norm": 0.0001336112036369741, + "learning_rate": 3.83417769847183e-09, + "loss": 0.0, + "num_input_tokens_seen": 134464360, + "step": 199530 + }, + { + "epoch": 4.874673246524809, + "grad_norm": 0.00011955316585954279, + "learning_rate": 3.826720795626181e-09, + "loss": 0.0, + "num_input_tokens_seen": 134467944, + "step": 199535 + }, + { + "epoch": 4.874795397356656, + "grad_norm": 0.0008456766954623163, + "learning_rate": 3.819271137220581e-09, + "loss": 0.0, + "num_input_tokens_seen": 134470952, + "step": 199540 + }, + { + "epoch": 4.874917548188503, + "grad_norm": 0.0008179315482266247, + "learning_rate": 3.8118287233090965e-09, + "loss": 0.0, + "num_input_tokens_seen": 134474536, + "step": 199545 + }, + { + "epoch": 4.87503969902035, + "grad_norm": 2.3027980205370113e-05, + "learning_rate": 3.804393553946017e-09, + "loss": 0.0, + "num_input_tokens_seen": 134477800, + "step": 199550 + }, + { + "epoch": 4.875161849852198, + "grad_norm": 0.003511697519570589, + "learning_rate": 3.7969656291853e-09, + "loss": 0.0, + "num_input_tokens_seen": 134481000, + "step": 199555 + }, + { + "epoch": 4.8752840006840445, + "grad_norm": 9.18523728614673e-05, + "learning_rate": 3.789544949081014e-09, + "loss": 0.0, + "num_input_tokens_seen": 134484584, + "step": 199560 + }, + { + "epoch": 4.875406151515892, + "grad_norm": 0.0005713349091820419, + "learning_rate": 3.7821315136871145e-09, + "loss": 0.0, + "num_input_tokens_seen": 134488168, + "step": 199565 + }, + { + "epoch": 4.875528302347739, + "grad_norm": 0.001412555342540145, + "learning_rate": 3.774725323057449e-09, + "loss": 0.0, + "num_input_tokens_seen": 134491432, + "step": 199570 + }, + { + "epoch": 4.875650453179587, + "grad_norm": 1.3871308510715608e-05, + "learning_rate": 3.767326377245972e-09, + "loss": 0.0, + "num_input_tokens_seen": 134494760, + "step": 199575 + }, + { + "epoch": 4.875772604011433, + "grad_norm": 0.00034401044831611216, + "learning_rate": 3.75993467630642e-09, + "loss": 0.0, + "num_input_tokens_seen": 134497896, + "step": 199580 + }, + { + "epoch": 4.87589475484328, + "grad_norm": 0.00021265115356072783, + "learning_rate": 3.752550220292638e-09, + "loss": 0.0, + "num_input_tokens_seen": 134501736, + "step": 199585 + }, + { + "epoch": 4.876016905675128, + "grad_norm": 0.0026902747340500355, + "learning_rate": 3.745173009258252e-09, + "loss": 0.0, + "num_input_tokens_seen": 134505128, + "step": 199590 + }, + { + "epoch": 4.876139056506974, + "grad_norm": 0.0028633566107600927, + "learning_rate": 3.737803043256993e-09, + "loss": 0.0, + "num_input_tokens_seen": 134508712, + "step": 199595 + }, + { + "epoch": 4.876261207338822, + "grad_norm": 0.00011423487012507394, + "learning_rate": 3.730440322342266e-09, + "loss": 0.0, + "num_input_tokens_seen": 134512424, + "step": 199600 + }, + { + "epoch": 4.876383358170669, + "grad_norm": 0.001742964843288064, + "learning_rate": 3.7230848465678033e-09, + "loss": 0.0, + "num_input_tokens_seen": 134515816, + "step": 199605 + }, + { + "epoch": 4.8765055090025164, + "grad_norm": 0.0001392453268636018, + "learning_rate": 3.7157366159870086e-09, + "loss": 0.0, + "num_input_tokens_seen": 134519144, + "step": 199610 + }, + { + "epoch": 4.876627659834363, + "grad_norm": 4.974427793058567e-05, + "learning_rate": 3.7083956306533936e-09, + "loss": 0.0, + "num_input_tokens_seen": 134522536, + "step": 199615 + }, + { + "epoch": 4.876749810666211, + "grad_norm": 0.00012248499842826277, + "learning_rate": 3.7010618906202494e-09, + "loss": 0.0, + "num_input_tokens_seen": 134525544, + "step": 199620 + }, + { + "epoch": 4.876871961498058, + "grad_norm": 0.00078915199264884, + "learning_rate": 3.693735395940978e-09, + "loss": 0.0, + "num_input_tokens_seen": 134528808, + "step": 199625 + }, + { + "epoch": 4.876994112329905, + "grad_norm": 0.0011299817124381661, + "learning_rate": 3.6864161466688694e-09, + "loss": 0.0, + "num_input_tokens_seen": 134532264, + "step": 199630 + }, + { + "epoch": 4.877116263161752, + "grad_norm": 0.0001292000088142231, + "learning_rate": 3.6791041428569926e-09, + "loss": 0.0, + "num_input_tokens_seen": 134535400, + "step": 199635 + }, + { + "epoch": 4.8772384139936, + "grad_norm": 0.0001341778552159667, + "learning_rate": 3.6717993845587493e-09, + "loss": 0.0, + "num_input_tokens_seen": 134539432, + "step": 199640 + }, + { + "epoch": 4.877360564825446, + "grad_norm": 0.0006362085114233196, + "learning_rate": 3.6645018718272082e-09, + "loss": 0.0, + "num_input_tokens_seen": 134542632, + "step": 199645 + }, + { + "epoch": 4.877482715657294, + "grad_norm": 0.005953234154731035, + "learning_rate": 3.6572116047153267e-09, + "loss": 0.0, + "num_input_tokens_seen": 134546024, + "step": 199650 + }, + { + "epoch": 4.877604866489141, + "grad_norm": 0.003755022771656513, + "learning_rate": 3.649928583276174e-09, + "loss": 0.0, + "num_input_tokens_seen": 134549160, + "step": 199655 + }, + { + "epoch": 4.8777270173209875, + "grad_norm": 0.012922837398946285, + "learning_rate": 3.6426528075627073e-09, + "loss": 0.0, + "num_input_tokens_seen": 134551848, + "step": 199660 + }, + { + "epoch": 4.877849168152835, + "grad_norm": 0.004435810260474682, + "learning_rate": 3.635384277627884e-09, + "loss": 0.0, + "num_input_tokens_seen": 134555624, + "step": 199665 + }, + { + "epoch": 4.877971318984683, + "grad_norm": 0.00010084949462907389, + "learning_rate": 3.6281229935245516e-09, + "loss": 0.0, + "num_input_tokens_seen": 134559080, + "step": 199670 + }, + { + "epoch": 4.8780934698165295, + "grad_norm": 0.00020235779811628163, + "learning_rate": 3.620868955305445e-09, + "loss": 0.0, + "num_input_tokens_seen": 134562280, + "step": 199675 + }, + { + "epoch": 4.878215620648376, + "grad_norm": 8.76703197718598e-05, + "learning_rate": 3.613622163023522e-09, + "loss": 0.0, + "num_input_tokens_seen": 134565800, + "step": 199680 + }, + { + "epoch": 4.878337771480224, + "grad_norm": 4.6558223402826115e-05, + "learning_rate": 3.606382616731185e-09, + "loss": 0.0, + "num_input_tokens_seen": 134569320, + "step": 199685 + }, + { + "epoch": 4.878459922312071, + "grad_norm": 0.03909289836883545, + "learning_rate": 3.599150316481281e-09, + "loss": 0.0464, + "num_input_tokens_seen": 134572712, + "step": 199690 + }, + { + "epoch": 4.878582073143918, + "grad_norm": 3.961454058298841e-05, + "learning_rate": 3.591925262326323e-09, + "loss": 0.0, + "num_input_tokens_seen": 134576104, + "step": 199695 + }, + { + "epoch": 4.878704223975765, + "grad_norm": 2.078626857837662e-05, + "learning_rate": 3.584707454318936e-09, + "loss": 0.0, + "num_input_tokens_seen": 134579368, + "step": 199700 + }, + { + "epoch": 4.878826374807613, + "grad_norm": 9.76806586550083e-06, + "learning_rate": 3.5774968925115223e-09, + "loss": 0.0, + "num_input_tokens_seen": 134582376, + "step": 199705 + }, + { + "epoch": 4.878948525639459, + "grad_norm": 6.536354339914396e-05, + "learning_rate": 3.5702935769565956e-09, + "loss": 0.0, + "num_input_tokens_seen": 134586024, + "step": 199710 + }, + { + "epoch": 4.879070676471307, + "grad_norm": 9.415813838131726e-05, + "learning_rate": 3.5630975077065583e-09, + "loss": 0.0, + "num_input_tokens_seen": 134589736, + "step": 199715 + }, + { + "epoch": 4.879192827303154, + "grad_norm": 0.0004944285610690713, + "learning_rate": 3.555908684813591e-09, + "loss": 0.0, + "num_input_tokens_seen": 134593384, + "step": 199720 + }, + { + "epoch": 4.879314978135001, + "grad_norm": 0.00047317787539213896, + "learning_rate": 3.5487271083300962e-09, + "loss": 0.0, + "num_input_tokens_seen": 134596968, + "step": 199725 + }, + { + "epoch": 4.879437128966848, + "grad_norm": 0.0010366524802520871, + "learning_rate": 3.5415527783082544e-09, + "loss": 0.0, + "num_input_tokens_seen": 134600424, + "step": 199730 + }, + { + "epoch": 4.879559279798696, + "grad_norm": 7.29838793631643e-05, + "learning_rate": 3.534385694800246e-09, + "loss": 0.0, + "num_input_tokens_seen": 134604072, + "step": 199735 + }, + { + "epoch": 4.8796814306305425, + "grad_norm": 6.1684686443186365e-06, + "learning_rate": 3.5272258578581405e-09, + "loss": 0.0, + "num_input_tokens_seen": 134607272, + "step": 199740 + }, + { + "epoch": 4.87980358146239, + "grad_norm": 4.777483263751492e-05, + "learning_rate": 3.5200732675341185e-09, + "loss": 0.0, + "num_input_tokens_seen": 134610408, + "step": 199745 + }, + { + "epoch": 4.879925732294237, + "grad_norm": 0.00011650729720713571, + "learning_rate": 3.512927923880249e-09, + "loss": 0.0, + "num_input_tokens_seen": 134613672, + "step": 199750 + }, + { + "epoch": 4.880047883126084, + "grad_norm": 0.001114787650294602, + "learning_rate": 3.505789826948269e-09, + "loss": 0.0, + "num_input_tokens_seen": 134616872, + "step": 199755 + }, + { + "epoch": 4.880170033957931, + "grad_norm": 0.0028762961737811565, + "learning_rate": 3.4986589767902476e-09, + "loss": 0.0, + "num_input_tokens_seen": 134620392, + "step": 199760 + }, + { + "epoch": 4.880292184789779, + "grad_norm": 5.495239292940823e-06, + "learning_rate": 3.4915353734580322e-09, + "loss": 0.0, + "num_input_tokens_seen": 134623528, + "step": 199765 + }, + { + "epoch": 4.880414335621626, + "grad_norm": 0.01798221282660961, + "learning_rate": 3.4844190170033596e-09, + "loss": 0.0, + "num_input_tokens_seen": 134626536, + "step": 199770 + }, + { + "epoch": 4.880536486453472, + "grad_norm": 2.4297723939525895e-05, + "learning_rate": 3.4773099074780765e-09, + "loss": 0.0, + "num_input_tokens_seen": 134629928, + "step": 199775 + }, + { + "epoch": 4.88065863728532, + "grad_norm": 0.0001187268499052152, + "learning_rate": 3.470208044933809e-09, + "loss": 0.031, + "num_input_tokens_seen": 134633256, + "step": 199780 + }, + { + "epoch": 4.880780788117167, + "grad_norm": 0.00022896818700246513, + "learning_rate": 3.463113429422182e-09, + "loss": 0.0, + "num_input_tokens_seen": 134636392, + "step": 199785 + }, + { + "epoch": 4.880902938949014, + "grad_norm": 0.00026889523724094033, + "learning_rate": 3.456026060994821e-09, + "loss": 0.0005, + "num_input_tokens_seen": 134639720, + "step": 199790 + }, + { + "epoch": 4.881025089780861, + "grad_norm": 0.00028634234331548214, + "learning_rate": 3.4489459397033514e-09, + "loss": 0.0, + "num_input_tokens_seen": 134643112, + "step": 199795 + }, + { + "epoch": 4.881147240612709, + "grad_norm": 8.220658492064103e-05, + "learning_rate": 3.441873065599066e-09, + "loss": 0.0, + "num_input_tokens_seen": 134646504, + "step": 199800 + }, + { + "epoch": 4.8812693914445555, + "grad_norm": 0.00017027057765517384, + "learning_rate": 3.4348074387337e-09, + "loss": 0.0, + "num_input_tokens_seen": 134650024, + "step": 199805 + }, + { + "epoch": 4.881391542276403, + "grad_norm": 6.944937922526151e-05, + "learning_rate": 3.4277490591583245e-09, + "loss": 0.0, + "num_input_tokens_seen": 134653544, + "step": 199810 + }, + { + "epoch": 4.88151369310825, + "grad_norm": 0.00022692351194564253, + "learning_rate": 3.420697926924454e-09, + "loss": 0.0, + "num_input_tokens_seen": 134657064, + "step": 199815 + }, + { + "epoch": 4.8816358439400975, + "grad_norm": 3.496105273370631e-05, + "learning_rate": 3.413654042083269e-09, + "loss": 0.0, + "num_input_tokens_seen": 134660584, + "step": 199820 + }, + { + "epoch": 4.881757994771944, + "grad_norm": 0.0008579176501370966, + "learning_rate": 3.406617404686063e-09, + "loss": 0.0, + "num_input_tokens_seen": 134664424, + "step": 199825 + }, + { + "epoch": 4.881880145603792, + "grad_norm": 0.0008436653297394514, + "learning_rate": 3.3995880147840163e-09, + "loss": 0.0, + "num_input_tokens_seen": 134667816, + "step": 199830 + }, + { + "epoch": 4.882002296435639, + "grad_norm": 6.152554851723835e-05, + "learning_rate": 3.392565872428199e-09, + "loss": 0.0, + "num_input_tokens_seen": 134671144, + "step": 199835 + }, + { + "epoch": 4.882124447267486, + "grad_norm": 0.00047535798512399197, + "learning_rate": 3.385550977669682e-09, + "loss": 0.0, + "num_input_tokens_seen": 134674152, + "step": 199840 + }, + { + "epoch": 4.882246598099333, + "grad_norm": 0.00028677177033387125, + "learning_rate": 3.3785433305595355e-09, + "loss": 0.0, + "num_input_tokens_seen": 134677480, + "step": 199845 + }, + { + "epoch": 4.88236874893118, + "grad_norm": 1.8712211385718547e-05, + "learning_rate": 3.371542931148608e-09, + "loss": 0.058, + "num_input_tokens_seen": 134680936, + "step": 199850 + }, + { + "epoch": 4.882490899763027, + "grad_norm": 0.0001424605870852247, + "learning_rate": 3.3645497794879684e-09, + "loss": 0.0002, + "num_input_tokens_seen": 134684264, + "step": 199855 + }, + { + "epoch": 4.882613050594875, + "grad_norm": 0.0006394163356162608, + "learning_rate": 3.3575638756283555e-09, + "loss": 0.0, + "num_input_tokens_seen": 134687208, + "step": 199860 + }, + { + "epoch": 4.882735201426722, + "grad_norm": 3.500204547890462e-05, + "learning_rate": 3.350585219620505e-09, + "loss": 0.0, + "num_input_tokens_seen": 134690728, + "step": 199865 + }, + { + "epoch": 4.8828573522585685, + "grad_norm": 0.004288224037736654, + "learning_rate": 3.343613811515378e-09, + "loss": 0.0, + "num_input_tokens_seen": 134694248, + "step": 199870 + }, + { + "epoch": 4.882979503090416, + "grad_norm": 0.00017929727619048208, + "learning_rate": 3.336649651363599e-09, + "loss": 0.0318, + "num_input_tokens_seen": 134698024, + "step": 199875 + }, + { + "epoch": 4.883101653922263, + "grad_norm": 1.3863214007869828e-05, + "learning_rate": 3.3296927392156836e-09, + "loss": 0.0, + "num_input_tokens_seen": 134701608, + "step": 199880 + }, + { + "epoch": 4.8832238047541106, + "grad_norm": 0.00016461328777950257, + "learning_rate": 3.3227430751223696e-09, + "loss": 0.0, + "num_input_tokens_seen": 134704616, + "step": 199885 + }, + { + "epoch": 4.883345955585957, + "grad_norm": 1.3935764400230255e-05, + "learning_rate": 3.3158006591340603e-09, + "loss": 0.0, + "num_input_tokens_seen": 134707752, + "step": 199890 + }, + { + "epoch": 4.883468106417805, + "grad_norm": 3.107173688476905e-05, + "learning_rate": 3.3088654913013825e-09, + "loss": 0.0, + "num_input_tokens_seen": 134711016, + "step": 199895 + }, + { + "epoch": 4.883590257249652, + "grad_norm": 0.00013317028060555458, + "learning_rate": 3.30193757167474e-09, + "loss": 0.0, + "num_input_tokens_seen": 134714536, + "step": 199900 + }, + { + "epoch": 4.883712408081499, + "grad_norm": 4.84615111417952e-06, + "learning_rate": 3.295016900304426e-09, + "loss": 0.0, + "num_input_tokens_seen": 134717544, + "step": 199905 + }, + { + "epoch": 4.883834558913346, + "grad_norm": 8.700467151356861e-05, + "learning_rate": 3.2881034772408444e-09, + "loss": 0.0, + "num_input_tokens_seen": 134720936, + "step": 199910 + }, + { + "epoch": 4.883956709745194, + "grad_norm": 6.082547042751685e-05, + "learning_rate": 3.281197302534289e-09, + "loss": 0.0, + "num_input_tokens_seen": 134724584, + "step": 199915 + }, + { + "epoch": 4.88407886057704, + "grad_norm": 7.671982166357338e-05, + "learning_rate": 3.2742983762349406e-09, + "loss": 0.0, + "num_input_tokens_seen": 134727720, + "step": 199920 + }, + { + "epoch": 4.884201011408888, + "grad_norm": 0.002736483933404088, + "learning_rate": 3.2674066983929826e-09, + "loss": 0.0, + "num_input_tokens_seen": 134731112, + "step": 199925 + }, + { + "epoch": 4.884323162240735, + "grad_norm": 0.00012476065603550524, + "learning_rate": 3.2605222690585967e-09, + "loss": 0.0, + "num_input_tokens_seen": 134734248, + "step": 199930 + }, + { + "epoch": 4.8844453130725825, + "grad_norm": 0.007749420590698719, + "learning_rate": 3.253645088281631e-09, + "loss": 0.0, + "num_input_tokens_seen": 134737320, + "step": 199935 + }, + { + "epoch": 4.884567463904429, + "grad_norm": 0.0003622000804170966, + "learning_rate": 3.2467751561123803e-09, + "loss": 0.0, + "num_input_tokens_seen": 134740648, + "step": 199940 + }, + { + "epoch": 4.884689614736276, + "grad_norm": 0.0007984722615219653, + "learning_rate": 3.2399124726005813e-09, + "loss": 0.0, + "num_input_tokens_seen": 134743976, + "step": 199945 + }, + { + "epoch": 4.884811765568124, + "grad_norm": 0.0003382827853783965, + "learning_rate": 3.2330570377963053e-09, + "loss": 0.0, + "num_input_tokens_seen": 134747112, + "step": 199950 + }, + { + "epoch": 4.88493391639997, + "grad_norm": 0.0002902325359173119, + "learning_rate": 3.2262088517492903e-09, + "loss": 0.0, + "num_input_tokens_seen": 134750696, + "step": 199955 + }, + { + "epoch": 4.885056067231818, + "grad_norm": 0.00018129262025468051, + "learning_rate": 3.2193679145093857e-09, + "loss": 0.0, + "num_input_tokens_seen": 134754344, + "step": 199960 + }, + { + "epoch": 4.885178218063665, + "grad_norm": 5.9551683079916984e-05, + "learning_rate": 3.21253422612644e-09, + "loss": 0.0, + "num_input_tokens_seen": 134758440, + "step": 199965 + }, + { + "epoch": 4.885300368895512, + "grad_norm": 9.369918188895099e-06, + "learning_rate": 3.205707786649858e-09, + "loss": 0.0, + "num_input_tokens_seen": 134761896, + "step": 199970 + }, + { + "epoch": 4.885422519727359, + "grad_norm": 0.00010241235577268526, + "learning_rate": 3.198888596129712e-09, + "loss": 0.0, + "num_input_tokens_seen": 134765608, + "step": 199975 + }, + { + "epoch": 4.885544670559207, + "grad_norm": 0.00010554419714026153, + "learning_rate": 3.1920766546151833e-09, + "loss": 0.0, + "num_input_tokens_seen": 134769064, + "step": 199980 + }, + { + "epoch": 4.8856668213910535, + "grad_norm": 0.01280683558434248, + "learning_rate": 3.185271962156011e-09, + "loss": 0.0, + "num_input_tokens_seen": 134772200, + "step": 199985 + }, + { + "epoch": 4.885788972222901, + "grad_norm": 8.941164560383186e-05, + "learning_rate": 3.1784745188017106e-09, + "loss": 0.0, + "num_input_tokens_seen": 134775400, + "step": 199990 + }, + { + "epoch": 4.885911123054748, + "grad_norm": 0.7302924990653992, + "learning_rate": 3.1716843246015757e-09, + "loss": 0.0004, + "num_input_tokens_seen": 134778600, + "step": 199995 + }, + { + "epoch": 4.8860332738865955, + "grad_norm": 0.00028197289793752134, + "learning_rate": 3.1649013796051226e-09, + "loss": 0.0, + "num_input_tokens_seen": 134782056, + "step": 200000 + }, + { + "epoch": 4.886155424718442, + "grad_norm": 0.0009042451856657863, + "learning_rate": 3.1581256838615346e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134785448, + "step": 200005 + }, + { + "epoch": 4.88627757555029, + "grad_norm": 0.0001275439135497436, + "learning_rate": 3.1513572374203267e-09, + "loss": 0.0, + "num_input_tokens_seen": 134788968, + "step": 200010 + }, + { + "epoch": 4.886399726382137, + "grad_norm": 0.00046351307537406683, + "learning_rate": 3.1445960403304605e-09, + "loss": 0.0, + "num_input_tokens_seen": 134792488, + "step": 200015 + }, + { + "epoch": 4.886521877213983, + "grad_norm": 0.0031093384604901075, + "learning_rate": 3.13784209264123e-09, + "loss": 0.0, + "num_input_tokens_seen": 134795752, + "step": 200020 + }, + { + "epoch": 4.886644028045831, + "grad_norm": 4.4503143726615235e-05, + "learning_rate": 3.131095394401817e-09, + "loss": 0.0, + "num_input_tokens_seen": 134799400, + "step": 200025 + }, + { + "epoch": 4.886766178877679, + "grad_norm": 7.445557275786996e-05, + "learning_rate": 3.1243559456610726e-09, + "loss": 0.0, + "num_input_tokens_seen": 134802600, + "step": 200030 + }, + { + "epoch": 4.886888329709525, + "grad_norm": 0.00014683169138152152, + "learning_rate": 3.117623746468179e-09, + "loss": 0.0, + "num_input_tokens_seen": 134806056, + "step": 200035 + }, + { + "epoch": 4.887010480541372, + "grad_norm": 0.00024286813277285546, + "learning_rate": 3.110898796872097e-09, + "loss": 0.0, + "num_input_tokens_seen": 134810216, + "step": 200040 + }, + { + "epoch": 4.88713263137322, + "grad_norm": 0.000297984981443733, + "learning_rate": 3.1041810969216766e-09, + "loss": 0.0, + "num_input_tokens_seen": 134813288, + "step": 200045 + }, + { + "epoch": 4.8872547822050665, + "grad_norm": 0.0006580796907655895, + "learning_rate": 3.0974706466657676e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134816360, + "step": 200050 + }, + { + "epoch": 4.887376933036914, + "grad_norm": 0.0007669659098610282, + "learning_rate": 3.09076744615322e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134819816, + "step": 200055 + }, + { + "epoch": 4.887499083868761, + "grad_norm": 1.6302408766932786e-05, + "learning_rate": 3.0840714954326608e-09, + "loss": 0.0, + "num_input_tokens_seen": 134823016, + "step": 200060 + }, + { + "epoch": 4.8876212347006085, + "grad_norm": 0.00024084115284495056, + "learning_rate": 3.077382794552941e-09, + "loss": 0.0, + "num_input_tokens_seen": 134826472, + "step": 200065 + }, + { + "epoch": 4.887743385532455, + "grad_norm": 0.013386818580329418, + "learning_rate": 3.070701343562687e-09, + "loss": 0.0, + "num_input_tokens_seen": 134829800, + "step": 200070 + }, + { + "epoch": 4.887865536364303, + "grad_norm": 0.00042936980025842786, + "learning_rate": 3.064027142510306e-09, + "loss": 0.0, + "num_input_tokens_seen": 134833128, + "step": 200075 + }, + { + "epoch": 4.88798768719615, + "grad_norm": 0.001953166676685214, + "learning_rate": 3.057360191444536e-09, + "loss": 0.0, + "num_input_tokens_seen": 134836264, + "step": 200080 + }, + { + "epoch": 4.888109838027997, + "grad_norm": 0.0004152859910391271, + "learning_rate": 3.0507004904137823e-09, + "loss": 0.0, + "num_input_tokens_seen": 134839272, + "step": 200085 + }, + { + "epoch": 4.888231988859844, + "grad_norm": 3.0269908165792003e-05, + "learning_rate": 3.0440480394664516e-09, + "loss": 0.0, + "num_input_tokens_seen": 134842472, + "step": 200090 + }, + { + "epoch": 4.888354139691692, + "grad_norm": 0.0003708160074893385, + "learning_rate": 3.0374028386510596e-09, + "loss": 0.0, + "num_input_tokens_seen": 134845800, + "step": 200095 + }, + { + "epoch": 4.888476290523538, + "grad_norm": 0.005872996523976326, + "learning_rate": 3.0307648880156798e-09, + "loss": 0.0, + "num_input_tokens_seen": 134849064, + "step": 200100 + }, + { + "epoch": 4.888598441355386, + "grad_norm": 0.000277300423476845, + "learning_rate": 3.0241341876088287e-09, + "loss": 0.0, + "num_input_tokens_seen": 134852712, + "step": 200105 + }, + { + "epoch": 4.888720592187233, + "grad_norm": 0.007827439345419407, + "learning_rate": 3.0175107374785792e-09, + "loss": 0.0, + "num_input_tokens_seen": 134856040, + "step": 200110 + }, + { + "epoch": 4.8888427430190795, + "grad_norm": 7.76977394707501e-05, + "learning_rate": 3.0108945376732254e-09, + "loss": 0.0, + "num_input_tokens_seen": 134859560, + "step": 200115 + }, + { + "epoch": 4.888964893850927, + "grad_norm": 4.224982330924831e-05, + "learning_rate": 3.0042855882407293e-09, + "loss": 0.0, + "num_input_tokens_seen": 134862568, + "step": 200120 + }, + { + "epoch": 4.889087044682775, + "grad_norm": 0.0004493116866797209, + "learning_rate": 2.9976838892292746e-09, + "loss": 0.0, + "num_input_tokens_seen": 134866408, + "step": 200125 + }, + { + "epoch": 4.8892091955146215, + "grad_norm": 0.003806524444371462, + "learning_rate": 2.9910894406868224e-09, + "loss": 0.0, + "num_input_tokens_seen": 134870696, + "step": 200130 + }, + { + "epoch": 4.889331346346468, + "grad_norm": 2.1448315237648785e-05, + "learning_rate": 2.9845022426612243e-09, + "loss": 0.0, + "num_input_tokens_seen": 134874088, + "step": 200135 + }, + { + "epoch": 4.889453497178316, + "grad_norm": 2.3338936443906277e-05, + "learning_rate": 2.9779222952005524e-09, + "loss": 0.0, + "num_input_tokens_seen": 134877096, + "step": 200140 + }, + { + "epoch": 4.889575648010163, + "grad_norm": 5.836586933583021e-05, + "learning_rate": 2.971349598352657e-09, + "loss": 0.0, + "num_input_tokens_seen": 134880232, + "step": 200145 + }, + { + "epoch": 4.88969779884201, + "grad_norm": 0.004671205300837755, + "learning_rate": 2.9647841521652783e-09, + "loss": 0.0, + "num_input_tokens_seen": 134883560, + "step": 200150 + }, + { + "epoch": 4.889819949673857, + "grad_norm": 0.00043029917287640274, + "learning_rate": 2.9582259566860446e-09, + "loss": 0.0, + "num_input_tokens_seen": 134887208, + "step": 200155 + }, + { + "epoch": 4.889942100505705, + "grad_norm": 3.5230928915552795e-05, + "learning_rate": 2.9516750119629176e-09, + "loss": 0.0224, + "num_input_tokens_seen": 134890600, + "step": 200160 + }, + { + "epoch": 4.890064251337551, + "grad_norm": 6.732921610819176e-05, + "learning_rate": 2.9451313180431924e-09, + "loss": 0.0, + "num_input_tokens_seen": 134894120, + "step": 200165 + }, + { + "epoch": 4.890186402169399, + "grad_norm": 2.25527910515666e-05, + "learning_rate": 2.938594874974831e-09, + "loss": 0.0, + "num_input_tokens_seen": 134897384, + "step": 200170 + }, + { + "epoch": 4.890308553001246, + "grad_norm": 0.0002783486561384052, + "learning_rate": 2.9320656828050182e-09, + "loss": 0.0, + "num_input_tokens_seen": 134900904, + "step": 200175 + }, + { + "epoch": 4.890430703833093, + "grad_norm": 8.396808698307723e-05, + "learning_rate": 2.9255437415816044e-09, + "loss": 0.0, + "num_input_tokens_seen": 134903912, + "step": 200180 + }, + { + "epoch": 4.89055285466494, + "grad_norm": 0.004095216281712055, + "learning_rate": 2.9190290513516624e-09, + "loss": 0.0, + "num_input_tokens_seen": 134907048, + "step": 200185 + }, + { + "epoch": 4.890675005496788, + "grad_norm": 0.00025316732353530824, + "learning_rate": 2.9125216121628214e-09, + "loss": 0.0, + "num_input_tokens_seen": 134910184, + "step": 200190 + }, + { + "epoch": 4.8907971563286345, + "grad_norm": 4.9143873184220865e-05, + "learning_rate": 2.906021424062155e-09, + "loss": 0.0, + "num_input_tokens_seen": 134913192, + "step": 200195 + }, + { + "epoch": 4.890919307160482, + "grad_norm": 0.00027152185793966055, + "learning_rate": 2.8995284870971804e-09, + "loss": 0.0, + "num_input_tokens_seen": 134916456, + "step": 200200 + }, + { + "epoch": 4.891041457992329, + "grad_norm": 1.3070682143734302e-05, + "learning_rate": 2.893042801315082e-09, + "loss": 0.0, + "num_input_tokens_seen": 134919528, + "step": 200205 + }, + { + "epoch": 4.891163608824176, + "grad_norm": 8.361788786714897e-05, + "learning_rate": 2.8865643667629336e-09, + "loss": 0.0, + "num_input_tokens_seen": 134922920, + "step": 200210 + }, + { + "epoch": 4.891285759656023, + "grad_norm": 0.0013510880526155233, + "learning_rate": 2.8800931834878085e-09, + "loss": 0.0, + "num_input_tokens_seen": 134925992, + "step": 200215 + }, + { + "epoch": 4.89140791048787, + "grad_norm": 0.0006011762889102101, + "learning_rate": 2.873629251536891e-09, + "loss": 0.0, + "num_input_tokens_seen": 134928936, + "step": 200220 + }, + { + "epoch": 4.891530061319718, + "grad_norm": 0.0008538027759641409, + "learning_rate": 2.8671725709571437e-09, + "loss": 0.0, + "num_input_tokens_seen": 134932328, + "step": 200225 + }, + { + "epoch": 4.891652212151564, + "grad_norm": 0.00014728435780853033, + "learning_rate": 2.860723141795529e-09, + "loss": 0.0, + "num_input_tokens_seen": 134935592, + "step": 200230 + }, + { + "epoch": 4.891774362983412, + "grad_norm": 0.0003445665934123099, + "learning_rate": 2.8542809640988986e-09, + "loss": 0.0, + "num_input_tokens_seen": 134938664, + "step": 200235 + }, + { + "epoch": 4.891896513815259, + "grad_norm": 0.000725120073184371, + "learning_rate": 2.847846037914103e-09, + "loss": 0.0, + "num_input_tokens_seen": 134941608, + "step": 200240 + }, + { + "epoch": 4.892018664647106, + "grad_norm": 2.4838760509737767e-05, + "learning_rate": 2.841418363287995e-09, + "loss": 0.0001, + "num_input_tokens_seen": 134945192, + "step": 200245 + }, + { + "epoch": 4.892140815478953, + "grad_norm": 0.0050308541394770145, + "learning_rate": 2.834997940267425e-09, + "loss": 0.0, + "num_input_tokens_seen": 134948200, + "step": 200250 + }, + { + "epoch": 4.892262966310801, + "grad_norm": 0.0032318481244146824, + "learning_rate": 2.8285847688988006e-09, + "loss": 0.0, + "num_input_tokens_seen": 134951528, + "step": 200255 + }, + { + "epoch": 4.892385117142648, + "grad_norm": 0.0008068734314292669, + "learning_rate": 2.8221788492289733e-09, + "loss": 0.0, + "num_input_tokens_seen": 134954792, + "step": 200260 + }, + { + "epoch": 4.892507267974495, + "grad_norm": 0.0009322738042101264, + "learning_rate": 2.8157801813044613e-09, + "loss": 0.0, + "num_input_tokens_seen": 134958120, + "step": 200265 + }, + { + "epoch": 4.892629418806342, + "grad_norm": 0.0007301435107365251, + "learning_rate": 2.809388765171783e-09, + "loss": 0.0, + "num_input_tokens_seen": 134961128, + "step": 200270 + }, + { + "epoch": 4.89275156963819, + "grad_norm": 0.00023704532941337675, + "learning_rate": 2.8030046008774564e-09, + "loss": 0.0, + "num_input_tokens_seen": 134964264, + "step": 200275 + }, + { + "epoch": 4.892873720470036, + "grad_norm": 0.0002072526840493083, + "learning_rate": 2.79662768846789e-09, + "loss": 0.0, + "num_input_tokens_seen": 134967720, + "step": 200280 + }, + { + "epoch": 4.892995871301883, + "grad_norm": 4.86823009850923e-05, + "learning_rate": 2.7902580279894895e-09, + "loss": 0.0, + "num_input_tokens_seen": 134971368, + "step": 200285 + }, + { + "epoch": 4.893118022133731, + "grad_norm": 0.000519820605404675, + "learning_rate": 2.783895619488552e-09, + "loss": 0.0, + "num_input_tokens_seen": 134975080, + "step": 200290 + }, + { + "epoch": 4.893240172965578, + "grad_norm": 0.00029773195274174213, + "learning_rate": 2.7775404630112632e-09, + "loss": 0.0, + "num_input_tokens_seen": 134978472, + "step": 200295 + }, + { + "epoch": 4.893362323797425, + "grad_norm": 0.00020020749070681632, + "learning_rate": 2.7711925586040298e-09, + "loss": 0.0, + "num_input_tokens_seen": 134982248, + "step": 200300 + }, + { + "epoch": 4.893484474629272, + "grad_norm": 0.00055017473641783, + "learning_rate": 2.764851906312815e-09, + "loss": 0.0, + "num_input_tokens_seen": 134985512, + "step": 200305 + }, + { + "epoch": 4.8936066254611195, + "grad_norm": 0.00029676384292542934, + "learning_rate": 2.7585185061839154e-09, + "loss": 0.0, + "num_input_tokens_seen": 134988520, + "step": 200310 + }, + { + "epoch": 4.893728776292966, + "grad_norm": 0.00014414470933843404, + "learning_rate": 2.7521923582631833e-09, + "loss": 0.0, + "num_input_tokens_seen": 134992616, + "step": 200315 + }, + { + "epoch": 4.893850927124814, + "grad_norm": 0.00025701947743073106, + "learning_rate": 2.745873462596804e-09, + "loss": 0.0, + "num_input_tokens_seen": 134996008, + "step": 200320 + }, + { + "epoch": 4.893973077956661, + "grad_norm": 0.002546359086409211, + "learning_rate": 2.7395618192306292e-09, + "loss": 0.0, + "num_input_tokens_seen": 134999400, + "step": 200325 + }, + { + "epoch": 4.894095228788508, + "grad_norm": 0.0006520068855024874, + "learning_rate": 2.7332574282107335e-09, + "loss": 0.0, + "num_input_tokens_seen": 135002600, + "step": 200330 + }, + { + "epoch": 4.894217379620355, + "grad_norm": 0.0029449693392962217, + "learning_rate": 2.7269602895826362e-09, + "loss": 0.0, + "num_input_tokens_seen": 135005992, + "step": 200335 + }, + { + "epoch": 4.894339530452203, + "grad_norm": 0.0027736180927604437, + "learning_rate": 2.720670403392411e-09, + "loss": 0.0, + "num_input_tokens_seen": 135009128, + "step": 200340 + }, + { + "epoch": 4.894461681284049, + "grad_norm": 0.0016403611516579986, + "learning_rate": 2.7143877696856887e-09, + "loss": 0.0, + "num_input_tokens_seen": 135012648, + "step": 200345 + }, + { + "epoch": 4.894583832115897, + "grad_norm": 0.00010193864000029862, + "learning_rate": 2.70811238850821e-09, + "loss": 0.0, + "num_input_tokens_seen": 135015720, + "step": 200350 + }, + { + "epoch": 4.894705982947744, + "grad_norm": 2.756047797447536e-05, + "learning_rate": 2.701844259905495e-09, + "loss": 0.0, + "num_input_tokens_seen": 135019560, + "step": 200355 + }, + { + "epoch": 4.894828133779591, + "grad_norm": 4.996271673007868e-05, + "learning_rate": 2.6955833839232834e-09, + "loss": 0.0, + "num_input_tokens_seen": 135023400, + "step": 200360 + }, + { + "epoch": 4.894950284611438, + "grad_norm": 0.0004890519776381552, + "learning_rate": 2.6893297606069843e-09, + "loss": 0.0, + "num_input_tokens_seen": 135027432, + "step": 200365 + }, + { + "epoch": 4.895072435443286, + "grad_norm": 3.164946974720806e-05, + "learning_rate": 2.6830833900021166e-09, + "loss": 0.0, + "num_input_tokens_seen": 135030760, + "step": 200370 + }, + { + "epoch": 4.8951945862751325, + "grad_norm": 0.0010964064858853817, + "learning_rate": 2.6768442721541994e-09, + "loss": 0.0, + "num_input_tokens_seen": 135034600, + "step": 200375 + }, + { + "epoch": 4.895316737106979, + "grad_norm": 1.2080210581189021e-05, + "learning_rate": 2.670612407108419e-09, + "loss": 0.0, + "num_input_tokens_seen": 135038248, + "step": 200380 + }, + { + "epoch": 4.895438887938827, + "grad_norm": 0.0004444806545507163, + "learning_rate": 2.6643877949101834e-09, + "loss": 0.0, + "num_input_tokens_seen": 135041576, + "step": 200385 + }, + { + "epoch": 4.8955610387706745, + "grad_norm": 0.0008745653321966529, + "learning_rate": 2.6581704356047895e-09, + "loss": 0.0, + "num_input_tokens_seen": 135045288, + "step": 200390 + }, + { + "epoch": 4.895683189602521, + "grad_norm": 0.003511784365400672, + "learning_rate": 2.6519603292375347e-09, + "loss": 0.0, + "num_input_tokens_seen": 135048872, + "step": 200395 + }, + { + "epoch": 4.895805340434368, + "grad_norm": 1.117614192480687e-05, + "learning_rate": 2.645757475853383e-09, + "loss": 0.0, + "num_input_tokens_seen": 135052072, + "step": 200400 + }, + { + "epoch": 4.895927491266216, + "grad_norm": 0.00029673470999114215, + "learning_rate": 2.639561875497631e-09, + "loss": 0.0, + "num_input_tokens_seen": 135055336, + "step": 200405 + }, + { + "epoch": 4.896049642098062, + "grad_norm": 0.00034569003037177026, + "learning_rate": 2.6333735282151326e-09, + "loss": 0.0, + "num_input_tokens_seen": 135058984, + "step": 200410 + }, + { + "epoch": 4.89617179292991, + "grad_norm": 4.958968565915711e-05, + "learning_rate": 2.627192434050962e-09, + "loss": 0.0, + "num_input_tokens_seen": 135062248, + "step": 200415 + }, + { + "epoch": 4.896293943761757, + "grad_norm": 0.00011519034160301089, + "learning_rate": 2.621018593050195e-09, + "loss": 0.0, + "num_input_tokens_seen": 135065832, + "step": 200420 + }, + { + "epoch": 4.896416094593604, + "grad_norm": 0.00024000953999347985, + "learning_rate": 2.6148520052576838e-09, + "loss": 0.0, + "num_input_tokens_seen": 135069160, + "step": 200425 + }, + { + "epoch": 4.896538245425451, + "grad_norm": 0.0036131960805505514, + "learning_rate": 2.608692670718171e-09, + "loss": 0.0, + "num_input_tokens_seen": 135072552, + "step": 200430 + }, + { + "epoch": 4.896660396257299, + "grad_norm": 0.0005584946484304965, + "learning_rate": 2.6025405894766204e-09, + "loss": 0.0, + "num_input_tokens_seen": 135075688, + "step": 200435 + }, + { + "epoch": 4.8967825470891455, + "grad_norm": 1.2202312973386142e-05, + "learning_rate": 2.596395761577552e-09, + "loss": 0.0, + "num_input_tokens_seen": 135078824, + "step": 200440 + }, + { + "epoch": 4.896904697920993, + "grad_norm": 0.002158592687919736, + "learning_rate": 2.590258187065708e-09, + "loss": 0.0, + "num_input_tokens_seen": 135082280, + "step": 200445 + }, + { + "epoch": 4.89702684875284, + "grad_norm": 7.275992538779974e-05, + "learning_rate": 2.5841278659858303e-09, + "loss": 0.0, + "num_input_tokens_seen": 135085928, + "step": 200450 + }, + { + "epoch": 4.8971489995846875, + "grad_norm": 0.00630525778979063, + "learning_rate": 2.578004798382549e-09, + "loss": 0.0538, + "num_input_tokens_seen": 135089768, + "step": 200455 + }, + { + "epoch": 4.897271150416534, + "grad_norm": 2.0241757738403976e-05, + "learning_rate": 2.5718889843001632e-09, + "loss": 0.0, + "num_input_tokens_seen": 135093096, + "step": 200460 + }, + { + "epoch": 4.897393301248382, + "grad_norm": 7.490185089409351e-05, + "learning_rate": 2.5657804237833037e-09, + "loss": 0.0, + "num_input_tokens_seen": 135096552, + "step": 200465 + }, + { + "epoch": 4.897515452080229, + "grad_norm": 0.000123066536616534, + "learning_rate": 2.559679116876379e-09, + "loss": 0.0, + "num_input_tokens_seen": 135100072, + "step": 200470 + }, + { + "epoch": 4.897637602912075, + "grad_norm": 0.0003549766552168876, + "learning_rate": 2.5535850636237976e-09, + "loss": 0.0, + "num_input_tokens_seen": 135103528, + "step": 200475 + }, + { + "epoch": 4.897759753743923, + "grad_norm": 0.0001572407054482028, + "learning_rate": 2.5474982640697475e-09, + "loss": 0.0, + "num_input_tokens_seen": 135107240, + "step": 200480 + }, + { + "epoch": 4.89788190457577, + "grad_norm": 0.0003219839127268642, + "learning_rate": 2.5414187182586365e-09, + "loss": 0.0, + "num_input_tokens_seen": 135110952, + "step": 200485 + }, + { + "epoch": 4.898004055407617, + "grad_norm": 0.0002576294064056128, + "learning_rate": 2.5353464262345415e-09, + "loss": 0.0, + "num_input_tokens_seen": 135114728, + "step": 200490 + }, + { + "epoch": 4.898126206239464, + "grad_norm": 0.0020776099991053343, + "learning_rate": 2.5292813880417595e-09, + "loss": 0.0, + "num_input_tokens_seen": 135118120, + "step": 200495 + }, + { + "epoch": 4.898248357071312, + "grad_norm": 7.651347550563514e-05, + "learning_rate": 2.523223603724367e-09, + "loss": 0.0, + "num_input_tokens_seen": 135121640, + "step": 200500 + }, + { + "epoch": 4.8983705079031585, + "grad_norm": 5.8618898037821054e-05, + "learning_rate": 2.5171730733262175e-09, + "loss": 0.0, + "num_input_tokens_seen": 135125672, + "step": 200505 + }, + { + "epoch": 4.898492658735006, + "grad_norm": 0.002982416423037648, + "learning_rate": 2.511129796891609e-09, + "loss": 0.0, + "num_input_tokens_seen": 135129000, + "step": 200510 + }, + { + "epoch": 4.898614809566853, + "grad_norm": 0.0001612217165529728, + "learning_rate": 2.5050937744643952e-09, + "loss": 0.0318, + "num_input_tokens_seen": 135132392, + "step": 200515 + }, + { + "epoch": 4.8987369603987005, + "grad_norm": 0.018624404445290565, + "learning_rate": 2.4990650060883188e-09, + "loss": 0.0, + "num_input_tokens_seen": 135135656, + "step": 200520 + }, + { + "epoch": 4.898859111230547, + "grad_norm": 0.0567937046289444, + "learning_rate": 2.493043491807345e-09, + "loss": 0.0, + "num_input_tokens_seen": 135139496, + "step": 200525 + }, + { + "epoch": 4.898981262062395, + "grad_norm": 1.878110742836725e-05, + "learning_rate": 2.4870292316653275e-09, + "loss": 0.0, + "num_input_tokens_seen": 135142696, + "step": 200530 + }, + { + "epoch": 4.899103412894242, + "grad_norm": 2.994788155774586e-05, + "learning_rate": 2.481022225705898e-09, + "loss": 0.0, + "num_input_tokens_seen": 135145832, + "step": 200535 + }, + { + "epoch": 4.899225563726089, + "grad_norm": 0.00013597743236459792, + "learning_rate": 2.475022473972799e-09, + "loss": 0.0, + "num_input_tokens_seen": 135149608, + "step": 200540 + }, + { + "epoch": 4.899347714557936, + "grad_norm": 0.00014619667490478605, + "learning_rate": 2.469029976509662e-09, + "loss": 0.0, + "num_input_tokens_seen": 135153192, + "step": 200545 + }, + { + "epoch": 4.899469865389784, + "grad_norm": 0.0010466484818607569, + "learning_rate": 2.463044733360009e-09, + "loss": 0.0, + "num_input_tokens_seen": 135156712, + "step": 200550 + }, + { + "epoch": 4.89959201622163, + "grad_norm": 0.00010759360156953335, + "learning_rate": 2.4570667445673597e-09, + "loss": 0.0, + "num_input_tokens_seen": 135159912, + "step": 200555 + }, + { + "epoch": 4.899714167053478, + "grad_norm": 31.909116744995117, + "learning_rate": 2.4510960101752355e-09, + "loss": 0.0339, + "num_input_tokens_seen": 135163496, + "step": 200560 + }, + { + "epoch": 4.899836317885325, + "grad_norm": 7.701950380578637e-05, + "learning_rate": 2.4451325302270455e-09, + "loss": 0.0, + "num_input_tokens_seen": 135166888, + "step": 200565 + }, + { + "epoch": 4.8999584687171716, + "grad_norm": 0.00031009703525342047, + "learning_rate": 2.4391763047661997e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135170216, + "step": 200570 + }, + { + "epoch": 4.900080619549019, + "grad_norm": 0.007576879113912582, + "learning_rate": 2.4332273338359965e-09, + "loss": 0.0, + "num_input_tokens_seen": 135173608, + "step": 200575 + }, + { + "epoch": 4.900202770380866, + "grad_norm": 0.029712717980146408, + "learning_rate": 2.4272856174796242e-09, + "loss": 0.0, + "num_input_tokens_seen": 135176552, + "step": 200580 + }, + { + "epoch": 4.900324921212714, + "grad_norm": 0.00031999516068026423, + "learning_rate": 2.421351155740381e-09, + "loss": 0.0, + "num_input_tokens_seen": 135180328, + "step": 200585 + }, + { + "epoch": 4.90044707204456, + "grad_norm": 0.00021247580298222601, + "learning_rate": 2.4154239486613438e-09, + "loss": 0.0, + "num_input_tokens_seen": 135184040, + "step": 200590 + }, + { + "epoch": 4.900569222876408, + "grad_norm": 3.182264481438324e-05, + "learning_rate": 2.4095039962857e-09, + "loss": 0.0, + "num_input_tokens_seen": 135187624, + "step": 200595 + }, + { + "epoch": 4.900691373708255, + "grad_norm": 1.529901783214882e-05, + "learning_rate": 2.4035912986564155e-09, + "loss": 0.0, + "num_input_tokens_seen": 135190760, + "step": 200600 + }, + { + "epoch": 4.900813524540102, + "grad_norm": 0.00029649061616510153, + "learning_rate": 2.3976858558165667e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135194216, + "step": 200605 + }, + { + "epoch": 4.900935675371949, + "grad_norm": 5.1163326133973897e-05, + "learning_rate": 2.3917876678091197e-09, + "loss": 0.0, + "num_input_tokens_seen": 135197288, + "step": 200610 + }, + { + "epoch": 4.901057826203797, + "grad_norm": 0.00202845293097198, + "learning_rate": 2.385896734676818e-09, + "loss": 0.0, + "num_input_tokens_seen": 135200872, + "step": 200615 + }, + { + "epoch": 4.9011799770356435, + "grad_norm": 0.0005032019107602537, + "learning_rate": 2.3800130564627374e-09, + "loss": 0.0, + "num_input_tokens_seen": 135204008, + "step": 200620 + }, + { + "epoch": 4.901302127867491, + "grad_norm": 0.0008152702357620001, + "learning_rate": 2.3741366332094003e-09, + "loss": 0.0, + "num_input_tokens_seen": 135207208, + "step": 200625 + }, + { + "epoch": 4.901424278699338, + "grad_norm": 4.561661626212299e-06, + "learning_rate": 2.3682674649597725e-09, + "loss": 0.0, + "num_input_tokens_seen": 135210664, + "step": 200630 + }, + { + "epoch": 4.9015464295311855, + "grad_norm": 0.00010567594290478155, + "learning_rate": 2.3624055517562634e-09, + "loss": 0.0, + "num_input_tokens_seen": 135213800, + "step": 200635 + }, + { + "epoch": 4.901668580363032, + "grad_norm": 6.133651913842186e-05, + "learning_rate": 2.35655089364184e-09, + "loss": 0.0, + "num_input_tokens_seen": 135216744, + "step": 200640 + }, + { + "epoch": 4.901790731194879, + "grad_norm": 0.0007190249161794782, + "learning_rate": 2.350703490658912e-09, + "loss": 0.0, + "num_input_tokens_seen": 135220392, + "step": 200645 + }, + { + "epoch": 4.901912882026727, + "grad_norm": 0.00048561752191744745, + "learning_rate": 2.34486334284989e-09, + "loss": 0.0, + "num_input_tokens_seen": 135223592, + "step": 200650 + }, + { + "epoch": 4.902035032858574, + "grad_norm": 0.0003284842532593757, + "learning_rate": 2.3390304502575175e-09, + "loss": 0.0, + "num_input_tokens_seen": 135226856, + "step": 200655 + }, + { + "epoch": 4.902157183690421, + "grad_norm": 0.00022950841230340302, + "learning_rate": 2.3332048129238724e-09, + "loss": 0.0, + "num_input_tokens_seen": 135230568, + "step": 200660 + }, + { + "epoch": 4.902279334522268, + "grad_norm": 6.730855966452509e-05, + "learning_rate": 2.3273864308915867e-09, + "loss": 0.0, + "num_input_tokens_seen": 135233704, + "step": 200665 + }, + { + "epoch": 4.902401485354115, + "grad_norm": 3.094383282586932e-05, + "learning_rate": 2.321575304202961e-09, + "loss": 0.0, + "num_input_tokens_seen": 135237288, + "step": 200670 + }, + { + "epoch": 4.902523636185962, + "grad_norm": 0.00031161034712567925, + "learning_rate": 2.315771432900071e-09, + "loss": 0.0, + "num_input_tokens_seen": 135241000, + "step": 200675 + }, + { + "epoch": 4.90264578701781, + "grad_norm": 2.608428076200653e-05, + "learning_rate": 2.3099748170253287e-09, + "loss": 0.0, + "num_input_tokens_seen": 135244520, + "step": 200680 + }, + { + "epoch": 4.9027679378496565, + "grad_norm": 0.0007350142695941031, + "learning_rate": 2.3041854566206997e-09, + "loss": 0.0, + "num_input_tokens_seen": 135247976, + "step": 200685 + }, + { + "epoch": 4.902890088681504, + "grad_norm": 0.00028342122095637023, + "learning_rate": 2.298403351728484e-09, + "loss": 0.0, + "num_input_tokens_seen": 135251560, + "step": 200690 + }, + { + "epoch": 4.903012239513351, + "grad_norm": 0.00029275708948262036, + "learning_rate": 2.2926285023905368e-09, + "loss": 0.0, + "num_input_tokens_seen": 135254824, + "step": 200695 + }, + { + "epoch": 4.9031343903451985, + "grad_norm": 0.001582046621479094, + "learning_rate": 2.2868609086489355e-09, + "loss": 0.0, + "num_input_tokens_seen": 135257704, + "step": 200700 + }, + { + "epoch": 4.903256541177045, + "grad_norm": 4.7803659981582314e-05, + "learning_rate": 2.2811005705456466e-09, + "loss": 0.0, + "num_input_tokens_seen": 135261416, + "step": 200705 + }, + { + "epoch": 4.903378692008893, + "grad_norm": 4.758605427923612e-05, + "learning_rate": 2.2753474881226365e-09, + "loss": 0.0, + "num_input_tokens_seen": 135264424, + "step": 200710 + }, + { + "epoch": 4.90350084284074, + "grad_norm": 1.9773568055825308e-05, + "learning_rate": 2.2696016614216497e-09, + "loss": 0.0, + "num_input_tokens_seen": 135267816, + "step": 200715 + }, + { + "epoch": 4.903622993672587, + "grad_norm": 0.00011461249232525006, + "learning_rate": 2.263863090484319e-09, + "loss": 0.0, + "num_input_tokens_seen": 135271400, + "step": 200720 + }, + { + "epoch": 4.903745144504434, + "grad_norm": 2.939927435363643e-05, + "learning_rate": 2.2581317753527227e-09, + "loss": 0.0438, + "num_input_tokens_seen": 135274664, + "step": 200725 + }, + { + "epoch": 4.903867295336282, + "grad_norm": 9.464116010349244e-05, + "learning_rate": 2.252407716068272e-09, + "loss": 0.0, + "num_input_tokens_seen": 135278248, + "step": 200730 + }, + { + "epoch": 4.903989446168128, + "grad_norm": 0.006667073350399733, + "learning_rate": 2.2466909126726e-09, + "loss": 0.0, + "num_input_tokens_seen": 135281576, + "step": 200735 + }, + { + "epoch": 4.904111596999975, + "grad_norm": 0.015178213827311993, + "learning_rate": 2.2409813652074503e-09, + "loss": 0.0, + "num_input_tokens_seen": 135285160, + "step": 200740 + }, + { + "epoch": 4.904233747831823, + "grad_norm": 0.0013789625372737646, + "learning_rate": 2.2352790737142357e-09, + "loss": 0.0, + "num_input_tokens_seen": 135288552, + "step": 200745 + }, + { + "epoch": 4.90435589866367, + "grad_norm": 0.0009246623376384377, + "learning_rate": 2.2295840382344776e-09, + "loss": 0.0, + "num_input_tokens_seen": 135291944, + "step": 200750 + }, + { + "epoch": 4.904478049495517, + "grad_norm": 0.00013876563753001392, + "learning_rate": 2.2238962588094766e-09, + "loss": 0.0, + "num_input_tokens_seen": 135295208, + "step": 200755 + }, + { + "epoch": 4.904600200327364, + "grad_norm": 0.0006278780056163669, + "learning_rate": 2.2182157354807552e-09, + "loss": 0.0383, + "num_input_tokens_seen": 135298088, + "step": 200760 + }, + { + "epoch": 4.9047223511592115, + "grad_norm": 0.0015818560495972633, + "learning_rate": 2.212542468289502e-09, + "loss": 0.0001, + "num_input_tokens_seen": 135301864, + "step": 200765 + }, + { + "epoch": 4.904844501991058, + "grad_norm": 0.0005441773100756109, + "learning_rate": 2.206876457276907e-09, + "loss": 0.0, + "num_input_tokens_seen": 135304936, + "step": 200770 + }, + { + "epoch": 4.904966652822906, + "grad_norm": 0.0003268019063398242, + "learning_rate": 2.2012177024843816e-09, + "loss": 0.0, + "num_input_tokens_seen": 135308200, + "step": 200775 + }, + { + "epoch": 4.905088803654753, + "grad_norm": 0.0003277796204201877, + "learning_rate": 2.1955662039530032e-09, + "loss": 0.0021, + "num_input_tokens_seen": 135311464, + "step": 200780 + }, + { + "epoch": 4.9052109544866, + "grad_norm": 0.00046862257295288146, + "learning_rate": 2.189921961723851e-09, + "loss": 0.0, + "num_input_tokens_seen": 135314984, + "step": 200785 + }, + { + "epoch": 4.905333105318447, + "grad_norm": 0.0011110889026895165, + "learning_rate": 2.184284975837891e-09, + "loss": 0.0, + "num_input_tokens_seen": 135317928, + "step": 200790 + }, + { + "epoch": 4.905455256150295, + "grad_norm": 0.00011374321911716834, + "learning_rate": 2.178655246336203e-09, + "loss": 0.0, + "num_input_tokens_seen": 135321448, + "step": 200795 + }, + { + "epoch": 4.905577406982141, + "grad_norm": 0.0003457633429206908, + "learning_rate": 2.173032773259753e-09, + "loss": 0.0, + "num_input_tokens_seen": 135324648, + "step": 200800 + }, + { + "epoch": 4.905699557813989, + "grad_norm": 2.1189451217651367e-05, + "learning_rate": 2.167417556649287e-09, + "loss": 0.0, + "num_input_tokens_seen": 135328104, + "step": 200805 + }, + { + "epoch": 4.905821708645836, + "grad_norm": 0.00014341072528623044, + "learning_rate": 2.1618095965458826e-09, + "loss": 0.0, + "num_input_tokens_seen": 135331496, + "step": 200810 + }, + { + "epoch": 4.905943859477683, + "grad_norm": 56.895790100097656, + "learning_rate": 2.1562088929901745e-09, + "loss": 0.0402, + "num_input_tokens_seen": 135334504, + "step": 200815 + }, + { + "epoch": 4.90606601030953, + "grad_norm": 0.00114006910007447, + "learning_rate": 2.1506154460227965e-09, + "loss": 0.0, + "num_input_tokens_seen": 135338024, + "step": 200820 + }, + { + "epoch": 4.906188161141378, + "grad_norm": 4.435875962371938e-05, + "learning_rate": 2.145029255684605e-09, + "loss": 0.0, + "num_input_tokens_seen": 135341160, + "step": 200825 + }, + { + "epoch": 4.9063103119732245, + "grad_norm": 5.82519278395921e-05, + "learning_rate": 2.139450322016123e-09, + "loss": 0.0, + "num_input_tokens_seen": 135344360, + "step": 200830 + }, + { + "epoch": 4.906432462805071, + "grad_norm": 0.0002500818227417767, + "learning_rate": 2.1338786450579847e-09, + "loss": 0.0, + "num_input_tokens_seen": 135347752, + "step": 200835 + }, + { + "epoch": 4.906554613636919, + "grad_norm": 0.0010689017362892628, + "learning_rate": 2.1283142248507135e-09, + "loss": 0.0, + "num_input_tokens_seen": 135351080, + "step": 200840 + }, + { + "epoch": 4.906676764468766, + "grad_norm": 0.0010640647960826755, + "learning_rate": 2.1227570614346103e-09, + "loss": 0.0, + "num_input_tokens_seen": 135354536, + "step": 200845 + }, + { + "epoch": 4.906798915300613, + "grad_norm": 4.3444782932056114e-05, + "learning_rate": 2.117207154850309e-09, + "loss": 0.0, + "num_input_tokens_seen": 135357992, + "step": 200850 + }, + { + "epoch": 4.90692106613246, + "grad_norm": 0.014457025565207005, + "learning_rate": 2.111664505138111e-09, + "loss": 0.0, + "num_input_tokens_seen": 135361128, + "step": 200855 + }, + { + "epoch": 4.907043216964308, + "grad_norm": 0.0001936874759849161, + "learning_rate": 2.1061291123382063e-09, + "loss": 0.0, + "num_input_tokens_seen": 135364776, + "step": 200860 + }, + { + "epoch": 4.907165367796154, + "grad_norm": 0.00028388245846144855, + "learning_rate": 2.100600976491007e-09, + "loss": 0.0, + "num_input_tokens_seen": 135368104, + "step": 200865 + }, + { + "epoch": 4.907287518628002, + "grad_norm": 0.00029171412461437285, + "learning_rate": 2.095080097636592e-09, + "loss": 0.0, + "num_input_tokens_seen": 135370984, + "step": 200870 + }, + { + "epoch": 4.907409669459849, + "grad_norm": 0.00025791852385737, + "learning_rate": 2.089566475815152e-09, + "loss": 0.0, + "num_input_tokens_seen": 135374248, + "step": 200875 + }, + { + "epoch": 4.907531820291696, + "grad_norm": 0.0002960785641334951, + "learning_rate": 2.0840601110667654e-09, + "loss": 0.0, + "num_input_tokens_seen": 135377512, + "step": 200880 + }, + { + "epoch": 4.907653971123543, + "grad_norm": 0.0001221057027578354, + "learning_rate": 2.0785610034315114e-09, + "loss": 0.0, + "num_input_tokens_seen": 135380840, + "step": 200885 + }, + { + "epoch": 4.907776121955391, + "grad_norm": 0.0012774858623743057, + "learning_rate": 2.0730691529493583e-09, + "loss": 0.0, + "num_input_tokens_seen": 135384360, + "step": 200890 + }, + { + "epoch": 4.907898272787238, + "grad_norm": 0.0003208449052181095, + "learning_rate": 2.0675845596602737e-09, + "loss": 0.0, + "num_input_tokens_seen": 135387816, + "step": 200895 + }, + { + "epoch": 4.908020423619085, + "grad_norm": 0.0002654562995303422, + "learning_rate": 2.0621072236042257e-09, + "loss": 0.0, + "num_input_tokens_seen": 135390888, + "step": 200900 + }, + { + "epoch": 4.908142574450932, + "grad_norm": 0.000628930632956326, + "learning_rate": 2.0566371448208497e-09, + "loss": 0.0, + "num_input_tokens_seen": 135394216, + "step": 200905 + }, + { + "epoch": 4.908264725282779, + "grad_norm": 0.00024314632173627615, + "learning_rate": 2.0511743233500024e-09, + "loss": 0.0, + "num_input_tokens_seen": 135397608, + "step": 200910 + }, + { + "epoch": 4.908386876114626, + "grad_norm": 6.568645039806142e-05, + "learning_rate": 2.0457187592314294e-09, + "loss": 0.0, + "num_input_tokens_seen": 135401256, + "step": 200915 + }, + { + "epoch": 4.908509026946474, + "grad_norm": 0.00040447432547807693, + "learning_rate": 2.0402704525048776e-09, + "loss": 0.0, + "num_input_tokens_seen": 135404328, + "step": 200920 + }, + { + "epoch": 4.908631177778321, + "grad_norm": 0.0004772391403093934, + "learning_rate": 2.03482940320987e-09, + "loss": 0.0, + "num_input_tokens_seen": 135407528, + "step": 200925 + }, + { + "epoch": 4.908753328610167, + "grad_norm": 0.0001811403635656461, + "learning_rate": 2.029395611386042e-09, + "loss": 0.0, + "num_input_tokens_seen": 135410728, + "step": 200930 + }, + { + "epoch": 4.908875479442015, + "grad_norm": 0.0008391044102609158, + "learning_rate": 2.0239690770728068e-09, + "loss": 0.0, + "num_input_tokens_seen": 135414696, + "step": 200935 + }, + { + "epoch": 4.908997630273862, + "grad_norm": 5.66676590096904e-06, + "learning_rate": 2.018549800309688e-09, + "loss": 0.0, + "num_input_tokens_seen": 135418088, + "step": 200940 + }, + { + "epoch": 4.9091197811057095, + "grad_norm": 0.00012580875772982836, + "learning_rate": 2.0131377811360982e-09, + "loss": 0.0, + "num_input_tokens_seen": 135421544, + "step": 200945 + }, + { + "epoch": 4.909241931937556, + "grad_norm": 0.0003512984258122742, + "learning_rate": 2.0077330195914512e-09, + "loss": 0.0, + "num_input_tokens_seen": 135425064, + "step": 200950 + }, + { + "epoch": 4.909364082769404, + "grad_norm": 0.0013358528958633542, + "learning_rate": 2.0023355157149367e-09, + "loss": 0.0, + "num_input_tokens_seen": 135428712, + "step": 200955 + }, + { + "epoch": 4.909486233601251, + "grad_norm": 0.0003753567289095372, + "learning_rate": 1.9969452695458576e-09, + "loss": 0.0, + "num_input_tokens_seen": 135432040, + "step": 200960 + }, + { + "epoch": 4.909608384433098, + "grad_norm": 0.00010384988854639232, + "learning_rate": 1.9915622811235155e-09, + "loss": 0.0, + "num_input_tokens_seen": 135435304, + "step": 200965 + }, + { + "epoch": 4.909730535264945, + "grad_norm": 0.0036878047976642847, + "learning_rate": 1.9861865504868792e-09, + "loss": 0.0, + "num_input_tokens_seen": 135438504, + "step": 200970 + }, + { + "epoch": 4.909852686096793, + "grad_norm": 0.011057667434215546, + "learning_rate": 1.9808180776751393e-09, + "loss": 0.0, + "num_input_tokens_seen": 135441576, + "step": 200975 + }, + { + "epoch": 4.909974836928639, + "grad_norm": 0.0002773915184661746, + "learning_rate": 1.975456862727376e-09, + "loss": 0.0, + "num_input_tokens_seen": 135445160, + "step": 200980 + }, + { + "epoch": 4.910096987760487, + "grad_norm": 8.614901889814064e-05, + "learning_rate": 1.970102905682447e-09, + "loss": 0.0, + "num_input_tokens_seen": 135448872, + "step": 200985 + }, + { + "epoch": 4.910219138592334, + "grad_norm": 0.0002527591132093221, + "learning_rate": 1.964756206579432e-09, + "loss": 0.0001, + "num_input_tokens_seen": 135452328, + "step": 200990 + }, + { + "epoch": 4.910341289424181, + "grad_norm": 0.0005422373651526868, + "learning_rate": 1.959416765457189e-09, + "loss": 0.0001, + "num_input_tokens_seen": 135455976, + "step": 200995 + }, + { + "epoch": 4.910463440256028, + "grad_norm": 0.0001778609148459509, + "learning_rate": 1.954084582354465e-09, + "loss": 0.0, + "num_input_tokens_seen": 135459368, + "step": 201000 + }, + { + "epoch": 4.910585591087875, + "grad_norm": 0.015482158400118351, + "learning_rate": 1.948759657310006e-09, + "loss": 0.0, + "num_input_tokens_seen": 135462952, + "step": 201005 + }, + { + "epoch": 4.9107077419197225, + "grad_norm": 0.00010658630344551057, + "learning_rate": 1.9434419903626708e-09, + "loss": 0.0, + "num_input_tokens_seen": 135466856, + "step": 201010 + }, + { + "epoch": 4.91082989275157, + "grad_norm": 0.0001746755588101223, + "learning_rate": 1.9381315815510946e-09, + "loss": 0.0, + "num_input_tokens_seen": 135469992, + "step": 201015 + }, + { + "epoch": 4.910952043583417, + "grad_norm": 0.00026380535564385355, + "learning_rate": 1.9328284309138022e-09, + "loss": 0.0, + "num_input_tokens_seen": 135473576, + "step": 201020 + }, + { + "epoch": 4.911074194415264, + "grad_norm": 4.049741255585104e-05, + "learning_rate": 1.92753253848954e-09, + "loss": 0.0, + "num_input_tokens_seen": 135477160, + "step": 201025 + }, + { + "epoch": 4.911196345247111, + "grad_norm": 0.005961469374597073, + "learning_rate": 1.9222439043166116e-09, + "loss": 0.0, + "num_input_tokens_seen": 135480680, + "step": 201030 + }, + { + "epoch": 4.911318496078958, + "grad_norm": 0.00024113174004014581, + "learning_rate": 1.9169625284336523e-09, + "loss": 0.0, + "num_input_tokens_seen": 135485544, + "step": 201035 + }, + { + "epoch": 4.911440646910806, + "grad_norm": 1.186096324090613e-05, + "learning_rate": 1.9116884108789644e-09, + "loss": 0.0, + "num_input_tokens_seen": 135488808, + "step": 201040 + }, + { + "epoch": 4.911562797742652, + "grad_norm": 9.603124635759741e-05, + "learning_rate": 1.9064215516908513e-09, + "loss": 0.0, + "num_input_tokens_seen": 135492200, + "step": 201045 + }, + { + "epoch": 4.9116849485745, + "grad_norm": 1.8488022760720924e-05, + "learning_rate": 1.901161950907837e-09, + "loss": 0.0, + "num_input_tokens_seen": 135495912, + "step": 201050 + }, + { + "epoch": 4.911807099406347, + "grad_norm": 7.88636680226773e-05, + "learning_rate": 1.8959096085678915e-09, + "loss": 0.0, + "num_input_tokens_seen": 135498984, + "step": 201055 + }, + { + "epoch": 4.911929250238194, + "grad_norm": 0.0008046218426898122, + "learning_rate": 1.8906645247094288e-09, + "loss": 0.0, + "num_input_tokens_seen": 135502184, + "step": 201060 + }, + { + "epoch": 4.912051401070041, + "grad_norm": 3.1551830034004524e-05, + "learning_rate": 1.885426699370529e-09, + "loss": 0.0, + "num_input_tokens_seen": 135506088, + "step": 201065 + }, + { + "epoch": 4.912173551901889, + "grad_norm": 6.683461833745241e-05, + "learning_rate": 1.8801961325892735e-09, + "loss": 0.0, + "num_input_tokens_seen": 135509544, + "step": 201070 + }, + { + "epoch": 4.9122957027337355, + "grad_norm": 0.00046349395415745676, + "learning_rate": 1.874972824403631e-09, + "loss": 0.0, + "num_input_tokens_seen": 135512936, + "step": 201075 + }, + { + "epoch": 4.912417853565583, + "grad_norm": 0.008031395263969898, + "learning_rate": 1.869756774851683e-09, + "loss": 0.0, + "num_input_tokens_seen": 135516392, + "step": 201080 + }, + { + "epoch": 4.91254000439743, + "grad_norm": 0.000429147039540112, + "learning_rate": 1.8645479839712873e-09, + "loss": 0.0, + "num_input_tokens_seen": 135519912, + "step": 201085 + }, + { + "epoch": 4.9126621552292775, + "grad_norm": 3.720055246958509e-05, + "learning_rate": 1.8593464518004143e-09, + "loss": 0.0, + "num_input_tokens_seen": 135523240, + "step": 201090 + }, + { + "epoch": 4.912784306061124, + "grad_norm": 24.435461044311523, + "learning_rate": 1.8541521783768111e-09, + "loss": 0.0422, + "num_input_tokens_seen": 135526568, + "step": 201095 + }, + { + "epoch": 4.912906456892971, + "grad_norm": 0.00021874564117752016, + "learning_rate": 1.8489651637383363e-09, + "loss": 0.0, + "num_input_tokens_seen": 135530216, + "step": 201100 + }, + { + "epoch": 4.913028607724819, + "grad_norm": 0.0004242685972712934, + "learning_rate": 1.8437854079225158e-09, + "loss": 0.0, + "num_input_tokens_seen": 135533416, + "step": 201105 + }, + { + "epoch": 4.913150758556665, + "grad_norm": 3.266122075729072e-05, + "learning_rate": 1.8386129109673187e-09, + "loss": 0.0, + "num_input_tokens_seen": 135536360, + "step": 201110 + }, + { + "epoch": 4.913272909388513, + "grad_norm": 1137.6353759765625, + "learning_rate": 1.83344767291016e-09, + "loss": 0.041, + "num_input_tokens_seen": 135539624, + "step": 201115 + }, + { + "epoch": 4.91339506022036, + "grad_norm": 0.00044449593406170607, + "learning_rate": 1.828289693788565e-09, + "loss": 0.0, + "num_input_tokens_seen": 135543016, + "step": 201120 + }, + { + "epoch": 4.913517211052207, + "grad_norm": 0.0006559291505254805, + "learning_rate": 1.8231389736401703e-09, + "loss": 0.0, + "num_input_tokens_seen": 135546280, + "step": 201125 + }, + { + "epoch": 4.913639361884054, + "grad_norm": 0.00026207236805930734, + "learning_rate": 1.8179955125023905e-09, + "loss": 0.0, + "num_input_tokens_seen": 135549416, + "step": 201130 + }, + { + "epoch": 4.913761512715902, + "grad_norm": 3.2648542401148006e-05, + "learning_rate": 1.81285931041264e-09, + "loss": 0.0, + "num_input_tokens_seen": 135552872, + "step": 201135 + }, + { + "epoch": 4.9138836635477485, + "grad_norm": 0.00014412151358556002, + "learning_rate": 1.8077303674083332e-09, + "loss": 0.0, + "num_input_tokens_seen": 135556136, + "step": 201140 + }, + { + "epoch": 4.914005814379596, + "grad_norm": 0.0016105592949315906, + "learning_rate": 1.802608683526552e-09, + "loss": 0.0, + "num_input_tokens_seen": 135559784, + "step": 201145 + }, + { + "epoch": 4.914127965211443, + "grad_norm": 0.000432511733379215, + "learning_rate": 1.7974942588048213e-09, + "loss": 0.0, + "num_input_tokens_seen": 135562984, + "step": 201150 + }, + { + "epoch": 4.9142501160432905, + "grad_norm": 2.4063732780632563e-05, + "learning_rate": 1.7923870932801123e-09, + "loss": 0.0, + "num_input_tokens_seen": 135566056, + "step": 201155 + }, + { + "epoch": 4.914372266875137, + "grad_norm": 0.0001285884209210053, + "learning_rate": 1.7872871869896166e-09, + "loss": 0.0, + "num_input_tokens_seen": 135569704, + "step": 201160 + }, + { + "epoch": 4.914494417706985, + "grad_norm": 13.854092597961426, + "learning_rate": 1.7821945399705273e-09, + "loss": 0.0558, + "num_input_tokens_seen": 135573224, + "step": 201165 + }, + { + "epoch": 4.914616568538832, + "grad_norm": 0.0008157655247487128, + "learning_rate": 1.7771091522598146e-09, + "loss": 0.0, + "num_input_tokens_seen": 135576296, + "step": 201170 + }, + { + "epoch": 4.914738719370679, + "grad_norm": 0.0005658494192175567, + "learning_rate": 1.7720310238943381e-09, + "loss": 0.0, + "num_input_tokens_seen": 135580648, + "step": 201175 + }, + { + "epoch": 4.914860870202526, + "grad_norm": 0.019938549026846886, + "learning_rate": 1.76696015491129e-09, + "loss": 0.0, + "num_input_tokens_seen": 135583912, + "step": 201180 + }, + { + "epoch": 4.914983021034374, + "grad_norm": 0.00011997364345006645, + "learning_rate": 1.7618965453473078e-09, + "loss": 0.0, + "num_input_tokens_seen": 135586856, + "step": 201185 + }, + { + "epoch": 4.91510517186622, + "grad_norm": 0.0046823713928461075, + "learning_rate": 1.7568401952392509e-09, + "loss": 0.0, + "num_input_tokens_seen": 135590120, + "step": 201190 + }, + { + "epoch": 4.915227322698067, + "grad_norm": 1.848404099291656e-05, + "learning_rate": 1.7517911046240897e-09, + "loss": 0.0, + "num_input_tokens_seen": 135593000, + "step": 201195 + }, + { + "epoch": 4.915349473529915, + "grad_norm": 5.2997685997979715e-05, + "learning_rate": 1.7467492735383505e-09, + "loss": 0.0, + "num_input_tokens_seen": 135596456, + "step": 201200 + }, + { + "epoch": 4.9154716243617615, + "grad_norm": 0.00031060067703947425, + "learning_rate": 1.7417147020186706e-09, + "loss": 0.0, + "num_input_tokens_seen": 135600360, + "step": 201205 + }, + { + "epoch": 4.915593775193609, + "grad_norm": 3.5080083762295544e-05, + "learning_rate": 1.7366873901017987e-09, + "loss": 0.0, + "num_input_tokens_seen": 135603880, + "step": 201210 + }, + { + "epoch": 4.915715926025456, + "grad_norm": 0.00045931426575407386, + "learning_rate": 1.7316673378242609e-09, + "loss": 0.0, + "num_input_tokens_seen": 135607272, + "step": 201215 + }, + { + "epoch": 4.915838076857304, + "grad_norm": 0.00012492769747041166, + "learning_rate": 1.7266545452225835e-09, + "loss": 0.0, + "num_input_tokens_seen": 135610728, + "step": 201220 + }, + { + "epoch": 4.91596022768915, + "grad_norm": 0.0017665666528046131, + "learning_rate": 1.7216490123330707e-09, + "loss": 0.0, + "num_input_tokens_seen": 135613672, + "step": 201225 + }, + { + "epoch": 4.916082378520998, + "grad_norm": 0.011362847872078419, + "learning_rate": 1.716650739192249e-09, + "loss": 0.0, + "num_input_tokens_seen": 135617256, + "step": 201230 + }, + { + "epoch": 4.916204529352845, + "grad_norm": 0.002289575058966875, + "learning_rate": 1.711659725836534e-09, + "loss": 0.0, + "num_input_tokens_seen": 135620584, + "step": 201235 + }, + { + "epoch": 4.916326680184692, + "grad_norm": 0.0027904491871595383, + "learning_rate": 1.7066759723021185e-09, + "loss": 0.0, + "num_input_tokens_seen": 135623976, + "step": 201240 + }, + { + "epoch": 4.916448831016539, + "grad_norm": 4.122515383642167e-05, + "learning_rate": 1.7016994786251958e-09, + "loss": 0.0, + "num_input_tokens_seen": 135627112, + "step": 201245 + }, + { + "epoch": 4.916570981848387, + "grad_norm": 0.000364553474355489, + "learning_rate": 1.6967302448420707e-09, + "loss": 0.0001, + "num_input_tokens_seen": 135630056, + "step": 201250 + }, + { + "epoch": 4.9166931326802334, + "grad_norm": 6.166181265143678e-05, + "learning_rate": 1.6917682709887139e-09, + "loss": 0.0, + "num_input_tokens_seen": 135633256, + "step": 201255 + }, + { + "epoch": 4.916815283512081, + "grad_norm": 2.0448240320547484e-05, + "learning_rate": 1.6868135571015408e-09, + "loss": 0.0, + "num_input_tokens_seen": 135636648, + "step": 201260 + }, + { + "epoch": 4.916937434343928, + "grad_norm": 1.3415295143204276e-05, + "learning_rate": 1.6818661032161896e-09, + "loss": 0.0, + "num_input_tokens_seen": 135640360, + "step": 201265 + }, + { + "epoch": 4.917059585175775, + "grad_norm": 0.0004135376075282693, + "learning_rate": 1.6769259093689647e-09, + "loss": 0.0, + "num_input_tokens_seen": 135643880, + "step": 201270 + }, + { + "epoch": 4.917181736007622, + "grad_norm": 0.00034867238719016314, + "learning_rate": 1.6719929755956152e-09, + "loss": 0.0018, + "num_input_tokens_seen": 135647208, + "step": 201275 + }, + { + "epoch": 4.91730388683947, + "grad_norm": 7.221074338303879e-05, + "learning_rate": 1.6670673019320014e-09, + "loss": 0.0, + "num_input_tokens_seen": 135650280, + "step": 201280 + }, + { + "epoch": 4.917426037671317, + "grad_norm": 3.404384187888354e-05, + "learning_rate": 1.6621488884139834e-09, + "loss": 0.0, + "num_input_tokens_seen": 135653672, + "step": 201285 + }, + { + "epoch": 4.917548188503163, + "grad_norm": 0.01465871836990118, + "learning_rate": 1.6572377350774213e-09, + "loss": 0.0, + "num_input_tokens_seen": 135657064, + "step": 201290 + }, + { + "epoch": 4.917670339335011, + "grad_norm": 7.839166210033e-05, + "learning_rate": 1.6523338419578426e-09, + "loss": 0.0, + "num_input_tokens_seen": 135660328, + "step": 201295 + }, + { + "epoch": 4.917792490166858, + "grad_norm": 5.956729728495702e-05, + "learning_rate": 1.647437209091107e-09, + "loss": 0.0, + "num_input_tokens_seen": 135663464, + "step": 201300 + }, + { + "epoch": 4.917914640998705, + "grad_norm": 0.0019718848634511232, + "learning_rate": 1.6425478365126311e-09, + "loss": 0.0, + "num_input_tokens_seen": 135667432, + "step": 201305 + }, + { + "epoch": 4.918036791830552, + "grad_norm": 0.0001525900443084538, + "learning_rate": 1.6376657242581638e-09, + "loss": 0.0001, + "num_input_tokens_seen": 135670504, + "step": 201310 + }, + { + "epoch": 4.9181589426624, + "grad_norm": 0.0029563040006905794, + "learning_rate": 1.6327908723631213e-09, + "loss": 0.0, + "num_input_tokens_seen": 135673768, + "step": 201315 + }, + { + "epoch": 4.9182810934942465, + "grad_norm": 0.0002739654737524688, + "learning_rate": 1.6279232808629196e-09, + "loss": 0.0, + "num_input_tokens_seen": 135676840, + "step": 201320 + }, + { + "epoch": 4.918403244326094, + "grad_norm": 0.00016119072097353637, + "learning_rate": 1.6230629497929748e-09, + "loss": 0.0, + "num_input_tokens_seen": 135680104, + "step": 201325 + }, + { + "epoch": 4.918525395157941, + "grad_norm": 2.614900040498469e-05, + "learning_rate": 1.6182098791887033e-09, + "loss": 0.0, + "num_input_tokens_seen": 135683752, + "step": 201330 + }, + { + "epoch": 4.9186475459897885, + "grad_norm": 0.001245207036845386, + "learning_rate": 1.613364069085299e-09, + "loss": 0.0, + "num_input_tokens_seen": 135687272, + "step": 201335 + }, + { + "epoch": 4.918769696821635, + "grad_norm": 0.0006022427114658058, + "learning_rate": 1.608525519518067e-09, + "loss": 0.0, + "num_input_tokens_seen": 135691048, + "step": 201340 + }, + { + "epoch": 4.918891847653483, + "grad_norm": 0.0015529391821473837, + "learning_rate": 1.6036942305220902e-09, + "loss": 0.0, + "num_input_tokens_seen": 135694248, + "step": 201345 + }, + { + "epoch": 4.91901399848533, + "grad_norm": 2.1091949747642502e-05, + "learning_rate": 1.5988702021326738e-09, + "loss": 0.0, + "num_input_tokens_seen": 135698216, + "step": 201350 + }, + { + "epoch": 4.919136149317177, + "grad_norm": 0.0007461210479959846, + "learning_rate": 1.59405343438479e-09, + "loss": 0.0, + "num_input_tokens_seen": 135701544, + "step": 201355 + }, + { + "epoch": 4.919258300149024, + "grad_norm": 5.899009556742385e-05, + "learning_rate": 1.5892439273135216e-09, + "loss": 0.0, + "num_input_tokens_seen": 135705064, + "step": 201360 + }, + { + "epoch": 4.919380450980871, + "grad_norm": 1.099123437597882e-05, + "learning_rate": 1.5844416809537297e-09, + "loss": 0.0, + "num_input_tokens_seen": 135708264, + "step": 201365 + }, + { + "epoch": 4.919502601812718, + "grad_norm": 7.421234477078542e-05, + "learning_rate": 1.5796466953404974e-09, + "loss": 0.0, + "num_input_tokens_seen": 135711336, + "step": 201370 + }, + { + "epoch": 4.919624752644566, + "grad_norm": 0.007350210566073656, + "learning_rate": 1.5748589705085747e-09, + "loss": 0.0, + "num_input_tokens_seen": 135714984, + "step": 201375 + }, + { + "epoch": 4.919746903476413, + "grad_norm": 1.9945811800425872e-05, + "learning_rate": 1.5700785064928224e-09, + "loss": 0.0, + "num_input_tokens_seen": 135718440, + "step": 201380 + }, + { + "epoch": 4.9198690543082595, + "grad_norm": 0.000783449097070843, + "learning_rate": 1.5653053033279906e-09, + "loss": 0.0, + "num_input_tokens_seen": 135722536, + "step": 201385 + }, + { + "epoch": 4.919991205140107, + "grad_norm": 0.0006227453704923391, + "learning_rate": 1.5605393610488294e-09, + "loss": 0.0, + "num_input_tokens_seen": 135725992, + "step": 201390 + }, + { + "epoch": 4.920113355971954, + "grad_norm": 0.00017266077338717878, + "learning_rate": 1.5557806796899776e-09, + "loss": 0.0, + "num_input_tokens_seen": 135729256, + "step": 201395 + }, + { + "epoch": 4.9202355068038015, + "grad_norm": 0.0024521774612367153, + "learning_rate": 1.551029259286074e-09, + "loss": 0.0, + "num_input_tokens_seen": 135732392, + "step": 201400 + }, + { + "epoch": 4.920357657635648, + "grad_norm": 4.0178052586270496e-05, + "learning_rate": 1.546285099871647e-09, + "loss": 0.0, + "num_input_tokens_seen": 135735848, + "step": 201405 + }, + { + "epoch": 4.920479808467496, + "grad_norm": 0.00014618277782574296, + "learning_rate": 1.5415482014811132e-09, + "loss": 0.0, + "num_input_tokens_seen": 135739240, + "step": 201410 + }, + { + "epoch": 4.920601959299343, + "grad_norm": 0.0004449485568329692, + "learning_rate": 1.5368185641490005e-09, + "loss": 0.0, + "num_input_tokens_seen": 135742440, + "step": 201415 + }, + { + "epoch": 4.92072411013119, + "grad_norm": 0.0021590932738035917, + "learning_rate": 1.532096187909726e-09, + "loss": 0.0, + "num_input_tokens_seen": 135745384, + "step": 201420 + }, + { + "epoch": 4.920846260963037, + "grad_norm": 7.301468576770276e-05, + "learning_rate": 1.5273810727975955e-09, + "loss": 0.0, + "num_input_tokens_seen": 135748584, + "step": 201425 + }, + { + "epoch": 4.920968411794885, + "grad_norm": 0.19861288368701935, + "learning_rate": 1.522673218846915e-09, + "loss": 0.0, + "num_input_tokens_seen": 135752104, + "step": 201430 + }, + { + "epoch": 4.921090562626731, + "grad_norm": 0.6534940004348755, + "learning_rate": 1.5179726260918791e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135755112, + "step": 201435 + }, + { + "epoch": 4.921212713458579, + "grad_norm": 0.00027803939883597195, + "learning_rate": 1.5132792945666827e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135758376, + "step": 201440 + }, + { + "epoch": 4.921334864290426, + "grad_norm": 0.0002275543665746227, + "learning_rate": 1.5085932243055211e-09, + "loss": 0.0, + "num_input_tokens_seen": 135761320, + "step": 201445 + }, + { + "epoch": 4.921457015122273, + "grad_norm": 0.00024673750158399343, + "learning_rate": 1.5039144153424777e-09, + "loss": 0.0436, + "num_input_tokens_seen": 135764712, + "step": 201450 + }, + { + "epoch": 4.92157916595412, + "grad_norm": 0.0010997720528393984, + "learning_rate": 1.4992428677115255e-09, + "loss": 0.0, + "num_input_tokens_seen": 135767912, + "step": 201455 + }, + { + "epoch": 4.921701316785967, + "grad_norm": 0.0006602886132895947, + "learning_rate": 1.4945785814465262e-09, + "loss": 0.0, + "num_input_tokens_seen": 135771432, + "step": 201460 + }, + { + "epoch": 4.9218234676178145, + "grad_norm": 3.607894541346468e-05, + "learning_rate": 1.4899215565816748e-09, + "loss": 0.0, + "num_input_tokens_seen": 135774504, + "step": 201465 + }, + { + "epoch": 4.921945618449661, + "grad_norm": 0.0029618015978485346, + "learning_rate": 1.485271793150611e-09, + "loss": 0.0, + "num_input_tokens_seen": 135777832, + "step": 201470 + }, + { + "epoch": 4.922067769281509, + "grad_norm": 0.00795203447341919, + "learning_rate": 1.4806292911871965e-09, + "loss": 0.0, + "num_input_tokens_seen": 135781480, + "step": 201475 + }, + { + "epoch": 4.922189920113356, + "grad_norm": 0.0001877306931419298, + "learning_rate": 1.4759940507251822e-09, + "loss": 0.0, + "num_input_tokens_seen": 135785256, + "step": 201480 + }, + { + "epoch": 4.922312070945203, + "grad_norm": 0.0031501068733632565, + "learning_rate": 1.47136607179843e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135788968, + "step": 201485 + }, + { + "epoch": 4.92243422177705, + "grad_norm": 0.005074269603937864, + "learning_rate": 1.4667453544403573e-09, + "loss": 0.0, + "num_input_tokens_seen": 135791784, + "step": 201490 + }, + { + "epoch": 4.922556372608898, + "grad_norm": 0.000645491003524512, + "learning_rate": 1.4621318986847154e-09, + "loss": 0.0, + "num_input_tokens_seen": 135794984, + "step": 201495 + }, + { + "epoch": 4.922678523440744, + "grad_norm": 8.181369776139036e-05, + "learning_rate": 1.4575257045650325e-09, + "loss": 0.0, + "num_input_tokens_seen": 135798568, + "step": 201500 + }, + { + "epoch": 4.922800674272592, + "grad_norm": 0.007807936519384384, + "learning_rate": 1.4529267721148375e-09, + "loss": 0.0037, + "num_input_tokens_seen": 135801704, + "step": 201505 + }, + { + "epoch": 4.922922825104439, + "grad_norm": 2.3001324734650552e-05, + "learning_rate": 1.4483351013675482e-09, + "loss": 0.0, + "num_input_tokens_seen": 135805288, + "step": 201510 + }, + { + "epoch": 4.923044975936286, + "grad_norm": 0.029084760695695877, + "learning_rate": 1.4437506923564714e-09, + "loss": 0.0, + "num_input_tokens_seen": 135809128, + "step": 201515 + }, + { + "epoch": 4.923167126768133, + "grad_norm": 0.0006325517897494137, + "learning_rate": 1.4391735451150245e-09, + "loss": 0.0, + "num_input_tokens_seen": 135812200, + "step": 201520 + }, + { + "epoch": 4.923289277599981, + "grad_norm": 0.00041451648576185107, + "learning_rate": 1.4346036596765142e-09, + "loss": 0.0, + "num_input_tokens_seen": 135815912, + "step": 201525 + }, + { + "epoch": 4.9234114284318276, + "grad_norm": 7.652008935110644e-05, + "learning_rate": 1.4300410360741365e-09, + "loss": 0.0, + "num_input_tokens_seen": 135819880, + "step": 201530 + }, + { + "epoch": 4.923533579263674, + "grad_norm": 0.0031949025578796864, + "learning_rate": 1.425485674341087e-09, + "loss": 0.0, + "num_input_tokens_seen": 135823400, + "step": 201535 + }, + { + "epoch": 4.923655730095522, + "grad_norm": 0.00017632555682212114, + "learning_rate": 1.4209375745105613e-09, + "loss": 0.0, + "num_input_tokens_seen": 135826408, + "step": 201540 + }, + { + "epoch": 4.92377788092737, + "grad_norm": 0.0007741831941530108, + "learning_rate": 1.4163967366154217e-09, + "loss": 0.0, + "num_input_tokens_seen": 135829928, + "step": 201545 + }, + { + "epoch": 4.923900031759216, + "grad_norm": 0.0008055263315327466, + "learning_rate": 1.4118631606889752e-09, + "loss": 0.0, + "num_input_tokens_seen": 135832808, + "step": 201550 + }, + { + "epoch": 4.924022182591063, + "grad_norm": 4.663014260586351e-05, + "learning_rate": 1.4073368467639735e-09, + "loss": 0.0, + "num_input_tokens_seen": 135836072, + "step": 201555 + }, + { + "epoch": 4.924144333422911, + "grad_norm": 0.002874054480344057, + "learning_rate": 1.40281779487339e-09, + "loss": 0.0, + "num_input_tokens_seen": 135839720, + "step": 201560 + }, + { + "epoch": 4.924266484254757, + "grad_norm": 0.0008985823951661587, + "learning_rate": 1.3983060050500872e-09, + "loss": 0.0, + "num_input_tokens_seen": 135843688, + "step": 201565 + }, + { + "epoch": 4.924388635086605, + "grad_norm": 0.0002772827574517578, + "learning_rate": 1.393801477327039e-09, + "loss": 0.0003, + "num_input_tokens_seen": 135847528, + "step": 201570 + }, + { + "epoch": 4.924510785918452, + "grad_norm": 0.0003808287438005209, + "learning_rate": 1.3893042117367748e-09, + "loss": 0.0, + "num_input_tokens_seen": 135851048, + "step": 201575 + }, + { + "epoch": 4.9246329367502995, + "grad_norm": 0.07541102170944214, + "learning_rate": 1.3848142083120462e-09, + "loss": 0.0, + "num_input_tokens_seen": 135854120, + "step": 201580 + }, + { + "epoch": 4.924755087582146, + "grad_norm": 0.0004984051920473576, + "learning_rate": 1.3803314670856047e-09, + "loss": 0.0, + "num_input_tokens_seen": 135857192, + "step": 201585 + }, + { + "epoch": 4.924877238413994, + "grad_norm": 0.0004298131098039448, + "learning_rate": 1.3758559880898691e-09, + "loss": 0.0, + "num_input_tokens_seen": 135861160, + "step": 201590 + }, + { + "epoch": 4.924999389245841, + "grad_norm": 0.00010070007556350902, + "learning_rate": 1.3713877713575905e-09, + "loss": 0.0, + "num_input_tokens_seen": 135864232, + "step": 201595 + }, + { + "epoch": 4.925121540077688, + "grad_norm": 0.0006499238079413772, + "learning_rate": 1.366926816921188e-09, + "loss": 0.0, + "num_input_tokens_seen": 135867752, + "step": 201600 + }, + { + "epoch": 4.925243690909535, + "grad_norm": 3.205785105819814e-05, + "learning_rate": 1.3624731248130794e-09, + "loss": 0.0, + "num_input_tokens_seen": 135871080, + "step": 201605 + }, + { + "epoch": 4.925365841741383, + "grad_norm": 0.0239634420722723, + "learning_rate": 1.3580266950656837e-09, + "loss": 0.0, + "num_input_tokens_seen": 135874152, + "step": 201610 + }, + { + "epoch": 4.925487992573229, + "grad_norm": 7.261833667755127, + "learning_rate": 1.3535875277113085e-09, + "loss": 0.0288, + "num_input_tokens_seen": 135877672, + "step": 201615 + }, + { + "epoch": 4.925610143405077, + "grad_norm": 1.7677119103609584e-05, + "learning_rate": 1.349155622782261e-09, + "loss": 0.0, + "num_input_tokens_seen": 135881000, + "step": 201620 + }, + { + "epoch": 4.925732294236924, + "grad_norm": 1.3271970601635985e-05, + "learning_rate": 1.3447309803107376e-09, + "loss": 0.0, + "num_input_tokens_seen": 135884520, + "step": 201625 + }, + { + "epoch": 4.9258544450687705, + "grad_norm": 0.00042806309647858143, + "learning_rate": 1.340313600328935e-09, + "loss": 0.0, + "num_input_tokens_seen": 135887912, + "step": 201630 + }, + { + "epoch": 4.925976595900618, + "grad_norm": 8.053508645389229e-05, + "learning_rate": 1.3359034828689385e-09, + "loss": 0.0, + "num_input_tokens_seen": 135890920, + "step": 201635 + }, + { + "epoch": 4.926098746732466, + "grad_norm": 0.002819041023030877, + "learning_rate": 1.3315006279629448e-09, + "loss": 0.0, + "num_input_tokens_seen": 135894120, + "step": 201640 + }, + { + "epoch": 4.9262208975643125, + "grad_norm": 0.0016093713929876685, + "learning_rate": 1.327105035642817e-09, + "loss": 0.0, + "num_input_tokens_seen": 135897576, + "step": 201645 + }, + { + "epoch": 4.926343048396159, + "grad_norm": 0.006175595335662365, + "learning_rate": 1.3227167059406407e-09, + "loss": 0.0, + "num_input_tokens_seen": 135900776, + "step": 201650 + }, + { + "epoch": 4.926465199228007, + "grad_norm": 0.00012842906289733946, + "learning_rate": 1.3183356388882794e-09, + "loss": 0.0, + "num_input_tokens_seen": 135904360, + "step": 201655 + }, + { + "epoch": 4.926587350059854, + "grad_norm": 9.985136421164498e-05, + "learning_rate": 1.3139618345175962e-09, + "loss": 0.0, + "num_input_tokens_seen": 135907496, + "step": 201660 + }, + { + "epoch": 4.926709500891701, + "grad_norm": 0.0026760785840451717, + "learning_rate": 1.3095952928603438e-09, + "loss": 0.0, + "num_input_tokens_seen": 135911016, + "step": 201665 + }, + { + "epoch": 4.926831651723548, + "grad_norm": 2.7883741495315917e-05, + "learning_rate": 1.3052360139483853e-09, + "loss": 0.0, + "num_input_tokens_seen": 135914344, + "step": 201670 + }, + { + "epoch": 4.926953802555396, + "grad_norm": 0.00010994355397997424, + "learning_rate": 1.3008839978133623e-09, + "loss": 0.0, + "num_input_tokens_seen": 135917480, + "step": 201675 + }, + { + "epoch": 4.927075953387242, + "grad_norm": 0.00031039112946018577, + "learning_rate": 1.296539244486916e-09, + "loss": 0.0, + "num_input_tokens_seen": 135921192, + "step": 201680 + }, + { + "epoch": 4.92719810421909, + "grad_norm": 9.488489740760997e-05, + "learning_rate": 1.292201754000688e-09, + "loss": 0.0, + "num_input_tokens_seen": 135925416, + "step": 201685 + }, + { + "epoch": 4.927320255050937, + "grad_norm": 0.04363443702459335, + "learning_rate": 1.2878715263860973e-09, + "loss": 0.0, + "num_input_tokens_seen": 135928488, + "step": 201690 + }, + { + "epoch": 4.927442405882784, + "grad_norm": 0.000588393013458699, + "learning_rate": 1.2835485616748964e-09, + "loss": 0.0, + "num_input_tokens_seen": 135931624, + "step": 201695 + }, + { + "epoch": 4.927564556714631, + "grad_norm": 0.0011282862396910787, + "learning_rate": 1.2792328598981716e-09, + "loss": 0.0, + "num_input_tokens_seen": 135935400, + "step": 201700 + }, + { + "epoch": 4.927686707546479, + "grad_norm": 0.0069139981642365456, + "learning_rate": 1.2749244210875643e-09, + "loss": 0.0, + "num_input_tokens_seen": 135938728, + "step": 201705 + }, + { + "epoch": 4.9278088583783255, + "grad_norm": 0.003317581955343485, + "learning_rate": 1.2706232452743826e-09, + "loss": 0.0, + "num_input_tokens_seen": 135941992, + "step": 201710 + }, + { + "epoch": 4.927931009210173, + "grad_norm": 0.00010448665852891281, + "learning_rate": 1.2663293324897128e-09, + "loss": 0.0, + "num_input_tokens_seen": 135945640, + "step": 201715 + }, + { + "epoch": 4.92805316004202, + "grad_norm": 0.00011394867760827765, + "learning_rate": 1.2620426827650854e-09, + "loss": 0.0, + "num_input_tokens_seen": 135948904, + "step": 201720 + }, + { + "epoch": 4.928175310873867, + "grad_norm": 0.0020071258768439293, + "learning_rate": 1.2577632961313644e-09, + "loss": 0.0002, + "num_input_tokens_seen": 135952360, + "step": 201725 + }, + { + "epoch": 4.928297461705714, + "grad_norm": 0.0017316938610747457, + "learning_rate": 1.2534911726199693e-09, + "loss": 0.0, + "num_input_tokens_seen": 135955688, + "step": 201730 + }, + { + "epoch": 4.928419612537561, + "grad_norm": 0.0001321783784078434, + "learning_rate": 1.2492263122616532e-09, + "loss": 0.0, + "num_input_tokens_seen": 135959080, + "step": 201735 + }, + { + "epoch": 4.928541763369409, + "grad_norm": 2.647805195010733e-05, + "learning_rate": 1.2449687150877242e-09, + "loss": 0.0, + "num_input_tokens_seen": 135963048, + "step": 201740 + }, + { + "epoch": 4.928663914201255, + "grad_norm": 0.00029094170895405114, + "learning_rate": 1.2407183811289357e-09, + "loss": 0.0006, + "num_input_tokens_seen": 135966120, + "step": 201745 + }, + { + "epoch": 4.928786065033103, + "grad_norm": 0.0003574473666958511, + "learning_rate": 1.2364753104163738e-09, + "loss": 0.0, + "num_input_tokens_seen": 135969448, + "step": 201750 + }, + { + "epoch": 4.92890821586495, + "grad_norm": 0.0008297132444567978, + "learning_rate": 1.232239502980681e-09, + "loss": 0.0146, + "num_input_tokens_seen": 135972520, + "step": 201755 + }, + { + "epoch": 4.929030366696797, + "grad_norm": 0.0002502483839634806, + "learning_rate": 1.228010958852832e-09, + "loss": 0.0, + "num_input_tokens_seen": 135975784, + "step": 201760 + }, + { + "epoch": 4.929152517528644, + "grad_norm": 2.7652360586216673e-05, + "learning_rate": 1.2237896780635803e-09, + "loss": 0.0, + "num_input_tokens_seen": 135978984, + "step": 201765 + }, + { + "epoch": 4.929274668360492, + "grad_norm": 0.0005179181462153792, + "learning_rate": 1.2195756606434571e-09, + "loss": 0.0, + "num_input_tokens_seen": 135982248, + "step": 201770 + }, + { + "epoch": 4.9293968191923385, + "grad_norm": 0.0005535013624466956, + "learning_rate": 1.2153689066233263e-09, + "loss": 0.0, + "num_input_tokens_seen": 135985960, + "step": 201775 + }, + { + "epoch": 4.929518970024186, + "grad_norm": 0.00016883248463273048, + "learning_rate": 1.2111694160336083e-09, + "loss": 0.0, + "num_input_tokens_seen": 135989800, + "step": 201780 + }, + { + "epoch": 4.929641120856033, + "grad_norm": 0.0014849066501483321, + "learning_rate": 1.2069771889049452e-09, + "loss": 0.0, + "num_input_tokens_seen": 135993384, + "step": 201785 + }, + { + "epoch": 4.9297632716878805, + "grad_norm": 0.00024154149286914617, + "learning_rate": 1.202792225267757e-09, + "loss": 0.0, + "num_input_tokens_seen": 135997032, + "step": 201790 + }, + { + "epoch": 4.929885422519727, + "grad_norm": 0.001064109499566257, + "learning_rate": 1.1986145251524637e-09, + "loss": 0.0, + "num_input_tokens_seen": 136000360, + "step": 201795 + }, + { + "epoch": 4.930007573351575, + "grad_norm": 8.710243128007278e-05, + "learning_rate": 1.1944440885895968e-09, + "loss": 0.0, + "num_input_tokens_seen": 136003560, + "step": 201800 + }, + { + "epoch": 4.930129724183422, + "grad_norm": 1.2830115338147152e-05, + "learning_rate": 1.190280915609354e-09, + "loss": 0.0, + "num_input_tokens_seen": 136006824, + "step": 201805 + }, + { + "epoch": 4.930251875015269, + "grad_norm": 5.2895011322107166e-05, + "learning_rate": 1.1861250062419336e-09, + "loss": 0.0, + "num_input_tokens_seen": 136010280, + "step": 201810 + }, + { + "epoch": 4.930374025847116, + "grad_norm": 4.9812042561825365e-05, + "learning_rate": 1.1819763605177557e-09, + "loss": 0.0, + "num_input_tokens_seen": 136013352, + "step": 201815 + }, + { + "epoch": 4.930496176678963, + "grad_norm": 0.000558437081053853, + "learning_rate": 1.1778349784669073e-09, + "loss": 0.0, + "num_input_tokens_seen": 136017064, + "step": 201820 + }, + { + "epoch": 4.93061832751081, + "grad_norm": 0.0009660838986746967, + "learning_rate": 1.1737008601194754e-09, + "loss": 0.0, + "num_input_tokens_seen": 136020648, + "step": 201825 + }, + { + "epoch": 4.930740478342657, + "grad_norm": 8.856524073053151e-05, + "learning_rate": 1.169574005505547e-09, + "loss": 0.0, + "num_input_tokens_seen": 136024168, + "step": 201830 + }, + { + "epoch": 4.930862629174505, + "grad_norm": 0.00048212928231805563, + "learning_rate": 1.1654544146550982e-09, + "loss": 0.0, + "num_input_tokens_seen": 136027432, + "step": 201835 + }, + { + "epoch": 4.9309847800063515, + "grad_norm": 0.004406995605677366, + "learning_rate": 1.161342087598105e-09, + "loss": 0.0, + "num_input_tokens_seen": 136030952, + "step": 201840 + }, + { + "epoch": 4.931106930838199, + "grad_norm": 0.0005184956826269627, + "learning_rate": 1.1572370243645434e-09, + "loss": 0.0, + "num_input_tokens_seen": 136035048, + "step": 201845 + }, + { + "epoch": 4.931229081670046, + "grad_norm": 0.00019828364020213485, + "learning_rate": 1.1531392249841675e-09, + "loss": 0.0, + "num_input_tokens_seen": 136038632, + "step": 201850 + }, + { + "epoch": 4.9313512325018936, + "grad_norm": 0.0024534957483410835, + "learning_rate": 1.1490486894868422e-09, + "loss": 0.0, + "num_input_tokens_seen": 136042088, + "step": 201855 + }, + { + "epoch": 4.93147338333374, + "grad_norm": 0.00020765556837432086, + "learning_rate": 1.1449654179022105e-09, + "loss": 0.0, + "num_input_tokens_seen": 136045416, + "step": 201860 + }, + { + "epoch": 4.931595534165588, + "grad_norm": 0.00010033969738287851, + "learning_rate": 1.1408894102601374e-09, + "loss": 0.0, + "num_input_tokens_seen": 136048552, + "step": 201865 + }, + { + "epoch": 4.931717684997435, + "grad_norm": 0.0007195353973656893, + "learning_rate": 1.1368206665901548e-09, + "loss": 0.0, + "num_input_tokens_seen": 136052200, + "step": 201870 + }, + { + "epoch": 4.931839835829282, + "grad_norm": 0.02347908727824688, + "learning_rate": 1.1327591869219055e-09, + "loss": 0.0, + "num_input_tokens_seen": 136055720, + "step": 201875 + }, + { + "epoch": 4.931961986661129, + "grad_norm": 0.006182366982102394, + "learning_rate": 1.1287049712849217e-09, + "loss": 0.0, + "num_input_tokens_seen": 136058728, + "step": 201880 + }, + { + "epoch": 4.932084137492977, + "grad_norm": 7.819590246072039e-05, + "learning_rate": 1.1246580197086242e-09, + "loss": 0.0, + "num_input_tokens_seen": 136061992, + "step": 201885 + }, + { + "epoch": 4.932206288324823, + "grad_norm": 0.0008825138211250305, + "learning_rate": 1.120618332222434e-09, + "loss": 0.0, + "num_input_tokens_seen": 136065192, + "step": 201890 + }, + { + "epoch": 4.93232843915667, + "grad_norm": 0.00010310118523193523, + "learning_rate": 1.1165859088558826e-09, + "loss": 0.0, + "num_input_tokens_seen": 136068584, + "step": 201895 + }, + { + "epoch": 4.932450589988518, + "grad_norm": 0.006327376700937748, + "learning_rate": 1.1125607496380584e-09, + "loss": 0.0, + "num_input_tokens_seen": 136071848, + "step": 201900 + }, + { + "epoch": 4.9325727408203655, + "grad_norm": 0.00017610739450901747, + "learning_rate": 1.108542854598382e-09, + "loss": 0.0, + "num_input_tokens_seen": 136075304, + "step": 201905 + }, + { + "epoch": 4.932694891652212, + "grad_norm": 5.935440185567131e-06, + "learning_rate": 1.1045322237660527e-09, + "loss": 0.0, + "num_input_tokens_seen": 136078312, + "step": 201910 + }, + { + "epoch": 4.932817042484059, + "grad_norm": 0.0005160932778380811, + "learning_rate": 1.1005288571702687e-09, + "loss": 0.0, + "num_input_tokens_seen": 136081896, + "step": 201915 + }, + { + "epoch": 4.932939193315907, + "grad_norm": 0.0001272395602427423, + "learning_rate": 1.0965327548401183e-09, + "loss": 0.0, + "num_input_tokens_seen": 136085480, + "step": 201920 + }, + { + "epoch": 4.933061344147753, + "grad_norm": 0.0070088147185742855, + "learning_rate": 1.092543916804689e-09, + "loss": 0.0, + "num_input_tokens_seen": 136089128, + "step": 201925 + }, + { + "epoch": 4.933183494979601, + "grad_norm": 2.4680461137904786e-05, + "learning_rate": 1.088562343092847e-09, + "loss": 0.0, + "num_input_tokens_seen": 136092584, + "step": 201930 + }, + { + "epoch": 4.933305645811448, + "grad_norm": 0.0004222989082336426, + "learning_rate": 1.084588033733791e-09, + "loss": 0.0, + "num_input_tokens_seen": 136095976, + "step": 201935 + }, + { + "epoch": 4.933427796643295, + "grad_norm": 7.98075197963044e-05, + "learning_rate": 1.0806209887561646e-09, + "loss": 0.0, + "num_input_tokens_seen": 136099624, + "step": 201940 + }, + { + "epoch": 4.933549947475142, + "grad_norm": 0.0002973111695609987, + "learning_rate": 1.0766612081889448e-09, + "loss": 0.0, + "num_input_tokens_seen": 136103272, + "step": 201945 + }, + { + "epoch": 4.93367209830699, + "grad_norm": 0.015840444713830948, + "learning_rate": 1.0727086920609973e-09, + "loss": 0.0, + "num_input_tokens_seen": 136106472, + "step": 201950 + }, + { + "epoch": 4.9337942491388365, + "grad_norm": 0.0004685459425672889, + "learning_rate": 1.068763440400966e-09, + "loss": 0.0, + "num_input_tokens_seen": 136109864, + "step": 201955 + }, + { + "epoch": 4.933916399970684, + "grad_norm": 2.5634884877945296e-05, + "learning_rate": 1.0648254532376055e-09, + "loss": 0.0, + "num_input_tokens_seen": 136113192, + "step": 201960 + }, + { + "epoch": 4.934038550802531, + "grad_norm": 2.3012027668301016e-05, + "learning_rate": 1.0608947305994487e-09, + "loss": 0.0, + "num_input_tokens_seen": 136116456, + "step": 201965 + }, + { + "epoch": 4.9341607016343785, + "grad_norm": 0.0007491564028896391, + "learning_rate": 1.0569712725151392e-09, + "loss": 0.0, + "num_input_tokens_seen": 136119848, + "step": 201970 + }, + { + "epoch": 4.934282852466225, + "grad_norm": 0.0005802642554044724, + "learning_rate": 1.0530550790132098e-09, + "loss": 0.0, + "num_input_tokens_seen": 136122664, + "step": 201975 + }, + { + "epoch": 4.934405003298073, + "grad_norm": 6.311324978014454e-05, + "learning_rate": 1.0491461501221932e-09, + "loss": 0.0279, + "num_input_tokens_seen": 136126056, + "step": 201980 + }, + { + "epoch": 4.93452715412992, + "grad_norm": 2.978726479341276e-05, + "learning_rate": 1.0452444858705113e-09, + "loss": 0.0, + "num_input_tokens_seen": 136129832, + "step": 201985 + }, + { + "epoch": 4.934649304961766, + "grad_norm": 0.002146832412108779, + "learning_rate": 1.0413500862864743e-09, + "loss": 0.0, + "num_input_tokens_seen": 136132968, + "step": 201990 + }, + { + "epoch": 4.934771455793614, + "grad_norm": 0.0019067926332354546, + "learning_rate": 1.0374629513983935e-09, + "loss": 0.0, + "num_input_tokens_seen": 136136488, + "step": 201995 + }, + { + "epoch": 4.934893606625462, + "grad_norm": 2.646358552738093e-05, + "learning_rate": 1.0335830812345792e-09, + "loss": 0.0, + "num_input_tokens_seen": 136140136, + "step": 202000 + }, + { + "epoch": 4.935015757457308, + "grad_norm": 0.0009238035418093204, + "learning_rate": 1.0297104758232311e-09, + "loss": 0.0, + "num_input_tokens_seen": 136143272, + "step": 202005 + }, + { + "epoch": 4.935137908289155, + "grad_norm": 0.0005739349289797246, + "learning_rate": 1.0258451351925491e-09, + "loss": 0.0, + "num_input_tokens_seen": 136146408, + "step": 202010 + }, + { + "epoch": 4.935260059121003, + "grad_norm": 4.488255399337504e-06, + "learning_rate": 1.0219870593706215e-09, + "loss": 0.0, + "num_input_tokens_seen": 136149480, + "step": 202015 + }, + { + "epoch": 4.9353822099528495, + "grad_norm": 0.006313847843557596, + "learning_rate": 1.0181362483854262e-09, + "loss": 0.0, + "num_input_tokens_seen": 136152616, + "step": 202020 + }, + { + "epoch": 4.935504360784697, + "grad_norm": 0.00014504387218039483, + "learning_rate": 1.0142927022650516e-09, + "loss": 0.0, + "num_input_tokens_seen": 136155688, + "step": 202025 + }, + { + "epoch": 4.935626511616544, + "grad_norm": 0.00022232808987610042, + "learning_rate": 1.0104564210374756e-09, + "loss": 0.0, + "num_input_tokens_seen": 136159016, + "step": 202030 + }, + { + "epoch": 4.9357486624483915, + "grad_norm": 0.0006274926709011197, + "learning_rate": 1.0066274047305645e-09, + "loss": 0.0, + "num_input_tokens_seen": 136162600, + "step": 202035 + }, + { + "epoch": 4.935870813280238, + "grad_norm": 0.00015691977750975639, + "learning_rate": 1.0028056533720742e-09, + "loss": 0.0, + "num_input_tokens_seen": 136165864, + "step": 202040 + }, + { + "epoch": 4.935992964112086, + "grad_norm": 0.0005563534214161336, + "learning_rate": 9.98991166989982e-10, + "loss": 0.0, + "num_input_tokens_seen": 136169128, + "step": 202045 + }, + { + "epoch": 4.936115114943933, + "grad_norm": 0.0008726372034288943, + "learning_rate": 9.951839456119327e-10, + "loss": 0.0, + "num_input_tokens_seen": 136173224, + "step": 202050 + }, + { + "epoch": 4.93623726577578, + "grad_norm": 0.0030929571948945522, + "learning_rate": 9.913839892654596e-10, + "loss": 0.0, + "num_input_tokens_seen": 136176936, + "step": 202055 + }, + { + "epoch": 4.936359416607627, + "grad_norm": 0.00030830607283860445, + "learning_rate": 9.875912979784296e-10, + "loss": 0.0, + "num_input_tokens_seen": 136180392, + "step": 202060 + }, + { + "epoch": 4.936481567439475, + "grad_norm": 3.1983137887436897e-05, + "learning_rate": 9.83805871778376e-10, + "loss": 0.0, + "num_input_tokens_seen": 136183400, + "step": 202065 + }, + { + "epoch": 4.936603718271321, + "grad_norm": 0.00014813434972893447, + "learning_rate": 9.800277106927213e-10, + "loss": 0.0, + "num_input_tokens_seen": 136186792, + "step": 202070 + }, + { + "epoch": 4.936725869103169, + "grad_norm": 0.0002679832396097481, + "learning_rate": 9.762568147491102e-10, + "loss": 0.0, + "num_input_tokens_seen": 136190376, + "step": 202075 + }, + { + "epoch": 4.936848019935016, + "grad_norm": 4.157804505666718e-05, + "learning_rate": 9.72493183974743e-10, + "loss": 0.0, + "num_input_tokens_seen": 136193384, + "step": 202080 + }, + { + "epoch": 4.9369701707668625, + "grad_norm": 0.0003346680023241788, + "learning_rate": 9.687368183972644e-10, + "loss": 0.0, + "num_input_tokens_seen": 136196840, + "step": 202085 + }, + { + "epoch": 4.93709232159871, + "grad_norm": 1.67577982210787e-05, + "learning_rate": 9.649877180437637e-10, + "loss": 0.0, + "num_input_tokens_seen": 136200360, + "step": 202090 + }, + { + "epoch": 4.937214472430557, + "grad_norm": 0.009562824852764606, + "learning_rate": 9.612458829415527e-10, + "loss": 0.0, + "num_input_tokens_seen": 136203944, + "step": 202095 + }, + { + "epoch": 4.9373366232624045, + "grad_norm": 0.001202791347168386, + "learning_rate": 9.575113131178315e-10, + "loss": 0.0, + "num_input_tokens_seen": 136207528, + "step": 202100 + }, + { + "epoch": 4.937458774094251, + "grad_norm": 0.0003408331540413201, + "learning_rate": 9.537840085998006e-10, + "loss": 0.041, + "num_input_tokens_seen": 136210472, + "step": 202105 + }, + { + "epoch": 4.937580924926099, + "grad_norm": 0.00019882139167748392, + "learning_rate": 9.500639694146606e-10, + "loss": 0.0, + "num_input_tokens_seen": 136213544, + "step": 202110 + }, + { + "epoch": 4.937703075757946, + "grad_norm": 1.4465913409367204e-05, + "learning_rate": 9.46351195589279e-10, + "loss": 0.0, + "num_input_tokens_seen": 136216616, + "step": 202115 + }, + { + "epoch": 4.937825226589793, + "grad_norm": 7.180091051850468e-05, + "learning_rate": 9.426456871508559e-10, + "loss": 0.0, + "num_input_tokens_seen": 136220008, + "step": 202120 + }, + { + "epoch": 4.93794737742164, + "grad_norm": 1.3721350114792585e-05, + "learning_rate": 9.38947444126148e-10, + "loss": 0.0, + "num_input_tokens_seen": 136223080, + "step": 202125 + }, + { + "epoch": 4.938069528253488, + "grad_norm": 0.0034140758216381073, + "learning_rate": 9.352564665421337e-10, + "loss": 0.0, + "num_input_tokens_seen": 136226664, + "step": 202130 + }, + { + "epoch": 4.938191679085334, + "grad_norm": 0.0001253626251127571, + "learning_rate": 9.315727544256801e-10, + "loss": 0.0, + "num_input_tokens_seen": 136229928, + "step": 202135 + }, + { + "epoch": 4.938313829917182, + "grad_norm": 0.0001573254558024928, + "learning_rate": 9.27896307803433e-10, + "loss": 0.0, + "num_input_tokens_seen": 136232936, + "step": 202140 + }, + { + "epoch": 4.938435980749029, + "grad_norm": 0.00011776157043641433, + "learning_rate": 9.242271267023705e-10, + "loss": 0.0, + "num_input_tokens_seen": 136236264, + "step": 202145 + }, + { + "epoch": 4.938558131580876, + "grad_norm": 0.0010260837152600288, + "learning_rate": 9.20565211149027e-10, + "loss": 0.0002, + "num_input_tokens_seen": 136239400, + "step": 202150 + }, + { + "epoch": 4.938680282412723, + "grad_norm": 9.85598744591698e-05, + "learning_rate": 9.169105611699369e-10, + "loss": 0.0, + "num_input_tokens_seen": 136242536, + "step": 202155 + }, + { + "epoch": 4.93880243324457, + "grad_norm": 3.4223503462271765e-05, + "learning_rate": 9.132631767919674e-10, + "loss": 0.0, + "num_input_tokens_seen": 136245736, + "step": 202160 + }, + { + "epoch": 4.9389245840764175, + "grad_norm": 9.328880878456403e-06, + "learning_rate": 9.096230580413201e-10, + "loss": 0.0, + "num_input_tokens_seen": 136249064, + "step": 202165 + }, + { + "epoch": 4.939046734908265, + "grad_norm": 0.0008164794999174774, + "learning_rate": 9.05990204944751e-10, + "loss": 0.0, + "num_input_tokens_seen": 136252456, + "step": 202170 + }, + { + "epoch": 4.939168885740112, + "grad_norm": 0.00014976067177485675, + "learning_rate": 9.023646175284616e-10, + "loss": 0.0, + "num_input_tokens_seen": 136256104, + "step": 202175 + }, + { + "epoch": 4.939291036571959, + "grad_norm": 2.89030449494021e-05, + "learning_rate": 8.987462958189862e-10, + "loss": 0.0, + "num_input_tokens_seen": 136259432, + "step": 202180 + }, + { + "epoch": 4.939413187403806, + "grad_norm": 0.0013129940489307046, + "learning_rate": 8.95135239842415e-10, + "loss": 0.0, + "num_input_tokens_seen": 136262888, + "step": 202185 + }, + { + "epoch": 4.939535338235653, + "grad_norm": 0.01688556745648384, + "learning_rate": 8.915314496252824e-10, + "loss": 0.0, + "num_input_tokens_seen": 136265896, + "step": 202190 + }, + { + "epoch": 4.939657489067501, + "grad_norm": 0.0009134452557191253, + "learning_rate": 8.879349251935675e-10, + "loss": 0.0, + "num_input_tokens_seen": 136269288, + "step": 202195 + }, + { + "epoch": 4.939779639899347, + "grad_norm": 0.0002523370203562081, + "learning_rate": 8.843456665735827e-10, + "loss": 0.0, + "num_input_tokens_seen": 136272552, + "step": 202200 + }, + { + "epoch": 4.939901790731195, + "grad_norm": 0.00024008109176065773, + "learning_rate": 8.807636737913071e-10, + "loss": 0.0, + "num_input_tokens_seen": 136275688, + "step": 202205 + }, + { + "epoch": 4.940023941563042, + "grad_norm": 0.0004295228864066303, + "learning_rate": 8.771889468728311e-10, + "loss": 0.0, + "num_input_tokens_seen": 136278632, + "step": 202210 + }, + { + "epoch": 4.940146092394889, + "grad_norm": 3.370600097696297e-05, + "learning_rate": 8.736214858442448e-10, + "loss": 0.0, + "num_input_tokens_seen": 136282152, + "step": 202215 + }, + { + "epoch": 4.940268243226736, + "grad_norm": 7.857001764932647e-05, + "learning_rate": 8.700612907314164e-10, + "loss": 0.0, + "num_input_tokens_seen": 136285160, + "step": 202220 + }, + { + "epoch": 4.940390394058584, + "grad_norm": 4.169629391981289e-05, + "learning_rate": 8.665083615602142e-10, + "loss": 0.0576, + "num_input_tokens_seen": 136288424, + "step": 202225 + }, + { + "epoch": 4.940512544890431, + "grad_norm": 5.379470530897379e-05, + "learning_rate": 8.629626983565064e-10, + "loss": 0.1, + "num_input_tokens_seen": 136291560, + "step": 202230 + }, + { + "epoch": 4.940634695722278, + "grad_norm": 0.0005903999553993344, + "learning_rate": 8.5942430114605e-10, + "loss": 0.0, + "num_input_tokens_seen": 136294376, + "step": 202235 + }, + { + "epoch": 4.940756846554125, + "grad_norm": 1.558191615913529e-05, + "learning_rate": 8.558931699546023e-10, + "loss": 0.0, + "num_input_tokens_seen": 136297384, + "step": 202240 + }, + { + "epoch": 4.940878997385973, + "grad_norm": 0.0004315730766393244, + "learning_rate": 8.523693048078096e-10, + "loss": 0.0, + "num_input_tokens_seen": 136300968, + "step": 202245 + }, + { + "epoch": 4.941001148217819, + "grad_norm": 0.005034905392676592, + "learning_rate": 8.488527057313177e-10, + "loss": 0.0, + "num_input_tokens_seen": 136304552, + "step": 202250 + }, + { + "epoch": 4.941123299049666, + "grad_norm": 2.2010641259839758e-05, + "learning_rate": 8.45343372750773e-10, + "loss": 0.0, + "num_input_tokens_seen": 136308520, + "step": 202255 + }, + { + "epoch": 4.941245449881514, + "grad_norm": 0.00024630461120978, + "learning_rate": 8.418413058915997e-10, + "loss": 0.0, + "num_input_tokens_seen": 136312104, + "step": 202260 + }, + { + "epoch": 4.941367600713361, + "grad_norm": 5.707953096134588e-05, + "learning_rate": 8.383465051792216e-10, + "loss": 0.0002, + "num_input_tokens_seen": 136315304, + "step": 202265 + }, + { + "epoch": 4.941489751545208, + "grad_norm": 0.0013829541858285666, + "learning_rate": 8.34858970639285e-10, + "loss": 0.0, + "num_input_tokens_seen": 136318312, + "step": 202270 + }, + { + "epoch": 4.941611902377055, + "grad_norm": 0.0006188564002513885, + "learning_rate": 8.31378702296881e-10, + "loss": 0.0376, + "num_input_tokens_seen": 136322024, + "step": 202275 + }, + { + "epoch": 4.9417340532089025, + "grad_norm": 0.0002772464358713478, + "learning_rate": 8.279057001774336e-10, + "loss": 0.0, + "num_input_tokens_seen": 136325160, + "step": 202280 + }, + { + "epoch": 4.941856204040749, + "grad_norm": 0.00021161598851904273, + "learning_rate": 8.244399643062561e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136328296, + "step": 202285 + }, + { + "epoch": 4.941978354872597, + "grad_norm": 2.8740387278958224e-05, + "learning_rate": 8.209814947084392e-10, + "loss": 0.0, + "num_input_tokens_seen": 136332584, + "step": 202290 + }, + { + "epoch": 4.942100505704444, + "grad_norm": 0.00011233813711442053, + "learning_rate": 8.175302914092963e-10, + "loss": 0.0, + "num_input_tokens_seen": 136335912, + "step": 202295 + }, + { + "epoch": 4.942222656536291, + "grad_norm": 4.50963998446241e-05, + "learning_rate": 8.140863544336963e-10, + "loss": 0.0, + "num_input_tokens_seen": 136338856, + "step": 202300 + }, + { + "epoch": 4.942344807368138, + "grad_norm": 0.04743233323097229, + "learning_rate": 8.106496838069521e-10, + "loss": 0.0, + "num_input_tokens_seen": 136341928, + "step": 202305 + }, + { + "epoch": 4.942466958199986, + "grad_norm": 0.00042162369936704636, + "learning_rate": 8.072202795538219e-10, + "loss": 0.0, + "num_input_tokens_seen": 136345256, + "step": 202310 + }, + { + "epoch": 4.942589109031832, + "grad_norm": 2.6135880034416914e-05, + "learning_rate": 8.037981416992857e-10, + "loss": 0.0, + "num_input_tokens_seen": 136348456, + "step": 202315 + }, + { + "epoch": 4.94271125986368, + "grad_norm": 0.0002569279167801142, + "learning_rate": 8.003832702683233e-10, + "loss": 0.0, + "num_input_tokens_seen": 136351720, + "step": 202320 + }, + { + "epoch": 4.942833410695527, + "grad_norm": 0.0001678541739238426, + "learning_rate": 7.969756652858039e-10, + "loss": 0.0, + "num_input_tokens_seen": 136355752, + "step": 202325 + }, + { + "epoch": 4.942955561527374, + "grad_norm": 4.449135303730145e-05, + "learning_rate": 7.935753267763745e-10, + "loss": 0.0, + "num_input_tokens_seen": 136359016, + "step": 202330 + }, + { + "epoch": 4.943077712359221, + "grad_norm": 2.2052936401451007e-05, + "learning_rate": 7.901822547647929e-10, + "loss": 0.0855, + "num_input_tokens_seen": 136361704, + "step": 202335 + }, + { + "epoch": 4.943199863191069, + "grad_norm": 0.0033455921802669764, + "learning_rate": 7.867964492758172e-10, + "loss": 0.0, + "num_input_tokens_seen": 136365096, + "step": 202340 + }, + { + "epoch": 4.9433220140229155, + "grad_norm": 0.0006569712422788143, + "learning_rate": 7.834179103339833e-10, + "loss": 0.0, + "num_input_tokens_seen": 136368744, + "step": 202345 + }, + { + "epoch": 4.943444164854762, + "grad_norm": 101.92816162109375, + "learning_rate": 7.800466379638271e-10, + "loss": 0.0501, + "num_input_tokens_seen": 136372392, + "step": 202350 + }, + { + "epoch": 4.94356631568661, + "grad_norm": 0.00010263787407893687, + "learning_rate": 7.766826321899955e-10, + "loss": 0.0, + "num_input_tokens_seen": 136376168, + "step": 202355 + }, + { + "epoch": 4.943688466518457, + "grad_norm": 0.002447111066430807, + "learning_rate": 7.733258930369135e-10, + "loss": 0.0, + "num_input_tokens_seen": 136379368, + "step": 202360 + }, + { + "epoch": 4.943810617350304, + "grad_norm": 0.00013913783186580986, + "learning_rate": 7.69976420528895e-10, + "loss": 0.0, + "num_input_tokens_seen": 136382888, + "step": 202365 + }, + { + "epoch": 4.943932768182151, + "grad_norm": 0.0009353599161840975, + "learning_rate": 7.666342146904759e-10, + "loss": 0.0, + "num_input_tokens_seen": 136386472, + "step": 202370 + }, + { + "epoch": 4.944054919013999, + "grad_norm": 0.0001008975159493275, + "learning_rate": 7.632992755457479e-10, + "loss": 0.0, + "num_input_tokens_seen": 136389864, + "step": 202375 + }, + { + "epoch": 4.944177069845845, + "grad_norm": 0.016027770936489105, + "learning_rate": 7.599716031191361e-10, + "loss": 0.0, + "num_input_tokens_seen": 136393512, + "step": 202380 + }, + { + "epoch": 4.944299220677693, + "grad_norm": 0.0003744078567251563, + "learning_rate": 7.566511974347322e-10, + "loss": 0.0, + "num_input_tokens_seen": 136396712, + "step": 202385 + }, + { + "epoch": 4.94442137150954, + "grad_norm": 9.113190026255324e-05, + "learning_rate": 7.533380585167393e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136400104, + "step": 202390 + }, + { + "epoch": 4.944543522341387, + "grad_norm": 0.00021912145894020796, + "learning_rate": 7.50032186389249e-10, + "loss": 0.0, + "num_input_tokens_seen": 136403432, + "step": 202395 + }, + { + "epoch": 4.944665673173234, + "grad_norm": 0.00018315493070986122, + "learning_rate": 7.467335810762421e-10, + "loss": 0.0, + "num_input_tokens_seen": 136406888, + "step": 202400 + }, + { + "epoch": 4.944787824005082, + "grad_norm": 5.3936601034365594e-06, + "learning_rate": 7.434422426018105e-10, + "loss": 0.0, + "num_input_tokens_seen": 136410280, + "step": 202405 + }, + { + "epoch": 4.9449099748369285, + "grad_norm": 0.0018063209718093276, + "learning_rate": 7.401581709898241e-10, + "loss": 0.0, + "num_input_tokens_seen": 136413544, + "step": 202410 + }, + { + "epoch": 4.945032125668776, + "grad_norm": 0.0017041267128661275, + "learning_rate": 7.368813662641527e-10, + "loss": 0.0, + "num_input_tokens_seen": 136416872, + "step": 202415 + }, + { + "epoch": 4.945154276500623, + "grad_norm": 0.0022793838288635015, + "learning_rate": 7.336118284486659e-10, + "loss": 0.0, + "num_input_tokens_seen": 136420008, + "step": 202420 + }, + { + "epoch": 4.94527642733247, + "grad_norm": 0.005071424413472414, + "learning_rate": 7.303495575671226e-10, + "loss": 0.0, + "num_input_tokens_seen": 136423656, + "step": 202425 + }, + { + "epoch": 4.945398578164317, + "grad_norm": 0.000549265940207988, + "learning_rate": 7.270945536431705e-10, + "loss": 0.0, + "num_input_tokens_seen": 136427048, + "step": 202430 + }, + { + "epoch": 4.945520728996165, + "grad_norm": 6.238095375010744e-05, + "learning_rate": 7.238468167006795e-10, + "loss": 0.0, + "num_input_tokens_seen": 136432552, + "step": 202435 + }, + { + "epoch": 4.945642879828012, + "grad_norm": 4.154796260991134e-05, + "learning_rate": 7.206063467630752e-10, + "loss": 0.0, + "num_input_tokens_seen": 136435816, + "step": 202440 + }, + { + "epoch": 4.945765030659858, + "grad_norm": 0.0002437532675685361, + "learning_rate": 7.173731438540054e-10, + "loss": 0.0, + "num_input_tokens_seen": 136439080, + "step": 202445 + }, + { + "epoch": 4.945887181491706, + "grad_norm": 0.00020424068497959524, + "learning_rate": 7.141472079970068e-10, + "loss": 0.0, + "num_input_tokens_seen": 136442024, + "step": 202450 + }, + { + "epoch": 4.946009332323553, + "grad_norm": 9.218160266755149e-05, + "learning_rate": 7.109285392155051e-10, + "loss": 0.0, + "num_input_tokens_seen": 136445352, + "step": 202455 + }, + { + "epoch": 4.9461314831554, + "grad_norm": 9.915697592077777e-05, + "learning_rate": 7.077171375329261e-10, + "loss": 0.0, + "num_input_tokens_seen": 136448616, + "step": 202460 + }, + { + "epoch": 4.946253633987247, + "grad_norm": 0.000816051266156137, + "learning_rate": 7.045130029725843e-10, + "loss": 0.0, + "num_input_tokens_seen": 136451816, + "step": 202465 + }, + { + "epoch": 4.946375784819095, + "grad_norm": 0.09224054962396622, + "learning_rate": 7.013161355577945e-10, + "loss": 0.0, + "num_input_tokens_seen": 136455720, + "step": 202470 + }, + { + "epoch": 4.9464979356509415, + "grad_norm": 0.0026561652775853872, + "learning_rate": 6.981265353117605e-10, + "loss": 0.0, + "num_input_tokens_seen": 136459304, + "step": 202475 + }, + { + "epoch": 4.946620086482789, + "grad_norm": 0.032723743468523026, + "learning_rate": 6.949442022577967e-10, + "loss": 0.0, + "num_input_tokens_seen": 136462632, + "step": 202480 + }, + { + "epoch": 4.946742237314636, + "grad_norm": 9.901898010866717e-06, + "learning_rate": 6.917691364188849e-10, + "loss": 0.0, + "num_input_tokens_seen": 136466152, + "step": 202485 + }, + { + "epoch": 4.9468643881464835, + "grad_norm": 0.44688522815704346, + "learning_rate": 6.886013378183397e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136469352, + "step": 202490 + }, + { + "epoch": 4.94698653897833, + "grad_norm": 2.6838359190151095e-05, + "learning_rate": 6.854408064790318e-10, + "loss": 0.0, + "num_input_tokens_seen": 136473512, + "step": 202495 + }, + { + "epoch": 4.947108689810178, + "grad_norm": 0.0001793210831237957, + "learning_rate": 6.822875424239427e-10, + "loss": 0.0, + "num_input_tokens_seen": 136477416, + "step": 202500 + }, + { + "epoch": 4.947230840642025, + "grad_norm": 6.435633258661255e-05, + "learning_rate": 6.79141545676054e-10, + "loss": 0.0, + "num_input_tokens_seen": 136480616, + "step": 202505 + }, + { + "epoch": 4.947352991473872, + "grad_norm": 0.00011103512952104211, + "learning_rate": 6.760028162582365e-10, + "loss": 0.0, + "num_input_tokens_seen": 136483752, + "step": 202510 + }, + { + "epoch": 4.947475142305719, + "grad_norm": 0.00047665010788477957, + "learning_rate": 6.728713541933606e-10, + "loss": 0.0, + "num_input_tokens_seen": 136486760, + "step": 202515 + }, + { + "epoch": 4.947597293137566, + "grad_norm": 0.00012407048780005425, + "learning_rate": 6.697471595040749e-10, + "loss": 0.0, + "num_input_tokens_seen": 136490088, + "step": 202520 + }, + { + "epoch": 4.947719443969413, + "grad_norm": 9.188240073854104e-05, + "learning_rate": 6.6663023221325e-10, + "loss": 0.0, + "num_input_tokens_seen": 136493928, + "step": 202525 + }, + { + "epoch": 4.947841594801261, + "grad_norm": 0.0026419502682983875, + "learning_rate": 6.635205723434234e-10, + "loss": 0.0, + "num_input_tokens_seen": 136497832, + "step": 202530 + }, + { + "epoch": 4.947963745633108, + "grad_norm": 2.2201460524229333e-05, + "learning_rate": 6.604181799172437e-10, + "loss": 0.0, + "num_input_tokens_seen": 136500968, + "step": 202535 + }, + { + "epoch": 4.948085896464955, + "grad_norm": 0.0008757863542996347, + "learning_rate": 6.573230549573594e-10, + "loss": 0.0, + "num_input_tokens_seen": 136504296, + "step": 202540 + }, + { + "epoch": 4.948208047296802, + "grad_norm": 0.0021094870753586292, + "learning_rate": 6.54235197486197e-10, + "loss": 0.0, + "num_input_tokens_seen": 136508008, + "step": 202545 + }, + { + "epoch": 4.948330198128649, + "grad_norm": 0.00016692510689608753, + "learning_rate": 6.511546075261831e-10, + "loss": 0.0, + "num_input_tokens_seen": 136511656, + "step": 202550 + }, + { + "epoch": 4.948452348960497, + "grad_norm": 0.0003622740041464567, + "learning_rate": 6.480812850997442e-10, + "loss": 0.0, + "num_input_tokens_seen": 136514856, + "step": 202555 + }, + { + "epoch": 4.948574499792343, + "grad_norm": 0.00023636213154532015, + "learning_rate": 6.450152302293066e-10, + "loss": 0.0, + "num_input_tokens_seen": 136518888, + "step": 202560 + }, + { + "epoch": 4.948696650624191, + "grad_norm": 0.0003470900119282305, + "learning_rate": 6.41956442937186e-10, + "loss": 0.0, + "num_input_tokens_seen": 136522344, + "step": 202565 + }, + { + "epoch": 4.948818801456038, + "grad_norm": 0.005127554759383202, + "learning_rate": 6.389049232454757e-10, + "loss": 0.0, + "num_input_tokens_seen": 136525544, + "step": 202570 + }, + { + "epoch": 4.948940952287885, + "grad_norm": 6.562341877724975e-05, + "learning_rate": 6.358606711763803e-10, + "loss": 0.0, + "num_input_tokens_seen": 136528872, + "step": 202575 + }, + { + "epoch": 4.949063103119732, + "grad_norm": 0.0006907072965987027, + "learning_rate": 6.328236867522152e-10, + "loss": 0.0, + "num_input_tokens_seen": 136532264, + "step": 202580 + }, + { + "epoch": 4.94918525395158, + "grad_norm": 21.064266204833984, + "learning_rate": 6.297939699948518e-10, + "loss": 0.0615, + "num_input_tokens_seen": 136535272, + "step": 202585 + }, + { + "epoch": 4.9493074047834265, + "grad_norm": 6.251333252293989e-05, + "learning_rate": 6.267715209264945e-10, + "loss": 0.0, + "num_input_tokens_seen": 136538664, + "step": 202590 + }, + { + "epoch": 4.949429555615274, + "grad_norm": 0.0009637015173211694, + "learning_rate": 6.237563395690149e-10, + "loss": 0.0, + "num_input_tokens_seen": 136542120, + "step": 202595 + }, + { + "epoch": 4.949551706447121, + "grad_norm": 0.0023319462779909372, + "learning_rate": 6.207484259443952e-10, + "loss": 0.0, + "num_input_tokens_seen": 136545768, + "step": 202600 + }, + { + "epoch": 4.9496738572789685, + "grad_norm": 0.2298898547887802, + "learning_rate": 6.177477800745067e-10, + "loss": 0.0, + "num_input_tokens_seen": 136548968, + "step": 202605 + }, + { + "epoch": 4.949796008110815, + "grad_norm": 0.0007136244676075876, + "learning_rate": 6.147544019812212e-10, + "loss": 0.0, + "num_input_tokens_seen": 136552168, + "step": 202610 + }, + { + "epoch": 4.949918158942662, + "grad_norm": 0.00026461650850251317, + "learning_rate": 6.117682916861877e-10, + "loss": 0.0, + "num_input_tokens_seen": 136555432, + "step": 202615 + }, + { + "epoch": 4.95004030977451, + "grad_norm": 8.239582530222833e-05, + "learning_rate": 6.087894492111667e-10, + "loss": 0.0, + "num_input_tokens_seen": 136558952, + "step": 202620 + }, + { + "epoch": 4.950162460606357, + "grad_norm": 0.00012572153354994953, + "learning_rate": 6.058178745778076e-10, + "loss": 0.0, + "num_input_tokens_seen": 136562216, + "step": 202625 + }, + { + "epoch": 4.950284611438204, + "grad_norm": 0.00047976983478292823, + "learning_rate": 6.028535678077595e-10, + "loss": 0.0, + "num_input_tokens_seen": 136565864, + "step": 202630 + }, + { + "epoch": 4.950406762270051, + "grad_norm": 0.004869820084422827, + "learning_rate": 5.998965289225611e-10, + "loss": 0.0, + "num_input_tokens_seen": 136569128, + "step": 202635 + }, + { + "epoch": 4.950528913101898, + "grad_norm": 4.379753227112815e-05, + "learning_rate": 5.969467579437504e-10, + "loss": 0.0, + "num_input_tokens_seen": 136572200, + "step": 202640 + }, + { + "epoch": 4.950651063933745, + "grad_norm": 1.0504995771043468e-05, + "learning_rate": 5.940042548927548e-10, + "loss": 0.0, + "num_input_tokens_seen": 136575592, + "step": 202645 + }, + { + "epoch": 4.950773214765593, + "grad_norm": 6.727211439283565e-05, + "learning_rate": 5.910690197908908e-10, + "loss": 0.0, + "num_input_tokens_seen": 136578920, + "step": 202650 + }, + { + "epoch": 4.9508953655974395, + "grad_norm": 4.358491423772648e-05, + "learning_rate": 5.881410526595854e-10, + "loss": 0.0, + "num_input_tokens_seen": 136581992, + "step": 202655 + }, + { + "epoch": 4.951017516429287, + "grad_norm": 0.00016454195429105312, + "learning_rate": 5.85220353520266e-10, + "loss": 0.0, + "num_input_tokens_seen": 136585576, + "step": 202660 + }, + { + "epoch": 4.951139667261134, + "grad_norm": 5.1774670282611623e-05, + "learning_rate": 5.823069223939159e-10, + "loss": 0.0, + "num_input_tokens_seen": 136589480, + "step": 202665 + }, + { + "epoch": 4.9512618180929815, + "grad_norm": 0.000128003244753927, + "learning_rate": 5.794007593018512e-10, + "loss": 0.0, + "num_input_tokens_seen": 136592808, + "step": 202670 + }, + { + "epoch": 4.951383968924828, + "grad_norm": 0.0002087876491714269, + "learning_rate": 5.765018642652775e-10, + "loss": 0.0, + "num_input_tokens_seen": 136596200, + "step": 202675 + }, + { + "epoch": 4.951506119756676, + "grad_norm": 0.001459281425923109, + "learning_rate": 5.736102373050666e-10, + "loss": 0.0, + "num_input_tokens_seen": 136599848, + "step": 202680 + }, + { + "epoch": 4.951628270588523, + "grad_norm": 5.585969483945519e-05, + "learning_rate": 5.707258784424241e-10, + "loss": 0.0, + "num_input_tokens_seen": 136603048, + "step": 202685 + }, + { + "epoch": 4.95175042142037, + "grad_norm": 0.0025811134837567806, + "learning_rate": 5.678487876983329e-10, + "loss": 0.0, + "num_input_tokens_seen": 136607080, + "step": 202690 + }, + { + "epoch": 4.951872572252217, + "grad_norm": 0.0004729589563794434, + "learning_rate": 5.649789650936654e-10, + "loss": 0.0, + "num_input_tokens_seen": 136610344, + "step": 202695 + }, + { + "epoch": 4.951994723084065, + "grad_norm": 0.00126547587569803, + "learning_rate": 5.621164106491827e-10, + "loss": 0.0, + "num_input_tokens_seen": 136613800, + "step": 202700 + }, + { + "epoch": 4.952116873915911, + "grad_norm": 0.00017267995281144977, + "learning_rate": 5.592611243858681e-10, + "loss": 0.0, + "num_input_tokens_seen": 136617064, + "step": 202705 + }, + { + "epoch": 4.952239024747758, + "grad_norm": 0.0011236423160880804, + "learning_rate": 5.564131063244826e-10, + "loss": 0.0, + "num_input_tokens_seen": 136620392, + "step": 202710 + }, + { + "epoch": 4.952361175579606, + "grad_norm": 0.0035760272294282913, + "learning_rate": 5.535723564855654e-10, + "loss": 0.0346, + "num_input_tokens_seen": 136623528, + "step": 202715 + }, + { + "epoch": 4.9524833264114525, + "grad_norm": 0.04028363898396492, + "learning_rate": 5.507388748899889e-10, + "loss": 0.0, + "num_input_tokens_seen": 136626728, + "step": 202720 + }, + { + "epoch": 4.9526054772433, + "grad_norm": 8.850402082316577e-05, + "learning_rate": 5.479126615581808e-10, + "loss": 0.0, + "num_input_tokens_seen": 136630120, + "step": 202725 + }, + { + "epoch": 4.952727628075147, + "grad_norm": 8.48881700221682e-06, + "learning_rate": 5.450937165109026e-10, + "loss": 0.0437, + "num_input_tokens_seen": 136633448, + "step": 202730 + }, + { + "epoch": 4.9528497789069945, + "grad_norm": 0.0008506285957992077, + "learning_rate": 5.422820397683603e-10, + "loss": 0.0, + "num_input_tokens_seen": 136636648, + "step": 202735 + }, + { + "epoch": 4.952971929738841, + "grad_norm": 0.0005138792330399156, + "learning_rate": 5.394776313512039e-10, + "loss": 0.0, + "num_input_tokens_seen": 136640424, + "step": 202740 + }, + { + "epoch": 4.953094080570689, + "grad_norm": 0.0001250112400157377, + "learning_rate": 5.366804912798617e-10, + "loss": 0.0, + "num_input_tokens_seen": 136644008, + "step": 202745 + }, + { + "epoch": 4.953216231402536, + "grad_norm": 0.0009678230853751302, + "learning_rate": 5.338906195745396e-10, + "loss": 0.0, + "num_input_tokens_seen": 136647528, + "step": 202750 + }, + { + "epoch": 4.953338382234383, + "grad_norm": 0.0006666900007985532, + "learning_rate": 5.311080162556658e-10, + "loss": 0.0, + "num_input_tokens_seen": 136651048, + "step": 202755 + }, + { + "epoch": 4.95346053306623, + "grad_norm": 0.0007440054323524237, + "learning_rate": 5.283326813433353e-10, + "loss": 0.0, + "num_input_tokens_seen": 136654568, + "step": 202760 + }, + { + "epoch": 4.953582683898078, + "grad_norm": 0.006778331473469734, + "learning_rate": 5.255646148577542e-10, + "loss": 0.0, + "num_input_tokens_seen": 136658152, + "step": 202765 + }, + { + "epoch": 4.953704834729924, + "grad_norm": 4.8525103920837864e-05, + "learning_rate": 5.228038168191284e-10, + "loss": 0.0, + "num_input_tokens_seen": 136661608, + "step": 202770 + }, + { + "epoch": 4.953826985561772, + "grad_norm": 0.0004143420956097543, + "learning_rate": 5.200502872475531e-10, + "loss": 0.0, + "num_input_tokens_seen": 136664744, + "step": 202775 + }, + { + "epoch": 4.953949136393619, + "grad_norm": 2.905308429035358e-05, + "learning_rate": 5.173040261629014e-10, + "loss": 0.0, + "num_input_tokens_seen": 136668584, + "step": 202780 + }, + { + "epoch": 4.9540712872254655, + "grad_norm": 0.0058879847638309, + "learning_rate": 5.145650335853791e-10, + "loss": 0.0, + "num_input_tokens_seen": 136671912, + "step": 202785 + }, + { + "epoch": 4.954193438057313, + "grad_norm": 0.00014649657532572746, + "learning_rate": 5.118333095346372e-10, + "loss": 0.0, + "num_input_tokens_seen": 136675368, + "step": 202790 + }, + { + "epoch": 4.954315588889161, + "grad_norm": 1.4093015124672092e-05, + "learning_rate": 5.091088540307708e-10, + "loss": 0.0, + "num_input_tokens_seen": 136678888, + "step": 202795 + }, + { + "epoch": 4.9544377397210075, + "grad_norm": 0.0002563645539339632, + "learning_rate": 5.06391667093431e-10, + "loss": 0.0, + "num_input_tokens_seen": 136682600, + "step": 202800 + }, + { + "epoch": 4.954559890552854, + "grad_norm": 0.0004114148614462465, + "learning_rate": 5.036817487424905e-10, + "loss": 0.0, + "num_input_tokens_seen": 136686184, + "step": 202805 + }, + { + "epoch": 4.954682041384702, + "grad_norm": 0.0030616470612585545, + "learning_rate": 5.009790989974893e-10, + "loss": 0.0, + "num_input_tokens_seen": 136689320, + "step": 202810 + }, + { + "epoch": 4.954804192216549, + "grad_norm": 0.00034004944609478116, + "learning_rate": 4.982837178783006e-10, + "loss": 0.0, + "num_input_tokens_seen": 136692584, + "step": 202815 + }, + { + "epoch": 4.954926343048396, + "grad_norm": 0.0015535084530711174, + "learning_rate": 4.955956054044641e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136696104, + "step": 202820 + }, + { + "epoch": 4.955048493880243, + "grad_norm": 0.007306914310902357, + "learning_rate": 4.929147615954088e-10, + "loss": 0.0, + "num_input_tokens_seen": 136699496, + "step": 202825 + }, + { + "epoch": 4.955170644712091, + "grad_norm": 0.00029027010896243155, + "learning_rate": 4.902411864707856e-10, + "loss": 0.0, + "num_input_tokens_seen": 136703080, + "step": 202830 + }, + { + "epoch": 4.955292795543937, + "grad_norm": 0.00010325389303034171, + "learning_rate": 4.875748800499124e-10, + "loss": 0.0, + "num_input_tokens_seen": 136706536, + "step": 202835 + }, + { + "epoch": 4.955414946375785, + "grad_norm": 0.00011646476195892319, + "learning_rate": 4.849158423522181e-10, + "loss": 0.0, + "num_input_tokens_seen": 136709608, + "step": 202840 + }, + { + "epoch": 4.955537097207632, + "grad_norm": 0.0016139474464580417, + "learning_rate": 4.822640733971317e-10, + "loss": 0.0, + "num_input_tokens_seen": 136712872, + "step": 202845 + }, + { + "epoch": 4.955659248039479, + "grad_norm": 0.0004955488257110119, + "learning_rate": 4.796195732038599e-10, + "loss": 0.0, + "num_input_tokens_seen": 136715752, + "step": 202850 + }, + { + "epoch": 4.955781398871326, + "grad_norm": 0.0006088154623284936, + "learning_rate": 4.769823417914987e-10, + "loss": 0.0, + "num_input_tokens_seen": 136718952, + "step": 202855 + }, + { + "epoch": 4.955903549703174, + "grad_norm": 0.0019494992448017001, + "learning_rate": 4.743523791794768e-10, + "loss": 0.0, + "num_input_tokens_seen": 136722344, + "step": 202860 + }, + { + "epoch": 4.956025700535021, + "grad_norm": 0.00047076281043700874, + "learning_rate": 4.717296853867791e-10, + "loss": 0.0, + "num_input_tokens_seen": 136725608, + "step": 202865 + }, + { + "epoch": 4.956147851366868, + "grad_norm": 9.878385753836483e-05, + "learning_rate": 4.691142604325016e-10, + "loss": 0.0, + "num_input_tokens_seen": 136729384, + "step": 202870 + }, + { + "epoch": 4.956270002198715, + "grad_norm": 0.0019373642280697823, + "learning_rate": 4.665061043356289e-10, + "loss": 0.0, + "num_input_tokens_seen": 136732584, + "step": 202875 + }, + { + "epoch": 4.956392153030562, + "grad_norm": 0.0004523659299593419, + "learning_rate": 4.639052171152569e-10, + "loss": 0.0, + "num_input_tokens_seen": 136735976, + "step": 202880 + }, + { + "epoch": 4.956514303862409, + "grad_norm": 2.4888262487365864e-05, + "learning_rate": 4.6131159879014834e-10, + "loss": 0.0, + "num_input_tokens_seen": 136739432, + "step": 202885 + }, + { + "epoch": 4.956636454694257, + "grad_norm": 0.001991266617551446, + "learning_rate": 4.5872524937917713e-10, + "loss": 0.0, + "num_input_tokens_seen": 136742760, + "step": 202890 + }, + { + "epoch": 4.956758605526104, + "grad_norm": 5.5790322221582755e-05, + "learning_rate": 4.5614616890121693e-10, + "loss": 0.0, + "num_input_tokens_seen": 136746088, + "step": 202895 + }, + { + "epoch": 4.9568807563579504, + "grad_norm": 0.004070554859936237, + "learning_rate": 4.535743573750306e-10, + "loss": 0.0, + "num_input_tokens_seen": 136749544, + "step": 202900 + }, + { + "epoch": 4.957002907189798, + "grad_norm": 0.0001542905520182103, + "learning_rate": 4.5100981481938085e-10, + "loss": 0.0, + "num_input_tokens_seen": 136753064, + "step": 202905 + }, + { + "epoch": 4.957125058021645, + "grad_norm": 0.0006492988322861493, + "learning_rate": 4.484525412526974e-10, + "loss": 0.0717, + "num_input_tokens_seen": 136756136, + "step": 202910 + }, + { + "epoch": 4.9572472088534925, + "grad_norm": 0.0013533460441976786, + "learning_rate": 4.4590253669385404e-10, + "loss": 0.0043, + "num_input_tokens_seen": 136759336, + "step": 202915 + }, + { + "epoch": 4.957369359685339, + "grad_norm": 1.4056084182811901e-05, + "learning_rate": 4.4335980116116946e-10, + "loss": 0.0, + "num_input_tokens_seen": 136762728, + "step": 202920 + }, + { + "epoch": 4.957491510517187, + "grad_norm": 5.709950710297562e-05, + "learning_rate": 4.4082433467318436e-10, + "loss": 0.0, + "num_input_tokens_seen": 136765992, + "step": 202925 + }, + { + "epoch": 4.957613661349034, + "grad_norm": 0.002161990851163864, + "learning_rate": 4.382961372484395e-10, + "loss": 0.0, + "num_input_tokens_seen": 136770024, + "step": 202930 + }, + { + "epoch": 4.957735812180881, + "grad_norm": 1.5122742297535297e-05, + "learning_rate": 4.3577520890525353e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136773160, + "step": 202935 + }, + { + "epoch": 4.957857963012728, + "grad_norm": 0.001000594929791987, + "learning_rate": 4.332615496619452e-10, + "loss": 0.0, + "num_input_tokens_seen": 136776872, + "step": 202940 + }, + { + "epoch": 4.957980113844576, + "grad_norm": 0.0017755540320649743, + "learning_rate": 4.3075515953683306e-10, + "loss": 0.0, + "num_input_tokens_seen": 136780648, + "step": 202945 + }, + { + "epoch": 4.958102264676422, + "grad_norm": 6.09001363045536e-05, + "learning_rate": 4.2825603854801385e-10, + "loss": 0.0, + "num_input_tokens_seen": 136783912, + "step": 202950 + }, + { + "epoch": 4.95822441550827, + "grad_norm": 0.0006439242861233652, + "learning_rate": 4.257641867139172e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136787048, + "step": 202955 + }, + { + "epoch": 4.958346566340117, + "grad_norm": 2.781920375127811e-05, + "learning_rate": 4.2327960405241783e-10, + "loss": 0.0, + "num_input_tokens_seen": 136790952, + "step": 202960 + }, + { + "epoch": 4.958468717171964, + "grad_norm": 0.0003997894236817956, + "learning_rate": 4.2080229058172325e-10, + "loss": 0.0, + "num_input_tokens_seen": 136794600, + "step": 202965 + }, + { + "epoch": 4.958590868003811, + "grad_norm": 0.03980407118797302, + "learning_rate": 4.183322463198191e-10, + "loss": 0.0, + "num_input_tokens_seen": 136797992, + "step": 202970 + }, + { + "epoch": 4.958713018835658, + "grad_norm": 4.830466423300095e-06, + "learning_rate": 4.1586947128458006e-10, + "loss": 0.0, + "num_input_tokens_seen": 136801640, + "step": 202975 + }, + { + "epoch": 4.9588351696675055, + "grad_norm": 0.033779554069042206, + "learning_rate": 4.134139654941027e-10, + "loss": 0.0, + "num_input_tokens_seen": 136804968, + "step": 202980 + }, + { + "epoch": 4.958957320499352, + "grad_norm": 5.306170351104811e-05, + "learning_rate": 4.109657289660395e-10, + "loss": 0.0, + "num_input_tokens_seen": 136807848, + "step": 202985 + }, + { + "epoch": 4.9590794713312, + "grad_norm": 0.00020911532919853926, + "learning_rate": 4.085247617183762e-10, + "loss": 0.0, + "num_input_tokens_seen": 136811368, + "step": 202990 + }, + { + "epoch": 4.959201622163047, + "grad_norm": 0.00023600812710355967, + "learning_rate": 4.0609106376876537e-10, + "loss": 0.0, + "num_input_tokens_seen": 136814376, + "step": 202995 + }, + { + "epoch": 4.959323772994894, + "grad_norm": 9.761666296981275e-05, + "learning_rate": 4.036646351348594e-10, + "loss": 0.0, + "num_input_tokens_seen": 136817704, + "step": 203000 + }, + { + "epoch": 4.959445923826741, + "grad_norm": 1.3856853001925629e-05, + "learning_rate": 4.012454758344219e-10, + "loss": 0.0, + "num_input_tokens_seen": 136821096, + "step": 203005 + }, + { + "epoch": 4.959568074658589, + "grad_norm": 4.735983020509593e-05, + "learning_rate": 3.988335858849945e-10, + "loss": 0.0, + "num_input_tokens_seen": 136825320, + "step": 203010 + }, + { + "epoch": 4.959690225490435, + "grad_norm": 0.002790002152323723, + "learning_rate": 3.964289653040076e-10, + "loss": 0.0, + "num_input_tokens_seen": 136828520, + "step": 203015 + }, + { + "epoch": 4.959812376322283, + "grad_norm": 0.001175225363112986, + "learning_rate": 3.940316141091138e-10, + "loss": 0.0, + "num_input_tokens_seen": 136831784, + "step": 203020 + }, + { + "epoch": 4.95993452715413, + "grad_norm": 0.002540030749514699, + "learning_rate": 3.9164153231774353e-10, + "loss": 0.0, + "num_input_tokens_seen": 136835688, + "step": 203025 + }, + { + "epoch": 4.960056677985977, + "grad_norm": 0.0004663171130232513, + "learning_rate": 3.8925871994710536e-10, + "loss": 0.0738, + "num_input_tokens_seen": 136838760, + "step": 203030 + }, + { + "epoch": 4.960178828817824, + "grad_norm": 0.005429753102362156, + "learning_rate": 3.868831770147407e-10, + "loss": 0.0, + "num_input_tokens_seen": 136842024, + "step": 203035 + }, + { + "epoch": 4.960300979649672, + "grad_norm": 0.00017984755686484277, + "learning_rate": 3.8451490353774706e-10, + "loss": 0.0, + "num_input_tokens_seen": 136845736, + "step": 203040 + }, + { + "epoch": 4.9604231304815185, + "grad_norm": 3.470600495347753e-05, + "learning_rate": 3.8215389953355494e-10, + "loss": 0.0, + "num_input_tokens_seen": 136849128, + "step": 203045 + }, + { + "epoch": 4.960545281313365, + "grad_norm": 0.005800565704703331, + "learning_rate": 3.7980016501903966e-10, + "loss": 0.0, + "num_input_tokens_seen": 136852520, + "step": 203050 + }, + { + "epoch": 4.960667432145213, + "grad_norm": 0.00013009316171519458, + "learning_rate": 3.774537000116318e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136856360, + "step": 203055 + }, + { + "epoch": 4.9607895829770605, + "grad_norm": 0.0001301606243941933, + "learning_rate": 3.7511450452809565e-10, + "loss": 0.0, + "num_input_tokens_seen": 136859432, + "step": 203060 + }, + { + "epoch": 4.960911733808907, + "grad_norm": 0.00011180521687492728, + "learning_rate": 3.727825785857508e-10, + "loss": 0.0, + "num_input_tokens_seen": 136862504, + "step": 203065 + }, + { + "epoch": 4.961033884640754, + "grad_norm": 0.00028714933432638645, + "learning_rate": 3.704579222012505e-10, + "loss": 0.0, + "num_input_tokens_seen": 136865320, + "step": 203070 + }, + { + "epoch": 4.961156035472602, + "grad_norm": 5.161049557500519e-05, + "learning_rate": 3.681405353916922e-10, + "loss": 0.0, + "num_input_tokens_seen": 136868520, + "step": 203075 + }, + { + "epoch": 4.961278186304448, + "grad_norm": 0.00584859075024724, + "learning_rate": 3.658304181739513e-10, + "loss": 0.0397, + "num_input_tokens_seen": 136871720, + "step": 203080 + }, + { + "epoch": 4.961400337136296, + "grad_norm": 0.0003809731570072472, + "learning_rate": 3.635275705646812e-10, + "loss": 0.0, + "num_input_tokens_seen": 136875176, + "step": 203085 + }, + { + "epoch": 4.961522487968143, + "grad_norm": 0.001918514259159565, + "learning_rate": 3.612319925807572e-10, + "loss": 0.0, + "num_input_tokens_seen": 136878440, + "step": 203090 + }, + { + "epoch": 4.96164463879999, + "grad_norm": 0.002260788343846798, + "learning_rate": 3.589436842388327e-10, + "loss": 0.0, + "num_input_tokens_seen": 136881512, + "step": 203095 + }, + { + "epoch": 4.961766789631837, + "grad_norm": 0.0006037589628249407, + "learning_rate": 3.56662645555561e-10, + "loss": 0.0003, + "num_input_tokens_seen": 136884392, + "step": 203100 + }, + { + "epoch": 4.961888940463685, + "grad_norm": 0.002064619679003954, + "learning_rate": 3.543888765473735e-10, + "loss": 0.0, + "num_input_tokens_seen": 136887592, + "step": 203105 + }, + { + "epoch": 4.9620110912955315, + "grad_norm": 0.0017396878683939576, + "learning_rate": 3.521223772311455e-10, + "loss": 0.0, + "num_input_tokens_seen": 136890600, + "step": 203110 + }, + { + "epoch": 4.962133242127379, + "grad_norm": 0.0003977522428613156, + "learning_rate": 3.498631476229752e-10, + "loss": 0.0, + "num_input_tokens_seen": 136894056, + "step": 203115 + }, + { + "epoch": 4.962255392959226, + "grad_norm": 0.005513790063560009, + "learning_rate": 3.47611187739516e-10, + "loss": 0.0, + "num_input_tokens_seen": 136897448, + "step": 203120 + }, + { + "epoch": 4.9623775437910735, + "grad_norm": 0.00010466863022884354, + "learning_rate": 3.453664975971993e-10, + "loss": 0.0, + "num_input_tokens_seen": 136900840, + "step": 203125 + }, + { + "epoch": 4.96249969462292, + "grad_norm": 6.562995258718729e-05, + "learning_rate": 3.4312907721212316e-10, + "loss": 0.0, + "num_input_tokens_seen": 136904104, + "step": 203130 + }, + { + "epoch": 4.962621845454768, + "grad_norm": 2.7876367312273942e-05, + "learning_rate": 3.4089892660082997e-10, + "loss": 0.0513, + "num_input_tokens_seen": 136907752, + "step": 203135 + }, + { + "epoch": 4.962743996286615, + "grad_norm": 0.00010341319284634665, + "learning_rate": 3.38676045779307e-10, + "loss": 0.0, + "num_input_tokens_seen": 136911400, + "step": 203140 + }, + { + "epoch": 4.962866147118461, + "grad_norm": 0.0016331837978214025, + "learning_rate": 3.364604347637634e-10, + "loss": 0.0, + "num_input_tokens_seen": 136914472, + "step": 203145 + }, + { + "epoch": 4.962988297950309, + "grad_norm": 0.00025190794258378446, + "learning_rate": 3.342520935704085e-10, + "loss": 0.0, + "num_input_tokens_seen": 136918056, + "step": 203150 + }, + { + "epoch": 4.963110448782157, + "grad_norm": 8.690000686328858e-05, + "learning_rate": 3.3205102221534054e-10, + "loss": 0.0, + "num_input_tokens_seen": 136921576, + "step": 203155 + }, + { + "epoch": 4.963232599614003, + "grad_norm": 0.0013419648166745901, + "learning_rate": 3.2985722071432465e-10, + "loss": 0.0, + "num_input_tokens_seen": 136925032, + "step": 203160 + }, + { + "epoch": 4.96335475044585, + "grad_norm": 0.0007638754323124886, + "learning_rate": 3.276706890835701e-10, + "loss": 0.0, + "num_input_tokens_seen": 136928232, + "step": 203165 + }, + { + "epoch": 4.963476901277698, + "grad_norm": 5.1917631935793906e-05, + "learning_rate": 3.2549142733884203e-10, + "loss": 0.0, + "num_input_tokens_seen": 136931688, + "step": 203170 + }, + { + "epoch": 4.9635990521095446, + "grad_norm": 0.00024687673430889845, + "learning_rate": 3.2331943549601673e-10, + "loss": 0.0, + "num_input_tokens_seen": 136935016, + "step": 203175 + }, + { + "epoch": 4.963721202941392, + "grad_norm": 0.31552761793136597, + "learning_rate": 3.211547135708592e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136938408, + "step": 203180 + }, + { + "epoch": 4.963843353773239, + "grad_norm": 0.00021553314581979066, + "learning_rate": 3.1899726157913476e-10, + "loss": 0.0, + "num_input_tokens_seen": 136941736, + "step": 203185 + }, + { + "epoch": 4.963965504605087, + "grad_norm": 0.00010820927855093032, + "learning_rate": 3.168470795366085e-10, + "loss": 0.0, + "num_input_tokens_seen": 136945128, + "step": 203190 + }, + { + "epoch": 4.964087655436933, + "grad_norm": 8.269635873148218e-05, + "learning_rate": 3.1470416745882353e-10, + "loss": 0.0, + "num_input_tokens_seen": 136948456, + "step": 203195 + }, + { + "epoch": 4.964209806268781, + "grad_norm": 1.1428928701207042e-05, + "learning_rate": 3.1256852536143407e-10, + "loss": 0.0, + "num_input_tokens_seen": 136951464, + "step": 203200 + }, + { + "epoch": 4.964331957100628, + "grad_norm": 0.0011048450833186507, + "learning_rate": 3.1044015325987217e-10, + "loss": 0.0, + "num_input_tokens_seen": 136954664, + "step": 203205 + }, + { + "epoch": 4.964454107932475, + "grad_norm": 0.000956613221205771, + "learning_rate": 3.0831905116968093e-10, + "loss": 0.0, + "num_input_tokens_seen": 136957928, + "step": 203210 + }, + { + "epoch": 4.964576258764322, + "grad_norm": 3.0835537472739816e-05, + "learning_rate": 3.062052191062925e-10, + "loss": 0.0, + "num_input_tokens_seen": 136961640, + "step": 203215 + }, + { + "epoch": 4.96469840959617, + "grad_norm": 0.0006256560445763171, + "learning_rate": 3.040986570851389e-10, + "loss": 0.0, + "num_input_tokens_seen": 136965032, + "step": 203220 + }, + { + "epoch": 4.9648205604280164, + "grad_norm": 0.0019232897320762277, + "learning_rate": 3.019993651213193e-10, + "loss": 0.0, + "num_input_tokens_seen": 136968872, + "step": 203225 + }, + { + "epoch": 4.964942711259864, + "grad_norm": 8.002047252375633e-05, + "learning_rate": 2.999073432303767e-10, + "loss": 0.0, + "num_input_tokens_seen": 136972584, + "step": 203230 + }, + { + "epoch": 4.965064862091711, + "grad_norm": 9.228551061823964e-05, + "learning_rate": 2.9782259142729913e-10, + "loss": 0.0001, + "num_input_tokens_seen": 136976232, + "step": 203235 + }, + { + "epoch": 4.965187012923558, + "grad_norm": 0.00013471365673467517, + "learning_rate": 2.957451097274077e-10, + "loss": 0.0, + "num_input_tokens_seen": 136980136, + "step": 203240 + }, + { + "epoch": 4.965309163755405, + "grad_norm": 0.0009636140894144773, + "learning_rate": 2.9367489814569044e-10, + "loss": 0.0, + "num_input_tokens_seen": 136983208, + "step": 203245 + }, + { + "epoch": 4.965431314587252, + "grad_norm": 9.399078408023342e-06, + "learning_rate": 2.9161195669735736e-10, + "loss": 0.0, + "num_input_tokens_seen": 136986920, + "step": 203250 + }, + { + "epoch": 4.9655534654191, + "grad_norm": 0.0016986504197120667, + "learning_rate": 2.8955628539717447e-10, + "loss": 0.0, + "num_input_tokens_seen": 136989864, + "step": 203255 + }, + { + "epoch": 4.965675616250946, + "grad_norm": 0.00014009641017764807, + "learning_rate": 2.8750788426035175e-10, + "loss": 0.0, + "num_input_tokens_seen": 136993192, + "step": 203260 + }, + { + "epoch": 4.965797767082794, + "grad_norm": 0.00019577737839426845, + "learning_rate": 2.854667533015442e-10, + "loss": 0.0, + "num_input_tokens_seen": 136996264, + "step": 203265 + }, + { + "epoch": 4.965919917914641, + "grad_norm": 5.1051170885330066e-05, + "learning_rate": 2.834328925358509e-10, + "loss": 0.0, + "num_input_tokens_seen": 136999528, + "step": 203270 + }, + { + "epoch": 4.966042068746488, + "grad_norm": 0.0007095492910593748, + "learning_rate": 2.814063019778157e-10, + "loss": 0.0, + "num_input_tokens_seen": 137002536, + "step": 203275 + }, + { + "epoch": 4.966164219578335, + "grad_norm": 0.0004173467750661075, + "learning_rate": 2.7938698164231556e-10, + "loss": 0.0, + "num_input_tokens_seen": 137005544, + "step": 203280 + }, + { + "epoch": 4.966286370410183, + "grad_norm": 0.00018406417802907526, + "learning_rate": 2.773749315440055e-10, + "loss": 0.0234, + "num_input_tokens_seen": 137008808, + "step": 203285 + }, + { + "epoch": 4.9664085212420295, + "grad_norm": 0.0005171273369342089, + "learning_rate": 2.753701516975404e-10, + "loss": 0.0, + "num_input_tokens_seen": 137012008, + "step": 203290 + }, + { + "epoch": 4.966530672073877, + "grad_norm": 0.0019284519366919994, + "learning_rate": 2.7337264211746427e-10, + "loss": 0.0, + "num_input_tokens_seen": 137015272, + "step": 203295 + }, + { + "epoch": 4.966652822905724, + "grad_norm": 0.0029952942859381437, + "learning_rate": 2.713824028183209e-10, + "loss": 0.0, + "num_input_tokens_seen": 137018280, + "step": 203300 + }, + { + "epoch": 4.9667749737375715, + "grad_norm": 1.635344779060688e-05, + "learning_rate": 2.693994338145433e-10, + "loss": 0.0, + "num_input_tokens_seen": 137021864, + "step": 203305 + }, + { + "epoch": 4.966897124569418, + "grad_norm": 7.378828013315797e-05, + "learning_rate": 2.6742373512056435e-10, + "loss": 0.0, + "num_input_tokens_seen": 137025640, + "step": 203310 + }, + { + "epoch": 4.967019275401266, + "grad_norm": 0.00014544688747264445, + "learning_rate": 2.6545530675081695e-10, + "loss": 0.0, + "num_input_tokens_seen": 137028968, + "step": 203315 + }, + { + "epoch": 4.967141426233113, + "grad_norm": 3.913308682967909e-05, + "learning_rate": 2.6349414871962297e-10, + "loss": 0.0008, + "num_input_tokens_seen": 137032104, + "step": 203320 + }, + { + "epoch": 4.96726357706496, + "grad_norm": 0.00011158483539475128, + "learning_rate": 2.615402610411932e-10, + "loss": 0.0, + "num_input_tokens_seen": 137035624, + "step": 203325 + }, + { + "epoch": 4.967385727896807, + "grad_norm": 1.9497132598189637e-05, + "learning_rate": 2.595936437296276e-10, + "loss": 0.0, + "num_input_tokens_seen": 137039016, + "step": 203330 + }, + { + "epoch": 4.967507878728654, + "grad_norm": 1.9880870240740478e-05, + "learning_rate": 2.576542967993589e-10, + "loss": 0.0, + "num_input_tokens_seen": 137042344, + "step": 203335 + }, + { + "epoch": 4.967630029560501, + "grad_norm": 0.0005132934893481433, + "learning_rate": 2.55722220264154e-10, + "loss": 0.0, + "num_input_tokens_seen": 137045672, + "step": 203340 + }, + { + "epoch": 4.967752180392348, + "grad_norm": 0.000174630869878456, + "learning_rate": 2.5379741413833475e-10, + "loss": 0.0, + "num_input_tokens_seen": 137049128, + "step": 203345 + }, + { + "epoch": 4.967874331224196, + "grad_norm": 0.0001575258356751874, + "learning_rate": 2.5187987843577897e-10, + "loss": 0.0, + "num_input_tokens_seen": 137052904, + "step": 203350 + }, + { + "epoch": 4.9679964820560425, + "grad_norm": 6.655186734860763e-05, + "learning_rate": 2.499696131704754e-10, + "loss": 0.0, + "num_input_tokens_seen": 137056616, + "step": 203355 + }, + { + "epoch": 4.96811863288789, + "grad_norm": 5.819553734909277e-06, + "learning_rate": 2.4806661835630185e-10, + "loss": 0.0, + "num_input_tokens_seen": 137060008, + "step": 203360 + }, + { + "epoch": 4.968240783719737, + "grad_norm": 0.013717192225158215, + "learning_rate": 2.461708940070251e-10, + "loss": 0.0, + "num_input_tokens_seen": 137063208, + "step": 203365 + }, + { + "epoch": 4.9683629345515845, + "grad_norm": 0.00012144362699473277, + "learning_rate": 2.4428244013652287e-10, + "loss": 0.0, + "num_input_tokens_seen": 137066728, + "step": 203370 + }, + { + "epoch": 4.968485085383431, + "grad_norm": 0.00012399266415741295, + "learning_rate": 2.4240125675856206e-10, + "loss": 0.0, + "num_input_tokens_seen": 137070056, + "step": 203375 + }, + { + "epoch": 4.968607236215279, + "grad_norm": 0.003136326791718602, + "learning_rate": 2.405273438866873e-10, + "loss": 0.0, + "num_input_tokens_seen": 137073192, + "step": 203380 + }, + { + "epoch": 4.968729387047126, + "grad_norm": 0.0038233192171901464, + "learning_rate": 2.3866070153466534e-10, + "loss": 0.0008, + "num_input_tokens_seen": 137076264, + "step": 203385 + }, + { + "epoch": 4.968851537878973, + "grad_norm": 0.0008583197486586869, + "learning_rate": 2.368013297159299e-10, + "loss": 0.0, + "num_input_tokens_seen": 137079528, + "step": 203390 + }, + { + "epoch": 4.96897368871082, + "grad_norm": 5.924261131440289e-05, + "learning_rate": 2.349492284441368e-10, + "loss": 0.0, + "num_input_tokens_seen": 137082728, + "step": 203395 + }, + { + "epoch": 4.969095839542668, + "grad_norm": 0.0002450200554449111, + "learning_rate": 2.331043977327196e-10, + "loss": 0.0, + "num_input_tokens_seen": 137086376, + "step": 203400 + }, + { + "epoch": 4.969217990374514, + "grad_norm": 0.0019818120636045933, + "learning_rate": 2.312668375950011e-10, + "loss": 0.0, + "num_input_tokens_seen": 137089704, + "step": 203405 + }, + { + "epoch": 4.969340141206361, + "grad_norm": 3.149250187561847e-05, + "learning_rate": 2.2943654804441493e-10, + "loss": 0.0, + "num_input_tokens_seen": 137093160, + "step": 203410 + }, + { + "epoch": 4.969462292038209, + "grad_norm": 0.0010760186705738306, + "learning_rate": 2.2761352909428377e-10, + "loss": 0.0, + "num_input_tokens_seen": 137096552, + "step": 203415 + }, + { + "epoch": 4.969584442870056, + "grad_norm": 2.086194399453234e-05, + "learning_rate": 2.2579778075793031e-10, + "loss": 0.0, + "num_input_tokens_seen": 137099816, + "step": 203420 + }, + { + "epoch": 4.969706593701903, + "grad_norm": 1.81776522367727e-05, + "learning_rate": 2.2398930304834417e-10, + "loss": 0.0, + "num_input_tokens_seen": 137103208, + "step": 203425 + }, + { + "epoch": 4.96982874453375, + "grad_norm": 0.00011152262595715001, + "learning_rate": 2.2218809597895906e-10, + "loss": 0.0, + "num_input_tokens_seen": 137106600, + "step": 203430 + }, + { + "epoch": 4.9699508953655975, + "grad_norm": 0.0004978838260285556, + "learning_rate": 2.203941595626535e-10, + "loss": 0.0, + "num_input_tokens_seen": 137110056, + "step": 203435 + }, + { + "epoch": 4.970073046197444, + "grad_norm": 0.000971376255620271, + "learning_rate": 2.186074938125282e-10, + "loss": 0.0, + "num_input_tokens_seen": 137113320, + "step": 203440 + }, + { + "epoch": 4.970195197029292, + "grad_norm": 0.00025585308321751654, + "learning_rate": 2.1682809874168373e-10, + "loss": 0.0, + "num_input_tokens_seen": 137116456, + "step": 203445 + }, + { + "epoch": 4.970317347861139, + "grad_norm": 4.208882819511928e-05, + "learning_rate": 2.150559743628877e-10, + "loss": 0.0, + "num_input_tokens_seen": 137119784, + "step": 203450 + }, + { + "epoch": 4.970439498692986, + "grad_norm": 0.00013479522021953017, + "learning_rate": 2.132911206891297e-10, + "loss": 0.0, + "num_input_tokens_seen": 137123368, + "step": 203455 + }, + { + "epoch": 4.970561649524833, + "grad_norm": 0.0004235265660099685, + "learning_rate": 2.115335377332883e-10, + "loss": 0.0, + "num_input_tokens_seen": 137126824, + "step": 203460 + }, + { + "epoch": 4.970683800356681, + "grad_norm": 2.3727705411147326e-05, + "learning_rate": 2.0978322550802007e-10, + "loss": 0.0, + "num_input_tokens_seen": 137130280, + "step": 203465 + }, + { + "epoch": 4.970805951188527, + "grad_norm": 0.0004997532814741135, + "learning_rate": 2.080401840262036e-10, + "loss": 0.0, + "num_input_tokens_seen": 137133736, + "step": 203470 + }, + { + "epoch": 4.970928102020375, + "grad_norm": 0.0019565869588404894, + "learning_rate": 2.063044133003844e-10, + "loss": 0.0, + "num_input_tokens_seen": 137137128, + "step": 203475 + }, + { + "epoch": 4.971050252852222, + "grad_norm": 0.0008852098835632205, + "learning_rate": 2.04575913343219e-10, + "loss": 0.0, + "num_input_tokens_seen": 137140456, + "step": 203480 + }, + { + "epoch": 4.971172403684069, + "grad_norm": 4.9233127356274053e-05, + "learning_rate": 2.0285468416725294e-10, + "loss": 0.0, + "num_input_tokens_seen": 137143656, + "step": 203485 + }, + { + "epoch": 4.971294554515916, + "grad_norm": 9.417891851626337e-05, + "learning_rate": 2.0114072578503172e-10, + "loss": 0.0001, + "num_input_tokens_seen": 137146792, + "step": 203490 + }, + { + "epoch": 4.971416705347764, + "grad_norm": 10.676913261413574, + "learning_rate": 1.9943403820910086e-10, + "loss": 0.0011, + "num_input_tokens_seen": 137149800, + "step": 203495 + }, + { + "epoch": 4.9715388561796106, + "grad_norm": 0.0032459113281220198, + "learning_rate": 1.9773462145178387e-10, + "loss": 0.0, + "num_input_tokens_seen": 137153064, + "step": 203500 + }, + { + "epoch": 4.971661007011457, + "grad_norm": 0.0002514548250474036, + "learning_rate": 1.960424755254042e-10, + "loss": 0.0, + "num_input_tokens_seen": 137156328, + "step": 203505 + }, + { + "epoch": 4.971783157843305, + "grad_norm": 2.9430975700961426e-05, + "learning_rate": 1.9435760044239635e-10, + "loss": 0.0, + "num_input_tokens_seen": 137159400, + "step": 203510 + }, + { + "epoch": 4.971905308675153, + "grad_norm": 0.0017755551962181926, + "learning_rate": 1.9267999621486174e-10, + "loss": 0.0, + "num_input_tokens_seen": 137162600, + "step": 203515 + }, + { + "epoch": 4.972027459506999, + "grad_norm": 0.00012700166553258896, + "learning_rate": 1.9100966285512388e-10, + "loss": 0.0, + "num_input_tokens_seen": 137166568, + "step": 203520 + }, + { + "epoch": 4.972149610338846, + "grad_norm": 0.00025107708643190563, + "learning_rate": 1.8934660037528417e-10, + "loss": 0.0, + "num_input_tokens_seen": 137170088, + "step": 203525 + }, + { + "epoch": 4.972271761170694, + "grad_norm": 9.3789476522943e-06, + "learning_rate": 1.8769080878744402e-10, + "loss": 0.0, + "num_input_tokens_seen": 137173608, + "step": 203530 + }, + { + "epoch": 4.97239391200254, + "grad_norm": 0.00017873231263365597, + "learning_rate": 1.860422881035939e-10, + "loss": 0.0, + "num_input_tokens_seen": 137178984, + "step": 203535 + }, + { + "epoch": 4.972516062834388, + "grad_norm": 0.009690695442259312, + "learning_rate": 1.8440103833572417e-10, + "loss": 0.0, + "num_input_tokens_seen": 137182056, + "step": 203540 + }, + { + "epoch": 4.972638213666235, + "grad_norm": 0.001154441386461258, + "learning_rate": 1.8276705949593629e-10, + "loss": 0.0, + "num_input_tokens_seen": 137185064, + "step": 203545 + }, + { + "epoch": 4.9727603644980825, + "grad_norm": 7.696470675000455e-06, + "learning_rate": 1.8114035159588758e-10, + "loss": 0.0224, + "num_input_tokens_seen": 137187944, + "step": 203550 + }, + { + "epoch": 4.972882515329929, + "grad_norm": 0.0008099049446173012, + "learning_rate": 1.7952091464756846e-10, + "loss": 0.0, + "num_input_tokens_seen": 137190952, + "step": 203555 + }, + { + "epoch": 4.973004666161777, + "grad_norm": 8.639370207674801e-05, + "learning_rate": 1.7790874866263628e-10, + "loss": 0.0, + "num_input_tokens_seen": 137194408, + "step": 203560 + }, + { + "epoch": 4.973126816993624, + "grad_norm": 0.0008555944077670574, + "learning_rate": 1.7630385365285938e-10, + "loss": 0.0, + "num_input_tokens_seen": 137197736, + "step": 203565 + }, + { + "epoch": 4.973248967825471, + "grad_norm": 0.0013645697617903352, + "learning_rate": 1.7470622962989511e-10, + "loss": 0.0, + "num_input_tokens_seen": 137201064, + "step": 203570 + }, + { + "epoch": 4.973371118657318, + "grad_norm": 5.793929813080467e-05, + "learning_rate": 1.7311587660551186e-10, + "loss": 0.0, + "num_input_tokens_seen": 137204456, + "step": 203575 + }, + { + "epoch": 4.973493269489166, + "grad_norm": 0.0002381923550274223, + "learning_rate": 1.7153279459103386e-10, + "loss": 0.0, + "num_input_tokens_seen": 137208168, + "step": 203580 + }, + { + "epoch": 4.973615420321012, + "grad_norm": 0.001816401956602931, + "learning_rate": 1.699569835981185e-10, + "loss": 0.0, + "num_input_tokens_seen": 137211304, + "step": 203585 + }, + { + "epoch": 4.97373757115286, + "grad_norm": 8.881182293407619e-05, + "learning_rate": 1.6838844363820103e-10, + "loss": 0.0, + "num_input_tokens_seen": 137214696, + "step": 203590 + }, + { + "epoch": 4.973859721984707, + "grad_norm": 0.016806993633508682, + "learning_rate": 1.668271747227168e-10, + "loss": 0.0, + "num_input_tokens_seen": 137218408, + "step": 203595 + }, + { + "epoch": 4.9739818728165535, + "grad_norm": 0.01145798061043024, + "learning_rate": 1.6527317686299002e-10, + "loss": 0.0, + "num_input_tokens_seen": 137221864, + "step": 203600 + }, + { + "epoch": 4.974104023648401, + "grad_norm": 2.2194937628228217e-05, + "learning_rate": 1.63726450070234e-10, + "loss": 0.0, + "num_input_tokens_seen": 137225192, + "step": 203605 + }, + { + "epoch": 4.974226174480248, + "grad_norm": 0.003558725118637085, + "learning_rate": 1.62186994355884e-10, + "loss": 0.0, + "num_input_tokens_seen": 137228392, + "step": 203610 + }, + { + "epoch": 4.9743483253120955, + "grad_norm": 0.0013230282347649336, + "learning_rate": 1.6065480973104228e-10, + "loss": 0.0, + "num_input_tokens_seen": 137232296, + "step": 203615 + }, + { + "epoch": 4.974470476143942, + "grad_norm": 0.001185271656140685, + "learning_rate": 1.5912989620681107e-10, + "loss": 0.0, + "num_input_tokens_seen": 137235304, + "step": 203620 + }, + { + "epoch": 4.97459262697579, + "grad_norm": 0.001854298054240644, + "learning_rate": 1.5761225379429255e-10, + "loss": 0.0, + "num_input_tokens_seen": 137238120, + "step": 203625 + }, + { + "epoch": 4.974714777807637, + "grad_norm": 2.3406746549881063e-05, + "learning_rate": 1.56101882504478e-10, + "loss": 0.0, + "num_input_tokens_seen": 137241320, + "step": 203630 + }, + { + "epoch": 4.974836928639484, + "grad_norm": 0.00031124529778026044, + "learning_rate": 1.5459878234846958e-10, + "loss": 0.0, + "num_input_tokens_seen": 137245160, + "step": 203635 + }, + { + "epoch": 4.974959079471331, + "grad_norm": 0.0002666858781594783, + "learning_rate": 1.5310295333725853e-10, + "loss": 0.0, + "num_input_tokens_seen": 137248616, + "step": 203640 + }, + { + "epoch": 4.975081230303179, + "grad_norm": 0.0014295884175226092, + "learning_rate": 1.5161439548150301e-10, + "loss": 0.0001, + "num_input_tokens_seen": 137251688, + "step": 203645 + }, + { + "epoch": 4.975203381135025, + "grad_norm": 0.024229461327195168, + "learning_rate": 1.501331087920832e-10, + "loss": 0.0, + "num_input_tokens_seen": 137255016, + "step": 203650 + }, + { + "epoch": 4.975325531966873, + "grad_norm": 0.006210799794644117, + "learning_rate": 1.4865909327987924e-10, + "loss": 0.0, + "num_input_tokens_seen": 137258408, + "step": 203655 + }, + { + "epoch": 4.97544768279872, + "grad_norm": 5.4780171922175214e-05, + "learning_rate": 1.4719234895566034e-10, + "loss": 0.0, + "num_input_tokens_seen": 137261480, + "step": 203660 + }, + { + "epoch": 4.975569833630567, + "grad_norm": 4.419705874170177e-06, + "learning_rate": 1.457328758298626e-10, + "loss": 0.0, + "num_input_tokens_seen": 137264808, + "step": 203665 + }, + { + "epoch": 4.975691984462414, + "grad_norm": 0.004317359533160925, + "learning_rate": 1.4428067391325515e-10, + "loss": 0.0, + "num_input_tokens_seen": 137268328, + "step": 203670 + }, + { + "epoch": 4.975814135294261, + "grad_norm": 8.27832191134803e-05, + "learning_rate": 1.4283574321627413e-10, + "loss": 0.0001, + "num_input_tokens_seen": 137271464, + "step": 203675 + }, + { + "epoch": 4.9759362861261085, + "grad_norm": 0.0003701518871821463, + "learning_rate": 1.4139808374968864e-10, + "loss": 0.0, + "num_input_tokens_seen": 137274472, + "step": 203680 + }, + { + "epoch": 4.976058436957956, + "grad_norm": 0.00020802194194402546, + "learning_rate": 1.3996769552371279e-10, + "loss": 0.0, + "num_input_tokens_seen": 137277928, + "step": 203685 + }, + { + "epoch": 4.976180587789803, + "grad_norm": 0.00158477493096143, + "learning_rate": 1.3854457854878265e-10, + "loss": 0.0, + "num_input_tokens_seen": 137281320, + "step": 203690 + }, + { + "epoch": 4.97630273862165, + "grad_norm": 2.6810001145349815e-05, + "learning_rate": 1.3712873283533433e-10, + "loss": 0.0, + "num_input_tokens_seen": 137284648, + "step": 203695 + }, + { + "epoch": 4.976424889453497, + "grad_norm": 0.0019791999366134405, + "learning_rate": 1.3572015839358187e-10, + "loss": 0.0, + "num_input_tokens_seen": 137288232, + "step": 203700 + }, + { + "epoch": 4.976547040285344, + "grad_norm": 0.0001736970734782517, + "learning_rate": 1.3431885523385034e-10, + "loss": 0.0, + "num_input_tokens_seen": 137291432, + "step": 203705 + }, + { + "epoch": 4.976669191117192, + "grad_norm": 6.91135719534941e-05, + "learning_rate": 1.329248233662428e-10, + "loss": 0.0, + "num_input_tokens_seen": 137294760, + "step": 203710 + }, + { + "epoch": 4.976791341949038, + "grad_norm": 0.00016507727559655905, + "learning_rate": 1.3153806280097323e-10, + "loss": 0.0, + "num_input_tokens_seen": 137298280, + "step": 203715 + }, + { + "epoch": 4.976913492780886, + "grad_norm": 7.36595347916591e-06, + "learning_rate": 1.3015857354803372e-10, + "loss": 0.0, + "num_input_tokens_seen": 137301608, + "step": 203720 + }, + { + "epoch": 4.977035643612733, + "grad_norm": 0.005928873550146818, + "learning_rate": 1.2878635561752726e-10, + "loss": 0.0, + "num_input_tokens_seen": 137304488, + "step": 203725 + }, + { + "epoch": 4.97715779444458, + "grad_norm": 1.852934292401187e-05, + "learning_rate": 1.2742140901944587e-10, + "loss": 0.0, + "num_input_tokens_seen": 137308520, + "step": 203730 + }, + { + "epoch": 4.977279945276427, + "grad_norm": 6.925003253854811e-05, + "learning_rate": 1.2606373376367052e-10, + "loss": 0.0, + "num_input_tokens_seen": 137312040, + "step": 203735 + }, + { + "epoch": 4.977402096108275, + "grad_norm": 0.00015649801935069263, + "learning_rate": 1.2471332986008222e-10, + "loss": 0.0, + "num_input_tokens_seen": 137315560, + "step": 203740 + }, + { + "epoch": 4.9775242469401215, + "grad_norm": 6.381532875820994e-05, + "learning_rate": 1.233701973185619e-10, + "loss": 0.0, + "num_input_tokens_seen": 137318952, + "step": 203745 + }, + { + "epoch": 4.977646397771969, + "grad_norm": 3.099068271694705e-05, + "learning_rate": 1.2203433614876858e-10, + "loss": 0.0, + "num_input_tokens_seen": 137322408, + "step": 203750 + }, + { + "epoch": 4.977768548603816, + "grad_norm": 0.00028246158035472035, + "learning_rate": 1.2070574636058318e-10, + "loss": 0.0, + "num_input_tokens_seen": 137325800, + "step": 203755 + }, + { + "epoch": 4.9778906994356635, + "grad_norm": 8.343757508555427e-05, + "learning_rate": 1.1938442796344263e-10, + "loss": 0.0, + "num_input_tokens_seen": 137329448, + "step": 203760 + }, + { + "epoch": 4.97801285026751, + "grad_norm": 0.000519098830409348, + "learning_rate": 1.1807038096711685e-10, + "loss": 0.0, + "num_input_tokens_seen": 137332776, + "step": 203765 + }, + { + "epoch": 4.978135001099357, + "grad_norm": 0.00046904810005798936, + "learning_rate": 1.1676360538115381e-10, + "loss": 0.0, + "num_input_tokens_seen": 137336808, + "step": 203770 + }, + { + "epoch": 4.978257151931205, + "grad_norm": 0.0006193334120325744, + "learning_rate": 1.154641012149904e-10, + "loss": 0.0, + "num_input_tokens_seen": 137340392, + "step": 203775 + }, + { + "epoch": 4.978379302763052, + "grad_norm": 0.00027628708630800247, + "learning_rate": 1.1417186847806349e-10, + "loss": 0.0, + "num_input_tokens_seen": 137343592, + "step": 203780 + }, + { + "epoch": 4.978501453594899, + "grad_norm": 0.0007421595510095358, + "learning_rate": 1.12886907179921e-10, + "loss": 0.0, + "num_input_tokens_seen": 137347496, + "step": 203785 + }, + { + "epoch": 4.978623604426746, + "grad_norm": 0.0011555387172847986, + "learning_rate": 1.1160921732977779e-10, + "loss": 0.0, + "num_input_tokens_seen": 137350760, + "step": 203790 + }, + { + "epoch": 4.978745755258593, + "grad_norm": 0.0014732476556673646, + "learning_rate": 1.1033879893684872e-10, + "loss": 0.0, + "num_input_tokens_seen": 137354216, + "step": 203795 + }, + { + "epoch": 4.97886790609044, + "grad_norm": 0.0026653846725821495, + "learning_rate": 1.0907565201057067e-10, + "loss": 0.0, + "num_input_tokens_seen": 137357416, + "step": 203800 + }, + { + "epoch": 4.978990056922288, + "grad_norm": 0.00010707018373068422, + "learning_rate": 1.0781977655993645e-10, + "loss": 0.0, + "num_input_tokens_seen": 137360424, + "step": 203805 + }, + { + "epoch": 4.9791122077541345, + "grad_norm": 0.0009073491673916578, + "learning_rate": 1.0657117259427195e-10, + "loss": 0.0, + "num_input_tokens_seen": 137363368, + "step": 203810 + }, + { + "epoch": 4.979234358585982, + "grad_norm": 0.0016944017261266708, + "learning_rate": 1.0532984012256995e-10, + "loss": 0.0, + "num_input_tokens_seen": 137367208, + "step": 203815 + }, + { + "epoch": 4.979356509417829, + "grad_norm": 0.00020370505808386952, + "learning_rate": 1.0409577915382328e-10, + "loss": 0.0, + "num_input_tokens_seen": 137370344, + "step": 203820 + }, + { + "epoch": 4.979478660249677, + "grad_norm": 3.5763987398240715e-05, + "learning_rate": 1.0286898969702473e-10, + "loss": 0.1607, + "num_input_tokens_seen": 137373480, + "step": 203825 + }, + { + "epoch": 4.979600811081523, + "grad_norm": 0.00012906512711197138, + "learning_rate": 1.016494717610561e-10, + "loss": 0.0, + "num_input_tokens_seen": 137377000, + "step": 203830 + }, + { + "epoch": 4.979722961913371, + "grad_norm": 0.0001898586779134348, + "learning_rate": 1.0043722535491018e-10, + "loss": 0.0, + "num_input_tokens_seen": 137380712, + "step": 203835 + }, + { + "epoch": 4.979845112745218, + "grad_norm": 0.0006985256914049387, + "learning_rate": 9.92322504872467e-11, + "loss": 0.0, + "num_input_tokens_seen": 137384360, + "step": 203840 + }, + { + "epoch": 4.979967263577065, + "grad_norm": 0.001462116721086204, + "learning_rate": 9.803454716694748e-11, + "loss": 0.0, + "num_input_tokens_seen": 137387560, + "step": 203845 + }, + { + "epoch": 4.980089414408912, + "grad_norm": 0.0007516284240409732, + "learning_rate": 9.684411540267224e-11, + "loss": 0.0, + "num_input_tokens_seen": 137391016, + "step": 203850 + }, + { + "epoch": 4.98021156524076, + "grad_norm": 0.003561106277629733, + "learning_rate": 9.566095520308071e-11, + "loss": 0.0, + "num_input_tokens_seen": 137394472, + "step": 203855 + }, + { + "epoch": 4.980333716072606, + "grad_norm": 0.0006031613447703421, + "learning_rate": 9.448506657683264e-11, + "loss": 0.0, + "num_input_tokens_seen": 137397352, + "step": 203860 + }, + { + "epoch": 4.980455866904453, + "grad_norm": 0.15565507113933563, + "learning_rate": 9.331644953236573e-11, + "loss": 0.0001, + "num_input_tokens_seen": 137400808, + "step": 203865 + }, + { + "epoch": 4.980578017736301, + "grad_norm": 2.2073681975598447e-05, + "learning_rate": 9.21551040783397e-11, + "loss": 0.0, + "num_input_tokens_seen": 137403880, + "step": 203870 + }, + { + "epoch": 4.980700168568148, + "grad_norm": 1.1446640201029368e-05, + "learning_rate": 9.100103022297023e-11, + "loss": 0.0, + "num_input_tokens_seen": 137406824, + "step": 203875 + }, + { + "epoch": 4.980822319399995, + "grad_norm": 0.00042873574420809746, + "learning_rate": 8.985422797491704e-11, + "loss": 0.0002, + "num_input_tokens_seen": 137410216, + "step": 203880 + }, + { + "epoch": 4.980944470231842, + "grad_norm": 0.000481167109683156, + "learning_rate": 8.871469734228477e-11, + "loss": 0.0, + "num_input_tokens_seen": 137414056, + "step": 203885 + }, + { + "epoch": 4.98106662106369, + "grad_norm": 6.701985694235191e-05, + "learning_rate": 8.758243833351109e-11, + "loss": 0.0, + "num_input_tokens_seen": 137417192, + "step": 203890 + }, + { + "epoch": 4.981188771895536, + "grad_norm": 0.0001698703272268176, + "learning_rate": 8.645745095681167e-11, + "loss": 0.0, + "num_input_tokens_seen": 137420264, + "step": 203895 + }, + { + "epoch": 4.981310922727384, + "grad_norm": 0.00016953774320427328, + "learning_rate": 8.533973522029114e-11, + "loss": 0.0, + "num_input_tokens_seen": 137423400, + "step": 203900 + }, + { + "epoch": 4.981433073559231, + "grad_norm": 0.000789462763350457, + "learning_rate": 8.422929113216515e-11, + "loss": 0.0, + "num_input_tokens_seen": 137426408, + "step": 203905 + }, + { + "epoch": 4.981555224391078, + "grad_norm": 0.001563973375596106, + "learning_rate": 8.312611870042729e-11, + "loss": 0.0, + "num_input_tokens_seen": 137430184, + "step": 203910 + }, + { + "epoch": 4.981677375222925, + "grad_norm": 0.992311954498291, + "learning_rate": 8.203021793318221e-11, + "loss": 0.0002, + "num_input_tokens_seen": 137433448, + "step": 203915 + }, + { + "epoch": 4.981799526054773, + "grad_norm": 0.00013686691818293184, + "learning_rate": 8.094158883831248e-11, + "loss": 0.0, + "num_input_tokens_seen": 137437032, + "step": 203920 + }, + { + "epoch": 4.9819216768866195, + "grad_norm": 0.0002493971842341125, + "learning_rate": 7.98602314238117e-11, + "loss": 0.0, + "num_input_tokens_seen": 137440680, + "step": 203925 + }, + { + "epoch": 4.982043827718467, + "grad_norm": 0.0005622510798275471, + "learning_rate": 7.878614569745146e-11, + "loss": 0.0, + "num_input_tokens_seen": 137443688, + "step": 203930 + }, + { + "epoch": 4.982165978550314, + "grad_norm": 8.922885172069073e-05, + "learning_rate": 7.771933166722532e-11, + "loss": 0.0001, + "num_input_tokens_seen": 137447144, + "step": 203935 + }, + { + "epoch": 4.9822881293821615, + "grad_norm": 9.467336349189281e-05, + "learning_rate": 7.665978934068285e-11, + "loss": 0.0, + "num_input_tokens_seen": 137450344, + "step": 203940 + }, + { + "epoch": 4.982410280214008, + "grad_norm": 1.9983550373581238e-05, + "learning_rate": 7.560751872559557e-11, + "loss": 0.0, + "num_input_tokens_seen": 137453800, + "step": 203945 + }, + { + "epoch": 4.982532431045856, + "grad_norm": 0.0004079849168192595, + "learning_rate": 7.456251982973505e-11, + "loss": 0.0, + "num_input_tokens_seen": 137456808, + "step": 203950 + }, + { + "epoch": 4.982654581877703, + "grad_norm": 3.181111242156476e-05, + "learning_rate": 7.352479266053979e-11, + "loss": 0.0, + "num_input_tokens_seen": 137460072, + "step": 203955 + }, + { + "epoch": 4.982776732709549, + "grad_norm": 0.000325191009324044, + "learning_rate": 7.249433722567033e-11, + "loss": 0.0, + "num_input_tokens_seen": 137463720, + "step": 203960 + }, + { + "epoch": 4.982898883541397, + "grad_norm": 4.00619137508329e-05, + "learning_rate": 7.147115353245414e-11, + "loss": 0.0, + "num_input_tokens_seen": 137467560, + "step": 203965 + }, + { + "epoch": 4.983021034373244, + "grad_norm": 0.017873620614409447, + "learning_rate": 7.045524158855176e-11, + "loss": 0.0, + "num_input_tokens_seen": 137470888, + "step": 203970 + }, + { + "epoch": 4.983143185205091, + "grad_norm": 0.00022479586186818779, + "learning_rate": 6.944660140117964e-11, + "loss": 0.0, + "num_input_tokens_seen": 137474088, + "step": 203975 + }, + { + "epoch": 4.983265336036938, + "grad_norm": 0.014547290280461311, + "learning_rate": 6.844523297777627e-11, + "loss": 0.0, + "num_input_tokens_seen": 137477160, + "step": 203980 + }, + { + "epoch": 4.983387486868786, + "grad_norm": 5.238172161625698e-05, + "learning_rate": 6.745113632566912e-11, + "loss": 0.0, + "num_input_tokens_seen": 137480552, + "step": 203985 + }, + { + "epoch": 4.9835096377006325, + "grad_norm": 0.0010335510596632957, + "learning_rate": 6.64643114518526e-11, + "loss": 0.0, + "num_input_tokens_seen": 137484072, + "step": 203990 + }, + { + "epoch": 4.98363178853248, + "grad_norm": 7.121911039575934e-05, + "learning_rate": 6.548475836376521e-11, + "loss": 0.0, + "num_input_tokens_seen": 137487464, + "step": 203995 + }, + { + "epoch": 4.983753939364327, + "grad_norm": 0.0017879261868074536, + "learning_rate": 6.451247706840136e-11, + "loss": 0.0, + "num_input_tokens_seen": 137491560, + "step": 204000 + }, + { + "epoch": 4.9838760901961745, + "grad_norm": 0.0008008818840608001, + "learning_rate": 6.354746757286645e-11, + "loss": 0.0, + "num_input_tokens_seen": 137494632, + "step": 204005 + }, + { + "epoch": 4.983998241028021, + "grad_norm": 0.002612438052892685, + "learning_rate": 6.258972988415489e-11, + "loss": 0.0, + "num_input_tokens_seen": 137497896, + "step": 204010 + }, + { + "epoch": 4.984120391859869, + "grad_norm": 4.5757096813758835e-05, + "learning_rate": 6.16392640091501e-11, + "loss": 0.0, + "num_input_tokens_seen": 137501288, + "step": 204015 + }, + { + "epoch": 4.984242542691716, + "grad_norm": 0.00022237258963286877, + "learning_rate": 6.069606995495746e-11, + "loss": 0.0, + "num_input_tokens_seen": 137504680, + "step": 204020 + }, + { + "epoch": 4.984364693523563, + "grad_norm": 0.00012107265501981601, + "learning_rate": 5.976014772834937e-11, + "loss": 0.0, + "num_input_tokens_seen": 137507688, + "step": 204025 + }, + { + "epoch": 4.98448684435541, + "grad_norm": 5.114965824759565e-05, + "learning_rate": 5.883149733609816e-11, + "loss": 0.0, + "num_input_tokens_seen": 137510824, + "step": 204030 + }, + { + "epoch": 4.984608995187257, + "grad_norm": 2.346815927012358e-05, + "learning_rate": 5.7910118784976204e-11, + "loss": 0.0, + "num_input_tokens_seen": 137514088, + "step": 204035 + }, + { + "epoch": 4.984731146019104, + "grad_norm": 8.091831841738895e-05, + "learning_rate": 5.699601208164484e-11, + "loss": 0.0, + "num_input_tokens_seen": 137517928, + "step": 204040 + }, + { + "epoch": 4.984853296850952, + "grad_norm": 0.002588754054158926, + "learning_rate": 5.6089177232765406e-11, + "loss": 0.0, + "num_input_tokens_seen": 137521128, + "step": 204045 + }, + { + "epoch": 4.984975447682799, + "grad_norm": 0.00010856986773433164, + "learning_rate": 5.518961424499924e-11, + "loss": 0.0492, + "num_input_tokens_seen": 137524584, + "step": 204050 + }, + { + "epoch": 4.9850975985146455, + "grad_norm": 0.0001354777195956558, + "learning_rate": 5.4297323124896655e-11, + "loss": 0.0, + "num_input_tokens_seen": 137528296, + "step": 204055 + }, + { + "epoch": 4.985219749346493, + "grad_norm": 1.4176566764945164e-05, + "learning_rate": 5.341230387878593e-11, + "loss": 0.0, + "num_input_tokens_seen": 137532456, + "step": 204060 + }, + { + "epoch": 4.98534190017834, + "grad_norm": 0.00031324996962212026, + "learning_rate": 5.253455651332839e-11, + "loss": 0.0, + "num_input_tokens_seen": 137535784, + "step": 204065 + }, + { + "epoch": 4.9854640510101875, + "grad_norm": 0.00043233123142272234, + "learning_rate": 5.166408103474129e-11, + "loss": 0.0, + "num_input_tokens_seen": 137538920, + "step": 204070 + }, + { + "epoch": 4.985586201842034, + "grad_norm": 0.0011373285669833422, + "learning_rate": 5.080087744946393e-11, + "loss": 0.0, + "num_input_tokens_seen": 137541992, + "step": 204075 + }, + { + "epoch": 4.985708352673882, + "grad_norm": 0.0024712735321372747, + "learning_rate": 4.994494576360253e-11, + "loss": 0.0, + "num_input_tokens_seen": 137545960, + "step": 204080 + }, + { + "epoch": 4.985830503505729, + "grad_norm": 1.2956945283804089e-05, + "learning_rate": 4.909628598359639e-11, + "loss": 0.0, + "num_input_tokens_seen": 137549352, + "step": 204085 + }, + { + "epoch": 4.985952654337576, + "grad_norm": 1.887368671305012e-05, + "learning_rate": 4.82548981154407e-11, + "loss": 0.0, + "num_input_tokens_seen": 137552488, + "step": 204090 + }, + { + "epoch": 4.986074805169423, + "grad_norm": 9.684948963695206e-06, + "learning_rate": 4.742078216535272e-11, + "loss": 0.0, + "num_input_tokens_seen": 137555816, + "step": 204095 + }, + { + "epoch": 4.986196956001271, + "grad_norm": 0.0011596221011132002, + "learning_rate": 4.6593938139438685e-11, + "loss": 0.0, + "num_input_tokens_seen": 137559976, + "step": 204100 + }, + { + "epoch": 4.986319106833117, + "grad_norm": 0.0002005387214012444, + "learning_rate": 4.577436604358276e-11, + "loss": 0.0, + "num_input_tokens_seen": 137563368, + "step": 204105 + }, + { + "epoch": 4.986441257664965, + "grad_norm": 2.454880450386554e-05, + "learning_rate": 4.496206588378015e-11, + "loss": 0.0, + "num_input_tokens_seen": 137566760, + "step": 204110 + }, + { + "epoch": 4.986563408496812, + "grad_norm": 0.00022712937789037824, + "learning_rate": 4.4157037666026075e-11, + "loss": 0.0, + "num_input_tokens_seen": 137570088, + "step": 204115 + }, + { + "epoch": 4.986685559328659, + "grad_norm": 0.0013797935098409653, + "learning_rate": 4.335928139609368e-11, + "loss": 0.0, + "num_input_tokens_seen": 137573928, + "step": 204120 + }, + { + "epoch": 4.986807710160506, + "grad_norm": 0.00011030215682694688, + "learning_rate": 4.2568797079867156e-11, + "loss": 0.0003, + "num_input_tokens_seen": 137577128, + "step": 204125 + }, + { + "epoch": 4.986929860992353, + "grad_norm": 0.025388803333044052, + "learning_rate": 4.1785584723008635e-11, + "loss": 0.0, + "num_input_tokens_seen": 137580264, + "step": 204130 + }, + { + "epoch": 4.9870520118242005, + "grad_norm": 1.8925420590676367e-05, + "learning_rate": 4.100964433118026e-11, + "loss": 0.0, + "num_input_tokens_seen": 137583464, + "step": 204135 + }, + { + "epoch": 4.987174162656048, + "grad_norm": 0.0001776775170583278, + "learning_rate": 4.0240975910155186e-11, + "loss": 0.0, + "num_input_tokens_seen": 137586728, + "step": 204140 + }, + { + "epoch": 4.987296313487895, + "grad_norm": 0.023028668016195297, + "learning_rate": 3.9479579465373504e-11, + "loss": 0.0, + "num_input_tokens_seen": 137589864, + "step": 204145 + }, + { + "epoch": 4.987418464319742, + "grad_norm": 0.0003330595209263265, + "learning_rate": 3.8725455002608377e-11, + "loss": 0.0, + "num_input_tokens_seen": 137593064, + "step": 204150 + }, + { + "epoch": 4.987540615151589, + "grad_norm": 0.000414291862398386, + "learning_rate": 3.797860252707785e-11, + "loss": 0.0, + "num_input_tokens_seen": 137596520, + "step": 204155 + }, + { + "epoch": 4.987662765983436, + "grad_norm": 0.00011527969763847068, + "learning_rate": 3.7239022044333045e-11, + "loss": 0.0, + "num_input_tokens_seen": 137599720, + "step": 204160 + }, + { + "epoch": 4.987784916815284, + "grad_norm": 0.00047657510731369257, + "learning_rate": 3.6506713559703025e-11, + "loss": 0.0, + "num_input_tokens_seen": 137605352, + "step": 204165 + }, + { + "epoch": 4.98790706764713, + "grad_norm": 3.753498094738461e-05, + "learning_rate": 3.578167707862789e-11, + "loss": 0.0, + "num_input_tokens_seen": 137608360, + "step": 204170 + }, + { + "epoch": 4.988029218478978, + "grad_norm": 0.00023259302543010563, + "learning_rate": 3.506391260621466e-11, + "loss": 0.0, + "num_input_tokens_seen": 137611816, + "step": 204175 + }, + { + "epoch": 4.988151369310825, + "grad_norm": 0.002862380351871252, + "learning_rate": 3.4353420147903435e-11, + "loss": 0.0, + "num_input_tokens_seen": 137615144, + "step": 204180 + }, + { + "epoch": 4.9882735201426724, + "grad_norm": 0.0003798987891059369, + "learning_rate": 3.365019970869021e-11, + "loss": 0.0, + "num_input_tokens_seen": 137618536, + "step": 204185 + }, + { + "epoch": 4.988395670974519, + "grad_norm": 0.0195639468729496, + "learning_rate": 3.295425129368201e-11, + "loss": 0.0, + "num_input_tokens_seen": 137621800, + "step": 204190 + }, + { + "epoch": 4.988517821806367, + "grad_norm": 2.1513849787879735e-05, + "learning_rate": 3.226557490798587e-11, + "loss": 0.0, + "num_input_tokens_seen": 137624872, + "step": 204195 + }, + { + "epoch": 4.988639972638214, + "grad_norm": 5.544390660361387e-05, + "learning_rate": 3.158417055670881e-11, + "loss": 0.0579, + "num_input_tokens_seen": 137627944, + "step": 204200 + }, + { + "epoch": 4.988762123470061, + "grad_norm": 0.00016343161405529827, + "learning_rate": 3.0910038244624794e-11, + "loss": 0.0, + "num_input_tokens_seen": 137631592, + "step": 204205 + }, + { + "epoch": 4.988884274301908, + "grad_norm": 0.0017932208720594645, + "learning_rate": 3.0243177976729816e-11, + "loss": 0.0, + "num_input_tokens_seen": 137634664, + "step": 204210 + }, + { + "epoch": 4.989006425133756, + "grad_norm": 0.0006000014836899936, + "learning_rate": 2.9583589757908863e-11, + "loss": 0.0, + "num_input_tokens_seen": 137637800, + "step": 204215 + }, + { + "epoch": 4.989128575965602, + "grad_norm": 1.5821640772628598e-05, + "learning_rate": 2.893127359282488e-11, + "loss": 0.0, + "num_input_tokens_seen": 137641512, + "step": 204220 + }, + { + "epoch": 4.989250726797449, + "grad_norm": 0.00039087023469619453, + "learning_rate": 2.8286229486362833e-11, + "loss": 0.0, + "num_input_tokens_seen": 137644648, + "step": 204225 + }, + { + "epoch": 4.989372877629297, + "grad_norm": 60.79288864135742, + "learning_rate": 2.764845744318567e-11, + "loss": 0.0763, + "num_input_tokens_seen": 137648232, + "step": 204230 + }, + { + "epoch": 4.9894950284611435, + "grad_norm": 9.158341708825901e-05, + "learning_rate": 2.701795746795632e-11, + "loss": 0.0, + "num_input_tokens_seen": 137651752, + "step": 204235 + }, + { + "epoch": 4.989617179292991, + "grad_norm": 0.00457747234031558, + "learning_rate": 2.6394729565115682e-11, + "loss": 0.0, + "num_input_tokens_seen": 137655016, + "step": 204240 + }, + { + "epoch": 4.989739330124838, + "grad_norm": 0.00026916858041659, + "learning_rate": 2.5778773739326687e-11, + "loss": 0.0, + "num_input_tokens_seen": 137658664, + "step": 204245 + }, + { + "epoch": 4.9898614809566855, + "grad_norm": 0.00012310463353060186, + "learning_rate": 2.517008999503023e-11, + "loss": 0.0, + "num_input_tokens_seen": 137662440, + "step": 204250 + }, + { + "epoch": 4.989983631788532, + "grad_norm": 0.00021796536748297513, + "learning_rate": 2.4568678336667203e-11, + "loss": 0.0, + "num_input_tokens_seen": 137666344, + "step": 204255 + }, + { + "epoch": 4.99010578262038, + "grad_norm": 0.006101370323449373, + "learning_rate": 2.3974538768567475e-11, + "loss": 0.0, + "num_input_tokens_seen": 137669544, + "step": 204260 + }, + { + "epoch": 4.990227933452227, + "grad_norm": 0.00022258379613049328, + "learning_rate": 2.338767129517194e-11, + "loss": 0.0, + "num_input_tokens_seen": 137672744, + "step": 204265 + }, + { + "epoch": 4.990350084284074, + "grad_norm": 0.0004926729016005993, + "learning_rate": 2.280807592058842e-11, + "loss": 0.112, + "num_input_tokens_seen": 137676200, + "step": 204270 + }, + { + "epoch": 4.990472235115921, + "grad_norm": 2.890416180889588e-05, + "learning_rate": 2.2235752649146787e-11, + "loss": 0.0, + "num_input_tokens_seen": 137679592, + "step": 204275 + }, + { + "epoch": 4.990594385947769, + "grad_norm": 0.0013556944904848933, + "learning_rate": 2.1670701484954866e-11, + "loss": 0.0, + "num_input_tokens_seen": 137682984, + "step": 204280 + }, + { + "epoch": 4.990716536779615, + "grad_norm": 0.08208898454904556, + "learning_rate": 2.1112922432120482e-11, + "loss": 0.0, + "num_input_tokens_seen": 137686056, + "step": 204285 + }, + { + "epoch": 4.990838687611463, + "grad_norm": 0.000291264004772529, + "learning_rate": 2.056241549475146e-11, + "loss": 0.0, + "num_input_tokens_seen": 137689192, + "step": 204290 + }, + { + "epoch": 4.99096083844331, + "grad_norm": 0.007216623052954674, + "learning_rate": 2.0019180676733582e-11, + "loss": 0.0, + "num_input_tokens_seen": 137692648, + "step": 204295 + }, + { + "epoch": 4.9910829892751565, + "grad_norm": 0.0006970735266804695, + "learning_rate": 1.948321798217467e-11, + "loss": 0.0, + "num_input_tokens_seen": 137695848, + "step": 204300 + }, + { + "epoch": 4.991205140107004, + "grad_norm": 1.272992722078925e-05, + "learning_rate": 1.8954527414849488e-11, + "loss": 0.0, + "num_input_tokens_seen": 137698856, + "step": 204305 + }, + { + "epoch": 4.991327290938852, + "grad_norm": 1.9237311789765954e-05, + "learning_rate": 1.843310897875483e-11, + "loss": 0.0, + "num_input_tokens_seen": 137702120, + "step": 204310 + }, + { + "epoch": 4.9914494417706985, + "grad_norm": 0.0004398747405502945, + "learning_rate": 1.7918962677443418e-11, + "loss": 0.0, + "num_input_tokens_seen": 137705704, + "step": 204315 + }, + { + "epoch": 4.991571592602545, + "grad_norm": 7.332904351642355e-05, + "learning_rate": 1.741208851491205e-11, + "loss": 0.0, + "num_input_tokens_seen": 137709096, + "step": 204320 + }, + { + "epoch": 4.991693743434393, + "grad_norm": 6.562047929037362e-05, + "learning_rate": 1.6912486494602417e-11, + "loss": 0.0, + "num_input_tokens_seen": 137712744, + "step": 204325 + }, + { + "epoch": 4.99181589426624, + "grad_norm": 0.0015275224577635527, + "learning_rate": 1.6420156620289283e-11, + "loss": 0.0, + "num_input_tokens_seen": 137715880, + "step": 204330 + }, + { + "epoch": 4.991938045098087, + "grad_norm": 0.04927076771855354, + "learning_rate": 1.593509889563638e-11, + "loss": 0.0, + "num_input_tokens_seen": 137719528, + "step": 204335 + }, + { + "epoch": 4.992060195929934, + "grad_norm": 0.0021134349517524242, + "learning_rate": 1.5457313323974374e-11, + "loss": 0.0, + "num_input_tokens_seen": 137722856, + "step": 204340 + }, + { + "epoch": 4.992182346761782, + "grad_norm": 0.0008005241979844868, + "learning_rate": 1.4986799908855985e-11, + "loss": 0.0, + "num_input_tokens_seen": 137726312, + "step": 204345 + }, + { + "epoch": 4.992304497593628, + "grad_norm": 6.598847539862618e-05, + "learning_rate": 1.45235586537229e-11, + "loss": 0.0, + "num_input_tokens_seen": 137729576, + "step": 204350 + }, + { + "epoch": 4.992426648425476, + "grad_norm": 12.575538635253906, + "learning_rate": 1.4067589562016812e-11, + "loss": 0.0302, + "num_input_tokens_seen": 137732584, + "step": 204355 + }, + { + "epoch": 4.992548799257323, + "grad_norm": 0.001489661866798997, + "learning_rate": 1.361889263695737e-11, + "loss": 0.0, + "num_input_tokens_seen": 137735976, + "step": 204360 + }, + { + "epoch": 4.99267095008917, + "grad_norm": 0.002886560745537281, + "learning_rate": 1.3177467881764214e-11, + "loss": 0.0, + "num_input_tokens_seen": 137739944, + "step": 204365 + }, + { + "epoch": 4.992793100921017, + "grad_norm": 0.00042491499334573746, + "learning_rate": 1.274331529976802e-11, + "loss": 0.0, + "num_input_tokens_seen": 137743336, + "step": 204370 + }, + { + "epoch": 4.992915251752865, + "grad_norm": 1.1034517228836194e-05, + "learning_rate": 1.2316434893966387e-11, + "loss": 0.0, + "num_input_tokens_seen": 137746728, + "step": 204375 + }, + { + "epoch": 4.9930374025847115, + "grad_norm": 0.00016755687829572707, + "learning_rate": 1.1896826667689985e-11, + "loss": 0.0, + "num_input_tokens_seen": 137750312, + "step": 204380 + }, + { + "epoch": 4.993159553416559, + "grad_norm": 0.00024167363881133497, + "learning_rate": 1.1484490623825394e-11, + "loss": 0.0, + "num_input_tokens_seen": 137753192, + "step": 204385 + }, + { + "epoch": 4.993281704248406, + "grad_norm": 2.5365121473441832e-05, + "learning_rate": 1.1079426765370215e-11, + "loss": 0.0, + "num_input_tokens_seen": 137756328, + "step": 204390 + }, + { + "epoch": 4.993403855080253, + "grad_norm": 0.0004141927638556808, + "learning_rate": 1.068163509532205e-11, + "loss": 0.0, + "num_input_tokens_seen": 137759656, + "step": 204395 + }, + { + "epoch": 4.9935260059121, + "grad_norm": 0.00025063075008802116, + "learning_rate": 1.0291115616567481e-11, + "loss": 0.0, + "num_input_tokens_seen": 137762792, + "step": 204400 + }, + { + "epoch": 4.993648156743948, + "grad_norm": 1.626476841920521e-05, + "learning_rate": 9.907868331882063e-12, + "loss": 0.0, + "num_input_tokens_seen": 137766056, + "step": 204405 + }, + { + "epoch": 4.993770307575795, + "grad_norm": 0.0001084671457647346, + "learning_rate": 9.5318932442634e-12, + "loss": 0.0, + "num_input_tokens_seen": 137769512, + "step": 204410 + }, + { + "epoch": 4.993892458407641, + "grad_norm": 0.0017103019636124372, + "learning_rate": 9.163190356153982e-12, + "loss": 0.0, + "num_input_tokens_seen": 137772904, + "step": 204415 + }, + { + "epoch": 4.994014609239489, + "grad_norm": 0.0003728297306224704, + "learning_rate": 8.80175967044039e-12, + "loss": 0.0, + "num_input_tokens_seen": 137775912, + "step": 204420 + }, + { + "epoch": 4.994136760071336, + "grad_norm": 9.055395639734343e-05, + "learning_rate": 8.447601189676135e-12, + "loss": 0.0, + "num_input_tokens_seen": 137779496, + "step": 204425 + }, + { + "epoch": 4.994258910903183, + "grad_norm": 0.0002516876847948879, + "learning_rate": 8.100714916414731e-12, + "loss": 0.0, + "num_input_tokens_seen": 137782504, + "step": 204430 + }, + { + "epoch": 4.99438106173503, + "grad_norm": 0.0004563323745969683, + "learning_rate": 7.76110085320969e-12, + "loss": 0.0, + "num_input_tokens_seen": 137785896, + "step": 204435 + }, + { + "epoch": 4.994503212566878, + "grad_norm": 0.0009458388667553663, + "learning_rate": 7.428759002614527e-12, + "loss": 0.0, + "num_input_tokens_seen": 137789928, + "step": 204440 + }, + { + "epoch": 4.9946253633987245, + "grad_norm": 0.056152962148189545, + "learning_rate": 7.103689366849686e-12, + "loss": 0.0, + "num_input_tokens_seen": 137793512, + "step": 204445 + }, + { + "epoch": 4.994747514230572, + "grad_norm": 0.0003752903430722654, + "learning_rate": 6.7858919484686805e-12, + "loss": 0.0, + "num_input_tokens_seen": 137796776, + "step": 204450 + }, + { + "epoch": 4.994869665062419, + "grad_norm": 9.508341463515535e-05, + "learning_rate": 6.4753667496919575e-12, + "loss": 0.0, + "num_input_tokens_seen": 137800168, + "step": 204455 + }, + { + "epoch": 4.9949918158942666, + "grad_norm": 0.000608145899605006, + "learning_rate": 6.172113772850984e-12, + "loss": 0.0, + "num_input_tokens_seen": 137803816, + "step": 204460 + }, + { + "epoch": 4.995113966726113, + "grad_norm": 0.0006833765655755997, + "learning_rate": 5.876133019944163e-12, + "loss": 0.0, + "num_input_tokens_seen": 137806888, + "step": 204465 + }, + { + "epoch": 4.995236117557961, + "grad_norm": 0.00020582752767950296, + "learning_rate": 5.587424493413984e-12, + "loss": 0.0, + "num_input_tokens_seen": 137810088, + "step": 204470 + }, + { + "epoch": 4.995358268389808, + "grad_norm": 0.0001288704515900463, + "learning_rate": 5.3059881951478254e-12, + "loss": 0.0, + "num_input_tokens_seen": 137813608, + "step": 204475 + }, + { + "epoch": 4.995480419221655, + "grad_norm": 0.00013208498421590775, + "learning_rate": 5.031824127255113e-12, + "loss": 0.0, + "num_input_tokens_seen": 137817064, + "step": 204480 + }, + { + "epoch": 4.995602570053502, + "grad_norm": 0.0015713643515482545, + "learning_rate": 4.764932291734247e-12, + "loss": 0.0, + "num_input_tokens_seen": 137820264, + "step": 204485 + }, + { + "epoch": 4.995724720885349, + "grad_norm": 0.00032959875534288585, + "learning_rate": 4.505312690583629e-12, + "loss": 0.0, + "num_input_tokens_seen": 137824040, + "step": 204490 + }, + { + "epoch": 4.995846871717196, + "grad_norm": 0.002318650484085083, + "learning_rate": 4.252965325579616e-12, + "loss": 0.0, + "num_input_tokens_seen": 137826920, + "step": 204495 + }, + { + "epoch": 4.995969022549043, + "grad_norm": 6.434229726437479e-05, + "learning_rate": 4.0078901987206096e-12, + "loss": 0.0, + "num_input_tokens_seen": 137830568, + "step": 204500 + }, + { + "epoch": 4.996091173380891, + "grad_norm": 0.0012508381623774767, + "learning_rate": 3.770087311560921e-12, + "loss": 0.0, + "num_input_tokens_seen": 137834088, + "step": 204505 + }, + { + "epoch": 4.996213324212738, + "grad_norm": 0.0002872292825486511, + "learning_rate": 3.5395566659879307e-12, + "loss": 0.0, + "num_input_tokens_seen": 137837160, + "step": 204510 + }, + { + "epoch": 4.996335475044585, + "grad_norm": 0.0004731950757559389, + "learning_rate": 3.3162982636669722e-12, + "loss": 0.0, + "num_input_tokens_seen": 137840424, + "step": 204515 + }, + { + "epoch": 4.996457625876432, + "grad_norm": 0.011159982532262802, + "learning_rate": 3.1003121061523583e-12, + "loss": 0.0, + "num_input_tokens_seen": 137843944, + "step": 204520 + }, + { + "epoch": 4.99657977670828, + "grad_norm": 0.00016125261026900262, + "learning_rate": 2.8915981951094236e-12, + "loss": 0.0, + "num_input_tokens_seen": 137847080, + "step": 204525 + }, + { + "epoch": 4.996701927540126, + "grad_norm": 0.00041134297498501837, + "learning_rate": 2.690156531981458e-12, + "loss": 0.0, + "num_input_tokens_seen": 137850792, + "step": 204530 + }, + { + "epoch": 4.996824078371974, + "grad_norm": 0.000439604016719386, + "learning_rate": 2.4959871183227732e-12, + "loss": 0.0, + "num_input_tokens_seen": 137853928, + "step": 204535 + }, + { + "epoch": 4.996946229203821, + "grad_norm": 0.002617679536342621, + "learning_rate": 2.309089955354615e-12, + "loss": 0.0, + "num_input_tokens_seen": 137856936, + "step": 204540 + }, + { + "epoch": 4.997068380035668, + "grad_norm": 7.568387081846595e-05, + "learning_rate": 2.1294650446312955e-12, + "loss": 0.0, + "num_input_tokens_seen": 137860584, + "step": 204545 + }, + { + "epoch": 4.997190530867515, + "grad_norm": 0.0002223595802206546, + "learning_rate": 1.9571123873740602e-12, + "loss": 0.0, + "num_input_tokens_seen": 137863976, + "step": 204550 + }, + { + "epoch": 4.997312681699363, + "grad_norm": 4.211713530821726e-05, + "learning_rate": 1.7920319849151766e-12, + "loss": 0.0, + "num_input_tokens_seen": 137867496, + "step": 204555 + }, + { + "epoch": 4.9974348325312095, + "grad_norm": 0.00966684427112341, + "learning_rate": 1.6342238382538453e-12, + "loss": 0.0, + "num_input_tokens_seen": 137871272, + "step": 204560 + }, + { + "epoch": 4.997556983363056, + "grad_norm": 0.0009443532908335328, + "learning_rate": 1.4836879488333564e-12, + "loss": 0.0, + "num_input_tokens_seen": 137874280, + "step": 204565 + }, + { + "epoch": 4.997679134194904, + "grad_norm": 7.629985702806152e-06, + "learning_rate": 1.340424317430866e-12, + "loss": 0.0, + "num_input_tokens_seen": 137877672, + "step": 204570 + }, + { + "epoch": 4.9978012850267515, + "grad_norm": 5.480439995153574e-06, + "learning_rate": 1.2044329453786417e-12, + "loss": 0.0, + "num_input_tokens_seen": 137881192, + "step": 204575 + }, + { + "epoch": 4.997923435858598, + "grad_norm": 3.845514220301993e-05, + "learning_rate": 1.0757138334538396e-12, + "loss": 0.0, + "num_input_tokens_seen": 137884712, + "step": 204580 + }, + { + "epoch": 4.998045586690445, + "grad_norm": 0.0005065813893452287, + "learning_rate": 9.542669827666827e-13, + "loss": 0.0, + "num_input_tokens_seen": 137887720, + "step": 204585 + }, + { + "epoch": 4.998167737522293, + "grad_norm": 1.7385242244927213e-05, + "learning_rate": 8.400923939833049e-13, + "loss": 0.0, + "num_input_tokens_seen": 137891240, + "step": 204590 + }, + { + "epoch": 4.998289888354139, + "grad_norm": 0.004977858159691095, + "learning_rate": 7.331900682139292e-13, + "loss": 0.0, + "num_input_tokens_seen": 137894696, + "step": 204595 + }, + { + "epoch": 4.998412039185987, + "grad_norm": 0.0001772203977452591, + "learning_rate": 6.335600059026447e-13, + "loss": 0.0, + "num_input_tokens_seen": 137898152, + "step": 204600 + }, + { + "epoch": 4.998534190017834, + "grad_norm": 0.00018388996249996126, + "learning_rate": 5.412022080486523e-13, + "loss": 0.0, + "num_input_tokens_seen": 137901288, + "step": 204605 + }, + { + "epoch": 4.998656340849681, + "grad_norm": 0.0003622965596150607, + "learning_rate": 4.561166752070633e-13, + "loss": 0.0, + "num_input_tokens_seen": 137905000, + "step": 204610 + }, + { + "epoch": 4.998778491681528, + "grad_norm": 1.2093821169401053e-05, + "learning_rate": 3.7830340804401173e-13, + "loss": 0.0, + "num_input_tokens_seen": 137909032, + "step": 204615 + }, + { + "epoch": 4.998900642513376, + "grad_norm": 0.11611104011535645, + "learning_rate": 3.07762407114609e-13, + "loss": 0.0, + "num_input_tokens_seen": 137912168, + "step": 204620 + }, + { + "epoch": 4.9990227933452225, + "grad_norm": 0.004503564443439245, + "learning_rate": 2.444936728629443e-13, + "loss": 0.0, + "num_input_tokens_seen": 137915560, + "step": 204625 + }, + { + "epoch": 4.99914494417707, + "grad_norm": 3.479938823147677e-05, + "learning_rate": 1.884972058441292e-13, + "loss": 0.0, + "num_input_tokens_seen": 137918888, + "step": 204630 + }, + { + "epoch": 4.999267095008917, + "grad_norm": 0.00011234768317081034, + "learning_rate": 1.3977300639123058e-13, + "loss": 0.0, + "num_input_tokens_seen": 137922344, + "step": 204635 + }, + { + "epoch": 4.9993892458407645, + "grad_norm": 0.0003669550933409482, + "learning_rate": 9.832107483731533e-14, + "loss": 0.0, + "num_input_tokens_seen": 137925800, + "step": 204640 + }, + { + "epoch": 4.999511396672611, + "grad_norm": 0.0006305554416030645, + "learning_rate": 6.414141162647269e-14, + "loss": 0.0, + "num_input_tokens_seen": 137929256, + "step": 204645 + }, + { + "epoch": 4.999633547504459, + "grad_norm": 0.00038496809429489076, + "learning_rate": 3.723401686972494e-14, + "loss": 0.0, + "num_input_tokens_seen": 137932264, + "step": 204650 + }, + { + "epoch": 4.999755698336306, + "grad_norm": 0.0001226778404088691, + "learning_rate": 1.7598890900138997e-14, + "loss": 0.0, + "num_input_tokens_seen": 137935272, + "step": 204655 + }, + { + "epoch": 4.999877849168152, + "grad_norm": 0.00022551536676473916, + "learning_rate": 5.236033606692558e-15, + "loss": 0.0013, + "num_input_tokens_seen": 137938536, + "step": 204660 + }, + { + "epoch": 5.0, + "grad_norm": 0.000892703770659864, + "learning_rate": 1.454454334748334e-16, + "loss": 0.0, + "num_input_tokens_seen": 137941664, + "step": 204665 + }, + { + "epoch": 5.0, + "num_input_tokens_seen": 137941664, + "step": 204665, + "total_flos": 8.054243640264622e+17, + "train_loss": 0.052412337061547826, + "train_runtime": 15894.9677, + "train_samples_per_second": 103.008, + "train_steps_per_second": 12.876 + } + ], + "logging_steps": 5, + "max_steps": 204665, + "num_input_tokens_seen": 137941664, + "num_train_epochs": 5, + "save_steps": 10234, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 8.054243640264622e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..97183f7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a36b4a7a224ca5cf10921ad2f03cce32ed0aef96de468fb7ec1842fdbbbb8f +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000..4ad5880 Binary files /dev/null and b/training_eval_loss.png differ diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..cc9d960 Binary files /dev/null and b/training_loss.png differ